/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "dapl.h" #include "dapl_adapter_util.h" #include "dapl_tavor_ibtf_impl.h" #include "dapl_hca_util.h" #include "dapl_name_service.h" #define MAX_HCAS 64 #define PROP_HCA_GUID "hca-guid" #define PROP_PORT_NUM "port-number" #define PROP_PORT_PKEY "port-pkey" #define DEVDAPLT "/dev/daplt" /* function prototypes */ static DAT_RETURN dapli_process_tavor_node(char *dev_path, int *hca_idx, int try_blueflame); static DAT_RETURN dapli_process_ia(dladm_ib_attr_t *ib_attr, DAPL_HCA *hca_ptr, int hca_idx); #if defined(IBHOSTS_NAMING) #include static int dapli_process_fake_ibds(DAPL_HCA **hca_list, int hca_idx); #endif /* IBHOSTS_NAMING */ static DAPL_OS_LOCK g_tavor_state_lock; static struct dapls_ib_hca_state g_tavor_state[MAX_HCAS]; DAPL_OS_LOCK g_tavor_uar_lock; DAPL_OS_LOCK dapls_ib_dbp_lock; DAT_RETURN dapli_init_hca( IN DAPL_HCA *hca_ptr) { DAT_RETURN dat_status = DAT_SUCCESS; int hca_idx = 0; int check_for_bf = 0; datalink_class_t class; datalink_id_t linkid; dladm_ib_attr_t ib_attr; ibnex_ctl_query_hca_t query_hca; int ibnex_fd = -1; dladm_handle_t dlh; char hca_device_path[MAXPATHLEN]; if (dladm_open(&dlh) != DLADM_STATUS_OK) { dapl_dbg_log(DAPL_DBG_TYPE_ERR, "init_hca: dladm_open failed\n"); return (DAT_INTERNAL_ERROR); } if ((ibnex_fd = open(IBNEX_DEVCTL_DEV, O_RDONLY)) < 0) { dat_status = DAT_ERROR(DAT_NAME_NOT_FOUND, 0); dapl_dbg_log(DAPL_DBG_TYPE_UTIL, "init_hca: could not open ib nexus (%s)\n", strerror(errno)); goto bail; } if ((dladm_name2info(dlh, hca_ptr->name, &linkid, NULL, &class, NULL) != DLADM_STATUS_OK) || (class != DATALINK_CLASS_PART) || (dladm_part_info(dlh, linkid, &ib_attr, DLADM_OPT_ACTIVE) != DLADM_STATUS_OK)) { dat_status = DAT_ERROR(DAT_NAME_NOT_FOUND, 0); dapl_dbg_log(DAPL_DBG_TYPE_UTIL, "init_hca: %s not found - couldn't get partition info\n", hca_ptr->name); goto bail; } bzero(&query_hca, sizeof (query_hca)); query_hca.hca_guid = ib_attr.dia_hca_guid; query_hca.hca_device_path = hca_device_path; query_hca.hca_device_path_alloc_sz = sizeof (hca_device_path); if (ioctl(ibnex_fd, IBNEX_CTL_QUERY_HCA, &query_hca) == -1) { dat_status = DAT_ERROR(DAT_NAME_NOT_FOUND, 0); dapl_dbg_log(DAPL_DBG_TYPE_UTIL, "init_hca: %s not found; query_hca failed\n", hca_ptr->name); goto bail; } if (strcmp(query_hca.hca_info.hca_driver_name, "tavor") == 0) dapls_init_funcs_tavor(hca_ptr); else if (strcmp(query_hca.hca_info.hca_driver_name, "arbel") == 0) dapls_init_funcs_arbel(hca_ptr); else if (strcmp(query_hca.hca_info.hca_driver_name, "hermon") == 0) { dapls_init_funcs_hermon(hca_ptr); check_for_bf = 1; } else { dat_status = DAT_ERROR(DAT_NAME_NOT_FOUND, 0); dapl_dbg_log(DAPL_DBG_TYPE_UTIL, "init_hca: %s not found\n", hca_ptr->name); goto bail; } dat_status = dapli_process_tavor_node(hca_device_path, &hca_idx, check_for_bf); if (dat_status != DAT_SUCCESS) { dapl_dbg_log(DAPL_DBG_TYPE_UTIL, "init_hcas: %s process_tavor_node failed(0x%x)\n", hca_ptr->name, dat_status); goto bail; } #if defined(IBHOSTS_NAMING) if (dapli_process_fake_ibds(hca_ptr, hca_idx) == 0) { /* no entries were found */ dat_status = DAT_ERROR(DAT_NAME_NOT_FOUND, 0); } #else dat_status = dapli_process_ia(&ib_attr, hca_ptr, hca_idx); #endif if (dat_status != DAT_SUCCESS) { dapl_dbg_log(DAPL_DBG_TYPE_UTIL, "init_hcas: %s process_ia failed(0x%x)\n", hca_ptr->name, dat_status); goto bail; } dapl_dbg_log(DAPL_DBG_TYPE_UTIL, "init_hcas: done %s\n", hca_ptr->name); bail: if (ibnex_fd != -1) (void) close(ibnex_fd); dladm_close(dlh); return (dat_status); } static DAT_RETURN dapli_process_tavor_node(char *dev_path, int *hca_idx, int try_blueflame) { char path_buf[MAXPATHLEN]; int i, idx, fd; #ifndef _LP64 int tmpfd; #endif size_t pagesize; void *mapaddr; pid_t cur_pid; off64_t uarpg_offset; dapl_os_lock(&g_tavor_state_lock); for (idx = 0; idx < MAX_HCAS; idx++) { /* * page size == 0 means this entry is not occupied */ if (g_tavor_state[idx].uarpg_size == 0) { break; } } if (idx == MAX_HCAS) { dapl_dbg_log(DAPL_DBG_TYPE_ERR, "process_tavor: all hcas are being used!\n"); dapl_os_unlock(&g_tavor_state_lock); return (DAT_ERROR(DAT_INSUFFICIENT_RESOURCES, 0)); } for (i = 0; i < idx; i++) { if (strcmp(dev_path, g_tavor_state[i].hca_path) == 0) { /* no need for a refcnt */ idx = i; goto done; } } /* Add 16 to accomodate the prefix "/devices" and suffix ":devctl" */ if (strlen("/devices") + strlen(dev_path) + strlen(":devctl") + 1 > MAXPATHLEN) { dapl_dbg_log(DAPL_DBG_TYPE_ERR, "process_tavor: devfs path %s is too long\n", dev_path); dapl_os_unlock(&g_tavor_state_lock); return (DAT_ERROR(DAT_INTERNAL_ERROR, 0)); } (void) dapl_os_strcpy(path_buf, "/devices"); (void) dapl_os_strcat(path_buf, dev_path); (void) dapl_os_strcat(path_buf, ":devctl"); (void) dapl_os_strcpy(g_tavor_state[idx].hca_path, dev_path); pagesize = (size_t)sysconf(_SC_PAGESIZE); if (pagesize == 0) { dapl_dbg_log(DAPL_DBG_TYPE_ERR, "process_tavor: page_size == 0\n"); dapl_os_unlock(&g_tavor_state_lock); return (DAT_ERROR(DAT_INTERNAL_ERROR, 0)); } cur_pid = getpid(); fd = open(path_buf, O_RDWR); if (fd < 0) { dapl_dbg_log(DAPL_DBG_TYPE_ERR, "process_tavor: cannot open %s: %s\n", path_buf, strerror(errno)); dapl_os_unlock(&g_tavor_state_lock); return (DAT_ERROR(DAT_INSUFFICIENT_RESOURCES, 0)); } #ifndef _LP64 /* * libc can't handle fd's greater than 255, in order to * ensure that these values remain available make fd > 255. * Note: not needed for LP64 */ tmpfd = fcntl(fd, F_DUPFD, 256); if (tmpfd < 0) { dapl_dbg_log(DAPL_DBG_TYPE_UTIL, "process_tavor: cannot F_DUPFD: %s\n", strerror(errno)); } else { (void) close(fd); fd = tmpfd; } #endif /* _LP64 */ if (fcntl(fd, F_SETFD, FD_CLOEXEC) < 0) { dapl_dbg_log(DAPL_DBG_TYPE_ERR, "process_tavor: cannot F_SETFD: %s\n", strerror(errno)); (void) close(fd); dapl_os_unlock(&g_tavor_state_lock); return (DAT_ERROR(DAT_INTERNAL_ERROR, 0)); } uarpg_offset = (((off64_t)cur_pid << MLNX_UMAP_RSRC_TYPE_SHIFT) | MLNX_UMAP_UARPG_RSRC) * pagesize; mapaddr = mmap64((void *)0, pagesize, PROT_READ | PROT_WRITE, MAP_SHARED, fd, uarpg_offset); if (mapaddr == MAP_FAILED) { dapl_dbg_log(DAPL_DBG_TYPE_ERR, "process_tavor: mmap failed %s\n", strerror(errno)); (void) close(fd); dapl_os_unlock(&g_tavor_state_lock); return (DAT_ERROR(DAT_INSUFFICIENT_RESOURCES, 0)); } g_tavor_state[idx].hca_fd = fd; g_tavor_state[idx].uarpg_baseaddr = mapaddr; g_tavor_state[idx].uarpg_size = pagesize; if (try_blueflame == 0) goto done; /* Try to do the Hermon Blueflame page mapping */ uarpg_offset = (((off64_t)cur_pid << MLNX_UMAP_RSRC_TYPE_SHIFT) | MLNX_UMAP_BLUEFLAMEPG_RSRC) * pagesize; mapaddr = mmap64((void *)0, pagesize, PROT_READ | PROT_WRITE, MAP_SHARED, fd, uarpg_offset); if (mapaddr == MAP_FAILED) { /* This is not considered to be fatal. Charge on! */ dapl_dbg_log(DAPL_DBG_TYPE_WARN, "process_tavor: mmap of blueflame page failed %s\n", strerror(errno)); } else { g_tavor_state[idx].bf_pg_baseaddr = mapaddr; g_tavor_state[idx].bf_toggle = 0; } done: dapl_os_unlock(&g_tavor_state_lock); *hca_idx = idx; return (DAT_SUCCESS); } static DAT_RETURN dapli_process_ia(dladm_ib_attr_t *ib_attr, DAPL_HCA *hca_ptr, int hca_idx) { struct lifreq lifreq; int sfd, retval, af; char addr_buf[64]; if (ib_attr->dia_hca_guid == 0 || ib_attr->dia_portnum == 0 || ib_attr->dia_pkey == 0) { dapl_dbg_log(DAPL_DBG_TYPE_ERR, "process_ia: invalid properties: guid 0x%016llx, " "port %d, pkey 0x%08x\n", ib_attr->dia_hca_guid, ib_attr->dia_portnum, (uint_t)ib_attr->dia_pkey); return (DAT_ERROR(DAT_INVALID_PARAMETER, 0)); } /* * if an interface has both v4 and v6 addresses plumbed, * we'll take the v4 address. */ af = AF_INET; again: sfd = socket(af, SOCK_DGRAM, 0); if (sfd < 0) { dapl_dbg_log(DAPL_DBG_TYPE_ERR, "process_ia: socket failed: %s\n", strerror(errno)); return (DAT_ERROR(DAT_INSUFFICIENT_RESOURCES, 0)); } /* check if name will fit in lifr_name */ if (dapl_os_strlen(hca_ptr->name) >= LIFNAMSIZ) { (void) close(sfd); dapl_dbg_log(DAPL_DBG_TYPE_ERR, "process_ia: if name overflow %s\n", hca_ptr->name); return (DAT_ERROR(DAT_INVALID_PARAMETER, 0)); } (void) dapl_os_strcpy(lifreq.lifr_name, hca_ptr->name); retval = ioctl(sfd, SIOCGLIFADDR, (caddr_t)&lifreq); if (retval < 0) { (void) close(sfd); if (af == AF_INET6) { /* * the interface is not plumbed. */ dapl_dbg_log(DAPL_DBG_TYPE_UTIL, "process_ia: %s: ip address not found\n", lifreq.lifr_name); return (DAT_ERROR(DAT_INSUFFICIENT_RESOURCES, 0)); } else { /* * we've failed to find a v4 address. now * let's try v6. */ af = AF_INET6; goto again; } } (void) close(sfd); hca_ptr->tavor_idx = hca_idx; hca_ptr->node_GUID = ib_attr->dia_hca_guid; hca_ptr->port_num = ib_attr->dia_portnum; hca_ptr->partition_key = ib_attr->dia_pkey; (void) dapl_os_memcpy((void *)&hca_ptr->hca_address, (void *)&lifreq.lifr_addr, sizeof (hca_ptr->hca_address)); hca_ptr->max_inline_send = dapls_tavor_max_inline(); dapl_dbg_log(DAPL_DBG_TYPE_UTIL, "process_ia: interface %s, hca guid 0x%016llx, port %d, " "pkey 0x%08x, ip addr %s\n", lifreq.lifr_name, hca_ptr->node_GUID, hca_ptr->port_num, hca_ptr->partition_key, dapls_inet_ntop( (struct sockaddr *)&hca_ptr->hca_address, addr_buf, 64)); return (DAT_SUCCESS); } void dapls_ib_state_init(void) { int i; (void) dapl_os_lock_init(&g_tavor_state_lock); (void) dapl_os_lock_init(&g_tavor_uar_lock); (void) dapl_os_lock_init(&dapls_ib_dbp_lock); for (i = 0; i < MAX_HCAS; i++) { g_tavor_state[i].hca_fd = 0; g_tavor_state[i].uarpg_baseaddr = NULL; g_tavor_state[i].uarpg_size = 0; g_tavor_state[i].bf_pg_baseaddr = NULL; } } void dapls_ib_state_fini(void) { int i, count = 0; /* * Uinitialize the per hca instance state */ dapl_os_lock(&g_tavor_state_lock); for (i = 0; i < MAX_HCAS; i++) { if (g_tavor_state[i].uarpg_size == 0) { dapl_os_assert(g_tavor_state[i].uarpg_baseaddr == NULL); continue; } if (munmap(g_tavor_state[i].uarpg_baseaddr, g_tavor_state[i].uarpg_size) < 0) { dapl_dbg_log(DAPL_DBG_TYPE_ERR, "ib_state_fini: " "munmap(0x%p, 0x%llx) failed(%d)\n", g_tavor_state[i].uarpg_baseaddr, g_tavor_state[i].uarpg_size, errno); } if ((g_tavor_state[i].bf_pg_baseaddr != NULL) && (munmap(g_tavor_state[i].bf_pg_baseaddr, g_tavor_state[i].uarpg_size) < 0)) { dapl_dbg_log(DAPL_DBG_TYPE_ERR, "ib_state_fini: " "munmap(0x%p, 0x%llx) of blueflame failed(%d)\n", g_tavor_state[i].bf_pg_baseaddr, g_tavor_state[i].uarpg_size, errno); } (void) close(g_tavor_state[i].hca_fd); count++; } dapl_os_unlock(&g_tavor_state_lock); dapl_os_lock_destroy(&g_tavor_uar_lock); dapl_os_lock_destroy(&g_tavor_state_lock); dapl_os_lock_destroy(&dapls_ib_dbp_lock); dapl_dbg_log(DAPL_DBG_TYPE_UTIL, "ib_state_fini: cleaned %d hcas\n", count); } /* * dapls_ib_open_hca * * Open HCA * * Input: * *hca_ptr pointer to hca device * *ib_hca_handle_p pointer to provide HCA handle * * Output: * none * * Return: * DAT_SUCCESS * DAT_INSUFFICIENT_RESOURCES * */ DAT_RETURN dapls_ib_open_hca( IN DAPL_HCA *hca_ptr, OUT ib_hca_handle_t *ib_hca_handle_p) { dapl_ia_create_t args; DAT_RETURN dat_status; struct dapls_ib_hca_handle *hca_p; int fd; #ifndef _LP64 int tmpfd; #endif int retval; struct sockaddr *s; struct sockaddr_in6 *v6addr; struct sockaddr_in *v4addr; dapl_ia_addr_t *sap; dat_status = dapli_init_hca(hca_ptr); if (dat_status != DAT_SUCCESS) { dapl_dbg_log(DAPL_DBG_TYPE_ERR, "dapls_ib_open_hca: init_hca failed %d\n", dat_status); return (dat_status); } fd = open(DEVDAPLT, O_RDONLY); if (fd < 0) { return (DAT_INSUFFICIENT_RESOURCES); } #ifndef _LP64 /* * libc can't handle fd's greater than 255, in order to * ensure that these values remain available make fd > 255. * Note: not needed for LP64 */ tmpfd = fcntl(fd, F_DUPFD, 256); if (tmpfd < 0) { dapl_dbg_log(DAPL_DBG_TYPE_UTIL, "dapls_ib_open_hca: cannot F_DUPFD: %s\n", strerror(errno)); } else { (void) close(fd); fd = tmpfd; } #endif /* _LP64 */ if (fcntl(fd, F_SETFD, FD_CLOEXEC) < 0) { dapl_dbg_log(DAPL_DBG_TYPE_ERR, "dapls_ib_open_hca: cannot F_SETFD: %s\n", strerror(errno)); (void) close(fd); return (DAT_INTERNAL_ERROR); } hca_p = (struct dapls_ib_hca_handle *)dapl_os_alloc( sizeof (struct dapls_ib_hca_handle)); if (hca_p == NULL) { (void) close(fd); return (DAT_INSUFFICIENT_RESOURCES); } args.ia_guid = hca_ptr->node_GUID; args.ia_port = hca_ptr->port_num; args.ia_pkey = hca_ptr->partition_key; args.ia_version = DAPL_IF_VERSION; (void) dapl_os_memzero((void *)args.ia_sadata, DAPL_ATS_NBYTES); /* pass down local ip address to be stored in SA */ s = (struct sockaddr *)&hca_ptr->hca_address; /* LINTED: E_BAD_PTR_CAST_ALIGN */ sap = (dapl_ia_addr_t *)args.ia_sadata; switch (s->sa_family) { case AF_INET: /* LINTED: E_BAD_PTR_CAST_ALIGN */ v4addr = (struct sockaddr_in *)s; sap->iad_v4 = v4addr->sin_addr; break; case AF_INET6: /* LINTED: E_BAD_PTR_CAST_ALIGN */ v6addr = (struct sockaddr_in6 *)s; sap->iad_v6 = v6addr->sin6_addr; break; default: break; /* fall through */ } retval = ioctl(fd, DAPL_IA_CREATE, &args); if (retval != 0) { dapl_dbg_log(DAPL_DBG_TYPE_ERR, "open_hca: ia_create failed, fd %d, " "guid 0x%016llx, port %d, pkey 0x%x, version %d\n", fd, args.ia_guid, args.ia_port, args.ia_pkey, args.ia_version); dapl_os_free(hca_p, sizeof (*hca_p)); (void) close(fd); return (dapls_convert_error(errno, retval)); } hca_p->ia_fd = fd; hca_p->ia_rnum = args.ia_resnum; hca_p->hca_fd = g_tavor_state[hca_ptr->tavor_idx].hca_fd; hca_p->ia_uar = g_tavor_state[hca_ptr->tavor_idx].uarpg_baseaddr; hca_p->ia_bf = g_tavor_state[hca_ptr->tavor_idx].bf_pg_baseaddr; hca_p->ia_bf_toggle = &g_tavor_state[hca_ptr->tavor_idx].bf_toggle; *ib_hca_handle_p = hca_p; dapl_dbg_log(DAPL_DBG_TYPE_UTIL, "open_hca: ia_created, hca_p 0x%p, fd %d, " "rnum %d, guid 0x%016llx, port %d, pkey 0x%x\n", hca_p, hca_p->ia_fd, hca_p->ia_rnum, hca_ptr->node_GUID, hca_ptr->port_num, hca_ptr->partition_key); return (DAT_SUCCESS); } /* * dapls_ib_close_hca * * Open HCA * * Input: * ib_hca_handle provide HCA handle * * Output: * none * * Return: * DAT_SUCCESS * DAT_INSUFFICIENT_RESOURCES * */ DAT_RETURN dapls_ib_close_hca( IN ib_hca_handle_t ib_hca_handle) { if (ib_hca_handle == NULL) { dapl_dbg_log(DAPL_DBG_TYPE_ERR, "close_hca: ib_hca_handle == NULL\n"); return (DAT_SUCCESS); } dapl_dbg_log(DAPL_DBG_TYPE_UTIL, "close_hca: closing hca 0x%p, fd %d, rnum %d\n", ib_hca_handle, ib_hca_handle->ia_fd, ib_hca_handle->ia_rnum); (void) close(ib_hca_handle->ia_fd); dapl_os_free((void *)ib_hca_handle, sizeof (struct dapls_ib_hca_handle)); return (DAT_SUCCESS); } #if defined(IBHOSTS_NAMING) #define LINE_LEN 256 static int dapli_process_fake_ibds(DAPL_HCA *hca_ptr, int hca_idx) { char line_buf[LINE_LEN]; char host_buf[LINE_LEN]; char localhost[LINE_LEN]; ib_guid_t prefix; ib_guid_t guid; FILE *fp; int count = 0; DAPL_HCA *hca_ptr; fp = fopen("/etc/dapl/ibhosts", "r"); if (fp == NULL) { dapl_dbg_log(DAPL_DBG_TYPE_ERR, "fake_ibds: ibhosts not found!\n"); return (0); } if (gethostname(localhost, LINE_LEN) != 0) { dapl_dbg_log(DAPL_DBG_TYPE_ERR, "fake_ibds: hostname not found!\n"); return (0); } while (!feof(fp)) { (void) fgets(line_buf, LINE_LEN, fp); sscanf(line_buf, "%s %llx %llx", host_buf, &prefix, &guid); (void) sprintf(line_buf, "%s-ib%d", localhost, count + 1); if (strncmp(line_buf, host_buf, strlen(line_buf)) == 0) { guid &= 0xfffffffffffffff0; hca_ptr->tavor_idx = hca_idx; hca_ptr->node_GUID = guid; hca_ptr->port_num = count + 1; hca_ptr->partition_key = 0x0000ffff; count++; } if (count >= 2) break; } (void) fclose(fp); return (count); } #endif /* IBHOSTS_NAMING */