/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. */ /* Solaris Open Fabric kernel verbs */ #include #include #include #include #include #include #include #include static void *statep; char *sol_kverbs_dbg_str = "sol_kverbs"; static llist_head_t client_list = LLIST_HEAD_INIT(client_list); kmutex_t clist_lock; /* mutex for client_list */ static void ofs_async_handler(void *, ibt_hca_hdl_t, ibt_async_code_t, ibt_async_event_t *); /* * set ibt_client_t members. clnt->ib_client must be set before * this func is called. */ static int alloc_ibt_client(ofs_client_t *clnt) { int namelen; ASSERT(clnt->ib_client != NULL); SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str, "alloc_ibt_client: client: 0x%p", clnt); /* * double-check the name string. if it's longer than MAXNAMELEN * including the string terminator, assuming the name is invalid, * return EINVAL. 
*/ namelen = strlen(clnt->ib_client->name); if (namelen >= MAXNAMELEN) { SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str, "alloc_ibt_client: client: 0x%p => " "namelen(%d) is larger than MAXNAMELEN", clnt, namelen); return (-EINVAL); } clnt->ibt_client.mi_clnt_name = kmem_zalloc(namelen + 1, KM_NOSLEEP); if (clnt->ibt_client.mi_clnt_name == NULL) { SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str, "alloc_ibt_client: client: 0x%p => " "no sufficient memory", clnt); return (-ENOMEM); } bcopy(clnt->ib_client->name, clnt->ibt_client.mi_clnt_name, namelen); clnt->ibt_client.mi_ibt_version = IBTI_V_CURR; if (clnt->ib_client->dip) { clnt->ibt_client.mi_clnt_class = IBT_GENERIC; } else { clnt->ibt_client.mi_clnt_class = IBT_GENERIC_MISC; } clnt->ibt_client.mi_async_handler = ofs_async_handler; return (0); } static void free_ibt_client(ofs_client_t *clnt) { int namelen = strlen(clnt->ib_client->name); ASSERT(namelen < MAXNAMELEN); SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str, "free_ibt_client: client: 0x%p", clnt); kmem_free(clnt->ibt_client.mi_clnt_name, namelen + 1); clnt->ibt_client.mi_clnt_name = NULL; } /* * get_device() returns a pointer to struct ib_devcie with * the same guid as one passed to the function. 
*/ static ib_device_t * get_device(ofs_client_t *ofs_client, ib_guid_t guid) { ib_device_t *device; llist_head_t *entry; ASSERT(RW_LOCK_HELD(&ofs_client->lock)); SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str, "get_device: client: 0x%p, guid:0x%p", ofs_client, (void *)(uintptr_t)htonll(guid)); list_for_each(entry, &ofs_client->device_list) { device = entry->ptr; if (device->node_guid == htonll(guid)) { ASSERT(device->reg_state == IB_DEV_CLOSE); ASSERT(device->node_type == RDMA_NODE_IB_CA); ASSERT(device->clnt_hdl == (ofs_client_p_t)ofs_client); return (device); } } SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str, "get_device: client: 0x%p, guid:0x%p => no match guid", ofs_client, (void *)(uintptr_t)htonll(guid)); return (NULL); } /* * ofs_async_handler() is a delegated function to handle asynchrnonous events, * which dispatches each event to corresponding qp/cq handlers registered * with ib_create_qp() and/or ib_create_cq(). */ static void ofs_async_handler(void *clntp, ibt_hca_hdl_t hdl, ibt_async_code_t code, ibt_async_event_t *event) { ofs_client_t *ofs_client = (ofs_client_t *)clntp; struct ib_event ib_event; struct ib_qp *qpp; struct ib_cq *cqp; ASSERT(ofs_client != NULL); cqp = event->ev_cq_hdl ? ibt_get_cq_private(event->ev_cq_hdl) : NULL; qpp = event->ev_chan_hdl ? 
ibt_get_qp_private(event->ev_chan_hdl) : NULL; SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str, "ofs_async_handler: client: 0x%p, hca_hdl: 0x%p, code:0x%x, " "event->qp: 0x%p, event->cq: 0x%p, event->srq: 0x%p " "event->guid: 0x%p, event->port: 0x%x", clntp, hdl, code, qpp, cqp, event->ev_srq_hdl, (void *)(uintptr_t)event->ev_hca_guid, event->ev_port); bzero(&ib_event, sizeof (struct ib_event)); switch (code) { case IBT_EVENT_PATH_MIGRATED: FIRE_QP_EVENT(ofs_client, hdl, ib_event, qpp, IB_EVENT_PATH_MIG); return; case IBT_EVENT_SQD: FIRE_QP_EVENT(ofs_client, hdl, ib_event, qpp, IB_EVENT_SQ_DRAINED); return; case IBT_EVENT_COM_EST: FIRE_QP_EVENT(ofs_client, hdl, ib_event, qpp, IB_EVENT_COMM_EST); return; case IBT_ERROR_CATASTROPHIC_CHAN: FIRE_QP_EVENT(ofs_client, hdl, ib_event, qpp, IB_EVENT_QP_FATAL); return; case IBT_ERROR_INVALID_REQUEST_CHAN: FIRE_QP_EVENT(ofs_client, hdl, ib_event, qpp, IB_EVENT_QP_REQ_ERR); return; case IBT_ERROR_ACCESS_VIOLATION_CHAN: FIRE_QP_EVENT(ofs_client, hdl, ib_event, qpp, IB_EVENT_QP_ACCESS_ERR); return; case IBT_ERROR_PATH_MIGRATE_REQ: FIRE_QP_EVENT(ofs_client, hdl, ib_event, qpp, IB_EVENT_PATH_MIG); return; case IBT_EVENT_EMPTY_CHAN: FIRE_QP_EVENT(ofs_client, hdl, ib_event, qpp, IB_EVENT_QP_LAST_WQE_REACHED); return; case IBT_ERROR_CQ: FIRE_CQ_EVENT(ofs_client, hdl, ib_event, cqp, IB_EVENT_CQ_ERR); return; case IBT_HCA_ATTACH_EVENT: { ib_device_t *device; int rtn; /* re-use the device once it was created */ rw_enter(&ofs_client->lock, RW_WRITER); device = get_device(ofs_client, event->ev_hca_guid); if (device == NULL) { device = kmem_alloc(sizeof (ib_device_t), KM_SLEEP); device->node_type = RDMA_NODE_IB_CA; device->reg_state = IB_DEV_CLOSE; device->clnt_hdl = (ofs_client_p_t)ofs_client; device->node_guid = htonll(event->ev_hca_guid); device->data = NULL; /* add this HCA */ ofs_client->hca_num++; llist_head_init(&device->list, device); llist_add_tail(&device->list, &ofs_client->device_list); } device->hca_hdl = NULL; device->local_dma_lkey = 
0; device->phys_port_cnt = 0; /* open this HCA */ rtn = ibt_open_hca(ofs_client->ibt_hdl, event->ev_hca_guid, &device->hca_hdl); if (rtn == IBT_SUCCESS) { ibt_hca_attr_t hattr; ofs_client->hca_open_num++; device->reg_state = IB_DEV_OPEN; ibt_set_hca_private(device->hca_hdl, device); rtn = ibt_query_hca(device->hca_hdl, &hattr); if (rtn != IBT_SUCCESS) { device->reg_state = IB_DEV_CLOSE; rtn = ibt_close_hca(device->hca_hdl); ASSERT(rtn == IBT_SUCCESS); ofs_client->hca_open_num--; return; } (void) sprintf(device->name, "%x:%x:%x", hattr.hca_vendor_id, hattr.hca_device_id, hattr.hca_version_id); device->local_dma_lkey = hattr.hca_reserved_lkey; device->phys_port_cnt = hattr.hca_nports; ibt_set_hca_private(device->hca_hdl, device); /* invoke client's callback */ if (ofs_client->ib_client->add) { ofs_client->ib_client->add(device); } } rw_exit(&ofs_client->lock); return; } case IBT_HCA_DETACH_EVENT: { struct ib_device *device; rw_enter(&ofs_client->lock, RW_WRITER); device = ibt_get_hca_private(hdl); if (device->reg_state == IB_DEV_OPEN) { ibt_status_t rtn; /* invoke client's callback */ if (ofs_client->ib_client->remove) { ofs_client->ib_client->remove(device); } /* change the state only */ device->reg_state = IB_DEV_CLOSE; /* close this HCA */ rtn = ibt_close_hca(device->hca_hdl); ASSERT(rtn == IBT_SUCCESS); ofs_client->hca_open_num--; } rw_exit(&ofs_client->lock); return; } case IBT_EVENT_LIMIT_REACHED_SRQ: case IBT_ERROR_CATASTROPHIC_SRQ: default: SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str, "sol_ofs does not support this event(0x%x).\n" "\t clntp=0x%p, hca_hdl=0x%p, code=%d, eventp=0x%p\n", code, clntp, hdl, code, event); return; } } /* * ib_register_client - Register an IB client * @client:Client to register * * Upper level users of the IB drivers can use ib_register_client() to * register callbacks for IB device addition and removal. 
When an IB * device is added, each registered client's add method will be called * (in the order the clients were registered), and when a device is * removed, each client's remove method will be called (in the reverse * order that clients were registered). In addition, when * ib_register_client() is called, the client will receive an add * callback for all devices already registered. * * Note that struct ib_client should have a dip pointer to the client, * which is different from the Linux implementation. */ int ib_register_client(struct ib_client *client) { uint_t i, nhcas; /* number of HCAs */ ib_guid_t *guidp; ofs_client_t *ofs_client; llist_head_t *entry, *tmp; ib_device_t *device; int rtn; SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str, "ib_register_client: client: 0x%p", client); /* get the number of HCAs on this system */ if ((nhcas = ibt_get_hca_list(&guidp)) == 0) { SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str, "ib_register_client: client: 0x%p => no HCA", client); return (-ENXIO); } /* allocate a new sol_ofs_client structure */ ofs_client = kmem_zalloc(sizeof (ofs_client_t), KM_NOSLEEP); if (ofs_client == NULL) { (void) ibt_free_hca_list(guidp, nhcas); SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str, "ib_register_client: client: 0x%p => " "no sufficient memory for ofs_client", client); return (-ENOMEM); } /* set members */ ofs_client->ib_client = client; if ((rtn = alloc_ibt_client(ofs_client)) != 0) { kmem_free(ofs_client, sizeof (ofs_client_t)); (void) ibt_free_hca_list(guidp, nhcas); SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str, "ib_register_client: client: 0x%p => " "alloc_ibt_client failed w/ 0x%x", client, rtn); return (rtn); } ofs_client->state = IB_OFS_CLNT_INITIALIZED; llist_head_init(&ofs_client->device_list, NULL); llist_head_init(&ofs_client->client_list, ofs_client); rw_init(&ofs_client->lock, NULL, RW_DEFAULT, NULL); /* initialize IB client */ rw_enter(&ofs_client->lock, RW_WRITER); if (client->state != IB_CLNT_UNINITIALIZED) { rw_exit(&ofs_client->lock); 
kmem_free(ofs_client, sizeof (ofs_client_t)); (void) ibt_free_hca_list(guidp, nhcas); SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str, "ib_register_client: client: 0x%p => " "invalid client state(%d)", client, client->state); return (-EPERM); } /* attach this client to IBTF */ rtn = ibt_attach(&ofs_client->ibt_client, client->dip, ofs_client, &ofs_client->ibt_hdl); if (rtn != IBT_SUCCESS) { rw_exit(&ofs_client->lock); free_ibt_client(ofs_client); kmem_free(ofs_client, sizeof (ofs_client_t)); (void) ibt_free_hca_list(guidp, nhcas); SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str, "ib_register_client: client: 0x%p => " "ibt_attach failed w/ 0x%x", client, rtn); return (-EINVAL); } client->clnt_hdl = (ofs_client_p_t)ofs_client; client->state = IB_CLNT_INITIALIZED; /* link this client */ mutex_enter(&clist_lock); llist_add_tail(&ofs_client->client_list, &client_list); mutex_exit(&clist_lock); /* Open HCAs */ ofs_client->hca_num = nhcas; for (i = 0; i < ofs_client->hca_num; i++) { /* allocate the ib_device structure */ device = kmem_zalloc(sizeof (ib_device_t), KM_NOSLEEP); if (device == NULL) { rtn = -ENOMEM; SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str, "ib_register_client: client: 0x%p => " "no sufficient memory for ib_device", client); goto err; } device->node_guid = htonll(guidp[i]); device->node_type = RDMA_NODE_IB_CA; device->reg_state = IB_DEV_CLOSE; device->clnt_hdl = (ofs_client_p_t)ofs_client; llist_head_init(&device->list, device); llist_add_tail(&device->list, &ofs_client->device_list); rtn = ibt_open_hca(ofs_client->ibt_hdl, guidp[i], &device->hca_hdl); if (rtn == IBT_SUCCESS) { ibt_hca_attr_t hattr; ofs_client->hca_open_num++; device->reg_state = IB_DEV_OPEN; rtn = ibt_query_hca(device->hca_hdl, &hattr); if (rtn != IBT_SUCCESS) { rtn = -EIO; SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str, "ib_register_client: client: 0x%p," "hca_hdl: 0x%p ==> " "ibt_query_hca() failed w/ %d", client, device->hca_hdl, rtn); goto err; } (void) sprintf(device->name, "%x:%x:%x", hattr.hca_vendor_id, 
hattr.hca_device_id, hattr.hca_version_id); device->local_dma_lkey = hattr.hca_reserved_lkey; device->phys_port_cnt = hattr.hca_nports; ibt_set_hca_private(device->hca_hdl, device); /* invoke client's callback */ if (client->add) { client->add(device); } } } if (ofs_client->hca_open_num == 0) { rtn = -ENXIO; SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str, "ib_register_client: client: 0x%p => " "no available HCA", client); goto err; } rw_exit(&ofs_client->lock); (void) ibt_free_hca_list(guidp, nhcas); return (0); err: /* first close all open HCAs */ list_for_each(entry, &ofs_client->device_list) { device = entry->ptr; /* * If it's open already, close it after the remove * callback. */ if (device->reg_state == IB_DEV_OPEN) { ibt_status_t rtn; /* invoke client's callback */ if (client->remove) { client->remove(device); } device->reg_state = IB_DEV_CLOSE; rtn = ibt_close_hca(device->hca_hdl); ASSERT(rtn == IBT_SUCCESS); ofs_client->hca_open_num--; } } ASSERT(ofs_client->hca_open_num == 0); /* then free the devices */ list_for_each_safe(entry, tmp, &ofs_client->device_list) { device = entry->ptr; /* de-link and free the device */ llist_del(entry); kmem_free(device, sizeof (ib_device_t)); ofs_client->hca_num--; } ASSERT(ofs_client->hca_num == 0); /* delink this client */ mutex_enter(&clist_lock); llist_del(&ofs_client->client_list); mutex_exit(&clist_lock); /* detach the client */ client->clnt_hdl = NULL; client->state = IB_CLNT_UNINITIALIZED; (void) ibt_detach(ofs_client->ibt_hdl); rw_exit(&ofs_client->lock); /* free sol_ofs_client */ free_ibt_client(ofs_client); kmem_free(ofs_client, sizeof (ofs_client_t)); (void) ibt_free_hca_list(guidp, nhcas); return (rtn); } /* * ib_unregister_client - Unregister an IB client * @client:Client to unregister * * Upper level users use ib_unregister_client() to remove their client * registration. When ib_unregister_client() is called, the client * will receive a remove callback for each IB device still registered. 
*/
void
ib_unregister_client(struct ib_client *client)
{
	ofs_client_t	*ofs_client;
	ib_device_t	*device;
	llist_head_t	*entry, *tmp;

	ASSERT(client->state == IB_CLNT_INITIALIZED &&
	    client->clnt_hdl != NULL);

	SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
	    "ib_unregister_client: client: 0x%p", client);

	ofs_client = (ofs_client_t *)client->clnt_hdl;
	rw_enter(&ofs_client->lock, RW_WRITER);

	/* first close all open HCAs */
	list_for_each(entry, &ofs_client->device_list) {
		device = entry->ptr;
		/*
		 * If it's open already, close it after the remove
		 * callback.
		 */
		if (device->reg_state == IB_DEV_OPEN) {
			ibt_status_t rtn;

			/* invoke client's callback */
			if (client->remove) {
				client->remove(device);
			}
			device->reg_state = IB_DEV_CLOSE;
			rtn = ibt_close_hca(device->hca_hdl);
			/* a failed close is only logged; teardown goes on */
			if (rtn != IBT_SUCCESS) {
				SOL_OFS_DPRINTF_L3(
				    sol_kverbs_dbg_str,
				    "ib_unregister_client(%p) - "
				    "ibt_close_hca failed %d",
				    client, rtn);
			}
			ofs_client->hca_open_num--;
		}
	}
	ASSERT(ofs_client->hca_open_num == 0);

	/* then free the devices */
	list_for_each_safe(entry, tmp, &ofs_client->device_list) {
		device = entry->ptr;
		/* de-link and free the device */
		llist_del(entry);
		kmem_free(device, sizeof (ib_device_t));
		ofs_client->hca_num--;
	}
	ASSERT(ofs_client->hca_num == 0);

	/* delink this client */
	mutex_enter(&clist_lock);
	llist_del(&ofs_client->client_list);
	mutex_exit(&clist_lock);

	/* detach the client */
	client->clnt_hdl = NULL;
	client->state = IB_CLNT_UNINITIALIZED;
	(void) ibt_detach(ofs_client->ibt_hdl);
	rw_exit(&ofs_client->lock);

	/*
	 * free sol_ofs_client; the lock set up with rw_init() at
	 * registration is destroyed before its memory is released.
	 */
	rw_destroy(&ofs_client->lock);
	free_ibt_client(ofs_client);
	kmem_free(ofs_client, sizeof (ofs_client_t));
}

/*
 * ofs_lock_enter() and ofs_lock_exit() are used to avoid the recursive
 * rwlock while the client callbacks are invoked.
 *
 * Note that the writer lock is used only in the client callback case,
 * so that the kverb functions wanting to acquire the reader lock can
 * safely ignore the reader lock if the writer lock is already held.
 * The writer lock shouldn't be used in any other places.
*/ static inline void ofs_lock_enter(krwlock_t *lock) { if (!RW_WRITE_HELD(lock)) { rw_enter(lock, RW_READER); } } static inline void ofs_lock_exit(krwlock_t *lock) { if (!RW_WRITE_HELD(lock)) { rw_exit(lock); } } /* * ib_get_client_data - Get IB client context * @device:Device to get context for * @client:Client to get context for * * ib_get_client_data() returns client context set with * ib_set_client_data() and returns NULL if it's not found. */ void *ib_get_client_data(struct ib_device *device, struct ib_client *client) { ofs_client_t *ofs_client; struct ib_device *ib_device; boolean_t found = B_FALSE; llist_head_t *entry; void *data; ASSERT(device != 0 && client != 0); ofs_client = (ofs_client_t *)client->clnt_hdl; if (ofs_client == 0) { SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str, "ib_get_client_data: device: 0x%p, client: 0x%p => " "no ofs_client", device, client); return (NULL); } ofs_lock_enter(&ofs_client->lock); list_for_each(entry, &ofs_client->device_list) { ib_device = entry->ptr; if (ib_device->node_guid == device->node_guid) { found = B_TRUE; break; } } if (!found) { ofs_lock_exit(&ofs_client->lock); SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str, "ib_get_client_data: device: 0x%p, client: 0x%p => " "no ib_device found", device, client); return (NULL); } data = ib_device->data; ofs_lock_exit(&ofs_client->lock); SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str, "ib_get_client_data: device: 0x%p, client: 0x%p", device, client); return (data); } /* * ib_set_client_data - Set IB client context * @device:Device to set context for * @client:Client to set context for * @data:Context to set * * ib_set_client_data() sets client context that can be retrieved with * ib_get_client_data(). If the specified device is not found, the function * returns w/o any operations. 
*/
void
ib_set_client_data(struct ib_device *device, struct ib_client *client,
    void *data)
{
	ofs_client_t		*oclnt;
	struct ib_device	*match = NULL;
	llist_head_t		*pos;

	ASSERT(device != NULL && client != NULL);

	oclnt = (ofs_client_t *)client->clnt_hdl;
	if (oclnt == NULL) {
		cmn_err(CE_WARN, "No client context found for %s/%s\n",
		    device->name, client->name);
		return;
	}

	ofs_lock_enter(&oclnt->lock);

	/* find the client's own entry for this HCA, matched by node GUID */
	list_for_each(pos, &oclnt->device_list) {
		struct ib_device *cand = pos->ptr;

		if (cand->node_guid == device->node_guid) {
			match = cand;
			break;
		}
	}

	if (match == NULL) {
		cmn_err(CE_WARN, "No client context found for %s/%s\n",
		    device->name, client->name);
		ofs_lock_exit(&oclnt->lock);
		return;
	}

	match->data = data;
	ofs_lock_exit(&oclnt->lock);

	SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
	    "ib_set_client_data: device: 0x%p, client: 0x%p, "
	    "data: 0x%p", device, client, data);
}

/*
 * ib_query_device - Query IB device attributes
 * @device:Device to query
 * @device_attr:Device attributes
 *
 * ib_query_device() returns the attributes of a device through the
 * @device_attr pointer.
*/ int ib_query_device(struct ib_device *device, struct ib_device_attr *attr) { ofs_client_t *ofs_client = (ofs_client_t *)device->clnt_hdl; ibt_hca_attr_t hattr; int rtn; ofs_lock_enter(&ofs_client->lock); if (device->reg_state != IB_DEV_OPEN) { ofs_lock_exit(&ofs_client->lock); SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str, "ib_query_device: device: 0x%p => " "invalid device state (%d)", device, device->reg_state); return (-ENXIO); } if ((rtn = ibt_query_hca(device->hca_hdl, &hattr)) != IBT_SUCCESS) { ofs_lock_exit(&ofs_client->lock); SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str, "ib_query_device: device: 0x%p => " "ibt_query_hca failed w/ 0x%x", device, rtn); return (-EIO); } ofs_lock_exit(&ofs_client->lock); SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str, "ib_query_device: device: 0x%p, attr: 0x%p, rtn: 0x%p", device, attr, rtn); /* OF order is major.micro.minor, so keep it here */ attr->fw_ver = (uint64_t)hattr.hca_fw_major_version << 32 | hattr.hca_fw_micro_version << 16 & 0xFFFF0000 | hattr.hca_fw_minor_version & 0xFFFF; attr->device_cap_flags = IB_DEVICE_CHANGE_PHY_PORT | IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN; if (hattr.hca_flags & IBT_HCA_PKEY_CNTR) { attr->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR; } if (hattr.hca_flags & IBT_HCA_QKEY_CNTR) { attr->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR; } if (hattr.hca_flags & IBT_HCA_AUTO_PATH_MIG) { attr->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG; } if (hattr.hca_flags & IBT_HCA_AH_PORT_CHECK) { attr->device_cap_flags |= IB_DEVICE_UD_AV_PORT_ENFORCE; } attr->vendor_id = hattr.hca_vendor_id; attr->vendor_part_id = hattr.hca_device_id; attr->hw_ver = hattr.hca_version_id; attr->sys_image_guid = htonll(hattr.hca_si_guid); attr->max_mr_size = ~0ull; attr->page_size_cap = IBTF2OF_PGSZ(hattr.hca_page_sz); attr->max_qp = hattr.hca_max_qp; attr->max_qp_wr = hattr.hca_max_qp_sz; attr->max_sge = hattr.hca_max_sgl; attr->max_sge_rd = hattr.hca_max_rd_sgl; attr->max_cq = hattr.hca_max_cq; attr->max_cqe 
= hattr.hca_max_cq_sz; attr->max_mr = hattr.hca_max_memr; attr->max_pd = hattr.hca_max_pd; attr->max_qp_rd_atom = hattr.hca_max_rdma_in_qp; attr->max_qp_init_rd_atom = hattr.hca_max_rdma_in_qp; attr->max_ee_rd_atom = hattr.hca_max_rdma_in_ee; attr->max_ee_init_rd_atom = hattr.hca_max_rdma_in_ee; attr->max_res_rd_atom = hattr.hca_max_rsc; attr->max_srq = hattr.hca_max_srqs; attr->max_srq_wr = hattr.hca_max_srqs_sz -1; attr->max_srq_sge = hattr.hca_max_srq_sgl; attr->local_ca_ack_delay = hattr.hca_local_ack_delay; attr->atomic_cap = hattr.hca_flags & IBT_HCA_ATOMICS_GLOBAL ? IB_ATOMIC_GLOB : (hattr.hca_flags & IBT_HCA_ATOMICS_HCA ? IB_ATOMIC_HCA : IB_ATOMIC_NONE); attr->max_ee = hattr.hca_max_eec; attr->max_rdd = hattr.hca_max_rdd; attr->max_mw = hattr.hca_max_mem_win; attr->max_pkeys = hattr.hca_max_port_pkey_tbl_sz; attr->max_raw_ipv6_qp = hattr.hca_max_ipv6_qp; attr->max_raw_ethy_qp = hattr.hca_max_ether_qp; attr->max_mcast_grp = hattr.hca_max_mcg; attr->max_mcast_qp_attach = hattr.hca_max_qp_per_mcg; attr->max_total_mcast_qp_attach = hattr.hca_max_mcg_qps; attr->max_ah = hattr.hca_max_ah; attr->max_fmr = hattr.hca_max_fmrs; attr->max_map_per_fmr = hattr.hca_opaque9; /* hca_max_map_per_fmr */ return (0); } /* Protection domains */ struct ib_pd * ib_alloc_pd(struct ib_device *device) { ofs_client_t *ofs_client = (ofs_client_t *)device->clnt_hdl; struct ib_pd *pd; int rtn; if ((pd = kmem_alloc(sizeof (struct ib_pd), KM_NOSLEEP)) == NULL) { SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str, "ib_alloc_pd: device: 0x%p => no sufficient memory", device); return ((struct ib_pd *)-ENOMEM); } ofs_lock_enter(&ofs_client->lock); if (device->reg_state != IB_DEV_OPEN) { ofs_lock_exit(&ofs_client->lock); SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str, "ib_alloc_pd: device: 0x%p => invalid device state (%d)", device, device->reg_state); return ((struct ib_pd *)-ENXIO); } SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str, "ib_alloc_pd: device: 0x%p", device); rtn = ibt_alloc_pd(device->hca_hdl, IBT_PD_NO_FLAGS, 
&pd->ibt_pd); ofs_lock_exit(&ofs_client->lock); if (rtn == IBT_SUCCESS) { pd->device = device; SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str, "ib_alloc_pd: device: 0x%p, pd: 0x%p, ibt_pd: 0x%p, " "rtn: 0x%x", device, pd, pd->ibt_pd, rtn); return (pd); } kmem_free(pd, sizeof (struct ib_pd)); SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str, "ib_alloc_pd: device: 0x%p, pd: 0x%p, ibt_pd: 0x%p => " "ibt_alloc_pd failed w/ 0x%x", device, pd, pd->ibt_pd, rtn); switch (rtn) { case IBT_INSUFF_RESOURCE: return ((struct ib_pd *)-ENOMEM); case IBT_HCA_HDL_INVALID: return ((struct ib_pd *)-EFAULT); default: return ((struct ib_pd *)-EIO); } } int ib_dealloc_pd(struct ib_pd *pd) { ofs_client_t *ofs_client = (ofs_client_t *)pd->device->clnt_hdl; int rtn; ofs_lock_enter(&ofs_client->lock); if (pd->device->reg_state != IB_DEV_OPEN) { ofs_lock_exit(&ofs_client->lock); SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str, "ib_dealloc_pd: pd: 0x%p => invalid device state (%d)", pd, pd->device->reg_state); return (-ENXIO); } SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str, "ib_dealloc_pd: pd: 0x%p", pd); rtn = ibt_free_pd(pd->device->hca_hdl, pd->ibt_pd); ofs_lock_exit(&ofs_client->lock); if (rtn == IBT_SUCCESS) { SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str, "ib_dealloc_pd: pd: 0x%p, device: 0x%p, ibt_pd: 0x%p, " "rtn: 0x%x", pd, pd->device, pd->ibt_pd, rtn); kmem_free(pd, sizeof (struct ib_pd)); return (0); } SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str, "ib_dealloc_pd: pd: 0x%p => ibt_free_pd failed w/ 0x%x", pd, rtn); switch (rtn) { case IBT_PD_IN_USE: return (-EBUSY); case IBT_HCA_HDL_INVALID: return (-EFAULT); default: return (-EIO); } } /* * ofs_cq_handler() is a delegated function to handle CQ events, * which dispatches them to corresponding cq handlers registered * with ib_create_cq(). 
*/
static void
ofs_cq_handler(ibt_cq_hdl_t ibt_cq, void *arg)
{
	struct ib_cq *cq = (struct ib_cq *)ibt_get_cq_private(ibt_cq);

	/*
	 * ib_destroy_cq() clears the CQ private pointer before freeing the
	 * IBTF CQ so that a late event can be told apart from a live one.
	 * In that case there is no ib_cq to dispatch to.
	 */
	if (cq == NULL) {
		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
		    "ofs_cq_handler: ibt_cq: 0x%p, arg: 0x%p => no ib_cq",
		    ibt_cq, arg);
		return;
	}

	SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
	    "ofs_cq_handler: ibt_cq: 0x%p, ib_cq: 0x%p, comp_handler: 0x%p, "
	    "arg: 0x%p", ibt_cq, cq, cq->comp_handler, arg);

	/* the completion handler is optional */
	if (cq->comp_handler) {
		cq->comp_handler(cq, cq->cq_context);
	}
}

/*
 * ib_create_cq - Creates a CQ on the specified device.
 * @device: The device on which to create the CQ.
 * @comp_handler: A user-specified callback that is invoked when a
 *   completion event occurs on the CQ.
 * @event_handler: A user-specified callback that is invoked when an
 *   asynchronous event not associated with a completion occurs on the CQ.
 * @cq_context: Context associated with the CQ returned to the user via
 *   the associated completion and event handlers.
 * @cqe: The minimum size of the CQ.
 * @comp_vector - Completion vector used to signal completion events.
 *     Must be >= 0 and < context->num_comp_vectors.
 *
 * Users can examine the cq structure to determine the actual CQ size.
 *
 * Note that comp_vector is not supported currently.
*/ struct ib_cq * ib_create_cq(struct ib_device *device, ib_comp_handler comp_handler, void (*event_handler)(struct ib_event *, void *), void *cq_context, int cqe, void *comp_vector) { ofs_client_t *ofs_client = (ofs_client_t *)device->clnt_hdl; ibt_cq_attr_t cq_attr; uint32_t real_size; struct ib_cq *cq; int rtn; if ((cq = kmem_alloc(sizeof (struct ib_cq), KM_NOSLEEP)) == NULL) { SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str, "ib_create_cq: device: 0x%p, comp_handler: 0x%p, " "event_handler: 0x%p, cq_context: 0x%p, cqe: 0x%x, " "comp_vector: %p => no sufficient memory", device, comp_handler, event_handler, cq_context, cqe, comp_vector); return ((struct ib_cq *)-ENOMEM); } ofs_lock_enter(&ofs_client->lock); if (device->reg_state != IB_DEV_OPEN) { ofs_lock_exit(&ofs_client->lock); SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str, "ib_create_cq: device: 0x%p, comp_handler: 0x%p, " "event_handler: 0x%p, cq_context: 0x%p, cqe: 0x%x, " "comp_vector: %p => invalid device state (%d)", device, comp_handler, event_handler, cq_context, cqe, comp_vector, device->reg_state); return ((struct ib_cq *)-ENXIO); } SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str, "ib_create_cq: device: 0x%p, comp_handler: 0x%p, " "event_handler: 0x%p, cq_context: 0x%p, cqe: 0x%x, " "comp_vector: %d", device, comp_handler, event_handler, cq_context, cqe, comp_vector); cq_attr.cq_size = cqe; cq_attr.cq_sched = comp_vector; cq_attr.cq_flags = IBT_CQ_NO_FLAGS; rtn = ibt_alloc_cq(device->hca_hdl, &cq_attr, &cq->ibt_cq, &real_size); ofs_lock_exit(&ofs_client->lock); if (rtn == IBT_SUCCESS) { cq->device = device; cq->comp_handler = comp_handler; cq->event_handler = event_handler; cq->cq_context = cq_context; cq->cqe = real_size; ibt_set_cq_private(cq->ibt_cq, cq); ibt_set_cq_handler(cq->ibt_cq, ofs_cq_handler, cq_context); mutex_init(&cq->lock, NULL, MUTEX_DEFAULT, NULL); SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str, "ib_create_cq: device: 0x%p, cqe: 0x%x, ibt_cq: 0x%p, " "rtn: 0x%x", device, cqe, cq->ibt_cq, rtn); return (cq); } 
kmem_free(cq, sizeof (struct ib_cq)); SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str, "ib_create_cq: device: 0x%p, cqe: 0x%x, ibt_cq: 0x%p => " "ibt_alloc_cq failed w/ 0x%x", device, cqe, cq->ibt_cq, rtn); switch (rtn) { case IBT_HCA_CQ_EXCEEDED: case IBT_INVALID_PARAM: case IBT_HCA_HDL_INVALID: return ((struct ib_cq *)-EINVAL); case IBT_INSUFF_RESOURCE: return ((struct ib_cq *)-ENOMEM); default: return ((struct ib_cq *)-EIO); } } int ib_destroy_cq(struct ib_cq *cq) { ofs_client_t *ofs_client = (ofs_client_t *)cq->device->clnt_hdl; int rtn; ofs_lock_enter(&ofs_client->lock); if (cq->device->reg_state != IB_DEV_OPEN) { ofs_lock_exit(&ofs_client->lock); SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str, "ib_destroy_cq: cq: 0x%p => invalid device state (%d)", cq, cq->device->reg_state); return (-ENXIO); } SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str, "ib_destroy_cq: cq: 0x%p", cq); /* * if IBTL_ASYNC_PENDING is set, ibt_qp is not freed * at this moment, but yet alive for a while. Then * there is a possibility that this qp is used even after * ib_destroy_cq() is called. To distinguish this case from * others, clear ibt_qp here. 
*/ ibt_set_cq_private(cq->ibt_cq, NULL); rtn = ibt_free_cq(cq->ibt_cq); if (rtn == IBT_SUCCESS) { ofs_lock_exit(&ofs_client->lock); kmem_free(cq, sizeof (struct ib_cq)); SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str, "ib_destroy_cq: cq: 0x%p, rtn: 0x%x", cq, rtn); return (0); } ibt_set_cq_private(cq->ibt_cq, cq); ofs_lock_exit(&ofs_client->lock); SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str, "ib_destroy_cq: cq: 0x%p => ibt_free_cq failed w/ 0x%x", cq, rtn); switch (rtn) { case IBT_CQ_BUSY: return (-EBUSY); case IBT_HCA_HDL_INVALID: case IBT_CQ_HDL_INVALID: return (-EINVAL); default: return (-EIO); } } struct ib_qp * ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *qp_init_attr) { ofs_client_t *ofs_client = pd->device->clnt_hdl; ibt_qp_alloc_attr_t attrs; ibt_chan_sizes_t sizes; ib_qpn_t qpn; ibt_qp_hdl_t ibt_qp; struct ib_qp *qp; int rtn; /* sanity check */ if (!(qp_init_attr->send_cq && qp_init_attr->recv_cq)) { SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str, "ib_create_qp: pd: 0x%p => invalid cqs " "(send_cq=0x%p, recv_cq=0x%p)", pd, qp_init_attr->send_cq, qp_init_attr->recv_cq); return ((struct ib_qp *)-EINVAL); } /* UC, Raw IPv6 and Raw Ethernet are not supported */ if (qp_init_attr->qp_type == IB_QPT_UC || qp_init_attr->qp_type == IB_QPT_RAW_IPV6 || qp_init_attr->qp_type == IB_QPT_RAW_ETY) { SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str, "ib_create_qp: pd: 0x%p => invalid qp_type", pd, qp_init_attr->qp_type); return ((struct ib_qp *)-EINVAL); } if ((qp = kmem_alloc(sizeof (struct ib_qp), KM_NOSLEEP)) == NULL) { SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str, "ib_create_qp: pd: 0x%p, init_attr: 0x%p => " "no sufficient memory", pd, qp_init_attr); return ((struct ib_qp *)-ENOMEM); } ofs_lock_enter(&ofs_client->lock); if (pd->device->reg_state != IB_DEV_OPEN) { ofs_lock_exit(&ofs_client->lock); kmem_free(qp, sizeof (struct ib_qp)); SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str, "ib_create_qp: pd: 0x%p, init_attr: 0x%p => " "invalid device state (%d)", pd, qp_init_attr, pd->device->reg_state); return 
((struct ib_qp *)-ENXIO); } SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str, "ib_create_qp: pd: 0x%p, event_handler: 0x%p, qp_context: 0x%p, " "send_cq: 0x%p, recv_cq: 0x%p, srq: 0x%p, max_send_wr: 0x%x, " "max_recv_wr: 0x%x, max_send_sge: 0x%x, max_recv_sge: 0x%x, " "max_inline_data: 0x%x, sq_sig_type: %d, qp_type: %d, " "port_num: %d", pd, qp_init_attr->event_handler, qp_init_attr->qp_context, qp_init_attr->send_cq, qp_init_attr->recv_cq, qp_init_attr->srq, qp_init_attr->cap.max_send_wr, qp_init_attr->cap.max_recv_wr, qp_init_attr->cap.max_send_sge, qp_init_attr->cap.max_recv_sge, qp_init_attr->cap.max_inline_data, qp_init_attr->sq_sig_type, qp_init_attr->qp_type, qp_init_attr->port_num); attrs.qp_alloc_flags = IBT_QP_NO_FLAGS; if (qp_init_attr->srq) { attrs.qp_alloc_flags |= IBT_QP_USES_SRQ; } attrs.qp_flags = IBT_ALL_SIGNALED | IBT_FAST_REG_RES_LKEY; if (qp_init_attr->sq_sig_type == IB_SIGNAL_REQ_WR) { attrs.qp_flags |= IBT_WR_SIGNALED; } attrs.qp_scq_hdl = qp_init_attr->send_cq->ibt_cq; attrs.qp_rcq_hdl = qp_init_attr->recv_cq->ibt_cq; attrs.qp_pd_hdl = pd->ibt_pd; attrs.qp_sizes.cs_sq = qp_init_attr->cap.max_send_wr; attrs.qp_sizes.cs_rq = qp_init_attr->cap.max_recv_wr; attrs.qp_sizes.cs_sq_sgl = qp_init_attr->cap.max_send_sge; attrs.qp_sizes.cs_rq_sgl = qp_init_attr->cap.max_recv_sge; attrs.qp_sizes.cs_inline = qp_init_attr->cap.max_inline_data; switch (qp_init_attr->qp_type) { case IB_QPT_RC: rtn = ibt_alloc_qp(pd->device->hca_hdl, IBT_RC_RQP, &attrs, &sizes, &qpn, &ibt_qp); break; case IB_QPT_UD: rtn = ibt_alloc_qp(pd->device->hca_hdl, IBT_UD_RQP, &attrs, &sizes, &qpn, &ibt_qp); break; case IB_QPT_SMI: rtn = ibt_alloc_special_qp(pd->device->hca_hdl, qp_init_attr->port_num, IBT_SMI_SQP, &attrs, &sizes, &ibt_qp); break; case IB_QPT_GSI: rtn = ibt_alloc_special_qp(pd->device->hca_hdl, qp_init_attr->port_num, IBT_GSI_SQP, &attrs, &sizes, &ibt_qp); break; default: /* this should never happens */ ofs_lock_exit(&ofs_client->lock); kmem_free(qp, sizeof (struct ib_qp)); 
return ((struct ib_qp *)-EINVAL); } ofs_lock_exit(&ofs_client->lock); if (rtn == IBT_SUCCESS) { /* fill in ib_qp_cap w/ the real values */ qp_init_attr->cap.max_send_wr = sizes.cs_sq; qp_init_attr->cap.max_recv_wr = sizes.cs_rq; qp_init_attr->cap.max_send_sge = sizes.cs_sq_sgl; qp_init_attr->cap.max_recv_sge = sizes.cs_rq_sgl; /* max_inline_data is not supported */ qp_init_attr->cap.max_inline_data = 0; /* fill in ib_qp */ qp->device = pd->device; qp->pd = pd; qp->send_cq = qp_init_attr->send_cq; qp->recv_cq = qp_init_attr->recv_cq; qp->srq = qp_init_attr->srq; qp->event_handler = qp_init_attr->event_handler; qp->qp_context = qp_init_attr->qp_context; qp->qp_num = qp_init_attr->qp_type == IB_QPT_SMI ? 0 : qp_init_attr->qp_type == IB_QPT_GSI ? 1 : qpn; qp->qp_type = qp_init_attr->qp_type; qp->ibt_qp = ibt_qp; ibt_set_qp_private(qp->ibt_qp, qp); mutex_init(&qp->lock, NULL, MUTEX_DEFAULT, NULL); SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str, "ib_create_qp: device: 0x%p, pd: 0x%x, init_attr: 0x%p, " "rtn: 0x%x", pd->device, pd, qp_init_attr, rtn); return (qp); } kmem_free(qp, sizeof (struct ib_qp)); SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str, "ib_create_qp: device: 0x%p, pd: 0x%x, init_attr: 0x%p => " "ibt_alloc_(special)_qp failed w/ rtn: 0x%x", pd->device, pd, qp_init_attr, rtn); switch (rtn) { case IBT_NOT_SUPPORTED: case IBT_QP_SRV_TYPE_INVALID: case IBT_CQ_HDL_INVALID: case IBT_HCA_HDL_INVALID: case IBT_INVALID_PARAM: case IBT_SRQ_HDL_INVALID: case IBT_PD_HDL_INVALID: case IBT_HCA_SGL_EXCEEDED: case IBT_HCA_WR_EXCEEDED: return ((struct ib_qp *)-EINVAL); case IBT_INSUFF_RESOURCE: return ((struct ib_qp *)-ENOMEM); default: return ((struct ib_qp *)-EIO); } } int ib_destroy_qp(struct ib_qp *qp) { ofs_client_t *ofs_client = (ofs_client_t *)qp->device->clnt_hdl; int rtn; ofs_lock_enter(&ofs_client->lock); if (qp->device->reg_state != IB_DEV_OPEN) { ofs_lock_exit(&ofs_client->lock); SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str, "ib_destroy_qp: qp: 0x%p => invalid device state (%d)", qp, 
qp->device->reg_state); return (-ENXIO); } /* * if IBTL_ASYNC_PENDING is set, ibt_qp is not freed * at this moment, but yet alive for a while. Then * there is a possibility that this qp is used even after * ib_destroy_qp() is called. To distinguish this case from * others, clear ibt_qp here. */ ibt_set_qp_private(qp->ibt_qp, NULL); rtn = ibt_free_qp(qp->ibt_qp); if (rtn == IBT_SUCCESS) { ofs_lock_exit(&ofs_client->lock); kmem_free(qp, sizeof (struct ib_qp)); SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str, "ib_destroy_qp: qp: 0x%p, rtn: 0x%x", qp, rtn); return (0); } ibt_set_qp_private(qp->ibt_qp, qp); ofs_lock_exit(&ofs_client->lock); SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str, "ib_destroy_qp: qp: 0x%p => ibt_free_qp failed w/ 0x%x", qp, rtn); switch (rtn) { case IBT_CHAN_STATE_INVALID: case IBT_HCA_HDL_INVALID: case IBT_QP_HDL_INVALID: return (-EINVAL); default: return (-EIO); } } /* * ib_req_notify_cq - Request completion notification on a CQ. * @cq: The CQ to generate an event for. * @flags: * Must contain exactly one of %IB_CQ_SOLICITED or %IB_CQ_NEXT_COMP * to request an event on the next solicited event or next work * completion at any type, respectively. %IB_CQ_REPORT_MISSED_EVENTS * may also be |ed in to request a hint about missed events, as * described below. * * Return Value: * < 0 means an error occurred while requesting notification * == 0 means notification was requested successfully, and if * IB_CQ_REPORT_MISSED_EVENTS was passed in, then no events * were missed and it is safe to wait for another event. In * this case is it guaranteed that any work completions added * to the CQ since the last CQ poll will trigger a completion * notification event. * > 0 is only returned if IB_CQ_REPORT_MISSED_EVENTS was passed * in. It means that the consumer must poll the CQ again to * make sure it is empty to avoid missing an event because of a * race between requesting notification and an entry being * added to the CQ. 
This return value means it is possible * (but not guaranteed) that a work completion has been added * to the CQ since the last poll without triggering a * completion notification event. * * Note that IB_CQ_REPORT_MISSED_EVENTS is currently not supported. */ int ib_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags) { ibt_cq_notify_flags_t notify_type; int rtn; ofs_client_t *ofs_client = cq->device->clnt_hdl; ofs_lock_enter(&ofs_client->lock); if (cq->device->reg_state != IB_DEV_OPEN) { ofs_lock_exit(&ofs_client->lock); SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str, "ib_req_notify_cq: cq: 0x%p, flag: 0x%x", cq, flags); return (-ENXIO); } SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str, "ib_req_notify_cq: cq: 0x%p, flag: 0x%x", cq, flags); switch (flags & IB_CQ_SOLICITED_MASK) { case IB_CQ_SOLICITED: notify_type = IBT_NEXT_SOLICITED; break; case IB_CQ_NEXT_COMP: notify_type = IBT_NEXT_COMPLETION; break; default: /* Currently only two flags are supported */ ofs_lock_exit(&ofs_client->lock); SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str, "ib_req_notify_cq: cq: 0x%p, flag: 0x%x => invalid flag", cq, flags); return (-EINVAL); } rtn = ibt_enable_cq_notify(cq->ibt_cq, notify_type); ofs_lock_exit(&ofs_client->lock); if (rtn == IBT_SUCCESS) { SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str, "ib_req_notify_cq: cq: 0x%p, flag: 0x%x rtn: 0x%x", cq, flags, rtn); return (0); } SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str, "ib_req_notify_cq: cq: 0x%p, flag: 0x%x => ibt_enable_cq_notify " "failed w/ 0x%x", cq, flags, rtn); switch (rtn) { case IBT_HCA_HDL_INVALID: case IBT_CQ_HDL_INVALID: case IBT_CQ_NOTIFY_TYPE_INVALID: return (-EINVAL); default: return (-EIO); } } static const struct { int valid; enum ib_qp_attr_mask req_param[IB_QPT_RAW_ETY + 1]; enum ib_qp_attr_mask opt_param[IB_QPT_RAW_ETY + 1]; } qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = { [IB_QPS_RESET] = { [IB_QPS_RESET] = { .valid = 1 }, [IB_QPS_INIT] = { .valid = 1, .req_param = { [IB_QPT_UD] = (IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_QKEY), 
[IB_QPT_UC] = (IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS), [IB_QPT_RC] = (IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS), [IB_QPT_SMI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY), [IB_QPT_GSI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY), } }, }, [IB_QPS_INIT] = { [IB_QPS_RESET] = { .valid = 1 }, [IB_QPS_ERR] = { .valid = 1 }, [IB_QPS_INIT] = { .valid = 1, .opt_param = { [IB_QPT_UD] = (IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_QKEY), [IB_QPT_UC] = (IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS), [IB_QPT_RC] = (IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS), [IB_QPT_SMI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY), [IB_QPT_GSI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY), } }, [IB_QPS_RTR] = { .valid = 1, .req_param = { [IB_QPT_UC] = (IB_QP_AV | IB_QP_PATH_MTU | IB_QP_DEST_QPN | IB_QP_RQ_PSN), [IB_QPT_RC] = (IB_QP_AV | IB_QP_PATH_MTU | IB_QP_DEST_QPN | IB_QP_RQ_PSN | IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER), }, .opt_param = { [IB_QPT_UD] = (IB_QP_PKEY_INDEX | IB_QP_QKEY), [IB_QPT_UC] = (IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX), [IB_QPT_RC] = (IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX), [IB_QPT_SMI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY), [IB_QPT_GSI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY), } } }, [IB_QPS_RTR] = { [IB_QPS_RESET] = { .valid = 1 }, [IB_QPS_ERR] = { .valid = 1 }, [IB_QPS_RTS] = { .valid = 1, .req_param = { [IB_QPT_UD] = IB_QP_SQ_PSN, [IB_QPT_UC] = IB_QP_SQ_PSN, [IB_QPT_RC] = (IB_QP_TIMEOUT | IB_QP_RETRY_CNT | IB_QP_RNR_RETRY | IB_QP_SQ_PSN | IB_QP_MAX_QP_RD_ATOMIC), [IB_QPT_SMI] = IB_QP_SQ_PSN, [IB_QPT_GSI] = IB_QP_SQ_PSN, }, .opt_param = { [IB_QPT_UD] = (IB_QP_CUR_STATE | IB_QP_QKEY), [IB_QPT_UC] = (IB_QP_CUR_STATE | IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS | IB_QP_PATH_MIG_STATE), [IB_QPT_RC] = (IB_QP_CUR_STATE | IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS | IB_QP_MIN_RNR_TIMER | IB_QP_PATH_MIG_STATE), [IB_QPT_SMI] = (IB_QP_CUR_STATE | IB_QP_QKEY), [IB_QPT_GSI] = (IB_QP_CUR_STATE | IB_QP_QKEY), } } }, [IB_QPS_RTS] = { [IB_QPS_RESET] = { .valid = 1 }, 
[IB_QPS_ERR] = { .valid = 1 }, [IB_QPS_RTS] = { .valid = 1, .opt_param = { [IB_QPT_UD] = (IB_QP_CUR_STATE | IB_QP_QKEY), [IB_QPT_UC] = (IB_QP_CUR_STATE | IB_QP_ACCESS_FLAGS | IB_QP_ALT_PATH | IB_QP_PATH_MIG_STATE), [IB_QPT_RC] = (IB_QP_CUR_STATE | IB_QP_ACCESS_FLAGS | IB_QP_ALT_PATH | IB_QP_PATH_MIG_STATE | IB_QP_MIN_RNR_TIMER), [IB_QPT_SMI] = (IB_QP_CUR_STATE | IB_QP_QKEY), [IB_QPT_GSI] = (IB_QP_CUR_STATE | IB_QP_QKEY), } }, [IB_QPS_SQD] = { .valid = 1, .opt_param = { [IB_QPT_UD] = IB_QP_EN_SQD_ASYNC_NOTIFY, [IB_QPT_UC] = IB_QP_EN_SQD_ASYNC_NOTIFY, [IB_QPT_RC] = IB_QP_EN_SQD_ASYNC_NOTIFY, [IB_QPT_SMI] = IB_QP_EN_SQD_ASYNC_NOTIFY, [IB_QPT_GSI] = IB_QP_EN_SQD_ASYNC_NOTIFY } }, }, [IB_QPS_SQD] = { [IB_QPS_RESET] = { .valid = 1 }, [IB_QPS_ERR] = { .valid = 1 }, [IB_QPS_RTS] = { .valid = 1, .opt_param = { [IB_QPT_UD] = (IB_QP_CUR_STATE | IB_QP_QKEY), [IB_QPT_UC] = (IB_QP_CUR_STATE | IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS | IB_QP_PATH_MIG_STATE), [IB_QPT_RC] = (IB_QP_CUR_STATE | IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS | IB_QP_MIN_RNR_TIMER | IB_QP_PATH_MIG_STATE), [IB_QPT_SMI] = (IB_QP_CUR_STATE | IB_QP_QKEY), [IB_QPT_GSI] = (IB_QP_CUR_STATE | IB_QP_QKEY), } }, [IB_QPS_SQD] = { .valid = 1, .opt_param = { [IB_QPT_UD] = (IB_QP_PKEY_INDEX | IB_QP_QKEY), [IB_QPT_UC] = (IB_QP_AV | IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX | IB_QP_PATH_MIG_STATE), [IB_QPT_RC] = (IB_QP_PORT | IB_QP_AV | IB_QP_TIMEOUT | IB_QP_RETRY_CNT | IB_QP_RNR_RETRY | IB_QP_MAX_QP_RD_ATOMIC | IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX | IB_QP_MIN_RNR_TIMER | IB_QP_PATH_MIG_STATE), [IB_QPT_SMI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY), [IB_QPT_GSI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY), } } }, [IB_QPS_SQE] = { [IB_QPS_RESET] = { .valid = 1 }, [IB_QPS_ERR] = { .valid = 1 }, [IB_QPS_RTS] = { .valid = 1, .opt_param = { [IB_QPT_UD] = (IB_QP_CUR_STATE | IB_QP_QKEY), [IB_QPT_UC] = (IB_QP_CUR_STATE | IB_QP_ACCESS_FLAGS), [IB_QPT_SMI] = (IB_QP_CUR_STATE | IB_QP_QKEY), [IB_QPT_GSI] = 
(IB_QP_CUR_STATE | IB_QP_QKEY), } } }, [IB_QPS_ERR] = { [IB_QPS_RESET] = { .valid = 1 }, [IB_QPS_ERR] = { .valid = 1 } } }; static inline int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state, enum ib_qp_type type, enum ib_qp_attr_mask mask) { enum ib_qp_attr_mask req_param, opt_param; if (cur_state < 0 || cur_state > IB_QPS_ERR || next_state < 0 || next_state > IB_QPS_ERR) { SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str, "ib_modify_qp_is_ok: cur_state: %d, next_state: %d, " "qp_type: %d, attr_mask: 0x%x => invalid state(1)", cur_state, next_state, type, mask); return (0); } if (mask & IB_QP_CUR_STATE && cur_state != IB_QPS_RTR && cur_state != IB_QPS_RTS && cur_state != IB_QPS_SQD && cur_state != IB_QPS_SQE) { SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str, "ib_modify_qp_is_ok: cur_state: %d, next_state: %d, " "qp_type: %d, attr_mask: 0x%x => invalid state(2)", cur_state, next_state, type, mask); return (0); } if (!qp_state_table[cur_state][next_state].valid) { SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str, "ib_modify_qp_is_ok: cur_state: %d, next_state: %d, " "qp_type: %d, attr_mask: 0x%x => state is not valid", cur_state, next_state, type, mask); return (0); } req_param = qp_state_table[cur_state][next_state].req_param[type]; opt_param = qp_state_table[cur_state][next_state].opt_param[type]; if ((mask & req_param) != req_param) { SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str, "ib_modify_qp_is_ok: cur_state: %d, next_state: %d, " "qp_type: %d, attr_mask: 0x%x => " "required param doesn't match. req_param = 0x%x", cur_state, next_state, type, mask, req_param); return (0); } if (mask & ~(req_param | opt_param | IB_QP_STATE)) { SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str, "ib_modify_qp_is_ok: cur_state: %d, next_state: %d, " "qp_type: %d, attr_mask: 0x%x => " "unsupported options. 
req_param = 0x%x, opt_param = 0x%x", cur_state, next_state, type, mask, req_param, opt_param); return (0); } return (1); } static inline enum ib_qp_state qp_current_state(ibt_qp_query_attr_t *qp_attr) { ASSERT(qp_attr->qp_info.qp_state != IBT_STATE_SQDRAIN); return (enum ib_qp_state)(qp_attr->qp_info.qp_state); } static inline ibt_tran_srv_t of2ibtf_qp_type(enum ib_qp_type type) { switch (type) { case IB_QPT_SMI: case IB_QPT_GSI: case IB_QPT_UD: return (IBT_UD_SRV); case IB_QPT_RC: return (IBT_RC_SRV); case IB_QPT_UC: return (IBT_UC_SRV); case IB_QPT_RAW_IPV6: return (IBT_RAWIP_SRV); case IB_QPT_RAW_ETY: default: ASSERT(type == IB_QPT_RAW_ETY); return (IBT_RAWETHER_SRV); } } static inline void set_av(struct ib_ah_attr *attr, ibt_cep_path_t *pathp) { ibt_adds_vect_t *av = &pathp->cep_adds_vect; pathp->cep_hca_port_num = attr->port_num; av->av_srate = OF2IBTF_SRATE(attr->static_rate); av->av_srvl = attr->sl & 0xF; av->av_send_grh = attr->ah_flags & IB_AH_GRH ? 1 : 0; if (av->av_send_grh) { av->av_dgid.gid_prefix = attr->grh.dgid.global.subnet_prefix; av->av_dgid.gid_guid = attr->grh.dgid.global.interface_id; av->av_flow = attr->grh.flow_label & 0xFFFFF; av->av_tclass = attr->grh.traffic_class; av->av_hop = attr->grh.hop_limit; av->av_sgid_ix = attr->grh.sgid_index; } av->av_dlid = attr->dlid; av->av_src_path = attr->src_path_bits; } int ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr, int attr_mask) { enum ib_qp_state cur_state, new_state; ibt_hca_attr_t hattr; ibt_qp_query_attr_t qp_attr; ibt_qp_info_t modify_attr; ibt_cep_modify_flags_t flags; int rtn; ofs_client_t *ofs_client = qp->device->clnt_hdl; ofs_lock_enter(&ofs_client->lock); if (qp->device->reg_state != IB_DEV_OPEN) { ofs_lock_exit(&ofs_client->lock); SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str, "ib_modify_qp: qp: 0x%p => invalid device state (%d)", qp, qp->device->reg_state); return (-ENXIO); } rtn = ibt_query_hca(qp->device->hca_hdl, &hattr); if (rtn != IBT_SUCCESS) { ofs_lock_exit(&ofs_client->lock); 
SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str, "ib_modify_qp: qp: 0x%p, hca_hdl: 0x%p => " "ibt_query_hca() failed w/ %d", qp, qp->device->hca_hdl, rtn); return (-EIO); } /* only one thread per qp is allowed during the qp modification */ mutex_enter(&qp->lock); /* Get the current QP attributes first */ bzero(&qp_attr, sizeof (ibt_qp_query_attr_t)); if ((rtn = ibt_query_qp(qp->ibt_qp, &qp_attr)) != IBT_SUCCESS) { mutex_exit(&qp->lock); ofs_lock_exit(&ofs_client->lock); SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str, "ib_modify_qp: qp: 0x%p, attr: 0x%p, attr_mask: 0x%x => " "ibt_query_qp failed w/ 0x%x", qp, attr, attr_mask, rtn); return (-EIO); } /* Get the current and new state for this QP */ cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp_current_state(&qp_attr); new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state; /* Sanity check of the current/new states */ if (cur_state == new_state && cur_state == IB_QPS_RESET) { /* Linux OF returns 0 in this case */ mutex_exit(&qp->lock); ofs_lock_exit(&ofs_client->lock); SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str, "ib_modify_qp: qp: 0x%p, attr: 0x%p, attr_mask: 0x%x => " "invalid state (both of current/new states are RESET)", qp, attr, attr_mask); return (0); } /* * Check if this modification request is supported with the new * and/or current state. 
*/ if (!ib_modify_qp_is_ok(cur_state, new_state, qp->qp_type, attr_mask)) { mutex_exit(&qp->lock); ofs_lock_exit(&ofs_client->lock); SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str, "ib_modify_qp: qp: 0x%p, attr: 0x%p, attr_mask: 0x%x => " "invalid arguments", qp, attr, attr_mask); return (-EINVAL); } /* Sanity checks */ if (attr_mask & IB_QP_PORT && (attr->port_num == 0 || attr->port_num > hattr.hca_nports)) { mutex_exit(&qp->lock); ofs_lock_exit(&ofs_client->lock); SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str, "ib_modify_qp: qp: 0x%p, attr: 0x%p, attr_mask: 0x%x => " "invalid attr->port_num(%d), max_nports(%d)", qp, attr, attr_mask, attr->port_num, hattr.hca_nports); return (-EINVAL); } if (attr_mask & IB_QP_PKEY_INDEX && attr->pkey_index >= hattr.hca_max_port_pkey_tbl_sz) { mutex_exit(&qp->lock); ofs_lock_exit(&ofs_client->lock); SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str, "ib_modify_qp: qp: 0x%p, attr: 0x%p, attr_mask: 0x%x => " "invalid attr->pkey_index(%d), max_pkey_index(%d)", qp, attr, attr_mask, attr->pkey_index, hattr.hca_max_port_pkey_tbl_sz); return (-EINVAL); } if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && attr->max_rd_atomic > hattr.hca_max_rdma_out_qp) { mutex_exit(&qp->lock); ofs_lock_exit(&ofs_client->lock); SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str, "ib_modify_qp: qp: 0x%p, attr: 0x%p, attr_mask: 0x%x => " "invalid attr->max_rd_atomic(0x%x), max_rdma_out_qp(0x%x)", qp, attr, attr_mask, attr->max_rd_atomic, hattr.hca_max_rdma_out_qp); return (-EINVAL); } if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC && attr->max_dest_rd_atomic > hattr.hca_max_rdma_in_qp) { mutex_exit(&qp->lock); ofs_lock_exit(&ofs_client->lock); SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str, "ib_modify_qp: qp: 0x%p, attr: 0x%p, attr_mask: 0x%x => " "invalid attr->max_dest_rd_atomic(0x%x), " "max_rdma_in_qp(0x%x)", qp, attr, attr_mask, attr->max_dest_rd_atomic, hattr.hca_max_rdma_in_qp); return (-EINVAL); } /* copy the current setting */ modify_attr = qp_attr.qp_info; /* * Since it's already checked if the modification 
request matches * the new and/or current states, just assign both of states to * modify_attr here. The current state is required if qp_state * is RTR, but it's harmelss otherwise, so it's set always. */ modify_attr.qp_current_state = OF2IBTF_STATE(cur_state); modify_attr.qp_state = OF2IBTF_STATE(new_state); modify_attr.qp_trans = of2ibtf_qp_type(qp->qp_type); /* Convert OF modification requests into IBTF ones */ flags = IBT_CEP_SET_STATE; /* IBTF needs IBT_CEP_SET_STATE */ if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { flags |= IBT_CEP_SET_RESET_INIT; } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) { flags |= IBT_CEP_SET_INIT_RTR; } else if (cur_state == IB_QPS_RTR && new_state == IB_QPS_RTS) { flags |= IBT_CEP_SET_RTR_RTS; } if (attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY) { flags |= IBT_CEP_SET_SQD_EVENT; } if (attr_mask & IB_QP_ACCESS_FLAGS) { modify_attr.qp_flags &= ~(IBT_CEP_RDMA_RD | IBT_CEP_RDMA_WR | IBT_CEP_ATOMIC); if (attr->qp_access_flags & IB_ACCESS_REMOTE_READ) { flags |= IBT_CEP_SET_RDMA_R; modify_attr.qp_flags |= IBT_CEP_RDMA_RD; } if (attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE) { flags |= IBT_CEP_SET_RDMA_W; modify_attr.qp_flags |= IBT_CEP_RDMA_WR; } if (attr->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC) { flags |= IBT_CEP_SET_ATOMIC; modify_attr.qp_flags |= IBT_CEP_ATOMIC; } } if (attr_mask & IB_QP_PKEY_INDEX) { flags |= IBT_CEP_SET_PKEY_IX; switch (qp->qp_type) { case IB_QPT_SMI: case IB_QPT_GSI: case IB_QPT_UD: modify_attr.qp_transport.ud.ud_pkey_ix = attr->pkey_index; break; case IB_QPT_RC: modify_attr.qp_transport.rc.rc_path.cep_pkey_ix = attr->pkey_index; break; case IB_QPT_UC: modify_attr.qp_transport.uc.uc_path.cep_pkey_ix = attr->pkey_index; break; default: /* This should never happen */ mutex_exit(&qp->lock); ofs_lock_exit(&ofs_client->lock); SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str, "ib_modify_qp(IB_QP_PKEY_INDEX): qp: 0x%p, " "attr: 0x%p, attr_mask: 0x%x => " "invalid qp->qp_type(%d)", qp, attr, attr_mask, 
qp->qp_type); return (-EINVAL); } } if (attr_mask & IB_QP_PORT) { flags |= IBT_CEP_SET_PORT; switch (qp->qp_type) { case IB_QPT_SMI: case IB_QPT_GSI: case IB_QPT_UD: modify_attr.qp_transport.ud.ud_port = attr->port_num; break; case IB_QPT_RC: modify_attr.qp_transport.rc.rc_path.cep_hca_port_num = attr->port_num; break; case IB_QPT_UC: modify_attr.qp_transport.uc.uc_path.cep_hca_port_num = attr->port_num; break; default: /* This should never happen */ mutex_exit(&qp->lock); ofs_lock_exit(&ofs_client->lock); SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str, "ib_modify_qp(IB_QP_PORT): qp: 0x%p, " "attr: 0x%p, attr_mask: 0x%x => " "invalid qp->qp_type(%d)", qp, attr, attr_mask, qp->qp_type); return (-EINVAL); } } if (attr_mask & IB_QP_QKEY) { ASSERT(qp->qp_type == IB_QPT_UD || qp->qp_type == IB_QPT_SMI || qp->qp_type == IB_QPT_GSI); flags |= IBT_CEP_SET_QKEY; modify_attr.qp_transport.ud.ud_qkey = attr->qkey; } if (attr_mask & IB_QP_AV) { flags |= IBT_CEP_SET_ADDS_VECT; switch (qp->qp_type) { case IB_QPT_RC: set_av(&attr->ah_attr, &modify_attr.qp_transport.rc.rc_path); break; case IB_QPT_UC: set_av(&attr->ah_attr, &modify_attr.qp_transport.uc.uc_path); break; case IB_QPT_SMI: case IB_QPT_GSI: case IB_QPT_UD: default: /* This should never happen */ mutex_exit(&qp->lock); ofs_lock_exit(&ofs_client->lock); SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str, "ib_modify_qp(IB_QP_AV): qp: 0x%p, " "attr: 0x%p, attr_mask: 0x%x => " "invalid qp->qp_type(%d)", qp, attr, attr_mask, qp->qp_type); return (-EINVAL); } } if (attr_mask & IB_QP_PATH_MTU) { switch (qp->qp_type) { case IB_QPT_RC: modify_attr.qp_transport.rc.rc_path_mtu = OF2IBTF_PATH_MTU(attr->path_mtu); break; case IB_QPT_UC: modify_attr.qp_transport.uc.uc_path_mtu = OF2IBTF_PATH_MTU(attr->path_mtu); break; case IB_QPT_SMI: case IB_QPT_GSI: case IB_QPT_UD: default: /* nothing to do */ break; } } if (attr_mask & IB_QP_TIMEOUT && qp->qp_type == IB_QPT_RC) { flags |= IBT_CEP_SET_TIMEOUT; modify_attr.qp_transport.rc.rc_path.cep_timeout = 
attr->timeout; } if (attr_mask & IB_QP_RETRY_CNT && qp->qp_type == IB_QPT_RC) { flags |= IBT_CEP_SET_RETRY; modify_attr.qp_transport.rc.rc_retry_cnt = attr->retry_cnt & 0x7; } if (attr_mask & IB_QP_RNR_RETRY && qp->qp_type == IB_QPT_RC) { flags |= IBT_CEP_SET_RNR_NAK_RETRY; modify_attr.qp_transport.rc.rc_rnr_retry_cnt = attr->rnr_retry & 0x7; } if (attr_mask & IB_QP_RQ_PSN) { switch (qp->qp_type) { case IB_QPT_RC: modify_attr.qp_transport.rc.rc_rq_psn = attr->rq_psn & 0xFFFFFF; break; case IB_QPT_UC: modify_attr.qp_transport.uc.uc_rq_psn = attr->rq_psn & 0xFFFFFF; break; case IB_QPT_SMI: case IB_QPT_GSI: case IB_QPT_UD: default: /* nothing to do */ break; } } if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && qp->qp_type == IB_QPT_RC) { if (attr->max_rd_atomic) { flags |= IBT_CEP_SET_RDMARA_OUT; modify_attr.qp_transport.rc.rc_rdma_ra_out = attr->max_rd_atomic; } } if (attr_mask & IB_QP_ALT_PATH) { /* Sanity checks */ if (attr->alt_port_num == 0 || attr->alt_port_num > hattr.hca_nports) { mutex_exit(&qp->lock); ofs_lock_exit(&ofs_client->lock); SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str, "ib_modify_qp: qp: 0x%p, attr: 0x%p, " "attr_mask: 0x%x => invalid attr->alt_port_num" "(%d), max_nports(%d)", qp, attr, attr_mask, attr->alt_port_num, hattr.hca_nports); return (-EINVAL); } if (attr->alt_pkey_index >= hattr.hca_max_port_pkey_tbl_sz) { mutex_exit(&qp->lock); ofs_lock_exit(&ofs_client->lock); SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str, "ib_modify_qp: qp: 0x%p, attr: 0x%p, " "attr_mask: 0x%x => invalid attr->alt_pkey_index" "(%d), max_port_key_index(%d)", qp, attr, attr_mask, attr->alt_pkey_index, hattr.hca_max_port_pkey_tbl_sz); return (-EINVAL); } flags |= IBT_CEP_SET_ALT_PATH; switch (qp->qp_type) { case IB_QPT_RC: modify_attr.qp_transport.rc.rc_alt_path. cep_pkey_ix = attr->alt_pkey_index; modify_attr.qp_transport.rc.rc_alt_path. cep_hca_port_num = attr->alt_port_num; set_av(&attr->alt_ah_attr, &modify_attr.qp_transport.rc.rc_alt_path); modify_attr.qp_transport.rc.rc_alt_path. 
cep_timeout = attr->alt_timeout; break; case IB_QPT_UC: modify_attr.qp_transport.uc.uc_alt_path. cep_pkey_ix = attr->alt_pkey_index; modify_attr.qp_transport.uc.uc_alt_path. cep_hca_port_num = attr->alt_port_num; set_av(&attr->alt_ah_attr, &modify_attr.qp_transport.uc.uc_alt_path); modify_attr.qp_transport.uc.uc_alt_path. cep_timeout = attr->alt_timeout; break; case IB_QPT_SMI: case IB_QPT_GSI: case IB_QPT_UD: default: /* This should never happen */ mutex_exit(&qp->lock); ofs_lock_exit(&ofs_client->lock); SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str, "ib_modify_qp(IB_QP_ALT_PATH): qp: 0x%p, " "attr: 0x%p, attr_mask: 0x%x => " "invalid qp->qp_type(%d)", qp, attr, attr_mask, qp->qp_type); return (-EINVAL); } } if (attr_mask & IB_QP_MIN_RNR_TIMER && qp->qp_type == IB_QPT_RC) { flags |= IBT_CEP_SET_MIN_RNR_NAK; modify_attr.qp_transport.rc.rc_min_rnr_nak = attr->min_rnr_timer & 0x1F; } if (attr_mask & IB_QP_SQ_PSN) { switch (qp->qp_type) { case IB_QPT_SMI: case IB_QPT_GSI: case IB_QPT_UD: modify_attr.qp_transport.ud.ud_sq_psn = attr->sq_psn; break; case IB_QPT_RC: modify_attr.qp_transport.rc.rc_sq_psn = attr->sq_psn; break; case IB_QPT_UC: modify_attr.qp_transport.uc.uc_sq_psn = attr->sq_psn; break; default: /* This should never happen */ mutex_exit(&qp->lock); ofs_lock_exit(&ofs_client->lock); SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str, "ib_modify_qp(IB_QP_SQ_PSN): qp: 0x%p, " "attr: 0x%p, attr_mask: 0x%x => " "invalid qp->qp_type(%d)", qp, attr, attr_mask, qp->qp_type); return (-EINVAL); } } if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC && qp->qp_type == IB_QPT_RC) { /* Linux OF sets the value if max_dest_rd_atomic is not zero */ if (attr->max_dest_rd_atomic) { flags |= IBT_CEP_SET_RDMARA_IN; modify_attr.qp_transport.rc.rc_rdma_ra_in = attr->max_dest_rd_atomic; } } if (attr_mask & IB_QP_PATH_MIG_STATE) { flags |= IBT_CEP_SET_MIG; switch (qp->qp_type) { case IB_QPT_RC: modify_attr.qp_transport.rc.rc_mig_state = OF2IBTF_PATH_MIG_STATE(attr->path_mig_state); break; case IB_QPT_UC: 
modify_attr.qp_transport.uc.uc_mig_state = OF2IBTF_PATH_MIG_STATE(attr->path_mig_state); break; case IB_QPT_SMI: case IB_QPT_GSI: case IB_QPT_UD: default: /* This should never happen */ mutex_exit(&qp->lock); ofs_lock_exit(&ofs_client->lock); SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str, "ib_modify_qp(IB_QP_PATH_MIG_STATE): qp: 0x%p, " "attr: 0x%p, attr_mask: 0x%x => " "invalid qp->qp_type(%d)", qp, attr, attr_mask, qp->qp_type); return (-EINVAL); } } if (attr_mask & IB_QP_CAP) { /* IB_QP_CAP is not supported */ mutex_exit(&qp->lock); ofs_lock_exit(&ofs_client->lock); SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str, "ib_modify_qp: qp: 0x%p, attr: 0x%p, " "attr_mask: 0x%x => IB_QP_CAP is not supported", qp, attr, attr_mask); return (-EINVAL); } if (attr_mask & IB_QP_DEST_QPN) { switch (qp->qp_type) { case IB_QPT_RC: modify_attr.qp_transport.rc.rc_dst_qpn = attr->dest_qp_num; break; case IB_QPT_UC: modify_attr.qp_transport.uc.uc_dst_qpn = attr->dest_qp_num; break; case IB_QPT_SMI: case IB_QPT_GSI: case IB_QPT_UD: default: /* This should never happen */ mutex_exit(&qp->lock); ofs_lock_exit(&ofs_client->lock); SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str, "ib_modify_qp(IB_QP_DEST_PSN): qp: 0x%p, " "attr: 0x%p, attr_mask: 0x%x => " "invalid qp->qp_type(%d)", qp, attr, attr_mask, qp->qp_type); return (-EINVAL); } } SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str, "ib_modify_qp: qp: 0x%p, attr: 0x%p, attr_mask: 0x%x, " "flags: 0x%x, modify_attr: 0x%p", qp, attr, attr_mask, flags, &modify_attr); /* Modify the QP attributes */ rtn = ibt_modify_qp(qp->ibt_qp, flags, &modify_attr, NULL); if (rtn == IBT_SUCCESS) { mutex_exit(&qp->lock); ofs_lock_exit(&ofs_client->lock); return (0); } mutex_exit(&qp->lock); ofs_lock_exit(&ofs_client->lock); SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str, "ib_modify_qp: qp: 0x%p, attr: 0x%p, attr_mask: 0x%x => " "ibt_modify_qp failed w/ %d, flags: 0x%x", qp, attr, attr_mask, rtn, flags); switch (rtn) { case IBT_HCA_HDL_INVALID: case IBT_QP_HDL_INVALID: case IBT_QP_SRV_TYPE_INVALID: case 
IBT_QP_STATE_INVALID: case IBT_HCA_PORT_INVALID: case IBT_PKEY_IX_ILLEGAL: return (-EINVAL); default: return (-EIO); } } static inline enum ib_wc_status ibt2of_wc_status(ibt_wc_status_t status) { switch (status) { case IBT_WC_LOCAL_LEN_ERR: return (IB_WC_LOC_LEN_ERR); case IBT_WC_LOCAL_CHAN_OP_ERR: return (IB_WC_LOC_QP_OP_ERR); case IBT_WC_LOCAL_PROTECT_ERR: return (IB_WC_LOC_PROT_ERR); case IBT_WC_WR_FLUSHED_ERR: return (IB_WC_WR_FLUSH_ERR); case IBT_WC_MEM_WIN_BIND_ERR: return (IB_WC_MW_BIND_ERR); case IBT_WC_BAD_RESPONSE_ERR: return (IB_WC_BAD_RESP_ERR); case IBT_WC_LOCAL_ACCESS_ERR: return (IB_WC_LOC_ACCESS_ERR); case IBT_WC_REMOTE_INVALID_REQ_ERR: return (IB_WC_REM_INV_REQ_ERR); case IBT_WC_REMOTE_ACCESS_ERR: return (IB_WC_REM_ACCESS_ERR); case IBT_WC_REMOTE_OP_ERR: return (IB_WC_REM_OP_ERR); case IBT_WC_TRANS_TIMEOUT_ERR: return (IB_WC_RETRY_EXC_ERR); case IBT_WC_RNR_NAK_TIMEOUT_ERR: return (IB_WC_RNR_RETRY_EXC_ERR); case IBT_WC_SUCCESS: default: /* Hermon doesn't support EEC yet */ ASSERT(status == IBT_WC_SUCCESS); return (IB_WC_SUCCESS); } } static inline enum ib_wc_opcode ibt2of_wc_opcode(ibt_wrc_opcode_t wc_type) { switch (wc_type) { case IBT_WRC_SEND: return (IB_WC_SEND); case IBT_WRC_RDMAR: return (IB_WC_RDMA_READ); case IBT_WRC_RDMAW: return (IB_WC_RDMA_WRITE); case IBT_WRC_CSWAP: return (IB_WC_COMP_SWAP); case IBT_WRC_FADD: return (IB_WC_FETCH_ADD); case IBT_WRC_BIND: return (IB_WC_BIND_MW); case IBT_WRC_RECV: return (IB_WC_RECV); case IBT_WRC_RECV_RDMAWI: default: ASSERT(wc_type == IBT_WRC_RECV_RDMAWI); return (IB_WC_RECV_RDMA_WITH_IMM); } } static inline int ibt2of_wc_flags(ibt_wc_flags_t wc_flags) { return (wc_flags & ~IBT_WC_CKSUM_OK); } static inline void set_wc(ibt_wc_t *ibt_wc, struct ib_wc *wc) { wc->wr_id = ibt_wc->wc_id; wc->status = ibt2of_wc_status(ibt_wc->wc_status); /* opcode can be undefined if status is not success */ if (wc->status == IB_WC_SUCCESS) { wc->opcode = ibt2of_wc_opcode(ibt_wc->wc_type); } wc->vendor_err = 0; /* not 
supported */ wc->byte_len = ibt_wc->wc_bytes_xfer; wc->qp = NULL; /* not supported */ wc->imm_data = htonl(ibt_wc->wc_immed_data); wc->src_qp = ibt_wc->wc_qpn; wc->wc_flags = ibt2of_wc_flags(ibt_wc->wc_flags); wc->pkey_index = ibt_wc->wc_pkey_ix; wc->slid = ibt_wc->wc_slid; wc->sl = ibt_wc->wc_sl; wc->dlid_path_bits = ibt_wc->wc_path_bits; wc->port_num = 0; /* not supported */ } /* * ib_poll_cq - poll a CQ for completion(s) * @cq:the CQ being polled * @num_entries:maximum number of completions to return * @wc:array of at least @num_entries &struct ib_wc where completions * will be returned * * Poll a CQ for (possibly multiple) completions. If the return value * is < 0, an error occurred. If the return value is >= 0, it is the * number of completions returned. If the return value is * non-negative and < num_entries, then the CQ was emptied. * * Note that three following memebers in struct ib_wc are not supported * currently, and the values are always either 0 or NULL. * u32 vendor_err; * struct ib_qp *qp; * u8 port_num; */ int ib_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc) { ibt_wc_t ibt_wc; int npolled; ibt_status_t rtn; ofs_client_t *ofs_client = (ofs_client_t *)cq->device->clnt_hdl; ofs_lock_enter(&ofs_client->lock); if (cq->device->reg_state != IB_DEV_OPEN) { ofs_lock_exit(&ofs_client->lock); SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str, "ib_poll_cq: cq: 0x%p => invalid device state (%d)", cq, cq->device->reg_state); return (-ENXIO); } SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str, "ib_poll_cq: cq: 0x%p, num_entries: %d, wc: 0x%p, " "ibt_cq: 0x%p, ibt_wc: 0x%p", cq, num_entries, wc, cq->ibt_cq, &ibt_wc); /* only one thread per cq is allowed during ibt_poll_cq() */ mutex_enter(&cq->lock); for (npolled = 0; npolled < num_entries; ++npolled) { bzero(&ibt_wc, sizeof (ibt_wc_t)); rtn = ibt_poll_cq(cq->ibt_cq, &ibt_wc, 1, NULL); if (rtn != IBT_SUCCESS) { break; } /* save this result to struct ib_wc */ set_wc(&ibt_wc, wc + npolled); } mutex_exit(&cq->lock); 
ofs_lock_exit(&ofs_client->lock); if (rtn == IBT_SUCCESS || rtn == IBT_CQ_EMPTY) { return (npolled); } SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str, "ib_poll_cq: cq: 0x%p, num_entries: %d, wc: 0x%p => " "ibt_poll_cq failed w/ %d, npolled = %d", cq, num_entries, wc, rtn, npolled); switch (rtn) { case IBT_HCA_HDL_INVALID: case IBT_CQ_HDL_INVALID: case IBT_INVALID_PARAM: return (-EINVAL); default: return (-EIO); } } ibt_hca_hdl_t ib_get_ibt_hca_hdl(struct ib_device *device) { return (device->hca_hdl); } ibt_channel_hdl_t ib_get_ibt_channel_hdl(struct rdma_cm_id *cm) { return (cm->qp == NULL ? NULL : cm->qp->ibt_qp); }