/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * NOTE: the original header names were lost in extraction; the list below
 * is a best-effort reconstruction of the headers this file depends on.
 */
#include <sys/types.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <sys/sysmacros.h>
#include <sys/iscsi_protocol.h>
#include <sys/ib/ibtl/ibti.h>
#include <sys/ib/clients/iser/iser.h>
#include <sys/ib/clients/iser/iser_idm.h>

/*
 * iser_ib.c
 * Routines for the InfiniBand transport for iSER
 *
 * This file contains the routines that interface with the IBT API to attach
 * and allocate IB resources, handle async events, and post recv work
 * requests.
 */

static iser_hca_t *iser_ib_gid2hca(ib_gid_t gid);
static iser_hca_t *iser_ib_guid2hca(ib_guid_t guid);

static iser_hca_t *iser_ib_alloc_hca(ib_guid_t guid);
static int iser_ib_free_hca(iser_hca_t *hca);
static int iser_ib_update_hcaports(iser_hca_t *hca);
static int iser_ib_init_hcas(void);
static int iser_ib_fini_hcas(void);

static iser_sbind_t *iser_ib_get_bind(iser_svc_t *iser_svc,
    ib_guid_t hca_guid, ib_gid_t gid);
static int iser_ib_activate_port(idm_svc_t *idm_svc, ib_guid_t guid,
    ib_gid_t gid);
static void iser_ib_deactivate_port(ib_guid_t hca_guid, ib_gid_t gid);

static void iser_ib_init_qp(iser_chan_t *chan, uint_t sq_size,
    uint_t rq_size);
static void iser_ib_fini_qp(iser_qp_t *qp);

static int iser_ib_setup_cq(ibt_hca_hdl_t hca_hdl, uint_t cq_size,
    ibt_cq_hdl_t *cq_hdl);

static void iser_ib_setup_chanargs(uint8_t hca_port, ibt_cq_hdl_t scq_hdl,
    ibt_cq_hdl_t rcq_hdl, uint_t sq_size, uint_t rq_size,
    ibt_pd_hdl_t hca_pdhdl, ibt_rc_chan_alloc_args_t *cargs);

static void iser_ib_handle_portup_event(ibt_hca_hdl_t hdl,
    ibt_async_event_t *event);
static void iser_ib_handle_portdown_event(ibt_hca_hdl_t hdl,
    ibt_async_event_t *event);
static void iser_ib_handle_hca_detach_event(ibt_hca_hdl_t hdl,
    ibt_async_event_t *event);

static void iser_ib_post_recv_task(void *arg);

static struct ibt_clnt_modinfo_s iser_ib_modinfo = {
    IBTI_V_CURR,
    IBT_STORAGE_DEV,
    iser_ib_async_handler,
    NULL,
    "iSER"
};

/*
 * iser_ib_init
 *
 * This function registers the HCA drivers with IBTF and registers and binds
 * iSER as a service with IBTF.
 */
int
iser_ib_init(void)
{
    int status;

    /* Register with IBTF */
    status = ibt_attach(&iser_ib_modinfo, iser_state->is_dip,
        iser_state, &iser_state->is_ibhdl);
    if (status != DDI_SUCCESS) {
        ISER_LOG(CE_NOTE, "iser_ib_init: ibt_attach failed (0x%x)",
            status);
        return (DDI_FAILURE);
    }

    /* Create the global work request kmem_cache */
    iser_state->iser_wr_cache = kmem_cache_create("iser_wr_cache",
        sizeof (iser_wr_t), 0, NULL, NULL, NULL,
        iser_state, NULL, KM_SLEEP);

    /* Populate our list of HCAs */
    status = iser_ib_init_hcas();
    if (status != DDI_SUCCESS) {
        /* HCAs failed to initialize, tear it down */
        kmem_cache_destroy(iser_state->iser_wr_cache);
        (void) ibt_detach(iser_state->is_ibhdl);
        iser_state->is_ibhdl = NULL;
        ISER_LOG(CE_NOTE, "iser_ib_init: failed to initialize HCAs");
        return (DDI_FAILURE);
    }

    /*
     * The target will register iSER as a service with IBTF when
     * required, and will bind this service when it comes online.
     */
    return (DDI_SUCCESS);
}

/*
 * iser_ib_fini
 *
 * This function unbinds and deregisters the iSER service from IBTF.
 */
int
iser_ib_fini(void)
{
    /* IDM would have already disabled all the services */

    /* Teardown the HCA list and associated resources */
    if (iser_ib_fini_hcas() != DDI_SUCCESS)
        return (DDI_FAILURE);

    /* Teardown the global work request kmem_cache */
    kmem_cache_destroy(iser_state->iser_wr_cache);

    /* Deregister with IBTF */
    if (iser_state->is_ibhdl != NULL) {
        (void) ibt_detach(iser_state->is_ibhdl);
        iser_state->is_ibhdl = NULL;
    }

    return (DDI_SUCCESS);
}

/*
 * iser_ib_register_service
 *
 * This function registers the iSER service using the RDMA-Aware Service ID.
 */
int
iser_ib_register_service(idm_svc_t *idm_svc)
{
    ibt_srv_desc_t srvdesc;
    iser_svc_t *iser_svc;
    int status;

    bzero(&srvdesc, sizeof (ibt_srv_desc_t));

    /* Set up the IBTI client callback handler from the CM */
    srvdesc.sd_handler = iser_ib_cm_handler;

    srvdesc.sd_flags = IBT_SRV_NO_FLAGS;

    iser_svc = (iser_svc_t *)idm_svc->is_iser_svc;

    /* Register the service on the specified port */
    status = ibt_register_service(iser_state->is_ibhdl, &srvdesc,
        iser_svc->is_svcid, 1, &iser_svc->is_srvhdl, NULL);

    return (status);
}

/*
 * iser_ib_bind_service
 *
 * This function binds a given iSER service on all available HCA ports. The
 * current specification does not allow the user to specify transport
 * bindings for each iSCSI target, so the ULP invokes this function to bind
 * the target to all available iSER ports after checking for the presence
 * of an IB HCA. iSER is "configured" whenever an IB-capable IP address
 * exists. The lack of active IB ports is a less-fatal condition; sockets
 * will be used as the transport if an InfiniBand HCA is configured but
 * unusable.
 */
int
iser_ib_bind_service(idm_svc_t *idm_svc)
{
    iser_hca_t *hca;
    ib_gid_t gid;
    int num_ports = 0;
    int num_binds = 0;
    int num_inactive_binds = 0; /* if HCA ports are inactive */
    int status;
    int i;

    ASSERT(idm_svc != NULL);
    ASSERT(idm_svc->is_iser_svc != NULL);

    /* Register the iSER service on all available ports */
    mutex_enter(&iser_state->is_hcalist_lock);

    for (hca = list_head(&iser_state->is_hcalist);
        hca != NULL;
        hca = list_next(&iser_state->is_hcalist, hca)) {

        for (i = 0; i < hca->hca_num_ports; i++) {
            num_ports++;
            if (hca->hca_port_info[i].p_linkstate !=
                IBT_PORT_ACTIVE) {
                /*
                 * Move on. We will attempt to bind the
                 * service in our async handler if the port
                 * comes up at a later time.
                 */
                num_inactive_binds++;
                continue;
            }

            gid = hca->hca_port_info[i].p_sgid_tbl[0];

            /* If the port is already bound, skip it */
            if (iser_ib_get_bind(idm_svc->is_iser_svc,
                hca->hca_guid, gid) == NULL) {

                status = iser_ib_activate_port(idm_svc,
                    hca->hca_guid, gid);
                if (status != IBT_SUCCESS) {
                    ISER_LOG(CE_NOTE,
                        "iser_ib_bind_service: "
                        "iser_ib_activate_port failure "
                        "(0x%x)", status);
                    continue;
                }
            }
            num_binds++;
        }
    }
    mutex_exit(&iser_state->is_hcalist_lock);

    if (num_binds) {
        ISER_LOG(CE_NOTE, "iser_ib_bind_service: Service available on "
            "(%d) of (%d) ports", num_binds, num_ports);
        return (ISER_STATUS_SUCCESS);
    } else if (num_inactive_binds) {
        ISER_LOG(CE_NOTE, "iser_ib_bind_service: Could not bind "
            "service, HCA ports are not active.");
        /*
         * Still considered success; the async handler will bind
         * the service when a port comes up at a later time.
         */
        return (ISER_STATUS_SUCCESS);
    } else {
        ISER_LOG(CE_NOTE, "iser_ib_bind_service: Did not bind service");
        return (ISER_STATUS_FAIL);
    }
}

/*
 * iser_ib_unbind_service
 *
 * This function unbinds a given service on all HCA ports.
 */
void
iser_ib_unbind_service(idm_svc_t *idm_svc)
{
    iser_svc_t *iser_svc;
    iser_sbind_t *is_sbind, *next_sb;

    if (idm_svc != NULL && idm_svc->is_iser_svc != NULL) {

        iser_svc = idm_svc->is_iser_svc;

        for (is_sbind = list_head(&iser_svc->is_sbindlist);
            is_sbind != NULL;
            is_sbind = next_sb) {
            next_sb = list_next(&iser_svc->is_sbindlist, is_sbind);
            (void) ibt_unbind_service(iser_svc->is_srvhdl,
                is_sbind->is_sbindhdl);
            list_remove(&iser_svc->is_sbindlist, is_sbind);
            kmem_free(is_sbind, sizeof (iser_sbind_t));
        }
    }
}

/* ARGSUSED */
void
iser_ib_deregister_service(idm_svc_t *idm_svc)
{
    iser_svc_t *iser_svc;

    if (idm_svc != NULL && idm_svc->is_iser_svc != NULL) {

        iser_svc = (iser_svc_t *)idm_svc->is_iser_svc;

        (void) ibt_deregister_service(iser_state->is_ibhdl,
            iser_svc->is_srvhdl);
        (void) ibt_release_ip_sid(iser_svc->is_svcid);
    }
}

/*
 * iser_ib_get_paths
 * This function finds the IB path between the local and the remote address.
 */
int
iser_ib_get_paths(ibt_ip_addr_t *local_ip, ibt_ip_addr_t *remote_ip,
    ibt_path_info_t *path, ibt_path_ip_src_t *path_src_ip)
{
    ibt_ip_path_attr_t ipattr;
    int status;

    (void) bzero(&ipattr, sizeof (ibt_ip_path_attr_t));
    ipattr.ipa_dst_ip = remote_ip;
    /* A NULL local_ip requests a wildcard (INADDR_ANY) source address */
    if (local_ip != NULL)
        ipattr.ipa_src_ip = *local_ip;
    ipattr.ipa_max_paths = 1;
    ipattr.ipa_ndst = 1;

    (void) bzero(path, sizeof (ibt_path_info_t));
    status = ibt_get_ip_paths(iser_state->is_ibhdl, IBT_PATH_NO_FLAGS,
        &ipattr, path, NULL, path_src_ip);
    if (status != IBT_SUCCESS) {
        ISER_LOG(CE_NOTE, "iser_ib_get_paths: ibt_get_ip_paths "
            "failure: status (%d)", status);
        return (status);
    }

    if (local_ip != NULL) {
        ISER_LOG(CE_NOTE, "iser_ib_get_paths success: IP[%x to %x]",
            local_ip->un.ip4addr, remote_ip->un.ip4addr);
    } else {
        ISER_LOG(CE_NOTE, "iser_ib_get_paths success: "
            "IP[INADDR_ANY to %x]", remote_ip->un.ip4addr);
    }
    return (ISER_STATUS_SUCCESS);
}

/*
 * iser_ib_alloc_channel_nopathlookup
 *
 * This function allocates a reliable connected channel. It does not invoke
 * ibt_get_ip_paths() to do the path lookup; the HCA GUID and port are
 * passed in by the caller.
 */
iser_chan_t *
iser_ib_alloc_channel_nopathlookup(ib_guid_t hca_guid, uint8_t hca_port)
{
    iser_hca_t *hca;
    iser_chan_t *chan;

    /* Lookup the HCA using the given GUID */
    hca = iser_ib_guid2hca(hca_guid);
    if (hca == NULL) {
        ISER_LOG(CE_NOTE, "iser_ib_alloc_channel_nopathlookup: failed "
            "to lookup HCA(%llx) handle", (longlong_t)hca_guid);
        return (NULL);
    }

    chan = iser_ib_alloc_rc_channel(hca, hca_port);
    if (chan == NULL) {
        ISER_LOG(CE_NOTE, "iser_ib_alloc_channel_nopathlookup: failed "
            "to alloc channel on HCA(%llx) %d",
            (longlong_t)hca_guid, hca_port);
        return (NULL);
    }

    ISER_LOG(CE_NOTE, "iser_ib_alloc_channel_nopathlookup success: "
        "chanhdl (0x%p), HCA(%llx) %d",
        (void *)chan->ic_chanhdl, (longlong_t)hca_guid, hca_port);

    return (chan);
}

/*
 * iser_ib_alloc_channel_pathlookup
 *
 * This function allocates a reliable connected channel, but first invokes
 * ibt_get_ip_paths() with the given local and remote addresses to get the
 * HCA lgid and the port number.
 */
iser_chan_t *
iser_ib_alloc_channel_pathlookup(
    ibt_ip_addr_t *local_ip, ibt_ip_addr_t *remote_ip)
{
    ibt_path_info_t ibt_path;
    ibt_path_ip_src_t path_src_ip;
    ib_gid_t lgid;
    uint8_t hca_port; /* from path */
    iser_hca_t *hca;
    iser_chan_t *chan;
    int status;

    /* Lookup a path to the given destination */
    status = iser_ib_get_paths(local_ip, remote_ip, &ibt_path,
        &path_src_ip);
    if (status != ISER_STATUS_SUCCESS) {
        ISER_LOG(CE_NOTE, "iser_ib_alloc_channel_pathlookup: "
            "path lookup IP:[%llx to %llx] failed: status (%d)",
            (longlong_t)local_ip->un.ip4addr,
            (longlong_t)remote_ip->un.ip4addr,
            status);
        return (NULL);
    }

    /* get the local gid from the path info */
    lgid = ibt_path.pi_prim_cep_path.cep_adds_vect.av_sgid;

    /* get the hca port from the path info */
    hca_port = ibt_path.pi_prim_cep_path.cep_hca_port_num;

    /* Lookup the HCA using the gid in the path info */
    hca = iser_ib_gid2hca(lgid);
    if (hca == NULL) {
        /* Log the gid here; the hca pointer is NULL in this path */
        ISER_LOG(CE_NOTE, "iser_ib_alloc_channel_pathlookup: failed "
            "to lookup HCA handle for gid (%llx:%llx)",
            (longlong_t)lgid.gid_prefix, (longlong_t)lgid.gid_guid);
        return (NULL);
    }

    chan = iser_ib_alloc_rc_channel(hca, hca_port);
    if (chan == NULL) {
        ISER_LOG(CE_NOTE, "iser_ib_alloc_channel_pathlookup: failed "
            "to alloc channel from IP:[%llx to %llx] on HCA (%llx) %d",
            (longlong_t)local_ip->un.ip4addr,
            (longlong_t)remote_ip->un.ip4addr,
            (longlong_t)hca->hca_guid, hca_port);
        return (NULL);
    }

    ISER_LOG(CE_NOTE, "iser_ib_alloc_channel_pathlookup success: "
        "chanhdl (0x%p), IP:[%llx to %llx], lgid (%llx:%llx), "
        "HCA(%llx) %d",
        (void *)chan->ic_chanhdl,
        (longlong_t)local_ip->un.ip4addr,
        (longlong_t)remote_ip->un.ip4addr,
        (longlong_t)lgid.gid_prefix, (longlong_t)lgid.gid_guid,
        (longlong_t)hca->hca_guid, hca_port);

    chan->ic_ibt_path = ibt_path;
    chan->ic_localip = path_src_ip.ip_primary;
    chan->ic_remoteip = *remote_ip;

    return (chan);
}

/*
 * iser_ib_alloc_rc_channel
 *
 * This function allocates a reliable communication channel using the
 * specified channel attributes.
 */
iser_chan_t *
iser_ib_alloc_rc_channel(iser_hca_t *hca, uint8_t hca_port)
{
    iser_chan_t *chan;
    ibt_rc_chan_alloc_args_t chanargs;
    uint_t sq_size, rq_size;
    int status;

    chan = kmem_zalloc(sizeof (iser_chan_t), KM_SLEEP);

    mutex_init(&chan->ic_chan_lock, NULL, MUTEX_DRIVER, NULL);
    mutex_init(&chan->ic_sq_post_lock, NULL, MUTEX_DRIVER, NULL);

    /* Set up the iSER channel handle with the HCA */
    chan->ic_hca = hca;

    /*
     * Determine the queue sizes, based upon the HCA query data.
     * For our work queues, we use either our default value or the
     * HCA's maximum value, whichever is smaller.
     */
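    /*
     * For example (illustrative values only): if ISER_IB_SENDQ_SIZE
     * were 512 but the HCA reported a hca_max_chan_sz of 256, the send
     * queue would be sized down to 256, and the send CQ below would
     * then be sized to 257 so that it is one entry deeper than the WQ
     * it services.
     */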
    sq_size = min(hca->hca_attr.hca_max_chan_sz, ISER_IB_SENDQ_SIZE);
    rq_size = min(hca->hca_attr.hca_max_chan_sz, ISER_IB_RECVQ_SIZE);

    /*
     * For our completion queues, we again check the device maximum.
     * We want to end up with CQs that are the next size up from the
     * WQs they are servicing, so that they have some overhead.
     */
    if (hca->hca_attr.hca_max_cq_sz >= (sq_size + 1)) {
        chan->ic_sendcq_sz = sq_size + 1;
    } else {
        chan->ic_sendcq_sz = hca->hca_attr.hca_max_cq_sz;
        sq_size = chan->ic_sendcq_sz - 1;
    }

    if (hca->hca_attr.hca_max_cq_sz >= (rq_size + 1)) {
        chan->ic_recvcq_sz = rq_size + 1;
    } else {
        chan->ic_recvcq_sz = hca->hca_attr.hca_max_cq_sz;
        rq_size = chan->ic_recvcq_sz - 1;
    }

    /* Initialize the iSER channel's QP handle */
    iser_ib_init_qp(chan, sq_size, rq_size);

    /* Set up the Send Completion Queue */
    status = iser_ib_setup_cq(hca->hca_hdl, chan->ic_sendcq_sz,
        &chan->ic_sendcq);
    if (status != ISER_STATUS_SUCCESS) {
        iser_ib_fini_qp(&chan->ic_qp);
        mutex_destroy(&chan->ic_chan_lock);
        mutex_destroy(&chan->ic_sq_post_lock);
        kmem_free(chan, sizeof (iser_chan_t));
        return (NULL);
    }
    ibt_set_cq_handler(chan->ic_sendcq, iser_ib_sendcq_handler, chan);
    (void) ibt_enable_cq_notify(chan->ic_sendcq, IBT_NEXT_COMPLETION);

    /* Set up the Receive Completion Queue */
    status = iser_ib_setup_cq(hca->hca_hdl, chan->ic_recvcq_sz,
        &chan->ic_recvcq);
    if (status != ISER_STATUS_SUCCESS) {
        (void) ibt_free_cq(chan->ic_sendcq);
        iser_ib_fini_qp(&chan->ic_qp);
        mutex_destroy(&chan->ic_chan_lock);
        mutex_destroy(&chan->ic_sq_post_lock);
        kmem_free(chan, sizeof (iser_chan_t));
        return (NULL);
    }
    ibt_set_cq_handler(chan->ic_recvcq, iser_ib_recvcq_handler, chan);
    (void) ibt_enable_cq_notify(chan->ic_recvcq, IBT_NEXT_COMPLETION);

    /* Set up the channel arguments */
    iser_ib_setup_chanargs(hca_port, chan->ic_sendcq, chan->ic_recvcq,
        sq_size, rq_size, hca->hca_pdhdl, &chanargs);

    status = ibt_alloc_rc_channel(hca->hca_hdl, IBT_ACHAN_NO_FLAGS,
        &chanargs, &chan->ic_chanhdl, NULL);
    if (status != IBT_SUCCESS) {
        ISER_LOG(CE_NOTE, "iser_ib_alloc_rc_channel: failed "
            "ibt_alloc_rc_channel: status (%d)", status);
        (void) ibt_free_cq(chan->ic_sendcq);
        (void) ibt_free_cq(chan->ic_recvcq);
        iser_ib_fini_qp(&chan->ic_qp);
        mutex_destroy(&chan->ic_chan_lock);
        mutex_destroy(&chan->ic_sq_post_lock);
        kmem_free(chan, sizeof (iser_chan_t));
        return (NULL);
    }

    /* Set the 'channel' as the client private data */
    (void) ibt_set_chan_private(chan->ic_chanhdl, chan);

    return (chan);
}

/*
 * iser_ib_open_rc_channel
 * This function opens an RC connection on the given allocated RC channel.
 */
int
iser_ib_open_rc_channel(iser_chan_t *chan)
{
    ibt_ip_cm_info_t ipcm_info;
    iser_private_data_t iser_priv_data;
    ibt_chan_open_args_t ocargs;
    ibt_rc_returns_t ocreturns;
    int status;

    mutex_enter(&chan->ic_chan_lock);

    /*
     * For connection establishment, the initiator sends a CM REQ using
     * the iSER RDMA-Aware Service ID. Included are the source and
     * destination IP addresses, and the source port.
     */
    bzero(&ipcm_info, sizeof (ibt_ip_cm_info_t));
    ipcm_info.src_addr = chan->ic_localip;
    ipcm_info.dst_addr = chan->ic_remoteip;
    ipcm_info.src_port = chan->ic_lport;

    /*
     * The CM private data field defines the iSER connection parameters,
     * such as the zero-based virtual address exception (ZBVAE) and the
     * Send-with-invalidate exception (SIE).
     *
     * Solaris IBT does not currently support ZBVAE or SIE.
     */
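    /*
     * Note (interpretation of the iSER CM REQ private data format): a
     * set exception bit advertises that the corresponding feature will
     * NOT be used on this connection; both bits are set below because
     * neither feature is supported here.
     */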
    iser_priv_data.rsvd1 = 0;
    iser_priv_data.sie = 1;
    iser_priv_data.zbvae = 1;

    status = ibt_format_ip_private_data(&ipcm_info,
        sizeof (iser_private_data_t), &iser_priv_data);
    if (status != IBT_SUCCESS) {
        ISER_LOG(CE_NOTE, "iser_ib_open_rc_channel failed: %d", status);
        mutex_exit(&chan->ic_chan_lock);
        return (status);
    }

    /*
     * Set the SID we are attempting to connect to, based upon the
     * remote port number.
     */
    chan->ic_ibt_path.pi_sid = ibt_get_ip_sid(IPPROTO_TCP, chan->ic_rport);

    /* Set up the args for the channel open */
    bzero(&ocargs, sizeof (ibt_chan_open_args_t));
    ocargs.oc_path = &chan->ic_ibt_path;
    ocargs.oc_cm_handler = iser_ib_cm_handler;
    ocargs.oc_cm_clnt_private = iser_state;
    ocargs.oc_rdma_ra_out = 4;
    ocargs.oc_rdma_ra_in = 4;
    ocargs.oc_path_retry_cnt = 2;
    ocargs.oc_path_rnr_retry_cnt = 2;
    ocargs.oc_priv_data_len = sizeof (iser_private_data_t);
    ocargs.oc_priv_data = &iser_priv_data;

    bzero(&ocreturns, sizeof (ibt_rc_returns_t));

    status = ibt_open_rc_channel(chan->ic_chanhdl,
        IBT_OCHAN_NO_FLAGS, IBT_BLOCKING, &ocargs, &ocreturns);
    if (status != IBT_SUCCESS) {
        ISER_LOG(CE_NOTE, "iser_ib_open_rc_channel failed: %d", status);
        mutex_exit(&chan->ic_chan_lock);
        return (status);
    }

    mutex_exit(&chan->ic_chan_lock);
    return (IDM_STATUS_SUCCESS);
}

/*
 * iser_ib_close_rc_channel
 * This function closes the RC channel related to this iser_chan handle.
 * We invoke this in a blocking context, with no callbacks.
 */
void
iser_ib_close_rc_channel(iser_chan_t *chan)
{
    int status;

    mutex_enter(&chan->ic_chan_lock);
    status = ibt_close_rc_channel(chan->ic_chanhdl, IBT_BLOCKING, NULL,
        0, NULL, NULL, 0);
    if (status != IBT_SUCCESS) {
        ISER_LOG(CE_NOTE, "iser_ib_close_rc_channel: "
            "ibt_close_rc_channel failed: status (%d)", status);
    }
    mutex_exit(&chan->ic_chan_lock);
}

/*
 * iser_ib_free_rc_channel
 *
 * This function tears down an RC channel's QP initialization and frees it.
 * Note that we do not need synchronization here; the channel has been
 * closed already, so we should only have completion polling occurring.
 * Once complete, we are free to free the IBTF channel, WQ and CQ
 * resources, and our own related resources.
 */
void
iser_ib_free_rc_channel(iser_chan_t *chan)
{
    iser_qp_t *iser_qp;

    iser_qp = &chan->ic_qp;

    /* Ensure the SQ is empty */
    while (chan->ic_sq_post_count != 0) {
        mutex_exit(&chan->ic_conn->ic_lock);
        delay(drv_usectohz(ISER_DELAY_HALF_SECOND));
        mutex_enter(&chan->ic_conn->ic_lock);
    }
    mutex_destroy(&chan->ic_sq_post_lock);

    /* Ensure the RQ is empty */
    (void) ibt_flush_channel(chan->ic_chanhdl);
    mutex_enter(&iser_qp->qp_lock);
    while (iser_qp->rq_level != 0) {
        mutex_exit(&iser_qp->qp_lock);
        mutex_exit(&chan->ic_conn->ic_lock);
        delay(drv_usectohz(ISER_DELAY_HALF_SECOND));
        mutex_enter(&chan->ic_conn->ic_lock);
        mutex_enter(&iser_qp->qp_lock);
    }

    /* Free our QP handle */
    mutex_exit(&iser_qp->qp_lock);
    iser_ib_fini_qp(iser_qp);

    /* Free the IBT channel resources */
    (void) ibt_free_channel(chan->ic_chanhdl);
    chan->ic_chanhdl = NULL;

    /* Free the CQs */
    (void) ibt_free_cq(chan->ic_sendcq);
    (void) ibt_free_cq(chan->ic_recvcq);

    /* Free the chan handle */
    mutex_destroy(&chan->ic_chan_lock);
    kmem_free(chan, sizeof (iser_chan_t));
}

/*
 * iser_ib_post_recv
 *
 * This function handles keeping the RQ full on a given channel. It runs
 * mostly from a taskq, checks the current fill level of the RQ, and posts
 * as many WRs as necessary to fill it again.
 */
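/*
 * Note on the dispatch protocol: rq_taskqpending is expected to be set by
 * the code that schedules a refill (the recv completion path) before
 * dispatching, and is cleared below once the RQ has been refilled or the
 * refill could not proceed. This keeps at most one refill task outstanding
 * per QP.
 */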
int
iser_ib_post_recv_async(ibt_channel_hdl_t chanhdl)
{
    iser_chan_t *chan;
    int status;

    /* Pull our iSER channel handle from the private data */
    chan = (iser_chan_t *)ibt_get_chan_private(chanhdl);

    /*
     * The caller must check that chan->ic_conn->ic_stage indicates
     * the connection is active (not closing, not closed), and must
     * hold the mutex across that check and the call to this function.
     */
    ASSERT(mutex_owned(&chan->ic_conn->ic_lock));
    ASSERT((chan->ic_conn->ic_stage >= ISER_CONN_STAGE_ALLOCATED) &&
        (chan->ic_conn->ic_stage <= ISER_CONN_STAGE_LOGGED_IN));
    idm_conn_hold(chan->ic_conn->ic_idmc);
    status = ddi_taskq_dispatch(iser_taskq, iser_ib_post_recv_task,
        (void *)chanhdl, DDI_NOSLEEP);
    if (status != DDI_SUCCESS) {
        idm_conn_rele(chan->ic_conn->ic_idmc);
    }

    return (status);
}

static void
iser_ib_post_recv_task(void *arg)
{
    ibt_channel_hdl_t chanhdl = arg;
    iser_chan_t *chan;

    /* Pull our iSER channel handle from the private data */
    chan = (iser_chan_t *)ibt_get_chan_private(chanhdl);

    iser_ib_post_recv(chanhdl);
    idm_conn_rele(chan->ic_conn->ic_idmc);
}

void
iser_ib_post_recv(ibt_channel_hdl_t chanhdl)
{
    iser_chan_t *chan;
    iser_hca_t *hca;
    iser_msg_t *msg;
    ibt_recv_wr_t *wrlist, wr[ISER_IB_RQ_POST_MAX];
    int rq_space, msg_ret;
    int total_num, npost;
    uint_t nposted;
    int status, i;
    iser_qp_t *iser_qp;

    /* Pull our iSER channel handle from the private data */
    chan = (iser_chan_t *)ibt_get_chan_private(chanhdl);
    ASSERT(chan != NULL);

    mutex_enter(&chan->ic_conn->ic_lock);

    /* Bail out if the connection is closed; no need for more recv WRs */
    if ((chan->ic_conn->ic_stage == ISER_CONN_STAGE_CLOSING) ||
        (chan->ic_conn->ic_stage == ISER_CONN_STAGE_CLOSED)) {
        mutex_exit(&chan->ic_conn->ic_lock);
        return;
    }

    /* get the QP handle from the iser_chan */
    iser_qp = &chan->ic_qp;

    hca = chan->ic_hca;
    if (hca == NULL) {
        ISER_LOG(CE_NOTE, "iser_ib_post_recv: unable to retrieve "
            "HCA handle");
        mutex_exit(&chan->ic_conn->ic_lock);
        return;
    }

    /* check for space to post on the RQ */
    mutex_enter(&iser_qp->qp_lock);
    rq_space = iser_qp->rq_depth - iser_qp->rq_level;
    if (rq_space == 0) {
        /* The RQ is full, clear the pending flag and return */
        iser_qp->rq_taskqpending = B_FALSE;
        mutex_exit(&iser_qp->qp_lock);
        mutex_exit(&chan->ic_conn->ic_lock);
        return;
    }

    /* Keep track of the lowest value for rq_min_post_level */
    if (iser_qp->rq_level < iser_qp->rq_min_post_level)
        iser_qp->rq_min_post_level = iser_qp->rq_level;

    mutex_exit(&iser_qp->qp_lock);

    /* We have room to post, so pull from the msg cache */
    msg = iser_msg_get(hca, rq_space, &msg_ret);
    if (msg == NULL) {
        ISER_LOG(CE_NOTE, "iser_ib_post_recv: no message handles "
            "available in the msg cache currently");
        /*
         * There are no messages in the cache. Wait a half second,
         * then try again.
         */
        delay(drv_usectohz(ISER_DELAY_HALF_SECOND));
        status = iser_ib_post_recv_async(chanhdl);
        if (status != DDI_SUCCESS) {
            ISER_LOG(CE_NOTE, "iser_ib_post_recv: failed to "
                "redispatch routine");
            /* Failed to dispatch, clear the pending flag */
            mutex_enter(&iser_qp->qp_lock);
            iser_qp->rq_taskqpending = B_FALSE;
            mutex_exit(&iser_qp->qp_lock);
        }
        mutex_exit(&chan->ic_conn->ic_lock);
        return;
    }

    if (msg_ret != rq_space) {
        ISER_LOG(CE_NOTE, "iser_ib_post_recv: requested number of "
            "messages not allocated: requested (%d) allocated (%d)",
            rq_space, msg_ret);
        /* We got some, but not all, of our requested depth */
        rq_space = msg_ret;
    }

    /*
     * Now, walk through the allocated WRs and post them,
     * ISER_IB_RQ_POST_MAX (or fewer) at a time.
     */
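    /*
     * Each WR's wr_id carries the iser_msg_t pointer, so that the recv
     * completion handler can recover the message buffer directly from
     * the work completion.
     */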
    wrlist = &wr[0];
    total_num = rq_space;

    while (total_num) {
        /* determine the number to post on this iteration */
        npost = (total_num > ISER_IB_RQ_POST_MAX) ?
            ISER_IB_RQ_POST_MAX : total_num;

        /* build a list of WRs from the msg list */
        for (i = 0; i < npost; i++) {
            wrlist[i].wr_id = (ibt_wrid_t)(uintptr_t)msg;
            wrlist[i].wr_nds = ISER_IB_SGLIST_SIZE;
            wrlist[i].wr_sgl = &msg->msg_ds;
            msg = msg->nextp;
        }

        /* post the list to the RQ */
        nposted = 0;
        status = ibt_post_recv(chanhdl, wrlist, npost, &nposted);
        if ((status != IBT_SUCCESS) || (nposted != npost)) {
            ISER_LOG(CE_NOTE, "iser_ib_post_recv: ibt_post_recv "
                "failed: requested (%d) posted (%d) status (%d)",
                npost, nposted, status);
            total_num -= nposted;
            break;
        }

        /* decrement the total number to post by the number posted */
        total_num -= nposted;
    }

    mutex_enter(&iser_qp->qp_lock);
    if (total_num != 0) {
        ISER_LOG(CE_NOTE, "iser_ib_post_recv: unable to fill RQ, "
            "failed to post (%d) WRs", total_num);
        iser_qp->rq_level += rq_space - total_num;
    } else {
        iser_qp->rq_level += rq_space;
    }

    /*
     * Now that we've filled the RQ, check that all of the recv WRs
     * haven't just been immediately consumed. If so, taskqpending is
     * still B_TRUE, so we need to fire off a taskq thread to post
     * more WRs.
     */
    if (iser_qp->rq_level == 0) {
        mutex_exit(&iser_qp->qp_lock);
        status = iser_ib_post_recv_async(chanhdl);
        if (status != DDI_SUCCESS) {
            ISER_LOG(CE_NOTE, "iser_ib_post_recv: failed to "
                "dispatch followup routine");
            /* Failed to dispatch, clear the pending flag */
            mutex_enter(&iser_qp->qp_lock);
            iser_qp->rq_taskqpending = B_FALSE;
            mutex_exit(&iser_qp->qp_lock);
        }
    } else {
        /*
         * We're done; we've filled the RQ. Clear the taskq flag so
         * that we can run again.
         */
        iser_qp->rq_taskqpending = B_FALSE;
        mutex_exit(&iser_qp->qp_lock);
    }

    mutex_exit(&chan->ic_conn->ic_lock);
}

/*
 * iser_ib_handle_portup_event()
 * This handles the IBT_EVENT_PORT_UP unaffiliated asynchronous event.
 *
 * To facilitate a seamless bringover of the port and configure the CM
 * service for inbound iSER service requests on this newly active port,
 * the existing IDM services are checked for iSER support. If an iSER
 * service was already created, that service is simply bound to the gid
 * of the newly active port. If, on the other hand, the CM service did
 * not exist, i.e. only socket communication, then a new CM service is
 * first registered with the saved service parameters and then bound to
 * the newly active port.
 */
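/*
 * Note: IB port numbers reported in async events are 1-based, which is
 * why hca_port_info[] is indexed with (ev_port - 1) below.
 */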
/* ARGSUSED */
static void
iser_ib_handle_portup_event(ibt_hca_hdl_t hdl, ibt_async_event_t *event)
{
    iser_hca_t *hca;
    ib_gid_t gid;
    idm_svc_t *idm_svc;
    int status;

    ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event: HCA(0x%llx) port(%d)",
        (longlong_t)event->ev_hca_guid, event->ev_port);

    /*
     * Query all ports on the HCA and update the port information
     * maintained in the iser_hca_t structure.
     */
    hca = iser_ib_guid2hca(event->ev_hca_guid);
    if (hca == NULL) {
        /* The HCA has just become available; first port on that HCA */
        hca = iser_ib_alloc_hca(event->ev_hca_guid);
        if (hca == NULL) {
            ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event "
                "iser_ib_alloc_hca failed: HCA(0x%llx) port(%d)",
                (longlong_t)event->ev_hca_guid, event->ev_port);
            return;
        }
        mutex_enter(&iser_state->is_hcalist_lock);
        list_insert_tail(&iser_state->is_hcalist, hca);
        iser_state->is_num_hcas++;
        mutex_exit(&iser_state->is_hcalist_lock);
    } else {
        status = iser_ib_update_hcaports(hca);
        if (status != IBT_SUCCESS) {
            ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event "
                "status(0x%x): iser_ib_update_hcaports failed: "
                "HCA(0x%llx) port(%d)", status,
                (longlong_t)event->ev_hca_guid, event->ev_port);
            return;
        }
    }

    gid = hca->hca_port_info[event->ev_port - 1].p_sgid_tbl[0];

    /*
     * Iterate through the global list of IDM target services and
     * check for an existing iSER CM service.
     */
    mutex_enter(&idm.idm_global_mutex);
    for (idm_svc = list_head(&idm.idm_tgt_svc_list);
        idm_svc != NULL;
        idm_svc = list_next(&idm.idm_tgt_svc_list, idm_svc)) {

        if (idm_svc->is_iser_svc == NULL) {
            /* Establish a new CM service for iSER requests */
            status = iser_tgt_svc_create(&idm_svc->is_svc_req,
                idm_svc);
            if (status != IBT_SUCCESS) {
                ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event "
                    "status(0x%x): iser_tgt_svc_create failed: "
                    "HCA(0x%llx) port(%d)", status,
                    (longlong_t)event->ev_hca_guid,
                    event->ev_port);
                continue;
            }
        }

        status = iser_ib_activate_port(idm_svc, event->ev_hca_guid,
            gid);
        if (status != IBT_SUCCESS) {
            ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event "
                "status(0x%x): Bind service on port "
                "(%llx:%llx) failed", status,
                (longlong_t)gid.gid_prefix,
                (longlong_t)gid.gid_guid);
            continue;
        }
        ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event: service bound "
            "HCA(0x%llx) port(%d)", (longlong_t)event->ev_hca_guid,
            event->ev_port);
    }
    mutex_exit(&idm.idm_global_mutex);

    ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event success: "
        "HCA(0x%llx) port(%d)", (longlong_t)event->ev_hca_guid,
        event->ev_port);
}

/*
 * iser_ib_handle_portdown_event()
 * This handles the IBT_ERROR_PORT_DOWN unaffiliated asynchronous error.
 *
 * Unconfigure the CM service on the deactivated port and tear down the
 * connections that are using the CM service.
 */
/* ARGSUSED */
static void
iser_ib_handle_portdown_event(ibt_hca_hdl_t hdl, ibt_async_event_t *event)
{
    iser_hca_t *hca;
    ib_gid_t gid;
    int status;

    /*
     * Query all ports on the HCA and update the port information
     * maintained in the iser_hca_t structure.
     */
    hca = iser_ib_guid2hca(event->ev_hca_guid);
    ASSERT(hca != NULL);

    status = iser_ib_update_hcaports(hca);
    if (status != IBT_SUCCESS) {
        ISER_LOG(CE_NOTE, "iser_ib_handle_portdown_event status(0x%x): "
            "iser_ib_update_hcaports failed: HCA(0x%llx) port(%d)",
            status, (longlong_t)event->ev_hca_guid, event->ev_port);
        return;
    }

    /* get the gid of the port that went down */
    gid = hca->hca_port_info[event->ev_port - 1].p_sgid_tbl[0];
    iser_ib_deactivate_port(event->ev_hca_guid, gid);

    ISER_LOG(CE_NOTE, "iser_ib_handle_portdown_event success: "
        "HCA(0x%llx) port(%d)", (longlong_t)event->ev_hca_guid,
        event->ev_port);
}

/*
 * iser_ib_handle_hca_detach_event()
 * Quiesce all activity bound for the port, tear down the connections,
 * unbind iSER services on all ports, and release the HCA handle.
 */
/* ARGSUSED */
static void
iser_ib_handle_hca_detach_event(ibt_hca_hdl_t hdl, ibt_async_event_t *event)
{
    iser_hca_t *nexthca, *hca;
    int i, status;

    ISER_LOG(CE_NOTE, "iser_ib_handle_hca_detach_event: HCA(0x%llx)",
        (longlong_t)event->ev_hca_guid);

    hca = iser_ib_guid2hca(event->ev_hca_guid);
    if (hca == NULL) {
        /* We never tracked this HCA; nothing to do */
        return;
    }
    for (i = 0; i < hca->hca_num_ports; i++) {
        iser_ib_deactivate_port(hca->hca_guid,
            hca->hca_port_info[i].p_sgid_tbl[0]);
    }

    /*
     * Update the HCA list maintained in the iser_state. Free the
     * resources allocated to the HCA, i.e. the caches and the
     * protection domain.
     */
    mutex_enter(&iser_state->is_hcalist_lock);

    for (hca = list_head(&iser_state->is_hcalist);
        hca != NULL;
        hca = nexthca) {

        nexthca = list_next(&iser_state->is_hcalist, hca);

        if (hca->hca_guid == event->ev_hca_guid) {

            list_remove(&iser_state->is_hcalist, hca);
            iser_state->is_num_hcas--;

            status = iser_ib_free_hca(hca);
            if (status != DDI_SUCCESS) {
                ISER_LOG(CE_WARN, "iser_ib_handle_hca_detach: "
                    "Failed to free hca(%p)", (void *)hca);
                list_insert_tail(&iser_state->is_hcalist, hca);
                iser_state->is_num_hcas++;
            }
            /* No way to return status to IBT if this fails */
        }
    }
    mutex_exit(&iser_state->is_hcalist_lock);
}

/*
 * iser_ib_async_handler
 * An IBT asynchronous event handler is registered with the framework via
 * ibt_attach(). This function handles the following asynchronous events:
 * IBT_EVENT_PORT_UP
 * IBT_ERROR_PORT_DOWN
 * IBT_HCA_ATTACH_EVENT
 * IBT_HCA_DETACH_EVENT
 */
/* ARGSUSED */
void
iser_ib_async_handler(void *clntp, ibt_hca_hdl_t hdl, ibt_async_code_t code,
    ibt_async_event_t *event)
{
    switch (code) {
    case IBT_EVENT_PORT_UP:
        iser_ib_handle_portup_event(hdl, event);
        break;

    case IBT_ERROR_PORT_DOWN:
        iser_ib_handle_portdown_event(hdl, event);
        break;

    case IBT_HCA_ATTACH_EVENT:
        /*
         * A new HCA device is available for use; ignore this event
         * because the corresponding IBT_EVENT_PORT_UP events will
         * be triggered and handled accordingly.
         */
        break;

    case IBT_HCA_DETACH_EVENT:
        iser_ib_handle_hca_detach_event(hdl, event);
        break;

    default:
        break;
    }
}

/*
 * iser_ib_init_hcas
 *
 * This function opens all the HCA devices, gathers the HCA state
 * information, and adds a handle for each HCA found to the
 * iser_soft_state.
 */
static int
iser_ib_init_hcas(void)
{
    ib_guid_t *guid;
    int num_hcas;
    int i;
    iser_hca_t *hca;

    /* Retrieve the HCA list */
    num_hcas = ibt_get_hca_list(&guid);
    if (num_hcas == 0) {
        /*
         * This shouldn't happen, but might if we have all HCAs
         * detach prior to initialization.
         */
        return (DDI_FAILURE);
    }

    /* Initialize the hcalist lock */
    mutex_init(&iser_state->is_hcalist_lock, NULL, MUTEX_DRIVER, NULL);

    /* Create the HCA list */
    list_create(&iser_state->is_hcalist, sizeof (iser_hca_t),
        offsetof(iser_hca_t, hca_node));

    for (i = 0; i < num_hcas; i++) {

        ISER_LOG(CE_NOTE, "iser_ib_init_hcas: initializing HCA "
            "(0x%llx)", (longlong_t)guid[i]);

        hca = iser_ib_alloc_hca(guid[i]);
        if (hca == NULL) {
            /* This shouldn't happen, teardown and fail */
            (void) iser_ib_fini_hcas();
            (void) ibt_free_hca_list(guid, num_hcas);
            return (DDI_FAILURE);
        }

        mutex_enter(&iser_state->is_hcalist_lock);
        list_insert_tail(&iser_state->is_hcalist, hca);
        iser_state->is_num_hcas++;
        mutex_exit(&iser_state->is_hcalist_lock);
    }

    /* Free the IBT HCA list */
    (void) ibt_free_hca_list(guid, num_hcas);

    /* Check that we've initialized at least one HCA */
    mutex_enter(&iser_state->is_hcalist_lock);
    if (list_is_empty(&iser_state->is_hcalist)) {
        ISER_LOG(CE_NOTE, "iser_ib_init_hcas: failed to initialize "
            "any HCAs");
        mutex_exit(&iser_state->is_hcalist_lock);
        (void) iser_ib_fini_hcas();
        return (DDI_FAILURE);
    }
    mutex_exit(&iser_state->is_hcalist_lock);

    return (DDI_SUCCESS);
}

/*
 * iser_ib_fini_hcas
 *
 * Teardown the iSER HCA list initialized above.
 */
static int
iser_ib_fini_hcas(void)
{
    iser_hca_t *nexthca, *hca;
    int status;

    mutex_enter(&iser_state->is_hcalist_lock);
    for (hca = list_head(&iser_state->is_hcalist);
        hca != NULL;
        hca = nexthca) {

        nexthca = list_next(&iser_state->is_hcalist, hca);

        list_remove(&iser_state->is_hcalist, hca);

        status = iser_ib_free_hca(hca);
        if (status != DDI_SUCCESS) {
            /* Put the HCA back on the list before failing */
            ISER_LOG(CE_NOTE, "iser_ib_fini_hcas: failed to free "
                "HCA during fini");
            list_insert_tail(&iser_state->is_hcalist, hca);
            mutex_exit(&iser_state->is_hcalist_lock);
            return (DDI_FAILURE);
        }

        iser_state->is_num_hcas--;
    }
    mutex_exit(&iser_state->is_hcalist_lock);

    list_destroy(&iser_state->is_hcalist);
    mutex_destroy(&iser_state->is_hcalist_lock);

    return (DDI_SUCCESS);
}

/*
 * iser_ib_alloc_hca
 *
 * This function opens the given HCA device, gathers the HCA state
 * information, and adds the HCA handle.
 */
static iser_hca_t *
iser_ib_alloc_hca(ib_guid_t guid)
{
    iser_hca_t *hca;
    int status;

    /* Allocate an iser_hca_t HCA handle */
    hca = (iser_hca_t *)kmem_zalloc(sizeof (iser_hca_t), KM_SLEEP);

    /* Open this HCA */
    status = ibt_open_hca(iser_state->is_ibhdl, guid, &hca->hca_hdl);
    if (status != IBT_SUCCESS) {
        ISER_LOG(CE_NOTE, "iser_ib_alloc_hca: ibt_open_hca failed:"
            " guid (0x%llx) status (0x%x)", (longlong_t)guid, status);
        kmem_free(hca, sizeof (iser_hca_t));
        return (NULL);
    }

    hca->hca_guid = guid;
    hca->hca_clnt_hdl = iser_state->is_ibhdl;

    /* Query the HCA */
    status = ibt_query_hca(hca->hca_hdl, &hca->hca_attr);
    if (status != IBT_SUCCESS) {
        ISER_LOG(CE_NOTE, "iser_ib_alloc_hca: ibt_query_hca "
            "failure: guid (0x%llx) status (0x%x)",
            (longlong_t)guid, status);
        (void) ibt_close_hca(hca->hca_hdl);
        kmem_free(hca, sizeof (iser_hca_t));
        return (NULL);
    }

    /* Query all ports on the HCA */
    status = ibt_query_hca_ports(hca->hca_hdl, 0,
        &hca->hca_port_info, &hca->hca_num_ports,
        &hca->hca_port_info_sz);
    if (status != IBT_SUCCESS) {
        ISER_LOG(CE_NOTE, "iser_ib_alloc_hca: "
            "ibt_query_hca_ports failure: guid (0x%llx) "
            "status (0x%x)", (longlong_t)guid, status);
        (void) ibt_close_hca(hca->hca_hdl);
        kmem_free(hca, sizeof (iser_hca_t));
        return (NULL);
    }

    /* Allocate a single PD on this HCA */
    status = ibt_alloc_pd(hca->hca_hdl, IBT_PD_NO_FLAGS, &hca->hca_pdhdl);
    if (status != IBT_SUCCESS) {
        ISER_LOG(CE_NOTE, "iser_ib_alloc_hca: ibt_alloc_pd "
            "failure: guid (0x%llx) status (0x%x)",
            (longlong_t)guid, status);
        (void) ibt_close_hca(hca->hca_hdl);
        ibt_free_portinfo(hca->hca_port_info, hca->hca_port_info_sz);
        kmem_free(hca, sizeof (iser_hca_t));
        return (NULL);
    }

    /* Initialize the message and data MR caches for this HCA */
    iser_init_hca_caches(hca);

    return (hca);
}

static int
iser_ib_free_hca(iser_hca_t *hca)
{
    int status;
    ibt_hca_portinfo_t *hca_port_info;
    uint_t hca_port_info_sz;

    ASSERT(hca != NULL);
    if (hca->hca_failed)
        return (DDI_FAILURE);

    hca_port_info = hca->hca_port_info;
    hca_port_info_sz = hca->hca_port_info_sz;

    /*
     * Free the memory regions before freeing the associated
     * protection domain.
     */
    iser_fini_hca_caches(hca);

    status = ibt_free_pd(hca->hca_hdl, hca->hca_pdhdl);
    if (status != IBT_SUCCESS) {
        ISER_LOG(CE_NOTE, "iser_ib_free_hca: failed to free PD "
            "status=0x%x", status);
        goto out_caches;
    }

    status = ibt_close_hca(hca->hca_hdl);
    if (status != IBT_SUCCESS) {
        ISER_LOG(CE_NOTE, "iser_ib_free_hca: failed to close HCA "
            "status=0x%x", status);
        goto out_pd;
    }

    ibt_free_portinfo(hca_port_info, hca_port_info_sz);

    kmem_free(hca, sizeof (iser_hca_t));
    return (DDI_SUCCESS);

    /*
     * We only managed to partially tear down the HCA; try to put it
     * back like it was before returning.
     */
out_pd:
    status = ibt_alloc_pd(hca->hca_hdl, IBT_PD_NO_FLAGS, &hca->hca_pdhdl);
    if (status != IBT_SUCCESS) {
        hca->hca_failed = B_TRUE;
        /* Report the error and exit */
        ISER_LOG(CE_NOTE, "iser_ib_free_hca: could not re-alloc PD "
            "status=0x%x", status);
        return (DDI_FAILURE);
    }

out_caches:
    iser_init_hca_caches(hca);
    return (DDI_FAILURE);
}

static int
iser_ib_update_hcaports(iser_hca_t *hca)
{
    ibt_hca_portinfo_t *pinfop, *oldpinfop;
    uint_t size, oldsize, nport;
    int status;

    ASSERT(hca != NULL);

    status = ibt_query_hca_ports(hca->hca_hdl, 0, &pinfop, &nport, &size);
    if (status != IBT_SUCCESS) {
        ISER_LOG(CE_NOTE, "ibt_query_hca_ports failed: %d", status);
        return (status);
    }

    oldpinfop = hca->hca_port_info;
    oldsize = hca->hca_port_info_sz;
    hca->hca_port_info = pinfop;
    hca->hca_port_info_sz = size;

    (void) ibt_free_portinfo(oldpinfop, oldsize);

    return (IBT_SUCCESS);
}

/*
 * iser_ib_gid2hca
 * Given a gid, find the corresponding hca.
 */
iser_hca_t *
iser_ib_gid2hca(ib_gid_t gid)
{
    iser_hca_t *hca;
    int i;

    mutex_enter(&iser_state->is_hcalist_lock);
    for (hca = list_head(&iser_state->is_hcalist);
        hca != NULL;
        hca = list_next(&iser_state->is_hcalist, hca)) {

        for (i = 0; i < hca->hca_num_ports; i++) {
            if ((hca->hca_port_info[i].p_sgid_tbl[0].gid_prefix ==
                gid.gid_prefix) &&
                (hca->hca_port_info[i].p_sgid_tbl[0].gid_guid ==
                gid.gid_guid)) {
                mutex_exit(&iser_state->is_hcalist_lock);
                return (hca);
            }
        }
    }
    mutex_exit(&iser_state->is_hcalist_lock);
    return (NULL);
}

/*
 * iser_ib_guid2hca
 * Given an HCA guid, find the corresponding HCA.
 */
iser_hca_t *
iser_ib_guid2hca(ib_guid_t guid)
{
    iser_hca_t *hca;

    mutex_enter(&iser_state->is_hcalist_lock);
    for (hca = list_head(&iser_state->is_hcalist);
        hca != NULL;
        hca = list_next(&iser_state->is_hcalist, hca)) {

        if (hca->hca_guid == guid) {
            mutex_exit(&iser_state->is_hcalist_lock);
            return (hca);
        }
    }
    mutex_exit(&iser_state->is_hcalist_lock);
    return (NULL);
}

/*
 * iser_ib_conv_sockaddr2ibtaddr
 * This function converts a socket address into the IBT format.
 */
void
iser_ib_conv_sockaddr2ibtaddr(idm_sockaddr_t *saddr, ibt_ip_addr_t *ibt_addr)
{
    if (saddr == NULL) {
        ibt_addr->family = AF_UNSPEC;
        ibt_addr->un.ip4addr = 0;
    } else {
        switch (saddr->sin.sa_family) {
        case AF_INET:
            ibt_addr->family = saddr->sin4.sin_family;
            ibt_addr->un.ip4addr = saddr->sin4.sin_addr.s_addr;
            break;

        case AF_INET6:
            ibt_addr->family = saddr->sin6.sin6_family;
            ibt_addr->un.ip6addr = saddr->sin6.sin6_addr;
            break;

        default:
            ibt_addr->family = AF_UNSPEC;
        }
    }
}

/*
 * iser_ib_conv_ibtaddr2sockaddr
 * This function converts an IBT IP address handle to a sockaddr.
 */
void
iser_ib_conv_ibtaddr2sockaddr(struct sockaddr_storage *ss,
    ibt_ip_addr_t *ibt_addr, in_port_t port)
{
    struct sockaddr_in *sin;
    struct sockaddr_in6 *sin6;

    switch (ibt_addr->family) {
    case AF_INET:
    case AF_UNSPEC:
        sin = (struct sockaddr_in *)ibt_addr;
        sin->sin_port = ntohs(port);
        bcopy(sin, ss, sizeof (struct sockaddr_in));
        break;

    case AF_INET6:
        sin6 = (struct sockaddr_in6 *)ibt_addr;
        sin6->sin6_port = ntohs(port);
        bcopy(sin6, ss, sizeof (struct sockaddr_in6));
        break;

    default:
        ISER_LOG(CE_NOTE, "iser_ib_conv_ibtaddr2sockaddr: "
            "unknown family type: 0x%x", ibt_addr->family);
    }
}

/*
 * iser_ib_setup_cq
 * This function sets up the Completion Queue size and allocates the
 * specified Completion Queue.
 */
static int
iser_ib_setup_cq(ibt_hca_hdl_t hca_hdl, uint_t cq_size, ibt_cq_hdl_t *cq_hdl)
{
    ibt_cq_attr_t cq_attr;
    int status;

    cq_attr.cq_size = cq_size;
    cq_attr.cq_sched = 0;
    cq_attr.cq_flags = IBT_CQ_NO_FLAGS;

    /* Allocate a Completion Queue */
    status = ibt_alloc_cq(hca_hdl, &cq_attr, cq_hdl, NULL);
    if (status != IBT_SUCCESS) {
        ISER_LOG(CE_NOTE, "iser_ib_setup_cq: ibt_alloc_cq failure (%d)",
            status);
        return (status);
    }

    return (ISER_STATUS_SUCCESS);
}

/*
 * iser_ib_setup_chanargs
 */
static void
iser_ib_setup_chanargs(uint8_t hca_port, ibt_cq_hdl_t scq_hdl,
    ibt_cq_hdl_t rcq_hdl, uint_t sq_size, uint_t rq_size,
    ibt_pd_hdl_t hca_pdhdl, ibt_rc_chan_alloc_args_t *cargs)
{
    bzero(cargs, sizeof (ibt_rc_chan_alloc_args_t));

    /*
     * Set up the size of the channel's send queue and receive queue,
     * and the maximum number of elements in a scatter-gather list of
     * work requests posted to the send and receive queues.
     */
    cargs->rc_sizes.cs_sq = sq_size;
    cargs->rc_sizes.cs_rq = rq_size;
    cargs->rc_sizes.cs_sq_sgl = ISER_IB_SGLIST_SIZE;
    cargs->rc_sizes.cs_rq_sgl = ISER_IB_SGLIST_SIZE;

    /*
     * All work requests signaled on a WR basis will receive a send
     * request completion.
     */
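    /*
     * Note: with IBT_ALL_SIGNALED, every send WR generates a CQ entry,
     * which is why the send CQ is sized one deeper than the SQ it
     * services (see iser_ib_alloc_rc_channel() above).
     */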
    cargs->rc_flags = IBT_ALL_SIGNALED;

    /* Enable RDMA read and RDMA write on the channel end points */
    cargs->rc_control = IBT_CEP_RDMA_RD | IBT_CEP_RDMA_WR;

    /* Set the local HCA port on which the channel is allocated */
    cargs->rc_hca_port_num = hca_port;

    /* Set the Send and Receive Completion Queue handles */
    cargs->rc_scq = scq_hdl;
    cargs->rc_rcq = rcq_hdl;

    /* Set the protection domain associated with the channel */
    cargs->rc_pd = hca_pdhdl;

    /* No SRQ usage */
    cargs->rc_srq = NULL;
}

/*
 * iser_ib_init_qp
 * Initialize the QP handle.
 */
static void
iser_ib_init_qp(iser_chan_t *chan, uint_t sq_size, uint_t rq_size)
{
    /* Initialize the handle lock */
    mutex_init(&chan->ic_qp.qp_lock, NULL, MUTEX_DRIVER, NULL);

    /* Record the queue sizes */
    chan->ic_qp.sq_size = sq_size;
    chan->ic_qp.rq_size = rq_size;

    /* Initialize the RQ monitoring data */
    chan->ic_qp.rq_depth = rq_size;
    chan->ic_qp.rq_level = 0;
    chan->ic_qp.rq_lwm = (chan->ic_recvcq_sz * ISER_IB_RQ_LWM_PCT) / 100;

    /* Initialize the taskq flag */
    chan->ic_qp.rq_taskqpending = B_FALSE;
}

/*
 * iser_ib_fini_qp
 * Teardown the QP handle.
 */
static void
iser_ib_fini_qp(iser_qp_t *qp)
{
    /* Destroy the handle lock */
    mutex_destroy(&qp->qp_lock);
}

static int
iser_ib_activate_port(idm_svc_t *idm_svc, ib_guid_t guid, ib_gid_t gid)
{
    iser_svc_t *iser_svc;
    iser_sbind_t *is_sbind;
    int status;

    iser_svc = idm_svc->is_iser_svc;

    /*
     * Save the address of the service bind handle in the iser_svc_t
     * so that the service binding can be undone at a later time.
     */
    is_sbind = kmem_zalloc(sizeof (iser_sbind_t), KM_SLEEP);
    is_sbind->is_gid = gid;
    is_sbind->is_guid = guid;

    status = ibt_bind_service(iser_svc->is_srvhdl, gid, NULL,
        idm_svc, &is_sbind->is_sbindhdl);
    if (status != IBT_SUCCESS) {
        ISER_LOG(CE_NOTE, "iser_ib_activate_port: status(0x%x): "
            "Bind service(%llx) on port(%llx:%llx) failed",
            status, (longlong_t)iser_svc->is_svcid,
            (longlong_t)gid.gid_prefix, (longlong_t)gid.gid_guid);
        kmem_free(is_sbind, sizeof (iser_sbind_t));
        return (status);
    }

    list_insert_tail(&iser_svc->is_sbindlist, is_sbind);

    return (IBT_SUCCESS);
}

static void
iser_ib_deactivate_port(ib_guid_t hca_guid, ib_gid_t gid)
{
    iser_svc_t *iser_svc;
    iser_conn_t *iser_conn;
    iser_sbind_t *is_sbind;
    idm_conn_t *idm_conn;

    /*
     * Iterate through the global list of IDM target connections.
     * Issue a TRANSPORT_FAIL for any connections on this port, and
     * if there is a bound service running on the port, tear it down.
     */
    mutex_enter(&idm.idm_global_mutex);
    for (idm_conn = list_head(&idm.idm_tgt_conn_list);
        idm_conn != NULL;
        idm_conn = list_next(&idm.idm_tgt_conn_list, idm_conn)) {

        if (idm_conn->ic_transport_type != IDM_TRANSPORT_TYPE_ISER) {
            /* this is not an iSER connection, skip it */
            continue;
        }

        iser_conn = idm_conn->ic_transport_private;
        if (iser_conn->ic_chan->ic_ibt_path.pi_hca_guid != hca_guid) {
            /* this iSER connection is on a different port */
            continue;
        }

        /* Fail the transport for this connection */
        idm_conn_event(idm_conn, CE_TRANSPORT_FAIL, IDM_STATUS_FAIL);

        if (idm_conn->ic_conn_type == CONN_TYPE_INI) {
            /* initiator connection, nothing else to do */
            continue;
        }

        /* Check for a service binding */
        iser_svc = idm_conn->ic_svc_binding->is_iser_svc;
        is_sbind = iser_ib_get_bind(iser_svc, hca_guid, gid);
        if (is_sbind != NULL) {
            /* This service is still bound, tear it down */
            (void) ibt_unbind_service(iser_svc->is_srvhdl,
                is_sbind->is_sbindhdl);
            list_remove(&iser_svc->is_sbindlist, is_sbind);
            kmem_free(is_sbind, sizeof (iser_sbind_t));
        }
    }
    mutex_exit(&idm.idm_global_mutex);
}

static iser_sbind_t *
iser_ib_get_bind(iser_svc_t *iser_svc, ib_guid_t hca_guid, ib_gid_t gid)
{
    iser_sbind_t *is_sbind;

    for (is_sbind = list_head(&iser_svc->is_sbindlist);
        is_sbind != NULL;
        is_sbind = list_next(&iser_svc->is_sbindlist, is_sbind)) {

        if ((is_sbind->is_guid == hca_guid) &&
            (is_sbind->is_gid.gid_prefix == gid.gid_prefix) &&
            (is_sbind->is_gid.gid_guid == gid.gid_guid)) {
            return (is_sbind);
        }
    }
    return (NULL);
}