1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/types.h> 27 #include <sys/ddi.h> 28 #include <sys/types.h> 29 #include <sys/socket.h> 30 #include <netinet/in.h> 31 #include <sys/sunddi.h> 32 #include <sys/sysmacros.h> 33 #include <sys/iscsi_protocol.h> 34 35 #include <sys/ib/clients/iser/iser.h> 36 #include <sys/ib/clients/iser/iser_idm.h> 37 38 /* 39 * iser_ib.c 40 * Routines for InfiniBand transport for iSER 41 * 42 * This file contains the routines to interface with the IBT API to attach and 43 * allocate IB resources, handle async events, and post recv work requests. 
 *
 */

/* HCA lookup helpers: map a port GID / HCA GUID to our iser_hca_t */
static iser_hca_t *iser_ib_gid2hca(ib_gid_t gid);
static iser_hca_t *iser_ib_guid2hca(ib_guid_t guid);

static iser_hca_t *iser_ib_alloc_hca(ib_guid_t guid);
static int iser_ib_free_hca(iser_hca_t *hca);
static int iser_ib_update_hcaports(iser_hca_t *hca);
static int iser_ib_init_hcas(void);
static int iser_ib_fini_hcas(void);

static iser_sbind_t *iser_ib_get_bind(
    iser_svc_t *iser_svc, ib_guid_t hca_guid, ib_gid_t gid);
static int iser_ib_activate_port(
    idm_svc_t *idm_svc, ib_guid_t guid, ib_gid_t gid);
static void iser_ib_deactivate_port(ib_guid_t hca_guid, ib_gid_t gid);

static void iser_ib_init_qp(iser_chan_t *chan, uint_t sq_size, uint_t rq_size);
static void iser_ib_fini_qp(iser_qp_t *qp);

static int iser_ib_setup_cq(ibt_hca_hdl_t hca_hdl, uint_t cq_size,
    ibt_cq_hdl_t *cq_hdl);

static void iser_ib_setup_chanargs(uint8_t hca_port, ibt_cq_hdl_t scq_hdl,
    ibt_cq_hdl_t rcq_hdl, uint_t sq_size, uint_t rq_size,
    ibt_pd_hdl_t hca_pdhdl, ibt_rc_chan_alloc_args_t *cargs);

/* Unaffiliated async event handlers, dispatched by iser_ib_async_handler() */
static void iser_ib_handle_portup_event(ibt_hca_hdl_t hdl,
    ibt_async_event_t *event);
static void iser_ib_handle_portdown_event(ibt_hca_hdl_t hdl,
    ibt_async_event_t *event);
static void iser_ib_handle_hca_detach_event(ibt_hca_hdl_t hdl,
    ibt_async_event_t *event);

/*
 * IBT client module registration info, passed to ibt_attach() in
 * iser_ib_init(). Routes unaffiliated async events (port up/down,
 * HCA attach/detach) to iser_ib_async_handler().
 */
static struct ibt_clnt_modinfo_s iser_ib_modinfo = {
	IBTI_V_CURR,
	IBT_STORAGE_DEV,
	iser_ib_async_handler,
	NULL,
	"iSER"
};

/*
 * iser_ib_init
 *
 * This function registers the HCA drivers with IBTF and registers and binds
 * iSER as a service with IBTF.
 */
int
iser_ib_init(void)
{
	int		status;

	/* Register with IBTF */
	status = ibt_attach(&iser_ib_modinfo, iser_state->is_dip, iser_state,
	    &iser_state->is_ibhdl);
	if (status != DDI_SUCCESS) {
		ISER_LOG(CE_NOTE, "iser_ib_init: ibt_attach failed (0x%x)",
		    status);
		return (DDI_FAILURE);
	}

	/* Create the global work request kmem_cache */
	iser_state->iser_wr_cache = kmem_cache_create("iser_wr_cache",
	    sizeof (iser_wr_t), 0, NULL, NULL, NULL,
	    iser_state, NULL, KM_SLEEP);

	/* Populate our list of HCAs */
	status = iser_ib_init_hcas();
	if (status != DDI_SUCCESS) {
		/* HCAs failed to initialize, tear it down */
		kmem_cache_destroy(iser_state->iser_wr_cache);
		(void) ibt_detach(iser_state->is_ibhdl);
		iser_state->is_ibhdl = NULL;
		ISER_LOG(CE_NOTE, "iser_ib_init: failed to initialize HCAs");
		return (DDI_FAILURE);
	}

	/* Target will register iSER as a service with IBTF when required */

	/* Target will bind this service when it comes online */

	return (DDI_SUCCESS);
}

/*
 * iser_ib_fini
 *
 * This function unbinds and deregisters the iSER service from IBTF.
 * IDM is expected to have disabled all services before this is called.
 */
int
iser_ib_fini(void)
{
	/* IDM would have already disabled all the services */

	/* Teardown the HCA list and associated resources */
	if (iser_ib_fini_hcas() != DDI_SUCCESS)
		return (DDI_FAILURE);

	/* Teardown the global work request kmem_cache */
	kmem_cache_destroy(iser_state->iser_wr_cache);

	/* Deregister with IBTF */
	if (iser_state->is_ibhdl != NULL) {
		(void) ibt_detach(iser_state->is_ibhdl);
		iser_state->is_ibhdl = NULL;
	}

	return (DDI_SUCCESS);
}

/*
 * iser_ib_register_service
 *
 * This function registers the iSER service using the RDMA-Aware Service ID.
 */
int
iser_ib_register_service(idm_svc_t *idm_svc)
{
	ibt_srv_desc_t	srvdesc;
	iser_svc_t	*iser_svc;
	int		status;

	bzero(&srvdesc, sizeof (ibt_srv_desc_t));

	/* Set up IBTI client callback handler from the CM */
	srvdesc.sd_handler = iser_ib_cm_handler;

	srvdesc.sd_flags = IBT_SRV_NO_FLAGS;

	iser_svc = (iser_svc_t *)idm_svc->is_iser_svc;

	/* Register the service on the specified port */
	status = ibt_register_service(
	    iser_state->is_ibhdl, &srvdesc,
	    iser_svc->is_svcid, 1, &iser_svc->is_srvhdl, NULL);

	/* ibt_register_service() status is returned to the caller directly */
	return (status);
}

/*
 * iser_ib_bind_service
 *
 * This function binds a given iSER service on all available HCA ports.
 * Returns ISER_STATUS_SUCCESS if the service could be bound (or was
 * already bound) on at least one active port.
 */
int
iser_ib_bind_service(idm_svc_t *idm_svc)
{
	iser_hca_t	*hca;
	ib_gid_t	gid;
	int		num_ports = 0;
	int		num_binds = 0;
	int		status;
	int		i;

	ASSERT(idm_svc != NULL);
	ASSERT(idm_svc->is_iser_svc != NULL);

	/* Register the iSER service on all available ports */
	mutex_enter(&iser_state->is_hcalist_lock);

	for (hca = list_head(&iser_state->is_hcalist);
	    hca != NULL;
	    hca = list_next(&iser_state->is_hcalist, hca)) {

		for (i = 0; i < hca->hca_num_ports; i++) {
			num_ports++;
			if (hca->hca_port_info[i].p_linkstate !=
			    IBT_PORT_ACTIVE) {
				/*
				 * Move on. We will attempt to bind service
				 * in our async handler if the port comes up
				 * at a later time.
				 */
				continue;
			}

			gid = hca->hca_port_info[i].p_sgid_tbl[0];

			/* If the port is already bound, skip */
			if (iser_ib_get_bind(
			    idm_svc->is_iser_svc, hca->hca_guid, gid) == NULL) {

				status = iser_ib_activate_port(
				    idm_svc, hca->hca_guid, gid);
				if (status != IBT_SUCCESS) {
					ISER_LOG(CE_NOTE,
					    "iser_ib_bind_service: "
					    "iser_ib_activate_port failure "
					    "(0x%x)", status);
					continue;
				}
			}
			/* counted for already-bound and newly-bound ports */
			num_binds++;
		}
	}
	mutex_exit(&iser_state->is_hcalist_lock);

	if (num_binds) {
		ISER_LOG(CE_NOTE, "iser_ib_bind_service: Service available on "
		    "(%d) of (%d) ports", num_binds, num_ports);
		return (ISER_STATUS_SUCCESS);
	} else {
		ISER_LOG(CE_NOTE, "iser_ib_bind_service: Did not bind service");
		return (ISER_STATUS_FAIL);
	}
}

/*
 * iser_ib_unbind_service
 *
 * This function unbinds a given service on all HCA ports, freeing each
 * service bind handle as it goes.
 */
void
iser_ib_unbind_service(idm_svc_t *idm_svc)
{
	iser_svc_t	*iser_svc;
	iser_sbind_t	*is_sbind, *next_sb;

	if (idm_svc != NULL && idm_svc->is_iser_svc != NULL) {

		iser_svc = idm_svc->is_iser_svc;

		for (is_sbind = list_head(&iser_svc->is_sbindlist);
		    is_sbind != NULL;
		    is_sbind = next_sb) {
			next_sb = list_next(&iser_svc->is_sbindlist, is_sbind);
			ibt_unbind_service(iser_svc->is_srvhdl,
			    is_sbind->is_sbindhdl);
			list_remove(&iser_svc->is_sbindlist, is_sbind);
			kmem_free(is_sbind, sizeof (iser_sbind_t));
		}
	}
}

/*
 * iser_ib_deregister_service
 *
 * Deregisters the iSER CM service and releases its RDMA-aware service ID.
 */
/* ARGSUSED */
void
iser_ib_deregister_service(idm_svc_t *idm_svc)
{
	iser_svc_t	*iser_svc;

	if (idm_svc != NULL && idm_svc->is_iser_svc != NULL) {

		iser_svc = (iser_svc_t *)idm_svc->is_iser_svc;
		ibt_deregister_service(iser_state->is_ibhdl,
		    iser_svc->is_srvhdl);
		ibt_release_ip_sid(iser_svc->is_svcid);
	}
}

/*
 * iser_ib_get_paths
 * This function finds the IB path between the local and the
 * remote address.
 *
 */
int
iser_ib_get_paths(ibt_ip_addr_t *local_ip, ibt_ip_addr_t *remote_ip,
    ibt_path_info_t *path, ibt_path_ip_src_t *path_src_ip)
{
	ibt_ip_path_attr_t	ipattr;
	int			status;

	(void) bzero(&ipattr, sizeof (ibt_ip_path_attr_t));
	ipattr.ipa_dst_ip	= remote_ip;
	/*
	 * NOTE(review): local_ip is dereferenced unconditionally here, yet
	 * the logging below tests it against NULL. A NULL local_ip would
	 * fault before that check is reached -- confirm that no caller
	 * passes NULL, or hoist the NULL handling above this assignment.
	 */
	ipattr.ipa_src_ip	= *local_ip;
	ipattr.ipa_max_paths	= 1;
	ipattr.ipa_ndst		= 1;

	(void) bzero(path, sizeof (ibt_path_info_t));
	status = ibt_get_ip_paths(iser_state->is_ibhdl, IBT_PATH_NO_FLAGS,
	    &ipattr, path, NULL, path_src_ip);
	if (status != IBT_SUCCESS) {
		ISER_LOG(CE_NOTE, "ibt_get_ip_paths: ibt_get_ip_paths "
		    "failure: status (%d)", status);
		return (status);
	}

	if (local_ip != NULL) {
		ISER_LOG(CE_NOTE, "iser_ib_get_paths success: IP[%x to %x]",
		    local_ip->un.ip4addr, remote_ip->un.ip4addr);
	} else {
		ISER_LOG(CE_NOTE, "iser_ib_get_paths success: "
		    "IP[INADDR_ANY to %x]", remote_ip->un.ip4addr);
	}

	return (ISER_STATUS_SUCCESS);
}

/*
 * iser_ib_alloc_rc_channel
 *
 * This function allocates a reliable communication channel using the specified
 * channel attributes.
 */
iser_chan_t *
iser_ib_alloc_rc_channel(ibt_ip_addr_t *local_ip, ibt_ip_addr_t *remote_ip)
{

	iser_chan_t		*chan;
	ib_gid_t		lgid;
	uint8_t			hca_port; /* from path */
	iser_hca_t		*hca;
	ibt_path_ip_src_t	path_src_ip;
	ibt_rc_chan_alloc_args_t chanargs;
	uint_t			sq_size, rq_size;
	int			status;

	chan = kmem_zalloc(sizeof (iser_chan_t), KM_SLEEP);

	mutex_init(&chan->ic_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&chan->ic_sq_post_lock, NULL, MUTEX_DRIVER, NULL);

	/* Lookup a path to the given destination */
	status = iser_ib_get_paths(local_ip, remote_ip, &chan->ic_ibt_path,
	    &path_src_ip);

	if (status != ISER_STATUS_SUCCESS) {
		ISER_LOG(CE_NOTE, "iser_ib_get_paths failed: status (%d)",
		    status);
		mutex_destroy(&chan->ic_lock);
		mutex_destroy(&chan->ic_sq_post_lock);
		kmem_free(chan, sizeof (iser_chan_t));
		return (NULL);
	}

	/* get the local gid from the path info */
	lgid = chan->ic_ibt_path.pi_prim_cep_path.cep_adds_vect.av_sgid;

	/* get the hca port from the path info */
	hca_port = chan->ic_ibt_path.pi_prim_cep_path.cep_hca_port_num;

	/* Lookup the hca using the gid in the path info */
	hca = iser_ib_gid2hca(lgid);
	if (hca == NULL) {
		ISER_LOG(CE_NOTE, "iser_ib_alloc_rc_channel: failed "
		    "to lookup HCA handle");
		mutex_destroy(&chan->ic_lock);
		mutex_destroy(&chan->ic_sq_post_lock);
		kmem_free(chan, sizeof (iser_chan_t));
		return (NULL);
	}

	/* Set up the iSER channel handle with HCA and IP data */
	chan->ic_hca		= hca;
	chan->ic_localip	= path_src_ip.ip_primary;
	chan->ic_remoteip	= *remote_ip;

	/*
	 * Determine the queue sizes, based upon the HCA query data.
	 * For our Work Queues, we will use either our default value,
	 * or the HCA's maximum value, whichever is smaller.
	 */
	sq_size = min(hca->hca_attr.hca_max_chan_sz, ISER_IB_SENDQ_SIZE);
	rq_size = min(hca->hca_attr.hca_max_chan_sz, ISER_IB_RECVQ_SIZE);

	/*
	 * For our Completion Queues, we again check the device maximum.
	 * We want to end up with CQs that are the next size up from the
	 * WQs they are servicing so that they have some overhead.
	 * If the device cannot support that, the WQ is shrunk to fit
	 * one below the CQ maximum instead.
	 */
	if (hca->hca_attr.hca_max_cq_sz >= (sq_size + 1)) {
		chan->ic_sendcq_sz = sq_size + 1;
	} else {
		chan->ic_sendcq_sz = hca->hca_attr.hca_max_cq_sz;
		sq_size = chan->ic_sendcq_sz - 1;
	}

	if (hca->hca_attr.hca_max_cq_sz >= (rq_size + 1)) {
		chan->ic_recvcq_sz = rq_size + 1;
	} else {
		chan->ic_recvcq_sz = hca->hca_attr.hca_max_cq_sz;
		rq_size = chan->ic_recvcq_sz - 1;
	}

	/* Initialize the iSER channel's QP handle */
	iser_ib_init_qp(chan, sq_size, rq_size);

	/* Set up the Send Completion Queue */
	status = iser_ib_setup_cq(hca->hca_hdl, chan->ic_sendcq_sz,
	    &chan->ic_sendcq);
	if (status != ISER_STATUS_SUCCESS) {
		iser_ib_fini_qp(&chan->ic_qp);
		mutex_destroy(&chan->ic_lock);
		mutex_destroy(&chan->ic_sq_post_lock);
		kmem_free(chan, sizeof (iser_chan_t));
		return (NULL);
	}
	ibt_set_cq_handler(chan->ic_sendcq, iser_ib_sendcq_handler, chan);
	ibt_enable_cq_notify(chan->ic_sendcq, IBT_NEXT_COMPLETION);

	/* Set up the Receive Completion Queue */
	status = iser_ib_setup_cq(hca->hca_hdl, chan->ic_recvcq_sz,
	    &chan->ic_recvcq);
	if (status != ISER_STATUS_SUCCESS) {
		(void) ibt_free_cq(chan->ic_sendcq);
		iser_ib_fini_qp(&chan->ic_qp);
		mutex_destroy(&chan->ic_lock);
		mutex_destroy(&chan->ic_sq_post_lock);
		kmem_free(chan, sizeof (iser_chan_t));
		return (NULL);
	}
	ibt_set_cq_handler(chan->ic_recvcq, iser_ib_recvcq_handler, chan);
	ibt_enable_cq_notify(chan->ic_recvcq, IBT_NEXT_COMPLETION);

	/* Setup the channel arguments */
	iser_ib_setup_chanargs(hca_port, chan->ic_sendcq,
	    chan->ic_recvcq,
	    sq_size, rq_size, hca->hca_pdhdl, &chanargs);

	status = ibt_alloc_rc_channel(hca->hca_hdl,
	    IBT_ACHAN_NO_FLAGS, &chanargs, &chan->ic_chanhdl, NULL);
	if (status != IBT_SUCCESS) {
		ISER_LOG(CE_NOTE, "iser_ib_alloc_rc_channel: failed "
		    "ibt_alloc_rc_channel: status (%d)", status);
		(void) ibt_free_cq(chan->ic_sendcq);
		(void) ibt_free_cq(chan->ic_recvcq);
		iser_ib_fini_qp(&chan->ic_qp);
		mutex_destroy(&chan->ic_lock);
		mutex_destroy(&chan->ic_sq_post_lock);
		kmem_free(chan, sizeof (iser_chan_t));
		return (NULL);
	}

	/* Set the 'channel' as the client private data */
	(void) ibt_set_chan_private(chan->ic_chanhdl, chan);

	ISER_LOG(CE_NOTE, "iser_ib_alloc_rc_channel success: "
	    "chanhdl (0x%p), IP:[%llx to %llx], lgid (%llx:%llx), HCA(%llx) %d",
	    (void *)chan->ic_chanhdl,
	    (longlong_t)local_ip->un.ip4addr,
	    (longlong_t)remote_ip->un.ip4addr,
	    (longlong_t)lgid.gid_prefix, (longlong_t)lgid.gid_guid,
	    (longlong_t)hca->hca_guid, hca_port);

	return (chan);
}

/*
 * iser_ib_open_rc_channel
 * This function opens a RC connection on the given allocated RC channel
 */
int
iser_ib_open_rc_channel(iser_chan_t *chan)
{
	ibt_ip_cm_info_t	ipcm_info;
	iser_private_data_t	iser_priv_data;
	ibt_chan_open_args_t	ocargs;
	ibt_rc_returns_t	ocreturns;
	int			status;

	mutex_enter(&chan->ic_lock);

	/*
	 * For connection establishment, the initiator sends a CM REQ using the
	 * iSER RDMA-Aware Service ID. Included are the source and destination
	 * IP addresses, and the src port.
	 */
	bzero(&ipcm_info, sizeof (ibt_ip_cm_info_t));
	ipcm_info.src_addr = chan->ic_localip;
	ipcm_info.dst_addr = chan->ic_remoteip;
	ipcm_info.src_port = chan->ic_lport;

	/*
	 * The CM Private Data field defines the iSER connection parameters
	 * such as zero based virtual address exception (ZBVAE) and Send with
	 * invalidate Exception (SIE).
	 *
	 * Solaris IBT does not currently support ZBVAE or SIE; the exception
	 * bits are therefore set (1) in the private data below.
	 */
	iser_priv_data.rsvd1	= 0;
	iser_priv_data.sie	= 1;
	iser_priv_data.zbvae	= 1;

	status = ibt_format_ip_private_data(&ipcm_info,
	    sizeof (iser_private_data_t), &iser_priv_data);
	if (status != IBT_SUCCESS) {
		ISER_LOG(CE_NOTE, "iser_ib_open_rc_channel failed: %d", status);
		mutex_exit(&chan->ic_lock);
		return (status);
	}

	/*
	 * Set the SID we are attempting to connect to, based upon the
	 * remote port number.
	 */
	chan->ic_ibt_path.pi_sid = ibt_get_ip_sid(IPPROTO_TCP, chan->ic_rport);

	/* Set up the args for the channel open */
	bzero(&ocargs, sizeof (ibt_chan_open_args_t));
	ocargs.oc_path			= &chan->ic_ibt_path;
	ocargs.oc_cm_handler		= iser_ib_cm_handler;
	ocargs.oc_cm_clnt_private	= iser_state;
	ocargs.oc_rdma_ra_out		= 4;
	ocargs.oc_rdma_ra_in		= 4;
	ocargs.oc_path_retry_cnt	= 2;
	ocargs.oc_path_rnr_retry_cnt	= 2;
	ocargs.oc_priv_data_len		= sizeof (iser_private_data_t);
	ocargs.oc_priv_data		= &iser_priv_data;

	bzero(&ocreturns, sizeof (ibt_rc_returns_t));

	/* IBT_BLOCKING: this call waits for connection establishment */
	status = ibt_open_rc_channel(chan->ic_chanhdl,
	    IBT_OCHAN_NO_FLAGS, IBT_BLOCKING, &ocargs, &ocreturns);

	if (status != IBT_SUCCESS) {
		ISER_LOG(CE_NOTE, "iser_ib_open_rc_channel failed: %d", status);
		mutex_exit(&chan->ic_lock);
		return (status);
	}

	mutex_exit(&chan->ic_lock);
	return (IDM_STATUS_SUCCESS);
}

/*
 * iser_ib_close_rc_channel
 * This function closes the RC channel related to this iser_chan handle.
 * The close is invoked with IBT_BLOCKING and no callbacks; it returns
 * only once the channel has been closed.
 */
void
iser_ib_close_rc_channel(iser_chan_t *chan)
{
	int			status;

	mutex_enter(&chan->ic_lock);
	status = ibt_close_rc_channel(chan->ic_chanhdl, IBT_BLOCKING, NULL,
	    0, NULL, NULL, 0);
	if (status != IBT_SUCCESS) {
		/* Log and continue; there is no caller to return status to */
		ISER_LOG(CE_NOTE, "iser_ib_close_rc_channel: "
		    "ibt_close_rc_channel failed: status (%d)", status);
	}
	mutex_exit(&chan->ic_lock);
}

/*
 * iser_ib_free_rc_channel
 *
 * This function tears down an RC channel's QP initialization and frees it.
 * Note that we do not need synchronization here; the channel has been
 * closed already, so we should only have completion polling occurring. Once
 * complete, we are free to free the IBTF channel, WQ and CQ resources, and
 * our own related resources.
 *
 * NOTE(review): the drain loops below drop and re-acquire
 * chan->ic_conn->ic_lock, so the caller is assumed to hold it on
 * entry -- confirm against callers.
 */
void
iser_ib_free_rc_channel(iser_chan_t *chan)
{
	iser_qp_t	*iser_qp;

	iser_qp = &chan->ic_qp;

	/* Ensure the SQ is empty */
	while (chan->ic_sq_post_count != 0) {
		mutex_exit(&chan->ic_conn->ic_lock);
		delay(drv_usectohz(ISER_DELAY_HALF_SECOND));
		mutex_enter(&chan->ic_conn->ic_lock);
	}
	mutex_destroy(&chan->ic_sq_post_lock);

	/* Ensure the RQ is empty */
	(void) ibt_flush_channel(chan->ic_chanhdl);
	mutex_enter(&iser_qp->qp_lock);
	while (iser_qp->rq_level != 0) {
		mutex_exit(&iser_qp->qp_lock);
		mutex_exit(&chan->ic_conn->ic_lock);
		delay(drv_usectohz(ISER_DELAY_HALF_SECOND));
		mutex_enter(&chan->ic_conn->ic_lock);
		mutex_enter(&iser_qp->qp_lock);
	}

	/* Free our QP handle */
	mutex_exit(&iser_qp->qp_lock);
	(void) iser_ib_fini_qp(iser_qp);

	/* Free the IBT channel resources */
	(void) ibt_free_channel(chan->ic_chanhdl);
	chan->ic_chanhdl = NULL;

	/* Free the CQs */
	ibt_free_cq(chan->ic_sendcq);
	ibt_free_cq(chan->ic_recvcq);

	/* Free the chan handle */
	mutex_destroy(&chan->ic_lock);
	kmem_free(chan, sizeof (iser_chan_t));
}

/*
 * iser_ib_post_recv
 *
 * This function handles keeping the RQ full on a given channel.
 * This routine will mostly be run on a taskq, and will check the
 * current fill level of the RQ, and post as many WRs as necessary
 * to fill it again.
 */
void
iser_ib_post_recv(void *arg)
{
	ibt_channel_hdl_t chanhdl;
	iser_chan_t	*chan;
	iser_hca_t	*hca;
	iser_msg_t	*msg;
	ibt_recv_wr_t	*wrlist, wr[ISER_IB_RQ_POST_MAX];
	int		rq_space, msg_ret;
	int		total_num, npost;
	uint_t		nposted;
	int		status, i;
	iser_qp_t	*iser_qp;
	ib_gid_t	lgid;

	chanhdl = (ibt_channel_hdl_t)arg;

	/* Pull our iSER channel handle from the private data */
	chan = (iser_chan_t *)ibt_get_chan_private(chanhdl);

	/* It is possible to run after the channel has been freed */
	if (chan == NULL) {
		return;
	}
	mutex_enter(&chan->ic_conn->ic_lock);

	/* Bail out if the connection is closed; no need for more recv WRs */
	if ((chan->ic_conn->ic_stage == ISER_CONN_STAGE_CLOSING) ||
	    (chan->ic_conn->ic_stage == ISER_CONN_STAGE_CLOSED)) {
		mutex_exit(&chan->ic_conn->ic_lock);
		return;
	}

	/* get the QP handle from the iser_chan */
	iser_qp = &chan->ic_qp;

	/* get the local gid from the path info */
	lgid = chan->ic_ibt_path.pi_prim_cep_path.cep_adds_vect.av_sgid;

	/* look up the HCA (owner of the msg cache) via the local gid */
	hca = iser_ib_gid2hca(lgid);
	if (hca == NULL) {
		ISER_LOG(CE_NOTE, "iser_ib_post_recv: unable to retrieve "
		    "HCA handle");
		mutex_exit(&chan->ic_conn->ic_lock);
		return;
	}

	/* check for space to post on the RQ */
	mutex_enter(&iser_qp->qp_lock);
	rq_space = iser_qp->rq_depth - iser_qp->rq_level;
	if (rq_space == 0) {
		/* The RQ is full, clear the pending flag and return */
		iser_qp->rq_taskqpending = B_FALSE;
		mutex_exit(&iser_qp->qp_lock);
		mutex_exit(&chan->ic_conn->ic_lock);
		return;
	}

	/* Keep track of the lowest value for rq_min_post_level */
	if (iser_qp->rq_level < iser_qp->rq_min_post_level)
		iser_qp->rq_min_post_level = iser_qp->rq_level;

	mutex_exit(&iser_qp->qp_lock);

	/* we've room to post, so pull from the msg cache */
	msg = iser_msg_get(hca, rq_space, &msg_ret);
	if (msg == NULL) {
		ISER_LOG(CE_NOTE, "iser_ib_post_recv: no message handles "
		    "available in msg cache currently");
		/*
		 * There are no messages on the cache. Wait a half-
		 * second, then redispatch ourselves to try again.
		 */
		delay(drv_usectohz(ISER_DELAY_HALF_SECOND));
		status = ddi_taskq_dispatch(iser_taskq, iser_ib_post_recv,
		    (void *)chanhdl, DDI_NOSLEEP);
		if (status != DDI_SUCCESS) {
			ISER_LOG(CE_NOTE, "iser_ib_post_recv: failed to "
			    "redispatch routine");
			/* Failed to dispatch, clear pending flag */
			mutex_enter(&iser_qp->qp_lock);
			iser_qp->rq_taskqpending = B_FALSE;
			mutex_exit(&iser_qp->qp_lock);
		}
		mutex_exit(&chan->ic_conn->ic_lock);
		return;
	}

	if (msg_ret != rq_space) {
		ISER_LOG(CE_NOTE, "iser_ib_post_recv: requested number of "
		    "messages not allocated: requested (%d) allocated (%d)",
		    rq_space, msg_ret);
		/* We got some, but not all, of our requested depth */
		rq_space = msg_ret;
	}

	/*
	 * Now, walk through the allocated WRs and post them,
	 * ISER_IB_RQ_POST_MAX (or less) at a time.
	 */
	wrlist = &wr[0];
	total_num = rq_space;

	while (total_num) {
		/* determine the number to post on this iteration */
		npost = (total_num > ISER_IB_RQ_POST_MAX) ?
		    ISER_IB_RQ_POST_MAX : total_num;

		/* build a list of WRs from the msg list */
		for (i = 0; i < npost; i++) {
			/* WR id carries the msg pointer for completion */
			wrlist[i].wr_id = (ibt_wrid_t)(uintptr_t)msg;
			wrlist[i].wr_nds = ISER_IB_SGLIST_SIZE;
			wrlist[i].wr_sgl = &msg->msg_ds;
			msg = msg->nextp;
		}

		/* post the list to the RQ */
		nposted = 0;
		status = ibt_post_recv(chanhdl, wrlist, npost, &nposted);
		if ((status != IBT_SUCCESS) || (nposted != npost)) {
			/*
			 * NOTE(review): the iser_msg_t handles that were
			 * pulled from the cache but not posted appear to
			 * be dropped here -- confirm whether they should
			 * be returned to the msg cache on this path.
			 */
			ISER_LOG(CE_NOTE, "iser_ib_post_recv: ibt_post_recv "
			    "failed: requested (%d) posted (%d) status (%d)",
			    npost, nposted, status);
			total_num -= nposted;
			break;
		}

		/* decrement total number to post by the number posted */
		total_num -= nposted;
	}

	mutex_enter(&iser_qp->qp_lock);
	if (total_num != 0) {
		ISER_LOG(CE_NOTE, "iser_ib_post_recv: unable to fill RQ, "
		    "failed to post (%d) WRs", total_num);
		iser_qp->rq_level += rq_space - total_num;
	} else {
		iser_qp->rq_level += rq_space;
	}

	/*
	 * Now that we've filled the RQ, check that all of the recv WRs
	 * haven't just been immediately consumed. If so, taskqpending is
	 * still B_TRUE, so we need to fire off a taskq thread to post
	 * more WRs.
	 */
	if (iser_qp->rq_level == 0) {
		mutex_exit(&iser_qp->qp_lock);
		status = ddi_taskq_dispatch(iser_taskq, iser_ib_post_recv,
		    (void *)chanhdl, DDI_NOSLEEP);
		if (status != DDI_SUCCESS) {
			ISER_LOG(CE_NOTE, "iser_ib_post_recv: failed to "
			    "dispatch followup routine");
			/* Failed to dispatch, clear pending flag */
			mutex_enter(&iser_qp->qp_lock);
			iser_qp->rq_taskqpending = B_FALSE;
			mutex_exit(&iser_qp->qp_lock);
		}
	} else {
		/*
		 * We're done, we've filled the RQ. Clear the taskq
		 * flag so that we can run again.
801 */ 802 iser_qp->rq_taskqpending = B_FALSE; 803 mutex_exit(&iser_qp->qp_lock); 804 } 805 806 mutex_exit(&chan->ic_conn->ic_lock); 807 } 808 809 /* 810 * iser_ib_handle_portup_event() 811 * This handles the IBT_EVENT_PORT_UP unaffiliated asynchronous event. 812 * 813 * To facilitate a seamless bringover of the port and configure the CM service 814 * for inbound iSER service requests on this newly active port, the existing 815 * IDM services will be checked for iSER support. 816 * If an iSER service was already created, then this service will simply be 817 * bound to the gid of the newly active port. If on the other hand, the CM 818 * service did not exist, i.e. only socket communication, then a new CM 819 * service will be first registered with the saved service parameters and 820 * then bound to the newly active port. 821 * 822 */ 823 /* ARGSUSED */ 824 static void 825 iser_ib_handle_portup_event(ibt_hca_hdl_t hdl, ibt_async_event_t *event) 826 { 827 iser_hca_t *hca; 828 ib_gid_t gid; 829 idm_svc_t *idm_svc; 830 int status; 831 832 ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event: HCA(0x%llx) port(%d)", 833 (longlong_t)event->ev_hca_guid, event->ev_port); 834 835 /* 836 * Query all ports on the HCA and update the port information 837 * maintainted in the iser_hca_t structure 838 */ 839 hca = iser_ib_guid2hca(event->ev_hca_guid); 840 if (hca == NULL) { 841 842 /* HCA is just made available, first port on that HCA */ 843 hca = iser_ib_alloc_hca(event->ev_hca_guid); 844 845 mutex_enter(&iser_state->is_hcalist_lock); 846 list_insert_tail(&iser_state->is_hcalist, hca); 847 iser_state->is_num_hcas++; 848 mutex_exit(&iser_state->is_hcalist_lock); 849 850 } else { 851 852 status = iser_ib_update_hcaports(hca); 853 854 if (status != IBT_SUCCESS) { 855 ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event " 856 "status(0x%x): iser_ib_update_hcaports failed: " 857 "HCA(0x%llx) port(%d)", status, 858 (longlong_t)event->ev_hca_guid, event->ev_port); 859 return; 860 } 861 } 862 863 
	/* gid of the newly active port (ports are 1-based in the event) */
	gid = hca->hca_port_info[event->ev_port - 1].p_sgid_tbl[0];

	/*
	 * Iterate through the global list of IDM target services
	 * and check for existing iSER CM service.
	 */
	mutex_enter(&idm.idm_global_mutex);
	for (idm_svc = list_head(&idm.idm_tgt_svc_list);
	    idm_svc != NULL;
	    idm_svc = list_next(&idm.idm_tgt_svc_list, idm_svc)) {


		if (idm_svc->is_iser_svc == NULL) {

			/* Establish a new CM service for iSER requests */
			status = iser_tgt_svc_create(
			    &idm_svc->is_svc_req, idm_svc);

			if (status != IBT_SUCCESS) {
				ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event "
				    "status(0x%x): iser_tgt_svc_create failed: "
				    "HCA(0x%llx) port(%d)", status,
				    (longlong_t)event->ev_hca_guid,
				    event->ev_port);

				continue;
			}
		}

		status = iser_ib_activate_port(
		    idm_svc, event->ev_hca_guid, gid);
		if (status != IBT_SUCCESS) {

			ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event "
			    "status(0x%x): Bind service on port "
			    "(%llx:%llx) failed",
			    status, (longlong_t)gid.gid_prefix,
			    (longlong_t)gid.gid_guid);

			continue;
		}
		ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event: service bound "
		    "HCA(0x%llx) port(%d)", (longlong_t)event->ev_hca_guid,
		    event->ev_port);
	}
	mutex_exit(&idm.idm_global_mutex);

	ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event success: "
	    "HCA(0x%llx) port(%d)", (longlong_t)event->ev_hca_guid,
	    event->ev_port);
}

/*
 * iser_ib_handle_portdown_event()
 * This handles the IBT_EVENT_PORT_DOWN unaffiliated asynchronous error.
 *
 * Unconfigure the CM service on the deactivated port and teardown the
 * connections that are using the CM service.
921 */ 922 /* ARGSUSED */ 923 static void 924 iser_ib_handle_portdown_event(ibt_hca_hdl_t hdl, ibt_async_event_t *event) 925 { 926 iser_hca_t *hca; 927 ib_gid_t gid; 928 int status; 929 930 /* 931 * Query all ports on the HCA and update the port information 932 * maintainted in the iser_hca_t structure 933 */ 934 hca = iser_ib_guid2hca(event->ev_hca_guid); 935 ASSERT(hca != NULL); 936 937 status = iser_ib_update_hcaports(hca); 938 if (status != IBT_SUCCESS) { 939 ISER_LOG(CE_NOTE, "iser_ib_handle_portdown_event status(0x%x): " 940 "ibt_ib_update_hcaports failed: HCA(0x%llx) port(%d)", 941 status, (longlong_t)event->ev_hca_guid, event->ev_port); 942 return; 943 } 944 945 /* get the gid of the new port */ 946 gid = hca->hca_port_info[event->ev_port - 1].p_sgid_tbl[0]; 947 iser_ib_deactivate_port(event->ev_hca_guid, gid); 948 949 ISER_LOG(CE_NOTE, "iser_ib_handle_portdown_event success: " 950 "HCA(0x%llx) port(%d)", (longlong_t)event->ev_hca_guid, 951 event->ev_port); 952 } 953 954 /* 955 * iser_ib_handle_hca_detach_event() 956 * Quiesce all activity bound for the port, teardown the connection, unbind 957 * iSER services on all ports and release the HCA handle. 958 */ 959 /* ARGSUSED */ 960 static void 961 iser_ib_handle_hca_detach_event(ibt_hca_hdl_t hdl, ibt_async_event_t *event) 962 { 963 iser_hca_t *nexthca, *hca; 964 int i, status; 965 966 ISER_LOG(CE_NOTE, "iser_ib_handle_hca_detach_event: HCA(0x%llx)", 967 (longlong_t)event->ev_hca_guid); 968 969 hca = iser_ib_guid2hca(event->ev_hca_guid); 970 for (i = 0; i < hca->hca_num_ports; i++) { 971 iser_ib_deactivate_port(hca->hca_guid, 972 hca->hca_port_info[i].p_sgid_tbl[0]); 973 } 974 975 /* 976 * Update the HCA list maintained in the iser_state. Free the 977 * resources allocated to the HCA, i.e. 
 * caches and the protection domain.
	 */
	mutex_enter(&iser_state->is_hcalist_lock);

	for (hca = list_head(&iser_state->is_hcalist);
	    hca != NULL;
	    hca = nexthca) {

		nexthca = list_next(&iser_state->is_hcalist, hca);

		if (hca->hca_guid == event->ev_hca_guid) {

			list_remove(&iser_state->is_hcalist, hca);
			iser_state->is_num_hcas--;

			status = iser_ib_free_hca(hca);
			if (status != DDI_SUCCESS) {
				/* put the entry back so it is not leaked */
				ISER_LOG(CE_WARN, "iser_ib_handle_hca_detach: "
				    "Failed to free hca(%p)", (void *)hca);
				list_insert_tail(&iser_state->is_hcalist, hca);
				iser_state->is_num_hcas++;
			}
			/* No way to return status to IBT if this fails */
		}
	}
	mutex_exit(&iser_state->is_hcalist_lock);

}

/*
 * iser_ib_async_handler
 * An IBT Asynchronous Event handler is registered it with the framework and
 * passed via the ibt_attach() routine. This function handles the following
 * asynchronous events.
 * IBT_EVENT_PORT_UP
 * IBT_ERROR_PORT_DOWN
 * IBT_HCA_ATTACH_EVENT
 * IBT_HCA_DETACH_EVENT
 */
/* ARGSUSED */
void
iser_ib_async_handler(void *clntp, ibt_hca_hdl_t hdl, ibt_async_code_t code,
    ibt_async_event_t *event)
{
	switch (code) {
	case IBT_EVENT_PORT_UP:
		iser_ib_handle_portup_event(hdl, event);
		break;

	case IBT_ERROR_PORT_DOWN:
		iser_ib_handle_portdown_event(hdl, event);
		break;

	case IBT_HCA_ATTACH_EVENT:
		/*
		 * A new HCA device is available for use, ignore this
		 * event because the corresponding IBT_EVENT_PORT_UP
		 * events will get triggered and handled accordingly.
		 */
		break;

	case IBT_HCA_DETACH_EVENT:
		iser_ib_handle_hca_detach_event(hdl, event);
		break;

	default:
		/* all other async events are ignored */
		break;
	}
}

/*
 * iser_ib_init_hcas
 *
 * This function opens all the HCA devices, gathers the HCA state information
 * and adds the HCA handle for each HCA found in the iser_soft_state.
 */
static int
iser_ib_init_hcas(void)
{
	ib_guid_t	*guid;
	int		num_hcas;
	int		i;
	iser_hca_t	*hca;

	/* Retrieve the HCA list */
	num_hcas = ibt_get_hca_list(&guid);
	if (num_hcas == 0) {
		/*
		 * This shouldn't happen, but might if we have all HCAs
		 * detach prior to initialization.
		 */
		return (DDI_FAILURE);
	}

	/* Initialize the hcalist lock */
	mutex_init(&iser_state->is_hcalist_lock, NULL, MUTEX_DRIVER, NULL);

	/* Create the HCA list */
	list_create(&iser_state->is_hcalist, sizeof (iser_hca_t),
	    offsetof(iser_hca_t, hca_node));

	for (i = 0; i < num_hcas; i++) {

		ISER_LOG(CE_NOTE, "iser_ib_init_hcas: initializing HCA "
		    "(0x%llx)", (longlong_t)guid[i]);

		hca = iser_ib_alloc_hca(guid[i]);
		if (hca == NULL) {
			/* This shouldn't happen, teardown and fail */
			(void) iser_ib_fini_hcas();
			(void) ibt_free_hca_list(guid, num_hcas);
			return (DDI_FAILURE);
		}

		mutex_enter(&iser_state->is_hcalist_lock);
		list_insert_tail(&iser_state->is_hcalist, hca);
		iser_state->is_num_hcas++;
		mutex_exit(&iser_state->is_hcalist_lock);

	}

	/* Free the IBT HCA list */
	(void) ibt_free_hca_list(guid, num_hcas);

	/* Check that we've initialized at least one HCA */
	mutex_enter(&iser_state->is_hcalist_lock);
	if (list_is_empty(&iser_state->is_hcalist)) {
		ISER_LOG(CE_NOTE, "iser_ib_init_hcas: failed to initialize "
		    "any HCAs");

		mutex_exit(&iser_state->is_hcalist_lock);
		(void)
iser_ib_fini_hcas(); 1109 return (DDI_FAILURE); 1110 } 1111 mutex_exit(&iser_state->is_hcalist_lock); 1112 1113 return (DDI_SUCCESS); 1114 } 1115 1116 /* 1117 * iser_ib_fini_hcas 1118 * 1119 * Teardown the iSER HCA list initialized above. 1120 */ 1121 static int 1122 iser_ib_fini_hcas(void) 1123 { 1124 iser_hca_t *nexthca, *hca; 1125 int status; 1126 1127 mutex_enter(&iser_state->is_hcalist_lock); 1128 for (hca = list_head(&iser_state->is_hcalist); 1129 hca != NULL; 1130 hca = nexthca) { 1131 1132 nexthca = list_next(&iser_state->is_hcalist, hca); 1133 1134 list_remove(&iser_state->is_hcalist, hca); 1135 1136 status = iser_ib_free_hca(hca); 1137 if (status != IBT_SUCCESS) { 1138 ISER_LOG(CE_NOTE, "iser_ib_fini_hcas: failed to free " 1139 "HCA during fini"); 1140 list_insert_tail(&iser_state->is_hcalist, hca); 1141 return (DDI_FAILURE); 1142 } 1143 1144 iser_state->is_num_hcas--; 1145 1146 } 1147 mutex_exit(&iser_state->is_hcalist_lock); 1148 list_destroy(&iser_state->is_hcalist); 1149 mutex_destroy(&iser_state->is_hcalist_lock); 1150 1151 return (DDI_SUCCESS); 1152 } 1153 1154 /* 1155 * iser_ib_alloc_hca 1156 * 1157 * This function opens the given HCA device, gathers the HCA state information 1158 * and adds the HCA handle 1159 */ 1160 static iser_hca_t * 1161 iser_ib_alloc_hca(ib_guid_t guid) 1162 { 1163 iser_hca_t *hca; 1164 int status; 1165 1166 /* Allocate an iser_hca_t HCA handle */ 1167 hca = (iser_hca_t *)kmem_zalloc(sizeof (iser_hca_t), KM_SLEEP); 1168 1169 /* Open this HCA */ 1170 status = ibt_open_hca(iser_state->is_ibhdl, guid, &hca->hca_hdl); 1171 if (status != IBT_SUCCESS) { 1172 ISER_LOG(CE_NOTE, "iser_ib_alloc_hca: ibt_open_hca failed:" 1173 " guid (0x%llx) status (0x%x)", (longlong_t)guid, status); 1174 kmem_free(hca, sizeof (iser_hca_t)); 1175 return (NULL); 1176 } 1177 1178 hca->hca_guid = guid; 1179 hca->hca_clnt_hdl = iser_state->is_ibhdl; 1180 1181 /* Query the HCA */ 1182 status = ibt_query_hca(hca->hca_hdl, &hca->hca_attr); 1183 if (status != 
IBT_SUCCESS) { 1184 ISER_LOG(CE_NOTE, "iser_ib_alloc_hca: ibt_query_hca " 1185 "failure: guid (0x%llx) status (0x%x)", 1186 (longlong_t)guid, status); 1187 (void) ibt_close_hca(hca->hca_hdl); 1188 kmem_free(hca, sizeof (iser_hca_t)); 1189 return (NULL); 1190 } 1191 1192 /* Query all ports on the HCA */ 1193 status = ibt_query_hca_ports(hca->hca_hdl, 0, 1194 &hca->hca_port_info, &hca->hca_num_ports, 1195 &hca->hca_port_info_sz); 1196 if (status != IBT_SUCCESS) { 1197 ISER_LOG(CE_NOTE, "iser_ib_alloc_hca: " 1198 "ibt_query_hca_ports failure: guid (0x%llx) " 1199 "status (0x%x)", (longlong_t)guid, status); 1200 (void) ibt_close_hca(hca->hca_hdl); 1201 kmem_free(hca, sizeof (iser_hca_t)); 1202 return (NULL); 1203 } 1204 1205 /* Allocate a single PD on this HCA */ 1206 status = ibt_alloc_pd(hca->hca_hdl, IBT_PD_NO_FLAGS, 1207 &hca->hca_pdhdl); 1208 if (status != IBT_SUCCESS) { 1209 ISER_LOG(CE_NOTE, "iser_ib_alloc_hca: ibt_alloc_pd " 1210 "failure: guid (0x%llx) status (0x%x)", 1211 (longlong_t)guid, status); 1212 (void) ibt_close_hca(hca->hca_hdl); 1213 ibt_free_portinfo(hca->hca_port_info, hca->hca_port_info_sz); 1214 kmem_free(hca, sizeof (iser_hca_t)); 1215 return (NULL); 1216 } 1217 1218 /* Initialize the message and data MR caches for this HCA */ 1219 iser_init_hca_caches(hca); 1220 1221 return (hca); 1222 } 1223 1224 static int 1225 iser_ib_free_hca(iser_hca_t *hca) 1226 { 1227 int status; 1228 ibt_hca_portinfo_t *hca_port_info; 1229 uint_t hca_port_info_sz; 1230 1231 ASSERT(hca != NULL); 1232 if (hca->hca_failed) 1233 return (DDI_FAILURE); 1234 1235 hca_port_info = hca->hca_port_info; 1236 hca_port_info_sz = hca->hca_port_info_sz; 1237 1238 /* 1239 * Free the memory regions before freeing 1240 * the associated protection domain 1241 */ 1242 iser_fini_hca_caches(hca); 1243 1244 status = ibt_free_pd(hca->hca_hdl, hca->hca_pdhdl); 1245 if (status != IBT_SUCCESS) { 1246 ISER_LOG(CE_NOTE, "iser_ib_free_hca: failed to free PD " 1247 "status=0x%x", status); 1248 goto 
out_caches; 1249 } 1250 1251 status = ibt_close_hca(hca->hca_hdl); 1252 if (status != IBT_SUCCESS) { 1253 ISER_LOG(CE_NOTE, "iser_ib_fini_hcas: failed to close HCA " 1254 "status=0x%x", status); 1255 goto out_pd; 1256 } 1257 1258 ibt_free_portinfo(hca_port_info, hca_port_info_sz); 1259 1260 kmem_free(hca, sizeof (iser_hca_t)); 1261 return (DDI_SUCCESS); 1262 1263 /* 1264 * We only managed to partially tear down the HCA, try to put it back 1265 * like it was before returning. 1266 */ 1267 out_pd: 1268 status = ibt_alloc_pd(hca->hca_hdl, IBT_PD_NO_FLAGS, &hca->hca_pdhdl); 1269 if (status != IBT_SUCCESS) { 1270 hca->hca_failed = B_TRUE; 1271 /* Report error and exit */ 1272 ISER_LOG(CE_NOTE, "iser_ib_free_hca: could not re-alloc PD " 1273 "status=0x%x", status); 1274 return (DDI_FAILURE); 1275 } 1276 1277 out_caches: 1278 iser_init_hca_caches(hca); 1279 1280 return (DDI_FAILURE); 1281 } 1282 1283 static int 1284 iser_ib_update_hcaports(iser_hca_t *hca) 1285 { 1286 ibt_hca_portinfo_t *pinfop, *oldpinfop; 1287 uint_t size, oldsize, nport; 1288 int status; 1289 1290 ASSERT(hca != NULL); 1291 1292 status = ibt_query_hca_ports(hca->hca_hdl, 0, &pinfop, &nport, &size); 1293 if (status != IBT_SUCCESS) { 1294 ISER_LOG(CE_NOTE, "ibt_query_hca_ports failed: %d", status); 1295 return (status); 1296 } 1297 1298 oldpinfop = hca->hca_port_info; 1299 oldsize = hca->hca_port_info_sz; 1300 hca->hca_port_info = pinfop; 1301 hca->hca_port_info_sz = size; 1302 1303 (void) ibt_free_portinfo(oldpinfop, oldsize); 1304 1305 return (IBT_SUCCESS); 1306 } 1307 1308 /* 1309 * iser_ib_gid2hca 1310 * Given a gid, find the corresponding hca 1311 */ 1312 iser_hca_t * 1313 iser_ib_gid2hca(ib_gid_t gid) 1314 { 1315 1316 iser_hca_t *hca; 1317 int i; 1318 1319 mutex_enter(&iser_state->is_hcalist_lock); 1320 for (hca = list_head(&iser_state->is_hcalist); 1321 hca != NULL; 1322 hca = list_next(&iser_state->is_hcalist, hca)) { 1323 1324 for (i = 0; i < hca->hca_num_ports; i++) { 1325 if 
((hca->hca_port_info[i].p_sgid_tbl[0].gid_prefix == 1326 gid.gid_prefix) && 1327 (hca->hca_port_info[i].p_sgid_tbl[0].gid_guid == 1328 gid.gid_guid)) { 1329 1330 mutex_exit(&iser_state->is_hcalist_lock); 1331 1332 return (hca); 1333 } 1334 } 1335 } 1336 mutex_exit(&iser_state->is_hcalist_lock); 1337 return (NULL); 1338 } 1339 1340 /* 1341 * iser_ib_guid2hca 1342 * Given a HCA guid, find the corresponding HCA 1343 */ 1344 iser_hca_t * 1345 iser_ib_guid2hca(ib_guid_t guid) 1346 { 1347 1348 iser_hca_t *hca; 1349 1350 mutex_enter(&iser_state->is_hcalist_lock); 1351 for (hca = list_head(&iser_state->is_hcalist); 1352 hca != NULL; 1353 hca = list_next(&iser_state->is_hcalist, hca)) { 1354 1355 if (hca->hca_guid == guid) { 1356 mutex_exit(&iser_state->is_hcalist_lock); 1357 return (hca); 1358 } 1359 } 1360 mutex_exit(&iser_state->is_hcalist_lock); 1361 return (NULL); 1362 } 1363 1364 /* 1365 * iser_ib_conv_sockaddr2ibtaddr 1366 * This function converts a socket address into the IBT format 1367 */ 1368 void iser_ib_conv_sockaddr2ibtaddr( 1369 idm_sockaddr_t *saddr, ibt_ip_addr_t *ibt_addr) 1370 { 1371 if (saddr == NULL) { 1372 ibt_addr->family = AF_UNSPEC; 1373 ibt_addr->un.ip4addr = 0; 1374 } else { 1375 switch (saddr->sin.sa_family) { 1376 case AF_INET: 1377 1378 ibt_addr->family = saddr->sin4.sin_family; 1379 ibt_addr->un.ip4addr = saddr->sin4.sin_addr.s_addr; 1380 break; 1381 1382 case AF_INET6: 1383 1384 ibt_addr->family = saddr->sin6.sin6_family; 1385 ibt_addr->un.ip6addr = saddr->sin6.sin6_addr; 1386 break; 1387 1388 default: 1389 ibt_addr->family = AF_UNSPEC; 1390 } 1391 1392 } 1393 } 1394 1395 /* 1396 * iser_ib_conv_ibtaddr2sockaddr 1397 * This function converts an IBT ip address handle to a sockaddr 1398 */ 1399 void iser_ib_conv_ibtaddr2sockaddr(struct sockaddr_storage *ss, 1400 ibt_ip_addr_t *ibt_addr, in_port_t port) 1401 { 1402 struct sockaddr_in *sin; 1403 struct sockaddr_in6 *sin6; 1404 1405 switch (ibt_addr->family) { 1406 case AF_INET: 1407 case 
AF_UNSPEC: 1408 1409 sin = (struct sockaddr_in *)ibt_addr; 1410 sin->sin_port = ntohs(port); 1411 bcopy(sin, ss, sizeof (struct sockaddr_in)); 1412 break; 1413 1414 case AF_INET6: 1415 1416 sin6 = (struct sockaddr_in6 *)ibt_addr; 1417 sin6->sin6_port = ntohs(port); 1418 bcopy(sin6, ss, sizeof (struct sockaddr_in6)); 1419 break; 1420 1421 default: 1422 ISER_LOG(CE_NOTE, "iser_ib_conv_ibtaddr2sockaddr: " 1423 "unknown family type: 0x%x", ibt_addr->family); 1424 } 1425 } 1426 1427 /* 1428 * iser_ib_setup_cq 1429 * This function sets up the Completion Queue size and allocates the specified 1430 * Completion Queue 1431 */ 1432 static int 1433 iser_ib_setup_cq(ibt_hca_hdl_t hca_hdl, uint_t cq_size, ibt_cq_hdl_t *cq_hdl) 1434 { 1435 1436 ibt_cq_attr_t cq_attr; 1437 int status; 1438 1439 cq_attr.cq_size = cq_size; 1440 cq_attr.cq_sched = 0; 1441 cq_attr.cq_flags = IBT_CQ_NO_FLAGS; 1442 1443 /* Allocate a Completion Queue */ 1444 status = ibt_alloc_cq(hca_hdl, &cq_attr, cq_hdl, NULL); 1445 if (status != IBT_SUCCESS) { 1446 ISER_LOG(CE_NOTE, "iser_ib_setup_cq: ibt_alloc_cq failure (%d)", 1447 status); 1448 return (status); 1449 } 1450 1451 return (ISER_STATUS_SUCCESS); 1452 } 1453 1454 /* 1455 * iser_ib_setup_chanargs 1456 * 1457 */ 1458 static void 1459 iser_ib_setup_chanargs(uint8_t hca_port, ibt_cq_hdl_t scq_hdl, 1460 ibt_cq_hdl_t rcq_hdl, uint_t sq_size, uint_t rq_size, 1461 ibt_pd_hdl_t hca_pdhdl, ibt_rc_chan_alloc_args_t *cargs) 1462 { 1463 1464 bzero(cargs, sizeof (ibt_rc_chan_alloc_args_t)); 1465 1466 /* 1467 * Set up the size of the channels send queue, receive queue and the 1468 * maximum number of elements in a scatter gather list of work requests 1469 * posted to the send and receive queues. 
1470 */ 1471 cargs->rc_sizes.cs_sq = sq_size; 1472 cargs->rc_sizes.cs_rq = rq_size; 1473 cargs->rc_sizes.cs_sq_sgl = ISER_IB_SGLIST_SIZE; 1474 cargs->rc_sizes.cs_rq_sgl = ISER_IB_SGLIST_SIZE; 1475 1476 /* 1477 * All Work requests signaled on a WR basis will receive a send 1478 * request completion. 1479 */ 1480 cargs->rc_flags = IBT_ALL_SIGNALED; 1481 1482 /* Enable RDMA read and RDMA write on the channel end points */ 1483 cargs->rc_control = IBT_CEP_RDMA_RD | IBT_CEP_RDMA_WR; 1484 1485 /* Set the local hca port on which the channel is allocated */ 1486 cargs->rc_hca_port_num = hca_port; 1487 1488 /* Set the Send and Receive Completion Queue handles */ 1489 cargs->rc_scq = scq_hdl; 1490 cargs->rc_rcq = rcq_hdl; 1491 1492 /* Set the protection domain associated with the channel */ 1493 cargs->rc_pd = hca_pdhdl; 1494 1495 /* No SRQ usage */ 1496 cargs->rc_srq = NULL; 1497 } 1498 1499 /* 1500 * iser_ib_init_qp 1501 * Initialize the QP handle 1502 */ 1503 void 1504 iser_ib_init_qp(iser_chan_t *chan, uint_t sq_size, uint_t rq_size) 1505 { 1506 /* Initialize the handle lock */ 1507 mutex_init(&chan->ic_qp.qp_lock, NULL, MUTEX_DRIVER, NULL); 1508 1509 /* Record queue sizes */ 1510 chan->ic_qp.sq_size = sq_size; 1511 chan->ic_qp.rq_size = rq_size; 1512 1513 /* Initialize the RQ monitoring data */ 1514 chan->ic_qp.rq_depth = rq_size; 1515 chan->ic_qp.rq_level = 0; 1516 chan->ic_qp.rq_lwm = (chan->ic_recvcq_sz * ISER_IB_RQ_LWM_PCT) / 100; 1517 1518 /* Initialize the taskq flag */ 1519 chan->ic_qp.rq_taskqpending = B_FALSE; 1520 } 1521 1522 /* 1523 * iser_ib_fini_qp 1524 * Teardown the QP handle 1525 */ 1526 void 1527 iser_ib_fini_qp(iser_qp_t *qp) 1528 { 1529 /* Destroy the handle lock */ 1530 mutex_destroy(&qp->qp_lock); 1531 } 1532 1533 static int 1534 iser_ib_activate_port(idm_svc_t *idm_svc, ib_guid_t guid, ib_gid_t gid) 1535 { 1536 iser_svc_t *iser_svc; 1537 iser_sbind_t *is_sbind; 1538 int status; 1539 1540 iser_svc = idm_svc->is_iser_svc; 1541 1542 /* 1543 * Save the 
address of the service bind handle in the 1544 * iser_svc_t to undo the service binding at a later time 1545 */ 1546 is_sbind = kmem_zalloc(sizeof (iser_sbind_t), KM_SLEEP); 1547 is_sbind->is_gid = gid; 1548 is_sbind->is_guid = guid; 1549 1550 status = ibt_bind_service(iser_svc->is_srvhdl, gid, NULL, 1551 idm_svc, &is_sbind->is_sbindhdl); 1552 1553 if (status != IBT_SUCCESS) { 1554 ISER_LOG(CE_NOTE, "iser_ib_activate_port: status(0x%x): " 1555 "Bind service(%llx) on port(%llx:%llx) failed", 1556 status, (longlong_t)iser_svc->is_svcid, 1557 (longlong_t)gid.gid_prefix, (longlong_t)gid.gid_guid); 1558 1559 kmem_free(is_sbind, sizeof (iser_sbind_t)); 1560 1561 return (status); 1562 } 1563 1564 list_insert_tail(&iser_svc->is_sbindlist, is_sbind); 1565 1566 return (IBT_SUCCESS); 1567 } 1568 1569 static void 1570 iser_ib_deactivate_port(ib_guid_t hca_guid, ib_gid_t gid) 1571 { 1572 iser_svc_t *iser_svc; 1573 iser_conn_t *iser_conn; 1574 iser_sbind_t *is_sbind; 1575 idm_conn_t *idm_conn; 1576 1577 /* 1578 * Iterate through the global list of IDM target connections. 1579 * Issue a TRANSPORT_FAIL for any connections on this port, and 1580 * if there is a bound service running on the port, tear it down. 
1581 */ 1582 mutex_enter(&idm.idm_global_mutex); 1583 for (idm_conn = list_head(&idm.idm_tgt_conn_list); 1584 idm_conn != NULL; 1585 idm_conn = list_next(&idm.idm_tgt_conn_list, idm_conn)) { 1586 1587 if (idm_conn->ic_transport_type != IDM_TRANSPORT_TYPE_ISER) { 1588 /* this is not an iSER connection, skip it */ 1589 continue; 1590 } 1591 1592 iser_conn = idm_conn->ic_transport_private; 1593 if (iser_conn->ic_chan->ic_ibt_path.pi_hca_guid != hca_guid) { 1594 /* this iSER connection is on a different port */ 1595 continue; 1596 } 1597 1598 /* Fail the transport for this connection */ 1599 idm_conn_event(idm_conn, CE_TRANSPORT_FAIL, IDM_STATUS_FAIL); 1600 1601 if (idm_conn->ic_conn_type == CONN_TYPE_INI) { 1602 /* initiator connection, nothing else to do */ 1603 continue; 1604 } 1605 1606 /* Check for a service binding */ 1607 iser_svc = idm_conn->ic_svc_binding->is_iser_svc; 1608 is_sbind = iser_ib_get_bind(iser_svc, hca_guid, gid); 1609 if (is_sbind != NULL) { 1610 /* This service is still bound, tear it down */ 1611 ibt_unbind_service(iser_svc->is_srvhdl, 1612 is_sbind->is_sbindhdl); 1613 list_remove(&iser_svc->is_sbindlist, is_sbind); 1614 kmem_free(is_sbind, sizeof (iser_sbind_t)); 1615 } 1616 } 1617 mutex_exit(&idm.idm_global_mutex); 1618 } 1619 1620 static iser_sbind_t * 1621 iser_ib_get_bind(iser_svc_t *iser_svc, ib_guid_t hca_guid, ib_gid_t gid) 1622 { 1623 iser_sbind_t *is_sbind; 1624 1625 for (is_sbind = list_head(&iser_svc->is_sbindlist); 1626 is_sbind != NULL; 1627 is_sbind = list_next(&iser_svc->is_sbindlist, is_sbind)) { 1628 1629 if ((is_sbind->is_guid == hca_guid) && 1630 (is_sbind->is_gid.gid_prefix == gid.gid_prefix) && 1631 (is_sbind->is_gid.gid_guid == gid.gid_guid)) { 1632 return (is_sbind); 1633 } 1634 } 1635 return (NULL); 1636 } 1637