1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/types.h> 27 #include <sys/ddi.h> 28 #include <sys/types.h> 29 #include <sys/socket.h> 30 #include <netinet/in.h> 31 #include <sys/sunddi.h> 32 #include <sys/sysmacros.h> 33 #include <sys/iscsi_protocol.h> 34 35 #include <sys/ib/clients/iser/iser.h> 36 #include <sys/ib/clients/iser/iser_idm.h> 37 38 /* 39 * iser_ib.c 40 * Routines for InfiniBand transport for iSER 41 * 42 * This file contains the routines to interface with the IBT API to attach and 43 * allocate IB resources, handle async events, and post recv work requests. 
 *
 */

/* HCA lookup helpers: map a port GID or an HCA GUID to our iser_hca_t */
static iser_hca_t *iser_ib_gid2hca(ib_gid_t gid);
static iser_hca_t *iser_ib_guid2hca(ib_guid_t guid);

/* HCA lifecycle: open/close one HCA, refresh port info, init/fini the list */
static iser_hca_t *iser_ib_alloc_hca(ib_guid_t guid);
static int iser_ib_free_hca(iser_hca_t *hca);
static int iser_ib_update_hcaports(iser_hca_t *hca);
static int iser_ib_init_hcas(void);
static int iser_ib_fini_hcas(void);

/* Service binding: per-port bind records for a registered iSER service */
static iser_sbind_t *iser_ib_get_bind(
    iser_svc_t *iser_svc, ib_guid_t hca_guid, ib_gid_t gid);
static int iser_ib_activate_port(
    idm_svc_t *idm_svc, ib_guid_t guid, ib_gid_t gid);
static void iser_ib_deactivate_port(ib_guid_t hca_guid, ib_gid_t gid);

/* Channel resources: QP bookkeeping and CQ allocation for an RC channel */
static void iser_ib_init_qp(iser_chan_t *chan, uint_t sq_size, uint_t rq_size);
static void iser_ib_fini_qp(iser_qp_t *qp);

static int iser_ib_setup_cq(ibt_hca_hdl_t hca_hdl, uint_t cq_size,
    ibt_cq_hdl_t *cq_hdl);

static void iser_ib_setup_chanargs(uint8_t hca_port, ibt_cq_hdl_t scq_hdl,
    ibt_cq_hdl_t rcq_hdl, uint_t sq_size, uint_t rq_size,
    ibt_pd_hdl_t hca_pdhdl, ibt_rc_chan_alloc_args_t *cargs);

/* Unaffiliated async event handlers, dispatched by iser_ib_async_handler() */
static void iser_ib_handle_portup_event(ibt_hca_hdl_t hdl,
    ibt_async_event_t *event);
static void iser_ib_handle_portdown_event(ibt_hca_hdl_t hdl,
    ibt_async_event_t *event);
static void iser_ib_handle_hca_detach_event(ibt_hca_hdl_t hdl,
    ibt_async_event_t *event);

/* Taskq wrapper that refills a channel's RQ in a sleepable context */
static void iser_ib_post_recv_task(void *arg);

/*
 * IBT client module registration info, passed to ibt_attach().
 */
static struct ibt_clnt_modinfo_s iser_ib_modinfo = {
	IBTI_V_CURR,		/* mi_ibt_version: IBTI interface version */
	IBT_STORAGE_DEV,	/* mi_clnt_class: storage-class client */
	iser_ib_async_handler,	/* mi_async_handler: unaffiliated events */
	NULL,			/* mi_reserved */
	"iSER"			/* mi_clnt_name */
};

/*
 * iser_ib_init
 *
 * This function registers the HCA drivers with IBTF and registers and binds
 * iSER as a service with IBTF.
94 */ 95 int 96 iser_ib_init(void) 97 { 98 int status; 99 100 /* Register with IBTF */ 101 status = ibt_attach(&iser_ib_modinfo, iser_state->is_dip, iser_state, 102 &iser_state->is_ibhdl); 103 if (status != DDI_SUCCESS) { 104 ISER_LOG(CE_NOTE, "iser_ib_init: ibt_attach failed (0x%x)", 105 status); 106 return (DDI_FAILURE); 107 } 108 109 /* Create the global work request kmem_cache */ 110 iser_state->iser_wr_cache = kmem_cache_create("iser_wr_cache", 111 sizeof (iser_wr_t), 0, NULL, NULL, NULL, 112 iser_state, NULL, KM_SLEEP); 113 114 /* Populate our list of HCAs */ 115 status = iser_ib_init_hcas(); 116 if (status != DDI_SUCCESS) { 117 /* HCAs failed to initialize, tear it down */ 118 kmem_cache_destroy(iser_state->iser_wr_cache); 119 (void) ibt_detach(iser_state->is_ibhdl); 120 iser_state->is_ibhdl = NULL; 121 ISER_LOG(CE_NOTE, "iser_ib_init: failed to initialize HCAs"); 122 return (DDI_FAILURE); 123 } 124 125 /* Target will register iSER as a service with IBTF when required */ 126 127 /* Target will bind this service when it comes online */ 128 129 return (DDI_SUCCESS); 130 } 131 132 /* 133 * iser_ib_fini 134 * 135 * This function unbinds and degisters the iSER service from IBTF 136 */ 137 int 138 iser_ib_fini(void) 139 { 140 /* IDM would have already disabled all the services */ 141 142 /* Teardown the HCA list and associated resources */ 143 if (iser_ib_fini_hcas() != DDI_SUCCESS) 144 return (DDI_FAILURE); 145 146 /* Teardown the global work request kmem_cache */ 147 kmem_cache_destroy(iser_state->iser_wr_cache); 148 149 /* Deregister with IBTF */ 150 if (iser_state->is_ibhdl != NULL) { 151 (void) ibt_detach(iser_state->is_ibhdl); 152 iser_state->is_ibhdl = NULL; 153 } 154 155 return (DDI_SUCCESS); 156 } 157 158 /* 159 * iser_ib_register_service 160 * 161 * This function registers the iSER service using the RDMA-Aware Service ID. 
162 */ 163 int 164 iser_ib_register_service(idm_svc_t *idm_svc) 165 { 166 ibt_srv_desc_t srvdesc; 167 iser_svc_t *iser_svc; 168 int status; 169 170 bzero(&srvdesc, sizeof (ibt_srv_desc_t)); 171 172 /* Set up IBTI client callback handler from the CM */ 173 srvdesc.sd_handler = iser_ib_cm_handler; 174 175 srvdesc.sd_flags = IBT_SRV_NO_FLAGS; 176 177 iser_svc = (iser_svc_t *)idm_svc->is_iser_svc; 178 179 /* Register the service on the specified port */ 180 status = ibt_register_service( 181 iser_state->is_ibhdl, &srvdesc, 182 iser_svc->is_svcid, 1, &iser_svc->is_srvhdl, NULL); 183 184 return (status); 185 } 186 187 /* 188 * iser_ib_bind_service 189 * 190 * This function binds a given iSER service on all available HCA ports 191 */ 192 int 193 iser_ib_bind_service(idm_svc_t *idm_svc) 194 { 195 iser_hca_t *hca; 196 ib_gid_t gid; 197 int num_ports = 0; 198 int num_binds = 0; 199 int status; 200 int i; 201 202 ASSERT(idm_svc != NULL); 203 ASSERT(idm_svc->is_iser_svc != NULL); 204 205 /* Register the iSER service on all available ports */ 206 mutex_enter(&iser_state->is_hcalist_lock); 207 208 for (hca = list_head(&iser_state->is_hcalist); 209 hca != NULL; 210 hca = list_next(&iser_state->is_hcalist, hca)) { 211 212 for (i = 0; i < hca->hca_num_ports; i++) { 213 num_ports++; 214 if (hca->hca_port_info[i].p_linkstate != 215 IBT_PORT_ACTIVE) { 216 /* 217 * Move on. We will attempt to bind service 218 * in our async handler if the port comes up 219 * at a later time. 
220 */ 221 continue; 222 } 223 224 gid = hca->hca_port_info[i].p_sgid_tbl[0]; 225 226 /* If the port is already bound, skip */ 227 if (iser_ib_get_bind( 228 idm_svc->is_iser_svc, hca->hca_guid, gid) == NULL) { 229 230 status = iser_ib_activate_port( 231 idm_svc, hca->hca_guid, gid); 232 if (status != IBT_SUCCESS) { 233 ISER_LOG(CE_NOTE, 234 "iser_ib_bind_service: " 235 "iser_ib_activate_port failure " 236 "(0x%x)", status); 237 continue; 238 } 239 } 240 num_binds++; 241 } 242 } 243 mutex_exit(&iser_state->is_hcalist_lock); 244 245 if (num_binds) { 246 ISER_LOG(CE_NOTE, "iser_ib_bind_service: Service available on " 247 "(%d) of (%d) ports", num_binds, num_ports); 248 return (ISER_STATUS_SUCCESS); 249 } else { 250 ISER_LOG(CE_NOTE, "iser_ib_bind_service: Did not bind service"); 251 return (ISER_STATUS_FAIL); 252 } 253 } 254 255 /* 256 * iser_ib_unbind_service 257 * 258 * This function unbinds a given service on a all HCA ports 259 */ 260 void 261 iser_ib_unbind_service(idm_svc_t *idm_svc) 262 { 263 iser_svc_t *iser_svc; 264 iser_sbind_t *is_sbind, *next_sb; 265 266 if (idm_svc != NULL && idm_svc->is_iser_svc != NULL) { 267 268 iser_svc = idm_svc->is_iser_svc; 269 270 for (is_sbind = list_head(&iser_svc->is_sbindlist); 271 is_sbind != NULL; 272 is_sbind = next_sb) { 273 next_sb = list_next(&iser_svc->is_sbindlist, is_sbind); 274 ibt_unbind_service(iser_svc->is_srvhdl, 275 is_sbind->is_sbindhdl); 276 list_remove(&iser_svc->is_sbindlist, is_sbind); 277 kmem_free(is_sbind, sizeof (iser_sbind_t)); 278 } 279 } 280 } 281 282 /* ARGSUSED */ 283 void 284 iser_ib_deregister_service(idm_svc_t *idm_svc) 285 { 286 iser_svc_t *iser_svc; 287 288 if (idm_svc != NULL && idm_svc->is_iser_svc != NULL) { 289 290 iser_svc = (iser_svc_t *)idm_svc->is_iser_svc; 291 ibt_deregister_service(iser_state->is_ibhdl, 292 iser_svc->is_srvhdl); 293 ibt_release_ip_sid(iser_svc->is_svcid); 294 } 295 } 296 297 /* 298 * iser_ib_get_paths 299 * This function finds the IB path between the local and the 
remote address. 300 * 301 */ 302 int 303 iser_ib_get_paths(ibt_ip_addr_t *local_ip, ibt_ip_addr_t *remote_ip, 304 ibt_path_info_t *path, ibt_path_ip_src_t *path_src_ip) 305 { 306 ibt_ip_path_attr_t ipattr; 307 int status; 308 309 (void) bzero(&ipattr, sizeof (ibt_ip_path_attr_t)); 310 ipattr.ipa_dst_ip = remote_ip; 311 ipattr.ipa_src_ip = *local_ip; 312 ipattr.ipa_max_paths = 1; 313 ipattr.ipa_ndst = 1; 314 315 (void) bzero(path, sizeof (ibt_path_info_t)); 316 status = ibt_get_ip_paths(iser_state->is_ibhdl, IBT_PATH_NO_FLAGS, 317 &ipattr, path, NULL, path_src_ip); 318 if (status != IBT_SUCCESS) { 319 ISER_LOG(CE_NOTE, "ibt_get_ip_paths: ibt_get_ip_paths " 320 "failure: status (%d)", status); 321 return (status); 322 } 323 324 if (local_ip != NULL) { 325 ISER_LOG(CE_NOTE, "iser_ib_get_paths success: IP[%x to %x]", 326 local_ip->un.ip4addr, remote_ip->un.ip4addr); 327 } else { 328 ISER_LOG(CE_NOTE, "iser_ib_get_paths success: " 329 "IP[INADDR_ANY to %x]", remote_ip->un.ip4addr); 330 } 331 332 return (ISER_STATUS_SUCCESS); 333 } 334 335 /* 336 * iser_ib_alloc_rc_channel 337 * 338 * This function allocates a reliable communication channel using the specified 339 * channel attributes. 
340 */ 341 iser_chan_t * 342 iser_ib_alloc_rc_channel(ibt_ip_addr_t *local_ip, ibt_ip_addr_t *remote_ip) 343 { 344 345 iser_chan_t *chan; 346 ib_gid_t lgid; 347 uint8_t hca_port; /* from path */ 348 iser_hca_t *hca; 349 ibt_path_ip_src_t path_src_ip; 350 ibt_rc_chan_alloc_args_t chanargs; 351 uint_t sq_size, rq_size; 352 int status; 353 354 chan = kmem_zalloc(sizeof (iser_chan_t), KM_SLEEP); 355 356 mutex_init(&chan->ic_lock, NULL, MUTEX_DRIVER, NULL); 357 mutex_init(&chan->ic_sq_post_lock, NULL, MUTEX_DRIVER, NULL); 358 359 /* Lookup a path to the given destination */ 360 status = iser_ib_get_paths(local_ip, remote_ip, &chan->ic_ibt_path, 361 &path_src_ip); 362 363 if (status != ISER_STATUS_SUCCESS) { 364 ISER_LOG(CE_NOTE, "iser_ib_get_paths failed: status (%d)", 365 status); 366 mutex_destroy(&chan->ic_lock); 367 mutex_destroy(&chan->ic_sq_post_lock); 368 kmem_free(chan, sizeof (iser_chan_t)); 369 return (NULL); 370 } 371 372 /* get the local gid from the path info */ 373 lgid = chan->ic_ibt_path.pi_prim_cep_path.cep_adds_vect.av_sgid; 374 375 /* get the hca port from the path info */ 376 hca_port = chan->ic_ibt_path.pi_prim_cep_path.cep_hca_port_num; 377 378 /* Lookup the hca using the gid in the path info */ 379 hca = iser_ib_gid2hca(lgid); 380 if (hca == NULL) { 381 ISER_LOG(CE_NOTE, "iser_ib_alloc_rc_channel: failed " 382 "to lookup HCA handle"); 383 mutex_destroy(&chan->ic_lock); 384 mutex_destroy(&chan->ic_sq_post_lock); 385 kmem_free(chan, sizeof (iser_chan_t)); 386 return (NULL); 387 } 388 389 /* Set up the iSER channel handle with HCA and IP data */ 390 chan->ic_hca = hca; 391 chan->ic_localip = path_src_ip.ip_primary; 392 chan->ic_remoteip = *remote_ip; 393 394 /* 395 * Determine the queue sizes, based upon the HCA query data. 396 * For our Work Queues, we will use either our default value, 397 * or the HCA's maximum value, whichever is smaller. 
398 */ 399 sq_size = min(hca->hca_attr.hca_max_chan_sz, ISER_IB_SENDQ_SIZE); 400 rq_size = min(hca->hca_attr.hca_max_chan_sz, ISER_IB_RECVQ_SIZE); 401 402 /* 403 * For our Completion Queues, we again check the device maximum. 404 * We want to end up with CQs that are the next size up from the 405 * WQs they are servicing so that they have some overhead. 406 */ 407 if (hca->hca_attr.hca_max_cq_sz >= (sq_size + 1)) { 408 chan->ic_sendcq_sz = sq_size + 1; 409 } else { 410 chan->ic_sendcq_sz = hca->hca_attr.hca_max_cq_sz; 411 sq_size = chan->ic_sendcq_sz - 1; 412 } 413 414 if (hca->hca_attr.hca_max_cq_sz >= (rq_size + 1)) { 415 chan->ic_recvcq_sz = rq_size + 1; 416 } else { 417 chan->ic_recvcq_sz = hca->hca_attr.hca_max_cq_sz; 418 rq_size = chan->ic_recvcq_sz - 1; 419 } 420 421 /* Initialize the iSER channel's QP handle */ 422 iser_ib_init_qp(chan, sq_size, rq_size); 423 424 /* Set up the Send Completion Queue */ 425 status = iser_ib_setup_cq(hca->hca_hdl, chan->ic_sendcq_sz, 426 &chan->ic_sendcq); 427 if (status != ISER_STATUS_SUCCESS) { 428 iser_ib_fini_qp(&chan->ic_qp); 429 mutex_destroy(&chan->ic_lock); 430 mutex_destroy(&chan->ic_sq_post_lock); 431 kmem_free(chan, sizeof (iser_chan_t)); 432 return (NULL); 433 } 434 ibt_set_cq_handler(chan->ic_sendcq, iser_ib_sendcq_handler, chan); 435 ibt_enable_cq_notify(chan->ic_sendcq, IBT_NEXT_COMPLETION); 436 437 /* Set up the Receive Completion Queue */ 438 status = iser_ib_setup_cq(hca->hca_hdl, chan->ic_recvcq_sz, 439 &chan->ic_recvcq); 440 if (status != ISER_STATUS_SUCCESS) { 441 (void) ibt_free_cq(chan->ic_sendcq); 442 iser_ib_fini_qp(&chan->ic_qp); 443 mutex_destroy(&chan->ic_lock); 444 mutex_destroy(&chan->ic_sq_post_lock); 445 kmem_free(chan, sizeof (iser_chan_t)); 446 return (NULL); 447 } 448 ibt_set_cq_handler(chan->ic_recvcq, iser_ib_recvcq_handler, chan); 449 ibt_enable_cq_notify(chan->ic_recvcq, IBT_NEXT_COMPLETION); 450 451 /* Setup the channel arguments */ 452 iser_ib_setup_chanargs(hca_port, chan->ic_sendcq, 
chan->ic_recvcq, 453 sq_size, rq_size, hca->hca_pdhdl, &chanargs); 454 455 status = ibt_alloc_rc_channel(hca->hca_hdl, 456 IBT_ACHAN_NO_FLAGS, &chanargs, &chan->ic_chanhdl, NULL); 457 if (status != IBT_SUCCESS) { 458 ISER_LOG(CE_NOTE, "iser_ib_alloc_rc_channel: failed " 459 "ibt_alloc_rc_channel: status (%d)", status); 460 (void) ibt_free_cq(chan->ic_sendcq); 461 (void) ibt_free_cq(chan->ic_recvcq); 462 iser_ib_fini_qp(&chan->ic_qp); 463 mutex_destroy(&chan->ic_lock); 464 mutex_destroy(&chan->ic_sq_post_lock); 465 kmem_free(chan, sizeof (iser_chan_t)); 466 return (NULL); 467 } 468 469 /* Set the 'channel' as the client private data */ 470 (void) ibt_set_chan_private(chan->ic_chanhdl, chan); 471 472 ISER_LOG(CE_NOTE, "iser_ib_alloc_rc_channel success: " 473 "chanhdl (0x%p), IP:[%llx to %llx], lgid (%llx:%llx), HCA(%llx) %d", 474 (void *)chan->ic_chanhdl, 475 (longlong_t)local_ip->un.ip4addr, 476 (longlong_t)remote_ip->un.ip4addr, 477 (longlong_t)lgid.gid_prefix, (longlong_t)lgid.gid_guid, 478 (longlong_t)hca->hca_guid, hca_port); 479 480 return (chan); 481 } 482 483 /* 484 * iser_ib_open_rc_channel 485 * This function opens a RC connection on the given allocated RC channel 486 */ 487 int 488 iser_ib_open_rc_channel(iser_chan_t *chan) 489 { 490 ibt_ip_cm_info_t ipcm_info; 491 iser_private_data_t iser_priv_data; 492 ibt_chan_open_args_t ocargs; 493 ibt_rc_returns_t ocreturns; 494 int status; 495 496 mutex_enter(&chan->ic_lock); 497 498 /* 499 * For connection establishment, the initiator sends a CM REQ using the 500 * iSER RDMA-Aware Service ID. Included are the source and destination 501 * IP addresses, and the src port. 
502 */ 503 bzero(&ipcm_info, sizeof (ibt_ip_cm_info_t)); 504 ipcm_info.src_addr = chan->ic_localip; 505 ipcm_info.dst_addr = chan->ic_remoteip; 506 ipcm_info.src_port = chan->ic_lport; 507 508 /* 509 * The CM Private Data field defines the iSER connection parameters 510 * such as zero based virtual address exception (ZBVAE) and Send with 511 * invalidate Exception (SIE). 512 * 513 * Solaris IBT does not currently support ZBVAE or SIE. 514 */ 515 iser_priv_data.rsvd1 = 0; 516 iser_priv_data.sie = 1; 517 iser_priv_data.zbvae = 1; 518 519 status = ibt_format_ip_private_data(&ipcm_info, 520 sizeof (iser_private_data_t), &iser_priv_data); 521 if (status != IBT_SUCCESS) { 522 ISER_LOG(CE_NOTE, "iser_ib_open_rc_channel failed: %d", status); 523 mutex_exit(&chan->ic_lock); 524 return (status); 525 } 526 527 /* 528 * Set the SID we are attempting to connect to, based upon the 529 * remote port number. 530 */ 531 chan->ic_ibt_path.pi_sid = ibt_get_ip_sid(IPPROTO_TCP, chan->ic_rport); 532 533 /* Set up the args for the channel open */ 534 bzero(&ocargs, sizeof (ibt_chan_open_args_t)); 535 ocargs.oc_path = &chan->ic_ibt_path; 536 ocargs.oc_cm_handler = iser_ib_cm_handler; 537 ocargs.oc_cm_clnt_private = iser_state; 538 ocargs.oc_rdma_ra_out = 4; 539 ocargs.oc_rdma_ra_in = 4; 540 ocargs.oc_path_retry_cnt = 2; 541 ocargs.oc_path_rnr_retry_cnt = 2; 542 ocargs.oc_priv_data_len = sizeof (iser_private_data_t); 543 ocargs.oc_priv_data = &iser_priv_data; 544 545 bzero(&ocreturns, sizeof (ibt_rc_returns_t)); 546 547 status = ibt_open_rc_channel(chan->ic_chanhdl, 548 IBT_OCHAN_NO_FLAGS, IBT_BLOCKING, &ocargs, &ocreturns); 549 550 if (status != IBT_SUCCESS) { 551 ISER_LOG(CE_NOTE, "iser_ib_open_rc_channel failed: %d", status); 552 mutex_exit(&chan->ic_lock); 553 return (status); 554 } 555 556 mutex_exit(&chan->ic_lock); 557 return (IDM_STATUS_SUCCESS); 558 } 559 560 /* 561 * iser_ib_close_rc_channel 562 * This function closes the RC channel related to this iser_chan handle. 
563 * We invoke this in a non-blocking, no callbacks context. 564 */ 565 void 566 iser_ib_close_rc_channel(iser_chan_t *chan) 567 { 568 int status; 569 570 mutex_enter(&chan->ic_lock); 571 status = ibt_close_rc_channel(chan->ic_chanhdl, IBT_BLOCKING, NULL, 572 0, NULL, NULL, 0); 573 if (status != IBT_SUCCESS) { 574 ISER_LOG(CE_NOTE, "iser_ib_close_rc_channel: " 575 "ibt_close_rc_channel failed: status (%d)", status); 576 } 577 mutex_exit(&chan->ic_lock); 578 } 579 580 /* 581 * iser_ib_free_rc_channel 582 * 583 * This function tears down an RC channel's QP initialization and frees it. 584 * Note that we do not need synchronization here; the channel has been 585 * closed already, so we should only have completion polling occuring. Once 586 * complete, we are free to free the IBTF channel, WQ and CQ resources, and 587 * our own related resources. 588 */ 589 void 590 iser_ib_free_rc_channel(iser_chan_t *chan) 591 { 592 iser_qp_t *iser_qp; 593 594 iser_qp = &chan->ic_qp; 595 596 /* Ensure the SQ is empty */ 597 while (chan->ic_sq_post_count != 0) { 598 mutex_exit(&chan->ic_conn->ic_lock); 599 delay(drv_usectohz(ISER_DELAY_HALF_SECOND)); 600 mutex_enter(&chan->ic_conn->ic_lock); 601 } 602 mutex_destroy(&chan->ic_sq_post_lock); 603 604 /* Ensure the RQ is empty */ 605 (void) ibt_flush_channel(chan->ic_chanhdl); 606 mutex_enter(&iser_qp->qp_lock); 607 while (iser_qp->rq_level != 0) { 608 mutex_exit(&iser_qp->qp_lock); 609 mutex_exit(&chan->ic_conn->ic_lock); 610 delay(drv_usectohz(ISER_DELAY_HALF_SECOND)); 611 mutex_enter(&chan->ic_conn->ic_lock); 612 mutex_enter(&iser_qp->qp_lock); 613 } 614 615 /* Free our QP handle */ 616 mutex_exit(&iser_qp->qp_lock); 617 (void) iser_ib_fini_qp(iser_qp); 618 619 /* Free the IBT channel resources */ 620 (void) ibt_free_channel(chan->ic_chanhdl); 621 chan->ic_chanhdl = NULL; 622 623 /* Free the CQs */ 624 ibt_free_cq(chan->ic_sendcq); 625 ibt_free_cq(chan->ic_recvcq); 626 627 /* Free the chan handle */ 628 mutex_destroy(&chan->ic_lock); 
629 kmem_free(chan, sizeof (iser_chan_t)); 630 } 631 632 /* 633 * iser_ib_post_recv 634 * 635 * This function handles keeping the RQ full on a given channel. 636 * This routine will mostly be run on a taskq, and will check the 637 * current fill level of the RQ, and post as many WRs as necessary 638 * to fill it again. 639 */ 640 641 int 642 iser_ib_post_recv_async(ibt_channel_hdl_t chanhdl) 643 { 644 iser_chan_t *chan; 645 int status; 646 647 /* Pull our iSER channel handle from the private data */ 648 chan = (iser_chan_t *)ibt_get_chan_private(chanhdl); 649 650 idm_conn_hold(chan->ic_conn->ic_idmc); 651 status = ddi_taskq_dispatch(iser_taskq, iser_ib_post_recv_task, 652 (void *)chanhdl, DDI_NOSLEEP); 653 if (status != DDI_SUCCESS) { 654 idm_conn_rele(chan->ic_conn->ic_idmc); 655 } 656 657 return (status); 658 } 659 660 static void 661 iser_ib_post_recv_task(void *arg) 662 { 663 ibt_channel_hdl_t chanhdl = arg; 664 iser_chan_t *chan; 665 666 /* Pull our iSER channel handle from the private data */ 667 chan = (iser_chan_t *)ibt_get_chan_private(chanhdl); 668 669 iser_ib_post_recv(chanhdl); 670 idm_conn_rele(chan->ic_conn->ic_idmc); 671 } 672 673 void 674 iser_ib_post_recv(ibt_channel_hdl_t chanhdl) 675 { 676 iser_chan_t *chan; 677 iser_hca_t *hca; 678 iser_msg_t *msg; 679 ibt_recv_wr_t *wrlist, wr[ISER_IB_RQ_POST_MAX]; 680 int rq_space, msg_ret; 681 int total_num, npost; 682 uint_t nposted; 683 int status, i; 684 iser_qp_t *iser_qp; 685 ib_gid_t lgid; 686 687 /* Pull our iSER channel handle from the private data */ 688 chan = (iser_chan_t *)ibt_get_chan_private(chanhdl); 689 690 ASSERT(chan != NULL); 691 692 mutex_enter(&chan->ic_conn->ic_lock); 693 694 /* Bail out if the connection is closed; no need for more recv WRs */ 695 if ((chan->ic_conn->ic_stage == ISER_CONN_STAGE_CLOSING) || 696 (chan->ic_conn->ic_stage == ISER_CONN_STAGE_CLOSED)) { 697 mutex_exit(&chan->ic_conn->ic_lock); 698 return; 699 } 700 701 /* get the QP handle from the iser_chan */ 702 iser_qp = 
&chan->ic_qp; 703 704 /* get the local gid from the path info */ 705 lgid = chan->ic_ibt_path.pi_prim_cep_path.cep_adds_vect.av_sgid; 706 707 /* get the hca port from the path info */ 708 hca = iser_ib_gid2hca(lgid); 709 if (hca == NULL) { 710 ISER_LOG(CE_NOTE, "iser_ib_post_recv: unable to retrieve " 711 "HCA handle"); 712 mutex_exit(&chan->ic_conn->ic_lock); 713 return; 714 } 715 716 /* check for space to post on the RQ */ 717 mutex_enter(&iser_qp->qp_lock); 718 rq_space = iser_qp->rq_depth - iser_qp->rq_level; 719 if (rq_space == 0) { 720 /* The RQ is full, clear the pending flag and return */ 721 iser_qp->rq_taskqpending = B_FALSE; 722 mutex_exit(&iser_qp->qp_lock); 723 mutex_exit(&chan->ic_conn->ic_lock); 724 return; 725 } 726 727 /* Keep track of the lowest value for rq_min_post_level */ 728 if (iser_qp->rq_level < iser_qp->rq_min_post_level) 729 iser_qp->rq_min_post_level = iser_qp->rq_level; 730 731 mutex_exit(&iser_qp->qp_lock); 732 733 /* we've room to post, so pull from the msg cache */ 734 msg = iser_msg_get(hca, rq_space, &msg_ret); 735 if (msg == NULL) { 736 ISER_LOG(CE_NOTE, "iser_ib_post_recv: no message handles " 737 "available in msg cache currently"); 738 /* 739 * There are no messages on the cache. Wait a half- 740 * second, then try again. 
741 */ 742 delay(drv_usectohz(ISER_DELAY_HALF_SECOND)); 743 status = iser_ib_post_recv_async(chanhdl); 744 if (status != DDI_SUCCESS) { 745 ISER_LOG(CE_NOTE, "iser_ib_post_recv: failed to " 746 "redispatch routine"); 747 /* Failed to dispatch, clear pending flag */ 748 mutex_enter(&iser_qp->qp_lock); 749 iser_qp->rq_taskqpending = B_FALSE; 750 mutex_exit(&iser_qp->qp_lock); 751 } 752 mutex_exit(&chan->ic_conn->ic_lock); 753 return; 754 } 755 756 if (msg_ret != rq_space) { 757 ISER_LOG(CE_NOTE, "iser_ib_post_recv: requested number of " 758 "messages not allocated: requested (%d) allocated (%d)", 759 rq_space, msg_ret); 760 /* We got some, but not all, of our requested depth */ 761 rq_space = msg_ret; 762 } 763 764 /* 765 * Now, walk through the allocated WRs and post them, 766 * ISER_IB_RQ_POST_MAX (or less) at a time. 767 */ 768 wrlist = &wr[0]; 769 total_num = rq_space; 770 771 while (total_num) { 772 /* determine the number to post on this iteration */ 773 npost = (total_num > ISER_IB_RQ_POST_MAX) ? 
774 ISER_IB_RQ_POST_MAX : total_num; 775 776 /* build a list of WRs from the msg list */ 777 for (i = 0; i < npost; i++) { 778 wrlist[i].wr_id = (ibt_wrid_t)(uintptr_t)msg; 779 wrlist[i].wr_nds = ISER_IB_SGLIST_SIZE; 780 wrlist[i].wr_sgl = &msg->msg_ds; 781 msg = msg->nextp; 782 } 783 784 /* post the list to the RQ */ 785 nposted = 0; 786 status = ibt_post_recv(chanhdl, wrlist, npost, &nposted); 787 if ((status != IBT_SUCCESS) || (nposted != npost)) { 788 ISER_LOG(CE_NOTE, "iser_ib_post_recv: ibt_post_recv " 789 "failed: requested (%d) posted (%d) status (%d)", 790 npost, nposted, status); 791 total_num -= nposted; 792 break; 793 } 794 795 /* decrement total number to post by the number posted */ 796 total_num -= nposted; 797 } 798 799 mutex_enter(&iser_qp->qp_lock); 800 if (total_num != 0) { 801 ISER_LOG(CE_NOTE, "iser_ib_post_recv: unable to fill RQ, " 802 "failed to post (%d) WRs", total_num); 803 iser_qp->rq_level += rq_space - total_num; 804 } else { 805 iser_qp->rq_level += rq_space; 806 } 807 808 /* 809 * Now that we've filled the RQ, check that all of the recv WRs 810 * haven't just been immediately consumed. If so, taskqpending is 811 * still B_TRUE, so we need to fire off a taskq thread to post 812 * more WRs. 813 */ 814 if (iser_qp->rq_level == 0) { 815 mutex_exit(&iser_qp->qp_lock); 816 status = iser_ib_post_recv_async(chanhdl); 817 if (status != DDI_SUCCESS) { 818 ISER_LOG(CE_NOTE, "iser_ib_post_recv: failed to " 819 "dispatch followup routine"); 820 /* Failed to dispatch, clear pending flag */ 821 mutex_enter(&iser_qp->qp_lock); 822 iser_qp->rq_taskqpending = B_FALSE; 823 mutex_exit(&iser_qp->qp_lock); 824 } 825 } else { 826 /* 827 * We're done, we've filled the RQ. Clear the taskq 828 * flag so that we can run again. 
829 */ 830 iser_qp->rq_taskqpending = B_FALSE; 831 mutex_exit(&iser_qp->qp_lock); 832 } 833 834 mutex_exit(&chan->ic_conn->ic_lock); 835 } 836 837 /* 838 * iser_ib_handle_portup_event() 839 * This handles the IBT_EVENT_PORT_UP unaffiliated asynchronous event. 840 * 841 * To facilitate a seamless bringover of the port and configure the CM service 842 * for inbound iSER service requests on this newly active port, the existing 843 * IDM services will be checked for iSER support. 844 * If an iSER service was already created, then this service will simply be 845 * bound to the gid of the newly active port. If on the other hand, the CM 846 * service did not exist, i.e. only socket communication, then a new CM 847 * service will be first registered with the saved service parameters and 848 * then bound to the newly active port. 849 * 850 */ 851 /* ARGSUSED */ 852 static void 853 iser_ib_handle_portup_event(ibt_hca_hdl_t hdl, ibt_async_event_t *event) 854 { 855 iser_hca_t *hca; 856 ib_gid_t gid; 857 idm_svc_t *idm_svc; 858 int status; 859 860 ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event: HCA(0x%llx) port(%d)", 861 (longlong_t)event->ev_hca_guid, event->ev_port); 862 863 /* 864 * Query all ports on the HCA and update the port information 865 * maintainted in the iser_hca_t structure 866 */ 867 hca = iser_ib_guid2hca(event->ev_hca_guid); 868 if (hca == NULL) { 869 870 /* HCA is just made available, first port on that HCA */ 871 hca = iser_ib_alloc_hca(event->ev_hca_guid); 872 873 mutex_enter(&iser_state->is_hcalist_lock); 874 list_insert_tail(&iser_state->is_hcalist, hca); 875 iser_state->is_num_hcas++; 876 mutex_exit(&iser_state->is_hcalist_lock); 877 878 } else { 879 880 status = iser_ib_update_hcaports(hca); 881 882 if (status != IBT_SUCCESS) { 883 ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event " 884 "status(0x%x): iser_ib_update_hcaports failed: " 885 "HCA(0x%llx) port(%d)", status, 886 (longlong_t)event->ev_hca_guid, event->ev_port); 887 return; 888 } 889 } 890 891 
gid = hca->hca_port_info[event->ev_port - 1].p_sgid_tbl[0]; 892 893 /* 894 * Iterate through the global list of IDM target services 895 * and check for existing iSER CM service. 896 */ 897 mutex_enter(&idm.idm_global_mutex); 898 for (idm_svc = list_head(&idm.idm_tgt_svc_list); 899 idm_svc != NULL; 900 idm_svc = list_next(&idm.idm_tgt_svc_list, idm_svc)) { 901 902 903 if (idm_svc->is_iser_svc == NULL) { 904 905 /* Establish a new CM service for iSER requests */ 906 status = iser_tgt_svc_create( 907 &idm_svc->is_svc_req, idm_svc); 908 909 if (status != IBT_SUCCESS) { 910 ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event " 911 "status(0x%x): iser_tgt_svc_create failed: " 912 "HCA(0x%llx) port(%d)", status, 913 (longlong_t)event->ev_hca_guid, 914 event->ev_port); 915 916 continue; 917 } 918 } 919 920 status = iser_ib_activate_port( 921 idm_svc, event->ev_hca_guid, gid); 922 if (status != IBT_SUCCESS) { 923 924 ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event " 925 "status(0x%x): Bind service on port " 926 "(%llx:%llx) failed", 927 status, (longlong_t)gid.gid_prefix, 928 (longlong_t)gid.gid_guid); 929 930 continue; 931 } 932 ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event: service bound " 933 "HCA(0x%llx) port(%d)", (longlong_t)event->ev_hca_guid, 934 event->ev_port); 935 } 936 mutex_exit(&idm.idm_global_mutex); 937 938 ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event success: " 939 "HCA(0x%llx) port(%d)", (longlong_t)event->ev_hca_guid, 940 event->ev_port); 941 } 942 943 /* 944 * iser_ib_handle_portdown_event() 945 * This handles the IBT_EVENT_PORT_DOWN unaffiliated asynchronous error. 946 * 947 * Unconfigure the CM service on the deactivated port and teardown the 948 * connections that are using the CM service. 
949 */ 950 /* ARGSUSED */ 951 static void 952 iser_ib_handle_portdown_event(ibt_hca_hdl_t hdl, ibt_async_event_t *event) 953 { 954 iser_hca_t *hca; 955 ib_gid_t gid; 956 int status; 957 958 /* 959 * Query all ports on the HCA and update the port information 960 * maintainted in the iser_hca_t structure 961 */ 962 hca = iser_ib_guid2hca(event->ev_hca_guid); 963 ASSERT(hca != NULL); 964 965 status = iser_ib_update_hcaports(hca); 966 if (status != IBT_SUCCESS) { 967 ISER_LOG(CE_NOTE, "iser_ib_handle_portdown_event status(0x%x): " 968 "ibt_ib_update_hcaports failed: HCA(0x%llx) port(%d)", 969 status, (longlong_t)event->ev_hca_guid, event->ev_port); 970 return; 971 } 972 973 /* get the gid of the new port */ 974 gid = hca->hca_port_info[event->ev_port - 1].p_sgid_tbl[0]; 975 iser_ib_deactivate_port(event->ev_hca_guid, gid); 976 977 ISER_LOG(CE_NOTE, "iser_ib_handle_portdown_event success: " 978 "HCA(0x%llx) port(%d)", (longlong_t)event->ev_hca_guid, 979 event->ev_port); 980 } 981 982 /* 983 * iser_ib_handle_hca_detach_event() 984 * Quiesce all activity bound for the port, teardown the connection, unbind 985 * iSER services on all ports and release the HCA handle. 986 */ 987 /* ARGSUSED */ 988 static void 989 iser_ib_handle_hca_detach_event(ibt_hca_hdl_t hdl, ibt_async_event_t *event) 990 { 991 iser_hca_t *nexthca, *hca; 992 int i, status; 993 994 ISER_LOG(CE_NOTE, "iser_ib_handle_hca_detach_event: HCA(0x%llx)", 995 (longlong_t)event->ev_hca_guid); 996 997 hca = iser_ib_guid2hca(event->ev_hca_guid); 998 for (i = 0; i < hca->hca_num_ports; i++) { 999 iser_ib_deactivate_port(hca->hca_guid, 1000 hca->hca_port_info[i].p_sgid_tbl[0]); 1001 } 1002 1003 /* 1004 * Update the HCA list maintained in the iser_state. Free the 1005 * resources allocated to the HCA, i.e. 
caches, protection domain 1006 */ 1007 mutex_enter(&iser_state->is_hcalist_lock); 1008 1009 for (hca = list_head(&iser_state->is_hcalist); 1010 hca != NULL; 1011 hca = nexthca) { 1012 1013 nexthca = list_next(&iser_state->is_hcalist, hca); 1014 1015 if (hca->hca_guid == event->ev_hca_guid) { 1016 1017 list_remove(&iser_state->is_hcalist, hca); 1018 iser_state->is_num_hcas--; 1019 1020 status = iser_ib_free_hca(hca); 1021 if (status != DDI_SUCCESS) { 1022 ISER_LOG(CE_WARN, "iser_ib_handle_hca_detach: " 1023 "Failed to free hca(%p)", (void *)hca); 1024 list_insert_tail(&iser_state->is_hcalist, hca); 1025 iser_state->is_num_hcas++; 1026 } 1027 /* No way to return status to IBT if this fails */ 1028 } 1029 } 1030 mutex_exit(&iser_state->is_hcalist_lock); 1031 1032 } 1033 1034 /* 1035 * iser_ib_async_handler 1036 * An IBT Asynchronous Event handler is registered it with the framework and 1037 * passed via the ibt_attach() routine. This function handles the following 1038 * asynchronous events. 1039 * IBT_EVENT_PORT_UP 1040 * IBT_ERROR_PORT_DOWN 1041 * IBT_HCA_ATTACH_EVENT 1042 * IBT_HCA_DETACH_EVENT 1043 */ 1044 /* ARGSUSED */ 1045 void 1046 iser_ib_async_handler(void *clntp, ibt_hca_hdl_t hdl, ibt_async_code_t code, 1047 ibt_async_event_t *event) 1048 { 1049 switch (code) { 1050 case IBT_EVENT_PORT_UP: 1051 iser_ib_handle_portup_event(hdl, event); 1052 break; 1053 1054 case IBT_ERROR_PORT_DOWN: 1055 iser_ib_handle_portdown_event(hdl, event); 1056 break; 1057 1058 case IBT_HCA_ATTACH_EVENT: 1059 /* 1060 * A new HCA device is available for use, ignore this 1061 * event because the corresponding IBT_EVENT_PORT_UP 1062 * events will get triggered and handled accordingly. 
1063 */ 1064 break; 1065 1066 case IBT_HCA_DETACH_EVENT: 1067 iser_ib_handle_hca_detach_event(hdl, event); 1068 break; 1069 1070 default: 1071 break; 1072 } 1073 } 1074 1075 /* 1076 * iser_ib_init_hcas 1077 * 1078 * This function opens all the HCA devices, gathers the HCA state information 1079 * and adds the HCA handle for each HCA found in the iser_soft_state. 1080 */ 1081 static int 1082 iser_ib_init_hcas(void) 1083 { 1084 ib_guid_t *guid; 1085 int num_hcas; 1086 int i; 1087 iser_hca_t *hca; 1088 1089 /* Retrieve the HCA list */ 1090 num_hcas = ibt_get_hca_list(&guid); 1091 if (num_hcas == 0) { 1092 /* 1093 * This shouldn't happen, but might if we have all HCAs 1094 * detach prior to initialization. 1095 */ 1096 return (DDI_FAILURE); 1097 } 1098 1099 /* Initialize the hcalist lock */ 1100 mutex_init(&iser_state->is_hcalist_lock, NULL, MUTEX_DRIVER, NULL); 1101 1102 /* Create the HCA list */ 1103 list_create(&iser_state->is_hcalist, sizeof (iser_hca_t), 1104 offsetof(iser_hca_t, hca_node)); 1105 1106 for (i = 0; i < num_hcas; i++) { 1107 1108 ISER_LOG(CE_NOTE, "iser_ib_init_hcas: initializing HCA " 1109 "(0x%llx)", (longlong_t)guid[i]); 1110 1111 hca = iser_ib_alloc_hca(guid[i]); 1112 if (hca == NULL) { 1113 /* This shouldn't happen, teardown and fail */ 1114 (void) iser_ib_fini_hcas(); 1115 (void) ibt_free_hca_list(guid, num_hcas); 1116 return (DDI_FAILURE); 1117 } 1118 1119 mutex_enter(&iser_state->is_hcalist_lock); 1120 list_insert_tail(&iser_state->is_hcalist, hca); 1121 iser_state->is_num_hcas++; 1122 mutex_exit(&iser_state->is_hcalist_lock); 1123 1124 } 1125 1126 /* Free the IBT HCA list */ 1127 (void) ibt_free_hca_list(guid, num_hcas); 1128 1129 /* Check that we've initialized at least one HCA */ 1130 mutex_enter(&iser_state->is_hcalist_lock); 1131 if (list_is_empty(&iser_state->is_hcalist)) { 1132 ISER_LOG(CE_NOTE, "iser_ib_init_hcas: failed to initialize " 1133 "any HCAs"); 1134 1135 mutex_exit(&iser_state->is_hcalist_lock); 1136 (void) 
iser_ib_fini_hcas(); 1137 return (DDI_FAILURE); 1138 } 1139 mutex_exit(&iser_state->is_hcalist_lock); 1140 1141 return (DDI_SUCCESS); 1142 } 1143 1144 /* 1145 * iser_ib_fini_hcas 1146 * 1147 * Teardown the iSER HCA list initialized above. 1148 */ 1149 static int 1150 iser_ib_fini_hcas(void) 1151 { 1152 iser_hca_t *nexthca, *hca; 1153 int status; 1154 1155 mutex_enter(&iser_state->is_hcalist_lock); 1156 for (hca = list_head(&iser_state->is_hcalist); 1157 hca != NULL; 1158 hca = nexthca) { 1159 1160 nexthca = list_next(&iser_state->is_hcalist, hca); 1161 1162 list_remove(&iser_state->is_hcalist, hca); 1163 1164 status = iser_ib_free_hca(hca); 1165 if (status != IBT_SUCCESS) { 1166 ISER_LOG(CE_NOTE, "iser_ib_fini_hcas: failed to free " 1167 "HCA during fini"); 1168 list_insert_tail(&iser_state->is_hcalist, hca); 1169 return (DDI_FAILURE); 1170 } 1171 1172 iser_state->is_num_hcas--; 1173 1174 } 1175 mutex_exit(&iser_state->is_hcalist_lock); 1176 list_destroy(&iser_state->is_hcalist); 1177 mutex_destroy(&iser_state->is_hcalist_lock); 1178 1179 return (DDI_SUCCESS); 1180 } 1181 1182 /* 1183 * iser_ib_alloc_hca 1184 * 1185 * This function opens the given HCA device, gathers the HCA state information 1186 * and adds the HCA handle 1187 */ 1188 static iser_hca_t * 1189 iser_ib_alloc_hca(ib_guid_t guid) 1190 { 1191 iser_hca_t *hca; 1192 int status; 1193 1194 /* Allocate an iser_hca_t HCA handle */ 1195 hca = (iser_hca_t *)kmem_zalloc(sizeof (iser_hca_t), KM_SLEEP); 1196 1197 /* Open this HCA */ 1198 status = ibt_open_hca(iser_state->is_ibhdl, guid, &hca->hca_hdl); 1199 if (status != IBT_SUCCESS) { 1200 ISER_LOG(CE_NOTE, "iser_ib_alloc_hca: ibt_open_hca failed:" 1201 " guid (0x%llx) status (0x%x)", (longlong_t)guid, status); 1202 kmem_free(hca, sizeof (iser_hca_t)); 1203 return (NULL); 1204 } 1205 1206 hca->hca_guid = guid; 1207 hca->hca_clnt_hdl = iser_state->is_ibhdl; 1208 1209 /* Query the HCA */ 1210 status = ibt_query_hca(hca->hca_hdl, &hca->hca_attr); 1211 if (status != 
IBT_SUCCESS) { 1212 ISER_LOG(CE_NOTE, "iser_ib_alloc_hca: ibt_query_hca " 1213 "failure: guid (0x%llx) status (0x%x)", 1214 (longlong_t)guid, status); 1215 (void) ibt_close_hca(hca->hca_hdl); 1216 kmem_free(hca, sizeof (iser_hca_t)); 1217 return (NULL); 1218 } 1219 1220 /* Query all ports on the HCA */ 1221 status = ibt_query_hca_ports(hca->hca_hdl, 0, 1222 &hca->hca_port_info, &hca->hca_num_ports, 1223 &hca->hca_port_info_sz); 1224 if (status != IBT_SUCCESS) { 1225 ISER_LOG(CE_NOTE, "iser_ib_alloc_hca: " 1226 "ibt_query_hca_ports failure: guid (0x%llx) " 1227 "status (0x%x)", (longlong_t)guid, status); 1228 (void) ibt_close_hca(hca->hca_hdl); 1229 kmem_free(hca, sizeof (iser_hca_t)); 1230 return (NULL); 1231 } 1232 1233 /* Allocate a single PD on this HCA */ 1234 status = ibt_alloc_pd(hca->hca_hdl, IBT_PD_NO_FLAGS, 1235 &hca->hca_pdhdl); 1236 if (status != IBT_SUCCESS) { 1237 ISER_LOG(CE_NOTE, "iser_ib_alloc_hca: ibt_alloc_pd " 1238 "failure: guid (0x%llx) status (0x%x)", 1239 (longlong_t)guid, status); 1240 (void) ibt_close_hca(hca->hca_hdl); 1241 ibt_free_portinfo(hca->hca_port_info, hca->hca_port_info_sz); 1242 kmem_free(hca, sizeof (iser_hca_t)); 1243 return (NULL); 1244 } 1245 1246 /* Initialize the message and data MR caches for this HCA */ 1247 iser_init_hca_caches(hca); 1248 1249 return (hca); 1250 } 1251 1252 static int 1253 iser_ib_free_hca(iser_hca_t *hca) 1254 { 1255 int status; 1256 ibt_hca_portinfo_t *hca_port_info; 1257 uint_t hca_port_info_sz; 1258 1259 ASSERT(hca != NULL); 1260 if (hca->hca_failed) 1261 return (DDI_FAILURE); 1262 1263 hca_port_info = hca->hca_port_info; 1264 hca_port_info_sz = hca->hca_port_info_sz; 1265 1266 /* 1267 * Free the memory regions before freeing 1268 * the associated protection domain 1269 */ 1270 iser_fini_hca_caches(hca); 1271 1272 status = ibt_free_pd(hca->hca_hdl, hca->hca_pdhdl); 1273 if (status != IBT_SUCCESS) { 1274 ISER_LOG(CE_NOTE, "iser_ib_free_hca: failed to free PD " 1275 "status=0x%x", status); 1276 goto 
out_caches; 1277 } 1278 1279 status = ibt_close_hca(hca->hca_hdl); 1280 if (status != IBT_SUCCESS) { 1281 ISER_LOG(CE_NOTE, "iser_ib_fini_hcas: failed to close HCA " 1282 "status=0x%x", status); 1283 goto out_pd; 1284 } 1285 1286 ibt_free_portinfo(hca_port_info, hca_port_info_sz); 1287 1288 kmem_free(hca, sizeof (iser_hca_t)); 1289 return (DDI_SUCCESS); 1290 1291 /* 1292 * We only managed to partially tear down the HCA, try to put it back 1293 * like it was before returning. 1294 */ 1295 out_pd: 1296 status = ibt_alloc_pd(hca->hca_hdl, IBT_PD_NO_FLAGS, &hca->hca_pdhdl); 1297 if (status != IBT_SUCCESS) { 1298 hca->hca_failed = B_TRUE; 1299 /* Report error and exit */ 1300 ISER_LOG(CE_NOTE, "iser_ib_free_hca: could not re-alloc PD " 1301 "status=0x%x", status); 1302 return (DDI_FAILURE); 1303 } 1304 1305 out_caches: 1306 iser_init_hca_caches(hca); 1307 1308 return (DDI_FAILURE); 1309 } 1310 1311 static int 1312 iser_ib_update_hcaports(iser_hca_t *hca) 1313 { 1314 ibt_hca_portinfo_t *pinfop, *oldpinfop; 1315 uint_t size, oldsize, nport; 1316 int status; 1317 1318 ASSERT(hca != NULL); 1319 1320 status = ibt_query_hca_ports(hca->hca_hdl, 0, &pinfop, &nport, &size); 1321 if (status != IBT_SUCCESS) { 1322 ISER_LOG(CE_NOTE, "ibt_query_hca_ports failed: %d", status); 1323 return (status); 1324 } 1325 1326 oldpinfop = hca->hca_port_info; 1327 oldsize = hca->hca_port_info_sz; 1328 hca->hca_port_info = pinfop; 1329 hca->hca_port_info_sz = size; 1330 1331 (void) ibt_free_portinfo(oldpinfop, oldsize); 1332 1333 return (IBT_SUCCESS); 1334 } 1335 1336 /* 1337 * iser_ib_gid2hca 1338 * Given a gid, find the corresponding hca 1339 */ 1340 iser_hca_t * 1341 iser_ib_gid2hca(ib_gid_t gid) 1342 { 1343 1344 iser_hca_t *hca; 1345 int i; 1346 1347 mutex_enter(&iser_state->is_hcalist_lock); 1348 for (hca = list_head(&iser_state->is_hcalist); 1349 hca != NULL; 1350 hca = list_next(&iser_state->is_hcalist, hca)) { 1351 1352 for (i = 0; i < hca->hca_num_ports; i++) { 1353 if 
((hca->hca_port_info[i].p_sgid_tbl[0].gid_prefix == 1354 gid.gid_prefix) && 1355 (hca->hca_port_info[i].p_sgid_tbl[0].gid_guid == 1356 gid.gid_guid)) { 1357 1358 mutex_exit(&iser_state->is_hcalist_lock); 1359 1360 return (hca); 1361 } 1362 } 1363 } 1364 mutex_exit(&iser_state->is_hcalist_lock); 1365 return (NULL); 1366 } 1367 1368 /* 1369 * iser_ib_guid2hca 1370 * Given a HCA guid, find the corresponding HCA 1371 */ 1372 iser_hca_t * 1373 iser_ib_guid2hca(ib_guid_t guid) 1374 { 1375 1376 iser_hca_t *hca; 1377 1378 mutex_enter(&iser_state->is_hcalist_lock); 1379 for (hca = list_head(&iser_state->is_hcalist); 1380 hca != NULL; 1381 hca = list_next(&iser_state->is_hcalist, hca)) { 1382 1383 if (hca->hca_guid == guid) { 1384 mutex_exit(&iser_state->is_hcalist_lock); 1385 return (hca); 1386 } 1387 } 1388 mutex_exit(&iser_state->is_hcalist_lock); 1389 return (NULL); 1390 } 1391 1392 /* 1393 * iser_ib_conv_sockaddr2ibtaddr 1394 * This function converts a socket address into the IBT format 1395 */ 1396 void iser_ib_conv_sockaddr2ibtaddr( 1397 idm_sockaddr_t *saddr, ibt_ip_addr_t *ibt_addr) 1398 { 1399 if (saddr == NULL) { 1400 ibt_addr->family = AF_UNSPEC; 1401 ibt_addr->un.ip4addr = 0; 1402 } else { 1403 switch (saddr->sin.sa_family) { 1404 case AF_INET: 1405 1406 ibt_addr->family = saddr->sin4.sin_family; 1407 ibt_addr->un.ip4addr = saddr->sin4.sin_addr.s_addr; 1408 break; 1409 1410 case AF_INET6: 1411 1412 ibt_addr->family = saddr->sin6.sin6_family; 1413 ibt_addr->un.ip6addr = saddr->sin6.sin6_addr; 1414 break; 1415 1416 default: 1417 ibt_addr->family = AF_UNSPEC; 1418 } 1419 1420 } 1421 } 1422 1423 /* 1424 * iser_ib_conv_ibtaddr2sockaddr 1425 * This function converts an IBT ip address handle to a sockaddr 1426 */ 1427 void iser_ib_conv_ibtaddr2sockaddr(struct sockaddr_storage *ss, 1428 ibt_ip_addr_t *ibt_addr, in_port_t port) 1429 { 1430 struct sockaddr_in *sin; 1431 struct sockaddr_in6 *sin6; 1432 1433 switch (ibt_addr->family) { 1434 case AF_INET: 1435 case 
AF_UNSPEC: 1436 1437 sin = (struct sockaddr_in *)ibt_addr; 1438 sin->sin_port = ntohs(port); 1439 bcopy(sin, ss, sizeof (struct sockaddr_in)); 1440 break; 1441 1442 case AF_INET6: 1443 1444 sin6 = (struct sockaddr_in6 *)ibt_addr; 1445 sin6->sin6_port = ntohs(port); 1446 bcopy(sin6, ss, sizeof (struct sockaddr_in6)); 1447 break; 1448 1449 default: 1450 ISER_LOG(CE_NOTE, "iser_ib_conv_ibtaddr2sockaddr: " 1451 "unknown family type: 0x%x", ibt_addr->family); 1452 } 1453 } 1454 1455 /* 1456 * iser_ib_setup_cq 1457 * This function sets up the Completion Queue size and allocates the specified 1458 * Completion Queue 1459 */ 1460 static int 1461 iser_ib_setup_cq(ibt_hca_hdl_t hca_hdl, uint_t cq_size, ibt_cq_hdl_t *cq_hdl) 1462 { 1463 1464 ibt_cq_attr_t cq_attr; 1465 int status; 1466 1467 cq_attr.cq_size = cq_size; 1468 cq_attr.cq_sched = 0; 1469 cq_attr.cq_flags = IBT_CQ_NO_FLAGS; 1470 1471 /* Allocate a Completion Queue */ 1472 status = ibt_alloc_cq(hca_hdl, &cq_attr, cq_hdl, NULL); 1473 if (status != IBT_SUCCESS) { 1474 ISER_LOG(CE_NOTE, "iser_ib_setup_cq: ibt_alloc_cq failure (%d)", 1475 status); 1476 return (status); 1477 } 1478 1479 return (ISER_STATUS_SUCCESS); 1480 } 1481 1482 /* 1483 * iser_ib_setup_chanargs 1484 * 1485 */ 1486 static void 1487 iser_ib_setup_chanargs(uint8_t hca_port, ibt_cq_hdl_t scq_hdl, 1488 ibt_cq_hdl_t rcq_hdl, uint_t sq_size, uint_t rq_size, 1489 ibt_pd_hdl_t hca_pdhdl, ibt_rc_chan_alloc_args_t *cargs) 1490 { 1491 1492 bzero(cargs, sizeof (ibt_rc_chan_alloc_args_t)); 1493 1494 /* 1495 * Set up the size of the channels send queue, receive queue and the 1496 * maximum number of elements in a scatter gather list of work requests 1497 * posted to the send and receive queues. 
1498 */ 1499 cargs->rc_sizes.cs_sq = sq_size; 1500 cargs->rc_sizes.cs_rq = rq_size; 1501 cargs->rc_sizes.cs_sq_sgl = ISER_IB_SGLIST_SIZE; 1502 cargs->rc_sizes.cs_rq_sgl = ISER_IB_SGLIST_SIZE; 1503 1504 /* 1505 * All Work requests signaled on a WR basis will receive a send 1506 * request completion. 1507 */ 1508 cargs->rc_flags = IBT_ALL_SIGNALED; 1509 1510 /* Enable RDMA read and RDMA write on the channel end points */ 1511 cargs->rc_control = IBT_CEP_RDMA_RD | IBT_CEP_RDMA_WR; 1512 1513 /* Set the local hca port on which the channel is allocated */ 1514 cargs->rc_hca_port_num = hca_port; 1515 1516 /* Set the Send and Receive Completion Queue handles */ 1517 cargs->rc_scq = scq_hdl; 1518 cargs->rc_rcq = rcq_hdl; 1519 1520 /* Set the protection domain associated with the channel */ 1521 cargs->rc_pd = hca_pdhdl; 1522 1523 /* No SRQ usage */ 1524 cargs->rc_srq = NULL; 1525 } 1526 1527 /* 1528 * iser_ib_init_qp 1529 * Initialize the QP handle 1530 */ 1531 void 1532 iser_ib_init_qp(iser_chan_t *chan, uint_t sq_size, uint_t rq_size) 1533 { 1534 /* Initialize the handle lock */ 1535 mutex_init(&chan->ic_qp.qp_lock, NULL, MUTEX_DRIVER, NULL); 1536 1537 /* Record queue sizes */ 1538 chan->ic_qp.sq_size = sq_size; 1539 chan->ic_qp.rq_size = rq_size; 1540 1541 /* Initialize the RQ monitoring data */ 1542 chan->ic_qp.rq_depth = rq_size; 1543 chan->ic_qp.rq_level = 0; 1544 chan->ic_qp.rq_lwm = (chan->ic_recvcq_sz * ISER_IB_RQ_LWM_PCT) / 100; 1545 1546 /* Initialize the taskq flag */ 1547 chan->ic_qp.rq_taskqpending = B_FALSE; 1548 } 1549 1550 /* 1551 * iser_ib_fini_qp 1552 * Teardown the QP handle 1553 */ 1554 void 1555 iser_ib_fini_qp(iser_qp_t *qp) 1556 { 1557 /* Destroy the handle lock */ 1558 mutex_destroy(&qp->qp_lock); 1559 } 1560 1561 static int 1562 iser_ib_activate_port(idm_svc_t *idm_svc, ib_guid_t guid, ib_gid_t gid) 1563 { 1564 iser_svc_t *iser_svc; 1565 iser_sbind_t *is_sbind; 1566 int status; 1567 1568 iser_svc = idm_svc->is_iser_svc; 1569 1570 /* 1571 * Save the 
address of the service bind handle in the 1572 * iser_svc_t to undo the service binding at a later time 1573 */ 1574 is_sbind = kmem_zalloc(sizeof (iser_sbind_t), KM_SLEEP); 1575 is_sbind->is_gid = gid; 1576 is_sbind->is_guid = guid; 1577 1578 status = ibt_bind_service(iser_svc->is_srvhdl, gid, NULL, 1579 idm_svc, &is_sbind->is_sbindhdl); 1580 1581 if (status != IBT_SUCCESS) { 1582 ISER_LOG(CE_NOTE, "iser_ib_activate_port: status(0x%x): " 1583 "Bind service(%llx) on port(%llx:%llx) failed", 1584 status, (longlong_t)iser_svc->is_svcid, 1585 (longlong_t)gid.gid_prefix, (longlong_t)gid.gid_guid); 1586 1587 kmem_free(is_sbind, sizeof (iser_sbind_t)); 1588 1589 return (status); 1590 } 1591 1592 list_insert_tail(&iser_svc->is_sbindlist, is_sbind); 1593 1594 return (IBT_SUCCESS); 1595 } 1596 1597 static void 1598 iser_ib_deactivate_port(ib_guid_t hca_guid, ib_gid_t gid) 1599 { 1600 iser_svc_t *iser_svc; 1601 iser_conn_t *iser_conn; 1602 iser_sbind_t *is_sbind; 1603 idm_conn_t *idm_conn; 1604 1605 /* 1606 * Iterate through the global list of IDM target connections. 1607 * Issue a TRANSPORT_FAIL for any connections on this port, and 1608 * if there is a bound service running on the port, tear it down. 
1609 */ 1610 mutex_enter(&idm.idm_global_mutex); 1611 for (idm_conn = list_head(&idm.idm_tgt_conn_list); 1612 idm_conn != NULL; 1613 idm_conn = list_next(&idm.idm_tgt_conn_list, idm_conn)) { 1614 1615 if (idm_conn->ic_transport_type != IDM_TRANSPORT_TYPE_ISER) { 1616 /* this is not an iSER connection, skip it */ 1617 continue; 1618 } 1619 1620 iser_conn = idm_conn->ic_transport_private; 1621 if (iser_conn->ic_chan->ic_ibt_path.pi_hca_guid != hca_guid) { 1622 /* this iSER connection is on a different port */ 1623 continue; 1624 } 1625 1626 /* Fail the transport for this connection */ 1627 idm_conn_event(idm_conn, CE_TRANSPORT_FAIL, IDM_STATUS_FAIL); 1628 1629 if (idm_conn->ic_conn_type == CONN_TYPE_INI) { 1630 /* initiator connection, nothing else to do */ 1631 continue; 1632 } 1633 1634 /* Check for a service binding */ 1635 iser_svc = idm_conn->ic_svc_binding->is_iser_svc; 1636 is_sbind = iser_ib_get_bind(iser_svc, hca_guid, gid); 1637 if (is_sbind != NULL) { 1638 /* This service is still bound, tear it down */ 1639 ibt_unbind_service(iser_svc->is_srvhdl, 1640 is_sbind->is_sbindhdl); 1641 list_remove(&iser_svc->is_sbindlist, is_sbind); 1642 kmem_free(is_sbind, sizeof (iser_sbind_t)); 1643 } 1644 } 1645 mutex_exit(&idm.idm_global_mutex); 1646 } 1647 1648 static iser_sbind_t * 1649 iser_ib_get_bind(iser_svc_t *iser_svc, ib_guid_t hca_guid, ib_gid_t gid) 1650 { 1651 iser_sbind_t *is_sbind; 1652 1653 for (is_sbind = list_head(&iser_svc->is_sbindlist); 1654 is_sbind != NULL; 1655 is_sbind = list_next(&iser_svc->is_sbindlist, is_sbind)) { 1656 1657 if ((is_sbind->is_guid == hca_guid) && 1658 (is_sbind->is_gid.gid_prefix == gid.gid_prefix) && 1659 (is_sbind->is_gid.gid_guid == gid.gid_guid)) { 1660 return (is_sbind); 1661 } 1662 } 1663 return (NULL); 1664 } 1665