1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 2005 SilverStorm Technologies, Inc. All rights reserved. 27 * 28 * This software is available to you under a choice of one of two 29 * licenses. You may choose to be licensed under the terms of the GNU 30 * General Public License (GPL) Version 2, available from the file 31 * COPYING in the main directory of this source tree, or the 32 * OpenIB.org BSD license below: 33 * 34 * Redistribution and use in source and binary forms, with or 35 * without modification, are permitted provided that the following 36 * conditions are met: 37 * 38 * - Redistributions of source code must retain the above 39 * copyright notice, this list of conditions and the following 40 * disclaimer. 41 * 42 * - Redistributions in binary form must reproduce the above 43 * copyright notice, this list of conditions and the following 44 * disclaimer in the documentation and/or other materials 45 * provided with the distribution. 
46 * 47 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 48 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 49 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 50 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 51 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 52 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 53 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 54 * SOFTWARE. 55 * 56 */ 57 /* 58 * Sun elects to include this software in Sun product 59 * under the OpenIB BSD license. 60 * 61 * 62 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 63 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 64 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 65 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 66 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 67 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 68 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 69 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 70 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 71 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 72 * POSSIBILITY OF SUCH DAMAGE. 
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/ib/ibtl/ibti.h>
#include <sys/ib/ibtl/ibtl_types.h>
#include <sys/ib/clients/rds/rdsib_cm.h>
#include <sys/ib/clients/rds/rdsib_ib.h>
#include <sys/ib/clients/rds/rdsib_buf.h>
#include <sys/ib/clients/rds/rdsib_ep.h>
#include <sys/ib/clients/rds/rds_kstat.h>

/* IBTF async event handler registered via rds_ib_modinfo below */
static void rds_async_handler(void *clntp, ibt_hca_hdl_t hdl,
    ibt_async_code_t code, ibt_async_event_t *event);

/* IBTI client registration info passed to ibt_attach() in rdsib_open_ib() */
static struct ibt_clnt_modinfo_s rds_ib_modinfo = {
	IBTI_V2,
	IBT_NETWORK,
	rds_async_handler,
	NULL,
	"RDS"
};

/* performance tunables */
uint_t		rds_no_interrupts = 0;	/* non-zero: CQs polled, not armed */
uint_t		rds_poll_percent_full = 25;
uint_t		rds_wc_signal = IBT_NEXT_SOLICITED;
uint_t		rds_waittime_ms = 100; /* ms */

extern dev_info_t *rdsib_dev_info;
extern void rds_close_sessions();

/*
 * Clamp the global queue-size tunables so they do not exceed the limits
 * advertised by this HCA (max channel size, max CQ size, max memory
 * region length). Called once per HCA from rdsib_open_ib().
 */
static void
rdsib_validate_chan_sizes(ibt_hca_attr_t *hattrp)
{
	/* The SQ size should not be more than that supported by the HCA */
	if (((MaxDataSendBuffers + RDS_NUM_ACKS) > hattrp->hca_max_chan_sz) ||
	    ((MaxDataSendBuffers + RDS_NUM_ACKS) > hattrp->hca_max_cq_sz)) {
		RDS_DPRINTF0("RDSIB", "MaxDataSendBuffers + %d is greater "
		    "than that supported by the HCA driver "
		    "(%d + %d > %d or %d), lowering it to a supported value.",
		    RDS_NUM_ACKS, MaxDataSendBuffers, RDS_NUM_ACKS,
		    hattrp->hca_max_chan_sz, hattrp->hca_max_cq_sz);

		/* leave room for RDS_NUM_ACKS below the smaller limit */
		MaxDataSendBuffers = (hattrp->hca_max_chan_sz >
		    hattrp->hca_max_cq_sz) ?
		    hattrp->hca_max_cq_sz - RDS_NUM_ACKS :
		    hattrp->hca_max_chan_sz - RDS_NUM_ACKS;
	}

	/* The RQ size should not be more than that supported by the HCA */
	if ((MaxDataRecvBuffers > hattrp->hca_max_chan_sz) ||
	    (MaxDataRecvBuffers > hattrp->hca_max_cq_sz)) {
		RDS_DPRINTF0("RDSIB", "MaxDataRecvBuffers is greater than that "
		    "supported by the HCA driver (%d > %d or %d), lowering it "
		    "to a supported value.", MaxDataRecvBuffers,
		    hattrp->hca_max_chan_sz, hattrp->hca_max_cq_sz);

		MaxDataRecvBuffers = (hattrp->hca_max_chan_sz >
		    hattrp->hca_max_cq_sz) ? hattrp->hca_max_cq_sz :
		    hattrp->hca_max_chan_sz;
	}

	/* The SQ size should not be more than that supported by the HCA */
	if ((MaxCtrlSendBuffers > hattrp->hca_max_chan_sz) ||
	    (MaxCtrlSendBuffers > hattrp->hca_max_cq_sz)) {
		RDS_DPRINTF0("RDSIB", "MaxCtrlSendBuffers is greater than that "
		    "supported by the HCA driver (%d > %d or %d), lowering it "
		    "to a supported value.", MaxCtrlSendBuffers,
		    hattrp->hca_max_chan_sz, hattrp->hca_max_cq_sz);

		MaxCtrlSendBuffers = (hattrp->hca_max_chan_sz >
		    hattrp->hca_max_cq_sz) ? hattrp->hca_max_cq_sz :
		    hattrp->hca_max_chan_sz;
	}

	/* The RQ size should not be more than that supported by the HCA */
	if ((MaxCtrlRecvBuffers > hattrp->hca_max_chan_sz) ||
	    (MaxCtrlRecvBuffers > hattrp->hca_max_cq_sz)) {
		RDS_DPRINTF0("RDSIB", "MaxCtrlRecvBuffers is greater than that "
		    "supported by the HCA driver (%d > %d or %d), lowering it "
		    "to a supported value.", MaxCtrlRecvBuffers,
		    hattrp->hca_max_chan_sz, hattrp->hca_max_cq_sz);

		MaxCtrlRecvBuffers = (hattrp->hca_max_chan_sz >
		    hattrp->hca_max_cq_sz) ? hattrp->hca_max_cq_sz :
		    hattrp->hca_max_chan_sz;
	}

	/* The MaxRecvMemory should be less than that supported by the HCA */
	if ((MaxRecvMemory * 1024) > hattrp->hca_max_memr_len) {
		/*
		 * NOTE(review): the comparison treats MaxRecvMemory as KB
		 * (* 1024) but the assignment below stores the raw byte
		 * limit — the units look inconsistent; confirm intent.
		 */
		RDS_DPRINTF0("RDSIB", "MaxRecvMemory is greater than that "
		    "supported by the HCA driver (%d > %d), lowering it to %d",
		    MaxRecvMemory, hattrp->hca_max_memr_len,
		    hattrp->hca_max_memr_len);

		MaxRecvMemory = hattrp->hca_max_memr_len;
	}
}

/*
 * Called on open of first RDS socket.
 *
 * Registers with IBTF, opens every HCA it can, gathers per-HCA state
 * (attributes, port info, one PD per HCA) into a list hung off
 * rdsib_statep, then registers and binds the RDS service.
 * Returns 0 on success, -1 if no HCA could be fully initialized.
 */
int
rdsib_open_ib()
{
	ib_guid_t	*guidp;
	rds_hca_t	*hcap, *hcap1;
	uint_t		ix, hcaix, nhcas;
	int		ret;

	RDS_DPRINTF4("rdsib_open_ib", "enter: statep %p", rdsib_statep);

	ASSERT(rdsib_statep != NULL);
	if (rdsib_statep == NULL) {
		RDS_DPRINTF1("rdsib_open_ib", "RDS Statep not initialized");
		return (-1);
	}

	/* How many hcas are there? */
	nhcas = ibt_get_hca_list(&guidp);
	if (nhcas == 0) {
		RDS_DPRINTF2("rdsib_open_ib", "No IB HCAs Available");
		return (-1);
	}

	RDS_DPRINTF3("rdsib_open_ib", "Number of HCAs: %d", nhcas);

	/* Register with IBTF */
	ret = ibt_attach(&rds_ib_modinfo, rdsib_dev_info, rdsib_statep,
	    &rdsib_statep->rds_ibhdl);
	if (ret != IBT_SUCCESS) {
		RDS_DPRINTF2(LABEL, "ibt_attach failed: %d", ret);
		(void) ibt_free_hca_list(guidp, nhcas);
		return (-1);
	}

	/*
	 * Open each HCA and gather its information. Don't care about HCAs
	 * that cannot be opened. It is OK as long as atleast one HCA can be
	 * opened.
	 * Initialize a HCA only if all the information is available.
	 */
	hcap1 = NULL;
	for (ix = 0, hcaix = 0; ix < nhcas; ix++) {
		RDS_DPRINTF3(LABEL, "Open HCA: 0x%llx", guidp[ix]);

		hcap = (rds_hca_t *)kmem_zalloc(sizeof (rds_hca_t), KM_SLEEP);

		ret = ibt_open_hca(rdsib_statep->rds_ibhdl, guidp[ix],
		    &hcap->hca_hdl);
		if (ret != IBT_SUCCESS) {
			RDS_DPRINTF2("rdsib_open_ib",
			    "ibt_open_hca: 0x%llx failed: %d", guidp[ix], ret);
			kmem_free(hcap, sizeof (rds_hca_t));
			continue;
		}

		hcap->hca_guid = guidp[ix];

		ret = ibt_query_hca(hcap->hca_hdl, &hcap->hca_attr);
		if (ret != IBT_SUCCESS) {
			RDS_DPRINTF2("rdsib_open_ib",
			    "Query HCA: 0x%llx failed: %d", guidp[ix], ret);
			ret = ibt_close_hca(hcap->hca_hdl);
			ASSERT(ret == IBT_SUCCESS);
			kmem_free(hcap, sizeof (rds_hca_t));
			continue;
		}

		/* port 0 means: query all ports of this HCA */
		ret = ibt_query_hca_ports(hcap->hca_hdl, 0,
		    &hcap->hca_pinfop, &hcap->hca_nports, &hcap->hca_pinfo_sz);
		if (ret != IBT_SUCCESS) {
			RDS_DPRINTF2("rdsib_open_ib",
			    "Query HCA 0x%llx ports failed: %d", guidp[ix],
			    ret);
			ret = ibt_close_hca(hcap->hca_hdl);
			ASSERT(ret == IBT_SUCCESS);
			kmem_free(hcap, sizeof (rds_hca_t));
			continue;
		}

		/* Only one PD per HCA is allocated, so do it here */
		ret = ibt_alloc_pd(hcap->hca_hdl, IBT_PD_NO_FLAGS,
		    &hcap->hca_pdhdl);
		if (ret != IBT_SUCCESS) {
			RDS_DPRINTF2(LABEL, "ibt_alloc_pd 0x%llx failed: %d",
			    guidp[ix], ret);
			(void) ibt_free_portinfo(hcap->hca_pinfop,
			    hcap->hca_pinfo_sz);
			ret = ibt_close_hca(hcap->hca_hdl);
			ASSERT(ret == IBT_SUCCESS);
			kmem_free(hcap, sizeof (rds_hca_t));
			continue;
		}

		rdsib_validate_chan_sizes(&hcap->hca_attr);

		/* this HCA is fully initialized, go to the next one */
		hcaix++;
		hcap->hca_nextp = hcap1;
		hcap1 = hcap;
	}

	/* free the HCA list, we are done with it */
	(void) ibt_free_hca_list(guidp, nhcas);

	if (hcaix == 0) {
		/* Failed to Initialize even one HCA */
		RDS_DPRINTF2("rdsib_open_ib", "No HCAs are initialized");
		(void) ibt_detach(rdsib_statep->rds_ibhdl);
		rdsib_statep->rds_ibhdl = NULL;
		return (-1);
	}

	if (hcaix < nhcas) {
		RDS_DPRINTF2("rdsib_open_ib", "HCAs %d/%d failed to initialize",
		    (nhcas - hcaix), nhcas);
	}

	rdsib_statep->rds_hcalistp = hcap1;
	rdsib_statep->rds_nhcas = hcaix;

	/* register the RDS service */
	rdsib_statep->rds_srvhdl =
	    rds_register_service(rdsib_statep->rds_ibhdl);
	if (rdsib_statep->rds_srvhdl == NULL) {
		RDS_DPRINTF2("rdsib_open_ib", "Service registration failed");
	} else {
		/* bind the service on all available ports */
		ret = rds_bind_service(rdsib_statep);
		if (ret != 0) {
			RDS_DPRINTF2("rdsib_open_ib", "Bind service failed");
		}
	}

	RDS_DPRINTF4("rdsib_open_ib", "return: statep %p", rdsib_statep);

	return (0);
}

/*
 * Called when all ports are closed.
 *
 * Undoes rdsib_open_ib(): unbinds/deregisters the service, tears down
 * all sessions, releases every per-HCA resource (PD, portinfo, HCA
 * handle) and finally detaches from IBTF.
 */
void
rdsib_close_ib()
{
	rds_hca_t	*hcap, *nextp;
	int		ret;

	RDS_DPRINTF2("rds_close_ib", "enter: statep %p", rdsib_statep);

	if (rdsib_statep->rds_srvhdl != NULL) {
		(void) ibt_unbind_all_services(rdsib_statep->rds_srvhdl);
		(void) ibt_deregister_service(rdsib_statep->rds_ibhdl,
		    rdsib_statep->rds_srvhdl);
	}

	/* close and destroy all the sessions */
	rds_close_sessions(NULL);

	/*
	 * Release all HCA resources; detach the list from the state
	 * under the writer lock, then free each entry outside the lock.
	 */
	rw_enter(&rdsib_statep->rds_hca_lock, RW_WRITER);
	hcap = rdsib_statep->rds_hcalistp;
	rdsib_statep->rds_hcalistp = NULL;
	rdsib_statep->rds_nhcas = 0;
	rw_exit(&rdsib_statep->rds_hca_lock);

	while (hcap != NULL) {
		nextp = hcap->hca_nextp;

		ret = ibt_free_pd(hcap->hca_hdl, hcap->hca_pdhdl);
		ASSERT(ret == IBT_SUCCESS);

		(void) ibt_free_portinfo(hcap->hca_pinfop, hcap->hca_pinfo_sz);

		ret = ibt_close_hca(hcap->hca_hdl);
		ASSERT(ret == IBT_SUCCESS);

		kmem_free(hcap, sizeof (rds_hca_t));
		hcap =
		    nextp;
	}

	/* Deregister with IBTF */
	if (rdsib_statep->rds_ibhdl != NULL) {
		(void) ibt_detach(rdsib_statep->rds_ibhdl);
		rdsib_statep->rds_ibhdl = NULL;
	}

	RDS_DPRINTF2("rds_close_ib", "return: statep %p", rdsib_statep);
}

/*
 * Return hcap, given the hca guid.
 * Walks the HCA list under the reader lock; returns NULL if no match.
 */
rds_hca_t *
rds_get_hcap(rds_state_t *statep, ib_guid_t hca_guid)
{
	rds_hca_t	*hcap;

	RDS_DPRINTF4("rds_get_hcap", "rds_get_hcap: Enter: statep: 0x%p "
	    "guid: %llx", statep, hca_guid);

	rw_enter(&statep->rds_hca_lock, RW_READER);

	hcap = statep->rds_hcalistp;
	while ((hcap != NULL) && (hcap->hca_guid != hca_guid)) {
		hcap = hcap->hca_nextp;
	}

	rw_exit(&statep->rds_hca_lock);

	RDS_DPRINTF4("rds_get_hcap", "rds_get_hcap: return");

	return (hcap);
}

/*
 * Return hcap, given a gid.
 * Matches the gid against sgid table entry 0 of every port of every
 * HCA, under the reader lock; returns NULL if no port matches.
 */
rds_hca_t *
rds_gid_to_hcap(rds_state_t *statep, ib_gid_t gid)
{
	rds_hca_t	*hcap;
	uint_t		ix;

	RDS_DPRINTF4("rds_gid_to_hcap", "Enter: statep: 0x%p gid: %llx:%llx",
	    statep, gid.gid_prefix, gid.gid_guid);

	rw_enter(&statep->rds_hca_lock, RW_READER);

	hcap = statep->rds_hcalistp;
	while (hcap != NULL) {
		for (ix = 0; ix < hcap->hca_nports; ix++) {
			if ((hcap->hca_pinfop[ix].p_sgid_tbl[0].gid_prefix ==
			    gid.gid_prefix) &&
			    (hcap->hca_pinfop[ix].p_sgid_tbl[0].gid_guid ==
			    gid.gid_guid)) {
				RDS_DPRINTF4("rds_gid_to_hcap",
				    "gid found in hcap: 0x%p", hcap);
				rw_exit(&statep->rds_hca_lock);
				return (hcap);
			}
		}
		hcap = hcap->hca_nextp;
	}

	rw_exit(&statep->rds_hca_lock);

	return (NULL);
}

/*
 * This is called from the send CQ handler (on RDMA-write completion).
 * If new messages arrived since the last ACK was posted, posts the next
 * ACK carrying the latest received buffer id; otherwise drops the
 * in-flight ACK count. ep_rdmacnt must be non-zero on entry.
 */
void
rds_send_acknowledgement(rds_ep_t *ep)
{
	int	ret;
	uint_t	ix;

	RDS_DPRINTF4("rds_send_acknowledgement", "Enter EP(%p)", ep);

	mutex_enter(&ep->ep_lock);

	ASSERT(ep->ep_rdmacnt != 0);

	/*
	 * The previous ACK completed
	 * successfully, send the next one
	 * if more messages were received after sending the last ACK
	 */
	if (ep->ep_rbufid != *(uintptr_t *)(uintptr_t)ep->ep_ackds.ds_va) {
		*(uintptr_t *)(uintptr_t)ep->ep_ackds.ds_va = ep->ep_rbufid;
		mutex_exit(&ep->ep_lock);

		/* send acknowledgement */
		RDS_INCR_TXACKS();
		ret = ibt_post_send(ep->ep_chanhdl, &ep->ep_ackwr, 1, &ix);
		if (ret != IBT_SUCCESS) {
			RDS_DPRINTF1("rds_send_acknowledgement",
			    "EP(%p): ibt_post_send for acknowledgement "
			    "failed: %d, SQ depth: %d",
			    ep, ret, ep->ep_sndpool.pool_nbusy);
			/* post failed: give back the rdma slot we hold */
			mutex_enter(&ep->ep_lock);
			ep->ep_rdmacnt--;
			mutex_exit(&ep->ep_lock);
		}
	} else {
		/* ACKed all messages, no more to ACK */
		ep->ep_rdmacnt--;
		mutex_exit(&ep->ep_lock);
		return;
	}

	RDS_DPRINTF4("rds_send_acknowledgement", "Return EP(%p)", ep);
}

/*
 * Poll one completion off the control channel's recv CQ, hand the
 * control packet to rds_handle_control_message(), and refill the RQ via
 * the taskq when it drains to the low-water mark. Returns the
 * ibt_poll_cq() status (IBT_CQ_EMPTY when drained).
 */
static int
rds_poll_ctrl_completions(ibt_cq_hdl_t cq, rds_ep_t *ep)
{
	ibt_wc_t	wc;
	uint_t		npolled;
	rds_buf_t	*bp;
	rds_ctrl_pkt_t	*cpkt;
	rds_qp_t	*recvqp;
	int		ret = IBT_SUCCESS;

	RDS_DPRINTF4("rds_poll_ctrl_completions", "Enter: EP(%p)", ep);

	bzero(&wc, sizeof (ibt_wc_t));
	ret = ibt_poll_cq(cq, &wc, 1, &npolled);
	if (ret != IBT_SUCCESS) {
		if (ret != IBT_CQ_EMPTY) {
			RDS_DPRINTF2(LABEL, "EP(%p) CQ(%p): ibt_poll_cq "
			    "returned: %d", ep, cq, ret);
		} else {
			RDS_DPRINTF5(LABEL, "EP(%p) CQ(%p): ibt_poll_cq "
			    "returned: IBT_CQ_EMPTY", ep, cq);
		}
		return (ret);
	}

	/* wr_id was set to the posted buffer's address */
	bp = (rds_buf_t *)(uintptr_t)wc.wc_id;

	if (wc.wc_status != IBT_WC_SUCCESS) {
		mutex_enter(&ep->ep_recvqp.qp_lock);
		ep->ep_recvqp.qp_level--;
		mutex_exit(&ep->ep_recvqp.qp_lock);

		/* Free the buffer */
		bp->buf_state = RDS_RCVBUF_FREE;
		rds_free_recv_buf(bp, 1);

		/* Receive completion failure (flush errors are expected) */
		if (wc.wc_status != IBT_WC_WR_FLUSHED_ERR) {
			RDS_DPRINTF2("rds_poll_ctrl_completions",
			    "EP(%p) CQ(%p)
   Assumes the ep->refcnt is already incremented */
void
rds_post_recv_buf(void *arg)
{
	ibt_channel_hdl_t	chanhdl;
	rds_ep_t		*ep;
	rds_session_t		*sp;
	rds_qp_t		*recvqp;
	rds_bufpool_t		*gp;
	rds_buf_t		*bp, *bp1;
	ibt_recv_wr_t		*wrp, wr[RDS_POST_FEW_ATATIME];
	rds_hca_t		*hcap;
	uint_t			npost, nspace, rcv_len;
	uint_t			ix, jx, kx;
	int			ret;

	/* arg is the channel handle; the ep hangs off its chan private */
	chanhdl = (ibt_channel_hdl_t)arg;
	RDS_DPRINTF4("rds_post_recv_buf", "Enter: CHAN(%p)", chanhdl);
	RDS_INCR_POST_RCV_BUF_CALLS();

	ep = (rds_ep_t *)ibt_get_chan_private(chanhdl);
	ASSERT(ep != NULL);
	sp = ep->ep_sp;
	recvqp = &ep->ep_recvqp;

	RDS_DPRINTF5("rds_post_recv_buf", "EP(%p)", ep);

	/* get the hcap for the HCA hosting this channel */
	hcap = rds_get_hcap(rdsib_statep, ep->ep_hca_guid);
	if (hcap == NULL) {
		RDS_DPRINTF2("rds_post_recv_buf", "HCA (0x%llx) not found",
		    ep->ep_hca_guid);
		return;
	}

	/* Make sure the session is still connected */
	rw_enter(&sp->session_lock, RW_READER);
	if ((sp->session_state != RDS_SESSION_STATE_INIT) &&
	    (sp->session_state != RDS_SESSION_STATE_CONNECTED)) {
		RDS_DPRINTF2("rds_post_recv_buf", "EP(%p): Session is not "
		    "in active state (%d)", ep, sp->session_state);
		rw_exit(&sp->session_lock);
		return;
	}
	rw_exit(&sp->session_lock);

	/* how many can be posted */
	mutex_enter(&recvqp->qp_lock);
	nspace = recvqp->qp_depth - recvqp->qp_level;
	if (nspace == 0) {
		RDS_DPRINTF2("rds_post_recv_buf", "RQ is FULL");
		recvqp->qp_taskqpending = B_FALSE;
		mutex_exit(&recvqp->qp_lock);
		return;
	}
	mutex_exit(&recvqp->qp_lock);

	/* pick the buffer pool and per-buffer length for this EP type */
	if (ep->ep_type == RDS_EP_TYPE_DATA) {
		gp = &rds_dpool;
		rcv_len = RdsPktSize;
	} else {
		gp = &rds_cpool;
		rcv_len = RDS_CTRLPKT_SIZE;
	}

	bp = rds_get_buf(gp, nspace, &jx);
	if (bp == NULL) {
		RDS_DPRINTF2(LABEL, "EP(%p): No Recv buffers available", ep);
		/* try again later */
		ret = ddi_taskq_dispatch(rds_taskq, rds_post_recv_buf,
		    (void *)ep->ep_chanhdl, DDI_NOSLEEP);
		if (ret != DDI_SUCCESS) {
			RDS_DPRINTF1(LABEL, "ddi_taskq_dispatch failed: %d",
			    ret);
			mutex_enter(&recvqp->qp_lock);
			recvqp->qp_taskqpending = B_FALSE;
			mutex_exit(&recvqp->qp_lock);
		}
		return;
	}

	/* got fewer buffers than requested: post what we have */
	if (jx != nspace) {
		RDS_DPRINTF2(LABEL, "EP(%p): Recv buffers "
		    "needed: %d available: %d", ep, nspace, jx);
		nspace = jx;
	}

	/* stamp every buffer with this EP, lkey and length before posting */
	bp1 = bp;
	for (ix = 0; ix < nspace; ix++) {
		bp1->buf_ep = ep;
		ASSERT(bp1->buf_state == RDS_RCVBUF_FREE);
		bp1->buf_state = RDS_RCVBUF_POSTED;
		bp1->buf_ds.ds_key = hcap->hca_lkey;
		bp1->buf_ds.ds_len = rcv_len;
		bp1 = bp1->buf_nextp;
	}

#if 0
	wrp = kmem_zalloc(RDS_POST_FEW_ATATIME * sizeof (ibt_recv_wr_t),
	    KM_SLEEP);
#else
	wrp = &wr[0];
#endif

	/* post in batches of at most RDS_POST_FEW_ATATIME WRs */
	npost = nspace;
	while (npost) {
		jx = (npost > RDS_POST_FEW_ATATIME) ?
		    RDS_POST_FEW_ATATIME : npost;
		for (ix = 0; ix < jx; ix++) {
			wrp[ix].wr_id = (uintptr_t)bp;
			wrp[ix].wr_nds = 1;
			wrp[ix].wr_sgl = &bp->buf_ds;
			bp = bp->buf_nextp;
		}

		ret = ibt_post_recv(chanhdl, wrp, jx, &kx);
		if ((ret != IBT_SUCCESS) || (kx != jx)) {
			RDS_DPRINTF1(LABEL, "ibt_post_recv for %d WRs failed: "
			    "%d", npost, ret);
			npost -= kx;
			break;
		}

		npost -= jx;
	}

	mutex_enter(&recvqp->qp_lock);
	if (npost != 0) {
		RDS_DPRINTF2("rds_post_recv_buf",
		    "EP(%p) Failed to post %d WRs", ep, npost);
		recvqp->qp_level += (nspace - npost);
	} else {
		recvqp->qp_level += nspace;
	}

	/*
	 * sometimes, the recv WRs can get consumed as soon as they are
	 * posted. In that case, taskq thread to post more WRs to the RQ will
	 * not be scheduled as the taskqpending flag is still set.
	 */
	if (recvqp->qp_level == 0) {
		mutex_exit(&recvqp->qp_lock);
		ret = ddi_taskq_dispatch(rds_taskq,
		    rds_post_recv_buf, (void *)ep->ep_chanhdl, DDI_NOSLEEP);
		if (ret != DDI_SUCCESS) {
			RDS_DPRINTF1("rds_post_recv_buf",
			    "ddi_taskq_dispatch failed: %d", ret);
			mutex_enter(&recvqp->qp_lock);
			recvqp->qp_taskqpending = B_FALSE;
			mutex_exit(&recvqp->qp_lock);
		}
	} else {
		recvqp->qp_taskqpending = B_FALSE;
		mutex_exit(&recvqp->qp_lock);
	}

#if 0
	kmem_free(wrp, RDS_POST_FEW_ATATIME * sizeof (ibt_recv_wr_t));
#endif

	RDS_DPRINTF4("rds_post_recv_buf", "Return: EP(%p)", ep);
}

/*
 * Poll one completion off a data channel's recv CQ. On success the
 * buffer is either delivered to the socket (single packet / last packet
 * of a segmented message via rds_received_msg()) or chained onto the
 * EP's in-progress segment list (ep_segfbp/ep_seglbp, keyed off
 * dh_npkts/dh_psn in the packet header). Refills the RQ through the
 * taskq at the low-water mark. Returns the ibt_poll_cq() status.
 */
static int
rds_poll_data_completions(ibt_cq_hdl_t cq, rds_ep_t *ep)
{
	ibt_wc_t	wc;
	rds_buf_t	*bp;
	rds_data_hdr_t	*pktp;
	rds_qp_t	*recvqp;
	uint_t		npolled;
	int		ret = IBT_SUCCESS;


	RDS_DPRINTF4("rds_poll_data_completions", "Enter: EP(%p)", ep);

	bzero(&wc, sizeof (ibt_wc_t));
	ret = ibt_poll_cq(cq, &wc, 1, &npolled);
	if (ret != IBT_SUCCESS) {
		if (ret != IBT_CQ_EMPTY) {
			RDS_DPRINTF2(LABEL, "EP(%p) CQ(%p): ibt_poll_cq "
			    "returned: %d", ep, cq, ret);
		} else {
			RDS_DPRINTF5(LABEL, "EP(%p) CQ(%p): ibt_poll_cq "
			    "returned: IBT_CQ_EMPTY", ep, cq);
		}
		return (ret);
	}

	/* wr_id was set to the posted buffer's address */
	bp = (rds_buf_t *)(uintptr_t)wc.wc_id;
	ASSERT(bp->buf_state == RDS_RCVBUF_POSTED);
	bp->buf_state = RDS_RCVBUF_ONSOCKQ;
	bp->buf_nextp = NULL;

	if (wc.wc_status != IBT_WC_SUCCESS) {
		mutex_enter(&ep->ep_recvqp.qp_lock);
		ep->ep_recvqp.qp_level--;
		mutex_exit(&ep->ep_recvqp.qp_lock);

		/* free the buffer */
		bp->buf_state = RDS_RCVBUF_FREE;
		rds_free_recv_buf(bp, 1);

		/* Receive completion failure (flush errors are expected) */
		if (wc.wc_status != IBT_WC_WR_FLUSHED_ERR) {
			RDS_DPRINTF2("rds_poll_data_completions",
			    "EP(%p) CQ(%p) BP(%p): WC Error Status: %d",
			    ep, cq, wc.wc_id, wc.wc_status);
			RDS_INCR_RXERRS();
		}
		return
	    (ret);
	}

	/* there is one less in the RQ */
	recvqp = &ep->ep_recvqp;
	mutex_enter(&recvqp->qp_lock);
	recvqp->qp_level--;
	if ((recvqp->qp_taskqpending == B_FALSE) &&
	    (recvqp->qp_level <= recvqp->qp_lwm)) {
		/* Time to post more buffers into the RQ */
		recvqp->qp_taskqpending = B_TRUE;
		mutex_exit(&recvqp->qp_lock);

		ret = ddi_taskq_dispatch(rds_taskq,
		    rds_post_recv_buf, (void *)ep->ep_chanhdl, DDI_NOSLEEP);
		if (ret != DDI_SUCCESS) {
			RDS_DPRINTF1(LABEL, "ddi_taskq_dispatch failed: %d",
			    ret);
			mutex_enter(&recvqp->qp_lock);
			recvqp->qp_taskqpending = B_FALSE;
			mutex_exit(&recvqp->qp_lock);
		}
	} else {
		mutex_exit(&recvqp->qp_lock);
	}

	pktp = (rds_data_hdr_t *)(uintptr_t)bp->buf_ds.ds_va;
	ASSERT(pktp->dh_datalen != 0);

	RDS_DPRINTF5(LABEL, "Message Received: sendIP: 0x%x recvIP: 0x%x "
	    "sendport: %d recvport: %d npkts: %d pktno: %d", ep->ep_remip,
	    ep->ep_myip, pktp->dh_sendport, pktp->dh_recvport,
	    pktp->dh_npkts, pktp->dh_psn);

	RDS_DPRINTF3(LABEL, "BP(%p): npkts: %d psn: %d", bp,
	    pktp->dh_npkts, pktp->dh_psn);

	if (pktp->dh_npkts == 1) {
		/* single pkt or last packet */
		if (pktp->dh_psn != 0) {
			/* last packet of a segmented message */
			ASSERT(ep->ep_seglbp != NULL);
			ep->ep_seglbp->buf_nextp = bp;
			ep->ep_seglbp = bp;
			rds_received_msg(ep, ep->ep_segfbp);
			ep->ep_segfbp = NULL;
			ep->ep_seglbp = NULL;
		} else {
			/* single packet */
			rds_received_msg(ep, bp);
		}
	} else {
		/* multi-pkt msg */
		if (pktp->dh_psn == 0) {
			/* first packet */
			ASSERT(ep->ep_segfbp == NULL);
			ep->ep_segfbp = bp;
			ep->ep_seglbp = bp;
		} else {
			/* intermediate packet */
			ASSERT(ep->ep_segfbp != NULL);
			ep->ep_seglbp->buf_nextp = bp;
			ep->ep_seglbp = bp;
		}
	}

	RDS_DPRINTF4("rds_poll_data_completions", "Return: EP(%p)", ep);

	return (ret);
}

/*
 * Recv CQ completion handler. Drains the CQ (data or control poller
 * depending on EP type), re-arms the CQ, then drains once more to close
 * the race with completions that arrived between the drain and the arm.
 */
void
rds_recvcq_handler(ibt_cq_hdl_t cq, void *arg)
{
	rds_ep_t	*ep;
	int		ret = IBT_SUCCESS;
	int		(*func)(ibt_cq_hdl_t, rds_ep_t *);

	ep = (rds_ep_t *)arg;

	RDS_DPRINTF4("rds_recvcq_handler", "enter: EP(%p)", ep);

	if (ep->ep_type == RDS_EP_TYPE_DATA) {
		func = rds_poll_data_completions;
	} else {
		func = rds_poll_ctrl_completions;
	}

	do {
		ret = func(cq, ep);
	} while (ret != IBT_CQ_EMPTY);

	/* enable the CQ */
	ret = ibt_enable_cq_notify(cq, rds_wc_signal);
	if (ret != IBT_SUCCESS) {
		RDS_DPRINTF2(LABEL, "EP(%p) CQ(%p): ibt_enable_cq_notify "
		    "failed: %d", ep, cq, ret);
		return;
	}

	/* drain again: completions may have raced the re-arm above */
	do {
		ret = func(cq, ep);
	} while (ret != IBT_CQ_EMPTY);

	RDS_DPRINTF4("rds_recvcq_handler", "Return: EP(%p)", ep);
}

/*
 * Drain the send CQ. Successfully-sent buffers are collected on a local
 * list and returned to the send pool; RDMA-write completions trigger
 * the next ACK via rds_send_acknowledgement(). On a non-flush error the
 * session is moved to ERROR state (once) and rds_handle_send_error() is
 * called after the drain. 'lock' is passed through to
 * rds_free_send_buf().
 */
void
rds_poll_send_completions(ibt_cq_hdl_t cq, rds_ep_t *ep, boolean_t lock)
{
	ibt_wc_t	wc[RDS_NUM_DATA_SEND_WCS];
	uint_t		npolled, nret, send_error = 0;
	rds_buf_t	*headp, *tailp, *bp;
	int		ret, ix;

	RDS_DPRINTF4("rds_poll_send_completions", "Enter EP(%p)", ep);

	headp = NULL;
	tailp = NULL;
	npolled = 0;
	do {
		ret = ibt_poll_cq(cq, wc, RDS_NUM_DATA_SEND_WCS, &nret);
		if (ret != IBT_SUCCESS) {
			if (ret != IBT_CQ_EMPTY) {
				RDS_DPRINTF2(LABEL, "EP(%p) CQ(%p): "
				    "ibt_poll_cq returned: %d", ep, cq, ret);
			} else {
				RDS_DPRINTF5(LABEL, "EP(%p) CQ(%p): "
				    "ibt_poll_cq returned: IBT_CQ_EMPTY",
				    ep, cq);
			}

			break;
		}

		for (ix = 0; ix < nret; ix++) {
			if (wc[ix].wc_status == IBT_WC_SUCCESS) {
				if (wc[ix].wc_type == IBT_WRC_RDMAW) {
					/* ACK write completed; chain next */
					rds_send_acknowledgement(ep);
					continue;
				}

				bp = (rds_buf_t *)(uintptr_t)wc[ix].wc_id;
				ASSERT(bp->buf_state == RDS_SNDBUF_PENDING);
				bp->buf_state = RDS_SNDBUF_FREE;
			} else if (wc[ix].wc_status == IBT_WC_WR_FLUSHED_ERR) {
				RDS_INCR_TXERRS();
				RDS_DPRINTF5("rds_poll_send_completions",
				    "EP(%p): WC ID: %p ERROR: %d", ep,
				    wc[ix].wc_id, wc[ix].wc_status);

				if (wc[ix].wc_id == RDS_RDMAW_WRID) {
					mutex_enter(&ep->ep_lock);
					ep->ep_rdmacnt--;
					mutex_exit(&ep->ep_lock);
					continue;
				}

				bp = (rds_buf_t *)(uintptr_t)wc[ix].wc_id;
				bp->buf_state = RDS_SNDBUF_ERROR;
			} else {
				RDS_INCR_TXERRS();
				RDS_DPRINTF2("rds_poll_send_completions",
				    "EP(%p): WC ID: %p ERROR: %d", ep,
				    wc[ix].wc_id, wc[ix].wc_status);
				if (send_error == 0) {
					rds_session_t	*sp = ep->ep_sp;

					/* don't let anyone send anymore */
					rw_enter(&sp->session_lock, RW_WRITER);
					if (sp->session_state !=
					    RDS_SESSION_STATE_ERROR) {
						sp->session_state =
						    RDS_SESSION_STATE_ERROR;
						/* Make this the active end */
						sp->session_type =
						    RDS_SESSION_ACTIVE;
					}
					rw_exit(&sp->session_lock);
				}

				send_error++;

				if (wc[ix].wc_id == RDS_RDMAW_WRID) {
					mutex_enter(&ep->ep_lock);
					ep->ep_rdmacnt--;
					mutex_exit(&ep->ep_lock);
					continue;
				}

				bp = (rds_buf_t *)(uintptr_t)wc[ix].wc_id;
				bp->buf_state = RDS_SNDBUF_ERROR;
			}

			/* append the buffer to the local free list */
			bp->buf_nextp = NULL;
			if (headp) {
				tailp->buf_nextp = bp;
				tailp = bp;
			} else {
				headp = bp;
				tailp = bp;
			}

			npolled++;
		}

		/*
		 * NOTE(review): when rds_no_interrupts == 1 the second
		 * break below always fires first, making this npolled > 100
		 * bound reachable only for other non-zero values — confirm
		 * which behavior is intended.
		 */
		if (rds_no_interrupts && (npolled > 100)) {
			break;
		}

		if (rds_no_interrupts == 1) {
			break;
		}
	} while (ret != IBT_CQ_EMPTY);

	RDS_DPRINTF5("rds_poll_send_completions", "Npolled: %d send_error: %d",
	    npolled, send_error);

	/* put the buffers to the pool */
	if (npolled != 0) {
		rds_free_send_buf(ep, headp, tailp, npolled, lock);
	}

	if (send_error != 0) {
		rds_handle_send_error(ep);
	}

	RDS_DPRINTF4("rds_poll_send_completions", "Return EP(%p)", ep);
}

/*
 * Send CQ completion handler: re-arm the CQ, then drain it without
 * taking the send pool lock (B_FALSE).
 */
void
rds_sendcq_handler(ibt_cq_hdl_t cq, void *arg)
{
	rds_ep_t	*ep;
	int		ret;

	ep = (rds_ep_t *)arg;

	RDS_DPRINTF4("rds_sendcq_handler", "Enter: EP(%p)", ep);

	/* enable the CQ */
	ret = ibt_enable_cq_notify(cq, IBT_NEXT_COMPLETION);
	if (ret != IBT_SUCCESS) {
		RDS_DPRINTF2(LABEL, "EP(%p) CQ(%p): ibt_enable_cq_notify "
		    "failed: %d", ep, cq, ret);
		return;
	}

	rds_poll_send_completions(cq, ep, B_FALSE);

	RDS_DPRINTF4("rds_sendcq_handler", "Return: EP(%p)", ep);
}

/*
 * Free the EP's RC channel and both CQs (each skipped, with a debug
 * message, if already NULL). The channel is flushed and its RQ drained
 * before being freed. Caller must hold ep_lock.
 */
void
rds_ep_free_rc_channel(rds_ep_t *ep)
{
	int ret;

	RDS_DPRINTF2("rds_ep_free_rc_channel", "EP(%p) - Enter", ep);

	ASSERT(mutex_owned(&ep->ep_lock));

	/* free the QP */
	if (ep->ep_chanhdl != NULL) {
		/* wait until the RQ is empty */
		(void) ibt_flush_channel(ep->ep_chanhdl);
		(void) rds_is_recvq_empty(ep, B_TRUE);
		ret = ibt_free_channel(ep->ep_chanhdl);
		if (ret != IBT_SUCCESS) {
			RDS_DPRINTF1("rds_ep_free_rc_channel", "EP(%p) "
			    "ibt_free_channel returned: %d", ep, ret);
		}
		ep->ep_chanhdl = NULL;
	} else {
		RDS_DPRINTF2("rds_ep_free_rc_channel",
		    "EP(%p) Channel is ALREADY FREE", ep);
	}

	/* free the Send CQ */
	if (ep->ep_sendcq != NULL) {
		ret = ibt_free_cq(ep->ep_sendcq);
		if (ret != IBT_SUCCESS) {
			RDS_DPRINTF1("rds_ep_free_rc_channel",
			    "EP(%p) - for sendcq, ibt_free_cq returned %d",
			    ep, ret);
		}
		ep->ep_sendcq = NULL;
	} else {
		RDS_DPRINTF2("rds_ep_free_rc_channel",
		    "EP(%p) SendCQ is ALREADY FREE", ep);
	}

	/* free the Recv CQ */
	if (ep->ep_recvcq != NULL) {
		ret = ibt_free_cq(ep->ep_recvcq);
		if (ret != IBT_SUCCESS) {
			RDS_DPRINTF1("rds_ep_free_rc_channel",
			    "EP(%p) - for recvcq, ibt_free_cq returned %d",
			    ep, ret);
		}
		ep->ep_recvcq = NULL;
	} else {
		RDS_DPRINTF2("rds_ep_free_rc_channel",
		    "EP(%p) RecvCQ is ALREADY FREE", ep);
	}

	RDS_DPRINTF2("rds_ep_free_rc_channel", "EP(%p) - Return", ep);
}

/* Allocate resources for RC channel */
/*
 * Allocate the send/recv CQs (sized per EP type, reusing any CQ that
 * already exists on the EP) and the RC channel for this endpoint.
 * Returns the channel handle, or NULL on failure with any CQs this
 * call allocated freed again.
 */
ibt_channel_hdl_t
rds_ep_alloc_rc_channel(rds_ep_t *ep, uint8_t hca_port)
{
	int			ret = IBT_SUCCESS;
	ibt_cq_attr_t		scqattr, rcqattr;
	ibt_rc_chan_alloc_args_t	chanargs;
	ibt_channel_hdl_t	chanhdl;
	rds_session_t		*sp;
	rds_hca_t		*hcap;

	RDS_DPRINTF4("rds_ep_alloc_rc_channel", "Enter: 0x%p port: %d",
	    ep, hca_port);

	/* Update the EP with the right IP address and HCA guid */
	sp = ep->ep_sp;
	ASSERT(sp != NULL);
	rw_enter(&sp->session_lock, RW_READER);
	mutex_enter(&ep->ep_lock);
	ep->ep_myip = sp->session_myip;
	ep->ep_remip = sp->session_remip;
	hcap = rds_gid_to_hcap(rdsib_statep, sp->session_lgid);
	/*
	 * NOTE(review): rds_gid_to_hcap() can return NULL (no port
	 * matches the gid) and hcap is dereferenced below without a
	 * check — confirm the session lgid is guaranteed valid here.
	 */
	ep->ep_hca_guid = hcap->hca_guid;
	mutex_exit(&ep->ep_lock);
	rw_exit(&sp->session_lock);

	/* reset taskqpending flag here */
	ep->ep_recvqp.qp_taskqpending = B_FALSE;

	if (ep->ep_type == RDS_EP_TYPE_CTRL) {
		scqattr.cq_size = MaxCtrlSendBuffers;
		scqattr.cq_sched = NULL;
		scqattr.cq_flags = IBT_CQ_NO_FLAGS;

		rcqattr.cq_size = MaxCtrlRecvBuffers;
		rcqattr.cq_sched = NULL;
		rcqattr.cq_flags = IBT_CQ_NO_FLAGS;

		chanargs.rc_sizes.cs_sq = MaxCtrlSendBuffers;
		chanargs.rc_sizes.cs_rq = MaxCtrlRecvBuffers;
		chanargs.rc_sizes.cs_sq_sgl = 1;
		chanargs.rc_sizes.cs_rq_sgl = 1;
	} else {
		/* data channel: SQ leaves room for RDS_NUM_ACKS ACK WRs */
		scqattr.cq_size = MaxDataSendBuffers + RDS_NUM_ACKS;
		scqattr.cq_sched = NULL;
		scqattr.cq_flags = IBT_CQ_NO_FLAGS;

		rcqattr.cq_size = MaxDataRecvBuffers;
		rcqattr.cq_sched = NULL;
		rcqattr.cq_flags = IBT_CQ_NO_FLAGS;

		chanargs.rc_sizes.cs_sq = MaxDataSendBuffers + RDS_NUM_ACKS;
		chanargs.rc_sizes.cs_rq = MaxDataRecvBuffers;
		chanargs.rc_sizes.cs_sq_sgl = 1;
		chanargs.rc_sizes.cs_rq_sgl = 1;
	}

	if (ep->ep_sendcq == NULL) {
		/* returned size is always greater than the requested size */
		ret = ibt_alloc_cq(hcap->hca_hdl, &scqattr,
		    &ep->ep_sendcq, NULL);
		if (ret != IBT_SUCCESS) {
			RDS_DPRINTF2(LABEL, "ibt_alloc_cq for sendCQ "
			    "failed, size = %d: %d", scqattr.cq_size, ret);
			return (NULL);
		}

		(void) ibt_set_cq_handler(ep->ep_sendcq, rds_sendcq_handler,
		    ep);

		/* arm the send CQ only when running interrupt-driven */
		if (rds_no_interrupts == 0) {
			ret = ibt_enable_cq_notify(ep->ep_sendcq,
			    IBT_NEXT_COMPLETION);
			if (ret != IBT_SUCCESS) {
				RDS_DPRINTF2(LABEL,
				    "ibt_enable_cq_notify failed: %d", ret);
				(void) ibt_free_cq(ep->ep_sendcq);
				ep->ep_sendcq = NULL;
				return (NULL);
			}
		}
	}

	if (ep->ep_recvcq == NULL) {
		/* returned size is always greater than the requested size */
		ret = ibt_alloc_cq(hcap->hca_hdl, &rcqattr,
		    &ep->ep_recvcq, NULL);
		if (ret != IBT_SUCCESS) {
			RDS_DPRINTF2(LABEL, "ibt_alloc_cq for recvCQ "
			    "failed, size = %d: %d", rcqattr.cq_size, ret);
			(void) ibt_free_cq(ep->ep_sendcq);
			ep->ep_sendcq = NULL;
			return (NULL);
		}

		(void) ibt_set_cq_handler(ep->ep_recvcq, rds_recvcq_handler,
		    ep);

		ret = ibt_enable_cq_notify(ep->ep_recvcq, rds_wc_signal);
		if (ret != IBT_SUCCESS) {
			RDS_DPRINTF2(LABEL,
			    "ibt_enable_cq_notify failed: %d", ret);
			(void) ibt_free_cq(ep->ep_recvcq);
			ep->ep_recvcq = NULL;
			(void) ibt_free_cq(ep->ep_sendcq);
			ep->ep_sendcq = NULL;
			return (NULL);
		}
	}

	chanargs.rc_flags = IBT_ALL_SIGNALED;
	chanargs.rc_control = IBT_CEP_RDMA_RD | IBT_CEP_RDMA_WR |
	    IBT_CEP_ATOMIC;
	chanargs.rc_hca_port_num = hca_port;
	chanargs.rc_scq = ep->ep_sendcq;
	chanargs.rc_rcq = ep->ep_recvcq;
	chanargs.rc_pd = hcap->hca_pdhdl;
	chanargs.rc_srq = NULL;

	ret = ibt_alloc_rc_channel(hcap->hca_hdl,
	    IBT_ACHAN_NO_FLAGS, &chanargs, &chanhdl, NULL);
	if (ret != IBT_SUCCESS) {
		RDS_DPRINTF2(LABEL, "ibt_alloc_rc_channel fail: %d",
		    ret);
		(void) ibt_free_cq(ep->ep_recvcq);
		ep->ep_recvcq = NULL;
		(void) ibt_free_cq(ep->ep_sendcq);
		ep->ep_sendcq = NULL;
		return (NULL);
	}

	/* Chan private should contain the ep */
	(void) ibt_set_chan_private(chanhdl, ep);

	RDS_DPRINTF4("rds_ep_alloc_rc_channel", "Return: 0x%p", chanhdl);

	return (chanhdl);
}


#if 0

/* Return node guid given a port gid */
ib_guid_t
rds_gid_to_node_guid(ib_gid_t gid)
{
	ibt_node_info_t	nodeinfo;
	int		ret;

	RDS_DPRINTF4("rds_gid_to_node_guid", "Enter: gid: %llx:%llx",
	    gid.gid_prefix, gid.gid_guid);

	ret = ibt_gid_to_node_info(gid, &nodeinfo);
	if (ret != IBT_SUCCESS) {
		RDS_DPRINTF2(LABEL, "ibt_gid_node_info for gid: %llx:%llx "
		    "failed", gid.gid_prefix, gid.gid_guid);
		return (0LL);
	}

	RDS_DPRINTF4("rds_gid_to_node_guid", "Return: Node guid: %llx",
	    nodeinfo.n_node_guid);

	return (nodeinfo.n_node_guid);
}

#endif

/*
 * PORT_UP async event: refresh the cached port info for the affected
 * HCA. (Function continues beyond this chunk.)
 */
static void
rds_handle_portup_event(rds_state_t *statep, ibt_hca_hdl_t hdl,
    ibt_async_event_t *event)
{
	rds_hca_t		*hcap;
	ibt_hca_portinfo_t	*newpinfop, *oldpinfop;
	uint_t			newsize, oldsize, nport;
	ib_gid_t		gid;
	int			ret;

	RDS_DPRINTF2("rds_handle_portup_event",
	    "Enter: GUID: 0x%llx Statep: %p", event->ev_hca_guid, statep);

	hcap = rds_get_hcap(statep, event->ev_hca_guid);
	if (hcap == NULL) {
		RDS_DPRINTF2("rds_handle_portup_event", "HCA: 0x%llx is "
		    "not in our list", event->ev_hca_guid);
		return;
	}

	/* port 0 means: query all ports of this HCA */
	ret = ibt_query_hca_ports(hdl, 0, &newpinfop, &nport, &newsize);
	if (ret != IBT_SUCCESS) {
		RDS_DPRINTF2(LABEL, "ibt_query_hca_ports failed: %d", ret);
		return;
	}

	/* swap in the fresh port info; old values saved for cleanup */
	oldpinfop = hcap->hca_pinfop;
	oldsize = hcap->hca_pinfo_sz;
	hcap->hca_pinfop = newpinfop;
	hcap->hca_pinfo_sz = newsize;

	/* structure copy */
	gid = newpinfop[event->ev_port - 1].p_sgid_tbl[0];
1266 1267 /* bind RDS service on the port, pass statep as cm_private */ 1268 ret = ibt_bind_service(statep->rds_srvhdl, gid, NULL, statep, NULL); 1269 if (ret != IBT_SUCCESS) { 1270 RDS_DPRINTF2(LABEL, "Bind service for HCA: 0x%llx Port: %d " 1271 "gid %llx:%llx returned: %d", event->ev_hca_guid, 1272 event->ev_port, gid.gid_prefix, gid.gid_guid, ret); 1273 } 1274 1275 (void) ibt_free_portinfo(oldpinfop, oldsize); 1276 1277 RDS_DPRINTF2("rds_handle_portup_event", "Return: GUID: 0x%llx", 1278 event->ev_hca_guid); 1279 } 1280 1281 static void 1282 rds_async_handler(void *clntp, ibt_hca_hdl_t hdl, ibt_async_code_t code, 1283 ibt_async_event_t *event) 1284 { 1285 rds_state_t *statep; 1286 1287 RDS_DPRINTF2("rds_async_handler", "Async code: %d", code); 1288 1289 switch (code) { 1290 case IBT_EVENT_PORT_UP: 1291 statep = (rds_state_t *)clntp; 1292 rds_handle_portup_event(statep, hdl, event); 1293 break; 1294 1295 default: 1296 RDS_DPRINTF2(LABEL, "Async event: %d not handled", code); 1297 } 1298 1299 RDS_DPRINTF2("rds_async_handler", "Return: code: %d", code); 1300 } 1301