1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 2005 SilverStorm Technologies, Inc. All rights reserved. 27 * 28 * This software is available to you under a choice of one of two 29 * licenses. You may choose to be licensed under the terms of the GNU 30 * General Public License (GPL) Version 2, available from the file 31 * COPYING in the main directory of this source tree, or the 32 * OpenIB.org BSD license below: 33 * 34 * Redistribution and use in source and binary forms, with or 35 * without modification, are permitted provided that the following 36 * conditions are met: 37 * 38 * - Redistributions of source code must retain the above 39 * copyright notice, this list of conditions and the following 40 * disclaimer. 41 * 42 * - Redistributions in binary form must reproduce the above 43 * copyright notice, this list of conditions and the following 44 * disclaimer in the documentation and/or other materials 45 * provided with the distribution. 46 * 47 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 48 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 49 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 50 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 51 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 52 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 53 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 54 * SOFTWARE. 55 * 56 */ 57 /* 58 * Sun elects to include this software in Sun product 59 * under the OpenIB BSD license. 60 * 61 * 62 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 63 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 64 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 65 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 66 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 67 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 68 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 69 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 70 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 71 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 72 * POSSIBILITY OF SUCH DAMAGE. 73 */ 74 75 #pragma ident "%Z%%M% %I% %E% SMI" 76 77 #include <sys/ib/clients/rds/rdsib_cm.h> 78 #include <sys/ib/clients/rds/rdsib_ib.h> 79 #include <sys/ib/clients/rds/rdsib_buf.h> 80 #include <sys/ib/clients/rds/rdsib_ep.h> 81 #include <sys/ib/clients/rds/rds_kstat.h> 82 83 /* 84 * This File contains the buffer management code 85 */ 86 87 #define DUMP_USER_PARAMS() \ 88 RDS_DPRINTF3(LABEL, "MaxNodes = %d", MaxNodes); \ 89 RDS_DPRINTF3(LABEL, "UserBufferSize = %d", UserBufferSize); \ 90 RDS_DPRINTF3(LABEL, "RdsPktSize = %d", RdsPktSize); \ 91 RDS_DPRINTF3(LABEL, "MaxDataSendBuffers = %d", MaxDataSendBuffers); \ 92 RDS_DPRINTF3(LABEL, "MaxDataRecvBuffers = %d", MaxDataRecvBuffers); \ 93 RDS_DPRINTF3(LABEL, "MaxCtrlSendBuffers = %d", MaxCtrlSendBuffers); \ 94 RDS_DPRINTF3(LABEL, "MaxCtrlRecvBuffers = %d", MaxCtrlRecvBuffers); \ 95 RDS_DPRINTF3(LABEL, "DataRecvBufferLWM = %d", DataRecvBufferLWM); \ 96 RDS_DPRINTF3(LABEL, "PendingRxPktsHWM = %d", PendingRxPktsHWM); \ 97 RDS_DPRINTF3(LABEL, "MinRnrRetry = %d", MinRnrRetry) 98 99 static void 100 rds_free_mblk(char *arg) 101 { 102 rds_buf_t *bp = (rds_buf_t *)(uintptr_t)arg; 103 104 /* Free the recv buffer */ 105 RDS_DPRINTF4("rds_free_mblk", "Enter: BP(%p)", bp); 106 ASSERT(bp->buf_state == RDS_RCVBUF_ONSOCKQ); 107 rds_free_recv_buf(bp, 1); 108 RDS_DECR_RXPKTS_PEND(1); 109 RDS_DPRINTF4("rds_free_mblk", "Return: BP(%p)", bp); 110 } 111 112 void 113 rds_free_recv_caches(rds_state_t *statep) 114 { 115 rds_hca_t *hcap; 116 int ret; 117 118 RDS_DPRINTF4("rds_free_recv_caches", "Enter"); 119 120 mutex_enter(&rds_dpool.pool_lock); 121 if (rds_dpool.pool_memp == NULL) { 122 RDS_DPRINTF2("rds_free_recv_caches", "Caches are empty"); 123 mutex_exit(&rds_dpool.pool_lock); 124 return; 125 } 126 127 /* 128 * All buffers must have been freed as all sessions are closed 129 * and destroyed 130 */ 131 ASSERT(rds_dpool.pool_nbusy == 0); 132 RDS_DPRINTF2("rds_free_recv_caches", "Data Pool has " 133 "pending buffers: %d", rds_dpool.pool_nbusy); 134 while (rds_dpool.pool_nbusy != 0) { 135 mutex_exit(&rds_dpool.pool_lock); 136 delay(drv_usectohz(1000000)); 137 mutex_enter(&rds_dpool.pool_lock); 138 } 139 140 hcap = statep->rds_hcalistp; 141 while (hcap != NULL) { 142 if (hcap->hca_mrhdl != NULL) { 143 ret = ibt_deregister_mr(hcap->hca_hdl, 144 hcap->hca_mrhdl); 145 if (ret == IBT_SUCCESS) { 146 hcap->hca_mrhdl = NULL; 147 hcap->hca_lkey = 0; 148 hcap->hca_rkey = 0; 149 } else { 150 RDS_DPRINTF2(LABEL, "ibt_deregister_mr " 151 "failed: %d, mrhdl: 0x%p", ret, 152 hcap->hca_mrhdl); 153 } 154 } 155 hcap = hcap->hca_nextp; 156 } 157 158 kmem_free(rds_dpool.pool_bufmemp, (rds_dpool.pool_nbuffers + 159 rds_cpool.pool_nbuffers) * sizeof (rds_buf_t)); 160 rds_dpool.pool_bufmemp = NULL; 161 162 kmem_free(rds_dpool.pool_memp, rds_dpool.pool_memsize); 163 rds_dpool.pool_memp = NULL; 164 165 mutex_exit(&rds_dpool.pool_lock); 166 167 RDS_DPRINTF4("rds_free_recv_caches", "Return"); 168 } 169 170 int 171 rds_init_recv_caches(rds_state_t *statep) 172 { 173 uint8_t *mp; 174 rds_buf_t *bp; 175 rds_hca_t *hcap; 176 uint32_t nsessions; 177 uint_t ix; 178 uint_t nctrlrx; 179 uint8_t *memp; 180 uint_t memsize, nbuf; 181 rds_buf_t *bufmemp; 182 ibt_mr_attr_t mem_attr; 183 ibt_mr_desc_t mem_desc; 184 int ret; 185 186 RDS_DPRINTF4("rds_init_recv_caches", "Enter"); 187 188 DUMP_USER_PARAMS(); 189 190 mutex_enter(&rds_dpool.pool_lock); 191 if (rds_dpool.pool_memp != NULL) { 192 RDS_DPRINTF2("rds_init_recv_caches", "Pools are already " 193 "initialized"); 194 mutex_exit(&rds_dpool.pool_lock); 195 return (0); 196 } 197 198 /* 199 * High water mark for the receive buffers in the system. If the 200 * number of buffers used crosses this mark then all sockets in 201 * would be stalled. The port quota for the sockets is set based 202 * on this limit. 203 */ 204 rds_rx_pkts_pending_hwm = (PendingRxPktsHWM * NDataRX)/100; 205 206 /* nsessions can never be less than 1 */ 207 nsessions = MaxNodes - 1; 208 nctrlrx = (nsessions + 1) * MaxCtrlRecvBuffers; 209 210 RDS_DPRINTF3(LABEL, "Number of Possible Sessions: %d", nsessions); 211 212 /* Add the hdr */ 213 RdsPktSize = UserBufferSize + RDS_DATA_HDR_SZ; 214 215 memsize = (NDataRX * RdsPktSize) + (nctrlrx * RDS_CTRLPKT_SIZE); 216 nbuf = NDataRX + nctrlrx; 217 RDS_DPRINTF3(LABEL, "RDS Buffer Pool Memory: %lld", memsize); 218 RDS_DPRINTF3(LABEL, "Total Buffers: %d", nbuf); 219 220 memp = (uint8_t *)kmem_zalloc(memsize, KM_NOSLEEP); 221 if (memp == NULL) { 222 RDS_DPRINTF1(LABEL, "RDS Memory allocation failed"); 223 mutex_exit(&rds_dpool.pool_lock); 224 return (-1); 225 } 226 227 RDS_DPRINTF3(LABEL, "RDS Buffer Entries Memory: %lld", 228 nbuf * sizeof (rds_buf_t)); 229 230 /* allocate memory for buffer entries */ 231 bufmemp = (rds_buf_t *)kmem_zalloc(nbuf * sizeof (rds_buf_t), 232 KM_SLEEP); 233 234 /* register the memory with all HCAs */ 235 mem_attr.mr_vaddr = (ib_vaddr_t)(uintptr_t)memp; 236 mem_attr.mr_len = memsize; 237 mem_attr.mr_as = NULL; 238 mem_attr.mr_flags = IBT_MR_ENABLE_LOCAL_WRITE; 239 240 hcap = statep->rds_hcalistp; 241 while (hcap != NULL) { 242 ret = ibt_register_mr(hcap->hca_hdl, hcap->hca_pdhdl, 243 &mem_attr, &hcap->hca_mrhdl, &mem_desc); 244 if (ret != IBT_SUCCESS) { 245 RDS_DPRINTF2(LABEL, "ibt_register_mr failed: %d", ret); 246 hcap = statep->rds_hcalistp; 247 while ((hcap) && (hcap->hca_mrhdl != NULL)) { 248 ret = ibt_deregister_mr(hcap->hca_hdl, 249 hcap->hca_mrhdl); 250 if (ret == IBT_SUCCESS) { 251 hcap->hca_mrhdl = NULL; 252 hcap->hca_lkey = 0; 253 hcap->hca_rkey = 0; 254 } else { 255 RDS_DPRINTF2(LABEL, "ibt_deregister_mr " 256 "failed: %d, mrhdl: 0x%p", ret, 257 hcap->hca_mrhdl); 258 } 259 hcap = hcap->hca_nextp; 260 } 261 kmem_free(bufmemp, nbuf * sizeof (rds_buf_t)); 262 kmem_free(memp, memsize); 263 mutex_exit(&rds_dpool.pool_lock); 264 return (-1); 265 } 266 267 hcap->hca_lkey = mem_desc.md_lkey; 268 hcap->hca_rkey = mem_desc.md_rkey; 269 270 hcap = hcap->hca_nextp; 271 } 272 273 /* Initialize data pool */ 274 rds_dpool.pool_memp = memp; 275 rds_dpool.pool_memsize = memsize; 276 rds_dpool.pool_bufmemp = bufmemp; 277 rds_dpool.pool_nbuffers = NDataRX; 278 rds_dpool.pool_nbusy = 0; 279 rds_dpool.pool_nfree = NDataRX; 280 281 /* chain the buffers */ 282 mp = memp; 283 bp = bufmemp; 284 for (ix = 0; ix < NDataRX; ix++) { 285 bp[ix].buf_nextp = &bp[ix + 1]; 286 bp[ix].buf_ds.ds_va = (ib_vaddr_t)(uintptr_t)mp; 287 bp[ix].buf_state = RDS_RCVBUF_FREE; 288 bp[ix].buf_frtn.free_func = rds_free_mblk; 289 bp[ix].buf_frtn.free_arg = (char *)&bp[ix]; 290 mp = mp + RdsPktSize; 291 } 292 bp[NDataRX - 1].buf_nextp = NULL; 293 rds_dpool.pool_headp = &bp[0]; 294 rds_dpool.pool_tailp = &bp[NDataRX - 1]; 295 296 /* Initialize ctrl pool */ 297 rds_cpool.pool_nbuffers = nctrlrx; 298 rds_cpool.pool_nbusy = 0; 299 rds_cpool.pool_nfree = nctrlrx; 300 301 /* chain the buffers */ 302 for (ix = NDataRX; ix < nbuf - 1; ix++) { 303 bp[ix].buf_nextp = &bp[ix + 1]; 304 bp[ix].buf_ds.ds_va = (ib_vaddr_t)(uintptr_t)mp; 305 mp = mp + RDS_CTRLPKT_SIZE; 306 } 307 bp[nbuf - 1].buf_ds.ds_va = (ib_vaddr_t)(uintptr_t)mp; 308 bp[nbuf - 1].buf_nextp = NULL; 309 rds_cpool.pool_headp = &bp[NDataRX]; 310 rds_cpool.pool_tailp = &bp[nbuf - 1]; 311 312 mutex_exit(&rds_dpool.pool_lock); 313 314 RDS_DPRINTF3(LABEL, "rdsmemp start: %p end: %p", memp, mp); 315 RDS_DPRINTF4("rds_init_recv_caches", "Return"); 316 return (0); 317 } 318 319 void 320 rds_free_send_pool(rds_ep_t *ep) 321 { 322 rds_bufpool_t *pool; 323 rds_hca_t *hcap; 324 int ret; 325 326 pool = &ep->ep_sndpool; 327 328 mutex_enter(&pool->pool_lock); 329 if (pool->pool_memp == NULL) { 330 mutex_exit(&pool->pool_lock); 331 RDS_DPRINTF2("rds_free_send_pool", 332 "EP(%p) DOUBLE Free on Send Pool", ep); 333 return; 334 } 335 336 /* get the hcap for the HCA hosting this channel */ 337 hcap = rds_get_hcap(rdsib_statep, ep->ep_hca_guid); 338 if (hcap == NULL) { 339 RDS_DPRINTF2("rds_free_send_pool", "HCA (0x%llx) not found", 340 ep->ep_hca_guid); 341 } else { 342 ret = ibt_deregister_mr(hcap->hca_hdl, ep->ep_snd_mrhdl); 343 if (ret != IBT_SUCCESS) { 344 RDS_DPRINTF2(LABEL, 345 "ibt_deregister_mr failed: %d, mrhdl: 0x%p", 346 ret, ep->ep_snd_mrhdl); 347 } 348 349 if (ep->ep_ack_addr) { 350 ret = ibt_deregister_mr(hcap->hca_hdl, ep->ep_ackhdl); 351 if (ret != IBT_SUCCESS) { 352 RDS_DPRINTF2(LABEL, 353 "ibt_deregister_mr ackhdl failed: %d, " 354 "mrhdl: 0x%p", ret, ep->ep_ackhdl); 355 } 356 357 kmem_free((void *)ep->ep_ack_addr, sizeof (uintptr_t)); 358 ep->ep_ack_addr = NULL; 359 } 360 } 361 362 kmem_free(pool->pool_memp, pool->pool_memsize); 363 kmem_free(pool->pool_bufmemp, 364 pool->pool_nbuffers * sizeof (rds_buf_t)); 365 pool->pool_memp = NULL; 366 pool->pool_bufmemp = NULL; 367 mutex_exit(&pool->pool_lock); 368 } 369 370 int 371 rds_init_send_pool(rds_ep_t *ep, ib_guid_t hca_guid) 372 { 373 uint8_t *mp; 374 rds_buf_t *bp; 375 rds_hca_t *hcap; 376 uint_t ix, rcv_len; 377 ibt_mr_attr_t mem_attr; 378 ibt_mr_desc_t mem_desc; 379 uint8_t *memp; 380 rds_buf_t *bufmemp; 381 uintptr_t ack_addr = NULL; 382 uint_t memsize; 383 uint_t nbuf; 384 rds_bufpool_t *spool; 385 rds_data_hdr_t *pktp; 386 int ret; 387 388 RDS_DPRINTF2("rds_init_send_pool", "Enter"); 389 390 spool = &ep->ep_sndpool; 391 392 ASSERT(spool->pool_memp == NULL); 393 ASSERT(ep->ep_hca_guid == 0); 394 395 /* get the hcap for the HCA hosting this channel */ 396 hcap = rds_get_hcap(rdsib_statep, hca_guid); 397 if (hcap == NULL) { 398 RDS_DPRINTF2("rds_init_send_pool", "HCA (0x%llx) not found", 399 hca_guid); 400 return (-1); 401 } 402 403 if (ep->ep_type == RDS_EP_TYPE_DATA) { 404 spool->pool_nbuffers = MaxDataSendBuffers; 405 spool->pool_nbusy = 0; 406 spool->pool_nfree = MaxDataSendBuffers; 407 memsize = (MaxDataSendBuffers * RdsPktSize) + 408 sizeof (uintptr_t); 409 rcv_len = RdsPktSize; 410 } else { 411 spool->pool_nbuffers = MaxCtrlSendBuffers; 412 spool->pool_nbusy = 0; 413 spool->pool_nfree = MaxCtrlSendBuffers; 414 memsize = MaxCtrlSendBuffers * RDS_CTRLPKT_SIZE; 415 rcv_len = RDS_CTRLPKT_SIZE; 416 } 417 nbuf = spool->pool_nbuffers; 418 419 RDS_DPRINTF3(LABEL, "RDS Send Pool Memory: %lld", memsize); 420 421 memp = (uint8_t *)kmem_zalloc(memsize, KM_NOSLEEP); 422 if (memp == NULL) { 423 RDS_DPRINTF1(LABEL, "RDS Send Memory allocation failed"); 424 return (-1); 425 } 426 427 RDS_DPRINTF3(LABEL, "RDS Buffer Entries Memory: %lld", 428 nbuf * sizeof (rds_buf_t)); 429 430 /* allocate memory for buffer entries */ 431 bufmemp = (rds_buf_t *)kmem_zalloc(nbuf * sizeof (rds_buf_t), 432 KM_SLEEP); 433 434 if (ep->ep_type == RDS_EP_TYPE_DATA) { 435 ack_addr = (uintptr_t)kmem_zalloc(sizeof (uintptr_t), KM_SLEEP); 436 437 /* register the memory with the HCA for this channel */ 438 mem_attr.mr_vaddr = (ib_vaddr_t)ack_addr; 439 mem_attr.mr_len = sizeof (uintptr_t); 440 mem_attr.mr_as = NULL; 441 mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE | 442 IBT_MR_ENABLE_REMOTE_WRITE; 443 444 ret = ibt_register_mr(hcap->hca_hdl, hcap->hca_pdhdl, 445 &mem_attr, &ep->ep_ackhdl, &mem_desc); 446 if (ret != IBT_SUCCESS) { 447 RDS_DPRINTF2("rds_init_send_pool", 448 "EP(%p): ibt_register_mr for ack failed: %d", 449 ep, ret); 450 kmem_free(memp, memsize); 451 kmem_free(bufmemp, nbuf * sizeof (rds_buf_t)); 452 kmem_free((void *)ack_addr, sizeof (uintptr_t)); 453 return (-1); 454 } 455 ep->ep_ack_rkey = mem_desc.md_rkey; 456 ep->ep_ack_addr = ack_addr; 457 } 458 459 /* register the memory with the HCA for this channel */ 460 mem_attr.mr_vaddr = (ib_vaddr_t)(uintptr_t)memp; 461 mem_attr.mr_len = memsize; 462 mem_attr.mr_as = NULL; 463 mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE; 464 465 ret = ibt_register_mr(hcap->hca_hdl, hcap->hca_pdhdl, 466 &mem_attr, &ep->ep_snd_mrhdl, &mem_desc); 467 if (ret != IBT_SUCCESS) { 468 RDS_DPRINTF2("rds_init_send_pool", "EP(%p): ibt_register_mr " 469 "failed: %d", ep, ret); 470 kmem_free(memp, memsize); 471 kmem_free(bufmemp, nbuf * sizeof (rds_buf_t)); 472 if (ack_addr != NULL) 473 kmem_free((void *)ack_addr, sizeof (uintptr_t)); 474 return (-1); 475 } 476 ep->ep_snd_lkey = mem_desc.md_lkey; 477 478 479 /* Initialize the pool */ 480 spool->pool_memp = memp; 481 spool->pool_memsize = memsize; 482 spool->pool_bufmemp = bufmemp; 483 spool->pool_sqpoll_pending = B_FALSE; 484 485 /* chain the buffers and initialize them */ 486 mp = memp; 487 bp = bufmemp; 488 489 if (ep->ep_type == RDS_EP_TYPE_DATA) { 490 for (ix = 0; ix < nbuf - 1; ix++) { 491 bp[ix].buf_nextp = &bp[ix + 1]; 492 bp[ix].buf_ep = ep; 493 bp[ix].buf_ds.ds_va = (ib_vaddr_t)(uintptr_t)mp; 494 bp[ix].buf_ds.ds_key = ep->ep_snd_lkey; 495 bp[ix].buf_state = RDS_SNDBUF_FREE; 496 pktp = (rds_data_hdr_t *)(uintptr_t)mp; 497 pktp->dh_bufid = (uintptr_t)&bp[ix]; 498 mp = mp + rcv_len; 499 } 500 bp[nbuf - 1].buf_nextp = NULL; 501 bp[nbuf - 1].buf_ep = ep; 502 bp[nbuf - 1].buf_ds.ds_va = (ib_vaddr_t)(uintptr_t)mp; 503 bp[nbuf - 1].buf_ds.ds_key = ep->ep_snd_lkey; 504 bp[nbuf - 1].buf_state = RDS_SNDBUF_FREE; 505 pktp = (rds_data_hdr_t *)(uintptr_t)mp; 506 pktp->dh_bufid = (uintptr_t)&bp[nbuf - 1]; 507 508 spool->pool_headp = &bp[0]; 509 spool->pool_tailp = &bp[nbuf - 1]; 510 511 mp = mp + rcv_len; 512 ep->ep_ackds.ds_va = (ib_vaddr_t)(uintptr_t)mp; 513 ep->ep_ackds.ds_key = ep->ep_snd_lkey; 514 ep->ep_ackds.ds_len = sizeof (uintptr_t); 515 516 *(uintptr_t *)ep->ep_ack_addr = (uintptr_t)spool->pool_tailp; 517 } else { 518 /* control send pool */ 519 for (ix = 0; ix < nbuf - 1; ix++) { 520 bp[ix].buf_nextp = &bp[ix + 1]; 521 bp[ix].buf_ep = ep; 522 bp[ix].buf_ds.ds_va = (ib_vaddr_t)(uintptr_t)mp; 523 bp[ix].buf_ds.ds_key = ep->ep_snd_lkey; 524 bp[ix].buf_state = RDS_SNDBUF_FREE; 525 mp = mp + rcv_len; 526 } 527 bp[nbuf - 1].buf_nextp = NULL; 528 bp[nbuf - 1].buf_ep = ep; 529 bp[nbuf - 1].buf_ds.ds_va = (ib_vaddr_t)(uintptr_t)mp; 530 bp[nbuf - 1].buf_ds.ds_key = ep->ep_snd_lkey; 531 bp[nbuf - 1].buf_state = RDS_SNDBUF_FREE; 532 spool->pool_headp = &bp[0]; 533 spool->pool_tailp = &bp[nbuf - 1]; 534 } 535 536 RDS_DPRINTF3(LABEL, "rdsmemp start: %p end: %p", memp, mp); 537 RDS_DPRINTF2("rds_init_send_pool", "Return"); 538 539 return (0); 540 } 541 542 int 543 rds_reinit_send_pool(rds_ep_t *ep, ib_guid_t hca_guid) 544 { 545 rds_buf_t *bp; 546 rds_hca_t *hcap; 547 ibt_mr_attr_t mem_attr; 548 ibt_mr_desc_t mem_desc; 549 rds_bufpool_t *spool; 550 int ret; 551 552 RDS_DPRINTF2("rds_reinit_send_pool", "Enter: EP(%p)", ep); 553 554 spool = &ep->ep_sndpool; 555 ASSERT(spool->pool_memp != NULL); 556 557 /* deregister the send pool memory from the previous HCA */ 558 hcap = rds_get_hcap(rdsib_statep, ep->ep_hca_guid); 559 if (hcap == NULL) { 560 RDS_DPRINTF2("rds_reinit_send_pool", "HCA (0x%llx) not found", 561 ep->ep_hca_guid); 562 } else { 563 if (ep->ep_snd_mrhdl != NULL) { 564 (void) ibt_deregister_mr(hcap->hca_hdl, 565 ep->ep_snd_mrhdl); 566 ep->ep_snd_mrhdl = NULL; 567 ep->ep_snd_lkey = 0; 568 } 569 570 if ((ep->ep_type == RDS_EP_TYPE_DATA) && 571 (ep->ep_ackhdl != NULL)) { 572 (void) ibt_deregister_mr(hcap->hca_hdl, ep->ep_ackhdl); 573 ep->ep_ackhdl = NULL; 574 ep->ep_ack_rkey = 0; 575 } 576 577 ep->ep_hca_guid = NULL; 578 } 579 580 /* get the hcap for the new HCA */ 581 hcap = rds_get_hcap(rdsib_statep, hca_guid); 582 if (hcap == NULL) { 583 RDS_DPRINTF2("rds_reinit_send_pool", "HCA (0x%llx) not found", 584 hca_guid); 585 return (-1); 586 } 587 588 /* register the send memory */ 589 mem_attr.mr_vaddr = (ib_vaddr_t)(uintptr_t)spool->pool_memp; 590 mem_attr.mr_len = spool->pool_memsize; 591 mem_attr.mr_as = NULL; 592 mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE; 593 594 ret = ibt_register_mr(hcap->hca_hdl, hcap->hca_pdhdl, 595 &mem_attr, &ep->ep_snd_mrhdl, &mem_desc); 596 if (ret != IBT_SUCCESS) { 597 RDS_DPRINTF2("rds_reinit_send_pool", 598 "EP(%p): ibt_register_mr failed: %d", ep, ret); 599 return (-1); 600 } 601 ep->ep_snd_lkey = mem_desc.md_lkey; 602 603 /* register the acknowledgement space */ 604 if (ep->ep_type == RDS_EP_TYPE_DATA) { 605 mem_attr.mr_vaddr = (ib_vaddr_t)ep->ep_ack_addr; 606 mem_attr.mr_len = sizeof (uintptr_t); 607 mem_attr.mr_as = NULL; 608 mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE | 609 IBT_MR_ENABLE_REMOTE_WRITE; 610 611 ret = ibt_register_mr(hcap->hca_hdl, hcap->hca_pdhdl, 612 &mem_attr, &ep->ep_ackhdl, &mem_desc); 613 if (ret != IBT_SUCCESS) { 614 RDS_DPRINTF2("rds_reinit_send_pool", 615 "EP(%p): ibt_register_mr for ack failed: %d", 616 ep, ret); 617 (void) ibt_deregister_mr(hcap->hca_hdl, 618 ep->ep_snd_mrhdl); 619 ep->ep_snd_mrhdl = NULL; 620 ep->ep_snd_lkey = 0; 621 return (-1); 622 } 623 ep->ep_ack_rkey = mem_desc.md_rkey; 624 625 /* update the LKEY in the acknowledgement WR */ 626 ep->ep_ackds.ds_key = ep->ep_snd_lkey; 627 } 628 629 /* update the LKEY in each buffer */ 630 bp = spool->pool_headp; 631 while (bp) { 632 bp->buf_ds.ds_key = ep->ep_snd_lkey; 633 bp = bp->buf_nextp; 634 } 635 636 ep->ep_hca_guid = hca_guid; 637 638 RDS_DPRINTF2("rds_reinit_send_pool", "Return: EP(%p)", ep); 639 640 return (0); 641 } 642 643 void 644 rds_free_recv_pool(rds_ep_t *ep) 645 { 646 rds_bufpool_t *pool; 647 648 if (ep->ep_type == RDS_EP_TYPE_DATA) { 649 pool = &rds_dpool; 650 } else { 651 pool = &rds_cpool; 652 } 653 654 mutex_enter(&ep->ep_rcvpool.pool_lock); 655 if (ep->ep_rcvpool.pool_nfree != 0) { 656 rds_free_buf(pool, ep->ep_rcvpool.pool_headp, 657 ep->ep_rcvpool.pool_nfree); 658 ep->ep_rcvpool.pool_nfree = 0; 659 ep->ep_rcvpool.pool_headp = NULL; 660 ep->ep_rcvpool.pool_tailp = NULL; 661 } 662 mutex_exit(&ep->ep_rcvpool.pool_lock); 663 } 664 665 int 666 rds_init_recv_pool(rds_ep_t *ep) 667 { 668 rds_bufpool_t *rpool; 669 rds_qp_t *recvqp; 670 671 recvqp = &ep->ep_recvqp; 672 rpool = &ep->ep_rcvpool; 673 if (ep->ep_type == RDS_EP_TYPE_DATA) { 674 recvqp->qp_depth = MaxDataRecvBuffers; 675 recvqp->qp_level = 0; 676 recvqp->qp_lwm = (DataRecvBufferLWM * MaxDataRecvBuffers)/100; 677 recvqp->qp_taskqpending = B_FALSE; 678 679 rpool->pool_nbuffers = MaxDataRecvBuffers; 680 rpool->pool_nbusy = 0; 681 rpool->pool_nfree = 0; 682 } else { 683 recvqp->qp_depth = MaxCtrlRecvBuffers; 684 recvqp->qp_level = 0; 685 recvqp->qp_lwm = (CtrlRecvBufferLWM * MaxCtrlRecvBuffers)/100; 686 recvqp->qp_taskqpending = B_FALSE; 687 688 rpool->pool_nbuffers = MaxCtrlRecvBuffers; 689 rpool->pool_nbusy = 0; 690 rpool->pool_nfree = 0; 691 } 692 693 return (0); 694 } 695 696 /* Free buffers to the global pool, either cpool or dpool */ 697 void 698 rds_free_buf(rds_bufpool_t *pool, rds_buf_t *bp, uint_t nbuf) 699 { 700 uint_t ix; 701 702 RDS_DPRINTF4("rds_free_buf", "Enter"); 703 704 ASSERT(nbuf != 0); 705 706 mutex_enter(&pool->pool_lock); 707 708 if (pool->pool_nfree != 0) { 709 pool->pool_tailp->buf_nextp = bp; 710 } else { 711 pool->pool_headp = bp; 712 } 713 714 if (nbuf == 1) { 715 ASSERT(bp->buf_state == RDS_RCVBUF_FREE); 716 bp->buf_ep = NULL; 717 bp->buf_nextp = NULL; 718 pool->pool_tailp = bp; 719 } else { 720 for (ix = 1; ix < nbuf; ix++) { 721 ASSERT(bp->buf_state == RDS_RCVBUF_FREE); 722 bp->buf_ep = NULL; 723 bp = bp->buf_nextp; 724 } 725 ASSERT(bp->buf_state == RDS_RCVBUF_FREE); 726 bp->buf_ep = NULL; 727 bp->buf_nextp = NULL; 728 pool->pool_tailp = bp; 729 } 730 /* tail is always the last buffer */ 731 pool->pool_tailp->buf_nextp = NULL; 732 733 pool->pool_nfree += nbuf; 734 pool->pool_nbusy -= nbuf; 735 736 mutex_exit(&pool->pool_lock); 737 738 RDS_DPRINTF4("rds_free_buf", "Return"); 739 } 740 741 /* Get buffers from the global pools, either cpool or dpool */ 742 rds_buf_t * 743 rds_get_buf(rds_bufpool_t *pool, uint_t nbuf, uint_t *nret) 744 { 745 rds_buf_t *bp = NULL, *bp1; 746 uint_t ix; 747 748 RDS_DPRINTF4("rds_get_buf", "Enter"); 749 750 mutex_enter(&pool->pool_lock); 751 752 RDS_DPRINTF3("rds_get_buf", "Available: %d Needed: %d", 753 pool->pool_nfree, nbuf); 754 755 if (nbuf < pool->pool_nfree) { 756 *nret = nbuf; 757 758 bp1 = pool->pool_headp; 759 for (ix = 1; ix < nbuf; ix++) { 760 bp1 = bp1->buf_nextp; 761 } 762 763 bp = pool->pool_headp; 764 pool->pool_headp = bp1->buf_nextp; 765 bp1->buf_nextp = NULL; 766 767 pool->pool_nfree -= nbuf; 768 pool->pool_nbusy += nbuf; 769 } else if (nbuf >= pool->pool_nfree) { 770 *nret = pool->pool_nfree; 771 772 bp = pool->pool_headp; 773 774 pool->pool_headp = NULL; 775 pool->pool_tailp = NULL; 776 777 pool->pool_nbusy += pool->pool_nfree; 778 pool->pool_nfree = 0; 779 } 780 781 mutex_exit(&pool->pool_lock); 782 783 RDS_DPRINTF4("rds_get_buf", "Return"); 784 785 return (bp); 786 } 787 788 boolean_t 789 rds_is_recvq_empty(rds_ep_t *ep, boolean_t wait) 790 { 791 rds_qp_t *recvqp; 792 rds_bufpool_t *rpool; 793 boolean_t ret = B_TRUE; 794 795 recvqp = &ep->ep_recvqp; 796 mutex_enter(&recvqp->qp_lock); 797 RDS_DPRINTF2("rds_is_recvq_empty", "EP(%p): QP has %d WRs", 798 ep, recvqp->qp_level); 799 if (wait) { 800 /* wait until the RQ is empty */ 801 while (recvqp->qp_level != 0) { 802 /* wait one second and try again */ 803 mutex_exit(&recvqp->qp_lock); 804 delay(drv_usectohz(1000000)); 805 mutex_enter(&recvqp->qp_lock); 806 } 807 } else if (recvqp->qp_level != 0) { 808 ret = B_FALSE; 809 } 810 mutex_exit(&recvqp->qp_lock); 811 812 rpool = &ep->ep_rcvpool; 813 mutex_enter(&rpool->pool_lock); 814 RDS_DPRINTF2("rds_is_recvq_empty", "EP(%p): " 815 "There are %d pending buffers on sockqs", ep, rpool->pool_nbusy); 816 if (wait) { 817 /* Wait for all buffers to be freed by sockfs */ 818 while (rpool->pool_nbusy != 0) { 819 /* wait one second and try again */ 820 mutex_exit(&rpool->pool_lock); 821 delay(drv_usectohz(1000000)); 822 mutex_enter(&rpool->pool_lock); 823 } 824 } else if (rpool->pool_nbusy != 0) { 825 ret = B_FALSE; 826 } 827 mutex_exit(&rpool->pool_lock); 828 829 return (ret); 830 } 831 832 boolean_t 833 rds_is_sendq_empty(rds_ep_t *ep, uint_t wait) 834 { 835 rds_bufpool_t *spool; 836 rds_buf_t *bp; 837 boolean_t ret1 = B_TRUE; 838 839 /* check if all the sends completed */ 840 spool = &ep->ep_sndpool; 841 mutex_enter(&spool->pool_lock); 842 RDS_DPRINTF2("rds_is_sendq_empty", "EP(%p): " 843 "Send Pool contains: %d", ep, spool->pool_nbusy); 844 if (wait) { 845 while (spool->pool_nbusy != 0) { 846 if (rds_no_interrupts) { 847 /* wait one second and try again */ 848 delay(drv_usectohz(1000000)); 849 rds_poll_send_completions(ep->ep_sendcq, ep, 850 B_TRUE); 851 } else { 852 /* wait one second and try again */ 853 mutex_exit(&spool->pool_lock); 854 delay(drv_usectohz(1000000)); 855 mutex_enter(&spool->pool_lock); 856 } 857 } 858 859 if ((wait == 2) && (ep->ep_type == RDS_EP_TYPE_DATA)) { 860 rds_buf_t *ackbp; 861 862 /* 863 * If the last one is acknowledged then everything 864 * is acknowledged 865 */ 866 bp = spool->pool_tailp; 867 ackbp = *(rds_buf_t **)ep->ep_ack_addr; 868 RDS_DPRINTF2("rds_is_sendq_empty", "EP(%p): " 869 "Checking for acknowledgements", ep); 870 while (bp != ackbp) { 871 RDS_DPRINTF2("rds_is_sendq_empty", 872 "EP(%p) BP(0x%p/0x%p) last " 873 "sent/acknowledged", ep, bp, ackbp); 874 mutex_exit(&spool->pool_lock); 875 delay(drv_usectohz(1000000)); 876 mutex_enter(&spool->pool_lock); 877 878 bp = spool->pool_tailp; 879 ackbp = *(rds_buf_t **)ep->ep_ack_addr; 880 } 881 } 882 } else if (spool->pool_nbusy != 0) { 883 ret1 = B_FALSE; 884 } 885 mutex_exit(&spool->pool_lock); 886 887 /* check if all the rdma acks completed */ 888 mutex_enter(&ep->ep_lock); 889 RDS_DPRINTF2("rds_is_sendq_empty", "EP(%p): " 890 "Outstanding RDMA Acks: %d", ep, ep->ep_rdmacnt); 891 if (wait) { 892 while (ep->ep_rdmacnt != 0) { 893 if (rds_no_interrupts) { 894 /* wait one second and try again */ 895 delay(drv_usectohz(1000000)); 896 rds_poll_send_completions(ep->ep_sendcq, ep, 897 B_FALSE); 898 } else { 899 /* wait one second and try again */ 900 mutex_exit(&ep->ep_lock); 901 delay(drv_usectohz(1000000)); 902 mutex_enter(&ep->ep_lock); 903 } 904 } 905 } else if (ep->ep_rdmacnt != 0) { 906 ret1 = B_FALSE; 907 } 908 mutex_exit(&ep->ep_lock); 909 910 return (ret1); 911 } 912 913 /* Get buffers from the send pool */ 914 rds_buf_t * 915 rds_get_send_buf(rds_ep_t *ep, uint_t nbuf) 916 { 917 rds_buf_t *bp = NULL, *bp1; 918 rds_bufpool_t *spool; 919 uint_t waittime = rds_waittime_ms * 1000; 920 uint_t ix; 921 int ret; 922 923 RDS_DPRINTF4("rds_get_send_buf", "Enter: EP(%p) Buffers requested: %d", 924 ep, nbuf); 925 926 spool = &ep->ep_sndpool; 927 mutex_enter(&spool->pool_lock); 928 929 if (rds_no_interrupts) { 930 if ((spool->pool_sqpoll_pending == B_FALSE) && 931 (spool->pool_nbusy > 932 (spool->pool_nbuffers * rds_poll_percent_full)/100)) { 933 spool->pool_sqpoll_pending = B_TRUE; 934 mutex_exit(&spool->pool_lock); 935 rds_poll_send_completions(ep->ep_sendcq, ep, B_FALSE); 936 mutex_enter(&spool->pool_lock); 937 spool->pool_sqpoll_pending = B_FALSE; 938 } 939 } 940 941 if (spool->pool_nfree < nbuf) { 942 /* wait for buffers to become available */ 943 spool->pool_cv_count += nbuf; 944 ret = cv_timedwait_sig(&spool->pool_cv, &spool->pool_lock, 945 ddi_get_lbolt() + drv_usectohz(waittime)); 946 /* ret = cv_wait_sig(&spool->pool_cv, &spool->pool_lock); */ 947 if (ret == 0) { 948 /* signal pending */ 949 spool->pool_cv_count -= nbuf; 950 mutex_exit(&spool->pool_lock); 951 return (NULL); 952 } 953 954 spool->pool_cv_count -= nbuf; 955 } 956 957 /* Have the number of buffers needed */ 958 if (spool->pool_nfree > nbuf) { 959 bp = spool->pool_headp; 960 961 if (ep->ep_type == RDS_EP_TYPE_DATA) { 962 rds_buf_t *ackbp; 963 ackbp = *(rds_buf_t **)ep->ep_ack_addr; 964 965 /* check if all the needed buffers are acknowledged */ 966 bp1 = bp; 967 for (ix = 0; ix < nbuf; ix++) { 968 if ((bp1 == ackbp) || 969 (bp1->buf_state != RDS_SNDBUF_FREE)) { 970 /* 971 * The buffer is not yet signalled or 972 * is not yet acknowledged 973 */ 974 RDS_DPRINTF5("rds_get_send_buf", 975 "EP(%p) Buffer (%p) not yet " 976 "acked/completed", ep, bp1); 977 mutex_exit(&spool->pool_lock); 978 return (NULL); 979 } 980 981 bp1 = bp1->buf_nextp; 982 } 983 } 984 985 /* mark the buffers as pending */ 986 bp1 = bp; 987 for (ix = 1; ix < nbuf; ix++) { 988 ASSERT(bp1->buf_state == RDS_SNDBUF_FREE); 989 bp1->buf_state = RDS_SNDBUF_PENDING; 990 bp1 = bp1->buf_nextp; 991 } 992 ASSERT(bp1->buf_state == RDS_SNDBUF_FREE); 993 bp1->buf_state = RDS_SNDBUF_PENDING; 994 995 spool->pool_headp = bp1->buf_nextp; 996 bp1->buf_nextp = NULL; 997 if (spool->pool_headp == NULL) 998 spool->pool_tailp = NULL; 999 spool->pool_nfree -= nbuf; 1000 spool->pool_nbusy += nbuf; 1001 } 1002 mutex_exit(&spool->pool_lock); 1003 1004 RDS_DPRINTF4("rds_get_send_buf", "Return: EP(%p) Buffers requested: %d", 1005 ep, nbuf); 1006 1007 return (bp); 1008 } 1009 1010 #define RDS_MIN_BUF_TO_WAKE_THREADS 10 1011 1012 void 1013 rds_free_send_buf(rds_ep_t *ep, rds_buf_t *headp, rds_buf_t *tailp, uint_t nbuf, 1014 boolean_t lock) 1015 { 1016 rds_bufpool_t *spool; 1017 rds_buf_t *tmp; 1018 1019 RDS_DPRINTF4("rds_free_send_buf", "Enter"); 1020 1021 ASSERT(nbuf != 0); 1022 1023 if (tailp == NULL) { 1024 if (nbuf > 1) { 1025 tmp = headp; 1026 while (tmp->buf_nextp) { 1027 tmp = tmp->buf_nextp; 1028 } 1029 tailp = tmp; 1030 } else { 1031 tailp = headp; 1032 } 1033 } 1034 1035 spool = &ep->ep_sndpool; 1036 1037 if (lock == B_FALSE) { 1038 /* lock is not held outside */ 1039 mutex_enter(&spool->pool_lock); 1040 } 1041 1042 if (spool->pool_nfree) { 1043 spool->pool_tailp->buf_nextp = headp; 1044 } else { 1045 spool->pool_headp = headp; 1046 } 1047 spool->pool_tailp = tailp; 1048 1049 spool->pool_nfree += nbuf; 1050 spool->pool_nbusy -= nbuf; 1051 1052 if ((spool->pool_cv_count > 0) && 1053 (spool->pool_nfree > RDS_MIN_BUF_TO_WAKE_THREADS)) { 1054 if (spool->pool_nfree >= spool->pool_cv_count) 1055 cv_broadcast(&spool->pool_cv); 1056 else 1057 cv_signal(&spool->pool_cv); 1058 } 1059 1060 if (lock == B_FALSE) { 1061 mutex_exit(&spool->pool_lock); 1062 } 1063 1064 RDS_DPRINTF4("rds_free_send_buf", "Return"); 1065 } 1066 1067 #define RDS_NBUFFERS_TO_PUTBACK 100 1068 void 1069 rds_free_recv_buf(rds_buf_t *bp, uint_t nbuf) 1070 { 1071 rds_ep_t *ep; 1072 rds_bufpool_t *rpool; 1073 rds_buf_t *bp1; 1074 uint_t ix; 1075 1076 RDS_DPRINTF4("rds_free_recv_buf", "Enter"); 1077 1078 ASSERT(nbuf != 0); 1079 1080 ep = bp->buf_ep; 1081 rpool = &ep->ep_rcvpool; 1082 1083 mutex_enter(&rpool->pool_lock); 1084 1085 /* Add the buffers to the local pool */ 1086 if (rpool->pool_tailp == NULL) { 1087 ASSERT(rpool->pool_headp == NULL); 1088 ASSERT(rpool->pool_nfree == 0); 1089 rpool->pool_headp = bp; 1090 bp1 = bp; 1091 for (ix = 1; ix < nbuf; ix++) { 1092 if (bp1->buf_state == RDS_RCVBUF_ONSOCKQ) { 1093 rpool->pool_nbusy--; 1094 } 1095 bp1->buf_state = RDS_RCVBUF_FREE; 1096 bp1 = bp1->buf_nextp; 1097 } 1098 bp1->buf_nextp = NULL; 1099 if (bp->buf_state == RDS_RCVBUF_ONSOCKQ) { 1100 rpool->pool_nbusy--; 1101 } 1102 bp->buf_state = RDS_RCVBUF_FREE; 1103 rpool->pool_tailp = bp1; 1104 rpool->pool_nfree += nbuf; 1105 } else { 1106 bp1 = bp; 1107 for (ix = 1; ix < nbuf; ix++) { 1108 if (bp1->buf_state == RDS_RCVBUF_ONSOCKQ) { 1109 rpool->pool_nbusy--; 1110 } 1111 bp1->buf_state = RDS_RCVBUF_FREE; 1112 bp1 = bp1->buf_nextp; 1113 } 1114 bp1->buf_nextp = NULL; 1115 if (bp->buf_state == RDS_RCVBUF_ONSOCKQ) { 1116 rpool->pool_nbusy--; 1117 } 1118 bp->buf_state = RDS_RCVBUF_FREE; 1119 rpool->pool_tailp->buf_nextp = bp; 1120 rpool->pool_tailp = bp1; 1121 rpool->pool_nfree += nbuf; 1122 } 1123 1124 if (rpool->pool_nfree >= RDS_NBUFFERS_TO_PUTBACK) { 1125 bp = rpool->pool_headp; 1126 nbuf = rpool->pool_nfree; 1127 rpool->pool_headp = NULL; 1128 rpool->pool_tailp = NULL; 1129 rpool->pool_nfree = 0; 1130 mutex_exit(&rpool->pool_lock); 1131 1132 /* Free the buffers to the global pool */ 1133 if (ep->ep_type == RDS_EP_TYPE_DATA) { 1134 rds_free_buf(&rds_dpool, bp, nbuf); 1135 } else { 1136 rds_free_buf(&rds_cpool, bp, nbuf); 1137 } 1138 1139 return; 1140 } 1141 mutex_exit(&rpool->pool_lock); 1142 1143 RDS_DPRINTF4("rds_free_recv_buf", "Return"); 1144 } 1145