1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * hermon_wr.c 29 * Hermon Work Request Processing Routines 30 * 31 * Implements all the routines necessary to provide the PostSend(), 32 * PostRecv() and PostSRQ() verbs. Also contains all the code 33 * necessary to implement the Hermon WRID tracking mechanism. 
 */

#include <sys/types.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/modctl.h>
#include <sys/avl.h>

#include <sys/ib/adapters/hermon/hermon.h>

static uint32_t hermon_wr_get_immediate(ibt_send_wr_t *wr);
static int hermon_wr_bind_check(hermon_state_t *state, ibt_send_wr_t *wr);
static int hermon_wqe_send_build(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_send_wr_t *wr, uint64_t *desc, uint_t *size);
static int hermon_wqe_mlx_build(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_send_wr_t *wr, uint64_t *desc, uint_t *size);
static void hermon_wqe_headroom(uint_t from, hermon_qphdl_t qp);
static int hermon_wqe_recv_build(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_recv_wr_t *wr, uint64_t *desc);
static int hermon_wqe_srq_build(hermon_state_t *state, hermon_srqhdl_t srq,
    ibt_recv_wr_t *wr, uint64_t *desc);
static void hermon_wqe_sync(void *hdl, uint_t sync_from,
    uint_t sync_to, uint_t sync_type, uint_t flag);
static hermon_workq_avl_t *hermon_wrid_wqavl_find(hermon_cqhdl_t cq, uint_t qpn,
    uint_t send_or_recv);
static void hermon_cq_workq_add(hermon_cqhdl_t cq, hermon_workq_avl_t *wqavl);
static void hermon_cq_workq_remove(hermon_cqhdl_t cq,
    hermon_workq_avl_t *wqavl);

static ibt_wr_ds_t null_sgl = { 0, 0x00000100, 0 };

/*
 * hermon_post_send_ud()
 *    Optimized fast path for posting a list of Send / Send-LSO work
 *    requests to a UD (Unreliable Datagram) QP send queue.
 *
 *    Locking contract: the caller must already hold qp->qp_sq_lock (see
 *    the ASSERT below); this function releases it on every return path
 *    (noted by the _NOTE side-effect annotation).
 *
 *    On success (or partial success) the number of WQEs actually posted
 *    is returned through "num_posted" (if non-NULL).  Returns
 *    DDI_SUCCESS, or an IBT_* error if a request cannot be posted
 *    (queue full, bad AH, invalid opcode, SGL too long), or a CI
 *    failure if the doorbell PIO faults.
 */
static int
hermon_post_send_ud(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_send_wr_t *wr, uint_t num_wr, uint_t *num_posted)
{
	hermon_hw_snd_wqe_ud_t		*ud;
	hermon_workq_hdr_t		*wq;
	hermon_ahhdl_t			ah;
	ibt_ud_dest_t			*dest;
	uint64_t			*desc;
	uint32_t			desc_sz;
	uint32_t			signaled_dbd, solicited;
	uint32_t			head, tail, next_tail, qsize_msk;
	uint32_t			hdrmwqes;
	uint32_t			nopcode, fence, immed_data = 0;
	hermon_hw_wqe_sgl_t		*ds, *old_ds;
	ibt_wr_ds_t			*sgl;
	uint32_t			nds, dnds;
	int				i, j, last_ds, num_ds, status;
	uint32_t			*wqe_start;
	int				sectperwqe;
	uint_t				posted_cnt = 0;

	/* initialize the FMA retry loop */
	hermon_pio_init(fm_loop_cnt, fm_status, fm_test_num);

	ASSERT(MUTEX_HELD(&qp->qp_sq_lock));
	_NOTE(LOCK_RELEASED_AS_SIDE_EFFECT(&qp->qp_sq_lock))

	/* make sure we see any update of wq_head by the CQ-completion side */
	membar_consumer();

	/* Save away some initial QP state */
	wq = qp->qp_sq_wqhdr;
	qsize_msk = wq->wq_mask;
	hdrmwqes  = qp->qp_sq_hdrmwqes;		/* in WQEs  */
	sectperwqe = 1 << (qp->qp_sq_log_wqesz - 2);

	tail	  = wq->wq_tail;
	head	  = wq->wq_head;
	status	  = DDI_SUCCESS;

post_next:
	/*
	 * Check for "queue full" condition.  If the queue
	 * is already full, then no more WQEs can be posted.
	 * So break out, ring a doorbell (if necessary) and
	 * return an error
	 */
	if (wq->wq_full != 0) {
		status = IBT_QP_FULL;
		goto done;
	}

	/*
	 * The "full" mark is set one headroom's worth of WQEs early, so
	 * the hardware always has invalidated headroom entries ahead of
	 * the producer index.
	 */
	next_tail = (tail + 1) & qsize_msk;
	if (((tail + hdrmwqes) & qsize_msk) == head) {
		wq->wq_full = 1;
	}

	desc = HERMON_QP_SQ_ENTRY(qp, tail);

	/* UD segment follows the control segment; SGL follows the UD seg */
	ud = (hermon_hw_snd_wqe_ud_t *)((uintptr_t)desc +
	    sizeof (hermon_hw_snd_wqe_ctrl_t));
	ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)ud +
	    sizeof (hermon_hw_snd_wqe_ud_t));
	nds = wr->wr_nds;
	sgl = wr->wr_sgl;
	num_ds = 0;

	/*
	 * need to know the count of destination nds for backward loop
	 * NOTE(review): "dnds" is computed here but not referenced again
	 * in this function — appears vestigial; confirm before removing.
	 */
	for (dnds = 0, i = 0; i < nds; i++) {
		if (sgl[i].ds_len != 0)
			dnds++;
	}

	/*
	 * Build a Send or Send_LSO WQE
	 */
	if (wr->wr_opcode == IBT_WRC_SEND_LSO) {
		int total_len;

		nopcode = HERMON_WQE_SEND_NOPCODE_LSO;
		if (wr->wr.ud_lso.lso_hdr_sz > 60) {
			nopcode |= (1 << 6);	/* ReRead bit must be set */
		}
		dest = wr->wr.ud_lso.lso_ud_dest;
		ah = (hermon_ahhdl_t)dest->ud_ah;
		if (ah == NULL) {
			status = IBT_AH_HDL_INVALID;
			goto done;
		}
		HERMON_WQE_BUILD_UD(qp, ud, ah, dest);

		/* LSO header is padded to a 16-byte boundary (+4 hdr bytes) */
		total_len = (4 + 0xf + wr->wr.ud_lso.lso_hdr_sz) & ~0xf;
		/* reject if header + SGL would overflow this WQE slot */
		if ((uintptr_t)ds + total_len + (nds * 16) >
		    (uintptr_t)desc + (1 << qp->qp_sq_log_wqesz)) {
			status = IBT_QP_SGL_LEN_INVALID;
			goto done;
		}
		old_ds = ds;
		/* copy the LSO header just past the LSO segment's dword */
		bcopy(wr->wr.ud_lso.lso_hdr, (uint32_t *)old_ds + 1,
		    wr->wr.ud_lso.lso_hdr_sz);
		ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)ds + total_len);
		i = 0;
	} else if (wr->wr_opcode == IBT_WRC_SEND) {
		if (wr->wr_flags & IBT_WR_SEND_IMMED) {
			nopcode = HERMON_WQE_SEND_NOPCODE_SENDI;
			immed_data = wr->wr.ud.udwr_immed;
		} else {
			nopcode = HERMON_WQE_SEND_NOPCODE_SEND;
		}
		dest = wr->wr.ud.udwr_dest;
		ah = (hermon_ahhdl_t)dest->ud_ah;
		if (ah == NULL) {
			status = IBT_AH_HDL_INVALID;
			goto done;
		}
		HERMON_WQE_BUILD_UD(qp, ud, ah, dest);
		i = 0;
	} else {
		/* only Send and Send-LSO are valid on a UD QP */
		status = IBT_QP_OP_TYPE_INVALID;
		goto done;
	}

	if (nds > qp->qp_sq_sgl) {
		status = IBT_QP_SGL_LEN_INVALID;
		goto done;
	}
	/* count the non-empty SGL entries to find the final DS slot */
	for (last_ds = num_ds, j = i; j < nds; j++) {
		if (sgl[j].ds_len != 0)
			last_ds++;	/* real last ds of wqe to fill */
	}
	desc_sz = ((uintptr_t)&ds[last_ds] - (uintptr_t)desc) >> 0x4;
	/*
	 * Fill the data segments BACKWARD (last to first), skipping
	 * zero-length entries, so each segment lands in its final slot.
	 */
	for (j = nds; --j >= i; ) {
		if (sgl[j].ds_len == 0) {
			continue;
		}

		/*
		 * Fill in the Data Segment(s) for the current WQE, using the
		 * information contained in the scatter-gather list of the
		 * work request.
		 */
		last_ds--;
		HERMON_WQE_BUILD_DATA_SEG_SEND(&ds[last_ds], &sgl[j]);
	}

	membar_producer();

	/* LSO segment is written after the barrier, once the DS list is in */
	if (wr->wr_opcode == IBT_WRC_SEND_LSO) {
		HERMON_WQE_BUILD_LSO(qp, old_ds, wr->wr.ud_lso.lso_mss,
		    wr->wr.ud_lso.lso_hdr_sz);
	}

	fence = (wr->wr_flags & IBT_WR_SEND_FENCE) ? 1 : 0;

	signaled_dbd = ((qp->qp_sq_sigtype == HERMON_QP_SQ_ALL_SIGNALED) ||
	    (wr->wr_flags & IBT_WR_SEND_SIGNAL)) ? 1 : 0;

	solicited = (wr->wr_flags & IBT_WR_SEND_SOLICIT) ? 1 : 0;

	HERMON_WQE_SET_CTRL_SEGMENT(desc, desc_sz, fence, immed_data,
	    solicited, signaled_dbd, wr->wr_flags & IBT_WR_SEND_CKSUM, qp);

	wq->wq_wrid[tail] = wr->wr_id;

	tail = next_tail;

	/* Update some of the state in the QP */
	wq->wq_tail = tail;

	membar_producer();

	/*
	 * Now set the ownership bit and opcode (first dword).  This must
	 * be the LAST store to the WQE so the hardware never sees a
	 * half-built descriptor.
	 */
	HERMON_SET_SEND_WQE_OWNER(qp, (uint32_t *)desc, nopcode);

	posted_cnt++;
	if (--num_wr > 0) {
		/* do the invalidate of the headroom */
		wqe_start = (uint32_t *)HERMON_QP_SQ_ENTRY(qp,
		    (tail + hdrmwqes) & qsize_msk);
		for (i = 16; i < sectperwqe; i += 16) {
			wqe_start[i] = 0xFFFFFFFF;
		}

		wr++;
		goto post_next;
	}
done:
	if (posted_cnt != 0) {
		ddi_acc_handle_t uarhdl = hermon_get_uarhdl(state);

		membar_producer();

		/* the FMA retry loop starts for Hermon doorbell register. */
		hermon_pio_start(state, uarhdl, pio_error, fm_loop_cnt,
		    fm_status, fm_test_num);

		/* Ring the doorbell to tell HW about the new WQE(s) */
		HERMON_UAR_DOORBELL(state, uarhdl,
		    (uint64_t *)(void *)&state->hs_uar->send,
		    (uint64_t)qp->qp_ring);

		/* the FMA retry loop ends. */
		hermon_pio_end(state, uarhdl, pio_error, fm_loop_cnt,
		    fm_status, fm_test_num);

		/* do the invalidate of the headroom */
		wqe_start = (uint32_t *)HERMON_QP_SQ_ENTRY(qp,
		    (tail + hdrmwqes) & qsize_msk);
		for (i = 16; i < sectperwqe; i += 16) {
			wqe_start[i] = 0xFFFFFFFF;
		}
	}
	if (num_posted != NULL)
		*num_posted = posted_cnt;

	mutex_exit(&qp->qp_sq_lock);

	return (status);

pio_error:
	mutex_exit(&qp->qp_sq_lock);
	hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
	return (ibc_get_ci_failure(0));
}

/*
 * hermon_post_send_rc()
 *    Optimized fast path for posting a list of work requests to an RC
 *    (Reliable Connected) QP send queue.  Valid opcodes are Send,
 *    RDMA Read/Write, Atomic Compare-Swap / Fetch-Add, and memory
 *    window Bind.
 *
 *    Locking contract: identical to hermon_post_send_ud() — caller
 *    holds qp->qp_sq_lock on entry; it is released on all return paths.
 */
static int
hermon_post_send_rc(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_send_wr_t *wr, uint_t num_wr, uint_t *num_posted)
{
	uint64_t			*desc;
	hermon_workq_hdr_t		*wq;
	uint32_t			desc_sz;
	uint32_t			signaled_dbd, solicited;
	uint32_t			head, tail, next_tail, qsize_msk;
	uint32_t			hdrmwqes;
	int				status;
	uint32_t			nopcode, fence, immed_data = 0;
	hermon_hw_snd_wqe_remaddr_t	*rc;
	hermon_hw_snd_wqe_atomic_t	*at;
	hermon_hw_snd_wqe_bind_t	*bn;
	hermon_hw_wqe_sgl_t		*ds;
	ibt_wr_ds_t			*sgl;
	uint32_t			nds;
	int				i, last_ds, num_ds;
	uint32_t			*wqe_start;
	int				sectperwqe;
	uint_t				posted_cnt = 0;

	/* initialize the FMA retry loop */
	hermon_pio_init(fm_loop_cnt, fm_status, fm_test_num);

	ASSERT(MUTEX_HELD(&qp->qp_sq_lock));
	_NOTE(LOCK_RELEASED_AS_SIDE_EFFECT(&qp->qp_sq_lock))

	/* make sure we see any update of wq_head */
	membar_consumer();

	/* Save away some initial QP state */
	wq = qp->qp_sq_wqhdr;
	qsize_msk = wq->wq_mask;
	hdrmwqes  = qp->qp_sq_hdrmwqes;		/* in WQEs  */
	sectperwqe = 1 << (qp->qp_sq_log_wqesz - 2);

	tail	  = wq->wq_tail;
	head	  = wq->wq_head;
	status	  = DDI_SUCCESS;

post_next:
	/*
	 * Check for "queue full" condition.  If the queue
	 * is already full, then no more WQEs can be posted.
	 * So break out, ring a doorbell (if necessary) and
	 * return an error
	 */
	if (wq->wq_full != 0) {
		status = IBT_QP_FULL;
		goto done;
	}
	next_tail = (tail + 1) & qsize_msk;
	if (((tail + hdrmwqes) & qsize_msk) == head) {
		wq->wq_full = 1;
	}

	desc = HERMON_QP_SQ_ENTRY(qp, tail);

	ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)desc +
	    sizeof (hermon_hw_snd_wqe_ctrl_t));
	nds = wr->wr_nds;
	sgl = wr->wr_sgl;
	num_ds = 0;

	/*
	 * Validate the operation type.  For RC requests, we allow
	 * "Send", "RDMA Read", "RDMA Write", various "Atomic"
	 * operations, and memory window "Bind"
	 */
	switch (wr->wr_opcode) {
	default:
		status = IBT_QP_OP_TYPE_INVALID;
		goto done;

	case IBT_WRC_SEND:
		if (wr->wr_flags & IBT_WR_SEND_IMMED) {
			nopcode = HERMON_WQE_SEND_NOPCODE_SENDI;
			immed_data = wr->wr.rc.rcwr.send_immed;
		} else {
			nopcode = HERMON_WQE_SEND_NOPCODE_SEND;
		}
		break;

	/*
	 * If this is an RDMA Read or RDMA Write request, then fill
	 * in the "Remote Address" header fields.
	 */
	case IBT_WRC_RDMAW:
		if (wr->wr_flags & IBT_WR_SEND_IMMED) {
			nopcode = HERMON_WQE_SEND_NOPCODE_RDMAWI;
			immed_data = wr->wr.rc.rcwr.rdma.rdma_immed;
		} else {
			nopcode = HERMON_WQE_SEND_NOPCODE_RDMAW;
		}
		/* FALLTHROUGH */
	case IBT_WRC_RDMAR:
		if (wr->wr_opcode == IBT_WRC_RDMAR)
			nopcode = HERMON_WQE_SEND_NOPCODE_RDMAR;
		rc = (hermon_hw_snd_wqe_remaddr_t *)((uintptr_t)desc +
		    sizeof (hermon_hw_snd_wqe_ctrl_t));

		/*
		 * Build the Remote Address Segment for the WQE, using
		 * the information from the RC work request.
		 */
		HERMON_WQE_BUILD_REMADDR(qp, rc, &wr->wr.rc.rcwr.rdma);

		/* Update "ds" for filling in Data Segments (below) */
		ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)rc +
		    sizeof (hermon_hw_snd_wqe_remaddr_t));
		break;

	/*
	 * If this is one of the Atomic type operations (i.e
	 * Compare-Swap or Fetch-Add), then fill in both the "Remote
	 * Address" header fields and the "Atomic" header fields.
	 */
	case IBT_WRC_CSWAP:
		nopcode = HERMON_WQE_SEND_NOPCODE_ATMCS;
		/* FALLTHROUGH */
	case IBT_WRC_FADD:
		if (wr->wr_opcode == IBT_WRC_FADD)
			nopcode = HERMON_WQE_SEND_NOPCODE_ATMFA;
		rc = (hermon_hw_snd_wqe_remaddr_t *)((uintptr_t)desc +
		    sizeof (hermon_hw_snd_wqe_ctrl_t));
		at = (hermon_hw_snd_wqe_atomic_t *)((uintptr_t)rc +
		    sizeof (hermon_hw_snd_wqe_remaddr_t));

		/*
		 * Build the Remote Address and Atomic Segments for
		 * the WQE, using the information from the RC Atomic
		 * work request.
		 */
		HERMON_WQE_BUILD_RC_ATOMIC_REMADDR(qp, rc, wr);
		HERMON_WQE_BUILD_ATOMIC(qp, at, wr->wr.rc.rcwr.atomic);

		/* Update "ds" for filling in Data Segments (below) */
		ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)at +
		    sizeof (hermon_hw_snd_wqe_atomic_t));

		/*
		 * Update "nds" and "sgl" because Atomic requests have
		 * only a single Data Segment.
		 */
		nds = 1;
		sgl = wr->wr_sgl;
		break;

	/*
	 * If this is memory window Bind operation, then we call the
	 * hermon_wr_bind_check() routine to validate the request and
	 * to generate the updated RKey.  If this is successful, then
	 * we fill in the WQE's "Bind" header fields.
	 */
	case IBT_WRC_BIND:
		nopcode = HERMON_WQE_SEND_NOPCODE_BIND;
		status = hermon_wr_bind_check(state, wr);
		if (status != DDI_SUCCESS)
			goto done;

		bn = (hermon_hw_snd_wqe_bind_t *)((uintptr_t)desc +
		    sizeof (hermon_hw_snd_wqe_ctrl_t));

		/*
		 * Build the Bind Memory Window Segments for the WQE,
		 * using the information from the RC Bind memory
		 * window work request.
		 */
		HERMON_WQE_BUILD_BIND(qp, bn, wr->wr.rc.rcwr.bind);

		/*
		 * Update the "ds" pointer.  Even though the "bind"
		 * operation requires no SGLs, this is necessary to
		 * facilitate the correct descriptor size calculations
		 * (below).
		 */
		ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)bn +
		    sizeof (hermon_hw_snd_wqe_bind_t));
		nds = 0;
	}

	/*
	 * Now fill in the Data Segments (SGL) for the Send WQE based
	 * on the values setup above (i.e. "sgl", "nds", and the "ds"
	 * pointer.  Start by checking for a valid number of SGL entries
	 */
	if (nds > qp->qp_sq_sgl) {
		status = IBT_QP_SGL_LEN_INVALID;
		goto done;
	}

	/* count non-empty entries so we know where the DS list ends */
	for (last_ds = num_ds, i = 0; i < nds; i++) {
		if (sgl[i].ds_len != 0)
			last_ds++;	/* real last ds of wqe to fill */
	}
	desc_sz = ((uintptr_t)&ds[last_ds] - (uintptr_t)desc) >> 0x4;
	/* fill data segments backward, skipping zero-length entries */
	for (i = nds; --i >= 0; ) {
		if (sgl[i].ds_len == 0) {
			continue;
		}

		/*
		 * Fill in the Data Segment(s) for the current WQE, using the
		 * information contained in the scatter-gather list of the
		 * work request.
		 */
		last_ds--;
		HERMON_WQE_BUILD_DATA_SEG_SEND(&ds[last_ds], &sgl[i]);
	}

	fence = (wr->wr_flags & IBT_WR_SEND_FENCE) ? 1 : 0;

	signaled_dbd = ((qp->qp_sq_sigtype == HERMON_QP_SQ_ALL_SIGNALED) ||
	    (wr->wr_flags & IBT_WR_SEND_SIGNAL)) ? 1 : 0;

	solicited = (wr->wr_flags & IBT_WR_SEND_SOLICIT) ? 1 : 0;

	HERMON_WQE_SET_CTRL_SEGMENT(desc, desc_sz, fence, immed_data, solicited,
	    signaled_dbd, wr->wr_flags & IBT_WR_SEND_CKSUM, qp);

	wq->wq_wrid[tail] = wr->wr_id;

	tail = next_tail;

	/* Update some of the state in the QP */
	wq->wq_tail = tail;

	membar_producer();

	/*
	 * Now set the ownership bit of the first one in the chain.
	 * Must be the last store to the WQE (hardware hand-off).
	 */
	HERMON_SET_SEND_WQE_OWNER(qp, (uint32_t *)desc, nopcode);

	posted_cnt++;
	if (--num_wr > 0) {
		/* do the invalidate of the headroom */
		wqe_start = (uint32_t *)HERMON_QP_SQ_ENTRY(qp,
		    (tail + hdrmwqes) & qsize_msk);
		for (i = 16; i < sectperwqe; i += 16) {
			wqe_start[i] = 0xFFFFFFFF;
		}

		wr++;
		goto post_next;
	}
done:

	if (posted_cnt != 0) {
		ddi_acc_handle_t uarhdl = hermon_get_uarhdl(state);

		membar_producer();

		/* the FMA retry loop starts for Hermon doorbell register. */
		hermon_pio_start(state, uarhdl, pio_error, fm_loop_cnt,
		    fm_status, fm_test_num);

		/* Ring the doorbell */
		HERMON_UAR_DOORBELL(state, uarhdl,
		    (uint64_t *)(void *)&state->hs_uar->send,
		    (uint64_t)qp->qp_ring);

		/* the FMA retry loop ends. */
		hermon_pio_end(state, uarhdl, pio_error, fm_loop_cnt,
		    fm_status, fm_test_num);

		/* do the invalidate of the headroom */
		wqe_start = (uint32_t *)HERMON_QP_SQ_ENTRY(qp,
		    (tail + hdrmwqes) & qsize_msk);
		for (i = 16; i < sectperwqe; i += 16) {
			wqe_start[i] = 0xFFFFFFFF;
		}
	}
	/*
	 * Update the "num_posted" return value (if necessary).
	 * Then drop the locks and return success.
	 */
	if (num_posted != NULL) {
		*num_posted = posted_cnt;
	}

	mutex_exit(&qp->qp_sq_lock);
	return (status);

pio_error:
	mutex_exit(&qp->qp_sq_lock);
	hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
	return (ibc_get_ci_failure(0));
}

/*
 * hermon_post_send()
 *    Context: Can be called from interrupt or base context.
 *
 *    Top-level PostSend verb entry point.  Rejects user-mapped QPs and
 *    invalid QP states, then dispatches: UD and RC QPs go to the
 *    optimized fast paths above (which inherit qp_sq_lock and drop it);
 *    special QPs (QP0/QP1) and UC QPs fall through to the general
 *    chain-building loop below.
 */
int
hermon_post_send(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_send_wr_t *wr, uint_t num_wr, uint_t *num_posted)
{
	ibt_send_wr_t			*curr_wr;
	hermon_workq_hdr_t		*wq;
	hermon_ahhdl_t			ah;
	uint64_t			*desc, *prev;
	uint32_t			desc_sz;
	uint32_t			signaled_dbd, solicited;
	uint32_t			head, tail, next_tail, qsize_msk;
	uint32_t			sync_from, sync_to;
	uint32_t			hdrmwqes;
	uint_t				currindx, wrindx, numremain;
	uint_t				chainlen;
	uint_t				posted_cnt, maxstat;
	uint_t				total_posted;
	int				status;
	uint32_t			nopcode, fence, immed_data = 0;
	uint32_t			prev_nopcode;

	/* initialize the FMA retry loop */
	hermon_pio_init(fm_loop_cnt, fm_status, fm_test);

	/*
	 * Check for user-mappable QP memory.  Note:  We do not allow kernel
	 * clients to post to QP memory that is accessible directly by the
	 * user.  If the QP memory is user accessible, then return an error.
	 */
	if (qp->qp_is_umap) {
		return (IBT_QP_HDL_INVALID);
	}

	mutex_enter(&qp->qp_lock);

	/*
	 * Check QP state.  Can not post Send requests from the "Reset",
	 * "Init", or "RTR" states
	 */
	if ((qp->qp_state == HERMON_QP_RESET) ||
	    (qp->qp_state == HERMON_QP_INIT) ||
	    (qp->qp_state == HERMON_QP_RTR)) {
		mutex_exit(&qp->qp_lock);
		return (IBT_QP_STATE_INVALID);
	}
	/*
	 * NOTE(review): qp_lock is dropped before qp_sq_lock is taken, so
	 * a concurrent state transition could slip in between — presumably
	 * benign (HW returns an error CQE); confirm against the QP state
	 * machine before relying on it.
	 */
	mutex_exit(&qp->qp_lock);
	mutex_enter(&qp->qp_sq_lock);

	if (qp->qp_is_special)
		goto post_many;

	/* Use these optimized functions most of the time */
	if (qp->qp_serv_type == HERMON_QP_UD)
		return (hermon_post_send_ud(state, qp, wr, num_wr, num_posted));

	if (qp->qp_serv_type == HERMON_QP_RC)
		return (hermon_post_send_rc(state, qp, wr, num_wr, num_posted));

	if (qp->qp_serv_type == HERMON_QP_UC)
		goto post_many;

	mutex_exit(&qp->qp_sq_lock);
	return (IBT_QP_SRV_TYPE_INVALID);

post_many:
	/* general loop for non-optimized posting */

	/* make sure we see any update of wq_head */
	membar_consumer();

	/* Save away some initial QP state */
	wq = qp->qp_sq_wqhdr;
	qsize_msk = wq->wq_mask;
	tail	  = wq->wq_tail;
	head	  = wq->wq_head;
	hdrmwqes  = qp->qp_sq_hdrmwqes;		/* in WQEs  */

	/* Initialize posted_cnt */
	posted_cnt = 0;
	total_posted = 0;

	/*
	 * For each ibt_send_wr_t in the wr[] list passed in, parse the
	 * request and build a Send WQE.  NOTE:  Because we are potentially
	 * building a chain of WQEs to post, we want to build them all first,
	 * and set the valid (HW Ownership) bit on all but the first.
	 * However, we do not want to validate the first one until the
	 * entire chain of WQEs has been built.  Then in the final
	 * we set the valid bit in the first, flush if needed, and as a last
	 * step ring the appropriate doorbell.  NOTE: the doorbell ring may
	 * NOT be needed if the HCA is already processing, but the doorbell
	 * ring will be done regardless.  NOTE ALSO:  It is possible for
	 * more Work Requests to be posted than the HW will support at one
	 * shot.  If this happens, we need to be able to post and ring
	 * several chains here until the the entire request is complete.
	 * NOTE ALSO:  the term "chain" is used to differentiate it from
	 * Work Request List passed in; and because that's the terminology
	 * from the previous generations of HCA - but the WQEs are not, in fact
	 * chained together for Hermon
	 */

	wrindx = 0;
	numremain = num_wr;
	status	  = DDI_SUCCESS;
	while ((wrindx < num_wr) && (status == DDI_SUCCESS)) {
		/*
		 * For the first WQE on a new chain we need "prev" to point
		 * to the current descriptor.
		 */
		prev = HERMON_QP_SQ_ENTRY(qp, tail);

		/*
		 * unlike Tavor & Arbel, tail will maintain the number of the
		 * next (this) WQE to be posted.  Since there is no backward
		 * linking in Hermon, we can always just look ahead
		 */
		/*
		 * Before we begin, save the current "tail index" for later
		 * DMA sync
		 */
		/* NOTE: don't need to go back one like arbel/tavor */
		sync_from = tail;

		/*
		 * Break the request up into lists that are less than or
		 * equal to the maximum number of WQEs that can be posted
		 * per doorbell ring - 256 currently
		 */
		chainlen = (numremain > HERMON_QP_MAXDESC_PER_DB) ?
		    HERMON_QP_MAXDESC_PER_DB : numremain;
		numremain -= chainlen;

		for (currindx = 0; currindx < chainlen; currindx++, wrindx++) {
			/*
			 * Check for "queue full" condition.  If the queue
			 * is already full, then no more WQEs can be posted.
			 * So break out, ring a doorbell (if necessary) and
			 * return an error
			 */
			if (wq->wq_full != 0) {
				status = IBT_QP_FULL;
				break;
			}

			/*
			 * Increment the "tail index".  Check for "queue
			 * full" condition incl. headroom.  If we detect that
			 * the current work request is going to fill the work
			 * queue, then we mark this condition and continue.
			 * Don't need >=, because going one-by-one we have to
			 * hit it exactly sooner or later
			 */

			next_tail = (tail + 1) & qsize_msk;
			if (((tail + hdrmwqes) & qsize_msk) == head) {
				wq->wq_full = 1;
			}

			/*
			 * Get the address of the location where the next
			 * Send WQE should be built
			 */
			desc = HERMON_QP_SQ_ENTRY(qp, tail);
			/*
			 * Call hermon_wqe_send_build() to build the WQE
			 * at the given address.  This routine uses the
			 * information in the ibt_send_wr_t list (wr[]) and
			 * returns the size of the WQE when it returns.
			 */
			status = hermon_wqe_send_build(state, qp,
			    &wr[wrindx], desc, &desc_sz);
			if (status != DDI_SUCCESS) {
				break;
			}

			/*
			 * Now, build the Ctrl Segment based on
			 * what was just done
			 */
			curr_wr = &wr[wrindx];

			switch (curr_wr->wr_opcode) {
			case IBT_WRC_RDMAW:
				if (curr_wr->wr_flags & IBT_WR_SEND_IMMED) {
					nopcode =
					    HERMON_WQE_SEND_NOPCODE_RDMAWI;
					immed_data =
					    hermon_wr_get_immediate(curr_wr);
				} else {
					nopcode = HERMON_WQE_SEND_NOPCODE_RDMAW;
				}
				break;

			case IBT_WRC_SEND:
				if (curr_wr->wr_flags & IBT_WR_SEND_IMMED) {
					nopcode = HERMON_WQE_SEND_NOPCODE_SENDI;
					immed_data =
					    hermon_wr_get_immediate(curr_wr);
				} else {
					nopcode = HERMON_WQE_SEND_NOPCODE_SEND;
				}
				break;

			case IBT_WRC_SEND_LSO:
				nopcode = HERMON_WQE_SEND_NOPCODE_LSO;
				break;

			case IBT_WRC_RDMAR:
				nopcode = HERMON_WQE_SEND_NOPCODE_RDMAR;
				break;

			case IBT_WRC_CSWAP:
				nopcode = HERMON_WQE_SEND_NOPCODE_ATMCS;
				break;

			case IBT_WRC_FADD:
				nopcode = HERMON_WQE_SEND_NOPCODE_ATMFA;
				break;

			case IBT_WRC_BIND:
				nopcode = HERMON_WQE_SEND_NOPCODE_BIND;
				break;
			}

			fence = (curr_wr->wr_flags & IBT_WR_SEND_FENCE) ? 1 : 0;

			/*
			 * now, build up the control segment, leaving the
			 * owner bit as it is
			 */

			if ((qp->qp_sq_sigtype == HERMON_QP_SQ_ALL_SIGNALED) ||
			    (curr_wr->wr_flags & IBT_WR_SEND_SIGNAL)) {
				signaled_dbd = 1;
			} else {
				signaled_dbd = 0;
			}
			if (curr_wr->wr_flags & IBT_WR_SEND_SOLICIT)
				solicited = 1;
			else
				solicited = 0;

			if (qp->qp_is_special) {
				/* Ensure correctness, set the ReRead bit */
				nopcode |= (1 << 6);
				ah = (hermon_ahhdl_t)
				    curr_wr->wr.ud.udwr_dest->ud_ah;
				mutex_enter(&ah->ah_lock);
				maxstat = ah->ah_udav->max_stat_rate;
				HERMON_WQE_SET_MLX_CTRL_SEGMENT(desc, desc_sz,
				    signaled_dbd, maxstat, ah->ah_udav->rlid,
				    qp, ah->ah_udav->sl);
				mutex_exit(&ah->ah_lock);
			} else {
				HERMON_WQE_SET_CTRL_SEGMENT(desc, desc_sz,
				    fence, immed_data, solicited,
				    signaled_dbd, curr_wr->wr_flags &
				    IBT_WR_SEND_CKSUM, qp);
			}
			wq->wq_wrid[tail] = curr_wr->wr_id;

			/*
			 * If this is not the first descriptor on the current
			 * chain, then set the ownership bit.
			 */
			if (currindx != 0) {		/* not the first */
				membar_producer();
				HERMON_SET_SEND_WQE_OWNER(qp,
				    (uint32_t *)desc, nopcode);
			} else
				prev_nopcode = nopcode;

			/*
			 * Update the current "tail index" and increment
			 * "posted_cnt"
			 */
			tail = next_tail;
			posted_cnt++;
		}

		/*
		 * If we reach here and there are one or more WQEs which have
		 * been successfully built as a chain, we have to finish up
		 * and prepare them for writing to the HW
		 * The steps are:
		 * 	1. do the headroom fixup
		 *	2. add in the size of the headroom for the sync
		 *	3. write the owner bit for the first WQE
		 *	4. sync them
		 *	5. fix up the structures
		 *	6. hit the doorbell in UAR
		 */
		if (posted_cnt != 0) {
			ddi_acc_handle_t uarhdl = hermon_get_uarhdl(state);

			/*
			 * Save away updated "tail index" for the DMA sync
			 * including the headroom that will be needed
			 */
			sync_to = (tail + hdrmwqes) & qsize_msk;

			/* do the invalidate of the headroom */

			hermon_wqe_headroom(tail, qp);

			/* Do a DMA sync for current send WQE(s) */
			hermon_wqe_sync(qp, sync_from, sync_to, HERMON_WR_SEND,
			    DDI_DMA_SYNC_FORDEV);

			/* Update some of the state in the QP */
			wq->wq_tail = tail;
			total_posted += posted_cnt;
			posted_cnt = 0;

			membar_producer();

			/*
			 * Now set the ownership bit of the first
			 * one in the chain
			 */
			HERMON_SET_SEND_WQE_OWNER(qp, (uint32_t *)prev,
			    prev_nopcode);

			/* the FMA retry loop starts for Hermon doorbell. */
			hermon_pio_start(state, uarhdl, pio_error, fm_loop_cnt,
			    fm_status, fm_test);

			HERMON_UAR_DOORBELL(state, uarhdl,
			    (uint64_t *)(void *)&state->hs_uar->send,
			    (uint64_t)qp->qp_ring);

			/* the FMA retry loop ends. */
			hermon_pio_end(state, uarhdl, pio_error, fm_loop_cnt,
			    fm_status, fm_test);
		}
	}

	/*
	 * Update the "num_posted" return value (if necessary).
	 * Then drop the locks and return success.
	 */
	if (num_posted != NULL) {
		*num_posted = total_posted;
	}
	mutex_exit(&qp->qp_sq_lock);
	return (status);

pio_error:
	mutex_exit(&qp->qp_sq_lock);
	hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
	return (ibc_get_ci_failure(0));
}


/*
 * hermon_post_recv()
 *    Context: Can be called from interrupt or base context.
 *
 *    Posts a list of receive work requests to a (non-SRQ) QP receive
 *    queue and updates the RQ doorbell record with the new WQE counter.
 */
int
hermon_post_recv(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_recv_wr_t *wr, uint_t num_wr, uint_t *num_posted)
{
	uint64_t			*desc;
	hermon_workq_hdr_t		*wq;
	uint32_t			head, tail, next_tail, qsize_msk;
	uint32_t			sync_from, sync_to;
	uint_t				wrindx;
	uint_t				posted_cnt;
	int				status;

	/*
	 * Check for user-mappable QP memory.  Note:  We do not allow kernel
	 * clients to post to QP memory that is accessible directly by the
	 * user.  If the QP memory is user accessible, then return an error.
	 */
	if (qp->qp_is_umap) {
		return (IBT_QP_HDL_INVALID);
	}

	/* Initialize posted_cnt */
	posted_cnt = 0;

	mutex_enter(&qp->qp_lock);

	/*
	 * Check if QP is associated with an SRQ
	 */
	if (qp->qp_srq_en == HERMON_QP_SRQ_ENABLED) {
		mutex_exit(&qp->qp_lock);
		return (IBT_SRQ_IN_USE);
	}

	/*
	 * Check QP state.  Can not post Recv requests from the "Reset" state
	 */
	if (qp->qp_state == HERMON_QP_RESET) {
		mutex_exit(&qp->qp_lock);
		return (IBT_QP_STATE_INVALID);
	}

	/* Check that work request transport type is valid */
	if ((qp->qp_serv_type != HERMON_QP_UD) &&
	    (qp->qp_serv_type != HERMON_QP_RC) &&
	    (qp->qp_serv_type != HERMON_QP_UC)) {
		mutex_exit(&qp->qp_lock);
		return (IBT_QP_SRV_TYPE_INVALID);
	}

	mutex_exit(&qp->qp_lock);
	mutex_enter(&qp->qp_rq_lock);

	/*
	 * Grab the lock for the WRID list, i.e., membar_consumer().
	 * This is not needed because the mutex_enter() above has
	 * the same effect.
	 */

	/* Save away some initial QP state */
	wq = qp->qp_rq_wqhdr;
	qsize_msk = wq->wq_mask;
	tail	  = wq->wq_tail;
	head	  = wq->wq_head;

	wrindx = 0;
	status	  = DDI_SUCCESS;
	/*
	 * Before we begin, save the current "tail index" for later
	 * DMA sync
	 */
	sync_from = tail;

	for (wrindx = 0; wrindx < num_wr; wrindx++) {
		if (wq->wq_full != 0) {
			status = IBT_QP_FULL;
			break;
		}
		/* RQ has no headroom: full exactly when next_tail == head */
		next_tail = (tail + 1) & qsize_msk;
		if (next_tail == head) {
			wq->wq_full = 1;
		}
		desc = HERMON_QP_RQ_ENTRY(qp, tail);
		status = hermon_wqe_recv_build(state, qp, &wr[wrindx], desc);
		if (status != DDI_SUCCESS) {
			break;
		}

		wq->wq_wrid[tail] = wr[wrindx].wr_id;
		qp->qp_rq_wqecntr++;

		tail = next_tail;
		posted_cnt++;
	}

	if (posted_cnt != 0) {
		/* Save away updated "tail index" for the DMA sync */
		sync_to = tail;

		hermon_wqe_sync(qp, sync_from, sync_to, HERMON_WR_RECV,
		    DDI_DMA_SYNC_FORDEV);

		wq->wq_tail = tail;

		membar_producer();	/* ensure wrids are visible */

		/* Update the doorbell record w/ wqecntr */
		HERMON_UAR_DB_RECORD_WRITE(qp->qp_rq_vdbr,
		    qp->qp_rq_wqecntr & 0xFFFF);
	}

	if (num_posted != NULL) {
		*num_posted = posted_cnt;
	}


	mutex_exit(&qp->qp_rq_lock);
	return (status);
}

/*
 * hermon_post_srq()
 *    Context: Can be called from interrupt or base context.
 *
 *    Posts a list of receive work requests to a Shared Receive Queue.
 *    The SRQ free list is threaded through the WQEs themselves: the
 *    next free index is read back out of the just-built descriptor.
 */
int
hermon_post_srq(hermon_state_t *state, hermon_srqhdl_t srq,
    ibt_recv_wr_t *wr, uint_t num_wr, uint_t *num_posted)
{
	uint64_t			*desc;
	hermon_workq_hdr_t		*wq;
	uint_t				indx, wrindx;
	uint_t				posted_cnt;
	int				status;

	mutex_enter(&srq->srq_lock);

	/*
	 * Check for user-mappable QP memory.  Note:  We do not allow kernel
	 * clients to post to QP memory that is accessible directly by the
	 * user.  If the QP memory is user accessible, then return an error.
	 */
	if (srq->srq_is_umap) {
		mutex_exit(&srq->srq_lock);
		return (IBT_SRQ_HDL_INVALID);
	}

	/*
	 * Check SRQ state.  Can not post Recv requests when SRQ is in error
	 */
	if (srq->srq_state == HERMON_SRQ_STATE_ERROR) {
		mutex_exit(&srq->srq_lock);
		return (IBT_QP_STATE_INVALID);
	}

	status = DDI_SUCCESS;
	posted_cnt = 0;
	wq = srq->srq_wq_wqhdr;
	indx = wq->wq_head;

	for (wrindx = 0; wrindx < num_wr; wrindx++) {

		if (indx == wq->wq_tail) {
			status = IBT_QP_FULL;
			break;
		}
		desc = HERMON_SRQ_WQE_ADDR(srq, indx);

		wq->wq_wrid[indx] = wr[wrindx].wr_id;

		status = hermon_wqe_srq_build(state, srq, &wr[wrindx], desc);
		if (status != DDI_SUCCESS) {
			break;
		}

		hermon_wqe_sync(srq, indx, indx + 1,
		    HERMON_WR_SRQ, DDI_DMA_SYNC_FORDEV);
		posted_cnt++;
		/*
		 * The second 16-bit word of the WQE (big-endian) holds the
		 * next free-list index; follow it to advance the head.
		 */
		indx = htons(((uint16_t *)desc)[1]);
		wq->wq_head = indx;
	}

	if (posted_cnt != 0) {

		srq->srq_wq_wqecntr += posted_cnt;

		membar_producer();	/* ensure wrids are visible */

		/* Ring the doorbell w/ wqecntr */
		HERMON_UAR_DB_RECORD_WRITE(srq->srq_wq_vdbr,
		    srq->srq_wq_wqecntr & 0xFFFF);
	}

	if (num_posted != NULL) {
		*num_posted = posted_cnt;
	}

	mutex_exit(&srq->srq_lock);
	return (status);
}


/*
 * hermon_wqe_send_build()
 *    Context: Can be called from interrupt or base context.
 */
static int
hermon_wqe_send_build(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_send_wr_t *wr, uint64_t *desc, uint_t *size)
{
	hermon_hw_snd_wqe_ud_t		*ud;
	hermon_hw_snd_wqe_remaddr_t	*rc;
	hermon_hw_snd_wqe_atomic_t	*at;
	hermon_hw_snd_wqe_remaddr_t	*uc;
	hermon_hw_snd_wqe_bind_t	*bn;
	hermon_hw_wqe_sgl_t		*ds, *old_ds;
	ibt_ud_dest_t			*dest;
	ibt_wr_ds_t			*sgl;
	hermon_ahhdl_t			ah;
	uint32_t			nds;
	int				i, j, last_ds, num_ds, status;
	int				tmpsize;

	ASSERT(MUTEX_HELD(&qp->qp_sq_lock));

	/* Initialize the information for the Data Segments */
	ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)desc +
	    sizeof (hermon_hw_snd_wqe_ctrl_t));
	nds = wr->wr_nds;
	sgl = wr->wr_sgl;
	num_ds = 0;
	i = 0;

	/*
	 * Building a Send WQE depends first and foremost on the transport
	 * type of Work Request (i.e. UD, RC, or UC)
	 */
	switch (wr->wr_trans) {
	case IBT_UD_SRV:
		/* Ensure that work request transport type matches QP type */
		if (qp->qp_serv_type != HERMON_QP_UD) {
			return (IBT_QP_SRV_TYPE_INVALID);
		}

		/*
		 * Validate the operation type.  For UD requests, only the
		 * "Send" and "Send LSO" operations are valid.
		 */
		if (wr->wr_opcode != IBT_WRC_SEND &&
		    wr->wr_opcode != IBT_WRC_SEND_LSO) {
			return (IBT_QP_OP_TYPE_INVALID);
		}

		/*
		 * If this is a Special QP (QP0 or QP1), then we need to
		 * build MLX WQEs instead.  So jump to hermon_wqe_mlx_build()
		 * and return whatever status it returns
		 */
		if (qp->qp_is_special) {
			/* LSO is not supported on the special QPs */
			if (wr->wr_opcode == IBT_WRC_SEND_LSO) {
				return (IBT_QP_OP_TYPE_INVALID);
			}
			status = hermon_wqe_mlx_build(state, qp,
			    wr, desc, size);
			return (status);
		}

		/*
		 * Otherwise, if this is a normal UD Send request, then fill
		 * all the fields in the Hermon UD header for the WQE.  Note:
		 * to do this we'll need to extract some information from the
		 * Address Handle passed with the work request.
		 */
		ud = (hermon_hw_snd_wqe_ud_t *)((uintptr_t)desc +
		    sizeof (hermon_hw_snd_wqe_ctrl_t));
		if (wr->wr_opcode == IBT_WRC_SEND) {
			dest = wr->wr.ud.udwr_dest;
		} else {
			dest = wr->wr.ud_lso.lso_ud_dest;
		}
		ah = (hermon_ahhdl_t)dest->ud_ah;
		if (ah == NULL) {
			return (IBT_AH_HDL_INVALID);
		}

		/*
		 * Build the Unreliable Datagram Segment for the WQE, using
		 * the information from the address handle and the work
		 * request.
		 *
		 * NOTE(review): the AH lock acquisition is deliberately
		 * commented out here; presumably the AH contents are stable
		 * for the duration of the post -- confirm before changing.
		 */
		/* mutex_enter(&ah->ah_lock); */
		if (wr->wr_opcode == IBT_WRC_SEND) {
			HERMON_WQE_BUILD_UD(qp, ud, ah, wr->wr.ud.udwr_dest);
		} else {	/* IBT_WRC_SEND_LSO */
			HERMON_WQE_BUILD_UD(qp, ud, ah,
			    wr->wr.ud_lso.lso_ud_dest);
		}
		/* mutex_exit(&ah->ah_lock); */

		/* Update "ds" for filling in Data Segments (below) */
		ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)ud +
		    sizeof (hermon_hw_snd_wqe_ud_t));

		if (wr->wr_opcode == IBT_WRC_SEND_LSO) {
			int total_len;

			/*
			 * The LSO header is copied inline into the WQE,
			 * rounded up (with its 4-byte control word) to a
			 * 16-byte boundary.  Make sure the header plus all
			 * data segments still fit within one WQE stride.
			 */
			total_len = (4 + 0xf + wr->wr.ud_lso.lso_hdr_sz) & ~0xf;
			if ((uintptr_t)ds + total_len + (nds * 16) >
			    (uintptr_t)desc + (1 << qp->qp_sq_log_wqesz))
				return (IBT_QP_SGL_LEN_INVALID);

			bcopy(wr->wr.ud_lso.lso_hdr, (uint32_t *)ds + 1,
			    wr->wr.ud_lso.lso_hdr_sz);
			old_ds = ds;
			ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)ds + total_len);
			/*
			 * Build only the first non-empty data segment here;
			 * the remaining SGEs (from index "i" onward) are
			 * filled in by the common data segment code below.
			 */
			for (; i < nds; i++) {
				if (sgl[i].ds_len == 0)
					continue;
				HERMON_WQE_BUILD_DATA_SEG_SEND(&ds[num_ds],
				    &sgl[i]);
				num_ds++;
				i++;
				break;
			}
			/* data segment must be visible before LSO control */
			membar_producer();
			HERMON_WQE_BUILD_LSO(qp, old_ds, wr->wr.ud_lso.lso_mss,
			    wr->wr.ud_lso.lso_hdr_sz);
		}

		break;

	case IBT_RC_SRV:
		/* Ensure that work request transport type matches QP type */
		if (qp->qp_serv_type != HERMON_QP_RC) {
			return (IBT_QP_SRV_TYPE_INVALID);
		}

		/*
		 * Validate the operation type.  For RC requests, we allow
		 * "Send", "RDMA Read", "RDMA Write", various "Atomic"
		 * operations, and memory window "Bind"
		 */
		if ((wr->wr_opcode != IBT_WRC_SEND) &&
		    (wr->wr_opcode != IBT_WRC_RDMAR) &&
		    (wr->wr_opcode != IBT_WRC_RDMAW) &&
		    (wr->wr_opcode != IBT_WRC_CSWAP) &&
		    (wr->wr_opcode != IBT_WRC_FADD) &&
		    (wr->wr_opcode != IBT_WRC_BIND)) {
			return (IBT_QP_OP_TYPE_INVALID);
		}

		/*
		 * If this is a Send request, then all we need to do is break
		 * out and begin the Data Segment processing below
		 */
		if (wr->wr_opcode == IBT_WRC_SEND) {
			break;
		}

		/*
		 * If this is an RDMA Read or RDMA Write request, then fill
		 * in the "Remote Address" header fields.
		 */
		if ((wr->wr_opcode == IBT_WRC_RDMAR) ||
		    (wr->wr_opcode == IBT_WRC_RDMAW)) {
			rc = (hermon_hw_snd_wqe_remaddr_t *)((uintptr_t)desc +
			    sizeof (hermon_hw_snd_wqe_ctrl_t));

			/*
			 * Build the Remote Address Segment for the WQE, using
			 * the information from the RC work request.
			 */
			HERMON_WQE_BUILD_REMADDR(qp, rc, &wr->wr.rc.rcwr.rdma);

			/* Update "ds" for filling in Data Segments (below) */
			ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)rc +
			    sizeof (hermon_hw_snd_wqe_remaddr_t));
			break;
		}

		/*
		 * If this is one of the Atomic type operations (i.e
		 * Compare-Swap or Fetch-Add), then fill in both the "Remote
		 * Address" header fields and the "Atomic" header fields.
		 */
		if ((wr->wr_opcode == IBT_WRC_CSWAP) ||
		    (wr->wr_opcode == IBT_WRC_FADD)) {
			rc = (hermon_hw_snd_wqe_remaddr_t *)((uintptr_t)desc +
			    sizeof (hermon_hw_snd_wqe_ctrl_t));
			at = (hermon_hw_snd_wqe_atomic_t *)((uintptr_t)rc +
			    sizeof (hermon_hw_snd_wqe_remaddr_t));

			/*
			 * Build the Remote Address and Atomic Segments for
			 * the WQE, using the information from the RC Atomic
			 * work request.
			 */
			HERMON_WQE_BUILD_RC_ATOMIC_REMADDR(qp, rc, wr);
			HERMON_WQE_BUILD_ATOMIC(qp, at, wr->wr.rc.rcwr.atomic);

			/* Update "ds" for filling in Data Segments (below) */
			ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)at +
			    sizeof (hermon_hw_snd_wqe_atomic_t));

			/*
			 * Update "nds" and "sgl" because Atomic requests have
			 * only a single Data Segment (and they are encoded
			 * somewhat differently in the work request.)
			 */
			nds = 1;
			sgl = wr->wr_sgl;
			break;
		}

		/*
		 * If this is memory window Bind operation, then we call the
		 * hermon_wr_bind_check() routine to validate the request and
		 * to generate the updated RKey.  If this is successful, then
		 * we fill in the WQE's "Bind" header fields.
		 */
		if (wr->wr_opcode == IBT_WRC_BIND) {
			status = hermon_wr_bind_check(state, wr);
			if (status != DDI_SUCCESS) {
				return (status);
			}

			bn = (hermon_hw_snd_wqe_bind_t *)((uintptr_t)desc +
			    sizeof (hermon_hw_snd_wqe_ctrl_t));

			/*
			 * Build the Bind Memory Window Segments for the WQE,
			 * using the information from the RC Bind memory
			 * window work request.
			 */
			HERMON_WQE_BUILD_BIND(qp, bn, wr->wr.rc.rcwr.bind);

			/*
			 * Update the "ds" pointer.  Even though the "bind"
			 * operation requires no SGLs, this is necessary to
			 * facilitate the correct descriptor size calculations
			 * (below).
			 */
			ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)bn +
			    sizeof (hermon_hw_snd_wqe_bind_t));
			nds = 0;
		}
		break;

	case IBT_UC_SRV:
		/* Ensure that work request transport type matches QP type */
		if (qp->qp_serv_type != HERMON_QP_UC) {
			return (IBT_QP_SRV_TYPE_INVALID);
		}

		/*
		 * Validate the operation type.  For UC requests, we only
		 * allow "Send", "RDMA Write", and memory window "Bind".
		 * Note: Unlike RC, UC does not allow "RDMA Read" or "Atomic"
		 * operations
		 */
		if ((wr->wr_opcode != IBT_WRC_SEND) &&
		    (wr->wr_opcode != IBT_WRC_RDMAW) &&
		    (wr->wr_opcode != IBT_WRC_BIND)) {
			return (IBT_QP_OP_TYPE_INVALID);
		}

		/*
		 * If this is a Send request, then all we need to do is break
		 * out and begin the Data Segment processing below
		 */
		if (wr->wr_opcode == IBT_WRC_SEND) {
			break;
		}

		/*
		 * If this is an RDMA Write request, then fill in the "Remote
		 * Address" header fields.
		 */
		if (wr->wr_opcode == IBT_WRC_RDMAW) {
			uc = (hermon_hw_snd_wqe_remaddr_t *)((uintptr_t)desc +
			    sizeof (hermon_hw_snd_wqe_ctrl_t));

			/*
			 * Build the Remote Address Segment for the WQE, using
			 * the information from the UC work request.
			 */
			HERMON_WQE_BUILD_REMADDR(qp, uc, &wr->wr.uc.ucwr.rdma);

			/* Update "ds" for filling in Data Segments (below) */
			ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)uc +
			    sizeof (hermon_hw_snd_wqe_remaddr_t));
			break;
		}

		/*
		 * If this is memory window Bind operation, then we call the
		 * hermon_wr_bind_check() routine to validate the request and
		 * to generate the updated RKey.  If this is successful, then
		 * we fill in the WQE's "Bind" header fields.
		 */
		if (wr->wr_opcode == IBT_WRC_BIND) {
			status = hermon_wr_bind_check(state, wr);
			if (status != DDI_SUCCESS) {
				return (status);
			}

			bn = (hermon_hw_snd_wqe_bind_t *)((uintptr_t)desc +
			    sizeof (hermon_hw_snd_wqe_ctrl_t));

			/*
			 * Build the Bind Memory Window Segments for the WQE,
			 * using the information from the UC Bind memory
			 * window work request.
			 */
			HERMON_WQE_BUILD_BIND(qp, bn, wr->wr.uc.ucwr.bind);

			/*
			 * Update the "ds" pointer.  Even though the "bind"
			 * operation requires no SGLs, this is necessary to
			 * facilitate the correct descriptor size calculations
			 * (below).
			 */
			ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)bn +
			    sizeof (hermon_hw_snd_wqe_bind_t));
			nds = 0;
		}
		break;

	default:
		return (IBT_QP_SRV_TYPE_INVALID);
	}

	/*
	 * Now fill in the Data Segments (SGL) for the Send WQE based on
	 * the values setup above (i.e. "sgl", "nds", and the "ds" pointer.)
	 * Start by checking for a valid number of SGL entries
	 */
	if (nds > qp->qp_sq_sgl) {
		return (IBT_QP_SGL_LEN_INVALID);
	}

	/*
	 * For each SGL in the Send Work Request, fill in the Send WQE's data
	 * segments.  Note: We skip any SGL with zero size because Hermon
	 * hardware cannot handle a zero for "byte_cnt" in the WQE.  Actually
	 * the encoding for zero means a 2GB transfer.
	 *
	 * First count the non-empty segments so "last_ds" indexes one past
	 * the final data segment of the WQE.
	 */
	for (last_ds = num_ds, j = i; j < nds; j++) {
		if (sgl[j].ds_len != 0)
			last_ds++;	/* real last ds of wqe to fill */
	}

	/*
	 * Return the size of descriptor (in 16-byte chunks)
	 * For Hermon, we want them (for now) to be on stride size
	 * boundaries, which was implicit in Tavor/Arbel
	 */
	tmpsize = ((uintptr_t)&ds[last_ds] - (uintptr_t)desc);

	*size = tmpsize >> 0x4;

	/* Fill the data segments back-to-front, decrementing "last_ds" */
	for (j = nds; --j >= i; ) {
		if (sgl[j].ds_len == 0) {
			continue;
		}

		/*
		 * Fill in the Data Segment(s) for the current WQE, using the
		 * information contained in the scatter-gather list of the
		 * work request.
		 */
		last_ds--;
		HERMON_WQE_BUILD_DATA_SEG_SEND(&ds[last_ds], &sgl[j]);
	}

	return (DDI_SUCCESS);
}



/*
 * hermon_wqe_mlx_build()
 *    Context: Can be called from interrupt or base context.
 *
 *    Builds the "MLX" (raw-header) form of a send WQE used by the special
 *    QPs (QP0/SMI and QP1/GSI).  The LRH/GRH/BTH/DETH packet headers are
 *    constructed in software and placed inline in the WQE.
 */
static int
hermon_wqe_mlx_build(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_send_wr_t *wr, uint64_t *desc, uint_t *size)
{
	hermon_ahhdl_t		ah;
	hermon_hw_udav_t	*udav;
	ib_lrh_hdr_t		*lrh;
	ib_grh_t		*grh;
	ib_bth_hdr_t		*bth;
	ib_deth_hdr_t		*deth;
	hermon_hw_wqe_sgl_t	*ds;
	ibt_wr_ds_t		*sgl;
	uint8_t			*mgmtclass, *hpoint, *hcount;
	uint32_t		nds, offset, pktlen;
	uint32_t		desc_sz;
	int			i, num_ds;
	int			tmpsize;

	ASSERT(MUTEX_HELD(&qp->qp_sq_lock));

	/* Initialize the information for the Data Segments */
	ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)desc +
	    sizeof (hermon_hw_mlx_wqe_nextctrl_t));

	/*
	 * Pull the address handle from the work request.  The UDAV will
	 * be used to answer some questions about the request.
	 */
	ah = (hermon_ahhdl_t)wr->wr.ud.udwr_dest->ud_ah;
	if (ah == NULL) {
		return (IBT_AH_HDL_INVALID);
	}
	mutex_enter(&ah->ah_lock);
	udav = ah->ah_udav;

	/*
	 * If the request is for QP1 and the destination LID is equal to
	 * the Permissive LID, then return an error.  This combination is
	 * not allowed
	 */
	if ((udav->rlid == IB_LID_PERMISSIVE) &&
	    (qp->qp_is_special == HERMON_QP_GSI)) {
		mutex_exit(&ah->ah_lock);
		return (IBT_AH_HDL_INVALID);
	}

	/*
	 * Calculate the size of the packet headers, including the GRH
	 * (if necessary)
	 */
	desc_sz = sizeof (ib_lrh_hdr_t) + sizeof (ib_bth_hdr_t) +
	    sizeof (ib_deth_hdr_t);
	if (udav->grh) {
		desc_sz += sizeof (ib_grh_t);
	}

	/*
	 * Begin to build the first "inline" data segment for the packet
	 * headers.  Note:  By specifying "inline" we can build the contents
	 * of the MAD packet headers directly into the work queue (as part
	 * of the descriptor).  This has the advantage of both speeding
	 * things up and of not requiring the driver to allocate/register any
	 * additional memory for the packet headers.
	 */
	HERMON_WQE_BUILD_INLINE(qp, &ds[0], desc_sz);
	desc_sz += 4;	/* account for the inline segment's control word */

	/*
	 * Build Local Route Header (LRH)
	 *    We start here by building the LRH into a temporary location.
	 *    When we have finished we copy the LRH data into the descriptor.
	 *
	 *    Notice that the VL values are hardcoded.  This is not a problem
	 *    because VL15 is decided later based on the value in the MLX
	 *    transport "next/ctrl" header (see the "vl15" bit below), and it
	 *    is otherwise (meaning for QP1) chosen from the SL-to-VL table
	 *    values.  This rule does not hold for loopback packets however
	 *    (all of which bypass the SL-to-VL tables) and it is the reason
	 *    that non-QP0 MADs are setup with VL hardcoded to zero below.
	 *
	 *    Notice also that Source LID is hardcoded to the Permissive LID
	 *    (0xFFFF).  This is also not a problem because if the Destination
	 *    LID is not the Permissive LID, then the "slr" value in the MLX
	 *    transport "next/ctrl" header will be set to zero and the
	 *    hardware will pull the LID from the value in the port.
	 */
	lrh = (ib_lrh_hdr_t *)((uintptr_t)&ds[0] + 4);
	pktlen = (desc_sz + 0x100) >> 2;
	HERMON_WQE_BUILD_MLX_LRH(lrh, qp, udav, pktlen);

	/*
	 * Build Global Route Header (GRH)
	 *    This is only built if necessary as defined by the "grh" bit in
	 *    the address vector.  Note:  We also calculate the offset to the
	 *    next header (BTH) based on whether or not the "grh" bit is set.
	 */
	if (udav->grh) {
		/*
		 * If the request is for QP0, then return an error.  The
		 * combination of global routing (GRH) and QP0 is not allowed.
		 */
		if (qp->qp_is_special == HERMON_QP_SMI) {
			mutex_exit(&ah->ah_lock);
			return (IBT_AH_HDL_INVALID);
		}
		grh = (ib_grh_t *)((uintptr_t)lrh + sizeof (ib_lrh_hdr_t));
		HERMON_WQE_BUILD_MLX_GRH(state, grh, qp, udav, pktlen);

		bth = (ib_bth_hdr_t *)((uintptr_t)grh + sizeof (ib_grh_t));
	} else {
		bth = (ib_bth_hdr_t *)((uintptr_t)lrh + sizeof (ib_lrh_hdr_t));
	}
	mutex_exit(&ah->ah_lock);


	/*
	 * Build Base Transport Header (BTH)
	 *    Notice that the M, PadCnt, and TVer fields are all set
	 *    to zero implicitly.  This is true for all Management Datagrams
	 *    MADs whether GSI are SMI.
	 */
	HERMON_WQE_BUILD_MLX_BTH(state, bth, qp, wr);

	/*
	 * Build Datagram Extended Transport Header (DETH)
	 */
	deth = (ib_deth_hdr_t *)((uintptr_t)bth + sizeof (ib_bth_hdr_t));
	HERMON_WQE_BUILD_MLX_DETH(deth, qp);

	/* Ensure that the Data Segment is aligned on a 16-byte boundary */
	ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)deth + sizeof (ib_deth_hdr_t));
	ds = (hermon_hw_wqe_sgl_t *)(((uintptr_t)ds + 0xF) & ~0xF);
	nds = wr->wr_nds;
	sgl = wr->wr_sgl;
	num_ds = 0;

	/*
	 * Now fill in the Data Segments (SGL) for the MLX WQE based on the
	 * values set up above (i.e. "sgl", "nds", and the "ds" pointer.)
	 * Start by checking for a valid number of SGL entries
	 */
	if (nds > qp->qp_sq_sgl) {
		return (IBT_QP_SGL_LEN_INVALID);
	}

	/*
	 * For each SGL in the Send Work Request, fill in the MLX WQE's data
	 * segments.  Note: We skip any SGL with zero size because Hermon
	 * hardware cannot handle a zero for "byte_cnt" in the WQE.  Actually
	 * the encoding for zero means a 2GB transfer.  Because of this special
	 * encoding in the hardware, we mask the requested length with
	 * HERMON_WQE_SGL_BYTE_CNT_MASK (so that 2GB will end up encoded as
	 * zero.)
	 */
	mgmtclass = hpoint = hcount = NULL;
	offset = 0;
	for (i = 0; i < nds; i++) {
		if (sgl[i].ds_len == 0) {
			continue;
		}

		/*
		 * Fill in the Data Segment(s) for the MLX send WQE, using
		 * the information contained in the scatter-gather list of
		 * the work request.
		 */
		HERMON_WQE_BUILD_DATA_SEG_SEND(&ds[num_ds], &sgl[i]);

		/*
		 * Search through the contents of all MADs posted to QP0 to
		 * initialize pointers to the places where Directed Route "hop
		 * pointer", "hop count", and "mgmtclass" would be.  Hermon
		 * needs these updated (i.e. incremented or decremented, as
		 * necessary) by software.
		 */
		if (qp->qp_is_special == HERMON_QP_SMI) {

			HERMON_SPECIAL_QP_DRMAD_GET_MGMTCLASS(mgmtclass,
			    offset, sgl[i].ds_va, sgl[i].ds_len);

			HERMON_SPECIAL_QP_DRMAD_GET_HOPPOINTER(hpoint,
			    offset, sgl[i].ds_va, sgl[i].ds_len);

			HERMON_SPECIAL_QP_DRMAD_GET_HOPCOUNT(hcount,
			    offset, sgl[i].ds_va, sgl[i].ds_len);

			offset += sgl[i].ds_len;
		}
		num_ds++;
	}

	/*
	 * Hermon's Directed Route MADs need to have the "hop pointer"
	 * incremented/decremented (as necessary) depending on whether it is
	 * currently less than or greater than the "hop count" (i.e. whether
	 * the MAD is a request or a response.)
	 *
	 * NOTE(review): "mgmtclass"/"hpoint"/"hcount" remain NULL if no
	 * non-empty SGL was posted above; presumably QP0 MADs always carry
	 * at least one data segment -- confirm against callers.
	 */
	if (qp->qp_is_special == HERMON_QP_SMI) {
		HERMON_SPECIAL_QP_DRMAD_DO_HOPPOINTER_MODIFY(*mgmtclass,
		    *hpoint, *hcount);
	}

	/*
	 * Now fill in the ICRC Data Segment.  This data segment is inlined
	 * just like the packet headers above, but it is only four bytes and
	 * set to zero (to indicate that we wish the hardware to generate
	 * ICRC.)
	 */
	HERMON_WQE_BUILD_INLINE_ICRC(qp, &ds[num_ds], 4, 0);
	num_ds++;

	/*
	 * Return the size of descriptor (in 16-byte chunks)
	 * For Hermon, we want them (for now) to be on stride size
	 * boundaries, which was implicit in Tavor/Arbel
	 */
	tmpsize = ((uintptr_t)&ds[num_ds] - (uintptr_t)desc);

	*size = tmpsize >> 0x04;

	return (DDI_SUCCESS);
}



/*
 * hermon_wqe_recv_build()
 *    Context: Can be called from interrupt or base context.
1754 */ 1755 /* ARGSUSED */ 1756 static int 1757 hermon_wqe_recv_build(hermon_state_t *state, hermon_qphdl_t qp, 1758 ibt_recv_wr_t *wr, uint64_t *desc) 1759 { 1760 hermon_hw_wqe_sgl_t *ds; 1761 int i, num_ds; 1762 1763 ASSERT(MUTEX_HELD(&qp->qp_rq_lock)); 1764 1765 /* 1766 * Fill in the Data Segments (SGL) for the Recv WQE - don't 1767 * need to have a reserved for the ctrl, there is none on the 1768 * recv queue for hermon, but will need to put an invalid 1769 * (null) scatter pointer per PRM 1770 */ 1771 ds = (hermon_hw_wqe_sgl_t *)(uintptr_t)desc; 1772 num_ds = 0; 1773 1774 /* Check for valid number of SGL entries */ 1775 if (wr->wr_nds > qp->qp_rq_sgl) { 1776 return (IBT_QP_SGL_LEN_INVALID); 1777 } 1778 1779 /* 1780 * For each SGL in the Recv Work Request, fill in the Recv WQE's data 1781 * segments. Note: We skip any SGL with zero size because Hermon 1782 * hardware cannot handle a zero for "byte_cnt" in the WQE. Actually 1783 * the encoding for zero means a 2GB transfer. Because of this special 1784 * encoding in the hardware, we mask the requested length with 1785 * HERMON_WQE_SGL_BYTE_CNT_MASK (so that 2GB will end up encoded as 1786 * zero.) 1787 */ 1788 for (i = 0; i < wr->wr_nds; i++) { 1789 if (wr->wr_sgl[i].ds_len == 0) { 1790 continue; 1791 } 1792 1793 /* 1794 * Fill in the Data Segment(s) for the receive WQE, using the 1795 * information contained in the scatter-gather list of the 1796 * work request. 1797 */ 1798 HERMON_WQE_BUILD_DATA_SEG_RECV(&ds[num_ds], &wr->wr_sgl[i]); 1799 num_ds++; 1800 } 1801 1802 /* put the null sgl pointer as well if needed */ 1803 if (num_ds < qp->qp_rq_sgl) { 1804 HERMON_WQE_BUILD_DATA_SEG_RECV(&ds[num_ds], &null_sgl); 1805 } 1806 1807 return (DDI_SUCCESS); 1808 } 1809 1810 1811 1812 /* 1813 * hermon_wqe_srq_build() 1814 * Context: Can be called from interrupt or base context. 
 */
/* ARGSUSED */
static int
hermon_wqe_srq_build(hermon_state_t *state, hermon_srqhdl_t srq,
    ibt_recv_wr_t *wr, uint64_t *desc)
{
	hermon_hw_wqe_sgl_t	*ds;
	int			i, num_ds;

	ASSERT(MUTEX_HELD(&srq->srq_lock));

	/*
	 * Fill in the Data Segments (SGL) for the Recv WQE.  Unlike the
	 * ordinary RQ, SRQ WQEs begin with a "next" chaining header.
	 */
	ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)desc +
	    sizeof (hermon_hw_srq_wqe_next_t));
	num_ds = 0;

	/* Check for valid number of SGL entries */
	if (wr->wr_nds > srq->srq_wq_sgl) {
		return (IBT_QP_SGL_LEN_INVALID);
	}

	/*
	 * For each SGL in the Recv Work Request, fill in the Recv WQE's data
	 * segments.  Note: We skip any SGL with zero size because Hermon
	 * hardware cannot handle a zero for "byte_cnt" in the WQE.  Actually
	 * the encoding for zero means a 2GB transfer.  Because of this special
	 * encoding in the hardware, we mask the requested length with
	 * HERMON_WQE_SGL_BYTE_CNT_MASK (so that 2GB will end up encoded as
	 * zero.)
	 */
	for (i = 0; i < wr->wr_nds; i++) {
		if (wr->wr_sgl[i].ds_len == 0) {
			continue;
		}

		/*
		 * Fill in the Data Segment(s) for the receive WQE, using the
		 * information contained in the scatter-gather list of the
		 * work request.
		 */
		HERMON_WQE_BUILD_DATA_SEG_RECV(&ds[num_ds], &wr->wr_sgl[i]);
		num_ds++;
	}

	/*
	 * put in the null sgl pointer as well, if needed
	 */
	if (num_ds < srq->srq_wq_sgl) {
		HERMON_WQE_BUILD_DATA_SEG_RECV(&ds[num_ds], &null_sgl);
	}

	return (DDI_SUCCESS);
}


/*
 * hermon_wr_get_immediate()
 *    Context: Can be called from interrupt or base context.
 *
 *    Returns the "immediate data" carried by a send work request, or zero
 *    for operation types that carry none.
 */
static uint32_t
hermon_wr_get_immediate(ibt_send_wr_t *wr)
{
	/*
	 * This routine extracts the "immediate data" from the appropriate
	 * location in the IBTF work request.  Because of the way the
	 * work request structure is defined, the location for this data
	 * depends on the actual work request operation type.
	 */

	/* For RDMA Write, test if RC or UC */
	if (wr->wr_opcode == IBT_WRC_RDMAW) {
		if (wr->wr_trans == IBT_RC_SRV) {
			return (wr->wr.rc.rcwr.rdma.rdma_immed);
		} else {	/* IBT_UC_SRV */
			return (wr->wr.uc.ucwr.rdma.rdma_immed);
		}
	}

	/* For Send, test if RC, UD, or UC */
	if (wr->wr_opcode == IBT_WRC_SEND) {
		if (wr->wr_trans == IBT_RC_SRV) {
			return (wr->wr.rc.rcwr.send_immed);
		} else if (wr->wr_trans == IBT_UD_SRV) {
			return (wr->wr.ud.udwr_immed);
		} else {	/* IBT_UC_SRV */
			return (wr->wr.uc.ucwr.send_immed);
		}
	}

	/*
	 * If any other type of request, then immediate is undefined
	 */
	return (0);
}

/*
 * hermon_wqe_headroom()
 *    Context: can be called from interrupt or base, currently only from
 *    base context.
1914 * Routine that fills in the headroom for the Send Queue 1915 */ 1916 1917 static void 1918 hermon_wqe_headroom(uint_t from, hermon_qphdl_t qp) 1919 { 1920 uint32_t *wqe_start, *wqe_top, *wqe_base, qsize; 1921 int hdrmwqes, wqesizebytes, sectperwqe; 1922 uint32_t invalue; 1923 int i, j; 1924 1925 qsize = qp->qp_sq_bufsz; 1926 wqesizebytes = 1 << qp->qp_sq_log_wqesz; 1927 sectperwqe = wqesizebytes >> 6; /* 64 bytes/section */ 1928 hdrmwqes = qp->qp_sq_hdrmwqes; 1929 wqe_base = (uint32_t *)HERMON_QP_SQ_ENTRY(qp, 0); 1930 wqe_top = (uint32_t *)HERMON_QP_SQ_ENTRY(qp, qsize); 1931 wqe_start = (uint32_t *)HERMON_QP_SQ_ENTRY(qp, from); 1932 1933 for (i = 0; i < hdrmwqes; i++) { 1934 for (j = 0; j < sectperwqe; j++) { 1935 if (j == 0) { /* 1st section of wqe */ 1936 /* perserve ownership bit */ 1937 invalue = ddi_get32(qp->qp_wqinfo.qa_acchdl, 1938 wqe_start) | 0x7FFFFFFF; 1939 } else { 1940 /* or just invalidate it */ 1941 invalue = 0xFFFFFFFF; 1942 } 1943 ddi_put32(qp->qp_wqinfo.qa_acchdl, wqe_start, invalue); 1944 wqe_start += 16; /* move 64 bytes */ 1945 } 1946 if (wqe_start == wqe_top) /* hit the end of the queue */ 1947 wqe_start = wqe_base; /* wrap to start */ 1948 } 1949 } 1950 1951 /* 1952 * hermon_wqe_sync() 1953 * Context: Can be called from interrupt or base context. 
 */
static void
hermon_wqe_sync(void *hdl, uint_t sync_from, uint_t sync_to,
    uint_t sync_type, uint_t flag)
{
	hermon_qphdl_t		qp;
	hermon_srqhdl_t		srq;
	uint64_t		*wqe_from, *wqe_to;
	uint64_t		*wq_base, *wq_top, *qp_base;
	ddi_dma_handle_t	dmahdl;
	off_t			offset;
	size_t			length;
	uint32_t		qsize;
	int			status;

	/*
	 * "hdl" is either a QP or an SRQ handle, as selected by
	 * "sync_type" (HERMON_WR_SEND/HERMON_WR_RECV vs. HERMON_WR_SRQ).
	 */
	if (sync_type == HERMON_WR_SRQ) {
		srq = (hermon_srqhdl_t)hdl;
		/* Get the DMA handle from SRQ context */
		dmahdl = srq->srq_mrhdl->mr_bindinfo.bi_dmahdl;
		/* get base addr of the buffer */
		qp_base = (uint64_t *)(void *)srq->srq_wq_buf;
	} else {
		qp = (hermon_qphdl_t)hdl;
		/* Get the DMA handle from QP context */
		dmahdl = qp->qp_mrhdl->mr_bindinfo.bi_dmahdl;
		/*
		 * Determine the base address of the QP buffer (i.e.
		 * whichever of the two queues comes first in memory).
		 */
		if (qp->qp_sq_baseaddr == 0) {
			qp_base = (uint64_t *)(void *)(qp->qp_sq_buf);
		} else {
			qp_base = (uint64_t *)(void *)(qp->qp_rq_buf);
		}
	}

	/*
	 * Depending on the type of the work queue, we grab information
	 * about the address ranges we need to DMA sync.
	 */

	if (sync_type == HERMON_WR_SEND) {
		wqe_from = HERMON_QP_SQ_ENTRY(qp, sync_from);
		wqe_to   = HERMON_QP_SQ_ENTRY(qp, sync_to);
		qsize	 = qp->qp_sq_bufsz;

		wq_base = HERMON_QP_SQ_ENTRY(qp, 0);
		wq_top	 = HERMON_QP_SQ_ENTRY(qp, qsize);
	} else if (sync_type == HERMON_WR_RECV) {
		wqe_from = HERMON_QP_RQ_ENTRY(qp, sync_from);
		wqe_to   = HERMON_QP_RQ_ENTRY(qp, sync_to);
		qsize	 = qp->qp_rq_bufsz;

		wq_base = HERMON_QP_RQ_ENTRY(qp, 0);
		wq_top	 = HERMON_QP_RQ_ENTRY(qp, qsize);
	} else {
		wqe_from = HERMON_SRQ_WQ_ENTRY(srq, sync_from);
		wqe_to   = HERMON_SRQ_WQ_ENTRY(srq, sync_to);
		qsize	 = srq->srq_wq_bufsz;

		wq_base = HERMON_SRQ_WQ_ENTRY(srq, 0);
		wq_top	 = HERMON_SRQ_WQ_ENTRY(srq, qsize);
	}

	/*
	 * There are two possible cases for the beginning and end of the WQE
	 * chain we are trying to sync.  Either this is the simple case, where
	 * the end of the chain is below the beginning of the chain, or it is
	 * the "wrap-around" case, where the end of the chain has wrapped over
	 * the end of the queue.  In the former case, we simply need to
	 * calculate the span from beginning to end and sync it.  In the latter
	 * case, however, we need to calculate the span from the top of the
	 * work queue to the end of the chain and sync that, and then we need
	 * to find the other portion (from beginning of chain to end of queue)
	 * and sync that as well.  Note: if the "top to end" span is actually
	 * zero length, then we don't do a DMA sync because a zero length DMA
	 * sync unnecessarily syncs the entire work queue.
	 */
	if (wqe_to > wqe_from) {
		/* "From Beginning to End" */

		offset = (off_t)((uintptr_t)wqe_from - (uintptr_t)qp_base);
		length = (size_t)((uintptr_t)wqe_to - (uintptr_t)wqe_from);

		status = ddi_dma_sync(dmahdl, offset, length, flag);
		if (status != DDI_SUCCESS) {
			return;
		}
	} else {
		/* "From Top to End" */

		offset = (off_t)((uintptr_t)wq_base - (uintptr_t)qp_base);
		length = (size_t)((uintptr_t)wqe_to - (uintptr_t)wq_base);
		if (length) {
			status = ddi_dma_sync(dmahdl, offset, length, flag);
			if (status != DDI_SUCCESS) {
				return;
			}
		}

		/* "From Beginning to Bottom" */

		offset = (off_t)((uintptr_t)wqe_from - (uintptr_t)qp_base);
		length = (size_t)((uintptr_t)wq_top - (uintptr_t)wqe_from);
		status = ddi_dma_sync(dmahdl, offset, length, flag);
		if (status != DDI_SUCCESS) {
			return;
		}
	}
}


/*
 * hermon_wr_bind_check()
 *    Context: Can be called from interrupt or base context.
 *
 *    Validates a memory window Bind work request (handles, keys, address
 *    range, access flags) and, on success, computes the window's new RKey,
 *    storing it both in the window and in "bind_rkey_out".
 */
/* ARGSUSED */
static int
hermon_wr_bind_check(hermon_state_t *state, ibt_send_wr_t *wr)
{
	ibt_bind_flags_t	bind_flags;
	uint64_t		vaddr, len;
	uint64_t		reg_start_addr, reg_end_addr;
	hermon_mwhdl_t		mw;
	hermon_mrhdl_t		mr;
	hermon_rsrc_t		*mpt;
	uint32_t		new_rkey;

	/* Check for a valid Memory Window handle in the WR */
	mw = (hermon_mwhdl_t)wr->wr.rc.rcwr.bind->bind_ibt_mw_hdl;
	if (mw == NULL) {
		return (IBT_MW_HDL_INVALID);
	}

	/* Check for a valid Memory Region handle in the WR */
	mr = (hermon_mrhdl_t)wr->wr.rc.rcwr.bind->bind_ibt_mr_hdl;
	if (mr == NULL) {
		return (IBT_MR_HDL_INVALID);
	}

	mutex_enter(&mr->mr_lock);
	mutex_enter(&mw->mr_lock);

	/*
	 * Check here to see if the memory region has already been partially
	 * deregistered as a result of a hermon_umap_umemlock_cb() callback.
	 * If so, this is an error, return failure.
	 */
	if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) {
		mutex_exit(&mr->mr_lock);
		mutex_exit(&mw->mr_lock);
		return (IBT_MR_HDL_INVALID);
	}

	/* Check for a valid Memory Window RKey (i.e. a matching RKey) */
	if (mw->mr_rkey != wr->wr.rc.rcwr.bind->bind_rkey) {
		mutex_exit(&mr->mr_lock);
		mutex_exit(&mw->mr_lock);
		return (IBT_MR_RKEY_INVALID);
	}

	/* Check for a valid Memory Region LKey (i.e. a matching LKey) */
	if (mr->mr_lkey != wr->wr.rc.rcwr.bind->bind_lkey) {
		mutex_exit(&mr->mr_lock);
		mutex_exit(&mw->mr_lock);
		return (IBT_MR_LKEY_INVALID);
	}

	/*
	 * Now check for valid "vaddr" and "len".  Note:  We don't check the
	 * "vaddr" range when "len == 0" (i.e. on unbind operations)
	 */
	len = wr->wr.rc.rcwr.bind->bind_len;
	if (len != 0) {
		vaddr = wr->wr.rc.rcwr.bind->bind_va;
		reg_start_addr = mr->mr_bindinfo.bi_addr;
		reg_end_addr   = mr->mr_bindinfo.bi_addr +
		    (mr->mr_bindinfo.bi_len - 1);
		if ((vaddr < reg_start_addr) || (vaddr > reg_end_addr)) {
			mutex_exit(&mr->mr_lock);
			mutex_exit(&mw->mr_lock);
			return (IBT_MR_VA_INVALID);
		}
		vaddr = (vaddr + len) - 1;
		if (vaddr > reg_end_addr) {
			mutex_exit(&mr->mr_lock);
			mutex_exit(&mw->mr_lock);
			return (IBT_MR_LEN_INVALID);
		}
	}

	/*
	 * Validate the bind access flags.  Remote Write and Atomic access for
	 * the Memory Window require that Local Write access be set in the
	 * corresponding Memory Region.
	 */
	bind_flags = wr->wr.rc.rcwr.bind->bind_flags;
	if (((bind_flags & IBT_WR_BIND_WRITE) ||
	    (bind_flags & IBT_WR_BIND_ATOMIC)) &&
	    !(mr->mr_accflag & IBT_MR_LOCAL_WRITE)) {
		mutex_exit(&mr->mr_lock);
		mutex_exit(&mw->mr_lock);
		return (IBT_MR_ACCESS_REQ_INVALID);
	}

	/* Calculate the new RKey for the Memory Window */
	mpt = mw->mr_mptrsrcp;
	new_rkey = hermon_mr_keycalc(mpt->hr_indx);
	new_rkey = hermon_mr_key_swap(new_rkey);

	wr->wr.rc.rcwr.bind->bind_rkey_out = new_rkey;
	mw->mr_rkey = new_rkey;

	mutex_exit(&mr->mr_lock);
	mutex_exit(&mw->mr_lock);
	return (DDI_SUCCESS);
}


/*
 * hermon_wrid_from_reset_handling()
 *    Context: Can be called from interrupt or base context.
 *
 *    Called on a QP's transition out of the Reset state: re-links the QP's
 *    send and receive work queues onto their CQs' wqavl lists and resets
 *    the work queue header indices.
 */
/* ARGSUSED */
int
hermon_wrid_from_reset_handling(hermon_state_t *state, hermon_qphdl_t qp)
{
	hermon_workq_hdr_t	*swq, *rwq;
	uint_t			qp_srq_en;

	/* User-mapped QPs are tracked in userland; nothing to do here */
	if (qp->qp_is_umap)
		return (DDI_SUCCESS);

	/* grab the cq lock(s) to modify the wqavl tree */
	mutex_enter(&qp->qp_rq_cqhdl->cq_lock);
#ifdef __lock_lint
	mutex_enter(&qp->qp_sq_cqhdl->cq_lock);
#else
	if (qp->qp_rq_cqhdl != qp->qp_sq_cqhdl)
		mutex_enter(&qp->qp_sq_cqhdl->cq_lock);
#endif

	/* Chain the newly allocated work queue header to the CQ's list */
	hermon_cq_workq_add(qp->qp_sq_cqhdl, &qp->qp_sq_wqavl);

	swq = qp->qp_sq_wqhdr;
	swq->wq_head = 0;
	swq->wq_tail = 0;
	swq->wq_full = 0;

	/*
	 * Now we repeat all the above operations for the receive work queue,
	 * or shared receive work queue.
	 *
	 * Note: We still use the 'qp_rq_cqhdl' even in the SRQ case.
	 */
	qp_srq_en = qp->qp_srq_en;

#ifdef __lock_lint
	mutex_enter(&qp->qp_srqhdl->srq_lock);
#else
	if (qp_srq_en == HERMON_QP_SRQ_ENABLED) {
		mutex_enter(&qp->qp_srqhdl->srq_lock);
	} else {
		/* Ordinary RQ: reset the work queue header indices */
		rwq = qp->qp_rq_wqhdr;
		rwq->wq_head = 0;
		rwq->wq_tail = 0;
		rwq->wq_full = 0;
		qp->qp_rq_wqecntr = 0;
	}
#endif
	hermon_cq_workq_add(qp->qp_rq_cqhdl, &qp->qp_rq_wqavl);

#ifdef __lock_lint
	mutex_exit(&qp->qp_srqhdl->srq_lock);
#else
	if (qp_srq_en == HERMON_QP_SRQ_ENABLED) {
		mutex_exit(&qp->qp_srqhdl->srq_lock);
	}
#endif

#ifdef __lock_lint
	mutex_exit(&qp->qp_sq_cqhdl->cq_lock);
#else
	if (qp->qp_rq_cqhdl != qp->qp_sq_cqhdl)
		mutex_exit(&qp->qp_sq_cqhdl->cq_lock);
#endif
	mutex_exit(&qp->qp_rq_cqhdl->cq_lock);
	return (DDI_SUCCESS);
}


/*
 * hermon_wrid_to_reset_handling()
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_wrid_to_reset_handling(hermon_state_t *state, hermon_qphdl_t qp)
{
	uint_t		qp_srq_en;

	/* User-mapped QPs are tracked in userland; nothing to do here */
	if (qp->qp_is_umap)
		return (DDI_SUCCESS);

	/*
	 * If there are unpolled entries in these CQs, they are
	 * polled/flushed.
	 * Grab the CQ lock(s) before manipulating the lists.
	 */
	mutex_enter(&qp->qp_rq_cqhdl->cq_lock);
#ifdef __lock_lint
	mutex_enter(&qp->qp_sq_cqhdl->cq_lock);
#else
	if (qp->qp_rq_cqhdl != qp->qp_sq_cqhdl)
		mutex_enter(&qp->qp_sq_cqhdl->cq_lock);
#endif

	qp_srq_en = qp->qp_srq_en;
#ifdef __lock_lint
	mutex_enter(&qp->qp_srqhdl->srq_lock);
#else
	if (qp_srq_en == HERMON_QP_SRQ_ENABLED) {
		mutex_enter(&qp->qp_srqhdl->srq_lock);
	}
#endif
	/*
	 * Flush the entries on the CQ for this QP's QPN.
2278 */ 2279 hermon_cq_entries_flush(state, qp); 2280 2281 #ifdef __lock_lint 2282 mutex_exit(&qp->qp_srqhdl->srq_lock); 2283 #else 2284 if (qp_srq_en == HERMON_QP_SRQ_ENABLED) { 2285 mutex_exit(&qp->qp_srqhdl->srq_lock); 2286 } 2287 #endif 2288 2289 hermon_cq_workq_remove(qp->qp_rq_cqhdl, &qp->qp_rq_wqavl); 2290 hermon_cq_workq_remove(qp->qp_sq_cqhdl, &qp->qp_sq_wqavl); 2291 2292 #ifdef __lock_lint 2293 mutex_exit(&qp->qp_sq_cqhdl->cq_lock); 2294 #else 2295 if (qp->qp_rq_cqhdl != qp->qp_sq_cqhdl) 2296 mutex_exit(&qp->qp_sq_cqhdl->cq_lock); 2297 #endif 2298 mutex_exit(&qp->qp_rq_cqhdl->cq_lock); 2299 2300 return (IBT_SUCCESS); 2301 } 2302 2303 2304 /* 2305 * hermon_wrid_get_entry() 2306 * Context: Can be called from interrupt or base context. 2307 */ 2308 uint64_t 2309 hermon_wrid_get_entry(hermon_cqhdl_t cq, hermon_hw_cqe_t *cqe) 2310 { 2311 hermon_workq_avl_t *wqa; 2312 hermon_workq_hdr_t *wq; 2313 uint64_t wrid; 2314 uint_t send_or_recv, qpnum; 2315 uint32_t indx; 2316 2317 /* 2318 * Determine whether this CQE is a send or receive completion. 2319 */ 2320 send_or_recv = HERMON_CQE_SENDRECV_GET(cq, cqe); 2321 2322 /* Find the work queue for this QP number (send or receive side) */ 2323 qpnum = HERMON_CQE_QPNUM_GET(cq, cqe); 2324 wqa = hermon_wrid_wqavl_find(cq, qpnum, send_or_recv); 2325 wq = wqa->wqa_wq; 2326 2327 /* 2328 * Regardless of whether the completion is the result of a "success" 2329 * or a "failure", we lock the list of "containers" and attempt to 2330 * search for the the first matching completion (i.e. the first WR 2331 * with a matching WQE addr and size). Once we find it, we pull out 2332 * the "wrid" field and return it (see below). XXX Note: One possible 2333 * future enhancement would be to enable this routine to skip over 2334 * any "unsignaled" completions to go directly to the next "signaled" 2335 * entry on success. 
2336 */ 2337 indx = HERMON_CQE_WQEADDRSZ_GET(cq, cqe) & wq->wq_mask; 2338 wrid = wq->wq_wrid[indx]; 2339 if (wqa->wqa_srq_en) { 2340 struct hermon_sw_srq_s *srq; 2341 uint64_t *desc; 2342 2343 /* put wqe back on the srq free list */ 2344 srq = wqa->wqa_srq; 2345 mutex_enter(&srq->srq_lock); 2346 desc = HERMON_SRQ_WQE_ADDR(srq, wq->wq_tail); 2347 ((uint16_t *)desc)[1] = htons(indx); 2348 wq->wq_tail = indx; 2349 mutex_exit(&srq->srq_lock); 2350 } else { 2351 wq->wq_head = (indx + 1) & wq->wq_mask; 2352 wq->wq_full = 0; 2353 } 2354 2355 return (wrid); 2356 } 2357 2358 2359 int 2360 hermon_wrid_workq_compare(const void *p1, const void *p2) 2361 { 2362 hermon_workq_compare_t *cmpp; 2363 hermon_workq_avl_t *curr; 2364 2365 cmpp = (hermon_workq_compare_t *)p1; 2366 curr = (hermon_workq_avl_t *)p2; 2367 2368 if (cmpp->cmp_qpn < curr->wqa_qpn) 2369 return (-1); 2370 else if (cmpp->cmp_qpn > curr->wqa_qpn) 2371 return (+1); 2372 else if (cmpp->cmp_type < curr->wqa_type) 2373 return (-1); 2374 else if (cmpp->cmp_type > curr->wqa_type) 2375 return (+1); 2376 else 2377 return (0); 2378 } 2379 2380 2381 /* 2382 * hermon_wrid_workq_find() 2383 * Context: Can be called from interrupt or base context. 2384 */ 2385 static hermon_workq_avl_t * 2386 hermon_wrid_wqavl_find(hermon_cqhdl_t cq, uint_t qpn, uint_t wq_type) 2387 { 2388 hermon_workq_avl_t *curr; 2389 hermon_workq_compare_t cmp; 2390 2391 /* 2392 * Walk the CQ's work queue list, trying to find a send or recv queue 2393 * with the same QP number. We do this even if we are going to later 2394 * create a new entry because it helps us easily find the end of the 2395 * list. 2396 */ 2397 cmp.cmp_qpn = qpn; 2398 cmp.cmp_type = wq_type; 2399 #ifdef __lock_lint 2400 hermon_wrid_workq_compare(NULL, NULL); 2401 #endif 2402 curr = avl_find(&cq->cq_wrid_wqhdr_avl_tree, &cmp, NULL); 2403 2404 return (curr); 2405 } 2406 2407 2408 /* 2409 * hermon_wrid_wqhdr_create() 2410 * Context: Can be called from base context. 
2411 */ 2412 /* ARGSUSED */ 2413 hermon_workq_hdr_t * 2414 hermon_wrid_wqhdr_create(int bufsz) 2415 { 2416 hermon_workq_hdr_t *wqhdr; 2417 2418 /* 2419 * Allocate space for the wqhdr, and an array to record all the wrids. 2420 */ 2421 wqhdr = (hermon_workq_hdr_t *)kmem_zalloc(sizeof (*wqhdr), KM_NOSLEEP); 2422 if (wqhdr == NULL) { 2423 return (NULL); 2424 } 2425 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*wqhdr)) 2426 wqhdr->wq_wrid = kmem_zalloc(bufsz * sizeof (uint64_t), KM_NOSLEEP); 2427 if (wqhdr->wq_wrid == NULL) { 2428 kmem_free(wqhdr, sizeof (*wqhdr)); 2429 return (NULL); 2430 } 2431 wqhdr->wq_size = bufsz; 2432 wqhdr->wq_mask = bufsz - 1; 2433 2434 return (wqhdr); 2435 } 2436 2437 void 2438 hermon_wrid_wqhdr_destroy(hermon_workq_hdr_t *wqhdr) 2439 { 2440 kmem_free(wqhdr->wq_wrid, wqhdr->wq_size * sizeof (uint64_t)); 2441 kmem_free(wqhdr, sizeof (*wqhdr)); 2442 } 2443 2444 2445 /* 2446 * hermon_cq_workq_add() 2447 * Context: Can be called from interrupt or base context. 2448 */ 2449 static void 2450 hermon_cq_workq_add(hermon_cqhdl_t cq, hermon_workq_avl_t *wqavl) 2451 { 2452 hermon_workq_compare_t cmp; 2453 avl_index_t where; 2454 2455 cmp.cmp_qpn = wqavl->wqa_qpn; 2456 cmp.cmp_type = wqavl->wqa_type; 2457 #ifdef __lock_lint 2458 hermon_wrid_workq_compare(NULL, NULL); 2459 #endif 2460 (void) avl_find(&cq->cq_wrid_wqhdr_avl_tree, &cmp, &where); 2461 avl_insert(&cq->cq_wrid_wqhdr_avl_tree, wqavl, where); 2462 } 2463 2464 2465 /* 2466 * hermon_cq_workq_remove() 2467 * Context: Can be called from interrupt or base context. 2468 */ 2469 static void 2470 hermon_cq_workq_remove(hermon_cqhdl_t cq, hermon_workq_avl_t *wqavl) 2471 { 2472 #ifdef __lock_lint 2473 hermon_wrid_workq_compare(NULL, NULL); 2474 #endif 2475 avl_remove(&cq->cq_wrid_wqhdr_avl_tree, wqavl); 2476 } 2477