1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * hermon_wr.c 29 * Hermon Work Request Processing Routines 30 * 31 * Implements all the routines necessary to provide the PostSend(), 32 * PostRecv() and PostSRQ() verbs. Also contains all the code 33 * necessary to implement the Hermon WRID tracking mechanism. 
 */

#include <sys/types.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/modctl.h>
#include <sys/avl.h>

#include <sys/ib/adapters/hermon/hermon.h>

static uint32_t hermon_wr_get_immediate(ibt_send_wr_t *wr);
static int hermon_wr_bind_check(hermon_state_t *state, ibt_send_wr_t *wr);
static int hermon_wqe_send_build(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_send_wr_t *wr, uint64_t *desc, uint_t *size);
static int hermon_wqe_mlx_build(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_send_wr_t *wr, uint64_t *desc, uint_t *size);
static void hermon_wqe_headroom(uint_t from, hermon_qphdl_t qp);
static int hermon_wqe_recv_build(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_recv_wr_t *wr, uint64_t *desc);
static int hermon_wqe_srq_build(hermon_state_t *state, hermon_srqhdl_t srq,
    ibt_recv_wr_t *wr, uint64_t *desc);
static void hermon_wqe_sync(void *hdl, uint_t sync_from,
    uint_t sync_to, uint_t sync_type, uint_t flag);
static hermon_workq_avl_t *hermon_wrid_wqavl_find(hermon_cqhdl_t cq, uint_t qpn,
    uint_t send_or_recv);
static void hermon_cq_workq_add(hermon_cqhdl_t cq, hermon_workq_avl_t *wqavl);
static void hermon_cq_workq_remove(hermon_cqhdl_t cq,
    hermon_workq_avl_t *wqavl);

/*
 * A zero-length scatter-gather entry ({va, key, len}).  NOTE(review):
 * presumably used as a placeholder data segment by code elsewhere in this
 * file (no user visible in this section) -- confirm against later routines.
 */
static ibt_wr_ds_t	null_sgl = { 0, 0x00000100, 0 };

/*
 * hermon_post_send_ud()
 *    Fast-path posting of Send / Send-with-Immediate / LSO work requests
 *    on a UD QP.  Called (with qp->qp_sq_lock already held) from
 *    hermon_post_send() below; drops qp_sq_lock before returning.
 *    Builds each WQE in place, sets the hardware ownership bit last,
 *    and rings the UAR doorbell once for the whole batch.
 */
static int
hermon_post_send_ud(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_send_wr_t *wr, uint_t num_wr, uint_t *num_posted)
{
	hermon_hw_snd_wqe_ud_t		*ud;
	hermon_workq_hdr_t		*wq;
	hermon_ahhdl_t			ah;
	ibt_ud_dest_t			*dest;
	uint64_t			*desc;
	uint32_t			desc_sz;
	uint32_t			signaled_dbd, solicited;
	uint32_t			head, tail, next_tail, qsize_msk;
	uint32_t			hdrmwqes;
	uint32_t			nopcode, fence, immed_data = 0;
	hermon_hw_wqe_sgl_t		*ds, *old_ds;
	ibt_wr_ds_t			*sgl;
	uint32_t			nds, dnds;
	int				i, j, last_ds, num_ds, status;
	uint32_t			*wqe_start;
	int				sectperwqe;
	uint_t				posted_cnt = 0;

	/* initialize the FMA retry loop */
	hermon_pio_init(fm_loop_cnt, fm_status, fm_test_num);

	ASSERT(MUTEX_HELD(&qp->qp_sq_lock));
	_NOTE(LOCK_RELEASED_AS_SIDE_EFFECT(&qp->qp_sq_lock))

	/* Grab the lock for the WRID list */
	membar_consumer();

	/* Save away some initial QP state */
	wq = qp->qp_sq_wqhdr;
	qsize_msk = wq->wq_mask;
	hdrmwqes = qp->qp_sq_hdrmwqes;		/* in WQEs */
	/* number of 16-dword (64-byte) sections per WQE */
	sectperwqe = 1 << (qp->qp_sq_log_wqesz - 2);

	tail = wq->wq_tail;
	head = wq->wq_head;
	status = DDI_SUCCESS;

post_next:
	/*
	 * Check for "queue full" condition.  If the queue
	 * is already full, then no more WQEs can be posted.
	 * So break out, ring a doorbell (if necessary) and
	 * return an error
	 */
	if (wq->wq_full != 0) {
		status = IBT_QP_FULL;
		goto done;
	}

	/*
	 * Advance tail; the queue is declared full when the new tail plus
	 * the headroom catches up with head (see hdrmwqes above).
	 */
	next_tail = (tail + 1) & qsize_msk;
	if (((tail + hdrmwqes) & qsize_msk) == head) {
		wq->wq_full = 1;
	}

	desc = HERMON_QP_SQ_ENTRY(qp, tail);

	/* UD address segment follows the control segment; SGLs follow it */
	ud = (hermon_hw_snd_wqe_ud_t *)((uintptr_t)desc +
	    sizeof (hermon_hw_snd_wqe_ctrl_t));
	ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)ud +
	    sizeof (hermon_hw_snd_wqe_ud_t));
	nds = wr->wr_nds;
	sgl = wr->wr_sgl;
	num_ds = 0;

	/* need to know the count of destination nds for backward loop */
	for (dnds = 0, i = 0; i < nds; i++) {
		if (sgl[i].ds_len != 0)
			dnds++;
	}

	/*
	 * Build a Send or Send_LSO WQE
	 */
	if (wr->wr_opcode == IBT_WRC_SEND_LSO) {
		int total_len;

		nopcode = HERMON_WQE_SEND_NOPCODE_LSO;
		if (wr->wr.ud_lso.lso_hdr_sz > 60) {
			nopcode |= (1 << 6);	/* ReRead bit must be set */
		}
		dest = wr->wr.ud_lso.lso_ud_dest;
		ah = (hermon_ahhdl_t)dest->ud_ah;
		if (ah == NULL) {
			status = IBT_AH_HDL_INVALID;
			goto done;
		}
		HERMON_WQE_BUILD_UD(qp, ud, ah, dest);

		/* LSO header is padded up to a 16-byte multiple */
		total_len = (4 + 0xf + wr->wr.ud_lso.lso_hdr_sz) & ~0xf;
		if ((uintptr_t)ds + total_len + (nds * 16) >
		    (uintptr_t)desc + (1 << qp->qp_sq_log_wqesz)) {
			status = IBT_QP_SGL_LEN_INVALID;
			goto done;
		}
		old_ds = ds;	/* remember LSO segment for BUILD_LSO below */
		bcopy(wr->wr.ud_lso.lso_hdr, (uint32_t *)old_ds + 1,
		    wr->wr.ud_lso.lso_hdr_sz);
		ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)ds + total_len);
		i = 0;
	} else if (wr->wr_opcode == IBT_WRC_SEND) {
		if (wr->wr_flags & IBT_WR_SEND_IMMED) {
			nopcode = HERMON_WQE_SEND_NOPCODE_SENDI;
			immed_data = wr->wr.ud.udwr_immed;
		} else {
			nopcode = HERMON_WQE_SEND_NOPCODE_SEND;
		}
		dest = wr->wr.ud.udwr_dest;
		ah = (hermon_ahhdl_t)dest->ud_ah;
		if (ah == NULL) {
			status = IBT_AH_HDL_INVALID;
			goto done;
		}
		HERMON_WQE_BUILD_UD(qp, ud, ah, dest);
		i = 0;
	} else {
		/* only Send and Send_LSO are valid on a UD QP */
		status = IBT_QP_OP_TYPE_INVALID;
		goto done;
	}

	if (nds > qp->qp_sq_sgl) {
		status = IBT_QP_SGL_LEN_INVALID;
		goto done;
	}
	/* count the non-empty SGL entries to find the last real ds slot */
	for (last_ds = num_ds, j = i; j < nds; j++) {
		if (sgl[j].ds_len != 0)
			last_ds++;	/* real last ds of wqe to fill */
	}
	/* descriptor size in 16-byte units */
	desc_sz = ((uintptr_t)&ds[last_ds] - (uintptr_t)desc) >> 0x4;
	/* fill data segments backward so last_ds counts down to first */
	for (j = nds; --j >= i; ) {
		if (sgl[j].ds_len == 0) {
			continue;
		}

		/*
		 * Fill in the Data Segment(s) for the current WQE, using the
		 * information contained in the scatter-gather list of the
		 * work request.
		 */
		last_ds--;
		HERMON_WQE_BUILD_DATA_SEG_SEND(&ds[last_ds], &sgl[j]);
	}

	/* ensure data segments are visible before the LSO segment is */
	membar_producer();

	if (wr->wr_opcode == IBT_WRC_SEND_LSO) {
		HERMON_WQE_BUILD_LSO(qp, old_ds, wr->wr.ud_lso.lso_mss,
		    wr->wr.ud_lso.lso_hdr_sz);
	}

	fence = (wr->wr_flags & IBT_WR_SEND_FENCE) ? 1 : 0;

	signaled_dbd = ((qp->qp_sq_sigtype == HERMON_QP_SQ_ALL_SIGNALED) ||
	    (wr->wr_flags & IBT_WR_SEND_SIGNAL)) ? 1 : 0;

	solicited = (wr->wr_flags & IBT_WR_SEND_SOLICIT) ? 1 : 0;

	HERMON_WQE_SET_CTRL_SEGMENT(desc, desc_sz, fence, immed_data,
	    solicited, signaled_dbd, wr->wr_flags & IBT_WR_SEND_CKSUM, qp);

	wq->wq_wrid[tail] = wr->wr_id;

	tail = next_tail;

	/* Update some of the state in the QP */
	wq->wq_tail = tail;

	/* the WQE must be fully written before ownership transfers to HW */
	membar_producer();

	/* Now set the ownership bit and opcode (first dword). */
	HERMON_SET_SEND_WQE_OWNER(qp, (uint32_t *)desc, nopcode);

	posted_cnt++;
	if (--num_wr > 0) {
		/* do the invalidate of the headroom */
		wqe_start = (uint32_t *)HERMON_QP_SQ_ENTRY(qp,
		    (tail + hdrmwqes) & qsize_msk);
		for (i = 16; i < sectperwqe; i += 16) {
			wqe_start[i] = 0xFFFFFFFF;
		}

		wr++;
		goto post_next;
	}
done:
	if (posted_cnt != 0) {
		ddi_acc_handle_t uarhdl = hermon_get_uarhdl(state);

		membar_producer();

		/* the FMA retry loop starts for Hermon doorbell register. */
		hermon_pio_start(state, uarhdl, pio_error, fm_loop_cnt,
		    fm_status, fm_test_num);

		HERMON_UAR_DOORBELL(state, uarhdl,
		    (uint64_t *)(void *)&state->hs_uar->send,
		    (uint64_t)qp->qp_ring);

		/* the FMA retry loop ends. */
		hermon_pio_end(state, uarhdl, pio_error, fm_loop_cnt,
		    fm_status, fm_test_num);

		/* do the invalidate of the headroom */
		wqe_start = (uint32_t *)HERMON_QP_SQ_ENTRY(qp,
		    (tail + hdrmwqes) & qsize_msk);
		for (i = 16; i < sectperwqe; i += 16) {
			wqe_start[i] = 0xFFFFFFFF;
		}
	}
	if (num_posted != NULL)
		*num_posted = posted_cnt;

	mutex_exit(&qp->qp_sq_lock);

	return (status);

pio_error:
	/* PIO to the doorbell failed; report lost service to FMA */
	mutex_exit(&qp->qp_sq_lock);
	hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
	return (ibc_get_ci_failure(0));
}

/*
 * hermon_post_send_rc()
 *    Fast-path posting of work requests on an RC QP.  Called (with
 *    qp->qp_sq_lock already held) from hermon_post_send() below; drops
 *    qp_sq_lock before returning.  Supports Send, RDMA Read/Write,
 *    Atomic (Compare-Swap / Fetch-Add), and memory window Bind.
 */
static int
hermon_post_send_rc(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_send_wr_t *wr, uint_t num_wr, uint_t *num_posted)
{
	uint64_t			*desc;
	hermon_workq_hdr_t		*wq;
	uint32_t			desc_sz;
	uint32_t			signaled_dbd, solicited;
	uint32_t			head, tail, next_tail, qsize_msk;
	uint32_t			hdrmwqes;
	int				status;
	uint32_t			nopcode, fence, immed_data = 0;
	hermon_hw_snd_wqe_remaddr_t	*rc;
	hermon_hw_snd_wqe_atomic_t	*at;
	hermon_hw_snd_wqe_bind_t	*bn;
	hermon_hw_wqe_sgl_t		*ds;
	ibt_wr_ds_t			*sgl;
	uint32_t			nds;
	int				i, last_ds, num_ds;
	uint32_t			*wqe_start;
	int				sectperwqe;
	uint_t				posted_cnt = 0;

	/* initialize the FMA retry loop */
	hermon_pio_init(fm_loop_cnt, fm_status, fm_test_num);

	ASSERT(MUTEX_HELD(&qp->qp_sq_lock));
	_NOTE(LOCK_RELEASED_AS_SIDE_EFFECT(&qp->qp_sq_lock))

	/* make sure we see any update of wq_head */
	membar_consumer();

	/* Save away some initial QP state */
	wq = qp->qp_sq_wqhdr;
	qsize_msk = wq->wq_mask;
	hdrmwqes = qp->qp_sq_hdrmwqes;		/* in WQEs */
	sectperwqe = 1 << (qp->qp_sq_log_wqesz - 2);

	tail = wq->wq_tail;
	head = wq->wq_head;
	status = DDI_SUCCESS;

post_next:
	/*
	 * Check for "queue full" condition.  If the queue
	 * is already full, then no more WQEs can be posted.
	 * So break out, ring a doorbell (if necessary) and
	 * return an error
	 */
	if (wq->wq_full != 0) {
		status = IBT_QP_FULL;
		goto done;
	}
	next_tail = (tail + 1) & qsize_msk;
	if (((tail + hdrmwqes) & qsize_msk) == head) {
		wq->wq_full = 1;
	}

	desc = HERMON_QP_SQ_ENTRY(qp, tail);

	/* data segments follow the control segment unless reset below */
	ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)desc +
	    sizeof (hermon_hw_snd_wqe_ctrl_t));
	nds = wr->wr_nds;
	sgl = wr->wr_sgl;
	num_ds = 0;

	/*
	 * Validate the operation type.  For RC requests, we allow
	 * "Send", "RDMA Read", "RDMA Write", various "Atomic"
	 * operations, and memory window "Bind"
	 */
	switch (wr->wr_opcode) {
	default:
		status = IBT_QP_OP_TYPE_INVALID;
		goto done;

	case IBT_WRC_SEND:
		if (wr->wr_flags & IBT_WR_SEND_IMMED) {
			nopcode = HERMON_WQE_SEND_NOPCODE_SENDI;
			immed_data = wr->wr.rc.rcwr.send_immed;
		} else {
			nopcode = HERMON_WQE_SEND_NOPCODE_SEND;
		}
		break;

	/*
	 * If this is an RDMA Read or RDMA Write request, then fill
	 * in the "Remote Address" header fields.
	 */
	case IBT_WRC_RDMAW:
		if (wr->wr_flags & IBT_WR_SEND_IMMED) {
			nopcode = HERMON_WQE_SEND_NOPCODE_RDMAWI;
			immed_data = wr->wr.rc.rcwr.rdma.rdma_immed;
		} else {
			nopcode = HERMON_WQE_SEND_NOPCODE_RDMAW;
		}
		/* FALLTHROUGH */
	case IBT_WRC_RDMAR:
		/* only override nopcode when we actually fell in as RDMAR */
		if (wr->wr_opcode == IBT_WRC_RDMAR)
			nopcode = HERMON_WQE_SEND_NOPCODE_RDMAR;
		rc = (hermon_hw_snd_wqe_remaddr_t *)((uintptr_t)desc +
		    sizeof (hermon_hw_snd_wqe_ctrl_t));

		/*
		 * Build the Remote Address Segment for the WQE, using
		 * the information from the RC work request.
		 */
		HERMON_WQE_BUILD_REMADDR(qp, rc, &wr->wr.rc.rcwr.rdma);

		/* Update "ds" for filling in Data Segments (below) */
		ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)rc +
		    sizeof (hermon_hw_snd_wqe_remaddr_t));
		break;

	/*
	 * If this is one of the Atomic type operations (i.e
	 * Compare-Swap or Fetch-Add), then fill in both the "Remote
	 * Address" header fields and the "Atomic" header fields.
	 */
	case IBT_WRC_CSWAP:
		nopcode = HERMON_WQE_SEND_NOPCODE_ATMCS;
		/* FALLTHROUGH */
	case IBT_WRC_FADD:
		if (wr->wr_opcode == IBT_WRC_FADD)
			nopcode = HERMON_WQE_SEND_NOPCODE_ATMFA;
		rc = (hermon_hw_snd_wqe_remaddr_t *)((uintptr_t)desc +
		    sizeof (hermon_hw_snd_wqe_ctrl_t));
		at = (hermon_hw_snd_wqe_atomic_t *)((uintptr_t)rc +
		    sizeof (hermon_hw_snd_wqe_remaddr_t));

		/*
		 * Build the Remote Address and Atomic Segments for
		 * the WQE, using the information from the RC Atomic
		 * work request.
		 */
		HERMON_WQE_BUILD_RC_ATOMIC_REMADDR(qp, rc, wr);
		HERMON_WQE_BUILD_ATOMIC(qp, at, wr->wr.rc.rcwr.atomic);

		/* Update "ds" for filling in Data Segments (below) */
		ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)at +
		    sizeof (hermon_hw_snd_wqe_atomic_t));

		/*
		 * Update "nds" and "sgl" because Atomic requests have
		 * only a single Data Segment.
		 */
		nds = 1;
		sgl = wr->wr_sgl;
		break;

	/*
	 * If this is memory window Bind operation, then we call the
	 * hermon_wr_bind_check() routine to validate the request and
	 * to generate the updated RKey.  If this is successful, then
	 * we fill in the WQE's "Bind" header fields.
	 */
	case IBT_WRC_BIND:
		nopcode = HERMON_WQE_SEND_NOPCODE_BIND;
		status = hermon_wr_bind_check(state, wr);
		if (status != DDI_SUCCESS)
			goto done;

		bn = (hermon_hw_snd_wqe_bind_t *)((uintptr_t)desc +
		    sizeof (hermon_hw_snd_wqe_ctrl_t));

		/*
		 * Build the Bind Memory Window Segments for the WQE,
		 * using the information from the RC Bind memory
		 * window work request.
		 */
		HERMON_WQE_BUILD_BIND(qp, bn, wr->wr.rc.rcwr.bind);

		/*
		 * Update the "ds" pointer.  Even though the "bind"
		 * operation requires no SGLs, this is necessary to
		 * facilitate the correct descriptor size calculations
		 * (below).
		 */
		ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)bn +
		    sizeof (hermon_hw_snd_wqe_bind_t));
		nds = 0;
	}

	/*
	 * Now fill in the Data Segments (SGL) for the Send WQE based
	 * on the values setup above (i.e. "sgl", "nds", and the "ds"
	 * pointer.  Start by checking for a valid number of SGL entries
	 */
	if (nds > qp->qp_sq_sgl) {
		status = IBT_QP_SGL_LEN_INVALID;
		goto done;
	}

	/* count non-empty SGL entries to locate the last real ds slot */
	for (last_ds = num_ds, i = 0; i < nds; i++) {
		if (sgl[i].ds_len != 0)
			last_ds++;	/* real last ds of wqe to fill */
	}
	/* descriptor size in 16-byte units */
	desc_sz = ((uintptr_t)&ds[last_ds] - (uintptr_t)desc) >> 0x4;
	/* fill data segments backward so last_ds counts down to first */
	for (i = nds; --i >= 0; ) {
		if (sgl[i].ds_len == 0) {
			continue;
		}

		/*
		 * Fill in the Data Segment(s) for the current WQE, using the
		 * information contained in the scatter-gather list of the
		 * work request.
		 */
		last_ds--;
		HERMON_WQE_BUILD_DATA_SEG_SEND(&ds[last_ds], &sgl[i]);
	}

	fence = (wr->wr_flags & IBT_WR_SEND_FENCE) ? 1 : 0;

	signaled_dbd = ((qp->qp_sq_sigtype == HERMON_QP_SQ_ALL_SIGNALED) ||
	    (wr->wr_flags & IBT_WR_SEND_SIGNAL)) ? 1 : 0;

	solicited = (wr->wr_flags & IBT_WR_SEND_SOLICIT) ? 1 : 0;

	HERMON_WQE_SET_CTRL_SEGMENT(desc, desc_sz, fence, immed_data, solicited,
	    signaled_dbd, wr->wr_flags & IBT_WR_SEND_CKSUM, qp);

	wq->wq_wrid[tail] = wr->wr_id;

	tail = next_tail;

	/* Update some of the state in the QP */
	wq->wq_tail = tail;

	/* the WQE must be fully written before ownership transfers to HW */
	membar_producer();

	/* Now set the ownership bit of the first one in the chain. */
	HERMON_SET_SEND_WQE_OWNER(qp, (uint32_t *)desc, nopcode);

	posted_cnt++;
	if (--num_wr > 0) {
		/* do the invalidate of the headroom */
		wqe_start = (uint32_t *)HERMON_QP_SQ_ENTRY(qp,
		    (tail + hdrmwqes) & qsize_msk);
		for (i = 16; i < sectperwqe; i += 16) {
			wqe_start[i] = 0xFFFFFFFF;
		}

		wr++;
		goto post_next;
	}
done:

	if (posted_cnt != 0) {
		ddi_acc_handle_t uarhdl = hermon_get_uarhdl(state);

		membar_producer();

		/* the FMA retry loop starts for Hermon doorbell register. */
		hermon_pio_start(state, uarhdl, pio_error, fm_loop_cnt,
		    fm_status, fm_test_num);

		/* Ring the doorbell */
		HERMON_UAR_DOORBELL(state, uarhdl,
		    (uint64_t *)(void *)&state->hs_uar->send,
		    (uint64_t)qp->qp_ring);

		/* the FMA retry loop ends. */
		hermon_pio_end(state, uarhdl, pio_error, fm_loop_cnt,
		    fm_status, fm_test_num);

		/* do the invalidate of the headroom */
		wqe_start = (uint32_t *)HERMON_QP_SQ_ENTRY(qp,
		    (tail + hdrmwqes) & qsize_msk);
		for (i = 16; i < sectperwqe; i += 16) {
			wqe_start[i] = 0xFFFFFFFF;
		}
	}
	/*
	 * Update the "num_posted" return value (if necessary).
	 * Then drop the locks and return success.
	 */
	if (num_posted != NULL) {
		*num_posted = posted_cnt;
	}

	mutex_exit(&qp->qp_sq_lock);
	return (status);

pio_error:
	/* PIO to the doorbell failed; report lost service to FMA */
	mutex_exit(&qp->qp_sq_lock);
	hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
	return (ibc_get_ci_failure(0));
}

/*
 * hermon_post_send()
 *    Context: Can be called from interrupt or base context.
 *
 *    Top-level PostSend verb entry point.  After state checks it
 *    dispatches UD and RC QPs to the optimized routines above; special
 *    (MLX) and UC QPs fall through to the generic chain-building loop.
 */
int
hermon_post_send(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_send_wr_t *wr, uint_t num_wr, uint_t *num_posted)
{
	ibt_send_wr_t			*curr_wr;
	hermon_workq_hdr_t		*wq;
	hermon_ahhdl_t			ah;
	uint64_t			*desc, *prev;
	uint32_t			desc_sz;
	uint32_t			signaled_dbd, solicited;
	uint32_t			head, tail, next_tail, qsize_msk;
	uint32_t			sync_from, sync_to;
	uint32_t			hdrmwqes;
	uint_t				currindx, wrindx, numremain;
	uint_t				chainlen;
	uint_t				posted_cnt, maxstat;
	uint_t				total_posted;
	int				status;
	uint32_t			nopcode, fence, immed_data = 0;
	uint32_t			prev_nopcode;

	/* initialize the FMA retry loop */
	hermon_pio_init(fm_loop_cnt, fm_status, fm_test);

	/*
	 * Check for user-mappable QP memory.  Note:  We do not allow kernel
	 * clients to post to QP memory that is accessible directly by the
	 * user.  If the QP memory is user accessible, then return an error.
	 */
	if (qp->qp_is_umap) {
		return (IBT_QP_HDL_INVALID);
	}

	mutex_enter(&qp->qp_lock);

	/*
	 * Check QP state.
	 * Can not post Send requests from the "Reset",
	 * "Init", or "RTR" states
	 */
	if ((qp->qp_state == HERMON_QP_RESET) ||
	    (qp->qp_state == HERMON_QP_INIT) ||
	    (qp->qp_state == HERMON_QP_RTR)) {
		mutex_exit(&qp->qp_lock);
		return (IBT_QP_STATE_INVALID);
	}
	mutex_exit(&qp->qp_lock);
	mutex_enter(&qp->qp_sq_lock);

	if (qp->qp_is_special)
		goto post_many;

	/*
	 * Use these optimized functions most of the time.
	 * Note: each of these drops qp_sq_lock itself before returning.
	 */
	if (qp->qp_serv_type == HERMON_QP_UD)
		return (hermon_post_send_ud(state, qp, wr, num_wr, num_posted));

	if (qp->qp_serv_type == HERMON_QP_RC)
		return (hermon_post_send_rc(state, qp, wr, num_wr, num_posted));

	if (qp->qp_serv_type == HERMON_QP_UC)
		goto post_many;

	mutex_exit(&qp->qp_sq_lock);
	return (IBT_QP_SRV_TYPE_INVALID);

post_many:
	/* general loop for non-optimized posting */

	/* Grab the lock for the WRID list */
	membar_consumer();

	/* Save away some initial QP state */
	wq = qp->qp_sq_wqhdr;
	qsize_msk = wq->wq_mask;
	tail = wq->wq_tail;
	head = wq->wq_head;
	hdrmwqes = qp->qp_sq_hdrmwqes;		/* in WQEs */

	/* Initialize posted_cnt */
	posted_cnt = 0;
	total_posted = 0;

	/*
	 * For each ibt_send_wr_t in the wr[] list passed in, parse the
	 * request and build a Send WQE.  NOTE:  Because we are potentially
	 * building a chain of WQEs to post, we want to build them all first,
	 * and set the valid (HW Ownership) bit on all but the first.
	 * However, we do not want to validate the first one until the
	 * entire chain of WQEs has been built.  Then in the final
	 * we set the valid bit in the first, flush if needed, and as a last
	 * step ring the appropriate doorbell.  NOTE: the doorbell ring may
	 * NOT be needed if the HCA is already processing, but the doorbell
	 * ring will be done regardless.  NOTE ALSO:  It is possible for
	 * more Work Requests to be posted than the HW will support at one
	 * shot.  If this happens, we need to be able to post and ring
	 * several chains here until the the entire request is complete.
	 * NOTE ALSO:  the term "chain" is used to differentiate it from
	 * Work Request List passed in; and because that's the terminology
	 * from the previous generations of HCA - but the WQEs are not, in
	 * fact, chained together for Hermon
	 */

	wrindx = 0;
	numremain = num_wr;
	status = DDI_SUCCESS;
	while ((wrindx < num_wr) && (status == DDI_SUCCESS)) {
		/*
		 * For the first WQE on a new chain we need "prev" to point
		 * to the current descriptor.
		 */
		prev = HERMON_QP_SQ_ENTRY(qp, tail);

		/*
		 * unlike Tavor & Arbel, tail will maintain the number of the
		 * next (this) WQE to be posted.  Since there is no backward
		 * linking in Hermon, we can always just look ahead
		 */
		/*
		 * Before we begin, save the current "tail index" for later
		 * DMA sync
		 */
		/* NOTE: don't need to go back one like arbel/tavor */
		sync_from = tail;

		/*
		 * Break the request up into lists that are less than or
		 * equal to the maximum number of WQEs that can be posted
		 * per doorbell ring - 256 currently
		 */
		chainlen = (numremain > HERMON_QP_MAXDESC_PER_DB) ?
		    HERMON_QP_MAXDESC_PER_DB : numremain;
		numremain -= chainlen;

		for (currindx = 0; currindx < chainlen; currindx++, wrindx++) {
			/*
			 * Check for "queue full" condition.  If the queue
			 * is already full, then no more WQEs can be posted.
			 * So break out, ring a doorbell (if necessary) and
			 * return an error
			 */
			if (wq->wq_full != 0) {
				status = IBT_QP_FULL;
				break;
			}

			/*
			 * Increment the "tail index".  Check for "queue
			 * full" condition incl. headroom.  If we detect that
			 * the current work request is going to fill the work
			 * queue, then we mark this condition and continue.
			 * Don't need >=, because going one-by-one we have to
			 * hit it exactly sooner or later
			 */

			next_tail = (tail + 1) & qsize_msk;
			if (((tail + hdrmwqes) & qsize_msk) == head) {
				wq->wq_full = 1;
			}

			/*
			 * Get the address of the location where the next
			 * Send WQE should be built
			 */
			desc = HERMON_QP_SQ_ENTRY(qp, tail);
			/*
			 * Call hermon_wqe_send_build() to build the WQE
			 * at the given address.  This routine uses the
			 * information in the ibt_send_wr_t list (wr[]) and
			 * returns the size of the WQE when it returns.
			 */
			status = hermon_wqe_send_build(state, qp,
			    &wr[wrindx], desc, &desc_sz);
			if (status != DDI_SUCCESS) {
				break;
			}

			/*
			 * Now, build the Ctrl Segment based on
			 * what was just done
			 */
			curr_wr = &wr[wrindx];

			switch (curr_wr->wr_opcode) {
			case IBT_WRC_RDMAW:
				if (curr_wr->wr_flags & IBT_WR_SEND_IMMED) {
					nopcode =
					    HERMON_WQE_SEND_NOPCODE_RDMAWI;
					immed_data =
					    hermon_wr_get_immediate(curr_wr);
				} else {
					nopcode = HERMON_WQE_SEND_NOPCODE_RDMAW;
				}
				break;

			case IBT_WRC_SEND:
				if (curr_wr->wr_flags & IBT_WR_SEND_IMMED) {
					nopcode = HERMON_WQE_SEND_NOPCODE_SENDI;
					immed_data =
					    hermon_wr_get_immediate(curr_wr);
				} else {
					nopcode = HERMON_WQE_SEND_NOPCODE_SEND;
				}
				break;

			case IBT_WRC_SEND_LSO:
				nopcode = HERMON_WQE_SEND_NOPCODE_LSO;
				break;

			case IBT_WRC_RDMAR:
				nopcode = HERMON_WQE_SEND_NOPCODE_RDMAR;
				break;

			case IBT_WRC_CSWAP:
				nopcode = HERMON_WQE_SEND_NOPCODE_ATMCS;
				break;

			case IBT_WRC_FADD:
				nopcode = HERMON_WQE_SEND_NOPCODE_ATMFA;
				break;

			case IBT_WRC_BIND:
				nopcode = HERMON_WQE_SEND_NOPCODE_BIND;
				break;
			}

			fence = (curr_wr->wr_flags & IBT_WR_SEND_FENCE) ?
			    1 : 0;

			/*
			 * now, build up the control segment, leaving the
			 * owner bit as it is
			 */

			if ((qp->qp_sq_sigtype == HERMON_QP_SQ_ALL_SIGNALED) ||
			    (curr_wr->wr_flags & IBT_WR_SEND_SIGNAL)) {
				signaled_dbd = 1;
			} else {
				signaled_dbd = 0;
			}
			if (curr_wr->wr_flags & IBT_WR_SEND_SOLICIT)
				solicited = 1;
			else
				solicited = 0;

			if (qp->qp_is_special) {
				/* MLX (special QP) control segment needs AH */
				ah = (hermon_ahhdl_t)
				    curr_wr->wr.ud.udwr_dest->ud_ah;
				mutex_enter(&ah->ah_lock);
				maxstat = ah->ah_udav->max_stat_rate;
				HERMON_WQE_SET_MLX_CTRL_SEGMENT(desc, desc_sz,
				    signaled_dbd, maxstat, ah->ah_udav->rlid,
				    qp, ah->ah_udav->sl);
				mutex_exit(&ah->ah_lock);
			} else {
				HERMON_WQE_SET_CTRL_SEGMENT(desc, desc_sz,
				    fence, immed_data, solicited,
				    signaled_dbd, curr_wr->wr_flags &
				    IBT_WR_SEND_CKSUM, qp);
			}
			wq->wq_wrid[tail] = curr_wr->wr_id;

			/*
			 * If this is not the first descriptor on the current
			 * chain, then set the ownership bit.  The first WQE's
			 * opcode is remembered and its ownership bit is set
			 * only after the whole chain has been built (below).
			 */
			if (currindx != 0) {		/* not the first */
				membar_producer();
				HERMON_SET_SEND_WQE_OWNER(qp,
				    (uint32_t *)desc, nopcode);
			} else
				prev_nopcode = nopcode;

			/*
			 * Update the current "tail index" and increment
			 * "posted_cnt"
			 */
			tail = next_tail;
			posted_cnt++;
		}

		/*
		 * If we reach here and there are one or more WQEs which have
		 * been successfully built as a chain, we have to finish up
		 * and prepare them for writing to the HW
		 * The steps are:
		 *	1. do the headroom fixup
		 *	2. add in the size of the headroom for the sync
		 *	3. write the owner bit for the first WQE
		 *	4. sync them
		 *	5. fix up the structures
		 *	6. hit the doorbell in UAR
		 */
		if (posted_cnt != 0) {
			ddi_acc_handle_t uarhdl = hermon_get_uarhdl(state);

			/*
			 * Save away updated "tail index" for the DMA sync
			 * including the headroom that will be needed
			 */
			sync_to = (tail + hdrmwqes) & qsize_msk;

			/* do the invalidate of the headroom */

			hermon_wqe_headroom(tail, qp);

			/* Do a DMA sync for current send WQE(s) */
			hermon_wqe_sync(qp, sync_from, sync_to, HERMON_WR_SEND,
			    DDI_DMA_SYNC_FORDEV);

			/* Update some of the state in the QP */
			wq->wq_tail = tail;
			total_posted += posted_cnt;
			posted_cnt = 0;

			/* chain contents must be visible before ownership */
			membar_producer();

			/*
			 * Now set the ownership bit of the first
			 * one in the chain
			 */
			HERMON_SET_SEND_WQE_OWNER(qp, (uint32_t *)prev,
			    prev_nopcode);

			/* the FMA retry loop starts for Hermon doorbell. */
			hermon_pio_start(state, uarhdl, pio_error, fm_loop_cnt,
			    fm_status, fm_test);

			HERMON_UAR_DOORBELL(state, uarhdl,
			    (uint64_t *)(void *)&state->hs_uar->send,
			    (uint64_t)qp->qp_ring);

			/* the FMA retry loop ends. */
			hermon_pio_end(state, uarhdl, pio_error, fm_loop_cnt,
			    fm_status, fm_test);
		}
	}

	/*
	 * Update the "num_posted" return value (if necessary).
	 * Then drop the locks and return success.
	 */
	if (num_posted != NULL) {
		*num_posted = total_posted;
	}
	mutex_exit(&qp->qp_sq_lock);
	return (status);

pio_error:
	/* PIO to the doorbell failed; report lost service to FMA */
	mutex_exit(&qp->qp_sq_lock);
	hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
	return (ibc_get_ci_failure(0));
}


/*
 * hermon_post_recv()
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_post_recv(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_recv_wr_t *wr, uint_t num_wr, uint_t *num_posted)
{
	uint64_t			*desc;
	hermon_workq_hdr_t		*wq;
	uint32_t			head, tail, next_tail, qsize_msk;
	uint32_t			sync_from, sync_to;
	uint_t				wrindx;
	uint_t				posted_cnt;
	int				status;

	/*
	 * Check for user-mappable QP memory.  Note:  We do not allow kernel
	 * clients to post to QP memory that is accessible directly by the
	 * user.  If the QP memory is user accessible, then return an error.
	 */
	if (qp->qp_is_umap) {
		return (IBT_QP_HDL_INVALID);
	}

	/* Initialize posted_cnt */
	posted_cnt = 0;

	mutex_enter(&qp->qp_lock);

	/*
	 * Check if QP is associated with an SRQ
	 */
	if (qp->qp_srq_en == HERMON_QP_SRQ_ENABLED) {
		mutex_exit(&qp->qp_lock);
		return (IBT_SRQ_IN_USE);
	}

	/*
	 * Check QP state.  Can not post Recv requests from the "Reset" state
	 */
	if (qp->qp_state == HERMON_QP_RESET) {
		mutex_exit(&qp->qp_lock);
		return (IBT_QP_STATE_INVALID);
	}

	/* Check that work request transport type is valid */
	if ((qp->qp_serv_type != HERMON_QP_UD) &&
	    (qp->qp_serv_type != HERMON_QP_RC) &&
	    (qp->qp_serv_type != HERMON_QP_UC)) {
		mutex_exit(&qp->qp_lock);
		return (IBT_QP_SRV_TYPE_INVALID);
	}

	mutex_exit(&qp->qp_lock);
	mutex_enter(&qp->qp_rq_lock);

	/*
	 * Grab the lock for the WRID list, i.e., membar_consumer().
	 * This is not needed because the mutex_enter() above has
	 * the same effect.
	 */

	/* Save away some initial QP state */
	wq = qp->qp_rq_wqhdr;
	qsize_msk = wq->wq_mask;
	tail = wq->wq_tail;
	head = wq->wq_head;

	wrindx = 0;
	status = DDI_SUCCESS;
	/*
	 * Before we begin, save the current "tail index" for later
	 * DMA sync
	 */
	sync_from = tail;

	for (wrindx = 0; wrindx < num_wr; wrindx++) {
		if (wq->wq_full != 0) {
			status = IBT_QP_FULL;
			break;
		}
		/* RQ has no headroom; full when next_tail meets head */
		next_tail = (tail + 1) & qsize_msk;
		if (next_tail == head) {
			wq->wq_full = 1;
		}
		desc = HERMON_QP_RQ_ENTRY(qp, tail);
		status = hermon_wqe_recv_build(state, qp, &wr[wrindx], desc);
		if (status != DDI_SUCCESS) {
			break;
		}

		wq->wq_wrid[tail] = wr[wrindx].wr_id;
		qp->qp_rq_wqecntr++;

		tail = next_tail;
		posted_cnt++;
	}

	if (posted_cnt != 0) {
		/* Save away updated "tail index" for the DMA sync */
		sync_to = tail;

		hermon_wqe_sync(qp, sync_from, sync_to, HERMON_WR_RECV,
		    DDI_DMA_SYNC_FORDEV);

		wq->wq_tail = tail;

		membar_producer();	/* ensure wrids are visible */

		/* Update the doorbell record w/ wqecntr */
		HERMON_UAR_DB_RECORD_WRITE(qp->qp_rq_vdbr,
		    qp->qp_rq_wqecntr & 0xFFFF);
	}

	if (num_posted != NULL) {
		*num_posted = posted_cnt;
	}


	mutex_exit(&qp->qp_rq_lock);
	return (status);
}

/*
 * hermon_post_srq()
 *    Context: Can be called from interrupt or base context.
 *
 *    Posts Receive work requests to a Shared Receive Queue.  The SRQ
 *    free list is threaded through the WQEs themselves: the next free
 *    index is read back out of each just-built descriptor (see the
 *    htons() fetch in the loop below).
 */
int
hermon_post_srq(hermon_state_t *state, hermon_srqhdl_t srq,
    ibt_recv_wr_t *wr, uint_t num_wr, uint_t *num_posted)
{
	uint64_t			*desc;
	hermon_workq_hdr_t		*wq;
	uint_t				indx, wrindx;
	uint_t				posted_cnt;
	int				status;

	mutex_enter(&srq->srq_lock);

	/*
	 * Check for user-mappable QP memory.  Note:  We do not allow kernel
	 * clients to post to QP memory that is accessible directly by the
	 * user.  If the QP memory is user accessible, then return an error.
	 */
	if (srq->srq_is_umap) {
		mutex_exit(&srq->srq_lock);
		return (IBT_SRQ_HDL_INVALID);
	}

	/*
	 * Check SRQ state.  Can not post Recv requests when SRQ is in error
	 */
	if (srq->srq_state == HERMON_SRQ_STATE_ERROR) {
		mutex_exit(&srq->srq_lock);
		return (IBT_QP_STATE_INVALID);
	}

	status = DDI_SUCCESS;
	posted_cnt = 0;
	wq = srq->srq_wq_wqhdr;
	indx = wq->wq_head;

	for (wrindx = 0; wrindx < num_wr; wrindx++) {

		if (indx == wq->wq_tail) {
			status = IBT_QP_FULL;
			break;
		}
		desc = HERMON_SRQ_WQE_ADDR(srq, indx);

		wq->wq_wrid[indx] = wr[wrindx].wr_id;

		status = hermon_wqe_srq_build(state, srq, &wr[wrindx], desc);
		if (status != DDI_SUCCESS) {
			break;
		}

		hermon_wqe_sync(srq, indx, indx + 1,
		    HERMON_WR_SRQ, DDI_DMA_SYNC_FORDEV);
		posted_cnt++;
		/*
		 * The next free index lives in the WQE just built
		 * (big-endian 16-bit field) -- follow the free list.
		 */
		indx = htons(((uint16_t *)desc)[1]);
		wq->wq_head = indx;
	}

	if (posted_cnt != 0) {

		srq->srq_wq_wqecntr += posted_cnt;

		membar_producer();	/* ensure wrids are visible */

		/* Ring the doorbell w/ wqecntr */
		HERMON_UAR_DB_RECORD_WRITE(srq->srq_wq_vdbr,
		    srq->srq_wq_wqecntr & 0xFFFF);
	}

	if (num_posted != NULL) {
		*num_posted = posted_cnt;
	}

	mutex_exit(&srq->srq_lock);
	return (status);
}


/*
 * hermon_wqe_send_build()
 *    Context: Can be called from interrupt or base context.
 */
static int
hermon_wqe_send_build(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_send_wr_t *wr, uint64_t *desc, uint_t *size)
{
	hermon_hw_snd_wqe_ud_t		*ud;
	hermon_hw_snd_wqe_remaddr_t	*rc;
	hermon_hw_snd_wqe_atomic_t	*at;
	hermon_hw_snd_wqe_remaddr_t	*uc;
	hermon_hw_snd_wqe_bind_t	*bn;
	hermon_hw_wqe_sgl_t		*ds, *old_ds;
	ibt_ud_dest_t			*dest;
	ibt_wr_ds_t			*sgl;
	hermon_ahhdl_t			ah;
	uint32_t			nds;
	int				i, j, last_ds, num_ds, status;
	int				tmpsize;

	ASSERT(MUTEX_HELD(&qp->qp_sq_lock));

	/* Initialize the information for the Data Segments */
	ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)desc +
	    sizeof (hermon_hw_snd_wqe_ctrl_t));
	nds = wr->wr_nds;
	sgl = wr->wr_sgl;
	num_ds = 0;
	i = 0;

	/*
	 * Building a Send WQE depends first and foremost on the transport
	 * type of Work Request (i.e. UD, RC, or UC)
	 */
	switch (wr->wr_trans) {
	case IBT_UD_SRV:
		/* Ensure that work request transport type matches QP type */
		if (qp->qp_serv_type != HERMON_QP_UD) {
			return (IBT_QP_SRV_TYPE_INVALID);
		}

		/*
		 * Validate the operation type.  For UD requests, only the
		 * "Send" and "Send LSO" operations are valid.
		 */
		if (wr->wr_opcode != IBT_WRC_SEND &&
		    wr->wr_opcode != IBT_WRC_SEND_LSO) {
			return (IBT_QP_OP_TYPE_INVALID);
		}

		/*
		 * If this is a Special QP (QP0 or QP1), then we need to
		 * build MLX WQEs instead.  So jump to hermon_wqe_mlx_build()
		 * and return whatever status it returns
		 */
		if (qp->qp_is_special) {
			if (wr->wr_opcode == IBT_WRC_SEND_LSO) {
				return (IBT_QP_OP_TYPE_INVALID);
			}
			status = hermon_wqe_mlx_build(state, qp,
			    wr, desc, size);
			return (status);
		}

		/*
		 * Otherwise, if this is a normal UD Send request, then fill
		 * all the fields in the Hermon UD header for the WQE.  Note:
		 * to do this we'll need to extract some information from the
		 * Address Handle passed with the work request.
		 */
		ud = (hermon_hw_snd_wqe_ud_t *)((uintptr_t)desc +
		    sizeof (hermon_hw_snd_wqe_ctrl_t));
		if (wr->wr_opcode == IBT_WRC_SEND) {
			dest = wr->wr.ud.udwr_dest;
		} else {
			dest = wr->wr.ud_lso.lso_ud_dest;
		}
		ah = (hermon_ahhdl_t)dest->ud_ah;
		if (ah == NULL) {
			return (IBT_AH_HDL_INVALID);
		}

		/*
		 * Build the Unreliable Datagram Segment for the WQE, using
		 * the information from the address handle and the work
		 * request.
		 */
		/* mutex_enter(&ah->ah_lock); */
		if (wr->wr_opcode == IBT_WRC_SEND) {
			HERMON_WQE_BUILD_UD(qp, ud, ah, wr->wr.ud.udwr_dest);
		} else {	/* IBT_WRC_SEND_LSO */
			HERMON_WQE_BUILD_UD(qp, ud, ah,
			    wr->wr.ud_lso.lso_ud_dest);
		}
		/* mutex_exit(&ah->ah_lock); */

		/* Update "ds" for filling in Data Segments (below) */
		ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)ud +
		    sizeof (hermon_hw_snd_wqe_ud_t));

		if (wr->wr_opcode == IBT_WRC_SEND_LSO) {
			int total_len;

			/*
			 * Inline LSO header: 4 bytes of LSO segment header
			 * plus the packet headers, rounded up to a 16-byte
			 * multiple.  Reject the request if header + all data
			 * segments would overflow the fixed WQE stride.
			 */
			total_len = (4 + 0xf + wr->wr.ud_lso.lso_hdr_sz) & ~0xf;
			if ((uintptr_t)ds + total_len + (nds * 16) >
			    (uintptr_t)desc + (1 << qp->qp_sq_log_wqesz))
				return (IBT_QP_SGL_LEN_INVALID);

			bcopy(wr->wr.ud_lso.lso_hdr, (uint32_t *)ds + 1,
			    wr->wr.ud_lso.lso_hdr_sz);
			old_ds = ds;
			ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)ds + total_len);

			/*
			 * Consume only the first non-zero-length SGL entry
			 * here; "i" and "num_ds" deliberately carry over into
			 * the common data-segment loop below, which fills in
			 * the remaining entries.
			 */
			for (; i < nds; i++) {
				if (sgl[i].ds_len == 0)
					continue;
				HERMON_WQE_BUILD_DATA_SEG_SEND(&ds[num_ds],
				    &sgl[i]);
				num_ds++;
				i++;
				break;
			}
			/* first data segment must be visible before the LSO hdr */
			membar_producer();
			HERMON_WQE_BUILD_LSO(qp, old_ds, wr->wr.ud_lso.lso_mss,
			    wr->wr.ud_lso.lso_hdr_sz);
		}

		break;

	case IBT_RC_SRV:
		/* Ensure that work request transport type matches QP type */
		if (qp->qp_serv_type != HERMON_QP_RC) {
			return (IBT_QP_SRV_TYPE_INVALID);
		}

		/*
		 * Validate the operation type.  For RC requests, we allow
		 * "Send", "RDMA Read", "RDMA Write", various "Atomic"
		 * operations, and memory window "Bind"
		 */
		if ((wr->wr_opcode != IBT_WRC_SEND) &&
		    (wr->wr_opcode != IBT_WRC_RDMAR) &&
		    (wr->wr_opcode != IBT_WRC_RDMAW) &&
		    (wr->wr_opcode != IBT_WRC_CSWAP) &&
		    (wr->wr_opcode != IBT_WRC_FADD) &&
		    (wr->wr_opcode != IBT_WRC_BIND)) {
			return (IBT_QP_OP_TYPE_INVALID);
		}

		/*
		 * If this is a Send request, then all we need to do is break
		 * out here and begin the Data Segment processing below
		 */
		if (wr->wr_opcode == IBT_WRC_SEND) {
			break;
		}

		/*
		 * If this is an RDMA Read or RDMA Write request, then fill
		 * in the "Remote Address" header fields.
		 */
		if ((wr->wr_opcode == IBT_WRC_RDMAR) ||
		    (wr->wr_opcode == IBT_WRC_RDMAW)) {
			rc = (hermon_hw_snd_wqe_remaddr_t *)((uintptr_t)desc +
			    sizeof (hermon_hw_snd_wqe_ctrl_t));

			/*
			 * Build the Remote Address Segment for the WQE, using
			 * the information from the RC work request.
			 */
			HERMON_WQE_BUILD_REMADDR(qp, rc, &wr->wr.rc.rcwr.rdma);

			/* Update "ds" for filling in Data Segments (below) */
			ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)rc +
			    sizeof (hermon_hw_snd_wqe_remaddr_t));
			break;
		}

		/*
		 * If this is one of the Atomic type operations (i.e
		 * Compare-Swap or Fetch-Add), then fill in both the "Remote
		 * Address" header fields and the "Atomic" header fields.
		 */
		if ((wr->wr_opcode == IBT_WRC_CSWAP) ||
		    (wr->wr_opcode == IBT_WRC_FADD)) {
			rc = (hermon_hw_snd_wqe_remaddr_t *)((uintptr_t)desc +
			    sizeof (hermon_hw_snd_wqe_ctrl_t));
			at = (hermon_hw_snd_wqe_atomic_t *)((uintptr_t)rc +
			    sizeof (hermon_hw_snd_wqe_remaddr_t));

			/*
			 * Build the Remote Address and Atomic Segments for
			 * the WQE, using the information from the RC Atomic
			 * work request.
			 */
			HERMON_WQE_BUILD_RC_ATOMIC_REMADDR(qp, rc, wr);
			HERMON_WQE_BUILD_ATOMIC(qp, at, wr->wr.rc.rcwr.atomic);

			/* Update "ds" for filling in Data Segments (below) */
			ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)at +
			    sizeof (hermon_hw_snd_wqe_atomic_t));

			/*
			 * Update "nds" and "sgl" because Atomic requests have
			 * only a single Data Segment (and they are encoded
			 * somewhat differently in the work request.
			 */
			nds = 1;
			sgl = wr->wr_sgl;
			break;
		}

		/*
		 * If this is memory window Bind operation, then we call the
		 * hermon_wr_bind_check() routine to validate the request and
		 * to generate the updated RKey.  If this is successful, then
		 * we fill in the WQE's "Bind" header fields.
		 */
		if (wr->wr_opcode == IBT_WRC_BIND) {
			status = hermon_wr_bind_check(state, wr);
			if (status != DDI_SUCCESS) {
				return (status);
			}

			bn = (hermon_hw_snd_wqe_bind_t *)((uintptr_t)desc +
			    sizeof (hermon_hw_snd_wqe_ctrl_t));

			/*
			 * Build the Bind Memory Window Segments for the WQE,
			 * using the information from the RC Bind memory
			 * window work request.
			 */
			HERMON_WQE_BUILD_BIND(qp, bn, wr->wr.rc.rcwr.bind);

			/*
			 * Update the "ds" pointer.  Even though the "bind"
			 * operation requires no SGLs, this is necessary to
			 * facilitate the correct descriptor size calculations
			 * (below).
			 */
			ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)bn +
			    sizeof (hermon_hw_snd_wqe_bind_t));
			nds = 0;
		}
		break;

	case IBT_UC_SRV:
		/* Ensure that work request transport type matches QP type */
		if (qp->qp_serv_type != HERMON_QP_UC) {
			return (IBT_QP_SRV_TYPE_INVALID);
		}

		/*
		 * Validate the operation type.  For UC requests, we only
		 * allow "Send", "RDMA Write", and memory window "Bind".
		 * Note:  Unlike RC, UC does not allow "RDMA Read" or "Atomic"
		 * operations
		 */
		if ((wr->wr_opcode != IBT_WRC_SEND) &&
		    (wr->wr_opcode != IBT_WRC_RDMAW) &&
		    (wr->wr_opcode != IBT_WRC_BIND)) {
			return (IBT_QP_OP_TYPE_INVALID);
		}

		/*
		 * If this is a Send request, then all we need to do is break
		 * out here and begin the Data Segment processing below
		 */
		if (wr->wr_opcode == IBT_WRC_SEND) {
			break;
		}

		/*
		 * If this is an RDMA Write request, then fill in the "Remote
		 * Address" header fields.
		 */
		if (wr->wr_opcode == IBT_WRC_RDMAW) {
			uc = (hermon_hw_snd_wqe_remaddr_t *)((uintptr_t)desc +
			    sizeof (hermon_hw_snd_wqe_ctrl_t));

			/*
			 * Build the Remote Address Segment for the WQE, using
			 * the information from the UC work request.
			 */
			HERMON_WQE_BUILD_REMADDR(qp, uc, &wr->wr.uc.ucwr.rdma);

			/* Update "ds" for filling in Data Segments (below) */
			ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)uc +
			    sizeof (hermon_hw_snd_wqe_remaddr_t));
			break;
		}

		/*
		 * If this is memory window Bind operation, then we call the
		 * hermon_wr_bind_check() routine to validate the request and
		 * to generate the updated RKey.  If this is successful, then
		 * we fill in the WQE's "Bind" header fields.
		 */
		if (wr->wr_opcode == IBT_WRC_BIND) {
			status = hermon_wr_bind_check(state, wr);
			if (status != DDI_SUCCESS) {
				return (status);
			}

			bn = (hermon_hw_snd_wqe_bind_t *)((uintptr_t)desc +
			    sizeof (hermon_hw_snd_wqe_ctrl_t));

			/*
			 * Build the Bind Memory Window Segments for the WQE,
			 * using the information from the UC Bind memory
			 * window work request.
			 */
			HERMON_WQE_BUILD_BIND(qp, bn, wr->wr.uc.ucwr.bind);

			/*
			 * Update the "ds" pointer.  Even though the "bind"
			 * operation requires no SGLs, this is necessary to
			 * facilitate the correct descriptor size calculations
			 * (below).
			 */
			ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)bn +
			    sizeof (hermon_hw_snd_wqe_bind_t));
			nds = 0;
		}
		break;

	default:
		return (IBT_QP_SRV_TYPE_INVALID);
	}

	/*
	 * Now fill in the Data Segments (SGL) for the Send WQE based on
	 * the values setup above (i.e. "sgl", "nds", and the "ds" pointer
	 * Start by checking for a valid number of SGL entries
	 */
	if (nds > qp->qp_sq_sgl) {
		return (IBT_QP_SGL_LEN_INVALID);
	}

	/*
	 * For each SGL in the Send Work Request, fill in the Send WQE's data
	 * segments.  Note: We skip any SGL with zero size because Hermon
	 * hardware cannot handle a zero for "byte_cnt" in the WQE.  Actually
	 * the encoding for zero means a 2GB transfer.
	 *
	 * First pass: count the non-zero-length segments so we know the
	 * final descriptor size before writing anything.
	 */
	for (last_ds = num_ds, j = i; j < nds; j++) {
		if (sgl[j].ds_len != 0)
			last_ds++;	/* real last ds of wqe to fill */
	}

	/*
	 * Return the size of descriptor (in 16-byte chunks)
	 * For Hermon, we want them (for now) to be on stride size
	 * boundaries, which was implicit in Tavor/Arbel
	 *
	 */
	tmpsize = ((uintptr_t)&ds[last_ds] - (uintptr_t)desc);

	*size = tmpsize >> 0x4;

	/*
	 * Second pass: fill the data segments in reverse order (highest
	 * index first, walking "last_ds" back down).
	 * NOTE(review): the reverse order appears intended so the segment
	 * nearest the control segment is written last -- confirm vs. PRM
	 * ownership/visibility rules before relying on it.
	 */
	for (j = nds; --j >= i; ) {
		if (sgl[j].ds_len == 0) {
			continue;
		}

		/*
		 * Fill in the Data Segment(s) for the current WQE, using the
		 * information contained in the scatter-gather list of the
		 * work request.
		 */
		last_ds--;
		HERMON_WQE_BUILD_DATA_SEG_SEND(&ds[last_ds], &sgl[j]);
	}

	return (DDI_SUCCESS);
}



/*
 * hermon_wqe_mlx_build()
 *    Context: Can be called from interrupt or base context.
 */
static int
hermon_wqe_mlx_build(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_send_wr_t *wr, uint64_t *desc, uint_t *size)
{
	hermon_ahhdl_t		ah;
	hermon_hw_udav_t	*udav;
	ib_lrh_hdr_t		*lrh;
	ib_grh_t		*grh;
	ib_bth_hdr_t		*bth;
	ib_deth_hdr_t		*deth;
	hermon_hw_wqe_sgl_t	*ds;
	ibt_wr_ds_t		*sgl;
	uint8_t			*mgmtclass, *hpoint, *hcount;
	uint32_t		nds, offset, pktlen;
	uint32_t		desc_sz;
	int			i, num_ds;
	int			tmpsize;

	ASSERT(MUTEX_HELD(&qp->qp_sq_lock));

	/* Initialize the information for the Data Segments */
	ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)desc +
	    sizeof (hermon_hw_mlx_wqe_nextctrl_t));

	/*
	 * Pull the address handle from the work request.  The UDAV will
	 * be used to answer some questions about the request.
 */
	ah = (hermon_ahhdl_t)wr->wr.ud.udwr_dest->ud_ah;
	if (ah == NULL) {
		return (IBT_AH_HDL_INVALID);
	}
	/* hold ah_lock while reading the UDAV fields below */
	mutex_enter(&ah->ah_lock);
	udav = ah->ah_udav;

	/*
	 * If the request is for QP1 and the destination LID is equal to
	 * the Permissive LID, then return an error.  This combination is
	 * not allowed
	 */
	if ((udav->rlid == IB_LID_PERMISSIVE) &&
	    (qp->qp_is_special == HERMON_QP_GSI)) {
		mutex_exit(&ah->ah_lock);
		return (IBT_AH_HDL_INVALID);
	}

	/*
	 * Calculate the size of the packet headers, including the GRH
	 * (if necessary)
	 */
	desc_sz = sizeof (ib_lrh_hdr_t) + sizeof (ib_bth_hdr_t) +
	    sizeof (ib_deth_hdr_t);
	if (udav->grh) {
		desc_sz += sizeof (ib_grh_t);
	}

	/*
	 * Begin to build the first "inline" data segment for the packet
	 * headers.  Note:  By specifying "inline" we can build the contents
	 * of the MAD packet headers directly into the work queue (as part
	 * descriptor).  This has the advantage of both speeding things up
	 * and of not requiring the driver to allocate/register any additional
	 * memory for the packet headers.
	 */
	HERMON_WQE_BUILD_INLINE(qp, &ds[0], desc_sz);
	desc_sz += 4;	/* account for the inline segment header word */

	/*
	 * Build Local Route Header (LRH)
	 *    We start here by building the LRH into a temporary location.
	 *    When we have finished we copy the LRH data into the descriptor.
	 *
	 *    Notice that the VL values are hardcoded.  This is not a problem
	 *    because VL15 is decided later based on the value in the MLX
	 *    transport "next/ctrl" header (see the "vl15" bit below), and it
	 *    is otherwise (meaning for QP1) chosen from the SL-to-VL table
	 *    values.  This rule does not hold for loopback packets however
	 *    (all of which bypass the SL-to-VL tables) and it is the reason
	 *    that non-QP0 MADs are setup with VL hardcoded to zero below.
	 *
	 *    Notice also that Source LID is hardcoded to the Permissive LID
	 *    (0xFFFF).  This is also not a problem because if the Destination
	 *    LID is not the Permissive LID, then the "slr" value in the MLX
	 *    transport "next/ctrl" header will be set to zero and the hardware
	 *    will pull the LID from value in the port.
	 */
	lrh = (ib_lrh_hdr_t *)((uintptr_t)&ds[0] + 4);
	pktlen = (desc_sz + 0x100) >> 2;
	HERMON_WQE_BUILD_MLX_LRH(lrh, qp, udav, pktlen);

	/*
	 * Build Global Route Header (GRH)
	 *    This is only built if necessary as defined by the "grh" bit in
	 *    the address vector.  Note:  We also calculate the offset to the
	 *    next header (BTH) based on whether or not the "grh" bit is set.
	 */
	if (udav->grh) {
		/*
		 * If the request is for QP0, then return an error.  The
		 * combination of global routing (GRH) and QP0 is not allowed.
		 */
		if (qp->qp_is_special == HERMON_QP_SMI) {
			mutex_exit(&ah->ah_lock);
			return (IBT_AH_HDL_INVALID);
		}
		grh = (ib_grh_t *)((uintptr_t)lrh + sizeof (ib_lrh_hdr_t));
		HERMON_WQE_BUILD_MLX_GRH(state, grh, qp, udav, pktlen);

		bth = (ib_bth_hdr_t *)((uintptr_t)grh + sizeof (ib_grh_t));
	} else {
		bth = (ib_bth_hdr_t *)((uintptr_t)lrh + sizeof (ib_lrh_hdr_t));
	}
	/* done reading the UDAV; drop the address handle lock */
	mutex_exit(&ah->ah_lock);


	/*
	 * Build Base Transport Header (BTH)
	 *    Notice that the M, PadCnt, and TVer fields are all set
	 *    to zero implicitly.  This is true for all Management Datagrams
	 *    MADs whether GSI or SMI.
	 */
	HERMON_WQE_BUILD_MLX_BTH(state, bth, qp, wr);

	/*
	 * Build Datagram Extended Transport Header (DETH)
	 */
	deth = (ib_deth_hdr_t *)((uintptr_t)bth + sizeof (ib_bth_hdr_t));
	HERMON_WQE_BUILD_MLX_DETH(deth, qp);

	/* Ensure that the Data Segment is aligned on a 16-byte boundary */
	ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)deth + sizeof (ib_deth_hdr_t));
	ds = (hermon_hw_wqe_sgl_t *)(((uintptr_t)ds + 0xF) & ~0xF);
	nds = wr->wr_nds;
	sgl = wr->wr_sgl;
	num_ds = 0;

	/*
	 * Now fill in the Data Segments (SGL) for the MLX WQE based on the
	 * values set up above (i.e. "sgl", "nds", and the "ds" pointer
	 * Start by checking for a valid number of SGL entries
	 */
	if (nds > qp->qp_sq_sgl) {
		return (IBT_QP_SGL_LEN_INVALID);
	}

	/*
	 * For each SGL in the Send Work Request, fill in the MLX WQE's data
	 * segments.  Note: We skip any SGL with zero size because Hermon
	 * hardware cannot handle a zero for "byte_cnt" in the WQE.  Actually
	 * the encoding for zero means a 2GB transfer.  Because of this special
	 * encoding in the hardware, we mask the requested length with
	 * HERMON_WQE_SGL_BYTE_CNT_MASK (so that 2GB will end up encoded as
	 * zero.)
	 */
	mgmtclass = hpoint = hcount = NULL;
	offset = 0;
	for (i = 0; i < nds; i++) {
		if (sgl[i].ds_len == 0) {
			continue;
		}

		/*
		 * Fill in the Data Segment(s) for the MLX send WQE, using
		 * the information contained in the scatter-gather list of
		 * the work request.
		 */
		HERMON_WQE_BUILD_DATA_SEG_SEND(&ds[num_ds], &sgl[i]);

		/*
		 * Search through the contents of all MADs posted to QP0 to
		 * initialize pointers to the places where Directed Route "hop
		 * pointer", "hop count", and "mgmtclass" would be.  Hermon
		 * needs these updated (i.e. incremented or decremented, as
		 * necessary) by software.
		 */
		if (qp->qp_is_special == HERMON_QP_SMI) {

			HERMON_SPECIAL_QP_DRMAD_GET_MGMTCLASS(mgmtclass,
			    offset, sgl[i].ds_va, sgl[i].ds_len);

			HERMON_SPECIAL_QP_DRMAD_GET_HOPPOINTER(hpoint,
			    offset, sgl[i].ds_va, sgl[i].ds_len);

			HERMON_SPECIAL_QP_DRMAD_GET_HOPCOUNT(hcount,
			    offset, sgl[i].ds_va, sgl[i].ds_len);

			offset += sgl[i].ds_len;
		}
		num_ds++;
	}

	/*
	 * Hermon's Directed Route MADs need to have the "hop pointer"
	 * incremented/decremented (as necessary) depending on whether it is
	 * currently less than or greater than the "hop count" (i.e. whether
	 * the MAD is a request or a response.)
	 *
	 * NOTE(review): "mgmtclass"/"hpoint"/"hcount" are dereferenced here
	 * without a NULL check; this appears to rely on every QP0 MAD's SGLs
	 * covering the header offsets so the GET_* macros above always set
	 * the pointers -- confirm against the macro definitions.
	 */
	if (qp->qp_is_special == HERMON_QP_SMI) {
		HERMON_SPECIAL_QP_DRMAD_DO_HOPPOINTER_MODIFY(*mgmtclass,
		    *hpoint, *hcount);
	}

	/*
	 * Now fill in the ICRC Data Segment.  This data segment is inlined
	 * just like the packets headers above, but it is only four bytes and
	 * set to zero (to indicate that we wish the hardware to generate ICRC.
	 */
	HERMON_WQE_BUILD_INLINE_ICRC(qp, &ds[num_ds], 4, 0);
	num_ds++;

	/*
	 * Return the size of descriptor (in 16-byte chunks)
	 * For Hermon, we want them (for now) to be on stride size
	 * boundaries, which was implicit in Tavor/Arbel
	 */
	tmpsize = ((uintptr_t)&ds[num_ds] - (uintptr_t)desc);

	*size = tmpsize >> 0x04;

	return (DDI_SUCCESS);
}



/*
 * hermon_wqe_recv_build()
 *    Context: Can be called from interrupt or base context.
1752 */ 1753 /* ARGSUSED */ 1754 static int 1755 hermon_wqe_recv_build(hermon_state_t *state, hermon_qphdl_t qp, 1756 ibt_recv_wr_t *wr, uint64_t *desc) 1757 { 1758 hermon_hw_wqe_sgl_t *ds; 1759 int i, num_ds; 1760 1761 ASSERT(MUTEX_HELD(&qp->qp_rq_lock)); 1762 1763 /* 1764 * Fill in the Data Segments (SGL) for the Recv WQE - don't 1765 * need to have a reserved for the ctrl, there is none on the 1766 * recv queue for hermon, but will need to put an invalid 1767 * (null) scatter pointer per PRM 1768 */ 1769 ds = (hermon_hw_wqe_sgl_t *)(uintptr_t)desc; 1770 num_ds = 0; 1771 1772 /* Check for valid number of SGL entries */ 1773 if (wr->wr_nds > qp->qp_rq_sgl) { 1774 return (IBT_QP_SGL_LEN_INVALID); 1775 } 1776 1777 /* 1778 * For each SGL in the Recv Work Request, fill in the Recv WQE's data 1779 * segments. Note: We skip any SGL with zero size because Hermon 1780 * hardware cannot handle a zero for "byte_cnt" in the WQE. Actually 1781 * the encoding for zero means a 2GB transfer. Because of this special 1782 * encoding in the hardware, we mask the requested length with 1783 * HERMON_WQE_SGL_BYTE_CNT_MASK (so that 2GB will end up encoded as 1784 * zero.) 1785 */ 1786 for (i = 0; i < wr->wr_nds; i++) { 1787 if (wr->wr_sgl[i].ds_len == 0) { 1788 continue; 1789 } 1790 1791 /* 1792 * Fill in the Data Segment(s) for the receive WQE, using the 1793 * information contained in the scatter-gather list of the 1794 * work request. 1795 */ 1796 HERMON_WQE_BUILD_DATA_SEG_RECV(&ds[num_ds], &wr->wr_sgl[i]); 1797 num_ds++; 1798 } 1799 1800 /* put the null sgl pointer as well if needed */ 1801 if (num_ds < qp->qp_rq_sgl) { 1802 HERMON_WQE_BUILD_DATA_SEG_RECV(&ds[num_ds], &null_sgl); 1803 } 1804 1805 return (DDI_SUCCESS); 1806 } 1807 1808 1809 1810 /* 1811 * hermon_wqe_srq_build() 1812 * Context: Can be called from interrupt or base context. 
 */
/* ARGSUSED */
static int
hermon_wqe_srq_build(hermon_state_t *state, hermon_srqhdl_t srq,
    ibt_recv_wr_t *wr, uint64_t *desc)
{
	hermon_hw_wqe_sgl_t	*ds;
	int			i, num_ds;

	ASSERT(MUTEX_HELD(&srq->srq_lock));

	/*
	 * Fill in the Data Segments (SGL) for the Recv WQE.  Unlike the
	 * ordinary RQ case, SRQ WQEs carry a "next" segment before the
	 * scatter list, so skip past it here.
	 */
	ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)desc +
	    sizeof (hermon_hw_srq_wqe_next_t));
	num_ds = 0;

	/* Check for valid number of SGL entries */
	if (wr->wr_nds > srq->srq_wq_sgl) {
		return (IBT_QP_SGL_LEN_INVALID);
	}

	/*
	 * For each SGL in the Recv Work Request, fill in the Recv WQE's data
	 * segments.  Note: We skip any SGL with zero size because Hermon
	 * hardware cannot handle a zero for "byte_cnt" in the WQE.  Actually
	 * the encoding for zero means a 2GB transfer.  Because of this special
	 * encoding in the hardware, we mask the requested length with
	 * HERMON_WQE_SGL_BYTE_CNT_MASK (so that 2GB will end up encoded as
	 * zero.)
	 */
	for (i = 0; i < wr->wr_nds; i++) {
		if (wr->wr_sgl[i].ds_len == 0) {
			continue;
		}

		/*
		 * Fill in the Data Segment(s) for the receive WQE, using the
		 * information contained in the scatter-gather list of the
		 * work request.
		 */
		HERMON_WQE_BUILD_DATA_SEG_RECV(&ds[num_ds], &wr->wr_sgl[i]);
		num_ds++;
	}

	/*
	 * put in the null sgl pointer as well, if needed
	 */
	if (num_ds < srq->srq_wq_sgl) {
		HERMON_WQE_BUILD_DATA_SEG_RECV(&ds[num_ds], &null_sgl);
	}

	return (DDI_SUCCESS);
}


/*
 * hermon_wr_get_immediate()
 *    Context: Can be called from interrupt or base context.
 *
 *    Returns the 32-bit immediate data for a work request, or 0 for
 *    operation types that carry no immediate.
 */
static uint32_t
hermon_wr_get_immediate(ibt_send_wr_t *wr)
{
	/*
	 * This routine extracts the "immediate data" from the appropriate
	 * location in the IBTF work request.  Because of the way the
	 * work request structure is defined, the location for this data
	 * depends on the actual work request operation type.
	 */

	/* For RDMA Write, test if RC or UC */
	if (wr->wr_opcode == IBT_WRC_RDMAW) {
		if (wr->wr_trans == IBT_RC_SRV) {
			return (wr->wr.rc.rcwr.rdma.rdma_immed);
		} else {	/* IBT_UC_SRV */
			return (wr->wr.uc.ucwr.rdma.rdma_immed);
		}
	}

	/* For Send, test if RC, UD, or UC */
	if (wr->wr_opcode == IBT_WRC_SEND) {
		if (wr->wr_trans == IBT_RC_SRV) {
			return (wr->wr.rc.rcwr.send_immed);
		} else if (wr->wr_trans == IBT_UD_SRV) {
			return (wr->wr.ud.udwr_immed);
		} else {	/* IBT_UC_SRV */
			return (wr->wr.uc.ucwr.send_immed);
		}
	}

	/*
	 * If any other type of request, then immediate is undefined
	 */
	return (0);
}

/*
 * hermon_wqe_headroom()
 *    Context: can be called from interrupt or base, currently only from
 *    base context.
 *    Routine that fills in the headroom for the Send Queue
 *
 *    Starting at index "from", invalidates "qp_sq_hdrmwqes" consecutive
 *    WQEs (wrapping around the queue as needed) by writing all-ones into
 *    each 64-byte section, while preserving the ownership bit (MSB) of
 *    each WQE's first word.
 */

static void
hermon_wqe_headroom(uint_t from, hermon_qphdl_t qp)
{
	uint32_t	*wqe_start, *wqe_top, *wqe_base, qsize;
	int		hdrmwqes, wqesizebytes, sectperwqe;
	uint32_t	invalue;
	int		i, j;

	qsize = qp->qp_sq_bufsz;
	wqesizebytes = 1 << qp->qp_sq_log_wqesz;
	sectperwqe = wqesizebytes >> 6;	/* 64 bytes/section */
	hdrmwqes = qp->qp_sq_hdrmwqes;
	wqe_base  = (uint32_t *)HERMON_QP_SQ_ENTRY(qp, 0);
	wqe_top   = (uint32_t *)HERMON_QP_SQ_ENTRY(qp, qsize);
	wqe_start = (uint32_t *)HERMON_QP_SQ_ENTRY(qp, from);

	for (i = 0; i < hdrmwqes; i++) {
		for (j = 0; j < sectperwqe; j++) {
			if (j == 0) {	/* 1st section of wqe */
				/* preserve ownership bit (MSB), set the rest */
				invalue = ddi_get32(qp->qp_wqinfo.qa_acchdl,
				    wqe_start) | 0x7FFFFFFF;
			} else {
				/* or just invalidate it */
				invalue = 0xFFFFFFFF;
			}
			ddi_put32(qp->qp_wqinfo.qa_acchdl, wqe_start, invalue);
			wqe_start += 16;	/* move 64 bytes */
		}
		if (wqe_start == wqe_top)	/* hit the end of the queue */
			wqe_start = wqe_base;	/* wrap to start */
	}
}

/*
 * hermon_wqe_sync()
 *    Context: Can be called from interrupt or base context.
 */
static void
hermon_wqe_sync(void *hdl, uint_t sync_from, uint_t sync_to,
    uint_t sync_type, uint_t flag)
{
	hermon_qphdl_t		qp;
	hermon_srqhdl_t		srq;
	uint64_t		*wqe_from, *wqe_to;
	uint64_t		*wq_base, *wq_top, *qp_base;
	ddi_dma_handle_t	dmahdl;
	off_t			offset;
	size_t			length;
	uint32_t		qsize;
	int			status;

	/*
	 * "hdl" is a QP handle for SEND/RECV syncs and an SRQ handle for
	 * SRQ syncs; "sync_type" (HERMON_WR_SEND/RECV/SRQ) disambiguates.
	 */
	if (sync_type == HERMON_WR_SRQ) {
		srq = (hermon_srqhdl_t)hdl;
		/* Get the DMA handle from SRQ context */
		dmahdl = srq->srq_mrhdl->mr_bindinfo.bi_dmahdl;
		/* get base addr of the buffer */
		qp_base = (uint64_t *)(void *)srq->srq_wq_buf;
	} else {
		qp = (hermon_qphdl_t)hdl;
		/* Get the DMA handle from QP context */
		dmahdl = qp->qp_mrhdl->mr_bindinfo.bi_dmahdl;
		/* Determine the base address of the QP buffer */
		if (qp->qp_sq_baseaddr == 0) {
			qp_base = (uint64_t *)(void *)(qp->qp_sq_buf);
		} else {
			qp_base = (uint64_t *)(void *)(qp->qp_rq_buf);
		}
	}

	/*
	 * Depending on the type of the work queue, we grab information
	 * about the address ranges we need to DMA sync.
	 */

	if (sync_type == HERMON_WR_SEND) {
		wqe_from = HERMON_QP_SQ_ENTRY(qp, sync_from);
		wqe_to   = HERMON_QP_SQ_ENTRY(qp, sync_to);
		qsize	 = qp->qp_sq_bufsz;

		wq_base = HERMON_QP_SQ_ENTRY(qp, 0);
		wq_top	= HERMON_QP_SQ_ENTRY(qp, qsize);
	} else if (sync_type == HERMON_WR_RECV) {
		wqe_from = HERMON_QP_RQ_ENTRY(qp, sync_from);
		wqe_to   = HERMON_QP_RQ_ENTRY(qp, sync_to);
		qsize	 = qp->qp_rq_bufsz;

		wq_base = HERMON_QP_RQ_ENTRY(qp, 0);
		wq_top	= HERMON_QP_RQ_ENTRY(qp, qsize);
	} else {
		wqe_from = HERMON_SRQ_WQ_ENTRY(srq, sync_from);
		wqe_to	 = HERMON_SRQ_WQ_ENTRY(srq, sync_to);
		qsize	 = srq->srq_wq_bufsz;

		wq_base = HERMON_SRQ_WQ_ENTRY(srq, 0);
		wq_top	= HERMON_SRQ_WQ_ENTRY(srq, qsize);
	}

	/*
	 * There are two possible cases for the beginning and end of the WQE
	 * chain we are trying to sync.  Either this is the simple case, where
	 * the end of the chain is below the beginning of the chain, or it is
	 * the "wrap-around" case, where the end of the chain has wrapped over
	 * the end of the queue.  In the former case, we simply need to
	 * calculate the span from beginning to end and sync it.  In the latter
	 * case, however, we need to calculate the span from the top of the
	 * work queue to the end of the chain and sync that, and then we need
	 * to find the other portion (from beginning of chain to end of queue)
	 * and sync that as well.  Note:  if the "top to end" span is actually
	 * zero length, then we don't do a DMA sync because a zero length DMA
	 * sync unnecessarily syncs the entire work queue.
	 */
	if (wqe_to > wqe_from) {
		/* "From Beginning to End" */

		offset = (off_t)((uintptr_t)wqe_from - (uintptr_t)qp_base);
		length = (size_t)((uintptr_t)wqe_to - (uintptr_t)wqe_from);

		status = ddi_dma_sync(dmahdl, offset, length, flag);
		if (status != DDI_SUCCESS) {
			return;
		}
	} else {
		/* "From Top to End" */

		offset = (off_t)((uintptr_t)wq_base - (uintptr_t)qp_base);
		length = (size_t)((uintptr_t)wqe_to - (uintptr_t)wq_base);
		if (length) {
			status = ddi_dma_sync(dmahdl, offset, length, flag);
			if (status != DDI_SUCCESS) {
				return;
			}
		}

		/* "From Beginning to Bottom" */

		offset = (off_t)((uintptr_t)wqe_from - (uintptr_t)qp_base);
		length = (size_t)((uintptr_t)wq_top - (uintptr_t)wqe_from);
		status = ddi_dma_sync(dmahdl, offset, length, flag);
		if (status != DDI_SUCCESS) {
			return;
		}
	}
}


/*
 * hermon_wr_bind_check()
 *    Context: Can be called from interrupt or base context.
 *
 *    Validates a memory window Bind work request (handles, keys, address
 *    range, access flags) and, on success, computes the window's new RKey,
 *    storing it both in the window and in "bind_rkey_out" of the WR.
 */
/* ARGSUSED */
static int
hermon_wr_bind_check(hermon_state_t *state, ibt_send_wr_t *wr)
{
	ibt_bind_flags_t	bind_flags;
	uint64_t		vaddr, len;
	uint64_t		reg_start_addr, reg_end_addr;
	hermon_mwhdl_t		mw;
	hermon_mrhdl_t		mr;
	hermon_rsrc_t		*mpt;
	uint32_t		new_rkey;

	/* Check for a valid Memory Window handle in the WR */
	mw = (hermon_mwhdl_t)wr->wr.rc.rcwr.bind->bind_ibt_mw_hdl;
	if (mw == NULL) {
		return (IBT_MW_HDL_INVALID);
	}

	/* Check for a valid Memory Region handle in the WR */
	mr = (hermon_mrhdl_t)wr->wr.rc.rcwr.bind->bind_ibt_mr_hdl;
	if (mr == NULL) {
		return (IBT_MR_HDL_INVALID);
	}

	mutex_enter(&mr->mr_lock);
	mutex_enter(&mw->mr_lock);

	/*
	 * Check here to see if the memory region has already been partially
	 * deregistered as a result of a hermon_umap_umemlock_cb() callback.
	 * If so, this is an error, return failure.
	 */
	if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) {
		mutex_exit(&mr->mr_lock);
		mutex_exit(&mw->mr_lock);
		return (IBT_MR_HDL_INVALID);
	}

	/* Check for a valid Memory Window RKey (i.e. a matching RKey) */
	if (mw->mr_rkey != wr->wr.rc.rcwr.bind->bind_rkey) {
		mutex_exit(&mr->mr_lock);
		mutex_exit(&mw->mr_lock);
		return (IBT_MR_RKEY_INVALID);
	}

	/* Check for a valid Memory Region LKey (i.e. a matching LKey) */
	if (mr->mr_lkey != wr->wr.rc.rcwr.bind->bind_lkey) {
		mutex_exit(&mr->mr_lock);
		mutex_exit(&mw->mr_lock);
		return (IBT_MR_LKEY_INVALID);
	}

	/*
	 * Now check for valid "vaddr" and "len".  Note:  We don't check the
	 * "vaddr" range when "len == 0" (i.e. on unbind operations)
	 */
	len = wr->wr.rc.rcwr.bind->bind_len;
	if (len != 0) {
		vaddr = wr->wr.rc.rcwr.bind->bind_va;
		reg_start_addr = mr->mr_bindinfo.bi_addr;
		reg_end_addr = mr->mr_bindinfo.bi_addr +
		    (mr->mr_bindinfo.bi_len - 1);
		if ((vaddr < reg_start_addr) || (vaddr > reg_end_addr)) {
			mutex_exit(&mr->mr_lock);
			mutex_exit(&mw->mr_lock);
			return (IBT_MR_VA_INVALID);
		}
		/* end of the bind range must also fall inside the region */
		vaddr = (vaddr + len) - 1;
		if (vaddr > reg_end_addr) {
			mutex_exit(&mr->mr_lock);
			mutex_exit(&mw->mr_lock);
			return (IBT_MR_LEN_INVALID);
		}
	}

	/*
	 * Validate the bind access flags.  Remote Write and Atomic access for
	 * the Memory Window require that Local Write access be set in the
	 * corresponding Memory Region.
	 */
	bind_flags = wr->wr.rc.rcwr.bind->bind_flags;
	if (((bind_flags & IBT_WR_BIND_WRITE) ||
	    (bind_flags & IBT_WR_BIND_ATOMIC)) &&
	    !(mr->mr_accflag & IBT_MR_LOCAL_WRITE)) {
		mutex_exit(&mr->mr_lock);
		mutex_exit(&mw->mr_lock);
		return (IBT_MR_ACCESS_REQ_INVALID);
	}

	/* Calculate the new RKey for the Memory Window */
	mpt = mw->mr_mptrsrcp;
	new_rkey = hermon_mr_keycalc(mpt->hr_indx);
	new_rkey = hermon_mr_key_swap(new_rkey);

	wr->wr.rc.rcwr.bind->bind_rkey_out = new_rkey;
	mw->mr_rkey = new_rkey;

	mutex_exit(&mr->mr_lock);
	mutex_exit(&mw->mr_lock);
	return (DDI_SUCCESS);
}


/*
 * hermon_wrid_from_reset_handling()
 *    Context: Can be called from interrupt or base context.
 *
 *    Prepares WRID tracking when a QP leaves the Reset state: resets the
 *    send (and, for non-SRQ QPs, receive) work queue headers and chains
 *    both work queue AVL nodes onto their respective CQs.  No-op for
 *    user-mapped QPs.
 */
/* ARGSUSED */
int
hermon_wrid_from_reset_handling(hermon_state_t *state, hermon_qphdl_t qp)
{
	hermon_workq_hdr_t	*swq, *rwq;
	uint_t			qp_srq_en;

	if (qp->qp_is_umap)
		return (DDI_SUCCESS);

	/* grab the cq lock(s) to modify the wqavl tree */
	mutex_enter(&qp->qp_rq_cqhdl->cq_lock);
#ifdef __lock_lint
	mutex_enter(&qp->qp_sq_cqhdl->cq_lock);
#else
	/* send and receive CQs may be the same; avoid double entry */
	if (qp->qp_rq_cqhdl != qp->qp_sq_cqhdl)
		mutex_enter(&qp->qp_sq_cqhdl->cq_lock);
#endif

	/* Chain the newly allocated work queue header to the CQ's list */
	hermon_cq_workq_add(qp->qp_sq_cqhdl, &qp->qp_sq_wqavl);

	swq = qp->qp_sq_wqhdr;
	swq->wq_head = 0;
	swq->wq_tail = 0;
	swq->wq_full = 0;

	/*
	 * Now we repeat all the above operations for the receive work queue,
	 * or shared receive work queue.
	 *
	 * Note: We still use the 'qp_rq_cqhdl' even in the SRQ case.
2204 */ 2205 qp_srq_en = qp->qp_srq_en; 2206 2207 #ifdef __lock_lint 2208 mutex_enter(&qp->qp_srqhdl->srq_lock); 2209 #else 2210 if (qp_srq_en == HERMON_QP_SRQ_ENABLED) { 2211 mutex_enter(&qp->qp_srqhdl->srq_lock); 2212 } else { 2213 rwq = qp->qp_rq_wqhdr; 2214 rwq->wq_head = 0; 2215 rwq->wq_tail = 0; 2216 rwq->wq_full = 0; 2217 qp->qp_rq_wqecntr = 0; 2218 } 2219 #endif 2220 hermon_cq_workq_add(qp->qp_rq_cqhdl, &qp->qp_rq_wqavl); 2221 2222 #ifdef __lock_lint 2223 mutex_exit(&qp->qp_srqhdl->srq_lock); 2224 #else 2225 if (qp_srq_en == HERMON_QP_SRQ_ENABLED) { 2226 mutex_exit(&qp->qp_srqhdl->srq_lock); 2227 } 2228 #endif 2229 2230 #ifdef __lock_lint 2231 mutex_exit(&qp->qp_sq_cqhdl->cq_lock); 2232 #else 2233 if (qp->qp_rq_cqhdl != qp->qp_sq_cqhdl) 2234 mutex_exit(&qp->qp_sq_cqhdl->cq_lock); 2235 #endif 2236 mutex_exit(&qp->qp_rq_cqhdl->cq_lock); 2237 return (DDI_SUCCESS); 2238 } 2239 2240 2241 /* 2242 * hermon_wrid_to_reset_handling() 2243 * Context: Can be called from interrupt or base context. 2244 */ 2245 int 2246 hermon_wrid_to_reset_handling(hermon_state_t *state, hermon_qphdl_t qp) 2247 { 2248 uint_t qp_srq_en; 2249 2250 if (qp->qp_is_umap) 2251 return (DDI_SUCCESS); 2252 2253 /* 2254 * If there are unpolled entries in these CQs, they are 2255 * polled/flushed. 2256 * Grab the CQ lock(s) before manipulating the lists. 2257 */ 2258 mutex_enter(&qp->qp_rq_cqhdl->cq_lock); 2259 #ifdef __lock_lint 2260 mutex_enter(&qp->qp_sq_cqhdl->cq_lock); 2261 #else 2262 if (qp->qp_rq_cqhdl != qp->qp_sq_cqhdl) 2263 mutex_enter(&qp->qp_sq_cqhdl->cq_lock); 2264 #endif 2265 2266 qp_srq_en = qp->qp_srq_en; 2267 #ifdef __lock_lint 2268 mutex_enter(&qp->qp_srqhdl->srq_lock); 2269 #else 2270 if (qp_srq_en == HERMON_QP_SRQ_ENABLED) { 2271 mutex_enter(&qp->qp_srqhdl->srq_lock); 2272 } 2273 #endif 2274 /* 2275 * Flush the entries on the CQ for this QP's QPN. 
2276 */ 2277 hermon_cq_entries_flush(state, qp); 2278 2279 #ifdef __lock_lint 2280 mutex_exit(&qp->qp_srqhdl->srq_lock); 2281 #else 2282 if (qp_srq_en == HERMON_QP_SRQ_ENABLED) { 2283 mutex_exit(&qp->qp_srqhdl->srq_lock); 2284 } 2285 #endif 2286 2287 hermon_cq_workq_remove(qp->qp_rq_cqhdl, &qp->qp_rq_wqavl); 2288 hermon_cq_workq_remove(qp->qp_sq_cqhdl, &qp->qp_sq_wqavl); 2289 2290 #ifdef __lock_lint 2291 mutex_exit(&qp->qp_sq_cqhdl->cq_lock); 2292 #else 2293 if (qp->qp_rq_cqhdl != qp->qp_sq_cqhdl) 2294 mutex_exit(&qp->qp_sq_cqhdl->cq_lock); 2295 #endif 2296 mutex_exit(&qp->qp_rq_cqhdl->cq_lock); 2297 2298 return (IBT_SUCCESS); 2299 } 2300 2301 2302 /* 2303 * hermon_wrid_get_entry() 2304 * Context: Can be called from interrupt or base context. 2305 */ 2306 uint64_t 2307 hermon_wrid_get_entry(hermon_cqhdl_t cq, hermon_hw_cqe_t *cqe) 2308 { 2309 hermon_workq_avl_t *wqa; 2310 hermon_workq_hdr_t *wq; 2311 uint64_t wrid; 2312 uint_t send_or_recv, qpnum; 2313 uint32_t indx; 2314 2315 /* 2316 * Determine whether this CQE is a send or receive completion. 2317 */ 2318 send_or_recv = HERMON_CQE_SENDRECV_GET(cq, cqe); 2319 2320 /* Find the work queue for this QP number (send or receive side) */ 2321 qpnum = HERMON_CQE_QPNUM_GET(cq, cqe); 2322 wqa = hermon_wrid_wqavl_find(cq, qpnum, send_or_recv); 2323 wq = wqa->wqa_wq; 2324 2325 /* 2326 * Regardless of whether the completion is the result of a "success" 2327 * or a "failure", we lock the list of "containers" and attempt to 2328 * search for the the first matching completion (i.e. the first WR 2329 * with a matching WQE addr and size). Once we find it, we pull out 2330 * the "wrid" field and return it (see below). XXX Note: One possible 2331 * future enhancement would be to enable this routine to skip over 2332 * any "unsignaled" completions to go directly to the next "signaled" 2333 * entry on success. 
2334 */ 2335 indx = HERMON_CQE_WQEADDRSZ_GET(cq, cqe) & wq->wq_mask; 2336 wrid = wq->wq_wrid[indx]; 2337 if (wqa->wqa_srq_en) { 2338 struct hermon_sw_srq_s *srq; 2339 uint64_t *desc; 2340 2341 /* put wqe back on the srq free list */ 2342 srq = wqa->wqa_srq; 2343 mutex_enter(&srq->srq_lock); 2344 desc = HERMON_SRQ_WQE_ADDR(srq, wq->wq_tail); 2345 ((uint16_t *)desc)[1] = htons(indx); 2346 wq->wq_tail = indx; 2347 mutex_exit(&srq->srq_lock); 2348 } else { 2349 wq->wq_head = (indx + 1) & wq->wq_mask; 2350 wq->wq_full = 0; 2351 } 2352 2353 return (wrid); 2354 } 2355 2356 2357 int 2358 hermon_wrid_workq_compare(const void *p1, const void *p2) 2359 { 2360 hermon_workq_compare_t *cmpp; 2361 hermon_workq_avl_t *curr; 2362 2363 cmpp = (hermon_workq_compare_t *)p1; 2364 curr = (hermon_workq_avl_t *)p2; 2365 2366 if (cmpp->cmp_qpn < curr->wqa_qpn) 2367 return (-1); 2368 else if (cmpp->cmp_qpn > curr->wqa_qpn) 2369 return (+1); 2370 else if (cmpp->cmp_type < curr->wqa_type) 2371 return (-1); 2372 else if (cmpp->cmp_type > curr->wqa_type) 2373 return (+1); 2374 else 2375 return (0); 2376 } 2377 2378 2379 /* 2380 * hermon_wrid_workq_find() 2381 * Context: Can be called from interrupt or base context. 2382 */ 2383 static hermon_workq_avl_t * 2384 hermon_wrid_wqavl_find(hermon_cqhdl_t cq, uint_t qpn, uint_t wq_type) 2385 { 2386 hermon_workq_avl_t *curr; 2387 hermon_workq_compare_t cmp; 2388 2389 /* 2390 * Walk the CQ's work queue list, trying to find a send or recv queue 2391 * with the same QP number. We do this even if we are going to later 2392 * create a new entry because it helps us easily find the end of the 2393 * list. 2394 */ 2395 cmp.cmp_qpn = qpn; 2396 cmp.cmp_type = wq_type; 2397 #ifdef __lock_lint 2398 hermon_wrid_workq_compare(NULL, NULL); 2399 #endif 2400 curr = avl_find(&cq->cq_wrid_wqhdr_avl_tree, &cmp, NULL); 2401 2402 return (curr); 2403 } 2404 2405 2406 /* 2407 * hermon_wrid_wqhdr_create() 2408 * Context: Can be called from base context. 
2409 */ 2410 /* ARGSUSED */ 2411 hermon_workq_hdr_t * 2412 hermon_wrid_wqhdr_create(int bufsz) 2413 { 2414 hermon_workq_hdr_t *wqhdr; 2415 2416 /* 2417 * Allocate space for the wqhdr, and an array to record all the wrids. 2418 */ 2419 wqhdr = (hermon_workq_hdr_t *)kmem_zalloc(sizeof (*wqhdr), KM_NOSLEEP); 2420 if (wqhdr == NULL) { 2421 return (NULL); 2422 } 2423 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*wqhdr)) 2424 wqhdr->wq_wrid = kmem_zalloc(bufsz * sizeof (uint64_t), KM_NOSLEEP); 2425 if (wqhdr->wq_wrid == NULL) { 2426 kmem_free(wqhdr, sizeof (*wqhdr)); 2427 return (NULL); 2428 } 2429 wqhdr->wq_size = bufsz; 2430 wqhdr->wq_mask = bufsz - 1; 2431 2432 return (wqhdr); 2433 } 2434 2435 void 2436 hermon_wrid_wqhdr_destroy(hermon_workq_hdr_t *wqhdr) 2437 { 2438 kmem_free(wqhdr->wq_wrid, wqhdr->wq_size * sizeof (uint64_t)); 2439 kmem_free(wqhdr, sizeof (*wqhdr)); 2440 } 2441 2442 2443 /* 2444 * hermon_cq_workq_add() 2445 * Context: Can be called from interrupt or base context. 2446 */ 2447 static void 2448 hermon_cq_workq_add(hermon_cqhdl_t cq, hermon_workq_avl_t *wqavl) 2449 { 2450 hermon_workq_compare_t cmp; 2451 avl_index_t where; 2452 2453 cmp.cmp_qpn = wqavl->wqa_qpn; 2454 cmp.cmp_type = wqavl->wqa_type; 2455 #ifdef __lock_lint 2456 hermon_wrid_workq_compare(NULL, NULL); 2457 #endif 2458 (void) avl_find(&cq->cq_wrid_wqhdr_avl_tree, &cmp, &where); 2459 avl_insert(&cq->cq_wrid_wqhdr_avl_tree, wqavl, where); 2460 } 2461 2462 2463 /* 2464 * hermon_cq_workq_remove() 2465 * Context: Can be called from interrupt or base context. 2466 */ 2467 static void 2468 hermon_cq_workq_remove(hermon_cqhdl_t cq, hermon_workq_avl_t *wqavl) 2469 { 2470 #ifdef __lock_lint 2471 hermon_wrid_workq_compare(NULL, NULL); 2472 #endif 2473 avl_remove(&cq->cq_wrid_wqhdr_avl_tree, wqavl); 2474 } 2475