1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/types.h> 27 #include <sys/ddi.h> 28 #include <sys/types.h> 29 #include <sys/socket.h> 30 #include <netinet/in.h> 31 #include <sys/sunddi.h> 32 #include <sys/sdt.h> 33 #include <sys/ib/ibtl/ibti.h> 34 #include <sys/ib/ibtl/ibtl_types.h> 35 36 #include <sys/ib/clients/iser/iser.h> 37 38 /* 39 * iser_cq.c 40 * Routines for completion queue handlers for iSER. 41 */ 42 static void iser_msg_handle(iser_chan_t *chan, iser_msg_t *msg); 43 int iser_iscsihdr_handle(iser_chan_t *chan, iser_msg_t *msg); 44 static int iser_ib_poll_send_completions(ibt_cq_hdl_t cq_hdl, 45 iser_chan_t *iser_chan); 46 static int iser_ib_poll_recv_completions(ibt_cq_hdl_t cq_hdl, 47 iser_chan_t *iser_chan); 48 49 void 50 iser_ib_sendcq_handler(ibt_cq_hdl_t cq_hdl, void *arg) 51 { 52 iser_chan_t *iser_chan; 53 ibt_status_t status; 54 55 iser_chan = (iser_chan_t *)arg; 56 57 /* Poll completions until the CQ is empty */ 58 do { 59 status = iser_ib_poll_send_completions(cq_hdl, iser_chan); 60 } while (status != IBT_CQ_EMPTY); 61 62 /* We've emptied the CQ, rearm it before we're done here */ 63 status = ibt_enable_cq_notify(cq_hdl, IBT_NEXT_COMPLETION); 64 if (status != IBT_SUCCESS) { 65 /* Unexpected error */ 66 ISER_LOG(CE_NOTE, "iser_ib_sendcq_handler: " 67 "ibt_enable_cq_notify error (%d)", status); 68 return; 69 } 70 71 /* Now, check for more completions after the rearm */ 72 do { 73 status = iser_ib_poll_send_completions(cq_hdl, iser_chan); 74 } while (status != IBT_CQ_EMPTY); 75 } 76 77 static int 78 iser_ib_poll_send_completions(ibt_cq_hdl_t cq_hdl, iser_chan_t *iser_chan) 79 { 80 ibt_wc_t wc[ISER_IB_SCQ_POLL_MAX]; 81 ibt_wrid_t wrid; 82 idm_buf_t *idb = NULL; 83 idm_task_t *idt = NULL; 84 iser_wr_t *wr = NULL; 85 int i; 86 uint_t npoll = 0; 87 ibt_status_t status; 88 iser_conn_t *iser_conn; 89 idm_status_t idm_status; 90 91 iser_conn = iser_chan->ic_conn; 92 93 /* 94 * Poll ISER_IB_SCQ_POLL_MAX completions from the CQ. 95 */ 96 status = ibt_poll_cq(cq_hdl, wc, ISER_IB_SCQ_POLL_MAX, &npoll); 97 98 if (status != IBT_SUCCESS) { 99 if (status != IBT_CQ_EMPTY) { 100 /* Unexpected error */ 101 ISER_LOG(CE_NOTE, "iser_ib_sendcq_handler: ibt_poll_cq " 102 "error (%d)", status); 103 } 104 /* CQ is empty. Either way, move along... */ 105 return (status); 106 } 107 108 /* 109 * Handle each of the completions we've polled 110 */ 111 for (i = 0; i < npoll; i++) { 112 113 DTRACE_PROBE3(iser__send__cqe, iser_chan_t *, iser_chan, 114 ibt_wc_t *, &wc[i], ibt_wc_status_t, wc[i].wc_status); 115 116 /* Grab the wrid of the completion */ 117 wrid = wc[i].wc_id; 118 119 /* Decrement this channel's SQ posted count */ 120 mutex_enter(&iser_chan->ic_sq_post_lock); 121 iser_chan->ic_sq_post_count--; 122 mutex_exit(&iser_chan->ic_sq_post_lock); 123 124 /* Pull in the wr handle */ 125 wr = (iser_wr_t *)(uintptr_t)wrid; 126 ASSERT(wr != NULL); 127 128 /* Set an idm_status for return to IDM */ 129 idm_status = (wc[i].wc_status == IBT_WC_SUCCESS) ? 130 IDM_STATUS_SUCCESS : IDM_STATUS_FAIL; 131 132 /* 133 * A non-success status here indicates the QP went 134 * into an error state while this WR was being 135 * processed. This can also happen when the 136 * channel is closed on the remote end. Clean up 137 * the resources, then push CE_TRANSPORT_FAIL 138 * into IDM. 139 */ 140 if (wc[i].wc_status != IBT_WC_SUCCESS) { 141 /* 142 * Free the resources attached to this 143 * completion. 144 */ 145 if (wr->iw_msg != NULL) { 146 /* Free iser_msg handle */ 147 iser_msg_free(wr->iw_msg); 148 } 149 150 if (wr->iw_pdu != NULL) { 151 /* Complete the PDU */ 152 idm_pdu_complete(wr->iw_pdu, idm_status); 153 } 154 155 if (wr->iw_buf != NULL) { 156 /* Invoke buffer callback */ 157 idb = wr->iw_buf; 158 #ifdef DEBUG 159 bcopy(&wc[i], 160 &((iser_buf_t *)idb->idb_buf_private)-> 161 buf_wc, sizeof (ibt_wc_t)); 162 #endif 163 idt = idb->idb_task_binding; 164 mutex_enter(&idt->idt_mutex); 165 if (wr->iw_type == ISER_WR_RDMAW) { 166 idm_buf_tx_to_ini_done(idt, idb, 167 IDM_STATUS_FAIL); 168 } else { /* ISER_WR_RDMAR */ 169 idm_buf_rx_from_ini_done(idt, idb, 170 IDM_STATUS_FAIL); 171 } 172 } 173 174 /* Free the iser wr handle */ 175 iser_wr_free(wr); 176 177 /* 178 * Tell IDM that the channel has gone down, 179 * unless he already knows. 180 */ 181 mutex_enter(&iser_conn->ic_lock); 182 switch (iser_conn->ic_stage) { 183 case ISER_CONN_STAGE_IC_DISCONNECTED: 184 case ISER_CONN_STAGE_IC_FREED: 185 case ISER_CONN_STAGE_CLOSING: 186 case ISER_CONN_STAGE_CLOSED: 187 break; 188 189 default: 190 idm_conn_event(iser_conn->ic_idmc, 191 CE_TRANSPORT_FAIL, idm_status); 192 iser_conn->ic_stage = ISER_CONN_STAGE_CLOSING; 193 } 194 mutex_exit(&iser_conn->ic_lock); 195 196 /* Move onto the next completion */ 197 continue; 198 } 199 200 /* 201 * For a success status, just invoke the PDU or 202 * buffer completion. We use our WR handle's 203 * "iw_type" here so that we can properly process 204 * because the CQE's opcode is invalid if the status 205 * is failed. 206 */ 207 switch (wr->iw_type) { 208 case ISER_WR_SEND: 209 /* Free the msg handle */ 210 ASSERT(wr->iw_msg != NULL); 211 iser_msg_free(wr->iw_msg); 212 213 if (wr->iw_pdu == NULL) { 214 /* This is a hello exchange message */ 215 mutex_enter(&iser_conn->ic_lock); 216 if (iser_conn->ic_stage == 217 ISER_CONN_STAGE_HELLOREPLY_SENT) { 218 /* 219 * We're on the target side, 220 * and have just successfully 221 * sent the HelloReply msg. 222 */ 223 iser_conn->ic_stage = 224 ISER_CONN_STAGE_LOGGED_IN; 225 } 226 mutex_exit(&iser_conn->ic_lock); 227 } else { 228 /* This is a normal control message */ 229 idm_pdu_complete(wr->iw_pdu, idm_status); 230 } 231 232 /* Free the wr handle */ 233 iser_wr_free(wr); 234 235 break; 236 237 case ISER_WR_RDMAW: 238 case ISER_WR_RDMAR: 239 /* 240 * Invoke the appropriate callback; 241 * the buffer will be freed there. 242 */ 243 idb = wr->iw_buf; 244 #ifdef DEBUG 245 bcopy(&wc[i], 246 &((iser_buf_t *)idb->idb_buf_private)->buf_wc, 247 sizeof (ibt_wc_t)); 248 #endif 249 idt = idb->idb_task_binding; 250 251 mutex_enter(&idt->idt_mutex); 252 if (wr->iw_type == ISER_WR_RDMAW) { 253 idm_buf_tx_to_ini_done(idt, idb, idm_status); 254 } else { 255 idm_buf_rx_from_ini_done(idt, idb, idm_status); 256 } 257 258 /* Free the wr handle */ 259 iser_wr_free(wr); 260 261 break; 262 263 default: 264 ASSERT(0); 265 break; 266 } 267 } 268 269 return (status); 270 } 271 272 void 273 iser_ib_recvcq_handler(ibt_cq_hdl_t cq_hdl, void *arg) 274 { 275 iser_chan_t *iser_chan; 276 ibt_status_t status; 277 278 iser_chan = (iser_chan_t *)arg; 279 280 /* Poll completions until the CQ is empty */ 281 do { 282 status = iser_ib_poll_recv_completions(cq_hdl, iser_chan); 283 } while (status != IBT_CQ_EMPTY); 284 285 /* We've emptied the CQ, rearm it before we're done here */ 286 status = ibt_enable_cq_notify(cq_hdl, IBT_NEXT_COMPLETION); 287 if (status != IBT_SUCCESS) { 288 /* Unexpected error */ 289 ISER_LOG(CE_NOTE, "iser_ib_recvcq_handler: " 290 "ibt_enable_cq_notify error (%d)", status); 291 return; 292 } 293 294 /* Now, check for more completions after the rearm */ 295 do { 296 status = iser_ib_poll_recv_completions(cq_hdl, iser_chan); 297 } while (status != IBT_CQ_EMPTY); 298 } 299 300 static int 301 iser_ib_poll_recv_completions(ibt_cq_hdl_t cq_hdl, iser_chan_t *iser_chan) 302 { 303 ibt_wc_t wc; 304 iser_msg_t *msg; 305 iser_qp_t *iser_qp; 306 int status; 307 308 iser_qp = &(iser_chan->ic_qp); 309 310 bzero(&wc, sizeof (ibt_wc_t)); 311 status = ibt_poll_cq(cq_hdl, &wc, 1, NULL); 312 if (status == IBT_CQ_EMPTY) { 313 /* CQ is empty, return */ 314 return (status); 315 } 316 317 if (status != IBT_SUCCESS) { 318 /* Unexpected error */ 319 ISER_LOG(CE_NOTE, "iser_ib_poll_recv_completions: " 320 "ibt_poll_cq error (%d)", status); 321 mutex_enter(&iser_qp->qp_lock); 322 iser_qp->rq_level--; 323 mutex_exit(&iser_qp->qp_lock); 324 /* Free the msg handle (if we got it back) */ 325 if ((msg = (iser_msg_t *)(uintptr_t)wc.wc_id) != NULL) { 326 iser_msg_free(msg); 327 } 328 return (status); 329 } 330 331 /* Retrieve the iSER msg handle */ 332 msg = (iser_msg_t *)(uintptr_t)wc.wc_id; 333 ASSERT(msg != NULL); 334 335 /* 336 * Decrement the posted level in the RQ, then check 337 * to see if we need to fill the RQ back up (or if 338 * we are already on the taskq). 339 */ 340 mutex_enter(&iser_chan->ic_conn->ic_lock); 341 mutex_enter(&iser_qp->qp_lock); 342 iser_qp->rq_level--; 343 344 if ((iser_qp->rq_taskqpending == B_FALSE) && 345 (iser_qp->rq_level <= iser_qp->rq_lwm) && 346 (iser_chan->ic_conn->ic_stage >= ISER_CONN_STAGE_IC_CONNECTED) && 347 (iser_chan->ic_conn->ic_stage <= ISER_CONN_STAGE_LOGGED_IN)) { 348 /* Set the pending flag and fire off a post_recv */ 349 iser_qp->rq_taskqpending = B_TRUE; 350 mutex_exit(&iser_qp->qp_lock); 351 352 status = iser_ib_post_recv_async(iser_chan->ic_chanhdl); 353 354 if (status != DDI_SUCCESS) { 355 ISER_LOG(CE_NOTE, "iser_ib_poll_recv_completions: " 356 "task dispatch failed"); 357 /* Failure to launch, unset the pending flag */ 358 mutex_enter(&iser_qp->qp_lock); 359 iser_qp->rq_taskqpending = B_FALSE; 360 mutex_exit(&iser_qp->qp_lock); 361 } 362 } else { 363 mutex_exit(&iser_qp->qp_lock); 364 } 365 366 DTRACE_PROBE3(iser__recv__cqe, iser_chan_t *, iser_chan, 367 ibt_wc_t *, &wc, ibt_wc_status_t, wc.wc_status); 368 if (wc.wc_status != IBT_WC_SUCCESS) { 369 /* 370 * Tell IDM that the channel has gone down, 371 * unless he already knows. 372 */ 373 switch (iser_chan->ic_conn->ic_stage) { 374 case ISER_CONN_STAGE_IC_DISCONNECTED: 375 case ISER_CONN_STAGE_IC_FREED: 376 case ISER_CONN_STAGE_CLOSING: 377 case ISER_CONN_STAGE_CLOSED: 378 break; 379 380 default: 381 idm_conn_event(iser_chan->ic_conn->ic_idmc, 382 CE_TRANSPORT_FAIL, IDM_STATUS_FAIL); 383 iser_chan->ic_conn->ic_stage = 384 ISER_CONN_STAGE_CLOSING; 385 } 386 mutex_exit(&iser_chan->ic_conn->ic_lock); 387 388 iser_msg_free(msg); 389 return (DDI_SUCCESS); 390 } else { 391 mutex_exit(&iser_chan->ic_conn->ic_lock); 392 393 /* 394 * We have an iSER message in, let's handle it. 395 * We will free the iser_msg_t later in this path, 396 * depending upon the action required. 397 */ 398 iser_msg_handle(iser_chan, msg); 399 return (DDI_SUCCESS); 400 } 401 } 402 403 static void 404 iser_msg_handle(iser_chan_t *chan, iser_msg_t *msg) 405 { 406 int opcode; 407 iser_ctrl_hdr_t *hdr = NULL; 408 iser_conn_t *iser_conn = chan->ic_conn; 409 int status; 410 411 hdr = (iser_ctrl_hdr_t *)(uintptr_t)msg->msg_ds.ds_va; 412 ASSERT(hdr != NULL); 413 414 opcode = hdr->opcode; 415 if (opcode == ISER_OPCODE_CTRL_TYPE_PDU) { 416 /* 417 * Handle an iSCSI Control PDU iSER message. 418 * Note we'll free the msg handle in the PDU callback. 419 */ 420 status = iser_iscsihdr_handle(chan, msg); 421 if (status != DDI_SUCCESS) { 422 /* 423 * We are unable to handle this message, and 424 * have no way to recover from this. Fail the 425 * transport. 426 */ 427 ISER_LOG(CE_NOTE, "iser_msg_handle: failed " 428 "iser_iscsihdr_handle"); 429 iser_msg_free(msg); 430 idm_conn_event(iser_conn->ic_idmc, 431 CE_TRANSPORT_FAIL, IDM_STATUS_FAIL); 432 } 433 } else if (opcode == ISER_OPCODE_HELLO_MSG) { /* at the target */ 434 /* 435 * We are currently not supporting Hello Exchange, 436 * since OFED iSER does not. May be revisited. 437 */ 438 ASSERT(opcode != ISER_OPCODE_HELLO_MSG); 439 440 if (iser_conn->ic_type != ISER_CONN_TYPE_TGT) { 441 idm_conn_event(iser_conn->ic_idmc, 442 CE_TRANSPORT_FAIL, IDM_STATUS_FAIL); 443 } 444 445 iser_hello_hdr_t *hello_hdr = (iser_hello_hdr_t *)hdr; 446 447 ISER_LOG(CE_NOTE, "received Hello message: opcode[%d], " 448 "maxver[%d], minver[%d], iser_ird[%d], msg (0x%p)", 449 hello_hdr->opcode, hello_hdr->maxver, hello_hdr->minver, 450 ntohs(hello_hdr->iser_ird), (void *)msg); 451 452 mutex_enter(&iser_conn->ic_lock); 453 454 if (iser_conn->ic_stage != ISER_CONN_STAGE_HELLO_WAIT) { 455 /* target is not expected to receive a Hello */ 456 idm_conn_event(iser_conn->ic_idmc, 457 CE_TRANSPORT_FAIL, IDM_STATUS_FAIL); 458 } 459 460 iser_conn->ic_stage = ISER_CONN_STAGE_HELLOREPLY_SENT; 461 mutex_exit(&iser_conn->ic_lock); 462 463 /* Prepare and send a HelloReply message */ 464 status = iser_xfer_helloreply_msg(chan); 465 if (status != ISER_STATUS_SUCCESS) { 466 467 mutex_enter(&iser_conn->ic_lock); 468 iser_conn->ic_stage = 469 ISER_CONN_STAGE_HELLOREPLY_SENT_FAIL; 470 mutex_exit(&iser_conn->ic_lock); 471 472 idm_conn_event(iser_conn->ic_idmc, 473 CE_TRANSPORT_FAIL, status); 474 } 475 476 /* Free this msg handle */ 477 iser_msg_free(msg); 478 479 } else if (opcode == ISER_OPCODE_HELLOREPLY_MSG) { /* at initiator */ 480 481 /* 482 * We are currently not supporting Hello Exchange, 483 * since OFED iSER does not. May be revisited. 484 */ 485 ASSERT(opcode != ISER_OPCODE_HELLOREPLY_MSG); 486 487 if (iser_conn->ic_type != ISER_CONN_TYPE_INI) { 488 idm_conn_event(iser_conn->ic_idmc, 489 CE_TRANSPORT_FAIL, status); 490 } 491 492 iser_helloreply_hdr_t *hello_hdr = (iser_helloreply_hdr_t *)hdr; 493 494 ISER_LOG(CE_NOTE, "received Hello Reply message: opcode[%d], " 495 "maxver[%d], curver[%d], iser_ord[%d], msg (0x%p)", 496 hello_hdr->opcode, hello_hdr->maxver, hello_hdr->curver, 497 ntohs(hello_hdr->iser_ord), (void *)msg); 498 499 /* Free this msg handle */ 500 iser_msg_free(msg); 501 502 /* 503 * Signal the receipt of HelloReply to the waiting thread 504 * so that the initiator can proceed to the Full Feature 505 * Phase. 506 */ 507 mutex_enter(&iser_conn->ic_lock); 508 iser_conn->ic_stage = ISER_CONN_STAGE_HELLOREPLY_RCV; 509 cv_signal(&iser_conn->ic_stage_cv); 510 mutex_exit(&iser_conn->ic_lock); 511 } else { 512 /* Protocol error: free the msg handle and fail the session */ 513 ISER_LOG(CE_NOTE, "iser_msg_handle: unsupported opcode (0x%x): " 514 "terminating session on IDM handle (0x%p)", opcode, 515 (void *) iser_conn->ic_idmc); 516 517 iser_msg_free(msg); 518 idm_conn_event(iser_conn->ic_idmc, CE_TRANSPORT_FAIL, 519 IDM_STATUS_FAIL); 520 } 521 } 522 523 #define IDM_PDU_OPCODE(PDU) \ 524 ((PDU)->isp_hdr->opcode & ISCSI_OPCODE_MASK) 525 526 /* network to host translation for 24b integers */ 527 static uint32_t 528 n2h24(uchar_t *ptr) 529 { 530 return ((ptr[0] << 16) | (ptr[1] << 8) | ptr[2]); 531 } 532 533 /* ARGSUSED */ 534 static void 535 iser_rx_pdu_cb(idm_pdu_t *pdu, idm_status_t status) 536 { 537 /* Free the iser msg handle and the PDU handle */ 538 iser_msg_free((iser_msg_t *)pdu->isp_transport_private); 539 idm_pdu_free(pdu); 540 } 541 542 int 543 iser_iscsihdr_handle(iser_chan_t *chan, iser_msg_t *msg) 544 { 545 idm_pdu_t *pdu; 546 uint8_t *iser_hdrp; 547 uint8_t *iscsi_hdrp; 548 iscsi_hdr_t *bhs; 549 550 pdu = idm_pdu_alloc_nosleep(sizeof (iscsi_hdr_t), 0); 551 pdu->isp_ic = chan->ic_conn->ic_idmc; 552 ASSERT(pdu->isp_ic != NULL); 553 554 /* Set the iser_msg handle into the transport-private field */ 555 pdu->isp_transport_private = (void *)msg; 556 557 /* Set up a pointer in the pdu handle to the iSER header */ 558 iser_hdrp = (uint8_t *)(uintptr_t)msg->msg_ds.ds_va; 559 if (iser_hdrp == NULL) { 560 ISER_LOG(CE_NOTE, "iser_iscsihdr_handle: iser_hdrp is NULL"); 561 idm_pdu_free(pdu); 562 return (ISER_STATUS_FAIL); 563 } 564 pdu->isp_transport_hdr = (void *)iser_hdrp; 565 pdu->isp_transport_hdrlen = ISER_HEADER_LENGTH; 566 567 /* 568 * Set up a pointer to the iSCSI header, which is directly 569 * after the iSER header in the message. 570 */ 571 iscsi_hdrp = ((uint8_t *)(uintptr_t)msg->msg_ds.ds_va) + 572 ISER_HEADER_LENGTH; 573 if (iscsi_hdrp == NULL) { 574 ISER_LOG(CE_NOTE, "iser_iscsihdr_handle: iscsi_hdrp is NULL"); 575 idm_pdu_free(pdu); 576 return (ISER_STATUS_FAIL); 577 } 578 pdu->isp_hdr = (iscsi_hdr_t *)(uintptr_t)iscsi_hdrp; 579 580 /* Fill in the BHS */ 581 bhs = pdu->isp_hdr; 582 pdu->isp_hdrlen = sizeof (iscsi_hdr_t) + 583 (bhs->hlength * sizeof (uint32_t)); 584 pdu->isp_datalen = n2h24(bhs->dlength); 585 pdu->isp_callback = iser_rx_pdu_cb; 586 587 /* 588 * If datalen > 0, then non-scsi data may be present. Allocate 589 * space in the PDU handle and set a pointer to the data. 590 */ 591 if (pdu->isp_datalen) { 592 pdu->isp_data = ((uint8_t *)(uintptr_t)pdu->isp_hdr) + 593 pdu->isp_hdrlen; 594 } 595 596 /* Process RX PDU */ 597 idm_pdu_rx(pdu->isp_ic, pdu); 598 599 return (DDI_SUCCESS); 600 } 601