1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/types.h> 27 #include <sys/ddi.h> 28 #include <sys/types.h> 29 #include <sys/socket.h> 30 #include <netinet/in.h> 31 #include <sys/sunddi.h> 32 #include <sys/sdt.h> 33 #include <sys/ib/ibtl/ibti.h> 34 #include <sys/ib/ibtl/ibtl_types.h> 35 36 #include <sys/ib/clients/iser/iser.h> 37 38 /* 39 * iser_cq.c 40 * Routines for completion queue handlers for iSER. 41 */ 42 static void iser_msg_handle(iser_chan_t *chan, iser_msg_t *msg); 43 int iser_iscsihdr_handle(iser_chan_t *chan, iser_msg_t *msg); 44 static int iser_ib_poll_send_completions(ibt_cq_hdl_t cq_hdl, 45 iser_chan_t *iser_chan); 46 static int iser_ib_poll_recv_completions(ibt_cq_hdl_t cq_hdl, 47 iser_chan_t *iser_chan); 48 49 void 50 iser_ib_sendcq_handler(ibt_cq_hdl_t cq_hdl, void *arg) 51 { 52 iser_chan_t *iser_chan; 53 ibt_status_t status; 54 55 iser_chan = (iser_chan_t *)arg; 56 57 /* Poll completions until the CQ is empty */ 58 do { 59 status = iser_ib_poll_send_completions(cq_hdl, iser_chan); 60 } while (status != IBT_CQ_EMPTY); 61 62 /* We've emptied the CQ, rearm it before we're done here */ 63 status = ibt_enable_cq_notify(cq_hdl, IBT_NEXT_COMPLETION); 64 if (status != IBT_SUCCESS) { 65 /* Unexpected error */ 66 ISER_LOG(CE_NOTE, "iser_ib_sendcq_handler: " 67 "ibt_enable_cq_notify error (%d)", status); 68 return; 69 } 70 71 /* Now, check for more completions after the rearm */ 72 do { 73 status = iser_ib_poll_send_completions(cq_hdl, iser_chan); 74 } while (status != IBT_CQ_EMPTY); 75 } 76 77 static int 78 iser_ib_poll_send_completions(ibt_cq_hdl_t cq_hdl, iser_chan_t *iser_chan) 79 { 80 ibt_wc_t wc[ISER_IB_SCQ_POLL_MAX]; 81 ibt_wrid_t wrid; 82 idm_buf_t *idb = NULL; 83 idm_task_t *idt = NULL; 84 iser_wr_t *wr = NULL; 85 int i; 86 uint_t npoll = 0; 87 ibt_status_t status; 88 iser_conn_t *iser_conn; 89 idm_status_t idm_status; 90 91 iser_conn = iser_chan->ic_conn; 92 93 /* 94 * Poll ISER_IB_SCQ_POLL_MAX completions from the CQ. 95 */ 96 status = ibt_poll_cq(cq_hdl, wc, ISER_IB_SCQ_POLL_MAX, &npoll); 97 98 if (status != IBT_SUCCESS) { 99 if (status != IBT_CQ_EMPTY) { 100 /* Unexpected error */ 101 ISER_LOG(CE_NOTE, "iser_ib_sendcq_handler: ibt_poll_cq " 102 "error (%d)", status); 103 } 104 /* CQ is empty. Either way, move along... */ 105 return (status); 106 } 107 108 /* 109 * Handle each of the completions we've polled 110 */ 111 for (i = 0; i < npoll; i++) { 112 113 DTRACE_PROBE3(iser__send__cqe, iser_chan_t *, iser_chan, 114 ibt_wc_t *, &wc[i], ibt_wc_status_t, wc[i].wc_status); 115 116 /* Grab the wrid of the completion */ 117 wrid = wc[i].wc_id; 118 119 /* Decrement this channel's SQ posted count */ 120 mutex_enter(&iser_chan->ic_sq_post_lock); 121 iser_chan->ic_sq_post_count--; 122 mutex_exit(&iser_chan->ic_sq_post_lock); 123 124 /* Pull in the wr handle */ 125 wr = (iser_wr_t *)(uintptr_t)wrid; 126 ASSERT(wr != NULL); 127 128 /* Set an idm_status for return to IDM */ 129 idm_status = (wc[i].wc_status == IBT_WC_SUCCESS) ? 130 IDM_STATUS_SUCCESS : IDM_STATUS_FAIL; 131 132 /* 133 * A non-success status here indicates the QP went 134 * into an error state while this WR was being 135 * processed. This can also happen when the 136 * channel is closed on the remote end. Clean up 137 * the resources, then push CE_TRANSPORT_FAIL 138 * into IDM. 139 */ 140 if (wc[i].wc_status != IBT_WC_SUCCESS) { 141 /* 142 * Free the resources attached to this 143 * completion. 144 */ 145 if (wr->iw_msg != NULL) { 146 /* Free iser_msg handle */ 147 iser_msg_free(wr->iw_msg); 148 } 149 150 if (wr->iw_pdu != NULL) { 151 /* Complete the PDU */ 152 idm_pdu_complete(wr->iw_pdu, idm_status); 153 } 154 155 if (wr->iw_buf != NULL) { 156 /* Invoke buffer callback */ 157 idb = wr->iw_buf; 158 #ifdef DEBUG 159 bcopy(&wc[i], 160 &((iser_buf_t *)idb->idb_buf_private)-> 161 buf_wc, sizeof (ibt_wc_t)); 162 #endif 163 idt = idb->idb_task_binding; 164 mutex_enter(&idt->idt_mutex); 165 if (wr->iw_type == ISER_WR_RDMAW) { 166 idm_buf_tx_to_ini_done(idt, idb, 167 IDM_STATUS_FAIL); 168 } else { /* ISER_WR_RDMAR */ 169 idm_buf_rx_from_ini_done(idt, idb, 170 IDM_STATUS_FAIL); 171 } 172 } 173 174 /* Free the iser wr handle */ 175 iser_wr_free(wr); 176 177 /* 178 * Tell IDM that the channel has gone down, 179 * unless he already knows. 180 */ 181 mutex_enter(&iser_conn->ic_lock); 182 switch (iser_conn->ic_stage) { 183 case ISER_CONN_STAGE_IC_DISCONNECTED: 184 case ISER_CONN_STAGE_IC_FREED: 185 case ISER_CONN_STAGE_CLOSING: 186 case ISER_CONN_STAGE_CLOSED: 187 break; 188 189 default: 190 idm_conn_event(iser_conn->ic_idmc, 191 CE_TRANSPORT_FAIL, idm_status); 192 iser_conn->ic_stage = ISER_CONN_STAGE_CLOSING; 193 } 194 mutex_exit(&iser_conn->ic_lock); 195 196 /* Move onto the next completion */ 197 continue; 198 } 199 200 /* 201 * For a success status, just invoke the PDU or 202 * buffer completion. We use our WR handle's 203 * "iw_type" here so that we can properly process 204 * because the CQE's opcode is invalid if the status 205 * is failed. 206 */ 207 switch (wr->iw_type) { 208 case ISER_WR_SEND: 209 /* Free the msg handle */ 210 ASSERT(wr->iw_msg != NULL); 211 iser_msg_free(wr->iw_msg); 212 213 if (wr->iw_pdu == NULL) { 214 /* This is a hello exchange message */ 215 mutex_enter(&iser_conn->ic_lock); 216 if (iser_conn->ic_stage == 217 ISER_CONN_STAGE_HELLOREPLY_SENT) { 218 /* 219 * We're on the target side, 220 * and have just successfully 221 * sent the HelloReply msg. 222 */ 223 iser_conn->ic_stage = 224 ISER_CONN_STAGE_LOGGED_IN; 225 } 226 mutex_exit(&iser_conn->ic_lock); 227 } else { 228 /* This is a normal control message */ 229 idm_pdu_complete(wr->iw_pdu, idm_status); 230 } 231 232 /* Free the wr handle */ 233 iser_wr_free(wr); 234 235 break; 236 237 case ISER_WR_RDMAW: 238 case ISER_WR_RDMAR: 239 /* 240 * Invoke the appropriate callback; 241 * the buffer will be freed there. 242 */ 243 idb = wr->iw_buf; 244 #ifdef DEBUG 245 bcopy(&wc[i], 246 &((iser_buf_t *)idb->idb_buf_private)->buf_wc, 247 sizeof (ibt_wc_t)); 248 #endif 249 idt = idb->idb_task_binding; 250 251 mutex_enter(&idt->idt_mutex); 252 if (wr->iw_type == ISER_WR_RDMAW) { 253 idm_buf_tx_to_ini_done(idt, idb, idm_status); 254 } else { 255 idm_buf_rx_from_ini_done(idt, idb, idm_status); 256 } 257 258 /* Free the wr handle */ 259 iser_wr_free(wr); 260 261 break; 262 263 default: 264 ASSERT(0); 265 break; 266 } 267 } 268 269 return (status); 270 } 271 272 void 273 iser_ib_recvcq_handler(ibt_cq_hdl_t cq_hdl, void *arg) 274 { 275 iser_chan_t *iser_chan; 276 ibt_status_t status; 277 278 iser_chan = (iser_chan_t *)arg; 279 280 /* Poll completions until the CQ is empty */ 281 do { 282 status = iser_ib_poll_recv_completions(cq_hdl, iser_chan); 283 } while (status != IBT_CQ_EMPTY); 284 285 /* We've emptied the CQ, rearm it before we're done here */ 286 status = ibt_enable_cq_notify(cq_hdl, IBT_NEXT_COMPLETION); 287 if (status != IBT_SUCCESS) { 288 /* Unexpected error */ 289 ISER_LOG(CE_NOTE, "iser_ib_recvcq_handler: " 290 "ibt_enable_cq_notify error (%d)", status); 291 return; 292 } 293 294 /* Now, check for more completions after the rearm */ 295 do { 296 status = iser_ib_poll_recv_completions(cq_hdl, iser_chan); 297 } while (status != IBT_CQ_EMPTY); 298 } 299 300 static int 301 iser_ib_poll_recv_completions(ibt_cq_hdl_t cq_hdl, iser_chan_t *iser_chan) 302 { 303 ibt_wc_t wc; 304 iser_msg_t *msg; 305 iser_qp_t *iser_qp; 306 int status; 307 308 iser_qp = &(iser_chan->ic_qp); 309 310 bzero(&wc, sizeof (ibt_wc_t)); 311 status = ibt_poll_cq(cq_hdl, &wc, 1, NULL); 312 if (status == IBT_CQ_EMPTY) { 313 /* CQ is empty, return */ 314 return (status); 315 } 316 317 if (status != IBT_SUCCESS) { 318 /* Unexpected error */ 319 ISER_LOG(CE_NOTE, "iser_ib_poll_recv_completions: " 320 "ibt_poll_cq error (%d)", status); 321 mutex_enter(&iser_qp->qp_lock); 322 iser_qp->rq_level--; 323 mutex_exit(&iser_qp->qp_lock); 324 /* Free the msg handle (if we got it back) */ 325 if ((msg = (iser_msg_t *)(uintptr_t)wc.wc_id) != NULL) { 326 iser_msg_free(msg); 327 } 328 return (status); 329 } 330 331 /* Retrieve the iSER msg handle */ 332 msg = (iser_msg_t *)(uintptr_t)wc.wc_id; 333 ASSERT(msg != NULL); 334 335 /* 336 * Decrement the posted level in the RQ, then check 337 * to see if we need to fill the RQ back up (or if 338 * we are already on the taskq). 339 */ 340 mutex_enter(&iser_chan->ic_conn->ic_lock); 341 mutex_enter(&iser_qp->qp_lock); 342 iser_qp->rq_level--; 343 344 if ((iser_qp->rq_taskqpending == B_FALSE) && 345 (iser_qp->rq_level <= iser_qp->rq_lwm)) { 346 /* Set the pending flag and fire off a post_recv */ 347 iser_qp->rq_taskqpending = B_TRUE; 348 mutex_exit(&iser_qp->qp_lock); 349 350 status = iser_ib_post_recv_async(iser_chan->ic_chanhdl); 351 352 if (status != DDI_SUCCESS) { 353 ISER_LOG(CE_NOTE, "iser_ib_poll_recv_completions: " 354 "task dispatch failed"); 355 /* Failure to launch, unset the pending flag */ 356 mutex_enter(&iser_qp->qp_lock); 357 iser_qp->rq_taskqpending = B_FALSE; 358 mutex_exit(&iser_qp->qp_lock); 359 } 360 } else { 361 mutex_exit(&iser_qp->qp_lock); 362 } 363 mutex_exit(&iser_chan->ic_conn->ic_lock); 364 365 DTRACE_PROBE3(iser__recv__cqe, iser_chan_t *, iser_chan, 366 ibt_wc_t *, &wc, ibt_wc_status_t, wc.wc_status); 367 if (wc.wc_status != IBT_WC_SUCCESS) { 368 /* 369 * Tell IDM that the channel has gone down, 370 * unless he already knows. 371 */ 372 mutex_enter(&iser_chan->ic_conn->ic_lock); 373 switch (iser_chan->ic_conn->ic_stage) { 374 case ISER_CONN_STAGE_IC_DISCONNECTED: 375 case ISER_CONN_STAGE_IC_FREED: 376 case ISER_CONN_STAGE_CLOSING: 377 case ISER_CONN_STAGE_CLOSED: 378 break; 379 380 default: 381 idm_conn_event(iser_chan->ic_conn->ic_idmc, 382 CE_TRANSPORT_FAIL, IDM_STATUS_FAIL); 383 iser_chan->ic_conn->ic_stage = 384 ISER_CONN_STAGE_CLOSING; 385 } 386 mutex_exit(&iser_chan->ic_conn->ic_lock); 387 388 iser_msg_free(msg); 389 return (DDI_SUCCESS); 390 } else { 391 /* 392 * We have an iSER message in, let's handle it. 393 * We will free the iser_msg_t later in this path, 394 * depending upon the action required. 395 */ 396 iser_msg_handle(iser_chan, msg); 397 return (DDI_SUCCESS); 398 } 399 } 400 401 static void 402 iser_msg_handle(iser_chan_t *chan, iser_msg_t *msg) 403 { 404 int opcode; 405 iser_ctrl_hdr_t *hdr = NULL; 406 iser_conn_t *iser_conn = chan->ic_conn; 407 int status; 408 409 hdr = (iser_ctrl_hdr_t *)(uintptr_t)msg->msg_ds.ds_va; 410 ASSERT(hdr != NULL); 411 412 opcode = hdr->opcode; 413 if (opcode == ISER_OPCODE_CTRL_TYPE_PDU) { 414 /* 415 * Handle an iSCSI Control PDU iSER message. 416 * Note we'll free the msg handle in the PDU callback. 417 */ 418 status = iser_iscsihdr_handle(chan, msg); 419 if (status != DDI_SUCCESS) { 420 /* 421 * We are unable to handle this message, and 422 * have no way to recover from this. Fail the 423 * transport. 424 */ 425 ISER_LOG(CE_NOTE, "iser_msg_handle: failed " 426 "iser_iscsihdr_handle"); 427 iser_msg_free(msg); 428 idm_conn_event(iser_conn->ic_idmc, 429 CE_TRANSPORT_FAIL, IDM_STATUS_FAIL); 430 } 431 } else if (opcode == ISER_OPCODE_HELLO_MSG) { /* at the target */ 432 /* 433 * We are currently not supporting Hello Exchange, 434 * since OFED iSER does not. May be revisited. 435 */ 436 ASSERT(opcode != ISER_OPCODE_HELLO_MSG); 437 438 if (iser_conn->ic_type != ISER_CONN_TYPE_TGT) { 439 idm_conn_event(iser_conn->ic_idmc, 440 CE_TRANSPORT_FAIL, IDM_STATUS_FAIL); 441 } 442 443 iser_hello_hdr_t *hello_hdr = (iser_hello_hdr_t *)hdr; 444 445 ISER_LOG(CE_NOTE, "received Hello message: opcode[%d], " 446 "maxver[%d], minver[%d], iser_ird[%d], msg (0x%p)", 447 hello_hdr->opcode, hello_hdr->maxver, hello_hdr->minver, 448 ntohs(hello_hdr->iser_ird), (void *)msg); 449 450 mutex_enter(&iser_conn->ic_lock); 451 452 if (iser_conn->ic_stage != ISER_CONN_STAGE_HELLO_WAIT) { 453 /* target is not expected to receive a Hello */ 454 idm_conn_event(iser_conn->ic_idmc, 455 CE_TRANSPORT_FAIL, IDM_STATUS_FAIL); 456 } 457 458 iser_conn->ic_stage = ISER_CONN_STAGE_HELLOREPLY_SENT; 459 mutex_exit(&iser_conn->ic_lock); 460 461 /* Prepare and send a HelloReply message */ 462 status = iser_xfer_helloreply_msg(chan); 463 if (status != ISER_STATUS_SUCCESS) { 464 465 mutex_enter(&iser_conn->ic_lock); 466 iser_conn->ic_stage = 467 ISER_CONN_STAGE_HELLOREPLY_SENT_FAIL; 468 mutex_exit(&iser_conn->ic_lock); 469 470 idm_conn_event(iser_conn->ic_idmc, 471 CE_TRANSPORT_FAIL, status); 472 } 473 474 /* Free this msg handle */ 475 iser_msg_free(msg); 476 477 } else if (opcode == ISER_OPCODE_HELLOREPLY_MSG) { /* at initiator */ 478 479 /* 480 * We are currently not supporting Hello Exchange, 481 * since OFED iSER does not. May be revisited. 482 */ 483 ASSERT(opcode != ISER_OPCODE_HELLOREPLY_MSG); 484 485 if (iser_conn->ic_type != ISER_CONN_TYPE_INI) { 486 idm_conn_event(iser_conn->ic_idmc, 487 CE_TRANSPORT_FAIL, status); 488 } 489 490 iser_helloreply_hdr_t *hello_hdr = (iser_helloreply_hdr_t *)hdr; 491 492 ISER_LOG(CE_NOTE, "received Hello Reply message: opcode[%d], " 493 "maxver[%d], curver[%d], iser_ord[%d], msg (0x%p)", 494 hello_hdr->opcode, hello_hdr->maxver, hello_hdr->curver, 495 ntohs(hello_hdr->iser_ord), (void *)msg); 496 497 /* Free this msg handle */ 498 iser_msg_free(msg); 499 500 /* 501 * Signal the receipt of HelloReply to the waiting thread 502 * so that the initiator can proceed to the Full Feature 503 * Phase. 504 */ 505 mutex_enter(&iser_conn->ic_lock); 506 iser_conn->ic_stage = ISER_CONN_STAGE_HELLOREPLY_RCV; 507 cv_signal(&iser_conn->ic_stage_cv); 508 mutex_exit(&iser_conn->ic_lock); 509 } else { 510 /* Protocol error: free the msg handle and fail the session */ 511 ISER_LOG(CE_NOTE, "iser_msg_handle: unsupported opcode (0x%x): " 512 "terminating session on IDM handle (0x%p)", opcode, 513 (void *) iser_conn->ic_idmc); 514 515 iser_msg_free(msg); 516 idm_conn_event(iser_conn->ic_idmc, CE_TRANSPORT_FAIL, 517 IDM_STATUS_FAIL); 518 } 519 } 520 521 #define IDM_PDU_OPCODE(PDU) \ 522 ((PDU)->isp_hdr->opcode & ISCSI_OPCODE_MASK) 523 524 /* network to host translation for 24b integers */ 525 static uint32_t 526 n2h24(uchar_t *ptr) 527 { 528 return ((ptr[0] << 16) | (ptr[1] << 8) | ptr[2]); 529 } 530 531 /* ARGSUSED */ 532 static void 533 iser_rx_pdu_cb(idm_pdu_t *pdu, idm_status_t status) 534 { 535 /* Free the iser msg handle and the PDU handle */ 536 iser_msg_free((iser_msg_t *)pdu->isp_transport_private); 537 idm_pdu_free(pdu); 538 } 539 540 int 541 iser_iscsihdr_handle(iser_chan_t *chan, iser_msg_t *msg) 542 { 543 idm_pdu_t *pdu; 544 uint8_t *iser_hdrp; 545 uint8_t *iscsi_hdrp; 546 iscsi_hdr_t *bhs; 547 548 pdu = idm_pdu_alloc_nosleep(sizeof (iscsi_hdr_t), 0); 549 pdu->isp_ic = chan->ic_conn->ic_idmc; 550 ASSERT(pdu->isp_ic != NULL); 551 552 /* Set the iser_msg handle into the transport-private field */ 553 pdu->isp_transport_private = (void *)msg; 554 555 /* Set up a pointer in the pdu handle to the iSER header */ 556 iser_hdrp = (uint8_t *)(uintptr_t)msg->msg_ds.ds_va; 557 if (iser_hdrp == NULL) { 558 ISER_LOG(CE_NOTE, "iser_iscsihdr_handle: iser_hdrp is NULL"); 559 idm_pdu_free(pdu); 560 return (ISER_STATUS_FAIL); 561 } 562 pdu->isp_transport_hdr = (void *)iser_hdrp; 563 pdu->isp_transport_hdrlen = ISER_HEADER_LENGTH; 564 565 /* 566 * Set up a pointer to the iSCSI header, which is directly 567 * after the iSER header in the message. 568 */ 569 iscsi_hdrp = ((uint8_t *)(uintptr_t)msg->msg_ds.ds_va) + 570 ISER_HEADER_LENGTH; 571 if (iscsi_hdrp == NULL) { 572 ISER_LOG(CE_NOTE, "iser_iscsihdr_handle: iscsi_hdrp is NULL"); 573 idm_pdu_free(pdu); 574 return (ISER_STATUS_FAIL); 575 } 576 pdu->isp_hdr = (iscsi_hdr_t *)(uintptr_t)iscsi_hdrp; 577 578 /* Fill in the BHS */ 579 bhs = pdu->isp_hdr; 580 pdu->isp_hdrlen = sizeof (iscsi_hdr_t) + 581 (bhs->hlength * sizeof (uint32_t)); 582 pdu->isp_datalen = n2h24(bhs->dlength); 583 pdu->isp_callback = iser_rx_pdu_cb; 584 585 /* 586 * If datalen > 0, then non-scsi data may be present. Allocate 587 * space in the PDU handle and set a pointer to the data. 588 */ 589 if (pdu->isp_datalen) { 590 pdu->isp_data = ((uint8_t *)(uintptr_t)pdu->isp_hdr) + 591 pdu->isp_hdrlen; 592 } 593 594 /* Process RX PDU */ 595 idm_pdu_rx(pdu->isp_ic, pdu); 596 597 return (DDI_SUCCESS); 598 } 599