1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 2005 SilverStorm Technologies, Inc. All rights reserved. 27 * 28 * This software is available to you under a choice of one of two 29 * licenses. You may choose to be licensed under the terms of the GNU 30 * General Public License (GPL) Version 2, available from the file 31 * COPYING in the main directory of this source tree, or the 32 * OpenIB.org BSD license below: 33 * 34 * Redistribution and use in source and binary forms, with or 35 * without modification, are permitted provided that the following 36 * conditions are met: 37 * 38 * - Redistributions of source code must retain the above 39 * copyright notice, this list of conditions and the following 40 * disclaimer. 41 * 42 * - Redistributions in binary form must reproduce the above 43 * copyright notice, this list of conditions and the following 44 * disclaimer in the documentation and/or other materials 45 * provided with the distribution. 46 * 47 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 48 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 49 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 50 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 51 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 52 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 53 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 54 * SOFTWARE. 55 * 56 */ 57 /* 58 * Sun elects to include this software in Sun product 59 * under the OpenIB BSD license. 60 * 61 * 62 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 63 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 64 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 65 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 66 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 67 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 68 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 69 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 70 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 71 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 72 * POSSIBILITY OF SUCH DAMAGE. 73 */ 74 75 #pragma ident "%Z%%M% %I% %E% SMI" 76 77 #include <sys/ib/clients/rds/rdsib_cm.h> 78 #include <sys/ib/clients/rds/rdsib_ib.h> 79 #include <sys/ib/clients/rds/rdsib_buf.h> 80 #include <sys/ib/clients/rds/rdsib_ep.h> 81 82 /* 83 * This file contains CM related work: 84 * 85 * Service registration/deregistration 86 * Path lookup 87 * CM connection callbacks 88 * CM active and passive connection establishment 89 * Connection failover 90 */ 91 92 /* 93 * Handle an incoming CM REQ 94 */ 95 /* ARGSUSED */ 96 static ibt_cm_status_t 97 rds_handle_cm_req(rds_state_t *statep, ibt_cm_event_t *evp, 98 ibt_cm_return_args_t *rargsp, void *rcmp, ibt_priv_data_len_t rcmp_len) 99 { 100 ibt_cm_req_rcv_t *reqp; 101 ib_gid_t lgid, rgid; 102 rds_cm_private_data_t cmp; 103 rds_session_t *sp; 104 rds_ep_t *ep; 105 ibt_channel_hdl_t chanhdl; 106 int ret; 107 108 RDS_DPRINTF2("rds_handle_cm_req", "Enter"); 109 110 reqp = &evp->cm_event.req; 111 rgid = reqp->req_prim_addr.av_dgid; /* requester gid */ 112 lgid = reqp->req_prim_addr.av_sgid; /* receiver gid */ 113 114 RDS_DPRINTF2(LABEL, "REQ Received: From: %llx:%llx To: %llx:%llx", 115 rgid.gid_prefix, rgid.gid_guid, lgid.gid_prefix, lgid.gid_guid); 116 117 /* validate service id */ 118 if (reqp->req_service_id == RDS_SERVICE_ID) { 119 RDS_DPRINTF0(LABEL, "Version Mismatch: Remote system " 120 "(GUID: 0x%llx) is running an older version of RDS", 121 rgid.gid_guid); 122 return (IBT_CM_REJECT); 123 } 124 125 /* 126 * CM private data brings IP information 127 * Private data received is a stream of bytes and may not be properly 128 * aligned. So, bcopy the data onto the stack before accessing it. 129 */ 130 bcopy((uint8_t *)evp->cm_priv_data, &cmp, 131 sizeof (rds_cm_private_data_t)); 132 133 RDS_DPRINTF2(LABEL, "REQ Received: From IP: 0x%x To IP: 0x%x type: %d", 134 cmp.cmp_localip, cmp.cmp_remip, cmp.cmp_eptype); 135 136 if (cmp.cmp_version != RDS_VERSION) { 137 RDS_DPRINTF0(LABEL, "Version Mismatch: Local version: %d " 138 "Remote version: %d", RDS_VERSION, cmp.cmp_version); 139 return (IBT_CM_REJECT); 140 } 141 142 if (cmp.cmp_arch != RDS_THIS_ARCH) { 143 RDS_DPRINTF2(LABEL, "ARCH does not match (%d != %d)", 144 cmp.cmp_arch, RDS_THIS_ARCH); 145 return (IBT_CM_REJECT); 146 } 147 148 if ((cmp.cmp_eptype != RDS_EP_TYPE_CTRL) && 149 (cmp.cmp_eptype != RDS_EP_TYPE_DATA)) { 150 RDS_DPRINTF2(LABEL, "Unknown Channel type: %d", cmp.cmp_eptype); 151 return (IBT_CM_REJECT); 152 } 153 154 /* user_buffer_size should be same on all nodes */ 155 if (cmp.cmp_user_buffer_size != UserBufferSize) { 156 RDS_DPRINTF2(LABEL, 157 "UserBufferSize Mismatch, this node: %d remote node: %d", 158 UserBufferSize, cmp.cmp_user_buffer_size); 159 return (IBT_CM_REJECT); 160 } 161 162 /* 163 * RDS needs more time to process a failover REQ so send an MRA. 164 * Otherwise, the remote may retry the REQ and fail the connection. 165 */ 166 if ((cmp.cmp_failover) && (cmp.cmp_eptype == RDS_EP_TYPE_DATA)) { 167 RDS_DPRINTF2("rds_handle_cm_req", "Session Failover, send MRA"); 168 (void) ibt_cm_delay(IBT_CM_DELAY_REQ, evp->cm_session_id, 169 10000000 /* 10 sec */, NULL, 0); 170 } 171 172 /* Is there a session to the destination node? */ 173 rw_enter(&statep->rds_sessionlock, RW_READER); 174 sp = rds_session_lkup(statep, cmp.cmp_localip, rgid.gid_guid); 175 rw_exit(&statep->rds_sessionlock); 176 177 if (sp == NULL) { 178 /* 179 * currently there is no session to the destination 180 * remote ip in the private data is the local ip and vice 181 * versa 182 */ 183 sp = rds_session_create(statep, cmp.cmp_remip, cmp.cmp_localip, 184 reqp, RDS_SESSION_PASSIVE); 185 if (sp == NULL) { 186 /* Check the list anyway. */ 187 rw_enter(&statep->rds_sessionlock, RW_READER); 188 sp = rds_session_lkup(statep, cmp.cmp_localip, 189 rgid.gid_guid); 190 rw_exit(&statep->rds_sessionlock); 191 if (sp == NULL) { 192 /* 193 * The only way this can fail is due to lack 194 * of kernel resources 195 */ 196 return (IBT_CM_REJECT); 197 } 198 } 199 } 200 201 rw_enter(&sp->session_lock, RW_WRITER); 202 203 /* catch peer-to-peer case as soon as possible */ 204 if ((sp->session_state == RDS_SESSION_STATE_CREATED) || 205 (sp->session_state == RDS_SESSION_STATE_INIT)) { 206 /* Check possible peer-to-peer case here */ 207 if (sp->session_type != RDS_SESSION_PASSIVE) { 208 RDS_DPRINTF2("rds_handle_cm_req", 209 "SP(%p) Peer-peer connection handling", sp); 210 if (lgid.gid_guid > rgid.gid_guid) { 211 /* this node is active so reject this request */ 212 rw_exit(&sp->session_lock); 213 return (IBT_CM_REJECT); 214 } else { 215 /* this node is passive, change the session */ 216 sp->session_type = RDS_SESSION_PASSIVE; 217 sp->session_lgid = lgid; 218 sp->session_rgid = rgid; 219 } 220 } 221 } 222 223 RDS_DPRINTF2(LABEL, "SP(%p) state: %d", sp, sp->session_state); 224 225 switch (sp->session_state) { 226 case RDS_SESSION_STATE_CONNECTED: 227 RDS_DPRINTF2(LABEL, "STALE Session Detected SP(%p)", sp); 228 sp->session_state = RDS_SESSION_STATE_ERROR; 229 RDS_DPRINTF3("rds_handle_cm_req", "SP(%p) State " 230 "RDS_SESSION_STATE_ERROR", sp); 231 232 /* FALLTHRU */ 233 case RDS_SESSION_STATE_ERROR: 234 case RDS_SESSION_STATE_PASSIVE_CLOSING: 235 sp->session_type = RDS_SESSION_PASSIVE; 236 rw_exit(&sp->session_lock); 237 238 rds_session_close(sp, IBT_NOCALLBACKS, 1); 239 240 /* move the session to init state */ 241 rw_enter(&sp->session_lock, RW_WRITER); 242 ret = rds_session_reinit(sp, lgid); 243 sp->session_myip = cmp.cmp_remip; 244 sp->session_lgid = lgid; 245 sp->session_rgid = rgid; 246 if (ret != 0) { 247 rds_session_fini(sp); 248 sp->session_state = RDS_SESSION_STATE_FAILED; 249 RDS_DPRINTF3("rds_handle_cm_req", "SP(%p) State " 250 "RDS_SESSION_STATE_FAILED", sp); 251 rw_exit(&sp->session_lock); 252 return (IBT_CM_REJECT); 253 } else { 254 sp->session_state = RDS_SESSION_STATE_INIT; 255 RDS_DPRINTF3("rds_handle_cm_req", "SP(%p) State " 256 "RDS_SESSION_STATE_INIT", sp); 257 } 258 259 if (cmp.cmp_eptype == RDS_EP_TYPE_CTRL) { 260 ep = &sp->session_ctrlep; 261 } else { 262 ep = &sp->session_dataep; 263 } 264 break; 265 case RDS_SESSION_STATE_CREATED: 266 case RDS_SESSION_STATE_FAILED: 267 case RDS_SESSION_STATE_FINI: 268 /* 269 * Initialize both channels, we accept this connection 270 * only if both channels are initialized 271 */ 272 sp->session_type = RDS_SESSION_PASSIVE; 273 sp->session_lgid = lgid; 274 sp->session_rgid = rgid; 275 sp->session_state = RDS_SESSION_STATE_CREATED; 276 RDS_DPRINTF3("rds_handle_cm_req", "SP(%p) State " 277 "RDS_SESSION_STATE_CREATED", sp); 278 ret = rds_session_init(sp); 279 if (ret != 0) { 280 /* Seems like there are not enough resources */ 281 sp->session_state = RDS_SESSION_STATE_FAILED; 282 RDS_DPRINTF3("rds_handle_cm_req", "SP(%p) State " 283 "RDS_SESSION_STATE_FAILED", sp); 284 rw_exit(&sp->session_lock); 285 return (IBT_CM_REJECT); 286 } 287 sp->session_state = RDS_SESSION_STATE_INIT; 288 RDS_DPRINTF3("rds_handle_cm_req", "SP(%p) State " 289 "RDS_SESSION_STATE_INIT", sp); 290 291 /* FALLTHRU */ 292 case RDS_SESSION_STATE_INIT: 293 /* 294 * When re-using an existing session, make sure the 295 * session is still through the same HCA. Otherwise, the 296 * memory registrations have to moved to the new HCA. 297 */ 298 if (cmp.cmp_eptype == RDS_EP_TYPE_DATA) { 299 if (sp->session_lgid.gid_guid != lgid.gid_guid) { 300 RDS_DPRINTF2("rds_handle_cm_req", 301 "Existing Session but different gid " 302 "existing: 0x%llx, new: 0x%llx, " 303 "sending an MRA", 304 sp->session_lgid.gid_guid, lgid.gid_guid); 305 (void) ibt_cm_delay(IBT_CM_DELAY_REQ, 306 evp->cm_session_id, 10000000 /* 10 sec */, 307 NULL, 0); 308 ret = rds_session_reinit(sp, lgid); 309 if (ret != 0) { 310 rds_session_fini(sp); 311 sp->session_state = 312 RDS_SESSION_STATE_FAILED; 313 sp->session_failover = 0; 314 RDS_DPRINTF3("rds_failover_session", 315 "SP(%p) State " 316 "RDS_SESSION_STATE_FAILED", sp); 317 rw_exit(&sp->session_lock); 318 return (IBT_CM_REJECT); 319 } 320 } 321 ep = &sp->session_dataep; 322 } else { 323 ep = &sp->session_ctrlep; 324 } 325 326 break; 327 default: 328 RDS_DPRINTF2(LABEL, "ERROR: SP(%p) is in an unexpected " 329 "state: %d", sp, sp->session_state); 330 rw_exit(&sp->session_lock); 331 return (IBT_CM_REJECT); 332 } 333 334 sp->session_failover = 0; /* reset any previous value */ 335 if (cmp.cmp_failover) { 336 RDS_DPRINTF2("rds_handle_cm_req", 337 "SP(%p) Failover Session (BP %p)", sp, cmp.cmp_last_bufid); 338 sp->session_failover = 1; 339 } 340 341 mutex_enter(&ep->ep_lock); 342 if (ep->ep_state == RDS_EP_STATE_UNCONNECTED) { 343 ep->ep_state = RDS_EP_STATE_PASSIVE_PENDING; 344 sp->session_type = RDS_SESSION_PASSIVE; 345 rw_exit(&sp->session_lock); 346 } else if (ep->ep_state == RDS_EP_STATE_ACTIVE_PENDING) { 347 rw_exit(&sp->session_lock); 348 /* 349 * Peer to peer connection. There is an active 350 * connection pending on this ep. The one with 351 * greater port guid becomes active and the 352 * other becomes passive. 353 */ 354 RDS_DPRINTF2("rds_handle_cm_req", 355 "EP(%p) Peer-peer connection handling", ep); 356 if (lgid.gid_guid > rgid.gid_guid) { 357 /* this node is active so reject this request */ 358 mutex_exit(&ep->ep_lock); 359 RDS_DPRINTF2(LABEL, "SP(%p) EP(%p): " 360 "Rejecting passive in favor of active", sp, ep); 361 return (IBT_CM_REJECT); 362 } else { 363 /* 364 * This session is not the active end, change it 365 * to passive end. 366 */ 367 ASSERT(sp->session_type == RDS_SESSION_ACTIVE); 368 ep->ep_state = RDS_EP_STATE_PASSIVE_PENDING; 369 370 rw_enter(&sp->session_lock, RW_WRITER); 371 sp->session_type = RDS_SESSION_PASSIVE; 372 sp->session_lgid = lgid; 373 sp->session_rgid = rgid; 374 rw_exit(&sp->session_lock); 375 } 376 } else { 377 rw_exit(&sp->session_lock); 378 } 379 380 ep->ep_lbufid = cmp.cmp_last_bufid; 381 ep->ep_ackwr.wr.rc.rcwr.rdma.rdma_raddr = (ib_vaddr_t)cmp.cmp_ack_addr; 382 ep->ep_ackwr.wr.rc.rcwr.rdma.rdma_rkey = cmp.cmp_ack_rkey; 383 cmp.cmp_last_bufid = ep->ep_rbufid; 384 cmp.cmp_ack_addr = ep->ep_ack_addr; 385 cmp.cmp_ack_rkey = ep->ep_ack_rkey; 386 mutex_exit(&ep->ep_lock); 387 388 /* continue with accepting the connection request for this channel */ 389 chanhdl = rds_ep_alloc_rc_channel(ep, reqp->req_prim_hca_port); 390 if (chanhdl == NULL) { 391 mutex_enter(&ep->ep_lock); 392 ep->ep_state = RDS_EP_STATE_UNCONNECTED; 393 mutex_exit(&ep->ep_lock); 394 return (IBT_CM_REJECT); 395 } 396 397 /* pre-post recv buffers in the RQ */ 398 rds_post_recv_buf((void *)chanhdl); 399 400 rargsp->cm_ret_len = sizeof (rds_cm_private_data_t); 401 bcopy((uint8_t *)&cmp, rcmp, sizeof (rds_cm_private_data_t)); 402 rargsp->cm_ret.rep.cm_channel = chanhdl; 403 rargsp->cm_ret.rep.cm_rdma_ra_out = 4; 404 rargsp->cm_ret.rep.cm_rdma_ra_in = 4; 405 rargsp->cm_ret.rep.cm_rnr_retry_cnt = MinRnrRetry; 406 407 RDS_DPRINTF2("rds_handle_cm_req", "Return: SP(%p) EP(%p) Chan (%p)", 408 sp, ep, chanhdl); 409 410 return (IBT_CM_ACCEPT); 411 } 412 413 /* 414 * Handle an incoming CM REP 415 * Pre-post recv buffers for the QP 416 */ 417 /* ARGSUSED */ 418 static ibt_cm_status_t 419 rds_handle_cm_rep(ibt_cm_event_t *evp, ibt_cm_return_args_t *rargsp, 420 void *rcmp, ibt_priv_data_len_t rcmp_len) 421 { 422 rds_ep_t *ep; 423 rds_cm_private_data_t cmp; 424 425 RDS_DPRINTF2("rds_handle_cm_rep", "Enter"); 426 427 /* pre-post recv buffers in the RQ */ 428 rds_post_recv_buf((void *)evp->cm_channel); 429 430 ep = (rds_ep_t *)ibt_get_chan_private(evp->cm_channel); 431 bcopy((uint8_t *)evp->cm_priv_data, &cmp, 432 sizeof (rds_cm_private_data_t)); 433 ep->ep_lbufid = cmp.cmp_last_bufid; 434 ep->ep_ackwr.wr.rc.rcwr.rdma.rdma_raddr = (ib_vaddr_t)cmp.cmp_ack_addr; 435 ep->ep_ackwr.wr.rc.rcwr.rdma.rdma_rkey = cmp.cmp_ack_rkey; 436 437 rargsp->cm_ret_len = 0; 438 439 RDS_DPRINTF2("rds_handle_cm_rep", "Return: lbufid: %p", ep->ep_lbufid); 440 441 return (IBT_CM_ACCEPT); 442 } 443 444 /* 445 * Handle CONN EST 446 */ 447 static ibt_cm_status_t 448 rds_handle_cm_conn_est(ibt_cm_event_t *evp) 449 { 450 rds_session_t *sp; 451 rds_ep_t *ep; 452 453 ep = (rds_ep_t *)ibt_get_chan_private(evp->cm_channel); 454 455 RDS_DPRINTF2("rds_handle_cm_conn_est", "EP(%p) State: %d", ep, 456 ep->ep_state); 457 458 mutex_enter(&ep->ep_lock); 459 ASSERT((ep->ep_state == RDS_EP_STATE_ACTIVE_PENDING) || 460 (ep->ep_state == RDS_EP_STATE_PASSIVE_PENDING)); 461 ep->ep_state = RDS_EP_STATE_CONNECTED; 462 ep->ep_chanhdl = evp->cm_channel; 463 sp = ep->ep_sp; 464 mutex_exit(&ep->ep_lock); 465 466 (void) rds_session_active(sp); 467 468 RDS_DPRINTF2("rds_handle_cm_conn_est", "Return"); 469 return (IBT_CM_ACCEPT); 470 } 471 472 /* 473 * Handle CONN CLOSED 474 */ 475 static ibt_cm_status_t 476 rds_handle_cm_conn_closed(ibt_cm_event_t *evp) 477 { 478 rds_ep_t *ep; 479 rds_session_t *sp; 480 481 /* Catch DREQs but ignore DREPs */ 482 if (evp->cm_event.closed != IBT_CM_CLOSED_DREQ_RCVD) { 483 RDS_DPRINTF2("rds_handle_cm_conn_closed", 484 "Ignoring Event: %d received", evp->cm_event.closed); 485 return (IBT_CM_ACCEPT); 486 } 487 488 ep = (rds_ep_t *)ibt_get_chan_private(evp->cm_channel); 489 sp = ep->ep_sp; 490 RDS_DPRINTF2("rds_handle_cm_conn_closed", "EP(%p) Enter", ep); 491 492 mutex_enter(&ep->ep_lock); 493 if (ep->ep_state != RDS_EP_STATE_CONNECTED) { 494 /* Ignore this DREQ */ 495 RDS_DPRINTF2("rds_handle_cm_conn_closed", 496 "EP(%p) not connected, state: %d", ep, ep->ep_state); 497 mutex_exit(&ep->ep_lock); 498 return (IBT_CM_ACCEPT); 499 } 500 ep->ep_state = RDS_EP_STATE_CLOSING; 501 mutex_exit(&ep->ep_lock); 502 503 rw_enter(&sp->session_lock, RW_WRITER); 504 RDS_DPRINTF2("rds_handle_cm_conn_closed", "SP(%p) - state: %d", sp, 505 sp->session_state); 506 507 switch (sp->session_state) { 508 case RDS_SESSION_STATE_CONNECTED: 509 sp->session_state = RDS_SESSION_STATE_PASSIVE_CLOSING; 510 RDS_DPRINTF3("rds_handle_cm_conn_closed", "SP(%p) State " 511 "RDS_SESSION_STATE_PASSIVE_CLOSING", sp); 512 break; 513 514 case RDS_SESSION_STATE_PASSIVE_CLOSING: 515 sp->session_state = RDS_SESSION_STATE_CLOSED; 516 RDS_DPRINTF3("rds_handle_cm_conn_closed", "SP(%p) State " 517 "RDS_SESSION_STATE_CLOSED", sp); 518 rds_passive_session_fini(sp); 519 sp->session_state = RDS_SESSION_STATE_FINI; 520 RDS_DPRINTF3("rds_handle_cm_conn_closed", 521 "SP(%p) State RDS_SESSION_STATE_FINI", sp); 522 break; 523 524 case RDS_SESSION_STATE_ACTIVE_CLOSING: 525 case RDS_SESSION_STATE_ERROR: 526 case RDS_SESSION_STATE_CLOSED: 527 break; 528 529 case RDS_SESSION_STATE_INIT: 530 sp->session_state = RDS_SESSION_STATE_ERROR; 531 RDS_DPRINTF3("rds_handle_cm_conn_closed", "SP(%p) State " 532 "RDS_SESSION_STATE_ERROR", sp); 533 rds_passive_session_fini(sp); 534 sp->session_state = RDS_SESSION_STATE_FAILED; 535 RDS_DPRINTF3("rds_handle_cm_conn_closed", 536 "SP(%p) State RDS_SESSION_STATE_FAILED", sp); 537 break; 538 539 default: 540 RDS_DPRINTF2("rds_handle_cm_conn_closed", 541 "SP(%p) - Unexpected state: %d", sp, sp->session_state); 542 rds_passive_session_fini(sp); 543 sp->session_state = RDS_SESSION_STATE_FAILED; 544 RDS_DPRINTF3("rds_handle_cm_conn_closed", "SP(%p) State " 545 "RDS_SESSION_STATE_FAILED", sp); 546 } 547 rw_exit(&sp->session_lock); 548 549 mutex_enter(&ep->ep_lock); 550 ep->ep_state = RDS_EP_STATE_CLOSED; 551 mutex_exit(&ep->ep_lock); 552 553 RDS_DPRINTF2("rds_handle_cm_conn_closed", "SP(%p) Return", sp); 554 return (IBT_CM_ACCEPT); 555 } 556 557 /* 558 * Handle EVENT FAILURE 559 */ 560 static ibt_cm_status_t 561 rds_handle_cm_event_failure(ibt_cm_event_t *evp) 562 { 563 rds_ep_t *ep; 564 rds_session_t *sp; 565 int ret; 566 567 RDS_DPRINTF2("rds_handle_cm_event_failure", "Enter: Chan hdl: 0x%p " 568 "Code: %d msg: %d reason: %d", evp->cm_channel, 569 evp->cm_event.failed.cf_code, evp->cm_event.failed.cf_msg, 570 evp->cm_event.failed.cf_reason); 571 572 if (evp->cm_event.failed.cf_reason == IBT_CM_INVALID_SID) { 573 RDS_DPRINTF0(LABEL, 574 "Received REJ with reason IBT_CM_INVALID_SID: " 575 "The remote system could be running an older RDS version"); 576 } 577 578 if (evp->cm_channel == NULL) { 579 return (IBT_CM_ACCEPT); 580 } 581 582 ep = (rds_ep_t *)ibt_get_chan_private(evp->cm_channel); 583 sp = ep->ep_sp; 584 585 mutex_enter(&ep->ep_lock); 586 ep->ep_state = RDS_EP_STATE_ERROR; 587 mutex_exit(&ep->ep_lock); 588 589 rw_enter(&sp->session_lock, RW_WRITER); 590 if (sp->session_type == RDS_SESSION_PASSIVE) { 591 RDS_DPRINTF2("rds_handle_cm_event_failure", 592 "SP(%p) - state: %d", sp, sp->session_state); 593 if ((sp->session_state == RDS_SESSION_STATE_INIT) || 594 (sp->session_state == RDS_SESSION_STATE_CONNECTED)) { 595 sp->session_state = RDS_SESSION_STATE_ERROR; 596 RDS_DPRINTF3("rds_handle_cm_event_failure", 597 "SP(%p) State RDS_SESSION_STATE_ERROR", sp); 598 599 /* 600 * Store the cm_channel for freeing later 601 * Active side frees it on ibt_open_rc_channel 602 * failure 603 */ 604 if (ep->ep_chanhdl == NULL) { 605 ep->ep_chanhdl = evp->cm_channel; 606 } 607 rw_exit(&sp->session_lock); 608 609 /* 610 * rds_passive_session_fini should not be called 611 * directly in the CM handler. It will cause a deadlock. 612 */ 613 ret = ddi_taskq_dispatch(rds_taskq, 614 rds_cleanup_passive_session, (void *)sp, 615 DDI_NOSLEEP); 616 if (ret != DDI_SUCCESS) { 617 RDS_DPRINTF1("rds_handle_cm_event_failure", 618 "SP(%p) TaskQ dispatch FAILED:%d", sp, ret); 619 } 620 return (IBT_CM_ACCEPT); 621 } 622 } 623 rw_exit(&sp->session_lock); 624 625 RDS_DPRINTF2("rds_handle_cm_event_failure", "SP(%p) Return", sp); 626 return (IBT_CM_ACCEPT); 627 } 628 629 /* 630 * CM Handler 631 * 632 * Called by IBCM 633 * The cm_private type differs for active and passive events. 634 */ 635 ibt_cm_status_t 636 rds_cm_handler(void *cm_private, ibt_cm_event_t *eventp, 637 ibt_cm_return_args_t *ret_args, void *ret_priv_data, 638 ibt_priv_data_len_t ret_len_max) 639 { 640 ibt_cm_status_t ret = IBT_CM_ACCEPT; 641 642 RDS_DPRINTF2("rds_cm_handler", "Enter: event: %d", eventp->cm_type); 643 644 switch (eventp->cm_type) { 645 case IBT_CM_EVENT_REQ_RCV: 646 ret = rds_handle_cm_req((rds_state_t *)cm_private, eventp, 647 ret_args, ret_priv_data, ret_len_max); 648 break; 649 case IBT_CM_EVENT_REP_RCV: 650 ret = rds_handle_cm_rep(eventp, ret_args, ret_priv_data, 651 ret_len_max); 652 break; 653 case IBT_CM_EVENT_MRA_RCV: 654 /* Not supported */ 655 break; 656 case IBT_CM_EVENT_CONN_EST: 657 ret = rds_handle_cm_conn_est(eventp); 658 break; 659 case IBT_CM_EVENT_CONN_CLOSED: 660 ret = rds_handle_cm_conn_closed(eventp); 661 break; 662 case IBT_CM_EVENT_FAILURE: 663 ret = rds_handle_cm_event_failure(eventp); 664 break; 665 case IBT_CM_EVENT_LAP_RCV: 666 /* Not supported */ 667 RDS_DPRINTF2(LABEL, "LAP message received"); 668 break; 669 case IBT_CM_EVENT_APR_RCV: 670 /* Not supported */ 671 RDS_DPRINTF2(LABEL, "APR message received"); 672 break; 673 default: 674 break; 675 } 676 677 RDS_DPRINTF2("rds_cm_handler", "Return"); 678 679 return (ret); 680 } 681 682 /* This is based on OFED Linux RDS */ 683 #define RDS_PORT_NUM 6556 684 685 /* 686 * Register the wellknown service with service id: RDS_SERVICE_ID 687 * Incoming connection requests should arrive on this service id. 688 */ 689 ibt_srv_hdl_t 690 rds_register_service(ibt_clnt_hdl_t rds_ibhdl) 691 { 692 ibt_srv_hdl_t srvhdl; 693 ibt_srv_desc_t srvdesc; 694 int ret; 695 696 RDS_DPRINTF2("rds_register_service", "Enter: 0x%p", rds_ibhdl); 697 698 bzero(&srvdesc, sizeof (ibt_srv_desc_t)); 699 srvdesc.sd_handler = rds_cm_handler; 700 srvdesc.sd_flags = IBT_SRV_NO_FLAGS; 701 702 /* 703 * Register the old service id for backward compatibility 704 * REQs received on this service id would be rejected 705 */ 706 ret = ibt_register_service(rds_ibhdl, &srvdesc, RDS_SERVICE_ID, 707 1, &rdsib_statep->rds_old_srvhdl, NULL); 708 if (ret != IBT_SUCCESS) { 709 RDS_DPRINTF2(LABEL, 710 "RDS Service (0x%llx) Registration Failed: %d", 711 RDS_SERVICE_ID, ret); 712 return (NULL); 713 } 714 715 /* 716 * This is the new service id as per: 717 * Annex A11: RDMA IP CM Service 718 */ 719 rdsib_statep->rds_service_id = ibt_get_ip_sid(IPPROTO_TCP, 720 RDS_PORT_NUM); 721 ret = ibt_register_service(rds_ibhdl, &srvdesc, 722 rdsib_statep->rds_service_id, 1, &srvhdl, NULL); 723 if (ret != IBT_SUCCESS) { 724 RDS_DPRINTF2(LABEL, 725 "RDS Service (0x%llx) Registration Failed: %d", 726 rdsib_statep->rds_service_id, ret); 727 return (NULL); 728 } 729 730 RDS_DPRINTF2("rds_register_service", "Return: 0x%p", srvhdl); 731 return (srvhdl); 732 } 733 734 /* Bind the RDS service on all ports */ 735 int 736 rds_bind_service(rds_state_t *statep) 737 { 738 rds_hca_t *hcap; 739 ib_gid_t gid; 740 uint_t jx, nbinds = 0, nports = 0; 741 int ret; 742 743 RDS_DPRINTF2("rds_bind_service", "Enter: 0x%p", statep); 744 745 hcap = statep->rds_hcalistp; 746 while (hcap != NULL) { 747 for (jx = 0; jx < hcap->hca_nports; jx++) { 748 nports++; 749 if (hcap->hca_pinfop[jx].p_linkstate != 750 IBT_PORT_ACTIVE) { 751 /* 752 * service bind will be called in the async 753 * handler when the port comes up 754 */ 755 continue; 756 } 757 758 gid = hcap->hca_pinfop[jx].p_sgid_tbl[0]; 759 RDS_DPRINTF5(LABEL, "HCA: 0x%llx Port: %d " 760 "gid: %llx:%llx", hcap->hca_guid, 761 hcap->hca_pinfop[jx].p_port_num, gid.gid_prefix, 762 gid.gid_guid); 763 764 /* pass statep as cm_private */ 765 ret = ibt_bind_service(statep->rds_srvhdl, gid, 766 NULL, statep, NULL); 767 if (ret != IBT_SUCCESS) { 768 RDS_DPRINTF2(LABEL, "Bind service for " 769 "HCA: 0x%llx Port: %d gid %llx:%llx " 770 "failed: %d", hcap->hca_guid, 771 hcap->hca_pinfop[jx].p_port_num, 772 gid.gid_prefix, gid.gid_guid, ret); 773 continue; 774 } 775 776 nbinds++; 777 778 /* bind the old service, ignore if it fails */ 779 ret = ibt_bind_service(statep->rds_old_srvhdl, gid, 780 NULL, statep, NULL); 781 if (ret != IBT_SUCCESS) { 782 RDS_DPRINTF2(LABEL, "Bind service for " 783 "HCA: 0x%llx Port: %d gid %llx:%llx " 784 "failed: %d", hcap->hca_guid, 785 hcap->hca_pinfop[jx].p_port_num, 786 gid.gid_prefix, gid.gid_guid, ret); 787 } 788 } 789 hcap = hcap->hca_nextp; 790 } 791 792 RDS_DPRINTF2(LABEL, "RDS Service available on %d/%d ports", 793 nbinds, nports); 794 795 #if 0 796 if (nbinds == 0) { 797 return (-1); 798 } 799 #endif 800 801 RDS_DPRINTF2("rds_bind_service", "Return"); 802 803 return (0); 804 } 805 806 /* Open an RC connection */ 807 int 808 rds_open_rc_channel(rds_ep_t *ep, ibt_path_info_t *pinfo, 809 ibt_execution_mode_t mode, ibt_channel_hdl_t *chanhdl) 810 { 811 rds_session_t *sp; 812 ibt_chan_open_args_t ocargs; 813 ibt_rc_returns_t ocrets; 814 rds_cm_private_data_t cmp; 815 uint8_t hca_port; 816 ibt_channel_hdl_t hdl; 817 ibt_status_t ret = 0; 818 ibt_ip_cm_info_t ipcm_info; 819 820 RDS_DPRINTF2("rds_open_rc_channel", "Enter: EP(%p) mode: %d", ep, mode); 821 822 sp = ep->ep_sp; 823 824 bzero(&ipcm_info, sizeof (ibt_ip_cm_info_t)); 825 ipcm_info.src_addr.family = AF_INET; 826 ipcm_info.src_addr.un.ip4addr = sp->session_myip; 827 ipcm_info.dst_addr.family = AF_INET; 828 ipcm_info.dst_addr.un.ip4addr = sp->session_remip; 829 ipcm_info.src_port = 6556; /* based on OFED RDS */ 830 ret = ibt_format_ip_private_data(&ipcm_info, 831 sizeof (rds_cm_private_data_t), &cmp); 832 if (ret != IBT_SUCCESS) { 833 RDS_DPRINTF2(LABEL, "SP(%p) EP(%p) ibt_format_ip_private_data " 834 "failed: %d", sp, ep, ret); 835 return (-1); 836 } 837 838 hca_port = pinfo->pi_prim_cep_path.cep_hca_port_num; 839 840 hdl = rds_ep_alloc_rc_channel(ep, hca_port); 841 if (hdl == NULL) { 842 return (-1); 843 } 844 845 cmp.cmp_version = RDS_VERSION; 846 cmp.cmp_arch = RDS_THIS_ARCH; 847 cmp.cmp_remip = sp->session_remip; 848 cmp.cmp_localip = sp->session_myip; 849 cmp.cmp_eptype = ep->ep_type; 850 cmp.cmp_failover = sp->session_failover; 851 cmp.cmp_last_bufid = ep->ep_rbufid; 852 cmp.cmp_user_buffer_size = UserBufferSize; 853 cmp.cmp_ack_addr = ep->ep_ack_addr; 854 cmp.cmp_ack_rkey = ep->ep_ack_rkey; 855 856 bzero(&ocargs, sizeof (ibt_chan_open_args_t)); 857 bzero(&ocrets, sizeof (ibt_rc_returns_t)); 858 ocargs.oc_path = pinfo; 859 ocargs.oc_cm_handler = rds_cm_handler; 860 ocargs.oc_cm_clnt_private = NULL; 861 ocargs.oc_rdma_ra_out = 4; 862 ocargs.oc_rdma_ra_in = 4; 863 ocargs.oc_priv_data_len = sizeof (rds_cm_private_data_t); 864 ocargs.oc_priv_data = &cmp; 865 ocargs.oc_path_retry_cnt = IBPathRetryCount; 866 ocargs.oc_path_rnr_retry_cnt = MinRnrRetry; 867 ret = ibt_open_rc_channel(hdl, IBT_OCHAN_NO_FLAGS, 868 mode, &ocargs, &ocrets); 869 if (ret != IBT_SUCCESS) { 870 RDS_DPRINTF2(LABEL, "SP(%p) EP(%p) ibt_open_rc_channel " 871 "failed: %d", sp, ep, ret); 872 (void) ibt_flush_channel(hdl); 873 (void) ibt_free_channel(hdl); 874 /* cleanup stuff allocated in rds_ep_alloc_rc_channel */ 875 (void) ibt_free_cq(ep->ep_recvcq); 876 ep->ep_recvcq = NULL; 877 (void) ibt_free_cq(ep->ep_sendcq); 878 ep->ep_sendcq = NULL; 879 return (-1); 880 } 881 882 *chanhdl = hdl; 883 884 RDS_DPRINTF2("rds_open_rc_channel", "Return: EP(%p) Chan: %p", ep, 885 *chanhdl); 886 887 return (0); 888 } 889 890 int 891 rds_close_rc_channel(ibt_channel_hdl_t chanhdl, ibt_execution_mode_t mode) 892 { 893 int ret; 894 895 RDS_DPRINTF2("rds_close_rc_channel", "Enter: Chan(%p) Mode(%d)", 896 chanhdl, mode); 897 898 ret = ibt_close_rc_channel(chanhdl, mode, NULL, 0, NULL, NULL, 0); 899 900 RDS_DPRINTF2("rds_close_rc_channel", "Return Chan(%p)", chanhdl); 901 902 return (ret); 903 } 904