1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 2005 SilverStorm Technologies, Inc. All rights reserved. 27 * 28 * This software is available to you under a choice of one of two 29 * licenses. You may choose to be licensed under the terms of the GNU 30 * General Public License (GPL) Version 2, available from the file 31 * COPYING in the main directory of this source tree, or the 32 * OpenIB.org BSD license below: 33 * 34 * Redistribution and use in source and binary forms, with or 35 * without modification, are permitted provided that the following 36 * conditions are met: 37 * 38 * - Redistributions of source code must retain the above 39 * copyright notice, this list of conditions and the following 40 * disclaimer. 41 * 42 * - Redistributions in binary form must reproduce the above 43 * copyright notice, this list of conditions and the following 44 * disclaimer in the documentation and/or other materials 45 * provided with the distribution. 46 * 47 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 48 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 49 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 50 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 51 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 52 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 53 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 54 * SOFTWARE. 55 * 56 */ 57 /* 58 * Sun elects to include this software in Sun product 59 * under the OpenIB BSD license. 60 * 61 * 62 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 63 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 64 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 65 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 66 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 67 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 68 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 69 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 70 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 71 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 72 * POSSIBILITY OF SUCH DAMAGE. 73 */ 74 75 #pragma ident "%Z%%M% %I% %E% SMI" 76 77 #include <sys/ib/clients/rds/rdsib_cm.h> 78 #include <sys/ib/clients/rds/rdsib_ib.h> 79 #include <sys/ib/clients/rds/rdsib_buf.h> 80 #include <sys/ib/clients/rds/rdsib_ep.h> 81 82 /* 83 * This file contains CM related work: 84 * 85 * Service registration/deregistration 86 * Path lookup 87 * CM connection callbacks 88 * CM active and passive connection establishment 89 * Connection failover 90 */ 91 92 #define SRCIP src_addr.un.ip4addr 93 #define DSTIP dst_addr.un.ip4addr 94 95 /* 96 * Handle an incoming CM REQ 97 */ 98 /* ARGSUSED */ 99 static ibt_cm_status_t 100 rds_handle_cm_req(rds_state_t *statep, ibt_cm_event_t *evp, 101 ibt_cm_return_args_t *rargsp, void *rcmp, ibt_priv_data_len_t rcmp_len) 102 { 103 ibt_cm_req_rcv_t *reqp; 104 ib_gid_t lgid, rgid; 105 rds_cm_private_data_t cmp; 106 rds_session_t *sp; 107 rds_ep_t *ep; 108 ibt_channel_hdl_t chanhdl; 109 ibt_ip_cm_info_t ipcm_info; 110 int ret; 111 112 RDS_DPRINTF2("rds_handle_cm_req", "Enter"); 113 114 reqp = &evp->cm_event.req; 115 rgid = reqp->req_prim_addr.av_dgid; /* requester gid */ 116 lgid = reqp->req_prim_addr.av_sgid; /* receiver gid */ 117 118 RDS_DPRINTF2(LABEL, "REQ Received: From: %llx:%llx To: %llx:%llx", 119 rgid.gid_prefix, rgid.gid_guid, lgid.gid_prefix, lgid.gid_guid); 120 121 /* validate service id */ 122 if (reqp->req_service_id == RDS_SERVICE_ID) { 123 RDS_DPRINTF0(LABEL, "Version Mismatch: Remote system " 124 "(GUID: 0x%llx) is running an older version of RDS", 125 rgid.gid_guid); 126 return (IBT_CM_REJECT); 127 } 128 129 /* 130 * CM private data brings IP information 131 * Private data received is a stream of bytes and may not be properly 132 * aligned. So, bcopy the data onto the stack before accessing it. 133 */ 134 bcopy((uint8_t *)evp->cm_priv_data, &cmp, 135 sizeof (rds_cm_private_data_t)); 136 137 /* extract the CM IP info */ 138 ret = ibt_get_ip_data(evp->cm_priv_data_len, evp->cm_priv_data, 139 &ipcm_info); 140 if (ret != IBT_SUCCESS) { 141 RDS_DPRINTF2("rds_handle_cm_req", "ibt_get_ip_data failed: %d", 142 ret); 143 return (IBT_CM_REJECT); 144 } 145 146 RDS_DPRINTF2("rds_handle_cm_req", 147 "REQ Received: From IP: 0x%x To IP: 0x%x type: %d", 148 ipcm_info.SRCIP, ipcm_info.DSTIP, cmp.cmp_eptype); 149 150 if (cmp.cmp_version != RDS_VERSION) { 151 RDS_DPRINTF0(LABEL, "Version Mismatch: Local version: %d " 152 "Remote version: %d", RDS_VERSION, cmp.cmp_version); 153 return (IBT_CM_REJECT); 154 } 155 156 /* RDS supports V4 addresses only */ 157 if ((ipcm_info.src_addr.family != AF_INET) || 158 (ipcm_info.dst_addr.family != AF_INET)) { 159 RDS_DPRINTF2(LABEL, "Unsupported Address Family: " 160 "src: %d dst: %d", ipcm_info.src_addr.family, 161 ipcm_info.dst_addr.family); 162 return (IBT_CM_REJECT); 163 } 164 165 if (cmp.cmp_arch != RDS_THIS_ARCH) { 166 RDS_DPRINTF2(LABEL, "ARCH does not match (%d != %d)", 167 cmp.cmp_arch, RDS_THIS_ARCH); 168 return (IBT_CM_REJECT); 169 } 170 171 if ((cmp.cmp_eptype != RDS_EP_TYPE_CTRL) && 172 (cmp.cmp_eptype != RDS_EP_TYPE_DATA)) { 173 RDS_DPRINTF2(LABEL, "Unknown Channel type: %d", cmp.cmp_eptype); 174 return (IBT_CM_REJECT); 175 } 176 177 /* user_buffer_size should be same on all nodes */ 178 if (cmp.cmp_user_buffer_size != UserBufferSize) { 179 RDS_DPRINTF2(LABEL, 180 "UserBufferSize Mismatch, this node: %d remote node: %d", 181 UserBufferSize, cmp.cmp_user_buffer_size); 182 return (IBT_CM_REJECT); 183 } 184 185 /* 186 * RDS needs more time to process a failover REQ so send an MRA. 187 * Otherwise, the remote may retry the REQ and fail the connection. 188 */ 189 if ((cmp.cmp_failover) && (cmp.cmp_eptype == RDS_EP_TYPE_DATA)) { 190 RDS_DPRINTF2("rds_handle_cm_req", "Session Failover, send MRA"); 191 (void) ibt_cm_delay(IBT_CM_DELAY_REQ, evp->cm_session_id, 192 10000000 /* 10 sec */, NULL, 0); 193 } 194 195 /* Is there a session to the destination node? */ 196 rw_enter(&statep->rds_sessionlock, RW_READER); 197 sp = rds_session_lkup(statep, ipcm_info.SRCIP, rgid.gid_guid); 198 rw_exit(&statep->rds_sessionlock); 199 200 if (sp == NULL) { 201 /* 202 * currently there is no session to the destination 203 * remote ip in the private data is the local ip and vice 204 * versa 205 */ 206 sp = rds_session_create(statep, ipcm_info.DSTIP, 207 ipcm_info.SRCIP, reqp, RDS_SESSION_PASSIVE); 208 if (sp == NULL) { 209 /* Check the list anyway. */ 210 rw_enter(&statep->rds_sessionlock, RW_READER); 211 sp = rds_session_lkup(statep, ipcm_info.SRCIP, 212 rgid.gid_guid); 213 rw_exit(&statep->rds_sessionlock); 214 if (sp == NULL) { 215 /* 216 * The only way this can fail is due to lack 217 * of kernel resources 218 */ 219 return (IBT_CM_REJECT); 220 } 221 } 222 } 223 224 rw_enter(&sp->session_lock, RW_WRITER); 225 226 /* catch peer-to-peer case as soon as possible */ 227 if ((sp->session_state == RDS_SESSION_STATE_CREATED) || 228 (sp->session_state == RDS_SESSION_STATE_INIT)) { 229 /* Check possible peer-to-peer case here */ 230 if (sp->session_type != RDS_SESSION_PASSIVE) { 231 RDS_DPRINTF2("rds_handle_cm_req", 232 "SP(%p) Peer-peer connection handling", sp); 233 if (lgid.gid_guid > rgid.gid_guid) { 234 /* this node is active so reject this request */ 235 rw_exit(&sp->session_lock); 236 return (IBT_CM_REJECT); 237 } else { 238 /* this node is passive, change the session */ 239 sp->session_type = RDS_SESSION_PASSIVE; 240 sp->session_lgid = lgid; 241 sp->session_rgid = rgid; 242 } 243 } 244 } 245 246 RDS_DPRINTF2(LABEL, "SP(%p) state: %d", sp, sp->session_state); 247 248 switch (sp->session_state) { 249 case RDS_SESSION_STATE_CONNECTED: 250 RDS_DPRINTF2(LABEL, "STALE Session Detected SP(%p)", sp); 251 sp->session_state = RDS_SESSION_STATE_ERROR; 252 RDS_DPRINTF3("rds_handle_cm_req", "SP(%p) State " 253 "RDS_SESSION_STATE_ERROR", sp); 254 255 /* FALLTHRU */ 256 case RDS_SESSION_STATE_ERROR: 257 case RDS_SESSION_STATE_PASSIVE_CLOSING: 258 sp->session_type = RDS_SESSION_PASSIVE; 259 rw_exit(&sp->session_lock); 260 261 rds_session_close(sp, IBT_NOCALLBACKS, 1); 262 263 /* move the session to init state */ 264 rw_enter(&sp->session_lock, RW_WRITER); 265 ret = rds_session_reinit(sp, lgid); 266 sp->session_myip = ipcm_info.DSTIP; 267 sp->session_lgid = lgid; 268 sp->session_rgid = rgid; 269 if (ret != 0) { 270 rds_session_fini(sp); 271 sp->session_state = RDS_SESSION_STATE_FAILED; 272 RDS_DPRINTF3("rds_handle_cm_req", "SP(%p) State " 273 "RDS_SESSION_STATE_FAILED", sp); 274 rw_exit(&sp->session_lock); 275 return (IBT_CM_REJECT); 276 } else { 277 sp->session_state = RDS_SESSION_STATE_INIT; 278 RDS_DPRINTF3("rds_handle_cm_req", "SP(%p) State " 279 "RDS_SESSION_STATE_INIT", sp); 280 } 281 282 if (cmp.cmp_eptype == RDS_EP_TYPE_CTRL) { 283 ep = &sp->session_ctrlep; 284 } else { 285 ep = &sp->session_dataep; 286 } 287 break; 288 case RDS_SESSION_STATE_CREATED: 289 case RDS_SESSION_STATE_FAILED: 290 case RDS_SESSION_STATE_FINI: 291 /* 292 * Initialize both channels, we accept this connection 293 * only if both channels are initialized 294 */ 295 sp->session_type = RDS_SESSION_PASSIVE; 296 sp->session_lgid = lgid; 297 sp->session_rgid = rgid; 298 sp->session_state = RDS_SESSION_STATE_CREATED; 299 RDS_DPRINTF3("rds_handle_cm_req", "SP(%p) State " 300 "RDS_SESSION_STATE_CREATED", sp); 301 ret = rds_session_init(sp); 302 if (ret != 0) { 303 /* Seems like there are not enough resources */ 304 sp->session_state = RDS_SESSION_STATE_FAILED; 305 RDS_DPRINTF3("rds_handle_cm_req", "SP(%p) State " 306 "RDS_SESSION_STATE_FAILED", sp); 307 rw_exit(&sp->session_lock); 308 return (IBT_CM_REJECT); 309 } 310 sp->session_state = RDS_SESSION_STATE_INIT; 311 RDS_DPRINTF3("rds_handle_cm_req", "SP(%p) State " 312 "RDS_SESSION_STATE_INIT", sp); 313 314 /* FALLTHRU */ 315 case RDS_SESSION_STATE_INIT: 316 /* 317 * When re-using an existing session, make sure the 318 * session is still through the same HCA. Otherwise, the 319 * memory registrations have to moved to the new HCA. 320 */ 321 if (cmp.cmp_eptype == RDS_EP_TYPE_DATA) { 322 if (sp->session_lgid.gid_guid != lgid.gid_guid) { 323 RDS_DPRINTF2("rds_handle_cm_req", 324 "Existing Session but different gid " 325 "existing: 0x%llx, new: 0x%llx, " 326 "sending an MRA", 327 sp->session_lgid.gid_guid, lgid.gid_guid); 328 (void) ibt_cm_delay(IBT_CM_DELAY_REQ, 329 evp->cm_session_id, 10000000 /* 10 sec */, 330 NULL, 0); 331 ret = rds_session_reinit(sp, lgid); 332 if (ret != 0) { 333 rds_session_fini(sp); 334 sp->session_state = 335 RDS_SESSION_STATE_FAILED; 336 sp->session_failover = 0; 337 RDS_DPRINTF3("rds_failover_session", 338 "SP(%p) State " 339 "RDS_SESSION_STATE_FAILED", sp); 340 rw_exit(&sp->session_lock); 341 return (IBT_CM_REJECT); 342 } 343 } 344 ep = &sp->session_dataep; 345 } else { 346 ep = &sp->session_ctrlep; 347 } 348 349 break; 350 default: 351 RDS_DPRINTF2(LABEL, "ERROR: SP(%p) is in an unexpected " 352 "state: %d", sp, sp->session_state); 353 rw_exit(&sp->session_lock); 354 return (IBT_CM_REJECT); 355 } 356 357 sp->session_failover = 0; /* reset any previous value */ 358 if (cmp.cmp_failover) { 359 RDS_DPRINTF2("rds_handle_cm_req", 360 "SP(%p) Failover Session (BP %p)", sp, cmp.cmp_last_bufid); 361 sp->session_failover = 1; 362 } 363 364 mutex_enter(&ep->ep_lock); 365 if (ep->ep_state == RDS_EP_STATE_UNCONNECTED) { 366 ep->ep_state = RDS_EP_STATE_PASSIVE_PENDING; 367 sp->session_type = RDS_SESSION_PASSIVE; 368 rw_exit(&sp->session_lock); 369 } else if (ep->ep_state == RDS_EP_STATE_ACTIVE_PENDING) { 370 rw_exit(&sp->session_lock); 371 /* 372 * Peer to peer connection. There is an active 373 * connection pending on this ep. The one with 374 * greater port guid becomes active and the 375 * other becomes passive. 376 */ 377 RDS_DPRINTF2("rds_handle_cm_req", 378 "EP(%p) Peer-peer connection handling", ep); 379 if (lgid.gid_guid > rgid.gid_guid) { 380 /* this node is active so reject this request */ 381 mutex_exit(&ep->ep_lock); 382 RDS_DPRINTF2(LABEL, "SP(%p) EP(%p): " 383 "Rejecting passive in favor of active", sp, ep); 384 return (IBT_CM_REJECT); 385 } else { 386 /* 387 * This session is not the active end, change it 388 * to passive end. 389 */ 390 ASSERT(sp->session_type == RDS_SESSION_ACTIVE); 391 ep->ep_state = RDS_EP_STATE_PASSIVE_PENDING; 392 393 rw_enter(&sp->session_lock, RW_WRITER); 394 sp->session_type = RDS_SESSION_PASSIVE; 395 sp->session_lgid = lgid; 396 sp->session_rgid = rgid; 397 rw_exit(&sp->session_lock); 398 } 399 } else { 400 rw_exit(&sp->session_lock); 401 } 402 403 ep->ep_lbufid = cmp.cmp_last_bufid; 404 ep->ep_ackwr.wr.rc.rcwr.rdma.rdma_raddr = (ib_vaddr_t)cmp.cmp_ack_addr; 405 ep->ep_ackwr.wr.rc.rcwr.rdma.rdma_rkey = cmp.cmp_ack_rkey; 406 cmp.cmp_last_bufid = ep->ep_rbufid; 407 cmp.cmp_ack_addr = ep->ep_ack_addr; 408 cmp.cmp_ack_rkey = ep->ep_ack_rkey; 409 mutex_exit(&ep->ep_lock); 410 411 /* continue with accepting the connection request for this channel */ 412 chanhdl = rds_ep_alloc_rc_channel(ep, reqp->req_prim_hca_port); 413 if (chanhdl == NULL) { 414 mutex_enter(&ep->ep_lock); 415 ep->ep_state = RDS_EP_STATE_UNCONNECTED; 416 mutex_exit(&ep->ep_lock); 417 return (IBT_CM_REJECT); 418 } 419 420 /* pre-post recv buffers in the RQ */ 421 rds_post_recv_buf((void *)chanhdl); 422 423 rargsp->cm_ret_len = sizeof (rds_cm_private_data_t); 424 bcopy((uint8_t *)&cmp, rcmp, sizeof (rds_cm_private_data_t)); 425 rargsp->cm_ret.rep.cm_channel = chanhdl; 426 rargsp->cm_ret.rep.cm_rdma_ra_out = 4; 427 rargsp->cm_ret.rep.cm_rdma_ra_in = 4; 428 rargsp->cm_ret.rep.cm_rnr_retry_cnt = MinRnrRetry; 429 430 RDS_DPRINTF2("rds_handle_cm_req", "Return: SP(%p) EP(%p) Chan (%p)", 431 sp, ep, chanhdl); 432 433 return (IBT_CM_ACCEPT); 434 } 435 436 /* 437 * Handle an incoming CM REP 438 * Pre-post recv buffers for the QP 439 */ 440 /* ARGSUSED */ 441 static ibt_cm_status_t 442 rds_handle_cm_rep(ibt_cm_event_t *evp, ibt_cm_return_args_t *rargsp, 443 void *rcmp, ibt_priv_data_len_t rcmp_len) 444 { 445 rds_ep_t *ep; 446 rds_cm_private_data_t cmp; 447 448 RDS_DPRINTF2("rds_handle_cm_rep", "Enter"); 449 450 /* pre-post recv buffers in the RQ */ 451 rds_post_recv_buf((void *)evp->cm_channel); 452 453 ep = (rds_ep_t *)ibt_get_chan_private(evp->cm_channel); 454 bcopy((uint8_t *)evp->cm_priv_data, &cmp, 455 sizeof (rds_cm_private_data_t)); 456 ep->ep_lbufid = cmp.cmp_last_bufid; 457 ep->ep_ackwr.wr.rc.rcwr.rdma.rdma_raddr = (ib_vaddr_t)cmp.cmp_ack_addr; 458 ep->ep_ackwr.wr.rc.rcwr.rdma.rdma_rkey = cmp.cmp_ack_rkey; 459 460 rargsp->cm_ret_len = 0; 461 462 RDS_DPRINTF2("rds_handle_cm_rep", "Return: lbufid: %p", ep->ep_lbufid); 463 464 return (IBT_CM_ACCEPT); 465 } 466 467 /* 468 * Handle CONN EST 469 */ 470 static ibt_cm_status_t 471 rds_handle_cm_conn_est(ibt_cm_event_t *evp) 472 { 473 rds_session_t *sp; 474 rds_ep_t *ep; 475 476 ep = (rds_ep_t *)ibt_get_chan_private(evp->cm_channel); 477 478 RDS_DPRINTF2("rds_handle_cm_conn_est", "EP(%p) State: %d", ep, 479 ep->ep_state); 480 481 mutex_enter(&ep->ep_lock); 482 ASSERT((ep->ep_state == RDS_EP_STATE_ACTIVE_PENDING) || 483 (ep->ep_state == RDS_EP_STATE_PASSIVE_PENDING)); 484 ep->ep_state = RDS_EP_STATE_CONNECTED; 485 ep->ep_chanhdl = evp->cm_channel; 486 sp = ep->ep_sp; 487 mutex_exit(&ep->ep_lock); 488 489 (void) rds_session_active(sp); 490 491 RDS_DPRINTF2("rds_handle_cm_conn_est", "Return"); 492 return (IBT_CM_ACCEPT); 493 } 494 495 /* 496 * Handle CONN CLOSED 497 */ 498 static ibt_cm_status_t 499 rds_handle_cm_conn_closed(ibt_cm_event_t *evp) 500 { 501 rds_ep_t *ep; 502 rds_session_t *sp; 503 504 /* Catch DREQs but ignore DREPs */ 505 if (evp->cm_event.closed != IBT_CM_CLOSED_DREQ_RCVD) { 506 RDS_DPRINTF2("rds_handle_cm_conn_closed", 507 "Ignoring Event: %d received", evp->cm_event.closed); 508 return (IBT_CM_ACCEPT); 509 } 510 511 ep = (rds_ep_t *)ibt_get_chan_private(evp->cm_channel); 512 sp = ep->ep_sp; 513 RDS_DPRINTF2("rds_handle_cm_conn_closed", "EP(%p) Enter", ep); 514 515 mutex_enter(&ep->ep_lock); 516 if (ep->ep_state != RDS_EP_STATE_CONNECTED) { 517 /* Ignore this DREQ */ 518 RDS_DPRINTF2("rds_handle_cm_conn_closed", 519 "EP(%p) not connected, state: %d", ep, ep->ep_state); 520 mutex_exit(&ep->ep_lock); 521 return (IBT_CM_ACCEPT); 522 } 523 ep->ep_state = RDS_EP_STATE_CLOSING; 524 mutex_exit(&ep->ep_lock); 525 526 rw_enter(&sp->session_lock, RW_WRITER); 527 RDS_DPRINTF2("rds_handle_cm_conn_closed", "SP(%p) - state: %d", sp, 528 sp->session_state); 529 530 switch (sp->session_state) { 531 case RDS_SESSION_STATE_CONNECTED: 532 sp->session_state = RDS_SESSION_STATE_PASSIVE_CLOSING; 533 RDS_DPRINTF3("rds_handle_cm_conn_closed", "SP(%p) State " 534 "RDS_SESSION_STATE_PASSIVE_CLOSING", sp); 535 break; 536 537 case RDS_SESSION_STATE_PASSIVE_CLOSING: 538 sp->session_state = RDS_SESSION_STATE_CLOSED; 539 RDS_DPRINTF3("rds_handle_cm_conn_closed", "SP(%p) State " 540 "RDS_SESSION_STATE_CLOSED", sp); 541 rds_passive_session_fini(sp); 542 sp->session_state = RDS_SESSION_STATE_FINI; 543 RDS_DPRINTF3("rds_handle_cm_conn_closed", 544 "SP(%p) State RDS_SESSION_STATE_FINI", sp); 545 break; 546 547 case RDS_SESSION_STATE_ACTIVE_CLOSING: 548 case RDS_SESSION_STATE_ERROR: 549 case RDS_SESSION_STATE_CLOSED: 550 break; 551 552 case RDS_SESSION_STATE_INIT: 553 sp->session_state = RDS_SESSION_STATE_ERROR; 554 RDS_DPRINTF3("rds_handle_cm_conn_closed", "SP(%p) State " 555 "RDS_SESSION_STATE_ERROR", sp); 556 rds_passive_session_fini(sp); 557 sp->session_state = RDS_SESSION_STATE_FAILED; 558 RDS_DPRINTF3("rds_handle_cm_conn_closed", 559 "SP(%p) State RDS_SESSION_STATE_FAILED", sp); 560 break; 561 562 default: 563 RDS_DPRINTF2("rds_handle_cm_conn_closed", 564 "SP(%p) - Unexpected state: %d", sp, sp->session_state); 565 rds_passive_session_fini(sp); 566 sp->session_state = RDS_SESSION_STATE_FAILED; 567 RDS_DPRINTF3("rds_handle_cm_conn_closed", "SP(%p) State " 568 "RDS_SESSION_STATE_FAILED", sp); 569 } 570 rw_exit(&sp->session_lock); 571 572 mutex_enter(&ep->ep_lock); 573 ep->ep_state = RDS_EP_STATE_CLOSED; 574 mutex_exit(&ep->ep_lock); 575 576 RDS_DPRINTF2("rds_handle_cm_conn_closed", "SP(%p) Return", sp); 577 return (IBT_CM_ACCEPT); 578 } 579 580 /* 581 * Handle EVENT FAILURE 582 */ 583 static ibt_cm_status_t 584 rds_handle_cm_event_failure(ibt_cm_event_t *evp) 585 { 586 rds_ep_t *ep; 587 rds_session_t *sp; 588 int ret; 589 590 RDS_DPRINTF2("rds_handle_cm_event_failure", "Enter: Chan hdl: 0x%p " 591 "Code: %d msg: %d reason: %d", evp->cm_channel, 592 evp->cm_event.failed.cf_code, evp->cm_event.failed.cf_msg, 593 evp->cm_event.failed.cf_reason); 594 595 if (evp->cm_event.failed.cf_reason == IBT_CM_INVALID_SID) { 596 RDS_DPRINTF0(LABEL, 597 "Received REJ with reason IBT_CM_INVALID_SID: " 598 "The remote system could be running an older RDS version"); 599 } 600 601 if (evp->cm_channel == NULL) { 602 return (IBT_CM_ACCEPT); 603 } 604 605 ep = (rds_ep_t *)ibt_get_chan_private(evp->cm_channel); 606 sp = ep->ep_sp; 607 608 mutex_enter(&ep->ep_lock); 609 ep->ep_state = RDS_EP_STATE_ERROR; 610 mutex_exit(&ep->ep_lock); 611 612 rw_enter(&sp->session_lock, RW_WRITER); 613 if (sp->session_type == RDS_SESSION_PASSIVE) { 614 RDS_DPRINTF2("rds_handle_cm_event_failure", 615 "SP(%p) - state: %d", sp, sp->session_state); 616 if ((sp->session_state == RDS_SESSION_STATE_INIT) || 617 (sp->session_state == RDS_SESSION_STATE_CONNECTED)) { 618 sp->session_state = RDS_SESSION_STATE_ERROR; 619 RDS_DPRINTF3("rds_handle_cm_event_failure", 620 "SP(%p) State RDS_SESSION_STATE_ERROR", sp); 621 622 /* 623 * Store the cm_channel for freeing later 624 * Active side frees it on ibt_open_rc_channel 625 * failure 626 */ 627 if (ep->ep_chanhdl == NULL) { 628 ep->ep_chanhdl = evp->cm_channel; 629 } 630 rw_exit(&sp->session_lock); 631 632 /* 633 * rds_passive_session_fini should not be called 634 * directly in the CM handler. It will cause a deadlock. 635 */ 636 ret = ddi_taskq_dispatch(rds_taskq, 637 rds_cleanup_passive_session, (void *)sp, 638 DDI_NOSLEEP); 639 if (ret != DDI_SUCCESS) { 640 RDS_DPRINTF1("rds_handle_cm_event_failure", 641 "SP(%p) TaskQ dispatch FAILED:%d", sp, ret); 642 } 643 return (IBT_CM_ACCEPT); 644 } 645 } 646 rw_exit(&sp->session_lock); 647 648 RDS_DPRINTF2("rds_handle_cm_event_failure", "SP(%p) Return", sp); 649 return (IBT_CM_ACCEPT); 650 } 651 652 /* 653 * CM Handler 654 * 655 * Called by IBCM 656 * The cm_private type differs for active and passive events. 657 */ 658 ibt_cm_status_t 659 rds_cm_handler(void *cm_private, ibt_cm_event_t *eventp, 660 ibt_cm_return_args_t *ret_args, void *ret_priv_data, 661 ibt_priv_data_len_t ret_len_max) 662 { 663 ibt_cm_status_t ret = IBT_CM_ACCEPT; 664 665 RDS_DPRINTF2("rds_cm_handler", "Enter: event: %d", eventp->cm_type); 666 667 switch (eventp->cm_type) { 668 case IBT_CM_EVENT_REQ_RCV: 669 ret = rds_handle_cm_req((rds_state_t *)cm_private, eventp, 670 ret_args, ret_priv_data, ret_len_max); 671 break; 672 case IBT_CM_EVENT_REP_RCV: 673 ret = rds_handle_cm_rep(eventp, ret_args, ret_priv_data, 674 ret_len_max); 675 break; 676 case IBT_CM_EVENT_MRA_RCV: 677 /* Not supported */ 678 break; 679 case IBT_CM_EVENT_CONN_EST: 680 ret = rds_handle_cm_conn_est(eventp); 681 break; 682 case IBT_CM_EVENT_CONN_CLOSED: 683 ret = rds_handle_cm_conn_closed(eventp); 684 break; 685 case IBT_CM_EVENT_FAILURE: 686 ret = rds_handle_cm_event_failure(eventp); 687 break; 688 case IBT_CM_EVENT_LAP_RCV: 689 /* Not supported */ 690 RDS_DPRINTF2(LABEL, "LAP message received"); 691 break; 692 case IBT_CM_EVENT_APR_RCV: 693 /* Not supported */ 694 RDS_DPRINTF2(LABEL, "APR message received"); 695 break; 696 default: 697 break; 698 } 699 700 RDS_DPRINTF2("rds_cm_handler", "Return"); 701 702 return (ret); 703 } 704 705 /* This is based on OFED Linux RDS */ 706 #define RDS_PORT_NUM 6556 707 708 /* 709 * Register the wellknown service with service id: RDS_SERVICE_ID 710 * Incoming connection requests should arrive on this service id. 711 */ 712 ibt_srv_hdl_t 713 rds_register_service(ibt_clnt_hdl_t rds_ibhdl) 714 { 715 ibt_srv_hdl_t srvhdl; 716 ibt_srv_desc_t srvdesc; 717 int ret; 718 719 RDS_DPRINTF2("rds_register_service", "Enter: 0x%p", rds_ibhdl); 720 721 bzero(&srvdesc, sizeof (ibt_srv_desc_t)); 722 srvdesc.sd_handler = rds_cm_handler; 723 srvdesc.sd_flags = IBT_SRV_NO_FLAGS; 724 725 /* 726 * Register the old service id for backward compatibility 727 * REQs received on this service id would be rejected 728 */ 729 ret = ibt_register_service(rds_ibhdl, &srvdesc, RDS_SERVICE_ID, 730 1, &rdsib_statep->rds_old_srvhdl, NULL); 731 if (ret != IBT_SUCCESS) { 732 RDS_DPRINTF2(LABEL, 733 "RDS Service (0x%llx) Registration Failed: %d", 734 RDS_SERVICE_ID, ret); 735 return (NULL); 736 } 737 738 /* 739 * This is the new service id as per: 740 * Annex A11: RDMA IP CM Service 741 */ 742 rdsib_statep->rds_service_id = ibt_get_ip_sid(IPPROTO_TCP, 743 RDS_PORT_NUM); 744 ret = ibt_register_service(rds_ibhdl, &srvdesc, 745 rdsib_statep->rds_service_id, 1, &srvhdl, NULL); 746 if (ret != IBT_SUCCESS) { 747 RDS_DPRINTF2(LABEL, 748 "RDS Service (0x%llx) Registration Failed: %d", 749 rdsib_statep->rds_service_id, ret); 750 return (NULL); 751 } 752 753 RDS_DPRINTF2("rds_register_service", "Return: 0x%p", srvhdl); 754 return (srvhdl); 755 } 756 757 /* Bind the RDS service on all ports */ 758 int 759 rds_bind_service(rds_state_t *statep) 760 { 761 rds_hca_t *hcap; 762 ib_gid_t gid; 763 uint_t jx, nbinds = 0, nports = 0; 764 int ret; 765 766 RDS_DPRINTF2("rds_bind_service", "Enter: 0x%p", statep); 767 768 hcap = statep->rds_hcalistp; 769 while (hcap != NULL) { 770 for (jx = 0; jx < hcap->hca_nports; jx++) { 771 nports++; 772 if (hcap->hca_pinfop[jx].p_linkstate != 773 IBT_PORT_ACTIVE) { 774 /* 775 * service bind will be called in the async 776 * handler when the port comes up 777 */ 778 continue; 779 } 780 781 gid = hcap->hca_pinfop[jx].p_sgid_tbl[0]; 782 RDS_DPRINTF5(LABEL, "HCA: 0x%llx Port: %d " 783 "gid: %llx:%llx", hcap->hca_guid, 784 hcap->hca_pinfop[jx].p_port_num, gid.gid_prefix, 785 gid.gid_guid); 786 787 /* pass statep as cm_private */ 788 ret = ibt_bind_service(statep->rds_srvhdl, gid, 789 NULL, statep, NULL); 790 if (ret != IBT_SUCCESS) { 791 RDS_DPRINTF2(LABEL, "Bind service for " 792 "HCA: 0x%llx Port: %d gid %llx:%llx " 793 "failed: %d", hcap->hca_guid, 794 hcap->hca_pinfop[jx].p_port_num, 795 gid.gid_prefix, gid.gid_guid, ret); 796 continue; 797 } 798 799 nbinds++; 800 801 /* bind the old service, ignore if it fails */ 802 ret = ibt_bind_service(statep->rds_old_srvhdl, gid, 803 NULL, statep, NULL); 804 if (ret != IBT_SUCCESS) { 805 RDS_DPRINTF2(LABEL, "Bind service for " 806 "HCA: 0x%llx Port: %d gid %llx:%llx " 807 "failed: %d", hcap->hca_guid, 808 hcap->hca_pinfop[jx].p_port_num, 809 gid.gid_prefix, gid.gid_guid, ret); 810 } 811 } 812 hcap = hcap->hca_nextp; 813 } 814 815 RDS_DPRINTF2(LABEL, "RDS Service available on %d/%d ports", 816 nbinds, nports); 817 818 #if 0 819 if (nbinds == 0) { 820 return (-1); 821 } 822 #endif 823 824 RDS_DPRINTF2("rds_bind_service", "Return"); 825 826 return (0); 827 } 828 829 /* Open an RC connection */ 830 int 831 rds_open_rc_channel(rds_ep_t *ep, ibt_path_info_t *pinfo, 832 ibt_execution_mode_t mode, ibt_channel_hdl_t *chanhdl) 833 { 834 rds_session_t *sp; 835 ibt_chan_open_args_t ocargs; 836 ibt_rc_returns_t ocrets; 837 rds_cm_private_data_t cmp; 838 uint8_t hca_port; 839 ibt_channel_hdl_t hdl; 840 ibt_status_t ret = 0; 841 ibt_ip_cm_info_t ipcm_info; 842 843 RDS_DPRINTF2("rds_open_rc_channel", "Enter: EP(%p) mode: %d", ep, mode); 844 845 sp = ep->ep_sp; 846 847 bzero(&ipcm_info, sizeof (ibt_ip_cm_info_t)); 848 ipcm_info.src_addr.family = AF_INET; 849 ipcm_info.SRCIP = sp->session_myip; 850 ipcm_info.dst_addr.family = AF_INET; 851 ipcm_info.DSTIP = sp->session_remip; 852 ipcm_info.src_port = 6556; /* based on OFED RDS */ 853 ret = ibt_format_ip_private_data(&ipcm_info, 854 sizeof (rds_cm_private_data_t), &cmp); 855 if (ret != IBT_SUCCESS) { 856 RDS_DPRINTF2(LABEL, "SP(%p) EP(%p) ibt_format_ip_private_data " 857 "failed: %d", sp, ep, ret); 858 return (-1); 859 } 860 861 hca_port = pinfo->pi_prim_cep_path.cep_hca_port_num; 862 863 hdl = rds_ep_alloc_rc_channel(ep, hca_port); 864 if (hdl == NULL) { 865 return (-1); 866 } 867 868 cmp.cmp_version = RDS_VERSION; 869 cmp.cmp_arch = RDS_THIS_ARCH; 870 cmp.cmp_eptype = ep->ep_type; 871 cmp.cmp_failover = sp->session_failover; 872 cmp.cmp_last_bufid = ep->ep_rbufid; 873 cmp.cmp_user_buffer_size = UserBufferSize; 874 cmp.cmp_ack_addr = ep->ep_ack_addr; 875 cmp.cmp_ack_rkey = ep->ep_ack_rkey; 876 877 bzero(&ocargs, sizeof (ibt_chan_open_args_t)); 878 bzero(&ocrets, sizeof (ibt_rc_returns_t)); 879 ocargs.oc_path = pinfo; 880 ocargs.oc_cm_handler = rds_cm_handler; 881 ocargs.oc_cm_clnt_private = NULL; 882 ocargs.oc_rdma_ra_out = 4; 883 ocargs.oc_rdma_ra_in = 4; 884 ocargs.oc_priv_data_len = sizeof (rds_cm_private_data_t); 885 ocargs.oc_priv_data = &cmp; 886 ocargs.oc_path_retry_cnt = IBPathRetryCount; 887 ocargs.oc_path_rnr_retry_cnt = MinRnrRetry; 888 ret = ibt_open_rc_channel(hdl, IBT_OCHAN_NO_FLAGS, 889 mode, &ocargs, &ocrets); 890 if (ret != IBT_SUCCESS) { 891 RDS_DPRINTF2(LABEL, "SP(%p) EP(%p) ibt_open_rc_channel " 892 "failed: %d", sp, ep, ret); 893 (void) ibt_flush_channel(hdl); 894 (void) ibt_free_channel(hdl); 895 896 /* cleanup stuff allocated in rds_ep_alloc_rc_channel */ 897 mutex_enter(&ep->ep_lock); 898 rds_ep_free_rc_channel(ep); 899 mutex_exit(&ep->ep_lock); 900 901 return (-1); 902 } 903 904 *chanhdl = hdl; 905 906 RDS_DPRINTF2("rds_open_rc_channel", "Return: EP(%p) Chan: %p", ep, 907 *chanhdl); 908 909 return (0); 910 } 911 912 int 913 rds_close_rc_channel(ibt_channel_hdl_t chanhdl, ibt_execution_mode_t mode) 914 { 915 int ret; 916 917 RDS_DPRINTF2("rds_close_rc_channel", "Enter: Chan(%p) Mode(%d)", 918 chanhdl, mode); 919 920 ret = ibt_close_rc_channel(chanhdl, mode, NULL, 0, NULL, NULL, 0); 921 922 RDS_DPRINTF2("rds_close_rc_channel", "Return Chan(%p)", chanhdl); 923 924 return (ret); 925 } 926