/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */
/*
 * Copyright (c) 2005 SilverStorm Technologies, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 * - Redistributions of source code must retain the above
 * copyright notice, this list of conditions and the following
 * disclaimer.
 *
 * - Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following
 * disclaimer in the documentation and/or other materials
 * provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */
/*
 * Sun elects to include this software in Sun product
 * under the OpenIB BSD license.
 *
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
73 */ 74 75 #pragma ident "%Z%%M% %I% %E% SMI" 76 77 #include <sys/ib/clients/rds/rdsib_cm.h> 78 #include <sys/ib/clients/rds/rdsib_ib.h> 79 #include <sys/ib/clients/rds/rdsib_buf.h> 80 #include <sys/ib/clients/rds/rdsib_ep.h> 81 82 /* 83 * This file contains CM related work: 84 * 85 * Service registration/deregistration 86 * Path lookup 87 * CM connection callbacks 88 * CM active and passive connection establishment 89 * Connection failover 90 */ 91 92 #define SRCIP src_addr.un.ip4addr 93 #define DSTIP dst_addr.un.ip4addr 94 95 /* 96 * Handle an incoming CM REQ 97 */ 98 /* ARGSUSED */ 99 static ibt_cm_status_t 100 rds_handle_cm_req(rds_state_t *statep, ibt_cm_event_t *evp, 101 ibt_cm_return_args_t *rargsp, void *rcmp, ibt_priv_data_len_t rcmp_len) 102 { 103 ibt_cm_req_rcv_t *reqp; 104 ib_gid_t lgid, rgid; 105 rds_cm_private_data_t cmp; 106 rds_session_t *sp; 107 rds_ep_t *ep; 108 ibt_channel_hdl_t chanhdl; 109 ibt_ip_cm_info_t ipcm_info; 110 int ret; 111 112 RDS_DPRINTF2("rds_handle_cm_req", "Enter"); 113 114 reqp = &evp->cm_event.req; 115 rgid = reqp->req_prim_addr.av_dgid; /* requester gid */ 116 lgid = reqp->req_prim_addr.av_sgid; /* receiver gid */ 117 118 RDS_DPRINTF2(LABEL, "REQ Received: From: %llx:%llx To: %llx:%llx", 119 rgid.gid_prefix, rgid.gid_guid, lgid.gid_prefix, lgid.gid_guid); 120 121 /* validate service id */ 122 if (reqp->req_service_id == RDS_SERVICE_ID) { 123 RDS_DPRINTF0(LABEL, "Version Mismatch: Remote system " 124 "(GUID: 0x%llx) is running an older version of RDS", 125 rgid.gid_guid); 126 return (IBT_CM_REJECT); 127 } 128 129 /* 130 * CM private data brings IP information 131 * Private data received is a stream of bytes and may not be properly 132 * aligned. So, bcopy the data onto the stack before accessing it. 
133 */ 134 bcopy((uint8_t *)evp->cm_priv_data, &cmp, 135 sizeof (rds_cm_private_data_t)); 136 137 /* extract the CM IP info */ 138 ret = ibt_get_ip_data(evp->cm_priv_data_len, evp->cm_priv_data, 139 &ipcm_info); 140 if (ret != IBT_SUCCESS) { 141 RDS_DPRINTF2("rds_handle_cm_req", "ibt_get_ip_data failed: %d", 142 ret); 143 return (IBT_CM_REJECT); 144 } 145 146 RDS_DPRINTF2("rds_handle_cm_req", 147 "REQ Received: From IP: 0x%x To IP: 0x%x type: %d", 148 ipcm_info.SRCIP, ipcm_info.DSTIP, cmp.cmp_eptype); 149 150 if (cmp.cmp_version != RDS_VERSION) { 151 RDS_DPRINTF0(LABEL, "Version Mismatch: Local version: %d " 152 "Remote version: %d", RDS_VERSION, cmp.cmp_version); 153 return (IBT_CM_REJECT); 154 } 155 156 /* RDS supports V4 addresses only */ 157 if ((ipcm_info.src_addr.family != AF_INET) || 158 (ipcm_info.dst_addr.family != AF_INET)) { 159 RDS_DPRINTF2(LABEL, "Unsupported Address Family: " 160 "src: %d dst: %d", ipcm_info.src_addr.family, 161 ipcm_info.dst_addr.family); 162 return (IBT_CM_REJECT); 163 } 164 165 if (cmp.cmp_arch != RDS_THIS_ARCH) { 166 RDS_DPRINTF2(LABEL, "ARCH does not match (%d != %d)", 167 cmp.cmp_arch, RDS_THIS_ARCH); 168 return (IBT_CM_REJECT); 169 } 170 171 if ((cmp.cmp_eptype != RDS_EP_TYPE_CTRL) && 172 (cmp.cmp_eptype != RDS_EP_TYPE_DATA)) { 173 RDS_DPRINTF2(LABEL, "Unknown Channel type: %d", cmp.cmp_eptype); 174 return (IBT_CM_REJECT); 175 } 176 177 /* user_buffer_size should be same on all nodes */ 178 if (cmp.cmp_user_buffer_size != UserBufferSize) { 179 RDS_DPRINTF2(LABEL, 180 "UserBufferSize Mismatch, this node: %d remote node: %d", 181 UserBufferSize, cmp.cmp_user_buffer_size); 182 return (IBT_CM_REJECT); 183 } 184 185 /* 186 * RDS needs more time to process a failover REQ so send an MRA. 187 * Otherwise, the remote may retry the REQ and fail the connection. 
188 */ 189 if ((cmp.cmp_failover) && (cmp.cmp_eptype == RDS_EP_TYPE_DATA)) { 190 RDS_DPRINTF2("rds_handle_cm_req", "Session Failover, send MRA"); 191 (void) ibt_cm_delay(IBT_CM_DELAY_REQ, evp->cm_session_id, 192 10000000 /* 10 sec */, NULL, 0); 193 } 194 195 /* Is there a session to the destination node? */ 196 rw_enter(&statep->rds_sessionlock, RW_READER); 197 sp = rds_session_lkup(statep, ipcm_info.SRCIP, rgid.gid_guid); 198 rw_exit(&statep->rds_sessionlock); 199 200 if (sp == NULL) { 201 /* 202 * currently there is no session to the destination 203 * remote ip in the private data is the local ip and vice 204 * versa 205 */ 206 sp = rds_session_create(statep, ipcm_info.DSTIP, 207 ipcm_info.SRCIP, reqp, RDS_SESSION_PASSIVE); 208 if (sp == NULL) { 209 /* Check the list anyway. */ 210 rw_enter(&statep->rds_sessionlock, RW_READER); 211 sp = rds_session_lkup(statep, ipcm_info.SRCIP, 212 rgid.gid_guid); 213 rw_exit(&statep->rds_sessionlock); 214 if (sp == NULL) { 215 /* 216 * The only way this can fail is due to lack 217 * of kernel resources 218 */ 219 return (IBT_CM_REJECT); 220 } 221 } 222 } 223 224 rw_enter(&sp->session_lock, RW_WRITER); 225 226 /* catch peer-to-peer case as soon as possible */ 227 if ((sp->session_state == RDS_SESSION_STATE_CREATED) || 228 (sp->session_state == RDS_SESSION_STATE_INIT)) { 229 /* Check possible peer-to-peer case here */ 230 if (sp->session_type != RDS_SESSION_PASSIVE) { 231 RDS_DPRINTF2("rds_handle_cm_req", 232 "SP(%p) Peer-peer connection handling", sp); 233 if (lgid.gid_guid > rgid.gid_guid) { 234 /* this node is active so reject this request */ 235 rw_exit(&sp->session_lock); 236 return (IBT_CM_REJECT); 237 } else { 238 /* this node is passive, change the session */ 239 sp->session_type = RDS_SESSION_PASSIVE; 240 sp->session_lgid = lgid; 241 sp->session_rgid = rgid; 242 } 243 } 244 } 245 246 RDS_DPRINTF2(LABEL, "SP(%p) state: %d", sp, sp->session_state); 247 248 switch (sp->session_state) { 249 case RDS_SESSION_STATE_CONNECTED: 
250 RDS_DPRINTF2(LABEL, "STALE Session Detected SP(%p)", sp); 251 sp->session_state = RDS_SESSION_STATE_ERROR; 252 RDS_DPRINTF3("rds_handle_cm_req", "SP(%p) State " 253 "RDS_SESSION_STATE_ERROR", sp); 254 255 /* FALLTHRU */ 256 case RDS_SESSION_STATE_ERROR: 257 case RDS_SESSION_STATE_PASSIVE_CLOSING: 258 sp->session_type = RDS_SESSION_PASSIVE; 259 rw_exit(&sp->session_lock); 260 261 rds_session_close(sp, IBT_NOCALLBACKS, 1); 262 263 /* move the session to init state */ 264 rw_enter(&sp->session_lock, RW_WRITER); 265 ret = rds_session_reinit(sp, lgid); 266 sp->session_myip = ipcm_info.DSTIP; 267 sp->session_lgid = lgid; 268 sp->session_rgid = rgid; 269 if (ret != 0) { 270 rds_session_fini(sp); 271 sp->session_state = RDS_SESSION_STATE_FAILED; 272 RDS_DPRINTF3("rds_handle_cm_req", "SP(%p) State " 273 "RDS_SESSION_STATE_FAILED", sp); 274 rw_exit(&sp->session_lock); 275 return (IBT_CM_REJECT); 276 } else { 277 sp->session_state = RDS_SESSION_STATE_INIT; 278 RDS_DPRINTF3("rds_handle_cm_req", "SP(%p) State " 279 "RDS_SESSION_STATE_INIT", sp); 280 } 281 282 if (cmp.cmp_eptype == RDS_EP_TYPE_CTRL) { 283 ep = &sp->session_ctrlep; 284 } else { 285 ep = &sp->session_dataep; 286 } 287 break; 288 case RDS_SESSION_STATE_CREATED: 289 case RDS_SESSION_STATE_FAILED: 290 case RDS_SESSION_STATE_FINI: 291 /* 292 * Initialize both channels, we accept this connection 293 * only if both channels are initialized 294 */ 295 sp->session_type = RDS_SESSION_PASSIVE; 296 sp->session_lgid = lgid; 297 sp->session_rgid = rgid; 298 sp->session_state = RDS_SESSION_STATE_CREATED; 299 RDS_DPRINTF3("rds_handle_cm_req", "SP(%p) State " 300 "RDS_SESSION_STATE_CREATED", sp); 301 ret = rds_session_init(sp); 302 if (ret != 0) { 303 /* Seems like there are not enough resources */ 304 sp->session_state = RDS_SESSION_STATE_FAILED; 305 RDS_DPRINTF3("rds_handle_cm_req", "SP(%p) State " 306 "RDS_SESSION_STATE_FAILED", sp); 307 rw_exit(&sp->session_lock); 308 return (IBT_CM_REJECT); 309 } 310 sp->session_state = 
RDS_SESSION_STATE_INIT; 311 RDS_DPRINTF3("rds_handle_cm_req", "SP(%p) State " 312 "RDS_SESSION_STATE_INIT", sp); 313 314 /* FALLTHRU */ 315 case RDS_SESSION_STATE_INIT: 316 /* 317 * When re-using an existing session, make sure the 318 * session is still through the same HCA. Otherwise, the 319 * memory registrations have to moved to the new HCA. 320 */ 321 if (cmp.cmp_eptype == RDS_EP_TYPE_DATA) { 322 if (sp->session_lgid.gid_guid != lgid.gid_guid) { 323 RDS_DPRINTF2("rds_handle_cm_req", 324 "Existing Session but different gid " 325 "existing: 0x%llx, new: 0x%llx, " 326 "sending an MRA", 327 sp->session_lgid.gid_guid, lgid.gid_guid); 328 (void) ibt_cm_delay(IBT_CM_DELAY_REQ, 329 evp->cm_session_id, 10000000 /* 10 sec */, 330 NULL, 0); 331 ret = rds_session_reinit(sp, lgid); 332 if (ret != 0) { 333 rds_session_fini(sp); 334 sp->session_state = 335 RDS_SESSION_STATE_FAILED; 336 sp->session_failover = 0; 337 RDS_DPRINTF3("rds_failover_session", 338 "SP(%p) State " 339 "RDS_SESSION_STATE_FAILED", sp); 340 rw_exit(&sp->session_lock); 341 return (IBT_CM_REJECT); 342 } 343 } 344 ep = &sp->session_dataep; 345 } else { 346 ep = &sp->session_ctrlep; 347 } 348 349 break; 350 default: 351 RDS_DPRINTF2(LABEL, "ERROR: SP(%p) is in an unexpected " 352 "state: %d", sp, sp->session_state); 353 rw_exit(&sp->session_lock); 354 return (IBT_CM_REJECT); 355 } 356 357 sp->session_failover = 0; /* reset any previous value */ 358 if (cmp.cmp_failover) { 359 RDS_DPRINTF2("rds_handle_cm_req", 360 "SP(%p) Failover Session (BP %p)", sp, cmp.cmp_last_bufid); 361 sp->session_failover = 1; 362 } 363 364 mutex_enter(&ep->ep_lock); 365 if (ep->ep_state == RDS_EP_STATE_UNCONNECTED) { 366 ep->ep_state = RDS_EP_STATE_PASSIVE_PENDING; 367 sp->session_type = RDS_SESSION_PASSIVE; 368 rw_exit(&sp->session_lock); 369 } else if (ep->ep_state == RDS_EP_STATE_ACTIVE_PENDING) { 370 rw_exit(&sp->session_lock); 371 /* 372 * Peer to peer connection. There is an active 373 * connection pending on this ep. 
The one with 374 * greater port guid becomes active and the 375 * other becomes passive. 376 */ 377 RDS_DPRINTF2("rds_handle_cm_req", 378 "EP(%p) Peer-peer connection handling", ep); 379 if (lgid.gid_guid > rgid.gid_guid) { 380 /* this node is active so reject this request */ 381 mutex_exit(&ep->ep_lock); 382 RDS_DPRINTF2(LABEL, "SP(%p) EP(%p): " 383 "Rejecting passive in favor of active", sp, ep); 384 return (IBT_CM_REJECT); 385 } else { 386 /* 387 * This session is not the active end, change it 388 * to passive end. 389 */ 390 ep->ep_state = RDS_EP_STATE_PASSIVE_PENDING; 391 392 rw_enter(&sp->session_lock, RW_WRITER); 393 sp->session_type = RDS_SESSION_PASSIVE; 394 sp->session_lgid = lgid; 395 sp->session_rgid = rgid; 396 rw_exit(&sp->session_lock); 397 } 398 } else { 399 rw_exit(&sp->session_lock); 400 } 401 402 ep->ep_lbufid = cmp.cmp_last_bufid; 403 ep->ep_ackwr.wr.rc.rcwr.rdma.rdma_raddr = (ib_vaddr_t)cmp.cmp_ack_addr; 404 ep->ep_ackwr.wr.rc.rcwr.rdma.rdma_rkey = cmp.cmp_ack_rkey; 405 cmp.cmp_last_bufid = ep->ep_rbufid; 406 cmp.cmp_ack_addr = ep->ep_ack_addr; 407 cmp.cmp_ack_rkey = ep->ep_ack_rkey; 408 mutex_exit(&ep->ep_lock); 409 410 /* continue with accepting the connection request for this channel */ 411 chanhdl = rds_ep_alloc_rc_channel(ep, reqp->req_prim_hca_port); 412 if (chanhdl == NULL) { 413 mutex_enter(&ep->ep_lock); 414 ep->ep_state = RDS_EP_STATE_UNCONNECTED; 415 mutex_exit(&ep->ep_lock); 416 return (IBT_CM_REJECT); 417 } 418 419 /* pre-post recv buffers in the RQ */ 420 rds_post_recv_buf((void *)chanhdl); 421 422 rargsp->cm_ret_len = sizeof (rds_cm_private_data_t); 423 bcopy((uint8_t *)&cmp, rcmp, sizeof (rds_cm_private_data_t)); 424 rargsp->cm_ret.rep.cm_channel = chanhdl; 425 rargsp->cm_ret.rep.cm_rdma_ra_out = 4; 426 rargsp->cm_ret.rep.cm_rdma_ra_in = 4; 427 rargsp->cm_ret.rep.cm_rnr_retry_cnt = MinRnrRetry; 428 429 RDS_DPRINTF2("rds_handle_cm_req", "Return: SP(%p) EP(%p) Chan (%p)", 430 sp, ep, chanhdl); 431 432 return (IBT_CM_ACCEPT); 433 } 
434 435 /* 436 * Handle an incoming CM REP 437 * Pre-post recv buffers for the QP 438 */ 439 /* ARGSUSED */ 440 static ibt_cm_status_t 441 rds_handle_cm_rep(ibt_cm_event_t *evp, ibt_cm_return_args_t *rargsp, 442 void *rcmp, ibt_priv_data_len_t rcmp_len) 443 { 444 rds_ep_t *ep; 445 rds_cm_private_data_t cmp; 446 447 RDS_DPRINTF2("rds_handle_cm_rep", "Enter"); 448 449 /* pre-post recv buffers in the RQ */ 450 rds_post_recv_buf((void *)evp->cm_channel); 451 452 ep = (rds_ep_t *)ibt_get_chan_private(evp->cm_channel); 453 bcopy((uint8_t *)evp->cm_priv_data, &cmp, 454 sizeof (rds_cm_private_data_t)); 455 ep->ep_lbufid = cmp.cmp_last_bufid; 456 ep->ep_ackwr.wr.rc.rcwr.rdma.rdma_raddr = (ib_vaddr_t)cmp.cmp_ack_addr; 457 ep->ep_ackwr.wr.rc.rcwr.rdma.rdma_rkey = cmp.cmp_ack_rkey; 458 459 rargsp->cm_ret_len = 0; 460 461 RDS_DPRINTF2("rds_handle_cm_rep", "Return: lbufid: %p", ep->ep_lbufid); 462 463 return (IBT_CM_ACCEPT); 464 } 465 466 /* 467 * Handle CONN EST 468 */ 469 static ibt_cm_status_t 470 rds_handle_cm_conn_est(ibt_cm_event_t *evp) 471 { 472 rds_session_t *sp; 473 rds_ep_t *ep; 474 475 ep = (rds_ep_t *)ibt_get_chan_private(evp->cm_channel); 476 477 RDS_DPRINTF2("rds_handle_cm_conn_est", "EP(%p) State: %d", ep, 478 ep->ep_state); 479 480 mutex_enter(&ep->ep_lock); 481 ASSERT((ep->ep_state == RDS_EP_STATE_ACTIVE_PENDING) || 482 (ep->ep_state == RDS_EP_STATE_PASSIVE_PENDING)); 483 ep->ep_state = RDS_EP_STATE_CONNECTED; 484 ep->ep_chanhdl = evp->cm_channel; 485 sp = ep->ep_sp; 486 mutex_exit(&ep->ep_lock); 487 488 (void) rds_session_active(sp); 489 490 RDS_DPRINTF2("rds_handle_cm_conn_est", "Return"); 491 return (IBT_CM_ACCEPT); 492 } 493 494 /* 495 * Handle CONN CLOSED 496 */ 497 static ibt_cm_status_t 498 rds_handle_cm_conn_closed(ibt_cm_event_t *evp) 499 { 500 rds_ep_t *ep; 501 rds_session_t *sp; 502 503 /* Catch DREQs but ignore DREPs */ 504 if (evp->cm_event.closed != IBT_CM_CLOSED_DREQ_RCVD) { 505 RDS_DPRINTF2("rds_handle_cm_conn_closed", 506 "Ignoring Event: %d 
received", evp->cm_event.closed); 507 return (IBT_CM_ACCEPT); 508 } 509 510 ep = (rds_ep_t *)ibt_get_chan_private(evp->cm_channel); 511 sp = ep->ep_sp; 512 RDS_DPRINTF2("rds_handle_cm_conn_closed", "EP(%p) Enter", ep); 513 514 mutex_enter(&ep->ep_lock); 515 if (ep->ep_state != RDS_EP_STATE_CONNECTED) { 516 /* Ignore this DREQ */ 517 RDS_DPRINTF2("rds_handle_cm_conn_closed", 518 "EP(%p) not connected, state: %d", ep, ep->ep_state); 519 mutex_exit(&ep->ep_lock); 520 return (IBT_CM_ACCEPT); 521 } 522 ep->ep_state = RDS_EP_STATE_CLOSING; 523 mutex_exit(&ep->ep_lock); 524 525 rw_enter(&sp->session_lock, RW_WRITER); 526 RDS_DPRINTF2("rds_handle_cm_conn_closed", "SP(%p) - state: %d", sp, 527 sp->session_state); 528 529 switch (sp->session_state) { 530 case RDS_SESSION_STATE_CONNECTED: 531 sp->session_state = RDS_SESSION_STATE_PASSIVE_CLOSING; 532 RDS_DPRINTF3("rds_handle_cm_conn_closed", "SP(%p) State " 533 "RDS_SESSION_STATE_PASSIVE_CLOSING", sp); 534 break; 535 536 case RDS_SESSION_STATE_PASSIVE_CLOSING: 537 sp->session_state = RDS_SESSION_STATE_CLOSED; 538 RDS_DPRINTF3("rds_handle_cm_conn_closed", "SP(%p) State " 539 "RDS_SESSION_STATE_CLOSED", sp); 540 rds_passive_session_fini(sp); 541 sp->session_state = RDS_SESSION_STATE_FINI; 542 RDS_DPRINTF3("rds_handle_cm_conn_closed", 543 "SP(%p) State RDS_SESSION_STATE_FINI", sp); 544 break; 545 546 case RDS_SESSION_STATE_ACTIVE_CLOSING: 547 case RDS_SESSION_STATE_ERROR: 548 case RDS_SESSION_STATE_CLOSED: 549 break; 550 551 case RDS_SESSION_STATE_INIT: 552 sp->session_state = RDS_SESSION_STATE_ERROR; 553 RDS_DPRINTF3("rds_handle_cm_conn_closed", "SP(%p) State " 554 "RDS_SESSION_STATE_ERROR", sp); 555 rds_passive_session_fini(sp); 556 sp->session_state = RDS_SESSION_STATE_FAILED; 557 RDS_DPRINTF3("rds_handle_cm_conn_closed", 558 "SP(%p) State RDS_SESSION_STATE_FAILED", sp); 559 break; 560 561 default: 562 RDS_DPRINTF2("rds_handle_cm_conn_closed", 563 "SP(%p) - Unexpected state: %d", sp, sp->session_state); 564 
rds_passive_session_fini(sp); 565 sp->session_state = RDS_SESSION_STATE_FAILED; 566 RDS_DPRINTF3("rds_handle_cm_conn_closed", "SP(%p) State " 567 "RDS_SESSION_STATE_FAILED", sp); 568 } 569 rw_exit(&sp->session_lock); 570 571 mutex_enter(&ep->ep_lock); 572 ep->ep_state = RDS_EP_STATE_CLOSED; 573 mutex_exit(&ep->ep_lock); 574 575 RDS_DPRINTF2("rds_handle_cm_conn_closed", "SP(%p) Return", sp); 576 return (IBT_CM_ACCEPT); 577 } 578 579 /* 580 * Handle EVENT FAILURE 581 */ 582 static ibt_cm_status_t 583 rds_handle_cm_event_failure(ibt_cm_event_t *evp) 584 { 585 rds_ep_t *ep; 586 rds_session_t *sp; 587 int ret; 588 589 RDS_DPRINTF2("rds_handle_cm_event_failure", "Enter: Chan hdl: 0x%p " 590 "Code: %d msg: %d reason: %d", evp->cm_channel, 591 evp->cm_event.failed.cf_code, evp->cm_event.failed.cf_msg, 592 evp->cm_event.failed.cf_reason); 593 594 if (evp->cm_event.failed.cf_reason == IBT_CM_INVALID_SID) { 595 RDS_DPRINTF0(LABEL, 596 "Received REJ with reason IBT_CM_INVALID_SID: " 597 "The remote system could be running an older RDS version"); 598 } 599 600 if (evp->cm_channel == NULL) { 601 return (IBT_CM_ACCEPT); 602 } 603 604 if ((evp->cm_event.failed.cf_code != IBT_CM_FAILURE_STALE) && 605 (evp->cm_event.failed.cf_msg == IBT_CM_FAILURE_REQ)) { 606 /* 607 * This end is active, just ignore, ibt_open_rc_channel() 608 * caller will take care of cleanup. 
609 */ 610 RDS_DPRINTF2("rds_handle_cm_event_failure", 611 "Ignoring this event: Chan hdl: 0x%p", evp->cm_channel); 612 return (IBT_CM_ACCEPT); 613 } 614 615 ep = (rds_ep_t *)ibt_get_chan_private(evp->cm_channel); 616 sp = ep->ep_sp; 617 618 rw_enter(&sp->session_lock, RW_WRITER); 619 if (sp->session_type == RDS_SESSION_PASSIVE) { 620 RDS_DPRINTF2("rds_handle_cm_event_failure", 621 "SP(%p) - state: %d", sp, sp->session_state); 622 if ((sp->session_state == RDS_SESSION_STATE_INIT) || 623 (sp->session_state == RDS_SESSION_STATE_CONNECTED)) { 624 sp->session_state = RDS_SESSION_STATE_ERROR; 625 RDS_DPRINTF3("rds_handle_cm_event_failure", 626 "SP(%p) State RDS_SESSION_STATE_ERROR", sp); 627 628 /* 629 * Store the cm_channel for freeing later 630 * Active side frees it on ibt_open_rc_channel 631 * failure 632 */ 633 if (ep->ep_chanhdl == NULL) { 634 ep->ep_chanhdl = evp->cm_channel; 635 } 636 rw_exit(&sp->session_lock); 637 638 /* 639 * rds_passive_session_fini should not be called 640 * directly in the CM handler. It will cause a deadlock. 641 */ 642 ret = ddi_taskq_dispatch(rds_taskq, 643 rds_cleanup_passive_session, (void *)sp, 644 DDI_NOSLEEP); 645 if (ret != DDI_SUCCESS) { 646 RDS_DPRINTF1("rds_handle_cm_event_failure", 647 "SP(%p) TaskQ dispatch FAILED:%d", sp, ret); 648 } 649 return (IBT_CM_ACCEPT); 650 } 651 } 652 rw_exit(&sp->session_lock); 653 654 RDS_DPRINTF2("rds_handle_cm_event_failure", "SP(%p) Return", sp); 655 return (IBT_CM_ACCEPT); 656 } 657 658 /* 659 * CM Handler 660 * 661 * Called by IBCM 662 * The cm_private type differs for active and passive events. 
663 */ 664 ibt_cm_status_t 665 rds_cm_handler(void *cm_private, ibt_cm_event_t *eventp, 666 ibt_cm_return_args_t *ret_args, void *ret_priv_data, 667 ibt_priv_data_len_t ret_len_max) 668 { 669 ibt_cm_status_t ret = IBT_CM_ACCEPT; 670 671 RDS_DPRINTF2("rds_cm_handler", "Enter: event: %d", eventp->cm_type); 672 673 switch (eventp->cm_type) { 674 case IBT_CM_EVENT_REQ_RCV: 675 ret = rds_handle_cm_req((rds_state_t *)cm_private, eventp, 676 ret_args, ret_priv_data, ret_len_max); 677 break; 678 case IBT_CM_EVENT_REP_RCV: 679 ret = rds_handle_cm_rep(eventp, ret_args, ret_priv_data, 680 ret_len_max); 681 break; 682 case IBT_CM_EVENT_MRA_RCV: 683 /* Not supported */ 684 break; 685 case IBT_CM_EVENT_CONN_EST: 686 ret = rds_handle_cm_conn_est(eventp); 687 break; 688 case IBT_CM_EVENT_CONN_CLOSED: 689 ret = rds_handle_cm_conn_closed(eventp); 690 break; 691 case IBT_CM_EVENT_FAILURE: 692 ret = rds_handle_cm_event_failure(eventp); 693 break; 694 case IBT_CM_EVENT_LAP_RCV: 695 /* Not supported */ 696 RDS_DPRINTF2(LABEL, "LAP message received"); 697 break; 698 case IBT_CM_EVENT_APR_RCV: 699 /* Not supported */ 700 RDS_DPRINTF2(LABEL, "APR message received"); 701 break; 702 default: 703 break; 704 } 705 706 RDS_DPRINTF2("rds_cm_handler", "Return"); 707 708 return (ret); 709 } 710 711 /* This is based on OFED Linux RDS */ 712 #define RDS_PORT_NUM 6556 713 714 /* 715 * Register the wellknown service with service id: RDS_SERVICE_ID 716 * Incoming connection requests should arrive on this service id. 
717 */ 718 ibt_srv_hdl_t 719 rds_register_service(ibt_clnt_hdl_t rds_ibhdl) 720 { 721 ibt_srv_hdl_t srvhdl; 722 ibt_srv_desc_t srvdesc; 723 int ret; 724 725 RDS_DPRINTF2("rds_register_service", "Enter: 0x%p", rds_ibhdl); 726 727 bzero(&srvdesc, sizeof (ibt_srv_desc_t)); 728 srvdesc.sd_handler = rds_cm_handler; 729 srvdesc.sd_flags = IBT_SRV_NO_FLAGS; 730 731 /* 732 * Register the old service id for backward compatibility 733 * REQs received on this service id would be rejected 734 */ 735 ret = ibt_register_service(rds_ibhdl, &srvdesc, RDS_SERVICE_ID, 736 1, &rdsib_statep->rds_old_srvhdl, NULL); 737 if (ret != IBT_SUCCESS) { 738 RDS_DPRINTF2(LABEL, 739 "RDS Service (0x%llx) Registration Failed: %d", 740 RDS_SERVICE_ID, ret); 741 return (NULL); 742 } 743 744 /* 745 * This is the new service id as per: 746 * Annex A11: RDMA IP CM Service 747 */ 748 rdsib_statep->rds_service_id = ibt_get_ip_sid(IPPROTO_TCP, 749 RDS_PORT_NUM); 750 ret = ibt_register_service(rds_ibhdl, &srvdesc, 751 rdsib_statep->rds_service_id, 1, &srvhdl, NULL); 752 if (ret != IBT_SUCCESS) { 753 RDS_DPRINTF2(LABEL, 754 "RDS Service (0x%llx) Registration Failed: %d", 755 rdsib_statep->rds_service_id, ret); 756 return (NULL); 757 } 758 759 RDS_DPRINTF2("rds_register_service", "Return: 0x%p", srvhdl); 760 return (srvhdl); 761 } 762 763 /* Bind the RDS service on all ports */ 764 int 765 rds_bind_service(rds_state_t *statep) 766 { 767 rds_hca_t *hcap; 768 ib_gid_t gid; 769 uint_t jx, nbinds = 0, nports = 0; 770 int ret; 771 772 RDS_DPRINTF2("rds_bind_service", "Enter: 0x%p", statep); 773 774 hcap = statep->rds_hcalistp; 775 while (hcap != NULL) { 776 for (jx = 0; jx < hcap->hca_nports; jx++) { 777 nports++; 778 if (hcap->hca_pinfop[jx].p_linkstate != 779 IBT_PORT_ACTIVE) { 780 /* 781 * service bind will be called in the async 782 * handler when the port comes up 783 */ 784 continue; 785 } 786 787 gid = hcap->hca_pinfop[jx].p_sgid_tbl[0]; 788 RDS_DPRINTF5(LABEL, "HCA: 0x%llx Port: %d " 789 "gid: %llx:%llx", 
hcap->hca_guid, 790 hcap->hca_pinfop[jx].p_port_num, gid.gid_prefix, 791 gid.gid_guid); 792 793 /* pass statep as cm_private */ 794 ret = ibt_bind_service(statep->rds_srvhdl, gid, 795 NULL, statep, NULL); 796 if (ret != IBT_SUCCESS) { 797 RDS_DPRINTF2(LABEL, "Bind service for " 798 "HCA: 0x%llx Port: %d gid %llx:%llx " 799 "failed: %d", hcap->hca_guid, 800 hcap->hca_pinfop[jx].p_port_num, 801 gid.gid_prefix, gid.gid_guid, ret); 802 continue; 803 } 804 805 nbinds++; 806 807 /* bind the old service, ignore if it fails */ 808 ret = ibt_bind_service(statep->rds_old_srvhdl, gid, 809 NULL, statep, NULL); 810 if (ret != IBT_SUCCESS) { 811 RDS_DPRINTF2(LABEL, "Bind service for " 812 "HCA: 0x%llx Port: %d gid %llx:%llx " 813 "failed: %d", hcap->hca_guid, 814 hcap->hca_pinfop[jx].p_port_num, 815 gid.gid_prefix, gid.gid_guid, ret); 816 } 817 } 818 hcap = hcap->hca_nextp; 819 } 820 821 RDS_DPRINTF2(LABEL, "RDS Service available on %d/%d ports", 822 nbinds, nports); 823 824 #if 0 825 if (nbinds == 0) { 826 return (-1); 827 } 828 #endif 829 830 RDS_DPRINTF2("rds_bind_service", "Return"); 831 832 return (0); 833 } 834 835 /* Open an RC connection */ 836 int 837 rds_open_rc_channel(rds_ep_t *ep, ibt_path_info_t *pinfo, 838 ibt_execution_mode_t mode, ibt_channel_hdl_t *chanhdl) 839 { 840 rds_session_t *sp; 841 ibt_chan_open_args_t ocargs; 842 ibt_rc_returns_t ocrets; 843 rds_cm_private_data_t cmp; 844 uint8_t hca_port; 845 ibt_channel_hdl_t hdl; 846 ibt_status_t ret = 0; 847 ibt_ip_cm_info_t ipcm_info; 848 849 RDS_DPRINTF2("rds_open_rc_channel", "Enter: EP(%p) mode: %d", ep, mode); 850 851 sp = ep->ep_sp; 852 853 bzero(&ipcm_info, sizeof (ibt_ip_cm_info_t)); 854 ipcm_info.src_addr.family = AF_INET; 855 ipcm_info.SRCIP = sp->session_myip; 856 ipcm_info.dst_addr.family = AF_INET; 857 ipcm_info.DSTIP = sp->session_remip; 858 ipcm_info.src_port = 6556; /* based on OFED RDS */ 859 ret = ibt_format_ip_private_data(&ipcm_info, 860 sizeof (rds_cm_private_data_t), &cmp); 861 if (ret != 
IBT_SUCCESS) { 862 RDS_DPRINTF2(LABEL, "SP(%p) EP(%p) ibt_format_ip_private_data " 863 "failed: %d", sp, ep, ret); 864 return (-1); 865 } 866 867 hca_port = pinfo->pi_prim_cep_path.cep_hca_port_num; 868 869 hdl = rds_ep_alloc_rc_channel(ep, hca_port); 870 if (hdl == NULL) { 871 return (-1); 872 } 873 874 cmp.cmp_version = RDS_VERSION; 875 cmp.cmp_arch = RDS_THIS_ARCH; 876 cmp.cmp_eptype = ep->ep_type; 877 cmp.cmp_failover = sp->session_failover; 878 cmp.cmp_last_bufid = ep->ep_rbufid; 879 cmp.cmp_user_buffer_size = UserBufferSize; 880 cmp.cmp_ack_addr = ep->ep_ack_addr; 881 cmp.cmp_ack_rkey = ep->ep_ack_rkey; 882 883 bzero(&ocargs, sizeof (ibt_chan_open_args_t)); 884 bzero(&ocrets, sizeof (ibt_rc_returns_t)); 885 ocargs.oc_path = pinfo; 886 ocargs.oc_cm_handler = rds_cm_handler; 887 ocargs.oc_cm_clnt_private = NULL; 888 ocargs.oc_rdma_ra_out = 4; 889 ocargs.oc_rdma_ra_in = 4; 890 ocargs.oc_priv_data_len = sizeof (rds_cm_private_data_t); 891 ocargs.oc_priv_data = &cmp; 892 ocargs.oc_path_retry_cnt = IBPathRetryCount; 893 ocargs.oc_path_rnr_retry_cnt = MinRnrRetry; 894 ret = ibt_open_rc_channel(hdl, IBT_OCHAN_NO_FLAGS, 895 mode, &ocargs, &ocrets); 896 if (ret != IBT_SUCCESS) { 897 RDS_DPRINTF2(LABEL, "SP(%p) EP(%p) ibt_open_rc_channel " 898 "failed: %d", sp, ep, ret); 899 (void) ibt_flush_channel(hdl); 900 (void) ibt_free_channel(hdl); 901 902 mutex_enter(&ep->ep_lock); 903 /* don't cleanup if this failure is due to peer-peer race */ 904 if (ep->ep_state == RDS_EP_STATE_ACTIVE_PENDING) { 905 /* cleanup stuff allocated in rds_ep_alloc_rc_channel */ 906 ep->ep_state = RDS_EP_STATE_ERROR; 907 rds_ep_free_rc_channel(ep); 908 } 909 mutex_exit(&ep->ep_lock); 910 911 return (-1); 912 } 913 914 *chanhdl = hdl; 915 916 RDS_DPRINTF2("rds_open_rc_channel", "Return: EP(%p) Chan: %p", ep, 917 *chanhdl); 918 919 return (0); 920 } 921 922 int 923 rds_close_rc_channel(ibt_channel_hdl_t chanhdl, ibt_execution_mode_t mode) 924 { 925 int ret; 926 927 
RDS_DPRINTF2("rds_close_rc_channel", "Enter: Chan(%p) Mode(%d)", 928 chanhdl, mode); 929 930 ret = ibt_close_rc_channel(chanhdl, mode, NULL, 0, NULL, NULL, 0); 931 932 RDS_DPRINTF2("rds_close_rc_channel", "Return Chan(%p)", chanhdl); 933 934 return (ret); 935 } 936