1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 2005 SilverStorm Technologies, Inc. All rights reserved. 27 * 28 * This software is available to you under a choice of one of two 29 * licenses. You may choose to be licensed under the terms of the GNU 30 * General Public License (GPL) Version 2, available from the file 31 * COPYING in the main directory of this source tree, or the 32 * OpenIB.org BSD license below: 33 * 34 * Redistribution and use in source and binary forms, with or 35 * without modification, are permitted provided that the following 36 * conditions are met: 37 * 38 * - Redistributions of source code must retain the above 39 * copyright notice, this list of conditions and the following 40 * disclaimer. 41 * 42 * - Redistributions in binary form must reproduce the above 43 * copyright notice, this list of conditions and the following 44 * disclaimer in the documentation and/or other materials 45 * provided with the distribution. 46 * 47 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 48 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 49 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 50 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 51 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 52 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 53 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 54 * SOFTWARE. 55 * 56 */ 57 /* 58 * Sun elects to include this software in Sun product 59 * under the OpenIB BSD license. 60 * 61 * 62 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 63 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 64 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 65 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 66 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 67 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 68 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 69 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 70 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 71 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 72 * POSSIBILITY OF SUCH DAMAGE. 73 */ 74 75 #pragma ident "%Z%%M% %I% %E% SMI" 76 77 #include <sys/ib/clients/rds/rdsib_cm.h> 78 #include <sys/ib/clients/rds/rdsib_ib.h> 79 #include <sys/ib/clients/rds/rdsib_buf.h> 80 #include <sys/ib/clients/rds/rdsib_ep.h> 81 82 /* 83 * This file contains CM related work: 84 * 85 * Service registration/deregistration 86 * Path lookup 87 * CM connection callbacks 88 * CM active and passive connection establishment 89 * Connection failover 90 */ 91 92 #define SRCIP src_addr.un.ip4addr 93 #define DSTIP dst_addr.un.ip4addr 94 95 /* 96 * Handle an incoming CM REQ 97 */ 98 /* ARGSUSED */ 99 static ibt_cm_status_t 100 rds_handle_cm_req(rds_state_t *statep, ibt_cm_event_t *evp, 101 ibt_cm_return_args_t *rargsp, void *rcmp, ibt_priv_data_len_t rcmp_len) 102 { 103 ibt_cm_req_rcv_t *reqp; 104 ib_gid_t lgid, rgid; 105 rds_cm_private_data_t cmp; 106 rds_session_t *sp; 107 rds_ep_t *ep; 108 ibt_channel_hdl_t chanhdl; 109 ibt_ip_cm_info_t ipcm_info; 110 int ret; 111 112 RDS_DPRINTF2("rds_handle_cm_req", "Enter"); 113 114 reqp = &evp->cm_event.req; 115 rgid = reqp->req_prim_addr.av_dgid; /* requester gid */ 116 lgid = reqp->req_prim_addr.av_sgid; /* receiver gid */ 117 118 RDS_DPRINTF2(LABEL, "REQ Received: From: %llx:%llx To: %llx:%llx", 119 rgid.gid_prefix, rgid.gid_guid, lgid.gid_prefix, lgid.gid_guid); 120 121 /* validate service id */ 122 if (reqp->req_service_id == RDS_SERVICE_ID) { 123 RDS_DPRINTF0(LABEL, "Version Mismatch: Remote system " 124 "(GUID: 0x%llx) is running an older version of RDS", 125 rgid.gid_guid); 126 return (IBT_CM_REJECT); 127 } 128 129 /* 130 * CM private data brings IP information 131 * Private data received is a stream of bytes and may not be properly 132 * aligned. So, bcopy the data onto the stack before accessing it. 133 */ 134 bcopy((uint8_t *)evp->cm_priv_data, &cmp, 135 sizeof (rds_cm_private_data_t)); 136 137 /* extract the CM IP info */ 138 ret = ibt_get_ip_data(evp->cm_priv_data_len, evp->cm_priv_data, 139 &ipcm_info); 140 if (ret != IBT_SUCCESS) { 141 RDS_DPRINTF2("rds_handle_cm_req", "ibt_get_ip_data failed: %d", 142 ret); 143 return (IBT_CM_REJECT); 144 } 145 146 RDS_DPRINTF2("rds_handle_cm_req", 147 "REQ Received: From IP: 0x%x To IP: 0x%x type: %d", 148 ntohl(ipcm_info.SRCIP), ntohl(ipcm_info.DSTIP), cmp.cmp_eptype); 149 150 if (cmp.cmp_version != RDS_VERSION) { 151 RDS_DPRINTF0(LABEL, "Version Mismatch: Local version: %d " 152 "Remote version: %d", RDS_VERSION, cmp.cmp_version); 153 return (IBT_CM_REJECT); 154 } 155 156 /* RDS supports V4 addresses only */ 157 if ((ipcm_info.src_addr.family != AF_INET) || 158 (ipcm_info.dst_addr.family != AF_INET)) { 159 RDS_DPRINTF2(LABEL, "Unsupported Address Family: " 160 "src: %d dst: %d", ipcm_info.src_addr.family, 161 ipcm_info.dst_addr.family); 162 return (IBT_CM_REJECT); 163 } 164 165 if (cmp.cmp_arch != RDS_THIS_ARCH) { 166 RDS_DPRINTF2(LABEL, "ARCH does not match (%d != %d)", 167 cmp.cmp_arch, RDS_THIS_ARCH); 168 return (IBT_CM_REJECT); 169 } 170 171 if ((cmp.cmp_eptype != RDS_EP_TYPE_CTRL) && 172 (cmp.cmp_eptype != RDS_EP_TYPE_DATA)) { 173 RDS_DPRINTF2(LABEL, "Unknown Channel type: %d", cmp.cmp_eptype); 174 return (IBT_CM_REJECT); 175 } 176 177 /* user_buffer_size should be same on all nodes */ 178 if (cmp.cmp_user_buffer_size != UserBufferSize) { 179 RDS_DPRINTF2(LABEL, 180 "UserBufferSize Mismatch, this node: %d remote node: %d", 181 UserBufferSize, cmp.cmp_user_buffer_size); 182 return (IBT_CM_REJECT); 183 } 184 185 /* 186 * RDS needs more time to process a failover REQ so send an MRA. 187 * Otherwise, the remote may retry the REQ and fail the connection. 188 */ 189 if ((cmp.cmp_failover) && (cmp.cmp_eptype == RDS_EP_TYPE_DATA)) { 190 RDS_DPRINTF2("rds_handle_cm_req", "Session Failover, send MRA"); 191 (void) ibt_cm_delay(IBT_CM_DELAY_REQ, evp->cm_session_id, 192 10000000 /* 10 sec */, NULL, 0); 193 } 194 195 /* Is there a session to the destination node? */ 196 rw_enter(&statep->rds_sessionlock, RW_READER); 197 sp = rds_session_lkup(statep, ntohl(ipcm_info.SRCIP), rgid.gid_guid); 198 rw_exit(&statep->rds_sessionlock); 199 200 if (sp == NULL) { 201 /* 202 * currently there is no session to the destination 203 * remote ip in the private data is the local ip and vice 204 * versa 205 */ 206 sp = rds_session_create(statep, ntohl(ipcm_info.DSTIP), 207 ntohl(ipcm_info.SRCIP), reqp, RDS_SESSION_PASSIVE); 208 if (sp == NULL) { 209 /* Check the list anyway. */ 210 rw_enter(&statep->rds_sessionlock, RW_READER); 211 sp = rds_session_lkup(statep, ntohl(ipcm_info.SRCIP), 212 rgid.gid_guid); 213 rw_exit(&statep->rds_sessionlock); 214 if (sp == NULL) { 215 /* 216 * The only way this can fail is due to lack 217 * of kernel resources 218 */ 219 return (IBT_CM_REJECT); 220 } 221 } 222 } 223 224 rw_enter(&sp->session_lock, RW_WRITER); 225 226 /* catch peer-to-peer case as soon as possible */ 227 if ((sp->session_state == RDS_SESSION_STATE_CREATED) || 228 (sp->session_state == RDS_SESSION_STATE_INIT)) { 229 /* Check possible peer-to-peer case here */ 230 if (sp->session_type != RDS_SESSION_PASSIVE) { 231 RDS_DPRINTF2("rds_handle_cm_req", 232 "SP(%p) Peer-peer connection handling", sp); 233 if (lgid.gid_guid > rgid.gid_guid) { 234 /* this node is active so reject this request */ 235 rw_exit(&sp->session_lock); 236 return (IBT_CM_REJECT); 237 } else { 238 /* this node is passive, change the session */ 239 sp->session_type = RDS_SESSION_PASSIVE; 240 sp->session_lgid = lgid; 241 sp->session_rgid = rgid; 242 } 243 } 244 } 245 246 RDS_DPRINTF2(LABEL, "SP(%p) state: %d", sp, sp->session_state); 247 248 switch (sp->session_state) { 249 case RDS_SESSION_STATE_CONNECTED: 250 RDS_DPRINTF2(LABEL, "STALE Session Detected SP(%p)", sp); 251 sp->session_state = RDS_SESSION_STATE_ERROR; 252 RDS_DPRINTF3("rds_handle_cm_req", "SP(%p) State " 253 "RDS_SESSION_STATE_ERROR", sp); 254 255 /* FALLTHRU */ 256 case RDS_SESSION_STATE_ERROR: 257 case RDS_SESSION_STATE_PASSIVE_CLOSING: 258 sp->session_type = RDS_SESSION_PASSIVE; 259 rw_exit(&sp->session_lock); 260 261 /* Handling this will take some time, so send an MRA */ 262 (void) ibt_cm_delay(IBT_CM_DELAY_REQ, evp->cm_session_id, 263 10000000 /* 10 sec */, NULL, 0); 264 265 /* 266 * Any pending completions don't get flushed until the channel 267 * is closed. So, passing 0 here will not wait for pending 268 * completions in rds_session_close before closing the channel 269 */ 270 rds_session_close(sp, IBT_NOCALLBACKS, 0); 271 272 /* move the session to init state */ 273 rw_enter(&sp->session_lock, RW_WRITER); 274 ret = rds_session_reinit(sp, lgid); 275 sp->session_myip = ntohl(ipcm_info.DSTIP); 276 sp->session_lgid = lgid; 277 sp->session_rgid = rgid; 278 if (ret != 0) { 279 rds_session_fini(sp); 280 sp->session_state = RDS_SESSION_STATE_FAILED; 281 RDS_DPRINTF3("rds_handle_cm_req", "SP(%p) State " 282 "RDS_SESSION_STATE_FAILED", sp); 283 rw_exit(&sp->session_lock); 284 return (IBT_CM_REJECT); 285 } else { 286 sp->session_state = RDS_SESSION_STATE_INIT; 287 RDS_DPRINTF3("rds_handle_cm_req", "SP(%p) State " 288 "RDS_SESSION_STATE_INIT", sp); 289 } 290 291 if (cmp.cmp_eptype == RDS_EP_TYPE_CTRL) { 292 ep = &sp->session_ctrlep; 293 } else { 294 ep = &sp->session_dataep; 295 } 296 break; 297 case RDS_SESSION_STATE_CREATED: 298 case RDS_SESSION_STATE_FAILED: 299 case RDS_SESSION_STATE_FINI: 300 /* 301 * Initialize both channels, we accept this connection 302 * only if both channels are initialized 303 */ 304 sp->session_type = RDS_SESSION_PASSIVE; 305 sp->session_lgid = lgid; 306 sp->session_rgid = rgid; 307 sp->session_state = RDS_SESSION_STATE_CREATED; 308 RDS_DPRINTF3("rds_handle_cm_req", "SP(%p) State " 309 "RDS_SESSION_STATE_CREATED", sp); 310 ret = rds_session_init(sp); 311 if (ret != 0) { 312 /* Seems like there are not enough resources */ 313 sp->session_state = RDS_SESSION_STATE_FAILED; 314 RDS_DPRINTF3("rds_handle_cm_req", "SP(%p) State " 315 "RDS_SESSION_STATE_FAILED", sp); 316 rw_exit(&sp->session_lock); 317 return (IBT_CM_REJECT); 318 } 319 sp->session_state = RDS_SESSION_STATE_INIT; 320 RDS_DPRINTF3("rds_handle_cm_req", "SP(%p) State " 321 "RDS_SESSION_STATE_INIT", sp); 322 323 /* FALLTHRU */ 324 case RDS_SESSION_STATE_INIT: 325 /* 326 * When re-using an existing session, make sure the 327 * session is still through the same HCA. Otherwise, the 328 * memory registrations have to moved to the new HCA. 329 */ 330 if (cmp.cmp_eptype == RDS_EP_TYPE_DATA) { 331 if (sp->session_lgid.gid_guid != lgid.gid_guid) { 332 RDS_DPRINTF2("rds_handle_cm_req", 333 "Existing Session but different gid " 334 "existing: 0x%llx, new: 0x%llx, " 335 "sending an MRA", 336 sp->session_lgid.gid_guid, lgid.gid_guid); 337 (void) ibt_cm_delay(IBT_CM_DELAY_REQ, 338 evp->cm_session_id, 10000000 /* 10 sec */, 339 NULL, 0); 340 ret = rds_session_reinit(sp, lgid); 341 if (ret != 0) { 342 rds_session_fini(sp); 343 sp->session_state = 344 RDS_SESSION_STATE_FAILED; 345 sp->session_failover = 0; 346 RDS_DPRINTF3("rds_failover_session", 347 "SP(%p) State " 348 "RDS_SESSION_STATE_FAILED", sp); 349 rw_exit(&sp->session_lock); 350 return (IBT_CM_REJECT); 351 } 352 } 353 ep = &sp->session_dataep; 354 } else { 355 ep = &sp->session_ctrlep; 356 } 357 358 break; 359 default: 360 RDS_DPRINTF2(LABEL, "ERROR: SP(%p) is in an unexpected " 361 "state: %d", sp, sp->session_state); 362 rw_exit(&sp->session_lock); 363 return (IBT_CM_REJECT); 364 } 365 366 sp->session_failover = 0; /* reset any previous value */ 367 if (cmp.cmp_failover) { 368 RDS_DPRINTF2("rds_handle_cm_req", 369 "SP(%p) Failover Session (BP %p)", sp, cmp.cmp_last_bufid); 370 sp->session_failover = 1; 371 } 372 373 mutex_enter(&ep->ep_lock); 374 if (ep->ep_state == RDS_EP_STATE_UNCONNECTED) { 375 ep->ep_state = RDS_EP_STATE_PASSIVE_PENDING; 376 sp->session_type = RDS_SESSION_PASSIVE; 377 rw_exit(&sp->session_lock); 378 } else if (ep->ep_state == RDS_EP_STATE_ACTIVE_PENDING) { 379 rw_exit(&sp->session_lock); 380 /* 381 * Peer to peer connection. There is an active 382 * connection pending on this ep. The one with 383 * greater port guid becomes active and the 384 * other becomes passive. 385 */ 386 RDS_DPRINTF2("rds_handle_cm_req", 387 "EP(%p) Peer-peer connection handling", ep); 388 if (lgid.gid_guid > rgid.gid_guid) { 389 /* this node is active so reject this request */ 390 mutex_exit(&ep->ep_lock); 391 RDS_DPRINTF2(LABEL, "SP(%p) EP(%p): " 392 "Rejecting passive in favor of active", sp, ep); 393 return (IBT_CM_REJECT); 394 } else { 395 /* 396 * This session is not the active end, change it 397 * to passive end. 398 */ 399 ep->ep_state = RDS_EP_STATE_PASSIVE_PENDING; 400 401 rw_enter(&sp->session_lock, RW_WRITER); 402 sp->session_type = RDS_SESSION_PASSIVE; 403 sp->session_lgid = lgid; 404 sp->session_rgid = rgid; 405 rw_exit(&sp->session_lock); 406 } 407 } else { 408 rw_exit(&sp->session_lock); 409 } 410 411 ep->ep_lbufid = cmp.cmp_last_bufid; 412 ep->ep_ackwr.wr.rc.rcwr.rdma.rdma_raddr = (ib_vaddr_t)cmp.cmp_ack_addr; 413 ep->ep_ackwr.wr.rc.rcwr.rdma.rdma_rkey = cmp.cmp_ack_rkey; 414 cmp.cmp_last_bufid = ep->ep_rbufid; 415 cmp.cmp_ack_addr = ep->ep_ack_addr; 416 cmp.cmp_ack_rkey = ep->ep_ack_rkey; 417 mutex_exit(&ep->ep_lock); 418 419 /* continue with accepting the connection request for this channel */ 420 chanhdl = rds_ep_alloc_rc_channel(ep, reqp->req_prim_hca_port); 421 if (chanhdl == NULL) { 422 mutex_enter(&ep->ep_lock); 423 ep->ep_state = RDS_EP_STATE_UNCONNECTED; 424 mutex_exit(&ep->ep_lock); 425 return (IBT_CM_REJECT); 426 } 427 428 /* pre-post recv buffers in the RQ */ 429 rds_post_recv_buf((void *)chanhdl); 430 431 rargsp->cm_ret_len = sizeof (rds_cm_private_data_t); 432 bcopy((uint8_t *)&cmp, rcmp, sizeof (rds_cm_private_data_t)); 433 rargsp->cm_ret.rep.cm_channel = chanhdl; 434 rargsp->cm_ret.rep.cm_rdma_ra_out = 4; 435 rargsp->cm_ret.rep.cm_rdma_ra_in = 4; 436 rargsp->cm_ret.rep.cm_rnr_retry_cnt = MinRnrRetry; 437 438 RDS_DPRINTF2("rds_handle_cm_req", "Return: SP(%p) EP(%p) Chan (%p)", 439 sp, ep, chanhdl); 440 441 return (IBT_CM_ACCEPT); 442 } 443 444 /* 445 * Handle an incoming CM REP 446 * Pre-post recv buffers for the QP 447 */ 448 /* ARGSUSED */ 449 static ibt_cm_status_t 450 rds_handle_cm_rep(ibt_cm_event_t *evp, ibt_cm_return_args_t *rargsp, 451 void *rcmp, ibt_priv_data_len_t rcmp_len) 452 { 453 rds_ep_t *ep; 454 rds_cm_private_data_t cmp; 455 456 RDS_DPRINTF2("rds_handle_cm_rep", "Enter"); 457 458 /* pre-post recv buffers in the RQ */ 459 rds_post_recv_buf((void *)evp->cm_channel); 460 461 ep = (rds_ep_t *)ibt_get_chan_private(evp->cm_channel); 462 bcopy((uint8_t *)evp->cm_priv_data, &cmp, 463 sizeof (rds_cm_private_data_t)); 464 ep->ep_lbufid = cmp.cmp_last_bufid; 465 ep->ep_ackwr.wr.rc.rcwr.rdma.rdma_raddr = (ib_vaddr_t)cmp.cmp_ack_addr; 466 ep->ep_ackwr.wr.rc.rcwr.rdma.rdma_rkey = cmp.cmp_ack_rkey; 467 468 rargsp->cm_ret_len = 0; 469 470 RDS_DPRINTF2("rds_handle_cm_rep", "Return: lbufid: %p", ep->ep_lbufid); 471 472 return (IBT_CM_ACCEPT); 473 } 474 475 /* 476 * Handle CONN EST 477 */ 478 static ibt_cm_status_t 479 rds_handle_cm_conn_est(ibt_cm_event_t *evp) 480 { 481 rds_session_t *sp; 482 rds_ep_t *ep; 483 484 ep = (rds_ep_t *)ibt_get_chan_private(evp->cm_channel); 485 486 RDS_DPRINTF2("rds_handle_cm_conn_est", "EP(%p) State: %d", ep, 487 ep->ep_state); 488 489 mutex_enter(&ep->ep_lock); 490 ASSERT((ep->ep_state == RDS_EP_STATE_ACTIVE_PENDING) || 491 (ep->ep_state == RDS_EP_STATE_PASSIVE_PENDING)); 492 ep->ep_state = RDS_EP_STATE_CONNECTED; 493 ep->ep_chanhdl = evp->cm_channel; 494 sp = ep->ep_sp; 495 mutex_exit(&ep->ep_lock); 496 497 (void) rds_session_active(sp); 498 499 RDS_DPRINTF2("rds_handle_cm_conn_est", "Return"); 500 return (IBT_CM_ACCEPT); 501 } 502 503 /* 504 * Handle CONN CLOSED 505 */ 506 static ibt_cm_status_t 507 rds_handle_cm_conn_closed(ibt_cm_event_t *evp) 508 { 509 rds_ep_t *ep; 510 rds_session_t *sp; 511 512 /* Catch DREQs but ignore DREPs */ 513 if (evp->cm_event.closed != IBT_CM_CLOSED_DREQ_RCVD) { 514 RDS_DPRINTF2("rds_handle_cm_conn_closed", 515 "Ignoring Event: %d received", evp->cm_event.closed); 516 return (IBT_CM_ACCEPT); 517 } 518 519 ep = (rds_ep_t *)ibt_get_chan_private(evp->cm_channel); 520 sp = ep->ep_sp; 521 RDS_DPRINTF2("rds_handle_cm_conn_closed", "EP(%p) Chan(%p) Enter", 522 ep, evp->cm_channel); 523 524 mutex_enter(&ep->ep_lock); 525 if (ep->ep_state != RDS_EP_STATE_CONNECTED) { 526 /* Ignore this DREQ */ 527 RDS_DPRINTF2("rds_handle_cm_conn_closed", 528 "EP(%p) not connected, state: %d", ep, ep->ep_state); 529 mutex_exit(&ep->ep_lock); 530 return (IBT_CM_ACCEPT); 531 } 532 ep->ep_state = RDS_EP_STATE_CLOSING; 533 mutex_exit(&ep->ep_lock); 534 535 rw_enter(&sp->session_lock, RW_WRITER); 536 RDS_DPRINTF2("rds_handle_cm_conn_closed", "SP(%p) - state: %d", sp, 537 sp->session_state); 538 539 switch (sp->session_state) { 540 case RDS_SESSION_STATE_CONNECTED: 541 sp->session_state = RDS_SESSION_STATE_PASSIVE_CLOSING; 542 RDS_DPRINTF3("rds_handle_cm_conn_closed", "SP(%p) State " 543 "RDS_SESSION_STATE_PASSIVE_CLOSING", sp); 544 break; 545 546 case RDS_SESSION_STATE_PASSIVE_CLOSING: 547 sp->session_state = RDS_SESSION_STATE_CLOSED; 548 RDS_DPRINTF3("rds_handle_cm_conn_closed", "SP(%p) State " 549 "RDS_SESSION_STATE_CLOSED", sp); 550 rds_passive_session_fini(sp); 551 sp->session_state = RDS_SESSION_STATE_FINI; 552 RDS_DPRINTF3("rds_handle_cm_conn_closed", 553 "SP(%p) State RDS_SESSION_STATE_FINI", sp); 554 break; 555 556 case RDS_SESSION_STATE_ACTIVE_CLOSING: 557 case RDS_SESSION_STATE_ERROR: 558 case RDS_SESSION_STATE_CLOSED: 559 break; 560 561 case RDS_SESSION_STATE_INIT: 562 sp->session_state = RDS_SESSION_STATE_ERROR; 563 RDS_DPRINTF3("rds_handle_cm_conn_closed", "SP(%p) State " 564 "RDS_SESSION_STATE_ERROR", sp); 565 rds_passive_session_fini(sp); 566 sp->session_state = RDS_SESSION_STATE_FAILED; 567 RDS_DPRINTF3("rds_handle_cm_conn_closed", 568 "SP(%p) State RDS_SESSION_STATE_FAILED", sp); 569 break; 570 571 default: 572 RDS_DPRINTF2("rds_handle_cm_conn_closed", 573 "SP(%p) - Unexpected state: %d", sp, sp->session_state); 574 rds_passive_session_fini(sp); 575 sp->session_state = RDS_SESSION_STATE_FAILED; 576 RDS_DPRINTF3("rds_handle_cm_conn_closed", "SP(%p) State " 577 "RDS_SESSION_STATE_FAILED", sp); 578 } 579 rw_exit(&sp->session_lock); 580 581 mutex_enter(&ep->ep_lock); 582 ep->ep_state = RDS_EP_STATE_CLOSED; 583 mutex_exit(&ep->ep_lock); 584 585 RDS_DPRINTF2("rds_handle_cm_conn_closed", "SP(%p) Return", sp); 586 return (IBT_CM_ACCEPT); 587 } 588 589 /* 590 * Handle EVENT FAILURE 591 */ 592 static ibt_cm_status_t 593 rds_handle_cm_event_failure(ibt_cm_event_t *evp) 594 { 595 rds_ep_t *ep; 596 rds_session_t *sp; 597 int ret; 598 599 RDS_DPRINTF2("rds_handle_cm_event_failure", "Enter: Chan hdl: 0x%p " 600 "Code: %d msg: %d reason: %d", evp->cm_channel, 601 evp->cm_event.failed.cf_code, evp->cm_event.failed.cf_msg, 602 evp->cm_event.failed.cf_reason); 603 604 if (evp->cm_event.failed.cf_reason == IBT_CM_INVALID_SID) { 605 RDS_DPRINTF0(LABEL, 606 "Received REJ with reason IBT_CM_INVALID_SID: " 607 "The remote system could be running an older RDS version"); 608 } 609 610 if (evp->cm_channel == NULL) { 611 return (IBT_CM_ACCEPT); 612 } 613 614 if ((evp->cm_event.failed.cf_code != IBT_CM_FAILURE_STALE) && 615 (evp->cm_event.failed.cf_msg == IBT_CM_FAILURE_REQ)) { 616 /* 617 * This end is active, just ignore, ibt_open_rc_channel() 618 * caller will take care of cleanup. 619 */ 620 RDS_DPRINTF2("rds_handle_cm_event_failure", 621 "Ignoring this event: Chan hdl: 0x%p", evp->cm_channel); 622 return (IBT_CM_ACCEPT); 623 } 624 625 ep = (rds_ep_t *)ibt_get_chan_private(evp->cm_channel); 626 sp = ep->ep_sp; 627 628 rw_enter(&sp->session_lock, RW_WRITER); 629 if (sp->session_type == RDS_SESSION_PASSIVE) { 630 RDS_DPRINTF2("rds_handle_cm_event_failure", 631 "SP(%p) - state: %d", sp, sp->session_state); 632 if ((sp->session_state == RDS_SESSION_STATE_INIT) || 633 (sp->session_state == RDS_SESSION_STATE_CONNECTED)) { 634 sp->session_state = RDS_SESSION_STATE_ERROR; 635 RDS_DPRINTF3("rds_handle_cm_event_failure", 636 "SP(%p) State RDS_SESSION_STATE_ERROR", sp); 637 638 /* 639 * Store the cm_channel for freeing later 640 * Active side frees it on ibt_open_rc_channel 641 * failure 642 */ 643 if (ep->ep_chanhdl == NULL) { 644 ep->ep_chanhdl = evp->cm_channel; 645 } 646 rw_exit(&sp->session_lock); 647 648 /* 649 * rds_passive_session_fini should not be called 650 * directly in the CM handler. It will cause a deadlock. 651 */ 652 ret = ddi_taskq_dispatch(rds_taskq, 653 rds_cleanup_passive_session, (void *)sp, 654 DDI_NOSLEEP); 655 if (ret != DDI_SUCCESS) { 656 RDS_DPRINTF1("rds_handle_cm_event_failure", 657 "SP(%p) TaskQ dispatch FAILED:%d", sp, ret); 658 } 659 return (IBT_CM_ACCEPT); 660 } 661 } 662 rw_exit(&sp->session_lock); 663 664 RDS_DPRINTF2("rds_handle_cm_event_failure", "SP(%p) Return", sp); 665 return (IBT_CM_ACCEPT); 666 } 667 668 /* 669 * CM Handler 670 * 671 * Called by IBCM 672 * The cm_private type differs for active and passive events. 673 */ 674 ibt_cm_status_t 675 rds_cm_handler(void *cm_private, ibt_cm_event_t *eventp, 676 ibt_cm_return_args_t *ret_args, void *ret_priv_data, 677 ibt_priv_data_len_t ret_len_max) 678 { 679 ibt_cm_status_t ret = IBT_CM_ACCEPT; 680 681 RDS_DPRINTF2("rds_cm_handler", "Enter: event: %d", eventp->cm_type); 682 683 switch (eventp->cm_type) { 684 case IBT_CM_EVENT_REQ_RCV: 685 ret = rds_handle_cm_req((rds_state_t *)cm_private, eventp, 686 ret_args, ret_priv_data, ret_len_max); 687 break; 688 case IBT_CM_EVENT_REP_RCV: 689 ret = rds_handle_cm_rep(eventp, ret_args, ret_priv_data, 690 ret_len_max); 691 break; 692 case IBT_CM_EVENT_MRA_RCV: 693 /* Not supported */ 694 break; 695 case IBT_CM_EVENT_CONN_EST: 696 ret = rds_handle_cm_conn_est(eventp); 697 break; 698 case IBT_CM_EVENT_CONN_CLOSED: 699 ret = rds_handle_cm_conn_closed(eventp); 700 break; 701 case IBT_CM_EVENT_FAILURE: 702 ret = rds_handle_cm_event_failure(eventp); 703 break; 704 case IBT_CM_EVENT_LAP_RCV: 705 /* Not supported */ 706 RDS_DPRINTF2(LABEL, "LAP message received"); 707 break; 708 case IBT_CM_EVENT_APR_RCV: 709 /* Not supported */ 710 RDS_DPRINTF2(LABEL, "APR message received"); 711 break; 712 default: 713 break; 714 } 715 716 RDS_DPRINTF2("rds_cm_handler", "Return"); 717 718 return (ret); 719 } 720 721 /* This is based on OFED Linux RDS */ 722 #define RDS_PORT_NUM 6556 723 724 /* 725 * Register the wellknown service with service id: RDS_SERVICE_ID 726 * Incoming connection requests should arrive on this service id. 727 */ 728 ibt_srv_hdl_t 729 rds_register_service(ibt_clnt_hdl_t rds_ibhdl) 730 { 731 ibt_srv_hdl_t srvhdl; 732 ibt_srv_desc_t srvdesc; 733 int ret; 734 735 RDS_DPRINTF2("rds_register_service", "Enter: 0x%p", rds_ibhdl); 736 737 bzero(&srvdesc, sizeof (ibt_srv_desc_t)); 738 srvdesc.sd_handler = rds_cm_handler; 739 srvdesc.sd_flags = IBT_SRV_NO_FLAGS; 740 741 /* 742 * Register the old service id for backward compatibility 743 * REQs received on this service id would be rejected 744 */ 745 ret = ibt_register_service(rds_ibhdl, &srvdesc, RDS_SERVICE_ID, 746 1, &rdsib_statep->rds_old_srvhdl, NULL); 747 if (ret != IBT_SUCCESS) { 748 RDS_DPRINTF2(LABEL, 749 "RDS Service (0x%llx) Registration Failed: %d", 750 RDS_SERVICE_ID, ret); 751 return (NULL); 752 } 753 754 /* 755 * This is the new service id as per: 756 * Annex A11: RDMA IP CM Service 757 */ 758 rdsib_statep->rds_service_id = ibt_get_ip_sid(IPPROTO_TCP, 759 RDS_PORT_NUM); 760 ret = ibt_register_service(rds_ibhdl, &srvdesc, 761 rdsib_statep->rds_service_id, 1, &srvhdl, NULL); 762 if (ret != IBT_SUCCESS) { 763 RDS_DPRINTF2(LABEL, 764 "RDS Service (0x%llx) Registration Failed: %d", 765 rdsib_statep->rds_service_id, ret); 766 return (NULL); 767 } 768 769 RDS_DPRINTF2("rds_register_service", "Return: 0x%p", srvhdl); 770 return (srvhdl); 771 } 772 773 /* Bind the RDS service on all ports */ 774 int 775 rds_bind_service(rds_state_t *statep) 776 { 777 rds_hca_t *hcap; 778 ib_gid_t gid; 779 uint_t jx, nbinds = 0, nports = 0; 780 int ret; 781 782 RDS_DPRINTF2("rds_bind_service", "Enter: 0x%p", statep); 783 784 hcap = statep->rds_hcalistp; 785 while (hcap != NULL) { 786 for (jx = 0; jx < hcap->hca_nports; jx++) { 787 nports++; 788 if (hcap->hca_pinfop[jx].p_linkstate != 789 IBT_PORT_ACTIVE) { 790 /* 791 * service bind will be called in the async 792 * handler when the port comes up 793 */ 794 continue; 795 } 796 797 gid = hcap->hca_pinfop[jx].p_sgid_tbl[0]; 798 RDS_DPRINTF5(LABEL, "HCA: 0x%llx Port: %d " 799 "gid: %llx:%llx", hcap->hca_guid, 800 hcap->hca_pinfop[jx].p_port_num, gid.gid_prefix, 801 gid.gid_guid); 802 803 /* pass statep as cm_private */ 804 ret = ibt_bind_service(statep->rds_srvhdl, gid, 805 NULL, statep, NULL); 806 if (ret != IBT_SUCCESS) { 807 RDS_DPRINTF2(LABEL, "Bind service for " 808 "HCA: 0x%llx Port: %d gid %llx:%llx " 809 "failed: %d", hcap->hca_guid, 810 hcap->hca_pinfop[jx].p_port_num, 811 gid.gid_prefix, gid.gid_guid, ret); 812 continue; 813 } 814 815 nbinds++; 816 817 /* bind the old service, ignore if it fails */ 818 ret = ibt_bind_service(statep->rds_old_srvhdl, gid, 819 NULL, statep, NULL); 820 if (ret != IBT_SUCCESS) { 821 RDS_DPRINTF2(LABEL, "Bind service for " 822 "HCA: 0x%llx Port: %d gid %llx:%llx " 823 "failed: %d", hcap->hca_guid, 824 hcap->hca_pinfop[jx].p_port_num, 825 gid.gid_prefix, gid.gid_guid, ret); 826 } 827 } 828 hcap = hcap->hca_nextp; 829 } 830 831 RDS_DPRINTF2(LABEL, "RDS Service available on %d/%d ports", 832 nbinds, nports); 833 834 #if 0 835 if (nbinds == 0) { 836 return (-1); 837 } 838 #endif 839 840 RDS_DPRINTF2("rds_bind_service", "Return"); 841 842 return (0); 843 } 844 845 /* Open an RC connection */ 846 int 847 rds_open_rc_channel(rds_ep_t *ep, ibt_path_info_t *pinfo, 848 ibt_execution_mode_t mode, ibt_channel_hdl_t *chanhdl) 849 { 850 rds_session_t *sp; 851 ibt_chan_open_args_t ocargs; 852 ibt_rc_returns_t ocrets; 853 rds_cm_private_data_t cmp; 854 uint8_t hca_port; 855 ibt_channel_hdl_t hdl; 856 ibt_status_t ret = 0; 857 ibt_ip_cm_info_t ipcm_info; 858 859 RDS_DPRINTF2("rds_open_rc_channel", "Enter: EP(%p) mode: %d", ep, mode); 860 861 sp = ep->ep_sp; 862 863 bzero(&ipcm_info, sizeof (ibt_ip_cm_info_t)); 864 ipcm_info.src_addr.family = AF_INET; 865 ipcm_info.SRCIP = htonl(sp->session_myip); 866 ipcm_info.dst_addr.family = AF_INET; 867 ipcm_info.DSTIP = htonl(sp->session_remip); 868 ipcm_info.src_port = htons(RDS_PORT_NUM); 869 ret = ibt_format_ip_private_data(&ipcm_info, 870 sizeof (rds_cm_private_data_t), &cmp); 871 if (ret != IBT_SUCCESS) { 872 RDS_DPRINTF2(LABEL, "SP(%p) EP(%p) ibt_format_ip_private_data " 873 "failed: %d", sp, ep, ret); 874 return (-1); 875 } 876 877 hca_port = pinfo->pi_prim_cep_path.cep_hca_port_num; 878 879 hdl = rds_ep_alloc_rc_channel(ep, hca_port); 880 if (hdl == NULL) { 881 return (-1); 882 } 883 884 cmp.cmp_version = RDS_VERSION; 885 cmp.cmp_arch = RDS_THIS_ARCH; 886 cmp.cmp_eptype = ep->ep_type; 887 cmp.cmp_failover = sp->session_failover; 888 cmp.cmp_last_bufid = ep->ep_rbufid; 889 cmp.cmp_user_buffer_size = UserBufferSize; 890 cmp.cmp_ack_addr = ep->ep_ack_addr; 891 cmp.cmp_ack_rkey = ep->ep_ack_rkey; 892 893 bzero(&ocargs, sizeof (ibt_chan_open_args_t)); 894 bzero(&ocrets, sizeof (ibt_rc_returns_t)); 895 ocargs.oc_path = pinfo; 896 ocargs.oc_cm_handler = rds_cm_handler; 897 ocargs.oc_cm_clnt_private = NULL; 898 ocargs.oc_rdma_ra_out = 4; 899 ocargs.oc_rdma_ra_in = 4; 900 ocargs.oc_priv_data_len = sizeof (rds_cm_private_data_t); 901 ocargs.oc_priv_data = &cmp; 902 ocargs.oc_path_retry_cnt = IBPathRetryCount; 903 ocargs.oc_path_rnr_retry_cnt = MinRnrRetry; 904 ret = ibt_open_rc_channel(hdl, IBT_OCHAN_NO_FLAGS, 905 mode, &ocargs, &ocrets); 906 if (ret != IBT_SUCCESS) { 907 RDS_DPRINTF2(LABEL, "SP(%p) EP(%p) ibt_open_rc_channel " 908 "failed: %d", sp, ep, ret); 909 (void) ibt_flush_channel(hdl); 910 (void) ibt_free_channel(hdl); 911 912 mutex_enter(&ep->ep_lock); 913 /* don't cleanup if this failure is due to peer-peer race */ 914 if (ep->ep_state == RDS_EP_STATE_ACTIVE_PENDING) { 915 /* cleanup stuff allocated in rds_ep_alloc_rc_channel */ 916 ep->ep_state = RDS_EP_STATE_ERROR; 917 rds_ep_free_rc_channel(ep); 918 } 919 mutex_exit(&ep->ep_lock); 920 921 return (-1); 922 } 923 924 *chanhdl = hdl; 925 926 RDS_DPRINTF2("rds_open_rc_channel", "Return: EP(%p) Chan: %p", ep, 927 *chanhdl); 928 929 return (0); 930 } 931 932 int 933 rds_close_rc_channel(ibt_channel_hdl_t chanhdl, ibt_execution_mode_t mode) 934 { 935 int ret; 936 937 RDS_DPRINTF2("rds_close_rc_channel", "Enter: Chan(%p) Mode(%d)", 938 chanhdl, mode); 939 940 ret = ibt_close_rc_channel(chanhdl, mode, NULL, 0, NULL, NULL, 0); 941 942 RDS_DPRINTF2("rds_close_rc_channel", "Return Chan(%p)", chanhdl); 943 944 return (ret); 945 } 946