xref: /illumos-gate/usr/src/uts/common/io/ib/clients/rds/rdsib_cm.c (revision 8257fab973a69800a3a3309e8af21fc1876d2df9)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 /*
26  * Copyright (c) 2005 SilverStorm Technologies, Inc. All rights reserved.
27  *
28  * This software is available to you under a choice of one of two
29  * licenses.  You may choose to be licensed under the terms of the GNU
30  * General Public License (GPL) Version 2, available from the file
31  * COPYING in the main directory of this source tree, or the
32  * OpenIB.org BSD license below:
33  *
34  *     Redistribution and use in source and binary forms, with or
35  *     without modification, are permitted provided that the following
36  *     conditions are met:
37  *
38  *	- Redistributions of source code must retain the above
39  *	  copyright notice, this list of conditions and the following
40  *	  disclaimer.
41  *
42  *	- Redistributions in binary form must reproduce the above
43  *	  copyright notice, this list of conditions and the following
44  *	  disclaimer in the documentation and/or other materials
45  *	  provided with the distribution.
46  *
47  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
48  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
49  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
50  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
51  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
52  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
53  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
54  * SOFTWARE.
55  *
56  */
57 /*
58  * Sun elects to include this software in Sun product
59  * under the OpenIB BSD license.
60  *
61  *
62  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
63  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
64  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
65  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
66  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
67  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
68  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
69  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
70  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
71  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
72  * POSSIBILITY OF SUCH DAMAGE.
73  */
74 
75 #pragma ident	"%Z%%M%	%I%	%E% SMI"
76 
77 #include <sys/ib/clients/rds/rdsib_cm.h>
78 #include <sys/ib/clients/rds/rdsib_ib.h>
79 #include <sys/ib/clients/rds/rdsib_buf.h>
80 #include <sys/ib/clients/rds/rdsib_ep.h>
81 
82 /*
83  * This file contains CM related work:
84  *
85  * Service registration/deregistration
86  * Path lookup
87  * CM connection callbacks
88  * CM active and passive connection establishment
89  * Connection failover
90  */
91 
92 /*
93  * Handle an incoming CM REQ
94  */
95 /* ARGSUSED */
96 static ibt_cm_status_t
97 rds_handle_cm_req(rds_state_t *statep, ibt_cm_event_t *evp,
98     ibt_cm_return_args_t *rargsp, void *rcmp, ibt_priv_data_len_t rcmp_len)
99 {
100 	ibt_cm_req_rcv_t	*reqp;
101 	ib_gid_t		lgid, rgid;
102 	rds_cm_private_data_t	cmp;
103 	rds_session_t		*sp;
104 	rds_ep_t		*ep;
105 	ibt_channel_hdl_t	chanhdl;
106 	int			ret;
107 
108 	RDS_DPRINTF2("rds_handle_cm_req", "Enter");
109 
110 	reqp = &evp->cm_event.req;
111 	rgid = reqp->req_prim_addr.av_dgid; /* requester gid */
112 	lgid = reqp->req_prim_addr.av_sgid; /* receiver gid */
113 
114 	RDS_DPRINTF2(LABEL, "REQ Received: From: %llx:%llx To: %llx:%llx",
115 	    rgid.gid_prefix, rgid.gid_guid, lgid.gid_prefix, lgid.gid_guid);
116 
117 	/*
118 	 * CM private data brings IP information
119 	 * Private data received is a stream of bytes and may not be properly
120 	 * aligned. So, bcopy the data onto the stack before accessing it.
121 	 */
122 	bcopy((uint8_t *)evp->cm_priv_data, &cmp,
123 	    sizeof (rds_cm_private_data_t));
124 
125 	RDS_DPRINTF2(LABEL, "REQ Received: From IP: 0x%x To IP: 0x%x type: %d",
126 	    cmp.cmp_localip, cmp.cmp_remip, cmp.cmp_eptype);
127 
128 	if (cmp.cmp_version != RDS_VERSION) {
129 		RDS_DPRINTF2(LABEL, "Version Mismatch: Local version: %d "
130 		    "Remote version: %d", RDS_VERSION, cmp.cmp_version);
131 		return (IBT_CM_REJECT);
132 	}
133 
134 	if (cmp.cmp_arch != RDS_THIS_ARCH) {
135 		RDS_DPRINTF2(LABEL, "ARCH does not match (%d != %d)",
136 		    cmp.cmp_arch, RDS_THIS_ARCH);
137 		return (IBT_CM_REJECT);
138 	}
139 
140 	if ((cmp.cmp_eptype != RDS_EP_TYPE_CTRL) &&
141 	    (cmp.cmp_eptype != RDS_EP_TYPE_DATA)) {
142 		RDS_DPRINTF2(LABEL, "Unknown Channel type: %d", cmp.cmp_eptype);
143 		return (IBT_CM_REJECT);
144 	}
145 
146 	/* user_buffer_size should be same on all nodes */
147 	if (cmp.cmp_user_buffer_size != UserBufferSize) {
148 		RDS_DPRINTF2(LABEL,
149 		    "UserBufferSize Mismatch, this node: %d remote node: %d",
150 		    UserBufferSize, cmp.cmp_user_buffer_size);
151 		return (IBT_CM_REJECT);
152 	}
153 
154 	/*
155 	 * RDS needs more time to process a failover REQ so send an MRA.
156 	 * Otherwise, the remote may retry the REQ and fail the connection.
157 	 */
158 	if ((cmp.cmp_failover) && (cmp.cmp_eptype == RDS_EP_TYPE_DATA)) {
159 		RDS_DPRINTF2("rds_handle_cm_req", "Session Failover, send MRA");
160 		(void) ibt_cm_delay(IBT_CM_DELAY_REQ, evp->cm_session_id,
161 		    10000000 /* 10 sec */, NULL, 0);
162 	}
163 
164 	/* Is there a session to the destination node? */
165 	rw_enter(&statep->rds_sessionlock, RW_READER);
166 	sp = rds_session_lkup(statep, cmp.cmp_localip, rgid.gid_guid);
167 	rw_exit(&statep->rds_sessionlock);
168 
169 	if (sp == NULL) {
170 		/*
171 		 * currently there is no session to the destination
172 		 * remote ip in the private data is the local ip and vice
173 		 * versa
174 		 */
175 		sp = rds_session_create(statep, cmp.cmp_remip, cmp.cmp_localip,
176 		    reqp, RDS_SESSION_PASSIVE);
177 		if (sp == NULL) {
178 			/* Check the list anyway. */
179 			rw_enter(&statep->rds_sessionlock, RW_READER);
180 			sp = rds_session_lkup(statep, cmp.cmp_localip,
181 			    rgid.gid_guid);
182 			rw_exit(&statep->rds_sessionlock);
183 			if (sp == NULL) {
184 				/*
185 				 * The only way this can fail is due to lack
186 				 * of kernel resources
187 				 */
188 				return (IBT_CM_REJECT);
189 			}
190 		}
191 	}
192 
193 	rw_enter(&sp->session_lock, RW_WRITER);
194 
195 	/* catch peer-to-peer case as soon as possible */
196 	if ((sp->session_state == RDS_SESSION_STATE_CREATED) ||
197 	    (sp->session_state == RDS_SESSION_STATE_INIT)) {
198 		/* Check possible peer-to-peer case here */
199 		if (sp->session_type != RDS_SESSION_PASSIVE) {
200 			RDS_DPRINTF2("rds_handle_cm_req",
201 			    "SP(%p) Peer-peer connection handling", sp);
202 			if (lgid.gid_guid > rgid.gid_guid) {
203 				/* this node is active so reject this request */
204 				rw_exit(&sp->session_lock);
205 				return (IBT_CM_REJECT);
206 			} else {
207 				/* this node is passive, change the session */
208 				sp->session_type = RDS_SESSION_PASSIVE;
209 				sp->session_lgid = lgid;
210 				sp->session_rgid = rgid;
211 			}
212 		}
213 	}
214 
215 	RDS_DPRINTF2(LABEL, "SP(%p) state: %d", sp, sp->session_state);
216 
217 	switch (sp->session_state) {
218 	case RDS_SESSION_STATE_CONNECTED:
219 		RDS_DPRINTF2(LABEL, "STALE Session Detected SP(%p)", sp);
220 		sp->session_state = RDS_SESSION_STATE_ERROR;
221 		RDS_DPRINTF3("rds_handle_cm_req", "SP(%p) State "
222 		    "RDS_SESSION_STATE_ERROR", sp);
223 
224 		/* FALLTHRU */
225 	case RDS_SESSION_STATE_ERROR:
226 	case RDS_SESSION_STATE_PASSIVE_CLOSING:
227 		sp->session_type = RDS_SESSION_PASSIVE;
228 		rw_exit(&sp->session_lock);
229 
230 		rds_session_close(sp, IBT_NOCALLBACKS, 1);
231 
232 		/* move the session to init state */
233 		rw_enter(&sp->session_lock, RW_WRITER);
234 		ret = rds_session_reinit(sp, lgid);
235 		sp->session_myip = cmp.cmp_remip;
236 		sp->session_lgid = lgid;
237 		sp->session_rgid = rgid;
238 		if (ret != 0) {
239 			rds_session_fini(sp);
240 			sp->session_state = RDS_SESSION_STATE_FAILED;
241 			RDS_DPRINTF3("rds_handle_cm_req", "SP(%p) State "
242 			    "RDS_SESSION_STATE_FAILED", sp);
243 			rw_exit(&sp->session_lock);
244 			return (IBT_CM_REJECT);
245 		} else {
246 			sp->session_state = RDS_SESSION_STATE_INIT;
247 			RDS_DPRINTF3("rds_handle_cm_req", "SP(%p) State "
248 			    "RDS_SESSION_STATE_INIT", sp);
249 		}
250 
251 		if (cmp.cmp_eptype == RDS_EP_TYPE_CTRL) {
252 			ep = &sp->session_ctrlep;
253 		} else {
254 			ep = &sp->session_dataep;
255 		}
256 		break;
257 	case RDS_SESSION_STATE_CREATED:
258 	case RDS_SESSION_STATE_FAILED:
259 	case RDS_SESSION_STATE_FINI:
260 		/*
261 		 * Initialize both channels, we accept this connection
262 		 * only if both channels are initialized
263 		 */
264 		sp->session_type = RDS_SESSION_PASSIVE;
265 		sp->session_lgid = lgid;
266 		sp->session_rgid = rgid;
267 		sp->session_state = RDS_SESSION_STATE_CREATED;
268 		RDS_DPRINTF3("rds_handle_cm_req", "SP(%p) State "
269 		    "RDS_SESSION_STATE_CREATED", sp);
270 		ret = rds_session_init(sp);
271 		if (ret != 0) {
272 			/* Seems like there are not enough resources */
273 			sp->session_state = RDS_SESSION_STATE_FAILED;
274 			RDS_DPRINTF3("rds_handle_cm_req", "SP(%p) State "
275 			    "RDS_SESSION_STATE_FAILED", sp);
276 			rw_exit(&sp->session_lock);
277 			return (IBT_CM_REJECT);
278 		}
279 		sp->session_state = RDS_SESSION_STATE_INIT;
280 		RDS_DPRINTF3("rds_handle_cm_req", "SP(%p) State "
281 		    "RDS_SESSION_STATE_INIT", sp);
282 
283 		/* FALLTHRU */
284 	case RDS_SESSION_STATE_INIT:
285 		/*
286 		 * When re-using an existing session, make sure the
287 		 * session is still through the same HCA. Otherwise, the
288 		 * memory registrations have to moved to the new HCA.
289 		 */
290 		if (cmp.cmp_eptype == RDS_EP_TYPE_DATA) {
291 			if (sp->session_lgid.gid_guid != lgid.gid_guid) {
292 				RDS_DPRINTF2("rds_handle_cm_req",
293 				    "Existing Session but different gid "
294 				    "existing: 0x%llx, new: 0x%llx, "
295 				    "sending an MRA",
296 				    sp->session_lgid.gid_guid, lgid.gid_guid);
297 				(void) ibt_cm_delay(IBT_CM_DELAY_REQ,
298 				    evp->cm_session_id, 10000000 /* 10 sec */,
299 				    NULL, 0);
300 				ret = rds_session_reinit(sp, lgid);
301 				if (ret != 0) {
302 					rds_session_fini(sp);
303 					sp->session_state =
304 					    RDS_SESSION_STATE_FAILED;
305 					sp->session_failover = 0;
306 					RDS_DPRINTF3("rds_failover_session",
307 					    "SP(%p) State "
308 					    "RDS_SESSION_STATE_FAILED", sp);
309 					rw_exit(&sp->session_lock);
310 					return (IBT_CM_REJECT);
311 				}
312 			}
313 			ep = &sp->session_dataep;
314 		} else {
315 			ep = &sp->session_ctrlep;
316 		}
317 
318 		break;
319 	default:
320 		RDS_DPRINTF2(LABEL, "ERROR: SP(%p) is in an unexpected "
321 		    "state: %d", sp, sp->session_state);
322 		rw_exit(&sp->session_lock);
323 		return (IBT_CM_REJECT);
324 	}
325 
326 	sp->session_failover = 0; /* reset any previous value */
327 	if (cmp.cmp_failover) {
328 		RDS_DPRINTF2("rds_handle_cm_req",
329 		    "SP(%p) Failover Session (BP %p)", sp, cmp.cmp_last_bufid);
330 		sp->session_failover = 1;
331 	}
332 
333 	mutex_enter(&ep->ep_lock);
334 	if (ep->ep_state == RDS_EP_STATE_UNCONNECTED) {
335 		ep->ep_state = RDS_EP_STATE_PASSIVE_PENDING;
336 		sp->session_type = RDS_SESSION_PASSIVE;
337 		rw_exit(&sp->session_lock);
338 	} else if (ep->ep_state == RDS_EP_STATE_ACTIVE_PENDING) {
339 		rw_exit(&sp->session_lock);
340 		/*
341 		 * Peer to peer connection. There is an active
342 		 * connection pending on this ep. The one with
343 		 * greater port guid becomes active and the
344 		 * other becomes passive.
345 		 */
346 		RDS_DPRINTF2("rds_handle_cm_req",
347 		    "EP(%p) Peer-peer connection handling", ep);
348 		if (lgid.gid_guid > rgid.gid_guid) {
349 			/* this node is active so reject this request */
350 			mutex_exit(&ep->ep_lock);
351 			RDS_DPRINTF2(LABEL, "SP(%p) EP(%p): "
352 			    "Rejecting passive in favor of active", sp, ep);
353 			return (IBT_CM_REJECT);
354 		} else {
355 			/*
356 			 * This session is not the active end, change it
357 			 * to passive end.
358 			 */
359 			ASSERT(sp->session_type == RDS_SESSION_ACTIVE);
360 			ep->ep_state = RDS_EP_STATE_PASSIVE_PENDING;
361 
362 			rw_enter(&sp->session_lock, RW_WRITER);
363 			sp->session_type = RDS_SESSION_PASSIVE;
364 			sp->session_lgid = lgid;
365 			sp->session_rgid = rgid;
366 			rw_exit(&sp->session_lock);
367 		}
368 	} else {
369 		rw_exit(&sp->session_lock);
370 	}
371 
372 	ep->ep_lbufid = cmp.cmp_last_bufid;
373 	ep->ep_ackwr.wr.rc.rcwr.rdma.rdma_raddr = (ib_vaddr_t)cmp.cmp_ack_addr;
374 	ep->ep_ackwr.wr.rc.rcwr.rdma.rdma_rkey = cmp.cmp_ack_rkey;
375 	cmp.cmp_last_bufid = ep->ep_rbufid;
376 	cmp.cmp_ack_addr = ep->ep_ack_addr;
377 	cmp.cmp_ack_rkey = ep->ep_ack_rkey;
378 	mutex_exit(&ep->ep_lock);
379 
380 	/* continue with accepting the connection request for this channel */
381 	chanhdl = rds_ep_alloc_rc_channel(ep, reqp->req_prim_hca_port);
382 	if (chanhdl == NULL) {
383 		mutex_enter(&ep->ep_lock);
384 		ep->ep_state = RDS_EP_STATE_UNCONNECTED;
385 		mutex_exit(&ep->ep_lock);
386 		return (IBT_CM_REJECT);
387 	}
388 
389 	/* pre-post recv buffers in the RQ */
390 	rds_post_recv_buf((void *)chanhdl);
391 
392 	rargsp->cm_ret_len = sizeof (rds_cm_private_data_t);
393 	bcopy((uint8_t *)&cmp, rcmp, sizeof (rds_cm_private_data_t));
394 	rargsp->cm_ret.rep.cm_channel = chanhdl;
395 	rargsp->cm_ret.rep.cm_rdma_ra_out = 4;
396 	rargsp->cm_ret.rep.cm_rdma_ra_in = 4;
397 	rargsp->cm_ret.rep.cm_rnr_retry_cnt = MinRnrRetry;
398 
399 	RDS_DPRINTF2("rds_handle_cm_req", "Return: SP(%p) EP(%p) Chan (%p)",
400 	    sp, ep, chanhdl);
401 
402 	return (IBT_CM_ACCEPT);
403 }
404 
405 /*
406  * Handle an incoming CM REP
407  * Pre-post recv buffers for the QP
408  */
409 /* ARGSUSED */
410 static ibt_cm_status_t
411 rds_handle_cm_rep(ibt_cm_event_t *evp, ibt_cm_return_args_t *rargsp,
412     void *rcmp, ibt_priv_data_len_t rcmp_len)
413 {
414 	rds_ep_t	*ep;
415 	rds_cm_private_data_t	cmp;
416 
417 	RDS_DPRINTF2("rds_handle_cm_rep", "Enter");
418 
419 	/* pre-post recv buffers in the RQ */
420 	rds_post_recv_buf((void *)evp->cm_channel);
421 
422 	ep = (rds_ep_t *)ibt_get_chan_private(evp->cm_channel);
423 	bcopy((uint8_t *)evp->cm_priv_data, &cmp,
424 	    sizeof (rds_cm_private_data_t));
425 	ep->ep_lbufid = cmp.cmp_last_bufid;
426 	ep->ep_ackwr.wr.rc.rcwr.rdma.rdma_raddr = (ib_vaddr_t)cmp.cmp_ack_addr;
427 	ep->ep_ackwr.wr.rc.rcwr.rdma.rdma_rkey = cmp.cmp_ack_rkey;
428 
429 	rargsp->cm_ret_len = 0;
430 
431 	RDS_DPRINTF2("rds_handle_cm_rep", "Return: lbufid: %p", ep->ep_lbufid);
432 
433 	return (IBT_CM_ACCEPT);
434 }
435 
436 /*
437  * Handle CONN EST
438  */
439 static ibt_cm_status_t
440 rds_handle_cm_conn_est(ibt_cm_event_t *evp)
441 {
442 	rds_session_t	*sp;
443 	rds_ep_t	*ep;
444 
445 	ep = (rds_ep_t *)ibt_get_chan_private(evp->cm_channel);
446 
447 	RDS_DPRINTF2("rds_handle_cm_conn_est", "EP(%p) State: %d", ep,
448 	    ep->ep_state);
449 
450 	mutex_enter(&ep->ep_lock);
451 	ASSERT((ep->ep_state == RDS_EP_STATE_ACTIVE_PENDING) ||
452 	    (ep->ep_state == RDS_EP_STATE_PASSIVE_PENDING));
453 	ep->ep_state = RDS_EP_STATE_CONNECTED;
454 	ep->ep_chanhdl = evp->cm_channel;
455 	sp = ep->ep_sp;
456 	mutex_exit(&ep->ep_lock);
457 
458 	(void) rds_session_active(sp);
459 
460 	RDS_DPRINTF2("rds_handle_cm_conn_est", "Return");
461 	return (IBT_CM_ACCEPT);
462 }
463 
464 /*
465  * Handle CONN CLOSED
466  */
467 static ibt_cm_status_t
468 rds_handle_cm_conn_closed(ibt_cm_event_t *evp)
469 {
470 	rds_ep_t	*ep;
471 	rds_session_t	*sp;
472 
473 	/* Catch DREQs but ignore DREPs */
474 	if (evp->cm_event.closed != IBT_CM_CLOSED_DREQ_RCVD) {
475 		RDS_DPRINTF2("rds_handle_cm_conn_closed",
476 		    "Ignoring Event: %d received", evp->cm_event.closed);
477 		return (IBT_CM_ACCEPT);
478 	}
479 
480 	ep = (rds_ep_t *)ibt_get_chan_private(evp->cm_channel);
481 	sp = ep->ep_sp;
482 	RDS_DPRINTF2("rds_handle_cm_conn_closed", "EP(%p) Enter", ep);
483 
484 	mutex_enter(&ep->ep_lock);
485 	if (ep->ep_state != RDS_EP_STATE_CONNECTED) {
486 		/* Ignore this DREQ */
487 		RDS_DPRINTF2("rds_handle_cm_conn_closed",
488 		    "EP(%p) not connected, state: %d", ep, ep->ep_state);
489 		mutex_exit(&ep->ep_lock);
490 		return (IBT_CM_ACCEPT);
491 	}
492 	ep->ep_state = RDS_EP_STATE_CLOSING;
493 	mutex_exit(&ep->ep_lock);
494 
495 	rw_enter(&sp->session_lock, RW_WRITER);
496 	RDS_DPRINTF2("rds_handle_cm_conn_closed", "SP(%p) - state: %d", sp,
497 	    sp->session_state);
498 
499 	switch (sp->session_state) {
500 	case RDS_SESSION_STATE_CONNECTED:
501 		sp->session_state = RDS_SESSION_STATE_PASSIVE_CLOSING;
502 		RDS_DPRINTF3("rds_handle_cm_conn_closed", "SP(%p) State "
503 		    "RDS_SESSION_STATE_PASSIVE_CLOSING", sp);
504 		break;
505 
506 	case RDS_SESSION_STATE_PASSIVE_CLOSING:
507 		sp->session_state = RDS_SESSION_STATE_CLOSED;
508 		RDS_DPRINTF3("rds_handle_cm_conn_closed", "SP(%p) State "
509 		    "RDS_SESSION_STATE_CLOSED", sp);
510 		rds_passive_session_fini(sp);
511 		sp->session_state = RDS_SESSION_STATE_FINI;
512 		RDS_DPRINTF3("rds_handle_cm_conn_closed",
513 		    "SP(%p) State RDS_SESSION_STATE_FINI", sp);
514 		break;
515 
516 	case RDS_SESSION_STATE_ACTIVE_CLOSING:
517 	case RDS_SESSION_STATE_ERROR:
518 	case RDS_SESSION_STATE_CLOSED:
519 		break;
520 
521 	case RDS_SESSION_STATE_INIT:
522 		sp->session_state = RDS_SESSION_STATE_ERROR;
523 		RDS_DPRINTF3("rds_handle_cm_conn_closed", "SP(%p) State "
524 		    "RDS_SESSION_STATE_ERROR", sp);
525 		rds_passive_session_fini(sp);
526 		sp->session_state = RDS_SESSION_STATE_FAILED;
527 		RDS_DPRINTF3("rds_handle_cm_conn_closed",
528 		    "SP(%p) State RDS_SESSION_STATE_FAILED", sp);
529 		break;
530 
531 	default:
532 		RDS_DPRINTF2("rds_handle_cm_conn_closed",
533 		    "SP(%p) - Unexpected state: %d", sp, sp->session_state);
534 		rds_passive_session_fini(sp);
535 		sp->session_state = RDS_SESSION_STATE_FAILED;
536 		RDS_DPRINTF3("rds_handle_cm_conn_closed", "SP(%p) State "
537 		    "RDS_SESSION_STATE_FAILED", sp);
538 	}
539 	rw_exit(&sp->session_lock);
540 
541 	mutex_enter(&ep->ep_lock);
542 	ep->ep_state = RDS_EP_STATE_CLOSED;
543 	mutex_exit(&ep->ep_lock);
544 
545 	RDS_DPRINTF2("rds_handle_cm_conn_closed", "SP(%p) Return", sp);
546 	return (IBT_CM_ACCEPT);
547 }
548 
549 /*
550  * Handle EVENT FAILURE
551  */
552 static ibt_cm_status_t
553 rds_handle_cm_event_failure(ibt_cm_event_t *evp)
554 {
555 	rds_ep_t	*ep;
556 	rds_session_t	*sp;
557 	int		ret;
558 
559 	RDS_DPRINTF2("rds_handle_cm_event_failure", "Enter: Chan hdl: 0x%p "
560 	    "Code: %d msg: %d reason: %d", evp->cm_channel,
561 	    evp->cm_event.failed.cf_code, evp->cm_event.failed.cf_msg,
562 	    evp->cm_event.failed.cf_reason);
563 
564 	if (evp->cm_channel == NULL) {
565 		return (IBT_CM_ACCEPT);
566 	}
567 
568 	ep = (rds_ep_t *)ibt_get_chan_private(evp->cm_channel);
569 	sp = ep->ep_sp;
570 
571 	mutex_enter(&ep->ep_lock);
572 	ep->ep_state = RDS_EP_STATE_ERROR;
573 	mutex_exit(&ep->ep_lock);
574 
575 	rw_enter(&sp->session_lock, RW_WRITER);
576 	if (sp->session_type == RDS_SESSION_PASSIVE) {
577 		RDS_DPRINTF2("rds_handle_cm_event_failure",
578 		    "SP(%p) - state: %d", sp, sp->session_state);
579 		if ((sp->session_state == RDS_SESSION_STATE_INIT) ||
580 		    (sp->session_state == RDS_SESSION_STATE_CONNECTED)) {
581 			sp->session_state = RDS_SESSION_STATE_ERROR;
582 			RDS_DPRINTF3("rds_handle_cm_event_failure",
583 			    "SP(%p) State RDS_SESSION_STATE_ERROR", sp);
584 
585 			/*
586 			 * Store the cm_channel for freeing later
587 			 * Active side frees it on ibt_open_rc_channel
588 			 * failure
589 			 */
590 			if (ep->ep_chanhdl == NULL) {
591 				ep->ep_chanhdl = evp->cm_channel;
592 			}
593 			rw_exit(&sp->session_lock);
594 
595 			/*
596 			 * rds_passive_session_fini should not be called
597 			 * directly in the CM handler. It will cause a deadlock.
598 			 */
599 			ret = ddi_taskq_dispatch(rds_taskq,
600 			    rds_cleanup_passive_session, (void *)sp,
601 			    DDI_NOSLEEP);
602 			if (ret != DDI_SUCCESS) {
603 				RDS_DPRINTF1("rds_handle_cm_event_failure",
604 				    "SP(%p) TaskQ dispatch FAILED:%d", sp, ret);
605 			}
606 			return (IBT_CM_ACCEPT);
607 		}
608 	}
609 	rw_exit(&sp->session_lock);
610 
611 	RDS_DPRINTF2("rds_handle_cm_event_failure", "SP(%p) Return", sp);
612 	return (IBT_CM_ACCEPT);
613 }
614 
615 /*
616  * CM Handler
617  *
618  * Called by IBCM
619  * The cm_private type differs for active and passive events.
620  */
621 ibt_cm_status_t
622 rds_cm_handler(void *cm_private, ibt_cm_event_t *eventp,
623     ibt_cm_return_args_t *ret_args, void *ret_priv_data,
624     ibt_priv_data_len_t ret_len_max)
625 {
626 	ibt_cm_status_t		ret = IBT_CM_ACCEPT;
627 
628 	RDS_DPRINTF2("rds_cm_handler", "Enter: event: %d", eventp->cm_type);
629 
630 	switch (eventp->cm_type) {
631 	case IBT_CM_EVENT_REQ_RCV:
632 		ret = rds_handle_cm_req((rds_state_t *)cm_private, eventp,
633 		    ret_args, ret_priv_data, ret_len_max);
634 		break;
635 	case IBT_CM_EVENT_REP_RCV:
636 		ret = rds_handle_cm_rep(eventp, ret_args, ret_priv_data,
637 		    ret_len_max);
638 		break;
639 	case IBT_CM_EVENT_MRA_RCV:
640 		/* Not supported */
641 		break;
642 	case IBT_CM_EVENT_CONN_EST:
643 		ret = rds_handle_cm_conn_est(eventp);
644 		break;
645 	case IBT_CM_EVENT_CONN_CLOSED:
646 		ret = rds_handle_cm_conn_closed(eventp);
647 		break;
648 	case IBT_CM_EVENT_FAILURE:
649 		ret = rds_handle_cm_event_failure(eventp);
650 		break;
651 	case IBT_CM_EVENT_LAP_RCV:
652 		/* Not supported */
653 		RDS_DPRINTF2(LABEL, "LAP message received");
654 		break;
655 	case IBT_CM_EVENT_APR_RCV:
656 		/* Not supported */
657 		RDS_DPRINTF2(LABEL, "APR message received");
658 		break;
659 	default:
660 		break;
661 	}
662 
663 	RDS_DPRINTF2("rds_cm_handler", "Return");
664 
665 	return (ret);
666 }
667 
668 /*
669  * Register the wellknown service with service id: RDS_SERVICE_ID
670  * Incoming connection requests should arrive on this service id.
671  */
672 ibt_srv_hdl_t
673 rds_register_service(ibt_clnt_hdl_t rds_ibhdl)
674 {
675 	ibt_srv_hdl_t	srvhdl;
676 	ibt_srv_desc_t	srvdesc;
677 	int		ret;
678 
679 	RDS_DPRINTF2("rds_register_service", "Enter: 0x%p", rds_ibhdl);
680 
681 	bzero(&srvdesc, sizeof (ibt_srv_desc_t));
682 	srvdesc.sd_handler = rds_cm_handler;
683 	srvdesc.sd_flags = IBT_SRV_NO_FLAGS;
684 
685 	ret = ibt_register_service(rds_ibhdl, &srvdesc, RDS_SERVICE_ID,
686 	    1, &srvhdl, NULL);
687 	if (ret != IBT_SUCCESS) {
688 		RDS_DPRINTF2(LABEL, "RDS Service Registration Failed: %d",
689 		    ret);
690 		return (NULL);
691 	}
692 
693 	RDS_DPRINTF2("rds_register_service", "Return: 0x%p", srvhdl);
694 	return (srvhdl);
695 }
696 
697 /* Bind the RDS service on all ports */
698 int
699 rds_bind_service(rds_state_t *statep)
700 {
701 	rds_hca_t	*hcap;
702 	ib_gid_t	gid;
703 	uint_t		jx, nbinds = 0, nports = 0;
704 	int		ret;
705 
706 	RDS_DPRINTF2("rds_bind_service", "Enter: 0x%p", statep);
707 
708 	hcap = statep->rds_hcalistp;
709 	while (hcap != NULL) {
710 		for (jx = 0; jx < hcap->hca_nports; jx++) {
711 			nports++;
712 			if (hcap->hca_pinfop[jx].p_linkstate !=
713 			    IBT_PORT_ACTIVE) {
714 				/*
715 				 * service bind will be called in the async
716 				 * handler when the port comes up
717 				 */
718 				continue;
719 			}
720 
721 			gid = hcap->hca_pinfop[jx].p_sgid_tbl[0];
722 			RDS_DPRINTF5(LABEL, "HCA: 0x%llx Port: %d "
723 			    "gid: %llx:%llx", hcap->hca_guid,
724 			    hcap->hca_pinfop[jx].p_port_num, gid.gid_prefix,
725 			    gid.gid_guid);
726 
727 			/* pass statep as cm_private */
728 			ret = ibt_bind_service(statep->rds_srvhdl, gid,
729 			    NULL, statep, NULL);
730 			if (ret != IBT_SUCCESS) {
731 				RDS_DPRINTF2(LABEL, "Bind service for "
732 				    "HCA: 0x%llx Port: %d gid %llx:%llx "
733 				    "failed: %d", hcap->hca_guid,
734 				    hcap->hca_pinfop[jx].p_port_num,
735 				    gid.gid_prefix, gid.gid_guid, ret);
736 				continue;
737 			}
738 
739 			nbinds++;
740 		}
741 		hcap = hcap->hca_nextp;
742 	}
743 
744 	RDS_DPRINTF2(LABEL, "RDS Service available on %d/%d ports",
745 	    nbinds, nports);
746 
747 #if 0
748 	if (nbinds == 0) {
749 		return (-1);
750 	}
751 #endif
752 
753 	RDS_DPRINTF2("rds_bind_service", "Return");
754 
755 	return (0);
756 }
757 
758 /* Open an RC connection */
759 int
760 rds_open_rc_channel(rds_ep_t *ep, ibt_path_info_t *pinfo,
761     ibt_execution_mode_t mode, ibt_channel_hdl_t *chanhdl)
762 {
763 	rds_session_t		*sp;
764 	ibt_chan_open_args_t	ocargs;
765 	ibt_rc_returns_t	ocrets;
766 	rds_cm_private_data_t	cmp;
767 	uint8_t			hca_port;
768 	ibt_channel_hdl_t	hdl;
769 	int			ret = 0;
770 
771 	RDS_DPRINTF2("rds_open_rc_channel", "Enter: EP(%p) mode: %d", ep, mode);
772 
773 	sp = ep->ep_sp;
774 
775 	hca_port = pinfo->pi_prim_cep_path.cep_hca_port_num;
776 
777 	hdl = rds_ep_alloc_rc_channel(ep, hca_port);
778 	if (hdl == NULL) {
779 		return (-1);
780 	}
781 
782 	cmp.cmp_version = RDS_VERSION;
783 	cmp.cmp_arch = RDS_THIS_ARCH;
784 	cmp.cmp_remip = sp->session_remip;
785 	cmp.cmp_localip = sp->session_myip;
786 	cmp.cmp_eptype = ep->ep_type;
787 	cmp.cmp_failover = sp->session_failover;
788 	cmp.cmp_last_bufid = ep->ep_rbufid;
789 	cmp.cmp_user_buffer_size = UserBufferSize;
790 	cmp.cmp_ack_addr = ep->ep_ack_addr;
791 	cmp.cmp_ack_rkey = ep->ep_ack_rkey;
792 
793 	bzero(&ocargs, sizeof (ibt_chan_open_args_t));
794 	bzero(&ocrets, sizeof (ibt_rc_returns_t));
795 	ocargs.oc_path = pinfo;
796 	ocargs.oc_cm_handler = rds_cm_handler;
797 	ocargs.oc_cm_clnt_private = NULL;
798 	ocargs.oc_rdma_ra_out = 4;
799 	ocargs.oc_rdma_ra_in = 4;
800 	ocargs.oc_priv_data_len = sizeof (rds_cm_private_data_t);
801 	ocargs.oc_priv_data = &cmp;
802 	ocargs.oc_path_retry_cnt = IBPathRetryCount;
803 	ocargs.oc_path_rnr_retry_cnt = MinRnrRetry;
804 	ret = ibt_open_rc_channel(hdl, IBT_OCHAN_NO_FLAGS,
805 	    mode, &ocargs, &ocrets);
806 	if (ret != IBT_SUCCESS) {
807 		RDS_DPRINTF2(LABEL, "SP(%p) EP(%p) ibt_open_rc_channel "
808 		    "failed: %d", sp, ep, ret);
809 		(void) ibt_flush_channel(hdl);
810 		(void) ibt_free_channel(hdl);
811 		/* cleanup stuff allocated in rds_ep_alloc_rc_channel */
812 		(void) ibt_free_cq(ep->ep_recvcq);
813 		ep->ep_recvcq = NULL;
814 		(void) ibt_free_cq(ep->ep_sendcq);
815 		ep->ep_sendcq = NULL;
816 		return (-1);
817 	}
818 
819 	*chanhdl = hdl;
820 
821 	RDS_DPRINTF2("rds_open_rc_channel", "Return: EP(%p) Chan: %p", ep,
822 	    *chanhdl);
823 
824 	return (0);
825 }
826 
827 int
828 rds_close_rc_channel(ibt_channel_hdl_t chanhdl, ibt_execution_mode_t mode)
829 {
830 	int	ret;
831 
832 	RDS_DPRINTF2("rds_close_rc_channel", "Enter: Chan(%p) Mode(%d)",
833 	    chanhdl, mode);
834 
835 	ret = ibt_close_rc_channel(chanhdl, mode, NULL, 0, NULL, NULL, 0);
836 
837 	RDS_DPRINTF2("rds_close_rc_channel", "Return Chan(%p)", chanhdl);
838 
839 	return (ret);
840 }
841