/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
/*
 * Copyright (c) 2005 SilverStorm Technologies, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *	- Redistributions of source code must retain the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer.
 *
 *	- Redistributions in binary form must reproduce the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer in the documentation and/or other materials
 *	  provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */
/*
 * Sun elects to include this software in Sun product
 * under the OpenIB BSD license.
 *
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
74b86efd96Sagiri 
75b86efd96Sagiri #include <sys/ib/clients/rds/rdsib_cm.h>
76b86efd96Sagiri #include <sys/ib/clients/rds/rdsib_ib.h>
77b86efd96Sagiri #include <sys/ib/clients/rds/rdsib_buf.h>
78b86efd96Sagiri #include <sys/ib/clients/rds/rdsib_ep.h>
79b86efd96Sagiri 
/*
 * This file contains CM related work:
 *
 * Service registration/deregistration
 * Path lookup
 * CM connection callbacks
 * CM active and passive connection establishment
 * Connection failover
 */
89b86efd96Sagiri 
/*
 * Shorthand member paths for the IPv4 source and destination addresses of
 * an IP CM info structure, used as ipcm_info.SRCIP / ipcm_info.DSTIP.
 */
#define	SRCIP	src_addr.un.ip4addr
#define	DSTIP	dst_addr.un.ip4addr
92948cd88eSagiri 
93b86efd96Sagiri /*
94b86efd96Sagiri  * Handle an incoming CM REQ
95b86efd96Sagiri  */
96b86efd96Sagiri /* ARGSUSED */
97b86efd96Sagiri static ibt_cm_status_t
rds_handle_cm_req(rds_state_t * statep,ibt_cm_event_t * evp,ibt_cm_return_args_t * rargsp,void * rcmp,ibt_priv_data_len_t rcmp_len)98b86efd96Sagiri rds_handle_cm_req(rds_state_t *statep, ibt_cm_event_t *evp,
99b86efd96Sagiri     ibt_cm_return_args_t *rargsp, void *rcmp, ibt_priv_data_len_t rcmp_len)
100b86efd96Sagiri {
101b86efd96Sagiri 	ibt_cm_req_rcv_t	*reqp;
102b86efd96Sagiri 	ib_gid_t		lgid, rgid;
103b86efd96Sagiri 	rds_cm_private_data_t	cmp;
104b86efd96Sagiri 	rds_session_t		*sp;
105b86efd96Sagiri 	rds_ep_t		*ep;
106b86efd96Sagiri 	ibt_channel_hdl_t	chanhdl;
107948cd88eSagiri 	ibt_ip_cm_info_t	ipcm_info;
108*0c19630bSagiri 	uint8_t			save_state, save_type;
109b86efd96Sagiri 	int			ret;
110b86efd96Sagiri 
111b86efd96Sagiri 	RDS_DPRINTF2("rds_handle_cm_req", "Enter");
112b86efd96Sagiri 
113b86efd96Sagiri 	reqp = &evp->cm_event.req;
114b86efd96Sagiri 	rgid = reqp->req_prim_addr.av_dgid; /* requester gid */
115b86efd96Sagiri 	lgid = reqp->req_prim_addr.av_sgid; /* receiver gid */
116b86efd96Sagiri 
117b86efd96Sagiri 	RDS_DPRINTF2(LABEL, "REQ Received: From: %llx:%llx To: %llx:%llx",
118b86efd96Sagiri 	    rgid.gid_prefix, rgid.gid_guid, lgid.gid_prefix, lgid.gid_guid);
119b86efd96Sagiri 
120b86efd96Sagiri 	/*
121b86efd96Sagiri 	 * CM private data brings IP information
122b86efd96Sagiri 	 * Private data received is a stream of bytes and may not be properly
123b86efd96Sagiri 	 * aligned. So, bcopy the data onto the stack before accessing it.
124b86efd96Sagiri 	 */
125b86efd96Sagiri 	bcopy((uint8_t *)evp->cm_priv_data, &cmp,
126b86efd96Sagiri 	    sizeof (rds_cm_private_data_t));
127b86efd96Sagiri 
128948cd88eSagiri 	/* extract the CM IP info */
129948cd88eSagiri 	ret = ibt_get_ip_data(evp->cm_priv_data_len, evp->cm_priv_data,
130948cd88eSagiri 	    &ipcm_info);
131948cd88eSagiri 	if (ret != IBT_SUCCESS) {
132948cd88eSagiri 		RDS_DPRINTF2("rds_handle_cm_req", "ibt_get_ip_data failed: %d",
133948cd88eSagiri 		    ret);
134948cd88eSagiri 		return (IBT_CM_REJECT);
135948cd88eSagiri 	}
136948cd88eSagiri 
137948cd88eSagiri 	RDS_DPRINTF2("rds_handle_cm_req",
138948cd88eSagiri 	    "REQ Received: From IP: 0x%x To IP: 0x%x type: %d",
139d22e11ebSBill Taylor 	    ipcm_info.SRCIP, ipcm_info.DSTIP, cmp.cmp_eptype);
140b86efd96Sagiri 
141b86efd96Sagiri 	if (cmp.cmp_version != RDS_VERSION) {
14274242422Sagiri 		RDS_DPRINTF2(LABEL, "Version Mismatch: Local version: %d "
143b86efd96Sagiri 		    "Remote version: %d", RDS_VERSION, cmp.cmp_version);
144b86efd96Sagiri 		return (IBT_CM_REJECT);
145b86efd96Sagiri 	}
146b86efd96Sagiri 
147948cd88eSagiri 	/* RDS supports V4 addresses only */
148948cd88eSagiri 	if ((ipcm_info.src_addr.family != AF_INET) ||
149948cd88eSagiri 	    (ipcm_info.dst_addr.family != AF_INET)) {
150948cd88eSagiri 		RDS_DPRINTF2(LABEL, "Unsupported Address Family: "
151948cd88eSagiri 		    "src: %d dst: %d", ipcm_info.src_addr.family,
152948cd88eSagiri 		    ipcm_info.dst_addr.family);
153948cd88eSagiri 		return (IBT_CM_REJECT);
154948cd88eSagiri 	}
155948cd88eSagiri 
156b86efd96Sagiri 	if (cmp.cmp_arch != RDS_THIS_ARCH) {
157b86efd96Sagiri 		RDS_DPRINTF2(LABEL, "ARCH does not match (%d != %d)",
158b86efd96Sagiri 		    cmp.cmp_arch, RDS_THIS_ARCH);
159b86efd96Sagiri 		return (IBT_CM_REJECT);
160b86efd96Sagiri 	}
161b86efd96Sagiri 
162b86efd96Sagiri 	if ((cmp.cmp_eptype != RDS_EP_TYPE_CTRL) &&
163b86efd96Sagiri 	    (cmp.cmp_eptype != RDS_EP_TYPE_DATA)) {
164b86efd96Sagiri 		RDS_DPRINTF2(LABEL, "Unknown Channel type: %d", cmp.cmp_eptype);
165b86efd96Sagiri 		return (IBT_CM_REJECT);
166b86efd96Sagiri 	}
167b86efd96Sagiri 
168b86efd96Sagiri 	/* user_buffer_size should be same on all nodes */
169b86efd96Sagiri 	if (cmp.cmp_user_buffer_size != UserBufferSize) {
170b86efd96Sagiri 		RDS_DPRINTF2(LABEL,
171b86efd96Sagiri 		    "UserBufferSize Mismatch, this node: %d remote node: %d",
172b86efd96Sagiri 		    UserBufferSize, cmp.cmp_user_buffer_size);
173b86efd96Sagiri 		return (IBT_CM_REJECT);
174b86efd96Sagiri 	}
175b86efd96Sagiri 
176c1f8b08eSagiri 	/*
177c1f8b08eSagiri 	 * RDS needs more time to process a failover REQ so send an MRA.
178c1f8b08eSagiri 	 * Otherwise, the remote may retry the REQ and fail the connection.
179c1f8b08eSagiri 	 */
180c1f8b08eSagiri 	if ((cmp.cmp_failover) && (cmp.cmp_eptype == RDS_EP_TYPE_DATA)) {
181c1f8b08eSagiri 		RDS_DPRINTF2("rds_handle_cm_req", "Session Failover, send MRA");
182c1f8b08eSagiri 		(void) ibt_cm_delay(IBT_CM_DELAY_REQ, evp->cm_session_id,
183c1f8b08eSagiri 		    10000000 /* 10 sec */, NULL, 0);
184c1f8b08eSagiri 	}
185c1f8b08eSagiri 
186b86efd96Sagiri 	/* Is there a session to the destination node? */
187b86efd96Sagiri 	rw_enter(&statep->rds_sessionlock, RW_READER);
188d22e11ebSBill Taylor 	sp = rds_session_lkup(statep, ipcm_info.SRCIP, rgid.gid_guid);
189b86efd96Sagiri 	rw_exit(&statep->rds_sessionlock);
190b86efd96Sagiri 
191b86efd96Sagiri 	if (sp == NULL) {
192b86efd96Sagiri 		/*
193b86efd96Sagiri 		 * currently there is no session to the destination
194b86efd96Sagiri 		 * remote ip in the private data is the local ip and vice
195b86efd96Sagiri 		 * versa
196b86efd96Sagiri 		 */
197d22e11ebSBill Taylor 		sp = rds_session_create(statep, ipcm_info.DSTIP,
198d22e11ebSBill Taylor 		    ipcm_info.SRCIP, reqp, RDS_SESSION_PASSIVE);
199b86efd96Sagiri 		if (sp == NULL) {
200b86efd96Sagiri 			/* Check the list anyway. */
201b86efd96Sagiri 			rw_enter(&statep->rds_sessionlock, RW_READER);
202d22e11ebSBill Taylor 			sp = rds_session_lkup(statep, ipcm_info.SRCIP,
203b86efd96Sagiri 			    rgid.gid_guid);
204b86efd96Sagiri 			rw_exit(&statep->rds_sessionlock);
205b86efd96Sagiri 			if (sp == NULL) {
206b86efd96Sagiri 				/*
207b86efd96Sagiri 				 * The only way this can fail is due to lack
208b86efd96Sagiri 				 * of kernel resources
209b86efd96Sagiri 				 */
210b86efd96Sagiri 				return (IBT_CM_REJECT);
211b86efd96Sagiri 			}
212b86efd96Sagiri 		}
213b86efd96Sagiri 	}
214b86efd96Sagiri 
215b86efd96Sagiri 	rw_enter(&sp->session_lock, RW_WRITER);
216b86efd96Sagiri 
217b86efd96Sagiri 	/* catch peer-to-peer case as soon as possible */
2188257fab9Sagiri 	if ((sp->session_state == RDS_SESSION_STATE_CREATED) ||
2198257fab9Sagiri 	    (sp->session_state == RDS_SESSION_STATE_INIT)) {
220b86efd96Sagiri 		/* Check possible peer-to-peer case here */
221b86efd96Sagiri 		if (sp->session_type != RDS_SESSION_PASSIVE) {
2228257fab9Sagiri 			RDS_DPRINTF2("rds_handle_cm_req",
2238257fab9Sagiri 			    "SP(%p) Peer-peer connection handling", sp);
224b86efd96Sagiri 			if (lgid.gid_guid > rgid.gid_guid) {
225b86efd96Sagiri 				/* this node is active so reject this request */
226b86efd96Sagiri 				rw_exit(&sp->session_lock);
227b86efd96Sagiri 				return (IBT_CM_REJECT);
228b86efd96Sagiri 			} else {
229b86efd96Sagiri 				/* this node is passive, change the session */
230b86efd96Sagiri 				sp->session_type = RDS_SESSION_PASSIVE;
231b86efd96Sagiri 				sp->session_lgid = lgid;
232b86efd96Sagiri 				sp->session_rgid = rgid;
233b86efd96Sagiri 			}
234b86efd96Sagiri 		}
235b86efd96Sagiri 	}
236b86efd96Sagiri 
237b86efd96Sagiri 	RDS_DPRINTF2(LABEL, "SP(%p) state: %d", sp, sp->session_state);
238*0c19630bSagiri 	save_state = sp->session_state;
239*0c19630bSagiri 	save_type = sp->session_type;
240b86efd96Sagiri 
241b86efd96Sagiri 	switch (sp->session_state) {
242b86efd96Sagiri 	case RDS_SESSION_STATE_CONNECTED:
243b86efd96Sagiri 		RDS_DPRINTF2(LABEL, "STALE Session Detected SP(%p)", sp);
244b86efd96Sagiri 		sp->session_state = RDS_SESSION_STATE_ERROR;
245b86efd96Sagiri 		RDS_DPRINTF3("rds_handle_cm_req", "SP(%p) State "
246b86efd96Sagiri 		    "RDS_SESSION_STATE_ERROR", sp);
247b86efd96Sagiri 
248b86efd96Sagiri 		/* FALLTHRU */
249b86efd96Sagiri 	case RDS_SESSION_STATE_ERROR:
250b86efd96Sagiri 	case RDS_SESSION_STATE_PASSIVE_CLOSING:
251*0c19630bSagiri 		/*
252*0c19630bSagiri 		 * Some other thread must be processing this session,
253*0c19630bSagiri 		 * this thread must wait until the other thread finishes.
254*0c19630bSagiri 		 */
255b86efd96Sagiri 		sp->session_type = RDS_SESSION_PASSIVE;
256b86efd96Sagiri 		rw_exit(&sp->session_lock);
257b86efd96Sagiri 
258d99cb22fSagiri 		/* Handling this will take some time, so send an MRA */
259d99cb22fSagiri 		(void) ibt_cm_delay(IBT_CM_DELAY_REQ, evp->cm_session_id,
260d99cb22fSagiri 		    10000000 /* 10 sec */, NULL, 0);
261d99cb22fSagiri 
262d99cb22fSagiri 		/*
263d99cb22fSagiri 		 * Any pending completions don't get flushed until the channel
264d99cb22fSagiri 		 * is closed. So, passing 0 here will not wait for pending
265d99cb22fSagiri 		 * completions in rds_session_close before closing the channel
266d99cb22fSagiri 		 */
267d99cb22fSagiri 		rds_session_close(sp, IBT_NOCALLBACKS, 0);
268b86efd96Sagiri 
269b86efd96Sagiri 		rw_enter(&sp->session_lock, RW_WRITER);
270*0c19630bSagiri 
271*0c19630bSagiri 		/*
272*0c19630bSagiri 		 * If the session was in ERROR, then either a failover thread
273*0c19630bSagiri 		 * or event_failure thread would be processing this session.
274*0c19630bSagiri 		 * This thread should wait for event_failure thread to
275*0c19630bSagiri 		 * complete. This need not wait for failover thread.
276*0c19630bSagiri 		 */
277*0c19630bSagiri 		if ((save_state != RDS_SESSION_STATE_CONNECTED) &&
278*0c19630bSagiri 		    (save_type == RDS_SESSION_PASSIVE)) {
279*0c19630bSagiri 				/*
280*0c19630bSagiri 				 * The other thread is event_failure thread,
281*0c19630bSagiri 				 * wait until it finishes.
282*0c19630bSagiri 				 */
283*0c19630bSagiri 				while (!((sp->session_state ==
284*0c19630bSagiri 				    RDS_SESSION_STATE_FAILED) ||
285*0c19630bSagiri 				    (sp->session_state ==
286*0c19630bSagiri 				    RDS_SESSION_STATE_FINI))) {
287*0c19630bSagiri 					rw_exit(&sp->session_lock);
288*0c19630bSagiri 					delay(drv_usectohz(1000000));
289*0c19630bSagiri 					rw_enter(&sp->session_lock, RW_WRITER);
290*0c19630bSagiri 				}
291c1f8b08eSagiri 		}
292b86efd96Sagiri 
293*0c19630bSagiri 		/* move the session to init state */
294*0c19630bSagiri 		if ((sp->session_state == RDS_SESSION_STATE_ERROR) ||
295*0c19630bSagiri 		    (sp->session_state == RDS_SESSION_STATE_PASSIVE_CLOSING)) {
296*0c19630bSagiri 			ret = rds_session_reinit(sp, lgid);
297*0c19630bSagiri 			sp->session_myip = ipcm_info.DSTIP;
298*0c19630bSagiri 			sp->session_lgid = lgid;
299*0c19630bSagiri 			sp->session_rgid = rgid;
300*0c19630bSagiri 			if (ret != 0) {
301*0c19630bSagiri 				rds_session_fini(sp);
302*0c19630bSagiri 				sp->session_state = RDS_SESSION_STATE_FAILED;
303*0c19630bSagiri 				RDS_DPRINTF3("rds_handle_cm_req",
304*0c19630bSagiri 				    "SP(%p) State RDS_SESSION_STATE_FAILED",
305*0c19630bSagiri 				    sp);
306*0c19630bSagiri 				rw_exit(&sp->session_lock);
307*0c19630bSagiri 				return (IBT_CM_REJECT);
308*0c19630bSagiri 			} else {
309*0c19630bSagiri 				sp->session_state = RDS_SESSION_STATE_INIT;
310*0c19630bSagiri 				RDS_DPRINTF3("rds_handle_cm_req",
311*0c19630bSagiri 				    "SP(%p) State RDS_SESSION_STATE_INIT", sp);
312*0c19630bSagiri 			}
313*0c19630bSagiri 
314*0c19630bSagiri 			if (cmp.cmp_eptype == RDS_EP_TYPE_CTRL) {
315*0c19630bSagiri 				ep = &sp->session_ctrlep;
316*0c19630bSagiri 			} else {
317*0c19630bSagiri 				ep = &sp->session_dataep;
318*0c19630bSagiri 			}
319*0c19630bSagiri 			break;
320b86efd96Sagiri 		}
321*0c19630bSagiri 
322*0c19630bSagiri 		/* FALLTHRU */
323b86efd96Sagiri 	case RDS_SESSION_STATE_CREATED:
324b86efd96Sagiri 	case RDS_SESSION_STATE_FAILED:
325b86efd96Sagiri 	case RDS_SESSION_STATE_FINI:
326b86efd96Sagiri 		/*
327b86efd96Sagiri 		 * Initialize both channels, we accept this connection
328b86efd96Sagiri 		 * only if both channels are initialized
329b86efd96Sagiri 		 */
3308257fab9Sagiri 		sp->session_type = RDS_SESSION_PASSIVE;
3318257fab9Sagiri 		sp->session_lgid = lgid;
3328257fab9Sagiri 		sp->session_rgid = rgid;
333b86efd96Sagiri 		sp->session_state = RDS_SESSION_STATE_CREATED;
334b86efd96Sagiri 		RDS_DPRINTF3("rds_handle_cm_req", "SP(%p) State "
335b86efd96Sagiri 		    "RDS_SESSION_STATE_CREATED", sp);
336b86efd96Sagiri 		ret = rds_session_init(sp);
337b86efd96Sagiri 		if (ret != 0) {
338b86efd96Sagiri 			/* Seems like there are not enough resources */
339b86efd96Sagiri 			sp->session_state = RDS_SESSION_STATE_FAILED;
340b86efd96Sagiri 			RDS_DPRINTF3("rds_handle_cm_req", "SP(%p) State "
341b86efd96Sagiri 			    "RDS_SESSION_STATE_FAILED", sp);
342b86efd96Sagiri 			rw_exit(&sp->session_lock);
343b86efd96Sagiri 			return (IBT_CM_REJECT);
344b86efd96Sagiri 		}
345b86efd96Sagiri 		sp->session_state = RDS_SESSION_STATE_INIT;
346b86efd96Sagiri 		RDS_DPRINTF3("rds_handle_cm_req", "SP(%p) State "
347b86efd96Sagiri 		    "RDS_SESSION_STATE_INIT", sp);
348b86efd96Sagiri 
349b86efd96Sagiri 		/* FALLTHRU */
350b86efd96Sagiri 	case RDS_SESSION_STATE_INIT:
3518257fab9Sagiri 		/*
3528257fab9Sagiri 		 * When re-using an existing session, make sure the
3538257fab9Sagiri 		 * session is still through the same HCA. Otherwise, the
3548257fab9Sagiri 		 * memory registrations have to moved to the new HCA.
3558257fab9Sagiri 		 */
3568257fab9Sagiri 		if (cmp.cmp_eptype == RDS_EP_TYPE_DATA) {
3578257fab9Sagiri 			if (sp->session_lgid.gid_guid != lgid.gid_guid) {
3588257fab9Sagiri 				RDS_DPRINTF2("rds_handle_cm_req",
3598257fab9Sagiri 				    "Existing Session but different gid "
3608257fab9Sagiri 				    "existing: 0x%llx, new: 0x%llx, "
3618257fab9Sagiri 				    "sending an MRA",
3628257fab9Sagiri 				    sp->session_lgid.gid_guid, lgid.gid_guid);
3638257fab9Sagiri 				(void) ibt_cm_delay(IBT_CM_DELAY_REQ,
3648257fab9Sagiri 				    evp->cm_session_id, 10000000 /* 10 sec */,
3658257fab9Sagiri 				    NULL, 0);
3668257fab9Sagiri 				ret = rds_session_reinit(sp, lgid);
3678257fab9Sagiri 				if (ret != 0) {
3688257fab9Sagiri 					rds_session_fini(sp);
3698257fab9Sagiri 					sp->session_state =
3708257fab9Sagiri 					    RDS_SESSION_STATE_FAILED;
3718257fab9Sagiri 					sp->session_failover = 0;
3728257fab9Sagiri 					RDS_DPRINTF3("rds_failover_session",
3738257fab9Sagiri 					    "SP(%p) State "
3748257fab9Sagiri 					    "RDS_SESSION_STATE_FAILED", sp);
3758257fab9Sagiri 					rw_exit(&sp->session_lock);
3768257fab9Sagiri 					return (IBT_CM_REJECT);
3778257fab9Sagiri 				}
3788257fab9Sagiri 			}
379b86efd96Sagiri 			ep = &sp->session_dataep;
3808257fab9Sagiri 		} else {
3818257fab9Sagiri 			ep = &sp->session_ctrlep;
382b86efd96Sagiri 		}
383b86efd96Sagiri 
384b86efd96Sagiri 		break;
385b86efd96Sagiri 	default:
386b86efd96Sagiri 		RDS_DPRINTF2(LABEL, "ERROR: SP(%p) is in an unexpected "
387b86efd96Sagiri 		    "state: %d", sp, sp->session_state);
388b86efd96Sagiri 		rw_exit(&sp->session_lock);
389b86efd96Sagiri 		return (IBT_CM_REJECT);
390b86efd96Sagiri 	}
391b86efd96Sagiri 
3928257fab9Sagiri 	sp->session_failover = 0; /* reset any previous value */
393b86efd96Sagiri 	if (cmp.cmp_failover) {
394b86efd96Sagiri 		RDS_DPRINTF2("rds_handle_cm_req",
395b86efd96Sagiri 		    "SP(%p) Failover Session (BP %p)", sp, cmp.cmp_last_bufid);
3968257fab9Sagiri 		sp->session_failover = 1;
397b86efd96Sagiri 	}
398b86efd96Sagiri 
399b86efd96Sagiri 	mutex_enter(&ep->ep_lock);
400b86efd96Sagiri 	if (ep->ep_state == RDS_EP_STATE_UNCONNECTED) {
401b86efd96Sagiri 		ep->ep_state = RDS_EP_STATE_PASSIVE_PENDING;
402b86efd96Sagiri 		sp->session_type = RDS_SESSION_PASSIVE;
403b86efd96Sagiri 		rw_exit(&sp->session_lock);
404b86efd96Sagiri 	} else if (ep->ep_state == RDS_EP_STATE_ACTIVE_PENDING) {
405b86efd96Sagiri 		rw_exit(&sp->session_lock);
406b86efd96Sagiri 		/*
407b86efd96Sagiri 		 * Peer to peer connection. There is an active
408b86efd96Sagiri 		 * connection pending on this ep. The one with
409b86efd96Sagiri 		 * greater port guid becomes active and the
410b86efd96Sagiri 		 * other becomes passive.
411b86efd96Sagiri 		 */
4128257fab9Sagiri 		RDS_DPRINTF2("rds_handle_cm_req",
4138257fab9Sagiri 		    "EP(%p) Peer-peer connection handling", ep);
414b86efd96Sagiri 		if (lgid.gid_guid > rgid.gid_guid) {
415b86efd96Sagiri 			/* this node is active so reject this request */
416b86efd96Sagiri 			mutex_exit(&ep->ep_lock);
417b86efd96Sagiri 			RDS_DPRINTF2(LABEL, "SP(%p) EP(%p): "
418b86efd96Sagiri 			    "Rejecting passive in favor of active", sp, ep);
419b86efd96Sagiri 			return (IBT_CM_REJECT);
420b86efd96Sagiri 		} else {
421b86efd96Sagiri 			/*
422b86efd96Sagiri 			 * This session is not the active end, change it
423b86efd96Sagiri 			 * to passive end.
424b86efd96Sagiri 			 */
425b86efd96Sagiri 			ep->ep_state = RDS_EP_STATE_PASSIVE_PENDING;
426b86efd96Sagiri 
427b86efd96Sagiri 			rw_enter(&sp->session_lock, RW_WRITER);
428b86efd96Sagiri 			sp->session_type = RDS_SESSION_PASSIVE;
429b86efd96Sagiri 			sp->session_lgid = lgid;
430b86efd96Sagiri 			sp->session_rgid = rgid;
431b86efd96Sagiri 			rw_exit(&sp->session_lock);
432b86efd96Sagiri 		}
433b86efd96Sagiri 	} else {
434b86efd96Sagiri 		rw_exit(&sp->session_lock);
435b86efd96Sagiri 	}
436b86efd96Sagiri 
437b86efd96Sagiri 	ep->ep_lbufid = cmp.cmp_last_bufid;
438b86efd96Sagiri 	ep->ep_ackwr.wr.rc.rcwr.rdma.rdma_raddr = (ib_vaddr_t)cmp.cmp_ack_addr;
439b86efd96Sagiri 	ep->ep_ackwr.wr.rc.rcwr.rdma.rdma_rkey = cmp.cmp_ack_rkey;
440b86efd96Sagiri 	cmp.cmp_last_bufid = ep->ep_rbufid;
441b86efd96Sagiri 	cmp.cmp_ack_addr = ep->ep_ack_addr;
442b86efd96Sagiri 	cmp.cmp_ack_rkey = ep->ep_ack_rkey;
443b86efd96Sagiri 	mutex_exit(&ep->ep_lock);
444b86efd96Sagiri 
445b86efd96Sagiri 	/* continue with accepting the connection request for this channel */
446b86efd96Sagiri 	chanhdl = rds_ep_alloc_rc_channel(ep, reqp->req_prim_hca_port);
447b86efd96Sagiri 	if (chanhdl == NULL) {
448b86efd96Sagiri 		mutex_enter(&ep->ep_lock);
449b86efd96Sagiri 		ep->ep_state = RDS_EP_STATE_UNCONNECTED;
450b86efd96Sagiri 		mutex_exit(&ep->ep_lock);
451b86efd96Sagiri 		return (IBT_CM_REJECT);
452b86efd96Sagiri 	}
453b86efd96Sagiri 
454b86efd96Sagiri 	/* pre-post recv buffers in the RQ */
455b86efd96Sagiri 	rds_post_recv_buf((void *)chanhdl);
456b86efd96Sagiri 
457b86efd96Sagiri 	rargsp->cm_ret_len = sizeof (rds_cm_private_data_t);
458b86efd96Sagiri 	bcopy((uint8_t *)&cmp, rcmp, sizeof (rds_cm_private_data_t));
459b86efd96Sagiri 	rargsp->cm_ret.rep.cm_channel = chanhdl;
460b86efd96Sagiri 	rargsp->cm_ret.rep.cm_rdma_ra_out = 4;
461b86efd96Sagiri 	rargsp->cm_ret.rep.cm_rdma_ra_in = 4;
462b86efd96Sagiri 	rargsp->cm_ret.rep.cm_rnr_retry_cnt = MinRnrRetry;
463b86efd96Sagiri 
464b86efd96Sagiri 	RDS_DPRINTF2("rds_handle_cm_req", "Return: SP(%p) EP(%p) Chan (%p)",
465b86efd96Sagiri 	    sp, ep, chanhdl);
466b86efd96Sagiri 
467b86efd96Sagiri 	return (IBT_CM_ACCEPT);
468b86efd96Sagiri }
469b86efd96Sagiri 
470b86efd96Sagiri /*
471b86efd96Sagiri  * Handle an incoming CM REP
472b86efd96Sagiri  * Pre-post recv buffers for the QP
473b86efd96Sagiri  */
474b86efd96Sagiri /* ARGSUSED */
475b86efd96Sagiri static ibt_cm_status_t
rds_handle_cm_rep(ibt_cm_event_t * evp,ibt_cm_return_args_t * rargsp,void * rcmp,ibt_priv_data_len_t rcmp_len)476b86efd96Sagiri rds_handle_cm_rep(ibt_cm_event_t *evp, ibt_cm_return_args_t *rargsp,
477b86efd96Sagiri     void *rcmp, ibt_priv_data_len_t rcmp_len)
478b86efd96Sagiri {
479b86efd96Sagiri 	rds_ep_t	*ep;
480b86efd96Sagiri 	rds_cm_private_data_t	cmp;
481b86efd96Sagiri 
482b86efd96Sagiri 	RDS_DPRINTF2("rds_handle_cm_rep", "Enter");
483b86efd96Sagiri 
484b86efd96Sagiri 	/* pre-post recv buffers in the RQ */
485b86efd96Sagiri 	rds_post_recv_buf((void *)evp->cm_channel);
486b86efd96Sagiri 
487b86efd96Sagiri 	ep = (rds_ep_t *)ibt_get_chan_private(evp->cm_channel);
488b86efd96Sagiri 	bcopy((uint8_t *)evp->cm_priv_data, &cmp,
489b86efd96Sagiri 	    sizeof (rds_cm_private_data_t));
490b86efd96Sagiri 	ep->ep_lbufid = cmp.cmp_last_bufid;
491b86efd96Sagiri 	ep->ep_ackwr.wr.rc.rcwr.rdma.rdma_raddr = (ib_vaddr_t)cmp.cmp_ack_addr;
492b86efd96Sagiri 	ep->ep_ackwr.wr.rc.rcwr.rdma.rdma_rkey = cmp.cmp_ack_rkey;
493b86efd96Sagiri 
494b86efd96Sagiri 	rargsp->cm_ret_len = 0;
495b86efd96Sagiri 
496b86efd96Sagiri 	RDS_DPRINTF2("rds_handle_cm_rep", "Return: lbufid: %p", ep->ep_lbufid);
497b86efd96Sagiri 
498b86efd96Sagiri 	return (IBT_CM_ACCEPT);
499b86efd96Sagiri }
500b86efd96Sagiri 
501b86efd96Sagiri /*
502b86efd96Sagiri  * Handle CONN EST
503b86efd96Sagiri  */
504b86efd96Sagiri static ibt_cm_status_t
rds_handle_cm_conn_est(ibt_cm_event_t * evp)505b86efd96Sagiri rds_handle_cm_conn_est(ibt_cm_event_t *evp)
506b86efd96Sagiri {
507b86efd96Sagiri 	rds_session_t	*sp;
508b86efd96Sagiri 	rds_ep_t	*ep;
509b86efd96Sagiri 
510b86efd96Sagiri 	ep = (rds_ep_t *)ibt_get_chan_private(evp->cm_channel);
511b86efd96Sagiri 
512b86efd96Sagiri 	RDS_DPRINTF2("rds_handle_cm_conn_est", "EP(%p) State: %d", ep,
513b86efd96Sagiri 	    ep->ep_state);
514b86efd96Sagiri 
515b86efd96Sagiri 	mutex_enter(&ep->ep_lock);
516b86efd96Sagiri 	ASSERT((ep->ep_state == RDS_EP_STATE_ACTIVE_PENDING) ||
517b86efd96Sagiri 	    (ep->ep_state == RDS_EP_STATE_PASSIVE_PENDING));
518b86efd96Sagiri 	ep->ep_state = RDS_EP_STATE_CONNECTED;
519b86efd96Sagiri 	ep->ep_chanhdl = evp->cm_channel;
520b86efd96Sagiri 	sp = ep->ep_sp;
521b86efd96Sagiri 	mutex_exit(&ep->ep_lock);
522b86efd96Sagiri 
523b86efd96Sagiri 	(void) rds_session_active(sp);
524b86efd96Sagiri 
525b86efd96Sagiri 	RDS_DPRINTF2("rds_handle_cm_conn_est", "Return");
526b86efd96Sagiri 	return (IBT_CM_ACCEPT);
527b86efd96Sagiri }
528b86efd96Sagiri 
529b86efd96Sagiri /*
530b86efd96Sagiri  * Handle CONN CLOSED
531b86efd96Sagiri  */
532b86efd96Sagiri static ibt_cm_status_t
rds_handle_cm_conn_closed(ibt_cm_event_t * evp)533b86efd96Sagiri rds_handle_cm_conn_closed(ibt_cm_event_t *evp)
534b86efd96Sagiri {
535b86efd96Sagiri 	rds_ep_t	*ep;
536b86efd96Sagiri 	rds_session_t	*sp;
537b86efd96Sagiri 
538b86efd96Sagiri 	/* Catch DREQs but ignore DREPs */
539b86efd96Sagiri 	if (evp->cm_event.closed != IBT_CM_CLOSED_DREQ_RCVD) {
540b86efd96Sagiri 		RDS_DPRINTF2("rds_handle_cm_conn_closed",
541b86efd96Sagiri 		    "Ignoring Event: %d received", evp->cm_event.closed);
542b86efd96Sagiri 		return (IBT_CM_ACCEPT);
543b86efd96Sagiri 	}
544b86efd96Sagiri 
545b86efd96Sagiri 	ep = (rds_ep_t *)ibt_get_chan_private(evp->cm_channel);
546b86efd96Sagiri 	sp = ep->ep_sp;
547d99cb22fSagiri 	RDS_DPRINTF2("rds_handle_cm_conn_closed", "EP(%p) Chan(%p) Enter",
548d99cb22fSagiri 	    ep, evp->cm_channel);
549b86efd96Sagiri 
550b86efd96Sagiri 	mutex_enter(&ep->ep_lock);
551b86efd96Sagiri 	if (ep->ep_state != RDS_EP_STATE_CONNECTED) {
552b86efd96Sagiri 		/* Ignore this DREQ */
553b86efd96Sagiri 		RDS_DPRINTF2("rds_handle_cm_conn_closed",
554b86efd96Sagiri 		    "EP(%p) not connected, state: %d", ep, ep->ep_state);
555b86efd96Sagiri 		mutex_exit(&ep->ep_lock);
556b86efd96Sagiri 		return (IBT_CM_ACCEPT);
557b86efd96Sagiri 	}
558b86efd96Sagiri 	ep->ep_state = RDS_EP_STATE_CLOSING;
559b86efd96Sagiri 	mutex_exit(&ep->ep_lock);
560b86efd96Sagiri 
561b86efd96Sagiri 	rw_enter(&sp->session_lock, RW_WRITER);
562b86efd96Sagiri 	RDS_DPRINTF2("rds_handle_cm_conn_closed", "SP(%p) - state: %d", sp,
563b86efd96Sagiri 	    sp->session_state);
564b86efd96Sagiri 
565b86efd96Sagiri 	switch (sp->session_state) {
566b86efd96Sagiri 	case RDS_SESSION_STATE_CONNECTED:
56700a3eaf3SRamaswamy Tummala 	case RDS_SESSION_STATE_HCA_CLOSING:
568b86efd96Sagiri 		sp->session_state = RDS_SESSION_STATE_PASSIVE_CLOSING;
569b86efd96Sagiri 		RDS_DPRINTF3("rds_handle_cm_conn_closed", "SP(%p) State "
570b86efd96Sagiri 		    "RDS_SESSION_STATE_PASSIVE_CLOSING", sp);
571b86efd96Sagiri 		break;
572b86efd96Sagiri 
573b86efd96Sagiri 	case RDS_SESSION_STATE_PASSIVE_CLOSING:
574b86efd96Sagiri 		sp->session_state = RDS_SESSION_STATE_CLOSED;
575b86efd96Sagiri 		RDS_DPRINTF3("rds_handle_cm_conn_closed", "SP(%p) State "
576b86efd96Sagiri 		    "RDS_SESSION_STATE_CLOSED", sp);
577b86efd96Sagiri 		rds_passive_session_fini(sp);
578b86efd96Sagiri 		sp->session_state = RDS_SESSION_STATE_FINI;
579b86efd96Sagiri 		RDS_DPRINTF3("rds_handle_cm_conn_closed",
580b86efd96Sagiri 		    "SP(%p) State RDS_SESSION_STATE_FINI", sp);
581b86efd96Sagiri 		break;
582b86efd96Sagiri 
583b86efd96Sagiri 	case RDS_SESSION_STATE_ACTIVE_CLOSING:
584b86efd96Sagiri 	case RDS_SESSION_STATE_ERROR:
585b86efd96Sagiri 	case RDS_SESSION_STATE_CLOSED:
586b86efd96Sagiri 		break;
587b86efd96Sagiri 
588b86efd96Sagiri 	case RDS_SESSION_STATE_INIT:
589b86efd96Sagiri 		sp->session_state = RDS_SESSION_STATE_ERROR;
590b86efd96Sagiri 		RDS_DPRINTF3("rds_handle_cm_conn_closed", "SP(%p) State "
591b86efd96Sagiri 		    "RDS_SESSION_STATE_ERROR", sp);
592b86efd96Sagiri 		rds_passive_session_fini(sp);
593b86efd96Sagiri 		sp->session_state = RDS_SESSION_STATE_FAILED;
594b86efd96Sagiri 		RDS_DPRINTF3("rds_handle_cm_conn_closed",
595b86efd96Sagiri 		    "SP(%p) State RDS_SESSION_STATE_FAILED", sp);
596b86efd96Sagiri 		break;
597b86efd96Sagiri 
598b86efd96Sagiri 	default:
599b86efd96Sagiri 		RDS_DPRINTF2("rds_handle_cm_conn_closed",
600b86efd96Sagiri 		    "SP(%p) - Unexpected state: %d", sp, sp->session_state);
601b86efd96Sagiri 		rds_passive_session_fini(sp);
602b86efd96Sagiri 		sp->session_state = RDS_SESSION_STATE_FAILED;
603b86efd96Sagiri 		RDS_DPRINTF3("rds_handle_cm_conn_closed", "SP(%p) State "
604b86efd96Sagiri 		    "RDS_SESSION_STATE_FAILED", sp);
605b86efd96Sagiri 	}
606b86efd96Sagiri 	rw_exit(&sp->session_lock);
607b86efd96Sagiri 
608b86efd96Sagiri 	mutex_enter(&ep->ep_lock);
609b86efd96Sagiri 	ep->ep_state = RDS_EP_STATE_CLOSED;
610b86efd96Sagiri 	mutex_exit(&ep->ep_lock);
611b86efd96Sagiri 
612b86efd96Sagiri 	RDS_DPRINTF2("rds_handle_cm_conn_closed", "SP(%p) Return", sp);
613b86efd96Sagiri 	return (IBT_CM_ACCEPT);
614b86efd96Sagiri }
615b86efd96Sagiri 
616b86efd96Sagiri /*
617b86efd96Sagiri  * Handle EVENT FAILURE
618b86efd96Sagiri  */
619b86efd96Sagiri static ibt_cm_status_t
rds_handle_cm_event_failure(ibt_cm_event_t * evp)620b86efd96Sagiri rds_handle_cm_event_failure(ibt_cm_event_t *evp)
621b86efd96Sagiri {
622b86efd96Sagiri 	rds_ep_t	*ep;
623b86efd96Sagiri 	rds_session_t	*sp;
624b86efd96Sagiri 	int		ret;
625b86efd96Sagiri 
626b86efd96Sagiri 	RDS_DPRINTF2("rds_handle_cm_event_failure", "Enter: Chan hdl: 0x%p "
627b86efd96Sagiri 	    "Code: %d msg: %d reason: %d", evp->cm_channel,
628b86efd96Sagiri 	    evp->cm_event.failed.cf_code, evp->cm_event.failed.cf_msg,
629b86efd96Sagiri 	    evp->cm_event.failed.cf_reason);
630b86efd96Sagiri 
631015f8fffShiremath 	if (evp->cm_event.failed.cf_reason == IBT_CM_INVALID_SID) {
63274242422Sagiri 		RDS_DPRINTF2(LABEL,
633015f8fffShiremath 		    "Received REJ with reason IBT_CM_INVALID_SID: "
63474242422Sagiri 		    "RDS may not be loaded on the remote system");
635015f8fffShiremath 	}
636015f8fffShiremath 
637b86efd96Sagiri 	if (evp->cm_channel == NULL) {
638b86efd96Sagiri 		return (IBT_CM_ACCEPT);
639b86efd96Sagiri 	}
640b86efd96Sagiri 
641cd03c4aeSagiri 	if ((evp->cm_event.failed.cf_code != IBT_CM_FAILURE_STALE) &&
642cd03c4aeSagiri 	    (evp->cm_event.failed.cf_msg == IBT_CM_FAILURE_REQ)) {
643cd03c4aeSagiri 		/*
644cd03c4aeSagiri 		 * This end is active, just ignore, ibt_open_rc_channel()
645cd03c4aeSagiri 		 * caller will take care of cleanup.
646cd03c4aeSagiri 		 */
647cd03c4aeSagiri 		RDS_DPRINTF2("rds_handle_cm_event_failure",
648cd03c4aeSagiri 		    "Ignoring this event: Chan hdl: 0x%p", evp->cm_channel);
649cd03c4aeSagiri 		return (IBT_CM_ACCEPT);
650cd03c4aeSagiri 	}
651cd03c4aeSagiri 
652b86efd96Sagiri 	ep = (rds_ep_t *)ibt_get_chan_private(evp->cm_channel);
653b86efd96Sagiri 	sp = ep->ep_sp;
654b86efd96Sagiri 
655b86efd96Sagiri 	rw_enter(&sp->session_lock, RW_WRITER);
656b86efd96Sagiri 	if (sp->session_type == RDS_SESSION_PASSIVE) {
657b86efd96Sagiri 		RDS_DPRINTF2("rds_handle_cm_event_failure",
658b86efd96Sagiri 		    "SP(%p) - state: %d", sp, sp->session_state);
659b86efd96Sagiri 		if ((sp->session_state == RDS_SESSION_STATE_INIT) ||
660b86efd96Sagiri 		    (sp->session_state == RDS_SESSION_STATE_CONNECTED)) {
661b86efd96Sagiri 			sp->session_state = RDS_SESSION_STATE_ERROR;
662b86efd96Sagiri 			RDS_DPRINTF3("rds_handle_cm_event_failure",
663b86efd96Sagiri 			    "SP(%p) State RDS_SESSION_STATE_ERROR", sp);
664c1f8b08eSagiri 
665c1f8b08eSagiri 			/*
666c1f8b08eSagiri 			 * Store the cm_channel for freeing later
667c1f8b08eSagiri 			 * Active side frees it on ibt_open_rc_channel
668c1f8b08eSagiri 			 * failure
669c1f8b08eSagiri 			 */
670c1f8b08eSagiri 			if (ep->ep_chanhdl == NULL) {
671c1f8b08eSagiri 				ep->ep_chanhdl = evp->cm_channel;
672c1f8b08eSagiri 			}
673b86efd96Sagiri 			rw_exit(&sp->session_lock);
674b86efd96Sagiri 
675b86efd96Sagiri 			/*
676b86efd96Sagiri 			 * rds_passive_session_fini should not be called
677b86efd96Sagiri 			 * directly in the CM handler. It will cause a deadlock.
678b86efd96Sagiri 			 */
679b86efd96Sagiri 			ret = ddi_taskq_dispatch(rds_taskq,
680b86efd96Sagiri 			    rds_cleanup_passive_session, (void *)sp,
681b86efd96Sagiri 			    DDI_NOSLEEP);
682b86efd96Sagiri 			if (ret != DDI_SUCCESS) {
68374242422Sagiri 				RDS_DPRINTF2("rds_handle_cm_event_failure",
684b86efd96Sagiri 				    "SP(%p) TaskQ dispatch FAILED:%d", sp, ret);
685b86efd96Sagiri 			}
686b86efd96Sagiri 			return (IBT_CM_ACCEPT);
687b86efd96Sagiri 		}
688b86efd96Sagiri 	}
689b86efd96Sagiri 	rw_exit(&sp->session_lock);
690b86efd96Sagiri 
691b86efd96Sagiri 	RDS_DPRINTF2("rds_handle_cm_event_failure", "SP(%p) Return", sp);
692b86efd96Sagiri 	return (IBT_CM_ACCEPT);
693b86efd96Sagiri }
694b86efd96Sagiri 
695b86efd96Sagiri /*
696b86efd96Sagiri  * CM Handler
697b86efd96Sagiri  *
698b86efd96Sagiri  * Called by IBCM
699b86efd96Sagiri  * The cm_private type differs for active and passive events.
700b86efd96Sagiri  */
701b86efd96Sagiri ibt_cm_status_t
rds_cm_handler(void * cm_private,ibt_cm_event_t * eventp,ibt_cm_return_args_t * ret_args,void * ret_priv_data,ibt_priv_data_len_t ret_len_max)702b86efd96Sagiri rds_cm_handler(void *cm_private, ibt_cm_event_t *eventp,
703b86efd96Sagiri     ibt_cm_return_args_t *ret_args, void *ret_priv_data,
704b86efd96Sagiri     ibt_priv_data_len_t ret_len_max)
705b86efd96Sagiri {
706b86efd96Sagiri 	ibt_cm_status_t		ret = IBT_CM_ACCEPT;
707b86efd96Sagiri 
708b86efd96Sagiri 	RDS_DPRINTF2("rds_cm_handler", "Enter: event: %d", eventp->cm_type);
709b86efd96Sagiri 
710b86efd96Sagiri 	switch (eventp->cm_type) {
711b86efd96Sagiri 	case IBT_CM_EVENT_REQ_RCV:
712b86efd96Sagiri 		ret = rds_handle_cm_req((rds_state_t *)cm_private, eventp,
713b86efd96Sagiri 		    ret_args, ret_priv_data, ret_len_max);
714b86efd96Sagiri 		break;
715b86efd96Sagiri 	case IBT_CM_EVENT_REP_RCV:
716b86efd96Sagiri 		ret = rds_handle_cm_rep(eventp, ret_args, ret_priv_data,
717b86efd96Sagiri 		    ret_len_max);
718b86efd96Sagiri 		break;
719b86efd96Sagiri 	case IBT_CM_EVENT_MRA_RCV:
720b86efd96Sagiri 		/* Not supported */
721b86efd96Sagiri 		break;
722b86efd96Sagiri 	case IBT_CM_EVENT_CONN_EST:
723b86efd96Sagiri 		ret = rds_handle_cm_conn_est(eventp);
724b86efd96Sagiri 		break;
725b86efd96Sagiri 	case IBT_CM_EVENT_CONN_CLOSED:
726b86efd96Sagiri 		ret = rds_handle_cm_conn_closed(eventp);
727b86efd96Sagiri 		break;
728b86efd96Sagiri 	case IBT_CM_EVENT_FAILURE:
729b86efd96Sagiri 		ret = rds_handle_cm_event_failure(eventp);
730b86efd96Sagiri 		break;
731b86efd96Sagiri 	case IBT_CM_EVENT_LAP_RCV:
732b86efd96Sagiri 		/* Not supported */
733b86efd96Sagiri 		RDS_DPRINTF2(LABEL, "LAP message received");
734b86efd96Sagiri 		break;
735b86efd96Sagiri 	case IBT_CM_EVENT_APR_RCV:
736b86efd96Sagiri 		/* Not supported */
737b86efd96Sagiri 		RDS_DPRINTF2(LABEL, "APR message received");
738b86efd96Sagiri 		break;
739b86efd96Sagiri 	default:
740b86efd96Sagiri 		break;
741b86efd96Sagiri 	}
742b86efd96Sagiri 
743b86efd96Sagiri 	RDS_DPRINTF2("rds_cm_handler", "Return");
744b86efd96Sagiri 
745b86efd96Sagiri 	return (ret);
746b86efd96Sagiri }
747b86efd96Sagiri 
/*
 * This is based on OFED Linux RDS.
 *
 * Port number used to derive the RDS service id (passed to
 * ibt_get_ip_sid() when the service is registered) and placed in the
 * src_port field of the IP CM info for outgoing connection requests.
 */
#define	RDS_PORT_NUM	6556
750015f8fffShiremath 
751b86efd96Sagiri /*
752b86efd96Sagiri  * Register the wellknown service with service id: RDS_SERVICE_ID
753b86efd96Sagiri  * Incoming connection requests should arrive on this service id.
754b86efd96Sagiri  */
755b86efd96Sagiri ibt_srv_hdl_t
rds_register_service(ibt_clnt_hdl_t rds_ibhdl)756b86efd96Sagiri rds_register_service(ibt_clnt_hdl_t rds_ibhdl)
757b86efd96Sagiri {
758b86efd96Sagiri 	ibt_srv_hdl_t	srvhdl;
759b86efd96Sagiri 	ibt_srv_desc_t	srvdesc;
760b86efd96Sagiri 	int		ret;
761b86efd96Sagiri 
762b86efd96Sagiri 	RDS_DPRINTF2("rds_register_service", "Enter: 0x%p", rds_ibhdl);
763b86efd96Sagiri 
764b86efd96Sagiri 	bzero(&srvdesc, sizeof (ibt_srv_desc_t));
765b86efd96Sagiri 	srvdesc.sd_handler = rds_cm_handler;
766b86efd96Sagiri 	srvdesc.sd_flags = IBT_SRV_NO_FLAGS;
767b86efd96Sagiri 
768015f8fffShiremath 	/*
769015f8fffShiremath 	 * This is the new service id as per:
770015f8fffShiremath 	 * Annex A11: RDMA IP CM Service
771015f8fffShiremath 	 */
772015f8fffShiremath 	rdsib_statep->rds_service_id = ibt_get_ip_sid(IPPROTO_TCP,
773015f8fffShiremath 	    RDS_PORT_NUM);
774015f8fffShiremath 	ret = ibt_register_service(rds_ibhdl, &srvdesc,
775015f8fffShiremath 	    rdsib_statep->rds_service_id, 1, &srvhdl, NULL);
776015f8fffShiremath 	if (ret != IBT_SUCCESS) {
777015f8fffShiremath 		RDS_DPRINTF2(LABEL,
778015f8fffShiremath 		    "RDS Service (0x%llx) Registration Failed: %d",
779015f8fffShiremath 		    rdsib_statep->rds_service_id, ret);
780b86efd96Sagiri 		return (NULL);
781b86efd96Sagiri 	}
782b86efd96Sagiri 
783b86efd96Sagiri 	RDS_DPRINTF2("rds_register_service", "Return: 0x%p", srvhdl);
784b86efd96Sagiri 	return (srvhdl);
785b86efd96Sagiri }
786b86efd96Sagiri 
787b86efd96Sagiri /* Bind the RDS service on all ports */
788b86efd96Sagiri int
rds_bind_service(rds_state_t * statep)789b86efd96Sagiri rds_bind_service(rds_state_t *statep)
790b86efd96Sagiri {
791b86efd96Sagiri 	rds_hca_t	*hcap;
792b86efd96Sagiri 	ib_gid_t	gid;
793b86efd96Sagiri 	uint_t		jx, nbinds = 0, nports = 0;
794b86efd96Sagiri 	int		ret;
795b86efd96Sagiri 
796b86efd96Sagiri 	RDS_DPRINTF2("rds_bind_service", "Enter: 0x%p", statep);
797b86efd96Sagiri 
79800a3eaf3SRamaswamy Tummala 	rw_enter(&statep->rds_hca_lock, RW_READER);
79900a3eaf3SRamaswamy Tummala 
800b86efd96Sagiri 	hcap = statep->rds_hcalistp;
801b86efd96Sagiri 	while (hcap != NULL) {
80200a3eaf3SRamaswamy Tummala 
80300a3eaf3SRamaswamy Tummala 		/* skip the HCAs that are not fully online */
80400a3eaf3SRamaswamy Tummala 		if ((hcap->hca_state != RDS_HCA_STATE_OPEN) &&
80500a3eaf3SRamaswamy Tummala 		    (hcap->hca_state != RDS_HCA_STATE_MEM_REGISTERED)) {
80600a3eaf3SRamaswamy Tummala 			RDS_DPRINTF2("rds_bind_service",
80700a3eaf3SRamaswamy Tummala 			    "Skipping HCA: 0x%llx, state: %d",
80800a3eaf3SRamaswamy Tummala 			    hcap->hca_guid, hcap->hca_state);
80900a3eaf3SRamaswamy Tummala 			hcap = hcap->hca_nextp;
81000a3eaf3SRamaswamy Tummala 			continue;
81100a3eaf3SRamaswamy Tummala 		}
81200a3eaf3SRamaswamy Tummala 
81300a3eaf3SRamaswamy Tummala 		/* currently, we have space for only 4 bindhdls */
81400a3eaf3SRamaswamy Tummala 		ASSERT(hcap->hca_nports < 4);
815b86efd96Sagiri 		for (jx = 0; jx < hcap->hca_nports; jx++) {
816b86efd96Sagiri 			nports++;
817b86efd96Sagiri 			if (hcap->hca_pinfop[jx].p_linkstate !=
818b86efd96Sagiri 			    IBT_PORT_ACTIVE) {
819b86efd96Sagiri 				/*
820b86efd96Sagiri 				 * service bind will be called in the async
82100a3eaf3SRamaswamy Tummala 				 * handler when the port comes up. Clear any
82200a3eaf3SRamaswamy Tummala 				 * stale bind handle.
823b86efd96Sagiri 				 */
82400a3eaf3SRamaswamy Tummala 				hcap->hca_bindhdl[jx] = NULL;
825b86efd96Sagiri 				continue;
826b86efd96Sagiri 			}
827b86efd96Sagiri 
828b86efd96Sagiri 			gid = hcap->hca_pinfop[jx].p_sgid_tbl[0];
829b86efd96Sagiri 			RDS_DPRINTF5(LABEL, "HCA: 0x%llx Port: %d "
830b86efd96Sagiri 			    "gid: %llx:%llx", hcap->hca_guid,
831b86efd96Sagiri 			    hcap->hca_pinfop[jx].p_port_num, gid.gid_prefix,
832b86efd96Sagiri 			    gid.gid_guid);
833b86efd96Sagiri 
834b86efd96Sagiri 			/* pass statep as cm_private */
835b86efd96Sagiri 			ret = ibt_bind_service(statep->rds_srvhdl, gid,
83600a3eaf3SRamaswamy Tummala 			    NULL, statep, &hcap->hca_bindhdl[jx]);
837b86efd96Sagiri 			if (ret != IBT_SUCCESS) {
838b86efd96Sagiri 				RDS_DPRINTF2(LABEL, "Bind service for "
839b86efd96Sagiri 				    "HCA: 0x%llx Port: %d gid %llx:%llx "
840b86efd96Sagiri 				    "failed: %d", hcap->hca_guid,
841b86efd96Sagiri 				    hcap->hca_pinfop[jx].p_port_num,
842b86efd96Sagiri 				    gid.gid_prefix, gid.gid_guid, ret);
843b86efd96Sagiri 				continue;
844b86efd96Sagiri 			}
845b86efd96Sagiri 
846b86efd96Sagiri 			nbinds++;
847b86efd96Sagiri 		}
848b86efd96Sagiri 		hcap = hcap->hca_nextp;
849b86efd96Sagiri 	}
850b86efd96Sagiri 
85100a3eaf3SRamaswamy Tummala 	rw_exit(&statep->rds_hca_lock);
85200a3eaf3SRamaswamy Tummala 
853b86efd96Sagiri 	RDS_DPRINTF2(LABEL, "RDS Service available on %d/%d ports",
854b86efd96Sagiri 	    nbinds, nports);
855b86efd96Sagiri 
856b86efd96Sagiri #if 0
857b86efd96Sagiri 	if (nbinds == 0) {
858b86efd96Sagiri 		return (-1);
859b86efd96Sagiri 	}
860b86efd96Sagiri #endif
861b86efd96Sagiri 
862b86efd96Sagiri 	RDS_DPRINTF2("rds_bind_service", "Return");
863b86efd96Sagiri 
864b86efd96Sagiri 	return (0);
865b86efd96Sagiri }
866b86efd96Sagiri 
867b86efd96Sagiri /* Open an RC connection */
868b86efd96Sagiri int
rds_open_rc_channel(rds_ep_t * ep,ibt_path_info_t * pinfo,ibt_execution_mode_t mode,ibt_channel_hdl_t * chanhdl)869b86efd96Sagiri rds_open_rc_channel(rds_ep_t *ep, ibt_path_info_t *pinfo,
870b86efd96Sagiri     ibt_execution_mode_t mode, ibt_channel_hdl_t *chanhdl)
871b86efd96Sagiri {
872b86efd96Sagiri 	rds_session_t		*sp;
873b86efd96Sagiri 	ibt_chan_open_args_t	ocargs;
874b86efd96Sagiri 	ibt_rc_returns_t	ocrets;
875b86efd96Sagiri 	rds_cm_private_data_t	cmp;
876b86efd96Sagiri 	uint8_t			hca_port;
877b86efd96Sagiri 	ibt_channel_hdl_t	hdl;
878015f8fffShiremath 	ibt_status_t		ret = 0;
879015f8fffShiremath 	ibt_ip_cm_info_t	ipcm_info;
880b86efd96Sagiri 
881b86efd96Sagiri 	RDS_DPRINTF2("rds_open_rc_channel", "Enter: EP(%p) mode: %d", ep, mode);
882b86efd96Sagiri 
883b86efd96Sagiri 	sp = ep->ep_sp;
884b86efd96Sagiri 
885015f8fffShiremath 	bzero(&ipcm_info, sizeof (ibt_ip_cm_info_t));
886015f8fffShiremath 	ipcm_info.src_addr.family = AF_INET;
887d22e11ebSBill Taylor 	ipcm_info.SRCIP = sp->session_myip;
888015f8fffShiremath 	ipcm_info.dst_addr.family = AF_INET;
889d22e11ebSBill Taylor 	ipcm_info.DSTIP = sp->session_remip;
890d22e11ebSBill Taylor 	ipcm_info.src_port = RDS_PORT_NUM;
891015f8fffShiremath 	ret = ibt_format_ip_private_data(&ipcm_info,
892015f8fffShiremath 	    sizeof (rds_cm_private_data_t), &cmp);
893015f8fffShiremath 	if (ret != IBT_SUCCESS) {
894015f8fffShiremath 		RDS_DPRINTF2(LABEL, "SP(%p) EP(%p) ibt_format_ip_private_data "
895015f8fffShiremath 		    "failed: %d", sp, ep, ret);
896015f8fffShiremath 		return (-1);
897015f8fffShiremath 	}
898015f8fffShiremath 
899b86efd96Sagiri 	hca_port = pinfo->pi_prim_cep_path.cep_hca_port_num;
900b86efd96Sagiri 
901b86efd96Sagiri 	hdl = rds_ep_alloc_rc_channel(ep, hca_port);
902b86efd96Sagiri 	if (hdl == NULL) {
903b86efd96Sagiri 		return (-1);
904b86efd96Sagiri 	}
905b86efd96Sagiri 
906b86efd96Sagiri 	cmp.cmp_version = RDS_VERSION;
907b86efd96Sagiri 	cmp.cmp_arch = RDS_THIS_ARCH;
908b86efd96Sagiri 	cmp.cmp_eptype = ep->ep_type;
909b86efd96Sagiri 	cmp.cmp_failover = sp->session_failover;
910b86efd96Sagiri 	cmp.cmp_last_bufid = ep->ep_rbufid;
911b86efd96Sagiri 	cmp.cmp_user_buffer_size = UserBufferSize;
912b86efd96Sagiri 	cmp.cmp_ack_addr = ep->ep_ack_addr;
913b86efd96Sagiri 	cmp.cmp_ack_rkey = ep->ep_ack_rkey;
914b86efd96Sagiri 
915b86efd96Sagiri 	bzero(&ocargs, sizeof (ibt_chan_open_args_t));
916b86efd96Sagiri 	bzero(&ocrets, sizeof (ibt_rc_returns_t));
917b86efd96Sagiri 	ocargs.oc_path = pinfo;
918b86efd96Sagiri 	ocargs.oc_cm_handler = rds_cm_handler;
919b86efd96Sagiri 	ocargs.oc_cm_clnt_private = NULL;
920b86efd96Sagiri 	ocargs.oc_rdma_ra_out = 4;
921b86efd96Sagiri 	ocargs.oc_rdma_ra_in = 4;
922b86efd96Sagiri 	ocargs.oc_priv_data_len = sizeof (rds_cm_private_data_t);
923b86efd96Sagiri 	ocargs.oc_priv_data = &cmp;
924b86efd96Sagiri 	ocargs.oc_path_retry_cnt = IBPathRetryCount;
925b86efd96Sagiri 	ocargs.oc_path_rnr_retry_cnt = MinRnrRetry;
926b86efd96Sagiri 	ret = ibt_open_rc_channel(hdl, IBT_OCHAN_NO_FLAGS,
927b86efd96Sagiri 	    mode, &ocargs, &ocrets);
928b86efd96Sagiri 	if (ret != IBT_SUCCESS) {
929b86efd96Sagiri 		RDS_DPRINTF2(LABEL, "SP(%p) EP(%p) ibt_open_rc_channel "
930b86efd96Sagiri 		    "failed: %d", sp, ep, ret);
931b86efd96Sagiri 		(void) ibt_flush_channel(hdl);
932b86efd96Sagiri 		(void) ibt_free_channel(hdl);
9335763ba1eSagiri 
9345763ba1eSagiri 		mutex_enter(&ep->ep_lock);
935cd03c4aeSagiri 		/* don't cleanup if this failure is due to peer-peer race */
936cd03c4aeSagiri 		if (ep->ep_state == RDS_EP_STATE_ACTIVE_PENDING) {
937cd03c4aeSagiri 			/* cleanup stuff allocated in rds_ep_alloc_rc_channel */
938cd03c4aeSagiri 			ep->ep_state = RDS_EP_STATE_ERROR;
939cd03c4aeSagiri 			rds_ep_free_rc_channel(ep);
940cd03c4aeSagiri 		}
9415763ba1eSagiri 		mutex_exit(&ep->ep_lock);
9425763ba1eSagiri 
943c1f8b08eSagiri 		return (-1);
944b86efd96Sagiri 	}
945b86efd96Sagiri 
946b86efd96Sagiri 	*chanhdl = hdl;
947b86efd96Sagiri 
948b86efd96Sagiri 	RDS_DPRINTF2("rds_open_rc_channel", "Return: EP(%p) Chan: %p", ep,
949b86efd96Sagiri 	    *chanhdl);
950b86efd96Sagiri 
951c1f8b08eSagiri 	return (0);
952b86efd96Sagiri }
953b86efd96Sagiri 
954b86efd96Sagiri int
rds_close_rc_channel(ibt_channel_hdl_t chanhdl,ibt_execution_mode_t mode)955b86efd96Sagiri rds_close_rc_channel(ibt_channel_hdl_t chanhdl, ibt_execution_mode_t mode)
956b86efd96Sagiri {
957b86efd96Sagiri 	int	ret;
958b86efd96Sagiri 
959b86efd96Sagiri 	RDS_DPRINTF2("rds_close_rc_channel", "Enter: Chan(%p) Mode(%d)",
960b86efd96Sagiri 	    chanhdl, mode);
961b86efd96Sagiri 
962b86efd96Sagiri 	ret = ibt_close_rc_channel(chanhdl, mode, NULL, 0, NULL, NULL, 0);
963b86efd96Sagiri 
964b86efd96Sagiri 	RDS_DPRINTF2("rds_close_rc_channel", "Return Chan(%p)", chanhdl);
965b86efd96Sagiri 
966b86efd96Sagiri 	return (ret);
967b86efd96Sagiri }
968