1b86efd96Sagiri /*
2b86efd96Sagiri  * CDDL HEADER START
3b86efd96Sagiri  *
4b86efd96Sagiri  * The contents of this file are subject to the terms of the
5b86efd96Sagiri  * Common Development and Distribution License (the "License").
6b86efd96Sagiri  * You may not use this file except in compliance with the License.
7b86efd96Sagiri  *
8b86efd96Sagiri  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9b86efd96Sagiri  * or http://www.opensolaris.org/os/licensing.
10b86efd96Sagiri  * See the License for the specific language governing permissions
11b86efd96Sagiri  * and limitations under the License.
12b86efd96Sagiri  *
13b86efd96Sagiri  * When distributing Covered Code, include this CDDL HEADER in each
14b86efd96Sagiri  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15b86efd96Sagiri  * If applicable, add the following below this CDDL HEADER, with the
16b86efd96Sagiri  * fields enclosed by brackets "[]" replaced with your own identifying
17b86efd96Sagiri  * information: Portions Copyright [yyyy] [name of copyright owner]
18b86efd96Sagiri  *
19b86efd96Sagiri  * CDDL HEADER END
20b86efd96Sagiri  */
21b86efd96Sagiri /*
220c19630bSagiri  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23b86efd96Sagiri  * Use is subject to license terms.
24b86efd96Sagiri  */
25b86efd96Sagiri /*
26b86efd96Sagiri  * Copyright (c) 2005 SilverStorm Technologies, Inc. All rights reserved.
27b86efd96Sagiri  *
28b86efd96Sagiri  * This software is available to you under a choice of one of two
29b86efd96Sagiri  * licenses.  You may choose to be licensed under the terms of the GNU
30b86efd96Sagiri  * General Public License (GPL) Version 2, available from the file
31b86efd96Sagiri  * COPYING in the main directory of this source tree, or the
32b86efd96Sagiri  * OpenIB.org BSD license below:
33b86efd96Sagiri  *
34b86efd96Sagiri  *     Redistribution and use in source and binary forms, with or
35b86efd96Sagiri  *     without modification, are permitted provided that the following
36b86efd96Sagiri  *     conditions are met:
37b86efd96Sagiri  *
38b86efd96Sagiri  *	- Redistributions of source code must retain the above
39b86efd96Sagiri  *	  copyright notice, this list of conditions and the following
40b86efd96Sagiri  *	  disclaimer.
41b86efd96Sagiri  *
42b86efd96Sagiri  *	- Redistributions in binary form must reproduce the above
43b86efd96Sagiri  *	  copyright notice, this list of conditions and the following
44b86efd96Sagiri  *	  disclaimer in the documentation and/or other materials
45b86efd96Sagiri  *	  provided with the distribution.
46b86efd96Sagiri  *
47b86efd96Sagiri  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
48b86efd96Sagiri  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
49b86efd96Sagiri  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
50b86efd96Sagiri  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
51b86efd96Sagiri  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
52b86efd96Sagiri  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
53b86efd96Sagiri  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
54b86efd96Sagiri  * SOFTWARE.
55b86efd96Sagiri  *
56b86efd96Sagiri  */
57b86efd96Sagiri /*
58b86efd96Sagiri  * Sun elects to include this software in Sun product
59b86efd96Sagiri  * under the OpenIB BSD license.
60b86efd96Sagiri  *
61b86efd96Sagiri  *
62b86efd96Sagiri  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
63b86efd96Sagiri  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
64b86efd96Sagiri  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
65b86efd96Sagiri  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
66b86efd96Sagiri  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
67b86efd96Sagiri  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
68b86efd96Sagiri  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
69b86efd96Sagiri  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
70b86efd96Sagiri  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
71b86efd96Sagiri  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
72b86efd96Sagiri  * POSSIBILITY OF SUCH DAMAGE.
73b86efd96Sagiri  */
74b86efd96Sagiri 
75b86efd96Sagiri #include <sys/stream.h>
76b86efd96Sagiri #include <sys/ib/clients/rds/rdsib_cm.h>
77b86efd96Sagiri #include <sys/ib/clients/rds/rdsib_ib.h>
78b86efd96Sagiri #include <sys/ib/clients/rds/rdsib_buf.h>
79b86efd96Sagiri #include <sys/ib/clients/rds/rdsib_ep.h>
80b86efd96Sagiri #include <sys/ib/clients/rds/rds_kstat.h>
81b86efd96Sagiri #include <sys/zone.h>
82b86efd96Sagiri 
83b86efd96Sagiri #define	RDS_POLL_CQ_IN_2TICKS	1
84b86efd96Sagiri 
85b86efd96Sagiri /*
86b86efd96Sagiri  * This File contains the endpoint related calls
87b86efd96Sagiri  */
88b86efd96Sagiri 
89b86efd96Sagiri extern boolean_t rds_islocal(ipaddr_t addr);
90b86efd96Sagiri extern uint_t rds_wc_signal;
91b86efd96Sagiri 
928257fab9Sagiri #define	RDS_LOOPBACK	0
938257fab9Sagiri #define	RDS_LOCAL	1
948257fab9Sagiri #define	RDS_REMOTE	2
958257fab9Sagiri 
96015f8fffShiremath #define	IBT_IPADDR	1
97015f8fffShiremath 
98b86efd96Sagiri static uint8_t
rds_is_port_marked(rds_session_t * sp,in_port_t port,uint_t qualifier)998257fab9Sagiri rds_is_port_marked(rds_session_t *sp, in_port_t port, uint_t qualifier)
100b86efd96Sagiri {
101b86efd96Sagiri 	uint8_t	ret;
102b86efd96Sagiri 
1038257fab9Sagiri 	switch (qualifier) {
1048257fab9Sagiri 	case RDS_LOOPBACK: /* loopback */
1058257fab9Sagiri 		rw_enter(&rds_loopback_portmap_lock, RW_READER);
1068257fab9Sagiri 		ret = (rds_loopback_portmap[port/8] & (1 << (port % 8)));
1078257fab9Sagiri 		rw_exit(&rds_loopback_portmap_lock);
1088257fab9Sagiri 		break;
1098257fab9Sagiri 
1108257fab9Sagiri 	case RDS_LOCAL: /* Session local */
1118257fab9Sagiri 		ASSERT(sp != NULL);
1128257fab9Sagiri 		rw_enter(&sp->session_local_portmap_lock, RW_READER);
1138257fab9Sagiri 		ret = (sp->session_local_portmap[port/8] & (1 << (port % 8)));
1148257fab9Sagiri 		rw_exit(&sp->session_local_portmap_lock);
1158257fab9Sagiri 		break;
1168257fab9Sagiri 
1178257fab9Sagiri 	case RDS_REMOTE: /* Session remote */
1188257fab9Sagiri 		ASSERT(sp != NULL);
1198257fab9Sagiri 		rw_enter(&sp->session_remote_portmap_lock, RW_READER);
1208257fab9Sagiri 		ret = (sp->session_remote_portmap[port/8] & (1 << (port % 8)));
1218257fab9Sagiri 		rw_exit(&sp->session_remote_portmap_lock);
1228257fab9Sagiri 		break;
123b86efd96Sagiri 	}
124b86efd96Sagiri 
125b86efd96Sagiri 	return (ret);
126b86efd96Sagiri }
127b86efd96Sagiri 
128b86efd96Sagiri static uint8_t
rds_check_n_mark_port(rds_session_t * sp,in_port_t port,uint_t qualifier)1298257fab9Sagiri rds_check_n_mark_port(rds_session_t *sp, in_port_t port, uint_t qualifier)
130b86efd96Sagiri {
131b86efd96Sagiri 	uint8_t	ret;
132b86efd96Sagiri 
1338257fab9Sagiri 	switch (qualifier) {
1348257fab9Sagiri 	case RDS_LOOPBACK: /* loopback */
1358257fab9Sagiri 		rw_enter(&rds_loopback_portmap_lock, RW_WRITER);
1368257fab9Sagiri 		ret = (rds_loopback_portmap[port/8] & (1 << (port % 8)));
137b86efd96Sagiri 		if (!ret) {
138b86efd96Sagiri 			/* port is not marked, mark it */
1398257fab9Sagiri 			rds_loopback_portmap[port/8] =
1408257fab9Sagiri 			    rds_loopback_portmap[port/8] | (1 << (port % 8));
141b86efd96Sagiri 		}
1428257fab9Sagiri 		rw_exit(&rds_loopback_portmap_lock);
1438257fab9Sagiri 		break;
1448257fab9Sagiri 
1458257fab9Sagiri 	case RDS_LOCAL: /* Session local */
1468257fab9Sagiri 		ASSERT(sp != NULL);
1478257fab9Sagiri 		rw_enter(&sp->session_local_portmap_lock, RW_WRITER);
1488257fab9Sagiri 		ret = (sp->session_local_portmap[port/8] & (1 << (port % 8)));
1498257fab9Sagiri 		if (!ret) {
1508257fab9Sagiri 			/* port is not marked, mark it */
1518257fab9Sagiri 			sp->session_local_portmap[port/8] =
1528257fab9Sagiri 			    sp->session_local_portmap[port/8] |
1538257fab9Sagiri 			    (1 << (port % 8));
1548257fab9Sagiri 		}
1558257fab9Sagiri 		rw_exit(&sp->session_local_portmap_lock);
1568257fab9Sagiri 		break;
1578257fab9Sagiri 
1588257fab9Sagiri 	case RDS_REMOTE: /* Session remote */
1598257fab9Sagiri 		ASSERT(sp != NULL);
1608257fab9Sagiri 		rw_enter(&sp->session_remote_portmap_lock, RW_WRITER);
1618257fab9Sagiri 		ret = (sp->session_remote_portmap[port/8] & (1 << (port % 8)));
162b86efd96Sagiri 		if (!ret) {
163b86efd96Sagiri 			/* port is not marked, mark it */
1648257fab9Sagiri 			sp->session_remote_portmap[port/8] =
1658257fab9Sagiri 			    sp->session_remote_portmap[port/8] |
1668257fab9Sagiri 			    (1 << (port % 8));
167b86efd96Sagiri 		}
1688257fab9Sagiri 		rw_exit(&sp->session_remote_portmap_lock);
1698257fab9Sagiri 		break;
170b86efd96Sagiri 	}
171b86efd96Sagiri 
172b86efd96Sagiri 	return (ret);
173b86efd96Sagiri }
174b86efd96Sagiri 
175b86efd96Sagiri static uint8_t
rds_check_n_unmark_port(rds_session_t * sp,in_port_t port,uint_t qualifier)1768257fab9Sagiri rds_check_n_unmark_port(rds_session_t *sp, in_port_t port, uint_t qualifier)
177b86efd96Sagiri {
178b86efd96Sagiri 	uint8_t	ret;
179b86efd96Sagiri 
1808257fab9Sagiri 	switch (qualifier) {
1818257fab9Sagiri 	case RDS_LOOPBACK: /* loopback */
1828257fab9Sagiri 		rw_enter(&rds_loopback_portmap_lock, RW_WRITER);
1838257fab9Sagiri 		ret = (rds_loopback_portmap[port/8] & (1 << (port % 8)));
184b86efd96Sagiri 		if (ret) {
185b86efd96Sagiri 			/* port is marked, unmark it */
1868257fab9Sagiri 			rds_loopback_portmap[port/8] =
1878257fab9Sagiri 			    rds_loopback_portmap[port/8] & ~(1 << (port % 8));
188b86efd96Sagiri 		}
1898257fab9Sagiri 		rw_exit(&rds_loopback_portmap_lock);
1908257fab9Sagiri 		break;
1918257fab9Sagiri 
1928257fab9Sagiri 	case RDS_LOCAL: /* Session local */
1938257fab9Sagiri 		ASSERT(sp != NULL);
1948257fab9Sagiri 		rw_enter(&sp->session_local_portmap_lock, RW_WRITER);
1958257fab9Sagiri 		ret = (sp->session_local_portmap[port/8] & (1 << (port % 8)));
1968257fab9Sagiri 		if (ret) {
1978257fab9Sagiri 			/* port is marked, unmark it */
1988257fab9Sagiri 			sp->session_local_portmap[port/8] =
1998257fab9Sagiri 			    sp->session_local_portmap[port/8] &
2008257fab9Sagiri 			    ~(1 << (port % 8));
2018257fab9Sagiri 		}
2028257fab9Sagiri 		rw_exit(&sp->session_local_portmap_lock);
2038257fab9Sagiri 		break;
2048257fab9Sagiri 
2058257fab9Sagiri 	case RDS_REMOTE: /* Session remote */
2068257fab9Sagiri 		ASSERT(sp != NULL);
2078257fab9Sagiri 		rw_enter(&sp->session_remote_portmap_lock, RW_WRITER);
2088257fab9Sagiri 		ret = (sp->session_remote_portmap[port/8] & (1 << (port % 8)));
209b86efd96Sagiri 		if (ret) {
210b86efd96Sagiri 			/* port is marked, unmark it */
2118257fab9Sagiri 			sp->session_remote_portmap[port/8] =
2128257fab9Sagiri 			    sp->session_remote_portmap[port/8] &
2138257fab9Sagiri 			    ~(1 << (port % 8));
214b86efd96Sagiri 		}
2158257fab9Sagiri 		rw_exit(&sp->session_remote_portmap_lock);
2168257fab9Sagiri 		break;
217b86efd96Sagiri 	}
218b86efd96Sagiri 
219b86efd96Sagiri 	return (ret);
220b86efd96Sagiri }
221b86efd96Sagiri 
222b86efd96Sagiri static void
rds_mark_all_ports(rds_session_t * sp,uint_t qualifier)2238257fab9Sagiri rds_mark_all_ports(rds_session_t *sp, uint_t qualifier)
224b86efd96Sagiri {
2258257fab9Sagiri 	switch (qualifier) {
2268257fab9Sagiri 	case RDS_LOOPBACK: /* loopback */
2278257fab9Sagiri 		rw_enter(&rds_loopback_portmap_lock, RW_WRITER);
2288257fab9Sagiri 		(void) memset(rds_loopback_portmap, 0xFF, RDS_PORT_MAP_SIZE);
2298257fab9Sagiri 		rw_exit(&rds_loopback_portmap_lock);
2308257fab9Sagiri 		break;
2318257fab9Sagiri 
2328257fab9Sagiri 	case RDS_LOCAL: /* Session local */
2338257fab9Sagiri 		ASSERT(sp != NULL);
2348257fab9Sagiri 		rw_enter(&sp->session_local_portmap_lock, RW_WRITER);
2358257fab9Sagiri 		(void) memset(sp->session_local_portmap, 0xFF,
2368257fab9Sagiri 		    RDS_PORT_MAP_SIZE);
2378257fab9Sagiri 		rw_exit(&sp->session_local_portmap_lock);
2388257fab9Sagiri 		break;
2398257fab9Sagiri 
2408257fab9Sagiri 	case RDS_REMOTE: /* Session remote */
2418257fab9Sagiri 		ASSERT(sp != NULL);
2428257fab9Sagiri 		rw_enter(&sp->session_remote_portmap_lock, RW_WRITER);
2438257fab9Sagiri 		(void) memset(sp->session_remote_portmap, 0xFF,
2448257fab9Sagiri 		    RDS_PORT_MAP_SIZE);
2458257fab9Sagiri 		rw_exit(&sp->session_remote_portmap_lock);
2468257fab9Sagiri 		break;
247b86efd96Sagiri 	}
248b86efd96Sagiri }
249b86efd96Sagiri 
250b86efd96Sagiri static void
rds_unmark_all_ports(rds_session_t * sp,uint_t qualifier)2518257fab9Sagiri rds_unmark_all_ports(rds_session_t *sp, uint_t qualifier)
252b86efd96Sagiri {
2538257fab9Sagiri 	switch (qualifier) {
2548257fab9Sagiri 	case RDS_LOOPBACK: /* loopback */
2558257fab9Sagiri 		rw_enter(&rds_loopback_portmap_lock, RW_WRITER);
2568257fab9Sagiri 		bzero(rds_loopback_portmap, RDS_PORT_MAP_SIZE);
2578257fab9Sagiri 		rw_exit(&rds_loopback_portmap_lock);
2588257fab9Sagiri 		break;
2598257fab9Sagiri 
2608257fab9Sagiri 	case RDS_LOCAL: /* Session local */
2618257fab9Sagiri 		ASSERT(sp != NULL);
2628257fab9Sagiri 		rw_enter(&sp->session_local_portmap_lock, RW_WRITER);
2638257fab9Sagiri 		bzero(sp->session_local_portmap, RDS_PORT_MAP_SIZE);
2648257fab9Sagiri 		rw_exit(&sp->session_local_portmap_lock);
2658257fab9Sagiri 		break;
2668257fab9Sagiri 
2678257fab9Sagiri 	case RDS_REMOTE: /* Session remote */
2688257fab9Sagiri 		ASSERT(sp != NULL);
2698257fab9Sagiri 		rw_enter(&sp->session_remote_portmap_lock, RW_WRITER);
2708257fab9Sagiri 		bzero(sp->session_remote_portmap, RDS_PORT_MAP_SIZE);
2718257fab9Sagiri 		rw_exit(&sp->session_remote_portmap_lock);
2728257fab9Sagiri 		break;
273b86efd96Sagiri 	}
274b86efd96Sagiri }
275b86efd96Sagiri 
27674242422Sagiri static boolean_t
rds_add_session(rds_session_t * sp,boolean_t locked)277b86efd96Sagiri rds_add_session(rds_session_t *sp, boolean_t locked)
278b86efd96Sagiri {
27974242422Sagiri 	boolean_t retval = B_TRUE;
28074242422Sagiri 
281b86efd96Sagiri 	RDS_DPRINTF2("rds_add_session", "Enter: SP(%p)", sp);
282b86efd96Sagiri 
283b86efd96Sagiri 	if (!locked) {
284b86efd96Sagiri 		rw_enter(&rdsib_statep->rds_sessionlock, RW_WRITER);
285b86efd96Sagiri 	}
286b86efd96Sagiri 
28774242422Sagiri 	/* Don't allow more sessions than configured in rdsib.conf */
28874242422Sagiri 	if (rdsib_statep->rds_nsessions >= (MaxNodes - 1)) {
28974242422Sagiri 		RDS_DPRINTF1("rds_add_session", "Max session limit reached");
29074242422Sagiri 		retval = B_FALSE;
29174242422Sagiri 	} else {
29274242422Sagiri 		sp->session_nextp = rdsib_statep->rds_sessionlistp;
29374242422Sagiri 		rdsib_statep->rds_sessionlistp = sp;
29474242422Sagiri 		rdsib_statep->rds_nsessions++;
29574242422Sagiri 		RDS_INCR_SESS();
29674242422Sagiri 	}
297b86efd96Sagiri 
298b86efd96Sagiri 	if (!locked) {
299b86efd96Sagiri 		rw_exit(&rdsib_statep->rds_sessionlock);
300b86efd96Sagiri 	}
301b86efd96Sagiri 
302b86efd96Sagiri 	RDS_DPRINTF2("rds_add_session", "Return: SP(%p)", sp);
30374242422Sagiri 
30474242422Sagiri 	return (retval);
305b86efd96Sagiri }
306b86efd96Sagiri 
307b86efd96Sagiri /* Session lookup based on destination IP or destination node guid */
308b86efd96Sagiri rds_session_t *
rds_session_lkup(rds_state_t * statep,ipaddr_t remoteip,ib_guid_t node_guid)309b86efd96Sagiri rds_session_lkup(rds_state_t *statep, ipaddr_t remoteip, ib_guid_t node_guid)
310b86efd96Sagiri {
311b86efd96Sagiri 	rds_session_t	*sp;
312b86efd96Sagiri 
313b86efd96Sagiri 	RDS_DPRINTF4("rds_session_lkup", "Enter: 0x%p 0x%x 0x%llx", statep,
314b86efd96Sagiri 	    remoteip, node_guid);
315b86efd96Sagiri 
316b86efd96Sagiri 	/* A read/write lock is expected, will panic if none of them are held */
317b86efd96Sagiri 	ASSERT(rw_lock_held(&statep->rds_sessionlock));
318b86efd96Sagiri 	sp = statep->rds_sessionlistp;
319b86efd96Sagiri 	while (sp) {
3208257fab9Sagiri 		if ((sp->session_remip == remoteip) || ((node_guid != 0) &&
3218257fab9Sagiri 		    (sp->session_rgid.gid_guid == node_guid))) {
322b86efd96Sagiri 			break;
323b86efd96Sagiri 		}
324b86efd96Sagiri 
325b86efd96Sagiri 		sp = sp->session_nextp;
326b86efd96Sagiri 	}
327b86efd96Sagiri 
328b86efd96Sagiri 	RDS_DPRINTF4("rds_session_lkup", "Return: SP(%p)", sp);
329b86efd96Sagiri 
330b86efd96Sagiri 	return (sp);
331b86efd96Sagiri }
332b86efd96Sagiri 
3335763ba1eSagiri boolean_t
rds_session_lkup_by_sp(rds_session_t * sp)3345763ba1eSagiri rds_session_lkup_by_sp(rds_session_t *sp)
3355763ba1eSagiri {
3365763ba1eSagiri 	rds_session_t *sessionp;
3375763ba1eSagiri 
3385763ba1eSagiri 	RDS_DPRINTF4("rds_session_lkup_by_sp", "Enter: 0x%p", sp);
3395763ba1eSagiri 
3405763ba1eSagiri 	rw_enter(&rdsib_statep->rds_sessionlock, RW_READER);
3415763ba1eSagiri 	sessionp = rdsib_statep->rds_sessionlistp;
3425763ba1eSagiri 	while (sessionp) {
3435763ba1eSagiri 		if (sessionp == sp) {
3445763ba1eSagiri 			rw_exit(&rdsib_statep->rds_sessionlock);
3455763ba1eSagiri 			return (B_TRUE);
3465763ba1eSagiri 		}
3475763ba1eSagiri 
3485763ba1eSagiri 		sessionp = sessionp->session_nextp;
3495763ba1eSagiri 	}
3505763ba1eSagiri 	rw_exit(&rdsib_statep->rds_sessionlock);
3515763ba1eSagiri 
3525763ba1eSagiri 	return (B_FALSE);
3535763ba1eSagiri }
3545763ba1eSagiri 
355b86efd96Sagiri static void
rds_ep_fini(rds_ep_t * ep)356b86efd96Sagiri rds_ep_fini(rds_ep_t *ep)
357b86efd96Sagiri {
358b86efd96Sagiri 	RDS_DPRINTF3("rds_ep_fini", "Enter: EP(%p) type: %d", ep, ep->ep_type);
359b86efd96Sagiri 
360b86efd96Sagiri 	/* free send pool */
361b86efd96Sagiri 	rds_free_send_pool(ep);
362b86efd96Sagiri 
363b86efd96Sagiri 	/* free recv pool */
364b86efd96Sagiri 	rds_free_recv_pool(ep);
365b86efd96Sagiri 
3668257fab9Sagiri 	mutex_enter(&ep->ep_lock);
3678257fab9Sagiri 	ep->ep_hca_guid = 0;
3688257fab9Sagiri 	mutex_exit(&ep->ep_lock);
3698257fab9Sagiri 
370b86efd96Sagiri 	RDS_DPRINTF3("rds_ep_fini", "Return EP(%p)", ep);
371b86efd96Sagiri }
372b86efd96Sagiri 
373b86efd96Sagiri /* Assumes SP write lock is held */
374b86efd96Sagiri int
rds_ep_init(rds_ep_t * ep,ib_guid_t hca_guid)3758257fab9Sagiri rds_ep_init(rds_ep_t *ep, ib_guid_t hca_guid)
376b86efd96Sagiri {
377b86efd96Sagiri 	uint_t		ret;
378b86efd96Sagiri 
379b86efd96Sagiri 	RDS_DPRINTF3("rds_ep_init", "Enter: EP(%p) Type: %d", ep, ep->ep_type);
380b86efd96Sagiri 
381b86efd96Sagiri 	/* send pool */
3828257fab9Sagiri 	ret = rds_init_send_pool(ep, hca_guid);
383b86efd96Sagiri 	if (ret != 0) {
384b86efd96Sagiri 		RDS_DPRINTF2(LABEL, "EP(%p): rds_init_send_pool failed: %d",
385b86efd96Sagiri 		    ep, ret);
386b86efd96Sagiri 		return (-1);
387b86efd96Sagiri 	}
388b86efd96Sagiri 
389b86efd96Sagiri 	/* recv pool */
390b86efd96Sagiri 	ret = rds_init_recv_pool(ep);
391b86efd96Sagiri 	if (ret != 0) {
392b86efd96Sagiri 		RDS_DPRINTF2(LABEL, "EP(%p): rds_init_recv_pool failed: %d",
393b86efd96Sagiri 		    ep, ret);
394b86efd96Sagiri 		rds_free_send_pool(ep);
395b86efd96Sagiri 		return (-1);
396b86efd96Sagiri 	}
397b86efd96Sagiri 
398b86efd96Sagiri 	/* reset the ep state */
399b86efd96Sagiri 	mutex_enter(&ep->ep_lock);
400b86efd96Sagiri 	ep->ep_state = RDS_EP_STATE_UNCONNECTED;
4018257fab9Sagiri 	ep->ep_hca_guid = hca_guid;
402*accc2981SToomas Soome 	ep->ep_lbufid = 0;
403*accc2981SToomas Soome 	ep->ep_rbufid = 0;
404b86efd96Sagiri 	ep->ep_segfbp = NULL;
405b86efd96Sagiri 	ep->ep_seglbp = NULL;
406b86efd96Sagiri 
407b86efd96Sagiri 	/* Initialize the WR to send acknowledgements */
408b86efd96Sagiri 	ep->ep_ackwr.wr_id = RDS_RDMAW_WRID;
409b86efd96Sagiri 	ep->ep_ackwr.wr_flags = IBT_WR_SEND_SOLICIT;
410b86efd96Sagiri 	ep->ep_ackwr.wr_trans = IBT_RC_SRV;
411b86efd96Sagiri 	ep->ep_ackwr.wr_opcode = IBT_WRC_RDMAW;
412b86efd96Sagiri 	ep->ep_ackwr.wr_nds = 1;
413b86efd96Sagiri 	ep->ep_ackwr.wr_sgl = &ep->ep_ackds;
414*accc2981SToomas Soome 	ep->ep_ackwr.wr.rc.rcwr.rdma.rdma_raddr = 0;
415b86efd96Sagiri 	ep->ep_ackwr.wr.rc.rcwr.rdma.rdma_rkey = 0;
416b86efd96Sagiri 	mutex_exit(&ep->ep_lock);
417b86efd96Sagiri 
418b86efd96Sagiri 	RDS_DPRINTF3("rds_ep_init", "Return: EP(%p) type: %d", ep, ep->ep_type);
419b86efd96Sagiri 
420b86efd96Sagiri 	return (0);
421b86efd96Sagiri }
422b86efd96Sagiri 
423c1f8b08eSagiri static int
rds_ep_reinit(rds_ep_t * ep,ib_guid_t hca_guid)424c1f8b08eSagiri rds_ep_reinit(rds_ep_t *ep, ib_guid_t hca_guid)
425c1f8b08eSagiri {
426c1f8b08eSagiri 	int	ret;
427c1f8b08eSagiri 
428c1f8b08eSagiri 	RDS_DPRINTF3("rds_ep_reinit", "Enter: EP(%p) Type: %d",
429c1f8b08eSagiri 	    ep, ep->ep_type);
430c1f8b08eSagiri 
431c1f8b08eSagiri 	/* Re-initialize send pool */
432c1f8b08eSagiri 	ret = rds_reinit_send_pool(ep, hca_guid);
433c1f8b08eSagiri 	if (ret != 0) {
434c1f8b08eSagiri 		RDS_DPRINTF2("rds_ep_reinit",
435c1f8b08eSagiri 		    "EP(%p): rds_reinit_send_pool failed: %d", ep, ret);
436c1f8b08eSagiri 		return (-1);
437c1f8b08eSagiri 	}
438c1f8b08eSagiri 
439c1f8b08eSagiri 	/* free all the receive buffers in the pool */
440c1f8b08eSagiri 	rds_free_recv_pool(ep);
441c1f8b08eSagiri 
442c1f8b08eSagiri 	RDS_DPRINTF3("rds_ep_reinit", "Return: EP(%p) Type: %d",
443c1f8b08eSagiri 	    ep, ep->ep_type);
444c1f8b08eSagiri 
445c1f8b08eSagiri 	return (0);
446c1f8b08eSagiri }
447c1f8b08eSagiri 
448b86efd96Sagiri void
rds_session_fini(rds_session_t * sp)449b86efd96Sagiri rds_session_fini(rds_session_t *sp)
450b86efd96Sagiri {
451b86efd96Sagiri 	RDS_DPRINTF2("rds_session_fini", "Enter: SP(0x%p)", sp);
452b86efd96Sagiri 
453b86efd96Sagiri 	rds_ep_fini(&sp->session_dataep);
454b86efd96Sagiri 	rds_ep_fini(&sp->session_ctrlep);
455b86efd96Sagiri 
456b86efd96Sagiri 	RDS_DPRINTF2("rds_session_fini", "Return: SP(0x%p)", sp);
457b86efd96Sagiri }
458b86efd96Sagiri 
459b86efd96Sagiri /*
460b86efd96Sagiri  * Allocate and initialize the resources needed for the control and
461b86efd96Sagiri  * data channels
462b86efd96Sagiri  */
463b86efd96Sagiri int
rds_session_init(rds_session_t * sp)464b86efd96Sagiri rds_session_init(rds_session_t *sp)
465b86efd96Sagiri {
466b86efd96Sagiri 	int		ret;
4678257fab9Sagiri 	rds_hca_t	*hcap;
4688257fab9Sagiri 	ib_guid_t	hca_guid;
469b86efd96Sagiri 
470b86efd96Sagiri 	RDS_DPRINTF2("rds_session_init", "Enter: SP(0x%p)", sp);
471b86efd96Sagiri 
472b86efd96Sagiri 	/* CALLED WITH SESSION WRITE LOCK */
473b86efd96Sagiri 
4748257fab9Sagiri 	hcap = rds_gid_to_hcap(rdsib_statep, sp->session_lgid);
4758257fab9Sagiri 	if (hcap == NULL) {
47674242422Sagiri 		RDS_DPRINTF2("rds_session_init", "SGID is on an uninitialized "
4778257fab9Sagiri 		    "HCA: %llx", sp->session_lgid.gid_guid);
4788257fab9Sagiri 		return (-1);
4798257fab9Sagiri 	}
4808257fab9Sagiri 
4818257fab9Sagiri 	hca_guid = hcap->hca_guid;
48200a3eaf3SRamaswamy Tummala 	sp->session_hca_guid = hca_guid;
4838257fab9Sagiri 
484b86efd96Sagiri 	/* allocate and initialize the ctrl channel */
4858257fab9Sagiri 	ret = rds_ep_init(&sp->session_ctrlep, hca_guid);
486b86efd96Sagiri 	if (ret != 0) {
487b86efd96Sagiri 		RDS_DPRINTF2(LABEL, "SP(%p): Ctrl EP(%p) initialization "
488b86efd96Sagiri 		    "failed", sp, &sp->session_ctrlep);
489b86efd96Sagiri 		return (-1);
490b86efd96Sagiri 	}
491b86efd96Sagiri 
492b86efd96Sagiri 	RDS_DPRINTF2(LABEL, "SP(%p) Control EP(%p)", sp, &sp->session_ctrlep);
493b86efd96Sagiri 
494b86efd96Sagiri 	/* allocate and initialize the data channel */
4958257fab9Sagiri 	ret = rds_ep_init(&sp->session_dataep, hca_guid);
496b86efd96Sagiri 	if (ret != 0) {
497b86efd96Sagiri 		RDS_DPRINTF2(LABEL, "SP(%p): Data EP(%p) initialization "
498b86efd96Sagiri 		    "failed", sp, &sp->session_dataep);
499b86efd96Sagiri 		rds_ep_fini(&sp->session_ctrlep);
500b86efd96Sagiri 		return (-1);
501b86efd96Sagiri 	}
502b86efd96Sagiri 
5038257fab9Sagiri 	/* Clear the portmaps */
5048257fab9Sagiri 	rds_unmark_all_ports(sp, RDS_LOCAL);
5058257fab9Sagiri 	rds_unmark_all_ports(sp, RDS_REMOTE);
5068257fab9Sagiri 
507b86efd96Sagiri 	RDS_DPRINTF2(LABEL, "SP(%p) Data EP(%p)", sp, &sp->session_dataep);
508b86efd96Sagiri 
509b86efd96Sagiri 	RDS_DPRINTF2("rds_session_init", "Return");
510b86efd96Sagiri 
511b86efd96Sagiri 	return (0);
512b86efd96Sagiri }
513b86efd96Sagiri 
514c1f8b08eSagiri /*
515c1f8b08eSagiri  * This should be called before moving a session from ERROR state to
516c1f8b08eSagiri  * INIT state. This will update the HCA keys incase the session has moved from
517c1f8b08eSagiri  * one HCA to another.
518c1f8b08eSagiri  */
519c1f8b08eSagiri int
rds_session_reinit(rds_session_t * sp,ib_gid_t lgid)520c1f8b08eSagiri rds_session_reinit(rds_session_t *sp, ib_gid_t lgid)
521c1f8b08eSagiri {
522c1f8b08eSagiri 	rds_hca_t	*hcap, *hcap1;
523c1f8b08eSagiri 	int		ret;
524c1f8b08eSagiri 
5250c19630bSagiri 	RDS_DPRINTF2("rds_session_reinit", "Enter: SP(0x%p) - state: %d",
5260c19630bSagiri 	    sp, sp->session_state);
527c1f8b08eSagiri 
528c1f8b08eSagiri 	/* CALLED WITH SESSION WRITE LOCK */
529c1f8b08eSagiri 
530d99cb22fSagiri 	/* Clear the portmaps */
531d99cb22fSagiri 	rds_unmark_all_ports(sp, RDS_LOCAL);
532d99cb22fSagiri 	rds_unmark_all_ports(sp, RDS_REMOTE);
533d99cb22fSagiri 
5340c19630bSagiri 	/* This should not happen but just a safe guard */
535*accc2981SToomas Soome 	if (sp->session_dataep.ep_ack_addr == 0) {
5360c19630bSagiri 		RDS_DPRINTF2("rds_session_reinit",
5370c19630bSagiri 		    "ERROR: Unexpected: SP(0x%p) - state: %d",
5380c19630bSagiri 		    sp, sp->session_state);
5390c19630bSagiri 		return (-1);
5400c19630bSagiri 	}
5410c19630bSagiri 
542d99cb22fSagiri 	/* make the last buffer as the acknowledged */
543d99cb22fSagiri 	*(uintptr_t *)sp->session_dataep.ep_ack_addr =
544d99cb22fSagiri 	    (uintptr_t)sp->session_dataep.ep_sndpool.pool_tailp;
545d99cb22fSagiri 
546c1f8b08eSagiri 	hcap = rds_gid_to_hcap(rdsib_statep, lgid);
547c1f8b08eSagiri 	if (hcap == NULL) {
54874242422Sagiri 		RDS_DPRINTF2("rds_session_reinit", "SGID is on an "
549c1f8b08eSagiri 		    "uninitialized HCA: %llx", lgid.gid_guid);
550c1f8b08eSagiri 		return (-1);
551c1f8b08eSagiri 	}
552c1f8b08eSagiri 
553c1f8b08eSagiri 	hcap1 = rds_gid_to_hcap(rdsib_statep, sp->session_lgid);
554c1f8b08eSagiri 	if (hcap1 == NULL) {
55574242422Sagiri 		RDS_DPRINTF2("rds_session_reinit", "Seems like HCA %llx "
556c1f8b08eSagiri 		    "is unplugged", sp->session_lgid.gid_guid);
557c1f8b08eSagiri 	} else if (hcap->hca_guid == hcap1->hca_guid) {
558c1f8b08eSagiri 		/*
559c1f8b08eSagiri 		 * No action is needed as the session did not move across
560c1f8b08eSagiri 		 * HCAs
561c1f8b08eSagiri 		 */
562c1f8b08eSagiri 		RDS_DPRINTF2("rds_session_reinit", "Failover on the same HCA");
563c1f8b08eSagiri 		return (0);
564c1f8b08eSagiri 	}
565c1f8b08eSagiri 
566c1f8b08eSagiri 	RDS_DPRINTF2("rds_session_reinit", "Failover across HCAs");
567c1f8b08eSagiri 
56800a3eaf3SRamaswamy Tummala 	sp->session_hca_guid = hcap->hca_guid;
56900a3eaf3SRamaswamy Tummala 
570c1f8b08eSagiri 	/* re-initialize the control channel */
571c1f8b08eSagiri 	ret = rds_ep_reinit(&sp->session_ctrlep, hcap->hca_guid);
572c1f8b08eSagiri 	if (ret != 0) {
573c1f8b08eSagiri 		RDS_DPRINTF2("rds_session_reinit",
574c1f8b08eSagiri 		    "SP(%p): Ctrl EP(%p) re-initialization failed",
575c1f8b08eSagiri 		    sp, &sp->session_ctrlep);
576c1f8b08eSagiri 		return (-1);
577c1f8b08eSagiri 	}
578c1f8b08eSagiri 
579c1f8b08eSagiri 	RDS_DPRINTF2("rds_session_reinit", "SP(%p) Control EP(%p)",
580c1f8b08eSagiri 	    sp, &sp->session_ctrlep);
581c1f8b08eSagiri 
582c1f8b08eSagiri 	/* re-initialize the data channel */
583c1f8b08eSagiri 	ret = rds_ep_reinit(&sp->session_dataep, hcap->hca_guid);
584c1f8b08eSagiri 	if (ret != 0) {
585c1f8b08eSagiri 		RDS_DPRINTF2("rds_session_reinit",
586c1f8b08eSagiri 		    "SP(%p): Data EP(%p) re-initialization failed",
587c1f8b08eSagiri 		    sp, &sp->session_dataep);
588c1f8b08eSagiri 		return (-1);
589c1f8b08eSagiri 	}
590c1f8b08eSagiri 
591c1f8b08eSagiri 	RDS_DPRINTF2("rds_session_reinit", "SP(%p) Data EP(%p)",
592c1f8b08eSagiri 	    sp, &sp->session_dataep);
593c1f8b08eSagiri 
594c1f8b08eSagiri 	sp->session_lgid = lgid;
595c1f8b08eSagiri 
596c1f8b08eSagiri 	RDS_DPRINTF2("rds_session_reinit", "Return: SP(0x%p)", sp);
597c1f8b08eSagiri 
598c1f8b08eSagiri 	return (0);
599c1f8b08eSagiri }
600c1f8b08eSagiri 
601b86efd96Sagiri static int
rds_session_connect(rds_session_t * sp)602b86efd96Sagiri rds_session_connect(rds_session_t *sp)
603b86efd96Sagiri {
604b86efd96Sagiri 	ibt_channel_hdl_t	ctrlchan, datachan;
605b86efd96Sagiri 	rds_ep_t		*ep;
606b86efd96Sagiri 	int			ret;
607b86efd96Sagiri 
608b86efd96Sagiri 	RDS_DPRINTF2("rds_session_connect", "Enter SP(%p)", sp);
609b86efd96Sagiri 
610015f8fffShiremath 	sp->session_pinfo.pi_sid = rdsib_statep->rds_service_id;
611b86efd96Sagiri 
612b86efd96Sagiri 	/* Override the packet life time based on the conf file */
613b86efd96Sagiri 	if (IBPktLifeTime != 0) {
614015f8fffShiremath 		sp->session_pinfo.pi_prim_cep_path.cep_cm_opaque1 =
615015f8fffShiremath 		    IBPktLifeTime;
616b86efd96Sagiri 	}
617b86efd96Sagiri 
618b86efd96Sagiri 	/* Session type may change if we run into peer-to-peer case. */
619b86efd96Sagiri 	rw_enter(&sp->session_lock, RW_READER);
620b86efd96Sagiri 	if (sp->session_type == RDS_SESSION_PASSIVE) {
621b86efd96Sagiri 		RDS_DPRINTF2("rds_session_connect", "SP(%p) is no longer the "
622b86efd96Sagiri 		    "active end", sp);
623b86efd96Sagiri 		rw_exit(&sp->session_lock);
624b86efd96Sagiri 		return (0); /* return success */
625b86efd96Sagiri 	}
626b86efd96Sagiri 	rw_exit(&sp->session_lock);
627b86efd96Sagiri 
628b86efd96Sagiri 	/* connect the data ep first */
629b86efd96Sagiri 	ep = &sp->session_dataep;
630b86efd96Sagiri 	mutex_enter(&ep->ep_lock);
631b86efd96Sagiri 	if (ep->ep_state == RDS_EP_STATE_UNCONNECTED) {
632b86efd96Sagiri 		ep->ep_state = RDS_EP_STATE_ACTIVE_PENDING;
633b86efd96Sagiri 		mutex_exit(&ep->ep_lock);
634015f8fffShiremath 		ret = rds_open_rc_channel(ep, &sp->session_pinfo, IBT_BLOCKING,
635015f8fffShiremath 		    &datachan);
636b86efd96Sagiri 		if (ret != IBT_SUCCESS) {
637b86efd96Sagiri 			RDS_DPRINTF2(LABEL, "EP(%p): rds_open_rc_channel "
638c1f8b08eSagiri 			    "failed: %d", ep, ret);
639b86efd96Sagiri 			return (-1);
640b86efd96Sagiri 		}
641b86efd96Sagiri 		sp->session_dataep.ep_chanhdl = datachan;
642b86efd96Sagiri 	} else {
643b86efd96Sagiri 		RDS_DPRINTF2(LABEL, "SP(%p) Data EP(%p) is in "
644b86efd96Sagiri 		    "unexpected state: %d", sp, ep, ep->ep_state);
645b86efd96Sagiri 		mutex_exit(&ep->ep_lock);
646b86efd96Sagiri 		return (-1);
647b86efd96Sagiri 	}
648b86efd96Sagiri 
649b86efd96Sagiri 	RDS_DPRINTF3(LABEL, "SP(%p) EP(%p): Data channel is connected",
650b86efd96Sagiri 	    sp, ep);
651b86efd96Sagiri 
652b86efd96Sagiri 	ep = &sp->session_ctrlep;
653b86efd96Sagiri 	mutex_enter(&ep->ep_lock);
654b86efd96Sagiri 	if (ep->ep_state == RDS_EP_STATE_UNCONNECTED) {
655b86efd96Sagiri 		ep->ep_state = RDS_EP_STATE_ACTIVE_PENDING;
656b86efd96Sagiri 		mutex_exit(&ep->ep_lock);
657015f8fffShiremath 		ret = rds_open_rc_channel(ep, &sp->session_pinfo, IBT_BLOCKING,
658015f8fffShiremath 		    &ctrlchan);
659b86efd96Sagiri 		if (ret != IBT_SUCCESS) {
660b86efd96Sagiri 			RDS_DPRINTF2(LABEL, "EP(%p): rds_open_rc_channel "
661b86efd96Sagiri 			    "failed: %d", ep, ret);
662b86efd96Sagiri 			return (-1);
663b86efd96Sagiri 		}
664b86efd96Sagiri 		sp->session_ctrlep.ep_chanhdl = ctrlchan;
665b86efd96Sagiri 	} else {
666b86efd96Sagiri 		RDS_DPRINTF2(LABEL, "SP(%p) Control EP(%p) is in "
667b86efd96Sagiri 		    "unexpected state: %d", sp, ep, ep->ep_state);
668b86efd96Sagiri 		mutex_exit(&ep->ep_lock);
669b86efd96Sagiri 		return (-1);
670b86efd96Sagiri 	}
671b86efd96Sagiri 
672c1f8b08eSagiri 	RDS_DPRINTF2(LABEL, "Session (%p) 0x%x <--> 0x%x is CONNECTED",
673c1f8b08eSagiri 	    sp, sp->session_myip, sp->session_remip);
674c1f8b08eSagiri 
675b86efd96Sagiri 	RDS_DPRINTF2("rds_session_connect", "Return SP(%p)", sp);
676b86efd96Sagiri 
677b86efd96Sagiri 	return (0);
678b86efd96Sagiri }
679b86efd96Sagiri 
680b86efd96Sagiri /*
681b86efd96Sagiri  * Can be called with or without session_lock.
682b86efd96Sagiri  */
683b86efd96Sagiri void
rds_session_close(rds_session_t * sp,ibt_execution_mode_t mode,uint_t wait)684b86efd96Sagiri rds_session_close(rds_session_t *sp, ibt_execution_mode_t mode, uint_t wait)
685b86efd96Sagiri {
686b86efd96Sagiri 	rds_ep_t		*ep;
687b86efd96Sagiri 
688b86efd96Sagiri 	RDS_DPRINTF2("rds_session_close", "SP(%p) State: %d", sp,
689b86efd96Sagiri 	    sp->session_state);
690b86efd96Sagiri 
691b86efd96Sagiri 	ep = &sp->session_dataep;
692b86efd96Sagiri 	RDS_DPRINTF3(LABEL, "EP(%p) State: %d", ep, ep->ep_state);
693b86efd96Sagiri 
694b86efd96Sagiri 	/* wait until the SQ is empty before closing */
695d99cb22fSagiri 	if (wait != 0) {
696d99cb22fSagiri 		(void) rds_is_sendq_empty(ep, wait);
697d99cb22fSagiri 	}
698b86efd96Sagiri 
699b86efd96Sagiri 	mutex_enter(&ep->ep_lock);
700b86efd96Sagiri 	while (ep->ep_state == RDS_EP_STATE_CLOSING) {
701b86efd96Sagiri 		mutex_exit(&ep->ep_lock);
702b86efd96Sagiri 		delay(drv_usectohz(300000));
703b86efd96Sagiri 		mutex_enter(&ep->ep_lock);
704b86efd96Sagiri 	}
705b86efd96Sagiri 
706b86efd96Sagiri 	if (ep->ep_state == RDS_EP_STATE_CONNECTED) {
707b86efd96Sagiri 		ep->ep_state = RDS_EP_STATE_CLOSING;
708b86efd96Sagiri 		mutex_exit(&ep->ep_lock);
709b86efd96Sagiri 		(void) rds_close_rc_channel(ep->ep_chanhdl, mode);
710d99cb22fSagiri 		if (wait == 0) {
711d99cb22fSagiri 			/* make sure all WCs are flushed before proceeding */
712d99cb22fSagiri 			(void) rds_is_sendq_empty(ep, 1);
713d99cb22fSagiri 		}
714b86efd96Sagiri 		mutex_enter(&ep->ep_lock);
715b86efd96Sagiri 	}
716b86efd96Sagiri 	rds_ep_free_rc_channel(ep);
717b86efd96Sagiri 	ep->ep_state = RDS_EP_STATE_UNCONNECTED;
718b86efd96Sagiri 	ep->ep_segfbp = NULL;
719b86efd96Sagiri 	ep->ep_seglbp = NULL;
720b86efd96Sagiri 	mutex_exit(&ep->ep_lock);
721b86efd96Sagiri 
722b86efd96Sagiri 	ep = &sp->session_ctrlep;
723b86efd96Sagiri 	RDS_DPRINTF3(LABEL, "EP(%p) State: %d", ep, ep->ep_state);
724b86efd96Sagiri 
725b86efd96Sagiri 	/* wait until the SQ is empty before closing */
726d99cb22fSagiri 	if (wait != 0) {
727d99cb22fSagiri 		(void) rds_is_sendq_empty(ep, wait);
728d99cb22fSagiri 	}
729b86efd96Sagiri 
730b86efd96Sagiri 	mutex_enter(&ep->ep_lock);
731b86efd96Sagiri 	while (ep->ep_state == RDS_EP_STATE_CLOSING) {
732b86efd96Sagiri 		mutex_exit(&ep->ep_lock);
733b86efd96Sagiri 		delay(drv_usectohz(300000));
734b86efd96Sagiri 		mutex_enter(&ep->ep_lock);
735b86efd96Sagiri 	}
736b86efd96Sagiri 
737b86efd96Sagiri 	if (ep->ep_state == RDS_EP_STATE_CONNECTED) {
738b86efd96Sagiri 		ep->ep_state = RDS_EP_STATE_CLOSING;
739015f8fffShiremath 		mutex_exit(&ep->ep_lock);
740b86efd96Sagiri 		(void) rds_close_rc_channel(ep->ep_chanhdl, mode);
741d99cb22fSagiri 		if (wait == 0) {
742d99cb22fSagiri 			/* make sure all WCs are flushed before proceeding */
743d99cb22fSagiri 			(void) rds_is_sendq_empty(ep, 1);
744d99cb22fSagiri 		}
745b86efd96Sagiri 		mutex_enter(&ep->ep_lock);
746b86efd96Sagiri 	}
747b86efd96Sagiri 	rds_ep_free_rc_channel(ep);
748b86efd96Sagiri 	ep->ep_state = RDS_EP_STATE_UNCONNECTED;
749b86efd96Sagiri 	ep->ep_segfbp = NULL;
750b86efd96Sagiri 	ep->ep_seglbp = NULL;
751b86efd96Sagiri 	mutex_exit(&ep->ep_lock);
752b86efd96Sagiri 
753b86efd96Sagiri 	RDS_DPRINTF2("rds_session_close", "Return (%p)", sp);
754b86efd96Sagiri }
755b86efd96Sagiri 
756b86efd96Sagiri /* Free the session */
757b86efd96Sagiri static void
rds_destroy_session(rds_session_t * sp)758b86efd96Sagiri rds_destroy_session(rds_session_t *sp)
759b86efd96Sagiri {
760b86efd96Sagiri 	rds_ep_t	*ep;
761b86efd96Sagiri 	rds_bufpool_t	*pool;
762b86efd96Sagiri 
763b86efd96Sagiri 	ASSERT((sp->session_state == RDS_SESSION_STATE_CLOSED) ||
764b86efd96Sagiri 	    (sp->session_state == RDS_SESSION_STATE_FAILED) ||
765b86efd96Sagiri 	    (sp->session_state == RDS_SESSION_STATE_FINI) ||
766b86efd96Sagiri 	    (sp->session_state == RDS_SESSION_STATE_PASSIVE_CLOSING));
767b86efd96Sagiri 
768b86efd96Sagiri 	rw_enter(&sp->session_lock, RW_READER);
769b86efd96Sagiri 	RDS_DPRINTF2("rds_destroy_session", "SP(%p) State: %d", sp,
770b86efd96Sagiri 	    sp->session_state);
771b86efd96Sagiri 	while (!((sp->session_state == RDS_SESSION_STATE_CLOSED) ||
772b86efd96Sagiri 	    (sp->session_state == RDS_SESSION_STATE_FAILED) ||
773b86efd96Sagiri 	    (sp->session_state == RDS_SESSION_STATE_FINI))) {
774b86efd96Sagiri 		rw_exit(&sp->session_lock);
775b86efd96Sagiri 		delay(drv_usectohz(1000000));
776b86efd96Sagiri 		rw_enter(&sp->session_lock, RW_READER);
777b86efd96Sagiri 		RDS_DPRINTF2("rds_destroy_session", "SP(%p) State: %d WAITING "
778b86efd96Sagiri 		    "ON SESSION", sp, sp->session_state);
779b86efd96Sagiri 	}
780b86efd96Sagiri 	rw_exit(&sp->session_lock);
781b86efd96Sagiri 
782b86efd96Sagiri 	/* data channel */
783b86efd96Sagiri 	ep = &sp->session_dataep;
784b86efd96Sagiri 
785b86efd96Sagiri 	/* send pool locks */
786b86efd96Sagiri 	pool = &ep->ep_sndpool;
787b86efd96Sagiri 	cv_destroy(&pool->pool_cv);
788b86efd96Sagiri 	mutex_destroy(&pool->pool_lock);
789b86efd96Sagiri 
790b86efd96Sagiri 	/* recv pool locks */
791b86efd96Sagiri 	pool = &ep->ep_rcvpool;
792b86efd96Sagiri 	cv_destroy(&pool->pool_cv);
793b86efd96Sagiri 	mutex_destroy(&pool->pool_lock);
794b86efd96Sagiri 	mutex_destroy(&ep->ep_recvqp.qp_lock);
795b86efd96Sagiri 
796b86efd96Sagiri 	/* control channel */
797b86efd96Sagiri 	ep = &sp->session_ctrlep;
798b86efd96Sagiri 
799b86efd96Sagiri 	/* send pool locks */
800b86efd96Sagiri 	pool = &ep->ep_sndpool;
801b86efd96Sagiri 	cv_destroy(&pool->pool_cv);
802b86efd96Sagiri 	mutex_destroy(&pool->pool_lock);
803b86efd96Sagiri 
804b86efd96Sagiri 	/* recv pool locks */
805b86efd96Sagiri 	pool = &ep->ep_rcvpool;
806b86efd96Sagiri 	cv_destroy(&pool->pool_cv);
807b86efd96Sagiri 	mutex_destroy(&pool->pool_lock);
808b86efd96Sagiri 	mutex_destroy(&ep->ep_recvqp.qp_lock);
809b86efd96Sagiri 
810b86efd96Sagiri 	/* session */
811b86efd96Sagiri 	rw_destroy(&sp->session_lock);
8128257fab9Sagiri 	rw_destroy(&sp->session_local_portmap_lock);
8138257fab9Sagiri 	rw_destroy(&sp->session_remote_portmap_lock);
814b86efd96Sagiri 
815b86efd96Sagiri 	/* free the session */
816b86efd96Sagiri 	kmem_free(sp, sizeof (rds_session_t));
817b86efd96Sagiri 
818b86efd96Sagiri 	RDS_DPRINTF2("rds_destroy_session", "SP(%p) Return", sp);
819b86efd96Sagiri }
820b86efd96Sagiri 
821b86efd96Sagiri /* This is called on the taskq thread */
82200a3eaf3SRamaswamy Tummala void
rds_failover_session(void * arg)823b86efd96Sagiri rds_failover_session(void *arg)
824b86efd96Sagiri {
825b86efd96Sagiri 	rds_session_t	*sp = (rds_session_t *)arg;
826b86efd96Sagiri 	ib_gid_t	lgid, rgid;
827b86efd96Sagiri 	ipaddr_t	myip, remip;
828b86efd96Sagiri 	int		ret, cnt = 0;
82900a3eaf3SRamaswamy Tummala 	uint8_t		sp_state;
830b86efd96Sagiri 
831b86efd96Sagiri 	RDS_DPRINTF2("rds_failover_session", "Enter: (%p)", sp);
832b86efd96Sagiri 
8335763ba1eSagiri 	/* Make sure the session is still alive */
8345763ba1eSagiri 	if (rds_session_lkup_by_sp(sp) == B_FALSE) {
8355763ba1eSagiri 		RDS_DPRINTF2("rds_failover_session",
8365763ba1eSagiri 		    "Return: SP(%p) not ALIVE", sp);
8375763ba1eSagiri 		return;
8385763ba1eSagiri 	}
8395763ba1eSagiri 
840b86efd96Sagiri 	RDS_INCR_FAILOVERS();
841b86efd96Sagiri 
842b86efd96Sagiri 	rw_enter(&sp->session_lock, RW_WRITER);
843b86efd96Sagiri 	if (sp->session_type != RDS_SESSION_ACTIVE) {
844b86efd96Sagiri 		/*
845b86efd96Sagiri 		 * The remote side must have seen the error and initiated
846b86efd96Sagiri 		 * a re-connect.
847b86efd96Sagiri 		 */
848b86efd96Sagiri 		RDS_DPRINTF2("rds_failover_session",
849b86efd96Sagiri 		    "SP(%p) has become passive", sp);
850b86efd96Sagiri 		rw_exit(&sp->session_lock);
851b86efd96Sagiri 		return;
852b86efd96Sagiri 	}
8538257fab9Sagiri 	sp->session_failover = 1;
85400a3eaf3SRamaswamy Tummala 	sp_state = sp->session_state;
855b86efd96Sagiri 	rw_exit(&sp->session_lock);
856b86efd96Sagiri 
857b86efd96Sagiri 	/*
858b86efd96Sagiri 	 * The session is in ERROR state but close both channels
859b86efd96Sagiri 	 * for a clean start.
860b86efd96Sagiri 	 */
86100a3eaf3SRamaswamy Tummala 	if (sp_state == RDS_SESSION_STATE_ERROR) {
86200a3eaf3SRamaswamy Tummala 		rds_session_close(sp, IBT_BLOCKING, 1);
86300a3eaf3SRamaswamy Tummala 	}
864b86efd96Sagiri 
865b86efd96Sagiri 	/* wait 1 sec before re-connecting */
866b86efd96Sagiri 	delay(drv_usectohz(1000000));
867b86efd96Sagiri 
868b86efd96Sagiri 	do {
869015f8fffShiremath 		ibt_ip_path_attr_t	ipattr;
870015f8fffShiremath 		ibt_ip_addr_t		dstip;
871015f8fffShiremath 
872b86efd96Sagiri 		/* The ipaddr should be in the network order */
873b86efd96Sagiri 		myip = sp->session_myip;
874b86efd96Sagiri 		remip = sp->session_remip;
875b86efd96Sagiri 		ret = rds_sc_path_lookup(&myip, &remip);
876b86efd96Sagiri 		if (ret == 0) {
877b86efd96Sagiri 			RDS_DPRINTF2(LABEL, "Path not found (0x%x 0x%x)",
878b86efd96Sagiri 			    myip, remip);
879b86efd96Sagiri 		}
880b86efd96Sagiri 		/* check if we have (new) path from the source to destination */
8818257fab9Sagiri 		lgid.gid_prefix = 0;
8828257fab9Sagiri 		lgid.gid_guid = 0;
8838257fab9Sagiri 		rgid.gid_prefix = 0;
8848257fab9Sagiri 		rgid.gid_guid = 0;
885015f8fffShiremath 
886015f8fffShiremath 		bzero(&ipattr, sizeof (ibt_ip_path_attr_t));
887015f8fffShiremath 		dstip.family = AF_INET;
888d22e11ebSBill Taylor 		dstip.un.ip4addr = remip;
889015f8fffShiremath 		ipattr.ipa_dst_ip = &dstip;
890015f8fffShiremath 		ipattr.ipa_src_ip.family = AF_INET;
891d22e11ebSBill Taylor 		ipattr.ipa_src_ip.un.ip4addr = myip;
892015f8fffShiremath 		ipattr.ipa_ndst = 1;
893015f8fffShiremath 		ipattr.ipa_max_paths = 1;
894015f8fffShiremath 		RDS_DPRINTF2(LABEL, "ibt_get_ip_paths: 0x%x <-> 0x%x ",
895015f8fffShiremath 		    myip, remip);
896015f8fffShiremath 		ret = ibt_get_ip_paths(rdsib_statep->rds_ibhdl,
897015f8fffShiremath 		    IBT_PATH_NO_FLAGS, &ipattr, &sp->session_pinfo, NULL, NULL);
898015f8fffShiremath 		if (ret == IBT_SUCCESS) {
899015f8fffShiremath 			RDS_DPRINTF2(LABEL, "ibt_get_ip_paths success");
900015f8fffShiremath 			lgid = sp->session_pinfo.
901015f8fffShiremath 			    pi_prim_cep_path.cep_adds_vect.av_sgid;
902015f8fffShiremath 			rgid = sp->session_pinfo.
903015f8fffShiremath 			    pi_prim_cep_path.cep_adds_vect.av_dgid;
904b86efd96Sagiri 			break;
905b86efd96Sagiri 		}
906b86efd96Sagiri 
90774242422Sagiri 		RDS_DPRINTF2(LABEL, "ibt_get_ip_paths failed, ret: %d ", ret);
9088257fab9Sagiri 
909b86efd96Sagiri 		/* wait 1 sec before re-trying */
910b86efd96Sagiri 		delay(drv_usectohz(1000000));
911b86efd96Sagiri 		cnt++;
9128257fab9Sagiri 	} while (cnt < 5);
913b86efd96Sagiri 
914015f8fffShiremath 	if (ret != IBT_SUCCESS) {
915b86efd96Sagiri 		rw_enter(&sp->session_lock, RW_WRITER);
916b86efd96Sagiri 		if (sp->session_type == RDS_SESSION_ACTIVE) {
917b86efd96Sagiri 			rds_session_fini(sp);
918b86efd96Sagiri 			sp->session_state = RDS_SESSION_STATE_FAILED;
9198257fab9Sagiri 			sp->session_failover = 0;
920c1f8b08eSagiri 			RDS_DPRINTF3("rds_failover_session",
921c1f8b08eSagiri 			    "SP(%p) State RDS_SESSION_STATE_FAILED", sp);
922b86efd96Sagiri 		} else {
923b86efd96Sagiri 			RDS_DPRINTF2("rds_failover_session",
924b86efd96Sagiri 			    "SP(%p) has become passive", sp);
925b86efd96Sagiri 		}
926b86efd96Sagiri 		rw_exit(&sp->session_lock);
927b86efd96Sagiri 		return;
928b86efd96Sagiri 	}
929b86efd96Sagiri 
930b86efd96Sagiri 	RDS_DPRINTF2(LABEL, "lgid: %llx:%llx rgid: %llx:%llx",
931b86efd96Sagiri 	    lgid.gid_prefix, lgid.gid_guid, rgid.gid_prefix,
932b86efd96Sagiri 	    rgid.gid_guid);
933b86efd96Sagiri 
934b86efd96Sagiri 	rw_enter(&sp->session_lock, RW_WRITER);
935b86efd96Sagiri 	if (sp->session_type != RDS_SESSION_ACTIVE) {
936b86efd96Sagiri 		/*
937b86efd96Sagiri 		 * The remote side must have seen the error and initiated
938b86efd96Sagiri 		 * a re-connect.
939b86efd96Sagiri 		 */
940b86efd96Sagiri 		RDS_DPRINTF2("rds_failover_session",
941b86efd96Sagiri 		    "SP(%p) has become passive", sp);
942b86efd96Sagiri 		rw_exit(&sp->session_lock);
943b86efd96Sagiri 		return;
944b86efd96Sagiri 	}
945b86efd96Sagiri 
946b86efd96Sagiri 	/* move the session to init state */
947c1f8b08eSagiri 	ret = rds_session_reinit(sp, lgid);
948b86efd96Sagiri 	sp->session_lgid = lgid;
949b86efd96Sagiri 	sp->session_rgid = rgid;
950c1f8b08eSagiri 	if (ret != 0) {
951c1f8b08eSagiri 		rds_session_fini(sp);
952c1f8b08eSagiri 		sp->session_state = RDS_SESSION_STATE_FAILED;
9538257fab9Sagiri 		sp->session_failover = 0;
954c1f8b08eSagiri 		RDS_DPRINTF3("rds_failover_session",
955c1f8b08eSagiri 		    "SP(%p) State RDS_SESSION_STATE_FAILED", sp);
956c1f8b08eSagiri 		rw_exit(&sp->session_lock);
957c1f8b08eSagiri 		return;
958c1f8b08eSagiri 	} else {
959c1f8b08eSagiri 		sp->session_state = RDS_SESSION_STATE_INIT;
960c1f8b08eSagiri 		RDS_DPRINTF3("rds_failover_session",
961c1f8b08eSagiri 		    "SP(%p) State RDS_SESSION_STATE_INIT", sp);
962c1f8b08eSagiri 	}
963b86efd96Sagiri 	rw_exit(&sp->session_lock);
964b86efd96Sagiri 
965b86efd96Sagiri 	rds_session_open(sp);
966b86efd96Sagiri 
967b86efd96Sagiri 	RDS_DPRINTF2("rds_failover_session", "Return: (%p)", sp);
968b86efd96Sagiri }
969b86efd96Sagiri 
970b86efd96Sagiri void
rds_handle_send_error(rds_ep_t * ep)971b86efd96Sagiri rds_handle_send_error(rds_ep_t *ep)
972b86efd96Sagiri {
973b86efd96Sagiri 	if (rds_is_sendq_empty(ep, 0)) {
974b86efd96Sagiri 		/* Session should already be in ERROR, try to reconnect */
975b86efd96Sagiri 		RDS_DPRINTF2("rds_handle_send_error",
976b86efd96Sagiri 		    "Dispatching taskq to failover SP(%p)", ep->ep_sp);
977b86efd96Sagiri 		(void) ddi_taskq_dispatch(rds_taskq, rds_failover_session,
978b86efd96Sagiri 		    (void *)ep->ep_sp, DDI_SLEEP);
979b86efd96Sagiri 	}
980b86efd96Sagiri }
981b86efd96Sagiri 
982b86efd96Sagiri /*
983b86efd96Sagiri  * Called in the CM handler on the passive side
984b86efd96Sagiri  * Called on a taskq thread.
985b86efd96Sagiri  */
986b86efd96Sagiri void
rds_cleanup_passive_session(void * arg)987b86efd96Sagiri rds_cleanup_passive_session(void *arg)
988b86efd96Sagiri {
989b86efd96Sagiri 	rds_session_t	*sp = arg;
990b86efd96Sagiri 
991b86efd96Sagiri 	RDS_DPRINTF2("rds_cleanup_passive_session", "SP(%p) State: %d", sp,
992b86efd96Sagiri 	    sp->session_state);
993b86efd96Sagiri 	ASSERT((sp->session_state == RDS_SESSION_STATE_CLOSED) ||
994b86efd96Sagiri 	    (sp->session_state == RDS_SESSION_STATE_ERROR));
995b86efd96Sagiri 
996b86efd96Sagiri 	rds_session_close(sp, IBT_BLOCKING, 1);
997b86efd96Sagiri 
998b86efd96Sagiri 	rw_enter(&sp->session_lock, RW_WRITER);
999b86efd96Sagiri 	if (sp->session_state == RDS_SESSION_STATE_CLOSED) {
1000b86efd96Sagiri 		rds_session_fini(sp);
1001b86efd96Sagiri 		sp->session_state = RDS_SESSION_STATE_FINI;
10028257fab9Sagiri 		sp->session_failover = 0;
1003b86efd96Sagiri 		RDS_DPRINTF3("rds_cleanup_passive_session",
1004b86efd96Sagiri 		    "SP(%p) State RDS_SESSION_STATE_FINI", sp);
1005b86efd96Sagiri 	} else if (sp->session_state == RDS_SESSION_STATE_ERROR) {
1006b86efd96Sagiri 		rds_session_fini(sp);
1007b86efd96Sagiri 		sp->session_state = RDS_SESSION_STATE_FAILED;
10088257fab9Sagiri 		sp->session_failover = 0;
1009b86efd96Sagiri 		RDS_DPRINTF3("rds_cleanup_passive_session",
1010b86efd96Sagiri 		    "SP(%p) State RDS_SESSION_STATE_FAILED", sp);
1011b86efd96Sagiri 	}
1012b86efd96Sagiri 	rw_exit(&sp->session_lock);
1013b86efd96Sagiri 
1014b86efd96Sagiri 	RDS_DPRINTF2("rds_cleanup_passive_session", "Return: SP (%p)", sp);
1015b86efd96Sagiri }
1016b86efd96Sagiri 
1017b86efd96Sagiri /*
1018b86efd96Sagiri  * Called by the CM handler on the passive side
1019b86efd96Sagiri  * Called with WRITE lock on the session
1020b86efd96Sagiri  */
1021b86efd96Sagiri void
rds_passive_session_fini(rds_session_t * sp)1022b86efd96Sagiri rds_passive_session_fini(rds_session_t *sp)
1023b86efd96Sagiri {
1024b86efd96Sagiri 	rds_ep_t	*ep;
1025b86efd96Sagiri 
1026b86efd96Sagiri 	RDS_DPRINTF2("rds_passive_session_fini", "SP(%p) State: %d", sp,
1027b86efd96Sagiri 	    sp->session_state);
1028b86efd96Sagiri 	ASSERT((sp->session_state == RDS_SESSION_STATE_CLOSED) ||
1029b86efd96Sagiri 	    (sp->session_state == RDS_SESSION_STATE_ERROR));
1030b86efd96Sagiri 
1031b86efd96Sagiri 	/* clean the data channel */
1032b86efd96Sagiri 	ep = &sp->session_dataep;
1033b86efd96Sagiri 	(void) rds_is_sendq_empty(ep, 1);
1034b86efd96Sagiri 	mutex_enter(&ep->ep_lock);
1035b86efd96Sagiri 	RDS_DPRINTF2("rds_passive_session_fini", "EP(%p) State: %d", ep,
1036b86efd96Sagiri 	    ep->ep_state);
1037b86efd96Sagiri 	rds_ep_free_rc_channel(ep);
1038b86efd96Sagiri 	mutex_exit(&ep->ep_lock);
1039b86efd96Sagiri 
1040b86efd96Sagiri 	/* clean the control channel */
1041b86efd96Sagiri 	ep = &sp->session_ctrlep;
1042b86efd96Sagiri 	(void) rds_is_sendq_empty(ep, 1);
1043b86efd96Sagiri 	mutex_enter(&ep->ep_lock);
1044b86efd96Sagiri 	RDS_DPRINTF2("rds_passive_session_fini", "EP(%p) State: %d", ep,
1045b86efd96Sagiri 	    ep->ep_state);
1046b86efd96Sagiri 	rds_ep_free_rc_channel(ep);
1047b86efd96Sagiri 	mutex_exit(&ep->ep_lock);
1048b86efd96Sagiri 
1049b86efd96Sagiri 	rds_session_fini(sp);
10508257fab9Sagiri 	sp->session_failover = 0;
1051b86efd96Sagiri 
1052b86efd96Sagiri 	RDS_DPRINTF2("rds_passive_session_fini", "Return: SP (%p)", sp);
1053b86efd96Sagiri }
1054b86efd96Sagiri 
105500a3eaf3SRamaswamy Tummala void
rds_close_this_session(rds_session_t * sp,uint8_t wait)105600a3eaf3SRamaswamy Tummala rds_close_this_session(rds_session_t *sp, uint8_t wait)
105700a3eaf3SRamaswamy Tummala {
105800a3eaf3SRamaswamy Tummala 	switch (sp->session_state) {
105900a3eaf3SRamaswamy Tummala 	case RDS_SESSION_STATE_CONNECTED:
106000a3eaf3SRamaswamy Tummala 		sp->session_state = RDS_SESSION_STATE_ACTIVE_CLOSING;
106100a3eaf3SRamaswamy Tummala 		rw_exit(&sp->session_lock);
106200a3eaf3SRamaswamy Tummala 
106300a3eaf3SRamaswamy Tummala 		rds_session_close(sp, IBT_BLOCKING, wait);
106400a3eaf3SRamaswamy Tummala 
106500a3eaf3SRamaswamy Tummala 		rw_enter(&sp->session_lock, RW_WRITER);
106600a3eaf3SRamaswamy Tummala 		sp->session_state = RDS_SESSION_STATE_CLOSED;
106700a3eaf3SRamaswamy Tummala 		RDS_DPRINTF3("rds_close_sessions",
106800a3eaf3SRamaswamy Tummala 		    "SP(%p) State RDS_SESSION_STATE_CLOSED", sp);
106900a3eaf3SRamaswamy Tummala 		rds_session_fini(sp);
107000a3eaf3SRamaswamy Tummala 		sp->session_state = RDS_SESSION_STATE_FINI;
107100a3eaf3SRamaswamy Tummala 		sp->session_failover = 0;
107200a3eaf3SRamaswamy Tummala 		RDS_DPRINTF3("rds_close_sessions",
107300a3eaf3SRamaswamy Tummala 		    "SP(%p) State RDS_SESSION_STATE_FINI", sp);
107400a3eaf3SRamaswamy Tummala 		break;
107500a3eaf3SRamaswamy Tummala 
107600a3eaf3SRamaswamy Tummala 	case RDS_SESSION_STATE_ERROR:
107700a3eaf3SRamaswamy Tummala 	case RDS_SESSION_STATE_PASSIVE_CLOSING:
107800a3eaf3SRamaswamy Tummala 	case RDS_SESSION_STATE_INIT:
107900a3eaf3SRamaswamy Tummala 		sp->session_state = RDS_SESSION_STATE_ACTIVE_CLOSING;
108000a3eaf3SRamaswamy Tummala 		rw_exit(&sp->session_lock);
108100a3eaf3SRamaswamy Tummala 
108200a3eaf3SRamaswamy Tummala 		rds_session_close(sp, IBT_BLOCKING, wait);
108300a3eaf3SRamaswamy Tummala 
108400a3eaf3SRamaswamy Tummala 		rw_enter(&sp->session_lock, RW_WRITER);
108500a3eaf3SRamaswamy Tummala 		sp->session_state = RDS_SESSION_STATE_CLOSED;
108600a3eaf3SRamaswamy Tummala 		RDS_DPRINTF3("rds_close_sessions",
108700a3eaf3SRamaswamy Tummala 		    "SP(%p) State RDS_SESSION_STATE_CLOSED", sp);
108800a3eaf3SRamaswamy Tummala 		/* FALLTHRU */
108900a3eaf3SRamaswamy Tummala 	case RDS_SESSION_STATE_CLOSED:
109000a3eaf3SRamaswamy Tummala 		rds_session_fini(sp);
109100a3eaf3SRamaswamy Tummala 		sp->session_state = RDS_SESSION_STATE_FINI;
109200a3eaf3SRamaswamy Tummala 		sp->session_failover = 0;
109300a3eaf3SRamaswamy Tummala 		RDS_DPRINTF3("rds_close_sessions",
109400a3eaf3SRamaswamy Tummala 		    "SP(%p) State RDS_SESSION_STATE_FINI", sp);
109500a3eaf3SRamaswamy Tummala 		break;
109600a3eaf3SRamaswamy Tummala 	}
109700a3eaf3SRamaswamy Tummala }
109800a3eaf3SRamaswamy Tummala 
1099b86efd96Sagiri /*
1100b86efd96Sagiri  * Can be called:
1101b86efd96Sagiri  * 1. on driver detach
1102b86efd96Sagiri  * 2. on taskq thread
1103b86efd96Sagiri  * arg is always NULL
1104b86efd96Sagiri  */
1105b86efd96Sagiri /* ARGSUSED */
1106b86efd96Sagiri void
rds_close_sessions(void * arg)1107b86efd96Sagiri rds_close_sessions(void *arg)
1108b86efd96Sagiri {
1109b86efd96Sagiri 	rds_session_t *sp, *spnextp;
1110b86efd96Sagiri 
1111b86efd96Sagiri 	RDS_DPRINTF2("rds_close_sessions", "Enter");
1112b86efd96Sagiri 
1113b86efd96Sagiri 	/* wait until all the buffers are freed by the sockets */
1114b86efd96Sagiri 	while (RDS_GET_RXPKTS_PEND() != 0) {
1115b86efd96Sagiri 		/* wait one second and try again */
1116b86efd96Sagiri 		RDS_DPRINTF2("rds_close_sessions", "waiting on "
1117b86efd96Sagiri 		    "pending packets", RDS_GET_RXPKTS_PEND());
1118b86efd96Sagiri 		delay(drv_usectohz(1000000));
1119b86efd96Sagiri 	}
1120b86efd96Sagiri 	RDS_DPRINTF2("rds_close_sessions", "No more RX packets pending");
1121b86efd96Sagiri 
1122b86efd96Sagiri 	/* close all the sessions */
1123b86efd96Sagiri 	rw_enter(&rdsib_statep->rds_sessionlock, RW_WRITER);
1124b86efd96Sagiri 	sp = rdsib_statep->rds_sessionlistp;
1125b86efd96Sagiri 	while (sp) {
1126b86efd96Sagiri 		rw_enter(&sp->session_lock, RW_WRITER);
1127b86efd96Sagiri 		RDS_DPRINTF2("rds_close_sessions", "SP(%p) State: %d", sp,
1128b86efd96Sagiri 		    sp->session_state);
112900a3eaf3SRamaswamy Tummala 		rds_close_this_session(sp, 2);
1130b86efd96Sagiri 		rw_exit(&sp->session_lock);
1131b86efd96Sagiri 		sp = sp->session_nextp;
1132b86efd96Sagiri 	}
1133b86efd96Sagiri 
1134b86efd96Sagiri 	sp = rdsib_statep->rds_sessionlistp;
1135b86efd96Sagiri 	rdsib_statep->rds_sessionlistp = NULL;
1136b86efd96Sagiri 	rdsib_statep->rds_nsessions = 0;
1137b86efd96Sagiri 	rw_exit(&rdsib_statep->rds_sessionlock);
1138b86efd96Sagiri 
1139b86efd96Sagiri 	while (sp) {
1140b86efd96Sagiri 		spnextp = sp->session_nextp;
1141b86efd96Sagiri 		rds_destroy_session(sp);
1142b86efd96Sagiri 		RDS_DECR_SESS();
1143b86efd96Sagiri 		sp = spnextp;
1144b86efd96Sagiri 	}
1145b86efd96Sagiri 
1146b86efd96Sagiri 	/* free the global pool */
1147b86efd96Sagiri 	rds_free_recv_caches(rdsib_statep);
1148b86efd96Sagiri 
1149b86efd96Sagiri 	RDS_DPRINTF2("rds_close_sessions", "Return");
1150b86efd96Sagiri }
1151b86efd96Sagiri 
1152b86efd96Sagiri void
rds_session_open(rds_session_t * sp)1153b86efd96Sagiri rds_session_open(rds_session_t *sp)
1154b86efd96Sagiri {
1155b86efd96Sagiri 	int		ret;
1156b86efd96Sagiri 
1157b86efd96Sagiri 	RDS_DPRINTF2("rds_session_open", "Enter SP(%p)", sp);
1158b86efd96Sagiri 
1159b86efd96Sagiri 	ret = rds_session_connect(sp);
1160b86efd96Sagiri 	if (ret == -1) {
1161b86efd96Sagiri 		/*
1162b86efd96Sagiri 		 * may be the session has become passive due to
1163b86efd96Sagiri 		 * hitting peer-to-peer case
1164b86efd96Sagiri 		 */
1165b86efd96Sagiri 		rw_enter(&sp->session_lock, RW_READER);
1166b86efd96Sagiri 		if (sp->session_type == RDS_SESSION_PASSIVE) {
1167b86efd96Sagiri 			RDS_DPRINTF2("rds_session_open", "SP(%p) "
1168b86efd96Sagiri 			    "has become passive from active", sp);
1169b86efd96Sagiri 			rw_exit(&sp->session_lock);
1170b86efd96Sagiri 			return;
1171b86efd96Sagiri 		}
1172b86efd96Sagiri 
1173b86efd96Sagiri 		/* get the lock for writing */
1174b86efd96Sagiri 		rw_exit(&sp->session_lock);
1175b86efd96Sagiri 		rw_enter(&sp->session_lock, RW_WRITER);
1176b86efd96Sagiri 		sp->session_state = RDS_SESSION_STATE_ERROR;
1177b86efd96Sagiri 		RDS_DPRINTF3("rds_session_open",
1178b86efd96Sagiri 		    "SP(%p) State RDS_SESSION_STATE_ERROR", sp);
1179b86efd96Sagiri 		rw_exit(&sp->session_lock);
1180b86efd96Sagiri 
1181b86efd96Sagiri 		/* Connect request failed */
1182b86efd96Sagiri 		rds_session_close(sp, IBT_BLOCKING, 1);
1183b86efd96Sagiri 
1184b86efd96Sagiri 		rw_enter(&sp->session_lock, RW_WRITER);
1185b86efd96Sagiri 		rds_session_fini(sp);
1186b86efd96Sagiri 		sp->session_state = RDS_SESSION_STATE_FAILED;
11878257fab9Sagiri 		sp->session_failover = 0;
1188b86efd96Sagiri 		RDS_DPRINTF3("rds_session_open",
1189b86efd96Sagiri 		    "SP(%p) State RDS_SESSION_STATE_FAILED", sp);
1190b86efd96Sagiri 		rw_exit(&sp->session_lock);
1191b86efd96Sagiri 
1192b86efd96Sagiri 		return;
1193b86efd96Sagiri 	}
1194b86efd96Sagiri 
1195b86efd96Sagiri 	RDS_DPRINTF2("rds_session_open", "Return: SP(%p)", sp);
1196b86efd96Sagiri }
1197b86efd96Sagiri 
1198b86efd96Sagiri /*
1199b86efd96Sagiri  * Creates a session and inserts it into the list of sessions. The session
1200b86efd96Sagiri  * state would be CREATED.
1201b86efd96Sagiri  * Return Values:
1202b86efd96Sagiri  *	EWOULDBLOCK
1203b86efd96Sagiri  */
1204b86efd96Sagiri rds_session_t *
rds_session_create(rds_state_t * statep,ipaddr_t localip,ipaddr_t remip,ibt_cm_req_rcv_t * reqp,uint8_t type)1205b86efd96Sagiri rds_session_create(rds_state_t *statep, ipaddr_t localip, ipaddr_t remip,
1206b86efd96Sagiri     ibt_cm_req_rcv_t *reqp, uint8_t type)
1207b86efd96Sagiri {
1208b86efd96Sagiri 	ib_gid_t	lgid, rgid;
1209b86efd96Sagiri 	rds_session_t	*newp, *oldp;
1210b86efd96Sagiri 	rds_ep_t	*dataep, *ctrlep;
1211b86efd96Sagiri 	rds_bufpool_t	*pool;
1212b86efd96Sagiri 	int		ret;
1213b86efd96Sagiri 
1214d99cb22fSagiri 	RDS_DPRINTF2("rds_session_create", "Enter: 0x%p 0x%x 0x%x, type: %d",
1215d99cb22fSagiri 	    statep, localip, remip, type);
1216b86efd96Sagiri 
121774242422Sagiri 	/* Check if there is space for a new session */
121874242422Sagiri 	rw_enter(&statep->rds_sessionlock, RW_READER);
121974242422Sagiri 	if (statep->rds_nsessions >= (MaxNodes - 1)) {
122074242422Sagiri 		rw_exit(&statep->rds_sessionlock);
122174242422Sagiri 		RDS_DPRINTF1("rds_session_create", "No More Sessions allowed");
122274242422Sagiri 		return (NULL);
122374242422Sagiri 	}
122474242422Sagiri 	rw_exit(&statep->rds_sessionlock);
122574242422Sagiri 
1226b86efd96Sagiri 	/* Allocate and initialize global buffer pool */
1227b86efd96Sagiri 	ret = rds_init_recv_caches(statep);
1228b86efd96Sagiri 	if (ret != 0) {
1229b86efd96Sagiri 		RDS_DPRINTF2(LABEL, "Buffer Cache Initialization failed");
1230b86efd96Sagiri 		return (NULL);
1231b86efd96Sagiri 	}
1232b86efd96Sagiri 
1233b86efd96Sagiri 	/* enough memory for session (includes 2 endpoints) */
1234b86efd96Sagiri 	newp = kmem_zalloc(sizeof (rds_session_t), KM_SLEEP);
1235b86efd96Sagiri 
1236b86efd96Sagiri 	newp->session_remip = remip;
1237b86efd96Sagiri 	newp->session_myip = localip;
1238b86efd96Sagiri 	newp->session_type = type;
1239b86efd96Sagiri 	newp->session_state = RDS_SESSION_STATE_CREATED;
1240b86efd96Sagiri 	RDS_DPRINTF3("rds_session_create",
1241b86efd96Sagiri 	    "SP(%p) State RDS_SESSION_STATE_CREATED", newp);
1242b86efd96Sagiri 	rw_init(&newp->session_lock, NULL, RW_DRIVER, NULL);
12438257fab9Sagiri 	rw_init(&newp->session_local_portmap_lock, NULL, RW_DRIVER, NULL);
12448257fab9Sagiri 	rw_init(&newp->session_remote_portmap_lock, NULL, RW_DRIVER, NULL);
1245b86efd96Sagiri 
1246b86efd96Sagiri 	/* Initialize data endpoint */
1247b86efd96Sagiri 	dataep = &newp->session_dataep;
1248b86efd96Sagiri 	dataep->ep_remip = newp->session_remip;
1249b86efd96Sagiri 	dataep->ep_myip = newp->session_myip;
1250b86efd96Sagiri 	dataep->ep_state = RDS_EP_STATE_UNCONNECTED;
1251b86efd96Sagiri 	dataep->ep_sp = newp;
1252b86efd96Sagiri 	dataep->ep_type = RDS_EP_TYPE_DATA;
1253b86efd96Sagiri 	mutex_init(&dataep->ep_lock, NULL, MUTEX_DRIVER, NULL);
1254b86efd96Sagiri 
1255b86efd96Sagiri 	/* Initialize send pool locks */
1256b86efd96Sagiri 	pool = &dataep->ep_sndpool;
1257b86efd96Sagiri 	mutex_init(&pool->pool_lock, NULL, MUTEX_DRIVER, NULL);
1258b86efd96Sagiri 	cv_init(&pool->pool_cv, NULL, CV_DRIVER, NULL);
1259b86efd96Sagiri 
1260b86efd96Sagiri 	/* Initialize recv pool locks */
1261b86efd96Sagiri 	pool = &dataep->ep_rcvpool;
1262b86efd96Sagiri 	mutex_init(&dataep->ep_recvqp.qp_lock, NULL, MUTEX_DRIVER, NULL);
1263b86efd96Sagiri 	mutex_init(&pool->pool_lock, NULL, MUTEX_DRIVER, NULL);
1264b86efd96Sagiri 	cv_init(&pool->pool_cv, NULL, CV_DRIVER, NULL);
1265b86efd96Sagiri 
1266b86efd96Sagiri 	/* Initialize control endpoint */
1267b86efd96Sagiri 	ctrlep = &newp->session_ctrlep;
1268b86efd96Sagiri 	ctrlep->ep_remip = newp->session_remip;
1269b86efd96Sagiri 	ctrlep->ep_myip = newp->session_myip;
1270b86efd96Sagiri 	ctrlep->ep_state = RDS_EP_STATE_UNCONNECTED;
1271b86efd96Sagiri 	ctrlep->ep_sp = newp;
1272b86efd96Sagiri 	ctrlep->ep_type = RDS_EP_TYPE_CTRL;
1273b86efd96Sagiri 	mutex_init(&ctrlep->ep_lock, NULL, MUTEX_DRIVER, NULL);
1274b86efd96Sagiri 
1275b86efd96Sagiri 	/* Initialize send pool locks */
1276b86efd96Sagiri 	pool = &ctrlep->ep_sndpool;
1277b86efd96Sagiri 	mutex_init(&pool->pool_lock, NULL, MUTEX_DRIVER, NULL);
1278b86efd96Sagiri 	cv_init(&pool->pool_cv, NULL, CV_DRIVER, NULL);
1279b86efd96Sagiri 
1280b86efd96Sagiri 	/* Initialize recv pool locks */
1281b86efd96Sagiri 	pool = &ctrlep->ep_rcvpool;
1282b86efd96Sagiri 	mutex_init(&ctrlep->ep_recvqp.qp_lock, NULL, MUTEX_DRIVER, NULL);
1283b86efd96Sagiri 	mutex_init(&pool->pool_lock, NULL, MUTEX_DRIVER, NULL);
1284b86efd96Sagiri 	cv_init(&pool->pool_cv, NULL, CV_DRIVER, NULL);
1285b86efd96Sagiri 
1286b86efd96Sagiri 	/* lkup if there is already a session */
1287b86efd96Sagiri 	rw_enter(&statep->rds_sessionlock, RW_WRITER);
1288b86efd96Sagiri 	oldp = rds_session_lkup(statep, remip, 0);
1289b86efd96Sagiri 	if (oldp != NULL) {
1290b86efd96Sagiri 		/* A session to this destination exists */
1291b86efd96Sagiri 		rw_exit(&statep->rds_sessionlock);
1292b86efd96Sagiri 		rw_destroy(&newp->session_lock);
12938257fab9Sagiri 		rw_destroy(&newp->session_local_portmap_lock);
12948257fab9Sagiri 		rw_destroy(&newp->session_remote_portmap_lock);
1295b86efd96Sagiri 		mutex_destroy(&dataep->ep_lock);
1296b86efd96Sagiri 		mutex_destroy(&ctrlep->ep_lock);
1297b86efd96Sagiri 		kmem_free(newp, sizeof (rds_session_t));
1298b86efd96Sagiri 		return (NULL);
1299b86efd96Sagiri 	}
1300b86efd96Sagiri 
1301b86efd96Sagiri 	/* Insert this session into the list */
130274242422Sagiri 	if (rds_add_session(newp, B_TRUE) != B_TRUE) {
130374242422Sagiri 		/* No room to add this session */
130474242422Sagiri 		rw_exit(&statep->rds_sessionlock);
130574242422Sagiri 		rw_destroy(&newp->session_lock);
130674242422Sagiri 		rw_destroy(&newp->session_local_portmap_lock);
130774242422Sagiri 		rw_destroy(&newp->session_remote_portmap_lock);
130874242422Sagiri 		mutex_destroy(&dataep->ep_lock);
130974242422Sagiri 		mutex_destroy(&ctrlep->ep_lock);
131074242422Sagiri 		kmem_free(newp, sizeof (rds_session_t));
131174242422Sagiri 		return (NULL);
131274242422Sagiri 	}
1313b86efd96Sagiri 
1314b86efd96Sagiri 	/* unlock the session list */
1315b86efd96Sagiri 	rw_exit(&statep->rds_sessionlock);
1316b86efd96Sagiri 
1317b86efd96Sagiri 	if (type == RDS_SESSION_ACTIVE) {
1318d99cb22fSagiri 		ipaddr_t		localip1, remip1;
1319015f8fffShiremath 		ibt_ip_path_attr_t	ipattr;
1320015f8fffShiremath 		ibt_ip_addr_t		dstip;
1321b86efd96Sagiri 
1322b86efd96Sagiri 		/* The ipaddr should be in the network order */
1323b86efd96Sagiri 		localip1 = localip;
1324b86efd96Sagiri 		remip1 = remip;
1325b86efd96Sagiri 		ret = rds_sc_path_lookup(&localip1, &remip1);
1326b86efd96Sagiri 		if (ret == 0) {
1327b86efd96Sagiri 			RDS_DPRINTF2(LABEL, "Path not found (0x%x 0x%x)",
1328b86efd96Sagiri 			    localip, remip);
1329b86efd96Sagiri 		}
1330b86efd96Sagiri 
1331b86efd96Sagiri 		/* Get the gids for the source and destination ip addrs */
13328257fab9Sagiri 		lgid.gid_prefix = 0;
13338257fab9Sagiri 		lgid.gid_guid = 0;
13348257fab9Sagiri 		rgid.gid_prefix = 0;
13358257fab9Sagiri 		rgid.gid_guid = 0;
1336015f8fffShiremath 
1337015f8fffShiremath 		bzero(&ipattr, sizeof (ibt_ip_path_attr_t));
1338015f8fffShiremath 		dstip.family = AF_INET;
1339d22e11ebSBill Taylor 		dstip.un.ip4addr = remip1;
1340015f8fffShiremath 		ipattr.ipa_dst_ip = &dstip;
1341015f8fffShiremath 		ipattr.ipa_src_ip.family = AF_INET;
1342d22e11ebSBill Taylor 		ipattr.ipa_src_ip.un.ip4addr = localip1;
1343015f8fffShiremath 		ipattr.ipa_ndst = 1;
1344015f8fffShiremath 		ipattr.ipa_max_paths = 1;
1345015f8fffShiremath 		RDS_DPRINTF2(LABEL, "ibt_get_ip_paths: 0x%x <-> 0x%x ",
1346015f8fffShiremath 		    localip1, remip1);
1347015f8fffShiremath 		ret = ibt_get_ip_paths(rdsib_statep->rds_ibhdl,
1348015f8fffShiremath 		    IBT_PATH_NO_FLAGS, &ipattr, &newp->session_pinfo,
1349015f8fffShiremath 		    NULL, NULL);
1350015f8fffShiremath 		if (ret != IBT_SUCCESS) {
135174242422Sagiri 			RDS_DPRINTF2(LABEL, "ibt_get_ip_paths failed, ret: %d "
13528257fab9Sagiri 			    "lgid: %llx:%llx rgid: %llx:%llx", lgid.gid_prefix,
13538257fab9Sagiri 			    lgid.gid_guid, rgid.gid_prefix, rgid.gid_guid);
13548257fab9Sagiri 
1355b86efd96Sagiri 			RDS_SESSION_TRANSITION(newp, RDS_SESSION_STATE_FAILED);
1356b86efd96Sagiri 			return (NULL);
1357b86efd96Sagiri 		}
1358015f8fffShiremath 		RDS_DPRINTF2(LABEL, "ibt_get_ip_paths success");
1359015f8fffShiremath 		lgid =
1360015f8fffShiremath 		    newp->session_pinfo.pi_prim_cep_path.cep_adds_vect.av_sgid;
1361015f8fffShiremath 		rgid =
1362015f8fffShiremath 		    newp->session_pinfo.pi_prim_cep_path.cep_adds_vect.av_dgid;
1363b86efd96Sagiri 
1364b86efd96Sagiri 		RDS_DPRINTF2(LABEL, "lgid: %llx:%llx rgid: %llx:%llx",
1365b86efd96Sagiri 		    lgid.gid_prefix, lgid.gid_guid, rgid.gid_prefix,
1366b86efd96Sagiri 		    rgid.gid_guid);
1367b86efd96Sagiri 	}
1368b86efd96Sagiri 
1369b86efd96Sagiri 	rw_enter(&newp->session_lock, RW_WRITER);
1370b86efd96Sagiri 	/* check for peer-to-peer case */
1371b86efd96Sagiri 	if (type == newp->session_type) {
1372b86efd96Sagiri 		/* no peer-to-peer case */
1373b86efd96Sagiri 		if (type == RDS_SESSION_ACTIVE) {
1374b86efd96Sagiri 			newp->session_lgid = lgid;
1375b86efd96Sagiri 			newp->session_rgid = rgid;
1376b86efd96Sagiri 		} else {
1377b86efd96Sagiri 			/* rgid is requester gid & lgid is receiver gid */
1378b86efd96Sagiri 			newp->session_rgid = reqp->req_prim_addr.av_dgid;
1379b86efd96Sagiri 			newp->session_lgid = reqp->req_prim_addr.av_sgid;
1380b86efd96Sagiri 		}
1381b86efd96Sagiri 	}
1382b86efd96Sagiri 	rw_exit(&newp->session_lock);
1383b86efd96Sagiri 
1384b86efd96Sagiri 	RDS_DPRINTF2("rds_session_create", "Return SP(%p)", newp);
1385b86efd96Sagiri 
1386b86efd96Sagiri 	return (newp);
1387b86efd96Sagiri }
1388b86efd96Sagiri 
138900a3eaf3SRamaswamy Tummala void
rds_handle_close_session_request(void * arg)139000a3eaf3SRamaswamy Tummala rds_handle_close_session_request(void *arg)
139100a3eaf3SRamaswamy Tummala {
139200a3eaf3SRamaswamy Tummala 	rds_session_t	*sp = (rds_session_t *)arg;
139300a3eaf3SRamaswamy Tummala 
139400a3eaf3SRamaswamy Tummala 	RDS_DPRINTF2("rds_handle_close_session_request",
139500a3eaf3SRamaswamy Tummala 	    "Enter: Closing this Session (%p)", sp);
139600a3eaf3SRamaswamy Tummala 
139700a3eaf3SRamaswamy Tummala 	rw_enter(&sp->session_lock, RW_WRITER);
139800a3eaf3SRamaswamy Tummala 	RDS_DPRINTF2("rds_handle_close_session_request",
139900a3eaf3SRamaswamy Tummala 	    "SP(%p) State: %d", sp, sp->session_state);
140000a3eaf3SRamaswamy Tummala 	rds_close_this_session(sp, 2);
140100a3eaf3SRamaswamy Tummala 	rw_exit(&sp->session_lock);
140200a3eaf3SRamaswamy Tummala 
140300a3eaf3SRamaswamy Tummala 	RDS_DPRINTF2("rds_handle_close_session_request", "Return SP(%p)", sp);
140400a3eaf3SRamaswamy Tummala }
140500a3eaf3SRamaswamy Tummala 
1406b86efd96Sagiri void
rds_handle_control_message(rds_session_t * sp,rds_ctrl_pkt_t * cpkt)1407b86efd96Sagiri rds_handle_control_message(rds_session_t *sp, rds_ctrl_pkt_t *cpkt)
1408b86efd96Sagiri {
1409b86efd96Sagiri 	RDS_DPRINTF4("rds_handle_control_message", "Enter: SP(%p) code: %d "
1410b86efd96Sagiri 	    "port: %d", sp, cpkt->rcp_code, cpkt->rcp_port);
1411b86efd96Sagiri 
1412b86efd96Sagiri 	switch (cpkt->rcp_code) {
1413b86efd96Sagiri 	case RDS_CTRL_CODE_STALL:
1414b86efd96Sagiri 		RDS_INCR_STALLS_RCVD();
14158257fab9Sagiri 		(void) rds_check_n_mark_port(sp, cpkt->rcp_port, RDS_REMOTE);
1416b86efd96Sagiri 		break;
1417b86efd96Sagiri 	case RDS_CTRL_CODE_UNSTALL:
1418b86efd96Sagiri 		RDS_INCR_UNSTALLS_RCVD();
14198257fab9Sagiri 		(void) rds_check_n_unmark_port(sp, cpkt->rcp_port, RDS_REMOTE);
1420b86efd96Sagiri 		break;
1421b86efd96Sagiri 	case RDS_CTRL_CODE_STALL_PORTS:
14228257fab9Sagiri 		rds_mark_all_ports(sp, RDS_REMOTE);
1423b86efd96Sagiri 		break;
1424b86efd96Sagiri 	case RDS_CTRL_CODE_UNSTALL_PORTS:
14258257fab9Sagiri 		rds_unmark_all_ports(sp, RDS_REMOTE);
1426b86efd96Sagiri 		break;
1427b86efd96Sagiri 	case RDS_CTRL_CODE_HEARTBEAT:
1428b86efd96Sagiri 		break;
142900a3eaf3SRamaswamy Tummala 	case RDS_CTRL_CODE_CLOSE_SESSION:
143000a3eaf3SRamaswamy Tummala 		RDS_DPRINTF2("rds_handle_control_message",
143100a3eaf3SRamaswamy Tummala 		    "SP(%p) Remote Requested to close this session", sp);
143200a3eaf3SRamaswamy Tummala 		(void) ddi_taskq_dispatch(rds_taskq,
143300a3eaf3SRamaswamy Tummala 		    rds_handle_close_session_request, (void *)sp, DDI_SLEEP);
143400a3eaf3SRamaswamy Tummala 		break;
1435b86efd96Sagiri 	default:
1436b86efd96Sagiri 		RDS_DPRINTF2(LABEL, "ERROR: Invalid Control code: %d",
1437b86efd96Sagiri 		    cpkt->rcp_code);
1438b86efd96Sagiri 		break;
1439b86efd96Sagiri 	}
1440b86efd96Sagiri 
1441b86efd96Sagiri 	RDS_DPRINTF4("rds_handle_control_message", "Return");
1442b86efd96Sagiri }
1443b86efd96Sagiri 
14448257fab9Sagiri int
rds_post_control_message(rds_session_t * sp,uint8_t code,in_port_t port)14458257fab9Sagiri rds_post_control_message(rds_session_t *sp, uint8_t code, in_port_t port)
1446b86efd96Sagiri {
1447b86efd96Sagiri 	ibt_send_wr_t	wr;
1448b86efd96Sagiri 	rds_ep_t	*ep;
1449b86efd96Sagiri 	rds_buf_t	*bp;
1450b86efd96Sagiri 	rds_ctrl_pkt_t	*cp;
1451b86efd96Sagiri 	int		ret;
1452b86efd96Sagiri 
1453b86efd96Sagiri 	RDS_DPRINTF4("rds_post_control_message", "Enter: SP(%p) Code: %d "
14548257fab9Sagiri 	    "Port: %d", sp, code, port);
1455b86efd96Sagiri 
1456b86efd96Sagiri 	ep = &sp->session_ctrlep;
1457b86efd96Sagiri 
1458b86efd96Sagiri 	bp = rds_get_send_buf(ep, 1);
1459b86efd96Sagiri 	if (bp == NULL) {
1460b86efd96Sagiri 		RDS_DPRINTF2(LABEL, "No buffers available to send control "
14618257fab9Sagiri 		    "message: SP(%p) Code: %d Port: %d", sp, code,
14628257fab9Sagiri 		    port);
14638257fab9Sagiri 		return (-1);
1464b86efd96Sagiri 	}
1465b86efd96Sagiri 
1466b86efd96Sagiri 	cp = (rds_ctrl_pkt_t *)(uintptr_t)bp->buf_ds.ds_va;
14678257fab9Sagiri 	cp->rcp_code = code;
14688257fab9Sagiri 	cp->rcp_port = port;
1469b86efd96Sagiri 	bp->buf_ds.ds_len = RDS_CTRLPKT_SIZE;
1470b86efd96Sagiri 
1471b86efd96Sagiri 	wr.wr_id = (uintptr_t)bp;
1472b86efd96Sagiri 	wr.wr_flags = IBT_WR_SEND_SOLICIT;
1473b86efd96Sagiri 	wr.wr_trans = IBT_RC_SRV;
1474b86efd96Sagiri 	wr.wr_opcode = IBT_WRC_SEND;
1475b86efd96Sagiri 	wr.wr_nds = 1;
1476b86efd96Sagiri 	wr.wr_sgl = &bp->buf_ds;
1477b86efd96Sagiri 	RDS_DPRINTF5(LABEL, "ds_va %p ds_len %d ds_lkey 0x%llx",
1478b86efd96Sagiri 	    bp->buf_ds.ds_va, bp->buf_ds.ds_len, bp->buf_ds.ds_key);
1479b86efd96Sagiri 	ret = ibt_post_send(ep->ep_chanhdl, &wr, 1, NULL);
1480b86efd96Sagiri 	if (ret != IBT_SUCCESS) {
1481b86efd96Sagiri 		RDS_DPRINTF2(LABEL, "EP(%p): ibt_post_send failed: "
1482b86efd96Sagiri 		    "%d", ep, ret);
1483b86efd96Sagiri 		bp->buf_state = RDS_SNDBUF_FREE;
1484b86efd96Sagiri 		rds_free_send_buf(ep, bp, NULL, 1, B_FALSE);
14858257fab9Sagiri 		return (-1);
1486b86efd96Sagiri 	}
1487b86efd96Sagiri 
1488b86efd96Sagiri 	RDS_DPRINTF4("rds_post_control_message", "Return SP(%p) Code: %d "
14898257fab9Sagiri 	    "Port: %d", sp, code, port);
1490b86efd96Sagiri 
14918257fab9Sagiri 	return (0);
1492b86efd96Sagiri }
1493b86efd96Sagiri 
1494b86efd96Sagiri void
rds_stall_port(rds_session_t * sp,in_port_t port,uint_t qualifier)14958257fab9Sagiri rds_stall_port(rds_session_t *sp, in_port_t port, uint_t qualifier)
1496b86efd96Sagiri {
14978257fab9Sagiri 	int		ret;
1498b86efd96Sagiri 
14998257fab9Sagiri 	RDS_DPRINTF4("rds_stall_port", "Enter: SP(%p) Port %d", sp, port);
1500b86efd96Sagiri 
1501b86efd96Sagiri 	RDS_INCR_STALLS_TRIGGERED();
1502b86efd96Sagiri 
15038257fab9Sagiri 	if (!rds_check_n_mark_port(sp, port, qualifier)) {
15048257fab9Sagiri 
15058257fab9Sagiri 		if (sp != NULL) {
15068257fab9Sagiri 			ret = rds_post_control_message(sp,
15078257fab9Sagiri 			    RDS_CTRL_CODE_STALL, port);
15088257fab9Sagiri 			if (ret != 0) {
15098257fab9Sagiri 				(void) rds_check_n_unmark_port(sp, port,
15108257fab9Sagiri 				    qualifier);
15118257fab9Sagiri 				return;
15128257fab9Sagiri 			}
15138257fab9Sagiri 			RDS_INCR_STALLS_SENT();
15148257fab9Sagiri 		}
1515b86efd96Sagiri 	} else {
1516b86efd96Sagiri 		RDS_DPRINTF3(LABEL,
1517b86efd96Sagiri 		    "Port %d is already in stall state", port);
1518b86efd96Sagiri 	}
1519b86efd96Sagiri 
15208257fab9Sagiri 	RDS_DPRINTF4("rds_stall_port", "Return: SP(%p) Port %d", sp, port);
1521b86efd96Sagiri }
1522b86efd96Sagiri 
1523b86efd96Sagiri void
rds_resume_port(in_port_t port)1524b86efd96Sagiri rds_resume_port(in_port_t port)
1525b86efd96Sagiri {
15268257fab9Sagiri 	rds_session_t	*sp;
1527b86efd96Sagiri 	uint_t		ix;
15288257fab9Sagiri 	int		ret;
1529b86efd96Sagiri 
1530b86efd96Sagiri 	RDS_DPRINTF4("rds_resume_port", "Enter: Port %d", port);
1531b86efd96Sagiri 
1532b86efd96Sagiri 	RDS_INCR_UNSTALLS_TRIGGERED();
15338257fab9Sagiri 
15348257fab9Sagiri 	/* resume loopback traffic */
15358257fab9Sagiri 	(void) rds_check_n_unmark_port(NULL, port, RDS_LOOPBACK);
15368257fab9Sagiri 
15378257fab9Sagiri 	/* send unstall messages to resume the remote traffic */
15388257fab9Sagiri 	rw_enter(&rdsib_statep->rds_sessionlock, RW_READER);
15398257fab9Sagiri 
15408257fab9Sagiri 	sp = rdsib_statep->rds_sessionlistp;
15418257fab9Sagiri 	for (ix = 0; ix < rdsib_statep->rds_nsessions; ix++) {
15428257fab9Sagiri 		ASSERT(sp != NULL);
15438257fab9Sagiri 		if ((sp->session_state == RDS_SESSION_STATE_CONNECTED) &&
15448257fab9Sagiri 		    (rds_check_n_unmark_port(sp, port, RDS_LOCAL))) {
15458257fab9Sagiri 				ret = rds_post_control_message(sp,
15468257fab9Sagiri 				    RDS_CTRL_CODE_UNSTALL, port);
15478257fab9Sagiri 				if (ret != 0) {
15488257fab9Sagiri 					(void) rds_check_n_mark_port(sp, port,
15498257fab9Sagiri 					    RDS_LOCAL);
15508257fab9Sagiri 				} else {
15518257fab9Sagiri 					RDS_INCR_UNSTALLS_SENT();
15528257fab9Sagiri 				}
1553b86efd96Sagiri 		}
1554b86efd96Sagiri 
15558257fab9Sagiri 		sp = sp->session_nextp;
1556b86efd96Sagiri 	}
1557b86efd96Sagiri 
15588257fab9Sagiri 	rw_exit(&rdsib_statep->rds_sessionlock);
15598257fab9Sagiri 
1560b86efd96Sagiri 	RDS_DPRINTF4("rds_resume_port", "Return: Port %d", port);
1561b86efd96Sagiri }
1562b86efd96Sagiri 
1563b86efd96Sagiri static int
rds_build_n_post_msg(rds_ep_t * ep,uio_t * uiop,in_port_t sendport,in_port_t recvport)1564b86efd96Sagiri rds_build_n_post_msg(rds_ep_t *ep, uio_t *uiop, in_port_t sendport,
1565b86efd96Sagiri     in_port_t recvport)
1566b86efd96Sagiri {
1567b86efd96Sagiri 	ibt_send_wr_t	*wrp, wr;
1568b86efd96Sagiri 	rds_buf_t	*bp, *bp1;
1569b86efd96Sagiri 	rds_data_hdr_t	*pktp;
1570b86efd96Sagiri 	uint32_t	msgsize, npkts, residual, pktno, ix;
1571b86efd96Sagiri 	int		ret;
1572b86efd96Sagiri 
1573b86efd96Sagiri 	RDS_DPRINTF4("rds_build_n_post_msg", "Enter: EP(%p) UIOP(%p)",
1574b86efd96Sagiri 	    ep, uiop);
1575b86efd96Sagiri 
1576b86efd96Sagiri 	/* how many pkts are needed to carry this msg */
1577b86efd96Sagiri 	msgsize = uiop->uio_resid;
1578b86efd96Sagiri 	npkts = ((msgsize - 1) / UserBufferSize) + 1;
1579b86efd96Sagiri 	residual = ((msgsize - 1) % UserBufferSize) + 1;
1580b86efd96Sagiri 
1581b86efd96Sagiri 	RDS_DPRINTF5(LABEL, "EP(%p) UIOP(%p) msg size: %d npkts: %d", ep, uiop,
1582b86efd96Sagiri 	    msgsize, npkts);
1583b86efd96Sagiri 
1584b86efd96Sagiri 	/* Get the buffers needed to post this message */
1585b86efd96Sagiri 	bp = rds_get_send_buf(ep, npkts);
1586b86efd96Sagiri 	if (bp == NULL) {
1587b86efd96Sagiri 		RDS_INCR_ENOBUFS();
1588b86efd96Sagiri 		return (ENOBUFS);
1589b86efd96Sagiri 	}
1590b86efd96Sagiri 
1591b86efd96Sagiri 	if (npkts > 1) {
1592b86efd96Sagiri 		/*
1593b86efd96Sagiri 		 * multi-pkt messages are posted at the same time as a list
1594b86efd96Sagiri 		 * of WRs
1595b86efd96Sagiri 		 */
1596b86efd96Sagiri 		wrp = (ibt_send_wr_t *)kmem_zalloc(sizeof (ibt_send_wr_t) *
1597b86efd96Sagiri 		    npkts, KM_SLEEP);
1598b86efd96Sagiri 	}
1599b86efd96Sagiri 
1600b86efd96Sagiri 
1601b86efd96Sagiri 	pktno = 0;
1602b86efd96Sagiri 	bp1 = bp;
1603b86efd96Sagiri 	do {
1604b86efd96Sagiri 		/* prepare the header */
1605b86efd96Sagiri 		pktp = (rds_data_hdr_t *)(uintptr_t)bp1->buf_ds.ds_va;
1606b86efd96Sagiri 		pktp->dh_datalen = UserBufferSize;
1607b86efd96Sagiri 		pktp->dh_npkts = npkts - pktno;
1608b86efd96Sagiri 		pktp->dh_psn = pktno;
1609b86efd96Sagiri 		pktp->dh_sendport = sendport;
1610b86efd96Sagiri 		pktp->dh_recvport = recvport;
1611b86efd96Sagiri 		bp1->buf_ds.ds_len = RdsPktSize;
1612b86efd96Sagiri 
1613b86efd96Sagiri 		/* copy the data */
1614b86efd96Sagiri 		ret = uiomove((uint8_t *)pktp + RDS_DATA_HDR_SZ,
1615b86efd96Sagiri 		    UserBufferSize, UIO_WRITE, uiop);
1616b86efd96Sagiri 		if (ret != 0) {
1617b86efd96Sagiri 			break;
1618b86efd96Sagiri 		}
1619b86efd96Sagiri 
1620b86efd96Sagiri 		if (uiop->uio_resid == 0) {
1621b86efd96Sagiri 			pktp->dh_datalen = residual;
1622b86efd96Sagiri 			bp1->buf_ds.ds_len = residual + RDS_DATA_HDR_SZ;
1623b86efd96Sagiri 			break;
1624b86efd96Sagiri 		}
1625b86efd96Sagiri 		pktno++;
1626b86efd96Sagiri 		bp1 = bp1->buf_nextp;
1627b86efd96Sagiri 	} while (uiop->uio_resid);
1628b86efd96Sagiri 
1629b86efd96Sagiri 	if (ret) {
1630b86efd96Sagiri 		/* uiomove failed */
1631b86efd96Sagiri 		RDS_DPRINTF2("rds_build_n_post_msg", "UIO(%p) Move FAILED: %d",
1632b86efd96Sagiri 		    uiop, ret);
1633b86efd96Sagiri 		if (npkts > 1) {
1634b86efd96Sagiri 			kmem_free(wrp, npkts * sizeof (ibt_send_wr_t));
1635b86efd96Sagiri 		}
1636b86efd96Sagiri 		rds_free_send_buf(ep, bp, NULL, npkts, B_FALSE);
1637b86efd96Sagiri 		return (ret);
1638b86efd96Sagiri 	}
1639b86efd96Sagiri 
1640b86efd96Sagiri 	if (npkts > 1) {
1641b86efd96Sagiri 		/* multi-pkt message */
1642b86efd96Sagiri 		RDS_DPRINTF5(LABEL, "EP(%p) Sending Multiple Packets", ep);
1643b86efd96Sagiri 
1644b86efd96Sagiri 		bp1 = bp;
1645b86efd96Sagiri 		for (ix = 0; ix < npkts; ix++) {
1646b86efd96Sagiri 			wrp[ix].wr_id = (uintptr_t)bp1;
1647b86efd96Sagiri 			wrp[ix].wr_flags = IBT_WR_NO_FLAGS;
1648b86efd96Sagiri 			wrp[ix].wr_trans = IBT_RC_SRV;
1649b86efd96Sagiri 			wrp[ix].wr_opcode = IBT_WRC_SEND;
1650b86efd96Sagiri 			wrp[ix].wr_nds = 1;
1651b86efd96Sagiri 			wrp[ix].wr_sgl = &bp1->buf_ds;
1652b86efd96Sagiri 			bp1 = bp1->buf_nextp;
1653b86efd96Sagiri 		}
1654b86efd96Sagiri 		wrp[npkts - 1].wr_flags = IBT_WR_SEND_SOLICIT;
1655b86efd96Sagiri 
1656b86efd96Sagiri 		ret = ibt_post_send(ep->ep_chanhdl, wrp, npkts, &ix);
1657b86efd96Sagiri 		if (ret != IBT_SUCCESS) {
1658b86efd96Sagiri 			RDS_DPRINTF2(LABEL, "EP(%p): ibt_post_send failed: "
1659b86efd96Sagiri 			    "%d for %d pkts", ep, ret, npkts);
1660b86efd96Sagiri 			rds_free_send_buf(ep, bp, NULL, npkts, B_FALSE);
1661b86efd96Sagiri 			kmem_free(wrp, npkts * sizeof (ibt_send_wr_t));
1662b86efd96Sagiri 			return (ret);
1663b86efd96Sagiri 		}
1664b86efd96Sagiri 
1665b86efd96Sagiri 		kmem_free(wrp, npkts * sizeof (ibt_send_wr_t));
1666b86efd96Sagiri 	} else {
1667b86efd96Sagiri 		/* single pkt */
1668b86efd96Sagiri 		RDS_DPRINTF5(LABEL, "EP(%p) Sending Single Packet", ep);
1669b86efd96Sagiri 		wr.wr_id = (uintptr_t)bp;
1670b86efd96Sagiri 		wr.wr_flags = IBT_WR_SEND_SOLICIT;
1671b86efd96Sagiri 		wr.wr_trans = IBT_RC_SRV;
1672b86efd96Sagiri 		wr.wr_opcode = IBT_WRC_SEND;
1673b86efd96Sagiri 		wr.wr_nds = 1;
1674b86efd96Sagiri 		wr.wr_sgl = &bp->buf_ds;
1675b86efd96Sagiri 		RDS_DPRINTF5(LABEL, "ds_va %p ds_key 0x%llx ds_len %d ",
1676b86efd96Sagiri 		    bp->buf_ds.ds_va, bp->buf_ds.ds_key, bp->buf_ds.ds_len);
1677b86efd96Sagiri 		ret = ibt_post_send(ep->ep_chanhdl, &wr, 1, NULL);
1678b86efd96Sagiri 		if (ret != IBT_SUCCESS) {
1679b86efd96Sagiri 			RDS_DPRINTF2(LABEL, "EP(%p): ibt_post_send failed: "
1680b86efd96Sagiri 			    "%d", ep, ret);
1681b86efd96Sagiri 			rds_free_send_buf(ep, bp, NULL, 1, B_FALSE);
1682b86efd96Sagiri 			return (ret);
1683b86efd96Sagiri 		}
1684b86efd96Sagiri 	}
1685b86efd96Sagiri 
1686b86efd96Sagiri 	RDS_INCR_TXPKTS(npkts);
1687b86efd96Sagiri 	RDS_INCR_TXBYTES(msgsize);
1688b86efd96Sagiri 
1689b86efd96Sagiri 	RDS_DPRINTF4("rds_build_n_post_msg", "Return: EP(%p) UIOP(%p)",
1690b86efd96Sagiri 	    ep, uiop);
1691b86efd96Sagiri 
1692b86efd96Sagiri 	return (0);
1693b86efd96Sagiri }
1694b86efd96Sagiri 
1695b86efd96Sagiri static int
rds_deliver_loopback_msg(uio_t * uiop,ipaddr_t recvip,ipaddr_t sendip,in_port_t recvport,in_port_t sendport,zoneid_t zoneid)1696b86efd96Sagiri rds_deliver_loopback_msg(uio_t *uiop, ipaddr_t recvip, ipaddr_t sendip,
1697b86efd96Sagiri     in_port_t recvport, in_port_t sendport, zoneid_t zoneid)
1698b86efd96Sagiri {
1699b86efd96Sagiri 	mblk_t		*mp;
1700b86efd96Sagiri 	int		ret;
1701b86efd96Sagiri 
1702b86efd96Sagiri 	RDS_DPRINTF4("rds_deliver_loopback_msg", "Enter");
1703b86efd96Sagiri 
1704b86efd96Sagiri 	RDS_DPRINTF3(LABEL, "Loopback message: sendport: "
1705b86efd96Sagiri 	    "%d to recvport: %d", sendport, recvport);
1706b86efd96Sagiri 
1707b86efd96Sagiri 	mp = allocb(uiop->uio_resid, BPRI_MED);
1708b86efd96Sagiri 	if (mp == NULL) {
1709b86efd96Sagiri 		RDS_DPRINTF2(LABEL, "allocb failed, size: %d\n",
1710b86efd96Sagiri 		    uiop->uio_resid);
1711b86efd96Sagiri 		return (ENOSPC);
1712b86efd96Sagiri 	}
1713b86efd96Sagiri 	mp->b_wptr = mp->b_rptr + uiop->uio_resid;
1714b86efd96Sagiri 
1715b86efd96Sagiri 	ret = uiomove(mp->b_rptr, uiop->uio_resid, UIO_WRITE, uiop);
1716b86efd96Sagiri 	if (ret) {
1717b86efd96Sagiri 		RDS_DPRINTF2(LABEL, "ERROR: uiomove returned: %d", ret);
1718b86efd96Sagiri 		freeb(mp);
1719b86efd96Sagiri 		return (ret);
1720b86efd96Sagiri 	}
1721b86efd96Sagiri 
1722b86efd96Sagiri 	ret = rds_deliver_new_msg(mp, recvip, sendip, recvport, sendport,
1723b86efd96Sagiri 	    zoneid);
1724b86efd96Sagiri 	if (ret != 0) {
1725b86efd96Sagiri 		if (ret == ENOSPC) {
1726b86efd96Sagiri 			/*
1727b86efd96Sagiri 			 * The message is delivered but cannot take more,
17288257fab9Sagiri 			 * stop further loopback traffic to this port
1729b86efd96Sagiri 			 */
17308257fab9Sagiri 			RDS_DPRINTF3("rds_deliver_loopback_msg",
17318257fab9Sagiri 			    "Port %d NO SPACE", recvport);
17328257fab9Sagiri 			rds_stall_port(NULL, recvport, RDS_LOOPBACK);
1733b86efd96Sagiri 		} else {
1734b86efd96Sagiri 			RDS_DPRINTF2(LABEL, "Loopback message: port %d -> "
1735b86efd96Sagiri 			    "port %d failed: %d", sendport, recvport, ret);
1736b86efd96Sagiri 			return (ret);
1737b86efd96Sagiri 		}
1738b86efd96Sagiri 	}
1739b86efd96Sagiri 
1740b86efd96Sagiri 	RDS_DPRINTF4("rds_deliver_loopback_msg", "Return");
1741b86efd96Sagiri 	return (0);
1742b86efd96Sagiri }
1743b86efd96Sagiri 
1744b86efd96Sagiri static void
rds_resend_messages(void * arg)1745b86efd96Sagiri rds_resend_messages(void *arg)
1746b86efd96Sagiri {
1747b86efd96Sagiri 	rds_session_t	*sp = (rds_session_t *)arg;
1748b86efd96Sagiri 	rds_ep_t	*ep;
1749b86efd96Sagiri 	rds_bufpool_t	*spool;
1750b86efd96Sagiri 	rds_buf_t	*bp, *endp, *tmp;
1751b86efd96Sagiri 	ibt_send_wr_t	*wrp;
1752b86efd96Sagiri 	uint_t		nwr = 0, ix, jx;
1753b86efd96Sagiri 	int		ret;
1754b86efd96Sagiri 
1755b86efd96Sagiri 	RDS_DPRINTF2("rds_resend_messages", "Enter: SP(%p)", sp);
1756b86efd96Sagiri 
1757b86efd96Sagiri 	ep = &sp->session_dataep;
1758b86efd96Sagiri 
1759b86efd96Sagiri 	spool = &ep->ep_sndpool;
1760b86efd96Sagiri 	mutex_enter(&spool->pool_lock);
1761b86efd96Sagiri 
1762b86efd96Sagiri 	ASSERT(spool->pool_nfree == spool->pool_nbuffers);
1763b86efd96Sagiri 
1764*accc2981SToomas Soome 	if (ep->ep_lbufid == 0) {
1765b86efd96Sagiri 		RDS_DPRINTF2("rds_resend_messages",
1766b86efd96Sagiri 		    "SP(%p) Remote session is cleaned up ", sp);
1767b86efd96Sagiri 		/*
1768b86efd96Sagiri 		 * The remote end cleaned up its session. There may be loss
1769b86efd96Sagiri 		 * of messages. Mark all buffers as acknowledged.
1770b86efd96Sagiri 		 */
1771b86efd96Sagiri 		tmp = spool->pool_tailp;
1772b86efd96Sagiri 	} else {
1773b86efd96Sagiri 		tmp = (rds_buf_t *)ep->ep_lbufid;
1774b86efd96Sagiri 		RDS_DPRINTF2("rds_resend_messages",
1775b86efd96Sagiri 		    "SP(%p) Last successful BP(%p) ", sp, tmp);
1776b86efd96Sagiri 	}
1777b86efd96Sagiri 
1778b86efd96Sagiri 	endp = spool->pool_tailp;
1779b86efd96Sagiri 	bp = spool->pool_headp;
1780b86efd96Sagiri 	jx = 0;
1781b86efd96Sagiri 	while ((bp != NULL) && (bp != tmp)) {
1782b86efd96Sagiri 		bp->buf_state = RDS_SNDBUF_FREE;
1783b86efd96Sagiri 		jx++;
1784b86efd96Sagiri 		bp = bp->buf_nextp;
1785b86efd96Sagiri 	}
1786b86efd96Sagiri 
1787b86efd96Sagiri 	if (bp == NULL) {
1788b86efd96Sagiri 		mutex_exit(&spool->pool_lock);
1789b86efd96Sagiri 		RDS_DPRINTF2("rds_resend_messages", "Alert: lbufid(%p) is not "
1790b86efd96Sagiri 		    "found in the list", tmp);
1791b86efd96Sagiri 
1792b86efd96Sagiri 		rw_enter(&sp->session_lock, RW_WRITER);
1793b86efd96Sagiri 		if (sp->session_state == RDS_SESSION_STATE_INIT) {
1794b86efd96Sagiri 			sp->session_state = RDS_SESSION_STATE_CONNECTED;
1795b86efd96Sagiri 		} else {
1796b86efd96Sagiri 			RDS_DPRINTF2("rds_resend_messages", "SP(%p) State: %d "
1797b86efd96Sagiri 			    "Expected State: %d", sp, sp->session_state,
1798b86efd96Sagiri 			    RDS_SESSION_STATE_CONNECTED);
1799b86efd96Sagiri 		}
18008257fab9Sagiri 		sp->session_failover = 0;
1801b86efd96Sagiri 		rw_exit(&sp->session_lock);
1802b86efd96Sagiri 		return;
1803b86efd96Sagiri 	}
1804b86efd96Sagiri 
1805b86efd96Sagiri 	/* Found the match */
1806b86efd96Sagiri 	bp->buf_state = RDS_SNDBUF_FREE;
1807b86efd96Sagiri 	jx++;
1808b86efd96Sagiri 
1809b86efd96Sagiri 	spool->pool_tailp = bp;
1810b86efd96Sagiri 	bp = bp->buf_nextp;
1811b86efd96Sagiri 	spool->pool_tailp->buf_nextp = NULL;
1812b86efd96Sagiri 	nwr = spool->pool_nfree - jx;
1813b86efd96Sagiri 	spool->pool_nfree = jx;
1814b86efd96Sagiri 	mutex_exit(&spool->pool_lock);
1815b86efd96Sagiri 
1816b86efd96Sagiri 	RDS_DPRINTF2("rds_resend_messages", "SP(%p): Number of "
1817b86efd96Sagiri 	    "bufs (BP %p) to re-send: %d", sp, bp, nwr);
1818b86efd96Sagiri 
1819b86efd96Sagiri 	if (bp) {
1820b86efd96Sagiri 		wrp = (ibt_send_wr_t *)kmem_zalloc(sizeof (ibt_send_wr_t) * 100,
1821b86efd96Sagiri 		    KM_SLEEP);
1822b86efd96Sagiri 
1823b86efd96Sagiri 		while (nwr) {
1824b86efd96Sagiri 			jx = (nwr > 100) ? 100 : nwr;
1825b86efd96Sagiri 
1826b86efd96Sagiri 			tmp = bp;
1827b86efd96Sagiri 			for (ix = 0; ix < jx; ix++) {
1828b86efd96Sagiri 				bp->buf_state = RDS_SNDBUF_PENDING;
1829b86efd96Sagiri 				wrp[ix].wr_id = (uintptr_t)bp;
1830b86efd96Sagiri 				wrp[ix].wr_flags = IBT_WR_SEND_SOLICIT;
1831b86efd96Sagiri 				wrp[ix].wr_trans = IBT_RC_SRV;
1832b86efd96Sagiri 				wrp[ix].wr_opcode = IBT_WRC_SEND;
1833b86efd96Sagiri 				wrp[ix].wr_nds = 1;
1834b86efd96Sagiri 				wrp[ix].wr_sgl = &bp->buf_ds;
1835b86efd96Sagiri 				bp = bp->buf_nextp;
1836b86efd96Sagiri 			}
1837b86efd96Sagiri 
1838b86efd96Sagiri 			ret = ibt_post_send(ep->ep_chanhdl, wrp, jx, &ix);
1839b86efd96Sagiri 			if (ret != IBT_SUCCESS) {
1840b86efd96Sagiri 				RDS_DPRINTF2(LABEL, "EP(%p): ibt_post_send "
1841b86efd96Sagiri 				    "failed: %d for % pkts", ep, ret, jx);
1842b86efd96Sagiri 				break;
1843b86efd96Sagiri 			}
1844b86efd96Sagiri 
1845b86efd96Sagiri 			mutex_enter(&spool->pool_lock);
1846b86efd96Sagiri 			spool->pool_nbusy += jx;
1847b86efd96Sagiri 			mutex_exit(&spool->pool_lock);
1848b86efd96Sagiri 
1849b86efd96Sagiri 			nwr -= jx;
1850b86efd96Sagiri 		}
1851b86efd96Sagiri 
1852b86efd96Sagiri 		kmem_free(wrp, sizeof (ibt_send_wr_t) * 100);
1853b86efd96Sagiri 
1854b86efd96Sagiri 		if (nwr != 0) {
1855b86efd96Sagiri 
1856b86efd96Sagiri 			/*
1857b86efd96Sagiri 			 * An error while failover is in progress. Some WRs are
1858b86efd96Sagiri 			 * posted while other remain. If any of the posted WRs
1859b86efd96Sagiri 			 * complete in error then they would dispatch a taskq to
1860b86efd96Sagiri 			 * do a failover. Getting the session lock will prevent
1861b86efd96Sagiri 			 * the taskq to wait until we are done here.
1862b86efd96Sagiri 			 */
1863b86efd96Sagiri 			rw_enter(&sp->session_lock, RW_READER);
1864b86efd96Sagiri 
1865b86efd96Sagiri 			/*
1866b86efd96Sagiri 			 * Wait until all the previous WRs are completed and
1867b86efd96Sagiri 			 * then queue the remaining, otherwise the order of
1868b86efd96Sagiri 			 * the messages may change.
1869b86efd96Sagiri 			 */
1870b86efd96Sagiri 			(void) rds_is_sendq_empty(ep, 1);
1871b86efd96Sagiri 
1872b86efd96Sagiri 			/* free the remaining buffers */
1873b86efd96Sagiri 			rds_free_send_buf(ep, tmp, endp, nwr, B_FALSE);
1874b86efd96Sagiri 
1875b86efd96Sagiri 			rw_exit(&sp->session_lock);
1876b86efd96Sagiri 			return;
1877b86efd96Sagiri 		}
1878b86efd96Sagiri 	}
1879b86efd96Sagiri 
1880b86efd96Sagiri 	rw_enter(&sp->session_lock, RW_WRITER);
1881b86efd96Sagiri 	if (sp->session_state == RDS_SESSION_STATE_INIT) {
1882b86efd96Sagiri 		sp->session_state = RDS_SESSION_STATE_CONNECTED;
1883b86efd96Sagiri 	} else {
1884b86efd96Sagiri 		RDS_DPRINTF2("rds_resend_messages", "SP(%p) State: %d "
1885b86efd96Sagiri 		    "Expected State: %d", sp, sp->session_state,
1886b86efd96Sagiri 		    RDS_SESSION_STATE_CONNECTED);
1887b86efd96Sagiri 	}
18888257fab9Sagiri 	sp->session_failover = 0;
1889b86efd96Sagiri 	rw_exit(&sp->session_lock);
1890b86efd96Sagiri 
1891b86efd96Sagiri 	RDS_DPRINTF2("rds_resend_messages", "Return: SP(%p)", sp);
1892b86efd96Sagiri }
1893b86efd96Sagiri 
1894b86efd96Sagiri /*
1895b86efd96Sagiri  * This is called when a channel is connected. Transition the session to
1896b86efd96Sagiri  * CONNECTED state iff both channels are connected.
1897b86efd96Sagiri  */
1898b86efd96Sagiri void
rds_session_active(rds_session_t * sp)1899b86efd96Sagiri rds_session_active(rds_session_t *sp)
1900b86efd96Sagiri {
1901b86efd96Sagiri 	rds_ep_t	*ep;
1902b86efd96Sagiri 	uint_t		failover;
1903b86efd96Sagiri 
1904b86efd96Sagiri 	RDS_DPRINTF2("rds_session_active", "Enter: 0x%p", sp);
1905b86efd96Sagiri 
1906b86efd96Sagiri 	rw_enter(&sp->session_lock, RW_READER);
1907b86efd96Sagiri 
1908b86efd96Sagiri 	failover = sp->session_failover;
1909b86efd96Sagiri 
1910b86efd96Sagiri 	/*
1911b86efd96Sagiri 	 * we establish the data channel first, so check the control channel
1912b86efd96Sagiri 	 * first but make sure it is initialized.
1913b86efd96Sagiri 	 */
1914b86efd96Sagiri 	ep = &sp->session_ctrlep;
1915b86efd96Sagiri 	mutex_enter(&ep->ep_lock);
1916b86efd96Sagiri 	if (ep->ep_state != RDS_EP_STATE_CONNECTED) {
1917b86efd96Sagiri 		/* the session is not ready yet */
1918b86efd96Sagiri 		mutex_exit(&ep->ep_lock);
1919b86efd96Sagiri 		rw_exit(&sp->session_lock);
1920b86efd96Sagiri 		return;
1921b86efd96Sagiri 	}
1922b86efd96Sagiri 	mutex_exit(&ep->ep_lock);
1923b86efd96Sagiri 
1924b86efd96Sagiri 	/* control channel is connected, check the data channel */
1925b86efd96Sagiri 	ep = &sp->session_dataep;
1926b86efd96Sagiri 	mutex_enter(&ep->ep_lock);
1927b86efd96Sagiri 	if (ep->ep_state != RDS_EP_STATE_CONNECTED) {
1928b86efd96Sagiri 		/* data channel is not yet connected */
1929b86efd96Sagiri 		mutex_exit(&ep->ep_lock);
1930b86efd96Sagiri 		rw_exit(&sp->session_lock);
1931b86efd96Sagiri 		return;
1932b86efd96Sagiri 	}
1933b86efd96Sagiri 	mutex_exit(&ep->ep_lock);
1934b86efd96Sagiri 
1935b86efd96Sagiri 	if (failover) {
1936b86efd96Sagiri 		rw_exit(&sp->session_lock);
1937b86efd96Sagiri 
1938b86efd96Sagiri 		/*
1939b86efd96Sagiri 		 * The session has failed over. Previous msgs have to be
1940b86efd96Sagiri 		 * re-sent before the session is moved to the connected
1941b86efd96Sagiri 		 * state.
1942b86efd96Sagiri 		 */
1943b86efd96Sagiri 		RDS_DPRINTF2("rds_session_active", "SP(%p) Dispatching taskq "
1944b86efd96Sagiri 		    "to re-send messages", sp);
1945b86efd96Sagiri 		(void) ddi_taskq_dispatch(rds_taskq,
1946b86efd96Sagiri 		    rds_resend_messages, (void *)sp, DDI_SLEEP);
1947b86efd96Sagiri 		return;
1948b86efd96Sagiri 	}
1949b86efd96Sagiri 
1950b86efd96Sagiri 	/* the session is ready */
1951b86efd96Sagiri 	sp->session_state = RDS_SESSION_STATE_CONNECTED;
1952b86efd96Sagiri 	RDS_DPRINTF3("rds_session_active",
1953b86efd96Sagiri 	    "SP(%p) State RDS_SESSION_STATE_CONNECTED", sp);
1954b86efd96Sagiri 
1955b86efd96Sagiri 	rw_exit(&sp->session_lock);
1956b86efd96Sagiri 
1957b86efd96Sagiri 	RDS_DPRINTF2("rds_session_active", "Return: SP(%p) is CONNECTED", sp);
1958b86efd96Sagiri }
1959b86efd96Sagiri 
1960b86efd96Sagiri static int
rds_ep_sendmsg(rds_ep_t * ep,uio_t * uiop,in_port_t sendport,in_port_t recvport)1961b86efd96Sagiri rds_ep_sendmsg(rds_ep_t *ep, uio_t *uiop, in_port_t sendport,
1962b86efd96Sagiri     in_port_t recvport)
1963b86efd96Sagiri {
1964b86efd96Sagiri 	int	ret;
1965b86efd96Sagiri 
1966b86efd96Sagiri 	RDS_DPRINTF4("rds_ep_sendmsg", "Enter: EP(%p) sendport: %d recvport: "
1967b86efd96Sagiri 	    "%d", ep, sendport, recvport);
1968b86efd96Sagiri 
19698257fab9Sagiri 	/* make sure the remote port is not stalled */
19708257fab9Sagiri 	if (rds_is_port_marked(ep->ep_sp, recvport, RDS_REMOTE)) {
1971b86efd96Sagiri 		RDS_DPRINTF2(LABEL, "SP(%p) Port:%d is in stall state",
1972b86efd96Sagiri 		    ep->ep_sp, recvport);
1973b86efd96Sagiri 		RDS_INCR_EWOULDBLOCK();
1974b86efd96Sagiri 		ret = ENOMEM;
1975b86efd96Sagiri 	} else {
1976b86efd96Sagiri 		ret = rds_build_n_post_msg(ep, uiop, sendport, recvport);
1977b86efd96Sagiri 	}
1978b86efd96Sagiri 
1979b86efd96Sagiri 	RDS_DPRINTF4("rds_ep_sendmsg", "Return: EP(%p)", ep);
1980b86efd96Sagiri 
1981b86efd96Sagiri 	return (ret);
1982b86efd96Sagiri }
1983b86efd96Sagiri 
1984b86efd96Sagiri /* Send a message to a destination socket */
1985b86efd96Sagiri int
rds_sendmsg(uio_t * uiop,ipaddr_t sendip,ipaddr_t recvip,in_port_t sendport,in_port_t recvport,zoneid_t zoneid)1986b86efd96Sagiri rds_sendmsg(uio_t *uiop, ipaddr_t sendip, ipaddr_t recvip, in_port_t sendport,
1987b86efd96Sagiri     in_port_t recvport, zoneid_t zoneid)
1988b86efd96Sagiri {
1989b86efd96Sagiri 	rds_session_t	*sp;
1990b86efd96Sagiri 	ib_gid_t	lgid, rgid;
1991b86efd96Sagiri 	int		ret;
1992b86efd96Sagiri 
1993b86efd96Sagiri 	RDS_DPRINTF4("rds_sendmsg", "Enter: uiop: 0x%p, srcIP: 0x%x destIP: "
1994b86efd96Sagiri 	    "0x%x sndport: %d recvport: %d", uiop, sendip, recvip,
1995b86efd96Sagiri 	    sendport, recvport);
1996b86efd96Sagiri 
1997b86efd96Sagiri 	/* If msg length is 0, just return success */
1998b86efd96Sagiri 	if (uiop->uio_resid == 0) {
1999b86efd96Sagiri 		RDS_DPRINTF2("rds_sendmsg", "Zero sized message");
2000b86efd96Sagiri 		return (0);
2001b86efd96Sagiri 	}
2002b86efd96Sagiri 
2003b86efd96Sagiri 	/* Is there a session to the destination? */
2004b86efd96Sagiri 	rw_enter(&rdsib_statep->rds_sessionlock, RW_READER);
2005b86efd96Sagiri 	sp = rds_session_lkup(rdsib_statep, recvip, 0);
2006b86efd96Sagiri 	rw_exit(&rdsib_statep->rds_sessionlock);
2007b86efd96Sagiri 
2008b86efd96Sagiri 	/* Is this a loopback message? */
2009b86efd96Sagiri 	if ((sp == NULL) && (rds_islocal(recvip))) {
2010b86efd96Sagiri 		/* make sure the port is not stalled */
20118257fab9Sagiri 		if (rds_is_port_marked(NULL, recvport, RDS_LOOPBACK)) {
2012b86efd96Sagiri 			RDS_DPRINTF2(LABEL, "Local Port:%d is in stall state",
2013b86efd96Sagiri 			    recvport);
2014b86efd96Sagiri 			RDS_INCR_EWOULDBLOCK();
2015b86efd96Sagiri 			return (ENOMEM);
2016b86efd96Sagiri 		}
2017b86efd96Sagiri 		ret = rds_deliver_loopback_msg(uiop, recvip, sendip, recvport,
2018b86efd96Sagiri 		    sendport, zoneid);
2019b86efd96Sagiri 		return (ret);
2020b86efd96Sagiri 	}
2021b86efd96Sagiri 
2022b86efd96Sagiri 	/* Not a loopback message */
2023b86efd96Sagiri 	if (sp == NULL) {
2024b86efd96Sagiri 		/* There is no session to the destination, create one. */
2025b86efd96Sagiri 		RDS_DPRINTF3(LABEL, "There is no session to the destination "
2026b86efd96Sagiri 		    "IP: 0x%x", recvip);
2027b86efd96Sagiri 		sp = rds_session_create(rdsib_statep, sendip, recvip, NULL,
2028b86efd96Sagiri 		    RDS_SESSION_ACTIVE);
2029b86efd96Sagiri 		if (sp != NULL) {
2030b86efd96Sagiri 			rw_enter(&sp->session_lock, RW_WRITER);
2031b86efd96Sagiri 			if (sp->session_type == RDS_SESSION_ACTIVE) {
2032b86efd96Sagiri 				ret = rds_session_init(sp);
2033b86efd96Sagiri 				if (ret != 0) {
2034b86efd96Sagiri 					RDS_DPRINTF2("rds_sendmsg",
2035b86efd96Sagiri 					    "SP(%p): rds_session_init failed",
2036b86efd96Sagiri 					    sp);
2037b86efd96Sagiri 					sp->session_state =
2038b86efd96Sagiri 					    RDS_SESSION_STATE_FAILED;
2039b86efd96Sagiri 					RDS_DPRINTF3("rds_sendmsg",
2040b86efd96Sagiri 					    "SP(%p) State "
2041b86efd96Sagiri 					    "RDS_SESSION_STATE_FAILED", sp);
2042b86efd96Sagiri 					rw_exit(&sp->session_lock);
2043b86efd96Sagiri 					return (EFAULT);
2044b86efd96Sagiri 				}
2045b86efd96Sagiri 				sp->session_state = RDS_SESSION_STATE_INIT;
2046b86efd96Sagiri 				RDS_DPRINTF3("rds_sendmsg",
2047b86efd96Sagiri 				    "SP(%p) State "
2048b86efd96Sagiri 				    "RDS_SESSION_STATE_INIT", sp);
2049b86efd96Sagiri 				rw_exit(&sp->session_lock);
2050b86efd96Sagiri 				rds_session_open(sp);
2051b86efd96Sagiri 			} else {
2052b86efd96Sagiri 				rw_exit(&sp->session_lock);
2053b86efd96Sagiri 			}
2054b86efd96Sagiri 		} else {
2055b86efd96Sagiri 			/* Is a session created for this destination */
2056b86efd96Sagiri 			rw_enter(&rdsib_statep->rds_sessionlock, RW_READER);
2057b86efd96Sagiri 			sp = rds_session_lkup(rdsib_statep, recvip, 0);
2058b86efd96Sagiri 			rw_exit(&rdsib_statep->rds_sessionlock);
2059b86efd96Sagiri 			if (sp == NULL) {
2060b86efd96Sagiri 				return (EFAULT);
2061b86efd96Sagiri 			}
2062b86efd96Sagiri 		}
2063b86efd96Sagiri 	}
2064b86efd96Sagiri 
2065b86efd96Sagiri 	/* There is a session to the destination */
2066b86efd96Sagiri 	rw_enter(&sp->session_lock, RW_READER);
2067b86efd96Sagiri 	if (sp->session_state == RDS_SESSION_STATE_CONNECTED) {
2068b86efd96Sagiri 		rw_exit(&sp->session_lock);
2069b86efd96Sagiri 
2070b86efd96Sagiri 		ret = rds_ep_sendmsg(&sp->session_dataep, uiop, sendport,
2071b86efd96Sagiri 		    recvport);
2072b86efd96Sagiri 		return (ret);
2073b86efd96Sagiri 	} else if ((sp->session_state == RDS_SESSION_STATE_FAILED) ||
2074b86efd96Sagiri 	    (sp->session_state == RDS_SESSION_STATE_FINI)) {
2075b86efd96Sagiri 		ipaddr_t sendip1, recvip1;
2076b86efd96Sagiri 
2077b86efd96Sagiri 		RDS_DPRINTF3("rds_sendmsg", "SP(%p) is not connected, State: "
2078d99cb22fSagiri 		    "%d", sp, sp->session_state);
2079b86efd96Sagiri 		rw_exit(&sp->session_lock);
2080b86efd96Sagiri 		rw_enter(&sp->session_lock, RW_WRITER);
2081b86efd96Sagiri 		if ((sp->session_state == RDS_SESSION_STATE_FAILED) ||
2082b86efd96Sagiri 		    (sp->session_state == RDS_SESSION_STATE_FINI)) {
2083015f8fffShiremath 			ibt_ip_path_attr_t	ipattr;
2084015f8fffShiremath 			ibt_ip_addr_t		dstip;
2085015f8fffShiremath 
2086b86efd96Sagiri 			sp->session_state = RDS_SESSION_STATE_CREATED;
2087b86efd96Sagiri 			sp->session_type = RDS_SESSION_ACTIVE;
2088b86efd96Sagiri 			RDS_DPRINTF3("rds_sendmsg", "SP(%p) State "
2089b86efd96Sagiri 			    "RDS_SESSION_STATE_CREATED", sp);
2090b86efd96Sagiri 			rw_exit(&sp->session_lock);
2091b86efd96Sagiri 
2092b86efd96Sagiri 
2093b86efd96Sagiri 			/* The ipaddr should be in the network order */
2094b86efd96Sagiri 			sendip1 = sendip;
2095b86efd96Sagiri 			recvip1 = recvip;
2096b86efd96Sagiri 			ret = rds_sc_path_lookup(&sendip1, &recvip1);
2097b86efd96Sagiri 			if (ret == 0) {
2098b86efd96Sagiri 				RDS_DPRINTF2(LABEL, "Path not found "
2099b86efd96Sagiri 				    "(0x%x 0x%x)", sendip1, recvip1);
2100b86efd96Sagiri 			}
2101b86efd96Sagiri 
2102b86efd96Sagiri 			/* Resolve the IP addresses */
21038257fab9Sagiri 			lgid.gid_prefix = 0;
21048257fab9Sagiri 			lgid.gid_guid = 0;
21058257fab9Sagiri 			rgid.gid_prefix = 0;
21068257fab9Sagiri 			rgid.gid_guid = 0;
2107015f8fffShiremath 
2108015f8fffShiremath 			bzero(&ipattr, sizeof (ibt_ip_path_attr_t));
2109015f8fffShiremath 			dstip.family = AF_INET;
2110d22e11ebSBill Taylor 			dstip.un.ip4addr = recvip1;
2111015f8fffShiremath 			ipattr.ipa_dst_ip = &dstip;
2112015f8fffShiremath 			ipattr.ipa_src_ip.family = AF_INET;
2113d22e11ebSBill Taylor 			ipattr.ipa_src_ip.un.ip4addr = sendip1;
2114015f8fffShiremath 			ipattr.ipa_ndst = 1;
2115015f8fffShiremath 			ipattr.ipa_max_paths = 1;
2116015f8fffShiremath 			RDS_DPRINTF2(LABEL, "ibt_get_ip_paths: 0x%x <-> 0x%x ",
2117015f8fffShiremath 			    sendip1, recvip1);
2118015f8fffShiremath 			ret = ibt_get_ip_paths(rdsib_statep->rds_ibhdl,
2119015f8fffShiremath 			    IBT_PATH_NO_FLAGS, &ipattr, &sp->session_pinfo,
2120015f8fffShiremath 			    NULL, NULL);
2121015f8fffShiremath 			if (ret != IBT_SUCCESS) {
212274242422Sagiri 				RDS_DPRINTF2("rds_sendmsg",
2123015f8fffShiremath 				    "ibt_get_ip_paths failed, ret: %d ", ret);
21248257fab9Sagiri 
2125b86efd96Sagiri 				rw_enter(&sp->session_lock, RW_WRITER);
2126b86efd96Sagiri 				if (sp->session_type == RDS_SESSION_ACTIVE) {
2127b86efd96Sagiri 					sp->session_state =
2128b86efd96Sagiri 					    RDS_SESSION_STATE_FAILED;
2129b86efd96Sagiri 					RDS_DPRINTF3("rds_sendmsg",
2130b86efd96Sagiri 					    "SP(%p) State "
2131b86efd96Sagiri 					    "RDS_SESSION_STATE_FAILED", sp);
2132b86efd96Sagiri 					rw_exit(&sp->session_lock);
2133b86efd96Sagiri 					return (EFAULT);
2134b86efd96Sagiri 				} else {
2135b86efd96Sagiri 					rw_exit(&sp->session_lock);
2136b86efd96Sagiri 					return (ENOMEM);
2137b86efd96Sagiri 				}
2138b86efd96Sagiri 			}
2139015f8fffShiremath 			RDS_DPRINTF2(LABEL, "ibt_get_ip_paths success");
2140015f8fffShiremath 			lgid = sp->session_pinfo.
2141015f8fffShiremath 			    pi_prim_cep_path.cep_adds_vect.av_sgid;
2142015f8fffShiremath 			rgid = sp->session_pinfo.
2143015f8fffShiremath 			    pi_prim_cep_path.cep_adds_vect.av_dgid;
2144b86efd96Sagiri 
2145b86efd96Sagiri 			RDS_DPRINTF2(LABEL, "lgid: %llx:%llx rgid: %llx:%llx",
2146b86efd96Sagiri 			    lgid.gid_prefix, lgid.gid_guid, rgid.gid_prefix,
2147b86efd96Sagiri 			    rgid.gid_guid);
2148b86efd96Sagiri 
2149b86efd96Sagiri 			rw_enter(&sp->session_lock, RW_WRITER);
2150b86efd96Sagiri 			if (sp->session_type == RDS_SESSION_ACTIVE) {
2151b86efd96Sagiri 				sp->session_lgid = lgid;
2152b86efd96Sagiri 				sp->session_rgid = rgid;
2153b86efd96Sagiri 				ret = rds_session_init(sp);
2154b86efd96Sagiri 				if (ret != 0) {
2155b86efd96Sagiri 					RDS_DPRINTF2("rds_sendmsg",
2156b86efd96Sagiri 					    "SP(%p): rds_session_init failed",
2157b86efd96Sagiri 					    sp);
2158b86efd96Sagiri 					sp->session_state =
2159b86efd96Sagiri 					    RDS_SESSION_STATE_FAILED;
2160b86efd96Sagiri 					RDS_DPRINTF3("rds_sendmsg",
2161b86efd96Sagiri 					    "SP(%p) State "
2162b86efd96Sagiri 					    "RDS_SESSION_STATE_FAILED", sp);
2163b86efd96Sagiri 					rw_exit(&sp->session_lock);
2164b86efd96Sagiri 					return (EFAULT);
2165b86efd96Sagiri 				}
2166b86efd96Sagiri 				sp->session_state = RDS_SESSION_STATE_INIT;
2167b86efd96Sagiri 				rw_exit(&sp->session_lock);
2168b86efd96Sagiri 
2169b86efd96Sagiri 				rds_session_open(sp);
2170b86efd96Sagiri 
2171b86efd96Sagiri 			} else {
21728257fab9Sagiri 				RDS_DPRINTF2("rds_sendmsg",
21738257fab9Sagiri 				    "SP(%p): type changed to %d",
21748257fab9Sagiri 				    sp, sp->session_type);
2175b86efd96Sagiri 				rw_exit(&sp->session_lock);
2176b86efd96Sagiri 				return (ENOMEM);
2177b86efd96Sagiri 			}
2178b86efd96Sagiri 		} else {
21798257fab9Sagiri 			RDS_DPRINTF2("rds_sendmsg",
21808257fab9Sagiri 			    "SP(%p): Session state %d changed",
2181b86efd96Sagiri 			    sp, sp->session_state);
2182b86efd96Sagiri 			rw_exit(&sp->session_lock);
2183b86efd96Sagiri 			return (ENOMEM);
2184b86efd96Sagiri 		}
2185b86efd96Sagiri 	} else {
218600a3eaf3SRamaswamy Tummala 		RDS_DPRINTF4("rds_sendmsg", "SP(%p): Session is in %d state",
2187b86efd96Sagiri 		    sp, sp->session_state);
2188b86efd96Sagiri 		rw_exit(&sp->session_lock);
2189b86efd96Sagiri 		return (ENOMEM);
2190b86efd96Sagiri 	}
2191b86efd96Sagiri 
2192b86efd96Sagiri 	rw_enter(&sp->session_lock, RW_READER);
2193b86efd96Sagiri 	if (sp->session_state == RDS_SESSION_STATE_CONNECTED) {
2194b86efd96Sagiri 		rw_exit(&sp->session_lock);
2195b86efd96Sagiri 
2196b86efd96Sagiri 		ret = rds_ep_sendmsg(&sp->session_dataep, uiop, sendport,
2197b86efd96Sagiri 		    recvport);
2198b86efd96Sagiri 	} else {
21998257fab9Sagiri 		RDS_DPRINTF2("rds_sendmsg", "SP(%p): state(%d) not connected",
2200b86efd96Sagiri 		    sp, sp->session_state);
2201b86efd96Sagiri 		rw_exit(&sp->session_lock);
2202b86efd96Sagiri 	}
2203b86efd96Sagiri 
2204b86efd96Sagiri 	RDS_DPRINTF4("rds_sendmsg", "Return: SP(%p) ret: %d", sp, ret);
2205b86efd96Sagiri 
2206b86efd96Sagiri 	return (ret);
2207b86efd96Sagiri }
2208b86efd96Sagiri 
2209b86efd96Sagiri /* Note: This is called on the CQ handler thread */
2210b86efd96Sagiri void
rds_received_msg(rds_ep_t * ep,rds_buf_t * bp)2211b86efd96Sagiri rds_received_msg(rds_ep_t *ep, rds_buf_t *bp)
2212b86efd96Sagiri {
2213b86efd96Sagiri 	mblk_t		*mp, *mp1;
2214b86efd96Sagiri 	rds_data_hdr_t	*pktp, *pktp1;
2215b86efd96Sagiri 	uint8_t		*datap;
2216b86efd96Sagiri 	rds_buf_t	*bp1;
2217b86efd96Sagiri 	rds_bufpool_t	*rpool;
2218b86efd96Sagiri 	uint_t		npkts, ix;
2219b86efd96Sagiri 	int		ret;
2220b86efd96Sagiri 
2221b86efd96Sagiri 	RDS_DPRINTF4("rds_received_msg", "Enter: EP(%p)", ep);
2222b86efd96Sagiri 
2223b86efd96Sagiri 	pktp = (rds_data_hdr_t *)(uintptr_t)bp->buf_ds.ds_va;
2224b86efd96Sagiri 	datap = ((uint8_t *)(uintptr_t)bp->buf_ds.ds_va) + RDS_DATA_HDR_SZ;
2225b86efd96Sagiri 	npkts = pktp->dh_npkts;
2226b86efd96Sagiri 
2227b86efd96Sagiri 	/* increment rx pending here */
2228b86efd96Sagiri 	rpool = &ep->ep_rcvpool;
2229b86efd96Sagiri 	mutex_enter(&rpool->pool_lock);
2230b86efd96Sagiri 	rpool->pool_nbusy += npkts;
2231b86efd96Sagiri 	mutex_exit(&rpool->pool_lock);
2232b86efd96Sagiri 
2233b86efd96Sagiri 	/* this will get freed by sockfs */
2234b86efd96Sagiri 	mp = esballoc(datap, pktp->dh_datalen, BPRI_HI, &bp->buf_frtn);
2235b86efd96Sagiri 	if (mp == NULL) {
2236b86efd96Sagiri 		RDS_DPRINTF2(LABEL, "EP(%p) BP(%p): allocb failed",
2237b86efd96Sagiri 		    ep, bp);
2238b86efd96Sagiri 		rds_free_recv_buf(bp, npkts);
2239b86efd96Sagiri 		return;
2240b86efd96Sagiri 	}
2241b86efd96Sagiri 	mp->b_wptr = datap + pktp->dh_datalen;
2242b86efd96Sagiri 	mp->b_datap->db_type = M_DATA;
2243b86efd96Sagiri 
2244b86efd96Sagiri 	mp1 = mp;
2245b86efd96Sagiri 	bp1 = bp->buf_nextp;
2246b86efd96Sagiri 	while (bp1 != NULL) {
2247b86efd96Sagiri 		pktp1 = (rds_data_hdr_t *)(uintptr_t)bp1->buf_ds.ds_va;
2248b86efd96Sagiri 		datap = ((uint8_t *)(uintptr_t)bp1->buf_ds.ds_va) +
2249b86efd96Sagiri 		    RDS_DATA_HDR_SZ;
2250b86efd96Sagiri 
2251b86efd96Sagiri 		mp1->b_cont = esballoc(datap, pktp1->dh_datalen,
2252b86efd96Sagiri 		    BPRI_HI, &bp1->buf_frtn);
2253b86efd96Sagiri 		if (mp1->b_cont == NULL) {
2254b86efd96Sagiri 			RDS_DPRINTF2(LABEL, "EP(%p) BP(%p): allocb failed",
2255b86efd96Sagiri 			    ep, bp1);
2256b86efd96Sagiri 			freemsg(mp);
2257b86efd96Sagiri 			rds_free_recv_buf(bp1, pktp1->dh_npkts);
2258b86efd96Sagiri 			return;
2259b86efd96Sagiri 		}
2260b86efd96Sagiri 		mp1 = mp1->b_cont;
2261b86efd96Sagiri 		mp1->b_wptr = datap + pktp1->dh_datalen;
2262b86efd96Sagiri 		mp1->b_datap->db_type = M_DATA;
2263b86efd96Sagiri 
2264b86efd96Sagiri 		bp1 = bp1->buf_nextp;
2265b86efd96Sagiri 	}
2266b86efd96Sagiri 
2267b86efd96Sagiri 	RDS_INCR_RXPKTS_PEND(npkts);
2268b86efd96Sagiri 	RDS_INCR_RXPKTS(npkts);
2269b86efd96Sagiri 	RDS_INCR_RXBYTES(msgdsize(mp));
2270b86efd96Sagiri 
2271b86efd96Sagiri 	RDS_DPRINTF5(LABEL, "Deliver Message: sendIP: 0x%x recvIP: 0x%x "
2272b86efd96Sagiri 	    "sendport: %d recvport: %d npkts: %d pktno: %d", ep->ep_remip,
2273b86efd96Sagiri 	    ep->ep_myip, pktp->dh_sendport, pktp->dh_recvport,
2274b86efd96Sagiri 	    npkts, pktp->dh_psn);
2275b86efd96Sagiri 
2276b86efd96Sagiri 	/* store the last buffer id, no lock needed */
2277b86efd96Sagiri 	if (npkts > 1) {
2278b86efd96Sagiri 		ep->ep_rbufid = pktp1->dh_bufid;
2279b86efd96Sagiri 	} else {
2280b86efd96Sagiri 		ep->ep_rbufid = pktp->dh_bufid;
2281b86efd96Sagiri 	}
2282b86efd96Sagiri 
2283b86efd96Sagiri 	ret = rds_deliver_new_msg(mp, ep->ep_myip, ep->ep_remip,
2284b86efd96Sagiri 	    pktp->dh_recvport, pktp->dh_sendport, ALL_ZONES);
2285b86efd96Sagiri 	if (ret != 0) {
2286b86efd96Sagiri 		if (ret == ENOSPC) {
2287b86efd96Sagiri 			/*
2288b86efd96Sagiri 			 * The message is delivered but cannot take more,
22898257fab9Sagiri 			 * stop further remote messages coming to this port
2290b86efd96Sagiri 			 */
22918257fab9Sagiri 			RDS_DPRINTF3("rds_received_msg", "Port %d NO SPACE",
2292b86efd96Sagiri 			    pktp->dh_recvport);
22938257fab9Sagiri 			rds_stall_port(ep->ep_sp, pktp->dh_recvport, RDS_LOCAL);
2294b86efd96Sagiri 		} else {
229574242422Sagiri 			RDS_DPRINTF2(LABEL, "rds_deliver_new_msg returned: %d",
2296b86efd96Sagiri 			    ret);
2297b86efd96Sagiri 		}
2298b86efd96Sagiri 	}
2299b86efd96Sagiri 
2300b86efd96Sagiri 	mutex_enter(&ep->ep_lock);
2301d99cb22fSagiri 	/* The first message can come in before the conn est event */
2302d99cb22fSagiri 	if ((ep->ep_rdmacnt == 0) && (ep->ep_state == RDS_EP_STATE_CONNECTED)) {
2303b86efd96Sagiri 		ep->ep_rdmacnt++;
2304b86efd96Sagiri 		*(uintptr_t *)(uintptr_t)ep->ep_ackds.ds_va = ep->ep_rbufid;
2305b86efd96Sagiri 		mutex_exit(&ep->ep_lock);
2306b86efd96Sagiri 
2307b86efd96Sagiri 		/* send acknowledgement */
2308b86efd96Sagiri 		RDS_INCR_TXACKS();
2309b86efd96Sagiri 		ret = ibt_post_send(ep->ep_chanhdl, &ep->ep_ackwr, 1, &ix);
2310b86efd96Sagiri 		if (ret != IBT_SUCCESS) {
231174242422Sagiri 			RDS_DPRINTF2(LABEL, "EP(%p): ibt_post_send for "
2312b86efd96Sagiri 			    "acknowledgement failed: %d, SQ depth: %d",
2313b86efd96Sagiri 			    ep, ret, ep->ep_sndpool.pool_nbusy);
2314b86efd96Sagiri 			mutex_enter(&ep->ep_lock);
2315b86efd96Sagiri 			ep->ep_rdmacnt--;
2316b86efd96Sagiri 			mutex_exit(&ep->ep_lock);
2317b86efd96Sagiri 		}
2318b86efd96Sagiri 	} else {
2319b86efd96Sagiri 		/* no room to send acknowledgement */
2320b86efd96Sagiri 		mutex_exit(&ep->ep_lock);
2321b86efd96Sagiri 	}
2322b86efd96Sagiri 
2323b86efd96Sagiri 	RDS_DPRINTF4("rds_received_msg", "Return: EP(%p)", ep);
2324b86efd96Sagiri }
2325