/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
21*b494511aSVenki Rajagopalan 
/*
 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
 */
25*b494511aSVenki Rajagopalan 
26*b494511aSVenki Rajagopalan #include <sys/types.h>
27*b494511aSVenki Rajagopalan #include <sys/kmem.h>
28*b494511aSVenki Rajagopalan #include <sys/conf.h>
29*b494511aSVenki Rajagopalan #include <sys/ddi.h>
30*b494511aSVenki Rajagopalan #include <sys/sunddi.h>
31*b494511aSVenki Rajagopalan #include <sys/ksynch.h>
32*b494511aSVenki Rajagopalan #include <sys/dlpi.h>			/* HCKSUM_INET_FULL_V4 */
33*b494511aSVenki Rajagopalan #include <sys/pattr.h>			/* HCK_FULLCKSUM */
34*b494511aSVenki Rajagopalan #include <sys/ib/mgt/sm_attr.h>		/* SM_INIT_TYPE_REPLY_... */
35*b494511aSVenki Rajagopalan 
36*b494511aSVenki Rajagopalan #include <sys/ib/clients/eoib/eib_impl.h>
37*b494511aSVenki Rajagopalan 
38*b494511aSVenki Rajagopalan /*
39*b494511aSVenki Rajagopalan  * Declarations private to this file
40*b494511aSVenki Rajagopalan  */
41*b494511aSVenki Rajagopalan static void eib_ibt_reset_partitions(eib_t *);
42*b494511aSVenki Rajagopalan static void eib_ibt_wakeup_sqd_waiters(eib_t *, ibt_channel_hdl_t);
43*b494511aSVenki Rajagopalan static int eib_ibt_chan_pkey(eib_t *, eib_chan_t *, ib_pkey_t, boolean_t,
44*b494511aSVenki Rajagopalan     boolean_t *);
45*b494511aSVenki Rajagopalan static boolean_t eib_ibt_has_chan_pkey_changed(eib_t *, eib_chan_t *);
46*b494511aSVenki Rajagopalan static boolean_t eib_ibt_has_any_pkey_changed(eib_t *);
47*b494511aSVenki Rajagopalan static int eib_ibt_fill_avect(eib_t *, eib_avect_t *, ib_lid_t);
48*b494511aSVenki Rajagopalan static void eib_ibt_record_srate(eib_t *);
49*b494511aSVenki Rajagopalan 
/*
 * Definitions private to this file
 */

/*
 * Predicates on the SM's init type reply byte (p_init_type_reply from
 * the port info).  Each one evaluates to non-zero when the
 * corresponding reply bit is clear.
 */
#define	EIB_PORT_ATTR_LOADED(itr)				\
	(!((itr) & SM_INIT_TYPE_REPLY_NO_LOAD_REPLY))
#define	EIB_PORT_ATTR_NOT_PRESERVED(itr)			\
	(!((itr) & SM_INIT_TYPE_PRESERVE_CONTENT_REPLY))
#define	EIB_PORT_PRES_NOT_PRESERVED(itr)			\
	(!((itr) & SM_INIT_TYPE_PRESERVE_PRESENCE_REPLY))

/*
 * Progress bits accumulated by eib_ibt_hca_init() as each setup step
 * completes; the accumulated mask is handed to eib_rb_ibt_hca_init()
 * when a later step fails.
 */
#define	EIB_HCAINIT_HCA_OPENED		0x01	/* ibt_open_hca() done */
#define	EIB_HCAINIT_ATTRS_ALLOCD	0x02	/* ei_hca_attrs allocated */
#define	EIB_HCAINIT_HCA_PORTS_QUERIED	0x04	/* sgid and mtu recorded */
#define	EIB_HCAINIT_PD_ALLOCD		0x08	/* ibt_alloc_pd() done */
#define	EIB_HCAINIT_CAPAB_RECORDED	0x10	/* ei_caps filled in */
72*b494511aSVenki Rajagopalan 
73*b494511aSVenki Rajagopalan int
eib_ibt_hca_init(eib_t * ss)74*b494511aSVenki Rajagopalan eib_ibt_hca_init(eib_t *ss)
75*b494511aSVenki Rajagopalan {
76*b494511aSVenki Rajagopalan 	ibt_status_t ret;
77*b494511aSVenki Rajagopalan 	ibt_hca_portinfo_t *pi;
78*b494511aSVenki Rajagopalan 	uint_t num_pi;
79*b494511aSVenki Rajagopalan 	uint_t sz_pi;
80*b494511aSVenki Rajagopalan 	uint_t progress = 0;
81*b494511aSVenki Rajagopalan 
82*b494511aSVenki Rajagopalan 	if (ss->ei_hca_hdl)
83*b494511aSVenki Rajagopalan 		return (EIB_E_SUCCESS);
84*b494511aSVenki Rajagopalan 
85*b494511aSVenki Rajagopalan 	/*
86*b494511aSVenki Rajagopalan 	 * Open the HCA
87*b494511aSVenki Rajagopalan 	 */
88*b494511aSVenki Rajagopalan 	ret = ibt_open_hca(ss->ei_ibt_hdl, ss->ei_props->ep_hca_guid,
89*b494511aSVenki Rajagopalan 	    &ss->ei_hca_hdl);
90*b494511aSVenki Rajagopalan 	if (ret != IBT_SUCCESS) {
91*b494511aSVenki Rajagopalan 		EIB_DPRINTF_ERR(ss->ei_instance,
92*b494511aSVenki Rajagopalan 		    "ibt_open_hca(hca_guid=0x%llx) "
93*b494511aSVenki Rajagopalan 		    "failed, ret=%d", ss->ei_props->ep_hca_guid, ret);
94*b494511aSVenki Rajagopalan 		goto ibt_hca_init_fail;
95*b494511aSVenki Rajagopalan 	}
96*b494511aSVenki Rajagopalan 	progress |= EIB_HCAINIT_HCA_OPENED;
97*b494511aSVenki Rajagopalan 
98*b494511aSVenki Rajagopalan 	/*
99*b494511aSVenki Rajagopalan 	 * Query and store HCA attributes
100*b494511aSVenki Rajagopalan 	 */
101*b494511aSVenki Rajagopalan 	ss->ei_hca_attrs = kmem_zalloc(sizeof (ibt_hca_attr_t), KM_SLEEP);
102*b494511aSVenki Rajagopalan 	progress |= EIB_HCAINIT_ATTRS_ALLOCD;
103*b494511aSVenki Rajagopalan 
104*b494511aSVenki Rajagopalan 	ret = ibt_query_hca(ss->ei_hca_hdl, ss->ei_hca_attrs);
105*b494511aSVenki Rajagopalan 	if (ret != IBT_SUCCESS) {
106*b494511aSVenki Rajagopalan 		EIB_DPRINTF_ERR(ss->ei_instance,
107*b494511aSVenki Rajagopalan 		    "ibt_query_hca(hca_hdl=0x%llx, "
108*b494511aSVenki Rajagopalan 		    "hca_guid=0x%llx) failed, ret=%d",
109*b494511aSVenki Rajagopalan 		    ss->ei_hca_hdl, ss->ei_props->ep_hca_guid, ret);
110*b494511aSVenki Rajagopalan 		goto ibt_hca_init_fail;
111*b494511aSVenki Rajagopalan 	}
112*b494511aSVenki Rajagopalan 
113*b494511aSVenki Rajagopalan 	/*
114*b494511aSVenki Rajagopalan 	 * At this point, we don't even care about the linkstate, we only want
115*b494511aSVenki Rajagopalan 	 * to record our invariant base port guid and mtu
116*b494511aSVenki Rajagopalan 	 */
117*b494511aSVenki Rajagopalan 	ret = ibt_query_hca_ports(ss->ei_hca_hdl, ss->ei_props->ep_port_num,
118*b494511aSVenki Rajagopalan 	    &pi, &num_pi, &sz_pi);
119*b494511aSVenki Rajagopalan 	if (ret != IBT_SUCCESS) {
120*b494511aSVenki Rajagopalan 		EIB_DPRINTF_ERR(ss->ei_instance,
121*b494511aSVenki Rajagopalan 		    "ibt_query_hca_ports(hca_hdl=0x%llx, "
122*b494511aSVenki Rajagopalan 		    "port=0x%x) failed, ret=%d", ss->ei_hca_hdl,
123*b494511aSVenki Rajagopalan 		    ss->ei_props->ep_port_num, ret);
124*b494511aSVenki Rajagopalan 		goto ibt_hca_init_fail;
125*b494511aSVenki Rajagopalan 	}
126*b494511aSVenki Rajagopalan 	if (num_pi != 1) {
127*b494511aSVenki Rajagopalan 		EIB_DPRINTF_ERR(ss->ei_instance,
128*b494511aSVenki Rajagopalan 		    "ibt_query_hca_ports(hca_hdl=0x%llx, "
129*b494511aSVenki Rajagopalan 		    "port=0x%x) returned num_pi=%d", ss->ei_hca_hdl,
130*b494511aSVenki Rajagopalan 		    ss->ei_props->ep_port_num, num_pi);
131*b494511aSVenki Rajagopalan 		ibt_free_portinfo(pi, sz_pi);
132*b494511aSVenki Rajagopalan 		goto ibt_hca_init_fail;
133*b494511aSVenki Rajagopalan 	}
134*b494511aSVenki Rajagopalan 
135*b494511aSVenki Rajagopalan 	ss->ei_props->ep_sgid = pi->p_sgid_tbl[0];
136*b494511aSVenki Rajagopalan 	ss->ei_props->ep_mtu = (128 << pi->p_mtu);
137*b494511aSVenki Rajagopalan 	ibt_free_portinfo(pi, sz_pi);
138*b494511aSVenki Rajagopalan 
139*b494511aSVenki Rajagopalan 	progress |= EIB_HCAINIT_HCA_PORTS_QUERIED;
140*b494511aSVenki Rajagopalan 
141*b494511aSVenki Rajagopalan 	/*
142*b494511aSVenki Rajagopalan 	 * Allocate a protection domain for all our transactions
143*b494511aSVenki Rajagopalan 	 */
144*b494511aSVenki Rajagopalan 	ret = ibt_alloc_pd(ss->ei_hca_hdl, IBT_PD_NO_FLAGS, &ss->ei_pd_hdl);
145*b494511aSVenki Rajagopalan 	if (ret != IBT_SUCCESS) {
146*b494511aSVenki Rajagopalan 		EIB_DPRINTF_ERR(ss->ei_instance,
147*b494511aSVenki Rajagopalan 		    "ibt_alloc_pd(hca_hdl=0x%llx, "
148*b494511aSVenki Rajagopalan 		    "hca_guid=0x%llx) failed, ret=%d",
149*b494511aSVenki Rajagopalan 		    ss->ei_hca_hdl, ss->ei_props->ep_hca_guid, ret);
150*b494511aSVenki Rajagopalan 		goto ibt_hca_init_fail;
151*b494511aSVenki Rajagopalan 	}
152*b494511aSVenki Rajagopalan 	progress |= EIB_HCAINIT_PD_ALLOCD;
153*b494511aSVenki Rajagopalan 
154*b494511aSVenki Rajagopalan 	/*
155*b494511aSVenki Rajagopalan 	 * Finally, record the capabilities
156*b494511aSVenki Rajagopalan 	 */
157*b494511aSVenki Rajagopalan 	ss->ei_caps = kmem_zalloc(sizeof (eib_caps_t), KM_SLEEP);
158*b494511aSVenki Rajagopalan 	eib_ibt_record_capab(ss, ss->ei_hca_attrs, ss->ei_caps);
159*b494511aSVenki Rajagopalan 	eib_ibt_record_srate(ss);
160*b494511aSVenki Rajagopalan 
161*b494511aSVenki Rajagopalan 	progress |= EIB_HCAINIT_CAPAB_RECORDED;
162*b494511aSVenki Rajagopalan 
163*b494511aSVenki Rajagopalan 	return (EIB_E_SUCCESS);
164*b494511aSVenki Rajagopalan 
165*b494511aSVenki Rajagopalan ibt_hca_init_fail:
166*b494511aSVenki Rajagopalan 	eib_rb_ibt_hca_init(ss, progress);
167*b494511aSVenki Rajagopalan 	return (EIB_E_FAILURE);
168*b494511aSVenki Rajagopalan }
169*b494511aSVenki Rajagopalan 
170*b494511aSVenki Rajagopalan void
eib_ibt_link_mod(eib_t * ss)171*b494511aSVenki Rajagopalan eib_ibt_link_mod(eib_t *ss)
172*b494511aSVenki Rajagopalan {
173*b494511aSVenki Rajagopalan 	eib_node_state_t *ns = ss->ei_node_state;
174*b494511aSVenki Rajagopalan 	ibt_hca_portinfo_t *pi;
175*b494511aSVenki Rajagopalan 	ibt_status_t ret;
176*b494511aSVenki Rajagopalan 	uint8_t vn0_mac[ETHERADDRL];
177*b494511aSVenki Rajagopalan 	boolean_t all_zombies = B_FALSE;
178*b494511aSVenki Rajagopalan 	boolean_t all_need_rejoin = B_FALSE;
179*b494511aSVenki Rajagopalan 	uint_t num_pi;
180*b494511aSVenki Rajagopalan 	uint_t sz_pi;
181*b494511aSVenki Rajagopalan 	uint8_t itr;
182*b494511aSVenki Rajagopalan 
183*b494511aSVenki Rajagopalan 	if (ns->ns_link_state == LINK_STATE_UNKNOWN)
184*b494511aSVenki Rajagopalan 		return;
185*b494511aSVenki Rajagopalan 
186*b494511aSVenki Rajagopalan 	/*
187*b494511aSVenki Rajagopalan 	 * See if we can get the port attributes or we're as good as down.
188*b494511aSVenki Rajagopalan 	 */
189*b494511aSVenki Rajagopalan 	ret = ibt_query_hca_ports(ss->ei_hca_hdl, ss->ei_props->ep_port_num,
190*b494511aSVenki Rajagopalan 	    &pi, &num_pi, &sz_pi);
191*b494511aSVenki Rajagopalan 	if ((ret != IBT_SUCCESS) || (pi->p_linkstate != IBT_PORT_ACTIVE)) {
192*b494511aSVenki Rajagopalan 		ibt_free_portinfo(pi, sz_pi);
193*b494511aSVenki Rajagopalan 		eib_mac_link_down(ss, B_FALSE);
194*b494511aSVenki Rajagopalan 		return;
195*b494511aSVenki Rajagopalan 	}
196*b494511aSVenki Rajagopalan 
197*b494511aSVenki Rajagopalan 	/*
198*b494511aSVenki Rajagopalan 	 * If the SM re-initialized the port attributes, but did not preserve
199*b494511aSVenki Rajagopalan 	 * the old attributes, we need to check more.
200*b494511aSVenki Rajagopalan 	 */
201*b494511aSVenki Rajagopalan 	itr = pi->p_init_type_reply;
202*b494511aSVenki Rajagopalan 	if (EIB_PORT_ATTR_LOADED(itr) && EIB_PORT_ATTR_NOT_PRESERVED(itr)) {
203*b494511aSVenki Rajagopalan 		/*
204*b494511aSVenki Rajagopalan 		 * We're just coming back up; if we see that our base lid
205*b494511aSVenki Rajagopalan 		 * or sgid table has changed, we'll update these and try to
206*b494511aSVenki Rajagopalan 		 * restart all active vnics. If any of the vnic pkeys have
207*b494511aSVenki Rajagopalan 		 * changed, we'll reset the affected channels to the new pkey.
208*b494511aSVenki Rajagopalan 		 */
209*b494511aSVenki Rajagopalan 		if (bcmp(pi->p_sgid_tbl, &ss->ei_props->ep_sgid,
210*b494511aSVenki Rajagopalan 		    sizeof (ib_gid_t)) != 0) {
211*b494511aSVenki Rajagopalan 			EIB_DPRINTF_VERBOSE(ss->ei_instance,
212*b494511aSVenki Rajagopalan 			    "eib_ibt_link_mod: port sgid table changed "
213*b494511aSVenki Rajagopalan 			    "(old %llx.%llx != new %llx.%llx), "
214*b494511aSVenki Rajagopalan 			    "all vnics are zombies now.",
215*b494511aSVenki Rajagopalan 			    ss->ei_props->ep_sgid.gid_prefix,
216*b494511aSVenki Rajagopalan 			    ss->ei_props->ep_sgid.gid_guid,
217*b494511aSVenki Rajagopalan 			    pi->p_sgid_tbl[0].gid_prefix,
218*b494511aSVenki Rajagopalan 			    pi->p_sgid_tbl[0].gid_guid);
219*b494511aSVenki Rajagopalan 
220*b494511aSVenki Rajagopalan 			ss->ei_props->ep_sgid = pi->p_sgid_tbl[0];
221*b494511aSVenki Rajagopalan 			all_zombies = B_TRUE;
222*b494511aSVenki Rajagopalan 
223*b494511aSVenki Rajagopalan 		} else if (ss->ei_props->ep_blid != pi->p_base_lid) {
224*b494511aSVenki Rajagopalan 			EIB_DPRINTF_VERBOSE(ss->ei_instance,
225*b494511aSVenki Rajagopalan 			    "eib_ibt_link_mod: port base lid changed "
226*b494511aSVenki Rajagopalan 			    "(old 0x%x != new 0x%x), "
227*b494511aSVenki Rajagopalan 			    "all vnics are zombies now.",
228*b494511aSVenki Rajagopalan 			    ss->ei_props->ep_blid, pi->p_base_lid);
229*b494511aSVenki Rajagopalan 
230*b494511aSVenki Rajagopalan 			ss->ei_props->ep_blid = pi->p_base_lid;
231*b494511aSVenki Rajagopalan 			all_zombies = B_TRUE;
232*b494511aSVenki Rajagopalan 
233*b494511aSVenki Rajagopalan 		} else if (eib_ibt_has_any_pkey_changed(ss)) {
234*b494511aSVenki Rajagopalan 			EIB_DPRINTF_VERBOSE(ss->ei_instance,
235*b494511aSVenki Rajagopalan 			    "eib_ibt_link_mod: pkey has changed for vnic(s), "
236*b494511aSVenki Rajagopalan 			    "resetting all partitions");
237*b494511aSVenki Rajagopalan 
238*b494511aSVenki Rajagopalan 			eib_ibt_reset_partitions(ss);
239*b494511aSVenki Rajagopalan 		}
240*b494511aSVenki Rajagopalan 	}
241*b494511aSVenki Rajagopalan 
242*b494511aSVenki Rajagopalan 	if (pi) {
243*b494511aSVenki Rajagopalan 		ibt_free_portinfo(pi, sz_pi);
244*b494511aSVenki Rajagopalan 	}
245*b494511aSVenki Rajagopalan 
246*b494511aSVenki Rajagopalan 	/*
247*b494511aSVenki Rajagopalan 	 * If the SM hasn't preserved our presence in MCGs, we need to
248*b494511aSVenki Rajagopalan 	 * rejoin all of them.
249*b494511aSVenki Rajagopalan 	 */
250*b494511aSVenki Rajagopalan 	if (EIB_PORT_PRES_NOT_PRESERVED(itr)) {
251*b494511aSVenki Rajagopalan 		EIB_DPRINTF_VERBOSE(ss->ei_instance, "eib_ibt_link_mod: "
252*b494511aSVenki Rajagopalan 		    "hca_guid=0x%llx, port=0x%x presence not preserved in SM, "
253*b494511aSVenki Rajagopalan 		    "rejoining all mcgs", ss->ei_props->ep_hca_guid,
254*b494511aSVenki Rajagopalan 		    ss->ei_props->ep_port_num);
255*b494511aSVenki Rajagopalan 
256*b494511aSVenki Rajagopalan 		all_need_rejoin = B_TRUE;
257*b494511aSVenki Rajagopalan 	}
258*b494511aSVenki Rajagopalan 
259*b494511aSVenki Rajagopalan 	/*
260*b494511aSVenki Rajagopalan 	 * Before we do the actual work of restarting/rejoining, we need to
261*b494511aSVenki Rajagopalan 	 * see if the GW is reachable at this point of time.  If not, we
262*b494511aSVenki Rajagopalan 	 * still continue to keep our link "down."  Whenever the GW becomes
263*b494511aSVenki Rajagopalan 	 * reachable again, we'll restart/rejoin all the vnics that we've
264*b494511aSVenki Rajagopalan 	 * just marked.
265*b494511aSVenki Rajagopalan 	 */
266*b494511aSVenki Rajagopalan 	mutex_enter(&ss->ei_vnic_lock);
267*b494511aSVenki Rajagopalan 	if (all_zombies) {
268*b494511aSVenki Rajagopalan 		ss->ei_zombie_vnics = ss->ei_active_vnics;
269*b494511aSVenki Rajagopalan 	}
270*b494511aSVenki Rajagopalan 	if (all_need_rejoin) {
271*b494511aSVenki Rajagopalan 		ss->ei_rejoin_vnics = ss->ei_active_vnics;
272*b494511aSVenki Rajagopalan 	}
273*b494511aSVenki Rajagopalan 	if (ss->ei_gw_unreachable) {
274*b494511aSVenki Rajagopalan 		mutex_exit(&ss->ei_vnic_lock);
275*b494511aSVenki Rajagopalan 
276*b494511aSVenki Rajagopalan 		EIB_DPRINTF_WARN(ss->ei_instance, "eib_ibt_link_mod: "
277*b494511aSVenki Rajagopalan 		    "gateway (gw_port=0x%x) unreachable for "
278*b494511aSVenki Rajagopalan 		    "hca_guid=0x%llx, port=0x%x, link state down",
279*b494511aSVenki Rajagopalan 		    ss->ei_gw_props->pp_gw_portid, ss->ei_props->ep_hca_guid,
280*b494511aSVenki Rajagopalan 		    ss->ei_props->ep_port_num);
281*b494511aSVenki Rajagopalan 
282*b494511aSVenki Rajagopalan 		eib_mac_link_down(ss, B_FALSE);
283*b494511aSVenki Rajagopalan 		return;
284*b494511aSVenki Rajagopalan 	}
285*b494511aSVenki Rajagopalan 	mutex_exit(&ss->ei_vnic_lock);
286*b494511aSVenki Rajagopalan 
287*b494511aSVenki Rajagopalan 	/*
288*b494511aSVenki Rajagopalan 	 * Try to awaken the dead if possible
289*b494511aSVenki Rajagopalan 	 */
290*b494511aSVenki Rajagopalan 	bcopy(eib_zero_mac, vn0_mac, ETHERADDRL);
291*b494511aSVenki Rajagopalan 	if (all_zombies) {
292*b494511aSVenki Rajagopalan 		EIB_DPRINTF_VERBOSE(ss->ei_instance, "eib_ibt_link_mod: "
293*b494511aSVenki Rajagopalan 		    "hca_guid=0x%llx, hca_port=0x%x, gw_port=0x%x, "
294*b494511aSVenki Rajagopalan 		    "attempting to resurrect zombies",
295*b494511aSVenki Rajagopalan 		    ss->ei_props->ep_hca_guid, ss->ei_props->ep_port_num,
296*b494511aSVenki Rajagopalan 		    ss->ei_gw_props->pp_gw_portid);
297*b494511aSVenki Rajagopalan 
298*b494511aSVenki Rajagopalan 		eib_vnic_resurrect_zombies(ss, vn0_mac);
299*b494511aSVenki Rajagopalan 	}
300*b494511aSVenki Rajagopalan 
301*b494511aSVenki Rajagopalan 	/*
302*b494511aSVenki Rajagopalan 	 * Re-join the mcgs if we need to
303*b494511aSVenki Rajagopalan 	 */
304*b494511aSVenki Rajagopalan 	if (all_need_rejoin) {
305*b494511aSVenki Rajagopalan 		EIB_DPRINTF_VERBOSE(ss->ei_instance, "eib_ibt_link_mod: "
306*b494511aSVenki Rajagopalan 		    "hca_guid=0x%llx, hca_port=0x%x, gw_port=0x%x, "
307*b494511aSVenki Rajagopalan 		    "attempting to rejoin mcgs",
308*b494511aSVenki Rajagopalan 		    ss->ei_props->ep_hca_guid, ss->ei_props->ep_port_num,
309*b494511aSVenki Rajagopalan 		    ss->ei_gw_props->pp_gw_portid);
310*b494511aSVenki Rajagopalan 
311*b494511aSVenki Rajagopalan 		eib_vnic_rejoin_mcgs(ss);
312*b494511aSVenki Rajagopalan 	}
313*b494511aSVenki Rajagopalan 
314*b494511aSVenki Rajagopalan 	/*
315*b494511aSVenki Rajagopalan 	 * If we've restarted the zombies because the gateway went down and
316*b494511aSVenki Rajagopalan 	 * came back, it is possible our unicast mac address changed from
317*b494511aSVenki Rajagopalan 	 * what it was earlier. If so, we need to update our unicast address
318*b494511aSVenki Rajagopalan 	 * with the mac layer before marking the link up.
319*b494511aSVenki Rajagopalan 	 */
320*b494511aSVenki Rajagopalan 	if (bcmp(vn0_mac, eib_zero_mac, ETHERADDRL) != 0)
321*b494511aSVenki Rajagopalan 		mac_unicst_update(ss->ei_mac_hdl, vn0_mac);
322*b494511aSVenki Rajagopalan 
323*b494511aSVenki Rajagopalan 	/*
324*b494511aSVenki Rajagopalan 	 * Notify the link state up if required
325*b494511aSVenki Rajagopalan 	 */
326*b494511aSVenki Rajagopalan 	eib_mac_link_up(ss, B_FALSE);
327*b494511aSVenki Rajagopalan }
328*b494511aSVenki Rajagopalan 
329*b494511aSVenki Rajagopalan int
eib_ibt_modify_chan_pkey(eib_t * ss,eib_chan_t * chan,ib_pkey_t pkey)330*b494511aSVenki Rajagopalan eib_ibt_modify_chan_pkey(eib_t *ss, eib_chan_t *chan, ib_pkey_t pkey)
331*b494511aSVenki Rajagopalan {
332*b494511aSVenki Rajagopalan 	/*
333*b494511aSVenki Rajagopalan 	 * Make sure the channel pkey and index are set to what we need
334*b494511aSVenki Rajagopalan 	 */
335*b494511aSVenki Rajagopalan 	return (eib_ibt_chan_pkey(ss, chan, pkey, B_TRUE, NULL));
336*b494511aSVenki Rajagopalan }
337*b494511aSVenki Rajagopalan 
338*b494511aSVenki Rajagopalan eib_avect_t *
eib_ibt_hold_avect(eib_t * ss,ib_lid_t dlid,uint8_t sl)339*b494511aSVenki Rajagopalan eib_ibt_hold_avect(eib_t *ss, ib_lid_t dlid, uint8_t sl)
340*b494511aSVenki Rajagopalan {
341*b494511aSVenki Rajagopalan 	uint_t ndx = dlid % EIB_AV_NBUCKETS;	/* simple hashing */
342*b494511aSVenki Rajagopalan 	eib_avect_t *av;
343*b494511aSVenki Rajagopalan 	eib_avect_t *prev;
344*b494511aSVenki Rajagopalan 	int ret;
345*b494511aSVenki Rajagopalan 
346*b494511aSVenki Rajagopalan 	mutex_enter(&ss->ei_av_lock);
347*b494511aSVenki Rajagopalan 
348*b494511aSVenki Rajagopalan 	/*
349*b494511aSVenki Rajagopalan 	 * See if we have the address vector
350*b494511aSVenki Rajagopalan 	 */
351*b494511aSVenki Rajagopalan 	prev = NULL;
352*b494511aSVenki Rajagopalan 	for (av = ss->ei_av[ndx]; av; av = av->av_next) {
353*b494511aSVenki Rajagopalan 		prev = av;
354*b494511aSVenki Rajagopalan 		if ((av->av_vect).av_dlid == dlid)
355*b494511aSVenki Rajagopalan 			break;
356*b494511aSVenki Rajagopalan 	}
357*b494511aSVenki Rajagopalan 
358*b494511aSVenki Rajagopalan 	/*
359*b494511aSVenki Rajagopalan 	 * If we don't have it, create a new one and chain it to
360*b494511aSVenki Rajagopalan 	 * the same bucket
361*b494511aSVenki Rajagopalan 	 */
362*b494511aSVenki Rajagopalan 	if (av == NULL) {
363*b494511aSVenki Rajagopalan 		av = kmem_zalloc(sizeof (eib_avect_t), KM_NOSLEEP);
364*b494511aSVenki Rajagopalan 		if (av == NULL) {
365*b494511aSVenki Rajagopalan 			mutex_exit(&ss->ei_av_lock);
366*b494511aSVenki Rajagopalan 			EIB_DPRINTF_WARN(ss->ei_instance, "eib_ibt_hold_avect: "
367*b494511aSVenki Rajagopalan 			    "no memory, could not allocate address vector");
368*b494511aSVenki Rajagopalan 			return (NULL);
369*b494511aSVenki Rajagopalan 		}
370*b494511aSVenki Rajagopalan 
371*b494511aSVenki Rajagopalan 		ret = EIB_E_FAILURE;
372*b494511aSVenki Rajagopalan 		if (!eib_wa_no_av_discover)
373*b494511aSVenki Rajagopalan 			ret = eib_ibt_fill_avect(ss, av, dlid);
374*b494511aSVenki Rajagopalan 
375*b494511aSVenki Rajagopalan 		if (ret != EIB_E_SUCCESS) {
376*b494511aSVenki Rajagopalan 			(av->av_vect).av_srate = IBT_SRATE_10;
377*b494511aSVenki Rajagopalan 			(av->av_vect).av_srvl = sl;
378*b494511aSVenki Rajagopalan 			(av->av_vect).av_port_num = ss->ei_props->ep_port_num;
379*b494511aSVenki Rajagopalan 			(av->av_vect).av_send_grh = B_FALSE;
380*b494511aSVenki Rajagopalan 			(av->av_vect).av_dlid = dlid;
381*b494511aSVenki Rajagopalan 			(av->av_vect).av_src_path = 0;	/* we use base lid */
382*b494511aSVenki Rajagopalan 		}
383*b494511aSVenki Rajagopalan 
384*b494511aSVenki Rajagopalan 		if (prev)
385*b494511aSVenki Rajagopalan 			prev->av_next = av;
386*b494511aSVenki Rajagopalan 		else
387*b494511aSVenki Rajagopalan 			ss->ei_av[ndx] = av;
388*b494511aSVenki Rajagopalan 	}
389*b494511aSVenki Rajagopalan 
390*b494511aSVenki Rajagopalan 	/*
391*b494511aSVenki Rajagopalan 	 * Increment the address vector reference count before returning
392*b494511aSVenki Rajagopalan 	 */
393*b494511aSVenki Rajagopalan 	(av->av_ref)++;
394*b494511aSVenki Rajagopalan 
395*b494511aSVenki Rajagopalan 	mutex_exit(&ss->ei_av_lock);
396*b494511aSVenki Rajagopalan 
397*b494511aSVenki Rajagopalan 	return (av);
398*b494511aSVenki Rajagopalan }
399*b494511aSVenki Rajagopalan 
400*b494511aSVenki Rajagopalan static int
eib_ibt_fill_avect(eib_t * ss,eib_avect_t * av,ib_lid_t dlid)401*b494511aSVenki Rajagopalan eib_ibt_fill_avect(eib_t *ss, eib_avect_t *av, ib_lid_t dlid)
402*b494511aSVenki Rajagopalan {
403*b494511aSVenki Rajagopalan 	ibt_node_info_t ni;
404*b494511aSVenki Rajagopalan 	ibt_path_attr_t attr;
405*b494511aSVenki Rajagopalan 	ibt_path_info_t path;
406*b494511aSVenki Rajagopalan 	ibt_status_t ret;
407*b494511aSVenki Rajagopalan 	ib_gid_t dgid;
408*b494511aSVenki Rajagopalan 
409*b494511aSVenki Rajagopalan 	if ((ret = ibt_lid_to_node_info(dlid, &ni)) != IBT_SUCCESS) {
410*b494511aSVenki Rajagopalan 		EIB_DPRINTF_WARN(ss->ei_instance, "eib_ibt_fill_avect: "
411*b494511aSVenki Rajagopalan 		    "ibt_lid_to_node_info(dlid=0x%x) failed, ret=%d",
412*b494511aSVenki Rajagopalan 		    dlid, ret);
413*b494511aSVenki Rajagopalan 		return (EIB_E_FAILURE);
414*b494511aSVenki Rajagopalan 	}
415*b494511aSVenki Rajagopalan 	dgid.gid_prefix = ss->ei_gw_props->pp_gw_sn_prefix;
416*b494511aSVenki Rajagopalan 	dgid.gid_guid = ni.n_port_guid;
417*b494511aSVenki Rajagopalan 
418*b494511aSVenki Rajagopalan 	/*
419*b494511aSVenki Rajagopalan 	 * Get the reversible path information for this destination
420*b494511aSVenki Rajagopalan 	 */
421*b494511aSVenki Rajagopalan 	bzero(&attr, sizeof (ibt_path_info_t));
422*b494511aSVenki Rajagopalan 	attr.pa_sgid = ss->ei_props->ep_sgid;
423*b494511aSVenki Rajagopalan 	attr.pa_dgids = &dgid;
424*b494511aSVenki Rajagopalan 	attr.pa_num_dgids = 1;
425*b494511aSVenki Rajagopalan 
426*b494511aSVenki Rajagopalan 	bzero(&path, sizeof (ibt_path_info_t));
427*b494511aSVenki Rajagopalan 	ret = ibt_get_paths(ss->ei_ibt_hdl, IBT_PATH_NO_FLAGS,
428*b494511aSVenki Rajagopalan 	    &attr, 1, &path, NULL);
429*b494511aSVenki Rajagopalan 	if ((ret != IBT_SUCCESS) || (path.pi_hca_guid == 0)) {
430*b494511aSVenki Rajagopalan 		EIB_DPRINTF_WARN(ss->ei_instance, "eib_ibt_fill_avect: "
431*b494511aSVenki Rajagopalan 		    "ibt_get_paths(dgid=%llx.%llx) failed, ret=%d",
432*b494511aSVenki Rajagopalan 		    dgid.gid_prefix, dgid.gid_guid);
433*b494511aSVenki Rajagopalan 		return (EIB_E_FAILURE);
434*b494511aSVenki Rajagopalan 	}
435*b494511aSVenki Rajagopalan 
436*b494511aSVenki Rajagopalan 	/*
437*b494511aSVenki Rajagopalan 	 * Fill in the address vector
438*b494511aSVenki Rajagopalan 	 */
439*b494511aSVenki Rajagopalan 	bcopy(&path.pi_prim_cep_path.cep_adds_vect, &av->av_vect,
440*b494511aSVenki Rajagopalan 	    sizeof (ibt_adds_vect_t));
441*b494511aSVenki Rajagopalan 
442*b494511aSVenki Rajagopalan 	return (EIB_E_SUCCESS);
443*b494511aSVenki Rajagopalan }
444*b494511aSVenki Rajagopalan 
445*b494511aSVenki Rajagopalan void
eib_ibt_release_avect(eib_t * ss,eib_avect_t * av)446*b494511aSVenki Rajagopalan eib_ibt_release_avect(eib_t *ss, eib_avect_t *av)
447*b494511aSVenki Rajagopalan {
448*b494511aSVenki Rajagopalan 	mutex_enter(&ss->ei_av_lock);
449*b494511aSVenki Rajagopalan 
450*b494511aSVenki Rajagopalan 	ASSERT(av->av_ref > 0);
451*b494511aSVenki Rajagopalan 	(av->av_ref)--;
452*b494511aSVenki Rajagopalan 
453*b494511aSVenki Rajagopalan 	mutex_exit(&ss->ei_av_lock);
454*b494511aSVenki Rajagopalan }
455*b494511aSVenki Rajagopalan 
456*b494511aSVenki Rajagopalan void
eib_ibt_free_avects(eib_t * ss)457*b494511aSVenki Rajagopalan eib_ibt_free_avects(eib_t *ss)
458*b494511aSVenki Rajagopalan {
459*b494511aSVenki Rajagopalan 	eib_avect_t *av;
460*b494511aSVenki Rajagopalan 	eib_avect_t *av_next;
461*b494511aSVenki Rajagopalan 	int ndx;
462*b494511aSVenki Rajagopalan 
463*b494511aSVenki Rajagopalan 	mutex_enter(&ss->ei_av_lock);
464*b494511aSVenki Rajagopalan 	for (ndx = 0; ndx < EIB_AV_NBUCKETS; ndx++) {
465*b494511aSVenki Rajagopalan 		for (av = ss->ei_av[ndx]; av; av = av_next) {
466*b494511aSVenki Rajagopalan 			av_next = av->av_next;
467*b494511aSVenki Rajagopalan 
468*b494511aSVenki Rajagopalan 			ASSERT(av->av_ref == 0);
469*b494511aSVenki Rajagopalan 			kmem_free(av, sizeof (eib_avect_t));
470*b494511aSVenki Rajagopalan 		}
471*b494511aSVenki Rajagopalan 		ss->ei_av[ndx] = NULL;
472*b494511aSVenki Rajagopalan 	}
473*b494511aSVenki Rajagopalan 	mutex_exit(&ss->ei_av_lock);
474*b494511aSVenki Rajagopalan }
475*b494511aSVenki Rajagopalan 
476*b494511aSVenki Rajagopalan /*ARGSUSED*/
477*b494511aSVenki Rajagopalan void
eib_ibt_async_handler(void * clnt_private,ibt_hca_hdl_t hca_hdl,ibt_async_code_t code,ibt_async_event_t * event)478*b494511aSVenki Rajagopalan eib_ibt_async_handler(void *clnt_private, ibt_hca_hdl_t hca_hdl,
479*b494511aSVenki Rajagopalan     ibt_async_code_t code, ibt_async_event_t *event)
480*b494511aSVenki Rajagopalan {
481*b494511aSVenki Rajagopalan 	eib_t *ss = (eib_t *)clnt_private;
482*b494511aSVenki Rajagopalan 	eib_event_t *evi;
483*b494511aSVenki Rajagopalan 	uint_t ev_code;
484*b494511aSVenki Rajagopalan 
485*b494511aSVenki Rajagopalan 	ev_code = EIB_EV_NONE;
486*b494511aSVenki Rajagopalan 
487*b494511aSVenki Rajagopalan 	switch (code) {
488*b494511aSVenki Rajagopalan 	case IBT_EVENT_SQD:
489*b494511aSVenki Rajagopalan 		EIB_DPRINTF_VERBOSE(ss->ei_instance,
490*b494511aSVenki Rajagopalan 		    "eib_ibt_async_handler: got IBT_EVENT_SQD");
491*b494511aSVenki Rajagopalan 		eib_ibt_wakeup_sqd_waiters(ss, event->ev_chan_hdl);
492*b494511aSVenki Rajagopalan 		break;
493*b494511aSVenki Rajagopalan 
494*b494511aSVenki Rajagopalan 	case IBT_EVENT_PORT_UP:
495*b494511aSVenki Rajagopalan 		if (event->ev_port == ss->ei_props->ep_port_num) {
496*b494511aSVenki Rajagopalan 			EIB_DPRINTF_VERBOSE(ss->ei_instance,
497*b494511aSVenki Rajagopalan 			    "eib_ibt_async_handler: got IBT_EVENT_PORT_UP");
498*b494511aSVenki Rajagopalan 			ev_code = EIB_EV_PORT_UP;
499*b494511aSVenki Rajagopalan 		}
500*b494511aSVenki Rajagopalan 		break;
501*b494511aSVenki Rajagopalan 
502*b494511aSVenki Rajagopalan 	case IBT_ERROR_PORT_DOWN:
503*b494511aSVenki Rajagopalan 		if (event->ev_port == ss->ei_props->ep_port_num) {
504*b494511aSVenki Rajagopalan 			EIB_DPRINTF_VERBOSE(ss->ei_instance,
505*b494511aSVenki Rajagopalan 			    "eib_ibt_async_handler: got IBT_ERROR_PORT_DOWN");
506*b494511aSVenki Rajagopalan 			ev_code = EIB_EV_PORT_DOWN;
507*b494511aSVenki Rajagopalan 		}
508*b494511aSVenki Rajagopalan 		break;
509*b494511aSVenki Rajagopalan 
510*b494511aSVenki Rajagopalan 	case IBT_CLNT_REREG_EVENT:
511*b494511aSVenki Rajagopalan 		if (event->ev_port == ss->ei_props->ep_port_num) {
512*b494511aSVenki Rajagopalan 			EIB_DPRINTF_VERBOSE(ss->ei_instance,
513*b494511aSVenki Rajagopalan 			    "eib_ibt_async_handler: got IBT_CLNT_REREG_EVENT");
514*b494511aSVenki Rajagopalan 			ev_code = EIB_EV_CLNT_REREG;
515*b494511aSVenki Rajagopalan 		}
516*b494511aSVenki Rajagopalan 		break;
517*b494511aSVenki Rajagopalan 
518*b494511aSVenki Rajagopalan 	case IBT_PORT_CHANGE_EVENT:
519*b494511aSVenki Rajagopalan 		if ((event->ev_port == ss->ei_props->ep_port_num) &&
520*b494511aSVenki Rajagopalan 		    (event->ev_port_flags & IBT_PORT_CHANGE_PKEY)) {
521*b494511aSVenki Rajagopalan 			EIB_DPRINTF_VERBOSE(ss->ei_instance,
522*b494511aSVenki Rajagopalan 			    "eib_ibt_async_handler: "
523*b494511aSVenki Rajagopalan 			    "got IBT_PORT_CHANGE_EVENT(PKEY_CHANGE)");
524*b494511aSVenki Rajagopalan 			ev_code = EIB_EV_PKEY_CHANGE;
525*b494511aSVenki Rajagopalan 		} else if ((event->ev_port == ss->ei_props->ep_port_num) &&
526*b494511aSVenki Rajagopalan 		    (event->ev_port_flags & IBT_PORT_CHANGE_SGID)) {
527*b494511aSVenki Rajagopalan 			EIB_DPRINTF_VERBOSE(ss->ei_instance,
528*b494511aSVenki Rajagopalan 			    "eib_ibt_async_handler: "
529*b494511aSVenki Rajagopalan 			    "got IBT_PORT_CHANGE_EVENT(SGID_CHANGE)");
530*b494511aSVenki Rajagopalan 			ev_code = EIB_EV_SGID_CHANGE;
531*b494511aSVenki Rajagopalan 		}
532*b494511aSVenki Rajagopalan 		break;
533*b494511aSVenki Rajagopalan 
534*b494511aSVenki Rajagopalan 	case IBT_HCA_ATTACH_EVENT:
535*b494511aSVenki Rajagopalan 		/*
536*b494511aSVenki Rajagopalan 		 * For HCA attach, after a new HCA is plugged in and
537*b494511aSVenki Rajagopalan 		 * configured using cfgadm, an explicit plumb will need
538*b494511aSVenki Rajagopalan 		 * to be run, so we don't need to do anything here.
539*b494511aSVenki Rajagopalan 		 */
540*b494511aSVenki Rajagopalan 		EIB_DPRINTF_VERBOSE(ss->ei_instance, "eib_ibt_async_handler: "
541*b494511aSVenki Rajagopalan 		    "got IBT_HCA_ATTACH_EVENT");
542*b494511aSVenki Rajagopalan 		break;
543*b494511aSVenki Rajagopalan 
544*b494511aSVenki Rajagopalan 	case IBT_HCA_DETACH_EVENT:
545*b494511aSVenki Rajagopalan 		/*
546*b494511aSVenki Rajagopalan 		 * Before an HCA unplug, cfgadm is expected to trigger
547*b494511aSVenki Rajagopalan 		 * any rcm scripts to unplumb the EoIB instances on the
548*b494511aSVenki Rajagopalan 		 * card. If so, we should not be holding any hca resource,
549*b494511aSVenki Rajagopalan 		 * since we don't do ibt_open_hca() until plumb time. However,
550*b494511aSVenki Rajagopalan 		 * if an earlier unplumb hadn't cleaned up the hca resources
551*b494511aSVenki Rajagopalan 		 * properly because the network layer hadn't returned the
552*b494511aSVenki Rajagopalan 		 * buffers at that time, we could be holding hca resources.
553*b494511aSVenki Rajagopalan 		 * We'll try to release them here, and protect the code from
554*b494511aSVenki Rajagopalan 		 * racing with some other plumb/unplumb operation.
555*b494511aSVenki Rajagopalan 		 */
556*b494511aSVenki Rajagopalan 		EIB_DPRINTF_VERBOSE(ss->ei_instance, "eib_ibt_async_handler: "
557*b494511aSVenki Rajagopalan 		    "got IBT_HCA_DETACH_EVENT");
558*b494511aSVenki Rajagopalan 
559*b494511aSVenki Rajagopalan 		eib_mac_set_nic_state(ss, EIB_NIC_STOPPING);
560*b494511aSVenki Rajagopalan 		eib_rb_rsrc_setup_bufs(ss, B_FALSE);
561*b494511aSVenki Rajagopalan 		if (ss->ei_tx || ss->ei_rx || ss->ei_lso) {
562*b494511aSVenki Rajagopalan 			EIB_DPRINTF_WARN(ss->ei_instance,
563*b494511aSVenki Rajagopalan 			    "eib_events_handler: nw layer still holding "
564*b494511aSVenki Rajagopalan 			    "hca resources, could not detach HCA");
565*b494511aSVenki Rajagopalan 		} else if (ss->ei_hca_hdl) {
566*b494511aSVenki Rajagopalan 			eib_rb_ibt_hca_init(ss, ~0);
567*b494511aSVenki Rajagopalan 		}
568*b494511aSVenki Rajagopalan 		eib_mac_clr_nic_state(ss, EIB_NIC_STOPPING);
569*b494511aSVenki Rajagopalan 
570*b494511aSVenki Rajagopalan 		break;
571*b494511aSVenki Rajagopalan 	}
572*b494511aSVenki Rajagopalan 
573*b494511aSVenki Rajagopalan 	if (ev_code != EIB_EV_NONE) {
574*b494511aSVenki Rajagopalan 		evi = kmem_zalloc(sizeof (eib_event_t), KM_NOSLEEP);
575*b494511aSVenki Rajagopalan 		if (evi == NULL) {
576*b494511aSVenki Rajagopalan 			EIB_DPRINTF_WARN(ss->ei_instance,
577*b494511aSVenki Rajagopalan 			    "eib_ibt_async_handler: "
578*b494511aSVenki Rajagopalan 			    "no memory, could not handle event 0x%lx", ev_code);
579*b494511aSVenki Rajagopalan 		} else {
580*b494511aSVenki Rajagopalan 			evi->ev_code = ev_code;
581*b494511aSVenki Rajagopalan 			evi->ev_arg = NULL;
582*b494511aSVenki Rajagopalan 			eib_svc_enqueue_event(ss, evi);
583*b494511aSVenki Rajagopalan 		}
584*b494511aSVenki Rajagopalan 	}
585*b494511aSVenki Rajagopalan }
586*b494511aSVenki Rajagopalan 
587*b494511aSVenki Rajagopalan /*ARGSUSED*/
588*b494511aSVenki Rajagopalan void
eib_ibt_record_capab(eib_t * ss,ibt_hca_attr_t * hca_attrs,eib_caps_t * caps)589*b494511aSVenki Rajagopalan eib_ibt_record_capab(eib_t *ss, ibt_hca_attr_t *hca_attrs, eib_caps_t *caps)
590*b494511aSVenki Rajagopalan {
591*b494511aSVenki Rajagopalan 	uint_t max_swqe = EIB_DATA_MAX_SWQE;
592*b494511aSVenki Rajagopalan 	uint_t max_rwqe = EIB_DATA_MAX_RWQE;
593*b494511aSVenki Rajagopalan 
594*b494511aSVenki Rajagopalan 	/*
595*b494511aSVenki Rajagopalan 	 * Checksum
596*b494511aSVenki Rajagopalan 	 */
597*b494511aSVenki Rajagopalan 	caps->cp_cksum_flags = 0;
598*b494511aSVenki Rajagopalan 	if ((!eib_wa_no_cksum_offload) &&
599*b494511aSVenki Rajagopalan 	    (hca_attrs->hca_flags & IBT_HCA_CKSUM_FULL)) {
600*b494511aSVenki Rajagopalan 		caps->cp_cksum_flags =
601*b494511aSVenki Rajagopalan 		    HCK_FULLCKSUM | HCKSUM_INET_FULL_V4;
602*b494511aSVenki Rajagopalan 		    /* HCKSUM_INET_FULL_V4 | HCKSUM_IPHDRCKSUM; */
603*b494511aSVenki Rajagopalan 	}
604*b494511aSVenki Rajagopalan 
605*b494511aSVenki Rajagopalan 	/*
606*b494511aSVenki Rajagopalan 	 * Reserved L-Key
607*b494511aSVenki Rajagopalan 	 */
608*b494511aSVenki Rajagopalan 	if (hca_attrs->hca_flags2 & IBT_HCA2_RES_LKEY) {
609*b494511aSVenki Rajagopalan 		caps->cp_resv_lkey_capab = 1;
610*b494511aSVenki Rajagopalan 		caps->cp_resv_lkey = hca_attrs->hca_reserved_lkey;
611*b494511aSVenki Rajagopalan 	}
612*b494511aSVenki Rajagopalan 
613*b494511aSVenki Rajagopalan 	/*
614*b494511aSVenki Rajagopalan 	 * LSO
615*b494511aSVenki Rajagopalan 	 */
616*b494511aSVenki Rajagopalan 	caps->cp_lso_maxlen = 0;
617*b494511aSVenki Rajagopalan 	if (!eib_wa_no_lso) {
618*b494511aSVenki Rajagopalan 		if (hca_attrs->hca_max_lso_size > EIB_LSO_MAXLEN) {
619*b494511aSVenki Rajagopalan 			caps->cp_lso_maxlen = EIB_LSO_MAXLEN;
620*b494511aSVenki Rajagopalan 		} else {
621*b494511aSVenki Rajagopalan 			caps->cp_lso_maxlen = hca_attrs->hca_max_lso_size;
622*b494511aSVenki Rajagopalan 		}
623*b494511aSVenki Rajagopalan 	}
624*b494511aSVenki Rajagopalan 
625*b494511aSVenki Rajagopalan 	/*
626*b494511aSVenki Rajagopalan 	 * SGL
627*b494511aSVenki Rajagopalan 	 *
628*b494511aSVenki Rajagopalan 	 * Translating virtual address regions into physical regions
629*b494511aSVenki Rajagopalan 	 * for using the Reserved LKey feature results in a wr sgl that
630*b494511aSVenki Rajagopalan 	 * is a little longer. Since failing ibt_map_mem_iov() is costly,
631*b494511aSVenki Rajagopalan 	 * we'll record a high-water mark (65%) when we should stop
632*b494511aSVenki Rajagopalan 	 * trying to use Reserved LKey
633*b494511aSVenki Rajagopalan 	 */
634*b494511aSVenki Rajagopalan 	if (hca_attrs->hca_flags & IBT_HCA_WQE_SIZE_INFO) {
635*b494511aSVenki Rajagopalan 		caps->cp_max_sgl = hca_attrs->hca_ud_send_sgl_sz;
636*b494511aSVenki Rajagopalan 	} else {
637*b494511aSVenki Rajagopalan 		caps->cp_max_sgl = hca_attrs->hca_max_sgl;
638*b494511aSVenki Rajagopalan 	}
639*b494511aSVenki Rajagopalan 	if (caps->cp_max_sgl > EIB_MAX_SGL) {
640*b494511aSVenki Rajagopalan 		caps->cp_max_sgl = EIB_MAX_SGL;
641*b494511aSVenki Rajagopalan 	}
642*b494511aSVenki Rajagopalan 	caps->cp_hiwm_sgl = (caps->cp_max_sgl * 65) / 100;
643*b494511aSVenki Rajagopalan 
644*b494511aSVenki Rajagopalan 	/*
645*b494511aSVenki Rajagopalan 	 * SWQE/RWQE: meet max chan size and max cq size limits (leave room
646*b494511aSVenki Rajagopalan 	 * to avoid cq overflow event)
647*b494511aSVenki Rajagopalan 	 */
648*b494511aSVenki Rajagopalan 	if (max_swqe > hca_attrs->hca_max_chan_sz)
649*b494511aSVenki Rajagopalan 		max_swqe = hca_attrs->hca_max_chan_sz;
650*b494511aSVenki Rajagopalan 	if (max_swqe > (hca_attrs->hca_max_cq_sz - 1))
651*b494511aSVenki Rajagopalan 		max_swqe = hca_attrs->hca_max_cq_sz - 1;
652*b494511aSVenki Rajagopalan 	caps->cp_max_swqe = max_swqe;
653*b494511aSVenki Rajagopalan 
654*b494511aSVenki Rajagopalan 	if (max_rwqe > hca_attrs->hca_max_chan_sz)
655*b494511aSVenki Rajagopalan 		max_rwqe = hca_attrs->hca_max_chan_sz;
656*b494511aSVenki Rajagopalan 	if (max_rwqe > (hca_attrs->hca_max_cq_sz - 1))
657*b494511aSVenki Rajagopalan 		max_rwqe = hca_attrs->hca_max_cq_sz - 1;
658*b494511aSVenki Rajagopalan 	caps->cp_max_rwqe = max_rwqe;
659*b494511aSVenki Rajagopalan }
660*b494511aSVenki Rajagopalan 
661*b494511aSVenki Rajagopalan void
eib_rb_ibt_hca_init(eib_t * ss,uint_t progress)662*b494511aSVenki Rajagopalan eib_rb_ibt_hca_init(eib_t *ss, uint_t progress)
663*b494511aSVenki Rajagopalan {
664*b494511aSVenki Rajagopalan 	ibt_status_t ret;
665*b494511aSVenki Rajagopalan 
666*b494511aSVenki Rajagopalan 	if (progress & EIB_HCAINIT_CAPAB_RECORDED) {
667*b494511aSVenki Rajagopalan 		if (ss->ei_caps) {
668*b494511aSVenki Rajagopalan 			kmem_free(ss->ei_caps, sizeof (eib_caps_t));
669*b494511aSVenki Rajagopalan 			ss->ei_caps = NULL;
670*b494511aSVenki Rajagopalan 		}
671*b494511aSVenki Rajagopalan 	}
672*b494511aSVenki Rajagopalan 
673*b494511aSVenki Rajagopalan 	if (progress & EIB_HCAINIT_PD_ALLOCD) {
674*b494511aSVenki Rajagopalan 		if (ss->ei_pd_hdl) {
675*b494511aSVenki Rajagopalan 			ret = ibt_free_pd(ss->ei_hca_hdl, ss->ei_pd_hdl);
676*b494511aSVenki Rajagopalan 			if (ret != IBT_SUCCESS) {
677*b494511aSVenki Rajagopalan 				EIB_DPRINTF_WARN(ss->ei_instance,
678*b494511aSVenki Rajagopalan 				    "eib_rb_ibt_hca_init: "
679*b494511aSVenki Rajagopalan 				    "ibt_free_pd(hca_hdl=0x%lx, pd_hdl=0x%lx) "
680*b494511aSVenki Rajagopalan 				    "failed, ret=%d", ss->ei_hca_hdl,
681*b494511aSVenki Rajagopalan 				    ss->ei_pd_hdl, ret);
682*b494511aSVenki Rajagopalan 			}
683*b494511aSVenki Rajagopalan 			ss->ei_pd_hdl = NULL;
684*b494511aSVenki Rajagopalan 		}
685*b494511aSVenki Rajagopalan 	}
686*b494511aSVenki Rajagopalan 
687*b494511aSVenki Rajagopalan 	if (progress & EIB_HCAINIT_HCA_PORTS_QUERIED) {
688*b494511aSVenki Rajagopalan 		ss->ei_props->ep_mtu = 0;
689*b494511aSVenki Rajagopalan 		bzero(&ss->ei_props->ep_sgid, sizeof (ib_gid_t));
690*b494511aSVenki Rajagopalan 	}
691*b494511aSVenki Rajagopalan 
692*b494511aSVenki Rajagopalan 	if (progress & EIB_HCAINIT_ATTRS_ALLOCD) {
693*b494511aSVenki Rajagopalan 		kmem_free(ss->ei_hca_attrs, sizeof (ibt_hca_attr_t));
694*b494511aSVenki Rajagopalan 		ss->ei_hca_attrs = NULL;
695*b494511aSVenki Rajagopalan 	}
696*b494511aSVenki Rajagopalan 
697*b494511aSVenki Rajagopalan 	if (progress & EIB_HCAINIT_HCA_OPENED) {
698*b494511aSVenki Rajagopalan 		ret = ibt_close_hca(ss->ei_hca_hdl);
699*b494511aSVenki Rajagopalan 		if (ret != IBT_SUCCESS) {
700*b494511aSVenki Rajagopalan 			EIB_DPRINTF_WARN(ss->ei_instance,
701*b494511aSVenki Rajagopalan 			    "ibt_close_hca(hca_hdl=0x%lx) failed, "
702*b494511aSVenki Rajagopalan 			    "ret=%d", ss->ei_hca_hdl, ret);
703*b494511aSVenki Rajagopalan 		}
704*b494511aSVenki Rajagopalan 		ss->ei_hca_hdl = NULL;
705*b494511aSVenki Rajagopalan 	}
706*b494511aSVenki Rajagopalan }
707*b494511aSVenki Rajagopalan 
708*b494511aSVenki Rajagopalan static void
eib_ibt_reset_partitions(eib_t * ss)709*b494511aSVenki Rajagopalan eib_ibt_reset_partitions(eib_t *ss)
710*b494511aSVenki Rajagopalan {
711*b494511aSVenki Rajagopalan 	eib_vnic_t *vnic;
712*b494511aSVenki Rajagopalan 	eib_chan_t *chan = NULL;
713*b494511aSVenki Rajagopalan 	uint64_t av;
714*b494511aSVenki Rajagopalan 	int inst = 0;
715*b494511aSVenki Rajagopalan 
716*b494511aSVenki Rajagopalan 	/*
717*b494511aSVenki Rajagopalan 	 * We already have the vhub pkey recorded in our eib_chan_t.
718*b494511aSVenki Rajagopalan 	 * We only need to make sure our pkey index still matches it.
719*b494511aSVenki Rajagopalan 	 * If not, modify the channel appropriately and update our
720*b494511aSVenki Rajagopalan 	 * records.
721*b494511aSVenki Rajagopalan 	 */
722*b494511aSVenki Rajagopalan 	if ((chan = ss->ei_admin_chan) != NULL)
723*b494511aSVenki Rajagopalan 		(void) eib_ibt_modify_chan_pkey(ss, chan, chan->ch_pkey);
724*b494511aSVenki Rajagopalan 
725*b494511aSVenki Rajagopalan 	mutex_enter(&ss->ei_vnic_lock);
726*b494511aSVenki Rajagopalan 	av = ss->ei_active_vnics;
727*b494511aSVenki Rajagopalan 	while ((inst = EIB_FIND_LSB_SET(av)) != -1) {
728*b494511aSVenki Rajagopalan 		if ((vnic = ss->ei_vnic[inst]) != NULL) {
729*b494511aSVenki Rajagopalan 			if ((chan = vnic->vn_ctl_chan) != NULL) {
730*b494511aSVenki Rajagopalan 				(void) eib_ibt_modify_chan_pkey(ss, chan,
731*b494511aSVenki Rajagopalan 				    chan->ch_pkey);
732*b494511aSVenki Rajagopalan 			}
733*b494511aSVenki Rajagopalan 			if ((chan = vnic->vn_data_chan) != NULL) {
734*b494511aSVenki Rajagopalan 				(void) eib_ibt_modify_chan_pkey(ss, chan,
735*b494511aSVenki Rajagopalan 				    chan->ch_pkey);
736*b494511aSVenki Rajagopalan 			}
737*b494511aSVenki Rajagopalan 		}
738*b494511aSVenki Rajagopalan 		av &= (~((uint64_t)1 << inst));
739*b494511aSVenki Rajagopalan 	}
740*b494511aSVenki Rajagopalan 	mutex_exit(&ss->ei_vnic_lock);
741*b494511aSVenki Rajagopalan }
742*b494511aSVenki Rajagopalan 
743*b494511aSVenki Rajagopalan static void
eib_ibt_wakeup_sqd_waiters(eib_t * ss,ibt_channel_hdl_t ev_chan_hdl)744*b494511aSVenki Rajagopalan eib_ibt_wakeup_sqd_waiters(eib_t *ss, ibt_channel_hdl_t ev_chan_hdl)
745*b494511aSVenki Rajagopalan {
746*b494511aSVenki Rajagopalan 	eib_vnic_t *vnic;
747*b494511aSVenki Rajagopalan 	eib_chan_t *chan = NULL;
748*b494511aSVenki Rajagopalan 	uint64_t av;
749*b494511aSVenki Rajagopalan 	int inst = 0;
750*b494511aSVenki Rajagopalan 
751*b494511aSVenki Rajagopalan 	/*
752*b494511aSVenki Rajagopalan 	 * See if this channel has been waiting for its queue to drain.
753*b494511aSVenki Rajagopalan 	 *
754*b494511aSVenki Rajagopalan 	 * Note that since this is especially likely to be called during
755*b494511aSVenki Rajagopalan 	 * logging in to the gateway, we also need to check the vnic
756*b494511aSVenki Rajagopalan 	 * currently being created.
757*b494511aSVenki Rajagopalan 	 */
758*b494511aSVenki Rajagopalan 	mutex_enter(&ss->ei_vnic_lock);
759*b494511aSVenki Rajagopalan 
760*b494511aSVenki Rajagopalan 	if ((vnic = ss->ei_vnic_pending) != NULL) {
761*b494511aSVenki Rajagopalan 		chan = vnic->vn_ctl_chan;
762*b494511aSVenki Rajagopalan 		if ((chan) && (chan->ch_chan == ev_chan_hdl))
763*b494511aSVenki Rajagopalan 			goto wakeup_sqd_waiters;
764*b494511aSVenki Rajagopalan 
765*b494511aSVenki Rajagopalan 		chan = vnic->vn_data_chan;
766*b494511aSVenki Rajagopalan 		if ((chan) && (chan->ch_chan == ev_chan_hdl))
767*b494511aSVenki Rajagopalan 			goto wakeup_sqd_waiters;
768*b494511aSVenki Rajagopalan 	}
769*b494511aSVenki Rajagopalan 
770*b494511aSVenki Rajagopalan 	av = ss->ei_active_vnics;
771*b494511aSVenki Rajagopalan 	while ((inst = EIB_FIND_LSB_SET(av)) != -1) {
772*b494511aSVenki Rajagopalan 		if ((vnic = ss->ei_vnic[inst]) != NULL) {
773*b494511aSVenki Rajagopalan 			chan = vnic->vn_ctl_chan;
774*b494511aSVenki Rajagopalan 			if (chan->ch_chan == ev_chan_hdl)
775*b494511aSVenki Rajagopalan 				break;
776*b494511aSVenki Rajagopalan 
777*b494511aSVenki Rajagopalan 			chan = vnic->vn_data_chan;
778*b494511aSVenki Rajagopalan 			if (chan->ch_chan == ev_chan_hdl)
779*b494511aSVenki Rajagopalan 				break;
780*b494511aSVenki Rajagopalan 		}
781*b494511aSVenki Rajagopalan 		av &= (~((uint64_t)1 << inst));
782*b494511aSVenki Rajagopalan 	}
783*b494511aSVenki Rajagopalan 
784*b494511aSVenki Rajagopalan wakeup_sqd_waiters:
785*b494511aSVenki Rajagopalan 	if (chan) {
786*b494511aSVenki Rajagopalan 		mutex_enter(&chan->ch_cep_lock);
787*b494511aSVenki Rajagopalan 		chan->ch_cep_state = IBT_STATE_SQD;
788*b494511aSVenki Rajagopalan 		cv_broadcast(&chan->ch_cep_cv);
789*b494511aSVenki Rajagopalan 		mutex_exit(&chan->ch_cep_lock);
790*b494511aSVenki Rajagopalan 	}
791*b494511aSVenki Rajagopalan 
792*b494511aSVenki Rajagopalan 	mutex_exit(&ss->ei_vnic_lock);
793*b494511aSVenki Rajagopalan }
794*b494511aSVenki Rajagopalan 
795*b494511aSVenki Rajagopalan static int
eib_ibt_chan_pkey(eib_t * ss,eib_chan_t * chan,ib_pkey_t new_pkey,boolean_t set,boolean_t * pkey_changed)796*b494511aSVenki Rajagopalan eib_ibt_chan_pkey(eib_t *ss, eib_chan_t *chan, ib_pkey_t new_pkey,
797*b494511aSVenki Rajagopalan     boolean_t set, boolean_t *pkey_changed)
798*b494511aSVenki Rajagopalan {
799*b494511aSVenki Rajagopalan 	ibt_qp_info_t qp_attr;
800*b494511aSVenki Rajagopalan 	ibt_status_t ret;
801*b494511aSVenki Rajagopalan 	uint16_t new_pkey_ix;
802*b494511aSVenki Rajagopalan 
803*b494511aSVenki Rajagopalan 	ret = ibt_pkey2index(ss->ei_hca_hdl, ss->ei_props->ep_port_num,
804*b494511aSVenki Rajagopalan 	    new_pkey, &new_pkey_ix);
805*b494511aSVenki Rajagopalan 	if (ret != IBT_SUCCESS) {
806*b494511aSVenki Rajagopalan 		EIB_DPRINTF_WARN(ss->ei_instance, "eib_ibt_chan_pkey: "
807*b494511aSVenki Rajagopalan 		    "ibt_pkey2index(hca_hdl=0x%llx, port_num=0x%x, "
808*b494511aSVenki Rajagopalan 		    "pkey=0x%x) failed, ret=%d",
809*b494511aSVenki Rajagopalan 		    ss->ei_hca_hdl, ss->ei_props->ep_port_num, new_pkey, ret);
810*b494511aSVenki Rajagopalan 		return (EIB_E_FAILURE);
811*b494511aSVenki Rajagopalan 	}
812*b494511aSVenki Rajagopalan 
813*b494511aSVenki Rajagopalan 	/*
814*b494511aSVenki Rajagopalan 	 * If the pkey and the pkey index we have already matches the
815*b494511aSVenki Rajagopalan 	 * new one, nothing to do.
816*b494511aSVenki Rajagopalan 	 */
817*b494511aSVenki Rajagopalan 	mutex_enter(&chan->ch_pkey_lock);
818*b494511aSVenki Rajagopalan 	if ((chan->ch_pkey == new_pkey) && (chan->ch_pkey_ix == new_pkey_ix)) {
819*b494511aSVenki Rajagopalan 		if (pkey_changed) {
820*b494511aSVenki Rajagopalan 			*pkey_changed = B_FALSE;
821*b494511aSVenki Rajagopalan 		}
822*b494511aSVenki Rajagopalan 		mutex_exit(&chan->ch_pkey_lock);
823*b494511aSVenki Rajagopalan 		return (EIB_E_SUCCESS);
824*b494511aSVenki Rajagopalan 	}
825*b494511aSVenki Rajagopalan 	if (pkey_changed) {
826*b494511aSVenki Rajagopalan 		*pkey_changed = B_TRUE;
827*b494511aSVenki Rajagopalan 	}
828*b494511aSVenki Rajagopalan 	mutex_exit(&chan->ch_pkey_lock);
829*b494511aSVenki Rajagopalan 
830*b494511aSVenki Rajagopalan 	/*
831*b494511aSVenki Rajagopalan 	 * Otherwise, if we're asked only to test if the pkey index
832*b494511aSVenki Rajagopalan 	 * supplied matches the one recorded in the channel, return
833*b494511aSVenki Rajagopalan 	 * success, but don't set the pkey.
834*b494511aSVenki Rajagopalan 	 */
835*b494511aSVenki Rajagopalan 	if (!set) {
836*b494511aSVenki Rajagopalan 		return (EIB_E_SUCCESS);
837*b494511aSVenki Rajagopalan 	}
838*b494511aSVenki Rajagopalan 
839*b494511aSVenki Rajagopalan 	/*
840*b494511aSVenki Rajagopalan 	 * Otherwise, we need to change channel pkey.  Pause the
841*b494511aSVenki Rajagopalan 	 * channel sendq first.
842*b494511aSVenki Rajagopalan 	 */
843*b494511aSVenki Rajagopalan 	ret = ibt_pause_sendq(chan->ch_chan, IBT_CEP_SET_SQD_EVENT);
844*b494511aSVenki Rajagopalan 	if (ret != IBT_SUCCESS) {
845*b494511aSVenki Rajagopalan 		EIB_DPRINTF_WARN(ss->ei_instance, "eib_ibt_chan_pkey: "
846*b494511aSVenki Rajagopalan 		    "ibt_pause_sendq(chan_hdl=0x%llx) failed, ret=%d",
847*b494511aSVenki Rajagopalan 		    chan->ch_chan, ret);
848*b494511aSVenki Rajagopalan 		return (EIB_E_FAILURE);
849*b494511aSVenki Rajagopalan 	}
850*b494511aSVenki Rajagopalan 
851*b494511aSVenki Rajagopalan 	/*
852*b494511aSVenki Rajagopalan 	 * Wait for the channel to enter the IBT_STATE_SQD state
853*b494511aSVenki Rajagopalan 	 */
854*b494511aSVenki Rajagopalan 	mutex_enter(&chan->ch_cep_lock);
855*b494511aSVenki Rajagopalan 	while (chan->ch_cep_state != IBT_STATE_SQD)
856*b494511aSVenki Rajagopalan 		cv_wait(&chan->ch_cep_cv, &chan->ch_cep_lock);
857*b494511aSVenki Rajagopalan 	mutex_exit(&chan->ch_cep_lock);
858*b494511aSVenki Rajagopalan 
859*b494511aSVenki Rajagopalan 	/*
860*b494511aSVenki Rajagopalan 	 * Modify the qp with the supplied pkey index and unpause the channel
861*b494511aSVenki Rajagopalan 	 * If either of these operations fail, we'll leave the channel in
862*b494511aSVenki Rajagopalan 	 * the paused state and fail.
863*b494511aSVenki Rajagopalan 	 */
864*b494511aSVenki Rajagopalan 	bzero(&qp_attr, sizeof (ibt_qp_info_t));
865*b494511aSVenki Rajagopalan 
866*b494511aSVenki Rajagopalan 	qp_attr.qp_trans = IBT_UD_SRV;
867*b494511aSVenki Rajagopalan 	qp_attr.qp_current_state = IBT_STATE_SQD;
868*b494511aSVenki Rajagopalan 	qp_attr.qp_state = IBT_STATE_SQD;
869*b494511aSVenki Rajagopalan 	qp_attr.qp_transport.ud.ud_pkey_ix = new_pkey_ix;
870*b494511aSVenki Rajagopalan 
871*b494511aSVenki Rajagopalan 	/*
872*b494511aSVenki Rajagopalan 	 * Modify the qp to set the new pkey index, then unpause the
873*b494511aSVenki Rajagopalan 	 * channel and put it in RTS state and update the new values
874*b494511aSVenki Rajagopalan 	 * in our records
875*b494511aSVenki Rajagopalan 	 */
876*b494511aSVenki Rajagopalan 	mutex_enter(&chan->ch_pkey_lock);
877*b494511aSVenki Rajagopalan 
878*b494511aSVenki Rajagopalan 	ret = ibt_modify_qp(chan->ch_chan,
879*b494511aSVenki Rajagopalan 	    IBT_CEP_SET_STATE | IBT_CEP_SET_PKEY_IX, &qp_attr, NULL);
880*b494511aSVenki Rajagopalan 	if (ret != IBT_SUCCESS) {
881*b494511aSVenki Rajagopalan 		mutex_exit(&chan->ch_pkey_lock);
882*b494511aSVenki Rajagopalan 		EIB_DPRINTF_WARN(ss->ei_instance, "eib_ibt_chan_pkey: "
883*b494511aSVenki Rajagopalan 		    "ibt_modify_qp(chan_hdl=0x%llx, IBT_CEP_SET_PKEY_IX) "
884*b494511aSVenki Rajagopalan 		    "failed for new_pkey_ix=0x%x, ret=%d",
885*b494511aSVenki Rajagopalan 		    chan->ch_chan, new_pkey_ix, ret);
886*b494511aSVenki Rajagopalan 		return (EIB_E_FAILURE);
887*b494511aSVenki Rajagopalan 	}
888*b494511aSVenki Rajagopalan 
889*b494511aSVenki Rajagopalan 	if ((ret = ibt_unpause_sendq(chan->ch_chan)) != IBT_SUCCESS) {
890*b494511aSVenki Rajagopalan 		mutex_exit(&chan->ch_pkey_lock);
891*b494511aSVenki Rajagopalan 		EIB_DPRINTF_WARN(ss->ei_instance, "eib_ibt_chan_pkey: "
892*b494511aSVenki Rajagopalan 		    "ibt_unpause_sendq(chan_hdl=0x%llx) failed, ret=%d",
893*b494511aSVenki Rajagopalan 		    chan->ch_chan, ret);
894*b494511aSVenki Rajagopalan 		return (EIB_E_FAILURE);
895*b494511aSVenki Rajagopalan 	}
896*b494511aSVenki Rajagopalan 
897*b494511aSVenki Rajagopalan 	chan->ch_pkey = new_pkey;
898*b494511aSVenki Rajagopalan 	chan->ch_pkey_ix = new_pkey_ix;
899*b494511aSVenki Rajagopalan 	mutex_exit(&chan->ch_pkey_lock);
900*b494511aSVenki Rajagopalan 
901*b494511aSVenki Rajagopalan 	return (EIB_E_SUCCESS);
902*b494511aSVenki Rajagopalan }
903*b494511aSVenki Rajagopalan 
904*b494511aSVenki Rajagopalan static boolean_t
eib_ibt_has_chan_pkey_changed(eib_t * ss,eib_chan_t * chan)905*b494511aSVenki Rajagopalan eib_ibt_has_chan_pkey_changed(eib_t *ss, eib_chan_t *chan)
906*b494511aSVenki Rajagopalan {
907*b494511aSVenki Rajagopalan 	boolean_t changed;
908*b494511aSVenki Rajagopalan 	int ret;
909*b494511aSVenki Rajagopalan 
910*b494511aSVenki Rajagopalan 	/*
911*b494511aSVenki Rajagopalan 	 * Don't modify the pkey, just ask if the pkey index for the channel's
912*b494511aSVenki Rajagopalan 	 * pkey has changed for any reason.  If we fail, assume that the pkey
913*b494511aSVenki Rajagopalan 	 * has changed.
914*b494511aSVenki Rajagopalan 	 */
915*b494511aSVenki Rajagopalan 	ret = eib_ibt_chan_pkey(ss, chan, chan->ch_pkey, B_FALSE, &changed);
916*b494511aSVenki Rajagopalan 	if (ret != EIB_E_SUCCESS)
917*b494511aSVenki Rajagopalan 		changed = B_TRUE;
918*b494511aSVenki Rajagopalan 
919*b494511aSVenki Rajagopalan 	return (changed);
920*b494511aSVenki Rajagopalan }
921*b494511aSVenki Rajagopalan 
922*b494511aSVenki Rajagopalan static boolean_t
eib_ibt_has_any_pkey_changed(eib_t * ss)923*b494511aSVenki Rajagopalan eib_ibt_has_any_pkey_changed(eib_t *ss)
924*b494511aSVenki Rajagopalan {
925*b494511aSVenki Rajagopalan 	eib_vnic_t *vnic;
926*b494511aSVenki Rajagopalan 	eib_chan_t *chan = NULL;
927*b494511aSVenki Rajagopalan 	uint64_t av;
928*b494511aSVenki Rajagopalan 	int inst = 0;
929*b494511aSVenki Rajagopalan 
930*b494511aSVenki Rajagopalan 	/*
931*b494511aSVenki Rajagopalan 	 * Return true if the pkey index of any our pkeys (of the channels
932*b494511aSVenki Rajagopalan 	 * of all active vnics) has changed.
933*b494511aSVenki Rajagopalan 	 */
934*b494511aSVenki Rajagopalan 
935*b494511aSVenki Rajagopalan 	chan = ss->ei_admin_chan;
936*b494511aSVenki Rajagopalan 	if ((chan) && (eib_ibt_has_chan_pkey_changed(ss, chan)))
937*b494511aSVenki Rajagopalan 		return (B_TRUE);
938*b494511aSVenki Rajagopalan 
939*b494511aSVenki Rajagopalan 	mutex_enter(&ss->ei_vnic_lock);
940*b494511aSVenki Rajagopalan 	av = ss->ei_active_vnics;
941*b494511aSVenki Rajagopalan 	while ((inst = EIB_FIND_LSB_SET(av)) != -1) {
942*b494511aSVenki Rajagopalan 		if ((vnic = ss->ei_vnic[inst]) != NULL) {
943*b494511aSVenki Rajagopalan 			chan = vnic->vn_ctl_chan;
944*b494511aSVenki Rajagopalan 			if ((chan) && (eib_ibt_has_chan_pkey_changed(ss, chan)))
945*b494511aSVenki Rajagopalan 				return (B_TRUE);
946*b494511aSVenki Rajagopalan 
947*b494511aSVenki Rajagopalan 			chan = vnic->vn_data_chan;
948*b494511aSVenki Rajagopalan 			if ((chan) && (eib_ibt_has_chan_pkey_changed(ss, chan)))
949*b494511aSVenki Rajagopalan 				return (B_TRUE);
950*b494511aSVenki Rajagopalan 		}
951*b494511aSVenki Rajagopalan 		av &= (~((uint64_t)1 << inst));
952*b494511aSVenki Rajagopalan 	}
953*b494511aSVenki Rajagopalan 	mutex_exit(&ss->ei_vnic_lock);
954*b494511aSVenki Rajagopalan 
955*b494511aSVenki Rajagopalan 	return (B_FALSE);
956*b494511aSVenki Rajagopalan }
957*b494511aSVenki Rajagopalan 
958*b494511aSVenki Rajagopalan /*
959*b494511aSVenki Rajagopalan  * This routine is currently used simply to derive and record the port
960*b494511aSVenki Rajagopalan  * speed from the loopback path information (for debug purposes).  For
961*b494511aSVenki Rajagopalan  * EoIB, currently the srate used in address vectors to IB neighbors
962*b494511aSVenki Rajagopalan  * and the gateway is fixed at IBT_SRATE_10. Eventually though, this
963*b494511aSVenki Rajagopalan  * information (and sl) has to come from the gateway for all destinations
964*b494511aSVenki Rajagopalan  * in the vhub table.
965*b494511aSVenki Rajagopalan  */
966*b494511aSVenki Rajagopalan static void
eib_ibt_record_srate(eib_t * ss)967*b494511aSVenki Rajagopalan eib_ibt_record_srate(eib_t *ss)
968*b494511aSVenki Rajagopalan {
969*b494511aSVenki Rajagopalan 	ib_gid_t sgid = ss->ei_props->ep_sgid;
970*b494511aSVenki Rajagopalan 	ibt_srate_t srate = IBT_SRATE_10;
971*b494511aSVenki Rajagopalan 	ibt_path_info_t path;
972*b494511aSVenki Rajagopalan 	ibt_path_attr_t path_attr;
973*b494511aSVenki Rajagopalan 	ibt_status_t ret;
974*b494511aSVenki Rajagopalan 	uint8_t num_paths;
975*b494511aSVenki Rajagopalan 
976*b494511aSVenki Rajagopalan 	bzero(&path_attr, sizeof (path_attr));
977*b494511aSVenki Rajagopalan 	path_attr.pa_dgids = &sgid;
978*b494511aSVenki Rajagopalan 	path_attr.pa_num_dgids = 1;
979*b494511aSVenki Rajagopalan 	path_attr.pa_sgid = sgid;
980*b494511aSVenki Rajagopalan 
981*b494511aSVenki Rajagopalan 	ret = ibt_get_paths(ss->ei_ibt_hdl, IBT_PATH_NO_FLAGS,
982*b494511aSVenki Rajagopalan 	    &path_attr, 1, &path, &num_paths);
983*b494511aSVenki Rajagopalan 	if (ret == IBT_SUCCESS && num_paths >= 1) {
984*b494511aSVenki Rajagopalan 		switch (srate = path.pi_prim_cep_path.cep_adds_vect.av_srate) {
985*b494511aSVenki Rajagopalan 		case IBT_SRATE_2:
986*b494511aSVenki Rajagopalan 		case IBT_SRATE_10:
987*b494511aSVenki Rajagopalan 		case IBT_SRATE_30:
988*b494511aSVenki Rajagopalan 		case IBT_SRATE_5:
989*b494511aSVenki Rajagopalan 		case IBT_SRATE_20:
990*b494511aSVenki Rajagopalan 		case IBT_SRATE_40:
991*b494511aSVenki Rajagopalan 		case IBT_SRATE_60:
992*b494511aSVenki Rajagopalan 		case IBT_SRATE_80:
993*b494511aSVenki Rajagopalan 		case IBT_SRATE_120:
994*b494511aSVenki Rajagopalan 			break;
995*b494511aSVenki Rajagopalan 		default:
996*b494511aSVenki Rajagopalan 			srate = IBT_SRATE_10;
997*b494511aSVenki Rajagopalan 		}
998*b494511aSVenki Rajagopalan 	}
999*b494511aSVenki Rajagopalan 
1000*b494511aSVenki Rajagopalan 	ss->ei_props->ep_srate = srate;
1001*b494511aSVenki Rajagopalan 
1002*b494511aSVenki Rajagopalan 	EIB_DPRINTF_DEBUG(ss->ei_instance, "eib_ibt_record_srate: "
1003*b494511aSVenki Rajagopalan 	    "srate = %d", srate);
1004*b494511aSVenki Rajagopalan }
1005