1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 /*
27  * This file implements the client interfaces of the IBMF.
28  */
29 
30 #include <sys/ib/mgt/ibmf/ibmf_impl.h>
31 #include <sys/ib/mgt/ib_mad.h>
32 
33 extern ibmf_state_t *ibmf_statep;
34 
35 /* global settable */
36 int	ibmf_send_wqes_per_port = IBMF_MAX_SQ_WRE;
37 int	ibmf_recv_wqes_per_port = IBMF_MAX_RQ_WRE;
38 int	ibmf_send_wqes_posted_per_qp = IBMF_MAX_POSTED_SQ_PER_QP;
39 int	ibmf_recv_wqes_posted_per_qp = IBMF_MAX_POSTED_RQ_PER_QP;
40 
41 int	ibmf_taskq_max_tasks = 1024;
42 
43 int	ibmf_trace_level = DPRINT_L0;
44 
/*
 * MAD class header offsets and sizes.
 * NOTE(review): presumably byte counts indexed by class-header layout;
 * the users of these constants are outside this part of the file, confirm
 * there before changing any value.
 */

/* offsets */
#define	IBMF_MAD_CL_HDR_OFF_1	0
#define	IBMF_MAD_CL_HDR_OFF_2	12

/* sizes */
#define	IBMF_MAD_CL_HDR_SZ_1	40
#define	IBMF_MAD_CL_HDR_SZ_2	20
#define	IBMF_MAD_CL_HDR_SZ_3	0
#define	IBMF_MAD_CL_HDR_SZ_4	4
51 
/*
 * IBMF_VALID_CLIENT_TYPE():
 *	Evaluates to non-zero when client_type equals one of the client class
 *	constants listed below, and to zero otherwise.
 *	The argument is expanded once per comparison, so it must not have
 *	side effects.
 */
#define	IBMF_VALID_CLIENT_TYPE(client_type) \
	((client_type) == SUBN_AGENT \
	|| (client_type) == SUBN_MANAGER \
	|| (client_type) == SUBN_ADM_AGENT \
	|| (client_type) == SUBN_ADM_MANAGER \
	|| (client_type) == PERF_AGENT \
	|| (client_type) == PERF_MANAGER \
	|| (client_type) == BM_AGENT \
	|| (client_type) == BM_MANAGER \
	|| (client_type) == DEV_MGT_AGENT \
	|| (client_type) == DEV_MGT_MANAGER \
	|| (client_type) == COMM_MGT_MANAGER_AGENT \
	|| (client_type) == SNMP_MANAGER_AGENT \
	|| (client_type) == VENDOR_09_MANAGER_AGENT \
	|| (client_type) == VENDOR_0A_MANAGER_AGENT \
	|| (client_type) == VENDOR_0B_MANAGER_AGENT \
	|| (client_type) == VENDOR_0C_MANAGER_AGENT \
	|| (client_type) == VENDOR_0D_MANAGER_AGENT \
	|| (client_type) == VENDOR_0E_MANAGER_AGENT \
	|| (client_type) == VENDOR_0F_MANAGER_AGENT \
	|| (client_type) == VENDOR_30_MANAGER_AGENT \
	|| (client_type) == VENDOR_31_MANAGER_AGENT \
	|| (client_type) == VENDOR_32_MANAGER_AGENT \
	|| (client_type) == VENDOR_33_MANAGER_AGENT \
	|| (client_type) == VENDOR_34_MANAGER_AGENT \
	|| (client_type) == VENDOR_35_MANAGER_AGENT \
	|| (client_type) == VENDOR_36_MANAGER_AGENT \
	|| (client_type) == VENDOR_37_MANAGER_AGENT \
	|| (client_type) == VENDOR_38_MANAGER_AGENT \
	|| (client_type) == VENDOR_39_MANAGER_AGENT \
	|| (client_type) == VENDOR_3A_MANAGER_AGENT \
	|| (client_type) == VENDOR_3B_MANAGER_AGENT \
	|| (client_type) == VENDOR_3C_MANAGER_AGENT \
	|| (client_type) == VENDOR_3D_MANAGER_AGENT \
	|| (client_type) == VENDOR_3E_MANAGER_AGENT \
	|| (client_type) == VENDOR_3F_MANAGER_AGENT \
	|| (client_type) == VENDOR_40_MANAGER_AGENT \
	|| (client_type) == VENDOR_41_MANAGER_AGENT \
	|| (client_type) == VENDOR_42_MANAGER_AGENT \
	|| (client_type) == VENDOR_43_MANAGER_AGENT \
	|| (client_type) == VENDOR_44_MANAGER_AGENT \
	|| (client_type) == VENDOR_45_MANAGER_AGENT \
	|| (client_type) == VENDOR_46_MANAGER_AGENT \
	|| (client_type) == VENDOR_47_MANAGER_AGENT \
	|| (client_type) == VENDOR_48_MANAGER_AGENT \
	|| (client_type) == VENDOR_49_MANAGER_AGENT \
	|| (client_type) == VENDOR_4A_MANAGER_AGENT \
	|| (client_type) == VENDOR_4B_MANAGER_AGENT \
	|| (client_type) == VENDOR_4C_MANAGER_AGENT \
	|| (client_type) == VENDOR_4D_MANAGER_AGENT \
	|| (client_type) == VENDOR_4E_MANAGER_AGENT \
	|| (client_type) == VENDOR_4F_MANAGER_AGENT \
	|| (client_type) == APPLICATION_10_MANAGER_AGENT \
	|| (client_type) == APPLICATION_11_MANAGER_AGENT \
	|| (client_type) == APPLICATION_12_MANAGER_AGENT \
	|| (client_type) == APPLICATION_13_MANAGER_AGENT \
	|| (client_type) == APPLICATION_14_MANAGER_AGENT \
	|| (client_type) == APPLICATION_15_MANAGER_AGENT \
	|| (client_type) == APPLICATION_16_MANAGER_AGENT \
	|| (client_type) == APPLICATION_17_MANAGER_AGENT \
	|| (client_type) == APPLICATION_18_MANAGER_AGENT \
	|| (client_type) == APPLICATION_19_MANAGER_AGENT \
	|| (client_type) == APPLICATION_1A_MANAGER_AGENT \
	|| (client_type) == APPLICATION_1B_MANAGER_AGENT \
	|| (client_type) == APPLICATION_1C_MANAGER_AGENT \
	|| (client_type) == APPLICATION_1D_MANAGER_AGENT \
	|| (client_type) == APPLICATION_1E_MANAGER_AGENT \
	|| (client_type) == APPLICATION_1F_MANAGER_AGENT \
	|| (client_type) == APPLICATION_20_MANAGER_AGENT \
	|| (client_type) == APPLICATION_21_MANAGER_AGENT \
	|| (client_type) == APPLICATION_22_MANAGER_AGENT \
	|| (client_type) == APPLICATION_23_MANAGER_AGENT \
	|| (client_type) == APPLICATION_24_MANAGER_AGENT \
	|| (client_type) == APPLICATION_25_MANAGER_AGENT \
	|| (client_type) == APPLICATION_26_MANAGER_AGENT \
	|| (client_type) == APPLICATION_27_MANAGER_AGENT \
	|| (client_type) == APPLICATION_28_MANAGER_AGENT \
	|| (client_type) == APPLICATION_29_MANAGER_AGENT \
	|| (client_type) == APPLICATION_2A_MANAGER_AGENT \
	|| (client_type) == APPLICATION_2B_MANAGER_AGENT \
	|| (client_type) == APPLICATION_2C_MANAGER_AGENT \
	|| (client_type) == APPLICATION_2D_MANAGER_AGENT \
	|| (client_type) == APPLICATION_2E_MANAGER_AGENT \
	|| (client_type) == APPLICATION_2F_MANAGER_AGENT \
	|| (client_type) == UNIVERSAL_CLASS)
137 
138 static ibmf_ci_t *ibmf_i_lookup_ci(ib_guid_t ci_guid);
139 static int ibmf_i_init_ci(ibmf_register_info_t *client_infop,
140     ibmf_ci_t *cip);
141 static void ibmf_i_uninit_ci(ibmf_ci_t *cip);
142 static void ibmf_i_init_ci_done(ibmf_ci_t *cip);
143 static void ibmf_i_uninit_ci_done(ibmf_ci_t *cip);
144 static int ibmf_i_init_qp(ibmf_ci_t *ibmf_cip, ibmf_qp_t *qpp);
145 static void ibmf_i_uninit_qp(ibmf_ci_t *ibmf_cip, ibmf_qp_t *qpp);
146 static int ibmf_i_init_cqs(ibmf_ci_t *cip);
147 static void ibmf_i_fini_cqs(ibmf_ci_t *cip);
148 static void ibmf_i_init_qplist(ibmf_ci_t *ibmf_cip);
149 static void ibmf_i_fini_qplist(ibmf_ci_t *ibmf_cip);
150 static int ibmf_i_lookup_client_by_info(ibmf_ci_t *ibmf_cip,
151     ibmf_register_info_t *ir_client, ibmf_client_t **clientpp);
152 
153 /*
154  * ibmf_init():
155  *	Initializes module state and registers with the IBT framework.
156  * 	Returns 0 if initialization was successful, else returns non-zero.
157  */
158 int
ibmf_init(void)159 ibmf_init(void)
160 {
161 	ibt_status_t 	status;
162 	ibt_clnt_hdl_t 	ibmf_ibt_handle;
163 
164 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_init_start,
165 	    IBMF_TNF_TRACE, "", "ibmf_init() enter\n");
166 
167 	/* setup the IBT module information */
168 	ibmf_statep->ibmf_ibt_modinfo.mi_ibt_version = IBTI_V_CURR;
169 	ibmf_statep->ibmf_ibt_modinfo.mi_clnt_class = IBT_IBMA;
170 	ibmf_statep->ibmf_ibt_modinfo.mi_async_handler
171 	    = ibmf_ibt_async_handler;
172 	ibmf_statep->ibmf_ibt_modinfo.mi_reserved = NULL;
173 	ibmf_statep->ibmf_ibt_modinfo.mi_clnt_name = "ibmf";
174 
175 	/* setup a connection to IB transport layer (IBTF) */
176 	status = ibt_attach(&ibmf_statep->ibmf_ibt_modinfo, (void *)NULL,
177 	    (void *)NULL, (void *)&ibmf_ibt_handle);
178 	if (status != IBT_SUCCESS) {
179 		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1, ibmf_init_err,
180 		    IBMF_TNF_ERROR, "", "%s, status = %d\n", tnf_string, msg,
181 		    "ibt attach failed", tnf_uint, status, status);
182 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_init_end,
183 		    IBMF_TNF_TRACE, "", "ibmf_init() exit\n");
184 		return (1);
185 	}
186 
187 	/* initialize the IBMF state context */
188 	ibmf_statep->ibmf_ibt_handle = ibmf_ibt_handle;
189 	ibmf_statep->ibmf_ci_list = (ibmf_ci_t *)NULL;
190 	ibmf_statep->ibmf_ci_list_tail = (ibmf_ci_t *)NULL;
191 	mutex_init(&ibmf_statep->ibmf_mutex, NULL, MUTEX_DRIVER, NULL);
192 	ibmf_statep->ibmf_cq_handler = ibmf_i_mad_completions;
193 
194 	ibmf_statep->ibmf_taskq = taskq_create("ibmf_taskq", IBMF_TASKQ_1THREAD,
195 	    MINCLSYSPRI, 1, ibmf_taskq_max_tasks, TASKQ_PREPOPULATE);
196 
197 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_init_end,
198 	    IBMF_TNF_TRACE, "", "ibmf_init() exit\n");
199 
200 	return (0);
201 }
202 
203 /*
204  * ibmf_fini():
205  *	Cleans up module state resources and unregisters from IBT framework.
206  */
207 int
ibmf_fini(void)208 ibmf_fini(void)
209 {
210 	ibmf_ci_t	*cip;
211 	ibmf_ci_t	*tcip;
212 	ibt_status_t	status;
213 
214 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_fini_start,
215 	    IBMF_TNF_TRACE, "", "ibmf_fini() enter\n");
216 
217 	ASSERT(MUTEX_NOT_HELD(&ibmf_statep->ibmf_mutex));
218 
219 	mutex_enter(&ibmf_statep->ibmf_mutex);
220 
221 	/* free all the Channel Interface (CI) context structures */
222 	cip = ibmf_statep->ibmf_ci_list;
223 	tcip = NULL;
224 	while (cip != (ibmf_ci_t *)NULL) {
225 
226 		mutex_enter(&cip->ci_mutex);
227 		ASSERT((cip->ci_state == IBMF_CI_STATE_PRESENT && cip->ci_ref ==
228 		    0) || (cip->ci_state == IBMF_CI_STATE_GONE));
229 		ASSERT(cip->ci_init_state == IBMF_CI_INIT_HCA_LINKED);
230 		ASSERT(cip->ci_qp_list == NULL && cip->ci_qp_list_tail == NULL);
231 		if (tcip != (ibmf_ci_t *)NULL)
232 			tcip->ci_next = cip->ci_next;
233 		if (ibmf_statep->ibmf_ci_list_tail == cip)
234 			ibmf_statep->ibmf_ci_list_tail = NULL;
235 		if (ibmf_statep->ibmf_ci_list == cip)
236 			ibmf_statep->ibmf_ci_list = cip->ci_next;
237 		tcip = cip->ci_next;
238 		mutex_exit(&cip->ci_mutex);
239 		/* free up the ci structure */
240 		if (cip->ci_port_kstatp != NULL) {
241 			kstat_delete(cip->ci_port_kstatp);
242 		}
243 		mutex_destroy(&cip->ci_mutex);
244 		mutex_destroy(&cip->ci_clients_mutex);
245 		mutex_destroy(&cip->ci_wqe_mutex);
246 		cv_destroy(&cip->ci_state_cv);
247 		cv_destroy(&cip->ci_wqes_cv);
248 		kmem_free((void *) cip, sizeof (ibmf_ci_t));
249 		cip = tcip;
250 	}
251 
252 	ASSERT(ibmf_statep->ibmf_ci_list == NULL);
253 	ASSERT(ibmf_statep->ibmf_ci_list_tail == NULL);
254 
255 	taskq_destroy(ibmf_statep->ibmf_taskq);
256 
257 	mutex_exit(&ibmf_statep->ibmf_mutex);
258 
259 	/* detach from IBTF */
260 	status = ibt_detach(ibmf_statep->ibmf_ibt_handle);
261 	if (status != IBT_SUCCESS) {
262 		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1, ibmf_fini_err,
263 		    IBMF_TNF_ERROR, "", "%s, status = %d\n", tnf_string, msg,
264 		    "ibt detach error", tnf_uint, status, status);
265 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_fini_end,
266 		    IBMF_TNF_TRACE, "", "ibmf_fini() exit\n");
267 		return (1);
268 	}
269 
270 	mutex_destroy(&ibmf_statep->ibmf_mutex);
271 
272 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_fini_end,
273 	    IBMF_TNF_TRACE, "", "ibmf_fini() exit\n");
274 
275 	return (0);
276 }
277 
278 /*
279  * ibmf_i_validate_class_mask():
280  *	Checks client type value in client information structure.
281  */
282 int
ibmf_i_validate_class_mask(ibmf_register_info_t * client_infop)283 ibmf_i_validate_class_mask(ibmf_register_info_t	*client_infop)
284 {
285 	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4,
286 	    ibmf_i_validate_class_mask_start, IBMF_TNF_TRACE, "",
287 	    "ibmf_i_validate_class_mask() enter, client_infop = %p\n",
288 	    tnf_opaque, client_infop, client_infop);
289 
290 	if (IBMF_VALID_CLIENT_TYPE(client_infop->ir_client_class) == B_FALSE) {
291 		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
292 		    ibmf_i_validate_class_mask_err, IBMF_TNF_ERROR, "",
293 		    "%s, class = %x\n", tnf_string, msg,
294 		    "invalid class", tnf_uint, class,
295 		    client_infop->ir_client_class);
296 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
297 		    ibmf_i_validate_class_mask_end, IBMF_TNF_TRACE, "",
298 		    "ibmf_i_validate_class_mask() exit\n");
299 		return (IBMF_BAD_CLASS);
300 	}
301 
302 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_validate_class_mask_end,
303 	    IBMF_TNF_TRACE, "", "ibmf_i_validate_class_mask() exit\n");
304 	return (IBMF_SUCCESS);
305 }
306 
307 /*
308  * ibmf_i_validate_ci_guid_and_port():
309  *	Checks validity of port number and HCA GUID at client
310  *	registration time.
311  */
312 int
ibmf_i_validate_ci_guid_and_port(ib_guid_t hca_guid,uint8_t port_num)313 ibmf_i_validate_ci_guid_and_port(ib_guid_t hca_guid, uint8_t port_num)
314 {
315 	ibt_status_t	status;
316 	ibt_hca_attr_t	hca_attrs;
317 
318 	IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L4,
319 	    ibmf_i_validate_ci_guid_and_port_start, IBMF_TNF_TRACE, "",
320 	    "ibmf_i_validate_ci_guid_and_port() enter, hca_guid = %x, "
321 	    "port_num = %d\n", tnf_opaque, hca_guid, hca_guid,
322 	    tnf_uint, port_num, port_num);
323 
324 	/* check for incorrect port number specification */
325 	if (port_num == 0) {
326 		IBMF_TRACE_1(IBMF_TNF_NODEBUG, 1,
327 		    ibmf_i_validate_ci_guid_and_port_err, IBMF_TNF_ERROR, "",
328 		    "%s\n", tnf_string, msg, "port num is 0");
329 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
330 		    ibmf_i_validate_ci_guid_and_port_end, IBMF_TNF_TRACE, "",
331 		    "ibmf_i_validate_ci_guid_and_port() exit\n");
332 		return (IBMF_BAD_PORT);
333 	}
334 
335 	/* call IB transport layer for HCA attributes */
336 	status = ibt_query_hca_byguid(hca_guid, &hca_attrs);
337 	if (status != IBT_SUCCESS) {
338 		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
339 		    ibmf_i_validate_ci_guid_and_port_err,
340 		    IBMF_TNF_ERROR, "", "%s, status = %d\n", tnf_string, msg,
341 		    "query_hca_guid failed", tnf_uint, status, status);
342 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
343 		    ibmf_i_validate_ci_guid_and_port_end, IBMF_TNF_TRACE, "",
344 		    "ibmf_i_validate_ci_guid_and_port() exit\n");
345 		return (IBMF_BAD_NODE);
346 	}
347 
348 	/* check if the specified port number is within the HCAs range */
349 	if (port_num > hca_attrs.hca_nports) {
350 		IBMF_TRACE_3(IBMF_TNF_NODEBUG, 1,
351 		    ibmf_i_validate_ci_guid_and_port_err, IBMF_TNF_ERROR, "",
352 		    "%s, num = %d, hca_ports = %d\n",
353 		    tnf_string, msg, "port num > valid ports",
354 		    tnf_uint, num, port_num, tnf_uint, hca_nports,
355 		    hca_attrs.hca_nports);
356 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
357 		    ibmf_i_validate_ci_guid_and_port_end, IBMF_TNF_TRACE, "",
358 		    "ibmf_i_validate_ci_guid_and_port() exit\n");
359 		return (IBMF_BAD_PORT);
360 	}
361 
362 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
363 	    ibmf_i_validate_ci_guid_and_port_end, IBMF_TNF_TRACE, "",
364 	    "ibmf_i_validate_ci_guid_and_port() exit\n");
365 	return (IBMF_SUCCESS);
366 }
367 
368 /*
369  * ibmf_i_lookup_ci():
370  * 	Lookup the ci and return if found. If the CI is not found, returns
371  * 	NULL.
372  */
373 static ibmf_ci_t *
ibmf_i_lookup_ci(ib_guid_t ci_guid)374 ibmf_i_lookup_ci(ib_guid_t ci_guid)
375 {
376 	ibmf_ci_t	*cip = NULL;
377 
378 	ASSERT(MUTEX_NOT_HELD(&ibmf_statep->ibmf_mutex));
379 
380 	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_lookup_ci_start,
381 	    IBMF_TNF_TRACE, "", "ibmf_i_lookup_ci(): enter, guid = 0x%x\n",
382 	    tnf_uint64, guid, ci_guid);
383 
384 	/* walk the CI list looking for one that matches the provided GUID */
385 	mutex_enter(&ibmf_statep->ibmf_mutex);
386 	cip = ibmf_statep->ibmf_ci_list;
387 	while (cip != (ibmf_ci_t *)NULL) {
388 		if (ci_guid == cip->ci_node_guid) {
389 			/* found it in our list */
390 			break;
391 		}
392 		cip = cip->ci_next;
393 	}
394 	mutex_exit(&ibmf_statep->ibmf_mutex);
395 
396 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_lookup_ci_end,
397 	    IBMF_TNF_TRACE, "", "ibmf_i_lookup_ci() exit\n");
398 
399 	return (cip);
400 }
401 
402 /*
403  * ibmf_i_get_ci():
404  *	Get the CI structure based on the HCA GUID from a list if it exists.
405  *	If the CI structure does not exist, and the HCA GUID is valid,
406  *	create a new CI structure and add it to the list.
407  */
408 int
ibmf_i_get_ci(ibmf_register_info_t * client_infop,ibmf_ci_t ** cipp)409 ibmf_i_get_ci(ibmf_register_info_t *client_infop, ibmf_ci_t **cipp)
410 {
411 	ibmf_ci_t 		*cip;
412 	ibt_status_t		status;
413 	boolean_t		invalid = B_FALSE;
414 	ibt_hca_attr_t		hca_attrs;
415 	ibmf_port_kstat_t	*ksp;
416 
417 	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_get_ci_start,
418 	    IBMF_TNF_TRACE, "", "ibmf_i_get_ci() enter, clinfop = %p\n",
419 	    tnf_opaque, client_infop, client_infop);
420 
421 	/* look for a CI context with a matching GUID */
422 	cip = ibmf_i_lookup_ci(client_infop->ir_ci_guid);
423 
424 	if (cip == NULL) {
425 
426 		/*
427 		 * attempt to create the ci. First, verify the ci exists.
428 		 * If it exists, allocate ci memory and insert in the ci list.
429 		 * It is possible that some other thread raced with us
430 		 * and inserted created ci while we are blocked in
431 		 * allocating memory. Check for that case and if that is indeed
432 		 * the case, free up what we allocated and try to get a
433 		 * reference count on the ci that the other thread added.
434 		 */
435 		status = ibt_query_hca_byguid(client_infop->ir_ci_guid,
436 		    &hca_attrs);
437 		if (status == IBT_SUCCESS) {
438 
439 			ibmf_ci_t *tcip;
440 			char buf[128];
441 
442 			/* allocate memory for the CI structure */
443 			cip = (ibmf_ci_t *)kmem_zalloc(sizeof (ibmf_ci_t),
444 			    KM_SLEEP);
445 
446 			_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*cip))
447 
448 			mutex_init(&cip->ci_mutex, NULL, MUTEX_DRIVER, NULL);
449 			mutex_init(&cip->ci_clients_mutex, NULL, MUTEX_DRIVER,
450 			    NULL);
451 			mutex_init(&cip->ci_wqe_mutex, NULL, MUTEX_DRIVER,
452 			    NULL);
453 			cv_init(&cip->ci_state_cv, NULL, CV_DRIVER, NULL);
454 			cv_init(&cip->ci_wqes_cv, NULL, CV_DRIVER, NULL);
455 
456 			(void) sprintf(buf, "r%08X",
457 			    (uint32_t)client_infop->ir_ci_guid);
458 			mutex_enter(&cip->ci_mutex);
459 
460 			cip->ci_state = IBMF_CI_STATE_PRESENT;
461 			cip->ci_node_guid = client_infop->ir_ci_guid;
462 
463 			/* set up per CI kstats */
464 			(void) sprintf(buf, "ibmf_%016" PRIx64 "_%d_stat",
465 			    client_infop->ir_ci_guid,
466 			    client_infop->ir_port_num);
467 			if ((cip->ci_port_kstatp = kstat_create("ibmf", 0, buf,
468 			    "misc", KSTAT_TYPE_NAMED,
469 			    sizeof (ibmf_port_kstat_t) / sizeof (kstat_named_t),
470 			    KSTAT_FLAG_WRITABLE)) == NULL) {
471 				mutex_exit(&cip->ci_mutex);
472 				mutex_destroy(&cip->ci_mutex);
473 				mutex_destroy(&cip->ci_clients_mutex);
474 				mutex_destroy(&cip->ci_wqe_mutex);
475 				cv_destroy(&cip->ci_state_cv);
476 				cv_destroy(&cip->ci_wqes_cv);
477 				kmem_free((void *)cip, sizeof (ibmf_ci_t));
478 				IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1,
479 				    ibmf_i_get_ci_err, IBMF_TNF_ERROR, "",
480 				    "%s\n", tnf_string, msg,
481 				    "kstat create failed");
482 				IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
483 				    ibmf_i_get_ci_end, IBMF_TNF_TRACE, "",
484 				    "ibmf_i_get_ci() exit\n");
485 				return (IBMF_NO_RESOURCES);
486 			}
487 			ksp = (ibmf_port_kstat_t *)cip->ci_port_kstatp->ks_data;
488 			kstat_named_init(&ksp->clients_registered,
489 			    "clients_registered", KSTAT_DATA_UINT32);
490 			kstat_named_init(&ksp->client_regs_failed,
491 			    "client_registrations_failed", KSTAT_DATA_UINT32);
492 			kstat_named_init(&ksp->send_wqes_alloced,
493 			    "send_wqes_allocated", KSTAT_DATA_UINT32);
494 			kstat_named_init(&ksp->recv_wqes_alloced,
495 			    "receive_wqes_allocated", KSTAT_DATA_UINT32);
496 			kstat_named_init(&ksp->swqe_allocs_failed,
497 			    "send_wqe_allocs_failed", KSTAT_DATA_UINT32);
498 			kstat_named_init(&ksp->rwqe_allocs_failed,
499 			    "recv_wqe_allocs_failed", KSTAT_DATA_UINT32);
500 			kstat_install(cip->ci_port_kstatp);
501 
502 			mutex_exit(&cip->ci_mutex);
503 
504 			mutex_enter(&ibmf_statep->ibmf_mutex);
505 
506 			tcip = ibmf_statep->ibmf_ci_list;
507 			while (tcip != (ibmf_ci_t *)NULL) {
508 				if (client_infop->ir_ci_guid ==
509 				    tcip->ci_node_guid) {
510 					/* found it in our list */
511 					break;
512 				}
513 				tcip = tcip->ci_next;
514 			}
515 
516 			/* if the ci isn't on the list, add it */
517 			if (tcip == NULL) {
518 				cip->ci_next = NULL;
519 
520 				_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*cip))
521 
522 				if (ibmf_statep->ibmf_ci_list_tail != NULL)
523 					ibmf_statep->ibmf_ci_list_tail->
524 					    ci_next = cip;
525 				if (ibmf_statep->ibmf_ci_list == NULL)
526 					ibmf_statep->ibmf_ci_list = cip;
527 				ibmf_statep->ibmf_ci_list_tail = cip;
528 
529 				mutex_enter(&cip->ci_mutex);
530 				cip->ci_init_state |= IBMF_CI_INIT_HCA_LINKED;
531 				mutex_exit(&cip->ci_mutex);
532 
533 			} else {
534 				/* free cip and set it to the one on the list */
535 				kstat_delete(cip->ci_port_kstatp);
536 				mutex_destroy(&cip->ci_mutex);
537 				mutex_destroy(&cip->ci_clients_mutex);
538 				mutex_destroy(&cip->ci_wqe_mutex);
539 				cv_destroy(&cip->ci_state_cv);
540 				cv_destroy(&cip->ci_wqes_cv);
541 				kmem_free((void *)cip, sizeof (ibmf_ci_t));
542 
543 				_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*cip))
544 
545 				cip = tcip;
546 			}
547 			mutex_exit(&ibmf_statep->ibmf_mutex);
548 		} else {
549 			/* we didn't find it and the CI doesn't exist */
550 			IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L1,
551 			    ibmf_i_get_ci_err, IBMF_TNF_ERROR, "", "%s\n",
552 			    tnf_string, msg, "GUID doesn't exist");
553 			IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
554 			    ibmf_i_get_ci_end, IBMF_TNF_TRACE, "",
555 			    "ibmf_i_get_ci() exit\n");
556 			return (IBMF_TRANSPORT_FAILURE);
557 		}
558 	}
559 
560 	ASSERT(cip != NULL);
561 
562 	/*
563 	 * We now have a CI context structure, either found it on the list,
564 	 * or created it.
565 	 * We now proceed to intialize the CI context.
566 	 */
567 	for (;;) {
568 		mutex_enter(&cip->ci_mutex);
569 
570 		/* CI is INITED & no state change in progress; we are all set */
571 		if (cip->ci_state == IBMF_CI_STATE_INITED && (cip->
572 		    ci_state_flags & (IBMF_CI_STATE_INVALIDATING |
573 		    IBMF_CI_STATE_UNINITING)) == 0) {
574 
575 			cip->ci_ref++;
576 			mutex_exit(&cip->ci_mutex);
577 
578 			break;
579 		}
580 
581 		/* CI is PRESENT; transition it to INITED */
582 		if (cip->ci_state == IBMF_CI_STATE_PRESENT && (cip->
583 		    ci_state_flags & (IBMF_CI_STATE_INVALIDATING |
584 		    IBMF_CI_STATE_INITING)) == 0) {
585 
586 			/* mark state as initing and init the ci */
587 			cip->ci_state_flags |= IBMF_CI_STATE_INITING;
588 			mutex_exit(&cip->ci_mutex);
589 
590 			_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*cip))
591 
592 			if (ibmf_i_init_ci(client_infop, cip) != IBMF_SUCCESS) {
593 				invalid = B_TRUE;
594 				break;
595 			}
596 
597 			_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*cip))
598 
599 			continue;
600 		}
601 
602 		/*
603 		 * If CI is GONE and no validation is in progress, we should
604 		 * return failure. Also, if CI is INITED but in the process of
605 		 * being made GONE (ie., a hot remove in progress), return
606 		 * failure.
607 		 */
608 		if ((cip->ci_state == IBMF_CI_STATE_GONE && (cip->
609 		    ci_state_flags & IBMF_CI_STATE_VALIDATING) == 0) ||
610 		    (cip->ci_state == IBMF_CI_STATE_INITED && (cip->
611 		    ci_state_flags & IBMF_CI_STATE_INVALIDATING) != 0)) {
612 
613 			IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
614 			    ibmf_i_get_ci_err, IBMF_TNF_ERROR, "",
615 			    "ci_state = %x, ci_state_flags = %x\n",
616 			    tnf_opaque, cip->ci_state, cip->ci_state,
617 			    tnf_opaque, cip->ci_state_flags,
618 			    cip->ci_state_flags);
619 
620 			invalid = B_TRUE;
621 			mutex_exit(&cip->ci_mutex);
622 
623 			break;
624 		}
625 
626 		/* a state change in progress; block waiting for state change */
627 		if (cip->ci_state_flags & IBMF_CI_STATE_VALIDATING)
628 			cip->ci_state_flags |= IBMF_CI_STATE_VALIDATE_WAIT;
629 		else if (cip->ci_state_flags & IBMF_CI_STATE_INITING)
630 			cip->ci_state_flags |= IBMF_CI_STATE_INIT_WAIT;
631 		else if (cip->ci_state_flags & IBMF_CI_STATE_UNINITING)
632 			cip->ci_state_flags |= IBMF_CI_STATE_UNINIT_WAIT;
633 
634 		cv_wait(&cip->ci_state_cv, &cip->ci_mutex);
635 
636 		mutex_exit(&cip->ci_mutex);
637 	}
638 
639 	if (invalid == B_TRUE) {
640 		IBMF_TRACE_0(IBMF_TNF_NODEBUG, DPRINT_L2, ibmf_i_get_ci_err,
641 		    IBMF_TNF_ERROR, "", "ibmf_i_get_ci() error\n");
642 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_get_ci_end,
643 		    IBMF_TNF_TRACE, "", "ibmf_i_get_ci() exit\n");
644 		return (IBMF_FAILURE);
645 	}
646 
647 	if (cip != NULL) {
648 		*cipp = cip;
649 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_get_ci_end,
650 		    IBMF_TNF_TRACE, "", "ibmf_i_get_ci() exit\n");
651 		return (IBMF_SUCCESS);
652 	} else {
653 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_get_ci_end,
654 		    IBMF_TNF_TRACE, "", "ibmf_i_get_ci() exit\n");
655 		return (IBMF_FAILURE);
656 	}
657 }
658 
659 /*
660  * ibmf_i_release_ci():
661  *	Drop the reference count for the CI.
662  */
663 void
ibmf_i_release_ci(ibmf_ci_t * cip)664 ibmf_i_release_ci(ibmf_ci_t *cip)
665 {
666 	uint_t ref;
667 
668 	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_release_ci_start,
669 	    IBMF_TNF_TRACE, "", "ibmf_i_release_ci() enter, cip = %p\n",
670 	    tnf_opaque, cip, cip);
671 
672 	ASSERT(MUTEX_NOT_HELD(&cip->ci_mutex));
673 
674 	mutex_enter(&cip->ci_mutex);
675 	ref = cip->ci_ref--;
676 	if (ref == 1) {
677 		ASSERT(cip->ci_state == IBMF_CI_STATE_INITED);
678 		cip->ci_state_flags |= IBMF_CI_STATE_UNINITING;
679 	}
680 	mutex_exit(&cip->ci_mutex);
681 
682 	if (ref == 1) {
683 		ibmf_i_uninit_ci(cip);
684 	}
685 
686 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_release_ci_end,
687 	    IBMF_TNF_TRACE, "", "ibmf_i_release_ci() exit\n");
688 }
689 
690 /*
691  * ibmf_i_init_ci():
692  *	Initialize the CI structure by setting up the HCA, allocating
693  *	protection domains, completion queues, a pool of WQEs.
694  */
695 /* ARGSUSED */
696 static int
ibmf_i_init_ci(ibmf_register_info_t * client_infop,ibmf_ci_t * cip)697 ibmf_i_init_ci(ibmf_register_info_t *client_infop, ibmf_ci_t *cip)
698 {
699 	ibt_pd_hdl_t		pd;
700 	ibt_status_t		status;
701 	ib_guid_t		ci_guid;
702 	ibt_hca_attr_t		hca_attrs;
703 	ibt_hca_hdl_t		hca_handle;
704 	ibt_pd_flags_t		pd_flags = IBT_PD_NO_FLAGS;
705 	boolean_t		error = B_FALSE;
706 	int			ibmfstatus = IBMF_SUCCESS;
707 	char			errmsg[128];
708 
709 	_NOTE(ASSUMING_PROTECTED(*cip))
710 
711 	ASSERT(MUTEX_NOT_HELD(&ibmf_statep->ibmf_mutex));
712 	ASSERT(MUTEX_NOT_HELD(&cip->ci_mutex));
713 
714 	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_init_ci_start,
715 	    IBMF_TNF_TRACE, "", "ibmf_i_init_ci() enter, cip = %p\n",
716 	    tnf_opaque, ibmf_ci, cip);
717 
718 	mutex_enter(&cip->ci_mutex);
719 	ci_guid = cip->ci_node_guid;
720 	ASSERT(cip->ci_state == IBMF_CI_STATE_PRESENT);
721 	ASSERT((cip->ci_state_flags & IBMF_CI_STATE_INITING) != 0);
722 	mutex_exit(&cip->ci_mutex);
723 
724 	/* set up a connection to the HCA specified by the GUID */
725 	status = ibt_open_hca(ibmf_statep->ibmf_ibt_handle, ci_guid,
726 	    &hca_handle);
727 	ASSERT(status != IBT_HCA_IN_USE);
728 	if (status != IBT_SUCCESS) {
729 		ibmf_i_init_ci_done(cip);
730 		(void) sprintf(errmsg, "ibt open hca failed, status = 0x%x",
731 		    status);
732 		error = B_TRUE;
733 		ibmfstatus = IBMF_TRANSPORT_FAILURE;
734 		goto bail;
735 	}
736 
737 	/* get the HCA attributes */
738 	status = ibt_query_hca(hca_handle, &hca_attrs);
739 	if (status != IBT_SUCCESS) {
740 		(void) ibt_close_hca(hca_handle);
741 		ibmf_i_init_ci_done(cip);
742 		(void) sprintf(errmsg, "ibt query hca failed, status = 0x%x",
743 		    status);
744 		error = B_TRUE;
745 		ibmfstatus = IBMF_TRANSPORT_FAILURE;
746 		goto bail;
747 	}
748 
749 	/* allocate a Protection Domain */
750 	status = ibt_alloc_pd(hca_handle, pd_flags, &pd);
751 	if (status != IBT_SUCCESS) {
752 		(void) ibt_close_hca(hca_handle);
753 		ibmf_i_init_ci_done(cip);
754 		(void) sprintf(errmsg, "alloc PD failed, status = 0x%x",
755 		    status);
756 		error = B_TRUE;
757 		ibmfstatus = IBMF_TRANSPORT_FAILURE;
758 		goto bail;
759 	}
760 
761 	/* init the ci */
762 	mutex_enter(&cip->ci_mutex);
763 	cip->ci_nports = hca_attrs.hca_nports;
764 	cip->ci_vendor_id = hca_attrs.hca_vendor_id;
765 	cip->ci_device_id = hca_attrs.hca_device_id;
766 	cip->ci_ci_handle = hca_handle;
767 	cip->ci_pd = pd;
768 	cip->ci_init_state |= IBMF_CI_INIT_HCA_INITED;
769 	mutex_exit(&cip->ci_mutex);
770 
771 	/* initialize cqs */
772 	if (ibmf_i_init_cqs(cip) != IBMF_SUCCESS) {
773 		(void) ibt_free_pd(cip->ci_ci_handle, cip->ci_pd);
774 		mutex_enter(&cip->ci_mutex);
775 		cip->ci_init_state &= ~IBMF_CI_INIT_HCA_INITED;
776 		mutex_exit(&cip->ci_mutex);
777 		(void) ibt_close_hca(cip->ci_ci_handle);
778 		ibmf_i_init_ci_done(cip);
779 		(void) sprintf(errmsg, "init CQs failed");
780 		error = B_TRUE;
781 		ibmfstatus = IBMF_FAILURE;
782 		goto bail;
783 	}
784 
785 	/* initialize wqes */
786 	if (ibmf_i_init_wqes(cip) != IBMF_SUCCESS) {
787 		ibmf_i_fini_cqs(cip);
788 		(void) ibt_free_pd(cip->ci_ci_handle, cip->ci_pd);
789 		mutex_enter(&cip->ci_mutex);
790 		cip->ci_init_state &= ~IBMF_CI_INIT_HCA_INITED;
791 		mutex_exit(&cip->ci_mutex);
792 		(void) ibt_close_hca(cip->ci_ci_handle);
793 		ibmf_i_init_ci_done(cip);
794 		(void) sprintf(errmsg, "init WQEs failed");
795 		error = B_TRUE;
796 		ibmfstatus = IBMF_FAILURE;
797 		goto bail;
798 	}
799 
800 	/* initialize the UD destination structure pool */
801 	ibmf_i_init_ud_dest(cip);
802 
803 	/* initialize the QP list */
804 	ibmf_i_init_qplist(cip);
805 
806 	/* initialize condition variable, state, and enable CQ notification */
807 	cip->ci_init_state |= IBMF_CI_INIT_MUTEX_CV_INITED;
808 	(void) ibt_enable_cq_notify(cip->ci_cq_handle, IBT_NEXT_COMPLETION);
809 	(void) ibt_enable_cq_notify(cip->ci_alt_cq_handle, IBT_NEXT_COMPLETION);
810 
811 	/* set state to INITED */
812 	mutex_enter(&cip->ci_mutex);
813 	cip->ci_state = IBMF_CI_STATE_INITED;
814 	mutex_exit(&cip->ci_mutex);
815 
816 	/* wake up waiters blocked on an initialization done event */
817 	ibmf_i_init_ci_done(cip);
818 
819 bail:
820 	if (error) {
821 		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1, ibmf_i_init_ci_err,
822 		    IBMF_TNF_ERROR, "", "%s, status = %d\n", tnf_string, msg,
823 		    errmsg, tnf_uint, ibmfstatus, ibmfstatus);
824 	}
825 
826 	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_init_ci_end,
827 	    IBMF_TNF_TRACE, "", "ibmf_i_init_ci() exit, cip = %p\n",
828 	    tnf_opaque, ibmf_ci, cip);
829 
830 	return (ibmfstatus);
831 }
832 
833 /*
834  * ibmf_i_uninit_ci():
835  *	Free up the resources allocated when initializing the CI structure.
836  */
837 static void
ibmf_i_uninit_ci(ibmf_ci_t * cip)838 ibmf_i_uninit_ci(ibmf_ci_t *cip)
839 {
840 	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_uninit_ci_start,
841 	    IBMF_TNF_TRACE, "", "ibmf_i_uninit_ci() enter, cip = %p\n",
842 	    tnf_opaque, cip, cip);
843 
844 	ASSERT(MUTEX_HELD(&cip->ci_mutex) == 0);
845 
846 	/* clean up the QP list */
847 	ibmf_i_fini_qplist(cip);
848 
849 	/* empty completions directly */
850 	ibmf_i_mad_completions(cip->ci_cq_handle, (void*)cip);
851 	ibmf_i_mad_completions(cip->ci_alt_cq_handle, (void*)cip);
852 
853 	mutex_enter(&cip->ci_mutex);
854 	if (cip->ci_init_state & IBMF_CI_INIT_MUTEX_CV_INITED) {
855 		cip->ci_init_state &= ~IBMF_CI_INIT_MUTEX_CV_INITED;
856 	}
857 	mutex_exit(&cip->ci_mutex);
858 
859 	/* clean up the UD destination structure pool */
860 	ibmf_i_fini_ud_dest(cip);
861 
862 	/* clean up any WQE caches */
863 	ibmf_i_fini_wqes(cip);
864 
865 	/* free up the completion queues */
866 	ibmf_i_fini_cqs(cip);
867 
868 	/* free up the protection domain */
869 	(void) ibt_free_pd(cip->ci_ci_handle, cip->ci_pd);
870 
871 	/* close the HCA connection */
872 	(void) ibt_close_hca(cip->ci_ci_handle);
873 
874 	/* set state down to PRESENT */
875 	mutex_enter(&cip->ci_mutex);
876 	cip->ci_init_state &= ~IBMF_CI_INIT_HCA_INITED;
877 	cip->ci_state = IBMF_CI_STATE_PRESENT;
878 	mutex_exit(&cip->ci_mutex);
879 
880 	/* wake up waiters blocked on an un-initialization done event */
881 	ibmf_i_uninit_ci_done(cip);
882 
883 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_uninit_ci_end,
884 	    IBMF_TNF_TRACE, "", "ibmf_i_uninit_ci() exit\n");
885 }
886 
887 /*
888  * ibmf_i_init_ci_done():
889  *	Mark CI initialization as "done", and wake up any waiters.
890  */
891 static void
ibmf_i_init_ci_done(ibmf_ci_t * cip)892 ibmf_i_init_ci_done(ibmf_ci_t *cip)
893 {
894 	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_init_ci_done_start,
895 	    IBMF_TNF_TRACE, "", "ibmf_i_init_ci_done() enter, cip = %p\n",
896 	    tnf_opaque, cip, cip);
897 
898 	mutex_enter(&cip->ci_mutex);
899 	cip->ci_state_flags &= ~IBMF_CI_STATE_INITING;
900 	if (cip->ci_state_flags & IBMF_CI_STATE_INIT_WAIT) {
901 		cip->ci_state_flags &= ~IBMF_CI_STATE_INIT_WAIT;
902 		cv_broadcast(&cip->ci_state_cv);
903 	}
904 	mutex_exit(&cip->ci_mutex);
905 
906 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_init_ci_done_end,
907 	    IBMF_TNF_TRACE, "", "ibmf_i_init_ci_done() exit\n");
908 }
909 
910 /*
911  * ibmf_i_uninit_ci_done():
912  *	Mark CI uninitialization as "done", and wake up any waiters.
913  */
914 static void
ibmf_i_uninit_ci_done(ibmf_ci_t * cip)915 ibmf_i_uninit_ci_done(ibmf_ci_t *cip)
916 {
917 	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_uninit_ci_done_start,
918 	    IBMF_TNF_TRACE, "", "ibmf_i_uninit_ci_done() enter, cip = %p\n",
919 	    tnf_opaque, cip, cip);
920 
921 	mutex_enter(&cip->ci_mutex);
922 	cip->ci_state_flags &= ~IBMF_CI_STATE_UNINITING;
923 	if (cip->ci_state_flags & IBMF_CI_STATE_UNINIT_WAIT) {
924 		cip->ci_state_flags &= ~IBMF_CI_STATE_UNINIT_WAIT;
925 		cv_broadcast(&cip->ci_state_cv);
926 	}
927 	mutex_exit(&cip->ci_mutex);
928 
929 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_uninit_ci_done_end,
930 	    IBMF_TNF_TRACE, "", "ibmf_i_uninit_ci_done() exit\n");
931 }
932 
933 /*
934  * ibmf_i_init_cqs():
935  *	Allocate a completion queue and set the CQ handler.
936  */
937 static int
ibmf_i_init_cqs(ibmf_ci_t * cip)938 ibmf_i_init_cqs(ibmf_ci_t *cip)
939 {
940 	ibt_status_t		status;
941 	ibt_cq_attr_t		cq_attrs;
942 	ibt_cq_hdl_t		cq_handle;
943 	uint32_t		num_entries;
944 
945 	ASSERT(MUTEX_NOT_HELD(&cip->ci_mutex));
946 
947 	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_init_cqs_start,
948 	    IBMF_TNF_TRACE, "", "ibmf_i_init_cqs() enter, cip = %p\n",
949 	    tnf_opaque, cip, cip);
950 
951 	/*
952 	 * Allocate completion queue handle.
953 	 * The CQ size should be a 2^n - 1 value to avoid excess CQ allocation
954 	 * as done by some HCAs when the CQ size is specified as a 2^n
955 	 * quantity.
956 	 */
957 	cq_attrs.cq_size = (cip->ci_nports * (ibmf_send_wqes_posted_per_qp +
958 	    ibmf_recv_wqes_posted_per_qp)) - 1;
959 
960 	cq_attrs.cq_sched = NULL;
961 	cq_attrs.cq_flags = 0;
962 
963 	/* Get the CQ handle for the special QPs */
964 	status = ibt_alloc_cq(cip->ci_ci_handle, &cq_attrs,
965 	    &cq_handle, &num_entries);
966 	if (status != IBT_SUCCESS) {
967 		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1, ibmf_i_init_cqs_err,
968 		    IBMF_TNF_ERROR, "", "%s, status = %d\n", tnf_string, msg,
969 		    "ibt_alloc_cq failed", tnf_uint, ibt_status, status);
970 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_init_cqs_end,
971 		    IBMF_TNF_TRACE, "", "ibmf_i_init_cqs() exit\n");
972 		return (IBMF_TRANSPORT_FAILURE);
973 	}
974 	ibt_set_cq_handler(cq_handle, ibmf_statep->ibmf_cq_handler, cip);
975 	cip->ci_cq_handle = cq_handle;
976 
977 	/* Get the CQ handle for the alternate QPs */
978 	status = ibt_alloc_cq(cip->ci_ci_handle, &cq_attrs,
979 	    &cq_handle, &num_entries);
980 	if (status != IBT_SUCCESS) {
981 		(void) ibt_free_cq(cip->ci_cq_handle);
982 		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1, ibmf_i_init_cqs_err,
983 		    IBMF_TNF_ERROR, "", "%s, status = %d\n", tnf_string, msg,
984 		    "ibt_alloc_cq failed", tnf_uint, ibt_status, status);
985 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_init_cqs_end,
986 		    IBMF_TNF_TRACE, "", "ibmf_i_init_cqs() exit\n");
987 		return (IBMF_TRANSPORT_FAILURE);
988 	}
989 	ibt_set_cq_handler(cq_handle, ibmf_statep->ibmf_cq_handler, cip);
990 	cip->ci_alt_cq_handle = cq_handle;
991 
992 	/* set state to CQ INITED */
993 	mutex_enter(&cip->ci_mutex);
994 	cip->ci_init_state |= IBMF_CI_INIT_CQ_INITED;
995 	mutex_exit(&cip->ci_mutex);
996 
997 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_init_cqs_end,
998 	    IBMF_TNF_TRACE, "", "ibmf_i_init_cqs() exit\n");
999 
1000 	return (IBMF_SUCCESS);
1001 }
1002 
1003 /*
1004  * ibmf_i_fini_cqs():
1005  *	Free up the completion queue
1006  */
1007 static void
ibmf_i_fini_cqs(ibmf_ci_t * cip)1008 ibmf_i_fini_cqs(ibmf_ci_t *cip)
1009 {
1010 	ibt_status_t	status;
1011 	uint_t		ci_init_state;
1012 
1013 	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_fini_cqs_start,
1014 	    IBMF_TNF_TRACE, "", "ibmf_i_fini_cqs() enter, cip = %p\n",
1015 	    tnf_opaque, cip, cip);
1016 
1017 	mutex_enter(&cip->ci_mutex);
1018 	ci_init_state = cip->ci_init_state;
1019 	cip->ci_init_state &= ~IBMF_CI_INIT_CQ_INITED;
1020 	mutex_exit(&cip->ci_mutex);
1021 
1022 	if (ci_init_state & IBMF_CI_INIT_CQ_INITED) {
1023 		status = ibt_free_cq(cip->ci_alt_cq_handle);
1024 		if (status != IBT_SUCCESS) {
1025 			IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L3,
1026 			    ibmf_i_fini_cqs_err, IBMF_TNF_ERROR, "",
1027 			    "%s, status = %d\n", tnf_string, msg,
1028 			    "ibt free cqs failed", tnf_uint, status, status);
1029 		}
1030 
1031 		status = ibt_free_cq(cip->ci_cq_handle);
1032 		if (status != IBT_SUCCESS) {
1033 			IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L3,
1034 			    ibmf_i_fini_cqs_err, IBMF_TNF_ERROR, "",
1035 			    "%s, status = %d\n", tnf_string, msg,
1036 			    "ibt free cqs failed", tnf_uint, status, status);
1037 		}
1038 	}
1039 
1040 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_fini_cqs_end,
1041 	    IBMF_TNF_TRACE, "", "ibmf_i_fini_cqs() exit");
1042 }
1043 
1044 /*
1045  * ibmf_i_init_qplist():
1046  *	Set the QP list inited state flag
1047  */
1048 static void
ibmf_i_init_qplist(ibmf_ci_t * ibmf_cip)1049 ibmf_i_init_qplist(ibmf_ci_t *ibmf_cip)
1050 {
1051 	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_init_qplist_start,
1052 	    IBMF_TNF_TRACE, "", "ibmf_i_init_qplist() enter, cip = %p\n",
1053 	    tnf_opaque, cip, ibmf_cip);
1054 
1055 	mutex_enter(&ibmf_cip->ci_mutex);
1056 	ASSERT((ibmf_cip->ci_init_state & IBMF_CI_INIT_QP_LIST_INITED) == 0);
1057 	ASSERT(ibmf_cip->ci_qp_list == NULL && ibmf_cip->ci_qp_list_tail ==
1058 	    NULL);
1059 	cv_init(&ibmf_cip->ci_qp_cv, NULL, CV_DRIVER, NULL);
1060 	ibmf_cip->ci_init_state |= IBMF_CI_INIT_QP_LIST_INITED;
1061 	mutex_exit(&ibmf_cip->ci_mutex);
1062 
1063 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_init_qplist_end,
1064 	    IBMF_TNF_TRACE, "", "ibmf_i_init_qplist() exit\n");
1065 }
1066 
/*
 * ibmf_i_fini_qplist():
 *	Clean up the QP list
 *
 *	Does nothing unless IBMF_CI_INIT_QP_LIST_INITED is set.  Otherwise
 *	every context on ci_qp_list is unlinked, its QP (if any) is flushed
 *	and freed, and the context memory is released; ci_qp_cv is then
 *	destroyed and the INITED bit cleared.  Finally the same unlink /
 *	flush / free sequence is applied to every entry on ci_alt_qp_list.
 *
 *	ci_mutex is taken here and is dropped around each call into the IBT
 *	framework, so the caller must not hold it.  Errors returned by
 *	ibt_flush_qp() and ibt_free_qp() are traced and otherwise ignored.
 */
static void
ibmf_i_fini_qplist(ibmf_ci_t *ibmf_cip)
{
	ibmf_qp_t *qpp;
	ibmf_alt_qp_t *altqpp;
	ibt_status_t status;

	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_fini_qplist_start,
	    IBMF_TNF_TRACE, "", "ibmf_i_fini_qplist() enter, cip = %p\n",
	    tnf_opaque, cip, ibmf_cip);

	mutex_enter(&ibmf_cip->ci_mutex);

	if ((ibmf_cip->ci_init_state & IBMF_CI_INIT_QP_LIST_INITED) != 0) {

		/* walk through the qp list and free the memory */
		qpp = ibmf_cip->ci_qp_list;
		while (qpp != NULL) {
			/*
			 * Remove qpp from the list.  Always popping the head
			 * lets us restart from ci_qp_list after the mutex
			 * has been dropped and re-taken below.
			 */
			ibmf_cip->ci_qp_list = qpp->iq_next;

			/* nobody may still reference or be using this QP */
			ASSERT(qpp->iq_qp_ref == 0);
			ASSERT(qpp->iq_flags == IBMF_QP_FLAGS_INVALID);

			/* do not hold ci_mutex across the IBT calls */
			mutex_exit(&ibmf_cip->ci_mutex);
			if (qpp->iq_qp_handle != NULL) {
				/* Flush the special QP */
				status = ibt_flush_qp(qpp->iq_qp_handle);
				if (status != IBT_SUCCESS) {
					IBMF_TRACE_2(IBMF_TNF_NODEBUG,
					    DPRINT_L1, ibmf_i_fini_qplist_err,
					    IBMF_TNF_ERROR, "",
					    "%s, status = %d\n", tnf_string,
					    msg, "ibt_flush_qp returned error",
					    tnf_int, status, status);
				}

				/* Grab the ci_mutex mutex before waiting */
				mutex_enter(&ibmf_cip->ci_mutex);

				/*
				 * Wait if WQEs for special QPs are alloced;
				 * ci_wqes_cv is waited on until the
				 * ci_wqes_alloced count reaches zero.
				 */
				while (ibmf_cip->ci_wqes_alloced != 0) {
					cv_wait(&ibmf_cip->ci_wqes_cv,
					    &ibmf_cip->ci_mutex);
				}

				mutex_exit(&ibmf_cip->ci_mutex);

				/* Free the special QP */
				status = ibt_free_qp(qpp->iq_qp_handle);
				if (status != IBT_SUCCESS) {
					IBMF_TRACE_2(IBMF_TNF_NODEBUG,
					    DPRINT_L1, ibmf_i_fini_qplist_err,
					    IBMF_TNF_ERROR, "",
					    "%s, status = %d\n", tnf_string,
					    msg, "ibt_free_qp returned error",
					    tnf_int, status, status);
				}
			}
			/* release the context itself */
			mutex_destroy(&qpp->iq_mutex);
			kmem_free((void *)qpp, sizeof (ibmf_qp_t));

			/* Grab the mutex again before accessing the QP list */
			mutex_enter(&ibmf_cip->ci_mutex);
			qpp = ibmf_cip->ci_qp_list;
		}

		cv_destroy(&ibmf_cip->ci_qp_cv);

		/* the special QP list is now empty and uninitialized */
		ibmf_cip->ci_qp_list = ibmf_cip->ci_qp_list_tail = NULL;
		ibmf_cip->ci_init_state &=  ~IBMF_CI_INIT_QP_LIST_INITED;

		/* now do the same for the alternate QP list */
		altqpp = ibmf_cip->ci_alt_qp_list;
		while (altqpp != NULL) {
			/* Remove altqpp from the list */
			ibmf_cip->ci_alt_qp_list = altqpp->isq_next;
			mutex_exit(&ibmf_cip->ci_mutex);

			if (altqpp->isq_qp_handle != NULL) {
				/* Flush the alternate QP */
				status = ibt_flush_qp(altqpp->isq_qp_handle);
				if (status != IBT_SUCCESS) {
					IBMF_TRACE_2(IBMF_TNF_NODEBUG,
					    DPRINT_L1, ibmf_i_fini_qplist_err,
					    IBMF_TNF_ERROR, "",
					    "%s, status = %d\n", tnf_string,
					    msg, "ibt_flush_qp returned error",
					    tnf_int, status, status);
				}

				/* Free the alternate QP */
				status = ibt_free_qp(altqpp->isq_qp_handle);
				if (status != IBT_SUCCESS) {
					IBMF_TRACE_2(IBMF_TNF_NODEBUG,
					    DPRINT_L1, ibmf_i_fini_qplist_err,
					    IBMF_TNF_ERROR, "",
					    "%s, status = %d\n", tnf_string,
					    msg, "ibt_free_qp returned error",
					    tnf_int, status, status);
				}
			}
			/* release the alternate QP context itself */
			mutex_destroy(&altqpp->isq_mutex);
			kmem_free((void *)altqpp, sizeof (ibmf_alt_qp_t));

			/* Grab the mutex again before accessing the QP list */
			mutex_enter(&ibmf_cip->ci_mutex);
			altqpp = ibmf_cip->ci_alt_qp_list;
		}
	}

	mutex_exit(&ibmf_cip->ci_mutex);

	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_fini_qplist_end,
	    IBMF_TNF_TRACE, "", "ibmf_i_fini_qplist() exit\n");
}
1185 
1186 /*
1187  * ibmf_i_alloc_client():
1188  *	Allocate and initialize the client structure.
1189  */
1190 int
ibmf_i_alloc_client(ibmf_register_info_t * client_infop,uint_t flags,ibmf_client_t ** clientpp)1191 ibmf_i_alloc_client(ibmf_register_info_t *client_infop, uint_t flags,
1192     ibmf_client_t **clientpp)
1193 {
1194 	ibmf_client_t		*ibmf_clientp;
1195 	char			buf[128];
1196 	ibmf_kstat_t		*ksp;
1197 
1198 	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_alloc_client_start,
1199 	    IBMF_TNF_TRACE, "", "ibmf_i_alloc_client() enter, "
1200 	    "client_infop = %p\n", tnf_opaque, client_infop, client_infop);
1201 
1202 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ibmf_clientp))
1203 
1204 	/* allocate memory for ibmf_client and initialize it */
1205 	ibmf_clientp = kmem_zalloc(sizeof (ibmf_client_t), KM_SLEEP);
1206 	mutex_init(&ibmf_clientp->ic_mutex, NULL, MUTEX_DRIVER, NULL);
1207 	mutex_init(&ibmf_clientp->ic_msg_mutex, NULL, MUTEX_DRIVER, NULL);
1208 	mutex_init(&ibmf_clientp->ic_kstat_mutex, NULL, MUTEX_DRIVER, NULL);
1209 	cv_init(&ibmf_clientp->ic_recv_cb_teardown_cv, NULL, CV_DRIVER, NULL);
1210 
1211 	(void) sprintf(buf, "s%08X_0x%08X",
1212 	    (uint32_t)client_infop->ir_ci_guid, client_infop->ir_client_class);
1213 
1214 	/* create a taskq to handle send completions based on reg flags */
1215 	if ((flags & IBMF_REG_FLAG_NO_OFFLOAD) == 0) {
1216 		if (flags & IBMF_REG_FLAG_SINGLE_OFFLOAD)
1217 			ibmf_clientp->ic_send_taskq = taskq_create(buf,
1218 			    IBMF_TASKQ_1THREAD, MINCLSYSPRI, 1,
1219 			    ibmf_taskq_max_tasks, TASKQ_PREPOPULATE);
1220 		else
1221 			ibmf_clientp->ic_send_taskq = taskq_create(buf,
1222 			    IBMF_TASKQ_NTHREADS, MINCLSYSPRI, 1,
1223 			    ibmf_taskq_max_tasks,
1224 			    TASKQ_DYNAMIC | TASKQ_PREPOPULATE);
1225 		if (ibmf_clientp->ic_send_taskq == NULL) {
1226 			cv_destroy(&ibmf_clientp->ic_recv_cb_teardown_cv);
1227 			mutex_destroy(&ibmf_clientp->ic_mutex);
1228 			mutex_destroy(&ibmf_clientp->ic_msg_mutex);
1229 			mutex_destroy(&ibmf_clientp->ic_kstat_mutex);
1230 			kmem_free((void *)ibmf_clientp, sizeof (ibmf_client_t));
1231 			IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1,
1232 			    ibmf_i_alloc_client_err, IBMF_TNF_ERROR, "", "%s\n",
1233 			    tnf_string, msg, buf);
1234 			IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
1235 			    ibmf_i_alloc_client_end, IBMF_TNF_TRACE, "",
1236 			    "ibmf_i_alloc_client() exit\n");
1237 			return (IBMF_NO_RESOURCES);
1238 		}
1239 	}
1240 	ibmf_clientp->ic_init_state_class |= IBMF_CI_INIT_SEND_TASKQ_DONE;
1241 
1242 	(void) sprintf(buf, "r%08X_0x%08X",
1243 	    (uint32_t)client_infop->ir_ci_guid, client_infop->ir_client_class);
1244 
1245 	/* create a taskq to handle receive completions on reg flags */
1246 	if ((flags & IBMF_REG_FLAG_NO_OFFLOAD) == 0) {
1247 		if (flags & IBMF_REG_FLAG_SINGLE_OFFLOAD)
1248 			ibmf_clientp->ic_recv_taskq = taskq_create(buf,
1249 			    IBMF_TASKQ_1THREAD, MINCLSYSPRI, 1,
1250 			    ibmf_taskq_max_tasks, TASKQ_PREPOPULATE);
1251 		else
1252 			ibmf_clientp->ic_recv_taskq = taskq_create(buf,
1253 			    IBMF_TASKQ_NTHREADS, MINCLSYSPRI, 1,
1254 			    ibmf_taskq_max_tasks,
1255 			    TASKQ_DYNAMIC | TASKQ_PREPOPULATE);
1256 		if (ibmf_clientp->ic_recv_taskq == NULL) {
1257 			cv_destroy(&ibmf_clientp->ic_recv_cb_teardown_cv);
1258 			mutex_destroy(&ibmf_clientp->ic_mutex);
1259 			mutex_destroy(&ibmf_clientp->ic_msg_mutex);
1260 			mutex_destroy(&ibmf_clientp->ic_kstat_mutex);
1261 			taskq_destroy(ibmf_clientp->ic_send_taskq);
1262 			kmem_free((void *)ibmf_clientp, sizeof (ibmf_client_t));
1263 			IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1,
1264 			    ibmf_i_alloc_client_err, IBMF_TNF_ERROR, "", "%s\n",
1265 			    tnf_string, msg, buf);
1266 			IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
1267 			    ibmf_i_alloc_client_end, IBMF_TNF_TRACE, "",
1268 			    "ibmf_i_alloc_client() exit\n");
1269 			return (IBMF_NO_RESOURCES);
1270 		}
1271 	}
1272 	ibmf_clientp->ic_init_state_class |= IBMF_CI_INIT_RECV_TASKQ_DONE;
1273 	ibmf_clientp->ic_client_info.ci_guid = client_infop->ir_ci_guid;
1274 	ibmf_clientp->ic_client_info.port_num = client_infop->ir_port_num;
1275 
1276 	/* Get the base LID */
1277 	(void) ibt_get_port_state_byguid(ibmf_clientp->ic_client_info.ci_guid,
1278 	    ibmf_clientp->ic_client_info.port_num, NULL,
1279 	    &ibmf_clientp->ic_base_lid);
1280 
1281 	ibmf_clientp->ic_client_info.client_class =
1282 	    client_infop->ir_client_class;
1283 
1284 	/* set up the per client ibmf kstats */
1285 	(void) sprintf(buf, "ibmf_%016" PRIx64 "_%d_%X_stat",
1286 	    client_infop->ir_ci_guid, client_infop->ir_port_num,
1287 	    client_infop->ir_client_class);
1288 	if ((ibmf_clientp->ic_kstatp = kstat_create("ibmf", 0, buf, "misc",
1289 	    KSTAT_TYPE_NAMED, sizeof (ibmf_kstat_t) / sizeof (kstat_named_t),
1290 	    KSTAT_FLAG_WRITABLE)) == NULL) {
1291 		cv_destroy(&ibmf_clientp->ic_recv_cb_teardown_cv);
1292 		mutex_destroy(&ibmf_clientp->ic_mutex);
1293 		mutex_destroy(&ibmf_clientp->ic_msg_mutex);
1294 		mutex_destroy(&ibmf_clientp->ic_kstat_mutex);
1295 		if ((flags & IBMF_REG_FLAG_NO_OFFLOAD) == 0) {
1296 			taskq_destroy(ibmf_clientp->ic_send_taskq);
1297 			taskq_destroy(ibmf_clientp->ic_recv_taskq);
1298 		}
1299 		kmem_free((void *)ibmf_clientp, sizeof (ibmf_client_t));
1300 		IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1,
1301 		    ibmf_i_alloc_client_err, IBMF_TNF_ERROR, "", "%s\n",
1302 		    tnf_string, msg, "kstat creation failed");
1303 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
1304 		    ibmf_i_alloc_client_end, IBMF_TNF_TRACE, "",
1305 		    "ibmf_i_alloc_client() exit\n");
1306 		return (IBMF_NO_RESOURCES);
1307 	}
1308 	ksp = (ibmf_kstat_t *)ibmf_clientp->ic_kstatp->ks_data;
1309 	kstat_named_init(&ksp->msgs_alloced, "messages_allocated",
1310 	    KSTAT_DATA_UINT32);
1311 	kstat_named_init(&ksp->msgs_active, "messages_active",
1312 	    KSTAT_DATA_UINT32);
1313 	kstat_named_init(&ksp->msgs_sent, "messages_sent", KSTAT_DATA_UINT32);
1314 	kstat_named_init(&ksp->msgs_received, "messages_received",
1315 	    KSTAT_DATA_UINT32);
1316 	kstat_named_init(&ksp->sends_active, "sends_active", KSTAT_DATA_UINT32);
1317 	kstat_named_init(&ksp->recvs_active, "receives_active",
1318 	    KSTAT_DATA_UINT32);
1319 	kstat_named_init(&ksp->ud_dests_alloced, "ud_dests_allocated",
1320 	    KSTAT_DATA_UINT32);
1321 	kstat_named_init(&ksp->alt_qps_alloced, "alt_qps_allocated",
1322 	    KSTAT_DATA_UINT32);
1323 	kstat_named_init(&ksp->send_cb_active, "send_callbacks_active",
1324 	    KSTAT_DATA_UINT32);
1325 	kstat_named_init(&ksp->recv_cb_active, "receive_callbacks_active",
1326 	    KSTAT_DATA_UINT32);
1327 	kstat_named_init(&ksp->recv_bufs_alloced, "receive_bufs_allocated",
1328 	    KSTAT_DATA_UINT32);
1329 	kstat_named_init(&ksp->msg_allocs_failed, "msg_allocs_failed",
1330 	    KSTAT_DATA_UINT32);
1331 	kstat_named_init(&ksp->uddest_allocs_failed, "uddest_allocs_failed",
1332 	    KSTAT_DATA_UINT32);
1333 	kstat_named_init(&ksp->alt_qp_allocs_failed, "alt_qp_allocs_failed",
1334 	    KSTAT_DATA_UINT32);
1335 	kstat_named_init(&ksp->send_pkt_failed, "send_pkt_failed",
1336 	    KSTAT_DATA_UINT32);
1337 	kstat_named_init(&ksp->rmpp_errors, "rmpp_errors",
1338 	    KSTAT_DATA_UINT32);
1339 
1340 	kstat_install(ibmf_clientp->ic_kstatp);
1341 
1342 	*clientpp = ibmf_clientp;
1343 
1344 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*ibmf_clientp))
1345 
1346 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_alloc_client_end,
1347 	    IBMF_TNF_TRACE, "", "ibmf_i_alloc_client() exit\n");
1348 
1349 	return (IBMF_SUCCESS);
1350 }
1351 
1352 /*
1353  * ibmf_i_free_client():
1354  *	Free up the client structure and release resources
1355  */
1356 void
ibmf_i_free_client(ibmf_client_t * clientp)1357 ibmf_i_free_client(ibmf_client_t *clientp)
1358 {
1359 	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_free_client_start,
1360 	    IBMF_TNF_TRACE, "", "ibmf_i_free_client() enter, clientp = %p\n",
1361 	    tnf_opaque, clientp, clientp);
1362 
1363 	/* delete the general ibmf kstats */
1364 	if (clientp->ic_kstatp != NULL) {
1365 		kstat_delete(clientp->ic_kstatp);
1366 		clientp->ic_kstatp = NULL;
1367 	}
1368 
1369 	/* release references and destroy the resources */
1370 	if (clientp->ic_init_state_class & IBMF_CI_INIT_SEND_TASKQ_DONE) {
1371 		if ((clientp->ic_reg_flags & IBMF_REG_FLAG_NO_OFFLOAD) == 0) {
1372 			taskq_destroy(clientp->ic_send_taskq);
1373 		}
1374 		clientp->ic_init_state_class &= ~IBMF_CI_INIT_SEND_TASKQ_DONE;
1375 	}
1376 
1377 	if (clientp->ic_init_state_class & IBMF_CI_INIT_RECV_TASKQ_DONE) {
1378 		if ((clientp->ic_reg_flags & IBMF_REG_FLAG_NO_OFFLOAD) == 0) {
1379 			taskq_destroy(clientp->ic_recv_taskq);
1380 		}
1381 		clientp->ic_init_state_class &= ~IBMF_CI_INIT_RECV_TASKQ_DONE;
1382 	}
1383 
1384 	mutex_destroy(&clientp->ic_mutex);
1385 	mutex_destroy(&clientp->ic_msg_mutex);
1386 	mutex_destroy(&clientp->ic_kstat_mutex);
1387 	cv_destroy(&clientp->ic_recv_cb_teardown_cv);
1388 	kmem_free((void *)clientp, sizeof (ibmf_client_t));
1389 
1390 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_free_client_end,
1391 	    IBMF_TNF_TRACE, "", "ibmf_i_free_client() exit\n");
1392 }
1393 
1394 /*
1395  * ibmf_i_validate_classes_and_port():
1396  *	Validate the class type and get the client structure
1397  */
1398 int
ibmf_i_validate_classes_and_port(ibmf_ci_t * ibmf_cip,ibmf_register_info_t * client_infop)1399 ibmf_i_validate_classes_and_port(ibmf_ci_t *ibmf_cip,
1400     ibmf_register_info_t *client_infop)
1401 {
1402 	ibmf_client_t		*ibmf_clientp;
1403 	int			status;
1404 
1405 	IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L4,
1406 	    ibmf_i_validate_classes_and_port_start, IBMF_TNF_TRACE, "",
1407 	    "ibmf_i_validate_classes_and_port() enter, cip = %p, "
1408 	    "clientp = %p\n", tnf_opaque, cip, ibmf_cip,
1409 	    tnf_opaque, client_infop, client_infop);
1410 
1411 	/*
1412 	 * the Solaris implementation of IBMF does not support
1413 	 * the UNIVERSAL_CLASS
1414 	 */
1415 	if (client_infop->ir_client_class == UNIVERSAL_CLASS) {
1416 		IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1,
1417 		    ibmf_i_validate_classes_and_port_err, IBMF_TNF_ERROR, "",
1418 		    "%s\n", tnf_string, msg,
1419 		    "UNIVERSAL class is not supported");
1420 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
1421 		    ibmf_i_validate_classes_and_port_end, IBMF_TNF_TRACE, "",
1422 		    "ibmf_i_validate_classes_and_port() exit\n");
1423 		return (IBMF_NOT_SUPPORTED);
1424 	}
1425 
1426 	/*
1427 	 * Check if the client context already exists on the list
1428 	 * maintained in the CI context. If it is, then the client class
1429 	 * has already been registered for.
1430 	 */
1431 	status = ibmf_i_lookup_client_by_info(ibmf_cip, client_infop,
1432 	    &ibmf_clientp);
1433 	if (status != IBMF_SUCCESS) {
1434 		/* client class has not been previously registered for */
1435 		status = IBMF_SUCCESS;
1436 	} else {
1437 		IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1,
1438 		    ibmf_i_validate_classes_and_port_err, IBMF_TNF_ERROR, "",
1439 		    "client already registered, class = 0x%X\n",
1440 		    tnf_uint, class, client_infop->ir_client_class);
1441 		status = IBMF_PORT_IN_USE;
1442 	}
1443 
1444 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
1445 	    ibmf_i_validate_classes_and_port_end, IBMF_TNF_TRACE, "",
1446 	    "ibmf_i_validate_classes_and_port() exit\n");
1447 	return (status);
1448 }
1449 
1450 /*
1451  * ibmf_i_lookup_client_by_info():
1452  *	Get the client structure from the list
1453  */
1454 static int
ibmf_i_lookup_client_by_info(ibmf_ci_t * ibmf_cip,ibmf_register_info_t * ir_client,ibmf_client_t ** clientpp)1455 ibmf_i_lookup_client_by_info(ibmf_ci_t *ibmf_cip,
1456     ibmf_register_info_t *ir_client, ibmf_client_t **clientpp)
1457 {
1458 	ibmf_client_t *clientp;
1459 
1460 	IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L4,
1461 	    ibmf_i_lookup_client_by_info_start, IBMF_TNF_TRACE, "",
1462 	    "ibmf_i_lookup_client_by_info() enter, cip = %p, clientinfo = %p\n",
1463 	    tnf_opaque, cip, ibmf_cip, tnf_opaque, clientinfo, ir_client);
1464 
1465 	ASSERT(MUTEX_NOT_HELD(&ibmf_cip->ci_clients_mutex));
1466 
1467 	/*
1468 	 * walk the CI's client list searching for one with the specified class
1469 	 */
1470 	mutex_enter(&ibmf_cip->ci_clients_mutex);
1471 	clientp = ibmf_cip->ci_clients;
1472 	while (clientp != NULL) {
1473 		ibmf_client_info_t *tmp = &clientp->ic_client_info;
1474 		if (tmp->client_class == ir_client->ir_client_class &&
1475 		    ir_client->ir_client_class != UNIVERSAL_CLASS &&
1476 		    tmp->ci_guid == ir_client->ir_ci_guid &&
1477 		    tmp->port_num == ir_client->ir_port_num) {
1478 			/* found our match */
1479 			break;
1480 		}
1481 		clientp = clientp->ic_next;
1482 	}
1483 	mutex_exit(&ibmf_cip->ci_clients_mutex);
1484 
1485 	if (clientp != NULL) {
1486 		*clientpp = clientp;
1487 		IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4,
1488 		    ibmf_i_lookup_client_by_info_end, IBMF_TNF_TRACE, "",
1489 		    "ibmf_i_lookup_client_by_info(): clientp = %p\n",
1490 		    tnf_opaque, clientp, clientp);
1491 		return (IBMF_SUCCESS);
1492 	} else {
1493 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
1494 		    ibmf_i_lookup_client_by_info_end, IBMF_TNF_TRACE, "",
1495 		    "ibmf_i_lookup_client_by_info() exit\n");
1496 		return (IBMF_FAILURE);
1497 	}
1498 }
1499 
1500 /*
1501  * ibmf_i_add_client():
1502  *	Add a new client to the client list
1503  */
1504 void
ibmf_i_add_client(ibmf_ci_t * ibmf_cip,ibmf_client_t * ibmf_clientp)1505 ibmf_i_add_client(ibmf_ci_t *ibmf_cip, ibmf_client_t *ibmf_clientp)
1506 {
1507 	IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_add_start,
1508 	    IBMF_TNF_TRACE, "",
1509 	    "ibmf_i_add_client() enter, cip = %p, clientp = %p\n",
1510 	    tnf_opaque, ibmf_ci, ibmf_cip, tnf_opaque, client, ibmf_clientp);
1511 
1512 	ASSERT(MUTEX_NOT_HELD(&ibmf_cip->ci_clients_mutex));
1513 
1514 	mutex_enter(&ibmf_cip->ci_clients_mutex);
1515 	ibmf_clientp->ic_next = NULL;
1516 	ibmf_clientp->ic_prev = ibmf_cip->ci_clients_last;
1517 	if (ibmf_cip->ci_clients == NULL) {
1518 		ibmf_cip->ci_clients = ibmf_clientp;
1519 	}
1520 	if (ibmf_cip->ci_clients_last) {
1521 		ibmf_cip->ci_clients_last->ic_next = ibmf_clientp;
1522 	}
1523 	ibmf_cip->ci_clients_last = ibmf_clientp;
1524 	mutex_exit(&ibmf_cip->ci_clients_mutex);
1525 
1526 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_add_end,
1527 	    IBMF_TNF_TRACE, "", "ibmf_i_add_client() exit\n");
1528 }
1529 
1530 /*
1531  * ibmf_i_delete_client():
1532  *	Delete a client from the client list
1533  */
1534 void
ibmf_i_delete_client(ibmf_ci_t * ibmf_cip,ibmf_client_t * ibmf_clientp)1535 ibmf_i_delete_client(ibmf_ci_t *ibmf_cip, ibmf_client_t *ibmf_clientp)
1536 {
1537 	IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_delete_client_start,
1538 	    IBMF_TNF_TRACE, "", "ibmf_i_delete_client() enter, "
1539 	    "ibmf_i_delete_client() enter, cip = %p, clientp = %p\n",
1540 	    tnf_opaque, ibmf_ci, ibmf_cip, tnf_opaque, client, ibmf_clientp);
1541 
1542 	ASSERT(MUTEX_NOT_HELD(&ibmf_cip->ci_clients_mutex));
1543 
1544 	mutex_enter(&ibmf_cip->ci_clients_mutex);
1545 	if (ibmf_clientp->ic_next)
1546 		ibmf_clientp->ic_next->ic_prev = ibmf_clientp->ic_prev;
1547 
1548 	if (ibmf_clientp->ic_prev)
1549 		ibmf_clientp->ic_prev->ic_next = ibmf_clientp->ic_next;
1550 
1551 	if (ibmf_cip->ci_clients == ibmf_clientp) {
1552 		ibmf_cip->ci_clients = ibmf_clientp->ic_next;
1553 	}
1554 	if (ibmf_cip->ci_clients_last == ibmf_clientp) {
1555 		ibmf_cip->ci_clients_last = ibmf_clientp->ic_prev;
1556 	}
1557 	mutex_exit(&ibmf_cip->ci_clients_mutex);
1558 
1559 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_delete_client_end,
1560 	    IBMF_TNF_TRACE, "", "ibmf_i_delete_client() exit\n");
1561 }
1562 
1563 /*
1564  * ibmf_i_get_qp():
1565  *	Get the QP structure based on the client class
1566  */
1567 int
ibmf_i_get_qp(ibmf_ci_t * ibmf_cip,uint_t port_num,ibmf_client_type_t class,ibmf_qp_t ** qppp)1568 ibmf_i_get_qp(ibmf_ci_t *ibmf_cip, uint_t port_num, ibmf_client_type_t class,
1569     ibmf_qp_t **qppp)
1570 {
1571 	ibmf_qp_t		*qpp;
1572 	int			qp_num, status = IBMF_SUCCESS;
1573 
1574 	IBMF_TRACE_3(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_get_qp_start,
1575 	    IBMF_TNF_TRACE, "", "ibmf_i_get_qp() enter, cip = %p, "
1576 	    "port = %d, class = %x\n", tnf_opaque, ibmf_ci, ibmf_cip,
1577 	    tnf_int, port, port_num, tnf_opaque, class, class);
1578 
1579 	ASSERT(MUTEX_NOT_HELD(&ibmf_cip->ci_mutex));
1580 
1581 	mutex_enter(&ibmf_cip->ci_mutex);
1582 
1583 	/*
1584 	 * walk through the list of qps on this ci, looking for one that
1585 	 * corresponds to the type and class the caller is interested in.
1586 	 * If it is not there, we need allocate it from the transport. Since
1587 	 * qp0 & qp1 can only be allocated once, we maintain a reference count
1588 	 * and call the transport for allocation iff the ref count is 0.
1589 	 */
1590 	qp_num = (class == SUBN_AGENT || class == SUBN_MANAGER) ? 0 : 1;
1591 
1592 	qpp = ibmf_cip->ci_qp_list;
1593 	while (qpp != NULL) {
1594 		if (port_num == qpp->iq_port_num && qp_num == qpp->iq_qp_num)
1595 			break;
1596 		qpp = qpp->iq_next;
1597 	}
1598 
1599 	if (qpp == NULL) {
1600 		/*
1601 		 * allocate qp and add it the qp list; recheck to
1602 		 * catch races
1603 		 */
1604 		ibmf_qp_t *tqpp;
1605 
1606 		mutex_exit(&ibmf_cip->ci_mutex);
1607 
1608 		tqpp = (ibmf_qp_t *)kmem_zalloc(sizeof (ibmf_qp_t), KM_SLEEP);
1609 
1610 		/* check the list under lock */
1611 		mutex_enter(&ibmf_cip->ci_mutex);
1612 
1613 		qpp = ibmf_cip->ci_qp_list;
1614 		while (qpp != NULL) {
1615 			if (port_num == qpp->iq_port_num && qp_num ==
1616 			    qpp->iq_qp_num)
1617 				break;
1618 			qpp = qpp->iq_next;
1619 		}
1620 
1621 		if (qpp != NULL) {
1622 			/* some one raced past us and added to the list */
1623 			kmem_free((void *)tqpp, sizeof (ibmf_qp_t));
1624 		} else {
1625 			/* add this to the qp list */
1626 			qpp = tqpp;
1627 			_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qpp))
1628 			qpp->iq_next = NULL;
1629 			if (ibmf_cip->ci_qp_list == NULL)
1630 				ibmf_cip->ci_qp_list = qpp;
1631 			if (ibmf_cip->ci_qp_list_tail != NULL)
1632 				ibmf_cip->ci_qp_list_tail->iq_next = qpp;
1633 			ibmf_cip->ci_qp_list_tail = qpp;
1634 			qpp->iq_port_num = port_num;
1635 			qpp->iq_qp_num = qp_num;
1636 			qpp->iq_flags = IBMF_QP_FLAGS_INVALID;
1637 			mutex_init(&qpp->iq_mutex, NULL, MUTEX_DRIVER, NULL);
1638 		}
1639 	}
1640 
1641 	/* we now have a QP context */
1642 	for (;;) {
1643 		if (qpp->iq_flags == IBMF_QP_FLAGS_INITING) {
1644 
1645 			/* block till qp is in VALID state */
1646 			cv_wait(&ibmf_cip->ci_qp_cv, &ibmf_cip->ci_mutex);
1647 			continue;
1648 
1649 		}
1650 
1651 		if (qpp->iq_flags == IBMF_QP_FLAGS_UNINITING) {
1652 
1653 			/* block till qp is in INVALID state */
1654 			cv_wait(&ibmf_cip->ci_qp_cv, &ibmf_cip->ci_mutex);
1655 			continue;
1656 		}
1657 
1658 		if (qpp->iq_flags == IBMF_QP_FLAGS_INVALID) {
1659 			if ((status = ibmf_i_init_qp(ibmf_cip, qpp)) !=
1660 			    IBMF_SUCCESS) {
1661 				ibmf_qp_t *tqpp;
1662 
1663 				/*
1664 				 * Remove the QP context from the CI's list.
1665 				 * Only initialized QPs should be on the list.
1666 				 * We know that this QP is on the list, so
1667 				 * the list is not empty.
1668 				 */
1669 				tqpp = ibmf_cip->ci_qp_list;
1670 				if (tqpp == qpp) {
1671 					/* Only QP context on the list */
1672 					ibmf_cip->ci_qp_list = NULL;
1673 					ibmf_cip->ci_qp_list_tail = NULL;
1674 				}
1675 
1676 				/* Find the QP context before the last one */
1677 				if (tqpp != qpp) {
1678 					while (tqpp->iq_next != qpp) {
1679 						tqpp = tqpp->iq_next;
1680 					}
1681 
1682 					/*
1683 					 * We are at the second last element of
1684 					 * the list. Readjust the tail pointer.
1685 					 * Remove the last element from the
1686 					 * list.
1687 					 */
1688 					tqpp->iq_next = NULL;
1689 					ibmf_cip->ci_qp_list_tail = tqpp;
1690 				}
1691 
1692 				/* Free up the QP context */
1693 				kmem_free((void *)qpp, sizeof (ibmf_qp_t));
1694 
1695 				break;
1696 			}
1697 			continue;
1698 		}
1699 
1700 		if (qpp->iq_flags == IBMF_QP_FLAGS_INITED) {
1701 			qpp->iq_qp_ref++;
1702 			break;
1703 		}
1704 	}
1705 
1706 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*qpp))
1707 
1708 	mutex_exit(&ibmf_cip->ci_mutex);
1709 
1710 	if (status == IBMF_SUCCESS) {
1711 		*qppp = qpp;
1712 		IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_get_qp_end,
1713 		    IBMF_TNF_TRACE, "", "ibmf_i_get_qp() exit "
1714 		    "qp_handle = %p\n", tnf_opaque, qp_handle, qpp);
1715 		return (IBMF_SUCCESS);
1716 	} else {
1717 		IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1, ibmf_i_get_qp_err,
1718 		    IBMF_TNF_ERROR, "", "%s\n", tnf_string, msg,
1719 		    "ibmf_i_get_qp(): qp_not found");
1720 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_get_qp_end,
1721 		    IBMF_TNF_TRACE, "", "ibmf_i_get_qp() exit\n");
1722 		return (status);
1723 	}
1724 }
1725 
1726 /*
1727  * ibmf_i_release_qp():
1728  *	Drop the reference count on the QP structure
1729  */
1730 void
ibmf_i_release_qp(ibmf_ci_t * ibmf_cip,ibmf_qp_t ** qppp)1731 ibmf_i_release_qp(ibmf_ci_t *ibmf_cip, ibmf_qp_t **qppp)
1732 {
1733 	ibmf_qp_t	*qpp;
1734 
1735 	IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_release_qp_start,
1736 	    IBMF_TNF_TRACE, "", "ibmf_i_release_qp() enter, cip = %p, "
1737 	    "qpp = %p\n", tnf_opaque, cip, ibmf_cip, tnf_opaque, qpp, *qppp);
1738 
1739 	ASSERT(MUTEX_NOT_HELD(&ibmf_cip->ci_mutex));
1740 
1741 	mutex_enter(&ibmf_cip->ci_mutex);
1742 	qpp = *qppp;
1743 	qpp->iq_qp_ref--;
1744 	if (qpp->iq_qp_ref == 0)
1745 		ibmf_i_uninit_qp(ibmf_cip, qpp);
1746 	mutex_exit(&ibmf_cip->ci_mutex);
1747 
1748 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_release_qp_end,
1749 	    IBMF_TNF_TRACE, "", "ibmf_i_release_qp() exit\n");
1750 }
1751 
1752 /*
1753  * ibmf_i_init_qp():
1754  *	Set up the QP context, request a QP from the IBT framework
1755  *	and initialize it
1756  */
1757 static int
ibmf_i_init_qp(ibmf_ci_t * ibmf_cip,ibmf_qp_t * qpp)1758 ibmf_i_init_qp(ibmf_ci_t *ibmf_cip, ibmf_qp_t *qpp)
1759 {
1760 	ibt_sqp_type_t		qp_type;
1761 	ibt_qp_alloc_attr_t	qp_attrs;
1762 	ibt_qp_hdl_t		qp_handle;
1763 	ibt_qp_info_t		qp_modify_attr;
1764 	ibt_status_t		ibt_status;
1765 	int			i, status;
1766 
1767 	IBMF_TRACE_3(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_init_qp_start,
1768 	    IBMF_TNF_TRACE, "", "ibmf_i_init_qp() enter, cip = %p, "
1769 	    "port = %d, qp = %d\n", tnf_opaque, ibmf_ci, ibmf_cip, tnf_int,
1770 	    port, qpp->iq_port_num, tnf_int, num, qpp->iq_qp_num);
1771 
1772 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(qpp->iq_qp_handle))
1773 
1774 	ASSERT(MUTEX_HELD(&ibmf_cip->ci_mutex));
1775 
1776 	qpp->iq_flags = IBMF_QP_FLAGS_INITING;
1777 	mutex_exit(&ibmf_cip->ci_mutex);
1778 	if (qpp->iq_qp_handle) {	/* closed but not yet freed */
1779 		ibt_status = ibt_free_qp(qpp->iq_qp_handle);
1780 		if (ibt_status != IBT_SUCCESS) {
1781 			IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
1782 			    ibmf_i_init_qp_err, IBMF_TNF_ERROR, "",
1783 			    "%s, status = %d\n", tnf_string, msg,
1784 			    "ibt_free_qp returned error",
1785 			    tnf_uint, ibt_status, ibt_status);
1786 		}
1787 		qpp->iq_qp_handle = NULL;
1788 	}
1789 	ASSERT(qpp->iq_qp_num == 0 || qpp->iq_qp_num == 1);
1790 	if (qpp->iq_qp_num == 0)
1791 		qp_type = IBT_SMI_SQP;
1792 	else
1793 		qp_type = IBT_GSI_SQP;
1794 	qp_attrs.qp_scq_hdl = ibmf_cip->ci_cq_handle;
1795 	qp_attrs.qp_rcq_hdl = ibmf_cip->ci_cq_handle;
1796 	qp_attrs.qp_pd_hdl = ibmf_cip->ci_pd;
1797 	qp_attrs.qp_sizes.cs_sq_sgl = 1;
1798 	qp_attrs.qp_sizes.cs_rq_sgl = IBMF_MAX_RQ_WR_SGL_ELEMENTS;
1799 	qp_attrs.qp_sizes.cs_sq = ibmf_send_wqes_posted_per_qp;
1800 	qp_attrs.qp_sizes.cs_rq = ibmf_recv_wqes_posted_per_qp;
1801 	qp_attrs.qp_flags = IBT_ALL_SIGNALED;
1802 	qp_attrs.qp_alloc_flags = IBT_QP_NO_FLAGS;
1803 
1804 	/* call the IB transport to allocate a special QP */
1805 	ibt_status = ibt_alloc_special_qp(ibmf_cip->ci_ci_handle,
1806 	    qpp->iq_port_num, qp_type, &qp_attrs, NULL, &qp_handle);
1807 	if (ibt_status != IBT_SUCCESS) {
1808 		mutex_enter(&ibmf_cip->ci_mutex);
1809 		qpp->iq_flags = IBMF_QP_FLAGS_INVALID;
1810 		cv_broadcast(&ibmf_cip->ci_qp_cv);
1811 		IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1, ibmf_i_init_qp_err,
1812 		    IBMF_TNF_ERROR, "", "ibmf_i_init_qp() error status = %d\n",
1813 		    tnf_uint, ibt_status, ibt_status);
1814 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_init_qp_end,
1815 		    IBMF_TNF_TRACE, "", "ibmf_i_init_qp() exit\n");
1816 		return (IBMF_TRANSPORT_FAILURE);
1817 	}
1818 
1819 	/* initialize qpp */
1820 	qpp->iq_qp_handle = qp_handle;
1821 	qp_modify_attr.qp_trans = IBT_UD_SRV;
1822 	qp_modify_attr.qp_flags = IBT_CEP_NO_FLAGS;
1823 
1824 	/* get the pkey index for the specified pkey */
1825 	if (ibmf_i_get_pkeyix(ibmf_cip->ci_ci_handle, IBMF_P_KEY_DEF_LIMITED,
1826 	    qpp->iq_port_num, &qp_modify_attr.qp_transport.ud.ud_pkey_ix) !=
1827 	    IBMF_SUCCESS) {
1828 		ibt_status = ibt_free_qp(qpp->iq_qp_handle);
1829 		if (ibt_status != IBT_SUCCESS) {
1830 			IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
1831 			    ibmf_i_init_qp_err, IBMF_TNF_ERROR, "",
1832 			    "%s, status = %d\n", tnf_string, msg,
1833 			    "ibt_free_qp returned error",
1834 			    tnf_uint, ibt_status, ibt_status);
1835 		}
1836 		mutex_enter(&ibmf_cip->ci_mutex);
1837 		qpp->iq_flags = IBMF_QP_FLAGS_INVALID;
1838 		cv_broadcast(&ibmf_cip->ci_qp_cv);
1839 		IBMF_TRACE_0(IBMF_TNF_NODEBUG, DPRINT_L1, ibmf_i_init_qp_err,
1840 		    IBMF_TNF_ERROR, "", "ibmf_init_qp(): failed to get "
1841 		    "pkey index\n");
1842 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_init_qp_end,
1843 		    IBMF_TNF_TRACE, "", "ibmf_i_init_qp() exit\n");
1844 		return (IBMF_FAILURE);
1845 	}
1846 	qp_modify_attr.qp_transport.ud.ud_sq_psn = 0;
1847 	qp_modify_attr.qp_transport.ud.ud_port = qpp->iq_port_num;
1848 	qp_modify_attr.qp_transport.ud.ud_qkey = IBMF_MGMT_Q_KEY;
1849 
1850 	/* call the IB transport to initialize the QP */
1851 	ibt_status = ibt_initialize_qp(qp_handle, &qp_modify_attr);
1852 	if (ibt_status != IBT_SUCCESS) {
1853 		ibt_status = ibt_free_qp(qpp->iq_qp_handle);
1854 		if (ibt_status != IBT_SUCCESS) {
1855 			IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
1856 			    ibmf_i_init_qp_err, IBMF_TNF_ERROR, "",
1857 			    "%s, status = %d\n", tnf_string, msg,
1858 			    "ibt_free_qp returned error",
1859 			    tnf_uint, ibt_status, ibt_status);
1860 		}
1861 		mutex_enter(&ibmf_cip->ci_mutex);
1862 		qpp->iq_flags = IBMF_QP_FLAGS_INVALID;
1863 		cv_broadcast(&ibmf_cip->ci_qp_cv);
1864 		IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1, ibmf_i_init_qp_err,
1865 		    IBMF_TNF_ERROR, "", "ibmf_init_qp(): error status = %d\n",
1866 		    tnf_uint, ibt_status, ibt_status);
1867 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_init_qp_end,
1868 		    IBMF_TNF_TRACE, "", "ibmf_i_init_qp() exit\n");
1869 		return (IBMF_TRANSPORT_FAILURE);
1870 	}
1871 
1872 	/* post receive wqes to the RQ to handle unsolicited inbound packets  */
1873 	for (i = 0; i < ibmf_recv_wqes_per_port; i++) {
1874 		status =  ibmf_i_post_recv_buffer(ibmf_cip, qpp,
1875 		    B_TRUE, IBMF_QP_HANDLE_DEFAULT);
1876 		if (status != IBMF_SUCCESS) {
1877 			IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L1,
1878 			    ibmf_i_init_qp, IBMF_TNF_TRACE, "",
1879 			    "%s\n", tnf_string, msg, "ibmf_i_init_qp(): "
1880 			    "ibmf_i_post_recv_buffer() failed");
1881 		}
1882 	}
1883 	mutex_enter(&ibmf_cip->ci_mutex);
1884 
1885 	/* set the state and signal blockers */
1886 	qpp->iq_flags = IBMF_QP_FLAGS_INITED;
1887 	cv_broadcast(&ibmf_cip->ci_qp_cv);
1888 
1889 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_init_qp_end,
1890 	    IBMF_TNF_TRACE, "", "ibmf_i_init_qp() exit\n");
1891 	return (IBMF_SUCCESS);
1892 }
1893 
1894 /*
1895  * ibmf_i_uninit_qp():
1896  *	Invalidate the QP context
1897  */
1898 static void
ibmf_i_uninit_qp(ibmf_ci_t * ibmf_cip,ibmf_qp_t * qpp)1899 ibmf_i_uninit_qp(ibmf_ci_t *ibmf_cip, ibmf_qp_t *qpp)
1900 {
1901 	ibt_status_t		status;
1902 
1903 	IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_uninit_qp_start,
1904 	    IBMF_TNF_TRACE, "", "ibmf_i_uninit_qp() enter, cip = %p "
1905 	    "qpp = %p\n", tnf_opaque, cip, ibmf_cip, tnf_opaque, qpp, qpp);
1906 
1907 	ASSERT(MUTEX_HELD(&ibmf_cip->ci_mutex));
1908 
1909 	/* mark the state as uniniting */
1910 	ASSERT(qpp->iq_qp_ref == 0);
1911 	qpp->iq_flags = IBMF_QP_FLAGS_UNINITING;
1912 	mutex_exit(&ibmf_cip->ci_mutex);
1913 
1914 	/* note: we ignore error values from ibt_flush_qp */
1915 	status = ibt_flush_qp(qpp->iq_qp_handle);
1916 	if (status != IBT_SUCCESS) {
1917 		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L2,
1918 		    ibmf_i_uninit_qp_err, IBMF_TNF_ERROR, "",
1919 		    "ibmf_i_uninit_qp(): %s, status = %d\n", tnf_string, msg,
1920 		    "ibt_flush_qp returned error", tnf_int, status, status);
1921 	}
1922 
1923 	/* mark state as INVALID and signal any blockers */
1924 	mutex_enter(&ibmf_cip->ci_mutex);
1925 	qpp->iq_flags = IBMF_QP_FLAGS_INVALID;
1926 	cv_broadcast(&ibmf_cip->ci_qp_cv);
1927 
1928 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_uninit_qp_end,
1929 	    IBMF_TNF_TRACE, "", "ibmf_i_uninit_qp() exit\n");
1930 }
1931 
1932 /*
1933  * ibmf_i_alloc_msg():
1934  *	Allocate and set up a message context
1935  */
1936 int
ibmf_i_alloc_msg(ibmf_client_t * clientp,ibmf_msg_impl_t ** msgp,int km_flags)1937 ibmf_i_alloc_msg(ibmf_client_t *clientp, ibmf_msg_impl_t **msgp, int km_flags)
1938 {
1939 	ibmf_msg_impl_t *msgimplp;
1940 
1941 	IBMF_TRACE_3(IBMF_TNF_DEBUG, DPRINT_L4,
1942 	    ibmf_i_alloc_msg_start, IBMF_TNF_TRACE, "",
1943 	    "ibmf_i_alloc_msg() enter, clientp = %p, msg = %p, "
1944 	    " kmflags = %d\n", tnf_opaque, clientp, clientp, tnf_opaque, msg,
1945 	    *msgp, tnf_int, km_flags, km_flags);
1946 
1947 	/* allocate the message context */
1948 	msgimplp = (ibmf_msg_impl_t *)kmem_zalloc(sizeof (ibmf_msg_impl_t),
1949 	    km_flags);
1950 	if (msgimplp != NULL) {
1951 		if (km_flags == KM_SLEEP) {
1952 			ibmf_i_pop_ud_dest_thread(clientp->ic_myci);
1953 		}
1954 	} else {
1955 		IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1,
1956 		    ibmf_i_alloc_msg_err, IBMF_TNF_ERROR, "",
1957 		    "ibmf_i_alloc_msg(): %s\n",
1958 		    tnf_string, msg, "kmem_xalloc failed");
1959 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_alloc_msg_end,
1960 		    IBMF_TNF_TRACE, "", "ibmf_i_alloc_msg() exit\n");
1961 		return (IBMF_NO_RESOURCES);
1962 	}
1963 
1964 	*msgp = msgimplp;
1965 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_alloc_msg_end,
1966 	    IBMF_TNF_TRACE, "", "ibmf_i_alloc_msg() exit\n");
1967 	return (IBMF_SUCCESS);
1968 }
1969 
1970 /*
1971  * ibmf_i_free_msg():
1972  *	frees up all buffers allocated by IBMF for
1973  * 	this message context, and then frees up the context
1974  */
1975 void
ibmf_i_free_msg(ibmf_msg_impl_t * msgimplp)1976 ibmf_i_free_msg(ibmf_msg_impl_t *msgimplp)
1977 {
1978 	ibmf_msg_bufs_t *msgbufp = &msgimplp->im_msgbufs_recv;
1979 	ibmf_client_t *clientp = (ibmf_client_t *)msgimplp->im_client;
1980 	uint32_t	cl_hdr_sz, cl_hdr_off;
1981 
1982 	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4,
1983 	    ibmf_i_free_msg_start, IBMF_TNF_TRACE, "",
1984 	    "ibmf_i_free_msg() enter, msg = %p\n", tnf_opaque, msg, msgimplp);
1985 
1986 	/* free up the UD destination resource */
1987 	if (msgimplp->im_ibmf_ud_dest != NULL) {
1988 		ibmf_i_free_ud_dest(clientp, msgimplp);
1989 		ibmf_i_clean_ud_dest_list(clientp->ic_myci, B_FALSE);
1990 	}
1991 
1992 	/* free up the receive buffer if allocated previously */
1993 	if (msgbufp->im_bufs_mad_hdr != NULL) {
1994 		ibmf_i_mgt_class_to_hdr_sz_off(
1995 		    msgbufp->im_bufs_mad_hdr->MgmtClass,
1996 		    &cl_hdr_sz, &cl_hdr_off);
1997 		kmem_free(msgbufp->im_bufs_mad_hdr, sizeof (ib_mad_hdr_t) +
1998 		    cl_hdr_off + msgbufp->im_bufs_cl_hdr_len +
1999 		    msgbufp->im_bufs_cl_data_len);
2000 		mutex_enter(&clientp->ic_kstat_mutex);
2001 		IBMF_SUB32_KSTATS(clientp, recv_bufs_alloced, 1);
2002 		mutex_exit(&clientp->ic_kstat_mutex);
2003 	}
2004 
2005 	/* destroy the message mutex */
2006 	mutex_destroy(&msgimplp->im_mutex);
2007 
2008 	/* free the message context */
2009 	kmem_free(msgimplp, sizeof (ibmf_msg_impl_t));
2010 
2011 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_free_msg_end,
2012 	    IBMF_TNF_TRACE, "", "ibmf_i_free_msg() exit\n");
2013 }
2014 
2015 /*
2016  * ibmf_i_msg_transport():
2017  *	Send a message posted by the IBMF client using the RMPP protocol
2018  *	if specified
2019  */
2020 int
ibmf_i_msg_transport(ibmf_client_t * clientp,ibmf_qp_handle_t ibmf_qp_handle,ibmf_msg_impl_t * msgimplp,int blocking)2021 ibmf_i_msg_transport(ibmf_client_t *clientp, ibmf_qp_handle_t ibmf_qp_handle,
2022     ibmf_msg_impl_t *msgimplp, int blocking)
2023 {
2024 	ib_mad_hdr_t	*madhdrp;
2025 	ibmf_msg_bufs_t *msgbufp, *smsgbufp;
2026 	uint32_t	cl_hdr_sz, cl_hdr_off;
2027 	boolean_t	isDS = 0; /* double sided (sequenced) transaction */
2028 	boolean_t	error = B_FALSE;
2029 	int		status = IBMF_SUCCESS;
2030 	uint_t		refcnt;
2031 	char		errmsg[128];
2032 	timeout_id_t	msg_rp_unset_id, msg_tr_unset_id;
2033 
2034 	IBMF_TRACE_4(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_msg_transport_start,
2035 	    IBMF_TNF_TRACE, "", "ibmf_i_msg_transport(): clientp = 0x%p, "
2036 	    "qphdl = 0x%p, msgp = 0x%p, block = %d\n",
2037 	    tnf_opaque, clientp, clientp, tnf_opaque, qphdl, ibmf_qp_handle,
2038 	    tnf_opaque, msg, msgimplp, tnf_uint, block, blocking);
2039 
2040 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*msgimplp, *msgbufp))
2041 
2042 	mutex_enter(&msgimplp->im_mutex);
2043 
2044 	madhdrp = msgimplp->im_msgbufs_send.im_bufs_mad_hdr;
2045 	msgbufp = &msgimplp->im_msgbufs_recv;
2046 	smsgbufp = &msgimplp->im_msgbufs_send;
2047 
2048 	/*
2049 	 * check if transp_op_flags specify that the transaction is
2050 	 * a single packet, then the size of the message header + data
2051 	 * does not exceed 256 bytes
2052 	 */
2053 	if ((msgimplp->im_transp_op_flags & IBMF_MSG_TRANS_FLAG_RMPP) == 0) {
2054 		ibmf_i_mgt_class_to_hdr_sz_off(
2055 		    smsgbufp->im_bufs_mad_hdr->MgmtClass,
2056 		    &cl_hdr_sz, &cl_hdr_off);
2057 
2058 		if ((sizeof (ib_mad_hdr_t) + cl_hdr_off +
2059 		    smsgbufp->im_bufs_cl_hdr_len +
2060 		    smsgbufp->im_bufs_cl_data_len) > IBMF_MAD_SIZE) {
2061 			mutex_exit(&msgimplp->im_mutex);
2062 			(void) sprintf(errmsg,
2063 			    "Non-RMPP message size is too large");
2064 			error = B_TRUE;
2065 			status = IBMF_BAD_SIZE;
2066 			goto bail;
2067 		}
2068 	}
2069 
2070 	/* more message context initialization */
2071 	msgimplp->im_qp_hdl 	= ibmf_qp_handle;
2072 	msgimplp->im_tid	= b2h64(madhdrp->TransactionID);
2073 	msgimplp->im_mgt_class 	= madhdrp->MgmtClass;
2074 	msgimplp->im_unsolicited = B_FALSE;
2075 	msgimplp->im_trans_state_flags = IBMF_TRANS_STATE_FLAG_UNINIT;
2076 	bzero(&msgimplp->im_rmpp_ctx, sizeof (ibmf_rmpp_ctx_t));
2077 	msgimplp->im_rmpp_ctx.rmpp_state = IBMF_RMPP_STATE_UNDEFINED;
2078 	msgimplp->im_rmpp_ctx.rmpp_respt = IBMF_RMPP_DEFAULT_RRESPT;
2079 	msgimplp->im_rmpp_ctx.rmpp_retry_cnt = 0;
2080 	msgimplp->im_ref_count = 0;
2081 	msgimplp->im_pending_send_compls = 0;
2082 	IBMF_MSG_INCR_REFCNT(msgimplp);
2083 	if (msgimplp->im_retrans.retrans_retries == 0)
2084 		msgimplp->im_retrans.retrans_retries = IBMF_RETRANS_DEF_RETRIES;
2085 	if (msgimplp->im_retrans.retrans_rtv == 0)
2086 		msgimplp->im_retrans.retrans_rtv = IBMF_RETRANS_DEF_RTV;
2087 	if (msgimplp->im_retrans.retrans_rttv == 0)
2088 		msgimplp->im_retrans.retrans_rttv = IBMF_RETRANS_DEF_RTTV;
2089 
2090 	IBMF_TRACE_5(IBMF_TNF_DEBUG, DPRINT_L3, ibmf_i_msg_transport,
2091 	    IBMF_TNF_TRACE, "", "ibmf_i_msg_transport(): %s, msgp = 0x%p, "
2092 	    "class = 0x%x, method = 0x%x, attributeID = 0x%x\n",
2093 	    tnf_string, msg, "Added message", tnf_opaque, msgimplp,
2094 	    msgimplp, tnf_opaque, class, msgimplp->im_mgt_class, tnf_opaque,
2095 	    method, madhdrp->R_Method, tnf_opaque, attrib_id,
2096 	    b2h16(madhdrp->AttributeID));
2097 
2098 	IBMF_TRACE_3(IBMF_TNF_DEBUG, DPRINT_L3, ibmf_i_msg_transport,
2099 	    IBMF_TNF_TRACE, "", "ibmf_i_msg_transport(): msgp = 0x%p, "
2100 	    "TID = 0x%p, transp_op_flags = 0x%x\n",
2101 	    tnf_opaque, msgimplp, msgimplp, tnf_opaque, tid, msgimplp->im_tid,
2102 	    tnf_uint, transp_op_flags, msgimplp->im_transp_op_flags);
2103 
2104 	/*
2105 	 * Do not allow reuse of a message where the receive buffers are
2106 	 * being used as send buffers if this is a sequenced transaction
2107 	 */
2108 	if ((madhdrp == msgbufp->im_bufs_mad_hdr) &&
2109 	    (msgimplp->im_transp_op_flags & IBMF_MSG_TRANS_FLAG_SEQ)) {
2110 		IBMF_MSG_DECR_REFCNT(msgimplp);
2111 		mutex_exit(&msgimplp->im_mutex);
2112 		(void) sprintf(errmsg,
2113 		    "Send and Recv buffers are the same for sequenced"
2114 		    " transaction");
2115 		error = B_TRUE;
2116 		status = IBMF_REQ_INVALID;
2117 		goto bail;
2118 	}
2119 
2120 	/* set transaction flags */
2121 	if (msgimplp->im_transp_op_flags & IBMF_MSG_TRANS_FLAG_SEQ)
2122 		msgimplp->im_flags |= IBMF_MSG_FLAGS_SEQUENCED;
2123 
2124 	if (msgimplp->im_transp_op_flags & IBMF_MSG_TRANS_FLAG_RMPP)
2125 		msgimplp->im_flags |= IBMF_MSG_FLAGS_SEND_RMPP;
2126 	else
2127 		msgimplp->im_flags |= IBMF_MSG_FLAGS_NOT_RMPP;
2128 
2129 	/* free recv buffers if this is a reused message */
2130 	if ((msgbufp->im_bufs_mad_hdr != NULL) &&
2131 	    (msgimplp->im_transp_op_flags & IBMF_MSG_TRANS_FLAG_SEQ)) {
2132 
2133 		IBMF_TRACE_3(IBMF_TNF_DEBUG, DPRINT_L3, ibmf_i_msg_transport,
2134 		    IBMF_TNF_TRACE, "", "ibmf_i_msg_transport(): %s, "
2135 		    "msgp = 0x%p, mad_hdrp = 0x%p\n", tnf_string, msg,
2136 		    "Freeing recv buffer for reused message",
2137 		    tnf_opaque, msgimplp, msgimplp,
2138 		    tnf_opaque, mad_hdr, msgbufp->im_bufs_mad_hdr);
2139 
2140 		ibmf_i_mgt_class_to_hdr_sz_off(
2141 		    msgbufp->im_bufs_mad_hdr->MgmtClass,
2142 		    &cl_hdr_sz, &cl_hdr_off);
2143 
2144 		kmem_free(msgbufp->im_bufs_mad_hdr, sizeof (ib_mad_hdr_t) +
2145 		    cl_hdr_off + msgbufp->im_bufs_cl_hdr_len +
2146 		    msgbufp->im_bufs_cl_data_len);
2147 
2148 		msgbufp->im_bufs_mad_hdr = NULL;
2149 		msgbufp->im_bufs_cl_hdr = NULL;
2150 		msgbufp->im_bufs_cl_hdr_len = 0;
2151 		msgbufp->im_bufs_cl_data = NULL;
2152 		msgbufp->im_bufs_cl_data_len = 0;
2153 	}
2154 
2155 	mutex_exit(&msgimplp->im_mutex);
2156 
2157 	/* initialize (and possibly allocate) the address handle */
2158 	status = ibmf_i_alloc_ud_dest(clientp, msgimplp,
2159 	    &msgimplp->im_ud_dest, blocking);
2160 	if (status != IBMF_SUCCESS) {
2161 		(void) sprintf(errmsg, "ibmf_i_alloc_ud_dest() failed");
2162 		error = B_TRUE;
2163 		goto bail;
2164 	}
2165 
2166 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*msgimplp, *msgbufp))
2167 
2168 	/* add the message to the client context's message list */
2169 	ibmf_i_client_add_msg(clientp, msgimplp);
2170 
2171 	mutex_enter(&msgimplp->im_mutex);
2172 
2173 	/* no one should have touched our state */
2174 	ASSERT(msgimplp->im_trans_state_flags == IBMF_TRANS_STATE_FLAG_UNINIT);
2175 
2176 	/* transition out of uninit state */
2177 	msgimplp->im_trans_state_flags = IBMF_TRANS_STATE_FLAG_INIT;
2178 
2179 	IBMF_TRACE_5(IBMF_TNF_DEBUG, DPRINT_L3, ibmf_i_msg_transport,
2180 	    IBMF_TNF_TRACE, "", "ibmf_i_msg_transport(): msgp = 0x%p, "
2181 	    "local_lid = 0x%x, remote_lid = 0x%x, remote_qpn = 0x%x, "
2182 	    "block = %d\n", tnf_opaque, msgp, msgimplp,
2183 	    tnf_uint, local_lid, msgimplp->im_local_addr.ia_local_lid,
2184 	    tnf_uint, remote_lid, msgimplp->im_local_addr.ia_remote_lid,
2185 	    tnf_uint, remote_qpn, msgimplp->im_local_addr.ia_remote_qno,
2186 	    tnf_uint, blocking, blocking);
2187 
2188 	IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L3, ibmf_i_msg_transport,
2189 	    IBMF_TNF_TRACE, "", "ibmf_i_msg_transport(): "
2190 	    "unsetting timer %p %d\n", tnf_opaque, msgimplp, msgimplp,
2191 	    tnf_opaque, timeout_id, msgimplp->im_rp_timeout_id);
2192 
2193 	ASSERT(msgimplp->im_rp_timeout_id == 0);
2194 	ASSERT(msgimplp->im_tr_timeout_id == 0);
2195 
2196 	if ((msgimplp->im_transp_op_flags & IBMF_MSG_TRANS_FLAG_RMPP) == 0) {
2197 
2198 		/* Non-RMPP transaction */
2199 
2200 		status = ibmf_i_send_single_pkt(clientp, ibmf_qp_handle,
2201 		    msgimplp, blocking);
2202 		if (status != IBMF_SUCCESS) {
2203 			IBMF_MSG_DECR_REFCNT(msgimplp);
2204 			mutex_exit(&msgimplp->im_mutex);
2205 			ibmf_i_client_rem_msg(clientp, msgimplp, &refcnt);
2206 			(void) sprintf(errmsg, "Single packet send failed");
2207 			error = B_TRUE;
2208 			goto bail;
2209 		}
2210 
2211 	} else if (msgimplp->im_transp_op_flags & IBMF_MSG_TRANS_FLAG_RMPP) {
2212 
2213 		/* RMPP transaction */
2214 
2215 		/* check if client supports RMPP traffic */
2216 		if ((clientp->ic_reg_flags & IBMF_REG_FLAG_RMPP) == 0) {
2217 			IBMF_MSG_DECR_REFCNT(msgimplp);
2218 			mutex_exit(&msgimplp->im_mutex);
2219 			ibmf_i_client_rem_msg(clientp, msgimplp, &refcnt);
2220 			(void) sprintf(errmsg, "Class does not support RMPP");
2221 			error = B_TRUE;
2222 			status = IBMF_BAD_RMPP_OPT;
2223 			goto bail;
2224 		}
2225 
2226 		/* for non-special QPs, check if QP supports RMPP traffic */
2227 		if (ibmf_qp_handle != IBMF_QP_HANDLE_DEFAULT &&
2228 		    (((ibmf_alt_qp_t *)ibmf_qp_handle)->isq_supports_rmpp ==
2229 		    B_FALSE)) {
2230 			IBMF_MSG_DECR_REFCNT(msgimplp);
2231 			mutex_exit(&msgimplp->im_mutex);
2232 			ibmf_i_client_rem_msg(clientp, msgimplp, &refcnt);
2233 			(void) sprintf(errmsg, "QP does not support RMPP");
2234 			error = B_TRUE;
2235 			status = IBMF_BAD_RMPP_OPT;
2236 			goto bail;
2237 		}
2238 
2239 		/* check if transaction is "double sided" (send and receive) */
2240 		if (msgimplp->im_transp_op_flags & IBMF_MSG_TRANS_FLAG_SEQ)
2241 			isDS = 1;
2242 
2243 		status = ibmf_i_send_rmpp_pkts(clientp, ibmf_qp_handle,
2244 		    msgimplp, isDS, blocking);
2245 		if (status != IBMF_SUCCESS) {
2246 			IBMF_MSG_DECR_REFCNT(msgimplp);
2247 			mutex_exit(&msgimplp->im_mutex);
2248 			ibmf_i_client_rem_msg(clientp, msgimplp, &refcnt);
2249 			(void) sprintf(errmsg, "RMPP packets send failed");
2250 			error = B_TRUE;
2251 			goto bail;
2252 		}
2253 	}
2254 
2255 	/*
2256 	 * decrement the reference count so notify_client() can remove the
2257 	 * message when it's ready
2258 	 */
2259 	IBMF_MSG_DECR_REFCNT(msgimplp);
2260 
2261 	/* check if the transaction is a blocking transaction */
2262 	if (blocking && ((msgimplp->im_trans_state_flags &
2263 	    IBMF_TRANS_STATE_FLAG_SIGNALED) == 0)) {
2264 
2265 		/* indicate that the tranaction is waiting */
2266 		msgimplp->im_trans_state_flags |= IBMF_TRANS_STATE_FLAG_WAIT;
2267 
2268 		IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L3, ibmf_i_msg_transport,
2269 		    IBMF_TNF_TRACE, "",
2270 		    "ibmf_i_msg_transport(): %s, msgp = 0x%p\n",
2271 		    tnf_string, msg, "blocking for completion",
2272 		    tnf_opaque, msgimplp, msgimplp);
2273 
2274 		/* wait for transaction completion */
2275 		cv_wait(&msgimplp->im_trans_cv, &msgimplp->im_mutex);
2276 
2277 		IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L3, ibmf_i_msg_transport,
2278 		    IBMF_TNF_TRACE, "",
2279 		    "ibmf_i_msg_transport(): %s, msgp = 0x%p\n",
2280 		    tnf_string, msg, "unblocking for completion",
2281 		    tnf_opaque, msgimplp, msgimplp);
2282 
2283 		/* clean up flags */
2284 		msgimplp->im_trans_state_flags &= ~IBMF_TRANS_STATE_FLAG_WAIT;
2285 		msgimplp->im_flags &= ~IBMF_MSG_FLAGS_BUSY;
2286 
2287 		if (msgimplp->im_msg_status != IBMF_SUCCESS) {
2288 
2289 			IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1,
2290 			    ibmf_i_msg_transport_err, IBMF_TNF_ERROR, "",
2291 			    "ibmf_i_msg_transport(): msg_status = %d\n",
2292 			    tnf_uint, msgstatus, msgimplp->im_msg_status);
2293 
2294 			status = msgimplp->im_msg_status;
2295 		}
2296 	} else if (blocking && (msgimplp->im_trans_state_flags &
2297 	    IBMF_TRANS_STATE_FLAG_SIGNALED)) {
2298 		msgimplp->im_flags &= ~IBMF_MSG_FLAGS_BUSY;
2299 
2300 		if (msgimplp->im_msg_status != IBMF_SUCCESS) {
2301 			IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1,
2302 			    ibmf_i_msg_transport_err, IBMF_TNF_ERROR, "",
2303 			    "ibmf_i_msg_transport(): msg_status = %d\n",
2304 			    tnf_uint, msgstatus, msgimplp->im_msg_status);
2305 			status = msgimplp->im_msg_status;
2306 		}
2307 	}
2308 
2309 	msg_rp_unset_id = msg_tr_unset_id = 0;
2310 	msg_rp_unset_id = msgimplp->im_rp_unset_timeout_id;
2311 	msg_tr_unset_id = msgimplp->im_tr_unset_timeout_id;
2312 	msgimplp->im_rp_unset_timeout_id = 0;
2313 	msgimplp->im_tr_unset_timeout_id = 0;
2314 
2315 	mutex_exit(&msgimplp->im_mutex);
2316 
2317 	/* Unset the timers */
2318 	if (msg_rp_unset_id != 0) {
2319 		(void) untimeout(msg_rp_unset_id);
2320 	}
2321 
2322 	if (msg_tr_unset_id != 0) {
2323 		(void) untimeout(msg_tr_unset_id);
2324 	}
2325 
2326 	/* increment kstats of the number of sent messages */
2327 	mutex_enter(&clientp->ic_kstat_mutex);
2328 	IBMF_ADD32_KSTATS(clientp, msgs_sent, 1);
2329 	mutex_exit(&clientp->ic_kstat_mutex);
2330 
2331 bail:
2332 	if (error) {
2333 		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
2334 		    ibmf_i_msg_transport_err, IBMF_TNF_ERROR, "",
2335 		    "ibmf_i_msg_transport(): %s, msgp = 0x%p\n",
2336 		    tnf_string, msg, errmsg, tnf_opaque, msgimplp, msgimplp);
2337 	}
2338 
2339 	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4,	ibmf_i_msg_transport_end,
2340 	    IBMF_TNF_TRACE, "", "ibmf_i_msg_transport() exit, status = %d\n",
2341 	    tnf_uint, status, status);
2342 
2343 	return (status);
2344 }
2345 
2346 /*
2347  * ibmf_i_init_msg():
2348  *	Initialize the message fields
2349  */
2350 void
ibmf_i_init_msg(ibmf_msg_impl_t * msgimplp,ibmf_msg_cb_t trans_cb,void * trans_cb_arg,ibmf_retrans_t * retrans,boolean_t block)2351 ibmf_i_init_msg(ibmf_msg_impl_t *msgimplp, ibmf_msg_cb_t trans_cb,
2352     void *trans_cb_arg, ibmf_retrans_t *retrans, boolean_t block)
2353 {
2354 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_init_msg_start,
2355 	    IBMF_TNF_TRACE, "", "ibmf_i_init_msg() enter\n");
2356 
2357 	_NOTE(ASSUMING_PROTECTED(msgimplp->im_trans_cb,
2358 	    msgimplp->im_trans_cb_arg))
2359 
2360 	if (block == B_TRUE)
2361 		msgimplp->im_msg_flags |= IBMF_MSG_FLAGS_BLOCKING;
2362 	msgimplp->im_trans_cb = trans_cb;
2363 	msgimplp->im_trans_cb_arg = trans_cb_arg;
2364 
2365 	bzero(&msgimplp->im_retrans, sizeof (ibmf_retrans_t));
2366 	if (retrans != NULL) {
2367 		bcopy((void *)retrans, (void *)&msgimplp->im_retrans,
2368 		    sizeof (ibmf_retrans_t));
2369 	}
2370 
2371 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_init_msg_end,
2372 	    IBMF_TNF_TRACE, "", "ibmf_i_init_msg() exit\n");
2373 }
2374 
2375 /*
2376  * ibmf_i_alloc_qp():
2377  *	Allocate a QP context for the alternate QPs
2378  */
2379 int
ibmf_i_alloc_qp(ibmf_client_t * clientp,ib_pkey_t p_key,ib_qkey_t q_key,uint_t flags,ibmf_qp_handle_t * ibmf_qp_handlep)2380 ibmf_i_alloc_qp(ibmf_client_t *clientp, ib_pkey_t p_key, ib_qkey_t q_key,
2381     uint_t flags, ibmf_qp_handle_t *ibmf_qp_handlep)
2382 {
2383 	ibmf_ci_t		*ibmf_cip = clientp->ic_myci;
2384 	ibt_qp_alloc_attr_t	qp_attrs;
2385 	ibt_qp_info_t		qp_modify_attr;
2386 	ibmf_alt_qp_t		*qp_ctx;
2387 	uint16_t		pkey_ix;
2388 	ibt_status_t		ibt_status;
2389 	int			i, blocking;
2390 	boolean_t		error = B_FALSE;
2391 	int			status = IBMF_SUCCESS;
2392 	char			errmsg[128];
2393 
2394 
2395 	IBMF_TRACE_3(IBMF_TNF_DEBUG, DPRINT_L4,
2396 	    ibmf_i_alloc_qp_start, IBMF_TNF_TRACE, "",
2397 	    "ibmf_i_alloc_qp() enter, clientp = %p, pkey = %x, qkey = %x \n",
2398 	    tnf_opaque, clientp, clientp, tnf_uint, p_key, p_key,
2399 	    tnf_uint, q_key, q_key);
2400 
2401 	/*
2402 	 * get the pkey index associated with this pkey if present in table
2403 	 */
2404 	if (ibmf_i_get_pkeyix(clientp->ic_ci_handle, p_key,
2405 	    clientp->ic_client_info.port_num, &pkey_ix) != IBMF_SUCCESS) {
2406 		(void) sprintf(errmsg, "pkey not in table, pkey = %x", p_key);
2407 		error = B_TRUE;
2408 		status = IBMF_FAILURE;
2409 		goto bail;
2410 	}
2411 
2412 	/* allocate QP context memory */
2413 	qp_ctx = (ibmf_alt_qp_t *)kmem_zalloc(sizeof (ibmf_alt_qp_t),
2414 	    (flags & IBMF_ALLOC_SLEEP) ? KM_SLEEP : KM_NOSLEEP);
2415 	if (qp_ctx == NULL) {
2416 		(void) sprintf(errmsg, "failed to kmem_zalloc qp ctx");
2417 		error = B_TRUE;
2418 		status = IBMF_NO_RESOURCES;
2419 		goto bail;
2420 	}
2421 
2422 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qp_ctx));
2423 
2424 	/* setup the qp attrs for the alloc call */
2425 	qp_attrs.qp_scq_hdl = ibmf_cip->ci_alt_cq_handle;
2426 	qp_attrs.qp_rcq_hdl = ibmf_cip->ci_alt_cq_handle;
2427 	qp_attrs.qp_pd_hdl = ibmf_cip->ci_pd;
2428 	qp_attrs.qp_sizes.cs_sq_sgl = IBMF_MAX_SQ_WR_SGL_ELEMENTS;
2429 	qp_attrs.qp_sizes.cs_rq_sgl = IBMF_MAX_RQ_WR_SGL_ELEMENTS;
2430 	qp_attrs.qp_sizes.cs_sq = ibmf_send_wqes_posted_per_qp;
2431 	qp_attrs.qp_sizes.cs_rq = ibmf_recv_wqes_posted_per_qp;
2432 	qp_attrs.qp_flags = IBT_ALL_SIGNALED;
2433 	qp_attrs.qp_alloc_flags = IBT_QP_NO_FLAGS;
2434 
2435 	/* request IBT for a qp with the desired attributes */
2436 	ibt_status = ibt_alloc_qp(clientp->ic_ci_handle, IBT_UD_RQP,
2437 	    &qp_attrs, &qp_ctx->isq_qp_sizes, &qp_ctx->isq_qpn,
2438 	    &qp_ctx->isq_qp_handle);
2439 	if (ibt_status != IBT_SUCCESS) {
2440 		kmem_free(qp_ctx, sizeof (ibmf_alt_qp_t));
2441 		(void) sprintf(errmsg, "failed to alloc qp, status = %d",
2442 		    ibt_status);
2443 		error = B_TRUE;
2444 		status = IBMF_NO_RESOURCES;
2445 		goto bail;
2446 	}
2447 
2448 	qp_modify_attr.qp_trans = IBT_UD_SRV;
2449 	qp_modify_attr.qp_flags = IBT_CEP_NO_FLAGS;
2450 	qp_modify_attr.qp_transport.ud.ud_qkey = q_key;
2451 	qp_modify_attr.qp_transport.ud.ud_sq_psn = 0;
2452 	qp_modify_attr.qp_transport.ud.ud_pkey_ix = pkey_ix;
2453 	qp_modify_attr.qp_transport.ud.ud_port =
2454 	    clientp->ic_client_info.port_num;
2455 
2456 	/* Set up the client handle in the QP context */
2457 	qp_ctx->isq_client_hdl = clientp;
2458 
2459 	/* call the IB transport to initialize the QP */
2460 	ibt_status = ibt_initialize_qp(qp_ctx->isq_qp_handle, &qp_modify_attr);
2461 	if (ibt_status != IBT_SUCCESS) {
2462 		(void) ibt_free_qp(qp_ctx->isq_qp_handle);
2463 		kmem_free(qp_ctx, sizeof (ibmf_alt_qp_t));
2464 		(void) sprintf(errmsg, "failed to initialize qp, status = %d",
2465 		    ibt_status);
2466 		error = B_TRUE;
2467 		status = IBMF_NO_RESOURCES;
2468 		goto bail;
2469 	}
2470 
2471 	/* Set up the WQE caches */
2472 	status = ibmf_i_init_altqp_wqes(qp_ctx);
2473 	if (status != IBMF_SUCCESS) {
2474 		(void) ibt_free_qp(qp_ctx->isq_qp_handle);
2475 		kmem_free(qp_ctx, sizeof (ibmf_alt_qp_t));
2476 		(void) sprintf(errmsg, "failed to init wqe caches, status = %d",
2477 		    status);
2478 		error = B_TRUE;
2479 		goto bail;
2480 	}
2481 
2482 	qp_ctx->isq_next = NULL;
2483 	qp_ctx->isq_pkey = p_key;
2484 	qp_ctx->isq_qkey = q_key;
2485 	qp_ctx->isq_port_num = clientp->ic_client_info.port_num;
2486 	mutex_init(&qp_ctx->isq_mutex, NULL, MUTEX_DRIVER, NULL);
2487 	mutex_init(&qp_ctx->isq_wqe_mutex, NULL, MUTEX_DRIVER, NULL);
2488 	cv_init(&qp_ctx->isq_recv_cb_teardown_cv, NULL, CV_DRIVER, NULL);
2489 	cv_init(&qp_ctx->isq_sqd_cv, NULL, CV_DRIVER, NULL);
2490 	cv_init(&qp_ctx->isq_wqes_cv, NULL, CV_DRIVER, NULL);
2491 
2492 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*qp_ctx));
2493 
2494 	/* add alt qp to the list in CI context */
2495 	mutex_enter(&ibmf_cip->ci_mutex);
2496 	if (ibmf_cip->ci_alt_qp_list == NULL) {
2497 		ibmf_cip->ci_alt_qp_list = qp_ctx;
2498 	} else {
2499 		ibmf_alt_qp_t *qpp;
2500 
2501 		qpp = ibmf_cip->ci_alt_qp_list;
2502 		while (qpp->isq_next != NULL) {
2503 			qpp = qpp->isq_next;
2504 		}
2505 		qpp->isq_next = qp_ctx;
2506 	}
2507 	mutex_exit(&ibmf_cip->ci_mutex);
2508 
2509 	*ibmf_qp_handlep = (ibmf_qp_handle_t)qp_ctx;
2510 
2511 	if (flags & IBMF_ALLOC_SLEEP)
2512 		blocking = 1;
2513 	else
2514 		blocking = 0;
2515 
2516 	/* post the max number of buffers to RQ */
2517 	for (i = 0; i < ibmf_recv_wqes_per_port; i++) {
2518 		status = ibmf_i_post_recv_buffer(ibmf_cip, clientp->ic_qp,
2519 		    blocking, *ibmf_qp_handlep);
2520 		if (status != IBMF_SUCCESS) {
2521 			IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L3,
2522 			    ibmf_i_alloc_qp, IBMF_TNF_TRACE, "",
2523 			    "ibmf_i_alloc_qp(): %s, status = %d\n",
2524 			    tnf_string, msg, "ibmf_i_post_recv_buffer() failed",
2525 			    tnf_int, status, status);
2526 		}
2527 	}
2528 
2529 	mutex_enter(&clientp->ic_kstat_mutex);
2530 	IBMF_ADD32_KSTATS(clientp, alt_qps_alloced, 1);
2531 	mutex_exit(&clientp->ic_kstat_mutex);
2532 
2533 bail:
2534 	if (error) {
2535 		IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1,
2536 		    ibmf_i_alloc_qp_err, IBMF_TNF_TRACE, "",
2537 		    "ibmf_i_alloc_qp(): %s\n", tnf_string, msg, errmsg);
2538 	}
2539 
2540 	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_alloc_qp_end,
2541 	    IBMF_TNF_TRACE, "", "ibmf_i_alloc_qp() exit, qp = %p\n",
2542 	    tnf_opaque, qp_handlep, *ibmf_qp_handlep);
2543 	return (status);
2544 }
2545 
2546 /*
2547  * ibmf_i_free_qp():
2548  *	Free an alternate QP context
2549  */
2550 /* ARGSUSED */
2551 int
ibmf_i_free_qp(ibmf_qp_handle_t ibmf_qp_handle,uint_t flags)2552 ibmf_i_free_qp(ibmf_qp_handle_t ibmf_qp_handle, uint_t flags)
2553 {
2554 	ibmf_alt_qp_t		*qp_ctx = (ibmf_alt_qp_t *)ibmf_qp_handle;
2555 	ibmf_client_t		*clientp = qp_ctx->isq_client_hdl;
2556 	ibmf_ci_t		*ibmf_cip = qp_ctx->isq_client_hdl->ic_myci;
2557 	ibmf_alt_qp_t		*qpp, *pqpp;
2558 	ibt_status_t		ibt_status;
2559 
2560 	IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L4,
2561 	    ibmf_i_free_qp_start, IBMF_TNF_TRACE, "",
2562 	    "ibmf_i_free_qp() enter, qp_hdl = %p, flags = %x\n",
2563 	    tnf_opaque, qp_hdl, ibmf_qp_handle, tnf_uint, flags, flags);
2564 
2565 	/* remove qp from the list in CI context */
2566 
2567 	mutex_enter(&ibmf_cip->ci_mutex);
2568 	qpp = ibmf_cip->ci_alt_qp_list;
2569 	ASSERT(qpp != NULL);
2570 	if (qpp == qp_ctx) {
2571 		ibmf_cip->ci_alt_qp_list = qpp->isq_next;
2572 	} else {
2573 		while (qpp != NULL) {
2574 			if (qpp == qp_ctx)
2575 				break;
2576 			pqpp = qpp;
2577 			qpp = qpp->isq_next;
2578 		}
2579 		ASSERT(qpp != NULL);
2580 		pqpp->isq_next = qpp->isq_next;
2581 	}
2582 
2583 	mutex_exit(&ibmf_cip->ci_mutex);
2584 
2585 	/* flush the WQEs in the QP queues */
2586 	ibt_status = ibt_flush_qp(qp_ctx->isq_qp_handle);
2587 	if (ibt_status != IBT_SUCCESS) {
2588 		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
2589 		    ibmf_i_free_qp_err, IBMF_TNF_TRACE, "",
2590 		    "ibmf_i_free_qp(): %s, status = %d\n",
2591 		    tnf_string, msg, "failed to close qp",
2592 		    tnf_uint, ibt_status, ibt_status);
2593 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_free_qp_end,
2594 		    IBMF_TNF_TRACE, "", "ibmf_i_free_qp() exit\n");
2595 		return (IBMF_TRANSPORT_FAILURE);
2596 	}
2597 
2598 	/* Call the MAD completion handler */
2599 	ibmf_i_mad_completions(ibmf_cip->ci_alt_cq_handle, (void*)ibmf_cip);
2600 
2601 	/* Wait here for all WQE owned by this QP to get freed */
2602 	mutex_enter(&qpp->isq_mutex);
2603 	while (qpp->isq_wqes_alloced != 0) {
2604 		cv_wait(&qpp->isq_wqes_cv, &qpp->isq_mutex);
2605 	}
2606 	mutex_exit(&qpp->isq_mutex);
2607 
2608 	cv_destroy(&qp_ctx->isq_recv_cb_teardown_cv);
2609 	cv_destroy(&qp_ctx->isq_sqd_cv);
2610 	cv_destroy(&qp_ctx->isq_wqes_cv);
2611 
2612 	/* call the IB transport to free the QP */
2613 	ibt_status = ibt_free_qp(qp_ctx->isq_qp_handle);
2614 	if (ibt_status != IBT_SUCCESS) {
2615 		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
2616 		    ibmf_i_free_qp_err, IBMF_TNF_TRACE, "",
2617 		    "ibmf_i_free_qp(): %s, status = %d\n",
2618 		    tnf_string, msg, "failed to free qp",
2619 		    tnf_uint, ibt_status, ibt_status);
2620 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_free_qp_end,
2621 		    IBMF_TNF_TRACE, "", "ibmf_i_free_qp() exit\n");
2622 		return (IBMF_TRANSPORT_FAILURE);
2623 	}
2624 
2625 	/* Clean up the WQE caches */
2626 	ibmf_i_fini_altqp_wqes(qp_ctx);
2627 	mutex_destroy(&qp_ctx->isq_wqe_mutex);
2628 	mutex_destroy(&qp_ctx->isq_mutex);
2629 
2630 	mutex_enter(&clientp->ic_kstat_mutex);
2631 	IBMF_SUB32_KSTATS(clientp, alt_qps_alloced, 1);
2632 	mutex_exit(&clientp->ic_kstat_mutex);
2633 
2634 	kmem_free(qp_ctx, sizeof (ibmf_alt_qp_t));
2635 
2636 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_free_qp_end,
2637 	    IBMF_TNF_TRACE, "", "ibmf_i_free_qp() exit\n");
2638 
2639 	return (IBMF_SUCCESS);
2640 }
2641 
2642 /*
2643  * ibmf_i_query_qp():
2644  *	Query an alternate QP context
2645  */
2646 /* ARGSUSED */
2647 int
ibmf_i_query_qp(ibmf_qp_handle_t ibmf_qp_handle,uint_t flags,uint_t * qp_nump,ib_pkey_t * p_keyp,ib_qkey_t * q_keyp,uint8_t * portnump)2648 ibmf_i_query_qp(ibmf_qp_handle_t ibmf_qp_handle, uint_t flags,
2649     uint_t *qp_nump, ib_pkey_t *p_keyp, ib_qkey_t *q_keyp, uint8_t *portnump)
2650 {
2651 	ibt_qp_query_attr_t	qp_query;
2652 	ibmf_alt_qp_t		*qp_ctx = (ibmf_alt_qp_t *)ibmf_qp_handle;
2653 	uint16_t		pkey_ix;
2654 	ibt_status_t		ibt_status;
2655 
2656 	IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L4,
2657 	    ibmf_i_free_qp_start, IBMF_TNF_TRACE, "",
2658 	    "ibmf_i_free_qp() enter, qp_hdl = %p, flags = %x\n",
2659 	    tnf_opaque, qp_hdl, ibmf_qp_handle, tnf_uint, flags, flags);
2660 
2661 	ibt_status = ibt_query_qp(qp_ctx->isq_qp_handle, &qp_query);
2662 	if (ibt_status != IBT_SUCCESS) {
2663 		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
2664 		    ibmf_i_query_qp_err, IBMF_TNF_TRACE, "",
2665 		    "ibmf_i_query_qp(): %s, status = %d\n",
2666 		    tnf_string, msg, "failed to query qp",
2667 		    tnf_uint, ibt_status, ibt_status);
2668 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_query_qp_end,
2669 		    IBMF_TNF_TRACE, "", "ibmf_i_query_qp() exit\n");
2670 		return (IBMF_TRANSPORT_FAILURE);
2671 	}
2672 
2673 	/* move the desired attributes into the locations provided */
2674 	*qp_nump = qp_query.qp_qpn;
2675 	*q_keyp = qp_query.qp_info.qp_transport.ud.ud_qkey;
2676 	*portnump = qp_query.qp_info.qp_transport.ud.ud_port;
2677 
2678 	pkey_ix = qp_query.qp_info.qp_transport.ud.ud_pkey_ix;
2679 
2680 	/* get the pkey based on the pkey_ix */
2681 	ibt_status = ibt_index2pkey(qp_ctx->isq_client_hdl->ic_ci_handle,
2682 	    *portnump, pkey_ix, p_keyp);
2683 	if (ibt_status != IBT_SUCCESS) {
2684 		IBMF_TRACE_3(IBMF_TNF_NODEBUG, DPRINT_L1,
2685 		    ibmf_i_query_qp_err, IBMF_TNF_TRACE, "",
2686 		    "ibmf_i_query_qp(): %s, pkey_ix = %d, status = %d\n",
2687 		    tnf_string, msg, "failed to get pkey from index",
2688 		    tnf_uint, pkey_ix, pkey_ix,
2689 		    tnf_uint, ibt_status, ibt_status);
2690 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_query_qp_end,
2691 		    IBMF_TNF_TRACE, "", "ibmf_i_query_qp() exit\n");
2692 		return (IBMF_TRANSPORT_FAILURE);
2693 	}
2694 
2695 	IBMF_TRACE_4(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_query_qp_end,
2696 	    IBMF_TNF_TRACE, "", "ibmf_i_query_qp() exit, qp_num = 0x%x, "
2697 	    "pkey = 0x%x, qkey = 0x%x, portnum = %d\n",
2698 	    tnf_uint, qp_num, *qp_nump, tnf_uint, pkey, *p_keyp,
2699 	    tnf_uint, qkey, *q_keyp, tnf_uint, portnum, *portnump);
2700 
2701 	return (IBMF_SUCCESS);
2702 }
2703 
2704 /*
2705  * ibmf_i_modify_qp():
2706  *	Modify an alternate QP context
2707  */
2708 /* ARGSUSED */
2709 int
ibmf_i_modify_qp(ibmf_qp_handle_t ibmf_qp_handle,ib_pkey_t p_key,ib_qkey_t q_key,uint_t flags)2710 ibmf_i_modify_qp(ibmf_qp_handle_t ibmf_qp_handle, ib_pkey_t p_key,
2711     ib_qkey_t q_key, uint_t flags)
2712 {
2713 	ibmf_alt_qp_t		*qp_ctx = (ibmf_alt_qp_t *)ibmf_qp_handle;
2714 	ibmf_client_t		*clientp = qp_ctx->isq_client_hdl;
2715 	ibmf_ci_t		*ibmf_cip = clientp->ic_myci;
2716 	ibmf_alt_qp_t		*qpp;
2717 	ibt_qp_info_t		qp_mod;
2718 	ibt_cep_modify_flags_t	qp_mod_flags;
2719 	ibt_queue_sizes_t	actual_sz;
2720 	uint16_t		pkey_ix;
2721 	ibt_status_t		ibt_status;
2722 
2723 	IBMF_TRACE_4(IBMF_TNF_DEBUG, DPRINT_L4,
2724 	    ibmf_i_modify_qp_start, IBMF_TNF_TRACE, "",
2725 	    "ibmf_i_modify_qp() enter, qp_hdl = %p, flags = %x, pkey = 0x%x, "
2726 	    "qkey = 0x%x\n", tnf_opaque, qp_hdl, ibmf_qp_handle,
2727 	    tnf_uint, flags, flags, tnf_uint, p_key, p_key,
2728 	    tnf_uint, q_key, q_key);
2729 
2730 	/*
2731 	 * get the pkey index associated with this pkey if present in table
2732 	 */
2733 	if (ibmf_i_get_pkeyix(clientp->ic_ci_handle, p_key,
2734 	    clientp->ic_client_info.port_num, &pkey_ix) != IBMF_SUCCESS) {
2735 		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
2736 		    ibmf_i_modify_qp_err, IBMF_TNF_TRACE, "",
2737 		    "ibmf_i_modify_qp(): %s, pkey = %x\n",
2738 		    tnf_string, msg, "pkey not in table",
2739 		    tnf_uint, pkey, p_key);
2740 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_modify_qp_end,
2741 		    IBMF_TNF_TRACE, "", "ibmf_i_modify_qp() exit\n");
2742 		return (IBMF_FAILURE);
2743 	}
2744 
2745 	/* Find the QP context in the CI QP context list */
2746 	mutex_enter(&ibmf_cip->ci_mutex);
2747 	qpp = ibmf_cip->ci_alt_qp_list;
2748 	while (qpp != NULL) {
2749 		if (qpp == qp_ctx) {
2750 			break;
2751 		}
2752 		qpp = qpp->isq_next;
2753 	}
2754 
2755 	if (qpp == NULL) {
2756 		mutex_exit(&ibmf_cip->ci_mutex);
2757 
2758 		IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1,
2759 		    ibmf_i_modify_qp_err, IBMF_TNF_TRACE, "",
2760 		    "ibmf_i_modify_qp(): %s\n",
2761 		    tnf_string, msg, "QP not in altqp list");
2762 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_modify_qp_end,
2763 		    IBMF_TNF_TRACE, "", "ibmf_i_modify_qp() exit\n");
2764 		return (IBMF_BAD_QP_HANDLE);
2765 
2766 	} else {
2767 
2768 		mutex_enter(&qp_ctx->isq_mutex);
2769 	}
2770 
2771 	mutex_exit(&ibmf_cip->ci_mutex);
2772 
2773 	/*
2774 	 * Transition the QP to SQD state
2775 	 */
2776 	bzero(&qp_mod, sizeof (ibt_qp_info_t));
2777 	qp_mod.qp_trans = IBT_UD_SRV;
2778 	qp_mod.qp_state = IBT_STATE_SQD;
2779 	qp_mod_flags = IBT_CEP_SET_STATE | IBT_CEP_SET_SQD_EVENT;
2780 	ibt_status = ibt_modify_qp(qp_ctx->isq_qp_handle, qp_mod_flags,
2781 	    &qp_mod, &actual_sz);
2782 	if (ibt_status != IBT_SUCCESS) {
2783 		mutex_exit(&qp_ctx->isq_mutex);
2784 		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
2785 		    ibmf_i_modify_qp_err, IBMF_TNF_TRACE, "",
2786 		    "ibmf_i_modify_qp(): %s, qp_hdl = %p\n",
2787 		    tnf_string, msg, "QP transition RTS to SQD failed",
2788 		    tnf_opaque, qp_handle, qp_ctx->isq_qp_handle);
2789 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_modify_qp_end,
2790 		    IBMF_TNF_TRACE, "", "ibmf_i_modify_qp() exit\n");
2791 		return (IBMF_TRANSPORT_FAILURE);
2792 	}
2793 
2794 	/*
2795 	 * Wait for an event indicating that the QP is in SQD state
2796 	 */
2797 	cv_wait(&qp_ctx->isq_sqd_cv, &qp_ctx->isq_mutex);
2798 
2799 	/* Setup QP modification information for transition to RTS state */
2800 	bzero(&qp_mod, sizeof (ibt_qp_info_t));
2801 	qp_mod.qp_trans = IBT_UD_SRV;
2802 	qp_mod.qp_state = IBT_STATE_RTS;
2803 	qp_mod.qp_current_state = IBT_STATE_SQD;
2804 	qp_mod.qp_transport.ud.ud_pkey_ix = pkey_ix;
2805 	qp_mod.qp_transport.ud.ud_qkey = q_key;
2806 	qp_mod_flags = IBT_CEP_SET_STATE | IBT_CEP_SET_PKEY_IX |
2807 	    IBT_CEP_SET_QKEY;
2808 
2809 	/*
2810 	 * transition the QP back to RTS state to allow
2811 	 * modification of the pkey and qkey
2812 	 */
2813 
2814 	ibt_status = ibt_modify_qp(qp_ctx->isq_qp_handle, qp_mod_flags,
2815 	    &qp_mod, &actual_sz);
2816 	if (ibt_status != IBT_SUCCESS) {
2817 		mutex_exit(&qp_ctx->isq_mutex);
2818 		IBMF_TRACE_3(IBMF_TNF_NODEBUG, DPRINT_L1,
2819 		    ibmf_i_modify_qp_err, IBMF_TNF_TRACE, "",
2820 		    "ibmf_i_modify_qp(): %s, qp_hdl = %p, status = %d\n",
2821 		    tnf_string, msg, "QP transition SQD to RTS failed",
2822 		    tnf_opaque, qp_handle, qp_ctx->isq_qp_handle,
2823 		    tnf_uint, ibt_status, ibt_status);
2824 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_modify_qp_end,
2825 		    IBMF_TNF_TRACE, "", "ibmf_i_modify_qp() exit\n");
2826 		return (IBMF_TRANSPORT_FAILURE);
2827 	}
2828 
2829 	qp_ctx->isq_pkey = p_key;
2830 	qp_ctx->isq_qkey = q_key;
2831 	mutex_exit(&qp_ctx->isq_mutex);
2832 
2833 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_modify_qp_end,
2834 	    IBMF_TNF_TRACE, "", "ibmf_i_modify_qp() exit\n");
2835 	return (IBMF_SUCCESS);
2836 }
2837 
2838 /*
2839  * ibmf_i_post_recv_buffer():
2840  *	Post a WQE to the RQ of the specified QP
2841  */
2842 int
ibmf_i_post_recv_buffer(ibmf_ci_t * cip,ibmf_qp_t * qpp,boolean_t block,ibmf_qp_handle_t ibmf_qp_handle)2843 ibmf_i_post_recv_buffer(ibmf_ci_t *cip, ibmf_qp_t *qpp, boolean_t block,
2844     ibmf_qp_handle_t ibmf_qp_handle)
2845 {
2846 	int			ret;
2847 	ibt_wr_ds_t		*sgl;
2848 	ibt_status_t		status;
2849 	ibmf_recv_wqe_t		*recv_wqep;
2850 	ibt_qp_hdl_t		ibt_qp_handle;
2851 	struct kmem_cache	*kmem_cachep;
2852 	ibmf_alt_qp_t		*altqp;
2853 
2854 	IBMF_TRACE_4(IBMF_TNF_DEBUG, DPRINT_L4,
2855 	    ibmf_i_post_recv_buffer_start, IBMF_TNF_TRACE, "",
2856 	    "ibmf_i_post_recv_buffer() enter, cip = %p, qpp = %p, "
2857 	    "qp_hdl = %p, block = %d\n", tnf_opaque, cip, cip,
2858 	    tnf_opaque, qpp, qpp, tnf_opaque, qp_hdl, ibmf_qp_handle,
2859 	    tnf_uint, block, block);
2860 
2861 	/*
2862 	 * if we haven't hit the max wqes per qp, attempt to allocate a recv
2863 	 * wqe and post it to the recv queue.
2864 	 * It is possible for more than one thread to get through this
2865 	 * check below and post wqes that could push us above the
2866 	 * ibmf_recv_wqes_posted_per_qp. We catch that case when the recv
2867 	 * completion is signaled.
2868 	 */
2869 	ASSERT(MUTEX_NOT_HELD(&cip->ci_mutex));
2870 
2871 	/* Get the WQE kmem cache pointer based on the QP type */
2872 	if (ibmf_qp_handle == IBMF_QP_HANDLE_DEFAULT)
2873 		kmem_cachep = cip->ci_recv_wqes_cache;
2874 	else {
2875 		altqp = (ibmf_alt_qp_t *)ibmf_qp_handle;
2876 		kmem_cachep = altqp->isq_recv_wqes_cache;
2877 	}
2878 
2879 	/* allocate a receive WQE from the receive WQE kmem cache */
2880 	recv_wqep = kmem_cache_alloc(kmem_cachep,
2881 	    (block == B_TRUE ? KM_SLEEP : KM_NOSLEEP));
2882 	if (recv_wqep == NULL) {
2883 		/*
2884 		 * Attempt to extend the cache and then retry the
2885 		 * kmem_cache_alloc()
2886 		 */
2887 		if (ibmf_i_extend_wqe_cache(cip, ibmf_qp_handle, block) ==
2888 		    IBMF_NO_RESOURCES) {
2889 			mutex_enter(&cip->ci_mutex);
2890 			IBMF_ADD32_PORT_KSTATS(cip, rwqe_allocs_failed, 1);
2891 			mutex_exit(&cip->ci_mutex);
2892 			IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
2893 			    ibmf_i_post_recv_buffer_err, IBMF_TNF_ERROR, "",
2894 			    "ibmf_i_post_recv_buffer(): %s, status = %d\n",
2895 			    tnf_string, msg, "alloc recv_wqe failed",
2896 			    tnf_int, ibmf_status, IBMF_NO_RESOURCES);
2897 			IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
2898 			    ibmf_i_post_recv_buffer_end, IBMF_TNF_TRACE, "",
2899 			    "ibmf_i_post_recv_buffer() exit\n");
2900 			return (IBMF_NO_RESOURCES);
2901 		} else {
2902 			recv_wqep = kmem_cache_alloc(kmem_cachep,
2903 			    (block == B_TRUE ? KM_SLEEP : KM_NOSLEEP));
2904 			if (recv_wqep == NULL) {
2905 				/* Allocation failed again. Give up here. */
2906 				mutex_enter(&cip->ci_mutex);
2907 				IBMF_ADD32_PORT_KSTATS(cip, rwqe_allocs_failed,
2908 				    1);
2909 				mutex_exit(&cip->ci_mutex);
2910 				IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
2911 				    ibmf_i_post_recv_buffer_err,
2912 				    IBMF_TNF_ERROR, "",
2913 				    "ibmf_i_post_recv_buffer(): %s, "
2914 				    "status = %d\n",
2915 				    tnf_string, msg, "alloc recv_wqe failed",
2916 				    tnf_int, ibmf_status, IBMF_NO_RESOURCES);
2917 				IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
2918 				    ibmf_i_post_recv_buffer_end,
2919 				    IBMF_TNF_TRACE, "",
2920 				    "ibmf_i_post_recv_buffer() exit\n");
2921 				return (IBMF_NO_RESOURCES);
2922 			}
2923 		}
2924 	}
2925 
2926 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*recv_wqep))
2927 
2928 	/*
2929 	 * if the qp handle provided in ibmf_send_pkt() or
2930 	 * ibmf_setup_recv_cb() is not the default qp handle
2931 	 * for this client, then the wqe must be queued on this qp,
2932 	 * else use the default qp handle set up during ibmf_register()
2933 	 */
2934 	if (ibmf_qp_handle == IBMF_QP_HANDLE_DEFAULT) {
2935 		ibt_qp_handle = qpp->iq_qp_handle;
2936 	} else {
2937 		ibt_qp_handle =
2938 		    ((ibmf_alt_qp_t *)ibmf_qp_handle)->isq_qp_handle;
2939 	}
2940 
2941 	/* allocate memory for the scatter-gather list */
2942 	sgl = kmem_zalloc(IBMF_MAX_RQ_WR_SGL_ELEMENTS * sizeof (ibt_wr_ds_t),
2943 	    (block == B_TRUE) ? KM_SLEEP : KM_NOSLEEP);
2944 	if (sgl == NULL) {
2945 		kmem_cache_free(kmem_cachep, recv_wqep);
2946 		IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1,
2947 		    ibmf_i_post_recv_buffer_err, IBMF_TNF_ERROR, "",
2948 		    "ibmf_i_post_recv_buffer(): %s\n",
2949 		    tnf_string, msg, "failed to kmem_zalloc qp ctx");
2950 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
2951 		    ibmf_i_post_recv_buffer_end, IBMF_TNF_TRACE, "",
2952 		    "ibmf_i_post_recv_buffer() exit\n");
2953 		return (IBMF_NO_RESOURCES);
2954 	}
2955 
2956 	/* initialize it */
2957 	ibmf_i_init_recv_wqe(qpp, sgl, recv_wqep, ibt_qp_handle,
2958 	    ibmf_qp_handle);
2959 
2960 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*recv_wqep))
2961 
2962 	/* and post it */
2963 	status = ibt_post_recv(recv_wqep->recv_qp_handle, &recv_wqep->recv_wr,
2964 	    1, NULL);
2965 
2966 	ret = ibmf_i_ibt_to_ibmf_status(status);
2967 	if (ret != IBMF_SUCCESS) {
2968 		kmem_free(sgl, IBMF_MAX_RQ_WR_SGL_ELEMENTS *
2969 		    sizeof (ibt_wr_ds_t));
2970 		kmem_cache_free(kmem_cachep, recv_wqep);
2971 		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
2972 		    ibmf_i_post_recv_buffer_err, IBMF_TNF_ERROR, "",
2973 		    "ibmf_i_post_recv_buffer(): %s, status = %d\n",
2974 		    tnf_string, msg, "ibt_post_recv failed",
2975 		    tnf_uint, ibt_status, status);
2976 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
2977 		    ibmf_i_post_recv_buffer_end, IBMF_TNF_TRACE, "",
2978 		    "ibmf_i_post_recv_buffer() exit\n");
2979 		return (ret);
2980 	}
2981 
2982 	mutex_enter(&cip->ci_mutex);
2983 	IBMF_ADD32_PORT_KSTATS(cip, recv_wqes_alloced, 1);
2984 	mutex_exit(&cip->ci_mutex);
2985 	if (ibmf_qp_handle == IBMF_QP_HANDLE_DEFAULT) {
2986 		mutex_enter(&qpp->iq_mutex);
2987 		qpp->iq_rwqes_posted++;
2988 		mutex_exit(&qpp->iq_mutex);
2989 		mutex_enter(&cip->ci_mutex);
2990 		cip->ci_wqes_alloced++;
2991 		mutex_exit(&cip->ci_mutex);
2992 	} else {
2993 		mutex_enter(&altqp->isq_mutex);
2994 		altqp->isq_wqes_alloced++;
2995 		altqp->isq_rwqes_posted++;
2996 		mutex_exit(&altqp->isq_mutex);
2997 	}
2998 
2999 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_post_recv_buffer_end,
3000 	    IBMF_TNF_TRACE, "", "ibmf_i_post_recv_buffer() exit\n");
3001 
3002 	return (ret);
3003 }
3004 
3005 /*
3006  * ibmf_i_mgt_class_to_hdr_sz_off():
3007  *	Determine class header offser and size for management classes
3008  */
3009 void
ibmf_i_mgt_class_to_hdr_sz_off(uint32_t mgt_class,uint32_t * szp,uint32_t * offp)3010 ibmf_i_mgt_class_to_hdr_sz_off(uint32_t mgt_class, uint32_t *szp,
3011     uint32_t *offp)
3012 {
3013 	uint32_t	hdr_sz = 0, hdr_off = 0;
3014 
3015 	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4,
3016 	    ibmf_i_mgt_class_to_hdr_sz_off_start, IBMF_TNF_TRACE, "",
3017 	    "ibmf_i_mgt_class_to_hdr_sz_off(): mgt_class = 0x%x\n",
3018 	    tnf_uint, mgt_class, mgt_class);
3019 
3020 	switch (mgt_class) {
3021 	case MAD_MGMT_CLASS_SUBN_LID_ROUTED :
3022 	case MAD_MGMT_CLASS_SUBN_DIRECT_ROUTE :
3023 	case MAD_MGMT_CLASS_PERF :
3024 	case MAD_MGMT_CLASS_BM :
3025 	case MAD_MGMT_CLASS_DEV_MGT :
3026 	case MAD_MGMT_CLASS_SNMP :
3027 	case MAD_MGMT_CLASS_COMM_MGT:
3028 		hdr_sz = IBMF_MAD_CL_HDR_SZ_1;
3029 		hdr_off = IBMF_MAD_CL_HDR_OFF_1;
3030 		break;
3031 	case MAD_MGMT_CLASS_SUBN_ADM :
3032 		hdr_sz = IBMF_MAD_CL_HDR_SZ_2;
3033 		hdr_off = IBMF_MAD_CL_HDR_OFF_2;
3034 		break;
3035 	default:
3036 		if (((mgt_class >= MAD_MGMT_CLASS_VENDOR_START) &&
3037 		    (mgt_class <= MAD_MGMT_CLASS_VENDOR_END)) ||
3038 		    ((mgt_class >= MAD_MGMT_CLASS_APPLICATION_START) &&
3039 		    (mgt_class <= MAD_MGMT_CLASS_APPLICATION_END))) {
3040 			hdr_sz = IBMF_MAD_CL_HDR_SZ_3;
3041 			hdr_off = IBMF_MAD_CL_HDR_OFF_1;
3042 		} else if ((mgt_class >= MAD_MGMT_CLASS_VENDOR2_START) &&
3043 		    (mgt_class <= MAD_MGMT_CLASS_VENDOR2_END)) {
3044 			hdr_sz = IBMF_MAD_CL_HDR_SZ_4;
3045 			hdr_off = IBMF_MAD_CL_HDR_OFF_2;
3046 		} else {
3047 			IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4,
3048 			    ibmf_i_mgt_class_to_hdr_sz_off_start,
3049 			    IBMF_TNF_TRACE, "",
3050 			    "ibmf_i_mgt_class_to_hdr_sz_off():"
3051 			    "got illegal management class = 0x%x\n",
3052 			    tnf_uint, mgt_class, mgt_class);
3053 		}
3054 		break;
3055 	}
3056 
3057 
3058 
3059 	*szp = hdr_sz;
3060 	*offp = hdr_off;
3061 
3062 	IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L4,
3063 	    ibmf_i_mgt_class_to_hdr_sz_off_end, IBMF_TNF_TRACE, "",
3064 	    "ibmf_i_mgt_class_to_hdr_sz_off() exit,hdr_sz = %d, hdr_off = %d\n",
3065 	    tnf_uint, hdr_sz, hdr_sz, tnf_uint, hdr_off, hdr_off);
3066 }
3067 
3068 /*
3069  * ibmf_i_lookup_client_by_mgmt_class():
3070  *	Lookup the client context based on the management class of
3071  *	the incoming packet
3072  */
3073 int
ibmf_i_lookup_client_by_mgmt_class(ibmf_ci_t * ibmf_cip,int port_num,ibmf_client_type_t class,ibmf_client_t ** clientpp)3074 ibmf_i_lookup_client_by_mgmt_class(ibmf_ci_t *ibmf_cip, int port_num,
3075     ibmf_client_type_t class, ibmf_client_t **clientpp)
3076 {
3077 	ibmf_client_t 		*clientp;
3078 	ibmf_client_info_t	*client_infop;
3079 
3080 	IBMF_TRACE_3(IBMF_TNF_DEBUG, DPRINT_L4,
3081 	    ibmf_i_lookup_client_by_mgmt_class_start, IBMF_TNF_TRACE, "",
3082 	    "ibmf_i_lookup_client_by_mgmt_class() enter, cip = %p, "
3083 	    "port_num = %d, class = 0x%x\n", tnf_opaque, cip, ibmf_cip,
3084 	    tnf_int, port, port_num, tnf_opaque, class, class);
3085 
3086 	ASSERT(MUTEX_NOT_HELD(&ibmf_cip->ci_clients_mutex));
3087 
3088 	mutex_enter(&ibmf_cip->ci_clients_mutex);
3089 
3090 	clientp = ibmf_cip->ci_clients;
3091 
3092 	/* walk client context list looking for class/portnum match */
3093 	while (clientp != NULL) {
3094 		client_infop = &clientp->ic_client_info;
3095 		if (class == client_infop->client_class &&
3096 		    port_num == client_infop->port_num) {
3097 			/* found our match */
3098 			break;
3099 		}
3100 		clientp = clientp->ic_next;
3101 	}
3102 
3103 	mutex_exit(&ibmf_cip->ci_clients_mutex);
3104 
3105 	if (clientp != NULL) {
3106 		*clientpp = clientp;
3107 		IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4,
3108 		    ibmf_i_lookup_client_by_mgmt_class_end, IBMF_TNF_TRACE, "",
3109 		    "ibmf_i_lookup_client_by_mgmt_class() exit, clp = %p\n",
3110 		    tnf_opaque, clientp, clientp);
3111 		return (IBMF_SUCCESS);
3112 	} else {
3113 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
3114 		    ibmf_i_lookup_client_by_mgmt_class_end, IBMF_TNF_TRACE, "",
3115 		    "ibmf_i_lookup_client_by_mgmt_class() failure exit\n");
3116 		return (IBMF_FAILURE);
3117 	}
3118 }
3119 
3120 /*
3121  * ibmf_i_get_pkeyix():
3122  *	Get the pkey index of the pkey in the pkey table of the specified
3123  *	port. Take into account the partition membership.
3124  */
3125 int
ibmf_i_get_pkeyix(ibt_hca_hdl_t hca_handle,ib_pkey_t pkey,uint8_t port,ib_pkey_t * pkeyixp)3126 ibmf_i_get_pkeyix(ibt_hca_hdl_t hca_handle, ib_pkey_t pkey, uint8_t port,
3127     ib_pkey_t *pkeyixp)
3128 {
3129 	ib_pkey_t		tpkey;
3130 	ibt_status_t		ibt_status;
3131 
3132 	IBMF_TRACE_3(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_get_pkeyix_start,
3133 	    IBMF_TNF_TRACE, "", "ibmf_i_get_pkeyix() enter, hcahdl = %p, "
3134 	    "pkey = 0x%x, port = %d\n", tnf_opaque, hcahdl, hca_handle,
3135 	    tnf_int, pkey, pkey, tnf_int, port, port);
3136 
3137 	/*
3138 	 * If the client specifies the FULL membership pkey and the
3139 	 * pkey is not in the table, this function should fail.
3140 	 */
3141 	if (pkey & IBMF_PKEY_MEMBERSHIP_MASK) {
3142 		ibt_status = ibt_pkey2index(hca_handle, port,
3143 		    pkey, pkeyixp);
3144 		if (ibt_status != IBT_SUCCESS) {
3145 			IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1,
3146 			    ibmf_i_get_pkeyix_err, IBMF_TNF_ERROR, "",
3147 			    "ibmf_i_get_pkeyix() error status = %d\n",
3148 			    tnf_uint, ibt_status, ibt_status);
3149 			IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
3150 			    ibmf_i_get_pkeyix_end, IBMF_TNF_TRACE, "",
3151 			    "ibmf_i_get_pkeyix() exit\n");
3152 			return (IBMF_TRANSPORT_FAILURE);
3153 		}
3154 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_get_pkeyix_end,
3155 		    IBMF_TNF_TRACE, "", "ibmf_i_get_pkeyix() exit\n");
3156 		return (IBMF_SUCCESS);
3157 	}
3158 
3159 	/*
3160 	 * Limited member pkey processing
3161 	 * Check if this limited member pkey is in the pkey table
3162 	 */
3163 	ibt_status = ibt_pkey2index(hca_handle, port, pkey, pkeyixp);
3164 	if (ibt_status == IBT_SUCCESS) {
3165 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
3166 		    ibmf_i_get_pkeyix_end, IBMF_TNF_TRACE, "",
3167 		    "ibmf_i_get_pkeyix() exit\n");
3168 		return (IBMF_SUCCESS);
3169 	}
3170 
3171 	/*
3172 	 * Could not find the limited member version of the pkey.
3173 	 * Now check if the full member version of the pkey is in the
3174 	 * pkey table. If not, fail the call.
3175 	 */
3176 	tpkey = pkey | IBMF_PKEY_MEMBERSHIP_MASK;
3177 	ibt_status = ibt_pkey2index(hca_handle, port, tpkey, pkeyixp);
3178 	if (ibt_status != IBT_SUCCESS) {
3179 		IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1,
3180 		    ibmf_i_get_pkeyix_err, IBMF_TNF_ERROR, "",
3181 		    "ibmf_i_get_pkeyix() error status = %d\n",
3182 		    tnf_uint, ibt_status, ibt_status);
3183 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
3184 		    ibmf_i_get_pkeyix_end, IBMF_TNF_TRACE, "",
3185 		    "ibmf_i_get_pkeyix() exit\n");
3186 		return (IBMF_TRANSPORT_FAILURE);
3187 	}
3188 
3189 	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_get_pkeyix_end,
3190 	    IBMF_TNF_TRACE, "", "ibmf_i_get_pkeyix(): pkey_ix = %d\n",
3191 	    tnf_int, pkeyix, *pkeyixp);
3192 	return (IBMF_SUCCESS);
3193 }
3194 
3195 /*
3196  * ibmf_i_pkey_ix_to_key():
3197  *	Figure out pkey from pkey index
3198  */
3199 int
ibmf_i_pkey_ix_to_key(ibmf_ci_t * cip,uint_t port_num,uint_t pkey_ix,ib_pkey_t * pkeyp)3200 ibmf_i_pkey_ix_to_key(ibmf_ci_t *cip, uint_t port_num, uint_t pkey_ix,
3201     ib_pkey_t *pkeyp)
3202 {
3203 	ibt_status_t		ibt_status;
3204 
3205 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_pkey_ix_to_key_start,
3206 	    IBMF_TNF_TRACE, "", "ibmf_i_pkey_ix_to_key() enter\n");
3207 
3208 	ibt_status = ibt_index2pkey(cip->ci_ci_handle, port_num, pkey_ix,
3209 	    pkeyp);
3210 	if (ibt_status != IBT_SUCCESS) {
3211 		IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1,
3212 		    ibmf_i_pkey_ix_to_key, IBMF_TNF_TRACE, "",
3213 		    "ibmf_i_pkey_ix_to_key(): ibt_index2pkey failed for "
3214 		    " pkey index %d \n", tnf_uint, pkey_ix, pkey_ix);
3215 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
3216 		    ibmf_i_pkey_ix_to_key_end,
3217 		    IBMF_TNF_TRACE, "", "ibmf_i_pkey_ix_to_key() exit\n");
3218 		return (IBMF_TRANSPORT_FAILURE);
3219 	}
3220 
3221 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_pkey_ix_to_key_end,
3222 	    IBMF_TNF_TRACE, "", "ibmf_i_pkey_ix_to_key() exit\n");
3223 
3224 	return (IBMF_SUCCESS);
3225 }
3226 
3227 /*
3228  * ibmf_i_ibt_to_ibmf_status():
3229  *	Map IBT return code to IBMF return code
3230  */
3231 int
ibmf_i_ibt_to_ibmf_status(ibt_status_t ibt_status)3232 ibmf_i_ibt_to_ibmf_status(ibt_status_t ibt_status)
3233 {
3234 	int ibmf_status;
3235 
3236 	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_ibt_to_ibmf_status_start,
3237 	    IBMF_TNF_TRACE, "", "ibmf_i_ibt_to_ibmf_status() enter, "
3238 	    "status = %d\n", tnf_uint, ibt_status, ibt_status);
3239 
3240 	switch (ibt_status) {
3241 
3242 	case IBT_SUCCESS:
3243 		ibmf_status = IBMF_SUCCESS;
3244 		break;
3245 
3246 	case IBT_INSUFF_KERNEL_RESOURCE:
3247 	case IBT_INSUFF_RESOURCE:
3248 	case IBT_QP_FULL:
3249 		ibmf_status = IBMF_NO_RESOURCES;
3250 		break;
3251 
3252 	case IBT_HCA_IN_USE:
3253 	case IBT_QP_IN_USE:
3254 	case IBT_CQ_BUSY:
3255 	case IBT_PD_IN_USE:
3256 	case IBT_MR_IN_USE:
3257 		ibmf_status = IBMF_BUSY;
3258 		break;
3259 
3260 	default:
3261 		ibmf_status = IBMF_FAILURE;
3262 		break;
3263 	}
3264 
3265 	IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_ibt_to_ibmf_status_end,
3266 	    IBMF_TNF_TRACE, "", "ibmf_i_ibt_to_ibmf_status() exit, "
3267 	    "ibt_status = %d, ibmf_status = %d\n", tnf_uint, ibt_status,
3268 	    ibt_status, tnf_int, ibmf_status, ibmf_status);
3269 
3270 	return (ibmf_status);
3271 }
3272 
3273 /*
3274  * ibmf_i_ibt_wc_to_ibmf_status():
3275  *	Map work completion code to IBMF return code
3276  */
3277 int
ibmf_i_ibt_wc_to_ibmf_status(ibt_wc_status_t ibt_wc_status)3278 ibmf_i_ibt_wc_to_ibmf_status(ibt_wc_status_t ibt_wc_status)
3279 {
3280 	int ibmf_status;
3281 
3282 	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4,
3283 	    ibmf_i_ibt_wc_to_ibmf_status_start, IBMF_TNF_TRACE, "",
3284 	    "ibmf_i_ibt_to_ibmf_status() enter, status = %d\n",
3285 	    tnf_uint, ibt_wc_status, ibt_wc_status);
3286 
3287 	switch (ibt_wc_status) {
3288 
3289 	case IBT_WC_SUCCESS:
3290 		ibmf_status = IBMF_SUCCESS;
3291 		break;
3292 
3293 	default:
3294 		ibmf_status = IBMF_FAILURE;
3295 		break;
3296 	}
3297 
3298 	IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L4,
3299 	    ibmf_i_ibt_wc_to_ibmf_status_end, IBMF_TNF_TRACE, "",
3300 	    "ibmf_i_ibt_to_ibmf_status() exit, wc_status = %d, "
3301 	    "ibmf_status = %d\n", tnf_uint, ibt_wc_status,
3302 	    ibt_wc_status, tnf_int, ibmf_status, ibmf_status);
3303 
3304 	return (ibmf_status);
3305 }
3306 
3307 /*
3308  * ibmf_i_is_ibmf_handle_valid():
3309  *	Validate the ibmf handle
3310  */
3311 int
ibmf_i_is_ibmf_handle_valid(ibmf_handle_t ibmf_handle)3312 ibmf_i_is_ibmf_handle_valid(ibmf_handle_t ibmf_handle)
3313 {
3314 	ibmf_ci_t	*cip;
3315 	ibmf_client_t	*clp, *clientp = (ibmf_client_t *)ibmf_handle;
3316 
3317 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
3318 	    ibmf_i_is_ibmf_handle_valid_start, IBMF_TNF_TRACE, "",
3319 	    "ibmf_i_is_ibmf_handle_valid() enter\n");
3320 
3321 	mutex_enter(&ibmf_statep->ibmf_mutex);
3322 
3323 	cip = ibmf_statep->ibmf_ci_list;
3324 
3325 	/* iterate through all the channel interace contexts */
3326 	while (cip != NULL) {
3327 
3328 		mutex_enter(&cip->ci_clients_mutex);
3329 
3330 		clp = cip->ci_clients;
3331 
3332 		/* search all registration contexts for this ci */
3333 		while (clp != NULL) {
3334 			if (clp == clientp)
3335 				break;
3336 			clp = clp->ic_next;
3337 		}
3338 
3339 		mutex_exit(&cip->ci_clients_mutex);
3340 
3341 		if (clp == clientp) {
3342 			/* ci found */
3343 			break;
3344 		} else {
3345 			/* ci not found, move onto next ci */
3346 			cip = cip->ci_next;
3347 		}
3348 	}
3349 
3350 	mutex_exit(&ibmf_statep->ibmf_mutex);
3351 
3352 	if (cip != NULL) {
3353 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
3354 		    ibmf_i_is_ibmf_handle_valid_end, IBMF_TNF_TRACE, "",
3355 		    "ibmf_i_is_ibmf_handle_valid() exit\n");
3356 		return (IBMF_SUCCESS);
3357 	} else {
3358 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
3359 		    ibmf_i_is_ibmf_handle_valid_end, IBMF_TNF_TRACE, "",
3360 		    "ibmf_i_is_ibmf_handle_valid() failure exit\n");
3361 		return (IBMF_FAILURE);
3362 	}
3363 }
3364 
3365 /*
3366  * ibmf_i_is_qp_handle_valid():
3367  *	Validate the QP handle
3368  */
3369 int
ibmf_i_is_qp_handle_valid(ibmf_handle_t ibmf_handle,ibmf_qp_handle_t ibmf_qp_handle)3370 ibmf_i_is_qp_handle_valid(ibmf_handle_t ibmf_handle,
3371     ibmf_qp_handle_t ibmf_qp_handle)
3372 {
3373 	ibmf_client_t	*clientp = (ibmf_client_t *)ibmf_handle;
3374 	ibmf_alt_qp_t	*alt_qp, *qpp = (ibmf_alt_qp_t *)ibmf_qp_handle;
3375 	ibmf_ci_t	*cip = clientp->ic_myci;
3376 
3377 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
3378 	    ibmf_i_is_qp_handle_valid_start, IBMF_TNF_TRACE, "",
3379 	    "ibmf_i_is_qp_handle_valid() enter\n");
3380 
3381 	/* the default qp handle is always valid */
3382 	if (ibmf_qp_handle == IBMF_QP_HANDLE_DEFAULT)
3383 		return (IBMF_SUCCESS);
3384 
3385 	mutex_enter(&cip->ci_mutex);
3386 
3387 	alt_qp = cip->ci_alt_qp_list;
3388 
3389 	while (alt_qp != NULL) {
3390 		if (alt_qp == qpp) {
3391 			/* qp handle found */
3392 			break;
3393 		} else {
3394 			/* qp handle not found, get next qp on list */
3395 			alt_qp = alt_qp->isq_next;
3396 		}
3397 	}
3398 
3399 	mutex_exit(&cip->ci_mutex);
3400 
3401 	if (alt_qp != NULL) {
3402 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
3403 		    ibmf_i_is_qp_handle_valid_end, IBMF_TNF_TRACE, "",
3404 		    "ibmf_i_is_qp_handle_valid() exit\n");
3405 		return (IBMF_SUCCESS);
3406 	} else {
3407 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
3408 		    ibmf_i_is_qp_handle_valid_end, IBMF_TNF_TRACE, "",
3409 		    "ibmf_i_is_qp_handle_valid() failure exit\n");
3410 		return (IBMF_FAILURE);
3411 	}
3412 }
3413 
3414 void
ibmf_dprintf(int l,const char * fmt,...)3415 ibmf_dprintf(int l, const char *fmt, ...)
3416 {
3417 	va_list ap;
3418 
3419 	if ((l) > ibmf_trace_level) {
3420 
3421 		return;
3422 	}
3423 
3424 	va_start(ap, fmt);
3425 	(void) vprintf(fmt, ap);
3426 	va_end(ap);
3427 }
3428 
3429 /*
3430  * ibmf_setup_term_ctx():
3431  * Sets up a message context that is the duplicate of the one
3432  * passed in the regmsgimplp argument. The duplicate message context
3433  * is not visible to the client. It is managed internally by ibmf
3434  * to process the RMPP receiver termination flow logic for the
3435  * transaction while the client is notified of the completion of the
3436  * same transaction (i.e. all the solicited data has been received).
3437  */
3438 int
ibmf_setup_term_ctx(ibmf_client_t * clientp,ibmf_msg_impl_t * regmsgimplp)3439 ibmf_setup_term_ctx(ibmf_client_t *clientp, ibmf_msg_impl_t *regmsgimplp)
3440 {
3441 	ibmf_msg_impl_t	*msgimplp;
3442 	size_t		offset;
3443 	uint32_t	cl_hdr_sz, cl_hdr_off;
3444 	int		status;
3445 
3446 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
3447 	    ibmf_setup_term_ctx_start, IBMF_TNF_TRACE, "",
3448 	    "ibmf_setup_term_ctx() enter\n");
3449 
3450 	/*
3451 	 * Allocate the termination message context
3452 	 */
3453 	msgimplp = (ibmf_msg_impl_t *)kmem_zalloc(sizeof (ibmf_msg_impl_t),
3454 	    KM_NOSLEEP);
3455 	if (msgimplp == NULL) {
3456 		IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1,
3457 		    ibmf_setup_term_ctx_error, IBMF_TNF_ERROR, "",
3458 		    "ibmf_setup_term_ctx(): %s\n", tnf_string, msg,
3459 		    "message mem allocation failure");
3460 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
3461 		    ibmf_setup_term_ctx_end, IBMF_TNF_TRACE, "",
3462 		    "ibmf_setup_term_ctx() exit\n");
3463 		return (IBMF_NO_RESOURCES);
3464 	}
3465 
3466 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*msgimplp))
3467 
3468 	/* Copy the message context to the termination message structure */
3469 	*msgimplp = *regmsgimplp;
3470 
3471 	/* Initialize the message mutex */
3472 	mutex_init(&msgimplp->im_mutex, NULL, MUTEX_DRIVER, NULL);
3473 
3474 	/*
3475 	 * Allocate enough memory for the MAD header only.
3476 	 */
3477 	msgimplp->im_msgbufs_recv.im_bufs_mad_hdr =
3478 	    (ib_mad_hdr_t *)kmem_zalloc(IBMF_MAD_SIZE, KM_NOSLEEP);
3479 	if (msgimplp->im_msgbufs_recv.im_bufs_mad_hdr == NULL) {
3480 		kmem_free(msgimplp, sizeof (ibmf_msg_impl_t));
3481 		IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1,
3482 		    ibmf_setup_term_ctx_error, IBMF_TNF_ERROR, "",
3483 		    "ibmf_setup_term_ctx(): %s\n", tnf_string, msg,
3484 		    "recv buf mem allocation failure");
3485 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
3486 		    ibmf_setup_term_ctx_end, IBMF_TNF_TRACE, "",
3487 		    "ibmf_setup_term_ctx() exit\n");
3488 		return (IBMF_NO_RESOURCES);
3489 	}
3490 
3491 	/* Copy over just the MAD header contents */
3492 	bcopy((const void *)regmsgimplp->im_msgbufs_recv.im_bufs_mad_hdr,
3493 	    (void *)msgimplp->im_msgbufs_recv.im_bufs_mad_hdr,
3494 	    sizeof (ib_mad_hdr_t));
3495 
3496 	offset = sizeof (ib_mad_hdr_t);
3497 	ibmf_i_mgt_class_to_hdr_sz_off(
3498 	    regmsgimplp->im_msgbufs_recv.im_bufs_mad_hdr->MgmtClass,
3499 	    &cl_hdr_sz, &cl_hdr_off);
3500 	offset += cl_hdr_off;
3501 
3502 	/*
3503 	 * Copy the management class header
3504 	 */
3505 	msgimplp->im_msgbufs_recv.im_bufs_cl_hdr =
3506 	    (uchar_t *)msgimplp->im_msgbufs_recv.im_bufs_mad_hdr + offset;
3507 	msgimplp->im_msgbufs_recv.im_bufs_cl_hdr_len =
3508 	    regmsgimplp->im_msgbufs_recv.im_bufs_cl_hdr_len;
3509 	bcopy((void *)regmsgimplp->im_msgbufs_recv.im_bufs_cl_hdr,
3510 	    (void *)msgimplp->im_msgbufs_recv.im_bufs_cl_hdr,
3511 	    regmsgimplp->im_msgbufs_recv.im_bufs_cl_hdr_len);
3512 
3513 	/*
3514 	 * Clear the termination message timers copied from the regular message
3515 	 * since ibmf_i_set_timer() expects them to be cleared.
3516 	 */
3517 	msgimplp->im_rp_timeout_id = 0;
3518 	msgimplp->im_tr_timeout_id = 0;
3519 
3520 	/* Mark this message as being in a receiver RMPP mode */
3521 	msgimplp->im_flags |= IBMF_MSG_FLAGS_RECV_RMPP;
3522 
3523 	/* Mark this message as being a "termination flow" message */
3524 	msgimplp->im_flags |= IBMF_MSG_FLAGS_TERMINATION;
3525 
3526 	/*
3527 	 * Clear the IBMF_MSG_FLAGS_SET_TERMINATION copied over from the regular
3528 	 * message.
3529 	 */
3530 	msgimplp->im_flags &= ~IBMF_MSG_FLAGS_SET_TERMINATION;
3531 
3532 	/*
3533 	 * Clear the trans_state RECV_DONE and DONE flags so that the
3534 	 * protocol continues with the termination message context.
3535 	 */
3536 	msgimplp->im_trans_state_flags &= ~IBMF_TRANS_STATE_FLAG_RECV_DONE;
3537 	msgimplp->im_trans_state_flags &= ~IBMF_TRANS_STATE_FLAG_DONE;
3538 
3539 	/* Clear out references to the old UD dest handles */
3540 	msgimplp->im_ibmf_ud_dest = NULL;
3541 	msgimplp->im_ud_dest = NULL;
3542 
3543 	/*
3544 	 * Request new UD dest resources for the termination phase.
3545 	 * The old UD dest resources are freed when the IBMF client
3546 	 * calls ibmf_free_msg(), so they cannot be relied on to exist
3547 	 * when the RMPP termination loop completes.
3548 	 */
3549 	status = ibmf_i_alloc_ud_dest(clientp, msgimplp, &msgimplp->im_ud_dest,
3550 	    B_FALSE);
3551 	if (status != IBMF_SUCCESS) {
3552 		kmem_free(msgimplp, sizeof (ibmf_msg_impl_t));
3553 		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
3554 		    ibmf_setup_term_ctx_err, IBMF_TNF_ERROR, "",
3555 		    "ibmf_setup_term_ctx(): %s, status = %d\n",
3556 		    tnf_string, msg, "UD destination resource allocation"
3557 		    " failed", tnf_int, ibmf_status, status);
3558 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
3559 		    ibmf_setup_term_ctx_end, IBMF_TNF_TRACE, "",
3560 		    "ibmf_setup_term_ctx() exit\n");
3561 		return (status);
3562 	}
3563 
3564 	/*
3565 	 * Add the message to the termination client list by virtue of
3566 	 * having the IBMF_MSG_FLAGS_TERMINATION "im_flags" flag set.
3567 	 */
3568 	ibmf_i_client_add_msg(clientp, msgimplp);
3569 
3570 	/*
3571 	 * Increase the "allocted messages" count so that the client
3572 	 * does not unregister before this message has been freed.
3573 	 * This is necessary because we want the client context to
3574 	 * be around when the receive timeout expires for this termination
3575 	 * loop, otherwise the code will access freed memory and crash.
3576 	 */
3577 	mutex_enter(&clientp->ic_mutex);
3578 	clientp->ic_msgs_alloced++;
3579 	mutex_exit(&clientp->ic_mutex);
3580 
3581 	mutex_enter(&msgimplp->im_mutex);
3582 	/* Set the response timer for the termination message. */
3583 	ibmf_i_set_timer(ibmf_i_recv_timeout, msgimplp, IBMF_RESP_TIMER);
3584 	mutex_exit(&msgimplp->im_mutex);
3585 
3586 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_setup_term_ctx_end,
3587 	    IBMF_TNF_TRACE, "", "ibmf_setup_term_ctx() exit\n");
3588 
3589 	return (IBMF_SUCCESS);
3590 }
3591