xref: /illumos-gate/usr/src/uts/common/io/ib/mgt/ibmf/ibmf_recv.c (revision 7133abc28e375d1167979a08d693cf44e827b7cf)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 
27 /*
28  * This file implements the MAD receive logic in IBMF.
29  */
30 
31 #include <sys/ib/mgt/ibmf/ibmf_impl.h>
32 #include <sys/ib/mgt/ibmf/ibmf_saa_impl.h>
33 
34 extern ibmf_state_t *ibmf_statep;
35 extern int ibmf_recv_wqes_per_port;
36 extern int ibmf_send_wqes_posted_per_qp;
37 extern int ibmf_recv_wqes_posted_per_qp;
38 
/*
 * Recover the address encoded in a receive work request ID: the
 * IBMF_RCV_CQE bit(s) are masked off "id" and the remainder is stored in
 * "ptr" as a pointer.  ibmf_i_handle_recv_completion() uses this to get
 * the ibmf_recv_wqe_t back from wcp->wc_id.
 */
#define	IBMF_RECV_WR_ID_TO_ADDR(id, ptr)		 \
	(ptr) = (void *)(uintptr_t)((uint64_t)(id) & ~IBMF_RCV_CQE)

/* QP numbers of the two special (default) QPs */
#define	IBMF_QP0_NUM			0
#define	IBMF_QP1_NUM			1
/*
 * Attribute modifier bit/value for BM class MADs.
 * NOTE(review): from the names, bit 0 of the attribute modifier appears to
 * distinguish a response from a request -- confirm against the users.
 */
#define	IBMF_BM_MAD_ATTR_MOD_REQRESP_BIT	0x00000001
#define	IBMF_BM_MAD_ATTR_MOD_RESP		0x1
46 
/*
 * Structure definition of entries in the module names table.
 * Each entry pairs a module name (at most 7 characters plus the
 * terminating NUL) with an IBMF client type.
 */
typedef struct _ibmf_mod_names_t {
	char			mod_name[8];
	ibmf_client_type_t	mgt_class;
} ibmf_mod_names_t;
54 
/*
 * Argument bundle handed to ibmf_module_load() through taskq_dispatch()
 * when a MAD arrives for a class that has no registered client.
 * It is allocated and filled in by ibmf_i_handle_recv_completion().
 */
typedef struct _ibmf_mod_load_args_t {
	ibmf_ci_t		*cip;		/* CI the MAD arrived on */
	ibmf_recv_wqe_t		*recv_wqep;	/* receive WQE of the MAD */
	char			*modname;	/* client module name */
	ibmf_client_type_t	ibmf_class;	/* class derived from the MAD */
} ibmf_mod_load_args_t;
61 
62 extern int ibmf_trace_level;
63 extern int ibmf_send_wqes_posted_per_qp;
64 extern int ibmf_recv_wqes_posted_per_qp;
65 
66 static void ibmf_i_do_recv_cb(void *taskq_arg);
67 static int ibmf_i_repost_recv_buffer(ibmf_ci_t *cip,
68     ibmf_recv_wqe_t *recv_wqep);
69 static int ibmf_i_get_class(ib_mad_hdr_t *madhdrp,
70     ibmf_qp_handle_t dest_ibmf_qp_handle, ib_lid_t slid,
71     ibmf_client_type_t *dest_classp);
72 static void ibmf_i_handle_non_rmpp(ibmf_client_t *clientp,
73     ibmf_msg_impl_t *msgimplp, uchar_t *mad);
74 static void ibmf_get_mod_name(uint8_t mad_class, ibmf_client_type_t class,
75     char *modname);
76 static void ibmf_module_load(void *taskq_arg);
77 static void ibmf_send_busy(ibmf_mod_load_args_t *modlargsp);
78 
/*
 * Client type classification helpers.
 *
 * The first three macros compare the 0x000F0000 field of an
 * ibmf_client_type_t value against the agent, manager and agent+manager
 * identifiers; IS_MANDATORY_CLASS() is true for PERF_AGENT and BM_AGENT.
 *
 * The argument is parenthesized at every use so that an expression
 * argument (for example "a | b") is evaluated as a unit rather than being
 * split by the higher precedence of "&" and "==" inside the macro.
 */
#define	AGENT_CLASS(class)					\
	((((class) & 0x000F0000) == IBMF_AGENT_ID))
#define	MANAGER_CLASS(class)				\
	((((class) & 0x000F0000) == IBMF_MANAGER_ID))
#define	AGENT_MANAGER_CLASS(class)				\
	((((class) & 0x000F0000) == IBMF_AGENT_MANAGER_ID))
#define	IS_MANDATORY_CLASS(class)			\
	(((class) == PERF_AGENT) || ((class) == BM_AGENT))
87 
/*
 * Buffer into which ibmf_i_handle_recv_completion() has
 * ibmf_get_mod_name() build the client module name; a pointer to it is
 * then passed to ibmf_module_load() in ibmf_mod_load_args_t.
 * NOTE(review): this is one file-scope buffer shared by every receive
 * completion -- confirm that its users are serialized.
 */
char 	ibmf_client_modname[16];
89 
90 /*
91  * ibmf_i_handle_recv_completion():
92  *	Process the WQE from the RQ, obtain the management class of the
93  *	packet and retrieve the corresponding client context
94  */
95 void
96 ibmf_i_handle_recv_completion(ibmf_ci_t *cip, ibt_wc_t *wcp)
97 {
98 	int			ret;
99 	ibmf_client_type_t	class;
100 	ibmf_client_t		*clientp;
101 	ib_mad_hdr_t		*madhdrp;
102 	ibmf_recv_wqe_t		*recv_wqep;
103 	ibt_recv_wr_t		*rwrp;
104 	ibmf_qp_handle_t	ibmf_qp_handle;
105 	struct kmem_cache	*kmem_cachep;
106 	ibmf_alt_qp_t		*altqp;
107 
108 	IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L4,
109 	    ibmf_i_handle_recv_completion_start, IBMF_TNF_TRACE, "",
110 	    "ibmf_i_handle_recv_completion() enter, cip = %p, wcp = %p\n",
111 	    tnf_opaque, cip, cip, tnf_opaque, wcp, wcp);
112 
113 	mutex_enter(&cip->ci_ud_dest_list_mutex);
114 	if (cip->ci_ud_dest_list_count < IBMF_UD_DEST_LO_WATER_MARK) {
115 		ret = ibmf_ud_dest_tq_disp(cip);
116 		if (ret == 0) {
117 			IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L3,
118 			    ibmf_i_handle_recv_completion_err, IBMF_TNF_ERROR,
119 			    "", "ibmf_i_handle_recv_completion(): %s\n",
120 			    tnf_string, msg, "taskq dispatch of ud_dest "
121 			    "population thread failed");
122 		}
123 	}
124 	mutex_exit(&cip->ci_ud_dest_list_mutex);
125 
126 	ASSERT(IBMF_IS_RECV_WR_ID(wcp->wc_id));
127 	IBMF_RECV_WR_ID_TO_ADDR(wcp->wc_id, recv_wqep);
128 
129 	rwrp = &recv_wqep->recv_wr;
130 
131 	/* Retrieve the QP handle from the receive WQE context */
132 	ibmf_qp_handle = recv_wqep->recv_ibmf_qp_handle;
133 
134 	/* Get the WQE kmem cache pointer based on the QP type */
135 	if (ibmf_qp_handle == IBMF_QP_HANDLE_DEFAULT) {
136 		kmem_cachep = cip->ci_recv_wqes_cache;
137 	} else {
138 		altqp = (ibmf_alt_qp_t *)ibmf_qp_handle;
139 		kmem_cachep = altqp->isq_recv_wqes_cache;
140 	}
141 
142 	/*
143 	 * if the wqe is being flushed due to shutting down of the qp, free
144 	 * the wqe and return.
145 	 */
146 	if (wcp->wc_status == IBT_WC_WR_FLUSHED_ERR) {
147 		kmem_free(rwrp->wr_sgl, IBMF_MAX_RQ_WR_SGL_ELEMENTS *
148 		    sizeof (ibt_wr_ds_t));
149 		kmem_cache_free(kmem_cachep, recv_wqep);
150 		mutex_enter(&cip->ci_mutex);
151 		IBMF_SUB32_PORT_KSTATS(cip, recv_wqes_alloced, 1);
152 		mutex_exit(&cip->ci_mutex);
153 		if (ibmf_qp_handle == IBMF_QP_HANDLE_DEFAULT) {
154 			mutex_enter(&cip->ci_mutex);
155 			cip->ci_wqes_alloced--;
156 			if (cip->ci_wqes_alloced == 0)
157 				cv_signal(&cip->ci_wqes_cv);
158 			mutex_exit(&cip->ci_mutex);
159 		} else {
160 			mutex_enter(&altqp->isq_mutex);
161 			altqp->isq_wqes_alloced--;
162 			if (altqp->isq_wqes_alloced == 0)
163 				cv_signal(&altqp->isq_wqes_cv);
164 			mutex_exit(&altqp->isq_mutex);
165 		}
166 		IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L3,
167 		    ibmf_i_handle_recv_completion, IBMF_TNF_TRACE,
168 		    "", "ibmf_i_handle_recv_completion(): %s\n",
169 		    tnf_string, msg, "recv wqe flushed");
170 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
171 		    ibmf_i_handle_recv_completion_end, IBMF_TNF_TRACE,
172 		    "", "ibmf_i_handle_recv_completion() exit\n");
173 		return;
174 	}
175 
176 	/*
177 	 * Dynamic Posting of WQEs to the Receive Queue (RQ) of the QP:
178 	 * If the number of RQ WQEs posted to the QP drops below half
179 	 * the initial number of RQ WQEs posted to the QP, then, one additional
180 	 * WQE is posted to the RQ of the QP while processing this CQE.
181 	 */
182 	if (ibmf_qp_handle == IBMF_QP_HANDLE_DEFAULT) {
183 		ibmf_qp_t *qpp = recv_wqep->recv_qpp;
184 
185 		mutex_enter(&qpp->iq_mutex);
186 		qpp->iq_rwqes_posted--;
187 		if (qpp->iq_rwqes_posted <= (ibmf_recv_wqes_per_port >> 1)) {
188 			mutex_exit(&qpp->iq_mutex);
189 
190 			IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L3,
191 			    ibmf_i_handle_recv_compl, IBMF_TNF_TRACE, "",
192 			    "ibmf_i_handle_recv_compl(): %s, "
193 			    "QP# = %d\n", tnf_string, msg,
194 			    "Posting more RQ WQEs",
195 			    tnf_int, qpnum, qpp->iq_qp_num);
196 
197 			/* Post an additional WQE to the RQ */
198 			ret = ibmf_i_post_recv_buffer(cip, qpp,
199 			    B_FALSE, ibmf_qp_handle);
200 			if (ret != IBMF_SUCCESS) {
201 				IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L3,
202 				    ibmf_i_handle_recv_compl, IBMF_TNF_TRACE,
203 				    "", "ibmf_i_handle_recv_compl(): %s, "
204 				    "status = %d\n", tnf_string, msg,
205 				    "ibmf_i_post_recv_buffer() failed",
206 				    tnf_int, status, ret);
207 			}
208 
209 			mutex_enter(&qpp->iq_mutex);
210 		}
211 		mutex_exit(&qpp->iq_mutex);
212 	} else {
213 		mutex_enter(&altqp->isq_mutex);
214 		altqp->isq_rwqes_posted--;
215 		if (altqp->isq_rwqes_posted <= (ibmf_recv_wqes_per_port >> 1)) {
216 			mutex_exit(&altqp->isq_mutex);
217 
218 			IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L3,
219 			    ibmf_i_handle_recv_compl, IBMF_TNF_TRACE, "",
220 			    "ibmf_i_handle_recv_compl(): %s, "
221 			    "QP# = %d\n", tnf_string, msg,
222 			    "Posting more RQ WQEs",
223 			    tnf_int, qpnum, altqp->isq_qpn);
224 
225 			/* Post an additional WQE to the RQ */
226 			ret = ibmf_i_post_recv_buffer(cip, NULL,
227 			    B_FALSE, ibmf_qp_handle);
228 			if (ret != IBMF_SUCCESS) {
229 				IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L3,
230 				    ibmf_i_handle_recv_compl, IBMF_TNF_TRACE,
231 				    "", "ibmf_i_handle_recv_compl(): %s, "
232 				    "status = %d\n", tnf_string, msg,
233 				    "ibmf_i_post_recv_buffer() failed",
234 				    tnf_int, status, ret);
235 			}
236 
237 			mutex_enter(&altqp->isq_mutex);
238 		}
239 		mutex_exit(&altqp->isq_mutex);
240 	}
241 
242 	/*
243 	 * for all other completion errors, repost the wqe, and if that
244 	 * fails, free the wqe and return.
245 	 */
246 	if (wcp->wc_status != IBT_WC_SUCCESS) {
247 		(void) ibmf_i_repost_recv_buffer(cip, recv_wqep);
248 		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
249 		    ibmf_i_handle_recv_completion_err, IBMF_TNF_ERROR,
250 		    "", "ibmf_i_handle_recv_completion(): %s, wc_status = %d\n",
251 		    tnf_string, msg, "bad completion status received",
252 		    tnf_uint, wc_status, wcp->wc_status);
253 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
254 		    ibmf_i_handle_recv_completion_end, IBMF_TNF_TRACE,
255 		    "", "ibmf_i_handle_recv_completion() exit\n");
256 		return;
257 	}
258 
259 	/* find the client corresponding to this recv cqe */
260 	madhdrp = (ib_mad_hdr_t *)((uintptr_t)recv_wqep->recv_mem +
261 	    sizeof (ib_grh_t));
262 
263 	/* drop packet if MAD Base Version is not as expected */
264 	if (madhdrp->BaseVersion != MAD_CLASS_BASE_VERS_1) {
265 		(void) ibmf_i_repost_recv_buffer(cip, recv_wqep);
266 		IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1,
267 		    ibmf_i_handle_recv_completion_err, IBMF_TNF_ERROR,
268 		    "", "ibmf_i_handle_recv_completion(): %s\n",
269 		    tnf_string, msg, "bad MAD version");
270 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
271 		    ibmf_i_handle_recv_completion_end, IBMF_TNF_TRACE,
272 		    "", "ibmf_i_handle_recv_completion() exit\n");
273 		return;
274 	}
275 
276 	if (ibmf_i_get_class(madhdrp, recv_wqep->recv_ibmf_qp_handle,
277 	    wcp->wc_slid, &class) != IBMF_SUCCESS) {
278 		/* bad class & type? */
279 #ifdef DEBUG
280 		ibmf_i_dump_wcp(cip, wcp, recv_wqep);
281 #endif
282 		(void) ibmf_i_repost_recv_buffer(cip, recv_wqep);
283 		IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1,
284 		    ibmf_i_handle_recv_completion_err, IBMF_TNF_ERROR,
285 		    "", "ibmf_i_handle_recv_completion(): %s\n",
286 		    tnf_string, msg, "bad class/type");
287 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
288 		    ibmf_i_handle_recv_completion_end, IBMF_TNF_TRACE,
289 		    "", "ibmf_i_handle_recv_completion() exit\n");
290 		return;
291 	}
292 
293 	ret = ibmf_i_lookup_client_by_mgmt_class(cip, recv_wqep->recv_port_num,
294 	    class, &clientp);
295 	if (ret == IBMF_SUCCESS) {
296 		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*recv_wqep))
297 		recv_wqep->recv_client = clientp;
298 		recv_wqep->recv_wc = *wcp; /* struct copy */
299 
300 		/*
301 		 * Increment the kstats for the number of active receiver side
302 		 * callbacks
303 		 */
304 		mutex_enter(&clientp->ic_kstat_mutex);
305 		IBMF_ADD32_KSTATS(clientp, recv_cb_active, 1);
306 		mutex_exit(&clientp->ic_kstat_mutex);
307 
308 		if ((clientp->ic_reg_flags & IBMF_REG_FLAG_NO_OFFLOAD) == 0) {
309 			/* Dispatch the taskq thread to do further processing */
310 			ret = taskq_dispatch(clientp->ic_recv_taskq,
311 			    ibmf_i_do_recv_cb, recv_wqep, TQ_NOSLEEP);
312 			if (ret == 0) {
313 				mutex_enter(&clientp->ic_kstat_mutex);
314 				IBMF_SUB32_KSTATS(clientp, recv_cb_active, 1);
315 				mutex_exit(&clientp->ic_kstat_mutex);
316 				IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1,
317 				    ibmf_i_handle_recv_completion_err,
318 				    IBMF_TNF_ERROR, "",
319 				    "ibmf_i_handle_recv_completion(): %s\n",
320 				    tnf_string, msg, "dispatch failed");
321 				(void) ibmf_i_repost_recv_buffer(cip,
322 				    recv_wqep);
323 				IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
324 				    ibmf_i_handle_recv_completion_end,
325 				    IBMF_TNF_TRACE, "",
326 				    "ibmf_i_handle_recv_completion() exit\n");
327 				return;
328 			}
329 		} else {
330 			ibmf_i_do_recv_cb((void *)recv_wqep);
331 		}
332 
333 		/*
334 		 * Decrement the kstats for the number of active receiver side
335 		 * callbacks
336 		 */
337 		mutex_enter(&clientp->ic_kstat_mutex);
338 		IBMF_SUB32_KSTATS(clientp, recv_cb_active, 1);
339 		mutex_exit(&clientp->ic_kstat_mutex);
340 
341 	} else {
342 		/*
343 		 * A client has not registered to receive MADs of this
344 		 * management class. IBMF must attempt to load the
345 		 * client and request a resend of the request MAD.
346 		 * The name of the client MAD is derived using a
347 		 * convention described in PSARC case 2003/753.
348 		 */
349 
350 		ibmf_mod_load_args_t	*modlargsp;
351 
352 		recv_wqep->recv_wc = *wcp; /* struct copy */
353 
354 		IBMF_TRACE_3(IBMF_TNF_NODEBUG, DPRINT_L4,
355 		    ibmf_i_handle_recv_completion_err, IBMF_TNF_ERROR, "",
356 		    "ibmf_i_handle_recv_completion(): %s, port = %d, "
357 		    "class = 0x%x\n",
358 		    tnf_string, msg, "no client registered", tnf_uint, port,
359 		    recv_wqep->recv_port_num, tnf_opaque, class, class);
360 
361 		/* Construct the IBMF client module name */
362 		ibmf_get_mod_name(madhdrp->MgmtClass, class,
363 		    ibmf_client_modname);
364 
365 		/* Load the module using a taskq thread */
366 		modlargsp = (ibmf_mod_load_args_t *)kmem_zalloc(
367 		    sizeof (ibmf_mod_load_args_t), KM_NOSLEEP);
368 		if (modlargsp != NULL) {
369 			_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*modlargsp))
370 			modlargsp->cip		= cip;
371 			modlargsp->recv_wqep	= recv_wqep;
372 			modlargsp->modname	= ibmf_client_modname;
373 			modlargsp->ibmf_class	= class;
374 			ret = taskq_dispatch(ibmf_statep->ibmf_taskq,
375 			    ibmf_module_load, modlargsp, TQ_NOSLEEP);
376 			if (ret == 0) {
377 				kmem_free(modlargsp,
378 				    sizeof (ibmf_mod_load_args_t));
379 				IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
380 				    ibmf_i_handle_recv_completion_error,
381 				    IBMF_TNF_TRACE, "",
382 				    "ibmf_i_handle_recv_completion(): Failed "
383 				    "to dispatch ibmf_module_load taskq\n");
384 				(void) ibmf_i_repost_recv_buffer(cip,
385 				    recv_wqep);
386 			}
387 		} else {
388 			IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
389 			    ibmf_i_handle_recv_completion_end, IBMF_TNF_TRACE,
390 			    "", "ibmf_i_handle_recv_completion(): "
391 			    "Failed to allocate memory for modlargs\n");
392 			(void) ibmf_i_repost_recv_buffer(cip, recv_wqep);
393 		}
394 	}
395 
396 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
397 	    ibmf_i_handle_recv_completion_end, IBMF_TNF_TRACE, "",
398 	    "ibmf_i_handle_recv_completion() exit\n");
399 }
400 
401 /*
402  * ibmf_i_do_recv_cb():
403  *	This routine does the following:
404  *	o looks for a message in the client's message list
405  *	o creates a new message if one does not exist for unsolicited data
406  *	o invoke routines to do specific handling for rmpp and non-rmpp cases
407  *	o on a failure, the receive WQE is reposted to the RQ
408  */
409 static void
410 ibmf_i_do_recv_cb(void *taskq_arg)
411 {
412 	ibt_wc_t		*wcp;
413 	ibmf_msg_impl_t		*msgimplp;
414 	ibmf_client_t		*clientp;
415 	ibmf_addr_info_t	addrinfo;
416 	ibmf_recv_wqe_t		*recv_wqep;
417 	ib_grh_t		*ib_grh;
418 	boolean_t		grhpresent;
419 	ibmf_qp_handle_t	ibmf_qp_handle;
420 	ib_mad_hdr_t		*mad_hdr;
421 	ibmf_rmpp_hdr_t		*rmpp_hdr;
422 	ibmf_alt_qp_t		*qpp;
423 	ib_gid_t		gid;
424 	ib_lid_t		lid;
425 	int			msg_trans_state_flags, msg_flags;
426 	uint_t			ref_cnt;
427 	timeout_id_t		msg_rp_unset_id, msg_tr_unset_id;
428 	timeout_id_t		msg_rp_set_id, msg_tr_set_id;
429 	int			status;
430 	saa_port_t		*saa_portp;
431 
432 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*recv_wqep))
433 
434 	/* The taskq_arg argument is a pointer to the receive WQE context */
435 	recv_wqep = taskq_arg;
436 
437 	/* Retrieve the QP handle from the receive WQE context */
438 	ibmf_qp_handle = recv_wqep->recv_ibmf_qp_handle;
439 
440 	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4,
441 	    ibmf_i_do_recv_cb_start, IBMF_TNF_TRACE, "",
442 	    "ibmf_i_do_recv_cb() enter, recv_wqep = %p\n",
443 	    tnf_opaque, recv_wqep, recv_wqep);
444 
445 	/* Retrieve the client context pointer from the receive WQE context */
446 	clientp = recv_wqep->recv_client;
447 
448 	/* Get a pointer to the IBT work completion structure */
449 	wcp = &recv_wqep->recv_wc;
450 
451 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*wcp))
452 
453 	/*
454 	 * Identify the port by the  LID or GID depending on whether the
455 	 * Global Route Header is valid or not
456 	 */
457 	if (wcp->wc_flags & IBT_WC_GRH_PRESENT) {
458 		grhpresent = B_TRUE;
459 		ib_grh = (ib_grh_t *)recv_wqep->recv_mem;
460 		gid.gid_prefix	= b2h64(ib_grh->SGID.gid_prefix);
461 		gid.gid_guid 	= b2h64(ib_grh->SGID.gid_guid);
462 	} else {
463 		grhpresent = B_FALSE;
464 		lid = wcp->wc_slid;
465 	}
466 
467 	/* Get a pointer to the MAD header */
468 	mad_hdr = (ib_mad_hdr_t *)((uintptr_t)recv_wqep->recv_mem +
469 	    sizeof (ib_grh_t));
470 
471 	/* Get a pointer to the RMPP header */
472 	rmpp_hdr = (ibmf_rmpp_hdr_t *)((uintptr_t)recv_wqep->recv_mem +
473 	    sizeof (ib_grh_t) + sizeof (ib_mad_hdr_t));
474 
475 	IBMF_TRACE_5(IBMF_TNF_DEBUG, DPRINT_L3,
476 	    ibmf_i_do_recv_cb, IBMF_TNF_TRACE, "",
477 	    "ibmf_i_do_recv_cb(): %s, tid = %016" PRIx64 ", class = 0x%x, "
478 	    "attrID = 0x%x, lid = 0x%x\n",
479 	    tnf_string, msg, "Received MAD", tnf_opaque, tid,
480 	    b2h64(mad_hdr->TransactionID), tnf_opaque, class,
481 	    mad_hdr->MgmtClass, tnf_opaque, attr_id,
482 	    b2h16(mad_hdr->AttributeID), tnf_opaque, remote_lid, lid);
483 
484 	/*
485 	 * Look for the matching message in the client's message list
486 	 * NOTE: if the message is found, the message reference count will
487 	 * have been increased by 1.
488 	 */
489 	msgimplp = ibmf_i_find_msg(clientp, b2h64(mad_hdr->TransactionID),
490 	    mad_hdr->MgmtClass, mad_hdr->R_Method, lid, &gid, grhpresent,
491 	    rmpp_hdr, IBMF_REG_MSG_LIST);
492 
493 	/*
494 	 * If the message is not on the regular message list, search
495 	 * for it in the termination message list.
496 	 */
497 	if (msgimplp == NULL) {
498 		msgimplp = ibmf_i_find_msg(clientp,
499 		    b2h64(mad_hdr->TransactionID), mad_hdr->MgmtClass,
500 		    mad_hdr->R_Method, lid, &gid, grhpresent, rmpp_hdr,
501 		    IBMF_TERM_MSG_LIST);
502 	}
503 
504 	if (msgimplp != NULL) {
505 
506 		/* if this packet is from the SA */
507 		if (clientp->ic_client_info.client_class == SUBN_ADM_MANAGER) {
508 
509 			/*
510 			 * ibmf_saa's callback arg is its saa_portp;
511 			 * take advantage of this fact to quickly update the
512 			 * port's SA uptime.  ibmf_saa uses the up time to
513 			 * determine if the SA is still alive
514 			 */
515 			saa_portp = clientp->ic_async_cb_arg;
516 
517 			/* update the SA uptime */
518 			mutex_enter(&saa_portp->saa_pt_mutex);
519 
520 			saa_portp->saa_pt_sa_uptime = gethrtime();
521 
522 			mutex_exit(&saa_portp->saa_pt_mutex);
523 		}
524 
525 		mutex_enter(&msgimplp->im_mutex);
526 
527 		/*
528 		 * Clear timers for transactions of solicited incoming packets
529 		 */
530 		if (msgimplp->im_rp_timeout_id != 0) {
531 			ibmf_i_unset_timer(msgimplp, IBMF_RESP_TIMER);
532 		}
533 
534 		/*
535 		 * If a MAD is received in the middle of an RMPP receive
536 		 * transaction, and the MAD's RMPPFlags.Active bit is 0,
537 		 * drop the MAD
538 		 */
539 		if (ibmf_i_is_rmpp(clientp, ibmf_qp_handle) &&
540 		    (msgimplp->im_flags & IBMF_MSG_FLAGS_RECV_RMPP) &&
541 		    ((rmpp_hdr->rmpp_flags & IBMF_RMPP_FLAGS_ACTIVE) == 0)) {
542 			mutex_exit(&msgimplp->im_mutex);
543 			(void) ibmf_i_repost_recv_buffer(clientp->ic_myci,
544 			    recv_wqep);
545 			IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L3,
546 			    ibmf_i_do_recv_cb_error, IBMF_TNF_ERROR, "",
547 			    "ibmf_i_do_recv_cb(): %s, msg = %p\n",
548 			    tnf_string, msg,
549 			    "Non-RMPP MAD received in RMPP transaction, "
550 			    "dropping MAD", tnf_opaque, msgimplp, msgimplp);
551 			IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
552 			    ibmf_i_do_recv_cb_end, IBMF_TNF_TRACE, "",
553 			    "ibmf_i_do_recv_cb() exit\n");
554 			return;
555 		}
556 
557 		/*
558 		 * If the message has been marked unitialized or done
559 		 * release the message mutex and return
560 		 */
561 		if ((msgimplp->im_trans_state_flags &
562 		    IBMF_TRANS_STATE_FLAG_DONE) ||
563 		    (msgimplp->im_trans_state_flags &
564 		    IBMF_TRANS_STATE_FLAG_UNINIT)) {
565 			IBMF_MSG_DECR_REFCNT(msgimplp);
566 			msg_trans_state_flags = msgimplp->im_trans_state_flags;
567 			msg_flags = msgimplp->im_flags;
568 			ref_cnt = msgimplp->im_ref_count;
569 			mutex_exit(&msgimplp->im_mutex);
570 			(void) ibmf_i_repost_recv_buffer(clientp->ic_myci,
571 			    recv_wqep);
572 			/*
573 			 * This thread may notify the client only if the
574 			 * transaction is done, the message has been removed
575 			 * from the client's message list, and the message
576 			 * reference count is 0.
577 			 * If the transaction is done, and the message reference
578 			 * count = 0, there is still a possibility that a
579 			 * packet could arrive for the message and its reference
580 			 * count increased if the message is still on the list.
581 			 * If the message is still on the list, it will be
582 			 * removed by a call to ibmf_i_client_rem_msg() at
583 			 * the completion point of the transaction.
584 			 * So, the reference count should be checked after the
585 			 * message has been removed.
586 			 */
587 			if ((msg_trans_state_flags &
588 			    IBMF_TRANS_STATE_FLAG_DONE) &&
589 			    !(msg_flags & IBMF_MSG_FLAGS_ON_LIST) &&
590 			    (ref_cnt == 0)) {
591 
592 				ibmf_i_notify_sequence(clientp, msgimplp,
593 				    msg_flags);
594 
595 			}
596 			IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L3,
597 			    ibmf_i_do_recv_cb_error, IBMF_TNF_ERROR, "",
598 			    "ibmf_i_do_recv_cb(): %s, msg = %p\n",
599 			    tnf_string, msg,
600 			    "Message already marked for removal, dropping MAD",
601 			    tnf_opaque, msgimplp, msgimplp);
602 			IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
603 			    ibmf_i_do_recv_cb_end, IBMF_TNF_TRACE, "",
604 			    "ibmf_i_do_recv_cb() exit\n");
605 			return;
606 		}
607 	} else {
608 		/* unsolicited message packet */
609 
610 		/*
611 		 * Check if the client context, the alternate QP context
612 		 * (if not the default QP), and the incoming MAD support RMPP
613 		 */
614 		if (ibmf_i_is_rmpp(clientp, ibmf_qp_handle) &&
615 		    (rmpp_hdr->rmpp_flags & IBMF_RMPP_FLAGS_ACTIVE)) {
616 
617 			/* Only unsolicited packets should be data seg 1 */
618 			if ((rmpp_hdr->rmpp_flags &
619 			    IBMF_RMPP_FLAGS_FIRST_PKT) == 0) {
620 				(void) ibmf_i_repost_recv_buffer(
621 				    clientp->ic_myci, recv_wqep);
622 				IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L3,
623 				    ibmf_i_do_recv_cb_error, IBMF_TNF_TRACE, "",
624 				    "ibmf_i_do_recv_cb(): %s\n",
625 				    tnf_string, msg,
626 				    "unsolicited rmpp packet not first packet");
627 				IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
628 				    ibmf_i_do_recv_cb_end, IBMF_TNF_TRACE, "",
629 				    "ibmf_i_do_recv_cb() exit\n");
630 				return;
631 			}
632 		}
633 
634 		/*
635 		 * Before we alloc a message context, check to see if
636 		 * a callback has been registered with the client
637 		 * for this unsolicited message.
638 		 * If one has been registered, increment the recvs active
639 		 * count to get the teardown routine to wait until
640 		 * this callback is complete.
641 		 */
642 		if (ibmf_qp_handle == IBMF_QP_HANDLE_DEFAULT) {
643 
644 			mutex_enter(&clientp->ic_mutex);
645 			if (clientp->ic_recv_cb == NULL) {
646 				mutex_exit(&clientp->ic_mutex);
647 				(void) ibmf_i_repost_recv_buffer(
648 				    clientp->ic_myci, recv_wqep);
649 				IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
650 				    ibmf_i_do_recv_cb_error, IBMF_TNF_ERROR, "",
651 				    "ibmf_i_do_recv_cb(): %s, class %x\n",
652 				    tnf_string, msg,
653 				    "ibmf_tear_down_recv_cb already occurred",
654 				    tnf_opaque, class,
655 				    clientp->ic_client_info.client_class);
656 				IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
657 				    ibmf_i_do_recv_cb_end, IBMF_TNF_TRACE, "",
658 				    "ibmf_i_do_recv_cb() exit\n");
659 				return;
660 			}
661 			IBMF_RECV_CB_SETUP(clientp);
662 			mutex_exit(&clientp->ic_mutex);
663 		} else {
664 			qpp = (ibmf_alt_qp_t *)ibmf_qp_handle;
665 
666 			mutex_enter(&qpp->isq_mutex);
667 			if (qpp->isq_recv_cb == NULL) {
668 				mutex_exit(&qpp->isq_mutex);
669 				(void) ibmf_i_repost_recv_buffer(
670 				    clientp->ic_myci, recv_wqep);
671 				IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
672 				    ibmf_i_do_recv_cb_error, IBMF_TNF_ERROR, "",
673 				    "ibmf_i_do_recv_cb(): %s, class %x\n",
674 				    tnf_string, msg,
675 				    "ibmf_tear_down_recv_cb already occurred",
676 				    tnf_opaque, class,
677 				    clientp->ic_client_info.client_class);
678 				IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
679 				    ibmf_i_do_recv_cb_end, IBMF_TNF_TRACE, "",
680 				    "ibmf_i_do_recv_cb() exit\n");
681 				return;
682 			}
683 			IBMF_ALT_RECV_CB_SETUP(qpp);
684 			mutex_exit(&qpp->isq_mutex);
685 		}
686 
687 		/*
688 		 * Allocate a message context
689 		 */
690 		msgimplp = (ibmf_msg_impl_t *)kmem_zalloc(
691 		    sizeof (ibmf_msg_impl_t), KM_NOSLEEP);
692 
693 		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*msgimplp))
694 
695 		/* If we cannot allocate memory, drop the packet and clean up */
696 		if (msgimplp == NULL) {
697 			if (ibmf_qp_handle == IBMF_QP_HANDLE_DEFAULT) {
698 				mutex_enter(&clientp->ic_mutex);
699 				IBMF_RECV_CB_CLEANUP(clientp);
700 				mutex_exit(&clientp->ic_mutex);
701 			} else {
702 				qpp = (ibmf_alt_qp_t *)ibmf_qp_handle;
703 				mutex_enter(&qpp->isq_mutex);
704 				IBMF_ALT_RECV_CB_CLEANUP(qpp);
705 				mutex_exit(&qpp->isq_mutex);
706 			}
707 			(void) ibmf_i_repost_recv_buffer(clientp->ic_myci,
708 			    recv_wqep);
709 			IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1,
710 			    ibmf_i_do_recv_cb_error, IBMF_TNF_ERROR, "",
711 			    "ibmf_i_do_recv_cb(): %s\n", tnf_string, msg,
712 			    "mem allocation failure");
713 			IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
714 			    ibmf_i_do_recv_cb_end, IBMF_TNF_TRACE, "",
715 			    "ibmf_i_do_recv_cb() exit\n");
716 			return;
717 		}
718 
719 		/* Get the port's base LID if it's not in the client context */
720 		if ((clientp->ic_base_lid == 0) &&
721 		    (clientp->ic_qp->iq_qp_num != 0)) {
722 			(void) ibt_get_port_state_byguid(
723 			    clientp->ic_client_info.ci_guid,
724 			    clientp->ic_client_info.port_num, NULL,
725 			    &clientp->ic_base_lid);
726 			if (clientp->ic_base_lid == 0) {
727 				IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1,
728 				    ibmf_i_do_recv_cb_error, IBMF_TNF_ERROR, "",
729 				    "ibmf_i_do_recv_cb(): %s\n",
730 				    tnf_string, msg, "base_lid is undefined");
731 			}
732 		}
733 
734 		/* Set up address information */
735 		addrinfo.ia_local_lid = clientp->ic_base_lid +
736 		    wcp->wc_path_bits;
737 		addrinfo.ia_remote_lid = wcp->wc_slid;
738 		addrinfo.ia_remote_qno = wcp->wc_qpn;
739 
740 		/* Get the pkey, including the correct partiton membership */
741 		if (ibmf_qp_handle == IBMF_QP_HANDLE_DEFAULT) {
742 			if (recv_wqep->recv_qpp->iq_qp_num == IBMF_QP1_NUM) {
743 
744 				/*
745 				 * here too we expect the pkey index in the work
746 				 * completion belongs to a pkey in the pkey
747 				 * table
748 				 */
749 				status = ibmf_i_pkey_ix_to_key(
750 				    clientp->ic_myci, recv_wqep->recv_port_num,
751 				    wcp->wc_pkey_ix, &addrinfo.ia_p_key);
752 				if (status != IBMF_SUCCESS) {
753 					IBMF_TRACE_2(IBMF_TNF_NODEBUG,
754 					    DPRINT_L1, ibmf_i_do_recv_cb_error,
755 					    IBMF_TNF_ERROR, "",
756 					    "ibmf_i_do_recv_cb(): "
757 					    "get_pkey failed for ix %d,"
758 					    "status = %d\n", tnf_uint,
759 					    pkeyix, wcp->wc_pkey_ix, tnf_uint,
760 					    ibmf_status, status);
761 					mutex_enter(&clientp->ic_mutex);
762 					IBMF_RECV_CB_CLEANUP(clientp);
763 					mutex_exit(&clientp->ic_mutex);
764 					(void) ibmf_i_repost_recv_buffer(
765 					    clientp->ic_myci, recv_wqep);
766 					mutex_destroy(&msgimplp->im_mutex);
767 					cv_destroy(&msgimplp->im_trans_cv);
768 					kmem_free(msgimplp,
769 					    sizeof (ibmf_msg_impl_t));
770 					IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
771 					    ibmf_i_do_recv_cb_end,
772 					    IBMF_TNF_TRACE, "",
773 					    "ibmf_i_do_recv_cb() exit\n");
774 					return;
775 				}
776 			}
777 			addrinfo.ia_q_key = IBMF_MGMT_Q_KEY;
778 		} else {
779 			qpp = (ibmf_alt_qp_t *)ibmf_qp_handle;
780 
781 			/* For alternate QPs, the pkey is in the QP context */
782 			mutex_enter(&qpp->isq_mutex);
783 			addrinfo.ia_p_key = qpp->isq_pkey;
784 			addrinfo.ia_q_key = qpp->isq_qkey;
785 			mutex_exit(&qpp->isq_mutex);
786 		}
787 
788 		addrinfo.ia_service_level = wcp->wc_sl;
789 		msgimplp->im_local_addr = addrinfo;
790 
791 		/* Initialize the message context */
792 		cv_init(&msgimplp->im_trans_cv, NULL, CV_DRIVER, NULL);
793 		mutex_init(&msgimplp->im_mutex, NULL, MUTEX_DRIVER, NULL);
794 		msgimplp->im_client = clientp;
795 		msgimplp->im_qp_hdl = ibmf_qp_handle;
796 		msgimplp->im_flags = 0;
797 		msgimplp->im_unsolicited = B_TRUE;
798 		msgimplp->im_tid = b2h64(mad_hdr->TransactionID);
799 		msgimplp->im_mgt_class = mad_hdr->MgmtClass;
800 		msgimplp->im_retrans.retrans_retries = IBMF_RETRANS_DEF_RETRIES;
801 		msgimplp->im_retrans.retrans_rtv = IBMF_RETRANS_DEF_RTV;
802 		msgimplp->im_retrans.retrans_rttv = IBMF_RETRANS_DEF_RTTV;
803 		msgimplp->im_retrans.retrans_trans_to =
804 		    IBMF_RETRANS_DEF_TRANS_TO;
805 		msgimplp->im_rmpp_ctx.rmpp_state = IBMF_RMPP_STATE_UNDEFINED;
806 		msgimplp->im_rmpp_ctx.rmpp_respt = IBMF_RMPP_DEFAULT_RRESPT;
807 		IBMF_MSG_INCR_REFCNT(msgimplp);
808 		msgimplp->im_trans_state_flags = IBMF_TRANS_STATE_FLAG_UNINIT;
809 
810 		/*
811 		 * Initialize (and possibly allocate) the IBT UD destination
812 		 * address handle.
813 		 */
814 		status = ibmf_i_alloc_ud_dest(clientp, msgimplp,
815 		    &msgimplp->im_ud_dest, B_FALSE);
816 		if (status != IBMF_SUCCESS) {
817 			if (ibmf_qp_handle == IBMF_QP_HANDLE_DEFAULT) {
818 				mutex_enter(&clientp->ic_mutex);
819 				IBMF_RECV_CB_CLEANUP(clientp);
820 				mutex_exit(&clientp->ic_mutex);
821 			} else {
822 				qpp = (ibmf_alt_qp_t *)ibmf_qp_handle;
823 				mutex_enter(&qpp->isq_mutex);
824 				IBMF_ALT_RECV_CB_CLEANUP(qpp);
825 				mutex_exit(&qpp->isq_mutex);
826 			}
827 			(void) ibmf_i_repost_recv_buffer(clientp->ic_myci,
828 			    recv_wqep);
829 			mutex_destroy(&msgimplp->im_mutex);
830 			cv_destroy(&msgimplp->im_trans_cv);
831 			kmem_free(msgimplp, sizeof (ibmf_msg_impl_t));
832 			IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
833 			    ibmf_i_do_recv_cb_error, IBMF_TNF_ERROR, "",
834 			    "ibmf_i_do_recv_cb(): %s, status = %d\n",
835 			    tnf_string, msg, "alloc ah failed", tnf_uint,
836 			    ibmf_status, status);
837 			IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
838 			    ibmf_i_do_recv_cb_end, IBMF_TNF_TRACE, "",
839 			    "ibmf_i_do_recv_cb() exit\n");
840 			return;
841 		}
842 
843 		_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*msgimplp))
844 
845 		/* add message to client's list */
846 		ibmf_i_client_add_msg(clientp, msgimplp);
847 
848 		mutex_enter(&msgimplp->im_mutex);
849 
850 		/* no one should have touched our state */
851 		ASSERT(msgimplp->im_trans_state_flags ==
852 		    IBMF_TRANS_STATE_FLAG_UNINIT);
853 
854 		/* transition out of uninit state */
855 		msgimplp->im_trans_state_flags = IBMF_TRANS_STATE_FLAG_INIT;
856 	}
857 
858 	/* fill in the grh with the contents of the recv wqe */
859 	if (grhpresent == B_TRUE) {
860 		uint32_t tmp32;
861 
862 		msgimplp->im_msg_flags |= IBMF_MSG_FLAGS_GLOBAL_ADDRESS;
863 		ib_grh = (ib_grh_t *)recv_wqep->recv_mem;
864 		msgimplp->im_global_addr.ig_sender_gid.gid_prefix =
865 		    b2h64(ib_grh->SGID.gid_prefix);
866 		msgimplp->im_global_addr.ig_sender_gid.gid_guid =
867 		    b2h64(ib_grh->SGID.gid_guid);
868 		msgimplp->im_global_addr.ig_recver_gid.gid_prefix =
869 		    b2h64(ib_grh->DGID.gid_prefix);
870 		msgimplp->im_global_addr.ig_recver_gid.gid_guid =
871 		    b2h64(ib_grh->DGID.gid_guid);
872 		/*
873 		 * swap to get byte order back to wire format on little endian
874 		 * systems so we can apply the GRH masks
875 		 */
876 		tmp32 = b2h32(ib_grh->IPVer_TC_Flow);
877 		msgimplp->im_global_addr.ig_flow_label =
878 		    tmp32 & IB_GRH_FLOW_LABEL_MASK;
879 		msgimplp->im_global_addr.ig_tclass =
880 		    (tmp32 & IB_GRH_TCLASS_MASK) >> 20;
881 		msgimplp->im_global_addr.ig_hop_limit =
882 		    ib_grh->HopLmt;
883 	}
884 
885 	/* Perform RMPP or non-RMPP processing */
886 	if (ibmf_i_is_rmpp(clientp, ibmf_qp_handle) &&
887 	    (rmpp_hdr->rmpp_flags & IBMF_RMPP_FLAGS_ACTIVE)) {
888 		IBMF_TRACE_5(IBMF_TNF_DEBUG, DPRINT_L3,
889 		    ibmf_i_do_recv_cb, IBMF_TNF_TRACE, "",
890 		    "ibmf_i_do_recv_cb(): %s, tid = %016" PRIx64 ","
891 		    "flags = 0x%x rmpp_type = %d, rmpp_segnum = %d\n",
892 		    tnf_string, msg, "Handling rmpp MAD",
893 		    tnf_opaque, tid, b2h64(mad_hdr->TransactionID),
894 		    tnf_opaque, flags, rmpp_hdr->rmpp_flags,
895 		    tnf_opaque, type, rmpp_hdr->rmpp_type,
896 		    tnf_opaque, segment, b2h32(rmpp_hdr->rmpp_segnum));
897 
898 		/*
899 		 * Set the RMPP state to "receiver active" on the first packet
900 		 * of all RMPP message, and initialize the
901 		 * the expected segment to 1.
902 		 */
903 		if ((msgimplp->im_rmpp_ctx.rmpp_state ==
904 		    IBMF_RMPP_STATE_UNDEFINED) &&
905 		    (rmpp_hdr->rmpp_flags & IBMF_RMPP_FLAGS_FIRST_PKT)) {
906 
907 			msgimplp->im_flags |= IBMF_MSG_FLAGS_RECV_RMPP;
908 
909 			if (rmpp_hdr->rmpp_type == IBMF_RMPP_TYPE_DATA) {
910 				msgimplp->im_rmpp_ctx.rmpp_state =
911 				    IBMF_RMPP_STATE_RECEVR_ACTIVE;
912 
913 				IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L3,
914 				    ibmf_i_do_recv_cb, IBMF_TNF_TRACE, "",
915 				    "ibmf_i_do_recv_cb(): %s, msgimplp = %p\n",
916 				    tnf_string, msg, "first RMPP pkt received",
917 				    tnf_opaque, msgimplp, msgimplp);
918 			}
919 
920 			msgimplp->im_rmpp_ctx.rmpp_es = 1;
921 			msgimplp->im_rmpp_ctx.rmpp_wl = 1;
922 			msgimplp->im_rmpp_ctx.rmpp_wf = 1;
923 
924 			/* set double-sided transfer flag for certain methods */
925 			if (mad_hdr->R_Method == SA_SUBN_ADM_GET_MULTI)
926 				msgimplp->im_rmpp_ctx.rmpp_is_ds = B_TRUE;
927 			else	msgimplp->im_rmpp_ctx.rmpp_is_ds = B_FALSE;
928 
929 			msgimplp->im_trans_state_flags |=
930 			    IBMF_TRANS_STATE_FLAG_RECV_ACTIVE;
931 		}
932 
933 		if (rmpp_hdr->rmpp_resp_time != IBMF_RMPP_DEFAULT_RRESPT) {
934 			msgimplp->im_retrans.retrans_rtv =
935 			    1 << rmpp_hdr->rmpp_resp_time;
936 
937 			IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L3,
938 			    ibmf_i_do_recv_cb, IBMF_TNF_TRACE, "",
939 			    "ibmf_i_do_recv_cb: %s, resp_time %d\n",
940 			    tnf_string, msg, "new resp time received",
941 			    tnf_uint, resp_time, rmpp_hdr->rmpp_resp_time);
942 		}
943 
944 		ibmf_i_handle_rmpp(clientp, ibmf_qp_handle, msgimplp,
945 		    (uchar_t *)((uintptr_t)recv_wqep->recv_mem +
946 		    sizeof (ib_grh_t)));
947 	} else {
948 
949 		msgimplp->im_trans_state_flags |=
950 		    IBMF_TRANS_STATE_FLAG_RECV_ACTIVE;
951 
952 		ibmf_i_handle_non_rmpp(clientp, msgimplp,
953 		    (uchar_t *)((uintptr_t)recv_wqep->recv_mem +
954 		    sizeof (ib_grh_t)));
955 	}
956 
957 	msg_rp_unset_id = msg_tr_unset_id = msg_rp_set_id = msg_tr_set_id = 0;
958 
959 	/*
960 	 * Save the transaction state flags and the timeout IDs
961 	 * before releasing the mutex as they may be changed after that.
962 	 */
963 	msg_trans_state_flags = msgimplp->im_trans_state_flags;
964 	msg_flags = msgimplp->im_flags;
965 	msg_rp_unset_id = msgimplp->im_rp_unset_timeout_id;
966 	msg_tr_unset_id = msgimplp->im_tr_unset_timeout_id;
967 	msgimplp->im_rp_unset_timeout_id = 0;
968 	msgimplp->im_tr_unset_timeout_id = 0;
969 
970 	/*
971 	 * Decrement the message reference count
972 	 * This count was incremented either when the message was found
973 	 * on the client's message list (ibmf_i_find_msg()) or when
974 	 * a new message was created for unsolicited data
975 	 */
976 	IBMF_MSG_DECR_REFCNT(msgimplp);
977 
978 	if (msg_trans_state_flags & IBMF_TRANS_STATE_FLAG_DONE) {
979 		if (msgimplp->im_rp_timeout_id != 0) {
980 			msg_rp_set_id = msgimplp->im_rp_timeout_id;
981 			msgimplp->im_rp_timeout_id = 0;
982 		}
983 		if (msgimplp->im_tr_timeout_id != 0) {
984 			msg_tr_set_id = msgimplp->im_tr_timeout_id;
985 			msgimplp->im_tr_timeout_id = 0;
986 		}
987 	}
988 
989 	mutex_exit(&msgimplp->im_mutex);
990 
991 	/*
992 	 * Call untimeout() after releasing the lock because the
993 	 * lock is acquired in the timeout handler as well. Untimeout()
994 	 * does not return until the timeout handler has run, if it already
995 	 * fired, which would result in a deadlock if we did not first
996 	 * release the im_mutex lock.
997 	 */
998 	if (msg_rp_unset_id != 0) {
999 		(void) untimeout(msg_rp_unset_id);
1000 	}
1001 
1002 	if (msg_tr_unset_id != 0) {
1003 		(void) untimeout(msg_tr_unset_id);
1004 	}
1005 
1006 	if (msg_rp_set_id != 0) {
1007 		(void) untimeout(msg_rp_set_id);
1008 	}
1009 
1010 	if (msg_tr_set_id != 0) {
1011 		(void) untimeout(msg_tr_set_id);
1012 	}
1013 
1014 	/* Increment the kstats for number of messages received */
1015 	mutex_enter(&clientp->ic_kstat_mutex);
1016 	IBMF_ADD32_KSTATS(clientp, msgs_received, 1);
1017 	mutex_exit(&clientp->ic_kstat_mutex);
1018 
1019 	/*
1020 	 * now that we are done gleaning all we want out of the receive
1021 	 * completion, we repost the receive request.
1022 	 */
1023 	(void) ibmf_i_repost_recv_buffer(clientp->ic_myci, recv_wqep);
1024 
1025 	/*
1026 	 * If the transaction flags indicate a completed transaction,
1027 	 * notify the client
1028 	 */
1029 	if (msg_trans_state_flags & IBMF_TRANS_STATE_FLAG_DONE) {
1030 		IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L3,
1031 		    ibmf_i_do_recv_cb, IBMF_TNF_TRACE, "",
1032 		    "ibmf_i_do_recv_cb(): %s, msgimplp = %p\n",
1033 		    tnf_string, msg, "notifying client",
1034 		    tnf_opaque, msgimplp, msgimplp);
1035 
1036 		/* Remove the message from the client's message list */
1037 		ibmf_i_client_rem_msg(clientp, msgimplp, &ref_cnt);
1038 
1039 		/*
1040 		 * Notify the client if the message reference count is zero.
1041 		 * At this point, we know that the transaction is done and
1042 		 * the message has been removed from the client's message list.
1043 		 * So, we only need to make sure the reference count is zero
1044 		 * before notifying the client.
1045 		 */
1046 		if (ref_cnt == 0) {
1047 
1048 			ibmf_i_notify_sequence(clientp, msgimplp, msg_flags);
1049 
1050 		}
1051 	}
1052 
1053 	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4,
1054 	    ibmf_i_do_recv_cb_end, IBMF_TNF_TRACE, "",
1055 	    "ibmf_i_do_recv_cb() exit, msgimplp = %p\n",
1056 	    tnf_opaque, msgimplp, msgimplp);
1057 }
1058 
1059 /*
1060  * ibmf_i_handle_non_rmpp():
1061  *	Handle non-RMPP processing of an incoming IB packet
1062  */
1063 void
1064 ibmf_i_handle_non_rmpp(ibmf_client_t *clientp, ibmf_msg_impl_t *msgimplp,
1065     uchar_t *mad)
1066 {
1067 	ibmf_rmpp_ctx_t	*rmpp_ctx = &msgimplp->im_rmpp_ctx;
1068 	ib_mad_hdr_t	*mad_hdr;
1069 	size_t		offset;
1070 	uchar_t		*msgbufp;
1071 	uint32_t	clhdrsz, clhdroff;
1072 
1073 	IBMF_TRACE_3(IBMF_TNF_DEBUG, DPRINT_L4,
1074 	    ibmf_i_handle_non_rmpp_start, IBMF_TNF_TRACE, "",
1075 	    "ibmf_i_handle_non_rmpp(): clientp = 0x%p, "
1076 	    "msgp = 0x%p, madp = 0x%p\n", tnf_opaque, clientp, clientp,
1077 	    tnf_opaque, msg, msgimplp, tnf_opaque, mad, mad);
1078 
1079 	ASSERT(MUTEX_HELD(&msgimplp->im_mutex));
1080 
1081 	/* Get the MAD header */
1082 	mad_hdr = (ib_mad_hdr_t *)mad;
1083 
1084 	/* Determine the MAD's class header size */
1085 	ibmf_i_mgt_class_to_hdr_sz_off(mad_hdr->MgmtClass, &clhdrsz, &clhdroff);
1086 
1087 	/* Allocate the message receive buffers if not already allocated */
1088 	if (msgimplp->im_msgbufs_recv.im_bufs_mad_hdr == NULL) {
1089 
1090 		msgimplp->im_msgbufs_recv.im_bufs_mad_hdr =
1091 		    (ib_mad_hdr_t *)kmem_zalloc(IBMF_MAD_SIZE, KM_NOSLEEP);
1092 		if (msgimplp->im_msgbufs_recv.im_bufs_mad_hdr == NULL) {
1093 
1094 			IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1,
1095 			    ibmf_i_handle_non_rmpp_err, IBMF_TNF_ERROR, "",
1096 			    "ibmf_i_handle_non_rmpp(): %s\n", tnf_string, msg,
1097 			    "mem allocation failure (non-rmpp payload)");
1098 
1099 			IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
1100 			    ibmf_i_handle_non_rmpp_end, IBMF_TNF_TRACE, "",
1101 			    "ibmf_i_handle_non_rmpp() exit\n");
1102 
1103 			return;
1104 		}
1105 		mutex_enter(&clientp->ic_kstat_mutex);
1106 		IBMF_ADD32_KSTATS(clientp, recv_bufs_alloced, 1);
1107 		mutex_exit(&clientp->ic_kstat_mutex);
1108 	}
1109 
1110 	/* Get a pointer to the MAD location in the receive buffer */
1111 	msgbufp = (uchar_t *)msgimplp->im_msgbufs_recv.im_bufs_mad_hdr;
1112 
1113 	/* Copy the incoming MAD into the receive buffer */
1114 	bcopy((const void *)mad, (void *)msgbufp, IBMF_MAD_SIZE);
1115 
1116 	/* Get the offset of the class header */
1117 	offset = sizeof (ib_mad_hdr_t) + clhdroff;
1118 
1119 	/* initialize class header pointer */
1120 	if (clhdrsz == 0) {
1121 		msgimplp->im_msgbufs_recv.im_bufs_cl_hdr = NULL;
1122 	} else {
1123 		msgimplp->im_msgbufs_recv.im_bufs_cl_hdr =
1124 		    (void *)(msgbufp + offset);
1125 	}
1126 	msgimplp->im_msgbufs_recv.im_bufs_cl_hdr_len = clhdrsz;
1127 
1128 	offset += clhdrsz;
1129 
1130 	/* initialize data area pointer */
1131 	msgimplp->im_msgbufs_recv.im_bufs_cl_data = (void *)(msgbufp + offset);
1132 	msgimplp->im_msgbufs_recv.im_bufs_cl_data_len = IBMF_MAD_SIZE -
1133 	    sizeof (ib_mad_hdr_t) - clhdroff - clhdrsz;
1134 
1135 	rmpp_ctx->rmpp_state = IBMF_RMPP_STATE_DONE;
1136 	ibmf_i_terminate_transaction(clientp, msgimplp, IBMF_SUCCESS);
1137 
1138 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,	ibmf_i_handle_non_rmpp_end,
1139 	    IBMF_TNF_TRACE, "", "ibmf_i_handle_non_rmpp() exit\n");
1140 }
1141 
1142 /*
1143  * ibmf_i_repost_recv_buffer():
1144  *	Repost a WQE to the RQ after processing it
1145  */
1146 /* ARGSUSED */
1147 int
1148 ibmf_i_repost_recv_buffer(ibmf_ci_t *cip, ibmf_recv_wqe_t *recv_wqep)
1149 {
1150 	int			ret;
1151 	ibt_status_t		status;
1152 	ibmf_qp_handle_t	ibmf_qp_handle = recv_wqep->recv_ibmf_qp_handle;
1153 	struct kmem_cache	*kmem_cachep;
1154 	ibmf_alt_qp_t		*altqp;
1155 	ibmf_qp_t		*qpp;
1156 
1157 	IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L4,
1158 	    ibmf_i_repost_recv_buffer_start, IBMF_TNF_TRACE, "",
1159 	    "ibmf_i_repost_recv_buffer() enter, cip = %p, rwqep = %p\n",
1160 	    tnf_opaque, cip, cip, tnf_opaque, rwqep, recv_wqep);
1161 
1162 	ASSERT(MUTEX_NOT_HELD(&cip->ci_mutex));
1163 
1164 	/* Get the WQE kmem cache pointer based on the QP type */
1165 	if (ibmf_qp_handle == IBMF_QP_HANDLE_DEFAULT) {
1166 		kmem_cachep = cip->ci_recv_wqes_cache;
1167 		qpp = recv_wqep->recv_qpp;
1168 	} else {
1169 		altqp = (ibmf_alt_qp_t *)ibmf_qp_handle;
1170 		kmem_cachep = altqp->isq_recv_wqes_cache;
1171 	}
1172 
1173 	/* post recv wqe; free it if the post fails */
1174 	status = ibt_post_recv(recv_wqep->recv_qp_handle, &recv_wqep->recv_wr,
1175 	    1, NULL);
1176 
1177 	ret = ibmf_i_ibt_to_ibmf_status(status);
1178 	if (ret != IBMF_SUCCESS) {
1179 		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
1180 		    ibmf_i_repost_recv_buffer_err, IBMF_TNF_ERROR, "",
1181 		    "ibmf_i_repost_recv_buffer(): %s, status = %d\n",
1182 		    tnf_string, msg, "repost_recv failed", tnf_uint,
1183 		    ibt_status, status);
1184 		kmem_free(recv_wqep->recv_wr.wr_sgl,
1185 		    IBMF_MAX_RQ_WR_SGL_ELEMENTS * sizeof (ibt_wr_ds_t));
1186 		kmem_cache_free(kmem_cachep, recv_wqep);
1187 		mutex_enter(&cip->ci_mutex);
1188 		IBMF_SUB32_PORT_KSTATS(cip, recv_wqes_alloced, 1);
1189 		mutex_exit(&cip->ci_mutex);
1190 		if (ibmf_qp_handle == IBMF_QP_HANDLE_DEFAULT) {
1191 			mutex_enter(&cip->ci_mutex);
1192 			cip->ci_wqes_alloced--;
1193 			if (cip->ci_wqes_alloced == 0)
1194 				cv_signal(&cip->ci_wqes_cv);
1195 			mutex_exit(&cip->ci_mutex);
1196 		} else {
1197 			mutex_enter(&altqp->isq_mutex);
1198 			altqp->isq_wqes_alloced--;
1199 			if (altqp->isq_wqes_alloced == 0)
1200 				cv_signal(&altqp->isq_wqes_cv);
1201 			mutex_exit(&altqp->isq_mutex);
1202 		}
1203 	}
1204 
1205 	if (ibmf_qp_handle == IBMF_QP_HANDLE_DEFAULT) {
1206 		mutex_enter(&qpp->iq_mutex);
1207 		qpp->iq_rwqes_posted++;
1208 		mutex_exit(&qpp->iq_mutex);
1209 	} else {
1210 		mutex_enter(&altqp->isq_mutex);
1211 		altqp->isq_rwqes_posted++;
1212 		mutex_exit(&altqp->isq_mutex);
1213 	}
1214 
1215 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_repost_recv_buffer_end,
1216 	    IBMF_TNF_TRACE, "", "ibmf_i_repost_recv_buffer() exit\n");
1217 	return (ret);
1218 }
1219 
1220 /*
1221  * ibmf_i_get_class:
1222  * Parses the mad header and determines which class should be notified of the
1223  * notification.
1224  *
1225  * Input Argument
1226  * madhdrp    contents of mad header for the packet
1227  *
1228  * Output Argument
1229  * dest_classp pointer to the class type of the client that should be notified
1230  *
1231  * Returns
1232  * status
1233  */
1234 static int
1235 ibmf_i_get_class(ib_mad_hdr_t *madhdrp, ibmf_qp_handle_t dest_ibmf_qp_handle,
1236     ib_lid_t slid, ibmf_client_type_t *dest_classp)
1237 {
1238 	int		method = madhdrp->R_Method;
1239 	int		attrib = b2h16(madhdrp->AttributeID);
1240 	int		class = madhdrp->MgmtClass;
1241 	uint32_t	attrib_mod = b2h32(madhdrp->AttributeModifier);
1242 
1243 	IBMF_TRACE_4(IBMF_TNF_DEBUG, DPRINT_L4,
1244 	    ibmf_i_get_class_start, IBMF_TNF_TRACE, "",
1245 	    "ibmf_i_get_class() enter, class = 0x%x, method = 0x%x, "
1246 	    "attribute = 0x%x, dest_qp_hdl = 0x%p\n",
1247 	    tnf_opaque, class, class,
1248 	    tnf_opaque, method, method,
1249 	    tnf_opaque, attrib, attrib,
1250 	    tnf_opaque, ibmf_qp_handle, dest_ibmf_qp_handle);
1251 
1252 	/* set default for error checking */
1253 	*dest_classp = 0;
1254 
1255 	/*
1256 	 * Determine the class type
1257 	 */
1258 	switch (class) {
1259 	case MAD_MGMT_CLASS_SUBN_LID_ROUTED:
1260 	case MAD_MGMT_CLASS_SUBN_DIRECT_ROUTE:
1261 
1262 		/*
1263 		 * tavor generates trap by sending mad with slid 0;
1264 		 * deliver this to SMA
1265 		 */
1266 		if ((method == MAD_METHOD_TRAP) && (slid == 0)) {
1267 			*dest_classp = SUBN_AGENT;
1268 			break;
1269 		}
1270 
1271 		/* this is derived from table 109 of IB Spec 1.1, vol1 */
1272 		if (attrib == SM_SMINFO_ATTRID || method == MAD_METHOD_TRAP ||
1273 		    method == MAD_METHOD_GET_RESPONSE)
1274 			*dest_classp = SUBN_MANAGER;
1275 		else
1276 			*dest_classp = SUBN_AGENT;
1277 
1278 		break;
1279 	case MAD_MGMT_CLASS_SUBN_ADM:
1280 
1281 		/*
1282 		 * Deliver to SA client (agent) if packet was sent to default qp
1283 		 * Deliver to ibmf_saa client (manager) if packet was sent to
1284 		 * alternate qp
1285 		 */
1286 		if (dest_ibmf_qp_handle == IBMF_QP_HANDLE_DEFAULT)
1287 			*dest_classp = SUBN_ADM_AGENT;
1288 		else
1289 			*dest_classp = SUBN_ADM_MANAGER;
1290 		break;
1291 	case MAD_MGMT_CLASS_PERF:
1292 
1293 		/* Deliver to PM if response bit is set */
1294 		if ((method & MAD_RESPONSE_BIT_MASK) == MAD_RESPONSE_BIT)
1295 			*dest_classp = PERF_MANAGER;
1296 		else
1297 			*dest_classp = PERF_AGENT;
1298 		break;
1299 	case MAD_MGMT_CLASS_BM:
1300 
1301 		/*
1302 		 * Deliver to BM if response bit is set, packet is a trap,
1303 		 * or packet is a BMSend
1304 		 */
1305 		if (((method & MAD_RESPONSE_BIT_MASK) == MAD_RESPONSE_BIT) ||
1306 		    (method == MAD_METHOD_TRAP) ||
1307 		    ((method == MAD_METHOD_SEND) &&
1308 		    ((attrib_mod & IBMF_BM_MAD_ATTR_MOD_REQRESP_BIT) ==
1309 		    IBMF_BM_MAD_ATTR_MOD_RESP)))
1310 			*dest_classp = BM_MANAGER;
1311 		else
1312 			*dest_classp = BM_AGENT;
1313 
1314 		break;
1315 	case MAD_MGMT_CLASS_DEV_MGT:
1316 
1317 		/* Deliver to DM if response bit is set or packet is a trap */
1318 		if (((method & MAD_RESPONSE_BIT_MASK) == MAD_RESPONSE_BIT) ||
1319 		    (method == MAD_METHOD_TRAP))
1320 			*dest_classp = DEV_MGT_MANAGER;
1321 		else
1322 			*dest_classp = DEV_MGT_AGENT;
1323 		break;
1324 	case MAD_MGMT_CLASS_COMM_MGT:
1325 		*dest_classp = COMM_MGT_MANAGER_AGENT;
1326 		break;
1327 	case MAD_MGMT_CLASS_SNMP:
1328 		*dest_classp = SNMP_MANAGER_AGENT;
1329 		break;
1330 	default:
1331 
1332 		if ((class >= MAD_MGMT_CLASS_VENDOR_START) &&
1333 		    (class <= MAD_MGMT_CLASS_VENDOR_END)) {
1334 			*dest_classp = VENDOR_09_MANAGER_AGENT +
1335 			    (class - MAD_MGMT_CLASS_VENDOR_START);
1336 		} else if ((class >= MAD_MGMT_CLASS_VENDOR2_START) &&
1337 		    (class <= MAD_MGMT_CLASS_VENDOR2_END)) {
1338 			*dest_classp = VENDOR_30_MANAGER_AGENT +
1339 			    (class - MAD_MGMT_CLASS_VENDOR2_START);
1340 		} else if ((class >= MAD_MGMT_CLASS_APPLICATION_START) &&
1341 		    (class <= MAD_MGMT_CLASS_APPLICATION_END)) {
1342 			*dest_classp = APPLICATION_10_MANAGER_AGENT +
1343 			    (class - MAD_MGMT_CLASS_APPLICATION_START);
1344 		}
1345 
1346 		break;
1347 	}
1348 
1349 	if (*dest_classp == 0) {
1350 		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
1351 		    ibmf_i_get_class_type_err, IBMF_TNF_TRACE, "",
1352 		    "ibmf_i_get_class(): %s, class = 0x%x\n",
1353 		    tnf_string, msg, "invalid class", tnf_opaque, class, class);
1354 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_get_class_end,
1355 		    IBMF_TNF_TRACE, "", "ibmf_i_get_class() exit\n");
1356 		return (IBMF_FAILURE);
1357 	}
1358 
1359 	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4,
1360 	    ibmf_i_get_class_end, IBMF_TNF_TRACE, "",
1361 	    "ibmf_i_get_class() exit, class = 0x%x\n",
1362 	    tnf_opaque, class, *dest_classp);
1363 
1364 	return (IBMF_SUCCESS);
1365 }
1366 
1367 /*
1368  * ibmf_get_mod_name():
1369  * Constructs the module name based on the naming convention described in
1370  * PSARC case 2003/753.
1371  * The name should be "sunwibmgt<MgtClass><a_m>
1372  * where:
1373  *	MgtClass = Management class field in the MAD header.
1374  *		   Two lower-case characters are used to represent
1375  *		   this 8-bit value as 2 hex digits.
1376  *	a_m	 = "a" if the client is an agent-only module
1377  *		   "m" if the client is a manager-only module
1378  *		   ""  if the client is both agent and manager.
1379  *
1380  * Input Argument
1381  * mad_class	management class in the MAD header
1382  * class	IBMF management class of incoming MAD
1383  *
1384  * Output Argument
1385  * modname	pointer to the character array that holds the module name
1386  *
1387  * Status
1388  * None
1389  */
1390 static void
1391 ibmf_get_mod_name(uint8_t mad_class, ibmf_client_type_t class, char *modname)
1392 {
1393 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_get_mod_name_start,
1394 	    IBMF_TNF_TRACE, "", "ibmf_get_mod_name_qphdl() enter\n");
1395 
1396 	if (AGENT_CLASS(class)) {
1397 		(void) sprintf(modname, "sunwibmgt%02xa", mad_class);
1398 	} else if (MANAGER_CLASS(class)) {
1399 		(void) sprintf(modname, "sunwibmgt%02xm", mad_class);
1400 	} else {
1401 		/* AGENT+MANAGER class */
1402 		(void) sprintf(modname, "sunwibmgt%02x", mad_class);
1403 	}
1404 
1405 	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L3, ibmf_get_mod_name,
1406 	    IBMF_TNF_TRACE, "", "ibmf_get_mod_name(): name = %s\n",
1407 	    tnf_string, msg, modname);
1408 
1409 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_get_mod_name_end,
1410 	    IBMF_TNF_TRACE, "", "ibmf_get_mod_name() exit\n");
1411 }
1412 
1413 /*
1414  * ibmf_send_busy():
1415  *
1416  * When a MAD request is received for an IB mandatory agent (BMA or PMA),
1417  * which has not yet registered with IBMF, IBMF returns a BUSY MAD
1418  * to the source of the request to solicit a retry while IBMF attempts
1419  * to load the mandatory agent.
1420  * A temporary, alternate QP is allocated for the purpose of sending the
1421  * MAD. This QP is configured to be in the same partition as the manager
1422  * that sent the request.
1423  *
1424  * Input Argument
1425  * modlargsp	Pointer to ibmf_mod_load_args_t structure
1426  *
1427  * Output Argument
1428  * None
1429  *
1430  * Status
1431  * None
1432  */
1433 static void
1434 ibmf_send_busy(ibmf_mod_load_args_t *modlargsp)
1435 {
1436 	ibmf_ci_t		*cip = modlargsp->cip;
1437 	ibmf_recv_wqe_t		*recv_wqep = modlargsp->recv_wqep;
1438 	ibt_wr_ds_t		sgl[1];
1439 	ibmf_send_wqe_t		*send_wqep;
1440 	ibt_send_wr_t		*swrp;
1441 	ibmf_msg_impl_t 	*msgimplp;
1442 	ibmf_ud_dest_t		*ibmf_ud_dest;
1443 	ibt_ud_dest_t		*ud_dest;
1444 	ib_mad_hdr_t		*smadhdrp, *rmadhdrp;
1445 	ibt_adds_vect_t		adds_vec;
1446 	ibt_wc_t		*wcp = &recv_wqep->recv_wc;
1447 	ibt_status_t		ibtstatus;
1448 	uint_t			num_work_reqs;
1449 	ibt_qp_alloc_attr_t	qp_attrs;
1450 	ibt_qp_info_t		qp_modify_attr;
1451 	ibt_chan_sizes_t	qp_sizes;
1452 	ib_qpn_t		qp_num;
1453 	ibt_qp_hdl_t		ibt_qp_handle;
1454 	ibt_mr_hdl_t		mem_hdl;
1455 	ibt_mr_desc_t		mem_desc;
1456 	ibt_mr_attr_t		mem_attr;
1457 
1458 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_send_busy_start,
1459 	    IBMF_TNF_TRACE, "", "ibmf_send_busy() enter\n");
1460 
1461 	/* setup the qp attrs for the alloc call */
1462 	qp_attrs.qp_scq_hdl = cip->ci_alt_cq_handle;
1463 	qp_attrs.qp_rcq_hdl = cip->ci_alt_cq_handle;
1464 	qp_attrs.qp_pd_hdl = cip->ci_pd;
1465 	qp_attrs.qp_sizes.cs_sq_sgl = IBMF_MAX_SQ_WR_SGL_ELEMENTS;
1466 	qp_attrs.qp_sizes.cs_rq_sgl = IBMF_MAX_RQ_WR_SGL_ELEMENTS;
1467 	qp_attrs.qp_sizes.cs_sq = ibmf_send_wqes_posted_per_qp;
1468 	qp_attrs.qp_sizes.cs_rq = ibmf_recv_wqes_posted_per_qp;
1469 	qp_attrs.qp_flags = IBT_ALL_SIGNALED;
1470 	qp_attrs.qp_alloc_flags = IBT_QP_NO_FLAGS;
1471 
1472 	/* request IBT for a qp with the desired attributes */
1473 	ibtstatus = ibt_alloc_qp(cip->ci_ci_handle, IBT_UD_RQP,
1474 	    &qp_attrs, &qp_sizes, &qp_num, &ibt_qp_handle);
1475 	if (ibtstatus != IBT_SUCCESS) {
1476 		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1, ibmf_send_busy_err,
1477 		    IBMF_TNF_ERROR, "", "ibmf_send_busy(): %s, status = %d\n",
1478 		    tnf_string, msg, "failed to allocate alternate QP",
1479 		    tnf_int, ibt_status, ibtstatus);
1480 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_send_busy_end,
1481 		    IBMF_TNF_TRACE, "", "ibmf_send_busy() exit\n");
1482 		return;
1483 	}
1484 
1485 	qp_modify_attr.qp_trans = IBT_UD_SRV;
1486 	qp_modify_attr.qp_flags = IBT_CEP_NO_FLAGS;
1487 	qp_modify_attr.qp_transport.ud.ud_qkey = IB_GSI_QKEY;
1488 	qp_modify_attr.qp_transport.ud.ud_sq_psn = 0;
1489 	qp_modify_attr.qp_transport.ud.ud_pkey_ix = wcp->wc_pkey_ix;
1490 	qp_modify_attr.qp_transport.ud.ud_port = recv_wqep->recv_port_num;
1491 
1492 	/* call the IB transport to initialize the QP */
1493 	ibtstatus = ibt_initialize_qp(ibt_qp_handle, &qp_modify_attr);
1494 	if (ibtstatus != IBT_SUCCESS) {
1495 		(void) ibt_free_qp(ibt_qp_handle);
1496 		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1, ibmf_send_busy_err,
1497 		    IBMF_TNF_ERROR, "", "ibmf_send_busy(): %s, status = %d\n",
1498 		    tnf_string, msg, "failed to initialize alternate QP",
1499 		    tnf_int, ibt_status, ibtstatus);
1500 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_send_busy_end,
1501 		    IBMF_TNF_TRACE, "", "ibmf_send_busy() exit\n");
1502 		return;
1503 	}
1504 
1505 	/* allocate the message context */
1506 	msgimplp = (ibmf_msg_impl_t *)kmem_zalloc(sizeof (ibmf_msg_impl_t),
1507 	    KM_SLEEP);
1508 
1509 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*msgimplp))
1510 
1511 	ibmf_i_pop_ud_dest_thread(cip);
1512 
1513 	/*
1514 	 * Get a UD dest structure from the pool, this will not fail
1515 	 * because ibmf_i_pop_ud_dest_thread() calls
1516 	 * ibmf_i_populate_ud_dest_list with the KM_SLEEP flag.
1517 	 */
1518 	ibmf_ud_dest = ibmf_i_get_ud_dest(cip);
1519 
1520 	msgimplp->im_ibmf_ud_dest = ibmf_ud_dest;
1521 	msgimplp->im_ud_dest = &ibmf_ud_dest->ud_dest;
1522 	msgimplp->im_qp_hdl = NULL;
1523 
1524 	/*
1525 	 * Reset send_done to indicate we have not received the completion
1526 	 * for this send yet.
1527 	 */
1528 	msgimplp->im_trans_state_flags &= ~IBMF_TRANS_STATE_FLAG_SEND_DONE;
1529 
1530 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*send_wqep))
1531 
1532 	/*
1533 	 * Allocate resources needed to send a UD packet including the
1534 	 * send WQE context
1535 	 */
1536 	send_wqep = (ibmf_send_wqe_t *)kmem_zalloc(sizeof (ibmf_send_wqe_t),
1537 	    KM_SLEEP);
1538 	send_wqep->send_mem = (void *)kmem_zalloc(IBMF_MEM_PER_WQE, KM_SLEEP);
1539 
1540 	mem_attr.mr_vaddr = (ib_vaddr_t)(uintptr_t)send_wqep->send_mem;
1541 	mem_attr.mr_len = IBMF_MEM_PER_WQE;
1542 	mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE;
1543 	mem_attr.mr_as = NULL;
1544 
1545 	/* Register the allocated memory */
1546 	ibtstatus = ibt_register_mr(cip->ci_ci_handle, cip->ci_pd, &mem_attr,
1547 	    &mem_hdl, &mem_desc);
1548 	if (ibtstatus != IBT_SUCCESS) {
1549 		kmem_free(send_wqep->send_mem, IBMF_MEM_PER_WQE);
1550 		kmem_free(send_wqep, sizeof (ibmf_send_wqe_t));
1551 		ibmf_i_put_ud_dest(cip, msgimplp->im_ibmf_ud_dest);
1552 		kmem_free(msgimplp, sizeof (ibmf_msg_impl_t));
1553 		(void) ibt_free_qp(ibt_qp_handle);
1554 		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1, ibmf_send_busy_err,
1555 		    IBMF_TNF_ERROR, "", "ibmf_send_busy(): %s, status = %d\n",
1556 		    tnf_string, msg, "failed to register memory",
1557 		    tnf_int, ibt_status, ibtstatus);
1558 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_send_busy_end,
1559 		    IBMF_TNF_TRACE, "", "ibmf_send_busy() exit\n");
1560 		return;
1561 	}
1562 
1563 	send_wqep->send_sg_lkey = mem_desc.md_lkey;
1564 	send_wqep->send_mem_hdl = mem_hdl;
1565 
1566 	swrp = &send_wqep->send_wr;
1567 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*swrp))
1568 
1569 	/* use send wqe pointer as the WR ID */
1570 	swrp->wr_id		= (ibt_wrid_t)(uintptr_t)send_wqep;
1571 	ASSERT(swrp->wr_id != NULL);
1572 	swrp->wr_flags		= IBT_WR_NO_FLAGS;
1573 	swrp->wr_opcode		= IBT_WRC_SEND;
1574 	swrp->wr_trans		= IBT_UD_SRV;
1575 
1576 	send_wqep->send_client	= NULL;
1577 	send_wqep->send_msg	= msgimplp;
1578 
1579 	/* Initialize the scatter-gather list */
1580 	sgl[0].ds_va		= (ib_vaddr_t)(uintptr_t)send_wqep->send_mem;
1581 	sgl[0].ds_key		= send_wqep->send_sg_lkey;
1582 	sgl[0].ds_len		= IBMF_MAD_SIZE;
1583 
1584 	wcp			= &recv_wqep->recv_wc;
1585 
1586 	/* Initialize the address vector */
1587 	adds_vec.av_send_grh	= B_FALSE;
1588 	adds_vec.av_dlid	= wcp->wc_slid;
1589 	adds_vec.av_src_path	= wcp->wc_path_bits;
1590 	adds_vec.av_srvl	= 0;
1591 	adds_vec.av_srate	= IBT_SRATE_1X;
1592 	adds_vec.av_port_num	= recv_wqep->recv_port_num;
1593 
1594 	ud_dest			= msgimplp->im_ud_dest;
1595 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ud_dest))
1596 	ud_dest->ud_qkey	= IB_GSI_QKEY;
1597 	ud_dest->ud_dst_qpn	= wcp->wc_qpn;
1598 
1599 	/* modify the address handle with the address vector information */
1600 	ibtstatus = ibt_modify_ah(cip->ci_ci_handle, ud_dest->ud_ah, &adds_vec);
1601 	if (ibtstatus != IBT_SUCCESS) {
1602 		(void) ibt_deregister_mr(cip->ci_ci_handle, mem_hdl);
1603 		kmem_free(send_wqep->send_mem, IBMF_MEM_PER_WQE);
1604 		kmem_free(send_wqep, sizeof (ibmf_send_wqe_t));
1605 		ibmf_i_put_ud_dest(cip, msgimplp->im_ibmf_ud_dest);
1606 		kmem_free(msgimplp, sizeof (ibmf_msg_impl_t));
1607 		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1, ibmf_send_busy_err,
1608 		    IBMF_TNF_ERROR, "", "ibmf_send_busy(): %s, status = %d\n",
1609 		    tnf_string, msg, "ibt modify ah failed", tnf_uint,
1610 		    ibt_status, ibtstatus);
1611 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_send_busy_end,
1612 		    IBMF_TNF_TRACE, "", "ibmf_send_busy(() exit\n");
1613 		return;
1614 	}
1615 
1616 	bzero(send_wqep->send_mem, IBMF_MAD_SIZE);
1617 
1618 	rmadhdrp = (ib_mad_hdr_t *)((uintptr_t)recv_wqep->recv_mem +
1619 	    sizeof (ib_grh_t));
1620 	smadhdrp = (ib_mad_hdr_t *)send_wqep->send_mem;
1621 
1622 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*rmadhdrp))
1623 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*smadhdrp))
1624 
1625 	/* Set up the MAD header */
1626 	smadhdrp->BaseVersion	= rmadhdrp->BaseVersion;
1627 	smadhdrp->MgmtClass	= rmadhdrp->MgmtClass;
1628 	smadhdrp->ClassVersion	= rmadhdrp->ClassVersion;
1629 	smadhdrp->R_Method	= MAD_METHOD_GET_RESPONSE;
1630 	smadhdrp->Status	= MAD_STATUS_BUSY;
1631 	smadhdrp->TransactionID	= rmadhdrp->TransactionID;
1632 	smadhdrp->AttributeID	= rmadhdrp->AttributeID;
1633 	smadhdrp->AttributeModifier = rmadhdrp->AttributeModifier;
1634 
1635 	swrp->wr_sgl		= sgl;
1636 	swrp->wr_nds		= 1;
1637 	swrp->wr.ud.udwr_dest	= msgimplp->im_ud_dest;
1638 	send_wqep->send_port_num = recv_wqep->recv_port_num;
1639 	send_wqep->send_qp_handle = ibt_qp_handle;
1640 	send_wqep->send_ibmf_qp_handle = NULL;
1641 
1642 	/* Post the MAD to the IBT layer */
1643 	num_work_reqs		= 1;
1644 
1645 	ibtstatus = ibt_post_send(ibt_qp_handle, &send_wqep->send_wr,
1646 	    num_work_reqs, NULL);
1647 	if (ibtstatus != IBT_SUCCESS) {
1648 		(void) ibt_deregister_mr(cip->ci_ci_handle, mem_hdl);
1649 		kmem_free(send_wqep->send_mem, IBMF_MEM_PER_WQE);
1650 		kmem_free(send_wqep, sizeof (ibmf_send_wqe_t));
1651 		ibmf_i_put_ud_dest(cip, msgimplp->im_ibmf_ud_dest);
1652 		kmem_free(msgimplp, sizeof (ibmf_msg_impl_t));
1653 		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
1654 		    ibmf_send_busy_err, IBMF_TNF_TRACE, "",
1655 		    "ibmf_send_busy(): %s, status = %d\n", tnf_string, msg,
1656 		    "post send failure", tnf_uint, ibt_status, ibtstatus);
1657 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_send_busy_end,
1658 		    IBMF_TNF_TRACE, "", "ibmf_send_busy(() exit\n");
1659 		return;
1660 	}
1661 
1662 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_send_busy_end,
1663 	    IBMF_TNF_TRACE, "", "ibmf_send_busy() exit\n");
1664 }
1665 
1666 /*
1667  * ibmf_module_load():
1668  * This function attempts to load a client module that has not yet
1669  * registered with IBMF at the time a request MAD arrives for it.
1670  * Prior to loading the module, it sends a busy MAD to the sender of
1671  * the request MAD, this soliciting a resend of the request MAD.
1672  *
1673  * Input Argument
1674  * modlargsp	Pointer to ibmf_mod_load_args_t structure
1675  *
1676  * Output Argument
1677  * None
1678  *
1679  * Status
1680  * None
1681  */
1682 static void
1683 ibmf_module_load(void *taskq_arg)
1684 {
1685 	char *modname;
1686 	ibmf_mod_load_args_t *modlargsp = (ibmf_mod_load_args_t *)taskq_arg;
1687 	ibmf_ci_t *cip = modlargsp->cip;
1688 	ibmf_recv_wqe_t	*recv_wqep = modlargsp->recv_wqep;
1689 	ibmf_client_type_t class = modlargsp->ibmf_class;
1690 
1691 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_module_load_start,
1692 	    IBMF_TNF_TRACE, "", "ibmf_module_load_busy() enter\n");
1693 	modname = modlargsp->modname;
1694 
1695 	if (IS_MANDATORY_CLASS(class)) {
1696 		ibmf_send_busy(modlargsp);
1697 	}
1698 
1699 	if (modload("misc", modname) < 0) {
1700 		(void) ibmf_i_repost_recv_buffer(cip, recv_wqep);
1701 		kmem_free(modlargsp, sizeof (ibmf_mod_load_args_t));
1702 		IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L1, ibmf_module_load_error,
1703 		    IBMF_TNF_TRACE, "",
1704 		    "ibmf_module_load(): modload failed for %s\n",
1705 		    tnf_string, module, modname);
1706 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_module_load_end,
1707 		    IBMF_TNF_TRACE, "", "ibmf_module_load() exit\n");
1708 		return;
1709 	}
1710 
1711 	(void) ibmf_i_repost_recv_buffer(cip, recv_wqep);
1712 
1713 	kmem_free(modlargsp, sizeof (ibmf_mod_load_args_t));
1714 
1715 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_module_load_end,
1716 	    IBMF_TNF_TRACE, "", "ibmf_module_load_busy() exit\n");
1717 }
1718