1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * This file implements the MAD send logic in IBMF.
29  */
30 
31 #include <sys/ib/mgt/ibmf/ibmf_impl.h>
32 
/*
 * IBMF_SEND_WR_ID_TO_ADDR():
 *	Convert a work request ID back into the pointer it was derived from
 *	and store the result in 'ptr'.  The ID is narrowed/widened through
 *	uintptr_t before being converted to a pointer.  The whole expansion is
 *	parenthesized so the macro can be used safely inside a larger
 *	expression as well as a stand-alone statement.
 */
#define	IBMF_SEND_WR_ID_TO_ADDR(id, ptr)		\
	((ptr) = (void *)(uintptr_t)(id))
35 
36 extern int ibmf_trace_level;
37 
38 static void ibmf_i_do_send_cb(void *taskq_arg);
39 static void ibmf_i_do_send_compl(ibmf_handle_t ibmf_handle,
40     ibmf_msg_impl_t *msgimplp, ibmf_send_wqe_t *send_wqep);
41 
42 /*
43  * ibmf_i_issue_pkt():
44  *	Post an IB packet on the specified QP's send queue
45  */
46 int
ibmf_i_issue_pkt(ibmf_client_t * clientp,ibmf_msg_impl_t * msgimplp,ibmf_qp_handle_t ibmf_qp_handle,ibmf_send_wqe_t * send_wqep)47 ibmf_i_issue_pkt(ibmf_client_t *clientp, ibmf_msg_impl_t *msgimplp,
48     ibmf_qp_handle_t ibmf_qp_handle, ibmf_send_wqe_t *send_wqep)
49 {
50 	int			ret;
51 	ibt_status_t		status;
52 	ibt_wr_ds_t		sgl[1];
53 	ibt_qp_hdl_t		ibt_qp_handle;
54 
55 	_NOTE(ASSUMING_PROTECTED(*send_wqep))
56 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*send_wqep))
57 
58 	IBMF_TRACE_4(IBMF_TNF_DEBUG, DPRINT_L4,
59 	    ibmf_i_issue_pkt_start, IBMF_TNF_TRACE, "",
60 	    "ibmf_i_issue_pkt() enter, clientp = %p, msg = %p, "
61 	    "qp_hdl = %p,  swqep = %p\n", tnf_opaque, clientp, clientp,
62 	    tnf_opaque, msg, msgimplp, tnf_opaque, ibmf_qp_handle,
63 	    ibmf_qp_handle, tnf_opaque, send_wqep, send_wqep);
64 
65 	ASSERT(MUTEX_HELD(&msgimplp->im_mutex));
66 	ASSERT(MUTEX_NOT_HELD(&clientp->ic_mutex));
67 
68 	/*
69 	 * if the qp handle provided in ibmf_send_pkt()
70 	 * is not the default qp handle for this client,
71 	 * then the wqe must be sent on this qp,
72 	 * else use the default qp handle set up during ibmf_register()
73 	 */
74 	if (ibmf_qp_handle == IBMF_QP_HANDLE_DEFAULT) {
75 		ibt_qp_handle = clientp->ic_qp->iq_qp_handle;
76 	} else {
77 		ibt_qp_handle =
78 		    ((ibmf_alt_qp_t *)ibmf_qp_handle)->isq_qp_handle;
79 	}
80 
81 	/* initialize the send WQE */
82 	ibmf_i_init_send_wqe(clientp, msgimplp, sgl, send_wqep,
83 	    msgimplp->im_ud_dest, ibt_qp_handle, ibmf_qp_handle);
84 
85 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*send_wqep))
86 
87 	/*
88 	 * Issue the wqe to the transport.
89 	 * NOTE: ibt_post_send() will not block, so, it is ok
90 	 * to hold the msgimpl mutex across this call.
91 	 */
92 	status = ibt_post_send(send_wqep->send_qp_handle, &send_wqep->send_wr,
93 	    1, NULL);
94 	if (status != IBT_SUCCESS) {
95 		mutex_enter(&clientp->ic_kstat_mutex);
96 		IBMF_ADD32_KSTATS(clientp, send_pkt_failed, 1);
97 		mutex_exit(&clientp->ic_kstat_mutex);
98 		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
99 		    ibmf_i_issue_pkt_err, IBMF_TNF_TRACE, "",
100 		    "ibmf_i_issue_pkt(): %s, status = %d\n",
101 		    tnf_string, msg, "post send failure",
102 		    tnf_uint, ibt_status, status);
103 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_issue_pkt_end,
104 		    IBMF_TNF_TRACE, "", "ibmf_i_issue_pkt(() exit\n");
105 		return (IBMF_TRANSPORT_FAILURE);
106 	}
107 
108 	ret = IBMF_SUCCESS;
109 
110 	/* bump the number of active sends */
111 	if (ibmf_qp_handle == IBMF_QP_HANDLE_DEFAULT) {
112 		mutex_enter(&clientp->ic_mutex);
113 		clientp->ic_sends_active++;
114 		mutex_exit(&clientp->ic_mutex);
115 		mutex_enter(&clientp->ic_kstat_mutex);
116 		IBMF_ADD32_KSTATS(clientp, sends_active, 1);
117 		mutex_exit(&clientp->ic_kstat_mutex);
118 	} else {
119 		ibmf_alt_qp_t *qpp = (ibmf_alt_qp_t *)ibmf_qp_handle;
120 		mutex_enter(&qpp->isq_mutex);
121 		qpp->isq_sends_active++;
122 		mutex_exit(&qpp->isq_mutex);
123 		mutex_enter(&clientp->ic_kstat_mutex);
124 		IBMF_ADD32_KSTATS(clientp, sends_active, 1);
125 		mutex_exit(&clientp->ic_kstat_mutex);
126 	}
127 
128 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_issue_pkt_end,
129 	    IBMF_TNF_TRACE, "", "ibmf_i_issue_pkt() exit\n");
130 	return (ret);
131 }
132 
133 /*
134  * ibmf_i_send_pkt()
135  *	Send an IB packet after allocating send resources
136  */
137 int
ibmf_i_send_pkt(ibmf_client_t * clientp,ibmf_qp_handle_t ibmf_qp_handle,ibmf_msg_impl_t * msgimplp,int block)138 ibmf_i_send_pkt(ibmf_client_t *clientp, ibmf_qp_handle_t ibmf_qp_handle,
139     ibmf_msg_impl_t *msgimplp, int block)
140 {
141 	ibmf_send_wqe_t	*send_wqep;
142 	int		status;
143 
144 	IBMF_TRACE_4(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_send_pkt_start,
145 	    IBMF_TNF_TRACE, "",
146 	    "ibmf_i_send_pkt(): clientp = 0x%p, qp_hdl = 0x%p, "
147 	    "msgp = 0x%p, block = %d\n", tnf_opaque, clientp, clientp,
148 	    tnf_opaque, qp_hdl, ibmf_qp_handle, tnf_opaque, msg, msgimplp,
149 	    tnf_uint, block, block);
150 
151 	ASSERT(MUTEX_HELD(&msgimplp->im_mutex));
152 
153 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*send_wqep))
154 
155 	/*
156 	 * Reset send_done to indicate we have not received the completion
157 	 * for this send yet.
158 	 */
159 	msgimplp->im_trans_state_flags &= ~IBMF_TRANS_STATE_FLAG_SEND_DONE;
160 
161 	/*
162 	 * Allocate resources needed to send a UD packet including the
163 	 * send WQE context
164 	 */
165 	status = ibmf_i_alloc_send_resources(clientp->ic_myci,
166 	    msgimplp, block, &send_wqep);
167 	if (status != IBMF_SUCCESS) {
168 		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1, ibmf_i_send_pkt_err,
169 		    IBMF_TNF_ERROR, "", "ibmf_i_send_pkt(): %s, status = %d\n",
170 		    tnf_string, msg, "unable to allocate send resources",
171 		    tnf_uint, status, status);
172 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,	ibmf_i_send_pkt_end,
173 		    IBMF_TNF_TRACE, "", "ibmf_i_send_pkt() exit\n");
174 		return (status);
175 	}
176 
177 	/* Set the segment number in the send WQE context */
178 	if (msgimplp->im_flags & IBMF_MSG_FLAGS_SEND_RMPP)
179 		send_wqep->send_rmpp_segment = msgimplp->im_rmpp_ctx.rmpp_ns;
180 
181 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*send_wqep))
182 
183 	/*
184 	 * Increment the count of pending send completions.
185 	 * Only when this count is zero should the client be notified
186 	 * of completion of the transaction.
187 	 */
188 	msgimplp->im_pending_send_compls += 1;
189 
190 	/* Send the packet */
191 	status = ibmf_i_issue_pkt(clientp, msgimplp, ibmf_qp_handle, send_wqep);
192 	if (status != IBMF_SUCCESS) {
193 		ibmf_i_free_send_resources(clientp->ic_myci, msgimplp,
194 		    send_wqep);
195 		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1, ibmf_i_send_pkt_err,
196 		    IBMF_TNF_ERROR, "", "ibmf_i_send_pkt(): %s, status = %d\n",
197 		    tnf_string, msg, "unable to issue packet",
198 		    tnf_uint, status, status);
199 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,	ibmf_i_send_pkt_end,
200 		    IBMF_TNF_TRACE, "", "ibmf_i_send_pkt() exit\n");
201 		return (status);
202 	}
203 
204 	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4,	ibmf_i_send_pkt_end,
205 	    IBMF_TNF_TRACE, "", "ibmf_i_send_pkt() exit, status = %d\n",
206 	    tnf_uint, status, status);
207 
208 	return (IBMF_SUCCESS);
209 }
210 
211 /*
212  * ibmf_i_send_single_pkt():
213  *	Send a single IB packet.  Only used to send non-RMPP packets.
214  */
215 int
ibmf_i_send_single_pkt(ibmf_client_t * clientp,ibmf_qp_handle_t ibmf_qp_handle,ibmf_msg_impl_t * msgimplp,int block)216 ibmf_i_send_single_pkt(ibmf_client_t *clientp, ibmf_qp_handle_t ibmf_qp_handle,
217     ibmf_msg_impl_t *msgimplp, int block)
218 {
219 	int	status;
220 
221 	IBMF_TRACE_4(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_send_single_pkt_start,
222 	    IBMF_TNF_TRACE, "",
223 	    "ibmf_i_send_single_pkt(): clientp = 0x%p, qp_hdl = 0x%p, "
224 	    "msgp = 0x%p, block = %d\n", tnf_opaque, clientp, clientp,
225 	    tnf_opaque, qp_hdl, ibmf_qp_handle, tnf_opaque, msg, msgimplp,
226 	    tnf_uint, block, block);
227 
228 	ASSERT(MUTEX_HELD(&msgimplp->im_mutex));
229 
230 	status = ibmf_i_send_pkt(clientp, ibmf_qp_handle, msgimplp, block);
231 	if (status != IBMF_SUCCESS) {
232 		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
233 		    ibmf_i_send_single_pkt_err, IBMF_TNF_ERROR, "",
234 		    "ibmf_i_send_single_pkt(): %s, msgp = 0x%p\n",
235 		    tnf_string, msg, "unable to send packet",
236 		    tnf_uint, status, status);
237 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
238 		    ibmf_i_send_single_pkt_end, IBMF_TNF_TRACE, "",
239 		    "ibmf_i_send_single_pkt() exit\n");
240 		return (status);
241 	}
242 
243 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,	ibmf_i_send_single_pkt_end,
244 	    IBMF_TNF_TRACE, "", "ibmf_i_send_single_pkt() exit\n");
245 	return (IBMF_SUCCESS);
246 }
247 
248 /*
249  * ibmf_i_handle_send_completion():
250  *	Process the WQE from the SQ identified in the work completion entry.
251  */
252 /* ARGSUSED */
253 void
ibmf_i_handle_send_completion(ibmf_ci_t * cip,ibt_wc_t * wcp)254 ibmf_i_handle_send_completion(ibmf_ci_t *cip, ibt_wc_t *wcp)
255 {
256 	ibmf_client_t		*clientp, *cclientp;
257 	ibmf_send_wqe_t		*send_wqep;
258 	ibmf_qp_handle_t	ibmf_qp_handle;
259 	ibmf_alt_qp_t		*qpp;
260 	int			ret;
261 
262 	IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L4,
263 	    ibmf_i_handle_send_completion_start, IBMF_TNF_TRACE, "",
264 	    "ibmf_i_handle_send_completion() enter, cip = %p, wcp = %p\n",
265 	    tnf_opaque, cip, cip, tnf_opaque, wcp, wcp);
266 
267 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*send_wqep))
268 
269 	ASSERT(wcp->wc_id != 0);
270 
271 	ASSERT(IBMF_IS_SEND_WR_ID(wcp->wc_id));
272 
273 	/* get the IBMF send WQE context */
274 	IBMF_SEND_WR_ID_TO_ADDR(wcp->wc_id, send_wqep);
275 
276 	ASSERT(send_wqep != NULL);
277 
278 	/* get the client context */
279 	cclientp =  clientp = send_wqep->send_client;
280 
281 	/* Check if this is a completion for a BUSY MAD sent by IBMF */
282 	if (clientp == NULL) {
283 		ibmf_msg_impl_t		*msgimplp;
284 
285 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L3,
286 		    ibmf_i_handle_send_completion, IBMF_TNF_TRACE, "",
287 		    "ibmf_i_handle_send_completion(): NULL client\n");
288 
289 		msgimplp = send_wqep->send_msg;
290 
291 		/*
292 		 * Deregister registered memory and free it, and
293 		 * free up the send WQE context
294 		 */
295 		(void) ibt_deregister_mr(cip->ci_ci_handle,
296 		    send_wqep->send_mem_hdl);
297 		kmem_free(send_wqep->send_mem, IBMF_MEM_PER_WQE);
298 		kmem_free(send_wqep, sizeof (ibmf_send_wqe_t));
299 
300 		/* Free up the message context */
301 		ibmf_i_put_ud_dest(cip, msgimplp->im_ibmf_ud_dest);
302 		ibmf_i_clean_ud_dest_list(cip, B_FALSE);
303 		kmem_free(msgimplp, sizeof (ibmf_msg_impl_t));
304 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
305 		    ibmf_i_handle_send_completion_end, IBMF_TNF_TRACE, "",
306 		    "ibmf_i_handle_send_completion() exit\n");
307 		return;
308 	}
309 
310 	/* get the QP handle */
311 	ibmf_qp_handle = send_wqep->send_ibmf_qp_handle;
312 	qpp = (ibmf_alt_qp_t *)ibmf_qp_handle;
313 
314 	ASSERT(clientp != NULL);
315 
316 	/* decrement the number of active sends */
317 	if (ibmf_qp_handle == IBMF_QP_HANDLE_DEFAULT) {
318 		mutex_enter(&clientp->ic_mutex);
319 		clientp->ic_sends_active--;
320 		mutex_exit(&clientp->ic_mutex);
321 	} else {
322 		mutex_enter(&qpp->isq_mutex);
323 		qpp->isq_sends_active--;
324 		mutex_exit(&qpp->isq_mutex);
325 	}
326 
327 	mutex_enter(&clientp->ic_kstat_mutex);
328 	IBMF_SUB32_KSTATS(clientp, sends_active, 1);
329 	mutex_exit(&clientp->ic_kstat_mutex);
330 
331 	send_wqep->send_status = ibmf_i_ibt_wc_to_ibmf_status(wcp->wc_status);
332 
333 	/*
334 	 * issue the callback using taskq. If no taskq or if the
335 	 * dispatch fails, we do the send processing in the callback context
336 	 * which is the interrupt context
337 	 */
338 	if (cclientp->ic_send_taskq == NULL) {
339 		/* Do the processing in callback context */
340 		mutex_enter(&clientp->ic_kstat_mutex);
341 		IBMF_ADD32_KSTATS(clientp, send_cb_active, 1);
342 		mutex_exit(&clientp->ic_kstat_mutex);
343 		ibmf_i_do_send_cb((void *)send_wqep);
344 		mutex_enter(&clientp->ic_kstat_mutex);
345 		IBMF_SUB32_KSTATS(clientp, send_cb_active, 1);
346 		mutex_exit(&clientp->ic_kstat_mutex);
347 		IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4,
348 		    ibmf_i_handle_send_err, IBMF_TNF_ERROR, "",
349 		    "ibmf_i_handle_send_completion(): %s\n",
350 		    tnf_string, msg, "ci_send_taskq == NULL");
351 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
352 		    ibmf_i_handle_send_completion_end, IBMF_TNF_TRACE, "",
353 		    "ibmf_i_handle_send_completion() exit\n");
354 		return;
355 	}
356 
357 	mutex_enter(&clientp->ic_kstat_mutex);
358 	IBMF_ADD32_KSTATS(clientp, send_cb_active, 1);
359 	mutex_exit(&clientp->ic_kstat_mutex);
360 
361 	/* Use taskq for processing if the IBMF_REG_FLAG_NO_OFFLOAD isn't set */
362 	if ((clientp->ic_reg_flags & IBMF_REG_FLAG_NO_OFFLOAD) == 0) {
363 		ret = taskq_dispatch(cclientp->ic_send_taskq, ibmf_i_do_send_cb,
364 		    send_wqep, TQ_NOSLEEP);
365 		if (ret == TASKQID_INVALID) {
366 			IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4,
367 			    ibmf_i_handle_send_err, IBMF_TNF_ERROR, "",
368 			    "ibmf_i_handle_send_completion(): %s\n",
369 			    tnf_string, msg, "send: dispatch failed");
370 			ibmf_i_do_send_cb((void *)send_wqep);
371 		}
372 	} else {
373 		ibmf_i_do_send_cb((void *)send_wqep);
374 	}
375 
376 	mutex_enter(&clientp->ic_kstat_mutex);
377 	IBMF_SUB32_KSTATS(clientp, send_cb_active, 1);
378 	mutex_exit(&clientp->ic_kstat_mutex);
379 
380 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*send_wqep))
381 
382 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
383 	    ibmf_i_handle_send_completion_end, IBMF_TNF_TRACE, "",
384 	    "ibmf_i_handle_send_completion() exit\n");
385 }
386 
387 /*
388  * ibmf_i_do_send_cb():
389  *	Do the send completion processing
390  */
391 static void
ibmf_i_do_send_cb(void * taskq_arg)392 ibmf_i_do_send_cb(void *taskq_arg)
393 {
394 	ibmf_ci_t		*cip;
395 	ibmf_msg_impl_t		*msgimplp;
396 	ibmf_client_t		*clientp;
397 	ibmf_send_wqe_t		*send_wqep;
398 	boolean_t		found;
399 	int			msg_trans_state_flags, msg_flags;
400 	uint_t			ref_cnt;
401 	ibmf_qp_handle_t	ibmf_qp_handle;
402 	struct kmem_cache	*kmem_cachep;
403 	timeout_id_t		msg_rp_unset_id, msg_tr_unset_id;
404 	timeout_id_t		msg_rp_set_id, msg_tr_set_id;
405 	ibmf_alt_qp_t		*altqp;
406 	boolean_t		inc_refcnt;
407 
408 	send_wqep = taskq_arg;
409 
410 	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4,
411 	    ibmf_i_do_send_cb_start, IBMF_TNF_TRACE, "",
412 	    "ibmf_i_do_send_cb() enter, send_wqep = %p\n",
413 	    tnf_opaque, send_wqep, send_wqep);
414 
415 	clientp = send_wqep->send_client;
416 	cip = clientp->ic_myci;
417 	msgimplp = send_wqep->send_msg;
418 
419 	/* get the QP handle */
420 	ibmf_qp_handle = send_wqep->send_ibmf_qp_handle;
421 
422 	/* Get the WQE kmem cache pointer based on the QP type */
423 	if (ibmf_qp_handle == IBMF_QP_HANDLE_DEFAULT)
424 		kmem_cachep = cip->ci_send_wqes_cache;
425 	else {
426 		altqp = (ibmf_alt_qp_t *)ibmf_qp_handle;
427 		kmem_cachep = altqp->isq_send_wqes_cache;
428 	}
429 
430 	/* Look for a message in the client's message list */
431 	inc_refcnt = B_TRUE;
432 	found = ibmf_i_find_msg_client(clientp, msgimplp, inc_refcnt);
433 
434 	/*
435 	 * If the message context was not found, then it's likely
436 	 * been freed up. So, do nothing in this timeout handler
437 	 */
438 	if (found == B_FALSE) {
439 		kmem_cache_free(kmem_cachep, send_wqep);
440 		mutex_enter(&cip->ci_mutex);
441 		IBMF_SUB32_PORT_KSTATS(cip, send_wqes_alloced, 1);
442 		mutex_exit(&cip->ci_mutex);
443 		if (ibmf_qp_handle == IBMF_QP_HANDLE_DEFAULT) {
444 			mutex_enter(&cip->ci_mutex);
445 			cip->ci_wqes_alloced--;
446 			if (cip->ci_wqes_alloced == 0)
447 				cv_signal(&cip->ci_wqes_cv);
448 			mutex_exit(&cip->ci_mutex);
449 		} else {
450 			mutex_enter(&altqp->isq_mutex);
451 			altqp->isq_wqes_alloced--;
452 			if (altqp->isq_wqes_alloced == 0)
453 				cv_signal(&altqp->isq_wqes_cv);
454 			mutex_exit(&altqp->isq_mutex);
455 		}
456 		IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L3,
457 		    ibmf_i_do_send_cb, IBMF_TNF_TRACE, "",
458 		    "ibmf_i_do_send_cb(): %s\n", tnf_string, msg,
459 		    "Message not found, return without processing send cb");
460 		return;
461 	}
462 
463 	/* Grab the message context lock */
464 	mutex_enter(&msgimplp->im_mutex);
465 
466 	/*
467 	 * Decrement the count of pending send completions for
468 	 * this transaction
469 	 */
470 	msgimplp->im_pending_send_compls -= 1;
471 
472 	/*
473 	 * If the pending send completions is not zero, then we must
474 	 * not attempt to notify the client of a transaction completion
475 	 * in this instance of the send completion handler. Notification
476 	 * of transaction completion should be provided only by the
477 	 * last send completion so that all send completions are accounted
478 	 * for before the client is notified and subsequently attempts to
479 	 * reuse the message for an other transaction.
480 	 * If this is not done, the message may be reused while the
481 	 * send WR from the old transaction is still active in the QP's WQ.
482 	 * This could result in an attempt to modify the address handle with
483 	 * information for the new transaction which could be potentially
484 	 * incompatible, such as an incorrect port number. Such an
485 	 * incompatible modification of the address handle of the old
486 	 * transaction could result in a QP error.
487 	 */
488 	if (msgimplp->im_pending_send_compls != 0) {
489 		IBMF_MSG_DECR_REFCNT(msgimplp);
490 		mutex_exit(&msgimplp->im_mutex);
491 		kmem_cache_free(kmem_cachep, send_wqep);
492 		mutex_enter(&cip->ci_mutex);
493 		IBMF_SUB32_PORT_KSTATS(cip, send_wqes_alloced, 1);
494 		mutex_exit(&cip->ci_mutex);
495 		if (ibmf_qp_handle == IBMF_QP_HANDLE_DEFAULT) {
496 			mutex_enter(&cip->ci_mutex);
497 			cip->ci_wqes_alloced--;
498 			if (cip->ci_wqes_alloced == 0)
499 				cv_signal(&cip->ci_wqes_cv);
500 			mutex_exit(&cip->ci_mutex);
501 		} else {
502 			mutex_enter(&altqp->isq_mutex);
503 			altqp->isq_wqes_alloced--;
504 			if (altqp->isq_wqes_alloced == 0)
505 				cv_signal(&altqp->isq_wqes_cv);
506 			mutex_exit(&altqp->isq_mutex);
507 		}
508 		IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L3,
509 		    ibmf_i_do_send_cb, IBMF_TNF_TRACE, "",
510 		    "ibmf_i_do_send_cb(): %s\n", tnf_string, msg,
511 		    "Message found with pending send completions, "
512 		    "return without processing send cb");
513 		return;
514 	}
515 
516 	/*
517 	 * If the message has been marked unitialized or done
518 	 * release the message mutex and return
519 	 */
520 	if ((msgimplp->im_trans_state_flags & IBMF_TRANS_STATE_FLAG_UNINIT) ||
521 	    (msgimplp->im_trans_state_flags & IBMF_TRANS_STATE_FLAG_DONE)) {
522 		IBMF_MSG_DECR_REFCNT(msgimplp);
523 		msg_trans_state_flags = msgimplp->im_trans_state_flags;
524 		msg_flags = msgimplp->im_flags;
525 		ref_cnt = msgimplp->im_ref_count;
526 		mutex_exit(&msgimplp->im_mutex);
527 		/*
528 		 * This thread may notify the client only if the
529 		 * transaction is done, the message has been removed
530 		 * from the client's message list, and the message
531 		 * reference count is 0.
532 		 * If the transaction is done, and the message reference
533 		 * count = 0, there is still a possibility that a
534 		 * packet could arrive for the message and its reference
535 		 * count increased if the message is still on the list.
536 		 * If the message is still on the list, it will be
537 		 * removed by a call to ibmf_i_client_rem_msg() at
538 		 * the completion point of the transaction.
539 		 * So, the reference count should be checked after the
540 		 * message has been removed.
541 		 */
542 		if ((msg_trans_state_flags & IBMF_TRANS_STATE_FLAG_DONE) &&
543 		    !(msg_flags & IBMF_MSG_FLAGS_ON_LIST) &&
544 		    (ref_cnt == 0)) {
545 
546 			ibmf_i_notify_sequence(clientp, msgimplp, msg_flags);
547 
548 		}
549 		kmem_cache_free(kmem_cachep, send_wqep);
550 		mutex_enter(&cip->ci_mutex);
551 		IBMF_SUB32_PORT_KSTATS(cip, send_wqes_alloced, 1);
552 		mutex_exit(&cip->ci_mutex);
553 		if (ibmf_qp_handle == IBMF_QP_HANDLE_DEFAULT) {
554 			mutex_enter(&cip->ci_mutex);
555 			cip->ci_wqes_alloced--;
556 			if (cip->ci_wqes_alloced == 0)
557 				cv_signal(&cip->ci_wqes_cv);
558 			mutex_exit(&cip->ci_mutex);
559 		} else {
560 			mutex_enter(&altqp->isq_mutex);
561 			altqp->isq_wqes_alloced--;
562 			if (altqp->isq_wqes_alloced == 0)
563 				cv_signal(&altqp->isq_wqes_cv);
564 			mutex_exit(&altqp->isq_mutex);
565 		}
566 		IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L3,
567 		    ibmf_i_do_send_cb, IBMF_TNF_TRACE, "",
568 		    "ibmf_i_do_send_cb(): %s, msg = %p\n", tnf_string, msg,
569 		    "Message marked for removal, return without processing "
570 		    "send cb", tnf_opaque, msgimplp, msgimplp);
571 		return;
572 	}
573 
574 	/* Perform send completion processing of the message context */
575 	ibmf_i_do_send_compl((ibmf_handle_t)clientp, msgimplp, send_wqep);
576 
577 	msg_rp_unset_id = msg_tr_unset_id = msg_rp_set_id = msg_tr_set_id = 0;
578 
579 	/* Save the message flags before releasing the mutex */
580 	msg_trans_state_flags = msgimplp->im_trans_state_flags;
581 	msg_flags = msgimplp->im_flags;
582 	msg_rp_unset_id = msgimplp->im_rp_unset_timeout_id;
583 	msg_tr_unset_id = msgimplp->im_tr_unset_timeout_id;
584 	msgimplp->im_rp_unset_timeout_id = 0;
585 	msgimplp->im_tr_unset_timeout_id = 0;
586 
587 	/*
588 	 * Decrement the message reference count
589 	 * This count was inceremented when the message was found on the
590 	 * client's message list
591 	 */
592 	IBMF_MSG_DECR_REFCNT(msgimplp);
593 
594 	if (msg_trans_state_flags & IBMF_TRANS_STATE_FLAG_DONE) {
595 		if (msgimplp->im_rp_timeout_id != 0) {
596 			msg_rp_set_id = msgimplp->im_rp_timeout_id;
597 			msgimplp->im_rp_timeout_id = 0;
598 		}
599 		if (msgimplp->im_tr_timeout_id != 0) {
600 			msg_tr_set_id = msgimplp->im_tr_timeout_id;
601 			msgimplp->im_tr_timeout_id = 0;
602 		}
603 	}
604 
605 	mutex_exit(&msgimplp->im_mutex);
606 
607 	if (msg_rp_unset_id != 0) {
608 		(void) untimeout(msg_rp_unset_id);
609 	}
610 
611 	if (msg_tr_unset_id != 0) {
612 		(void) untimeout(msg_tr_unset_id);
613 	}
614 
615 	if (msg_rp_set_id != 0) {
616 		(void) untimeout(msg_rp_set_id);
617 	}
618 
619 	if (msg_tr_set_id != 0) {
620 		(void) untimeout(msg_tr_set_id);
621 	}
622 
623 	IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L3,
624 	    ibmf_i_do_send_cb, IBMF_TNF_TRACE, "",
625 	    "ibmf_i_do_send_cb(): %s, msg = %p\n",
626 	    tnf_string, msg, "Send callback done.  Dec ref count",
627 	    tnf_opaque, msgimplp, msgimplp);
628 
629 	/*
630 	 * If the transaction is done, signal the block thread if the
631 	 * transaction is blocking, or call the client's transaction done
632 	 * notification callback
633 	 */
634 	if (msg_trans_state_flags & IBMF_TRANS_STATE_FLAG_DONE) {
635 
636 		/* Remove the message from the client's message list */
637 		ibmf_i_client_rem_msg(clientp, msgimplp, &ref_cnt);
638 
639 		/*
640 		 * Notify the client if the message reference count is zero.
641 		 * At this point, we know that the transaction is done and
642 		 * the message has been removed from the client's message list.
643 		 * So, we only need to make sure the reference count is zero
644 		 * before notifying the client.
645 		 */
646 		if (ref_cnt == 0) {
647 
648 			ibmf_i_notify_sequence(clientp, msgimplp, msg_flags);
649 
650 		}
651 	}
652 
653 	kmem_cache_free(kmem_cachep, send_wqep);
654 	mutex_enter(&cip->ci_mutex);
655 	IBMF_SUB32_PORT_KSTATS(cip, send_wqes_alloced, 1);
656 	mutex_exit(&cip->ci_mutex);
657 	if (ibmf_qp_handle == IBMF_QP_HANDLE_DEFAULT) {
658 		mutex_enter(&cip->ci_mutex);
659 		cip->ci_wqes_alloced--;
660 		if (cip->ci_wqes_alloced == 0)
661 			cv_signal(&cip->ci_wqes_cv);
662 		mutex_exit(&cip->ci_mutex);
663 	} else {
664 		mutex_enter(&altqp->isq_mutex);
665 		altqp->isq_wqes_alloced--;
666 		if (altqp->isq_wqes_alloced == 0)
667 			cv_signal(&altqp->isq_wqes_cv);
668 		mutex_exit(&altqp->isq_mutex);
669 	}
670 
671 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
672 	    ibmf_i_do_send_cb_end, IBMF_TNF_TRACE, "",
673 	    "ibmf_i_do_send_cb() exit\n");
674 }
675 
676 /*
677  * ibmf_i_do_send_compl():
678  *	Determine if the transaction is complete
679  */
680 /* ARGSUSED */
681 static void
ibmf_i_do_send_compl(ibmf_handle_t ibmf_handle,ibmf_msg_impl_t * msgimplp,ibmf_send_wqe_t * send_wqep)682 ibmf_i_do_send_compl(ibmf_handle_t ibmf_handle, ibmf_msg_impl_t *msgimplp,
683     ibmf_send_wqe_t *send_wqep)
684 {
685 	IBMF_TRACE_4(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_do_send_compl_start,
686 	    IBMF_TNF_TRACE, "", "ibmf_i_do_send_compl(): ibmf_hdl = 0x%p "
687 	    "msgp = %p, send_wqep = 0x%p, msg_flags = 0x%x\n",
688 	    tnf_opaque, ibmf_hdl, ibmf_handle, tnf_opaque, msgimplp, msgimplp,
689 	    tnf_opaque, send_wqep, send_wqep,
690 	    tnf_opaque, msg_flags, msgimplp->im_flags);
691 
692 	ASSERT(MUTEX_HELD(&msgimplp->im_mutex));
693 
694 	/*
695 	 * For RMPP transactions, we only care about the final packet of the
696 	 * transaction.  For others, the code does not need to wait for the send
697 	 * completion (although bad things can happen if it never occurs).
698 	 * The final packets of a transaction are sent when the state is either
699 	 * ABORT or RECEVR_TERMINATE.
700 	 * Don't mark the transaction as send_done if there are still more
701 	 * packets to be sent, including doing the second part of a double-sided
702 	 * transaction.
703 	 */
704 	if ((msgimplp->im_flags & IBMF_MSG_FLAGS_RECV_RMPP) ||
705 	    (msgimplp->im_flags & IBMF_MSG_FLAGS_SEND_RMPP)) {
706 
707 		IBMF_TRACE_3(IBMF_TNF_DEBUG, DPRINT_L3,
708 		    ibmf_i_do_send_compl, IBMF_TNF_TRACE, "",
709 		    "ibmf_i_do_send_compl(): %s msgp = %p, rmpp_state = 0x%x\n",
710 		    tnf_string, msg, "Received send callback for RMPP trans",
711 		    tnf_opaque, msg, msgimplp,
712 		    tnf_opaque, rmpp_state, msgimplp->im_rmpp_ctx.rmpp_state);
713 
714 		/*
715 		 * For ABORT state, we should not return control to
716 		 * the client from the send completion handler.
717 		 * Control should be returned in the error timeout handler.
718 		 *
719 		 * The exception is when the IBMF_TRANS_STATE_FLAG_RECV_DONE
720 		 * flag has already been set. This flag is set when
721 		 * ibmf_i_terminate_transaction is called from one of the
722 		 * three timeout handlers. In this case return control from
723 		 * here.
724 		 */
725 		if (msgimplp->im_rmpp_ctx.rmpp_state == IBMF_RMPP_STATE_ABORT) {
726 			msgimplp->im_trans_state_flags |=
727 			    IBMF_TRANS_STATE_FLAG_SEND_DONE;
728 			if (msgimplp->im_trans_state_flags &
729 			    IBMF_TRANS_STATE_FLAG_RECV_DONE) {
730 				msgimplp->im_trans_state_flags |=
731 				    IBMF_TRANS_STATE_FLAG_DONE;
732 			}
733 		}
734 
735 		if ((msgimplp->im_rmpp_ctx.rmpp_state ==
736 		    IBMF_RMPP_STATE_RECEVR_TERMINATE) ||
737 		    (msgimplp->im_rmpp_ctx.rmpp_state ==
738 		    IBMF_RMPP_STATE_DONE)) {
739 			msgimplp->im_trans_state_flags |=
740 			    IBMF_TRANS_STATE_FLAG_SEND_DONE;
741 			if (msgimplp->im_trans_state_flags  &
742 			    IBMF_TRANS_STATE_FLAG_RECV_DONE) {
743 				msgimplp->im_trans_state_flags |=
744 				    IBMF_TRANS_STATE_FLAG_DONE;
745 			}
746 		}
747 
748 		/*
749 		 * If the transaction is a send-only RMPP, then
750 		 * set the SEND_DONE flag on every send completion
751 		 * as long as there are no outstanding ones.
752 		 * This is needed so that the transaction can return
753 		 * in the receive path, where ibmf_i_terminate_transaction
754 		 * is called from ibmf_i_rmpp_sender_active_flow,
755 		 * after checking if the SEND_DONE flag is set.
756 		 * When a new MAD is sent as part of the RMPP transaction,
757 		 * the SEND_DONE flag will get reset.
758 		 * The RECV_DONE indicates that the last ACK was received.
759 		 */
760 		if ((msgimplp->im_flags & IBMF_MSG_FLAGS_SEQUENCED) == 0) {
761 			if (msgimplp->im_pending_send_compls == 0) {
762 				msgimplp->im_trans_state_flags |=
763 				    IBMF_TRANS_STATE_FLAG_SEND_DONE;
764 				if (msgimplp->im_trans_state_flags  &
765 				    IBMF_TRANS_STATE_FLAG_RECV_DONE) {
766 					msgimplp->im_trans_state_flags |=
767 					    IBMF_TRANS_STATE_FLAG_DONE;
768 				}
769 			}
770 		}
771 
772 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
773 		    ibmf_i_do_send_compl_end, IBMF_TNF_TRACE, "",
774 		    "ibmf_i_do_send_compl() exit\n");
775 		return;
776 	}
777 
778 	/*
779 	 * Only non-RMPP send completion gets here.
780 	 * If the send is a single-packet send that does not use RMPP, and if
781 	 * the transaction is not a sequenced transaction, call the transaction
782 	 * callback handler after flagging the transaction as done.  If the
783 	 * message is sequenced, start a timer to bound the wait for the first
784 	 * data packet of the response.
785 	 */
786 	if (msgimplp->im_flags & IBMF_MSG_FLAGS_SEQUENCED) {
787 
788 		IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L3,
789 		    ibmf_i_do_send_compl, IBMF_TNF_TRACE, "",
790 		    "ibmf_i_do_send_compl(): %s msgp = %p\n", tnf_string, msg,
791 		    "Sequenced transaction, setting response timer",
792 		    tnf_opaque, msg, msgimplp);
793 
794 		/*
795 		 * Check if the send completion already occured,
796 		 * which could imply that this is a send completion
797 		 * for some previous transaction that has come in very late.
798 		 * In this case exit here.
799 		 */
800 		if (msgimplp->im_trans_state_flags  &
801 		    IBMF_TRANS_STATE_FLAG_SEND_DONE) {
802 			IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
803 			    ibmf_i_do_send_compl_end, IBMF_TNF_TRACE, "",
804 			    "ibmf_i_do_send_compl() exit, "
805 			    "Duplicate SEND completion\n");
806 			return;
807 		}
808 
809 		/* mark as send_compl happened */
810 		msgimplp->im_trans_state_flags |=
811 		    IBMF_TRANS_STATE_FLAG_SEND_DONE;
812 
813 		if (msgimplp->im_trans_state_flags  &
814 		    IBMF_TRANS_STATE_FLAG_RECV_DONE) {
815 			msgimplp->im_trans_state_flags |=
816 			    IBMF_TRANS_STATE_FLAG_DONE;
817 			IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
818 			    ibmf_i_do_send_compl_end, IBMF_TNF_TRACE, "",
819 			    "ibmf_i_do_send_compl() exit, RECV_DONE\n");
820 			return;
821 		}
822 
823 		/*
824 		 * check if response was received before send
825 		 * completion
826 		 */
827 		if (((msgimplp->im_trans_state_flags &
828 		    IBMF_TRANS_STATE_FLAG_DONE) == 0) &&
829 		    ((msgimplp->im_trans_state_flags &
830 		    IBMF_TRANS_STATE_FLAG_RECV_ACTIVE) == 0)) {
831 			/* set timer for first packet of response */
832 			ibmf_i_set_timer(ibmf_i_send_timeout, msgimplp,
833 			    IBMF_RESP_TIMER);
834 		}
835 	} else {
836 		msgimplp->im_msg_status = IBMF_SUCCESS;
837 		msgimplp->im_trans_state_flags |=
838 		    IBMF_TRANS_STATE_FLAG_SEND_DONE;
839 		msgimplp->im_trans_state_flags |= IBMF_TRANS_STATE_FLAG_DONE;
840 	}
841 
842 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_do_send_compl_end,
843 	    IBMF_TNF_TRACE, "", "ibmf_i_do_send_compl() exit\n");
844 }
845