/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include <sys/types.h>
#include <sys/kmem.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/ksynch.h>

#include <sys/ib/clients/eoib/eib_impl.h>

/*
 * Declarations private to this file
 */
static int eib_rsrc_setup_txbufs(eib_t *, int *);
static int eib_rsrc_setup_rxbufs(eib_t *, int *);
static int eib_rsrc_setup_lsobufs(eib_t *, int *);
static void eib_rsrc_init_wqe_pool(eib_t *, eib_wqe_pool_t **,
    ib_memlen_t, int);
static void eib_rsrc_fini_wqe_pool(eib_t *, eib_wqe_pool_t **);
static boolean_t eib_rsrc_ok_to_free_pool(eib_t *, eib_wqe_pool_t *, boolean_t);
static int eib_rsrc_grab_wqes(eib_t *, eib_wqe_pool_t *, eib_wqe_t **, uint_t,
    uint_t *, int);
static void eib_rsrc_return_wqes(eib_t *, eib_wqe_pool_t *, eib_wqe_t **,
    uint_t);

static void eib_rb_rsrc_setup_txbufs(eib_t *, boolean_t);
static void eib_rb_rsrc_setup_rxbufs(eib_t *, boolean_t);
static void eib_rb_rsrc_setup_lsobufs(eib_t *, boolean_t);

/*
 * Definitions private to this file
 */
static uint_t eib_lso_num_bufs = EIB_LSO_NUM_BUFS;	/* tunable? */
int
eib_rsrc_setup_bufs(eib_t *ss, int *err)
{
	if (eib_rsrc_setup_txbufs(ss, err) != EIB_E_SUCCESS)
		return (EIB_E_FAILURE);

	if (ss->ei_caps->cp_lso_maxlen && ss->ei_caps->cp_cksum_flags &&
	    ss->ei_caps->cp_resv_lkey_capab) {
		if (eib_rsrc_setup_lsobufs(ss, err) != EIB_E_SUCCESS) {
			eib_rb_rsrc_setup_txbufs(ss, B_FALSE);
			return (EIB_E_FAILURE);
		}
	}

	if (eib_rsrc_setup_rxbufs(ss, err) != EIB_E_SUCCESS) {
		eib_rb_rsrc_setup_lsobufs(ss, B_FALSE);
		eib_rb_rsrc_setup_txbufs(ss, B_FALSE);
		return (EIB_E_FAILURE);
	}

	return (EIB_E_SUCCESS);
}

int
eib_rsrc_grab_swqes(eib_t *ss, eib_wqe_t **wqes, uint_t n_req, uint_t *actual,
    int pri)
{
	eib_wqe_t *wqe;
	uint32_t *encap_hdr;
	int ret;
	int i;

	ASSERT(ss->ei_tx != NULL);

	ret = eib_rsrc_grab_wqes(ss, ss->ei_tx, wqes, n_req, actual, pri);
	if (ret != EIB_E_SUCCESS)
		return (EIB_E_FAILURE);

	/*
	 * See note for eib_rsrc_grab_swqe()
	 */
	for (i = 0; i < (*actual); i++) {
		wqe = wqes[i];
		wqe->qe_wr.send.wr_flags = IBT_WR_NO_FLAGS;
		wqe->qe_wr.send.wr.ud.udwr_dest = wqe->qe_dest;
		wqe->qe_wr.send.wr_opcode = IBT_WRC_SEND;
		wqe->qe_wr.send.wr_nds = 1;
		wqe->qe_wr.send.wr_sgl = &wqe->qe_sgl;
		wqe->qe_nxt_post = NULL;
		wqe->qe_iov_hdl = NULL;

		encap_hdr = (uint32_t *)(void *)wqe->qe_payload_hdr;
		*encap_hdr = htonl(EIB_TX_ENCAP_HDR);
	}

	return (EIB_E_SUCCESS);
}

int
eib_rsrc_grab_rwqes(eib_t *ss, eib_wqe_t **wqes, uint_t n_req, uint_t *actual,
    int pri)
{
	ASSERT(ss->ei_rx != NULL);

	return (eib_rsrc_grab_wqes(ss, ss->ei_rx, wqes, n_req, actual, pri));
}

int
eib_rsrc_grab_lsobufs(eib_t *ss, uint_t req_sz, ibt_wr_ds_t *sgl, uint32_t *nds)
{
	eib_lsobkt_t *bkt = ss->ei_lso;
	eib_lsobuf_t *elem;
	eib_lsobuf_t *nxt;
	uint_t frag_sz;
	uint_t num_needed;
	int i;

	ASSERT(req_sz != 0);
	ASSERT(sgl != NULL);
	ASSERT(nds != NULL);

	/*
	 * Determine how many bufs we'd need for the size requested
	 */
	num_needed = req_sz / EIB_LSO_BUFSZ;
	if ((frag_sz = req_sz % EIB_LSO_BUFSZ) != 0)
		num_needed++;
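	/*
	 * Worked example (illustrative only, assuming the default 8K
	 * EIB_LSO_BUFSZ mentioned in eib_rsrc_setup_txbufs()): for
	 * req_sz = 20000 bytes, num_needed = 20000 / 8192 = 2 with
	 * frag_sz = 3616, so we grab 3 bufs and later trim the last
	 * sgl entry's ds_len from 8192 down to 3616.
	 */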

	if (bkt == NULL)
		return (EIB_E_FAILURE);

	/*
	 * If we don't have enough lso bufs, return failure
	 */
	mutex_enter(&bkt->bk_lock);
	if (bkt->bk_nfree < num_needed) {
		mutex_exit(&bkt->bk_lock);
		return (EIB_E_FAILURE);
	}

	/*
	 * Pick the first "num_needed" bufs from the free list
	 */
	elem = bkt->bk_free_head;
	for (i = 0; i < num_needed; i++) {
		ASSERT(elem->lb_isfree != 0);
		ASSERT(elem->lb_buf != NULL);

		nxt = elem->lb_next;

		sgl[i].ds_va = (ib_vaddr_t)(uintptr_t)elem->lb_buf;
		sgl[i].ds_key = bkt->bk_lkey;
		sgl[i].ds_len = EIB_LSO_BUFSZ;

		elem->lb_isfree = 0;
		elem->lb_next = NULL;

		elem = nxt;
	}
	bkt->bk_free_head = elem;

	/*
	 * If the requested size is not a multiple of EIB_LSO_BUFSZ, we need
	 * to adjust the last sgl entry's length. Since we know we need at
	 * least one buf, the use of i-1 below is safe.
	 */
	if (frag_sz) {
		sgl[i-1].ds_len = frag_sz;
	}

	/*
	 * Update nfree count and return
	 */
	bkt->bk_nfree -= num_needed;

	mutex_exit(&bkt->bk_lock);

	*nds = num_needed;

	return (EIB_E_SUCCESS);
}

eib_wqe_t *
eib_rsrc_grab_swqe(eib_t *ss, int pri)
{
	eib_wqe_t *wqe = NULL;
	uint32_t *encap_hdr;

	ASSERT(ss->ei_tx != NULL);
	(void) eib_rsrc_grab_wqes(ss, ss->ei_tx, &wqe, 1, NULL, pri);

	/*
	 * Let's reset the swqe basic wr parameters to default. We need
	 * to do this because this swqe could've previously been used
	 * for a checksum offload (when the flags would've been set)
	 * or for an LSO send (in which case the opcode would've been set
	 * to a different value), or been iov mapped (in which case the
	 * sgl/nds could've been set to different values).  We'll make
	 * it easy and initialize it here, so simple transactions can
	 * go through without any special effort by the caller.
	 *
	 * Note that even though the wqe structure is common for both
	 * send and recv, they're in two independent pools and the wqe
	 * type remains the same throughout its lifetime. So we don't
	 * have to worry about resetting any other field.
	 */
	if (wqe) {
		wqe->qe_wr.send.wr_flags = IBT_WR_NO_FLAGS;
		wqe->qe_wr.send.wr.ud.udwr_dest = wqe->qe_dest;
		wqe->qe_wr.send.wr_opcode = IBT_WRC_SEND;
		wqe->qe_wr.send.wr_nds = 1;
		wqe->qe_wr.send.wr_sgl = &wqe->qe_sgl;
		wqe->qe_nxt_post = NULL;
		wqe->qe_iov_hdl = NULL;

		encap_hdr = (uint32_t *)(void *)wqe->qe_payload_hdr;
		*encap_hdr = htonl(EIB_TX_ENCAP_HDR);
	}

	return (wqe);
}

eib_wqe_t *
eib_rsrc_grab_rwqe(eib_t *ss, int pri)
{
	eib_wqe_t *wqe = NULL;

	ASSERT(ss->ei_rx != NULL);
	(void) eib_rsrc_grab_wqes(ss, ss->ei_rx, &wqe, 1, NULL, pri);

	return (wqe);
}

void
eib_rsrc_return_swqe(eib_t *ss, eib_wqe_t *wqe, eib_chan_t *chan)
{
	ASSERT(ss->ei_tx != NULL);

	eib_rsrc_return_wqes(ss, ss->ei_tx, &wqe, 1);
	if (chan) {
		eib_rsrc_decr_posted_swqe(ss, chan);
	}
}


void
eib_rsrc_return_rwqe(eib_t *ss, eib_wqe_t *wqe, eib_chan_t *chan)
{
	ASSERT(ss->ei_rx != NULL);

	eib_rsrc_return_wqes(ss, ss->ei_rx, &wqe, 1);
	if (chan) {
		eib_rsrc_decr_posted_rwqe(ss, chan);
	}
}

void
eib_rsrc_return_lsobufs(eib_t *ss, ibt_wr_ds_t *sgl_p, uint32_t nds)
{
	eib_lsobkt_t *bkt = ss->ei_lso;
	eib_lsobuf_t *elem;
	uint8_t *va;
	ptrdiff_t ndx;
	int i;

	/*
	 * Nowhere to return the buffers to?
	 */
	if (bkt == NULL)
		return;

	mutex_enter(&bkt->bk_lock);

	for (i = 0; i < nds; i++) {
		va = (uint8_t *)(uintptr_t)sgl_p[i].ds_va;

		ASSERT(va >= bkt->bk_mem);
		ASSERT(va < (bkt->bk_mem + bkt->bk_nelem * EIB_LSO_BUFSZ));

		/*
		 * Figure out the buflist element this sgl buffer corresponds
		 * to and put it back at the head
		 */
		ndx = ((uintptr_t)va - (uintptr_t)bkt->bk_mem) / EIB_LSO_BUFSZ;
		elem = bkt->bk_bufl + ndx;
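		/*
		 * Illustrative sketch: because the buflist entries and the
		 * lso buffers have a permanent 1-1 relation (see
		 * eib_rsrc_setup_lsobufs()), a buffer starting at
		 * bk_mem + 2 * EIB_LSO_BUFSZ always maps back to
		 * bk_bufl[2]; no per-buffer back-pointer is needed.
		 */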

		ASSERT(elem->lb_isfree == 0);
		ASSERT(elem->lb_buf == va);

		elem->lb_isfree = 1;
		elem->lb_next = bkt->bk_free_head;
		bkt->bk_free_head = elem;
	}
	bkt->bk_nfree += nds;

	/*
	 * If the number of available lso buffers just crossed the
	 * threshold, wakeup anyone who may be sleeping on the event.
	 */
	if (((bkt->bk_nfree - nds) < EIB_LSO_FREE_BUFS_THRESH) &&
	    (bkt->bk_nfree >= EIB_LSO_FREE_BUFS_THRESH)) {
		cv_broadcast(&bkt->bk_cv);
	}
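	/*
	 * Note the crossing check above (numbers hypothetical): with a
	 * threshold of 10, returning 4 bufs when 8 were free (8 < 10,
	 * new count 12 >= 10) broadcasts, while returning 4 when 12
	 * were already free does not; this avoids waking waiters on
	 * every single return.
	 */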

	mutex_exit(&bkt->bk_lock);
}

/*ARGSUSED*/
void
eib_rsrc_decr_posted_swqe(eib_t *ss, eib_chan_t *chan)
{
	ASSERT(chan != NULL);

	mutex_enter(&chan->ch_tx_lock);

	chan->ch_tx_posted--;
	if ((chan->ch_tear_down) && (chan->ch_tx_posted == 0)) {
		cv_signal(&chan->ch_tx_cv);
	}

	mutex_exit(&chan->ch_tx_lock);
}

void
eib_rsrc_decr_posted_rwqe(eib_t *ss, eib_chan_t *chan)
{
	eib_chan_t *tail;
	boolean_t queue_for_refill = B_FALSE;

	ASSERT(chan != NULL);

	/*
	 * Decrement the ch_rx_posted count. If we are tearing this channel
	 * down, signal the waiter when the count reaches 0.  If we aren't
	 * tearing the channel down, see if the count has gone below the low
	 * water mark.  If it has, and if this channel isn't already being
	 * refilled, queue the channel up with the service thread for a
	 * rwqe refill.
	 */
	mutex_enter(&chan->ch_rx_lock);
	chan->ch_rx_posted--;
	if (chan->ch_tear_down) {
		if (chan->ch_rx_posted == 0)
			cv_signal(&chan->ch_rx_cv);
	} else if (chan->ch_rx_posted < chan->ch_lwm_rwqes) {
		if (chan->ch_rx_refilling == B_FALSE) {
			chan->ch_rx_refilling = B_TRUE;
			queue_for_refill = B_TRUE;
		}
	}
	mutex_exit(&chan->ch_rx_lock);

	if (queue_for_refill) {
		mutex_enter(&ss->ei_rxpost_lock);

		chan->ch_rxpost_next = NULL;
		for (tail = ss->ei_rxpost; tail; tail = tail->ch_rxpost_next) {
			if (tail->ch_rxpost_next == NULL)
				break;
		}
		if (tail) {
			tail->ch_rxpost_next = chan;
		} else {
			ss->ei_rxpost = chan;
		}

		cv_signal(&ss->ei_rxpost_cv);
		mutex_exit(&ss->ei_rxpost_lock);
	}
}

void
eib_rsrc_txwqes_needed(eib_t *ss)
{
	eib_wqe_pool_t *wp = ss->ei_tx;

	EIB_INCR_COUNTER(&ss->ei_stats->st_noxmitbuf);

	mutex_enter(&wp->wp_lock);
	if ((wp->wp_status & EIB_TXWQE_SHORT) == 0) {
		wp->wp_status |= EIB_TXWQE_SHORT;
		cv_broadcast(&wp->wp_cv);
	}
	mutex_exit(&wp->wp_lock);
}

void
eib_rsrc_lsobufs_needed(eib_t *ss)
{
	eib_lsobkt_t *bkt = ss->ei_lso;

	EIB_INCR_COUNTER(&ss->ei_stats->st_noxmitbuf);

	if (bkt == NULL) {
		EIB_DPRINTF_WARN(ss->ei_instance,
		    "eib_rsrc_lsobufs_needed: "
		    "lso bufs seem to be needed even though "
		    "LSO support was not advertised");
		return;
	}

	mutex_enter(&bkt->bk_lock);
	if ((bkt->bk_status & EIB_LBUF_SHORT) == 0) {
		bkt->bk_status |= EIB_LBUF_SHORT;
		cv_broadcast(&bkt->bk_cv);
	}
	mutex_exit(&bkt->bk_lock);
}

boolean_t
eib_rsrc_rxpool_low(eib_wqe_t *wqe)
{
	eib_wqe_pool_t *wp = wqe->qe_pool;
	boolean_t ret = B_FALSE;

	/*
	 * Set the EIB_RXWQE_SHORT flag when the number of free wqes
	 * in the rx pool falls below the low threshold for rwqes and
	 * clear it only when the number of free wqes gets back above
	 * the high water mark.
	 */
	mutex_enter(&wp->wp_lock);

	if (wp->wp_nfree <= EIB_NFREE_RWQES_LOW) {
		wp->wp_status |= (EIB_RXWQE_SHORT);
	} else if (wp->wp_nfree >= EIB_NFREE_RWQES_HWM) {
		wp->wp_status &= (~EIB_RXWQE_SHORT);
	}
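	/*
	 * This is classic hysteresis (marks hypothetical): with
	 * EIB_NFREE_RWQES_LOW == 8 and EIB_NFREE_RWQES_HWM == 32,
	 * dipping to 8 free wqes sets the flag, and it stays set
	 * through the 9..31 range until the pool recovers to 32,
	 * preventing the "low" state from flapping around a single
	 * threshold.
	 */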

	if ((wp->wp_status & EIB_RXWQE_SHORT) == EIB_RXWQE_SHORT)
		ret = B_TRUE;

	mutex_exit(&wp->wp_lock);

	return (ret);
}

void
eib_rb_rsrc_setup_bufs(eib_t *ss, boolean_t force)
{
	eib_rb_rsrc_setup_rxbufs(ss, force);
	eib_rb_rsrc_setup_lsobufs(ss, force);
	eib_rb_rsrc_setup_txbufs(ss, force);
}

static int
eib_rsrc_setup_txbufs(eib_t *ss, int *err)
{
	eib_wqe_pool_t *tx;
	eib_wqe_t *wqe;
	ibt_ud_dest_hdl_t dest;
	ibt_mr_attr_t attr;
	ibt_mr_desc_t desc;
	ibt_status_t ret;
	kthread_t *kt;
	uint32_t *encap_hdr;
	uint8_t	*buf;
	uint_t mtu = ss->ei_props->ep_mtu;
	uint_t tx_bufsz;
	uint_t blk;
	uint_t ndx;
	uint_t i;
	int lso_enabled;

	/*
	 * Try to allocate and initialize the tx wqe pool
	 */
	if (ss->ei_tx != NULL)
		return (EIB_E_SUCCESS);

	/*
	 * If we keep the tx buffers as mtu-sized, then potentially every
	 * LSO request that cannot be satisfactorily mapped, will use up
	 * the 8K large (default size) lso buffers. This may be inadvisable
	 * given that lso buffers are a scarce resource.  Instead, we'll
	 * slightly raise the size of the copy buffers in the send wqes
	 * (say to EIB_TX_COPY_THRESH) so that requests that cannot be
	 * mapped could still avoid using the 8K LSO buffers if they're
	 * less than the copy threshold size.
	 */
	lso_enabled = ss->ei_caps->cp_lso_maxlen &&
	    ss->ei_caps->cp_cksum_flags && ss->ei_caps->cp_resv_lkey_capab;
	tx_bufsz = ((lso_enabled) && (EIB_TX_COPY_THRESH > mtu)) ?
	    EIB_TX_COPY_THRESH : mtu;
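	/*
	 * For illustration (values hypothetical): with an MTU of 1500
	 * and an EIB_TX_COPY_THRESH of 4K, LSO-capable setups get 4K
	 * copy buffers, so an unmappable send of up to 4K can be
	 * bounced through its own swqe copy buffer instead of consuming
	 * an 8K LSO buffer; without LSO support, buffers stay mtu-sized.
	 */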

	eib_rsrc_init_wqe_pool(ss, &ss->ei_tx, tx_bufsz, EIB_WP_TYPE_TX);
	tx = ss->ei_tx;

	/*
	 * Register the TX memory region with IBTF for use
	 */
	attr.mr_vaddr = tx->wp_vaddr;
	attr.mr_len = tx->wp_memsz;
	attr.mr_as = NULL;
	attr.mr_flags = IBT_MR_SLEEP;

	ret = ibt_register_mr(ss->ei_hca_hdl, ss->ei_pd_hdl, &attr,
	    &tx->wp_mr, &desc);
	if (ret != IBT_SUCCESS) {
		EIB_DPRINTF_ERR(ss->ei_instance, "eib_rsrc_setup_txbufs: "
		    "ibt_register_mr() failed for tx "
		    "region (0x%llx, 0x%llx) with ret=%d",
		    attr.mr_vaddr, attr.mr_len, ret);

		*err = EINVAL;
		goto rsrc_setup_txbufs_fail;
	}
	tx->wp_lkey = desc.md_lkey;

	/*
	 * Now setup the tx wqes
	 */
	buf = (uint8_t *)(uintptr_t)(tx->wp_vaddr);
	for (i = 0, blk = 0; blk < EIB_BLKS_PER_POOL; blk++) {
		for (ndx = 0; ndx < EIB_WQES_PER_BLK; ndx++, i++) {
			wqe = &tx->wp_wqe[i];
			/*
			 * Allocate a UD destination handle
			 */
			ret = ibt_alloc_ud_dest(ss->ei_hca_hdl,
			    IBT_UD_DEST_NO_FLAGS, ss->ei_pd_hdl, &dest);
			if (ret != IBT_SUCCESS) {
				EIB_DPRINTF_ERR(ss->ei_instance,
				    "eib_rsrc_setup_txbufs: "
				    "ibt_alloc_ud_dest(hca_hdl=0x%llx) "
				    "failed, ret=%d", ss->ei_hca_hdl, ret);

				*err = ENOMEM;
				goto rsrc_setup_txbufs_fail;
			}

			/*
			 * These parameters should remain fixed throughout the
			 * lifetime of this wqe.
			 */
			wqe->qe_pool = tx;
			wqe->qe_cpbuf = buf;
			wqe->qe_bufsz = tx_bufsz;

			/*
			 * The qe_dest and qe_payload_hdr are specific to tx
			 * only, but remain unchanged throughout the lifetime
			 * of the wqe.
			 *
			 * The payload header is normally used when we have an
			 * LSO packet to send.  Since the EoIB encapsulation
			 * header won't be part of the message we get from the
			 * network layer, we'll need to copy the lso header into
			 * a new buffer every time before we hand over the LSO
			 * send request to the hca driver.
			 */
			wqe->qe_dest = dest;
			wqe->qe_payload_hdr =
			    kmem_zalloc(EIB_MAX_PAYLOAD_HDR_SZ, KM_SLEEP);

			/*
			 * The encapsulation header is at the start of the
			 * payload header and is initialized to the default
			 * encapsulation header we use (no multiple segments,
			 * no FCS). This part of the header is not expected
			 * to change.
			 */
			encap_hdr = (uint32_t *)(void *)wqe->qe_payload_hdr;
			*encap_hdr = htonl(EIB_TX_ENCAP_HDR);

			/*
			 * The parameters set below are used in the tx and rx
			 * paths.  These parameters (except ds_key) are reset
			 * to these default values in eib_rsrc_return_wqes().
			 */
			wqe->qe_sgl.ds_key = tx->wp_lkey;
			wqe->qe_sgl.ds_va = (ib_vaddr_t)(uintptr_t)buf;
			wqe->qe_sgl.ds_len = wqe->qe_bufsz;
			wqe->qe_mp = NULL;
			wqe->qe_info =
			    ((blk & EIB_WQEBLK_MASK) << EIB_WQEBLK_SHIFT) |
			    ((ndx & EIB_WQENDX_MASK) << EIB_WQENDX_SHIFT) |
			    ((uint_t)EIB_WQE_TX << EIB_WQETYP_SHIFT);
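			/*
			 * Given the packing above, the owning block and
			 * index can later be recovered from qe_info alone,
			 * e.g. (sketch):
			 *
			 *   blk = (qe_info >> EIB_WQEBLK_SHIFT) &
			 *       EIB_WQEBLK_MASK;
			 *   ndx = (qe_info >> EIB_WQENDX_SHIFT) &
			 *       EIB_WQENDX_MASK;
			 */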

			/*
			 * These tx-specific parameters (except wr_id and
			 * wr_trans) are reset in eib_rsrc_grab_swqes() to make
			 * sure any freshly acquired swqe from the pool has
			 * these default settings for the caller.
			 */
			wqe->qe_wr.send.wr_id = (ibt_wrid_t)(uintptr_t)wqe;
			wqe->qe_wr.send.wr_trans = IBT_UD_SRV;
			wqe->qe_wr.send.wr_flags = IBT_WR_NO_FLAGS;
			wqe->qe_wr.send.wr.ud.udwr_dest = wqe->qe_dest;
			wqe->qe_wr.send.wr_opcode = IBT_WRC_SEND;
			wqe->qe_wr.send.wr_nds = 1;
			wqe->qe_wr.send.wr_sgl = &wqe->qe_sgl;
			wqe->qe_nxt_post = NULL;
			wqe->qe_iov_hdl = NULL;

			buf += wqe->qe_bufsz;
		}
	}

	/*
	 * Before returning, create a kernel thread to monitor the status
	 * of wqes in the tx wqe pool.  Note that this thread cannot be
	 * created from eib_state_init() during attach(), since the thread
	 * expects the wqe pool to be allocated and ready when it starts,
	 * and the tx bufs initialization only happens during eib_m_start().
	 */
	kt = thread_create(NULL, 0, eib_monitor_tx_wqes, ss, 0,
	    &p0, TS_RUN, minclsyspri);
	ss->ei_txwqe_monitor = kt->t_did;

	return (EIB_E_SUCCESS);

rsrc_setup_txbufs_fail:
	eib_rb_rsrc_setup_txbufs(ss, B_FALSE);
	return (EIB_E_FAILURE);
}

static int
eib_rsrc_setup_rxbufs(eib_t *ss, int *err)
{
	eib_wqe_pool_t *rx;
	eib_wqe_t *wqe;
	ibt_mr_attr_t attr;
	ibt_mr_desc_t desc;
	ibt_status_t ret;
	uint8_t	*buf;
	uint_t mtu = ss->ei_props->ep_mtu;
	uint_t blk;
	uint_t ndx;
	uint_t i;

	/*
	 * Try to allocate and initialize the wqe pool. When this is called
	 * during a plumb via the mac m_start callback, we need to check
	 * whether a fresh wqe pool allocation is actually necessary.  If
	 * during a previous unplumb we didn't free the wqe pool because the
	 * nw layer was holding on to some rx buffers, we don't need to
	 * allocate a new pool and set up the buffers again; we'll just
	 * start re-using the previous one.
	 */
	if (ss->ei_rx != NULL)
		return (EIB_E_SUCCESS);

	/*
	 * The receive buffer has to work for all channels, specifically the
	 * data qp of the vnics.  This means that the buffer must be large
	 * enough to hold MTU sized IB payload (including the EoIB and ethernet
	 * headers) plus the GRH. In addition, because the ethernet header is
	 * either 14 or 18 bytes (tagless or vlan tagged), we should have the
	 * buffer filled in such a way that the IP header starts at an address
	 * that is at least 4-byte aligned.  In order to do this, we need to
	 * have some additional room.
	 */
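	/*
	 * To see why the extra room helps (a sketch; the 40-byte GRH size
	 * is from the IB spec, EIB_IPHDR_ALIGN_ROOM's actual value lives
	 * in eib_impl.h): with the GRH at offset 0 and a 14-byte untagged
	 * ethernet header following the EoIB header, the IP header would
	 * otherwise land at a fixed, possibly misaligned offset.  The
	 * slack lets the rx path shift where the payload starts within
	 * the buffer by a few bytes so the IP header falls on a 4-byte
	 * boundary.
	 */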
	eib_rsrc_init_wqe_pool(ss, &ss->ei_rx,
	    mtu + EIB_GRH_SZ + EIB_IPHDR_ALIGN_ROOM, EIB_WP_TYPE_RX);
	rx = ss->ei_rx;

	/*
	 * Register the RX memory region with IBTF for use
	 */
	attr.mr_vaddr = rx->wp_vaddr;
	attr.mr_len = rx->wp_memsz;
	attr.mr_as = NULL;
	attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE;

	ret = ibt_register_mr(ss->ei_hca_hdl, ss->ei_pd_hdl, &attr,
	    &rx->wp_mr, &desc);
	if (ret != IBT_SUCCESS) {
		EIB_DPRINTF_ERR(ss->ei_instance, "eib_rsrc_setup_rxbufs: "
		    "ibt_register_mr() failed for rx "
		    "region (0x%llx, 0x%llx) with ret=%d",
		    attr.mr_vaddr, attr.mr_len, ret);

		*err = EINVAL;
		goto rsrc_setup_rxbufs_fail;
	}
	rx->wp_lkey = desc.md_lkey;

	/*
	 * Now setup the rx wqes
	 */
	buf = (uint8_t *)(uintptr_t)(rx->wp_vaddr);
	for (i = 0, blk = 0; blk < EIB_BLKS_PER_POOL; blk++) {
		for (ndx = 0; ndx < EIB_WQES_PER_BLK; ndx++, i++) {
			wqe = &rx->wp_wqe[i];

			/*
			 * These parameters should remain fixed throughout the
			 * lifetime of this recv wqe. The qe_frp will only be
			 * used by the data channel of vnics and will remain
			 * unused by other channels.
			 */
			wqe->qe_pool = rx;
			wqe->qe_cpbuf = buf;
			wqe->qe_bufsz = mtu + EIB_GRH_SZ + EIB_IPHDR_ALIGN_ROOM;
			wqe->qe_wr.recv.wr_id = (ibt_wrid_t)(uintptr_t)wqe;
			wqe->qe_wr.recv.wr_nds = 1;
			wqe->qe_wr.recv.wr_sgl = &wqe->qe_sgl;
			wqe->qe_frp.free_func = eib_data_rx_recycle;
			wqe->qe_frp.free_arg = (caddr_t)wqe;

			/*
			 * The parameters set below are used in the tx and rx
			 * paths.  These parameters (except ds_key) are reset
			 * to these default values in eib_rsrc_return_wqes().
			 */
			wqe->qe_sgl.ds_key = rx->wp_lkey;
			wqe->qe_sgl.ds_va = (ib_vaddr_t)(uintptr_t)buf;
			wqe->qe_sgl.ds_len = wqe->qe_bufsz;
			wqe->qe_mp = NULL;
			wqe->qe_info =
			    ((blk & EIB_WQEBLK_MASK) << EIB_WQEBLK_SHIFT) |
			    ((ndx & EIB_WQENDX_MASK) << EIB_WQENDX_SHIFT) |
			    ((uint_t)EIB_WQE_RX << EIB_WQETYP_SHIFT);

			/*
			 * These rx-specific parameters are also reset to
			 * these default values in eib_rsrc_return_wqes().
			 */
			wqe->qe_chan = NULL;
			wqe->qe_vnic_inst = -1;

			buf += (mtu + EIB_GRH_SZ + EIB_IPHDR_ALIGN_ROOM);
		}
	}

	return (EIB_E_SUCCESS);

rsrc_setup_rxbufs_fail:
	eib_rb_rsrc_setup_rxbufs(ss, B_FALSE);
	return (EIB_E_FAILURE);
}

static int
eib_rsrc_setup_lsobufs(eib_t *ss, int *err)
{
	eib_lsobkt_t *bkt;
	eib_lsobuf_t *elem;
	eib_lsobuf_t *tail;
	ibt_mr_attr_t attr;
	ibt_mr_desc_t desc;
	kthread_t *kt;

	uint8_t *lsomem;
	uint8_t *memp;
	ibt_status_t ret;
	int i;

	/*
	 * Allocate the lso bucket and space for buffers
	 */
	bkt = kmem_zalloc(sizeof (eib_lsobkt_t), KM_SLEEP);
	lsomem = kmem_zalloc(eib_lso_num_bufs * EIB_LSO_BUFSZ, KM_SLEEP);

	/*
	 * Register lso memory and save the lkey
	 */
	attr.mr_vaddr = (uint64_t)(uintptr_t)lsomem;
	attr.mr_len = eib_lso_num_bufs * EIB_LSO_BUFSZ;
	attr.mr_as = NULL;
	attr.mr_flags = IBT_MR_SLEEP;

	ret = ibt_register_mr(ss->ei_hca_hdl, ss->ei_pd_hdl, &attr,
	    &bkt->bk_mr_hdl, &desc);
	if (ret != IBT_SUCCESS) {
		*err = EINVAL;
		EIB_DPRINTF_ERR(ss->ei_instance, "eib_rsrc_setup_lsobufs: "
		    "ibt_register_mr() failed for LSO "
		    "region (0x%llx, 0x%llx) with ret=%d",
		    attr.mr_vaddr, attr.mr_len, ret);

		kmem_free(lsomem, eib_lso_num_bufs * EIB_LSO_BUFSZ);
		kmem_free(bkt, sizeof (eib_lsobkt_t));

		return (EIB_E_FAILURE);
	}
	bkt->bk_lkey = desc.md_lkey;

	/*
	 * Now allocate the buflist.  Note that the elements in the buflist and
	 * the buffers in the lso memory have a permanent 1-1 relation, so we
	 * can always derive the address of a buflist entry from the address of
	 * an lso buffer.
	 */
	bkt->bk_bufl = kmem_zalloc(eib_lso_num_bufs * sizeof (eib_lsobuf_t),
	    KM_SLEEP);

	/*
	 * Set up the lso buf chain
	 */
	memp = lsomem;
	elem = bkt->bk_bufl;
	for (i = 0; i < eib_lso_num_bufs; i++) {
		elem->lb_isfree = 1;
		elem->lb_buf = memp;
		elem->lb_next = elem + 1;

		tail = elem;

		memp += EIB_LSO_BUFSZ;
		elem++;
	}
	tail->lb_next = NULL;

	/*
	 * Set up the LSO buffer information in eib state
	 */
	bkt->bk_free_head = bkt->bk_bufl;
	bkt->bk_mem = lsomem;
	bkt->bk_nelem = eib_lso_num_bufs;
	bkt->bk_nfree = bkt->bk_nelem;

	mutex_init(&bkt->bk_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&bkt->bk_cv, NULL, CV_DEFAULT, NULL);

	ss->ei_lso = bkt;

	/*
	 * Before returning, create a kernel thread to monitor the status
	 * of lso bufs
	 */
	kt = thread_create(NULL, 0, eib_monitor_lso_bufs, ss, 0,
	    &p0, TS_RUN, minclsyspri);
	ss->ei_lsobufs_monitor = kt->t_did;

	return (EIB_E_SUCCESS);
}

static void
eib_rsrc_init_wqe_pool(eib_t *ss, eib_wqe_pool_t **wpp, ib_memlen_t bufsz,
    int wp_type)
{
	eib_wqe_pool_t *wp;
	uint_t wp_wqesz;
	int i;

	ASSERT(wpp != NULL);
	ASSERT(*wpp == NULL);

	/*
	 * Allocate the wqe pool, wqes and bufs
	 */
	wp = kmem_zalloc(sizeof (eib_wqe_pool_t), KM_SLEEP);
	wp_wqesz = EIB_WQES_PER_POOL * sizeof (eib_wqe_t);
	wp->wp_wqe = (eib_wqe_t *)kmem_zalloc(wp_wqesz, KM_SLEEP);
	wp->wp_memsz = EIB_WQES_PER_POOL * bufsz;
	wp->wp_vaddr = (ib_vaddr_t)(uintptr_t)kmem_zalloc(wp->wp_memsz,
	    KM_SLEEP);
	wp->wp_ss = ss;
	wp->wp_type = wp_type;
	wp->wp_nfree_lwm = (wp_type == EIB_WP_TYPE_TX) ?
	    EIB_NFREE_SWQES_LWM : EIB_NFREE_RWQES_LWM;

	/*
	 * Initialize the lock and bitmaps: everything is available at first,
	 * but note that if the number of blocks per pool is less than 64, we
	 * need to initialize those extra bits as "unavailable" - these will
	 * remain unavailable throughout.
	 */
	mutex_init(&wp->wp_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&wp->wp_cv, NULL, CV_DEFAULT, NULL);

	wp->wp_nfree = EIB_WQES_PER_POOL;
	wp->wp_free_blks = (EIB_BLKS_PER_POOL >= 64) ? (~0) :
	    (((uint64_t)1 << EIB_BLKS_PER_POOL) - 1);
	for (i = 0; i < EIB_BLKS_PER_POOL; i++)
		wp->wp_free_wqes[i] = ~0;
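	/*
	 * Example of the mask above (illustrative): if EIB_BLKS_PER_POOL
	 * were 16, wp_free_blks would be ((uint64_t)1 << 16) - 1 == 0xffff,
	 * i.e. bits 0-15 set ("free") and bits 16-63 permanently clear, so
	 * the grab path can never select a block that doesn't exist.
	 */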

	*wpp = wp;
}

/*ARGSUSED*/
static void
eib_rsrc_fini_wqe_pool(eib_t *ss, eib_wqe_pool_t **wpp)
{
	eib_wqe_pool_t *wp;

	ASSERT(wpp != NULL);

	wp = *wpp;
	ASSERT(*wpp != NULL);

	cv_destroy(&wp->wp_cv);
	mutex_destroy(&wp->wp_lock);

	kmem_free((void *)(uintptr_t)(wp->wp_vaddr), wp->wp_memsz);
	kmem_free(wp->wp_wqe, EIB_WQES_PER_POOL * sizeof (eib_wqe_t));
	kmem_free(wp, sizeof (eib_wqe_pool_t));

	*wpp = NULL;
}

/*ARGSUSED*/
static boolean_t
eib_rsrc_ok_to_free_pool(eib_t *ss, eib_wqe_pool_t *wp, boolean_t force)
{
	uint64_t free_blks;
	int i;

	/*
	 * See if we can release all memory allocated for buffers, wqes and
	 * the pool.  Note that in the case of data channel rx buffers, some
	 * of the buffers may not be free if the nw layer is holding on to
	 * them still.  If this is the case, we cannot free the wqe pool now
	 * or a subsequent access by the nw layer to the buffers will cause
	 * a panic.
	 */
	ASSERT(wp != NULL);

	/*
	 * If the force-free flag is set, we can always release the memory.
	 * Note that this flag is currently unused and should be removed.
	 */
	if (force == B_TRUE)
		return (B_TRUE);

	mutex_enter(&wp->wp_lock);

	/*
	 * If a whole block remains allocated, obviously we cannot free
	 * the pool
	 */
	free_blks = (EIB_BLKS_PER_POOL >= 64) ? (~0) :
	    (((uint64_t)1 << EIB_BLKS_PER_POOL) - 1);
	if (wp->wp_free_blks != free_blks) {
		mutex_exit(&wp->wp_lock);
		return (B_FALSE);
	}

	/*
	 * If even a single wqe within any one block remains in-use, we
	 * cannot free the pool
	 */
	for (i = 0; i < EIB_BLKS_PER_POOL; i++) {
		if (wp->wp_free_wqes[i] != (~0)) {
			mutex_exit(&wp->wp_lock);
			return (B_FALSE);
		}
	}

	mutex_exit(&wp->wp_lock);

	return (B_TRUE);
}

/*ARGSUSED*/
static int
eib_rsrc_grab_wqes(eib_t *ss, eib_wqe_pool_t *wp, eib_wqe_t **wqes,
    uint_t n_req, uint_t *actual, int pri)
{
	uint_t n_allocd = 0;
	int blk;
	int ndx;
	int wqe_ndx;

	ASSERT(wp != NULL);
	ASSERT(wqes != NULL);

	mutex_enter(&wp->wp_lock);

	/*
	 * If this is a low priority request, adjust the number requested
	 * so we don't allocate beyond the low-water-mark
	 */
	if (pri == EIB_WPRI_LO) {
		if (wp->wp_nfree <= wp->wp_nfree_lwm)
			n_req = 0;
		else if ((wp->wp_nfree - n_req) < wp->wp_nfree_lwm)
			n_req = wp->wp_nfree - wp->wp_nfree_lwm;
	}
989*b494511aSVenki Rajagopalan 
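	/*
	 * For example (hypothetical numbers): with wp_nfree = 100,
	 * wp_nfree_lwm = 90 and n_req = 30, a low priority caller is
	 * clamped to n_req = 100 - 90 = 10, so low priority grabs can
	 * never take the pool below its low-water-mark.
	 */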
990*b494511aSVenki Rajagopalan 	for (n_allocd = 0;  n_allocd < n_req; n_allocd++) {
991*b494511aSVenki Rajagopalan 		/*
992*b494511aSVenki Rajagopalan 		 * If the entire pool is unavailable, quit
993*b494511aSVenki Rajagopalan 		 */
994*b494511aSVenki Rajagopalan 		if (wp->wp_free_blks == 0)
995*b494511aSVenki Rajagopalan 			break;
996*b494511aSVenki Rajagopalan 
997*b494511aSVenki Rajagopalan 		/*
998*b494511aSVenki Rajagopalan 		 * Find the first wqe that's available
999*b494511aSVenki Rajagopalan 		 */
1000*b494511aSVenki Rajagopalan 		blk = EIB_FIND_LSB_SET(wp->wp_free_blks);
1001*b494511aSVenki Rajagopalan 		ASSERT(blk != -1);
1002*b494511aSVenki Rajagopalan 		ndx = EIB_FIND_LSB_SET(wp->wp_free_wqes[blk]);
1003*b494511aSVenki Rajagopalan 		ASSERT(ndx != -1);
1004*b494511aSVenki Rajagopalan 
1005*b494511aSVenki Rajagopalan 		/*
1006*b494511aSVenki Rajagopalan 		 * Mark the wqe as allocated
1007*b494511aSVenki Rajagopalan 		 */
1008*b494511aSVenki Rajagopalan 		wp->wp_free_wqes[blk] &= (~((uint64_t)1 << ndx));
1009*b494511aSVenki Rajagopalan 
1010*b494511aSVenki Rajagopalan 		/*
1011*b494511aSVenki Rajagopalan 		 * If this was the last free wqe in this block, mark
1012*b494511aSVenki Rajagopalan 		 * the block itself as unavailable
1013*b494511aSVenki Rajagopalan 		 */
1014*b494511aSVenki Rajagopalan 		if (wp->wp_free_wqes[blk] == 0)
1015*b494511aSVenki Rajagopalan 			wp->wp_free_blks &= (~((uint64_t)1 << blk));
1016*b494511aSVenki Rajagopalan 
1017*b494511aSVenki Rajagopalan 		/*
1018*b494511aSVenki Rajagopalan 		 * Return this wqe to the caller
1019*b494511aSVenki Rajagopalan 		 */
1020*b494511aSVenki Rajagopalan 		wqe_ndx = blk * EIB_WQES_PER_BLK + ndx;
1021*b494511aSVenki Rajagopalan 		wqes[n_allocd] = &(wp->wp_wqe[wqe_ndx]);
1022*b494511aSVenki Rajagopalan 	}
1023*b494511aSVenki Rajagopalan 
1024*b494511aSVenki Rajagopalan 	wp->wp_nfree -= n_allocd;
1025*b494511aSVenki Rajagopalan 
1026*b494511aSVenki Rajagopalan 	mutex_exit(&wp->wp_lock);
1027*b494511aSVenki Rajagopalan 
1028*b494511aSVenki Rajagopalan 	if (n_allocd == 0)
1029*b494511aSVenki Rajagopalan 		return (EIB_E_FAILURE);
1030*b494511aSVenki Rajagopalan 
1031*b494511aSVenki Rajagopalan 	if (actual) {
1032*b494511aSVenki Rajagopalan 		*actual = n_allocd;
1033*b494511aSVenki Rajagopalan 	}
1034*b494511aSVenki Rajagopalan 
1035*b494511aSVenki Rajagopalan 	return (EIB_E_SUCCESS);
1036*b494511aSVenki Rajagopalan }
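
/*
 * Illustrative sketch (not part of the driver): a single step of the
 * two-level bitmap search used by eib_rsrc_grab_wqes().  Here ffsll()
 * stands in for EIB_FIND_LSB_SET, and the fixed block width is an
 * assumption made for the example.
 *
 *	#include <stdint.h>
 *	#include <strings.h>		// ffsll()
 *
 *	#define	WQES_PER_BLK	64	// assumed block width
 *
 *	// Grab the first free wqe; return its pool-wide index, or -1
 *	// if the pool is exhausted.
 *	static int
 *	grab_one(uint64_t *free_blks, uint64_t *free_wqes)
 *	{
 *		int blk, ndx;
 *
 *		if (*free_blks == 0)
 *			return (-1);
 *		blk = ffsll((long long)*free_blks) - 1;
 *		ndx = ffsll((long long)free_wqes[blk]) - 1;
 *		free_wqes[blk] &= ~((uint64_t)1 << ndx);
 *		if (free_wqes[blk] == 0)
 *			*free_blks &= ~((uint64_t)1 << blk);
 *		return (blk * WQES_PER_BLK + ndx);
 *	}
 */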
1037*b494511aSVenki Rajagopalan 
1038*b494511aSVenki Rajagopalan /*ARGSUSED*/
1039*b494511aSVenki Rajagopalan static void
1040*b494511aSVenki Rajagopalan eib_rsrc_return_wqes(eib_t *ss, eib_wqe_pool_t *wp, eib_wqe_t **wqes,
1041*b494511aSVenki Rajagopalan     uint_t n_wqes)
1042*b494511aSVenki Rajagopalan {
1043*b494511aSVenki Rajagopalan 	eib_wqe_t *wqe;
1044*b494511aSVenki Rajagopalan 	uint_t n_freed = 0;
1045*b494511aSVenki Rajagopalan 	uint_t blk;
1046*b494511aSVenki Rajagopalan 	uint_t ndx;
1047*b494511aSVenki Rajagopalan 
1048*b494511aSVenki Rajagopalan 	ASSERT(wp != NULL);
1049*b494511aSVenki Rajagopalan 	ASSERT(wqes != NULL);
1050*b494511aSVenki Rajagopalan 
1051*b494511aSVenki Rajagopalan 	mutex_enter(&wp->wp_lock);
1052*b494511aSVenki Rajagopalan 	for (n_freed = 0;  n_freed < n_wqes; n_freed++) {
1053*b494511aSVenki Rajagopalan 		wqe = wqes[n_freed];
1054*b494511aSVenki Rajagopalan 
1055*b494511aSVenki Rajagopalan 		/*
1056*b494511aSVenki Rajagopalan 		 * This wqe is being returned to the pool, so clear any
1057*b494511aSVenki Rajagopalan 		 * wqe flags and reset the buffer address and size in its
1058*b494511aSVenki Rajagopalan 		 * single segment sgl back to their initial values.  Also
1059*b494511aSVenki Rajagopalan 		 * clear the mblk and channel pointers and the vnic instance.
1060*b494511aSVenki Rajagopalan 		 */
1061*b494511aSVenki Rajagopalan 		wqe->qe_sgl.ds_va = (ib_vaddr_t)(uintptr_t)wqe->qe_cpbuf;
1062*b494511aSVenki Rajagopalan 		wqe->qe_sgl.ds_len = wqe->qe_bufsz;
1063*b494511aSVenki Rajagopalan 		wqe->qe_mp = NULL;
1064*b494511aSVenki Rajagopalan 		wqe->qe_chan = NULL;
1065*b494511aSVenki Rajagopalan 		wqe->qe_vnic_inst = -1;
1066*b494511aSVenki Rajagopalan 		wqe->qe_info &= (~EIB_WQEFLGS_MASK);
1067*b494511aSVenki Rajagopalan 
1068*b494511aSVenki Rajagopalan 		/*
1069*b494511aSVenki Rajagopalan 		 * Mark the wqe free in its block
1070*b494511aSVenki Rajagopalan 		 */
1071*b494511aSVenki Rajagopalan 		blk = EIB_WQE_BLK(wqe->qe_info);
1072*b494511aSVenki Rajagopalan 		ndx = EIB_WQE_NDX(wqe->qe_info);
1073*b494511aSVenki Rajagopalan 
1074*b494511aSVenki Rajagopalan 		wp->wp_free_wqes[blk] |= ((uint64_t)1 << ndx);
1075*b494511aSVenki Rajagopalan 
1076*b494511aSVenki Rajagopalan 		/*
1077*b494511aSVenki Rajagopalan 		 * This block now has at least one wqe free, so mark
1078*b494511aSVenki Rajagopalan 		 * the block itself as available and move on to the
1079*b494511aSVenki Rajagopalan 		 * next wqe to free
1080*b494511aSVenki Rajagopalan 		 */
1081*b494511aSVenki Rajagopalan 		wp->wp_free_blks |= ((uint64_t)1 << blk);
1082*b494511aSVenki Rajagopalan 	}
1083*b494511aSVenki Rajagopalan 
1084*b494511aSVenki Rajagopalan 	wp->wp_nfree += n_freed;
1085*b494511aSVenki Rajagopalan 
1086*b494511aSVenki Rajagopalan 	/*
1087*b494511aSVenki Rajagopalan 	 * If the number of available wqes in the pool has just crossed
1088*b494511aSVenki Rajagopalan 	 * the high-water-mark, wake up anyone who may be sleeping on it.
1089*b494511aSVenki Rajagopalan 	 */
1090*b494511aSVenki Rajagopalan 	if ((wp->wp_type == EIB_WP_TYPE_TX) &&
1091*b494511aSVenki Rajagopalan 	    ((wp->wp_nfree - n_freed) < EIB_NFREE_SWQES_HWM) &&
1092*b494511aSVenki Rajagopalan 	    (wp->wp_nfree >= EIB_NFREE_SWQES_HWM)) {
1093*b494511aSVenki Rajagopalan 		cv_broadcast(&wp->wp_cv);
1094*b494511aSVenki Rajagopalan 	}
1095*b494511aSVenki Rajagopalan 
1096*b494511aSVenki Rajagopalan 	mutex_exit(&wp->wp_lock);
1097*b494511aSVenki Rajagopalan }
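
/*
 * Illustrative sketch (not part of the driver): releasing one wqe back
 * into the bitmaps and detecting the upward water-mark crossing that
 * triggers the cv_broadcast() in eib_rsrc_return_wqes().  The names and
 * the mark value below are assumptions made for the example.
 *
 *	#include <stdint.h>
 *
 *	#define	NFREE_HWM	16	// assumed high-water-mark
 *
 *	// Return one wqe; the result is 1 if waiters sleeping on the
 *	// pool should now be woken up.
 *	static int
 *	return_one(uint64_t *free_blks, uint64_t *free_wqes,
 *	    unsigned int *nfree, int blk, int ndx)
 *	{
 *		unsigned int old_nfree = *nfree;
 *
 *		free_wqes[blk] |= ((uint64_t)1 << ndx);	// wqe free again
 *		*free_blks |= ((uint64_t)1 << blk);	// blk has a free wqe
 *		*nfree = old_nfree + 1;
 *
 *		// wake only on the below-to-at-or-above crossing
 *		return (old_nfree < NFREE_HWM && *nfree >= NFREE_HWM);
 *	}
 */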
1098*b494511aSVenki Rajagopalan 
1099*b494511aSVenki Rajagopalan static void
1100*b494511aSVenki Rajagopalan eib_rb_rsrc_setup_txbufs(eib_t *ss, boolean_t force)
1101*b494511aSVenki Rajagopalan {
1102*b494511aSVenki Rajagopalan 	eib_wqe_pool_t *wp = ss->ei_tx;
1103*b494511aSVenki Rajagopalan 	eib_wqe_t *wqe;
1104*b494511aSVenki Rajagopalan 	ibt_ud_dest_hdl_t dest;
1105*b494511aSVenki Rajagopalan 	ibt_status_t ret;
1106*b494511aSVenki Rajagopalan 	uint8_t *plhdr;
1107*b494511aSVenki Rajagopalan 	int i;
1108*b494511aSVenki Rajagopalan 
1109*b494511aSVenki Rajagopalan 	if (wp == NULL)
1110*b494511aSVenki Rajagopalan 		return;
1111*b494511aSVenki Rajagopalan 
1112*b494511aSVenki Rajagopalan 	/*
1113*b494511aSVenki Rajagopalan 	 * Check if it's ok to free the tx wqe pool (i.e. all buffers have
1114*b494511aSVenki Rajagopalan 	 * been reclaimed) and if so, stop the txwqe monitor thread (and wait
1115*b494511aSVenki Rajagopalan 	 * for it to die), release the UD destination handles, deregister
1116*b494511aSVenki Rajagopalan 	 * memory and fini the wqe pool.
1117*b494511aSVenki Rajagopalan 	 */
1118*b494511aSVenki Rajagopalan 	if (eib_rsrc_ok_to_free_pool(ss, wp, force)) {
1119*b494511aSVenki Rajagopalan 		eib_stop_monitor_tx_wqes(ss);
1120*b494511aSVenki Rajagopalan 
1121*b494511aSVenki Rajagopalan 		for (i = 0; i < EIB_WQES_PER_POOL; i++) {
1122*b494511aSVenki Rajagopalan 			wqe = &wp->wp_wqe[i];
1123*b494511aSVenki Rajagopalan 			if ((plhdr = wqe->qe_payload_hdr) != NULL) {
1124*b494511aSVenki Rajagopalan 				kmem_free(plhdr, EIB_MAX_PAYLOAD_HDR_SZ);
1125*b494511aSVenki Rajagopalan 			}
1126*b494511aSVenki Rajagopalan 			if ((dest = wqe->qe_dest) != NULL) {
1127*b494511aSVenki Rajagopalan 				ret = ibt_free_ud_dest(dest);
1128*b494511aSVenki Rajagopalan 				if (ret != IBT_SUCCESS) {
1129*b494511aSVenki Rajagopalan 					EIB_DPRINTF_WARN(ss->ei_instance,
1130*b494511aSVenki Rajagopalan 					    "eib_rb_rsrc_setup_txbufs: "
1131*b494511aSVenki Rajagopalan 					    "ibt_free_ud_dest() failed, ret=%d",
1132*b494511aSVenki Rajagopalan 					    ret);
1133*b494511aSVenki Rajagopalan 				}
1134*b494511aSVenki Rajagopalan 			}
1135*b494511aSVenki Rajagopalan 		}
1136*b494511aSVenki Rajagopalan 		if (wp->wp_mr) {
1137*b494511aSVenki Rajagopalan 			if ((ret = ibt_deregister_mr(ss->ei_hca_hdl,
1138*b494511aSVenki Rajagopalan 			    wp->wp_mr)) != IBT_SUCCESS) {
1139*b494511aSVenki Rajagopalan 				EIB_DPRINTF_WARN(ss->ei_instance,
1140*b494511aSVenki Rajagopalan 				    "eib_rb_rsrc_setup_txbufs: "
1141*b494511aSVenki Rajagopalan 				    "ibt_deregister_mr() failed, ret=%d", ret);
1142*b494511aSVenki Rajagopalan 			}
1143*b494511aSVenki Rajagopalan 			wp->wp_mr = NULL;
1144*b494511aSVenki Rajagopalan 		}
1145*b494511aSVenki Rajagopalan 		eib_rsrc_fini_wqe_pool(ss, &ss->ei_tx);
1146*b494511aSVenki Rajagopalan 	}
1147*b494511aSVenki Rajagopalan }
1148*b494511aSVenki Rajagopalan 
1149*b494511aSVenki Rajagopalan static void
1150*b494511aSVenki Rajagopalan eib_rb_rsrc_setup_rxbufs(eib_t *ss, boolean_t force)
1151*b494511aSVenki Rajagopalan {
1152*b494511aSVenki Rajagopalan 	eib_wqe_pool_t *rx = ss->ei_rx;
1153*b494511aSVenki Rajagopalan 	ibt_status_t ret;
1154*b494511aSVenki Rajagopalan 
1155*b494511aSVenki Rajagopalan 	if (rx == NULL)
1156*b494511aSVenki Rajagopalan 		return;
1157*b494511aSVenki Rajagopalan 
1158*b494511aSVenki Rajagopalan 	/*
1159*b494511aSVenki Rajagopalan 	 * Check if it's ok to free the rx wqe pool (i.e. all buffers have
1160*b494511aSVenki Rajagopalan 	 * been reclaimed) and if so, deregister memory and fini the wqe pool.
1161*b494511aSVenki Rajagopalan 	 */
1162*b494511aSVenki Rajagopalan 	if (eib_rsrc_ok_to_free_pool(ss, rx, force)) {
1163*b494511aSVenki Rajagopalan 		if (rx->wp_mr) {
1164*b494511aSVenki Rajagopalan 			if ((ret = ibt_deregister_mr(ss->ei_hca_hdl,
1165*b494511aSVenki Rajagopalan 			    rx->wp_mr)) != IBT_SUCCESS) {
1166*b494511aSVenki Rajagopalan 				EIB_DPRINTF_WARN(ss->ei_instance,
1167*b494511aSVenki Rajagopalan 				    "eib_rb_rsrc_setup_rxbufs: "
1168*b494511aSVenki Rajagopalan 				    "ibt_deregister_mr() failed, ret=%d", ret);
1169*b494511aSVenki Rajagopalan 			}
1170*b494511aSVenki Rajagopalan 			rx->wp_mr = NULL;
1171*b494511aSVenki Rajagopalan 		}
1172*b494511aSVenki Rajagopalan 
1173*b494511aSVenki Rajagopalan 		eib_rsrc_fini_wqe_pool(ss, &ss->ei_rx);
1174*b494511aSVenki Rajagopalan 	}
1175*b494511aSVenki Rajagopalan }
1176*b494511aSVenki Rajagopalan 
1177*b494511aSVenki Rajagopalan static void
1178*b494511aSVenki Rajagopalan eib_rb_rsrc_setup_lsobufs(eib_t *ss, boolean_t force)
1179*b494511aSVenki Rajagopalan {
1180*b494511aSVenki Rajagopalan 	eib_lsobkt_t *bkt;
1181*b494511aSVenki Rajagopalan 	ibt_status_t ret;
1182*b494511aSVenki Rajagopalan 
1183*b494511aSVenki Rajagopalan 	/*
1184*b494511aSVenki Rajagopalan 	 * Grab the lso bucket from our state; if it's not there, we're done
1185*b494511aSVenki Rajagopalan 	 */
1186*b494511aSVenki Rajagopalan 	if ((bkt = ss->ei_lso) == NULL)
1187*b494511aSVenki Rajagopalan 		return;
1188*b494511aSVenki Rajagopalan 
1189*b494511aSVenki Rajagopalan 	/*
1190*b494511aSVenki Rajagopalan 	 * Try to stop the lso bufs monitor thread. If we fail, we simply
1191*b494511aSVenki Rajagopalan 	 * return.  We'll have another shot at it later from detach() with
1192*b494511aSVenki Rajagopalan 	 * the force flag set.
1193*b494511aSVenki Rajagopalan 	 */
1194*b494511aSVenki Rajagopalan 	if (eib_stop_monitor_lso_bufs(ss, force) != EIB_E_SUCCESS)
1195*b494511aSVenki Rajagopalan 		return;
1196*b494511aSVenki Rajagopalan 
1197*b494511aSVenki Rajagopalan 	/*
1198*b494511aSVenki Rajagopalan 	 * Free the buflist
1199*b494511aSVenki Rajagopalan 	 */
1200*b494511aSVenki Rajagopalan 	if (bkt->bk_bufl) {
1201*b494511aSVenki Rajagopalan 		kmem_free(bkt->bk_bufl, bkt->bk_nelem * sizeof (eib_lsobuf_t));
1202*b494511aSVenki Rajagopalan 		bkt->bk_bufl = NULL;
1203*b494511aSVenki Rajagopalan 	}
1204*b494511aSVenki Rajagopalan 
1205*b494511aSVenki Rajagopalan 	/*
1206*b494511aSVenki Rajagopalan 	 * Deregister LSO memory and free it
1207*b494511aSVenki Rajagopalan 	 */
1208*b494511aSVenki Rajagopalan 	if (bkt->bk_mr_hdl) {
1209*b494511aSVenki Rajagopalan 		if ((ret = ibt_deregister_mr(ss->ei_hca_hdl,
1210*b494511aSVenki Rajagopalan 		    bkt->bk_mr_hdl)) != IBT_SUCCESS) {
1211*b494511aSVenki Rajagopalan 			EIB_DPRINTF_WARN(ss->ei_instance,
1212*b494511aSVenki Rajagopalan 			    "eib_rb_rsrc_setup_lsobufs: "
1213*b494511aSVenki Rajagopalan 			    "ibt_deregister_mr() failed, ret=%d", ret);
1214*b494511aSVenki Rajagopalan 		}
1215*b494511aSVenki Rajagopalan 		bkt->bk_mr_hdl = NULL;
1216*b494511aSVenki Rajagopalan 	}
1217*b494511aSVenki Rajagopalan 	if (bkt->bk_mem) {
1218*b494511aSVenki Rajagopalan 		kmem_free(bkt->bk_mem, bkt->bk_nelem * EIB_LSO_BUFSZ);
1219*b494511aSVenki Rajagopalan 		bkt->bk_mem = NULL;
1220*b494511aSVenki Rajagopalan 	}
1221*b494511aSVenki Rajagopalan 
1222*b494511aSVenki Rajagopalan 	/*
1223*b494511aSVenki Rajagopalan 	 * Destroy the mutex and condvar
1224*b494511aSVenki Rajagopalan 	 */
1225*b494511aSVenki Rajagopalan 	cv_destroy(&bkt->bk_cv);
1226*b494511aSVenki Rajagopalan 	mutex_destroy(&bkt->bk_lock);
1227*b494511aSVenki Rajagopalan 
1228*b494511aSVenki Rajagopalan 	/*
1229*b494511aSVenki Rajagopalan 	 * Finally, free the lso bucket itself
1230*b494511aSVenki Rajagopalan 	 */
1231*b494511aSVenki Rajagopalan 	kmem_free(bkt, sizeof (eib_lsobkt_t));
1232*b494511aSVenki Rajagopalan 	ss->ei_lso = NULL;
1233*b494511aSVenki Rajagopalan }