1*9e39c5baSBill Taylor /*
2*9e39c5baSBill Taylor  * CDDL HEADER START
3*9e39c5baSBill Taylor  *
4*9e39c5baSBill Taylor  * The contents of this file are subject to the terms of the
5*9e39c5baSBill Taylor  * Common Development and Distribution License (the "License").
6*9e39c5baSBill Taylor  * You may not use this file except in compliance with the License.
7*9e39c5baSBill Taylor  *
8*9e39c5baSBill Taylor  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*9e39c5baSBill Taylor  * or http://www.opensolaris.org/os/licensing.
10*9e39c5baSBill Taylor  * See the License for the specific language governing permissions
11*9e39c5baSBill Taylor  * and limitations under the License.
12*9e39c5baSBill Taylor  *
13*9e39c5baSBill Taylor  * When distributing Covered Code, include this CDDL HEADER in each
14*9e39c5baSBill Taylor  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*9e39c5baSBill Taylor  * If applicable, add the following below this CDDL HEADER, with the
16*9e39c5baSBill Taylor  * fields enclosed by brackets "[]" replaced with your own identifying
17*9e39c5baSBill Taylor  * information: Portions Copyright [yyyy] [name of copyright owner]
18*9e39c5baSBill Taylor  *
19*9e39c5baSBill Taylor  * CDDL HEADER END
20*9e39c5baSBill Taylor  */
21*9e39c5baSBill Taylor 
22*9e39c5baSBill Taylor /*
23*9e39c5baSBill Taylor  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24*9e39c5baSBill Taylor  * Use is subject to license terms.
25*9e39c5baSBill Taylor  */
26*9e39c5baSBill Taylor 
27*9e39c5baSBill Taylor /*
28*9e39c5baSBill Taylor  * This file may contain confidential information of
29*9e39c5baSBill Taylor  * Mellanox Technologies, Ltd. and should not be distributed in source
30*9e39c5baSBill Taylor  * form without approval from Sun Legal.
31*9e39c5baSBill Taylor  */
32*9e39c5baSBill Taylor 
33*9e39c5baSBill Taylor #include "dapl.h"
34*9e39c5baSBill Taylor #include "dapl_tavor_hw.h"
35*9e39c5baSBill Taylor #include "dapl_tavor_wr.h"
36*9e39c5baSBill Taylor #include "dapl_tavor_ibtf_impl.h"
37*9e39c5baSBill Taylor 
38*9e39c5baSBill Taylor /*
39*9e39c5baSBill Taylor  * Function signatures
40*9e39c5baSBill Taylor  */
41*9e39c5baSBill Taylor extern uint64_t dapls_tavor_wrid_get_entry(ib_cq_handle_t, tavor_hw_cqe_t *,
42*9e39c5baSBill Taylor     uint_t, uint_t, dapls_tavor_wrid_entry_t *);
43*9e39c5baSBill Taylor extern void dapls_tavor_wrid_cq_reap(ib_cq_handle_t);
44*9e39c5baSBill Taylor extern DAPL_OS_LOCK g_tavor_uar_lock;
45*9e39c5baSBill Taylor 
46*9e39c5baSBill Taylor #ifndef	_LP64
47*9e39c5baSBill Taylor extern void dapls_atomic_assign_64(uint64_t, uint64_t *);
48*9e39c5baSBill Taylor #endif
49*9e39c5baSBill Taylor 
50*9e39c5baSBill Taylor static int dapli_tavor_wqe_send_build(ib_qp_handle_t, ibt_send_wr_t *,
51*9e39c5baSBill Taylor     uint64_t *, uint_t *);
52*9e39c5baSBill Taylor static void dapli_tavor_wqe_send_linknext(ibt_send_wr_t *, uint64_t *,
53*9e39c5baSBill Taylor     boolean_t, uint32_t, uint_t, uint64_t *, tavor_sw_wqe_dbinfo_t *);
54*9e39c5baSBill Taylor static DAT_RETURN dapli_tavor_wqe_recv_build(ib_qp_handle_t, ibt_recv_wr_t *,
55*9e39c5baSBill Taylor     uint64_t *, uint_t *);
56*9e39c5baSBill Taylor static void dapli_tavor_wqe_recv_linknext(uint64_t *, boolean_t, uint32_t,
57*9e39c5baSBill Taylor     uint_t, uint64_t *);
58*9e39c5baSBill Taylor static int dapli_tavor_cq_cqe_consume(ib_cq_handle_t, tavor_hw_cqe_t *,
59*9e39c5baSBill Taylor     ibt_wc_t *);
60*9e39c5baSBill Taylor static int dapli_tavor_cq_errcqe_consume(ib_cq_handle_t, tavor_hw_cqe_t *,
61*9e39c5baSBill Taylor     ibt_wc_t *);
62*9e39c5baSBill Taylor 
63*9e39c5baSBill Taylor /* exported to other HCAs */
64*9e39c5baSBill Taylor extern void dapli_tavor_wrid_add_entry(dapls_tavor_workq_hdr_t *, uint64_t,
65*9e39c5baSBill Taylor     uint32_t, uint_t);
66*9e39c5baSBill Taylor extern void dapli_tavor_wrid_add_entry_srq(ib_srq_handle_t, uint64_t, uint32_t);
67*9e39c5baSBill Taylor 
/*
 * Note: The 64 bit doorbells need to be written atomically.
 * In 32 bit libraries we need to use the special assembly routine
 * because compiler generated code splits the store into 2 word writes.
 */
73*9e39c5baSBill Taylor 
74*9e39c5baSBill Taylor #if defined(_LP64) || defined(__lint)
/*
 * dapli_tavor_cq_doorbell()
 * Takes the specified cq cmd, cq number and cq parameter, combines them
 * into one doorbell value, converts it with HTOBE_64() and stores it in
 * the "cq" field of the UAR.
 *
 * This is a macro to ensure inlining on the S10 amd64 compiler.  Every
 * argument is parenthesized in the expansion so that expression
 * arguments (e.g. "a | b") are cast and shifted as a whole, and the
 * expansion itself is parenthesized so it behaves as a single expression.
 */
#define	dapli_tavor_cq_doorbell(ia_uar, cq_cmd, cqn, cq_param) \
	(((tavor_hw_uar_t *)(ia_uar))->cq = HTOBE_64( \
	    ((uint64_t)(cq_cmd) << TAVOR_CQDB_CMD_SHIFT) | \
	    ((uint64_t)(cqn) << TAVOR_CQDB_CQN_SHIFT) | (cq_param)))
80*9e39c5baSBill Taylor #else
81*9e39c5baSBill Taylor 
82*9e39c5baSBill Taylor /*
83*9e39c5baSBill Taylor  * dapli_tavor_cq_doorbell()
84*9e39c5baSBill Taylor  * Takes the specified cq cmd and cq number and rings the cq doorbell
85*9e39c5baSBill Taylor  */
86*9e39c5baSBill Taylor static void
dapli_tavor_cq_doorbell(dapls_hw_uar_t ia_uar,uint32_t cq_cmd,uint32_t cqn,uint32_t cq_param)87*9e39c5baSBill Taylor dapli_tavor_cq_doorbell(dapls_hw_uar_t ia_uar, uint32_t cq_cmd, uint32_t cqn,
88*9e39c5baSBill Taylor     uint32_t cq_param)
89*9e39c5baSBill Taylor {
90*9e39c5baSBill Taylor 	uint64_t doorbell;
91*9e39c5baSBill Taylor 
92*9e39c5baSBill Taylor 	/* Build the doorbell from the parameters */
93*9e39c5baSBill Taylor 	doorbell = ((uint64_t)cq_cmd << TAVOR_CQDB_CMD_SHIFT) |
94*9e39c5baSBill Taylor 	    ((uint64_t)cqn << TAVOR_CQDB_CQN_SHIFT) | cq_param;
95*9e39c5baSBill Taylor 
96*9e39c5baSBill Taylor 	/* Write the doorbell to UAR */
97*9e39c5baSBill Taylor #ifdef _LP64
98*9e39c5baSBill Taylor 	((tavor_hw_uar_t *)ia_uar)->cq = HTOBE_64(doorbell);
99*9e39c5baSBill Taylor 	/* 32 bit version */
100*9e39c5baSBill Taylor #elif defined(i386)
101*9e39c5baSBill Taylor 	dapl_os_lock(&g_tavor_uar_lock);
102*9e39c5baSBill Taylor 	/*
103*9e39c5baSBill Taylor 	 * For 32 bit intel we assign the doorbell in the order
104*9e39c5baSBill Taylor 	 * prescribed by the Tavor PRM, lower to upper addresses
105*9e39c5baSBill Taylor 	 */
106*9e39c5baSBill Taylor 	((tavor_hw_uar32_t *)ia_uar)->cq[0] =
107*9e39c5baSBill Taylor 	    (uint32_t)HTOBE_32(doorbell >> 32);
108*9e39c5baSBill Taylor 	((tavor_hw_uar32_t *)ia_uar)->cq[1] =
109*9e39c5baSBill Taylor 	    (uint32_t)HTOBE_32(doorbell & 0x00000000ffffffff);
110*9e39c5baSBill Taylor 	dapl_os_unlock(&g_tavor_uar_lock);
111*9e39c5baSBill Taylor #else
112*9e39c5baSBill Taylor 	dapls_atomic_assign_64(HTOBE_64(doorbell),
113*9e39c5baSBill Taylor 	    &((tavor_hw_uar_t *)ia_uar)->cq);
114*9e39c5baSBill Taylor #endif
115*9e39c5baSBill Taylor }
116*9e39c5baSBill Taylor #pragma inline(dapli_tavor_cq_doorbell)
117*9e39c5baSBill Taylor 
118*9e39c5baSBill Taylor #endif	/* _LP64 */
119*9e39c5baSBill Taylor 
120*9e39c5baSBill Taylor #if defined(_LP64) || defined(__lint)
/*
 * dapli_tavor_qp_send_doorbell()
 * Takes the specified next descriptor information (address and size),
 * qp number, fence flag and opcode, combines them into one doorbell
 * value, converts it with HTOBE_64() and stores it in the "send" field
 * of the UAR.
 *
 * Every argument is parenthesized in the expansion so that expression
 * arguments (e.g. "a | b") are cast, masked and shifted as a whole, and
 * the expansion itself is parenthesized so it behaves as a single
 * expression.
 */
#define	dapli_tavor_qp_send_doorbell(ia_uar, nda, nds, qpn, fence, nopcode) \
	(((tavor_hw_uar_t *)(ia_uar))->send = HTOBE_64( \
	    (((uint64_t)(nda) & TAVOR_QPSNDDB_NDA_MASK) << \
	    TAVOR_QPSNDDB_NDA_SHIFT) | \
	    ((uint64_t)(fence) << TAVOR_QPSNDDB_F_SHIFT) | \
	    ((uint64_t)(nopcode) << TAVOR_QPSNDDB_NOPCODE_SHIFT) | \
	    ((uint64_t)(qpn) << TAVOR_QPSNDDB_QPN_SHIFT) | (nds)))
128*9e39c5baSBill Taylor #else
129*9e39c5baSBill Taylor 
130*9e39c5baSBill Taylor /*
131*9e39c5baSBill Taylor  * dapli_tavor_qp_send_doorbell()
132*9e39c5baSBill Taylor  * Takes the specified next descriptor information, qp number, opcode and
133*9e39c5baSBill Taylor  * rings the send doorbell
134*9e39c5baSBill Taylor  */
135*9e39c5baSBill Taylor static void
dapli_tavor_qp_send_doorbell(dapls_hw_uar_t ia_uar,uint32_t nda,uint32_t nds,uint32_t qpn,uint32_t fence,uint32_t nopcode)136*9e39c5baSBill Taylor dapli_tavor_qp_send_doorbell(dapls_hw_uar_t ia_uar, uint32_t nda,
137*9e39c5baSBill Taylor     uint32_t nds, uint32_t qpn, uint32_t fence, uint32_t nopcode)
138*9e39c5baSBill Taylor {
139*9e39c5baSBill Taylor 	uint64_t doorbell;
140*9e39c5baSBill Taylor 
141*9e39c5baSBill Taylor 	/* Build the doorbell from the parameters */
142*9e39c5baSBill Taylor 	doorbell = (((uint64_t)nda & TAVOR_QPSNDDB_NDA_MASK) <<
143*9e39c5baSBill Taylor 	    TAVOR_QPSNDDB_NDA_SHIFT) |
144*9e39c5baSBill Taylor 	    ((uint64_t)fence << TAVOR_QPSNDDB_F_SHIFT) |
145*9e39c5baSBill Taylor 	    ((uint64_t)nopcode << TAVOR_QPSNDDB_NOPCODE_SHIFT) |
146*9e39c5baSBill Taylor 	    ((uint64_t)qpn << TAVOR_QPSNDDB_QPN_SHIFT) | nds;
147*9e39c5baSBill Taylor 
148*9e39c5baSBill Taylor 	/* Write the doorbell to UAR */
149*9e39c5baSBill Taylor #ifdef _LP64
150*9e39c5baSBill Taylor 	((tavor_hw_uar_t *)ia_uar)->send = HTOBE_64(doorbell);
151*9e39c5baSBill Taylor #else
152*9e39c5baSBill Taylor #if defined(i386)
153*9e39c5baSBill Taylor 	dapl_os_lock(&g_tavor_uar_lock);
154*9e39c5baSBill Taylor 	/*
155*9e39c5baSBill Taylor 	 * For 32 bit intel we assign the doorbell in the order
156*9e39c5baSBill Taylor 	 * prescribed by the Tavor PRM, lower to upper addresses
157*9e39c5baSBill Taylor 	 */
158*9e39c5baSBill Taylor 	((tavor_hw_uar32_t *)ia_uar)->send[0] =
159*9e39c5baSBill Taylor 	    (uint32_t)HTOBE_32(doorbell >> 32);
160*9e39c5baSBill Taylor 	((tavor_hw_uar32_t *)ia_uar)->send[1] =
161*9e39c5baSBill Taylor 	    (uint32_t)HTOBE_32(doorbell & 0x00000000ffffffff);
162*9e39c5baSBill Taylor 	dapl_os_unlock(&g_tavor_uar_lock);
163*9e39c5baSBill Taylor #else
164*9e39c5baSBill Taylor 	dapls_atomic_assign_64(HTOBE_64(doorbell),
165*9e39c5baSBill Taylor 	    &((tavor_hw_uar_t *)ia_uar)->send);
166*9e39c5baSBill Taylor #endif
167*9e39c5baSBill Taylor #endif
168*9e39c5baSBill Taylor }
169*9e39c5baSBill Taylor #pragma inline(dapli_tavor_qp_send_doorbell)
170*9e39c5baSBill Taylor #endif	/* _LP64 */
171*9e39c5baSBill Taylor 
172*9e39c5baSBill Taylor #if defined(_LP64) || defined(__lint)
173*9e39c5baSBill Taylor 
/*
 * dapli_tavor_qp_recv_doorbell()
 * Takes the specified next descriptor information (address and size),
 * qp number and credits, combines them into one doorbell value,
 * converts it with HTOBE_64() and stores it in the "recv" field of the
 * UAR.
 *
 * Every argument is parenthesized in the expansion so that expression
 * arguments (e.g. "a | b") are cast, masked and shifted as a whole, and
 * the expansion itself is parenthesized so it behaves as a single
 * expression.
 */
#define	dapli_tavor_qp_recv_doorbell(ia_uar, nda, nds, qpn, credits) \
	(((tavor_hw_uar_t *)(ia_uar))->recv = HTOBE_64( \
	    (((uint64_t)(nda) & TAVOR_QPRCVDB_NDA_MASK) << \
	    TAVOR_QPRCVDB_NDA_SHIFT) | \
	    ((uint64_t)(nds) << TAVOR_QPRCVDB_NDS_SHIFT) | \
	    ((uint64_t)(qpn) << TAVOR_QPRCVDB_QPN_SHIFT) | (credits)))
180*9e39c5baSBill Taylor #else
181*9e39c5baSBill Taylor 
182*9e39c5baSBill Taylor /*
183*9e39c5baSBill Taylor  * dapli_tavor_qp_recv_doorbell()
184*9e39c5baSBill Taylor  * Takes the specified next descriptor information, qp number and
185*9e39c5baSBill Taylor  * rings the recv doorbell
186*9e39c5baSBill Taylor  */
187*9e39c5baSBill Taylor static void
dapli_tavor_qp_recv_doorbell(dapls_hw_uar_t ia_uar,uint32_t nda,uint32_t nds,uint32_t qpn,uint32_t credits)188*9e39c5baSBill Taylor dapli_tavor_qp_recv_doorbell(dapls_hw_uar_t ia_uar, uint32_t nda,
189*9e39c5baSBill Taylor     uint32_t nds, uint32_t qpn, uint32_t credits)
190*9e39c5baSBill Taylor {
191*9e39c5baSBill Taylor 	uint64_t doorbell;
192*9e39c5baSBill Taylor 
193*9e39c5baSBill Taylor 	/* Build the doorbell from the parameters */
194*9e39c5baSBill Taylor 	doorbell = (((uint64_t)nda & TAVOR_QPRCVDB_NDA_MASK) <<
195*9e39c5baSBill Taylor 	    TAVOR_QPRCVDB_NDA_SHIFT) |
196*9e39c5baSBill Taylor 	    ((uint64_t)nds << TAVOR_QPRCVDB_NDS_SHIFT) |
197*9e39c5baSBill Taylor 	    ((uint64_t)qpn << TAVOR_QPRCVDB_QPN_SHIFT) | credits;
198*9e39c5baSBill Taylor 
199*9e39c5baSBill Taylor 	/* Write the doorbell to UAR */
200*9e39c5baSBill Taylor #ifdef _LP64
201*9e39c5baSBill Taylor 	((tavor_hw_uar_t *)ia_uar)->recv = HTOBE_64(doorbell);
202*9e39c5baSBill Taylor #else
203*9e39c5baSBill Taylor #if defined(i386)
204*9e39c5baSBill Taylor 	dapl_os_lock(&g_tavor_uar_lock);
205*9e39c5baSBill Taylor 	/*
206*9e39c5baSBill Taylor 	 * For 32 bit intel we assign the doorbell in the order
207*9e39c5baSBill Taylor 	 * prescribed by the Tavor PRM, lower to upper addresses
208*9e39c5baSBill Taylor 	 */
209*9e39c5baSBill Taylor 	((tavor_hw_uar32_t *)ia_uar)->recv[0] =
210*9e39c5baSBill Taylor 	    (uint32_t)HTOBE_32(doorbell >> 32);
211*9e39c5baSBill Taylor 	((tavor_hw_uar32_t *)ia_uar)->recv[1] =
212*9e39c5baSBill Taylor 	    (uint32_t)HTOBE_32(doorbell & 0x00000000ffffffff);
213*9e39c5baSBill Taylor 	dapl_os_unlock(&g_tavor_uar_lock);
214*9e39c5baSBill Taylor #else
215*9e39c5baSBill Taylor 	dapls_atomic_assign_64(HTOBE_64(doorbell),
216*9e39c5baSBill Taylor 	    &((tavor_hw_uar_t *)ia_uar)->recv);
217*9e39c5baSBill Taylor #endif
218*9e39c5baSBill Taylor #endif
219*9e39c5baSBill Taylor }
220*9e39c5baSBill Taylor #pragma inline(dapli_tavor_qp_recv_doorbell)
221*9e39c5baSBill Taylor #endif	/* _LP64 */
222*9e39c5baSBill Taylor 
223*9e39c5baSBill Taylor 
/*
 * dapls_tavor_max_inline()
 * Returns the max inline value to use.
 *
 * The value comes from the DAPL_MAX_INLINE environment variable, which
 * is read on the first call only; later calls return the remembered
 * result.  When the variable is not set, dapl_os_get_env_val() is asked
 * to return -1 and that -1 is passed through (default behavior).  Any
 * other value that is zero or negative is turned into 0 (no inlining).
 */
int
dapls_tavor_max_inline(void)
{
	/* -2 marks "environment not looked at yet" */
	static int cached_max_inline = -2;
	int env_val;

	if (cached_max_inline == -2) {
		env_val = dapl_os_get_env_val("DAPL_MAX_INLINE", -1);
		if (env_val != -1 && env_val <= 0) {
			env_val = 0;	/* no inlining */
		}
		cached_max_inline = env_val;
	}

	return (cached_max_inline);
}
246*9e39c5baSBill Taylor 
247*9e39c5baSBill Taylor /*
248*9e39c5baSBill Taylor  * dapls_ib_max_request_iov(), aka, max send sgl size.
249*9e39c5baSBill Taylor  * The send queue's scatter/gather list is used for "inline" data.
250*9e39c5baSBill Taylor  *
251*9e39c5baSBill Taylor  * By default, compute reasonable send queue size based on #iovs, #wqes,
252*9e39c5baSBill Taylor  * max_iovs, and max inline byte count.  If the #wqes is large, then we
253*9e39c5baSBill Taylor  * limit how much the SGL (space for inline data) can take.  The heuristic
254*9e39c5baSBill Taylor  * is to increase the memory for the send queue to a maximum of 32KB:
255*9e39c5baSBill Taylor  *
256*9e39c5baSBill Taylor  *	< 128 wqes	increase to at most 256 minus header
257*9e39c5baSBill Taylor  *	< 256 wqes	increase to at most 128 minus header
258*9e39c5baSBill Taylor  *	>= 256 wqes	use SGL unaltered
259*9e39c5baSBill Taylor  *
260*9e39c5baSBill Taylor  * If the env is supplied (max_inline >= 0), use it without checking.
261*9e39c5baSBill Taylor  */
262*9e39c5baSBill Taylor int
dapls_ib_max_request_iov(int iovs,int wqes,int max_iovs,int max_inline_bytes)263*9e39c5baSBill Taylor dapls_ib_max_request_iov(int iovs, int wqes, int max_iovs,
264*9e39c5baSBill Taylor     int max_inline_bytes)
265*9e39c5baSBill Taylor {
266*9e39c5baSBill Taylor 	int ret_iovs;
267*9e39c5baSBill Taylor 
268*9e39c5baSBill Taylor 	if (max_inline_bytes > 0) {
269*9e39c5baSBill Taylor 		ret_iovs = max_inline_bytes / sizeof (tavor_hw_wqe_sgl_t);
270*9e39c5baSBill Taylor 	} else if (wqes < 128) {
271*9e39c5baSBill Taylor 		max_inline_bytes = 256 - TAVOR_INLINE_HEADER_SIZE_MAX;
272*9e39c5baSBill Taylor 		ret_iovs = max_inline_bytes / sizeof (tavor_hw_wqe_sgl_t);
273*9e39c5baSBill Taylor 	} else if (wqes < 256) {
274*9e39c5baSBill Taylor 		max_inline_bytes = 128 - TAVOR_INLINE_HEADER_SIZE_MAX;
275*9e39c5baSBill Taylor 		ret_iovs = max_inline_bytes / sizeof (tavor_hw_wqe_sgl_t);
276*9e39c5baSBill Taylor 	} else {
277*9e39c5baSBill Taylor 		ret_iovs = iovs;
278*9e39c5baSBill Taylor 	}
279*9e39c5baSBill Taylor 
280*9e39c5baSBill Taylor 	if (ret_iovs > max_iovs)	/* do not exceed max */
281*9e39c5baSBill Taylor 		ret_iovs = max_iovs;
282*9e39c5baSBill Taylor 	if (iovs > ret_iovs)		/* never decrease iovs */
283*9e39c5baSBill Taylor 		ret_iovs = iovs;
284*9e39c5baSBill Taylor 	return (ret_iovs);
285*9e39c5baSBill Taylor }
286*9e39c5baSBill Taylor 
287*9e39c5baSBill Taylor /*
288*9e39c5baSBill Taylor  * dapli_tavor_wqe_send_build()
289*9e39c5baSBill Taylor  * Constructs a WQE for a given ibt_send_wr_t
290*9e39c5baSBill Taylor  */
291*9e39c5baSBill Taylor static int
dapli_tavor_wqe_send_build(ib_qp_handle_t qp,ibt_send_wr_t * wr,uint64_t * addr,uint_t * size)292*9e39c5baSBill Taylor dapli_tavor_wqe_send_build(ib_qp_handle_t qp, ibt_send_wr_t *wr,
293*9e39c5baSBill Taylor     uint64_t *addr, uint_t *size)
294*9e39c5baSBill Taylor {
295*9e39c5baSBill Taylor 	tavor_hw_snd_wqe_remaddr_t	*rc;
296*9e39c5baSBill Taylor 	tavor_hw_snd_wqe_bind_t		*bn;
297*9e39c5baSBill Taylor 	tavor_hw_wqe_sgl_t		*ds;
298*9e39c5baSBill Taylor 	ibt_wr_ds_t			*sgl;
299*9e39c5baSBill Taylor 	uint32_t			nds;
300*9e39c5baSBill Taylor 	uint32_t			len, total_len;
301*9e39c5baSBill Taylor 	uint32_t			tavor_num_mpt_mask;
302*9e39c5baSBill Taylor 	uint32_t			new_rkey;
303*9e39c5baSBill Taylor 	uint32_t			old_rkey;
304*9e39c5baSBill Taylor 	int				i, num_ds;
305*9e39c5baSBill Taylor 	int				max_inline_bytes = -1;
306*9e39c5baSBill Taylor 
307*9e39c5baSBill Taylor 	nds = wr->wr_nds;
308*9e39c5baSBill Taylor 	sgl = wr->wr_sgl;
309*9e39c5baSBill Taylor 	num_ds = 0;
310*9e39c5baSBill Taylor 
311*9e39c5baSBill Taylor 	/*
312*9e39c5baSBill Taylor 	 * RC is the only supported transport in UDAPL
313*9e39c5baSBill Taylor 	 * For RC requests, we allow "Send", "RDMA Read", "RDMA Write"
314*9e39c5baSBill Taylor 	 */
315*9e39c5baSBill Taylor 	switch (wr->wr_opcode) {
316*9e39c5baSBill Taylor 	case IBT_WRC_SEND:
317*9e39c5baSBill Taylor 		/*
318*9e39c5baSBill Taylor 		 * If this is a Send request, then all we need is
319*9e39c5baSBill Taylor 		 * the Data Segment processing below.
320*9e39c5baSBill Taylor 		 * Initialize the information for the Data Segments
321*9e39c5baSBill Taylor 		 */
322*9e39c5baSBill Taylor 		ds = (tavor_hw_wqe_sgl_t *)((uintptr_t)addr +
323*9e39c5baSBill Taylor 		    sizeof (tavor_hw_snd_wqe_nextctrl_t));
324*9e39c5baSBill Taylor 		if (qp->qp_sq_inline != 0)
325*9e39c5baSBill Taylor 			max_inline_bytes =
326*9e39c5baSBill Taylor 			    qp->qp_sq_wqesz - TAVOR_INLINE_HEADER_SIZE_SEND;
327*9e39c5baSBill Taylor 		break;
328*9e39c5baSBill Taylor 	case IBT_WRC_RDMAW:
329*9e39c5baSBill Taylor 		if (qp->qp_sq_inline != 0)
330*9e39c5baSBill Taylor 			max_inline_bytes =
331*9e39c5baSBill Taylor 			    qp->qp_sq_wqesz - TAVOR_INLINE_HEADER_SIZE_RDMAW;
332*9e39c5baSBill Taylor 		/* FALLTHROUGH */
333*9e39c5baSBill Taylor 	case IBT_WRC_RDMAR:
334*9e39c5baSBill Taylor 		if (qp->qp_sq_inline < 0 && wr->wr_opcode == IBT_WRC_RDMAR)
335*9e39c5baSBill Taylor 			qp->qp_sq_inline = 0;
336*9e39c5baSBill Taylor 		/*
337*9e39c5baSBill Taylor 		 * If this is an RDMA Read or RDMA Write request, then fill
338*9e39c5baSBill Taylor 		 * in the "Remote Address" header fields.
339*9e39c5baSBill Taylor 		 */
340*9e39c5baSBill Taylor 		rc = (tavor_hw_snd_wqe_remaddr_t *)((uintptr_t)addr +
341*9e39c5baSBill Taylor 		    sizeof (tavor_hw_snd_wqe_nextctrl_t));
342*9e39c5baSBill Taylor 
343*9e39c5baSBill Taylor 		/*
344*9e39c5baSBill Taylor 		 * Build the Remote Address Segment for the WQE, using
345*9e39c5baSBill Taylor 		 * the information from the RC work request.
346*9e39c5baSBill Taylor 		 */
347*9e39c5baSBill Taylor 		TAVOR_WQE_BUILD_REMADDR(rc, &wr->wr.rc.rcwr.rdma);
348*9e39c5baSBill Taylor 
349*9e39c5baSBill Taylor 		/* Update "ds" for filling in Data Segments (below) */
350*9e39c5baSBill Taylor 		ds = (tavor_hw_wqe_sgl_t *)((uintptr_t)rc +
351*9e39c5baSBill Taylor 		    sizeof (tavor_hw_snd_wqe_remaddr_t));
352*9e39c5baSBill Taylor 		break;
353*9e39c5baSBill Taylor 	case IBT_WRC_BIND:
354*9e39c5baSBill Taylor 		/*
355*9e39c5baSBill Taylor 		 * Generate a new R_key
356*9e39c5baSBill Taylor 		 * Increment the upper "unconstrained" bits and need to keep
357*9e39c5baSBill Taylor 		 * the lower "constrained" bits the same it represents
358*9e39c5baSBill Taylor 		 * the MPT index.
359*9e39c5baSBill Taylor 		 */
360*9e39c5baSBill Taylor 		old_rkey = wr->wr.rc.rcwr.bind->bind_rkey;
361*9e39c5baSBill Taylor 		tavor_num_mpt_mask = (uint32_t)(1 << qp->qp_num_mpt_shift) - 1;
362*9e39c5baSBill Taylor 		new_rkey = (old_rkey >> qp->qp_num_mpt_shift);
363*9e39c5baSBill Taylor 		new_rkey++;
364*9e39c5baSBill Taylor 		new_rkey = ((new_rkey << qp->qp_num_mpt_shift) |
365*9e39c5baSBill Taylor 		    (old_rkey & tavor_num_mpt_mask));
366*9e39c5baSBill Taylor 
367*9e39c5baSBill Taylor 		wr->wr.rc.rcwr.bind->bind_rkey_out = new_rkey;
368*9e39c5baSBill Taylor 
369*9e39c5baSBill Taylor 		bn = (tavor_hw_snd_wqe_bind_t *)((uintptr_t)addr +
370*9e39c5baSBill Taylor 		    sizeof (tavor_hw_snd_wqe_nextctrl_t));
371*9e39c5baSBill Taylor 
372*9e39c5baSBill Taylor 		/*
373*9e39c5baSBill Taylor 		 * Build the Bind Memory Window Segments for the WQE,
374*9e39c5baSBill Taylor 		 * using the information from the RC Bind memory
375*9e39c5baSBill Taylor 		 * window work request.
376*9e39c5baSBill Taylor 		 */
377*9e39c5baSBill Taylor 		TAVOR_WQE_BUILD_BIND(bn, wr->wr.rc.rcwr.bind);
378*9e39c5baSBill Taylor 
379*9e39c5baSBill Taylor 		/*
380*9e39c5baSBill Taylor 		 * Update the "ds" pointer.  Even though the "bind"
381*9e39c5baSBill Taylor 		 * operation requires no SGLs, this is necessary to
382*9e39c5baSBill Taylor 		 * facilitate the correct descriptor size calculations
383*9e39c5baSBill Taylor 		 * (below).
384*9e39c5baSBill Taylor 		 */
385*9e39c5baSBill Taylor 		ds = (tavor_hw_wqe_sgl_t *)((uintptr_t)bn +
386*9e39c5baSBill Taylor 		    sizeof (tavor_hw_snd_wqe_bind_t));
387*9e39c5baSBill Taylor 		break;
388*9e39c5baSBill Taylor 	default:
389*9e39c5baSBill Taylor 		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
390*9e39c5baSBill Taylor 		    "dapli_tavor_wqe_send_build: invalid wr_opcode=%d\n",
391*9e39c5baSBill Taylor 		    wr->wr_opcode);
392*9e39c5baSBill Taylor 		return (DAT_INTERNAL_ERROR);
393*9e39c5baSBill Taylor 	}
394*9e39c5baSBill Taylor 
395*9e39c5baSBill Taylor 	/*
396*9e39c5baSBill Taylor 	 * Now fill in the Data Segments (SGL) for the Send WQE based on
397*9e39c5baSBill Taylor 	 * the values setup above (i.e. "sgl", "nds", and the "ds" pointer
398*9e39c5baSBill Taylor 	 * Start by checking for a valid number of SGL entries
399*9e39c5baSBill Taylor 	 */
400*9e39c5baSBill Taylor 	if (nds > qp->qp_sq_sgl) {
401*9e39c5baSBill Taylor 		return (DAT_INVALID_PARAMETER);
402*9e39c5baSBill Taylor 	}
403*9e39c5baSBill Taylor 
404*9e39c5baSBill Taylor 	/*
405*9e39c5baSBill Taylor 	 * For each SGL in the Send Work Request, fill in the Send WQE's data
406*9e39c5baSBill Taylor 	 * segments.  Note: We skip any SGL with zero size because Tavor
407*9e39c5baSBill Taylor 	 * hardware cannot handle a zero for "byte_cnt" in the WQE.  Actually
408*9e39c5baSBill Taylor 	 * the encoding for zero means a 2GB transfer.  Because of this special
409*9e39c5baSBill Taylor 	 * encoding in the hardware, we mask the requested length with
410*9e39c5baSBill Taylor 	 * TAVOR_WQE_SGL_BYTE_CNT_MASK (so that 2GB will end up encoded as
411*9e39c5baSBill Taylor 	 * zero.)
412*9e39c5baSBill Taylor 	 */
413*9e39c5baSBill Taylor 
414*9e39c5baSBill Taylor 	if (max_inline_bytes != -1) {		/* compute total_len */
415*9e39c5baSBill Taylor 		total_len = 0;
416*9e39c5baSBill Taylor 		for (i = 0; i < nds; i++)
417*9e39c5baSBill Taylor 			total_len += sgl[i].ds_len;
418*9e39c5baSBill Taylor 		if (total_len > max_inline_bytes)
419*9e39c5baSBill Taylor 			max_inline_bytes = -1;	/* too big, do not "inline" */
420*9e39c5baSBill Taylor 	}
421*9e39c5baSBill Taylor 	if (max_inline_bytes != -1) {		/* do "inline" */
422*9e39c5baSBill Taylor 		uint8_t *dst = (uint8_t *)((uint32_t *)ds + 1);
423*9e39c5baSBill Taylor 		*(uint32_t *)ds =
424*9e39c5baSBill Taylor 		    HTOBE_32(total_len | TAVOR_WQE_SGL_INLINE_MASK);
425*9e39c5baSBill Taylor 		for (i = 0; i < nds; i++) {
426*9e39c5baSBill Taylor 			if ((len = sgl[i].ds_len) == 0) {
427*9e39c5baSBill Taylor 				continue;
428*9e39c5baSBill Taylor 			}
429*9e39c5baSBill Taylor 			(void) dapl_os_memcpy(dst,
430*9e39c5baSBill Taylor 			    (void *)(uintptr_t)sgl[i].ds_va, len);
431*9e39c5baSBill Taylor 			dst += len;
432*9e39c5baSBill Taylor 		}
433*9e39c5baSBill Taylor 		/* Return the size of descriptor (in 16-byte chunks) */
434*9e39c5baSBill Taylor 		*size = ((uintptr_t)dst - (uintptr_t)addr + 15) >> 4;
435*9e39c5baSBill Taylor 	} else {
436*9e39c5baSBill Taylor 		for (i = 0; i < nds; i++) {
437*9e39c5baSBill Taylor 			if (sgl[i].ds_len == 0) {
438*9e39c5baSBill Taylor 				continue;
439*9e39c5baSBill Taylor 			}
440*9e39c5baSBill Taylor 
441*9e39c5baSBill Taylor 			/*
442*9e39c5baSBill Taylor 			 * Fill in the Data Segment(s) for the current WQE,
443*9e39c5baSBill Taylor 			 * using the information contained in the
444*9e39c5baSBill Taylor 			 * scatter-gather list of the work request.
445*9e39c5baSBill Taylor 			 */
446*9e39c5baSBill Taylor 			TAVOR_WQE_BUILD_DATA_SEG(&ds[num_ds], &sgl[i]);
447*9e39c5baSBill Taylor 			num_ds++;
448*9e39c5baSBill Taylor 		}
449*9e39c5baSBill Taylor 
450*9e39c5baSBill Taylor 		/* Return the size of descriptor (in 16-byte chunks) */
451*9e39c5baSBill Taylor 		*size = ((uintptr_t)&ds[num_ds] - (uintptr_t)addr) >> 4;
452*9e39c5baSBill Taylor 	}
453*9e39c5baSBill Taylor 
454*9e39c5baSBill Taylor 	return (DAT_SUCCESS);
455*9e39c5baSBill Taylor }
456*9e39c5baSBill Taylor 
457*9e39c5baSBill Taylor /*
458*9e39c5baSBill Taylor  * dapli_tavor_wqe_send_linknext()
459*9e39c5baSBill Taylor  * Takes a WQE and links it to the prev WQE chain
460*9e39c5baSBill Taylor  */
461*9e39c5baSBill Taylor static void
dapli_tavor_wqe_send_linknext(ibt_send_wr_t * curr_wr,uint64_t * curr_addr,boolean_t ns,uint32_t curr_desc,uint_t curr_descsz,uint64_t * prev_addr,tavor_sw_wqe_dbinfo_t * dbinfo)462*9e39c5baSBill Taylor dapli_tavor_wqe_send_linknext(ibt_send_wr_t *curr_wr, uint64_t *curr_addr,
463*9e39c5baSBill Taylor     boolean_t ns, uint32_t curr_desc, uint_t curr_descsz, uint64_t *prev_addr,
464*9e39c5baSBill Taylor     tavor_sw_wqe_dbinfo_t *dbinfo)
465*9e39c5baSBill Taylor {
466*9e39c5baSBill Taylor 	uint64_t	next, ctrl;
467*9e39c5baSBill Taylor 	uint32_t	nopcode, fence;
468*9e39c5baSBill Taylor 
469*9e39c5baSBill Taylor 	next = 0;
470*9e39c5baSBill Taylor 	ctrl = 0;
471*9e39c5baSBill Taylor 
472*9e39c5baSBill Taylor 	/* Set the "c" (i.e. "signaled") bit appropriately */
473*9e39c5baSBill Taylor 	if (curr_wr->wr_flags & IBT_WR_SEND_SIGNAL) {
474*9e39c5baSBill Taylor 		ctrl = ctrl | TAVOR_WQE_SEND_SIGNALED_MASK;
475*9e39c5baSBill Taylor 	}
476*9e39c5baSBill Taylor 
477*9e39c5baSBill Taylor 	/* Set the "s" (i.e. "solicited") bit appropriately */
478*9e39c5baSBill Taylor 	if (curr_wr->wr_flags & IBT_WR_SEND_SOLICIT) {
479*9e39c5baSBill Taylor 		ctrl = ctrl | TAVOR_WQE_SEND_SOLICIT_MASK;
480*9e39c5baSBill Taylor 	}
481*9e39c5baSBill Taylor 	/* Set the "e" (i.e. "event") bit if notification is needed */
482*9e39c5baSBill Taylor 	if (!ns) {
483*9e39c5baSBill Taylor 		ctrl = ctrl | TAVOR_WQE_RCV_EVENT_MASK;
484*9e39c5baSBill Taylor 	}
485*9e39c5baSBill Taylor 
486*9e39c5baSBill Taylor 	/*
487*9e39c5baSBill Taylor 	 * The "i" bit is unused since uDAPL doesn't support
488*9e39c5baSBill Taylor 	 * the immediate data
489*9e39c5baSBill Taylor 	 */
490*9e39c5baSBill Taylor 
491*9e39c5baSBill Taylor 	/* initialize the ctrl and next fields of the current descriptor */
492*9e39c5baSBill Taylor 	TAVOR_WQE_LINKNEXT(curr_addr, ctrl, next);
493*9e39c5baSBill Taylor 
494*9e39c5baSBill Taylor 	/*
495*9e39c5baSBill Taylor 	 * Calculate the "next" field of the prev descriptor.  This amounts
496*9e39c5baSBill Taylor 	 * to setting up the "next_wqe_addr", "nopcode", "fence", and "nds"
497*9e39c5baSBill Taylor 	 * fields (see tavor_hw.h for more).
498*9e39c5baSBill Taylor 	 */
499*9e39c5baSBill Taylor 
500*9e39c5baSBill Taylor 	/*
501*9e39c5baSBill Taylor 	 * Determine the value for the Tavor WQE "nopcode" field
502*9e39c5baSBill Taylor 	 * by using the IBTF opcode from the work request
503*9e39c5baSBill Taylor 	 */
504*9e39c5baSBill Taylor 	switch (curr_wr->wr_opcode) {
505*9e39c5baSBill Taylor 	case IBT_WRC_RDMAW:
506*9e39c5baSBill Taylor 		nopcode = TAVOR_WQE_SEND_NOPCODE_RDMAW;
507*9e39c5baSBill Taylor 		break;
508*9e39c5baSBill Taylor 
509*9e39c5baSBill Taylor 	case IBT_WRC_SEND:
510*9e39c5baSBill Taylor 		nopcode = TAVOR_WQE_SEND_NOPCODE_SEND;
511*9e39c5baSBill Taylor 		break;
512*9e39c5baSBill Taylor 
513*9e39c5baSBill Taylor 	case IBT_WRC_RDMAR:
514*9e39c5baSBill Taylor 		nopcode = TAVOR_WQE_SEND_NOPCODE_RDMAR;
515*9e39c5baSBill Taylor 		break;
516*9e39c5baSBill Taylor 
517*9e39c5baSBill Taylor 	case IBT_WRC_BIND:
518*9e39c5baSBill Taylor 		nopcode = TAVOR_WQE_SEND_NOPCODE_BIND;
519*9e39c5baSBill Taylor 		break;
520*9e39c5baSBill Taylor 	default:
521*9e39c5baSBill Taylor 		/* Unsupported opcodes in UDAPL */
522*9e39c5baSBill Taylor 		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
523*9e39c5baSBill Taylor 		    "dapli_tavor_wqe_send_linknext: invalid nopcode=%d\n",
524*9e39c5baSBill Taylor 		    nopcode);
525*9e39c5baSBill Taylor 		return;
526*9e39c5baSBill Taylor 	}
527*9e39c5baSBill Taylor 
528*9e39c5baSBill Taylor 	next  = ((uint64_t)curr_desc & TAVOR_WQE_NDA_MASK) << 32;
529*9e39c5baSBill Taylor 	next  = next | ((uint64_t)nopcode << 32);
530*9e39c5baSBill Taylor 	fence = (curr_wr->wr_flags & IBT_WR_SEND_FENCE) ? 1 : 0;
531*9e39c5baSBill Taylor 	if (fence) {
532*9e39c5baSBill Taylor 		next = next | TAVOR_WQE_SEND_FENCE_MASK;
533*9e39c5baSBill Taylor 	}
534*9e39c5baSBill Taylor 	next = next | (curr_descsz & TAVOR_WQE_NDS_MASK);
535*9e39c5baSBill Taylor 
536*9e39c5baSBill Taylor 	/*
537*9e39c5baSBill Taylor 	 * A send queue doorbell will be rung for the next
538*9e39c5baSBill Taylor 	 * WQE on the chain, set the current WQE's "dbd" bit.
539*9e39c5baSBill Taylor 	 * Note: We also update the "dbinfo" structure here to pass
540*9e39c5baSBill Taylor 	 * back information about what should (later) be included
541*9e39c5baSBill Taylor 	 * in the send queue doorbell.
542*9e39c5baSBill Taylor 	 */
543*9e39c5baSBill Taylor 	next = next | TAVOR_WQE_DBD_MASK;
544*9e39c5baSBill Taylor 	dbinfo->db_nopcode = nopcode;
545*9e39c5baSBill Taylor 	dbinfo->db_fence   = fence;
546*9e39c5baSBill Taylor 
547*9e39c5baSBill Taylor 	/*
548*9e39c5baSBill Taylor 	 * Send queue doorbell will be rung for the next WQE on
549*9e39c5baSBill Taylor 	 * the chain, update the prev WQE's "next" field and return.
550*9e39c5baSBill Taylor 	 */
551*9e39c5baSBill Taylor 	if (prev_addr != NULL) {
552*9e39c5baSBill Taylor 		TAVOR_WQE_LINKFIRST(prev_addr, next);
553*9e39c5baSBill Taylor 	}
554*9e39c5baSBill Taylor }
555*9e39c5baSBill Taylor 
556*9e39c5baSBill Taylor 
557*9e39c5baSBill Taylor /*
558*9e39c5baSBill Taylor  * dapli_tavor_wqe_recv_build()
559*9e39c5baSBill Taylor  * Builds the recv WQE for a given ibt_recv_wr_t
560*9e39c5baSBill Taylor  */
561*9e39c5baSBill Taylor static DAT_RETURN
dapli_tavor_wqe_recv_build(ib_qp_handle_t qp,ibt_recv_wr_t * wr,uint64_t * addr,uint_t * size)562*9e39c5baSBill Taylor dapli_tavor_wqe_recv_build(ib_qp_handle_t qp, ibt_recv_wr_t *wr,
563*9e39c5baSBill Taylor     uint64_t *addr, uint_t *size)
564*9e39c5baSBill Taylor {
565*9e39c5baSBill Taylor 	tavor_hw_wqe_sgl_t	*ds;
566*9e39c5baSBill Taylor 	int			i;
567*9e39c5baSBill Taylor 	int			num_ds;
568*9e39c5baSBill Taylor 
569*9e39c5baSBill Taylor 	/* Fill in the Data Segments (SGL) for the Recv WQE */
570*9e39c5baSBill Taylor 	ds = (tavor_hw_wqe_sgl_t *)((uintptr_t)addr +
571*9e39c5baSBill Taylor 	    sizeof (tavor_hw_rcv_wqe_nextctrl_t));
572*9e39c5baSBill Taylor 	num_ds = 0;
573*9e39c5baSBill Taylor 
574*9e39c5baSBill Taylor 	/* Check for valid number of SGL entries */
575*9e39c5baSBill Taylor 	if (wr->wr_nds > qp->qp_rq_sgl) {
576*9e39c5baSBill Taylor 		return (DAT_INVALID_PARAMETER);
577*9e39c5baSBill Taylor 	}
578*9e39c5baSBill Taylor 
579*9e39c5baSBill Taylor 	/*
580*9e39c5baSBill Taylor 	 * For each SGL in the Recv Work Request, fill in the Recv WQE's data
581*9e39c5baSBill Taylor 	 * segments.  Note: We skip any SGL with zero size because Tavor
582*9e39c5baSBill Taylor 	 * hardware cannot handle a zero for "byte_cnt" in the WQE.  Actually
583*9e39c5baSBill Taylor 	 * the encoding for zero means a 2GB transfer.  Because of this special
584*9e39c5baSBill Taylor 	 * encoding in the hardware, we mask the requested length with
585*9e39c5baSBill Taylor 	 * TAVOR_WQE_SGL_BYTE_CNT_MASK (so that 2GB will end up encoded as
586*9e39c5baSBill Taylor 	 * zero.)
587*9e39c5baSBill Taylor 	 */
588*9e39c5baSBill Taylor 	for (i = 0; i < wr->wr_nds; i++) {
589*9e39c5baSBill Taylor 		if (wr->wr_sgl[i].ds_len == 0) {
590*9e39c5baSBill Taylor 			continue;
591*9e39c5baSBill Taylor 		}
592*9e39c5baSBill Taylor 
593*9e39c5baSBill Taylor 		/*
594*9e39c5baSBill Taylor 		 * Fill in the Data Segment(s) for the receive WQE, using the
595*9e39c5baSBill Taylor 		 * information contained in the scatter-gather list of the
596*9e39c5baSBill Taylor 		 * work request.
597*9e39c5baSBill Taylor 		 */
598*9e39c5baSBill Taylor 		TAVOR_WQE_BUILD_DATA_SEG(&ds[num_ds], &wr->wr_sgl[i]);
599*9e39c5baSBill Taylor 		num_ds++;
600*9e39c5baSBill Taylor 	}
601*9e39c5baSBill Taylor 
602*9e39c5baSBill Taylor 	/* Return the size of descriptor (in 16-byte chunks) */
603*9e39c5baSBill Taylor 	*size = ((uintptr_t)&ds[num_ds] - (uintptr_t)addr) >> 0x4;
604*9e39c5baSBill Taylor 
605*9e39c5baSBill Taylor 	return (DAT_SUCCESS);
606*9e39c5baSBill Taylor }
607*9e39c5baSBill Taylor 
608*9e39c5baSBill Taylor 
609*9e39c5baSBill Taylor /*
610*9e39c5baSBill Taylor  * dapli_tavor_wqe_recv_linknext()
611*9e39c5baSBill Taylor  * Links a recv WQE to the prev chain
612*9e39c5baSBill Taylor  */
613*9e39c5baSBill Taylor static void
dapli_tavor_wqe_recv_linknext(uint64_t * curr_addr,boolean_t ns,uint32_t curr_desc,uint_t curr_descsz,uint64_t * prev_addr)614*9e39c5baSBill Taylor dapli_tavor_wqe_recv_linknext(uint64_t *curr_addr, boolean_t ns,
615*9e39c5baSBill Taylor     uint32_t curr_desc, uint_t curr_descsz, uint64_t *prev_addr)
616*9e39c5baSBill Taylor {
617*9e39c5baSBill Taylor 	uint64_t	next;
618*9e39c5baSBill Taylor 	uint64_t	ctrl = 0;
619*9e39c5baSBill Taylor 
620*9e39c5baSBill Taylor 	/*
621*9e39c5baSBill Taylor 	 * Note: curr_addr is the last WQE (In uDAPL we manipulate 1 WQE
622*9e39c5baSBill Taylor 	 * at a time. If there is no next descriptor (i.e. if the current
623*9e39c5baSBill Taylor 	 * descriptor is the last WQE on the chain), then set "next" field
624*9e39c5baSBill Taylor 	 * to TAVOR_WQE_DBD_MASK.  This is because the Tavor hardware
625*9e39c5baSBill Taylor 	 * requires the "dbd" bit to be set to one for all Recv WQEs.
626*9e39c5baSBill Taylor 	 * In either case, we must add a single bit in the "reserved" field
627*9e39c5baSBill Taylor 	 * (TAVOR_RCV_WQE_NDA0_WA_MASK) following the NDA.  This is the
628*9e39c5baSBill Taylor 	 * workaround for a known Tavor errata that can cause Recv WQEs with
629*9e39c5baSBill Taylor 	 * zero in the NDA field to behave improperly.
630*9e39c5baSBill Taylor 	 *
631*9e39c5baSBill Taylor 	 * If notification suppression is not desired then we set
632*9e39c5baSBill Taylor 	 * the "E" bit in the ctrl field.
633*9e39c5baSBill Taylor 	 */
634*9e39c5baSBill Taylor 
635*9e39c5baSBill Taylor 	next = TAVOR_WQE_DBD_MASK | TAVOR_RCV_WQE_NDA0_WA_MASK;
636*9e39c5baSBill Taylor 	if (!ns) { /* notification needed - so set the "E" bit */
637*9e39c5baSBill Taylor 		ctrl = TAVOR_WQE_RCV_EVENT_MASK;
638*9e39c5baSBill Taylor 	}
639*9e39c5baSBill Taylor 
640*9e39c5baSBill Taylor 	/* update the WQE */
641*9e39c5baSBill Taylor 	TAVOR_WQE_LINKNEXT(curr_addr, ctrl, next);
642*9e39c5baSBill Taylor 
643*9e39c5baSBill Taylor 	if (prev_addr != NULL) {
644*9e39c5baSBill Taylor 		/*
645*9e39c5baSBill Taylor 		 * Calculate the "next" field of the descriptor.  This amounts
646*9e39c5baSBill Taylor 		 * to setting up the "next_wqe_addr", "dbd", and "nds" fields
647*9e39c5baSBill Taylor 		 * (see tavor_hw.h for more).
648*9e39c5baSBill Taylor 		 */
649*9e39c5baSBill Taylor 		next = ((uint64_t)curr_desc & TAVOR_WQE_NDA_MASK) << 32;
650*9e39c5baSBill Taylor 		next = next | (curr_descsz & TAVOR_WQE_NDS_MASK) |
651*9e39c5baSBill Taylor 		    TAVOR_WQE_DBD_MASK | TAVOR_RCV_WQE_NDA0_WA_MASK;
652*9e39c5baSBill Taylor 
653*9e39c5baSBill Taylor 		/*
654*9e39c5baSBill Taylor 		 * If this WQE is supposed to be linked to the previous
655*9e39c5baSBill Taylor 		 * descriptor, then we need to update not only the previous
656*9e39c5baSBill Taylor 		 * WQE's "next" fields but we must not touch this WQE's
657*9e39c5baSBill Taylor 		 * "ctrl" fields.
658*9e39c5baSBill Taylor 		 */
659*9e39c5baSBill Taylor 		TAVOR_WQE_LINKFIRST(prev_addr, next);
660*9e39c5baSBill Taylor 	}
661*9e39c5baSBill Taylor }
662*9e39c5baSBill Taylor 
663*9e39c5baSBill Taylor /*
664*9e39c5baSBill Taylor  * dapli_tavor_wqe_srq_build()
665*9e39c5baSBill Taylor  * Builds the recv WQE for a given ibt_recv_wr_t
666*9e39c5baSBill Taylor  */
667*9e39c5baSBill Taylor static DAT_RETURN
dapli_tavor_wqe_srq_build(ib_srq_handle_t srq,ibt_recv_wr_t * wr,uint64_t * addr)668*9e39c5baSBill Taylor dapli_tavor_wqe_srq_build(ib_srq_handle_t srq, ibt_recv_wr_t *wr,
669*9e39c5baSBill Taylor     uint64_t *addr)
670*9e39c5baSBill Taylor {
671*9e39c5baSBill Taylor 	tavor_hw_wqe_sgl_t	*ds;
672*9e39c5baSBill Taylor 	ibt_wr_ds_t		end_sgl;
673*9e39c5baSBill Taylor 	int			i;
674*9e39c5baSBill Taylor 	int			num_ds;
675*9e39c5baSBill Taylor 
676*9e39c5baSBill Taylor 	/* Fill in the Data Segments (SGL) for the Recv WQE */
677*9e39c5baSBill Taylor 	ds = (tavor_hw_wqe_sgl_t *)((uintptr_t)addr +
678*9e39c5baSBill Taylor 	    sizeof (tavor_hw_rcv_wqe_nextctrl_t));
679*9e39c5baSBill Taylor 	num_ds = 0;
680*9e39c5baSBill Taylor 
681*9e39c5baSBill Taylor 	/* Check for valid number of SGL entries */
682*9e39c5baSBill Taylor 	if (wr->wr_nds > srq->srq_wq_sgl) {
683*9e39c5baSBill Taylor 		return (DAT_INVALID_PARAMETER);
684*9e39c5baSBill Taylor 	}
685*9e39c5baSBill Taylor 
686*9e39c5baSBill Taylor 	/*
687*9e39c5baSBill Taylor 	 * For each SGL in the Recv Work Request, fill in the Recv WQE's data
688*9e39c5baSBill Taylor 	 * segments.  Note: We skip any SGL with zero size because Tavor
689*9e39c5baSBill Taylor 	 * hardware cannot handle a zero for "byte_cnt" in the WQE.  Actually
690*9e39c5baSBill Taylor 	 * the encoding for zero means a 2GB transfer.  Because of this special
691*9e39c5baSBill Taylor 	 * encoding in the hardware, we mask the requested length with
692*9e39c5baSBill Taylor 	 * TAVOR_WQE_SGL_BYTE_CNT_MASK (so that 2GB will end up encoded as
693*9e39c5baSBill Taylor 	 * zero.)
694*9e39c5baSBill Taylor 	 */
695*9e39c5baSBill Taylor 	for (i = 0; i < wr->wr_nds; i++) {
696*9e39c5baSBill Taylor 		if (wr->wr_sgl[i].ds_len == 0) {
697*9e39c5baSBill Taylor 			continue;
698*9e39c5baSBill Taylor 		}
699*9e39c5baSBill Taylor 
700*9e39c5baSBill Taylor 		/*
701*9e39c5baSBill Taylor 		 * Fill in the Data Segment(s) for the receive WQE, using the
702*9e39c5baSBill Taylor 		 * information contained in the scatter-gather list of the
703*9e39c5baSBill Taylor 		 * work request.
704*9e39c5baSBill Taylor 		 */
705*9e39c5baSBill Taylor 		TAVOR_WQE_BUILD_DATA_SEG(&ds[num_ds], &wr->wr_sgl[i]);
706*9e39c5baSBill Taylor 		num_ds++;
707*9e39c5baSBill Taylor 	}
708*9e39c5baSBill Taylor 
709*9e39c5baSBill Taylor 	/*
710*9e39c5baSBill Taylor 	 * For SRQ, if the number of data segments is less than the maximum
711*9e39c5baSBill Taylor 	 * specified at alloc, then we have to fill in a special "key" entry in
712*9e39c5baSBill Taylor 	 * the sgl entry after the last valid one in this post request.  We do
713*9e39c5baSBill Taylor 	 * that here.
714*9e39c5baSBill Taylor 	 */
715*9e39c5baSBill Taylor 	if (num_ds < srq->srq_wq_sgl) {
716*9e39c5baSBill Taylor 		end_sgl.ds_va  = (ib_vaddr_t)0;
717*9e39c5baSBill Taylor 		end_sgl.ds_len = (ib_msglen_t)0;
718*9e39c5baSBill Taylor 		end_sgl.ds_key = (ibt_lkey_t)1;
719*9e39c5baSBill Taylor 		TAVOR_WQE_BUILD_DATA_SEG(&ds[num_ds], &end_sgl);
720*9e39c5baSBill Taylor 	}
721*9e39c5baSBill Taylor 
722*9e39c5baSBill Taylor 	return (DAT_SUCCESS);
723*9e39c5baSBill Taylor }
724*9e39c5baSBill Taylor 
725*9e39c5baSBill Taylor /*
726*9e39c5baSBill Taylor  * dapli_tavor_wqe_srq_linknext()
727*9e39c5baSBill Taylor  * Links a srq recv WQE to the prev chain
728*9e39c5baSBill Taylor  */
729*9e39c5baSBill Taylor static void
dapli_tavor_wqe_srq_linknext(uint64_t * curr_addr,boolean_t ns,uint32_t curr_desc,uint64_t * prev_addr)730*9e39c5baSBill Taylor dapli_tavor_wqe_srq_linknext(uint64_t *curr_addr, boolean_t ns,
731*9e39c5baSBill Taylor     uint32_t curr_desc, uint64_t *prev_addr)
732*9e39c5baSBill Taylor {
733*9e39c5baSBill Taylor 	uint64_t	next;
734*9e39c5baSBill Taylor 	uint64_t	ctrl = 0;
735*9e39c5baSBill Taylor 
736*9e39c5baSBill Taylor 	/*
737*9e39c5baSBill Taylor 	 * Note: curr_addr is the last WQE (In uDAPL we manipulate 1 WQE
738*9e39c5baSBill Taylor 	 * at a time. If there is no next descriptor (i.e. if the current
739*9e39c5baSBill Taylor 	 * descriptor is the last WQE on the chain), then set "next" field
740*9e39c5baSBill Taylor 	 * to TAVOR_WQE_DBD_MASK.  This is because the Tavor hardware
741*9e39c5baSBill Taylor 	 * requires the "dbd" bit to be set to one for all Recv WQEs.
742*9e39c5baSBill Taylor 	 * In either case, we must add a single bit in the "reserved" field
743*9e39c5baSBill Taylor 	 * (TAVOR_RCV_WQE_NDA0_WA_MASK) following the NDA.  This is the
744*9e39c5baSBill Taylor 	 * workaround for a known Tavor errata that can cause Recv WQEs with
745*9e39c5baSBill Taylor 	 * zero in the NDA field to behave improperly.
746*9e39c5baSBill Taylor 	 *
747*9e39c5baSBill Taylor 	 * If notification suppression is not desired then we set
748*9e39c5baSBill Taylor 	 * the "E" bit in the ctrl field.
749*9e39c5baSBill Taylor 	 */
750*9e39c5baSBill Taylor 
751*9e39c5baSBill Taylor 	next = TAVOR_RCV_WQE_NDA0_WA_MASK;
752*9e39c5baSBill Taylor 	if (!ns) { /* notification needed - so set the "E" bit */
753*9e39c5baSBill Taylor 		ctrl = TAVOR_WQE_RCV_EVENT_MASK;
754*9e39c5baSBill Taylor 	}
755*9e39c5baSBill Taylor 
756*9e39c5baSBill Taylor 	/* update the WQE */
757*9e39c5baSBill Taylor 	TAVOR_WQE_LINKNEXT(curr_addr, ctrl, next);
758*9e39c5baSBill Taylor 
759*9e39c5baSBill Taylor 	if (prev_addr != NULL) {
760*9e39c5baSBill Taylor 		/*
761*9e39c5baSBill Taylor 		 * Calculate the "next" field of the descriptor.  This amounts
762*9e39c5baSBill Taylor 		 * to setting up the "next_wqe_addr", "dbd", and "nds" fields
763*9e39c5baSBill Taylor 		 * (see tavor_hw.h for more).
764*9e39c5baSBill Taylor 		 */
765*9e39c5baSBill Taylor 		next = ((uint64_t)curr_desc & TAVOR_WQE_NDA_MASK) << 32;
766*9e39c5baSBill Taylor 		next = next | TAVOR_WQE_DBD_MASK | TAVOR_RCV_WQE_NDA0_WA_MASK;
767*9e39c5baSBill Taylor 
768*9e39c5baSBill Taylor 		/*
769*9e39c5baSBill Taylor 		 * If this WQE is supposed to be linked to the previous
770*9e39c5baSBill Taylor 		 * descriptor, then we need to update not only the previous
771*9e39c5baSBill Taylor 		 * WQE's "next" fields but we must not touch this WQE's
772*9e39c5baSBill Taylor 		 * "ctrl" fields.
773*9e39c5baSBill Taylor 		 */
774*9e39c5baSBill Taylor 		TAVOR_WQE_LINKFIRST(prev_addr, next);
775*9e39c5baSBill Taylor 	}
776*9e39c5baSBill Taylor }
777*9e39c5baSBill Taylor 
778*9e39c5baSBill Taylor /*
779*9e39c5baSBill Taylor  * dapli_tavor_cq_peek()
780*9e39c5baSBill Taylor  * Peeks into a given CQ to check if there are any events that can be
781*9e39c5baSBill Taylor  * polled. It returns the number of CQEs that can be polled.
782*9e39c5baSBill Taylor  */
783*9e39c5baSBill Taylor static void
dapli_tavor_cq_peek(ib_cq_handle_t cq,int * num_cqe)784*9e39c5baSBill Taylor dapli_tavor_cq_peek(ib_cq_handle_t cq, int *num_cqe)
785*9e39c5baSBill Taylor {
786*9e39c5baSBill Taylor 	tavor_hw_cqe_t		*cqe;
787*9e39c5baSBill Taylor 	uint32_t		imm_eth_pkey_cred;
788*9e39c5baSBill Taylor 	uint32_t		cons_indx;
789*9e39c5baSBill Taylor 	uint32_t		wrap_around_mask;
790*9e39c5baSBill Taylor 	uint32_t		polled_cnt;
791*9e39c5baSBill Taylor 	uint_t			doorbell_cnt;
792*9e39c5baSBill Taylor 	uint_t			opcode;
793*9e39c5baSBill Taylor 
794*9e39c5baSBill Taylor 	/* Get the consumer index */
795*9e39c5baSBill Taylor 	cons_indx = cq->cq_consindx;
796*9e39c5baSBill Taylor 
797*9e39c5baSBill Taylor 	/*
798*9e39c5baSBill Taylor 	 * Calculate the wrap around mask.  Note: This operation only works
799*9e39c5baSBill Taylor 	 * because all Tavor completion queues have power-of-2 sizes
800*9e39c5baSBill Taylor 	 */
801*9e39c5baSBill Taylor 	wrap_around_mask = (cq->cq_size - 1);
802*9e39c5baSBill Taylor 
803*9e39c5baSBill Taylor 	/* Calculate the pointer to the first CQ entry */
804*9e39c5baSBill Taylor 	cqe = &cq->cq_addr[cons_indx];
805*9e39c5baSBill Taylor 
806*9e39c5baSBill Taylor 	/*
807*9e39c5baSBill Taylor 	 * Count entries in the CQ until we find an entry owned by
808*9e39c5baSBill Taylor 	 * the hardware.
809*9e39c5baSBill Taylor 	 */
810*9e39c5baSBill Taylor 	polled_cnt = 0;
811*9e39c5baSBill Taylor 	while (TAVOR_CQE_OWNER_IS_SW(cqe)) {
812*9e39c5baSBill Taylor 		opcode = TAVOR_CQE_OPCODE_GET(cqe);
813*9e39c5baSBill Taylor 		/* Error CQE map to multiple work completions */
814*9e39c5baSBill Taylor 		if ((opcode == TAVOR_CQE_SEND_ERR_OPCODE) ||
815*9e39c5baSBill Taylor 		    (opcode == TAVOR_CQE_RECV_ERR_OPCODE)) {
816*9e39c5baSBill Taylor 			imm_eth_pkey_cred =
817*9e39c5baSBill Taylor 			    TAVOR_CQE_IMM_ETH_PKEY_CRED_GET(cqe);
818*9e39c5baSBill Taylor 			doorbell_cnt =
819*9e39c5baSBill Taylor 			    imm_eth_pkey_cred & TAVOR_CQE_ERR_DBDCNT_MASK;
820*9e39c5baSBill Taylor 			polled_cnt += (doorbell_cnt + 1);
821*9e39c5baSBill Taylor 		} else {
822*9e39c5baSBill Taylor 			polled_cnt++;
823*9e39c5baSBill Taylor 		}
824*9e39c5baSBill Taylor 		/* Increment the consumer index */
825*9e39c5baSBill Taylor 		cons_indx = (cons_indx + 1) & wrap_around_mask;
826*9e39c5baSBill Taylor 
827*9e39c5baSBill Taylor 		/* Update the pointer to the next CQ entry */
828*9e39c5baSBill Taylor 		cqe = &cq->cq_addr[cons_indx];
829*9e39c5baSBill Taylor 	}
830*9e39c5baSBill Taylor 
831*9e39c5baSBill Taylor 	*num_cqe = polled_cnt;
832*9e39c5baSBill Taylor }
833*9e39c5baSBill Taylor 
834*9e39c5baSBill Taylor /*
835*9e39c5baSBill Taylor  * dapli_tavor_cq_poll()
836*9e39c5baSBill Taylor  * This routine polls CQEs out of a CQ and puts them into the ibt_wc_t
837*9e39c5baSBill Taylor  * array that is passed in.
838*9e39c5baSBill Taylor  */
839*9e39c5baSBill Taylor static DAT_RETURN
dapli_tavor_cq_poll(ib_cq_handle_t cq,ibt_wc_t * wc_p,uint_t num_wc,uint_t * num_polled)840*9e39c5baSBill Taylor dapli_tavor_cq_poll(ib_cq_handle_t cq, ibt_wc_t *wc_p, uint_t num_wc,
841*9e39c5baSBill Taylor     uint_t *num_polled)
842*9e39c5baSBill Taylor {
843*9e39c5baSBill Taylor 	tavor_hw_cqe_t		*cqe;
844*9e39c5baSBill Taylor 	uint32_t		cons_indx;
845*9e39c5baSBill Taylor 	uint32_t		wrap_around_mask;
846*9e39c5baSBill Taylor 	uint32_t		polled_cnt;
847*9e39c5baSBill Taylor 	uint32_t		num_to_increment;
848*9e39c5baSBill Taylor 	DAT_RETURN		dat_status;
849*9e39c5baSBill Taylor 	int			status;
850*9e39c5baSBill Taylor 
851*9e39c5baSBill Taylor 	/* Get the consumer index */
852*9e39c5baSBill Taylor 	cons_indx = cq->cq_consindx;
853*9e39c5baSBill Taylor 
854*9e39c5baSBill Taylor 	/*
855*9e39c5baSBill Taylor 	 * Calculate the wrap around mask.  Note: This operation only works
856*9e39c5baSBill Taylor 	 * because all Tavor completion queues have power-of-2 sizes
857*9e39c5baSBill Taylor 	 */
858*9e39c5baSBill Taylor 	wrap_around_mask = (cq->cq_size - 1);
859*9e39c5baSBill Taylor 
860*9e39c5baSBill Taylor 	/* Calculate the pointer to the first CQ entry */
861*9e39c5baSBill Taylor 	cqe = &cq->cq_addr[cons_indx];
862*9e39c5baSBill Taylor 
863*9e39c5baSBill Taylor 	/*
864*9e39c5baSBill Taylor 	 * Keep pulling entries from the CQ until we find an entry owned by
865*9e39c5baSBill Taylor 	 * the hardware.  As long as there the CQE's owned by SW, process
866*9e39c5baSBill Taylor 	 * each entry by calling dapli_tavor_cq_cqe_consume() and updating the
867*9e39c5baSBill Taylor 	 * CQ consumer index.  Note:  We only update the consumer index if
868*9e39c5baSBill Taylor 	 * dapli_tavor_cq_cqe_consume() returns TAVOR_CQ_SYNC_AND_DB.
869*9e39c5baSBill Taylor 	 * Otherwise, it indicates that we are going to "recycle" the CQE
870*9e39c5baSBill Taylor 	 * (probably because it is a error CQE and corresponds to more than one
871*9e39c5baSBill Taylor 	 * completion).
872*9e39c5baSBill Taylor 	 */
873*9e39c5baSBill Taylor 	polled_cnt = 0;
874*9e39c5baSBill Taylor 	while (TAVOR_CQE_OWNER_IS_SW(cqe)) {
875*9e39c5baSBill Taylor 		status = dapli_tavor_cq_cqe_consume(cq, cqe,
876*9e39c5baSBill Taylor 		    &wc_p[polled_cnt++]);
877*9e39c5baSBill Taylor 		if (status == TAVOR_CQ_SYNC_AND_DB) {
878*9e39c5baSBill Taylor 			/* Reset entry to hardware ownership */
879*9e39c5baSBill Taylor 			TAVOR_CQE_OWNER_SET_HW(cqe);
880*9e39c5baSBill Taylor 
881*9e39c5baSBill Taylor 			/* Increment the consumer index */
882*9e39c5baSBill Taylor 			cons_indx = (cons_indx + 1) & wrap_around_mask;
883*9e39c5baSBill Taylor 
884*9e39c5baSBill Taylor 			/* Update the pointer to the next CQ entry */
885*9e39c5baSBill Taylor 			cqe = &cq->cq_addr[cons_indx];
886*9e39c5baSBill Taylor 		}
887*9e39c5baSBill Taylor 
888*9e39c5baSBill Taylor 		/*
889*9e39c5baSBill Taylor 		 * If we have run out of space to store work completions,
890*9e39c5baSBill Taylor 		 * then stop and return the ones we have pulled of the CQ.
891*9e39c5baSBill Taylor 		 */
892*9e39c5baSBill Taylor 		if (polled_cnt >= num_wc) {
893*9e39c5baSBill Taylor 			break;
894*9e39c5baSBill Taylor 		}
895*9e39c5baSBill Taylor 	}
896*9e39c5baSBill Taylor 
897*9e39c5baSBill Taylor 	dat_status = DAT_SUCCESS;
898*9e39c5baSBill Taylor 	/*
899*9e39c5baSBill Taylor 	 * Now we only ring the doorbell (to update the consumer index) if
900*9e39c5baSBill Taylor 	 * we've actually consumed a CQ entry.  If we have, for example,
901*9e39c5baSBill Taylor 	 * pulled from a CQE that we are still in the process of "recycling"
902*9e39c5baSBill Taylor 	 * for error purposes, then we would not update the consumer index.
903*9e39c5baSBill Taylor 	 */
904*9e39c5baSBill Taylor 	if ((polled_cnt != 0) && (cq->cq_consindx != cons_indx)) {
905*9e39c5baSBill Taylor 		/*
906*9e39c5baSBill Taylor 		 * Post doorbell to update the consumer index.  Doorbell
907*9e39c5baSBill Taylor 		 * value indicates number of entries consumed (minus 1)
908*9e39c5baSBill Taylor 		 */
909*9e39c5baSBill Taylor 		if (cons_indx > cq->cq_consindx) {
910*9e39c5baSBill Taylor 			num_to_increment = (cons_indx - cq->cq_consindx) - 1;
911*9e39c5baSBill Taylor 		} else {
912*9e39c5baSBill Taylor 			num_to_increment = ((cons_indx + cq->cq_size) -
913*9e39c5baSBill Taylor 			    cq->cq_consindx) - 1;
914*9e39c5baSBill Taylor 		}
915*9e39c5baSBill Taylor 		cq->cq_consindx = cons_indx;
916*9e39c5baSBill Taylor 		dapli_tavor_cq_doorbell(cq->cq_iauar, TAVOR_CQDB_INCR_CONSINDX,
917*9e39c5baSBill Taylor 		    cq->cq_num, num_to_increment);
918*9e39c5baSBill Taylor 	} else if (polled_cnt == 0) {
919*9e39c5baSBill Taylor 		/*
920*9e39c5baSBill Taylor 		 * If the CQ is empty, we can try to free up some of the WRID
921*9e39c5baSBill Taylor 		 * list containers.
922*9e39c5baSBill Taylor 		 */
923*9e39c5baSBill Taylor 		if (cq->cq_wrid_reap_head)	/* look before leaping */
924*9e39c5baSBill Taylor 			dapls_tavor_wrid_cq_reap(cq);
925*9e39c5baSBill Taylor 		dat_status = DAT_ERROR(DAT_QUEUE_EMPTY, 0);
926*9e39c5baSBill Taylor 	}
927*9e39c5baSBill Taylor 
928*9e39c5baSBill Taylor 	if (num_polled != NULL) {
929*9e39c5baSBill Taylor 		*num_polled = polled_cnt;
930*9e39c5baSBill Taylor 	}
931*9e39c5baSBill Taylor 
932*9e39c5baSBill Taylor 	return (dat_status);
933*9e39c5baSBill Taylor }
934*9e39c5baSBill Taylor 
935*9e39c5baSBill Taylor /*
936*9e39c5baSBill Taylor  * dapli_tavor_cq_poll_one()
937*9e39c5baSBill Taylor  * This routine polls one CQE out of a CQ and puts ot into the ibt_wc_t
938*9e39c5baSBill Taylor  * that is passed in.  See above for more comments/details.
939*9e39c5baSBill Taylor  */
940*9e39c5baSBill Taylor static DAT_RETURN
dapli_tavor_cq_poll_one(ib_cq_handle_t cq,ibt_wc_t * wc_p)941*9e39c5baSBill Taylor dapli_tavor_cq_poll_one(ib_cq_handle_t cq, ibt_wc_t *wc_p)
942*9e39c5baSBill Taylor {
943*9e39c5baSBill Taylor 	tavor_hw_cqe_t		*cqe;
944*9e39c5baSBill Taylor 	uint32_t		cons_indx;
945*9e39c5baSBill Taylor 	DAT_RETURN		dat_status;
946*9e39c5baSBill Taylor 	int			status;
947*9e39c5baSBill Taylor 
948*9e39c5baSBill Taylor 	/* Get the consumer index */
949*9e39c5baSBill Taylor 	cons_indx = cq->cq_consindx;
950*9e39c5baSBill Taylor 
951*9e39c5baSBill Taylor 	/* Calculate the pointer to the first CQ entry */
952*9e39c5baSBill Taylor 	cqe = &cq->cq_addr[cons_indx];
953*9e39c5baSBill Taylor 
954*9e39c5baSBill Taylor 	/*
955*9e39c5baSBill Taylor 	 * Keep pulling entries from the CQ until we find an entry owned by
956*9e39c5baSBill Taylor 	 * the hardware.  As long as there the CQE's owned by SW, process
957*9e39c5baSBill Taylor 	 * each entry by calling dapli_tavor_cq_cqe_consume() and updating the
958*9e39c5baSBill Taylor 	 * CQ consumer index.  Note:  We only update the consumer index if
959*9e39c5baSBill Taylor 	 * dapli_tavor_cq_cqe_consume() returns TAVOR_CQ_SYNC_AND_DB.
960*9e39c5baSBill Taylor 	 * Otherwise, it indicates that we are going to "recycle" the CQE
961*9e39c5baSBill Taylor 	 * (probably because it is a error CQE and corresponds to more than one
962*9e39c5baSBill Taylor 	 * completion).
963*9e39c5baSBill Taylor 	 */
964*9e39c5baSBill Taylor 	if (TAVOR_CQE_OWNER_IS_SW(cqe)) {
965*9e39c5baSBill Taylor 		status = dapli_tavor_cq_cqe_consume(cq, cqe, wc_p);
966*9e39c5baSBill Taylor 		if (status == TAVOR_CQ_SYNC_AND_DB) {
967*9e39c5baSBill Taylor 			/* Reset entry to hardware ownership */
968*9e39c5baSBill Taylor 			TAVOR_CQE_OWNER_SET_HW(cqe);
969*9e39c5baSBill Taylor 
970*9e39c5baSBill Taylor 			/* Increment the consumer index */
971*9e39c5baSBill Taylor 			cq->cq_consindx =
972*9e39c5baSBill Taylor 			    (cons_indx + 1) & (cq->cq_size - 1);
973*9e39c5baSBill Taylor 			dapli_tavor_cq_doorbell(cq->cq_iauar,
974*9e39c5baSBill Taylor 			    TAVOR_CQDB_INCR_CONSINDX,
975*9e39c5baSBill Taylor 			    cq->cq_num, 0);
976*9e39c5baSBill Taylor 		}
977*9e39c5baSBill Taylor 		dat_status = DAT_SUCCESS;
978*9e39c5baSBill Taylor 	} else {
979*9e39c5baSBill Taylor 		if (cq->cq_wrid_reap_head)	/* look before leaping */
980*9e39c5baSBill Taylor 			dapls_tavor_wrid_cq_reap(cq);
981*9e39c5baSBill Taylor 		dat_status = DAT_ERROR(DAT_QUEUE_EMPTY, 0);
982*9e39c5baSBill Taylor 	}
983*9e39c5baSBill Taylor 	return (dat_status);
984*9e39c5baSBill Taylor }
985*9e39c5baSBill Taylor 
986*9e39c5baSBill Taylor /*
987*9e39c5baSBill Taylor  * dapli_tavor_cq_cqe_consume()
988*9e39c5baSBill Taylor  * Converts a given CQE into a ibt_wc_t object
989*9e39c5baSBill Taylor  */
990*9e39c5baSBill Taylor static int
dapli_tavor_cq_cqe_consume(ib_cq_handle_t cqhdl,tavor_hw_cqe_t * cqe,ibt_wc_t * wc)991*9e39c5baSBill Taylor dapli_tavor_cq_cqe_consume(ib_cq_handle_t cqhdl, tavor_hw_cqe_t *cqe,
992*9e39c5baSBill Taylor     ibt_wc_t *wc)
993*9e39c5baSBill Taylor {
994*9e39c5baSBill Taylor 	uint_t		flags;
995*9e39c5baSBill Taylor 	uint_t		type;
996*9e39c5baSBill Taylor 	uint_t		opcode;
997*9e39c5baSBill Taylor 	int		status;
998*9e39c5baSBill Taylor 
999*9e39c5baSBill Taylor 	/*
1000*9e39c5baSBill Taylor 	 * Determine if this is an "error" CQE by examining "opcode".  If it
1001*9e39c5baSBill Taylor 	 * is an error CQE, then call dapli_tavor_cq_errcqe_consume() and return
1002*9e39c5baSBill Taylor 	 * whatever status it returns.  Otherwise, this is a successful
1003*9e39c5baSBill Taylor 	 * completion.
1004*9e39c5baSBill Taylor 	 */
1005*9e39c5baSBill Taylor 	opcode = TAVOR_CQE_OPCODE_GET(cqe);
1006*9e39c5baSBill Taylor 	if ((opcode == TAVOR_CQE_SEND_ERR_OPCODE) ||
1007*9e39c5baSBill Taylor 	    (opcode == TAVOR_CQE_RECV_ERR_OPCODE)) {
1008*9e39c5baSBill Taylor 		status = dapli_tavor_cq_errcqe_consume(cqhdl, cqe, wc);
1009*9e39c5baSBill Taylor 		return (status);
1010*9e39c5baSBill Taylor 	}
1011*9e39c5baSBill Taylor 
1012*9e39c5baSBill Taylor 	/*
1013*9e39c5baSBill Taylor 	 * Fetch the Work Request ID using the information in the CQE.
1014*9e39c5baSBill Taylor 	 * See tavor_wr.c for more details.
1015*9e39c5baSBill Taylor 	 */
1016*9e39c5baSBill Taylor 	wc->wc_id = dapls_tavor_wrid_get_entry(cqhdl, cqe,
1017*9e39c5baSBill Taylor 	    TAVOR_CQE_SENDRECV_GET(cqe), 0, NULL);
1018*9e39c5baSBill Taylor 	wc->wc_qpn = TAVOR_CQE_QPNUM_GET(cqe);
1019*9e39c5baSBill Taylor 
1020*9e39c5baSBill Taylor 	/*
1021*9e39c5baSBill Taylor 	 * Parse the CQE opcode to determine completion type.  This will set
1022*9e39c5baSBill Taylor 	 * not only the type of the completion, but also any flags that might
1023*9e39c5baSBill Taylor 	 * be associated with it (e.g. whether immediate data is present).
1024*9e39c5baSBill Taylor 	 */
1025*9e39c5baSBill Taylor 	flags = IBT_WC_NO_FLAGS;
1026*9e39c5baSBill Taylor 	if (TAVOR_CQE_SENDRECV_GET(cqe) != TAVOR_COMPLETION_RECV) {
1027*9e39c5baSBill Taylor 
1028*9e39c5baSBill Taylor 		/*
1029*9e39c5baSBill Taylor 		 * Send CQE
1030*9e39c5baSBill Taylor 		 *
1031*9e39c5baSBill Taylor 		 * The following opcodes will not be generated in uDAPL
1032*9e39c5baSBill Taylor 		 * case TAVOR_CQE_SND_RDMAWR_IMM:
1033*9e39c5baSBill Taylor 		 * case TAVOR_CQE_SND_SEND_IMM:
1034*9e39c5baSBill Taylor 		 * case TAVOR_CQE_SND_ATOMIC_CS:
1035*9e39c5baSBill Taylor 		 * case TAVOR_CQE_SND_ATOMIC_FA:
1036*9e39c5baSBill Taylor 		 */
1037*9e39c5baSBill Taylor 		switch (opcode) {
1038*9e39c5baSBill Taylor 		case TAVOR_CQE_SND_RDMAWR:
1039*9e39c5baSBill Taylor 			type = IBT_WRC_RDMAW;
1040*9e39c5baSBill Taylor 			break;
1041*9e39c5baSBill Taylor 
1042*9e39c5baSBill Taylor 		case TAVOR_CQE_SND_SEND:
1043*9e39c5baSBill Taylor 			type = IBT_WRC_SEND;
1044*9e39c5baSBill Taylor 			break;
1045*9e39c5baSBill Taylor 
1046*9e39c5baSBill Taylor 		case TAVOR_CQE_SND_RDMARD:
1047*9e39c5baSBill Taylor 			type = IBT_WRC_RDMAR;
1048*9e39c5baSBill Taylor 			wc->wc_bytes_xfer = TAVOR_CQE_BYTECNT_GET(cqe);
1049*9e39c5baSBill Taylor 			break;
1050*9e39c5baSBill Taylor 
1051*9e39c5baSBill Taylor 		case TAVOR_CQE_SND_BIND_MW:
1052*9e39c5baSBill Taylor 			type = IBT_WRC_BIND;
1053*9e39c5baSBill Taylor 			break;
1054*9e39c5baSBill Taylor 
1055*9e39c5baSBill Taylor 		default:
1056*9e39c5baSBill Taylor 			wc->wc_status = IBT_WC_LOCAL_CHAN_OP_ERR;
1057*9e39c5baSBill Taylor 			return (TAVOR_CQ_SYNC_AND_DB);
1058*9e39c5baSBill Taylor 		}
1059*9e39c5baSBill Taylor 	} else {
1060*9e39c5baSBill Taylor 
1061*9e39c5baSBill Taylor 		/*
1062*9e39c5baSBill Taylor 		 * Receive CQE
1063*9e39c5baSBill Taylor 		 *
1064*9e39c5baSBill Taylor 		 * The following opcodes will not be generated in uDAPL
1065*9e39c5baSBill Taylor 		 *
1066*9e39c5baSBill Taylor 		 * case TAVOR_CQE_RCV_RECV_IMM:
1067*9e39c5baSBill Taylor 		 * case TAVOR_CQE_RCV_RECV_IMM2:
1068*9e39c5baSBill Taylor 		 * case TAVOR_CQE_RCV_RDMAWR_IMM:
1069*9e39c5baSBill Taylor 		 * case TAVOR_CQE_RCV_RDMAWR_IMM2:
1070*9e39c5baSBill Taylor 		 */
1071*9e39c5baSBill Taylor 		switch (opcode & 0x1F) {
1072*9e39c5baSBill Taylor 		case TAVOR_CQE_RCV_RECV:
1073*9e39c5baSBill Taylor 			/* FALLTHROUGH */
1074*9e39c5baSBill Taylor 		case TAVOR_CQE_RCV_RECV2:
1075*9e39c5baSBill Taylor 			type = IBT_WRC_RECV;
1076*9e39c5baSBill Taylor 			wc->wc_bytes_xfer = TAVOR_CQE_BYTECNT_GET(cqe);
1077*9e39c5baSBill Taylor 			break;
1078*9e39c5baSBill Taylor 		default:
1079*9e39c5baSBill Taylor 			wc->wc_status = IBT_WC_LOCAL_CHAN_OP_ERR;
1080*9e39c5baSBill Taylor 			return (TAVOR_CQ_SYNC_AND_DB);
1081*9e39c5baSBill Taylor 		}
1082*9e39c5baSBill Taylor 	}
1083*9e39c5baSBill Taylor 	wc->wc_type = type;
1084*9e39c5baSBill Taylor 	wc->wc_flags = flags;
1085*9e39c5baSBill Taylor 	/* If we got here, completion status must be success */
1086*9e39c5baSBill Taylor 	wc->wc_status = IBT_WC_SUCCESS;
1087*9e39c5baSBill Taylor 
1088*9e39c5baSBill Taylor 	return (TAVOR_CQ_SYNC_AND_DB);
1089*9e39c5baSBill Taylor }
1090*9e39c5baSBill Taylor 
1091*9e39c5baSBill Taylor 
1092*9e39c5baSBill Taylor /*
1093*9e39c5baSBill Taylor  * dapli_tavor_cq_errcqe_consume()
1094*9e39c5baSBill Taylor  */
1095*9e39c5baSBill Taylor static int
dapli_tavor_cq_errcqe_consume(ib_cq_handle_t cqhdl,tavor_hw_cqe_t * cqe,ibt_wc_t * wc)1096*9e39c5baSBill Taylor dapli_tavor_cq_errcqe_consume(ib_cq_handle_t cqhdl, tavor_hw_cqe_t *cqe,
1097*9e39c5baSBill Taylor     ibt_wc_t *wc)
1098*9e39c5baSBill Taylor {
1099*9e39c5baSBill Taylor 	dapls_tavor_wrid_entry_t	wre;
1100*9e39c5baSBill Taylor 	uint32_t		next_wqeaddr;
1101*9e39c5baSBill Taylor 	uint32_t		imm_eth_pkey_cred;
1102*9e39c5baSBill Taylor 	uint_t			nextwqesize, dbd;
1103*9e39c5baSBill Taylor 	uint_t			doorbell_cnt, status;
1104*9e39c5baSBill Taylor 	uint_t			opcode = TAVOR_CQE_OPCODE_GET(cqe);
1105*9e39c5baSBill Taylor 
1106*9e39c5baSBill Taylor 	dapl_dbg_log(DAPL_DBG_TYPE_EVD, "errcqe_consume:cqe.eth=%x, wqe=%x\n",
1107*9e39c5baSBill Taylor 	    TAVOR_CQE_IMM_ETH_PKEY_CRED_GET(cqe),
1108*9e39c5baSBill Taylor 	    TAVOR_CQE_WQEADDRSZ_GET(cqe));
1109*9e39c5baSBill Taylor 
1110*9e39c5baSBill Taylor 	/*
1111*9e39c5baSBill Taylor 	 * Fetch the Work Request ID using the information in the CQE.
1112*9e39c5baSBill Taylor 	 * See tavor_wr.c for more details.
1113*9e39c5baSBill Taylor 	 */
1114*9e39c5baSBill Taylor 	wc->wc_id = dapls_tavor_wrid_get_entry(cqhdl, cqe,
1115*9e39c5baSBill Taylor 	    (opcode == TAVOR_CQE_SEND_ERR_OPCODE) ? TAVOR_COMPLETION_SEND :
1116*9e39c5baSBill Taylor 	    TAVOR_COMPLETION_RECV, 1, &wre);
1117*9e39c5baSBill Taylor 	wc->wc_qpn = TAVOR_CQE_QPNUM_GET(cqe);
1118*9e39c5baSBill Taylor 
1119*9e39c5baSBill Taylor 	/*
1120*9e39c5baSBill Taylor 	 * Parse the CQE opcode to determine completion type.  We know that
1121*9e39c5baSBill Taylor 	 * the CQE is an error completion, so we extract only the completion
1122*9e39c5baSBill Taylor 	 * status here.
1123*9e39c5baSBill Taylor 	 */
1124*9e39c5baSBill Taylor 	imm_eth_pkey_cred = TAVOR_CQE_IMM_ETH_PKEY_CRED_GET(cqe);
1125*9e39c5baSBill Taylor 	status = imm_eth_pkey_cred >> TAVOR_CQE_ERR_STATUS_SHIFT;
1126*9e39c5baSBill Taylor 	switch (status) {
1127*9e39c5baSBill Taylor 	case TAVOR_CQE_LOC_LEN_ERR:
1128*9e39c5baSBill Taylor 		status = IBT_WC_LOCAL_LEN_ERR;
1129*9e39c5baSBill Taylor 		break;
1130*9e39c5baSBill Taylor 
1131*9e39c5baSBill Taylor 	case TAVOR_CQE_LOC_OP_ERR:
1132*9e39c5baSBill Taylor 		status = IBT_WC_LOCAL_CHAN_OP_ERR;
1133*9e39c5baSBill Taylor 		break;
1134*9e39c5baSBill Taylor 
1135*9e39c5baSBill Taylor 	case TAVOR_CQE_LOC_PROT_ERR:
1136*9e39c5baSBill Taylor 		status = IBT_WC_LOCAL_PROTECT_ERR;
1137*9e39c5baSBill Taylor 		break;
1138*9e39c5baSBill Taylor 
1139*9e39c5baSBill Taylor 	case TAVOR_CQE_WR_FLUSHED_ERR:
1140*9e39c5baSBill Taylor 		status = IBT_WC_WR_FLUSHED_ERR;
1141*9e39c5baSBill Taylor 		break;
1142*9e39c5baSBill Taylor 
1143*9e39c5baSBill Taylor 	case TAVOR_CQE_MW_BIND_ERR:
1144*9e39c5baSBill Taylor 		status = IBT_WC_MEM_WIN_BIND_ERR;
1145*9e39c5baSBill Taylor 		break;
1146*9e39c5baSBill Taylor 
1147*9e39c5baSBill Taylor 	case TAVOR_CQE_BAD_RESPONSE_ERR:
1148*9e39c5baSBill Taylor 		status = IBT_WC_BAD_RESPONSE_ERR;
1149*9e39c5baSBill Taylor 		break;
1150*9e39c5baSBill Taylor 
1151*9e39c5baSBill Taylor 	case TAVOR_CQE_LOCAL_ACCESS_ERR:
1152*9e39c5baSBill Taylor 		status = IBT_WC_LOCAL_ACCESS_ERR;
1153*9e39c5baSBill Taylor 		break;
1154*9e39c5baSBill Taylor 
1155*9e39c5baSBill Taylor 	case TAVOR_CQE_REM_INV_REQ_ERR:
1156*9e39c5baSBill Taylor 		status = IBT_WC_REMOTE_INVALID_REQ_ERR;
1157*9e39c5baSBill Taylor 		break;
1158*9e39c5baSBill Taylor 
1159*9e39c5baSBill Taylor 	case TAVOR_CQE_REM_ACC_ERR:
1160*9e39c5baSBill Taylor 		status = IBT_WC_REMOTE_ACCESS_ERR;
1161*9e39c5baSBill Taylor 		break;
1162*9e39c5baSBill Taylor 
1163*9e39c5baSBill Taylor 	case TAVOR_CQE_REM_OP_ERR:
1164*9e39c5baSBill Taylor 		status = IBT_WC_REMOTE_OP_ERR;
1165*9e39c5baSBill Taylor 		break;
1166*9e39c5baSBill Taylor 
1167*9e39c5baSBill Taylor 	case TAVOR_CQE_TRANS_TO_ERR:
1168*9e39c5baSBill Taylor 		status = IBT_WC_TRANS_TIMEOUT_ERR;
1169*9e39c5baSBill Taylor 		break;
1170*9e39c5baSBill Taylor 
1171*9e39c5baSBill Taylor 	case TAVOR_CQE_RNRNAK_TO_ERR:
1172*9e39c5baSBill Taylor 		status = IBT_WC_RNR_NAK_TIMEOUT_ERR;
1173*9e39c5baSBill Taylor 		break;
1174*9e39c5baSBill Taylor 
1175*9e39c5baSBill Taylor 	/*
1176*9e39c5baSBill Taylor 	 * The following error codes are not supported in the Tavor driver
1177*9e39c5baSBill Taylor 	 * as they relate only to Reliable Datagram completion statuses:
1178*9e39c5baSBill Taylor 	 *    case TAVOR_CQE_LOCAL_RDD_VIO_ERR:
1179*9e39c5baSBill Taylor 	 *    case TAVOR_CQE_REM_INV_RD_REQ_ERR:
1180*9e39c5baSBill Taylor 	 *    case TAVOR_CQE_EEC_REM_ABORTED_ERR:
1181*9e39c5baSBill Taylor 	 *    case TAVOR_CQE_INV_EEC_NUM_ERR:
1182*9e39c5baSBill Taylor 	 *    case TAVOR_CQE_INV_EEC_STATE_ERR:
1183*9e39c5baSBill Taylor 	 *    case TAVOR_CQE_LOC_EEC_ERR:
1184*9e39c5baSBill Taylor 	 */
1185*9e39c5baSBill Taylor 
1186*9e39c5baSBill Taylor 	default:
1187*9e39c5baSBill Taylor 		status = IBT_WC_LOCAL_CHAN_OP_ERR;
1188*9e39c5baSBill Taylor 		break;
1189*9e39c5baSBill Taylor 	}
1190*9e39c5baSBill Taylor 	wc->wc_status = status;
1191*9e39c5baSBill Taylor 	wc->wc_type = 0;
1192*9e39c5baSBill Taylor 	/*
1193*9e39c5baSBill Taylor 	 * Now we do all the checking that's necessary to handle completion
1194*9e39c5baSBill Taylor 	 * queue entry "recycling"
1195*9e39c5baSBill Taylor 	 *
1196*9e39c5baSBill Taylor 	 * It is not necessary here to try to sync the WQE as we are only
1197*9e39c5baSBill Taylor 	 * attempting to read from the Work Queue (and hardware does not
1198*9e39c5baSBill Taylor 	 * write to it).
1199*9e39c5baSBill Taylor 	 */
1200*9e39c5baSBill Taylor 
1201*9e39c5baSBill Taylor 	/*
1202*9e39c5baSBill Taylor 	 * We can get doorbell info, WQE address, size for the next WQE
1203*9e39c5baSBill Taylor 	 * from the "wre" (which was filled in above in the call to the
1204*9e39c5baSBill Taylor 	 * tavor_wrid_get_entry() routine)
1205*9e39c5baSBill Taylor 	 */
1206*9e39c5baSBill Taylor 	dbd = (wre.wr_signaled_dbd & TAVOR_WRID_ENTRY_DOORBELLED) ? 1 : 0;
1207*9e39c5baSBill Taylor 	next_wqeaddr = wre.wr_wqeaddrsz;
1208*9e39c5baSBill Taylor 	nextwqesize  = wre.wr_wqeaddrsz & TAVOR_WQE_NDS_MASK;
1209*9e39c5baSBill Taylor 
1210*9e39c5baSBill Taylor 	/*
1211*9e39c5baSBill Taylor 	 * Get the doorbell count from the CQE.  This indicates how many
1212*9e39c5baSBill Taylor 	 * completions this one CQE represents.
1213*9e39c5baSBill Taylor 	 */
1214*9e39c5baSBill Taylor 	doorbell_cnt = imm_eth_pkey_cred & TAVOR_CQE_ERR_DBDCNT_MASK;
1215*9e39c5baSBill Taylor 
1216*9e39c5baSBill Taylor 	/*
1217*9e39c5baSBill Taylor 	 * Determine if we're ready to consume this CQE yet or not.  If the
1218*9e39c5baSBill Taylor 	 * next WQE has size zero (i.e. no next WQE) or if the doorbell count
1219*9e39c5baSBill Taylor 	 * is down to zero, then this is the last/only completion represented
1220*9e39c5baSBill Taylor 	 * by the current CQE (return TAVOR_CQ_SYNC_AND_DB).  Otherwise, the
1221*9e39c5baSBill Taylor 	 * current CQE needs to be recycled (see below).
1222*9e39c5baSBill Taylor 	 */
1223*9e39c5baSBill Taylor 	if ((nextwqesize == 0) || ((doorbell_cnt == 0) && (dbd == 1))) {
1224*9e39c5baSBill Taylor 		/*
1225*9e39c5baSBill Taylor 		 * Consume the CQE
1226*9e39c5baSBill Taylor 		 *    Return status to indicate that doorbell and sync may be
1227*9e39c5baSBill Taylor 		 *    necessary.
1228*9e39c5baSBill Taylor 		 */
1229*9e39c5baSBill Taylor 		return (TAVOR_CQ_SYNC_AND_DB);
1230*9e39c5baSBill Taylor 
1231*9e39c5baSBill Taylor 	} else {
1232*9e39c5baSBill Taylor 		/*
1233*9e39c5baSBill Taylor 		 * Recycle the CQE for use in the next PollCQ() call
1234*9e39c5baSBill Taylor 		 *    Decrement the doorbell count, modify the error status,
1235*9e39c5baSBill Taylor 		 *    and update the WQE address and size (to point to the
1236*9e39c5baSBill Taylor 		 *    next WQE on the chain.  Put these update entries back
1237*9e39c5baSBill Taylor 		 *    into the CQE.
1238*9e39c5baSBill Taylor 		 *    Despite the fact that we have updated the CQE, it is not
1239*9e39c5baSBill Taylor 		 *    necessary for us to attempt to sync this entry just yet
1240*9e39c5baSBill Taylor 		 *    as we have not changed the "hardware's view" of the
1241*9e39c5baSBill Taylor 		 *    entry (i.e. we have not modified the "owner" bit - which
1242*9e39c5baSBill Taylor 		 *    is all that the Tavor hardware really cares about.
1243*9e39c5baSBill Taylor 		 */
1244*9e39c5baSBill Taylor 		doorbell_cnt = doorbell_cnt - dbd;
1245*9e39c5baSBill Taylor 		TAVOR_CQE_IMM_ETH_PKEY_CRED_SET(cqe,
1246*9e39c5baSBill Taylor 		    ((TAVOR_CQE_WR_FLUSHED_ERR << TAVOR_CQE_ERR_STATUS_SHIFT) |
1247*9e39c5baSBill Taylor 		    (doorbell_cnt & TAVOR_CQE_ERR_DBDCNT_MASK)));
1248*9e39c5baSBill Taylor 		TAVOR_CQE_WQEADDRSZ_SET(cqe,
1249*9e39c5baSBill Taylor 		    TAVOR_QP_WQEADDRSZ(next_wqeaddr, nextwqesize));
1250*9e39c5baSBill Taylor 		dapl_dbg_log(DAPL_DBG_TYPE_EVD,
1251*9e39c5baSBill Taylor 		    "errcqe_consume: recycling cqe.eth=%x, wqe=%x\n",
1252*9e39c5baSBill Taylor 		    TAVOR_CQE_IMM_ETH_PKEY_CRED_GET(cqe),
1253*9e39c5baSBill Taylor 		    TAVOR_CQE_WQEADDRSZ_GET(cqe));
1254*9e39c5baSBill Taylor 		return (TAVOR_CQ_RECYCLE_ENTRY);
1255*9e39c5baSBill Taylor 	}
1256*9e39c5baSBill Taylor }
1257*9e39c5baSBill Taylor 
1258*9e39c5baSBill Taylor /*
1259*9e39c5baSBill Taylor  * dapli_tavor_cq_notify()
1260*9e39c5baSBill Taylor  * This function is used for arming the CQ by ringing the CQ doorbell.
1261*9e39c5baSBill Taylor  */
1262*9e39c5baSBill Taylor static DAT_RETURN
dapli_tavor_cq_notify(ib_cq_handle_t cq,int flags,uint32_t param)1263*9e39c5baSBill Taylor dapli_tavor_cq_notify(ib_cq_handle_t cq, int flags, uint32_t param)
1264*9e39c5baSBill Taylor {
1265*9e39c5baSBill Taylor 	uint32_t	cqnum;
1266*9e39c5baSBill Taylor 
1267*9e39c5baSBill Taylor 	/*
1268*9e39c5baSBill Taylor 	 * Determine if we are trying to get the next completion or the next
1269*9e39c5baSBill Taylor 	 * "solicited" completion.  Then hit the appropriate doorbell.
1270*9e39c5baSBill Taylor 	 */
1271*9e39c5baSBill Taylor 	cqnum = cq->cq_num;
1272*9e39c5baSBill Taylor 	if (flags == IB_NOTIFY_ON_NEXT_COMP) {
1273*9e39c5baSBill Taylor 		dapli_tavor_cq_doorbell(cq->cq_iauar, TAVOR_CQDB_NOTIFY_CQ,
1274*9e39c5baSBill Taylor 		    cqnum, TAVOR_CQDB_DEFAULT_PARAM);
1275*9e39c5baSBill Taylor 
1276*9e39c5baSBill Taylor 	} else if (flags == IB_NOTIFY_ON_NEXT_SOLICITED) {
1277*9e39c5baSBill Taylor 		dapli_tavor_cq_doorbell(cq->cq_iauar,
1278*9e39c5baSBill Taylor 		    TAVOR_CQDB_NOTIFY_CQ_SOLICIT, cqnum,
1279*9e39c5baSBill Taylor 		    TAVOR_CQDB_DEFAULT_PARAM);
1280*9e39c5baSBill Taylor 
1281*9e39c5baSBill Taylor 	} else if (flags == IB_NOTIFY_ON_NEXT_NCOMP) {
1282*9e39c5baSBill Taylor 		dapli_tavor_cq_doorbell(cq->cq_iauar, TAVOR_CQDB_NOTIFY_NCQ,
1283*9e39c5baSBill Taylor 		    cqnum, param);
1284*9e39c5baSBill Taylor 	} else {
1285*9e39c5baSBill Taylor 		return (DAT_INVALID_PARAMETER);
1286*9e39c5baSBill Taylor 	}
1287*9e39c5baSBill Taylor 
1288*9e39c5baSBill Taylor 	return (DAT_SUCCESS);
1289*9e39c5baSBill Taylor }
1290*9e39c5baSBill Taylor 
1291*9e39c5baSBill Taylor /*
1292*9e39c5baSBill Taylor  * dapli_tavor_post_send()
1293*9e39c5baSBill Taylor  */
1294*9e39c5baSBill Taylor static DAT_RETURN
dapli_tavor_post_send(DAPL_EP * ep,ibt_send_wr_t * wr,boolean_t ns)1295*9e39c5baSBill Taylor dapli_tavor_post_send(DAPL_EP *ep, ibt_send_wr_t *wr, boolean_t ns)
1296*9e39c5baSBill Taylor {
1297*9e39c5baSBill Taylor 	tavor_sw_wqe_dbinfo_t		dbinfo;
1298*9e39c5baSBill Taylor 	dapls_tavor_wrid_list_hdr_t	*wridlist;
1299*9e39c5baSBill Taylor 	dapls_tavor_wrid_entry_t	*wre_last;
1300*9e39c5baSBill Taylor 	uint32_t			desc;
1301*9e39c5baSBill Taylor 	uint64_t			*wqe_addr;
1302*9e39c5baSBill Taylor 	uint32_t			desc_sz;
1303*9e39c5baSBill Taylor 	uint32_t			wqeaddrsz, signaled_dbd;
1304*9e39c5baSBill Taylor 	uint32_t			head, tail, next_tail, qsize_msk;
1305*9e39c5baSBill Taylor 	int				status;
1306*9e39c5baSBill Taylor 	ib_qp_handle_t			qp;
1307*9e39c5baSBill Taylor 
1308*9e39c5baSBill Taylor 	if ((ep->qp_state == IBT_STATE_RESET) ||
1309*9e39c5baSBill Taylor 	    (ep->qp_state == IBT_STATE_INIT) ||
1310*9e39c5baSBill Taylor 	    (ep->qp_state == IBT_STATE_RTR)) {
1311*9e39c5baSBill Taylor 		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
1312*9e39c5baSBill Taylor 		    "post_send: invalid qp_state %d\n", ep->qp_state);
1313*9e39c5baSBill Taylor 		return (DAT_INVALID_STATE);
1314*9e39c5baSBill Taylor 	}
1315*9e39c5baSBill Taylor 
1316*9e39c5baSBill Taylor 	qp = ep->qp_handle;
1317*9e39c5baSBill Taylor 
1318*9e39c5baSBill Taylor 	/* Grab the lock for the WRID list */
1319*9e39c5baSBill Taylor 	dapl_os_lock(&qp->qp_sq_wqhdr->wq_wrid_lock->wrl_lock);
1320*9e39c5baSBill Taylor 	wridlist  = qp->qp_sq_wqhdr->wq_wrid_post;
1321*9e39c5baSBill Taylor 
1322*9e39c5baSBill Taylor 	/* Save away some initial QP state */
1323*9e39c5baSBill Taylor 	qsize_msk = qp->qp_sq_wqhdr->wq_size - 1;
1324*9e39c5baSBill Taylor 	tail	  = qp->qp_sq_wqhdr->wq_tail;
1325*9e39c5baSBill Taylor 	head	  = qp->qp_sq_wqhdr->wq_head;
1326*9e39c5baSBill Taylor 
1327*9e39c5baSBill Taylor 	/*
1328*9e39c5baSBill Taylor 	 * Check for "queue full" condition.  If the queue is already full,
1329*9e39c5baSBill Taylor 	 * then no more WQEs can be posted, return an error
1330*9e39c5baSBill Taylor 	 */
1331*9e39c5baSBill Taylor 	if (qp->qp_sq_wqhdr->wq_full != 0) {
1332*9e39c5baSBill Taylor 		dapl_os_unlock(&qp->qp_sq_wqhdr->wq_wrid_lock->wrl_lock);
1333*9e39c5baSBill Taylor 		return (DAT_INSUFFICIENT_RESOURCES);
1334*9e39c5baSBill Taylor 	}
1335*9e39c5baSBill Taylor 
1336*9e39c5baSBill Taylor 	/*
1337*9e39c5baSBill Taylor 	 * Increment the "tail index" and check for "queue full" condition.
1338*9e39c5baSBill Taylor 	 * If we detect that the current work request is going to fill the
1339*9e39c5baSBill Taylor 	 * work queue, then we mark this condition and continue.
1340*9e39c5baSBill Taylor 	 */
1341*9e39c5baSBill Taylor 	next_tail = (tail + 1) & qsize_msk;
1342*9e39c5baSBill Taylor 	if (next_tail == head) {
1343*9e39c5baSBill Taylor 		qp->qp_sq_wqhdr->wq_full = 1;
1344*9e39c5baSBill Taylor 	}
1345*9e39c5baSBill Taylor 
1346*9e39c5baSBill Taylor 	/*
1347*9e39c5baSBill Taylor 	 * Get the user virtual address of the location where the next
1348*9e39c5baSBill Taylor 	 * Send WQE should be built
1349*9e39c5baSBill Taylor 	 */
1350*9e39c5baSBill Taylor 	wqe_addr = TAVOR_QP_SQ_ENTRY(qp, tail);
1351*9e39c5baSBill Taylor 
1352*9e39c5baSBill Taylor 	/*
1353*9e39c5baSBill Taylor 	 * Call tavor_wqe_send_build() to build the WQE at the given address.
1354*9e39c5baSBill Taylor 	 * This routine uses the information in the ibt_send_wr_t and
1355*9e39c5baSBill Taylor 	 * returns the size of the WQE when it returns.
1356*9e39c5baSBill Taylor 	 */
1357*9e39c5baSBill Taylor 	status = dapli_tavor_wqe_send_build(qp, wr, wqe_addr, &desc_sz);
1358*9e39c5baSBill Taylor 	if (status != DAT_SUCCESS) {
1359*9e39c5baSBill Taylor 		dapl_os_unlock(&qp->qp_sq_wqhdr->wq_wrid_lock->wrl_lock);
1360*9e39c5baSBill Taylor 		return (status);
1361*9e39c5baSBill Taylor 	}
1362*9e39c5baSBill Taylor 
1363*9e39c5baSBill Taylor 	/*
1364*9e39c5baSBill Taylor 	 * Get the descriptor (io address) corresponding to the location
1365*9e39c5baSBill Taylor 	 * Send WQE was built.
1366*9e39c5baSBill Taylor 	 */
1367*9e39c5baSBill Taylor 	desc = TAVOR_QP_SQ_DESC(qp, tail);
1368*9e39c5baSBill Taylor 
1369*9e39c5baSBill Taylor 	dapl_os_assert(desc >= qp->qp_sq_desc_addr &&
1370*9e39c5baSBill Taylor 	    desc <= (qp->qp_sq_desc_addr +
1371*9e39c5baSBill Taylor 	    qp->qp_sq_numwqe*qp->qp_sq_wqesz));
1372*9e39c5baSBill Taylor 
1373*9e39c5baSBill Taylor 	/*
1374*9e39c5baSBill Taylor 	 * Add a WRID entry to the WRID list.  Need to calculate the
1375*9e39c5baSBill Taylor 	 * "wqeaddrsz" and "signaled_dbd" values to pass to
1376*9e39c5baSBill Taylor 	 * dapli_tavor_wrid_add_entry()
1377*9e39c5baSBill Taylor 	 */
1378*9e39c5baSBill Taylor 	wqeaddrsz = TAVOR_QP_WQEADDRSZ(desc, desc_sz);
1379*9e39c5baSBill Taylor 
1380*9e39c5baSBill Taylor 	if (wr->wr_flags & IBT_WR_SEND_SIGNAL) {
1381*9e39c5baSBill Taylor 		signaled_dbd = TAVOR_WRID_ENTRY_SIGNALED;
1382*9e39c5baSBill Taylor 	}
1383*9e39c5baSBill Taylor 
1384*9e39c5baSBill Taylor 	dapli_tavor_wrid_add_entry(qp->qp_sq_wqhdr, wr->wr_id, wqeaddrsz,
1385*9e39c5baSBill Taylor 	    signaled_dbd);
1386*9e39c5baSBill Taylor 
1387*9e39c5baSBill Taylor 	/*
1388*9e39c5baSBill Taylor 	 * Now link the wqe to the old chain (if there was one)
1389*9e39c5baSBill Taylor 	 */
1390*9e39c5baSBill Taylor 	dapli_tavor_wqe_send_linknext(wr, wqe_addr, ns, desc, desc_sz,
1391*9e39c5baSBill Taylor 	    qp->qp_sq_lastwqeaddr, &dbinfo);
1392*9e39c5baSBill Taylor 
1393*9e39c5baSBill Taylor 	/*
1394*9e39c5baSBill Taylor 	 * Now if the WRID tail entry is non-NULL, then this
1395*9e39c5baSBill Taylor 	 * represents the entry to which we are chaining the
1396*9e39c5baSBill Taylor 	 * new entries.  Since we are going to ring the
1397*9e39c5baSBill Taylor 	 * doorbell for this WQE, we want set its "dbd" bit.
1398*9e39c5baSBill Taylor 	 *
1399*9e39c5baSBill Taylor 	 * On the other hand, if the tail is NULL, even though
1400*9e39c5baSBill Taylor 	 * we will have rung the doorbell for the previous WQE
1401*9e39c5baSBill Taylor 	 * (for the hardware's sake) it is irrelevant to our
1402*9e39c5baSBill Taylor 	 * purposes (for tracking WRIDs) because we know the
1403*9e39c5baSBill Taylor 	 * request must have already completed.
1404*9e39c5baSBill Taylor 	 */
1405*9e39c5baSBill Taylor 	wre_last = wridlist->wl_wre_old_tail;
1406*9e39c5baSBill Taylor 	if (wre_last != NULL) {
1407*9e39c5baSBill Taylor 		wre_last->wr_signaled_dbd |= TAVOR_WRID_ENTRY_DOORBELLED;
1408*9e39c5baSBill Taylor 	}
1409*9e39c5baSBill Taylor 
1410*9e39c5baSBill Taylor 	/* Update some of the state in the QP */
1411*9e39c5baSBill Taylor 	qp->qp_sq_lastwqeaddr	 = wqe_addr;
1412*9e39c5baSBill Taylor 	qp->qp_sq_wqhdr->wq_tail = next_tail;
1413*9e39c5baSBill Taylor 
1414*9e39c5baSBill Taylor 	/* Ring the doorbell */
1415*9e39c5baSBill Taylor 	dapli_tavor_qp_send_doorbell(qp->qp_iauar, desc, desc_sz,
1416*9e39c5baSBill Taylor 	    qp->qp_num, dbinfo.db_fence, dbinfo.db_nopcode);
1417*9e39c5baSBill Taylor 
1418*9e39c5baSBill Taylor 	dapl_os_unlock(&qp->qp_sq_wqhdr->wq_wrid_lock->wrl_lock);
1419*9e39c5baSBill Taylor 
1420*9e39c5baSBill Taylor 	return (DAT_SUCCESS);
1421*9e39c5baSBill Taylor }
1422*9e39c5baSBill Taylor 
1423*9e39c5baSBill Taylor /*
1424*9e39c5baSBill Taylor  * dapli_tavor_post_recv()
1425*9e39c5baSBill Taylor  */
1426*9e39c5baSBill Taylor static DAT_RETURN
dapli_tavor_post_recv(DAPL_EP * ep,ibt_recv_wr_t * wr,boolean_t ns)1427*9e39c5baSBill Taylor dapli_tavor_post_recv(DAPL_EP	*ep, ibt_recv_wr_t *wr, boolean_t ns)
1428*9e39c5baSBill Taylor {
1429*9e39c5baSBill Taylor 	dapls_tavor_wrid_list_hdr_t	*wridlist;
1430*9e39c5baSBill Taylor 	dapls_tavor_wrid_entry_t	*wre_last;
1431*9e39c5baSBill Taylor 	ib_qp_handle_t			qp;
1432*9e39c5baSBill Taylor 	DAT_RETURN			status;
1433*9e39c5baSBill Taylor 	uint32_t			desc;
1434*9e39c5baSBill Taylor 	uint64_t			*wqe_addr;
1435*9e39c5baSBill Taylor 	uint32_t			desc_sz;
1436*9e39c5baSBill Taylor 	uint32_t			wqeaddrsz;
1437*9e39c5baSBill Taylor 	uint32_t			head, tail, next_tail, qsize_msk;
1438*9e39c5baSBill Taylor 
1439*9e39c5baSBill Taylor 	if (ep->qp_state == IBT_STATE_RESET) {
1440*9e39c5baSBill Taylor 		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
1441*9e39c5baSBill Taylor 		    "post_recv: invalid qp_state %d\n", ep->qp_state);
1442*9e39c5baSBill Taylor 		return (DAT_INVALID_STATE);
1443*9e39c5baSBill Taylor 	}
1444*9e39c5baSBill Taylor 	qp = ep->qp_handle;
1445*9e39c5baSBill Taylor 
1446*9e39c5baSBill Taylor 	/* Grab the lock for the WRID list */
1447*9e39c5baSBill Taylor 	dapl_os_lock(&qp->qp_rq_wqhdr->wq_wrid_lock->wrl_lock);
1448*9e39c5baSBill Taylor 	wridlist  = qp->qp_rq_wqhdr->wq_wrid_post;
1449*9e39c5baSBill Taylor 
1450*9e39c5baSBill Taylor 	/* Save away some initial QP state */
1451*9e39c5baSBill Taylor 	qsize_msk = qp->qp_rq_wqhdr->wq_size - 1;
1452*9e39c5baSBill Taylor 	tail	  = qp->qp_rq_wqhdr->wq_tail;
1453*9e39c5baSBill Taylor 	head	  = qp->qp_rq_wqhdr->wq_head;
1454*9e39c5baSBill Taylor 
1455*9e39c5baSBill Taylor 	/*
1456*9e39c5baSBill Taylor 	 * For the ibt_recv_wr_t passed in, parse the request and build a
1457*9e39c5baSBill Taylor 	 * Recv WQE. Link the WQE with the previous WQE and ring the
1458*9e39c5baSBill Taylor 	 * door bell.
1459*9e39c5baSBill Taylor 	 */
1460*9e39c5baSBill Taylor 
1461*9e39c5baSBill Taylor 	/*
1462*9e39c5baSBill Taylor 	 * Check for "queue full" condition.  If the queue is already full,
1463*9e39c5baSBill Taylor 	 * then no more WQEs can be posted. So return an error.
1464*9e39c5baSBill Taylor 	 */
1465*9e39c5baSBill Taylor 	if (qp->qp_rq_wqhdr->wq_full != 0) {
1466*9e39c5baSBill Taylor 		dapl_os_unlock(&qp->qp_rq_wqhdr->wq_wrid_lock->wrl_lock);
1467*9e39c5baSBill Taylor 		return (DAT_INSUFFICIENT_RESOURCES);
1468*9e39c5baSBill Taylor 	}
1469*9e39c5baSBill Taylor 
1470*9e39c5baSBill Taylor 	/*
1471*9e39c5baSBill Taylor 	 * Increment the "tail index" and check for "queue
1472*9e39c5baSBill Taylor 	 * full" condition.  If we detect that the current
1473*9e39c5baSBill Taylor 	 * work request is going to fill the work queue, then
1474*9e39c5baSBill Taylor 	 * we mark this condition and continue.
1475*9e39c5baSBill Taylor 	 */
1476*9e39c5baSBill Taylor 	next_tail = (tail + 1) & qsize_msk;
1477*9e39c5baSBill Taylor 	if (next_tail == head) {
1478*9e39c5baSBill Taylor 		qp->qp_rq_wqhdr->wq_full = 1;
1479*9e39c5baSBill Taylor 	}
1480*9e39c5baSBill Taylor 
1481*9e39c5baSBill Taylor 	/* Get the descriptor (IO Address) of the WQE to be built */
1482*9e39c5baSBill Taylor 	desc = TAVOR_QP_RQ_DESC(qp, tail);
1483*9e39c5baSBill Taylor 	/* The user virtual address of the WQE to be built */
1484*9e39c5baSBill Taylor 	wqe_addr = TAVOR_QP_RQ_ENTRY(qp, tail);
1485*9e39c5baSBill Taylor 
1486*9e39c5baSBill Taylor 	/*
1487*9e39c5baSBill Taylor 	 * Call tavor_wqe_recv_build() to build the WQE at the given
1488*9e39c5baSBill Taylor 	 * address. This routine uses the information in the
1489*9e39c5baSBill Taylor 	 * ibt_recv_wr_t and returns the size of the WQE.
1490*9e39c5baSBill Taylor 	 */
1491*9e39c5baSBill Taylor 	status = dapli_tavor_wqe_recv_build(qp, wr, wqe_addr, &desc_sz);
1492*9e39c5baSBill Taylor 	if (status != DAT_SUCCESS) {
1493*9e39c5baSBill Taylor 		dapl_os_unlock(&qp->qp_rq_wqhdr->wq_wrid_lock->wrl_lock);
1494*9e39c5baSBill Taylor 		return (DAT_INTERNAL_ERROR);
1495*9e39c5baSBill Taylor 	}
1496*9e39c5baSBill Taylor 
1497*9e39c5baSBill Taylor 	/*
1498*9e39c5baSBill Taylor 	 * Add a WRID entry to the WRID list.  Need to calculate the
1499*9e39c5baSBill Taylor 	 * "wqeaddrsz" and "signaled_dbd" values to pass to
1500*9e39c5baSBill Taylor 	 * dapli_tavor_wrid_add_entry().
1501*9e39c5baSBill Taylor 	 * Note: all Recv WQEs are essentially "signaled"
1502*9e39c5baSBill Taylor 	 */
1503*9e39c5baSBill Taylor 	wqeaddrsz = TAVOR_QP_WQEADDRSZ(desc, desc_sz);
1504*9e39c5baSBill Taylor 	dapli_tavor_wrid_add_entry(qp->qp_rq_wqhdr, wr->wr_id, wqeaddrsz,
1505*9e39c5baSBill Taylor 	    (uint32_t)TAVOR_WRID_ENTRY_SIGNALED);
1506*9e39c5baSBill Taylor 
1507*9e39c5baSBill Taylor 	/*
1508*9e39c5baSBill Taylor 	 * Now link the chain to the old chain (if there was one)
1509*9e39c5baSBill Taylor 	 * and ring the doorbel for the recv work queue.
1510*9e39c5baSBill Taylor 	 */
1511*9e39c5baSBill Taylor 	dapli_tavor_wqe_recv_linknext(wqe_addr, ns, desc, desc_sz,
1512*9e39c5baSBill Taylor 	    qp->qp_rq_lastwqeaddr);
1513*9e39c5baSBill Taylor 
1514*9e39c5baSBill Taylor 	/*
1515*9e39c5baSBill Taylor 	 * Now if the WRID tail entry is non-NULL, then this
1516*9e39c5baSBill Taylor 	 * represents the entry to which we are chaining the
1517*9e39c5baSBill Taylor 	 * new entries.  Since we are going to ring the
1518*9e39c5baSBill Taylor 	 * doorbell for this WQE, we want set its "dbd" bit.
1519*9e39c5baSBill Taylor 	 *
1520*9e39c5baSBill Taylor 	 * On the other hand, if the tail is NULL, even though
1521*9e39c5baSBill Taylor 	 * we will have rung the doorbell for the previous WQE
1522*9e39c5baSBill Taylor 	 * (for the hardware's sake) it is irrelevant to our
1523*9e39c5baSBill Taylor 	 * purposes (for tracking WRIDs) because we know the
1524*9e39c5baSBill Taylor 	 * request must have already completed.
1525*9e39c5baSBill Taylor 	 */
1526*9e39c5baSBill Taylor 	wre_last = wridlist->wl_wre_old_tail;
1527*9e39c5baSBill Taylor 	if (wre_last != NULL) {
1528*9e39c5baSBill Taylor 		wre_last->wr_signaled_dbd |= TAVOR_WRID_ENTRY_DOORBELLED;
1529*9e39c5baSBill Taylor 	}
1530*9e39c5baSBill Taylor 
1531*9e39c5baSBill Taylor 	/* Update some of the state in the QP */
1532*9e39c5baSBill Taylor 	qp->qp_rq_lastwqeaddr	 = wqe_addr;
1533*9e39c5baSBill Taylor 	qp->qp_rq_wqhdr->wq_tail = next_tail;
1534*9e39c5baSBill Taylor 
1535*9e39c5baSBill Taylor 	/* Ring the doorbell */
1536*9e39c5baSBill Taylor 	dapli_tavor_qp_recv_doorbell(qp->qp_iauar, desc, desc_sz,
1537*9e39c5baSBill Taylor 	    qp->qp_num, 1);
1538*9e39c5baSBill Taylor 
1539*9e39c5baSBill Taylor 	dapl_os_unlock(&qp->qp_rq_wqhdr->wq_wrid_lock->wrl_lock);
1540*9e39c5baSBill Taylor 
1541*9e39c5baSBill Taylor 	return (DAT_SUCCESS);
1542*9e39c5baSBill Taylor }
1543*9e39c5baSBill Taylor 
1544*9e39c5baSBill Taylor /*
1545*9e39c5baSBill Taylor  * dapli_tavor_post_srq()
1546*9e39c5baSBill Taylor  */
1547*9e39c5baSBill Taylor static DAT_RETURN
dapli_tavor_post_srq(DAPL_SRQ * srqp,ibt_recv_wr_t * wr,boolean_t ns)1548*9e39c5baSBill Taylor dapli_tavor_post_srq(DAPL_SRQ *srqp, ibt_recv_wr_t *wr, boolean_t ns)
1549*9e39c5baSBill Taylor {
1550*9e39c5baSBill Taylor 	ib_srq_handle_t			srq;
1551*9e39c5baSBill Taylor 	DAT_RETURN			status;
1552*9e39c5baSBill Taylor 	uint32_t			desc;
1553*9e39c5baSBill Taylor 	uint64_t			*wqe_addr;
1554*9e39c5baSBill Taylor 	uint64_t			*last_wqe_addr;
1555*9e39c5baSBill Taylor 	uint32_t			head, next_head, qsize_msk;
1556*9e39c5baSBill Taylor 	uint32_t			wqe_index;
1557*9e39c5baSBill Taylor 
1558*9e39c5baSBill Taylor 
1559*9e39c5baSBill Taylor 	srq = srqp->srq_handle;
1560*9e39c5baSBill Taylor 
1561*9e39c5baSBill Taylor 	/* Grab the lock for the WRID list */
1562*9e39c5baSBill Taylor 	dapl_os_lock(&srq->srq_wridlist->wl_lock->wrl_lock);
1563*9e39c5baSBill Taylor 
1564*9e39c5baSBill Taylor 	/*
1565*9e39c5baSBill Taylor 	 * For the ibt_recv_wr_t passed in, parse the request and build a
1566*9e39c5baSBill Taylor 	 * Recv WQE. Link the WQE with the previous WQE and ring the
1567*9e39c5baSBill Taylor 	 * door bell.
1568*9e39c5baSBill Taylor 	 */
1569*9e39c5baSBill Taylor 
1570*9e39c5baSBill Taylor 	/*
1571*9e39c5baSBill Taylor 	 * Check for "queue full" condition.  If the queue is already full,
1572*9e39c5baSBill Taylor 	 * ie. there are no free entries, then no more WQEs can be posted.
1573*9e39c5baSBill Taylor 	 * So return an error.
1574*9e39c5baSBill Taylor 	 */
1575*9e39c5baSBill Taylor 	if (srq->srq_wridlist->wl_freel_entries == 0) {
1576*9e39c5baSBill Taylor 		dapl_os_unlock(&srq->srq_wridlist->wl_lock->wrl_lock);
1577*9e39c5baSBill Taylor 		return (DAT_INSUFFICIENT_RESOURCES);
1578*9e39c5baSBill Taylor 	}
1579*9e39c5baSBill Taylor 
1580*9e39c5baSBill Taylor 	/* Save away some initial SRQ state */
1581*9e39c5baSBill Taylor 	qsize_msk = srq->srq_wridlist->wl_size - 1;
1582*9e39c5baSBill Taylor 	head	  = srq->srq_wridlist->wl_freel_head;
1583*9e39c5baSBill Taylor 
1584*9e39c5baSBill Taylor 	next_head = (head + 1) & qsize_msk;
1585*9e39c5baSBill Taylor 
1586*9e39c5baSBill Taylor 	/* Get the descriptor (IO Address) of the WQE to be built */
1587*9e39c5baSBill Taylor 	desc = srq->srq_wridlist->wl_free_list[head];
1588*9e39c5baSBill Taylor 
1589*9e39c5baSBill Taylor 	wqe_index = TAVOR_SRQ_WQ_INDEX(srq->srq_wq_desc_addr, desc,
1590*9e39c5baSBill Taylor 	    srq->srq_wq_wqesz);
1591*9e39c5baSBill Taylor 
1592*9e39c5baSBill Taylor 	/* The user virtual address of the WQE to be built */
1593*9e39c5baSBill Taylor 	wqe_addr = TAVOR_SRQ_WQ_ENTRY(srq, wqe_index);
1594*9e39c5baSBill Taylor 
1595*9e39c5baSBill Taylor 	/*
1596*9e39c5baSBill Taylor 	 * Call dapli_tavor_wqe_srq_build() to build the WQE at the given
1597*9e39c5baSBill Taylor 	 * address. This routine uses the information in the
1598*9e39c5baSBill Taylor 	 * ibt_recv_wr_t and returns the size of the WQE.
1599*9e39c5baSBill Taylor 	 */
1600*9e39c5baSBill Taylor 	status = dapli_tavor_wqe_srq_build(srq, wr, wqe_addr);
1601*9e39c5baSBill Taylor 	if (status != DAT_SUCCESS) {
1602*9e39c5baSBill Taylor 		dapl_os_unlock(&srq->srq_wridlist->wl_lock->wrl_lock);
1603*9e39c5baSBill Taylor 		return (status);
1604*9e39c5baSBill Taylor 	}
1605*9e39c5baSBill Taylor 
1606*9e39c5baSBill Taylor 	/*
1607*9e39c5baSBill Taylor 	 * Add a WRID entry to the WRID list.
1608*9e39c5baSBill Taylor 	 */
1609*9e39c5baSBill Taylor 	dapli_tavor_wrid_add_entry_srq(srq, wr->wr_id, wqe_index);
1610*9e39c5baSBill Taylor 
1611*9e39c5baSBill Taylor 	if (srq->srq_wq_lastwqeindex == -1) {
1612*9e39c5baSBill Taylor 		last_wqe_addr = NULL;
1613*9e39c5baSBill Taylor 	} else {
1614*9e39c5baSBill Taylor 		last_wqe_addr = TAVOR_SRQ_WQ_ENTRY(srq,
1615*9e39c5baSBill Taylor 		    srq->srq_wq_lastwqeindex);
1616*9e39c5baSBill Taylor 	}
1617*9e39c5baSBill Taylor 	/*
1618*9e39c5baSBill Taylor 	 * Now link the chain to the old chain (if there was one)
1619*9e39c5baSBill Taylor 	 * and ring the doorbell for the SRQ.
1620*9e39c5baSBill Taylor 	 */
1621*9e39c5baSBill Taylor 	dapli_tavor_wqe_srq_linknext(wqe_addr, ns, desc, last_wqe_addr);
1622*9e39c5baSBill Taylor 
1623*9e39c5baSBill Taylor 	/* Update some of the state in the SRQ */
1624*9e39c5baSBill Taylor 	srq->srq_wq_lastwqeindex	 = wqe_index;
1625*9e39c5baSBill Taylor 	srq->srq_wridlist->wl_freel_head = next_head;
1626*9e39c5baSBill Taylor 	srq->srq_wridlist->wl_freel_entries--;
1627*9e39c5baSBill Taylor 	dapl_os_assert(srq->srq_wridlist->wl_freel_entries <=
1628*9e39c5baSBill Taylor 	    srq->srq_wridlist->wl_size);
1629*9e39c5baSBill Taylor 
1630*9e39c5baSBill Taylor 	/* Ring the doorbell - for SRQ nds = 0 */
1631*9e39c5baSBill Taylor 	dapli_tavor_qp_recv_doorbell(srq->srq_iauar, desc, 0,
1632*9e39c5baSBill Taylor 	    srq->srq_num, 1);
1633*9e39c5baSBill Taylor 
1634*9e39c5baSBill Taylor 	dapl_os_unlock(&srq->srq_wridlist->wl_lock->wrl_lock);
1635*9e39c5baSBill Taylor 
1636*9e39c5baSBill Taylor 	return (DAT_SUCCESS);
1637*9e39c5baSBill Taylor }
1638*9e39c5baSBill Taylor 
1639*9e39c5baSBill Taylor /*
1640*9e39c5baSBill Taylor  * dapli_tavor_wrid_add_entry()
1641*9e39c5baSBill Taylor  */
1642*9e39c5baSBill Taylor extern void
dapli_tavor_wrid_add_entry(dapls_tavor_workq_hdr_t * wq,uint64_t wrid,uint32_t wqeaddrsz,uint_t signaled_dbd)1643*9e39c5baSBill Taylor dapli_tavor_wrid_add_entry(dapls_tavor_workq_hdr_t *wq, uint64_t wrid,
1644*9e39c5baSBill Taylor     uint32_t wqeaddrsz, uint_t signaled_dbd)
1645*9e39c5baSBill Taylor {
1646*9e39c5baSBill Taylor 	dapls_tavor_wrid_entry_t	*wre_tmp;
1647*9e39c5baSBill Taylor 	uint32_t			head, tail, size;
1648*9e39c5baSBill Taylor 
1649*9e39c5baSBill Taylor 	/*
1650*9e39c5baSBill Taylor 	 * Find the entry in the container pointed to by the "tail" index.
1651*9e39c5baSBill Taylor 	 * Add all of the relevant information to that entry, including WRID,
1652*9e39c5baSBill Taylor 	 * "wqeaddrsz" parameter, and whether it was signaled/unsignaled
1653*9e39c5baSBill Taylor 	 * and/or doorbelled.
1654*9e39c5baSBill Taylor 	 */
1655*9e39c5baSBill Taylor 	head = wq->wq_wrid_post->wl_head;
1656*9e39c5baSBill Taylor 	tail = wq->wq_wrid_post->wl_tail;
1657*9e39c5baSBill Taylor 	size = wq->wq_wrid_post->wl_size;
1658*9e39c5baSBill Taylor 	wre_tmp = &wq->wq_wrid_post->wl_wre[tail];
1659*9e39c5baSBill Taylor 	wre_tmp->wr_wrid	  = wrid;
1660*9e39c5baSBill Taylor 	wre_tmp->wr_wqeaddrsz	  = wqeaddrsz;
1661*9e39c5baSBill Taylor 	wre_tmp->wr_signaled_dbd  = signaled_dbd;
1662*9e39c5baSBill Taylor 
1663*9e39c5baSBill Taylor 	/*
1664*9e39c5baSBill Taylor 	 * Update the "wrid_old_tail" pointer to point to the entry we just
1665*9e39c5baSBill Taylor 	 * inserted into the queue.  By tracking this pointer (the pointer to
1666*9e39c5baSBill Taylor 	 * the most recently inserted entry) it will possible later in the
1667*9e39c5baSBill Taylor 	 * PostSend() and PostRecv() code paths to find the entry that needs
1668*9e39c5baSBill Taylor 	 * its "doorbelled" flag set (see comment in tavor_post_recv() and/or
1669*9e39c5baSBill Taylor 	 * tavor_post_send()).
1670*9e39c5baSBill Taylor 	 */
1671*9e39c5baSBill Taylor 	wq->wq_wrid_post->wl_wre_old_tail = wre_tmp;
1672*9e39c5baSBill Taylor 
1673*9e39c5baSBill Taylor 	/* Update the tail index */
1674*9e39c5baSBill Taylor 	tail = ((tail + 1) & (size - 1));
1675*9e39c5baSBill Taylor 	wq->wq_wrid_post->wl_tail = tail;
1676*9e39c5baSBill Taylor 
1677*9e39c5baSBill Taylor 	/*
1678*9e39c5baSBill Taylor 	 * If the "tail" index has just wrapped over into the "head" index,
1679*9e39c5baSBill Taylor 	 * then we have filled the container.  We use the "full" flag to
1680*9e39c5baSBill Taylor 	 * indicate this condition and to distinguish it from the "empty"
1681*9e39c5baSBill Taylor 	 * condition (where head and tail are also equal).
1682*9e39c5baSBill Taylor 	 */
1683*9e39c5baSBill Taylor 	if (head == tail) {
1684*9e39c5baSBill Taylor 		wq->wq_wrid_post->wl_full = 1;
1685*9e39c5baSBill Taylor 	}
1686*9e39c5baSBill Taylor }
1687*9e39c5baSBill Taylor 
1688*9e39c5baSBill Taylor /*
1689*9e39c5baSBill Taylor  * dapli_tavor_wrid_add_entry_srq()
1690*9e39c5baSBill Taylor  */
1691*9e39c5baSBill Taylor extern void
dapli_tavor_wrid_add_entry_srq(ib_srq_handle_t srq,uint64_t wrid,uint32_t wqe_index)1692*9e39c5baSBill Taylor dapli_tavor_wrid_add_entry_srq(ib_srq_handle_t srq, uint64_t wrid,
1693*9e39c5baSBill Taylor     uint32_t wqe_index)
1694*9e39c5baSBill Taylor {
1695*9e39c5baSBill Taylor 	dapls_tavor_wrid_entry_t	*wre;
1696*9e39c5baSBill Taylor 
1697*9e39c5baSBill Taylor 	/* ASSERT on impossible wqe_index values */
1698*9e39c5baSBill Taylor 	dapl_os_assert(wqe_index < srq->srq_wq_numwqe);
1699*9e39c5baSBill Taylor 
1700*9e39c5baSBill Taylor 	/*
1701*9e39c5baSBill Taylor 	 * Setup the WRE.
1702*9e39c5baSBill Taylor 	 *
1703*9e39c5baSBill Taylor 	 * Given the 'wqe_index' value, we store the WRID at this WRE offset.
1704*9e39c5baSBill Taylor 	 * And we set the WRE to be signaled_dbd so that on poll CQ we can find
1705*9e39c5baSBill Taylor 	 * this information and associate the WRID to the WQE found on the CQE.
1706*9e39c5baSBill Taylor 	 * Note: all Recv WQEs are essentially "signaled"
1707*9e39c5baSBill Taylor 	 */
1708*9e39c5baSBill Taylor 	wre = &srq->srq_wridlist->wl_wre[wqe_index];
1709*9e39c5baSBill Taylor 	wre->wr_wrid = wrid;
1710*9e39c5baSBill Taylor 	wre->wr_signaled_dbd = (uint32_t)TAVOR_WRID_ENTRY_SIGNALED;
1711*9e39c5baSBill Taylor }
1712*9e39c5baSBill Taylor 
1713*9e39c5baSBill Taylor /*
1714*9e39c5baSBill Taylor  * dapli_tavor_cq_srq_entries_flush()
1715*9e39c5baSBill Taylor  */
1716*9e39c5baSBill Taylor static void
dapli_tavor_cq_srq_entries_flush(ib_qp_handle_t qp)1717*9e39c5baSBill Taylor dapli_tavor_cq_srq_entries_flush(ib_qp_handle_t qp)
1718*9e39c5baSBill Taylor {
1719*9e39c5baSBill Taylor 	ib_cq_handle_t		cq;
1720*9e39c5baSBill Taylor 	dapls_tavor_workq_hdr_t	*wqhdr;
1721*9e39c5baSBill Taylor 	tavor_hw_cqe_t		*cqe;
1722*9e39c5baSBill Taylor 	tavor_hw_cqe_t		*next_cqe;
1723*9e39c5baSBill Taylor 	uint32_t		cons_indx, tail_cons_indx, wrap_around_mask;
1724*9e39c5baSBill Taylor 	uint32_t		new_indx, check_indx, indx;
1725*9e39c5baSBill Taylor 	uint32_t		num_to_increment;
1726*9e39c5baSBill Taylor 	int			cqe_qpnum, cqe_type;
1727*9e39c5baSBill Taylor 	int			outstanding_cqes, removed_cqes;
1728*9e39c5baSBill Taylor 	int			i;
1729*9e39c5baSBill Taylor 
1730*9e39c5baSBill Taylor 	/* ASSERT(MUTEX_HELD(&qp->qp_rq_cqhdl->cq_lock)); */
1731*9e39c5baSBill Taylor 
1732*9e39c5baSBill Taylor 	cq = qp->qp_rq_cqhdl;
1733*9e39c5baSBill Taylor 	wqhdr = qp->qp_rq_wqhdr;
1734*9e39c5baSBill Taylor 
1735*9e39c5baSBill Taylor 	dapl_os_assert(wqhdr->wq_wrid_post != NULL);
1736*9e39c5baSBill Taylor 	dapl_os_assert(wqhdr->wq_wrid_post->wl_srq_en != 0);
1737*9e39c5baSBill Taylor 
1738*9e39c5baSBill Taylor 	/* Get the consumer index */
1739*9e39c5baSBill Taylor 	cons_indx = cq->cq_consindx;
1740*9e39c5baSBill Taylor 
1741*9e39c5baSBill Taylor 	/*
1742*9e39c5baSBill Taylor 	 * Calculate the wrap around mask.  Note: This operation only works
1743*9e39c5baSBill Taylor 	 * because all Tavor completion queues have power-of-2 sizes
1744*9e39c5baSBill Taylor 	 */
1745*9e39c5baSBill Taylor 	wrap_around_mask = (cq->cq_size - 1);
1746*9e39c5baSBill Taylor 
1747*9e39c5baSBill Taylor 	/* Calculate the pointer to the first CQ entry */
1748*9e39c5baSBill Taylor 	cqe = &cq->cq_addr[cons_indx];
1749*9e39c5baSBill Taylor 
1750*9e39c5baSBill Taylor 	/*
1751*9e39c5baSBill Taylor 	 * Loop through the CQ looking for entries owned by software.  If an
1752*9e39c5baSBill Taylor 	 * entry is owned by software then we increment an 'outstanding_cqes'
1753*9e39c5baSBill Taylor 	 * count to know how many entries total we have on our CQ.  We use this
1754*9e39c5baSBill Taylor 	 * value further down to know how many entries to loop through looking
1755*9e39c5baSBill Taylor 	 * for our same QP number.
1756*9e39c5baSBill Taylor 	 */
1757*9e39c5baSBill Taylor 	outstanding_cqes = 0;
1758*9e39c5baSBill Taylor 	tail_cons_indx = cons_indx;
1759*9e39c5baSBill Taylor 	while (TAVOR_CQE_OWNER_IS_SW(cqe)) {
1760*9e39c5baSBill Taylor 		/* increment total cqes count */
1761*9e39c5baSBill Taylor 		outstanding_cqes++;
1762*9e39c5baSBill Taylor 
1763*9e39c5baSBill Taylor 		/* increment the consumer index */
1764*9e39c5baSBill Taylor 		tail_cons_indx = (tail_cons_indx + 1) & wrap_around_mask;
1765*9e39c5baSBill Taylor 
1766*9e39c5baSBill Taylor 		/* update the pointer to the next cq entry */
1767*9e39c5baSBill Taylor 		cqe = &cq->cq_addr[tail_cons_indx];
1768*9e39c5baSBill Taylor 	}
1769*9e39c5baSBill Taylor 
1770*9e39c5baSBill Taylor 	/*
1771*9e39c5baSBill Taylor 	 * Using the 'tail_cons_indx' that was just set, we now know how many
1772*9e39c5baSBill Taylor 	 * total CQEs possible there are.  Set the 'check_indx' and the
1773*9e39c5baSBill Taylor 	 * 'new_indx' to the last entry identified by 'tail_cons_indx'
1774*9e39c5baSBill Taylor 	 */
1775*9e39c5baSBill Taylor 	check_indx = new_indx = (tail_cons_indx - 1) & wrap_around_mask;
1776*9e39c5baSBill Taylor 
1777*9e39c5baSBill Taylor 	for (i = 0; i < outstanding_cqes; i++) {
1778*9e39c5baSBill Taylor 		cqe = &cq->cq_addr[check_indx];
1779*9e39c5baSBill Taylor 
1780*9e39c5baSBill Taylor 		/* Grab QP number from CQE */
1781*9e39c5baSBill Taylor 		cqe_qpnum = TAVOR_CQE_QPNUM_GET(cqe);
1782*9e39c5baSBill Taylor 		cqe_type = TAVOR_CQE_SENDRECV_GET(cqe);
1783*9e39c5baSBill Taylor 
1784*9e39c5baSBill Taylor 		/*
1785*9e39c5baSBill Taylor 		 * If the QP number is the same in the CQE as the QP that we
1786*9e39c5baSBill Taylor 		 * have on this SRQ, then we must free up the entry off the
1787*9e39c5baSBill Taylor 		 * SRQ.  We also make sure that the completion type is of the
1788*9e39c5baSBill Taylor 		 * 'TAVOR_COMPLETION_RECV' type.  So any send completions on
1789*9e39c5baSBill Taylor 		 * this CQ will be left as-is.  The handling of returning
1790*9e39c5baSBill Taylor 		 * entries back to HW ownership happens further down.
1791*9e39c5baSBill Taylor 		 */
1792*9e39c5baSBill Taylor 		if (cqe_qpnum == qp->qp_num &&
1793*9e39c5baSBill Taylor 		    cqe_type == TAVOR_COMPLETION_RECV) {
1794*9e39c5baSBill Taylor 			/* Add back to SRQ free list */
1795*9e39c5baSBill Taylor 			(void) dapli_tavor_wrid_find_match_srq(
1796*9e39c5baSBill Taylor 			    wqhdr->wq_wrid_post, cqe);
1797*9e39c5baSBill Taylor 		} else {
1798*9e39c5baSBill Taylor 			/* Do Copy */
1799*9e39c5baSBill Taylor 			if (check_indx != new_indx) {
1800*9e39c5baSBill Taylor 				next_cqe = &cq->cq_addr[new_indx];
1801*9e39c5baSBill Taylor 				/*
1802*9e39c5baSBill Taylor 				 * Copy the CQE into the "next_cqe"
1803*9e39c5baSBill Taylor 				 * pointer.
1804*9e39c5baSBill Taylor 				 */
1805*9e39c5baSBill Taylor 				(void) dapl_os_memcpy(next_cqe, cqe,
1806*9e39c5baSBill Taylor 				    sizeof (tavor_hw_cqe_t));
1807*9e39c5baSBill Taylor 			}
1808*9e39c5baSBill Taylor 			new_indx = (new_indx - 1) & wrap_around_mask;
1809*9e39c5baSBill Taylor 		}
1810*9e39c5baSBill Taylor 		/* Move index to next CQE to check */
1811*9e39c5baSBill Taylor 		check_indx = (check_indx - 1) & wrap_around_mask;
1812*9e39c5baSBill Taylor 	}
1813*9e39c5baSBill Taylor 
1814*9e39c5baSBill Taylor 	/* Initialize removed cqes count */
1815*9e39c5baSBill Taylor 	removed_cqes = 0;
1816*9e39c5baSBill Taylor 
1817*9e39c5baSBill Taylor 	/* If an entry was removed */
1818*9e39c5baSBill Taylor 	if (check_indx != new_indx) {
1819*9e39c5baSBill Taylor 
1820*9e39c5baSBill Taylor 		/*
1821*9e39c5baSBill Taylor 		 * Set current pointer back to the beginning consumer index.
1822*9e39c5baSBill Taylor 		 * At this point, all unclaimed entries have been copied to the
1823*9e39c5baSBill Taylor 		 * index specified by 'new_indx'.  This 'new_indx' will be used
1824*9e39c5baSBill Taylor 		 * as the new consumer index after we mark all freed entries as
1825*9e39c5baSBill Taylor 		 * having HW ownership.  We do that here.
1826*9e39c5baSBill Taylor 		 */
1827*9e39c5baSBill Taylor 
1828*9e39c5baSBill Taylor 		/* Loop through all entries until we reach our new pointer */
1829*9e39c5baSBill Taylor 		for (indx = cons_indx; indx <= new_indx;
1830*9e39c5baSBill Taylor 		    indx = (indx + 1) & wrap_around_mask) {
1831*9e39c5baSBill Taylor 			removed_cqes++;
1832*9e39c5baSBill Taylor 			cqe = &cq->cq_addr[indx];
1833*9e39c5baSBill Taylor 
1834*9e39c5baSBill Taylor 			/* Reset entry to hardware ownership */
1835*9e39c5baSBill Taylor 			TAVOR_CQE_OWNER_SET_HW(cqe);
1836*9e39c5baSBill Taylor 		}
1837*9e39c5baSBill Taylor 	}
1838*9e39c5baSBill Taylor 
1839*9e39c5baSBill Taylor 	/*
1840*9e39c5baSBill Taylor 	 * Update consumer index to be the 'new_indx'.  This moves it past all
1841*9e39c5baSBill Taylor 	 * removed entries.  Because 'new_indx' is pointing to the last
1842*9e39c5baSBill Taylor 	 * previously valid SW owned entry, we add 1 to point the cons_indx to
1843*9e39c5baSBill Taylor 	 * the first HW owned entry.
1844*9e39c5baSBill Taylor 	 */
1845*9e39c5baSBill Taylor 	cons_indx = (new_indx + 1) & wrap_around_mask;
1846*9e39c5baSBill Taylor 
1847*9e39c5baSBill Taylor 	/*
1848*9e39c5baSBill Taylor 	 * Now we only ring the doorbell (to update the consumer index) if
1849*9e39c5baSBill Taylor 	 * we've actually consumed a CQ entry.  If we found no QP number
1850*9e39c5baSBill Taylor 	 * matches above, then we would not have removed anything.  So only if
1851*9e39c5baSBill Taylor 	 * something was removed do we ring the doorbell.
1852*9e39c5baSBill Taylor 	 */
1853*9e39c5baSBill Taylor 	if ((removed_cqes != 0) && (cq->cq_consindx != cons_indx)) {
1854*9e39c5baSBill Taylor 		/*
1855*9e39c5baSBill Taylor 		 * Post doorbell to update the consumer index.  Doorbell
1856*9e39c5baSBill Taylor 		 * value indicates number of entries consumed (minus 1)
1857*9e39c5baSBill Taylor 		 */
1858*9e39c5baSBill Taylor 		if (cons_indx > cq->cq_consindx) {
1859*9e39c5baSBill Taylor 			num_to_increment = (cons_indx - cq->cq_consindx) - 1;
1860*9e39c5baSBill Taylor 		} else {
1861*9e39c5baSBill Taylor 			num_to_increment = ((cons_indx + cq->cq_size) -
1862*9e39c5baSBill Taylor 			    cq->cq_consindx) - 1;
1863*9e39c5baSBill Taylor 		}
1864*9e39c5baSBill Taylor 		cq->cq_consindx = cons_indx;
1865*9e39c5baSBill Taylor 
1866*9e39c5baSBill Taylor 		dapli_tavor_cq_doorbell(cq->cq_iauar, TAVOR_CQDB_INCR_CONSINDX,
1867*9e39c5baSBill Taylor 		    cq->cq_num, num_to_increment);
1868*9e39c5baSBill Taylor 	}
1869*9e39c5baSBill Taylor }
1870*9e39c5baSBill Taylor 
1871*9e39c5baSBill Taylor /* ARGSUSED */
1872*9e39c5baSBill Taylor static void
dapli_tavor_qp_init(ib_qp_handle_t qp)1873*9e39c5baSBill Taylor dapli_tavor_qp_init(ib_qp_handle_t qp)
1874*9e39c5baSBill Taylor {
1875*9e39c5baSBill Taylor }
1876*9e39c5baSBill Taylor 
1877*9e39c5baSBill Taylor /* ARGSUSED */
1878*9e39c5baSBill Taylor static void
dapli_tavor_cq_init(ib_cq_handle_t cq)1879*9e39c5baSBill Taylor dapli_tavor_cq_init(ib_cq_handle_t cq)
1880*9e39c5baSBill Taylor {
1881*9e39c5baSBill Taylor }
1882*9e39c5baSBill Taylor 
1883*9e39c5baSBill Taylor /* ARGSUSED */
1884*9e39c5baSBill Taylor static void
dapli_tavor_srq_init(ib_srq_handle_t srq)1885*9e39c5baSBill Taylor dapli_tavor_srq_init(ib_srq_handle_t srq)
1886*9e39c5baSBill Taylor {
1887*9e39c5baSBill Taylor }
1888*9e39c5baSBill Taylor 
1889*9e39c5baSBill Taylor void
dapls_init_funcs_tavor(DAPL_HCA * hca_ptr)1890*9e39c5baSBill Taylor dapls_init_funcs_tavor(DAPL_HCA *hca_ptr)
1891*9e39c5baSBill Taylor {
1892*9e39c5baSBill Taylor 	hca_ptr->post_send = dapli_tavor_post_send;
1893*9e39c5baSBill Taylor 	hca_ptr->post_recv = dapli_tavor_post_recv;
1894*9e39c5baSBill Taylor 	hca_ptr->post_srq = dapli_tavor_post_srq;
1895*9e39c5baSBill Taylor 	hca_ptr->cq_peek = dapli_tavor_cq_peek;
1896*9e39c5baSBill Taylor 	hca_ptr->cq_poll = dapli_tavor_cq_poll;
1897*9e39c5baSBill Taylor 	hca_ptr->cq_poll_one = dapli_tavor_cq_poll_one;
1898*9e39c5baSBill Taylor 	hca_ptr->cq_notify = dapli_tavor_cq_notify;
1899*9e39c5baSBill Taylor 	hca_ptr->srq_flush = dapli_tavor_cq_srq_entries_flush;
1900*9e39c5baSBill Taylor 	hca_ptr->qp_init = dapli_tavor_qp_init;
1901*9e39c5baSBill Taylor 	hca_ptr->cq_init = dapli_tavor_cq_init;
1902*9e39c5baSBill Taylor 	hca_ptr->srq_init = dapli_tavor_srq_init;
1903*9e39c5baSBill Taylor 	hca_ptr->hermon_resize_cq = 0;
1904*9e39c5baSBill Taylor }
1905