/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * tavor_wr.c
 *    Tavor Work Request Processing Routines
 *
 *    Implements all the routines necessary to provide the PostSend(),
 *    PostRecv() and PostSRQ() verbs.  Also contains all the code
 *    necessary to implement the Tavor WRID tracking mechanism.
 */

#include <sys/types.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/modctl.h>
#include <sys/avl.h>

#include <sys/ib/adapters/tavor/tavor.h>

static void tavor_qp_send_doorbell(tavor_state_t *state, uint32_t nda,
    uint32_t nds, uint32_t qpn, uint32_t fence, uint32_t nopcode);
#pragma inline(tavor_qp_send_doorbell)
static void tavor_qp_recv_doorbell(tavor_state_t *state, uint32_t nda,
    uint32_t nds, uint32_t qpn, uint32_t credits);
#pragma inline(tavor_qp_recv_doorbell)
static uint32_t tavor_wr_get_immediate(ibt_send_wr_t *wr);
static int tavor_wr_bind_check(tavor_state_t *state, ibt_send_wr_t *wr);
static int tavor_wqe_send_build(tavor_state_t *state, tavor_qphdl_t qp,
    ibt_send_wr_t *wr, uint64_t *desc, uint_t *size);
static void tavor_wqe_send_linknext(ibt_send_wr_t *curr_wr,
    ibt_send_wr_t *prev_wr, uint64_t *curr_desc, uint_t curr_descsz,
    uint64_t *prev_desc, tavor_sw_wqe_dbinfo_t *dbinfo, tavor_qphdl_t qp);
static int tavor_wqe_mlx_build(tavor_state_t *state, tavor_qphdl_t qp,
    ibt_send_wr_t *wr, uint64_t *desc, uint_t *size);
static void tavor_wqe_mlx_linknext(ibt_send_wr_t *prev_wr, uint64_t *curr_desc,
    uint_t curr_descsz, uint64_t *prev_desc, tavor_sw_wqe_dbinfo_t *dbinfo,
    tavor_qphdl_t qp);
static int tavor_wqe_recv_build(tavor_state_t *state, tavor_qphdl_t qp,
    ibt_recv_wr_t *wr, uint64_t *desc, uint_t *size);
static void tavor_wqe_recv_linknext(uint64_t *desc, uint_t desc_sz,
    uint64_t *prev, tavor_qphdl_t qp);
static int tavor_wqe_srq_build(tavor_state_t *state, tavor_srqhdl_t srq,
    ibt_recv_wr_t *wr, uint64_t *desc);
static void tavor_wqe_srq_linknext(uint64_t *desc, uint64_t *prev,
    tavor_srqhdl_t srq);
static void tavor_wqe_sync(void *hdl, uint_t sync_from,
    uint_t sync_to, uint_t sync_type, uint_t flag);
static tavor_wrid_entry_t *tavor_wrid_find_match(tavor_workq_hdr_t *wq,
    tavor_cqhdl_t cq, tavor_hw_cqe_t *cqe);
static void tavor_wrid_reaplist_add(tavor_cqhdl_t cq, tavor_workq_hdr_t *wq);
static tavor_workq_hdr_t *tavor_wrid_wqhdr_find(tavor_cqhdl_t cq, uint_t qpn,
    uint_t send_or_recv);
static tavor_workq_hdr_t *tavor_wrid_wqhdr_create(tavor_state_t *state,
    tavor_cqhdl_t cq, uint_t qpn, uint_t wq_type, uint_t create_wql);
static uint32_t tavor_wrid_get_wqeaddrsz(tavor_workq_hdr_t *wq);
static void tavor_wrid_wqhdr_add(tavor_workq_hdr_t *wqhdr,
    tavor_wrid_list_hdr_t *wrid_list);
static void tavor_wrid_wqhdr_remove(tavor_workq_hdr_t *wqhdr,
    tavor_wrid_list_hdr_t *wrid_list);
static tavor_workq_hdr_t *tavor_wrid_list_reap(tavor_wrid_list_hdr_t *wq);
static void tavor_wrid_wqhdr_lock_both(tavor_qphdl_t qp);
static void tavor_wrid_wqhdr_unlock_both(tavor_qphdl_t qp);
static void tavor_cq_wqhdr_add(tavor_cqhdl_t cq, tavor_workq_hdr_t *wqhdr);
static void tavor_cq_wqhdr_remove(tavor_cqhdl_t cq, tavor_workq_hdr_t *wqhdr);

/*
 * tavor_post_send()
 *    Context: Can be called from interrupt or base context.
 */
int
tavor_post_send(tavor_state_t *state, tavor_qphdl_t qp,
    ibt_send_wr_t *wr, uint_t num_wr, uint_t *num_posted)
{
	tavor_sw_wqe_dbinfo_t		dbinfo;
	tavor_wrid_list_hdr_t		*wridlist;
	tavor_wrid_entry_t		*wre_last;
	uint64_t			*desc, *prev, *first;
	uint32_t			desc_sz, first_sz;
	uint32_t			wqeaddrsz, signaled_dbd;
	uint32_t			head, tail, next_tail, qsize_msk;
	uint32_t			sync_from, sync_to;
	uint_t				currindx, wrindx, numremain;
	uint_t				chainlen, chainbegin, posted_cnt;
	uint_t				maxdb = TAVOR_QP_MAXDESC_PER_DB;
	int				status;

	/*
	 * Check for user-mappable QP memory.  Note:  We do not allow kernel
	 * clients to post to QP memory that is accessible directly by the
	 * user.  If the QP memory is user accessible, then return an error.
	 */
	if (qp->qp_is_umap) {
		return (IBT_QP_HDL_INVALID);
	}

	/* Initialize posted_cnt */
	posted_cnt = 0;

	mutex_enter(&qp->qp_lock);

	/*
	 * Check QP state.  Can not post Send requests from the "Reset",
	 * "Init", or "RTR" states
	 */
	if ((qp->qp_state == TAVOR_QP_RESET) ||
	    (qp->qp_state == TAVOR_QP_INIT) ||
	    (qp->qp_state == TAVOR_QP_RTR)) {
		mutex_exit(&qp->qp_lock);
		return (IBT_QP_STATE_INVALID);
	}

	/* Grab the lock for the WRID list */
	mutex_enter(&qp->qp_sq_wqhdr->wq_wrid_wql->wql_lock);
	wridlist  = qp->qp_sq_wqhdr->wq_wrid_post;

	/* Save away some initial QP state */
	qsize_msk = qp->qp_sq_wqhdr->wq_size - 1;
	tail	  = qp->qp_sq_wqhdr->wq_tail;
	head	  = qp->qp_sq_wqhdr->wq_head;

	/*
	 * For each ibt_send_wr_t in the wr[] list passed in, parse the
	 * request and build a Send WQE.  Note:  Because we are potentially
	 * building a chain of WQEs, we want to link them all together.
	 * However, we do not want to link the first one to the previous
	 * WQE until the entire chain has been linked.  Then in the last
	 * step we ring the appropriate doorbell.  Note:  It is possible for
	 * more Work Requests to be posted than the HW will support at one
	 * shot.  If this happens, we need to be able to post and ring
	 * several chains here until the entire request is complete.
	 */
	wrindx = 0;
	numremain = num_wr;
	status	  = DDI_SUCCESS;
	while ((wrindx < num_wr) && (status == DDI_SUCCESS)) {
		/*
		 * For the first WQE on a new chain we need "prev" to point
		 * to the current descriptor.  As we begin to process
		 * further, "prev" will be updated to point to the previous
		 * WQE on the current chain (see below).
		 */
		prev = TAVOR_QP_SQ_ENTRY(qp, tail);

		/*
		 * Before we begin, save the current "tail index" for later
		 * DMA sync
		 */
		sync_from = tail;

		/*
		 * Break the request up into chains that are less than or
		 * equal to the maximum number of WQEs that can be posted
		 * per doorbell ring
		 */
		chainlen   = (numremain > maxdb) ? maxdb : numremain;
		numremain -= chainlen;
		chainbegin = wrindx;
		for (currindx = 0; currindx < chainlen; currindx++, wrindx++) {
			/*
			 * Check for "queue full" condition.  If the queue
			 * is already full, then no more WQEs can be posted.
			 * So break out, ring a doorbell (if necessary) and
			 * return an error
			 */
			if (qp->qp_sq_wqhdr->wq_full != 0) {
				status = IBT_QP_FULL;
				break;
			}

			/*
			 * Increment the "tail index" and check for "queue
			 * full" condition.  If we detect that the current
			 * work request is going to fill the work queue, then
			 * we mark this condition and continue.
			 */
			next_tail = (tail + 1) & qsize_msk;
			if (next_tail == head) {
				qp->qp_sq_wqhdr->wq_full = 1;
			}
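
			/*
			 * Worked example (assuming a power-of-two queue
			 * size, as the mask arithmetic above implies): with
			 * wq_size = 8, qsize_msk is 0x7.  If head == 2 and
			 * tail == 1, then next_tail == 2 == head, so the
			 * queue is marked full and the WQE built below will
			 * consume the last free slot.
			 */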

			/*
			 * Get the address of the location where the next
			 * Send WQE should be built
			 */
			desc = TAVOR_QP_SQ_ENTRY(qp, tail);

			/*
			 * Call tavor_wqe_send_build() to build the WQE
			 * at the given address.  This routine uses the
			 * information in the ibt_send_wr_t list (wr[]) and
			 * returns the size of the WQE when it returns.
			 */
			status = tavor_wqe_send_build(state, qp,
			    &wr[wrindx], desc, &desc_sz);
			if (status != DDI_SUCCESS) {
				break;
			}

			/*
			 * Add a WRID entry to the WRID list.  Need to
			 * calculate the "wqeaddrsz" and "signaled_dbd"
			 * values to pass to tavor_wrid_add_entry()
			 */
			wqeaddrsz = TAVOR_QP_WQEADDRSZ((uint64_t *)(uintptr_t)
			    ((uint64_t)(uintptr_t)desc - qp->qp_desc_off),
			    desc_sz);
			if ((qp->qp_sq_sigtype == TAVOR_QP_SQ_ALL_SIGNALED) ||
			    (wr[wrindx].wr_flags & IBT_WR_SEND_SIGNAL)) {
				signaled_dbd = TAVOR_WRID_ENTRY_SIGNALED;
			} else {
				signaled_dbd = 0;
			}
			tavor_wrid_add_entry(qp->qp_sq_wqhdr,
			    wr[wrindx].wr_id, wqeaddrsz, signaled_dbd);

			/*
			 * If this is not the first descriptor on the current
			 * chain, then link it to the previous WQE.  Otherwise,
			 * save the address and size of this descriptor (in
			 * "first" and "first_sz" respectively) and continue.
			 * Note: Linking a WQE to the previous one will
			 * depend on whether the two WQEs are from "special
			 * QPs" (i.e. MLX transport WQEs) or whether they are
			 * normal Send WQEs.
			 */
			if (currindx != 0) {
				if (qp->qp_is_special) {
					tavor_wqe_mlx_linknext(&wr[wrindx - 1],
					    desc, desc_sz, prev, NULL, qp);
				} else {
					tavor_wqe_send_linknext(&wr[wrindx],
					    &wr[wrindx - 1], desc, desc_sz,
					    prev, NULL, qp);
				}
				prev = desc;
			} else {
				first	 = desc;
				first_sz = desc_sz;
			}

			/*
			 * Update the current "tail index" and increment
			 * "posted_cnt"
			 */
			tail = next_tail;
			posted_cnt++;
		}

		/*
		 * If we reach here and there are one or more WQEs which have
		 * been successfully chained together, then we need to link
		 * the current chain to the previously executing chain of
		 * descriptors (if there is one) and ring the doorbell for the
		 * send work queue.
		 */
		if (currindx != 0) {
			/*
			 * Before we link the chain, we need to ensure that the
			 * "next" field on the last WQE is set to NULL (to
			 * indicate the end of the chain).  Note: Just as it
			 * did above, the format for the "next" fields in a
			 * given WQE depends on whether the WQE is MLX
			 * transport or not.
			 */
			if (qp->qp_is_special) {
				tavor_wqe_mlx_linknext(&wr[chainbegin +
				    currindx - 1], NULL, 0, prev, NULL, qp);
			} else {
				tavor_wqe_send_linknext(NULL,
				    &wr[chainbegin + currindx - 1], NULL, 0,
				    prev, NULL, qp);
			}

			/* Save away updated "tail index" for the DMA sync */
			sync_to = tail;

			/* Do a DMA sync for current send WQE(s) */
			tavor_wqe_sync(qp, sync_from, sync_to, TAVOR_WR_SEND,
			    DDI_DMA_SYNC_FORDEV);

			/*
			 * Now link the chain to the old chain (if there was
			 * one).  Note: we still need to pay attention to
			 * whether the QP uses MLX transport WQEs or not.
			 */
			if (qp->qp_is_special) {
				tavor_wqe_mlx_linknext(NULL, first, first_sz,
				    qp->qp_sq_lastwqeaddr, &dbinfo, qp);
			} else {
				tavor_wqe_send_linknext(&wr[chainbegin], NULL,
				    first, first_sz, qp->qp_sq_lastwqeaddr,
				    &dbinfo, qp);
			}

			/*
			 * If there was a valid previous WQE (i.e. non-NULL),
			 * then sync it too.  This is because we have updated
			 * its "next" fields and we want to ensure that the
			 * hardware can see the changes.
			 */
			if (qp->qp_sq_lastwqeaddr != NULL) {
				sync_to   = sync_from;
				sync_from = (sync_from - 1) & qsize_msk;
				tavor_wqe_sync(qp, sync_from, sync_to,
				    TAVOR_WR_SEND, DDI_DMA_SYNC_FORDEV);
			}

			/*
			 * Now if the WRID tail entry is non-NULL, then this
			 * represents the entry to which we are chaining the
			 * new entries.  Since we are going to ring the
			 * doorbell for this WQE, we want to set its "dbd" bit.
			 *
			 * On the other hand, if the tail is NULL, even though
			 * we will have rung the doorbell for the previous WQE
			 * (for the hardware's sake) it is irrelevant to our
			 * purposes (for tracking WRIDs) because we know the
			 * request must have already completed.
			 */
			wre_last = wridlist->wl_wre_old_tail;
			if (wre_last != NULL) {
				wre_last->wr_signaled_dbd |=
				    TAVOR_WRID_ENTRY_DOORBELLED;
			}

			/* Update some of the state in the QP */
			qp->qp_sq_lastwqeaddr	 = desc;
			qp->qp_sq_wqhdr->wq_tail = tail;

			/* Ring the doorbell */
			tavor_qp_send_doorbell(state,
			    (uint32_t)((uintptr_t)first - qp->qp_desc_off),
			    first_sz, qp->qp_qpnum, dbinfo.db_fence,
			    dbinfo.db_nopcode);
		}
	}

	/*
	 * Update the "num_posted" return value (if necessary).  Then drop
	 * the locks and return success.
	 */
	if (num_posted != NULL) {
		*num_posted = posted_cnt;
	}

	mutex_exit(&qp->qp_sq_wqhdr->wq_wrid_wql->wql_lock);
	mutex_exit(&qp->qp_lock);

	return (status);
}
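
/*
 * Usage sketch (illustration only, not part of the driver): a kernel
 * client holding a non-userland-mapped UD QP might post a single
 * signaled Send through the routine above roughly as follows.  The
 * WRID value, destination handle ("dest") and SGL ("sgl") below are
 * placeholders.
 *
 *	ibt_send_wr_t	wr;
 *	uint_t		posted;
 *
 *	wr.wr_id	= 0x1234;
 *	wr.wr_flags	= IBT_WR_SEND_SIGNAL;
 *	wr.wr_trans	= IBT_UD_SRV;
 *	wr.wr_opcode	= IBT_WRC_SEND;
 *	wr.wr.ud.udwr_dest = dest;
 *	wr.wr_nds	= 1;
 *	wr.wr_sgl	= &sgl;
 *	status = tavor_post_send(state, qp, &wr, 1, &posted);
 */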


/*
 * tavor_post_recv()
 *    Context: Can be called from interrupt or base context.
 */
int
tavor_post_recv(tavor_state_t *state, tavor_qphdl_t qp,
    ibt_recv_wr_t *wr, uint_t num_wr, uint_t *num_posted)
{
	uint64_t			*desc, *prev, *first;
	uint32_t			desc_sz, first_sz;
	uint32_t			wqeaddrsz, signaled_dbd;
	uint32_t			head, tail, next_tail, qsize_msk;
	uint32_t			sync_from, sync_to;
	uint_t				currindx, wrindx, numremain;
	uint_t				chainlen, posted_cnt;
	uint_t				maxdb = TAVOR_QP_MAXDESC_PER_DB;
	int				status;

	/*
	 * Check for user-mappable QP memory.  Note:  We do not allow kernel
	 * clients to post to QP memory that is accessible directly by the
	 * user.  If the QP memory is user accessible, then return an error.
	 */
	if (qp->qp_is_umap) {
		return (IBT_QP_HDL_INVALID);
	}

	/* Initialize posted_cnt */
	posted_cnt = 0;

	mutex_enter(&qp->qp_lock);

	/*
	 * Check if QP is associated with an SRQ
	 */
	if (qp->qp_srq_en == TAVOR_QP_SRQ_ENABLED) {
		mutex_exit(&qp->qp_lock);
		return (IBT_SRQ_IN_USE);
	}

	/*
	 * Check QP state.  Can not post Recv requests from the "Reset" state
	 */
	if (qp->qp_state == TAVOR_QP_RESET) {
		mutex_exit(&qp->qp_lock);
		return (IBT_QP_STATE_INVALID);
	}

	/* Grab the lock for the WRID list */
	mutex_enter(&qp->qp_rq_wqhdr->wq_wrid_wql->wql_lock);

	/* Save away some initial QP state */
	qsize_msk = qp->qp_rq_wqhdr->wq_size - 1;
	tail	  = qp->qp_rq_wqhdr->wq_tail;
	head	  = qp->qp_rq_wqhdr->wq_head;

	/*
	 * For each ibt_recv_wr_t in the wr[] list passed in, parse the
	 * request and build a Recv WQE.  Note:  Because we are potentially
	 * building a chain of WQEs, we want to link them all together.
	 * However, we do not want to link the first one to the previous
	 * WQE until the entire chain has been linked.  Then in the last
	 * step we ring the appropriate doorbell.  Note:  It is possible for
	 * more Work Requests to be posted than the HW will support at one
	 * shot.  If this happens, we need to be able to post and ring
	 * several chains here until the entire request is complete.
	 */
	wrindx = 0;
	numremain = num_wr;
	status	  = DDI_SUCCESS;
	while ((wrindx < num_wr) && (status == DDI_SUCCESS)) {
		/*
		 * For the first WQE on a new chain we need "prev" to point
		 * to the current descriptor.  As we begin to process
		 * further, "prev" will be updated to point to the previous
		 * WQE on the current chain (see below).
		 */
		prev = TAVOR_QP_RQ_ENTRY(qp, tail);

		/*
		 * Before we begin, save the current "tail index" for later
		 * DMA sync
		 */
		sync_from = tail;

		/*
		 * Break the request up into chains that are less than or
		 * equal to the maximum number of WQEs that can be posted
		 * per doorbell ring
		 */
		chainlen = (numremain > maxdb) ? maxdb : numremain;
		numremain -= chainlen;
		for (currindx = 0; currindx < chainlen; currindx++, wrindx++) {
			/*
			 * Check for "queue full" condition.  If the queue
			 * is already full, then no more WQEs can be posted.
			 * So break out, ring a doorbell (if necessary) and
			 * return an error
			 */
			if (qp->qp_rq_wqhdr->wq_full != 0) {
				status = IBT_QP_FULL;
				break;
			}

			/*
			 * Increment the "tail index" and check for "queue
			 * full" condition.  If we detect that the current
			 * work request is going to fill the work queue, then
			 * we mark this condition and continue.
			 */
			next_tail = (tail + 1) & qsize_msk;
			if (next_tail == head) {
				qp->qp_rq_wqhdr->wq_full = 1;
			}

			/*
			 * Get the address of the location where the next
			 * Recv WQE should be built
			 */
			desc = TAVOR_QP_RQ_ENTRY(qp, tail);

			/*
			 * Call tavor_wqe_recv_build() to build the WQE
			 * at the given address.  This routine uses the
			 * information in the ibt_recv_wr_t list (wr[]) and
			 * returns the size of the WQE when it returns.
			 */
			status = tavor_wqe_recv_build(state, qp, &wr[wrindx],
			    desc, &desc_sz);
			if (status != DDI_SUCCESS) {
				break;
			}

			/*
			 * Add a WRID entry to the WRID list.  Need to
			 * calculate the "wqeaddrsz" and "signaled_dbd"
			 * values to pass to tavor_wrid_add_entry().  Note:
			 * all Recv WQEs are essentially "signaled" and
			 * "doorbelled" (since Tavor HW requires all
			 * Recv WQEs to have their "DBD" bits set).
			 */
			wqeaddrsz = TAVOR_QP_WQEADDRSZ((uint64_t *)(uintptr_t)
			    ((uint64_t)(uintptr_t)desc - qp->qp_desc_off),
			    desc_sz);
			signaled_dbd = TAVOR_WRID_ENTRY_SIGNALED |
			    TAVOR_WRID_ENTRY_DOORBELLED;
			tavor_wrid_add_entry(qp->qp_rq_wqhdr,
			    wr[wrindx].wr_id, wqeaddrsz, signaled_dbd);

			/*
			 * If this is not the first descriptor on the current
			 * chain, then link it to the previous WQE.  Otherwise,
			 * save the address and size of this descriptor (in
			 * "first" and "first_sz" respectively) and continue.
			 */
			if (currindx != 0) {
				tavor_wqe_recv_linknext(desc, desc_sz, prev,
				    qp);
				prev = desc;
			} else {
				first	 = desc;
				first_sz = desc_sz;
			}

			/*
			 * Update the current "tail index" and increment
			 * "posted_cnt"
			 */
			tail = next_tail;
			posted_cnt++;
		}

		/*
		 * If we reach here and there are one or more WQEs which have
		 * been successfully chained together, then we need to link
		 * the current chain to the previously executing chain of
		 * descriptors (if there is one) and ring the doorbell for the
		 * recv work queue.
		 */
		if (currindx != 0) {
			/*
			 * Before we link the chain, we need to ensure that the
			 * "next" field on the last WQE is set to NULL (to
			 * indicate the end of the chain).
			 */
			tavor_wqe_recv_linknext(NULL, 0, prev, qp);

			/* Save away updated "tail index" for the DMA sync */
			sync_to = tail;

			/* Do a DMA sync for current recv WQE(s) */
			tavor_wqe_sync(qp, sync_from, sync_to, TAVOR_WR_RECV,
			    DDI_DMA_SYNC_FORDEV);

			/*
			 * Now link the chain to the old chain (if there was
			 * one).
			 */
			tavor_wqe_recv_linknext(first, first_sz,
			    qp->qp_rq_lastwqeaddr, qp);

			/*
			 * If there was a valid previous WQE (i.e. non-NULL),
			 * then sync it too.  This is because we have updated
			 * its "next" fields and we want to ensure that the
			 * hardware can see the changes.
			 */
			if (qp->qp_rq_lastwqeaddr != NULL) {
				sync_to	  = sync_from;
				sync_from = (sync_from - 1) & qsize_msk;
				tavor_wqe_sync(qp, sync_from, sync_to,
				    TAVOR_WR_RECV, DDI_DMA_SYNC_FORDEV);
			}

			/* Update some of the state in the QP */
			qp->qp_rq_lastwqeaddr	 = desc;
			qp->qp_rq_wqhdr->wq_tail = tail;

			/* Ring the doorbell */
			tavor_qp_recv_doorbell(state,
			    (uint32_t)((uintptr_t)first - qp->qp_desc_off),
			    first_sz, qp->qp_qpnum, (chainlen % maxdb));
		}
	}

	/*
	 * Update the "num_posted" return value (if necessary).  Then drop
	 * the locks and return success.
	 */
	if (num_posted != NULL) {
		*num_posted = posted_cnt;
	}

	mutex_exit(&qp->qp_rq_wqhdr->wq_wrid_wql->wql_lock);
	mutex_exit(&qp->qp_lock);

	return (status);
}

/*
 * tavor_post_srq()
 *    Context: Can be called from interrupt or base context.
 */
int
tavor_post_srq(tavor_state_t *state, tavor_srqhdl_t srq,
    ibt_recv_wr_t *wr, uint_t num_wr, uint_t *num_posted)
{
	uint64_t			*desc, *prev, *first, *last_wqe_addr;
	uint32_t			signaled_dbd;
	uint32_t			sync_indx;
	uint_t				currindx, wrindx, numremain;
	uint_t				chainlen, posted_cnt;
	uint_t				maxdb = TAVOR_QP_MAXDESC_PER_DB;
	int				status;

	/*
	 * Check for user-mappable SRQ memory.  Note:  We do not allow kernel
	 * clients to post to SRQ memory that is accessible directly by the
	 * user.  If the SRQ memory is user accessible, then return an error.
	 */
	if (srq->srq_is_umap) {
		return (IBT_SRQ_HDL_INVALID);
	}

	/* Initialize posted_cnt */
	posted_cnt = 0;

	mutex_enter(&srq->srq_lock);

	/*
	 * Check SRQ state.  Can not post Recv requests when SRQ is in error
	 */
	if (srq->srq_state == TAVOR_SRQ_STATE_ERROR) {
		mutex_exit(&srq->srq_lock);
		return (IBT_QP_STATE_INVALID);
	}

	/* Grab the lock for the WRID list */
	mutex_enter(&srq->srq_wrid_wql->wql_lock);

	/*
	 * For each ibt_recv_wr_t in the wr[] list passed in, parse the
	 * request and build a Recv WQE.  Note:  Because we are potentially
	 * building a chain of WQEs, we want to link them all together.
	 * However, we do not want to link the first one to the previous
	 * WQE until the entire chain has been linked.  Then in the last
	 * step we ring the appropriate doorbell.  Note:  It is possible for
	 * more Work Requests to be posted than the HW will support at one
	 * shot.  If this happens, we need to be able to post and ring
	 * several chains here until the entire request is complete.
	 */
	wrindx = 0;
	numremain = num_wr;
	status	  = DDI_SUCCESS;
	while ((wrindx < num_wr) && (status == DDI_SUCCESS)) {
		/*
		 * For the first WQE on a new chain we need "prev" to point
		 * to the current descriptor.  As we begin to process
		 * further, "prev" will be updated to point to the previous
		 * WQE on the current chain (see below).
		 */
		if (srq->srq_wq_lastwqeindx == -1) {
			prev = NULL;
		} else {
			prev = TAVOR_SRQ_WQE_ADDR(srq, srq->srq_wq_lastwqeindx);
		}

		/*
		 * Break the request up into chains that are less than or
		 * equal to the maximum number of WQEs that can be posted
		 * per doorbell ring
		 */
		chainlen = (numremain > maxdb) ? maxdb : numremain;
		numremain -= chainlen;
		for (currindx = 0; currindx < chainlen; currindx++, wrindx++) {

			/*
			 * Check for "queue full" condition.  If the queue
			 * is already full, then no more WQEs can be posted.
			 * So break out, ring a doorbell (if necessary) and
			 * return an error
			 */
			if (srq->srq_wridlist->wl_free_list_indx == -1) {
				status = IBT_QP_FULL;
				break;
			}

			/*
			 * Get the address of the location where the next
			 * Recv WQE should be built
			 */
			desc = TAVOR_SRQ_WQE_ADDR(srq,
			    srq->srq_wridlist->wl_free_list_indx);

			/*
			 * Add a WRID entry to the WRID list.  Need to
			 * set the "signaled_dbd" values to pass to
			 * tavor_wrid_add_entry().  Note: all Recv WQEs are
			 * essentially "signaled"
			 *
			 * The 'size' is stored at srq_alloc time, in the
			 * srq_wq_stride.  This is a constant value required
			 * for SRQ.
			 */
			signaled_dbd = TAVOR_WRID_ENTRY_SIGNALED;
			tavor_wrid_add_entry_srq(srq, wr[wrindx].wr_id,
			    signaled_dbd);

			/*
			 * Call tavor_wqe_srq_build() to build the WQE
			 * at the given address.  This routine uses the
			 * information in the ibt_recv_wr_t list (wr[]) and
			 * returns the size of the WQE when it returns.
			 */
			status = tavor_wqe_srq_build(state, srq, &wr[wrindx],
			    desc);
			if (status != DDI_SUCCESS) {
				break;
			}

			/*
			 * If this is not the first descriptor on the current
			 * chain, then link it to the previous WQE.  Otherwise,
			 * save the address of this descriptor (in "first") and
			 * continue.
			 */
			if (currindx != 0) {
				tavor_wqe_srq_linknext(desc, prev, srq);
				sync_indx = TAVOR_SRQ_WQE_INDEX(
				    srq->srq_wq_buf, prev,
				    srq->srq_wq_log_wqesz);

				/* Do a DMA sync for previous recv WQE */
				tavor_wqe_sync(srq, sync_indx, sync_indx+1,
				    TAVOR_WR_SRQ, DDI_DMA_SYNC_FORDEV);

				prev = desc;
			} else {

				/*
				 * In this case, the last WQE on the chain is
				 * also considered 'first'.  So set prev to
				 * first, here.
				 */
				first = prev = desc;
			}

			/*
			 * Increment "posted_cnt"
			 */
			posted_cnt++;
		}

		/*
		 * If we reach here and there are one or more WQEs which have
		 * been successfully chained together, then we need to link
		 * the current chain to the previously executing chain of
		 * descriptors (if there is one) and ring the doorbell for the
		 * recv work queue.
		 */
		if (currindx != 0) {
			/*
			 * Before we link the chain, we need to ensure that the
			 * "next" field on the last WQE is set to NULL (to
			 * indicate the end of the chain).
			 */
			tavor_wqe_srq_linknext(NULL, prev, srq);

			sync_indx = TAVOR_SRQ_WQE_INDEX(srq->srq_wq_buf, prev,
			    srq->srq_wq_log_wqesz);

			/* Do a DMA sync for current recv WQE */
			tavor_wqe_sync(srq, sync_indx, sync_indx+1,
			    TAVOR_WR_SRQ, DDI_DMA_SYNC_FORDEV);

			/*
			 * Now link the chain to the old chain (if there was
			 * one).
			 */
			if (srq->srq_wq_lastwqeindx == -1) {
				last_wqe_addr = NULL;
			} else {
				last_wqe_addr = TAVOR_SRQ_WQE_ADDR(srq,
				    srq->srq_wq_lastwqeindx);
			}
			tavor_wqe_srq_linknext(first, last_wqe_addr, srq);

			/*
			 * If there was a valid previous WQE (i.e. valid index),
			 * then sync it too.  This is because we have updated
			 * its "next" fields and we want to ensure that the
			 * hardware can see the changes.
			 */
			if (srq->srq_wq_lastwqeindx != -1) {
				sync_indx = srq->srq_wq_lastwqeindx;
				tavor_wqe_sync(srq, sync_indx, sync_indx+1,
				    TAVOR_WR_SRQ, DDI_DMA_SYNC_FORDEV);
			}

			/* Update some of the state in the SRQ */
			srq->srq_wq_lastwqeindx = TAVOR_SRQ_WQE_INDEX(
			    srq->srq_wq_buf, desc,
			    srq->srq_wq_log_wqesz);

			/* Ring the doorbell */
			/* SRQ needs NDS of 0 */
			tavor_qp_recv_doorbell(state,
			    (uint32_t)((uintptr_t)first - srq->srq_desc_off),
			    0, srq->srq_srqnum, (chainlen % maxdb));
		}
	}

	/*
	 * Update the "num_posted" return value (if necessary).  Then drop
	 * the locks and return success.
	 */
	if (num_posted != NULL) {
		*num_posted = posted_cnt;
	}

	mutex_exit(&srq->srq_wrid_wql->wql_lock);
	mutex_exit(&srq->srq_lock);

	return (status);
}


/*
 * tavor_qp_send_doorbell()
 *    Context: Can be called from interrupt or base context.
 */
static void
tavor_qp_send_doorbell(tavor_state_t *state, uint32_t nda, uint32_t nds,
    uint32_t qpn, uint32_t fence, uint32_t nopcode)
{
	uint64_t	doorbell = 0;

	/* Build the doorbell from the parameters */
	doorbell = (((uint64_t)nda & TAVOR_QPSNDDB_NDA_MASK) <<
	    TAVOR_QPSNDDB_NDA_SHIFT) |
	    ((uint64_t)fence << TAVOR_QPSNDDB_F_SHIFT) |
	    ((uint64_t)nopcode << TAVOR_QPSNDDB_NOPCODE_SHIFT) |
	    ((uint64_t)qpn << TAVOR_QPSNDDB_QPN_SHIFT) | nds;

	/* Write the doorbell to UAR */
	TAVOR_UAR_DOORBELL(state, (uint64_t *)&state->ts_uar->send,
	    doorbell);
}
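
/*
 * Note that the send doorbell packs the next-descriptor address (NDA),
 * fence bit, NOPCODE, QPN and next-descriptor size (NDS) into a single
 * 64-bit value, so one UAR write hands an entire newly-linked chain to
 * the hardware.  The receive doorbell below does the same with NDA,
 * NDS, QPN and a credit count.
 */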


/*
 * tavor_qp_recv_doorbell()
 *    Context: Can be called from interrupt or base context.
 */
static void
tavor_qp_recv_doorbell(tavor_state_t *state, uint32_t nda, uint32_t nds,
    uint32_t qpn, uint32_t credits)
{
	uint64_t	doorbell = 0;

	/* Build the doorbell from the parameters */
	doorbell = (((uint64_t)nda & TAVOR_QPRCVDB_NDA_MASK) <<
	    TAVOR_QPRCVDB_NDA_SHIFT) |
	    ((uint64_t)nds << TAVOR_QPRCVDB_NDS_SHIFT) |
	    ((uint64_t)qpn << TAVOR_QPRCVDB_QPN_SHIFT) | credits;

	/* Write the doorbell to UAR */
	TAVOR_UAR_DOORBELL(state, (uint64_t *)&state->ts_uar->recv,
	    doorbell);
}


/*
 * tavor_wqe_send_build()
 *    Context: Can be called from interrupt or base context.
 */
static int
tavor_wqe_send_build(tavor_state_t *state, tavor_qphdl_t qp,
    ibt_send_wr_t *wr, uint64_t *desc, uint_t *size)
{
	tavor_hw_snd_wqe_ud_t		*ud;
	tavor_hw_snd_wqe_remaddr_t	*rc;
	tavor_hw_snd_wqe_atomic_t	*at;
	tavor_hw_snd_wqe_remaddr_t	*uc;
	tavor_hw_snd_wqe_bind_t		*bn;
	tavor_hw_wqe_sgl_t		*ds;
	ibt_wr_ds_t			*sgl;
	tavor_ahhdl_t			ah;
	uint32_t			nds;
	int				i, num_ds, status;

	ASSERT(MUTEX_HELD(&qp->qp_lock));

	/* Initialize the information for the Data Segments */
	ds = (tavor_hw_wqe_sgl_t *)((uintptr_t)desc +
	    sizeof (tavor_hw_snd_wqe_nextctrl_t));
	nds = wr->wr_nds;
	sgl = wr->wr_sgl;
	num_ds = 0;

	/*
	 * Building a Send WQE depends first and foremost on the transport
	 * type of the Work Request (i.e. UD, RC, or UC)
	 */
	switch (wr->wr_trans) {
	case IBT_UD_SRV:
		/* Ensure that work request transport type matches QP type */
		if (qp->qp_serv_type != TAVOR_QP_UD) {
			return (IBT_QP_SRV_TYPE_INVALID);
		}

		/*
		 * Validate the operation type.  For UD requests, only the
		 * "Send" operation is valid
		 */
		if (wr->wr_opcode != IBT_WRC_SEND) {
			return (IBT_QP_OP_TYPE_INVALID);
		}

		/*
		 * If this is a Special QP (QP0 or QP1), then we need to
		 * build MLX WQEs instead.  So jump to tavor_wqe_mlx_build()
		 * and return whatever status it returns
		 */
		if (qp->qp_is_special) {
			status = tavor_wqe_mlx_build(state, qp, wr, desc, size);
			return (status);
		}

		/*
		 * Otherwise, if this is a normal UD Send request, then fill
		 * all the fields in the Tavor UD header for the WQE.  Note:
		 * to do this we'll need to extract some information from the
		 * Address Handle passed with the work request.
		 */
		ud = (tavor_hw_snd_wqe_ud_t *)((uintptr_t)desc +
		    sizeof (tavor_hw_snd_wqe_nextctrl_t));
		ah = (tavor_ahhdl_t)wr->wr.ud.udwr_dest->ud_ah;
		if (ah == NULL) {
			return (IBT_AH_HDL_INVALID);
		}

		/*
		 * Build the Unreliable Datagram Segment for the WQE, using
		 * the information from the address handle and the work
		 * request.
		 */
		mutex_enter(&ah->ah_lock);
		TAVOR_WQE_BUILD_UD(qp, ud, ah, wr);
		mutex_exit(&ah->ah_lock);

		/* Update "ds" for filling in Data Segments (below) */
		ds = (tavor_hw_wqe_sgl_t *)((uintptr_t)ud +
		    sizeof (tavor_hw_snd_wqe_ud_t));
		break;

	case IBT_RC_SRV:
		/* Ensure that work request transport type matches QP type */
		if (qp->qp_serv_type != TAVOR_QP_RC) {
			return (IBT_QP_SRV_TYPE_INVALID);
		}

		/*
		 * Validate the operation type.  For RC requests, we allow
		 * "Send", "RDMA Read", "RDMA Write", various "Atomic"
		 * operations, and memory window "Bind"
		 */
		if ((wr->wr_opcode != IBT_WRC_SEND) &&
		    (wr->wr_opcode != IBT_WRC_RDMAR) &&
		    (wr->wr_opcode != IBT_WRC_RDMAW) &&
		    (wr->wr_opcode != IBT_WRC_CSWAP) &&
		    (wr->wr_opcode != IBT_WRC_FADD) &&
		    (wr->wr_opcode != IBT_WRC_BIND)) {
			return (IBT_QP_OP_TYPE_INVALID);
		}

		/*
		 * If this is a Send request, then all we need to do is break
		 * out here and begin the Data Segment processing below
		 */
		if (wr->wr_opcode == IBT_WRC_SEND) {
			break;
		}

		/*
		 * If this is an RDMA Read or RDMA Write request, then fill
		 * in the "Remote Address" header fields.
		 */
		if ((wr->wr_opcode == IBT_WRC_RDMAR) ||
		    (wr->wr_opcode == IBT_WRC_RDMAW)) {
			rc = (tavor_hw_snd_wqe_remaddr_t *)((uintptr_t)desc +
			    sizeof (tavor_hw_snd_wqe_nextctrl_t));

			/*
			 * Build the Remote Address Segment for the WQE, using
			 * the information from the RC work request.
			 */
			TAVOR_WQE_BUILD_REMADDR(qp, rc, &wr->wr.rc.rcwr.rdma);

			/* Update "ds" for filling in Data Segments (below) */
			ds = (tavor_hw_wqe_sgl_t *)((uintptr_t)rc +
			    sizeof (tavor_hw_snd_wqe_remaddr_t));
			break;
		}

		/*
		 * If this is one of the Atomic type operations (i.e.
		 * Compare-Swap or Fetch-Add), then fill in both the "Remote
		 * Address" header fields and the "Atomic" header fields.
		 */
		if ((wr->wr_opcode == IBT_WRC_CSWAP) ||
		    (wr->wr_opcode == IBT_WRC_FADD)) {
			rc = (tavor_hw_snd_wqe_remaddr_t *)((uintptr_t)desc +
			    sizeof (tavor_hw_snd_wqe_nextctrl_t));
			at = (tavor_hw_snd_wqe_atomic_t *)((uintptr_t)rc +
			    sizeof (tavor_hw_snd_wqe_remaddr_t));

			/*
			 * Build the Remote Address and Atomic Segments for
			 * the WQE, using the information from the RC Atomic
			 * work request.
			 */
			TAVOR_WQE_BUILD_RC_ATOMIC_REMADDR(qp, rc, wr);
			TAVOR_WQE_BUILD_ATOMIC(qp, at, wr->wr.rc.rcwr.atomic);

			/* Update "ds" for filling in Data Segments (below) */
			ds = (tavor_hw_wqe_sgl_t *)((uintptr_t)at +
			    sizeof (tavor_hw_snd_wqe_atomic_t));

10479e39c5baSBill Taylor 			/*
10489e39c5baSBill Taylor 			 * Update "nds" and "sgl" because Atomic requests have
10499e39c5baSBill Taylor 			 * only a single Data Segment (and they are encoded
10509e39c5baSBill Taylor 			 * somewhat differently in the work request).
10519e39c5baSBill Taylor 			 */
10529e39c5baSBill Taylor 			nds = 1;
10539e39c5baSBill Taylor 			sgl = wr->wr_sgl;
10549e39c5baSBill Taylor 			break;
10559e39c5baSBill Taylor 		}
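		/*
		 * Note (illustrative, not from the original source): an
		 * Atomic request scatters only the original remote value
		 * returned in the response, so the single SGL entry used
		 * above effectively describes an eight-byte receive buffer.
		 */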
10569e39c5baSBill Taylor 
10579e39c5baSBill Taylor 		/*
10589e39c5baSBill Taylor 		 * If this is a memory window Bind operation, then we call the
10599e39c5baSBill Taylor 		 * tavor_wr_bind_check() routine to validate the request and
10609e39c5baSBill Taylor 		 * to generate the updated RKey.  If this is successful, then
10619e39c5baSBill Taylor 		 * we fill in the WQE's "Bind" header fields.
10629e39c5baSBill Taylor 		 */
10639e39c5baSBill Taylor 		if (wr->wr_opcode == IBT_WRC_BIND) {
10649e39c5baSBill Taylor 			status = tavor_wr_bind_check(state, wr);
10659e39c5baSBill Taylor 			if (status != DDI_SUCCESS) {
10669e39c5baSBill Taylor 				return (status);
10679e39c5baSBill Taylor 			}
10689e39c5baSBill Taylor 
10699e39c5baSBill Taylor 			bn = (tavor_hw_snd_wqe_bind_t *)((uintptr_t)desc +
10709e39c5baSBill Taylor 			    sizeof (tavor_hw_snd_wqe_nextctrl_t));
10719e39c5baSBill Taylor 
10729e39c5baSBill Taylor 			/*
10739e39c5baSBill Taylor 			 * Build the Bind Memory Window Segments for the WQE,
10749e39c5baSBill Taylor 			 * using the information from the RC Bind memory
10759e39c5baSBill Taylor 			 * window work request.
10769e39c5baSBill Taylor 			 */
10779e39c5baSBill Taylor 			TAVOR_WQE_BUILD_BIND(qp, bn, wr->wr.rc.rcwr.bind);
10789e39c5baSBill Taylor 
10799e39c5baSBill Taylor 			/*
10809e39c5baSBill Taylor 			 * Update the "ds" pointer.  Even though the "bind"
10819e39c5baSBill Taylor 			 * operation requires no SGLs, this is necessary to
10829e39c5baSBill Taylor 			 * facilitate the correct descriptor size calculations
10839e39c5baSBill Taylor 			 * (below).
10849e39c5baSBill Taylor 			 */
10859e39c5baSBill Taylor 			ds = (tavor_hw_wqe_sgl_t *)((uintptr_t)bn +
10869e39c5baSBill Taylor 			    sizeof (tavor_hw_snd_wqe_bind_t));
10879e39c5baSBill Taylor 			nds = 0;
10889e39c5baSBill Taylor 		}
10899e39c5baSBill Taylor 		break;
10909e39c5baSBill Taylor 
10919e39c5baSBill Taylor 	case IBT_UC_SRV:
10929e39c5baSBill Taylor 		/* Ensure that work request transport type matches QP type */
10939e39c5baSBill Taylor 		if (qp->qp_serv_type != TAVOR_QP_UC) {
10949e39c5baSBill Taylor 			return (IBT_QP_SRV_TYPE_INVALID);
10959e39c5baSBill Taylor 		}
10969e39c5baSBill Taylor 
10979e39c5baSBill Taylor 		/*
10989e39c5baSBill Taylor 		 * Validate the operation type.  For UC requests, we only
10999e39c5baSBill Taylor 		 * allow "Send", "RDMA Write", and memory window "Bind".
11009e39c5baSBill Taylor 		 * Note: Unlike RC, UC does not allow "RDMA Read" or "Atomic"
11019e39c5baSBill Taylor 		 * operations.
11029e39c5baSBill Taylor 		 */
11039e39c5baSBill Taylor 		if ((wr->wr_opcode != IBT_WRC_SEND) &&
11049e39c5baSBill Taylor 		    (wr->wr_opcode != IBT_WRC_RDMAW) &&
11059e39c5baSBill Taylor 		    (wr->wr_opcode != IBT_WRC_BIND)) {
11069e39c5baSBill Taylor 			return (IBT_QP_OP_TYPE_INVALID);
11079e39c5baSBill Taylor 		}
11089e39c5baSBill Taylor 
11099e39c5baSBill Taylor 		/*
11109e39c5baSBill Taylor 		 * If this is a Send request, then all we need to do is break
11119e39c5baSBill Taylor 		 * out here and begin the Data Segment processing below.
11129e39c5baSBill Taylor 		 */
11139e39c5baSBill Taylor 		if (wr->wr_opcode == IBT_WRC_SEND) {
11149e39c5baSBill Taylor 			break;
11159e39c5baSBill Taylor 		}
11169e39c5baSBill Taylor 
11179e39c5baSBill Taylor 		/*
11189e39c5baSBill Taylor 		 * If this is an RDMA Write request, then fill in the "Remote
11199e39c5baSBill Taylor 		 * Address" header fields.
11209e39c5baSBill Taylor 		 */
11219e39c5baSBill Taylor 		if (wr->wr_opcode == IBT_WRC_RDMAW) {
11229e39c5baSBill Taylor 			uc = (tavor_hw_snd_wqe_remaddr_t *)((uintptr_t)desc +
11239e39c5baSBill Taylor 			    sizeof (tavor_hw_snd_wqe_nextctrl_t));
11249e39c5baSBill Taylor 
11259e39c5baSBill Taylor 			/*
11269e39c5baSBill Taylor 			 * Build the Remote Address Segment for the WQE, using
11279e39c5baSBill Taylor 			 * the information from the UC work request.
11289e39c5baSBill Taylor 			 */
11299e39c5baSBill Taylor 			TAVOR_WQE_BUILD_REMADDR(qp, uc, &wr->wr.uc.ucwr.rdma);
11309e39c5baSBill Taylor 
11319e39c5baSBill Taylor 			/* Update "ds" for filling in Data Segments (below) */
11329e39c5baSBill Taylor 			ds = (tavor_hw_wqe_sgl_t *)((uintptr_t)uc +
11339e39c5baSBill Taylor 			    sizeof (tavor_hw_snd_wqe_remaddr_t));
11349e39c5baSBill Taylor 			break;
11359e39c5baSBill Taylor 		}
11369e39c5baSBill Taylor 
11379e39c5baSBill Taylor 		/*
11389e39c5baSBill Taylor 		 * If this is a memory window Bind operation, then we call the
11399e39c5baSBill Taylor 		 * tavor_wr_bind_check() routine to validate the request and
11409e39c5baSBill Taylor 		 * to generate the updated RKey.  If this is successful, then
11419e39c5baSBill Taylor 		 * we fill in the WQE's "Bind" header fields.
11429e39c5baSBill Taylor 		 */
11439e39c5baSBill Taylor 		if (wr->wr_opcode == IBT_WRC_BIND) {
11449e39c5baSBill Taylor 			status = tavor_wr_bind_check(state, wr);
11459e39c5baSBill Taylor 			if (status != DDI_SUCCESS) {
11469e39c5baSBill Taylor 				return (status);
11479e39c5baSBill Taylor 			}
11489e39c5baSBill Taylor 
11499e39c5baSBill Taylor 			bn = (tavor_hw_snd_wqe_bind_t *)((uintptr_t)desc +
11509e39c5baSBill Taylor 			    sizeof (tavor_hw_snd_wqe_nextctrl_t));
11519e39c5baSBill Taylor 
11529e39c5baSBill Taylor 			/*
11539e39c5baSBill Taylor 			 * Build the Bind Memory Window Segments for the WQE,
11549e39c5baSBill Taylor 			 * using the information from the UC Bind memory
11559e39c5baSBill Taylor 			 * window work request.
11569e39c5baSBill Taylor 			 */
11579e39c5baSBill Taylor 			TAVOR_WQE_BUILD_BIND(qp, bn, wr->wr.uc.ucwr.bind);
11589e39c5baSBill Taylor 
11599e39c5baSBill Taylor 			/*
11609e39c5baSBill Taylor 			 * Update the "ds" pointer.  Even though the "bind"
11619e39c5baSBill Taylor 			 * operation requires no SGLs, this is necessary to
11629e39c5baSBill Taylor 			 * facilitate the correct descriptor size calculations
11639e39c5baSBill Taylor 			 * (below).
11649e39c5baSBill Taylor 			 */
11659e39c5baSBill Taylor 			ds = (tavor_hw_wqe_sgl_t *)((uintptr_t)bn +
11669e39c5baSBill Taylor 			    sizeof (tavor_hw_snd_wqe_bind_t));
11679e39c5baSBill Taylor 			nds = 0;
11689e39c5baSBill Taylor 		}
11699e39c5baSBill Taylor 		break;
11709e39c5baSBill Taylor 
11719e39c5baSBill Taylor 	default:
11729e39c5baSBill Taylor 		return (IBT_QP_SRV_TYPE_INVALID);
11739e39c5baSBill Taylor 	}
11749e39c5baSBill Taylor 
11759e39c5baSBill Taylor 	/*
11769e39c5baSBill Taylor 	 * Now fill in the Data Segments (SGL) for the Send WQE based on
11779e39c5baSBill Taylor 	 * the values set up above (i.e. "sgl", "nds", and the "ds" pointer).
11789e39c5baSBill Taylor 	 * Start by checking for a valid number of SGL entries.
11799e39c5baSBill Taylor 	 */
11809e39c5baSBill Taylor 	if (nds > qp->qp_sq_sgl) {
11819e39c5baSBill Taylor 		return (IBT_QP_SGL_LEN_INVALID);
11829e39c5baSBill Taylor 	}
11839e39c5baSBill Taylor 
11849e39c5baSBill Taylor 	/*
11859e39c5baSBill Taylor 	 * For each SGL in the Send Work Request, fill in the Send WQE's data
11869e39c5baSBill Taylor 	 * segments.  Note: We skip any SGL with zero size because Tavor
11879e39c5baSBill Taylor 	 * hardware cannot handle a zero for "byte_cnt" in the WQE.  Actually
11889e39c5baSBill Taylor 	 * the encoding for zero means a 2GB transfer.  Because of this special
11899e39c5baSBill Taylor 	 * encoding in the hardware, we mask the requested length with
11909e39c5baSBill Taylor 	 * TAVOR_WQE_SGL_BYTE_CNT_MASK (so that 2GB will end up encoded as
11919e39c5baSBill Taylor 	 * zero.)
11929e39c5baSBill Taylor 	 */
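	/*
	 * For example (illustrative only): a work request with three SGL
	 * entries of lengths 0x1000, 0, and 0x20 produces just two WQE data
	 * segments, with the zero-length entry silently skipped by the loop
	 * below.
	 */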
11939e39c5baSBill Taylor 	for (i = 0; i < nds; i++) {
11949e39c5baSBill Taylor 		if (sgl[i].ds_len == 0) {
11959e39c5baSBill Taylor 			continue;
11969e39c5baSBill Taylor 		}
11979e39c5baSBill Taylor 
11989e39c5baSBill Taylor 		/*
11999e39c5baSBill Taylor 		 * Fill in the Data Segment(s) for the current WQE, using the
12009e39c5baSBill Taylor 		 * information contained in the scatter-gather list of the
12019e39c5baSBill Taylor 		 * work request.
12029e39c5baSBill Taylor 		 */
12039e39c5baSBill Taylor 		TAVOR_WQE_BUILD_DATA_SEG(qp, &ds[num_ds], &sgl[i]);
12049e39c5baSBill Taylor 		num_ds++;
12059e39c5baSBill Taylor 	}
12069e39c5baSBill Taylor 
12079e39c5baSBill Taylor 	/* Return the size of descriptor (in 16-byte chunks) */
12089e39c5baSBill Taylor 	*size = ((uintptr_t)&ds[num_ds] - (uintptr_t)desc) >> 4;
12099e39c5baSBill Taylor 
12109e39c5baSBill Taylor 	return (DDI_SUCCESS);
12119e39c5baSBill Taylor }
12129e39c5baSBill Taylor 
12139e39c5baSBill Taylor 
12149e39c5baSBill Taylor /*
12159e39c5baSBill Taylor  * tavor_wqe_send_linknext()
12169e39c5baSBill Taylor  *    Context: Can be called from interrupt or base context.
12179e39c5baSBill Taylor  */
12189e39c5baSBill Taylor static void
12199e39c5baSBill Taylor tavor_wqe_send_linknext(ibt_send_wr_t *curr_wr, ibt_send_wr_t *prev_wr,
12209e39c5baSBill Taylor     uint64_t *curr_desc, uint_t curr_descsz, uint64_t *prev_desc,
12219e39c5baSBill Taylor     tavor_sw_wqe_dbinfo_t *dbinfo, tavor_qphdl_t qp)
12229e39c5baSBill Taylor {
12239e39c5baSBill Taylor 	uint64_t	next, ctrl;
12249e39c5baSBill Taylor 	uint32_t	nopcode, fence;
12259e39c5baSBill Taylor 
12269e39c5baSBill Taylor 	/*
12279e39c5baSBill Taylor 	 * Calculate the "next" field of the descriptor.  This amounts to
12289e39c5baSBill Taylor 	 * setting up the "next_wqe_addr", "nopcode", "fence", and "nds"
12299e39c5baSBill Taylor 	 * fields (see tavor_hw.h for more).  Note:  If there is no next
12309e39c5baSBill Taylor 	 * descriptor (i.e. if the current descriptor is the last WQE on
12319e39c5baSBill Taylor 	 * the chain), then set "next" to zero.
12329e39c5baSBill Taylor 	 */
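	/*
	 * Rough sketch (not from the original source) of the 64-bit "next"
	 * word assembled below: the offset-adjusted next WQE address and the
	 * nopcode are placed in the upper 32 bits, the descriptor size
	 * ("nds") occupies the low-order bits, and the fence and doorbell
	 * ("dbd") indications are OR'd in through their respective masks.
	 */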
12339e39c5baSBill Taylor 	if (curr_desc != NULL) {
12349e39c5baSBill Taylor 		/*
12359e39c5baSBill Taylor 		 * Determine the value for the Tavor WQE "nopcode" field
12369e39c5baSBill Taylor 		 * by using the IBTF opcode from the work request
12379e39c5baSBill Taylor 		 */
12389e39c5baSBill Taylor 		switch (curr_wr->wr_opcode) {
12399e39c5baSBill Taylor 		case IBT_WRC_RDMAW:
12409e39c5baSBill Taylor 			if (curr_wr->wr_flags & IBT_WR_SEND_IMMED) {
12419e39c5baSBill Taylor 				nopcode = TAVOR_WQE_SEND_NOPCODE_RDMAWI;
12429e39c5baSBill Taylor 			} else {
12439e39c5baSBill Taylor 				nopcode = TAVOR_WQE_SEND_NOPCODE_RDMAW;
12449e39c5baSBill Taylor 			}
12459e39c5baSBill Taylor 			break;
12469e39c5baSBill Taylor 
12479e39c5baSBill Taylor 		case IBT_WRC_SEND:
12489e39c5baSBill Taylor 			if (curr_wr->wr_flags & IBT_WR_SEND_IMMED) {
12499e39c5baSBill Taylor 				nopcode = TAVOR_WQE_SEND_NOPCODE_SENDI;
12509e39c5baSBill Taylor 			} else {
12519e39c5baSBill Taylor 				nopcode = TAVOR_WQE_SEND_NOPCODE_SEND;
12529e39c5baSBill Taylor 			}
12539e39c5baSBill Taylor 			break;
12549e39c5baSBill Taylor 
12559e39c5baSBill Taylor 		case IBT_WRC_RDMAR:
12569e39c5baSBill Taylor 			nopcode = TAVOR_WQE_SEND_NOPCODE_RDMAR;
12579e39c5baSBill Taylor 			break;
12589e39c5baSBill Taylor 
12599e39c5baSBill Taylor 		case IBT_WRC_CSWAP:
12609e39c5baSBill Taylor 			nopcode = TAVOR_WQE_SEND_NOPCODE_ATMCS;
12619e39c5baSBill Taylor 			break;
12629e39c5baSBill Taylor 
12639e39c5baSBill Taylor 		case IBT_WRC_FADD:
12649e39c5baSBill Taylor 			nopcode = TAVOR_WQE_SEND_NOPCODE_ATMFA;
12659e39c5baSBill Taylor 			break;
12669e39c5baSBill Taylor 
12679e39c5baSBill Taylor 		case IBT_WRC_BIND:
12689e39c5baSBill Taylor 			nopcode = TAVOR_WQE_SEND_NOPCODE_BIND;
12699e39c5baSBill Taylor 			break;
12709e39c5baSBill Taylor 		}
12719e39c5baSBill Taylor 
12729e39c5baSBill Taylor 		curr_desc = (uint64_t *)(uintptr_t)((uintptr_t)curr_desc
12739e39c5baSBill Taylor 		    - qp->qp_desc_off);
12749e39c5baSBill Taylor 		next  = ((uint64_t)(uintptr_t)curr_desc &
12759e39c5baSBill Taylor 		    TAVOR_WQE_NDA_MASK) << 32;
12769e39c5baSBill Taylor 		next  = next | ((uint64_t)nopcode << 32);
12779e39c5baSBill Taylor 		fence = (curr_wr->wr_flags & IBT_WR_SEND_FENCE) ? 1 : 0;
12789e39c5baSBill Taylor 		if (fence) {
12799e39c5baSBill Taylor 			next = next | TAVOR_WQE_SEND_FENCE_MASK;
12809e39c5baSBill Taylor 		}
12819e39c5baSBill Taylor 		next = next | (curr_descsz & TAVOR_WQE_NDS_MASK);
12829e39c5baSBill Taylor 
12839e39c5baSBill Taylor 		/*
12849e39c5baSBill Taylor 		 * If a send queue doorbell will be rung for the next
12859e39c5baSBill Taylor 		 * WQE on the chain, then set the current WQE's "dbd" bit.
12869e39c5baSBill Taylor 		 * Note: We also update the "dbinfo" structure here to pass
12879e39c5baSBill Taylor 		 * back information about what should (later) be included
12889e39c5baSBill Taylor 		 * in the send queue doorbell.
12899e39c5baSBill Taylor 		 */
12909e39c5baSBill Taylor 		if (dbinfo) {
12919e39c5baSBill Taylor 			next = next | TAVOR_WQE_DBD_MASK;
12929e39c5baSBill Taylor 			dbinfo->db_nopcode = nopcode;
12939e39c5baSBill Taylor 			dbinfo->db_fence   = fence;
12949e39c5baSBill Taylor 		}
12959e39c5baSBill Taylor 	} else {
12969e39c5baSBill Taylor 		next = 0;
12979e39c5baSBill Taylor 	}
12989e39c5baSBill Taylor 
12999e39c5baSBill Taylor 	/*
13009e39c5baSBill Taylor 	 * If this WQE is supposed to be linked to the previous descriptor,
13019e39c5baSBill Taylor 	 * then we need to update not only the previous WQE's "next" fields
13029e39c5baSBill Taylor 	 * but we must also update this WQE's "ctrl" fields (i.e. the "c", "e",
13039e39c5baSBill Taylor 	 * "s", "i" and "immediate" fields - see tavor_hw.h for more).  Note:
13049e39c5baSBill Taylor 	 * the "e" bit is always hardcoded to zero.
13059e39c5baSBill Taylor 	 */
13069e39c5baSBill Taylor 	if (prev_desc != NULL) {
13079e39c5baSBill Taylor 		/*
13089e39c5baSBill Taylor 		 * If a send queue doorbell will be rung for the next WQE on
13099e39c5baSBill Taylor 		 * the chain, then update the current WQE's "next" field and
13109e39c5baSBill Taylor 		 * return.
13119e39c5baSBill Taylor 		 * Note: We don't want to modify the "ctrl" field here because
13129e39c5baSBill Taylor 		 * that portion of the previous WQE has already been set
13139e39c5baSBill Taylor 		 * correctly at some previous point in time.
13149e39c5baSBill Taylor 		 */
13159e39c5baSBill Taylor 		if (dbinfo) {
13169e39c5baSBill Taylor 			TAVOR_WQE_LINKFIRST(qp, prev_desc, next);
13179e39c5baSBill Taylor 			return;
13189e39c5baSBill Taylor 		}
13199e39c5baSBill Taylor 
13209e39c5baSBill Taylor 		ctrl = 0;
13219e39c5baSBill Taylor 
13229e39c5baSBill Taylor 		/* Set the "c" (i.e. "signaled") bit appropriately */
13239e39c5baSBill Taylor 		if (prev_wr->wr_flags & IBT_WR_SEND_SIGNAL) {
13249e39c5baSBill Taylor 			ctrl = ctrl | TAVOR_WQE_SEND_SIGNALED_MASK;
13259e39c5baSBill Taylor 		}
13269e39c5baSBill Taylor 
13279e39c5baSBill Taylor 		/* Set the "s" (i.e. "solicited") bit appropriately */
13289e39c5baSBill Taylor 		if (prev_wr->wr_flags & IBT_WR_SEND_SOLICIT) {
13299e39c5baSBill Taylor 			ctrl = ctrl | TAVOR_WQE_SEND_SOLICIT_MASK;
13309e39c5baSBill Taylor 		}
13319e39c5baSBill Taylor 
13329e39c5baSBill Taylor 		/* Set the "i" bit and the immediate data appropriately */
13339e39c5baSBill Taylor 		if (prev_wr->wr_flags & IBT_WR_SEND_IMMED) {
13349e39c5baSBill Taylor 			ctrl = ctrl | TAVOR_WQE_SEND_IMMEDIATE_MASK;
13359e39c5baSBill Taylor 			ctrl = ctrl | tavor_wr_get_immediate(prev_wr);
13369e39c5baSBill Taylor 		}
13379e39c5baSBill Taylor 
13389e39c5baSBill Taylor 		TAVOR_WQE_LINKNEXT(qp, prev_desc, ctrl, next);
13399e39c5baSBill Taylor 	}
13409e39c5baSBill Taylor }
13419e39c5baSBill Taylor 
13429e39c5baSBill Taylor 
13439e39c5baSBill Taylor /*
13449e39c5baSBill Taylor  * tavor_wqe_mlx_build()
13459e39c5baSBill Taylor  *    Context: Can be called from interrupt or base context.
13469e39c5baSBill Taylor  */
13479e39c5baSBill Taylor static int
13489e39c5baSBill Taylor tavor_wqe_mlx_build(tavor_state_t *state, tavor_qphdl_t qp,
13499e39c5baSBill Taylor     ibt_send_wr_t *wr, uint64_t *desc, uint_t *size)
13509e39c5baSBill Taylor {
13519e39c5baSBill Taylor 	tavor_hw_udav_t		udav;
13529e39c5baSBill Taylor 	tavor_ahhdl_t		ah;
13539e39c5baSBill Taylor 	ib_lrh_hdr_t		*lrh;
13549e39c5baSBill Taylor 	ib_grh_t		*grh;
13559e39c5baSBill Taylor 	ib_bth_hdr_t		*bth;
13569e39c5baSBill Taylor 	ib_deth_hdr_t		*deth;
13579e39c5baSBill Taylor 	tavor_hw_wqe_sgl_t	*ds;
13589e39c5baSBill Taylor 	ibt_wr_ds_t		*sgl;
13599e39c5baSBill Taylor 	uint8_t			*mgmtclass, *hpoint, *hcount;
13609e39c5baSBill Taylor 	uint64_t		data;
13619e39c5baSBill Taylor 	uint32_t		nds, offset, pktlen;
13629e39c5baSBill Taylor 	uint32_t		desc_sz, udav_sz;
13639e39c5baSBill Taylor 	int			i, num_ds;
13649e39c5baSBill Taylor 
13659e39c5baSBill Taylor 	ASSERT(MUTEX_HELD(&qp->qp_lock));
13669e39c5baSBill Taylor 
13679e39c5baSBill Taylor 	/* Initialize the information for the Data Segments */
13689e39c5baSBill Taylor 	ds = (tavor_hw_wqe_sgl_t *)((uintptr_t)desc +
13699e39c5baSBill Taylor 	    sizeof (tavor_hw_mlx_wqe_nextctrl_t));
13709e39c5baSBill Taylor 
13719e39c5baSBill Taylor 	/*
13729e39c5baSBill Taylor 	 * Pull the address handle from the work request and read in
13739e39c5baSBill Taylor 	 * the contents of the UDAV.  This will be used to answer some
13749e39c5baSBill Taylor 	 * questions about the request.
13759e39c5baSBill Taylor 	 */
13769e39c5baSBill Taylor 	ah = (tavor_ahhdl_t)wr->wr.ud.udwr_dest->ud_ah;
13779e39c5baSBill Taylor 	if (ah == NULL) {
13789e39c5baSBill Taylor 		return (IBT_AH_HDL_INVALID);
13799e39c5baSBill Taylor 	}
13809e39c5baSBill Taylor 	mutex_enter(&ah->ah_lock);
13819e39c5baSBill Taylor 	udav_sz = sizeof (tavor_hw_udav_t) >> 3;
13829e39c5baSBill Taylor 	for (i = 0; i < udav_sz; i++) {
13839e39c5baSBill Taylor 		data = ddi_get64(ah->ah_udavrsrcp->tr_acchdl,
13849e39c5baSBill Taylor 		    ((uint64_t *)ah->ah_udavrsrcp->tr_addr + i));
13859e39c5baSBill Taylor 		((uint64_t *)&udav)[i] = data;
13869e39c5baSBill Taylor 	}
13879e39c5baSBill Taylor 	mutex_exit(&ah->ah_lock);
13889e39c5baSBill Taylor 
13899e39c5baSBill Taylor 	/*
13909e39c5baSBill Taylor 	 * If the request is for QP1 and the destination LID is equal to
13919e39c5baSBill Taylor 	 * the Permissive LID, then return an error.  This combination is
13929e39c5baSBill Taylor 	 * not allowed.
13939e39c5baSBill Taylor 	 */
13949e39c5baSBill Taylor 	if ((udav.rlid == IB_LID_PERMISSIVE) &&
13959e39c5baSBill Taylor 	    (qp->qp_is_special == TAVOR_QP_GSI)) {
13969e39c5baSBill Taylor 		return (IBT_AH_HDL_INVALID);
13979e39c5baSBill Taylor 	}
13989e39c5baSBill Taylor 
13999e39c5baSBill Taylor 	/*
14009e39c5baSBill Taylor 	 * Calculate the size of the packet headers, including the GRH
14019e39c5baSBill Taylor 	 * (if necessary)
14029e39c5baSBill Taylor 	 */
14039e39c5baSBill Taylor 	desc_sz = sizeof (ib_lrh_hdr_t) + sizeof (ib_bth_hdr_t) +
14049e39c5baSBill Taylor 	    sizeof (ib_deth_hdr_t);
14059e39c5baSBill Taylor 	if (udav.grh) {
14069e39c5baSBill Taylor 		desc_sz += sizeof (ib_grh_t);
14079e39c5baSBill Taylor 	}
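	/*
	 * Assuming the ib_*_hdr_t types map one-to-one onto the standard
	 * InfiniBand wire headers (LRH 8 bytes, BTH 12 bytes, DETH 8 bytes,
	 * GRH 40 bytes), this works out to 28 bytes without a GRH or 68
	 * bytes with one.
	 */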
14089e39c5baSBill Taylor 
14099e39c5baSBill Taylor 	/*
14109e39c5baSBill Taylor 	 * Begin to build the first "inline" data segment for the packet
14119e39c5baSBill Taylor 	 * headers.  Note:  By specifying "inline" we can build the contents
14129e39c5baSBill Taylor 	 * of the MAD packet headers directly into the work queue (as part of
14129e39c5baSBill Taylor 	 * the
14139e39c5baSBill Taylor 	 * descriptor).  This has the advantage of both speeding things up
14149e39c5baSBill Taylor 	 * and of not requiring the driver to allocate/register any additional
14159e39c5baSBill Taylor 	 * memory for the packet headers.
14169e39c5baSBill Taylor 	 */
14179e39c5baSBill Taylor 	TAVOR_WQE_BUILD_INLINE(qp, &ds[0], desc_sz);
14189e39c5baSBill Taylor 	desc_sz += 4;
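	/*
	 * The extra four bytes added here presumably account for the inline
	 * segment's own control word, so that the header data built below
	 * (which starts at &ds[0] + 4) is covered by the running size.
	 */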
14199e39c5baSBill Taylor 
14209e39c5baSBill Taylor 	/*
14219e39c5baSBill Taylor 	 * Build Local Route Header (LRH)
14229e39c5baSBill Taylor 	 *    We start here by building the LRH into a temporary location.
14239e39c5baSBill Taylor 	 *    When we have finished we copy the LRH data into the descriptor.
14249e39c5baSBill Taylor 	 *
14259e39c5baSBill Taylor 	 *    Notice that the VL values are hardcoded.  This is not a problem
14269e39c5baSBill Taylor 	 *    because VL15 is decided later based on the value in the MLX
14279e39c5baSBill Taylor 	 *    transport "next/ctrl" header (see the "vl15" bit below), and it
14289e39c5baSBill Taylor 	 *    is otherwise (meaning for QP1) chosen from the SL-to-VL table
14299e39c5baSBill Taylor 	 *    values.  This rule does not hold for loopback packets, however
14309e39c5baSBill Taylor 	 *    (all of which bypass the SL-to-VL tables), and it is the reason
14319e39c5baSBill Taylor 	 *    that non-QP0 MADs are set up with VL hardcoded to zero below.
14329e39c5baSBill Taylor 	 *
14339e39c5baSBill Taylor 	 *    Notice also that Source LID is hardcoded to the Permissive LID
14349e39c5baSBill Taylor 	 *    (0xFFFF).  This is also not a problem because if the Destination
14359e39c5baSBill Taylor 	 *    LID is not the Permissive LID, then the "slr" value in the MLX
14369e39c5baSBill Taylor 	 *    transport "next/ctrl" header will be set to zero and the hardware
14379e39c5baSBill Taylor 	 *    will pull the LID from the value in the port.
14389e39c5baSBill Taylor 	 */
14399e39c5baSBill Taylor 	lrh = (ib_lrh_hdr_t *)((uintptr_t)&ds[0] + 4);
14409e39c5baSBill Taylor 	pktlen = (desc_sz + 0x100) >> 2;
14419e39c5baSBill Taylor 	TAVOR_WQE_BUILD_MLX_LRH(lrh, qp, udav, pktlen);
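	/*
	 * Note (an inference, not stated in the original source): the 0x100
	 * added above appears to account for the fixed 256-byte MAD payload
	 * that follows these headers, with the result expressed in the
	 * 4-byte words expected by the LRH packet length field.
	 */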
14429e39c5baSBill Taylor 
14439e39c5baSBill Taylor 	/*
14449e39c5baSBill Taylor 	 * Build Global Route Header (GRH)
14459e39c5baSBill Taylor 	 *    This is only built if necessary as defined by the "grh" bit in
14469e39c5baSBill Taylor 	 *    the address vector.  Note:  We also calculate the offset to the
14479e39c5baSBill Taylor 	 *    next header (BTH) based on whether or not the "grh" bit is set.
14489e39c5baSBill Taylor 	 */
14499e39c5baSBill Taylor 	if (udav.grh) {
14509e39c5baSBill Taylor 		/*
14519e39c5baSBill Taylor 		 * If the request is for QP0, then return an error.  The
14529e39c5baSBill Taylor 		 * combination of global routing (GRH) and QP0 is not allowed.
14539e39c5baSBill Taylor 		 */
14549e39c5baSBill Taylor 		if (qp->qp_is_special == TAVOR_QP_SMI) {
14559e39c5baSBill Taylor 			return (IBT_AH_HDL_INVALID);
14569e39c5baSBill Taylor 		}
14579e39c5baSBill Taylor 		grh = (ib_grh_t *)((uintptr_t)lrh + sizeof (ib_lrh_hdr_t));
14589e39c5baSBill Taylor 		TAVOR_WQE_BUILD_MLX_GRH(state, grh, qp, udav, pktlen);
14599e39c5baSBill Taylor 
14609e39c5baSBill Taylor 		bth = (ib_bth_hdr_t *)((uintptr_t)grh + sizeof (ib_grh_t));
14619e39c5baSBill Taylor 	} else {
14629e39c5baSBill Taylor 		bth = (ib_bth_hdr_t *)((uintptr_t)lrh + sizeof (ib_lrh_hdr_t));
14639e39c5baSBill Taylor 	}
14649e39c5baSBill Taylor 
14659e39c5baSBill Taylor 
14669e39c5baSBill Taylor 	/*
14679e39c5baSBill Taylor 	 * Build Base Transport Header (BTH)
14689e39c5baSBill Taylor 	 *    Notice that the M, PadCnt, and TVer fields are all set
14699e39c5baSBill Taylor 	 *    to zero implicitly.  This is true for all Management Datagrams
14709e39c5baSBill Taylor 	 *    (MADs), whether GSI or SMI.
14719e39c5baSBill Taylor 	 */
14729e39c5baSBill Taylor 	TAVOR_WQE_BUILD_MLX_BTH(state, bth, qp, wr);
14739e39c5baSBill Taylor 
14749e39c5baSBill Taylor 	/*
14759e39c5baSBill Taylor 	 * Build Datagram Extended Transport Header (DETH)
14769e39c5baSBill Taylor 	 */
14779e39c5baSBill Taylor 	deth = (ib_deth_hdr_t *)((uintptr_t)bth + sizeof (ib_bth_hdr_t));
14789e39c5baSBill Taylor 	TAVOR_WQE_BUILD_MLX_DETH(deth, qp);
14799e39c5baSBill Taylor 
14809e39c5baSBill Taylor 	/* Ensure that the Data Segment is aligned on a 16-byte boundary */
14819e39c5baSBill Taylor 	ds = (tavor_hw_wqe_sgl_t *)((uintptr_t)deth + sizeof (ib_deth_hdr_t));
14829e39c5baSBill Taylor 	ds = (tavor_hw_wqe_sgl_t *)(((uintptr_t)ds + 0xF) & ~0xF);
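	/*
	 * For example, a "ds" pointer ending in 0x...8 rounds up to the next
	 * 0x...0 boundary, while a pointer already on a 16-byte boundary is
	 * left unchanged by the mask arithmetic above.
	 */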
14839e39c5baSBill Taylor 	nds = wr->wr_nds;
14849e39c5baSBill Taylor 	sgl = wr->wr_sgl;
14859e39c5baSBill Taylor 	num_ds = 0;
14869e39c5baSBill Taylor 
14879e39c5baSBill Taylor 	/*
14889e39c5baSBill Taylor 	 * Now fill in the Data Segments (SGL) for the MLX WQE based on the
14899e39c5baSBill Taylor 	 * values set up above (i.e. "sgl", "nds", and the "ds" pointer).
14909e39c5baSBill Taylor 	 * Start by checking for a valid number of SGL entries.
14919e39c5baSBill Taylor 	 */
14929e39c5baSBill Taylor 	if (nds > qp->qp_sq_sgl) {
14939e39c5baSBill Taylor 		return (IBT_QP_SGL_LEN_INVALID);
14949e39c5baSBill Taylor 	}
14959e39c5baSBill Taylor 
14969e39c5baSBill Taylor 	/*
14979e39c5baSBill Taylor 	 * For each SGL in the Send Work Request, fill in the MLX WQE's data
14989e39c5baSBill Taylor 	 * segments.  Note: We skip any SGL with zero size because Tavor
14999e39c5baSBill Taylor 	 * hardware cannot handle a zero for "byte_cnt" in the WQE.  Actually
15009e39c5baSBill Taylor 	 * the encoding for zero means a 2GB transfer.  Because of this special
15019e39c5baSBill Taylor 	 * encoding in the hardware, we mask the requested length with
15029e39c5baSBill Taylor 	 * TAVOR_WQE_SGL_BYTE_CNT_MASK (so that 2GB will end up encoded as
15039e39c5baSBill Taylor 	 * zero.)
15049e39c5baSBill Taylor 	 */
15059e39c5baSBill Taylor 	mgmtclass = hpoint = hcount = NULL;
15069e39c5baSBill Taylor 	offset = 0;
15079e39c5baSBill Taylor 	for (i = 0; i < nds; i++) {
15089e39c5baSBill Taylor 		if (sgl[i].ds_len == 0) {
15099e39c5baSBill Taylor 			continue;
15109e39c5baSBill Taylor 		}
15119e39c5baSBill Taylor 
15129e39c5baSBill Taylor 		/*
15139e39c5baSBill Taylor 		 * Fill in the Data Segment(s) for the MLX send WQE, using
15149e39c5baSBill Taylor 		 * the information contained in the scatter-gather list of
15159e39c5baSBill Taylor 		 * the work request.
15169e39c5baSBill Taylor 		 */
15179e39c5baSBill Taylor 		TAVOR_WQE_BUILD_DATA_SEG(qp, &ds[num_ds], &sgl[i]);
15189e39c5baSBill Taylor 
15199e39c5baSBill Taylor 		/*
15209e39c5baSBill Taylor 		 * Search through the contents of all MADs posted to QP0 to
15219e39c5baSBill Taylor 		 * initialize pointers to the places where Directed Route "hop
15229e39c5baSBill Taylor 		 * pointer", "hop count", and "mgmtclass" would be.  Tavor
15239e39c5baSBill Taylor 		 * needs these updated (i.e. incremented or decremented, as
15249e39c5baSBill Taylor 		 * necessary) by software.
15259e39c5baSBill Taylor 		 */
15269e39c5baSBill Taylor 		if (qp->qp_is_special == TAVOR_QP_SMI) {
15279e39c5baSBill Taylor 
15289e39c5baSBill Taylor 			TAVOR_SPECIAL_QP_DRMAD_GET_MGMTCLASS(mgmtclass,
15299e39c5baSBill Taylor 			    offset, sgl[i].ds_va, sgl[i].ds_len);
15309e39c5baSBill Taylor 
15319e39c5baSBill Taylor 			TAVOR_SPECIAL_QP_DRMAD_GET_HOPPOINTER(hpoint,
15329e39c5baSBill Taylor 			    offset, sgl[i].ds_va, sgl[i].ds_len);
15339e39c5baSBill Taylor 
15349e39c5baSBill Taylor 			TAVOR_SPECIAL_QP_DRMAD_GET_HOPCOUNT(hcount,
15359e39c5baSBill Taylor 			    offset, sgl[i].ds_va, sgl[i].ds_len);
15369e39c5baSBill Taylor 
15379e39c5baSBill Taylor 			offset += sgl[i].ds_len;
15389e39c5baSBill Taylor 		}
15399e39c5baSBill Taylor 		num_ds++;
15409e39c5baSBill Taylor 	}
15419e39c5baSBill Taylor 
15429e39c5baSBill Taylor 	/*
15439e39c5baSBill Taylor 	 * Tavor's Directed Route MADs need to have the "hop pointer"
15449e39c5baSBill Taylor 	 * incremented/decremented (as necessary) depending on whether it is
15459e39c5baSBill Taylor 	 * currently less than or greater than the "hop count" (i.e. whether
15469e39c5baSBill Taylor 	 * the MAD is a request or a response.)
15479e39c5baSBill Taylor 	 */
15489e39c5baSBill Taylor 	if (qp->qp_is_special == TAVOR_QP_SMI) {
15499e39c5baSBill Taylor 		TAVOR_SPECIAL_QP_DRMAD_DO_HOPPOINTER_MODIFY(*mgmtclass,
15509e39c5baSBill Taylor 		    *hpoint, *hcount);
15519e39c5baSBill Taylor 	}
15529e39c5baSBill Taylor 
15539e39c5baSBill Taylor 	/*
15549e39c5baSBill Taylor 	 * Now fill in the ICRC Data Segment.  This data segment is inlined
15559e39c5baSBill Taylor 	 * just like the packet headers above, but it is only four bytes and
15569e39c5baSBill Taylor 	 * set to zero (to indicate that we wish the hardware to generate ICRC).
15579e39c5baSBill Taylor 	 */
15589e39c5baSBill Taylor 	TAVOR_WQE_BUILD_INLINE_ICRC(qp, &ds[num_ds], 4, 0);
15599e39c5baSBill Taylor 	num_ds++;
15609e39c5baSBill Taylor 
15619e39c5baSBill Taylor 	/* Return the size of descriptor (in 16-byte chunks) */
15629e39c5baSBill Taylor 	*size = ((uintptr_t)&ds[num_ds] - (uintptr_t)desc) >> 0x4;
15639e39c5baSBill Taylor 
15649e39c5baSBill Taylor 	return (DDI_SUCCESS);
15659e39c5baSBill Taylor }
15669e39c5baSBill Taylor 
15679e39c5baSBill Taylor 
15689e39c5baSBill Taylor /*
15699e39c5baSBill Taylor  * tavor_wqe_mlx_linknext()
15709e39c5baSBill Taylor  *    Context: Can be called from interrupt or base context.
15719e39c5baSBill Taylor  */
15729e39c5baSBill Taylor static void
15739e39c5baSBill Taylor tavor_wqe_mlx_linknext(ibt_send_wr_t *prev_wr, uint64_t *curr_desc,
15749e39c5baSBill Taylor     uint_t curr_descsz, uint64_t *prev_desc, tavor_sw_wqe_dbinfo_t *dbinfo,
15759e39c5baSBill Taylor     tavor_qphdl_t qp)
15769e39c5baSBill Taylor {
15779e39c5baSBill Taylor 	tavor_hw_udav_t		udav;
15789e39c5baSBill Taylor 	tavor_ahhdl_t		ah;
15799e39c5baSBill Taylor 	uint64_t		next, ctrl, data;
15809e39c5baSBill Taylor 	uint_t			nopcode;
15819e39c5baSBill Taylor 	uint_t			udav_sz;
15829e39c5baSBill Taylor 	int			i;
15839e39c5baSBill Taylor 
15849e39c5baSBill Taylor 	/*
15859e39c5baSBill Taylor 	 * Calculate the "next" field of the descriptor.  This amounts to
15869e39c5baSBill Taylor 	 * setting up the "next_wqe_addr", "nopcode", and "nds" fields (see
15879e39c5baSBill Taylor 	 * tavor_hw.h for more).  Note:  If there is no next descriptor (i.e.
15889e39c5baSBill Taylor 	 * if the current descriptor is the last WQE on the chain), then set
15899e39c5baSBill Taylor 	 * "next" to zero.
15909e39c5baSBill Taylor 	 */
15919e39c5baSBill Taylor 	if (curr_desc != NULL) {
15929e39c5baSBill Taylor 		/*
15939e39c5baSBill Taylor 		 * The only valid Tavor WQE "nopcode" for MLX transport
15949e39c5baSBill Taylor 		 * requests is the "Send" code.
15959e39c5baSBill Taylor 		 */
15969e39c5baSBill Taylor 		nopcode = TAVOR_WQE_SEND_NOPCODE_SEND;
15979e39c5baSBill Taylor 		curr_desc = (uint64_t *)(uintptr_t)((uint64_t)
15989e39c5baSBill Taylor 		    (uintptr_t)curr_desc - qp->qp_desc_off);
15999e39c5baSBill Taylor 		next = (uint64_t)((uintptr_t)curr_desc &
16009e39c5baSBill Taylor 		    TAVOR_WQE_NDA_MASK) << 32;
16019e39c5baSBill Taylor 		next = next | ((uint64_t)nopcode << 32);
16029e39c5baSBill Taylor 		next = next | (curr_descsz & TAVOR_WQE_NDS_MASK);
16039e39c5baSBill Taylor 
16049e39c5baSBill Taylor 		/*
16059e39c5baSBill Taylor 		 * If a send queue doorbell will be rung for the next
16069e39c5baSBill Taylor 		 * WQE on the chain, then set the current WQE's "dbd" bit.
16079e39c5baSBill Taylor 		 * Note: We also update the "dbinfo" structure here to pass
16089e39c5baSBill Taylor 		 * back information about what should (later) be included
16099e39c5baSBill Taylor 		 * in the send queue doorbell.
16109e39c5baSBill Taylor 		 */
16119e39c5baSBill Taylor 		if (dbinfo) {
16129e39c5baSBill Taylor 			next = next | TAVOR_WQE_DBD_MASK;
16139e39c5baSBill Taylor 			dbinfo->db_nopcode = nopcode;
16149e39c5baSBill Taylor 			dbinfo->db_fence   = 0;
16159e39c5baSBill Taylor 		}
16169e39c5baSBill Taylor 	} else {
16179e39c5baSBill Taylor 		next = 0;
16189e39c5baSBill Taylor 	}
16199e39c5baSBill Taylor 
16209e39c5baSBill Taylor 	/*
16219e39c5baSBill Taylor 	 * If this WQE is supposed to be linked to the previous descriptor,
16229e39c5baSBill Taylor 	 * then we need to update not only the previous WQE's "next" fields
16239e39c5baSBill Taylor 	 * but we must also update this WQE's "ctrl" fields (i.e. the "vl15",
16249e39c5baSBill Taylor 	 * "slr", "max_srate", "sl", "c", "e", "rlid", and "vcrc" fields -
16259e39c5baSBill Taylor 	 * see tavor_hw.h for more) Note: the "e" bit and "vcrc" fields are
16269e39c5baSBill Taylor 	 * always hardcoded to zero.
16279e39c5baSBill Taylor 	 */
16289e39c5baSBill Taylor 	if (prev_desc != NULL) {
16299e39c5baSBill Taylor 		/*
16309e39c5baSBill Taylor 		 * If a send queue doorbell will be rung for the next WQE on
16319e39c5baSBill Taylor 		 * the chain, then update the current WQE's "next" field and
16329e39c5baSBill Taylor 		 * return.
16339e39c5baSBill Taylor 		 * Note: We don't want to modify the "ctrl" field here because
16349e39c5baSBill Taylor 		 * that portion of the previous WQE has already been set
16359e39c5baSBill Taylor 		 * correctly at some previous point in time.
16369e39c5baSBill Taylor 		 */
16379e39c5baSBill Taylor 		if (dbinfo) {
16389e39c5baSBill Taylor 			TAVOR_WQE_LINKFIRST(qp, prev_desc, next);
16399e39c5baSBill Taylor 			return;
16409e39c5baSBill Taylor 		}
16419e39c5baSBill Taylor 
16429e39c5baSBill Taylor 		/*
16439e39c5baSBill Taylor 		 * Pull the address handle from the work request and read in
16449e39c5baSBill Taylor 		 * the contents of the UDAV.  This will be used to answer some
16459e39c5baSBill Taylor 		 * questions about the request.
16469e39c5baSBill Taylor 		 */
16479e39c5baSBill Taylor 		ah = (tavor_ahhdl_t)prev_wr->wr.ud.udwr_dest->ud_ah;
16489e39c5baSBill Taylor 		mutex_enter(&ah->ah_lock);
16499e39c5baSBill Taylor 		udav_sz = sizeof (tavor_hw_udav_t) >> 3;
16509e39c5baSBill Taylor 		for (i = 0; i < udav_sz; i++) {
16519e39c5baSBill Taylor 			data = ddi_get64(ah->ah_udavrsrcp->tr_acchdl,
16529e39c5baSBill Taylor 			    ((uint64_t *)ah->ah_udavrsrcp->tr_addr + i));
16539e39c5baSBill Taylor 			((uint64_t *)&udav)[i] = data;
16549e39c5baSBill Taylor 		}
16559e39c5baSBill Taylor 		mutex_exit(&ah->ah_lock);
16569e39c5baSBill Taylor 
16579e39c5baSBill Taylor 		ctrl = 0;
16589e39c5baSBill Taylor 
16599e39c5baSBill Taylor 		/* Only QP0 uses VL15, otherwise use VL in the packet */
16609e39c5baSBill Taylor 		if (qp->qp_is_special == TAVOR_QP_SMI) {
16619e39c5baSBill Taylor 			ctrl = ctrl | TAVOR_WQE_MLXHDR_VL15_MASK;
16629e39c5baSBill Taylor 		}
16639e39c5baSBill Taylor 
16649e39c5baSBill Taylor 		/*
16659e39c5baSBill Taylor 		 * The SLR (Source LID Replace) bit determines whether the
16669e39c5baSBill Taylor 		 * source LID for an outgoing MLX packet should come from the
16679e39c5baSBill Taylor 		 * PortInfo (SLR = 0) or should be left as it is in the
16689e39c5baSBill Taylor 		 * descriptor (SLR = 1).  The latter is necessary for packets
16699e39c5baSBill Taylor 		 * to be sent with the Permissive LID.
16709e39c5baSBill Taylor 		 */
16719e39c5baSBill Taylor 		if (udav.rlid == IB_LID_PERMISSIVE) {
16729e39c5baSBill Taylor 			ctrl = ctrl | TAVOR_WQE_MLXHDR_SLR_MASK;
16739e39c5baSBill Taylor 		}
16749e39c5baSBill Taylor 
16759e39c5baSBill Taylor 		/* Fill in the max static rate from the address handle */
16769e39c5baSBill Taylor 		ctrl = ctrl | ((uint64_t)udav.max_stat_rate <<
16779e39c5baSBill Taylor 		    TAVOR_WQE_MLXHDR_SRATE_SHIFT);
16789e39c5baSBill Taylor 
16799e39c5baSBill Taylor 		/* All VL15 (i.e. SMI) traffic is required to use SL 0 */
16809e39c5baSBill Taylor 		if (qp->qp_is_special != TAVOR_QP_SMI) {
16819e39c5baSBill Taylor 			ctrl = ctrl | ((uint64_t)udav.sl <<
16829e39c5baSBill Taylor 			    TAVOR_WQE_MLXHDR_SL_SHIFT);
16839e39c5baSBill Taylor 		}
16849e39c5baSBill Taylor 
16859e39c5baSBill Taylor 		/* Set the "c" (i.e. "signaled") bit appropriately */
16869e39c5baSBill Taylor 		if (prev_wr->wr_flags & IBT_WR_SEND_SIGNAL) {
16879e39c5baSBill Taylor 			ctrl = ctrl | TAVOR_WQE_MLXHDR_SIGNALED_MASK;
16889e39c5baSBill Taylor 		}
16899e39c5baSBill Taylor 
16909e39c5baSBill Taylor 		/* Fill in the destination LID from the address handle */
16919e39c5baSBill Taylor 		ctrl = ctrl | ((uint64_t)udav.rlid <<
16929e39c5baSBill Taylor 		    TAVOR_WQE_MLXHDR_RLID_SHIFT);
16939e39c5baSBill Taylor 
16949e39c5baSBill Taylor 		TAVOR_WQE_LINKNEXT(qp, prev_desc, ctrl, next);
16959e39c5baSBill Taylor 	}
16969e39c5baSBill Taylor }
16979e39c5baSBill Taylor 
16989e39c5baSBill Taylor 
16999e39c5baSBill Taylor /*
17009e39c5baSBill Taylor  * tavor_wqe_recv_build()
17019e39c5baSBill Taylor  *    Context: Can be called from interrupt or base context.
17029e39c5baSBill Taylor  */
17039e39c5baSBill Taylor /* ARGSUSED */
17049e39c5baSBill Taylor static int
17059e39c5baSBill Taylor tavor_wqe_recv_build(tavor_state_t *state, tavor_qphdl_t qp,
17069e39c5baSBill Taylor     ibt_recv_wr_t *wr, uint64_t *desc, uint_t *size)
17079e39c5baSBill Taylor {
17089e39c5baSBill Taylor 	tavor_hw_wqe_sgl_t	*ds;
17099e39c5baSBill Taylor 	int			i, num_ds;
17109e39c5baSBill Taylor 
17119e39c5baSBill Taylor 	ASSERT(MUTEX_HELD(&qp->qp_lock));
17129e39c5baSBill Taylor 
17139e39c5baSBill Taylor 	/* Check that work request transport type is valid */
17149e39c5baSBill Taylor 	if ((qp->qp_serv_type != TAVOR_QP_UD) &&
17159e39c5baSBill Taylor 	    (qp->qp_serv_type != TAVOR_QP_RC) &&
17169e39c5baSBill Taylor 	    (qp->qp_serv_type != TAVOR_QP_UC)) {
17179e39c5baSBill Taylor 		return (IBT_QP_SRV_TYPE_INVALID);
17189e39c5baSBill Taylor 	}
17199e39c5baSBill Taylor 
17209e39c5baSBill Taylor 	/* Fill in the Data Segments (SGL) for the Recv WQE */
17219e39c5baSBill Taylor 	ds = (tavor_hw_wqe_sgl_t *)((uintptr_t)desc +
17229e39c5baSBill Taylor 	    sizeof (tavor_hw_rcv_wqe_nextctrl_t));
17239e39c5baSBill Taylor 	num_ds = 0;
17249e39c5baSBill Taylor 
17259e39c5baSBill Taylor 	/* Check for valid number of SGL entries */
17269e39c5baSBill Taylor 	if (wr->wr_nds > qp->qp_rq_sgl) {
17279e39c5baSBill Taylor 		return (IBT_QP_SGL_LEN_INVALID);
17289e39c5baSBill Taylor 	}
17299e39c5baSBill Taylor 
17309e39c5baSBill Taylor 	/*
17319e39c5baSBill Taylor 	 * For each SGL in the Recv Work Request, fill in the Recv WQE's data
17329e39c5baSBill Taylor 	 * segments.  Note: We skip any SGL with zero size because Tavor
17339e39c5baSBill Taylor 	 * hardware cannot handle a zero for "byte_cnt" in the WQE.  Actually
17349e39c5baSBill Taylor 	 * the encoding for zero means a 2GB transfer.  Because of this special
17359e39c5baSBill Taylor 	 * encoding in the hardware, we mask the requested length with
17369e39c5baSBill Taylor 	 * TAVOR_WQE_SGL_BYTE_CNT_MASK (so that 2GB will end up encoded as
17379e39c5baSBill Taylor 	 * zero.)
17389e39c5baSBill Taylor 	 */
17399e39c5baSBill Taylor 	for (i = 0; i < wr->wr_nds; i++) {
17409e39c5baSBill Taylor 		if (wr->wr_sgl[i].ds_len == 0) {
17419e39c5baSBill Taylor 			continue;
17429e39c5baSBill Taylor 		}
17439e39c5baSBill Taylor 
17449e39c5baSBill Taylor 		/*
17459e39c5baSBill Taylor 		 * Fill in the Data Segment(s) for the receive WQE, using the
17469e39c5baSBill Taylor 		 * information contained in the scatter-gather list of the
17479e39c5baSBill Taylor 		 * work request.
17489e39c5baSBill Taylor 		 */
17499e39c5baSBill Taylor 		TAVOR_WQE_BUILD_DATA_SEG(qp, &ds[num_ds], &wr->wr_sgl[i]);
17509e39c5baSBill Taylor 		num_ds++;
17519e39c5baSBill Taylor 	}
17529e39c5baSBill Taylor 
17539e39c5baSBill Taylor 	/* Return the size of descriptor (in 16-byte chunks) */
17549e39c5baSBill Taylor 	*size = ((uintptr_t)&ds[num_ds] - (uintptr_t)desc) >> 0x4;
17559e39c5baSBill Taylor 
17569e39c5baSBill Taylor 	return (DDI_SUCCESS);
17579e39c5baSBill Taylor }
17589e39c5baSBill Taylor 
17599e39c5baSBill Taylor 
17609e39c5baSBill Taylor /*
17619e39c5baSBill Taylor  * tavor_wqe_recv_linknext()
17629e39c5baSBill Taylor  *    Context: Can be called from interrupt or base context.
17639e39c5baSBill Taylor  */
17649e39c5baSBill Taylor static void
17659e39c5baSBill Taylor tavor_wqe_recv_linknext(uint64_t *curr_desc, uint_t curr_descsz,
17669e39c5baSBill Taylor     uint64_t *prev_desc, tavor_qphdl_t qp)
17679e39c5baSBill Taylor {
17689e39c5baSBill Taylor 	uint64_t	next;
17699e39c5baSBill Taylor 
17709e39c5baSBill Taylor 	/*
17719e39c5baSBill Taylor 	 * Calculate the "next" field of the descriptor.  This amounts to
17729e39c5baSBill Taylor 	 * setting up the "next_wqe_addr", "dbd", and "nds" fields (see
17739e39c5baSBill Taylor 	 * tavor_hw.h for more).  Note:  If there is no next descriptor (i.e.
17749e39c5baSBill Taylor 	 * if the current descriptor is the last WQE on the chain), then set
17759e39c5baSBill Taylor 	 * "next" field to TAVOR_WQE_DBD_MASK.  This is because the Tavor
17769e39c5baSBill Taylor 	 * hardware requires the "dbd" bit to be set to one for all Recv WQEs.
17779e39c5baSBill Taylor 	 * In either case, we must add a single bit in the "reserved" field
17789e39c5baSBill Taylor 	 * (TAVOR_RCV_WQE_NDA0_WA_MASK) following the NDA.  This is the
17799e39c5baSBill Taylor 	 * workaround for a known Tavor errata that can cause Recv WQEs with
17809e39c5baSBill Taylor 	 * zero in the NDA field to behave improperly.
17819e39c5baSBill Taylor 	 */
17829e39c5baSBill Taylor 	if (curr_desc != NULL) {
17839e39c5baSBill Taylor 		curr_desc = (uint64_t *)(uintptr_t)((uintptr_t)curr_desc -
17849e39c5baSBill Taylor 		    qp->qp_desc_off);
17859e39c5baSBill Taylor 		next = (uint64_t)((uintptr_t)curr_desc &
17869e39c5baSBill Taylor 		    TAVOR_WQE_NDA_MASK) << 32;
17879e39c5baSBill Taylor 		next = next | (curr_descsz & TAVOR_WQE_NDS_MASK) |
17889e39c5baSBill Taylor 		    TAVOR_WQE_DBD_MASK | TAVOR_RCV_WQE_NDA0_WA_MASK;
17899e39c5baSBill Taylor 	} else {
17909e39c5baSBill Taylor 		next = TAVOR_WQE_DBD_MASK | TAVOR_RCV_WQE_NDA0_WA_MASK;
17919e39c5baSBill Taylor 	}
17929e39c5baSBill Taylor 
17939e39c5baSBill Taylor 	/*
17949e39c5baSBill Taylor 	 * If this WQE is supposed to be linked to the previous descriptor,
17959e39c5baSBill Taylor 	 * then we need to update not only the previous WQE's "next" fields
17969e39c5baSBill Taylor 	 * but we must also update this WQE's "ctrl" fields (i.e. the "c" and
17979e39c5baSBill Taylor 	 * "e" bits - see tavor_hw.h for more).  Note: both the "c" and "e"
17989e39c5baSBill Taylor 	 * bits are always hardcoded to zero.
17999e39c5baSBill Taylor 	 */
18009e39c5baSBill Taylor 	if (prev_desc != NULL) {
18019e39c5baSBill Taylor 		TAVOR_WQE_LINKNEXT(qp, prev_desc, 0, next);
18029e39c5baSBill Taylor 	}
18039e39c5baSBill Taylor }
18049e39c5baSBill Taylor 
18059e39c5baSBill Taylor 
18069e39c5baSBill Taylor /*
18079e39c5baSBill Taylor  * tavor_wqe_srq_build()
18089e39c5baSBill Taylor  *    Context: Can be called from interrupt or base context.
18099e39c5baSBill Taylor  */
18109e39c5baSBill Taylor /* ARGSUSED */
18119e39c5baSBill Taylor static int
18129e39c5baSBill Taylor tavor_wqe_srq_build(tavor_state_t *state, tavor_srqhdl_t srq,
18139e39c5baSBill Taylor     ibt_recv_wr_t *wr, uint64_t *desc)
18149e39c5baSBill Taylor {
18159e39c5baSBill Taylor 	tavor_hw_wqe_sgl_t	*ds;
18169e39c5baSBill Taylor 	ibt_wr_ds_t		end_sgl;
18179e39c5baSBill Taylor 	int			i, num_ds;
18189e39c5baSBill Taylor 
18199e39c5baSBill Taylor 	ASSERT(MUTEX_HELD(&srq->srq_lock));
18209e39c5baSBill Taylor 
18219e39c5baSBill Taylor 	/* Fill in the Data Segments (SGL) for the Recv WQE */
18229e39c5baSBill Taylor 	ds = (tavor_hw_wqe_sgl_t *)((uintptr_t)desc +
18239e39c5baSBill Taylor 	    sizeof (tavor_hw_rcv_wqe_nextctrl_t));
18249e39c5baSBill Taylor 	num_ds = 0;
18259e39c5baSBill Taylor 
18269e39c5baSBill Taylor 	/* Check for valid number of SGL entries */
18279e39c5baSBill Taylor 	if (wr->wr_nds > srq->srq_wq_sgl) {
18289e39c5baSBill Taylor 		return (IBT_QP_SGL_LEN_INVALID);
18299e39c5baSBill Taylor 	}
18309e39c5baSBill Taylor 
18319e39c5baSBill Taylor 	/*
18329e39c5baSBill Taylor 	 * For each SGL in the Recv Work Request, fill in the Recv WQE's data
18339e39c5baSBill Taylor 	 * segments.  Note: We skip any SGL with zero size because Tavor
18349e39c5baSBill Taylor 	 * hardware cannot handle a zero for "byte_cnt" in the WQE.  Actually
18359e39c5baSBill Taylor 	 * the encoding for zero means a 2GB transfer.  Because of this special
18369e39c5baSBill Taylor 	 * encoding in the hardware, we mask the requested length with
18379e39c5baSBill Taylor 	 * TAVOR_WQE_SGL_BYTE_CNT_MASK (so that 2GB will end up encoded as
18389e39c5baSBill Taylor 	 * zero.)
18399e39c5baSBill Taylor 	 */
18409e39c5baSBill Taylor 	for (i = 0; i < wr->wr_nds; i++) {
18419e39c5baSBill Taylor 		if (wr->wr_sgl[i].ds_len == 0) {
18429e39c5baSBill Taylor 			continue;
18439e39c5baSBill Taylor 		}
18449e39c5baSBill Taylor 
18459e39c5baSBill Taylor 		/*
18469e39c5baSBill Taylor 		 * Fill in the Data Segment(s) for the receive WQE, using the
18479e39c5baSBill Taylor 		 * information contained in the scatter-gather list of the
18489e39c5baSBill Taylor 		 * work request.
18499e39c5baSBill Taylor 		 */
18509e39c5baSBill Taylor 		TAVOR_WQE_BUILD_DATA_SEG_SRQ(srq, &ds[num_ds], &wr->wr_sgl[i]);
18519e39c5baSBill Taylor 		num_ds++;
18529e39c5baSBill Taylor 	}
18539e39c5baSBill Taylor 
18549e39c5baSBill Taylor 	/*
18559e39c5baSBill Taylor 	 * For SRQ, if the number of data segments is less than the maximum
18569e39c5baSBill Taylor 	 * specified at alloc, then we have to fill in a special "key" entry in
18579e39c5baSBill Taylor 	 * the sgl entry after the last valid one in this post request.  We do
18589e39c5baSBill Taylor 	 * that here.
18599e39c5baSBill Taylor 	 */
18609e39c5baSBill Taylor 	if (num_ds < srq->srq_wq_sgl) {
18619e39c5baSBill Taylor 		end_sgl.ds_va  = 0;
18629e39c5baSBill Taylor 		end_sgl.ds_len = 0;
18639e39c5baSBill Taylor 		end_sgl.ds_key = 0x1;
18649e39c5baSBill Taylor 		TAVOR_WQE_BUILD_DATA_SEG_SRQ(srq, &ds[num_ds], &end_sgl);
18659e39c5baSBill Taylor 	}
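	/*
	 * Illustrative sketch (not from the original source): if
	 * "srq_wq_sgl" is four and the request supplies two non-empty SGL
	 * entries, ds[0] and ds[1] carry the real buffers and ds[2] receives
	 * the zero-length entry with key 0x1 built above, which presumably
	 * terminates the list for the hardware.
	 */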
18669e39c5baSBill Taylor 
18679e39c5baSBill Taylor 	return (DDI_SUCCESS);
18689e39c5baSBill Taylor }
18699e39c5baSBill Taylor 
18709e39c5baSBill Taylor 
18719e39c5baSBill Taylor /*
18729e39c5baSBill Taylor  * tavor_wqe_srq_linknext()
18739e39c5baSBill Taylor  *    Context: Can be called from interrupt or base context.
18749e39c5baSBill Taylor  */
18759e39c5baSBill Taylor static void
18769e39c5baSBill Taylor tavor_wqe_srq_linknext(uint64_t *curr_desc, uint64_t *prev_desc,
18779e39c5baSBill Taylor     tavor_srqhdl_t srq)
18789e39c5baSBill Taylor {
18799e39c5baSBill Taylor 	uint64_t	next;
18809e39c5baSBill Taylor 
18819e39c5baSBill Taylor 	/*
18829e39c5baSBill Taylor 	 * Calculate the "next" field of the descriptor.  This amounts to
18839e39c5baSBill Taylor 	 * setting up the "next_wqe_addr", "dbd", and "nds" fields (see
18849e39c5baSBill Taylor 	 * tavor_hw.h for more).  Note:  If there is no next descriptor (i.e.
18859e39c5baSBill Taylor 	 * if the current descriptor is the last WQE on the chain), then set
18869e39c5baSBill Taylor 	 * "next" field to TAVOR_WQE_DBD_MASK.  This is because the Tavor
18879e39c5baSBill Taylor 	 * hardware requires the "dbd" bit to be set to one for all Recv WQEs.
18889e39c5baSBill Taylor 	 * In either case, we must add a single bit in the "reserved" field
18899e39c5baSBill Taylor 	 * (TAVOR_RCV_WQE_NDA0_WA_MASK) following the NDA.  This is the
18909e39c5baSBill Taylor 	 * workaround for a known Tavor errata that can cause Recv WQEs with
18919e39c5baSBill Taylor 	 * zero in the NDA field to behave improperly.
18929e39c5baSBill Taylor 	 */
18939e39c5baSBill Taylor 	if (curr_desc != NULL) {
18949e39c5baSBill Taylor 		curr_desc = (uint64_t *)(uintptr_t)((uintptr_t)curr_desc -
18959e39c5baSBill Taylor 		    srq->srq_desc_off);
18969e39c5baSBill Taylor 		next = (uint64_t)((uintptr_t)curr_desc &
18979e39c5baSBill Taylor 		    TAVOR_WQE_NDA_MASK) << 32;
18989e39c5baSBill Taylor 		next = next | TAVOR_WQE_DBD_MASK | TAVOR_RCV_WQE_NDA0_WA_MASK;
18999e39c5baSBill Taylor 	} else {
19009e39c5baSBill Taylor 		next = TAVOR_RCV_WQE_NDA0_WA_MASK;
19019e39c5baSBill Taylor 	}
19029e39c5baSBill Taylor 
19039e39c5baSBill Taylor 	/*
19049e39c5baSBill Taylor 	 * If this WQE is supposed to be linked to the previous descriptor,
19059e39c5baSBill Taylor 	 * then we need to update not only the previous WQE's "next" fields
19069e39c5baSBill Taylor 	 * but we must also update this WQE's "ctrl" fields (i.e. the "c" and
19079e39c5baSBill Taylor 	 * "e" bits - see tavor_hw.h for more).  Note: both the "c" and "e"
19089e39c5baSBill Taylor 	 * bits are always hardcoded to zero.
19099e39c5baSBill Taylor 	 */
19109e39c5baSBill Taylor 	if (prev_desc != NULL) {
19119e39c5baSBill Taylor 		TAVOR_WQE_LINKNEXT_SRQ(srq, prev_desc, 0, next);
19129e39c5baSBill Taylor 	}
19139e39c5baSBill Taylor }
19149e39c5baSBill Taylor 
19159e39c5baSBill Taylor 
19169e39c5baSBill Taylor /*
19179e39c5baSBill Taylor  * tavor_wr_get_immediate()
19189e39c5baSBill Taylor  *    Context: Can be called from interrupt or base context.
19199e39c5baSBill Taylor  */
19209e39c5baSBill Taylor static uint32_t
19219e39c5baSBill Taylor tavor_wr_get_immediate(ibt_send_wr_t *wr)
19229e39c5baSBill Taylor {
19239e39c5baSBill Taylor 	/*
19249e39c5baSBill Taylor 	 * This routine extracts the "immediate data" from the appropriate
19259e39c5baSBill Taylor 	 * location in the IBTF work request.  Because of the way the
19269e39c5baSBill Taylor 	 * work request structure is defined, the location for this data
19279e39c5baSBill Taylor 	 * depends on the actual work request operation type.
19289e39c5baSBill Taylor 	 */
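	/*
	 * For example, an RC "RDMA Write with Immediate" request carries its
	 * immediate data in wr->wr.rc.rcwr.rdma.rdma_immed, while a UD Send
	 * carries it in wr->wr.ud.udwr_immed; both cases are handled below.
	 */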
19299e39c5baSBill Taylor 
19309e39c5baSBill Taylor 	/* For RDMA Write, test if RC or UC */
19319e39c5baSBill Taylor 	if (wr->wr_opcode == IBT_WRC_RDMAW) {
19329e39c5baSBill Taylor 		if (wr->wr_trans == IBT_RC_SRV) {
19339e39c5baSBill Taylor 			return (wr->wr.rc.rcwr.rdma.rdma_immed);
19349e39c5baSBill Taylor 		} else {  /* IBT_UC_SRV */
19359e39c5baSBill Taylor 			return (wr->wr.uc.ucwr.rdma.rdma_immed);
19369e39c5baSBill Taylor 		}
19379e39c5baSBill Taylor 	}
19389e39c5baSBill Taylor 
19399e39c5baSBill Taylor 	/* For Send, test if RC, UD, or UC */
19409e39c5baSBill Taylor 	if (wr->wr_opcode == IBT_WRC_SEND) {
19419e39c5baSBill Taylor 		if (wr->wr_trans == IBT_RC_SRV) {
19429e39c5baSBill Taylor 			return (wr->wr.rc.rcwr.send_immed);
19439e39c5baSBill Taylor 		} else if (wr->wr_trans == IBT_UD_SRV) {
19449e39c5baSBill Taylor 			return (wr->wr.ud.udwr_immed);
19459e39c5baSBill Taylor 		} else {  /* IBT_UC_SRV */
19469e39c5baSBill Taylor 			return (wr->wr.uc.ucwr.send_immed);
19479e39c5baSBill Taylor 		}
19489e39c5baSBill Taylor 	}
19499e39c5baSBill Taylor 
19509e39c5baSBill Taylor 	/*
19519e39c5baSBill Taylor 	 * If any other type of request, then immediate is undefined
19529e39c5baSBill Taylor 	 */
19539e39c5baSBill Taylor 	return (0);
19549e39c5baSBill Taylor }
19559e39c5baSBill Taylor 
19569e39c5baSBill Taylor 
19579e39c5baSBill Taylor /*
19589e39c5baSBill Taylor  * tavor_wqe_sync()
19599e39c5baSBill Taylor  *    Context: Can be called from interrupt or base context.
19609e39c5baSBill Taylor  */
19619e39c5baSBill Taylor static void
19629e39c5baSBill Taylor tavor_wqe_sync(void *hdl, uint_t sync_from, uint_t sync_to,
19639e39c5baSBill Taylor     uint_t sync_type, uint_t flag)
19649e39c5baSBill Taylor {
19659e39c5baSBill Taylor 	tavor_qphdl_t		qp;
19669e39c5baSBill Taylor 	tavor_srqhdl_t		srq;
19679e39c5baSBill Taylor 	uint_t			is_sync_req;
19689e39c5baSBill Taylor 	uint64_t		*wqe_from, *wqe_to, *wqe_base, *wqe_top;
19699e39c5baSBill Taylor 	ddi_dma_handle_t	dmahdl;
19709e39c5baSBill Taylor 	off_t			offset;
19719e39c5baSBill Taylor 	size_t			length;
19729e39c5baSBill Taylor 	uint32_t		qsize;
19739e39c5baSBill Taylor 	int			status;
19749e39c5baSBill Taylor 
19759e39c5baSBill Taylor 	if (sync_type == TAVOR_WR_SRQ) {
19769e39c5baSBill Taylor 		srq = (tavor_srqhdl_t)hdl;
19779e39c5baSBill Taylor 		is_sync_req = srq->srq_sync;
19789e39c5baSBill Taylor 		/* Get the DMA handle from SRQ context */
19799e39c5baSBill Taylor 		dmahdl = srq->srq_mrhdl->mr_bindinfo.bi_dmahdl;
19809e39c5baSBill Taylor 	} else {
19819e39c5baSBill Taylor 		qp = (tavor_qphdl_t)hdl;
19829e39c5baSBill Taylor 		is_sync_req = qp->qp_sync;
19839e39c5baSBill Taylor 		/* Get the DMA handle from QP context */
19849e39c5baSBill Taylor 		dmahdl = qp->qp_mrhdl->mr_bindinfo.bi_dmahdl;
19859e39c5baSBill Taylor 	}
19869e39c5baSBill Taylor 
19879e39c5baSBill Taylor 	/* Determine if the work queues need to be synced or not */
19889e39c5baSBill Taylor 	if (is_sync_req == 0) {
19899e39c5baSBill Taylor 		return;
19909e39c5baSBill Taylor 	}
19919e39c5baSBill Taylor 
19929e39c5baSBill Taylor 	/*
19939e39c5baSBill Taylor 	 * Depending on the type of the work queue, we grab information
19949e39c5baSBill Taylor 	 * about the address ranges we need to DMA sync.
19959e39c5baSBill Taylor 	 */
19969e39c5baSBill Taylor 	if (sync_type == TAVOR_WR_SEND) {
19979e39c5baSBill Taylor 		wqe_from = TAVOR_QP_SQ_ENTRY(qp, sync_from);
19989e39c5baSBill Taylor 		wqe_to   = TAVOR_QP_SQ_ENTRY(qp, sync_to);
19999e39c5baSBill Taylor 		qsize	 = qp->qp_sq_bufsz;
20009e39c5baSBill Taylor 
20019e39c5baSBill Taylor 		wqe_base = TAVOR_QP_SQ_ENTRY(qp, 0);
20029e39c5baSBill Taylor 		wqe_top	 = TAVOR_QP_SQ_ENTRY(qp, qsize);
20039e39c5baSBill Taylor 	} else if (sync_type == TAVOR_WR_RECV) {
20049e39c5baSBill Taylor 		wqe_from = TAVOR_QP_RQ_ENTRY(qp, sync_from);
20059e39c5baSBill Taylor 		wqe_to   = TAVOR_QP_RQ_ENTRY(qp, sync_to);
20069e39c5baSBill Taylor 		qsize	 = qp->qp_rq_bufsz;
20079e39c5baSBill Taylor 
20089e39c5baSBill Taylor 		wqe_base = TAVOR_QP_RQ_ENTRY(qp, 0);
20099e39c5baSBill Taylor 		wqe_top	 = TAVOR_QP_RQ_ENTRY(qp, qsize);
20109e39c5baSBill Taylor 	} else {
20119e39c5baSBill Taylor 		wqe_from = TAVOR_SRQ_WQ_ENTRY(srq, sync_from);
20129e39c5baSBill Taylor 		wqe_to   = TAVOR_SRQ_WQ_ENTRY(srq, sync_to);
20139e39c5baSBill Taylor 		qsize	 = srq->srq_wq_bufsz;
20149e39c5baSBill Taylor 
20159e39c5baSBill Taylor 		wqe_base = TAVOR_SRQ_WQ_ENTRY(srq, 0);
20169e39c5baSBill Taylor 		wqe_top	 = TAVOR_SRQ_WQ_ENTRY(srq, qsize);
20179e39c5baSBill Taylor 	}
20189e39c5baSBill Taylor 
20199e39c5baSBill Taylor 	/*
20209e39c5baSBill Taylor 	 * There are two possible cases for the beginning and end of the WQE
20219e39c5baSBill Taylor 	 * chain we are trying to sync.  Either this is the simple case, where
20229e39c5baSBill Taylor 	 * the end of the chain is below the beginning of the chain, or it is
20239e39c5baSBill Taylor 	 * the "wrap-around" case, where the end of the chain has wrapped over
20249e39c5baSBill Taylor 	 * the end of the queue.  In the former case, we simply need to
20259e39c5baSBill Taylor 	 * calculate the span from beginning to end and sync it.  In the latter
20269e39c5baSBill Taylor 	 * case, however, we need to calculate the span from the top of the
20279e39c5baSBill Taylor 	 * work queue to the end of the chain and sync that, and then we need
20289e39c5baSBill Taylor 	 * to find the other portion (from beginning of chain to end of queue)
20299e39c5baSBill Taylor 	 * and sync that as well.  Note: if the "top to end" span is actually
20309e39c5baSBill Taylor 	 * zero length, then we don't do a DMA sync because a zero length DMA
20319e39c5baSBill Taylor 	 * sync unnecessarily syncs the entire work queue.
20329e39c5baSBill Taylor 	 */
20339e39c5baSBill Taylor 	if (wqe_to > wqe_from) {
20349e39c5baSBill Taylor 		/* "From Beginning to End" */
20359e39c5baSBill Taylor 		offset = (off_t)((uintptr_t)wqe_from - (uintptr_t)wqe_base);
20369e39c5baSBill Taylor 		length = (size_t)((uintptr_t)wqe_to - (uintptr_t)wqe_from);
20379e39c5baSBill Taylor 
20389e39c5baSBill Taylor 		status = ddi_dma_sync(dmahdl, offset, length, flag);
20399e39c5baSBill Taylor 		if (status != DDI_SUCCESS) {
20409e39c5baSBill Taylor 			return;
20419e39c5baSBill Taylor 		}
20429e39c5baSBill Taylor 	} else {
20439e39c5baSBill Taylor 		/* "From Top to End" */
20449e39c5baSBill Taylor 		offset = (off_t)0;
20459e39c5baSBill Taylor 		length = (size_t)((uintptr_t)wqe_to - (uintptr_t)wqe_base);
20469e39c5baSBill Taylor 		if (length) {
20479e39c5baSBill Taylor 			status = ddi_dma_sync(dmahdl, offset, length, flag);
20489e39c5baSBill Taylor 			if (status != DDI_SUCCESS) {
20499e39c5baSBill Taylor 				return;
20509e39c5baSBill Taylor 			}
20519e39c5baSBill Taylor 		}
20529e39c5baSBill Taylor 
20539e39c5baSBill Taylor 		/* "From Beginning to Bottom" */
20549e39c5baSBill Taylor 		offset = (off_t)((uintptr_t)wqe_from - (uintptr_t)wqe_base);
20559e39c5baSBill Taylor 		length = (size_t)((uintptr_t)wqe_top - (uintptr_t)wqe_from);
20569e39c5baSBill Taylor 		status = ddi_dma_sync(dmahdl, offset, length, flag);
20579e39c5baSBill Taylor 		if (status != DDI_SUCCESS) {
20589e39c5baSBill Taylor 			return;
20599e39c5baSBill Taylor 		}
20609e39c5baSBill Taylor 	}
20619e39c5baSBill Taylor }
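
/*
 * Illustrative sketch (not part of the driver): the two-segment sync in
 * the routine above can be pictured as a small standalone helper.  It
 * assumes the same layout used here (a contiguous work queue buffer of
 * "qsize" fixed-size entries covered by a single DMA handle); the helper
 * name and parameters are hypothetical.
 */
#if 0
static int
wq_sync_range(ddi_dma_handle_t dmahdl, uintptr_t base, size_t entry_sz,
    uint_t qsize, uint_t from, uint_t to, uint_t flag)
{
	uintptr_t	wqe_from = base + (uintptr_t)from * entry_sz;
	uintptr_t	wqe_to	 = base + (uintptr_t)to * entry_sz;
	uintptr_t	wqe_top	 = base + (uintptr_t)qsize * entry_sz;
	int		status;

	if (wqe_to > wqe_from) {
		/* Simple case: a single span from "from" up to "to" */
		return (ddi_dma_sync(dmahdl, (off_t)(wqe_from - base),
		    (size_t)(wqe_to - wqe_from), flag));
	}

	/* Wrap-around case: sync "top to end" first (if non-zero length) */
	if (wqe_to != base) {
		status = ddi_dma_sync(dmahdl, 0, (size_t)(wqe_to - base),
		    flag);
		if (status != DDI_SUCCESS) {
			return (status);
		}
	}

	/* ... then "beginning of chain to bottom of queue" */
	return (ddi_dma_sync(dmahdl, (off_t)(wqe_from - base),
	    (size_t)(wqe_top - wqe_from), flag));
}
#endif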
20629e39c5baSBill Taylor 
20639e39c5baSBill Taylor 
20649e39c5baSBill Taylor /*
20659e39c5baSBill Taylor  * tavor_wr_bind_check()
20669e39c5baSBill Taylor  *    Context: Can be called from interrupt or base context.
20679e39c5baSBill Taylor  */
20689e39c5baSBill Taylor static int
20699e39c5baSBill Taylor tavor_wr_bind_check(tavor_state_t *state, ibt_send_wr_t *wr)
20709e39c5baSBill Taylor {
20719e39c5baSBill Taylor 	ibt_bind_flags_t	bind_flags;
20729e39c5baSBill Taylor 	uint64_t		vaddr, len;
20739e39c5baSBill Taylor 	uint64_t		reg_start_addr, reg_end_addr;
20749e39c5baSBill Taylor 	tavor_mwhdl_t		mw;
20759e39c5baSBill Taylor 	tavor_mrhdl_t		mr;
20769e39c5baSBill Taylor 	tavor_rsrc_t		*mpt;
20779e39c5baSBill Taylor 	uint32_t		new_rkey;
20789e39c5baSBill Taylor 
20799e39c5baSBill Taylor 	/* Check for a valid Memory Window handle in the WR */
20809e39c5baSBill Taylor 	mw = (tavor_mwhdl_t)wr->wr.rc.rcwr.bind->bind_ibt_mw_hdl;
20819e39c5baSBill Taylor 	if (mw == NULL) {
20829e39c5baSBill Taylor 		return (IBT_MW_HDL_INVALID);
20839e39c5baSBill Taylor 	}
20849e39c5baSBill Taylor 
20859e39c5baSBill Taylor 	/* Check for a valid Memory Region handle in the WR */
20869e39c5baSBill Taylor 	mr = (tavor_mrhdl_t)wr->wr.rc.rcwr.bind->bind_ibt_mr_hdl;
20879e39c5baSBill Taylor 	if (mr == NULL) {
20889e39c5baSBill Taylor 		return (IBT_MR_HDL_INVALID);
20899e39c5baSBill Taylor 	}
20909e39c5baSBill Taylor 
20919e39c5baSBill Taylor 	mutex_enter(&mr->mr_lock);
20929e39c5baSBill Taylor 	mutex_enter(&mw->mr_lock);
20939e39c5baSBill Taylor 
20949e39c5baSBill Taylor 	/*
20959e39c5baSBill Taylor 	 * Check here to see if the memory region has already been partially
20969e39c5baSBill Taylor 	 * deregistered as a result of a tavor_umap_umemlock_cb() callback.
20979e39c5baSBill Taylor 	 * If so, this is an error, return failure.
20989e39c5baSBill Taylor 	 */
20999e39c5baSBill Taylor 	if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) {
21009e39c5baSBill Taylor 		mutex_exit(&mr->mr_lock);
21019e39c5baSBill Taylor 		mutex_exit(&mw->mr_lock);
21029e39c5baSBill Taylor 		return (IBT_MR_HDL_INVALID);
21039e39c5baSBill Taylor 	}
21049e39c5baSBill Taylor 
21059e39c5baSBill Taylor 	/* Check for a valid Memory Window RKey (i.e. a matching RKey) */
21069e39c5baSBill Taylor 	if (mw->mr_rkey != wr->wr.rc.rcwr.bind->bind_rkey) {
21079e39c5baSBill Taylor 		mutex_exit(&mr->mr_lock);
21089e39c5baSBill Taylor 		mutex_exit(&mw->mr_lock);
21099e39c5baSBill Taylor 		return (IBT_MR_RKEY_INVALID);
21109e39c5baSBill Taylor 	}
21119e39c5baSBill Taylor 
21129e39c5baSBill Taylor 	/* Check for a valid Memory Region LKey (i.e. a matching LKey) */
21139e39c5baSBill Taylor 	if (mr->mr_lkey != wr->wr.rc.rcwr.bind->bind_lkey) {
21149e39c5baSBill Taylor 		mutex_exit(&mr->mr_lock);
21159e39c5baSBill Taylor 		mutex_exit(&mw->mr_lock);
21169e39c5baSBill Taylor 		return (IBT_MR_LKEY_INVALID);
21179e39c5baSBill Taylor 	}
21189e39c5baSBill Taylor 
21199e39c5baSBill Taylor 	/*
21209e39c5baSBill Taylor 	 * Now check for valid "vaddr" and "len".  Note:  We don't check the
21219e39c5baSBill Taylor 	 * "vaddr" range when "len == 0" (i.e. on unbind operations)
21229e39c5baSBill Taylor 	 */
21239e39c5baSBill Taylor 	len = wr->wr.rc.rcwr.bind->bind_len;
21249e39c5baSBill Taylor 	if (len != 0) {
21259e39c5baSBill Taylor 		vaddr = wr->wr.rc.rcwr.bind->bind_va;
21269e39c5baSBill Taylor 		reg_start_addr = mr->mr_bindinfo.bi_addr;
21279e39c5baSBill Taylor 		reg_end_addr   = mr->mr_bindinfo.bi_addr +
21289e39c5baSBill Taylor 		    (mr->mr_bindinfo.bi_len - 1);
21299e39c5baSBill Taylor 		if ((vaddr < reg_start_addr) || (vaddr > reg_end_addr)) {
21309e39c5baSBill Taylor 			mutex_exit(&mr->mr_lock);
21319e39c5baSBill Taylor 			mutex_exit(&mw->mr_lock);
21329e39c5baSBill Taylor 			return (IBT_MR_VA_INVALID);
21339e39c5baSBill Taylor 		}
21349e39c5baSBill Taylor 		vaddr = (vaddr + len) - 1;
21359e39c5baSBill Taylor 		if (vaddr > reg_end_addr) {
21369e39c5baSBill Taylor 			mutex_exit(&mr->mr_lock);
21379e39c5baSBill Taylor 			mutex_exit(&mw->mr_lock);
21389e39c5baSBill Taylor 			return (IBT_MR_LEN_INVALID);
21399e39c5baSBill Taylor 		}
21409e39c5baSBill Taylor 	}
21419e39c5baSBill Taylor 
21429e39c5baSBill Taylor 	/*
21439e39c5baSBill Taylor 	 * Validate the bind access flags.  Remote Write and Atomic access for
21449e39c5baSBill Taylor 	 * the Memory Window require that Local Write access be set in the
21459e39c5baSBill Taylor 	 * corresponding Memory Region.
21469e39c5baSBill Taylor 	 */
21479e39c5baSBill Taylor 	bind_flags = wr->wr.rc.rcwr.bind->bind_flags;
21489e39c5baSBill Taylor 	if (((bind_flags & IBT_WR_BIND_WRITE) ||
21499e39c5baSBill Taylor 	    (bind_flags & IBT_WR_BIND_ATOMIC)) &&
21509e39c5baSBill Taylor 	    !(mr->mr_accflag & IBT_MR_LOCAL_WRITE)) {
21519e39c5baSBill Taylor 		mutex_exit(&mr->mr_lock);
21529e39c5baSBill Taylor 		mutex_exit(&mw->mr_lock);
21539e39c5baSBill Taylor 		return (IBT_MR_ACCESS_REQ_INVALID);
21549e39c5baSBill Taylor 	}
21559e39c5baSBill Taylor 
21569e39c5baSBill Taylor 	/* Calculate the new RKey for the Memory Window */
21579e39c5baSBill Taylor 	mpt = mw->mr_mptrsrcp;
21589e39c5baSBill Taylor 	tavor_mr_keycalc(state, mpt->tr_indx, &new_rkey);
21599e39c5baSBill Taylor 
21609e39c5baSBill Taylor 	wr->wr.rc.rcwr.bind->bind_rkey_out = new_rkey;
21619e39c5baSBill Taylor 	mw->mr_rkey = new_rkey;
21629e39c5baSBill Taylor 
21639e39c5baSBill Taylor 	mutex_exit(&mr->mr_lock);
21649e39c5baSBill Taylor 	mutex_exit(&mw->mr_lock);
21659e39c5baSBill Taylor 	return (DDI_SUCCESS);
21669e39c5baSBill Taylor }
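
/*
 * Illustrative sketch (not part of the driver): the "vaddr"/"len" checks
 * above simply verify that the requested window [va, va + len - 1] lies
 * entirely within the region's registered range.  A minimal standalone
 * form of that predicate (with hypothetical parameter names) follows.
 */
#if 0
static int
bind_range_ok(uint64_t va, uint64_t len, uint64_t mr_start, uint64_t mr_len)
{
	uint64_t	mr_end = mr_start + (mr_len - 1);

	if (len == 0) {
		return (1);	/* unbind: no range check is performed */
	}
	if ((va < mr_start) || (va > mr_end)) {
		return (0);	/* window start falls outside the region */
	}
	if (((va + len) - 1) > mr_end) {
		return (0);	/* window runs past the end of the region */
	}
	return (1);
}
#endif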
21679e39c5baSBill Taylor 
21689e39c5baSBill Taylor 
21699e39c5baSBill Taylor /*
21709e39c5baSBill Taylor  * tavor_wrid_from_reset_handling()
21719e39c5baSBill Taylor  *    Context: Can be called from interrupt or base context.
21729e39c5baSBill Taylor  */
21739e39c5baSBill Taylor int
21749e39c5baSBill Taylor tavor_wrid_from_reset_handling(tavor_state_t *state, tavor_qphdl_t qp)
21759e39c5baSBill Taylor {
21769e39c5baSBill Taylor 	tavor_workq_hdr_t	*swq, *rwq;
21779e39c5baSBill Taylor 	tavor_wrid_list_hdr_t	*s_wridlist, *r_wridlist;
21789e39c5baSBill Taylor 	uint_t			create_new_swq = 0, create_new_rwq = 0;
21799e39c5baSBill Taylor 	uint_t			create_wql = 0;
21809e39c5baSBill Taylor 	uint_t			qp_srq_en;
21819e39c5baSBill Taylor 
21829e39c5baSBill Taylor 	/*
21839e39c5baSBill Taylor 	 * For each of this QP's Work Queues, make sure we have a (properly
21849e39c5baSBill Taylor 	 * initialized) Work Request ID list attached to the relevant
21859e39c5baSBill Taylor 	 * completion queue.  Grab the CQ lock(s) before manipulating the
21869e39c5baSBill Taylor 	 * lists.
21879e39c5baSBill Taylor 	 */
21889e39c5baSBill Taylor 	tavor_wrid_wqhdr_lock_both(qp);
21899e39c5baSBill Taylor 	swq = tavor_wrid_wqhdr_find(qp->qp_sq_cqhdl, qp->qp_qpnum,
21909e39c5baSBill Taylor 	    TAVOR_WR_SEND);
21919e39c5baSBill Taylor 	if (swq == NULL) {
21929e39c5baSBill Taylor 		/* Couldn't find matching work queue header, create it */
21939e39c5baSBill Taylor 		create_new_swq = create_wql = 1;
21949e39c5baSBill Taylor 		swq = tavor_wrid_wqhdr_create(state, qp->qp_sq_cqhdl,
21959e39c5baSBill Taylor 		    qp->qp_qpnum, TAVOR_WR_SEND, create_wql);
21969e39c5baSBill Taylor 		if (swq == NULL) {
21979e39c5baSBill Taylor 			/*
21989e39c5baSBill Taylor 			 * If we couldn't find/allocate space for the workq
21999e39c5baSBill Taylor 			 * header, then drop the lock(s) and return failure.
22009e39c5baSBill Taylor 			 */
22019e39c5baSBill Taylor 			tavor_wrid_wqhdr_unlock_both(qp);
22029e39c5baSBill Taylor 			return (ibc_get_ci_failure(0));
22039e39c5baSBill Taylor 		}
22049e39c5baSBill Taylor 	}
22059e39c5baSBill Taylor 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*swq))
22069e39c5baSBill Taylor 	qp->qp_sq_wqhdr = swq;
22079e39c5baSBill Taylor 	swq->wq_size = qp->qp_sq_bufsz;
22089e39c5baSBill Taylor 	swq->wq_head = 0;
22099e39c5baSBill Taylor 	swq->wq_tail = 0;
22109e39c5baSBill Taylor 	swq->wq_full = 0;
22119e39c5baSBill Taylor 
22129e39c5baSBill Taylor 	/*
22139e39c5baSBill Taylor 	 * Allocate space for the tavor_wrid_entry_t container
22149e39c5baSBill Taylor 	 */
22159e39c5baSBill Taylor 	s_wridlist = tavor_wrid_get_list(swq->wq_size);
22169e39c5baSBill Taylor 	if (s_wridlist == NULL) {
22179e39c5baSBill Taylor 		/*
22189e39c5baSBill Taylor 		 * If we couldn't allocate space for tracking the WRID
22199e39c5baSBill Taylor 		 * entries, then cleanup the workq header from above (if
22209e39c5baSBill Taylor 		 * necessary, i.e. if we created the workq header).  Then
22219e39c5baSBill Taylor 		 * drop the lock(s) and return failure.
22229e39c5baSBill Taylor 		 */
22239e39c5baSBill Taylor 		if (create_new_swq) {
22249e39c5baSBill Taylor 			tavor_cq_wqhdr_remove(qp->qp_sq_cqhdl, swq);
22259e39c5baSBill Taylor 		}
22269e39c5baSBill Taylor 
22279e39c5baSBill Taylor 		tavor_wrid_wqhdr_unlock_both(qp);
22289e39c5baSBill Taylor 		return (ibc_get_ci_failure(0));
22299e39c5baSBill Taylor 	}
22309e39c5baSBill Taylor 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*s_wridlist))
22319e39c5baSBill Taylor 	s_wridlist->wl_wqhdr = swq;
22329e39c5baSBill Taylor 
22339e39c5baSBill Taylor 	/* Chain the new WRID list container to the workq hdr list */
22349e39c5baSBill Taylor 	mutex_enter(&swq->wq_wrid_wql->wql_lock);
22359e39c5baSBill Taylor 	tavor_wrid_wqhdr_add(swq, s_wridlist);
22369e39c5baSBill Taylor 	mutex_exit(&swq->wq_wrid_wql->wql_lock);
22379e39c5baSBill Taylor 
22389e39c5baSBill Taylor 	qp_srq_en = qp->qp_srq_en;
22399e39c5baSBill Taylor 
22409e39c5baSBill Taylor #ifdef __lock_lint
22419e39c5baSBill Taylor 	mutex_enter(&qp->qp_srqhdl->srq_lock);
22429e39c5baSBill Taylor #else
22439e39c5baSBill Taylor 	if (qp_srq_en == TAVOR_QP_SRQ_ENABLED) {
22449e39c5baSBill Taylor 		mutex_enter(&qp->qp_srqhdl->srq_lock);
22459e39c5baSBill Taylor 	}
22469e39c5baSBill Taylor #endif
22479e39c5baSBill Taylor 	/*
22489e39c5baSBill Taylor 	 * Now we repeat all the above operations for the receive work queue,
22499e39c5baSBill Taylor 	 * or shared receive work queue.
22509e39c5baSBill Taylor 	 *
22519e39c5baSBill Taylor 	 * Note: We still use the 'qp_rq_cqhdl' even in the SRQ case.
22529e39c5baSBill Taylor 	 */
22539e39c5baSBill Taylor 	rwq = tavor_wrid_wqhdr_find(qp->qp_rq_cqhdl, qp->qp_qpnum,
22549e39c5baSBill Taylor 	    TAVOR_WR_RECV);
22559e39c5baSBill Taylor 	if (rwq == NULL) {
22569e39c5baSBill Taylor 		create_new_rwq = create_wql = 1;
22579e39c5baSBill Taylor 
22589e39c5baSBill Taylor 		/*
22599e39c5baSBill Taylor 		 * If this QP is associated with an SRQ, and this isn't the
22609e39c5baSBill Taylor 		 * first QP on the SRQ, then the 'srq_wrid_wql' will already be
22619e39c5baSBill Taylor 		 * created.  Since the WQL is created at 'wqhdr_create' time,
22629e39c5baSBill Taylor 		 * we pass the 'create_wql' flag in as 0 here if it has
22639e39c5baSBill Taylor 		 * already been created.  Later on below we then set up the
22649e39c5baSBill Taylor 		 * WQL and rwq information based on the existing SRQ info.
22659e39c5baSBill Taylor 		 */
22669e39c5baSBill Taylor 		if (qp_srq_en == TAVOR_QP_SRQ_ENABLED &&
22679e39c5baSBill Taylor 		    qp->qp_srqhdl->srq_wrid_wql != NULL) {
22689e39c5baSBill Taylor 			create_wql = 0;
22699e39c5baSBill Taylor 		}
22709e39c5baSBill Taylor 
22719e39c5baSBill Taylor 		rwq = tavor_wrid_wqhdr_create(state, qp->qp_rq_cqhdl,
22729e39c5baSBill Taylor 		    qp->qp_qpnum, TAVOR_WR_RECV, create_wql);
22739e39c5baSBill Taylor 		if (rwq == NULL) {
22749e39c5baSBill Taylor 			/*
22759e39c5baSBill Taylor 			 * If we couldn't find/allocate space for the workq
22769e39c5baSBill Taylor 			 * header, then free all the send queue resources we
22779e39c5baSBill Taylor 			 * just allocated and setup (above), drop the lock(s)
22789e39c5baSBill Taylor 			 * and return failure.
22799e39c5baSBill Taylor 			 */
22809e39c5baSBill Taylor 			mutex_enter(&swq->wq_wrid_wql->wql_lock);
22819e39c5baSBill Taylor 			tavor_wrid_wqhdr_remove(swq, s_wridlist);
22829e39c5baSBill Taylor 			mutex_exit(&swq->wq_wrid_wql->wql_lock);
22839e39c5baSBill Taylor 			if (create_new_swq) {
22849e39c5baSBill Taylor 				tavor_cq_wqhdr_remove(qp->qp_sq_cqhdl,
22859e39c5baSBill Taylor 				    swq);
22869e39c5baSBill Taylor 			}
22879e39c5baSBill Taylor 
22889e39c5baSBill Taylor #ifdef __lock_lint
22899e39c5baSBill Taylor 			mutex_exit(&qp->qp_srqhdl->srq_lock);
22909e39c5baSBill Taylor #else
22919e39c5baSBill Taylor 			if (qp_srq_en == TAVOR_QP_SRQ_ENABLED) {
22929e39c5baSBill Taylor 				mutex_exit(&qp->qp_srqhdl->srq_lock);
22939e39c5baSBill Taylor 			}
22949e39c5baSBill Taylor #endif
22959e39c5baSBill Taylor 
22969e39c5baSBill Taylor 			tavor_wrid_wqhdr_unlock_both(qp);
22979e39c5baSBill Taylor 			return (ibc_get_ci_failure(0));
22989e39c5baSBill Taylor 		}
22999e39c5baSBill Taylor 	}
23009e39c5baSBill Taylor 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*rwq))
23019e39c5baSBill Taylor 
23029e39c5baSBill Taylor 	/*
23039e39c5baSBill Taylor 	 * Setup receive workq hdr
23049e39c5baSBill Taylor 	 *
23059e39c5baSBill Taylor 	 * If the QP is on an SRQ, we set up the SRQ-specific fields:
23069e39c5baSBill Taylor 	 * keeping a copy of the rwq pointer, setting the rwq bufsize
23079e39c5baSBill Taylor 	 * appropriately, and initializing our part of the WQLock.
23089e39c5baSBill Taylor 	 *
23099e39c5baSBill Taylor 	 * In the normal QP case, the QP recv queue bufsize is used.
23109e39c5baSBill Taylor 	 */
23119e39c5baSBill Taylor 	if (qp_srq_en == TAVOR_QP_SRQ_ENABLED) {
23129e39c5baSBill Taylor 		rwq->wq_size = qp->qp_srqhdl->srq_wq_bufsz;
23139e39c5baSBill Taylor 		if (qp->qp_srqhdl->srq_wrid_wql == NULL) {
23149e39c5baSBill Taylor 			qp->qp_srqhdl->srq_wrid_wql = rwq->wq_wrid_wql;
23159e39c5baSBill Taylor 		} else {
23169e39c5baSBill Taylor 			rwq->wq_wrid_wql = qp->qp_srqhdl->srq_wrid_wql;
23179e39c5baSBill Taylor 		}
23189e39c5baSBill Taylor 		tavor_wql_refcnt_inc(qp->qp_srqhdl->srq_wrid_wql);
23199e39c5baSBill Taylor 
23209e39c5baSBill Taylor 	} else {
23219e39c5baSBill Taylor 		rwq->wq_size = qp->qp_rq_bufsz;
23229e39c5baSBill Taylor 	}
23239e39c5baSBill Taylor 
23249e39c5baSBill Taylor 	qp->qp_rq_wqhdr = rwq;
23259e39c5baSBill Taylor 	rwq->wq_head = 0;
23269e39c5baSBill Taylor 	rwq->wq_tail = 0;
23279e39c5baSBill Taylor 	rwq->wq_full = 0;
23289e39c5baSBill Taylor 
23299e39c5baSBill Taylor 	/*
23309e39c5baSBill Taylor 	 * Allocate space for the tavor_wrid_entry_t container.
23319e39c5baSBill Taylor 	 *
23329e39c5baSBill Taylor 	 * If the QP is on an SRQ and the srq_wridlist is NULL, then we must
23339e39c5baSBill Taylor 	 * allocate the wridlist normally.  However, if the srq_wridlist is
23349e39c5baSBill Taylor 	 * not NULL, then this SRQ has already been initialized, and the
23359e39c5baSBill Taylor 	 * wridlist has already been initialized.  So we re-use the
23369e39c5baSBill Taylor 	 * srq_wridlist as the r_wridlist for this QP in this case.
23379e39c5baSBill Taylor 	 */
23389e39c5baSBill Taylor 	if (qp_srq_en == TAVOR_QP_SRQ_ENABLED &&
23399e39c5baSBill Taylor 	    qp->qp_srqhdl->srq_wridlist != NULL) {
23409e39c5baSBill Taylor 		/* Use existing srq_wridlist pointer */
23419e39c5baSBill Taylor 		r_wridlist = qp->qp_srqhdl->srq_wridlist;
23429e39c5baSBill Taylor 		ASSERT(r_wridlist != NULL);
23439e39c5baSBill Taylor 	} else {
23449e39c5baSBill Taylor 		/* Allocate memory for the r_wridlist */
23459e39c5baSBill Taylor 		r_wridlist = tavor_wrid_get_list(rwq->wq_size);
23469e39c5baSBill Taylor 	}
23479e39c5baSBill Taylor 
23489e39c5baSBill Taylor 	/*
23499e39c5baSBill Taylor 	 * If the memory allocation failed for r_wridlist (or the SRQ pointer
23509e39c5baSBill Taylor 	 * is mistakenly NULL), we cleanup our previous swq allocation from
23519e39c5baSBill Taylor 	 * above
23529e39c5baSBill Taylor 	 */
23539e39c5baSBill Taylor 	if (r_wridlist == NULL) {
23549e39c5baSBill Taylor 		/*
23559e39c5baSBill Taylor 		 * If we couldn't allocate space for tracking the WRID
23569e39c5baSBill Taylor 		 * entries, then cleanup all the stuff from above.  Then
23579e39c5baSBill Taylor 		 * drop the lock(s) and return failure.
23589e39c5baSBill Taylor 		 */
23599e39c5baSBill Taylor 		mutex_enter(&swq->wq_wrid_wql->wql_lock);
23609e39c5baSBill Taylor 		tavor_wrid_wqhdr_remove(swq, s_wridlist);
23619e39c5baSBill Taylor 		mutex_exit(&swq->wq_wrid_wql->wql_lock);
23629e39c5baSBill Taylor 		if (create_new_swq) {
23639e39c5baSBill Taylor 			tavor_cq_wqhdr_remove(qp->qp_sq_cqhdl, swq);
23649e39c5baSBill Taylor 		}
23659e39c5baSBill Taylor 		if (create_new_rwq) {
23669e39c5baSBill Taylor 			tavor_cq_wqhdr_remove(qp->qp_rq_cqhdl, rwq);
23679e39c5baSBill Taylor 		}
23689e39c5baSBill Taylor 
23699e39c5baSBill Taylor #ifdef __lock_lint
23709e39c5baSBill Taylor 		mutex_exit(&qp->qp_srqhdl->srq_lock);
23719e39c5baSBill Taylor #else
23729e39c5baSBill Taylor 		if (qp_srq_en == TAVOR_QP_SRQ_ENABLED) {
23739e39c5baSBill Taylor 			mutex_exit(&qp->qp_srqhdl->srq_lock);
23749e39c5baSBill Taylor 		}
23759e39c5baSBill Taylor #endif
23769e39c5baSBill Taylor 
23779e39c5baSBill Taylor 		tavor_wrid_wqhdr_unlock_both(qp);
23789e39c5baSBill Taylor 		return (ibc_get_ci_failure(0));
23799e39c5baSBill Taylor 	}
23809e39c5baSBill Taylor 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*r_wridlist))
23819e39c5baSBill Taylor 
23829e39c5baSBill Taylor 	/*
23839e39c5baSBill Taylor 	 * Initialize the wridlist
23849e39c5baSBill Taylor 	 *
23859e39c5baSBill Taylor 	 * In the normal QP case, there is no special initialization needed.
23869e39c5baSBill Taylor 	 * We simply setup the wridlist backpointer to be the receive wqhdr
23879e39c5baSBill Taylor 	 * (rwq).
23889e39c5baSBill Taylor 	 *
23899e39c5baSBill Taylor 	 * But in the SRQ case, there is no backpointer to the wqhdr possible.
23909e39c5baSBill Taylor 	 * Instead we set 'wl_srq_en', specifying this wridlist is on an SRQ
23919e39c5baSBill Taylor 	 * and thus potentially shared across multiple QPs with the SRQ.  We
23929e39c5baSBill Taylor 	 * also setup the srq_wridlist pointer to be the r_wridlist, and
23939e39c5baSBill Taylor 	 * initialize the freelist to an invalid index.  This srq_wridlist
23949e39c5baSBill Taylor 	 * pointer is checked above on future moves from the Reset state to
23959e39c5baSBill Taylor 	 * let us know that the srq_wridlist has already been initialized.
23969e39c5baSBill Taylor 	 *
23979e39c5baSBill Taylor 	 * And finally, if we are in a non-UMAP case, we setup the srq wrid
23989e39c5baSBill Taylor 	 * free list.
23999e39c5baSBill Taylor 	 */
24009e39c5baSBill Taylor 	if (qp_srq_en == TAVOR_QP_SRQ_ENABLED &&
24019e39c5baSBill Taylor 	    qp->qp_srqhdl->srq_wridlist == NULL) {
24029e39c5baSBill Taylor 		r_wridlist->wl_srq_en = 1;
24039e39c5baSBill Taylor 		r_wridlist->wl_free_list_indx = -1;
24049e39c5baSBill Taylor 		qp->qp_srqhdl->srq_wridlist = r_wridlist;
24059e39c5baSBill Taylor 
24069e39c5baSBill Taylor 		/* Initialize srq wrid free list */
24079e39c5baSBill Taylor 		if (qp->qp_srqhdl->srq_is_umap == 0) {
24089e39c5baSBill Taylor 			mutex_enter(&rwq->wq_wrid_wql->wql_lock);
24099e39c5baSBill Taylor 			tavor_wrid_list_srq_init(r_wridlist, qp->qp_srqhdl, 0);
24109e39c5baSBill Taylor 			mutex_exit(&rwq->wq_wrid_wql->wql_lock);
24119e39c5baSBill Taylor 		}
24129e39c5baSBill Taylor 	} else {
24139e39c5baSBill Taylor 		r_wridlist->wl_wqhdr = rwq;
24149e39c5baSBill Taylor 	}
24159e39c5baSBill Taylor 
24169e39c5baSBill Taylor 	/* Chain the WRID list "container" to the workq hdr list */
24179e39c5baSBill Taylor 	mutex_enter(&rwq->wq_wrid_wql->wql_lock);
24189e39c5baSBill Taylor 	tavor_wrid_wqhdr_add(rwq, r_wridlist);
24199e39c5baSBill Taylor 	mutex_exit(&rwq->wq_wrid_wql->wql_lock);
24209e39c5baSBill Taylor 
24219e39c5baSBill Taylor #ifdef __lock_lint
24229e39c5baSBill Taylor 	mutex_exit(&qp->qp_srqhdl->srq_lock);
24239e39c5baSBill Taylor #else
24249e39c5baSBill Taylor 	if (qp_srq_en == TAVOR_QP_SRQ_ENABLED) {
24259e39c5baSBill Taylor 		mutex_exit(&qp->qp_srqhdl->srq_lock);
24269e39c5baSBill Taylor 	}
24279e39c5baSBill Taylor #endif
24289e39c5baSBill Taylor 
24299e39c5baSBill Taylor 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*r_wridlist))
24309e39c5baSBill Taylor 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*rwq))
24319e39c5baSBill Taylor 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*s_wridlist))
24329e39c5baSBill Taylor 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*swq))
24339e39c5baSBill Taylor 
24349e39c5baSBill Taylor 	tavor_wrid_wqhdr_unlock_both(qp);
24359e39c5baSBill Taylor 	return (DDI_SUCCESS);
24369e39c5baSBill Taylor }
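
/*
 * For reference (descriptive only): after a successful return from the
 * routine above, qp_sq_wqhdr and qp_rq_wqhdr each point to a work queue
 * header registered with the corresponding CQ, and each header has a
 * WRID list "container" chained to it.  In the SRQ case the receive-side
 * container and its WQL are shared among all QPs on the SRQ (through
 * srq_wridlist and srq_wrid_wql) rather than being owned by this QP alone.
 */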
24379e39c5baSBill Taylor 
24389e39c5baSBill Taylor 
24399e39c5baSBill Taylor /*
24409e39c5baSBill Taylor  * tavor_wrid_to_reset_handling()
24419e39c5baSBill Taylor  *    Context: Can be called from interrupt or base context.
24429e39c5baSBill Taylor  */
24439e39c5baSBill Taylor void
24449e39c5baSBill Taylor tavor_wrid_to_reset_handling(tavor_state_t *state, tavor_qphdl_t qp)
24459e39c5baSBill Taylor {
24469e39c5baSBill Taylor 	uint_t		free_wqhdr = 0;
24479e39c5baSBill Taylor 
24489e39c5baSBill Taylor 	/*
24499e39c5baSBill Taylor 	 * For each of this QP's Work Queues, move the WRID "container" to
24509e39c5baSBill Taylor 	 * the "reapable" list.  Although there may still be unpolled
24519e39c5baSBill Taylor 	 * entries in these containers, it is not a big deal.  We will not
24529e39c5baSBill Taylor 	 * reap the list until either the Poll CQ command detects an empty
24539e39c5baSBill Taylor 	 * condition or the CQ itself is freed.  Grab the CQ lock(s) before
24549e39c5baSBill Taylor 	 * manipulating the lists.
24559e39c5baSBill Taylor 	 */
24569e39c5baSBill Taylor 	mutex_enter(&qp->qp_rq_cqhdl->cq_lock);
24579e39c5baSBill Taylor 	tavor_wrid_wqhdr_lock_both(qp);
24589e39c5baSBill Taylor 	tavor_wrid_reaplist_add(qp->qp_sq_cqhdl, qp->qp_sq_wqhdr);
24599e39c5baSBill Taylor 
24609e39c5baSBill Taylor 	/*
24619e39c5baSBill Taylor 	 * Add the receive work queue header on to the reaplist.  But if we are
24629e39c5baSBill Taylor 	 * on an SRQ, then we don't add anything to the reaplist.  Instead we
24639e39c5baSBill Taylor 	 * flush the SRQ entries on the CQ, remove the wridlist, and free the
24649e39c5baSBill Taylor 	 * WQHDR (if needed).  We must hold the WQL for these operations, yet
24659e39c5baSBill Taylor 	 * the call to tavor_cq_wqhdr_remove grabs the WQL internally.  So we
24669e39c5baSBill Taylor 	 * drop WQL before that call.  Then release the CQ WQHDR locks and the
24679e39c5baSBill Taylor 	 * CQ lock and return.
24689e39c5baSBill Taylor 	 */
24699e39c5baSBill Taylor 	if (qp->qp_srq_en == TAVOR_QP_SRQ_ENABLED) {
24709e39c5baSBill Taylor 
24719e39c5baSBill Taylor 		/*
24729e39c5baSBill Taylor 		 * Pull off all (if any) entries for this QP from CQ.  This
24739e39c5baSBill Taylor 		 * only includes entries that have not yet been polled
24749e39c5baSBill Taylor 		 */
24759e39c5baSBill Taylor 		mutex_enter(&qp->qp_rq_wqhdr->wq_wrid_wql->wql_lock);
24769e39c5baSBill Taylor 		tavor_cq_srq_entries_flush(state, qp);
24779e39c5baSBill Taylor 
24789e39c5baSBill Taylor 		/* Remove wridlist from WQHDR */
24799e39c5baSBill Taylor 		tavor_wrid_wqhdr_remove(qp->qp_rq_wqhdr,
24809e39c5baSBill Taylor 		    qp->qp_rq_wqhdr->wq_wrid_post);
24819e39c5baSBill Taylor 
24829e39c5baSBill Taylor 		/* If wridlist chain is now empty, remove the wqhdr as well */
24839e39c5baSBill Taylor 		if (qp->qp_rq_wqhdr->wq_wrid_post == NULL) {
24849e39c5baSBill Taylor 			free_wqhdr = 1;
24859e39c5baSBill Taylor 		} else {
24869e39c5baSBill Taylor 			free_wqhdr = 0;
24879e39c5baSBill Taylor 		}
24889e39c5baSBill Taylor 
24899e39c5baSBill Taylor 		mutex_exit(&qp->qp_rq_wqhdr->wq_wrid_wql->wql_lock);
24909e39c5baSBill Taylor 
24919e39c5baSBill Taylor 		/* Free the WQHDR */
24929e39c5baSBill Taylor 		if (free_wqhdr) {
24939e39c5baSBill Taylor 			tavor_cq_wqhdr_remove(qp->qp_rq_cqhdl, qp->qp_rq_wqhdr);
24949e39c5baSBill Taylor 		}
24959e39c5baSBill Taylor 	} else {
24969e39c5baSBill Taylor 		tavor_wrid_reaplist_add(qp->qp_rq_cqhdl, qp->qp_rq_wqhdr);
24979e39c5baSBill Taylor 	}
24989e39c5baSBill Taylor 	tavor_wrid_wqhdr_unlock_both(qp);
24999e39c5baSBill Taylor 	mutex_exit(&qp->qp_rq_cqhdl->cq_lock);
25009e39c5baSBill Taylor }
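
/*
 * Note on lock ordering as used in the routine above: the CQ lock
 * (cq_lock) is acquired first, then the per-CQ WRID work queue header
 * lock(s) via tavor_wrid_wqhdr_lock_both(), and finally the WQL
 * (wql_lock) protecting an individual WRID list.  The WQL is dropped
 * before tavor_cq_wqhdr_remove() is called because (as noted above)
 * that routine acquires the WQL internally.
 */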
25019e39c5baSBill Taylor 
25029e39c5baSBill Taylor 
25039e39c5baSBill Taylor /*
25049e39c5baSBill Taylor  * tavor_wrid_add_entry()
25059e39c5baSBill Taylor  *    Context: Can be called from interrupt or base context.
25069e39c5baSBill Taylor  */
25079e39c5baSBill Taylor void
25089e39c5baSBill Taylor tavor_wrid_add_entry(tavor_workq_hdr_t *wq, uint64_t wrid, uint32_t wqeaddrsz,
25099e39c5baSBill Taylor     uint_t signaled_dbd)
25109e39c5baSBill Taylor {
25119e39c5baSBill Taylor 	tavor_wrid_entry_t	*wre_tmp;
25129e39c5baSBill Taylor 	uint32_t		head, tail, size;
25139e39c5baSBill Taylor 
25149e39c5baSBill Taylor 	ASSERT(MUTEX_HELD(&wq->wq_wrid_wql->wql_lock));
25159e39c5baSBill Taylor 
25169e39c5baSBill Taylor 	/*
25179e39c5baSBill Taylor 	 * Find the entry in the container pointed to by the "tail" index.
25189e39c5baSBill Taylor 	 * Add all of the relevant information to that entry, including WRID,
25199e39c5baSBill Taylor 	 * "wqeaddrsz" parameter, and whether it was signaled/unsignaled
25209e39c5baSBill Taylor 	 * and/or doorbelled.
25219e39c5baSBill Taylor 	 */
25229e39c5baSBill Taylor 	head = wq->wq_wrid_post->wl_head;
25239e39c5baSBill Taylor 	tail = wq->wq_wrid_post->wl_tail;
25249e39c5baSBill Taylor 	size = wq->wq_wrid_post->wl_size;
25259e39c5baSBill Taylor 	wre_tmp = &wq->wq_wrid_post->wl_wre[tail];
25269e39c5baSBill Taylor 	wre_tmp->wr_wrid	  = wrid;
25279e39c5baSBill Taylor 	wre_tmp->wr_wqeaddrsz	  = wqeaddrsz;
25289e39c5baSBill Taylor 	wre_tmp->wr_signaled_dbd  = signaled_dbd;
25299e39c5baSBill Taylor 
25309e39c5baSBill Taylor 	/*
25319e39c5baSBill Taylor 	 * Update the "wrid_old_tail" pointer to point to the entry we just
25329e39c5baSBill Taylor 	 * inserted into the queue.  By tracking this pointer (the pointer to
25339e39c5baSBill Taylor 	 * the most recently inserted entry) it will be possible later in the
25349e39c5baSBill Taylor 	 * PostSend() and PostRecv() code paths to find the entry that needs
25359e39c5baSBill Taylor 	 * its "doorbelled" flag set (see comment in tavor_post_recv() and/or
25369e39c5baSBill Taylor 	 * tavor_post_send()).
25379e39c5baSBill Taylor 	 */
25389e39c5baSBill Taylor 	wq->wq_wrid_post->wl_wre_old_tail = wre_tmp;
25399e39c5baSBill Taylor 
25409e39c5baSBill Taylor 	/* Update the tail index */
25419e39c5baSBill Taylor 	tail = ((tail + 1) & (size - 1));
25429e39c5baSBill Taylor 	wq->wq_wrid_post->wl_tail = tail;
25439e39c5baSBill Taylor 
25449e39c5baSBill Taylor 	/*
25459e39c5baSBill Taylor 	 * If the "tail" index has just wrapped over into the "head" index,
25469e39c5baSBill Taylor 	 * then we have filled the container.  We use the "full" flag to
25479e39c5baSBill Taylor 	 * indicate this condition and to distinguish it from the "empty"
25489e39c5baSBill Taylor 	 * condition (where head and tail are also equal).
25499e39c5baSBill Taylor 	 */
25509e39c5baSBill Taylor 	if (head == tail) {
25519e39c5baSBill Taylor 		wq->wq_wrid_post->wl_full = 1;
25529e39c5baSBill Taylor 	}
25539e39c5baSBill Taylor }
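
/*
 * Illustrative sketch (not part of the driver): the head/tail/full
 * bookkeeping above is the standard power-of-two ring buffer idiom.
 * Because the container size is a power of two, "(index + 1) & (size - 1)"
 * wraps the index, and a separate "full" flag is needed to distinguish
 * the full condition from the empty one (head == tail in both cases).
 */
#if 0
static void
ring_push(uint32_t *head, uint32_t *tail, uint32_t *full, uint32_t size)
{
	*tail = (*tail + 1) & (size - 1);	/* size is a power of two */
	if (*head == *tail) {
		*full = 1;			/* tail just wrapped onto head */
	}
}
#endif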
25549e39c5baSBill Taylor 
25559e39c5baSBill Taylor /*
25569e39c5baSBill Taylor  * tavor_wrid_add_entry_srq()
25579e39c5baSBill Taylor  * Context: Can be called from interrupt or base context
25589e39c5baSBill Taylor  */
25599e39c5baSBill Taylor void
25609e39c5baSBill Taylor tavor_wrid_add_entry_srq(tavor_srqhdl_t srq, uint64_t wrid, uint_t signaled_dbd)
25619e39c5baSBill Taylor {
25629e39c5baSBill Taylor 	tavor_wrid_entry_t	*wre;
25639e39c5baSBill Taylor 	uint64_t		*wl_wqe;
25649e39c5baSBill Taylor 	uint32_t		wqe_index;
25659e39c5baSBill Taylor 
25669e39c5baSBill Taylor 	/*
25679e39c5baSBill Taylor 	 * Find the next available WQE from the SRQ free_list.  Then update the
25689e39c5baSBill Taylor 	 * free_list to point to the next entry
25699e39c5baSBill Taylor 	 */
25709e39c5baSBill Taylor 	wl_wqe = TAVOR_SRQ_WQE_ADDR(srq, srq->srq_wridlist->wl_free_list_indx);
25719e39c5baSBill Taylor 
25729e39c5baSBill Taylor 	wqe_index = srq->srq_wridlist->wl_free_list_indx;
25739e39c5baSBill Taylor 
25749e39c5baSBill Taylor 	/* ASSERT on impossible wqe_index values */
25759e39c5baSBill Taylor 	ASSERT(wqe_index < srq->srq_wq_bufsz);
25769e39c5baSBill Taylor 
25779e39c5baSBill Taylor 	/*
25789e39c5baSBill Taylor 	 * Setup the WRE.
25799e39c5baSBill Taylor 	 *
25809e39c5baSBill Taylor 	 * Given the 'wqe_index' value, we store the WRID at this WRE offset.
25819e39c5baSBill Taylor 	 * And we set the WRE to be signaled_dbd so that at poll CQ time we can
25829e39c5baSBill Taylor 	 * find this information and associate the WRID with the CQE's WQE.
25839e39c5baSBill Taylor 	 */
25849e39c5baSBill Taylor 	wre = &srq->srq_wridlist->wl_wre[wqe_index];
25859e39c5baSBill Taylor 	wre->wr_wrid = wrid;
25869e39c5baSBill Taylor 	wre->wr_signaled_dbd  = signaled_dbd;
25879e39c5baSBill Taylor 
25889e39c5baSBill Taylor 	/* Update the free list index */
25899e39c5baSBill Taylor 	srq->srq_wridlist->wl_free_list_indx = ddi_get32(
25909e39c5baSBill Taylor 	    srq->srq_wridlist->wl_acchdl, (uint32_t *)wl_wqe);
25919e39c5baSBill Taylor }
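
/*
 * Illustrative sketch (not part of the driver): the SRQ WRID free list
 * is threaded through the WQE memory itself.  Popping an entry (as done
 * above) reads the "next free" index stored in the WQE at the current
 * free index; pushing an entry back (as done at poll time) writes the
 * current free index into the WQE and makes that WQE the new head.  The
 * simplified in-memory equivalent below omits the ddi_get32()/ddi_put32()
 * accesses used by the real code.
 */
#if 0
static uint32_t
srq_freelist_pop(uint32_t *wq_links, int32_t *free_indx)
{
	uint32_t	indx = (uint32_t)*free_indx;

	*free_indx = (int32_t)wq_links[indx];	/* advance head to next free */
	return (indx);
}

static void
srq_freelist_push(uint32_t *wq_links, int32_t *free_indx, uint32_t indx)
{
	wq_links[indx] = (uint32_t)*free_indx;	/* link old head behind us */
	*free_indx = (int32_t)indx;		/* this WQE becomes the head */
}
#endif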
25929e39c5baSBill Taylor 
25939e39c5baSBill Taylor 
25949e39c5baSBill Taylor /*
25959e39c5baSBill Taylor  * tavor_wrid_get_entry()
25969e39c5baSBill Taylor  *    Context: Can be called from interrupt or base context.
25979e39c5baSBill Taylor  */
25989e39c5baSBill Taylor uint64_t
25999e39c5baSBill Taylor tavor_wrid_get_entry(tavor_cqhdl_t cq, tavor_hw_cqe_t *cqe,
26009e39c5baSBill Taylor     tavor_wrid_entry_t *wre)
26019e39c5baSBill Taylor {
26029e39c5baSBill Taylor 	tavor_workq_hdr_t	*wq;
26039e39c5baSBill Taylor 	tavor_wrid_entry_t	*wre_tmp;
26049e39c5baSBill Taylor 	uint64_t		wrid;
26059e39c5baSBill Taylor 	uint_t			send_or_recv, qpnum, error, opcode;
26069e39c5baSBill Taylor 
26079e39c5baSBill Taylor 	/* Lock the list of work queues associated with this CQ */
26089e39c5baSBill Taylor 	mutex_enter(&cq->cq_wrid_wqhdr_lock);
26099e39c5baSBill Taylor 
26109e39c5baSBill Taylor 	/*
26119e39c5baSBill Taylor 	 * Determine whether this CQE is a send or receive completion (and
26129e39c5baSBill Taylor 	 * whether it was a "successful" completion or not)
26139e39c5baSBill Taylor 	 */
26149e39c5baSBill Taylor 	opcode = TAVOR_CQE_OPCODE_GET(cq, cqe);
26159e39c5baSBill Taylor 	if ((opcode == TAVOR_CQE_SEND_ERR_OPCODE) ||
26169e39c5baSBill Taylor 	    (opcode == TAVOR_CQE_RECV_ERR_OPCODE)) {
26179e39c5baSBill Taylor 		error = 1;
26189e39c5baSBill Taylor 		send_or_recv = (opcode == TAVOR_CQE_SEND_ERR_OPCODE) ?
26199e39c5baSBill Taylor 		    TAVOR_COMPLETION_SEND : TAVOR_COMPLETION_RECV;
26209e39c5baSBill Taylor 	} else {
26219e39c5baSBill Taylor 		error = 0;
26229e39c5baSBill Taylor 		send_or_recv = TAVOR_CQE_SENDRECV_GET(cq, cqe);
26239e39c5baSBill Taylor 	}
26249e39c5baSBill Taylor 
26259e39c5baSBill Taylor 	/* Find the work queue for this QP number (send or receive side) */
26269e39c5baSBill Taylor 	qpnum = TAVOR_CQE_QPNUM_GET(cq, cqe);
26279e39c5baSBill Taylor 	wq = tavor_wrid_wqhdr_find(cq, qpnum, send_or_recv);
26289e39c5baSBill Taylor 	ASSERT(wq != NULL);
26299e39c5baSBill Taylor 
26309e39c5baSBill Taylor 	/*
26319e39c5baSBill Taylor 	 * Regardless of whether the completion is the result of a "success"
26329e39c5baSBill Taylor 	 * or a "failure", we lock the list of "containers" and attempt to
26339e39c5baSBill Taylor 	 * search for the first matching completion (i.e. the first WR
26349e39c5baSBill Taylor 	 * with a matching WQE addr and size).  Once we find it, we pull out
26359e39c5baSBill Taylor 	 * the "wrid" field and return it (see below).  Note: One possible
26369e39c5baSBill Taylor 	 * future enhancement would be to enable this routine to skip over
26379e39c5baSBill Taylor 	 * any "unsignaled" completions to go directly to the next "signaled"
26389e39c5baSBill Taylor 	 * entry on success. XXX
26399e39c5baSBill Taylor 	 */
26409e39c5baSBill Taylor 	mutex_enter(&wq->wq_wrid_wql->wql_lock);
26419e39c5baSBill Taylor 	wre_tmp = tavor_wrid_find_match(wq, cq, cqe);
26429e39c5baSBill Taylor 
26439e39c5baSBill Taylor 	/*
26449e39c5baSBill Taylor 	 * If this is a "successful" completion, then we assert that this
26459e39c5baSBill Taylor 	 * completion must be a "signaled" completion.
26469e39c5baSBill Taylor 	 */
26479e39c5baSBill Taylor 	ASSERT(error || (wre_tmp->wr_signaled_dbd & TAVOR_WRID_ENTRY_SIGNALED));
26489e39c5baSBill Taylor 
26499e39c5baSBill Taylor 	/*
26509e39c5baSBill Taylor 	 * If the completion is a "failed" completion, then we save away the
26519e39c5baSBill Taylor 	 * contents of the entry (into the "wre" field passed in) for use
26529e39c5baSBill Taylor 	 * in later CQE processing. Note: We use the tavor_wrid_get_wqeaddrsz()
26539e39c5baSBill Taylor 	 * function to grab "wqeaddrsz" from the next entry in the container.
26549e39c5baSBill Taylor 	 * This is required for error processing (where updating these fields
26559e39c5baSBill Taylor 	 * properly is necessary for correct handling of the "error" CQE).
26569e39c5baSBill Taylor 	 */
26579e39c5baSBill Taylor 	if (error && (wre != NULL)) {
26589e39c5baSBill Taylor 		*wre = *wre_tmp;
26599e39c5baSBill Taylor 		wre->wr_wqeaddrsz = tavor_wrid_get_wqeaddrsz(wq);
26609e39c5baSBill Taylor 	}
26619e39c5baSBill Taylor 
26629e39c5baSBill Taylor 	/* Pull out the WRID and return it */
26639e39c5baSBill Taylor 	wrid = wre_tmp->wr_wrid;
26649e39c5baSBill Taylor 
26659e39c5baSBill Taylor 	mutex_exit(&wq->wq_wrid_wql->wql_lock);
26669e39c5baSBill Taylor 	mutex_exit(&cq->cq_wrid_wqhdr_lock);
26679e39c5baSBill Taylor 
26689e39c5baSBill Taylor 	return (wrid);
26699e39c5baSBill Taylor }
26709e39c5baSBill Taylor 
26719e39c5baSBill Taylor 
26729e39c5baSBill Taylor /*
26739e39c5baSBill Taylor  * tavor_wrid_find_match()
26749e39c5baSBill Taylor  *    Context: Can be called from interrupt or base context.
26759e39c5baSBill Taylor  */
26769e39c5baSBill Taylor static tavor_wrid_entry_t *
26779e39c5baSBill Taylor tavor_wrid_find_match(tavor_workq_hdr_t *wq, tavor_cqhdl_t cq,
26789e39c5baSBill Taylor     tavor_hw_cqe_t *cqe)
26799e39c5baSBill Taylor {
26809e39c5baSBill Taylor 	tavor_wrid_entry_t	*curr = NULL;
26819e39c5baSBill Taylor 	tavor_wrid_list_hdr_t	*container;
26829e39c5baSBill Taylor 	uint32_t		wqeaddr_size;
26839e39c5baSBill Taylor 	uint32_t		head, tail, size;
26849e39c5baSBill Taylor 	int			found = 0, last_container = 0;
26859e39c5baSBill Taylor 
26869e39c5baSBill Taylor 	ASSERT(MUTEX_HELD(&wq->wq_wrid_wql->wql_lock));
26879e39c5baSBill Taylor 
26889e39c5baSBill Taylor 	/* Pull the "wqeaddrsz" information from the CQE */
26899e39c5baSBill Taylor 	wqeaddr_size = TAVOR_CQE_WQEADDRSZ_GET(cq, cqe);
26909e39c5baSBill Taylor 
26919e39c5baSBill Taylor 	/*
26929e39c5baSBill Taylor 	 * Walk the "containers" list(s), find first WR with a matching WQE
26939e39c5baSBill Taylor 	 * addr.  If the current "container" is not the last one on the list,
26949e39c5baSBill Taylor 	 * i.e. not the current one to which we are posting new WRID entries,
26959e39c5baSBill Taylor 	 * then we do not attempt to update the "q_head", "q_tail", and
26969e39c5baSBill Taylor 	 * "q_full" indicators on the main work queue header.  We do, however,
26979e39c5baSBill Taylor 	 * update the "head" and "full" indicators on the individual containers
26989e39c5baSBill Taylor 	 * as we go.  This is imperative because we need to be able to
26999e39c5baSBill Taylor 	 * determine when the current container has been emptied (so that we
27009e39c5baSBill Taylor 	 * can move on to the next container).
27019e39c5baSBill Taylor 	 */
27029e39c5baSBill Taylor 	container = wq->wq_wrid_poll;
27039e39c5baSBill Taylor 	while (container != NULL) {
27049e39c5baSBill Taylor 		/* Is this the last/only "container" on the list */
27059e39c5baSBill Taylor 		last_container = (container != wq->wq_wrid_post) ? 0 : 1;
27069e39c5baSBill Taylor 
27079e39c5baSBill Taylor 		/*
27089e39c5baSBill Taylor 		 * First check if we are on an SRQ.  If so, we grab the entry
27099e39c5baSBill Taylor 		 * and break out.  Since SRQ wridlist's are never added to
27109e39c5baSBill Taylor 		 * reaplist, they can only be the last container.
27119e39c5baSBill Taylor 		 */
27129e39c5baSBill Taylor 		if (container->wl_srq_en) {
27139e39c5baSBill Taylor 			ASSERT(last_container == 1);
27149e39c5baSBill Taylor 			curr = tavor_wrid_find_match_srq(container, cq, cqe);
27159e39c5baSBill Taylor 			break;
27169e39c5baSBill Taylor 		}
27179e39c5baSBill Taylor 
27189e39c5baSBill Taylor 		/*
27199e39c5baSBill Taylor 		 * Grab the current "head", "tail" and "size" fields before
27209e39c5baSBill Taylor 		 * walking the list in the current container. Note: the "size"
27219e39c5baSBill Taylor 		 * field here must always be a power-of-2.  The "full"
27229e39c5baSBill Taylor 		 * parameter is checked (and updated) here to distinguish the
27239e39c5baSBill Taylor 		 * "queue full" condition from "queue empty".
27249e39c5baSBill Taylor 		 */
27259e39c5baSBill Taylor 		head = container->wl_head;
27269e39c5baSBill Taylor 		tail = container->wl_tail;
27279e39c5baSBill Taylor 		size = container->wl_size;
27289e39c5baSBill Taylor 		while ((head != tail) || (container->wl_full)) {
27299e39c5baSBill Taylor 			container->wl_full = 0;
27309e39c5baSBill Taylor 			curr = &container->wl_wre[head];
27319e39c5baSBill Taylor 			head = ((head + 1) & (size - 1));
27329e39c5baSBill Taylor 
27339e39c5baSBill Taylor 			/*
27349e39c5baSBill Taylor 			 * If the current entry's "wqeaddrsz" matches the one
27359e39c5baSBill Taylor 			 * we're searching for, then this must correspond to
27369e39c5baSBill Taylor 			 * the work request that caused the completion.  Set
27379e39c5baSBill Taylor 			 * the "found" flag and bail out.
27389e39c5baSBill Taylor 			 */
27399e39c5baSBill Taylor 			if (curr->wr_wqeaddrsz == wqeaddr_size) {
27409e39c5baSBill Taylor 				found = 1;
27419e39c5baSBill Taylor 				break;
27429e39c5baSBill Taylor 			}
27439e39c5baSBill Taylor 		}
27449e39c5baSBill Taylor 
27459e39c5baSBill Taylor 		/*
27469e39c5baSBill Taylor 		 * If the current container is empty (having reached here the
27479e39c5baSBill Taylor 		 * "head == tail" condition can only mean that the container
27489e39c5baSBill Taylor 		 * is empty), then NULL out the "wrid_old_tail" field (see
27499e39c5baSBill Taylor 		 * tavor_post_send() and tavor_post_recv() for more details)
27509e39c5baSBill Taylor 		 * and (potentially) remove the current container from future
27519e39c5baSBill Taylor 		 * searches.
27529e39c5baSBill Taylor 		 */
27539e39c5baSBill Taylor 		if (head == tail) {
27549e39c5baSBill Taylor 
27559e39c5baSBill Taylor 			container->wl_wre_old_tail = NULL;
27569e39c5baSBill Taylor 			/*
27579e39c5baSBill Taylor 			 * If this wasn't the last "container" on the chain,
27589e39c5baSBill Taylor 			 * i.e. the one to which new WRID entries will be
27599e39c5baSBill Taylor 			 * added, then remove it from the list.
27609e39c5baSBill Taylor 			 * Note: we don't "lose" the memory pointed to by this
27619e39c5baSBill Taylor 			 * because we should have already put this container
27629e39c5baSBill Taylor 			 * on the "reapable" list (from where it will later be
27639e39c5baSBill Taylor 			 * pulled).
27649e39c5baSBill Taylor 			 */
27659e39c5baSBill Taylor 			if (!last_container) {
27669e39c5baSBill Taylor 				wq->wq_wrid_poll = container->wl_next;
27679e39c5baSBill Taylor 			}
27689e39c5baSBill Taylor 		}
27699e39c5baSBill Taylor 
27709e39c5baSBill Taylor 		/* Update the head index for the container */
27719e39c5baSBill Taylor 		container->wl_head = head;
27729e39c5baSBill Taylor 
27739e39c5baSBill Taylor 		/*
27749e39c5baSBill Taylor 		 * If the entry was found in this container, then continue to
27759e39c5baSBill Taylor 		 * bail out.  Else reset the "curr" pointer and move on to the
27769e39c5baSBill Taylor 		 * next container (if there is one).  Note: the only real
27779e39c5baSBill Taylor 		 * reason for setting "curr = NULL" here is so that the ASSERT
27789e39c5baSBill Taylor 		 * below can catch the case where no matching entry was found
27799e39c5baSBill Taylor 		 * on any of the lists.
27809e39c5baSBill Taylor 		 */
27819e39c5baSBill Taylor 		if (found) {
27829e39c5baSBill Taylor 			break;
27839e39c5baSBill Taylor 		} else {
27849e39c5baSBill Taylor 			curr = NULL;
27859e39c5baSBill Taylor 			container = container->wl_next;
27869e39c5baSBill Taylor 		}
27879e39c5baSBill Taylor 	}
27889e39c5baSBill Taylor 
27899e39c5baSBill Taylor 	/*
27909e39c5baSBill Taylor 	 * Update work queue header's "head" and "full" conditions to match
27919e39c5baSBill Taylor 	 * the last entry on the container list.  (Note: Only if we're pulling
27929e39c5baSBill Taylor 	 * entries from the last work queue portion of the list, i.e. not from
27939e39c5baSBill Taylor 	 * the previous portions that may be the "reapable" list.)
27949e39c5baSBill Taylor 	 */
27959e39c5baSBill Taylor 	if (last_container) {
27969e39c5baSBill Taylor 		wq->wq_head = wq->wq_wrid_post->wl_head;
27979e39c5baSBill Taylor 		wq->wq_full = wq->wq_wrid_post->wl_full;
27989e39c5baSBill Taylor 	}
27999e39c5baSBill Taylor 
28009e39c5baSBill Taylor 	/* Ensure that we've actually found what we were searching for */
28019e39c5baSBill Taylor 	ASSERT(curr != NULL);
28029e39c5baSBill Taylor 
28039e39c5baSBill Taylor 	return (curr);
28049e39c5baSBill Taylor }
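
/*
 * Note on the search above (descriptive only): entries are consumed from
 * the container head up to and including the matching entry, so WRID
 * entries for earlier work requests that never generated a CQE (i.e.
 * unsignaled completions) are, in effect, retired as part of locating
 * the match.
 */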
28059e39c5baSBill Taylor 
28069e39c5baSBill Taylor 
28079e39c5baSBill Taylor /*
28089e39c5baSBill Taylor  * tavor_wrid_find_match_srq()
28099e39c5baSBill Taylor  *    Context: Can be called from interrupt or base context.
28109e39c5baSBill Taylor  */
28119e39c5baSBill Taylor tavor_wrid_entry_t *
28129e39c5baSBill Taylor tavor_wrid_find_match_srq(tavor_wrid_list_hdr_t *wl, tavor_cqhdl_t cq,
28139e39c5baSBill Taylor     tavor_hw_cqe_t *cqe)
28149e39c5baSBill Taylor {
28159e39c5baSBill Taylor 	tavor_wrid_entry_t	*wre;
28169e39c5baSBill Taylor 	uint64_t		*wl_wqe;
28179e39c5baSBill Taylor 	uint32_t		wqe_index;
28189e39c5baSBill Taylor 	uint64_t		wqe_addr;
28199e39c5baSBill Taylor 	uint32_t		cqe_wqe_addr;
28209e39c5baSBill Taylor 
28219e39c5baSBill Taylor 	/* Grab the WQE addr out of the CQE */
28229e39c5baSBill Taylor 	cqe_wqe_addr = TAVOR_CQE_WQEADDRSZ_GET(cq, cqe) & 0xFFFFFFC0;
28239e39c5baSBill Taylor 
28249e39c5baSBill Taylor 	/*
28259e39c5baSBill Taylor 	 * Use the WQE addr from the CQE as the lower 32 bits; we add back the
28269e39c5baSBill Taylor 	 * 'wl_srq_desc_off' because we have a zero-based queue.  OR'ing on the
28279e39c5baSBill Taylor 	 * upper 32 bits of 'wl_srq_wq_buf' then gives us the WQE addr in
28289e39c5baSBill Taylor 	 * the SRQ Work Queue itself.  We use this address as the index to find
28299e39c5baSBill Taylor 	 * out which Work Queue Entry this CQE corresponds with.
28309e39c5baSBill Taylor 	 *
28319e39c5baSBill Taylor 	 * We also use this address below to add the WQE back on to the free
28329e39c5baSBill Taylor 	 * list.
28339e39c5baSBill Taylor 	 */
28349e39c5baSBill Taylor 	wqe_addr = ((uintptr_t)wl->wl_srq_wq_buf & 0xFFFFFFFF00000000ull) |
28359e39c5baSBill Taylor 	    (cqe_wqe_addr + wl->wl_srq_desc_off);
28369e39c5baSBill Taylor 
28379e39c5baSBill Taylor 	/*
28389e39c5baSBill Taylor 	 * Given the 'wqe_addr' just calculated and the srq buf address, we
28399e39c5baSBill Taylor 	 * find the 'wqe_index'.  The 'wre' returned below contains the WRID
28409e39c5baSBill Taylor 	 * that we are looking for.  This indexes into the wre_list for this
28419e39c5baSBill Taylor 	 * specific WQE.
28429e39c5baSBill Taylor 	 */
28439e39c5baSBill Taylor 	wqe_index = TAVOR_SRQ_WQE_INDEX(wl->wl_srq_wq_buf, wqe_addr,
28449e39c5baSBill Taylor 	    wl->wl_srq_log_wqesz);
28459e39c5baSBill Taylor 
28469e39c5baSBill Taylor 	/* ASSERT on impossible wqe_index values */
28479e39c5baSBill Taylor 	ASSERT(wqe_index < wl->wl_srq_wq_bufsz);
28489e39c5baSBill Taylor 
28499e39c5baSBill Taylor 	/* Get the pointer to this WQE */
28509e39c5baSBill Taylor 	wl_wqe = (uint64_t *)(uintptr_t)wqe_addr;
28519e39c5baSBill Taylor 
28529e39c5baSBill Taylor 	/* Put this WQE index back on the free list */
28539e39c5baSBill Taylor 	ddi_put32(wl->wl_acchdl, (uint32_t *)wl_wqe, wl->wl_free_list_indx);
28549e39c5baSBill Taylor 	wl->wl_free_list_indx = wqe_index;
28559e39c5baSBill Taylor 
28569e39c5baSBill Taylor 	/* Using the index, return the Work Request ID Entry (wre) */
28579e39c5baSBill Taylor 	wre = &wl->wl_wre[wqe_index];
28589e39c5baSBill Taylor 
28599e39c5baSBill Taylor 	return (wre);
28609e39c5baSBill Taylor }
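
/*
 * Illustrative sketch (not part of the driver): recovering the WQE index
 * from the reconstructed WQE address amounts to dividing the offset into
 * the work queue buffer by the WQE size, which is a power of two (hence
 * a shift by log2 of the WQE size).  The exact arithmetic is hidden
 * behind TAVOR_SRQ_WQE_INDEX(); the helper below is only an assumed
 * equivalent, shown for clarity.
 */
#if 0
static uint32_t
srq_wqe_index(uintptr_t wq_buf, uintptr_t wqe_addr, uint32_t log_wqesz)
{
	return ((uint32_t)((wqe_addr - wq_buf) >> log_wqesz));
}
#endif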
28619e39c5baSBill Taylor 
28629e39c5baSBill Taylor 
28639e39c5baSBill Taylor /*
28649e39c5baSBill Taylor  * tavor_wrid_cq_reap()
28659e39c5baSBill Taylor  *    Context: Can be called from interrupt or base context.
28669e39c5baSBill Taylor  */
28679e39c5baSBill Taylor void
28689e39c5baSBill Taylor tavor_wrid_cq_reap(tavor_cqhdl_t cq)
28699e39c5baSBill Taylor {
28709e39c5baSBill Taylor 	tavor_workq_hdr_t	*consume_wqhdr;
28719e39c5baSBill Taylor 	tavor_wrid_list_hdr_t	*container, *to_free;
28729e39c5baSBill Taylor 
28739e39c5baSBill Taylor 	ASSERT(MUTEX_HELD(&cq->cq_lock));
28749e39c5baSBill Taylor 
28759e39c5baSBill Taylor 	/* Lock the list of work queues associated with this CQ */
28769e39c5baSBill Taylor 	mutex_enter(&cq->cq_wrid_wqhdr_lock);
28779e39c5baSBill Taylor 
28789e39c5baSBill Taylor 	/* Walk the "reapable" list and free up containers */
28799e39c5baSBill Taylor 	container = cq->cq_wrid_reap_head;
28809e39c5baSBill Taylor 	while (container != NULL) {
28819e39c5baSBill Taylor 		to_free	  = container;
28829e39c5baSBill Taylor 		container = container->wl_reap_next;
28839e39c5baSBill Taylor 		/*
28849e39c5baSBill Taylor 		 * If reaping the WRID list containers pulls the last
28859e39c5baSBill Taylor 		 * container from the given work queue header, then we free
28869e39c5baSBill Taylor 		 * the work queue header as well.
28879e39c5baSBill Taylor 		 */
28889e39c5baSBill Taylor 		consume_wqhdr = tavor_wrid_list_reap(to_free);
28899e39c5baSBill Taylor 		if (consume_wqhdr != NULL) {
28909e39c5baSBill Taylor 			tavor_cq_wqhdr_remove(cq, consume_wqhdr);
28919e39c5baSBill Taylor 		}
28929e39c5baSBill Taylor 	}
28939e39c5baSBill Taylor 
28949e39c5baSBill Taylor 	/* Once finished reaping, we reset the CQ's reap list */
28959e39c5baSBill Taylor 	cq->cq_wrid_reap_head = cq->cq_wrid_reap_tail = NULL;
28969e39c5baSBill Taylor 
28979e39c5baSBill Taylor 	mutex_exit(&cq->cq_wrid_wqhdr_lock);
28989e39c5baSBill Taylor }
28999e39c5baSBill Taylor 
29009e39c5baSBill Taylor 
29019e39c5baSBill Taylor /*
29029e39c5baSBill Taylor  * tavor_wrid_cq_force_reap()
29039e39c5baSBill Taylor  *    Context: Can be called from interrupt or base context.
29049e39c5baSBill Taylor  */
29059e39c5baSBill Taylor void
29069e39c5baSBill Taylor tavor_wrid_cq_force_reap(tavor_cqhdl_t cq)
29079e39c5baSBill Taylor {
29089e39c5baSBill Taylor 	tavor_workq_hdr_t	*curr;
29099e39c5baSBill Taylor 	tavor_wrid_list_hdr_t	*container, *to_free;
29109e39c5baSBill Taylor 	avl_tree_t		*treep;
29119e39c5baSBill Taylor 	void			*cookie = NULL;
29129e39c5baSBill Taylor 
29139e39c5baSBill Taylor 	ASSERT(MUTEX_HELD(&cq->cq_lock));
29149e39c5baSBill Taylor 
29159e39c5baSBill Taylor 	/*
29169e39c5baSBill Taylor 	 * The first step is to walk the "reapable" list and free up those
29179e39c5baSBill Taylor 	 * containers.  This is necessary because the containers on the
29189e39c5baSBill Taylor 	 * reapable list are not otherwise connected to the work queue headers
29199e39c5baSBill Taylor 	 * anymore.
29209e39c5baSBill Taylor 	 */
29219e39c5baSBill Taylor 	tavor_wrid_cq_reap(cq);
29229e39c5baSBill Taylor 
29239e39c5baSBill Taylor 	/* Now lock the list of work queues associated with this CQ */
29249e39c5baSBill Taylor 	mutex_enter(&cq->cq_wrid_wqhdr_lock);
29259e39c5baSBill Taylor 
29269e39c5baSBill Taylor 	/*
29279e39c5baSBill Taylor 	 * Walk the list of work queue headers and free up all the WRID list
29289e39c5baSBill Taylor 	 * containers chained to it.  Note: We don't need to grab the locks
29299e39c5baSBill Taylor 	 * for each of the individual WRID lists here because the only way
29309e39c5baSBill Taylor 	 * things can be added or removed from the list at this point would be
29319e39c5baSBill Taylor 	 * by posting a work request to a QP.  But if we've come this far,
29329e39c5baSBill Taylor 	 * then we can be assured that there are no longer any QPs associated
29339e39c5baSBill Taylor 	 * with the CQ that we are trying to free.
29349e39c5baSBill Taylor 	 */
29359e39c5baSBill Taylor #ifdef __lock_lint
29369e39c5baSBill Taylor 	tavor_wrid_wqhdr_compare(NULL, NULL);
29379e39c5baSBill Taylor #endif
29389e39c5baSBill Taylor 	treep = &cq->cq_wrid_wqhdr_avl_tree;
29399e39c5baSBill Taylor 	while ((curr = avl_destroy_nodes(treep, &cookie)) != NULL) {
29409e39c5baSBill Taylor 		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*curr))
29419e39c5baSBill Taylor 		container = curr->wq_wrid_poll;
29429e39c5baSBill Taylor 		while (container != NULL) {
29439e39c5baSBill Taylor 			to_free	  = container;
29449e39c5baSBill Taylor 			container = container->wl_next;
29459e39c5baSBill Taylor 			/*
29469e39c5baSBill Taylor 			 * If reaping the WRID list containers pulls the last
29479e39c5baSBill Taylor 			 * container from the given work queue header, then
29489e39c5baSBill Taylor 			 * we free the work queue header as well.  Note: we
29499e39c5baSBill Taylor 			 * ignore the return value because we know that the
29509e39c5baSBill Taylor 			 * work queue header should always be freed once the
29519e39c5baSBill Taylor 			 * list of containers has come to an end.
29529e39c5baSBill Taylor 			 */
29539e39c5baSBill Taylor 			(void) tavor_wrid_list_reap(to_free);
29549e39c5baSBill Taylor 			if (container == NULL) {
29559e39c5baSBill Taylor 				tavor_cq_wqhdr_remove(cq, curr);
29569e39c5baSBill Taylor 			}
29579e39c5baSBill Taylor 		}
29589e39c5baSBill Taylor 	}
29599e39c5baSBill Taylor 	avl_destroy(treep);
29609e39c5baSBill Taylor 
29619e39c5baSBill Taylor 	mutex_exit(&cq->cq_wrid_wqhdr_lock);
29629e39c5baSBill Taylor }
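
/*
 * Note on the teardown above (descriptive only): avl_destroy_nodes()
 * with a persistent "cookie" is the standard way to drain an AVL tree
 * without individual avl_remove() calls; once it returns NULL the tree
 * is empty and avl_destroy() releases the tree itself.
 */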
29639e39c5baSBill Taylor 
29649e39c5baSBill Taylor 
29659e39c5baSBill Taylor /*
29669e39c5baSBill Taylor  * tavor_wrid_get_list()
29679e39c5baSBill Taylor  *    Context: Can be called from interrupt or base context.
29689e39c5baSBill Taylor  */
29699e39c5baSBill Taylor tavor_wrid_list_hdr_t *
29709e39c5baSBill Taylor tavor_wrid_get_list(uint32_t qsize)
29719e39c5baSBill Taylor {
29729e39c5baSBill Taylor 	tavor_wrid_list_hdr_t	*wridlist;
29739e39c5baSBill Taylor 	uint32_t		size;
29749e39c5baSBill Taylor 
29759e39c5baSBill Taylor 	/*
29769e39c5baSBill Taylor 	 * The WRID list "container" consists of the tavor_wrid_list_hdr_t,
29779e39c5baSBill Taylor 	 * which holds the pointers necessary for maintaining the "reapable"
29789e39c5baSBill Taylor 	 * list, chaining together multiple "containers" old and new, and
29799e39c5baSBill Taylor 	 * tracking the head, tail, size, etc. for each container.
29809e39c5baSBill Taylor 	 *
29819e39c5baSBill Taylor 	 * The "container" also holds all the tavor_wrid_entry_t's, which are
29829e39c5baSBill Taylor 	 * allocated separately, one for each entry on the corresponding work
29839e39c5baSBill Taylor 	 * queue.
29849e39c5baSBill Taylor 	 */
29859e39c5baSBill Taylor 	size = sizeof (tavor_wrid_list_hdr_t);
29869e39c5baSBill Taylor 
29879e39c5baSBill Taylor 	/*
29889e39c5baSBill Taylor 	 * Note that this allocation has to be a NOSLEEP operation here
29899e39c5baSBill Taylor 	 * because we are holding the "wqhdr_list_lock" and, therefore,
29909e39c5baSBill Taylor 	 * could get raised to the interrupt level.
29919e39c5baSBill Taylor 	 */
29929e39c5baSBill Taylor 	wridlist = (tavor_wrid_list_hdr_t *)kmem_zalloc(size, KM_NOSLEEP);
29939e39c5baSBill Taylor 	if (wridlist == NULL) {
29949e39c5baSBill Taylor 		return (NULL);
29959e39c5baSBill Taylor 	}
29969e39c5baSBill Taylor 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*wridlist))
29979e39c5baSBill Taylor 
29989e39c5baSBill Taylor 	/* Complete the "container" initialization */
29999e39c5baSBill Taylor 	wridlist->wl_size = qsize;
30009e39c5baSBill Taylor 	wridlist->wl_full = 0;
30019e39c5baSBill Taylor 	wridlist->wl_head = 0;
30029e39c5baSBill Taylor 	wridlist->wl_tail = 0;
30039e39c5baSBill Taylor 	wridlist->wl_wre = (tavor_wrid_entry_t *)kmem_zalloc(qsize *
30049e39c5baSBill Taylor 	    sizeof (tavor_wrid_entry_t), KM_NOSLEEP);
30059e39c5baSBill Taylor 	if (wridlist->wl_wre == NULL) {
30069e39c5baSBill Taylor 		kmem_free(wridlist, size);
30079e39c5baSBill Taylor 		return (NULL);
30089e39c5baSBill Taylor 	}
30099e39c5baSBill Taylor 	wridlist->wl_wre_old_tail  = NULL;
30109e39c5baSBill Taylor 	wridlist->wl_reap_next = NULL;
30119e39c5baSBill Taylor 	wridlist->wl_next  = NULL;
30129e39c5baSBill Taylor 	wridlist->wl_prev  = NULL;
30139e39c5baSBill Taylor 	wridlist->wl_srq_en = 0;
30149e39c5baSBill Taylor 
30159e39c5baSBill Taylor 	return (wridlist);
30169e39c5baSBill Taylor }
30179e39c5baSBill Taylor 
30189e39c5baSBill Taylor /*
30199e39c5baSBill Taylor  * tavor_wrid_list_srq_init()
30209e39c5baSBill Taylor  * Context: Can be called from interrupt or base context
30219e39c5baSBill Taylor  */
30229e39c5baSBill Taylor void
30239e39c5baSBill Taylor tavor_wrid_list_srq_init(tavor_wrid_list_hdr_t *wridlist, tavor_srqhdl_t srq,
30249e39c5baSBill Taylor     uint_t wq_start)
30259e39c5baSBill Taylor {
30269e39c5baSBill Taylor 	uint64_t *wl_wqe;
30279e39c5baSBill Taylor 	int wqe_index;
30289e39c5baSBill Taylor 
30299e39c5baSBill Taylor 	ASSERT(MUTEX_HELD(&srq->srq_wrid_wql->wql_lock));
30309e39c5baSBill Taylor 
30319e39c5baSBill Taylor 	/* Setup pointers for use later when we are polling the CQ */
30329e39c5baSBill Taylor 	wridlist->wl_srq_wq_buf = srq->srq_wq_buf;
30339e39c5baSBill Taylor 	wridlist->wl_srq_wq_bufsz = srq->srq_wq_bufsz;
30349e39c5baSBill Taylor 	wridlist->wl_srq_log_wqesz = srq->srq_wq_log_wqesz;
30359e39c5baSBill Taylor 	wridlist->wl_srq_desc_off = srq->srq_desc_off;
30369e39c5baSBill Taylor 	wridlist->wl_acchdl = srq->srq_wqinfo.qa_acchdl;
30379e39c5baSBill Taylor 
30389e39c5baSBill Taylor 	/* Given wq_start to start initializing buf at, verify sanity */
30399e39c5baSBill Taylor 	ASSERT(wq_start >= 0 && wq_start < srq->srq_wq_bufsz);
30409e39c5baSBill Taylor 
30419e39c5baSBill Taylor 	/*
30429e39c5baSBill Taylor 	 * Initialize wridlist free list
30439e39c5baSBill Taylor 	 *
30449e39c5baSBill Taylor 	 * For each WQE up to the size of our queue, we store an index in the WQ
30459e39c5baSBill Taylor 	 * memory itself, representing the next available free entry.  The
30469e39c5baSBill Taylor 	 * 'wl_free_list_indx' always holds the index of the next available
30479e39c5baSBill Taylor 	 * free entry in the WQ.  If 'wl_free_list_indx' is -1, then we are
30489e39c5baSBill Taylor 	 * completely full.  This gives us the advantage of being able to have
30499e39c5baSBill Taylor 	 * entries complete or be polled off the WQ out-of-order.
30509e39c5baSBill Taylor 	 *
30519e39c5baSBill Taylor 	 * For now, we write the free_list entries inside the WQ itself.  It
30529e39c5baSBill Taylor 	 * may be useful in the future to store this information in a separate
30539e39c5baSBill Taylor 	 * structure for debugging purposes.
30549e39c5baSBill Taylor 	 */
30559e39c5baSBill Taylor 	for (wqe_index = wq_start; wqe_index < srq->srq_wq_bufsz; wqe_index++) {
30569e39c5baSBill Taylor 		wl_wqe = TAVOR_SRQ_WQE_ADDR(srq, wqe_index);
30579e39c5baSBill Taylor 		ddi_put32(wridlist->wl_acchdl, (uint32_t *)wl_wqe,
30589e39c5baSBill Taylor 		    wridlist->wl_free_list_indx);
30599e39c5baSBill Taylor 		wridlist->wl_free_list_indx = wqe_index;
30609e39c5baSBill Taylor 	}
30619e39c5baSBill Taylor }
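
/*
 * Illustrative sketch only (not part of the driver): one way the free
 * list threaded through the SRQ WQEs above could be consumed.  The
 * helper name "tavor_wrid_freelist_pop_sketch" is hypothetical and the
 * real consumer in the post-recv path may differ; this simply reverses
 * the initialization loop in tavor_wrid_list_srq_init().  The caller is
 * assumed to hold srq->srq_wrid_wql->wql_lock.
 */
#if 0	/* illustrative sketch, assuming the fields used above */
static int
tavor_wrid_freelist_pop_sketch(tavor_wrid_list_hdr_t *wridlist,
    tavor_srqhdl_t srq)
{
	uint64_t	*wl_wqe;
	int		curr_indx;

	/* A free-list index of -1 means every WQE is currently in use */
	curr_indx = wridlist->wl_free_list_indx;
	if (curr_indx == -1) {
		return (-1);
	}

	/*
	 * The WQE at the head of the free list stores the index of the
	 * next free entry (written by the init loop above), so read it
	 * back and make it the new free-list head.
	 */
	wl_wqe = TAVOR_SRQ_WQE_ADDR(srq, curr_indx);
	wridlist->wl_free_list_indx = (int)ddi_get32(wridlist->wl_acchdl,
	    (uint32_t *)wl_wqe);

	return (curr_indx);
}
#endif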
30629e39c5baSBill Taylor 
30639e39c5baSBill Taylor 
30649e39c5baSBill Taylor /*
30659e39c5baSBill Taylor  * tavor_wrid_reaplist_add()
30669e39c5baSBill Taylor  *    Context: Can be called from interrupt or base context.
30679e39c5baSBill Taylor  */
30689e39c5baSBill Taylor static void
30699e39c5baSBill Taylor tavor_wrid_reaplist_add(tavor_cqhdl_t cq, tavor_workq_hdr_t *wq)
30709e39c5baSBill Taylor {
30719e39c5baSBill Taylor 	ASSERT(MUTEX_HELD(&cq->cq_wrid_wqhdr_lock));
30729e39c5baSBill Taylor 
30739e39c5baSBill Taylor 	mutex_enter(&wq->wq_wrid_wql->wql_lock);
30749e39c5baSBill Taylor 
30759e39c5baSBill Taylor 	/*
30769e39c5baSBill Taylor 	 * Add the "post" container (the last one on the current chain) to
30779e39c5baSBill Taylor 	 * the CQ's "reapable" list
30789e39c5baSBill Taylor 	 */
30799e39c5baSBill Taylor 	if ((cq->cq_wrid_reap_head == NULL) &&
30809e39c5baSBill Taylor 	    (cq->cq_wrid_reap_tail == NULL)) {
30819e39c5baSBill Taylor 		cq->cq_wrid_reap_head = wq->wq_wrid_post;
30829e39c5baSBill Taylor 		cq->cq_wrid_reap_tail = wq->wq_wrid_post;
30839e39c5baSBill Taylor 	} else {
30849e39c5baSBill Taylor 		cq->cq_wrid_reap_tail->wl_reap_next = wq->wq_wrid_post;
30859e39c5baSBill Taylor 		cq->cq_wrid_reap_tail = wq->wq_wrid_post;
30869e39c5baSBill Taylor 	}
30879e39c5baSBill Taylor 
30889e39c5baSBill Taylor 	mutex_exit(&wq->wq_wrid_wql->wql_lock);
30899e39c5baSBill Taylor }
30909e39c5baSBill Taylor 
30919e39c5baSBill Taylor 
30929e39c5baSBill Taylor int
30939e39c5baSBill Taylor tavor_wrid_wqhdr_compare(const void *p1, const void *p2)
30949e39c5baSBill Taylor {
30959e39c5baSBill Taylor 	tavor_workq_compare_t	*cmpp;
30969e39c5baSBill Taylor 	tavor_workq_hdr_t	*curr;
30979e39c5baSBill Taylor 
30989e39c5baSBill Taylor 	cmpp = (tavor_workq_compare_t *)p1;
30999e39c5baSBill Taylor 	curr = (tavor_workq_hdr_t *)p2;
31009e39c5baSBill Taylor 
31019e39c5baSBill Taylor 	if (cmpp->cmp_qpn < curr->wq_qpn)
31029e39c5baSBill Taylor 		return (-1);
31039e39c5baSBill Taylor 	else if (cmpp->cmp_qpn > curr->wq_qpn)
31049e39c5baSBill Taylor 		return (+1);
31059e39c5baSBill Taylor 	else if (cmpp->cmp_type < curr->wq_type)
31069e39c5baSBill Taylor 		return (-1);
31079e39c5baSBill Taylor 	else if (cmpp->cmp_type > curr->wq_type)
31089e39c5baSBill Taylor 		return (+1);
31099e39c5baSBill Taylor 	else
31109e39c5baSBill Taylor 		return (0);
31119e39c5baSBill Taylor }
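
/*
 * Illustrative sketch only (not part of the driver): how a comparator
 * like tavor_wrid_wqhdr_compare() is typically registered when the
 * per-CQ AVL tree is created.  The AVL linkage field name "wq_avl_link"
 * is an assumption made for this example; the real tree is created
 * during CQ handle initialization elsewhere in the driver.
 */
#if 0	/* illustrative sketch, linkage field name assumed */
static void
tavor_wrid_wqhdr_avl_create_sketch(tavor_cqhdl_t cq)
{
	avl_create(&cq->cq_wrid_wqhdr_avl_tree, tavor_wrid_wqhdr_compare,
	    sizeof (tavor_workq_hdr_t),
	    offsetof(tavor_workq_hdr_t, wq_avl_link));
}
#endif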
31129e39c5baSBill Taylor 
31139e39c5baSBill Taylor 
31149e39c5baSBill Taylor /*
31159e39c5baSBill Taylor  * tavor_wrid_wqhdr_find()
31169e39c5baSBill Taylor  *    Context: Can be called from interrupt or base context.
31179e39c5baSBill Taylor  */
31189e39c5baSBill Taylor static tavor_workq_hdr_t *
31199e39c5baSBill Taylor tavor_wrid_wqhdr_find(tavor_cqhdl_t cq, uint_t qpn, uint_t wq_type)
31209e39c5baSBill Taylor {
31219e39c5baSBill Taylor 	tavor_workq_hdr_t	*curr;
31229e39c5baSBill Taylor 	tavor_workq_compare_t	cmp;
31239e39c5baSBill Taylor 
31249e39c5baSBill Taylor 	ASSERT(MUTEX_HELD(&cq->cq_wrid_wqhdr_lock));
31259e39c5baSBill Taylor 
31269e39c5baSBill Taylor 	/*
31279e39c5baSBill Taylor 	 * Walk the CQ's work queue list, trying to find a send or recv queue
31289e39c5baSBill Taylor 	 * with the same QP number.  We do this even if we are going to later
31299e39c5baSBill Taylor 	 * create a new entry because it helps us easily find the end of the
31309e39c5baSBill Taylor 	 * list.
31319e39c5baSBill Taylor 	 */
31329e39c5baSBill Taylor 	cmp.cmp_qpn = qpn;
31339e39c5baSBill Taylor 	cmp.cmp_type = wq_type;
31349e39c5baSBill Taylor #ifdef __lock_lint
31359e39c5baSBill Taylor 	tavor_wrid_wqhdr_compare(NULL, NULL);
31369e39c5baSBill Taylor #endif
31379e39c5baSBill Taylor 	curr = avl_find(&cq->cq_wrid_wqhdr_avl_tree, &cmp, NULL);
31389e39c5baSBill Taylor 
31399e39c5baSBill Taylor 	return (curr);
31409e39c5baSBill Taylor }
31419e39c5baSBill Taylor 
31429e39c5baSBill Taylor 
31439e39c5baSBill Taylor /*
31449e39c5baSBill Taylor  * tavor_wrid_wqhdr_create()
31459e39c5baSBill Taylor  *    Context: Can be called from interrupt or base context.
31469e39c5baSBill Taylor  */
31479e39c5baSBill Taylor static tavor_workq_hdr_t *
31489e39c5baSBill Taylor tavor_wrid_wqhdr_create(tavor_state_t *state, tavor_cqhdl_t cq, uint_t qpn,
31499e39c5baSBill Taylor     uint_t wq_type, uint_t create_wql)
31509e39c5baSBill Taylor {
31519e39c5baSBill Taylor 	tavor_workq_hdr_t	*wqhdr_tmp;
31529e39c5baSBill Taylor 
31539e39c5baSBill Taylor 	ASSERT(MUTEX_HELD(&cq->cq_wrid_wqhdr_lock));
31549e39c5baSBill Taylor 
31559e39c5baSBill Taylor 	/*
31569e39c5baSBill Taylor 	 * Allocate space for a work queue header structure and initialize it.
31579e39c5baSBill Taylor 	 * Each work queue header structure includes a "wq_wrid_wql"
31589e39c5baSBill Taylor 	 * which needs to be initialized.  Note that this allocation has to be
31599e39c5baSBill Taylor 	 * a NOSLEEP operation because we are holding the "cq_wrid_wqhdr_lock"
31609e39c5baSBill Taylor 	 * and, therefore, could get raised to the interrupt level.
31619e39c5baSBill Taylor 	 */
31629e39c5baSBill Taylor 	wqhdr_tmp = (tavor_workq_hdr_t *)kmem_zalloc(
31639e39c5baSBill Taylor 	    sizeof (tavor_workq_hdr_t), KM_NOSLEEP);
31649e39c5baSBill Taylor 	if (wqhdr_tmp == NULL) {
31659e39c5baSBill Taylor 		return (NULL);
31669e39c5baSBill Taylor 	}
31679e39c5baSBill Taylor 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*wqhdr_tmp))
31689e39c5baSBill Taylor 	wqhdr_tmp->wq_qpn	= qpn;
31699e39c5baSBill Taylor 	wqhdr_tmp->wq_type	= wq_type;
31709e39c5baSBill Taylor 
31719e39c5baSBill Taylor 	if (create_wql) {
31729e39c5baSBill Taylor 		wqhdr_tmp->wq_wrid_wql = tavor_wrid_wql_create(state);
31739e39c5baSBill Taylor 		if (wqhdr_tmp->wq_wrid_wql == NULL) {
31749e39c5baSBill Taylor 			kmem_free(wqhdr_tmp, sizeof (tavor_workq_hdr_t));
31759e39c5baSBill Taylor 			return (NULL);
31769e39c5baSBill Taylor 		}
31779e39c5baSBill Taylor 	}
31789e39c5baSBill Taylor 
31799e39c5baSBill Taylor 	wqhdr_tmp->wq_wrid_poll = NULL;
31809e39c5baSBill Taylor 	wqhdr_tmp->wq_wrid_post = NULL;
31819e39c5baSBill Taylor 
31829e39c5baSBill Taylor 	/* Chain the newly allocated work queue header to the CQ's list */
31839e39c5baSBill Taylor 	tavor_cq_wqhdr_add(cq, wqhdr_tmp);
31849e39c5baSBill Taylor 
31859e39c5baSBill Taylor 	return (wqhdr_tmp);
31869e39c5baSBill Taylor }
31879e39c5baSBill Taylor 
31889e39c5baSBill Taylor 
31899e39c5baSBill Taylor /*
31909e39c5baSBill Taylor  * tavor_wrid_wql_create()
31919e39c5baSBill Taylor  *    Context: Can be called from interrupt or base context.
31929e39c5baSBill Taylor  */
31939e39c5baSBill Taylor tavor_wq_lock_t *
31949e39c5baSBill Taylor tavor_wrid_wql_create(tavor_state_t *state)
31959e39c5baSBill Taylor {
31969e39c5baSBill Taylor 	tavor_wq_lock_t *wql;
31979e39c5baSBill Taylor 
31989e39c5baSBill Taylor 	/*
31999e39c5baSBill Taylor 	 * Allocate the WQL and initialize it.
32009e39c5baSBill Taylor 	 */
32019e39c5baSBill Taylor 	wql = kmem_zalloc(sizeof (tavor_wq_lock_t), KM_NOSLEEP);
32029e39c5baSBill Taylor 	if (wql == NULL) {
32039e39c5baSBill Taylor 		return (NULL);
32049e39c5baSBill Taylor 	}
32059e39c5baSBill Taylor 
32069e39c5baSBill Taylor 	mutex_init(&wql->wql_lock, NULL, MUTEX_DRIVER,
32079e39c5baSBill Taylor 	    DDI_INTR_PRI(state->ts_intrmsi_pri));
32089e39c5baSBill Taylor 
32099e39c5baSBill Taylor 	/* Add refcount to WQL */
32109e39c5baSBill Taylor 	tavor_wql_refcnt_inc(wql);
32119e39c5baSBill Taylor 
32129e39c5baSBill Taylor 	return (wql);
32139e39c5baSBill Taylor }
32149e39c5baSBill Taylor 
32159e39c5baSBill Taylor 
32169e39c5baSBill Taylor /*
32179e39c5baSBill Taylor  * tavor_wrid_get_wqeaddrsz()
32189e39c5baSBill Taylor  *    Context: Can be called from interrupt or base context.
32199e39c5baSBill Taylor  */
32209e39c5baSBill Taylor static uint32_t
32219e39c5baSBill Taylor tavor_wrid_get_wqeaddrsz(tavor_workq_hdr_t *wq)
32229e39c5baSBill Taylor {
32239e39c5baSBill Taylor 	tavor_wrid_entry_t	*wre;
32249e39c5baSBill Taylor 	uint32_t		wqeaddrsz;
32259e39c5baSBill Taylor 	uint32_t		head;
32269e39c5baSBill Taylor 
32279e39c5baSBill Taylor 	/*
32289e39c5baSBill Taylor 	 * If the container is empty, then there is no next entry. So just
32299e39c5baSBill Taylor 	 * return zero.  Note: the "head == tail" condition here can only
32309e39c5baSBill Taylor 	 * mean that the container is empty because we have previously pulled
32319e39c5baSBill Taylor 	 * something from the container.
32329e39c5baSBill Taylor 	 *
32339e39c5baSBill Taylor 	 * If the container is not empty, then find the next entry and return
32349e39c5baSBill Taylor 	 * the contents of its "wqeaddrsz" field.
32359e39c5baSBill Taylor 	 */
32369e39c5baSBill Taylor 	if (wq->wq_wrid_poll->wl_head == wq->wq_wrid_poll->wl_tail) {
32379e39c5baSBill Taylor 		wqeaddrsz = 0;
32389e39c5baSBill Taylor 	} else {
32399e39c5baSBill Taylor 		/*
32409e39c5baSBill Taylor 		 * We don't need to calculate the "next" head pointer here
32419e39c5baSBill Taylor 		 * because "head" should already point to the next entry on
32429e39c5baSBill Taylor 		 * the list (since we just pulled something off - in
32439e39c5baSBill Taylor 		 * tavor_wrid_find_match() - and moved the head index forward.)
32449e39c5baSBill Taylor 		 */
32459e39c5baSBill Taylor 		head = wq->wq_wrid_poll->wl_head;
32469e39c5baSBill Taylor 		wre = &wq->wq_wrid_poll->wl_wre[head];
32479e39c5baSBill Taylor 		wqeaddrsz = wre->wr_wqeaddrsz;
32489e39c5baSBill Taylor 	}
32499e39c5baSBill Taylor 	return (wqeaddrsz);
32509e39c5baSBill Taylor }
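
/*
 * Illustrative sketch only (not part of the driver): the head/tail
 * convention used by the WRID list containers.  "head == tail" means
 * the container is empty, entries are appended at the tail, and
 * consumers (e.g. tavor_wrid_find_match()) advance the head with
 * wraparound.  The helper name below is hypothetical and the real
 * consume path also maintains the old-tail pointer and uses a
 * power-of-two size mask rather than a modulo.
 */
#if 0	/* illustrative sketch of the head/tail convention */
static tavor_wrid_entry_t *
tavor_wrid_consume_head_sketch(tavor_wrid_list_hdr_t *wridlist)
{
	tavor_wrid_entry_t	*wre;
	uint32_t		head;

	/* Empty container: nothing to consume */
	if ((wridlist->wl_head == wridlist->wl_tail) &&
	    (wridlist->wl_full == 0)) {
		return (NULL);
	}

	/* Return the current head entry and advance the head index */
	head = wridlist->wl_head;
	wre  = &wridlist->wl_wre[head];
	wridlist->wl_head = (head + 1) % wridlist->wl_size;
	wridlist->wl_full = 0;

	return (wre);
}
#endif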
32519e39c5baSBill Taylor 
32529e39c5baSBill Taylor 
32539e39c5baSBill Taylor /*
32549e39c5baSBill Taylor  * tavor_wrid_wqhdr_add()
32559e39c5baSBill Taylor  *    Context: Can be called from interrupt or base context.
32569e39c5baSBill Taylor  */
32579e39c5baSBill Taylor static void
32589e39c5baSBill Taylor tavor_wrid_wqhdr_add(tavor_workq_hdr_t *wqhdr,
32599e39c5baSBill Taylor     tavor_wrid_list_hdr_t *wridlist)
32609e39c5baSBill Taylor {
32619e39c5baSBill Taylor 	ASSERT(MUTEX_HELD(&wqhdr->wq_wrid_wql->wql_lock));
32629e39c5baSBill Taylor 
32639e39c5baSBill Taylor 	/* Chain the new WRID list "container" to the work queue list */
32649e39c5baSBill Taylor 	if ((wqhdr->wq_wrid_post == NULL) &&
32659e39c5baSBill Taylor 	    (wqhdr->wq_wrid_poll == NULL)) {
32669e39c5baSBill Taylor 		wqhdr->wq_wrid_poll = wridlist;
32679e39c5baSBill Taylor 		wqhdr->wq_wrid_post = wridlist;
32689e39c5baSBill Taylor 	} else {
32699e39c5baSBill Taylor 		wqhdr->wq_wrid_post->wl_next = wridlist;
32709e39c5baSBill Taylor 		wridlist->wl_prev = wqhdr->wq_wrid_post;
32719e39c5baSBill Taylor 		wqhdr->wq_wrid_post = wridlist;
32729e39c5baSBill Taylor 	}
32739e39c5baSBill Taylor }
32749e39c5baSBill Taylor 
32759e39c5baSBill Taylor 
32769e39c5baSBill Taylor /*
32779e39c5baSBill Taylor  * tavor_wrid_wqhdr_remove()
32789e39c5baSBill Taylor  *    Context: Can be called from interrupt or base context.
32799e39c5baSBill Taylor  *
32809e39c5baSBill Taylor  *    Note: this is only called to remove the most recently added WRID list
32819e39c5baSBill Taylor  *    container (i.e. in tavor_from_reset() above)
32829e39c5baSBill Taylor  */
32839e39c5baSBill Taylor static void
32849e39c5baSBill Taylor tavor_wrid_wqhdr_remove(tavor_workq_hdr_t *wqhdr,
32859e39c5baSBill Taylor     tavor_wrid_list_hdr_t *wridlist)
32869e39c5baSBill Taylor {
32879e39c5baSBill Taylor 	tavor_wrid_list_hdr_t	*prev, *next;
32889e39c5baSBill Taylor 
32899e39c5baSBill Taylor 	ASSERT(MUTEX_HELD(&wqhdr->wq_wrid_wql->wql_lock));
32909e39c5baSBill Taylor 
32919e39c5baSBill Taylor 	/* Unlink the WRID list "container" from the work queue list */
32929e39c5baSBill Taylor 	prev = wridlist->wl_prev;
32939e39c5baSBill Taylor 	next = wridlist->wl_next;
32949e39c5baSBill Taylor 	if (prev != NULL) {
32959e39c5baSBill Taylor 		prev->wl_next = next;
32969e39c5baSBill Taylor 	}
32979e39c5baSBill Taylor 	if (next != NULL) {
32989e39c5baSBill Taylor 		next->wl_prev = prev;
32999e39c5baSBill Taylor 	}
33009e39c5baSBill Taylor 
33019e39c5baSBill Taylor 	/*
33029e39c5baSBill Taylor 	 * Update any pointers in the work queue hdr that may point to this
33039e39c5baSBill Taylor 	 * WRID list container
33049e39c5baSBill Taylor 	 */
33059e39c5baSBill Taylor 	if (wqhdr->wq_wrid_post == wridlist) {
33069e39c5baSBill Taylor 		wqhdr->wq_wrid_post = prev;
33079e39c5baSBill Taylor 	}
33089e39c5baSBill Taylor 	if (wqhdr->wq_wrid_poll == wridlist) {
33099e39c5baSBill Taylor 		wqhdr->wq_wrid_poll = NULL;
33109e39c5baSBill Taylor 	}
33119e39c5baSBill Taylor }
33129e39c5baSBill Taylor 
33139e39c5baSBill Taylor 
33149e39c5baSBill Taylor /*
33159e39c5baSBill Taylor  * tavor_wrid_list_reap()
33169e39c5baSBill Taylor  *    Context: Can be called from interrupt or base context.
33179e39c5baSBill Taylor  *    Note: The "wqhdr_list_lock" must be held.
33189e39c5baSBill Taylor  */
33199e39c5baSBill Taylor static tavor_workq_hdr_t *
33209e39c5baSBill Taylor tavor_wrid_list_reap(tavor_wrid_list_hdr_t *wridlist)
33219e39c5baSBill Taylor {
33229e39c5baSBill Taylor 	tavor_workq_hdr_t	*wqhdr, *consume_wqhdr = NULL;
33239e39c5baSBill Taylor 	tavor_wrid_list_hdr_t	*prev, *next;
33249e39c5baSBill Taylor 	uint32_t		size;
33259e39c5baSBill Taylor 
33269e39c5baSBill Taylor 	/* Get the back pointer to the work queue header (see below) */
33279e39c5baSBill Taylor 	wqhdr = wridlist->wl_wqhdr;
33289e39c5baSBill Taylor 	mutex_enter(&wqhdr->wq_wrid_wql->wql_lock);
33299e39c5baSBill Taylor 
33309e39c5baSBill Taylor 	/* Unlink the WRID list "container" from the work queue list */
33319e39c5baSBill Taylor 	prev = wridlist->wl_prev;
33329e39c5baSBill Taylor 	next = wridlist->wl_next;
33339e39c5baSBill Taylor 	if (prev != NULL) {
33349e39c5baSBill Taylor 		prev->wl_next = next;
33359e39c5baSBill Taylor 	}
33369e39c5baSBill Taylor 	if (next != NULL) {
33379e39c5baSBill Taylor 		next->wl_prev = prev;
33389e39c5baSBill Taylor 	}
33399e39c5baSBill Taylor 
33409e39c5baSBill Taylor 	/*
33419e39c5baSBill Taylor 	 * If the back pointer to the work queue header shows that it
33429e39c5baSBill Taylor 	 * was pointing to the entry we are about to remove, then the work
33439e39c5baSBill Taylor 	 * queue header is reapable as well.
33449e39c5baSBill Taylor 	 */
33459e39c5baSBill Taylor 	if ((wqhdr->wq_wrid_poll == wridlist) &&
33469e39c5baSBill Taylor 	    (wqhdr->wq_wrid_post == wridlist)) {
33479e39c5baSBill Taylor 		consume_wqhdr = wqhdr;
33489e39c5baSBill Taylor 	}
33499e39c5baSBill Taylor 
33509e39c5baSBill Taylor 	/* Be sure to update the "poll" and "post" container pointers */
33519e39c5baSBill Taylor 	if (wqhdr->wq_wrid_poll == wridlist) {
33529e39c5baSBill Taylor 		wqhdr->wq_wrid_poll = next;
33539e39c5baSBill Taylor 	}
33549e39c5baSBill Taylor 	if (wqhdr->wq_wrid_post == wridlist) {
33559e39c5baSBill Taylor 		wqhdr->wq_wrid_post = NULL;
33569e39c5baSBill Taylor 	}
33579e39c5baSBill Taylor 
33589e39c5baSBill Taylor 	/* Calculate the size and free the container */
33599e39c5baSBill Taylor 	size = (wridlist->wl_size * sizeof (tavor_wrid_entry_t));
33609e39c5baSBill Taylor 	kmem_free(wridlist->wl_wre, size);
33619e39c5baSBill Taylor 	kmem_free(wridlist, sizeof (tavor_wrid_list_hdr_t));
33629e39c5baSBill Taylor 
33639e39c5baSBill Taylor 	mutex_exit(&wqhdr->wq_wrid_wql->wql_lock);
33649e39c5baSBill Taylor 
33659e39c5baSBill Taylor 	return (consume_wqhdr);
33669e39c5baSBill Taylor }
33679e39c5baSBill Taylor 
33689e39c5baSBill Taylor 
33699e39c5baSBill Taylor /*
33709e39c5baSBill Taylor  * tavor_wrid_wqhdr_lock_both()
33719e39c5baSBill Taylor  *    Context: Can be called from interrupt or base context.
33729e39c5baSBill Taylor  */
33739e39c5baSBill Taylor static void
33749e39c5baSBill Taylor tavor_wrid_wqhdr_lock_both(tavor_qphdl_t qp)
33759e39c5baSBill Taylor {
33769e39c5baSBill Taylor 	tavor_cqhdl_t	sq_cq, rq_cq;
33779e39c5baSBill Taylor 
33789e39c5baSBill Taylor 	sq_cq = qp->qp_sq_cqhdl;
33799e39c5baSBill Taylor 	rq_cq = qp->qp_rq_cqhdl;
33809e39c5baSBill Taylor 
33819e39c5baSBill Taylor _NOTE(MUTEX_ACQUIRED_AS_SIDE_EFFECT(&sq_cq->cq_wrid_wqhdr_lock))
33829e39c5baSBill Taylor _NOTE(MUTEX_ACQUIRED_AS_SIDE_EFFECT(&rq_cq->cq_wrid_wqhdr_lock))
33839e39c5baSBill Taylor 
33849e39c5baSBill Taylor 	/*
33859e39c5baSBill Taylor 	 * If both work queues (send and recv) share a completion queue, then
33869e39c5baSBill Taylor 	 * grab the common lock.  If they use different CQs (hence different
33879e39c5baSBill Taylor 	 * "cq_wrid_wqhdr_list" locks), then grab the send one first, then the
33889e39c5baSBill Taylor 	 * receive.  We do this consistently and correctly in
33899e39c5baSBill Taylor 	 * tavor_wrid_wqhdr_unlock_both() below to avoid introducing any kind
33909e39c5baSBill Taylor 	 * of deadlock condition.  Note:  We add the "__lock_lint" code here
33919e39c5baSBill Taylor 	 * to fake out warlock into thinking we've grabbed both locks (when,
33929e39c5baSBill Taylor 	 * in fact, we only needed the one).
33939e39c5baSBill Taylor 	 */
33949e39c5baSBill Taylor 	if (sq_cq == rq_cq) {
33959e39c5baSBill Taylor 		mutex_enter(&sq_cq->cq_wrid_wqhdr_lock);
33969e39c5baSBill Taylor #ifdef	__lock_lint
33979e39c5baSBill Taylor 		mutex_enter(&rq_cq->cq_wrid_wqhdr_lock);
33989e39c5baSBill Taylor #endif
33999e39c5baSBill Taylor 	} else {
34009e39c5baSBill Taylor 		mutex_enter(&sq_cq->cq_wrid_wqhdr_lock);
34019e39c5baSBill Taylor 		mutex_enter(&rq_cq->cq_wrid_wqhdr_lock);
34029e39c5baSBill Taylor 	}
34039e39c5baSBill Taylor }
34049e39c5baSBill Taylor 
34059e39c5baSBill Taylor /*
34069e39c5baSBill Taylor  * tavor_wrid_wqhdr_unlock_both()
34079e39c5baSBill Taylor  *    Context: Can be called from interrupt or base context.
34089e39c5baSBill Taylor  */
34099e39c5baSBill Taylor static void
34109e39c5baSBill Taylor tavor_wrid_wqhdr_unlock_both(tavor_qphdl_t qp)
34119e39c5baSBill Taylor {
34129e39c5baSBill Taylor 	tavor_cqhdl_t	sq_cq, rq_cq;
34139e39c5baSBill Taylor 
34149e39c5baSBill Taylor 	sq_cq = qp->qp_sq_cqhdl;
34159e39c5baSBill Taylor 	rq_cq = qp->qp_rq_cqhdl;
34169e39c5baSBill Taylor 
34179e39c5baSBill Taylor _NOTE(LOCK_RELEASED_AS_SIDE_EFFECT(&rq_cq->cq_wrid_wqhdr_lock))
34189e39c5baSBill Taylor _NOTE(LOCK_RELEASED_AS_SIDE_EFFECT(&sq_cq->cq_wrid_wqhdr_lock))
34199e39c5baSBill Taylor 
34209e39c5baSBill Taylor 	/*
34219e39c5baSBill Taylor 	 * See tavor_wrid_wqhdr_lock_both() above for more detail
34229e39c5baSBill Taylor 	 */
34239e39c5baSBill Taylor 	if (sq_cq == rq_cq) {
34249e39c5baSBill Taylor #ifdef	__lock_lint
34259e39c5baSBill Taylor 		mutex_exit(&rq_cq->cq_wrid_wqhdr_lock);
34269e39c5baSBill Taylor #endif
34279e39c5baSBill Taylor 		mutex_exit(&sq_cq->cq_wrid_wqhdr_lock);
34289e39c5baSBill Taylor 	} else {
34299e39c5baSBill Taylor 		mutex_exit(&rq_cq->cq_wrid_wqhdr_lock);
34309e39c5baSBill Taylor 		mutex_exit(&sq_cq->cq_wrid_wqhdr_lock);
34319e39c5baSBill Taylor 	}
34329e39c5baSBill Taylor }
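
/*
 * Illustrative sketch only (not part of the driver): the general
 * deadlock-avoidance rule followed by the lock/unlock pair above.  Any
 * two locks that may be held together must always be acquired in one
 * fixed order (here: send CQ lock before receive CQ lock); acquiring
 * them in the opposite order on another code path could deadlock.  The
 * function and parameter names below are hypothetical.
 */
#if 0	/* illustrative sketch of fixed lock ordering */
static void
lock_ordering_sketch(kmutex_t *send_lock, kmutex_t *recv_lock)
{
	/* Every path acquires "send" before "recv" ... */
	mutex_enter(send_lock);
	mutex_enter(recv_lock);

	/* ... and releases in the reverse order */
	mutex_exit(recv_lock);
	mutex_exit(send_lock);
}
#endif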
34339e39c5baSBill Taylor 
34349e39c5baSBill Taylor 
34359e39c5baSBill Taylor /*
34369e39c5baSBill Taylor  * tavor_cq_wqhdr_add()
34379e39c5baSBill Taylor  *    Context: Can be called from interrupt or base context.
34389e39c5baSBill Taylor  */
34399e39c5baSBill Taylor static void
34409e39c5baSBill Taylor tavor_cq_wqhdr_add(tavor_cqhdl_t cq, tavor_workq_hdr_t *wqhdr)
34419e39c5baSBill Taylor {
34429e39c5baSBill Taylor 	tavor_workq_compare_t	cmp;
34439e39c5baSBill Taylor 	avl_index_t		where;
34449e39c5baSBill Taylor 
34459e39c5baSBill Taylor 	ASSERT(MUTEX_HELD(&cq->cq_wrid_wqhdr_lock));
34469e39c5baSBill Taylor 
34479e39c5baSBill Taylor 	cmp.cmp_qpn = wqhdr->wq_qpn;
34489e39c5baSBill Taylor 	cmp.cmp_type = wqhdr->wq_type;
34499e39c5baSBill Taylor #ifdef __lock_lint
34509e39c5baSBill Taylor 	tavor_wrid_wqhdr_compare(NULL, NULL);
34519e39c5baSBill Taylor #endif
34529e39c5baSBill Taylor 	(void) avl_find(&cq->cq_wrid_wqhdr_avl_tree, &cmp, &where);
34539e39c5baSBill Taylor 	/*
34549e39c5baSBill Taylor 	 * Insert the new work queue header into the CQ's AVL tree at the
34559e39c5baSBill Taylor 	 * position returned by the avl_find() call above.
34569e39c5baSBill Taylor 	 */
34579e39c5baSBill Taylor 	avl_insert(&cq->cq_wrid_wqhdr_avl_tree, wqhdr, where);
34589e39c5baSBill Taylor }
34599e39c5baSBill Taylor 
34609e39c5baSBill Taylor 
34619e39c5baSBill Taylor /*
34629e39c5baSBill Taylor  * tavor_cq_wqhdr_remove()
34639e39c5baSBill Taylor  *    Context: Can be called from interrupt or base context.
34649e39c5baSBill Taylor  */
34659e39c5baSBill Taylor static void
34669e39c5baSBill Taylor tavor_cq_wqhdr_remove(tavor_cqhdl_t cq, tavor_workq_hdr_t *wqhdr)
34679e39c5baSBill Taylor {
34689e39c5baSBill Taylor 	ASSERT(MUTEX_HELD(&cq->cq_wrid_wqhdr_lock));
34699e39c5baSBill Taylor 
34709e39c5baSBill Taylor #ifdef __lock_lint
34719e39c5baSBill Taylor 	tavor_wrid_wqhdr_compare(NULL, NULL);
34729e39c5baSBill Taylor #endif
34739e39c5baSBill Taylor 	/* Remove "wqhdr" from the work queue header list on "cq" */
34749e39c5baSBill Taylor 	avl_remove(&cq->cq_wrid_wqhdr_avl_tree, wqhdr);
34759e39c5baSBill Taylor 
34769e39c5baSBill Taylor 	/*
34779e39c5baSBill Taylor 	 * Release reference to WQL; If this is the last reference, this call
34789e39c5baSBill Taylor 	 * also has the side effect of freeing up the 'wq_wrid_wql' memory.
34799e39c5baSBill Taylor 	 */
34809e39c5baSBill Taylor 	tavor_wql_refcnt_dec(wqhdr->wq_wrid_wql);
34819e39c5baSBill Taylor 
34829e39c5baSBill Taylor 	/* Free the memory associated with "wqhdr" */
34839e39c5baSBill Taylor 	kmem_free(wqhdr, sizeof (tavor_workq_hdr_t));
34849e39c5baSBill Taylor }
34859e39c5baSBill Taylor 
34869e39c5baSBill Taylor 
34879e39c5baSBill Taylor /*
34889e39c5baSBill Taylor  * tavor_wql_refcnt_inc()
34899e39c5baSBill Taylor  * Context: Can be called from interrupt or base context
34909e39c5baSBill Taylor  */
34919e39c5baSBill Taylor void
34929e39c5baSBill Taylor tavor_wql_refcnt_inc(tavor_wq_lock_t *wql)
34939e39c5baSBill Taylor {
34949e39c5baSBill Taylor 	ASSERT(wql != NULL);
34959e39c5baSBill Taylor 
34969e39c5baSBill Taylor 	mutex_enter(&wql->wql_lock);
34979e39c5baSBill Taylor 	wql->wql_refcnt++;
34989e39c5baSBill Taylor 	mutex_exit(&wql->wql_lock);
34999e39c5baSBill Taylor }
35009e39c5baSBill Taylor 
35019e39c5baSBill Taylor /*
35029e39c5baSBill Taylor  * tavor_wql_refcnt_dec()
35039e39c5baSBill Taylor  * Context: Can be called from interrupt or base context
35049e39c5baSBill Taylor  */
35059e39c5baSBill Taylor void
35069e39c5baSBill Taylor tavor_wql_refcnt_dec(tavor_wq_lock_t *wql)
35079e39c5baSBill Taylor {
35089e39c5baSBill Taylor 	int	refcnt;
35099e39c5baSBill Taylor 
35109e39c5baSBill Taylor 	ASSERT(wql != NULL);
35119e39c5baSBill Taylor 
35129e39c5baSBill Taylor 	mutex_enter(&wql->wql_lock);
35139e39c5baSBill Taylor 	wql->wql_refcnt--;
35149e39c5baSBill Taylor 	refcnt = wql->wql_refcnt;
35159e39c5baSBill Taylor 	mutex_exit(&wql->wql_lock);
35169e39c5baSBill Taylor 
35179e39c5baSBill Taylor 	/*
35199e39c5baSBill Taylor 	 * Free up WQL memory if we're the last one associated with this
35209e39c5baSBill Taylor 	 * structure.
35219e39c5baSBill Taylor 	 */
35229e39c5baSBill Taylor 	if (refcnt == 0) {
35239e39c5baSBill Taylor 		mutex_destroy(&wql->wql_lock);
35249e39c5baSBill Taylor 		kmem_free(wql, sizeof (tavor_wq_lock_t));
35259e39c5baSBill Taylor 	}
35269e39c5baSBill Taylor }
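
/*
 * Illustrative sketch only (not part of the driver): typical use of the
 * WQL refcount pair above.  Note that tavor_wql_refcnt_dec() samples the
 * count into a local while holding wql_lock and only frees the WQL after
 * mutex_exit(), since a mutex cannot be destroyed while it is held.  The
 * function name and flow below are hypothetical.
 */
#if 0	/* illustrative sketch of WQL refcount usage */
static void
wql_refcnt_usage_sketch(tavor_state_t *state)
{
	tavor_wq_lock_t	*wql;

	/* Creation returns the WQL with one reference already held */
	wql = tavor_wrid_wql_create(state);
	if (wql == NULL)
		return;

	/* A second user of the same WQL takes its own reference ... */
	tavor_wql_refcnt_inc(wql);

	/* ... and each holder drops it when done; the last drop frees it */
	tavor_wql_refcnt_dec(wql);
	tavor_wql_refcnt_dec(wql);
}
#endif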
3527