1ebb7c6fdSAlex Wilson /*
2ebb7c6fdSAlex Wilson  * This file and its contents are supplied under the terms of the
3ebb7c6fdSAlex Wilson  * Common Development and Distribution License ("CDDL"), version 1.0.
4ebb7c6fdSAlex Wilson  * You may only use this file in accordance with the terms of version
5ebb7c6fdSAlex Wilson  * 1.0 of the CDDL.
6ebb7c6fdSAlex Wilson  *
7ebb7c6fdSAlex Wilson  * A full copy of the text of the CDDL should have accompanied this
8ebb7c6fdSAlex Wilson  * source.  A copy of the CDDL is also available via the Internet at
9ebb7c6fdSAlex Wilson  * http://www.illumos.org/license/CDDL.
10ebb7c6fdSAlex Wilson  */
11ebb7c6fdSAlex Wilson 
12ebb7c6fdSAlex Wilson /*
13*80d1a7bdSAlex Wilson  * Copyright (c) 2021, the University of Queensland
1422d05228SPaul Winder  * Copyright 2020 RackTop Systems, Inc.
15260b7832SAndy Fiddaman  * Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
16ebb7c6fdSAlex Wilson  */
17ebb7c6fdSAlex Wilson 
18ebb7c6fdSAlex Wilson /*
19ebb7c6fdSAlex Wilson  * Mellanox Connect-X 4/5/6 driver.
20ebb7c6fdSAlex Wilson  */
21ebb7c6fdSAlex Wilson 
22ebb7c6fdSAlex Wilson #include <sys/modctl.h>
23ebb7c6fdSAlex Wilson #include <sys/conf.h>
24ebb7c6fdSAlex Wilson #include <sys/devops.h>
25ebb7c6fdSAlex Wilson #include <sys/sysmacros.h>
260207f820SPaul Winder #include <sys/disp.h>
275f0e3176SPaul Winder #include <sys/sdt.h>
28ebb7c6fdSAlex Wilson 
29ebb7c6fdSAlex Wilson #include <sys/mac_provider.h>
30ebb7c6fdSAlex Wilson 
31ebb7c6fdSAlex Wilson #include <mlxcx.h>
32ebb7c6fdSAlex Wilson 
3322d05228SPaul Winder /*
3422d05228SPaul Winder  * CTASSERT(s) to cover bad values which would induce bugs.
3522d05228SPaul Winder  */
3622d05228SPaul Winder CTASSERT(MLXCX_CQ_LWM_GAP >= MLXCX_CQ_HWM_GAP);
3722d05228SPaul Winder 
380207f820SPaul Winder /*
390207f820SPaul Winder  * Disable interrupts.
400207f820SPaul Winder  * The act of calling ddi_intr_disable() does not guarantee an interrupt
410207f820SPaul Winder  * routine is not running, so flag the vector as quiescing and wait
420207f820SPaul Winder  * for anything active to finish.
430207f820SPaul Winder  */
void
mlxcx_intr_disable(mlxcx_t *mlxp)
{
	int i;

	/* Stop the command EQ first so no new commands complete under us. */
	mlxcx_cmd_eq_disable(mlxp);

	for (i = 0; i < mlxp->mlx_intr_count; ++i) {
		mlxcx_event_queue_t *mleq = &mlxp->mlx_eqs[i];

		mutex_enter(&mleq->mleq_mtx);

		/* Skip vectors which were never enabled (or already done). */
		if ((mleq->mleq_state & MLXCX_EQ_INTR_ENABLED) == 0) {
			mutex_exit(&mleq->mleq_mtx);
			continue;
		}

		(void) ddi_intr_disable(mlxp->mlx_intr_handles[i]);

		/*
		 * ddi_intr_disable() does not wait for an in-flight handler.
		 * Flag the vector as quiescing and block on mleq_cv until the
		 * handler (which clears MLXCX_EQ_INTR_ACTIVE and signals us)
		 * has drained.
		 */
		mleq->mleq_state |= MLXCX_EQ_INTR_QUIESCE;
		while ((mleq->mleq_state & MLXCX_EQ_INTR_ACTIVE) != 0)
			cv_wait(&mleq->mleq_cv, &mleq->mleq_mtx);

		mleq->mleq_state &= ~MLXCX_EQ_INTR_ENABLED;

		mutex_exit(&mleq->mleq_mtx);
	}
}
720207f820SPaul Winder 
/*
 * Final teardown of all interrupt vectors and their event queues.
 * Callers must have already disabled interrupts (mlxcx_intr_disable())
 * and destroyed any created EQs; the VERIFYs below enforce that.
 */
void
mlxcx_intr_teardown(mlxcx_t *mlxp)
{
	int i;
	int ret;

	for (i = 0; i < mlxp->mlx_intr_count; ++i) {
		mlxcx_event_queue_t *mleq = &mlxp->mlx_eqs[i];

		mutex_enter(&mleq->mleq_mtx);
		VERIFY0(mleq->mleq_state & MLXCX_EQ_ALLOC);
		/* A created EQ must have been destroyed before teardown. */
		if (mleq->mleq_state & MLXCX_EQ_CREATED)
			VERIFY(mleq->mleq_state & MLXCX_EQ_DESTROYED);
		/*
		 * Only vectors from mlx_intr_cq0 upwards carry completion
		 * queues (earlier vectors appear to be reserved for other
		 * event types — see mlxcx.h); only those have a CQ AVL tree
		 * to tear down.
		 */
		if (i >= mlxp->mlx_intr_cq0) {
			VERIFY(avl_is_empty(&mleq->mleq_cqs));
			avl_destroy(&mleq->mleq_cqs);
		}
		mutex_exit(&mleq->mleq_mtx);
		(void) ddi_intr_remove_handler(mlxp->mlx_intr_handles[i]);
		ret = ddi_intr_free(mlxp->mlx_intr_handles[i]);
		if (ret != DDI_SUCCESS) {
			mlxcx_warn(mlxp, "failed to free interrupt %d: %d",
			    i, ret);
		}
		mutex_destroy(&mleq->mleq_mtx);
		cv_destroy(&mleq->mleq_cv);
	}
	kmem_free(mlxp->mlx_intr_handles, mlxp->mlx_intr_size);
	kmem_free(mlxp->mlx_eqs, mlxp->mlx_eqs_size);
	mlxp->mlx_intr_handles = NULL;
	mlxp->mlx_eqs = NULL;
}
105ebb7c6fdSAlex Wilson 
106ebb7c6fdSAlex Wilson /*
107ebb7c6fdSAlex Wilson  * Get the next SW-owned entry on the event queue, or NULL if we reach the end.
108ebb7c6fdSAlex Wilson  */
static mlxcx_eventq_ent_t *
mlxcx_eq_next(mlxcx_event_queue_t *mleq)
{
	mlxcx_eventq_ent_t *ent;
	ddi_fm_error_t err;
	uint_t ci;
	/*
	 * The ownership bit alternates each time the consumer counter wraps
	 * the ring (mleq_nents == 1 << mleq_entshift), so "software owned"
	 * is the parity of the wrap count.
	 */
	const uint_t swowner = ((mleq->mleq_cc >> mleq->mleq_entshift) & 1);

	/*
	 * This should only be called from interrupt context to ensure
	 * correctness of mleq_cc.
	 */
	ASSERT(servicing_interrupt());
	ASSERT(mleq->mleq_state & MLXCX_EQ_CREATED);
	ASSERT0(mleq->mleq_state & MLXCX_EQ_DESTROYED);

	/* mleq_nents is always a power of 2 */
	ci = mleq->mleq_cc & (mleq->mleq_nents - 1);

	/* Sync just this one entry from the device before inspecting it. */
	ent = &mleq->mleq_ent[ci];
	VERIFY0(ddi_dma_sync(mleq->mleq_dma.mxdb_dma_handle,
	    (uintptr_t)ent - (uintptr_t)mleq->mleq_ent,
	    sizeof (mlxcx_eventq_ent_t), DDI_DMA_SYNC_FORCPU));
	ddi_fm_dma_err_get(mleq->mleq_dma.mxdb_dma_handle, &err,
	    DDI_FME_VERSION);
	if (err.fme_status == DDI_FM_OK && (ent->mleqe_owner & 1) == swowner) {
		/* The PRM says we have to membar here, so we're doing it */
		membar_consumer();
		++mleq->mleq_cc;
		return (ent);
	}
	/*
	 * In the case of a DMA error, we should re-arm this EQ and then come
	 * back and try again when the device wakes us back up.
	 *
	 * Hopefully the fault will be gone by then.
	 */
	ddi_fm_dma_err_clear(mleq->mleq_dma.mxdb_dma_handle, DDI_FME_VERSION);

	return (NULL);
}
150ebb7c6fdSAlex Wilson 
/*
 * Re-arm an event queue so the hardware will generate an interrupt for the
 * next event, writing the current consumer counter to the UAR ARM register.
 * Retries the doorbell write up to mlxcx_doorbell_tries times on an FM
 * access error before declaring the service lost.
 */
void
mlxcx_arm_eq(mlxcx_t *mlxp, mlxcx_event_queue_t *mleq)
{
	uint_t try = 0;
	ddi_fm_error_t err;
	bits32_t v = new_bits32();

	/*
	 * This is only called during initialization when the EQ is
	 * armed for the first time, and when re-armed at the end of
	 * interrupt processing.
	 */
	ASSERT(mutex_owned(&mleq->mleq_mtx) || servicing_interrupt());
	ASSERT(mleq->mleq_state & MLXCX_EQ_CREATED);
	ASSERT0(mleq->mleq_state & MLXCX_EQ_DESTROYED);
	ASSERT0(mleq->mleq_state & MLXCX_EQ_ARMED);
	ASSERT0(mleq->mleq_state & MLXCX_EQ_POLLING);

	/* Record the consumer counter at the moment of arming. */
	mleq->mleq_state |= MLXCX_EQ_ARMED;
	mleq->mleq_cc_armed = mleq->mleq_cc;

	set_bits32(&v, MLXCX_EQ_ARM_EQN, mleq->mleq_num);
	set_bits32(&v, MLXCX_EQ_ARM_CI, mleq->mleq_cc);

retry:
	mlxcx_uar_put32(mlxp, mleq->mleq_uar, MLXCX_UAR_EQ_ARM,
	    from_bits32(v));
	ddi_fm_acc_err_get(mlxp->mlx_regs_handle, &err,
	    DDI_FME_VERSION);
	if (err.fme_status == DDI_FM_OK)
		return;
	if (try++ < mlxcx_doorbell_tries) {
		ddi_fm_acc_err_clear(mlxp->mlx_regs_handle, DDI_FME_VERSION);
		goto retry;
	}
	/* Out of retries: the device is no longer reliably servicing us. */
	ddi_fm_service_impact(mlxp->mlx_dip, DDI_SERVICE_LOST);
}
188ebb7c6fdSAlex Wilson 
/*
 * Tell the hardware how far we have consumed the EQ (via the NOARM UAR
 * register) without requesting another interrupt.
 */
static void
mlxcx_update_eq(mlxcx_t *mlxp, mlxcx_event_queue_t *mleq)
{
	bits32_t v = new_bits32();
	ddi_fm_error_t err;

	/*
	 * This should only be called from interrupt context to ensure
	 * correctness of mleq_cc.
	 */
	ASSERT(servicing_interrupt());
	ASSERT(mleq->mleq_state & MLXCX_EQ_CREATED);
	ASSERT0(mleq->mleq_state & MLXCX_EQ_DESTROYED);
	ASSERT0(mleq->mleq_state & MLXCX_EQ_ARMED);

	set_bits32(&v, MLXCX_EQ_ARM_EQN, mleq->mleq_num);
	set_bits32(&v, MLXCX_EQ_ARM_CI, mleq->mleq_cc);

	mlxcx_uar_put32(mlxp, mleq->mleq_uar, MLXCX_UAR_EQ_NOARM,
	    from_bits32(v));
	ddi_fm_acc_err_get(mlxp->mlx_regs_handle, &err,
	    DDI_FME_VERSION);
	ddi_fm_acc_err_clear(mlxp->mlx_regs_handle, DDI_FME_VERSION);
	/*
	 * Ignore the error, if it's still happening when we try to re-arm the
	 * EQ, we will note the impact then.
	 */
}
217ebb7c6fdSAlex Wilson 
/*
 * Get the next SW-owned completion queue entry, or NULL if we have caught
 * up with the hardware (or hit a DMA fault). Mirrors mlxcx_eq_next() but
 * for CQs, and requires mlcq_mtx rather than interrupt context.
 */
static mlxcx_completionq_ent_t *
mlxcx_cq_next(mlxcx_completion_queue_t *mlcq)
{
	mlxcx_completionq_ent_t *ent;
	ddi_fm_error_t err;
	uint_t ci;
	/* Owner bit parity flips each time the consumer wraps the ring. */
	const uint_t swowner = ((mlcq->mlcq_cc >> mlcq->mlcq_entshift) & 1);

	ASSERT(mutex_owned(&mlcq->mlcq_mtx));
	ASSERT(mlcq->mlcq_state & MLXCX_CQ_CREATED);
	ASSERT0(mlcq->mlcq_state & MLXCX_CQ_DESTROYED);

	/* mlcq_nents is always a power of 2 */
	ci = mlcq->mlcq_cc & (mlcq->mlcq_nents - 1);

	/* Sync only the entry of interest from the device. */
	ent = &mlcq->mlcq_ent[ci];
	VERIFY0(ddi_dma_sync(mlcq->mlcq_dma.mxdb_dma_handle,
	    (uintptr_t)ent - (uintptr_t)mlcq->mlcq_ent,
	    sizeof (mlxcx_completionq_ent_t), DDI_DMA_SYNC_FORCPU));
	ddi_fm_dma_err_get(mlcq->mlcq_dma.mxdb_dma_handle, &err,
	    DDI_FME_VERSION);
	if (err.fme_status == DDI_FM_OK && (ent->mlcqe_owner & 1) == swowner) {
		/* The PRM says we have to membar here, so we're doing it */
		membar_consumer();
		++mlcq->mlcq_cc;
		return (ent);
	}
	/* DMA fault or HW-owned entry: clear any error and report "empty". */
	ddi_fm_dma_err_clear(mlcq->mlcq_dma.mxdb_dma_handle, DDI_FME_VERSION);

	return (NULL);
}
249ebb7c6fdSAlex Wilson 
/*
 * Publish the CQ consumer counter to the hardware via the CQ's doorbell
 * record (DMA memory) without re-arming the CQ. Retries the DMA sync on an
 * FM error up to mlxcx_doorbell_tries times before declaring service lost.
 */
void
mlxcx_update_cqci(mlxcx_t *mlxp, mlxcx_completion_queue_t *mlcq)
{
	ddi_fm_error_t err;
	uint_t try = 0;

	/* Doorbell record counters are big-endian 24-bit values. */
	mlcq->mlcq_doorbell->mlcqd_update_ci = to_be24(mlcq->mlcq_cc);

retry:
	MLXCX_DMA_SYNC(mlcq->mlcq_doorbell_dma, DDI_DMA_SYNC_FORDEV);
	ddi_fm_dma_err_get(mlcq->mlcq_doorbell_dma.mxdb_dma_handle, &err,
	    DDI_FME_VERSION);
	if (err.fme_status != DDI_FM_OK) {
		if (try++ < mlxcx_doorbell_tries) {
			ddi_fm_dma_err_clear(
			    mlcq->mlcq_doorbell_dma.mxdb_dma_handle,
			    DDI_FME_VERSION);
			goto retry;
		} else {
			ddi_fm_service_impact(mlxp->mlx_dip, DDI_SERVICE_LOST);
			return;
		}
	}
}
27422d05228SPaul Winder 
/*
 * Arm a completion queue so its EQ will receive an event for the next
 * completion. This is a two-stage doorbell: first the DMA doorbell record
 * is updated and synced, then the 64-bit UAR CQ_ARM register is written.
 * Both stages share the retry budget in `try' and fall through to
 * ddi_fm_service_impact() when it is exhausted.
 *
 * Requires both mlcq_arm_mtx and mlcq_mtx to be held.
 */
void
mlxcx_arm_cq(mlxcx_t *mlxp, mlxcx_completion_queue_t *mlcq)
{
	bits32_t dbval = new_bits32();
	uint64_t udbval;
	ddi_fm_error_t err;
	uint_t try = 0;

	ASSERT(mutex_owned(&mlcq->mlcq_arm_mtx));
	ASSERT(mutex_owned(&mlcq->mlcq_mtx));
	ASSERT(mlcq->mlcq_state & MLXCX_CQ_CREATED);
	ASSERT0(mlcq->mlcq_state & MLXCX_CQ_DESTROYED);

	/*
	 * If we are re-arming while already armed, the event counter must
	 * have moved on since the last arm (i.e. an event fired).
	 */
	if (mlcq->mlcq_state & MLXCX_CQ_ARMED) {
		ASSERT3U(mlcq->mlcq_ec, >, mlcq->mlcq_ec_armed);
	}

	/* Don't touch the hardware once teardown has begun. */
	if (mlcq->mlcq_state & MLXCX_CQ_TEARDOWN)
		return;

	atomic_or_uint(&mlcq->mlcq_state, MLXCX_CQ_ARMED);
	mlcq->mlcq_cc_armed = mlcq->mlcq_cc;
	mlcq->mlcq_ec_armed = mlcq->mlcq_ec;

	set_bits32(&dbval, MLXCX_CQ_ARM_SEQ, mlcq->mlcq_ec);
	set_bits32(&dbval, MLXCX_CQ_ARM_CI, mlcq->mlcq_cc);

	/* UAR write: arm value in the high word, CQ number (24 bits) low. */
	udbval = (uint64_t)from_bits32(dbval) << 32;
	udbval |= mlcq->mlcq_num & 0xffffff;

	mlcq->mlcq_doorbell->mlcqd_update_ci = to_be24(mlcq->mlcq_cc);
	mlcq->mlcq_doorbell->mlcqd_arm_ci = dbval;

retry:
	MLXCX_DMA_SYNC(mlcq->mlcq_doorbell_dma, DDI_DMA_SYNC_FORDEV);
	ddi_fm_dma_err_get(mlcq->mlcq_doorbell_dma.mxdb_dma_handle, &err,
	    DDI_FME_VERSION);
	if (err.fme_status != DDI_FM_OK) {
		if (try++ < mlxcx_doorbell_tries) {
			ddi_fm_dma_err_clear(
			    mlcq->mlcq_doorbell_dma.mxdb_dma_handle,
			    DDI_FME_VERSION);
			goto retry;
		} else {
			goto err;
		}
	}

	mlxcx_uar_put64(mlxp, mlcq->mlcq_uar, MLXCX_UAR_CQ_ARM, udbval);
	ddi_fm_acc_err_get(mlxp->mlx_regs_handle, &err,
	    DDI_FME_VERSION);
	if (err.fme_status == DDI_FM_OK)
		return;
	if (try++ < mlxcx_doorbell_tries) {
		ddi_fm_acc_err_clear(mlxp->mlx_regs_handle, DDI_FME_VERSION);
		goto retry;
	}

err:
	ddi_fm_service_impact(mlxp->mlx_dip, DDI_SERVICE_LOST);
}
336ebb7c6fdSAlex Wilson 
337ebb7c6fdSAlex Wilson const char *
mlxcx_event_name(mlxcx_event_t evt)338ebb7c6fdSAlex Wilson mlxcx_event_name(mlxcx_event_t evt)
339ebb7c6fdSAlex Wilson {
340ebb7c6fdSAlex Wilson 	switch (evt) {
341ebb7c6fdSAlex Wilson 	case MLXCX_EVENT_COMPLETION:
342ebb7c6fdSAlex Wilson 		return ("COMPLETION");
343ebb7c6fdSAlex Wilson 	case MLXCX_EVENT_PATH_MIGRATED:
344ebb7c6fdSAlex Wilson 		return ("PATH_MIGRATED");
345ebb7c6fdSAlex Wilson 	case MLXCX_EVENT_COMM_ESTABLISH:
346ebb7c6fdSAlex Wilson 		return ("COMM_ESTABLISH");
347ebb7c6fdSAlex Wilson 	case MLXCX_EVENT_SENDQ_DRAIN:
348ebb7c6fdSAlex Wilson 		return ("SENDQ_DRAIN");
349ebb7c6fdSAlex Wilson 	case MLXCX_EVENT_LAST_WQE:
350ebb7c6fdSAlex Wilson 		return ("LAST_WQE");
351ebb7c6fdSAlex Wilson 	case MLXCX_EVENT_SRQ_LIMIT:
352ebb7c6fdSAlex Wilson 		return ("SRQ_LIMIT");
353ebb7c6fdSAlex Wilson 	case MLXCX_EVENT_DCT_ALL_CLOSED:
354ebb7c6fdSAlex Wilson 		return ("DCT_ALL_CLOSED");
355ebb7c6fdSAlex Wilson 	case MLXCX_EVENT_DCT_ACCKEY_VIOL:
356ebb7c6fdSAlex Wilson 		return ("DCT_ACCKEY_VIOL");
357ebb7c6fdSAlex Wilson 	case MLXCX_EVENT_CQ_ERROR:
358ebb7c6fdSAlex Wilson 		return ("CQ_ERROR");
359ebb7c6fdSAlex Wilson 	case MLXCX_EVENT_WQ_CATASTROPHE:
360ebb7c6fdSAlex Wilson 		return ("WQ_CATASTROPHE");
361ebb7c6fdSAlex Wilson 	case MLXCX_EVENT_PATH_MIGRATE_FAIL:
362ebb7c6fdSAlex Wilson 		return ("PATH_MIGRATE_FAIL");
363ebb7c6fdSAlex Wilson 	case MLXCX_EVENT_PAGE_FAULT:
364ebb7c6fdSAlex Wilson 		return ("PAGE_FAULT");
365ebb7c6fdSAlex Wilson 	case MLXCX_EVENT_WQ_INVALID_REQ:
366ebb7c6fdSAlex Wilson 		return ("WQ_INVALID_REQ");
367ebb7c6fdSAlex Wilson 	case MLXCX_EVENT_WQ_ACCESS_VIOL:
368ebb7c6fdSAlex Wilson 		return ("WQ_ACCESS_VIOL");
369ebb7c6fdSAlex Wilson 	case MLXCX_EVENT_SRQ_CATASTROPHE:
370ebb7c6fdSAlex Wilson 		return ("SRQ_CATASTROPHE");
371ebb7c6fdSAlex Wilson 	case MLXCX_EVENT_INTERNAL_ERROR:
372ebb7c6fdSAlex Wilson 		return ("INTERNAL_ERROR");
373ebb7c6fdSAlex Wilson 	case MLXCX_EVENT_PORT_STATE:
374ebb7c6fdSAlex Wilson 		return ("PORT_STATE");
375ebb7c6fdSAlex Wilson 	case MLXCX_EVENT_GPIO:
376ebb7c6fdSAlex Wilson 		return ("GPIO");
377ebb7c6fdSAlex Wilson 	case MLXCX_EVENT_PORT_MODULE:
378ebb7c6fdSAlex Wilson 		return ("PORT_MODULE");
379ebb7c6fdSAlex Wilson 	case MLXCX_EVENT_TEMP_WARNING:
380ebb7c6fdSAlex Wilson 		return ("TEMP_WARNING");
381ebb7c6fdSAlex Wilson 	case MLXCX_EVENT_REMOTE_CONFIG:
382ebb7c6fdSAlex Wilson 		return ("REMOTE_CONFIG");
383ebb7c6fdSAlex Wilson 	case MLXCX_EVENT_DCBX_CHANGE:
384ebb7c6fdSAlex Wilson 		return ("DCBX_CHANGE");
385ebb7c6fdSAlex Wilson 	case MLXCX_EVENT_DOORBELL_CONGEST:
386ebb7c6fdSAlex Wilson 		return ("DOORBELL_CONGEST");
387ebb7c6fdSAlex Wilson 	case MLXCX_EVENT_STALL_VL:
388ebb7c6fdSAlex Wilson 		return ("STALL_VL");
389ebb7c6fdSAlex Wilson 	case MLXCX_EVENT_CMD_COMPLETION:
390ebb7c6fdSAlex Wilson 		return ("CMD_COMPLETION");
391ebb7c6fdSAlex Wilson 	case MLXCX_EVENT_PAGE_REQUEST:
392ebb7c6fdSAlex Wilson 		return ("PAGE_REQUEST");
393ebb7c6fdSAlex Wilson 	case MLXCX_EVENT_NIC_VPORT:
394ebb7c6fdSAlex Wilson 		return ("NIC_VPORT");
395ebb7c6fdSAlex Wilson 	case MLXCX_EVENT_EC_PARAMS_CHANGE:
396ebb7c6fdSAlex Wilson 		return ("EC_PARAMS_CHANGE");
397ebb7c6fdSAlex Wilson 	case MLXCX_EVENT_XRQ_ERROR:
398ebb7c6fdSAlex Wilson 		return ("XRQ_ERROR");
399ebb7c6fdSAlex Wilson 	}
400ebb7c6fdSAlex Wilson 	return ("UNKNOWN");
401ebb7c6fdSAlex Wilson }
402ebb7c6fdSAlex Wilson 
403ebb7c6fdSAlex Wilson /* Should be called only when link state has changed. */
404ebb7c6fdSAlex Wilson void
mlxcx_update_link_state(mlxcx_t * mlxp,mlxcx_port_t * port)405ebb7c6fdSAlex Wilson mlxcx_update_link_state(mlxcx_t *mlxp, mlxcx_port_t *port)
406ebb7c6fdSAlex Wilson {
407ebb7c6fdSAlex Wilson 	link_state_t ls;
408ebb7c6fdSAlex Wilson 
409ebb7c6fdSAlex Wilson 	mutex_enter(&port->mlp_mtx);
410ebb7c6fdSAlex Wilson 	(void) mlxcx_cmd_query_port_status(mlxp, port);
411ebb7c6fdSAlex Wilson 	(void) mlxcx_cmd_query_port_speed(mlxp, port);
412d77e6e0fSPaul Winder 	(void) mlxcx_cmd_query_port_fec(mlxp, port);
413ebb7c6fdSAlex Wilson 
414ebb7c6fdSAlex Wilson 	switch (port->mlp_oper_status) {
415ebb7c6fdSAlex Wilson 	case MLXCX_PORT_STATUS_UP:
416ebb7c6fdSAlex Wilson 	case MLXCX_PORT_STATUS_UP_ONCE:
417ebb7c6fdSAlex Wilson 		ls = LINK_STATE_UP;
418ebb7c6fdSAlex Wilson 		break;
419ebb7c6fdSAlex Wilson 	case MLXCX_PORT_STATUS_DOWN:
420ebb7c6fdSAlex Wilson 		ls = LINK_STATE_DOWN;
421ebb7c6fdSAlex Wilson 		break;
422ebb7c6fdSAlex Wilson 	default:
423ebb7c6fdSAlex Wilson 		ls = LINK_STATE_UNKNOWN;
424ebb7c6fdSAlex Wilson 	}
425*80d1a7bdSAlex Wilson 
426*80d1a7bdSAlex Wilson 	if (mlxp->mlx_mac_hdl != NULL)
427*80d1a7bdSAlex Wilson 		mac_link_update(mlxp->mlx_mac_hdl, ls);
428ebb7c6fdSAlex Wilson 
429ebb7c6fdSAlex Wilson 	mutex_exit(&port->mlp_mtx);
430ebb7c6fdSAlex Wilson }
431ebb7c6fdSAlex Wilson 
4325f0e3176SPaul Winder CTASSERT(MLXCX_MANAGE_PAGES_MAX_PAGES < UINT_MAX);
4335f0e3176SPaul Winder 
434ebb7c6fdSAlex Wilson static void
mlxcx_give_pages_once(mlxcx_t * mlxp,size_t npages)435ebb7c6fdSAlex Wilson mlxcx_give_pages_once(mlxcx_t *mlxp, size_t npages)
436ebb7c6fdSAlex Wilson {
437ebb7c6fdSAlex Wilson 	ddi_device_acc_attr_t acc;
438ebb7c6fdSAlex Wilson 	ddi_dma_attr_t attr;
439ebb7c6fdSAlex Wilson 	mlxcx_dev_page_t *mdp;
4405f0e3176SPaul Winder 	mlxcx_dev_page_t **pages;
4415f0e3176SPaul Winder 	size_t i;
442ebb7c6fdSAlex Wilson 	const ddi_dma_cookie_t *ck;
443ebb7c6fdSAlex Wilson 
4445f0e3176SPaul Winder 	/*
4455f0e3176SPaul Winder 	 * If this isn't enough, the HCA will ask for more
4465f0e3176SPaul Winder 	 */
4475f0e3176SPaul Winder 	npages = MIN(npages, MLXCX_MANAGE_PAGES_MAX_PAGES);
4485f0e3176SPaul Winder 
4495f0e3176SPaul Winder 	pages = kmem_zalloc(sizeof (*pages) * npages, KM_SLEEP);
450ebb7c6fdSAlex Wilson 
4515f0e3176SPaul Winder 	for (i = 0; i < npages; i++) {
452ebb7c6fdSAlex Wilson 		mdp = kmem_zalloc(sizeof (mlxcx_dev_page_t), KM_SLEEP);
453ebb7c6fdSAlex Wilson 		mlxcx_dma_acc_attr(mlxp, &acc);
454ebb7c6fdSAlex Wilson 		mlxcx_dma_page_attr(mlxp, &attr);
455ebb7c6fdSAlex Wilson 		if (!mlxcx_dma_alloc(mlxp, &mdp->mxdp_dma, &attr, &acc,
456ebb7c6fdSAlex Wilson 		    B_TRUE, MLXCX_HW_PAGE_SIZE, B_TRUE)) {
4575f0e3176SPaul Winder 			mlxcx_warn(mlxp, "failed to allocate 4k page %u/%lu", i,
4585f0e3176SPaul Winder 			    npages);
4595f0e3176SPaul Winder 			kmem_free(mdp, sizeof (mlxcx_dev_page_t));
460ebb7c6fdSAlex Wilson 			goto cleanup_npages;
461ebb7c6fdSAlex Wilson 		}
462ebb7c6fdSAlex Wilson 		ck = mlxcx_dma_cookie_one(&mdp->mxdp_dma);
463ebb7c6fdSAlex Wilson 		mdp->mxdp_pa = ck->dmac_laddress;
464ebb7c6fdSAlex Wilson 		pages[i] = mdp;
465ebb7c6fdSAlex Wilson 	}
466ebb7c6fdSAlex Wilson 
467ebb7c6fdSAlex Wilson 	mutex_enter(&mlxp->mlx_pagemtx);
468ebb7c6fdSAlex Wilson 
469ebb7c6fdSAlex Wilson 	if (!mlxcx_cmd_give_pages(mlxp,
4705f0e3176SPaul Winder 	    MLXCX_MANAGE_PAGES_OPMOD_GIVE_PAGES, npages, pages)) {
4715f0e3176SPaul Winder 		mlxcx_warn(mlxp, "!hardware refused our gift of %lu "
4725f0e3176SPaul Winder 		    "pages!", npages);
4735f0e3176SPaul Winder 		mutex_exit(&mlxp->mlx_pagemtx);
474ebb7c6fdSAlex Wilson 		goto cleanup_npages;
475ebb7c6fdSAlex Wilson 	}
476ebb7c6fdSAlex Wilson 
4775f0e3176SPaul Winder 	for (i = 0; i < npages; i++) {
478ebb7c6fdSAlex Wilson 		avl_add(&mlxp->mlx_pages, pages[i]);
479ebb7c6fdSAlex Wilson 	}
4805f0e3176SPaul Winder 	mlxp->mlx_npages += npages;
481ebb7c6fdSAlex Wilson 	mutex_exit(&mlxp->mlx_pagemtx);
482ebb7c6fdSAlex Wilson 
4835f0e3176SPaul Winder 	kmem_free(pages, sizeof (*pages) * npages);
4845f0e3176SPaul Winder 
485ebb7c6fdSAlex Wilson 	return;
486ebb7c6fdSAlex Wilson 
487ebb7c6fdSAlex Wilson cleanup_npages:
4885f0e3176SPaul Winder 	for (i = 0; i < npages; i++) {
4895f0e3176SPaul Winder 		if ((mdp = pages[i]) == NULL)
4905f0e3176SPaul Winder 			break;
4915f0e3176SPaul Winder 
492ebb7c6fdSAlex Wilson 		mlxcx_dma_free(&mdp->mxdp_dma);
493ebb7c6fdSAlex Wilson 		kmem_free(mdp, sizeof (mlxcx_dev_page_t));
494ebb7c6fdSAlex Wilson 	}
495ebb7c6fdSAlex Wilson 	/* Tell the hardware we had an allocation failure. */
496ebb7c6fdSAlex Wilson 	(void) mlxcx_cmd_give_pages(mlxp, MLXCX_MANAGE_PAGES_OPMOD_ALLOC_FAIL,
497ebb7c6fdSAlex Wilson 	    0, NULL);
498ebb7c6fdSAlex Wilson 	mutex_exit(&mlxp->mlx_pagemtx);
4995f0e3176SPaul Winder 
5005f0e3176SPaul Winder 	kmem_free(pages, sizeof (*pages) * npages);
501ebb7c6fdSAlex Wilson }
502ebb7c6fdSAlex Wilson 
/*
 * Ask the hardware to return up to `npages' previously-given pages
 * (MANAGE_PAGES return), then free the matching entries from our
 * mlx_pages AVL tree.
 */
static void
mlxcx_take_pages_once(mlxcx_t *mlxp, size_t npages)
{
	uint_t i;
	int32_t ret;	/* number of pages the HW actually returned */
	uint64_t *pas;	/* physical addresses returned by the command */
	mlxcx_dev_page_t *mdp, probe;

	pas = kmem_alloc(sizeof (*pas) * npages, KM_SLEEP);

	if (!mlxcx_cmd_return_pages(mlxp, npages, pas, &ret)) {
		kmem_free(pas, sizeof (*pas) * npages);
		return;
	}

	mutex_enter(&mlxp->mlx_pagemtx);

	/* We should never be asked for pages back when we have none. */
	ASSERT0(avl_is_empty(&mlxp->mlx_pages));

	/*
	 * NOTE(review): `i' is unsigned and `ret' is int32_t; this comparison
	 * assumes mlxcx_cmd_return_pages() never yields a negative ret on
	 * success — confirm against its implementation.
	 */
	for (i = 0; i < ret; i++) {
		bzero(&probe, sizeof (probe));
		probe.mxdp_pa = pas[i];

		/* Look up the returned PA in our record of given pages. */
		mdp = avl_find(&mlxp->mlx_pages, &probe, NULL);

		if (mdp != NULL) {
			avl_remove(&mlxp->mlx_pages, mdp);
			mlxp->mlx_npages--;
			mlxcx_dma_free(&mdp->mxdp_dma);
			kmem_free(mdp, sizeof (mlxcx_dev_page_t));
		} else {
			mlxcx_warn(mlxp, "hardware returned a page "
			    "with PA 0x%" PRIx64 " but we have no "
			    "record of giving out such a page", pas[i]);
		}
	}

	mutex_exit(&mlxp->mlx_pagemtx);

	kmem_free(pas, sizeof (*pas) * npages);
}
5445f0e3176SPaul Winder 
/*
 * Taskq function servicing an asynchronous PAGE_REQUEST event: gives pages
 * to or takes pages back from the HCA depending on the sign of the
 * requested count.
 */
static void
mlxcx_pages_task(void *arg)
{
	mlxcx_async_param_t *param = arg;
	mlxcx_t *mlxp = param->mla_mlx;
	int32_t npages;

	/*
	 * We can drop the pending status now, as we've extracted what
	 * is needed to process the pages request.
	 *
	 * Even though we should never get another pages request until
	 * we have responded to this, along with the guard in mlxcx_sync_intr,
	 * this safely allows the reuse of mlxcx_async_param_t.
	 */
	mutex_enter(&param->mla_mtx);
	npages = param->mla_pages.mlp_npages;
	param->mla_pending = B_FALSE;
	bzero(&param->mla_pages, sizeof (param->mla_pages));
	mutex_exit(&param->mla_mtx);

	/*
	 * The PRM describes npages as: "Number of missing / unneeded pages
	 * (signed number, msb indicate sign)". The implication is that
	 * it will not be zero. We are expected to use this to give or
	 * take back pages (based on the sign) using the MANAGE_PAGES
	 * command but we can't determine whether to give or take
	 * when npages is zero. So we do nothing.
	 */
	if (npages > 0) {
		mlxcx_give_pages_once(mlxp, npages);
	} else if (npages < 0) {
		mlxcx_take_pages_once(mlxp, -1 * npages);
	}
}
5805f0e3176SPaul Winder 
/*
 * Taskq function servicing an asynchronous PORT_STATE event: re-queries the
 * port and pushes the new link state up to MAC.
 */
static void
mlxcx_link_state_task(void *arg)
{
	mlxcx_async_param_t *param = arg;
	mlxcx_port_t *port;
	mlxcx_t *mlxp;

	/*
	 * Gather the arguments from the parameters and clear the
	 * pending status.
	 *
	 * The pending status must be cleared *before* we update the
	 * link state. This is both safe and required to ensure we always
	 * have the correct link state. It is safe because taskq_ents are
	 * reusable (by the caller of taskq_dispatch_ent()) once the
	 * task function has started executing. It is necessarily before
	 * updating the link state to guarantee further link state change
	 * events are not missed and we always have the current link state.
	 */
	mutex_enter(&param->mla_mtx);
	mlxp = param->mla_mlx;
	port = param->mla_port;
	param->mla_pending = B_FALSE;
	mutex_exit(&param->mla_mtx);

	mlxcx_update_link_state(mlxp, port);
}
608ebb7c6fdSAlex Wilson 
609ebb7c6fdSAlex Wilson static const char *
mlxcx_module_error_string(mlxcx_module_error_type_t err)610ebb7c6fdSAlex Wilson mlxcx_module_error_string(mlxcx_module_error_type_t err)
611ebb7c6fdSAlex Wilson {
612ebb7c6fdSAlex Wilson 	switch (err) {
613ebb7c6fdSAlex Wilson 	case MLXCX_MODULE_ERR_POWER_BUDGET:
614ebb7c6fdSAlex Wilson 		return ("POWER_BUDGET");
615ebb7c6fdSAlex Wilson 	case MLXCX_MODULE_ERR_LONG_RANGE:
616ebb7c6fdSAlex Wilson 		return ("LONG_RANGE");
617ebb7c6fdSAlex Wilson 	case MLXCX_MODULE_ERR_BUS_STUCK:
618ebb7c6fdSAlex Wilson 		return ("BUS_STUCK");
619ebb7c6fdSAlex Wilson 	case MLXCX_MODULE_ERR_NO_EEPROM:
620ebb7c6fdSAlex Wilson 		return ("NO_EEPROM");
621ebb7c6fdSAlex Wilson 	case MLXCX_MODULE_ERR_ENFORCEMENT:
622ebb7c6fdSAlex Wilson 		return ("ENFORCEMENT");
623ebb7c6fdSAlex Wilson 	case MLXCX_MODULE_ERR_UNKNOWN_IDENT:
624ebb7c6fdSAlex Wilson 		return ("UNKNOWN_IDENT");
625ebb7c6fdSAlex Wilson 	case MLXCX_MODULE_ERR_HIGH_TEMP:
626ebb7c6fdSAlex Wilson 		return ("HIGH_TEMP");
627ebb7c6fdSAlex Wilson 	case MLXCX_MODULE_ERR_CABLE_SHORTED:
628ebb7c6fdSAlex Wilson 		return ("CABLE_SHORTED");
629ebb7c6fdSAlex Wilson 	default:
630ebb7c6fdSAlex Wilson 		return ("UNKNOWN");
631ebb7c6fdSAlex Wilson 	}
632ebb7c6fdSAlex Wilson }
633ebb7c6fdSAlex Wilson 
634ebb7c6fdSAlex Wilson static void
mlxcx_report_module_error(mlxcx_t * mlxp,mlxcx_evdata_port_mod_t * evd)635ebb7c6fdSAlex Wilson mlxcx_report_module_error(mlxcx_t *mlxp, mlxcx_evdata_port_mod_t *evd)
636ebb7c6fdSAlex Wilson {
637ebb7c6fdSAlex Wilson 	uint64_t ena;
638ebb7c6fdSAlex Wilson 	char buf[FM_MAX_CLASS];
639ebb7c6fdSAlex Wilson 	const char *lename;
640ebb7c6fdSAlex Wilson 	const char *ename;
641ebb7c6fdSAlex Wilson 	const char *stname;
642ebb7c6fdSAlex Wilson 	uint_t eno = 0;
643ebb7c6fdSAlex Wilson 	mlxcx_module_status_t state = evd->mled_port_mod_module_status;
644ebb7c6fdSAlex Wilson 
645ebb7c6fdSAlex Wilson 	switch (state) {
646ebb7c6fdSAlex Wilson 	case MLXCX_MODULE_ERROR:
647ebb7c6fdSAlex Wilson 		stname = "error";
648ebb7c6fdSAlex Wilson 		eno = evd->mled_port_mod_error_type;
649ebb7c6fdSAlex Wilson 		lename = mlxcx_module_error_string(eno);
650ebb7c6fdSAlex Wilson 		switch (eno) {
651ebb7c6fdSAlex Wilson 		case MLXCX_MODULE_ERR_ENFORCEMENT:
652ebb7c6fdSAlex Wilson 			ename = DDI_FM_TXR_ERROR_WHITELIST;
653ebb7c6fdSAlex Wilson 			break;
654ebb7c6fdSAlex Wilson 		case MLXCX_MODULE_ERR_UNKNOWN_IDENT:
655ebb7c6fdSAlex Wilson 		case MLXCX_MODULE_ERR_NO_EEPROM:
656ebb7c6fdSAlex Wilson 			ename = DDI_FM_TXR_ERROR_NOTSUPP;
657ebb7c6fdSAlex Wilson 			break;
658ebb7c6fdSAlex Wilson 		case MLXCX_MODULE_ERR_HIGH_TEMP:
659ebb7c6fdSAlex Wilson 			ename = DDI_FM_TXR_ERROR_OVERTEMP;
660ebb7c6fdSAlex Wilson 			break;
661ebb7c6fdSAlex Wilson 		case MLXCX_MODULE_ERR_POWER_BUDGET:
662ebb7c6fdSAlex Wilson 		case MLXCX_MODULE_ERR_LONG_RANGE:
663ebb7c6fdSAlex Wilson 		case MLXCX_MODULE_ERR_CABLE_SHORTED:
664ebb7c6fdSAlex Wilson 			ename = DDI_FM_TXR_ERROR_HWFAIL;
665ebb7c6fdSAlex Wilson 			break;
666ebb7c6fdSAlex Wilson 		case MLXCX_MODULE_ERR_BUS_STUCK:
667ebb7c6fdSAlex Wilson 		default:
668ebb7c6fdSAlex Wilson 			ename = DDI_FM_TXR_ERROR_UNKNOWN;
669ebb7c6fdSAlex Wilson 		}
670ebb7c6fdSAlex Wilson 		break;
671ebb7c6fdSAlex Wilson 	default:
672ebb7c6fdSAlex Wilson 		return;
673ebb7c6fdSAlex Wilson 	}
674ebb7c6fdSAlex Wilson 
675ebb7c6fdSAlex Wilson 	(void) snprintf(buf, FM_MAX_CLASS, "%s.%s",
676ebb7c6fdSAlex Wilson 	    DDI_FM_NIC, DDI_FM_TXR_ERROR);
677ebb7c6fdSAlex Wilson 	ena = fm_ena_generate(0, FM_ENA_FMT1);
678ebb7c6fdSAlex Wilson 	if (!DDI_FM_EREPORT_CAP(mlxp->mlx_fm_caps))
679ebb7c6fdSAlex Wilson 		return;
680ebb7c6fdSAlex Wilson 
681ebb7c6fdSAlex Wilson 	ddi_fm_ereport_post(mlxp->mlx_dip, buf, ena, DDI_NOSLEEP,
682ebb7c6fdSAlex Wilson 	    /* compulsory FM props */
683ebb7c6fdSAlex Wilson 	    FM_VERSION, DATA_TYPE_UINT8, FM_EREPORT_VERS0,
684ebb7c6fdSAlex Wilson 	    /* generic NIC txr error event props */
685ebb7c6fdSAlex Wilson 	    "error", DATA_TYPE_STRING, ename,
686ebb7c6fdSAlex Wilson 	    "port_index", DATA_TYPE_UINT8, 0,
687ebb7c6fdSAlex Wilson 	    "txr_index", DATA_TYPE_UINT8, evd->mled_port_mod_module,
688ebb7c6fdSAlex Wilson 	    /* local props */
689ebb7c6fdSAlex Wilson 	    "mlxcx_state", DATA_TYPE_STRING, stname,
690ebb7c6fdSAlex Wilson 	    "mlxcx_error", DATA_TYPE_STRING, lename,
691ebb7c6fdSAlex Wilson 	    "mlxcx_error_num", DATA_TYPE_UINT8, eno,
692ebb7c6fdSAlex Wilson 	    NULL);
693ebb7c6fdSAlex Wilson 	ddi_fm_service_impact(mlxp->mlx_dip, DDI_SERVICE_LOST);
694ebb7c6fdSAlex Wilson }
695ebb7c6fdSAlex Wilson 
6960207f820SPaul Winder /*
6970207f820SPaul Winder  * Common beginning of interrupt processing.
6980207f820SPaul Winder  * Confirm interrupt hasn't been disabled, verify its state and
6990207f820SPaul Winder  * mark the vector as active.
7000207f820SPaul Winder  */
7010207f820SPaul Winder static boolean_t
mlxcx_intr_ini(mlxcx_t * mlxp,mlxcx_event_queue_t * mleq)7020207f820SPaul Winder mlxcx_intr_ini(mlxcx_t *mlxp, mlxcx_event_queue_t *mleq)
7030207f820SPaul Winder {
7040207f820SPaul Winder 	mutex_enter(&mleq->mleq_mtx);
7050207f820SPaul Winder 
7060207f820SPaul Winder 	if ((mleq->mleq_state & MLXCX_EQ_INTR_ENABLED) == 0) {
7070207f820SPaul Winder 		mutex_exit(&mleq->mleq_mtx);
7080207f820SPaul Winder 		return (B_FALSE);
7090207f820SPaul Winder 	}
7100207f820SPaul Winder 
7110207f820SPaul Winder 	if (!(mleq->mleq_state & MLXCX_EQ_ALLOC) ||
7120207f820SPaul Winder 	    !(mleq->mleq_state & MLXCX_EQ_CREATED) ||
7130207f820SPaul Winder 	    (mleq->mleq_state & MLXCX_EQ_DESTROYED)) {
7140207f820SPaul Winder 		mlxcx_warn(mlxp, "intr %d in bad eq state",
7150207f820SPaul Winder 		    mleq->mleq_intr_index);
7160207f820SPaul Winder 		mutex_exit(&mleq->mleq_mtx);
7170207f820SPaul Winder 		return (B_FALSE);
7180207f820SPaul Winder 	}
7190207f820SPaul Winder 
7200207f820SPaul Winder 	mleq->mleq_state |= MLXCX_EQ_INTR_ACTIVE;
7210207f820SPaul Winder 	mutex_exit(&mleq->mleq_mtx);
7220207f820SPaul Winder 
7230207f820SPaul Winder 	return (B_TRUE);
7240207f820SPaul Winder }
7250207f820SPaul Winder 
/*
 * End of interrupt processing.
 * Mark vector as no longer active and if shutdown is blocked on this vector,
 * wake it up.
 */
static void
mlxcx_intr_fini(mlxcx_event_queue_t *mleq)
{
	mutex_enter(&mleq->mleq_mtx);
	/*
	 * If the interrupt-disable path has flagged this vector as
	 * quiescing, it is waiting on mleq_cv for us to finish.
	 */
	if ((mleq->mleq_state & MLXCX_EQ_INTR_QUIESCE) != 0)
		cv_signal(&mleq->mleq_cv);

	mleq->mleq_state &= ~MLXCX_EQ_INTR_ACTIVE;
	mutex_exit(&mleq->mleq_mtx);
}
7410207f820SPaul Winder 
/*
 * Interrupt handler for the asynchronous event queue (vector 0).
 *
 * Command completions and page requests are handled unconditionally, since
 * they are needed while mlxcx_attach() is still running. All other events
 * (port state changes, module errors) are ignored until attach completes.
 * Events requiring work that cannot be done in interrupt context are handed
 * off to the async taskq via per-event mlxcx_async_param_t blocks, using
 * mla_pending to ensure at most one dispatch is outstanding per event source.
 */
static uint_t
mlxcx_intr_async(caddr_t arg, caddr_t arg2)
{
	mlxcx_t *mlxp = (mlxcx_t *)arg;
	mlxcx_event_queue_t *mleq = (mlxcx_event_queue_t *)arg2;
	mlxcx_eventq_ent_t *ent;
	mlxcx_async_param_t *param;
	uint_t portn;
	uint16_t func;

	if (!mlxcx_intr_ini(mlxp, mleq))
		return (DDI_INTR_CLAIMED);

	ent = mlxcx_eq_next(mleq);
	if (ent == NULL) {
		goto done;
	}

	ASSERT(mleq->mleq_state & MLXCX_EQ_ARMED);
	mleq->mleq_state &= ~MLXCX_EQ_ARMED;

	for (; ent != NULL; ent = mlxcx_eq_next(mleq)) {
		DTRACE_PROBE2(event, mlxcx_t *, mlxp, mlxcx_eventq_ent_t *,
		    ent);

		/*
		 * Handle events which can be processed while we're still in
		 * mlxcx_attach(). Everything on the mlxcx_t which these events
		 * use must be allocated and set up prior to the call to
		 * mlxcx_setup_async_eqs().
		 */
		switch (ent->mleqe_event_type) {
		case MLXCX_EVENT_CMD_COMPLETION:
			mlxcx_cmd_completion(mlxp, ent);
			continue;
		case MLXCX_EVENT_PAGE_REQUEST:
			func = from_be16(ent->mleqe_page_request.
			    mled_page_request_function_id);
			VERIFY3U(func, <=, MLXCX_FUNC_ID_MAX);

			param = &mlxp->mlx_npages_req[func];
			mutex_enter(&param->mla_mtx);
			if (param->mla_pending) {
				/*
				 * The PRM states we will not get another
				 * page request event until any pending have
				 * been posted as complete to the HCA.
				 * This will guard against this anyway.
				 */
				mutex_exit(&param->mla_mtx);
				mlxcx_warn(mlxp, "Unexpected page request "
				    "whilst another is pending");
				continue;
			}
			/*
			 * Note: mlp_npages is signed; a negative count asks
			 * us to return pages to the HCA rather than give.
			 */
			param->mla_pages.mlp_npages =
			    (int32_t)from_be32(ent->mleqe_page_request.
			    mled_page_request_num_pages);
			param->mla_pages.mlp_func = func;
			param->mla_pending = B_TRUE;
			ASSERT3P(param->mla_mlx, ==, mlxp);
			mutex_exit(&param->mla_mtx);

			taskq_dispatch_ent(mlxp->mlx_async_tq, mlxcx_pages_task,
			    param, 0, &param->mla_tqe);
			continue;
		}

		/*
		 * All other events should be ignored while in attach.
		 */
		mutex_enter(&mleq->mleq_mtx);
		if (mleq->mleq_state & MLXCX_EQ_ATTACHING) {
			mutex_exit(&mleq->mleq_mtx);
			continue;
		}
		mutex_exit(&mleq->mleq_mtx);

		switch (ent->mleqe_event_type) {
		case MLXCX_EVENT_PORT_STATE:
			/* Hardware port numbers are 1-based. */
			portn = get_bits8(
			    ent->mleqe_port_state.mled_port_state_port_num,
			    MLXCX_EVENT_PORT_NUM) - 1;
			if (portn >= mlxp->mlx_nports)
				break;

			param = &mlxp->mlx_ports[portn].mlx_port_event;
			mutex_enter(&param->mla_mtx);
			if (param->mla_pending) {
				/*
				 * There is a link state event pending
				 * processing. When that event is handled
				 * it will get the current link state.
				 */
				mutex_exit(&param->mla_mtx);
				break;
			}

			ASSERT3P(param->mla_mlx, ==, mlxp);
			ASSERT3P(param->mla_port, ==, &mlxp->mlx_ports[portn]);

			param->mla_pending = B_TRUE;
			mutex_exit(&param->mla_mtx);

			taskq_dispatch_ent(mlxp->mlx_async_tq,
			    mlxcx_link_state_task, param, 0, &param->mla_tqe);
			break;
		case MLXCX_EVENT_PORT_MODULE:
			mlxcx_report_module_error(mlxp, &ent->mleqe_port_mod);
			break;
		default:
			mlxcx_warn(mlxp, "unhandled event 0x%x on intr %d",
			    ent->mleqe_event_type, mleq->mleq_intr_index);
		}
	}

	/* Re-arm the EQ so the hardware will raise further interrupts. */
	mlxcx_arm_eq(mlxp, mleq);

done:
	mlxcx_intr_fini(mleq);
	return (DDI_INTR_CLAIMED);
}
863ebb7c6fdSAlex Wilson 
/*
 * Drain completion queue entries from mlcq, dispatching each to the TX or
 * RX completion handler for its work queue. Received mblks are chained and
 * returned through *mpp. Processing stops after mldp_rx_per_cq RX frames,
 * or after bytelim bytes of RX data when bytelim is non-zero.
 *
 * Returns B_FALSE if the CQ is in an unusable state or teardown was
 * requested mid-processing; B_TRUE otherwise. Caller must hold mlcq_mtx.
 */
static boolean_t
mlxcx_process_cq(mlxcx_t *mlxp, mlxcx_completion_queue_t *mlcq, mblk_t **mpp,
    size_t bytelim)
{
	mlxcx_work_queue_t *wq = mlcq->mlcq_wq;
	mlxcx_completionq_ent_t *cent;
	mblk_t *mp, *cmp, *nmp;
	mlxcx_buffer_t *buf;
	boolean_t found, added;
	size_t bytes = 0;
	uint_t rx_frames = 0;
	uint_t comp_cnt = 0;
	int64_t wqebbs, bufcnt;

	*mpp = NULL;

	if (!(mlcq->mlcq_state & MLXCX_CQ_ALLOC) ||
	    !(mlcq->mlcq_state & MLXCX_CQ_CREATED) ||
	    (mlcq->mlcq_state & MLXCX_CQ_DESTROYED) ||
	    (mlcq->mlcq_state & MLXCX_CQ_TEARDOWN)) {
		return (B_FALSE);
	}

	nmp = cmp = mp = NULL;

	wqebbs = 0;
	bufcnt = 0;
	for (cent = mlxcx_cq_next(mlcq); cent != NULL;
	    cent = mlxcx_cq_next(mlcq)) {
		/*
		 * Teardown and ring stop can atomic_or this flag
		 * into our state if they want us to stop early.
		 */
		if (mlcq->mlcq_state & MLXCX_CQ_TEARDOWN)
			return (B_FALSE);

		comp_cnt++;
		if (cent->mlcqe_opcode == MLXCX_CQE_OP_REQ &&
		    cent->mlcqe_send_wqe_opcode == MLXCX_WQE_OP_NOP) {
			/* NOP completions have no buffer; just free the WQEBB. */
			atomic_dec_64(&wq->mlwq_wqebb_used);
			goto nextcq;
		}

lookagain:
		/*
		 * Generally the buffer we're looking for will be
		 * at the front of the list, so this loop won't
		 * need to look far.
		 */
		buf = list_head(&mlcq->mlcq_buffers);
		found = B_FALSE;
		while (buf != NULL) {
			if ((buf->mlb_wqe_index & UINT16_MAX) ==
			    from_be16(cent->mlcqe_wqe_counter)) {
				found = B_TRUE;
				break;
			}
			buf = list_next(&mlcq->mlcq_buffers, buf);
		}

		if (!found) {
			/*
			 * If there's any buffers waiting on the
			 * buffers_b list, then merge those into
			 * the main list and have another look.
			 *
			 * The wq enqueue routines push new buffers
			 * into buffers_b so that they can avoid
			 * taking the mlcq_mtx and blocking us for
			 * every single packet.
			 */
			added = B_FALSE;
			mutex_enter(&mlcq->mlcq_bufbmtx);
			if (!list_is_empty(&mlcq->mlcq_buffers_b)) {
				list_move_tail(&mlcq->mlcq_buffers,
				    &mlcq->mlcq_buffers_b);
				added = B_TRUE;
			}
			mutex_exit(&mlcq->mlcq_bufbmtx);
			if (added)
				goto lookagain;

			/*
			 * This check could go just after the lookagain
			 * label, but it is a hot code path so we don't
			 * want to unnecessarily grab a lock and check
			 * a flag for a relatively rare event (the ring
			 * being stopped).
			 */
			mutex_enter(&wq->mlwq_mtx);
			if ((wq->mlwq_state & MLXCX_WQ_STARTED) == 0) {
				mutex_exit(&wq->mlwq_mtx);
				goto nextcq;
			}
			mutex_exit(&wq->mlwq_mtx);

			buf = list_head(&mlcq->mlcq_buffers);
			mlxcx_warn(mlxp, "got completion on CQ %x but "
			    "no buffer matching wqe found: %x (first "
			    "buffer counter = %x)", mlcq->mlcq_num,
			    from_be16(cent->mlcqe_wqe_counter),
			    buf == NULL ? UINT32_MAX :
			    buf->mlb_wqe_index);
			mlxcx_fm_ereport(mlxp, DDI_FM_DEVICE_INVAL_STATE);
			goto nextcq;
		}

		/*
		 * The buf is likely to be freed below, count this now.
		 */
		wqebbs += buf->mlb_wqebbs;

		list_remove(&mlcq->mlcq_buffers, buf);
		bufcnt++;

		switch (mlcq->mlcq_wq->mlwq_type) {
		case MLXCX_WQ_TYPE_SENDQ:
			mlxcx_tx_completion(mlxp, mlcq, cent, buf);
			break;
		case MLXCX_WQ_TYPE_RECVQ:
			nmp = mlxcx_rx_completion(mlxp, mlcq, cent, buf);
			bytes += from_be32(cent->mlcqe_byte_cnt);
			/* Chain the received mblk onto the result list. */
			if (nmp != NULL) {
				if (cmp != NULL) {
					cmp->b_next = nmp;
					cmp = nmp;
				} else {
					mp = cmp = nmp;
				}

				rx_frames++;
			}
			break;
		}

		/*
		 * Update the consumer index with what has been processed,
		 * followed by driver counters. It is important to tell the
		 * hardware first, otherwise when we throw more packets at
		 * it, it may get an overflow error.
		 * We do this whenever we've processed enough to bridge the
		 * high->low water mark.
		 */
		if (bufcnt > (MLXCX_CQ_LWM_GAP - MLXCX_CQ_HWM_GAP)) {
			mlxcx_update_cqci(mlxp, mlcq);
			/*
			 * Both these variables are incremented using
			 * atomics as they are modified in other code paths
			 * (Eg during tx) which hold different locks.
			 */
			atomic_add_64(&mlcq->mlcq_bufcnt, -bufcnt);
			atomic_add_64(&wq->mlwq_wqebb_used, -wqebbs);
			wqebbs = 0;
			bufcnt = 0;
			comp_cnt = 0;
		}
nextcq:
		if (rx_frames > mlxp->mlx_props.mldp_rx_per_cq ||
		    (bytelim != 0 && bytes > bytelim))
			break;
	}

	/* Flush any counts not yet reconciled by the watermark check above. */
	if (comp_cnt > 0) {
		mlxcx_update_cqci(mlxp, mlcq);
		atomic_add_64(&mlcq->mlcq_bufcnt, -bufcnt);
		atomic_add_64(&wq->mlwq_wqebb_used, -wqebbs);
	}

	*mpp = mp;
	return (B_TRUE);
}
103622d05228SPaul Winder 
103722d05228SPaul Winder 
103822d05228SPaul Winder mblk_t *
mlxcx_rx_poll(mlxcx_t * mlxp,mlxcx_completion_queue_t * mlcq,size_t bytelim)103922d05228SPaul Winder mlxcx_rx_poll(mlxcx_t *mlxp, mlxcx_completion_queue_t *mlcq, size_t bytelim)
104022d05228SPaul Winder {
104122d05228SPaul Winder 	mblk_t *mp = NULL;
104222d05228SPaul Winder 
104322d05228SPaul Winder 	ASSERT(mutex_owned(&mlcq->mlcq_mtx));
104422d05228SPaul Winder 
104522d05228SPaul Winder 	ASSERT(mlcq->mlcq_wq != NULL);
104622d05228SPaul Winder 	ASSERT3U(mlcq->mlcq_wq->mlwq_type, ==, MLXCX_WQ_TYPE_RECVQ);
104722d05228SPaul Winder 
104822d05228SPaul Winder 	(void) mlxcx_process_cq(mlxp, mlcq, &mp, bytelim);
104922d05228SPaul Winder 
1050ebb7c6fdSAlex Wilson 	return (mp);
1051ebb7c6fdSAlex Wilson }
1052ebb7c6fdSAlex Wilson 
/*
 * Interrupt handler for completion vectors (1..n). Each EQ entry names a
 * CQ; we look it up, process its completions, and deliver results to mac
 * (mac_rx_ring for RX chains, mac_tx_ring_update when a blocked TX ring
 * drains below its low water mark). CQs in polling mode are skipped so we
 * never block one CQ's interrupt behind another's poller.
 *
 * Lock ordering: mleq_mtx is only held briefly for AVL lookup and state;
 * mlcq_arm_mtx is taken before mlcq_mtx and both are dropped before any
 * call into the mac layer to avoid deadlocks.
 */
static uint_t
mlxcx_intr_n(caddr_t arg, caddr_t arg2)
{
	mlxcx_t *mlxp = (mlxcx_t *)arg;
	mlxcx_event_queue_t *mleq = (mlxcx_event_queue_t *)arg2;
	mlxcx_eventq_ent_t *ent;
	mlxcx_completion_queue_t *mlcq, probe;
	mlxcx_work_queue_t *mlwq;
	mblk_t *mp = NULL;
	boolean_t tellmac = B_FALSE;

	if (!mlxcx_intr_ini(mlxp, mleq))
		return (DDI_INTR_CLAIMED);

	ent = mlxcx_eq_next(mleq);
	if (ent == NULL) {
		/*
		 * A stream of empty interrupts suggests a wedged vector;
		 * report it and disable the vector rather than spin.
		 */
		if (++mleq->mleq_badintrs > mlxcx_stuck_intr_count) {
			mlxcx_fm_ereport(mlxp, DDI_FM_DEVICE_BADINT_LIMIT);
			ddi_fm_service_impact(mlxp->mlx_dip, DDI_SERVICE_LOST);
			(void) ddi_intr_disable(mlxp->mlx_intr_handles[
			    mleq->mleq_intr_index]);
		}
		goto done;
	}
	mleq->mleq_badintrs = 0;

	mutex_enter(&mleq->mleq_mtx);
	ASSERT(mleq->mleq_state & MLXCX_EQ_ARMED);
	mleq->mleq_state &= ~MLXCX_EQ_ARMED;
#if defined(DEBUG)
	/*
	 * If we're still in mlxcx_attach and an intr_n fired, something really
	 * weird is going on. This shouldn't happen in the absence of a driver
	 * or firmware bug, so in the interests of minimizing branches in this
	 * function this check is under DEBUG.
	 */
	if (mleq->mleq_state & MLXCX_EQ_ATTACHING) {
		mutex_exit(&mleq->mleq_mtx);
		mlxcx_warn(mlxp, "intr_n (%u) fired during attach, disabling "
		    "vector", mleq->mleq_intr_index);
		mlxcx_fm_ereport(mlxp, DDI_FM_DEVICE_INVAL_STATE);
		ddi_fm_service_impact(mlxp->mlx_dip, DDI_SERVICE_LOST);
		(void) ddi_intr_disable(mlxp->mlx_intr_handles[
		    mleq->mleq_intr_index]);
		goto done;
	}
#endif
	mutex_exit(&mleq->mleq_mtx);

	for (; ent != NULL; ent = mlxcx_eq_next(mleq)) {
		ASSERT3U(ent->mleqe_event_type, ==, MLXCX_EVENT_COMPLETION);

		/* Find the CQ this completion event is for. */
		probe.mlcq_num =
		    from_be24(ent->mleqe_completion.mled_completion_cqn);
		mutex_enter(&mleq->mleq_mtx);
		mlcq = avl_find(&mleq->mleq_cqs, &probe, NULL);
		mutex_exit(&mleq->mleq_mtx);

		if (mlcq == NULL)
			goto update_eq;

		mlwq = mlcq->mlcq_wq;

		/*
		 * mlcq_arm_mtx is used to avoid race conditions between
		 * this interrupt routine and the transition from polling
		 * back to interrupt mode. When exiting poll mode the
		 * CQ is likely to be un-armed, which means there will
		 * be no events for the CQ coming though here,
		 * consequently very low contention on mlcq_arm_mtx.
		 *
		 * mlcq_arm_mtx must be released before calls into mac
		 * layer in order to avoid deadlocks.
		 */
		mutex_enter(&mlcq->mlcq_arm_mtx);
		mlcq->mlcq_ec++;
		atomic_and_uint(&mlcq->mlcq_state, ~MLXCX_CQ_ARMED);

		if (mutex_tryenter(&mlcq->mlcq_mtx) == 0) {
			/*
			 * If we failed to take the mutex because the
			 * polling function has it, just move on.
			 * We don't want to block other CQs behind
			 * this one.
			 */
			if ((mlcq->mlcq_state & MLXCX_CQ_POLLING) != 0) {
				mutex_exit(&mlcq->mlcq_arm_mtx);
				goto update_eq;
			}

			/* Otherwise we will wait. */
			mutex_enter(&mlcq->mlcq_mtx);
		}

		if ((mlcq->mlcq_state & MLXCX_CQ_POLLING) == 0 &&
		    mlxcx_process_cq(mlxp, mlcq, &mp, 0)) {
			/*
			 * The ring is not in polling mode and we processed
			 * some completion queue entries.
			 */
			if ((mlcq->mlcq_state & MLXCX_CQ_BLOCKED_MAC) != 0 &&
			    mlcq->mlcq_bufcnt < mlcq->mlcq_buflwm) {
				atomic_and_uint(&mlcq->mlcq_state,
				    ~MLXCX_CQ_BLOCKED_MAC);
				tellmac = B_TRUE;
			}

			if ((mlwq->mlwq_state & MLXCX_WQ_BLOCKED_MAC) != 0 &&
			    mlwq->mlwq_wqebb_used < mlwq->mlwq_buflwm) {
				atomic_and_uint(&mlwq->mlwq_state,
				    ~MLXCX_WQ_BLOCKED_MAC);
				tellmac = B_TRUE;
			}

			mlxcx_arm_cq(mlxp, mlcq);

			/* Drop both CQ locks before calling into mac. */
			mutex_exit(&mlcq->mlcq_mtx);
			mutex_exit(&mlcq->mlcq_arm_mtx);

			if (tellmac) {
				mac_tx_ring_update(mlxp->mlx_mac_hdl,
				    mlcq->mlcq_mac_hdl);
				tellmac = B_FALSE;
			}

			if (mp != NULL) {
				mac_rx_ring(mlxp->mlx_mac_hdl,
				    mlcq->mlcq_mac_hdl, mp, mlcq->mlcq_mac_gen);
			}
		} else {
			mutex_exit(&mlcq->mlcq_mtx);
			mutex_exit(&mlcq->mlcq_arm_mtx);
		}

update_eq:
		/*
		 * Updating the consumer counter for an EQ requires a write
		 * to the UAR, which is possibly expensive.
		 *
		 * Try to do it only often enough to stop us wrapping around.
		 */
		if ((mleq->mleq_cc & 0x7) == 0)
			mlxcx_update_eq(mlxp, mleq);
	}

	mlxcx_arm_eq(mlxp, mleq);

done:
	mlxcx_intr_fini(mleq);
	return (DDI_INTR_CLAIMED);
}
1204ebb7c6fdSAlex Wilson 
1205ebb7c6fdSAlex Wilson boolean_t
mlxcx_intr_setup(mlxcx_t * mlxp)1206ebb7c6fdSAlex Wilson mlxcx_intr_setup(mlxcx_t *mlxp)
1207ebb7c6fdSAlex Wilson {
1208ebb7c6fdSAlex Wilson 	dev_info_t *dip = mlxp->mlx_dip;
1209ebb7c6fdSAlex Wilson 	int ret;
1210ebb7c6fdSAlex Wilson 	int nintrs = 0;
1211ebb7c6fdSAlex Wilson 	int navail = 0;
1212ebb7c6fdSAlex Wilson 	int types, i;
1213ebb7c6fdSAlex Wilson 	mlxcx_eventq_type_t eqt = MLXCX_EQ_TYPE_ANY;
1214ebb7c6fdSAlex Wilson 
1215ebb7c6fdSAlex Wilson 	ret = ddi_intr_get_supported_types(dip, &types);
1216ebb7c6fdSAlex Wilson 	if (ret != DDI_SUCCESS) {
1217e1447ca9SPaul Winder 		mlxcx_warn(mlxp, "Failed to get supported interrupt types");
1218ebb7c6fdSAlex Wilson 		return (B_FALSE);
1219ebb7c6fdSAlex Wilson 	}
1220ebb7c6fdSAlex Wilson 
1221ebb7c6fdSAlex Wilson 	if (!(types & DDI_INTR_TYPE_MSIX)) {
1222ebb7c6fdSAlex Wilson 		mlxcx_warn(mlxp, "MSI-X interrupts not available, but mlxcx "
1223ebb7c6fdSAlex Wilson 		    "requires MSI-X");
1224ebb7c6fdSAlex Wilson 		return (B_FALSE);
1225ebb7c6fdSAlex Wilson 	}
1226ebb7c6fdSAlex Wilson 
1227ebb7c6fdSAlex Wilson 	ret = ddi_intr_get_nintrs(dip, DDI_INTR_TYPE_MSIX, &nintrs);
1228ebb7c6fdSAlex Wilson 	if (ret != DDI_SUCCESS) {
1229e1447ca9SPaul Winder 		mlxcx_warn(mlxp, "Failed to get number of interrupts");
1230ebb7c6fdSAlex Wilson 		return (B_FALSE);
1231ebb7c6fdSAlex Wilson 	}
1232ebb7c6fdSAlex Wilson 	if (nintrs < 2) {
1233260b7832SAndy Fiddaman 		mlxcx_warn(mlxp, "%d MSI-X interrupts supported, but mlxcx "
1234ebb7c6fdSAlex Wilson 		    "requires 2", nintrs);
1235ebb7c6fdSAlex Wilson 		return (B_FALSE);
1236ebb7c6fdSAlex Wilson 	}
1237ebb7c6fdSAlex Wilson 
1238ebb7c6fdSAlex Wilson 	ret = ddi_intr_get_navail(dip, DDI_INTR_TYPE_MSIX, &navail);
1239260b7832SAndy Fiddaman 	if (ret != DDI_SUCCESS) {
1240260b7832SAndy Fiddaman 		mlxcx_warn(mlxp,
1241260b7832SAndy Fiddaman 		    "Failed to get number of available interrupts");
1242260b7832SAndy Fiddaman 		return (B_FALSE);
1243260b7832SAndy Fiddaman 	}
1244ebb7c6fdSAlex Wilson 	if (navail < 2) {
1245ebb7c6fdSAlex Wilson 		mlxcx_warn(mlxp, "%d MSI-X interrupts available, but mlxcx "
1246ebb7c6fdSAlex Wilson 		    "requires 2", navail);
1247ebb7c6fdSAlex Wilson 		return (B_FALSE);
1248ebb7c6fdSAlex Wilson 	}
1249ebb7c6fdSAlex Wilson 
1250ebb7c6fdSAlex Wilson 	mlxp->mlx_intr_size = navail * sizeof (ddi_intr_handle_t);
1251ebb7c6fdSAlex Wilson 	mlxp->mlx_intr_handles = kmem_alloc(mlxp->mlx_intr_size, KM_SLEEP);
12525f0e3176SPaul Winder 	/*
12535f0e3176SPaul Winder 	 * Interrupts for Completion Queues events start from vector 1
12545f0e3176SPaul Winder 	 * up to available vectors. Vector 0 is used for asynchronous
12555f0e3176SPaul Winder 	 * events.
12565f0e3176SPaul Winder 	 */
12575f0e3176SPaul Winder 	mlxp->mlx_intr_cq0 = 1;
1258ebb7c6fdSAlex Wilson 
1259ebb7c6fdSAlex Wilson 	ret = ddi_intr_alloc(dip, mlxp->mlx_intr_handles, DDI_INTR_TYPE_MSIX,
1260ebb7c6fdSAlex Wilson 	    0, navail, &mlxp->mlx_intr_count, DDI_INTR_ALLOC_NORMAL);
1261ebb7c6fdSAlex Wilson 	if (ret != DDI_SUCCESS) {
1262e1447ca9SPaul Winder 		mlxcx_warn(mlxp, "Failed to allocate %d interrupts", navail);
1263ebb7c6fdSAlex Wilson 		mlxcx_intr_teardown(mlxp);
1264ebb7c6fdSAlex Wilson 		return (B_FALSE);
1265ebb7c6fdSAlex Wilson 	}
12665f0e3176SPaul Winder 	if (mlxp->mlx_intr_count < mlxp->mlx_intr_cq0 + 1) {
1267e1447ca9SPaul Winder 		mlxcx_warn(mlxp, "%d MSI-X interrupts allocated, but mlxcx "
1268e1447ca9SPaul Winder 		    "requires %d", mlxp->mlx_intr_count,
1269e1447ca9SPaul Winder 		    mlxp->mlx_intr_cq0 + 1);
1270ebb7c6fdSAlex Wilson 		mlxcx_intr_teardown(mlxp);
1271ebb7c6fdSAlex Wilson 		return (B_FALSE);
1272ebb7c6fdSAlex Wilson 	}
1273ebb7c6fdSAlex Wilson 	mlxp->mlx_intr_type = DDI_INTR_TYPE_MSIX;
1274ebb7c6fdSAlex Wilson 
1275ebb7c6fdSAlex Wilson 	ret = ddi_intr_get_pri(mlxp->mlx_intr_handles[0], &mlxp->mlx_intr_pri);
1276ebb7c6fdSAlex Wilson 	if (ret != DDI_SUCCESS) {
1277e1447ca9SPaul Winder 		mlxcx_warn(mlxp, "Failed to get interrupt priority");
1278ebb7c6fdSAlex Wilson 		mlxcx_intr_teardown(mlxp);
1279ebb7c6fdSAlex Wilson 		return (B_FALSE);
1280ebb7c6fdSAlex Wilson 	}
1281ebb7c6fdSAlex Wilson 
1282e1447ca9SPaul Winder 	/*
1283e1447ca9SPaul Winder 	 * Set the interrupt priority for the asynchronous handler higher
1284e1447ca9SPaul Winder 	 * than the ring handlers. Some operations which issue commands,
1285e1447ca9SPaul Winder 	 * and thus rely on the async interrupt handler for posting
1286e1447ca9SPaul Winder 	 * completion, do so with a CQ mutex held. The CQ mutex is also
1287e1447ca9SPaul Winder 	 * acquired during ring processing, so if the ring processing vector
1288e1447ca9SPaul Winder 	 * happens to be assigned to the same CPU as the async vector
1289e1447ca9SPaul Winder 	 * it can hold off the async interrupt thread and lead to a deadlock.
1290e1447ca9SPaul Winder 	 * By assigning a higher priority to the async vector, it will
1291e1447ca9SPaul Winder 	 * always be dispatched.
1292e1447ca9SPaul Winder 	 */
1293e1447ca9SPaul Winder 	mlxp->mlx_async_intr_pri = mlxp->mlx_intr_pri;
1294e1447ca9SPaul Winder 	if (mlxp->mlx_async_intr_pri < LOCK_LEVEL) {
1295e1447ca9SPaul Winder 		mlxp->mlx_async_intr_pri++;
1296e1447ca9SPaul Winder 	} else {
1297e1447ca9SPaul Winder 		mlxp->mlx_intr_pri--;
1298e1447ca9SPaul Winder 	}
1299e1447ca9SPaul Winder 
1300ebb7c6fdSAlex Wilson 	mlxp->mlx_eqs_size = mlxp->mlx_intr_count *
1301ebb7c6fdSAlex Wilson 	    sizeof (mlxcx_event_queue_t);
1302ebb7c6fdSAlex Wilson 	mlxp->mlx_eqs = kmem_zalloc(mlxp->mlx_eqs_size, KM_SLEEP);
1303ebb7c6fdSAlex Wilson 
13045f0e3176SPaul Winder 	/*
13055f0e3176SPaul Winder 	 * In the failure path, mlxcx_intr_teardown() expects this
13065f0e3176SPaul Winder 	 * mutex and avl tree to be init'ed - so do it now.
13075f0e3176SPaul Winder 	 */
13085f0e3176SPaul Winder 	for (i = 0; i < mlxp->mlx_intr_count; ++i) {
1309e1447ca9SPaul Winder 		uint_t pri = (i == 0) ? mlxp->mlx_async_intr_pri :
1310e1447ca9SPaul Winder 		    mlxp->mlx_intr_pri;
1311e1447ca9SPaul Winder 
13125f0e3176SPaul Winder 		mutex_init(&mlxp->mlx_eqs[i].mleq_mtx, NULL, MUTEX_DRIVER,
1313e1447ca9SPaul Winder 		    DDI_INTR_PRI(pri));
13140207f820SPaul Winder 		cv_init(&mlxp->mlx_eqs[i].mleq_cv, NULL, CV_DRIVER, NULL);
13155f0e3176SPaul Winder 
13165f0e3176SPaul Winder 		if (i < mlxp->mlx_intr_cq0)
13175f0e3176SPaul Winder 			continue;
13185f0e3176SPaul Winder 
13195f0e3176SPaul Winder 		avl_create(&mlxp->mlx_eqs[i].mleq_cqs, mlxcx_cq_compare,
13205f0e3176SPaul Winder 		    sizeof (mlxcx_completion_queue_t),
13215f0e3176SPaul Winder 		    offsetof(mlxcx_completion_queue_t, mlcq_eq_entry));
13225f0e3176SPaul Winder 	}
13235f0e3176SPaul Winder 
1324260b7832SAndy Fiddaman 	while (mlxp->mlx_async_intr_pri > DDI_INTR_PRI_MIN) {
1325260b7832SAndy Fiddaman 		ret = ddi_intr_set_pri(mlxp->mlx_intr_handles[0],
1326260b7832SAndy Fiddaman 		    mlxp->mlx_async_intr_pri);
1327260b7832SAndy Fiddaman 		if (ret == DDI_SUCCESS)
1328260b7832SAndy Fiddaman 			break;
1329260b7832SAndy Fiddaman 		mlxcx_note(mlxp,
1330260b7832SAndy Fiddaman 		    "!Failed to set interrupt priority to %u for "
1331e1447ca9SPaul Winder 		    "async interrupt vector", mlxp->mlx_async_intr_pri);
1332260b7832SAndy Fiddaman 		/*
1333260b7832SAndy Fiddaman 		 * If it was not possible to set the IPL for the async
1334260b7832SAndy Fiddaman 		 * interrupt to the desired value, then try a lower priority.
1335260b7832SAndy Fiddaman 		 * Some PSMs can only accommodate a limited number of vectors
1336260b7832SAndy Fiddaman 		 * at eatch priority level (or group of priority levels). Since
1337260b7832SAndy Fiddaman 		 * the async priority must be set higher than the ring
1338260b7832SAndy Fiddaman 		 * handlers, lower both. The ring handler priority is set
1339260b7832SAndy Fiddaman 		 * below.
1340260b7832SAndy Fiddaman 		 */
1341260b7832SAndy Fiddaman 		mlxp->mlx_async_intr_pri--;
1342260b7832SAndy Fiddaman 		mlxp->mlx_intr_pri--;
1343260b7832SAndy Fiddaman 	}
1344260b7832SAndy Fiddaman 
1345260b7832SAndy Fiddaman 	if (mlxp->mlx_async_intr_pri == DDI_INTR_PRI_MIN) {
1346260b7832SAndy Fiddaman 		mlxcx_warn(mlxp, "Failed to find an interrupt priority for "
1347260b7832SAndy Fiddaman 		    "async interrupt vector");
1348e1447ca9SPaul Winder 		mlxcx_intr_teardown(mlxp);
1349e1447ca9SPaul Winder 		return (B_FALSE);
1350e1447ca9SPaul Winder 	}
1351e1447ca9SPaul Winder 
13525f0e3176SPaul Winder 	ret = ddi_intr_add_handler(mlxp->mlx_intr_handles[0], mlxcx_intr_async,
1353ebb7c6fdSAlex Wilson 	    (caddr_t)mlxp, (caddr_t)&mlxp->mlx_eqs[0]);
1354ebb7c6fdSAlex Wilson 	if (ret != DDI_SUCCESS) {
1355e1447ca9SPaul Winder 		mlxcx_warn(mlxp, "Failed to add async interrupt handler");
1356ebb7c6fdSAlex Wilson 		mlxcx_intr_teardown(mlxp);
1357ebb7c6fdSAlex Wilson 		return (B_FALSE);
1358ebb7c6fdSAlex Wilson 	}
1359ebb7c6fdSAlex Wilson 
1360ebb7c6fdSAlex Wilson 	/*
1361ebb7c6fdSAlex Wilson 	 * If we have enough interrupts, set their "type" fields so that we
1362ebb7c6fdSAlex Wilson 	 * avoid mixing RX and TX queues on the same EQs.
1363ebb7c6fdSAlex Wilson 	 */
1364ebb7c6fdSAlex Wilson 	if (mlxp->mlx_intr_count >= 8) {
1365ebb7c6fdSAlex Wilson 		eqt = MLXCX_EQ_TYPE_RX;
1366ebb7c6fdSAlex Wilson 	}
1367ebb7c6fdSAlex Wilson 
13685f0e3176SPaul Winder 	for (i = mlxp->mlx_intr_cq0; i < mlxp->mlx_intr_count; ++i) {
1369ebb7c6fdSAlex Wilson 		mlxp->mlx_eqs[i].mleq_intr_index = i;
1370ebb7c6fdSAlex Wilson 
1371ebb7c6fdSAlex Wilson 		mlxp->mlx_eqs[i].mleq_type = eqt;
1372ebb7c6fdSAlex Wilson 		/*
1373ebb7c6fdSAlex Wilson 		 * If eqt is still ANY, just leave it set to that
1374ebb7c6fdSAlex Wilson 		 * (no else here).
1375ebb7c6fdSAlex Wilson 		 */
1376ebb7c6fdSAlex Wilson 		if (eqt == MLXCX_EQ_TYPE_RX) {
1377ebb7c6fdSAlex Wilson 			eqt = MLXCX_EQ_TYPE_TX;
1378ebb7c6fdSAlex Wilson 		} else if (eqt == MLXCX_EQ_TYPE_TX) {
1379ebb7c6fdSAlex Wilson 			eqt = MLXCX_EQ_TYPE_RX;
1380ebb7c6fdSAlex Wilson 		}
1381ebb7c6fdSAlex Wilson 
1382260b7832SAndy Fiddaman 		while (mlxp->mlx_intr_pri >= DDI_INTR_PRI_MIN) {
1383260b7832SAndy Fiddaman 			ret = ddi_intr_set_pri(mlxp->mlx_intr_handles[i],
1384260b7832SAndy Fiddaman 			    mlxp->mlx_intr_pri);
1385260b7832SAndy Fiddaman 			if (ret == DDI_SUCCESS)
1386260b7832SAndy Fiddaman 				break;
1387260b7832SAndy Fiddaman 			mlxcx_note(mlxp, "!Failed to set interrupt priority to "
1388260b7832SAndy Fiddaman 			    "%u for interrupt vector %d",
1389260b7832SAndy Fiddaman 			    mlxp->mlx_intr_pri, i);
1390260b7832SAndy Fiddaman 			mlxp->mlx_intr_pri--;
1391260b7832SAndy Fiddaman 		}
1392260b7832SAndy Fiddaman 		if (mlxp->mlx_intr_pri < DDI_INTR_PRI_MIN) {
1393260b7832SAndy Fiddaman 			mlxcx_warn(mlxp,
1394260b7832SAndy Fiddaman 			    "Failed to find an interrupt priority for "
1395260b7832SAndy Fiddaman 			    "interrupt vector %d", i);
1396e1447ca9SPaul Winder 			mlxcx_intr_teardown(mlxp);
1397e1447ca9SPaul Winder 			return (B_FALSE);
1398e1447ca9SPaul Winder 		}
1399e1447ca9SPaul Winder 
1400ebb7c6fdSAlex Wilson 		ret = ddi_intr_add_handler(mlxp->mlx_intr_handles[i],
1401ebb7c6fdSAlex Wilson 		    mlxcx_intr_n, (caddr_t)mlxp, (caddr_t)&mlxp->mlx_eqs[i]);
1402ebb7c6fdSAlex Wilson 		if (ret != DDI_SUCCESS) {
1403e1447ca9SPaul Winder 			mlxcx_warn(mlxp, "Failed to add interrupt handler %d",
1404e1447ca9SPaul Winder 			    i);
1405ebb7c6fdSAlex Wilson 			mlxcx_intr_teardown(mlxp);
1406ebb7c6fdSAlex Wilson 			return (B_FALSE);
1407ebb7c6fdSAlex Wilson 		}
1408ebb7c6fdSAlex Wilson 	}
1409ebb7c6fdSAlex Wilson 
1410ebb7c6fdSAlex Wilson 	return (B_TRUE);
1411ebb7c6fdSAlex Wilson }
1412