xref: /illumos-gate/usr/src/uts/common/io/i40e/i40e_intr.c (revision 09aee612)
1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2018 Joyent, Inc.
14  * Copyright 2017 Tegile Systems, Inc.  All rights reserved.
15  */
16 
17 /*
18  * -------------------------
19  * Interrupt Handling Theory
20  * -------------------------
21  *
22  * There are a couple different sets of interrupts that we need to worry about:
23  *
24  *   - Interrupts from receive queues
25  *   - Interrupts from transmit queues
26  *   - 'Other Interrupts', such as the administrative queue
27  *
28  * 'Other Interrupts' are asynchronous events such as a link status change event
29  * being posted to the administrative queue, unrecoverable ECC errors, and more.
30  * If we have something being posted to the administrative queue, then we go
31  * through and process it, because it's generally enabled as a separate logical
32  * interrupt. Note, we may need to do more here eventually. To re-enable the
33  * interrupts from the 'Other Interrupts' section, we need to clear the PBA and
34  * write ENA to PFINT_ICR0.
35  *
36  * Interrupts from the transmit and receive queues indicate that our requests
37  * have been processed. In the rx case, it means that we have data that we
38  * should take a look at and send up the stack. In the tx case, it means that
39  * data which we got from MAC has now been sent out on the wire and we can free
40  * the associated data. Most of the logic for acting upon the presence of this
41  * data can be found in i40e_transceiver.c which handles all of the DMA, rx, and
42  * tx operations. This file is dedicated to handling and dealing with interrupt
43  * processing.
44  *
45  * All devices supported by this driver support three kinds of interrupts:
46  *
47  *   o Extended Message Signaled Interrupts (MSI-X)
48  *   o Message Signaled Interrupts (MSI)
49  *   o Legacy PCI interrupts (INTx)
50  *
51  * Generally speaking the hardware logically handles MSI and INTx the same and
52  * restricts us to only using a single interrupt, which isn't the interesting
53  * case. With MSI-X available, each physical function of the device provides the
54  * opportunity for multiple interrupts which is what we'll focus on.
55  *
56  * --------------------
57  * Interrupt Management
58  * --------------------
59  *
60  * By default, the admin queue, which consists of the asynchronous other
61  * interrupts is always bound to MSI-X vector zero. Next, we spread out all of
62  * the other interrupts that we have available to us over the remaining
63  * interrupt vectors.
64  *
65  * This means that there may be multiple queues, both tx and rx, which are
66  * mapped to the same interrupt. When the interrupt fires, we'll have to check
67  * all of them for servicing, before we go through and indicate that the
68  * interrupt is claimed.
69  *
70  * The hardware provides the means of mapping various queues to MSI-X interrupts
71  * by programming the I40E_QINT_RQCTL() and I40E_QINT_TQCTL() registers. These
72  * registers can also be used to enable and disable whether or not the queue is
73  * a source of interrupts. As part of this, the hardware requires that we
74  * maintain a linked list of queues for each interrupt vector. While it may seem
75  * like this is only there for the purposes of ITRs, that's not the case. The
76  * first queue must be programmed in I40E_QINT_LNKLSTN(%vector) register. Each
77  * queue defines the next one in either the I40E_QINT_RQCTL or I40E_QINT_TQCTL
78  * register.
79  *
80  * Finally, the individual interrupt vector itself has the ability to be enabled
81  * and disabled. The overall interrupt is controlled through the
82  * I40E_PFINT_DYN_CTLN() register. This is used to turn on and off the interrupt
83  * as a whole.
84  *
85  * Note that this means that both the individual queue and the interrupt as a
86  * whole can be toggled and re-enabled.
87  *
88  * -------------------
89  * Non-MSIX Management
90  * -------------------
91  *
92  * We may have a case where the Operating System is unable to actually allocate
93  * any MSI-X to the system. In such a world, there is only one transmit/receive
94  * queue pair and it is bound to the same interrupt with index zero. The
95  * hardware doesn't allow us access to additional interrupt vectors in these
96  * modes. Note that technically we could support more transmit/receive queues if
97  * we wanted.
98  *
99  * In this world, because the interrupts for the admin queue and traffic are
100  * mixed together, we have to consult ICR0 to determine what has occurred. The
101  * QINT_TQCTL and QINT_RQCTL registers have a field, 'MSI-X 0 index' which
102  * allows us to set a specific bit in ICR0. There are up to seven such bits;
103  * however, we only use the bit 0 and 1 for the rx and tx queue respectively.
104  * These are described by the I40E_INTR_NOTX_{R|T}X_QUEUE and
105  * I40E_INTR_NOTX_{R|T}X_MASK macros respectively.
106  *
107  * Unfortunately, these corresponding queue bits have no corresponding entry in
108  * the ICR0_ENA register. So instead, when enabling interrupts on the queues, we
109  * end up enabling it on the queue registers rather than on the MSI-X registers.
110  * In the MSI-X world, because they can be enabled and disabled, this is
111  * different and the queues can always be enabled and disabled, but the
112  * interrupts themselves are toggled (ignoring the question of interrupt
113  * blanking for polling on rings).
114  *
115  * Finally, we still have to set up the interrupt linked list, but the list is
116  * instead rooted at the register I40E_PFINT_LNKLST0, rather than being tied to
117  * one of the other MSI-X registers.
118  *
119  * --------------------
120  * Interrupt Moderation
121  * --------------------
122  *
123  * The XL710 hardware has three different interrupt moderation registers per
124  * interrupt. Unsurprisingly, we use these for:
125  *
126  *   o RX interrupts
127  *   o TX interrupts
128  *   o 'Other interrupts' (link status change, admin queue, etc.)
129  *
130  * By default, we throttle 'other interrupts' the most, then TX interrupts, and
131  * then RX interrupts. The default values for these were based on trying to
132  * reason about both the importance and frequency of events. Generally speaking
133  * 'other interrupts' are not very frequent and they're not important for the
134  * I/O data path in and of itself (though they may indicate issues with the I/O
135  * data path).
136  *
137  * On the flip side, when we're not polling, RX interrupts are very important.
138  * The longer we wait for them, the more latency that we inject into the system.
139  * However, if we allow interrupts to occur too frequently, we risk a few
140  * problems:
141  *
142  *  1) Abusing system resources. Without proper interrupt blanking and polling,
143  *     we can see upwards of 200k-300k interrupts per second on the system.
144  *
145  *  2) Not enough data coalescing to enable polling. In other words, the more
146  *     data that we allow to build up, the more likely we'll be able to enable
147  *     polling mode and allowing us to better handle bulk data.
148  *
149  * In-between the 'other interrupts' and the TX interrupts we have the
150  * reclamation of TX buffers. This operation is not quite as important as we
151  * generally size the ring large enough that we should be able to reclaim a
152  * substantial amount of the descriptors that we have used per interrupt. So
153  * while it's important that this interrupt occur, we don't necessarily need it
154  * firing as frequently as RX; it doesn't, on its own, induce additional latency
155  * into the system.
156  *
157  * Based on all this we currently assign static ITR values for the system. While
158  * we could move to a dynamic system (the hardware supports that), we'd want to
159  * make sure that we're seeing problems from this that we believe would be
160  * generally helped by the added complexity.
161  *
162  * Based on this, the default values that we have allow for the following
163  * interrupt thresholds:
164  *
165  *    o 20k interrupts/s for RX
166  *    o 5k interrupts/s for TX
167  *    o 2k interrupts/s for 'Other Interrupts'
168  */
169 
170 #include "i40e_sw.h"
171 
/*
 * Constants for the non-MSI-X case, where a single tx/rx queue pair shares
 * interrupt zero with the 'Other Interrupts'. The rx queue is reported through
 * the ICR0 'queue 0' bit and the tx queue through the ICR0 'queue 1' bit. The
 * masks are unsigned because they are only ever tested against 32-bit
 * register values.
 */
#define	I40E_INTR_NOTX_QUEUE	0
#define	I40E_INTR_NOTX_INTR	0
#define	I40E_INTR_NOTX_RX_QUEUE	0
#define	I40E_INTR_NOTX_RX_MASK	(1U << I40E_PFINT_ICR0_QUEUE_0_SHIFT)
#define	I40E_INTR_NOTX_TX_QUEUE	1
#define	I40E_INTR_NOTX_TX_MASK	(1U << I40E_PFINT_ICR0_QUEUE_1_SHIFT)
178 
179 void
180 i40e_intr_set_itr(i40e_t *i40e, i40e_itr_index_t itr, uint_t val)
181 {
182 	int i;
183 	i40e_hw_t *hw = &i40e->i40e_hw_space;
184 
185 	VERIFY3U(val, <=, I40E_MAX_ITR);
186 	VERIFY3U(itr, <, I40E_ITR_INDEX_NONE);
187 
188 	/*
189 	 * No matter the interrupt mode, the ITR for other interrupts is always
190 	 * on interrupt zero and the same is true if we're not using MSI-X.
191 	 */
192 	if (itr == I40E_ITR_INDEX_OTHER ||
193 	    i40e->i40e_intr_type != DDI_INTR_TYPE_MSIX) {
194 		I40E_WRITE_REG(hw, I40E_PFINT_ITR0(itr), val);
195 		return;
196 	}
197 
198 	for (i = 0; i < i40e->i40e_num_trqpairs; i++) {
199 		I40E_WRITE_REG(hw, I40E_PFINT_ITRN(itr, i), val);
200 	}
201 }
202 
203 /*
204  * Re-enable the adminq. Note that the adminq doesn't have a traditional queue
205  * associated with it from an interrupt perspective and just lives on ICR0.
206  * However when MSI-X interrupts are not being used, then this also enables and
207  * disables those interrupts.
208  */
209 static void
210 i40e_intr_adminq_enable(i40e_t *i40e)
211 {
212 	i40e_hw_t *hw = &i40e->i40e_hw_space;
213 	uint32_t reg;
214 
215 	reg = I40E_PFINT_DYN_CTL0_INTENA_MASK |
216 	    I40E_PFINT_DYN_CTL0_CLEARPBA_MASK |
217 	    (I40E_ITR_INDEX_NONE << I40E_PFINT_DYN_CTL0_ITR_INDX_SHIFT);
218 	I40E_WRITE_REG(hw, I40E_PFINT_DYN_CTL0, reg);
219 	i40e_flush(hw);
220 }
221 
222 static void
223 i40e_intr_adminq_disable(i40e_t *i40e)
224 {
225 	i40e_hw_t *hw = &i40e->i40e_hw_space;
226 	uint32_t reg;
227 
228 	reg = I40E_ITR_INDEX_NONE << I40E_PFINT_DYN_CTL0_ITR_INDX_SHIFT;
229 	I40E_WRITE_REG(hw, I40E_PFINT_DYN_CTL0, reg);
230 }
231 
232 /*
233  * The next two functions enable/disable the reception of interrupts
234  * on the given vector. Only vectors 1..N are programmed by these
235  * functions; vector 0 is special and handled by a different register.
236  * We must subtract one from the vector because i40e implicitly adds
237  * one to the vector value. See section 10.2.2.10.13 for more details.
238  */
239 static void
240 i40e_intr_io_enable(i40e_t *i40e, int vector)
241 {
242 	uint32_t reg;
243 	i40e_hw_t *hw = &i40e->i40e_hw_space;
244 
245 	ASSERT3S(vector, >, 0);
246 	reg = I40E_PFINT_DYN_CTLN_INTENA_MASK |
247 	    I40E_PFINT_DYN_CTLN_CLEARPBA_MASK |
248 	    (I40E_ITR_INDEX_NONE << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT);
249 	I40E_WRITE_REG(hw, I40E_PFINT_DYN_CTLN(vector - 1), reg);
250 }
251 
252 static void
253 i40e_intr_io_disable(i40e_t *i40e, int vector)
254 {
255 	uint32_t reg;
256 	i40e_hw_t *hw = &i40e->i40e_hw_space;
257 
258 	ASSERT3S(vector, >, 0);
259 	reg = I40E_ITR_INDEX_NONE << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT;
260 	I40E_WRITE_REG(hw, I40E_PFINT_DYN_CTLN(vector - 1), reg);
261 }
262 
263 /*
264  * When MSI-X interrupts are being used, then we can enable the actual
265  * interrupts themselves. However, when they are not, we instead have to turn
266  * towards the queue's CAUSE_ENA bit and enable that.
267  */
268 void
269 i40e_intr_io_enable_all(i40e_t *i40e)
270 {
271 	if (i40e->i40e_intr_type == DDI_INTR_TYPE_MSIX) {
272 		int i;
273 
274 		for (i = 1; i < i40e->i40e_intr_count; i++) {
275 			i40e_intr_io_enable(i40e, i);
276 		}
277 	} else {
278 		uint32_t reg;
279 		i40e_hw_t *hw = &i40e->i40e_hw_space;
280 
281 		reg = I40E_READ_REG(hw, I40E_QINT_RQCTL(I40E_INTR_NOTX_QUEUE));
282 		reg |= I40E_QINT_RQCTL_CAUSE_ENA_MASK;
283 		I40E_WRITE_REG(hw, I40E_QINT_RQCTL(I40E_INTR_NOTX_QUEUE), reg);
284 
285 		reg = I40E_READ_REG(hw, I40E_QINT_TQCTL(I40E_INTR_NOTX_QUEUE));
286 		reg |= I40E_QINT_TQCTL_CAUSE_ENA_MASK;
287 		I40E_WRITE_REG(hw, I40E_QINT_TQCTL(I40E_INTR_NOTX_QUEUE), reg);
288 	}
289 }
290 
291 /*
292  * When MSI-X interrupts are being used, then we can disable the actual
293  * interrupts themselves. However, when they are not, we instead have to turn
294  * towards the queue's CAUSE_ENA bit and disable that.
295  */
296 void
297 i40e_intr_io_disable_all(i40e_t *i40e)
298 {
299 	if (i40e->i40e_intr_type == DDI_INTR_TYPE_MSIX) {
300 		int i;
301 
302 		for (i = 1; i < i40e->i40e_intr_count; i++) {
303 			i40e_intr_io_disable(i40e, i);
304 		}
305 	} else {
306 		uint32_t reg;
307 		i40e_hw_t *hw = &i40e->i40e_hw_space;
308 
309 		reg = I40E_READ_REG(hw, I40E_QINT_RQCTL(I40E_INTR_NOTX_QUEUE));
310 		reg &= ~I40E_QINT_RQCTL_CAUSE_ENA_MASK;
311 		I40E_WRITE_REG(hw, I40E_QINT_RQCTL(I40E_INTR_NOTX_QUEUE), reg);
312 
313 		reg = I40E_READ_REG(hw, I40E_QINT_TQCTL(I40E_INTR_NOTX_QUEUE));
314 		reg &= ~I40E_QINT_TQCTL_CAUSE_ENA_MASK;
315 		I40E_WRITE_REG(hw, I40E_QINT_TQCTL(I40E_INTR_NOTX_QUEUE), reg);
316 	}
317 }
318 
319 /*
320  * As part of disabling the tx and rx queue's we're technically supposed to
321  * remove the linked list entries. The simplest way is to clear the LNKLSTN
322  * register by setting it to I40E_QUEUE_TYPE_EOL (0x7FF).
323  *
324  * Note all of the FM register access checks are performed by the caller.
325  */
326 void
327 i40e_intr_io_clear_cause(i40e_t *i40e)
328 {
329 	int i;
330 	i40e_hw_t *hw = &i40e->i40e_hw_space;
331 
332 	if (i40e->i40e_intr_type != DDI_INTR_TYPE_MSIX) {
333 		uint32_t reg;
334 		reg = I40E_QUEUE_TYPE_EOL;
335 		I40E_WRITE_REG(hw, I40E_PFINT_LNKLST0, reg);
336 		return;
337 	}
338 
339 	for (i = 0; i < i40e->i40e_num_trqpairs; i++) {
340 		uint32_t reg;
341 #ifdef DEBUG
342 		/*
343 		 * Verify that the interrupt in question is disabled. This is a
344 		 * prerequisite of modifying the data in question.
345 		 */
346 		reg = I40E_READ_REG(hw, I40E_PFINT_DYN_CTLN(i));
347 		VERIFY0(reg & I40E_PFINT_DYN_CTLN_INTENA_MASK);
348 #endif
349 		reg = I40E_QUEUE_TYPE_EOL;
350 		I40E_WRITE_REG(hw, I40E_PFINT_LNKLSTN(i), reg);
351 	}
352 
353 	i40e_flush(hw);
354 }
355 
356 /*
357  * Finalize interrupt handling. Mostly this disables the admin queue.
358  */
359 void
360 i40e_intr_chip_fini(i40e_t *i40e)
361 {
362 #ifdef DEBUG
363 	int i;
364 	uint32_t reg;
365 
366 	i40e_hw_t *hw = &i40e->i40e_hw_space;
367 
368 	/*
369 	 * Take a look and verify that all other interrupts have been disabled
370 	 * and the interrupt linked lists have been zeroed.
371 	 */
372 	if (i40e->i40e_intr_type == DDI_INTR_TYPE_MSIX) {
373 		for (i = 0; i < i40e->i40e_num_trqpairs; i++) {
374 			reg = I40E_READ_REG(hw, I40E_PFINT_DYN_CTLN(i));
375 			VERIFY0(reg & I40E_PFINT_DYN_CTLN_INTENA_MASK);
376 
377 			reg = I40E_READ_REG(hw, I40E_PFINT_LNKLSTN(i));
378 			VERIFY3U(reg, ==, I40E_QUEUE_TYPE_EOL);
379 		}
380 	}
381 #endif
382 
383 	i40e_intr_adminq_disable(i40e);
384 }
385 
386 /*
387  * Set the head of the interrupt linked list. The PFINT_LNKLSTN[N]
388  * register actually refers to the 'N + 1' interrupt vector. E.g.,
389  * PFINT_LNKLSTN[0] refers to interrupt vector 1.
390  */
391 static void
392 i40e_set_lnklstn(i40e_t *i40e, uint_t vector, uint_t queue)
393 {
394 	uint32_t	reg;
395 	i40e_hw_t	*hw = &i40e->i40e_hw_space;
396 
397 	reg = (queue << I40E_PFINT_LNKLSTN_FIRSTQ_INDX_SHIFT) |
398 	    (I40E_QUEUE_TYPE_RX << I40E_PFINT_LNKLSTN_FIRSTQ_TYPE_SHIFT);
399 
400 	I40E_WRITE_REG(hw, I40E_PFINT_LNKLSTN(vector), reg);
401 	DEBUGOUT2("PFINT_LNKLSTN[%u] = 0x%x", vector, reg);
402 }
403 
404 /*
405  * Set the QINT_RQCTL[queue] register. The next queue is always the Tx
406  * queue associated with this Rx queue. Unlike PFINT_LNKLSTN, the
407  * vector should be the actual vector this queue is on -- i.e., it
408  * should be equal to itrq_rx_intrvec.
409  */
410 static void
411 i40e_set_rqctl(i40e_t *i40e, uint_t vector, uint_t queue)
412 {
413 	uint32_t	reg;
414 	i40e_hw_t	*hw = &i40e->i40e_hw_space;
415 
416 	ASSERT3U(vector, ==, i40e->i40e_trqpairs[queue].itrq_rx_intrvec);
417 
418 	reg = (vector << I40E_QINT_RQCTL_MSIX_INDX_SHIFT) |
419 	    (I40E_ITR_INDEX_RX << I40E_QINT_RQCTL_ITR_INDX_SHIFT) |
420 	    (queue << I40E_QINT_RQCTL_NEXTQ_INDX_SHIFT) |
421 	    (I40E_QUEUE_TYPE_TX << I40E_QINT_RQCTL_NEXTQ_TYPE_SHIFT) |
422 	    I40E_QINT_RQCTL_CAUSE_ENA_MASK;
423 
424 	I40E_WRITE_REG(hw, I40E_QINT_RQCTL(queue), reg);
425 	DEBUGOUT2("QINT_RQCTL[%u] = 0x%x", queue, reg);
426 }
427 
428 /*
429  * Like i40e_set_rqctl(), but for QINT_TQCTL[queue]. The next queue is
430  * either the Rx queue of another TRQP, or EOL.
431  */
432 static void
433 i40e_set_tqctl(i40e_t *i40e, uint_t vector, uint_t queue, uint_t next_queue)
434 {
435 	uint32_t	reg;
436 	i40e_hw_t	*hw = &i40e->i40e_hw_space;
437 
438 	ASSERT3U(vector, ==, i40e->i40e_trqpairs[queue].itrq_tx_intrvec);
439 
440 	reg = (vector << I40E_QINT_TQCTL_MSIX_INDX_SHIFT) |
441 	    (I40E_ITR_INDEX_TX << I40E_QINT_TQCTL_ITR_INDX_SHIFT) |
442 	    (next_queue << I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT) |
443 	    (I40E_QUEUE_TYPE_RX << I40E_QINT_TQCTL_NEXTQ_TYPE_SHIFT) |
444 	    I40E_QINT_TQCTL_CAUSE_ENA_MASK;
445 
446 	I40E_WRITE_REG(hw, I40E_QINT_TQCTL(queue), reg);
447 	DEBUGOUT2("QINT_TQCTL[%u] = 0x%x", queue, reg);
448 }
449 
450 /*
451  * Program the interrupt linked list. Each vector has a linked list of
452  * queues which act as event sources for that vector. When one of
453  * those sources has an event the associated interrupt vector is
454  * fired. This mapping must match the mapping found in
455  * i40e_map_intrs_to_vectors().
456  *
457  * See section 7.5.3 for more information about the configuration of
458  * the interrupt linked list.
459  */
460 static void
461 i40e_intr_init_queue_msix(i40e_t *i40e)
462 {
463 	uint_t intr_count;
464 
465 	/*
466 	 * The 0th vector is for 'Other Interrupts' only (subject to
467 	 * change in the future).
468 	 */
469 	intr_count = i40e->i40e_intr_count - 1;
470 
471 	for (uint_t vec = 0; vec < intr_count; vec++) {
472 		boolean_t head = B_TRUE;
473 
474 		for (uint_t qidx = vec; qidx < i40e->i40e_num_trqpairs;
475 		     qidx += intr_count) {
476 			uint_t next_qidx = qidx + intr_count;
477 
478 			next_qidx = (next_qidx > i40e->i40e_num_trqpairs) ?
479 			    I40E_QUEUE_TYPE_EOL : next_qidx;
480 
481 			if (head) {
482 				i40e_set_lnklstn(i40e, vec, qidx);
483 				head = B_FALSE;
484 			}
485 
486 			i40e_set_rqctl(i40e, vec + 1, qidx);
487 			i40e_set_tqctl(i40e, vec + 1, qidx, next_qidx);
488 		}
489 	}
490 }
491 
492 /*
493  * Set up a single queue to share the admin queue interrupt in the non-MSI-X
494  * world. Note we do not enable the queue as an interrupt cause at this time. We
495  * don't have any other vector of control here, unlike with the MSI-X interrupt
496  * case.
497  */
498 static void
499 i40e_intr_init_queue_shared(i40e_t *i40e)
500 {
501 	i40e_hw_t *hw = &i40e->i40e_hw_space;
502 	uint32_t reg;
503 
504 	VERIFY(i40e->i40e_intr_type == DDI_INTR_TYPE_FIXED ||
505 	    i40e->i40e_intr_type == DDI_INTR_TYPE_MSI);
506 
507 	reg = (I40E_INTR_NOTX_QUEUE << I40E_PFINT_LNKLST0_FIRSTQ_INDX_SHIFT) |
508 	    (I40E_QUEUE_TYPE_RX << I40E_PFINT_LNKLSTN_FIRSTQ_TYPE_SHIFT);
509 	I40E_WRITE_REG(hw, I40E_PFINT_LNKLST0, reg);
510 
511 	reg = (I40E_INTR_NOTX_INTR << I40E_QINT_RQCTL_MSIX_INDX_SHIFT) |
512 	    (I40E_ITR_INDEX_RX << I40E_QINT_RQCTL_ITR_INDX_SHIFT) |
513 	    (I40E_INTR_NOTX_RX_QUEUE << I40E_QINT_RQCTL_MSIX0_INDX_SHIFT) |
514 	    (I40E_INTR_NOTX_QUEUE << I40E_QINT_RQCTL_NEXTQ_INDX_SHIFT) |
515 	    (I40E_QUEUE_TYPE_TX << I40E_QINT_RQCTL_NEXTQ_TYPE_SHIFT);
516 
517 	I40E_WRITE_REG(hw, I40E_QINT_RQCTL(I40E_INTR_NOTX_QUEUE), reg);
518 
519 	reg = (I40E_INTR_NOTX_INTR << I40E_QINT_TQCTL_MSIX_INDX_SHIFT) |
520 	    (I40E_ITR_INDEX_TX << I40E_QINT_TQCTL_ITR_INDX_SHIFT) |
521 	    (I40E_INTR_NOTX_TX_QUEUE << I40E_QINT_TQCTL_MSIX0_INDX_SHIFT) |
522 	    (I40E_QUEUE_TYPE_EOL << I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT) |
523 	    (I40E_QUEUE_TYPE_RX << I40E_QINT_TQCTL_NEXTQ_TYPE_SHIFT);
524 
525 	I40E_WRITE_REG(hw, I40E_QINT_TQCTL(I40E_INTR_NOTX_QUEUE), reg);
526 }
527 
528 /*
529  * Enable the specified queue as a valid source of interrupts. Note, this should
530  * only be used as part of the GLDv3's interrupt blanking routines. The debug
531  * build assertions are specific to that.
532  */
533 void
534 i40e_intr_rx_queue_enable(i40e_trqpair_t *itrq)
535 {
536 	uint32_t reg;
537 	uint_t queue = itrq->itrq_index;
538 	i40e_hw_t *hw = &itrq->itrq_i40e->i40e_hw_space;
539 
540 	ASSERT(MUTEX_HELD(&itrq->itrq_rx_lock));
541 	ASSERT(queue < itrq->itrq_i40e->i40e_num_trqpairs);
542 
543 	reg = I40E_READ_REG(hw, I40E_QINT_RQCTL(queue));
544 	ASSERT0(reg & I40E_QINT_RQCTL_CAUSE_ENA_MASK);
545 	reg |= I40E_QINT_RQCTL_CAUSE_ENA_MASK;
546 	I40E_WRITE_REG(hw, I40E_QINT_RQCTL(queue), reg);
547 }
548 
549 /*
550  * Disable the specified queue as a valid source of interrupts. Note, this
551  * should only be used as part of the GLDv3's interrupt blanking routines. The
552  * debug build assertions are specific to that.
553  */
554 void
555 i40e_intr_rx_queue_disable(i40e_trqpair_t *itrq)
556 {
557 	uint32_t reg;
558 	uint_t queue = itrq->itrq_index;
559 	i40e_hw_t *hw = &itrq->itrq_i40e->i40e_hw_space;
560 
561 	ASSERT(MUTEX_HELD(&itrq->itrq_rx_lock));
562 	ASSERT(queue < itrq->itrq_i40e->i40e_num_trqpairs);
563 
564 	reg = I40E_READ_REG(hw, I40E_QINT_RQCTL(queue));
565 	ASSERT3U(reg & I40E_QINT_RQCTL_CAUSE_ENA_MASK, ==,
566 	    I40E_QINT_RQCTL_CAUSE_ENA_MASK);
567 	reg &= ~I40E_QINT_RQCTL_CAUSE_ENA_MASK;
568 	I40E_WRITE_REG(hw, I40E_QINT_RQCTL(queue), reg);
569 }
570 
571 /*
572  * Start up the various chip's interrupt handling. We not only configure the
573  * adminq here, but we also go through and configure all of the actual queues,
574  * the interrupt linked lists, and others.
575  */
576 void
577 i40e_intr_chip_init(i40e_t *i40e)
578 {
579 	i40e_hw_t *hw = &i40e->i40e_hw_space;
580 	uint32_t reg;
581 
582 	/*
583 	 * Ensure that all non adminq interrupts are disabled at the chip level.
584 	 */
585 	i40e_intr_io_disable_all(i40e);
586 
587 	I40E_WRITE_REG(hw, I40E_PFINT_ICR0_ENA, 0);
588 	(void) I40E_READ_REG(hw, I40E_PFINT_ICR0);
589 
590 	/*
591 	 * Always enable all of the other-class interrupts to be on their own
592 	 * ITR. This only needs to be set on interrupt zero, which has its own
593 	 * special setting.
594 	 */
595 	reg = I40E_ITR_INDEX_OTHER << I40E_PFINT_STAT_CTL0_OTHER_ITR_INDX_SHIFT;
596 	I40E_WRITE_REG(hw, I40E_PFINT_STAT_CTL0, reg);
597 
598 	/*
599 	 * Enable interrupt types we expect to receive. At the moment, this
600 	 * is limited to the adminq; however, we'll want to review 11.2.2.9.22
601 	 * for more types here as we add support for detecting them, handling
602 	 * them, and resetting the device as appropriate.
603 	 */
604 	reg = I40E_PFINT_ICR0_ENA_ADMINQ_MASK;
605 	I40E_WRITE_REG(hw, I40E_PFINT_ICR0_ENA, reg);
606 
607 	/*
608 	 * Always set the interrupt linked list to empty. We'll come back and
609 	 * change this if MSI-X are actually on the scene.
610 	 */
611 	I40E_WRITE_REG(hw, I40E_PFINT_LNKLST0, I40E_QUEUE_TYPE_EOL);
612 
613 	i40e_intr_adminq_enable(i40e);
614 
615 	/*
616 	 * Set up all of the queues and map them to interrupts based on the bit
617 	 * assignments.
618 	 */
619 	if (i40e->i40e_intr_type == DDI_INTR_TYPE_MSIX) {
620 		i40e_intr_init_queue_msix(i40e);
621 	} else {
622 		i40e_intr_init_queue_shared(i40e);
623 	}
624 
625 	/*
626 	 * Finally set all of the default ITRs for the interrupts. Note that the
627 	 * queues will have been set up above.
628 	 */
629 	i40e_intr_set_itr(i40e, I40E_ITR_INDEX_RX, i40e->i40e_rx_itr);
630 	i40e_intr_set_itr(i40e, I40E_ITR_INDEX_TX, i40e->i40e_tx_itr);
631 	i40e_intr_set_itr(i40e, I40E_ITR_INDEX_OTHER, i40e->i40e_other_itr);
632 }
633 
634 static void
635 i40e_intr_adminq_work(i40e_t *i40e)
636 {
637 	struct i40e_hw *hw = &i40e->i40e_hw_space;
638 	struct i40e_arq_event_info evt;
639 	uint16_t remain = 1;
640 
641 	bzero(&evt, sizeof (struct i40e_arq_event_info));
642 	evt.buf_len = I40E_ADMINQ_BUFSZ;
643 	evt.msg_buf = i40e->i40e_aqbuf;
644 
645 	while (remain != 0) {
646 		enum i40e_status_code ret;
647 		uint16_t opcode;
648 
649 		/*
650 		 * At the moment, the only error code that seems to be returned
651 		 * is one saying that there's no work. In such a case we leave
652 		 * this be.
653 		 */
654 		ret = i40e_clean_arq_element(hw, &evt, &remain);
655 		if (ret != I40E_SUCCESS)
656 			break;
657 
658 		opcode = LE_16(evt.desc.opcode);
659 		switch (opcode) {
660 		case i40e_aqc_opc_get_link_status:
661 			mutex_enter(&i40e->i40e_general_lock);
662 			i40e_link_check(i40e);
663 			mutex_exit(&i40e->i40e_general_lock);
664 			break;
665 		default:
666 			/*
667 			 * Longer term we'll want to enable other causes here
668 			 * and get these cleaned up and doing something.
669 			 */
670 			break;
671 		}
672 	}
673 }
674 
675 static void
676 i40e_intr_rx_work(i40e_t *i40e, i40e_trqpair_t *itrq)
677 {
678 	mblk_t *mp = NULL;
679 
680 	mutex_enter(&itrq->itrq_rx_lock);
681 	if (!itrq->itrq_intr_poll)
682 		mp = i40e_ring_rx(itrq, I40E_POLL_NULL);
683 	mutex_exit(&itrq->itrq_rx_lock);
684 
685 	if (mp == NULL)
686 		return;
687 
688 	mac_rx_ring(i40e->i40e_mac_hdl, itrq->itrq_macrxring, mp,
689 	    itrq->itrq_rxgen);
690 }
691 
692 /* ARGSUSED */
693 static void
694 i40e_intr_tx_work(i40e_t *i40e, i40e_trqpair_t *itrq)
695 {
696 	i40e_tx_recycle_ring(itrq);
697 }
698 
699 /*
700  * At the moment, the only 'other' interrupt on ICR0 that we handle is the
701  * adminq. We should go through and support the other notifications at some
702  * point.
703  */
704 static void
705 i40e_intr_other_work(i40e_t *i40e)
706 {
707 	struct i40e_hw *hw = &i40e->i40e_hw_space;
708 	uint32_t reg;
709 
710 	reg = I40E_READ_REG(hw, I40E_PFINT_ICR0);
711 	if (i40e_check_acc_handle(i40e->i40e_osdep_space.ios_reg_handle) !=
712 	    DDI_FM_OK) {
713 		ddi_fm_service_impact(i40e->i40e_dip, DDI_SERVICE_DEGRADED);
714 		atomic_or_32(&i40e->i40e_state, I40E_ERROR);
715 		return;
716 	}
717 
718 	if (reg & I40E_PFINT_ICR0_ADMINQ_MASK)
719 		i40e_intr_adminq_work(i40e);
720 
721 	/*
722 	 * Make sure that the adminq interrupt is not masked and then explicitly
723 	 * enable the adminq and thus the other interrupt.
724 	 */
725 	reg = I40E_READ_REG(hw, I40E_PFINT_ICR0_ENA);
726 	reg |= I40E_PFINT_ICR0_ENA_ADMINQ_MASK;
727 	I40E_WRITE_REG(hw, I40E_PFINT_ICR0_ENA, reg);
728 
729 	i40e_intr_adminq_enable(i40e);
730 }
731 
732 /*
733  * Handle an MSI-X interrupt. See section 7.5.1.3 for an overview of
734  * the MSI-X interrupt sequence.
735  */
736 uint_t
737 i40e_intr_msix(void *arg1, void *arg2)
738 {
739 	i40e_t *i40e = (i40e_t *)arg1;
740 	uint_t vector_idx = (uint_t)(uintptr_t)arg2;
741 
742 	ASSERT3U(vector_idx, <, i40e->i40e_intr_count);
743 
744 	/*
745 	 * When using MSI-X interrupts, vector 0 is always reserved for the
746 	 * adminq at this time. Though longer term, we'll want to also bridge
747 	 * some I/O to them.
748 	 */
749 	if (vector_idx == 0) {
750 		i40e_intr_other_work(i40e);
751 		return (DDI_INTR_CLAIMED);
752 	}
753 
754 	ASSERT3U(vector_idx, >, 0);
755 
756 	/*
757 	 * We determine the queue indexes via simple arithmetic (as
758 	 * opposed to keeping explicit state like a bitmap). While
759 	 * conveinent, it does mean that i40e_map_intrs_to_vectors(),
760 	 * i40e_intr_init_queue_msix(), and this function must be
761 	 * modified as a unit.
762 	 *
763 	 * We subtract 1 from the vector to offset the addition we
764 	 * performed during i40e_map_intrs_to_vectors().
765 	 */
766 	for (uint_t i = vector_idx - 1; i < i40e->i40e_num_trqpairs;
767 	     i += (i40e->i40e_intr_count - 1)) {
768 		i40e_trqpair_t *itrq = &i40e->i40e_trqpairs[i];
769 
770 		ASSERT3U(i, <, i40e->i40e_num_trqpairs);
771 		ASSERT3P(itrq, !=, NULL);
772 		i40e_intr_rx_work(i40e, itrq);
773 		i40e_intr_tx_work(i40e, itrq);
774 	}
775 
776 	i40e_intr_io_enable(i40e, vector_idx);
777 	return (DDI_INTR_CLAIMED);
778 }
779 
780 static uint_t
781 i40e_intr_notx(i40e_t *i40e, boolean_t shared)
782 {
783 	i40e_hw_t *hw = &i40e->i40e_hw_space;
784 	uint32_t reg;
785 	i40e_trqpair_t *itrq = &i40e->i40e_trqpairs[0];
786 	int ret = DDI_INTR_CLAIMED;
787 
788 	if (shared == B_TRUE) {
789 		mutex_enter(&i40e->i40e_general_lock);
790 		if (i40e->i40e_state & I40E_SUSPENDED) {
791 			mutex_exit(&i40e->i40e_general_lock);
792 			return (DDI_INTR_UNCLAIMED);
793 		}
794 		mutex_exit(&i40e->i40e_general_lock);
795 	}
796 
797 	reg = I40E_READ_REG(hw, I40E_PFINT_ICR0);
798 	if (i40e_check_acc_handle(i40e->i40e_osdep_space.ios_reg_handle) !=
799 	    DDI_FM_OK) {
800 		ddi_fm_service_impact(i40e->i40e_dip, DDI_SERVICE_DEGRADED);
801 		atomic_or_32(&i40e->i40e_state, I40E_ERROR);
802 		return (DDI_INTR_CLAIMED);
803 	}
804 
805 	if (reg == 0) {
806 		if (shared == B_TRUE)
807 			ret = DDI_INTR_UNCLAIMED;
808 		goto done;
809 	}
810 
811 	if (reg & I40E_PFINT_ICR0_ADMINQ_MASK)
812 		i40e_intr_adminq_work(i40e);
813 
814 	if (reg & I40E_INTR_NOTX_RX_MASK)
815 		i40e_intr_rx_work(i40e, itrq);
816 
817 	if (reg & I40E_INTR_NOTX_TX_MASK)
818 		i40e_intr_tx_work(i40e, itrq);
819 
820 done:
821 	i40e_intr_adminq_enable(i40e);
822 	return (ret);
823 
824 }
825 
826 /* ARGSUSED */
827 uint_t
828 i40e_intr_msi(void *arg1, void *arg2)
829 {
830 	i40e_t *i40e = (i40e_t *)arg1;
831 
832 	return (i40e_intr_notx(i40e, B_FALSE));
833 }
834 
835 /* ARGSUSED */
836 uint_t
837 i40e_intr_legacy(void *arg1, void *arg2)
838 {
839 	i40e_t *i40e = (i40e_t *)arg1;
840 
841 	return (i40e_intr_notx(i40e, B_TRUE));
842 }
843