/* * This file and its contents are supplied under the terms of the * Common Development and Distribution License ("CDDL"), version 1.0. * You may only use this file in accordance with the terms of version * 1.0 of the CDDL. * * A full copy of the text of the CDDL should have accompanied this * source. A copy of the CDDL is also available via the Internet at * http://www.illumos.org/license/CDDL. */ /* * Copyright 2019 Joyent, Inc. * Copyright 2017 Tegile Systems, Inc. All rights reserved. * Copyright 2020 RackTop Systems, Inc. */ /* * ------------------------- * Interrupt Handling Theory * ------------------------- * * There are a couple different sets of interrupts that we need to worry about: * * - Interrupts from receive queues * - Interrupts from transmit queues * - 'Other Interrupts', such as the administrative queue * * 'Other Interrupts' are asynchronous events such as a link status change event * being posted to the administrative queue, unrecoverable ECC errors, and more. * If we have something being posted to the administrative queue, then we go * through and process it, because it's generally enabled as a separate logical * interrupt. Note, we may need to do more here eventually. To re-enable the * interrupts from the 'Other Interrupts' section, we need to clear the PBA and * write ENA to PFINT_ICR0. * * Interrupts from the transmit and receive queues indicate that our requests * have been processed. In the rx case, it means that we have data that we * should take a look at and send up the stack. In the tx case, it means that * data which we got from MAC has now been sent out on the wire and we can free * the associated data. Most of the logic for acting upon the presence of this * data can be found in i40e_transceiver.c which handles all of the DMA, rx, and * tx operations. This file is dedicated to handling and dealing with interrupt * processing. 
* * All devices supported by this driver support three kinds of interrupts: * * o Extended Message Signaled Interrupts (MSI-X) * o Message Signaled Interrupts (MSI) * o Legacy PCI interrupts (INTx) * * Generally speaking the hardware logically handles MSI and INTx the same and * restricts us to only using a single interrupt, which isn't the interesting * case. With MSI-X available, each physical function of the device provides the * opportunity for multiple interrupts which is what we'll focus on. * * -------------------- * Interrupt Management * -------------------- * * By default, the admin queue, which consists of the asynchronous other * interrupts is always bound to MSI-X vector zero. Next, we spread out all of * the other interrupts that we have available to us over the remaining * interrupt vectors. * * This means that there may be multiple queues, both tx and rx, which are * mapped to the same interrupt. When the interrupt fires, we'll have to check * all of them for servicing, before we go through and indicate that the * interrupt is claimed. * * The hardware provides the means of mapping various queues to MSI-X interrupts * by programming the I40E_QINT_RQCTL() and I40E_QINT_TQCTL() registers. These * registers can also be used to enable and disable whether or not the queue is * a source of interrupts. As part of this, the hardware requires that we * maintain a linked list of queues for each interrupt vector. While it may seem * like this is only there for the purposes of ITRs, that's not the case. The * first queue must be programmed in I40E_QINT_LNKLSTN(%vector) register. Each * queue defines the next one in either the I40E_QINT_RQCTL or I40E_QINT_TQCTL * register. * * Finally, the individual interrupt vector itself has the ability to be enabled * and disabled. The overall interrupt is controlled through the * I40E_PFINT_DYN_CTLN() register. This is used to turn on and off the interrupt * as a whole. 
* * Note that this means that both the individual queue and the interrupt as a * whole can be toggled and re-enabled. * * ------------------- * Non-MSIX Management * ------------------- * * We may have a case where the Operating System is unable to actually allocate * any MSI-X to the system. In such a world, there is only one transmit/receive * queue pair and it is bound to the same interrupt with index zero. The * hardware doesn't allow us access to additional interrupt vectors in these * modes. Note that technically we could support more transmit/receive queues if * we wanted. * * In this world, because the interrupts for the admin queue and traffic are * mixed together, we have to consult ICR0 to determine what has occurred. The * QINT_TQCTL and QINT_RQCTL registers have a field, 'MSI-X 0 index' which * allows us to set a specific bit in ICR0. There are up to seven such bits; * however, we only use the bit 0 and 1 for the rx and tx queue respectively. * These are contained by the I40E_INTR_NOTX_{R|T}X_QUEUE and * I40E_INTR_NOTX_{R|T}X_MASK registers respectively. * * Unfortunately, these corresponding queue bits have no corresponding entry in * the ICR0_ENA register. So instead, when enabling interrupts on the queues, we * end up enabling it on the queue registers rather than on the MSI-X registers. * In the MSI-X world, because they can be enabled and disabled, this is * different and the queues can always be enabled and disabled, but the * interrupts themselves are toggled (ignoring the question of interrupt * blanking for polling on rings). * * Finally, we still have to set up the interrupt linked list, but the list is * instead rooted at the register I40E_PFINT_LNKLST0, rather than being tied to * one of the other MSI-X registers. * * -------------------- * Interrupt Moderation * -------------------- * * The XL710 hardware has three different interrupt moderation registers per * interrupt. 
Unsurprisingly, we use these for: * * o RX interrupts * o TX interrupts * o 'Other interrupts' (link status change, admin queue, etc.) * * By default, we throttle 'other interrupts' the most, then TX interrupts, and * then RX interrupts. The default values for these were based on trying to * reason about both the importance and frequency of events. Generally speaking * 'other interrupts' are not very frequent and they're not important for the * I/O data path in and of itself (though they may indicate issues with the I/O * data path). * * On the flip side, when we're not polling, RX interrupts are very important. * The longer we wait for them, the more latency that we inject into the system. * However, if we allow interrupts to occur too frequently, we risk a few * problems: * * 1) Abusing system resources. Without proper interrupt blanking and polling, * we can see upwards of 200k-300k interrupts per second on the system. * * 2) Not enough data coalescing to enable polling. In other words, the more * data that we allow to build up, the more likely we'll be able to enable * polling mode and allowing us to better handle bulk data. * * In-between the 'other interrupts' and the TX interrupts we have the * reclamation of TX buffers. This operation is not quite as important as we * generally size the ring large enough that we should be able to reclaim a * substantial amount of the descriptors that we have used per interrupt. So * while it's important that this interrupt occur, we don't necessarily need it * firing as frequently as RX; it doesn't, on its own, induce additional latency * into the system. * * Based on all this we currently assign static ITR values for the system. While * we could move to a dynamic system (the hardware supports that), we'd want to * make sure that we're seeing problems from this that we believe would be * generally helped by the added complexity. 
* * Based on this, the default values that we have allow for the following * interrupt thresholds: * * o 20k interrupts/s for RX * o 5k interrupts/s for TX * o 2k interupts/s for 'Other Interrupts' */ #include "i40e_sw.h" #define I40E_INTR_NOTX_QUEUE 0 #define I40E_INTR_NOTX_INTR 0 #define I40E_INTR_NOTX_RX_QUEUE 0 #define I40E_INTR_NOTX_RX_MASK (1 << I40E_PFINT_ICR0_QUEUE_0_SHIFT) #define I40E_INTR_NOTX_TX_QUEUE 1 #define I40E_INTR_NOTX_TX_MASK (1 << I40E_PFINT_ICR0_QUEUE_1_SHIFT) void i40e_intr_set_itr(i40e_t *i40e, i40e_itr_index_t itr, uint_t val) { int i; i40e_hw_t *hw = &i40e->i40e_hw_space; VERIFY3U(val, <=, I40E_MAX_ITR); VERIFY3U(itr, <, I40E_ITR_INDEX_NONE); /* * No matter the interrupt mode, the ITR for other interrupts is always * on interrupt zero and the same is true if we're not using MSI-X. */ if (itr == I40E_ITR_INDEX_OTHER || i40e->i40e_intr_type != DDI_INTR_TYPE_MSIX) { I40E_WRITE_REG(hw, I40E_PFINT_ITR0(itr), val); return; } for (i = 0; i < i40e->i40e_num_trqpairs; i++) { I40E_WRITE_REG(hw, I40E_PFINT_ITRN(itr, i), val); } } /* * Re-enable the adminq. Note that the adminq doesn't have a traditional queue * associated with it from an interrupt perspective and just lives on ICR0. * However when MSI-X interrupts are not being used, then this also enables and * disables those interrupts. */ static void i40e_intr_adminq_enable(i40e_t *i40e) { i40e_hw_t *hw = &i40e->i40e_hw_space; uint32_t reg; reg = I40E_PFINT_DYN_CTL0_INTENA_MASK | I40E_PFINT_DYN_CTL0_CLEARPBA_MASK | (I40E_ITR_INDEX_NONE << I40E_PFINT_DYN_CTL0_ITR_INDX_SHIFT); I40E_WRITE_REG(hw, I40E_PFINT_DYN_CTL0, reg); i40e_flush(hw); } static void i40e_intr_adminq_disable(i40e_t *i40e) { i40e_hw_t *hw = &i40e->i40e_hw_space; uint32_t reg; reg = I40E_ITR_INDEX_NONE << I40E_PFINT_DYN_CTL0_ITR_INDX_SHIFT; I40E_WRITE_REG(hw, I40E_PFINT_DYN_CTL0, reg); } /* * The next two functions enable/disable the reception of interrupts * on the given vector. 
Only vectors 1..N are programmed by these * functions; vector 0 is special and handled by a different register. * We must subtract one from the vector because i40e implicitly adds * one to the vector value. See section 10.2.2.10.13 for more details. */ static void i40e_intr_io_enable(i40e_t *i40e, int vector) { uint32_t reg; i40e_hw_t *hw = &i40e->i40e_hw_space; ASSERT3S(vector, >, 0); reg = I40E_PFINT_DYN_CTLN_INTENA_MASK | I40E_PFINT_DYN_CTLN_CLEARPBA_MASK | (I40E_ITR_INDEX_NONE << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT); I40E_WRITE_REG(hw, I40E_PFINT_DYN_CTLN(vector - 1), reg); } static void i40e_intr_io_disable(i40e_t *i40e, int vector) { uint32_t reg; i40e_hw_t *hw = &i40e->i40e_hw_space; ASSERT3S(vector, >, 0); reg = I40E_ITR_INDEX_NONE << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT; I40E_WRITE_REG(hw, I40E_PFINT_DYN_CTLN(vector - 1), reg); } /* * When MSI-X interrupts are being used, then we can enable the actual * interrupts themselves. However, when they are not, we instead have to turn * towards the queue's CAUSE_ENA bit and enable that. */ void i40e_intr_io_enable_all(i40e_t *i40e) { if (i40e->i40e_intr_type == DDI_INTR_TYPE_MSIX) { int i; for (i = 1; i < i40e->i40e_intr_count; i++) { i40e_intr_io_enable(i40e, i); } } else { uint32_t reg; i40e_hw_t *hw = &i40e->i40e_hw_space; reg = I40E_READ_REG(hw, I40E_QINT_RQCTL(I40E_INTR_NOTX_QUEUE)); reg |= I40E_QINT_RQCTL_CAUSE_ENA_MASK; I40E_WRITE_REG(hw, I40E_QINT_RQCTL(I40E_INTR_NOTX_QUEUE), reg); reg = I40E_READ_REG(hw, I40E_QINT_TQCTL(I40E_INTR_NOTX_QUEUE)); reg |= I40E_QINT_TQCTL_CAUSE_ENA_MASK; I40E_WRITE_REG(hw, I40E_QINT_TQCTL(I40E_INTR_NOTX_QUEUE), reg); } } /* * When MSI-X interrupts are being used, then we can disable the actual * interrupts themselves. However, when they are not, we instead have to turn * towards the queue's CAUSE_ENA bit and disable that. 
*/ void i40e_intr_io_disable_all(i40e_t *i40e) { if (i40e->i40e_intr_type == DDI_INTR_TYPE_MSIX) { int i; for (i = 1; i < i40e->i40e_intr_count; i++) { i40e_intr_io_disable(i40e, i); } } else { uint32_t reg; i40e_hw_t *hw = &i40e->i40e_hw_space; reg = I40E_READ_REG(hw, I40E_QINT_RQCTL(I40E_INTR_NOTX_QUEUE)); reg &= ~I40E_QINT_RQCTL_CAUSE_ENA_MASK; I40E_WRITE_REG(hw, I40E_QINT_RQCTL(I40E_INTR_NOTX_QUEUE), reg); reg = I40E_READ_REG(hw, I40E_QINT_TQCTL(I40E_INTR_NOTX_QUEUE)); reg &= ~I40E_QINT_TQCTL_CAUSE_ENA_MASK; I40E_WRITE_REG(hw, I40E_QINT_TQCTL(I40E_INTR_NOTX_QUEUE), reg); } } /* * As part of disabling the tx and rx queue's we're technically supposed to * remove the linked list entries. The simplest way is to clear the LNKLSTN * register by setting it to I40E_QUEUE_TYPE_EOL (0x7FF). * * Note all of the FM register access checks are performed by the caller. */ void i40e_intr_io_clear_cause(i40e_t *i40e) { uint32_t i; i40e_hw_t *hw = &i40e->i40e_hw_space; if (i40e->i40e_intr_type != DDI_INTR_TYPE_MSIX) { uint32_t reg; reg = I40E_QUEUE_TYPE_EOL; I40E_WRITE_REG(hw, I40E_PFINT_LNKLST0, reg); return; } for (i = 0; i < i40e->i40e_intr_count - 1; i++) { uint32_t reg; reg = I40E_QUEUE_TYPE_EOL; I40E_WRITE_REG(hw, I40E_PFINT_LNKLSTN(i), reg); } i40e_flush(hw); } /* * Finalize interrupt handling. Mostly this disables the admin queue. */ void i40e_intr_chip_fini(i40e_t *i40e) { #ifdef DEBUG int i; uint32_t reg; i40e_hw_t *hw = &i40e->i40e_hw_space; /* * Take a look and verify that all other interrupts have been disabled * and the interrupt linked lists have been zeroed. */ if (i40e->i40e_intr_type == DDI_INTR_TYPE_MSIX) { for (i = 0; i < i40e->i40e_intr_count - 1; i++) { reg = I40E_READ_REG(hw, I40E_PFINT_DYN_CTLN(i)); VERIFY0(reg & I40E_PFINT_DYN_CTLN_INTENA_MASK); reg = I40E_READ_REG(hw, I40E_PFINT_LNKLSTN(i)); VERIFY3U(reg, ==, I40E_QUEUE_TYPE_EOL); } } #endif i40e_intr_adminq_disable(i40e); } /* * Set the head of the interrupt linked list. 
The PFINT_LNKLSTN[N] * register actually refers to the 'N + 1' interrupt vector. E.g., * PFINT_LNKLSTN[0] refers to interrupt vector 1. */ static void i40e_set_lnklstn(i40e_t *i40e, uint_t vector, uint_t queue) { uint32_t reg; i40e_hw_t *hw = &i40e->i40e_hw_space; reg = (queue << I40E_PFINT_LNKLSTN_FIRSTQ_INDX_SHIFT) | (I40E_QUEUE_TYPE_RX << I40E_PFINT_LNKLSTN_FIRSTQ_TYPE_SHIFT); I40E_WRITE_REG(hw, I40E_PFINT_LNKLSTN(vector), reg); DEBUGOUT2("PFINT_LNKLSTN[%u] = 0x%x", vector, reg); } /* * Set the QINT_RQCTL[queue] register. The next queue is always the Tx * queue associated with this Rx queue. Unlike PFINT_LNKLSTN, the * vector should be the actual vector this queue is on -- i.e., it * should be equal to itrq_rx_intrvec. */ static void i40e_set_rqctl(i40e_t *i40e, uint_t vector, uint_t queue) { uint32_t reg; i40e_hw_t *hw = &i40e->i40e_hw_space; ASSERT3U(vector, ==, i40e->i40e_trqpairs[queue].itrq_rx_intrvec); reg = (vector << I40E_QINT_RQCTL_MSIX_INDX_SHIFT) | (I40E_ITR_INDEX_RX << I40E_QINT_RQCTL_ITR_INDX_SHIFT) | (queue << I40E_QINT_RQCTL_NEXTQ_INDX_SHIFT) | (I40E_QUEUE_TYPE_TX << I40E_QINT_RQCTL_NEXTQ_TYPE_SHIFT) | I40E_QINT_RQCTL_CAUSE_ENA_MASK; I40E_WRITE_REG(hw, I40E_QINT_RQCTL(queue), reg); DEBUGOUT2("QINT_RQCTL[%u] = 0x%x", queue, reg); } /* * Like i40e_set_rqctl(), but for QINT_TQCTL[queue]. The next queue is * either the Rx queue of another TRQP, or EOL. */ static void i40e_set_tqctl(i40e_t *i40e, uint_t vector, uint_t queue, uint_t next_queue) { uint32_t reg; i40e_hw_t *hw = &i40e->i40e_hw_space; ASSERT3U(vector, ==, i40e->i40e_trqpairs[queue].itrq_tx_intrvec); reg = (vector << I40E_QINT_TQCTL_MSIX_INDX_SHIFT) | (I40E_ITR_INDEX_TX << I40E_QINT_TQCTL_ITR_INDX_SHIFT) | (next_queue << I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT) | (I40E_QUEUE_TYPE_RX << I40E_QINT_TQCTL_NEXTQ_TYPE_SHIFT) | I40E_QINT_TQCTL_CAUSE_ENA_MASK; I40E_WRITE_REG(hw, I40E_QINT_TQCTL(queue), reg); DEBUGOUT2("QINT_TQCTL[%u] = 0x%x", queue, reg); } /* * Program the interrupt linked list. 
Each vector has a linked list of * queues which act as event sources for that vector. When one of * those sources has an event the associated interrupt vector is * fired. This mapping must match the mapping found in * i40e_map_intrs_to_vectors(). * * See section 7.5.3 for more information about the configuration of * the interrupt linked list. */ static void i40e_intr_init_queue_msix(i40e_t *i40e) { uint_t intr_count; /* * The 0th vector is for 'Other Interrupts' only (subject to * change in the future). */ intr_count = i40e->i40e_intr_count - 1; for (uint_t vec = 0; vec < intr_count; vec++) { boolean_t head = B_TRUE; for (uint_t qidx = vec; qidx < i40e->i40e_num_trqpairs; qidx += intr_count) { uint_t next_qidx = qidx + intr_count; next_qidx = (next_qidx > i40e->i40e_num_trqpairs) ? I40E_QUEUE_TYPE_EOL : next_qidx; if (head) { i40e_set_lnklstn(i40e, vec, qidx); head = B_FALSE; } i40e_set_rqctl(i40e, vec + 1, qidx); i40e_set_tqctl(i40e, vec + 1, qidx, next_qidx); } } } /* * Set up a single queue to share the admin queue interrupt in the non-MSI-X * world. Note we do not enable the queue as an interrupt cause at this time. We * don't have any other vector of control here, unlike with the MSI-X interrupt * case. 
*/
static void
i40e_intr_init_queue_shared(i40e_t *i40e)
{
	i40e_hw_t *hw = &i40e->i40e_hw_space;
	uint32_t lnklst0, rqctl, tqctl;

	VERIFY(i40e->i40e_intr_type == DDI_INTR_TYPE_FIXED ||
	    i40e->i40e_intr_type == DDI_INTR_TYPE_MSI);

	/* Root the interrupt zero list at the Rx side of the shared queue. */
	lnklst0 = I40E_INTR_NOTX_QUEUE << I40E_PFINT_LNKLST0_FIRSTQ_INDX_SHIFT;
	lnklst0 |= I40E_QUEUE_TYPE_RX << I40E_PFINT_LNKLSTN_FIRSTQ_TYPE_SHIFT;
	I40E_WRITE_REG(hw, I40E_PFINT_LNKLST0, lnklst0);

	/*
	 * Rx entry: reports through the 'MSI-X 0 index' selected by
	 * I40E_INTR_NOTX_RX_QUEUE and chains to the Tx side of the same queue.
	 * CAUSE_ENA is deliberately left clear.
	 */
	rqctl = I40E_INTR_NOTX_INTR << I40E_QINT_RQCTL_MSIX_INDX_SHIFT;
	rqctl |= I40E_ITR_INDEX_RX << I40E_QINT_RQCTL_ITR_INDX_SHIFT;
	rqctl |= I40E_INTR_NOTX_RX_QUEUE << I40E_QINT_RQCTL_MSIX0_INDX_SHIFT;
	rqctl |= I40E_INTR_NOTX_QUEUE << I40E_QINT_RQCTL_NEXTQ_INDX_SHIFT;
	rqctl |= I40E_QUEUE_TYPE_TX << I40E_QINT_RQCTL_NEXTQ_TYPE_SHIFT;
	I40E_WRITE_REG(hw, I40E_QINT_RQCTL(I40E_INTR_NOTX_QUEUE), rqctl);

	/*
	 * Tx entry: reports through the 'MSI-X 0 index' selected by
	 * I40E_INTR_NOTX_TX_QUEUE and terminates the list with EOL.
	 * CAUSE_ENA is deliberately left clear here as well.
	 */
	tqctl = I40E_INTR_NOTX_INTR << I40E_QINT_TQCTL_MSIX_INDX_SHIFT;
	tqctl |= I40E_ITR_INDEX_TX << I40E_QINT_TQCTL_ITR_INDX_SHIFT;
	tqctl |= I40E_INTR_NOTX_TX_QUEUE << I40E_QINT_TQCTL_MSIX0_INDX_SHIFT;
	tqctl |= I40E_QUEUE_TYPE_EOL << I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT;
	tqctl |= I40E_QUEUE_TYPE_RX << I40E_QINT_TQCTL_NEXTQ_TYPE_SHIFT;
	I40E_WRITE_REG(hw, I40E_QINT_TQCTL(I40E_INTR_NOTX_QUEUE), tqctl);
}

/*
 * Mark the Rx side of the given queue pair as a valid interrupt cause again
 * by setting CAUSE_ENA in its QINT_RQCTL register. This is meant only for the
 * GLDv3 interrupt blanking routines: on debug builds it asserts that the Rx
 * lock is held and that the cause is currently disabled.
 */
void
i40e_intr_rx_queue_enable(i40e_trqpair_t *itrq)
{
	i40e_t *i40e = itrq->itrq_i40e;
	i40e_hw_t *hw = &i40e->i40e_hw_space;
	uint_t queue = itrq->itrq_index;
	uint32_t rqctl;

	ASSERT(MUTEX_HELD(&itrq->itrq_rx_lock));
	ASSERT(queue < i40e->i40e_num_trqpairs);

	rqctl = I40E_READ_REG(hw, I40E_QINT_RQCTL(queue));
	ASSERT0(rqctl & I40E_QINT_RQCTL_CAUSE_ENA_MASK);

	rqctl |= I40E_QINT_RQCTL_CAUSE_ENA_MASK;
	I40E_WRITE_REG(hw, I40E_QINT_RQCTL(queue), rqctl);
}

/*
 * Disable the specified queue as a valid source of interrupts. Note, this
 * should only be used as part of the GLDv3's interrupt blanking routines. The
 * debug build assertions are specific to that.
*/ void i40e_intr_rx_queue_disable(i40e_trqpair_t *itrq) { uint32_t reg; uint_t queue = itrq->itrq_index; i40e_hw_t *hw = &itrq->itrq_i40e->i40e_hw_space; ASSERT(MUTEX_HELD(&itrq->itrq_rx_lock)); ASSERT(queue < itrq->itrq_i40e->i40e_num_trqpairs); reg = I40E_READ_REG(hw, I40E_QINT_RQCTL(queue)); ASSERT3U(reg & I40E_QINT_RQCTL_CAUSE_ENA_MASK, ==, I40E_QINT_RQCTL_CAUSE_ENA_MASK); reg &= ~I40E_QINT_RQCTL_CAUSE_ENA_MASK; I40E_WRITE_REG(hw, I40E_QINT_RQCTL(queue), reg); } /* * Start up the various chip's interrupt handling. We not only configure the * adminq here, but we also go through and configure all of the actual queues, * the interrupt linked lists, and others. */ void i40e_intr_chip_init(i40e_t *i40e) { i40e_hw_t *hw = &i40e->i40e_hw_space; uint32_t reg; /* * Ensure that all non adminq interrupts are disabled at the chip level. */ i40e_intr_io_disable_all(i40e); I40E_WRITE_REG(hw, I40E_PFINT_ICR0_ENA, 0); (void) I40E_READ_REG(hw, I40E_PFINT_ICR0); /* * Always enable all of the other-class interrupts to be on their own * ITR. This only needs to be set on interrupt zero, which has its own * special setting. */ reg = I40E_ITR_INDEX_OTHER << I40E_PFINT_STAT_CTL0_OTHER_ITR_INDX_SHIFT; I40E_WRITE_REG(hw, I40E_PFINT_STAT_CTL0, reg); /* * Enable interrupt types we expect to receive. At the moment, this * is limited to the adminq; however, we'll want to review 11.2.2.9.22 * for more types here as we add support for detecting them, handling * them, and resetting the device as appropriate. */ reg = I40E_PFINT_ICR0_ENA_ADMINQ_MASK; I40E_WRITE_REG(hw, I40E_PFINT_ICR0_ENA, reg); /* * Always set the interrupt linked list to empty. We'll come back and * change this if MSI-X are actually on the scene. */ I40E_WRITE_REG(hw, I40E_PFINT_LNKLST0, I40E_QUEUE_TYPE_EOL); i40e_intr_adminq_enable(i40e); /* * Set up all of the queues and map them to interrupts based on the bit * assignments. 
*/ if (i40e->i40e_intr_type == DDI_INTR_TYPE_MSIX) { i40e_intr_init_queue_msix(i40e); } else { i40e_intr_init_queue_shared(i40e); } /* * Finally set all of the default ITRs for the interrupts. Note that the * queues will have been set up above. */ i40e_intr_set_itr(i40e, I40E_ITR_INDEX_RX, i40e->i40e_rx_itr); i40e_intr_set_itr(i40e, I40E_ITR_INDEX_TX, i40e->i40e_tx_itr); i40e_intr_set_itr(i40e, I40E_ITR_INDEX_OTHER, i40e->i40e_other_itr); } static void i40e_intr_adminq_work(i40e_t *i40e) { struct i40e_hw *hw = &i40e->i40e_hw_space; struct i40e_arq_event_info evt; uint16_t remain = 1; bzero(&evt, sizeof (struct i40e_arq_event_info)); evt.buf_len = I40E_ADMINQ_BUFSZ; evt.msg_buf = i40e->i40e_aqbuf; while (remain != 0) { enum i40e_status_code ret; uint16_t opcode; /* * At the moment, the only error code that seems to be returned * is one saying that there's no work. In such a case we leave * this be. */ ret = i40e_clean_arq_element(hw, &evt, &remain); if (ret != I40E_SUCCESS) break; opcode = LE_16(evt.desc.opcode); switch (opcode) { case i40e_aqc_opc_get_link_status: mutex_enter(&i40e->i40e_general_lock); i40e_link_check(i40e); mutex_exit(&i40e->i40e_general_lock); break; default: /* * Longer term we'll want to enable other causes here * and get these cleaned up and doing something. */ break; } } } static void i40e_intr_rx_work(i40e_t *i40e, i40e_trqpair_t *itrq) { mblk_t *mp = NULL; mutex_enter(&itrq->itrq_rx_lock); if (!itrq->itrq_intr_poll) mp = i40e_ring_rx(itrq, I40E_POLL_NULL); mutex_exit(&itrq->itrq_rx_lock); if (mp == NULL) return; mac_rx_ring(i40e->i40e_mac_hdl, itrq->itrq_macrxring, mp, itrq->itrq_rxgen); } /* ARGSUSED */ static void i40e_intr_tx_work(i40e_t *i40e, i40e_trqpair_t *itrq) { i40e_tx_recycle_ring(itrq); } /* * At the moment, the only 'other' interrupt on ICR0 that we handle is the * adminq. We should go through and support the other notifications at some * point. 
*/ static void i40e_intr_other_work(i40e_t *i40e) { struct i40e_hw *hw = &i40e->i40e_hw_space; uint32_t reg; reg = I40E_READ_REG(hw, I40E_PFINT_ICR0); if (i40e_check_acc_handle(i40e->i40e_osdep_space.ios_reg_handle) != DDI_FM_OK) { ddi_fm_service_impact(i40e->i40e_dip, DDI_SERVICE_DEGRADED); atomic_or_32(&i40e->i40e_state, I40E_ERROR); return; } if (reg & I40E_PFINT_ICR0_ADMINQ_MASK) i40e_intr_adminq_work(i40e); /* * Make sure that the adminq interrupt is not masked and then explicitly * enable the adminq and thus the other interrupt. */ reg = I40E_READ_REG(hw, I40E_PFINT_ICR0_ENA); reg |= I40E_PFINT_ICR0_ENA_ADMINQ_MASK; I40E_WRITE_REG(hw, I40E_PFINT_ICR0_ENA, reg); i40e_intr_adminq_enable(i40e); } /* * The prolog/epilog pair of functions ensure the integrity of the trqpair * across ring stop/start operations. * * A ring stop operation will wait whilst an interrupt is processing a * trqpair, and when a ring is stopped the interrupt handler will skip * the trqpair. */ static boolean_t i40e_intr_trqpair_prolog(i40e_trqpair_t *itrq) { boolean_t enabled; mutex_enter(&itrq->itrq_intr_lock); enabled = !itrq->itrq_intr_quiesce; if (enabled) itrq->itrq_intr_busy = B_TRUE; mutex_exit(&itrq->itrq_intr_lock); return (enabled); } static void i40e_intr_trqpair_epilog(i40e_trqpair_t *itrq) { mutex_enter(&itrq->itrq_intr_lock); itrq->itrq_intr_busy = B_FALSE; if (itrq->itrq_intr_quiesce) cv_signal(&itrq->itrq_intr_cv); mutex_exit(&itrq->itrq_intr_lock); } /* * Tell any active interrupt vectors the ring is quiescing, then * wait until any active interrupt thread has finished with this * trqpair. */ void i40e_intr_quiesce(i40e_trqpair_t *itrq) { mutex_enter(&itrq->itrq_intr_lock); itrq->itrq_intr_quiesce = B_TRUE; while (itrq->itrq_intr_busy) cv_wait(&itrq->itrq_intr_cv, &itrq->itrq_intr_lock); mutex_exit(&itrq->itrq_intr_lock); } /* * Handle an MSI-X interrupt. See section 7.5.1.3 for an overview of * the MSI-X interrupt sequence. 
*/ uint_t i40e_intr_msix(void *arg1, void *arg2) { i40e_t *i40e = (i40e_t *)arg1; uint_t vector_idx = (uint_t)(uintptr_t)arg2; ASSERT3U(vector_idx, <, i40e->i40e_intr_count); /* * When using MSI-X interrupts, vector 0 is always reserved for the * adminq at this time. Though longer term, we'll want to also bridge * some I/O to them. */ if (vector_idx == 0) { i40e_intr_other_work(i40e); return (DDI_INTR_CLAIMED); } ASSERT3U(vector_idx, >, 0); /* * We determine the queue indexes via simple arithmetic (as * opposed to keeping explicit state like a bitmap). While * conveinent, it does mean that i40e_map_intrs_to_vectors(), * i40e_intr_init_queue_msix(), and this function must be * modified as a unit. * * We subtract 1 from the vector to offset the addition we * performed during i40e_map_intrs_to_vectors(). */ for (uint_t i = vector_idx - 1; i < i40e->i40e_num_trqpairs; i += (i40e->i40e_intr_count - 1)) { i40e_trqpair_t *itrq = &i40e->i40e_trqpairs[i]; ASSERT3U(i, <, i40e->i40e_num_trqpairs); ASSERT3P(itrq, !=, NULL); if (!i40e_intr_trqpair_prolog(itrq)) continue; i40e_intr_rx_work(i40e, itrq); i40e_intr_tx_work(i40e, itrq); i40e_intr_trqpair_epilog(itrq); } i40e_intr_io_enable(i40e, vector_idx); return (DDI_INTR_CLAIMED); } static uint_t i40e_intr_notx(i40e_t *i40e, boolean_t shared) { i40e_hw_t *hw = &i40e->i40e_hw_space; uint32_t reg; i40e_trqpair_t *itrq = &i40e->i40e_trqpairs[0]; int ret = DDI_INTR_CLAIMED; if (shared == B_TRUE) { mutex_enter(&i40e->i40e_general_lock); if (i40e->i40e_state & I40E_SUSPENDED) { mutex_exit(&i40e->i40e_general_lock); return (DDI_INTR_UNCLAIMED); } mutex_exit(&i40e->i40e_general_lock); } reg = I40E_READ_REG(hw, I40E_PFINT_ICR0); if (i40e_check_acc_handle(i40e->i40e_osdep_space.ios_reg_handle) != DDI_FM_OK) { ddi_fm_service_impact(i40e->i40e_dip, DDI_SERVICE_DEGRADED); atomic_or_32(&i40e->i40e_state, I40E_ERROR); return (DDI_INTR_CLAIMED); } if (reg == 0) { if (shared == B_TRUE) ret = DDI_INTR_UNCLAIMED; goto done; } if (reg & 
I40E_PFINT_ICR0_ADMINQ_MASK) i40e_intr_adminq_work(i40e); if (i40e_intr_trqpair_prolog(itrq)) { if (reg & I40E_INTR_NOTX_RX_MASK) i40e_intr_rx_work(i40e, itrq); if (reg & I40E_INTR_NOTX_TX_MASK) i40e_intr_tx_work(i40e, itrq); i40e_intr_trqpair_epilog(itrq); } done: i40e_intr_adminq_enable(i40e); return (ret); } /* ARGSUSED */ uint_t i40e_intr_msi(void *arg1, void *arg2) { i40e_t *i40e = (i40e_t *)arg1; return (i40e_intr_notx(i40e, B_FALSE)); } /* ARGSUSED */ uint_t i40e_intr_legacy(void *arg1, void *arg2) { i40e_t *i40e = (i40e_t *)arg1; return (i40e_intr_notx(i40e, B_TRUE)); }