xref: /illumos-gate/usr/src/uts/common/io/i40e/i40e_main.c (revision df36e06d)
19d26e4fcSRobert Mustacchi /*
29d26e4fcSRobert Mustacchi  * This file and its contents are supplied under the terms of the
39d26e4fcSRobert Mustacchi  * Common Development and Distribution License ("CDDL"), version 1.0.
49d26e4fcSRobert Mustacchi  * You may only use this file in accordance with the terms of version
59d26e4fcSRobert Mustacchi  * 1.0 of the CDDL.
69d26e4fcSRobert Mustacchi  *
79d26e4fcSRobert Mustacchi  * A full copy of the text of the CDDL should have accompanied this
89d26e4fcSRobert Mustacchi  * source.  A copy of the CDDL is also available via the Internet at
99d26e4fcSRobert Mustacchi  * http://www.illumos.org/license/CDDL.
109d26e4fcSRobert Mustacchi  */
119d26e4fcSRobert Mustacchi 
129d26e4fcSRobert Mustacchi /*
139d26e4fcSRobert Mustacchi  * Copyright 2015 OmniTI Computer Consulting, Inc. All rights reserved.
1409aee612SRyan Zezeski  * Copyright 2019 Joyent, Inc.
15396505afSPaul Winder  * Copyright 2017 Tegile Systems, Inc.  All rights reserved.
16234a3cfbSPaul Winder  * Copyright 2020 RackTop Systems, Inc.
1788628b1bSRyan Zezeski  * Copyright 2020 Ryan Zezeski
18*df36e06dSRobert Mustacchi  * Copyright 2021 Oxide Computer Company
199d26e4fcSRobert Mustacchi  */
209d26e4fcSRobert Mustacchi 
219d26e4fcSRobert Mustacchi /*
229d26e4fcSRobert Mustacchi  * i40e - Intel 10/40 Gb Ethernet driver
239d26e4fcSRobert Mustacchi  *
249d26e4fcSRobert Mustacchi  * The i40e driver is the main software device driver for the Intel 40 Gb family
259d26e4fcSRobert Mustacchi  * of devices. Note that these devices come in many flavors with both 40 GbE
269d26e4fcSRobert Mustacchi  * ports and 10 GbE ports. This device is the successor to the 82599 family of
279d26e4fcSRobert Mustacchi  * devices (ixgbe).
289d26e4fcSRobert Mustacchi  *
299d26e4fcSRobert Mustacchi  * Unlike previous generations of Intel 1 GbE and 10 GbE devices, the 40 GbE
309d26e4fcSRobert Mustacchi  * devices defined in the XL710 controller (previously known as Fortville) are a
319d26e4fcSRobert Mustacchi  * rather different beast and have a small switch embedded inside of them. In
329d26e4fcSRobert Mustacchi  * addition, the way that most of the programming is done has been overhauled.
339d26e4fcSRobert Mustacchi  * As opposed to just using PCIe memory mapped registers, it also has an
349d26e4fcSRobert Mustacchi  * administrative queue which is used to communicate with firmware running on
359d26e4fcSRobert Mustacchi  * the chip.
369d26e4fcSRobert Mustacchi  *
379d26e4fcSRobert Mustacchi  * Each physical function in the hardware shows up as a device that this driver
389d26e4fcSRobert Mustacchi  * will bind to. The hardware splits many resources evenly across all of the
399d26e4fcSRobert Mustacchi  * physical functions present on the device, while other resources are instead
409d26e4fcSRobert Mustacchi  * shared across the entire card and it's up to the device driver to
419d26e4fcSRobert Mustacchi  * intelligently partition them.
429d26e4fcSRobert Mustacchi  *
439d26e4fcSRobert Mustacchi  * ------------
449d26e4fcSRobert Mustacchi  * Organization
459d26e4fcSRobert Mustacchi  * ------------
469d26e4fcSRobert Mustacchi  *
479d26e4fcSRobert Mustacchi  * This driver is made up of several files which have their own theory
489d26e4fcSRobert Mustacchi  * statements spread across them. We'll touch on the high level purpose of each
499d26e4fcSRobert Mustacchi  * file here, and then we'll get into more discussion on how the device is
509d26e4fcSRobert Mustacchi  * generally modelled with respect to the interfaces in illumos.
519d26e4fcSRobert Mustacchi  *
529d26e4fcSRobert Mustacchi  * i40e_gld.c: This file contains all of the bindings to MAC and the networking
539d26e4fcSRobert Mustacchi  *             stack.
549d26e4fcSRobert Mustacchi  *
559d26e4fcSRobert Mustacchi  * i40e_intr.c: This file contains all of the interrupt service routines and
569d26e4fcSRobert Mustacchi  *              contains logic to enable and disable interrupts on the hardware.
579d26e4fcSRobert Mustacchi  *              It also contains the logic to map hardware resources such as the
589d26e4fcSRobert Mustacchi  *              rings to and from interrupts and controls their ability to fire.
599d26e4fcSRobert Mustacchi  *
609d26e4fcSRobert Mustacchi  *              There is a big theory statement on interrupts present there.
619d26e4fcSRobert Mustacchi  *
629d26e4fcSRobert Mustacchi  * i40e_main.c: The file that you're currently in. It interfaces with the
639d26e4fcSRobert Mustacchi  *              traditional OS DDI interfaces and is in charge of configuring
649d26e4fcSRobert Mustacchi  *              the device.
659d26e4fcSRobert Mustacchi  *
669d26e4fcSRobert Mustacchi  * i40e_osdep.[ch]: These files contain interfaces and definitions needed to
679d26e4fcSRobert Mustacchi  *                  work with Intel's common code for the device.
689d26e4fcSRobert Mustacchi  *
699d26e4fcSRobert Mustacchi  * i40e_stats.c: This file contains the general work and logic around our
709d26e4fcSRobert Mustacchi  *               kstats. A theory statement on their organization and use of the
719d26e4fcSRobert Mustacchi  *               hardware exists there.
729d26e4fcSRobert Mustacchi  *
739d26e4fcSRobert Mustacchi  * i40e_sw.h: This header file contains all of the primary structure definitions
749d26e4fcSRobert Mustacchi  *            and constants that are used across the entire driver.
759d26e4fcSRobert Mustacchi  *
769d26e4fcSRobert Mustacchi  * i40e_transceiver.c: This file contains all of the logic for sending and
779d26e4fcSRobert Mustacchi  *                     receiving data. It contains all of the ring and DMA
789d26e4fcSRobert Mustacchi  *                     allocation logic, as well as the actual interfaces to
799d26e4fcSRobert Mustacchi  *                     send and receive data.
809d26e4fcSRobert Mustacchi  *
819d26e4fcSRobert Mustacchi  *                     A big theory statement on ring management, descriptors,
829d26e4fcSRobert Mustacchi  *                     and how it ties into the OS is present there.
839d26e4fcSRobert Mustacchi  *
849d26e4fcSRobert Mustacchi  * --------------
859d26e4fcSRobert Mustacchi  * General Design
869d26e4fcSRobert Mustacchi  * --------------
879d26e4fcSRobert Mustacchi  *
889d26e4fcSRobert Mustacchi  * Before we go too far into the general way we've laid out data structures and
899d26e4fcSRobert Mustacchi  * the like, it's worth taking some time to explain how the hardware is
909d26e4fcSRobert Mustacchi  * organized. This organization informs a lot of how we do things at this time
919d26e4fcSRobert Mustacchi  * in the driver.
929d26e4fcSRobert Mustacchi  *
939d26e4fcSRobert Mustacchi  * Each physical device consists of one or more ports, which are
949d26e4fcSRobert Mustacchi  * considered physical functions in the PCI sense and thus each get enumerated
959d26e4fcSRobert Mustacchi  * by the system, resulting in an instance being created and attached to. While
969d26e4fcSRobert Mustacchi  * there are many resources that are unique to each physical function, i.e.
979d26e4fcSRobert Mustacchi  * instance of the device, there are many that are shared across all of them.
989d26e4fcSRobert Mustacchi  * Several resources have an amount reserved for each Virtual Station Interface
999d26e4fcSRobert Mustacchi  * (VSI) and then a static pool of resources, available for all functions on the
1009d26e4fcSRobert Mustacchi  * card.
1019d26e4fcSRobert Mustacchi  *
1029d26e4fcSRobert Mustacchi  * The most important resources in hardware are its transmit and receive queue
1039d26e4fcSRobert Mustacchi  * pairs (i40e_trqpair_t). These should be thought of as rings in GLDv3
1049d26e4fcSRobert Mustacchi  * parlance. There are a set number of these on each device; however, they are
1059d26e4fcSRobert Mustacchi  * statically partitioned among all of the different physical functions.
1069d26e4fcSRobert Mustacchi  *
1079d26e4fcSRobert Mustacchi  * 'Fortville' (the code name for this device family) is basically a switch. To
1089d26e4fcSRobert Mustacchi  * map MAC addresses and other things to queues, we end up having to create
1099d26e4fcSRobert Mustacchi  * Virtual Station Interfaces (VSIs) and establish forwarding rules that direct
1109d26e4fcSRobert Mustacchi  * traffic to a queue. A VSI owns a collection of queues and has a series of
1119d26e4fcSRobert Mustacchi  * forwarding rules that point to it. One way to think of this is to treat it
1129d26e4fcSRobert Mustacchi  * like MAC does a VNIC. When MAC refers to a group, a collection of rings and
1139d26e4fcSRobert Mustacchi  * classification resources, that is a VSI in i40e.
1149d26e4fcSRobert Mustacchi  *
1159d26e4fcSRobert Mustacchi  * The set of VSIs is shared across the entire device, though there may be some
1169d26e4fcSRobert Mustacchi  * amount that are reserved to each PF. Because the GLDv3 does not let us change
1179d26e4fcSRobert Mustacchi  * the number of groups dynamically, we instead statically divide this amount
1189d26e4fcSRobert Mustacchi  * evenly between all the functions that exist. In addition, we have the same
1199d26e4fcSRobert Mustacchi  * problem with the mac address forwarding rules. There are a static number that
1209d26e4fcSRobert Mustacchi  * exist shared across all the functions.
1219d26e4fcSRobert Mustacchi  *
1229d26e4fcSRobert Mustacchi  * To handle both of these resources, what we end up doing is going through and
1239d26e4fcSRobert Mustacchi  * determining which functions belong to the same device. Nominally one might do
1249d26e4fcSRobert Mustacchi  * this by having a nexus driver; however, a prime requirement for a nexus
1259d26e4fcSRobert Mustacchi  * driver is identifying the various children and activating them. While it is
1269d26e4fcSRobert Mustacchi  * possible to get this information from NVRAM, we would end up duplicating a
1279d26e4fcSRobert Mustacchi  * lot of the PCI enumeration logic. Really, at the end of the day, the device
1289d26e4fcSRobert Mustacchi  * doesn't give us the traditional identification properties we want from a
1299d26e4fcSRobert Mustacchi  * nexus driver.
1309d26e4fcSRobert Mustacchi  *
1319d26e4fcSRobert Mustacchi  * Instead, we rely on some properties that are guaranteed to be unique. While
1329d26e4fcSRobert Mustacchi  * it might be tempting to leverage the PBA or serial number of the device from
1339d26e4fcSRobert Mustacchi  * NVRAM, there is nothing that says that two devices can't be mis-programmed to
1349d26e4fcSRobert Mustacchi  * have the same values in NVRAM. Instead, we uniquely identify a group of
1359d26e4fcSRobert Mustacchi  * functions based on their parent in the /devices tree, their PCI bus and PCI
1369d26e4fcSRobert Mustacchi  * device numbers. Using any one of these on its own may not be sufficient.
1379d26e4fcSRobert Mustacchi  *
1389d26e4fcSRobert Mustacchi  * For each unique PCI device that we encounter, we'll create a i40e_device_t.
1399d26e4fcSRobert Mustacchi  * From there, because we don't have a good way to tell the GLDv3 about sharing
1409d26e4fcSRobert Mustacchi  * resources between everything, we'll end up just dividing the resources
1419d26e4fcSRobert Mustacchi  * evenly between all of the functions. Longer term, if we don't have to declare
1429d26e4fcSRobert Mustacchi  * to the GLDv3 that these resources are shared, then we'll maintain a pool and
1437267b93fSMarcel Telka  * have each PF allocate from the pool in the device, thus if only two of four
1449d26e4fcSRobert Mustacchi  * ports are being used, for example, then all of the resources can still be
1459d26e4fcSRobert Mustacchi  * used.
1469d26e4fcSRobert Mustacchi  *
1479d26e4fcSRobert Mustacchi  * -------------------------------------------
1489d26e4fcSRobert Mustacchi  * Transmit and Receive Queue Pair Allocations
1499d26e4fcSRobert Mustacchi  * -------------------------------------------
1509d26e4fcSRobert Mustacchi  *
1519d26e4fcSRobert Mustacchi  * NVRAM ends up assigning each PF its own share of the transmit and receive LAN
1529d26e4fcSRobert Mustacchi  * queue pairs; we have no way of modifying it, only observing it. From there,
1539d26e4fcSRobert Mustacchi  * it's up to us to map these queues to VSIs and VFs. Since we don't support any
1549d26e4fcSRobert Mustacchi  * VFs at this time, we only focus on assignments to VSIs.
1559d26e4fcSRobert Mustacchi  *
1569d26e4fcSRobert Mustacchi  * At the moment, we use a static mapping of transmit/receive queue pairs to a
1579d26e4fcSRobert Mustacchi  * given VSI (eg. rings to a group). Though in the fullness of time, we want to
1589d26e4fcSRobert Mustacchi  * make this something which is fully dynamic and take advantage of documented,
1599d26e4fcSRobert Mustacchi  * but not yet available functionality for adding filters based on VXLAN and
1609d26e4fcSRobert Mustacchi  * other encapsulation technologies.
1619d26e4fcSRobert Mustacchi  *
1629d26e4fcSRobert Mustacchi  * -------------------------------------
1639d26e4fcSRobert Mustacchi  * Broadcast, Multicast, and Promiscuous
1649d26e4fcSRobert Mustacchi  * -------------------------------------
1659d26e4fcSRobert Mustacchi  *
1669d26e4fcSRobert Mustacchi  * As part of the GLDv3, we need to make sure that we can handle receiving
1679d26e4fcSRobert Mustacchi  * broadcast and multicast traffic, as well as enabling promiscuous mode when
1689d26e4fcSRobert Mustacchi  * requested. GLDv3 requires that all broadcast and multicast traffic be
1699d26e4fcSRobert Mustacchi  * retrieved by the default group, eg. the first one. This is the same thing as
1709d26e4fcSRobert Mustacchi  * the default VSI.
1719d26e4fcSRobert Mustacchi  *
1729d26e4fcSRobert Mustacchi  * To receive broadcast traffic, we enable it through the admin queue, rather
1739d26e4fcSRobert Mustacchi  * than use one of our filters for it. For multicast traffic, we reserve a
1749d26e4fcSRobert Mustacchi  * certain number of the hash filters and assign them to a given PF. When we
1757267b93fSMarcel Telka  * exceed those, we then switch to using promiscuous mode for multicast traffic.
1769d26e4fcSRobert Mustacchi  *
1779d26e4fcSRobert Mustacchi  * More specifically, once we exceed the number of filters (indicated because
1789d26e4fcSRobert Mustacchi  * the i40e_t`i40e_resources.ifr_nmcastfilt ==
1799d26e4fcSRobert Mustacchi  * i40e_t`i40e_resources.ifr_nmcastfilt_used), we then instead need to toggle
1809d26e4fcSRobert Mustacchi  * promiscuous mode. If promiscuous mode is toggled then we keep track of the
1819d26e4fcSRobert Mustacchi  * number of MACs added to it by incrementing i40e_t`i40e_mcast_promisc_count.
1829d26e4fcSRobert Mustacchi  * That will stay enabled until that count reaches zero indicating that we have
1839d26e4fcSRobert Mustacchi  * only added multicast addresses that we have a corresponding entry for.
1849d26e4fcSRobert Mustacchi  *
1859d26e4fcSRobert Mustacchi  * Because MAC itself wants to toggle promiscuous mode, which includes both
1869d26e4fcSRobert Mustacchi  * unicast and multicast traffic, we go through and keep track of that
1879d26e4fcSRobert Mustacchi  * ourselves. That is maintained through the use of the i40e_t`i40e_promisc_on
1889d26e4fcSRobert Mustacchi  * member.
1899d26e4fcSRobert Mustacchi  *
1909d26e4fcSRobert Mustacchi  * --------------
1919d26e4fcSRobert Mustacchi  * VSI Management
1929d26e4fcSRobert Mustacchi  * --------------
1939d26e4fcSRobert Mustacchi  *
19409aee612SRyan Zezeski  * The PFs share 384 VSIs. The firmware creates one VSI per PF by default.
19509aee612SRyan Zezeski  * During chip start we retrieve the SEID of this VSI and assign it as the
19609aee612SRyan Zezeski  * default VSI for our VEB (one VEB per PF). We then add additional VSIs to
19709aee612SRyan Zezeski  * the VEB up to the determined number of rx groups: i40e_t`i40e_num_rx_groups.
19809aee612SRyan Zezeski  * We currently cap this number to I40E_GROUP_MAX to a) make sure all PFs can
19909aee612SRyan Zezeski  * allocate the same number of VSIs, and b) to keep the interrupt multiplexing
20009aee612SRyan Zezeski  * under control. In the future, when we improve the interrupt allocation, we
20109aee612SRyan Zezeski  * may want to revisit this cap to make better use of the available VSIs. The
20209aee612SRyan Zezeski  * VSI allocation and configuration can be found in i40e_chip_start().
2039d26e4fcSRobert Mustacchi  *
2049d26e4fcSRobert Mustacchi  * ----------------
2059d26e4fcSRobert Mustacchi  * Structure Layout
2069d26e4fcSRobert Mustacchi  * ----------------
2079d26e4fcSRobert Mustacchi  *
2089d26e4fcSRobert Mustacchi  * The following image relates the core data structures together. The primary
2099d26e4fcSRobert Mustacchi  * structure in the system is the i40e_t. It itself contains multiple rings,
2109d26e4fcSRobert Mustacchi  * i40e_trqpair_t's which contain the various transmit and receive data. The
2119d26e4fcSRobert Mustacchi  * receive data is stored outside of the i40e_trqpair_t and instead in the
2129d26e4fcSRobert Mustacchi  * i40e_rx_data_t. The i40e_t has a corresponding i40e_device_t which keeps
2139d26e4fcSRobert Mustacchi  * track of per-physical device state. Finally, for every active descriptor,
2149d26e4fcSRobert Mustacchi  * there is a corresponding control block, which is where the
2159d26e4fcSRobert Mustacchi  * i40e_rx_control_block_t and the i40e_tx_control_block_t come from.
2169d26e4fcSRobert Mustacchi  *
2179d26e4fcSRobert Mustacchi  *   +-----------------------+       +-----------------------+
2189d26e4fcSRobert Mustacchi  *   | Global i40e_t list    |       | Global Device list    |
2199d26e4fcSRobert Mustacchi  *   |                       |    +--|                       |
2209d26e4fcSRobert Mustacchi  *   | i40e_glist            |    |  | i40e_dlist            |
2219d26e4fcSRobert Mustacchi  *   +-----------------------+    |  +-----------------------+
2229d26e4fcSRobert Mustacchi  *       |                        v
2239d26e4fcSRobert Mustacchi  *       |      +------------------------+      +-----------------------+
2249d26e4fcSRobert Mustacchi  *       |      | Device-wide Structure  |----->| Device-wide Structure |--> ...
2259d26e4fcSRobert Mustacchi  *       |      | i40e_device_t          |      | i40e_device_t         |
2269d26e4fcSRobert Mustacchi  *       |      |                        |      +-----------------------+
2279d26e4fcSRobert Mustacchi  *       |      | dev_info_t *     ------+--> Parent in devices tree.
2289d26e4fcSRobert Mustacchi  *       |      | uint_t           ------+--> PCI bus number
2299d26e4fcSRobert Mustacchi  *       |      | uint_t           ------+--> PCI device number
2309d26e4fcSRobert Mustacchi  *       |      | uint_t           ------+--> Number of functions
2319d26e4fcSRobert Mustacchi  *       |      | i40e_switch_rsrc_t ----+--> Captured total switch resources
2329d26e4fcSRobert Mustacchi  *       |      | list_t           ------+-------------+
2339d26e4fcSRobert Mustacchi  *       |      +------------------------+             |
2349d26e4fcSRobert Mustacchi  *       |                           ^                 |
2359d26e4fcSRobert Mustacchi  *       |                           +--------+        |
2369d26e4fcSRobert Mustacchi  *       |                                    |        v
2379d26e4fcSRobert Mustacchi  *       |  +---------------------------+     |   +-------------------+
2389d26e4fcSRobert Mustacchi  *       +->| GLDv3 Device, per PF      |-----|-->| GLDv3 Device (PF) |--> ...
2399d26e4fcSRobert Mustacchi  *          | i40e_t                    |     |   | i40e_t            |
2409d26e4fcSRobert Mustacchi  *          | **Primary Structure**     |     |   +-------------------+
2419d26e4fcSRobert Mustacchi  *          |                           |     |
2429d26e4fcSRobert Mustacchi  *          | i40e_device_t *         --+-----+
2439d26e4fcSRobert Mustacchi  *          | i40e_state_t            --+---> Device State
2449d26e4fcSRobert Mustacchi  *          | i40e_hw_t               --+---> Intel common code structure
2459d26e4fcSRobert Mustacchi  *          | mac_handle_t            --+---> GLDv3 handle to MAC
2469d26e4fcSRobert Mustacchi  *          | ddi_periodic_t          --+---> Link activity timer
24709aee612SRyan Zezeski  *          | i40e_vsi_t *            --+---> Array of VSIs
2489d26e4fcSRobert Mustacchi  *          | i40e_func_rsrc_t        --+---> Available hardware resources
2499d26e4fcSRobert Mustacchi  *          | i40e_switch_rsrc_t *    --+---> Switch resource snapshot
2509d26e4fcSRobert Mustacchi  *          | i40e_sdu                --+---> Current MTU
2519d26e4fcSRobert Mustacchi  *          | i40e_frame_max          --+---> Current HW frame size
2529d26e4fcSRobert Mustacchi  *          | i40e_uaddr_t *          --+---> Array of assigned unicast MACs
2539d26e4fcSRobert Mustacchi  *          | i40e_maddr_t *          --+---> Array of assigned multicast MACs
2549d26e4fcSRobert Mustacchi  *          | i40e_mcast_promisc_count -+---> Active multicast state
2559d26e4fcSRobert Mustacchi  *          | i40e_promisc_on         --+---> Current promiscuous mode state
25609aee612SRyan Zezeski  *          | uint_t                  --+---> Number of transmit/receive pairs
25709aee612SRyan Zezeski  *          | i40e_rx_group_t *       --+---> Array of Rx groups
2589d26e4fcSRobert Mustacchi  *          | kstat_t *               --+---> PF kstats
2599d26e4fcSRobert Mustacchi  *          | i40e_pf_stats_t         --+---> PF kstat backing data
2609d26e4fcSRobert Mustacchi  *          | i40e_trqpair_t *        --+---------+
2619d26e4fcSRobert Mustacchi  *          +---------------------------+         |
2629d26e4fcSRobert Mustacchi  *                                                |
2639d26e4fcSRobert Mustacchi  *                                                v
2649d26e4fcSRobert Mustacchi  *  +-------------------------------+       +-----------------------------+
2659d26e4fcSRobert Mustacchi  *  | Transmit/Receive Queue Pair   |-------| Transmit/Receive Queue Pair |->...
2669d26e4fcSRobert Mustacchi  *  | i40e_trqpair_t                |       | i40e_trqpair_t              |
2679d26e4fcSRobert Mustacchi  *  | Ring Data Structure           |       +-----------------------------+
2689d26e4fcSRobert Mustacchi  *  |                               |
2699d26e4fcSRobert Mustacchi  *  | mac_ring_handle_t             +--> MAC RX ring handle
2709d26e4fcSRobert Mustacchi  *  | mac_ring_handle_t             +--> MAC TX ring handle
2719d26e4fcSRobert Mustacchi  *  | i40e_rxq_stat_t             --+--> RX Queue stats
2729d26e4fcSRobert Mustacchi  *  | i40e_txq_stat_t             --+--> TX Queue stats
2739d26e4fcSRobert Mustacchi  *  | uint32_t (tx ring size)       +--> TX Ring Size
2749d26e4fcSRobert Mustacchi  *  | uint32_t (tx free list size)  +--> TX Free List Size
2759d26e4fcSRobert Mustacchi  *  | i40e_dma_buffer_t     --------+--> TX Descriptor ring DMA
2769d26e4fcSRobert Mustacchi  *  | i40e_tx_desc_t *      --------+--> TX descriptor ring
2779d26e4fcSRobert Mustacchi  *  | volatile uint32_t *           +--> TX Write back head
2789d26e4fcSRobert Mustacchi  *  | uint32_t               -------+--> TX ring head
2799d26e4fcSRobert Mustacchi  *  | uint32_t               -------+--> TX ring tail
2809d26e4fcSRobert Mustacchi  *  | uint32_t               -------+--> Num TX desc free
2819d26e4fcSRobert Mustacchi  *  | i40e_tx_control_block_t *   --+--> TX control block array  ---+
2829d26e4fcSRobert Mustacchi  *  | i40e_tx_control_block_t **  --+--> TCB work list          ----+
2839d26e4fcSRobert Mustacchi  *  | i40e_tx_control_block_t **  --+--> TCB free list           ---+
2849d26e4fcSRobert Mustacchi  *  | uint32_t               -------+--> Free TCB count             |
2859d26e4fcSRobert Mustacchi  *  | i40e_rx_data_t *       -------+--+                            v
2869d26e4fcSRobert Mustacchi  *  +-------------------------------+  |          +---------------------------+
2879d26e4fcSRobert Mustacchi  *                                     |          | Per-TX Frame Metadata     |
2889d26e4fcSRobert Mustacchi  *                                     |          | i40e_tx_control_block_t   |
2899d26e4fcSRobert Mustacchi  *                +--------------------+          |                           |
2909d26e4fcSRobert Mustacchi  *                |           mblk to transmit <--+---      mblk_t *          |
2919d26e4fcSRobert Mustacchi  *                |           type of transmit <--+---      i40e_tx_type_t    |
2929d26e4fcSRobert Mustacchi  *                |              TX DMA handle <--+---      ddi_dma_handle_t  |
2939d26e4fcSRobert Mustacchi  *                v              TX DMA buffer <--+---      i40e_dma_buffer_t |
2949d26e4fcSRobert Mustacchi  *    +------------------------------+            +---------------------------+
2959d26e4fcSRobert Mustacchi  *    | Core Receive Data            |
2969d26e4fcSRobert Mustacchi  *    | i40e_rx_data_t               |
2979d26e4fcSRobert Mustacchi  *    |                              |
2989d26e4fcSRobert Mustacchi  *    | i40e_dma_buffer_t          --+--> RX descriptor DMA Data
2999d26e4fcSRobert Mustacchi  *    | i40e_rx_desc_t             --+--> RX descriptor ring
3009d26e4fcSRobert Mustacchi  *    | uint32_t                   --+--> Next free desc.
3019d26e4fcSRobert Mustacchi  *    | i40e_rx_control_block_t *  --+--> RX Control Block Array  ---+
3029d26e4fcSRobert Mustacchi  *    | i40e_rx_control_block_t ** --+--> RCB work list           ---+
3039d26e4fcSRobert Mustacchi  *    | i40e_rx_control_block_t ** --+--> RCB free list           ---+
3049d26e4fcSRobert Mustacchi  *    +------------------------------+                               |
3059d26e4fcSRobert Mustacchi  *                ^                                                  |
3069d26e4fcSRobert Mustacchi  *                |     +---------------------------+                |
3079d26e4fcSRobert Mustacchi  *                |     | Per-RX Frame Metadata     |<---------------+
3089d26e4fcSRobert Mustacchi  *                |     | i40e_rx_control_block_t   |
3099d26e4fcSRobert Mustacchi  *                |     |                           |
3109d26e4fcSRobert Mustacchi  *                |     | mblk_t *              ----+--> Received mblk_t data
3119d26e4fcSRobert Mustacchi  *                |     | uint32_t              ----+--> Reference count
3129d26e4fcSRobert Mustacchi  *                |     | i40e_dma_buffer_t     ----+--> Receive data DMA info
3139d26e4fcSRobert Mustacchi  *                |     | frtn_t                ----+--> mblk free function info
3149d26e4fcSRobert Mustacchi  *                +-----+-- i40e_rx_data_t *        |
3159d26e4fcSRobert Mustacchi  *                      +---------------------------+
3169d26e4fcSRobert Mustacchi  *
3179d26e4fcSRobert Mustacchi  * -------------
3189d26e4fcSRobert Mustacchi  * Lock Ordering
3199d26e4fcSRobert Mustacchi  * -------------
3209d26e4fcSRobert Mustacchi  *
3219d26e4fcSRobert Mustacchi  * In order to ensure that we don't deadlock, the following represents the
3229d26e4fcSRobert Mustacchi  * lock order being used. When grabbing locks, follow the following order. Lower
3239d26e4fcSRobert Mustacchi  * numbers are more important. Thus, the i40e_glock which is number 0, must be
3249d26e4fcSRobert Mustacchi  * taken before any other locks in the driver. On the other hand, the
3259d26e4fcSRobert Mustacchi  * i40e_t`i40e_stat_lock, has the highest number because it's the least
3269d26e4fcSRobert Mustacchi  * important lock. Note, that just because one lock is higher than another does
3279d26e4fcSRobert Mustacchi  * not mean that all intermediary locks are required.
3289d26e4fcSRobert Mustacchi  *
3299d26e4fcSRobert Mustacchi  * 0) i40e_glock
3309d26e4fcSRobert Mustacchi  * 1) i40e_t`i40e_general_lock
3319d26e4fcSRobert Mustacchi  *
3329d26e4fcSRobert Mustacchi  * 2) i40e_trqpair_t`itrq_rx_lock
3339d26e4fcSRobert Mustacchi  * 3) i40e_trqpair_t`itrq_tx_lock
334aa2a44afSPaul Winder  * 4) i40e_trqpair_t`itrq_intr_lock
335aa2a44afSPaul Winder  * 5) i40e_t`i40e_rx_pending_lock
336aa2a44afSPaul Winder  * 6) i40e_trqpair_t`itrq_tcb_lock
3379d26e4fcSRobert Mustacchi  *
338aa2a44afSPaul Winder  * 7) i40e_t`i40e_stat_lock
3399d26e4fcSRobert Mustacchi  *
3409d26e4fcSRobert Mustacchi  * Rules and expectations:
3419d26e4fcSRobert Mustacchi  *
3429d26e4fcSRobert Mustacchi  * 1) A thread holding locks belonging to one PF should not hold locks
3439d26e4fcSRobert Mustacchi  * belonging to a second. If for some reason this becomes necessary, locks
3449d26e4fcSRobert Mustacchi  * should be grabbed based on the list order in the i40e_device_t, which
3459d26e4fcSRobert Mustacchi  * implies that the i40e_glock is held.
3469d26e4fcSRobert Mustacchi  *
3479d26e4fcSRobert Mustacchi  * 2) When grabbing locks between multiple transmit and receive queues, the
3489d26e4fcSRobert Mustacchi  * locks for the lowest number transmit/receive queue should be grabbed first.
3499d26e4fcSRobert Mustacchi  *
3509d26e4fcSRobert Mustacchi  * 3) When grabbing both the transmit and receive lock for a given queue, always
3519d26e4fcSRobert Mustacchi  * grab i40e_trqpair_t`itrq_rx_lock before the i40e_trqpair_t`itrq_tx_lock.
3529d26e4fcSRobert Mustacchi  *
3539d26e4fcSRobert Mustacchi  * 4) The following pairs of locks are not expected to be held at the same time:
3549d26e4fcSRobert Mustacchi  *
3559d26e4fcSRobert Mustacchi  * o i40e_t`i40e_rx_pending_lock and i40e_trqpair_t`itrq_tcb_lock
356aa2a44afSPaul Winder  * o i40e_trqpair_t`itrq_intr_lock is not expected to be held with any
357aa2a44afSPaul Winder  *   other lock except i40e_t`i40e_general_lock in mc_start(9E) and
358aa2a44afSPaul Winder  *   mc_stop(9E).
3599d26e4fcSRobert Mustacchi  *
3609d26e4fcSRobert Mustacchi  * -----------
3619d26e4fcSRobert Mustacchi  * Future Work
3629d26e4fcSRobert Mustacchi  * -----------
3639d26e4fcSRobert Mustacchi  *
3649d26e4fcSRobert Mustacchi  * At the moment the i40e driver is rather bare bones, allowing us to start
3659d26e4fcSRobert Mustacchi  * getting data flowing and folks using it while we develop additional features.
3669d26e4fcSRobert Mustacchi  * While bugs have been filed to cover this future work, the following gives an
3679d26e4fcSRobert Mustacchi  * overview of expected work:
3689d26e4fcSRobert Mustacchi  *
3699d26e4fcSRobert Mustacchi  *  o DMA binding and breaking up the locking in ring recycling.
3709d26e4fcSRobert Mustacchi  *  o Enhanced detection of device errors
3719d26e4fcSRobert Mustacchi  *  o Participation in IRM
3729d26e4fcSRobert Mustacchi  *  o FMA device reset
3739d26e4fcSRobert Mustacchi  *  o Stall detection, temperature error detection, etc.
3749d26e4fcSRobert Mustacchi  *  o More dynamic resource pools
3759d26e4fcSRobert Mustacchi  */
3769d26e4fcSRobert Mustacchi 
3779d26e4fcSRobert Mustacchi #include "i40e_sw.h"
3789d26e4fcSRobert Mustacchi 
/*
 * Human-readable description of this driver, including its version number.
 * NOTE(review): presumably handed to the module linkage structures; that use
 * is not visible in this part of the file -- confirm.
 */
37909aee612SRyan Zezeski static char i40e_ident[] = "Intel 10/40Gb Ethernet v1.0.3";
3809d26e4fcSRobert Mustacchi 
3819d26e4fcSRobert Mustacchi /*
3829d26e4fcSRobert Mustacchi  * The i40e_glock primarily protects the lists below and the i40e_device_t
3839d26e4fcSRobert Mustacchi  * structures.
3849d26e4fcSRobert Mustacchi  */
/* Lock number 0 in the lock ordering: taken before any other driver lock. */
3859d26e4fcSRobert Mustacchi static kmutex_t i40e_glock;
/* Global list of i40e_t structures (the per-PF GLDv3 devices). */
3869d26e4fcSRobert Mustacchi static list_t i40e_glist;
/* Global list of i40e_device_t structures (the device-wide state). */
3879d26e4fcSRobert Mustacchi static list_t i40e_dlist;
3889d26e4fcSRobert Mustacchi 
3899d26e4fcSRobert Mustacchi /*
3909d26e4fcSRobert Mustacchi  * Access attributes for register mapping.
3919d26e4fcSRobert Mustacchi  */
3929d26e4fcSRobert Mustacchi static ddi_device_acc_attr_t i40e_regs_acc_attr = {
3939d26e4fcSRobert Mustacchi 	DDI_DEVICE_ATTR_V1,
3949d26e4fcSRobert Mustacchi 	DDI_STRUCTURE_LE_ACC,
3959d26e4fcSRobert Mustacchi 	DDI_STRICTORDER_ACC,
3969d26e4fcSRobert Mustacchi 	DDI_FLAGERR_ACC
3979d26e4fcSRobert Mustacchi };
3989d26e4fcSRobert Mustacchi 
3999d26e4fcSRobert Mustacchi /*
4009d26e4fcSRobert Mustacchi  * Logging function for this driver.
4019d26e4fcSRobert Mustacchi  */
4029d26e4fcSRobert Mustacchi static void
i40e_dev_err(i40e_t * i40e,int level,boolean_t console,const char * fmt,va_list ap)4039d26e4fcSRobert Mustacchi i40e_dev_err(i40e_t *i40e, int level, boolean_t console, const char *fmt,
4049d26e4fcSRobert Mustacchi     va_list ap)
4059d26e4fcSRobert Mustacchi {
4069d26e4fcSRobert Mustacchi 	char buf[1024];
4079d26e4fcSRobert Mustacchi 
4089d26e4fcSRobert Mustacchi 	(void) vsnprintf(buf, sizeof (buf), fmt, ap);
4099d26e4fcSRobert Mustacchi 
4109d26e4fcSRobert Mustacchi 	if (i40e == NULL) {
4119d26e4fcSRobert Mustacchi 		cmn_err(level, (console) ? "%s: %s" : "!%s: %s",
4129d26e4fcSRobert Mustacchi 		    I40E_MODULE_NAME, buf);
4139d26e4fcSRobert Mustacchi 	} else {
4149d26e4fcSRobert Mustacchi 		dev_err(i40e->i40e_dip, level, (console) ? "%s" : "!%s",
4159d26e4fcSRobert Mustacchi 		    buf);
4169d26e4fcSRobert Mustacchi 	}
4179d26e4fcSRobert Mustacchi }
4189d26e4fcSRobert Mustacchi 
4199d26e4fcSRobert Mustacchi /*
4209d26e4fcSRobert Mustacchi  * Because there's the stupid trailing-comma problem with the C preprocessor
4219d26e4fcSRobert Mustacchi  * and variable arguments, I need to instantiate these.	 Pardon the redundant
4229d26e4fcSRobert Mustacchi  * code.
4239d26e4fcSRobert Mustacchi  */
4249d26e4fcSRobert Mustacchi /*PRINTFLIKE2*/
4259d26e4fcSRobert Mustacchi void
i40e_error(i40e_t * i40e,const char * fmt,...)4269d26e4fcSRobert Mustacchi i40e_error(i40e_t *i40e, const char *fmt, ...)
4279d26e4fcSRobert Mustacchi {
4289d26e4fcSRobert Mustacchi 	va_list ap;
4299d26e4fcSRobert Mustacchi 
4309d26e4fcSRobert Mustacchi 	va_start(ap, fmt);
4319d26e4fcSRobert Mustacchi 	i40e_dev_err(i40e, CE_WARN, B_FALSE, fmt, ap);
4329d26e4fcSRobert Mustacchi 	va_end(ap);
4339d26e4fcSRobert Mustacchi }
4349d26e4fcSRobert Mustacchi 
4359d26e4fcSRobert Mustacchi /*PRINTFLIKE2*/
4369d26e4fcSRobert Mustacchi void
i40e_log(i40e_t * i40e,const char * fmt,...)4379d26e4fcSRobert Mustacchi i40e_log(i40e_t *i40e, const char *fmt, ...)
4389d26e4fcSRobert Mustacchi {
4399d26e4fcSRobert Mustacchi 	va_list ap;
4409d26e4fcSRobert Mustacchi 
4419d26e4fcSRobert Mustacchi 	va_start(ap, fmt);
4429d26e4fcSRobert Mustacchi 	i40e_dev_err(i40e, CE_NOTE, B_FALSE, fmt, ap);
4439d26e4fcSRobert Mustacchi 	va_end(ap);
4449d26e4fcSRobert Mustacchi }
4459d26e4fcSRobert Mustacchi 
4469d26e4fcSRobert Mustacchi /*PRINTFLIKE2*/
4479d26e4fcSRobert Mustacchi void
i40e_notice(i40e_t * i40e,const char * fmt,...)4489d26e4fcSRobert Mustacchi i40e_notice(i40e_t *i40e, const char *fmt, ...)
4499d26e4fcSRobert Mustacchi {
4509d26e4fcSRobert Mustacchi 	va_list ap;
4519d26e4fcSRobert Mustacchi 
4529d26e4fcSRobert Mustacchi 	va_start(ap, fmt);
4539d26e4fcSRobert Mustacchi 	i40e_dev_err(i40e, CE_NOTE, B_TRUE, fmt, ap);
4549d26e4fcSRobert Mustacchi 	va_end(ap);
4559d26e4fcSRobert Mustacchi }
4569d26e4fcSRobert Mustacchi 
457b9d34b9dSRobert Mustacchi /*
458b9d34b9dSRobert Mustacchi  * Various parts of the driver need to know if the controller is from the X722
459b9d34b9dSRobert Mustacchi  * family, which has a few additional capabilities and different programming
460b9d34b9dSRobert Mustacchi  * means. We don't consider virtual functions as part of this as they are quite
461b9d34b9dSRobert Mustacchi  * different and will require substantially more work.
462b9d34b9dSRobert Mustacchi  */
463b9d34b9dSRobert Mustacchi static boolean_t
i40e_is_x722(i40e_t * i40e)464b9d34b9dSRobert Mustacchi i40e_is_x722(i40e_t *i40e)
465b9d34b9dSRobert Mustacchi {
466b9d34b9dSRobert Mustacchi 	return (i40e->i40e_hw_space.mac.type == I40E_MAC_X722);
467b9d34b9dSRobert Mustacchi }
468b9d34b9dSRobert Mustacchi 
4699d26e4fcSRobert Mustacchi static void
i40e_device_rele(i40e_t * i40e)4709d26e4fcSRobert Mustacchi i40e_device_rele(i40e_t *i40e)
4719d26e4fcSRobert Mustacchi {
4729d26e4fcSRobert Mustacchi 	i40e_device_t *idp = i40e->i40e_device;
4739d26e4fcSRobert Mustacchi 
4749d26e4fcSRobert Mustacchi 	if (idp == NULL)
4759d26e4fcSRobert Mustacchi 		return;
4769d26e4fcSRobert Mustacchi 
4779d26e4fcSRobert Mustacchi 	mutex_enter(&i40e_glock);
4789d26e4fcSRobert Mustacchi 	VERIFY(idp->id_nreg > 0);
4799d26e4fcSRobert Mustacchi 	list_remove(&idp->id_i40e_list, i40e);
4809d26e4fcSRobert Mustacchi 	idp->id_nreg--;
4819d26e4fcSRobert Mustacchi 	if (idp->id_nreg == 0) {
4829d26e4fcSRobert Mustacchi 		list_remove(&i40e_dlist, idp);
4839d26e4fcSRobert Mustacchi 		list_destroy(&idp->id_i40e_list);
4849d26e4fcSRobert Mustacchi 		kmem_free(idp->id_rsrcs, sizeof (i40e_switch_rsrc_t) *
4859d26e4fcSRobert Mustacchi 		    idp->id_rsrcs_alloc);
4869d26e4fcSRobert Mustacchi 		kmem_free(idp, sizeof (i40e_device_t));
4879d26e4fcSRobert Mustacchi 	}
4889d26e4fcSRobert Mustacchi 	i40e->i40e_device = NULL;
4899d26e4fcSRobert Mustacchi 	mutex_exit(&i40e_glock);
4909d26e4fcSRobert Mustacchi }
4919d26e4fcSRobert Mustacchi 
4929d26e4fcSRobert Mustacchi static i40e_device_t *
i40e_device_find(i40e_t * i40e,dev_info_t * parent,uint_t bus,uint_t device)4939d26e4fcSRobert Mustacchi i40e_device_find(i40e_t *i40e, dev_info_t *parent, uint_t bus, uint_t device)
4949d26e4fcSRobert Mustacchi {
4959d26e4fcSRobert Mustacchi 	i40e_device_t *idp;
4969d26e4fcSRobert Mustacchi 	mutex_enter(&i40e_glock);
4979d26e4fcSRobert Mustacchi 	for (idp = list_head(&i40e_dlist); idp != NULL;
4989d26e4fcSRobert Mustacchi 	    idp = list_next(&i40e_dlist, idp)) {
4999d26e4fcSRobert Mustacchi 		if (idp->id_parent == parent && idp->id_pci_bus == bus &&
5009d26e4fcSRobert Mustacchi 		    idp->id_pci_device == device) {
5019d26e4fcSRobert Mustacchi 			break;
5029d26e4fcSRobert Mustacchi 		}
5039d26e4fcSRobert Mustacchi 	}
5049d26e4fcSRobert Mustacchi 
5059d26e4fcSRobert Mustacchi 	if (idp != NULL) {
5069d26e4fcSRobert Mustacchi 		VERIFY(idp->id_nreg < idp->id_nfuncs);
5079d26e4fcSRobert Mustacchi 		idp->id_nreg++;
5089d26e4fcSRobert Mustacchi 	} else {
5099d26e4fcSRobert Mustacchi 		i40e_hw_t *hw = &i40e->i40e_hw_space;
5109d26e4fcSRobert Mustacchi 		ASSERT(hw->num_ports > 0);
5119d26e4fcSRobert Mustacchi 		ASSERT(hw->num_partitions > 0);
5129d26e4fcSRobert Mustacchi 
5139d26e4fcSRobert Mustacchi 		/*
5149d26e4fcSRobert Mustacchi 		 * The Intel common code doesn't exactly keep the number of PCI
5159d26e4fcSRobert Mustacchi 		 * functions. But it calculates it during discovery of
5169d26e4fcSRobert Mustacchi 		 * partitions and ports. So what we do is undo the calculation
5179d26e4fcSRobert Mustacchi 		 * that it does originally, as functions are evenly spread
5189d26e4fcSRobert Mustacchi 		 * across ports in the rare case of partitions.
5199d26e4fcSRobert Mustacchi 		 */
5209d26e4fcSRobert Mustacchi 		idp = kmem_alloc(sizeof (i40e_device_t), KM_SLEEP);
5219d26e4fcSRobert Mustacchi 		idp->id_parent = parent;
5229d26e4fcSRobert Mustacchi 		idp->id_pci_bus = bus;
5239d26e4fcSRobert Mustacchi 		idp->id_pci_device = device;
5249d26e4fcSRobert Mustacchi 		idp->id_nfuncs = hw->num_ports * hw->num_partitions;
5259d26e4fcSRobert Mustacchi 		idp->id_nreg = 1;
5269d26e4fcSRobert Mustacchi 		idp->id_rsrcs_alloc = i40e->i40e_switch_rsrc_alloc;
5279d26e4fcSRobert Mustacchi 		idp->id_rsrcs_act = i40e->i40e_switch_rsrc_actual;
5289d26e4fcSRobert Mustacchi 		idp->id_rsrcs = kmem_alloc(sizeof (i40e_switch_rsrc_t) *
5299d26e4fcSRobert Mustacchi 		    idp->id_rsrcs_alloc, KM_SLEEP);
5309d26e4fcSRobert Mustacchi 		bcopy(i40e->i40e_switch_rsrcs, idp->id_rsrcs,
5319d26e4fcSRobert Mustacchi 		    sizeof (i40e_switch_rsrc_t) * idp->id_rsrcs_alloc);
5329d26e4fcSRobert Mustacchi 		list_create(&idp->id_i40e_list, sizeof (i40e_t),
5339d26e4fcSRobert Mustacchi 		    offsetof(i40e_t, i40e_dlink));
5349d26e4fcSRobert Mustacchi 
5359d26e4fcSRobert Mustacchi 		list_insert_tail(&i40e_dlist, idp);
5369d26e4fcSRobert Mustacchi 	}
5379d26e4fcSRobert Mustacchi 
5389d26e4fcSRobert Mustacchi 	list_insert_tail(&idp->id_i40e_list, i40e);
5399d26e4fcSRobert Mustacchi 	mutex_exit(&i40e_glock);
5409d26e4fcSRobert Mustacchi 
5419d26e4fcSRobert Mustacchi 	return (idp);
5429d26e4fcSRobert Mustacchi }
5439d26e4fcSRobert Mustacchi 
5449d26e4fcSRobert Mustacchi static void
i40e_link_state_set(i40e_t * i40e,link_state_t state)5459d26e4fcSRobert Mustacchi i40e_link_state_set(i40e_t *i40e, link_state_t state)
5469d26e4fcSRobert Mustacchi {
5479d26e4fcSRobert Mustacchi 	if (i40e->i40e_link_state == state)
5489d26e4fcSRobert Mustacchi 		return;
5499d26e4fcSRobert Mustacchi 
5509d26e4fcSRobert Mustacchi 	i40e->i40e_link_state = state;
5519d26e4fcSRobert Mustacchi 	mac_link_update(i40e->i40e_mac_hdl, i40e->i40e_link_state);
5529d26e4fcSRobert Mustacchi }
5539d26e4fcSRobert Mustacchi 
5549d26e4fcSRobert Mustacchi /*
5559d26e4fcSRobert Mustacchi  * This is a basic link check routine. Mostly we're using this just to see
5569d26e4fcSRobert Mustacchi  * if we can get any accurate information about the state of the link being
5579d26e4fcSRobert Mustacchi  * up or down, as well as updating the link state, speed, etc. information.
5589d26e4fcSRobert Mustacchi  */
5599d26e4fcSRobert Mustacchi void
i40e_link_check(i40e_t * i40e)5609d26e4fcSRobert Mustacchi i40e_link_check(i40e_t *i40e)
5619d26e4fcSRobert Mustacchi {
5629d26e4fcSRobert Mustacchi 	i40e_hw_t *hw = &i40e->i40e_hw_space;
5639d26e4fcSRobert Mustacchi 	boolean_t ls;
5649d26e4fcSRobert Mustacchi 	int ret;
5659d26e4fcSRobert Mustacchi 
5669d26e4fcSRobert Mustacchi 	ASSERT(MUTEX_HELD(&i40e->i40e_general_lock));
5679d26e4fcSRobert Mustacchi 
5689d26e4fcSRobert Mustacchi 	hw->phy.get_link_info = B_TRUE;
5699d26e4fcSRobert Mustacchi 	if ((ret = i40e_get_link_status(hw, &ls)) != I40E_SUCCESS) {
5709d26e4fcSRobert Mustacchi 		i40e->i40e_s_link_status_errs++;
5719d26e4fcSRobert Mustacchi 		i40e->i40e_s_link_status_lasterr = ret;
5729d26e4fcSRobert Mustacchi 		return;
5739d26e4fcSRobert Mustacchi 	}
5749d26e4fcSRobert Mustacchi 
5759d26e4fcSRobert Mustacchi 	/*
5769d26e4fcSRobert Mustacchi 	 * Firmware abstracts all of the mac and phy information for us, so we
5779d26e4fcSRobert Mustacchi 	 * can use i40e_get_link_status to determine the current state.
5789d26e4fcSRobert Mustacchi 	 */
5799d26e4fcSRobert Mustacchi 	if (ls == B_TRUE) {
5809d26e4fcSRobert Mustacchi 		enum i40e_aq_link_speed speed;
5819d26e4fcSRobert Mustacchi 
5829d26e4fcSRobert Mustacchi 		speed = i40e_get_link_speed(hw);
5839d26e4fcSRobert Mustacchi 
5849d26e4fcSRobert Mustacchi 		/*
5859d26e4fcSRobert Mustacchi 		 * Translate from an i40e value to a value in Mbits/s.
5869d26e4fcSRobert Mustacchi 		 */
5879d26e4fcSRobert Mustacchi 		switch (speed) {
5889d26e4fcSRobert Mustacchi 		case I40E_LINK_SPEED_100MB:
5899d26e4fcSRobert Mustacchi 			i40e->i40e_link_speed = 100;
5909d26e4fcSRobert Mustacchi 			break;
5919d26e4fcSRobert Mustacchi 		case I40E_LINK_SPEED_1GB:
5929d26e4fcSRobert Mustacchi 			i40e->i40e_link_speed = 1000;
5939d26e4fcSRobert Mustacchi 			break;
594*df36e06dSRobert Mustacchi 		case I40E_LINK_SPEED_2_5GB:
595*df36e06dSRobert Mustacchi 			i40e->i40e_link_speed = 2500;
596*df36e06dSRobert Mustacchi 			break;
597*df36e06dSRobert Mustacchi 		case I40E_LINK_SPEED_5GB:
598*df36e06dSRobert Mustacchi 			i40e->i40e_link_speed = 5000;
599*df36e06dSRobert Mustacchi 			break;
6009d26e4fcSRobert Mustacchi 		case I40E_LINK_SPEED_10GB:
6019d26e4fcSRobert Mustacchi 			i40e->i40e_link_speed = 10000;
6029d26e4fcSRobert Mustacchi 			break;
6039d26e4fcSRobert Mustacchi 		case I40E_LINK_SPEED_20GB:
6049d26e4fcSRobert Mustacchi 			i40e->i40e_link_speed = 20000;
6059d26e4fcSRobert Mustacchi 			break;
6069d26e4fcSRobert Mustacchi 		case I40E_LINK_SPEED_40GB:
6079d26e4fcSRobert Mustacchi 			i40e->i40e_link_speed = 40000;
6089d26e4fcSRobert Mustacchi 			break;
6093d75a287SRobert Mustacchi 		case I40E_LINK_SPEED_25GB:
6103d75a287SRobert Mustacchi 			i40e->i40e_link_speed = 25000;
6113d75a287SRobert Mustacchi 			break;
6129d26e4fcSRobert Mustacchi 		default:
6139d26e4fcSRobert Mustacchi 			i40e->i40e_link_speed = 0;
6149d26e4fcSRobert Mustacchi 			break;
6159d26e4fcSRobert Mustacchi 		}
6169d26e4fcSRobert Mustacchi 
6179d26e4fcSRobert Mustacchi 		/*
6189d26e4fcSRobert Mustacchi 		 * At this time, hardware does not support half-duplex
6199d26e4fcSRobert Mustacchi 		 * operation, hence why we don't ask the hardware about our
6209d26e4fcSRobert Mustacchi 		 * current speed.
6219d26e4fcSRobert Mustacchi 		 */
6229d26e4fcSRobert Mustacchi 		i40e->i40e_link_duplex = LINK_DUPLEX_FULL;
6239d26e4fcSRobert Mustacchi 		i40e_link_state_set(i40e, LINK_STATE_UP);
6249d26e4fcSRobert Mustacchi 	} else {
6259d26e4fcSRobert Mustacchi 		i40e->i40e_link_speed = 0;
6269d26e4fcSRobert Mustacchi 		i40e->i40e_link_duplex = 0;
6279d26e4fcSRobert Mustacchi 		i40e_link_state_set(i40e, LINK_STATE_DOWN);
6289d26e4fcSRobert Mustacchi 	}
6299d26e4fcSRobert Mustacchi }
6309d26e4fcSRobert Mustacchi 
6319d26e4fcSRobert Mustacchi static void
i40e_rem_intrs(i40e_t * i40e)6329d26e4fcSRobert Mustacchi i40e_rem_intrs(i40e_t *i40e)
6339d26e4fcSRobert Mustacchi {
6349d26e4fcSRobert Mustacchi 	int i, rc;
6359d26e4fcSRobert Mustacchi 
6369d26e4fcSRobert Mustacchi 	for (i = 0; i < i40e->i40e_intr_count; i++) {
6379d26e4fcSRobert Mustacchi 		rc = ddi_intr_free(i40e->i40e_intr_handles[i]);
6389d26e4fcSRobert Mustacchi 		if (rc != DDI_SUCCESS) {
6399d26e4fcSRobert Mustacchi 			i40e_log(i40e, "failed to free interrupt %d: %d",
6409d26e4fcSRobert Mustacchi 			    i, rc);
6419d26e4fcSRobert Mustacchi 		}
6429d26e4fcSRobert Mustacchi 	}
6439d26e4fcSRobert Mustacchi 
6449d26e4fcSRobert Mustacchi 	kmem_free(i40e->i40e_intr_handles, i40e->i40e_intr_size);
6459d26e4fcSRobert Mustacchi 	i40e->i40e_intr_handles = NULL;
6469d26e4fcSRobert Mustacchi }
6479d26e4fcSRobert Mustacchi 
6489d26e4fcSRobert Mustacchi static void
i40e_rem_intr_handlers(i40e_t * i40e)6499d26e4fcSRobert Mustacchi i40e_rem_intr_handlers(i40e_t *i40e)
6509d26e4fcSRobert Mustacchi {
6519d26e4fcSRobert Mustacchi 	int i, rc;
6529d26e4fcSRobert Mustacchi 
6539d26e4fcSRobert Mustacchi 	for (i = 0; i < i40e->i40e_intr_count; i++) {
6549d26e4fcSRobert Mustacchi 		rc = ddi_intr_remove_handler(i40e->i40e_intr_handles[i]);
6559d26e4fcSRobert Mustacchi 		if (rc != DDI_SUCCESS) {
6569d26e4fcSRobert Mustacchi 			i40e_log(i40e, "failed to remove interrupt %d: %d",
6579d26e4fcSRobert Mustacchi 			    i, rc);
6589d26e4fcSRobert Mustacchi 		}
6599d26e4fcSRobert Mustacchi 	}
6609d26e4fcSRobert Mustacchi }
6619d26e4fcSRobert Mustacchi 
6629d26e4fcSRobert Mustacchi /*
6639d26e4fcSRobert Mustacchi  * illumos Fault Management Architecture (FMA) support.
6649d26e4fcSRobert Mustacchi  */
6659d26e4fcSRobert Mustacchi 
6669d26e4fcSRobert Mustacchi int
i40e_check_acc_handle(ddi_acc_handle_t handle)6679d26e4fcSRobert Mustacchi i40e_check_acc_handle(ddi_acc_handle_t handle)
6689d26e4fcSRobert Mustacchi {
6699d26e4fcSRobert Mustacchi 	ddi_fm_error_t de;
6709d26e4fcSRobert Mustacchi 
6719d26e4fcSRobert Mustacchi 	ddi_fm_acc_err_get(handle, &de, DDI_FME_VERSION);
6729d26e4fcSRobert Mustacchi 	ddi_fm_acc_err_clear(handle, DDI_FME_VERSION);
6739d26e4fcSRobert Mustacchi 	return (de.fme_status);
6749d26e4fcSRobert Mustacchi }
6759d26e4fcSRobert Mustacchi 
6769d26e4fcSRobert Mustacchi int
i40e_check_dma_handle(ddi_dma_handle_t handle)6779d26e4fcSRobert Mustacchi i40e_check_dma_handle(ddi_dma_handle_t handle)
6789d26e4fcSRobert Mustacchi {
6799d26e4fcSRobert Mustacchi 	ddi_fm_error_t de;
6809d26e4fcSRobert Mustacchi 
6819d26e4fcSRobert Mustacchi 	ddi_fm_dma_err_get(handle, &de, DDI_FME_VERSION);
6829d26e4fcSRobert Mustacchi 	return (de.fme_status);
6839d26e4fcSRobert Mustacchi }
6849d26e4fcSRobert Mustacchi 
6859d26e4fcSRobert Mustacchi /*
6869d26e4fcSRobert Mustacchi  * Fault service error handling callback function.
6879d26e4fcSRobert Mustacchi  */
6889d26e4fcSRobert Mustacchi /* ARGSUSED */
6899d26e4fcSRobert Mustacchi static int
i40e_fm_error_cb(dev_info_t * dip,ddi_fm_error_t * err,const void * impl_data)6909d26e4fcSRobert Mustacchi i40e_fm_error_cb(dev_info_t *dip, ddi_fm_error_t *err, const void *impl_data)
6919d26e4fcSRobert Mustacchi {
6929d26e4fcSRobert Mustacchi 	pci_ereport_post(dip, err, NULL);
6939d26e4fcSRobert Mustacchi 	return (err->fme_status);
6949d26e4fcSRobert Mustacchi }
6959d26e4fcSRobert Mustacchi 
6969d26e4fcSRobert Mustacchi static void
i40e_fm_init(i40e_t * i40e)6979d26e4fcSRobert Mustacchi i40e_fm_init(i40e_t *i40e)
6989d26e4fcSRobert Mustacchi {
6999d26e4fcSRobert Mustacchi 	ddi_iblock_cookie_t iblk;
7009d26e4fcSRobert Mustacchi 
7019d26e4fcSRobert Mustacchi 	i40e->i40e_fm_capabilities = ddi_prop_get_int(DDI_DEV_T_ANY,
7029d26e4fcSRobert Mustacchi 	    i40e->i40e_dip, DDI_PROP_DONTPASS, "fm_capable",
7039d26e4fcSRobert Mustacchi 	    DDI_FM_EREPORT_CAPABLE | DDI_FM_ACCCHK_CAPABLE |
7049d26e4fcSRobert Mustacchi 	    DDI_FM_DMACHK_CAPABLE | DDI_FM_ERRCB_CAPABLE);
7059d26e4fcSRobert Mustacchi 
7069d26e4fcSRobert Mustacchi 	if (i40e->i40e_fm_capabilities < 0) {
7079d26e4fcSRobert Mustacchi 		i40e->i40e_fm_capabilities = 0;
7089d26e4fcSRobert Mustacchi 	} else if (i40e->i40e_fm_capabilities > 0xf) {
7099d26e4fcSRobert Mustacchi 		i40e->i40e_fm_capabilities = DDI_FM_EREPORT_CAPABLE |
7109d26e4fcSRobert Mustacchi 		    DDI_FM_ACCCHK_CAPABLE | DDI_FM_DMACHK_CAPABLE |
7119d26e4fcSRobert Mustacchi 		    DDI_FM_ERRCB_CAPABLE;
7129d26e4fcSRobert Mustacchi 	}
7139d26e4fcSRobert Mustacchi 
7149d26e4fcSRobert Mustacchi 	/*
7159d26e4fcSRobert Mustacchi 	 * Only register with IO Fault Services if we have some capability
7169d26e4fcSRobert Mustacchi 	 */
7179d26e4fcSRobert Mustacchi 	if (i40e->i40e_fm_capabilities & DDI_FM_ACCCHK_CAPABLE) {
7189d26e4fcSRobert Mustacchi 		i40e_regs_acc_attr.devacc_attr_access = DDI_FLAGERR_ACC;
7199d26e4fcSRobert Mustacchi 	} else {
7209d26e4fcSRobert Mustacchi 		i40e_regs_acc_attr.devacc_attr_access = DDI_DEFAULT_ACC;
7219d26e4fcSRobert Mustacchi 	}
7229d26e4fcSRobert Mustacchi 
7239d26e4fcSRobert Mustacchi 	if (i40e->i40e_fm_capabilities) {
7249d26e4fcSRobert Mustacchi 		ddi_fm_init(i40e->i40e_dip, &i40e->i40e_fm_capabilities, &iblk);
7259d26e4fcSRobert Mustacchi 
7269d26e4fcSRobert Mustacchi 		if (DDI_FM_EREPORT_CAP(i40e->i40e_fm_capabilities) ||
7279d26e4fcSRobert Mustacchi 		    DDI_FM_ERRCB_CAP(i40e->i40e_fm_capabilities)) {
7289d26e4fcSRobert Mustacchi 			pci_ereport_setup(i40e->i40e_dip);
7299d26e4fcSRobert Mustacchi 		}
7309d26e4fcSRobert Mustacchi 
7319d26e4fcSRobert Mustacchi 		if (DDI_FM_ERRCB_CAP(i40e->i40e_fm_capabilities)) {
7329d26e4fcSRobert Mustacchi 			ddi_fm_handler_register(i40e->i40e_dip,
7339d26e4fcSRobert Mustacchi 			    i40e_fm_error_cb, (void*)i40e);
7349d26e4fcSRobert Mustacchi 		}
7359d26e4fcSRobert Mustacchi 	}
7369d26e4fcSRobert Mustacchi 
7379d26e4fcSRobert Mustacchi 	if (i40e->i40e_fm_capabilities & DDI_FM_DMACHK_CAPABLE) {
7389d26e4fcSRobert Mustacchi 		i40e_init_dma_attrs(i40e, B_TRUE);
7399d26e4fcSRobert Mustacchi 	} else {
7409d26e4fcSRobert Mustacchi 		i40e_init_dma_attrs(i40e, B_FALSE);
7419d26e4fcSRobert Mustacchi 	}
7429d26e4fcSRobert Mustacchi }
7439d26e4fcSRobert Mustacchi 
7449d26e4fcSRobert Mustacchi static void
i40e_fm_fini(i40e_t * i40e)7459d26e4fcSRobert Mustacchi i40e_fm_fini(i40e_t *i40e)
7469d26e4fcSRobert Mustacchi {
7479d26e4fcSRobert Mustacchi 	if (i40e->i40e_fm_capabilities) {
7489d26e4fcSRobert Mustacchi 
7499d26e4fcSRobert Mustacchi 		if (DDI_FM_EREPORT_CAP(i40e->i40e_fm_capabilities) ||
7509d26e4fcSRobert Mustacchi 		    DDI_FM_ERRCB_CAP(i40e->i40e_fm_capabilities))
7519d26e4fcSRobert Mustacchi 			pci_ereport_teardown(i40e->i40e_dip);
7529d26e4fcSRobert Mustacchi 
7539d26e4fcSRobert Mustacchi 		if (DDI_FM_ERRCB_CAP(i40e->i40e_fm_capabilities))
7549d26e4fcSRobert Mustacchi 			ddi_fm_handler_unregister(i40e->i40e_dip);
7559d26e4fcSRobert Mustacchi 
7569d26e4fcSRobert Mustacchi 		ddi_fm_fini(i40e->i40e_dip);
7579d26e4fcSRobert Mustacchi 	}
7589d26e4fcSRobert Mustacchi }
7599d26e4fcSRobert Mustacchi 
7609d26e4fcSRobert Mustacchi void
i40e_fm_ereport(i40e_t * i40e,char * detail)7619d26e4fcSRobert Mustacchi i40e_fm_ereport(i40e_t *i40e, char *detail)
7629d26e4fcSRobert Mustacchi {
7639d26e4fcSRobert Mustacchi 	uint64_t ena;
7649d26e4fcSRobert Mustacchi 	char buf[FM_MAX_CLASS];
7659d26e4fcSRobert Mustacchi 
7669d26e4fcSRobert Mustacchi 	(void) snprintf(buf, FM_MAX_CLASS, "%s.%s", DDI_FM_DEVICE, detail);
7679d26e4fcSRobert Mustacchi 	ena = fm_ena_generate(0, FM_ENA_FMT1);
7689d26e4fcSRobert Mustacchi 	if (DDI_FM_EREPORT_CAP(i40e->i40e_fm_capabilities)) {
7699d26e4fcSRobert Mustacchi 		ddi_fm_ereport_post(i40e->i40e_dip, buf, ena, DDI_NOSLEEP,
7709d26e4fcSRobert Mustacchi 		    FM_VERSION, DATA_TYPE_UINT8, FM_EREPORT_VERS0, NULL);
7719d26e4fcSRobert Mustacchi 	}
7729d26e4fcSRobert Mustacchi }
7739d26e4fcSRobert Mustacchi 
7749d26e4fcSRobert Mustacchi /*
77509aee612SRyan Zezeski  * Here we're trying to set the SEID of the default VSI. In general,
77609aee612SRyan Zezeski  * when we come through and look at this shortly after attach, we
77709aee612SRyan Zezeski  * expect there to only be a single element present, which is the
77809aee612SRyan Zezeski  * default VSI. Importantly, each PF seems to not see any other
77909aee612SRyan Zezeski  * devices, in part because of the simple switch mode that we're
78009aee612SRyan Zezeski  * using. If for some reason, we see more artifacts, we'll need to
78109aee612SRyan Zezeski  * revisit what we're doing here.
7829d26e4fcSRobert Mustacchi  */
78309aee612SRyan Zezeski static boolean_t
i40e_set_def_vsi_seid(i40e_t * i40e)78409aee612SRyan Zezeski i40e_set_def_vsi_seid(i40e_t *i40e)
7859d26e4fcSRobert Mustacchi {
7869d26e4fcSRobert Mustacchi 	i40e_hw_t *hw = &i40e->i40e_hw_space;
7879d26e4fcSRobert Mustacchi 	struct i40e_aqc_get_switch_config_resp *sw_config;
7889d26e4fcSRobert Mustacchi 	uint8_t aq_buf[I40E_AQ_LARGE_BUF];
7899d26e4fcSRobert Mustacchi 	uint16_t next = 0;
7909d26e4fcSRobert Mustacchi 	int rc;
7919d26e4fcSRobert Mustacchi 
7929d26e4fcSRobert Mustacchi 	/* LINTED: E_BAD_PTR_CAST_ALIGN */
7939d26e4fcSRobert Mustacchi 	sw_config = (struct i40e_aqc_get_switch_config_resp *)aq_buf;
7949d26e4fcSRobert Mustacchi 	rc = i40e_aq_get_switch_config(hw, sw_config, sizeof (aq_buf), &next,
7959d26e4fcSRobert Mustacchi 	    NULL);
7969d26e4fcSRobert Mustacchi 	if (rc != I40E_SUCCESS) {
7979d26e4fcSRobert Mustacchi 		i40e_error(i40e, "i40e_aq_get_switch_config() failed %d: %d",
7989d26e4fcSRobert Mustacchi 		    rc, hw->aq.asq_last_status);
79909aee612SRyan Zezeski 		return (B_FALSE);
8009d26e4fcSRobert Mustacchi 	}
8019d26e4fcSRobert Mustacchi 
8029d26e4fcSRobert Mustacchi 	if (LE_16(sw_config->header.num_reported) != 1) {
8039d26e4fcSRobert Mustacchi 		i40e_error(i40e, "encountered multiple (%d) switching units "
8049d26e4fcSRobert Mustacchi 		    "during attach, not proceeding",
8059d26e4fcSRobert Mustacchi 		    LE_16(sw_config->header.num_reported));
80609aee612SRyan Zezeski 		return (B_FALSE);
80709aee612SRyan Zezeski 	}
80809aee612SRyan Zezeski 
80909aee612SRyan Zezeski 	I40E_DEF_VSI_SEID(i40e) = sw_config->element[0].seid;
81009aee612SRyan Zezeski 	return (B_TRUE);
81109aee612SRyan Zezeski }
81209aee612SRyan Zezeski 
81309aee612SRyan Zezeski /*
81409aee612SRyan Zezeski  * Get the SEID of the uplink MAC.
81509aee612SRyan Zezeski  */
81609aee612SRyan Zezeski static int
i40e_get_mac_seid(i40e_t * i40e)81709aee612SRyan Zezeski i40e_get_mac_seid(i40e_t *i40e)
81809aee612SRyan Zezeski {
81909aee612SRyan Zezeski 	i40e_hw_t *hw = &i40e->i40e_hw_space;
82009aee612SRyan Zezeski 	struct i40e_aqc_get_switch_config_resp *sw_config;
82109aee612SRyan Zezeski 	uint8_t aq_buf[I40E_AQ_LARGE_BUF];
822