19d26e4fcSRobert Mustacchi /*
29d26e4fcSRobert Mustacchi * This file and its contents are supplied under the terms of the
39d26e4fcSRobert Mustacchi * Common Development and Distribution License ("CDDL"), version 1.0.
49d26e4fcSRobert Mustacchi * You may only use this file in accordance with the terms of version
59d26e4fcSRobert Mustacchi * 1.0 of the CDDL.
69d26e4fcSRobert Mustacchi *
79d26e4fcSRobert Mustacchi * A full copy of the text of the CDDL should have accompanied this
89d26e4fcSRobert Mustacchi * source. A copy of the CDDL is also available via the Internet at
99d26e4fcSRobert Mustacchi * http://www.illumos.org/license/CDDL.
109d26e4fcSRobert Mustacchi */
119d26e4fcSRobert Mustacchi
129d26e4fcSRobert Mustacchi /*
139d26e4fcSRobert Mustacchi * Copyright 2015 OmniTI Computer Consulting, Inc. All rights reserved.
1409aee612SRyan Zezeski * Copyright 2019 Joyent, Inc.
15396505afSPaul Winder * Copyright 2017 Tegile Systems, Inc. All rights reserved.
16234a3cfbSPaul Winder * Copyright 2020 RackTop Systems, Inc.
1788628b1bSRyan Zezeski * Copyright 2020 Ryan Zezeski
18*df36e06dSRobert Mustacchi * Copyright 2021 Oxide Computer Company
199d26e4fcSRobert Mustacchi */
209d26e4fcSRobert Mustacchi
219d26e4fcSRobert Mustacchi /*
229d26e4fcSRobert Mustacchi * i40e - Intel 10/40 Gb Ethernet driver
239d26e4fcSRobert Mustacchi *
249d26e4fcSRobert Mustacchi * The i40e driver is the main software device driver for the Intel 40 Gb family
259d26e4fcSRobert Mustacchi * of devices. Note that these devices come in many flavors with both 40 GbE
269d26e4fcSRobert Mustacchi * ports and 10 GbE ports. This device is the successor to the 82599 family of
279d26e4fcSRobert Mustacchi * devices (ixgbe).
289d26e4fcSRobert Mustacchi *
299d26e4fcSRobert Mustacchi * Unlike previous generations of Intel 1 GbE and 10 GbE devices, the 40 GbE
309d26e4fcSRobert Mustacchi * devices defined in the XL710 controller (previously known as Fortville) are a
319d26e4fcSRobert Mustacchi * rather different beast and have a small switch embedded inside of them. In
329d26e4fcSRobert Mustacchi * addition, the way that most of the programming is done has been overhauled.
339d26e4fcSRobert Mustacchi * As opposed to just using PCIe memory mapped registers, it also has an
349d26e4fcSRobert Mustacchi * administrative queue which is used to communicate with firmware running on
359d26e4fcSRobert Mustacchi * the chip.
369d26e4fcSRobert Mustacchi *
379d26e4fcSRobert Mustacchi * Each physical function in the hardware shows up as a device that this driver
389d26e4fcSRobert Mustacchi * will bind to. The hardware splits many resources evenly across all of the
399d26e4fcSRobert Mustacchi * physical functions present on the device, while other resources are instead
409d26e4fcSRobert Mustacchi * shared across the entire card and it's up to the device driver to
419d26e4fcSRobert Mustacchi * intelligently partition them.
429d26e4fcSRobert Mustacchi *
439d26e4fcSRobert Mustacchi * ------------
449d26e4fcSRobert Mustacchi * Organization
459d26e4fcSRobert Mustacchi * ------------
469d26e4fcSRobert Mustacchi *
479d26e4fcSRobert Mustacchi * This driver is made up of several files which have their own theory
489d26e4fcSRobert Mustacchi * statements spread across them. We'll touch on the high level purpose of each
499d26e4fcSRobert Mustacchi * file here, and then we'll get into more discussion on how the device is
509d26e4fcSRobert Mustacchi * generally modelled with respect to the interfaces in illumos.
519d26e4fcSRobert Mustacchi *
529d26e4fcSRobert Mustacchi * i40e_gld.c: This file contains all of the bindings to MAC and the networking
539d26e4fcSRobert Mustacchi * stack.
549d26e4fcSRobert Mustacchi *
559d26e4fcSRobert Mustacchi * i40e_intr.c: This file contains all of the interrupt service routines and
569d26e4fcSRobert Mustacchi * contains logic to enable and disable interrupts on the hardware.
579d26e4fcSRobert Mustacchi * It also contains the logic to map hardware resources such as the
589d26e4fcSRobert Mustacchi * rings to and from interrupts and controls their ability to fire.
599d26e4fcSRobert Mustacchi *
609d26e4fcSRobert Mustacchi * There is a big theory statement on interrupts present there.
619d26e4fcSRobert Mustacchi *
629d26e4fcSRobert Mustacchi * i40e_main.c: The file that you're currently in. It interfaces with the
639d26e4fcSRobert Mustacchi * traditional OS DDI interfaces and is in charge of configuring
649d26e4fcSRobert Mustacchi * the device.
659d26e4fcSRobert Mustacchi *
669d26e4fcSRobert Mustacchi * i40e_osdep.[ch]: These files contain interfaces and definitions needed to
679d26e4fcSRobert Mustacchi * work with Intel's common code for the device.
689d26e4fcSRobert Mustacchi *
699d26e4fcSRobert Mustacchi * i40e_stats.c: This file contains the general work and logic around our
709d26e4fcSRobert Mustacchi * kstats. A theory statement on their organization and use of the
719d26e4fcSRobert Mustacchi * hardware exists there.
729d26e4fcSRobert Mustacchi *
739d26e4fcSRobert Mustacchi * i40e_sw.h: This header file contains all of the primary structure definitions
749d26e4fcSRobert Mustacchi * and constants that are used across the entire driver.
759d26e4fcSRobert Mustacchi *
769d26e4fcSRobert Mustacchi * i40e_transceiver.c: This file contains all of the logic for sending and
779d26e4fcSRobert Mustacchi * receiving data. It contains all of the ring and DMA
789d26e4fcSRobert Mustacchi * allocation logic, as well as, the actual interfaces to
799d26e4fcSRobert Mustacchi * send and receive data.
809d26e4fcSRobert Mustacchi *
819d26e4fcSRobert Mustacchi * A big theory statement on ring management, descriptors,
829d26e4fcSRobert Mustacchi * and how it ties into the OS is present there.
839d26e4fcSRobert Mustacchi *
849d26e4fcSRobert Mustacchi * --------------
859d26e4fcSRobert Mustacchi * General Design
869d26e4fcSRobert Mustacchi * --------------
879d26e4fcSRobert Mustacchi *
889d26e4fcSRobert Mustacchi * Before we go too far into the general way we've laid out data structures and
899d26e4fcSRobert Mustacchi * the like, it's worth taking some time to explain how the hardware is
909d26e4fcSRobert Mustacchi * organized. This organization informs a lot of how we do things at this time
919d26e4fcSRobert Mustacchi * in the driver.
929d26e4fcSRobert Mustacchi *
939d26e4fcSRobert Mustacchi * Each physical device consists of one or more ports, which are
949d26e4fcSRobert Mustacchi * considered physical functions in the PCI sense and thus each get enumerated
959d26e4fcSRobert Mustacchi * by the system, resulting in an instance being created and attached to. While
969d26e4fcSRobert Mustacchi * there are many resources that are unique to each physical function eg.
979d26e4fcSRobert Mustacchi * instance of the device, there are many that are shared across all of them.
989d26e4fcSRobert Mustacchi * Several resources have an amount reserved for each Virtual Station Interface
999d26e4fcSRobert Mustacchi * (VSI) and then a static pool of resources, available for all functions on the
1009d26e4fcSRobert Mustacchi * card.
1019d26e4fcSRobert Mustacchi *
1029d26e4fcSRobert Mustacchi * The most important resource in hardware are its transmit and receive queue
1039d26e4fcSRobert Mustacchi * pairs (i40e_trqpair_t). These should be thought of as rings in GLDv3
1049d26e4fcSRobert Mustacchi * parlance. There are a set number of these on each device; however, they are
1059d26e4fcSRobert Mustacchi * statically partitioned among all of the different physical functions.
1069d26e4fcSRobert Mustacchi *
1079d26e4fcSRobert Mustacchi * 'Fortville' (the code name for this device family) is basically a switch. To
1089d26e4fcSRobert Mustacchi * map MAC addresses and other things to queues, we end up having to create
1099d26e4fcSRobert Mustacchi * Virtual Station Interfaces (VSIs) and establish forwarding rules that direct
1109d26e4fcSRobert Mustacchi * traffic to a queue. A VSI owns a collection of queues and has a series of
1119d26e4fcSRobert Mustacchi * forwarding rules that point to it. One way to think of this is to treat it
1129d26e4fcSRobert Mustacchi * like MAC does a VNIC. When MAC refers to a group, a collection of rings and
1139d26e4fcSRobert Mustacchi * classification resources, that is a VSI in i40e.
1149d26e4fcSRobert Mustacchi *
1159d26e4fcSRobert Mustacchi * The set of VSIs is shared across the entire device, though there may be some
1169d26e4fcSRobert Mustacchi * amount that are reserved to each PF. Because the GLDv3 does not let us change
1179d26e4fcSRobert Mustacchi * the number of groups dynamically, we instead statically divide this amount
1189d26e4fcSRobert Mustacchi * evenly between all the functions that exist. In addition, we have the same
1199d26e4fcSRobert Mustacchi * problem with the mac address forwarding rules. There are a static number that
1209d26e4fcSRobert Mustacchi * exist shared across all the functions.
1219d26e4fcSRobert Mustacchi *
1229d26e4fcSRobert Mustacchi * To handle both of these resources, what we end up doing is going through and
1239d26e4fcSRobert Mustacchi * determining which functions belong to the same device. Nominally one might do
1249d26e4fcSRobert Mustacchi * this by having a nexus driver; however, a prime requirement for a nexus
1259d26e4fcSRobert Mustacchi * driver is identifying the various children and activating them. While it is
1269d26e4fcSRobert Mustacchi * possible to get this information from NVRAM, we would end up duplicating a
1279d26e4fcSRobert Mustacchi * lot of the PCI enumeration logic. Really, at the end of the day, the device
1289d26e4fcSRobert Mustacchi * doesn't give us the traditional identification properties we want from a
1299d26e4fcSRobert Mustacchi * nexus driver.
1309d26e4fcSRobert Mustacchi *
1319d26e4fcSRobert Mustacchi * Instead, we rely on some properties that are guaranteed to be unique. While
1329d26e4fcSRobert Mustacchi * it might be tempting to leverage the PBA or serial number of the device from
1339d26e4fcSRobert Mustacchi * NVRAM, there is nothing that says that two devices can't be mis-programmed to
1349d26e4fcSRobert Mustacchi * have the same values in NVRAM. Instead, we uniquely identify a group of
1359d26e4fcSRobert Mustacchi * functions based on their parent in the /devices tree, their PCI bus and PCI
1369d26e4fcSRobert Mustacchi * device identifiers. Using any one of these on its own may not be sufficient.
1379d26e4fcSRobert Mustacchi *
1389d26e4fcSRobert Mustacchi * For each unique PCI device that we encounter, we'll create a i40e_device_t.
1399d26e4fcSRobert Mustacchi * From there, because we don't have a good way to tell the GLDv3 about sharing
1409d26e4fcSRobert Mustacchi * resources between everything, we'll end up just dividing the resources
1419d26e4fcSRobert Mustacchi * evenly between all of the functions. Longer term, if we don't have to declare
1429d26e4fcSRobert Mustacchi * to the GLDv3 that these resources are shared, then we'll maintain a pool and
1437267b93fSMarcel Telka * have each PF allocate from the pool in the device, thus if only two of four
1449d26e4fcSRobert Mustacchi * ports are being used, for example, then all of the resources can still be
1459d26e4fcSRobert Mustacchi * used.
1469d26e4fcSRobert Mustacchi *
1479d26e4fcSRobert Mustacchi * -------------------------------------------
1489d26e4fcSRobert Mustacchi * Transmit and Receive Queue Pair Allocations
1499d26e4fcSRobert Mustacchi * -------------------------------------------
1509d26e4fcSRobert Mustacchi *
1519d26e4fcSRobert Mustacchi * NVRAM ends up assigning each PF its own share of the transmit and receive LAN
1529d26e4fcSRobert Mustacchi * queue pairs, we have no way of modifying it, only observing it. From there,
1539d26e4fcSRobert Mustacchi * it's up to us to map these queues to VSIs and VFs. Since we don't support any
1549d26e4fcSRobert Mustacchi * VFs at this time, we only focus on assignments to VSIs.
1559d26e4fcSRobert Mustacchi *
1569d26e4fcSRobert Mustacchi * At the moment, we use a static mapping of transmit/receive queue pairs to a
1579d26e4fcSRobert Mustacchi * given VSI (eg. rings to a group). Though in the fullness of time, we want to
1589d26e4fcSRobert Mustacchi * make this something which is fully dynamic and take advantage of documented,
1599d26e4fcSRobert Mustacchi * but not yet available functionality for adding filters based on VXLAN and
1609d26e4fcSRobert Mustacchi * other encapsulation technologies.
1619d26e4fcSRobert Mustacchi *
1629d26e4fcSRobert Mustacchi * -------------------------------------
1639d26e4fcSRobert Mustacchi * Broadcast, Multicast, and Promiscuous
1649d26e4fcSRobert Mustacchi * -------------------------------------
1659d26e4fcSRobert Mustacchi *
1669d26e4fcSRobert Mustacchi * As part of the GLDv3, we need to make sure that we can handle receiving
1679d26e4fcSRobert Mustacchi * broadcast and multicast traffic, as well as enabling promiscuous mode when
1689d26e4fcSRobert Mustacchi * requested. GLDv3 requires that all broadcast and multicast traffic be
1699d26e4fcSRobert Mustacchi * retrieved by the default group, eg. the first one. This is the same thing as
1709d26e4fcSRobert Mustacchi * the default VSI.
1719d26e4fcSRobert Mustacchi *
1729d26e4fcSRobert Mustacchi * To receive broadcast traffic, we enable it through the admin queue, rather
1739d26e4fcSRobert Mustacchi * than use one of our filters for it. For multicast traffic, we reserve a
1749d26e4fcSRobert Mustacchi * certain number of the hash filters and assign them to a given PF. When we
1757267b93fSMarcel Telka * exceed those, we then switch to using promiscuous mode for multicast traffic.
1769d26e4fcSRobert Mustacchi *
1779d26e4fcSRobert Mustacchi * More specifically, once we exceed the number of filters (indicated because
1789d26e4fcSRobert Mustacchi * the i40e_t`i40e_resources.ifr_nmcastfilt ==
1799d26e4fcSRobert Mustacchi * i40e_t`i40e_resources.ifr_nmcastfilt_used), we then instead need to toggle
1809d26e4fcSRobert Mustacchi * promiscuous mode. If promiscuous mode is toggled then we keep track of the
1819d26e4fcSRobert Mustacchi * number of MACs added to it by incrementing i40e_t`i40e_mcast_promisc_count.
1829d26e4fcSRobert Mustacchi * That will stay enabled until that count reaches zero indicating that we have
1839d26e4fcSRobert Mustacchi * only added multicast addresses that we have a corresponding entry for.
1849d26e4fcSRobert Mustacchi *
1859d26e4fcSRobert Mustacchi * Because MAC itself wants to toggle promiscuous mode, which includes both
1869d26e4fcSRobert Mustacchi * unicast and multicast traffic, we go through and keep track of that
1879d26e4fcSRobert Mustacchi * ourselves. That is maintained through the use of the i40e_t`i40e_promisc_on
1889d26e4fcSRobert Mustacchi * member.
1899d26e4fcSRobert Mustacchi *
1909d26e4fcSRobert Mustacchi * --------------
1919d26e4fcSRobert Mustacchi * VSI Management
1929d26e4fcSRobert Mustacchi * --------------
1939d26e4fcSRobert Mustacchi *
19409aee612SRyan Zezeski * The PFs share 384 VSIs. The firmware creates one VSI per PF by default.
19509aee612SRyan Zezeski * During chip start we retrieve the SEID of this VSI and assign it as the
19609aee612SRyan Zezeski * default VSI for our VEB (one VEB per PF). We then add additional VSIs to
19709aee612SRyan Zezeski * the VEB up to the determined number of rx groups: i40e_t`i40e_num_rx_groups.
19809aee612SRyan Zezeski * We currently cap this number to I40E_GROUP_MAX to a) make sure all PFs can
19909aee612SRyan Zezeski * allocate the same number of VSIs, and b) to keep the interrupt multiplexing
20009aee612SRyan Zezeski * under control. In the future, when we improve the interrupt allocation, we
20109aee612SRyan Zezeski * may want to revisit this cap to make better use of the available VSIs. The
20209aee612SRyan Zezeski * VSI allocation and configuration can be found in i40e_chip_start().
2039d26e4fcSRobert Mustacchi *
2049d26e4fcSRobert Mustacchi * ----------------
2059d26e4fcSRobert Mustacchi * Structure Layout
2069d26e4fcSRobert Mustacchi * ----------------
2079d26e4fcSRobert Mustacchi *
2089d26e4fcSRobert Mustacchi * The following image relates the core data structures together. The primary
2099d26e4fcSRobert Mustacchi * structure in the system is the i40e_t. It itself contains multiple rings,
2109d26e4fcSRobert Mustacchi * i40e_trqpair_t's which contain the various transmit and receive data. The
2119d26e4fcSRobert Mustacchi * receive data is stored outside of the i40e_trqpair_t and instead in the
2129d26e4fcSRobert Mustacchi * i40e_rx_data_t. The i40e_t has a corresponding i40e_device_t which keeps
2139d26e4fcSRobert Mustacchi * track of per-physical device state. Finally, for every active descriptor,
2149d26e4fcSRobert Mustacchi * there is a corresponding control block, which is where the
2159d26e4fcSRobert Mustacchi * i40e_rx_control_block_t and the i40e_tx_control_block_t come from.
2169d26e4fcSRobert Mustacchi *
2179d26e4fcSRobert Mustacchi * +-----------------------+ +-----------------------+
2189d26e4fcSRobert Mustacchi * | Global i40e_t list | | Global Device list |
2199d26e4fcSRobert Mustacchi * | | +--| |
2209d26e4fcSRobert Mustacchi * | i40e_glist | | | i40e_dlist |
2219d26e4fcSRobert Mustacchi * +-----------------------+ | +-----------------------+
2229d26e4fcSRobert Mustacchi * | v
2239d26e4fcSRobert Mustacchi * | +------------------------+ +-----------------------+
2249d26e4fcSRobert Mustacchi * | | Device-wide Structure |----->| Device-wide Structure |--> ...
2259d26e4fcSRobert Mustacchi * | | i40e_device_t | | i40e_device_t |
2269d26e4fcSRobert Mustacchi * | | | +-----------------------+
2279d26e4fcSRobert Mustacchi * | | dev_info_t * ------+--> Parent in devices tree.
2289d26e4fcSRobert Mustacchi * | | uint_t ------+--> PCI bus number
2299d26e4fcSRobert Mustacchi * | | uint_t ------+--> PCI device number
2309d26e4fcSRobert Mustacchi * | | uint_t ------+--> Number of functions
2319d26e4fcSRobert Mustacchi * | | i40e_switch_rsrcs_t ---+--> Captured total switch resources
2329d26e4fcSRobert Mustacchi * | | list_t ------+-------------+
2339d26e4fcSRobert Mustacchi * | +------------------------+ |
2349d26e4fcSRobert Mustacchi * | ^ |
2359d26e4fcSRobert Mustacchi * | +--------+ |
2369d26e4fcSRobert Mustacchi * | | v
2379d26e4fcSRobert Mustacchi * | +---------------------------+ | +-------------------+
2389d26e4fcSRobert Mustacchi * +->| GLDv3 Device, per PF |-----|-->| GLDv3 Device (PF) |--> ...
2399d26e4fcSRobert Mustacchi * | i40e_t | | | i40e_t |
2409d26e4fcSRobert Mustacchi * | **Primary Structure** | | +-------------------+
2419d26e4fcSRobert Mustacchi * | | |
2429d26e4fcSRobert Mustacchi * | i40e_device_t * --+-----+
2439d26e4fcSRobert Mustacchi * | i40e_state_t --+---> Device State
2449d26e4fcSRobert Mustacchi * | i40e_hw_t --+---> Intel common code structure
2459d26e4fcSRobert Mustacchi * | mac_handle_t --+---> GLDv3 handle to MAC
2469d26e4fcSRobert Mustacchi * | ddi_periodic_t --+---> Link activity timer
24709aee612SRyan Zezeski * | i40e_vsi_t * --+---> Array of VSIs
2489d26e4fcSRobert Mustacchi * | i40e_func_rsrc_t --+---> Available hardware resources
2499d26e4fcSRobert Mustacchi * | i40e_switch_rsrc_t * --+---> Switch resource snapshot
2509d26e4fcSRobert Mustacchi * | i40e_sdu --+---> Current MTU
2519d26e4fcSRobert Mustacchi * | i40e_frame_max --+---> Current HW frame size
2529d26e4fcSRobert Mustacchi * | i40e_uaddr_t * --+---> Array of assigned unicast MACs
2539d26e4fcSRobert Mustacchi * | i40e_maddr_t * --+---> Array of assigned multicast MACs
2549d26e4fcSRobert Mustacchi * | i40e_mcast_promisccount --+---> Active multicast state
2559d26e4fcSRobert Mustacchi * | i40e_promisc_on --+---> Current promiscuous mode state
25609aee612SRyan Zezeski * | uint_t --+---> Number of transmit/receive pairs
25709aee612SRyan Zezeski * | i40e_rx_group_t * --+---> Array of Rx groups
2589d26e4fcSRobert Mustacchi * | kstat_t * --+---> PF kstats
2599d26e4fcSRobert Mustacchi * | i40e_pf_stats_t --+---> PF kstat backing data
2609d26e4fcSRobert Mustacchi * | i40e_trqpair_t * --+---------+
2619d26e4fcSRobert Mustacchi * +---------------------------+ |
2629d26e4fcSRobert Mustacchi * |
2639d26e4fcSRobert Mustacchi * v
2649d26e4fcSRobert Mustacchi * +-------------------------------+ +-----------------------------+
2659d26e4fcSRobert Mustacchi * | Transmit/Receive Queue Pair |-------| Transmit/Receive Queue Pair |->...
2669d26e4fcSRobert Mustacchi * | i40e_trqpair_t | | i40e_trqpair_t |
2679d26e4fcSRobert Mustacchi * + Ring Data Structure | +-----------------------------+
2689d26e4fcSRobert Mustacchi * | |
2699d26e4fcSRobert Mustacchi * | mac_ring_handle_t +--> MAC RX ring handle
2709d26e4fcSRobert Mustacchi * | mac_ring_handle_t +--> MAC TX ring handle
2719d26e4fcSRobert Mustacchi * | i40e_rxq_stat_t --+--> RX Queue stats
2729d26e4fcSRobert Mustacchi * | i40e_txq_stat_t --+--> TX Queue stats
2739d26e4fcSRobert Mustacchi * | uint32_t (tx ring size) +--> TX Ring Size
2749d26e4fcSRobert Mustacchi * | uint32_t (tx free list size) +--> TX Free List Size
2759d26e4fcSRobert Mustacchi * | i40e_dma_buffer_t --------+--> TX Descriptor ring DMA
2769d26e4fcSRobert Mustacchi * | i40e_tx_desc_t * --------+--> TX descriptor ring
2779d26e4fcSRobert Mustacchi * | volatile uint32_t * +--> TX Write back head
2789d26e4fcSRobert Mustacchi * | uint32_t -------+--> TX ring head
2799d26e4fcSRobert Mustacchi * | uint32_t -------+--> TX ring tail
2809d26e4fcSRobert Mustacchi * | uint32_t -------+--> Num TX desc free
2819d26e4fcSRobert Mustacchi * | i40e_tx_control_block_t * --+--> TX control block array ---+
2829d26e4fcSRobert Mustacchi * | i40e_tx_control_block_t ** --+--> TCB work list ----+
2839d26e4fcSRobert Mustacchi * | i40e_tx_control_block_t ** --+--> TCB free list ---+
2849d26e4fcSRobert Mustacchi * | uint32_t -------+--> Free TCB count |
2859d26e4fcSRobert Mustacchi * | i40e_rx_data_t * -------+--+ v
2869d26e4fcSRobert Mustacchi * +-------------------------------+ | +---------------------------+
2879d26e4fcSRobert Mustacchi * | | Per-TX Frame Metadata |
2889d26e4fcSRobert Mustacchi * | | i40e_tx_control_block_t |
2899d26e4fcSRobert Mustacchi * +--------------------+ | |
2909d26e4fcSRobert Mustacchi * | mblk to transmit <--+--- mblk_t * |
2919d26e4fcSRobert Mustacchi * | type of transmit <--+--- i40e_tx_type_t |
2929d26e4fcSRobert Mustacchi * | TX DMA handle <--+--- ddi_dma_handle_t |
2939d26e4fcSRobert Mustacchi * v TX DMA buffer <--+--- i40e_dma_buffer_t |
2949d26e4fcSRobert Mustacchi * +------------------------------+ +---------------------------+
2959d26e4fcSRobert Mustacchi * | Core Receive Data |
2969d26e4fcSRobert Mustacchi * | i40e_rx_data_t |
2979d26e4fcSRobert Mustacchi * | |
2989d26e4fcSRobert Mustacchi * | i40e_dma_buffer_t --+--> RX descriptor DMA Data
2999d26e4fcSRobert Mustacchi * | i40e_rx_desc_t --+--> RX descriptor ring
3009d26e4fcSRobert Mustacchi * | uint32_t --+--> Next free desc.
3019d26e4fcSRobert Mustacchi * | i40e_rx_control_block_t * --+--> RX Control Block Array ---+
3029d26e4fcSRobert Mustacchi * | i40e_rx_control_block_t ** --+--> RCB work list ---+
3039d26e4fcSRobert Mustacchi * | i40e_rx_control_block_t ** --+--> RCB free list ---+
3049d26e4fcSRobert Mustacchi * +------------------------------+ |
3059d26e4fcSRobert Mustacchi * ^ |
3069d26e4fcSRobert Mustacchi * | +---------------------------+ |
3079d26e4fcSRobert Mustacchi * | | Per-RX Frame Metadata |<---------------+
3089d26e4fcSRobert Mustacchi * | | i40e_rx_control_block_t |
3099d26e4fcSRobert Mustacchi * | | |
3109d26e4fcSRobert Mustacchi * | | mblk_t * ----+--> Received mblk_t data
3119d26e4fcSRobert Mustacchi * | | uint32_t ----+--> Reference count
3129d26e4fcSRobert Mustacchi * | | i40e_dma_buffer_t ----+--> Receive data DMA info
3139d26e4fcSRobert Mustacchi * | | frtn_t ----+--> mblk free function info
3149d26e4fcSRobert Mustacchi * +-----+-- i40e_rx_data_t * |
3159d26e4fcSRobert Mustacchi * +---------------------------+
3169d26e4fcSRobert Mustacchi *
3179d26e4fcSRobert Mustacchi * -------------
3189d26e4fcSRobert Mustacchi * Lock Ordering
3199d26e4fcSRobert Mustacchi * -------------
3209d26e4fcSRobert Mustacchi *
3219d26e4fcSRobert Mustacchi * In order to ensure that we don't deadlock, the following represents the
3229d26e4fcSRobert Mustacchi * lock order being used. When grabbing locks, follow the following order. Lower
3239d26e4fcSRobert Mustacchi * numbers are more important. Thus, the i40e_glock which is number 0, must be
3249d26e4fcSRobert Mustacchi * taken before any other locks in the driver. On the other hand, the
3259d26e4fcSRobert Mustacchi * i40e_t`i40e_stat_lock, has the highest number because it's the least
3269d26e4fcSRobert Mustacchi * important lock. Note, that just because one lock is higher than another does
3279d26e4fcSRobert Mustacchi * not mean that all intermediary locks are required.
3289d26e4fcSRobert Mustacchi *
3299d26e4fcSRobert Mustacchi * 0) i40e_glock
3309d26e4fcSRobert Mustacchi * 1) i40e_t`i40e_general_lock
3319d26e4fcSRobert Mustacchi *
3329d26e4fcSRobert Mustacchi * 2) i40e_trqpair_t`itrq_rx_lock
3339d26e4fcSRobert Mustacchi * 3) i40e_trqpair_t`itrq_tx_lock
334aa2a44afSPaul Winder * 4) i40e_trqpair_t`itrq_intr_lock
335aa2a44afSPaul Winder * 5) i40e_t`i40e_rx_pending_lock
336aa2a44afSPaul Winder * 6) i40e_trqpair_t`itrq_tcb_lock
3379d26e4fcSRobert Mustacchi *
338aa2a44afSPaul Winder * 7) i40e_t`i40e_stat_lock
3399d26e4fcSRobert Mustacchi *
3409d26e4fcSRobert Mustacchi * Rules and expectations:
3419d26e4fcSRobert Mustacchi *
3429d26e4fcSRobert Mustacchi * 1) A thread holding locks belonging to one PF should not hold locks belonging to
3439d26e4fcSRobert Mustacchi * a second. If for some reason this becomes necessary, locks should be grabbed
3449d26e4fcSRobert Mustacchi * based on the list order in the i40e_device_t, which implies that the
3459d26e4fcSRobert Mustacchi * i40e_glock is held.
3469d26e4fcSRobert Mustacchi *
3479d26e4fcSRobert Mustacchi * 2) When grabbing locks between multiple transmit and receive queues, the
3489d26e4fcSRobert Mustacchi * locks for the lowest number transmit/receive queue should be grabbed first.
3499d26e4fcSRobert Mustacchi *
3509d26e4fcSRobert Mustacchi * 3) When grabbing both the transmit and receive lock for a given queue, always
3519d26e4fcSRobert Mustacchi * grab i40e_trqpair_t`itrq_rx_lock before the i40e_trqpair_t`itrq_tx_lock.
3529d26e4fcSRobert Mustacchi *
3539d26e4fcSRobert Mustacchi * 4) The following pairs of locks are not expected to be held at the same time:
3549d26e4fcSRobert Mustacchi *
3559d26e4fcSRobert Mustacchi * o i40e_t`i40e_rx_pending_lock and i40e_trqpair_t`itrq_tcb_lock
356aa2a44afSPaul Winder * o i40e_trqpair_t`itrq_intr_lock is not expected to be held with any
357aa2a44afSPaul Winder * other lock except i40e_t`i40e_general_lock in mc_start(9E) and
358aa2a44afSPaul Winder * mc_stop(9E).
3599d26e4fcSRobert Mustacchi *
3609d26e4fcSRobert Mustacchi * -----------
3619d26e4fcSRobert Mustacchi * Future Work
3629d26e4fcSRobert Mustacchi * -----------
3639d26e4fcSRobert Mustacchi *
3649d26e4fcSRobert Mustacchi * At the moment the i40e driver is rather bare bones, allowing us to start
3659d26e4fcSRobert Mustacchi * getting data flowing and folks using it while we develop additional features.
3669d26e4fcSRobert Mustacchi * While bugs have been filed to cover this future work, the following gives an
3679d26e4fcSRobert Mustacchi * overview of expected work:
3689d26e4fcSRobert Mustacchi *
3699d26e4fcSRobert Mustacchi * o DMA binding and breaking up the locking in ring recycling.
3709d26e4fcSRobert Mustacchi * o Enhanced detection of device errors
3719d26e4fcSRobert Mustacchi * o Participation in IRM
3729d26e4fcSRobert Mustacchi * o FMA device reset
3739d26e4fcSRobert Mustacchi * o Stall detection, temperature error detection, etc.
3749d26e4fcSRobert Mustacchi * o More dynamic resource pools
3759d26e4fcSRobert Mustacchi */
3769d26e4fcSRobert Mustacchi
3779d26e4fcSRobert Mustacchi #include "i40e_sw.h"
3789d26e4fcSRobert Mustacchi
37909aee612SRyan Zezeski static char i40e_ident[] = "Intel 10/40Gb Ethernet v1.0.3";
3809d26e4fcSRobert Mustacchi
3819d26e4fcSRobert Mustacchi /*
3829d26e4fcSRobert Mustacchi * The i40e_glock primarily protects the lists below and the i40e_device_t
3839d26e4fcSRobert Mustacchi * structures.
3849d26e4fcSRobert Mustacchi */
3859d26e4fcSRobert Mustacchi static kmutex_t i40e_glock;
3869d26e4fcSRobert Mustacchi static list_t i40e_glist;
3879d26e4fcSRobert Mustacchi static list_t i40e_dlist;
3889d26e4fcSRobert Mustacchi
3899d26e4fcSRobert Mustacchi /*
3909d26e4fcSRobert Mustacchi * Access attributes for register mapping.
3919d26e4fcSRobert Mustacchi */
3929d26e4fcSRobert Mustacchi static ddi_device_acc_attr_t i40e_regs_acc_attr = {
3939d26e4fcSRobert Mustacchi DDI_DEVICE_ATTR_V1,
3949d26e4fcSRobert Mustacchi DDI_STRUCTURE_LE_ACC,
3959d26e4fcSRobert Mustacchi DDI_STRICTORDER_ACC,
3969d26e4fcSRobert Mustacchi DDI_FLAGERR_ACC
3979d26e4fcSRobert Mustacchi };
3989d26e4fcSRobert Mustacchi
3999d26e4fcSRobert Mustacchi /*
4009d26e4fcSRobert Mustacchi * Logging function for this driver.
4019d26e4fcSRobert Mustacchi */
4029d26e4fcSRobert Mustacchi static void
i40e_dev_err(i40e_t * i40e,int level,boolean_t console,const char * fmt,va_list ap)4039d26e4fcSRobert Mustacchi i40e_dev_err(i40e_t *i40e, int level, boolean_t console, const char *fmt,
4049d26e4fcSRobert Mustacchi va_list ap)
4059d26e4fcSRobert Mustacchi {
4069d26e4fcSRobert Mustacchi char buf[1024];
4079d26e4fcSRobert Mustacchi
4089d26e4fcSRobert Mustacchi (void) vsnprintf(buf, sizeof (buf), fmt, ap);
4099d26e4fcSRobert Mustacchi
4109d26e4fcSRobert Mustacchi if (i40e == NULL) {
4119d26e4fcSRobert Mustacchi cmn_err(level, (console) ? "%s: %s" : "!%s: %s",
4129d26e4fcSRobert Mustacchi I40E_MODULE_NAME, buf);
4139d26e4fcSRobert Mustacchi } else {
4149d26e4fcSRobert Mustacchi dev_err(i40e->i40e_dip, level, (console) ? "%s" : "!%s",
4159d26e4fcSRobert Mustacchi buf);
4169d26e4fcSRobert Mustacchi }
4179d26e4fcSRobert Mustacchi }
4189d26e4fcSRobert Mustacchi
4199d26e4fcSRobert Mustacchi /*
4209d26e4fcSRobert Mustacchi * Because there's the stupid trailing-comma problem with the C preprocessor
4219d26e4fcSRobert Mustacchi * and variable arguments, I need to instantiate these. Pardon the redundant
4229d26e4fcSRobert Mustacchi * code.
4239d26e4fcSRobert Mustacchi */
4249d26e4fcSRobert Mustacchi /*PRINTFLIKE2*/
4259d26e4fcSRobert Mustacchi void
i40e_error(i40e_t * i40e,const char * fmt,...)4269d26e4fcSRobert Mustacchi i40e_error(i40e_t *i40e, const char *fmt, ...)
4279d26e4fcSRobert Mustacchi {
4289d26e4fcSRobert Mustacchi va_list ap;
4299d26e4fcSRobert Mustacchi
4309d26e4fcSRobert Mustacchi va_start(ap, fmt);
4319d26e4fcSRobert Mustacchi i40e_dev_err(i40e, CE_WARN, B_FALSE, fmt, ap);
4329d26e4fcSRobert Mustacchi va_end(ap);
4339d26e4fcSRobert Mustacchi }
4349d26e4fcSRobert Mustacchi
4359d26e4fcSRobert Mustacchi /*PRINTFLIKE2*/
4369d26e4fcSRobert Mustacchi void
i40e_log(i40e_t * i40e,const char * fmt,...)4379d26e4fcSRobert Mustacchi i40e_log(i40e_t *i40e, const char *fmt, ...)
4389d26e4fcSRobert Mustacchi {
4399d26e4fcSRobert Mustacchi va_list ap;
4409d26e4fcSRobert Mustacchi
4419d26e4fcSRobert Mustacchi va_start(ap, fmt);
4429d26e4fcSRobert Mustacchi i40e_dev_err(i40e, CE_NOTE, B_FALSE, fmt, ap);
4439d26e4fcSRobert Mustacchi va_end(ap);
4449d26e4fcSRobert Mustacchi }
4459d26e4fcSRobert Mustacchi
4469d26e4fcSRobert Mustacchi /*PRINTFLIKE2*/
4479d26e4fcSRobert Mustacchi void
i40e_notice(i40e_t * i40e,const char * fmt,...)4489d26e4fcSRobert Mustacchi i40e_notice(i40e_t *i40e, const char *fmt, ...)
4499d26e4fcSRobert Mustacchi {
4509d26e4fcSRobert Mustacchi va_list ap;
4519d26e4fcSRobert Mustacchi
4529d26e4fcSRobert Mustacchi va_start(ap, fmt);
4539d26e4fcSRobert Mustacchi i40e_dev_err(i40e, CE_NOTE, B_TRUE, fmt, ap);
4549d26e4fcSRobert Mustacchi va_end(ap);
4559d26e4fcSRobert Mustacchi }
4569d26e4fcSRobert Mustacchi
457b9d34b9dSRobert Mustacchi /*
458b9d34b9dSRobert Mustacchi * Various parts of the driver need to know if the controller is from the X722
459b9d34b9dSRobert Mustacchi * family, which has a few additional capabilities and different programming
460b9d34b9dSRobert Mustacchi * means. We don't consider virtual functions as part of this as they are quite
461b9d34b9dSRobert Mustacchi * different and will require substantially more work.
462b9d34b9dSRobert Mustacchi */
463b9d34b9dSRobert Mustacchi static boolean_t
i40e_is_x722(i40e_t * i40e)464b9d34b9dSRobert Mustacchi i40e_is_x722(i40e_t *i40e)
465b9d34b9dSRobert Mustacchi {
466b9d34b9dSRobert Mustacchi return (i40e->i40e_hw_space.mac.type == I40E_MAC_X722);
467b9d34b9dSRobert Mustacchi }
468b9d34b9dSRobert Mustacchi
4699d26e4fcSRobert Mustacchi static void
i40e_device_rele(i40e_t * i40e)4709d26e4fcSRobert Mustacchi i40e_device_rele(i40e_t *i40e)
4719d26e4fcSRobert Mustacchi {
4729d26e4fcSRobert Mustacchi i40e_device_t *idp = i40e->i40e_device;
4739d26e4fcSRobert Mustacchi
4749d26e4fcSRobert Mustacchi if (idp == NULL)
4759d26e4fcSRobert Mustacchi return;
4769d26e4fcSRobert Mustacchi
4779d26e4fcSRobert Mustacchi mutex_enter(&i40e_glock);
4789d26e4fcSRobert Mustacchi VERIFY(idp->id_nreg > 0);
4799d26e4fcSRobert Mustacchi list_remove(&idp->id_i40e_list, i40e);
4809d26e4fcSRobert Mustacchi idp->id_nreg--;
4819d26e4fcSRobert Mustacchi if (idp->id_nreg == 0) {
4829d26e4fcSRobert Mustacchi list_remove(&i40e_dlist, idp);
4839d26e4fcSRobert Mustacchi list_destroy(&idp->id_i40e_list);
4849d26e4fcSRobert Mustacchi kmem_free(idp->id_rsrcs, sizeof (i40e_switch_rsrc_t) *
4859d26e4fcSRobert Mustacchi idp->id_rsrcs_alloc);
4869d26e4fcSRobert Mustacchi kmem_free(idp, sizeof (i40e_device_t));
4879d26e4fcSRobert Mustacchi }
4889d26e4fcSRobert Mustacchi i40e->i40e_device = NULL;
4899d26e4fcSRobert Mustacchi mutex_exit(&i40e_glock);
4909d26e4fcSRobert Mustacchi }
4919d26e4fcSRobert Mustacchi
4929d26e4fcSRobert Mustacchi static i40e_device_t *
i40e_device_find(i40e_t * i40e,dev_info_t * parent,uint_t bus,uint_t device)4939d26e4fcSRobert Mustacchi i40e_device_find(i40e_t *i40e, dev_info_t *parent, uint_t bus, uint_t device)
4949d26e4fcSRobert Mustacchi {
4959d26e4fcSRobert Mustacchi i40e_device_t *idp;
4969d26e4fcSRobert Mustacchi mutex_enter(&i40e_glock);
4979d26e4fcSRobert Mustacchi for (idp = list_head(&i40e_dlist); idp != NULL;
4989d26e4fcSRobert Mustacchi idp = list_next(&i40e_dlist, idp)) {
4999d26e4fcSRobert Mustacchi if (idp->id_parent == parent && idp->id_pci_bus == bus &&
5009d26e4fcSRobert Mustacchi idp->id_pci_device == device) {
5019d26e4fcSRobert Mustacchi break;
5029d26e4fcSRobert Mustacchi }
5039d26e4fcSRobert Mustacchi }
5049d26e4fcSRobert Mustacchi
5059d26e4fcSRobert Mustacchi if (idp != NULL) {
5069d26e4fcSRobert Mustacchi VERIFY(idp->id_nreg < idp->id_nfuncs);
5079d26e4fcSRobert Mustacchi idp->id_nreg++;
5089d26e4fcSRobert Mustacchi } else {
5099d26e4fcSRobert Mustacchi i40e_hw_t *hw = &i40e->i40e_hw_space;
5109d26e4fcSRobert Mustacchi ASSERT(hw->num_ports > 0);
5119d26e4fcSRobert Mustacchi ASSERT(hw->num_partitions > 0);
5129d26e4fcSRobert Mustacchi
5139d26e4fcSRobert Mustacchi /*
5149d26e4fcSRobert Mustacchi * The Intel common code doesn't exactly keep the number of PCI
5159d26e4fcSRobert Mustacchi * functions. But it calculates it during discovery of
5169d26e4fcSRobert Mustacchi * partitions and ports. So what we do is undo the calculation
5179d26e4fcSRobert Mustacchi * that it does originally, as functions are evenly spread
5189d26e4fcSRobert Mustacchi * across ports in the rare case of partitions.
5199d26e4fcSRobert Mustacchi */
5209d26e4fcSRobert Mustacchi idp = kmem_alloc(sizeof (i40e_device_t), KM_SLEEP);
5219d26e4fcSRobert Mustacchi idp->id_parent = parent;
5229d26e4fcSRobert Mustacchi idp->id_pci_bus = bus;
5239d26e4fcSRobert Mustacchi idp->id_pci_device = device;
5249d26e4fcSRobert Mustacchi idp->id_nfuncs = hw->num_ports * hw->num_partitions;
5259d26e4fcSRobert Mustacchi idp->id_nreg = 1;
5269d26e4fcSRobert Mustacchi idp->id_rsrcs_alloc = i40e->i40e_switch_rsrc_alloc;
5279d26e4fcSRobert Mustacchi idp->id_rsrcs_act = i40e->i40e_switch_rsrc_actual;
5289d26e4fcSRobert Mustacchi idp->id_rsrcs = kmem_alloc(sizeof (i40e_switch_rsrc_t) *
5299d26e4fcSRobert Mustacchi idp->id_rsrcs_alloc, KM_SLEEP);
5309d26e4fcSRobert Mustacchi bcopy(i40e->i40e_switch_rsrcs, idp->id_rsrcs,
5319d26e4fcSRobert Mustacchi sizeof (i40e_switch_rsrc_t) * idp->id_rsrcs_alloc);
5329d26e4fcSRobert Mustacchi list_create(&idp->id_i40e_list, sizeof (i40e_t),
5339d26e4fcSRobert Mustacchi offsetof(i40e_t, i40e_dlink));
5349d26e4fcSRobert Mustacchi
5359d26e4fcSRobert Mustacchi list_insert_tail(&i40e_dlist, idp);
5369d26e4fcSRobert Mustacchi }
5379d26e4fcSRobert Mustacchi
5389d26e4fcSRobert Mustacchi list_insert_tail(&idp->id_i40e_list, i40e);
5399d26e4fcSRobert Mustacchi mutex_exit(&i40e_glock);
5409d26e4fcSRobert Mustacchi
5419d26e4fcSRobert Mustacchi return (idp);
5429d26e4fcSRobert Mustacchi }
5439d26e4fcSRobert Mustacchi
5449d26e4fcSRobert Mustacchi static void
i40e_link_state_set(i40e_t * i40e,link_state_t state)5459d26e4fcSRobert Mustacchi i40e_link_state_set(i40e_t *i40e, link_state_t state)
5469d26e4fcSRobert Mustacchi {
5479d26e4fcSRobert Mustacchi if (i40e->i40e_link_state == state)
5489d26e4fcSRobert Mustacchi return;
5499d26e4fcSRobert Mustacchi
5509d26e4fcSRobert Mustacchi i40e->i40e_link_state = state;
5519d26e4fcSRobert Mustacchi mac_link_update(i40e->i40e_mac_hdl, i40e->i40e_link_state);
5529d26e4fcSRobert Mustacchi }
5539d26e4fcSRobert Mustacchi
5549d26e4fcSRobert Mustacchi /*
5559d26e4fcSRobert Mustacchi * This is a basic link check routine. Mostly we're using this just to see
5569d26e4fcSRobert Mustacchi * if we can get any accurate information about the state of the link being
5579d26e4fcSRobert Mustacchi * up or down, as well as updating the link state, speed, etc. information.
5589d26e4fcSRobert Mustacchi */
5599d26e4fcSRobert Mustacchi void
i40e_link_check(i40e_t * i40e)5609d26e4fcSRobert Mustacchi i40e_link_check(i40e_t *i40e)
5619d26e4fcSRobert Mustacchi {
5629d26e4fcSRobert Mustacchi i40e_hw_t *hw = &i40e->i40e_hw_space;
5639d26e4fcSRobert Mustacchi boolean_t ls;
5649d26e4fcSRobert Mustacchi int ret;
5659d26e4fcSRobert Mustacchi
5669d26e4fcSRobert Mustacchi ASSERT(MUTEX_HELD(&i40e->i40e_general_lock));
5679d26e4fcSRobert Mustacchi
5689d26e4fcSRobert Mustacchi hw->phy.get_link_info = B_TRUE;
5699d26e4fcSRobert Mustacchi if ((ret = i40e_get_link_status(hw, &ls)) != I40E_SUCCESS) {
5709d26e4fcSRobert Mustacchi i40e->i40e_s_link_status_errs++;
5719d26e4fcSRobert Mustacchi i40e->i40e_s_link_status_lasterr = ret;
5729d26e4fcSRobert Mustacchi return;
5739d26e4fcSRobert Mustacchi }
5749d26e4fcSRobert Mustacchi
5759d26e4fcSRobert Mustacchi /*
5769d26e4fcSRobert Mustacchi * Firmware abstracts all of the mac and phy information for us, so we
5779d26e4fcSRobert Mustacchi * can use i40e_get_link_status to determine the current state.
5789d26e4fcSRobert Mustacchi */
5799d26e4fcSRobert Mustacchi if (ls == B_TRUE) {
5809d26e4fcSRobert Mustacchi enum i40e_aq_link_speed speed;
5819d26e4fcSRobert Mustacchi
5829d26e4fcSRobert Mustacchi speed = i40e_get_link_speed(hw);
5839d26e4fcSRobert Mustacchi
5849d26e4fcSRobert Mustacchi /*
5859d26e4fcSRobert Mustacchi * Translate from an i40e value to a value in Mbits/s.
5869d26e4fcSRobert Mustacchi */
5879d26e4fcSRobert Mustacchi switch (speed) {
5889d26e4fcSRobert Mustacchi case I40E_LINK_SPEED_100MB:
5899d26e4fcSRobert Mustacchi i40e->i40e_link_speed = 100;
5909d26e4fcSRobert Mustacchi break;
5919d26e4fcSRobert Mustacchi case I40E_LINK_SPEED_1GB:
5929d26e4fcSRobert Mustacchi i40e->i40e_link_speed = 1000;
5939d26e4fcSRobert Mustacchi break;
594*df36e06dSRobert Mustacchi case I40E_LINK_SPEED_2_5GB:
595*df36e06dSRobert Mustacchi i40e->i40e_link_speed = 2500;
596*df36e06dSRobert Mustacchi break;
597*df36e06dSRobert Mustacchi case I40E_LINK_SPEED_5GB:
598*df36e06dSRobert Mustacchi i40e->i40e_link_speed = 5000;
599*df36e06dSRobert Mustacchi break;
6009d26e4fcSRobert Mustacchi case I40E_LINK_SPEED_10GB:
6019d26e4fcSRobert Mustacchi i40e->i40e_link_speed = 10000;
6029d26e4fcSRobert Mustacchi break;
6039d26e4fcSRobert Mustacchi case I40E_LINK_SPEED_20GB:
6049d26e4fcSRobert Mustacchi i40e->i40e_link_speed = 20000;
6059d26e4fcSRobert Mustacchi break;
6069d26e4fcSRobert Mustacchi case I40E_LINK_SPEED_40GB:
6079d26e4fcSRobert Mustacchi i40e->i40e_link_speed = 40000;
6089d26e4fcSRobert Mustacchi break;
6093d75a287SRobert Mustacchi case I40E_LINK_SPEED_25GB:
6103d75a287SRobert Mustacchi i40e->i40e_link_speed = 25000;
6113d75a287SRobert Mustacchi break;
6129d26e4fcSRobert Mustacchi default:
6139d26e4fcSRobert Mustacchi i40e->i40e_link_speed = 0;
6149d26e4fcSRobert Mustacchi break;
6159d26e4fcSRobert Mustacchi }
6169d26e4fcSRobert Mustacchi
6179d26e4fcSRobert Mustacchi /*
6189d26e4fcSRobert Mustacchi * At this time, hardware does not support half-duplex
6199d26e4fcSRobert Mustacchi * operation, hence why we don't ask the hardware about our
6209d26e4fcSRobert Mustacchi * current speed.
6219d26e4fcSRobert Mustacchi */
6229d26e4fcSRobert Mustacchi i40e->i40e_link_duplex = LINK_DUPLEX_FULL;
6239d26e4fcSRobert Mustacchi i40e_link_state_set(i40e, LINK_STATE_UP);
6249d26e4fcSRobert Mustacchi } else {
6259d26e4fcSRobert Mustacchi i40e->i40e_link_speed = 0;
6269d26e4fcSRobert Mustacchi i40e->i40e_link_duplex = 0;
6279d26e4fcSRobert Mustacchi i40e_link_state_set(i40e, LINK_STATE_DOWN);
6289d26e4fcSRobert Mustacchi }
6299d26e4fcSRobert Mustacchi }
6309d26e4fcSRobert Mustacchi
6319d26e4fcSRobert Mustacchi static void
i40e_rem_intrs(i40e_t * i40e)6329d26e4fcSRobert Mustacchi i40e_rem_intrs(i40e_t *i40e)
6339d26e4fcSRobert Mustacchi {
6349d26e4fcSRobert Mustacchi int i, rc;
6359d26e4fcSRobert Mustacchi
6369d26e4fcSRobert Mustacchi for (i = 0; i < i40e->i40e_intr_count; i++) {
6379d26e4fcSRobert Mustacchi rc = ddi_intr_free(i40e->i40e_intr_handles[i]);
6389d26e4fcSRobert Mustacchi if (rc != DDI_SUCCESS) {
6399d26e4fcSRobert Mustacchi i40e_log(i40e, "failed to free interrupt %d: %d",
6409d26e4fcSRobert Mustacchi i, rc);
6419d26e4fcSRobert Mustacchi }
6429d26e4fcSRobert Mustacchi }
6439d26e4fcSRobert Mustacchi
6449d26e4fcSRobert Mustacchi kmem_free(i40e->i40e_intr_handles, i40e->i40e_intr_size);
6459d26e4fcSRobert Mustacchi i40e->i40e_intr_handles = NULL;
6469d26e4fcSRobert Mustacchi }
6479d26e4fcSRobert Mustacchi
6489d26e4fcSRobert Mustacchi static void
i40e_rem_intr_handlers(i40e_t * i40e)6499d26e4fcSRobert Mustacchi i40e_rem_intr_handlers(i40e_t *i40e)
6509d26e4fcSRobert Mustacchi {
6519d26e4fcSRobert Mustacchi int i, rc;
6529d26e4fcSRobert Mustacchi
6539d26e4fcSRobert Mustacchi for (i = 0; i < i40e->i40e_intr_count; i++) {
6549d26e4fcSRobert Mustacchi rc = ddi_intr_remove_handler(i40e->i40e_intr_handles[i]);
6559d26e4fcSRobert Mustacchi if (rc != DDI_SUCCESS) {
6569d26e4fcSRobert Mustacchi i40e_log(i40e, "failed to remove interrupt %d: %d",
6579d26e4fcSRobert Mustacchi i, rc);
6589d26e4fcSRobert Mustacchi }
6599d26e4fcSRobert Mustacchi }
6609d26e4fcSRobert Mustacchi }
6619d26e4fcSRobert Mustacchi
6629d26e4fcSRobert Mustacchi /*
6639d26e4fcSRobert Mustacchi * illumos Fault Management Architecture (FMA) support.
6649d26e4fcSRobert Mustacchi */
6659d26e4fcSRobert Mustacchi
6669d26e4fcSRobert Mustacchi int
i40e_check_acc_handle(ddi_acc_handle_t handle)6679d26e4fcSRobert Mustacchi i40e_check_acc_handle(ddi_acc_handle_t handle)
6689d26e4fcSRobert Mustacchi {
6699d26e4fcSRobert Mustacchi ddi_fm_error_t de;
6709d26e4fcSRobert Mustacchi
6719d26e4fcSRobert Mustacchi ddi_fm_acc_err_get(handle, &de, DDI_FME_VERSION);
6729d26e4fcSRobert Mustacchi ddi_fm_acc_err_clear(handle, DDI_FME_VERSION);
6739d26e4fcSRobert Mustacchi return (de.fme_status);
6749d26e4fcSRobert Mustacchi }
6759d26e4fcSRobert Mustacchi
6769d26e4fcSRobert Mustacchi int
i40e_check_dma_handle(ddi_dma_handle_t handle)6779d26e4fcSRobert Mustacchi i40e_check_dma_handle(ddi_dma_handle_t handle)
6789d26e4fcSRobert Mustacchi {
6799d26e4fcSRobert Mustacchi ddi_fm_error_t de;
6809d26e4fcSRobert Mustacchi
6819d26e4fcSRobert Mustacchi ddi_fm_dma_err_get(handle, &de, DDI_FME_VERSION);
6829d26e4fcSRobert Mustacchi return (de.fme_status);
6839d26e4fcSRobert Mustacchi }
6849d26e4fcSRobert Mustacchi
6859d26e4fcSRobert Mustacchi /*
6869d26e4fcSRobert Mustacchi * Fault service error handling callback function.
6879d26e4fcSRobert Mustacchi */
6889d26e4fcSRobert Mustacchi /* ARGSUSED */
6899d26e4fcSRobert Mustacchi static int
i40e_fm_error_cb(dev_info_t * dip,ddi_fm_error_t * err,const void * impl_data)6909d26e4fcSRobert Mustacchi i40e_fm_error_cb(dev_info_t *dip, ddi_fm_error_t *err, const void *impl_data)
6919d26e4fcSRobert Mustacchi {
6929d26e4fcSRobert Mustacchi pci_ereport_post(dip, err, NULL);
6939d26e4fcSRobert Mustacchi return (err->fme_status);
6949d26e4fcSRobert Mustacchi }
6959d26e4fcSRobert Mustacchi
6969d26e4fcSRobert Mustacchi static void
i40e_fm_init(i40e_t * i40e)6979d26e4fcSRobert Mustacchi i40e_fm_init(i40e_t *i40e)
6989d26e4fcSRobert Mustacchi {
6999d26e4fcSRobert Mustacchi ddi_iblock_cookie_t iblk;
7009d26e4fcSRobert Mustacchi
7019d26e4fcSRobert Mustacchi i40e->i40e_fm_capabilities = ddi_prop_get_int(DDI_DEV_T_ANY,
7029d26e4fcSRobert Mustacchi i40e->i40e_dip, DDI_PROP_DONTPASS, "fm_capable",
7039d26e4fcSRobert Mustacchi DDI_FM_EREPORT_CAPABLE | DDI_FM_ACCCHK_CAPABLE |
7049d26e4fcSRobert Mustacchi DDI_FM_DMACHK_CAPABLE | DDI_FM_ERRCB_CAPABLE);
7059d26e4fcSRobert Mustacchi
7069d26e4fcSRobert Mustacchi if (i40e->i40e_fm_capabilities < 0) {
7079d26e4fcSRobert Mustacchi i40e->i40e_fm_capabilities = 0;
7089d26e4fcSRobert Mustacchi } else if (i40e->i40e_fm_capabilities > 0xf) {
7099d26e4fcSRobert Mustacchi i40e->i40e_fm_capabilities = DDI_FM_EREPORT_CAPABLE |
7109d26e4fcSRobert Mustacchi DDI_FM_ACCCHK_CAPABLE | DDI_FM_DMACHK_CAPABLE |
7119d26e4fcSRobert Mustacchi DDI_FM_ERRCB_CAPABLE;
7129d26e4fcSRobert Mustacchi }
7139d26e4fcSRobert Mustacchi
7149d26e4fcSRobert Mustacchi /*
7159d26e4fcSRobert Mustacchi * Only register with IO Fault Services if we have some capability
7169d26e4fcSRobert Mustacchi */
7179d26e4fcSRobert Mustacchi if (i40e->i40e_fm_capabilities & DDI_FM_ACCCHK_CAPABLE) {
7189d26e4fcSRobert Mustacchi i40e_regs_acc_attr.devacc_attr_access = DDI_FLAGERR_ACC;
7199d26e4fcSRobert Mustacchi } else {
7209d26e4fcSRobert Mustacchi i40e_regs_acc_attr.devacc_attr_access = DDI_DEFAULT_ACC;
7219d26e4fcSRobert Mustacchi }
7229d26e4fcSRobert Mustacchi
7239d26e4fcSRobert Mustacchi if (i40e->i40e_fm_capabilities) {
7249d26e4fcSRobert Mustacchi ddi_fm_init(i40e->i40e_dip, &i40e->i40e_fm_capabilities, &iblk);
7259d26e4fcSRobert Mustacchi
7269d26e4fcSRobert Mustacchi if (DDI_FM_EREPORT_CAP(i40e->i40e_fm_capabilities) ||
7279d26e4fcSRobert Mustacchi DDI_FM_ERRCB_CAP(i40e->i40e_fm_capabilities)) {
7289d26e4fcSRobert Mustacchi pci_ereport_setup(i40e->i40e_dip);
7299d26e4fcSRobert Mustacchi }
7309d26e4fcSRobert Mustacchi
7319d26e4fcSRobert Mustacchi if (DDI_FM_ERRCB_CAP(i40e->i40e_fm_capabilities)) {
7329d26e4fcSRobert Mustacchi ddi_fm_handler_register(i40e->i40e_dip,
7339d26e4fcSRobert Mustacchi i40e_fm_error_cb, (void*)i40e);
7349d26e4fcSRobert Mustacchi }
7359d26e4fcSRobert Mustacchi }
7369d26e4fcSRobert Mustacchi
7379d26e4fcSRobert Mustacchi if (i40e->i40e_fm_capabilities & DDI_FM_DMACHK_CAPABLE) {
7389d26e4fcSRobert Mustacchi i40e_init_dma_attrs(i40e, B_TRUE);
7399d26e4fcSRobert Mustacchi } else {
7409d26e4fcSRobert Mustacchi i40e_init_dma_attrs(i40e, B_FALSE);
7419d26e4fcSRobert Mustacchi }
7429d26e4fcSRobert Mustacchi }
7439d26e4fcSRobert Mustacchi
7449d26e4fcSRobert Mustacchi static void
i40e_fm_fini(i40e_t * i40e)7459d26e4fcSRobert Mustacchi i40e_fm_fini(i40e_t *i40e)
7469d26e4fcSRobert Mustacchi {
7479d26e4fcSRobert Mustacchi if (i40e->i40e_fm_capabilities) {
7489d26e4fcSRobert Mustacchi
7499d26e4fcSRobert Mustacchi if (DDI_FM_EREPORT_CAP(i40e->i40e_fm_capabilities) ||
7509d26e4fcSRobert Mustacchi DDI_FM_ERRCB_CAP(i40e->i40e_fm_capabilities))
7519d26e4fcSRobert Mustacchi pci_ereport_teardown(i40e->i40e_dip);
7529d26e4fcSRobert Mustacchi
7539d26e4fcSRobert Mustacchi if (DDI_FM_ERRCB_CAP(i40e->i40e_fm_capabilities))
7549d26e4fcSRobert Mustacchi ddi_fm_handler_unregister(i40e->i40e_dip);
7559d26e4fcSRobert Mustacchi
7569d26e4fcSRobert Mustacchi ddi_fm_fini(i40e->i40e_dip);
7579d26e4fcSRobert Mustacchi }
7589d26e4fcSRobert Mustacchi }
7599d26e4fcSRobert Mustacchi
7609d26e4fcSRobert Mustacchi void
i40e_fm_ereport(i40e_t * i40e,char * detail)7619d26e4fcSRobert Mustacchi i40e_fm_ereport(i40e_t *i40e, char *detail)
7629d26e4fcSRobert Mustacchi {
7639d26e4fcSRobert Mustacchi uint64_t ena;
7649d26e4fcSRobert Mustacchi char buf[FM_MAX_CLASS];
7659d26e4fcSRobert Mustacchi
7669d26e4fcSRobert Mustacchi (void) snprintf(buf, FM_MAX_CLASS, "%s.%s", DDI_FM_DEVICE, detail);
7679d26e4fcSRobert Mustacchi ena = fm_ena_generate(0, FM_ENA_FMT1);
7689d26e4fcSRobert Mustacchi if (DDI_FM_EREPORT_CAP(i40e->i40e_fm_capabilities)) {
7699d26e4fcSRobert Mustacchi ddi_fm_ereport_post(i40e->i40e_dip, buf, ena, DDI_NOSLEEP,
7709d26e4fcSRobert Mustacchi FM_VERSION, DATA_TYPE_UINT8, FM_EREPORT_VERS0, NULL);
7719d26e4fcSRobert Mustacchi }
7729d26e4fcSRobert Mustacchi }
7739d26e4fcSRobert Mustacchi
7749d26e4fcSRobert Mustacchi /*
77509aee612SRyan Zezeski * Here we're trying to set the SEID of the default VSI. In general,
77609aee612SRyan Zezeski * when we come through and look at this shortly after attach, we
77709aee612SRyan Zezeski * expect there to only be a single element present, which is the
77809aee612SRyan Zezeski * default VSI. Importantly, each PF seems to not see any other
77909aee612SRyan Zezeski * devices, in part because of the simple switch mode that we're
78009aee612SRyan Zezeski * using. If for some reason, we see more artifacts, we'll need to
78109aee612SRyan Zezeski * revisit what we're doing here.
7829d26e4fcSRobert Mustacchi */
78309aee612SRyan Zezeski static boolean_t
i40e_set_def_vsi_seid(i40e_t * i40e)78409aee612SRyan Zezeski i40e_set_def_vsi_seid(i40e_t *i40e)
7859d26e4fcSRobert Mustacchi {
7869d26e4fcSRobert Mustacchi i40e_hw_t *hw = &i40e->i40e_hw_space;
7879d26e4fcSRobert Mustacchi struct i40e_aqc_get_switch_config_resp *sw_config;
7889d26e4fcSRobert Mustacchi uint8_t aq_buf[I40E_AQ_LARGE_BUF];
7899d26e4fcSRobert Mustacchi uint16_t next = 0;
7909d26e4fcSRobert Mustacchi int rc;
7919d26e4fcSRobert Mustacchi
7929d26e4fcSRobert Mustacchi /* LINTED: E_BAD_PTR_CAST_ALIGN */
7939d26e4fcSRobert Mustacchi sw_config = (struct i40e_aqc_get_switch_config_resp *)aq_buf;
7949d26e4fcSRobert Mustacchi rc = i40e_aq_get_switch_config(hw, sw_config, sizeof (aq_buf), &next,
7959d26e4fcSRobert Mustacchi NULL);
7969d26e4fcSRobert Mustacchi if (rc != I40E_SUCCESS) {
7979d26e4fcSRobert Mustacchi i40e_error(i40e, "i40e_aq_get_switch_config() failed %d: %d",
7989d26e4fcSRobert Mustacchi rc, hw->aq.asq_last_status);
79909aee612SRyan Zezeski return (B_FALSE);
8009d26e4fcSRobert Mustacchi }
8019d26e4fcSRobert Mustacchi
8029d26e4fcSRobert Mustacchi if (LE_16(sw_config->header.num_reported) != 1) {
8039d26e4fcSRobert Mustacchi i40e_error(i40e, "encountered multiple (%d) switching units "
8049d26e4fcSRobert Mustacchi "during attach, not proceeding",
8059d26e4fcSRobert Mustacchi LE_16(sw_config->header.num_reported));
80609aee612SRyan Zezeski return (B_FALSE);
80709aee612SRyan Zezeski }
80809aee612SRyan Zezeski
80909aee612SRyan Zezeski I40E_DEF_VSI_SEID(i40e) = sw_config->element[0].seid;
81009aee612SRyan Zezeski return (B_TRUE);
81109aee612SRyan Zezeski }
81209aee612SRyan Zezeski
81309aee612SRyan Zezeski /*
81409aee612SRyan Zezeski * Get the SEID of the uplink MAC.
81509aee612SRyan Zezeski */
81609aee612SRyan Zezeski static int
i40e_get_mac_seid(i40e_t * i40e)81709aee612SRyan Zezeski i40e_get_mac_seid(i40e_t *i40e)
81809aee612SRyan Zezeski {
81909aee612SRyan Zezeski i40e_hw_t *hw = &i40e->i40e_hw_space;
82009aee612SRyan Zezeski struct i40e_aqc_get_switch_config_resp *sw_config;
82109aee612SRyan Zezeski uint8_t aq_buf[I40E_AQ_LARGE_BUF];
822