1/*-
2 * Copyright (c) 2013 Tsubai Masanari
3 * Copyright (c) 2013 Bryan Venteicher <bryanv@FreeBSD.org>
4 * Copyright (c) 2018 Patrick Kelsey
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 *
18 * $OpenBSD: src/sys/dev/pci/if_vmx.c,v 1.11 2013/06/22 00:28:10 uebayasi Exp $
19 */
20
21/* Driver for VMware vmxnet3 virtual ethernet devices. */
22
23#include <sys/cdefs.h>
24__FBSDID("$FreeBSD$");
25
26#include "opt_rss.h"
27
28#include <sys/param.h>
29#include <sys/systm.h>
30#include <sys/kernel.h>
31#include <sys/endian.h>
32#include <sys/sockio.h>
33#include <sys/mbuf.h>
34#include <sys/malloc.h>
35#include <sys/module.h>
36#include <sys/socket.h>
37#include <sys/sysctl.h>
38#include <sys/smp.h>
39#include <vm/vm.h>
40#include <vm/pmap.h>
41
42#include <net/ethernet.h>
43#include <net/if.h>
44#include <net/if_var.h>
45#include <net/if_arp.h>
46#include <net/if_dl.h>
47#include <net/if_types.h>
48#include <net/if_media.h>
49#include <net/if_vlan_var.h>
50#include <net/iflib.h>
51#ifdef RSS
52#include <net/rss_config.h>
53#endif
54
55#include <netinet/in_systm.h>
56#include <netinet/in.h>
57#include <netinet/ip.h>
58#include <netinet/ip6.h>
59#include <netinet6/ip6_var.h>
60#include <netinet/udp.h>
61#include <netinet/tcp.h>
62
63#include <machine/bus.h>
64#include <machine/resource.h>
65#include <sys/bus.h>
66#include <sys/rman.h>
67
68#include <dev/pci/pcireg.h>
69#include <dev/pci/pcivar.h>
70
71#include "ifdi_if.h"
72
73#include "if_vmxreg.h"
74#include "if_vmxvar.h"
75
76#include "opt_inet.h"
77#include "opt_inet6.h"
78
79#define VMXNET3_VMWARE_VENDOR_ID	0x15AD
80#define VMXNET3_VMWARE_DEVICE_ID	0x07B0
81
82static pci_vendor_info_t vmxnet3_vendor_info_array[] =
83{
84	PVID(VMXNET3_VMWARE_VENDOR_ID, VMXNET3_VMWARE_DEVICE_ID, "VMware VMXNET3 Ethernet Adapter"),
85	/* required last entry */
86	PVID_END
87};
88
89static void	*vmxnet3_register(device_t);
90static int	vmxnet3_attach_pre(if_ctx_t);
91static int	vmxnet3_msix_intr_assign(if_ctx_t, int);
92static void	vmxnet3_free_irqs(struct vmxnet3_softc *);
93static int	vmxnet3_attach_post(if_ctx_t);
94static int	vmxnet3_detach(if_ctx_t);
95static int	vmxnet3_shutdown(if_ctx_t);
96static int	vmxnet3_suspend(if_ctx_t);
97static int	vmxnet3_resume(if_ctx_t);
98
99static int	vmxnet3_alloc_resources(struct vmxnet3_softc *);
100static void	vmxnet3_free_resources(struct vmxnet3_softc *);
101static int	vmxnet3_check_version(struct vmxnet3_softc *);
102static void	vmxnet3_set_interrupt_idx(struct vmxnet3_softc *);
103
104static int	vmxnet3_queues_shared_alloc(struct vmxnet3_softc *);
105static void	vmxnet3_init_txq(struct vmxnet3_softc *, int);
106static int	vmxnet3_tx_queues_alloc(if_ctx_t, caddr_t *, uint64_t *, int, int);
107static void	vmxnet3_init_rxq(struct vmxnet3_softc *, int, int);
108static int	vmxnet3_rx_queues_alloc(if_ctx_t, caddr_t *, uint64_t *, int, int);
109static void	vmxnet3_queues_free(if_ctx_t);
110
111static int	vmxnet3_alloc_shared_data(struct vmxnet3_softc *);
112static void	vmxnet3_free_shared_data(struct vmxnet3_softc *);
113static int	vmxnet3_alloc_mcast_table(struct vmxnet3_softc *);
114static void	vmxnet3_free_mcast_table(struct vmxnet3_softc *);
115static void	vmxnet3_init_shared_data(struct vmxnet3_softc *);
116static void	vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *);
117static void	vmxnet3_reinit_shared_data(struct vmxnet3_softc *);
118static int	vmxnet3_alloc_data(struct vmxnet3_softc *);
119static void	vmxnet3_free_data(struct vmxnet3_softc *);
120
121static void	vmxnet3_evintr(struct vmxnet3_softc *);
122static int	vmxnet3_isc_txd_encap(void *, if_pkt_info_t);
123static void	vmxnet3_isc_txd_flush(void *, uint16_t, qidx_t);
124static int	vmxnet3_isc_txd_credits_update(void *, uint16_t, bool);
125static int	vmxnet3_isc_rxd_available(void *, uint16_t, qidx_t, qidx_t);
126static int	vmxnet3_isc_rxd_pkt_get(void *, if_rxd_info_t);
127static void	vmxnet3_isc_rxd_refill(void *, if_rxd_update_t);
128static void	vmxnet3_isc_rxd_flush(void *, uint16_t, uint8_t, qidx_t);
129static int	vmxnet3_legacy_intr(void *);
130static int	vmxnet3_rxq_intr(void *);
131static int	vmxnet3_event_intr(void *);
132
133static void	vmxnet3_stop(if_ctx_t);
134
135static void	vmxnet3_txinit(struct vmxnet3_softc *, struct vmxnet3_txqueue *);
136static void	vmxnet3_rxinit(struct vmxnet3_softc *, struct vmxnet3_rxqueue *);
137static void	vmxnet3_reinit_queues(struct vmxnet3_softc *);
138static int	vmxnet3_enable_device(struct vmxnet3_softc *);
139static void	vmxnet3_reinit_rxfilters(struct vmxnet3_softc *);
140static void	vmxnet3_init(if_ctx_t);
141static void	vmxnet3_multi_set(if_ctx_t);
142static int	vmxnet3_mtu_set(if_ctx_t, uint32_t);
143static void	vmxnet3_media_status(if_ctx_t, struct ifmediareq *);
144static int	vmxnet3_media_change(if_ctx_t);
145static int	vmxnet3_promisc_set(if_ctx_t, int);
146static uint64_t	vmxnet3_get_counter(if_ctx_t, ift_counter);
147static void	vmxnet3_update_admin_status(if_ctx_t);
148static void	vmxnet3_txq_timer(if_ctx_t, uint16_t);
149
150static void	vmxnet3_update_vlan_filter(struct vmxnet3_softc *, int,
151		    uint16_t);
152static void	vmxnet3_vlan_register(if_ctx_t, uint16_t);
153static void	vmxnet3_vlan_unregister(if_ctx_t, uint16_t);
154static void	vmxnet3_set_rxfilter(struct vmxnet3_softc *, int);
155
156static void	vmxnet3_refresh_host_stats(struct vmxnet3_softc *);
157static int	vmxnet3_link_is_up(struct vmxnet3_softc *);
158static void	vmxnet3_link_status(struct vmxnet3_softc *);
159static void	vmxnet3_set_lladdr(struct vmxnet3_softc *);
160static void	vmxnet3_get_lladdr(struct vmxnet3_softc *);
161
162static void	vmxnet3_setup_txq_sysctl(struct vmxnet3_txqueue *,
163		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
164static void	vmxnet3_setup_rxq_sysctl(struct vmxnet3_rxqueue *,
165		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
166static void	vmxnet3_setup_queue_sysctl(struct vmxnet3_softc *,
167		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
168static void	vmxnet3_setup_sysctl(struct vmxnet3_softc *);
169
170static void	vmxnet3_write_bar0(struct vmxnet3_softc *, bus_size_t,
171		    uint32_t);
172static uint32_t	vmxnet3_read_bar1(struct vmxnet3_softc *, bus_size_t);
173static void	vmxnet3_write_bar1(struct vmxnet3_softc *, bus_size_t,
174		    uint32_t);
175static void	vmxnet3_write_cmd(struct vmxnet3_softc *, uint32_t);
176static uint32_t	vmxnet3_read_cmd(struct vmxnet3_softc *, uint32_t);
177
178static int	vmxnet3_tx_queue_intr_enable(if_ctx_t, uint16_t);
179static int	vmxnet3_rx_queue_intr_enable(if_ctx_t, uint16_t);
180static void	vmxnet3_link_intr_enable(if_ctx_t);
181static void	vmxnet3_enable_intr(struct vmxnet3_softc *, int);
182static void	vmxnet3_disable_intr(struct vmxnet3_softc *, int);
183static void	vmxnet3_intr_enable_all(if_ctx_t);
184static void	vmxnet3_intr_disable_all(if_ctx_t);
185
186typedef enum {
187	VMXNET3_BARRIER_RD,
188	VMXNET3_BARRIER_WR,
189	VMXNET3_BARRIER_RDWR,
190} vmxnet3_barrier_t;
191
192static void	vmxnet3_barrier(struct vmxnet3_softc *, vmxnet3_barrier_t);
193
194static device_method_t vmxnet3_methods[] = {
195	/* Device interface */
196	DEVMETHOD(device_register, vmxnet3_register),
197	DEVMETHOD(device_probe, iflib_device_probe),
198	DEVMETHOD(device_attach, iflib_device_attach),
199	DEVMETHOD(device_detach, iflib_device_detach),
200	DEVMETHOD(device_shutdown, iflib_device_shutdown),
201	DEVMETHOD(device_suspend, iflib_device_suspend),
202	DEVMETHOD(device_resume, iflib_device_resume),
203	DEVMETHOD_END
204};
205
206static driver_t vmxnet3_driver = {
207	"vmx", vmxnet3_methods, sizeof(struct vmxnet3_softc)
208};
209
210static devclass_t vmxnet3_devclass;
211DRIVER_MODULE(vmx, pci, vmxnet3_driver, vmxnet3_devclass, 0, 0);
212IFLIB_PNP_INFO(pci, vmx, vmxnet3_vendor_info_array);
213MODULE_VERSION(vmx, 2);
214
215MODULE_DEPEND(vmx, pci, 1, 1, 1);
216MODULE_DEPEND(vmx, ether, 1, 1, 1);
217MODULE_DEPEND(vmx, iflib, 1, 1, 1);
218
219static device_method_t vmxnet3_iflib_methods[] = {
220	DEVMETHOD(ifdi_tx_queues_alloc, vmxnet3_tx_queues_alloc),
221	DEVMETHOD(ifdi_rx_queues_alloc, vmxnet3_rx_queues_alloc),
222	DEVMETHOD(ifdi_queues_free, vmxnet3_queues_free),
223
224	DEVMETHOD(ifdi_attach_pre, vmxnet3_attach_pre),
225	DEVMETHOD(ifdi_attach_post, vmxnet3_attach_post),
226	DEVMETHOD(ifdi_detach, vmxnet3_detach),
227
228	DEVMETHOD(ifdi_init, vmxnet3_init),
229	DEVMETHOD(ifdi_stop, vmxnet3_stop),
230	DEVMETHOD(ifdi_multi_set, vmxnet3_multi_set),
231	DEVMETHOD(ifdi_mtu_set, vmxnet3_mtu_set),
232	DEVMETHOD(ifdi_media_status, vmxnet3_media_status),
233	DEVMETHOD(ifdi_media_change, vmxnet3_media_change),
234	DEVMETHOD(ifdi_promisc_set, vmxnet3_promisc_set),
235	DEVMETHOD(ifdi_get_counter, vmxnet3_get_counter),
236	DEVMETHOD(ifdi_update_admin_status, vmxnet3_update_admin_status),
237	DEVMETHOD(ifdi_timer, vmxnet3_txq_timer),
238
239	DEVMETHOD(ifdi_tx_queue_intr_enable, vmxnet3_tx_queue_intr_enable),
240	DEVMETHOD(ifdi_rx_queue_intr_enable, vmxnet3_rx_queue_intr_enable),
241	DEVMETHOD(ifdi_link_intr_enable, vmxnet3_link_intr_enable),
242	DEVMETHOD(ifdi_intr_enable, vmxnet3_intr_enable_all),
243	DEVMETHOD(ifdi_intr_disable, vmxnet3_intr_disable_all),
244	DEVMETHOD(ifdi_msix_intr_assign, vmxnet3_msix_intr_assign),
245
246	DEVMETHOD(ifdi_vlan_register, vmxnet3_vlan_register),
247	DEVMETHOD(ifdi_vlan_unregister, vmxnet3_vlan_unregister),
248
249	DEVMETHOD(ifdi_shutdown, vmxnet3_shutdown),
250	DEVMETHOD(ifdi_suspend, vmxnet3_suspend),
251	DEVMETHOD(ifdi_resume, vmxnet3_resume),
252
253	DEVMETHOD_END
254};
255
256static driver_t vmxnet3_iflib_driver = {
257	"vmx", vmxnet3_iflib_methods, sizeof(struct vmxnet3_softc)
258};
259
260struct if_txrx vmxnet3_txrx = {
261	.ift_txd_encap = vmxnet3_isc_txd_encap,
262	.ift_txd_flush = vmxnet3_isc_txd_flush,
263	.ift_txd_credits_update = vmxnet3_isc_txd_credits_update,
264	.ift_rxd_available = vmxnet3_isc_rxd_available,
265	.ift_rxd_pkt_get = vmxnet3_isc_rxd_pkt_get,
266	.ift_rxd_refill = vmxnet3_isc_rxd_refill,
267	.ift_rxd_flush = vmxnet3_isc_rxd_flush,
268	.ift_legacy_intr = vmxnet3_legacy_intr
269};
270
271static struct if_shared_ctx vmxnet3_sctx_init = {
272	.isc_magic = IFLIB_MAGIC,
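	/* The device expects descriptor ring base addresses to be 512-byte
	 * aligned. */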
273	.isc_q_align = 512,
274
275	.isc_tx_maxsize = VMXNET3_TX_MAXSIZE,
276	.isc_tx_maxsegsize = VMXNET3_TX_MAXSEGSIZE,
277	.isc_tso_maxsize = VMXNET3_TSO_MAXSIZE + sizeof(struct ether_vlan_header),
278	.isc_tso_maxsegsize = VMXNET3_TX_MAXSEGSIZE,
279
280	/*
281	 * These values are used to configure the busdma tag used for
282	 * receive descriptors.  Each receive descriptor only points to one
283	 * buffer.
284	 */
285	.isc_rx_maxsize = VMXNET3_RX_MAXSEGSIZE, /* One buf per descriptor */
286	.isc_rx_nsegments = 1,  /* One mapping per descriptor */
287	.isc_rx_maxsegsize = VMXNET3_RX_MAXSEGSIZE,
288
289	.isc_admin_intrcnt = 1,
290	.isc_vendor_info = vmxnet3_vendor_info_array,
291	.isc_driver_version = "2",
292	.isc_driver = &vmxnet3_iflib_driver,
293	.isc_flags = IFLIB_HAS_RXCQ | IFLIB_HAS_TXCQ | IFLIB_SINGLE_IRQ_RX_ONLY,
294
295	/*
296	 * Number of receive queues per receive queue set, with associated
297	 * descriptor settings for each.
298	 */
299	.isc_nrxqs = 3,
300	.isc_nfl = 2, /* one free list for each receive command queue */
301	.isc_nrxd_min = {VMXNET3_MIN_RX_NDESC, VMXNET3_MIN_RX_NDESC, VMXNET3_MIN_RX_NDESC},
302	.isc_nrxd_max = {VMXNET3_MAX_RX_NDESC, VMXNET3_MAX_RX_NDESC, VMXNET3_MAX_RX_NDESC},
303	.isc_nrxd_default = {VMXNET3_DEF_RX_NDESC, VMXNET3_DEF_RX_NDESC, VMXNET3_DEF_RX_NDESC},
304
305	/*
306	 * Number of transmit queues per transmit queue set, with associated
307	 * descriptor settings for each.
308	 */
309	.isc_ntxqs = 2,
310	.isc_ntxd_min = {VMXNET3_MIN_TX_NDESC, VMXNET3_MIN_TX_NDESC},
311	.isc_ntxd_max = {VMXNET3_MAX_TX_NDESC, VMXNET3_MAX_TX_NDESC},
312	.isc_ntxd_default = {VMXNET3_DEF_TX_NDESC, VMXNET3_DEF_TX_NDESC},
313};
314
315static void *
316vmxnet3_register(device_t dev)
317{
318	return (&vmxnet3_sctx_init);
319}
320
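/*
 * Round val down to the nearest power of two (e.g., 6 becomes 4).  Used to
 * constrain the configured queue set counts.
 */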
321static int
322trunc_powerof2(int val)
323{
324
325	return (1U << (fls(val) - 1));
326}
327
328static int
329vmxnet3_attach_pre(if_ctx_t ctx)
330{
331	device_t dev;
332	if_softc_ctx_t scctx;
333	struct vmxnet3_softc *sc;
334	uint32_t intr_config;
335	int error;
336
337	dev = iflib_get_dev(ctx);
338	sc = iflib_get_softc(ctx);
339	sc->vmx_dev = dev;
340	sc->vmx_ctx = ctx;
341	sc->vmx_sctx = iflib_get_sctx(ctx);
342	sc->vmx_scctx = iflib_get_softc_ctx(ctx);
343	sc->vmx_ifp = iflib_get_ifp(ctx);
344	sc->vmx_media = iflib_get_media(ctx);
345	scctx = sc->vmx_scctx;
346
347	scctx->isc_tx_nsegments = VMXNET3_TX_MAXSEGS;
348	scctx->isc_tx_tso_segments_max = VMXNET3_TX_MAXSEGS;
349	/* isc_tx_tso_size_max doesn't include possible vlan header */
350	scctx->isc_tx_tso_size_max = VMXNET3_TSO_MAXSIZE;
351	scctx->isc_tx_tso_segsize_max = VMXNET3_TX_MAXSEGSIZE;
352	scctx->isc_txrx = &vmxnet3_txrx;
353
354	/* If 0, the iflib tunable was not set, so set to the default */
355	if (scctx->isc_nrxqsets == 0)
356		scctx->isc_nrxqsets = VMXNET3_DEF_RX_QUEUES;
357	scctx->isc_nrxqsets = trunc_powerof2(scctx->isc_nrxqsets);
358	scctx->isc_nrxqsets_max = min(VMXNET3_MAX_RX_QUEUES, mp_ncpus);
359	scctx->isc_nrxqsets_max = trunc_powerof2(scctx->isc_nrxqsets_max);
360
361	/* If 0, the iflib tunable was not set, so set to the default */
362	if (scctx->isc_ntxqsets == 0)
363		scctx->isc_ntxqsets = VMXNET3_DEF_TX_QUEUES;
364	scctx->isc_ntxqsets = trunc_powerof2(scctx->isc_ntxqsets);
365	scctx->isc_ntxqsets_max = min(VMXNET3_MAX_TX_QUEUES, mp_ncpus);
366	scctx->isc_ntxqsets_max = trunc_powerof2(scctx->isc_ntxqsets_max);
367
368	/*
369	 * Enforce that the transmit completion queue descriptor count is
370	 * the same as the transmit command queue descriptor count.
371	 */
372	scctx->isc_ntxd[0] = scctx->isc_ntxd[1];
373	scctx->isc_txqsizes[0] =
374	    sizeof(struct vmxnet3_txcompdesc) * scctx->isc_ntxd[0];
375	scctx->isc_txqsizes[1] =
376	    sizeof(struct vmxnet3_txdesc) * scctx->isc_ntxd[1];
377
378	/*
379	 * Enforce that the receive completion queue descriptor count is the
380	 * sum of the receive command queue descriptor counts, and that the
381	 * second receive command queue descriptor count is the same as the
382	 * first one.
383	 */
384	scctx->isc_nrxd[2] = scctx->isc_nrxd[1];
385	scctx->isc_nrxd[0] = scctx->isc_nrxd[1] + scctx->isc_nrxd[2];
386	scctx->isc_rxqsizes[0] =
387	    sizeof(struct vmxnet3_rxcompdesc) * scctx->isc_nrxd[0];
388	scctx->isc_rxqsizes[1] =
389	    sizeof(struct vmxnet3_rxdesc) * scctx->isc_nrxd[1];
390	scctx->isc_rxqsizes[2] =
391	    sizeof(struct vmxnet3_rxdesc) * scctx->isc_nrxd[2];
392
393	/*
394	 * Initialize the max frame size and descriptor queue buffer
395	 * sizes.
396	 */
397	vmxnet3_mtu_set(ctx, if_getmtu(sc->vmx_ifp));
398
399	scctx->isc_rss_table_size = UPT1_RSS_MAX_IND_TABLE_SIZE;
400
401	/* Map PCI BARs */
402	error = vmxnet3_alloc_resources(sc);
403	if (error)
404		goto fail;
405
406	/* Check device versions */
407	error = vmxnet3_check_version(sc);
408	if (error)
409		goto fail;
410
411	/*
412	 * The interrupt mode can be set in the hypervisor configuration via
413	 * the parameter ethernet<N>.intrMode.
414	 */
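	/*
	 * The low two bits of intr_config select the interrupt type; the
	 * next two bits select the interrupt masking mode.
	 */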
415	intr_config = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_INTRCFG);
416	sc->vmx_intr_mask_mode = (intr_config >> 2) & 0x03;
417
	/*
	 * Configure the softc context to request the interrupt mode now
	 * indicated by intr_config.  iflib will follow the usual fallback
	 * path MSI-X -> MSI -> LEGACY, beginning at the requested mode.
	 */
424	switch (intr_config & 0x03) {
425	case VMXNET3_IT_AUTO:
426	case VMXNET3_IT_MSIX:
427		scctx->isc_msix_bar = pci_msix_table_bar(dev);
428		break;
429	case VMXNET3_IT_MSI:
430		scctx->isc_msix_bar = -1;
431		scctx->isc_disable_msix = 1;
432		break;
433	case VMXNET3_IT_LEGACY:
434		scctx->isc_msix_bar = 0;
435		break;
436	}
437
438	scctx->isc_tx_csum_flags = VMXNET3_CSUM_ALL_OFFLOAD;
439	scctx->isc_capabilities = scctx->isc_capenable =
440	    IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6 |
441	    IFCAP_TSO4 | IFCAP_TSO6 |
442	    IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 |
443	    IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING |
444	    IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWTSO |
445	    IFCAP_JUMBO_MTU;
446
447	/* These capabilities are not enabled by default. */
448	scctx->isc_capabilities |= IFCAP_LRO | IFCAP_VLAN_HWFILTER;
449
450	vmxnet3_get_lladdr(sc);
451	iflib_set_mac(ctx, sc->vmx_lladdr);
452
453	return (0);
454fail:
455	/*
456	 * We must completely clean up anything allocated above as iflib
457	 * will not invoke any other driver entry points as a result of this
458	 * failure.
459	 */
460	vmxnet3_free_resources(sc);
461
462	return (error);
463}
464
465static int
466vmxnet3_msix_intr_assign(if_ctx_t ctx, int msix)
467{
468	struct vmxnet3_softc *sc;
469	if_softc_ctx_t scctx;
470	struct vmxnet3_rxqueue *rxq;
471	int error;
472	int i;
473	char irq_name[16];
474
475	sc = iflib_get_softc(ctx);
476	scctx = sc->vmx_scctx;
477
478	for (i = 0; i < scctx->isc_nrxqsets; i++) {
479		snprintf(irq_name, sizeof(irq_name), "rxq%d", i);
480
481		rxq = &sc->vmx_rxq[i];
482		error = iflib_irq_alloc_generic(ctx, &rxq->vxrxq_irq, i + 1,
483		    IFLIB_INTR_RX, vmxnet3_rxq_intr, rxq, i, irq_name);
484		if (error) {
485			device_printf(iflib_get_dev(ctx),
486			    "Failed to register rxq %d interrupt handler\n", i);
487			return (error);
488		}
489	}
490
491	for (i = 0; i < scctx->isc_ntxqsets; i++) {
492		snprintf(irq_name, sizeof(irq_name), "txq%d", i);
493
494		/*
495		 * Don't provide the corresponding rxq irq for reference -
496		 * we want the transmit task to be attached to a task queue
497		 * that is different from the one used by the corresponding
498		 * rxq irq.  That is because the TX doorbell writes are very
499		 * expensive as virtualized MMIO operations, so we want to
500		 * be able to defer them to another core when possible so
501		 * that they don't steal receive processing cycles during
502		 * stack turnarounds like TCP ACK generation.  The other
503		 * piece to this approach is enabling the iflib abdicate
504		 * option (currently via an interface-specific
505		 * tunable/sysctl).
506		 */
507		iflib_softirq_alloc_generic(ctx, NULL, IFLIB_INTR_TX, NULL, i,
508		    irq_name);
509	}
510
511	error = iflib_irq_alloc_generic(ctx, &sc->vmx_event_intr_irq,
512	    scctx->isc_nrxqsets + 1, IFLIB_INTR_ADMIN, vmxnet3_event_intr, sc, 0,
513	    "event");
514	if (error) {
515		device_printf(iflib_get_dev(ctx),
516		    "Failed to register event interrupt handler\n");
517		return (error);
518	}
519
520	return (0);
521}
522
523static void
524vmxnet3_free_irqs(struct vmxnet3_softc *sc)
525{
526	if_softc_ctx_t scctx;
527	struct vmxnet3_rxqueue *rxq;
528	int i;
529
530	scctx = sc->vmx_scctx;
531
532	for (i = 0; i < scctx->isc_nrxqsets; i++) {
533		rxq = &sc->vmx_rxq[i];
534		iflib_irq_free(sc->vmx_ctx, &rxq->vxrxq_irq);
535	}
536
537	iflib_irq_free(sc->vmx_ctx, &sc->vmx_event_intr_irq);
538}
539
540static int
541vmxnet3_attach_post(if_ctx_t ctx)
542{
543	device_t dev;
544	if_softc_ctx_t scctx;
545	struct vmxnet3_softc *sc;
546	int error;
547
548	dev = iflib_get_dev(ctx);
549	scctx = iflib_get_softc_ctx(ctx);
550	sc = iflib_get_softc(ctx);
551
552	if (scctx->isc_nrxqsets > 1)
553		sc->vmx_flags |= VMXNET3_FLAG_RSS;
554
555	error = vmxnet3_alloc_data(sc);
556	if (error)
557		goto fail;
558
559	vmxnet3_set_interrupt_idx(sc);
560	vmxnet3_setup_sysctl(sc);
561
562	ifmedia_add(sc->vmx_media, IFM_ETHER | IFM_AUTO, 0, NULL);
563	ifmedia_set(sc->vmx_media, IFM_ETHER | IFM_AUTO);
564
565fail:
566	return (error);
567}
568
569static int
570vmxnet3_detach(if_ctx_t ctx)
571{
572	struct vmxnet3_softc *sc;
573
574	sc = iflib_get_softc(ctx);
575
576	vmxnet3_free_irqs(sc);
577	vmxnet3_free_data(sc);
578	vmxnet3_free_resources(sc);
579
580	return (0);
581}
582
583static int
584vmxnet3_shutdown(if_ctx_t ctx)
585{
586
587	return (0);
588}
589
590static int
591vmxnet3_suspend(if_ctx_t ctx)
592{
593
594	return (0);
595}
596
597static int
598vmxnet3_resume(if_ctx_t ctx)
599{
600
601	return (0);
602}
603
604static int
605vmxnet3_alloc_resources(struct vmxnet3_softc *sc)
606{
607	device_t dev;
608	int rid;
609
610	dev = sc->vmx_dev;
611
612	rid = PCIR_BAR(0);
613	sc->vmx_res0 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
614	    RF_ACTIVE);
615	if (sc->vmx_res0 == NULL) {
616		device_printf(dev,
617		    "could not map BAR0 memory\n");
618		return (ENXIO);
619	}
620
621	sc->vmx_iot0 = rman_get_bustag(sc->vmx_res0);
622	sc->vmx_ioh0 = rman_get_bushandle(sc->vmx_res0);
623
624	rid = PCIR_BAR(1);
625	sc->vmx_res1 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
626	    RF_ACTIVE);
627	if (sc->vmx_res1 == NULL) {
628		device_printf(dev,
629		    "could not map BAR1 memory\n");
630		return (ENXIO);
631	}
632
633	sc->vmx_iot1 = rman_get_bustag(sc->vmx_res1);
634	sc->vmx_ioh1 = rman_get_bushandle(sc->vmx_res1);
635
636	return (0);
637}
638
639static void
640vmxnet3_free_resources(struct vmxnet3_softc *sc)
641{
642	device_t dev;
643
644	dev = sc->vmx_dev;
645
646	if (sc->vmx_res0 != NULL) {
647		bus_release_resource(dev, SYS_RES_MEMORY,
648		    rman_get_rid(sc->vmx_res0), sc->vmx_res0);
649		sc->vmx_res0 = NULL;
650	}
651
652	if (sc->vmx_res1 != NULL) {
653		bus_release_resource(dev, SYS_RES_MEMORY,
654		    rman_get_rid(sc->vmx_res1), sc->vmx_res1);
655		sc->vmx_res1 = NULL;
656	}
657}
658
659static int
660vmxnet3_check_version(struct vmxnet3_softc *sc)
661{
662	device_t dev;
663	uint32_t version;
664
665	dev = sc->vmx_dev;
666
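	/*
	 * Each version register advertises the supported revisions as a bit
	 * mask; writing back selects the revision this driver will use
	 * (revision 1 in both cases).
	 */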
667	version = vmxnet3_read_bar1(sc, VMXNET3_BAR1_VRRS);
668	if ((version & 0x01) == 0) {
669		device_printf(dev, "unsupported hardware version %#x\n",
670		    version);
671		return (ENOTSUP);
672	}
673	vmxnet3_write_bar1(sc, VMXNET3_BAR1_VRRS, 1);
674
675	version = vmxnet3_read_bar1(sc, VMXNET3_BAR1_UVRS);
676	if ((version & 0x01) == 0) {
677		device_printf(dev, "unsupported UPT version %#x\n", version);
678		return (ENOTSUP);
679	}
680	vmxnet3_write_bar1(sc, VMXNET3_BAR1_UVRS, 1);
681
682	return (0);
683}
684
685static void
686vmxnet3_set_interrupt_idx(struct vmxnet3_softc *sc)
687{
688	if_softc_ctx_t scctx;
689	struct vmxnet3_txqueue *txq;
690	struct vmxnet3_txq_shared *txs;
691	struct vmxnet3_rxqueue *rxq;
692	struct vmxnet3_rxq_shared *rxs;
693	int intr_idx;
694	int i;
695
696	scctx = sc->vmx_scctx;
697
698	/*
699	 * There is always one interrupt per receive queue, assigned
700	 * starting with the first interrupt.  When there is only one
701	 * interrupt available, the event interrupt shares the receive queue
702	 * interrupt, otherwise it uses the interrupt following the last
703	 * receive queue interrupt.  Transmit queues are not assigned
704	 * interrupts, so they are given indexes beyond the indexes that
705	 * correspond to the real interrupts.
706	 */
707
708	/* The event interrupt is always the last vector. */
709	sc->vmx_event_intr_idx = scctx->isc_vectors - 1;
710
711	intr_idx = 0;
712	for (i = 0; i < scctx->isc_nrxqsets; i++, intr_idx++) {
713		rxq = &sc->vmx_rxq[i];
714		rxs = rxq->vxrxq_rs;
715		rxq->vxrxq_intr_idx = intr_idx;
716		rxs->intr_idx = rxq->vxrxq_intr_idx;
717	}
718
	/*
	 * Assign the tx queues' interrupt indexes above what we are actually
	 * using.  These interrupts will never be enabled.
	 */
723	intr_idx = scctx->isc_vectors;
724	for (i = 0; i < scctx->isc_ntxqsets; i++, intr_idx++) {
725		txq = &sc->vmx_txq[i];
726		txs = txq->vxtxq_ts;
727		txq->vxtxq_intr_idx = intr_idx;
728		txs->intr_idx = txq->vxtxq_intr_idx;
729	}
730}
731
732static int
733vmxnet3_queues_shared_alloc(struct vmxnet3_softc *sc)
734{
735	if_softc_ctx_t scctx;
736	int size;
737	int error;
738
739	scctx = sc->vmx_scctx;
740
741	/*
742	 * The txq and rxq shared data areas must be allocated contiguously
743	 * as vmxnet3_driver_shared contains only a single address member
744	 * for the shared queue data area.
745	 */
746	size = scctx->isc_ntxqsets * sizeof(struct vmxnet3_txq_shared) +
747	    scctx->isc_nrxqsets * sizeof(struct vmxnet3_rxq_shared);
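	/*
	 * The 128-byte alignment here matches the queue descriptor alignment
	 * the device expects for this area.
	 */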
748	error = iflib_dma_alloc_align(sc->vmx_ctx, size, 128, &sc->vmx_qs_dma, 0);
749	if (error) {
750		device_printf(sc->vmx_dev, "cannot alloc queue shared memory\n");
751		return (error);
752	}
753
754	return (0);
755}
756
757static void
758vmxnet3_init_txq(struct vmxnet3_softc *sc, int q)
759{
760	struct vmxnet3_txqueue *txq;
761	struct vmxnet3_comp_ring *txc;
762	struct vmxnet3_txring *txr;
763	if_softc_ctx_t scctx;
764
765	txq = &sc->vmx_txq[q];
766	txc = &txq->vxtxq_comp_ring;
767	txr = &txq->vxtxq_cmd_ring;
768	scctx = sc->vmx_scctx;
769
770	snprintf(txq->vxtxq_name, sizeof(txq->vxtxq_name), "%s-tx%d",
771	    device_get_nameunit(sc->vmx_dev), q);
772
773	txq->vxtxq_sc = sc;
774	txq->vxtxq_id = q;
775	txc->vxcr_ndesc = scctx->isc_ntxd[0];
776	txr->vxtxr_ndesc = scctx->isc_ntxd[1];
777}
778
779static int
780vmxnet3_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
781    int ntxqs, int ntxqsets)
782{
783	struct vmxnet3_softc *sc;
784	int q;
785	int error;
786	caddr_t kva;
787
788	sc = iflib_get_softc(ctx);
789
790	/* Allocate the array of transmit queues */
791	sc->vmx_txq = malloc(sizeof(struct vmxnet3_txqueue) *
792	    ntxqsets, M_DEVBUF, M_NOWAIT | M_ZERO);
793	if (sc->vmx_txq == NULL)
794		return (ENOMEM);
795
796	/* Initialize driver state for each transmit queue */
797	for (q = 0; q < ntxqsets; q++)
798		vmxnet3_init_txq(sc, q);
799
	/*
	 * Allocate queue state that is shared with the device.  This check
	 * and call are performed in both vmxnet3_tx_queues_alloc() and
	 * vmxnet3_rx_queues_alloc() so that we don't have to care which
	 * order iflib invokes those routines in.
	 */
806	if (sc->vmx_qs_dma.idi_size == 0) {
807		error = vmxnet3_queues_shared_alloc(sc);
808		if (error)
809			return (error);
810	}
811
812	kva = sc->vmx_qs_dma.idi_vaddr;
813	for (q = 0; q < ntxqsets; q++) {
814		sc->vmx_txq[q].vxtxq_ts = (struct vmxnet3_txq_shared *) kva;
815		kva += sizeof(struct vmxnet3_txq_shared);
816	}
817
818	/* Record descriptor ring vaddrs and paddrs */
819	for (q = 0; q < ntxqsets; q++) {
820		struct vmxnet3_txqueue *txq;
821		struct vmxnet3_txring *txr;
822		struct vmxnet3_comp_ring *txc;
823
824		txq = &sc->vmx_txq[q];
825		txc = &txq->vxtxq_comp_ring;
826		txr = &txq->vxtxq_cmd_ring;
827
828		/* Completion ring */
829		txc->vxcr_u.txcd =
830		    (struct vmxnet3_txcompdesc *) vaddrs[q * ntxqs + 0];
831		txc->vxcr_paddr = paddrs[q * ntxqs + 0];
832
833		/* Command ring */
834		txr->vxtxr_txd =
835		    (struct vmxnet3_txdesc *) vaddrs[q * ntxqs + 1];
836		txr->vxtxr_paddr = paddrs[q * ntxqs + 1];
837	}
838
839	return (0);
840}
841
842static void
843vmxnet3_init_rxq(struct vmxnet3_softc *sc, int q, int nrxqs)
844{
845	struct vmxnet3_rxqueue *rxq;
846	struct vmxnet3_comp_ring *rxc;
847	struct vmxnet3_rxring *rxr;
848	if_softc_ctx_t scctx;
849	int i;
850
851	rxq = &sc->vmx_rxq[q];
852	rxc = &rxq->vxrxq_comp_ring;
853	scctx = sc->vmx_scctx;
854
855	snprintf(rxq->vxrxq_name, sizeof(rxq->vxrxq_name), "%s-rx%d",
856	    device_get_nameunit(sc->vmx_dev), q);
857
858	rxq->vxrxq_sc = sc;
859	rxq->vxrxq_id = q;
860
861	/*
862	 * First rxq is the completion queue, so there are nrxqs - 1 command
863	 * rings starting at iflib queue id 1.
864	 */
865	rxc->vxcr_ndesc = scctx->isc_nrxd[0];
866	for (i = 0; i < nrxqs - 1; i++) {
867		rxr = &rxq->vxrxq_cmd_ring[i];
868		rxr->vxrxr_ndesc = scctx->isc_nrxd[i + 1];
869	}
870}
871
872static int
873vmxnet3_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
874    int nrxqs, int nrxqsets)
875{
876	struct vmxnet3_softc *sc;
877	if_softc_ctx_t scctx;
878	int q;
879	int i;
880	int error;
881	caddr_t kva;
882
883	sc = iflib_get_softc(ctx);
884	scctx = sc->vmx_scctx;
885
886	/* Allocate the array of receive queues */
887	sc->vmx_rxq = malloc(sizeof(struct vmxnet3_rxqueue) *
888	    nrxqsets, M_DEVBUF, M_NOWAIT | M_ZERO);
889	if (sc->vmx_rxq == NULL)
890		return (ENOMEM);
891
892	/* Initialize driver state for each receive queue */
893	for (q = 0; q < nrxqsets; q++)
894		vmxnet3_init_rxq(sc, q, nrxqs);
895
	/*
	 * Allocate queue state that is shared with the device.  This check
	 * and call are performed in both vmxnet3_tx_queues_alloc() and
	 * vmxnet3_rx_queues_alloc() so that we don't have to care which
	 * order iflib invokes those routines in.
	 */
902	if (sc->vmx_qs_dma.idi_size == 0) {
903		error = vmxnet3_queues_shared_alloc(sc);
904		if (error)
905			return (error);
906	}
907
908	kva = sc->vmx_qs_dma.idi_vaddr +
909	    scctx->isc_ntxqsets * sizeof(struct vmxnet3_txq_shared);
910	for (q = 0; q < nrxqsets; q++) {
911		sc->vmx_rxq[q].vxrxq_rs = (struct vmxnet3_rxq_shared *) kva;
912		kva += sizeof(struct vmxnet3_rxq_shared);
913	}
914
915	/* Record descriptor ring vaddrs and paddrs */
916	for (q = 0; q < nrxqsets; q++) {
917		struct vmxnet3_rxqueue *rxq;
918		struct vmxnet3_rxring *rxr;
919		struct vmxnet3_comp_ring *rxc;
920
921		rxq = &sc->vmx_rxq[q];
922		rxc = &rxq->vxrxq_comp_ring;
923
924		/* Completion ring */
925		rxc->vxcr_u.rxcd =
926		    (struct vmxnet3_rxcompdesc *) vaddrs[q * nrxqs + 0];
927		rxc->vxcr_paddr = paddrs[q * nrxqs + 0];
928
929		/* Command ring(s) */
930		for (i = 0; i < nrxqs - 1; i++) {
931			rxr = &rxq->vxrxq_cmd_ring[i];
932
933			rxr->vxrxr_rxd =
934			    (struct vmxnet3_rxdesc *) vaddrs[q * nrxqs + 1 + i];
935			rxr->vxrxr_paddr = paddrs[q * nrxqs + 1 + i];
936		}
937	}
938
939	return (0);
940}
941
942static void
943vmxnet3_queues_free(if_ctx_t ctx)
944{
945	struct vmxnet3_softc *sc;
946
947	sc = iflib_get_softc(ctx);
948
949	/* Free queue state area that is shared with the device */
950	if (sc->vmx_qs_dma.idi_size != 0) {
951		iflib_dma_free(&sc->vmx_qs_dma);
952		sc->vmx_qs_dma.idi_size = 0;
953	}
954
955	/* Free array of receive queues */
956	if (sc->vmx_rxq != NULL) {
957		free(sc->vmx_rxq, M_DEVBUF);
958		sc->vmx_rxq = NULL;
959	}
960
961	/* Free array of transmit queues */
962	if (sc->vmx_txq != NULL) {
963		free(sc->vmx_txq, M_DEVBUF);
964		sc->vmx_txq = NULL;
965	}
966}
967
968static int
969vmxnet3_alloc_shared_data(struct vmxnet3_softc *sc)
970{
971	device_t dev;
972	size_t size;
973	int error;
974
975	dev = sc->vmx_dev;
976
977	/* Top level state structure shared with the device */
978	size = sizeof(struct vmxnet3_driver_shared);
979	error = iflib_dma_alloc_align(sc->vmx_ctx, size, 1, &sc->vmx_ds_dma, 0);
980	if (error) {
981		device_printf(dev, "cannot alloc shared memory\n");
982		return (error);
983	}
984	sc->vmx_ds = (struct vmxnet3_driver_shared *) sc->vmx_ds_dma.idi_vaddr;
985
986	/* RSS table state shared with the device */
987	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
988		size = sizeof(struct vmxnet3_rss_shared);
989		error = iflib_dma_alloc_align(sc->vmx_ctx, size, 128,
990		    &sc->vmx_rss_dma, 0);
991		if (error) {
992			device_printf(dev, "cannot alloc rss shared memory\n");
993			return (error);
994		}
995		sc->vmx_rss =
996		    (struct vmxnet3_rss_shared *) sc->vmx_rss_dma.idi_vaddr;
997	}
998
999	return (0);
1000}
1001
1002static void
1003vmxnet3_free_shared_data(struct vmxnet3_softc *sc)
1004{
1005
1006	/* Free RSS table state shared with the device */
1007	if (sc->vmx_rss != NULL) {
1008		iflib_dma_free(&sc->vmx_rss_dma);
1009		sc->vmx_rss = NULL;
1010	}
1011
1012	/* Free top level state structure shared with the device */
1013	if (sc->vmx_ds != NULL) {
1014		iflib_dma_free(&sc->vmx_ds_dma);
1015		sc->vmx_ds = NULL;
1016	}
1017}
1018
1019static int
1020vmxnet3_alloc_mcast_table(struct vmxnet3_softc *sc)
1021{
1022	int error;
1023
1024	/* Multicast table state shared with the device */
1025	error = iflib_dma_alloc_align(sc->vmx_ctx,
1026	    VMXNET3_MULTICAST_MAX * ETHER_ADDR_LEN, 32, &sc->vmx_mcast_dma, 0);
1027	if (error)
1028		device_printf(sc->vmx_dev, "unable to alloc multicast table\n");
1029	else
1030		sc->vmx_mcast = sc->vmx_mcast_dma.idi_vaddr;
1031
1032	return (error);
1033}
1034
1035static void
1036vmxnet3_free_mcast_table(struct vmxnet3_softc *sc)
1037{
1038
1039	/* Free multicast table state shared with the device */
1040	if (sc->vmx_mcast != NULL) {
1041		iflib_dma_free(&sc->vmx_mcast_dma);
1042		sc->vmx_mcast = NULL;
1043	}
1044}
1045
1046static void
1047vmxnet3_init_shared_data(struct vmxnet3_softc *sc)
1048{
1049	struct vmxnet3_driver_shared *ds;
1050	if_shared_ctx_t sctx;
1051	if_softc_ctx_t scctx;
1052	struct vmxnet3_txqueue *txq;
1053	struct vmxnet3_txq_shared *txs;
1054	struct vmxnet3_rxqueue *rxq;
1055	struct vmxnet3_rxq_shared *rxs;
1056	int i;
1057
1058	ds = sc->vmx_ds;
1059	sctx = sc->vmx_sctx;
1060	scctx = sc->vmx_scctx;
1061
	/*
	 * Initialize fields of the shared data that remain the same across
	 * reinits.  Note the shared data is zeroed when allocated.
	 */
1066
1067	ds->magic = VMXNET3_REV1_MAGIC;
1068
1069	/* DriverInfo */
1070	ds->version = VMXNET3_DRIVER_VERSION;
1071	ds->guest = VMXNET3_GOS_FREEBSD |
1072#ifdef __LP64__
1073	    VMXNET3_GOS_64BIT;
1074#else
1075	    VMXNET3_GOS_32BIT;
1076#endif
1077	ds->vmxnet3_revision = 1;
1078	ds->upt_version = 1;
1079
1080	/* Misc. conf */
1081	ds->driver_data = vtophys(sc);
1082	ds->driver_data_len = sizeof(struct vmxnet3_softc);
1083	ds->queue_shared = sc->vmx_qs_dma.idi_paddr;
1084	ds->queue_shared_len = sc->vmx_qs_dma.idi_size;
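	/* Maximum number of RX segments the driver will use for one packet. */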
1085	ds->nrxsg_max = IFLIB_MAX_RX_SEGS;
1086
1087	/* RSS conf */
1088	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1089		ds->rss.version = 1;
1090		ds->rss.paddr = sc->vmx_rss_dma.idi_paddr;
1091		ds->rss.len = sc->vmx_rss_dma.idi_size;
1092	}
1093
1094	/* Interrupt control. */
1095	ds->automask = sc->vmx_intr_mask_mode == VMXNET3_IMM_AUTO;
1096	/*
1097	 * Total number of interrupt indexes we are using in the shared
1098	 * config data, even though we don't actually allocate interrupt
1099	 * resources for the tx queues.  Some versions of the device will
1100	 * fail to initialize successfully if interrupt indexes are used in
1101	 * the shared config that exceed the number of interrupts configured
1102	 * here.
1103	 */
1104	ds->nintr = (scctx->isc_vectors == 1) ?
1105	    2 : (scctx->isc_nrxqsets + scctx->isc_ntxqsets + 1);
1106	ds->evintr = sc->vmx_event_intr_idx;
1107	ds->ictrl = VMXNET3_ICTRL_DISABLE_ALL;
1108
1109	for (i = 0; i < ds->nintr; i++)
1110		ds->modlevel[i] = UPT1_IMOD_ADAPTIVE;
1111
1112	/* Receive filter. */
1113	ds->mcast_table = sc->vmx_mcast_dma.idi_paddr;
1114	ds->mcast_tablelen = sc->vmx_mcast_dma.idi_size;
1115
1116	/* Tx queues */
1117	for (i = 0; i < scctx->isc_ntxqsets; i++) {
1118		txq = &sc->vmx_txq[i];
1119		txs = txq->vxtxq_ts;
1120
1121		txs->cmd_ring = txq->vxtxq_cmd_ring.vxtxr_paddr;
1122		txs->cmd_ring_len = txq->vxtxq_cmd_ring.vxtxr_ndesc;
1123		txs->comp_ring = txq->vxtxq_comp_ring.vxcr_paddr;
1124		txs->comp_ring_len = txq->vxtxq_comp_ring.vxcr_ndesc;
1125		txs->driver_data = vtophys(txq);
1126		txs->driver_data_len = sizeof(struct vmxnet3_txqueue);
1127	}
1128
1129	/* Rx queues */
1130	for (i = 0; i < scctx->isc_nrxqsets; i++) {
1131		rxq = &sc->vmx_rxq[i];
1132		rxs = rxq->vxrxq_rs;
1133
1134		rxs->cmd_ring[0] = rxq->vxrxq_cmd_ring[0].vxrxr_paddr;
1135		rxs->cmd_ring_len[0] = rxq->vxrxq_cmd_ring[0].vxrxr_ndesc;
1136		rxs->cmd_ring[1] = rxq->vxrxq_cmd_ring[1].vxrxr_paddr;
1137		rxs->cmd_ring_len[1] = rxq->vxrxq_cmd_ring[1].vxrxr_ndesc;
1138		rxs->comp_ring = rxq->vxrxq_comp_ring.vxcr_paddr;
1139		rxs->comp_ring_len = rxq->vxrxq_comp_ring.vxcr_ndesc;
1140		rxs->driver_data = vtophys(rxq);
1141		rxs->driver_data_len = sizeof(struct vmxnet3_rxqueue);
1142	}
1143}
1144
1145static void
1146vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *sc)
1147{
1148	/*
1149	 * Use the same key as the Linux driver until FreeBSD can do
1150	 * RSS (presumably Toeplitz) in software.
1151	 */
1152	static const uint8_t rss_key[UPT1_RSS_MAX_KEY_SIZE] = {
1153	    0x3b, 0x56, 0xd1, 0x56, 0x13, 0x4a, 0xe7, 0xac,
1154	    0xe8, 0x79, 0x09, 0x75, 0xe8, 0x65, 0x79, 0x28,
1155	    0x35, 0x12, 0xb9, 0x56, 0x7c, 0x76, 0x4b, 0x70,
1156	    0xd8, 0x56, 0xa3, 0x18, 0x9b, 0x0a, 0xee, 0xf3,
1157	    0x96, 0xa6, 0x9f, 0x8f, 0x9e, 0x8c, 0x90, 0xc9,
1158	};
1159
1160	struct vmxnet3_driver_shared *ds;
1161	if_softc_ctx_t scctx;
1162	struct vmxnet3_rss_shared *rss;
1163#ifdef RSS
1164	uint8_t rss_algo;
1165#endif
1166	int i;
1167
1168	ds = sc->vmx_ds;
1169	scctx = sc->vmx_scctx;
1170	rss = sc->vmx_rss;
1171
1172	rss->hash_type =
1173	    UPT1_RSS_HASH_TYPE_IPV4 | UPT1_RSS_HASH_TYPE_TCP_IPV4 |
1174	    UPT1_RSS_HASH_TYPE_IPV6 | UPT1_RSS_HASH_TYPE_TCP_IPV6;
1175	rss->hash_func = UPT1_RSS_HASH_FUNC_TOEPLITZ;
1176	rss->hash_key_size = UPT1_RSS_MAX_KEY_SIZE;
1177	rss->ind_table_size = UPT1_RSS_MAX_IND_TABLE_SIZE;
1178#ifdef RSS
	/*
	 * If the software RSS is configured to anything other than Toeplitz,
	 * then just do Toeplitz in "hardware" for the sake of the packet
	 * distribution, but report the hash as opaque to disengage from the
	 * software RSS.
	 */
1185	rss_algo = rss_gethashalgo();
1186	if (rss_algo == RSS_HASH_TOEPLITZ) {
1187		rss_getkey(rss->hash_key);
1188		for (i = 0; i < UPT1_RSS_MAX_IND_TABLE_SIZE; i++) {
1189			rss->ind_table[i] = rss_get_indirection_to_bucket(i) %
1190			    scctx->isc_nrxqsets;
1191		}
1192		sc->vmx_flags |= VMXNET3_FLAG_SOFT_RSS;
1193	} else
1194#endif
1195	{
1196		memcpy(rss->hash_key, rss_key, UPT1_RSS_MAX_KEY_SIZE);
1197		for (i = 0; i < UPT1_RSS_MAX_IND_TABLE_SIZE; i++)
1198			rss->ind_table[i] = i % scctx->isc_nrxqsets;
1199		sc->vmx_flags &= ~VMXNET3_FLAG_SOFT_RSS;
1200	}
1201}
1202
1203static void
1204vmxnet3_reinit_shared_data(struct vmxnet3_softc *sc)
1205{
1206	struct ifnet *ifp;
1207	struct vmxnet3_driver_shared *ds;
1208	if_softc_ctx_t scctx;
1209
1210	ifp = sc->vmx_ifp;
1211	ds = sc->vmx_ds;
1212	scctx = sc->vmx_scctx;
1213
1214	ds->mtu = ifp->if_mtu;
1215	ds->ntxqueue = scctx->isc_ntxqsets;
1216	ds->nrxqueue = scctx->isc_nrxqsets;
1217
1218	ds->upt_features = 0;
1219	if (ifp->if_capenable & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6))
1220		ds->upt_features |= UPT1_F_CSUM;
1221	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1222		ds->upt_features |= UPT1_F_VLAN;
1223	if (ifp->if_capenable & IFCAP_LRO)
1224		ds->upt_features |= UPT1_F_LRO;
1225
1226	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1227		ds->upt_features |= UPT1_F_RSS;
1228		vmxnet3_reinit_rss_shared_data(sc);
1229	}
1230
1231	vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSL, sc->vmx_ds_dma.idi_paddr);
1232	vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSH,
1233	    (uint64_t) sc->vmx_ds_dma.idi_paddr >> 32);
1234}
1235
1236static int
1237vmxnet3_alloc_data(struct vmxnet3_softc *sc)
1238{
1239	int error;
1240
1241	error = vmxnet3_alloc_shared_data(sc);
1242	if (error)
1243		return (error);
1244
1245	error = vmxnet3_alloc_mcast_table(sc);
1246	if (error)
1247		return (error);
1248
1249	vmxnet3_init_shared_data(sc);
1250
1251	return (0);
1252}
1253
1254static void
1255vmxnet3_free_data(struct vmxnet3_softc *sc)
1256{
1257
1258	vmxnet3_free_mcast_table(sc);
1259	vmxnet3_free_shared_data(sc);
1260}
1261
1262static void
1263vmxnet3_evintr(struct vmxnet3_softc *sc)
1264{
1265	device_t dev;
1266	struct vmxnet3_txq_shared *ts;
1267	struct vmxnet3_rxq_shared *rs;
1268	uint32_t event;
1269
1270	dev = sc->vmx_dev;
1271
1272	/* Clear events. */
1273	event = sc->vmx_ds->event;
1274	vmxnet3_write_bar1(sc, VMXNET3_BAR1_EVENT, event);
1275
1276	if (event & VMXNET3_EVENT_LINK)
1277		vmxnet3_link_status(sc);
1278
1279	if (event & (VMXNET3_EVENT_TQERROR | VMXNET3_EVENT_RQERROR)) {
1280		vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_STATUS);
1281		ts = sc->vmx_txq[0].vxtxq_ts;
1282		if (ts->stopped != 0)
1283			device_printf(dev, "Tx queue error %#x\n", ts->error);
1284		rs = sc->vmx_rxq[0].vxrxq_rs;
1285		if (rs->stopped != 0)
1286			device_printf(dev, "Rx queue error %#x\n", rs->error);
1287
		/* XXX - rely on iflib watchdog to reset us? */
1289		device_printf(dev, "Rx/Tx queue error event ... "
1290		    "waiting for iflib watchdog reset\n");
1291	}
1292
1293	if (event & VMXNET3_EVENT_DIC)
1294		device_printf(dev, "device implementation change event\n");
1295	if (event & VMXNET3_EVENT_DEBUG)
1296		device_printf(dev, "debug event\n");
1297}
1298
1299static int
1300vmxnet3_isc_txd_encap(void *vsc, if_pkt_info_t pi)
1301{
1302	struct vmxnet3_softc *sc;
1303	struct vmxnet3_txqueue *txq;
1304	struct vmxnet3_txring *txr;
1305	struct vmxnet3_txdesc *txd, *sop;
1306	bus_dma_segment_t *segs;
1307	int nsegs;
1308	int pidx;
1309	int hdrlen;
1310	int i;
1311	int gen;
1312
1313	sc = vsc;
1314	txq = &sc->vmx_txq[pi->ipi_qsidx];
1315	txr = &txq->vxtxq_cmd_ring;
1316	segs = pi->ipi_segs;
1317	nsegs = pi->ipi_nsegs;
1318	pidx = pi->ipi_pidx;
1319
1320	KASSERT(nsegs <= VMXNET3_TX_MAXSEGS,
1321	    ("%s: packet with too many segments %d", __func__, nsegs));
1322
1323	sop = &txr->vxtxr_txd[pidx];
	gen = txr->vxtxr_gen ^ 1;	/* Still owned by the cpu */
1325
1326	for (i = 0; i < nsegs; i++) {
1327		txd = &txr->vxtxr_txd[pidx];
1328
1329		txd->addr = segs[i].ds_addr;
1330		txd->len = segs[i].ds_len;
1331		txd->gen = gen;
1332		txd->dtype = 0;
1333		txd->offload_mode = VMXNET3_OM_NONE;
1334		txd->offload_pos = 0;
1335		txd->hlen = 0;
1336		txd->eop = 0;
1337		txd->compreq = 0;
1338		txd->vtag_mode = 0;
1339		txd->vtag = 0;
1340
1341		if (++pidx == txr->vxtxr_ndesc) {
1342			pidx = 0;
1343			txr->vxtxr_gen ^= 1;
1344		}
1345		gen = txr->vxtxr_gen;
1346	}
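	/*
	 * The final descriptor is marked end-of-packet, and a completion is
	 * requested only when iflib has asked for a TX interrupt on this
	 * packet.
	 */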
1347	txd->eop = 1;
1348	txd->compreq = !!(pi->ipi_flags & IPI_TX_INTR);
1349	pi->ipi_new_pidx = pidx;
1350
1351	/*
1352	 * VLAN
1353	 */
1354	if (pi->ipi_mflags & M_VLANTAG) {
1355		sop->vtag_mode = 1;
1356		sop->vtag = pi->ipi_vtag;
1357	}
1358
1359	/*
1360	 * TSO and checksum offloads
1361	 */
1362	hdrlen = pi->ipi_ehdrlen + pi->ipi_ip_hlen;
1363	if (pi->ipi_csum_flags & CSUM_TSO) {
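		/*
		 * For TSO, hlen covers the full L2/L3/L4 header length and
		 * offload_pos carries the TSO segment size (MSS).
		 */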
1364		sop->offload_mode = VMXNET3_OM_TSO;
1365		sop->hlen = hdrlen + pi->ipi_tcp_hlen;
1366		sop->offload_pos = pi->ipi_tso_segsz;
1367	} else if (pi->ipi_csum_flags & (VMXNET3_CSUM_OFFLOAD |
1368	    VMXNET3_CSUM_OFFLOAD_IPV6)) {
1369		sop->offload_mode = VMXNET3_OM_CSUM;
1370		sop->hlen = hdrlen;
1371		sop->offload_pos = hdrlen +
1372		    ((pi->ipi_ipproto == IPPROTO_TCP) ?
1373			offsetof(struct tcphdr, th_sum) :
1374			offsetof(struct udphdr, uh_sum));
1375	}
1376
1377	/* Finally, change the ownership. */
1378	vmxnet3_barrier(sc, VMXNET3_BARRIER_WR);
1379	sop->gen ^= 1;
1380
1381	return (0);
1382}
1383
1384static void
1385vmxnet3_isc_txd_flush(void *vsc, uint16_t txqid, qidx_t pidx)
1386{
1387	struct vmxnet3_softc *sc;
1388	struct vmxnet3_txqueue *txq;
1389
1390	sc = vsc;
1391	txq = &sc->vmx_txq[txqid];
1392
1393	/*
1394	 * pidx is what we last set ipi_new_pidx to in
1395	 * vmxnet3_isc_txd_encap()
1396	 */
1397
1398	/*
1399	 * Avoid expensive register updates if the flush request is
1400	 * redundant.
1401	 */
1402	if (txq->vxtxq_last_flush == pidx)
1403		return;
1404	txq->vxtxq_last_flush = pidx;
1405	vmxnet3_write_bar0(sc, VMXNET3_BAR0_TXH(txq->vxtxq_id), pidx);
1406}
1407
1408static int
1409vmxnet3_isc_txd_credits_update(void *vsc, uint16_t txqid, bool clear)
1410{
1411	struct vmxnet3_softc *sc;
1412	struct vmxnet3_txqueue *txq;
1413	struct vmxnet3_comp_ring *txc;
1414	struct vmxnet3_txcompdesc *txcd;
1415	struct vmxnet3_txring *txr;
1416	int processed;
1417
1418	sc = vsc;
1419	txq = &sc->vmx_txq[txqid];
1420	txc = &txq->vxtxq_comp_ring;
1421	txr = &txq->vxtxq_cmd_ring;
1422
1423	/*
1424	 * If clear is true, we need to report the number of TX command ring
1425	 * descriptors that have been processed by the device.  If clear is
1426	 * false, we just need to report whether or not at least one TX
1427	 * command ring descriptor has been processed by the device.
1428	 */
1429	processed = 0;
1430	for (;;) {
1431		txcd = &txc->vxcr_u.txcd[txc->vxcr_next];
1432		if (txcd->gen != txc->vxcr_gen)
1433			break;
1434		else if (!clear)
1435			return (1);
1436		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
1437
1438		if (++txc->vxcr_next == txc->vxcr_ndesc) {
1439			txc->vxcr_next = 0;
1440			txc->vxcr_gen ^= 1;
1441		}
1442
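		/*
		 * Count the command ring descriptors retired by this
		 * completion entry, accounting for eop_idx having wrapped
		 * past the end of the ring.
		 */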
1443		if (txcd->eop_idx < txr->vxtxr_next)
1444			processed += txr->vxtxr_ndesc -
1445			    (txr->vxtxr_next - txcd->eop_idx) + 1;
1446		else
1447			processed += txcd->eop_idx - txr->vxtxr_next + 1;
1448		txr->vxtxr_next = (txcd->eop_idx + 1) % txr->vxtxr_ndesc;
1449	}
1450
1451	return (processed);
1452}
1453
1454static int
1455vmxnet3_isc_rxd_available(void *vsc, uint16_t rxqid, qidx_t idx, qidx_t budget)
1456{
1457	struct vmxnet3_softc *sc;
1458	struct vmxnet3_rxqueue *rxq;
1459	struct vmxnet3_comp_ring *rxc;
1460	struct vmxnet3_rxcompdesc *rxcd;
1461	int avail;
1462	int completed_gen;
1463#ifdef INVARIANTS
1464	int expect_sop = 1;
1465#endif
1466	sc = vsc;
1467	rxq = &sc->vmx_rxq[rxqid];
1468	rxc = &rxq->vxrxq_comp_ring;
1469
1470	avail = 0;
1471	completed_gen = rxc->vxcr_gen;
1472	for (;;) {
1473		rxcd = &rxc->vxcr_u.rxcd[idx];
1474		if (rxcd->gen != completed_gen)
1475			break;
1476		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
1477
1478#ifdef INVARIANTS
1479		if (expect_sop)
1480			KASSERT(rxcd->sop, ("%s: expected sop", __func__));
1481		else
1482			KASSERT(!rxcd->sop, ("%s: unexpected sop", __func__));
1483		expect_sop = rxcd->eop;
1484#endif
1485		if (rxcd->eop && (rxcd->len != 0))
1486			avail++;
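		/* Stop scanning once more than budget packets are available. */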
1487		if (avail > budget)
1488			break;
1489		if (++idx == rxc->vxcr_ndesc) {
1490			idx = 0;
1491			completed_gen ^= 1;
1492		}
1493	}
1494
1495	return (avail);
1496}
1497
1498static int
1499vmxnet3_isc_rxd_pkt_get(void *vsc, if_rxd_info_t ri)
1500{
1501	struct vmxnet3_softc *sc;
1502	if_softc_ctx_t scctx;
1503	struct vmxnet3_rxqueue *rxq;
1504	struct vmxnet3_comp_ring *rxc;
1505	struct vmxnet3_rxcompdesc *rxcd;
1506	struct vmxnet3_rxring *rxr;
1507	struct vmxnet3_rxdesc *rxd;
1508	if_rxd_frag_t frag;
1509	int cqidx;
1510	uint16_t total_len;
1511	uint8_t nfrags;
1512	uint8_t i;
1513	uint8_t flid;
1514
1515	sc = vsc;
1516	scctx = sc->vmx_scctx;
1517	rxq = &sc->vmx_rxq[ri->iri_qsidx];
1518	rxc = &rxq->vxrxq_comp_ring;
1519
1520	/*
1521	 * Get a single packet starting at the given index in the completion
1522	 * queue.  That we have been called indicates that
1523	 * vmxnet3_isc_rxd_available() has already verified that either
1524	 * there is a complete packet available starting at the given index,
1525	 * or there are one or more zero length packets starting at the
1526	 * given index followed by a complete packet, so no verification of
1527	 * ownership of the descriptors (and no associated read barrier) is
1528	 * required here.
1529	 */
1530	cqidx = ri->iri_cidx;
1531	rxcd = &rxc->vxcr_u.rxcd[cqidx];
1532	while (rxcd->len == 0) {
1533		KASSERT(rxcd->sop && rxcd->eop,
1534		    ("%s: zero-length packet without both sop and eop set",
1535			__func__));
1536		rxc->vxcr_zero_length++;
1537		if (++cqidx == rxc->vxcr_ndesc) {
1538			cqidx = 0;
1539			rxc->vxcr_gen ^= 1;
1540		}
1541		rxcd = &rxc->vxcr_u.rxcd[cqidx];
1542	}
1543	KASSERT(rxcd->sop, ("%s: expected sop", __func__));
1544
	/*
	 * RSS and flow ID.
	 * Types other than M_HASHTYPE_NONE and M_HASHTYPE_OPAQUE_HASH should
	 * be used only if the software RSS is enabled and it uses the same
	 * algorithm and hash key as the "hardware".  If the software RSS is
	 * not enabled, then it's simply pointless to use those types.
	 * If it's enabled but with different parameters, then hash values
	 * will not match.
	 */
1554	ri->iri_flowid = rxcd->rss_hash;
1555#ifdef RSS
1556	if ((sc->vmx_flags & VMXNET3_FLAG_SOFT_RSS) != 0) {
1557		switch (rxcd->rss_type) {
1558		case VMXNET3_RCD_RSS_TYPE_NONE:
1559			ri->iri_flowid = ri->iri_qsidx;
1560			ri->iri_rsstype = M_HASHTYPE_NONE;
1561			break;
1562		case VMXNET3_RCD_RSS_TYPE_IPV4:
1563			ri->iri_rsstype = M_HASHTYPE_RSS_IPV4;
1564			break;
1565		case VMXNET3_RCD_RSS_TYPE_TCPIPV4:
1566			ri->iri_rsstype = M_HASHTYPE_RSS_TCP_IPV4;
1567			break;
1568		case VMXNET3_RCD_RSS_TYPE_IPV6:
1569			ri->iri_rsstype = M_HASHTYPE_RSS_IPV6;
1570			break;
1571		case VMXNET3_RCD_RSS_TYPE_TCPIPV6:
1572			ri->iri_rsstype = M_HASHTYPE_RSS_TCP_IPV6;
1573			break;
1574		default:
1575			ri->iri_rsstype = M_HASHTYPE_OPAQUE_HASH;
1576			break;
1577		}
1578	} else
1579#endif
1580	{
1581		switch (rxcd->rss_type) {
1582		case VMXNET3_RCD_RSS_TYPE_NONE:
1583			ri->iri_flowid = ri->iri_qsidx;
1584			ri->iri_rsstype = M_HASHTYPE_NONE;
1585			break;
1586		default:
1587			ri->iri_rsstype = M_HASHTYPE_OPAQUE_HASH;
1588			break;
1589		}
1590	}
1591
1592	/*
1593	 * The queue numbering scheme used for rxcd->qid is as follows:
1594	 *  - All of the command ring 0s are numbered [0, nrxqsets - 1]
1595	 *  - All of the command ring 1s are numbered [nrxqsets, 2*nrxqsets - 1]
1596	 *
1597	 * Thus, rxcd->qid less than nrxqsets indicates command ring (and
1598	 * flid) 0, and rxcd->qid greater than or equal to nrxqsets
1599	 * indicates command ring (and flid) 1.
1600	 */
1601	nfrags = 0;
1602	total_len = 0;
1603	do {
1604		rxcd = &rxc->vxcr_u.rxcd[cqidx];
1605		KASSERT(rxcd->gen == rxc->vxcr_gen,
1606		    ("%s: generation mismatch", __func__));
1607		flid = (rxcd->qid >= scctx->isc_nrxqsets) ? 1 : 0;
1608		rxr = &rxq->vxrxq_cmd_ring[flid];
1609		rxd = &rxr->vxrxr_rxd[rxcd->rxd_idx];
1610
1611		frag = &ri->iri_frags[nfrags];
1612		frag->irf_flid = flid;
1613		frag->irf_idx = rxcd->rxd_idx;
1614		frag->irf_len = rxcd->len;
1615		total_len += rxcd->len;
1616		nfrags++;
1617		if (++cqidx == rxc->vxcr_ndesc) {
1618			cqidx = 0;
1619			rxc->vxcr_gen ^= 1;
1620		}
1621	} while (!rxcd->eop);
1622
1623	ri->iri_cidx = cqidx;
1624	ri->iri_nfrags = nfrags;
1625	ri->iri_len = total_len;
1626
1627	/*
1628	 * If there's an error, the last descriptor in the packet will
1629	 * have the error indicator set.  In this case, set all
1630	 * fragment lengths to zero.  This will cause iflib to discard
1631	 * the packet, but process all associated descriptors through
1632	 * the refill mechanism.
1633	 */
1634	if (__predict_false(rxcd->error)) {
1635		rxc->vxcr_pkt_errors++;
1636		for (i = 0; i < nfrags; i++) {
1637			frag = &ri->iri_frags[i];
1638			frag->irf_len = 0;
1639		}
1640	} else {
1641		/* Checksum offload information is in the last descriptor. */
1642		if (!rxcd->no_csum) {
1643			uint32_t csum_flags = 0;
1644
1645			if (rxcd->ipv4) {
1646				csum_flags |= CSUM_IP_CHECKED;
1647				if (rxcd->ipcsum_ok)
1648					csum_flags |= CSUM_IP_VALID;
1649			}
1650			if (!rxcd->fragment && (rxcd->tcp || rxcd->udp)) {
1651				csum_flags |= CSUM_L4_CALC;
1652				if (rxcd->csum_ok) {
1653					csum_flags |= CSUM_L4_VALID;
1654					ri->iri_csum_data = 0xffff;
1655				}
1656			}
1657			ri->iri_csum_flags = csum_flags;
1658		}
1659
1660		/* VLAN information is in the last descriptor. */
1661		if (rxcd->vlan) {
1662			ri->iri_flags |= M_VLANTAG;
1663			ri->iri_vtag = rxcd->vtag;
1664		}
1665	}
1666
1667	return (0);
1668}
1669
1670static void
1671vmxnet3_isc_rxd_refill(void *vsc, if_rxd_update_t iru)
1672{
1673	struct vmxnet3_softc *sc;
1674	struct vmxnet3_rxqueue *rxq;
1675	struct vmxnet3_rxring *rxr;
1676	struct vmxnet3_rxdesc *rxd;
1677	uint64_t *paddrs;
1678	int count;
1679	int len;
1680	int idx;
1681	int i;
1682	uint8_t flid;
1683	uint8_t btype;
1684
1685	count = iru->iru_count;
1686	len = iru->iru_buf_size;
1687	flid = iru->iru_flidx;
1688	paddrs = iru->iru_paddrs;
1689
1690	sc = vsc;
1691	rxq = &sc->vmx_rxq[iru->iru_qsidx];
1692	rxr = &rxq->vxrxq_cmd_ring[flid];
1693	rxd = rxr->vxrxr_rxd;
1694
1695	/*
1696	 * Command ring 0 is filled with BTYPE_HEAD descriptors, and
1697	 * command ring 1 is filled with BTYPE_BODY descriptors.
1698	 */
1699	btype = (flid == 0) ? VMXNET3_BTYPE_HEAD : VMXNET3_BTYPE_BODY;
1700	/*
1701	 * The refill entries from iflib will advance monotonically,
1702	 * but the refilled descriptors may not be contiguous due to
1703	 * earlier skipping of descriptors by the device.  The refill
1704	 * entries from iflib need an entire state update, while the
1705	 * descriptors previously skipped by the device only need to
1706	 * have their generation numbers updated.
1707	 */
1708	idx = rxr->vxrxr_refill_start;
1709	i = 0;
1710	do {
1711		if (idx == iru->iru_idxs[i]) {
1712			rxd[idx].addr = paddrs[i];
1713			rxd[idx].len = len;
1714			rxd[idx].btype = btype;
1715			i++;
1716		} else
1717			rxr->vxrxr_desc_skips++;
1718		rxd[idx].gen = rxr->vxrxr_gen;
1719
1720		if (++idx == rxr->vxrxr_ndesc) {
1721			idx = 0;
1722			rxr->vxrxr_gen ^= 1;
1723		}
1724	} while (i != count);
1725	rxr->vxrxr_refill_start = idx;
1726}
1727
1728static void
1729vmxnet3_isc_rxd_flush(void *vsc, uint16_t rxqid, uint8_t flid, qidx_t pidx)
1730{
1731	struct vmxnet3_softc *sc;
1732	struct vmxnet3_rxqueue *rxq;
1733	struct vmxnet3_rxring *rxr;
1734	bus_size_t r;
1735
1736	sc = vsc;
1737	rxq = &sc->vmx_rxq[rxqid];
1738	rxr = &rxq->vxrxq_cmd_ring[flid];
1739
1740	if (flid == 0)
1741		r = VMXNET3_BAR0_RXH1(rxqid);
1742	else
1743		r = VMXNET3_BAR0_RXH2(rxqid);
1744
1745	vmxnet3_write_bar0(sc, r, pidx);
1746}
1747
1748static int
1749vmxnet3_legacy_intr(void *xsc)
1750{
1751	struct vmxnet3_softc *sc;
1752	if_softc_ctx_t scctx;
1753	if_ctx_t ctx;
1754
1755	sc = xsc;
1756	scctx = sc->vmx_scctx;
1757	ctx = sc->vmx_ctx;
1758
1759	/*
1760	 * When there is only a single interrupt configured, this routine
1761	 * runs in fast interrupt context, following which the rxq 0 task
1762	 * will be enqueued.
1763	 */
1764	if (scctx->isc_intr == IFLIB_INTR_LEGACY) {
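		/*
		 * A zero read of the interrupt status register means this
		 * device has nothing pending.
		 */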
1765		if (vmxnet3_read_bar1(sc, VMXNET3_BAR1_INTR) == 0)
1766			return (FILTER_HANDLED);
1767	}
1768	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
1769		vmxnet3_intr_disable_all(ctx);
1770
1771	if (sc->vmx_ds->event != 0)
1772		iflib_admin_intr_deferred(ctx);
1773
1774	/*
1775	 * XXX - When there is both rxq and event activity, do we care
1776	 * whether the rxq 0 task or the admin task re-enables the interrupt
1777	 * first?
1778	 */
1779	return (FILTER_SCHEDULE_THREAD);
1780}
1781
1782static int
1783vmxnet3_rxq_intr(void *vrxq)
1784{
1785	struct vmxnet3_softc *sc;
1786	struct vmxnet3_rxqueue *rxq;
1787
1788	rxq = vrxq;
1789	sc = rxq->vxrxq_sc;
1790
1791	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
1792		vmxnet3_disable_intr(sc, rxq->vxrxq_intr_idx);
1793
1794	return (FILTER_SCHEDULE_THREAD);
1795}
1796
1797static int
1798vmxnet3_event_intr(void *vsc)
1799{
1800	struct vmxnet3_softc *sc;
1801
1802	sc = vsc;
1803
1804	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
1805		vmxnet3_disable_intr(sc, sc->vmx_event_intr_idx);
1806
1807	/*
1808	 * The work will be done via vmxnet3_update_admin_status(), and the
1809	 * interrupt will be re-enabled in vmxnet3_link_intr_enable().
1812	 */
1813	return (FILTER_SCHEDULE_THREAD);
1814}
1815
1816static void
1817vmxnet3_stop(if_ctx_t ctx)
1818{
1819	struct vmxnet3_softc *sc;
1820
1821	sc = iflib_get_softc(ctx);
1822
1823	sc->vmx_link_active = 0;
1824	vmxnet3_write_cmd(sc, VMXNET3_CMD_DISABLE);
1825	vmxnet3_write_cmd(sc, VMXNET3_CMD_RESET);
1826}
1827
1828static void
1829vmxnet3_txinit(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *txq)
1830{
1831	struct vmxnet3_txring *txr;
1832	struct vmxnet3_comp_ring *txc;
1833
1834	txq->vxtxq_last_flush = -1;
1835
1836	txr = &txq->vxtxq_cmd_ring;
1837	txr->vxtxr_next = 0;
1838	txr->vxtxr_gen = VMXNET3_INIT_GEN;
1839	/*
1840	 * iflib has zeroed out the descriptor array during the prior attach
1841	 * or stop
1842	 */
1843
1844	txc = &txq->vxtxq_comp_ring;
1845	txc->vxcr_next = 0;
1846	txc->vxcr_gen = VMXNET3_INIT_GEN;
1847	/*
1848	 * iflib has zeroed out the descriptor array during the prior attach
1849	 * or stop
1850	 */
1851}
1852
1853static void
1854vmxnet3_rxinit(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rxq)
1855{
1856	struct vmxnet3_rxring *rxr;
1857	struct vmxnet3_comp_ring *rxc;
1858	int i;
1859
1860	/*
1861	 * The descriptors will be populated with buffers during a
1862	 * subsequent invocation of vmxnet3_isc_rxd_refill()
1863	 */
1864	for (i = 0; i < sc->vmx_sctx->isc_nrxqs - 1; i++) {
1865		rxr = &rxq->vxrxq_cmd_ring[i];
1866		rxr->vxrxr_gen = VMXNET3_INIT_GEN;
1867		rxr->vxrxr_desc_skips = 0;
1868		rxr->vxrxr_refill_start = 0;
1869		/*
1870		 * iflib has zeroed out the descriptor array during the
1871		 * prior attach or stop
1872		 */
1873	}
1874
1875	for (/**/; i < VMXNET3_RXRINGS_PERQ; i++) {
1876		rxr = &rxq->vxrxq_cmd_ring[i];
1877		rxr->vxrxr_gen = 0;
1878		rxr->vxrxr_desc_skips = 0;
1879		rxr->vxrxr_refill_start = 0;
1880		bzero(rxr->vxrxr_rxd,
1881		    rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxdesc));
1882	}
1883
1884	rxc = &rxq->vxrxq_comp_ring;
1885	rxc->vxcr_next = 0;
1886	rxc->vxcr_gen = VMXNET3_INIT_GEN;
1887	rxc->vxcr_zero_length = 0;
1888	rxc->vxcr_pkt_errors = 0;
1889	/*
1890	 * iflib has zeroed out the descriptor array during the prior attach
1891	 * or stop
1892	 */
1893}
1894
1895static void
1896vmxnet3_reinit_queues(struct vmxnet3_softc *sc)
1897{
1898	if_softc_ctx_t scctx;
1899	int q;
1900
1901	scctx = sc->vmx_scctx;
1902
1903	for (q = 0; q < scctx->isc_ntxqsets; q++)
1904		vmxnet3_txinit(sc, &sc->vmx_txq[q]);
1905
1906	for (q = 0; q < scctx->isc_nrxqsets; q++)
1907		vmxnet3_rxinit(sc, &sc->vmx_rxq[q]);
1908}
1909
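/*
 * Ask the device to transition to the enabled state and reset the RX ring
 * head registers; a non-zero result means the device did not accept the
 * current configuration.
 */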
1910static int
1911vmxnet3_enable_device(struct vmxnet3_softc *sc)
1912{
1913	if_softc_ctx_t scctx;
1914	int q;
1915
1916	scctx = sc->vmx_scctx;
1917
1918	if (vmxnet3_read_cmd(sc, VMXNET3_CMD_ENABLE) != 0) {
1919		device_printf(sc->vmx_dev, "device enable command failed!\n");
1920		return (1);
1921	}
1922
1923	/* Reset the Rx queue heads. */
1924	for (q = 0; q < scctx->isc_nrxqsets; q++) {
1925		vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH1(q), 0);
1926		vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH2(q), 0);
1927	}
1928
1929	return (0);
1930}
1931
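/*
 * Push the RX filter and the VLAN filter table back to the device after a
 * (re)init, since the previously programmed state is not preserved across
 * a reset.
 */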
1932static void
1933vmxnet3_reinit_rxfilters(struct vmxnet3_softc *sc)
1934{
1935	struct ifnet *ifp;
1936
1937	ifp = sc->vmx_ifp;
1938
1939	vmxnet3_set_rxfilter(sc, if_getflags(ifp));
1940
1941	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
1942		bcopy(sc->vmx_vlan_filter, sc->vmx_ds->vlan_filter,
1943		    sizeof(sc->vmx_ds->vlan_filter));
1944	else
1945		bzero(sc->vmx_ds->vlan_filter,
1946		    sizeof(sc->vmx_ds->vlan_filter));
1947	vmxnet3_write_cmd(sc, VMXNET3_CMD_VLAN_FILTER);
1948}
1949
1950static void
1951vmxnet3_init(if_ctx_t ctx)
1952{
1953	struct vmxnet3_softc *sc;
1954
1955	sc = iflib_get_softc(ctx);
1956
1957	/* Use the current MAC address. */
1958	bcopy(IF_LLADDR(sc->vmx_ifp), sc->vmx_lladdr, ETHER_ADDR_LEN);
1959	vmxnet3_set_lladdr(sc);
1960
1961	vmxnet3_reinit_shared_data(sc);
1962	vmxnet3_reinit_queues(sc);
1963
1964	vmxnet3_enable_device(sc);
1965
1966	vmxnet3_reinit_rxfilters(sc);
1967	vmxnet3_link_status(sc);
1968}
1969
1970static void
1971vmxnet3_multi_set(if_ctx_t ctx)
1972{
1973
1974	vmxnet3_set_rxfilter(iflib_get_softc(ctx),
1975	    if_getflags(iflib_get_ifp(ctx)));
1976}
1977
1978static int
1979vmxnet3_mtu_set(if_ctx_t ctx, uint32_t mtu)
1980{
1981	struct vmxnet3_softc *sc;
1982	if_softc_ctx_t scctx;
1983
1984	sc = iflib_get_softc(ctx);
1985	scctx = sc->vmx_scctx;
1986
1987	if (mtu > VMXNET3_TX_MAXSIZE - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN +
1988		ETHER_CRC_LEN))
1989		return (EINVAL);
1990
1991	/*
1992	 * Update the max frame size so that the rx mbuf size is
1993	 * chosen based on the new mtu during the interface init that
1994	 * will occur after this routine returns.
1995	 */
1996	scctx->isc_max_frame_size = mtu +
1997		ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + ETHER_CRC_LEN;
1998	/* RX completion queue - n/a */
1999	scctx->isc_rxd_buf_size[0] = 0;
2000	/*
2001	 * For header-type descriptors (used for first segment of
2002	 * packet), let iflib determine the buffer size based on the
2003	 * max frame size.
2004	 */
2005	scctx->isc_rxd_buf_size[1] = 0;
2006	/*
2007	 * For body-type descriptors (used for jumbo frames and LRO),
2008	 * always use page-sized buffers.
2009	 */
2010	scctx->isc_rxd_buf_size[2] = MJUMPAGESIZE;
2011
2012	return (0);
2013}
2014
2015static void
2016vmxnet3_media_status(if_ctx_t ctx, struct ifmediareq * ifmr)
2017{
2018	struct vmxnet3_softc *sc;
2019
2020	sc = iflib_get_softc(ctx);
2021
2022	ifmr->ifm_status = IFM_AVALID;
2023	ifmr->ifm_active = IFM_ETHER;
2024
2025	if (vmxnet3_link_is_up(sc) != 0) {
2026		ifmr->ifm_status |= IFM_ACTIVE;
2027		ifmr->ifm_active |= IFM_AUTO;
2028	} else
2029		ifmr->ifm_active |= IFM_NONE;
2030}
2031
2032static int
2033vmxnet3_media_change(if_ctx_t ctx)
2034{
2035
2036	/* Ignore. */
2037	return (0);
2038}
2039
2040static int
2041vmxnet3_promisc_set(if_ctx_t ctx, int flags)
2042{
2043
2044	vmxnet3_set_rxfilter(iflib_get_softc(ctx), flags);
2045
2046	return (0);
2047}
2048
2049static uint64_t
2050vmxnet3_get_counter(if_ctx_t ctx, ift_counter cnt)
2051{
2052	if_t ifp = iflib_get_ifp(ctx);
2053
2054	if (cnt < IFCOUNTERS)
2055		return if_get_counter_default(ifp, cnt);
2056
2057	return (0);
2058}
2059
2060static void
2061vmxnet3_update_admin_status(if_ctx_t ctx)
2062{
2063	struct vmxnet3_softc *sc;
2064
2065	sc = iflib_get_softc(ctx);
2066	if (sc->vmx_ds->event != 0)
2067		vmxnet3_evintr(sc);
2068
2069	vmxnet3_refresh_host_stats(sc);
2070}
2071
2072static void
2073vmxnet3_txq_timer(if_ctx_t ctx, uint16_t qid)
2074{
2075	/* Host stats refresh is global, so just trigger it on txq 0 */
2076	if (qid == 0)
2077		vmxnet3_refresh_host_stats(iflib_get_softc(ctx));
2078}
2079
2080static void
2081vmxnet3_update_vlan_filter(struct vmxnet3_softc *sc, int add, uint16_t tag)
2082{
2083	int idx, bit;
2084
2085	if (tag == 0 || tag > 4095)
2086		return;
2087
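	/*
	 * The VLAN filter is a 4096-bit bitmap stored as 128 32-bit words;
	 * e.g. tag 100 maps to word 3 (100 >> 5) and bit 4 (100 & 0x1f).
	 */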
2088	idx = (tag >> 5) & 0x7F;
2089	bit = tag & 0x1F;
2090
2091	/* Update our private VLAN bitvector. */
2092	if (add)
2093		sc->vmx_vlan_filter[idx] |= (1 << bit);
2094	else
2095		sc->vmx_vlan_filter[idx] &= ~(1 << bit);
2096}
2097
2098static void
2099vmxnet3_vlan_register(if_ctx_t ctx, uint16_t tag)
2100{
2101
2102	vmxnet3_update_vlan_filter(iflib_get_softc(ctx), 1, tag);
2103}
2104
2105static void
2106vmxnet3_vlan_unregister(if_ctx_t ctx, uint16_t tag)
2107{
2108
2109	vmxnet3_update_vlan_filter(iflib_get_softc(ctx), 0, tag);
2110}
2111
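/*
 * if_foreach_llmaddr() callback: copy each link-level multicast address
 * into the driver's multicast table while there is room, and return 1 so
 * the caller obtains a count of all addresses.
 */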
2112static u_int
2113vmxnet3_hash_maddr(void *arg, struct sockaddr_dl *sdl, u_int count)
2114{
2115	struct vmxnet3_softc *sc = arg;
2116
2117	if (count < VMXNET3_MULTICAST_MAX)
2118		bcopy(LLADDR(sdl), &sc->vmx_mcast[count * ETHER_ADDR_LEN],
2119		    ETHER_ADDR_LEN);
2120
2121	return (1);
2122}
2123
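/*
 * Build the RX mode from the interface flags, populate the multicast table,
 * and push both to the device.  If there are more multicast addresses than
 * the table can hold, fall back to all-multicast mode.
 */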
2124static void
2125vmxnet3_set_rxfilter(struct vmxnet3_softc *sc, int flags)
2126{
2127	struct ifnet *ifp;
2128	struct vmxnet3_driver_shared *ds;
2129	u_int mode;
2130
2131	ifp = sc->vmx_ifp;
2132	ds = sc->vmx_ds;
2133
2134	mode = VMXNET3_RXMODE_UCAST | VMXNET3_RXMODE_BCAST;
2135	if (flags & IFF_PROMISC)
2136		mode |= VMXNET3_RXMODE_PROMISC;
2137	if (flags & IFF_ALLMULTI)
2138		mode |= VMXNET3_RXMODE_ALLMULTI;
2139	else {
2140		int cnt;
2141
2142		cnt = if_foreach_llmaddr(ifp, vmxnet3_hash_maddr, sc);
2143		if (cnt >= VMXNET3_MULTICAST_MAX) {
2144			cnt = 0;
2145			mode |= VMXNET3_RXMODE_ALLMULTI;
2146		} else if (cnt > 0)
2147			mode |= VMXNET3_RXMODE_MCAST;
2148		ds->mcast_tablelen = cnt * ETHER_ADDR_LEN;
2149	}
2150
2151	ds->rxmode = mode;
2152
2153	vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_FILTER);
2154	vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_RXMODE);
2155}
2156
2157static void
2158vmxnet3_refresh_host_stats(struct vmxnet3_softc *sc)
2159{
2160
2161	vmxnet3_write_cmd(sc, VMXNET3_CMD_GET_STATS);
2162}
2163
2164static int
2165vmxnet3_link_is_up(struct vmxnet3_softc *sc)
2166{
2167	uint32_t status;
2168
2169	status = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_LINK);
2170	return !!(status & 0x1);
2171}
2172
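/*
 * Report link state changes to iflib.  The device has no physical PHY, so
 * the link speed is advertised as a fixed 10 Gbps.
 */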
2173static void
2174vmxnet3_link_status(struct vmxnet3_softc *sc)
2175{
2176	if_ctx_t ctx;
2177	uint64_t speed;
2178	int link;
2179
2180	ctx = sc->vmx_ctx;
2181	link = vmxnet3_link_is_up(sc);
2182	speed = IF_Gbps(10);
2183
2184	if (link != 0 && sc->vmx_link_active == 0) {
2185		sc->vmx_link_active = 1;
2186		iflib_link_state_change(ctx, LINK_STATE_UP, speed);
2187	} else if (link == 0 && sc->vmx_link_active != 0) {
2188		sc->vmx_link_active = 0;
2189		iflib_link_state_change(ctx, LINK_STATE_DOWN, speed);
2190	}
2191}
2192
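/*
 * Program the station address into the device: bytes 0-3 are packed into
 * the MACL register and bytes 4-5 into MACH, least-significant byte first.
 */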
2193static void
2194vmxnet3_set_lladdr(struct vmxnet3_softc *sc)
2195{
2196	uint32_t ml, mh;
2197
2198	ml  = sc->vmx_lladdr[0];
2199	ml |= sc->vmx_lladdr[1] << 8;
2200	ml |= sc->vmx_lladdr[2] << 16;
2201	ml |= sc->vmx_lladdr[3] << 24;
2202	vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACL, ml);
2203
2204	mh  = sc->vmx_lladdr[4];
2205	mh |= sc->vmx_lladdr[5] << 8;
2206	vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACH, mh);
2207}
2208
2209static void
2210vmxnet3_get_lladdr(struct vmxnet3_softc *sc)
2211{
2212	uint32_t ml, mh;
2213
2214	ml = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACL);
2215	mh = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACH);
2216
2217	sc->vmx_lladdr[0] = ml;
2218	sc->vmx_lladdr[1] = ml >> 8;
2219	sc->vmx_lladdr[2] = ml >> 16;
2220	sc->vmx_lladdr[3] = ml >> 24;
2221	sc->vmx_lladdr[4] = mh;
2222	sc->vmx_lladdr[5] = mh >> 8;
2223}
2224
2225static void
2226vmxnet3_setup_txq_sysctl(struct vmxnet3_txqueue *txq,
2227    struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
2228{
2229	struct sysctl_oid *node, *txsnode;
2230	struct sysctl_oid_list *list, *txslist;
2231	struct UPT1_TxStats *txstats;
2232	char namebuf[16];
2233
2234	txstats = &txq->vxtxq_ts->stats;
2235
2236	snprintf(namebuf, sizeof(namebuf), "txq%d", txq->vxtxq_id);
2237	node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
2238	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Transmit Queue");
2239	txq->vxtxq_sysctl = list = SYSCTL_CHILDREN(node);
2240
2241	/*
2242	 * Add statistics reported by the host. These are updated by the
2243	 * iflib txq timer on txq 0.
2244	 */
2245	txsnode = SYSCTL_ADD_NODE(ctx, list, OID_AUTO, "hstats",
2246	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Host Statistics");
2247	txslist = SYSCTL_CHILDREN(txsnode);
2248	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "tso_packets", CTLFLAG_RD,
2249	    &txstats->TSO_packets, "TSO packets");
2250	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "tso_bytes", CTLFLAG_RD,
2251	    &txstats->TSO_bytes, "TSO bytes");
2252	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "ucast_packets", CTLFLAG_RD,
2253	    &txstats->ucast_packets, "Unicast packets");
2254	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "unicast_bytes", CTLFLAG_RD,
2255	    &txstats->ucast_bytes, "Unicast bytes");
2256	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "mcast_packets", CTLFLAG_RD,
2257	    &txstats->mcast_packets, "Multicast packets");
2258	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "mcast_bytes", CTLFLAG_RD,
2259	    &txstats->mcast_bytes, "Multicast bytes");
2260	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "error", CTLFLAG_RD,
2261	    &txstats->error, "Errors");
2262	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "discard", CTLFLAG_RD,
2263	    &txstats->discard, "Discards");
2264}
2265
2266static void
2267vmxnet3_setup_rxq_sysctl(struct vmxnet3_rxqueue *rxq,
2268    struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
2269{
2270	struct sysctl_oid *node, *rxsnode;
2271	struct sysctl_oid_list *list, *rxslist;
2272	struct UPT1_RxStats *rxstats;
2273	char namebuf[16];
2274
2275	rxstats = &rxq->vxrxq_rs->stats;
2276
2277	snprintf(namebuf, sizeof(namebuf), "rxq%d", rxq->vxrxq_id);
2278	node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
2279	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Receive Queue");
2280	rxq->vxrxq_sysctl = list = SYSCTL_CHILDREN(node);
2281
2282	/*
2283	 * Add statistics reported by the host. These are updated by the
2284	 * iflib txq timer on txq 0.
2285	 */
2286	rxsnode = SYSCTL_ADD_NODE(ctx, list, OID_AUTO, "hstats",
2287	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Host Statistics");
2288	rxslist = SYSCTL_CHILDREN(rxsnode);
2289	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "lro_packets", CTLFLAG_RD,
2290	    &rxstats->LRO_packets, "LRO packets");
2291	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "lro_bytes", CTLFLAG_RD,
2292	    &rxstats->LRO_bytes, "LRO bytes");
2293	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "ucast_packets", CTLFLAG_RD,
2294	    &rxstats->ucast_packets, "Unicast packets");
2295	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "unicast_bytes", CTLFLAG_RD,
2296	    &rxstats->ucast_bytes, "Unicast bytes");
2297	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "mcast_packets", CTLFLAG_RD,
2298	    &rxstats->mcast_packets, "Multicast packets");
2299	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "mcast_bytes", CTLFLAG_RD,
2300	    &rxstats->mcast_bytes, "Multicast bytes");
2301	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "bcast_packets", CTLFLAG_RD,
2302	    &rxstats->bcast_packets, "Broadcast packets");
2303	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "bcast_bytes", CTLFLAG_RD,
2304	    &rxstats->bcast_bytes, "Broadcast bytes");
2305	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "nobuffer", CTLFLAG_RD,
2306	    &rxstats->nobuffer, "No buffer");
2307	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "error", CTLFLAG_RD,
2308	    &rxstats->error, "Errors");
2309}
2310
2311static void
2312vmxnet3_setup_debug_sysctl(struct vmxnet3_softc *sc,
2313    struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
2314{
2315	if_softc_ctx_t scctx;
2316	struct sysctl_oid *node;
2317	struct sysctl_oid_list *list;
2318	int i;
2319
2320	scctx = sc->vmx_scctx;
2321
2322	for (i = 0; i < scctx->isc_ntxqsets; i++) {
2323		struct vmxnet3_txqueue *txq = &sc->vmx_txq[i];
2324
2325		node = SYSCTL_ADD_NODE(ctx, txq->vxtxq_sysctl, OID_AUTO,
2326		    "debug", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "");
2327		list = SYSCTL_CHILDREN(node);
2328
2329		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_next", CTLFLAG_RD,
2330		    &txq->vxtxq_cmd_ring.vxtxr_next, 0, "");
2331		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_ndesc", CTLFLAG_RD,
2332		    &txq->vxtxq_cmd_ring.vxtxr_ndesc, 0, "");
2333		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd_gen", CTLFLAG_RD,
2334		    &txq->vxtxq_cmd_ring.vxtxr_gen, 0, "");
2335		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_next", CTLFLAG_RD,
2336		    &txq->vxtxq_comp_ring.vxcr_next, 0, "");
2337		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_ndesc", CTLFLAG_RD,
2338		    &txq->vxtxq_comp_ring.vxcr_ndesc, 0,"");
2339		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "comp_gen", CTLFLAG_RD,
2340		    &txq->vxtxq_comp_ring.vxcr_gen, 0, "");
2341	}
2342
2343	for (i = 0; i < scctx->isc_nrxqsets; i++) {
2344		struct vmxnet3_rxqueue *rxq = &sc->vmx_rxq[i];
2345
2346		node = SYSCTL_ADD_NODE(ctx, rxq->vxrxq_sysctl, OID_AUTO,
2347		    "debug", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "");
2348		list = SYSCTL_CHILDREN(node);
2349
2350		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd0_ndesc", CTLFLAG_RD,
2351		    &rxq->vxrxq_cmd_ring[0].vxrxr_ndesc, 0, "");
2352		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd0_gen", CTLFLAG_RD,
2353		    &rxq->vxrxq_cmd_ring[0].vxrxr_gen, 0, "");
2354		SYSCTL_ADD_U64(ctx, list, OID_AUTO, "cmd0_desc_skips", CTLFLAG_RD,
2355		    &rxq->vxrxq_cmd_ring[0].vxrxr_desc_skips, 0, "");
2356		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd1_ndesc", CTLFLAG_RD,
2357		    &rxq->vxrxq_cmd_ring[1].vxrxr_ndesc, 0, "");
2358		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd1_gen", CTLFLAG_RD,
2359		    &rxq->vxrxq_cmd_ring[1].vxrxr_gen, 0, "");
2360		SYSCTL_ADD_U64(ctx, list, OID_AUTO, "cmd1_desc_skips", CTLFLAG_RD,
2361		    &rxq->vxrxq_cmd_ring[1].vxrxr_desc_skips, 0, "");
2362		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_ndesc", CTLFLAG_RD,
2363		    &rxq->vxrxq_comp_ring.vxcr_ndesc, 0,"");
2364		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "comp_gen", CTLFLAG_RD,
2365		    &rxq->vxrxq_comp_ring.vxcr_gen, 0, "");
2366		SYSCTL_ADD_U64(ctx, list, OID_AUTO, "comp_zero_length", CTLFLAG_RD,
2367		    &rxq->vxrxq_comp_ring.vxcr_zero_length, 0, "");
2368		SYSCTL_ADD_U64(ctx, list, OID_AUTO, "comp_pkt_errors", CTLFLAG_RD,
2369		    &rxq->vxrxq_comp_ring.vxcr_pkt_errors, 0, "");
2370	}
2371}
2372
2373static void
2374vmxnet3_setup_queue_sysctl(struct vmxnet3_softc *sc,
2375    struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
2376{
2377	if_softc_ctx_t scctx;
2378	int i;
2379
2380	scctx = sc->vmx_scctx;
2381
2382	for (i = 0; i < scctx->isc_ntxqsets; i++)
2383		vmxnet3_setup_txq_sysctl(&sc->vmx_txq[i], ctx, child);
2384	for (i = 0; i < scctx->isc_nrxqsets; i++)
2385		vmxnet3_setup_rxq_sysctl(&sc->vmx_rxq[i], ctx, child);
2386
2387	vmxnet3_setup_debug_sysctl(sc, ctx, child);
2388}
2389
2390static void
2391vmxnet3_setup_sysctl(struct vmxnet3_softc *sc)
2392{
2393	device_t dev;
2394	struct sysctl_ctx_list *ctx;
2395	struct sysctl_oid *tree;
2396	struct sysctl_oid_list *child;
2397
2398	dev = sc->vmx_dev;
2399	ctx = device_get_sysctl_ctx(dev);
2400	tree = device_get_sysctl_tree(dev);
2401	child = SYSCTL_CHILDREN(tree);
2402
2403	vmxnet3_setup_queue_sysctl(sc, ctx, child);
2404}
2405
2406static void
2407vmxnet3_write_bar0(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v)
2408{
2409
2410	bus_space_write_4(sc->vmx_iot0, sc->vmx_ioh0, r, v);
2411}
2412
2413static uint32_t
2414vmxnet3_read_bar1(struct vmxnet3_softc *sc, bus_size_t r)
2415{
2416
2417	return (bus_space_read_4(sc->vmx_iot1, sc->vmx_ioh1, r));
2418}
2419
2420static void
2421vmxnet3_write_bar1(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v)
2422{
2423
2424	bus_space_write_4(sc->vmx_iot1, sc->vmx_ioh1, r, v);
2425}
2426
2427static void
2428vmxnet3_write_cmd(struct vmxnet3_softc *sc, uint32_t cmd)
2429{
2430
2431	vmxnet3_write_bar1(sc, VMXNET3_BAR1_CMD, cmd);
2432}
2433
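/*
 * Commands that return a value are issued by writing the command code to
 * the BAR1 command register and reading the result back from the same
 * register; the barrier keeps the write and the read ordered.
 */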
2434static uint32_t
2435vmxnet3_read_cmd(struct vmxnet3_softc *sc, uint32_t cmd)
2436{
2437
2438	vmxnet3_write_cmd(sc, cmd);
2439	bus_space_barrier(sc->vmx_iot1, sc->vmx_ioh1, 0, 0,
2440	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
2441	return (vmxnet3_read_bar1(sc, VMXNET3_BAR1_CMD));
2442}
2443
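/*
 * Interrupt mask registers: writing 0 to IMASK(irq) unmasks the vector and
 * writing 1 masks it.
 */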
2444static void
2445vmxnet3_enable_intr(struct vmxnet3_softc *sc, int irq)
2446{
2447
2448	vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), 0);
2449}
2450
2451static void
2452vmxnet3_disable_intr(struct vmxnet3_softc *sc, int irq)
2453{
2454
2455	vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), 1);
2456}
2457
2458static int
2459vmxnet3_tx_queue_intr_enable(if_ctx_t ctx, uint16_t qid)
2460{
2461	/* Not using interrupts for TX */
2462	return (0);
2463}
2464
2465static int
2466vmxnet3_rx_queue_intr_enable(if_ctx_t ctx, uint16_t qid)
2467{
2468	struct vmxnet3_softc *sc;
2469
2470	sc = iflib_get_softc(ctx);
2471	vmxnet3_enable_intr(sc, sc->vmx_rxq[qid].vxrxq_intr_idx);
2472	return (0);
2473}
2474
2475static void
2476vmxnet3_link_intr_enable(if_ctx_t ctx)
2477{
2478	struct vmxnet3_softc *sc;
2479
2480	sc = iflib_get_softc(ctx);
2481	vmxnet3_enable_intr(sc, sc->vmx_event_intr_idx);
2482}
2483
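/*
 * Clear the global interrupt-disable flag in the shared data area and
 * unmask every vector that iflib allocated.
 */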
2484static void
2485vmxnet3_intr_enable_all(if_ctx_t ctx)
2486{
2487	struct vmxnet3_softc *sc;
2488	if_softc_ctx_t scctx;
2489	int i;
2490
2491	sc = iflib_get_softc(ctx);
2492	scctx = sc->vmx_scctx;
2493	sc->vmx_ds->ictrl &= ~VMXNET3_ICTRL_DISABLE_ALL;
2494	for (i = 0; i < scctx->isc_vectors; i++)
2495		vmxnet3_enable_intr(sc, i);
2496}
2497
2498static void
2499vmxnet3_intr_disable_all(if_ctx_t ctx)
2500{
2501	struct vmxnet3_softc *sc;
2502	int i;
2503
2504	sc = iflib_get_softc(ctx);
2505	/*
2506	 * iflib may invoke this routine before vmxnet3_attach_post() has
2507	 * run, which is before the top level shared data area is
2508	 * initialized and the device made aware of it.
2509	 */
2510	if (sc->vmx_ds != NULL)
2511		sc->vmx_ds->ictrl |= VMXNET3_ICTRL_DISABLE_ALL;
2512	for (i = 0; i < VMXNET3_MAX_INTRS; i++)
2513		vmxnet3_disable_intr(sc, i);
2514}
2515
2516/*
2517 * Since this is a purely paravirtualized device, we do not have
2518 * to worry about DMA coherency. But at times, we must make sure
2519 * both the compiler and CPU do not reorder memory operations.
2520 */
2521static inline void
2522vmxnet3_barrier(struct vmxnet3_softc *sc, vmxnet3_barrier_t type)
2523{
2524
2525	switch (type) {
2526	case VMXNET3_BARRIER_RD:
2527		rmb();
2528		break;
2529	case VMXNET3_BARRIER_WR:
2530		wmb();
2531		break;
2532	case VMXNET3_BARRIER_RDWR:
2533		mb();
2534		break;
2535	default:
2536		panic("%s: bad barrier type %d", __func__, type);
2537	}
2538}
2539