xref: /illumos-gate/usr/src/uts/common/io/vioif/vioif.c (revision d4221574)
18a324c92SDan McDonald /*
28a324c92SDan McDonald  * This file and its contents are supplied under the terms of the
38a324c92SDan McDonald  * Common Development and Distribution License ("CDDL"), version 1.0.
48a324c92SDan McDonald  * You may only use this file in accordance with the terms of version
58a324c92SDan McDonald  * 1.0 of the CDDL.
68a324c92SDan McDonald  *
78a324c92SDan McDonald  * A full copy of the text of the CDDL should have accompanied this
88a324c92SDan McDonald  * source.  A copy of the CDDL is also available via the Internet at
98a324c92SDan McDonald  * http://www.illumos.org/license/CDDL.
108a324c92SDan McDonald  */
118a324c92SDan McDonald 
128a324c92SDan McDonald /*
138a324c92SDan McDonald  * Copyright 2013 Nexenta Inc.  All rights reserved.
14970db7b7SDan Kimmel  * Copyright (c) 2014, 2016 by Delphix. All rights reserved.
1535d41f28SJason King  * Copyright 2021 Joyent, Inc.
16aefa9c84SJoshua M. Clulow  * Copyright 2019 Joshua M. Clulow <josh@sysmgr.org>
178a324c92SDan McDonald  */
188a324c92SDan McDonald 
198a324c92SDan McDonald /* Based on the NetBSD virtio driver by Minoura Makoto. */
208a324c92SDan McDonald /*
218a324c92SDan McDonald  * Copyright (c) 2010 Minoura Makoto.
228a324c92SDan McDonald  * All rights reserved.
238a324c92SDan McDonald  *
248a324c92SDan McDonald  * Redistribution and use in source and binary forms, with or without
258a324c92SDan McDonald  * modification, are permitted provided that the following conditions
268a324c92SDan McDonald  * are met:
278a324c92SDan McDonald  * 1. Redistributions of source code must retain the above copyright
288a324c92SDan McDonald  *    notice, this list of conditions and the following disclaimer.
298a324c92SDan McDonald  * 2. Redistributions in binary form must reproduce the above copyright
308a324c92SDan McDonald  *    notice, this list of conditions and the following disclaimer in the
318a324c92SDan McDonald  *    documentation and/or other materials provided with the distribution.
328a324c92SDan McDonald  *
338a324c92SDan McDonald  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
348a324c92SDan McDonald  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
358a324c92SDan McDonald  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
368a324c92SDan McDonald  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
378a324c92SDan McDonald  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
388a324c92SDan McDonald  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
398a324c92SDan McDonald  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
408a324c92SDan McDonald  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
418a324c92SDan McDonald  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
428a324c92SDan McDonald  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
438a324c92SDan McDonald  */
448a324c92SDan McDonald 
45f8296c60SJoshua M. Clulow /*
46f8296c60SJoshua M. Clulow  * VIRTIO NETWORK DRIVER
47f8296c60SJoshua M. Clulow  */
48f8296c60SJoshua M. Clulow 
498a324c92SDan McDonald #include <sys/types.h>
508a324c92SDan McDonald #include <sys/errno.h>
518a324c92SDan McDonald #include <sys/param.h>
528a324c92SDan McDonald #include <sys/stropts.h>
538a324c92SDan McDonald #include <sys/stream.h>
548a324c92SDan McDonald #include <sys/strsubr.h>
558a324c92SDan McDonald #include <sys/kmem.h>
568a324c92SDan McDonald #include <sys/conf.h>
578a324c92SDan McDonald #include <sys/devops.h>
588a324c92SDan McDonald #include <sys/ksynch.h>
598a324c92SDan McDonald #include <sys/stat.h>
608a324c92SDan McDonald #include <sys/modctl.h>
618a324c92SDan McDonald #include <sys/debug.h>
628a324c92SDan McDonald #include <sys/pci.h>
638a324c92SDan McDonald #include <sys/ethernet.h>
648a324c92SDan McDonald #include <sys/vlan.h>
65f8296c60SJoshua M. Clulow #include <sys/sysmacros.h>
66aefa9c84SJoshua M. Clulow #include <sys/smbios.h>
678a324c92SDan McDonald 
688a324c92SDan McDonald #include <sys/dlpi.h>
698a324c92SDan McDonald #include <sys/taskq.h>
708a324c92SDan McDonald 
718a324c92SDan McDonald #include <sys/pattr.h>
728a324c92SDan McDonald #include <sys/strsun.h>
738a324c92SDan McDonald 
748a324c92SDan McDonald #include <sys/random.h>
7594c3dad2SToomas Soome #include <sys/containerof.h>
768a324c92SDan McDonald #include <sys/stream.h>
77d240edafSRobert Mustacchi #include <inet/tcp.h>
788a324c92SDan McDonald 
798a324c92SDan McDonald #include <sys/mac.h>
808a324c92SDan McDonald #include <sys/mac_provider.h>
818a324c92SDan McDonald #include <sys/mac_ether.h>
828a324c92SDan McDonald 
83f8296c60SJoshua M. Clulow #include "virtio.h"
84f8296c60SJoshua M. Clulow #include "vioif.h"
858a324c92SDan McDonald 
/*
 * Tuneable: promiscuous-mode fallback for devices without a control queue.
 *
 * Most hypervisors provide the control queue, but older versions of bhyve on
 * illumos did not.  To keep the historic behaviour of the illumos vioif
 * driver on such devices, a non-zero value here makes the driver pretend that
 * the request always succeeds when the underlying virtual device lacks
 * support.
 */
int vioif_fake_promisc_success = 1;
938a324c92SDan McDonald 
94f8296c60SJoshua M. Clulow static int vioif_quiesce(dev_info_t *);
95f8296c60SJoshua M. Clulow static int vioif_attach(dev_info_t *, ddi_attach_cmd_t);
96f8296c60SJoshua M. Clulow static int vioif_detach(dev_info_t *, ddi_detach_cmd_t);
97f8296c60SJoshua M. Clulow static boolean_t vioif_has_feature(vioif_t *, uint32_t);
98f8296c60SJoshua M. Clulow static void vioif_reclaim_restart(vioif_t *);
99f8296c60SJoshua M. Clulow static int vioif_m_stat(void *, uint_t, uint64_t *);
100f8296c60SJoshua M. Clulow static void vioif_m_stop(void *);
101f8296c60SJoshua M. Clulow static int vioif_m_start(void *);
102f8296c60SJoshua M. Clulow static int vioif_m_multicst(void *, boolean_t, const uint8_t *);
103f8296c60SJoshua M. Clulow static int vioif_m_setpromisc(void *, boolean_t);
104f8296c60SJoshua M. Clulow static int vioif_m_unicst(void *, const uint8_t *);
105f8296c60SJoshua M. Clulow static mblk_t *vioif_m_tx(void *, mblk_t *);
106f8296c60SJoshua M. Clulow static int vioif_m_setprop(void *, const char *, mac_prop_id_t, uint_t,
107f8296c60SJoshua M. Clulow     const void *);
108f8296c60SJoshua M. Clulow static int vioif_m_getprop(void *, const char *, mac_prop_id_t, uint_t, void *);
109f8296c60SJoshua M. Clulow static void vioif_m_propinfo(void *, const char *, mac_prop_id_t,
110f8296c60SJoshua M. Clulow     mac_prop_info_handle_t);
111f8296c60SJoshua M. Clulow static boolean_t vioif_m_getcapab(void *, mac_capab_t, void *);
112f8296c60SJoshua M. Clulow static uint_t vioif_add_rx(vioif_t *);
113f8296c60SJoshua M. Clulow 
114f8296c60SJoshua M. Clulow 
115f8296c60SJoshua M. Clulow static struct cb_ops vioif_cb_ops = {
116f8296c60SJoshua M. Clulow 	.cb_rev =			CB_REV,
117f8296c60SJoshua M. Clulow 	.cb_flag =			D_MP | D_NEW,
118f8296c60SJoshua M. Clulow 
119f8296c60SJoshua M. Clulow 	.cb_open =			nulldev,
120f8296c60SJoshua M. Clulow 	.cb_close =			nulldev,
121f8296c60SJoshua M. Clulow 	.cb_strategy =			nodev,
122f8296c60SJoshua M. Clulow 	.cb_print =			nodev,
123f8296c60SJoshua M. Clulow 	.cb_dump =			nodev,
124f8296c60SJoshua M. Clulow 	.cb_read =			nodev,
125f8296c60SJoshua M. Clulow 	.cb_write =			nodev,
126f8296c60SJoshua M. Clulow 	.cb_ioctl =			nodev,
127f8296c60SJoshua M. Clulow 	.cb_devmap =			nodev,
128f8296c60SJoshua M. Clulow 	.cb_mmap =			nodev,
129f8296c60SJoshua M. Clulow 	.cb_segmap =			nodev,
130f8296c60SJoshua M. Clulow 	.cb_chpoll =			nochpoll,
131f8296c60SJoshua M. Clulow 	.cb_prop_op =			ddi_prop_op,
132f8296c60SJoshua M. Clulow 	.cb_str =			NULL,
133f8296c60SJoshua M. Clulow 	.cb_aread =			nodev,
134f8296c60SJoshua M. Clulow 	.cb_awrite =			nodev,
1358a324c92SDan McDonald };
1368a324c92SDan McDonald 
137f8296c60SJoshua M. Clulow static struct dev_ops vioif_dev_ops = {
138f8296c60SJoshua M. Clulow 	.devo_rev =			DEVO_REV,
139f8296c60SJoshua M. Clulow 	.devo_refcnt =			0,
1408a324c92SDan McDonald 
141f8296c60SJoshua M. Clulow 	.devo_attach =			vioif_attach,
142f8296c60SJoshua M. Clulow 	.devo_detach =			vioif_detach,
143f8296c60SJoshua M. Clulow 	.devo_quiesce =			vioif_quiesce,
1448a324c92SDan McDonald 
145f8296c60SJoshua M. Clulow 	.devo_cb_ops =			&vioif_cb_ops,
1468a324c92SDan McDonald 
147f8296c60SJoshua M. Clulow 	.devo_getinfo =			NULL,
148f8296c60SJoshua M. Clulow 	.devo_identify =		nulldev,
149f8296c60SJoshua M. Clulow 	.devo_probe =			nulldev,
150f8296c60SJoshua M. Clulow 	.devo_reset =			nodev,
151f8296c60SJoshua M. Clulow 	.devo_bus_ops =			NULL,
152f8296c60SJoshua M. Clulow 	.devo_power =			NULL,
1538a324c92SDan McDonald };
1548a324c92SDan McDonald 
155f8296c60SJoshua M. Clulow static struct modldrv vioif_modldrv = {
156f8296c60SJoshua M. Clulow 	.drv_modops =			&mod_driverops,
157f8296c60SJoshua M. Clulow 	.drv_linkinfo =			"VIRTIO network driver",
158f8296c60SJoshua M. Clulow 	.drv_dev_ops =			&vioif_dev_ops
1598a324c92SDan McDonald };
1608a324c92SDan McDonald 
161f8296c60SJoshua M. Clulow static struct modlinkage vioif_modlinkage = {
162f8296c60SJoshua M. Clulow 	.ml_rev =			MODREV_1,
163f8296c60SJoshua M. Clulow 	.ml_linkage =			{ &vioif_modldrv, NULL }
1648a324c92SDan McDonald };
1658a324c92SDan McDonald 
166f8296c60SJoshua M. Clulow static mac_callbacks_t vioif_mac_callbacks = {
167f8296c60SJoshua M. Clulow 	.mc_getstat =			vioif_m_stat,
168f8296c60SJoshua M. Clulow 	.mc_start =			vioif_m_start,
169f8296c60SJoshua M. Clulow 	.mc_stop =			vioif_m_stop,
170f8296c60SJoshua M. Clulow 	.mc_setpromisc =		vioif_m_setpromisc,
171f8296c60SJoshua M. Clulow 	.mc_multicst =			vioif_m_multicst,
172f8296c60SJoshua M. Clulow 	.mc_unicst =			vioif_m_unicst,
173f8296c60SJoshua M. Clulow 	.mc_tx =			vioif_m_tx,
174f8296c60SJoshua M. Clulow 
175f8296c60SJoshua M. Clulow 	.mc_callbacks =			(MC_GETCAPAB | MC_SETPROP |
176f8296c60SJoshua M. Clulow 					    MC_GETPROP | MC_PROPINFO),
177f8296c60SJoshua M. Clulow 	.mc_getcapab =			vioif_m_getcapab,
178f8296c60SJoshua M. Clulow 	.mc_setprop =			vioif_m_setprop,
179f8296c60SJoshua M. Clulow 	.mc_getprop =			vioif_m_getprop,
180f8296c60SJoshua M. Clulow 	.mc_propinfo =			vioif_m_propinfo,
1818a324c92SDan McDonald };
1828a324c92SDan McDonald 
183f8296c60SJoshua M. Clulow static const uchar_t vioif_broadcast[ETHERADDRL] = {
184f8296c60SJoshua M. Clulow 	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
1858a324c92SDan McDonald };
1868a324c92SDan McDonald 
1878a324c92SDan McDonald /*
188f8296c60SJoshua M. Clulow  * Interval for the periodic TX reclaim.
1898a324c92SDan McDonald  */
190f8296c60SJoshua M. Clulow uint_t vioif_reclaim_ms = 200;
1918a324c92SDan McDonald 
/*
 * Tuneable: lets the operator override the kinds of interrupts that vioif
 * will use.  The default is -1 rather than 0 so that an explicit override to
 * 0 in /etc/system remains possible.
 */
int vioif_allowed_int_types = -1;
198aefa9c84SJoshua M. Clulow 
1998a324c92SDan McDonald /*
200f8296c60SJoshua M. Clulow  * DMA attribute template for transmit and receive buffers.  The SGL entry
201f8296c60SJoshua M. Clulow  * count will be modified before using the template.  Note that these
202f8296c60SJoshua M. Clulow  * allocations are aligned so that VIOIF_HEADER_SKIP places the IP header in
203f8296c60SJoshua M. Clulow  * received frames at the correct offset for the networking stack.
2048a324c92SDan McDonald  */
205f8296c60SJoshua M. Clulow ddi_dma_attr_t vioif_dma_attr_bufs = {
206f8296c60SJoshua M. Clulow 	.dma_attr_version =		DMA_ATTR_V0,
207f8296c60SJoshua M. Clulow 	.dma_attr_addr_lo =		0x0000000000000000,
208f8296c60SJoshua M. Clulow 	.dma_attr_addr_hi =		0xFFFFFFFFFFFFFFFF,
209f8296c60SJoshua M. Clulow 	.dma_attr_count_max =		0x00000000FFFFFFFF,
210f8296c60SJoshua M. Clulow 	.dma_attr_align =		VIOIF_HEADER_ALIGN,
211f8296c60SJoshua M. Clulow 	.dma_attr_burstsizes =		1,
212f8296c60SJoshua M. Clulow 	.dma_attr_minxfer =		1,
213f8296c60SJoshua M. Clulow 	.dma_attr_maxxfer =		0x00000000FFFFFFFF,
214f8296c60SJoshua M. Clulow 	.dma_attr_seg =			0x00000000FFFFFFFF,
215f8296c60SJoshua M. Clulow 	.dma_attr_sgllen =		0,
216f8296c60SJoshua M. Clulow 	.dma_attr_granular =		1,
217f8296c60SJoshua M. Clulow 	.dma_attr_flags =		0
2188a324c92SDan McDonald };
2198a324c92SDan McDonald 
2208a324c92SDan McDonald /*
221f8296c60SJoshua M. Clulow  * DMA attributes for mapping larger transmit buffers from the networking
222f8296c60SJoshua M. Clulow  * stack.  The requirements are quite loose, but note that the SGL entry length
223f8296c60SJoshua M. Clulow  * field is 32-bit.
2248a324c92SDan McDonald  */
225f8296c60SJoshua M. Clulow ddi_dma_attr_t vioif_dma_attr_external = {
226f8296c60SJoshua M. Clulow 	.dma_attr_version =		DMA_ATTR_V0,
227f8296c60SJoshua M. Clulow 	.dma_attr_addr_lo =		0x0000000000000000,
228f8296c60SJoshua M. Clulow 	.dma_attr_addr_hi =		0xFFFFFFFFFFFFFFFF,
229f8296c60SJoshua M. Clulow 	.dma_attr_count_max =		0x00000000FFFFFFFF,
230f8296c60SJoshua M. Clulow 	.dma_attr_align =		1,
231f8296c60SJoshua M. Clulow 	.dma_attr_burstsizes =		1,
232f8296c60SJoshua M. Clulow 	.dma_attr_minxfer =		1,
233f8296c60SJoshua M. Clulow 	.dma_attr_maxxfer =		0x00000000FFFFFFFF,
234f8296c60SJoshua M. Clulow 	.dma_attr_seg =			0x00000000FFFFFFFF,
235f8296c60SJoshua M. Clulow 	.dma_attr_sgllen =		VIOIF_MAX_SEGS - 1,
236f8296c60SJoshua M. Clulow 	.dma_attr_granular =		1,
237f8296c60SJoshua M. Clulow 	.dma_attr_flags =		0
2388a324c92SDan McDonald };
2398a324c92SDan McDonald 
2408a324c92SDan McDonald 
2418a324c92SDan McDonald /*
242f8296c60SJoshua M. Clulow  * VIRTIO NET MAC PROPERTIES
2438a324c92SDan McDonald  */
244f8296c60SJoshua M. Clulow #define	VIOIF_MACPROP_TXCOPY_THRESH	"_txcopy_thresh"
245f8296c60SJoshua M. Clulow #define	VIOIF_MACPROP_TXCOPY_THRESH_DEF	300
246f8296c60SJoshua M. Clulow #define	VIOIF_MACPROP_TXCOPY_THRESH_MAX	640
2478a324c92SDan McDonald 
248f8296c60SJoshua M. Clulow #define	VIOIF_MACPROP_RXCOPY_THRESH	"_rxcopy_thresh"
249f8296c60SJoshua M. Clulow #define	VIOIF_MACPROP_RXCOPY_THRESH_DEF	300
250f8296c60SJoshua M. Clulow #define	VIOIF_MACPROP_RXCOPY_THRESH_MAX	640
2518a324c92SDan McDonald 
2528a324c92SDan McDonald static char *vioif_priv_props[] = {
253f8296c60SJoshua M. Clulow 	VIOIF_MACPROP_TXCOPY_THRESH,
254f8296c60SJoshua M. Clulow 	VIOIF_MACPROP_RXCOPY_THRESH,
2558a324c92SDan McDonald 	NULL
2568a324c92SDan McDonald };
2578a324c92SDan McDonald 
2589e0bf232SPatrick Mooney 
259f8296c60SJoshua M. Clulow static vioif_txbuf_t *
vioif_txbuf_alloc(vioif_t * vif)260f8296c60SJoshua M. Clulow vioif_txbuf_alloc(vioif_t *vif)
2618a324c92SDan McDonald {
262f8296c60SJoshua M. Clulow 	vioif_txbuf_t *tb;
2638a324c92SDan McDonald 
264f8296c60SJoshua M. Clulow 	VERIFY(MUTEX_HELD(&vif->vif_mutex));
2658a324c92SDan McDonald 
266f8296c60SJoshua M. Clulow 	if ((tb = list_remove_head(&vif->vif_txbufs)) != NULL) {
267f8296c60SJoshua M. Clulow 		vif->vif_ntxbufs_alloc++;
2688a324c92SDan McDonald 	}
2698a324c92SDan McDonald 
270f8296c60SJoshua M. Clulow 	return (tb);
2718a324c92SDan McDonald }
2728a324c92SDan McDonald 
2738a324c92SDan McDonald static void
vioif_txbuf_free(vioif_t * vif,vioif_txbuf_t * tb)274f8296c60SJoshua M. Clulow vioif_txbuf_free(vioif_t *vif, vioif_txbuf_t *tb)
2758a324c92SDan McDonald {
276f8296c60SJoshua M. Clulow 	VERIFY(MUTEX_HELD(&vif->vif_mutex));
277f8296c60SJoshua M. Clulow 
278f8296c60SJoshua M. Clulow 	VERIFY3U(vif->vif_ntxbufs_alloc, >, 0);
279f8296c60SJoshua M. Clulow 	vif->vif_ntxbufs_alloc--;
2808a324c92SDan McDonald 
281f8296c60SJoshua M. Clulow 	virtio_chain_clear(tb->tb_chain);
282f8296c60SJoshua M. Clulow 	list_insert_head(&vif->vif_txbufs, tb);
2838a324c92SDan McDonald }
2848a324c92SDan McDonald 
285f8296c60SJoshua M. Clulow static vioif_rxbuf_t *
vioif_rxbuf_alloc(vioif_t * vif)286f8296c60SJoshua M. Clulow vioif_rxbuf_alloc(vioif_t *vif)
2878a324c92SDan McDonald {
288f8296c60SJoshua M. Clulow 	vioif_rxbuf_t *rb;
2898a324c92SDan McDonald 
290f8296c60SJoshua M. Clulow 	VERIFY(MUTEX_HELD(&vif->vif_mutex));
2918a324c92SDan McDonald 
292f8296c60SJoshua M. Clulow 	if ((rb = list_remove_head(&vif->vif_rxbufs)) != NULL) {
293f8296c60SJoshua M. Clulow 		vif->vif_nrxbufs_alloc++;
2948a324c92SDan McDonald 	}
2958a324c92SDan McDonald 
296f8296c60SJoshua M. Clulow 	return (rb);
297f8296c60SJoshua M. Clulow }
2988a324c92SDan McDonald 
299f8296c60SJoshua M. Clulow static void
vioif_rxbuf_free(vioif_t * vif,vioif_rxbuf_t * rb)300f8296c60SJoshua M. Clulow vioif_rxbuf_free(vioif_t *vif, vioif_rxbuf_t *rb)
301f8296c60SJoshua M. Clulow {
302f8296c60SJoshua M. Clulow 	VERIFY(MUTEX_HELD(&vif->vif_mutex));
3038a324c92SDan McDonald 
304f8296c60SJoshua M. Clulow 	VERIFY3U(vif->vif_nrxbufs_alloc, >, 0);
305f8296c60SJoshua M. Clulow 	vif->vif_nrxbufs_alloc--;
3068a324c92SDan McDonald 
307f8296c60SJoshua M. Clulow 	virtio_chain_clear(rb->rb_chain);
308f8296c60SJoshua M. Clulow 	list_insert_head(&vif->vif_rxbufs, rb);
3098a324c92SDan McDonald }
3108a324c92SDan McDonald 
3118a324c92SDan McDonald static void
vioif_rx_free_callback(caddr_t free_arg)312f8296c60SJoshua M. Clulow vioif_rx_free_callback(caddr_t free_arg)
3138a324c92SDan McDonald {
314f8296c60SJoshua M. Clulow 	vioif_rxbuf_t *rb = (vioif_rxbuf_t *)free_arg;
315f8296c60SJoshua M. Clulow 	vioif_t *vif = rb->rb_vioif;
316f8296c60SJoshua M. Clulow 
317f8296c60SJoshua M. Clulow 	mutex_enter(&vif->vif_mutex);
318f8296c60SJoshua M. Clulow 
319f8296c60SJoshua M. Clulow 	/*
320f8296c60SJoshua M. Clulow 	 * Return this receive buffer to the free list.
321f8296c60SJoshua M. Clulow 	 */
322f8296c60SJoshua M. Clulow 	vioif_rxbuf_free(vif, rb);
323f8296c60SJoshua M. Clulow 
324f8296c60SJoshua M. Clulow 	VERIFY3U(vif->vif_nrxbufs_onloan, >, 0);
325f8296c60SJoshua M. Clulow 	vif->vif_nrxbufs_onloan--;
3268a324c92SDan McDonald 
327f8296c60SJoshua M. Clulow 	/*
328f8296c60SJoshua M. Clulow 	 * Attempt to replenish the receive queue with at least the buffer we
329f8296c60SJoshua M. Clulow 	 * just freed.  There isn't a great way to deal with failure here,
330f8296c60SJoshua M. Clulow 	 * though because we'll only loan at most half of the buffers there
331f8296c60SJoshua M. Clulow 	 * should always be at least some available even if this fails.
332f8296c60SJoshua M. Clulow 	 */
333f8296c60SJoshua M. Clulow 	(void) vioif_add_rx(vif);
3348a324c92SDan McDonald 
335f8296c60SJoshua M. Clulow 	mutex_exit(&vif->vif_mutex);
3368a324c92SDan McDonald }
3378a324c92SDan McDonald 
33835d41f28SJason King static vioif_ctrlbuf_t *
vioif_ctrlbuf_alloc(vioif_t * vif)33935d41f28SJason King vioif_ctrlbuf_alloc(vioif_t *vif)
34035d41f28SJason King {
34135d41f28SJason King 	vioif_ctrlbuf_t *cb;
34235d41f28SJason King 
34335d41f28SJason King 	VERIFY(MUTEX_HELD(&vif->vif_mutex));
34435d41f28SJason King 
34535d41f28SJason King 	if ((cb = list_remove_head(&vif->vif_ctrlbufs)) != NULL) {
34635d41f28SJason King 		vif->vif_nctrlbufs_alloc++;
34735d41f28SJason King 	}
34835d41f28SJason King 
34935d41f28SJason King 	return (cb);
35035d41f28SJason King }
35135d41f28SJason King 
35235d41f28SJason King static void
vioif_ctrlbuf_free(vioif_t * vif,vioif_ctrlbuf_t * cb)35335d41f28SJason King vioif_ctrlbuf_free(vioif_t *vif, vioif_ctrlbuf_t *cb)
35435d41f28SJason King {
35535d41f28SJason King 	VERIFY(MUTEX_HELD(&vif->vif_mutex));
35635d41f28SJason King 
35735d41f28SJason King 	VERIFY3U(vif->vif_nctrlbufs_alloc, >, 0);
35835d41f28SJason King 	vif->vif_nctrlbufs_alloc--;
35935d41f28SJason King 
36035d41f28SJason King 	virtio_chain_clear(cb->cb_chain);
36135d41f28SJason King 	list_insert_head(&vif->vif_ctrlbufs, cb);
36235d41f28SJason King }
36335d41f28SJason King 
3648a324c92SDan McDonald static void
vioif_free_bufs(vioif_t * vif)365f8296c60SJoshua M. Clulow vioif_free_bufs(vioif_t *vif)
3668a324c92SDan McDonald {
367f8296c60SJoshua M. Clulow 	VERIFY(MUTEX_HELD(&vif->vif_mutex));
3688a324c92SDan McDonald 
369f8296c60SJoshua M. Clulow 	VERIFY3U(vif->vif_ntxbufs_alloc, ==, 0);
370f8296c60SJoshua M. Clulow 	for (uint_t i = 0; i < vif->vif_txbufs_capacity; i++) {
371f8296c60SJoshua M. Clulow 		vioif_txbuf_t *tb = &vif->vif_txbufs_mem[i];
3728a324c92SDan McDonald 
373f8296c60SJoshua M. Clulow 		/*
374f8296c60SJoshua M. Clulow 		 * Ensure that this txbuf is now in the free list:
375f8296c60SJoshua M. Clulow 		 */
376f8296c60SJoshua M. Clulow 		VERIFY(list_link_active(&tb->tb_link));
377f8296c60SJoshua M. Clulow 		list_remove(&vif->vif_txbufs, tb);
3788a324c92SDan McDonald 
379f8296c60SJoshua M. Clulow 		/*
380f8296c60SJoshua M. Clulow 		 * We should not have an mblk chain at this point.
381f8296c60SJoshua M. Clulow 		 */
382f8296c60SJoshua M. Clulow 		VERIFY3P(tb->tb_mp, ==, NULL);
3838a324c92SDan McDonald 
384f8296c60SJoshua M. Clulow 		if (tb->tb_dma != NULL) {
385f8296c60SJoshua M. Clulow 			virtio_dma_free(tb->tb_dma);
386f8296c60SJoshua M. Clulow 			tb->tb_dma = NULL;
387f8296c60SJoshua M. Clulow 		}
3888a324c92SDan McDonald 
389f8296c60SJoshua M. Clulow 		if (tb->tb_chain != NULL) {
390f8296c60SJoshua M. Clulow 			virtio_chain_free(tb->tb_chain);
391f8296c60SJoshua M. Clulow 			tb->tb_chain = NULL;
392f8296c60SJoshua M. Clulow 		}
393f8296c60SJoshua M. Clulow 
394f8296c60SJoshua M. Clulow 		if (tb->tb_dmaext != NULL) {
395f8296c60SJoshua M. Clulow 			for (uint_t j = 0; j < tb->tb_dmaext_capacity; j++) {
396f8296c60SJoshua M. Clulow 				if (tb->tb_dmaext[j] != NULL) {
397f8296c60SJoshua M. Clulow 					virtio_dma_free(
398f8296c60SJoshua M. Clulow 					    tb->tb_dmaext[j]);
399f8296c60SJoshua M. Clulow 					tb->tb_dmaext[j] = NULL;
400f8296c60SJoshua M. Clulow 				}
401f8296c60SJoshua M. Clulow 			}
4028a324c92SDan McDonald 
403f8296c60SJoshua M. Clulow 			kmem_free(tb->tb_dmaext,
404f8296c60SJoshua M. Clulow 			    sizeof (virtio_dma_t *) * tb->tb_dmaext_capacity);
405f8296c60SJoshua M. Clulow 			tb->tb_dmaext = NULL;
406f8296c60SJoshua M. Clulow 			tb->tb_dmaext_capacity = 0;
407f8296c60SJoshua M. Clulow 		}
408f8296c60SJoshua M. Clulow 	}
409f8296c60SJoshua M. Clulow 	VERIFY(list_is_empty(&vif->vif_txbufs));
410f8296c60SJoshua M. Clulow 	if (vif->vif_txbufs_mem != NULL) {
411f8296c60SJoshua M. Clulow 		kmem_free(vif->vif_txbufs_mem,
412f8296c60SJoshua M. Clulow 		    sizeof (vioif_txbuf_t) * vif->vif_txbufs_capacity);
413f8296c60SJoshua M. Clulow 		vif->vif_txbufs_mem = NULL;
414f8296c60SJoshua M. Clulow 		vif->vif_txbufs_capacity = 0;
4158a324c92SDan McDonald 	}
4168a324c92SDan McDonald 
417f8296c60SJoshua M. Clulow 	VERIFY3U(vif->vif_nrxbufs_alloc, ==, 0);
418f8296c60SJoshua M. Clulow 	for (uint_t i = 0; i < vif->vif_rxbufs_capacity; i++) {
419f8296c60SJoshua M. Clulow 		vioif_rxbuf_t *rb = &vif->vif_rxbufs_mem[i];
4208a324c92SDan McDonald 
421f8296c60SJoshua M. Clulow 		/*
422f8296c60SJoshua M. Clulow 		 * Ensure that this rxbuf is now in the free list:
423f8296c60SJoshua M. Clulow 		 */
424f8296c60SJoshua M. Clulow 		VERIFY(list_link_active(&rb->rb_link));
425f8296c60SJoshua M. Clulow 		list_remove(&vif->vif_rxbufs, rb);
4268a324c92SDan McDonald 
427f8296c60SJoshua M. Clulow 		if (rb->rb_dma != NULL) {
428f8296c60SJoshua M. Clulow 			virtio_dma_free(rb->rb_dma);
429f8296c60SJoshua M. Clulow 			rb->rb_dma = NULL;
430f8296c60SJoshua M. Clulow 		}
431f8296c60SJoshua M. Clulow 
432f8296c60SJoshua M. Clulow 		if (rb->rb_chain != NULL) {
433f8296c60SJoshua M. Clulow 			virtio_chain_free(rb->rb_chain);
434f8296c60SJoshua M. Clulow 			rb->rb_chain = NULL;
435f8296c60SJoshua M. Clulow 		}
436f8296c60SJoshua M. Clulow 	}
437f8296c60SJoshua M. Clulow 	VERIFY(list_is_empty(&vif->vif_rxbufs));
438f8296c60SJoshua M. Clulow 	if (vif->vif_rxbufs_mem != NULL) {
439f8296c60SJoshua M. Clulow 		kmem_free(vif->vif_rxbufs_mem,
440f8296c60SJoshua M. Clulow 		    sizeof (vioif_rxbuf_t) * vif->vif_rxbufs_capacity);
441f8296c60SJoshua M. Clulow 		vif->vif_rxbufs_mem = NULL;
442f8296c60SJoshua M. Clulow 		vif->vif_rxbufs_capacity = 0;
4438a324c92SDan McDonald 	}
44435d41f28SJason King 
44535d41f28SJason King 	if (vif->vif_has_ctrlq) {
44635d41f28SJason King 		VERIFY3U(vif->vif_nctrlbufs_alloc, ==, 0);
44735d41f28SJason King 		for (uint_t i = 0; i < vif->vif_ctrlbufs_capacity; i++) {
44835d41f28SJason King 			vioif_ctrlbuf_t *cb = &vif->vif_ctrlbufs_mem[i];
44935d41f28SJason King 
45035d41f28SJason King 			/*
45135d41f28SJason King 			 * Ensure that this ctrlbuf is now in the free list
45235d41f28SJason King 			 */
45335d41f28SJason King 			VERIFY(list_link_active(&cb->cb_link));
45435d41f28SJason King 			list_remove(&vif->vif_ctrlbufs, cb);
45535d41f28SJason King 
45635d41f28SJason King 			if (cb->cb_dma != NULL) {
45735d41f28SJason King 				virtio_dma_free(cb->cb_dma);
45835d41f28SJason King 				cb->cb_dma = NULL;
45935d41f28SJason King 			}
46035d41f28SJason King 
46135d41f28SJason King 			if (cb->cb_chain != NULL) {
46235d41f28SJason King 				virtio_chain_free(cb->cb_chain);
46335d41f28SJason King 				cb->cb_chain = NULL;
46435d41f28SJason King 			}
46535d41f28SJason King 		}
46635d41f28SJason King 		VERIFY(list_is_empty(&vif->vif_ctrlbufs));
46735d41f28SJason King 		if (vif->vif_ctrlbufs_mem != NULL) {
46835d41f28SJason King 			kmem_free(vif->vif_ctrlbufs_mem,
46935d41f28SJason King 			    sizeof (vioif_ctrlbuf_t) *
47035d41f28SJason King 			    vif->vif_ctrlbufs_capacity);
47135d41f28SJason King 			vif->vif_ctrlbufs_mem = NULL;
47235d41f28SJason King 			vif->vif_ctrlbufs_capacity = 0;
47335d41f28SJason King 		}
47435d41f28SJason King 	}
4758a324c92SDan McDonald }
4768a324c92SDan McDonald 
4778a324c92SDan McDonald static int
vioif_alloc_bufs(vioif_t * vif)478f8296c60SJoshua M. Clulow vioif_alloc_bufs(vioif_t *vif)
4798a324c92SDan McDonald {
480f8296c60SJoshua M. Clulow 	VERIFY(MUTEX_HELD(&vif->vif_mutex));
4818a324c92SDan McDonald 
482f8296c60SJoshua M. Clulow 	/*
483f8296c60SJoshua M. Clulow 	 * Allocate one contiguous chunk of memory for the transmit and receive
484f8296c60SJoshua M. Clulow 	 * buffer tracking objects.  If the ring is unusually small, we'll
485f8296c60SJoshua M. Clulow 	 * reduce our target buffer count accordingly.
486f8296c60SJoshua M. Clulow 	 */
487f8296c60SJoshua M. Clulow 	vif->vif_txbufs_capacity = MIN(VIRTIO_NET_TX_BUFS,
488f8296c60SJoshua M. Clulow 	    virtio_queue_size(vif->vif_tx_vq));
489f8296c60SJoshua M. Clulow 	vif->vif_txbufs_mem = kmem_zalloc(
490f8296c60SJoshua M. Clulow 	    sizeof (vioif_txbuf_t) * vif->vif_txbufs_capacity, KM_SLEEP);
491f8296c60SJoshua M. Clulow 	list_create(&vif->vif_txbufs, sizeof (vioif_txbuf_t),
492f8296c60SJoshua M. Clulow 	    offsetof(vioif_txbuf_t, tb_link));
493f8296c60SJoshua M. Clulow 
494f8296c60SJoshua M. Clulow 	vif->vif_rxbufs_capacity = MIN(VIRTIO_NET_RX_BUFS,
495f8296c60SJoshua M. Clulow 	    virtio_queue_size(vif->vif_rx_vq));
496f8296c60SJoshua M. Clulow 	vif->vif_rxbufs_mem = kmem_zalloc(
497f8296c60SJoshua M. Clulow 	    sizeof (vioif_rxbuf_t) * vif->vif_rxbufs_capacity, KM_SLEEP);
498f8296c60SJoshua M. Clulow 	list_create(&vif->vif_rxbufs, sizeof (vioif_rxbuf_t),
499f8296c60SJoshua M. Clulow 	    offsetof(vioif_rxbuf_t, rb_link));
5008a324c92SDan McDonald 
50135d41f28SJason King 	if (vif->vif_has_ctrlq) {
50235d41f28SJason King 		vif->vif_ctrlbufs_capacity = MIN(VIRTIO_NET_CTRL_BUFS,
50335d41f28SJason King 		    virtio_queue_size(vif->vif_ctrl_vq));
50435d41f28SJason King 		vif->vif_ctrlbufs_mem = kmem_zalloc(
50535d41f28SJason King 		    sizeof (vioif_ctrlbuf_t) * vif->vif_ctrlbufs_capacity,
50635d41f28SJason King 		    KM_SLEEP);
50735d41f28SJason King 	}
50835d41f28SJason King 	list_create(&vif->vif_ctrlbufs, sizeof (vioif_ctrlbuf_t),
50935d41f28SJason King 	    offsetof(vioif_ctrlbuf_t, cb_link));
51035d41f28SJason King 
511f8296c60SJoshua M. Clulow 	/*
512f8296c60SJoshua M. Clulow 	 * Do not loan more than half of our allocated receive buffers into
513f8296c60SJoshua M. Clulow 	 * the networking stack.
514f8296c60SJoshua M. Clulow 	 */
515f8296c60SJoshua M. Clulow 	vif->vif_nrxbufs_onloan_max = vif->vif_rxbufs_capacity / 2;
5168a324c92SDan McDonald 
5178a324c92SDan McDonald 	/*
518f8296c60SJoshua M. Clulow 	 * Put everything in the free list straight away in order to simplify
519f8296c60SJoshua M. Clulow 	 * the use of vioif_free_bufs() for cleanup on allocation failure.
5208a324c92SDan McDonald 	 */
521f8296c60SJoshua M. Clulow 	for (uint_t i = 0; i < vif->vif_txbufs_capacity; i++) {
522f8296c60SJoshua M. Clulow 		list_insert_tail(&vif->vif_txbufs, &vif->vif_txbufs_mem[i]);
523f8296c60SJoshua M. Clulow 	}
524f8296c60SJoshua M. Clulow 	for (uint_t i = 0; i < vif->vif_rxbufs_capacity; i++) {
525f8296c60SJoshua M. Clulow 		list_insert_tail(&vif->vif_rxbufs, &vif->vif_rxbufs_mem[i]);
5268a324c92SDan McDonald 	}
52735d41f28SJason King 	for (uint_t i = 0; i < vif->vif_ctrlbufs_capacity; i++) {
52835d41f28SJason King 		list_insert_tail(&vif->vif_ctrlbufs, &vif->vif_ctrlbufs_mem[i]);
52935d41f28SJason King 	}
5308a324c92SDan McDonald 
531f8296c60SJoshua M. Clulow 	/*
532f8296c60SJoshua M. Clulow 	 * Start from the DMA attribute template common to both transmit and
533f8296c60SJoshua M. Clulow 	 * receive buffers.  The SGL entry count will be modified for each
534f8296c60SJoshua M. Clulow 	 * buffer type.
535f8296c60SJoshua M. Clulow 	 */
536f8296c60SJoshua M. Clulow 	ddi_dma_attr_t attr = vioif_dma_attr_bufs;
5378a324c92SDan McDonald 
538f8296c60SJoshua M. Clulow 	/*
539f8296c60SJoshua M. Clulow 	 * The transmit inline buffer is small (less than a page), so it's
540f8296c60SJoshua M. Clulow 	 * reasonable to request a single cookie.
541f8296c60SJoshua M. Clulow 	 */
542f8296c60SJoshua M. Clulow 	attr.dma_attr_sgllen = 1;
543f8296c60SJoshua M. Clulow 
544f8296c60SJoshua M. Clulow 	for (vioif_txbuf_t *tb = list_head(&vif->vif_txbufs); tb != NULL;
545f8296c60SJoshua M. Clulow 	    tb = list_next(&vif->vif_txbufs, tb)) {
546f8296c60SJoshua M. Clulow 		if ((tb->tb_dma = virtio_dma_alloc(vif->vif_virtio,
547f8296c60SJoshua M. Clulow 		    VIOIF_TX_INLINE_SIZE, &attr,
548f8296c60SJoshua M. Clulow 		    DDI_DMA_STREAMING | DDI_DMA_WRITE, KM_SLEEP)) == NULL) {
549f8296c60SJoshua M. Clulow 			goto fail;
5508a324c92SDan McDonald 		}
551f8296c60SJoshua M. Clulow 		VERIFY3U(virtio_dma_ncookies(tb->tb_dma), ==, 1);
5528a324c92SDan McDonald 
553f8296c60SJoshua M. Clulow 		if ((tb->tb_chain = virtio_chain_alloc(vif->vif_tx_vq,
554f8296c60SJoshua M. Clulow 		    KM_SLEEP)) == NULL) {
555f8296c60SJoshua M. Clulow 			goto fail;
5568a324c92SDan McDonald 		}
557f8296c60SJoshua M. Clulow 		virtio_chain_data_set(tb->tb_chain, tb);
5588a324c92SDan McDonald 
559f8296c60SJoshua M. Clulow 		tb->tb_dmaext_capacity = VIOIF_MAX_SEGS - 1;
560f8296c60SJoshua M. Clulow 		tb->tb_dmaext = kmem_zalloc(
561f8296c60SJoshua M. Clulow 		    sizeof (virtio_dma_t *) * tb->tb_dmaext_capacity,
562f8296c60SJoshua M. Clulow 		    KM_SLEEP);
563f8296c60SJoshua M. Clulow 	}
5648a324c92SDan McDonald 
56535d41f28SJason King 	/*
56635d41f28SJason King 	 * Control queue buffers are also small (less than a page), so we'll
56735d41f28SJason King 	 * also request a single cookie for them.
56835d41f28SJason King 	 */
56935d41f28SJason King 	for (vioif_ctrlbuf_t *cb = list_head(&vif->vif_ctrlbufs); cb != NULL;
57035d41f28SJason King 	    cb = list_next(&vif->vif_ctrlbufs, cb)) {
57135d41f28SJason King 		if ((cb->cb_dma = virtio_dma_alloc(vif->vif_virtio,
57235d41f28SJason King 		    VIOIF_CTRL_SIZE, &attr,
57335d41f28SJason King 		    DDI_DMA_STREAMING | DDI_DMA_RDWR, KM_SLEEP)) == NULL) {
57435d41f28SJason King 			goto fail;
57535d41f28SJason King 		}
57635d41f28SJason King 		VERIFY3U(virtio_dma_ncookies(cb->cb_dma), ==, 1);
57735d41f28SJason King 
57835d41f28SJason King 		if ((cb->cb_chain = virtio_chain_alloc(vif->vif_ctrl_vq,
57935d41f28SJason King 		    KM_SLEEP)) == NULL) {
58035d41f28SJason King 			goto fail;
58135d41f28SJason King 		}
58235d41f28SJason King 		virtio_chain_data_set(cb->cb_chain, cb);
58335d41f28SJason King 	}
58435d41f28SJason King 
585f8296c60SJoshua M. Clulow 	/*
586f8296c60SJoshua M. Clulow 	 * The receive buffers are larger, and we can tolerate a large number
587f8296c60SJoshua M. Clulow 	 * of segments.  Adjust the SGL entry count, setting aside one segment
588f8296c60SJoshua M. Clulow 	 * for the virtio net header.
589f8296c60SJoshua M. Clulow 	 */
590f8296c60SJoshua M. Clulow 	attr.dma_attr_sgllen = VIOIF_MAX_SEGS - 1;
591f8296c60SJoshua M. Clulow 
592f8296c60SJoshua M. Clulow 	for (vioif_rxbuf_t *rb = list_head(&vif->vif_rxbufs); rb != NULL;
593f8296c60SJoshua M. Clulow 	    rb = list_next(&vif->vif_rxbufs, rb)) {
594f8296c60SJoshua M. Clulow 		if ((rb->rb_dma = virtio_dma_alloc(vif->vif_virtio,
595f8296c60SJoshua M. Clulow 		    VIOIF_RX_BUF_SIZE, &attr, DDI_DMA_STREAMING | DDI_DMA_READ,
596f8296c60SJoshua M. Clulow 		    KM_SLEEP)) == NULL) {
597f8296c60SJoshua M. Clulow 			goto fail;
5988a324c92SDan McDonald 		}
5998a324c92SDan McDonald 
600f8296c60SJoshua M. Clulow 		if ((rb->rb_chain = virtio_chain_alloc(vif->vif_rx_vq,
601f8296c60SJoshua M. Clulow 		    KM_SLEEP)) == NULL) {
602f8296c60SJoshua M. Clulow 			goto fail;
603f8296c60SJoshua M. Clulow 		}
604f8296c60SJoshua M. Clulow 		virtio_chain_data_set(rb->rb_chain, rb);
6058a324c92SDan McDonald 
6068a324c92SDan McDonald 		/*
607f8296c60SJoshua M. Clulow 		 * Ensure that the first cookie is sufficient to cover the
608f8296c60SJoshua M. Clulow 		 * header skip region plus one byte.
6098a324c92SDan McDonald 		 */
610f8296c60SJoshua M. Clulow 		VERIFY3U(virtio_dma_cookie_size(rb->rb_dma, 0), >=,
611f8296c60SJoshua M. Clulow 		    VIOIF_HEADER_SKIP + 1);
6128a324c92SDan McDonald 
6138a324c92SDan McDonald 		/*
614f8296c60SJoshua M. Clulow 		 * Ensure that the frame data begins at a location with a
615f8296c60SJoshua M. Clulow 		 * correctly aligned IP header.
6168a324c92SDan McDonald 		 */
617f8296c60SJoshua M. Clulow 		VERIFY3U((uintptr_t)virtio_dma_va(rb->rb_dma,
618f8296c60SJoshua M. Clulow 		    VIOIF_HEADER_SKIP) % 4, ==, 2);
6198a324c92SDan McDonald 
620f8296c60SJoshua M. Clulow 		rb->rb_vioif = vif;
621f8296c60SJoshua M. Clulow 		rb->rb_frtn.free_func = vioif_rx_free_callback;
622f8296c60SJoshua M. Clulow 		rb->rb_frtn.free_arg = (caddr_t)rb;
6238a324c92SDan McDonald 	}
6248a324c92SDan McDonald 
625f8296c60SJoshua M. Clulow 	return (0);
6268a324c92SDan McDonald 
627f8296c60SJoshua M. Clulow fail:
628f8296c60SJoshua M. Clulow 	vioif_free_bufs(vif);
6298a324c92SDan McDonald 	return (ENOMEM);
6308a324c92SDan McDonald }
6318a324c92SDan McDonald 
63235d41f28SJason King static int
vioif_ctrlq_req(vioif_t * vif,uint8_t class,uint8_t cmd,void * data,size_t datalen)63335d41f28SJason King vioif_ctrlq_req(vioif_t *vif, uint8_t class, uint8_t cmd, void *data,
63435d41f28SJason King     size_t datalen)
63535d41f28SJason King {
63635d41f28SJason King 	vioif_ctrlbuf_t *cb = NULL;
63735d41f28SJason King 	virtio_chain_t *vic = NULL;
63835d41f28SJason King 	uint8_t *p = NULL;
63935d41f28SJason King 	uint64_t pa = 0;
64035d41f28SJason King 	uint8_t *ackp = NULL;
64135d41f28SJason King 	struct virtio_net_ctrlq_hdr hdr = {
64235d41f28SJason King 		.vnch_class = class,
64335d41f28SJason King 		.vnch_command = cmd,
64435d41f28SJason King 	};
64535d41f28SJason King 	const size_t hdrlen = sizeof (hdr);
64635d41f28SJason King 	const size_t acklen = 1; /* the ack is always 1 byte */
64735d41f28SJason King 	size_t totlen = hdrlen + datalen + acklen;
64835d41f28SJason King 	int r = DDI_SUCCESS;
64935d41f28SJason King 
65035d41f28SJason King 	/*
65135d41f28SJason King 	 * We shouldn't be called unless the ctrlq feature has been
65235d41f28SJason King 	 * negotiated with the host
65335d41f28SJason King 	 */
65435d41f28SJason King 	VERIFY(vif->vif_has_ctrlq);
65535d41f28SJason King 
65635d41f28SJason King 	mutex_enter(&vif->vif_mutex);
65735d41f28SJason King 	cb = vioif_ctrlbuf_alloc(vif);
65835d41f28SJason King 	if (cb == NULL) {
65935d41f28SJason King 		vif->vif_noctrlbuf++;
66035d41f28SJason King 		mutex_exit(&vif->vif_mutex);
66135d41f28SJason King 		r = DDI_FAILURE;
66235d41f28SJason King 		goto done;
66335d41f28SJason King 	}
66435d41f28SJason King 	mutex_exit(&vif->vif_mutex);
66535d41f28SJason King 
66635d41f28SJason King 	if (totlen > virtio_dma_size(cb->cb_dma)) {
66735d41f28SJason King 		vif->vif_ctrlbuf_toosmall++;
66835d41f28SJason King 		r = DDI_FAILURE;
66935d41f28SJason King 		goto done;
67035d41f28SJason King 	}
67135d41f28SJason King 
67235d41f28SJason King 	/*
67335d41f28SJason King 	 * Clear the entire buffer. Technically not necessary, but useful
67435d41f28SJason King 	 * if trying to troubleshoot an issue, and probably not a bad idea
67535d41f28SJason King 	 * to not let any old data linger.
67635d41f28SJason King 	 */
67735d41f28SJason King 	p = virtio_dma_va(cb->cb_dma, 0);
67835d41f28SJason King 	bzero(p, virtio_dma_size(cb->cb_dma));
67935d41f28SJason King 
68035d41f28SJason King 	/*
68135d41f28SJason King 	 * We currently do not support VIRTIO_F_ANY_LAYOUT. That means,
68235d41f28SJason King 	 * that we must put the header, the data, and the ack in their
68335d41f28SJason King 	 * own respective descriptors. Since all the currently supported
68435d41f28SJason King 	 * control queue commands take _very_ small amounts of data, we
68535d41f28SJason King 	 * use a single DMA buffer for all of it, but use 3 descriptors to
68635d41f28SJason King 	 * reference (respectively) the header, the data, and the ack byte
68735d41f28SJason King 	 * within that memory to adhere to the virtio spec.
68835d41f28SJason King 	 *
68935d41f28SJason King 	 * If we add support for control queue features such as custom
69035d41f28SJason King 	 * MAC filtering tables, which might require larger amounts of
69135d41f28SJason King 	 * memory, we likely will want to add more sophistication here
69235d41f28SJason King 	 * and optionally use additional allocated memory to hold that
69335d41f28SJason King 	 * data instead of a fixed size buffer.
69435d41f28SJason King 	 *
69535d41f28SJason King 	 * Copy the header.
69635d41f28SJason King 	 */
69735d41f28SJason King 	bcopy(&hdr, p, sizeof (hdr));
69835d41f28SJason King 	pa = virtio_dma_cookie_pa(cb->cb_dma, 0);
69935d41f28SJason King 	if ((r = virtio_chain_append(cb->cb_chain,
70035d41f28SJason King 	    pa, hdrlen, VIRTIO_DIR_DEVICE_READS)) != DDI_SUCCESS) {
70135d41f28SJason King 		goto done;
70235d41f28SJason King 	}
70335d41f28SJason King 
70435d41f28SJason King 	/*
70535d41f28SJason King 	 * Copy the request data
70635d41f28SJason King 	 */
70735d41f28SJason King 	p = virtio_dma_va(cb->cb_dma, hdrlen);
70835d41f28SJason King 	bcopy(data, p, datalen);
70935d41f28SJason King 	if ((r = virtio_chain_append(cb->cb_chain,
71035d41f28SJason King 	    pa + hdrlen, datalen, VIRTIO_DIR_DEVICE_READS)) != DDI_SUCCESS) {
71135d41f28SJason King 		goto done;
71235d41f28SJason King 	}
71335d41f28SJason King 
71435d41f28SJason King 	/*
71535d41f28SJason King 	 * We already cleared the buffer, so don't need to copy out a 0 for
71635d41f28SJason King 	 * the ack byte. Just add a descriptor for that spot.
71735d41f28SJason King 	 */
71835d41f28SJason King 	ackp = virtio_dma_va(cb->cb_dma, hdrlen + datalen);
71935d41f28SJason King 	if ((r = virtio_chain_append(cb->cb_chain,
72035d41f28SJason King 	    pa + hdrlen + datalen, acklen,
72135d41f28SJason King 	    VIRTIO_DIR_DEVICE_WRITES)) != DDI_SUCCESS) {
72235d41f28SJason King 		goto done;
72335d41f28SJason King 	}
72435d41f28SJason King 
72535d41f28SJason King 	virtio_dma_sync(cb->cb_dma, DDI_DMA_SYNC_FORDEV);
72635d41f28SJason King 	virtio_chain_submit(cb->cb_chain, B_TRUE);
72735d41f28SJason King 
72835d41f28SJason King 	/*
72935d41f28SJason King 	 * Spin waiting for response.
73035d41f28SJason King 	 */
73135d41f28SJason King 	mutex_enter(&vif->vif_mutex);
73235d41f28SJason King 	while ((vic = virtio_queue_poll(vif->vif_ctrl_vq)) == NULL) {
73335d41f28SJason King 		mutex_exit(&vif->vif_mutex);
73435d41f28SJason King 		delay(drv_usectohz(1000));
73535d41f28SJason King 		mutex_enter(&vif->vif_mutex);
73635d41f28SJason King 	}
73735d41f28SJason King 
73835d41f28SJason King 	virtio_dma_sync(cb->cb_dma, DDI_DMA_SYNC_FORCPU);
73935d41f28SJason King 	VERIFY3P(virtio_chain_data(vic), ==, cb);
74035d41f28SJason King 	mutex_exit(&vif->vif_mutex);
74135d41f28SJason King 
74235d41f28SJason King 	if (*ackp != VIRTIO_NET_CQ_OK) {
74335d41f28SJason King 		r = DDI_FAILURE;
74435d41f28SJason King 	}
74535d41f28SJason King 
74635d41f28SJason King done:
74735d41f28SJason King 	mutex_enter(&vif->vif_mutex);
74835d41f28SJason King 	vioif_ctrlbuf_free(vif, cb);
74935d41f28SJason King 	mutex_exit(&vif->vif_mutex);
75035d41f28SJason King 
75135d41f28SJason King 	return (r);
75235d41f28SJason King }
75335d41f28SJason King 
7549e0bf232SPatrick Mooney static int
vioif_m_multicst(void * arg,boolean_t add,const uint8_t * mcst_addr)755f8296c60SJoshua M. Clulow vioif_m_multicst(void *arg, boolean_t add, const uint8_t *mcst_addr)
7568a324c92SDan McDonald {
757f8296c60SJoshua M. Clulow 	/*
758f8296c60SJoshua M. Clulow 	 * Even though we currently do not have support for programming
759f8296c60SJoshua M. Clulow 	 * multicast filters, or even enabling promiscuous mode, we return
760f8296c60SJoshua M. Clulow 	 * success here to avoid the networking stack falling back to link
761f8296c60SJoshua M. Clulow 	 * layer broadcast for multicast traffic.  Some hypervisors already
762f8296c60SJoshua M. Clulow 	 * pass received multicast frames onto the guest, so at least on those
763f8296c60SJoshua M. Clulow 	 * systems multicast will work as expected anyway.
764f8296c60SJoshua M. Clulow 	 */
765f8296c60SJoshua M. Clulow 	return (0);
7668a324c92SDan McDonald }
7678a324c92SDan McDonald 
7689e0bf232SPatrick Mooney static int
vioif_m_setpromisc(void * arg,boolean_t on)769f8296c60SJoshua M. Clulow vioif_m_setpromisc(void *arg, boolean_t on)
7708a324c92SDan McDonald {
77135d41f28SJason King 	vioif_t *vif = arg;
77235d41f28SJason King 	uint8_t val = on ? 1 : 0;
77335d41f28SJason King 
77435d41f28SJason King 	if (!vif->vif_has_ctrlq_rx) {
775*d4221574SAndy Fiddaman 		if (vioif_fake_promisc_success)
776*d4221574SAndy Fiddaman 			return (0);
777*d4221574SAndy Fiddaman 
778*d4221574SAndy Fiddaman 		return (ENOTSUP);
77935d41f28SJason King 	}
78035d41f28SJason King 
78135d41f28SJason King 	return (vioif_ctrlq_req(vif, VIRTIO_NET_CTRL_RX,
78235d41f28SJason King 	    VIRTIO_NET_CTRL_RX_PROMISC, &val, sizeof (val)));
7838a324c92SDan McDonald }
7848a324c92SDan McDonald 
7859e0bf232SPatrick Mooney static int
vioif_m_unicst(void * arg,const uint8_t * mac)786f8296c60SJoshua M. Clulow vioif_m_unicst(void *arg, const uint8_t *mac)
7878a324c92SDan McDonald {
788f8296c60SJoshua M. Clulow 	return (ENOTSUP);
7898a324c92SDan McDonald }
7908a324c92SDan McDonald 
791970db7b7SDan Kimmel static uint_t
vioif_add_rx(vioif_t * vif)792f8296c60SJoshua M. Clulow vioif_add_rx(vioif_t *vif)
7938a324c92SDan McDonald {
794f8296c60SJoshua M. Clulow 	VERIFY(MUTEX_HELD(&vif->vif_mutex));
7958a324c92SDan McDonald 
796f8296c60SJoshua M. Clulow 	if (vif->vif_runstate != VIOIF_RUNSTATE_RUNNING) {
797970db7b7SDan Kimmel 		/*
798f8296c60SJoshua M. Clulow 		 * If the NIC is not running, do not give the device any
799f8296c60SJoshua M. Clulow 		 * receive buffers.
800970db7b7SDan Kimmel 		 */
801f8296c60SJoshua M. Clulow 		return (0);
802f8296c60SJoshua M. Clulow 	}
8038a324c92SDan McDonald 
804f8296c60SJoshua M. Clulow 	uint_t num_added = 0;
8058a324c92SDan McDonald 
806f8296c60SJoshua M. Clulow 	vioif_rxbuf_t *rb;
807f8296c60SJoshua M. Clulow 	while ((rb = vioif_rxbuf_alloc(vif)) != NULL) {
808970db7b7SDan Kimmel 		/*
809f8296c60SJoshua M. Clulow 		 * For legacy devices, and those that have not negotiated
810f8296c60SJoshua M. Clulow 		 * VIRTIO_F_ANY_LAYOUT, the virtio net header must appear in a
811f8296c60SJoshua M. Clulow 		 * separate descriptor entry to the rest of the buffer.
812970db7b7SDan Kimmel 		 */
813f8296c60SJoshua M. Clulow 		if (virtio_chain_append(rb->rb_chain,
814f8296c60SJoshua M. Clulow 		    virtio_dma_cookie_pa(rb->rb_dma, 0),
815f8296c60SJoshua M. Clulow 		    sizeof (struct virtio_net_hdr),
816f8296c60SJoshua M. Clulow 		    VIRTIO_DIR_DEVICE_WRITES) != DDI_SUCCESS) {
817f8296c60SJoshua M. Clulow 			goto fail;
818970db7b7SDan Kimmel 		}
8198a324c92SDan McDonald 
820f8296c60SJoshua M. Clulow 		for (uint_t n = 0; n < virtio_dma_ncookies(rb->rb_dma); n++) {
821f8296c60SJoshua M. Clulow 			uint64_t pa = virtio_dma_cookie_pa(rb->rb_dma, n);
822f8296c60SJoshua M. Clulow 			size_t sz = virtio_dma_cookie_size(rb->rb_dma, n);
8238a324c92SDan McDonald 
824f8296c60SJoshua M. Clulow 			if (n == 0) {
825f8296c60SJoshua M. Clulow 				pa += VIOIF_HEADER_SKIP;
826f8296c60SJoshua M. Clulow 				VERIFY3U(sz, >, VIOIF_HEADER_SKIP);
827f8296c60SJoshua M. Clulow 				sz -= VIOIF_HEADER_SKIP;
828f8296c60SJoshua M. Clulow 			}
8298a324c92SDan McDonald 
830f8296c60SJoshua M. Clulow 			if (virtio_chain_append(rb->rb_chain, pa, sz,
831f8296c60SJoshua M. Clulow 			    VIRTIO_DIR_DEVICE_WRITES) != DDI_SUCCESS) {
832f8296c60SJoshua M. Clulow 				goto fail;
833f8296c60SJoshua M. Clulow 			}
834f8296c60SJoshua M. Clulow 		}
835f8296c60SJoshua M. Clulow 
836f8296c60SJoshua M. Clulow 		virtio_chain_submit(rb->rb_chain, B_FALSE);
837f8296c60SJoshua M. Clulow 		num_added++;
838f8296c60SJoshua M. Clulow 		continue;
8398a324c92SDan McDonald 
840f8296c60SJoshua M. Clulow fail:
841f8296c60SJoshua M. Clulow 		vioif_rxbuf_free(vif, rb);
842f8296c60SJoshua M. Clulow 		vif->vif_norecvbuf++;
843f8296c60SJoshua M. Clulow 		break;
844f8296c60SJoshua M. Clulow 	}
845f8296c60SJoshua M. Clulow 
846f8296c60SJoshua M. Clulow 	if (num_added > 0) {
847f8296c60SJoshua M. Clulow 		virtio_queue_flush(vif->vif_rx_vq);
848f8296c60SJoshua M. Clulow 	}
8498a324c92SDan McDonald 
850970db7b7SDan Kimmel 	return (num_added);
8518a324c92SDan McDonald }
8528a324c92SDan McDonald 
853970db7b7SDan Kimmel static uint_t
vioif_process_rx(vioif_t * vif)854f8296c60SJoshua M. Clulow vioif_process_rx(vioif_t *vif)
8558a324c92SDan McDonald {
856f8296c60SJoshua M. Clulow 	virtio_chain_t *vic;
857970db7b7SDan Kimmel 	mblk_t *mphead = NULL, *lastmp = NULL, *mp;
858970db7b7SDan Kimmel 	uint_t num_processed = 0;
8598a324c92SDan McDonald 
860f8296c60SJoshua M. Clulow 	VERIFY(MUTEX_HELD(&vif->vif_mutex));
8618a324c92SDan McDonald 
862f8296c60SJoshua M. Clulow 	while ((vic = virtio_queue_poll(vif->vif_rx_vq)) != NULL) {
863f8296c60SJoshua M. Clulow 		/*
864f8296c60SJoshua M. Clulow 		 * We have to use the chain received length here, as the device
865f8296c60SJoshua M. Clulow 		 * does not tell us the received frame length any other way.
866f8296c60SJoshua M. Clulow 		 * In a limited survey of hypervisors, virtio network devices
867f8296c60SJoshua M. Clulow 		 * appear to provide the right value here.
868f8296c60SJoshua M. Clulow 		 */
869f8296c60SJoshua M. Clulow 		size_t len = virtio_chain_received_length(vic);
870f8296c60SJoshua M. Clulow 		vioif_rxbuf_t *rb = virtio_chain_data(vic);
8718a324c92SDan McDonald 
872f8296c60SJoshua M. Clulow 		virtio_dma_sync(rb->rb_dma, DDI_DMA_SYNC_FORCPU);
873f8296c60SJoshua M. Clulow 
874f8296c60SJoshua M. Clulow 		/*
875f8296c60SJoshua M. Clulow 		 * If the NIC is not running, discard any received frames.
876f8296c60SJoshua M. Clulow 		 */
877f8296c60SJoshua M. Clulow 		if (vif->vif_runstate != VIOIF_RUNSTATE_RUNNING) {
878f8296c60SJoshua M. Clulow 			vioif_rxbuf_free(vif, rb);
8798a324c92SDan McDonald 			continue;
8808a324c92SDan McDonald 		}
8818a324c92SDan McDonald 
882f8296c60SJoshua M. Clulow 		if (len < sizeof (struct virtio_net_hdr)) {
883f8296c60SJoshua M. Clulow 			vif->vif_rxfail_chain_undersize++;
884f8296c60SJoshua M. Clulow 			vif->vif_ierrors++;
885f8296c60SJoshua M. Clulow 			vioif_rxbuf_free(vif, rb);
886f8296c60SJoshua M. Clulow 			continue;
887f8296c60SJoshua M. Clulow 		}
8888a324c92SDan McDonald 		len -= sizeof (struct virtio_net_hdr);
889f8296c60SJoshua M. Clulow 
8908a324c92SDan McDonald 		/*
891970db7b7SDan Kimmel 		 * We copy small packets that happen to fit into a single
8928a324c92SDan McDonald 		 * cookie and reuse the buffers. For bigger ones, we loan
8938a324c92SDan McDonald 		 * the buffers upstream.
8948a324c92SDan McDonald 		 */
895f8296c60SJoshua M. Clulow 		if (len < vif->vif_rxcopy_thresh ||
896f8296c60SJoshua M. Clulow 		    vif->vif_nrxbufs_onloan >= vif->vif_nrxbufs_onloan_max) {
897f8296c60SJoshua M. Clulow 			mutex_exit(&vif->vif_mutex);
898f8296c60SJoshua M. Clulow 			if ((mp = allocb(len, 0)) == NULL) {
899f8296c60SJoshua M. Clulow 				mutex_enter(&vif->vif_mutex);
900f8296c60SJoshua M. Clulow 				vif->vif_norecvbuf++;
901f8296c60SJoshua M. Clulow 				vif->vif_ierrors++;
902f8296c60SJoshua M. Clulow 
903f8296c60SJoshua M. Clulow 				vioif_rxbuf_free(vif, rb);
904f8296c60SJoshua M. Clulow 				continue;
9058a324c92SDan McDonald 			}
9068a324c92SDan McDonald 
907f8296c60SJoshua M. Clulow 			bcopy(virtio_dma_va(rb->rb_dma, VIOIF_HEADER_SKIP),
908f8296c60SJoshua M. Clulow 			    mp->b_rptr, len);
9098a324c92SDan McDonald 			mp->b_wptr = mp->b_rptr + len;
9108a324c92SDan McDonald 
911f8296c60SJoshua M. Clulow 			/*
912f8296c60SJoshua M. Clulow 			 * As the packet contents was copied rather than
913f8296c60SJoshua M. Clulow 			 * loaned, we can return the receive buffer resources
914f8296c60SJoshua M. Clulow 			 * to the free list.
915f8296c60SJoshua M. Clulow 			 */
916f8296c60SJoshua M. Clulow 			mutex_enter(&vif->vif_mutex);
917f8296c60SJoshua M. Clulow 			vioif_rxbuf_free(vif, rb);
918f8296c60SJoshua M. Clulow 
9198a324c92SDan McDonald 		} else {
920f8296c60SJoshua M. Clulow 			mutex_exit(&vif->vif_mutex);
921f8296c60SJoshua M. Clulow 			if ((mp = desballoc(virtio_dma_va(rb->rb_dma,
922f8296c60SJoshua M. Clulow 			    VIOIF_HEADER_SKIP), len, 0,
923f8296c60SJoshua M. Clulow 			    &rb->rb_frtn)) == NULL) {
924f8296c60SJoshua M. Clulow 				mutex_enter(&vif->vif_mutex);
925f8296c60SJoshua M. Clulow 				vif->vif_norecvbuf++;
926f8296c60SJoshua M. Clulow 				vif->vif_ierrors++;
927f8296c60SJoshua M. Clulow 
928f8296c60SJoshua M. Clulow 				vioif_rxbuf_free(vif, rb);
929f8296c60SJoshua M. Clulow 				continue;
9308a324c92SDan McDonald 			}
9318a324c92SDan McDonald 			mp->b_wptr = mp->b_rptr + len;
9328a324c92SDan McDonald 
933f8296c60SJoshua M. Clulow 			mutex_enter(&vif->vif_mutex);
934f8296c60SJoshua M. Clulow 			vif->vif_nrxbufs_onloan++;
9358a324c92SDan McDonald 		}
9368a324c92SDan McDonald 
9378a324c92SDan McDonald 		/*
9388a324c92SDan McDonald 		 * virtio-net does not tell us if this packet is multicast
9398a324c92SDan McDonald 		 * or broadcast, so we have to check it.
9408a324c92SDan McDonald 		 */
9418a324c92SDan McDonald 		if (mp->b_rptr[0] & 0x1) {
9428a324c92SDan McDonald 			if (bcmp(mp->b_rptr, vioif_broadcast, ETHERADDRL) != 0)
943f8296c60SJoshua M. Clulow 				vif->vif_multircv++;
9448a324c92SDan McDonald 			else
945f8296c60SJoshua M. Clulow 				vif->vif_brdcstrcv++;
9468a324c92SDan McDonald 		}
9478a324c92SDan McDonald 
948f8296c60SJoshua M. Clulow 		vif->vif_rbytes += len;
949f8296c60SJoshua M. Clulow 		vif->vif_ipackets++;
950970db7b7SDan Kimmel 
951970db7b7SDan Kimmel 		if (lastmp == NULL) {
952970db7b7SDan Kimmel 			mphead = mp;
953970db7b7SDan Kimmel 		} else {
954970db7b7SDan Kimmel 			lastmp->b_next = mp;
955970db7b7SDan Kimmel 		}
956970db7b7SDan Kimmel 		lastmp = mp;
957970db7b7SDan Kimmel 		num_processed++;
9588a324c92SDan McDonald 	}
9598a324c92SDan McDonald 
960970db7b7SDan Kimmel 	if (mphead != NULL) {
961f8296c60SJoshua M. Clulow 		if (vif->vif_runstate == VIOIF_RUNSTATE_RUNNING) {
962f8296c60SJoshua M. Clulow 			mutex_exit(&vif->vif_mutex);
963f8296c60SJoshua M. Clulow 			mac_rx(vif->vif_mac_handle, NULL, mphead);
964f8296c60SJoshua M. Clulow 			mutex_enter(&vif->vif_mutex);
965f8296c60SJoshua M. Clulow 		} else {
966f8296c60SJoshua M. Clulow 			/*
967f8296c60SJoshua M. Clulow 			 * The NIC was disabled part way through our execution,
968f8296c60SJoshua M. Clulow 			 * so free the messages we allocated.
969f8296c60SJoshua M. Clulow 			 */
970f8296c60SJoshua M. Clulow 			freemsgchain(mphead);
971f8296c60SJoshua M. Clulow 		}
972970db7b7SDan Kimmel 	}
973970db7b7SDan Kimmel 
974970db7b7SDan Kimmel 	return (num_processed);
9758a324c92SDan McDonald }
9768a324c92SDan McDonald 
977970db7b7SDan Kimmel static uint_t
vioif_reclaim_used_tx(vioif_t * vif)978f8296c60SJoshua M. Clulow vioif_reclaim_used_tx(vioif_t *vif)
9798a324c92SDan McDonald {
980f8296c60SJoshua M. Clulow 	virtio_chain_t *vic;
981970db7b7SDan Kimmel 	uint_t num_reclaimed = 0;
9828a324c92SDan McDonald 
983f8296c60SJoshua M. Clulow 	VERIFY(MUTEX_NOT_HELD(&vif->vif_mutex));
9849e0bf232SPatrick Mooney 
985f8296c60SJoshua M. Clulow 	while ((vic = virtio_queue_poll(vif->vif_tx_vq)) != NULL) {
986f8296c60SJoshua M. Clulow 		vioif_txbuf_t *tb = virtio_chain_data(vic);
9878a324c92SDan McDonald 
988f8296c60SJoshua M. Clulow 		if (tb->tb_mp != NULL) {
989f8296c60SJoshua M. Clulow 			/*
990f8296c60SJoshua M. Clulow 			 * Unbind the external mapping.
991f8296c60SJoshua M. Clulow 			 */
992f8296c60SJoshua M. Clulow 			for (uint_t i = 0; i < tb->tb_dmaext_capacity; i++) {
993f8296c60SJoshua M. Clulow 				if (tb->tb_dmaext[i] == NULL) {
994f8296c60SJoshua M. Clulow 					continue;
995f8296c60SJoshua M. Clulow 				}
9968a324c92SDan McDonald 
997f8296c60SJoshua M. Clulow 				virtio_dma_unbind(tb->tb_dmaext[i]);
9989e0bf232SPatrick Mooney 			}
999f8296c60SJoshua M. Clulow 
1000f8296c60SJoshua M. Clulow 			freemsg(tb->tb_mp);
1001f8296c60SJoshua M. Clulow 			tb->tb_mp = NULL;
10028a324c92SDan McDonald 		}
10038a324c92SDan McDonald 
1004f8296c60SJoshua M. Clulow 		/*
1005f8296c60SJoshua M. Clulow 		 * Return this transmit buffer to the free list for reuse.
1006f8296c60SJoshua M. Clulow 		 */
1007f8296c60SJoshua M. Clulow 		mutex_enter(&vif->vif_mutex);
1008f8296c60SJoshua M. Clulow 		vioif_txbuf_free(vif, tb);
1009f8296c60SJoshua M. Clulow 		mutex_exit(&vif->vif_mutex);
10108a324c92SDan McDonald 
1011970db7b7SDan Kimmel 		num_reclaimed++;
10128a324c92SDan McDonald 	}
10138a324c92SDan McDonald 
10149e0bf232SPatrick Mooney 	/* Return ring to transmitting state if descriptors were reclaimed. */
10159e0bf232SPatrick Mooney 	if (num_reclaimed > 0) {
10169e0bf232SPatrick Mooney 		boolean_t do_update = B_FALSE;
10179e0bf232SPatrick Mooney 
1018f8296c60SJoshua M. Clulow 		mutex_enter(&vif->vif_mutex);
1019f8296c60SJoshua M. Clulow 		vif->vif_stat_tx_reclaim += num_reclaimed;
1020f8296c60SJoshua M. Clulow 		if (vif->vif_tx_corked) {
10219e0bf232SPatrick Mooney 			/*
10229e0bf232SPatrick Mooney 			 * TX was corked on a lack of available descriptors.
10239e0bf232SPatrick Mooney 			 * That dire state has passed so the TX interrupt can
10249e0bf232SPatrick Mooney 			 * be disabled and MAC can be notified that
10259e0bf232SPatrick Mooney 			 * transmission is possible again.
10269e0bf232SPatrick Mooney 			 */
1027f8296c60SJoshua M. Clulow 			vif->vif_tx_corked = B_FALSE;
1028f8296c60SJoshua M. Clulow 			virtio_queue_no_interrupt(vif->vif_tx_vq, B_TRUE);
10299e0bf232SPatrick Mooney 			do_update = B_TRUE;
10309e0bf232SPatrick Mooney 		}
10319e0bf232SPatrick Mooney 
10329e0bf232SPatrick Mooney 		if (do_update) {
1033f8296c60SJoshua M. Clulow 			mac_tx_update(vif->vif_mac_handle);
10349e0bf232SPatrick Mooney 		}
1035f8296c60SJoshua M. Clulow 		mutex_exit(&vif->vif_mutex);
10368a324c92SDan McDonald 	}
1037970db7b7SDan Kimmel 
1038970db7b7SDan Kimmel 	return (num_reclaimed);
10398a324c92SDan McDonald }
10408a324c92SDan McDonald 
10419e0bf232SPatrick Mooney static void
vioif_reclaim_periodic(void * arg)10429e0bf232SPatrick Mooney vioif_reclaim_periodic(void *arg)
10439e0bf232SPatrick Mooney {
1044f8296c60SJoshua M. Clulow 	vioif_t *vif = arg;
10459e0bf232SPatrick Mooney 	uint_t num_reclaimed;
10469e0bf232SPatrick Mooney 
1047f8296c60SJoshua M. Clulow 	num_reclaimed = vioif_reclaim_used_tx(vif);
10489e0bf232SPatrick Mooney 
1049f8296c60SJoshua M. Clulow 	mutex_enter(&vif->vif_mutex);
1050f8296c60SJoshua M. Clulow 	vif->vif_tx_reclaim_tid = 0;
10519e0bf232SPatrick Mooney 	/*
10529e0bf232SPatrick Mooney 	 * If used descriptors were reclaimed or TX descriptors appear to be
10539e0bf232SPatrick Mooney 	 * outstanding, the ring is considered active and periodic reclamation
10549e0bf232SPatrick Mooney 	 * is necessary for now.
10559e0bf232SPatrick Mooney 	 */
1056f8296c60SJoshua M. Clulow 	if (num_reclaimed != 0 || virtio_queue_nactive(vif->vif_tx_vq) != 0) {
10579e0bf232SPatrick Mooney 		/* Do not reschedule if the ring is being drained. */
1058f8296c60SJoshua M. Clulow 		if (!vif->vif_tx_drain) {
1059f8296c60SJoshua M. Clulow 			vioif_reclaim_restart(vif);
10609e0bf232SPatrick Mooney 		}
10619e0bf232SPatrick Mooney 	}
1062f8296c60SJoshua M. Clulow 	mutex_exit(&vif->vif_mutex);
10639e0bf232SPatrick Mooney }
10649e0bf232SPatrick Mooney 
10659e0bf232SPatrick Mooney static void
vioif_reclaim_restart(vioif_t * vif)1066f8296c60SJoshua M. Clulow vioif_reclaim_restart(vioif_t *vif)
10679e0bf232SPatrick Mooney {
1068f8296c60SJoshua M. Clulow 	VERIFY(MUTEX_HELD(&vif->vif_mutex));
1069f8296c60SJoshua M. Clulow 	VERIFY(!vif->vif_tx_drain);
10709e0bf232SPatrick Mooney 
1071f8296c60SJoshua M. Clulow 	if (vif->vif_tx_reclaim_tid == 0) {
1072f8296c60SJoshua M. Clulow 		vif->vif_tx_reclaim_tid = timeout(vioif_reclaim_periodic, vif,
10739e0bf232SPatrick Mooney 		    MSEC_TO_TICK_ROUNDUP(vioif_reclaim_ms));
10749e0bf232SPatrick Mooney 	}
10759e0bf232SPatrick Mooney }
10769e0bf232SPatrick Mooney 
10779e0bf232SPatrick Mooney static void
vioif_tx_drain(vioif_t * vif)1078f8296c60SJoshua M. Clulow vioif_tx_drain(vioif_t *vif)
10799e0bf232SPatrick Mooney {
1080f8296c60SJoshua M. Clulow 	VERIFY(MUTEX_HELD(&vif->vif_mutex));
1081f8296c60SJoshua M. Clulow 	VERIFY3S(vif->vif_runstate, ==, VIOIF_RUNSTATE_STOPPING);
1082f8296c60SJoshua M. Clulow 
1083f8296c60SJoshua M. Clulow 	vif->vif_tx_drain = B_TRUE;
10849e0bf232SPatrick Mooney 	/* Put a stop to the periodic reclaim if it is running */
1085f8296c60SJoshua M. Clulow 	if (vif->vif_tx_reclaim_tid != 0) {
1086f8296c60SJoshua M. Clulow 		timeout_id_t tid = vif->vif_tx_reclaim_tid;
10879e0bf232SPatrick Mooney 
10889e0bf232SPatrick Mooney 		/*
1089f8296c60SJoshua M. Clulow 		 * With vif_tx_drain set, there is no risk that a racing
10909e0bf232SPatrick Mooney 		 * vioif_reclaim_periodic() call will reschedule itself.
10919e0bf232SPatrick Mooney 		 *
10929e0bf232SPatrick Mooney 		 * Being part of the mc_stop hook also guarantees that
1093f8296c60SJoshua M. Clulow 		 * vioif_m_tx() will not be called to restart it.
10949e0bf232SPatrick Mooney 		 */
1095f8296c60SJoshua M. Clulow 		vif->vif_tx_reclaim_tid = 0;
1096f8296c60SJoshua M. Clulow 		mutex_exit(&vif->vif_mutex);
10979e0bf232SPatrick Mooney 		(void) untimeout(tid);
1098f8296c60SJoshua M. Clulow 		mutex_enter(&vif->vif_mutex);
10999e0bf232SPatrick Mooney 	}
1100f8296c60SJoshua M. Clulow 	virtio_queue_no_interrupt(vif->vif_tx_vq, B_TRUE);
11019e0bf232SPatrick Mooney 
11029e0bf232SPatrick Mooney 	/*
11039e0bf232SPatrick Mooney 	 * Wait for all of the TX descriptors to be processed by the host so
11049e0bf232SPatrick Mooney 	 * they can be reclaimed.
11059e0bf232SPatrick Mooney 	 */
1106f8296c60SJoshua M. Clulow 	while (vif->vif_ntxbufs_alloc > 0) {
1107f8296c60SJoshua M. Clulow 		mutex_exit(&vif->vif_mutex);
1108f8296c60SJoshua M. Clulow 		(void) vioif_reclaim_used_tx(vif);
11099e0bf232SPatrick Mooney 		delay(5);
1110f8296c60SJoshua M. Clulow 		mutex_enter(&vif->vif_mutex);
11119e0bf232SPatrick Mooney 	}
1112f8296c60SJoshua M. Clulow 	VERIFY(!vif->vif_tx_corked);
1113f8296c60SJoshua M. Clulow 	VERIFY3U(vif->vif_tx_reclaim_tid, ==, 0);
1114f8296c60SJoshua M. Clulow 	VERIFY3U(virtio_queue_nactive(vif->vif_tx_vq), ==, 0);
11159e0bf232SPatrick Mooney }
11169e0bf232SPatrick Mooney 
1117f8296c60SJoshua M. Clulow static int
vioif_tx_inline(vioif_t * vif,vioif_txbuf_t * tb,mblk_t * mp,size_t msg_size)1118f8296c60SJoshua M. Clulow vioif_tx_inline(vioif_t *vif, vioif_txbuf_t *tb, mblk_t *mp, size_t msg_size)
11198a324c92SDan McDonald {
1120f8296c60SJoshua M. Clulow 	VERIFY(MUTEX_NOT_HELD(&vif->vif_mutex));
11218a324c92SDan McDonald 
1122f8296c60SJoshua M. Clulow 	VERIFY3U(msg_size, <=, virtio_dma_size(tb->tb_dma) - VIOIF_HEADER_SKIP);
11238a324c92SDan McDonald 
1124f8296c60SJoshua M. Clulow 	/*
1125f8296c60SJoshua M. Clulow 	 * Copy the message into the inline buffer and then free the message.
1126f8296c60SJoshua M. Clulow 	 */
1127f8296c60SJoshua M. Clulow 	mcopymsg(mp, virtio_dma_va(tb->tb_dma, VIOIF_HEADER_SKIP));
11288a324c92SDan McDonald 
1129f8296c60SJoshua M. Clulow 	if (virtio_chain_append(tb->tb_chain,
1130f8296c60SJoshua M. Clulow 	    virtio_dma_cookie_pa(tb->tb_dma, 0) + VIOIF_HEADER_SKIP,
1131f8296c60SJoshua M. Clulow 	    msg_size, VIRTIO_DIR_DEVICE_READS) != DDI_SUCCESS) {
1132f8296c60SJoshua M. Clulow 		return (DDI_FAILURE);
11338a324c92SDan McDonald 	}
11348a324c92SDan McDonald 
1135f8296c60SJoshua M. Clulow 	return (DDI_SUCCESS);
11368a324c92SDan McDonald }
11378a324c92SDan McDonald 
1138f8296c60SJoshua M. Clulow static int
vioif_tx_external(vioif_t * vif,vioif_txbuf_t * tb,mblk_t * mp,size_t msg_size)1139f8296c60SJoshua M. Clulow vioif_tx_external(vioif_t *vif, vioif_txbuf_t *tb, mblk_t *mp, size_t msg_size)
11408a324c92SDan McDonald {
1141f8296c60SJoshua M. Clulow 	VERIFY(MUTEX_NOT_HELD(&vif->vif_mutex));
11428a324c92SDan McDonald 
1143f8296c60SJoshua M. Clulow 	mblk_t *nmp = mp;
1144f8296c60SJoshua M. Clulow 	tb->tb_ndmaext = 0;
11458a324c92SDan McDonald 
1146f8296c60SJoshua M. Clulow 	while (nmp != NULL) {
11478a324c92SDan McDonald 		size_t len;
11488a324c92SDan McDonald 
1149f8296c60SJoshua M. Clulow 		if ((len = MBLKL(nmp)) == 0) {
1150f8296c60SJoshua M. Clulow 			/*
1151f8296c60SJoshua M. Clulow 			 * Skip any zero-length entries in the chain.
1152f8296c60SJoshua M. Clulow 			 */
11538a324c92SDan McDonald 			nmp = nmp->b_cont;
11548a324c92SDan McDonald 			continue;
11558a324c92SDan McDonald 		}
11568a324c92SDan McDonald 
1157f8296c60SJoshua M. Clulow 		if (tb->tb_ndmaext >= tb->tb_dmaext_capacity) {
1158f8296c60SJoshua M. Clulow 			mutex_enter(&vif->vif_mutex);
1159f8296c60SJoshua M. Clulow 			vif->vif_txfail_indirect_limit++;
1160f8296c60SJoshua M. Clulow 			vif->vif_notxbuf++;
1161f8296c60SJoshua M. Clulow 			mutex_exit(&vif->vif_mutex);
1162f8296c60SJoshua M. Clulow 			goto fail;
11638a324c92SDan McDonald 		}
11648a324c92SDan McDonald 
1165f8296c60SJoshua M. Clulow 		if (tb->tb_dmaext[tb->tb_ndmaext] == NULL) {
1166f8296c60SJoshua M. Clulow 			/*
1167f8296c60SJoshua M. Clulow 			 * Allocate a DMA handle for this slot.
1168f8296c60SJoshua M. Clulow 			 */
1169f8296c60SJoshua M. Clulow 			if ((tb->tb_dmaext[tb->tb_ndmaext] =
1170f8296c60SJoshua M. Clulow 			    virtio_dma_alloc_nomem(vif->vif_virtio,
1171f8296c60SJoshua M. Clulow 			    &vioif_dma_attr_external, KM_SLEEP)) == NULL) {
1172f8296c60SJoshua M. Clulow 				mutex_enter(&vif->vif_mutex);
1173f8296c60SJoshua M. Clulow 				vif->vif_notxbuf++;
1174f8296c60SJoshua M. Clulow 				mutex_exit(&vif->vif_mutex);
1175f8296c60SJoshua M. Clulow 				goto fail;
1176f8296c60SJoshua M. Clulow 			}
1177f8296c60SJoshua M. Clulow 		}
1178f8296c60SJoshua M. Clulow 		virtio_dma_t *extdma = tb->tb_dmaext[tb->tb_ndmaext++];
1179f8296c60SJoshua M. Clulow 
1180f8296c60SJoshua M. Clulow 		if (virtio_dma_bind(extdma, nmp->b_rptr, len,
1181f8296c60SJoshua M. Clulow 		    DDI_DMA_WRITE | DDI_DMA_STREAMING, KM_SLEEP) !=
1182f8296c60SJoshua M. Clulow 		    DDI_SUCCESS) {
1183f8296c60SJoshua M. Clulow 			mutex_enter(&vif->vif_mutex);
1184f8296c60SJoshua M. Clulow 			vif->vif_txfail_dma_bind++;
1185f8296c60SJoshua M. Clulow 			mutex_exit(&vif->vif_mutex);
1186f8296c60SJoshua M. Clulow 			goto fail;
11878a324c92SDan McDonald 		}
11888a324c92SDan McDonald 
1189f8296c60SJoshua M. Clulow 		for (uint_t n = 0; n < virtio_dma_ncookies(extdma); n++) {
1190f8296c60SJoshua M. Clulow 			uint64_t pa = virtio_dma_cookie_pa(extdma, n);
1191f8296c60SJoshua M. Clulow 			size_t sz = virtio_dma_cookie_size(extdma, n);
1192f8296c60SJoshua M. Clulow 
1193f8296c60SJoshua M. Clulow 			if (virtio_chain_append(tb->tb_chain, pa, sz,
1194f8296c60SJoshua M. Clulow 			    VIRTIO_DIR_DEVICE_READS) != DDI_SUCCESS) {
1195f8296c60SJoshua M. Clulow 				mutex_enter(&vif->vif_mutex);
1196f8296c60SJoshua M. Clulow 				vif->vif_txfail_indirect_limit++;
1197f8296c60SJoshua M. Clulow 				vif->vif_notxbuf++;
1198f8296c60SJoshua M. Clulow 				mutex_exit(&vif->vif_mutex);
1199f8296c60SJoshua M. Clulow 				goto fail;
1200f8296c60SJoshua M. Clulow 			}
1201f8296c60SJoshua M. Clulow 		}
12028a324c92SDan McDonald 
12038a324c92SDan McDonald 		nmp = nmp->b_cont;
12048a324c92SDan McDonald 	}
12058a324c92SDan McDonald 
1206f8296c60SJoshua M. Clulow 	/*
1207f8296c60SJoshua M. Clulow 	 * We need to keep the message around until we reclaim the buffer from
1208f8296c60SJoshua M. Clulow 	 * the device before freeing it.
1209f8296c60SJoshua M. Clulow 	 */
1210f8296c60SJoshua M. Clulow 	tb->tb_mp = mp;
12118a324c92SDan McDonald 
12128a324c92SDan McDonald 	return (DDI_SUCCESS);
12138a324c92SDan McDonald 
1214f8296c60SJoshua M. Clulow fail:
1215f8296c60SJoshua M. Clulow 	for (uint_t n = 0; n < tb->tb_ndmaext; n++) {
1216f8296c60SJoshua M. Clulow 		if (tb->tb_dmaext[n] != NULL) {
1217f8296c60SJoshua M. Clulow 			virtio_dma_unbind(tb->tb_dmaext[n]);
1218f8296c60SJoshua M. Clulow 		}
12198a324c92SDan McDonald 	}
1220f8296c60SJoshua M. Clulow 	tb->tb_ndmaext = 0;
12218a324c92SDan McDonald 
1222f8296c60SJoshua M. Clulow 	freemsg(mp);
1223f8296c60SJoshua M. Clulow 
1224f8296c60SJoshua M. Clulow 	return (DDI_FAILURE);
12258a324c92SDan McDonald }
12268a324c92SDan McDonald 
12278a324c92SDan McDonald static boolean_t
vioif_send(vioif_t * vif,mblk_t * mp)1228f8296c60SJoshua M. Clulow vioif_send(vioif_t *vif, mblk_t *mp)
12298a324c92SDan McDonald {
1230f8296c60SJoshua M. Clulow 	VERIFY(MUTEX_NOT_HELD(&vif->vif_mutex));
1231f8296c60SJoshua M. Clulow 
1232f8296c60SJoshua M. Clulow 	vioif_txbuf_t *tb = NULL;
1233f8296c60SJoshua M. Clulow 	struct virtio_net_hdr *vnh = NULL;
12348a324c92SDan McDonald 	size_t msg_size = 0;
12358a324c92SDan McDonald 	uint32_t csum_start;
12368a324c92SDan McDonald 	uint32_t csum_stuff;
12378a324c92SDan McDonald 	uint32_t csum_flags;
12388a324c92SDan McDonald 	uint32_t lso_flags;
12398a324c92SDan McDonald 	uint32_t lso_mss;
12408a324c92SDan McDonald 	mblk_t *nmp;
12418a324c92SDan McDonald 	int ret;
12428a324c92SDan McDonald 	boolean_t lso_required = B_FALSE;
1243f8296c60SJoshua M. Clulow 	struct ether_header *ether = (void *)mp->b_rptr;
12448a324c92SDan McDonald 
12458a324c92SDan McDonald 	for (nmp = mp; nmp; nmp = nmp->b_cont)
12468a324c92SDan McDonald 		msg_size += MBLKL(nmp);
12478a324c92SDan McDonald 
124862366fbbSRobert Mustacchi 	if (vif->vif_tx_tso4 || vif->vif_tx_tso6) {
12498a324c92SDan McDonald 		mac_lso_get(mp, &lso_mss, &lso_flags);
1250f8296c60SJoshua M. Clulow 		lso_required = (lso_flags & HW_LSO) != 0;
12518a324c92SDan McDonald 	}
12528a324c92SDan McDonald 
1253f8296c60SJoshua M. Clulow 	mutex_enter(&vif->vif_mutex);
1254f8296c60SJoshua M. Clulow 	if ((tb = vioif_txbuf_alloc(vif)) == NULL) {
1255f8296c60SJoshua M. Clulow 		vif->vif_notxbuf++;
1256f8296c60SJoshua M. Clulow 		goto fail;
12578a324c92SDan McDonald 	}
1258f8296c60SJoshua M. Clulow 	mutex_exit(&vif->vif_mutex);
12598a324c92SDan McDonald 
1260f8296c60SJoshua M. Clulow 	/*
1261f8296c60SJoshua M. Clulow 	 * Use the inline buffer for the virtio net header.  Zero the portion
1262f8296c60SJoshua M. Clulow 	 * of our DMA allocation prior to the packet data.
1263f8296c60SJoshua M. Clulow 	 */
1264f8296c60SJoshua M. Clulow 	vnh = virtio_dma_va(tb->tb_dma, 0);
1265f8296c60SJoshua M. Clulow 	bzero(vnh, VIOIF_HEADER_SKIP);
12668a324c92SDan McDonald 
1267f8296c60SJoshua M. Clulow 	/*
1268f8296c60SJoshua M. Clulow 	 * For legacy devices, and those that have not negotiated
1269f8296c60SJoshua M. Clulow 	 * VIRTIO_F_ANY_LAYOUT, the virtio net header must appear in a separate
1270f8296c60SJoshua M. Clulow 	 * descriptor entry to the rest of the buffer.
1271f8296c60SJoshua M. Clulow 	 */
1272f8296c60SJoshua M. Clulow 	if (virtio_chain_append(tb->tb_chain,
1273f8296c60SJoshua M. Clulow 	    virtio_dma_cookie_pa(tb->tb_dma, 0), sizeof (struct virtio_net_hdr),
1274f8296c60SJoshua M. Clulow 	    VIRTIO_DIR_DEVICE_READS) != DDI_SUCCESS) {
1275f8296c60SJoshua M. Clulow 		mutex_enter(&vif->vif_mutex);
1276f8296c60SJoshua M. Clulow 		vif->vif_notxbuf++;
1277f8296c60SJoshua M. Clulow 		goto fail;
1278f8296c60SJoshua M. Clulow 	}
12798a324c92SDan McDonald 
1280f8296c60SJoshua M. Clulow 	mac_hcksum_get(mp, &csum_start, &csum_stuff, NULL, NULL, &csum_flags);
12818a324c92SDan McDonald 
1282f8296c60SJoshua M. Clulow 	/*
1283f8296c60SJoshua M. Clulow 	 * They want us to do the TCP/UDP csum calculation.
1284f8296c60SJoshua M. Clulow 	 */
12858a324c92SDan McDonald 	if (csum_flags & HCK_PARTIALCKSUM) {
12868a324c92SDan McDonald 		int eth_hsize;
12878a324c92SDan McDonald 
1288f8296c60SJoshua M. Clulow 		/*
1289f8296c60SJoshua M. Clulow 		 * Did we ask for it?
1290f8296c60SJoshua M. Clulow 		 */
1291f8296c60SJoshua M. Clulow 		ASSERT(vif->vif_tx_csum);
12928a324c92SDan McDonald 
1293f8296c60SJoshua M. Clulow 		/*
1294f8296c60SJoshua M. Clulow 		 * We only asked for partial csum packets.
1295f8296c60SJoshua M. Clulow 		 */
12968a324c92SDan McDonald 		ASSERT(!(csum_flags & HCK_IPV4_HDRCKSUM));
12978a324c92SDan McDonald 		ASSERT(!(csum_flags & HCK_FULLCKSUM));
12988a324c92SDan McDonald 
1299f8296c60SJoshua M. Clulow 		if (ether->ether_type == htons(ETHERTYPE_VLAN)) {
13008a324c92SDan McDonald 			eth_hsize = sizeof (struct ether_vlan_header);
13018a324c92SDan McDonald 		} else {
13028a324c92SDan McDonald 			eth_hsize = sizeof (struct ether_header);
13038a324c92SDan McDonald 		}
1304f8296c60SJoshua M. Clulow 
1305f8296c60SJoshua M. Clulow 		vnh->vnh_flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
1306f8296c60SJoshua M. Clulow 		vnh->vnh_csum_start = eth_hsize + csum_start;
1307f8296c60SJoshua M. Clulow 		vnh->vnh_csum_offset = csum_stuff - csum_start;
13088a324c92SDan McDonald 	}
13098a324c92SDan McDonald 
1310f8296c60SJoshua M. Clulow 	/*
1311f8296c60SJoshua M. Clulow 	 * Setup LSO fields if required.
1312f8296c60SJoshua M. Clulow 	 */
13138a324c92SDan McDonald 	if (lso_required) {
1314d240edafSRobert Mustacchi 		mac_ether_offload_flags_t needed;
1315d240edafSRobert Mustacchi 		mac_ether_offload_info_t meo;
1316d240edafSRobert Mustacchi 		uint32_t cksum;
1317d240edafSRobert Mustacchi 		size_t len;
1318d240edafSRobert Mustacchi 		mblk_t *pullmp = NULL;
1319d240edafSRobert Mustacchi 		tcpha_t *tcpha;
1320d240edafSRobert Mustacchi 
1321d240edafSRobert Mustacchi 		if (mac_ether_offload_info(mp, &meo) != 0) {
1322d240edafSRobert Mustacchi 			goto fail;
1323d240edafSRobert Mustacchi 		}
1324d240edafSRobert Mustacchi 
1325d240edafSRobert Mustacchi 		needed = MEOI_L2INFO_SET | MEOI_L3INFO_SET | MEOI_L4INFO_SET;
1326d240edafSRobert Mustacchi 		if ((meo.meoi_flags & needed) != needed) {
1327d240edafSRobert Mustacchi 			goto fail;
1328d240edafSRobert Mustacchi 		}
1329d240edafSRobert Mustacchi 
1330d240edafSRobert Mustacchi 		if (meo.meoi_l4proto != IPPROTO_TCP) {
1331d240edafSRobert Mustacchi 			goto fail;
1332d240edafSRobert Mustacchi 		}
1333d240edafSRobert Mustacchi 
133462366fbbSRobert Mustacchi 		if (meo.meoi_l3proto == ETHERTYPE_IP && vif->vif_tx_tso4) {
1335d240edafSRobert Mustacchi 			vnh->vnh_gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
133662366fbbSRobert Mustacchi 		} else if (meo.meoi_l3proto == ETHERTYPE_IPV6 &&
133762366fbbSRobert Mustacchi 		    vif->vif_tx_tso6) {
1338d240edafSRobert Mustacchi 			vnh->vnh_gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
1339d240edafSRobert Mustacchi 		} else {
1340d240edafSRobert Mustacchi 			goto fail;
1341d240edafSRobert Mustacchi 		}
1342d240edafSRobert Mustacchi 
1343d240edafSRobert Mustacchi 		/*
1344d240edafSRobert Mustacchi 		 * The TCP stack does not include the length in the TCP
1345d240edafSRobert Mustacchi 		 * pseudo-header when it is performing LSO since hardware
1346d240edafSRobert Mustacchi 		 * generally asks for it to be removed (as it'll change).
1347d240edafSRobert Mustacchi 		 * Unfortunately, for virtio, we actually need it. This means we
1348d240edafSRobert Mustacchi 		 * need to go through and calculate the actual length and fix
1349d240edafSRobert Mustacchi 		 * things up. Because the virtio spec cares about the ECN flag
1350d240edafSRobert Mustacchi 		 * and indicating that, at least this means we'll have that
1351d240edafSRobert Mustacchi 		 * available as well.
1352d240edafSRobert Mustacchi 		 */
1353d240edafSRobert Mustacchi 		if (MBLKL(mp) < vnh->vnh_hdr_len) {
1354d240edafSRobert Mustacchi 			pullmp = msgpullup(mp, vnh->vnh_hdr_len);
1355d240edafSRobert Mustacchi 			if (pullmp == NULL)
1356d240edafSRobert Mustacchi 				goto fail;
1357d240edafSRobert Mustacchi 			tcpha = (tcpha_t *)(pullmp->b_rptr + meo.meoi_l2hlen +
1358d240edafSRobert Mustacchi 			    meo.meoi_l3hlen);
1359d240edafSRobert Mustacchi 		} else {
1360d240edafSRobert Mustacchi 			tcpha = (tcpha_t *)(mp->b_rptr + meo.meoi_l2hlen +
1361d240edafSRobert Mustacchi 			    meo.meoi_l3hlen);
1362d240edafSRobert Mustacchi 		}
1363d240edafSRobert Mustacchi 
1364d240edafSRobert Mustacchi 		len = meo.meoi_len - meo.meoi_l2hlen - meo.meoi_l3hlen;
1365d240edafSRobert Mustacchi 		cksum = ntohs(tcpha->tha_sum) + len;
1366d240edafSRobert Mustacchi 		cksum = (cksum >> 16) + (cksum & 0xffff);
1367d240edafSRobert Mustacchi 		cksum = (cksum >> 16) + (cksum & 0xffff);
1368d240edafSRobert Mustacchi 		tcpha->tha_sum = htons(cksum);
1369d240edafSRobert Mustacchi 
1370d240edafSRobert Mustacchi 		if (tcpha->tha_flags & TH_CWR) {
1371d240edafSRobert Mustacchi 			vnh->vnh_gso_type |= VIRTIO_NET_HDR_GSO_ECN;
1372d240edafSRobert Mustacchi 		}
1373f8296c60SJoshua M. Clulow 		vnh->vnh_gso_size = (uint16_t)lso_mss;
1374d240edafSRobert Mustacchi 		vnh->vnh_hdr_len = meo.meoi_l2hlen + meo.meoi_l3hlen +
1375d240edafSRobert Mustacchi 		    meo.meoi_l4hlen;
1376d240edafSRobert Mustacchi 
1377d240edafSRobert Mustacchi 		freemsg(pullmp);
13788a324c92SDan McDonald 	}
13798a324c92SDan McDonald 
1380f8296c60SJoshua M. Clulow 	/*
1381f8296c60SJoshua M. Clulow 	 * The device does not maintain its own statistics about broadcast or
1382f8296c60SJoshua M. Clulow 	 * multicast packets, so we have to check the destination address
1383f8296c60SJoshua M. Clulow 	 * ourselves.
1384f8296c60SJoshua M. Clulow 	 */
1385f8296c60SJoshua M. Clulow 	if ((ether->ether_dhost.ether_addr_octet[0] & 0x01) != 0) {
1386f8296c60SJoshua M. Clulow 		mutex_enter(&vif->vif_mutex);
1387f8296c60SJoshua M. Clulow 		if (ether_cmp(&ether->ether_dhost, vioif_broadcast) == 0) {
1388f8296c60SJoshua M. Clulow 			vif->vif_brdcstxmt++;
1389f8296c60SJoshua M. Clulow 		} else {
1390f8296c60SJoshua M. Clulow 			vif->vif_multixmt++;
1391f8296c60SJoshua M. Clulow 		}
1392f8296c60SJoshua M. Clulow 		mutex_exit(&vif->vif_mutex);
13938a324c92SDan McDonald 	}
13948a324c92SDan McDonald 
13958a324c92SDan McDonald 	/*
1396f8296c60SJoshua M. Clulow 	 * For small packets, copy into the preallocated inline buffer rather
1397f8296c60SJoshua M. Clulow 	 * than incur the overhead of mapping.  Note that both of these
1398f8296c60SJoshua M. Clulow 	 * functions ensure that "mp" is freed before returning.
13998a324c92SDan McDonald 	 */
1400f8296c60SJoshua M. Clulow 	if (msg_size < vif->vif_txcopy_thresh) {
1401f8296c60SJoshua M. Clulow 		ret = vioif_tx_inline(vif, tb, mp, msg_size);
14028a324c92SDan McDonald 	} else {
1403f8296c60SJoshua M. Clulow 		ret = vioif_tx_external(vif, tb, mp, msg_size);
14048a324c92SDan McDonald 	}
1405f8296c60SJoshua M. Clulow 	mp = NULL;
14068a324c92SDan McDonald 
1407f8296c60SJoshua M. Clulow 	mutex_enter(&vif->vif_mutex);
14088a324c92SDan McDonald 
1409f8296c60SJoshua M. Clulow 	if (ret != DDI_SUCCESS) {
1410f8296c60SJoshua M. Clulow 		goto fail;
1411f8296c60SJoshua M. Clulow 	}
14128a324c92SDan McDonald 
1413f8296c60SJoshua M. Clulow 	vif->vif_opackets++;
1414f8296c60SJoshua M. Clulow 	vif->vif_obytes += msg_size;
1415f8296c60SJoshua M. Clulow 	mutex_exit(&vif->vif_mutex);
14168a324c92SDan McDonald 
1417f8296c60SJoshua M. Clulow 	virtio_dma_sync(tb->tb_dma, DDI_DMA_SYNC_FORDEV);
1418f8296c60SJoshua M. Clulow 	virtio_chain_submit(tb->tb_chain, B_TRUE);
14198a324c92SDan McDonald 
14208a324c92SDan McDonald 	return (B_TRUE);
1421f8296c60SJoshua M. Clulow 
1422f8296c60SJoshua M. Clulow fail:
1423f8296c60SJoshua M. Clulow 	vif->vif_oerrors++;
1424f8296c60SJoshua M. Clulow 	if (tb != NULL) {
1425f8296c60SJoshua M. Clulow 		vioif_txbuf_free(vif, tb);
1426f8296c60SJoshua M. Clulow 	}
1427f8296c60SJoshua M. Clulow 	mutex_exit(&vif->vif_mutex);
1428f8296c60SJoshua M. Clulow 
1429f8296c60SJoshua M. Clulow 	return (mp == NULL);
14308a324c92SDan McDonald }
14318a324c92SDan McDonald 
14329e0bf232SPatrick Mooney static mblk_t *
vioif_m_tx(void * arg,mblk_t * mp)1433f8296c60SJoshua M. Clulow vioif_m_tx(void *arg, mblk_t *mp)
14348a324c92SDan McDonald {
1435f8296c60SJoshua M. Clulow 	vioif_t *vif = arg;
14369e0bf232SPatrick Mooney 	mblk_t *nmp;
14379e0bf232SPatrick Mooney 
14389e0bf232SPatrick Mooney 	/*
14399e0bf232SPatrick Mooney 	 * Prior to attempting to send any more frames, do a reclaim to pick up
14409e0bf232SPatrick Mooney 	 * any descriptors which have been processed by the host.
14419e0bf232SPatrick Mooney 	 */
1442f8296c60SJoshua M. Clulow 	if (virtio_queue_nactive(vif->vif_tx_vq) != 0) {
1443f8296c60SJoshua M. Clulow 		(void) vioif_reclaim_used_tx(vif);
14449e0bf232SPatrick Mooney 	}
14458a324c92SDan McDonald 
14468a324c92SDan McDonald 	while (mp != NULL) {
14478a324c92SDan McDonald 		nmp = mp->b_next;
14488a324c92SDan McDonald 		mp->b_next = NULL;
14498a324c92SDan McDonald 
1450f8296c60SJoshua M. Clulow 		if (!vioif_send(vif, mp)) {
14519e0bf232SPatrick Mooney 			/*
14529e0bf232SPatrick Mooney 			 * If there are no descriptors available, try to
14539e0bf232SPatrick Mooney 			 * reclaim some, allowing a retry of the send if some
14549e0bf232SPatrick Mooney 			 * are found.
14559e0bf232SPatrick Mooney 			 */
14568a324c92SDan McDonald 			mp->b_next = nmp;
1457f8296c60SJoshua M. Clulow 			if (vioif_reclaim_used_tx(vif) != 0) {
14589e0bf232SPatrick Mooney 				continue;
14599e0bf232SPatrick Mooney 			}
14609e0bf232SPatrick Mooney 
14619e0bf232SPatrick Mooney 			/*
14629e0bf232SPatrick Mooney 			 * Otherwise, enable the TX ring interrupt so that as
14639e0bf232SPatrick Mooney 			 * soon as a descriptor becomes available, transmission
14649e0bf232SPatrick Mooney 			 * can begin again.  For safety, make sure the periodic
14659e0bf232SPatrick Mooney 			 * reclaim is running as well.
14669e0bf232SPatrick Mooney 			 */
1467f8296c60SJoshua M. Clulow 			mutex_enter(&vif->vif_mutex);
1468f8296c60SJoshua M. Clulow 			vif->vif_tx_corked = B_TRUE;
1469f8296c60SJoshua M. Clulow 			virtio_queue_no_interrupt(vif->vif_tx_vq, B_FALSE);
1470f8296c60SJoshua M. Clulow 			vioif_reclaim_restart(vif);
1471f8296c60SJoshua M. Clulow 			mutex_exit(&vif->vif_mutex);
14729e0bf232SPatrick Mooney 			return (mp);
14738a324c92SDan McDonald 		}
14748a324c92SDan McDonald 		mp = nmp;
14758a324c92SDan McDonald 	}
14768a324c92SDan McDonald 
14779e0bf232SPatrick Mooney 	/* Ensure the periodic reclaim has been started. */
1478f8296c60SJoshua M. Clulow 	mutex_enter(&vif->vif_mutex);
1479f8296c60SJoshua M. Clulow 	vioif_reclaim_restart(vif);
1480f8296c60SJoshua M. Clulow 	mutex_exit(&vif->vif_mutex);
14819e0bf232SPatrick Mooney 
14829e0bf232SPatrick Mooney 	return (NULL);
14838a324c92SDan McDonald }
14848a324c92SDan McDonald 
14859e0bf232SPatrick Mooney static int
vioif_m_start(void * arg)1486f8296c60SJoshua M. Clulow vioif_m_start(void *arg)
14878a324c92SDan McDonald {
1488f8296c60SJoshua M. Clulow 	vioif_t *vif = arg;
1489f8296c60SJoshua M. Clulow 
1490f8296c60SJoshua M. Clulow 	mutex_enter(&vif->vif_mutex);
1491f8296c60SJoshua M. Clulow 
1492f8296c60SJoshua M. Clulow 	VERIFY3S(vif->vif_runstate, ==, VIOIF_RUNSTATE_STOPPED);
1493f8296c60SJoshua M. Clulow 	vif->vif_runstate = VIOIF_RUNSTATE_RUNNING;
14948a324c92SDan McDonald 
1495f8296c60SJoshua M. Clulow 	mac_link_update(vif->vif_mac_handle, LINK_STATE_UP);
14968a324c92SDan McDonald 
1497f8296c60SJoshua M. Clulow 	virtio_queue_no_interrupt(vif->vif_rx_vq, B_FALSE);
14988a324c92SDan McDonald 
1499970db7b7SDan Kimmel 	/*
15009e0bf232SPatrick Mooney 	 * Starting interrupts on the TX virtqueue is unnecessary at this time.
15019e0bf232SPatrick Mooney 	 * Descriptor reclamation is handling during transmit, via a periodic
15029e0bf232SPatrick Mooney 	 * timer, and when resources are tight, via the then-enabled interrupt.
1503970db7b7SDan Kimmel 	 */
1504f8296c60SJoshua M. Clulow 	vif->vif_tx_drain = B_FALSE;
1505970db7b7SDan Kimmel 
1506970db7b7SDan Kimmel 	/*
1507f8296c60SJoshua M. Clulow 	 * Add as many receive buffers as we can to the receive queue.  If we
1508f8296c60SJoshua M. Clulow 	 * cannot add any, it may be because we have stopped and started again
1509f8296c60SJoshua M. Clulow 	 * and the descriptors are all in the queue already.
1510970db7b7SDan Kimmel 	 */
1511f8296c60SJoshua M. Clulow 	(void) vioif_add_rx(vif);
1512970db7b7SDan Kimmel 
1513f8296c60SJoshua M. Clulow 	mutex_exit(&vif->vif_mutex);
15148a324c92SDan McDonald 	return (DDI_SUCCESS);
15158a324c92SDan McDonald }
15168a324c92SDan McDonald 
15179e0bf232SPatrick Mooney static void
vioif_m_stop(void * arg)1518f8296c60SJoshua M. Clulow vioif_m_stop(void *arg)
15198a324c92SDan McDonald {
1520f8296c60SJoshua M. Clulow 	vioif_t *vif = arg;
1521f8296c60SJoshua M. Clulow 
1522f8296c60SJoshua M. Clulow 	mutex_enter(&vif->vif_mutex);
1523f8296c60SJoshua M. Clulow 
1524f8296c60SJoshua M. Clulow 	VERIFY3S(vif->vif_runstate, ==, VIOIF_RUNSTATE_RUNNING);
1525f8296c60SJoshua M. Clulow 	vif->vif_runstate = VIOIF_RUNSTATE_STOPPING;
15268a324c92SDan McDonald 
15279e0bf232SPatrick Mooney 	/* Ensure all TX descriptors have been processed and reclaimed */
1528f8296c60SJoshua M. Clulow 	vioif_tx_drain(vif);
15299e0bf232SPatrick Mooney 
1530f8296c60SJoshua M. Clulow 	virtio_queue_no_interrupt(vif->vif_rx_vq, B_TRUE);
1531f8296c60SJoshua M. Clulow 
1532f8296c60SJoshua M. Clulow 	vif->vif_runstate = VIOIF_RUNSTATE_STOPPED;
1533f8296c60SJoshua M. Clulow 	mutex_exit(&vif->vif_mutex);
15348a324c92SDan McDonald }
15358a324c92SDan McDonald 
15368a324c92SDan McDonald static int
vioif_m_stat(void * arg,uint_t stat,uint64_t * val)1537f8296c60SJoshua M. Clulow vioif_m_stat(void *arg, uint_t stat, uint64_t *val)
15388a324c92SDan McDonald {
1539f8296c60SJoshua M. Clulow 	vioif_t *vif = arg;
15408a324c92SDan McDonald 
15418a324c92SDan McDonald 	switch (stat) {
15428a324c92SDan McDonald 	case MAC_STAT_IERRORS:
1543f8296c60SJoshua M. Clulow 		*val = vif->vif_ierrors;
15448a324c92SDan McDonald 		break;
15458a324c92SDan McDonald 	case MAC_STAT_OERRORS:
1546f8296c60SJoshua M. Clulow 		*val = vif->vif_oerrors;
15478a324c92SDan McDonald 		break;
15488a324c92SDan McDonald 	case MAC_STAT_MULTIRCV:
1549f8296c60SJoshua M. Clulow 		*val = vif->vif_multircv;
15508a324c92SDan McDonald 		break;
15518a324c92SDan McDonald 	case MAC_STAT_BRDCSTRCV:
1552f8296c60SJoshua M. Clulow 		*val = vif->vif_brdcstrcv;
15538a324c92SDan McDonald 		break;
15548a324c92SDan McDonald 	case MAC_STAT_MULTIXMT:
1555f8296c60SJoshua M. Clulow 		*val = vif->vif_multixmt;
15568a324c92SDan McDonald 		break;
15578a324c92SDan McDonald 	case MAC_STAT_BRDCSTXMT:
1558f8296c60SJoshua M. Clulow 		*val = vif->vif_brdcstxmt;
15598a324c92SDan McDonald 		break;
15608a324c92SDan McDonald 	case MAC_STAT_IPACKETS:
1561f8296c60SJoshua M. Clulow 		*val = vif->vif_ipackets;
15628a324c92SDan McDonald 		break;
15638a324c92SDan McDonald 	case MAC_STAT_RBYTES:
1564f8296c60SJoshua M. Clulow 		*val = vif->vif_rbytes;
15658a324c92SDan McDonald 		break;
15668a324c92SDan McDonald 	case MAC_STAT_OPACKETS:
1567f8296c60SJoshua M. Clulow 		*val = vif->vif_opackets;
15688a324c92SDan McDonald 		break;
15698a324c92SDan McDonald 	case MAC_STAT_OBYTES:
1570f8296c60SJoshua M. Clulow 		*val = vif->vif_obytes;
15718a324c92SDan McDonald 		break;
15728a324c92SDan McDonald 	case MAC_STAT_NORCVBUF:
1573f8296c60SJoshua M. Clulow 		*val = vif->vif_norecvbuf;
15748a324c92SDan McDonald 		break;
15758a324c92SDan McDonald 	case MAC_STAT_NOXMTBUF:
1576f8296c60SJoshua M. Clulow 		*val = vif->vif_notxbuf;
15778a324c92SDan McDonald 		break;
15788a324c92SDan McDonald 	case MAC_STAT_IFSPEED:
15798a324c92SDan McDonald 		/* always 1 Gbit */
15808a324c92SDan McDonald 		*val = 1000000000ULL;
15818a324c92SDan McDonald 		break;
15828a324c92SDan McDonald 	case ETHER_STAT_LINK_DUPLEX:
15838a324c92SDan McDonald 		/* virtual device, always full-duplex */
15848a324c92SDan McDonald 		*val = LINK_DUPLEX_FULL;
15858a324c92SDan McDonald 		break;
15868a324c92SDan McDonald 
15878a324c92SDan McDonald 	default:
15888a324c92SDan McDonald 		return (ENOTSUP);
15898a324c92SDan McDonald 	}
15908a324c92SDan McDonald 
15918a324c92SDan McDonald 	return (DDI_SUCCESS);
15928a324c92SDan McDonald }
15938a324c92SDan McDonald 
15948a324c92SDan McDonald static int
vioif_m_setprop(void * arg,const char * pr_name,mac_prop_id_t pr_num,uint_t pr_valsize,const void * pr_val)1595f8296c60SJoshua M. Clulow vioif_m_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
15968a324c92SDan McDonald     uint_t pr_valsize, const void *pr_val)
15978a324c92SDan McDonald {
1598f8296c60SJoshua M. Clulow 	vioif_t *vif = arg;
15998a324c92SDan McDonald 
1600f8296c60SJoshua M. Clulow 	switch (pr_num) {
1601f8296c60SJoshua M. Clulow 	case MAC_PROP_MTU: {
1602f8296c60SJoshua M. Clulow 		int r;
1603f8296c60SJoshua M. Clulow 		uint32_t mtu;
1604f8296c60SJoshua M. Clulow 		if (pr_valsize < sizeof (mtu)) {
1605f8296c60SJoshua M. Clulow 			return (EOVERFLOW);
1606f8296c60SJoshua M. Clulow 		}
1607f8296c60SJoshua M. Clulow 		bcopy(pr_val, &mtu, sizeof (mtu));
16088a324c92SDan McDonald 
1609f8296c60SJoshua M. Clulow 		if (mtu < ETHERMIN || mtu > vif->vif_mtu_max) {
16108a324c92SDan McDonald 			return (EINVAL);
1611f8296c60SJoshua M. Clulow 		}
16128a324c92SDan McDonald 
1613f8296c60SJoshua M. Clulow 		mutex_enter(&vif->vif_mutex);
1614f8296c60SJoshua M. Clulow 		if ((r = mac_maxsdu_update(vif->vif_mac_handle, mtu)) == 0) {
1615f8296c60SJoshua M. Clulow 			vif->vif_mtu = mtu;
1616f8296c60SJoshua M. Clulow 		}
1617f8296c60SJoshua M. Clulow 		mutex_exit(&vif->vif_mutex);
16188a324c92SDan McDonald 
1619f8296c60SJoshua M. Clulow 		return (r);
16208a324c92SDan McDonald 	}
16218a324c92SDan McDonald 
1622f8296c60SJoshua M. Clulow 	case MAC_PROP_PRIVATE: {
1623f8296c60SJoshua M. Clulow 		long max, result;
1624f8296c60SJoshua M. Clulow 		uint_t *resp;
1625f8296c60SJoshua M. Clulow 		char *endptr;
1626f8296c60SJoshua M. Clulow 
1627f8296c60SJoshua M. Clulow 		if (strcmp(pr_name, VIOIF_MACPROP_TXCOPY_THRESH) == 0) {
1628f8296c60SJoshua M. Clulow 			max = VIOIF_MACPROP_TXCOPY_THRESH_MAX;
1629f8296c60SJoshua M. Clulow 			resp = &vif->vif_txcopy_thresh;
1630f8296c60SJoshua M. Clulow 		} else if (strcmp(pr_name, VIOIF_MACPROP_RXCOPY_THRESH) == 0) {
1631f8296c60SJoshua M. Clulow 			max = VIOIF_MACPROP_RXCOPY_THRESH_MAX;
1632f8296c60SJoshua M. Clulow 			resp = &vif->vif_rxcopy_thresh;
1633f8296c60SJoshua M. Clulow 		} else {
1634f8296c60SJoshua M. Clulow 			return (ENOTSUP);
1635f8296c60SJoshua M. Clulow 		}
16368a324c92SDan McDonald 
1637f8296c60SJoshua M. Clulow 		if (pr_val == NULL) {
16388a324c92SDan McDonald 			return (EINVAL);
1639f8296c60SJoshua M. Clulow 		}
16408a324c92SDan McDonald 
1641f8296c60SJoshua M. Clulow 		if (ddi_strtol(pr_val, &endptr, 10, &result) != 0 ||
1642f8296c60SJoshua M. Clulow 		    *endptr != '\0' || result < 0 || result > max) {
16438a324c92SDan McDonald 			return (EINVAL);
16448a324c92SDan McDonald 		}
16458a324c92SDan McDonald 
1646f8296c60SJoshua M. Clulow 		mutex_enter(&vif->vif_mutex);
1647f8296c60SJoshua M. Clulow 		*resp = result;
1648f8296c60SJoshua M. Clulow 		mutex_exit(&vif->vif_mutex);
1649f8296c60SJoshua M. Clulow 
1650f8296c60SJoshua M. Clulow 		return (0);
1651f8296c60SJoshua M. Clulow 	}
1652f8296c60SJoshua M. Clulow 
16538a324c92SDan McDonald 	default:
16548a324c92SDan McDonald 		return (ENOTSUP);
16558a324c92SDan McDonald 	}
16568a324c92SDan McDonald }
16578a324c92SDan McDonald 
16588a324c92SDan McDonald static int
vioif_m_getprop(void * arg,const char * pr_name,mac_prop_id_t pr_num,uint_t pr_valsize,void * pr_val)1659f8296c60SJoshua M. Clulow vioif_m_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
16608a324c92SDan McDonald     uint_t pr_valsize, void *pr_val)
16618a324c92SDan McDonald {
1662f8296c60SJoshua M. Clulow 	vioif_t *vif = arg;
16638a324c92SDan McDonald 
1664f8296c60SJoshua M. Clulow 	switch (pr_num) {
1665f8296c60SJoshua M. Clulow 	case MAC_PROP_PRIVATE: {
1666f8296c60SJoshua M. Clulow 		uint_t value;
16678a324c92SDan McDonald 
1668f8296c60SJoshua M. Clulow 		if (strcmp(pr_name, VIOIF_MACPROP_TXCOPY_THRESH) == 0) {
1669f8296c60SJoshua M. Clulow 			value = vif->vif_txcopy_thresh;
1670f8296c60SJoshua M. Clulow 		} else if (strcmp(pr_name, VIOIF_MACPROP_RXCOPY_THRESH) == 0) {
1671f8296c60SJoshua M. Clulow 			value = vif->vif_rxcopy_thresh;
1672f8296c60SJoshua M. Clulow 		} else {
1673f8296c60SJoshua M. Clulow 			return (ENOTSUP);
1674f8296c60SJoshua M. Clulow 		}
16758a324c92SDan McDonald 
1676f8296c60SJoshua M. Clulow 		if (snprintf(pr_val, pr_valsize, "%u", value) >= pr_valsize) {
1677f8296c60SJoshua M. Clulow 			return (EOVERFLOW);
1678f8296c60SJoshua M. Clulow 		}
16798a324c92SDan McDonald 
1680f8296c60SJoshua M. Clulow 		return (0);
1681f8296c60SJoshua M. Clulow 	}
16828a324c92SDan McDonald 
16838a324c92SDan McDonald 	default:
1684f8296c60SJoshua M. Clulow 		return (ENOTSUP);
16858a324c92SDan McDonald 	}
16868a324c92SDan McDonald }
16878a324c92SDan McDonald 
16888a324c92SDan McDonald static void
vioif_m_propinfo(void * arg,const char * pr_name,mac_prop_id_t pr_num,mac_prop_info_handle_t prh)1689f8296c60SJoshua M. Clulow vioif_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t pr_num,
16908a324c92SDan McDonald     mac_prop_info_handle_t prh)
16918a324c92SDan McDonald {
1692f8296c60SJoshua M. Clulow 	vioif_t *vif = arg;
16938a324c92SDan McDonald 	char valstr[64];
16948a324c92SDan McDonald 	int value;
16958a324c92SDan McDonald 
16968a324c92SDan McDonald 	switch (pr_num) {
16978a324c92SDan McDonald 	case MAC_PROP_MTU:
1698f8296c60SJoshua M. Clulow 		mac_prop_info_set_perm(prh, MAC_PROP_PERM_RW);
1699f8296c60SJoshua M. Clulow 		mac_prop_info_set_range_uint32(prh, ETHERMIN, vif->vif_mtu_max);
1700f8296c60SJoshua M. Clulow 		return;
17018a324c92SDan McDonald 
17028a324c92SDan McDonald 	case MAC_PROP_PRIVATE:
1703f8296c60SJoshua M. Clulow 		if (strcmp(pr_name, VIOIF_MACPROP_TXCOPY_THRESH) == 0) {
1704f8296c60SJoshua M. Clulow 			value = VIOIF_MACPROP_TXCOPY_THRESH_DEF;
1705f8296c60SJoshua M. Clulow 		} else if (strcmp(pr_name, VIOIF_MACPROP_RXCOPY_THRESH) == 0) {
1706f8296c60SJoshua M. Clulow 			value = VIOIF_MACPROP_RXCOPY_THRESH_DEF;
17078a324c92SDan McDonald 		} else {
1708f8296c60SJoshua M. Clulow 			/*
1709f8296c60SJoshua M. Clulow 			 * We do not recognise this private property name.
1710f8296c60SJoshua M. Clulow 			 */
17118a324c92SDan McDonald 			return;
17128a324c92SDan McDonald 		}
1713f8296c60SJoshua M. Clulow 		mac_prop_info_set_perm(prh, MAC_PROP_PERM_RW);
17148a324c92SDan McDonald 		(void) snprintf(valstr, sizeof (valstr), "%d", value);
1715f8296c60SJoshua M. Clulow 		mac_prop_info_set_default_str(prh, valstr);
1716f8296c60SJoshua M. Clulow 		return;
17178a324c92SDan McDonald 
17188a324c92SDan McDonald 	default:
1719f8296c60SJoshua M. Clulow 		return;
17208a324c92SDan McDonald 	}
17218a324c92SDan McDonald }
17228a324c92SDan McDonald 
17238a324c92SDan McDonald static boolean_t
vioif_m_getcapab(void * arg,mac_capab_t cap,void * cap_data)1724f8296c60SJoshua M. Clulow vioif_m_getcapab(void *arg, mac_capab_t cap, void *cap_data)
17258a324c92SDan McDonald {
1726f8296c60SJoshua M. Clulow 	vioif_t *vif = arg;
17278a324c92SDan McDonald 
17288a324c92SDan McDonald 	switch (cap) {
1729f8296c60SJoshua M. Clulow 	case MAC_CAPAB_HCKSUM: {
1730f8296c60SJoshua M. Clulow 		if (!vif->vif_tx_csum) {
1731f8296c60SJoshua M. Clulow 			return (B_FALSE);
17328a324c92SDan McDonald 		}
17338a324c92SDan McDonald 
1734f8296c60SJoshua M. Clulow 		*(uint32_t *)cap_data = HCKSUM_INET_PARTIAL;
1735f8296c60SJoshua M. Clulow 
1736f8296c60SJoshua M. Clulow 		return (B_TRUE);
17378a324c92SDan McDonald 	}
17388a324c92SDan McDonald 
1739f8296c60SJoshua M. Clulow 	case MAC_CAPAB_LSO: {
1740f8296c60SJoshua M. Clulow 		if (!vif->vif_tx_tso4) {
1741f8296c60SJoshua M. Clulow 			return (B_FALSE);
1742f8296c60SJoshua M. Clulow 		}
17438a324c92SDan McDonald 
1744f8296c60SJoshua M. Clulow 		mac_capab_lso_t *lso = cap_data;
174562366fbbSRobert Mustacchi 		lso->lso_flags = LSO_TX_BASIC_TCP_IPV4 | LSO_TX_BASIC_TCP_IPV6;
1746f8296c60SJoshua M. Clulow 		lso->lso_basic_tcp_ipv4.lso_max = VIOIF_RX_DATA_SIZE;
174762366fbbSRobert Mustacchi 		lso->lso_basic_tcp_ipv6.lso_max = VIOIF_RX_DATA_SIZE;
17488a324c92SDan McDonald 
1749f8296c60SJoshua M. Clulow 		return (B_TRUE);
17508a324c92SDan McDonald 	}
17518a324c92SDan McDonald 
1752f8296c60SJoshua M. Clulow 	default:
1753f8296c60SJoshua M. Clulow 		return (B_FALSE);
1754f8296c60SJoshua M. Clulow 	}
17558a324c92SDan McDonald }
17568a324c92SDan McDonald 
17579e0bf232SPatrick Mooney static boolean_t
vioif_has_feature(vioif_t * vif,uint32_t feature)1758f8296c60SJoshua M. Clulow vioif_has_feature(vioif_t *vif, uint32_t feature)
17598a324c92SDan McDonald {
1760f8296c60SJoshua M. Clulow 	return (virtio_feature_present(vif->vif_virtio, feature));
17618a324c92SDan McDonald }
17628a324c92SDan McDonald 
1763f8296c60SJoshua M. Clulow /*
1764f8296c60SJoshua M. Clulow  * Read the primary MAC address from the device if one is provided.  If not,
1765f8296c60SJoshua M. Clulow  * generate a random locally administered MAC address and write it back to the
1766f8296c60SJoshua M. Clulow  * device.
1767f8296c60SJoshua M. Clulow  */
17688a324c92SDan McDonald static void
vioif_get_mac(vioif_t * vif)1769f8296c60SJoshua M. Clulow vioif_get_mac(vioif_t *vif)
17708a324c92SDan McDonald {
1771f8296c60SJoshua M. Clulow 	VERIFY(MUTEX_HELD(&vif->vif_mutex));
17728a324c92SDan McDonald 
1773f8296c60SJoshua M. Clulow 	if (vioif_has_feature(vif, VIRTIO_NET_F_MAC)) {
1774f8296c60SJoshua M. Clulow 		for (uint_t i = 0; i < ETHERADDRL; i++) {
1775f8296c60SJoshua M. Clulow 			vif->vif_mac[i] = virtio_dev_get8(vif->vif_virtio,
17768a324c92SDan McDonald 			    VIRTIO_NET_CONFIG_MAC + i);
17778a324c92SDan McDonald 		}
1778f8296c60SJoshua M. Clulow 		vif->vif_mac_from_host = 1;
17798a324c92SDan McDonald 
1780f8296c60SJoshua M. Clulow 		return;
1781f8296c60SJoshua M. Clulow 	}
17828a324c92SDan McDonald 
1783f8296c60SJoshua M. Clulow 	/* Get a few random bytes */
1784f8296c60SJoshua M. Clulow 	(void) random_get_pseudo_bytes(vif->vif_mac, ETHERADDRL);
1785f8296c60SJoshua M. Clulow 	/* Make sure it's a unicast MAC */
1786f8296c60SJoshua M. Clulow 	vif->vif_mac[0] &= ~1;
1787f8296c60SJoshua M. Clulow 	/* Set the "locally administered" bit */
1788f8296c60SJoshua M. Clulow 	vif->vif_mac[1] |= 2;
1789f8296c60SJoshua M. Clulow 
1790f8296c60SJoshua M. Clulow 	/*
1791f8296c60SJoshua M. Clulow 	 * Write the random MAC address back to the device.
1792f8296c60SJoshua M. Clulow 	 */
1793f8296c60SJoshua M. Clulow 	for (uint_t i = 0; i < ETHERADDRL; i++) {
1794f8296c60SJoshua M. Clulow 		virtio_dev_put8(vif->vif_virtio, VIRTIO_NET_CONFIG_MAC + i,
1795f8296c60SJoshua M. Clulow 		    vif->vif_mac[i]);
17968a324c92SDan McDonald 	}
1797f8296c60SJoshua M. Clulow 	vif->vif_mac_from_host = 0;
1798f8296c60SJoshua M. Clulow 
1799f8296c60SJoshua M. Clulow 	dev_err(vif->vif_dip, CE_NOTE, "!Generated a random MAC address: "
1800f8296c60SJoshua M. Clulow 	    "%02x:%02x:%02x:%02x:%02x:%02x",
1801f8296c60SJoshua M. Clulow 	    (uint_t)vif->vif_mac[0], (uint_t)vif->vif_mac[1],
1802f8296c60SJoshua M. Clulow 	    (uint_t)vif->vif_mac[2], (uint_t)vif->vif_mac[3],
1803f8296c60SJoshua M. Clulow 	    (uint_t)vif->vif_mac[4], (uint_t)vif->vif_mac[5]);
18048a324c92SDan McDonald }
18058a324c92SDan McDonald 
/*
 * Virtqueue interrupt handlers
 */
18099e0bf232SPatrick Mooney static uint_t
vioif_rx_handler(caddr_t arg0,caddr_t arg1)1810f8296c60SJoshua M. Clulow vioif_rx_handler(caddr_t arg0, caddr_t arg1)
18118a324c92SDan McDonald {
1812f8296c60SJoshua M. Clulow 	vioif_t *vif = (vioif_t *)arg0;
1813f8296c60SJoshua M. Clulow 
1814f8296c60SJoshua M. Clulow 	mutex_enter(&vif->vif_mutex);
1815f8296c60SJoshua M. Clulow 	(void) vioif_process_rx(vif);
18168a324c92SDan McDonald 
1817970db7b7SDan Kimmel 	/*
1818f8296c60SJoshua M. Clulow 	 * Attempt to replenish the receive queue.  If we cannot add any
1819f8296c60SJoshua M. Clulow 	 * descriptors here, it may be because all of the recently received
1820f8296c60SJoshua M. Clulow 	 * packets were loaned up to the networking stack.
1821970db7b7SDan Kimmel 	 */
1822f8296c60SJoshua M. Clulow 	(void) vioif_add_rx(vif);
1823f8296c60SJoshua M. Clulow 	mutex_exit(&vif->vif_mutex);
18248a324c92SDan McDonald 
18258a324c92SDan McDonald 	return (DDI_INTR_CLAIMED);
18268a324c92SDan McDonald }
18278a324c92SDan McDonald 
18289e0bf232SPatrick Mooney static uint_t
vioif_tx_handler(caddr_t arg0,caddr_t arg1)1829f8296c60SJoshua M. Clulow vioif_tx_handler(caddr_t arg0, caddr_t arg1)
18308a324c92SDan McDonald {
1831f8296c60SJoshua M. Clulow 	vioif_t *vif = (vioif_t *)arg0;
18328a324c92SDan McDonald 
1833970db7b7SDan Kimmel 	/*
18349e0bf232SPatrick Mooney 	 * The TX interrupt could race with other reclamation activity, so
18359e0bf232SPatrick Mooney 	 * interpreting the return value is unimportant.
1836970db7b7SDan Kimmel 	 */
1837f8296c60SJoshua M. Clulow 	(void) vioif_reclaim_used_tx(vif);
1838970db7b7SDan Kimmel 
18398a324c92SDan McDonald 	return (DDI_INTR_CLAIMED);
18408a324c92SDan McDonald }
18418a324c92SDan McDonald 
18428a324c92SDan McDonald static void
vioif_check_features(vioif_t * vif)1843f8296c60SJoshua M. Clulow vioif_check_features(vioif_t *vif)
18448a324c92SDan McDonald {
1845f8296c60SJoshua M. Clulow 	VERIFY(MUTEX_HELD(&vif->vif_mutex));
18468a324c92SDan McDonald 
1847f8296c60SJoshua M. Clulow 	vif->vif_tx_csum = 0;
1848f8296c60SJoshua M. Clulow 	vif->vif_tx_tso4 = 0;
184962366fbbSRobert Mustacchi 	vif->vif_tx_tso6 = 0;
18508a324c92SDan McDonald 
1851f8296c60SJoshua M. Clulow 	if (vioif_has_feature(vif, VIRTIO_NET_F_CSUM)) {
1852f8296c60SJoshua M. Clulow 		/*
1853f8296c60SJoshua M. Clulow 		 * The host will accept packets with partial checksums from us.
1854f8296c60SJoshua M. Clulow 		 */
1855f8296c60SJoshua M. Clulow 		vif->vif_tx_csum = 1;
18568a324c92SDan McDonald 
1857f8296c60SJoshua M. Clulow 		/*
1858f8296c60SJoshua M. Clulow 		 * The legacy GSO feature represents the combination of
1859f8296c60SJoshua M. Clulow 		 * HOST_TSO4, HOST_TSO6, and HOST_ECN.
1860f8296c60SJoshua M. Clulow 		 */
1861f8296c60SJoshua M. Clulow 		boolean_t gso = vioif_has_feature(vif, VIRTIO_NET_F_GSO);
1862f8296c60SJoshua M. Clulow 		boolean_t tso4 = vioif_has_feature(vif, VIRTIO_NET_F_HOST_TSO4);
186362366fbbSRobert Mustacchi 		boolean_t tso6 = vioif_has_feature(vif, VIRTIO_NET_F_HOST_TSO6);
1864f8296c60SJoshua M. Clulow 		boolean_t ecn = vioif_has_feature(vif, VIRTIO_NET_F_HOST_ECN);
1865f8296c60SJoshua M. Clulow 
1866f8296c60SJoshua M. Clulow 		/*
1867f8296c60SJoshua M. Clulow 		 * Explicit congestion notification (ECN) is configured
1868f8296c60SJoshua M. Clulow 		 * globally; see "tcp_ecn_permitted".  As we cannot currently
1869f8296c60SJoshua M. Clulow 		 * request that the stack disable ECN on a per interface basis,
1870f8296c60SJoshua M. Clulow 		 * we require the device to support the combination of
1871f8296c60SJoshua M. Clulow 		 * segmentation offload and ECN support.
1872f8296c60SJoshua M. Clulow 		 */
187362366fbbSRobert Mustacchi 		if (gso) {
1874f8296c60SJoshua M. Clulow 			vif->vif_tx_tso4 = 1;
187562366fbbSRobert Mustacchi 			vif->vif_tx_tso6 = 1;
187662366fbbSRobert Mustacchi 		}
187762366fbbSRobert Mustacchi 		if (tso4 && ecn) {
187862366fbbSRobert Mustacchi 			vif->vif_tx_tso4 = 1;
187962366fbbSRobert Mustacchi 		}
188062366fbbSRobert Mustacchi 		if (tso6 && ecn) {
188162366fbbSRobert Mustacchi 			vif->vif_tx_tso6 = 1;
18828a324c92SDan McDonald 		}
18838a324c92SDan McDonald 	}
188435d41f28SJason King 
188535d41f28SJason King 	if (vioif_has_feature(vif, VIRTIO_NET_F_CTRL_VQ)) {
188635d41f28SJason King 		vif->vif_has_ctrlq = 1;
188735d41f28SJason King 
188835d41f28SJason King 		/*
188935d41f28SJason King 		 * The VIRTIO_NET_F_CTRL_VQ feature must be enabled if there's
189035d41f28SJason King 		 * any chance of the VIRTIO_NET_F_CTRL_RX being enabled.
189135d41f28SJason King 		 */
189235d41f28SJason King 		if (vioif_has_feature(vif, VIRTIO_NET_F_CTRL_RX))
189335d41f28SJason King 			vif->vif_has_ctrlq_rx = 1;
189435d41f28SJason King 	}
18958a324c92SDan McDonald }
18968a324c92SDan McDonald 
1897aefa9c84SJoshua M. Clulow static int
vioif_select_interrupt_types(void)1898aefa9c84SJoshua M. Clulow vioif_select_interrupt_types(void)
1899aefa9c84SJoshua M. Clulow {
1900aefa9c84SJoshua M. Clulow 	id_t id;
1901aefa9c84SJoshua M. Clulow 	smbios_system_t sys;
1902aefa9c84SJoshua M. Clulow 	smbios_info_t info;
1903aefa9c84SJoshua M. Clulow 
1904aefa9c84SJoshua M. Clulow 	if (vioif_allowed_int_types != -1) {
1905aefa9c84SJoshua M. Clulow 		/*
1906aefa9c84SJoshua M. Clulow 		 * If this value was tuned via /etc/system or the debugger,
1907aefa9c84SJoshua M. Clulow 		 * use the provided value directly.
1908aefa9c84SJoshua M. Clulow 		 */
1909aefa9c84SJoshua M. Clulow 		return (vioif_allowed_int_types);
1910aefa9c84SJoshua M. Clulow 	}
1911aefa9c84SJoshua M. Clulow 
1912f2047739SJoshua M. Clulow 	if (ksmbios == NULL ||
1913f2047739SJoshua M. Clulow 	    (id = smbios_info_system(ksmbios, &sys)) == SMB_ERR ||
1914aefa9c84SJoshua M. Clulow 	    smbios_info_common(ksmbios, id, &info) == SMB_ERR) {
1915aefa9c84SJoshua M. Clulow 		/*
1916aefa9c84SJoshua M. Clulow 		 * The system may not have valid SMBIOS data, so ignore a
1917aefa9c84SJoshua M. Clulow 		 * failure here.
1918aefa9c84SJoshua M. Clulow 		 */
191964439ec0SJoshua M. Clulow 		return (VIRTIO_ANY_INTR_TYPE);
1920aefa9c84SJoshua M. Clulow 	}
1921aefa9c84SJoshua M. Clulow 
1922aefa9c84SJoshua M. Clulow 	if (strcmp(info.smbi_manufacturer, "Google") == 0 &&
1923aefa9c84SJoshua M. Clulow 	    strcmp(info.smbi_product, "Google Compute Engine") == 0) {
1924aefa9c84SJoshua M. Clulow 		/*
1925aefa9c84SJoshua M. Clulow 		 * An undiagnosed issue with the Google Compute Engine (GCE)
1926aefa9c84SJoshua M. Clulow 		 * hypervisor exists.  In this environment, no RX interrupts
1927aefa9c84SJoshua M. Clulow 		 * are received if MSI-X handlers are installed.  This does not
1928aefa9c84SJoshua M. Clulow 		 * appear to be true for the Virtio SCSI driver.  Fixed
1929aefa9c84SJoshua M. Clulow 		 * interrupts do appear to work, so we fall back for now:
1930aefa9c84SJoshua M. Clulow 		 */
1931aefa9c84SJoshua M. Clulow 		return (DDI_INTR_TYPE_FIXED);
1932aefa9c84SJoshua M. Clulow 	}
1933aefa9c84SJoshua M. Clulow 
193464439ec0SJoshua M. Clulow 	return (VIRTIO_ANY_INTR_TYPE);
1935aefa9c84SJoshua M. Clulow }
1936aefa9c84SJoshua M. Clulow 
19378a324c92SDan McDonald static int
vioif_attach(dev_info_t * dip,ddi_attach_cmd_t cmd)1938f8296c60SJoshua M. Clulow vioif_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
19398a324c92SDan McDonald {
1940f8296c60SJoshua M. Clulow 	int ret;
1941f8296c60SJoshua M. Clulow 	vioif_t *vif;
1942f8296c60SJoshua M. Clulow 	virtio_t *vio;
1943f8296c60SJoshua M. Clulow 	mac_register_t *macp = NULL;
19448a324c92SDan McDonald 
1945f8296c60SJoshua M. Clulow 	if (cmd != DDI_ATTACH) {
1946f8296c60SJoshua M. Clulow 		return (DDI_FAILURE);
19478a324c92SDan McDonald 	}
19488a324c92SDan McDonald 
1949f8296c60SJoshua M. Clulow 	if ((vio = virtio_init(dip, VIRTIO_NET_WANTED_FEATURES, B_TRUE)) ==
1950f8296c60SJoshua M. Clulow 	    NULL) {
1951f8296c60SJoshua M. Clulow 		return (DDI_FAILURE);
1952f8296c60SJoshua M. Clulow 	}
19538a324c92SDan McDonald 
1954f8296c60SJoshua M. Clulow 	vif = kmem_zalloc(sizeof (*vif), KM_SLEEP);
1955f8296c60SJoshua M. Clulow 	vif->vif_dip = dip;
1956f8296c60SJoshua M. Clulow 	vif->vif_virtio = vio;
1957f8296c60SJoshua M. Clulow 	vif->vif_runstate = VIOIF_RUNSTATE_STOPPED;
1958f8296c60SJoshua M. Clulow 	ddi_set_driver_private(dip, vif);
1959f8296c60SJoshua M. Clulow 
1960f8296c60SJoshua M. Clulow 	if ((vif->vif_rx_vq = virtio_queue_alloc(vio, VIRTIO_NET_VIRTQ_RX,
1961f8296c60SJoshua M. Clulow 	    "rx", vioif_rx_handler, vif, B_FALSE, VIOIF_MAX_SEGS)) == NULL ||
1962f8296c60SJoshua M. Clulow 	    (vif->vif_tx_vq = virtio_queue_alloc(vio, VIRTIO_NET_VIRTQ_TX,
1963f8296c60SJoshua M. Clulow 	    "tx", vioif_tx_handler, vif, B_FALSE, VIOIF_MAX_SEGS)) == NULL) {
1964f8296c60SJoshua M. Clulow 		goto fail;
1965f8296c60SJoshua M. Clulow 	}
19668a324c92SDan McDonald 
196735d41f28SJason King 	if (vioif_has_feature(vif, VIRTIO_NET_F_CTRL_VQ) &&
196835d41f28SJason King 	    (vif->vif_ctrl_vq = virtio_queue_alloc(vio,
196935d41f28SJason King 	    VIRTIO_NET_VIRTQ_CONTROL, "ctrlq", NULL, vif,
197035d41f28SJason King 	    B_FALSE, VIOIF_MAX_SEGS)) == NULL) {
197135d41f28SJason King 		goto fail;
197235d41f28SJason King 	}
197335d41f28SJason King 
1974aefa9c84SJoshua M. Clulow 	if (virtio_init_complete(vio, vioif_select_interrupt_types()) !=
1975aefa9c84SJoshua M. Clulow 	    DDI_SUCCESS) {
1976f8296c60SJoshua M. Clulow 		dev_err(dip, CE_WARN, "failed to complete Virtio init");
1977f8296c60SJoshua M. Clulow 		goto fail;
19788a324c92SDan McDonald 	}
19798a324c92SDan McDonald 
1980f8296c60SJoshua M. Clulow 	virtio_queue_no_interrupt(vif->vif_rx_vq, B_TRUE);
1981f8296c60SJoshua M. Clulow 	virtio_queue_no_interrupt(vif->vif_tx_vq, B_TRUE);
198235d41f28SJason King 	if (vif->vif_ctrl_vq != NULL)
198335d41f28SJason King 		virtio_queue_no_interrupt(vif->vif_ctrl_vq, B_TRUE);
19848a324c92SDan McDonald 
1985f8296c60SJoshua M. Clulow 	mutex_init(&vif->vif_mutex, NULL, MUTEX_DRIVER, virtio_intr_pri(vio));
1986f8296c60SJoshua M. Clulow 	mutex_enter(&vif->vif_mutex);
19878a324c92SDan McDonald 
1988f8296c60SJoshua M. Clulow 	vioif_get_mac(vif);
19898a324c92SDan McDonald 
1990f8296c60SJoshua M. Clulow 	vif->vif_rxcopy_thresh = VIOIF_MACPROP_RXCOPY_THRESH_DEF;
1991f8296c60SJoshua M. Clulow 	vif->vif_txcopy_thresh = VIOIF_MACPROP_TXCOPY_THRESH_DEF;
19928a324c92SDan McDonald 
1993f8296c60SJoshua M. Clulow 	if (vioif_has_feature(vif, VIRTIO_NET_F_MTU)) {
1994f8296c60SJoshua M. Clulow 		vif->vif_mtu_max = virtio_dev_get16(vio, VIRTIO_NET_CONFIG_MTU);
1995f8296c60SJoshua M. Clulow 	} else {
1996f8296c60SJoshua M. Clulow 		vif->vif_mtu_max = ETHERMTU;
19978a324c92SDan McDonald 	}
19988a324c92SDan McDonald 
1999f8296c60SJoshua M. Clulow 	vif->vif_mtu = ETHERMTU;
2000f8296c60SJoshua M. Clulow 	if (vif->vif_mtu > vif->vif_mtu_max) {
2001f8296c60SJoshua M. Clulow 		vif->vif_mtu = vif->vif_mtu_max;
20028a324c92SDan McDonald 	}
20038a324c92SDan McDonald 
2004f8296c60SJoshua M. Clulow 	vioif_check_features(vif);
20058a324c92SDan McDonald 
2006f8296c60SJoshua M. Clulow 	if (vioif_alloc_bufs(vif) != 0) {
2007f8296c60SJoshua M. Clulow 		mutex_exit(&vif->vif_mutex);
2008f8296c60SJoshua M. Clulow 		dev_err(dip, CE_WARN, "failed to allocate memory");
2009f8296c60SJoshua M. Clulow 		goto fail;
2010f8296c60SJoshua M. Clulow 	}
20118a324c92SDan McDonald 
2012f8296c60SJoshua M. Clulow 	mutex_exit(&vif->vif_mutex);
20138a324c92SDan McDonald 
2014f8296c60SJoshua M. Clulow 	if (virtio_interrupts_enable(vio) != DDI_SUCCESS) {
2015f8296c60SJoshua M. Clulow 		dev_err(dip, CE_WARN, "failed to enable interrupts");
2016f8296c60SJoshua M. Clulow 		goto fail;
2017f8296c60SJoshua M. Clulow 	}
20188a324c92SDan McDonald 
20198a324c92SDan McDonald 	if ((macp = mac_alloc(MAC_VERSION)) == NULL) {
2020f8296c60SJoshua M. Clulow 		dev_err(dip, CE_WARN, "failed to allocate a mac_register");
2021f8296c60SJoshua M. Clulow 		goto fail;
20228a324c92SDan McDonald 	}
20238a324c92SDan McDonald 
20248a324c92SDan McDonald 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
2025f8296c60SJoshua M. Clulow 	macp->m_driver = vif;
2026f8296c60SJoshua M. Clulow 	macp->m_dip = dip;
2027f8296c60SJoshua M. Clulow 	macp->m_src_addr = vif->vif_mac;
2028f8296c60SJoshua M. Clulow 	macp->m_callbacks = &vioif_mac_callbacks;
20298a324c92SDan McDonald 	macp->m_min_sdu = 0;
2030f8296c60SJoshua M. Clulow 	macp->m_max_sdu = vif->vif_mtu;
20318a324c92SDan McDonald 	macp->m_margin = VLAN_TAGSZ;
20328a324c92SDan McDonald 	macp->m_priv_props = vioif_priv_props;
20338a324c92SDan McDonald 
2034f8296c60SJoshua M. Clulow 	if ((ret = mac_register(macp, &vif->vif_mac_handle)) != 0) {
2035f8296c60SJoshua M. Clulow 		dev_err(dip, CE_WARN, "mac_register() failed (%d)", ret);
2036f8296c60SJoshua M. Clulow 		goto fail;
20378a324c92SDan McDonald 	}
2038f8296c60SJoshua M. Clulow 	mac_free(macp);
20398a324c92SDan McDonald 
2040f8296c60SJoshua M. Clulow 	mac_link_update(vif->vif_mac_handle, LINK_STATE_UP);
20418a324c92SDan McDonald 
20428a324c92SDan McDonald 	return (DDI_SUCCESS);
20438a324c92SDan McDonald 
2044f8296c60SJoshua M. Clulow fail:
2045f8296c60SJoshua M. Clulow 	vioif_free_bufs(vif);
2046f8296c60SJoshua M. Clulow 	if (macp != NULL) {
2047f8296c60SJoshua M. Clulow 		mac_free(macp);
2048f8296c60SJoshua M. Clulow 	}
2049f8296c60SJoshua M. Clulow 	(void) virtio_fini(vio, B_TRUE);
2050f8296c60SJoshua M. Clulow 	kmem_free(vif, sizeof (*vif));
20518a324c92SDan McDonald 	return (DDI_FAILURE);
20528a324c92SDan McDonald }
20538a324c92SDan McDonald 
20548a324c92SDan McDonald static int
vioif_detach(dev_info_t * dip,ddi_detach_cmd_t cmd)2055f8296c60SJoshua M. Clulow vioif_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
20568a324c92SDan McDonald {
2057f8296c60SJoshua M. Clulow 	int r;
2058f8296c60SJoshua M. Clulow 	vioif_t *vif;
20598a324c92SDan McDonald 
2060f8296c60SJoshua M. Clulow 	if (cmd != DDI_DETACH) {
20618a324c92SDan McDonald 		return (DDI_FAILURE);
2062f8296c60SJoshua M. Clulow 	}
20638a324c92SDan McDonald 
2064f8296c60SJoshua M. Clulow 	if ((vif = ddi_get_driver_private(dip)) == NULL) {
2065f8296c60SJoshua M. Clulow 		return (DDI_FAILURE);
2066f8296c60SJoshua M. Clulow 	}
20678a324c92SDan McDonald 
2068f8296c60SJoshua M. Clulow 	mutex_enter(&vif->vif_mutex);
2069f8296c60SJoshua M. Clulow 	if (vif->vif_runstate != VIOIF_RUNSTATE_STOPPED) {
2070f8296c60SJoshua M. Clulow 		dev_err(dip, CE_WARN, "!NIC still running, cannot detach");
2071f8296c60SJoshua M. Clulow 		mutex_exit(&vif->vif_mutex);
20728a324c92SDan McDonald 		return (DDI_FAILURE);
2073f8296c60SJoshua M. Clulow 	}
20748a324c92SDan McDonald 
2075f8296c60SJoshua M. Clulow 	/*
2076f8296c60SJoshua M. Clulow 	 * There should be no outstanding transmit buffers once the NIC is
2077f8296c60SJoshua M. Clulow 	 * completely stopped.
2078f8296c60SJoshua M. Clulow 	 */
2079f8296c60SJoshua M. Clulow 	VERIFY3U(vif->vif_ntxbufs_alloc, ==, 0);
2080f8296c60SJoshua M. Clulow 
2081f8296c60SJoshua M. Clulow 	/*
2082f8296c60SJoshua M. Clulow 	 * Though we cannot claw back all of the receive buffers until we reset
2083f8296c60SJoshua M. Clulow 	 * the device, we must ensure all those loaned to MAC have been
2084f8296c60SJoshua M. Clulow 	 * returned before calling mac_unregister().
2085f8296c60SJoshua M. Clulow 	 */
2086f8296c60SJoshua M. Clulow 	if (vif->vif_nrxbufs_onloan > 0) {
2087f8296c60SJoshua M. Clulow 		dev_err(dip, CE_WARN, "!%u receive buffers still loaned, "
2088f8296c60SJoshua M. Clulow 		    "cannot detach", vif->vif_nrxbufs_onloan);
2089f8296c60SJoshua M. Clulow 		mutex_exit(&vif->vif_mutex);
20908a324c92SDan McDonald 		return (DDI_FAILURE);
20918a324c92SDan McDonald 	}
20928a324c92SDan McDonald 
2093f8296c60SJoshua M. Clulow 	if ((r = mac_unregister(vif->vif_mac_handle)) != 0) {
2094f8296c60SJoshua M. Clulow 		dev_err(dip, CE_WARN, "!MAC unregister failed (%d)", r);
20958a324c92SDan McDonald 		return (DDI_FAILURE);
20968a324c92SDan McDonald 	}
20978a324c92SDan McDonald 
2098f8296c60SJoshua M. Clulow 	/*
2099f8296c60SJoshua M. Clulow 	 * Shut down the device so that we can recover any previously
2100f8296c60SJoshua M. Clulow 	 * submitted receive buffers.
2101f8296c60SJoshua M. Clulow 	 */
2102f8296c60SJoshua M. Clulow 	virtio_shutdown(vif->vif_virtio);
2103f8296c60SJoshua M. Clulow 	for (;;) {
2104f8296c60SJoshua M. Clulow 		virtio_chain_t *vic;
21058a324c92SDan McDonald 
2106f8296c60SJoshua M. Clulow 		if ((vic = virtio_queue_evacuate(vif->vif_rx_vq)) == NULL) {
2107f8296c60SJoshua M. Clulow 			break;
2108f8296c60SJoshua M. Clulow 		}
21098a324c92SDan McDonald 
2110f8296c60SJoshua M. Clulow 		vioif_rxbuf_t *rb = virtio_chain_data(vic);
2111f8296c60SJoshua M. Clulow 		vioif_rxbuf_free(vif, rb);
21128a324c92SDan McDonald 	}
21138a324c92SDan McDonald 
211489cb8ffbSAndy Fiddaman 	/*
211589cb8ffbSAndy Fiddaman 	 * vioif_free_bufs() must be called before virtio_fini()
211689cb8ffbSAndy Fiddaman 	 * as it uses virtio_chain_free() which itself depends on some
211789cb8ffbSAndy Fiddaman 	 * virtio data structures still being around.
211889cb8ffbSAndy Fiddaman 	 */
2119f8296c60SJoshua M. Clulow 	vioif_free_bufs(vif);
212089cb8ffbSAndy Fiddaman 	(void) virtio_fini(vif->vif_virtio, B_FALSE);
21218a324c92SDan McDonald 
2122f8296c60SJoshua M. Clulow 	mutex_exit(&vif->vif_mutex);
2123f8296c60SJoshua M. Clulow 	mutex_destroy(&vif->vif_mutex);
21248a324c92SDan McDonald 
2125f8296c60SJoshua M. Clulow 	kmem_free(vif, sizeof (*vif));
21268a324c92SDan McDonald 
21278a324c92SDan McDonald 	return (DDI_SUCCESS);
21288a324c92SDan McDonald }
21298a324c92SDan McDonald 
21308a324c92SDan McDonald static int
vioif_quiesce(dev_info_t * dip)2131f8296c60SJoshua M. Clulow vioif_quiesce(dev_info_t *dip)
21328a324c92SDan McDonald {
2133f8296c60SJoshua M. Clulow 	vioif_t *vif;
21348a324c92SDan McDonald 
2135f8296c60SJoshua M. Clulow 	if ((vif = ddi_get_driver_private(dip)) == NULL)
21368a324c92SDan McDonald 		return (DDI_FAILURE);
21378a324c92SDan McDonald 
2138f8296c60SJoshua M. Clulow 	return (virtio_quiesce(vif->vif_virtio));
21398a324c92SDan McDonald }
21408a324c92SDan McDonald 
21418a324c92SDan McDonald int
_init(void)21428a324c92SDan McDonald _init(void)
21438a324c92SDan McDonald {
2144f8296c60SJoshua M. Clulow 	int ret;
21458a324c92SDan McDonald 
2146f8296c60SJoshua M. Clulow 	mac_init_ops(&vioif_dev_ops, "vioif");
21478a324c92SDan McDonald 
2148f8296c60SJoshua M. Clulow 	if ((ret = mod_install(&vioif_modlinkage)) != DDI_SUCCESS) {
2149f8296c60SJoshua M. Clulow 		mac_fini_ops(&vioif_dev_ops);
21508a324c92SDan McDonald 	}
21518a324c92SDan McDonald 
2152f8296c60SJoshua M. Clulow 	return (ret);
21538a324c92SDan McDonald }
21548a324c92SDan McDonald 
21558a324c92SDan McDonald int
_fini(void)21568a324c92SDan McDonald _fini(void)
21578a324c92SDan McDonald {
21588a324c92SDan McDonald 	int ret;
21598a324c92SDan McDonald 
2160f8296c60SJoshua M. Clulow 	if ((ret = mod_remove(&vioif_modlinkage)) == DDI_SUCCESS) {
2161f8296c60SJoshua M. Clulow 		mac_fini_ops(&vioif_dev_ops);
21628a324c92SDan McDonald 	}
21638a324c92SDan McDonald 
21648a324c92SDan McDonald 	return (ret);
21658a324c92SDan McDonald }
21668a324c92SDan McDonald 
21678a324c92SDan McDonald int
_info(struct modinfo * modinfop)2168f8296c60SJoshua M. Clulow _info(struct modinfo *modinfop)
21698a324c92SDan McDonald {
2170f8296c60SJoshua M. Clulow 	return (mod_info(&vioif_modlinkage, modinfop));
21718a324c92SDan McDonald }
2172