xref: /illumos-gate/usr/src/uts/common/xen/io/xnf.c (revision cfd17c15)
1843e1988Sjohnlev /*
2843e1988Sjohnlev  * CDDL HEADER START
3843e1988Sjohnlev  *
4843e1988Sjohnlev  * The contents of this file are subject to the terms of the
5843e1988Sjohnlev  * Common Development and Distribution License (the "License").
6843e1988Sjohnlev  * You may not use this file except in compliance with the License.
7843e1988Sjohnlev  *
8843e1988Sjohnlev  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9843e1988Sjohnlev  * or http://www.opensolaris.org/os/licensing.
10843e1988Sjohnlev  * See the License for the specific language governing permissions
11843e1988Sjohnlev  * and limitations under the License.
12843e1988Sjohnlev  *
13843e1988Sjohnlev  * When distributing Covered Code, include this CDDL HEADER in each
14843e1988Sjohnlev  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15843e1988Sjohnlev  * If applicable, add the following below this CDDL HEADER, with the
16843e1988Sjohnlev  * fields enclosed by brackets "[]" replaced with your own identifying
17843e1988Sjohnlev  * information: Portions Copyright [yyyy] [name of copyright owner]
18843e1988Sjohnlev  *
19843e1988Sjohnlev  * CDDL HEADER END
20843e1988Sjohnlev  */
21843e1988Sjohnlev 
22843e1988Sjohnlev /*
23fd0939efSDavid Edmondson  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24843e1988Sjohnlev  * Use is subject to license terms.
25843e1988Sjohnlev  */
26843e1988Sjohnlev 
279276b399SYuri Pankov /*
289276b399SYuri Pankov  * Copyright (c) 2014, 2017 by Delphix. All rights reserved.
298fa80b69SAndrew Stormont  * Copyright 2020 RackTop Systems, Inc.
309276b399SYuri Pankov  */
319276b399SYuri Pankov 
32843e1988Sjohnlev /*
33843e1988Sjohnlev  *
34843e1988Sjohnlev  * Copyright (c) 2004 Christian Limpach.
35843e1988Sjohnlev  * All rights reserved.
36843e1988Sjohnlev  *
37843e1988Sjohnlev  * Redistribution and use in source and binary forms, with or without
38843e1988Sjohnlev  * modification, are permitted provided that the following conditions
39843e1988Sjohnlev  * are met:
40843e1988Sjohnlev  * 1. Redistributions of source code must retain the above copyright
41843e1988Sjohnlev  *    notice, this list of conditions and the following disclaimer.
42843e1988Sjohnlev  * 2. Redistributions in binary form must reproduce the above copyright
43843e1988Sjohnlev  *    notice, this list of conditions and the following disclaimer in the
44843e1988Sjohnlev  *    documentation and/or other materials provided with the distribution.
45843e1988Sjohnlev  * 3. This section intentionally left blank.
46843e1988Sjohnlev  * 4. The name of the author may not be used to endorse or promote products
47843e1988Sjohnlev  *    derived from this software without specific prior written permission.
48843e1988Sjohnlev  *
49843e1988Sjohnlev  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
50843e1988Sjohnlev  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
51843e1988Sjohnlev  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
52843e1988Sjohnlev  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
53843e1988Sjohnlev  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
54843e1988Sjohnlev  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
55843e1988Sjohnlev  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
56843e1988Sjohnlev  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
57843e1988Sjohnlev  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
58843e1988Sjohnlev  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
59843e1988Sjohnlev  */
60843e1988Sjohnlev /*
61843e1988Sjohnlev  * Section 3 of the above license was updated in response to bug 6379571.
62843e1988Sjohnlev  */
63843e1988Sjohnlev 
64843e1988Sjohnlev /*
6556567907SDavid Edmondson  * xnf.c - GLDv3 network driver for domU.
6656567907SDavid Edmondson  */
6756567907SDavid Edmondson 
6856567907SDavid Edmondson /*
6956567907SDavid Edmondson  * This driver uses four per-instance locks:
7056567907SDavid Edmondson  *
7156567907SDavid Edmondson  * xnf_gref_lock:
7256567907SDavid Edmondson  *
7356567907SDavid Edmondson  *    Protects access to the grant reference list stored in
7456567907SDavid Edmondson  *    xnf_gref_head. Grant references should be acquired and released
7556567907SDavid Edmondson  *    using xnf_gref_get() and xnf_gref_put() respectively.
7656567907SDavid Edmondson  *
7756567907SDavid Edmondson  * xnf_schedlock:
7856567907SDavid Edmondson  *
7956567907SDavid Edmondson  *    Protects:
8056567907SDavid Edmondson  *    xnf_need_sched - used to record that a previous transmit attempt
8156567907SDavid Edmondson  *       failed (and consequently it will be necessary to call
8256567907SDavid Edmondson  *       mac_tx_update() when transmit resources are available).
8356567907SDavid Edmondson  *    xnf_pending_multicast - the number of multicast requests that
8456567907SDavid Edmondson  *       have been submitted to the backend for which we have not
8556567907SDavid Edmondson  *       processed responses.
8656567907SDavid Edmondson  *
8756567907SDavid Edmondson  * xnf_txlock:
8856567907SDavid Edmondson  *
8956567907SDavid Edmondson  *    Protects the transmit ring (xnf_tx_ring) and associated
9056567907SDavid Edmondson  *    structures (notably xnf_tx_pkt_id and xnf_tx_pkt_id_head).
9156567907SDavid Edmondson  *
9256567907SDavid Edmondson  * xnf_rxlock:
9356567907SDavid Edmondson  *
9456567907SDavid Edmondson  *    Protects the receive ring (xnf_rx_ring) and associated
9556567907SDavid Edmondson  *    structures (notably xnf_rx_pkt_info).
9656567907SDavid Edmondson  *
9756567907SDavid Edmondson  * If driver-global state that affects both the transmit and receive
9856567907SDavid Edmondson  * rings is manipulated, both xnf_txlock and xnf_rxlock should be
9956567907SDavid Edmondson  * held, in that order.
10056567907SDavid Edmondson  *
10156567907SDavid Edmondson  * xnf_schedlock is acquired both whilst holding xnf_txlock and
10256567907SDavid Edmondson  * without. It should always be acquired after xnf_txlock if both are
10356567907SDavid Edmondson  * held.
10456567907SDavid Edmondson  *
10556567907SDavid Edmondson  * Notes:
10656567907SDavid Edmondson  * - atomic_add_64() is used to manipulate counters where we require
10756567907SDavid Edmondson  *   accuracy. For counters intended only for observation by humans,
10856567907SDavid Edmondson  *   post increment/decrement are used instead.
109843e1988Sjohnlev  */
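/*
 * Illustrative sketch only, not part of the driver: the acquisition
 * orders described above, shown for a hypothetical caller.
 *
 *	// Driver-global state affecting both rings:
 *	mutex_enter(&xnfp->xnf_txlock);
 *	mutex_enter(&xnfp->xnf_rxlock);
 *	...
 *	mutex_exit(&xnfp->xnf_rxlock);
 *	mutex_exit(&xnfp->xnf_txlock);
 *
 *	// Scheduling state manipulated whilst transmitting:
 *	mutex_enter(&xnfp->xnf_txlock);
 *	mutex_enter(&xnfp->xnf_schedlock);	// always after xnf_txlock
 *	...
 *	mutex_exit(&xnfp->xnf_schedlock);
 *	mutex_exit(&xnfp->xnf_txlock);
 */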
110843e1988Sjohnlev 
111843e1988Sjohnlev #include <sys/types.h>
112843e1988Sjohnlev #include <sys/errno.h>
113843e1988Sjohnlev #include <sys/param.h>
114843e1988Sjohnlev #include <sys/sysmacros.h>
115843e1988Sjohnlev #include <sys/systm.h>
116843e1988Sjohnlev #include <sys/stream.h>
117843e1988Sjohnlev #include <sys/strsubr.h>
11856567907SDavid Edmondson #include <sys/strsun.h>
119843e1988Sjohnlev #include <sys/conf.h>
120843e1988Sjohnlev #include <sys/ddi.h>
121843e1988Sjohnlev #include <sys/devops.h>
122843e1988Sjohnlev #include <sys/sunddi.h>
123843e1988Sjohnlev #include <sys/sunndi.h>
124843e1988Sjohnlev #include <sys/dlpi.h>
125843e1988Sjohnlev #include <sys/ethernet.h>
126843e1988Sjohnlev #include <sys/strsun.h>
127843e1988Sjohnlev #include <sys/pattr.h>
128843e1988Sjohnlev #include <inet/ip.h>
129a859da42SDavid Edmondson #include <inet/ip_impl.h>
1309276b399SYuri Pankov #include <inet/tcp.h>
1319276b399SYuri Pankov #include <netinet/udp.h>
132a859da42SDavid Edmondson #include <sys/gld.h>
133843e1988Sjohnlev #include <sys/modctl.h>
134da14cebeSEric Cheng #include <sys/mac_provider.h>
135843e1988Sjohnlev #include <sys/mac_ether.h>
136843e1988Sjohnlev #include <sys/bootinfo.h>
137843e1988Sjohnlev #include <sys/mach_mmu.h>
138551bc2a6Smrj #ifdef	XPV_HVM_DRIVER
139551bc2a6Smrj #include <sys/xpv_support.h>
140551bc2a6Smrj #include <sys/hypervisor.h>
141551bc2a6Smrj #else
142551bc2a6Smrj #include <sys/hypervisor.h>
143843e1988Sjohnlev #include <sys/evtchn_impl.h>
144843e1988Sjohnlev #include <sys/balloon_impl.h>
145551bc2a6Smrj #endif
146551bc2a6Smrj #include <xen/public/io/netif.h>
147551bc2a6Smrj #include <sys/gnttab.h>
148843e1988Sjohnlev #include <xen/sys/xendev.h>
149551bc2a6Smrj #include <sys/sdt.h>
15056567907SDavid Edmondson #include <sys/note.h>
15156567907SDavid Edmondson #include <sys/debug.h>
152551bc2a6Smrj 
153551bc2a6Smrj #include <io/xnf.h>
154551bc2a6Smrj 
155843e1988Sjohnlev /*
156843e1988Sjohnlev  * On a 32 bit PAE system physical and machine addresses are larger
157843e1988Sjohnlev  * than 32 bits.  ddi_btop() on such systems takes an unsigned long
158843e1988Sjohnlev  * argument, and so addresses above 4G are truncated before ddi_btop()
159843e1988Sjohnlev  * gets to see them.  To avoid this, code the shift operation here.
160843e1988Sjohnlev  */
161843e1988Sjohnlev #define	xnf_btop(addr)	((addr) >> PAGESHIFT)
162843e1988Sjohnlev 
1639276b399SYuri Pankov /*
1649276b399SYuri Pankov  * The parameters below should only be changed in /etc/system, never in mdb.
1659276b399SYuri Pankov  */
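/*
 * For example, LSO could be disabled by adding a line such as the
 * following to /etc/system (assuming the default "xnf" module name):
 *
 *	set xnf:xnf_enable_lso = 0
 */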
166551bc2a6Smrj 
167843e1988Sjohnlev /*
16856567907SDavid Edmondson  * Should we use the multicast control feature if the backend provides
16956567907SDavid Edmondson  * it?
170843e1988Sjohnlev  */
17156567907SDavid Edmondson boolean_t xnf_multicast_control = B_TRUE;
17256567907SDavid Edmondson 
1739276b399SYuri Pankov /*
1749276b399SYuri Pankov  * Should we allow scatter-gather for tx if backend allows it?
1759276b399SYuri Pankov  */
1769276b399SYuri Pankov boolean_t xnf_enable_tx_sg = B_TRUE;
1779276b399SYuri Pankov 
1789276b399SYuri Pankov /*
1799276b399SYuri Pankov  * Should we allow scatter-gather for rx if backend allows it?
1809276b399SYuri Pankov  */
1819276b399SYuri Pankov boolean_t xnf_enable_rx_sg = B_TRUE;
1829276b399SYuri Pankov 
1839276b399SYuri Pankov /*
1849276b399SYuri Pankov  * Should we allow lso for tx sends if backend allows it?
1859276b399SYuri Pankov  * Requires xnf_enable_tx_sg to be also set to TRUE.
1869276b399SYuri Pankov  */
1879276b399SYuri Pankov boolean_t xnf_enable_lso = B_TRUE;
1889276b399SYuri Pankov 
1899276b399SYuri Pankov /*
1909276b399SYuri Pankov  * Should we allow lro on rx if backend supports it?
1919276b399SYuri Pankov  * Requires xnf_enable_rx_sg to be also set to TRUE.
1929276b399SYuri Pankov  *
1939276b399SYuri Pankov  * !! WARNING !!
1949276b399SYuri Pankov  * LRO is not yet supported in the OS so this should be left as FALSE.
1959276b399SYuri Pankov  * !! WARNING !!
1969276b399SYuri Pankov  */
1979276b399SYuri Pankov boolean_t xnf_enable_lro = B_FALSE;
1989276b399SYuri Pankov 
199843e1988Sjohnlev /*
20056567907SDavid Edmondson  * Received packets below this size are copied to a new streams buffer
20156567907SDavid Edmondson  * rather than being desballoc'ed.
20256567907SDavid Edmondson  *
20356567907SDavid Edmondson  * This value is chosen to accommodate traffic where there are a large
20456567907SDavid Edmondson  * number of small packets. For data showing a typical distribution,
20556567907SDavid Edmondson  * see:
20656567907SDavid Edmondson  *
20756567907SDavid Edmondson  * Sinha07a:
20856567907SDavid Edmondson  *	Rishi Sinha, Christos Papadopoulos, and John
20956567907SDavid Edmondson  *	Heidemann. Internet Packet Size Distributions: Some
21056567907SDavid Edmondson  *	Observations. Technical Report ISI-TR-2007-643,
21156567907SDavid Edmondson  *	USC/Information Sciences Institute, May, 2007. Originally
21256567907SDavid Edmondson  *	released October 2005 as web page
21356567907SDavid Edmondson  *	http://netweb.usc.edu/~sinha/pkt-sizes/.
21456567907SDavid Edmondson  *	<http://www.isi.edu/~johnh/PAPERS/Sinha07a.html>.
215843e1988Sjohnlev  */
21656567907SDavid Edmondson size_t xnf_rx_copy_limit = 64;
217843e1988Sjohnlev 
21856567907SDavid Edmondson #define	INVALID_GRANT_HANDLE	((grant_handle_t)-1)
21956567907SDavid Edmondson #define	INVALID_GRANT_REF	((grant_ref_t)-1)
22056567907SDavid Edmondson #define	INVALID_TX_ID		((uint16_t)-1)
22156567907SDavid Edmondson 
22256567907SDavid Edmondson #define	TX_ID_TO_TXID(p, id) (&((p)->xnf_tx_pkt_id[(id)]))
2239276b399SYuri Pankov #define	TX_ID_VALID(i) \
2249276b399SYuri Pankov 	(((i) != INVALID_TX_ID) && ((i) < NET_TX_RING_SIZE))
2259276b399SYuri Pankov 
2269276b399SYuri Pankov /*
2279276b399SYuri Pankov  * calculate how many pages are spanned by an mblk fragment
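 * (e.g. a 100-byte fragment that begins 20 bytes before a page
 * boundary crosses into the following page and so spans two pages)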
2289276b399SYuri Pankov  */
2299276b399SYuri Pankov #define	xnf_mblk_pages(mp)	(MBLKL(mp) == 0 ? 0 : \
2309276b399SYuri Pankov     xnf_btop((uintptr_t)mp->b_wptr - 1) - xnf_btop((uintptr_t)mp->b_rptr) + 1)
231843e1988Sjohnlev 
232843e1988Sjohnlev /* Required system entry points */
233843e1988Sjohnlev static int	xnf_attach(dev_info_t *, ddi_attach_cmd_t);
234843e1988Sjohnlev static int	xnf_detach(dev_info_t *, ddi_detach_cmd_t);
235843e1988Sjohnlev 
236843e1988Sjohnlev /* Required driver entry points for Nemo */
237843e1988Sjohnlev static int	xnf_start(void *);
238843e1988Sjohnlev static void	xnf_stop(void *);
239843e1988Sjohnlev static int	xnf_set_mac_addr(void *, const uint8_t *);
240843e1988Sjohnlev static int	xnf_set_multicast(void *, boolean_t, const uint8_t *);
241843e1988Sjohnlev static int	xnf_set_promiscuous(void *, boolean_t);
242843e1988Sjohnlev static mblk_t	*xnf_send(void *, mblk_t *);
243843e1988Sjohnlev static uint_t	xnf_intr(caddr_t);
244843e1988Sjohnlev static int	xnf_stat(void *, uint_t, uint64_t *);
245843e1988Sjohnlev static boolean_t xnf_getcapab(void *, mac_capab_t, void *);
2469276b399SYuri Pankov static int xnf_getprop(void *, const char *, mac_prop_id_t, uint_t, void *);
2479276b399SYuri Pankov static int xnf_setprop(void *, const char *, mac_prop_id_t, uint_t,
2489276b399SYuri Pankov     const void *);
2499276b399SYuri Pankov static void xnf_propinfo(void *, const char *, mac_prop_id_t,
2509276b399SYuri Pankov     mac_prop_info_handle_t);
251843e1988Sjohnlev 
252843e1988Sjohnlev /* Driver private functions */
253843e1988Sjohnlev static int xnf_alloc_dma_resources(xnf_t *);
254843e1988Sjohnlev static void xnf_release_dma_resources(xnf_t *);
255843e1988Sjohnlev static void xnf_release_mblks(xnf_t *);
25656567907SDavid Edmondson 
25756567907SDavid Edmondson static int xnf_buf_constructor(void *, void *, int);
25856567907SDavid Edmondson static void xnf_buf_destructor(void *, void *);
25956567907SDavid Edmondson static xnf_buf_t *xnf_buf_get(xnf_t *, int, boolean_t);
26056567907SDavid Edmondson #pragma inline(xnf_buf_get)
26156567907SDavid Edmondson static void xnf_buf_put(xnf_t *, xnf_buf_t *, boolean_t);
26256567907SDavid Edmondson #pragma inline(xnf_buf_put)
26356567907SDavid Edmondson static void xnf_buf_refresh(xnf_buf_t *);
26456567907SDavid Edmondson #pragma inline(xnf_buf_refresh)
26556567907SDavid Edmondson static void xnf_buf_recycle(xnf_buf_t *);
26656567907SDavid Edmondson 
26756567907SDavid Edmondson static int xnf_tx_buf_constructor(void *, void *, int);
26856567907SDavid Edmondson static void xnf_tx_buf_destructor(void *, void *);
26956567907SDavid Edmondson 
2709276b399SYuri Pankov static grant_ref_t xnf_gref_get(xnf_t *);
2719276b399SYuri Pankov #pragma inline(xnf_gref_get)
2729276b399SYuri Pankov static void xnf_gref_put(xnf_t *, grant_ref_t);
2739276b399SYuri Pankov #pragma inline(xnf_gref_put)
27456567907SDavid Edmondson 
2759276b399SYuri Pankov static xnf_txid_t *xnf_txid_get(xnf_t *);
2769276b399SYuri Pankov #pragma inline(xnf_txid_get)
2779276b399SYuri Pankov static void xnf_txid_put(xnf_t *, xnf_txid_t *);
2789276b399SYuri Pankov #pragma inline(xnf_txid_put)
27956567907SDavid Edmondson 
28056567907SDavid Edmondson static void xnf_rxbuf_hang(xnf_t *, xnf_buf_t *);
28156567907SDavid Edmondson static int xnf_tx_clean_ring(xnf_t  *);
282843e1988Sjohnlev static void oe_state_change(dev_info_t *, ddi_eventcookie_t,
283843e1988Sjohnlev     void *, void *);
28456567907SDavid Edmondson static boolean_t xnf_kstat_init(xnf_t *);
28556567907SDavid Edmondson static void xnf_rx_collect(xnf_t *);
286843e1988Sjohnlev 
2879276b399SYuri Pankov #define	XNF_CALLBACK_FLAGS	(MC_GETCAPAB | MC_PROPERTIES)
2889276b399SYuri Pankov 
289843e1988Sjohnlev static mac_callbacks_t xnf_callbacks = {
2909276b399SYuri Pankov 	.mc_callbacks = XNF_CALLBACK_FLAGS,
2919276b399SYuri Pankov 	.mc_getstat = xnf_stat,
2929276b399SYuri Pankov 	.mc_start = xnf_start,
2939276b399SYuri Pankov 	.mc_stop = xnf_stop,
2949276b399SYuri Pankov 	.mc_setpromisc = xnf_set_promiscuous,
2959276b399SYuri Pankov 	.mc_multicst = xnf_set_multicast,
2969276b399SYuri Pankov 	.mc_unicst = xnf_set_mac_addr,
2979276b399SYuri Pankov 	.mc_tx = xnf_send,
2989276b399SYuri Pankov 	.mc_getcapab = xnf_getcapab,
2999276b399SYuri Pankov 	.mc_setprop = xnf_setprop,
3009276b399SYuri Pankov 	.mc_getprop = xnf_getprop,
3019276b399SYuri Pankov 	.mc_propinfo = xnf_propinfo,
302843e1988Sjohnlev };
303843e1988Sjohnlev 
304843e1988Sjohnlev /* DMA attributes for network ring buffer */
305843e1988Sjohnlev static ddi_dma_attr_t ringbuf_dma_attr = {
3069276b399SYuri Pankov 	.dma_attr_version = DMA_ATTR_V0,
3079276b399SYuri Pankov 	.dma_attr_addr_lo = 0,
3089276b399SYuri Pankov 	.dma_attr_addr_hi = 0xffffffffffffffffULL,
3099276b399SYuri Pankov 	.dma_attr_count_max = 0x7fffffff,
3109276b399SYuri Pankov 	.dma_attr_align = MMU_PAGESIZE,
3119276b399SYuri Pankov 	.dma_attr_burstsizes = 0x7ff,
3129276b399SYuri Pankov 	.dma_attr_minxfer = 1,
3139276b399SYuri Pankov 	.dma_attr_maxxfer = 0xffffffffU,
3149276b399SYuri Pankov 	.dma_attr_seg = 0xffffffffffffffffULL,
3159276b399SYuri Pankov 	.dma_attr_sgllen = 1,
3169276b399SYuri Pankov 	.dma_attr_granular = 1,
3179276b399SYuri Pankov 	.dma_attr_flags = 0
3189276b399SYuri Pankov };
3199276b399SYuri Pankov 
3209276b399SYuri Pankov /* DMA attributes for receive data */
3219276b399SYuri Pankov static ddi_dma_attr_t rx_buf_dma_attr = {
3229276b399SYuri Pankov 	.dma_attr_version = DMA_ATTR_V0,
3239276b399SYuri Pankov 	.dma_attr_addr_lo = 0,
3249276b399SYuri Pankov 	.dma_attr_addr_hi = 0xffffffffffffffffULL,
3259276b399SYuri Pankov 	.dma_attr_count_max = MMU_PAGEOFFSET,
3269276b399SYuri Pankov 	.dma_attr_align = MMU_PAGESIZE, /* allocation alignment */
3279276b399SYuri Pankov 	.dma_attr_burstsizes = 0x7ff,
3289276b399SYuri Pankov 	.dma_attr_minxfer = 1,
3299276b399SYuri Pankov 	.dma_attr_maxxfer = 0xffffffffU,
3309276b399SYuri Pankov 	.dma_attr_seg = 0xffffffffffffffffULL,
3319276b399SYuri Pankov 	.dma_attr_sgllen = 1,
3329276b399SYuri Pankov 	.dma_attr_granular = 1,
3339276b399SYuri Pankov 	.dma_attr_flags = 0
334843e1988Sjohnlev };
335843e1988Sjohnlev 
3369276b399SYuri Pankov /* DMA attributes for transmit data */
3379276b399SYuri Pankov static ddi_dma_attr_t tx_buf_dma_attr = {
3389276b399SYuri Pankov 	.dma_attr_version = DMA_ATTR_V0,
3399276b399SYuri Pankov 	.dma_attr_addr_lo = 0,
3409276b399SYuri Pankov 	.dma_attr_addr_hi = 0xffffffffffffffffULL,
3419276b399SYuri Pankov 	.dma_attr_count_max = MMU_PAGEOFFSET,
3429276b399SYuri Pankov 	.dma_attr_align = 1,
3439276b399SYuri Pankov 	.dma_attr_burstsizes = 0x7ff,
3449276b399SYuri Pankov 	.dma_attr_minxfer = 1,
3459276b399SYuri Pankov 	.dma_attr_maxxfer = 0xffffffffU,
3469276b399SYuri Pankov 	.dma_attr_seg = XEN_DATA_BOUNDARY - 1, /* segment boundary */
3479276b399SYuri Pankov 	.dma_attr_sgllen = XEN_MAX_TX_DATA_PAGES, /* max number of segments */
3489276b399SYuri Pankov 	.dma_attr_granular = 1,
3499276b399SYuri Pankov 	.dma_attr_flags = 0
350843e1988Sjohnlev };
351843e1988Sjohnlev 
352843e1988Sjohnlev /* DMA access attributes for registers and descriptors */
353843e1988Sjohnlev static ddi_device_acc_attr_t accattr = {
354843e1988Sjohnlev 	DDI_DEVICE_ATTR_V0,
355843e1988Sjohnlev 	DDI_STRUCTURE_LE_ACC,	/* This is a little-endian device */
356843e1988Sjohnlev 	DDI_STRICTORDER_ACC
357843e1988Sjohnlev };
358843e1988Sjohnlev 
359843e1988Sjohnlev /* DMA access attributes for data: NOT to be byte swapped. */
360843e1988Sjohnlev static ddi_device_acc_attr_t data_accattr = {
361843e1988Sjohnlev 	DDI_DEVICE_ATTR_V0,
362843e1988Sjohnlev 	DDI_NEVERSWAP_ACC,
363843e1988Sjohnlev 	DDI_STRICTORDER_ACC
364843e1988Sjohnlev };
365843e1988Sjohnlev 
366843e1988Sjohnlev DDI_DEFINE_STREAM_OPS(xnf_dev_ops, nulldev, nulldev, xnf_attach, xnf_detach,
36719397407SSherry Moore     nodev, NULL, D_MP, NULL, ddi_quiesce_not_supported);
368843e1988Sjohnlev 
369843e1988Sjohnlev static struct modldrv xnf_modldrv = {
370a859da42SDavid Edmondson 	&mod_driverops,
371a859da42SDavid Edmondson 	"Virtual Ethernet driver",
372a859da42SDavid Edmondson 	&xnf_dev_ops
373843e1988Sjohnlev };
374843e1988Sjohnlev 
375843e1988Sjohnlev static struct modlinkage modlinkage = {
376843e1988Sjohnlev 	MODREV_1, &xnf_modldrv, NULL
377843e1988Sjohnlev };
378843e1988Sjohnlev 
379843e1988Sjohnlev int
380843e1988Sjohnlev _init(void)
381843e1988Sjohnlev {
382843e1988Sjohnlev 	int r;
383843e1988Sjohnlev 
384843e1988Sjohnlev 	mac_init_ops(&xnf_dev_ops, "xnf");
385843e1988Sjohnlev 	r = mod_install(&modlinkage);
386843e1988Sjohnlev 	if (r != DDI_SUCCESS)
387843e1988Sjohnlev 		mac_fini_ops(&xnf_dev_ops);
388843e1988Sjohnlev 
389843e1988Sjohnlev 	return (r);
390843e1988Sjohnlev }
391843e1988Sjohnlev 
392843e1988Sjohnlev int
393843e1988Sjohnlev _fini(void)
394843e1988Sjohnlev {
39556567907SDavid Edmondson 	return (EBUSY); /* XXPV should be removable */
396843e1988Sjohnlev }
397843e1988Sjohnlev 
398843e1988Sjohnlev int
399843e1988Sjohnlev _info(struct modinfo *modinfop)
400843e1988Sjohnlev {
401843e1988Sjohnlev 	return (mod_info(&modlinkage, modinfop));
402843e1988Sjohnlev }
403843e1988Sjohnlev 
40456567907SDavid Edmondson /*
40556567907SDavid Edmondson  * Acquire a grant reference.
40656567907SDavid Edmondson  */
40756567907SDavid Edmondson static grant_ref_t
4089276b399SYuri Pankov xnf_gref_get(xnf_t *xnfp)
40956567907SDavid Edmondson {
41056567907SDavid Edmondson 	grant_ref_t gref;
41156567907SDavid Edmondson 
41256567907SDavid Edmondson 	mutex_enter(&xnfp->xnf_gref_lock);
41356567907SDavid Edmondson 
41456567907SDavid Edmondson 	do {
41556567907SDavid Edmondson 		gref = gnttab_claim_grant_reference(&xnfp->xnf_gref_head);
41656567907SDavid Edmondson 
41756567907SDavid Edmondson 	} while ((gref == INVALID_GRANT_REF) &&
41856567907SDavid Edmondson 	    (gnttab_alloc_grant_references(16, &xnfp->xnf_gref_head) == 0));
41956567907SDavid Edmondson 
42056567907SDavid Edmondson 	mutex_exit(&xnfp->xnf_gref_lock);
42156567907SDavid Edmondson 
42256567907SDavid Edmondson 	if (gref == INVALID_GRANT_REF) {
42356567907SDavid Edmondson 		xnfp->xnf_stat_gref_failure++;
42456567907SDavid Edmondson 	} else {
4251a5e258fSJosef 'Jeff' Sipek 		atomic_inc_64(&xnfp->xnf_stat_gref_outstanding);
42656567907SDavid Edmondson 		if (xnfp->xnf_stat_gref_outstanding > xnfp->xnf_stat_gref_peak)
42756567907SDavid Edmondson 			xnfp->xnf_stat_gref_peak =
42856567907SDavid Edmondson 			    xnfp->xnf_stat_gref_outstanding;
42956567907SDavid Edmondson 	}
43056567907SDavid Edmondson 
43156567907SDavid Edmondson 	return (gref);
43256567907SDavid Edmondson }
43356567907SDavid Edmondson 
43456567907SDavid Edmondson /*
43556567907SDavid Edmondson  * Release a grant reference.
43656567907SDavid Edmondson  */
43756567907SDavid Edmondson static void
4389276b399SYuri Pankov xnf_gref_put(xnf_t *xnfp, grant_ref_t gref)
43956567907SDavid Edmondson {
44056567907SDavid Edmondson 	ASSERT(gref != INVALID_GRANT_REF);
44156567907SDavid Edmondson 
44256567907SDavid Edmondson 	mutex_enter(&xnfp->xnf_gref_lock);
44356567907SDavid Edmondson 	gnttab_release_grant_reference(&xnfp->xnf_gref_head, gref);
44456567907SDavid Edmondson 	mutex_exit(&xnfp->xnf_gref_lock);
44556567907SDavid Edmondson 
4461a5e258fSJosef 'Jeff' Sipek 	atomic_dec_64(&xnfp->xnf_stat_gref_outstanding);
44756567907SDavid Edmondson }
44856567907SDavid Edmondson 
44956567907SDavid Edmondson /*
45056567907SDavid Edmondson  * Acquire a transmit id.
45156567907SDavid Edmondson  */
45256567907SDavid Edmondson static xnf_txid_t *
4539276b399SYuri Pankov xnf_txid_get(xnf_t *xnfp)
45456567907SDavid Edmondson {
45556567907SDavid Edmondson 	xnf_txid_t *tidp;
45656567907SDavid Edmondson 
45756567907SDavid Edmondson 	ASSERT(MUTEX_HELD(&xnfp->xnf_txlock));
45856567907SDavid Edmondson 
45956567907SDavid Edmondson 	if (xnfp->xnf_tx_pkt_id_head == INVALID_TX_ID)
46056567907SDavid Edmondson 		return (NULL);
46156567907SDavid Edmondson 
46256567907SDavid Edmondson 	ASSERT(TX_ID_VALID(xnfp->xnf_tx_pkt_id_head));
46356567907SDavid Edmondson 
46456567907SDavid Edmondson 	tidp = TX_ID_TO_TXID(xnfp, xnfp->xnf_tx_pkt_id_head);
46556567907SDavid Edmondson 	xnfp->xnf_tx_pkt_id_head = tidp->next;
46656567907SDavid Edmondson 	tidp->next = INVALID_TX_ID;
46756567907SDavid Edmondson 
46856567907SDavid Edmondson 	ASSERT(tidp->txbuf == NULL);
46956567907SDavid Edmondson 
47056567907SDavid Edmondson 	return (tidp);
47156567907SDavid Edmondson }
47256567907SDavid Edmondson 
47356567907SDavid Edmondson /*
47456567907SDavid Edmondson  * Release a transmit id.
47556567907SDavid Edmondson  */
47656567907SDavid Edmondson static void
4779276b399SYuri Pankov xnf_txid_put(xnf_t *xnfp, xnf_txid_t *tidp)
47856567907SDavid Edmondson {
47956567907SDavid Edmondson 	ASSERT(MUTEX_HELD(&xnfp->xnf_txlock));
48056567907SDavid Edmondson 	ASSERT(TX_ID_VALID(tidp->id));
48156567907SDavid Edmondson 	ASSERT(tidp->next == INVALID_TX_ID);
48256567907SDavid Edmondson 
48356567907SDavid Edmondson 	tidp->txbuf = NULL;
48456567907SDavid Edmondson 	tidp->next = xnfp->xnf_tx_pkt_id_head;
48556567907SDavid Edmondson 	xnfp->xnf_tx_pkt_id_head = tidp->id;
48656567907SDavid Edmondson }
48756567907SDavid Edmondson 
4889276b399SYuri Pankov static void
4899276b399SYuri Pankov xnf_data_txbuf_free(xnf_t *xnfp, xnf_txbuf_t *txp)
4909276b399SYuri Pankov {
4919276b399SYuri Pankov 	ASSERT3U(txp->tx_type, ==, TX_DATA);
4929276b399SYuri Pankov 
4939276b399SYuri Pankov 	/*
4949276b399SYuri Pankov 	 * We are either using a lookaside buffer or we are mapping existing
4959276b399SYuri Pankov 	 * buffers.
4969276b399SYuri Pankov 	 */
4979276b399SYuri Pankov 	if (txp->tx_bdesc != NULL) {
4989276b399SYuri Pankov 		ASSERT(!txp->tx_handle_bound);
4999276b399SYuri Pankov 		xnf_buf_put(xnfp, txp->tx_bdesc, B_TRUE);
5009276b399SYuri Pankov 	} else {
5019276b399SYuri Pankov 		if (txp->tx_txreq.gref != INVALID_GRANT_REF) {
5029276b399SYuri Pankov 			if (gnttab_query_foreign_access(txp->tx_txreq.gref) !=
5039276b399SYuri Pankov 			    0) {
5049276b399SYuri Pankov 				cmn_err(CE_PANIC, "tx grant %d still in use by "
5059276b399SYuri Pankov 				    "backend domain", txp->tx_txreq.gref);
5069276b399SYuri Pankov 			}
5079276b399SYuri Pankov 			(void) gnttab_end_foreign_access_ref(
5089276b399SYuri Pankov 			    txp->tx_txreq.gref, 1);
5099276b399SYuri Pankov 			xnf_gref_put(xnfp, txp->tx_txreq.gref);
5109276b399SYuri Pankov 		}
5119276b399SYuri Pankov 
5129276b399SYuri Pankov 		if (txp->tx_handle_bound)
5139276b399SYuri Pankov 			(void) ddi_dma_unbind_handle(txp->tx_dma_handle);
5149276b399SYuri Pankov 	}
5159276b399SYuri Pankov 
5169276b399SYuri Pankov 	if (txp->tx_mp != NULL)
5179276b399SYuri Pankov 		freemsg(txp->tx_mp);
5189276b399SYuri Pankov 
5199276b399SYuri Pankov 	if (txp->tx_prev != NULL) {
5209276b399SYuri Pankov 		ASSERT3P(txp->tx_prev->tx_next, ==, txp);
5219276b399SYuri Pankov 		txp->tx_prev->tx_next = NULL;
5229276b399SYuri Pankov 	}
5239276b399SYuri Pankov 
5249276b399SYuri Pankov 	if (txp->tx_txreq.id != INVALID_TX_ID) {
5259276b399SYuri Pankov 		/*
5269276b399SYuri Pankov 		 * This should be only possible when resuming from a suspend.
5279276b399SYuri Pankov 		 */
5289276b399SYuri Pankov 		ASSERT(!xnfp->xnf_connected);
5299276b399SYuri Pankov 		xnf_txid_put(xnfp, TX_ID_TO_TXID(xnfp, txp->tx_txreq.id));
5309276b399SYuri Pankov 		txp->tx_txreq.id = INVALID_TX_ID;
5319276b399SYuri Pankov 	}
5329276b399SYuri Pankov 
5339276b399SYuri Pankov 	kmem_cache_free(xnfp->xnf_tx_buf_cache, txp);
5349276b399SYuri Pankov }
5359276b399SYuri Pankov 
5369276b399SYuri Pankov static void
5379276b399SYuri Pankov xnf_data_txbuf_free_chain(xnf_t *xnfp, xnf_txbuf_t *txp)
5389276b399SYuri Pankov {
5399276b399SYuri Pankov 	if (txp == NULL)
5409276b399SYuri Pankov 		return;
5419276b399SYuri Pankov 
5429276b399SYuri Pankov 	while (txp->tx_next != NULL)
5439276b399SYuri Pankov 		txp = txp->tx_next;
5449276b399SYuri Pankov 
5459276b399SYuri Pankov 	/*
5469276b399SYuri Pankov 	 * We free the chain in reverse order so that grants can be released
5479276b399SYuri Pankov 	 * for all dma chunks before unbinding the dma handles. The mblk is
5489276b399SYuri Pankov 	 * freed last, after all its fragments' dma handles are unbound.
5499276b399SYuri Pankov 	 */
5509276b399SYuri Pankov 	xnf_txbuf_t *prev;
5519276b399SYuri Pankov 	for (; txp != NULL; txp = prev) {
5529276b399SYuri Pankov 		prev = txp->tx_prev;
5539276b399SYuri Pankov 		xnf_data_txbuf_free(xnfp, txp);
5549276b399SYuri Pankov 	}
5559276b399SYuri Pankov }
5569276b399SYuri Pankov 
5579276b399SYuri Pankov static xnf_txbuf_t *
558*cfd17c15SJoshua M. Clulow xnf_data_txbuf_alloc(xnf_t *xnfp, int flag)
5599276b399SYuri Pankov {
560*cfd17c15SJoshua M. Clulow 	xnf_txbuf_t *txp;
561*cfd17c15SJoshua M. Clulow 
562*cfd17c15SJoshua M. Clulow 	if ((txp = kmem_cache_alloc(xnfp->xnf_tx_buf_cache, flag)) == NULL) {
563*cfd17c15SJoshua M. Clulow 		return (NULL);
564*cfd17c15SJoshua M. Clulow 	}
565*cfd17c15SJoshua M. Clulow 
5669276b399SYuri Pankov 	txp->tx_type = TX_DATA;
5679276b399SYuri Pankov 	txp->tx_next = NULL;
5689276b399SYuri Pankov 	txp->tx_prev = NULL;
5699276b399SYuri Pankov 	txp->tx_head = txp;
5709276b399SYuri Pankov 	txp->tx_frags_to_ack = 0;
5719276b399SYuri Pankov 	txp->tx_mp = NULL;
5729276b399SYuri Pankov 	txp->tx_bdesc = NULL;
5739276b399SYuri Pankov 	txp->tx_handle_bound = B_FALSE;
5749276b399SYuri Pankov 	txp->tx_txreq.gref = INVALID_GRANT_REF;
5759276b399SYuri Pankov 	txp->tx_txreq.id = INVALID_TX_ID;
5769276b399SYuri Pankov 
5779276b399SYuri Pankov 	return (txp);
5789276b399SYuri Pankov }
5799276b399SYuri Pankov 
58056567907SDavid Edmondson /*
58156567907SDavid Edmondson  * Get `wanted' slots in the transmit ring, waiting for at least that
58256567907SDavid Edmondson  * number if `wait' is B_TRUE. Force the ring to be cleaned by setting
58356567907SDavid Edmondson  * `wanted' to zero.
58456567907SDavid Edmondson  *
58556567907SDavid Edmondson  * Return the number of slots available.
58656567907SDavid Edmondson  */
58756567907SDavid Edmondson static int
5889276b399SYuri Pankov xnf_tx_slots_get(xnf_t *xnfp, int wanted, boolean_t wait)
58956567907SDavid Edmondson {
59056567907SDavid Edmondson 	int slotsfree;
59156567907SDavid Edmondson 	boolean_t forced_clean = (wanted == 0);
59256567907SDavid Edmondson 
59356567907SDavid Edmondson 	ASSERT(MUTEX_HELD(&xnfp->xnf_txlock));
59456567907SDavid Edmondson 
59556567907SDavid Edmondson 	/* LINTED: constant in conditional context */
59656567907SDavid Edmondson 	while (B_TRUE) {
59756567907SDavid Edmondson 		slotsfree = RING_FREE_REQUESTS(&xnfp->xnf_tx_ring);
59856567907SDavid Edmondson 
59956567907SDavid Edmondson 		if ((slotsfree < wanted) || forced_clean)
60056567907SDavid Edmondson 			slotsfree = xnf_tx_clean_ring(xnfp);
60156567907SDavid Edmondson 
60256567907SDavid Edmondson 		/*
60356567907SDavid Edmondson 		 * If there are more than we need free, tell other
60456567907SDavid Edmondson 		 * people to come looking again. We hold txlock, so we
60556567907SDavid Edmondson 		 * are able to take our slots before anyone else runs.
60656567907SDavid Edmondson 		 */
60756567907SDavid Edmondson 		if (slotsfree > wanted)
60856567907SDavid Edmondson 			cv_broadcast(&xnfp->xnf_cv_tx_slots);
60956567907SDavid Edmondson 
61056567907SDavid Edmondson 		if (slotsfree >= wanted)
61156567907SDavid Edmondson 			break;
61256567907SDavid Edmondson 
61356567907SDavid Edmondson 		if (!wait)
61456567907SDavid Edmondson 			break;
61556567907SDavid Edmondson 
61656567907SDavid Edmondson 		cv_wait(&xnfp->xnf_cv_tx_slots, &xnfp->xnf_txlock);
61756567907SDavid Edmondson 	}
61856567907SDavid Edmondson 
61956567907SDavid Edmondson 	ASSERT(slotsfree <= RING_SIZE(&(xnfp->xnf_tx_ring)));
62056567907SDavid Edmondson 
62156567907SDavid Edmondson 	return (slotsfree);
62256567907SDavid Edmondson }
62356567907SDavid Edmondson 
624843e1988Sjohnlev static int
625843e1988Sjohnlev xnf_setup_rings(xnf_t *xnfp)
626843e1988Sjohnlev {
627551bc2a6Smrj 	domid_t			oeid;
62856567907SDavid Edmondson 	struct xenbus_device	*xsd;
62956567907SDavid Edmondson 	RING_IDX		i;
63056567907SDavid Edmondson 	int			err;
63156567907SDavid Edmondson 	xnf_txid_t		*tidp;
63256567907SDavid Edmondson 	xnf_buf_t **bdescp;
633843e1988Sjohnlev 
634551bc2a6Smrj 	oeid = xvdi_get_oeid(xnfp->xnf_devinfo);
635551bc2a6Smrj 	xsd = xvdi_get_xsd(xnfp->xnf_devinfo);
636843e1988Sjohnlev 
63756567907SDavid Edmondson 	if (xnfp->xnf_tx_ring_ref != INVALID_GRANT_REF)
638551bc2a6Smrj 		gnttab_end_foreign_access(xnfp->xnf_tx_ring_ref, 0, 0);
639843e1988Sjohnlev 
640843e1988Sjohnlev 	err = gnttab_grant_foreign_access(oeid,
641551bc2a6Smrj 	    xnf_btop(pa_to_ma(xnfp->xnf_tx_ring_phys_addr)), 0);
642843e1988Sjohnlev 	if (err <= 0) {
643843e1988Sjohnlev 		err = -err;
644843e1988Sjohnlev 		xenbus_dev_error(xsd, err, "granting access to tx ring page");
645843e1988Sjohnlev 		goto out;
646843e1988Sjohnlev 	}
647551bc2a6Smrj 	xnfp->xnf_tx_ring_ref = (grant_ref_t)err;
648843e1988Sjohnlev 
64956567907SDavid Edmondson 	if (xnfp->xnf_rx_ring_ref != INVALID_GRANT_REF)
650551bc2a6Smrj 		gnttab_end_foreign_access(xnfp->xnf_rx_ring_ref, 0, 0);
651843e1988Sjohnlev 
652843e1988Sjohnlev 	err = gnttab_grant_foreign_access(oeid,
653551bc2a6Smrj 	    xnf_btop(pa_to_ma(xnfp->xnf_rx_ring_phys_addr)), 0);
654843e1988Sjohnlev 	if (err <= 0) {
655843e1988Sjohnlev 		err = -err;
656843e1988Sjohnlev 		xenbus_dev_error(xsd, err, "granting access to rx ring page");
657843e1988Sjohnlev 		goto out;
658843e1988Sjohnlev 	}
659551bc2a6Smrj 	xnfp->xnf_rx_ring_ref = (grant_ref_t)err;
660843e1988Sjohnlev 
66156567907SDavid Edmondson 	mutex_enter(&xnfp->xnf_txlock);
662843e1988Sjohnlev 
663843e1988Sjohnlev 	/*
6649276b399SYuri Pankov 	 * We first cleanup the TX ring in case we are doing a resume.
6659276b399SYuri Pankov 	 * Note that this can lose packets, but we expect to stagger on.
666843e1988Sjohnlev 	 */
66756567907SDavid Edmondson 	xnfp->xnf_tx_pkt_id_head = INVALID_TX_ID; /* I.e. empty list. */
66856567907SDavid Edmondson 	for (i = 0, tidp = &xnfp->xnf_tx_pkt_id[0];
66956567907SDavid Edmondson 	    i < NET_TX_RING_SIZE;
67056567907SDavid Edmondson 	    i++, tidp++) {
6719276b399SYuri Pankov 		xnf_txbuf_t *txp = tidp->txbuf;
6729276b399SYuri Pankov 		if (txp == NULL)
67356567907SDavid Edmondson 			continue;
674843e1988Sjohnlev 
67556567907SDavid Edmondson 		switch (txp->tx_type) {
67656567907SDavid Edmondson 		case TX_DATA:
6779276b399SYuri Pankov 			/*
6789276b399SYuri Pankov 			 * xnf_txid_put() will be called for each txbuf's txid
6799276b399SYuri Pankov 			 * the chain which will result in clearing tidp->txbuf.
6809276b399SYuri Pankov 			 */
6819276b399SYuri Pankov 			xnf_data_txbuf_free_chain(xnfp, txp);
682843e1988Sjohnlev 
68356567907SDavid Edmondson 			break;
684843e1988Sjohnlev 
68556567907SDavid Edmondson 		case TX_MCAST_REQ:
68656567907SDavid Edmondson 			txp->tx_type = TX_MCAST_RSP;
68756567907SDavid Edmondson 			txp->tx_status = NETIF_RSP_DROPPED;
68856567907SDavid Edmondson 			cv_broadcast(&xnfp->xnf_cv_multicast);
689843e1988Sjohnlev 
69056567907SDavid Edmondson 			/*
69156567907SDavid Edmondson 			 * The request consumed two slots in the ring,
69256567907SDavid Edmondson 			 * yet only a single xnf_txid_t is used. Step
69356567907SDavid Edmondson 			 * over the empty slot.
69456567907SDavid Edmondson 			 */
69556567907SDavid Edmondson 			i++;
6969276b399SYuri Pankov 			ASSERT3U(i, <, NET_TX_RING_SIZE);
69756567907SDavid Edmondson 			break;
698843e1988Sjohnlev 
69956567907SDavid Edmondson 		case TX_MCAST_RSP:
70056567907SDavid Edmondson 			break;
70156567907SDavid Edmondson 		}
702843e1988Sjohnlev 	}
703843e1988Sjohnlev 
7049276b399SYuri Pankov 	/*
7059276b399SYuri Pankov 	 * Now purge old list and add each txid to the new free list.
7069276b399SYuri Pankov 	 */
7079276b399SYuri Pankov 	xnfp->xnf_tx_pkt_id_head = INVALID_TX_ID; /* I.e. empty list. */
7089276b399SYuri Pankov 	for (i = 0, tidp = &xnfp->xnf_tx_pkt_id[0];
7099276b399SYuri Pankov 	    i < NET_TX_RING_SIZE;
7109276b399SYuri Pankov 	    i++, tidp++) {
7119276b399SYuri Pankov 		tidp->id = i;
7129276b399SYuri Pankov 		ASSERT3P(tidp->txbuf, ==, NULL);
7139276b399SYuri Pankov 		tidp->next = INVALID_TX_ID; /* Appease xnf_txid_put(). */
7149276b399SYuri Pankov 		xnf_txid_put(xnfp, tidp);
7159276b399SYuri Pankov 	}
7169276b399SYuri Pankov 
717a859da42SDavid Edmondson 	/* LINTED: constant in conditional context */
718a859da42SDavid Edmondson 	SHARED_RING_INIT(xnfp->xnf_tx_ring.sring);
71956567907SDavid Edmondson 	/* LINTED: constant in conditional context */
72056567907SDavid Edmondson 	FRONT_RING_INIT(&xnfp->xnf_tx_ring,
72156567907SDavid Edmondson 	    xnfp->xnf_tx_ring.sring, PAGESIZE);
722843e1988Sjohnlev 
723551bc2a6Smrj 	mutex_exit(&xnfp->xnf_txlock);
724843e1988Sjohnlev 
72556567907SDavid Edmondson 	mutex_enter(&xnfp->xnf_rxlock);
72656567907SDavid Edmondson 
727843e1988Sjohnlev 	/*
72856567907SDavid Edmondson 	 * Clean out any buffers currently posted to the receive ring
72956567907SDavid Edmondson 	 * before we reset it.
730843e1988Sjohnlev 	 */
73156567907SDavid Edmondson 	for (i = 0, bdescp = &xnfp->xnf_rx_pkt_info[0];
73256567907SDavid Edmondson 	    i < NET_RX_RING_SIZE;
73356567907SDavid Edmondson 	    i++, bdescp++) {
73456567907SDavid Edmondson 		if (*bdescp != NULL) {
73556567907SDavid Edmondson 			xnf_buf_put(xnfp, *bdescp, B_FALSE);
73656567907SDavid Edmondson 			*bdescp = NULL;
737843e1988Sjohnlev 		}
738843e1988Sjohnlev 	}
739551bc2a6Smrj 
740a859da42SDavid Edmondson 	/* LINTED: constant in conditional context */
741a859da42SDavid Edmondson 	SHARED_RING_INIT(xnfp->xnf_rx_ring.sring);
74256567907SDavid Edmondson 	/* LINTED: constant in conditional context */
74356567907SDavid Edmondson 	FRONT_RING_INIT(&xnfp->xnf_rx_ring,
74456567907SDavid Edmondson 	    xnfp->xnf_rx_ring.sring, PAGESIZE);
745a859da42SDavid Edmondson 
74656567907SDavid Edmondson 	/*
74756567907SDavid Edmondson 	 * Fill the ring with buffers.
74856567907SDavid Edmondson 	 */
749843e1988Sjohnlev 	for (i = 0; i < NET_RX_RING_SIZE; i++) {
75056567907SDavid Edmondson 		xnf_buf_t *bdesc;
75156567907SDavid Edmondson 
75256567907SDavid Edmondson 		bdesc = xnf_buf_get(xnfp, KM_SLEEP, B_FALSE);
75356567907SDavid Edmondson 		VERIFY(bdesc != NULL);
75456567907SDavid Edmondson 		xnf_rxbuf_hang(xnfp, bdesc);
755843e1988Sjohnlev 	}
75656567907SDavid Edmondson 
757843e1988Sjohnlev 	/* LINTED: constant in conditional context */
758551bc2a6Smrj 	RING_PUSH_REQUESTS(&xnfp->xnf_rx_ring);
759843e1988Sjohnlev 
76056567907SDavid Edmondson 	mutex_exit(&xnfp->xnf_rxlock);
761843e1988Sjohnlev 
762843e1988Sjohnlev 	return (0);
763843e1988Sjohnlev 
764843e1988Sjohnlev out:
76556567907SDavid Edmondson 	if (xnfp->xnf_tx_ring_ref != INVALID_GRANT_REF)
766551bc2a6Smrj 		gnttab_end_foreign_access(xnfp->xnf_tx_ring_ref, 0, 0);
76756567907SDavid Edmondson 	xnfp->xnf_tx_ring_ref = INVALID_GRANT_REF;
768843e1988Sjohnlev 
76956567907SDavid Edmondson 	if (xnfp->xnf_rx_ring_ref != INVALID_GRANT_REF)
770551bc2a6Smrj 		gnttab_end_foreign_access(xnfp->xnf_rx_ring_ref, 0, 0);
77156567907SDavid Edmondson 	xnfp->xnf_rx_ring_ref = INVALID_GRANT_REF;
772843e1988Sjohnlev 
773843e1988Sjohnlev 	return (err);
774843e1988Sjohnlev }
775843e1988Sjohnlev 
776843e1988Sjohnlev /*
777843e1988Sjohnlev  * Connect driver to back end, called to set up communication with
778843e1988Sjohnlev  * back end driver both initially and on resume after restore/migrate.
779843e1988Sjohnlev  */
780843e1988Sjohnlev void
781843e1988Sjohnlev xnf_be_connect(xnf_t *xnfp)
782843e1988Sjohnlev {
783843e1988Sjohnlev 	const char	*message;
784843e1988Sjohnlev 	xenbus_transaction_t xbt;
785551bc2a6Smrj 	struct		xenbus_device *xsd;
786843e1988Sjohnlev 	char		*xsname;
787a390c5f4Scz 	int		err;
788843e1988Sjohnlev 
789551bc2a6Smrj 	ASSERT(!xnfp->xnf_connected);
790843e1988Sjohnlev 
791551bc2a6Smrj 	xsd = xvdi_get_xsd(xnfp->xnf_devinfo);
792551bc2a6Smrj 	xsname = xvdi_get_xsname(xnfp->xnf_devinfo);
793843e1988Sjohnlev 
794843e1988Sjohnlev 	err = xnf_setup_rings(xnfp);
795843e1988Sjohnlev 	if (err != 0) {
796843e1988Sjohnlev 		cmn_err(CE_WARN, "failed to set up tx/rx rings");
797843e1988Sjohnlev 		xenbus_dev_error(xsd, err, "setting up ring");
798843e1988Sjohnlev 		return;
799843e1988Sjohnlev 	}
800843e1988Sjohnlev 
801843e1988Sjohnlev again:
802843e1988Sjohnlev 	err = xenbus_transaction_start(&xbt);
803843e1988Sjohnlev 	if (err != 0) {
804843e1988Sjohnlev 		xenbus_dev_error(xsd, EIO, "starting transaction");
805843e1988Sjohnlev 		return;
806843e1988Sjohnlev 	}
807843e1988Sjohnlev 
808843e1988Sjohnlev 	err = xenbus_printf(xbt, xsname, "tx-ring-ref", "%u",
809551bc2a6Smrj 	    xnfp->xnf_tx_ring_ref);
810843e1988Sjohnlev 	if (err != 0) {
811843e1988Sjohnlev 		message = "writing tx ring-ref";
812843e1988Sjohnlev 		goto abort_transaction;
813843e1988Sjohnlev 	}
814843e1988Sjohnlev 
815843e1988Sjohnlev 	err = xenbus_printf(xbt, xsname, "rx-ring-ref", "%u",
816551bc2a6Smrj 	    xnfp->xnf_rx_ring_ref);
817843e1988Sjohnlev 	if (err != 0) {
818843e1988Sjohnlev 		message = "writing rx ring-ref";
819843e1988Sjohnlev 		goto abort_transaction;
820843e1988Sjohnlev 	}
821843e1988Sjohnlev 
822551bc2a6Smrj 	err = xenbus_printf(xbt, xsname, "event-channel", "%u",
823551bc2a6Smrj 	    xnfp->xnf_evtchn);
824843e1988Sjohnlev 	if (err != 0) {
825843e1988Sjohnlev 		message = "writing event-channel";
826843e1988Sjohnlev 		goto abort_transaction;
827843e1988Sjohnlev 	}
828843e1988Sjohnlev 
829843e1988Sjohnlev 	err = xenbus_printf(xbt, xsname, "feature-rx-notify", "%d", 1);
830843e1988Sjohnlev 	if (err != 0) {
831843e1988Sjohnlev 		message = "writing feature-rx-notify";
832843e1988Sjohnlev 		goto abort_transaction;
833843e1988Sjohnlev 	}
834843e1988Sjohnlev 
83556567907SDavid Edmondson 	err = xenbus_printf(xbt, xsname, "request-rx-copy", "%d", 1);
83656567907SDavid Edmondson 	if (err != 0) {
83756567907SDavid Edmondson 		message = "writing request-rx-copy";
83856567907SDavid Edmondson 		goto abort_transaction;
83956567907SDavid Edmondson 	}
84056567907SDavid Edmondson 
84156567907SDavid Edmondson 	if (xnfp->xnf_be_mcast_control) {
84256567907SDavid Edmondson 		err = xenbus_printf(xbt, xsname, "request-multicast-control",
843843e1988Sjohnlev 		    "%d", 1);
844843e1988Sjohnlev 		if (err != 0) {
84556567907SDavid Edmondson 			message = "writing request-multicast-control";
846843e1988Sjohnlev 			goto abort_transaction;
847843e1988Sjohnlev 		}
848843e1988Sjohnlev 	}
849843e1988Sjohnlev 
8509276b399SYuri Pankov 	/*
8519276b399SYuri Pankov 	 * Tell backend if we support scatter-gather lists on the rx side.
8529276b399SYuri Pankov 	 */
8539276b399SYuri Pankov 	err = xenbus_printf(xbt, xsname, "feature-sg", "%d",
8549276b399SYuri Pankov 	    xnf_enable_rx_sg ? 1 : 0);
8559276b399SYuri Pankov 	if (err != 0) {
8569276b399SYuri Pankov 		message = "writing feature-sg";
8579276b399SYuri Pankov 		goto abort_transaction;
8589276b399SYuri Pankov 	}
8599276b399SYuri Pankov 
8609276b399SYuri Pankov 	/*
8619276b399SYuri Pankov 	 * Tell backend if we support LRO for IPv4. Scatter-gather on rx is
8629276b399SYuri Pankov 	 * a prerequisite.
8639276b399SYuri Pankov 	 */
8649276b399SYuri Pankov 	err = xenbus_printf(xbt, xsname, "feature-gso-tcpv4", "%d",
8659276b399SYuri Pankov 	    (xnf_enable_rx_sg && xnf_enable_lro) ? 1 : 0);
8669276b399SYuri Pankov 	if (err != 0) {
8679276b399SYuri Pankov 		message = "writing feature-gso-tcpv4";
8689276b399SYuri Pankov 		goto abort_transaction;
8699276b399SYuri Pankov 	}
8709276b399SYuri Pankov 
87156567907SDavid Edmondson 	err = xvdi_switch_state(xnfp->xnf_devinfo, xbt, XenbusStateConnected);
872843e1988Sjohnlev 	if (err != 0) {
87356567907SDavid Edmondson 		message = "switching state to XenbusStateConnected";
874843e1988Sjohnlev 		goto abort_transaction;
875843e1988Sjohnlev 	}
876843e1988Sjohnlev 
877843e1988Sjohnlev 	err = xenbus_transaction_end(xbt, 0);
878843e1988Sjohnlev 	if (err != 0) {
879843e1988Sjohnlev 		if (err == EAGAIN)
880843e1988Sjohnlev 			goto again;
881843e1988Sjohnlev 		xenbus_dev_error(xsd, err, "completing transaction");
882843e1988Sjohnlev 	}
883843e1988Sjohnlev 
884843e1988Sjohnlev 	return;
885843e1988Sjohnlev 
886843e1988Sjohnlev abort_transaction:
887843e1988Sjohnlev 	(void) xenbus_transaction_end(xbt, 1);
888843e1988Sjohnlev 	xenbus_dev_error(xsd, err, "%s", message);
889843e1988Sjohnlev }
890843e1988Sjohnlev 
891a390c5f4Scz /*
89256567907SDavid Edmondson  * Read configuration information from xenstore.
893a390c5f4Scz  */
894a390c5f4Scz void
895a390c5f4Scz xnf_read_config(xnf_t *xnfp)
896a390c5f4Scz {
89756567907SDavid Edmondson 	int err, be_cap;
89856567907SDavid Edmondson 	char mac[ETHERADDRL * 3];
89956567907SDavid Edmondson 	char *oename = xvdi_get_oename(xnfp->xnf_devinfo);
900a390c5f4Scz 
90156567907SDavid Edmondson 	err = xenbus_scanf(XBT_NULL, oename, "mac",
902a390c5f4Scz 	    "%s", (char *)&mac[0]);
903a390c5f4Scz 	if (err != 0) {
904a390c5f4Scz 		/*
905a390c5f4Scz 		 * bad: we're supposed to be set up with a proper mac
906a390c5f4Scz 		 * address at this point.
907a390c5f4Scz 		 */
908a390c5f4Scz 		cmn_err(CE_WARN, "%s%d: no mac address",
909a390c5f4Scz 		    ddi_driver_name(xnfp->xnf_devinfo),
910a390c5f4Scz 		    ddi_get_instance(xnfp->xnf_devinfo));
911a390c5f4Scz 		return;
912a390c5f4Scz 	}
913a390c5f4Scz 	if (ether_aton(mac, xnfp->xnf_mac_addr) != ETHERADDRL) {
914a390c5f4Scz 		err = ENOENT;
915a390c5f4Scz 		xenbus_dev_error(xvdi_get_xsd(xnfp->xnf_devinfo), ENOENT,
916a390c5f4Scz 		    "parsing %s/mac", xvdi_get_xsname(xnfp->xnf_devinfo));
917a390c5f4Scz 		return;
918a390c5f4Scz 	}
919a390c5f4Scz 
92056567907SDavid Edmondson 	err = xenbus_scanf(XBT_NULL, oename,
92156567907SDavid Edmondson 	    "feature-rx-copy", "%d", &be_cap);
922a390c5f4Scz 	/*
923a390c5f4Scz 	 * If we fail to read the store we assume that the key is
924a390c5f4Scz 	 * absent, implying an older domain at the far end.  Older
92556567907SDavid Edmondson 	 * domains cannot do HV copy.
926a390c5f4Scz 	 */
927a390c5f4Scz 	if (err != 0)
92856567907SDavid Edmondson 		be_cap = 0;
92956567907SDavid Edmondson 	xnfp->xnf_be_rx_copy = (be_cap != 0);
93056567907SDavid Edmondson 
93156567907SDavid Edmondson 	err = xenbus_scanf(XBT_NULL, oename,
93256567907SDavid Edmondson 	    "feature-multicast-control", "%d", &be_cap);
933a390c5f4Scz 	/*
93456567907SDavid Edmondson 	 * If we fail to read the store we assume that the key is
93556567907SDavid Edmondson 	 * absent, implying an older domain at the far end.  Older
93656567907SDavid Edmondson 	 * domains do not support multicast control.
937a390c5f4Scz 	 */
93856567907SDavid Edmondson 	if (err != 0)
93956567907SDavid Edmondson 		be_cap = 0;
94056567907SDavid Edmondson 	xnfp->xnf_be_mcast_control = (be_cap != 0) && xnf_multicast_control;
9419276b399SYuri Pankov 
9429276b399SYuri Pankov 	/*
9439276b399SYuri Pankov 	 * See if back-end supports scatter-gather for transmits. If not,
9449276b399SYuri Pankov 	 * we will not support LSO and limit the mtu to 1500.
9459276b399SYuri Pankov 	 */
9469276b399SYuri Pankov 	err = xenbus_scanf(XBT_NULL, oename, "feature-sg", "%d", &be_cap);
9479276b399SYuri Pankov 	if (err != 0) {
9489276b399SYuri Pankov 		be_cap = 0;
9499276b399SYuri Pankov 		dev_err(xnfp->xnf_devinfo, CE_WARN, "error reading "
9509276b399SYuri Pankov 		    "'feature-sg' from backend driver");
9519276b399SYuri Pankov 	}
9529276b399SYuri Pankov 	if (be_cap == 0) {
9539276b399SYuri Pankov 		dev_err(xnfp->xnf_devinfo, CE_WARN, "scatter-gather is not "
9549276b399SYuri Pankov 		    "supported for transmits in the backend driver. LSO is "
9559276b399SYuri Pankov 		    "disabled and MTU is restricted to 1500 bytes.");
9569276b399SYuri Pankov 	}
9579276b399SYuri Pankov 	xnfp->xnf_be_tx_sg = (be_cap != 0) && xnf_enable_tx_sg;
9589276b399SYuri Pankov 
9599276b399SYuri Pankov 	if (xnfp->xnf_be_tx_sg) {
9609276b399SYuri Pankov 		/*
9619276b399SYuri Pankov 		 * Check if LSO is supported. Currently we only check for
9629276b399SYuri Pankov 		 * IPv4 as Illumos doesn't support LSO for IPv6.
9639276b399SYuri Pankov 		 */
9649276b399SYuri Pankov 		err = xenbus_scanf(XBT_NULL, oename, "feature-gso-tcpv4", "%d",
9659276b399SYuri Pankov 		    &be_cap);
9669276b399SYuri Pankov 		if (err != 0) {
9679276b399SYuri Pankov 			be_cap = 0;
9689276b399SYuri Pankov 			dev_err(xnfp->xnf_devinfo, CE_WARN, "error reading "
9699276b399SYuri Pankov 			    "'feature-gso-tcpv4' from backend driver");
9709276b399SYuri Pankov 		}
9719276b399SYuri Pankov 		if (be_cap == 0) {
9729276b399SYuri Pankov 			dev_err(xnfp->xnf_devinfo, CE_WARN, "LSO is not "
9739276b399SYuri Pankov 			    "supported by the backend driver. Performance "
9749276b399SYuri Pankov 			    "will be affected.");
9759276b399SYuri Pankov 		}
9769276b399SYuri Pankov 		xnfp->xnf_be_lso = (be_cap != 0) && xnf_enable_lso;
9779276b399SYuri Pankov 	}
978a390c5f4Scz }
979a390c5f4Scz 
980843e1988Sjohnlev /*
981843e1988Sjohnlev  *  attach(9E) -- Attach a device to the system
982843e1988Sjohnlev  */
983843e1988Sjohnlev static int
984843e1988Sjohnlev xnf_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd)
985843e1988Sjohnlev {
986843e1988Sjohnlev 	mac_register_t *macp;
987843e1988Sjohnlev 	xnf_t *xnfp;
988843e1988Sjohnlev 	int err;
98956567907SDavid Edmondson 	char cachename[32];
990843e1988Sjohnlev 
991843e1988Sjohnlev 	switch (cmd) {
992843e1988Sjohnlev 	case DDI_RESUME:
993843e1988Sjohnlev 		xnfp = ddi_get_driver_private(devinfo);
99456567907SDavid Edmondson 		xnfp->xnf_gen++;
995843e1988Sjohnlev 
996843e1988Sjohnlev 		(void) xvdi_resume(devinfo);
997843e1988Sjohnlev 		(void) xvdi_alloc_evtchn(devinfo);
998551bc2a6Smrj 		xnfp->xnf_evtchn = xvdi_get_evtchn(devinfo);
999551bc2a6Smrj #ifdef XPV_HVM_DRIVER
1000551bc2a6Smrj 		ec_bind_evtchn_to_handler(xnfp->xnf_evtchn, IPL_VIF, xnf_intr,
1001551bc2a6Smrj 		    xnfp);
1002551bc2a6Smrj #else
1003843e1988Sjohnlev 		(void) ddi_add_intr(devinfo, 0, NULL, NULL, xnf_intr,
1004843e1988Sjohnlev 		    (caddr_t)xnfp);
1005551bc2a6Smrj #endif
1006843e1988Sjohnlev 		return (DDI_SUCCESS);
1007843e1988Sjohnlev 
1008843e1988Sjohnlev 	case DDI_ATTACH:
1009843e1988Sjohnlev 		break;
1010843e1988Sjohnlev 
1011843e1988Sjohnlev 	default:
1012843e1988Sjohnlev 		return (DDI_FAILURE);
1013843e1988Sjohnlev 	}
1014843e1988Sjohnlev 
1015843e1988Sjohnlev 	/*
1016843e1988Sjohnlev 	 *  Allocate gld_mac_info_t and xnf_instance structures
1017843e1988Sjohnlev 	 */
1018843e1988Sjohnlev 	macp = mac_alloc(MAC_VERSION);
1019843e1988Sjohnlev 	if (macp == NULL)
1020843e1988Sjohnlev 		return (DDI_FAILURE);
1021843e1988Sjohnlev 	xnfp = kmem_zalloc(sizeof (*xnfp), KM_SLEEP);
1022843e1988Sjohnlev 
10239276b399SYuri Pankov 	xnfp->xnf_tx_pkt_id =
10249276b399SYuri Pankov 	    kmem_zalloc(sizeof (xnf_txid_t) * NET_TX_RING_SIZE, KM_SLEEP);
10259276b399SYuri Pankov 
10269276b399SYuri Pankov 	xnfp->xnf_rx_pkt_info =
10279276b399SYuri Pankov 	    kmem_zalloc(sizeof (xnf_buf_t *) * NET_RX_RING_SIZE, KM_SLEEP);
10289276b399SYuri Pankov 
1029843e1988Sjohnlev 	macp->m_dip = devinfo;
1030843e1988Sjohnlev 	macp->m_driver = xnfp;
1031551bc2a6Smrj 	xnfp->xnf_devinfo = devinfo;
1032843e1988Sjohnlev 
1033843e1988Sjohnlev 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
1034551bc2a6Smrj 	macp->m_src_addr = xnfp->xnf_mac_addr;
1035843e1988Sjohnlev 	macp->m_callbacks = &xnf_callbacks;
1036843e1988Sjohnlev 	macp->m_min_sdu = 0;
10379276b399SYuri Pankov 	xnfp->xnf_mtu = ETHERMTU;
10389276b399SYuri Pankov 	macp->m_max_sdu = xnfp->xnf_mtu;
1039843e1988Sjohnlev 
1040551bc2a6Smrj 	xnfp->xnf_running = B_FALSE;
1041551bc2a6Smrj 	xnfp->xnf_connected = B_FALSE;
104256567907SDavid Edmondson 	xnfp->xnf_be_rx_copy = B_FALSE;
104356567907SDavid Edmondson 	xnfp->xnf_be_mcast_control = B_FALSE;
104464c5e63cSDavid Edmondson 	xnfp->xnf_need_sched = B_FALSE;
1045551bc2a6Smrj 
104656567907SDavid Edmondson 	xnfp->xnf_rx_head = NULL;
104756567907SDavid Edmondson 	xnfp->xnf_rx_tail = NULL;
104856567907SDavid Edmondson 	xnfp->xnf_rx_new_buffers_posted = B_FALSE;
104956567907SDavid Edmondson 
1050551bc2a6Smrj #ifdef XPV_HVM_DRIVER
105166f654faSYuri Pankov 	/* Report our version to dom0 */
105266f654faSYuri Pankov 	(void) xenbus_printf(XBT_NULL, "guest/xnf", "version", "%d",
105366f654faSYuri Pankov 	    HVMPV_XNF_VERS);
1054551bc2a6Smrj #endif
1055843e1988Sjohnlev 
1056843e1988Sjohnlev 	/*
1057843e1988Sjohnlev 	 * Get the iblock cookie with which to initialize the mutexes.
1058843e1988Sjohnlev 	 */
1059551bc2a6Smrj 	if (ddi_get_iblock_cookie(devinfo, 0, &xnfp->xnf_icookie)
1060843e1988Sjohnlev 	    != DDI_SUCCESS)
1061843e1988Sjohnlev 		goto failure;
106256567907SDavid Edmondson 
106356567907SDavid Edmondson 	mutex_init(&xnfp->xnf_txlock,
1064551bc2a6Smrj 	    NULL, MUTEX_DRIVER, xnfp->xnf_icookie);
106556567907SDavid Edmondson 	mutex_init(&xnfp->xnf_rxlock,
1066551bc2a6Smrj 	    NULL, MUTEX_DRIVER, xnfp->xnf_icookie);
106756567907SDavid Edmondson 	mutex_init(&xnfp->xnf_schedlock,
1068551bc2a6Smrj 	    NULL, MUTEX_DRIVER, xnfp->xnf_icookie);
106956567907SDavid Edmondson 	mutex_init(&xnfp->xnf_gref_lock,
1070551bc2a6Smrj 	    NULL, MUTEX_DRIVER, xnfp->xnf_icookie);
1071843e1988Sjohnlev 
107256567907SDavid Edmondson 	cv_init(&xnfp->xnf_cv_state, NULL, CV_DEFAULT, NULL);
107356567907SDavid Edmondson 	cv_init(&xnfp->xnf_cv_multicast, NULL, CV_DEFAULT, NULL);
107456567907SDavid Edmondson 	cv_init(&xnfp->xnf_cv_tx_slots, NULL, CV_DEFAULT, NULL);
107556567907SDavid Edmondson 
107656567907SDavid Edmondson 	(void) sprintf(cachename, "xnf_buf_cache_%d",
107756567907SDavid Edmondson 	    ddi_get_instance(devinfo));
107856567907SDavid Edmondson 	xnfp->xnf_buf_cache = kmem_cache_create(cachename,
107956567907SDavid Edmondson 	    sizeof (xnf_buf_t), 0,
108056567907SDavid Edmondson 	    xnf_buf_constructor, xnf_buf_destructor,
108156567907SDavid Edmondson 	    NULL, xnfp, NULL, 0);
108256567907SDavid Edmondson 	if (xnfp->xnf_buf_cache == NULL)
108356567907SDavid Edmondson 		goto failure_0;
108456567907SDavid Edmondson 
108556567907SDavid Edmondson 	(void) sprintf(cachename, "xnf_tx_buf_cache_%d",
108656567907SDavid Edmondson 	    ddi_get_instance(devinfo));
108756567907SDavid Edmondson 	xnfp->xnf_tx_buf_cache = kmem_cache_create(cachename,
108856567907SDavid Edmondson 	    sizeof (xnf_txbuf_t), 0,
108956567907SDavid Edmondson 	    xnf_tx_buf_constructor, xnf_tx_buf_destructor,
109056567907SDavid Edmondson 	    NULL, xnfp, NULL, 0);
109156567907SDavid Edmondson 	if (xnfp->xnf_tx_buf_cache == NULL)
1092551bc2a6Smrj 		goto failure_1;
109356567907SDavid Edmondson 
109456567907SDavid Edmondson 	xnfp->xnf_gref_head = INVALID_GRANT_REF;
109556567907SDavid Edmondson 
1096843e1988Sjohnlev 	if (xnf_alloc_dma_resources(xnfp) == DDI_FAILURE) {
1097843e1988Sjohnlev 		cmn_err(CE_WARN, "xnf%d: failed to allocate and initialize "
1098551bc2a6Smrj 		    "driver data structures",
1099551bc2a6Smrj 		    ddi_get_instance(xnfp->xnf_devinfo));
110056567907SDavid Edmondson 		goto failure_2;
1101843e1988Sjohnlev 	}
1102843e1988Sjohnlev 
1103551bc2a6Smrj 	xnfp->xnf_rx_ring.sring->rsp_event =
1104551bc2a6Smrj 	    xnfp->xnf_tx_ring.sring->rsp_event = 1;
1105843e1988Sjohnlev 
110656567907SDavid Edmondson 	xnfp->xnf_tx_ring_ref = INVALID_GRANT_REF;
110756567907SDavid Edmondson 	xnfp->xnf_rx_ring_ref = INVALID_GRANT_REF;
1108843e1988Sjohnlev 
1109843e1988Sjohnlev 	/* set driver private pointer now */
1110843e1988Sjohnlev 	ddi_set_driver_private(devinfo, xnfp);
1111843e1988Sjohnlev 
1112843e1988Sjohnlev 	if (!xnf_kstat_init(xnfp))
111356567907SDavid Edmondson 		goto failure_3;
1114843e1988Sjohnlev 
1115843e1988Sjohnlev 	/*
1116843e1988Sjohnlev 	 * Allocate an event channel, add the interrupt handler and
1117843e1988Sjohnlev 	 * bind it to the event channel.
1118843e1988Sjohnlev 	 */
1119843e1988Sjohnlev 	(void) xvdi_alloc_evtchn(devinfo);
1120551bc2a6Smrj 	xnfp->xnf_evtchn = xvdi_get_evtchn(devinfo);
1121551bc2a6Smrj #ifdef XPV_HVM_DRIVER
1122551bc2a6Smrj 	ec_bind_evtchn_to_handler(xnfp->xnf_evtchn, IPL_VIF, xnf_intr, xnfp);
1123551bc2a6Smrj #else
1124843e1988Sjohnlev 	(void) ddi_add_intr(devinfo, 0, NULL, NULL, xnf_intr, (caddr_t)xnfp);
1125551bc2a6Smrj #endif
1126843e1988Sjohnlev 
1127551bc2a6Smrj 	err = mac_register(macp, &xnfp->xnf_mh);
1128843e1988Sjohnlev 	mac_free(macp);
1129843e1988Sjohnlev 	macp = NULL;
1130843e1988Sjohnlev 	if (err != 0)
113156567907SDavid Edmondson 		goto failure_4;
113256567907SDavid Edmondson 
113356567907SDavid Edmondson 	if (xvdi_add_event_handler(devinfo, XS_OE_STATE, oe_state_change, NULL)
113456567907SDavid Edmondson 	    != DDI_SUCCESS)
113556567907SDavid Edmondson 		goto failure_5;
1136843e1988Sjohnlev 
1137fb07ba1cSfvdl #ifdef XPV_HVM_DRIVER
1138fb07ba1cSfvdl 	/*
1139fb07ba1cSfvdl 	 * In the HVM case, this driver essentially replaces a driver for
1140fb07ba1cSfvdl 	 * a 'real' PCI NIC. Without the "model" property set to
1141fb07ba1cSfvdl 	 * "Ethernet controller", like the PCI code does, netbooting does
1142fb07ba1cSfvdl 	 * not work correctly, as strplumb_get_netdev_path() will not find
1143fb07ba1cSfvdl 	 * this interface.
1144fb07ba1cSfvdl 	 */
1145fb07ba1cSfvdl 	(void) ndi_prop_update_string(DDI_DEV_T_NONE, devinfo, "model",
1146fb07ba1cSfvdl 	    "Ethernet controller");
1147fb07ba1cSfvdl #endif
1148fb07ba1cSfvdl 
1149843e1988Sjohnlev 	return (DDI_SUCCESS);
1150843e1988Sjohnlev 
115156567907SDavid Edmondson failure_5:
11521665cdc0SDavid Edmondson 	(void) mac_unregister(xnfp->xnf_mh);
115356567907SDavid Edmondson 
115456567907SDavid Edmondson failure_4:
1155551bc2a6Smrj #ifdef XPV_HVM_DRIVER
1156551bc2a6Smrj 	ec_unbind_evtchn(xnfp->xnf_evtchn);
1157ea8190a2Ssmaybe 	xvdi_free_evtchn(devinfo);
1158551bc2a6Smrj #else
1159551bc2a6Smrj 	ddi_remove_intr(devinfo, 0, xnfp->xnf_icookie);
1160551bc2a6Smrj #endif
1161551bc2a6Smrj 	xnfp->xnf_evtchn = INVALID_EVTCHN;
116256567907SDavid Edmondson 	kstat_delete(xnfp->xnf_kstat_aux);
116356567907SDavid Edmondson 
116456567907SDavid Edmondson failure_3:
116556567907SDavid Edmondson 	xnf_release_dma_resources(xnfp);
1166843e1988Sjohnlev 
1167a390c5f4Scz failure_2:
116856567907SDavid Edmondson 	kmem_cache_destroy(xnfp->xnf_tx_buf_cache);
1169a390c5f4Scz 
1170551bc2a6Smrj failure_1:
117156567907SDavid Edmondson 	kmem_cache_destroy(xnfp->xnf_buf_cache);
117256567907SDavid Edmondson 
117356567907SDavid Edmondson failure_0:
117456567907SDavid Edmondson 	cv_destroy(&xnfp->xnf_cv_tx_slots);
117556567907SDavid Edmondson 	cv_destroy(&xnfp->xnf_cv_multicast);
117656567907SDavid Edmondson 	cv_destroy(&xnfp->xnf_cv_state);
117756567907SDavid Edmondson 
117856567907SDavid Edmondson 	mutex_destroy(&xnfp->xnf_gref_lock);
117956567907SDavid Edmondson 	mutex_destroy(&xnfp->xnf_schedlock);
118056567907SDavid Edmondson 	mutex_destroy(&xnfp->xnf_rxlock);
1181551bc2a6Smrj 	mutex_destroy(&xnfp->xnf_txlock);
1182843e1988Sjohnlev 
1183843e1988Sjohnlev failure:
1184843e1988Sjohnlev 	kmem_free(xnfp, sizeof (*xnfp));
1185843e1988Sjohnlev 	if (macp != NULL)
1186843e1988Sjohnlev 		mac_free(macp);
1187843e1988Sjohnlev 
1188843e1988Sjohnlev 	return (DDI_FAILURE);
1189843e1988Sjohnlev }
1190843e1988Sjohnlev 
1191843e1988Sjohnlev /*  detach(9E) -- Detach a device from the system */
1192843e1988Sjohnlev static int
1193843e1988Sjohnlev xnf_detach(dev_info_t *devinfo, ddi_detach_cmd_t cmd)
1194843e1988Sjohnlev {
1195843e1988Sjohnlev 	xnf_t *xnfp;		/* Our private device info */
1196843e1988Sjohnlev 
1197843e1988Sjohnlev 	xnfp = ddi_get_driver_private(devinfo);
1198843e1988Sjohnlev 
1199843e1988Sjohnlev 	switch (cmd) {
1200843e1988Sjohnlev 	case DDI_SUSPEND:
1201551bc2a6Smrj #ifdef XPV_HVM_DRIVER
1202551bc2a6Smrj 		ec_unbind_evtchn(xnfp->xnf_evtchn);
1203ea8190a2Ssmaybe 		xvdi_free_evtchn(devinfo);
1204551bc2a6Smrj #else
1205551bc2a6Smrj 		ddi_remove_intr(devinfo, 0, xnfp->xnf_icookie);
1206551bc2a6Smrj #endif
1207843e1988Sjohnlev 
1208843e1988Sjohnlev 		xvdi_suspend(devinfo);
1209843e1988Sjohnlev 
121056567907SDavid Edmondson 		mutex_enter(&xnfp->xnf_rxlock);
1211551bc2a6Smrj 		mutex_enter(&xnfp->xnf_txlock);
1212843e1988Sjohnlev 
1213551bc2a6Smrj 		xnfp->xnf_evtchn = INVALID_EVTCHN;
1214551bc2a6Smrj 		xnfp->xnf_connected = B_FALSE;
1215551bc2a6Smrj 		mutex_exit(&xnfp->xnf_txlock);
121656567907SDavid Edmondson 		mutex_exit(&xnfp->xnf_rxlock);
12174bae950fSMax zhen 
12184bae950fSMax zhen 		/* claim link to be down after disconnect */
12194bae950fSMax zhen 		mac_link_update(xnfp->xnf_mh, LINK_STATE_DOWN);
1220843e1988Sjohnlev 		return (DDI_SUCCESS);
1221843e1988Sjohnlev 
1222843e1988Sjohnlev 	case DDI_DETACH:
1223843e1988Sjohnlev 		break;
1224843e1988Sjohnlev 
1225843e1988Sjohnlev 	default:
1226843e1988Sjohnlev 		return (DDI_FAILURE);
1227843e1988Sjohnlev 	}
1228843e1988Sjohnlev 
1229551bc2a6Smrj 	if (xnfp->xnf_connected)
1230843e1988Sjohnlev 		return (DDI_FAILURE);
1231843e1988Sjohnlev 
123256567907SDavid Edmondson 	/*
123356567907SDavid Edmondson 	 * Cannot detach if we have xnf_buf_t outstanding.
123456567907SDavid Edmondson 	 */
123556567907SDavid Edmondson 	if (xnfp->xnf_stat_buf_allocated > 0)
123656567907SDavid Edmondson 		return (DDI_FAILURE);
1237843e1988Sjohnlev 
1238551bc2a6Smrj 	if (mac_unregister(xnfp->xnf_mh) != 0)
1239843e1988Sjohnlev 		return (DDI_FAILURE);
1240843e1988Sjohnlev 
1241a390c5f4Scz 	kstat_delete(xnfp->xnf_kstat_aux);
1242a390c5f4Scz 
1243843e1988Sjohnlev 	/* Stop the receiver */
1244843e1988Sjohnlev 	xnf_stop(xnfp);
1245843e1988Sjohnlev 
1246843e1988Sjohnlev 	xvdi_remove_event_handler(devinfo, XS_OE_STATE);
1247843e1988Sjohnlev 
1248843e1988Sjohnlev 	/* Remove the interrupt */
1249551bc2a6Smrj #ifdef XPV_HVM_DRIVER
1250551bc2a6Smrj 	ec_unbind_evtchn(xnfp->xnf_evtchn);
1251ea8190a2Ssmaybe 	xvdi_free_evtchn(devinfo);
1252551bc2a6Smrj #else
1253551bc2a6Smrj 	ddi_remove_intr(devinfo, 0, xnfp->xnf_icookie);
1254551bc2a6Smrj #endif
1255843e1988Sjohnlev 
1256843e1988Sjohnlev 	/* Release any pending xmit mblks */
1257843e1988Sjohnlev 	xnf_release_mblks(xnfp);
1258843e1988Sjohnlev 
1259843e1988Sjohnlev 	/* Release all DMA resources */
1260843e1988Sjohnlev 	xnf_release_dma_resources(xnfp);
1261843e1988Sjohnlev 
126256567907SDavid Edmondson 	cv_destroy(&xnfp->xnf_cv_tx_slots);
126356567907SDavid Edmondson 	cv_destroy(&xnfp->xnf_cv_multicast);
126456567907SDavid Edmondson 	cv_destroy(&xnfp->xnf_cv_state);
126556567907SDavid Edmondson 
126656567907SDavid Edmondson 	kmem_cache_destroy(xnfp->xnf_tx_buf_cache);
126756567907SDavid Edmondson 	kmem_cache_destroy(xnfp->xnf_buf_cache);
126856567907SDavid Edmondson 
126956567907SDavid Edmondson 	mutex_destroy(&xnfp->xnf_gref_lock);
127056567907SDavid Edmondson 	mutex_destroy(&xnfp->xnf_schedlock);
127156567907SDavid Edmondson 	mutex_destroy(&xnfp->xnf_rxlock);
1272551bc2a6Smrj 	mutex_destroy(&xnfp->xnf_txlock);
1273843e1988Sjohnlev 
1274843e1988Sjohnlev 	kmem_free(xnfp, sizeof (*xnfp));
1275843e1988Sjohnlev 
1276843e1988Sjohnlev 	return (DDI_SUCCESS);
1277843e1988Sjohnlev }
1278843e1988Sjohnlev 
1279843e1988Sjohnlev /*
1280843e1988Sjohnlev  *  xnf_set_mac_addr() -- set the physical network address on the board.
1281843e1988Sjohnlev  */
1282843e1988Sjohnlev static int
1283843e1988Sjohnlev xnf_set_mac_addr(void *arg, const uint8_t *macaddr)
1284843e1988Sjohnlev {
128556567907SDavid Edmondson 	_NOTE(ARGUNUSED(arg, macaddr));
1286843e1988Sjohnlev 
1287843e1988Sjohnlev 	/*
1288843e1988Sjohnlev 	 * We can't set our macaddr.
1289843e1988Sjohnlev 	 */
1290843e1988Sjohnlev 	return (ENOTSUP);
1291843e1988Sjohnlev }
1292843e1988Sjohnlev 
1293843e1988Sjohnlev /*
1294843e1988Sjohnlev  *  xnf_set_multicast() -- set (enable) or disable a multicast address.
1295843e1988Sjohnlev  *
1296843e1988Sjohnlev  *  Program the hardware to enable/disable the multicast address
129756567907SDavid Edmondson  *  in "mca".  Enable if "add" is true, disable if false.
1298843e1988Sjohnlev  */
1299843e1988Sjohnlev static int
1300843e1988Sjohnlev xnf_set_multicast(void *arg, boolean_t add, const uint8_t *mca)
1301843e1988Sjohnlev {
1302843e1988Sjohnlev 	xnf_t *xnfp = arg;
130356567907SDavid Edmondson 	xnf_txbuf_t *txp;
130456567907SDavid Edmondson 	int n_slots;
130556567907SDavid Edmondson 	RING_IDX slot;
130656567907SDavid Edmondson 	xnf_txid_t *tidp;
130756567907SDavid Edmondson 	netif_tx_request_t *txrp;
130856567907SDavid Edmondson 	struct netif_extra_info *erp;
130956567907SDavid Edmondson 	boolean_t notify, result;
1310843e1988Sjohnlev 
131156567907SDavid Edmondson 	/*
131256567907SDavid Edmondson 	 * If the backend does not support multicast control then we
131356567907SDavid Edmondson 	 * must assume that the right packets will just arrive.
131456567907SDavid Edmondson 	 */
131556567907SDavid Edmondson 	if (!xnfp->xnf_be_mcast_control)
131656567907SDavid Edmondson 		return (0);
131756567907SDavid Edmondson 
131856567907SDavid Edmondson 	txp = kmem_cache_alloc(xnfp->xnf_tx_buf_cache, KM_SLEEP);
131956567907SDavid Edmondson 
132056567907SDavid Edmondson 	mutex_enter(&xnfp->xnf_txlock);
1321843e1988Sjohnlev 
1322843e1988Sjohnlev 	/*
132356567907SDavid Edmondson 	 * If we're not yet connected then claim success. This is
132456567907SDavid Edmondson 	 * acceptable because we refresh the entire set of multicast
132556567907SDavid Edmondson 	 * addresses when we get connected.
1326843e1988Sjohnlev 	 *
132756567907SDavid Edmondson 	 * We can't wait around here because the MAC layer expects
132856567907SDavid Edmondson 	 * this to be a non-blocking operation - waiting ends up
132956567907SDavid Edmondson 	 * causing a deadlock during resume.
1330843e1988Sjohnlev 	 */
133156567907SDavid Edmondson 	if (!xnfp->xnf_connected) {
133256567907SDavid Edmondson 		mutex_exit(&xnfp->xnf_txlock);
133356567907SDavid Edmondson 		return (0);
133456567907SDavid Edmondson 	}
1335843e1988Sjohnlev 
133656567907SDavid Edmondson 	/*
133756567907SDavid Edmondson 	 * 1. Acquire two slots in the ring.
133856567907SDavid Edmondson 	 * 2. Fill in the slots.
133956567907SDavid Edmondson 	 * 3. Request notification when the operation is done.
134056567907SDavid Edmondson 	 * 4. Kick the peer.
134156567907SDavid Edmondson 	 * 5. Wait for the response via xnf_tx_clean_ring().
134256567907SDavid Edmondson 	 */
134356567907SDavid Edmondson 
13449276b399SYuri Pankov 	n_slots = xnf_tx_slots_get(xnfp, 2, B_TRUE);
134556567907SDavid Edmondson 	ASSERT(n_slots >= 2);
134656567907SDavid Edmondson 
134756567907SDavid Edmondson 	slot = xnfp->xnf_tx_ring.req_prod_pvt;
13489276b399SYuri Pankov 	tidp = xnf_txid_get(xnfp);
134956567907SDavid Edmondson 	VERIFY(tidp != NULL);
135056567907SDavid Edmondson 
135156567907SDavid Edmondson 	txp->tx_type = TX_MCAST_REQ;
135256567907SDavid Edmondson 	txp->tx_slot = slot;
135356567907SDavid Edmondson 
135456567907SDavid Edmondson 	txrp = RING_GET_REQUEST(&xnfp->xnf_tx_ring, slot);
135556567907SDavid Edmondson 	erp = (struct netif_extra_info *)
135656567907SDavid Edmondson 	    RING_GET_REQUEST(&xnfp->xnf_tx_ring, slot + 1);
135756567907SDavid Edmondson 
135856567907SDavid Edmondson 	txrp->gref = 0;
135956567907SDavid Edmondson 	txrp->size = 0;
136056567907SDavid Edmondson 	txrp->offset = 0;
136156567907SDavid Edmondson 	/* Set tx_txreq.id to appease xnf_tx_clean_ring(). */
136256567907SDavid Edmondson 	txrp->id = txp->tx_txreq.id = tidp->id;
136356567907SDavid Edmondson 	txrp->flags = NETTXF_extra_info;
136456567907SDavid Edmondson 
136556567907SDavid Edmondson 	erp->type = add ? XEN_NETIF_EXTRA_TYPE_MCAST_ADD :
136656567907SDavid Edmondson 	    XEN_NETIF_EXTRA_TYPE_MCAST_DEL;
136756567907SDavid Edmondson 	bcopy((void *)mca, &erp->u.mcast.addr, ETHERADDRL);
136856567907SDavid Edmondson 
136956567907SDavid Edmondson 	tidp->txbuf = txp;
137056567907SDavid Edmondson 
137156567907SDavid Edmondson 	xnfp->xnf_tx_ring.req_prod_pvt = slot + 2;
137256567907SDavid Edmondson 
137356567907SDavid Edmondson 	mutex_enter(&xnfp->xnf_schedlock);
137456567907SDavid Edmondson 	xnfp->xnf_pending_multicast++;
137556567907SDavid Edmondson 	mutex_exit(&xnfp->xnf_schedlock);
137656567907SDavid Edmondson 
137756567907SDavid Edmondson 	/* LINTED: constant in conditional context */
137856567907SDavid Edmondson 	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&xnfp->xnf_tx_ring,
137956567907SDavid Edmondson 	    notify);
138056567907SDavid Edmondson 	if (notify)
138156567907SDavid Edmondson 		ec_notify_via_evtchn(xnfp->xnf_evtchn);
138256567907SDavid Edmondson 
138356567907SDavid Edmondson 	while (txp->tx_type == TX_MCAST_REQ)
13849276b399SYuri Pankov 		cv_wait(&xnfp->xnf_cv_multicast, &xnfp->xnf_txlock);
138556567907SDavid Edmondson 
13869276b399SYuri Pankov 	ASSERT3U(txp->tx_type, ==, TX_MCAST_RSP);
138756567907SDavid Edmondson 
138856567907SDavid Edmondson 	mutex_enter(&xnfp->xnf_schedlock);
138956567907SDavid Edmondson 	xnfp->xnf_pending_multicast--;
139056567907SDavid Edmondson 	mutex_exit(&xnfp->xnf_schedlock);
139156567907SDavid Edmondson 
139256567907SDavid Edmondson 	result = (txp->tx_status == NETIF_RSP_OKAY);
139356567907SDavid Edmondson 
13949276b399SYuri Pankov 	xnf_txid_put(xnfp, tidp);
139556567907SDavid Edmondson 
139656567907SDavid Edmondson 	mutex_exit(&xnfp->xnf_txlock);
139756567907SDavid Edmondson 
139856567907SDavid Edmondson 	kmem_cache_free(xnfp->xnf_tx_buf_cache, txp);
139956567907SDavid Edmondson 
140056567907SDavid Edmondson 	return (result ? 0 : 1);
140156567907SDavid Edmondson }
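
/*
 * For reference (layout derived from the code above, shown in the same
 * table style used for rx packets in xnf_rx_one_packet()), a multicast
 * control request occupies two consecutive tx ring slots:
 *
 * +------+---------------------+--------------------------+------------+
 * | SLOT | TYPE                | CONTENTS                 | FLAGS      |
 * +------+---------------------+--------------------------+------------+
 * | 1    | netif_tx_request_t  | gref/size/offset all 0   | extra_info |
 * +------+---------------------+--------------------------+------------+
 * | 2    | netif_extra_info_t  | MCAST_ADD/DEL + address  | [none]     |
 * +------+---------------------+--------------------------+------------+
 *
 * The extra-info slot is acknowledged with NETIF_RSP_NULL; the response
 * for the first slot is collected in xnf_tx_clean_ring(), which signals
 * xnf_cv_multicast.
 */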
140256567907SDavid Edmondson 
140356567907SDavid Edmondson /*
140456567907SDavid Edmondson  * xnf_set_promiscuous() -- set or reset promiscuous mode on the board
140556567907SDavid Edmondson  *
140656567907SDavid Edmondson  *  Program the hardware to enable/disable promiscuous mode.
140756567907SDavid Edmondson  */
140856567907SDavid Edmondson static int
1409843e1988Sjohnlev xnf_set_promiscuous(void *arg, boolean_t on)
1410843e1988Sjohnlev {
141156567907SDavid Edmondson 	_NOTE(ARGUNUSED(arg, on));
1412843e1988Sjohnlev 
1413843e1988Sjohnlev 	/*
1414843e1988Sjohnlev 	 * We can't really do this, but we pretend that we can so
1415843e1988Sjohnlev 	 * that snoop will work.
1416843e1988Sjohnlev 	 */
1417843e1988Sjohnlev 	return (0);
1418843e1988Sjohnlev }
1419843e1988Sjohnlev 
1420843e1988Sjohnlev /*
1421843e1988Sjohnlev  * Clean buffers that we have responses for from the transmit ring.
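 *
 * Returns the number of free request slots in the transmit ring, as
 * reported by RING_FREE_REQUESTS().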
1422843e1988Sjohnlev  */
1423843e1988Sjohnlev static int
142456567907SDavid Edmondson xnf_tx_clean_ring(xnf_t *xnfp)
1425843e1988Sjohnlev {
142656567907SDavid Edmondson 	boolean_t work_to_do;
1427843e1988Sjohnlev 
1428551bc2a6Smrj 	ASSERT(MUTEX_HELD(&xnfp->xnf_txlock));
1429843e1988Sjohnlev 
1430a390c5f4Scz loop:
143166f1a35aSschuster 	while (RING_HAS_UNCONSUMED_RESPONSES(&xnfp->xnf_tx_ring)) {
143256567907SDavid Edmondson 		RING_IDX cons, prod, i;
143356567907SDavid Edmondson 
143456567907SDavid Edmondson 		cons = xnfp->xnf_tx_ring.rsp_cons;
143556567907SDavid Edmondson 		prod = xnfp->xnf_tx_ring.sring->rsp_prod;
1436843e1988Sjohnlev 		membar_consumer();
1437843e1988Sjohnlev 		/*
143856567907SDavid Edmondson 		 * Clean tx requests from the ring for which we have
143956567907SDavid Edmondson 		 * responses.
1440843e1988Sjohnlev 		 */
144156567907SDavid Edmondson 		DTRACE_PROBE2(xnf_tx_clean_range, int, cons, int, prod);
144256567907SDavid Edmondson 		for (i = cons; i != prod; i++) {
144356567907SDavid Edmondson 			netif_tx_response_t *trp;
144456567907SDavid Edmondson 			xnf_txid_t *tidp;
144556567907SDavid Edmondson 			xnf_txbuf_t *txp;
144656567907SDavid Edmondson 
144756567907SDavid Edmondson 			trp = RING_GET_RESPONSE(&xnfp->xnf_tx_ring, i);
14489276b399SYuri Pankov 			/*
14499276b399SYuri Pankov 			 * If this slot was occupied by a netif_extra_info_t,
14509276b399SYuri Pankov 			 * the response will be NETIF_RSP_NULL. In this
14519276b399SYuri Pankov 			 * case there are no resources to clean up.
14529276b399SYuri Pankov 			 */
14539276b399SYuri Pankov 			if (trp->status == NETIF_RSP_NULL)
14549276b399SYuri Pankov 				continue;
14559276b399SYuri Pankov 
145656567907SDavid Edmondson 			ASSERT(TX_ID_VALID(trp->id));
145756567907SDavid Edmondson 
145856567907SDavid Edmondson 			tidp = TX_ID_TO_TXID(xnfp, trp->id);
14599276b399SYuri Pankov 			ASSERT3U(tidp->id, ==, trp->id);
14609276b399SYuri Pankov 			ASSERT3U(tidp->next, ==, INVALID_TX_ID);
146156567907SDavid Edmondson 
146256567907SDavid Edmondson 			txp = tidp->txbuf;
146356567907SDavid Edmondson 			ASSERT(txp != NULL);
14649276b399SYuri Pankov 			ASSERT3U(txp->tx_txreq.id, ==, trp->id);
146556567907SDavid Edmondson 
146656567907SDavid Edmondson 			switch (txp->tx_type) {
146756567907SDavid Edmondson 			case TX_DATA:
14689276b399SYuri Pankov 				/*
14699276b399SYuri Pankov 				 * We must put the txid for each response we
14709276b399SYuri Pankov 				 * acknowledge to make sure that we never have
14719276b399SYuri Pankov 				 * more free slots than txids. Because of this
14729276b399SYuri Pankov 				 * we do it here instead of waiting for it to
14739276b399SYuri Pankov 				 * be done in xnf_data_txbuf_free_chain().
14749276b399SYuri Pankov 				 */
14759276b399SYuri Pankov 				xnf_txid_put(xnfp, tidp);
14769276b399SYuri Pankov 				txp->tx_txreq.id = INVALID_TX_ID;
14779276b399SYuri Pankov 				ASSERT3S(txp->tx_head->tx_frags_to_ack, >, 0);
14789276b399SYuri Pankov 				txp->tx_head->tx_frags_to_ack--;
14799276b399SYuri Pankov 
14809276b399SYuri Pankov 				/*
14819276b399SYuri Pankov 				 * We clean the whole chain once we have a
14829276b399SYuri Pankov 				 * response for each fragment.
14839276b399SYuri Pankov 				 */
14849276b399SYuri Pankov 				if (txp->tx_head->tx_frags_to_ack == 0)
14859276b399SYuri Pankov 					xnf_data_txbuf_free_chain(xnfp, txp);
148656567907SDavid Edmondson 
148756567907SDavid Edmondson 				break;
148856567907SDavid Edmondson 
148956567907SDavid Edmondson 			case TX_MCAST_REQ:
149056567907SDavid Edmondson 				txp->tx_type = TX_MCAST_RSP;
149156567907SDavid Edmondson 				txp->tx_status = trp->status;
149256567907SDavid Edmondson 				cv_broadcast(&xnfp->xnf_cv_multicast);
149356567907SDavid Edmondson 
149456567907SDavid Edmondson 				break;
149556567907SDavid Edmondson 
149656567907SDavid Edmondson 			default:
149756567907SDavid Edmondson 				cmn_err(CE_PANIC, "xnf_tx_clean_ring: "
149856567907SDavid Edmondson 				    "invalid xnf_txbuf_t type: %d",
149956567907SDavid Edmondson 				    txp->tx_type);
150056567907SDavid Edmondson 				break;
150156567907SDavid Edmondson 			}
1502843e1988Sjohnlev 		}
150356567907SDavid Edmondson 		/*
150456567907SDavid Edmondson 		 * Record the last response we dealt with so that we
150556567907SDavid Edmondson 		 * know where to start next time around.
150656567907SDavid Edmondson 		 */
150756567907SDavid Edmondson 		xnfp->xnf_tx_ring.rsp_cons = prod;
1508843e1988Sjohnlev 		membar_enter();
150966f1a35aSschuster 	}
151066f1a35aSschuster 
1511a390c5f4Scz 	/* LINTED: constant in conditional context */
1512a390c5f4Scz 	RING_FINAL_CHECK_FOR_RESPONSES(&xnfp->xnf_tx_ring, work_to_do);
1513a390c5f4Scz 	if (work_to_do)
1514a390c5f4Scz 		goto loop;
1515a390c5f4Scz 
151666f1a35aSschuster 	return (RING_FREE_REQUESTS(&xnfp->xnf_tx_ring));
1517843e1988Sjohnlev }
1518843e1988Sjohnlev 
1519843e1988Sjohnlev /*
152056567907SDavid Edmondson  * Allocate and fill in a look-aside buffer for the packet `mp'. Used
152156567907SDavid Edmondson  * to ensure that the packet is physically contiguous and contained
152256567907SDavid Edmondson  * within a single page.
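 *
 * Returns NULL if no look-aside buffer is available; otherwise *plen is
 * set to the number of bytes copied.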
1523843e1988Sjohnlev  */
152456567907SDavid Edmondson static xnf_buf_t *
15259276b399SYuri Pankov xnf_tx_get_lookaside(xnf_t *xnfp, mblk_t *mp, size_t *plen)
1526843e1988Sjohnlev {
152756567907SDavid Edmondson 	xnf_buf_t *bd;
152856567907SDavid Edmondson 	caddr_t bp;
1529843e1988Sjohnlev 
1530*cfd17c15SJoshua M. Clulow 	if ((bd = xnf_buf_get(xnfp, KM_NOSLEEP, B_TRUE)) == NULL) {
153156567907SDavid Edmondson 		return (NULL);
1532*cfd17c15SJoshua M. Clulow 	}
153356567907SDavid Edmondson 
153456567907SDavid Edmondson 	bp = bd->buf;
153556567907SDavid Edmondson 	while (mp != NULL) {
153656567907SDavid Edmondson 		size_t len = MBLKL(mp);
153756567907SDavid Edmondson 
153856567907SDavid Edmondson 		bcopy(mp->b_rptr, bp, len);
1539843e1988Sjohnlev 		bp += len;
154056567907SDavid Edmondson 
154156567907SDavid Edmondson 		mp = mp->b_cont;
1542843e1988Sjohnlev 	}
154356567907SDavid Edmondson 
15449276b399SYuri Pankov 	*plen = bp - bd->buf;
15459276b399SYuri Pankov 	ASSERT3U(*plen, <=, PAGESIZE);
154656567907SDavid Edmondson 
15479276b399SYuri Pankov 	xnfp->xnf_stat_tx_lookaside++;
154856567907SDavid Edmondson 
154956567907SDavid Edmondson 	return (bd);
1550843e1988Sjohnlev }
1551843e1988Sjohnlev 
155256567907SDavid Edmondson /*
15539276b399SYuri Pankov  * Insert the pseudo-header checksum into the packet.
15549276b399SYuri Pankov  * Assumes packet is IPv4, TCP/UDP since we only advertised support for
15559276b399SYuri Pankov  * HCKSUM_INET_FULL_V4.
155656567907SDavid Edmondson  */
15579276b399SYuri Pankov int
15589276b399SYuri Pankov xnf_pseudo_cksum(mblk_t *mp)
1559a859da42SDavid Edmondson {
1560a859da42SDavid Edmondson 	struct ether_header *ehp;
15619276b399SYuri Pankov 	uint16_t sap, iplen, *stuff;
1562a859da42SDavid Edmondson 	uint32_t cksum;
15639276b399SYuri Pankov 	size_t len;
1564a859da42SDavid Edmondson 	ipha_t *ipha;
1565a859da42SDavid Edmondson 	ipaddr_t src, dst;
15669276b399SYuri Pankov 	uchar_t *ptr;
15679276b399SYuri Pankov 
15689276b399SYuri Pankov 	ptr = mp->b_rptr;
15699276b399SYuri Pankov 	len = MBLKL(mp);
15709276b399SYuri Pankov 
15719276b399SYuri Pankov 	/* Each header must fit completely in an mblk. */
15729276b399SYuri Pankov 	ASSERT3U(len, >=, sizeof (*ehp));
1573a859da42SDavid Edmondson 
15749276b399SYuri Pankov 	ehp = (struct ether_header *)ptr;
1575a859da42SDavid Edmondson 
1576a859da42SDavid Edmondson 	if (ntohs(ehp->ether_type) == VLAN_TPID) {
1577a859da42SDavid Edmondson 		struct ether_vlan_header *evhp;
15789276b399SYuri Pankov 		ASSERT3U(len, >=, sizeof (*evhp));
15799276b399SYuri Pankov 		evhp = (struct ether_vlan_header *)ptr;
1580a859da42SDavid Edmondson 		sap = ntohs(evhp->ether_type);
15819276b399SYuri Pankov 		ptr += sizeof (*evhp);
15829276b399SYuri Pankov 		len -= sizeof (*evhp);
1583a859da42SDavid Edmondson 	} else {
1584a859da42SDavid Edmondson 		sap = ntohs(ehp->ether_type);
15859276b399SYuri Pankov 		ptr += sizeof (*ehp);
15869276b399SYuri Pankov 		len -= sizeof (*ehp);
1587a859da42SDavid Edmondson 	}
1588a859da42SDavid Edmondson 
15899276b399SYuri Pankov 	ASSERT3U(sap, ==, ETHERTYPE_IP);
1590a859da42SDavid Edmondson 
15919276b399SYuri Pankov 	/*
15929276b399SYuri Pankov 	 * Ethernet and IP headers may be in different mblks.
15939276b399SYuri Pankov 	 */
15949276b399SYuri Pankov 	ASSERT3P(ptr, <=, mp->b_wptr);
15959276b399SYuri Pankov 	if (ptr == mp->b_wptr) {
15969276b399SYuri Pankov 		mp = mp->b_cont;
15979276b399SYuri Pankov 		ptr = mp->b_rptr;
15989276b399SYuri Pankov 		len = MBLKL(mp);
1599a859da42SDavid Edmondson 	}
1600a859da42SDavid Edmondson 
16019276b399SYuri Pankov 	ASSERT3U(len, >=, sizeof (ipha_t));
16029276b399SYuri Pankov 	ipha = (ipha_t *)ptr;
1603a859da42SDavid Edmondson 
16049276b399SYuri Pankov 	/*
16059276b399SYuri Pankov 	 * We assume the IP header has no options. (This is enforced in
16069276b399SYuri Pankov 	 * ire_send_wire_v4() -- search for IXAF_NO_HW_CKSUM).
16079276b399SYuri Pankov 	 */
16089276b399SYuri Pankov 	ASSERT3U(IPH_HDR_LENGTH(ipha), ==, IP_SIMPLE_HDR_LENGTH);
16099276b399SYuri Pankov 	iplen = ntohs(ipha->ipha_length) - IP_SIMPLE_HDR_LENGTH;
1610a859da42SDavid Edmondson 
16119276b399SYuri Pankov 	ptr += IP_SIMPLE_HDR_LENGTH;
16129276b399SYuri Pankov 	len -= IP_SIMPLE_HDR_LENGTH;
16139276b399SYuri Pankov 
16149276b399SYuri Pankov 	/*
16159276b399SYuri Pankov 	 * IP and L4 headers may be in different mblks.
16169276b399SYuri Pankov 	 */
16179276b399SYuri Pankov 	ASSERT3P(ptr, <=, mp->b_wptr);
16189276b399SYuri Pankov 	if (ptr == mp->b_wptr) {
16199276b399SYuri Pankov 		mp = mp->b_cont;
16209276b399SYuri Pankov 		ptr = mp->b_rptr;
16219276b399SYuri Pankov 		len = MBLKL(mp);
16229276b399SYuri Pankov 	}
1623a859da42SDavid Edmondson 
1624a859da42SDavid Edmondson 	switch (ipha->ipha_protocol) {
1625a859da42SDavid Edmondson 	case IPPROTO_TCP:
16269276b399SYuri Pankov 		ASSERT3U(len, >=, sizeof (tcph_t));
16279276b399SYuri Pankov 		stuff = (uint16_t *)(ptr + TCP_CHECKSUM_OFFSET);
1628a859da42SDavid Edmondson 		cksum = IP_TCP_CSUM_COMP;
1629a859da42SDavid Edmondson 		break;
1630a859da42SDavid Edmondson 	case IPPROTO_UDP:
16319276b399SYuri Pankov 		ASSERT3U(len, >=, sizeof (struct udphdr));
16329276b399SYuri Pankov 		stuff = (uint16_t *)(ptr + UDP_CHECKSUM_OFFSET);
1633a859da42SDavid Edmondson 		cksum = IP_UDP_CSUM_COMP;
1634a859da42SDavid Edmondson 		break;
1635a859da42SDavid Edmondson 	default:
1636a859da42SDavid Edmondson 		cmn_err(CE_WARN, "xnf_pseudo_cksum: unexpected protocol %d",
1637a859da42SDavid Edmondson 		    ipha->ipha_protocol);
16389276b399SYuri Pankov 		return (EINVAL);
1639a859da42SDavid Edmondson 	}
1640a859da42SDavid Edmondson 
1641a859da42SDavid Edmondson 	src = ipha->ipha_src;
1642a859da42SDavid Edmondson 	dst = ipha->ipha_dst;
1643a859da42SDavid Edmondson 
1644a859da42SDavid Edmondson 	cksum += (dst >> 16) + (dst & 0xFFFF);
1645a859da42SDavid Edmondson 	cksum += (src >> 16) + (src & 0xFFFF);
16469276b399SYuri Pankov 	cksum += htons(iplen);
1647a859da42SDavid Edmondson 
1648a859da42SDavid Edmondson 	cksum = (cksum >> 16) + (cksum & 0xFFFF);
1649a859da42SDavid Edmondson 	cksum = (cksum >> 16) + (cksum & 0xFFFF);
1650a859da42SDavid Edmondson 
1651a859da42SDavid Edmondson 	ASSERT(cksum <= 0xFFFF);
1652a859da42SDavid Edmondson 
1653a859da42SDavid Edmondson 	*stuff = (uint16_t)(cksum ? cksum : ~cksum);
16549276b399SYuri Pankov 
16559276b399SYuri Pankov 	return (0);
1656a859da42SDavid Edmondson }
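
/*
 * Illustration only (not driver code): a minimal sketch, with hypothetical
 * names, of the pseudo-header sum that xnf_pseudo_cksum() seeds into the
 * L4 checksum field. The peer completes the one's-complement sum over the
 * L4 header and payload, so only the addresses, the protocol constant and
 * the L4 length are folded in here, mirroring the code above:
 *
 *	static uint16_t
 *	pseudo_hdr_sum(ipaddr_t src, ipaddr_t dst, uint32_t proto_comp,
 *	    uint16_t l4len_net)
 *	{
 *		uint32_t sum = proto_comp;
 *
 *		sum += (dst >> 16) + (dst & 0xFFFF);
 *		sum += (src >> 16) + (src & 0xFFFF);
 *		sum += l4len_net;
 *		sum = (sum >> 16) + (sum & 0xFFFF);
 *		sum = (sum >> 16) + (sum & 0xFFFF);
 *		return ((uint16_t)(sum ? sum : ~sum));
 *	}
 *
 * where proto_comp is IP_TCP_CSUM_COMP or IP_UDP_CSUM_COMP and l4len_net
 * is the L4 length in network byte order (htons(iplen) above).
 */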
1657a859da42SDavid Edmondson 
1658843e1988Sjohnlev /*
16599276b399SYuri Pankov  * Push a packet into the transmit ring.
16609276b399SYuri Pankov  *
16619276b399SYuri Pankov  * Note: the format of a tx packet that spans multiple slots is similar to
16629276b399SYuri Pankov  * what is described in xnf_rx_one_packet().
1663843e1988Sjohnlev  */
16649276b399SYuri Pankov static void
16659276b399SYuri Pankov xnf_tx_push_packet(xnf_t *xnfp, xnf_txbuf_t *head)
1666843e1988Sjohnlev {
16679276b399SYuri Pankov 	int nslots = 0;
16689276b399SYuri Pankov 	int extras = 0;
166956567907SDavid Edmondson 	RING_IDX slot;
167056567907SDavid Edmondson 	boolean_t notify;
1671843e1988Sjohnlev 
16729276b399SYuri Pankov 	ASSERT(MUTEX_HELD(&xnfp->xnf_txlock));
167356567907SDavid Edmondson 	ASSERT(xnfp->xnf_running);
1674843e1988Sjohnlev 
167566f1a35aSschuster 	slot = xnfp->xnf_tx_ring.req_prod_pvt;
167656567907SDavid Edmondson 
16779276b399SYuri Pankov 	/*
16789276b399SYuri Pankov 	 * The caller has already checked that we have enough slots to proceed.
16799276b399SYuri Pankov 	 */
16809276b399SYuri Pankov 	for (xnf_txbuf_t *txp = head; txp != NULL; txp = txp->tx_next) {
168156567907SDavid Edmondson 		xnf_txid_t *tidp;
168256567907SDavid Edmondson 		netif_tx_request_t *txrp;
168356567907SDavid Edmondson 
16849276b399SYuri Pankov 		tidp = xnf_txid_get(xnfp);
168556567907SDavid Edmondson 		VERIFY(tidp != NULL);
168656567907SDavid Edmondson 		txrp = RING_GET_REQUEST(&xnfp->xnf_tx_ring, slot);
168756567907SDavid Edmondson 
168856567907SDavid Edmondson 		txp->tx_slot = slot;
168956567907SDavid Edmondson 		txp->tx_txreq.id = tidp->id;
169056567907SDavid Edmondson 		*txrp = txp->tx_txreq;
169156567907SDavid Edmondson 
169256567907SDavid Edmondson 		tidp->txbuf = txp;
169356567907SDavid Edmondson 		slot++;
16949276b399SYuri Pankov 		nslots++;
169556567907SDavid Edmondson 
16969276b399SYuri Pankov 		/*
16979276b399SYuri Pankov 		 * When present, LSO info is placed in a slot after the first
16989276b399SYuri Pankov 		 * data segment, and doesn't require a txid.
16999276b399SYuri Pankov 		 */
17009276b399SYuri Pankov 		if (txp->tx_txreq.flags & NETTXF_extra_info) {
17019276b399SYuri Pankov 			netif_extra_info_t *extra;
17029276b399SYuri Pankov 			ASSERT3U(nslots, ==, 1);
17039276b399SYuri Pankov 
17049276b399SYuri Pankov 			extra = (netif_extra_info_t *)
17059276b399SYuri Pankov 			    RING_GET_REQUEST(&xnfp->xnf_tx_ring, slot);
17069276b399SYuri Pankov 			*extra = txp->tx_extra;
17079276b399SYuri Pankov 			slot++;
17089276b399SYuri Pankov 			nslots++;
17099276b399SYuri Pankov 			extras = 1;
17109276b399SYuri Pankov 		}
1711843e1988Sjohnlev 	}
1712843e1988Sjohnlev 
17139276b399SYuri Pankov 	ASSERT3U(nslots, <=, XEN_MAX_SLOTS_PER_TX);
17149276b399SYuri Pankov 
17159276b399SYuri Pankov 	/*
17169276b399SYuri Pankov 	 * Store the number of data fragments.
17179276b399SYuri Pankov 	 */
17189276b399SYuri Pankov 	head->tx_frags_to_ack = nslots - extras;
17199276b399SYuri Pankov 
172056567907SDavid Edmondson 	xnfp->xnf_tx_ring.req_prod_pvt = slot;
172156567907SDavid Edmondson 
1722843e1988Sjohnlev 	/*
172356567907SDavid Edmondson 	 * Tell the peer that we sent something, if it cares.
1724843e1988Sjohnlev 	 */
172556567907SDavid Edmondson 	/* LINTED: constant in conditional context */
17269276b399SYuri Pankov 	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&xnfp->xnf_tx_ring, notify);
172756567907SDavid Edmondson 	if (notify)
172856567907SDavid Edmondson 		ec_notify_via_evtchn(xnfp->xnf_evtchn);
17299276b399SYuri Pankov }
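
/*
 * For illustration, an LSO packet that was mapped into three data
 * fragments would be laid out by the code above roughly as follows (same
 * table style as the rx description in xnf_rx_one_packet()):
 *
 * +------+---------------------+-------------------+-----------------------+
 * | SLOT | TYPE                | CONTENTS          | FLAGS                 |
 * +------+---------------------+-------------------+-----------------------+
 * | 1    | netif_tx_request_t  | 1st data fragment | extra_info, more_data |
 * +------+---------------------+-------------------+-----------------------+
 * | 2    | netif_extra_info_t  | GSO (LSO) info    | [none]                |
 * +------+---------------------+-------------------+-----------------------+
 * | 3    | netif_tx_request_t  | 2nd data fragment | more_data             |
 * +------+---------------------+-------------------+-----------------------+
 * | 4    | netif_tx_request_t  | 3rd data fragment | [none]                |
 * +------+---------------------+-------------------+-----------------------+
 *
 * The first request carries the total packet length in its size field (set
 * in xnf_send()) and only the data fragments consume txids.
 */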
173056567907SDavid Edmondson 
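/*
 * Copy the packet `mp' into a single look-aside buffer and attach it to a
 * freshly allocated xnf_txbuf_t. Returns NULL if either allocation fails.
 */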
17319276b399SYuri Pankov static xnf_txbuf_t *
17329276b399SYuri Pankov xnf_mblk_copy(xnf_t *xnfp, mblk_t *mp)
17339276b399SYuri Pankov {
1734*cfd17c15SJoshua M. Clulow 	xnf_txbuf_t *txp;
17359276b399SYuri Pankov 	size_t length;
17369276b399SYuri Pankov 
1737*cfd17c15SJoshua M. Clulow 	if ((txp = xnf_data_txbuf_alloc(xnfp, KM_NOSLEEP)) == NULL) {
1738*cfd17c15SJoshua M. Clulow 		return (NULL);
1739*cfd17c15SJoshua M. Clulow 	}
1740*cfd17c15SJoshua M. Clulow 
17419276b399SYuri Pankov 	txp->tx_bdesc = xnf_tx_get_lookaside(xnfp, mp, &length);
17429276b399SYuri Pankov 	if (txp->tx_bdesc == NULL) {
17439276b399SYuri Pankov 		xnf_data_txbuf_free(xnfp, txp);
17449276b399SYuri Pankov 		return (NULL);
17459276b399SYuri Pankov 	}
17469276b399SYuri Pankov 	txp->tx_mfn = txp->tx_bdesc->buf_mfn;
17479276b399SYuri Pankov 	txp->tx_txreq.gref = txp->tx_bdesc->grant_ref;
17489276b399SYuri Pankov 	txp->tx_txreq.size = length;
17499276b399SYuri Pankov 	txp->tx_txreq.offset = (uintptr_t)txp->tx_bdesc->buf & PAGEOFFSET;
17509276b399SYuri Pankov 	txp->tx_txreq.flags = 0;
175156567907SDavid Edmondson 
175256567907SDavid Edmondson 	return (txp);
175356567907SDavid Edmondson }
175456567907SDavid Edmondson 
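/*
 * DMA-map the mblk chain `mp', building a chain of xnf_txbuf_t with one
 * entry per DMA cookie. On success the head of the chain is returned and
 * *countp is set to the number of segments; on failure the partial chain
 * is freed and NULL is returned.
 */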
17559276b399SYuri Pankov static xnf_txbuf_t *
17569276b399SYuri Pankov xnf_mblk_map(xnf_t *xnfp, mblk_t *mp, int *countp)
175756567907SDavid Edmondson {
17589276b399SYuri Pankov 	xnf_txbuf_t *head = NULL;
17599276b399SYuri Pankov 	xnf_txbuf_t *tail = NULL;
176056567907SDavid Edmondson 	domid_t oeid;
17619276b399SYuri Pankov 	int nsegs = 0;
1762843e1988Sjohnlev 
176356567907SDavid Edmondson 	oeid = xvdi_get_oeid(xnfp->xnf_devinfo);
1764843e1988Sjohnlev 
17659276b399SYuri Pankov 	for (mblk_t *ml = mp; ml != NULL; ml = ml->b_cont) {
17669276b399SYuri Pankov 		ddi_dma_handle_t dma_handle;
17678fa80b69SAndrew Stormont 		const ddi_dma_cookie_t *dma_cookie, *dma_cookie_prev;
176856567907SDavid Edmondson 		xnf_txbuf_t *txp;
176956567907SDavid Edmondson 
17709276b399SYuri Pankov 		if (MBLKL(ml) == 0)
17719276b399SYuri Pankov 			continue;
177256567907SDavid Edmondson 
1773*cfd17c15SJoshua M. Clulow 		if ((txp = xnf_data_txbuf_alloc(xnfp, KM_NOSLEEP)) == NULL) {
1774*cfd17c15SJoshua M. Clulow 			goto error;
1775*cfd17c15SJoshua M. Clulow 		}
177656567907SDavid Edmondson 
17779276b399SYuri Pankov 		if (head == NULL) {
17789276b399SYuri Pankov 			head = txp;
17799276b399SYuri Pankov 		} else {
17809276b399SYuri Pankov 			ASSERT(tail != NULL);
17819276b399SYuri Pankov 			TXBUF_SETNEXT(tail, txp);
17829276b399SYuri Pankov 			txp->tx_head = head;
178356567907SDavid Edmondson 		}
178456567907SDavid Edmondson 
1785843e1988Sjohnlev 		/*
17869276b399SYuri Pankov 		 * The necessary segmentation rules (e.g. not crossing a page
17879276b399SYuri Pankov 		 * boundary) are enforced by the dma attributes of the handle.
1788843e1988Sjohnlev 		 */
17899276b399SYuri Pankov 		dma_handle = txp->tx_dma_handle;
17909276b399SYuri Pankov 		int ret = ddi_dma_addr_bind_handle(dma_handle,
17919276b399SYuri Pankov 		    NULL, (char *)ml->b_rptr, MBLKL(ml),
17929276b399SYuri Pankov 		    DDI_DMA_WRITE | DDI_DMA_STREAMING,
17938fa80b69SAndrew Stormont 		    DDI_DMA_DONTWAIT, 0, NULL, NULL);
17949276b399SYuri Pankov 		if (ret != DDI_DMA_MAPPED) {
17959276b399SYuri Pankov 			if (ret != DDI_DMA_NORESOURCES) {
17969276b399SYuri Pankov 				dev_err(xnfp->xnf_devinfo, CE_WARN,
17979276b399SYuri Pankov 				    "ddi_dma_addr_bind_handle() failed "
17989276b399SYuri Pankov 				    "[dma_error=%d]", ret);
17999276b399SYuri Pankov 			}
18009276b399SYuri Pankov 			goto error;
1801843e1988Sjohnlev 		}
18029276b399SYuri Pankov 		txp->tx_handle_bound = B_TRUE;
18039276b399SYuri Pankov 
18048fa80b69SAndrew Stormont 		dma_cookie_prev = NULL;
18058fa80b69SAndrew Stormont 		while ((dma_cookie = ddi_dma_cookie_iter(dma_handle,
18068fa80b69SAndrew Stormont 		    dma_cookie_prev)) != NULL) {
18079276b399SYuri Pankov 			if (nsegs == XEN_MAX_TX_DATA_PAGES) {
18089276b399SYuri Pankov 				dev_err(xnfp->xnf_devinfo, CE_WARN,
18099276b399SYuri Pankov 				    "xnf_dmamap_alloc() failed: "
18109276b399SYuri Pankov 				    "too many segments");
18119276b399SYuri Pankov 				goto error;
181256567907SDavid Edmondson 			}
18138fa80b69SAndrew Stormont 			if (dma_cookie_prev != NULL) {
1814*cfd17c15SJoshua M. Clulow 				if ((txp = xnf_data_txbuf_alloc(xnfp,
1815*cfd17c15SJoshua M. Clulow 				    KM_NOSLEEP)) == NULL) {
1816*cfd17c15SJoshua M. Clulow 					goto error;
1817*cfd17c15SJoshua M. Clulow 				}
18189276b399SYuri Pankov 				ASSERT(tail != NULL);
18199276b399SYuri Pankov 				TXBUF_SETNEXT(tail, txp);
18209276b399SYuri Pankov 				txp->tx_head = head;
182156567907SDavid Edmondson 			}
182256567907SDavid Edmondson 
182356567907SDavid Edmondson 			txp->tx_mfn =
18248fa80b69SAndrew Stormont 			    xnf_btop(pa_to_ma(dma_cookie->dmac_laddress));
18259276b399SYuri Pankov 			txp->tx_txreq.gref = xnf_gref_get(xnfp);
182656567907SDavid Edmondson 			if (txp->tx_txreq.gref == INVALID_GRANT_REF) {
18279276b399SYuri Pankov 				dev_err(xnfp->xnf_devinfo, CE_WARN,
18289276b399SYuri Pankov 				    "xnf_dmamap_alloc() failed: "
18299276b399SYuri Pankov 				    "invalid grant ref");
18309276b399SYuri Pankov 				goto error;
183156567907SDavid Edmondson 			}
183256567907SDavid Edmondson 			gnttab_grant_foreign_access_ref(txp->tx_txreq.gref,
183356567907SDavid Edmondson 			    oeid, txp->tx_mfn, 1);
18349276b399SYuri Pankov 			txp->tx_txreq.offset =
18358fa80b69SAndrew Stormont 			    dma_cookie->dmac_laddress & PAGEOFFSET;
18368fa80b69SAndrew Stormont 			txp->tx_txreq.size = dma_cookie->dmac_size;
18379276b399SYuri Pankov 			txp->tx_txreq.flags = 0;
183856567907SDavid Edmondson 
18399276b399SYuri Pankov 			nsegs++;
184056567907SDavid Edmondson 
18419276b399SYuri Pankov 			if (tail != NULL)
18429276b399SYuri Pankov 				tail->tx_txreq.flags = NETTXF_more_data;
18439276b399SYuri Pankov 			tail = txp;
18448fa80b69SAndrew Stormont 
18458fa80b69SAndrew Stormont 			dma_cookie_prev = dma_cookie;
184656567907SDavid Edmondson 		}
18479276b399SYuri Pankov 	}
184856567907SDavid Edmondson 
18499276b399SYuri Pankov 	*countp = nsegs;
18509276b399SYuri Pankov 	return (head);
185156567907SDavid Edmondson 
18529276b399SYuri Pankov error:
18539276b399SYuri Pankov 	xnf_data_txbuf_free_chain(xnfp, head);
18549276b399SYuri Pankov 	return (NULL);
18559276b399SYuri Pankov }
1856843e1988Sjohnlev 
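/*
 * Fill in the offload state for the packet headed by `head': for LSO,
 * attach a GSO extra-info record to the head txbuf; for checksum offload,
 * mark the head request NETTXF_csum_blank (the pseudo-header checksum has
 * already been inserted in xnf_send()).
 */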
18579276b399SYuri Pankov static void
18589276b399SYuri Pankov xnf_tx_setup_offload(xnf_t *xnfp, xnf_txbuf_t *head,
18599276b399SYuri Pankov     uint32_t cksum_flags, uint32_t lso_flags, uint32_t mss)
18609276b399SYuri Pankov {
18619276b399SYuri Pankov 	if (lso_flags != 0) {
18629276b399SYuri Pankov 		ASSERT3U(lso_flags, ==, HW_LSO);
18639276b399SYuri Pankov 		ASSERT3P(head->tx_bdesc, ==, NULL);
18649276b399SYuri Pankov 
18659276b399SYuri Pankov 		head->tx_txreq.flags |= NETTXF_extra_info;
18669276b399SYuri Pankov 		netif_extra_info_t *extra = &head->tx_extra;
18679276b399SYuri Pankov 		extra->type = XEN_NETIF_EXTRA_TYPE_GSO;
18689276b399SYuri Pankov 		extra->flags = 0;
18699276b399SYuri Pankov 		extra->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
18709276b399SYuri Pankov 		extra->u.gso.size = mss;
18719276b399SYuri Pankov 		extra->u.gso.features = 0;
18729276b399SYuri Pankov 		extra->u.gso.pad = 0;
18739276b399SYuri Pankov 	} else if (cksum_flags != 0) {
18749276b399SYuri Pankov 		ASSERT3U(cksum_flags, ==, HCK_FULLCKSUM);
1875843e1988Sjohnlev 		/*
18769276b399SYuri Pankov 		 * If the local protocol stack requests checksum
18779276b399SYuri Pankov 		 * offload we set the 'checksum blank' flag,
18789276b399SYuri Pankov 		 * indicating to the peer that we need the checksum
18799276b399SYuri Pankov 		 * calculated for us.
18809276b399SYuri Pankov 		 *
18819276b399SYuri Pankov 		 * We _don't_ set the validated flag, because we haven't
18829276b399SYuri Pankov 		 * validated that the data and the checksum match.
18839276b399SYuri Pankov 		 *
18849276b399SYuri Pankov 		 * Note: we already called xnf_pseudo_cksum() in
18859276b399SYuri Pankov 		 * xnf_send(), so we just set the txreq flag here.
1886843e1988Sjohnlev 		 */
18879276b399SYuri Pankov 		head->tx_txreq.flags |= NETTXF_csum_blank;
18889276b399SYuri Pankov 		xnfp->xnf_stat_tx_cksum_deferred++;
1889843e1988Sjohnlev 	}
18909276b399SYuri Pankov }
1891843e1988Sjohnlev 
18929276b399SYuri Pankov /*
18939276b399SYuri Pankov  * Send packet mp. Called by the MAC framework.
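 *
 * In brief: the packet is copied into a single look-aside buffer when its
 * ethernet header would straddle a page boundary (and it fits in one page),
 * or when it spans multiple pages and the backend does not support
 * scatter-gather; otherwise its mblks are DMA-mapped directly, with
 * msgpullup() used once if the fragment count would exceed
 * XEN_MAX_TX_DATA_PAGES.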
18949276b399SYuri Pankov  */
18959276b399SYuri Pankov static mblk_t *
18969276b399SYuri Pankov xnf_send(void *arg, mblk_t *mp)
18979276b399SYuri Pankov {
18989276b399SYuri Pankov 	xnf_t *xnfp = arg;
18999276b399SYuri Pankov 	xnf_txbuf_t *head;
19009276b399SYuri Pankov 	mblk_t *ml;
19019276b399SYuri Pankov 	int length;
19029276b399SYuri Pankov 	int pages, chunks, slots, slots_free;
19039276b399SYuri Pankov 	uint32_t cksum_flags, lso_flags, mss;
19049276b399SYuri Pankov 	boolean_t pulledup = B_FALSE;
19059276b399SYuri Pankov 	boolean_t force_copy = B_FALSE;
1906843e1988Sjohnlev 
19079276b399SYuri Pankov 	ASSERT3P(mp->b_next, ==, NULL);
1908843e1988Sjohnlev 
19099276b399SYuri Pankov 	mutex_enter(&xnfp->xnf_txlock);
19109276b399SYuri Pankov 
19119276b399SYuri Pankov 	/*
19129276b399SYuri Pankov 	 * Wait until we are connected to the backend.
19139276b399SYuri Pankov 	 */
19149276b399SYuri Pankov 	while (!xnfp->xnf_connected)
19159276b399SYuri Pankov 		cv_wait(&xnfp->xnf_cv_state, &xnfp->xnf_txlock);
19169276b399SYuri Pankov 
19179276b399SYuri Pankov 	/*
19189276b399SYuri Pankov 	 * To simplify logic and be in sync with the rescheduling mechanism,
19199276b399SYuri Pankov 	 * we require the maximum amount of slots that could be used by a
19209276b399SYuri Pankov 	 * transaction to be free before proceeding. The only downside of doing
19219276b399SYuri Pankov 	 * this is that it slightly reduces the effective size of the ring.
19229276b399SYuri Pankov 	 */
19239276b399SYuri Pankov 	slots_free = xnf_tx_slots_get(xnfp, XEN_MAX_SLOTS_PER_TX, B_FALSE);
19249276b399SYuri Pankov 	if (slots_free < XEN_MAX_SLOTS_PER_TX) {
19259276b399SYuri Pankov 		/*
19269276b399SYuri Pankov 		 * We need to ask for a re-schedule later as the ring is full.
19279276b399SYuri Pankov 		 */
19289276b399SYuri Pankov 		mutex_enter(&xnfp->xnf_schedlock);
19299276b399SYuri Pankov 		xnfp->xnf_need_sched = B_TRUE;
19309276b399SYuri Pankov 		mutex_exit(&xnfp->xnf_schedlock);
1931843e1988Sjohnlev 
19329276b399SYuri Pankov 		xnfp->xnf_stat_tx_defer++;
19339276b399SYuri Pankov 		mutex_exit(&xnfp->xnf_txlock);
19349276b399SYuri Pankov 		return (mp);
193556567907SDavid Edmondson 	}
1936843e1988Sjohnlev 
1937843e1988Sjohnlev 	/*
19389276b399SYuri Pankov 	 * Get hw offload parameters.
19399276b399SYuri Pankov 	 * This must be done before pulling up the mp as those parameters
19409276b399SYuri Pankov 	 * are not copied over.
1941843e1988Sjohnlev 	 */
19429276b399SYuri Pankov 	mac_hcksum_get(mp, NULL, NULL, NULL, NULL, &cksum_flags);
19439276b399SYuri Pankov 	mac_lso_get(mp, &mss, &lso_flags);
1944843e1988Sjohnlev 
1945843e1988Sjohnlev 	/*
19469276b399SYuri Pankov 	 * XXX: fix MAC framework so that we can advertise support for
19479276b399SYuri Pankov 	 * partial checksum for IPv4 only. This way we won't need to calculate
19489276b399SYuri Pankov 	 * the pseudo header checksum ourselves.
1949843e1988Sjohnlev 	 */
19509276b399SYuri Pankov 	if (cksum_flags != 0) {
19519276b399SYuri Pankov 		ASSERT3U(cksum_flags, ==, HCK_FULLCKSUM);
19529276b399SYuri Pankov 		(void) xnf_pseudo_cksum(mp);
19539276b399SYuri Pankov 	}
1954843e1988Sjohnlev 
19559276b399SYuri Pankov pulledup:
19569276b399SYuri Pankov 	for (ml = mp, pages = 0, chunks = 0, length = 0; ml != NULL;
19579276b399SYuri Pankov 	    ml = ml->b_cont, chunks++) {
19589276b399SYuri Pankov 		pages += xnf_mblk_pages(ml);
19599276b399SYuri Pankov 		length += MBLKL(ml);
19609276b399SYuri Pankov 	}
19619276b399SYuri Pankov 	DTRACE_PROBE3(packet, int, length, int, chunks, int, pages);
19629276b399SYuri Pankov 	DTRACE_PROBE3(lso, int, length, uint32_t, lso_flags, uint32_t, mss);
1963843e1988Sjohnlev 
19649276b399SYuri Pankov 	/*
19659276b399SYuri Pankov 	 * If the ethernet header crosses a page boundary the packet
19669276b399SYuri Pankov 	 * will be dropped by the backend. In practice it seems like
19679276b399SYuri Pankov 	 * this happens fairly rarely so we'll do nothing unless the
19689276b399SYuri Pankov 	 * packet is small enough to fit in a look-aside buffer.
19699276b399SYuri Pankov 	 */
19709276b399SYuri Pankov 	if (((uintptr_t)mp->b_rptr & PAGEOFFSET) +
19719276b399SYuri Pankov 	    sizeof (struct ether_header) > PAGESIZE) {
19729276b399SYuri Pankov 		xnfp->xnf_stat_tx_eth_hdr_split++;
19739276b399SYuri Pankov 		if (length <= PAGESIZE)
19749276b399SYuri Pankov 			force_copy = B_TRUE;
19759276b399SYuri Pankov 	}
1976843e1988Sjohnlev 
19779276b399SYuri Pankov 	if (force_copy || (pages > 1 && !xnfp->xnf_be_tx_sg)) {
19789276b399SYuri Pankov 		/*
19799276b399SYuri Pankov 		 * If the packet spans several pages and scatter-gather is not
19809276b399SYuri Pankov 		 * supported then use a look-aside buffer.
19819276b399SYuri Pankov 		 */
19829276b399SYuri Pankov 		ASSERT3U(length, <=, PAGESIZE);
19839276b399SYuri Pankov 		head = xnf_mblk_copy(xnfp, mp);
19849276b399SYuri Pankov 		if (head == NULL) {
19859276b399SYuri Pankov 			dev_err(xnfp->xnf_devinfo, CE_WARN,
19869276b399SYuri Pankov 			    "xnf_mblk_copy() failed");
19879276b399SYuri Pankov 			goto drop;
19889276b399SYuri Pankov 		}
19899276b399SYuri Pankov 	} else {
19909276b399SYuri Pankov 		/*
19919276b399SYuri Pankov 		 * There's a limit on how many pages can be passed to the
19929276b399SYuri Pankov 		 * backend. If we exceed that limit, the packet will be dropped
19939276b399SYuri Pankov 		 * and some backend implementations (e.g. Linux) could even
19949276b399SYuri Pankov 		 * offline the interface.
19959276b399SYuri Pankov 		 */
19969276b399SYuri Pankov 		if (pages > XEN_MAX_TX_DATA_PAGES) {
19979276b399SYuri Pankov 			if (pulledup) {
19989276b399SYuri Pankov 				dev_err(xnfp->xnf_devinfo, CE_WARN,
19999276b399SYuri Pankov 				    "too many pages, even after pullup: %d.",
20009276b399SYuri Pankov 				    pages);
20019276b399SYuri Pankov 				goto drop;
20029276b399SYuri Pankov 			}
2003843e1988Sjohnlev 
20049276b399SYuri Pankov 			/*
20059276b399SYuri Pankov 			 * Defragment packet if it spans too many pages.
20069276b399SYuri Pankov 			 */
20079276b399SYuri Pankov 			mblk_t *newmp = msgpullup(mp, -1);
2008*cfd17c15SJoshua M. Clulow 			if (newmp == NULL) {
2009*cfd17c15SJoshua M. Clulow 				dev_err(xnfp->xnf_devinfo, CE_WARN,
2010*cfd17c15SJoshua M. Clulow 				    "msgpullup() failed");
2011*cfd17c15SJoshua M. Clulow 				goto drop;
2012*cfd17c15SJoshua M. Clulow 			}
2013*cfd17c15SJoshua M. Clulow 
20149276b399SYuri Pankov 			freemsg(mp);
20159276b399SYuri Pankov 			mp = newmp;
20169276b399SYuri Pankov 			xnfp->xnf_stat_tx_pullup++;
20179276b399SYuri Pankov 			pulledup = B_TRUE;
20189276b399SYuri Pankov 			goto pulledup;
201956567907SDavid Edmondson 		}
202066f1a35aSschuster 
20219276b399SYuri Pankov 		head = xnf_mblk_map(xnfp, mp, &slots);
20229276b399SYuri Pankov 		if (head == NULL)
20239276b399SYuri Pankov 			goto drop;
20249276b399SYuri Pankov 
20259276b399SYuri Pankov 		IMPLY(slots > 1, xnfp->xnf_be_tx_sg);
202666f1a35aSschuster 	}
2027843e1988Sjohnlev 
202856567907SDavid Edmondson 	/*
20299276b399SYuri Pankov 	 * Set tx_mp so that the mblk is freed when the txbuf chain is freed.
203056567907SDavid Edmondson 	 */
20319276b399SYuri Pankov 	head->tx_mp = mp;
203264c5e63cSDavid Edmondson 
20339276b399SYuri Pankov 	xnf_tx_setup_offload(xnfp, head, cksum_flags, lso_flags, mss);
20349276b399SYuri Pankov 
20359276b399SYuri Pankov 	/*
20369276b399SYuri Pankov 	 * The first request must store the total length of the packet.
20379276b399SYuri Pankov 	 */
20389276b399SYuri Pankov 	head->tx_txreq.size = length;
20399276b399SYuri Pankov 
20409276b399SYuri Pankov 	/*
20419276b399SYuri Pankov 	 * Push the packet we have prepared into the ring.
20429276b399SYuri Pankov 	 */
20439276b399SYuri Pankov 	xnf_tx_push_packet(xnfp, head);
20449276b399SYuri Pankov 	xnfp->xnf_stat_opackets++;
20459276b399SYuri Pankov 	xnfp->xnf_stat_obytes += length;
20469276b399SYuri Pankov 
20479276b399SYuri Pankov 	mutex_exit(&xnfp->xnf_txlock);
20489276b399SYuri Pankov 	return (NULL);
2049843e1988Sjohnlev 
20509276b399SYuri Pankov drop:
20519276b399SYuri Pankov 	freemsg(mp);
20529276b399SYuri Pankov 	xnfp->xnf_stat_tx_drop++;
20539276b399SYuri Pankov 	mutex_exit(&xnfp->xnf_txlock);
20549276b399SYuri Pankov 	return (NULL);
2055843e1988Sjohnlev }
2056843e1988Sjohnlev 
2057843e1988Sjohnlev /*
205856567907SDavid Edmondson  * Notification of RX packets. Currently no TX-complete interrupt is
205956567907SDavid Edmondson  * used, as we clean the TX ring lazily.
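 *
 * The handler also cleans the TX ring when a previous send was deferred or
 * a multicast request is pending, and calls mac_tx_update() once enough
 * slots are free again.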
2060843e1988Sjohnlev  */
2061843e1988Sjohnlev static uint_t
2062843e1988Sjohnlev xnf_intr(caddr_t arg)
2063843e1988Sjohnlev {
2064843e1988Sjohnlev 	xnf_t *xnfp = (xnf_t *)arg;
206556567907SDavid Edmondson 	mblk_t *mp;
206656567907SDavid Edmondson 	boolean_t need_sched, clean_ring;
2067843e1988Sjohnlev 
206856567907SDavid Edmondson 	mutex_enter(&xnfp->xnf_rxlock);
2069843e1988Sjohnlev 
207056567907SDavid Edmondson 	/*
207156567907SDavid Edmondson 	 * Interrupts before we are connected are spurious.
207256567907SDavid Edmondson 	 */
2073a390c5f4Scz 	if (!xnfp->xnf_connected) {
207456567907SDavid Edmondson 		mutex_exit(&xnfp->xnf_rxlock);
2075551bc2a6Smrj 		xnfp->xnf_stat_unclaimed_interrupts++;
2076843e1988Sjohnlev 		return (DDI_INTR_UNCLAIMED);
2077843e1988Sjohnlev 	}
2078843e1988Sjohnlev 
207956567907SDavid Edmondson 	/*
208056567907SDavid Edmondson 	 * Receive side processing.
208156567907SDavid Edmondson 	 */
208256567907SDavid Edmondson 	do {
208356567907SDavid Edmondson 		/*
208456567907SDavid Edmondson 		 * Collect buffers from the ring.
208556567907SDavid Edmondson 		 */
208656567907SDavid Edmondson 		xnf_rx_collect(xnfp);
2087843e1988Sjohnlev 
208856567907SDavid Edmondson 		/*
208956567907SDavid Edmondson 		 * Interrupt me when the next receive buffer is consumed.
209056567907SDavid Edmondson 		 */
209156567907SDavid Edmondson 		xnfp->xnf_rx_ring.sring->rsp_event =
209256567907SDavid Edmondson 		    xnfp->xnf_rx_ring.rsp_cons + 1;
209356567907SDavid Edmondson 		xen_mb();
209456567907SDavid Edmondson 
209556567907SDavid Edmondson 	} while (RING_HAS_UNCONSUMED_RESPONSES(&xnfp->xnf_rx_ring));
2096551bc2a6Smrj 
209756567907SDavid Edmondson 	if (xnfp->xnf_rx_new_buffers_posted) {
209856567907SDavid Edmondson 		boolean_t notify;
209956567907SDavid Edmondson 
210056567907SDavid Edmondson 		/*
210156567907SDavid Edmondson 		 * Indicate to the peer that we have re-filled the
210256567907SDavid Edmondson 		 * receive ring, if it cares.
210356567907SDavid Edmondson 		 */
210456567907SDavid Edmondson 		/* LINTED: constant in conditional context */
210556567907SDavid Edmondson 		RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&xnfp->xnf_rx_ring, notify);
210656567907SDavid Edmondson 		if (notify)
210756567907SDavid Edmondson 			ec_notify_via_evtchn(xnfp->xnf_evtchn);
210856567907SDavid Edmondson 		xnfp->xnf_rx_new_buffers_posted = B_FALSE;
2109843e1988Sjohnlev 	}
2110843e1988Sjohnlev 
211156567907SDavid Edmondson 	mp = xnfp->xnf_rx_head;
211256567907SDavid Edmondson 	xnfp->xnf_rx_head = xnfp->xnf_rx_tail = NULL;
211356567907SDavid Edmondson 
211464c5e63cSDavid Edmondson 	xnfp->xnf_stat_interrupts++;
211556567907SDavid Edmondson 	mutex_exit(&xnfp->xnf_rxlock);
211656567907SDavid Edmondson 
211756567907SDavid Edmondson 	if (mp != NULL)
211856567907SDavid Edmondson 		mac_rx(xnfp->xnf_mh, NULL, mp);
211964c5e63cSDavid Edmondson 
2120843e1988Sjohnlev 	/*
212156567907SDavid Edmondson 	 * Transmit side processing.
212256567907SDavid Edmondson 	 *
212356567907SDavid Edmondson 	 * If a previous transmit attempt failed or we have pending
212456567907SDavid Edmondson 	 * multicast requests, clean the ring.
212556567907SDavid Edmondson 	 *
212656567907SDavid Edmondson 	 * If we previously stalled transmission and cleaning produces
212756567907SDavid Edmondson 	 * some free slots, tell upstream to attempt sending again.
212856567907SDavid Edmondson 	 *
212956567907SDavid Edmondson 	 * The odd style is to avoid acquiring xnf_txlock unless we
213056567907SDavid Edmondson 	 * will actually look inside the tx machinery.
2131843e1988Sjohnlev 	 */
213256567907SDavid Edmondson 	mutex_enter(&xnfp->xnf_schedlock);
213356567907SDavid Edmondson 	need_sched = xnfp->xnf_need_sched;
213456567907SDavid Edmondson 	clean_ring = need_sched || (xnfp->xnf_pending_multicast > 0);
213556567907SDavid Edmondson 	mutex_exit(&xnfp->xnf_schedlock);
213664c5e63cSDavid Edmondson 
213756567907SDavid Edmondson 	if (clean_ring) {
213856567907SDavid Edmondson 		int free_slots;
213956567907SDavid Edmondson 
214056567907SDavid Edmondson 		mutex_enter(&xnfp->xnf_txlock);
21419276b399SYuri Pankov 		free_slots = xnf_tx_slots_get(xnfp, 0, B_FALSE);
214256567907SDavid Edmondson 
21439276b399SYuri Pankov 		if (need_sched && (free_slots >= XEN_MAX_SLOTS_PER_TX)) {
214456567907SDavid Edmondson 			mutex_enter(&xnfp->xnf_schedlock);
214556567907SDavid Edmondson 			xnfp->xnf_need_sched = B_FALSE;
214656567907SDavid Edmondson 			mutex_exit(&xnfp->xnf_schedlock);
214756567907SDavid Edmondson 
214856567907SDavid Edmondson 			mac_tx_update(xnfp->xnf_mh);
214956567907SDavid Edmondson 		}
215056567907SDavid Edmondson 		mutex_exit(&xnfp->xnf_txlock);
215156567907SDavid Edmondson 	}
2152843e1988Sjohnlev 
215364c5e63cSDavid Edmondson 	return (DDI_INTR_CLAIMED);
2154843e1988Sjohnlev }
2155843e1988Sjohnlev 
2156843e1988Sjohnlev /*
2157843e1988Sjohnlev  *  xnf_start() -- start the board receiving and enable interrupts.
2158843e1988Sjohnlev  */
2159843e1988Sjohnlev static int
2160843e1988Sjohnlev xnf_start(void *arg)
2161843e1988Sjohnlev {
2162843e1988Sjohnlev 	xnf_t *xnfp = arg;
2163843e1988Sjohnlev 
216456567907SDavid Edmondson 	mutex_enter(&xnfp->xnf_rxlock);
2165551bc2a6Smrj 	mutex_enter(&xnfp->xnf_txlock);
2166843e1988Sjohnlev 
2167843e1988Sjohnlev 	/* Accept packets from above. */
2168551bc2a6Smrj 	xnfp->xnf_running = B_TRUE;
2169843e1988Sjohnlev 
2170551bc2a6Smrj 	mutex_exit(&xnfp->xnf_txlock);
217156567907SDavid Edmondson 	mutex_exit(&xnfp->xnf_rxlock);
2172843e1988Sjohnlev 
2173843e1988Sjohnlev 	return (0);
2174843e1988Sjohnlev }
2175843e1988Sjohnlev 
2176843e1988Sjohnlev /* xnf_stop() - disable hardware */
2177843e1988Sjohnlev static void
2178843e1988Sjohnlev xnf_stop(void *arg)
2179843e1988Sjohnlev {
2180843e1988Sjohnlev 	xnf_t *xnfp = arg;
2181843e1988Sjohnlev 
218256567907SDavid Edmondson 	mutex_enter(&xnfp->xnf_rxlock);
2183551bc2a6Smrj 	mutex_enter(&xnfp->xnf_txlock);
2184843e1988Sjohnlev 
2185551bc2a6Smrj 	xnfp->xnf_running = B_FALSE;
2186843e1988Sjohnlev 
2187551bc2a6Smrj 	mutex_exit(&xnfp->xnf_txlock);
218856567907SDavid Edmondson 	mutex_exit(&xnfp->xnf_rxlock);
2189843e1988Sjohnlev }
2190843e1988Sjohnlev 
2191843e1988Sjohnlev /*
219256567907SDavid Edmondson  * Hang buffer `bdesc' on the RX ring.
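 * That is, post it as an empty receive request for the backend to fill.
 * The caller must hold xnf_rxlock.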
2193843e1988Sjohnlev  */
2194843e1988Sjohnlev static void
219556567907SDavid Edmondson xnf_rxbuf_hang(xnf_t *xnfp, xnf_buf_t *bdesc)
2196843e1988Sjohnlev {
219756567907SDavid Edmondson 	netif_rx_request_t *reqp;
219856567907SDavid Edmondson 	RING_IDX hang_ix;
2199843e1988Sjohnlev 
220056567907SDavid Edmondson 	ASSERT(MUTEX_HELD(&xnfp->xnf_rxlock));
2201843e1988Sjohnlev 
2202551bc2a6Smrj 	reqp = RING_GET_REQUEST(&xnfp->xnf_rx_ring,
2203551bc2a6Smrj 	    xnfp->xnf_rx_ring.req_prod_pvt);
2204551bc2a6Smrj 	hang_ix = (RING_IDX) (reqp - RING_GET_REQUEST(&xnfp->xnf_rx_ring, 0));
220556567907SDavid Edmondson 	ASSERT(xnfp->xnf_rx_pkt_info[hang_ix] == NULL);
220656567907SDavid Edmondson 
220756567907SDavid Edmondson 	reqp->id = bdesc->id = hang_ix;
2208843e1988Sjohnlev 	reqp->gref = bdesc->grant_ref;
220956567907SDavid Edmondson 
221056567907SDavid Edmondson 	xnfp->xnf_rx_pkt_info[hang_ix] = bdesc;
2211551bc2a6Smrj 	xnfp->xnf_rx_ring.req_prod_pvt++;
221256567907SDavid Edmondson 
221356567907SDavid Edmondson 	xnfp->xnf_rx_new_buffers_posted = B_TRUE;
2214843e1988Sjohnlev }
2215843e1988Sjohnlev 
221656567907SDavid Edmondson /*
22179276b399SYuri Pankov  * Receive an entire packet from the ring, starting from slot *consp.
22189276b399SYuri Pankov  * prod indicates the slot of the latest response.
22199276b399SYuri Pankov  * On return, *consp will point to the head of the next packet.
22209276b399SYuri Pankov  *
22219276b399SYuri Pankov  * Note: If slot prod was reached before we could gather a full packet, we will
22229276b399SYuri Pankov  * drop the partial packet; this would most likely indicate a bug in either
22239276b399SYuri Pankov  * the front-end or the back-end driver.
22249276b399SYuri Pankov  *
22259276b399SYuri Pankov  * An rx packet can consist of several fragments and thus span multiple slots.
22269276b399SYuri Pankov  * Each fragment can contain up to 4k of data.
22279276b399SYuri Pankov  *
22289276b399SYuri Pankov  * A typical 9000 MTU packet will look like this:
22299276b399SYuri Pankov  * +------+---------------------+-------------------+-----------------------+
22309276b399SYuri Pankov  * | SLOT | TYPE                | CONTENTS          | FLAGS                 |
22319276b399SYuri Pankov  * +------+---------------------+-------------------+-----------------------+
22329276b399SYuri Pankov  * | 1    | netif_rx_response_t | 1st data fragment | more_data             |
22339276b399SYuri Pankov  * +------+---------------------+-------------------+-----------------------+
22349276b399SYuri Pankov  * | 2    | netif_rx_response_t | 2nd data fragment | more_data             |
22359276b399SYuri Pankov  * +------+---------------------+-------------------+-----------------------+
22369276b399SYuri Pankov  * | 3    | netif_rx_response_t | 3rd data fragment | [none]                |
22379276b399SYuri Pankov  * +------+---------------------+-------------------+-----------------------+
22389276b399SYuri Pankov  *
22399276b399SYuri Pankov  * Fragments are chained by setting NETRXF_more_data in the previous
22409276b399SYuri Pankov  * response's flags. If there are additional flags, such as
22419276b399SYuri Pankov  * NETRXF_data_validated or NETRXF_extra_info, those should be set on the
22429276b399SYuri Pankov  * first fragment.
22439276b399SYuri Pankov  *
22449276b399SYuri Pankov  * Sometimes extra info can be present. If so, it will follow the first
22459276b399SYuri Pankov  * fragment, and the NETRXF_extra_info flag will be set on the first response.
22469276b399SYuri Pankov  * If LRO is set on a packet, it will be stored in the extra info. Conforming
22479276b399SYuri Pankov  * to the spec, extra info can also be chained, but must all be present right
22489276b399SYuri Pankov  * after the first fragment.
22499276b399SYuri Pankov  *
22509276b399SYuri Pankov  * Example of a packet with 2 extra infos:
22519276b399SYuri Pankov  * +------+---------------------+-------------------+-----------------------+
22529276b399SYuri Pankov  * | SLOT | TYPE                | CONTENTS          | FLAGS                 |
22539276b399SYuri Pankov  * +------+---------------------+-------------------+-----------------------+
22549276b399SYuri Pankov  * | 1    | netif_rx_response_t | 1st data fragment | extra_info, more_data |
22559276b399SYuri Pankov  * +------+---------------------+-------------------+-----------------------+
22569276b399SYuri Pankov  * | 2    | netif_extra_info_t  | 1st extra info    | EXTRA_FLAG_MORE       |
22579276b399SYuri Pankov  * +------+---------------------+-------------------+-----------------------+
22589276b399SYuri Pankov  * | 3    | netif_extra_info_t  | 2nd extra info    | [none]                |
22599276b399SYuri Pankov  * +------+---------------------+-------------------+-----------------------+
22609276b399SYuri Pankov  * | 4    | netif_rx_response_t | 2nd data fragment | more_data             |
22619276b399SYuri Pankov  * +------+---------------------+-------------------+-----------------------+
22629276b399SYuri Pankov  * | 5    | netif_rx_response_t | 3rd data fragment | more_data             |
22639276b399SYuri Pankov  * +------+---------------------+-------------------+-----------------------+
22649276b399SYuri Pankov  * | 6    | netif_rx_response_t | 4th data fragment | [none]                |
22659276b399SYuri Pankov  * +------+---------------------+-------------------+-----------------------+
22669276b399SYuri Pankov  *
22679276b399SYuri Pankov  * In practice, the only extra we expect is for LRO, but only if we advertise
22689276b399SYuri Pankov  * that we support it to the backend (xnf_enable_lro == TRUE).
226956567907SDavid Edmondson  */
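/*
 * A minimal sketch (illustrative only) of how a consumer walks such a chain.
 * Error handling, the 'prod' bound and all buffer management are omitted, and
 * consume_fragment() is a hypothetical stand-in; xnf_rx_one_packet() below is
 * the real implementation.
 *
 *	netif_rx_response_t rsp = *RING_GET_RESPONSE(&ring, cons);
 *	boolean_t more_data = (rsp.flags & NETRXF_more_data) != 0;
 *	boolean_t more_extra = (rsp.flags & NETRXF_extra_info) != 0;
 *	boolean_t is_extra = B_FALSE;
 *
 *	for (;;) {
 *		if (is_extra) {
 *			netif_extra_info_t *ex = (netif_extra_info_t *)&rsp;
 *			more_extra =
 *			    (ex->flags & XEN_NETIF_EXTRA_FLAG_MORE) != 0;
 *		} else {
 *			consume_fragment(&rsp);
 *			more_data = (rsp.flags & NETRXF_more_data) != 0;
 *		}
 *		cons++;
 *		is_extra = more_extra;
 *		if (!more_data && !more_extra)
 *			break;
 *		rsp = *RING_GET_RESPONSE(&ring, cons);
 *	}
 */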
22709276b399SYuri Pankov static int
22719276b399SYuri Pankov xnf_rx_one_packet(xnf_t *xnfp, RING_IDX prod, RING_IDX *consp, mblk_t **mpp)
2272551bc2a6Smrj {
22739276b399SYuri Pankov 	mblk_t *head = NULL;
22749276b399SYuri Pankov 	mblk_t *tail = NULL;
22759276b399SYuri Pankov 	mblk_t *mp;
22769276b399SYuri Pankov 	int error = 0;
22779276b399SYuri Pankov 	RING_IDX cons = *consp;
22789276b399SYuri Pankov 	netif_extra_info_t lro;
22799276b399SYuri Pankov 	boolean_t is_lro = B_FALSE;
22809276b399SYuri Pankov 	boolean_t is_extra = B_FALSE;
228156567907SDavid Edmondson 
22829276b399SYuri Pankov 	netif_rx_response_t rsp = *RING_GET_RESPONSE(&xnfp->xnf_rx_ring, cons);
2283551bc2a6Smrj 
22849276b399SYuri Pankov 	boolean_t hwcsum = (rsp.flags & NETRXF_data_validated) != 0;
22859276b399SYuri Pankov 	boolean_t more_data = (rsp.flags & NETRXF_more_data) != 0;
22869276b399SYuri Pankov 	boolean_t more_extra = (rsp.flags & NETRXF_extra_info) != 0;
2287551bc2a6Smrj 
22889276b399SYuri Pankov 	IMPLY(more_data, xnf_enable_rx_sg);
2289551bc2a6Smrj 
22909276b399SYuri Pankov 	while (cons != prod) {
229156567907SDavid Edmondson 		xnf_buf_t *bdesc;
22929276b399SYuri Pankov 		int len, off;
22939276b399SYuri Pankov 		int rxidx = cons & (NET_RX_RING_SIZE - 1);
22949276b399SYuri Pankov 
22959276b399SYuri Pankov 		bdesc = xnfp->xnf_rx_pkt_info[rxidx];
22969276b399SYuri Pankov 		xnfp->xnf_rx_pkt_info[rxidx] = NULL;
22979276b399SYuri Pankov 
22989276b399SYuri Pankov 		if (is_extra) {
22999276b399SYuri Pankov 			netif_extra_info_t *extra = (netif_extra_info_t *)&rsp;
23009276b399SYuri Pankov 			/*
23019276b399SYuri Pankov 			 * The only extra we expect is for LRO, and it should
23029276b399SYuri Pankov 			 * only be present once.
23039276b399SYuri Pankov 			 */
23049276b399SYuri Pankov 			if (extra->type == XEN_NETIF_EXTRA_TYPE_GSO &&
23059276b399SYuri Pankov 			    !is_lro) {
23069276b399SYuri Pankov 				ASSERT(xnf_enable_lro);
23079276b399SYuri Pankov 				lro = *extra;
23089276b399SYuri Pankov 				is_lro = B_TRUE;
23099276b399SYuri Pankov 				DTRACE_PROBE1(lro, netif_extra_info_t *, &lro);
23109276b399SYuri Pankov 			} else {
23119276b399SYuri Pankov 				dev_err(xnfp->xnf_devinfo, CE_WARN, "rx packet "
23129276b399SYuri Pankov 				    "contains unexpected extra info of type %d",
23139276b399SYuri Pankov 				    extra->type);
23149276b399SYuri Pankov 				error = EINVAL;
23159276b399SYuri Pankov 			}
23169276b399SYuri Pankov 			more_extra =
23179276b399SYuri Pankov 			    (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE) != 0;
2318551bc2a6Smrj 
23199276b399SYuri Pankov 			goto hang_buf;
23209276b399SYuri Pankov 		}
2321551bc2a6Smrj 
23229276b399SYuri Pankov 		ASSERT3U(bdesc->id, ==, rsp.id);
2323843e1988Sjohnlev 
2324843e1988Sjohnlev 		/*
23259276b399SYuri Pankov 		 * status stores the packet length when >= 0, or an error code when < 0.
232656567907SDavid Edmondson 		 */
23279276b399SYuri Pankov 		len = rsp.status;
23289276b399SYuri Pankov 		off = rsp.offset;
23299276b399SYuri Pankov 		more_data = (rsp.flags & NETRXF_more_data) != 0;
233056567907SDavid Edmondson 
233156567907SDavid Edmondson 		/*
23329276b399SYuri Pankov 		 * sanity checks.
2333843e1988Sjohnlev 		 */
2334a390c5f4Scz 		if (!xnfp->xnf_running) {
23359276b399SYuri Pankov 			error = EBUSY;
233656567907SDavid Edmondson 		} else if (len <= 0) {
2337551bc2a6Smrj 			xnfp->xnf_stat_errrx++;
233856567907SDavid Edmondson 
233956567907SDavid Edmondson 			switch (len) {
234056567907SDavid Edmondson 			case 0:
2341551bc2a6Smrj 				xnfp->xnf_stat_runt++;
234256567907SDavid Edmondson 				break;
234356567907SDavid Edmondson 			case NETIF_RSP_ERROR:
2344551bc2a6Smrj 				xnfp->xnf_stat_mac_rcv_error++;
234556567907SDavid Edmondson 				break;
234656567907SDavid Edmondson 			case NETIF_RSP_DROPPED:
2347551bc2a6Smrj 				xnfp->xnf_stat_norxbuf++;
234856567907SDavid Edmondson 				break;
234956567907SDavid Edmondson 			}
23509276b399SYuri Pankov 			error = EINVAL;
235156567907SDavid Edmondson 		} else if (bdesc->grant_ref == INVALID_GRANT_REF) {
23529276b399SYuri Pankov 			dev_err(xnfp->xnf_devinfo, CE_WARN,
23539276b399SYuri Pankov 			    "Bad rx grant reference, rsp id %d", rsp.id);
23549276b399SYuri Pankov 			error = EINVAL;
235556567907SDavid Edmondson 		} else if ((off + len) > PAGESIZE) {
23569276b399SYuri Pankov 			dev_err(xnfp->xnf_devinfo, CE_WARN, "Rx packet crosses "
23579276b399SYuri Pankov 			    "page boundary (offset %d, length %d)", off, len);
23589276b399SYuri Pankov 			error = EINVAL;
23599276b399SYuri Pankov 		}
236056567907SDavid Edmondson 
23619276b399SYuri Pankov 		if (error != 0) {
23629276b399SYuri Pankov 			/*
23639276b399SYuri Pankov 			 * If an error has been detected, we do not attempt
23649276b399SYuri Pankov 			 * to read the data but we still need to replace
23659276b399SYuri Pankov 			 * the rx bufs.
23669276b399SYuri Pankov 			 */
23679276b399SYuri Pankov 			goto hang_buf;
23689276b399SYuri Pankov 		}
2369843e1988Sjohnlev 
23709276b399SYuri Pankov 		xnf_buf_t *nbuf = NULL;
23719276b399SYuri Pankov 
23729276b399SYuri Pankov 		/*
23739276b399SYuri Pankov 		 * If the packet is below a pre-determined size we will
23749276b399SYuri Pankov 		 * copy data out of the buf rather than replace it.
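		 * Copying keeps small packets cheap and lets the buf be
		 * re-hung on the ring immediately, while loaning the buf out
		 * via desballoc() below avoids the copy for larger packets at
		 * the cost of tying the buf up until the stack frees the mblk.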
23759276b399SYuri Pankov 		 */
23769276b399SYuri Pankov 		if (len > xnf_rx_copy_limit)
23779276b399SYuri Pankov 			nbuf = xnf_buf_get(xnfp, KM_NOSLEEP, B_FALSE);
2378551bc2a6Smrj 
23799276b399SYuri Pankov 		if (nbuf != NULL) {
23809276b399SYuri Pankov 			mp = desballoc((unsigned char *)bdesc->buf,
23819276b399SYuri Pankov 			    bdesc->len, 0, &bdesc->free_rtn);
23829276b399SYuri Pankov 
23839276b399SYuri Pankov 			if (mp == NULL) {
23849276b399SYuri Pankov 				xnfp->xnf_stat_rx_desballoc_fail++;
23859276b399SYuri Pankov 				xnfp->xnf_stat_norxbuf++;
23869276b399SYuri Pankov 				error = ENOMEM;
23879276b399SYuri Pankov 				/*
23889276b399SYuri Pankov 				 * we free the buf we just allocated as we
23899276b399SYuri Pankov 				 * will re-hang the old buf.
23909276b399SYuri Pankov 				 */
23919276b399SYuri Pankov 				xnf_buf_put(xnfp, nbuf, B_FALSE);
23929276b399SYuri Pankov 				goto hang_buf;
23939276b399SYuri Pankov 			}
23949276b399SYuri Pankov 
23959276b399SYuri Pankov 			mp->b_rptr = mp->b_rptr + off;
23969276b399SYuri Pankov 			mp->b_wptr = mp->b_rptr + len;
239756567907SDavid Edmondson 
239856567907SDavid Edmondson 			/*
23999276b399SYuri Pankov 			 * Release the grant as the backend doesn't need to
24009276b399SYuri Pankov 			 * access this buffer anymore and grants are scarce.
240156567907SDavid Edmondson 			 */
24029276b399SYuri Pankov 			(void) gnttab_end_foreign_access_ref(bdesc->grant_ref,
24039276b399SYuri Pankov 			    0);
24049276b399SYuri Pankov 			xnf_gref_put(xnfp, bdesc->grant_ref);
24059276b399SYuri Pankov 			bdesc->grant_ref = INVALID_GRANT_REF;
240656567907SDavid Edmondson 
24079276b399SYuri Pankov 			bdesc = nbuf;
24089276b399SYuri Pankov 		} else {
240956567907SDavid Edmondson 			/*
24109276b399SYuri Pankov 			 * We failed to allocate a new buf or decided to reuse
24119276b399SYuri Pankov 			 * the old one. In either case we copy the data off it
24129276b399SYuri Pankov 			 * and put it back into the ring.
241356567907SDavid Edmondson 			 */
24149276b399SYuri Pankov 			mp = allocb(len, 0);
24159276b399SYuri Pankov 			if (mp == NULL) {
24169276b399SYuri Pankov 				xnfp->xnf_stat_rx_allocb_fail++;
24179276b399SYuri Pankov 				xnfp->xnf_stat_norxbuf++;
24189276b399SYuri Pankov 				error = ENOMEM;
24199276b399SYuri Pankov 				goto hang_buf;
2420843e1988Sjohnlev 			}
24219276b399SYuri Pankov 			bcopy(bdesc->buf + off, mp->b_wptr, len);
24229276b399SYuri Pankov 			mp->b_wptr += len;
2423843e1988Sjohnlev 		}
242456567907SDavid Edmondson 
24259276b399SYuri Pankov 		if (head == NULL)
24269276b399SYuri Pankov 			head = mp;
24279276b399SYuri Pankov 		else
24289276b399SYuri Pankov 			tail->b_cont = mp;
24299276b399SYuri Pankov 		tail = mp;
24309276b399SYuri Pankov 
24319276b399SYuri Pankov hang_buf:
24329276b399SYuri Pankov 		/*
24339276b399SYuri Pankov 		 * No matter what happens, for each response we need to hang
24349276b399SYuri Pankov 		 * a new buf on the rx ring. Put either the old one, or a new
24359276b399SYuri Pankov 		 * one if the old one is borrowed by the kernel via desballoc().
24369276b399SYuri Pankov 		 */
243756567907SDavid Edmondson 		xnf_rxbuf_hang(xnfp, bdesc);
24389276b399SYuri Pankov 		cons++;
243956567907SDavid Edmondson 
24409276b399SYuri Pankov 		/* next response is an extra */
24419276b399SYuri Pankov 		is_extra = more_extra;
244256567907SDavid Edmondson 
24439276b399SYuri Pankov 		if (!more_data && !more_extra)
24449276b399SYuri Pankov 			break;
244556567907SDavid Edmondson 
24469276b399SYuri Pankov 		/*
24479276b399SYuri Pankov 		 * Note that since requests and responses are union'd on the
24489276b399SYuri Pankov 		 * same ring, we copy the response to a local variable instead
24499276b399SYuri Pankov 		 * of keeping a pointer. Otherwise xnf_rxbuf_hang() would have
24509276b399SYuri Pankov 		 * overwritten the contents of rsp.
24519276b399SYuri Pankov 		 */
24529276b399SYuri Pankov 		rsp = *RING_GET_RESPONSE(&xnfp->xnf_rx_ring, cons);
24539276b399SYuri Pankov 	}
2454843e1988Sjohnlev 
24559276b399SYuri Pankov 	/*
24569276b399SYuri Pankov 	 * Check that we do not get stuck in a loop.
24579276b399SYuri Pankov 	 */
24589276b399SYuri Pankov 	ASSERT3U(*consp, !=, cons);
24599276b399SYuri Pankov 	*consp = cons;
2460843e1988Sjohnlev 
24619276b399SYuri Pankov 	/*
24629276b399SYuri Pankov 	 * We ran out of responses but the flags indicate there is more data.
24639276b399SYuri Pankov 	 */
24649276b399SYuri Pankov 	if (more_data) {
24659276b399SYuri Pankov 		dev_err(xnfp->xnf_devinfo, CE_WARN, "rx: need more fragments.");
24669276b399SYuri Pankov 		error = EINVAL;
24679276b399SYuri Pankov 	}
24689276b399SYuri Pankov 	if (more_extra) {
24699276b399SYuri Pankov 		dev_err(xnfp->xnf_devinfo, CE_WARN, "rx: need more fragments "
24709276b399SYuri Pankov 		    "(extras).");
24719276b399SYuri Pankov 		error = EINVAL;
24729276b399SYuri Pankov 	}
2473843e1988Sjohnlev 
24749276b399SYuri Pankov 	/*
24759276b399SYuri Pankov 	 * An error means the packet must be dropped. If we have already formed
24769276b399SYuri Pankov 	 * a partial packet, then discard it.
24779276b399SYuri Pankov 	 */
24789276b399SYuri Pankov 	if (error != 0) {
24799276b399SYuri Pankov 		if (head != NULL)
24809276b399SYuri Pankov 			freemsg(head);
24819276b399SYuri Pankov 		xnfp->xnf_stat_rx_drop++;
24829276b399SYuri Pankov 		return (error);
2483843e1988Sjohnlev 	}
2484843e1988Sjohnlev 
24859276b399SYuri Pankov 	ASSERT(head != NULL);
24869276b399SYuri Pankov 
24879276b399SYuri Pankov 	if (hwcsum) {
24889276b399SYuri Pankov 		/*
24899276b399SYuri Pankov 		 * If the peer says that the data has been validated then we
24909276b399SYuri Pankov 		 * declare that the full checksum has been verified.
24919276b399SYuri Pankov 		 *
24929276b399SYuri Pankov 		 * We don't look at the "checksum blank" flag, and hence could
24939276b399SYuri Pankov 		 * have a packet here that we are asserting is good with
24949276b399SYuri Pankov 		 * a blank checksum.
24959276b399SYuri Pankov 		 */
24969276b399SYuri Pankov 		mac_hcksum_set(head, 0, 0, 0, 0, HCK_FULLCKSUM_OK);
24979276b399SYuri Pankov 		xnfp->xnf_stat_rx_cksum_no_need++;
24989276b399SYuri Pankov 	}
24999276b399SYuri Pankov 
25009276b399SYuri Pankov 	/* XXX: set lro info for packet once LRO is supported in OS. */
25019276b399SYuri Pankov 
25029276b399SYuri Pankov 	*mpp = head;
25039276b399SYuri Pankov 
25049276b399SYuri Pankov 	return (0);
25059276b399SYuri Pankov }
25069276b399SYuri Pankov 
25079276b399SYuri Pankov /*
25089276b399SYuri Pankov  * Collect packets from the RX ring, storing them in `xnfp' for later use.
25099276b399SYuri Pankov  */
25109276b399SYuri Pankov static void
25119276b399SYuri Pankov xnf_rx_collect(xnf_t *xnfp)
25129276b399SYuri Pankov {
25139276b399SYuri Pankov 	RING_IDX prod;
25149276b399SYuri Pankov 
25159276b399SYuri Pankov 	ASSERT(MUTEX_HELD(&xnfp->xnf_rxlock));
25169276b399SYuri Pankov 
25179276b399SYuri Pankov 	prod = xnfp->xnf_rx_ring.sring->rsp_prod;
2518843e1988Sjohnlev 	/*
25199276b399SYuri Pankov 	 * Ensure we see queued responses up to 'prod'.
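	 * membar_consumer() orders the load of rsp_prod above before our
	 * subsequent loads of the response slots themselves.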
2520843e1988Sjohnlev 	 */
25219276b399SYuri Pankov 	membar_consumer();
2522843e1988Sjohnlev 
25239276b399SYuri Pankov 	while (xnfp->xnf_rx_ring.rsp_cons != prod) {
25249276b399SYuri Pankov 		mblk_t *mp;
2525843e1988Sjohnlev 
25269276b399SYuri Pankov 		/*
25279276b399SYuri Pankov 		 * Collect a packet.
25289276b399SYuri Pankov 		 * rsp_cons is updated inside xnf_rx_one_packet().
25299276b399SYuri Pankov 		 */
25309276b399SYuri Pankov 		int error = xnf_rx_one_packet(xnfp, prod,
25319276b399SYuri Pankov 		    &xnfp->xnf_rx_ring.rsp_cons, &mp);
25329276b399SYuri Pankov 		if (error == 0) {
25339276b399SYuri Pankov 			xnfp->xnf_stat_ipackets++;
25349276b399SYuri Pankov 			xnfp->xnf_stat_rbytes += xmsgsize(mp);
2535843e1988Sjohnlev 
25369276b399SYuri Pankov 			/*
25379276b399SYuri Pankov 			 * Append the mblk to the rx list.
25389276b399SYuri Pankov 			 */
25399276b399SYuri Pankov 			if (xnfp->xnf_rx_head == NULL) {
25409276b399SYuri Pankov 				ASSERT3P(xnfp->xnf_rx_tail, ==, NULL);
25419276b399SYuri Pankov 				xnfp->xnf_rx_head = mp;
25429276b399SYuri Pankov 			} else {
25439276b399SYuri Pankov 				ASSERT(xnfp->xnf_rx_tail != NULL);
25449276b399SYuri Pankov 				xnfp->xnf_rx_tail->b_next = mp;
25459276b399SYuri Pankov 			}
25469276b399SYuri Pankov 			xnfp->xnf_rx_tail = mp;
2547843e1988Sjohnlev 		}
2548843e1988Sjohnlev 	}
2549843e1988Sjohnlev }
2550843e1988Sjohnlev 
2551843e1988Sjohnlev /*
2552843e1988Sjohnlev  *  xnf_alloc_dma_resources() -- initialize the driver's structures
2553843e1988Sjohnlev  */
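/*
 * Both rings are set up with the usual DDI DMA sequence, roughly (error
 * handling as in the code below):
 *
 *	ddi_dma_alloc_handle()		allocate a DMA handle
 *	ddi_dma_mem_alloc()		allocate DMA-able memory for the ring
 *	ddi_dma_addr_bind_handle()	bind it and obtain the physical address
 *	SHARED_RING_INIT() / FRONT_RING_INIT()	initialise the Xen ring
 *
 * xnf_release_dma_resources() undoes these steps in reverse order.
 */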
2554843e1988Sjohnlev static int
2555843e1988Sjohnlev xnf_alloc_dma_resources(xnf_t *xnfp)
2556843e1988Sjohnlev {
2557*cfd17c15SJoshua M. Clulow 	dev_info_t		*devinfo = xnfp->xnf_devinfo;
2558843e1988Sjohnlev 	size_t			len;
2559843e1988Sjohnlev 	ddi_dma_cookie_t	dma_cookie;
2560843e1988Sjohnlev 	uint_t			ncookies;
2561843e1988Sjohnlev 	int			rc;
2562843e1988Sjohnlev 	caddr_t			rptr;
2563843e1988Sjohnlev 
2564843e1988Sjohnlev 	/*
2565843e1988Sjohnlev 	 * The code below allocates all the DMA data structures that
2566843e1988Sjohnlev 	 * need to be released when the driver is detached.
2567843e1988Sjohnlev 	 *
2568843e1988Sjohnlev 	 * Allocate page for the transmit descriptor ring.
2569843e1988Sjohnlev 	 */
2570843e1988Sjohnlev 	if (ddi_dma_alloc_handle(devinfo, &ringbuf_dma_attr,
2571551bc2a6Smrj 	    DDI_DMA_SLEEP, 0, &xnfp->xnf_tx_ring_dma_handle) != DDI_SUCCESS)
2572843e1988Sjohnlev 		goto alloc_error;
2573843e1988Sjohnlev 
2574551bc2a6Smrj 	if (ddi_dma_mem_alloc(xnfp->xnf_tx_ring_dma_handle,
2575843e1988Sjohnlev 	    PAGESIZE, &accattr, DDI_DMA_CONSISTENT,
2576843e1988Sjohnlev 	    DDI_DMA_SLEEP, 0, &rptr, &len,
2577551bc2a6Smrj 	    &xnfp->xnf_tx_ring_dma_acchandle) != DDI_SUCCESS) {
2578551bc2a6Smrj 		ddi_dma_free_handle(&xnfp->xnf_tx_ring_dma_handle);
2579551bc2a6Smrj 		xnfp->xnf_tx_ring_dma_handle = NULL;
2580843e1988Sjohnlev 		goto alloc_error;
2581843e1988Sjohnlev 	}
2582843e1988Sjohnlev 
2583551bc2a6Smrj 	if ((rc = ddi_dma_addr_bind_handle(xnfp->xnf_tx_ring_dma_handle, NULL,
2584843e1988Sjohnlev 	    rptr, PAGESIZE, DDI_DMA_RDWR | DDI_DMA_CONSISTENT,
2585843e1988Sjohnlev 	    DDI_DMA_SLEEP, 0, &dma_cookie, &ncookies)) != DDI_DMA_MAPPED) {
2586551bc2a6Smrj 		ddi_dma_mem_free(&xnfp->xnf_tx_ring_dma_acchandle);
2587551bc2a6Smrj 		ddi_dma_free_handle(&xnfp->xnf_tx_ring_dma_handle);
2588551bc2a6Smrj 		xnfp->xnf_tx_ring_dma_handle = NULL;
2589551bc2a6Smrj 		xnfp->xnf_tx_ring_dma_acchandle = NULL;
2590843e1988Sjohnlev 		if (rc == DDI_DMA_NORESOURCES)
2591843e1988Sjohnlev 			goto alloc_error;
2592843e1988Sjohnlev 		else
2593843e1988Sjohnlev 			goto error;
2594843e1988Sjohnlev 	}
2595843e1988Sjohnlev 
2596843e1988Sjohnlev 	ASSERT(ncookies == 1);
2597843e1988Sjohnlev 	bzero(rptr, PAGESIZE);
2598843e1988Sjohnlev 	/* LINTED: constant in conditional context */
2599843e1988Sjohnlev 	SHARED_RING_INIT((netif_tx_sring_t *)rptr);
2600843e1988Sjohnlev 	/* LINTED: constant in conditional context */
2601551bc2a6Smrj 	FRONT_RING_INIT(&xnfp->xnf_tx_ring, (netif_tx_sring_t *)rptr, PAGESIZE);
2602551bc2a6Smrj 	xnfp->xnf_tx_ring_phys_addr = dma_cookie.dmac_laddress;
2603843e1988Sjohnlev 
2604843e1988Sjohnlev 	/*
2605843e1988Sjohnlev 	 * Allocate page for the receive descriptor ring.
2606843e1988Sjohnlev 	 */
2607843e1988Sjohnlev 	if (ddi_dma_alloc_handle(devinfo, &ringbuf_dma_attr,
2608551bc2a6Smrj 	    DDI_DMA_SLEEP, 0, &xnfp->xnf_rx_ring_dma_handle) != DDI_SUCCESS)
2609843e1988Sjohnlev 		goto alloc_error;
2610843e1988Sjohnlev 
2611551bc2a6Smrj 	if (ddi_dma_mem_alloc(xnfp->xnf_rx_ring_dma_handle,
2612843e1988Sjohnlev 	    PAGESIZE, &accattr, DDI_DMA_CONSISTENT,
2613843e1988Sjohnlev 	    DDI_DMA_SLEEP, 0, &rptr, &len,
2614551bc2a6Smrj 	    &xnfp->xnf_rx_ring_dma_acchandle) != DDI_SUCCESS) {
2615551bc2a6Smrj 		ddi_dma_free_handle(&xnfp->xnf_rx_ring_dma_handle);
2616551bc2a6Smrj 		xnfp->xnf_rx_ring_dma_handle = NULL;
2617843e1988Sjohnlev 		goto alloc_error;
2618843e1988Sjohnlev 	}
2619843e1988Sjohnlev 
2620551bc2a6Smrj 	if ((rc = ddi_dma_addr_bind_handle(xnfp->xnf_rx_ring_dma_handle, NULL,
2621843e1988Sjohnlev 	    rptr, PAGESIZE, DDI_DMA_RDWR | DDI_DMA_CONSISTENT,
2622843e1988Sjohnlev 	    DDI_DMA_SLEEP, 0, &dma_cookie, &ncookies)) != DDI_DMA_MAPPED) {
2623551bc2a6Smrj 		ddi_dma_mem_free(&xnfp->xnf_rx_ring_dma_acchandle);
2624551bc2a6Smrj 		ddi_dma_free_handle(&xnfp->xnf_rx_ring_dma_handle);
2625551bc2a6Smrj 		xnfp->xnf_rx_ring_dma_handle = NULL;
2626551bc2a6Smrj 		xnfp->xnf_rx_ring_dma_acchandle = NULL;
2627843e1988Sjohnlev 		if (rc == DDI_DMA_NORESOURCES)
2628843e1988Sjohnlev 			goto alloc_error;
2629843e1988Sjohnlev 		else
2630843e1988Sjohnlev 			goto error;
2631843e1988Sjohnlev 	}
2632843e1988Sjohnlev 
2633843e1988Sjohnlev 	ASSERT(ncookies == 1);
2634843e1988Sjohnlev 	bzero(rptr, PAGESIZE);
2635843e1988Sjohnlev 	/* LINTED: constant in conditional context */
2636843e1988Sjohnlev 	SHARED_RING_INIT((netif_rx_sring_t *)rptr);
2637843e1988Sjohnlev 	/* LINTED: constant in conditional context */
2638551bc2a6Smrj 	FRONT_RING_INIT(&xnfp->xnf_rx_ring, (netif_rx_sring_t *)rptr, PAGESIZE);
2639551bc2a6Smrj 	xnfp->xnf_rx_ring_phys_addr = dma_cookie.dmac_laddress;
2640843e1988Sjohnlev 
2641843e1988Sjohnlev 	return (DDI_SUCCESS);
2642843e1988Sjohnlev 
2643843e1988Sjohnlev alloc_error:
2644843e1988Sjohnlev 	cmn_err(CE_WARN, "xnf%d: could not allocate enough DMA memory",
2645551bc2a6Smrj 	    ddi_get_instance(xnfp->xnf_devinfo));
2646843e1988Sjohnlev error:
2647843e1988Sjohnlev 	xnf_release_dma_resources(xnfp);
2648843e1988Sjohnlev 	return (DDI_FAILURE);
2649843e1988Sjohnlev }
2650843e1988Sjohnlev 
2651843e1988Sjohnlev /*
2652843e1988Sjohnlev  * Release all DMA resources in the opposite order from acquisition
2653843e1988Sjohnlev  */
2654843e1988Sjohnlev static void
2655843e1988Sjohnlev xnf_release_dma_resources(xnf_t *xnfp)
2656843e1988Sjohnlev {
2657843e1988Sjohnlev 	int i;
2658843e1988Sjohnlev 
2659843e1988Sjohnlev 	/*
2660843e1988Sjohnlev 	 * Free receive buffers which are currently associated with
266156567907SDavid Edmondson 	 * descriptors.
2662843e1988Sjohnlev 	 */
266356567907SDavid Edmondson 	mutex_enter(&xnfp->xnf_rxlock);
266456567907SDavid Edmondson 	for (i = 0; i < NET_RX_RING_SIZE; i++) {
266556567907SDavid Edmondson 		xnf_buf_t *bp;
2666843e1988Sjohnlev 
266756567907SDavid Edmondson 		if ((bp = xnfp->xnf_rx_pkt_info[i]) == NULL)
2668843e1988Sjohnlev 			continue;
266956567907SDavid Edmondson 		xnfp->xnf_rx_pkt_info[i] = NULL;
267056567907SDavid Edmondson 		xnf_buf_put(xnfp, bp, B_FALSE);
2671843e1988Sjohnlev 	}
267256567907SDavid Edmondson 	mutex_exit(&xnfp->xnf_rxlock);
2673843e1988Sjohnlev 
267456567907SDavid Edmondson 	/* Free the receive ring buffer. */
2675551bc2a6Smrj 	if (xnfp->xnf_rx_ring_dma_acchandle != NULL) {
2676551bc2a6Smrj 		(void) ddi_dma_unbind_handle(xnfp->xnf_rx_ring_dma_handle);
2677551bc2a6Smrj 		ddi_dma_mem_free(&xnfp->xnf_rx_ring_dma_acchandle);
2678551bc2a6Smrj 		ddi_dma_free_handle(&xnfp->xnf_rx_ring_dma_handle);
2679551bc2a6Smrj 		xnfp->xnf_rx_ring_dma_acchandle = NULL;
2680843e1988Sjohnlev 	}
268156567907SDavid Edmondson 	/* Free the transmit ring buffer. */
2682551bc2a6Smrj 	if (xnfp->xnf_tx_ring_dma_acchandle != NULL) {
2683551bc2a6Smrj 		(void) ddi_dma_unbind_handle(xnfp->xnf_tx_ring_dma_handle);
2684551bc2a6Smrj 		ddi_dma_mem_free(&xnfp->xnf_tx_ring_dma_acchandle);
2685551bc2a6Smrj 		ddi_dma_free_handle(&xnfp->xnf_tx_ring_dma_handle);
2686551bc2a6Smrj 		xnfp->xnf_tx_ring_dma_acchandle = NULL;
2687843e1988Sjohnlev 	}
2688a390c5f4Scz 
2689843e1988Sjohnlev }
2690843e1988Sjohnlev 
2691843e1988Sjohnlev /*
269256567907SDavid Edmondson  * Release any packets and associated structures used by the TX ring.
2693843e1988Sjohnlev  */
2694843e1988Sjohnlev static void
269556567907SDavid Edmondson xnf_release_mblks(xnf_t *xnfp)
2696843e1988Sjohnlev {
269756567907SDavid Edmondson 	RING_IDX i;
269856567907SDavid Edmondson 	xnf_txid_t *tidp;
2699843e1988Sjohnlev 
270056567907SDavid Edmondson 	for (i = 0, tidp = &xnfp->xnf_tx_pkt_id[0];
270156567907SDavid Edmondson 	    i < NET_TX_RING_SIZE;
270256567907SDavid Edmondson 	    i++, tidp++) {
270356567907SDavid Edmondson 		xnf_txbuf_t *txp = tidp->txbuf;
2704843e1988Sjohnlev 
270556567907SDavid Edmondson 		if (txp != NULL) {
270656567907SDavid Edmondson 			ASSERT(txp->tx_mp != NULL);
270756567907SDavid Edmondson 			freemsg(txp->tx_mp);
2708843e1988Sjohnlev 
27099276b399SYuri Pankov 			xnf_txid_put(xnfp, tidp);
271056567907SDavid Edmondson 			kmem_cache_free(xnfp->xnf_tx_buf_cache, txp);
271156567907SDavid Edmondson 		}
2712551bc2a6Smrj 	}
2713843e1988Sjohnlev }
2714843e1988Sjohnlev 
271556567907SDavid Edmondson static int
271656567907SDavid Edmondson xnf_buf_constructor(void *buf, void *arg, int kmflag)
2717843e1988Sjohnlev {
271856567907SDavid Edmondson 	int (*ddiflags)(caddr_t) = DDI_DMA_SLEEP;
271956567907SDavid Edmondson 	xnf_buf_t *bdesc = buf;
272056567907SDavid Edmondson 	xnf_t *xnfp = arg;
272156567907SDavid Edmondson 	ddi_dma_cookie_t dma_cookie;
272256567907SDavid Edmondson 	uint_t ncookies;
2723843e1988Sjohnlev 	size_t len;
2724843e1988Sjohnlev 
272556567907SDavid Edmondson 	if (kmflag & KM_NOSLEEP)
272656567907SDavid Edmondson 		ddiflags = DDI_DMA_DONTWAIT;
2727843e1988Sjohnlev 
272856567907SDavid Edmondson 	/* Allocate a DMA access handle for the buffer. */
27299276b399SYuri Pankov 	if (ddi_dma_alloc_handle(xnfp->xnf_devinfo, &rx_buf_dma_attr,
273056567907SDavid Edmondson 	    ddiflags, 0, &bdesc->dma_handle) != DDI_SUCCESS)
2731843e1988Sjohnlev 		goto failure;
2732843e1988Sjohnlev 
273356567907SDavid Edmondson 	/* Allocate DMA-able memory for buffer. */
2734843e1988Sjohnlev 	if (ddi_dma_mem_alloc(bdesc->dma_handle,
273556567907SDavid Edmondson 	    PAGESIZE, &data_accattr, DDI_DMA_STREAMING, ddiflags, 0,
2736843e1988Sjohnlev 	    &bdesc->buf, &len, &bdesc->acc_handle) != DDI_SUCCESS)
2737551bc2a6Smrj 		goto failure_1;
2738843e1988Sjohnlev 
273956567907SDavid Edmondson 	/* Bind to virtual address of buffer to get physical address. */
274056567907SDavid Edmondson 	if (ddi_dma_addr_bind_handle(bdesc->dma_handle, NULL,
274156567907SDavid Edmondson 	    bdesc->buf, len, DDI_DMA_RDWR | DDI_DMA_STREAMING,
274256567907SDavid Edmondson 	    ddiflags, 0, &dma_cookie, &ncookies) != DDI_DMA_MAPPED)
274356567907SDavid Edmondson 		goto failure_2;
274456567907SDavid Edmondson 	ASSERT(ncookies == 1);
274556567907SDavid Edmondson 
274656567907SDavid Edmondson 	bdesc->free_rtn.free_func = xnf_buf_recycle;
274756567907SDavid Edmondson 	bdesc->free_rtn.free_arg = (caddr_t)bdesc;
2748843e1988Sjohnlev 	bdesc->xnfp = xnfp;
274956567907SDavid Edmondson 	bdesc->buf_phys = dma_cookie.dmac_laddress;
275056567907SDavid Edmondson 	bdesc->buf_mfn = pfn_to_mfn(xnf_btop(bdesc->buf_phys));
275156567907SDavid Edmondson 	bdesc->len = dma_cookie.dmac_size;
275256567907SDavid Edmondson 	bdesc->grant_ref = INVALID_GRANT_REF;
275356567907SDavid Edmondson 	bdesc->gen = xnfp->xnf_gen;
275456567907SDavid Edmondson 
27551a5e258fSJosef 'Jeff' Sipek 	atomic_inc_64(&xnfp->xnf_stat_buf_allocated);
275656567907SDavid Edmondson 
275756567907SDavid Edmondson 	return (0);
2758843e1988Sjohnlev 
275956567907SDavid Edmondson failure_2:
276056567907SDavid Edmondson 	ddi_dma_mem_free(&bdesc->acc_handle);
2761843e1988Sjohnlev 
2762551bc2a6Smrj failure_1:
2763843e1988Sjohnlev 	ddi_dma_free_handle(&bdesc->dma_handle);
2764843e1988Sjohnlev 
2765843e1988Sjohnlev failure:
276656567907SDavid Edmondson 
2767fd0939efSDavid Edmondson 	ASSERT(kmflag & KM_NOSLEEP); /* Cannot fail for KM_SLEEP. */
276856567907SDavid Edmondson 	return (-1);
2769843e1988Sjohnlev }
2770843e1988Sjohnlev 
277156567907SDavid Edmondson static void
277256567907SDavid Edmondson xnf_buf_destructor(void *buf, void *arg)
2773843e1988Sjohnlev {
277456567907SDavid Edmondson 	xnf_buf_t *bdesc = buf;
277556567907SDavid Edmondson 	xnf_t *xnfp = arg;
277656567907SDavid Edmondson 
277756567907SDavid Edmondson 	(void) ddi_dma_unbind_handle(bdesc->dma_handle);
277856567907SDavid Edmondson 	ddi_dma_mem_free(&bdesc->acc_handle);
277956567907SDavid Edmondson 	ddi_dma_free_handle(&bdesc->dma_handle);
278056567907SDavid Edmondson 
27811a5e258fSJosef 'Jeff' Sipek 	atomic_dec_64(&xnfp->xnf_stat_buf_allocated);
278256567907SDavid Edmondson }
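/*
 * The constructor/destructor pair above is wired into a kmem cache elsewhere
 * in the driver (not shown in this part of the file); a sketch, with the
 * cache name chosen purely for illustration:
 *
 *	xnfp->xnf_buf_cache = kmem_cache_create("xnf_buf_cache",
 *	    sizeof (xnf_buf_t), 0, xnf_buf_constructor, xnf_buf_destructor,
 *	    NULL, xnfp, NULL, 0);
 *
 * Buffers are then taken from and returned to the cache with
 * kmem_cache_alloc() and kmem_cache_free(), as xnf_buf_get() and
 * xnf_buf_put() below do.
 */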
2783843e1988Sjohnlev 
278456567907SDavid Edmondson static xnf_buf_t *
278556567907SDavid Edmondson xnf_buf_get(xnf_t *xnfp, int flags, boolean_t readonly)
278656567907SDavid Edmondson {
278756567907SDavid Edmondson 	grant_ref_t gref;
278856567907SDavid Edmondson 	xnf_buf_t *bufp;
278956567907SDavid Edmondson 
279056567907SDavid Edmondson 	/*
279156567907SDavid Edmondson 	 * Usually grant references are more scarce than memory, so we
279256567907SDavid Edmondson 	 * attempt to acquire a grant reference first.
279356567907SDavid Edmondson 	 */
27949276b399SYuri Pankov 	gref = xnf_gref_get(xnfp);
279556567907SDavid Edmondson 	if (gref == INVALID_GRANT_REF)
2796843e1988Sjohnlev 		return (NULL);
2797843e1988Sjohnlev 
279856567907SDavid Edmondson 	bufp = kmem_cache_alloc(xnfp->xnf_buf_cache, flags);
279956567907SDavid Edmondson 	if (bufp == NULL) {
28009276b399SYuri Pankov 		xnf_gref_put(xnfp, gref);
2801843e1988Sjohnlev 		return (NULL);
280256567907SDavid Edmondson 	}
2803843e1988Sjohnlev 
28049276b399SYuri Pankov 	ASSERT3U(bufp->grant_ref, ==, INVALID_GRANT_REF);
2805843e1988Sjohnlev 
280656567907SDavid Edmondson 	bufp->grant_ref = gref;
2807843e1988Sjohnlev 
280856567907SDavid Edmondson 	if (bufp->gen != xnfp->xnf_gen)
280956567907SDavid Edmondson 		xnf_buf_refresh(bufp);
2810843e1988Sjohnlev 
281156567907SDavid Edmondson 	gnttab_grant_foreign_access_ref(bufp->grant_ref,
281256567907SDavid Edmondson 	    xvdi_get_oeid(bufp->xnfp->xnf_devinfo),
281356567907SDavid Edmondson 	    bufp->buf_mfn, readonly ? 1 : 0);
2814843e1988Sjohnlev 
28151a5e258fSJosef 'Jeff' Sipek 	atomic_inc_64(&xnfp->xnf_stat_buf_outstanding);
2816551bc2a6Smrj 
281756567907SDavid Edmondson 	return (bufp);
281856567907SDavid Edmondson }
281956567907SDavid Edmondson 
282056567907SDavid Edmondson static void
282156567907SDavid Edmondson xnf_buf_put(xnf_t *xnfp, xnf_buf_t *bufp, boolean_t readonly)
282256567907SDavid Edmondson {
282356567907SDavid Edmondson 	if (bufp->grant_ref != INVALID_GRANT_REF) {
282456567907SDavid Edmondson 		(void) gnttab_end_foreign_access_ref(
282556567907SDavid Edmondson 		    bufp->grant_ref, readonly ? 1 : 0);
28269276b399SYuri Pankov 		xnf_gref_put(xnfp, bufp->grant_ref);
282756567907SDavid Edmondson 		bufp->grant_ref = INVALID_GRANT_REF;
2828843e1988Sjohnlev 	}
2829843e1988Sjohnlev 
283056567907SDavid Edmondson 	kmem_cache_free(xnfp->xnf_buf_cache, bufp);
2831843e1988Sjohnlev 
28321a5e258fSJosef 'Jeff' Sipek 	atomic_dec_64(&xnfp->xnf_stat_buf_outstanding);
283356567907SDavid Edmondson }
2834843e1988Sjohnlev 
283556567907SDavid Edmondson /*
283656567907SDavid Edmondson  * Refresh any cached data about a buffer after resume.
283756567907SDavid Edmondson  */
283856567907SDavid Edmondson static void
283956567907SDavid Edmondson xnf_buf_refresh(xnf_buf_t *bdesc)
284056567907SDavid Edmondson {
284156567907SDavid Edmondson 	bdesc->buf_mfn = pfn_to_mfn(xnf_btop(bdesc->buf_phys));
284256567907SDavid Edmondson 	bdesc->gen = bdesc->xnfp->xnf_gen;
284356567907SDavid Edmondson }
2844843e1988Sjohnlev 
284556567907SDavid Edmondson /*
284656567907SDavid Edmondson  * Streams `freeb' routine for `xnf_buf_t' when used as transmit
284756567907SDavid Edmondson  * look-aside buffers.
284856567907SDavid Edmondson  */
284956567907SDavid Edmondson static void
285056567907SDavid Edmondson xnf_buf_recycle(xnf_buf_t *bdesc)
285156567907SDavid Edmondson {
285256567907SDavid Edmondson 	xnf_t *xnfp = bdesc->xnfp;
285356567907SDavid Edmondson 
285456567907SDavid Edmondson 	xnf_buf_put(xnfp, bdesc, B_TRUE);
285556567907SDavid Edmondson }
285656567907SDavid Edmondson 
285756567907SDavid Edmondson static int
285856567907SDavid Edmondson xnf_tx_buf_constructor(void *buf, void *arg, int kmflag)
285956567907SDavid Edmondson {
2860fd0939efSDavid Edmondson 	int (*ddiflags)(caddr_t) = DDI_DMA_SLEEP;
286156567907SDavid Edmondson 	xnf_txbuf_t *txp = buf;
286256567907SDavid Edmondson 	xnf_t *xnfp = arg;
286356567907SDavid Edmondson 
2864fd0939efSDavid Edmondson 	if (kmflag & KM_NOSLEEP)
2865fd0939efSDavid Edmondson 		ddiflags = DDI_DMA_DONTWAIT;
2866fd0939efSDavid Edmondson 
28679276b399SYuri Pankov 	if (ddi_dma_alloc_handle(xnfp->xnf_devinfo, &tx_buf_dma_attr,
2868fd0939efSDavid Edmondson 	    ddiflags, 0, &txp->tx_dma_handle) != DDI_SUCCESS) {
2869fd0939efSDavid Edmondson 		ASSERT(kmflag & KM_NOSLEEP); /* Cannot fail for KM_SLEEP. */
287056567907SDavid Edmondson 		return (-1);
2871fd0939efSDavid Edmondson 	}
287256567907SDavid Edmondson 
287356567907SDavid Edmondson 	return (0);
287456567907SDavid Edmondson }
287556567907SDavid Edmondson 
287656567907SDavid Edmondson static void
287756567907SDavid Edmondson xnf_tx_buf_destructor(void *buf, void *arg)
287856567907SDavid Edmondson {
287956567907SDavid Edmondson 	_NOTE(ARGUNUSED(arg));
288056567907SDavid Edmondson 	xnf_txbuf_t *txp = buf;
288156567907SDavid Edmondson 
288256567907SDavid Edmondson 	ddi_dma_free_handle(&txp->tx_dma_handle);
2883843e1988Sjohnlev }
2884843e1988Sjohnlev 
2885551bc2a6Smrj /*
2886551bc2a6Smrj  * Statistics.
2887551bc2a6Smrj  */
2888551bc2a6Smrj static char *xnf_aux_statistics[] = {
2889551bc2a6Smrj 	"tx_cksum_deferred",
2890551bc2a6Smrj 	"rx_cksum_no_need",
2891551bc2a6Smrj 	"interrupts",
2892551bc2a6Smrj 	"unclaimed_interrupts",
2893551bc2a6Smrj 	"tx_pullup",
28949276b399SYuri Pankov 	"tx_lookaside",
28959276b399SYuri Pankov 	"tx_drop",
28969276b399SYuri Pankov 	"tx_eth_hdr_split",
289756567907SDavid Edmondson 	"buf_allocated",
289856567907SDavid Edmondson 	"buf_outstanding",
289956567907SDavid Edmondson 	"gref_outstanding",
290056567907SDavid Edmondson 	"gref_failure",
290156567907SDavid Edmondson 	"gref_peak",
290256567907SDavid Edmondson 	"rx_allocb_fail",
290356567907SDavid Edmondson 	"rx_desballoc_fail",
2904551bc2a6Smrj };
2905551bc2a6Smrj 
2906551bc2a6Smrj static int
2907551bc2a6Smrj xnf_kstat_aux_update(kstat_t *ksp, int flag)
2908551bc2a6Smrj {
2909551bc2a6Smrj 	xnf_t *xnfp;
2910551bc2a6Smrj 	kstat_named_t *knp;
2911551bc2a6Smrj 
2912551bc2a6Smrj 	if (flag != KSTAT_READ)
2913551bc2a6Smrj 		return (EACCES);
2914551bc2a6Smrj 
2915551bc2a6Smrj 	xnfp = ksp->ks_private;
2916551bc2a6Smrj 	knp = ksp->ks_data;
2917551bc2a6Smrj 
2918551bc2a6Smrj 	/*
2919551bc2a6Smrj 	 * Assignment order must match that of the names in
2920551bc2a6Smrj 	 * xnf_aux_statistics.
2921551bc2a6Smrj 	 */
2922551bc2a6Smrj 	(knp++)->value.ui64 = xnfp->xnf_stat_tx_cksum_deferred;
2923551bc2a6Smrj 	(knp++)->value.ui64 = xnfp->xnf_stat_rx_cksum_no_need;
2924551bc2a6Smrj 
2925551bc2a6Smrj 	(knp++)->value.ui64 = xnfp->xnf_stat_interrupts;
2926551bc2a6Smrj 	(knp++)->value.ui64 = xnfp->xnf_stat_unclaimed_interrupts;
2927551bc2a6Smrj 	(knp++)->value.ui64 = xnfp->xnf_stat_tx_pullup;
29289276b399SYuri Pankov 	(knp++)->value.ui64 = xnfp->xnf_stat_tx_lookaside;
29299276b399SYuri Pankov 	(knp++)->value.ui64 = xnfp->xnf_stat_tx_drop;
29309276b399SYuri Pankov 	(knp++)->value.ui64 = xnfp->xnf_stat_tx_eth_hdr_split;
2931551bc2a6Smrj 
293256567907SDavid Edmondson 	(knp++)->value.ui64 = xnfp->xnf_stat_buf_allocated;
293356567907SDavid Edmondson 	(knp++)->value.ui64 = xnfp->xnf_stat_buf_outstanding;
293456567907SDavid Edmondson 	(knp++)->value.ui64 = xnfp->xnf_stat_gref_outstanding;
293556567907SDavid Edmondson 	(knp++)->value.ui64 = xnfp->xnf_stat_gref_failure;
293656567907SDavid Edmondson 	(knp++)->value.ui64 = xnfp->xnf_stat_gref_peak;
293756567907SDavid Edmondson 	(knp++)->value.ui64 = xnfp->xnf_stat_rx_allocb_fail;
293856567907SDavid Edmondson 	(knp++)->value.ui64 = xnfp->xnf_stat_rx_desballoc_fail;
2939551bc2a6Smrj 
2940551bc2a6Smrj 	return (0);
2941551bc2a6Smrj }
2942551bc2a6Smrj 
2943551bc2a6Smrj static boolean_t
2944551bc2a6Smrj xnf_kstat_init(xnf_t *xnfp)
2945551bc2a6Smrj {
2946551bc2a6Smrj 	int nstat = sizeof (xnf_aux_statistics) /
2947551bc2a6Smrj 	    sizeof (xnf_aux_statistics[0]);
2948551bc2a6Smrj 	char **cp = xnf_aux_statistics;
2949551bc2a6Smrj 	kstat_named_t *knp;
2950551bc2a6Smrj 
2951551bc2a6Smrj 	/*
2952551bc2a6Smrj 	 * Create and initialise kstats.
2953551bc2a6Smrj 	 */
2954551bc2a6Smrj 	if ((xnfp->xnf_kstat_aux = kstat_create("xnf",
2955551bc2a6Smrj 	    ddi_get_instance(xnfp->xnf_devinfo),
2956551bc2a6Smrj 	    "aux_statistics", "net", KSTAT_TYPE_NAMED,
2957551bc2a6Smrj 	    nstat, 0)) == NULL)
2958551bc2a6Smrj 		return (B_FALSE);
2959551bc2a6Smrj 
2960551bc2a6Smrj 	xnfp->xnf_kstat_aux->ks_private = xnfp;
2961551bc2a6Smrj 	xnfp->xnf_kstat_aux->ks_update = xnf_kstat_aux_update;
2962551bc2a6Smrj 
2963551bc2a6Smrj 	knp = xnfp->xnf_kstat_aux->ks_data;
2964551bc2a6Smrj 	while (nstat > 0) {
2965551bc2a6Smrj 		kstat_named_init(knp, *cp, KSTAT_DATA_UINT64);
2966551bc2a6Smrj 
2967551bc2a6Smrj 		knp++;
2968551bc2a6Smrj 		cp++;
2969551bc2a6Smrj 		nstat--;
2970551bc2a6Smrj 	}
2971551bc2a6Smrj 
2972551bc2a6Smrj 	kstat_install(xnfp->xnf_kstat_aux);
2973551bc2a6Smrj 
2974551bc2a6Smrj 	return (B_TRUE);
2975551bc2a6Smrj }
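/*
 * Once installed, these counters can be read from userland with kstat(1M),
 * e.g. (instance number illustrative):
 *
 *	kstat -p xnf:0:aux_statistics
 */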
2976551bc2a6Smrj 
2977843e1988Sjohnlev static int
2978843e1988Sjohnlev xnf_stat(void *arg, uint_t stat, uint64_t *val)
2979843e1988Sjohnlev {
2980843e1988Sjohnlev 	xnf_t *xnfp = arg;
2981843e1988Sjohnlev 
298256567907SDavid Edmondson 	mutex_enter(&xnfp->xnf_rxlock);
2983551bc2a6Smrj 	mutex_enter(&xnfp->xnf_txlock);
2984843e1988Sjohnlev 
2985551bc2a6Smrj #define	mac_stat(q, r)				\
2986843e1988Sjohnlev 	case (MAC_STAT_##q):			\
2987551bc2a6Smrj 		*val = xnfp->xnf_stat_##r;	\
2988551bc2a6Smrj 		break
2989551bc2a6Smrj 
2990551bc2a6Smrj #define	ether_stat(q, r)			\
2991551bc2a6Smrj 	case (ETHER_STAT_##q):			\
2992551bc2a6Smrj 		*val = xnfp->xnf_stat_##r;	\
2993843e1988Sjohnlev 		break
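	/*
	 * For example, the invocation "mac_stat(IPACKETS, ipackets);" below
	 * expands to:
	 *
	 *	case (MAC_STAT_IPACKETS):
	 *		*val = xnfp->xnf_stat_ipackets;
	 *		break;
	 */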
2994843e1988Sjohnlev 
2995843e1988Sjohnlev 	switch (stat) {
2996843e1988Sjohnlev 
2997551bc2a6Smrj 	mac_stat(IPACKETS, ipackets);
2998551bc2a6Smrj 	mac_stat(OPACKETS, opackets);
2999551bc2a6Smrj 	mac_stat(RBYTES, rbytes);
3000551bc2a6Smrj 	mac_stat(OBYTES, obytes);
3001551bc2a6Smrj 	mac_stat(NORCVBUF, norxbuf);
3002551bc2a6Smrj 	mac_stat(IERRORS, errrx);
3003551bc2a6Smrj 	mac_stat(NOXMTBUF, tx_defer);
3004551bc2a6Smrj 
3005551bc2a6Smrj 	ether_stat(MACRCV_ERRORS, mac_rcv_error);
3006551bc2a6Smrj 	ether_stat(TOOSHORT_ERRORS, runt);
3007843e1988Sjohnlev 
30084bae950fSMax zhen 	/* always claim to be in full duplex mode */
30094bae950fSMax zhen 	case ETHER_STAT_LINK_DUPLEX:
30104bae950fSMax zhen 		*val = LINK_DUPLEX_FULL;
30114bae950fSMax zhen 		break;
30124bae950fSMax zhen 
30134bae950fSMax zhen 	/* always claim to be at 1Gb/s link speed */
30144bae950fSMax zhen 	case MAC_STAT_IFSPEED:
30154bae950fSMax zhen 		*val = 1000000000ull;
30164bae950fSMax zhen 		break;
30174bae950fSMax zhen 
3018843e1988Sjohnlev 	default:
3019551bc2a6Smrj 		mutex_exit(&xnfp->xnf_txlock);
302056567907SDavid Edmondson 		mutex_exit(&xnfp->xnf_rxlock);
3021843e1988Sjohnlev 
3022843e1988Sjohnlev 		return (ENOTSUP);
3023843e1988Sjohnlev 	}
3024843e1988Sjohnlev 
3025551bc2a6Smrj #undef mac_stat
3026551bc2a6Smrj #undef ether_stat
3027843e1988Sjohnlev 
3028551bc2a6Smrj 	mutex_exit(&xnfp->xnf_txlock);
302956567907SDavid Edmondson 	mutex_exit(&xnfp->xnf_rxlock);
3030843e1988Sjohnlev 
3031843e1988Sjohnlev 	return (0);
3032843e1988Sjohnlev }
3033843e1988Sjohnlev 
30349276b399SYuri Pankov static int
30359276b399SYuri Pankov xnf_change_mtu(xnf_t *xnfp, uint32_t mtu)
30369276b399SYuri Pankov {
30379276b399SYuri Pankov 	if (mtu > ETHERMTU) {
30389276b399SYuri Pankov 		if (!xnf_enable_tx_sg) {
30399276b399SYuri Pankov 			dev_err(xnfp->xnf_devinfo, CE_WARN, "MTU limited to %d "
30409276b399SYuri Pankov 			    "because scatter-gather is disabled for transmit "
30419276b399SYuri Pankov 			    "in driver settings", ETHERMTU);
30429276b399SYuri Pankov 			return (EINVAL);
30439276b399SYuri Pankov 		} else if (!xnf_enable_rx_sg) {
30449276b399SYuri Pankov 			dev_err(xnfp->xnf_devinfo, CE_WARN, "MTU limited to %d "
30459276b399SYuri Pankov 			    "because scatter-gather is disabled for receive "
30469276b399SYuri Pankov 			    "in driver settings", ETHERMTU);
30479276b399SYuri Pankov 			return (EINVAL);
30489276b399SYuri Pankov 		} else if (!xnfp->xnf_be_tx_sg) {
30499276b399SYuri Pankov 			dev_err(xnfp->xnf_devinfo, CE_WARN, "MTU limited to %d "
30509276b399SYuri Pankov 			    "because backend doesn't support scatter-gather",
30519276b399SYuri Pankov 			    ETHERMTU);
30529276b399SYuri Pankov 			return (EINVAL);
30539276b399SYuri Pankov 		}
30549276b399SYuri Pankov 		if (mtu > XNF_MAXPKT)
30559276b399SYuri Pankov 			return (EINVAL);
30569276b399SYuri Pankov 	}
30579276b399SYuri Pankov 	int error = mac_maxsdu_update(xnfp->xnf_mh, mtu);
30589276b399SYuri Pankov 	if (error == 0)
30599276b399SYuri Pankov 		xnfp->xnf_mtu = mtu;
30609276b399SYuri Pankov 
30619276b399SYuri Pankov 	return (error);
30629276b399SYuri Pankov }
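/*
 * The checks above are reached through the MAC property callbacks that
 * follow; from userland the MTU is typically driven with dladm, e.g. (link
 * name illustrative):
 *
 *	dladm set-linkprop -p mtu=9000 xnf0
 */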
30639276b399SYuri Pankov 
30649276b399SYuri Pankov /*ARGSUSED*/
30659276b399SYuri Pankov static int
30669276b399SYuri Pankov xnf_getprop(void *data, const char *prop_name, mac_prop_id_t prop_id,
30679276b399SYuri Pankov     uint_t prop_val_size, void *prop_val)
30689276b399SYuri Pankov {
30699276b399SYuri Pankov 	xnf_t *xnfp = data;
30709276b399SYuri Pankov 
30719276b399SYuri Pankov 	switch (prop_id) {
30729276b399SYuri Pankov 	case MAC_PROP_MTU:
30739276b399SYuri Pankov 		ASSERT(prop_val_size >= sizeof (uint32_t));
30749276b399SYuri Pankov 		bcopy(&xnfp->xnf_mtu, prop_val, sizeof (uint32_t));
30759276b399SYuri Pankov 		break;
30769276b399SYuri Pankov 	default:
30779276b399SYuri Pankov 		return (ENOTSUP);
30789276b399SYuri Pankov 	}
30799276b399SYuri Pankov 	return (0);
30809276b399SYuri Pankov }
30819276b399SYuri Pankov 
30829276b399SYuri Pankov /*ARGSUSED*/
30839276b399SYuri Pankov static int
30849276b399SYuri Pankov xnf_setprop(void *data, const char *prop_name, mac_prop_id_t prop_id,
30859276b399SYuri Pankov     uint_t prop_val_size, const void *prop_val)
30869276b399SYuri Pankov {
30879276b399SYuri Pankov 	xnf_t *xnfp = data;
30889276b399SYuri Pankov 	uint32_t new_mtu;
30899276b399SYuri Pankov 	int error;
30909276b399SYuri Pankov 
30919276b399SYuri Pankov 	switch (prop_id) {
30929276b399SYuri Pankov 	case MAC_PROP_MTU:
30939276b399SYuri Pankov 		ASSERT(prop_val_size >= sizeof (uint32_t));
30949276b399SYuri Pankov 		bcopy(prop_val, &new_mtu, sizeof (new_mtu));
30959276b399SYuri Pankov 		error = xnf_change_mtu(xnfp, new_mtu);
30969276b399SYuri Pankov 		break;
30979276b399SYuri Pankov 	default:
30989276b399SYuri Pankov 		return (ENOTSUP);
30999276b399SYuri Pankov 	}
31009276b399SYuri Pankov 
31019276b399SYuri Pankov 	return (error);
31029276b399SYuri Pankov }
31039276b399SYuri Pankov 
31049276b399SYuri Pankov /*ARGSUSED*/
31059276b399SYuri Pankov static void
31069276b399SYuri Pankov xnf_propinfo(void *data, const char *prop_name, mac_prop_id_t prop_id,
31079276b399SYuri Pankov     mac_prop_info_handle_t prop_handle)
31089276b399SYuri Pankov {
31099276b399SYuri Pankov 	switch (prop_id) {
31109276b399SYuri Pankov 	case MAC_PROP_MTU:
31119276b399SYuri Pankov 		mac_prop_info_set_range_uint32(prop_handle, 0, XNF_MAXPKT);
31129276b399SYuri Pankov 		break;
31139276b399SYuri Pankov 	default:
31149276b399SYuri Pankov 		break;
31159276b399SYuri Pankov 	}
31169276b399SYuri Pankov }
31179276b399SYuri Pankov 
3118843e1988Sjohnlev static boolean_t
3119843e1988Sjohnlev xnf_getcapab(void *arg, mac_capab_t cap, void *cap_data)
3120843e1988Sjohnlev {
31219276b399SYuri Pankov 	xnf_t *xnfp = arg;
3122843e1988Sjohnlev 
3123843e1988Sjohnlev 	switch (cap) {
3124843e1988Sjohnlev 	case MAC_CAPAB_HCKSUM: {
3125843e1988Sjohnlev 		uint32_t *capab = cap_data;
3126843e1988Sjohnlev 
3127568a765bSdme 		/*
3128a859da42SDavid Edmondson 		 * Whilst the flag used to communicate with the IO
3129a859da42SDavid Edmondson 		 * domain is called "NETTXF_csum_blank", the checksum
3130a859da42SDavid Edmondson 		 * in the packet must contain the pseudo-header
3131a859da42SDavid Edmondson 		 * checksum and not zero.
3132a859da42SDavid Edmondson 		 *
3133a859da42SDavid Edmondson 		 * To help out the IO domain, we might use
3134a859da42SDavid Edmondson 		 * HCKSUM_INET_PARTIAL. Unfortunately our stack will
3135a859da42SDavid Edmondson 		 * then use checksum offload for IPv6 packets, which
3136a859da42SDavid Edmondson 		 * the IO domain can't handle.
3137568a765bSdme 		 *
3138a859da42SDavid Edmondson  * As a result, we declare ourselves capable of
3139a859da42SDavid Edmondson 		 * HCKSUM_INET_FULL_V4. This means that we receive
3140a859da42SDavid Edmondson 		 * IPv4 packets from the stack with a blank checksum
3141a859da42SDavid Edmondson 		 * field and must insert the pseudo-header checksum
3142a859da42SDavid Edmondson 		 * before passing the packet to the IO domain.
3143568a765bSdme 		 */
314456567907SDavid Edmondson 		*capab = HCKSUM_INET_FULL_V4;
31459276b399SYuri Pankov 
31469276b399SYuri Pankov 		/*
31479276b399SYuri Pankov 		 * TODO: query the "feature-ipv6-csum-offload" capability.
31489276b399SYuri Pankov 		 * If enabled, that could allow us to use HCKSUM_INET_PARTIAL.
31499276b399SYuri Pankov 		 */
31509276b399SYuri Pankov 
31519276b399SYuri Pankov 		break;
31529276b399SYuri Pankov 	}
31539276b399SYuri Pankov 	case MAC_CAPAB_LSO: {
31549276b399SYuri Pankov 		if (!xnfp->xnf_be_lso)
31559276b399SYuri Pankov 			return (B_FALSE);
31569276b399SYuri Pankov 
31579276b399SYuri Pankov 		mac_capab_lso_t *lso = cap_data;
31589276b399SYuri Pankov 		lso->lso_flags = LSO_TX_BASIC_TCP_IPV4;
31599276b399SYuri Pankov 		lso->lso_basic_tcp_ipv4.lso_max = IP_MAXPACKET;
3160843e1988Sjohnlev 		break;
3161843e1988Sjohnlev 	}
3162843e1988Sjohnlev 	default:
3163843e1988Sjohnlev 		return (B_FALSE);
3164843e1988Sjohnlev 	}
3165843e1988Sjohnlev 
3166843e1988Sjohnlev 	return (B_TRUE);
3167843e1988Sjohnlev }
3168843e1988Sjohnlev 
316956567907SDavid Edmondson /*
317056567907SDavid Edmondson  * The state of the peer has changed - react accordingly.
317156567907SDavid Edmondson  */
3172843e1988Sjohnlev static void
3173843e1988Sjohnlev oe_state_change(dev_info_t *dip, ddi_eventcookie_t id,
3174843e1988Sjohnlev     void *arg, void *impl_data)
3175843e1988Sjohnlev {
317656567907SDavid Edmondson 	_NOTE(ARGUNUSED(id, arg));
3177843e1988Sjohnlev 	xnf_t *xnfp = ddi_get_driver_private(dip);
3178843e1988Sjohnlev 	XenbusState new_state = *(XenbusState *)impl_data;
3179843e1988Sjohnlev 
3180843e1988Sjohnlev 	ASSERT(xnfp != NULL);
3181843e1988Sjohnlev 
3182843e1988Sjohnlev 	switch (new_state) {
318356567907SDavid Edmondson 	case XenbusStateUnknown:
318456567907SDavid Edmondson 	case XenbusStateInitialising:
318556567907SDavid Edmondson 	case XenbusStateInitialised:
318656567907SDavid Edmondson 	case XenbusStateClosing:
318756567907SDavid Edmondson 	case XenbusStateClosed:
318856567907SDavid Edmondson 	case XenbusStateReconfiguring:
318956567907SDavid Edmondson 	case XenbusStateReconfigured:
319056567907SDavid Edmondson 		break;
319156567907SDavid Edmondson 
319256567907SDavid Edmondson 	case XenbusStateInitWait:
319356567907SDavid Edmondson 		xnf_read_config(xnfp);
319456567907SDavid Edmondson 
319556567907SDavid Edmondson 		if (!xnfp->xnf_be_rx_copy) {
319656567907SDavid Edmondson 			cmn_err(CE_WARN,
319756567907SDavid Edmondson 			    "The xnf driver requires a dom0 that "
319856567907SDavid Edmondson 			    "supports 'feature-rx-copy'.");
319956567907SDavid Edmondson 			(void) xvdi_switch_state(xnfp->xnf_devinfo,
320056567907SDavid Edmondson 			    XBT_NULL, XenbusStateClosed);
320156567907SDavid Edmondson 			break;
320256567907SDavid Edmondson 		}
320356567907SDavid Edmondson 
320456567907SDavid Edmondson 		/*
320556567907SDavid Edmondson 		 * Connect to the backend.
320656567907SDavid Edmondson 		 */
320756567907SDavid Edmondson 		xnf_be_connect(xnfp);
320856567907SDavid Edmondson 
320956567907SDavid Edmondson 		/*
321056567907SDavid Edmondson 		 * Our MAC address as discovered by xnf_read_config().
321156567907SDavid Edmondson 		 */
321256567907SDavid Edmondson 		mac_unicst_update(xnfp->xnf_mh, xnfp->xnf_mac_addr);
321356567907SDavid Edmondson 
32149276b399SYuri Pankov 		/*
32159276b399SYuri Pankov 		 * We do not know if some features such as LSO are supported
32169276b399SYuri Pankov 		 * until we connect to the backend. We request the MAC layer
32179276b399SYuri Pankov 		 * to poll our capabilities again.
32189276b399SYuri Pankov 		 */
32199276b399SYuri Pankov 		mac_capab_update(xnfp->xnf_mh);
32209276b399SYuri Pankov 
322156567907SDavid Edmondson 		break;
322256567907SDavid Edmondson 
3223843e1988Sjohnlev 	case XenbusStateConnected:
322456567907SDavid Edmondson 		mutex_enter(&xnfp->xnf_rxlock);
3225551bc2a6Smrj 		mutex_enter(&xnfp->xnf_txlock);
3226843e1988Sjohnlev 
3227551bc2a6Smrj 		xnfp->xnf_connected = B_TRUE;
3228a390c5f4Scz 		/*
322956567907SDavid Edmondson 		 * Wake up any threads waiting to send data to
323056567907SDavid Edmondson 		 * backend.
3231a390c5f4Scz 		 */
323256567907SDavid Edmondson 		cv_broadcast(&xnfp->xnf_cv_state);
3233843e1988Sjohnlev 
3234551bc2a6Smrj 		mutex_exit(&xnfp->xnf_txlock);
323556567907SDavid Edmondson 		mutex_exit(&xnfp->xnf_rxlock);
3236843e1988Sjohnlev 
3237a390c5f4Scz 		/*
323856567907SDavid Edmondson 		 * Kick the peer in case it missed any transmit
323956567907SDavid Edmondson 		 * requests in the TX ring.
3240a390c5f4Scz 		 */
3241551bc2a6Smrj 		ec_notify_via_evtchn(xnfp->xnf_evtchn);
3242a390c5f4Scz 
3243a390c5f4Scz 		/*
324456567907SDavid Edmondson 		 * There may already be completed receive requests in
324556567907SDavid Edmondson 		 * the ring sent by the backend after it gets connected
324656567907SDavid Edmondson 		 * but before we see its state change here, so we call
324756567907SDavid Edmondson 		 * xnf_intr() to handle them, if any.
3248a390c5f4Scz 		 */
3249a390c5f4Scz 		(void) xnf_intr((caddr_t)xnfp);
3250a390c5f4Scz 
325156567907SDavid Edmondson 		/*
325256567907SDavid Edmondson 		 * Mark the link up now that we are connected.
325356567907SDavid Edmondson 		 */
32544bae950fSMax zhen 		mac_link_update(xnfp->xnf_mh, LINK_STATE_UP);
32554bae950fSMax zhen 
325656567907SDavid Edmondson 		/*
325756567907SDavid Edmondson 		 * Tell the backend about the multicast addresses in
325856567907SDavid Edmondson 		 * which we are interested.
325956567907SDavid Edmondson 		 */
326056567907SDavid Edmondson 		mac_multicast_refresh(xnfp->xnf_mh, NULL, xnfp, B_TRUE);
326156567907SDavid Edmondson 
3262843e1988Sjohnlev 		break;
3263843e1988Sjohnlev 
3264843e1988Sjohnlev 	default:
3265843e1988Sjohnlev 		break;
3266843e1988Sjohnlev 	}
3267843e1988Sjohnlev }
3268