xref: /illumos-gate/usr/src/uts/common/xen/io/xnb.c (revision c61a1653)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 * Copyright 2018 Joyent, Inc.
 */

#ifdef DEBUG
#define	XNB_DEBUG 1
#endif /* DEBUG */

#include "xnb.h"

#include <sys/sunddi.h>
#include <sys/sunndi.h>
#include <sys/modctl.h>
#include <sys/conf.h>
#include <sys/mac.h>
#include <sys/mac_impl.h> /* For mac_fix_cksum(). */
#include <sys/dlpi.h>
#include <sys/strsubr.h>
#include <sys/strsun.h>
#include <sys/types.h>
#include <sys/pattr.h>
#include <vm/seg_kmem.h>
#include <vm/hat_i86.h>
#include <xen/sys/xenbus_impl.h>
#include <xen/sys/xendev.h>
#include <sys/balloon_impl.h>
#include <sys/evtchn_impl.h>
#include <sys/gnttab.h>
#include <vm/vm_dep.h>
#include <sys/note.h>
#include <sys/gld.h>
#include <inet/ip.h>
#include <inet/ip_impl.h>

/*
 * The terms "transmit" and "receive" are used in alignment with the
 * domU driver: packets originating from the peer domU are
 * "transmitted" to other parts of the system, and packets destined
 * for the peer are "received" from them.
 */

/*
 * Should we allow guests to manipulate multicast group membership?
 */
static boolean_t	xnb_multicast_control = B_TRUE;

static boolean_t	xnb_connect_rings(dev_info_t *);
static void		xnb_disconnect_rings(dev_info_t *);
static void		xnb_oe_state_change(dev_info_t *, ddi_eventcookie_t,
    void *, void *);
static void		xnb_hp_state_change(dev_info_t *, ddi_eventcookie_t,
    void *, void *);

static int	xnb_txbuf_constructor(void *, void *, int);
static void	xnb_txbuf_destructor(void *, void *);
static void	xnb_tx_notify_peer(xnb_t *, boolean_t);
static void	xnb_tx_mark_complete(xnb_t *, RING_IDX, int16_t);

mblk_t		*xnb_to_peer(xnb_t *, mblk_t *);
mblk_t		*xnb_copy_to_peer(xnb_t *, mblk_t *);

static void		setup_gop(xnb_t *, gnttab_copy_t *, uchar_t *,
    size_t, size_t, size_t, grant_ref_t);
#pragma inline(setup_gop)
static boolean_t	is_foreign(void *);
#pragma inline(is_foreign)

#define	INVALID_GRANT_HANDLE	((grant_handle_t)-1)
#define	INVALID_GRANT_REF	((grant_ref_t)-1)

static kmutex_t	xnb_alloc_page_lock;

/*
 * On a 32 bit PAE system physical and machine addresses are larger
 * than 32 bits.  ddi_btop() on such systems takes an unsigned long
 * argument, and so addresses above 4G are truncated before ddi_btop()
 * gets to see them.  To avoid this, code the shift operation here.
 */
#define	xnb_btop(addr)	((addr) >> PAGESHIFT)
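/*
 * For illustration (editor's note, hypothetical value): a 32-bit
 * unsigned long holds only the low 32 bits of the physical address
 * 0x100000000 (4G), so ddi_btop() would see 0 and return page 0.
 * xnb_btop(0x100000000ULL) instead yields 0x100000000 >> 12 ==
 * 0x100000, the correct page number (assuming the usual 4K page,
 * i.e. PAGESHIFT of 12).
 */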

/* DMA attributes for transmit and receive data */
static ddi_dma_attr_t buf_dma_attr = {
	DMA_ATTR_V0,		/* version of this structure */
	0,			/* lowest usable address */
	0xffffffffffffffffULL,	/* highest usable address */
	0x7fffffff,		/* maximum DMAable byte count */
	MMU_PAGESIZE,		/* alignment in bytes */
	0x7ff,			/* bitmap of burst sizes */
	1,			/* minimum transfer */
	0xffffffffU,		/* maximum transfer */
	0xffffffffffffffffULL,	/* maximum segment length */
	1,			/* maximum number of segments */
	1,			/* granularity */
	0,			/* flags (reserved) */
};

/* DMA access attributes for data: NOT to be byte swapped. */
static ddi_device_acc_attr_t data_accattr = {
	DDI_DEVICE_ATTR_V0,
	DDI_NEVERSWAP_ACC,
	DDI_STRICTORDER_ACC
};

/*
 * Statistics.
 */
static const char * const aux_statistics[] = {
	"rx_cksum_deferred",
	"tx_cksum_no_need",
	"rx_rsp_notok",
	"tx_notify_deferred",
	"tx_notify_sent",
	"rx_notify_deferred",
	"rx_notify_sent",
	"tx_too_early",
	"rx_too_early",
	"rx_allocb_failed",
	"tx_allocb_failed",
	"rx_foreign_page",
	"mac_full",
	"spurious_intr",
	"allocation_success",
	"allocation_failure",
	"small_allocation_success",
	"small_allocation_failure",
	"other_allocation_failure",
	"rx_pageboundary_crossed",
	"rx_cpoparea_grown",
	"csum_hardware",
	"csum_software",
	"tx_overflow_page",
	"tx_unexpected_flags",
};

static int
xnb_ks_aux_update(kstat_t *ksp, int flag)
{
	xnb_t *xnbp;
	kstat_named_t *knp;

	if (flag != KSTAT_READ)
		return (EACCES);

	xnbp = ksp->ks_private;
	knp = ksp->ks_data;

	/*
	 * Assignment order should match that of the names in
	 * aux_statistics.
	 */
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_cksum_deferred;
	(knp++)->value.ui64 = xnbp->xnb_stat_tx_cksum_no_need;
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_rsp_notok;
	(knp++)->value.ui64 = xnbp->xnb_stat_tx_notify_deferred;
	(knp++)->value.ui64 = xnbp->xnb_stat_tx_notify_sent;
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_notify_deferred;
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_notify_sent;
	(knp++)->value.ui64 = xnbp->xnb_stat_tx_too_early;
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_too_early;
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_allocb_failed;
	(knp++)->value.ui64 = xnbp->xnb_stat_tx_allocb_failed;
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_foreign_page;
	(knp++)->value.ui64 = xnbp->xnb_stat_mac_full;
	(knp++)->value.ui64 = xnbp->xnb_stat_spurious_intr;
	(knp++)->value.ui64 = xnbp->xnb_stat_allocation_success;
	(knp++)->value.ui64 = xnbp->xnb_stat_allocation_failure;
	(knp++)->value.ui64 = xnbp->xnb_stat_small_allocation_success;
	(knp++)->value.ui64 = xnbp->xnb_stat_small_allocation_failure;
	(knp++)->value.ui64 = xnbp->xnb_stat_other_allocation_failure;
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_pagebndry_crossed;
	(knp++)->value.ui64 = xnbp->xnb_stat_rx_cpoparea_grown;
	(knp++)->value.ui64 = xnbp->xnb_stat_csum_hardware;
	(knp++)->value.ui64 = xnbp->xnb_stat_csum_software;
	(knp++)->value.ui64 = xnbp->xnb_stat_tx_overflow_page;
	(knp++)->value.ui64 = xnbp->xnb_stat_tx_unexpected_flags;

	return (0);
}

static boolean_t
xnb_ks_init(xnb_t *xnbp)
{
	int nstat = sizeof (aux_statistics) /
	    sizeof (aux_statistics[0]);
	const char * const *cp = aux_statistics;
	kstat_named_t *knp;

	/*
	 * Create and initialise kstats.
	 */
	xnbp->xnb_kstat_aux = kstat_create(ddi_driver_name(xnbp->xnb_devinfo),
	    ddi_get_instance(xnbp->xnb_devinfo), "aux_statistics", "net",
	    KSTAT_TYPE_NAMED, nstat, 0);
	if (xnbp->xnb_kstat_aux == NULL)
		return (B_FALSE);

	xnbp->xnb_kstat_aux->ks_private = xnbp;
	xnbp->xnb_kstat_aux->ks_update = xnb_ks_aux_update;

	knp = xnbp->xnb_kstat_aux->ks_data;
	while (nstat > 0) {
		kstat_named_init(knp, *cp, KSTAT_DATA_UINT64);

		knp++;
		cp++;
		nstat--;
	}

	kstat_install(xnbp->xnb_kstat_aux);

	return (B_TRUE);
}

static void
xnb_ks_free(xnb_t *xnbp)
{
	kstat_delete(xnbp->xnb_kstat_aux);
}

/*
 * Calculate and insert the transport checksum for an arbitrary packet.
 */
static mblk_t *
xnb_software_csum(xnb_t *xnbp, mblk_t *mp)
{
	_NOTE(ARGUNUSED(xnbp));

	/*
	 * XXPV dme: shouldn't rely on mac_fix_cksum(), not least
	 * because it doesn't cover all of the interesting cases :-(
	 */
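	/*
	 * Editor's note: the two calls below first flag the packet as
	 * needing a full checksum, then ask the MAC layer's hardware
	 * emulation path to compute and insert that checksum in
	 * software before the packet goes any further.
	 */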
	mac_hcksum_set(mp, 0, 0, 0, 0, HCK_FULLCKSUM);
	mac_hw_emul(&mp, NULL, NULL, MAC_HWCKSUM_EMUL);
	return (mp);
}

mblk_t *
xnb_process_cksum_flags(xnb_t *xnbp, mblk_t *mp, uint32_t capab)
{
	struct ether_header *ehp;
	uint16_t sap;
	uint32_t offset;
	ipha_t *ipha;

	ASSERT(mp->b_next == NULL);

	/*
	 * Check that the packet is contained in a single mblk.  In
	 * the "from peer" path this is true today, but may change
	 * when scatter gather support is added.  In the "to peer"
	 * path we cannot be sure, but in most cases it will be true
	 * (in the xnbo case the packet has come from a MAC device
	 * which is unlikely to split packets).
	 */
	if (mp->b_cont != NULL)
		goto software;

	/*
	 * If the MAC has no hardware capability don't do any further
	 * checking.
	 */
	if (capab == 0)
		goto software;

	ASSERT(MBLKL(mp) >= sizeof (struct ether_header));
	ehp = (struct ether_header *)mp->b_rptr;

	if (ntohs(ehp->ether_type) == VLAN_TPID) {
		struct ether_vlan_header *evhp;

		ASSERT(MBLKL(mp) >= sizeof (struct ether_vlan_header));
		evhp = (struct ether_vlan_header *)mp->b_rptr;
		sap = ntohs(evhp->ether_type);
		offset = sizeof (struct ether_vlan_header);
	} else {
		sap = ntohs(ehp->ether_type);
		offset = sizeof (struct ether_header);
	}
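	/*
	 * At this point offset is the length of the Ethernet header:
	 * 14 bytes (sizeof (struct ether_header)) for an untagged
	 * frame, or 18 bytes (sizeof (struct ether_vlan_header)) when
	 * a VLAN tag is present.  The IP header, if any, begins there.
	 */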

	/*
	 * We only attempt to do IPv4 packets in hardware.
	 */
	if (sap != ETHERTYPE_IP)
		goto software;

	/*
	 * We know that this is an IPv4 packet.
	 */
	ipha = (ipha_t *)(mp->b_rptr + offset);

	switch (ipha->ipha_protocol) {
	case IPPROTO_TCP:
	case IPPROTO_UDP: {
		uint32_t start, length, stuff, cksum;
		uint16_t *stuffp;

		/*
		 * This is a TCP/IPv4 or UDP/IPv4 packet, for which we
		 * can use full IPv4 and partial checksum offload.
		 */
		if ((capab & (HCKSUM_INET_FULL_V4|HCKSUM_INET_PARTIAL)) == 0)
			break;

		start = IP_SIMPLE_HDR_LENGTH;
		length = ntohs(ipha->ipha_length);
		if (ipha->ipha_protocol == IPPROTO_TCP) {
			stuff = start + TCP_CHECKSUM_OFFSET;
			cksum = IP_TCP_CSUM_COMP;
		} else {
			stuff = start + UDP_CHECKSUM_OFFSET;
			cksum = IP_UDP_CSUM_COMP;
		}
		stuffp = (uint16_t *)(mp->b_rptr + offset + stuff);

		if (capab & HCKSUM_INET_FULL_V4) {
			/*
			 * Some devices require that the checksum
			 * field of the packet is zero for full
			 * offload.
			 */
			*stuffp = 0;

			mac_hcksum_set(mp, 0, 0, 0, 0, HCK_FULLCKSUM);

			xnbp->xnb_stat_csum_hardware++;

			return (mp);
		}

		if (capab & HCKSUM_INET_PARTIAL) {
			if (*stuffp == 0) {
				ipaddr_t src, dst;

				/*
				 * Older Solaris guests don't insert
				 * the pseudo-header checksum, so we
				 * calculate it here.
				 */
				src = ipha->ipha_src;
				dst = ipha->ipha_dst;

				cksum += (dst >> 16) + (dst & 0xFFFF);
				cksum += (src >> 16) + (src & 0xFFFF);
				cksum += length - IP_SIMPLE_HDR_LENGTH;

				cksum = (cksum >> 16) + (cksum & 0xFFFF);
				cksum = (cksum >> 16) + (cksum & 0xFFFF);

				ASSERT(cksum <= 0xFFFF);

				*stuffp = (uint16_t)(cksum ? cksum : ~cksum);
			}
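
			/*
			 * Editor's note: the pseudo-header sum above
			 * folds the 32-bit accumulator back into 16
			 * bits twice because the first fold can
			 * itself carry.  A hypothetical worked case:
			 * src 10.0.0.1 and dst 10.0.0.2 contribute
			 * 0x0a00 + 0x0001 and 0x0a00 + 0x0002 on top
			 * of the protocol constant and payload
			 * length; any bits above 0xFFFF are then
			 * added back in at bit 0, which is exactly
			 * ones-complement addition.
			 */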

			mac_hcksum_set(mp, start, stuff, length, 0,
			    HCK_PARTIALCKSUM);

			xnbp->xnb_stat_csum_hardware++;

			return (mp);
		}

		/* NOTREACHED */
		break;
	}

	default:
		/* Use software. */
		break;
	}

software:
	/*
	 * We are not able to use any offload so do the whole thing in
	 * software.
	 */
	xnbp->xnb_stat_csum_software++;

	return (xnb_software_csum(xnbp, mp));
}

int
xnb_attach(dev_info_t *dip, xnb_flavour_t *flavour, void *flavour_data)
{
	xnb_t *xnbp;
	char *xsname;
	char cachename[32];

	xnbp = kmem_zalloc(sizeof (*xnbp), KM_SLEEP);

	xnbp->xnb_flavour = flavour;
	xnbp->xnb_flavour_data = flavour_data;
	xnbp->xnb_devinfo = dip;
	xnbp->xnb_evtchn = INVALID_EVTCHN;
	xnbp->xnb_irq = B_FALSE;
	xnbp->xnb_tx_ring_handle = INVALID_GRANT_HANDLE;
	xnbp->xnb_rx_ring_handle = INVALID_GRANT_HANDLE;
	xnbp->xnb_connected = B_FALSE;
	xnbp->xnb_hotplugged = B_FALSE;
	xnbp->xnb_detachable = B_FALSE;
	xnbp->xnb_peer = xvdi_get_oeid(dip);
	xnbp->xnb_be_status = XNB_STATE_INIT;
	xnbp->xnb_fe_status = XNB_STATE_INIT;

	xnbp->xnb_tx_buf_count = 0;

	xnbp->xnb_rx_hv_copy = B_FALSE;
	xnbp->xnb_multicast_control = B_FALSE;

	xnbp->xnb_rx_va = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
	ASSERT(xnbp->xnb_rx_va != NULL);

	if (ddi_get_iblock_cookie(dip, 0, &xnbp->xnb_icookie)
	    != DDI_SUCCESS)
		goto failure;

	/* Allocated on demand, when/if we enter xnb_copy_to_peer(). */
	xnbp->xnb_rx_cpop = NULL;
	xnbp->xnb_rx_cpop_count = 0;

	mutex_init(&xnbp->xnb_tx_lock, NULL, MUTEX_DRIVER,
	    xnbp->xnb_icookie);
	mutex_init(&xnbp->xnb_rx_lock, NULL, MUTEX_DRIVER,
	    xnbp->xnb_icookie);
	mutex_init(&xnbp->xnb_state_lock, NULL, MUTEX_DRIVER,
	    xnbp->xnb_icookie);

	/* Set driver private pointer now. */
	ddi_set_driver_private(dip, xnbp);

	(void) sprintf(cachename, "xnb_tx_buf_cache_%d", ddi_get_instance(dip));
	xnbp->xnb_tx_buf_cache = kmem_cache_create(cachename,
	    sizeof (xnb_txbuf_t), 0,
	    xnb_txbuf_constructor, xnb_txbuf_destructor,
	    NULL, xnbp, NULL, 0);
	if (xnbp->xnb_tx_buf_cache == NULL)
		goto failure_0;

	if (!xnb_ks_init(xnbp))
		goto failure_1;

	/*
	 * Receive notification of changes in the state of the
	 * driver in the guest domain.
	 */
	if (xvdi_add_event_handler(dip, XS_OE_STATE, xnb_oe_state_change,
	    NULL) != DDI_SUCCESS)
		goto failure_2;

	/*
	 * Receive notification of hotplug events.
	 */
	if (xvdi_add_event_handler(dip, XS_HP_STATE, xnb_hp_state_change,
	    NULL) != DDI_SUCCESS)
		goto failure_2;

	xsname = xvdi_get_xsname(dip);

	if (xenbus_printf(XBT_NULL, xsname,
	    "feature-multicast-control", "%d",
	    xnb_multicast_control ? 1 : 0) != 0)
		goto failure_3;

	if (xenbus_printf(XBT_NULL, xsname,
	    "feature-rx-copy", "%d", 1) != 0)
		goto failure_3;
	/*
	 * Linux domUs seem to depend on "feature-rx-flip" being 0
	 * in addition to "feature-rx-copy" being 1. It seems strange
	 * to use four possible states to describe a binary decision,
	 * but we might as well play nice.
	 */
	if (xenbus_printf(XBT_NULL, xsname,
	    "feature-rx-flip", "%d", 0) != 0)
		goto failure_3;
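
	/*
	 * Editor's sketch: after the three writes above, the
	 * backend's xenstore directory (the path returned by
	 * xvdi_get_xsname()) should contain, with the defaults in
	 * force:
	 *
	 *	feature-multicast-control = "1"
	 *	feature-rx-copy = "1"
	 *	feature-rx-flip = "0"
	 *
	 * The exact directory path depends on the domain and device
	 * identifiers.
	 */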

	(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateInitWait);
	(void) xvdi_post_event(dip, XEN_HP_ADD);

	return (DDI_SUCCESS);

failure_3:
	xvdi_remove_event_handler(dip, NULL);

failure_2:
	xnb_ks_free(xnbp);

failure_1:
	kmem_cache_destroy(xnbp->xnb_tx_buf_cache);

failure_0:
	mutex_destroy(&xnbp->xnb_state_lock);
	mutex_destroy(&xnbp->xnb_rx_lock);
	mutex_destroy(&xnbp->xnb_tx_lock);

failure:
	vmem_free(heap_arena, xnbp->xnb_rx_va, PAGESIZE);
	kmem_free(xnbp, sizeof (*xnbp));
	return (DDI_FAILURE);
}

void
xnb_detach(dev_info_t *dip)
{
	xnb_t *xnbp = ddi_get_driver_private(dip);

	ASSERT(xnbp != NULL);
	ASSERT(!xnbp->xnb_connected);
	ASSERT(xnbp->xnb_tx_buf_count == 0);

	xnb_disconnect_rings(dip);

	xvdi_remove_event_handler(dip, NULL);

	xnb_ks_free(xnbp);

	kmem_cache_destroy(xnbp->xnb_tx_buf_cache);

	ddi_set_driver_private(dip, NULL);

	mutex_destroy(&xnbp->xnb_state_lock);
	mutex_destroy(&xnbp->xnb_rx_lock);
	mutex_destroy(&xnbp->xnb_tx_lock);

	if (xnbp->xnb_rx_cpop_count > 0)
		kmem_free(xnbp->xnb_rx_cpop, sizeof (xnbp->xnb_rx_cpop[0])
		    * xnbp->xnb_rx_cpop_count);

	ASSERT(xnbp->xnb_rx_va != NULL);
	vmem_free(heap_arena, xnbp->xnb_rx_va, PAGESIZE);

	kmem_free(xnbp, sizeof (*xnbp));
}

/*
 * Allocate a page from the hypervisor to be flipped to the peer.
 *
 * Try to get pages in batches to reduce the overhead of calls into
 * the balloon driver.
 */
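/*
 * Editor's note: with BATCH_SIZE at 256, steady-state flipping costs
 * roughly one balloon_alloc_pages() call per 256 pages handed to the
 * peer rather than one per page.  The batch is shared across all
 * instances, hence the global xnb_alloc_page_lock.
 */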
static mfn_t
xnb_alloc_page(xnb_t *xnbp)
{
#define	WARNING_RATE_LIMIT 100
#define	BATCH_SIZE 256
	static mfn_t mfns[BATCH_SIZE];	/* common across all instances */
	static int nth = BATCH_SIZE;
	mfn_t mfn;

	mutex_enter(&xnb_alloc_page_lock);
	if (nth == BATCH_SIZE) {
		if (balloon_alloc_pages(BATCH_SIZE, mfns) != BATCH_SIZE) {
			xnbp->xnb_stat_allocation_failure++;
			mutex_exit(&xnb_alloc_page_lock);

			/*
			 * Try for a single page in low memory situations.
			 */
			if (balloon_alloc_pages(1, &mfn) != 1) {
				if ((xnbp->xnb_stat_small_allocation_failure++
				    % WARNING_RATE_LIMIT) == 0)
					cmn_err(CE_WARN, "xnb_alloc_page: "
					    "Cannot allocate memory to "
					    "transfer packets to peer.");
				return (0);
			} else {
				xnbp->xnb_stat_small_allocation_success++;
				return (mfn);
			}
		}

		nth = 0;
		xnbp->xnb_stat_allocation_success++;
	}

	mfn = mfns[nth++];
	mutex_exit(&xnb_alloc_page_lock);

	ASSERT(mfn != 0);

	return (mfn);
#undef BATCH_SIZE
#undef WARNING_RATE_LIMIT
}

/*
 * Free a page back to the hypervisor.
 *
 * This happens only in the error path, so batching is not worth the
 * complication.
 */
static void
xnb_free_page(xnb_t *xnbp, mfn_t mfn)
{
	_NOTE(ARGUNUSED(xnbp));
	int r;
	pfn_t pfn;

	pfn = xen_assign_pfn(mfn);
	pfnzero(pfn, 0, PAGESIZE);
	xen_release_pfn(pfn);

	if ((r = balloon_free_pages(1, &mfn, NULL, NULL)) != 1) {
		cmn_err(CE_WARN, "free_page: cannot decrease memory "
		    "reservation (%d): page kept but unusable (mfn = 0x%lx).",
		    r, mfn);
	}
}

/*
 * Similar to RING_HAS_UNCONSUMED_REQUESTS(&xnbp->rx_ring) but using
 * local variables. Used in both xnb_to_peer() and xnb_copy_to_peer().
 */
#define	XNB_RING_HAS_UNCONSUMED_REQUESTS(_r)		\
	((((_r)->sring->req_prod - loop) <		\
		(RING_SIZE(_r) - (loop - prod))) ?	\
	    ((_r)->sring->req_prod - loop) :		\
	    (RING_SIZE(_r) - (loop - prod)))
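/*
 * Editor's note: the macro evaluates to the smaller of two
 * quantities: the number of requests the peer has published but we
 * have not yet consumed (sring->req_prod - loop) and the number of
 * response slots still free (RING_SIZE() - (loop - prod)), where
 * loop shadows req_cons and prod shadows rsp_prod_pvt in the
 * callers.  A hypothetical worked case: with req_prod == 10,
 * loop == 4 and prod == 2 on a 256-entry ring there are 6
 * unconsumed requests and 254 free response slots, so up to 6
 * packets may be processed.
 */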

/*
 * Pass packets to the peer using page flipping.
 */
mblk_t *
xnb_to_peer(xnb_t *xnbp, mblk_t *mp)
{
	mblk_t *free = mp, *prev = NULL;
	size_t len;
	gnttab_transfer_t *gop;
	boolean_t notify;
	RING_IDX loop, prod, end;

	/*
	 * For each packet the sequence of operations is:
	 *
	 * 1. get a new page from the hypervisor.
	 * 2. get a request slot from the ring.
	 * 3. copy the data into the new page.
	 * 4. transfer the page to the peer.
	 * 5. update the request slot.
	 * 6. kick the peer.
	 * 7. free mp.
	 *
	 * In order to reduce the number of hypercalls, we prepare
	 * several packets for the peer and perform a single hypercall
	 * to transfer them.
	 */

	len = 0;
	mutex_enter(&xnbp->xnb_rx_lock);

	/*
	 * If we are not connected to the peer or have not yet
	 * finished hotplug it is too early to pass packets to the
	 * peer.
	 */
	if (!(xnbp->xnb_connected && xnbp->xnb_hotplugged)) {
		mutex_exit(&xnbp->xnb_rx_lock);
		DTRACE_PROBE(flip_rx_too_early);
		xnbp->xnb_stat_rx_too_early++;
		return (mp);
	}

	loop = xnbp->xnb_rx_ring.req_cons;
	prod = xnbp->xnb_rx_ring.rsp_prod_pvt;
	gop = xnbp->xnb_rx_top;

	while ((mp != NULL) &&
	    XNB_RING_HAS_UNCONSUMED_REQUESTS(&xnbp->xnb_rx_ring)) {

		mfn_t mfn;
		pfn_t pfn;
		netif_rx_request_t *rxreq;
		netif_rx_response_t *rxresp;
		char *valoop;
		mblk_t *ml;
		uint16_t cksum_flags;

		/* 1 */
		if ((mfn = xnb_alloc_page(xnbp)) == 0) {
			xnbp->xnb_stat_rx_defer++;
			break;
		}

		/* 2 */
		rxreq = RING_GET_REQUEST(&xnbp->xnb_rx_ring, loop);

#ifdef XNB_DEBUG
		if (!(rxreq->id < NET_RX_RING_SIZE))
			cmn_err(CE_PANIC, "xnb_to_peer: "
			    "id %d out of range in request 0x%p",
			    rxreq->id, (void *)rxreq);
#endif /* XNB_DEBUG */

		/* Assign a pfn and map the new page at the allocated va. */
		pfn = xen_assign_pfn(mfn);
		hat_devload(kas.a_hat, xnbp->xnb_rx_va, PAGESIZE,
		    pfn, PROT_READ | PROT_WRITE, HAT_LOAD);

		/* 3 */
		len = 0;
		valoop = xnbp->xnb_rx_va;
		for (ml = mp; ml != NULL; ml = ml->b_cont) {
			size_t chunk = ml->b_wptr - ml->b_rptr;

			bcopy(ml->b_rptr, valoop, chunk);
			valoop += chunk;
			len += chunk;
		}

		ASSERT(len < PAGESIZE);

		/* Release the pfn. */
		hat_unload(kas.a_hat, xnbp->xnb_rx_va, PAGESIZE,
		    HAT_UNLOAD_UNMAP);
		xen_release_pfn(pfn);

		/* 4 */
		gop->mfn = mfn;
		gop->domid = xnbp->xnb_peer;
		gop->ref = rxreq->gref;

		/* 5.1 */
		rxresp = RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod);
		rxresp->offset = 0;
		rxresp->flags = 0;

		cksum_flags = xnbp->xnb_flavour->xf_cksum_to_peer(xnbp, mp);
		if (cksum_flags != 0)
			xnbp->xnb_stat_rx_cksum_deferred++;
		rxresp->flags |= cksum_flags;

		rxresp->id = RING_GET_REQUEST(&xnbp->xnb_rx_ring, prod)->id;
		rxresp->status = len;

		loop++;
		prod++;
		gop++;
		prev = mp;
		mp = mp->b_next;
	}

	/*
	 * Did we actually do anything?
	 */
	if (loop == xnbp->xnb_rx_ring.req_cons) {
		mutex_exit(&xnbp->xnb_rx_lock);
		return (mp);
	}

	end = loop;

	/*
	 * Unlink the end of the 'done' list from the remainder.
	 */
	ASSERT(prev != NULL);
	prev->b_next = NULL;

	if (HYPERVISOR_grant_table_op(GNTTABOP_transfer, xnbp->xnb_rx_top,
	    loop - xnbp->xnb_rx_ring.req_cons) != 0) {
		cmn_err(CE_WARN, "xnb_to_peer: transfer operation failed");
	}

	loop = xnbp->xnb_rx_ring.req_cons;
	prod = xnbp->xnb_rx_ring.rsp_prod_pvt;
	gop = xnbp->xnb_rx_top;

	while (loop < end) {
		int16_t status = NETIF_RSP_OKAY;

		if (gop->status != 0) {
			status = NETIF_RSP_ERROR;

			/*
			 * If the status is anything other than
			 * GNTST_bad_page then we don't own the page
			 * any more, so don't try to give it back.
			 */
			if (gop->status != GNTST_bad_page)
				gop->mfn = 0;
		} else {
			/* The page is no longer ours. */
			gop->mfn = 0;
		}

		if (gop->mfn != 0)
			/*
			 * Give back the page, as we won't be using
			 * it.
			 */
			xnb_free_page(xnbp, gop->mfn);
		else
			/*
			 * We gave away a page, update our accounting
			 * now.
			 */
			balloon_drv_subtracted(1);

		/* 5.2 */
		if (status != NETIF_RSP_OKAY) {
			RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod)->status =
			    status;
		} else {
			xnbp->xnb_stat_ipackets++;
			xnbp->xnb_stat_rbytes += len;
		}

		loop++;
		prod++;
		gop++;
	}

	xnbp->xnb_rx_ring.req_cons = loop;
	xnbp->xnb_rx_ring.rsp_prod_pvt = prod;

	/* 6 */
	/* LINTED: constant in conditional context */
	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xnbp->xnb_rx_ring, notify);
	if (notify) {
		ec_notify_via_evtchn(xnbp->xnb_evtchn);
		xnbp->xnb_stat_rx_notify_sent++;
	} else {
		xnbp->xnb_stat_rx_notify_deferred++;
	}

	if (mp != NULL)
		xnbp->xnb_stat_rx_defer++;

	mutex_exit(&xnbp->xnb_rx_lock);

	/* Free mblk_t's that we consumed. */
	freemsgchain(free);

	return (mp);
}

/* Helper functions for xnb_copy_to_peer(). */

/*
 * Grow the array of copy operation descriptors.
 */
static boolean_t
grow_cpop_area(xnb_t *xnbp)
{
	size_t count;
	gnttab_copy_t *new;

	ASSERT(MUTEX_HELD(&xnbp->xnb_rx_lock));

	count = xnbp->xnb_rx_cpop_count + CPOP_DEFCNT;
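
	/*
	 * There is no kernel realloc, so the array is grown by hand:
	 * allocate the larger array, copy the old contents across
	 * and free the original.  The allocation is KM_NOSLEEP, so
	 * on failure the caller is told to give up rather than block.
	 */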

	if ((new = kmem_alloc(sizeof (new[0]) * count, KM_NOSLEEP)) == NULL) {
		xnbp->xnb_stat_other_allocation_failure++;
		return (B_FALSE);
	}

	bcopy(xnbp->xnb_rx_cpop, new,
	    sizeof (xnbp->xnb_rx_cpop[0]) * xnbp->xnb_rx_cpop_count);

	kmem_free(xnbp->xnb_rx_cpop,
	    sizeof (xnbp->xnb_rx_cpop[0]) * xnbp->xnb_rx_cpop_count);

	xnbp->xnb_rx_cpop = new;
	xnbp->xnb_rx_cpop_count = count;

	xnbp->xnb_stat_rx_cpoparea_grown++;

	return (B_TRUE);
}

/*
 * Check whether an address is on a page that's foreign to this domain.
 */
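/*
 * Editor's note: hat_getpfnum() marks pfns that are really foreign
 * mfns with PFN_IS_FOREIGN_MFN.  Such pages (e.g. ones granted to us
 * by a peer domain) cannot be named by mfn as the source of a
 * grant-copy operation, which is why callers relocate their data
 * first (see replace_msg() below).
 */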
static boolean_t
is_foreign(void *addr)
{
	pfn_t pfn = hat_getpfnum(kas.a_hat, addr);

	return ((pfn & PFN_IS_FOREIGN_MFN) == PFN_IS_FOREIGN_MFN);
}

/*
 * Insert a newly allocated mblk into a chain, replacing the old one.
 */
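/*
 * Editor's note: copyb() places the payload in freshly allocated
 * (local) memory, and the checksum offload metadata is carried
 * across explicitly with mac_hcksum_get()/mac_hcksum_set().  Both
 * the b_next (packet chain) and b_cont (data chain) links that point
 * at the old mblk must be patched up in its neighbours, which is why
 * the caller passes mp_prev and ml_prev.
 */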
static mblk_t *
replace_msg(mblk_t *mp, size_t len, mblk_t *mp_prev, mblk_t *ml_prev)
{
	uint32_t	start, stuff, end, value, flags;
	mblk_t		*new_mp;

	new_mp = copyb(mp);
	if (new_mp == NULL) {
		cmn_err(CE_PANIC, "replace_msg: cannot alloc new message "
		    "for %p, len %lu", (void *) mp, len);
	}

	mac_hcksum_get(mp, &start, &stuff, &end, &value, &flags);
	mac_hcksum_set(new_mp, start, stuff, end, value, flags);

	new_mp->b_next = mp->b_next;
	new_mp->b_prev = mp->b_prev;
	new_mp->b_cont = mp->b_cont;

	/* Make sure we only overwrite pointers to the mblk being replaced. */
	if (mp_prev != NULL && mp_prev->b_next == mp)
		mp_prev->b_next = new_mp;

	if (ml_prev != NULL && ml_prev->b_cont == mp)
		ml_prev->b_cont = new_mp;

	mp->b_next = mp->b_prev = mp->b_cont = NULL;
	freemsg(mp);

	return (new_mp);
}

/*
 * Set all the fields in a gnttab_copy_t.
 */
static void
setup_gop(xnb_t *xnbp, gnttab_copy_t *gp, uchar_t *rptr,
    size_t s_off, size_t d_off, size_t len, grant_ref_t d_ref)
{
	ASSERT(xnbp != NULL && gp != NULL);

	gp->source.offset = s_off;
	gp->source.u.gmfn = pfn_to_mfn(hat_getpfnum(kas.a_hat, (caddr_t)rptr));
	gp->source.domid = DOMID_SELF;

	gp->len = (uint16_t)len;
	gp->flags = GNTCOPY_dest_gref;
	gp->status = 0;

	gp->dest.u.ref = d_ref;
	gp->dest.offset = d_off;
	gp->dest.domid = xnbp->xnb_peer;
}
958551bc2a6Smrj 
95956567907SDavid Edmondson /*
96056567907SDavid Edmondson  * Pass packets to the peer using hypervisor copy operations.
96156567907SDavid Edmondson  */
962551bc2a6Smrj mblk_t *
xnb_copy_to_peer(xnb_t * xnbp,mblk_t * mp)963551bc2a6Smrj xnb_copy_to_peer(xnb_t *xnbp, mblk_t *mp)
964551bc2a6Smrj {
965551bc2a6Smrj 	mblk_t		*free = mp, *mp_prev = NULL, *saved_mp = mp;
966551bc2a6Smrj 	mblk_t		*ml, *ml_prev;
967551bc2a6Smrj 	boolean_t	notify;
968551bc2a6Smrj 	RING_IDX	loop, prod;
969551bc2a6Smrj 	int		i;
970551bc2a6Smrj 
97156567907SDavid Edmondson 	/*
97256567907SDavid Edmondson 	 * If the peer does not pre-post buffers for received packets,
97356567907SDavid Edmondson 	 * use page flipping to pass packets to it.
97456567907SDavid Edmondson 	 */
97556567907SDavid Edmondson 	if (!xnbp->xnb_rx_hv_copy)
976551bc2a6Smrj 		return (xnb_to_peer(xnbp, mp));
977551bc2a6Smrj 
978551bc2a6Smrj 	/*
979551bc2a6Smrj 	 * For each packet the sequence of operations is:
980551bc2a6Smrj 	 *
981551bc2a6Smrj 	 *  1. get a request slot from the ring.
982551bc2a6Smrj 	 *  2. set up data for hypercall (see NOTE below)
983551bc2a6Smrj 	 *  3. have the hypervisore copy the data
984551bc2a6Smrj 	 *  4. update the request slot.
985551bc2a6Smrj 	 *  5. kick the peer.
986551bc2a6Smrj 	 *
987551bc2a6Smrj 	 * NOTE ad 2.
988551bc2a6Smrj 	 *  In order to reduce the number of hypercalls, we prepare
98956567907SDavid Edmondson 	 *  several mblks (mp->b_cont != NULL) for the peer and
99056567907SDavid Edmondson 	 *  perform a single hypercall to transfer them.  We also have
99156567907SDavid Edmondson 	 *  to set up a seperate copy operation for every page.
992551bc2a6Smrj 	 *
99356567907SDavid Edmondson 	 * If we have more than one packet (mp->b_next != NULL), we do
99456567907SDavid Edmondson 	 * this whole dance repeatedly.
995551bc2a6Smrj 	 */
996551bc2a6Smrj 
997024c26efSMax zhen 	mutex_enter(&xnbp->xnb_rx_lock);
998551bc2a6Smrj 
999551bc2a6Smrj 	if (!(xnbp->xnb_connected && xnbp->xnb_hotplugged)) {
1000024c26efSMax zhen 		mutex_exit(&xnbp->xnb_rx_lock);
1001024c26efSMax zhen 		DTRACE_PROBE(copy_rx_too_early);
1002024c26efSMax zhen 		xnbp->xnb_stat_rx_too_early++;
1003551bc2a6Smrj 		return (mp);
1004551bc2a6Smrj 	}
1005551bc2a6Smrj 
1006551bc2a6Smrj 	loop = xnbp->xnb_rx_ring.req_cons;
1007551bc2a6Smrj 	prod = xnbp->xnb_rx_ring.rsp_prod_pvt;
1008551bc2a6Smrj 
1009551bc2a6Smrj 	while ((mp != NULL) &&
1010551bc2a6Smrj 	    XNB_RING_HAS_UNCONSUMED_REQUESTS(&xnbp->xnb_rx_ring)) {
1011551bc2a6Smrj 		netif_rx_request_t	*rxreq;
101256567907SDavid Edmondson 		size_t			d_offset, len;
101356567907SDavid Edmondson 		int			item_count;
101456567907SDavid Edmondson 		gnttab_copy_t		*gop_cp;
1015551bc2a6Smrj 		netif_rx_response_t	*rxresp;
1016551bc2a6Smrj 		uint16_t		cksum_flags;
1017551bc2a6Smrj 		int16_t			status = NETIF_RSP_OKAY;
1018551bc2a6Smrj 
1019551bc2a6Smrj 		/* 1 */
1020551bc2a6Smrj 		rxreq = RING_GET_REQUEST(&xnbp->xnb_rx_ring, loop);
1021551bc2a6Smrj 
1022551bc2a6Smrj #ifdef XNB_DEBUG
1023551bc2a6Smrj 		if (!(rxreq->id < NET_RX_RING_SIZE))
1024551bc2a6Smrj 			cmn_err(CE_PANIC, "xnb_copy_to_peer: "
1025551bc2a6Smrj 			    "id %d out of range in request 0x%p",
1026551bc2a6Smrj 			    rxreq->id, (void *)rxreq);
1027551bc2a6Smrj #endif /* XNB_DEBUG */
1028551bc2a6Smrj 
1029551bc2a6Smrj 		/* 2 */
10306ac4daadSDavid Edmondson 		d_offset = 0;
1031551bc2a6Smrj 		len = 0;
1032551bc2a6Smrj 		item_count = 0;
1033551bc2a6Smrj 
1034024c26efSMax zhen 		gop_cp = xnbp->xnb_rx_cpop;
1035551bc2a6Smrj 
1036551bc2a6Smrj 		/*
103756567907SDavid Edmondson 		 * We walk the b_cont pointers and set up a
103856567907SDavid Edmondson 		 * gnttab_copy_t for each sub-page chunk in each data
103956567907SDavid Edmondson 		 * block.
1040551bc2a6Smrj 		 */
1041551bc2a6Smrj 		/* 2a */
1042551bc2a6Smrj 		for (ml = mp, ml_prev = NULL; ml != NULL; ml = ml->b_cont) {
1043551bc2a6Smrj 			size_t	chunk = ml->b_wptr - ml->b_rptr;
1044551bc2a6Smrj 			uchar_t	*r_tmp,	*rpt_align;
1045551bc2a6Smrj 			size_t	r_offset;
1046551bc2a6Smrj 
1047551bc2a6Smrj 			/*
104856567907SDavid Edmondson 			 * The hypervisor will not allow us to
104956567907SDavid Edmondson 			 * reference a foreign page (e.g. one
105056567907SDavid Edmondson 			 * belonging to another domain) by mfn in the
105156567907SDavid Edmondson 			 * copy operation. If the data in this mblk is
105256567907SDavid Edmondson 			 * on such a page we must copy the data into a
105356567907SDavid Edmondson 			 * local page before initiating the hypervisor
105456567907SDavid Edmondson 			 * copy operation.
1055551bc2a6Smrj 			 */
1056551bc2a6Smrj 			if (is_foreign(ml->b_rptr) || is_foreign(ml->b_wptr)) {
1057551bc2a6Smrj 				mblk_t *ml_new = replace_msg(ml, chunk,
1058551bc2a6Smrj 				    mp_prev, ml_prev);
1059551bc2a6Smrj 
1060551bc2a6Smrj 				/* We can still use old ml, but not *ml! */
1061551bc2a6Smrj 				if (free == ml)
1062551bc2a6Smrj 					free = ml_new;
1063551bc2a6Smrj 				if (mp == ml)
1064551bc2a6Smrj 					mp = ml_new;
1065551bc2a6Smrj 				ml = ml_new;
1066551bc2a6Smrj 
1067024c26efSMax zhen 				xnbp->xnb_stat_rx_foreign_page++;
1068551bc2a6Smrj 			}
1069551bc2a6Smrj 
1070551bc2a6Smrj 			rpt_align = (uchar_t *)ALIGN2PAGE(ml->b_rptr);
1071551bc2a6Smrj 			r_offset = (uint16_t)(ml->b_rptr - rpt_align);
1072551bc2a6Smrj 			r_tmp = ml->b_rptr;
1073551bc2a6Smrj 
1074551bc2a6Smrj 			if (d_offset + chunk > PAGESIZE)
1075551bc2a6Smrj 				cmn_err(CE_PANIC, "xnb_copy_to_peer: mp %p "
1076551bc2a6Smrj 				    "(svd: %p), ml %p,rpt_alg. %p, d_offset "
1077551bc2a6Smrj 				    "(%lu) + chunk (%lu) > PAGESIZE %d!",
1078551bc2a6Smrj 				    (void *)mp, (void *)saved_mp, (void *)ml,
1079551bc2a6Smrj 				    (void *)rpt_align,
1080551bc2a6Smrj 				    d_offset, chunk, (int)PAGESIZE);
1081551bc2a6Smrj 
1082551bc2a6Smrj 			while (chunk > 0) {
1083551bc2a6Smrj 				size_t part_len;
1084551bc2a6Smrj 
108556567907SDavid Edmondson 				if (item_count == xnbp->xnb_rx_cpop_count) {
108656567907SDavid Edmondson 					if (!grow_cpop_area(xnbp))
1087551bc2a6Smrj 						goto failure;
108856567907SDavid Edmondson 					gop_cp = &xnbp->xnb_rx_cpop[item_count];
1089551bc2a6Smrj 				}
1090551bc2a6Smrj 				/*
1091551bc2a6Smrj 				 * If our mblk crosses a page boundary, we need
109256567907SDavid Edmondson 				 * to do a seperate copy for each page.
1093551bc2a6Smrj 				 */
1094551bc2a6Smrj 				if (r_offset + chunk > PAGESIZE) {
1095551bc2a6Smrj 					part_len = PAGESIZE - r_offset;
1096551bc2a6Smrj 
1097551bc2a6Smrj 					DTRACE_PROBE3(mblk_page_crossed,
1098551bc2a6Smrj 					    (mblk_t *), ml, int, chunk, int,
1099551bc2a6Smrj 					    (int)r_offset);
1100551bc2a6Smrj 
1101024c26efSMax zhen 					xnbp->xnb_stat_rx_pagebndry_crossed++;
1102551bc2a6Smrj 				} else {
1103551bc2a6Smrj 					part_len = chunk;
1104551bc2a6Smrj 				}
1105551bc2a6Smrj 
1106551bc2a6Smrj 				setup_gop(xnbp, gop_cp, r_tmp, r_offset,
1107551bc2a6Smrj 				    d_offset, part_len, rxreq->gref);
1108551bc2a6Smrj 
1109551bc2a6Smrj 				chunk -= part_len;
1110551bc2a6Smrj 
1111551bc2a6Smrj 				len += part_len;
1112551bc2a6Smrj 				d_offset += part_len;
1113551bc2a6Smrj 				r_tmp += part_len;
1114551bc2a6Smrj 				/*
1115551bc2a6Smrj 				 * The 2nd, 3rd ... last copies will always
1116551bc2a6Smrj 				 * start at r_tmp, therefore r_offset is 0.
1117551bc2a6Smrj 				 */
1118551bc2a6Smrj 				r_offset = 0;
1119551bc2a6Smrj 				gop_cp++;
112056567907SDavid Edmondson 				item_count++;
1121551bc2a6Smrj 			}
1122551bc2a6Smrj 			ml_prev = ml;
112356567907SDavid Edmondson 
1124551bc2a6Smrj 			DTRACE_PROBE4(mblk_loop_end, (mblk_t *), ml, int,
1125551bc2a6Smrj 			    chunk, int, len, int, item_count);
1126551bc2a6Smrj 		}
1127551bc2a6Smrj 		/* 3 */
1128024c26efSMax zhen 		if (HYPERVISOR_grant_table_op(GNTTABOP_copy, xnbp->xnb_rx_cpop,
1129551bc2a6Smrj 		    item_count) != 0) {
1130551bc2a6Smrj 			cmn_err(CE_WARN, "xnb_copy_to_peer: copy op. failed");
1131551bc2a6Smrj 			DTRACE_PROBE(HV_granttableopfailed);
1132551bc2a6Smrj 		}
1133551bc2a6Smrj 
1134551bc2a6Smrj 		/* 4 */
1135551bc2a6Smrj 		rxresp = RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod);
11366ac4daadSDavid Edmondson 		rxresp->offset = 0;
1137551bc2a6Smrj 
1138551bc2a6Smrj 		rxresp->flags = 0;
1139551bc2a6Smrj 
1140551bc2a6Smrj 		DTRACE_PROBE4(got_RX_rsp, int, (int)rxresp->id, int,
1141551bc2a6Smrj 		    (int)rxresp->offset, int, (int)rxresp->flags, int,
1142551bc2a6Smrj 		    (int)rxresp->status);
1143551bc2a6Smrj 
1144551bc2a6Smrj 		cksum_flags = xnbp->xnb_flavour->xf_cksum_to_peer(xnbp, mp);
1145551bc2a6Smrj 		if (cksum_flags != 0)
1146024c26efSMax zhen 			xnbp->xnb_stat_rx_cksum_deferred++;
1147551bc2a6Smrj 		rxresp->flags |= cksum_flags;
1148551bc2a6Smrj 
1149551bc2a6Smrj 		rxresp->id = RING_GET_REQUEST(&xnbp->xnb_rx_ring, prod)->id;
1150551bc2a6Smrj 		rxresp->status = len;
1151551bc2a6Smrj 
1152551bc2a6Smrj 		DTRACE_PROBE4(RX_rsp_set, int, (int)rxresp->id, int,
1153551bc2a6Smrj 		    (int)rxresp->offset, int, (int)rxresp->flags, int,
1154551bc2a6Smrj 		    (int)rxresp->status);
1155551bc2a6Smrj 
1156551bc2a6Smrj 		for (i = 0; i < item_count; i++) {
1157024c26efSMax zhen 			if (xnbp->xnb_rx_cpop[i].status != 0) {
115856567907SDavid Edmondson 				DTRACE_PROBE2(cpop_status_nonnull, int,
1159024c26efSMax zhen 				    (int)xnbp->xnb_rx_cpop[i].status,
1160551bc2a6Smrj 				    int, i);
1161551bc2a6Smrj 				status = NETIF_RSP_ERROR;
1162551bc2a6Smrj 			}
1163551bc2a6Smrj 		}
1164551bc2a6Smrj 
1165551bc2a6Smrj 		/* 5.2 */
1166551bc2a6Smrj 		if (status != NETIF_RSP_OKAY) {
1167551bc2a6Smrj 			RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod)->status =
1168551bc2a6Smrj 			    status;
1169024c26efSMax zhen 			xnbp->xnb_stat_rx_rsp_notok++;
1170551bc2a6Smrj 		} else {
1171024c26efSMax zhen 			xnbp->xnb_stat_ipackets++;
1172024c26efSMax zhen 			xnbp->xnb_stat_rbytes += len;
1173551bc2a6Smrj 		}
1174551bc2a6Smrj 
1175551bc2a6Smrj 		loop++;
1176551bc2a6Smrj 		prod++;
1177551bc2a6Smrj 		mp_prev = mp;
1178551bc2a6Smrj 		mp = mp->b_next;
1179551bc2a6Smrj 	}
1180551bc2a6Smrj failure:
1181551bc2a6Smrj 	/*
1182551bc2a6Smrj 	 * Did we actually do anything?
1183551bc2a6Smrj 	 */
1184551bc2a6Smrj 	if (loop == xnbp->xnb_rx_ring.req_cons) {
1185024c26efSMax zhen 		mutex_exit(&xnbp->xnb_rx_lock);
1186551bc2a6Smrj 		return (mp);
1187551bc2a6Smrj 	}
1188551bc2a6Smrj 
1189551bc2a6Smrj 	/*
1190551bc2a6Smrj 	 * Unlink the end of the 'done' list from the remainder.
1191551bc2a6Smrj 	 */
1192551bc2a6Smrj 	ASSERT(mp_prev != NULL);
1193551bc2a6Smrj 	mp_prev->b_next = NULL;
1194551bc2a6Smrj 
1195551bc2a6Smrj 	xnbp->xnb_rx_ring.req_cons = loop;
1196551bc2a6Smrj 	xnbp->xnb_rx_ring.rsp_prod_pvt = prod;
1197551bc2a6Smrj 
1198551bc2a6Smrj 	/* 6 */
1199551bc2a6Smrj 	/* LINTED: constant in conditional context */
1200551bc2a6Smrj 	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xnbp->xnb_rx_ring, notify);
1201551bc2a6Smrj 	if (notify) {
1202551bc2a6Smrj 		ec_notify_via_evtchn(xnbp->xnb_evtchn);
1203024c26efSMax zhen 		xnbp->xnb_stat_rx_notify_sent++;
1204551bc2a6Smrj 	} else {
1205024c26efSMax zhen 		xnbp->xnb_stat_rx_notify_deferred++;
1206551bc2a6Smrj 	}
1207551bc2a6Smrj 
1208551bc2a6Smrj 	if (mp != NULL)
1209024c26efSMax zhen 		xnbp->xnb_stat_rx_defer++;
1210551bc2a6Smrj 
1211024c26efSMax zhen 	mutex_exit(&xnbp->xnb_rx_lock);
1212551bc2a6Smrj 
1213551bc2a6Smrj 	/* Free mblk_t structs we have consumed. */
1214551bc2a6Smrj 	freemsgchain(free);
1215551bc2a6Smrj 
1216551bc2a6Smrj 	return (mp);
1217551bc2a6Smrj }
1218551bc2a6Smrj 
1219843e1988Sjohnlev 
1220843e1988Sjohnlev static void
xnb_tx_notify_peer(xnb_t * xnbp,boolean_t force)122156567907SDavid Edmondson xnb_tx_notify_peer(xnb_t *xnbp, boolean_t force)
1222843e1988Sjohnlev {
1223843e1988Sjohnlev 	boolean_t notify;
1224843e1988Sjohnlev 
1225024c26efSMax zhen 	ASSERT(MUTEX_HELD(&xnbp->xnb_tx_lock));
1226843e1988Sjohnlev 
1227551bc2a6Smrj 	/* LINTED: constant in conditional context */
1228551bc2a6Smrj 	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xnbp->xnb_tx_ring, notify);
122956567907SDavid Edmondson 	if (notify || force) {
1230551bc2a6Smrj 		ec_notify_via_evtchn(xnbp->xnb_evtchn);
1231024c26efSMax zhen 		xnbp->xnb_stat_tx_notify_sent++;
1232843e1988Sjohnlev 	} else {
1233024c26efSMax zhen 		xnbp->xnb_stat_tx_notify_deferred++;
1234843e1988Sjohnlev 	}
1235843e1988Sjohnlev }
1236843e1988Sjohnlev 
1237843e1988Sjohnlev static void
xnb_tx_mark_complete(xnb_t * xnbp,RING_IDX id,int16_t status)1238024c26efSMax zhen xnb_tx_mark_complete(xnb_t *xnbp, RING_IDX id, int16_t status)
1239843e1988Sjohnlev {
1240843e1988Sjohnlev 	RING_IDX i;
1241843e1988Sjohnlev 	netif_tx_response_t *txresp;
1242843e1988Sjohnlev 
1243024c26efSMax zhen 	ASSERT(MUTEX_HELD(&xnbp->xnb_tx_lock));
1244843e1988Sjohnlev 
1245551bc2a6Smrj 	i = xnbp->xnb_tx_ring.rsp_prod_pvt;
1246843e1988Sjohnlev 
1247551bc2a6Smrj 	txresp = RING_GET_RESPONSE(&xnbp->xnb_tx_ring, i);
1248843e1988Sjohnlev 	txresp->id = id;
1249843e1988Sjohnlev 	txresp->status = status;
1250843e1988Sjohnlev 
1251551bc2a6Smrj 	xnbp->xnb_tx_ring.rsp_prod_pvt = i + 1;
1252843e1988Sjohnlev 
1253843e1988Sjohnlev 	/*
1254843e1988Sjohnlev 	 * Note that we don't push the change to the peer here - that
1255843e1988Sjohnlev 	 * is the caller's responsibility.
1256843e1988Sjohnlev 	 */
1257843e1988Sjohnlev }
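
/*
 * Illustrative sketch (not part of this file): because
 * xnb_tx_mark_complete() only advances the private producer index, a
 * caller holding xnb_tx_lock can batch several completions and pay for
 * a single push and at most one event.  The request ids and the
 * surrounding context here are hypothetical.
 */
#if 0
	mutex_enter(&xnbp->xnb_tx_lock);
	xnb_tx_mark_complete(xnbp, id_a, NETIF_RSP_OKAY);
	xnb_tx_mark_complete(xnbp, id_b, NETIF_RSP_ERROR);
	xnb_tx_notify_peer(xnbp, B_FALSE);	/* one push, one event at most */
	mutex_exit(&xnbp->xnb_tx_lock);
#endif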
1258843e1988Sjohnlev 
1259843e1988Sjohnlev static void
126056567907SDavid Edmondson xnb_txbuf_recycle(xnb_txbuf_t *txp)
1261843e1988Sjohnlev {
126256567907SDavid Edmondson 	xnb_t *xnbp = txp->xt_xnbp;
1263843e1988Sjohnlev 
126456567907SDavid Edmondson 	kmem_cache_free(xnbp->xnb_tx_buf_cache, txp);
1265551bc2a6Smrj 
126656567907SDavid Edmondson 	xnbp->xnb_tx_buf_outstanding--;
1267843e1988Sjohnlev }
1268843e1988Sjohnlev 
126956567907SDavid Edmondson static int
127056567907SDavid Edmondson xnb_txbuf_constructor(void *buf, void *arg, int kmflag)
1271843e1988Sjohnlev {
127256567907SDavid Edmondson 	_NOTE(ARGUNUSED(kmflag));
127356567907SDavid Edmondson 	xnb_txbuf_t *txp = buf;
127456567907SDavid Edmondson 	xnb_t *xnbp = arg;
127556567907SDavid Edmondson 	size_t len;
127656567907SDavid Edmondson 	ddi_dma_cookie_t dma_cookie;
127756567907SDavid Edmondson 	uint_t ncookies;
1278843e1988Sjohnlev 
127956567907SDavid Edmondson 	txp->xt_free_rtn.free_func = xnb_txbuf_recycle;
128056567907SDavid Edmondson 	txp->xt_free_rtn.free_arg = (caddr_t)txp;
128156567907SDavid Edmondson 	txp->xt_xnbp = xnbp;
128256567907SDavid Edmondson 	txp->xt_next = NULL;
1283551bc2a6Smrj 
128456567907SDavid Edmondson 	if (ddi_dma_alloc_handle(xnbp->xnb_devinfo, &buf_dma_attr,
128556567907SDavid Edmondson 	    0, 0, &txp->xt_dma_handle) != DDI_SUCCESS)
128656567907SDavid Edmondson 		goto failure;
1287551bc2a6Smrj 
128856567907SDavid Edmondson 	if (ddi_dma_mem_alloc(txp->xt_dma_handle, PAGESIZE, &data_accattr,
128956567907SDavid Edmondson 	    DDI_DMA_STREAMING, 0, 0, &txp->xt_buf, &len,
129056567907SDavid Edmondson 	    &txp->xt_acc_handle) != DDI_SUCCESS)
129156567907SDavid Edmondson 		goto failure_1;
1292551bc2a6Smrj 
129356567907SDavid Edmondson 	if (ddi_dma_addr_bind_handle(txp->xt_dma_handle, NULL, txp->xt_buf,
129456567907SDavid Edmondson 	    len, DDI_DMA_RDWR | DDI_DMA_STREAMING, DDI_DMA_DONTWAIT, 0,
129556567907SDavid Edmondson 	    &dma_cookie, &ncookies)
129656567907SDavid Edmondson 	    != DDI_DMA_MAPPED)
129756567907SDavid Edmondson 		goto failure_2;
129856567907SDavid Edmondson 	ASSERT(ncookies == 1);
1299843e1988Sjohnlev 
130056567907SDavid Edmondson 	txp->xt_mfn = xnb_btop(dma_cookie.dmac_laddress);
130156567907SDavid Edmondson 	txp->xt_buflen = dma_cookie.dmac_size;
1302843e1988Sjohnlev 
130356567907SDavid Edmondson 	DTRACE_PROBE(txbuf_allocated);
1304843e1988Sjohnlev 
13051a5e258fSJosef 'Jeff' Sipek 	atomic_inc_32(&xnbp->xnb_tx_buf_count);
130656567907SDavid Edmondson 	xnbp->xnb_tx_buf_outstanding++;
1307843e1988Sjohnlev 
130856567907SDavid Edmondson 	return (0);
1309843e1988Sjohnlev 
131056567907SDavid Edmondson failure_2:
131156567907SDavid Edmondson 	ddi_dma_mem_free(&txp->xt_acc_handle);
1312843e1988Sjohnlev 
131356567907SDavid Edmondson failure_1:
131456567907SDavid Edmondson 	ddi_dma_free_handle(&txp->xt_dma_handle);
1315843e1988Sjohnlev 
131656567907SDavid Edmondson failure:
1317843e1988Sjohnlev 
131856567907SDavid Edmondson 	return (-1);
1319843e1988Sjohnlev }
1320843e1988Sjohnlev 
1321843e1988Sjohnlev static void
132256567907SDavid Edmondson xnb_txbuf_destructor(void *buf, void *arg)
1323843e1988Sjohnlev {
132456567907SDavid Edmondson 	xnb_txbuf_t *txp = buf;
132556567907SDavid Edmondson 	xnb_t *xnbp = arg;
1326843e1988Sjohnlev 
132756567907SDavid Edmondson 	(void) ddi_dma_unbind_handle(txp->xt_dma_handle);
132856567907SDavid Edmondson 	ddi_dma_mem_free(&txp->xt_acc_handle);
132956567907SDavid Edmondson 	ddi_dma_free_handle(&txp->xt_dma_handle);
1330843e1988Sjohnlev 
13311a5e258fSJosef 'Jeff' Sipek 	atomic_dec_32(&xnbp->xnb_tx_buf_count);
1332843e1988Sjohnlev }
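
/*
 * Illustrative sketch (not from this file): the constructor and
 * destructor above are wired to the transmit buffer cache when it is
 * created, typically during attach.  The cache name shown here is
 * hypothetical.
 */
#if 0
	xnbp->xnb_tx_buf_cache = kmem_cache_create("xnb_tx_buf_cache",
	    sizeof (xnb_txbuf_t), 0,
	    xnb_txbuf_constructor, xnb_txbuf_destructor,
	    NULL, xnbp, NULL, 0);
#endif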
1333843e1988Sjohnlev 
133456567907SDavid Edmondson /*
133556567907SDavid Edmondson  * Take packets from the peer and deliver them onward.
133656567907SDavid Edmondson  */
1337843e1988Sjohnlev static mblk_t *
1338024c26efSMax zhen xnb_from_peer(xnb_t *xnbp)
1339843e1988Sjohnlev {
1340843e1988Sjohnlev 	RING_IDX start, end, loop;
134156567907SDavid Edmondson 	gnttab_copy_t *cop;
1342024c26efSMax zhen 	xnb_txbuf_t **txpp;
1343843e1988Sjohnlev 	netif_tx_request_t *txreq;
134456567907SDavid Edmondson 	boolean_t work_to_do, need_notify = B_FALSE;
1345843e1988Sjohnlev 	mblk_t *head, *tail;
134656567907SDavid Edmondson 	int n_data_req, i;
1347843e1988Sjohnlev 
134856567907SDavid Edmondson 	ASSERT(MUTEX_HELD(&xnbp->xnb_tx_lock));
1349843e1988Sjohnlev 
1350843e1988Sjohnlev 	head = tail = NULL;
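	/*
	 * Loop structure: 'around' drains whatever requests are
	 * currently on the ring, performs the grant copies as a single
	 * batch and appends the resulting mblks to the chain;
	 * 'finished' notifies the peer (if required) and returns the
	 * chain.
	 */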
1351843e1988Sjohnlev around:
1352843e1988Sjohnlev 
1353551bc2a6Smrj 	/* LINTED: constant in conditional context */
1354551bc2a6Smrj 	RING_FINAL_CHECK_FOR_REQUESTS(&xnbp->xnb_tx_ring, work_to_do);
1355843e1988Sjohnlev 	if (!work_to_do) {
1356843e1988Sjohnlev finished:
135756567907SDavid Edmondson 		xnb_tx_notify_peer(xnbp, need_notify);
135856567907SDavid Edmondson 
1359843e1988Sjohnlev 		return (head);
1360843e1988Sjohnlev 	}
1361843e1988Sjohnlev 
1362551bc2a6Smrj 	start = xnbp->xnb_tx_ring.req_cons;
1363551bc2a6Smrj 	end = xnbp->xnb_tx_ring.sring->req_prod;
1364843e1988Sjohnlev 
1365a8e7f927SDavid Edmondson 	if ((end - start) > NET_TX_RING_SIZE) {
1366a8e7f927SDavid Edmondson 		/*
1367a8e7f927SDavid Edmondson 		 * This usually indicates that the frontend driver is
1368a8e7f927SDavid Edmondson 		 * misbehaving, as it's not possible to have more than
1369a8e7f927SDavid Edmondson 		 * NET_TX_RING_SIZE ring elements in play at any one
1370a8e7f927SDavid Edmondson 		 * time.
1371a8e7f927SDavid Edmondson 		 *
1372a8e7f927SDavid Edmondson 		 * We reset the ring pointers to the state declared by
1373a8e7f927SDavid Edmondson 		 * the frontend and try to carry on.
1374a8e7f927SDavid Edmondson 		 */
1375a8e7f927SDavid Edmondson 		cmn_err(CE_WARN, "xnb_from_peer: domain %d tried to give us %u "
1376a8e7f927SDavid Edmondson 		    "items in the ring, resetting and trying to recover.",
1377a8e7f927SDavid Edmondson 		    xnbp->xnb_peer, (end - start));
1378a8e7f927SDavid Edmondson 
1379a8e7f927SDavid Edmondson 		/* LINTED: constant in conditional context */
1380a8e7f927SDavid Edmondson 		BACK_RING_ATTACH(&xnbp->xnb_tx_ring,
1381a8e7f927SDavid Edmondson 		    (netif_tx_sring_t *)xnbp->xnb_tx_ring_addr, PAGESIZE);
1382a8e7f927SDavid Edmondson 
1383a8e7f927SDavid Edmondson 		goto around;
1384a8e7f927SDavid Edmondson 	}
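
	/*
	 * Note that the ring indices are free-running uint32_t
	 * counters, so the subtraction above is well-defined even
	 * across wrap-around; e.g. start == 0xfffffff0 and
	 * end == 0x00000048 gives (end - start) == 0x58 (88 slots).
	 */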
1385a8e7f927SDavid Edmondson 
138656567907SDavid Edmondson 	loop = start;
138756567907SDavid Edmondson 	cop = xnbp->xnb_tx_cop;
138856567907SDavid Edmondson 	txpp = xnbp->xnb_tx_bufp;
138956567907SDavid Edmondson 	n_data_req = 0;
1390843e1988Sjohnlev 
139156567907SDavid Edmondson 	while (loop < end) {
1392fd0939efSDavid Edmondson 		static const uint16_t acceptable_flags =
1393fd0939efSDavid Edmondson 		    NETTXF_csum_blank |
1394fd0939efSDavid Edmondson 		    NETTXF_data_validated |
1395fd0939efSDavid Edmondson 		    NETTXF_extra_info;
1396fd0939efSDavid Edmondson 		uint16_t unexpected_flags;
1397fd0939efSDavid Edmondson 
139856567907SDavid Edmondson 		txreq = RING_GET_REQUEST(&xnbp->xnb_tx_ring, loop);
1399843e1988Sjohnlev 
1400fd0939efSDavid Edmondson 		unexpected_flags = txreq->flags & ~acceptable_flags;
1401fd0939efSDavid Edmondson 		if (unexpected_flags != 0) {
1402fd0939efSDavid Edmondson 			/*
1403fd0939efSDavid Edmondson 			 * The peer used flag bits that we do not
1404fd0939efSDavid Edmondson 			 * recognize.
1405fd0939efSDavid Edmondson 			 */
1406fd0939efSDavid Edmondson 			cmn_err(CE_WARN, "xnb_from_peer: "
1407fd0939efSDavid Edmondson 			    "unexpected flag bits (0x%x) from peer "
1408fd0939efSDavid Edmondson 			    "in transmit request",
1409fd0939efSDavid Edmondson 			    unexpected_flags);
1410fd0939efSDavid Edmondson 			xnbp->xnb_stat_tx_unexpected_flags++;
1411fd0939efSDavid Edmondson 
1412fd0939efSDavid Edmondson 			/* Mark this entry as failed. */
1413fd0939efSDavid Edmondson 			xnb_tx_mark_complete(xnbp, txreq->id, NETIF_RSP_ERROR);
1414fd0939efSDavid Edmondson 			need_notify = B_TRUE;
1415fd0939efSDavid Edmondson 
1416fd0939efSDavid Edmondson 		} else if (txreq->flags & NETTXF_extra_info) {
141756567907SDavid Edmondson 			struct netif_extra_info *erp;
141856567907SDavid Edmondson 			boolean_t status;
141956567907SDavid Edmondson 
142056567907SDavid Edmondson 			loop++; /* Consume another slot in the ring. */
142156567907SDavid Edmondson 			ASSERT(loop <= end);
142256567907SDavid Edmondson 
142356567907SDavid Edmondson 			erp = (struct netif_extra_info *)
142456567907SDavid Edmondson 			    RING_GET_REQUEST(&xnbp->xnb_tx_ring, loop);
142556567907SDavid Edmondson 
142656567907SDavid Edmondson 			switch (erp->type) {
142756567907SDavid Edmondson 			case XEN_NETIF_EXTRA_TYPE_MCAST_ADD:
142856567907SDavid Edmondson 				ASSERT(xnbp->xnb_multicast_control);
142956567907SDavid Edmondson 				status = xnbp->xnb_flavour->xf_mcast_add(xnbp,
143056567907SDavid Edmondson 				    &erp->u.mcast.addr);
143156567907SDavid Edmondson 				break;
143256567907SDavid Edmondson 			case XEN_NETIF_EXTRA_TYPE_MCAST_DEL:
143356567907SDavid Edmondson 				ASSERT(xnbp->xnb_multicast_control);
143456567907SDavid Edmondson 				status = xnbp->xnb_flavour->xf_mcast_del(xnbp,
143556567907SDavid Edmondson 				    &erp->u.mcast.addr);
143656567907SDavid Edmondson 				break;
143756567907SDavid Edmondson 			default:
143856567907SDavid Edmondson 				status = B_FALSE;
143956567907SDavid Edmondson 				cmn_err(CE_WARN, "xnb_from_peer: "
144056567907SDavid Edmondson 				    "unknown extra type %d", erp->type);
144156567907SDavid Edmondson 				break;
144256567907SDavid Edmondson 			}
1443843e1988Sjohnlev 
144456567907SDavid Edmondson 			xnb_tx_mark_complete(xnbp, txreq->id,
144556567907SDavid Edmondson 			    status ? NETIF_RSP_OKAY : NETIF_RSP_ERROR);
144656567907SDavid Edmondson 			need_notify = B_TRUE;
1447fd0939efSDavid Edmondson 
1448fd0939efSDavid Edmondson 		} else if ((txreq->offset > PAGESIZE) ||
1449fd0939efSDavid Edmondson 		    (txreq->offset + txreq->size > PAGESIZE)) {
1450fd0939efSDavid Edmondson 			/*
1451fd0939efSDavid Edmondson 			 * Peer attempted to refer to data beyond the
1452fd0939efSDavid Edmondson 			 * end of the granted page.
1453fd0939efSDavid Edmondson 			 */
1454fd0939efSDavid Edmondson 			cmn_err(CE_WARN, "xnb_from_peer: "
1455fd0939efSDavid Edmondson 			    "attempt to refer beyond the end of the granted "
1456fd0939efSDavid Edmondson 			    "page in txreq (offset %d, size %d).",
1457fd0939efSDavid Edmondson 			    txreq->offset, txreq->size);
1458fd0939efSDavid Edmondson 			xnbp->xnb_stat_tx_overflow_page++;
1459fd0939efSDavid Edmondson 
1460fd0939efSDavid Edmondson 			/* Mark this entry as failed. */
1461fd0939efSDavid Edmondson 			xnb_tx_mark_complete(xnbp, txreq->id, NETIF_RSP_ERROR);
1462fd0939efSDavid Edmondson 			need_notify = B_TRUE;
1463fd0939efSDavid Edmondson 
146456567907SDavid Edmondson 		} else {
146556567907SDavid Edmondson 			xnb_txbuf_t *txp;
146656567907SDavid Edmondson 
146756567907SDavid Edmondson 			txp = kmem_cache_alloc(xnbp->xnb_tx_buf_cache,
146856567907SDavid Edmondson 			    KM_NOSLEEP);
146956567907SDavid Edmondson 			if (txp == NULL)
147056567907SDavid Edmondson 				break;
147156567907SDavid Edmondson 
147256567907SDavid Edmondson 			txp->xt_mblk = desballoc((unsigned char *)txp->xt_buf,
147356567907SDavid Edmondson 			    txp->xt_buflen, 0, &txp->xt_free_rtn);
147456567907SDavid Edmondson 			if (txp->xt_mblk == NULL) {
147556567907SDavid Edmondson 				kmem_cache_free(xnbp->xnb_tx_buf_cache, txp);
147656567907SDavid Edmondson 				break;
147756567907SDavid Edmondson 			}
1478843e1988Sjohnlev 
147956567907SDavid Edmondson 			txp->xt_idx = loop;
148056567907SDavid Edmondson 			txp->xt_id = txreq->id;
1481843e1988Sjohnlev 
148256567907SDavid Edmondson 			cop->source.u.ref = txreq->gref;
148356567907SDavid Edmondson 			cop->source.domid = xnbp->xnb_peer;
148456567907SDavid Edmondson 			cop->source.offset = txreq->offset;
1485843e1988Sjohnlev 
148656567907SDavid Edmondson 			cop->dest.u.gmfn = txp->xt_mfn;
148756567907SDavid Edmondson 			cop->dest.domid = DOMID_SELF;
148856567907SDavid Edmondson 			cop->dest.offset = 0;
1489843e1988Sjohnlev 
149056567907SDavid Edmondson 			cop->len = txreq->size;
149156567907SDavid Edmondson 			cop->flags = GNTCOPY_source_gref;
149256567907SDavid Edmondson 			cop->status = 0;
1493843e1988Sjohnlev 
149456567907SDavid Edmondson 			*txpp = txp;
1495843e1988Sjohnlev 
149656567907SDavid Edmondson 			txpp++;
149756567907SDavid Edmondson 			cop++;
149856567907SDavid Edmondson 			n_data_req++;
1499843e1988Sjohnlev 
150056567907SDavid Edmondson 			ASSERT(n_data_req <= NET_TX_RING_SIZE);
150156567907SDavid Edmondson 		}
1502843e1988Sjohnlev 
150356567907SDavid Edmondson 		loop++;
150456567907SDavid Edmondson 	}
150556567907SDavid Edmondson 
150656567907SDavid Edmondson 	xnbp->xnb_tx_ring.req_cons = loop;
150756567907SDavid Edmondson 
150856567907SDavid Edmondson 	if (n_data_req == 0)
150956567907SDavid Edmondson 		goto around;
151056567907SDavid Edmondson 
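	/*
	 * Hand the accumulated copy operations to the hypervisor as a
	 * single batch; each entry copies txreq->size bytes from the
	 * peer's granted page into one of the locally allocated
	 * buffers.
	 */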
151156567907SDavid Edmondson 	if (HYPERVISOR_grant_table_op(GNTTABOP_copy,
151256567907SDavid Edmondson 	    xnbp->xnb_tx_cop, n_data_req) != 0) {
151356567907SDavid Edmondson 
151456567907SDavid Edmondson 		cmn_err(CE_WARN, "xnb_from_peer: copy operation failed");
151556567907SDavid Edmondson 
151656567907SDavid Edmondson 		txpp = xnbp->xnb_tx_bufp;
151756567907SDavid Edmondson 		i = n_data_req;
151856567907SDavid Edmondson 		while (i > 0) {
151956567907SDavid Edmondson 			kmem_cache_free(xnbp->xnb_tx_buf_cache, *txpp);
1520024c26efSMax zhen 			txpp++;
152156567907SDavid Edmondson 			i--;
1522843e1988Sjohnlev 		}
1523843e1988Sjohnlev 
1524843e1988Sjohnlev 		goto finished;
1525843e1988Sjohnlev 	}
1526843e1988Sjohnlev 
152756567907SDavid Edmondson 	txpp = xnbp->xnb_tx_bufp;
152856567907SDavid Edmondson 	cop = xnbp->xnb_tx_cop;
152956567907SDavid Edmondson 	i = n_data_req;
1530843e1988Sjohnlev 
153156567907SDavid Edmondson 	while (i > 0) {
153256567907SDavid Edmondson 		xnb_txbuf_t *txp = *txpp;
1533843e1988Sjohnlev 
153456567907SDavid Edmondson 		txreq = RING_GET_REQUEST(&xnbp->xnb_tx_ring, txp->xt_idx);
1535843e1988Sjohnlev 
153656567907SDavid Edmondson 		if (cop->status != 0) {
153756567907SDavid Edmondson #ifdef XNB_DEBUG
153856567907SDavid Edmondson 			cmn_err(CE_WARN, "xnb_from_peer: "
153956567907SDavid Edmondson 			    "txpp 0x%p failed (%d)",
154056567907SDavid Edmondson 			    (void *)*txpp, cop->status);
154156567907SDavid Edmondson #endif /* XNB_DEBUG */
1542fd0939efSDavid Edmondson 			xnb_tx_mark_complete(xnbp, txp->xt_id, NETIF_RSP_ERROR);
154356567907SDavid Edmondson 			freemsg(txp->xt_mblk);
154456567907SDavid Edmondson 		} else {
154556567907SDavid Edmondson 			mblk_t *mp;
1546843e1988Sjohnlev 
154756567907SDavid Edmondson 			mp = txp->xt_mblk;
154856567907SDavid Edmondson 			mp->b_rptr = mp->b_wptr = (unsigned char *)txp->xt_buf;
154956567907SDavid Edmondson 			mp->b_wptr += txreq->size;
155056567907SDavid Edmondson 			mp->b_next = NULL;
1551843e1988Sjohnlev 
1552843e1988Sjohnlev 			/*
155356567907SDavid Edmondson 			 * If there are checksum flags, process them
155456567907SDavid Edmondson 			 * appropriately.
1555843e1988Sjohnlev 			 */
155656567907SDavid Edmondson 			if ((txreq->flags &
1557843e1988Sjohnlev 			    (NETTXF_csum_blank | NETTXF_data_validated))
155856567907SDavid Edmondson 			    != 0) {
1559551bc2a6Smrj 				mp = xnbp->xnb_flavour->xf_cksum_from_peer(xnbp,
1560843e1988Sjohnlev 				    mp, txreq->flags);
1561024c26efSMax zhen 				xnbp->xnb_stat_tx_cksum_no_need++;
1562843e1988Sjohnlev 
156356567907SDavid Edmondson 				txp->xt_mblk = mp;
156456567907SDavid Edmondson 			}
1565843e1988Sjohnlev 
1566843e1988Sjohnlev 			if (head == NULL) {
1567843e1988Sjohnlev 				ASSERT(tail == NULL);
1568843e1988Sjohnlev 				head = mp;
1569843e1988Sjohnlev 			} else {
1570843e1988Sjohnlev 				ASSERT(tail != NULL);
1571843e1988Sjohnlev 				tail->b_next = mp;
1572843e1988Sjohnlev 			}
1573843e1988Sjohnlev 			tail = mp;
157456567907SDavid Edmondson 
157556567907SDavid Edmondson 			xnbp->xnb_stat_opackets++;
157656567907SDavid Edmondson 			xnbp->xnb_stat_obytes += txreq->size;
157756567907SDavid Edmondson 
1578fd0939efSDavid Edmondson 			xnb_tx_mark_complete(xnbp, txp->xt_id, NETIF_RSP_OKAY);
1579843e1988Sjohnlev 		}
1580843e1988Sjohnlev 
158156567907SDavid Edmondson 		txpp++;
158256567907SDavid Edmondson 		cop++;
158356567907SDavid Edmondson 		i--;
158456567907SDavid Edmondson 	}
1585843e1988Sjohnlev 
1586843e1988Sjohnlev 	goto around;
1587843e1988Sjohnlev 	/* NOTREACHED */
1588843e1988Sjohnlev }
1589843e1988Sjohnlev 
1590843e1988Sjohnlev static uint_t
1591843e1988Sjohnlev xnb_intr(caddr_t arg)
1592843e1988Sjohnlev {
1593843e1988Sjohnlev 	xnb_t *xnbp = (xnb_t *)arg;
1594843e1988Sjohnlev 	mblk_t *mp;
1595843e1988Sjohnlev 
1596551bc2a6Smrj 	xnbp->xnb_stat_intr++;
1597843e1988Sjohnlev 
1598024c26efSMax zhen 	mutex_enter(&xnbp->xnb_tx_lock);
1599843e1988Sjohnlev 
1600551bc2a6Smrj 	ASSERT(xnbp->xnb_connected);
1601843e1988Sjohnlev 
1602024c26efSMax zhen 	mp = xnb_from_peer(xnbp);
1603843e1988Sjohnlev 
1604024c26efSMax zhen 	mutex_exit(&xnbp->xnb_tx_lock);
1605843e1988Sjohnlev 
1606551bc2a6Smrj 	if (!xnbp->xnb_hotplugged) {
1607024c26efSMax zhen 		xnbp->xnb_stat_tx_too_early++;
1608843e1988Sjohnlev 		goto fail;
1609843e1988Sjohnlev 	}
1610843e1988Sjohnlev 	if (mp == NULL) {
1611551bc2a6Smrj 		xnbp->xnb_stat_spurious_intr++;
1612843e1988Sjohnlev 		goto fail;
1613843e1988Sjohnlev 	}
1614843e1988Sjohnlev 
1615024c26efSMax zhen 	xnbp->xnb_flavour->xf_from_peer(xnbp, mp);
1616843e1988Sjohnlev 
1617843e1988Sjohnlev 	return (DDI_INTR_CLAIMED);
1618843e1988Sjohnlev 
1619843e1988Sjohnlev fail:
1620843e1988Sjohnlev 	freemsgchain(mp);
1621843e1988Sjohnlev 	return (DDI_INTR_CLAIMED);
1622843e1988Sjohnlev }
1623843e1988Sjohnlev 
162456567907SDavid Edmondson /*
162556567907SDavid Edmondson  * Read our configuration from xenstore.
162656567907SDavid Edmondson  */
162756567907SDavid Edmondson boolean_t
162856567907SDavid Edmondson xnb_read_xs_config(xnb_t *xnbp)
162956567907SDavid Edmondson {
163056567907SDavid Edmondson 	char *xsname;
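	/* Enough for "xx:xx:xx:xx:xx:xx" plus the terminating NUL. */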
163156567907SDavid Edmondson 	char mac[ETHERADDRL * 3];
163256567907SDavid Edmondson 
163356567907SDavid Edmondson 	xsname = xvdi_get_xsname(xnbp->xnb_devinfo);
163456567907SDavid Edmondson 
163556567907SDavid Edmondson 	if (xenbus_scanf(XBT_NULL, xsname,
163656567907SDavid Edmondson 	    "mac", "%s", mac) != 0) {
163756567907SDavid Edmondson 		cmn_err(CE_WARN, "xnb_attach: "
163856567907SDavid Edmondson 		    "cannot read mac address from %s",
163956567907SDavid Edmondson 		    xsname);
164056567907SDavid Edmondson 		return (B_FALSE);
164156567907SDavid Edmondson 	}
164256567907SDavid Edmondson 
164356567907SDavid Edmondson 	if (ether_aton(mac, xnbp->xnb_mac_addr) != ETHERADDRL) {
164456567907SDavid Edmondson 		cmn_err(CE_WARN,
164556567907SDavid Edmondson 		    "xnb_attach: cannot parse mac address %s",
164656567907SDavid Edmondson 		    mac);
164756567907SDavid Edmondson 		return (B_FALSE);
164856567907SDavid Edmondson 	}
164956567907SDavid Edmondson 
165056567907SDavid Edmondson 	return (B_TRUE);
165156567907SDavid Edmondson }
165256567907SDavid Edmondson 
165356567907SDavid Edmondson /*
165456567907SDavid Edmondson  * Read the configuration of the peer from xenstore.
165556567907SDavid Edmondson  */
165656567907SDavid Edmondson boolean_t
165756567907SDavid Edmondson xnb_read_oe_config(xnb_t *xnbp)
1658843e1988Sjohnlev {
1659843e1988Sjohnlev 	char *oename;
1660843e1988Sjohnlev 	int i;
1661843e1988Sjohnlev 
166256567907SDavid Edmondson 	oename = xvdi_get_oename(xnbp->xnb_devinfo);
1663843e1988Sjohnlev 
1664843e1988Sjohnlev 	if (xenbus_gather(XBT_NULL, oename,
166556567907SDavid Edmondson 	    "event-channel", "%u", &xnbp->xnb_fe_evtchn,
1666551bc2a6Smrj 	    "tx-ring-ref", "%lu", &xnbp->xnb_tx_ring_ref,
1667551bc2a6Smrj 	    "rx-ring-ref", "%lu", &xnbp->xnb_rx_ring_ref,
1668843e1988Sjohnlev 	    NULL) != 0) {
166956567907SDavid Edmondson 		cmn_err(CE_WARN, "xnb_read_oe_config: "
1670843e1988Sjohnlev 		    "cannot read other-end details from %s",
1671843e1988Sjohnlev 		    oename);
167256567907SDavid Edmondson 		return (B_FALSE);
1673843e1988Sjohnlev 	}
1674843e1988Sjohnlev 
167556567907SDavid Edmondson 	/*
167656567907SDavid Edmondson 	 * Check whether our peer requests receive-side hypervisor
167756567907SDavid Edmondson 	 * copy.
167856567907SDavid Edmondson 	 */
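	/*
	 * If the key is absent xenbus_scanf() fails and we treat the
	 * feature as not requested.
	 */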
1679843e1988Sjohnlev 	if (xenbus_scanf(XBT_NULL, oename,
168056567907SDavid Edmondson 	    "request-rx-copy", "%d", &i) != 0)
1681843e1988Sjohnlev 		i = 0;
1682843e1988Sjohnlev 	if (i != 0)
168356567907SDavid Edmondson 		xnbp->xnb_rx_hv_copy = B_TRUE;
1684843e1988Sjohnlev 
168556567907SDavid Edmondson 	/*
168656567907SDavid Edmondson 	 * Check whether our peer requests multicast_control.
168756567907SDavid Edmondson 	 */
1688843e1988Sjohnlev 	if (xenbus_scanf(XBT_NULL, oename,
168956567907SDavid Edmondson 	    "request-multicast-control", "%d", &i) != 0)
1690551bc2a6Smrj 		i = 0;
1691551bc2a6Smrj 	if (i != 0)
169256567907SDavid Edmondson 		xnbp->xnb_multicast_control = B_TRUE;
169356567907SDavid Edmondson 
169456567907SDavid Edmondson 	/*
169556567907SDavid Edmondson 	 * The Linux backend driver here checks to see if the peer has
169656567907SDavid Edmondson 	 * set 'feature-no-csum-offload'. This is used to indicate
169756567907SDavid Edmondson 	 * that the guest cannot handle receiving packets without a
169856567907SDavid Edmondson 	 * valid checksum. We don't check here, because packets passed
169956567907SDavid Edmondson 	 * to the peer _always_ have a valid checksum.
170056567907SDavid Edmondson 	 *
170156567907SDavid Edmondson 	 * There are three cases:
170256567907SDavid Edmondson 	 *
170356567907SDavid Edmondson 	 * - the NIC is dedicated: packets from the wire should always
170456567907SDavid Edmondson 	 *   have a valid checksum. If the hardware validates the
170556567907SDavid Edmondson 	 *   checksum then the relevant bit will be set in the packet
170656567907SDavid Edmondson 	 *   attributes and we will inform the peer. It can choose to
170756567907SDavid Edmondson 	 *   ignore the hardware verification.
170856567907SDavid Edmondson 	 *
170956567907SDavid Edmondson 	 * - the NIC is shared (VNIC) and a packet originates from the
171056567907SDavid Edmondson 	 *   wire: this is the same as the case above - the packets
171156567907SDavid Edmondson 	 *   will have a valid checksum.
171256567907SDavid Edmondson 	 *
171356567907SDavid Edmondson 	 * - the NIC is shared (VNIC) and a packet originates from the
171456567907SDavid Edmondson 	 *   host: the MAC layer ensures that all such packets have a
171556567907SDavid Edmondson 	 *   valid checksum by calculating one if the stack did not.
171656567907SDavid Edmondson 	 */
171756567907SDavid Edmondson 
171856567907SDavid Edmondson 	return (B_TRUE);
171956567907SDavid Edmondson }
172056567907SDavid Edmondson 
172156567907SDavid Edmondson void
172256567907SDavid Edmondson xnb_start_connect(xnb_t *xnbp)
172356567907SDavid Edmondson {
172456567907SDavid Edmondson 	dev_info_t  *dip = xnbp->xnb_devinfo;
172556567907SDavid Edmondson 
172656567907SDavid Edmondson 	if (!xnb_connect_rings(dip)) {
172756567907SDavid Edmondson 		cmn_err(CE_WARN, "xnb_start_connect: "
172856567907SDavid Edmondson 		    "cannot connect rings");
172956567907SDavid Edmondson 		goto failed;
173056567907SDavid Edmondson 	}
173156567907SDavid Edmondson 
173256567907SDavid Edmondson 	if (!xnbp->xnb_flavour->xf_start_connect(xnbp)) {
173356567907SDavid Edmondson 		cmn_err(CE_WARN, "xnb_start_connect: "
173456567907SDavid Edmondson 		    "flavour failed to connect");
173556567907SDavid Edmondson 		goto failed;
173656567907SDavid Edmondson 	}
173756567907SDavid Edmondson 
173856567907SDavid Edmondson 	(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateConnected);
173956567907SDavid Edmondson 	return;
174056567907SDavid Edmondson 
174156567907SDavid Edmondson failed:
174256567907SDavid Edmondson 	xnbp->xnb_flavour->xf_peer_disconnected(xnbp);
174356567907SDavid Edmondson 	xnb_disconnect_rings(dip);
174456567907SDavid Edmondson 	(void) xvdi_switch_state(dip, XBT_NULL,
174556567907SDavid Edmondson 	    XenbusStateClosed);
174656567907SDavid Edmondson 	(void) xvdi_post_event(dip, XEN_HP_REMOVE);
174756567907SDavid Edmondson }
174856567907SDavid Edmondson 
174956567907SDavid Edmondson static boolean_t
175056567907SDavid Edmondson xnb_connect_rings(dev_info_t *dip)
175156567907SDavid Edmondson {
175256567907SDavid Edmondson 	xnb_t *xnbp = ddi_get_driver_private(dip);
175356567907SDavid Edmondson 	struct gnttab_map_grant_ref map_op;
175456567907SDavid Edmondson 
175556567907SDavid Edmondson 	/*
175656567907SDavid Edmondson 	 * Cannot attempt to connect the rings if already connected.
175756567907SDavid Edmondson 	 */
175856567907SDavid Edmondson 	ASSERT(!xnbp->xnb_connected);
1759843e1988Sjohnlev 
1760843e1988Sjohnlev 	/*
1761843e1988Sjohnlev 	 * 1. allocate a vaddr for the tx page, one for the rx page.
1762843e1988Sjohnlev 	 * 2. call GNTTABOP_map_grant_ref to map the relevant pages
1763843e1988Sjohnlev 	 *    into the allocated vaddr (one for tx, one for rx).
1764843e1988Sjohnlev 	 * 3. call EVTCHNOP_bind_interdomain to have the event channel
1765843e1988Sjohnlev 	 *    bound to this domain.
1766843e1988Sjohnlev 	 * 4. associate the event channel with an interrupt.
176756567907SDavid Edmondson 	 * 5. enable the interrupt.
1768843e1988Sjohnlev 	 */
1769843e1988Sjohnlev 
1770843e1988Sjohnlev 	/* 1.tx */
1771551bc2a6Smrj 	xnbp->xnb_tx_ring_addr = vmem_xalloc(heap_arena, PAGESIZE, PAGESIZE,
1772843e1988Sjohnlev 	    0, 0, 0, 0, VM_SLEEP);
1773551bc2a6Smrj 	ASSERT(xnbp->xnb_tx_ring_addr != NULL);
1774843e1988Sjohnlev 
1775843e1988Sjohnlev 	/* 2.tx */
1776551bc2a6Smrj 	map_op.host_addr = (uint64_t)((long)xnbp->xnb_tx_ring_addr);
1777843e1988Sjohnlev 	map_op.flags = GNTMAP_host_map;
1778551bc2a6Smrj 	map_op.ref = xnbp->xnb_tx_ring_ref;
1779551bc2a6Smrj 	map_op.dom = xnbp->xnb_peer;
17807eea693dSMark Johnson 	hat_prepare_mapping(kas.a_hat, xnbp->xnb_tx_ring_addr, NULL);
17817eea693dSMark Johnson 	if (xen_map_gref(GNTTABOP_map_grant_ref, &map_op, 1, B_FALSE) != 0 ||
17827eea693dSMark Johnson 	    map_op.status != 0) {
1783843e1988Sjohnlev 		cmn_err(CE_WARN, "xnb_connect_rings: cannot map tx-ring page.");
1784843e1988Sjohnlev 		goto fail;
1785843e1988Sjohnlev 	}
1786551bc2a6Smrj 	xnbp->xnb_tx_ring_handle = map_op.handle;
1787843e1988Sjohnlev 
1788551bc2a6Smrj 	/* LINTED: constant in conditional context */
1789551bc2a6Smrj 	BACK_RING_INIT(&xnbp->xnb_tx_ring,
1790551bc2a6Smrj 	    (netif_tx_sring_t *)xnbp->xnb_tx_ring_addr, PAGESIZE);
1791843e1988Sjohnlev 
1792843e1988Sjohnlev 	/* 1.rx */
1793551bc2a6Smrj 	xnbp->xnb_rx_ring_addr = vmem_xalloc(heap_arena, PAGESIZE, PAGESIZE,
1794843e1988Sjohnlev 	    0, 0, 0, 0, VM_SLEEP);
1795551bc2a6Smrj 	ASSERT(xnbp->xnb_rx_ring_addr != NULL);
1796843e1988Sjohnlev 
1797843e1988Sjohnlev 	/* 2.rx */
1798551bc2a6Smrj 	map_op.host_addr = (uint64_t)((long)xnbp->xnb_rx_ring_addr);
1799843e1988Sjohnlev 	map_op.flags = GNTMAP_host_map;
1800551bc2a6Smrj 	map_op.ref = xnbp->xnb_rx_ring_ref;
1801551bc2a6Smrj 	map_op.dom = xnbp->xnb_peer;
18027eea693dSMark Johnson 	hat_prepare_mapping(kas.a_hat, xnbp->xnb_rx_ring_addr, NULL);
18037eea693dSMark Johnson 	if (xen_map_gref(GNTTABOP_map_grant_ref, &map_op, 1, B_FALSE) != 0 ||
18047eea693dSMark Johnson 	    map_op.status != 0) {
1805843e1988Sjohnlev 		cmn_err(CE_WARN, "xnb_connect_rings: cannot map rx-ring page.");
1806843e1988Sjohnlev 		goto fail;
1807843e1988Sjohnlev 	}
1808551bc2a6Smrj 	xnbp->xnb_rx_ring_handle = map_op.handle;
1809843e1988Sjohnlev 
1810551bc2a6Smrj 	/* LINTED: constant in conditional context */
1811551bc2a6Smrj 	BACK_RING_INIT(&xnbp->xnb_rx_ring,
1812551bc2a6Smrj 	    (netif_rx_sring_t *)xnbp->xnb_rx_ring_addr, PAGESIZE);
1813843e1988Sjohnlev 
1814843e1988Sjohnlev 	/* 3 */
181556567907SDavid Edmondson 	if (xvdi_bind_evtchn(dip, xnbp->xnb_fe_evtchn) != DDI_SUCCESS) {
1816843e1988Sjohnlev 		cmn_err(CE_WARN, "xnb_connect_rings: "
1817551bc2a6Smrj 		    "cannot bind event channel %d", xnbp->xnb_fe_evtchn);
1818551bc2a6Smrj 		xnbp->xnb_evtchn = INVALID_EVTCHN;
1819843e1988Sjohnlev 		goto fail;
1820843e1988Sjohnlev 	}
1821551bc2a6Smrj 	xnbp->xnb_evtchn = xvdi_get_evtchn(dip);
1822843e1988Sjohnlev 
1823843e1988Sjohnlev 	/*
1824843e1988Sjohnlev 	 * It would be good to set the state to XenbusStateConnected
1825843e1988Sjohnlev 	 * here as well, but then what if ddi_add_intr() failed?
1826843e1988Sjohnlev 	 * Changing the state in the store will be noticed by the peer
1827843e1988Sjohnlev 	 * and cannot be "taken back".
1828843e1988Sjohnlev 	 */
1829551bc2a6Smrj 	mutex_enter(&xnbp->xnb_tx_lock);
1830551bc2a6Smrj 	mutex_enter(&xnbp->xnb_rx_lock);
1831843e1988Sjohnlev 
1832551bc2a6Smrj 	xnbp->xnb_connected = B_TRUE;
1833843e1988Sjohnlev 
1834551bc2a6Smrj 	mutex_exit(&xnbp->xnb_rx_lock);
1835551bc2a6Smrj 	mutex_exit(&xnbp->xnb_tx_lock);
1836843e1988Sjohnlev 
183756567907SDavid Edmondson 	/* 4, 5 */
1838843e1988Sjohnlev 	if (ddi_add_intr(dip, 0, NULL, NULL, xnb_intr, (caddr_t)xnbp)
1839843e1988Sjohnlev 	    != DDI_SUCCESS) {
1840843e1988Sjohnlev 		cmn_err(CE_WARN, "xnb_connect_rings: cannot add interrupt");
1841843e1988Sjohnlev 		goto fail;
1842843e1988Sjohnlev 	}
1843551bc2a6Smrj 	xnbp->xnb_irq = B_TRUE;
1844843e1988Sjohnlev 
1845843e1988Sjohnlev 	return (B_TRUE);
1846843e1988Sjohnlev 
1847843e1988Sjohnlev fail:
1848551bc2a6Smrj 	mutex_enter(&xnbp->xnb_tx_lock);
1849551bc2a6Smrj 	mutex_enter(&xnbp->xnb_rx_lock);
1850843e1988Sjohnlev 
1851551bc2a6Smrj 	xnbp->xnb_connected = B_FALSE;
185256567907SDavid Edmondson 
1853551bc2a6Smrj 	mutex_exit(&xnbp->xnb_rx_lock);
1854551bc2a6Smrj 	mutex_exit(&xnbp->xnb_tx_lock);
1855843e1988Sjohnlev 
1856843e1988Sjohnlev 	return (B_FALSE);
1857843e1988Sjohnlev }
1858843e1988Sjohnlev 
1859843e1988Sjohnlev static void
1860843e1988Sjohnlev xnb_disconnect_rings(dev_info_t *dip)
1861843e1988Sjohnlev {
1862843e1988Sjohnlev 	xnb_t *xnbp = ddi_get_driver_private(dip);
1863843e1988Sjohnlev 
1864551bc2a6Smrj 	if (xnbp->xnb_irq) {
1865843e1988Sjohnlev 		ddi_remove_intr(dip, 0, NULL);
1866551bc2a6Smrj 		xnbp->xnb_irq = B_FALSE;
1867843e1988Sjohnlev 	}
1868843e1988Sjohnlev 
1869551bc2a6Smrj 	if (xnbp->xnb_evtchn != INVALID_EVTCHN) {
1870843e1988Sjohnlev 		xvdi_free_evtchn(dip);
1871551bc2a6Smrj 		xnbp->xnb_evtchn = INVALID_EVTCHN;
1872843e1988Sjohnlev 	}
1873843e1988Sjohnlev 
1874551bc2a6Smrj 	if (xnbp->xnb_rx_ring_handle != INVALID_GRANT_HANDLE) {
1875843e1988Sjohnlev 		struct gnttab_unmap_grant_ref unmap_op;
1876843e1988Sjohnlev 
1877551bc2a6Smrj 		unmap_op.host_addr = (uint64_t)(uintptr_t)
1878551bc2a6Smrj 		    xnbp->xnb_rx_ring_addr;
1879843e1988Sjohnlev 		unmap_op.dev_bus_addr = 0;
1880551bc2a6Smrj 		unmap_op.handle = xnbp->xnb_rx_ring_handle;
1881843e1988Sjohnlev 		if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
1882843e1988Sjohnlev 		    &unmap_op, 1) != 0)
1883843e1988Sjohnlev 			cmn_err(CE_WARN, "xnb_disconnect_rings: "
1884843e1988Sjohnlev 			    "cannot unmap rx-ring page (%d)",
1885843e1988Sjohnlev 			    unmap_op.status);
1886843e1988Sjohnlev 
1887551bc2a6Smrj 		xnbp->xnb_rx_ring_handle = INVALID_GRANT_HANDLE;
1888843e1988Sjohnlev 	}
1889843e1988Sjohnlev 
1890551bc2a6Smrj 	if (xnbp->xnb_rx_ring_addr != NULL) {
1891551bc2a6Smrj 		hat_release_mapping(kas.a_hat, xnbp->xnb_rx_ring_addr);
1892551bc2a6Smrj 		vmem_free(heap_arena, xnbp->xnb_rx_ring_addr, PAGESIZE);
1893551bc2a6Smrj 		xnbp->xnb_rx_ring_addr = NULL;
1894843e1988Sjohnlev 	}
1895843e1988Sjohnlev 
1896551bc2a6Smrj 	if (xnbp->xnb_tx_ring_handle != INVALID_GRANT_HANDLE) {
1897843e1988Sjohnlev 		struct gnttab_unmap_grant_ref unmap_op;
1898843e1988Sjohnlev 
1899551bc2a6Smrj 		unmap_op.host_addr = (uint64_t)(uintptr_t)
1900551bc2a6Smrj 		    xnbp->xnb_tx_ring_addr;
1901843e1988Sjohnlev 		unmap_op.dev_bus_addr = 0;
1902551bc2a6Smrj 		unmap_op.handle = xnbp->xnb_tx_ring_handle;
1903843e1988Sjohnlev 		if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
1904843e1988Sjohnlev 		    &unmap_op, 1) != 0)
1905843e1988Sjohnlev 			cmn_err(CE_WARN, "xnb_disconnect_rings: "
1906843e1988Sjohnlev 			    "cannot unmap tx-ring page (%d)",
1907843e1988Sjohnlev 			    unmap_op.status);
1908843e1988Sjohnlev 
1909551bc2a6Smrj 		xnbp->xnb_tx_ring_handle = INVALID_GRANT_HANDLE;
1910843e1988Sjohnlev 	}
1911843e1988Sjohnlev 
1912551bc2a6Smrj 	if (xnbp->xnb_tx_ring_addr != NULL) {
1913551bc2a6Smrj 		hat_release_mapping(kas.a_hat, xnbp->xnb_tx_ring_addr);
1914551bc2a6Smrj 		vmem_free(heap_arena, xnbp->xnb_tx_ring_addr, PAGESIZE);
1915551bc2a6Smrj 		xnbp->xnb_tx_ring_addr = NULL;
1916843e1988Sjohnlev 	}
1917843e1988Sjohnlev }
1918843e1988Sjohnlev 
1919843e1988Sjohnlev static void
1920843e1988Sjohnlev xnb_oe_state_change(dev_info_t *dip, ddi_eventcookie_t id,
1921843e1988Sjohnlev     void *arg, void *impl_data)
1922843e1988Sjohnlev {
192356567907SDavid Edmondson 	_NOTE(ARGUNUSED(id, arg));
1924843e1988Sjohnlev 	xnb_t *xnbp = ddi_get_driver_private(dip);
1925843e1988Sjohnlev 	XenbusState new_state = *(XenbusState *)impl_data;
1926843e1988Sjohnlev 
1927843e1988Sjohnlev 	ASSERT(xnbp != NULL);
1928843e1988Sjohnlev 
1929843e1988Sjohnlev 	switch (new_state) {
1930843e1988Sjohnlev 	case XenbusStateConnected:
193108cfff84Scz 		/* spurious state change */
193208cfff84Scz 		if (xnbp->xnb_connected)
193308cfff84Scz 			return;
193408cfff84Scz 
193556567907SDavid Edmondson 		if (!xnb_read_oe_config(xnbp) ||
193656567907SDavid Edmondson 		    !xnbp->xnb_flavour->xf_peer_connected(xnbp)) {
193756567907SDavid Edmondson 			cmn_err(CE_WARN, "xnb_oe_state_change: "
193856567907SDavid Edmondson 			    "cannot read other-end config or connect to peer");
1939843e1988Sjohnlev 			(void) xvdi_switch_state(dip, XBT_NULL,
1940843e1988Sjohnlev 			    XenbusStateClosed);
1941843e1988Sjohnlev 			(void) xvdi_post_event(dip, XEN_HP_REMOVE);
194256567907SDavid Edmondson 
194356567907SDavid Edmondson 			break;
1944843e1988Sjohnlev 		}
1945843e1988Sjohnlev 
194756567907SDavid Edmondson 		mutex_enter(&xnbp->xnb_state_lock);
194856567907SDavid Edmondson 		xnbp->xnb_fe_status = XNB_STATE_READY;
194956567907SDavid Edmondson 		if (xnbp->xnb_be_status == XNB_STATE_READY)
195056567907SDavid Edmondson 			xnb_start_connect(xnbp);
195156567907SDavid Edmondson 		mutex_exit(&xnbp->xnb_state_lock);
195256567907SDavid Edmondson 
1953843e1988Sjohnlev 		/*
1954843e1988Sjohnlev 		 * Now that we've attempted to connect it's reasonable
1955843e1988Sjohnlev 		 * Now that we've attempted to connect, it's reasonable
1956843e1988Sjohnlev 		 */
1957551bc2a6Smrj 		xnbp->xnb_detachable = B_TRUE;
1958843e1988Sjohnlev 
1959843e1988Sjohnlev 		break;
1960843e1988Sjohnlev 
1961843e1988Sjohnlev 	case XenbusStateClosing:
1962843e1988Sjohnlev 		(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosing);
1963843e1988Sjohnlev 
1964843e1988Sjohnlev 		break;
1965843e1988Sjohnlev 
1966843e1988Sjohnlev 	case XenbusStateClosed:
1967551bc2a6Smrj 		xnbp->xnb_flavour->xf_peer_disconnected(xnbp);
1968843e1988Sjohnlev 
1969551bc2a6Smrj 		mutex_enter(&xnbp->xnb_tx_lock);
1970551bc2a6Smrj 		mutex_enter(&xnbp->xnb_rx_lock);
1971843e1988Sjohnlev 
1972843e1988Sjohnlev 		xnb_disconnect_rings(dip);
1973551bc2a6Smrj 		xnbp->xnb_connected = B_FALSE;
1974843e1988Sjohnlev 
1975551bc2a6Smrj 		mutex_exit(&xnbp->xnb_rx_lock);
1976551bc2a6Smrj 		mutex_exit(&xnbp->xnb_tx_lock);
1977843e1988Sjohnlev 
1978843e1988Sjohnlev 		(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosed);
1979843e1988Sjohnlev 		(void) xvdi_post_event(dip, XEN_HP_REMOVE);
1980843e1988Sjohnlev 		/*
1981843e1988Sjohnlev 		 * In all likelihood this is already set (in the above
1982843e1988Sjohnlev 		 * case), but if the peer never attempted to connect
1983843e1988Sjohnlev 		 * and the domain is destroyed, we get here without
1984843e1988Sjohnlev 		 * having been through the case above, so we set it to
1985843e1988Sjohnlev 		 * be sure.
1986843e1988Sjohnlev 		 */
1987551bc2a6Smrj 		xnbp->xnb_detachable = B_TRUE;
1988843e1988Sjohnlev 
1989843e1988Sjohnlev 		break;
1990843e1988Sjohnlev 
1991843e1988Sjohnlev 	default:
1992843e1988Sjohnlev 		break;
1993843e1988Sjohnlev 	}
1994843e1988Sjohnlev }
1995843e1988Sjohnlev 
1996843e1988Sjohnlev static void
1997843e1988Sjohnlev xnb_hp_state_change(dev_info_t *dip, ddi_eventcookie_t id,
1998843e1988Sjohnlev     void *arg, void *impl_data)
1999843e1988Sjohnlev {
200056567907SDavid Edmondson 	_NOTE(ARGUNUSED(id, arg));
2001843e1988Sjohnlev 	xnb_t *xnbp = ddi_get_driver_private(dip);
2002843e1988Sjohnlev 	xendev_hotplug_state_t state = *(xendev_hotplug_state_t *)impl_data;
2003843e1988Sjohnlev 
2004843e1988Sjohnlev 	ASSERT(xnbp != NULL);
2005843e1988Sjohnlev 
2006843e1988Sjohnlev 	switch (state) {
2007843e1988Sjohnlev 	case Connected:
200808cfff84Scz 		/* spurious hotplug event */
200908cfff84Scz 		if (xnbp->xnb_hotplugged)
201056567907SDavid Edmondson 			break;
201108cfff84Scz 
201256567907SDavid Edmondson 		if (!xnb_read_xs_config(xnbp))
201356567907SDavid Edmondson 			break;
201456567907SDavid Edmondson 
201556567907SDavid Edmondson 		if (!xnbp->xnb_flavour->xf_hotplug_connected(xnbp))
201656567907SDavid Edmondson 			break;
2017843e1988Sjohnlev 
2018551bc2a6Smrj 		mutex_enter(&xnbp->xnb_tx_lock);
2019551bc2a6Smrj 		mutex_enter(&xnbp->xnb_rx_lock);
2020843e1988Sjohnlev 
202156567907SDavid Edmondson 		xnbp->xnb_hotplugged = B_TRUE;
2022843e1988Sjohnlev 
2023551bc2a6Smrj 		mutex_exit(&xnbp->xnb_rx_lock);
2024551bc2a6Smrj 		mutex_exit(&xnbp->xnb_tx_lock);
202556567907SDavid Edmondson 
202656567907SDavid Edmondson 		mutex_enter(&xnbp->xnb_state_lock);
202756567907SDavid Edmondson 		xnbp->xnb_be_status = XNB_STATE_READY;
202856567907SDavid Edmondson 		if (xnbp->xnb_fe_status == XNB_STATE_READY)
202956567907SDavid Edmondson 			xnb_start_connect(xnbp);
203056567907SDavid Edmondson 		mutex_exit(&xnbp->xnb_state_lock);
203156567907SDavid Edmondson 
2032843e1988Sjohnlev 		break;
2033843e1988Sjohnlev 
2034843e1988Sjohnlev 	default:
2035843e1988Sjohnlev 		break;
2036843e1988Sjohnlev 	}
2037843e1988Sjohnlev }
2038843e1988Sjohnlev 
2039843e1988Sjohnlev static struct modldrv modldrv = {
2040a859da42SDavid Edmondson 	&mod_miscops, "xnb",
2041843e1988Sjohnlev };
2042843e1988Sjohnlev 
2043843e1988Sjohnlev static struct modlinkage modlinkage = {
2044843e1988Sjohnlev 	MODREV_1, &modldrv, NULL
2045843e1988Sjohnlev };
2046843e1988Sjohnlev 
2047843e1988Sjohnlev int
2048843e1988Sjohnlev _init(void)
2049843e1988Sjohnlev {
2050843e1988Sjohnlev 	int i;
2051843e1988Sjohnlev 
2052843e1988Sjohnlev 	mutex_init(&xnb_alloc_page_lock, NULL, MUTEX_DRIVER, NULL);
2053843e1988Sjohnlev 
2054843e1988Sjohnlev 	i = mod_install(&modlinkage);
205556567907SDavid Edmondson 	if (i != DDI_SUCCESS)
2056843e1988Sjohnlev 		mutex_destroy(&xnb_alloc_page_lock);
205756567907SDavid Edmondson 
2058843e1988Sjohnlev 	return (i);
2059843e1988Sjohnlev }
2060843e1988Sjohnlev 
2061843e1988Sjohnlev int
2062843e1988Sjohnlev _info(struct modinfo *modinfop)
2063843e1988Sjohnlev {
2064843e1988Sjohnlev 	return (mod_info(&modlinkage, modinfop));
2065843e1988Sjohnlev }
2066843e1988Sjohnlev 
2067843e1988Sjohnlev int
2068843e1988Sjohnlev _fini(void)
2069843e1988Sjohnlev {
2070843e1988Sjohnlev 	int i;
2071843e1988Sjohnlev 
2072843e1988Sjohnlev 	i = mod_remove(&modlinkage);
207356567907SDavid Edmondson 	if (i == DDI_SUCCESS)
2074843e1988Sjohnlev 		mutex_destroy(&xnb_alloc_page_lock);
207556567907SDavid Edmondson 
2076843e1988Sjohnlev 	return (i);
2077843e1988Sjohnlev }
2078