1843e1988Sjohnlev /*
2843e1988Sjohnlev * CDDL HEADER START
3843e1988Sjohnlev *
4843e1988Sjohnlev * The contents of this file are subject to the terms of the
5843e1988Sjohnlev * Common Development and Distribution License (the "License").
6843e1988Sjohnlev * You may not use this file except in compliance with the License.
7843e1988Sjohnlev *
8843e1988Sjohnlev * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9843e1988Sjohnlev * or http://www.opensolaris.org/os/licensing.
10843e1988Sjohnlev * See the License for the specific language governing permissions
11843e1988Sjohnlev * and limitations under the License.
12843e1988Sjohnlev *
13843e1988Sjohnlev * When distributing Covered Code, include this CDDL HEADER in each
14843e1988Sjohnlev * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15843e1988Sjohnlev * If applicable, add the following below this CDDL HEADER, with the
16843e1988Sjohnlev * fields enclosed by brackets "[]" replaced with your own identifying
17843e1988Sjohnlev * information: Portions Copyright [yyyy] [name of copyright owner]
18843e1988Sjohnlev *
19843e1988Sjohnlev * CDDL HEADER END
20843e1988Sjohnlev */
21843e1988Sjohnlev
22843e1988Sjohnlev /*
23fd0939efSDavid Edmondson * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
24843e1988Sjohnlev * Use is subject to license terms.
25*c61a1653SRyan Zezeski * Copyright 2018 Joyent, Inc.
26843e1988Sjohnlev */
27843e1988Sjohnlev
28843e1988Sjohnlev #ifdef DEBUG
29843e1988Sjohnlev #define XNB_DEBUG 1
30843e1988Sjohnlev #endif /* DEBUG */
31843e1988Sjohnlev
32843e1988Sjohnlev #include "xnb.h"
33843e1988Sjohnlev
34843e1988Sjohnlev #include <sys/sunddi.h>
35843e1988Sjohnlev #include <sys/sunndi.h>
36843e1988Sjohnlev #include <sys/modctl.h>
37843e1988Sjohnlev #include <sys/conf.h>
38843e1988Sjohnlev #include <sys/mac.h>
3956567907SDavid Edmondson #include <sys/mac_impl.h> /* For mac_fix_cksum(). */
40843e1988Sjohnlev #include <sys/dlpi.h>
41843e1988Sjohnlev #include <sys/strsubr.h>
42843e1988Sjohnlev #include <sys/strsun.h>
43551bc2a6Smrj #include <sys/types.h>
44843e1988Sjohnlev #include <sys/pattr.h>
45843e1988Sjohnlev #include <vm/seg_kmem.h>
46843e1988Sjohnlev #include <vm/hat_i86.h>
47843e1988Sjohnlev #include <xen/sys/xenbus_impl.h>
48843e1988Sjohnlev #include <xen/sys/xendev.h>
49843e1988Sjohnlev #include <sys/balloon_impl.h>
50843e1988Sjohnlev #include <sys/evtchn_impl.h>
51843e1988Sjohnlev #include <sys/gnttab.h>
52d2b85481Srscott #include <vm/vm_dep.h>
5356567907SDavid Edmondson #include <sys/note.h>
54843e1988Sjohnlev #include <sys/gld.h>
55843e1988Sjohnlev #include <inet/ip.h>
56843e1988Sjohnlev #include <inet/ip_impl.h>
57843e1988Sjohnlev
58843e1988Sjohnlev /*
59024c26efSMax zhen * The terms "transmit" and "receive" are used in alignment with domU,
60024c26efSMax zhen * which means that packets originating from the peer domU are "transmitted"
61024c26efSMax zhen * to other parts of the system and packets are "received" from them.
62843e1988Sjohnlev */
63843e1988Sjohnlev
64843e1988Sjohnlev /*
6556567907SDavid Edmondson * Should we allow guests to manipulate multicast group membership?
6664c5e63cSDavid Edmondson */
6756567907SDavid Edmondson static boolean_t xnb_multicast_control = B_TRUE;
68843e1988Sjohnlev
69843e1988Sjohnlev static boolean_t xnb_connect_rings(dev_info_t *);
70843e1988Sjohnlev static void xnb_disconnect_rings(dev_info_t *);
71843e1988Sjohnlev static void xnb_oe_state_change(dev_info_t *, ddi_eventcookie_t,
72843e1988Sjohnlev void *, void *);
73843e1988Sjohnlev static void xnb_hp_state_change(dev_info_t *, ddi_eventcookie_t,
74843e1988Sjohnlev void *, void *);
75843e1988Sjohnlev
76024c26efSMax zhen static int xnb_txbuf_constructor(void *, void *, int);
77024c26efSMax zhen static void xnb_txbuf_destructor(void *, void *);
7856567907SDavid Edmondson static void xnb_tx_notify_peer(xnb_t *, boolean_t);
79024c26efSMax zhen static void xnb_tx_mark_complete(xnb_t *, RING_IDX, int16_t);
80551bc2a6Smrj
8156567907SDavid Edmondson mblk_t *xnb_to_peer(xnb_t *, mblk_t *);
8256567907SDavid Edmondson mblk_t *xnb_copy_to_peer(xnb_t *, mblk_t *);
83551bc2a6Smrj
8456567907SDavid Edmondson static void setup_gop(xnb_t *, gnttab_copy_t *, uchar_t *,
8556567907SDavid Edmondson size_t, size_t, size_t, grant_ref_t);
8656567907SDavid Edmondson #pragma inline(setup_gop)
8756567907SDavid Edmondson static boolean_t is_foreign(void *);
8856567907SDavid Edmondson #pragma inline(is_foreign)
89843e1988Sjohnlev
90843e1988Sjohnlev #define INVALID_GRANT_HANDLE ((grant_handle_t)-1)
91843e1988Sjohnlev #define INVALID_GRANT_REF ((grant_ref_t)-1)
92843e1988Sjohnlev
93843e1988Sjohnlev static kmutex_t xnb_alloc_page_lock;
94843e1988Sjohnlev
9556567907SDavid Edmondson /*
9656567907SDavid Edmondson * On a 32 bit PAE system physical and machine addresses are larger
9756567907SDavid Edmondson * than 32 bits. ddi_btop() on such systems take an unsigned long
9856567907SDavid Edmondson * argument, and so addresses above 4G are truncated before ddi_btop()
9956567907SDavid Edmondson * gets to see them. To avoid this, code the shift operation here.
10056567907SDavid Edmondson */
10156567907SDavid Edmondson #define xnb_btop(addr) ((addr) >> PAGESHIFT)
10256567907SDavid Edmondson
10356567907SDavid Edmondson /* DMA attributes for transmit and receive data */
10456567907SDavid Edmondson static ddi_dma_attr_t buf_dma_attr = {
10556567907SDavid Edmondson DMA_ATTR_V0, /* version of this structure */
10656567907SDavid Edmondson 0, /* lowest usable address */
10756567907SDavid Edmondson 0xffffffffffffffffULL, /* highest usable address */
10856567907SDavid Edmondson 0x7fffffff, /* maximum DMAable byte count */
10956567907SDavid Edmondson MMU_PAGESIZE, /* alignment in bytes */
11056567907SDavid Edmondson 0x7ff, /* bitmap of burst sizes */
11156567907SDavid Edmondson 1, /* minimum transfer */
11256567907SDavid Edmondson 0xffffffffU, /* maximum transfer */
11356567907SDavid Edmondson 0xffffffffffffffffULL, /* maximum segment length */
11456567907SDavid Edmondson 1, /* maximum number of segments */
11556567907SDavid Edmondson 1, /* granularity */
11656567907SDavid Edmondson 0, /* flags (reserved) */
11756567907SDavid Edmondson };
11856567907SDavid Edmondson
11956567907SDavid Edmondson /* DMA access attributes for data: NOT to be byte swapped. */
12056567907SDavid Edmondson static ddi_device_acc_attr_t data_accattr = {
12156567907SDavid Edmondson DDI_DEVICE_ATTR_V0,
12256567907SDavid Edmondson DDI_NEVERSWAP_ACC,
12356567907SDavid Edmondson DDI_STRICTORDER_ACC
12456567907SDavid Edmondson };
12556567907SDavid Edmondson
/*
 * Statistics.
 *
 * Names of the auxiliary kstats published by this driver.  The order
 * of entries here is a contract: xnb_ks_aux_update() assigns values
 * positionally and must match this list exactly.
 */
static const char * const aux_statistics[] = {
	"rx_cksum_deferred",
	"tx_cksum_no_need",
	"rx_rsp_notok",
	"tx_notify_deferred",
	"tx_notify_sent",
	"rx_notify_deferred",
	"rx_notify_sent",
	"tx_too_early",
	"rx_too_early",
	"rx_allocb_failed",
	"tx_allocb_failed",
	"rx_foreign_page",
	"mac_full",
	"spurious_intr",
	"allocation_success",
	"allocation_failure",
	"small_allocation_success",
	"small_allocation_failure",
	"other_allocation_failure",
	"rx_pageboundary_crossed",
	"rx_cpoparea_grown",
	"csum_hardware",
	"csum_software",
	"tx_overflow_page",
	"tx_unexpected_flags",
};
156843e1988Sjohnlev
157843e1988Sjohnlev static int
xnb_ks_aux_update(kstat_t * ksp,int flag)158843e1988Sjohnlev xnb_ks_aux_update(kstat_t *ksp, int flag)
159843e1988Sjohnlev {
160843e1988Sjohnlev xnb_t *xnbp;
161843e1988Sjohnlev kstat_named_t *knp;
162843e1988Sjohnlev
163843e1988Sjohnlev if (flag != KSTAT_READ)
164843e1988Sjohnlev return (EACCES);
165843e1988Sjohnlev
166843e1988Sjohnlev xnbp = ksp->ks_private;
167843e1988Sjohnlev knp = ksp->ks_data;
168843e1988Sjohnlev
169843e1988Sjohnlev /*
170843e1988Sjohnlev * Assignment order should match that of the names in
171843e1988Sjohnlev * aux_statistics.
172843e1988Sjohnlev */
173024c26efSMax zhen (knp++)->value.ui64 = xnbp->xnb_stat_rx_cksum_deferred;
174024c26efSMax zhen (knp++)->value.ui64 = xnbp->xnb_stat_tx_cksum_no_need;
175024c26efSMax zhen (knp++)->value.ui64 = xnbp->xnb_stat_rx_rsp_notok;
176551bc2a6Smrj (knp++)->value.ui64 = xnbp->xnb_stat_tx_notify_deferred;
177551bc2a6Smrj (knp++)->value.ui64 = xnbp->xnb_stat_tx_notify_sent;
178551bc2a6Smrj (knp++)->value.ui64 = xnbp->xnb_stat_rx_notify_deferred;
179551bc2a6Smrj (knp++)->value.ui64 = xnbp->xnb_stat_rx_notify_sent;
180551bc2a6Smrj (knp++)->value.ui64 = xnbp->xnb_stat_tx_too_early;
181551bc2a6Smrj (knp++)->value.ui64 = xnbp->xnb_stat_rx_too_early;
182551bc2a6Smrj (knp++)->value.ui64 = xnbp->xnb_stat_rx_allocb_failed;
183551bc2a6Smrj (knp++)->value.ui64 = xnbp->xnb_stat_tx_allocb_failed;
184024c26efSMax zhen (knp++)->value.ui64 = xnbp->xnb_stat_rx_foreign_page;
185551bc2a6Smrj (knp++)->value.ui64 = xnbp->xnb_stat_mac_full;
186551bc2a6Smrj (knp++)->value.ui64 = xnbp->xnb_stat_spurious_intr;
187551bc2a6Smrj (knp++)->value.ui64 = xnbp->xnb_stat_allocation_success;
188551bc2a6Smrj (knp++)->value.ui64 = xnbp->xnb_stat_allocation_failure;
189551bc2a6Smrj (knp++)->value.ui64 = xnbp->xnb_stat_small_allocation_success;
190551bc2a6Smrj (knp++)->value.ui64 = xnbp->xnb_stat_small_allocation_failure;
191551bc2a6Smrj (knp++)->value.ui64 = xnbp->xnb_stat_other_allocation_failure;
192024c26efSMax zhen (knp++)->value.ui64 = xnbp->xnb_stat_rx_pagebndry_crossed;
193024c26efSMax zhen (knp++)->value.ui64 = xnbp->xnb_stat_rx_cpoparea_grown;
194551bc2a6Smrj (knp++)->value.ui64 = xnbp->xnb_stat_csum_hardware;
195551bc2a6Smrj (knp++)->value.ui64 = xnbp->xnb_stat_csum_software;
196fd0939efSDavid Edmondson (knp++)->value.ui64 = xnbp->xnb_stat_tx_overflow_page;
197fd0939efSDavid Edmondson (knp++)->value.ui64 = xnbp->xnb_stat_tx_unexpected_flags;
198843e1988Sjohnlev
199843e1988Sjohnlev return (0);
200843e1988Sjohnlev }
201843e1988Sjohnlev
202843e1988Sjohnlev static boolean_t
xnb_ks_init(xnb_t * xnbp)203843e1988Sjohnlev xnb_ks_init(xnb_t *xnbp)
204843e1988Sjohnlev {
205843e1988Sjohnlev int nstat = sizeof (aux_statistics) /
206843e1988Sjohnlev sizeof (aux_statistics[0]);
207fd0939efSDavid Edmondson const char * const *cp = aux_statistics;
208843e1988Sjohnlev kstat_named_t *knp;
209843e1988Sjohnlev
210843e1988Sjohnlev /*
211843e1988Sjohnlev * Create and initialise kstats.
212843e1988Sjohnlev */
213551bc2a6Smrj xnbp->xnb_kstat_aux = kstat_create(ddi_driver_name(xnbp->xnb_devinfo),
214551bc2a6Smrj ddi_get_instance(xnbp->xnb_devinfo), "aux_statistics", "net",
215843e1988Sjohnlev KSTAT_TYPE_NAMED, nstat, 0);
216551bc2a6Smrj if (xnbp->xnb_kstat_aux == NULL)
217843e1988Sjohnlev return (B_FALSE);
218843e1988Sjohnlev
219551bc2a6Smrj xnbp->xnb_kstat_aux->ks_private = xnbp;
220551bc2a6Smrj xnbp->xnb_kstat_aux->ks_update = xnb_ks_aux_update;
221843e1988Sjohnlev
222551bc2a6Smrj knp = xnbp->xnb_kstat_aux->ks_data;
223843e1988Sjohnlev while (nstat > 0) {
224843e1988Sjohnlev kstat_named_init(knp, *cp, KSTAT_DATA_UINT64);
225843e1988Sjohnlev
226843e1988Sjohnlev knp++;
227843e1988Sjohnlev cp++;
228843e1988Sjohnlev nstat--;
229843e1988Sjohnlev }
230843e1988Sjohnlev
231551bc2a6Smrj kstat_install(xnbp->xnb_kstat_aux);
232843e1988Sjohnlev
233843e1988Sjohnlev return (B_TRUE);
234843e1988Sjohnlev }
235843e1988Sjohnlev
/*
 * Tear down the auxiliary kstat created by xnb_ks_init().
 */
static void
xnb_ks_free(xnb_t *xnbp)
{
	kstat_delete(xnbp->xnb_kstat_aux);
}
241843e1988Sjohnlev
/*
 * Calculate and insert the transport checksum for an arbitrary packet.
 *
 * The packet is marked as requiring a full checksum and the MAC layer
 * is asked to emulate the checksum in software.  NOTE(review):
 * mac_hw_emul() takes &mp and may replace the chain; presumably mp can
 * come back NULL on failure - callers should tolerate that (confirm
 * against mac_hw_emul()'s contract).
 */
static mblk_t *
xnb_software_csum(xnb_t *xnbp, mblk_t *mp)
{
	_NOTE(ARGUNUSED(xnbp));

	/*
	 * XXPV dme: shouldn't rely on mac_fix_cksum(), not least
	 * because it doesn't cover all of the interesting cases :-(
	 */
	mac_hcksum_set(mp, 0, 0, 0, 0, HCK_FULLCKSUM);
	mac_hw_emul(&mp, NULL, NULL, MAC_HWCKSUM_EMUL);
	return (mp);
}
258843e1988Sjohnlev
/*
 * Arrange for the transport checksum of `mp' to be filled in, using
 * hardware offload where the MAC capabilities (`capab') allow it and
 * falling back to software (xnb_software_csum()) otherwise.
 *
 * Hardware offload is attempted only for single-mblk TCP/IPv4 and
 * UDP/IPv4 packets; everything else takes the software path.  Returns
 * the (possibly modified) message.
 */
mblk_t *
xnb_process_cksum_flags(xnb_t *xnbp, mblk_t *mp, uint32_t capab)
{
	struct ether_header *ehp;
	uint16_t sap;
	uint32_t offset;
	ipha_t *ipha;

	ASSERT(mp->b_next == NULL);

	/*
	 * Check that the packet is contained in a single mblk.  In
	 * the "from peer" path this is true today, but may change
	 * when scatter gather support is added.  In the "to peer"
	 * path we cannot be sure, but in most cases it will be true
	 * (in the xnbo case the packet has come from a MAC device
	 * which is unlikely to split packets).
	 */
	if (mp->b_cont != NULL)
		goto software;

	/*
	 * If the MAC has no hardware capability don't do any further
	 * checking.
	 */
	if (capab == 0)
		goto software;

	ASSERT(MBLKL(mp) >= sizeof (struct ether_header));
	ehp = (struct ether_header *)mp->b_rptr;

	/* Step past the (possibly VLAN-tagged) Ethernet header. */
	if (ntohs(ehp->ether_type) == VLAN_TPID) {
		struct ether_vlan_header *evhp;

		ASSERT(MBLKL(mp) >= sizeof (struct ether_vlan_header));
		evhp = (struct ether_vlan_header *)mp->b_rptr;
		sap = ntohs(evhp->ether_type);
		offset = sizeof (struct ether_vlan_header);
	} else {
		sap = ntohs(ehp->ether_type);
		offset = sizeof (struct ether_header);
	}

	/*
	 * We only attempt to do IPv4 packets in hardware.
	 */
	if (sap != ETHERTYPE_IP)
		goto software;

	/*
	 * We know that this is an IPv4 packet.
	 */
	ipha = (ipha_t *)(mp->b_rptr + offset);

	switch (ipha->ipha_protocol) {
	case IPPROTO_TCP:
	case IPPROTO_UDP: {
		uint32_t start, length, stuff, cksum;
		uint16_t *stuffp;

		/*
		 * This is a TCP/IPv4 or UDP/IPv4 packet, for which we
		 * can use full IPv4 and partial checksum offload.
		 */
		if ((capab & (HCKSUM_INET_FULL_V4|HCKSUM_INET_PARTIAL)) == 0)
			break;

		/*
		 * NOTE(review): start assumes an IP header with no
		 * options (IP_SIMPLE_HDR_LENGTH) - confirm callers
		 * never present packets with IP options on this path.
		 */
		start = IP_SIMPLE_HDR_LENGTH;
		length = ntohs(ipha->ipha_length);
		if (ipha->ipha_protocol == IPPROTO_TCP) {
			stuff = start + TCP_CHECKSUM_OFFSET;
			cksum = IP_TCP_CSUM_COMP;
		} else {
			stuff = start + UDP_CHECKSUM_OFFSET;
			cksum = IP_UDP_CSUM_COMP;
		}
		/* Address of the transport checksum field itself. */
		stuffp = (uint16_t *)(mp->b_rptr + offset + stuff);

		if (capab & HCKSUM_INET_FULL_V4) {
			/*
			 * Some devices require that the checksum
			 * field of the packet is zero for full
			 * offload.
			 */
			*stuffp = 0;

			mac_hcksum_set(mp, 0, 0, 0, 0, HCK_FULLCKSUM);

			xnbp->xnb_stat_csum_hardware++;

			return (mp);
		}

		if (capab & HCKSUM_INET_PARTIAL) {
			if (*stuffp == 0) {
				ipaddr_t src, dst;

				/*
				 * Older Solaris guests don't insert
				 * the pseudo-header checksum, so we
				 * calculate it here.
				 */
				src = ipha->ipha_src;
				dst = ipha->ipha_dst;

				/* Ones-complement sum of the pseudo-header. */
				cksum += (dst >> 16) + (dst & 0xFFFF);
				cksum += (src >> 16) + (src & 0xFFFF);
				cksum += length - IP_SIMPLE_HDR_LENGTH;

				/* Fold the carries back in, twice. */
				cksum = (cksum >> 16) + (cksum & 0xFFFF);
				cksum = (cksum >> 16) + (cksum & 0xFFFF);

				ASSERT(cksum <= 0xFFFF);

				*stuffp = (uint16_t)(cksum ? cksum : ~cksum);
			}

			mac_hcksum_set(mp, start, stuff, length, 0,
			    HCK_PARTIALCKSUM);

			xnbp->xnb_stat_csum_hardware++;

			return (mp);
		}

		/* NOTREACHED */
		break;
	}

	default:
		/* Use software. */
		break;
	}

software:
	/*
	 * We are not able to use any offload so do the whole thing in
	 * software.
	 */
	xnbp->xnb_stat_csum_software++;

	return (xnb_software_csum(xnbp, mp));
}
402843e1988Sjohnlev
/*
 * Common attach entry point for the xnb flavours (xnbo/xnbu).
 *
 * Allocates and initialises the per-instance xnb_t, creates the tx
 * buffer cache and kstats, registers xenstore event handlers, and
 * advertises our features to the peer before switching to the
 * InitWait state.  On failure the `failure_N' labels unwind, in
 * reverse order, exactly the resources acquired before the failing
 * step.  Returns DDI_SUCCESS or DDI_FAILURE.
 */
int
xnb_attach(dev_info_t *dip, xnb_flavour_t *flavour, void *flavour_data)
{
	xnb_t *xnbp;
	char *xsname;
	char cachename[32];

	xnbp = kmem_zalloc(sizeof (*xnbp), KM_SLEEP);

	xnbp->xnb_flavour = flavour;
	xnbp->xnb_flavour_data = flavour_data;
	xnbp->xnb_devinfo = dip;
	xnbp->xnb_evtchn = INVALID_EVTCHN;
	xnbp->xnb_irq = B_FALSE;
	xnbp->xnb_tx_ring_handle = INVALID_GRANT_HANDLE;
	xnbp->xnb_rx_ring_handle = INVALID_GRANT_HANDLE;
	xnbp->xnb_connected = B_FALSE;
	xnbp->xnb_hotplugged = B_FALSE;
	xnbp->xnb_detachable = B_FALSE;
	xnbp->xnb_peer = xvdi_get_oeid(dip);
	xnbp->xnb_be_status = XNB_STATE_INIT;
	xnbp->xnb_fe_status = XNB_STATE_INIT;

	xnbp->xnb_tx_buf_count = 0;

	/* Negotiated with the peer later, during ring connection. */
	xnbp->xnb_rx_hv_copy = B_FALSE;
	xnbp->xnb_multicast_control = B_FALSE;

	/* Scratch VA used to map peer pages during receive. */
	xnbp->xnb_rx_va = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
	ASSERT(xnbp->xnb_rx_va != NULL);

	if (ddi_get_iblock_cookie(dip, 0, &xnbp->xnb_icookie)
	    != DDI_SUCCESS)
		goto failure;

	/* Allocated on demand, when/if we enter xnb_copy_to_peer(). */
	xnbp->xnb_rx_cpop = NULL;
	xnbp->xnb_rx_cpop_count = 0;

	mutex_init(&xnbp->xnb_tx_lock, NULL, MUTEX_DRIVER,
	    xnbp->xnb_icookie);
	mutex_init(&xnbp->xnb_rx_lock, NULL, MUTEX_DRIVER,
	    xnbp->xnb_icookie);
	mutex_init(&xnbp->xnb_state_lock, NULL, MUTEX_DRIVER,
	    xnbp->xnb_icookie);

	/* Set driver private pointer now. */
	ddi_set_driver_private(dip, xnbp);

	/* Per-instance cache name, so multiple instances can coexist. */
	(void) sprintf(cachename, "xnb_tx_buf_cache_%d", ddi_get_instance(dip));
	xnbp->xnb_tx_buf_cache = kmem_cache_create(cachename,
	    sizeof (xnb_txbuf_t), 0,
	    xnb_txbuf_constructor, xnb_txbuf_destructor,
	    NULL, xnbp, NULL, 0);
	if (xnbp->xnb_tx_buf_cache == NULL)
		goto failure_0;

	if (!xnb_ks_init(xnbp))
		goto failure_1;

	/*
	 * Receive notification of changes in the state of the
	 * driver in the guest domain.
	 */
	if (xvdi_add_event_handler(dip, XS_OE_STATE, xnb_oe_state_change,
	    NULL) != DDI_SUCCESS)
		goto failure_2;

	/*
	 * Receive notification of hotplug events.
	 */
	if (xvdi_add_event_handler(dip, XS_HP_STATE, xnb_hp_state_change,
	    NULL) != DDI_SUCCESS)
		goto failure_2;

	xsname = xvdi_get_xsname(dip);

	/* Advertise our features in xenstore for the peer to read. */
	if (xenbus_printf(XBT_NULL, xsname,
	    "feature-multicast-control", "%d",
	    xnb_multicast_control ? 1 : 0) != 0)
		goto failure_3;

	if (xenbus_printf(XBT_NULL, xsname,
	    "feature-rx-copy", "%d",  1) != 0)
		goto failure_3;
	/*
	 * Linux domUs seem to depend on "feature-rx-flip" being 0
	 * in addition to "feature-rx-copy" being 1. It seems strange
	 * to use four possible states to describe a binary decision,
	 * but we might as well play nice.
	 */
	if (xenbus_printf(XBT_NULL, xsname,
	    "feature-rx-flip", "%d", 0) != 0)
		goto failure_3;

	(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateInitWait);
	(void) xvdi_post_event(dip, XEN_HP_ADD);

	return (DDI_SUCCESS);

failure_3:
	/* Removes both the OE and HP handlers registered above. */
	xvdi_remove_event_handler(dip, NULL);

failure_2:
	xnb_ks_free(xnbp);

failure_1:
	kmem_cache_destroy(xnbp->xnb_tx_buf_cache);

failure_0:
	mutex_destroy(&xnbp->xnb_state_lock);
	mutex_destroy(&xnbp->xnb_rx_lock);
	mutex_destroy(&xnbp->xnb_tx_lock);

failure:
	vmem_free(heap_arena, xnbp->xnb_rx_va, PAGESIZE);
	kmem_free(xnbp, sizeof (*xnbp));
	return (DDI_FAILURE);
}
522843e1988Sjohnlev
/*
 * Common detach entry point: release, in roughly the reverse of
 * xnb_attach()'s order, everything the instance holds.  The caller
 * must have ensured that we are disconnected from the peer and that
 * no transmit buffers remain outstanding.
 */
void
xnb_detach(dev_info_t *dip)
{
	xnb_t *xnbp = ddi_get_driver_private(dip);

	ASSERT(xnbp != NULL);
	ASSERT(!xnbp->xnb_connected);
	ASSERT(xnbp->xnb_tx_buf_count == 0);

	xnb_disconnect_rings(dip);

	xvdi_remove_event_handler(dip, NULL);

	xnb_ks_free(xnbp);

	kmem_cache_destroy(xnbp->xnb_tx_buf_cache);

	ddi_set_driver_private(dip, NULL);

	mutex_destroy(&xnbp->xnb_state_lock);
	mutex_destroy(&xnbp->xnb_rx_lock);
	mutex_destroy(&xnbp->xnb_tx_lock);

	/* Copy-operation area is allocated lazily; free only if grown. */
	if (xnbp->xnb_rx_cpop_count > 0)
		kmem_free(xnbp->xnb_rx_cpop, sizeof (xnbp->xnb_rx_cpop[0])
		    * xnbp->xnb_rx_cpop_count);

	ASSERT(xnbp->xnb_rx_va != NULL);
	vmem_free(heap_arena, xnbp->xnb_rx_va, PAGESIZE);

	kmem_free(xnbp, sizeof (*xnbp));
}
555843e1988Sjohnlev
/*
 * Allocate a page from the hypervisor to be flipped to the peer.
 *
 * Try to get pages in batches to reduce the overhead of calls into
 * the balloon driver.
 *
 * The batch (mfns[]/nth) is shared across ALL instances and is
 * protected by the global xnb_alloc_page_lock.  Returns the mfn of
 * the allocated page, or 0 on failure.
 */
static mfn_t
xnb_alloc_page(xnb_t *xnbp)
{
#define	WARNING_RATE_LIMIT 100
#define	BATCH_SIZE 256
	static mfn_t mfns[BATCH_SIZE];	/* common across all instances */
	static int nth = BATCH_SIZE;	/* == BATCH_SIZE means "batch empty" */
	mfn_t mfn;

	mutex_enter(&xnb_alloc_page_lock);
	if (nth == BATCH_SIZE) {
		/* Batch exhausted - refill it from the balloon driver. */
		if (balloon_alloc_pages(BATCH_SIZE, mfns) != BATCH_SIZE) {
			xnbp->xnb_stat_allocation_failure++;
			mutex_exit(&xnb_alloc_page_lock);

			/*
			 * Try for a single page in low memory situations.
			 */
			if (balloon_alloc_pages(1, &mfn) != 1) {
				/* Rate-limit the warning to every 100th. */
				if ((xnbp->xnb_stat_small_allocation_failure++
				    % WARNING_RATE_LIMIT) == 0)
					cmn_err(CE_WARN, "xnb_alloc_page: "
					    "Cannot allocate memory to "
					    "transfer packets to peer.");
				return (0);
			} else {
				xnbp->xnb_stat_small_allocation_success++;
				return (mfn);
			}
		}

		nth = 0;
		xnbp->xnb_stat_allocation_success++;
	}

	mfn = mfns[nth++];
	mutex_exit(&xnb_alloc_page_lock);

	ASSERT(mfn != 0);

	return (mfn);
#undef BATCH_SIZE
#undef WARNING_RATE_LIMIT
}
606843e1988Sjohnlev
60756567907SDavid Edmondson /*
60856567907SDavid Edmondson * Free a page back to the hypervisor.
60956567907SDavid Edmondson *
61056567907SDavid Edmondson * This happens only in the error path, so batching is not worth the
61156567907SDavid Edmondson * complication.
61256567907SDavid Edmondson */
613843e1988Sjohnlev static void
xnb_free_page(xnb_t * xnbp,mfn_t mfn)614843e1988Sjohnlev xnb_free_page(xnb_t *xnbp, mfn_t mfn)
615843e1988Sjohnlev {
61656567907SDavid Edmondson _NOTE(ARGUNUSED(xnbp));
617843e1988Sjohnlev int r;
618d2b85481Srscott pfn_t pfn;
619d2b85481Srscott
620d2b85481Srscott pfn = xen_assign_pfn(mfn);
621d2b85481Srscott pfnzero(pfn, 0, PAGESIZE);
622d2b85481Srscott xen_release_pfn(pfn);
623843e1988Sjohnlev
624843e1988Sjohnlev if ((r = balloon_free_pages(1, &mfn, NULL, NULL)) != 1) {
625843e1988Sjohnlev cmn_err(CE_WARN, "free_page: cannot decrease memory "
626843e1988Sjohnlev "reservation (%d): page kept but unusable (mfn = 0x%lx).",
627843e1988Sjohnlev r, mfn);
628843e1988Sjohnlev }
629843e1988Sjohnlev }
630843e1988Sjohnlev
631551bc2a6Smrj /*
63256567907SDavid Edmondson * Similar to RING_HAS_UNCONSUMED_REQUESTS(&xnbp->rx_ring) but using
63356567907SDavid Edmondson * local variables. Used in both xnb_to_peer() and xnb_copy_to_peer().
634551bc2a6Smrj */
635551bc2a6Smrj #define XNB_RING_HAS_UNCONSUMED_REQUESTS(_r) \
636551bc2a6Smrj ((((_r)->sring->req_prod - loop) < \
637551bc2a6Smrj (RING_SIZE(_r) - (loop - prod))) ? \
638551bc2a6Smrj ((_r)->sring->req_prod - loop) : \
639551bc2a6Smrj (RING_SIZE(_r) - (loop - prod)))
640551bc2a6Smrj
64156567907SDavid Edmondson /*
64256567907SDavid Edmondson * Pass packets to the peer using page flipping.
64356567907SDavid Edmondson */
644843e1988Sjohnlev mblk_t *
xnb_to_peer(xnb_t *xnbp, mblk_t *mp)
{
	/*
	 * 'free' remembers the head of the chain so that every mblk we
	 * consume can be released at the end; 'prev' trails 'mp' so the
	 * consumed prefix can be unlinked from the unconsumed remainder.
	 */
	mblk_t *free = mp, *prev = NULL;
	size_t len;
	gnttab_transfer_t *gop;
	boolean_t notify;
	RING_IDX loop, prod, end;

	/*
	 * For each packet the sequence of operations is:
	 *
	 * 1. get a new page from the hypervisor.
	 * 2. get a request slot from the ring.
	 * 3. copy the data into the new page.
	 * 4. transfer the page to the peer.
	 * 5. update the request slot.
	 * 6. kick the peer.
	 * 7. free mp.
	 *
	 * In order to reduce the number of hypercalls, we prepare
	 * several packets for the peer and perform a single hypercall
	 * to transfer them.
	 */

	/*
	 * NOTE(review): presumably this initialization exists to quiet
	 * uninitialized-use warnings; 'len' is otherwise only assigned
	 * inside the preparation loop below — confirm.
	 */
	len = 0;
	mutex_enter(&xnbp->xnb_rx_lock);

	/*
	 * If we are not connected to the peer or have not yet
	 * finished hotplug it is too early to pass packets to the
	 * peer.
	 */
	if (!(xnbp->xnb_connected && xnbp->xnb_hotplugged)) {
		mutex_exit(&xnbp->xnb_rx_lock);
		DTRACE_PROBE(flip_rx_too_early);
		xnbp->xnb_stat_rx_too_early++;
		return (mp);
	}

	loop = xnbp->xnb_rx_ring.req_cons;
	prod = xnbp->xnb_rx_ring.rsp_prod_pvt;
	gop = xnbp->xnb_rx_top;

	while ((mp != NULL) &&
	    XNB_RING_HAS_UNCONSUMED_REQUESTS(&xnbp->xnb_rx_ring)) {

		mfn_t mfn;
		pfn_t pfn;
		netif_rx_request_t *rxreq;
		netif_rx_response_t *rxresp;
		char *valoop;
		mblk_t *ml;
		uint16_t cksum_flags;

		/* 1 */
		if ((mfn = xnb_alloc_page(xnbp)) == 0) {
			xnbp->xnb_stat_rx_defer++;
			break;
		}

		/* 2 */
		rxreq = RING_GET_REQUEST(&xnbp->xnb_rx_ring, loop);

#ifdef XNB_DEBUG
		if (!(rxreq->id < NET_RX_RING_SIZE))
			cmn_err(CE_PANIC, "xnb_to_peer: "
			    "id %d out of range in request 0x%p",
			    rxreq->id, (void *)rxreq);
#endif /* XNB_DEBUG */

		/* Assign a pfn and map the new page at the allocated va. */
		pfn = xen_assign_pfn(mfn);
		hat_devload(kas.a_hat, xnbp->xnb_rx_va, PAGESIZE,
		    pfn, PROT_READ | PROT_WRITE, HAT_LOAD);

		/* 3 */
		/* Flatten the whole b_cont chain into the new page. */
		len = 0;
		valoop = xnbp->xnb_rx_va;
		for (ml = mp; ml != NULL; ml = ml->b_cont) {
			size_t chunk = ml->b_wptr - ml->b_rptr;

			bcopy(ml->b_rptr, valoop, chunk);
			valoop += chunk;
			len += chunk;
		}

		ASSERT(len < PAGESIZE);

		/* Release the pfn. */
		hat_unload(kas.a_hat, xnbp->xnb_rx_va, PAGESIZE,
		    HAT_UNLOAD_UNMAP);
		xen_release_pfn(pfn);

		/* 4 */
		gop->mfn = mfn;
		gop->domid = xnbp->xnb_peer;
		gop->ref = rxreq->gref;

		/* 5.1 */
		rxresp = RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod);
		rxresp->offset = 0;
		rxresp->flags = 0;

		cksum_flags = xnbp->xnb_flavour->xf_cksum_to_peer(xnbp, mp);
		if (cksum_flags != 0)
			xnbp->xnb_stat_rx_cksum_deferred++;
		rxresp->flags |= cksum_flags;

		rxresp->id = RING_GET_REQUEST(&xnbp->xnb_rx_ring, prod)->id;
		rxresp->status = len;

		loop++;
		prod++;
		gop++;
		prev = mp;
		mp = mp->b_next;
	}

	/*
	 * Did we actually do anything?
	 */
	if (loop == xnbp->xnb_rx_ring.req_cons) {
		mutex_exit(&xnbp->xnb_rx_lock);
		return (mp);
	}

	end = loop;

	/*
	 * Unlink the end of the 'done' list from the remainder.
	 */
	ASSERT(prev != NULL);
	prev->b_next = NULL;

	/*
	 * Hand all of the prepared pages to the peer in a single
	 * batched hypercall.
	 */
	if (HYPERVISOR_grant_table_op(GNTTABOP_transfer, xnbp->xnb_rx_top,
	    loop - xnbp->xnb_rx_ring.req_cons) != 0) {
		cmn_err(CE_WARN, "xnb_to_peer: transfer operation failed");
	}

	/*
	 * Walk the transfer operations again to examine their per-page
	 * status and to settle the page accounting.
	 */
	loop = xnbp->xnb_rx_ring.req_cons;
	prod = xnbp->xnb_rx_ring.rsp_prod_pvt;
	gop = xnbp->xnb_rx_top;

	while (loop < end) {
		int16_t status = NETIF_RSP_OKAY;

		if (gop->status != 0) {
			status = NETIF_RSP_ERROR;

			/*
			 * If the status is anything other than
			 * GNTST_bad_page then we don't own the page
			 * any more, so don't try to give it back.
			 */
			if (gop->status != GNTST_bad_page)
				gop->mfn = 0;
		} else {
			/* The page is no longer ours. */
			gop->mfn = 0;
		}

		if (gop->mfn != 0)
			/*
			 * Give back the page, as we won't be using
			 * it.
			 */
			xnb_free_page(xnbp, gop->mfn);
		else
			/*
			 * We gave away a page, update our accounting
			 * now.
			 */
			balloon_drv_subtracted(1);

		/* 5.2 */
		if (status != NETIF_RSP_OKAY) {
			RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod)->status =
			    status;
		} else {
			xnbp->xnb_stat_ipackets++;
			/*
			 * NOTE(review): 'len' here is the length of the
			 * *last* packet prepared above, so each successful
			 * packet adds that same value to rbytes — the byte
			 * count is approximate when several packets are
			 * batched.  Confirm whether this is intentional.
			 */
			xnbp->xnb_stat_rbytes += len;
		}

		loop++;
		prod++;
		gop++;
	}

	xnbp->xnb_rx_ring.req_cons = loop;
	xnbp->xnb_rx_ring.rsp_prod_pvt = prod;

	/* 6 */
	/* LINTED: constant in conditional context */
	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xnbp->xnb_rx_ring, notify);
	if (notify) {
		ec_notify_via_evtchn(xnbp->xnb_evtchn);
		xnbp->xnb_stat_rx_notify_sent++;
	} else {
		xnbp->xnb_stat_rx_notify_deferred++;
	}

	if (mp != NULL)
		xnbp->xnb_stat_rx_defer++;

	mutex_exit(&xnbp->xnb_rx_lock);

	/* Free mblk_t's that we consumed. */
	freemsgchain(free);

	return (mp);
}
856843e1988Sjohnlev
85756567907SDavid Edmondson /* Helper functions for xnb_copy_to_peer(). */
858551bc2a6Smrj
859551bc2a6Smrj /*
860551bc2a6Smrj * Grow the array of copy operation descriptors.
861551bc2a6Smrj */
86256567907SDavid Edmondson static boolean_t
grow_cpop_area(xnb_t * xnbp)86356567907SDavid Edmondson grow_cpop_area(xnb_t *xnbp)
864551bc2a6Smrj {
86556567907SDavid Edmondson size_t count;
86656567907SDavid Edmondson gnttab_copy_t *new;
867551bc2a6Smrj
868024c26efSMax zhen ASSERT(MUTEX_HELD(&xnbp->xnb_rx_lock));
869551bc2a6Smrj
87056567907SDavid Edmondson count = xnbp->xnb_rx_cpop_count + CPOP_DEFCNT;
871551bc2a6Smrj
87256567907SDavid Edmondson if ((new = kmem_alloc(sizeof (new[0]) * count, KM_NOSLEEP)) == NULL) {
873551bc2a6Smrj xnbp->xnb_stat_other_allocation_failure++;
87456567907SDavid Edmondson return (B_FALSE);
875551bc2a6Smrj }
876551bc2a6Smrj
87756567907SDavid Edmondson bcopy(xnbp->xnb_rx_cpop, new,
87856567907SDavid Edmondson sizeof (xnbp->xnb_rx_cpop[0]) * xnbp->xnb_rx_cpop_count);
879551bc2a6Smrj
88056567907SDavid Edmondson kmem_free(xnbp->xnb_rx_cpop,
88156567907SDavid Edmondson sizeof (xnbp->xnb_rx_cpop[0]) * xnbp->xnb_rx_cpop_count);
882551bc2a6Smrj
88356567907SDavid Edmondson xnbp->xnb_rx_cpop = new;
88456567907SDavid Edmondson xnbp->xnb_rx_cpop_count = count;
885551bc2a6Smrj
886024c26efSMax zhen xnbp->xnb_stat_rx_cpoparea_grown++;
887551bc2a6Smrj
88856567907SDavid Edmondson return (B_TRUE);
889551bc2a6Smrj }
890551bc2a6Smrj
891551bc2a6Smrj /*
892551bc2a6Smrj * Check whether an address is on a page that's foreign to this domain.
893551bc2a6Smrj */
894551bc2a6Smrj static boolean_t
is_foreign(void * addr)895551bc2a6Smrj is_foreign(void *addr)
896551bc2a6Smrj {
89756567907SDavid Edmondson pfn_t pfn = hat_getpfnum(kas.a_hat, addr);
898551bc2a6Smrj
89956567907SDavid Edmondson return ((pfn & PFN_IS_FOREIGN_MFN) == PFN_IS_FOREIGN_MFN);
900551bc2a6Smrj }
901551bc2a6Smrj
902551bc2a6Smrj /*
903551bc2a6Smrj * Insert a newly allocated mblk into a chain, replacing the old one.
904551bc2a6Smrj */
905551bc2a6Smrj static mblk_t *
replace_msg(mblk_t * mp,size_t len,mblk_t * mp_prev,mblk_t * ml_prev)906551bc2a6Smrj replace_msg(mblk_t *mp, size_t len, mblk_t *mp_prev, mblk_t *ml_prev)
907551bc2a6Smrj {
908551bc2a6Smrj uint32_t start, stuff, end, value, flags;
909551bc2a6Smrj mblk_t *new_mp;
910551bc2a6Smrj
911551bc2a6Smrj new_mp = copyb(mp);
9120dc2366fSVenugopal Iyer if (new_mp == NULL) {
913551bc2a6Smrj cmn_err(CE_PANIC, "replace_msg: cannot alloc new message"
914551bc2a6Smrj "for %p, len %lu", (void *) mp, len);
9150dc2366fSVenugopal Iyer }
916551bc2a6Smrj
9170dc2366fSVenugopal Iyer mac_hcksum_get(mp, &start, &stuff, &end, &value, &flags);
9180dc2366fSVenugopal Iyer mac_hcksum_set(new_mp, start, stuff, end, value, flags);
919551bc2a6Smrj
920551bc2a6Smrj new_mp->b_next = mp->b_next;
921551bc2a6Smrj new_mp->b_prev = mp->b_prev;
922551bc2a6Smrj new_mp->b_cont = mp->b_cont;
923551bc2a6Smrj
924551bc2a6Smrj /* Make sure we only overwrite pointers to the mblk being replaced. */
925551bc2a6Smrj if (mp_prev != NULL && mp_prev->b_next == mp)
926551bc2a6Smrj mp_prev->b_next = new_mp;
927551bc2a6Smrj
928551bc2a6Smrj if (ml_prev != NULL && ml_prev->b_cont == mp)
929551bc2a6Smrj ml_prev->b_cont = new_mp;
930551bc2a6Smrj
931551bc2a6Smrj mp->b_next = mp->b_prev = mp->b_cont = NULL;
932551bc2a6Smrj freemsg(mp);
933551bc2a6Smrj
934551bc2a6Smrj return (new_mp);
935551bc2a6Smrj }
936551bc2a6Smrj
937551bc2a6Smrj /*
938551bc2a6Smrj * Set all the fields in a gnttab_copy_t.
939551bc2a6Smrj */
940551bc2a6Smrj static void
setup_gop(xnb_t * xnbp,gnttab_copy_t * gp,uchar_t * rptr,size_t s_off,size_t d_off,size_t len,grant_ref_t d_ref)941551bc2a6Smrj setup_gop(xnb_t *xnbp, gnttab_copy_t *gp, uchar_t *rptr,
942551bc2a6Smrj size_t s_off, size_t d_off, size_t len, grant_ref_t d_ref)
943551bc2a6Smrj {
944551bc2a6Smrj ASSERT(xnbp != NULL && gp != NULL);
945551bc2a6Smrj
946551bc2a6Smrj gp->source.offset = s_off;
947551bc2a6Smrj gp->source.u.gmfn = pfn_to_mfn(hat_getpfnum(kas.a_hat, (caddr_t)rptr));
948551bc2a6Smrj gp->source.domid = DOMID_SELF;
949551bc2a6Smrj
950551bc2a6Smrj gp->len = (uint16_t)len;
951551bc2a6Smrj gp->flags = GNTCOPY_dest_gref;
952551bc2a6Smrj gp->status = 0;
953551bc2a6Smrj
954551bc2a6Smrj gp->dest.u.ref = d_ref;
955551bc2a6Smrj gp->dest.offset = d_off;
956551bc2a6Smrj gp->dest.domid = xnbp->xnb_peer;
957551bc2a6Smrj }
958551bc2a6Smrj
/*
 * Pass packets to the peer using hypervisor copy operations.
 *
 * Consumed packets are freed; any packets that could not be passed to
 * the peer (ring full, allocation failure, not yet connected) are
 * returned to the caller as an mblk chain.
 */
mblk_t *
xnb_copy_to_peer(xnb_t *xnbp, mblk_t *mp)
{
	mblk_t *free = mp, *mp_prev = NULL, *saved_mp = mp;
	mblk_t *ml, *ml_prev;
	boolean_t notify;
	RING_IDX loop, prod;
	int i;

	/*
	 * If the peer does not pre-post buffers for received packets,
	 * use page flipping to pass packets to it.
	 */
	if (!xnbp->xnb_rx_hv_copy)
		return (xnb_to_peer(xnbp, mp));

	/*
	 * For each packet the sequence of operations is:
	 *
	 * 1. get a request slot from the ring.
	 * 2. set up data for hypercall (see NOTE below)
	 * 3. have the hypervisore copy the data
	 * 4. update the request slot.
	 * 5. kick the peer.
	 *
	 * NOTE ad 2.
	 * In order to reduce the number of hypercalls, we prepare
	 * several mblks (mp->b_cont != NULL) for the peer and
	 * perform a single hypercall to transfer them.  We also have
	 * to set up a seperate copy operation for every page.
	 *
	 * If we have more than one packet (mp->b_next != NULL), we do
	 * this whole dance repeatedly.
	 */

	mutex_enter(&xnbp->xnb_rx_lock);

	/* Too early to pass packets if not yet connected/hotplugged. */
	if (!(xnbp->xnb_connected && xnbp->xnb_hotplugged)) {
		mutex_exit(&xnbp->xnb_rx_lock);
		DTRACE_PROBE(copy_rx_too_early);
		xnbp->xnb_stat_rx_too_early++;
		return (mp);
	}

	loop = xnbp->xnb_rx_ring.req_cons;
	prod = xnbp->xnb_rx_ring.rsp_prod_pvt;

	while ((mp != NULL) &&
	    XNB_RING_HAS_UNCONSUMED_REQUESTS(&xnbp->xnb_rx_ring)) {
		netif_rx_request_t *rxreq;
		size_t d_offset, len;
		int item_count;
		gnttab_copy_t *gop_cp;
		netif_rx_response_t *rxresp;
		uint16_t cksum_flags;
		int16_t status = NETIF_RSP_OKAY;

		/* 1 */
		rxreq = RING_GET_REQUEST(&xnbp->xnb_rx_ring, loop);

#ifdef XNB_DEBUG
		if (!(rxreq->id < NET_RX_RING_SIZE))
			cmn_err(CE_PANIC, "xnb_copy_to_peer: "
			    "id %d out of range in request 0x%p",
			    rxreq->id, (void *)rxreq);
#endif /* XNB_DEBUG */

		/* 2 */
		d_offset = 0;
		len = 0;
		item_count = 0;

		gop_cp = xnbp->xnb_rx_cpop;

		/*
		 * We walk the b_cont pointers and set up a
		 * gnttab_copy_t for each sub-page chunk in each data
		 * block.
		 */
		/* 2a */
		for (ml = mp, ml_prev = NULL; ml != NULL; ml = ml->b_cont) {
			size_t chunk = ml->b_wptr - ml->b_rptr;
			uchar_t *r_tmp, *rpt_align;
			size_t r_offset;

			/*
			 * The hypervisor will not allow us to
			 * reference a foreign page (e.g. one
			 * belonging to another domain) by mfn in the
			 * copy operation. If the data in this mblk is
			 * on such a page we must copy the data into a
			 * local page before initiating the hypervisor
			 * copy operation.
			 */
			if (is_foreign(ml->b_rptr) || is_foreign(ml->b_wptr)) {
				mblk_t *ml_new = replace_msg(ml, chunk,
				    mp_prev, ml_prev);

				/* We can still use old ml, but not *ml! */
				if (free == ml)
					free = ml_new;
				if (mp == ml)
					mp = ml_new;
				ml = ml_new;

				xnbp->xnb_stat_rx_foreign_page++;
			}

			rpt_align = (uchar_t *)ALIGN2PAGE(ml->b_rptr);
			r_offset = (uint16_t)(ml->b_rptr - rpt_align);
			r_tmp = ml->b_rptr;

			if (d_offset + chunk > PAGESIZE)
				cmn_err(CE_PANIC, "xnb_copy_to_peer: mp %p "
				    "(svd: %p), ml %p,rpt_alg. %p, d_offset "
				    "(%lu) + chunk (%lu) > PAGESIZE %d!",
				    (void *)mp, (void *)saved_mp, (void *)ml,
				    (void *)rpt_align,
				    d_offset, chunk, (int)PAGESIZE);

			while (chunk > 0) {
				size_t part_len;

				if (item_count == xnbp->xnb_rx_cpop_count) {
					if (!grow_cpop_area(xnbp))
						goto failure;
					/*
					 * grow_cpop_area() reallocates
					 * the array, so re-point at the
					 * current slot.
					 */
					gop_cp = &xnbp->xnb_rx_cpop[item_count];
				}
				/*
				 * If our mblk crosses a page boundary, we need
				 * to do a seperate copy for each page.
				 */
				if (r_offset + chunk > PAGESIZE) {
					part_len = PAGESIZE - r_offset;

					DTRACE_PROBE3(mblk_page_crossed,
					    (mblk_t *), ml, int, chunk, int,
					    (int)r_offset);

					xnbp->xnb_stat_rx_pagebndry_crossed++;
				} else {
					part_len = chunk;
				}

				setup_gop(xnbp, gop_cp, r_tmp, r_offset,
				    d_offset, part_len, rxreq->gref);

				chunk -= part_len;

				len += part_len;
				d_offset += part_len;
				r_tmp += part_len;
				/*
				 * The 2nd, 3rd ... last copies will always
				 * start at r_tmp, therefore r_offset is 0.
				 */
				r_offset = 0;
				gop_cp++;
				item_count++;
			}
			ml_prev = ml;

			DTRACE_PROBE4(mblk_loop_end, (mblk_t *), ml, int,
			    chunk, int, len, int, item_count);
		}
		/* 3 */
		if (HYPERVISOR_grant_table_op(GNTTABOP_copy, xnbp->xnb_rx_cpop,
		    item_count) != 0) {
			cmn_err(CE_WARN, "xnb_copy_to_peer: copy op. failed");
			DTRACE_PROBE(HV_granttableopfailed);
		}

		/* 4 */
		rxresp = RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod);
		rxresp->offset = 0;

		rxresp->flags = 0;

		DTRACE_PROBE4(got_RX_rsp, int, (int)rxresp->id, int,
		    (int)rxresp->offset, int, (int)rxresp->flags, int,
		    (int)rxresp->status);

		cksum_flags = xnbp->xnb_flavour->xf_cksum_to_peer(xnbp, mp);
		if (cksum_flags != 0)
			xnbp->xnb_stat_rx_cksum_deferred++;
		rxresp->flags |= cksum_flags;

		rxresp->id = RING_GET_REQUEST(&xnbp->xnb_rx_ring, prod)->id;
		rxresp->status = len;

		DTRACE_PROBE4(RX_rsp_set, int, (int)rxresp->id, int,
		    (int)rxresp->offset, int, (int)rxresp->flags, int,
		    (int)rxresp->status);

		/*
		 * If any of the individual copy operations failed,
		 * mark the whole packet as failed.
		 */
		for (i = 0; i < item_count; i++) {
			if (xnbp->xnb_rx_cpop[i].status != 0) {
				DTRACE_PROBE2(cpop_status_nonnull, int,
				    (int)xnbp->xnb_rx_cpop[i].status,
				    int, i);
				status = NETIF_RSP_ERROR;
			}
		}

		/* 5.2 */
		if (status != NETIF_RSP_OKAY) {
			RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod)->status =
			    status;
			xnbp->xnb_stat_rx_rsp_notok++;
		} else {
			xnbp->xnb_stat_ipackets++;
			xnbp->xnb_stat_rbytes += len;
		}

		loop++;
		prod++;
		mp_prev = mp;
		mp = mp->b_next;
	}
failure:
	/*
	 * Reached directly when grow_cpop_area() fails mid-packet: the
	 * packets completed so far are still pushed to the peer, and
	 * 'mp' (the unfinished packet and anything after it) is handed
	 * back to the caller.
	 */
	/*
	 * Did we actually do anything?
	 */
	if (loop == xnbp->xnb_rx_ring.req_cons) {
		mutex_exit(&xnbp->xnb_rx_lock);
		return (mp);
	}

	/*
	 * Unlink the end of the 'done' list from the remainder.
	 */
	ASSERT(mp_prev != NULL);
	mp_prev->b_next = NULL;

	xnbp->xnb_rx_ring.req_cons = loop;
	xnbp->xnb_rx_ring.rsp_prod_pvt = prod;

	/* 6 */
	/* LINTED: constant in conditional context */
	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xnbp->xnb_rx_ring, notify);
	if (notify) {
		ec_notify_via_evtchn(xnbp->xnb_evtchn);
		xnbp->xnb_stat_rx_notify_sent++;
	} else {
		xnbp->xnb_stat_rx_notify_deferred++;
	}

	if (mp != NULL)
		xnbp->xnb_stat_rx_defer++;

	mutex_exit(&xnbp->xnb_rx_lock);

	/* Free mblk_t structs we have consumed. */
	freemsgchain(free);

	return (mp);
}
1218551bc2a6Smrj
1219843e1988Sjohnlev
1220843e1988Sjohnlev static void
xnb_tx_notify_peer(xnb_t * xnbp,boolean_t force)122156567907SDavid Edmondson xnb_tx_notify_peer(xnb_t *xnbp, boolean_t force)
1222843e1988Sjohnlev {
1223843e1988Sjohnlev boolean_t notify;
1224843e1988Sjohnlev
1225024c26efSMax zhen ASSERT(MUTEX_HELD(&xnbp->xnb_tx_lock));
1226843e1988Sjohnlev
1227551bc2a6Smrj /* LINTED: constant in conditional context */
1228551bc2a6Smrj RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xnbp->xnb_tx_ring, notify);
122956567907SDavid Edmondson if (notify || force) {
1230551bc2a6Smrj ec_notify_via_evtchn(xnbp->xnb_evtchn);
1231024c26efSMax zhen xnbp->xnb_stat_tx_notify_sent++;
1232843e1988Sjohnlev } else {
1233024c26efSMax zhen xnbp->xnb_stat_tx_notify_deferred++;
1234843e1988Sjohnlev }
1235843e1988Sjohnlev }
1236843e1988Sjohnlev
1237843e1988Sjohnlev static void
xnb_tx_mark_complete(xnb_t * xnbp,RING_IDX id,int16_t status)1238024c26efSMax zhen xnb_tx_mark_complete(xnb_t *xnbp, RING_IDX id, int16_t status)
1239843e1988Sjohnlev {
1240843e1988Sjohnlev RING_IDX i;
1241843e1988Sjohnlev netif_tx_response_t *txresp;
1242843e1988Sjohnlev
1243024c26efSMax zhen ASSERT(MUTEX_HELD(&xnbp->xnb_tx_lock));
1244843e1988Sjohnlev
1245551bc2a6Smrj i = xnbp->xnb_tx_ring.rsp_prod_pvt;
1246843e1988Sjohnlev
1247551bc2a6Smrj txresp = RING_GET_RESPONSE(&xnbp->xnb_tx_ring, i);
1248843e1988Sjohnlev txresp->id = id;
1249843e1988Sjohnlev txresp->status = status;
1250843e1988Sjohnlev
1251551bc2a6Smrj xnbp->xnb_tx_ring.rsp_prod_pvt = i + 1;
1252843e1988Sjohnlev
1253843e1988Sjohnlev /*
1254843e1988Sjohnlev * Note that we don't push the change to the peer here - that
1255843e1988Sjohnlev * is the callers responsibility.
1256843e1988Sjohnlev */
1257843e1988Sjohnlev }
1258843e1988Sjohnlev
1259843e1988Sjohnlev static void
xnb_txbuf_recycle(xnb_txbuf_t * txp)126056567907SDavid Edmondson xnb_txbuf_recycle(xnb_txbuf_t *txp)
1261843e1988Sjohnlev {
126256567907SDavid Edmondson xnb_t *xnbp = txp->xt_xnbp;
1263843e1988Sjohnlev
126456567907SDavid Edmondson kmem_cache_free(xnbp->xnb_tx_buf_cache, txp);
1265551bc2a6Smrj
126656567907SDavid Edmondson xnbp->xnb_tx_buf_outstanding--;
1267843e1988Sjohnlev }
1268843e1988Sjohnlev
126956567907SDavid Edmondson static int
xnb_txbuf_constructor(void * buf,void * arg,int kmflag)127056567907SDavid Edmondson xnb_txbuf_constructor(void *buf, void *arg, int kmflag)
1271843e1988Sjohnlev {
127256567907SDavid Edmondson _NOTE(ARGUNUSED(kmflag));
127356567907SDavid Edmondson xnb_txbuf_t *txp = buf;
127456567907SDavid Edmondson xnb_t *xnbp = arg;
127556567907SDavid Edmondson size_t len;
127656567907SDavid Edmondson ddi_dma_cookie_t dma_cookie;
127756567907SDavid Edmondson uint_t ncookies;
1278843e1988Sjohnlev
127956567907SDavid Edmondson txp->xt_free_rtn.free_func = xnb_txbuf_recycle;
128056567907SDavid Edmondson txp->xt_free_rtn.free_arg = (caddr_t)txp;
128156567907SDavid Edmondson txp->xt_xnbp = xnbp;
128256567907SDavid Edmondson txp->xt_next = NULL;
1283551bc2a6Smrj
128456567907SDavid Edmondson if (ddi_dma_alloc_handle(xnbp->xnb_devinfo, &buf_dma_attr,
128556567907SDavid Edmondson 0, 0, &txp->xt_dma_handle) != DDI_SUCCESS)
128656567907SDavid Edmondson goto failure;
1287551bc2a6Smrj
128856567907SDavid Edmondson if (ddi_dma_mem_alloc(txp->xt_dma_handle, PAGESIZE, &data_accattr,
128956567907SDavid Edmondson DDI_DMA_STREAMING, 0, 0, &txp->xt_buf, &len,
129056567907SDavid Edmondson &txp->xt_acc_handle) != DDI_SUCCESS)
129156567907SDavid Edmondson goto failure_1;
1292551bc2a6Smrj
129356567907SDavid Edmondson if (ddi_dma_addr_bind_handle(txp->xt_dma_handle, NULL, txp->xt_buf,
129456567907SDavid Edmondson len, DDI_DMA_RDWR | DDI_DMA_STREAMING, DDI_DMA_DONTWAIT, 0,
129556567907SDavid Edmondson &dma_cookie, &ncookies)
129656567907SDavid Edmondson != DDI_DMA_MAPPED)
129756567907SDavid Edmondson goto failure_2;
129856567907SDavid Edmondson ASSERT(ncookies == 1);
1299843e1988Sjohnlev
130056567907SDavid Edmondson txp->xt_mfn = xnb_btop(dma_cookie.dmac_laddress);
130156567907SDavid Edmondson txp->xt_buflen = dma_cookie.dmac_size;
1302843e1988Sjohnlev
130356567907SDavid Edmondson DTRACE_PROBE(txbuf_allocated);
1304843e1988Sjohnlev
13051a5e258fSJosef 'Jeff' Sipek atomic_inc_32(&xnbp->xnb_tx_buf_count);
130656567907SDavid Edmondson xnbp->xnb_tx_buf_outstanding++;
1307843e1988Sjohnlev
130856567907SDavid Edmondson return (0);
1309843e1988Sjohnlev
131056567907SDavid Edmondson failure_2:
131156567907SDavid Edmondson ddi_dma_mem_free(&txp->xt_acc_handle);
1312843e1988Sjohnlev
131356567907SDavid Edmondson failure_1:
131456567907SDavid Edmondson ddi_dma_free_handle(&txp->xt_dma_handle);
1315843e1988Sjohnlev
131656567907SDavid Edmondson failure:
1317843e1988Sjohnlev
131856567907SDavid Edmondson return (-1);
1319843e1988Sjohnlev }
1320843e1988Sjohnlev
1321843e1988Sjohnlev static void
xnb_txbuf_destructor(void * buf,void * arg)132256567907SDavid Edmondson xnb_txbuf_destructor(void *buf, void *arg)
1323843e1988Sjohnlev {
132456567907SDavid Edmondson xnb_txbuf_t *txp = buf;
132556567907SDavid Edmondson xnb_t *xnbp = arg;
1326843e1988Sjohnlev
132756567907SDavid Edmondson (void) ddi_dma_unbind_handle(txp->xt_dma_handle);
132856567907SDavid Edmondson ddi_dma_mem_free(&txp->xt_acc_handle);
132956567907SDavid Edmondson ddi_dma_free_handle(&txp->xt_dma_handle);
1330843e1988Sjohnlev
13311a5e258fSJosef 'Jeff' Sipek atomic_dec_32(&xnbp->xnb_tx_buf_count);
1332843e1988Sjohnlev }
1333843e1988Sjohnlev
133456567907SDavid Edmondson /*
133556567907SDavid Edmondson * Take packets from the peer and deliver them onward.
133656567907SDavid Edmondson */
1337843e1988Sjohnlev static mblk_t *
xnb_from_peer(xnb_t * xnbp)1338024c26efSMax zhen xnb_from_peer(xnb_t *xnbp)
1339843e1988Sjohnlev {
1340843e1988Sjohnlev RING_IDX start, end, loop;
134156567907SDavid Edmondson gnttab_copy_t *cop;
1342024c26efSMax zhen xnb_txbuf_t **txpp;
1343843e1988Sjohnlev netif_tx_request_t *txreq;
134456567907SDavid Edmondson boolean_t work_to_do, need_notify = B_FALSE;
1345843e1988Sjohnlev mblk_t *head, *tail;
134656567907SDavid Edmondson int n_data_req, i;
1347843e1988Sjohnlev
134856567907SDavid Edmondson ASSERT(MUTEX_HELD(&xnbp->xnb_tx_lock));
1349843e1988Sjohnlev
1350843e1988Sjohnlev head = tail = NULL;
1351843e1988Sjohnlev around:
1352843e1988Sjohnlev
1353551bc2a6Smrj /* LINTED: constant in conditional context */
1354551bc2a6Smrj RING_FINAL_CHECK_FOR_REQUESTS(&xnbp->xnb_tx_ring, work_to_do);
1355843e1988Sjohnlev if (!work_to_do) {
1356843e1988Sjohnlev finished:
135756567907SDavid Edmondson xnb_tx_notify_peer(xnbp, need_notify);
135856567907SDavid Edmondson
1359843e1988Sjohnlev return (head);
1360843e1988Sjohnlev }
1361843e1988Sjohnlev
1362551bc2a6Smrj start = xnbp->xnb_tx_ring.req_cons;
1363551bc2a6Smrj end = xnbp->xnb_tx_ring.sring->req_prod;
1364843e1988Sjohnlev
1365a8e7f927SDavid Edmondson if ((end - start) > NET_TX_RING_SIZE) {
1366a8e7f927SDavid Edmondson /*
1367a8e7f927SDavid Edmondson * This usually indicates that the frontend driver is
1368a8e7f927SDavid Edmondson * misbehaving, as it's not possible to have more than
1369a8e7f927SDavid Edmondson * NET_TX_RING_SIZE ring elements in play at any one
1370a8e7f927SDavid Edmondson * time.
1371a8e7f927SDavid Edmondson *
1372a8e7f927SDavid Edmondson * We reset the ring pointers to the state declared by
1373a8e7f927SDavid Edmondson * the frontend and try to carry on.
1374a8e7f927SDavid Edmondson */
1375a8e7f927SDavid Edmondson cmn_err(CE_WARN, "xnb_from_peer: domain %d tried to give us %u "
1376a8e7f927SDavid Edmondson "items in the ring, resetting and trying to recover.",
1377a8e7f927SDavid Edmondson xnbp->xnb_peer, (end - start));
1378a8e7f927SDavid Edmondson
1379a8e7f927SDavid Edmondson /* LINTED: constant in conditional context */
1380a8e7f927SDavid Edmondson BACK_RING_ATTACH(&xnbp->xnb_tx_ring,
1381a8e7f927SDavid Edmondson (netif_tx_sring_t *)xnbp->xnb_tx_ring_addr, PAGESIZE);
1382a8e7f927SDavid Edmondson
1383a8e7f927SDavid Edmondson goto around;
1384a8e7f927SDavid Edmondson }
1385a8e7f927SDavid Edmondson
138656567907SDavid Edmondson loop = start;
138756567907SDavid Edmondson cop = xnbp->xnb_tx_cop;
138856567907SDavid Edmondson txpp = xnbp->xnb_tx_bufp;
138956567907SDavid Edmondson n_data_req = 0;
1390843e1988Sjohnlev
139156567907SDavid Edmondson while (loop < end) {
1392fd0939efSDavid Edmondson static const uint16_t acceptable_flags =
1393fd0939efSDavid Edmondson NETTXF_csum_blank |
1394fd0939efSDavid Edmondson NETTXF_data_validated |
1395fd0939efSDavid Edmondson NETTXF_extra_info;
1396fd0939efSDavid Edmondson uint16_t unexpected_flags;
1397fd0939efSDavid Edmondson
139856567907SDavid Edmondson txreq = RING_GET_REQUEST(&xnbp->xnb_tx_ring, loop);
1399843e1988Sjohnlev
1400fd0939efSDavid Edmondson unexpected_flags = txreq->flags & ~acceptable_flags;
1401fd0939efSDavid Edmondson if (unexpected_flags != 0) {
1402fd0939efSDavid Edmondson /*
1403fd0939efSDavid Edmondson * The peer used flag bits that we do not
1404fd0939efSDavid Edmondson * recognize.
1405fd0939efSDavid Edmondson */
1406fd0939efSDavid Edmondson cmn_err(CE_WARN, "xnb_from_peer: "
1407fd0939efSDavid Edmondson "unexpected flag bits (0x%x) from peer "
1408fd0939efSDavid Edmondson "in transmit request",
1409fd0939efSDavid Edmondson unexpected_flags);
1410fd0939efSDavid Edmondson xnbp->xnb_stat_tx_unexpected_flags++;
1411fd0939efSDavid Edmondson
1412fd0939efSDavid Edmondson /* Mark this entry as failed. */
1413fd0939efSDavid Edmondson xnb_tx_mark_complete(xnbp, txreq->id, NETIF_RSP_ERROR);
1414fd0939efSDavid Edmondson need_notify = B_TRUE;
1415fd0939efSDavid Edmondson
1416fd0939efSDavid Edmondson } else if (txreq->flags & NETTXF_extra_info) {
141756567907SDavid Edmondson struct netif_extra_info *erp;
141856567907SDavid Edmondson boolean_t status;
141956567907SDavid Edmondson
142056567907SDavid Edmondson loop++; /* Consume another slot in the ring. */
142156567907SDavid Edmondson ASSERT(loop <= end);
142256567907SDavid Edmondson
142356567907SDavid Edmondson erp = (struct netif_extra_info *)
142456567907SDavid Edmondson RING_GET_REQUEST(&xnbp->xnb_tx_ring, loop);
142556567907SDavid Edmondson
142656567907SDavid Edmondson switch (erp->type) {
142756567907SDavid Edmondson case XEN_NETIF_EXTRA_TYPE_MCAST_ADD:
142856567907SDavid Edmondson ASSERT(xnbp->xnb_multicast_control);
142956567907SDavid Edmondson status = xnbp->xnb_flavour->xf_mcast_add(xnbp,
143056567907SDavid Edmondson &erp->u.mcast.addr);
143156567907SDavid Edmondson break;
143256567907SDavid Edmondson case XEN_NETIF_EXTRA_TYPE_MCAST_DEL:
143356567907SDavid Edmondson ASSERT(xnbp->xnb_multicast_control);
143456567907SDavid Edmondson status = xnbp->xnb_flavour->xf_mcast_del(xnbp,
143556567907SDavid Edmondson &erp->u.mcast.addr);
143656567907SDavid Edmondson break;
143756567907SDavid Edmondson default:
143856567907SDavid Edmondson status = B_FALSE;
143956567907SDavid Edmondson cmn_err(CE_WARN, "xnb_from_peer: "
144056567907SDavid Edmondson "unknown extra type %d", erp->type);
144156567907SDavid Edmondson break;
144256567907SDavid Edmondson }
1443843e1988Sjohnlev
144456567907SDavid Edmondson xnb_tx_mark_complete(xnbp, txreq->id,
144556567907SDavid Edmondson status ? NETIF_RSP_OKAY : NETIF_RSP_ERROR);
144656567907SDavid Edmondson need_notify = B_TRUE;
1447fd0939efSDavid Edmondson
1448fd0939efSDavid Edmondson } else if ((txreq->offset > PAGESIZE) ||
1449fd0939efSDavid Edmondson (txreq->offset + txreq->size > PAGESIZE)) {
1450fd0939efSDavid Edmondson /*
1451fd0939efSDavid Edmondson * Peer attempted to refer to data beyond the
1452fd0939efSDavid Edmondson * end of the granted page.
1453fd0939efSDavid Edmondson */
1454fd0939efSDavid Edmondson cmn_err(CE_WARN, "xnb_from_peer: "
1455fd0939efSDavid Edmondson "attempt to refer beyond the end of granted "
1456fd0939efSDavid Edmondson "page in txreq (offset %d, size %d).",
1457fd0939efSDavid Edmondson txreq->offset, txreq->size);
1458fd0939efSDavid Edmondson xnbp->xnb_stat_tx_overflow_page++;
1459fd0939efSDavid Edmondson
1460fd0939efSDavid Edmondson /* Mark this entry as failed. */
1461fd0939efSDavid Edmondson xnb_tx_mark_complete(xnbp, txreq->id, NETIF_RSP_ERROR);
1462fd0939efSDavid Edmondson need_notify = B_TRUE;
1463fd0939efSDavid Edmondson
146456567907SDavid Edmondson } else {
146556567907SDavid Edmondson xnb_txbuf_t *txp;
146656567907SDavid Edmondson
146756567907SDavid Edmondson txp = kmem_cache_alloc(xnbp->xnb_tx_buf_cache,
146856567907SDavid Edmondson KM_NOSLEEP);
146956567907SDavid Edmondson if (txp == NULL)
147056567907SDavid Edmondson break;
147156567907SDavid Edmondson
147256567907SDavid Edmondson txp->xt_mblk = desballoc((unsigned char *)txp->xt_buf,
147356567907SDavid Edmondson txp->xt_buflen, 0, &txp->xt_free_rtn);
147456567907SDavid Edmondson if (txp->xt_mblk == NULL) {
147556567907SDavid Edmondson kmem_cache_free(xnbp->xnb_tx_buf_cache, txp);
147656567907SDavid Edmondson break;
147756567907SDavid Edmondson }
1478843e1988Sjohnlev
147956567907SDavid Edmondson txp->xt_idx = loop;
148056567907SDavid Edmondson txp->xt_id = txreq->id;
1481843e1988Sjohnlev
148256567907SDavid Edmondson cop->source.u.ref = txreq->gref;
148356567907SDavid Edmondson cop->source.domid = xnbp->xnb_peer;
148456567907SDavid Edmondson cop->source.offset = txreq->offset;
1485843e1988Sjohnlev
148656567907SDavid Edmondson cop->dest.u.gmfn = txp->xt_mfn;
148756567907SDavid Edmondson cop->dest.domid = DOMID_SELF;
148856567907SDavid Edmondson cop->dest.offset = 0;
1489843e1988Sjohnlev
149056567907SDavid Edmondson cop->len = txreq->size;
149156567907SDavid Edmondson cop->flags = GNTCOPY_source_gref;
149256567907SDavid Edmondson cop->status = 0;
1493843e1988Sjohnlev
149456567907SDavid Edmondson *txpp = txp;
1495843e1988Sjohnlev
149656567907SDavid Edmondson txpp++;
149756567907SDavid Edmondson cop++;
149856567907SDavid Edmondson n_data_req++;
1499843e1988Sjohnlev
150056567907SDavid Edmondson ASSERT(n_data_req <= NET_TX_RING_SIZE);
150156567907SDavid Edmondson }
1502843e1988Sjohnlev
150356567907SDavid Edmondson loop++;
150456567907SDavid Edmondson }
150556567907SDavid Edmondson
150656567907SDavid Edmondson xnbp->xnb_tx_ring.req_cons = loop;
150756567907SDavid Edmondson
150856567907SDavid Edmondson if (n_data_req == 0)
150956567907SDavid Edmondson goto around;
151056567907SDavid Edmondson
151156567907SDavid Edmondson if (HYPERVISOR_grant_table_op(GNTTABOP_copy,
151256567907SDavid Edmondson xnbp->xnb_tx_cop, n_data_req) != 0) {
151356567907SDavid Edmondson
151456567907SDavid Edmondson cmn_err(CE_WARN, "xnb_from_peer: copy operation failed");
151556567907SDavid Edmondson
151656567907SDavid Edmondson txpp = xnbp->xnb_tx_bufp;
151756567907SDavid Edmondson i = n_data_req;
151856567907SDavid Edmondson while (i > 0) {
151956567907SDavid Edmondson kmem_cache_free(xnbp->xnb_tx_buf_cache, *txpp);
1520024c26efSMax zhen txpp++;
152156567907SDavid Edmondson i--;
1522843e1988Sjohnlev }
1523843e1988Sjohnlev
1524843e1988Sjohnlev goto finished;
1525843e1988Sjohnlev }
1526843e1988Sjohnlev
152756567907SDavid Edmondson txpp = xnbp->xnb_tx_bufp;
152856567907SDavid Edmondson cop = xnbp->xnb_tx_cop;
152956567907SDavid Edmondson i = n_data_req;
1530843e1988Sjohnlev
153156567907SDavid Edmondson while (i > 0) {
153256567907SDavid Edmondson xnb_txbuf_t *txp = *txpp;
1533843e1988Sjohnlev
153456567907SDavid Edmondson txreq = RING_GET_REQUEST(&xnbp->xnb_tx_ring, txp->xt_idx);
1535843e1988Sjohnlev
153656567907SDavid Edmondson if (cop->status != 0) {
153756567907SDavid Edmondson #ifdef XNB_DEBUG
153856567907SDavid Edmondson cmn_err(CE_WARN, "xnb_from_peer: "
153956567907SDavid Edmondson "txpp 0x%p failed (%d)",
154056567907SDavid Edmondson (void *)*txpp, cop->status);
154156567907SDavid Edmondson #endif /* XNB_DEBUG */
1542fd0939efSDavid Edmondson xnb_tx_mark_complete(xnbp, txp->xt_id, NETIF_RSP_ERROR);
154356567907SDavid Edmondson freemsg(txp->xt_mblk);
154456567907SDavid Edmondson } else {
154556567907SDavid Edmondson mblk_t *mp;
1546843e1988Sjohnlev
154756567907SDavid Edmondson mp = txp->xt_mblk;
154856567907SDavid Edmondson mp->b_rptr = mp->b_wptr = (unsigned char *)txp->xt_buf;
154956567907SDavid Edmondson mp->b_wptr += txreq->size;
155056567907SDavid Edmondson mp->b_next = NULL;
1551843e1988Sjohnlev
1552843e1988Sjohnlev /*
155356567907SDavid Edmondson * If there are checksum flags, process them
155456567907SDavid Edmondson * appropriately.
1555843e1988Sjohnlev */
155656567907SDavid Edmondson if ((txreq->flags &
1557843e1988Sjohnlev (NETTXF_csum_blank | NETTXF_data_validated))
155856567907SDavid Edmondson != 0) {
1559551bc2a6Smrj mp = xnbp->xnb_flavour->xf_cksum_from_peer(xnbp,
1560843e1988Sjohnlev mp, txreq->flags);
1561024c26efSMax zhen xnbp->xnb_stat_tx_cksum_no_need++;
1562843e1988Sjohnlev
156356567907SDavid Edmondson txp->xt_mblk = mp;
156456567907SDavid Edmondson }
1565843e1988Sjohnlev
1566843e1988Sjohnlev if (head == NULL) {
1567843e1988Sjohnlev ASSERT(tail == NULL);
1568843e1988Sjohnlev head = mp;
1569843e1988Sjohnlev } else {
1570843e1988Sjohnlev ASSERT(tail != NULL);
1571843e1988Sjohnlev tail->b_next = mp;
1572843e1988Sjohnlev }
1573843e1988Sjohnlev tail = mp;
157456567907SDavid Edmondson
157556567907SDavid Edmondson xnbp->xnb_stat_opackets++;
157656567907SDavid Edmondson xnbp->xnb_stat_obytes += txreq->size;
157756567907SDavid Edmondson
1578fd0939efSDavid Edmondson xnb_tx_mark_complete(xnbp, txp->xt_id, NETIF_RSP_OKAY);
1579843e1988Sjohnlev }
1580843e1988Sjohnlev
158156567907SDavid Edmondson txpp++;
158256567907SDavid Edmondson cop++;
158356567907SDavid Edmondson i--;
158456567907SDavid Edmondson }
1585843e1988Sjohnlev
1586843e1988Sjohnlev goto around;
1587843e1988Sjohnlev /* NOTREACHED */
1588843e1988Sjohnlev }
1589843e1988Sjohnlev
1590843e1988Sjohnlev static uint_t
xnb_intr(caddr_t arg)1591843e1988Sjohnlev xnb_intr(caddr_t arg)
1592843e1988Sjohnlev {
1593843e1988Sjohnlev xnb_t *xnbp = (xnb_t *)arg;
1594843e1988Sjohnlev mblk_t *mp;
1595843e1988Sjohnlev
1596551bc2a6Smrj xnbp->xnb_stat_intr++;
1597843e1988Sjohnlev
1598024c26efSMax zhen mutex_enter(&xnbp->xnb_tx_lock);
1599843e1988Sjohnlev
1600551bc2a6Smrj ASSERT(xnbp->xnb_connected);
1601843e1988Sjohnlev
1602024c26efSMax zhen mp = xnb_from_peer(xnbp);
1603843e1988Sjohnlev
1604024c26efSMax zhen mutex_exit(&xnbp->xnb_tx_lock);
1605843e1988Sjohnlev
1606551bc2a6Smrj if (!xnbp->xnb_hotplugged) {
1607024c26efSMax zhen xnbp->xnb_stat_tx_too_early++;
1608843e1988Sjohnlev goto fail;
1609843e1988Sjohnlev }
1610843e1988Sjohnlev if (mp == NULL) {
1611551bc2a6Smrj xnbp->xnb_stat_spurious_intr++;
1612843e1988Sjohnlev goto fail;
1613843e1988Sjohnlev }
1614843e1988Sjohnlev
1615024c26efSMax zhen xnbp->xnb_flavour->xf_from_peer(xnbp, mp);
1616843e1988Sjohnlev
1617843e1988Sjohnlev return (DDI_INTR_CLAIMED);
1618843e1988Sjohnlev
1619843e1988Sjohnlev fail:
1620843e1988Sjohnlev freemsgchain(mp);
1621843e1988Sjohnlev return (DDI_INTR_CLAIMED);
1622843e1988Sjohnlev }
1623843e1988Sjohnlev
162456567907SDavid Edmondson /*
162556567907SDavid Edmondson * Read our configuration from xenstore.
162656567907SDavid Edmondson */
162756567907SDavid Edmondson boolean_t
xnb_read_xs_config(xnb_t * xnbp)162856567907SDavid Edmondson xnb_read_xs_config(xnb_t *xnbp)
162956567907SDavid Edmondson {
163056567907SDavid Edmondson char *xsname;
163156567907SDavid Edmondson char mac[ETHERADDRL * 3];
163256567907SDavid Edmondson
163356567907SDavid Edmondson xsname = xvdi_get_xsname(xnbp->xnb_devinfo);
163456567907SDavid Edmondson
163556567907SDavid Edmondson if (xenbus_scanf(XBT_NULL, xsname,
163656567907SDavid Edmondson "mac", "%s", mac) != 0) {
163756567907SDavid Edmondson cmn_err(CE_WARN, "xnb_attach: "
163856567907SDavid Edmondson "cannot read mac address from %s",
163956567907SDavid Edmondson xsname);
164056567907SDavid Edmondson return (B_FALSE);
164156567907SDavid Edmondson }
164256567907SDavid Edmondson
164356567907SDavid Edmondson if (ether_aton(mac, xnbp->xnb_mac_addr) != ETHERADDRL) {
164456567907SDavid Edmondson cmn_err(CE_WARN,
164556567907SDavid Edmondson "xnb_attach: cannot parse mac address %s",
164656567907SDavid Edmondson mac);
164756567907SDavid Edmondson return (B_FALSE);
164856567907SDavid Edmondson }
164956567907SDavid Edmondson
165056567907SDavid Edmondson return (B_TRUE);
165156567907SDavid Edmondson }
165256567907SDavid Edmondson
165356567907SDavid Edmondson /*
165456567907SDavid Edmondson * Read the configuration of the peer from xenstore.
165556567907SDavid Edmondson */
165656567907SDavid Edmondson boolean_t
xnb_read_oe_config(xnb_t * xnbp)165756567907SDavid Edmondson xnb_read_oe_config(xnb_t *xnbp)
1658843e1988Sjohnlev {
1659843e1988Sjohnlev char *oename;
1660843e1988Sjohnlev int i;
1661843e1988Sjohnlev
166256567907SDavid Edmondson oename = xvdi_get_oename(xnbp->xnb_devinfo);
1663843e1988Sjohnlev
1664843e1988Sjohnlev if (xenbus_gather(XBT_NULL, oename,
166556567907SDavid Edmondson "event-channel", "%u", &xnbp->xnb_fe_evtchn,
1666551bc2a6Smrj "tx-ring-ref", "%lu", &xnbp->xnb_tx_ring_ref,
1667551bc2a6Smrj "rx-ring-ref", "%lu", &xnbp->xnb_rx_ring_ref,
1668843e1988Sjohnlev NULL) != 0) {
166956567907SDavid Edmondson cmn_err(CE_WARN, "xnb_read_oe_config: "
1670843e1988Sjohnlev "cannot read other-end details from %s",
1671843e1988Sjohnlev oename);
167256567907SDavid Edmondson return (B_FALSE);
1673843e1988Sjohnlev }
1674843e1988Sjohnlev
167556567907SDavid Edmondson /*
167656567907SDavid Edmondson * Check whether our peer requests receive side hypervisor
167756567907SDavid Edmondson * copy.
167856567907SDavid Edmondson */
1679843e1988Sjohnlev if (xenbus_scanf(XBT_NULL, oename,
168056567907SDavid Edmondson "request-rx-copy", "%d", &i) != 0)
1681843e1988Sjohnlev i = 0;
1682843e1988Sjohnlev if (i != 0)
168356567907SDavid Edmondson xnbp->xnb_rx_hv_copy = B_TRUE;
1684843e1988Sjohnlev
168556567907SDavid Edmondson /*
168656567907SDavid Edmondson * Check whether our peer requests multicast_control.
168756567907SDavid Edmondson */
1688843e1988Sjohnlev if (xenbus_scanf(XBT_NULL, oename,
168956567907SDavid Edmondson "request-multicast-control", "%d", &i) != 0)
1690551bc2a6Smrj i = 0;
1691551bc2a6Smrj if (i != 0)
169256567907SDavid Edmondson xnbp->xnb_multicast_control = B_TRUE;
169356567907SDavid Edmondson
169456567907SDavid Edmondson /*
169556567907SDavid Edmondson * The Linux backend driver here checks to see if the peer has
169656567907SDavid Edmondson * set 'feature-no-csum-offload'. This is used to indicate
169756567907SDavid Edmondson * that the guest cannot handle receiving packets without a
169856567907SDavid Edmondson * valid checksum. We don't check here, because packets passed
169956567907SDavid Edmondson * to the peer _always_ have a valid checksum.
170056567907SDavid Edmondson *
170156567907SDavid Edmondson * There are three cases:
170256567907SDavid Edmondson *
170356567907SDavid Edmondson * - the NIC is dedicated: packets from the wire should always
170456567907SDavid Edmondson * have a valid checksum. If the hardware validates the
170556567907SDavid Edmondson * checksum then the relevant bit will be set in the packet
170656567907SDavid Edmondson * attributes and we will inform the peer. It can choose to
170756567907SDavid Edmondson * ignore the hardware verification.
170856567907SDavid Edmondson *
170956567907SDavid Edmondson * - the NIC is shared (VNIC) and a packet originates from the
171056567907SDavid Edmondson * wire: this is the same as the case above - the packets
171156567907SDavid Edmondson * will have a valid checksum.
171256567907SDavid Edmondson *
171356567907SDavid Edmondson * - the NIC is shared (VNIC) and a packet originates from the
171456567907SDavid Edmondson * host: the MAC layer ensures that all such packets have a
171556567907SDavid Edmondson * valid checksum by calculating one if the stack did not.
171656567907SDavid Edmondson */
171756567907SDavid Edmondson
171856567907SDavid Edmondson return (B_TRUE);
171956567907SDavid Edmondson }
172056567907SDavid Edmondson
172156567907SDavid Edmondson void
xnb_start_connect(xnb_t * xnbp)172256567907SDavid Edmondson xnb_start_connect(xnb_t *xnbp)
172356567907SDavid Edmondson {
172456567907SDavid Edmondson dev_info_t *dip = xnbp->xnb_devinfo;
172556567907SDavid Edmondson
172656567907SDavid Edmondson if (!xnb_connect_rings(dip)) {
172756567907SDavid Edmondson cmn_err(CE_WARN, "xnb_start_connect: "
172856567907SDavid Edmondson "cannot connect rings");
172956567907SDavid Edmondson goto failed;
173056567907SDavid Edmondson }
173156567907SDavid Edmondson
173256567907SDavid Edmondson if (!xnbp->xnb_flavour->xf_start_connect(xnbp)) {
173356567907SDavid Edmondson cmn_err(CE_WARN, "xnb_start_connect: "
173456567907SDavid Edmondson "flavour failed to connect");
173556567907SDavid Edmondson goto failed;
173656567907SDavid Edmondson }
173756567907SDavid Edmondson
173856567907SDavid Edmondson (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateConnected);
173956567907SDavid Edmondson return;
174056567907SDavid Edmondson
174156567907SDavid Edmondson failed:
174256567907SDavid Edmondson xnbp->xnb_flavour->xf_peer_disconnected(xnbp);
174356567907SDavid Edmondson xnb_disconnect_rings(dip);
174456567907SDavid Edmondson (void) xvdi_switch_state(dip, XBT_NULL,
174556567907SDavid Edmondson XenbusStateClosed);
174656567907SDavid Edmondson (void) xvdi_post_event(dip, XEN_HP_REMOVE);
174756567907SDavid Edmondson }
174856567907SDavid Edmondson
174956567907SDavid Edmondson static boolean_t
xnb_connect_rings(dev_info_t * dip)175056567907SDavid Edmondson xnb_connect_rings(dev_info_t *dip)
175156567907SDavid Edmondson {
175256567907SDavid Edmondson xnb_t *xnbp = ddi_get_driver_private(dip);
175356567907SDavid Edmondson struct gnttab_map_grant_ref map_op;
175456567907SDavid Edmondson
175556567907SDavid Edmondson /*
175656567907SDavid Edmondson * Cannot attempt to connect the rings if already connected.
175756567907SDavid Edmondson */
175856567907SDavid Edmondson ASSERT(!xnbp->xnb_connected);
1759843e1988Sjohnlev
1760843e1988Sjohnlev /*
1761843e1988Sjohnlev * 1. allocate a vaddr for the tx page, one for the rx page.
1762843e1988Sjohnlev * 2. call GNTTABOP_map_grant_ref to map the relevant pages
1763843e1988Sjohnlev * into the allocated vaddr (one for tx, one for rx).
1764843e1988Sjohnlev * 3. call EVTCHNOP_bind_interdomain to have the event channel
1765843e1988Sjohnlev * bound to this domain.
1766843e1988Sjohnlev * 4. associate the event channel with an interrupt.
176756567907SDavid Edmondson * 5. enable the interrupt.
1768843e1988Sjohnlev */
1769843e1988Sjohnlev
1770843e1988Sjohnlev /* 1.tx */
1771551bc2a6Smrj xnbp->xnb_tx_ring_addr = vmem_xalloc(heap_arena, PAGESIZE, PAGESIZE,
1772843e1988Sjohnlev 0, 0, 0, 0, VM_SLEEP);
1773551bc2a6Smrj ASSERT(xnbp->xnb_tx_ring_addr != NULL);
1774843e1988Sjohnlev
1775843e1988Sjohnlev /* 2.tx */
1776551bc2a6Smrj map_op.host_addr = (uint64_t)((long)xnbp->xnb_tx_ring_addr);
1777843e1988Sjohnlev map_op.flags = GNTMAP_host_map;
1778551bc2a6Smrj map_op.ref = xnbp->xnb_tx_ring_ref;
1779551bc2a6Smrj map_op.dom = xnbp->xnb_peer;
17807eea693dSMark Johnson hat_prepare_mapping(kas.a_hat, xnbp->xnb_tx_ring_addr, NULL);
17817eea693dSMark Johnson if (xen_map_gref(GNTTABOP_map_grant_ref, &map_op, 1, B_FALSE) != 0 ||
17827eea693dSMark Johnson map_op.status != 0) {
1783843e1988Sjohnlev cmn_err(CE_WARN, "xnb_connect_rings: cannot map tx-ring page.");
1784843e1988Sjohnlev goto fail;
1785843e1988Sjohnlev }
1786551bc2a6Smrj xnbp->xnb_tx_ring_handle = map_op.handle;
1787843e1988Sjohnlev
1788551bc2a6Smrj /* LINTED: constant in conditional context */
1789551bc2a6Smrj BACK_RING_INIT(&xnbp->xnb_tx_ring,
1790551bc2a6Smrj (netif_tx_sring_t *)xnbp->xnb_tx_ring_addr, PAGESIZE);
1791843e1988Sjohnlev
1792843e1988Sjohnlev /* 1.rx */
1793551bc2a6Smrj xnbp->xnb_rx_ring_addr = vmem_xalloc(heap_arena, PAGESIZE, PAGESIZE,
1794843e1988Sjohnlev 0, 0, 0, 0, VM_SLEEP);
1795551bc2a6Smrj ASSERT(xnbp->xnb_rx_ring_addr != NULL);
1796843e1988Sjohnlev
1797843e1988Sjohnlev /* 2.rx */
1798551bc2a6Smrj map_op.host_addr = (uint64_t)((long)xnbp->xnb_rx_ring_addr);
1799843e1988Sjohnlev map_op.flags = GNTMAP_host_map;
1800551bc2a6Smrj map_op.ref = xnbp->xnb_rx_ring_ref;
1801551bc2a6Smrj map_op.dom = xnbp->xnb_peer;
18027eea693dSMark Johnson hat_prepare_mapping(kas.a_hat, xnbp->xnb_rx_ring_addr, NULL);
18037eea693dSMark Johnson if (xen_map_gref(GNTTABOP_map_grant_ref, &map_op, 1, B_FALSE) != 0 ||
18047eea693dSMark Johnson map_op.status != 0) {
1805843e1988Sjohnlev cmn_err(CE_WARN, "xnb_connect_rings: cannot map rx-ring page.");
1806843e1988Sjohnlev goto fail;
1807843e1988Sjohnlev }
1808551bc2a6Smrj xnbp->xnb_rx_ring_handle = map_op.handle;
1809843e1988Sjohnlev
1810551bc2a6Smrj /* LINTED: constant in conditional context */
1811551bc2a6Smrj BACK_RING_INIT(&xnbp->xnb_rx_ring,
1812551bc2a6Smrj (netif_rx_sring_t *)xnbp->xnb_rx_ring_addr, PAGESIZE);
1813843e1988Sjohnlev
1814843e1988Sjohnlev /* 3 */
181556567907SDavid Edmondson if (xvdi_bind_evtchn(dip, xnbp->xnb_fe_evtchn) != DDI_SUCCESS) {
1816843e1988Sjohnlev cmn_err(CE_WARN, "xnb_connect_rings: "
1817551bc2a6Smrj "cannot bind event channel %d", xnbp->xnb_evtchn);
1818551bc2a6Smrj xnbp->xnb_evtchn = INVALID_EVTCHN;
1819843e1988Sjohnlev goto fail;
1820843e1988Sjohnlev }
1821551bc2a6Smrj xnbp->xnb_evtchn = xvdi_get_evtchn(dip);
1822843e1988Sjohnlev
1823843e1988Sjohnlev /*
1824843e1988Sjohnlev * It would be good to set the state to XenbusStateConnected
1825843e1988Sjohnlev * here as well, but then what if ddi_add_intr() failed?
1826843e1988Sjohnlev * Changing the state in the store will be noticed by the peer
1827843e1988Sjohnlev * and cannot be "taken back".
1828843e1988Sjohnlev */
1829551bc2a6Smrj mutex_enter(&xnbp->xnb_tx_lock);
1830551bc2a6Smrj mutex_enter(&xnbp->xnb_rx_lock);
1831843e1988Sjohnlev
1832551bc2a6Smrj xnbp->xnb_connected = B_TRUE;
1833843e1988Sjohnlev
1834551bc2a6Smrj mutex_exit(&xnbp->xnb_rx_lock);
1835551bc2a6Smrj mutex_exit(&xnbp->xnb_tx_lock);
1836843e1988Sjohnlev
183756567907SDavid Edmondson /* 4, 5 */
1838843e1988Sjohnlev if (ddi_add_intr(dip, 0, NULL, NULL, xnb_intr, (caddr_t)xnbp)
1839843e1988Sjohnlev != DDI_SUCCESS) {
1840843e1988Sjohnlev cmn_err(CE_WARN, "xnb_connect_rings: cannot add interrupt");
1841843e1988Sjohnlev goto fail;
1842843e1988Sjohnlev }
1843551bc2a6Smrj xnbp->xnb_irq = B_TRUE;
1844843e1988Sjohnlev
1845843e1988Sjohnlev return (B_TRUE);
1846843e1988Sjohnlev
1847843e1988Sjohnlev fail:
1848551bc2a6Smrj mutex_enter(&xnbp->xnb_tx_lock);
1849551bc2a6Smrj mutex_enter(&xnbp->xnb_rx_lock);
1850843e1988Sjohnlev
1851551bc2a6Smrj xnbp->xnb_connected = B_FALSE;
185256567907SDavid Edmondson
1853551bc2a6Smrj mutex_exit(&xnbp->xnb_rx_lock);
1854551bc2a6Smrj mutex_exit(&xnbp->xnb_tx_lock);
1855843e1988Sjohnlev
1856843e1988Sjohnlev return (B_FALSE);
1857843e1988Sjohnlev }
1858843e1988Sjohnlev
1859843e1988Sjohnlev static void
xnb_disconnect_rings(dev_info_t * dip)1860843e1988Sjohnlev xnb_disconnect_rings(dev_info_t *dip)
1861843e1988Sjohnlev {
1862843e1988Sjohnlev xnb_t *xnbp = ddi_get_driver_private(dip);
1863843e1988Sjohnlev
1864551bc2a6Smrj if (xnbp->xnb_irq) {
1865843e1988Sjohnlev ddi_remove_intr(dip, 0, NULL);
1866551bc2a6Smrj xnbp->xnb_irq = B_FALSE;
1867843e1988Sjohnlev }
1868843e1988Sjohnlev
1869551bc2a6Smrj if (xnbp->xnb_evtchn != INVALID_EVTCHN) {
1870843e1988Sjohnlev xvdi_free_evtchn(dip);
1871551bc2a6Smrj xnbp->xnb_evtchn = INVALID_EVTCHN;
1872843e1988Sjohnlev }
1873843e1988Sjohnlev
1874551bc2a6Smrj if (xnbp->xnb_rx_ring_handle != INVALID_GRANT_HANDLE) {
1875843e1988Sjohnlev struct gnttab_unmap_grant_ref unmap_op;
1876843e1988Sjohnlev
1877551bc2a6Smrj unmap_op.host_addr = (uint64_t)(uintptr_t)
1878551bc2a6Smrj xnbp->xnb_rx_ring_addr;
1879843e1988Sjohnlev unmap_op.dev_bus_addr = 0;
1880551bc2a6Smrj unmap_op.handle = xnbp->xnb_rx_ring_handle;
1881843e1988Sjohnlev if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
1882843e1988Sjohnlev &unmap_op, 1) != 0)
1883843e1988Sjohnlev cmn_err(CE_WARN, "xnb_disconnect_rings: "
1884843e1988Sjohnlev "cannot unmap rx-ring page (%d)",
1885843e1988Sjohnlev unmap_op.status);
1886843e1988Sjohnlev
1887551bc2a6Smrj xnbp->xnb_rx_ring_handle = INVALID_GRANT_HANDLE;
1888843e1988Sjohnlev }
1889843e1988Sjohnlev
1890551bc2a6Smrj if (xnbp->xnb_rx_ring_addr != NULL) {
1891551bc2a6Smrj hat_release_mapping(kas.a_hat, xnbp->xnb_rx_ring_addr);
1892551bc2a6Smrj vmem_free(heap_arena, xnbp->xnb_rx_ring_addr, PAGESIZE);
1893551bc2a6Smrj xnbp->xnb_rx_ring_addr = NULL;
1894843e1988Sjohnlev }
1895843e1988Sjohnlev
1896551bc2a6Smrj if (xnbp->xnb_tx_ring_handle != INVALID_GRANT_HANDLE) {
1897843e1988Sjohnlev struct gnttab_unmap_grant_ref unmap_op;
1898843e1988Sjohnlev
1899551bc2a6Smrj unmap_op.host_addr = (uint64_t)(uintptr_t)
1900551bc2a6Smrj xnbp->xnb_tx_ring_addr;
1901843e1988Sjohnlev unmap_op.dev_bus_addr = 0;
1902551bc2a6Smrj unmap_op.handle = xnbp->xnb_tx_ring_handle;
1903843e1988Sjohnlev if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
1904843e1988Sjohnlev &unmap_op, 1) != 0)
1905843e1988Sjohnlev cmn_err(CE_WARN, "xnb_disconnect_rings: "
1906843e1988Sjohnlev "cannot unmap tx-ring page (%d)",
1907843e1988Sjohnlev unmap_op.status);
1908843e1988Sjohnlev
1909551bc2a6Smrj xnbp->xnb_tx_ring_handle = INVALID_GRANT_HANDLE;
1910843e1988Sjohnlev }
1911843e1988Sjohnlev
1912551bc2a6Smrj if (xnbp->xnb_tx_ring_addr != NULL) {
1913551bc2a6Smrj hat_release_mapping(kas.a_hat, xnbp->xnb_tx_ring_addr);
1914551bc2a6Smrj vmem_free(heap_arena, xnbp->xnb_tx_ring_addr, PAGESIZE);
1915551bc2a6Smrj xnbp->xnb_tx_ring_addr = NULL;
1916843e1988Sjohnlev }
1917843e1988Sjohnlev }
1918843e1988Sjohnlev
1919843e1988Sjohnlev static void
xnb_oe_state_change(dev_info_t * dip,ddi_eventcookie_t id,void * arg,void * impl_data)1920843e1988Sjohnlev xnb_oe_state_change(dev_info_t *dip, ddi_eventcookie_t id,
1921843e1988Sjohnlev void *arg, void *impl_data)
1922843e1988Sjohnlev {
192356567907SDavid Edmondson _NOTE(ARGUNUSED(id, arg));
1924843e1988Sjohnlev xnb_t *xnbp = ddi_get_driver_private(dip);
1925843e1988Sjohnlev XenbusState new_state = *(XenbusState *)impl_data;
1926843e1988Sjohnlev
1927843e1988Sjohnlev ASSERT(xnbp != NULL);
1928843e1988Sjohnlev
1929843e1988Sjohnlev switch (new_state) {
1930843e1988Sjohnlev case XenbusStateConnected:
193108cfff84Scz /* spurious state change */
193208cfff84Scz if (xnbp->xnb_connected)
193308cfff84Scz return;
193408cfff84Scz
193556567907SDavid Edmondson if (!xnb_read_oe_config(xnbp) ||
193656567907SDavid Edmondson !xnbp->xnb_flavour->xf_peer_connected(xnbp)) {
193756567907SDavid Edmondson cmn_err(CE_WARN, "xnb_oe_state_change: "
193856567907SDavid Edmondson "read otherend config error");
1939843e1988Sjohnlev (void) xvdi_switch_state(dip, XBT_NULL,
1940843e1988Sjohnlev XenbusStateClosed);
1941843e1988Sjohnlev (void) xvdi_post_event(dip, XEN_HP_REMOVE);
194256567907SDavid Edmondson
194356567907SDavid Edmondson break;
1944843e1988Sjohnlev }
1945843e1988Sjohnlev
194656567907SDavid Edmondson
194756567907SDavid Edmondson mutex_enter(&xnbp->xnb_state_lock);
194856567907SDavid Edmondson xnbp->xnb_fe_status = XNB_STATE_READY;
194956567907SDavid Edmondson if (xnbp->xnb_be_status == XNB_STATE_READY)
195056567907SDavid Edmondson xnb_start_connect(xnbp);
195156567907SDavid Edmondson mutex_exit(&xnbp->xnb_state_lock);
195256567907SDavid Edmondson
1953843e1988Sjohnlev /*
1954843e1988Sjohnlev * Now that we've attempted to connect it's reasonable
1955843e1988Sjohnlev * to allow an attempt to detach.
1956843e1988Sjohnlev */
1957551bc2a6Smrj xnbp->xnb_detachable = B_TRUE;
1958843e1988Sjohnlev
1959843e1988Sjohnlev break;
1960843e1988Sjohnlev
1961843e1988Sjohnlev case XenbusStateClosing:
1962843e1988Sjohnlev (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosing);
1963843e1988Sjohnlev
1964843e1988Sjohnlev break;
1965843e1988Sjohnlev
1966843e1988Sjohnlev case XenbusStateClosed:
1967551bc2a6Smrj xnbp->xnb_flavour->xf_peer_disconnected(xnbp);
1968843e1988Sjohnlev
1969551bc2a6Smrj mutex_enter(&xnbp->xnb_tx_lock);
1970551bc2a6Smrj mutex_enter(&xnbp->xnb_rx_lock);
1971843e1988Sjohnlev
1972843e1988Sjohnlev xnb_disconnect_rings(dip);
1973551bc2a6Smrj xnbp->xnb_connected = B_FALSE;
1974843e1988Sjohnlev
1975551bc2a6Smrj mutex_exit(&xnbp->xnb_rx_lock);
1976551bc2a6Smrj mutex_exit(&xnbp->xnb_tx_lock);
1977843e1988Sjohnlev
1978843e1988Sjohnlev (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosed);
1979843e1988Sjohnlev (void) xvdi_post_event(dip, XEN_HP_REMOVE);
1980843e1988Sjohnlev /*
1981843e1988Sjohnlev * In all likelyhood this is already set (in the above
1982843e1988Sjohnlev * case), but if the peer never attempted to connect
1983843e1988Sjohnlev * and the domain is destroyed we get here without
1984843e1988Sjohnlev * having been through the case above, so we set it to
1985843e1988Sjohnlev * be sure.
1986843e1988Sjohnlev */
1987551bc2a6Smrj xnbp->xnb_detachable = B_TRUE;
1988843e1988Sjohnlev
1989843e1988Sjohnlev break;
1990843e1988Sjohnlev
1991843e1988Sjohnlev default:
1992843e1988Sjohnlev break;
1993843e1988Sjohnlev }
1994843e1988Sjohnlev }
1995843e1988Sjohnlev
1996843e1988Sjohnlev static void
xnb_hp_state_change(dev_info_t * dip,ddi_eventcookie_t id,void * arg,void * impl_data)1997843e1988Sjohnlev xnb_hp_state_change(dev_info_t *dip, ddi_eventcookie_t id,
1998843e1988Sjohnlev void *arg, void *impl_data)
1999843e1988Sjohnlev {
200056567907SDavid Edmondson _NOTE(ARGUNUSED(id, arg));
2001843e1988Sjohnlev xnb_t *xnbp = ddi_get_driver_private(dip);
2002843e1988Sjohnlev xendev_hotplug_state_t state = *(xendev_hotplug_state_t *)impl_data;
2003843e1988Sjohnlev
2004843e1988Sjohnlev ASSERT(xnbp != NULL);
2005843e1988Sjohnlev
2006843e1988Sjohnlev switch (state) {
2007843e1988Sjohnlev case Connected:
200808cfff84Scz /* spurious hotplug event */
200908cfff84Scz if (xnbp->xnb_hotplugged)
201056567907SDavid Edmondson break;
201108cfff84Scz
201256567907SDavid Edmondson if (!xnb_read_xs_config(xnbp))
201356567907SDavid Edmondson break;
201456567907SDavid Edmondson
201556567907SDavid Edmondson if (!xnbp->xnb_flavour->xf_hotplug_connected(xnbp))
201656567907SDavid Edmondson break;
2017843e1988Sjohnlev
2018551bc2a6Smrj mutex_enter(&xnbp->xnb_tx_lock);
2019551bc2a6Smrj mutex_enter(&xnbp->xnb_rx_lock);
2020843e1988Sjohnlev
202156567907SDavid Edmondson xnbp->xnb_hotplugged = B_TRUE;
2022843e1988Sjohnlev
2023551bc2a6Smrj mutex_exit(&xnbp->xnb_rx_lock);
2024551bc2a6Smrj mutex_exit(&xnbp->xnb_tx_lock);
202556567907SDavid Edmondson
202656567907SDavid Edmondson mutex_enter(&xnbp->xnb_state_lock);
202756567907SDavid Edmondson xnbp->xnb_be_status = XNB_STATE_READY;
202856567907SDavid Edmondson if (xnbp->xnb_fe_status == XNB_STATE_READY)
202956567907SDavid Edmondson xnb_start_connect(xnbp);
203056567907SDavid Edmondson mutex_exit(&xnbp->xnb_state_lock);
203156567907SDavid Edmondson
2032843e1988Sjohnlev break;
2033843e1988Sjohnlev
2034843e1988Sjohnlev default:
2035843e1988Sjohnlev break;
2036843e1988Sjohnlev }
2037843e1988Sjohnlev }
2038843e1988Sjohnlev
2039843e1988Sjohnlev static struct modldrv modldrv = {
2040a859da42SDavid Edmondson &mod_miscops, "xnb",
2041843e1988Sjohnlev };
2042843e1988Sjohnlev
2043843e1988Sjohnlev static struct modlinkage modlinkage = {
2044843e1988Sjohnlev MODREV_1, &modldrv, NULL
2045843e1988Sjohnlev };
2046843e1988Sjohnlev
2047843e1988Sjohnlev int
_init(void)2048843e1988Sjohnlev _init(void)
2049843e1988Sjohnlev {
2050843e1988Sjohnlev int i;
2051843e1988Sjohnlev
2052843e1988Sjohnlev mutex_init(&xnb_alloc_page_lock, NULL, MUTEX_DRIVER, NULL);
2053843e1988Sjohnlev
2054843e1988Sjohnlev i = mod_install(&modlinkage);
205556567907SDavid Edmondson if (i != DDI_SUCCESS)
2056843e1988Sjohnlev mutex_destroy(&xnb_alloc_page_lock);
205756567907SDavid Edmondson
2058843e1988Sjohnlev return (i);
2059843e1988Sjohnlev }
2060843e1988Sjohnlev
2061843e1988Sjohnlev int
_info(struct modinfo * modinfop)2062843e1988Sjohnlev _info(struct modinfo *modinfop)
2063843e1988Sjohnlev {
2064843e1988Sjohnlev return (mod_info(&modlinkage, modinfop));
2065843e1988Sjohnlev }
2066843e1988Sjohnlev
2067843e1988Sjohnlev int
_fini(void)2068843e1988Sjohnlev _fini(void)
2069843e1988Sjohnlev {
2070843e1988Sjohnlev int i;
2071843e1988Sjohnlev
2072843e1988Sjohnlev i = mod_remove(&modlinkage);
207356567907SDavid Edmondson if (i == DDI_SUCCESS)
2074843e1988Sjohnlev mutex_destroy(&xnb_alloc_page_lock);
207556567907SDavid Edmondson
2076843e1988Sjohnlev return (i);
2077843e1988Sjohnlev }
2078