/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Xen network backend - mac client edition.
 *
 * A driver that sits above an existing GLDv3/Nemo MAC driver and
 * relays packets to/from that driver from/to a guest domain.
 */

/*
 * XNBO_DEBUG follows DEBUG: it is defined only in DEBUG builds and
 * gates the debug-only code in this file.
 */
#if defined(DEBUG)
#define	XNBO_DEBUG 1
#endif /* DEBUG */

38843e1988Sjohnlev #include "xnb.h"
40843e1988Sjohnlev #include <sys/sunddi.h>
41da14cebeSEric Cheng #include <sys/ddi.h>
42843e1988Sjohnlev #include <sys/modctl.h>
43843e1988Sjohnlev #include <sys/strsubr.h>
44da14cebeSEric Cheng #include <sys/mac_client.h>
45da14cebeSEric Cheng #include <sys/mac_provider.h>
46da14cebeSEric Cheng #include <sys/mac_client_priv.h>
47843e1988Sjohnlev #include <sys/mac.h>
48843e1988Sjohnlev #include <net/if.h>
49843e1988Sjohnlev #include <sys/dlpi.h>
50843e1988Sjohnlev #include <sys/pattr.h>
51843e1988Sjohnlev #include <xen/sys/xenbus_impl.h>
52843e1988Sjohnlev #include <xen/sys/xendev.h>
5356567907SDavid Edmondson #include <sys/sdt.h>
5456567907SDavid Edmondson #include <sys/note.h>
#if defined(XNBO_DEBUG)
/*
 * Debug-only switches for checksum offload, both enabled by default.
 *
 * xnbo_cksum_offload_to_peer: when cleared, xnbo_cksum_to_peer()
 * always returns 0, i.e. no checksum flags are relayed to the peer.
 *
 * xnbo_cksum_offload_from_peer: when cleared, xnbo_cksum_from_peer()
 * passes a capability mask of 0 to xnb_process_cksum_flags() instead
 * of the capabilities recorded for the mac.
 */
boolean_t xnbo_cksum_offload_to_peer = B_TRUE;
boolean_t xnbo_cksum_offload_from_peer = B_TRUE;
#endif /* XNBO_DEBUG */

6156567907SDavid Edmondson /* Track multicast addresses. */
6256567907SDavid Edmondson typedef struct xmca {
6356567907SDavid Edmondson 	struct xmca *next;
6456567907SDavid Edmondson 	ether_addr_t addr;
6556567907SDavid Edmondson } xmca_t;
6656567907SDavid Edmondson 
6756567907SDavid Edmondson /* State about this device instance. */
68843e1988Sjohnlev typedef struct xnbo {
69843e1988Sjohnlev 	mac_handle_t		o_mh;
70da14cebeSEric Cheng 	mac_client_handle_t	o_mch;
71da14cebeSEric Cheng 	mac_unicast_handle_t	o_mah;
72da14cebeSEric Cheng 	mac_promisc_handle_t	o_mphp;
73843e1988Sjohnlev 	boolean_t		o_running;
74843e1988Sjohnlev 	boolean_t		o_promiscuous;
75843e1988Sjohnlev 	uint32_t		o_hcksum_capab;
7656567907SDavid Edmondson 	xmca_t			*o_mca;
7756567907SDavid Edmondson 	char			o_link_name[LIFNAMSIZ];
7856567907SDavid Edmondson 	boolean_t		o_need_rx_filter;
7956567907SDavid Edmondson 	boolean_t		o_need_setphysaddr;
8056567907SDavid Edmondson 	boolean_t		o_multicast_control;
81843e1988Sjohnlev } xnbo_t;
8356567907SDavid Edmondson static void xnbo_close_mac(xnb_t *);
840324f02aSDavid Edmondson static void i_xnbo_close_mac(xnb_t *, boolean_t);
86843e1988Sjohnlev /*
87843e1988Sjohnlev  * Packets from the peer come here.  We pass them to the mac device.
88843e1988Sjohnlev  */
89843e1988Sjohnlev static void
xnbo_to_mac(xnb_t * xnbp,mblk_t * mp)90843e1988Sjohnlev xnbo_to_mac(xnb_t *xnbp, mblk_t *mp)
91843e1988Sjohnlev {
92551bc2a6Smrj 	xnbo_t *xnbop = xnbp->xnb_flavour_data;
94843e1988Sjohnlev 	ASSERT(mp != NULL);
96843e1988Sjohnlev 	if (!xnbop->o_running) {
97024c26efSMax zhen 		xnbp->xnb_stat_tx_too_early++;
98843e1988Sjohnlev 		goto fail;
99843e1988Sjohnlev 	}
101da14cebeSEric Cheng 	if (mac_tx(xnbop->o_mch, mp, 0,
102*aabaa37aSToomas Soome 	    MAC_DROP_ON_NO_DESC, NULL) != (mac_tx_cookie_t)NULL) {
103551bc2a6Smrj 		xnbp->xnb_stat_mac_full++;
104843e1988Sjohnlev 	}
106843e1988Sjohnlev 	return;
108843e1988Sjohnlev fail:
109843e1988Sjohnlev 	freemsgchain(mp);
110843e1988Sjohnlev }
11256567907SDavid Edmondson /*
11356567907SDavid Edmondson  * Process the checksum flags `flags' provided by the peer for the
11456567907SDavid Edmondson  * packet `mp'.
11556567907SDavid Edmondson  */
116843e1988Sjohnlev static mblk_t *
xnbo_cksum_from_peer(xnb_t * xnbp,mblk_t * mp,uint16_t flags)117843e1988Sjohnlev xnbo_cksum_from_peer(xnb_t *xnbp, mblk_t *mp, uint16_t flags)
118843e1988Sjohnlev {
119551bc2a6Smrj 	xnbo_t *xnbop = xnbp->xnb_flavour_data;
121843e1988Sjohnlev 	ASSERT(mp->b_next == NULL);
123843e1988Sjohnlev 	if ((flags & NETTXF_csum_blank) != 0) {
124fd0939efSDavid Edmondson 		uint32_t capab = xnbop->o_hcksum_capab;
125fd0939efSDavid Edmondson 
126fd0939efSDavid Edmondson #ifdef XNBO_DEBUG
127fd0939efSDavid Edmondson 		if (!xnbo_cksum_offload_from_peer)
128fd0939efSDavid Edmondson 			capab = 0;
129fd0939efSDavid Edmondson #endif /* XNBO_DEBUG */
130fd0939efSDavid Edmondson 
131843e1988Sjohnlev 		/*
132843e1988Sjohnlev 		 * The checksum in the packet is blank.  Determine
133843e1988Sjohnlev 		 * whether we can do hardware offload and, if so,
134843e1988Sjohnlev 		 * update the flags on the mblk according.  If not,
135843e1988Sjohnlev 		 * calculate and insert the checksum using software.
136843e1988Sjohnlev 		 */
137fd0939efSDavid Edmondson 		mp = xnb_process_cksum_flags(xnbp, mp, capab);
138843e1988Sjohnlev 	}
140843e1988Sjohnlev 	return (mp);
141843e1988Sjohnlev }
14356567907SDavid Edmondson /*
14456567907SDavid Edmondson  * Calculate the checksum flags to be relayed to the peer for the
14556567907SDavid Edmondson  * packet `mp'.
14656567907SDavid Edmondson  */
147843e1988Sjohnlev static uint16_t
xnbo_cksum_to_peer(xnb_t * xnbp,mblk_t * mp)148843e1988Sjohnlev xnbo_cksum_to_peer(xnb_t *xnbp, mblk_t *mp)
149843e1988Sjohnlev {
15056567907SDavid Edmondson 	_NOTE(ARGUNUSED(xnbp));
151843e1988Sjohnlev 	uint16_t r = 0;
15256567907SDavid Edmondson 	uint32_t pflags, csum;
154fd0939efSDavid Edmondson #ifdef XNBO_DEBUG
155fd0939efSDavid Edmondson 	if (!xnbo_cksum_offload_to_peer)
156fd0939efSDavid Edmondson 		return (0);
157fd0939efSDavid Edmondson #endif /* XNBO_DEBUG */
158fd0939efSDavid Edmondson 
159843e1988Sjohnlev 	/*
160843e1988Sjohnlev 	 * We might also check for HCK_PARTIALCKSUM here and,
161843e1988Sjohnlev 	 * providing that the partial checksum covers the TCP/UDP
162843e1988Sjohnlev 	 * payload, return NETRXF_data_validated.
163843e1988Sjohnlev 	 *
164843e1988Sjohnlev 	 * It seems that it's probably not worthwhile, as even MAC
165843e1988Sjohnlev 	 * devices which advertise HCKSUM_INET_PARTIAL in their
166843e1988Sjohnlev 	 * capabilities tend to use HCK_FULLCKSUM on the receive side
167843e1988Sjohnlev 	 * - they are actually saying that in the output path the
168843e1988Sjohnlev 	 * caller must use HCK_PARTIALCKSUM.
16956567907SDavid Edmondson 	 *
17056567907SDavid Edmondson 	 * Then again, if a NIC supports HCK_PARTIALCKSUM in its'
17156567907SDavid Edmondson 	 * output path, the host IP stack will use it. If such packets
17256567907SDavid Edmondson 	 * are destined for the peer (i.e. looped around) we would
17356567907SDavid Edmondson 	 * gain some advantage.
174843e1988Sjohnlev 	 */
1760dc2366fSVenugopal Iyer 	mac_hcksum_get(mp, NULL, NULL, NULL, &csum, &pflags);
17856567907SDavid Edmondson 	/*
17956567907SDavid Edmondson 	 * If the MAC driver has asserted that the checksum is
18056567907SDavid Edmondson 	 * good, let the peer know.
18156567907SDavid Edmondson 	 */
18256567907SDavid Edmondson 	if (((pflags & HCK_FULLCKSUM) != 0) &&
18356567907SDavid Edmondson 	    (((pflags & HCK_FULLCKSUM_OK) != 0) ||
18456567907SDavid Edmondson 	    (csum == 0xffff)))
18556567907SDavid Edmondson 		r |= NETRXF_data_validated;
187843e1988Sjohnlev 	return (r);
188843e1988Sjohnlev }
190843e1988Sjohnlev /*
191843e1988Sjohnlev  * Packets from the mac device come here.  We pass them to the peer.
192843e1988Sjohnlev  */
193843e1988Sjohnlev /*ARGSUSED*/
194843e1988Sjohnlev static void
xnbo_from_mac(void * arg,mac_resource_handle_t mrh,mblk_t * mp,boolean_t loopback)195da14cebeSEric Cheng xnbo_from_mac(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
196da14cebeSEric Cheng     boolean_t loopback)
197843e1988Sjohnlev {
198843e1988Sjohnlev 	xnb_t *xnbp = arg;
200551bc2a6Smrj 	mp = xnb_copy_to_peer(xnbp, mp);
202843e1988Sjohnlev 	if (mp != NULL)
203843e1988Sjohnlev 		freemsgchain(mp);
204843e1988Sjohnlev }
206843e1988Sjohnlev /*
207843e1988Sjohnlev  * Packets from the mac device come here. We pass them to the peer if
208843e1988Sjohnlev  * the destination mac address matches or it's a multicast/broadcast
209843e1988Sjohnlev  * address.
210843e1988Sjohnlev  */
211843e1988Sjohnlev static void
xnbo_from_mac_filter(void * arg,mac_resource_handle_t mrh,mblk_t * mp,boolean_t loopback)212da14cebeSEric Cheng xnbo_from_mac_filter(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
213da14cebeSEric Cheng     boolean_t loopback)
214843e1988Sjohnlev {
21556567907SDavid Edmondson 	_NOTE(ARGUNUSED(loopback));
216843e1988Sjohnlev 	xnb_t *xnbp = arg;
217551bc2a6Smrj 	xnbo_t *xnbop = xnbp->xnb_flavour_data;
218843e1988Sjohnlev 	mblk_t *next, *keep, *keep_head, *free, *free_head;
220843e1988Sjohnlev 	keep = keep_head = free = free_head = NULL;
222843e1988Sjohnlev #define	ADD(list, bp)				\
223843e1988Sjohnlev 	if (list != NULL)			\
224843e1988Sjohnlev 		list->b_next = bp;		\
225843e1988Sjohnlev 	else					\
226843e1988Sjohnlev 		list##_head = bp;		\
227843e1988Sjohnlev 	list = bp;
229843e1988Sjohnlev 	for (; mp != NULL; mp = next) {
230843e1988Sjohnlev 		mac_header_info_t hdr_info;
232843e1988Sjohnlev 		next = mp->b_next;
233843e1988Sjohnlev 		mp->b_next = NULL;
235843e1988Sjohnlev 		if (mac_header_info(xnbop->o_mh, mp, &hdr_info) != 0) {
236843e1988Sjohnlev 			ADD(free, mp);
237843e1988Sjohnlev 			continue;
238843e1988Sjohnlev 		}
240843e1988Sjohnlev 		if ((hdr_info.mhi_dsttype == MAC_ADDRTYPE_BROADCAST) ||
241843e1988Sjohnlev 		    (hdr_info.mhi_dsttype == MAC_ADDRTYPE_MULTICAST)) {
242843e1988Sjohnlev 			ADD(keep, mp);
243843e1988Sjohnlev 			continue;
244843e1988Sjohnlev 		}
246551bc2a6Smrj 		if (bcmp(hdr_info.mhi_daddr, xnbp->xnb_mac_addr,
247551bc2a6Smrj 		    sizeof (xnbp->xnb_mac_addr)) == 0) {
248843e1988Sjohnlev 			ADD(keep, mp);
249843e1988Sjohnlev 			continue;
250843e1988Sjohnlev 		}
252843e1988Sjohnlev 		ADD(free, mp);
253843e1988Sjohnlev 	}
254843e1988Sjohnlev #undef	ADD
256843e1988Sjohnlev 	if (keep_head != NULL)
257da14cebeSEric Cheng 		xnbo_from_mac(xnbp, mrh, keep_head, B_FALSE);
259843e1988Sjohnlev 	if (free_head != NULL)
260843e1988Sjohnlev 		freemsgchain(free_head);
261843e1988Sjohnlev }
263843e1988Sjohnlev static boolean_t
xnbo_open_mac(xnb_t * xnbp,char * mac)264843e1988Sjohnlev xnbo_open_mac(xnb_t *xnbp, char *mac)
265843e1988Sjohnlev {
266551bc2a6Smrj 	xnbo_t *xnbop = xnbp->xnb_flavour_data;
26756567907SDavid Edmondson 	int err;
268843e1988Sjohnlev 	const mac_info_t *mi;
269da14cebeSEric Cheng 	void (*rx_fn)(void *, mac_resource_handle_t, mblk_t *, boolean_t);
270da14cebeSEric Cheng 	struct ether_addr ea;
271e7801d59Ssowmini 	uint_t max_sdu;
272da14cebeSEric Cheng 	mac_diag_t diag;
274d62bc4baSyz 	if ((err = mac_open_by_linkname(mac, &xnbop->o_mh)) != 0) {
275843e1988Sjohnlev 		cmn_err(CE_WARN, "xnbo_open_mac: "
276d62bc4baSyz 		    "cannot open mac for link %s (%d)", mac, err);
277843e1988Sjohnlev 		return (B_FALSE);
278843e1988Sjohnlev 	}
279843e1988Sjohnlev 	ASSERT(xnbop->o_mh != NULL);
281843e1988Sjohnlev 	mi = mac_info(xnbop->o_mh);
282843e1988Sjohnlev 	ASSERT(mi != NULL);
284843e1988Sjohnlev 	if (mi->mi_media != DL_ETHER) {
285843e1988Sjohnlev 		cmn_err(CE_WARN, "xnbo_open_mac: "
286d62bc4baSyz 		    "device is not DL_ETHER (%d)", mi->mi_media);
2870324f02aSDavid Edmondson 		i_xnbo_close_mac(xnbp, B_TRUE);
288843e1988Sjohnlev 		return (B_FALSE);
289843e1988Sjohnlev 	}
290843e1988Sjohnlev 	if (mi->mi_media != mi->mi_nativemedia) {
291843e1988Sjohnlev 		cmn_err(CE_WARN, "xnbo_open_mac: "
292d62bc4baSyz 		    "device media and native media mismatch (%d != %d)",
293843e1988Sjohnlev 		    mi->mi_media, mi->mi_nativemedia);
2940324f02aSDavid Edmondson 		i_xnbo_close_mac(xnbp, B_TRUE);
295843e1988Sjohnlev 		return (B_FALSE);
296843e1988Sjohnlev 	}
298e7801d59Ssowmini 	mac_sdu_get(xnbop->o_mh, NULL, &max_sdu);
299e7801d59Ssowmini 	if (max_sdu > XNBMAXPKT) {
300e7801d59Ssowmini 		cmn_err(CE_WARN, "xnbo_open_mac: mac device SDU too big (%d)",
301e7801d59Ssowmini 		    max_sdu);
3020324f02aSDavid Edmondson 		i_xnbo_close_mac(xnbp, B_TRUE);
303843e1988Sjohnlev 		return (B_FALSE);
304843e1988Sjohnlev 	}
306fc4e975dSVenugopal Iyer 	/*
307fc4e975dSVenugopal Iyer 	 * MAC_OPEN_FLAGS_MULTI_PRIMARY is relevant when we are migrating a
308fc4e975dSVenugopal Iyer 	 * guest on the localhost itself. In this case we would have the MAC
309fc4e975dSVenugopal Iyer 	 * client open for the guest being migrated *and* also for the
310fc4e975dSVenugopal Iyer 	 * migrated guest (i.e. the former will be active till the migration
311fc4e975dSVenugopal Iyer 	 * is complete when the latter will be activated). This flag states
312fc4e975dSVenugopal Iyer 	 * that it is OK for mac_unicast_add to add the primary MAC unicast
313fc4e975dSVenugopal Iyer 	 * address multiple times.
314fc4e975dSVenugopal Iyer 	 */
315da14cebeSEric Cheng 	if (mac_client_open(xnbop->o_mh, &xnbop->o_mch, NULL,
316fc4e975dSVenugopal Iyer 	    MAC_OPEN_FLAGS_USE_DATALINK_NAME |
317fc4e975dSVenugopal Iyer 	    MAC_OPEN_FLAGS_MULTI_PRIMARY) != 0) {
318da14cebeSEric Cheng 		cmn_err(CE_WARN, "xnbo_open_mac: "
319da14cebeSEric Cheng 		    "error (%d) opening mac client", err);
3200324f02aSDavid Edmondson 		i_xnbo_close_mac(xnbp, B_TRUE);
321da14cebeSEric Cheng 		return (B_FALSE);
322da14cebeSEric Cheng 	}
323da14cebeSEric Cheng 
32456567907SDavid Edmondson 	if (xnbop->o_need_rx_filter)
325843e1988Sjohnlev 		rx_fn = xnbo_from_mac_filter;
326843e1988Sjohnlev 	else
327843e1988Sjohnlev 		rx_fn = xnbo_from_mac;
329fc4e975dSVenugopal Iyer 	err = mac_unicast_add_set_rx(xnbop->o_mch, NULL, MAC_UNICAST_PRIMARY,
33056567907SDavid Edmondson 	    &xnbop->o_mah, 0, &diag, xnbop->o_multicast_control ? rx_fn : NULL,
33156567907SDavid Edmondson 	    xnbp);
332fc4e975dSVenugopal Iyer 	if (err != 0) {
333fc4e975dSVenugopal Iyer 		cmn_err(CE_WARN, "xnbo_open_mac: failed to get the primary "
334fc4e975dSVenugopal Iyer 		    "MAC address of %s: %d", mac, err);
3350324f02aSDavid Edmondson 		i_xnbo_close_mac(xnbp, B_TRUE);
336fc4e975dSVenugopal Iyer 		return (B_FALSE);
337fc4e975dSVenugopal Iyer 	}
33856567907SDavid Edmondson 	if (!xnbop->o_multicast_control) {
339da14cebeSEric Cheng 		err = mac_promisc_add(xnbop->o_mch, MAC_CLIENT_PROMISC_ALL,
340ae6aa22aSVenugopal Iyer 		    rx_fn, xnbp, &xnbop->o_mphp, MAC_PROMISC_FLAGS_NO_TX_LOOP |
341ae6aa22aSVenugopal Iyer 		    MAC_PROMISC_FLAGS_VLAN_TAG_STRIP);
342da14cebeSEric Cheng 		if (err != 0) {
343da14cebeSEric Cheng 			cmn_err(CE_WARN, "xnbo_open_mac: "
344da14cebeSEric Cheng 			    "cannot enable promiscuous mode of %s: %d",
345da14cebeSEric Cheng 			    mac, err);
3460324f02aSDavid Edmondson 			i_xnbo_close_mac(xnbp, B_TRUE);
347da14cebeSEric Cheng 			return (B_FALSE);
348da14cebeSEric Cheng 		}
349da14cebeSEric Cheng 		xnbop->o_promiscuous = B_TRUE;
350da14cebeSEric Cheng 	}
35256567907SDavid Edmondson 	if (xnbop->o_need_setphysaddr) {
353da14cebeSEric Cheng 		err = mac_unicast_primary_set(xnbop->o_mh, xnbp->xnb_mac_addr);
354843e1988Sjohnlev 		/* Warn, but continue on. */
355843e1988Sjohnlev 		if (err != 0) {
356551bc2a6Smrj 			bcopy(xnbp->xnb_mac_addr, ea.ether_addr_octet,
357843e1988Sjohnlev 			    ETHERADDRL);
358843e1988Sjohnlev 			cmn_err(CE_WARN, "xnbo_open_mac: "
359843e1988Sjohnlev 			    "cannot set MAC address of %s to "
360da14cebeSEric Cheng 			    "%s: %d", mac, ether_sprintf(&ea), err);
361843e1988Sjohnlev 		}
362843e1988Sjohnlev 	}
36456567907SDavid Edmondson 	if (!mac_capab_get(xnbop->o_mh, MAC_CAPAB_HCKSUM,
36556567907SDavid Edmondson 	    &xnbop->o_hcksum_capab))
36656567907SDavid Edmondson 		xnbop->o_hcksum_capab = 0;
36756567907SDavid Edmondson 
368843e1988Sjohnlev 	xnbop->o_running = B_TRUE;
370843e1988Sjohnlev 	return (B_TRUE);
371843e1988Sjohnlev }
373843e1988Sjohnlev static void
xnbo_close_mac(xnb_t * xnbp)37456567907SDavid Edmondson xnbo_close_mac(xnb_t *xnbp)
3750324f02aSDavid Edmondson {
3760324f02aSDavid Edmondson 	i_xnbo_close_mac(xnbp, B_FALSE);
3770324f02aSDavid Edmondson }