/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* * Xen network backend - mac client edition. * * A driver that sits above an existing GLDv3/Nemo MAC driver and * relays packets to/from that driver from/to a guest domain. */ #ifdef DEBUG #define XNBO_DEBUG 1 #endif /* DEBUG */ #include "xnb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef XNBO_DEBUG boolean_t xnbo_cksum_offload_to_peer = B_TRUE; boolean_t xnbo_cksum_offload_from_peer = B_TRUE; #endif /* XNBO_DEBUG */ /* Track multicast addresses. */ typedef struct xmca { struct xmca *next; ether_addr_t addr; } xmca_t; /* State about this device instance. */ typedef struct xnbo { mac_handle_t o_mh; mac_client_handle_t o_mch; mac_unicast_handle_t o_mah; mac_promisc_handle_t o_mphp; boolean_t o_running; boolean_t o_promiscuous; uint32_t o_hcksum_capab; xmca_t *o_mca; char o_link_name[LIFNAMSIZ]; boolean_t o_need_rx_filter; boolean_t o_need_setphysaddr; boolean_t o_multicast_control; } xnbo_t; static void xnbo_close_mac(xnb_t *); static void i_xnbo_close_mac(xnb_t *, boolean_t); /* * Packets from the peer come here. We pass them to the mac device. */ static void xnbo_to_mac(xnb_t *xnbp, mblk_t *mp) { xnbo_t *xnbop = xnbp->xnb_flavour_data; ASSERT(mp != NULL); if (!xnbop->o_running) { xnbp->xnb_stat_tx_too_early++; goto fail; } if (mac_tx(xnbop->o_mch, mp, 0, MAC_DROP_ON_NO_DESC, NULL) != (mac_tx_cookie_t)NULL) { xnbp->xnb_stat_mac_full++; } return; fail: freemsgchain(mp); } /* * Process the checksum flags `flags' provided by the peer for the * packet `mp'. */ static mblk_t * xnbo_cksum_from_peer(xnb_t *xnbp, mblk_t *mp, uint16_t flags) { xnbo_t *xnbop = xnbp->xnb_flavour_data; ASSERT(mp->b_next == NULL); if ((flags & NETTXF_csum_blank) != 0) { uint32_t capab = xnbop->o_hcksum_capab; #ifdef XNBO_DEBUG if (!xnbo_cksum_offload_from_peer) capab = 0; #endif /* XNBO_DEBUG */ /* * The checksum in the packet is blank. Determine * whether we can do hardware offload and, if so, * update the flags on the mblk according. If not, * calculate and insert the checksum using software. */ mp = xnb_process_cksum_flags(xnbp, mp, capab); } return (mp); } /* * Calculate the checksum flags to be relayed to the peer for the * packet `mp'. */ static uint16_t xnbo_cksum_to_peer(xnb_t *xnbp, mblk_t *mp) { _NOTE(ARGUNUSED(xnbp)); uint16_t r = 0; uint32_t pflags, csum; #ifdef XNBO_DEBUG if (!xnbo_cksum_offload_to_peer) return (0); #endif /* XNBO_DEBUG */ /* * We might also check for HCK_PARTIALCKSUM here and, * providing that the partial checksum covers the TCP/UDP * payload, return NETRXF_data_validated. * * It seems that it's probably not worthwhile, as even MAC * devices which advertise HCKSUM_INET_PARTIAL in their * capabilities tend to use HCK_FULLCKSUM on the receive side * - they are actually saying that in the output path the * caller must use HCK_PARTIALCKSUM. * * Then again, if a NIC supports HCK_PARTIALCKSUM in its' * output path, the host IP stack will use it. If such packets * are destined for the peer (i.e. looped around) we would * gain some advantage. */ mac_hcksum_get(mp, NULL, NULL, NULL, &csum, &pflags); /* * If the MAC driver has asserted that the checksum is * good, let the peer know. */ if (((pflags & HCK_FULLCKSUM) != 0) && (((pflags & HCK_FULLCKSUM_OK) != 0) || (csum == 0xffff))) r |= NETRXF_data_validated; return (r); } /* * Packets from the mac device come here. We pass them to the peer. */ /*ARGSUSED*/ static void xnbo_from_mac(void *arg, mac_resource_handle_t mrh, mblk_t *mp, boolean_t loopback) { xnb_t *xnbp = arg; mp = xnb_copy_to_peer(xnbp, mp); if (mp != NULL) freemsgchain(mp); } /* * Packets from the mac device come here. We pass them to the peer if * the destination mac address matches or it's a multicast/broadcast * address. */ static void xnbo_from_mac_filter(void *arg, mac_resource_handle_t mrh, mblk_t *mp, boolean_t loopback) { _NOTE(ARGUNUSED(loopback)); xnb_t *xnbp = arg; xnbo_t *xnbop = xnbp->xnb_flavour_data; mblk_t *next, *keep, *keep_head, *free, *free_head; keep = keep_head = free = free_head = NULL; #define ADD(list, bp) \ if (list != NULL) \ list->b_next = bp; \ else \ list##_head = bp; \ list = bp; for (; mp != NULL; mp = next) { mac_header_info_t hdr_info; next = mp->b_next; mp->b_next = NULL; if (mac_header_info(xnbop->o_mh, mp, &hdr_info) != 0) { ADD(free, mp); continue; } if ((hdr_info.mhi_dsttype == MAC_ADDRTYPE_BROADCAST) || (hdr_info.mhi_dsttype == MAC_ADDRTYPE_MULTICAST)) { ADD(keep, mp); continue; } if (bcmp(hdr_info.mhi_daddr, xnbp->xnb_mac_addr, sizeof (xnbp->xnb_mac_addr)) == 0) { ADD(keep, mp); continue; } ADD(free, mp); } #undef ADD if (keep_head != NULL) xnbo_from_mac(xnbp, mrh, keep_head, B_FALSE); if (free_head != NULL) freemsgchain(free_head); } static boolean_t xnbo_open_mac(xnb_t *xnbp, char *mac) { xnbo_t *xnbop = xnbp->xnb_flavour_data; int err; const mac_info_t *mi; void (*rx_fn)(void *, mac_resource_handle_t, mblk_t *, boolean_t); struct ether_addr ea; uint_t max_sdu; mac_diag_t diag; if ((err = mac_open_by_linkname(mac, &xnbop->o_mh)) != 0) { cmn_err(CE_WARN, "xnbo_open_mac: " "cannot open mac for link %s (%d)", mac, err); return (B_FALSE); } ASSERT(xnbop->o_mh != NULL); mi = mac_info(xnbop->o_mh); ASSERT(mi != NULL); if (mi->mi_media != DL_ETHER) { cmn_err(CE_WARN, "xnbo_open_mac: " "device is not DL_ETHER (%d)", mi->mi_media); i_xnbo_close_mac(xnbp, B_TRUE); return (B_FALSE); } if (mi->mi_media != mi->mi_nativemedia) { cmn_err(CE_WARN, "xnbo_open_mac: " "device media and native media mismatch (%d != %d)", mi->mi_media, mi->mi_nativemedia); i_xnbo_close_mac(xnbp, B_TRUE); return (B_FALSE); } mac_sdu_get(xnbop->o_mh, NULL, &max_sdu); if (max_sdu > XNBMAXPKT) { cmn_err(CE_WARN, "xnbo_open_mac: mac device SDU too big (%d)", max_sdu); i_xnbo_close_mac(xnbp, B_TRUE); return (B_FALSE); } /* * MAC_OPEN_FLAGS_MULTI_PRIMARY is relevant when we are migrating a * guest on the localhost itself. In this case we would have the MAC * client open for the guest being migrated *and* also for the * migrated guest (i.e. the former will be active till the migration * is complete when the latter will be activated). This flag states * that it is OK for mac_unicast_add to add the primary MAC unicast * address multiple times. */ if (mac_client_open(xnbop->o_mh, &xnbop->o_mch, NULL, MAC_OPEN_FLAGS_USE_DATALINK_NAME | MAC_OPEN_FLAGS_MULTI_PRIMARY) != 0) { cmn_err(CE_WARN, "xnbo_open_mac: " "error (%d) opening mac client", err); i_xnbo_close_mac(xnbp, B_TRUE); return (B_FALSE); } if (xnbop->o_need_rx_filter) rx_fn = xnbo_from_mac_filter; else rx_fn = xnbo_from_mac; err = mac_unicast_add_set_rx(xnbop->o_mch, NULL, MAC_UNICAST_PRIMARY, &xnbop->o_mah, 0, &diag, xnbop->o_multicast_control ? rx_fn : NULL, xnbp); if (err != 0) { cmn_err(CE_WARN, "xnbo_open_mac: failed to get the primary " "MAC address of %s: %d", mac, err); i_xnbo_close_mac(xnbp, B_TRUE); return (B_FALSE); } if (!xnbop->o_multicast_control) { err = mac_promisc_add(xnbop->o_mch, MAC_CLIENT_PROMISC_ALL, rx_fn, xnbp, &xnbop->o_mphp, MAC_PROMISC_FLAGS_NO_TX_LOOP | MAC_PROMISC_FLAGS_VLAN_TAG_STRIP); if (err != 0) { cmn_err(CE_WARN, "xnbo_open_mac: " "cannot enable promiscuous mode of %s: %d", mac, err); i_xnbo_close_mac(xnbp, B_TRUE); return (B_FALSE); } xnbop->o_promiscuous = B_TRUE; } if (xnbop->o_need_setphysaddr) { err = mac_unicast_primary_set(xnbop->o_mh, xnbp->xnb_mac_addr); /* Warn, but continue on. */ if (err != 0) { bcopy(xnbp->xnb_mac_addr, ea.ether_addr_octet, ETHERADDRL); cmn_err(CE_WARN, "xnbo_open_mac: " "cannot set MAC address of %s to " "%s: %d", mac, ether_sprintf(&ea), err); } } if (!mac_capab_get(xnbop->o_mh, MAC_CAPAB_HCKSUM, &xnbop->o_hcksum_capab)) xnbop->o_hcksum_capab = 0; xnbop->o_running = B_TRUE; return (B_TRUE); } static void xnbo_close_mac(xnb_t *xnbp) { i_xnbo_close_mac(xnbp, B_FALSE); } static void i_xnbo_close_mac(xnb_t *xnbp, boolean_t locked) { xnbo_t *xnbop = xnbp->xnb_flavour_data; xmca_t *loop; ASSERT(!locked || MUTEX_HELD(&xnbp->xnb_state_lock)); if (xnbop->o_mh == NULL) return; if (xnbop->o_running) xnbop->o_running = B_FALSE; if (!locked) mutex_enter(&xnbp->xnb_state_lock); loop = xnbop->o_mca; xnbop->o_mca = NULL; if (!locked) mutex_exit(&xnbp->xnb_state_lock); while (loop != NULL) { xmca_t *next = loop->next; DTRACE_PROBE3(mcast_remove, (char *), "close", (void *), xnbp, (etheraddr_t *), loop->addr); (void) mac_multicast_remove(xnbop->o_mch, loop->addr); kmem_free(loop, sizeof (*loop)); loop = next; } if (xnbop->o_promiscuous) { if (xnbop->o_mphp != NULL) { mac_promisc_remove(xnbop->o_mphp); xnbop->o_mphp = NULL; } xnbop->o_promiscuous = B_FALSE; } else { if (xnbop->o_mch != NULL) mac_rx_clear(xnbop->o_mch); } if (xnbop->o_mah != NULL) { (void) mac_unicast_remove(xnbop->o_mch, xnbop->o_mah); xnbop->o_mah = NULL; } if (xnbop->o_mch != NULL) { mac_client_close(xnbop->o_mch, 0); xnbop->o_mch = NULL; } mac_close(xnbop->o_mh); xnbop->o_mh = NULL; } /* * Hotplug has completed and we are connected to the peer. We have all * the information we need to exchange traffic, so open the MAC device * and configure it appropriately. */ static boolean_t xnbo_start_connect(xnb_t *xnbp) { xnbo_t *xnbop = xnbp->xnb_flavour_data; return (xnbo_open_mac(xnbp, xnbop->o_link_name)); } /* * The guest has successfully synchronize with this instance. We read * the configuration of the guest from xenstore to check whether the * guest requests multicast control. If not (the default) we make a * note that the MAC device needs to be used in promiscious mode. */ static boolean_t xnbo_peer_connected(xnb_t *xnbp) { char *oename; int request; xnbo_t *xnbop = xnbp->xnb_flavour_data; oename = xvdi_get_oename(xnbp->xnb_devinfo); if (xenbus_scanf(XBT_NULL, oename, "request-multicast-control", "%d", &request) != 0) request = 0; xnbop->o_multicast_control = (request > 0); return (B_TRUE); } /* * The guest domain has closed down the inter-domain connection. We * close the underlying MAC device. */ static void xnbo_peer_disconnected(xnb_t *xnbp) { xnbo_close_mac(xnbp); } /* * The hotplug script has completed. We read information from xenstore * about our configuration, most notably the name of the MAC device we * should use. */ static boolean_t xnbo_hotplug_connected(xnb_t *xnbp) { char *xsname; xnbo_t *xnbop = xnbp->xnb_flavour_data; int need; xsname = xvdi_get_xsname(xnbp->xnb_devinfo); if (xenbus_scanf(XBT_NULL, xsname, "nic", "%s", xnbop->o_link_name) != 0) { cmn_err(CE_WARN, "xnbo_connect: " "cannot read nic name from %s", xsname); return (B_FALSE); } if (xenbus_scanf(XBT_NULL, xsname, "SUNW-need-rx-filter", "%d", &need) != 0) need = 0; xnbop->o_need_rx_filter = (need > 0); if (xenbus_scanf(XBT_NULL, xsname, "SUNW-need-set-physaddr", "%d", &need) != 0) need = 0; xnbop->o_need_setphysaddr = (need > 0); return (B_TRUE); } /* * Find the multicast address `addr', return B_TRUE if it is one that * we receive. If `remove', remove it from the set received. */ static boolean_t xnbo_mcast_find(xnb_t *xnbp, ether_addr_t *addr, boolean_t remove) { xnbo_t *xnbop = xnbp->xnb_flavour_data; xmca_t *prev, *del, *this; ASSERT(MUTEX_HELD(&xnbp->xnb_state_lock)); ASSERT(xnbop->o_promiscuous == B_FALSE); prev = del = NULL; this = xnbop->o_mca; while (this != NULL) { if (bcmp(&this->addr, addr, sizeof (this->addr)) == 0) { del = this; if (remove) { if (prev == NULL) xnbop->o_mca = this->next; else prev->next = this->next; } break; } prev = this; this = this->next; } if (del == NULL) return (B_FALSE); if (remove) { DTRACE_PROBE3(mcast_remove, (char *), "remove", (void *), xnbp, (etheraddr_t *), del->addr); mac_multicast_remove(xnbop->o_mch, del->addr); kmem_free(del, sizeof (*del)); } return (B_TRUE); } /* * Add the multicast address `addr' to the set received. */ static boolean_t xnbo_mcast_add(xnb_t *xnbp, ether_addr_t *addr) { xnbo_t *xnbop = xnbp->xnb_flavour_data; boolean_t r = B_FALSE; ASSERT(xnbop->o_promiscuous == B_FALSE); mutex_enter(&xnbp->xnb_state_lock); if (xnbo_mcast_find(xnbp, addr, B_FALSE)) { r = B_TRUE; } else if (mac_multicast_add(xnbop->o_mch, (const uint8_t *)addr) == 0) { xmca_t *mca; DTRACE_PROBE3(mcast_add, (char *), "add", (void *), xnbp, (etheraddr_t *), addr); mca = kmem_alloc(sizeof (*mca), KM_SLEEP); bcopy(addr, &mca->addr, sizeof (mca->addr)); mca->next = xnbop->o_mca; xnbop->o_mca = mca; r = B_TRUE; } mutex_exit(&xnbp->xnb_state_lock); return (r); } /* * Remove the multicast address `addr' from the set received. */ static boolean_t xnbo_mcast_del(xnb_t *xnbp, ether_addr_t *addr) { boolean_t r; mutex_enter(&xnbp->xnb_state_lock); r = xnbo_mcast_find(xnbp, addr, B_TRUE); mutex_exit(&xnbp->xnb_state_lock); return (r); } static int xnbo_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) { static xnb_flavour_t flavour = { xnbo_to_mac, xnbo_peer_connected, xnbo_peer_disconnected, xnbo_hotplug_connected, xnbo_start_connect, xnbo_cksum_from_peer, xnbo_cksum_to_peer, xnbo_mcast_add, xnbo_mcast_del, }; xnbo_t *xnbop; switch (cmd) { case DDI_ATTACH: break; case DDI_RESUME: return (DDI_SUCCESS); default: return (DDI_FAILURE); } xnbop = kmem_zalloc(sizeof (*xnbop), KM_SLEEP); if (xnb_attach(dip, &flavour, xnbop) != DDI_SUCCESS) { kmem_free(xnbop, sizeof (*xnbop)); return (DDI_FAILURE); } return (DDI_SUCCESS); } static int xnbo_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) { xnb_t *xnbp = ddi_get_driver_private(dip); xnbo_t *xnbop = xnbp->xnb_flavour_data; switch (cmd) { case DDI_DETACH: break; case DDI_SUSPEND: return (DDI_SUCCESS); default: return (DDI_FAILURE); } mutex_enter(&xnbp->xnb_tx_lock); mutex_enter(&xnbp->xnb_rx_lock); if (!xnbp->xnb_detachable || xnbp->xnb_connected || (xnbp->xnb_tx_buf_count > 0)) { mutex_exit(&xnbp->xnb_rx_lock); mutex_exit(&xnbp->xnb_tx_lock); return (DDI_FAILURE); } mutex_exit(&xnbp->xnb_rx_lock); mutex_exit(&xnbp->xnb_tx_lock); xnbo_close_mac(xnbp); kmem_free(xnbop, sizeof (*xnbop)); xnb_detach(dip); return (DDI_SUCCESS); } static struct cb_ops cb_ops = { nulldev, /* open */ nulldev, /* close */ nodev, /* strategy */ nodev, /* print */ nodev, /* dump */ nodev, /* read */ nodev, /* write */ nodev, /* ioctl */ nodev, /* devmap */ nodev, /* mmap */ nodev, /* segmap */ nochpoll, /* poll */ ddi_prop_op, /* cb_prop_op */ 0, /* streamtab */ D_NEW | D_MP | D_64BIT /* Driver compatibility flag */ }; static struct dev_ops ops = { DEVO_REV, /* devo_rev */ 0, /* devo_refcnt */ nulldev, /* devo_getinfo */ nulldev, /* devo_identify */ nulldev, /* devo_probe */ xnbo_attach, /* devo_attach */ xnbo_detach, /* devo_detach */ nodev, /* devo_reset */ &cb_ops, /* devo_cb_ops */ (struct bus_ops *)0, /* devo_bus_ops */ NULL, /* devo_power */ ddi_quiesce_not_needed, /* devo_quiesce */ }; static struct modldrv modldrv = { &mod_driverops, "xnbo driver", &ops, }; static struct modlinkage modlinkage = { MODREV_1, &modldrv, NULL }; int _init(void) { return (mod_install(&modlinkage)); } int _info(struct modinfo *modinfop) { return (mod_info(&modlinkage, modinfop)); } int _fini(void) { return (mod_remove(&modlinkage)); }