/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. * Copyright 2012 OmniTI Computer Consulting, Inc All rights reserved. * Copyright 2020 Joyent, Inc. */ /* * IEEE 802.3ad Link Aggregation - Link Aggregation MAC ports. * * Implements the functions needed to manage the MAC ports that are * part of Link Aggregation groups. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static kmem_cache_t *aggr_port_cache; static id_space_t *aggr_portids; static void aggr_port_notify_cb(void *, mac_notify_type_t); /*ARGSUSED*/ static int aggr_port_constructor(void *buf, void *arg, int kmflag) { bzero(buf, sizeof (aggr_port_t)); return (0); } /*ARGSUSED*/ static void aggr_port_destructor(void *buf, void *arg) { aggr_port_t *port = buf; ASSERT3P(port->lp_mnh, ==, NULL); ASSERT(!port->lp_tx_grp_added); for (uint_t i = 0; i < MAX_GROUPS_PER_PORT; i++) ASSERT3P(port->lp_hwghs[i], ==, NULL); } void aggr_port_init(void) { aggr_port_cache = kmem_cache_create("aggr_port_cache", sizeof (aggr_port_t), 0, aggr_port_constructor, aggr_port_destructor, NULL, NULL, NULL, 0); /* * Allocate a id space to manage port identification. The range of * the arena will be from 1 to UINT16_MAX, because the LACP protocol * specifies 16-bit unique identification. */ aggr_portids = id_space_create("aggr_portids", 1, UINT16_MAX); ASSERT(aggr_portids != NULL); } void aggr_port_fini(void) { /* * This function is called only after all groups have been * freed. This ensures that there are no remaining allocated * ports when this function is invoked. */ kmem_cache_destroy(aggr_port_cache); id_space_destroy(aggr_portids); } /* ARGSUSED */ void aggr_port_init_callbacks(aggr_port_t *port) { /* add the port's receive callback */ port->lp_mnh = mac_notify_add(port->lp_mh, aggr_port_notify_cb, port); /* * Hold a reference of the grp and the port and this reference will * be released when the thread exits. * * The reference on the port is used for aggr_port_delete() to * continue without waiting for the thread to exit; the reference * on the grp is used for aggr_grp_delete() to wait for the thread * to exit before calling mac_unregister(). * * Note that these references will be released either in * aggr_port_delete() when mac_notify_remove() succeeds, or in * the aggr_port_notify_cb() callback when the port is deleted * (lp_closing is set). */ aggr_grp_port_hold(port); } int aggr_port_create(aggr_grp_t *grp, const datalink_id_t linkid, boolean_t force, aggr_port_t **pp) { int err; mac_handle_t mh; mac_client_handle_t mch = NULL; aggr_port_t *port; uint16_t portid; uint_t i; boolean_t no_link_update = B_FALSE; const mac_info_t *mip; uint32_t note; uint32_t margin; char client_name[MAXNAMELEN]; char aggr_name[MAXNAMELEN]; char port_name[MAXNAMELEN]; mac_diag_t diag; mac_unicast_handle_t mah; *pp = NULL; if ((err = mac_open_by_linkid(linkid, &mh)) != 0) return (err); mip = mac_info(mh); if (mip->mi_media != DL_ETHER || mip->mi_nativemedia != DL_ETHER) { err = EINVAL; goto fail; } /* * If the underlying MAC does not support link update notification, it * can only be aggregated if `force' is set. This is because aggr * depends on link notifications to attach ports whose link is up. */ note = mac_no_notification(mh); if ((note & (DL_NOTE_LINK_UP | DL_NOTE_LINK_DOWN)) != 0) { no_link_update = B_TRUE; if (!force) { /* * We borrow this error code to indicate that link * notification is not supported. */ err = ENETDOWN; goto fail; } } if (((err = dls_mgmt_get_linkinfo(grp->lg_linkid, aggr_name, NULL, NULL, NULL)) != 0) || ((err = dls_mgmt_get_linkinfo(linkid, port_name, NULL, NULL, NULL)) != 0)) { goto fail; } (void) snprintf(client_name, MAXNAMELEN, "%s-%s", aggr_name, port_name); if ((err = mac_client_open(mh, &mch, client_name, MAC_OPEN_FLAGS_IS_AGGR_PORT | MAC_OPEN_FLAGS_EXCLUSIVE)) != 0) { goto fail; } if ((portid = (uint16_t)id_alloc(aggr_portids)) == 0) { err = ENOMEM; goto fail; } /* * As the underlying MAC's current margin size is used to determine * the margin size of the aggregation itself, request the underlying * MAC not to change to a smaller size. */ if ((err = mac_margin_add(mh, &margin, B_TRUE)) != 0) { id_free(aggr_portids, portid); goto fail; } if ((err = mac_unicast_add(mch, NULL, MAC_UNICAST_PRIMARY | MAC_UNICAST_DISABLE_TX_VID_CHECK, &mah, 0, &diag)) != 0) { VERIFY3S(mac_margin_remove(mh, margin), ==, 0); id_free(aggr_portids, portid); goto fail; } port = kmem_cache_alloc(aggr_port_cache, KM_SLEEP); port->lp_refs = 1; port->lp_next = NULL; port->lp_mh = mh; port->lp_mch = mch; port->lp_mip = mip; port->lp_linkid = linkid; port->lp_closing = B_FALSE; port->lp_mah = mah; /* get the port's original MAC address */ mac_unicast_primary_get(port->lp_mh, port->lp_addr); /* initialize state */ port->lp_state = AGGR_PORT_STATE_STANDBY; port->lp_link_state = LINK_STATE_UNKNOWN; port->lp_ifspeed = 0; port->lp_link_duplex = LINK_DUPLEX_UNKNOWN; port->lp_started = B_FALSE; port->lp_tx_enabled = B_FALSE; port->lp_promisc_on = B_FALSE; port->lp_no_link_update = no_link_update; port->lp_portid = portid; port->lp_margin = margin; port->lp_prom_addr = NULL; /* * Save the current statistics of the port. They will be used * later by aggr_m_stats() when aggregating the statistics of * the constituent ports. */ for (i = 0; i < MAC_NSTAT; i++) { port->lp_stat[i] = aggr_port_stat(port, i + MAC_STAT_MIN); } for (i = 0; i < ETHER_NSTAT; i++) { port->lp_ether_stat[i] = aggr_port_stat(port, i + MACTYPE_STAT_MIN); } /* LACP related state */ port->lp_collector_enabled = B_FALSE; *pp = port; return (0); fail: if (mch != NULL) mac_client_close(mch, MAC_CLOSE_FLAGS_EXCLUSIVE); mac_close(mh); return (err); } void aggr_port_delete(aggr_port_t *port) { aggr_lacp_port_t *pl = &port->lp_lacp; ASSERT(!port->lp_promisc_on); port->lp_closing = B_TRUE; VERIFY0(mac_margin_remove(port->lp_mh, port->lp_margin)); mac_client_clear_flow_cb(port->lp_mch); /* * If the notification callback is already in process and waiting for * the aggr grp's mac perimeter, don't wait (otherwise there would be * deadlock). Otherwise, if mac_notify_remove() succeeds, we can * release the reference held when mac_notify_add() is called. */ if ((port->lp_mnh != NULL) && (mac_notify_remove(port->lp_mnh, B_FALSE) == 0)) { aggr_grp_port_rele(port); } port->lp_mnh = NULL; /* * Inform the the port lacp timer thread to exit. Note that waiting * for the thread to exit may cause deadlock since that thread may * need to enter into the mac perimeter which we are currently in. * It is fine to continue without waiting though since that thread * is holding a reference of the port. */ mutex_enter(&pl->lacp_timer_lock); pl->lacp_timer_bits |= LACP_THREAD_EXIT; cv_broadcast(&pl->lacp_timer_cv); mutex_exit(&pl->lacp_timer_lock); /* * Restore the port MAC address. Note it is called after the * port's notification callback being removed. This prevent * port's MAC_NOTE_UNICST notify callback function being called. */ (void) mac_unicast_primary_set(port->lp_mh, port->lp_addr); if (port->lp_mah != NULL) (void) mac_unicast_remove(port->lp_mch, port->lp_mah); mac_client_close(port->lp_mch, MAC_CLOSE_FLAGS_EXCLUSIVE); mac_close(port->lp_mh); AGGR_PORT_REFRELE(port); } void aggr_port_free(aggr_port_t *port) { ASSERT(port->lp_refs == 0); if (port->lp_grp != NULL) AGGR_GRP_REFRELE(port->lp_grp); port->lp_grp = NULL; id_free(aggr_portids, port->lp_portid); port->lp_portid = 0; mutex_destroy(&port->lp_lacp.lacp_timer_lock); cv_destroy(&port->lp_lacp.lacp_timer_cv); kmem_cache_free(aggr_port_cache, port); } /* * Invoked upon receiving a MAC_NOTE_LINK notification for * one of the constituent ports. */ boolean_t aggr_port_notify_link(aggr_grp_t *grp, aggr_port_t *port) { boolean_t do_attach = B_FALSE; boolean_t do_detach = B_FALSE; boolean_t link_state_changed = B_TRUE; uint64_t ifspeed; link_state_t link_state; link_duplex_t link_duplex; mac_perim_handle_t mph; ASSERT(MAC_PERIM_HELD(grp->lg_mh)); mac_perim_enter_by_mh(port->lp_mh, &mph); /* * link state change? For links that do not support link state * notification, always assume the link is up. */ link_state = port->lp_no_link_update ? LINK_STATE_UP : mac_link_get(port->lp_mh); if (port->lp_link_state != link_state) { if (link_state == LINK_STATE_UP) do_attach = (port->lp_link_state != LINK_STATE_UP); else do_detach = (port->lp_link_state == LINK_STATE_UP); } port->lp_link_state = link_state; /* link duplex change? */ link_duplex = aggr_port_stat(port, ETHER_STAT_LINK_DUPLEX); if (port->lp_link_duplex != link_duplex) { if (link_duplex == LINK_DUPLEX_FULL) do_attach |= (port->lp_link_duplex != LINK_DUPLEX_FULL); else do_detach |= (port->lp_link_duplex == LINK_DUPLEX_FULL); } port->lp_link_duplex = link_duplex; /* link speed changes? */ ifspeed = aggr_port_stat(port, MAC_STAT_IFSPEED); if (port->lp_ifspeed != ifspeed) { mutex_enter(&grp->lg_stat_lock); if (port->lp_state == AGGR_PORT_STATE_ATTACHED) do_detach |= (ifspeed != grp->lg_ifspeed); else do_attach |= (ifspeed == grp->lg_ifspeed); mutex_exit(&grp->lg_stat_lock); } port->lp_ifspeed = ifspeed; if (do_attach) { /* attempt to attach the port to the aggregation */ link_state_changed = aggr_grp_attach_port(grp, port); } else if (do_detach) { /* detach the port from the aggregation */ link_state_changed = aggr_grp_detach_port(grp, port); } mac_perim_exit(mph); return (link_state_changed); } /* * Invoked upon receiving a MAC_NOTE_UNICST for one of the constituent * ports of a group. */ static void aggr_port_notify_unicst(aggr_grp_t *grp, aggr_port_t *port, boolean_t *mac_addr_changedp, boolean_t *link_state_changedp) { boolean_t mac_addr_changed = B_FALSE; boolean_t link_state_changed = B_FALSE; uint8_t mac_addr[ETHERADDRL]; mac_perim_handle_t mph; ASSERT(MAC_PERIM_HELD(grp->lg_mh)); ASSERT(mac_addr_changedp != NULL); ASSERT(link_state_changedp != NULL); mac_perim_enter_by_mh(port->lp_mh, &mph); /* * If it is called when setting the MAC address to the * aggregation group MAC address, do nothing. */ mac_unicast_primary_get(port->lp_mh, mac_addr); if (bcmp(mac_addr, grp->lg_addr, ETHERADDRL) == 0) { mac_perim_exit(mph); goto done; } /* save the new port MAC address */ bcopy(mac_addr, port->lp_addr, ETHERADDRL); aggr_grp_port_mac_changed(grp, port, &mac_addr_changed, &link_state_changed); mac_perim_exit(mph); /* * If this port was used to determine the MAC address of * the group, update the MAC address of the constituent * ports. */ if (mac_addr_changed && aggr_grp_update_ports_mac(grp)) link_state_changed = B_TRUE; done: *mac_addr_changedp = mac_addr_changed; *link_state_changedp = link_state_changed; } /* * Notification callback invoked by the MAC service module for * a particular MAC port. */ static void aggr_port_notify_cb(void *arg, mac_notify_type_t type) { aggr_port_t *port = arg; aggr_grp_t *grp = port->lp_grp; boolean_t mac_addr_changed, link_state_changed; mac_perim_handle_t mph; mac_perim_enter_by_mh(grp->lg_mh, &mph); if (port->lp_closing) { mac_perim_exit(mph); /* * Release the reference so it is safe for aggr to call * mac_unregister() now. */ aggr_grp_port_rele(port); return; } switch (type) { case MAC_NOTE_TX: mac_tx_update(grp->lg_mh); break; case MAC_NOTE_LINK: if (aggr_port_notify_link(grp, port)) mac_link_update(grp->lg_mh, grp->lg_link_state); break; case MAC_NOTE_UNICST: aggr_port_notify_unicst(grp, port, &mac_addr_changed, &link_state_changed); if (mac_addr_changed) mac_unicst_update(grp->lg_mh, grp->lg_addr); if (link_state_changed) mac_link_update(grp->lg_mh, grp->lg_link_state); break; default: break; } mac_perim_exit(mph); } int aggr_port_start(aggr_port_t *port) { ASSERT(MAC_PERIM_HELD(port->lp_mh)); if (port->lp_started) return (0); port->lp_started = B_TRUE; aggr_grp_multicst_port(port, B_TRUE); return (0); } void aggr_port_stop(aggr_port_t *port) { ASSERT(MAC_PERIM_HELD(port->lp_mh)); if (!port->lp_started) return; aggr_grp_multicst_port(port, B_FALSE); /* update the port state */ port->lp_started = B_FALSE; } /* * Set the promisc mode of the port. If the port is already in the * requested mode then do nothing. */ int aggr_port_promisc(aggr_port_t *port, boolean_t on) { int rc; ASSERT(MAC_PERIM_HELD(port->lp_mh)); if (on == port->lp_promisc_on) return (0); rc = mac_set_promisc(port->lp_mh, on); if (rc == 0) port->lp_promisc_on = on; return (rc); } /* * Set the MAC address of a port. */ int aggr_port_unicst(aggr_port_t *port) { aggr_grp_t *grp = port->lp_grp; ASSERT(MAC_PERIM_HELD(grp->lg_mh)); ASSERT(MAC_PERIM_HELD(port->lp_mh)); return (mac_unicast_primary_set(port->lp_mh, grp->lg_addr)); } /* * Add or remove a multicast address to/from a port. */ int aggr_port_multicst(void *arg, boolean_t add, const uint8_t *addrp) { aggr_port_t *port = arg; if (add) { return (mac_multicast_add(port->lp_mch, addrp)); } else { mac_multicast_remove(port->lp_mch, addrp); return (0); } } uint64_t aggr_port_stat(aggr_port_t *port, uint_t stat) { return (mac_stat_get(port->lp_mh, stat)); } /* * Add a non-primary unicast address to the underlying port. If the * port supports HW Rx groups, then try to add the address filter to * the HW group first. If that fails, or if the port does not support * RINGS capab, then enable the port's promiscous mode. */ int aggr_port_addmac(aggr_port_t *port, uint_t idx, const uint8_t *mac_addr) { aggr_unicst_addr_t *addr, **pprev; mac_perim_handle_t pmph; int err; ASSERT(MAC_PERIM_HELD(port->lp_grp->lg_mh)); ASSERT3U(idx, <, MAX_GROUPS_PER_PORT); mac_perim_enter_by_mh(port->lp_mh, &pmph); /* * If the port doesn't have a HW group to back the aggr's * pseudo group, then try using the port's default group and * let the aggr SW classify its traffic. This scenario happens * when mixing ports with a different number of HW groups. */ if (port->lp_hwghs[idx] == NULL) idx = 0; /* * If there is an underlying HW Rx group, then try adding this * unicast address to it. */ if ((port->lp_hwghs[idx] != NULL) && ((mac_hwgroup_addmac(port->lp_hwghs[idx], mac_addr)) == 0)) { mac_perim_exit(pmph); return (0); } /* * If the port doesn't have HW groups, or we failed to add the * HW filter, then enable the port's promiscuous mode. We * enable promiscuous mode only if the port is already started. */ if (port->lp_started && ((err = aggr_port_promisc(port, B_TRUE)) != 0)) { mac_perim_exit(pmph); return (err); } /* * Walk through the unicast addresses that requires promiscous mode * enabled on this port, and add this address to the end of the list. */ pprev = &port->lp_prom_addr; while ((addr = *pprev) != NULL) { ASSERT(bcmp(mac_addr, addr->aua_addr, ETHERADDRL) != 0); pprev = &addr->aua_next; } addr = kmem_alloc(sizeof (aggr_unicst_addr_t), KM_SLEEP); bcopy(mac_addr, addr->aua_addr, ETHERADDRL); addr->aua_next = NULL; *pprev = addr; mac_perim_exit(pmph); return (0); } /* * Remove a non-primary unicast address from the underlying port. This address * must has been added by aggr_port_addmac(). As a result, we probably need to * remove the address from the port's HW Rx group, or to disable the port's * promiscous mode. */ void aggr_port_remmac(aggr_port_t *port, uint_t idx, const uint8_t *mac_addr) { aggr_grp_t *grp = port->lp_grp; aggr_unicst_addr_t *addr, **pprev; mac_perim_handle_t pmph; ASSERT(MAC_PERIM_HELD(grp->lg_mh)); ASSERT3U(idx, <, MAX_GROUPS_PER_PORT); mac_perim_enter_by_mh(port->lp_mh, &pmph); /* * See whether this address is in the list of addresses that requires * the port being promiscous mode. */ pprev = &port->lp_prom_addr; while ((addr = *pprev) != NULL) { if (bcmp(mac_addr, addr->aua_addr, ETHERADDRL) == 0) break; pprev = &addr->aua_next; } if (addr != NULL) { /* * This unicast address put the port into the promiscous mode, * delete this address from the lp_prom_addr list. If this is * the last address in that list, disable the promiscous mode * if the aggregation is not in promiscous mode. */ *pprev = addr->aua_next; kmem_free(addr, sizeof (aggr_unicst_addr_t)); if (port->lp_prom_addr == NULL && !grp->lg_promisc) (void) aggr_port_promisc(port, B_FALSE); } else { /* See comment in aggr_port_addmac(). */ if (port->lp_hwghs[idx] == NULL) idx = 0; ASSERT3P(port->lp_hwghs[idx], !=, NULL); (void) mac_hwgroup_remmac(port->lp_hwghs[idx], mac_addr); } mac_perim_exit(pmph); } int aggr_port_addvlan(aggr_port_t *port, uint_t idx, uint16_t vid) { mac_perim_handle_t pmph; int err; ASSERT(MAC_PERIM_HELD(port->lp_grp->lg_mh)); ASSERT3U(idx, <, MAX_GROUPS_PER_PORT); mac_perim_enter_by_mh(port->lp_mh, &pmph); /* See comment in aggr_port_addmac(). */ if (port->lp_hwghs[idx] == NULL) idx = 0; /* * Add the VLAN filter to the HW group if the port has a HW * group. If the port doesn't have a HW group, then it will * implicitly allow tagged traffic to pass and there is * nothing to do. */ if (port->lp_hwghs[idx] == NULL) err = 0; else err = mac_hwgroup_addvlan(port->lp_hwghs[idx], vid); mac_perim_exit(pmph); return (err); } int aggr_port_remvlan(aggr_port_t *port, uint_t idx, uint16_t vid) { mac_perim_handle_t pmph; int err; ASSERT(MAC_PERIM_HELD(port->lp_grp->lg_mh)); ASSERT3U(idx, <, MAX_GROUPS_PER_PORT); mac_perim_enter_by_mh(port->lp_mh, &pmph); /* See comment in aggr_port_addmac(). */ if (port->lp_hwghs[idx] == NULL) idx = 0; if (port->lp_hwghs[idx] == NULL) err = 0; else err = mac_hwgroup_remvlan(port->lp_hwghs[idx], vid); mac_perim_exit(pmph); return (err); }