/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* * IEEE 802.3ad Link Aggregation - Send code. * * Implements the Distributor function. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Update the TX load balancing policy of the specified group. */ void aggr_send_update_policy(aggr_grp_t *grp, uint32_t policy) { uint8_t mac_policy = 0; ASSERT(MAC_PERIM_HELD(grp->lg_mh)); if ((policy & AGGR_POLICY_L2) != 0) mac_policy |= MAC_PKT_HASH_L2; if ((policy & AGGR_POLICY_L3) != 0) mac_policy |= MAC_PKT_HASH_L3; if ((policy & AGGR_POLICY_L4) != 0) mac_policy |= MAC_PKT_HASH_L4; grp->lg_tx_policy = policy; grp->lg_mac_tx_policy = mac_policy; } #define HASH_HINT(hint) \ ((hint) ^ ((hint) >> 24) ^ ((hint) >> 16) ^ ((hint) >> 8)) /* * Function invoked by mac layer to find a specific TX ring on a port * to send data. */ mblk_t * aggr_find_tx_ring(void *arg, mblk_t *mp, uintptr_t hint, mac_ring_handle_t *rh) { aggr_grp_t *grp = arg; aggr_port_t *port; uint64_t hash; rw_enter(&grp->lg_tx_lock, RW_READER); if (grp->lg_ntx_ports == 0) { /* * We could have returned from aggr_m_start() before * the ports were actually attached. Drop the chain. */ rw_exit(&grp->lg_tx_lock); freemsgchain(mp); return (NULL); } hash = mac_pkt_hash(DL_ETHER, mp, grp->lg_mac_tx_policy, B_TRUE); port = grp->lg_tx_ports[hash % grp->lg_ntx_ports]; /* * Use hash as the hint so to direct traffic to * different TX rings. Note below bit operation * is needed in case hint is 0 to get the most * benefit from HASH_HINT() algorithm. */ if (port->lp_tx_ring_cnt > 1) { if (hint == 0) { hash = (hash << 24 | hash << 16 | hash); hash = (hash << 32 | hash); } else { hash = hint; } hash = HASH_HINT(hash); *rh = port->lp_pseudo_tx_rings[hash % port->lp_tx_ring_cnt]; } else { *rh = port->lp_pseudo_tx_rings[0]; } rw_exit(&grp->lg_tx_lock); return (mp); } /* * aggr_tx_notify_thread: * * aggr_tx_ring_update() callback function wakes up this thread when * it gets called. This thread will call mac_tx_ring_update() to * notify upper mac of flow control getting relieved. Note that * aggr_tx_ring_update() cannot call mac_tx_ring_update() directly * because aggr_tx_ring_update() is called from lower mac with * mi_rw_lock held. */ void aggr_tx_notify_thread(void *arg) { callb_cpr_t cprinfo; aggr_grp_t *grp = (aggr_grp_t *)arg; mac_ring_handle_t pseudo_mrh; CALLB_CPR_INIT(&cprinfo, &grp->lg_tx_flowctl_lock, callb_generic_cpr, "aggr_tx_notify_thread"); mutex_enter(&grp->lg_tx_flowctl_lock); while (!grp->lg_tx_notify_done) { if ((grp->lg_tx_blocked_cnt) == 0) { CALLB_CPR_SAFE_BEGIN(&cprinfo); cv_wait(&grp->lg_tx_flowctl_cv, &grp->lg_tx_flowctl_lock); CALLB_CPR_SAFE_END(&cprinfo, &grp->lg_tx_flowctl_lock); continue; } while (grp->lg_tx_blocked_cnt != 0) { grp->lg_tx_blocked_cnt--; pseudo_mrh = grp->lg_tx_blocked_rings[grp->lg_tx_blocked_cnt]; mutex_exit(&grp->lg_tx_flowctl_lock); mac_tx_ring_update(grp->lg_mh, pseudo_mrh); mutex_enter(&grp->lg_tx_flowctl_lock); } } /* * The grp is being destroyed, exit the thread. */ grp->lg_tx_notify_thread = NULL; CALLB_CPR_EXIT(&cprinfo); thread_exit(); } /* * Callback function registered with lower mac to receive wakeups from * drivers when flow control is relieved (i.e. Tx descriptors are * available). */ void aggr_tx_ring_update(void *arg1, uintptr_t arg2) { aggr_port_t *port = (aggr_port_t *)arg1; mac_ring_handle_t mrh = (mac_ring_handle_t)arg2; mac_ring_handle_t pseudo_mrh; aggr_grp_t *grp = port->lp_grp; int i = 0; if (mrh == NULL) { /* * If the underlying NIC does not expose TX rings, * still as pseudo TX ring is presented to the * aggr mac. */ pseudo_mrh = port->lp_pseudo_tx_rings[0]; } else { for (i = 0; i < port->lp_tx_ring_cnt; i++) { if (port->lp_tx_rings[i] == mrh) break; } ASSERT(i < port->lp_tx_ring_cnt); pseudo_mrh = port->lp_pseudo_tx_rings[i]; } mutex_enter(&grp->lg_tx_flowctl_lock); /* * It could be possible that some (broken?) device driver * could send more than one wakeup on the same ring. In * such a case, multiple instances of the same pseudo TX * ring should not be saved in lg_tx_blocked_rings[] * array. So first check if woken up ring (pseudo_mrh) is * already in the lg_tx_blocked_rings[] array. */ for (i = 0; i < grp->lg_tx_blocked_cnt; i++) { if (grp->lg_tx_blocked_rings[i] == pseudo_mrh) { mutex_exit(&grp->lg_tx_flowctl_lock); return; } } /* A distinct mac_ring_handle. Save and increment count */ grp->lg_tx_blocked_rings[grp->lg_tx_blocked_cnt] = pseudo_mrh; grp->lg_tx_blocked_cnt++; cv_signal(&grp->lg_tx_flowctl_cv); mutex_exit(&grp->lg_tx_flowctl_lock); } /* * Send function invoked by the MAC service module. */ mblk_t * aggr_ring_tx(void *arg, mblk_t *mp) { aggr_pseudo_tx_ring_t *pseudo_ring = (aggr_pseudo_tx_ring_t *)arg; aggr_port_t *port = pseudo_ring->atr_port; return (mac_hwring_send_priv(port->lp_mch, pseudo_ring->atr_hw_rh, mp)); } /* * Enable sending on the specified port. */ void aggr_send_port_enable(aggr_port_t *port) { aggr_grp_t *grp = port->lp_grp; ASSERT(MAC_PERIM_HELD(grp->lg_mh)); if (port->lp_tx_enabled || (port->lp_state != AGGR_PORT_STATE_ATTACHED)) { /* already enabled or port not yet attached */ return; } /* * Add to group's array of tx ports. */ rw_enter(&grp->lg_tx_lock, RW_WRITER); if (grp->lg_tx_ports_size < grp->lg_ntx_ports+1) { /* current array too small */ aggr_port_t **new_ports; uint_t new_size; new_size = grp->lg_ntx_ports+1; new_ports = kmem_zalloc(new_size * sizeof (aggr_port_t *), KM_SLEEP); if (grp->lg_tx_ports_size > 0) { ASSERT(grp->lg_tx_ports != NULL); bcopy(grp->lg_tx_ports, new_ports, grp->lg_ntx_ports * sizeof (aggr_port_t *)); kmem_free(grp->lg_tx_ports, grp->lg_tx_ports_size * sizeof (aggr_port_t *)); } grp->lg_tx_ports = new_ports; grp->lg_tx_ports_size = new_size; } grp->lg_tx_ports[grp->lg_ntx_ports++] = port; port->lp_tx_idx = grp->lg_ntx_ports-1; rw_exit(&grp->lg_tx_lock); port->lp_tx_enabled = B_TRUE; aggr_grp_update_default(grp); } /* * Disable sending from the specified port. */ void aggr_send_port_disable(aggr_port_t *port) { uint_t idx, ntx; aggr_grp_t *grp = port->lp_grp; ASSERT(MAC_PERIM_HELD(grp->lg_mh)); ASSERT(MAC_PERIM_HELD(port->lp_mh)); if (!port->lp_tx_enabled) { /* not yet enabled */ return; } rw_enter(&grp->lg_tx_lock, RW_WRITER); idx = port->lp_tx_idx; ntx = grp->lg_ntx_ports; ASSERT(idx < ntx); /* remove from array of attached ports */ if (idx == (ntx - 1)) { grp->lg_tx_ports[idx] = NULL; } else { /* not the last entry, replace with last one */ aggr_port_t *victim; victim = grp->lg_tx_ports[ntx - 1]; grp->lg_tx_ports[ntx - 1] = NULL; victim->lp_tx_idx = idx; grp->lg_tx_ports[idx] = victim; } port->lp_tx_idx = 0; grp->lg_ntx_ports--; rw_exit(&grp->lg_tx_lock); port->lp_tx_enabled = B_FALSE; aggr_grp_update_default(grp); }