/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */
257c478bd9Sstevel@tonic-gate
/*
 * IEEE 802.3ad Link Aggregation - Send code.
 *
 * Implements the Distributor function.
 */
317c478bd9Sstevel@tonic-gate
327c478bd9Sstevel@tonic-gate #include <sys/conf.h>
337c478bd9Sstevel@tonic-gate #include <sys/modctl.h>
347c478bd9Sstevel@tonic-gate #include <sys/sunddi.h>
350dc2366fSVenugopal Iyer #include <sys/callb.h>
367c478bd9Sstevel@tonic-gate #include <sys/vlan.h>
377c478bd9Sstevel@tonic-gate #include <sys/strsun.h>
387c478bd9Sstevel@tonic-gate #include <sys/strsubr.h>
39ae6aa22aSVenugopal Iyer #include <sys/dlpi.h>
407c478bd9Sstevel@tonic-gate
417c478bd9Sstevel@tonic-gate #include <inet/common.h>
427c478bd9Sstevel@tonic-gate #include <inet/led.h>
437c478bd9Sstevel@tonic-gate #include <inet/ip.h>
447c478bd9Sstevel@tonic-gate #include <inet/ip6.h>
457c478bd9Sstevel@tonic-gate #include <inet/tcp.h>
467c478bd9Sstevel@tonic-gate #include <netinet/udp.h>
477c478bd9Sstevel@tonic-gate
487c478bd9Sstevel@tonic-gate #include <sys/aggr.h>
497c478bd9Sstevel@tonic-gate #include <sys/aggr_impl.h>
507c478bd9Sstevel@tonic-gate
517c478bd9Sstevel@tonic-gate /*
527c478bd9Sstevel@tonic-gate * Update the TX load balancing policy of the specified group.
537c478bd9Sstevel@tonic-gate */
547c478bd9Sstevel@tonic-gate void
aggr_send_update_policy(aggr_grp_t * grp,uint32_t policy)557c478bd9Sstevel@tonic-gate aggr_send_update_policy(aggr_grp_t *grp, uint32_t policy)
567c478bd9Sstevel@tonic-gate {
57ae6aa22aSVenugopal Iyer uint8_t mac_policy = 0;
58ae6aa22aSVenugopal Iyer
59da14cebeSEric Cheng ASSERT(MAC_PERIM_HELD(grp->lg_mh));
607c478bd9Sstevel@tonic-gate
61ae6aa22aSVenugopal Iyer if ((policy & AGGR_POLICY_L2) != 0)
62ae6aa22aSVenugopal Iyer mac_policy |= MAC_PKT_HASH_L2;
63ae6aa22aSVenugopal Iyer if ((policy & AGGR_POLICY_L3) != 0)
64ae6aa22aSVenugopal Iyer mac_policy |= MAC_PKT_HASH_L3;
65ae6aa22aSVenugopal Iyer if ((policy & AGGR_POLICY_L4) != 0)
66ae6aa22aSVenugopal Iyer mac_policy |= MAC_PKT_HASH_L4;
67ae6aa22aSVenugopal Iyer
687c478bd9Sstevel@tonic-gate grp->lg_tx_policy = policy;
69ae6aa22aSVenugopal Iyer grp->lg_mac_tx_policy = mac_policy;
707c478bd9Sstevel@tonic-gate }
717c478bd9Sstevel@tonic-gate
/*
 * Fold a hint value by XOR-ing it with copies of itself shifted right by
 * 24, 16 and 8 bits, so that the higher-order bytes also influence the
 * low-order bits of the result.  The argument is evaluated more than
 * once, so it must not have side effects.
 */
#define	HASH_HINT(hint)	\
	((hint) ^ ((hint) >> 24) ^ ((hint) >> 16) ^ ((hint) >> 8))
740dc2366fSVenugopal Iyer
757c478bd9Sstevel@tonic-gate /*
760dc2366fSVenugopal Iyer * Function invoked by mac layer to find a specific TX ring on a port
770dc2366fSVenugopal Iyer * to send data.
787c478bd9Sstevel@tonic-gate */
797c478bd9Sstevel@tonic-gate mblk_t *
aggr_find_tx_ring(void * arg,mblk_t * mp,uintptr_t hint,mac_ring_handle_t * rh)800dc2366fSVenugopal Iyer aggr_find_tx_ring(void *arg, mblk_t *mp, uintptr_t hint, mac_ring_handle_t *rh)
817c478bd9Sstevel@tonic-gate {
827c478bd9Sstevel@tonic-gate aggr_grp_t *grp = arg;
837c478bd9Sstevel@tonic-gate aggr_port_t *port;
84da14cebeSEric Cheng uint64_t hash;
857c478bd9Sstevel@tonic-gate
860dc2366fSVenugopal Iyer rw_enter(&grp->lg_tx_lock, RW_READER);
870dc2366fSVenugopal Iyer if (grp->lg_ntx_ports == 0) {
8898b1442aSmeem /*
890dc2366fSVenugopal Iyer * We could have returned from aggr_m_start() before
900dc2366fSVenugopal Iyer * the ports were actually attached. Drop the chain.
9198b1442aSmeem */
92da14cebeSEric Cheng rw_exit(&grp->lg_tx_lock);
930dc2366fSVenugopal Iyer freemsgchain(mp);
940dc2366fSVenugopal Iyer return (NULL);
950dc2366fSVenugopal Iyer }
960dc2366fSVenugopal Iyer hash = mac_pkt_hash(DL_ETHER, mp, grp->lg_mac_tx_policy, B_TRUE);
970dc2366fSVenugopal Iyer port = grp->lg_tx_ports[hash % grp->lg_ntx_ports];
9895c1c84bSRamesh Kumar Katla
990dc2366fSVenugopal Iyer /*
1000dc2366fSVenugopal Iyer * Use hash as the hint so to direct traffic to
1010dc2366fSVenugopal Iyer * different TX rings. Note below bit operation
1020dc2366fSVenugopal Iyer * is needed in case hint is 0 to get the most
1030dc2366fSVenugopal Iyer * benefit from HASH_HINT() algorithm.
1040dc2366fSVenugopal Iyer */
1050dc2366fSVenugopal Iyer if (port->lp_tx_ring_cnt > 1) {
1060dc2366fSVenugopal Iyer if (hint == 0) {
107da14cebeSEric Cheng hash = (hash << 24 | hash << 16 | hash);
108da14cebeSEric Cheng hash = (hash << 32 | hash);
1090dc2366fSVenugopal Iyer } else {
1100dc2366fSVenugopal Iyer hash = hint;
1110dc2366fSVenugopal Iyer }
1120dc2366fSVenugopal Iyer hash = HASH_HINT(hash);
1130dc2366fSVenugopal Iyer *rh = port->lp_pseudo_tx_rings[hash % port->lp_tx_ring_cnt];
1140dc2366fSVenugopal Iyer } else {
1150dc2366fSVenugopal Iyer *rh = port->lp_pseudo_tx_rings[0];
1160dc2366fSVenugopal Iyer }
1170dc2366fSVenugopal Iyer rw_exit(&grp->lg_tx_lock);
118da14cebeSEric Cheng
1190dc2366fSVenugopal Iyer return (mp);
1200dc2366fSVenugopal Iyer }
121da14cebeSEric Cheng
1220dc2366fSVenugopal Iyer /*
1230dc2366fSVenugopal Iyer * aggr_tx_notify_thread:
1240dc2366fSVenugopal Iyer *
1250dc2366fSVenugopal Iyer * aggr_tx_ring_update() callback function wakes up this thread when
1260dc2366fSVenugopal Iyer * it gets called. This thread will call mac_tx_ring_update() to
1270dc2366fSVenugopal Iyer * notify upper mac of flow control getting relieved. Note that
1280dc2366fSVenugopal Iyer * aggr_tx_ring_update() cannot call mac_tx_ring_update() directly
1290dc2366fSVenugopal Iyer * because aggr_tx_ring_update() is called from lower mac with
1300dc2366fSVenugopal Iyer * mi_rw_lock held.
1310dc2366fSVenugopal Iyer */
1320dc2366fSVenugopal Iyer void
aggr_tx_notify_thread(void * arg)1330dc2366fSVenugopal Iyer aggr_tx_notify_thread(void *arg)
1340dc2366fSVenugopal Iyer {
1350dc2366fSVenugopal Iyer callb_cpr_t cprinfo;
1360dc2366fSVenugopal Iyer aggr_grp_t *grp = (aggr_grp_t *)arg;
1370dc2366fSVenugopal Iyer mac_ring_handle_t pseudo_mrh;
1380dc2366fSVenugopal Iyer
1390dc2366fSVenugopal Iyer CALLB_CPR_INIT(&cprinfo, &grp->lg_tx_flowctl_lock, callb_generic_cpr,
1400dc2366fSVenugopal Iyer "aggr_tx_notify_thread");
1410dc2366fSVenugopal Iyer
1420dc2366fSVenugopal Iyer mutex_enter(&grp->lg_tx_flowctl_lock);
1430dc2366fSVenugopal Iyer while (!grp->lg_tx_notify_done) {
1440dc2366fSVenugopal Iyer if ((grp->lg_tx_blocked_cnt) == 0) {
1450dc2366fSVenugopal Iyer CALLB_CPR_SAFE_BEGIN(&cprinfo);
1460dc2366fSVenugopal Iyer cv_wait(&grp->lg_tx_flowctl_cv,
1470dc2366fSVenugopal Iyer &grp->lg_tx_flowctl_lock);
1480dc2366fSVenugopal Iyer CALLB_CPR_SAFE_END(&cprinfo, &grp->lg_tx_flowctl_lock);
1490dc2366fSVenugopal Iyer continue;
1507c478bd9Sstevel@tonic-gate }
1510dc2366fSVenugopal Iyer while (grp->lg_tx_blocked_cnt != 0) {
1520dc2366fSVenugopal Iyer grp->lg_tx_blocked_cnt--;
1530dc2366fSVenugopal Iyer pseudo_mrh =
1540dc2366fSVenugopal Iyer grp->lg_tx_blocked_rings[grp->lg_tx_blocked_cnt];
1550dc2366fSVenugopal Iyer mutex_exit(&grp->lg_tx_flowctl_lock);
1560dc2366fSVenugopal Iyer mac_tx_ring_update(grp->lg_mh, pseudo_mrh);
1570dc2366fSVenugopal Iyer mutex_enter(&grp->lg_tx_flowctl_lock);
1580dc2366fSVenugopal Iyer }
1590dc2366fSVenugopal Iyer }
1600dc2366fSVenugopal Iyer /*
1610dc2366fSVenugopal Iyer * The grp is being destroyed, exit the thread.
1620dc2366fSVenugopal Iyer */
1630dc2366fSVenugopal Iyer grp->lg_tx_notify_thread = NULL;
1640dc2366fSVenugopal Iyer CALLB_CPR_EXIT(&cprinfo);
1650dc2366fSVenugopal Iyer thread_exit();
1660dc2366fSVenugopal Iyer }
1677c478bd9Sstevel@tonic-gate
1680dc2366fSVenugopal Iyer /*
1690dc2366fSVenugopal Iyer * Callback function registered with lower mac to receive wakeups from
1700dc2366fSVenugopal Iyer * drivers when flow control is relieved (i.e. Tx descriptors are
1710dc2366fSVenugopal Iyer * available).
1720dc2366fSVenugopal Iyer */
1730dc2366fSVenugopal Iyer void
aggr_tx_ring_update(void * arg1,uintptr_t arg2)1740dc2366fSVenugopal Iyer aggr_tx_ring_update(void *arg1, uintptr_t arg2)
1750dc2366fSVenugopal Iyer {
1760dc2366fSVenugopal Iyer aggr_port_t *port = (aggr_port_t *)arg1;
1770dc2366fSVenugopal Iyer mac_ring_handle_t mrh = (mac_ring_handle_t)arg2;
1780dc2366fSVenugopal Iyer mac_ring_handle_t pseudo_mrh;
1790dc2366fSVenugopal Iyer aggr_grp_t *grp = port->lp_grp;
1800dc2366fSVenugopal Iyer int i = 0;
1810dc2366fSVenugopal Iyer
1820dc2366fSVenugopal Iyer if (mrh == NULL) {
1830dc2366fSVenugopal Iyer /*
1840dc2366fSVenugopal Iyer * If the underlying NIC does not expose TX rings,
1850dc2366fSVenugopal Iyer * still as pseudo TX ring is presented to the
1860dc2366fSVenugopal Iyer * aggr mac.
1870dc2366fSVenugopal Iyer */
1880dc2366fSVenugopal Iyer pseudo_mrh = port->lp_pseudo_tx_rings[0];
1890dc2366fSVenugopal Iyer } else {
1900dc2366fSVenugopal Iyer for (i = 0; i < port->lp_tx_ring_cnt; i++) {
1910dc2366fSVenugopal Iyer if (port->lp_tx_rings[i] == mrh)
1920dc2366fSVenugopal Iyer break;
1930dc2366fSVenugopal Iyer }
1940dc2366fSVenugopal Iyer ASSERT(i < port->lp_tx_ring_cnt);
1950dc2366fSVenugopal Iyer pseudo_mrh = port->lp_pseudo_tx_rings[i];
1967c478bd9Sstevel@tonic-gate }
1970dc2366fSVenugopal Iyer mutex_enter(&grp->lg_tx_flowctl_lock);
1980dc2366fSVenugopal Iyer /*
1990dc2366fSVenugopal Iyer * It could be possible that some (broken?) device driver
2000dc2366fSVenugopal Iyer * could send more than one wakeup on the same ring. In
2010dc2366fSVenugopal Iyer * such a case, multiple instances of the same pseudo TX
2020dc2366fSVenugopal Iyer * ring should not be saved in lg_tx_blocked_rings[]
2030dc2366fSVenugopal Iyer * array. So first check if woken up ring (pseudo_mrh) is
2040dc2366fSVenugopal Iyer * already in the lg_tx_blocked_rings[] array.
2050dc2366fSVenugopal Iyer */
2060dc2366fSVenugopal Iyer for (i = 0; i < grp->lg_tx_blocked_cnt; i++) {
2070dc2366fSVenugopal Iyer if (grp->lg_tx_blocked_rings[i] == pseudo_mrh) {
2080dc2366fSVenugopal Iyer mutex_exit(&grp->lg_tx_flowctl_lock);
2090dc2366fSVenugopal Iyer return;
2100dc2366fSVenugopal Iyer }
2110dc2366fSVenugopal Iyer }
2120dc2366fSVenugopal Iyer /* A distinct mac_ring_handle. Save and increment count */
2130dc2366fSVenugopal Iyer grp->lg_tx_blocked_rings[grp->lg_tx_blocked_cnt] = pseudo_mrh;
2140dc2366fSVenugopal Iyer grp->lg_tx_blocked_cnt++;
2150dc2366fSVenugopal Iyer cv_signal(&grp->lg_tx_flowctl_cv);
2160dc2366fSVenugopal Iyer mutex_exit(&grp->lg_tx_flowctl_lock);
2170dc2366fSVenugopal Iyer }
2180dc2366fSVenugopal Iyer
2190dc2366fSVenugopal Iyer /*
2200dc2366fSVenugopal Iyer * Send function invoked by the MAC service module.
2210dc2366fSVenugopal Iyer */
2220dc2366fSVenugopal Iyer mblk_t *
aggr_ring_tx(void * arg,mblk_t * mp)2230dc2366fSVenugopal Iyer aggr_ring_tx(void *arg, mblk_t *mp)
2240dc2366fSVenugopal Iyer {
2250dc2366fSVenugopal Iyer aggr_pseudo_tx_ring_t *pseudo_ring = (aggr_pseudo_tx_ring_t *)arg;
2260dc2366fSVenugopal Iyer aggr_port_t *port = pseudo_ring->atr_port;
2270dc2366fSVenugopal Iyer
2280dc2366fSVenugopal Iyer return (mac_hwring_send_priv(port->lp_mch, pseudo_ring->atr_hw_rh, mp));
2297c478bd9Sstevel@tonic-gate }
2307c478bd9Sstevel@tonic-gate
2317c478bd9Sstevel@tonic-gate /*
2327c478bd9Sstevel@tonic-gate * Enable sending on the specified port.
2337c478bd9Sstevel@tonic-gate */
2347c478bd9Sstevel@tonic-gate void
aggr_send_port_enable(aggr_port_t * port)2357c478bd9Sstevel@tonic-gate aggr_send_port_enable(aggr_port_t *port)
2367c478bd9Sstevel@tonic-gate {
2377c478bd9Sstevel@tonic-gate aggr_grp_t *grp = port->lp_grp;
2387c478bd9Sstevel@tonic-gate
239da14cebeSEric Cheng ASSERT(MAC_PERIM_HELD(grp->lg_mh));
240da14cebeSEric Cheng
2417c478bd9Sstevel@tonic-gate if (port->lp_tx_enabled || (port->lp_state !=
2427c478bd9Sstevel@tonic-gate AGGR_PORT_STATE_ATTACHED)) {
2437c478bd9Sstevel@tonic-gate /* already enabled or port not yet attached */
2447c478bd9Sstevel@tonic-gate return;
2457c478bd9Sstevel@tonic-gate }
2467c478bd9Sstevel@tonic-gate
2477c478bd9Sstevel@tonic-gate /*
2487c478bd9Sstevel@tonic-gate * Add to group's array of tx ports.
2497c478bd9Sstevel@tonic-gate */
250da14cebeSEric Cheng rw_enter(&grp->lg_tx_lock, RW_WRITER);
2517c478bd9Sstevel@tonic-gate if (grp->lg_tx_ports_size < grp->lg_ntx_ports+1) {
2527c478bd9Sstevel@tonic-gate /* current array too small */
2537c478bd9Sstevel@tonic-gate aggr_port_t **new_ports;
2547c478bd9Sstevel@tonic-gate uint_t new_size;
2557c478bd9Sstevel@tonic-gate
2567c478bd9Sstevel@tonic-gate new_size = grp->lg_ntx_ports+1;
2577c478bd9Sstevel@tonic-gate new_ports = kmem_zalloc(new_size * sizeof (aggr_port_t *),
2587c478bd9Sstevel@tonic-gate KM_SLEEP);
2597c478bd9Sstevel@tonic-gate
2607c478bd9Sstevel@tonic-gate if (grp->lg_tx_ports_size > 0) {
2617c478bd9Sstevel@tonic-gate ASSERT(grp->lg_tx_ports != NULL);
2627c478bd9Sstevel@tonic-gate bcopy(grp->lg_tx_ports, new_ports,
2637c478bd9Sstevel@tonic-gate grp->lg_ntx_ports * sizeof (aggr_port_t *));
2647c478bd9Sstevel@tonic-gate kmem_free(grp->lg_tx_ports,
2657c478bd9Sstevel@tonic-gate grp->lg_tx_ports_size * sizeof (aggr_port_t *));
2667c478bd9Sstevel@tonic-gate }
2677c478bd9Sstevel@tonic-gate
2687c478bd9Sstevel@tonic-gate grp->lg_tx_ports = new_ports;
2697c478bd9Sstevel@tonic-gate grp->lg_tx_ports_size = new_size;
2707c478bd9Sstevel@tonic-gate }
2717c478bd9Sstevel@tonic-gate
2727c478bd9Sstevel@tonic-gate grp->lg_tx_ports[grp->lg_ntx_ports++] = port;
2737c478bd9Sstevel@tonic-gate port->lp_tx_idx = grp->lg_ntx_ports-1;
274da14cebeSEric Cheng rw_exit(&grp->lg_tx_lock);
2757c478bd9Sstevel@tonic-gate
2767c478bd9Sstevel@tonic-gate port->lp_tx_enabled = B_TRUE;
277*09b7f21aSRobert Mustacchi
278*09b7f21aSRobert Mustacchi aggr_grp_update_default(grp);
2797c478bd9Sstevel@tonic-gate }
2807c478bd9Sstevel@tonic-gate
2817c478bd9Sstevel@tonic-gate /*
2827c478bd9Sstevel@tonic-gate * Disable sending from the specified port.
2837c478bd9Sstevel@tonic-gate */
2847c478bd9Sstevel@tonic-gate void
aggr_send_port_disable(aggr_port_t * port)2857c478bd9Sstevel@tonic-gate aggr_send_port_disable(aggr_port_t *port)
2867c478bd9Sstevel@tonic-gate {
2877c478bd9Sstevel@tonic-gate uint_t idx, ntx;
2887c478bd9Sstevel@tonic-gate aggr_grp_t *grp = port->lp_grp;
2897c478bd9Sstevel@tonic-gate
290da14cebeSEric Cheng ASSERT(MAC_PERIM_HELD(grp->lg_mh));
291da14cebeSEric Cheng ASSERT(MAC_PERIM_HELD(port->lp_mh));
2927c478bd9Sstevel@tonic-gate
2937c478bd9Sstevel@tonic-gate if (!port->lp_tx_enabled) {
2947c478bd9Sstevel@tonic-gate /* not yet enabled */
2957c478bd9Sstevel@tonic-gate return;
2967c478bd9Sstevel@tonic-gate }
2977c478bd9Sstevel@tonic-gate
298da14cebeSEric Cheng rw_enter(&grp->lg_tx_lock, RW_WRITER);
2997c478bd9Sstevel@tonic-gate idx = port->lp_tx_idx;
3007c478bd9Sstevel@tonic-gate ntx = grp->lg_ntx_ports;
3017c478bd9Sstevel@tonic-gate ASSERT(idx < ntx);
3027c478bd9Sstevel@tonic-gate
3037c478bd9Sstevel@tonic-gate /* remove from array of attached ports */
3047c478bd9Sstevel@tonic-gate if (idx == (ntx - 1)) {
3057c478bd9Sstevel@tonic-gate grp->lg_tx_ports[idx] = NULL;
3067c478bd9Sstevel@tonic-gate } else {
3077c478bd9Sstevel@tonic-gate /* not the last entry, replace with last one */
3087c478bd9Sstevel@tonic-gate aggr_port_t *victim;
3097c478bd9Sstevel@tonic-gate
3107c478bd9Sstevel@tonic-gate victim = grp->lg_tx_ports[ntx - 1];
3117c478bd9Sstevel@tonic-gate grp->lg_tx_ports[ntx - 1] = NULL;
3127c478bd9Sstevel@tonic-gate victim->lp_tx_idx = idx;
3137c478bd9Sstevel@tonic-gate grp->lg_tx_ports[idx] = victim;
3147c478bd9Sstevel@tonic-gate }
3157c478bd9Sstevel@tonic-gate
3167c478bd9Sstevel@tonic-gate port->lp_tx_idx = 0;
3177c478bd9Sstevel@tonic-gate grp->lg_ntx_ports--;
318da14cebeSEric Cheng rw_exit(&grp->lg_tx_lock);
3197c478bd9Sstevel@tonic-gate
3207c478bd9Sstevel@tonic-gate port->lp_tx_enabled = B_FALSE;
321*09b7f21aSRobert Mustacchi
322*09b7f21aSRobert Mustacchi aggr_grp_update_default(grp);
3237c478bd9Sstevel@tonic-gate }
324