xref: /illumos-gate/usr/src/uts/common/io/aggr/aggr_grp.c (revision 13810335)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5f12af565Snd  * Common Development and Distribution License (the "License").
6f12af565Snd  * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate  *
87c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate  * and limitations under the License.
127c478bd9Sstevel@tonic-gate  *
137c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate  *
197c478bd9Sstevel@tonic-gate  * CDDL HEADER END
207c478bd9Sstevel@tonic-gate  */
217c478bd9Sstevel@tonic-gate /*
220591ddd0SPrakash Jalan  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
2345948e49SRyan Zezeski  * Copyright 2020 Joyent, Inc.
240a36db39SPaul Winder  * Copyright 2020 RackTop Systems, Inc.
257c478bd9Sstevel@tonic-gate  */
267c478bd9Sstevel@tonic-gate 
277c478bd9Sstevel@tonic-gate /*
287c478bd9Sstevel@tonic-gate  * IEEE 802.3ad Link Aggregation -- Link Aggregation Groups.
297c478bd9Sstevel@tonic-gate  *
307c478bd9Sstevel@tonic-gate  * An instance of the structure aggr_grp_t is allocated for each
317c478bd9Sstevel@tonic-gate  * link aggregation group. When created, aggr_grp_t objects are
32210db224Sericheng  * entered into the aggr_grp_hash hash table maintained by the modhash
33d62bc4baSyz  * module. The hash key is the linkid associated with the link
34d62bc4baSyz  * aggregation group.
357c478bd9Sstevel@tonic-gate  *
3645948e49SRyan Zezeski  * Each aggregation contains a set of ports. The port is represented
3745948e49SRyan Zezeski  * by the aggr_port_t structure. A port consists of a single MAC
3845948e49SRyan Zezeski  * client which has exclusive (MCIS_EXCLUSIVE) use of the underlying
3945948e49SRyan Zezeski  * MAC. This client is used by the aggr to send and receive LACP
4045948e49SRyan Zezeski  * traffic. Each port client takes on the same MAC unicast address --
4145948e49SRyan Zezeski  * the address of the aggregation itself (taken from the first port by
4245948e49SRyan Zezeski  * default).
430dc2366fSVenugopal Iyer  *
4445948e49SRyan Zezeski  * The MAC client that hangs off each aggr port is not your typical
4545948e49SRyan Zezeski  * MAC client. Not only does it have exclusive control of the MAC, but
4645948e49SRyan Zezeski  * it also has no Tx or Rx SRSes. An SRS is designed to queue and
4745948e49SRyan Zezeski  * fanout traffic among L4 protocols; but the aggr is an intermediary,
4845948e49SRyan Zezeski  * not a consumer. Instead of using SRSes, the aggr puts the
4945948e49SRyan Zezeski  * underlying hardware rings into passthru mode and ships packets up
5045948e49SRyan Zezeski  * via a direct call to aggr_recv_cb(). This allows aggr to enforce
5145948e49SRyan Zezeski  * LACP while passing all other traffic up to clients of the aggr.
5245948e49SRyan Zezeski  *
5345948e49SRyan Zezeski  * Pseudo Rx Groups and Rings
5445948e49SRyan Zezeski  * --------------------------
5545948e49SRyan Zezeski  *
5645948e49SRyan Zezeski  * It is imperative for client performance that the aggr provide as
5745948e49SRyan Zezeski  * many MAC groups as possible. In order to use the underlying HW
5845948e49SRyan Zezeski  * resources, aggr creates pseudo groups to aggregate the underlying
5945948e49SRyan Zezeski  * HW groups. Every HW group gets mapped to a pseudo group; and every
6045948e49SRyan Zezeski  * HW ring in that group gets mapped to a pseudo ring. The pseudo
6145948e49SRyan Zezeski  * group at index 0 combines all the HW groups at index 0 from each
6245948e49SRyan Zezeski  * port, etc. The aggr's MAC then creates normal MAC groups and rings
6345948e49SRyan Zezeski  * out of these pseudo groups and rings to present to the aggr's
6445948e49SRyan Zezeski  * clients. To the clients, the aggr's groups and rings are absolutely
6545948e49SRyan Zezeski  * no different than a NIC's groups or rings.
6645948e49SRyan Zezeski  *
6745948e49SRyan Zezeski  * Pseudo Tx Rings
6845948e49SRyan Zezeski  * ---------------
6945948e49SRyan Zezeski  *
 * The underlying ports (NICs) in an aggregation can have Tx rings. To
 * enhance aggr's performance, these Tx rings are made available to
 * the aggr layer as pseudo Tx rings. The concept of pseudo rings is
 * not new. They are already present and implemented on the Rx side.
 * The same concept is extended to the Tx side where each Tx ring of
 * an underlying port is reflected in aggr as a pseudo Tx ring. Thus
 * each pseudo Tx ring will map to a specific hardware Tx ring. Even
 * in the case of a NIC that does not have a Tx ring, a pseudo Tx ring
 * is given to the aggregation layer.
790dc2366fSVenugopal Iyer  *
800dc2366fSVenugopal Iyer  * With this change, the outgoing stack depth looks much better:
810dc2366fSVenugopal Iyer  *
 * mac_tx() -> mac_tx_aggr_mode() -> mac_tx_soft_ring_process() ->
 * mac_tx_send() -> aggr_ring_tx() -> <driver>_ring_tx()
840dc2366fSVenugopal Iyer  *
8545948e49SRyan Zezeski  * Two new modes are introduced to mac_tx() to handle aggr pseudo Tx rings:
860dc2366fSVenugopal Iyer  * SRS_TX_AGGR and SRS_TX_BW_AGGR.
870dc2366fSVenugopal Iyer  *
880dc2366fSVenugopal Iyer  * In SRS_TX_AGGR mode, mac_tx_aggr_mode() routine is called. This routine
8945948e49SRyan Zezeski  * invokes an aggr function, aggr_find_tx_ring(), to find a (pseudo) Tx
900dc2366fSVenugopal Iyer  * ring belonging to a port on which the packet has to be sent.
910dc2366fSVenugopal Iyer  * aggr_find_tx_ring() first finds the outgoing port based on L2/L3/L4
9245948e49SRyan Zezeski  * policy and then uses the fanout_hint passed to it to pick a Tx ring from
930dc2366fSVenugopal Iyer  * the selected port.
940dc2366fSVenugopal Iyer  *
950dc2366fSVenugopal Iyer  * In SRS_TX_BW_AGGR mode, mac_tx_bw_mode() function is called where
960dc2366fSVenugopal Iyer  * bandwidth limit is applied first on the outgoing packet and the packets
970dc2366fSVenugopal Iyer  * allowed to go out would call mac_tx_aggr_mode() to send the packet on a
9845948e49SRyan Zezeski  * particular Tx ring.
997c478bd9Sstevel@tonic-gate  */
1007c478bd9Sstevel@tonic-gate 
1017c478bd9Sstevel@tonic-gate #include <sys/types.h>
1027c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h>
1037c478bd9Sstevel@tonic-gate #include <sys/conf.h>
1047c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
105da14cebeSEric Cheng #include <sys/disp.h>
1067c478bd9Sstevel@tonic-gate #include <sys/list.h>
1077c478bd9Sstevel@tonic-gate #include <sys/ksynch.h>
1087c478bd9Sstevel@tonic-gate #include <sys/kmem.h>
1097c478bd9Sstevel@tonic-gate #include <sys/stream.h>
1107c478bd9Sstevel@tonic-gate #include <sys/modctl.h>
1117c478bd9Sstevel@tonic-gate #include <sys/ddi.h>
1127c478bd9Sstevel@tonic-gate #include <sys/sunddi.h>
1137c478bd9Sstevel@tonic-gate #include <sys/atomic.h>
1147c478bd9Sstevel@tonic-gate #include <sys/stat.h>
115210db224Sericheng #include <sys/modhash.h>
116d62bc4baSyz #include <sys/id_space.h>
1177c478bd9Sstevel@tonic-gate #include <sys/strsun.h>
1182b24ab6bSSebastien Roy #include <sys/cred.h>
1197c478bd9Sstevel@tonic-gate #include <sys/dlpi.h>
1202b24ab6bSSebastien Roy #include <sys/zone.h>
121da14cebeSEric Cheng #include <sys/mac_provider.h>
122d62bc4baSyz #include <sys/dls.h>
123d62bc4baSyz #include <sys/vlan.h>
1247c478bd9Sstevel@tonic-gate #include <sys/aggr.h>
1257c478bd9Sstevel@tonic-gate #include <sys/aggr_impl.h>
1267c478bd9Sstevel@tonic-gate 
1277c478bd9Sstevel@tonic-gate static int aggr_m_start(void *);
1287c478bd9Sstevel@tonic-gate static void aggr_m_stop(void *);
1297c478bd9Sstevel@tonic-gate static int aggr_m_promisc(void *, boolean_t);
1307c478bd9Sstevel@tonic-gate static int aggr_m_multicst(void *, boolean_t, const uint8_t *);
1317c478bd9Sstevel@tonic-gate static int aggr_m_unicst(void *, const uint8_t *);
132ba2e4443Sseb static int aggr_m_stat(void *, uint_t, uint64_t *);
1337c478bd9Sstevel@tonic-gate static void aggr_m_ioctl(void *, queue_t *, mblk_t *);
134ba2e4443Sseb static boolean_t aggr_m_capab_get(void *, mac_capab_t, void *);
135986cab2cSGirish Moodalbail static int aggr_m_setprop(void *, const char *, mac_prop_id_t, uint_t,
136986cab2cSGirish Moodalbail     const void *);
1370dc2366fSVenugopal Iyer static void aggr_m_propinfo(void *, const char *, mac_prop_id_t,
1380dc2366fSVenugopal Iyer     mac_prop_info_handle_t);
139986cab2cSGirish Moodalbail 
140d62bc4baSyz static aggr_port_t *aggr_grp_port_lookup(aggr_grp_t *, datalink_id_t);
1414deae11aSyz static int aggr_grp_rem_port(aggr_grp_t *, aggr_port_t *, boolean_t *,
1424deae11aSyz     boolean_t *);
143d62bc4baSyz 
1447c478bd9Sstevel@tonic-gate static void aggr_grp_capab_set(aggr_grp_t *);
1457c478bd9Sstevel@tonic-gate static boolean_t aggr_grp_capab_check(aggr_grp_t *, aggr_port_t *);
146f4420ae7Snd static uint_t aggr_grp_max_sdu(aggr_grp_t *);
147d62bc4baSyz static uint32_t aggr_grp_max_margin(aggr_grp_t *);
148f4420ae7Snd static boolean_t aggr_grp_sdu_check(aggr_grp_t *, aggr_port_t *);
149d62bc4baSyz static boolean_t aggr_grp_margin_check(aggr_grp_t *, aggr_port_t *);
150da14cebeSEric Cheng 
151da14cebeSEric Cheng static int aggr_add_pseudo_rx_group(aggr_port_t *, aggr_pseudo_rx_group_t *);
152da14cebeSEric Cheng static void aggr_rem_pseudo_rx_group(aggr_port_t *, aggr_pseudo_rx_group_t *);
153da14cebeSEric Cheng static int aggr_pseudo_disable_intr(mac_intr_handle_t);
154da14cebeSEric Cheng static int aggr_pseudo_enable_intr(mac_intr_handle_t);
15545948e49SRyan Zezeski static int aggr_pseudo_start_rx_ring(mac_ring_driver_t, uint64_t);
15645948e49SRyan Zezeski static void aggr_pseudo_stop_rx_ring(mac_ring_driver_t);
157da14cebeSEric Cheng static int aggr_addmac(void *, const uint8_t *);
158da14cebeSEric Cheng static int aggr_remmac(void *, const uint8_t *);
15984de666eSRyan Zezeski static int aggr_addvlan(mac_group_driver_t, uint16_t);
16084de666eSRyan Zezeski static int aggr_remvlan(mac_group_driver_t, uint16_t);
161da14cebeSEric Cheng static mblk_t *aggr_rx_poll(void *, int);
162da14cebeSEric Cheng static void aggr_fill_ring(void *, mac_ring_type_t, const int,
163da14cebeSEric Cheng     const int, mac_ring_info_t *, mac_ring_handle_t);
164da14cebeSEric Cheng static void aggr_fill_group(void *, mac_ring_type_t, const int,
165da14cebeSEric Cheng     mac_group_info_t *, mac_group_handle_t);
1667c478bd9Sstevel@tonic-gate 
167210db224Sericheng static kmem_cache_t	*aggr_grp_cache;
168210db224Sericheng static mod_hash_t	*aggr_grp_hash;
169210db224Sericheng static krwlock_t	aggr_grp_lock;
170210db224Sericheng static uint_t		aggr_grp_cnt;
171d62bc4baSyz static id_space_t	*key_ids;
1727c478bd9Sstevel@tonic-gate 
1737c478bd9Sstevel@tonic-gate #define	GRP_HASHSZ		64
174d62bc4baSyz #define	GRP_HASH_KEY(linkid)	((mod_hash_key_t)(uintptr_t)linkid)
175da14cebeSEric Cheng #define	AGGR_PORT_NAME_DELIMIT '-'
1767c478bd9Sstevel@tonic-gate 
1777c478bd9Sstevel@tonic-gate static uchar_t aggr_zero_mac[] = {0, 0, 0, 0, 0, 0};
1787c478bd9Sstevel@tonic-gate 
179986cab2cSGirish Moodalbail #define	AGGR_M_CALLBACK_FLAGS	\
1800dc2366fSVenugopal Iyer 	(MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_PROPINFO)
181ba2e4443Sseb 
182ba2e4443Sseb static mac_callbacks_t aggr_m_callbacks = {
183ba2e4443Sseb 	AGGR_M_CALLBACK_FLAGS,
184ba2e4443Sseb 	aggr_m_stat,
185ba2e4443Sseb 	aggr_m_start,
186ba2e4443Sseb 	aggr_m_stop,
187ba2e4443Sseb 	aggr_m_promisc,
188ba2e4443Sseb 	aggr_m_multicst,
189da14cebeSEric Cheng 	NULL,
1900dc2366fSVenugopal Iyer 	NULL,
1910dc2366fSVenugopal Iyer 	NULL,
192ba2e4443Sseb 	aggr_m_ioctl,
193986cab2cSGirish Moodalbail 	aggr_m_capab_get,
194986cab2cSGirish Moodalbail 	NULL,
195986cab2cSGirish Moodalbail 	NULL,
196986cab2cSGirish Moodalbail 	aggr_m_setprop,
1970dc2366fSVenugopal Iyer 	NULL,
1980dc2366fSVenugopal Iyer 	aggr_m_propinfo
199ba2e4443Sseb };
200ba2e4443Sseb 
2017c478bd9Sstevel@tonic-gate /*ARGSUSED*/
2027c478bd9Sstevel@tonic-gate static int
aggr_grp_constructor(void * buf,void * arg,int kmflag)2037c478bd9Sstevel@tonic-gate aggr_grp_constructor(void *buf, void *arg, int kmflag)
2047c478bd9Sstevel@tonic-gate {
2057c478bd9Sstevel@tonic-gate 	aggr_grp_t *grp = buf;
2067c478bd9Sstevel@tonic-gate 
2077c478bd9Sstevel@tonic-gate 	bzero(grp, sizeof (*grp));
208da14cebeSEric Cheng 	mutex_init(&grp->lg_lacp_lock, NULL, MUTEX_DEFAULT, NULL);
209da14cebeSEric Cheng 	cv_init(&grp->lg_lacp_cv, NULL, CV_DEFAULT, NULL);
210da14cebeSEric Cheng 	rw_init(&grp->lg_tx_lock, NULL, RW_DRIVER, NULL);
211da14cebeSEric Cheng 	mutex_init(&grp->lg_port_lock, NULL, MUTEX_DEFAULT, NULL);
212da14cebeSEric Cheng 	cv_init(&grp->lg_port_cv, NULL, CV_DEFAULT, NULL);
2130dc2366fSVenugopal Iyer 	mutex_init(&grp->lg_tx_flowctl_lock, NULL, MUTEX_DEFAULT, NULL);
2140dc2366fSVenugopal Iyer 	cv_init(&grp->lg_tx_flowctl_cv, NULL, CV_DEFAULT, NULL);
2157c478bd9Sstevel@tonic-gate 	grp->lg_link_state = LINK_STATE_UNKNOWN;
2167c478bd9Sstevel@tonic-gate 	return (0);
2177c478bd9Sstevel@tonic-gate }
2187c478bd9Sstevel@tonic-gate 
2197c478bd9Sstevel@tonic-gate /*ARGSUSED*/
2207c478bd9Sstevel@tonic-gate static void
aggr_grp_destructor(void * buf,void * arg)2217c478bd9Sstevel@tonic-gate aggr_grp_destructor(void *buf, void *arg)
2227c478bd9Sstevel@tonic-gate {
2237c478bd9Sstevel@tonic-gate 	aggr_grp_t *grp = buf;
2247c478bd9Sstevel@tonic-gate 
2257c478bd9Sstevel@tonic-gate 	if (grp->lg_tx_ports != NULL) {
2267c478bd9Sstevel@tonic-gate 		kmem_free(grp->lg_tx_ports,
2277c478bd9Sstevel@tonic-gate 		    grp->lg_tx_ports_size * sizeof (aggr_port_t *));
2287c478bd9Sstevel@tonic-gate 	}
2297c478bd9Sstevel@tonic-gate 
230da14cebeSEric Cheng 	mutex_destroy(&grp->lg_lacp_lock);
231da14cebeSEric Cheng 	cv_destroy(&grp->lg_lacp_cv);
232da14cebeSEric Cheng 	mutex_destroy(&grp->lg_port_lock);
233da14cebeSEric Cheng 	cv_destroy(&grp->lg_port_cv);
234da14cebeSEric Cheng 	rw_destroy(&grp->lg_tx_lock);
2350dc2366fSVenugopal Iyer 	mutex_destroy(&grp->lg_tx_flowctl_lock);
2360dc2366fSVenugopal Iyer 	cv_destroy(&grp->lg_tx_flowctl_cv);
2377c478bd9Sstevel@tonic-gate }
2387c478bd9Sstevel@tonic-gate 
2397c478bd9Sstevel@tonic-gate void
aggr_grp_init(void)2407c478bd9Sstevel@tonic-gate aggr_grp_init(void)
2417c478bd9Sstevel@tonic-gate {
2427c478bd9Sstevel@tonic-gate 	aggr_grp_cache = kmem_cache_create("aggr_grp_cache",
2437c478bd9Sstevel@tonic-gate 	    sizeof (aggr_grp_t), 0, aggr_grp_constructor,
2447c478bd9Sstevel@tonic-gate 	    aggr_grp_destructor, NULL, NULL, NULL, 0);
2457c478bd9Sstevel@tonic-gate 
246210db224Sericheng 	aggr_grp_hash = mod_hash_create_idhash("aggr_grp_hash",
247210db224Sericheng 	    GRP_HASHSZ, mod_hash_null_valdtor);
248210db224Sericheng 	rw_init(&aggr_grp_lock, NULL, RW_DEFAULT, NULL);
249210db224Sericheng 	aggr_grp_cnt = 0;
250d62bc4baSyz 
251d62bc4baSyz 	/*
252d62bc4baSyz 	 * Allocate an id space to manage key values (when key is not
253d62bc4baSyz 	 * specified). The range of the id space will be from
254d62bc4baSyz 	 * (AGGR_MAX_KEY + 1) to UINT16_MAX, because the LACP protocol
255d62bc4baSyz 	 * uses a 16-bit key.
256d62bc4baSyz 	 */
257d62bc4baSyz 	key_ids = id_space_create("aggr_key_ids", AGGR_MAX_KEY + 1, UINT16_MAX);
258d62bc4baSyz 	ASSERT(key_ids != NULL);
2597c478bd9Sstevel@tonic-gate }
2607c478bd9Sstevel@tonic-gate 
261c0192a57Sericheng void
aggr_grp_fini(void)2627c478bd9Sstevel@tonic-gate aggr_grp_fini(void)
2637c478bd9Sstevel@tonic-gate {
264d62bc4baSyz 	id_space_destroy(key_ids);
265210db224Sericheng 	rw_destroy(&aggr_grp_lock);
266210db224Sericheng 	mod_hash_destroy_idhash(aggr_grp_hash);
2677c478bd9Sstevel@tonic-gate 	kmem_cache_destroy(aggr_grp_cache);
2687c478bd9Sstevel@tonic-gate }
2697c478bd9Sstevel@tonic-gate 
270210db224Sericheng uint_t
aggr_grp_count(void)271210db224Sericheng aggr_grp_count(void)
272210db224Sericheng {
273210db224Sericheng 	uint_t	count;
274210db224Sericheng 
275210db224Sericheng 	rw_enter(&aggr_grp_lock, RW_READER);
276210db224Sericheng 	count = aggr_grp_cnt;
277210db224Sericheng 	rw_exit(&aggr_grp_lock);
278210db224Sericheng 	return (count);
279210db224Sericheng }
280210db224Sericheng 
281da14cebeSEric Cheng /*
282da14cebeSEric Cheng  * Since both aggr_port_notify_cb() and aggr_port_timer_thread() functions
283da14cebeSEric Cheng  * requires the mac perimeter, this function holds a reference of the aggr
284da14cebeSEric Cheng  * and aggr won't call mac_unregister() until this reference drops to 0.
285da14cebeSEric Cheng  */
286da14cebeSEric Cheng void
aggr_grp_port_hold(aggr_port_t * port)287da14cebeSEric Cheng aggr_grp_port_hold(aggr_port_t *port)
288da14cebeSEric Cheng {
289da14cebeSEric Cheng 	aggr_grp_t	*grp = port->lp_grp;
290da14cebeSEric Cheng 
291da14cebeSEric Cheng 	AGGR_PORT_REFHOLD(port);
292da14cebeSEric Cheng 	mutex_enter(&grp->lg_port_lock);
293da14cebeSEric Cheng 	grp->lg_port_ref++;
294da14cebeSEric Cheng 	mutex_exit(&grp->lg_port_lock);
295da14cebeSEric Cheng }
296da14cebeSEric Cheng 
297da14cebeSEric Cheng /*
298da14cebeSEric Cheng  * Release the reference of the grp and inform aggr_grp_delete() calling
299da14cebeSEric Cheng  * mac_unregister() is now safe.
300da14cebeSEric Cheng  */
301da14cebeSEric Cheng void
aggr_grp_port_rele(aggr_port_t * port)302da14cebeSEric Cheng aggr_grp_port_rele(aggr_port_t *port)
303da14cebeSEric Cheng {
304da14cebeSEric Cheng 	aggr_grp_t	*grp = port->lp_grp;
305da14cebeSEric Cheng 
306da14cebeSEric Cheng 	mutex_enter(&grp->lg_port_lock);
307da14cebeSEric Cheng 	if (--grp->lg_port_ref == 0)
308da14cebeSEric Cheng 		cv_signal(&grp->lg_port_cv);
309da14cebeSEric Cheng 	mutex_exit(&grp->lg_port_lock);
310da14cebeSEric Cheng 	AGGR_PORT_REFRELE(port);
311da14cebeSEric Cheng }
312da14cebeSEric Cheng 
313da14cebeSEric Cheng /*
314da14cebeSEric Cheng  * Wait for the port's lacp timer thread and the port's notification callback
315da14cebeSEric Cheng  * to exit.
316da14cebeSEric Cheng  */
317da14cebeSEric Cheng void
aggr_grp_port_wait(aggr_grp_t * grp)318da14cebeSEric Cheng aggr_grp_port_wait(aggr_grp_t *grp)
319da14cebeSEric Cheng {
320da14cebeSEric Cheng 	mutex_enter(&grp->lg_port_lock);
321da14cebeSEric Cheng 	if (grp->lg_port_ref != 0)
322da14cebeSEric Cheng 		cv_wait(&grp->lg_port_cv, &grp->lg_port_lock);
323da14cebeSEric Cheng 	mutex_exit(&grp->lg_port_lock);
324da14cebeSEric Cheng }
325da14cebeSEric Cheng 
3267c478bd9Sstevel@tonic-gate /*
3277c478bd9Sstevel@tonic-gate  * Attach a port to a link aggregation group.
3287c478bd9Sstevel@tonic-gate  *
3297c478bd9Sstevel@tonic-gate  * A port is attached to a link aggregation group once its speed
3307c478bd9Sstevel@tonic-gate  * and link state have been verified.
3317c478bd9Sstevel@tonic-gate  *
3327c478bd9Sstevel@tonic-gate  * Returns B_TRUE if the group link state or speed has changed. If
3337c478bd9Sstevel@tonic-gate  * it's the case, the caller must notify the MAC layer via a call
3347c478bd9Sstevel@tonic-gate  * to mac_link().
3357c478bd9Sstevel@tonic-gate  */
3367c478bd9Sstevel@tonic-gate boolean_t
aggr_grp_attach_port(aggr_grp_t * grp,aggr_port_t * port)3377c478bd9Sstevel@tonic-gate aggr_grp_attach_port(aggr_grp_t *grp, aggr_port_t *port)
3387c478bd9Sstevel@tonic-gate {
3394deae11aSyz 	boolean_t link_state_changed = B_FALSE;
3407c478bd9Sstevel@tonic-gate 
341da14cebeSEric Cheng 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
342da14cebeSEric Cheng 	ASSERT(MAC_PERIM_HELD(port->lp_mh));
3437c478bd9Sstevel@tonic-gate 
3447c478bd9Sstevel@tonic-gate 	if (port->lp_state == AGGR_PORT_STATE_ATTACHED)
3457c478bd9Sstevel@tonic-gate 		return (B_FALSE);
3467c478bd9Sstevel@tonic-gate 
3477c478bd9Sstevel@tonic-gate 	/*
3487c478bd9Sstevel@tonic-gate 	 * Validate the MAC port link speed and update the group
3497c478bd9Sstevel@tonic-gate 	 * link speed if needed.
3507c478bd9Sstevel@tonic-gate 	 */
3517c478bd9Sstevel@tonic-gate 	if (port->lp_ifspeed == 0 ||
3527c478bd9Sstevel@tonic-gate 	    port->lp_link_state != LINK_STATE_UP ||
3537c478bd9Sstevel@tonic-gate 	    port->lp_link_duplex != LINK_DUPLEX_FULL) {
3547c478bd9Sstevel@tonic-gate 		/*
3557c478bd9Sstevel@tonic-gate 		 * Can't attach a MAC port with unknown link speed,
3567c478bd9Sstevel@tonic-gate 		 * down link, or not in full duplex mode.
3577c478bd9Sstevel@tonic-gate 		 */
3587c478bd9Sstevel@tonic-gate 		return (B_FALSE);
3597c478bd9Sstevel@tonic-gate 	}
3607c478bd9Sstevel@tonic-gate 
36184de666eSRyan Zezeski 	mutex_enter(&grp->lg_stat_lock);
3627c478bd9Sstevel@tonic-gate 	if (grp->lg_ifspeed == 0) {
3637c478bd9Sstevel@tonic-gate 		/*
3647c478bd9Sstevel@tonic-gate 		 * The group inherits the speed of the first link being
3657c478bd9Sstevel@tonic-gate 		 * attached.
3667c478bd9Sstevel@tonic-gate 		 */
3677c478bd9Sstevel@tonic-gate 		grp->lg_ifspeed = port->lp_ifspeed;
3684deae11aSyz 		link_state_changed = B_TRUE;
3697c478bd9Sstevel@tonic-gate 	} else if (grp->lg_ifspeed != port->lp_ifspeed) {
3707c478bd9Sstevel@tonic-gate 		/*
3717c478bd9Sstevel@tonic-gate 		 * The link speed of the MAC port must be the same as
3727c478bd9Sstevel@tonic-gate 		 * the group link speed, as per 802.3ad. Since it is
3737c478bd9Sstevel@tonic-gate 		 * not, the attach is cancelled.
3747c478bd9Sstevel@tonic-gate 		 */
37584de666eSRyan Zezeski 		mutex_exit(&grp->lg_stat_lock);
3767c478bd9Sstevel@tonic-gate 		return (B_FALSE);
3777c478bd9Sstevel@tonic-gate 	}
37884de666eSRyan Zezeski 	mutex_exit(&grp->lg_stat_lock);
3797c478bd9Sstevel@tonic-gate 
3807c478bd9Sstevel@tonic-gate 	grp->lg_nattached_ports++;
3817c478bd9Sstevel@tonic-gate 
3827c478bd9Sstevel@tonic-gate 	/*
3837c478bd9Sstevel@tonic-gate 	 * Update the group link state.
3847c478bd9Sstevel@tonic-gate 	 */
3857c478bd9Sstevel@tonic-gate 	if (grp->lg_link_state != LINK_STATE_UP) {
3867c478bd9Sstevel@tonic-gate 		grp->lg_link_state = LINK_STATE_UP;
38784de666eSRyan Zezeski 		mutex_enter(&grp->lg_stat_lock);
3887c478bd9Sstevel@tonic-gate 		grp->lg_link_duplex = LINK_DUPLEX_FULL;
38984de666eSRyan Zezeski 		mutex_exit(&grp->lg_stat_lock);
3904deae11aSyz 		link_state_changed = B_TRUE;
3917c478bd9Sstevel@tonic-gate 	}
3927c478bd9Sstevel@tonic-gate 
3937c478bd9Sstevel@tonic-gate 	/*
3947c478bd9Sstevel@tonic-gate 	 * Update port's state.
3957c478bd9Sstevel@tonic-gate 	 */
3967c478bd9Sstevel@tonic-gate 	port->lp_state = AGGR_PORT_STATE_ATTACHED;
3977c478bd9Sstevel@tonic-gate 
398ae6aa22aSVenugopal Iyer 	aggr_grp_multicst_port(port, B_TRUE);
399ae6aa22aSVenugopal Iyer 
400490ed22dSyz 	/*
40145948e49SRyan Zezeski 	 * The port client doesn't have an Rx SRS; instead of calling
40245948e49SRyan Zezeski 	 * mac_rx_set() we set the client's flow callback directly.
40345948e49SRyan Zezeski 	 * This datapath is used only when the port's driver doesn't
40445948e49SRyan Zezeski 	 * support MAC_CAPAB_RINGS. Drivers with ring support will
40545948e49SRyan Zezeski 	 * deliver traffic to the aggr via ring passthru.
406490ed22dSyz 	 */
40745948e49SRyan Zezeski 	mac_client_set_flow_cb(port->lp_mch, aggr_recv_cb, port);
408490ed22dSyz 
4097c478bd9Sstevel@tonic-gate 	/*
4107c478bd9Sstevel@tonic-gate 	 * If LACP is OFF, the port can be used to send data as soon
4117c478bd9Sstevel@tonic-gate 	 * as its link is up and verified to be compatible with the
4127c478bd9Sstevel@tonic-gate 	 * aggregation.
4137c478bd9Sstevel@tonic-gate 	 *
4147c478bd9Sstevel@tonic-gate 	 * If LACP is active or passive, notify the LACP subsystem, which
4157c478bd9Sstevel@tonic-gate 	 * will enable sending on the port following the LACP protocol.
4167c478bd9Sstevel@tonic-gate 	 */
4177c478bd9Sstevel@tonic-gate 	if (grp->lg_lacp_mode == AGGR_LACP_OFF)
4187c478bd9Sstevel@tonic-gate 		aggr_send_port_enable(port);
4197c478bd9Sstevel@tonic-gate 	else
4207c478bd9Sstevel@tonic-gate 		aggr_lacp_port_attached(port);
4217c478bd9Sstevel@tonic-gate 
4224deae11aSyz 	return (link_state_changed);
4237c478bd9Sstevel@tonic-gate }
4247c478bd9Sstevel@tonic-gate 
4257c478bd9Sstevel@tonic-gate boolean_t
aggr_grp_detach_port(aggr_grp_t * grp,aggr_port_t * port)426da14cebeSEric Cheng aggr_grp_detach_port(aggr_grp_t *grp, aggr_port_t *port)
4277c478bd9Sstevel@tonic-gate {
4284deae11aSyz 	boolean_t link_state_changed = B_FALSE;
4297c478bd9Sstevel@tonic-gate 
430da14cebeSEric Cheng 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
431da14cebeSEric Cheng 	ASSERT(MAC_PERIM_HELD(port->lp_mh));
4327c478bd9Sstevel@tonic-gate 
433da14cebeSEric Cheng 	/* update state */
4347c478bd9Sstevel@tonic-gate 	if (port->lp_state != AGGR_PORT_STATE_ATTACHED)
4357c478bd9Sstevel@tonic-gate 		return (B_FALSE);
436490ed22dSyz 
43745948e49SRyan Zezeski 	mac_client_clear_flow_cb(port->lp_mch);
4387c478bd9Sstevel@tonic-gate 
4397c478bd9Sstevel@tonic-gate 	aggr_grp_multicst_port(port, B_FALSE);
4407c478bd9Sstevel@tonic-gate 
4417c478bd9Sstevel@tonic-gate 	if (grp->lg_lacp_mode == AGGR_LACP_OFF)
4427c478bd9Sstevel@tonic-gate 		aggr_send_port_disable(port);
443da14cebeSEric Cheng 	else
4447c478bd9Sstevel@tonic-gate 		aggr_lacp_port_detached(port);
4457c478bd9Sstevel@tonic-gate 
44695c1c84bSRamesh Kumar Katla 	port->lp_state = AGGR_PORT_STATE_STANDBY;
447da14cebeSEric Cheng 
4487c478bd9Sstevel@tonic-gate 	grp->lg_nattached_ports--;
4497c478bd9Sstevel@tonic-gate 	if (grp->lg_nattached_ports == 0) {
4507c478bd9Sstevel@tonic-gate 		/* the last attached MAC port of the group is being detached */
4517c478bd9Sstevel@tonic-gate 		grp->lg_link_state = LINK_STATE_DOWN;
45284de666eSRyan Zezeski 		mutex_enter(&grp->lg_stat_lock);
45384de666eSRyan Zezeski 		grp->lg_ifspeed = 0;
4547c478bd9Sstevel@tonic-gate 		grp->lg_link_duplex = LINK_DUPLEX_UNKNOWN;
45584de666eSRyan Zezeski 		mutex_exit(&grp->lg_stat_lock);
4564deae11aSyz 		link_state_changed = B_TRUE;
4577c478bd9Sstevel@tonic-gate 	}
4587c478bd9Sstevel@tonic-gate 
4594deae11aSyz 	return (link_state_changed);
4607c478bd9Sstevel@tonic-gate }
4617c478bd9Sstevel@tonic-gate 
4627c478bd9Sstevel@tonic-gate /*
4637c478bd9Sstevel@tonic-gate  * Update the MAC addresses of the constituent ports of the specified
4647c478bd9Sstevel@tonic-gate  * group. This function is invoked:
4657c478bd9Sstevel@tonic-gate  * - after creating a new aggregation group.
4667c478bd9Sstevel@tonic-gate  * - after adding new ports to an aggregation group.
4677c478bd9Sstevel@tonic-gate  * - after removing a port from a group when the MAC address of
4687c478bd9Sstevel@tonic-gate  *   that port was used for the MAC address of the group.
4697c478bd9Sstevel@tonic-gate  * - after the MAC address of a port changed when the MAC address
4707c478bd9Sstevel@tonic-gate  *   of that port was used for the MAC address of the group.
4714deae11aSyz  *
4724deae11aSyz  * Return true if the link state of the aggregation changed, for example
4734deae11aSyz  * as a result of a failure changing the MAC address of one of the
4744deae11aSyz  * constituent ports.
4757c478bd9Sstevel@tonic-gate  */
4764deae11aSyz boolean_t
aggr_grp_update_ports_mac(aggr_grp_t * grp)4777c478bd9Sstevel@tonic-gate aggr_grp_update_ports_mac(aggr_grp_t *grp)
4787c478bd9Sstevel@tonic-gate {
4797c478bd9Sstevel@tonic-gate 	aggr_port_t *cport;
4804deae11aSyz 	boolean_t link_state_changed = B_FALSE;
481da14cebeSEric Cheng 	mac_perim_handle_t mph;
4827c478bd9Sstevel@tonic-gate 
483da14cebeSEric Cheng 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
4844deae11aSyz 
4857c478bd9Sstevel@tonic-gate 	for (cport = grp->lg_ports; cport != NULL;
4867c478bd9Sstevel@tonic-gate 	    cport = cport->lp_next) {
487da14cebeSEric Cheng 		mac_perim_enter_by_mh(cport->lp_mh, &mph);
488da14cebeSEric Cheng 		if (aggr_port_unicst(cport) != 0) {
489da14cebeSEric Cheng 			if (aggr_grp_detach_port(grp, cport))
490392b1d6eSyz 				link_state_changed = B_TRUE;
4914deae11aSyz 		} else {
4924deae11aSyz 			/*
4934deae11aSyz 			 * If a port was detached because of a previous
4944deae11aSyz 			 * failure changing the MAC address, the port is
4954deae11aSyz 			 * reattached when it successfully changes the MAC
4964deae11aSyz 			 * address now, and this might cause the link state
4974deae11aSyz 			 * of the aggregation to change.
4984deae11aSyz 			 */
499392b1d6eSyz 			if (aggr_grp_attach_port(grp, cport))
500392b1d6eSyz 				link_state_changed = B_TRUE;
5014deae11aSyz 		}
502da14cebeSEric Cheng 		mac_perim_exit(mph);
5037c478bd9Sstevel@tonic-gate 	}
5044deae11aSyz 	return (link_state_changed);
5057c478bd9Sstevel@tonic-gate }
5067c478bd9Sstevel@tonic-gate 
5077c478bd9Sstevel@tonic-gate /*
5087c478bd9Sstevel@tonic-gate  * Invoked when the MAC address of a port has changed. If the port's
5094deae11aSyz  * MAC address was used for the group MAC address, set mac_addr_changedp
5104deae11aSyz  * to B_TRUE to indicate to the caller that it should send a MAC_NOTE_UNICST
5114deae11aSyz  * notification. If the link state changes due to detach/attach of
5124deae11aSyz  * the constituent port, set link_state_changedp to B_TRUE to indicate
5134deae11aSyz  * to the caller that it should send a MAC_NOTE_LINK notification. In both
5144deae11aSyz  * cases, it is the responsibility of the caller to invoke notification
5154deae11aSyz  * functions after releasing the the port lock.
5167c478bd9Sstevel@tonic-gate  */
5174deae11aSyz void
aggr_grp_port_mac_changed(aggr_grp_t * grp,aggr_port_t * port,boolean_t * mac_addr_changedp,boolean_t * link_state_changedp)5184deae11aSyz aggr_grp_port_mac_changed(aggr_grp_t *grp, aggr_port_t *port,
5194deae11aSyz     boolean_t *mac_addr_changedp, boolean_t *link_state_changedp)
5207c478bd9Sstevel@tonic-gate {
521da14cebeSEric Cheng 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
522da14cebeSEric Cheng 	ASSERT(MAC_PERIM_HELD(port->lp_mh));
5234deae11aSyz 	ASSERT(mac_addr_changedp != NULL);
5244deae11aSyz 	ASSERT(link_state_changedp != NULL);
5254deae11aSyz 
5264deae11aSyz 	*mac_addr_changedp = B_FALSE;
5274deae11aSyz 	*link_state_changedp = B_FALSE;
5287c478bd9Sstevel@tonic-gate 
5297c478bd9Sstevel@tonic-gate 	if (grp->lg_addr_fixed) {
5307c478bd9Sstevel@tonic-gate 		/*
5317c478bd9Sstevel@tonic-gate 		 * The group is using a fixed MAC address or an automatic
5327c478bd9Sstevel@tonic-gate 		 * MAC address has not been set.
5337c478bd9Sstevel@tonic-gate 		 */
5344deae11aSyz 		return;
5357c478bd9Sstevel@tonic-gate 	}
5367c478bd9Sstevel@tonic-gate 
5377c478bd9Sstevel@tonic-gate 	if (grp->lg_mac_addr_port == port) {
5387c478bd9Sstevel@tonic-gate 		/*
5397c478bd9Sstevel@tonic-gate 		 * The MAC address of the port was assigned to the group
5407c478bd9Sstevel@tonic-gate 		 * MAC address. Update the group MAC address.
5417c478bd9Sstevel@tonic-gate 		 */
5427c478bd9Sstevel@tonic-gate 		bcopy(port->lp_addr, grp->lg_addr, ETHERADDRL);
5434deae11aSyz 		*mac_addr_changedp = B_TRUE;
5447c478bd9Sstevel@tonic-gate 	} else {
5457c478bd9Sstevel@tonic-gate 		/*
5467c478bd9Sstevel@tonic-gate 		 * Update the actual port MAC address to the MAC address
5477c478bd9Sstevel@tonic-gate 		 * of the group.
5487c478bd9Sstevel@tonic-gate 		 */
549da14cebeSEric Cheng 		if (aggr_port_unicst(port) != 0) {
550da14cebeSEric Cheng 			*link_state_changedp = aggr_grp_detach_port(grp, port);
5514deae11aSyz 		} else {
5524deae11aSyz 			/*
5534deae11aSyz 			 * If a port was detached because of a previous
5544deae11aSyz 			 * failure changing the MAC address, the port is
5554deae11aSyz 			 * reattached when it successfully changes the MAC
5564deae11aSyz 			 * address now, and this might cause the link state
5574deae11aSyz 			 * of the aggregation to change.
5584deae11aSyz 			 */
5594deae11aSyz 			*link_state_changedp = aggr_grp_attach_port(grp, port);
5604deae11aSyz 		}
5617c478bd9Sstevel@tonic-gate 	}
5627c478bd9Sstevel@tonic-gate }
5637c478bd9Sstevel@tonic-gate 
5647c478bd9Sstevel@tonic-gate /*
5657c478bd9Sstevel@tonic-gate  * Add a port to a link aggregation group.
5667c478bd9Sstevel@tonic-gate  */
5677c478bd9Sstevel@tonic-gate static int
aggr_grp_add_port(aggr_grp_t * grp,datalink_id_t port_linkid,boolean_t force,aggr_port_t ** pp)568da14cebeSEric Cheng aggr_grp_add_port(aggr_grp_t *grp, datalink_id_t port_linkid, boolean_t force,
569d62bc4baSyz     aggr_port_t **pp)
5707c478bd9Sstevel@tonic-gate {
5717c478bd9Sstevel@tonic-gate 	aggr_port_t *port, **cport;
572da14cebeSEric Cheng 	mac_perim_handle_t mph;
5732b24ab6bSSebastien Roy 	zoneid_t port_zoneid = ALL_ZONES;
5747c478bd9Sstevel@tonic-gate 	int err;
5757c478bd9Sstevel@tonic-gate 
57645948e49SRyan Zezeski 	/* The port must be in the same zone as the aggregation. */
5772b24ab6bSSebastien Roy 	if (zone_check_datalink(&port_zoneid, port_linkid) != 0)
5782b24ab6bSSebastien Roy 		port_zoneid = GLOBAL_ZONEID;
5792b24ab6bSSebastien Roy 	if (grp->lg_zoneid != port_zoneid)
5802b24ab6bSSebastien Roy 		return (EBUSY);
5812b24ab6bSSebastien Roy 
582da14cebeSEric Cheng 	/*
58345948e49SRyan Zezeski 	 * If we are creating the aggr, then there is no MAC handle
58445948e49SRyan Zezeski 	 * and thus no perimeter to hold. If we are adding a port to
58545948e49SRyan Zezeski 	 * an existing aggr, then the perimiter of the aggr's MAC must
58645948e49SRyan Zezeski 	 * be held.
587da14cebeSEric Cheng 	 */
588da14cebeSEric Cheng 	ASSERT(grp->lg_mh == NULL || MAC_PERIM_HELD(grp->lg_mh));
5897c478bd9Sstevel@tonic-gate 
590da14cebeSEric Cheng 	err = aggr_port_create(grp, port_linkid, force, &port);
5917c478bd9Sstevel@tonic-gate 	if (err != 0)
5927c478bd9Sstevel@tonic-gate 		return (err);
5937c478bd9Sstevel@tonic-gate 
594da14cebeSEric Cheng 	mac_perim_enter_by_mh(port->lp_mh, &mph);
5957c478bd9Sstevel@tonic-gate 
59645948e49SRyan Zezeski 	/* Add the new port to the end of the list. */
5977c478bd9Sstevel@tonic-gate 	cport = &grp->lg_ports;
5987c478bd9Sstevel@tonic-gate 	while (*cport != NULL)
5997c478bd9Sstevel@tonic-gate 		cport = &((*cport)->lp_next);
6007c478bd9Sstevel@tonic-gate 	*cport = port;
6017c478bd9Sstevel@tonic-gate 
6027c478bd9Sstevel@tonic-gate 	/*
6037c478bd9Sstevel@tonic-gate 	 * Back reference to the group it is member of. A port always
6047c478bd9Sstevel@tonic-gate 	 * holds a reference to its group to ensure that the back
6057c478bd9Sstevel@tonic-gate 	 * reference is always valid.
6067c478bd9Sstevel@tonic-gate 	 */
6077c478bd9Sstevel@tonic-gate 	port->lp_grp = grp;
6087c478bd9Sstevel@tonic-gate 	AGGR_GRP_REFHOLD(grp);
6097c478bd9Sstevel@tonic-gate 	grp->lg_nports++;
610*13810335SPaul Winder 	if (grp->lg_nports > grp->lg_nports_high)
611*13810335SPaul Winder 		grp->lg_nports_high = grp->lg_nports;
6127c478bd9Sstevel@tonic-gate 
6137c478bd9Sstevel@tonic-gate 	aggr_lacp_init_port(port);
614da14cebeSEric Cheng 	mac_perim_exit(mph);
615da14cebeSEric Cheng 
616da14cebeSEric Cheng 	if (pp != NULL)
617da14cebeSEric Cheng 		*pp = port;
618da14cebeSEric Cheng 
619da14cebeSEric Cheng 	return (0);
620da14cebeSEric Cheng }
621da14cebeSEric Cheng 
62209b7f21aSRobert Mustacchi /*
62399ad48a4SRyan Zezeski  * This is called when the 'lg_tx_ports' arrangement has changed and
62499ad48a4SRyan Zezeski  * we need to update the corresponding 'mi_default_tx_ring'. This
62599ad48a4SRyan Zezeski  * happens for several reasons.
62609b7f21aSRobert Mustacchi  *
62799ad48a4SRyan Zezeski  *     - A pseudo TX mac group was added or removed.
62899ad48a4SRyan Zezeski  *     - An LACP message has changed the port's state.
62999ad48a4SRyan Zezeski  *     - A link event has changed the port's state.
63099ad48a4SRyan Zezeski  *
63199ad48a4SRyan Zezeski  * In any case, we see if there is at least one port enabled (see
63299ad48a4SRyan Zezeski  * 'aggr_send_port_enable()'), and if so we use its first ring as the
63399ad48a4SRyan Zezeski  * mac's default TX ring.
63499ad48a4SRyan Zezeski  *
63599ad48a4SRyan Zezeski  * Note, because we only have a single TX group, we don't have to
63699ad48a4SRyan Zezeski  * worry about the rings moving between groups and the chance that mac
63799ad48a4SRyan Zezeski  * will reassign it unless someone removes a port, at which point, we
63899ad48a4SRyan Zezeski  * play it safe and call this again.
63909b7f21aSRobert Mustacchi  */
64009b7f21aSRobert Mustacchi void
aggr_grp_update_default(aggr_grp_t * grp)64109b7f21aSRobert Mustacchi aggr_grp_update_default(aggr_grp_t *grp)
64209b7f21aSRobert Mustacchi {
64309b7f21aSRobert Mustacchi 	aggr_port_t *port;
64409b7f21aSRobert Mustacchi 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
64509b7f21aSRobert Mustacchi 
64609b7f21aSRobert Mustacchi 	rw_enter(&grp->lg_tx_lock, RW_WRITER);
64709b7f21aSRobert Mustacchi 
64809b7f21aSRobert Mustacchi 	if (grp->lg_ntx_ports == 0) {
64909b7f21aSRobert Mustacchi 		rw_exit(&grp->lg_tx_lock);
65009b7f21aSRobert Mustacchi 		return;
65109b7f21aSRobert Mustacchi 	}
65209b7f21aSRobert Mustacchi 
65309b7f21aSRobert Mustacchi 	port = grp->lg_tx_ports[0];
65409b7f21aSRobert Mustacchi 	ASSERT(port->lp_tx_ring_cnt > 0);
65509b7f21aSRobert Mustacchi 	mac_hwring_set_default(grp->lg_mh, port->lp_pseudo_tx_rings[0]);
65609b7f21aSRobert Mustacchi 	rw_exit(&grp->lg_tx_lock);
65709b7f21aSRobert Mustacchi }
65809b7f21aSRobert Mustacchi 
659da14cebeSEric Cheng /*
6600dc2366fSVenugopal Iyer  * Add a pseudo RX ring for the given HW ring handle.
661da14cebeSEric Cheng  */
662da14cebeSEric Cheng static int
aggr_add_pseudo_rx_ring(aggr_port_t * port,aggr_pseudo_rx_group_t * rx_grp,mac_ring_handle_t hw_rh)663da14cebeSEric Cheng aggr_add_pseudo_rx_ring(aggr_port_t *port,
664da14cebeSEric Cheng     aggr_pseudo_rx_group_t *rx_grp, mac_ring_handle_t hw_rh)
665da14cebeSEric Cheng {
666da14cebeSEric Cheng 	aggr_pseudo_rx_ring_t	*ring;
667da14cebeSEric Cheng 	int			err;
668da14cebeSEric Cheng 	int			j;
669da14cebeSEric Cheng 
670da14cebeSEric Cheng 	for (j = 0; j < MAX_RINGS_PER_GROUP; j++) {
671da14cebeSEric Cheng 		ring = rx_grp->arg_rings + j;
672da14cebeSEric Cheng 		if (!(ring->arr_flags & MAC_PSEUDO_RING_INUSE))
673da14cebeSEric Cheng 			break;
674da14cebeSEric Cheng 	}
6757c478bd9Sstevel@tonic-gate 
676c615009fSyz 	/*
6770dc2366fSVenugopal Iyer 	 * No slot for this new RX ring.
678c615009fSyz 	 */
679da14cebeSEric Cheng 	if (j == MAX_RINGS_PER_GROUP)
680*13810335SPaul Winder 		return (ENOSPC);
681c615009fSyz 
682da14cebeSEric Cheng 	ring->arr_flags |= MAC_PSEUDO_RING_INUSE;
683da14cebeSEric Cheng 	ring->arr_hw_rh = hw_rh;
684da14cebeSEric Cheng 	ring->arr_port = port;
68545948e49SRyan Zezeski 	ring->arr_grp = rx_grp;
686da14cebeSEric Cheng 	rx_grp->arg_ring_cnt++;
6877c478bd9Sstevel@tonic-gate 
688da14cebeSEric Cheng 	/*
689da14cebeSEric Cheng 	 * The group is already registered, dynamically add a new ring to the
690da14cebeSEric Cheng 	 * mac group.
691da14cebeSEric Cheng 	 */
692da14cebeSEric Cheng 	if ((err = mac_group_add_ring(rx_grp->arg_gh, j)) != 0) {
693da14cebeSEric Cheng 		ring->arr_flags &= ~MAC_PSEUDO_RING_INUSE;
694da14cebeSEric Cheng 		ring->arr_hw_rh = NULL;
695da14cebeSEric Cheng 		ring->arr_port = NULL;
69645948e49SRyan Zezeski 		ring->arr_grp = NULL;
697da14cebeSEric Cheng 		rx_grp->arg_ring_cnt--;
6980dc2366fSVenugopal Iyer 	} else {
69945948e49SRyan Zezeski 		/*
70045948e49SRyan Zezeski 		 * This must run after the MAC is registered.
70145948e49SRyan Zezeski 		 */
70245948e49SRyan Zezeski 		ASSERT3P(ring->arr_rh, !=, NULL);
70345948e49SRyan Zezeski 		mac_hwring_set_passthru(hw_rh, (mac_rx_t)aggr_recv_cb,
70445948e49SRyan Zezeski 		    (void *)port, (mac_resource_handle_t)ring);
705da14cebeSEric Cheng 	}
706da14cebeSEric Cheng 	return (err);
707da14cebeSEric Cheng }
7087c478bd9Sstevel@tonic-gate 
709da14cebeSEric Cheng /*
7100dc2366fSVenugopal Iyer  * Remove the pseudo RX ring of the given HW ring handle.
711da14cebeSEric Cheng  */
712da14cebeSEric Cheng static void
aggr_rem_pseudo_rx_ring(aggr_pseudo_rx_group_t * rx_grp,mac_ring_handle_t hw_rh)713da14cebeSEric Cheng aggr_rem_pseudo_rx_ring(aggr_pseudo_rx_group_t *rx_grp, mac_ring_handle_t hw_rh)
714da14cebeSEric Cheng {
71545948e49SRyan Zezeski 	for (uint_t j = 0; j < MAX_RINGS_PER_GROUP; j++) {
71645948e49SRyan Zezeski 		aggr_pseudo_rx_ring_t *ring = rx_grp->arg_rings + j;
717da14cebeSEric Cheng 
718da14cebeSEric Cheng 		if (!(ring->arr_flags & MAC_PSEUDO_RING_INUSE) ||
719da14cebeSEric Cheng 		    ring->arr_hw_rh != hw_rh) {
720da14cebeSEric Cheng 			continue;
721da14cebeSEric Cheng 		}
722da14cebeSEric Cheng 
723da14cebeSEric Cheng 		mac_group_rem_ring(rx_grp->arg_gh, ring->arr_rh);
724da14cebeSEric Cheng 
725da14cebeSEric Cheng 		ring->arr_flags &= ~MAC_PSEUDO_RING_INUSE;
726da14cebeSEric Cheng 		ring->arr_hw_rh = NULL;
727da14cebeSEric Cheng 		ring->arr_port = NULL;
72845948e49SRyan Zezeski 		ring->arr_grp = NULL;
729da14cebeSEric Cheng 		rx_grp->arg_ring_cnt--;
73045948e49SRyan Zezeski 		mac_hwring_clear_passthru(hw_rh);
731da14cebeSEric Cheng 		break;
732da14cebeSEric Cheng 	}
733da14cebeSEric Cheng }
734da14cebeSEric Cheng 
735da14cebeSEric Cheng /*
73684de666eSRyan Zezeski  * Create pseudo rings over the HW rings of the port.
73784de666eSRyan Zezeski  *
73884de666eSRyan Zezeski  * o Create a pseudo ring in rx_grp per HW ring in the port's HW group.
73984de666eSRyan Zezeski  *
74084de666eSRyan Zezeski  * o Program existing unicast filters on the pseudo group into the HW group.
74184de666eSRyan Zezeski  *
74284de666eSRyan Zezeski  * o Program existing VLAN filters on the pseudo group into the HW group.
743da14cebeSEric Cheng  */
744da14cebeSEric Cheng static int
aggr_add_pseudo_rx_group(aggr_port_t * port,aggr_pseudo_rx_group_t * rx_grp)745da14cebeSEric Cheng aggr_add_pseudo_rx_group(aggr_port_t *port, aggr_pseudo_rx_group_t *rx_grp)
746da14cebeSEric Cheng {
747da14cebeSEric Cheng 	mac_ring_handle_t	hw_rh[MAX_RINGS_PER_GROUP];
748da14cebeSEric Cheng 	aggr_unicst_addr_t	*addr, *a;
749da14cebeSEric Cheng 	mac_perim_handle_t	pmph;
75084de666eSRyan Zezeski 	aggr_vlan_t		*avp;
75145948e49SRyan Zezeski 	uint_t			hw_rh_cnt, i;
752da14cebeSEric Cheng 	int			err = 0;
75345948e49SRyan Zezeski 	uint_t			g_idx = rx_grp->arg_index;
754da14cebeSEric Cheng 
75545948e49SRyan Zezeski 	ASSERT(MAC_PERIM_HELD(port->lp_grp->lg_mh));
75645948e49SRyan Zezeski 	ASSERT3U(g_idx, <, MAX_GROUPS_PER_PORT);
757da14cebeSEric Cheng 	mac_perim_enter_by_mh(port->lp_mh, &pmph);
758da14cebeSEric Cheng 
75934a4e6b5SToomas Soome 	i = 0;
76034a4e6b5SToomas Soome 	addr = NULL;
761da14cebeSEric Cheng 	/*
76245948e49SRyan Zezeski 	 * This function must be called after the aggr registers its
76345948e49SRyan Zezeski 	 * MAC and its Rx groups have been initialized.
764da14cebeSEric Cheng 	 */
765da14cebeSEric Cheng 	ASSERT(rx_grp->arg_gh != NULL);
766da14cebeSEric Cheng 
767da14cebeSEric Cheng 	/*
76884de666eSRyan Zezeski 	 * Get the list of the underlying HW rings.
769da14cebeSEric Cheng 	 */
77045948e49SRyan Zezeski 	hw_rh_cnt = mac_hwrings_idx_get(port->lp_mh, g_idx,
77145948e49SRyan Zezeski 	    &port->lp_hwghs[g_idx], hw_rh, MAC_RING_TYPE_RX);
772da14cebeSEric Cheng 
773da14cebeSEric Cheng 	/*
77484de666eSRyan Zezeski 	 * Add existing VLAN and unicast address filters to the port.
775da14cebeSEric Cheng 	 */
77684de666eSRyan Zezeski 	for (avp = list_head(&rx_grp->arg_vlans); avp != NULL;
77784de666eSRyan Zezeski 	    avp = list_next(&rx_grp->arg_vlans, avp)) {
77845948e49SRyan Zezeski 		if ((err = aggr_port_addvlan(port, g_idx, avp->av_vid)) != 0)
77984de666eSRyan Zezeski 			goto err;
78084de666eSRyan Zezeski 	}
78184de666eSRyan Zezeski 
782da14cebeSEric Cheng 	for (addr = rx_grp->arg_macaddr; addr != NULL; addr = addr->aua_next) {
78345948e49SRyan Zezeski 		if ((err = aggr_port_addmac(port, g_idx, addr->aua_addr)) != 0)
78484de666eSRyan Zezeski 			goto err;
785da14cebeSEric Cheng 	}
786da14cebeSEric Cheng 
78784de666eSRyan Zezeski 	for (i = 0; i < hw_rh_cnt; i++) {
788da14cebeSEric Cheng 		err = aggr_add_pseudo_rx_ring(port, rx_grp, hw_rh[i]);
78984de666eSRyan Zezeski 		if (err != 0)
79084de666eSRyan Zezeski 			goto err;
79184de666eSRyan Zezeski 	}
792da14cebeSEric Cheng 
79384de666eSRyan Zezeski 	mac_perim_exit(pmph);
79484de666eSRyan Zezeski 	return (0);
79584de666eSRyan Zezeski 
79684de666eSRyan Zezeski err:
79784de666eSRyan Zezeski 	ASSERT(err != 0);
79884de666eSRyan Zezeski 
79945948e49SRyan Zezeski 	for (uint_t j = 0; j < i; j++)
80084de666eSRyan Zezeski 		aggr_rem_pseudo_rx_ring(rx_grp, hw_rh[j]);
80184de666eSRyan Zezeski 
80284de666eSRyan Zezeski 	for (a = rx_grp->arg_macaddr; a != addr; a = a->aua_next)
80345948e49SRyan Zezeski 		aggr_port_remmac(port, g_idx, a->aua_addr);
804da14cebeSEric Cheng 
80584de666eSRyan Zezeski 	if (avp != NULL)
80684de666eSRyan Zezeski 		avp = list_prev(&rx_grp->arg_vlans, avp);
807da14cebeSEric Cheng 
80884de666eSRyan Zezeski 	for (; avp != NULL; avp = list_prev(&rx_grp->arg_vlans, avp)) {
80984de666eSRyan Zezeski 		int err2;
81084de666eSRyan Zezeski 
81145948e49SRyan Zezeski 		if ((err2 = aggr_port_remvlan(port, g_idx, avp->av_vid)) != 0) {
81284de666eSRyan Zezeski 			cmn_err(CE_WARN, "Failed to remove VLAN %u from port %s"
81384de666eSRyan Zezeski 			    ": errno %d.", avp->av_vid,
81484de666eSRyan Zezeski 			    mac_client_name(port->lp_mch), err2);
815da14cebeSEric Cheng 		}
816da14cebeSEric Cheng 	}
81784de666eSRyan Zezeski 
81845948e49SRyan Zezeski 	port->lp_hwghs[g_idx] = NULL;
819da14cebeSEric Cheng 	mac_perim_exit(pmph);
820da14cebeSEric Cheng 	return (err);
821da14cebeSEric Cheng }
822da14cebeSEric Cheng 
823da14cebeSEric Cheng /*
82484de666eSRyan Zezeski  * Destroy the pseudo rings mapping to this port and remove all VLAN
82584de666eSRyan Zezeski  * and unicast filters from this port. Even if there are no underlying
82684de666eSRyan Zezeski  * HW rings we must still remove the unicast filters to take the port
82784de666eSRyan Zezeski  * out of promisc mode.
828da14cebeSEric Cheng  */
829da14cebeSEric Cheng static void
aggr_rem_pseudo_rx_group(aggr_port_t * port,aggr_pseudo_rx_group_t * rx_grp)830da14cebeSEric Cheng aggr_rem_pseudo_rx_group(aggr_port_t *port, aggr_pseudo_rx_group_t *rx_grp)
831da14cebeSEric Cheng {
832da14cebeSEric Cheng 	mac_ring_handle_t	hw_rh[MAX_RINGS_PER_GROUP];
833da14cebeSEric Cheng 	aggr_unicst_addr_t	*addr;
834da14cebeSEric Cheng 	mac_perim_handle_t	pmph;
83545948e49SRyan Zezeski 	uint_t			hw_rh_cnt;
83645948e49SRyan Zezeski 	uint_t			g_idx = rx_grp->arg_index;
837da14cebeSEric Cheng 
83845948e49SRyan Zezeski 	ASSERT(MAC_PERIM_HELD(port->lp_grp->lg_mh));
83945948e49SRyan Zezeski 	ASSERT3U(g_idx, <, MAX_GROUPS_PER_PORT);
84045948e49SRyan Zezeski 	ASSERT3P(rx_grp->arg_gh, !=, NULL);
841da14cebeSEric Cheng 	mac_perim_enter_by_mh(port->lp_mh, &pmph);
842da14cebeSEric Cheng 
84345948e49SRyan Zezeski 	hw_rh_cnt = mac_hwrings_idx_get(port->lp_mh, g_idx, NULL, hw_rh,
84445948e49SRyan Zezeski 	    MAC_RING_TYPE_RX);
845da14cebeSEric Cheng 
84645948e49SRyan Zezeski 	for (uint_t i = 0; i < hw_rh_cnt; i++)
847da14cebeSEric Cheng 		aggr_rem_pseudo_rx_ring(rx_grp, hw_rh[i]);
848da14cebeSEric Cheng 
849da14cebeSEric Cheng 	for (addr = rx_grp->arg_macaddr; addr != NULL; addr = addr->aua_next)
85045948e49SRyan Zezeski 		aggr_port_remmac(port, g_idx, addr->aua_addr);
851da14cebeSEric Cheng 
85284de666eSRyan Zezeski 	for (aggr_vlan_t *avp = list_head(&rx_grp->arg_vlans); avp != NULL;
85384de666eSRyan Zezeski 	    avp = list_next(&rx_grp->arg_vlans, avp)) {
85484de666eSRyan Zezeski 		int err;
85584de666eSRyan Zezeski 
85645948e49SRyan Zezeski 		if ((err = aggr_port_remvlan(port, g_idx, avp->av_vid)) != 0) {
85784de666eSRyan Zezeski 			cmn_err(CE_WARN, "Failed to remove VLAN %u from port %s"
85884de666eSRyan Zezeski 			    ": errno %d.", avp->av_vid,
85984de666eSRyan Zezeski 			    mac_client_name(port->lp_mch), err);
86084de666eSRyan Zezeski 		}
86184de666eSRyan Zezeski 	}
86284de666eSRyan Zezeski 
86345948e49SRyan Zezeski 	port->lp_hwghs[g_idx] = NULL;
8640dc2366fSVenugopal Iyer 	mac_perim_exit(pmph);
8650dc2366fSVenugopal Iyer }
8660dc2366fSVenugopal Iyer 
8670dc2366fSVenugopal Iyer /*
8680dc2366fSVenugopal Iyer  * Add a pseudo TX ring for the given HW ring handle.
8690dc2366fSVenugopal Iyer  */
8700dc2366fSVenugopal Iyer static int
aggr_add_pseudo_tx_ring(aggr_port_t * port,aggr_pseudo_tx_group_t * tx_grp,mac_ring_handle_t hw_rh,mac_ring_handle_t * pseudo_rh)8710dc2366fSVenugopal Iyer aggr_add_pseudo_tx_ring(aggr_port_t *port,
8720dc2366fSVenugopal Iyer     aggr_pseudo_tx_group_t *tx_grp, mac_ring_handle_t hw_rh,
8730dc2366fSVenugopal Iyer     mac_ring_handle_t *pseudo_rh)
8740dc2366fSVenugopal Iyer {
8750dc2366fSVenugopal Iyer 	aggr_pseudo_tx_ring_t	*ring;
8760dc2366fSVenugopal Iyer 	int			err;
8770dc2366fSVenugopal Iyer 	int			i;
8780dc2366fSVenugopal Iyer 
8790dc2366fSVenugopal Iyer 	ASSERT(MAC_PERIM_HELD(port->lp_mh));
8800dc2366fSVenugopal Iyer 	for (i = 0; i < MAX_RINGS_PER_GROUP; i++) {
8810dc2366fSVenugopal Iyer 		ring = tx_grp->atg_rings + i;
8820dc2366fSVenugopal Iyer 		if (!(ring->atr_flags & MAC_PSEUDO_RING_INUSE))
8830dc2366fSVenugopal Iyer 			break;
8840dc2366fSVenugopal Iyer 	}
8850dc2366fSVenugopal Iyer 	/*
8860dc2366fSVenugopal Iyer 	 * No slot for this new TX ring.
8870dc2366fSVenugopal Iyer 	 */
8880dc2366fSVenugopal Iyer 	if (i == MAX_RINGS_PER_GROUP)
889*13810335SPaul Winder 		return (ENOSPC);
8900dc2366fSVenugopal Iyer 	/*
8910dc2366fSVenugopal Iyer 	 * The following 4 statements needs to be done before
8920dc2366fSVenugopal Iyer 	 * calling mac_group_add_ring(). Otherwise it will
8930dc2366fSVenugopal Iyer 	 * result in an assertion failure in mac_init_ring().
8940dc2366fSVenugopal Iyer 	 */
8950dc2366fSVenugopal Iyer 	ring->atr_flags |= MAC_PSEUDO_RING_INUSE;
8960dc2366fSVenugopal Iyer 	ring->atr_hw_rh = hw_rh;
8970dc2366fSVenugopal Iyer 	ring->atr_port = port;
8980dc2366fSVenugopal Iyer 	tx_grp->atg_ring_cnt++;
8990dc2366fSVenugopal Iyer 
9000dc2366fSVenugopal Iyer 	/*
9010dc2366fSVenugopal Iyer 	 * The TX side has no concept of ring groups unlike RX groups.
9020dc2366fSVenugopal Iyer 	 * There is just a single group which stores all the TX rings.
9030dc2366fSVenugopal Iyer 	 * This group will be used to store aggr's pseudo TX rings.
9040dc2366fSVenugopal Iyer 	 */
9050dc2366fSVenugopal Iyer 	if ((err = mac_group_add_ring(tx_grp->atg_gh, i)) != 0) {
9060dc2366fSVenugopal Iyer 		ring->atr_flags &= ~MAC_PSEUDO_RING_INUSE;
9070dc2366fSVenugopal Iyer 		ring->atr_hw_rh = NULL;
9080dc2366fSVenugopal Iyer 		ring->atr_port = NULL;
9090dc2366fSVenugopal Iyer 		tx_grp->atg_ring_cnt--;
9100dc2366fSVenugopal Iyer 	} else {
9110dc2366fSVenugopal Iyer 		*pseudo_rh = mac_find_ring(tx_grp->atg_gh, i);
9120dc2366fSVenugopal Iyer 		if (hw_rh != NULL) {
9130dc2366fSVenugopal Iyer 			mac_hwring_setup(hw_rh, (mac_resource_handle_t)ring,
9140dc2366fSVenugopal Iyer 			    mac_find_ring(tx_grp->atg_gh, i));
9150dc2366fSVenugopal Iyer 		}
9160dc2366fSVenugopal Iyer 	}
91709b7f21aSRobert Mustacchi 
9180dc2366fSVenugopal Iyer 	return (err);
9190dc2366fSVenugopal Iyer }
9200dc2366fSVenugopal Iyer 
9210dc2366fSVenugopal Iyer /*
9220dc2366fSVenugopal Iyer  * Remove the pseudo TX ring of the given HW ring handle.
9230dc2366fSVenugopal Iyer  */
9240dc2366fSVenugopal Iyer static void
aggr_rem_pseudo_tx_ring(aggr_pseudo_tx_group_t * tx_grp,mac_ring_handle_t pseudo_hw_rh)9250dc2366fSVenugopal Iyer aggr_rem_pseudo_tx_ring(aggr_pseudo_tx_group_t *tx_grp,
9260dc2366fSVenugopal Iyer     mac_ring_handle_t pseudo_hw_rh)
9270dc2366fSVenugopal Iyer {
9280dc2366fSVenugopal Iyer 	aggr_pseudo_tx_ring_t	*ring;
9290dc2366fSVenugopal Iyer 	int			i;
9300dc2366fSVenugopal Iyer 
9310dc2366fSVenugopal Iyer 	for (i = 0; i < MAX_RINGS_PER_GROUP; i++) {
9320dc2366fSVenugopal Iyer 		ring = tx_grp->atg_rings + i;
9330dc2366fSVenugopal Iyer 		if (ring->atr_rh != pseudo_hw_rh)
9340dc2366fSVenugopal Iyer 			continue;
9350dc2366fSVenugopal Iyer 
9360dc2366fSVenugopal Iyer 		ASSERT(ring->atr_flags & MAC_PSEUDO_RING_INUSE);
9370dc2366fSVenugopal Iyer 		mac_group_rem_ring(tx_grp->atg_gh, pseudo_hw_rh);
9380dc2366fSVenugopal Iyer 		ring->atr_flags &= ~MAC_PSEUDO_RING_INUSE;
9390dc2366fSVenugopal Iyer 		mac_hwring_teardown(ring->atr_hw_rh);
9400dc2366fSVenugopal Iyer 		ring->atr_hw_rh = NULL;
9410dc2366fSVenugopal Iyer 		ring->atr_port = NULL;
9420dc2366fSVenugopal Iyer 		tx_grp->atg_ring_cnt--;
9430dc2366fSVenugopal Iyer 		break;
9440dc2366fSVenugopal Iyer 	}
9450dc2366fSVenugopal Iyer }
9460dc2366fSVenugopal Iyer 
9470dc2366fSVenugopal Iyer /*
9480dc2366fSVenugopal Iyer  * This function is called to create pseudo rings over hardware rings of
9490dc2366fSVenugopal Iyer  * the underlying device. There is a 1:1 mapping between the pseudo TX
9500dc2366fSVenugopal Iyer  * rings of the aggr and the hardware rings of the underlying port.
9510dc2366fSVenugopal Iyer  */
9520dc2366fSVenugopal Iyer static int
aggr_add_pseudo_tx_group(aggr_port_t * port,aggr_pseudo_tx_group_t * tx_grp,uint_t limit)953*13810335SPaul Winder aggr_add_pseudo_tx_group(aggr_port_t *port, aggr_pseudo_tx_group_t *tx_grp,
954*13810335SPaul Winder     uint_t limit)
9550dc2366fSVenugopal Iyer {
9560dc2366fSVenugopal Iyer 	aggr_grp_t		*grp = port->lp_grp;
9570dc2366fSVenugopal Iyer 	mac_ring_handle_t	hw_rh[MAX_RINGS_PER_GROUP], pseudo_rh;
9580dc2366fSVenugopal Iyer 	mac_perim_handle_t	pmph;
9590dc2366fSVenugopal Iyer 	int			hw_rh_cnt, i = 0, j;
9600dc2366fSVenugopal Iyer 	int			err = 0;
9610dc2366fSVenugopal Iyer 
962*13810335SPaul Winder 	if (limit == 0)
963*13810335SPaul Winder 		return (ENOSPC);
964*13810335SPaul Winder 
9650dc2366fSVenugopal Iyer 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
9660dc2366fSVenugopal Iyer 	mac_perim_enter_by_mh(port->lp_mh, &pmph);
9670dc2366fSVenugopal Iyer 
9680dc2366fSVenugopal Iyer 	/*
9690dc2366fSVenugopal Iyer 	 * Get the list the the underlying HW rings.
9700dc2366fSVenugopal Iyer 	 */
97145948e49SRyan Zezeski 	hw_rh_cnt = mac_hwrings_get(port->lp_mch, NULL, hw_rh,
97245948e49SRyan Zezeski 	    MAC_RING_TYPE_TX);
9730dc2366fSVenugopal Iyer 
9740dc2366fSVenugopal Iyer 	/*
9750dc2366fSVenugopal Iyer 	 * Even if the underlying NIC does not have TX rings, we
9760dc2366fSVenugopal Iyer 	 * still make a psuedo TX ring for that NIC with NULL as
9770dc2366fSVenugopal Iyer 	 * the ring handle.
9780dc2366fSVenugopal Iyer 	 */
9790dc2366fSVenugopal Iyer 	if (hw_rh_cnt == 0)
9800dc2366fSVenugopal Iyer 		port->lp_tx_ring_cnt = 1;
9810dc2366fSVenugopal Iyer 	else
982*13810335SPaul Winder 		port->lp_tx_ring_cnt = MIN(hw_rh_cnt, limit);
9830dc2366fSVenugopal Iyer 
984*13810335SPaul Winder 	port->lp_tx_ring_alloc = port->lp_tx_ring_cnt;
9850dc2366fSVenugopal Iyer 	port->lp_tx_rings = kmem_zalloc((sizeof (mac_ring_handle_t *) *
986*13810335SPaul Winder 	    port->lp_tx_ring_alloc), KM_SLEEP);
9870dc2366fSVenugopal Iyer 	port->lp_pseudo_tx_rings = kmem_zalloc((sizeof (mac_ring_handle_t *) *
988*13810335SPaul Winder 	    port->lp_tx_ring_alloc), KM_SLEEP);
9890dc2366fSVenugopal Iyer 
9900dc2366fSVenugopal Iyer 	if (hw_rh_cnt == 0) {
9910dc2366fSVenugopal Iyer 		if ((err = aggr_add_pseudo_tx_ring(port, tx_grp,
9920dc2366fSVenugopal Iyer 		    NULL, &pseudo_rh)) == 0) {
9930dc2366fSVenugopal Iyer 			port->lp_tx_rings[0] = NULL;
9940dc2366fSVenugopal Iyer 			port->lp_pseudo_tx_rings[0] = pseudo_rh;
9950dc2366fSVenugopal Iyer 		}
9960dc2366fSVenugopal Iyer 	} else {
997*13810335SPaul Winder 		for (i = 0; err == 0 && i < port->lp_tx_ring_cnt; i++) {
9980dc2366fSVenugopal Iyer 			err = aggr_add_pseudo_tx_ring(port,
9990dc2366fSVenugopal Iyer 			    tx_grp, hw_rh[i], &pseudo_rh);
10000dc2366fSVenugopal Iyer 			if (err != 0)
10010dc2366fSVenugopal Iyer 				break;
10020dc2366fSVenugopal Iyer 			port->lp_tx_rings[i] = hw_rh[i];
10030dc2366fSVenugopal Iyer 			port->lp_pseudo_tx_rings[i] = pseudo_rh;
10040dc2366fSVenugopal Iyer 		}
10050dc2366fSVenugopal Iyer 	}
10060dc2366fSVenugopal Iyer 
10070dc2366fSVenugopal Iyer 	if (err != 0) {
10080dc2366fSVenugopal Iyer 		if (hw_rh_cnt != 0) {
10090dc2366fSVenugopal Iyer 			for (j = 0; j < i; j++) {
10100dc2366fSVenugopal Iyer 				aggr_rem_pseudo_tx_ring(tx_grp,
10110dc2366fSVenugopal Iyer 				    port->lp_pseudo_tx_rings[j]);
10120dc2366fSVenugopal Iyer 			}
10130dc2366fSVenugopal Iyer 		}
10140dc2366fSVenugopal Iyer 		kmem_free(port->lp_tx_rings,
1015*13810335SPaul Winder 		    (sizeof (mac_ring_handle_t *) * port->lp_tx_ring_alloc));
10160dc2366fSVenugopal Iyer 		kmem_free(port->lp_pseudo_tx_rings,
1017*13810335SPaul Winder 		    (sizeof (mac_ring_handle_t *) * port->lp_tx_ring_alloc));
10180dc2366fSVenugopal Iyer 		port->lp_tx_ring_cnt = 0;
1019*13810335SPaul Winder 		port->lp_tx_ring_alloc = 0;
10200dc2366fSVenugopal Iyer 	} else {
10210dc2366fSVenugopal Iyer 		port->lp_tx_grp_added = B_TRUE;
10220dc2366fSVenugopal Iyer 		port->lp_tx_notify_mh = mac_client_tx_notify(port->lp_mch,
10230dc2366fSVenugopal Iyer 		    aggr_tx_ring_update, port);
10240dc2366fSVenugopal Iyer 	}
10250dc2366fSVenugopal Iyer 	mac_perim_exit(pmph);
102609b7f21aSRobert Mustacchi 	aggr_grp_update_default(grp);
10270dc2366fSVenugopal Iyer 	return (err);
10280dc2366fSVenugopal Iyer }
10290dc2366fSVenugopal Iyer 
10300dc2366fSVenugopal Iyer /*
10310dc2366fSVenugopal Iyer  * This function is called by aggr to remove pseudo TX rings over the
10320dc2366fSVenugopal Iyer  * HW rings of the underlying port.
10330dc2366fSVenugopal Iyer  */
10340dc2366fSVenugopal Iyer static void
aggr_rem_pseudo_tx_group(aggr_port_t * port,aggr_pseudo_tx_group_t * tx_grp)10350dc2366fSVenugopal Iyer aggr_rem_pseudo_tx_group(aggr_port_t *port, aggr_pseudo_tx_group_t *tx_grp)
10360dc2366fSVenugopal Iyer {
10370dc2366fSVenugopal Iyer 	aggr_grp_t		*grp = port->lp_grp;
10380dc2366fSVenugopal Iyer 	mac_perim_handle_t	pmph;
10390dc2366fSVenugopal Iyer 	int			i;
10400dc2366fSVenugopal Iyer 
10410dc2366fSVenugopal Iyer 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
10420dc2366fSVenugopal Iyer 	mac_perim_enter_by_mh(port->lp_mh, &pmph);
10430dc2366fSVenugopal Iyer 
10440dc2366fSVenugopal Iyer 	if (!port->lp_tx_grp_added)
10450dc2366fSVenugopal Iyer 		goto done;
10460dc2366fSVenugopal Iyer 
10470dc2366fSVenugopal Iyer 	ASSERT(tx_grp->atg_gh != NULL);
10480dc2366fSVenugopal Iyer 
10490dc2366fSVenugopal Iyer 	for (i = 0; i < port->lp_tx_ring_cnt; i++)
10500dc2366fSVenugopal Iyer 		aggr_rem_pseudo_tx_ring(tx_grp, port->lp_pseudo_tx_rings[i]);
10510dc2366fSVenugopal Iyer 
10520dc2366fSVenugopal Iyer 	kmem_free(port->lp_tx_rings,
1053*13810335SPaul Winder 	    (sizeof (mac_ring_handle_t *) * port->lp_tx_ring_alloc));
10540dc2366fSVenugopal Iyer 	kmem_free(port->lp_pseudo_tx_rings,
1055*13810335SPaul Winder 	    (sizeof (mac_ring_handle_t *) * port->lp_tx_ring_alloc));
10560dc2366fSVenugopal Iyer 
10570dc2366fSVenugopal Iyer 	port->lp_tx_ring_cnt = 0;
10580dc2366fSVenugopal Iyer 	(void) mac_client_tx_notify(port->lp_mch, NULL, port->lp_tx_notify_mh);
10590dc2366fSVenugopal Iyer 	port->lp_tx_grp_added = B_FALSE;
106009b7f21aSRobert Mustacchi 	aggr_grp_update_default(grp);
1061da14cebeSEric Cheng done:
1062da14cebeSEric Cheng 	mac_perim_exit(pmph);
1063da14cebeSEric Cheng }
1064da14cebeSEric Cheng 
1065da14cebeSEric Cheng static int
aggr_pseudo_disable_intr(mac_intr_handle_t ih)1066da14cebeSEric Cheng aggr_pseudo_disable_intr(mac_intr_handle_t ih)
1067da14cebeSEric Cheng {
1068da14cebeSEric Cheng 	aggr_pseudo_rx_ring_t *rr_ring = (aggr_pseudo_rx_ring_t *)ih;
1069da14cebeSEric Cheng 	return (mac_hwring_disable_intr(rr_ring->arr_hw_rh));
1070da14cebeSEric Cheng }
1071da14cebeSEric Cheng 
1072da14cebeSEric Cheng static int
aggr_pseudo_enable_intr(mac_intr_handle_t ih)1073da14cebeSEric Cheng aggr_pseudo_enable_intr(mac_intr_handle_t ih)
1074da14cebeSEric Cheng {
1075da14cebeSEric Cheng 	aggr_pseudo_rx_ring_t *rr_ring = (aggr_pseudo_rx_ring_t *)ih;
1076da14cebeSEric Cheng 	return (mac_hwring_enable_intr(rr_ring->arr_hw_rh));
1077da14cebeSEric Cheng }
1078da14cebeSEric Cheng 
1079666e8af9SRobert Mustacchi /*
108045948e49SRyan Zezeski  * Start the pseudo ring. Since the pseudo ring is just an abstraction
108145948e49SRyan Zezeski  * over an actual HW ring, the real task is to start the underlying HW
108245948e49SRyan Zezeski  * ring.
1083666e8af9SRobert Mustacchi  */
1084da14cebeSEric Cheng static int
aggr_pseudo_start_rx_ring(mac_ring_driver_t arg,uint64_t mr_gen)108545948e49SRyan Zezeski aggr_pseudo_start_rx_ring(mac_ring_driver_t arg, uint64_t mr_gen)
1086da14cebeSEric Cheng {
108745948e49SRyan Zezeski 	int err;
1088da14cebeSEric Cheng 	aggr_pseudo_rx_ring_t *rr_ring = (aggr_pseudo_rx_ring_t *)arg;
1089da14cebeSEric Cheng 
109045948e49SRyan Zezeski 	err = mac_hwring_start(rr_ring->arr_hw_rh);
109145948e49SRyan Zezeski 
109245948e49SRyan Zezeski 	if (err != 0)
109345948e49SRyan Zezeski 		return (err);
109445948e49SRyan Zezeski 
1095666e8af9SRobert Mustacchi 	rr_ring->arr_gen = mr_gen;
109645948e49SRyan Zezeski 	return (err);
109745948e49SRyan Zezeski }
109845948e49SRyan Zezeski 
109945948e49SRyan Zezeski /*
110045948e49SRyan Zezeski  * Stop the pseudo ring. Since the pseudo ring is just an abstraction
110145948e49SRyan Zezeski  * over an actual HW ring, the real task is to stop the underlying HW
110245948e49SRyan Zezeski  * ring.
110345948e49SRyan Zezeski  */
110445948e49SRyan Zezeski static void
aggr_pseudo_stop_rx_ring(mac_ring_driver_t arg)110545948e49SRyan Zezeski aggr_pseudo_stop_rx_ring(mac_ring_driver_t arg)
110645948e49SRyan Zezeski {
110745948e49SRyan Zezeski 	aggr_pseudo_rx_ring_t *rr_ring = (aggr_pseudo_rx_ring_t *)arg;
110845948e49SRyan Zezeski 
110945948e49SRyan Zezeski 	/*
111045948e49SRyan Zezeski 	 * The rings underlying the default group must stay up to
111145948e49SRyan Zezeski 	 * continue receiving LACP traffic. We would normally never
111245948e49SRyan Zezeski 	 * stop the default Rx rings because of the primary MAC
111345948e49SRyan Zezeski 	 * client; but aggr's primary MAC client doesn't call
111445948e49SRyan Zezeski 	 * mac_unicast_add() and thus mi_active is 0 when the last
111545948e49SRyan Zezeski 	 * non-primary client is deleted.
111645948e49SRyan Zezeski 	 */
111745948e49SRyan Zezeski 	if (rr_ring->arr_grp->arg_index != 0)
111845948e49SRyan Zezeski 		mac_hwring_stop(rr_ring->arr_hw_rh);
11197c478bd9Sstevel@tonic-gate }
11207c478bd9Sstevel@tonic-gate 
1121*13810335SPaul Winder /*
1122*13810335SPaul Winder  * Trim each port in a group to ensure it uses no more than tx_ring_limit
1123*13810335SPaul Winder  * rings.
1124*13810335SPaul Winder  */
1125*13810335SPaul Winder static void
aggr_grp_balance_tx(aggr_grp_t * grp,uint_t tx_ring_limit)1126*13810335SPaul Winder aggr_grp_balance_tx(aggr_grp_t *grp, uint_t tx_ring_limit)
1127*13810335SPaul Winder {
1128*13810335SPaul Winder 	aggr_port_t *port;
1129*13810335SPaul Winder 	mac_perim_handle_t mph;
1130*13810335SPaul Winder 	uint_t i, tx_ring_cnt;
1131*13810335SPaul Winder 
1132*13810335SPaul Winder 	ASSERT(tx_ring_limit > 0);
1133*13810335SPaul Winder 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
1134*13810335SPaul Winder 
1135*13810335SPaul Winder 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1136*13810335SPaul Winder 		mac_perim_enter_by_mh(port->lp_mh, &mph);
1137*13810335SPaul Winder 
1138*13810335SPaul Winder 		/*
1139*13810335SPaul Winder 		 * Reduce the Tx ring count first to prevent rings being
1140*13810335SPaul Winder 		 * used as they are removed.
1141*13810335SPaul Winder 		 */
1142*13810335SPaul Winder 		rw_enter(&grp->lg_tx_lock, RW_WRITER);
1143*13810335SPaul Winder 		if (port->lp_tx_ring_cnt <= tx_ring_limit) {
1144*13810335SPaul Winder 			rw_exit(&grp->lg_tx_lock);
1145*13810335SPaul Winder 			mac_perim_exit(mph);
1146*13810335SPaul Winder 			continue;
1147*13810335SPaul Winder 		}
1148*13810335SPaul Winder 
1149*13810335SPaul Winder 		tx_ring_cnt = port->lp_tx_ring_cnt;
1150*13810335SPaul Winder 		port->lp_tx_ring_cnt = tx_ring_limit;
1151*13810335SPaul Winder 		rw_exit(&grp->lg_tx_lock);
1152*13810335SPaul Winder 
1153*13810335SPaul Winder 		for (i = tx_ring_cnt - 1; i >= tx_ring_limit; i--) {
1154*13810335SPaul Winder 			aggr_rem_pseudo_tx_ring(&grp->lg_tx_group,
1155*13810335SPaul Winder 			    port->lp_pseudo_tx_rings[i]);
1156*13810335SPaul Winder 
1157*13810335SPaul Winder 		}
1158*13810335SPaul Winder 
1159*13810335SPaul Winder 		mac_perim_exit(mph);
1160*13810335SPaul Winder 	}
1161*13810335SPaul Winder }
1162*13810335SPaul Winder 
11637c478bd9Sstevel@tonic-gate /*
11647c478bd9Sstevel@tonic-gate  * Add one or more ports to an existing link aggregation group.
11657c478bd9Sstevel@tonic-gate  */
11667c478bd9Sstevel@tonic-gate int
aggr_grp_add_ports(datalink_id_t linkid,uint_t nports,boolean_t force,laioc_port_t * ports)1167d62bc4baSyz aggr_grp_add_ports(datalink_id_t linkid, uint_t nports, boolean_t force,
1168d62bc4baSyz     laioc_port_t *ports)
11697c478bd9Sstevel@tonic-gate {
117045948e49SRyan Zezeski 	int rc;
117145948e49SRyan Zezeski 	uint_t port_added = 0;
117245948e49SRyan Zezeski 	uint_t grp_added;
1173*13810335SPaul Winder 	uint_t nports_high, tx_ring_limit;
11747c478bd9Sstevel@tonic-gate 	aggr_grp_t *grp = NULL;
11757c478bd9Sstevel@tonic-gate 	aggr_port_t *port;
1176c615009fSyz 	boolean_t link_state_changed = B_FALSE;
1177da14cebeSEric Cheng 	mac_perim_handle_t mph, pmph;
11787c478bd9Sstevel@tonic-gate 
117945948e49SRyan Zezeski 	/* Get the aggr corresponding to linkid. */
1180210db224Sericheng 	rw_enter(&aggr_grp_lock, RW_READER);
1181d62bc4baSyz 	if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
1182210db224Sericheng 	    (mod_hash_val_t *)&grp) != 0) {
1183210db224Sericheng 		rw_exit(&aggr_grp_lock);
1184210db224Sericheng 		return (ENOENT);
11857c478bd9Sstevel@tonic-gate 	}
11867c478bd9Sstevel@tonic-gate 	AGGR_GRP_REFHOLD(grp);
11877c478bd9Sstevel@tonic-gate 
1188da14cebeSEric Cheng 	/*
118945948e49SRyan Zezeski 	 * Hold the perimeter so that the aggregation can't be destroyed.
1190da14cebeSEric Cheng 	 */
1191da14cebeSEric Cheng 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
1192da14cebeSEric Cheng 	rw_exit(&aggr_grp_lock);
11937c478bd9Sstevel@tonic-gate 
1194*13810335SPaul Winder 	/*
1195*13810335SPaul Winder 	 * Limit the number of Tx rings per port. When determining the
1196*13810335SPaul Winder 	 * number of ports take into consideration the existing high
1197*13810335SPaul Winder 	 * value, and what the new high value may be after this request.
1198*13810335SPaul Winder 	 */
1199*13810335SPaul Winder 	nports_high = MAX(grp->lg_nports_high, grp->lg_nports + nports);
1200*13810335SPaul Winder 	tx_ring_limit = MAX_RINGS_PER_GROUP / nports_high;
1201*13810335SPaul Winder 
1202*13810335SPaul Winder 	if (tx_ring_limit == 0) {
1203*13810335SPaul Winder 		rc = ENOSPC;
1204*13810335SPaul Winder 		goto bail;
1205*13810335SPaul Winder 	}
1206*13810335SPaul Winder 
1207*13810335SPaul Winder 	/*
1208*13810335SPaul Winder 	 * Balance the Tx rings so each port has a fair share of rings.
1209*13810335SPaul Winder 	 */
1210*13810335SPaul Winder 	aggr_grp_balance_tx(grp, tx_ring_limit);
1211*13810335SPaul Winder 
121245948e49SRyan Zezeski 	/* Add the specified ports to the aggr. */
121345948e49SRyan Zezeski 	for (uint_t i = 0; i < nports; i++) {
121445948e49SRyan Zezeski 		grp_added = 0;
121545948e49SRyan Zezeski 
1216d62bc4baSyz 		if ((rc = aggr_grp_add_port(grp, ports[i].lp_linkid,
1217d62bc4baSyz 		    force, &port)) != 0) {
12187c478bd9Sstevel@tonic-gate 			goto bail;
1219ba2e4443Sseb 		}
122045948e49SRyan Zezeski 
12217c478bd9Sstevel@tonic-gate 		ASSERT(port != NULL);
122245948e49SRyan Zezeski 		port_added++;
12237c478bd9Sstevel@tonic-gate 
12247c478bd9Sstevel@tonic-gate 		/* check capabilities */
1225f4420ae7Snd 		if (!aggr_grp_capab_check(grp, port) ||
1226d62bc4baSyz 		    !aggr_grp_sdu_check(grp, port) ||
1227d62bc4baSyz 		    !aggr_grp_margin_check(grp, port)) {
12287c478bd9Sstevel@tonic-gate 			rc = ENOTSUP;
12297c478bd9Sstevel@tonic-gate 			goto bail;
12307c478bd9Sstevel@tonic-gate 		}
12317c478bd9Sstevel@tonic-gate 
1232da14cebeSEric Cheng 		/*
1233da14cebeSEric Cheng 		 * Create the pseudo ring for each HW ring of the underlying
1234da14cebeSEric Cheng 		 * port.
1235da14cebeSEric Cheng 		 */
1236*13810335SPaul Winder 		rc = aggr_add_pseudo_tx_group(port, &grp->lg_tx_group,
1237*13810335SPaul Winder 		    tx_ring_limit);
12380dc2366fSVenugopal Iyer 		if (rc != 0)
12390dc2366fSVenugopal Iyer 			goto bail;
124045948e49SRyan Zezeski 
124145948e49SRyan Zezeski 		for (uint_t j = 0; j < grp->lg_rx_group_count; j++) {
124245948e49SRyan Zezeski 			rc = aggr_add_pseudo_rx_group(port,
124345948e49SRyan Zezeski 			    &grp->lg_rx_groups[j]);
124445948e49SRyan Zezeski 
124545948e49SRyan Zezeski 			if (rc != 0)
124645948e49SRyan Zezeski 				goto bail;
124745948e49SRyan Zezeski 
124845948e49SRyan Zezeski 			grp_added++;
124945948e49SRyan Zezeski 		}
1250da14cebeSEric Cheng 
1251da14cebeSEric Cheng 		mac_perim_enter_by_mh(port->lp_mh, &pmph);
1252da14cebeSEric Cheng 
1253da14cebeSEric Cheng 		/* set LACP mode */
1254da14cebeSEric Cheng 		aggr_port_lacp_set_mode(grp, port);
1255da14cebeSEric Cheng 
12567c478bd9Sstevel@tonic-gate 		/* start port if group has already been started */
12577c478bd9Sstevel@tonic-gate 		if (grp->lg_started) {
12587c478bd9Sstevel@tonic-gate 			rc = aggr_port_start(port);
12597c478bd9Sstevel@tonic-gate 			if (rc != 0) {
1260da14cebeSEric Cheng 				mac_perim_exit(pmph);
12617c478bd9Sstevel@tonic-gate 				goto bail;
12627c478bd9Sstevel@tonic-gate 			}
12637c478bd9Sstevel@tonic-gate 
1264da14cebeSEric Cheng 			/*
1265da14cebeSEric Cheng 			 * Turn on the promiscuous mode over the port when it
1266da14cebeSEric Cheng 			 * is requested to be turned on to receive the
126745948e49SRyan Zezeski 			 * non-primary address over a port, or the promiscuous
1268da14cebeSEric Cheng 			 * mode is enabled over the aggr.
1269da14cebeSEric Cheng 			 */
1270da14cebeSEric Cheng 			if (grp->lg_promisc || port->lp_prom_addr != NULL) {
1271da14cebeSEric Cheng 				rc = aggr_port_promisc(port, B_TRUE);
1272da14cebeSEric Cheng 				if (rc != 0) {
1273da14cebeSEric Cheng 					mac_perim_exit(pmph);
1274da14cebeSEric Cheng 					goto bail;
1275da14cebeSEric Cheng 				}
12767c478bd9Sstevel@tonic-gate 			}
12777c478bd9Sstevel@tonic-gate 		}
1278da14cebeSEric Cheng 		mac_perim_exit(pmph);
1279c615009fSyz 
1280c615009fSyz 		/*
1281c615009fSyz 		 * Attach each port if necessary.
1282c615009fSyz 		 */
1283da14cebeSEric Cheng 		if (aggr_port_notify_link(grp, port))
1284392b1d6eSyz 			link_state_changed = B_TRUE;
1285da14cebeSEric Cheng 
1286da14cebeSEric Cheng 		/*
1287da14cebeSEric Cheng 		 * Initialize the callback functions for this port.
1288da14cebeSEric Cheng 		 */
1289da14cebeSEric Cheng 		aggr_port_init_callbacks(port);
12907c478bd9Sstevel@tonic-gate 	}
12917c478bd9Sstevel@tonic-gate 
12927c478bd9Sstevel@tonic-gate 	/* update the MAC address of the constituent ports */
1293392b1d6eSyz 	if (aggr_grp_update_ports_mac(grp))
1294392b1d6eSyz 		link_state_changed = B_TRUE;
1295c615009fSyz 
1296c615009fSyz 	if (link_state_changed)
1297ba2e4443Sseb 		mac_link_update(grp->lg_mh, grp->lg_link_state);
12987c478bd9Sstevel@tonic-gate 
12997c478bd9Sstevel@tonic-gate bail:
13007c478bd9Sstevel@tonic-gate 	if (rc != 0) {
13017c478bd9Sstevel@tonic-gate 		/* stop and remove ports that have been added */
130245948e49SRyan Zezeski 		for (uint_t i = 0; i < port_added; i++) {
130345948e49SRyan Zezeski 			uint_t grp_remove;
130445948e49SRyan Zezeski 
1305d62bc4baSyz 			port = aggr_grp_port_lookup(grp, ports[i].lp_linkid);
13067c478bd9Sstevel@tonic-gate 			ASSERT(port != NULL);
130745948e49SRyan Zezeski 
13087c478bd9Sstevel@tonic-gate 			if (grp->lg_started) {
1309da14cebeSEric Cheng 				mac_perim_enter_by_mh(port->lp_mh, &pmph);
1310da14cebeSEric Cheng 				(void) aggr_port_promisc(port, B_FALSE);
13117c478bd9Sstevel@tonic-gate 				aggr_port_stop(port);
1312da14cebeSEric Cheng 				mac_perim_exit(pmph);
13137c478bd9Sstevel@tonic-gate 			}
131445948e49SRyan Zezeski 
13150dc2366fSVenugopal Iyer 			aggr_rem_pseudo_tx_group(port, &grp->lg_tx_group);
131645948e49SRyan Zezeski 
131745948e49SRyan Zezeski 			/*
131845948e49SRyan Zezeski 			 * Only the last port could have a partial set
131945948e49SRyan Zezeski 			 * of groups added.
132045948e49SRyan Zezeski 			 */
132145948e49SRyan Zezeski 			grp_remove = (i + 1 == port_added) ? grp_added :
132245948e49SRyan Zezeski 			    grp->lg_rx_group_count;
132345948e49SRyan Zezeski 
132445948e49SRyan Zezeski 			for (uint_t j = 0; j < grp_remove; j++) {
132545948e49SRyan Zezeski 				aggr_rem_pseudo_rx_group(port,
132645948e49SRyan Zezeski 				    &grp->lg_rx_groups[j]);
132745948e49SRyan Zezeski 			}
132845948e49SRyan Zezeski 
13294deae11aSyz 			(void) aggr_grp_rem_port(grp, port, NULL, NULL);
13307c478bd9Sstevel@tonic-gate 		}
13317c478bd9Sstevel@tonic-gate 	}
13327c478bd9Sstevel@tonic-gate 
1333da14cebeSEric Cheng 	mac_perim_exit(mph);
13347c478bd9Sstevel@tonic-gate 	AGGR_GRP_REFRELE(grp);
13357c478bd9Sstevel@tonic-gate 	return (rc);
13367c478bd9Sstevel@tonic-gate }
13377c478bd9Sstevel@tonic-gate 
1338da14cebeSEric Cheng static int
aggr_grp_modify_common(aggr_grp_t * grp,uint8_t update_mask,uint32_t policy,boolean_t mac_fixed,const uchar_t * mac_addr,aggr_lacp_mode_t lacp_mode,aggr_lacp_timer_t lacp_timer)1339da14cebeSEric Cheng aggr_grp_modify_common(aggr_grp_t *grp, uint8_t update_mask, uint32_t policy,
1340da14cebeSEric Cheng     boolean_t mac_fixed, const uchar_t *mac_addr, aggr_lacp_mode_t lacp_mode,
1341da14cebeSEric Cheng     aggr_lacp_timer_t lacp_timer)
13427c478bd9Sstevel@tonic-gate {
13437c478bd9Sstevel@tonic-gate 	boolean_t mac_addr_changed = B_FALSE;
13444deae11aSyz 	boolean_t link_state_changed = B_FALSE;
1345da14cebeSEric Cheng 	mac_perim_handle_t pmph;
13467c478bd9Sstevel@tonic-gate 
1347da14cebeSEric Cheng 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
13487c478bd9Sstevel@tonic-gate 
13497c478bd9Sstevel@tonic-gate 	/* validate fixed address if specified */
13507c478bd9Sstevel@tonic-gate 	if ((update_mask & AGGR_MODIFY_MAC) && mac_fixed &&
13517c478bd9Sstevel@tonic-gate 	    ((bcmp(aggr_zero_mac, mac_addr, ETHERADDRL) == 0) ||
13527c478bd9Sstevel@tonic-gate 	    (mac_addr[0] & 0x01))) {
1353da14cebeSEric Cheng 		return (EINVAL);
13547c478bd9Sstevel@tonic-gate 	}
13557c478bd9Sstevel@tonic-gate 
13567c478bd9Sstevel@tonic-gate 	/* update policy if requested */
13577c478bd9Sstevel@tonic-gate 	if (update_mask & AGGR_MODIFY_POLICY)
13587c478bd9Sstevel@tonic-gate 		aggr_send_update_policy(grp, policy);
13597c478bd9Sstevel@tonic-gate 
13607c478bd9Sstevel@tonic-gate 	/* update unicast MAC address if requested */
13617c478bd9Sstevel@tonic-gate 	if (update_mask & AGGR_MODIFY_MAC) {
13627c478bd9Sstevel@tonic-gate 		if (mac_fixed) {
13637c478bd9Sstevel@tonic-gate 			/* user-supplied MAC address */
13647c478bd9Sstevel@tonic-gate 			grp->lg_mac_addr_port = NULL;
13657c478bd9Sstevel@tonic-gate 			if (bcmp(mac_addr, grp->lg_addr, ETHERADDRL) != 0) {
13667c478bd9Sstevel@tonic-gate 				bcopy(mac_addr, grp->lg_addr, ETHERADDRL);
13677c478bd9Sstevel@tonic-gate 				mac_addr_changed = B_TRUE;
13687c478bd9Sstevel@tonic-gate 			}
13697c478bd9Sstevel@tonic-gate 		} else if (grp->lg_addr_fixed) {
13707c478bd9Sstevel@tonic-gate 			/* switch from user-supplied to automatic */
13717c478bd9Sstevel@tonic-gate 			aggr_port_t *port = grp->lg_ports;
13727c478bd9Sstevel@tonic-gate 
1373da14cebeSEric Cheng 			mac_perim_enter_by_mh(port->lp_mh, &pmph);
13747c478bd9Sstevel@tonic-gate 			bcopy(port->lp_addr, grp->lg_addr, ETHERADDRL);
13757c478bd9Sstevel@tonic-gate 			grp->lg_mac_addr_port = port;
13767c478bd9Sstevel@tonic-gate 			mac_addr_changed = B_TRUE;
1377da14cebeSEric Cheng 			mac_perim_exit(pmph);
13787c478bd9Sstevel@tonic-gate 		}
13797c478bd9Sstevel@tonic-gate 		grp->lg_addr_fixed = mac_fixed;
13807c478bd9Sstevel@tonic-gate 	}
13817c478bd9Sstevel@tonic-gate 
13827c478bd9Sstevel@tonic-gate 	if (mac_addr_changed)
13834deae11aSyz 		link_state_changed = aggr_grp_update_ports_mac(grp);
13847c478bd9Sstevel@tonic-gate 
13857c478bd9Sstevel@tonic-gate 	if (update_mask & AGGR_MODIFY_LACP_MODE)
13867c478bd9Sstevel@tonic-gate 		aggr_lacp_update_mode(grp, lacp_mode);
13877c478bd9Sstevel@tonic-gate 
1388da14cebeSEric Cheng 	if (update_mask & AGGR_MODIFY_LACP_TIMER)
13897c478bd9Sstevel@tonic-gate 		aggr_lacp_update_timer(grp, lacp_timer);
13907c478bd9Sstevel@tonic-gate 
1391da14cebeSEric Cheng 	if (link_state_changed)
1392da14cebeSEric Cheng 		mac_link_update(grp->lg_mh, grp->lg_link_state);
13934deae11aSyz 
1394da14cebeSEric Cheng 	if (mac_addr_changed)
1395da14cebeSEric Cheng 		mac_unicst_update(grp->lg_mh, grp->lg_addr);
13964deae11aSyz 
1397da14cebeSEric Cheng 	return (0);
1398da14cebeSEric Cheng }
13994deae11aSyz 
1400da14cebeSEric Cheng /*
1401da14cebeSEric Cheng  * Update properties of an existing link aggregation group.
1402da14cebeSEric Cheng  */
1403da14cebeSEric Cheng int
aggr_grp_modify(datalink_id_t linkid,uint8_t update_mask,uint32_t policy,boolean_t mac_fixed,const uchar_t * mac_addr,aggr_lacp_mode_t lacp_mode,aggr_lacp_timer_t lacp_timer)1404da14cebeSEric Cheng aggr_grp_modify(datalink_id_t linkid, uint8_t update_mask, uint32_t policy,
1405da14cebeSEric Cheng     boolean_t mac_fixed, const uchar_t *mac_addr, aggr_lacp_mode_t lacp_mode,
1406da14cebeSEric Cheng     aggr_lacp_timer_t lacp_timer)
1407da14cebeSEric Cheng {
1408da14cebeSEric Cheng 	aggr_grp_t *grp = NULL;
1409da14cebeSEric Cheng 	mac_perim_handle_t mph;
1410da14cebeSEric Cheng 	int err;
1411da14cebeSEric Cheng 
1412da14cebeSEric Cheng 	/* get group corresponding to linkid */
1413da14cebeSEric Cheng 	rw_enter(&aggr_grp_lock, RW_READER);
1414da14cebeSEric Cheng 	if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
1415da14cebeSEric Cheng 	    (mod_hash_val_t *)&grp) != 0) {
1416210db224Sericheng 		rw_exit(&aggr_grp_lock);
1417da14cebeSEric Cheng 		return (ENOENT);
14187c478bd9Sstevel@tonic-gate 	}
1419da14cebeSEric Cheng 	AGGR_GRP_REFHOLD(grp);
14207c478bd9Sstevel@tonic-gate 
1421da14cebeSEric Cheng 	/*
1422da14cebeSEric Cheng 	 * Hold the perimeter so that the aggregation won't be destroyed.
1423da14cebeSEric Cheng 	 */
1424da14cebeSEric Cheng 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
1425da14cebeSEric Cheng 	rw_exit(&aggr_grp_lock);
14267c478bd9Sstevel@tonic-gate 
1427da14cebeSEric Cheng 	err = aggr_grp_modify_common(grp, update_mask, policy, mac_fixed,
1428da14cebeSEric Cheng 	    mac_addr, lacp_mode, lacp_timer);
1429da14cebeSEric Cheng 
1430da14cebeSEric Cheng 	mac_perim_exit(mph);
1431da14cebeSEric Cheng 	AGGR_GRP_REFRELE(grp);
1432da14cebeSEric Cheng 	return (err);
14337c478bd9Sstevel@tonic-gate }
14347c478bd9Sstevel@tonic-gate 
14357c478bd9Sstevel@tonic-gate /*
14367c478bd9Sstevel@tonic-gate  * Create a new link aggregation group upon request from administrator.
14377c478bd9Sstevel@tonic-gate  * Returns 0 on success, an errno on failure.
14387c478bd9Sstevel@tonic-gate  */
14397c478bd9Sstevel@tonic-gate int
aggr_grp_create(datalink_id_t linkid,uint32_t key,uint_t nports,laioc_port_t * ports,uint32_t policy,boolean_t mac_fixed,boolean_t force,uchar_t * mac_addr,aggr_lacp_mode_t lacp_mode,aggr_lacp_timer_t lacp_timer,cred_t * credp)1440d62bc4baSyz aggr_grp_create(datalink_id_t linkid, uint32_t key, uint_t nports,
1441d62bc4baSyz     laioc_port_t *ports, uint32_t policy, boolean_t mac_fixed, boolean_t force,
14422b24ab6bSSebastien Roy     uchar_t *mac_addr, aggr_lacp_mode_t lacp_mode, aggr_lacp_timer_t lacp_timer,
14432b24ab6bSSebastien Roy     cred_t *credp)
14447c478bd9Sstevel@tonic-gate {
14457c478bd9Sstevel@tonic-gate 	aggr_grp_t *grp = NULL;
14467c478bd9Sstevel@tonic-gate 	aggr_port_t *port;
1447adc52889SPaul Winder 	aggr_port_t *last_attached = NULL;
1448ba2e4443Sseb 	mac_register_t *mac;
14494deae11aSyz 	boolean_t link_state_changed;
1450adc52889SPaul Winder 	mac_perim_handle_t mph, pmph;
1451adc52889SPaul Winder 	datalink_id_t tempid;
1452adc52889SPaul Winder 	boolean_t mac_registered = B_FALSE;
1453*13810335SPaul Winder 	uint_t tx_ring_limit;
14547c478bd9Sstevel@tonic-gate 	int err;
1455adc52889SPaul Winder 	int i, j;
14560dc2366fSVenugopal Iyer 	kt_did_t tid = 0;
14577c478bd9Sstevel@tonic-gate 
14587c478bd9Sstevel@tonic-gate 	/* need at least one port */
14597c478bd9Sstevel@tonic-gate 	if (nports == 0)
14607c478bd9Sstevel@tonic-gate 		return (EINVAL);
14617c478bd9Sstevel@tonic-gate 
1462210db224Sericheng 	rw_enter(&aggr_grp_lock, RW_WRITER);
14637c478bd9Sstevel@tonic-gate 
1464d62bc4baSyz 	/* does a group with the same linkid already exist? */
1465d62bc4baSyz 	err = mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
1466210db224Sericheng 	    (mod_hash_val_t *)&grp);
1467210db224Sericheng 	if (err == 0) {
1468210db224Sericheng 		rw_exit(&aggr_grp_lock);
14697c478bd9Sstevel@tonic-gate 		return (EEXIST);
14707c478bd9Sstevel@tonic-gate 	}
14717c478bd9Sstevel@tonic-gate 
14727c478bd9Sstevel@tonic-gate 	grp = kmem_cache_alloc(aggr_grp_cache, KM_SLEEP);
14737c478bd9Sstevel@tonic-gate 
14747c478bd9Sstevel@tonic-gate 	grp->lg_refs = 1;
14754deae11aSyz 	grp->lg_closing = B_FALSE;
1476d62bc4baSyz 	grp->lg_force = force;
1477d62bc4baSyz 	grp->lg_linkid = linkid;
14782b24ab6bSSebastien Roy 	grp->lg_zoneid = crgetzoneid(credp);
14797c478bd9Sstevel@tonic-gate 	grp->lg_ifspeed = 0;
14807c478bd9Sstevel@tonic-gate 	grp->lg_link_state = LINK_STATE_UNKNOWN;
14817c478bd9Sstevel@tonic-gate 	grp->lg_link_duplex = LINK_DUPLEX_UNKNOWN;
14827c478bd9Sstevel@tonic-gate 	grp->lg_started = B_FALSE;
14837c478bd9Sstevel@tonic-gate 	grp->lg_promisc = B_FALSE;
1484da14cebeSEric Cheng 	grp->lg_lacp_done = B_FALSE;
14850dc2366fSVenugopal Iyer 	grp->lg_tx_notify_done = B_FALSE;
1486da14cebeSEric Cheng 	grp->lg_lacp_head = grp->lg_lacp_tail = NULL;
1487da14cebeSEric Cheng 	grp->lg_lacp_rx_thread = thread_create(NULL, 0,
1488da14cebeSEric Cheng 	    aggr_lacp_rx_thread, grp, 0, &p0, TS_RUN, minclsyspri);
14890dc2366fSVenugopal Iyer 	grp->lg_tx_notify_thread = thread_create(NULL, 0,
14900dc2366fSVenugopal Iyer 	    aggr_tx_notify_thread, grp, 0, &p0, TS_RUN, minclsyspri);
14910dc2366fSVenugopal Iyer 	grp->lg_tx_blocked_rings = kmem_zalloc((sizeof (mac_ring_handle_t *) *
14920dc2366fSVenugopal Iyer 	    MAX_RINGS_PER_GROUP), KM_SLEEP);
14930dc2366fSVenugopal Iyer 	grp->lg_tx_blocked_cnt = 0;
149445948e49SRyan Zezeski 	bzero(&grp->lg_rx_groups,
149545948e49SRyan Zezeski 	    sizeof (aggr_pseudo_rx_group_t) * MAX_GROUPS_PER_PORT);
14960dc2366fSVenugopal Iyer 	bzero(&grp->lg_tx_group, sizeof (aggr_pseudo_tx_group_t));
14977c478bd9Sstevel@tonic-gate 	aggr_lacp_init_grp(grp);
14987c478bd9Sstevel@tonic-gate 
14997c478bd9Sstevel@tonic-gate 	/* add MAC ports to group */
15007c478bd9Sstevel@tonic-gate 	grp->lg_ports = NULL;
15017c478bd9Sstevel@tonic-gate 	grp->lg_nports = 0;
15027c478bd9Sstevel@tonic-gate 	grp->lg_nattached_ports = 0;
15037c478bd9Sstevel@tonic-gate 	grp->lg_ntx_ports = 0;
15047c478bd9Sstevel@tonic-gate 
1505d62bc4baSyz 	/*
1506d62bc4baSyz 	 * If key is not specified by the user, allocate the key.
1507d62bc4baSyz 	 */
1508d62bc4baSyz 	if ((key == 0) && ((key = (uint32_t)id_alloc(key_ids)) == 0)) {
1509d62bc4baSyz 		err = ENOMEM;
1510d62bc4baSyz 		goto bail;
1511d62bc4baSyz 	}
1512d62bc4baSyz 	grp->lg_key = key;
1513d62bc4baSyz 
15147c478bd9Sstevel@tonic-gate 	for (i = 0; i < nports; i++) {
151584de666eSRyan Zezeski 		err = aggr_grp_add_port(grp, ports[i].lp_linkid, force, &port);
15167c478bd9Sstevel@tonic-gate 		if (err != 0)
15177c478bd9Sstevel@tonic-gate 			goto bail;
15187c478bd9Sstevel@tonic-gate 	}
15197c478bd9Sstevel@tonic-gate 
152045948e49SRyan Zezeski 	grp->lg_rx_group_count = 1;
152145948e49SRyan Zezeski 
152245948e49SRyan Zezeski 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
152345948e49SRyan Zezeski 		uint_t num_rgroups;
152445948e49SRyan Zezeski 
152545948e49SRyan Zezeski 		mac_perim_enter_by_mh(port->lp_mh, &mph);
152645948e49SRyan Zezeski 		num_rgroups = mac_get_num_rx_groups(port->lp_mh);
152745948e49SRyan Zezeski 		mac_perim_exit(mph);
152845948e49SRyan Zezeski 
152945948e49SRyan Zezeski 		/*
153045948e49SRyan Zezeski 		 * Utilize all the groups in a port. If some ports
153145948e49SRyan Zezeski 		 * have less groups than others, then traffic destined
153245948e49SRyan Zezeski 		 * for the same unicast address may be HW classified
153345948e49SRyan Zezeski 		 * on some ports but SW classified by aggr when
153445948e49SRyan Zezeski 		 * arriving on other ports.
153545948e49SRyan Zezeski 		 */
153645948e49SRyan Zezeski 		grp->lg_rx_group_count = MAX(grp->lg_rx_group_count,
153745948e49SRyan Zezeski 		    num_rgroups);
153845948e49SRyan Zezeski 	}
153945948e49SRyan Zezeski 
154045948e49SRyan Zezeski 	/*
154145948e49SRyan Zezeski 	 * There could be cases where the hardware provides more
154245948e49SRyan Zezeski 	 * groups than aggr can support. Make sure we never go above
154345948e49SRyan Zezeski 	 * the max aggr can support.
154445948e49SRyan Zezeski 	 */
154545948e49SRyan Zezeski 	grp->lg_rx_group_count = MIN(grp->lg_rx_group_count,
154645948e49SRyan Zezeski 	    MAX_GROUPS_PER_PORT);
154745948e49SRyan Zezeski 
154845948e49SRyan Zezeski 	ASSERT3U(grp->lg_rx_group_count, >, 0);
154945948e49SRyan Zezeski 	for (i = 0; i < MAX_GROUPS_PER_PORT; i++) {
155045948e49SRyan Zezeski 		grp->lg_rx_groups[i].arg_index = i;
155145948e49SRyan Zezeski 		grp->lg_rx_groups[i].arg_untagged = 0;
155245948e49SRyan Zezeski 		list_create(&(grp->lg_rx_groups[i].arg_vlans),
155345948e49SRyan Zezeski 		    sizeof (aggr_vlan_t), offsetof(aggr_vlan_t, av_link));
155445948e49SRyan Zezeski 	}
155545948e49SRyan Zezeski 
15567c478bd9Sstevel@tonic-gate 	/*
15577c478bd9Sstevel@tonic-gate 	 * If no explicit MAC address was specified by the administrator,
15587c478bd9Sstevel@tonic-gate 	 * set it to the MAC address of the first port.
15597c478bd9Sstevel@tonic-gate 	 */
15607c478bd9Sstevel@tonic-gate 	grp->lg_addr_fixed = mac_fixed;
15617c478bd9Sstevel@tonic-gate 	if (grp->lg_addr_fixed) {
15627c478bd9Sstevel@tonic-gate 		/* validate specified address */
15637c478bd9Sstevel@tonic-gate 		if (bcmp(aggr_zero_mac, mac_addr, ETHERADDRL) == 0) {
15647c478bd9Sstevel@tonic-gate 			err = EINVAL;
15657c478bd9Sstevel@tonic-gate 			goto bail;
15667c478bd9Sstevel@tonic-gate 		}
15677c478bd9Sstevel@tonic-gate 		bcopy(mac_addr, grp->lg_addr, ETHERADDRL);
15687c478bd9Sstevel@tonic-gate 	} else {
15697c478bd9Sstevel@tonic-gate 		bcopy(grp->lg_ports->lp_addr, grp->lg_addr, ETHERADDRL);
15707c478bd9Sstevel@tonic-gate 		grp->lg_mac_addr_port = grp->lg_ports;
15717c478bd9Sstevel@tonic-gate 	}
15727c478bd9Sstevel@tonic-gate 
157345948e49SRyan Zezeski 	/* Set the initial group capabilities. */
15747c478bd9Sstevel@tonic-gate 	aggr_grp_capab_set(grp);
15757c478bd9Sstevel@tonic-gate 
1576d62bc4baSyz 	if ((mac = mac_alloc(MAC_VERSION)) == NULL) {
1577d62bc4baSyz 		err = ENOMEM;
1578ba2e4443Sseb 		goto bail;
1579d62bc4baSyz 	}
1580ba2e4443Sseb 	mac->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
1581ba2e4443Sseb 	mac->m_driver = grp;
1582ba2e4443Sseb 	mac->m_dip = aggr_dip;
1583d62bc4baSyz 	mac->m_instance = grp->lg_key > AGGR_MAX_KEY ? (uint_t)-1 : grp->lg_key;
1584ba2e4443Sseb 	mac->m_src_addr = grp->lg_addr;
1585ba2e4443Sseb 	mac->m_callbacks = &aggr_m_callbacks;
1586ba2e4443Sseb 	mac->m_min_sdu = 0;
1587f4420ae7Snd 	mac->m_max_sdu = grp->lg_max_sdu = aggr_grp_max_sdu(grp);
1588d62bc4baSyz 	mac->m_margin = aggr_grp_max_margin(grp);
1589da14cebeSEric Cheng 	mac->m_v12n = MAC_VIRT_LEVEL1;
1590ba2e4443Sseb 	err = mac_register(mac, &grp->lg_mh);
1591ba2e4443Sseb 	mac_free(mac);
1592ba2e4443Sseb 	if (err != 0)
15937c478bd9Sstevel@tonic-gate 		goto bail;
15947c478bd9Sstevel@tonic-gate 
15952b24ab6bSSebastien Roy 	err = dls_devnet_create(grp->lg_mh, grp->lg_linkid, crgetzoneid(credp));
15962b24ab6bSSebastien Roy 	if (err != 0) {
1597d62bc4baSyz 		(void) mac_unregister(grp->lg_mh);
1598da14cebeSEric Cheng 		grp->lg_mh = NULL;
1599d62bc4baSyz 		goto bail;
1600d62bc4baSyz 	}
1601d62bc4baSyz 
1602adc52889SPaul Winder 	mac_registered = B_TRUE;
1603adc52889SPaul Winder 
1604da14cebeSEric Cheng 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
1605da14cebeSEric Cheng 
1606da14cebeSEric Cheng 	/*
1607da14cebeSEric Cheng 	 * Update the MAC address of the constituent ports.
1608da14cebeSEric Cheng 	 * None of the port is attached at this time, the link state of the
1609da14cebeSEric Cheng 	 * aggregation will not change.
161045948e49SRyan Zezeski 	 *
161145948e49SRyan Zezeski 	 * All ports take on the primary MAC address of the aggr
161245948e49SRyan Zezeski 	 * (lg_aggr). At this point, none of the ports are attached;
161345948e49SRyan Zezeski 	 * thus the link state of the aggregation will not change.
1614da14cebeSEric Cheng 	 */
1615da14cebeSEric Cheng 	link_state_changed = aggr_grp_update_ports_mac(grp);
1616da14cebeSEric Cheng 	ASSERT(!link_state_changed);
1617da14cebeSEric Cheng 
161845948e49SRyan Zezeski 	/* Update outbound load balancing policy. */
1619da14cebeSEric Cheng 	aggr_send_update_policy(grp, policy);
1620da14cebeSEric Cheng 
162145948e49SRyan Zezeski 	/* Set LACP mode. */
16227c478bd9Sstevel@tonic-gate 	aggr_lacp_set_mode(grp, lacp_mode, lacp_timer);
16237c478bd9Sstevel@tonic-gate 
1624*13810335SPaul Winder 	/*
1625*13810335SPaul Winder 	 * The pseudo Tx group holds a maximum of MAX_RINGS_PER_GROUP
1626*13810335SPaul Winder 	 * rings, when all the Tx rings of all the ports are accumulated
1627*13810335SPaul Winder 	 * it is conceivable this limit is exceeded. We try and prevent
1628*13810335SPaul Winder 	 * this by limiting the number of rings an individual port will use.
1629*13810335SPaul Winder 	 *
1630*13810335SPaul Winder 	 * - When an aggr is first created, we will not let an
1631*13810335SPaul Winder 	 *   individual port use more than MAX_RINGS_PER_GROUP/nports
1632*13810335SPaul Winder 	 *   rings.
1633*13810335SPaul Winder 	 * - As ports are added to an existing aggr, each of the
1634*13810335SPaul Winder 	 *   ports will not use more than MAX_RINGS_PER_GROUP/nports_high.
1635*13810335SPaul Winder 	 *   Where nports_high is the highest number of ports the aggr has
1636*13810335SPaul Winder 	 *   held (including any ports being added). This may involve
1637*13810335SPaul Winder 	 *   trimming rings from existing ports.
1638*13810335SPaul Winder 	 */
1639*13810335SPaul Winder 
1640*13810335SPaul Winder 	/* Leave room for 4 ports */
1641*13810335SPaul Winder 	tx_ring_limit = MAX_RINGS_PER_GROUP / MAX(4, nports);
1642*13810335SPaul Winder 
1643c615009fSyz 	/*
1644c615009fSyz 	 * Attach each port if necessary.
1645c615009fSyz 	 */
1646392b1d6eSyz 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1647da14cebeSEric Cheng 		/*
164845948e49SRyan Zezeski 		 * Create the pseudo ring for each HW ring of the
164945948e49SRyan Zezeski 		 * underlying port. Note that this is done after the
165045948e49SRyan Zezeski 		 * aggr registers its MAC.
1651da14cebeSEric Cheng 		 */
1652*13810335SPaul Winder 		err = aggr_add_pseudo_tx_group(port, &grp->lg_tx_group,
1653*13810335SPaul Winder 		    tx_ring_limit);
1654adc52889SPaul Winder 
1655adc52889SPaul Winder 		if (err != 0) {
1656adc52889SPaul Winder 			mac_perim_exit(mph);
1657adc52889SPaul Winder 			goto bail;
1658adc52889SPaul Winder 		}
165945948e49SRyan Zezeski 
166045948e49SRyan Zezeski 		for (i = 0; i < grp->lg_rx_group_count; i++) {
1661adc52889SPaul Winder 			err = aggr_add_pseudo_rx_group(port,
1662adc52889SPaul Winder 			    &grp->lg_rx_groups[i]);
1663adc52889SPaul Winder 
1664adc52889SPaul Winder 			if (err != 0) {
1665adc52889SPaul Winder 				/*
1666adc52889SPaul Winder 				 * Undo what we have added for the current
1667adc52889SPaul Winder 				 * port.
1668adc52889SPaul Winder 				 */
1669adc52889SPaul Winder 				aggr_rem_pseudo_tx_group(port,
1670adc52889SPaul Winder 				    &grp->lg_tx_group);
1671adc52889SPaul Winder 
1672adc52889SPaul Winder 				for (j = 0; j < i; j++) {
1673adc52889SPaul Winder 					aggr_rem_pseudo_rx_group(port,
1674adc52889SPaul Winder 					    &grp->lg_rx_groups[j]);
1675adc52889SPaul Winder 				}
1676adc52889SPaul Winder 
1677adc52889SPaul Winder 				mac_perim_exit(mph);
1678adc52889SPaul Winder 				goto bail;
1679adc52889SPaul Winder 			}
168045948e49SRyan Zezeski 		}
168145948e49SRyan Zezeski 
1682da14cebeSEric Cheng 		if (aggr_port_notify_link(grp, port))
1683392b1d6eSyz 			link_state_changed = B_TRUE;
1684da14cebeSEric Cheng 
1685da14cebeSEric Cheng 		/*
1686da14cebeSEric Cheng 		 * Initialize the callback functions for this port.
1687da14cebeSEric Cheng 		 */
1688da14cebeSEric Cheng 		aggr_port_init_callbacks(port);
1689adc52889SPaul Winder 
1690adc52889SPaul Winder 		last_attached = port;
1691392b1d6eSyz 	}
1692392b1d6eSyz 
1693392b1d6eSyz 	if (link_state_changed)
1694392b1d6eSyz 		mac_link_update(grp->lg_mh, grp->lg_link_state);
1695c615009fSyz 
16967c478bd9Sstevel@tonic-gate 	/* add new group to hash table */
1697d62bc4baSyz 	err = mod_hash_insert(aggr_grp_hash, GRP_HASH_KEY(linkid),
1698210db224Sericheng 	    (mod_hash_val_t)grp);
16997c478bd9Sstevel@tonic-gate 	ASSERT(err == 0);
1700210db224Sericheng 	aggr_grp_cnt++;
17017c478bd9Sstevel@tonic-gate 
1702da14cebeSEric Cheng 	mac_perim_exit(mph);
1703210db224Sericheng 	rw_exit(&aggr_grp_lock);
17047c478bd9Sstevel@tonic-gate 	return (0);
17057c478bd9Sstevel@tonic-gate 
17067c478bd9Sstevel@tonic-gate bail:
1707da14cebeSEric Cheng 	grp->lg_closing = B_TRUE;
17087c478bd9Sstevel@tonic-gate 
1709da14cebeSEric Cheng 	/*
1710da14cebeSEric Cheng 	 * Inform the lacp_rx thread to exit.
1711da14cebeSEric Cheng 	 */
1712da14cebeSEric Cheng 	mutex_enter(&grp->lg_lacp_lock);
1713da14cebeSEric Cheng 	grp->lg_lacp_done = B_TRUE;
1714da14cebeSEric Cheng 	cv_signal(&grp->lg_lacp_cv);
1715da14cebeSEric Cheng 	while (grp->lg_lacp_rx_thread != NULL)
1716da14cebeSEric Cheng 		cv_wait(&grp->lg_lacp_cv, &grp->lg_lacp_lock);
1717da14cebeSEric Cheng 	mutex_exit(&grp->lg_lacp_lock);
17180dc2366fSVenugopal Iyer 	/*
17190dc2366fSVenugopal Iyer 	 * Inform the tx_notify thread to exit.
17200dc2366fSVenugopal Iyer 	 */
17210dc2366fSVenugopal Iyer 	mutex_enter(&grp->lg_tx_flowctl_lock);
17220dc2366fSVenugopal Iyer 	if (grp->lg_tx_notify_thread != NULL) {
17230dc2366fSVenugopal Iyer 		tid = grp->lg_tx_notify_thread->t_did;
17240dc2366fSVenugopal Iyer 		grp->lg_tx_notify_done = B_TRUE;
17250dc2366fSVenugopal Iyer 		cv_signal(&grp->lg_tx_flowctl_cv);
17260dc2366fSVenugopal Iyer 	}
17270dc2366fSVenugopal Iyer 	mutex_exit(&grp->lg_tx_flowctl_lock);
17280dc2366fSVenugopal Iyer 	if (tid != 0)
17290dc2366fSVenugopal Iyer 		thread_join(tid);
17300dc2366fSVenugopal Iyer 
1731adc52889SPaul Winder 	if (mac_registered) {
1732adc52889SPaul Winder 		(void) dls_devnet_destroy(grp->lg_mh, &tempid, B_TRUE);
1733adc52889SPaul Winder 		(void) mac_disable(grp->lg_mh);
1734adc52889SPaul Winder 
1735adc52889SPaul Winder 		if (last_attached != NULL) {
1736adc52889SPaul Winder 			/*
1737adc52889SPaul Winder 			 * Detach and clean up ports added.
1738adc52889SPaul Winder 			 */
1739adc52889SPaul Winder 			mac_perim_enter_by_mh(grp->lg_mh, &mph);
1740adc52889SPaul Winder 
1741adc52889SPaul Winder 			for (port = grp->lg_ports; ; port = port->lp_next) {
1742adc52889SPaul Winder 				mac_perim_enter_by_mh(port->lp_mh, &pmph);
1743adc52889SPaul Winder 				(void) aggr_grp_detach_port(grp, port);
1744adc52889SPaul Winder 				mac_perim_exit(pmph);
1745adc52889SPaul Winder 
1746adc52889SPaul Winder 				aggr_rem_pseudo_tx_group(port,
1747adc52889SPaul Winder 				    &grp->lg_tx_group);
1748adc52889SPaul Winder 
1749adc52889SPaul Winder 				for (i = 0; i < grp->lg_rx_group_count; i++) {
1750adc52889SPaul Winder 					aggr_rem_pseudo_rx_group(port,
1751adc52889SPaul Winder 					    &grp->lg_rx_groups[i]);
1752adc52889SPaul Winder 				}
1753adc52889SPaul Winder 				if (port == last_attached)
1754adc52889SPaul Winder 					break;
1755adc52889SPaul Winder 			}
1756adc52889SPaul Winder 
1757adc52889SPaul Winder 			mac_perim_exit(mph);
1758adc52889SPaul Winder 		}
1759adc52889SPaul Winder 
1760adc52889SPaul Winder 		(void) mac_unregister(grp->lg_mh);
1761adc52889SPaul Winder 	}
1762adc52889SPaul Winder 
1763adc52889SPaul Winder 	port = grp->lg_ports;
1764adc52889SPaul Winder 	while (port != NULL) {
1765adc52889SPaul Winder 		aggr_port_t *cport;
1766adc52889SPaul Winder 
1767adc52889SPaul Winder 		cport = port->lp_next;
1768adc52889SPaul Winder 		aggr_port_delete(port);
1769adc52889SPaul Winder 		port = cport;
1770adc52889SPaul Winder 	}
1771adc52889SPaul Winder 
17720dc2366fSVenugopal Iyer 	kmem_free(grp->lg_tx_blocked_rings,
17730dc2366fSVenugopal Iyer 	    (sizeof (mac_ring_handle_t *) * MAX_RINGS_PER_GROUP));
1774210db224Sericheng 	rw_exit(&aggr_grp_lock);
1775da14cebeSEric Cheng 	AGGR_GRP_REFRELE(grp);
17767c478bd9Sstevel@tonic-gate 	return (err);
17777c478bd9Sstevel@tonic-gate }
17787c478bd9Sstevel@tonic-gate 
17797c478bd9Sstevel@tonic-gate /*
1780d62bc4baSyz  * Return a pointer to the member of a group with specified linkid.
17817c478bd9Sstevel@tonic-gate  */
17827c478bd9Sstevel@tonic-gate static aggr_port_t *
aggr_grp_port_lookup(aggr_grp_t * grp,datalink_id_t linkid)1783d62bc4baSyz aggr_grp_port_lookup(aggr_grp_t *grp, datalink_id_t linkid)
17847c478bd9Sstevel@tonic-gate {
17857c478bd9Sstevel@tonic-gate 	aggr_port_t *port;
17867c478bd9Sstevel@tonic-gate 
1787da14cebeSEric Cheng 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
17887c478bd9Sstevel@tonic-gate 
17897c478bd9Sstevel@tonic-gate 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1790d62bc4baSyz 		if (port->lp_linkid == linkid)
17917c478bd9Sstevel@tonic-gate 			break;
17927c478bd9Sstevel@tonic-gate 	}
17937c478bd9Sstevel@tonic-gate 
17947c478bd9Sstevel@tonic-gate 	return (port);
17957c478bd9Sstevel@tonic-gate }
17967c478bd9Sstevel@tonic-gate 
17977c478bd9Sstevel@tonic-gate /*
17987c478bd9Sstevel@tonic-gate  * Stop, detach and remove a port from a link aggregation group.
17997c478bd9Sstevel@tonic-gate  */
18007c478bd9Sstevel@tonic-gate static int
aggr_grp_rem_port(aggr_grp_t * grp,aggr_port_t * port,boolean_t * mac_addr_changedp,boolean_t * link_state_changedp)18014deae11aSyz aggr_grp_rem_port(aggr_grp_t *grp, aggr_port_t *port,
18024deae11aSyz     boolean_t *mac_addr_changedp, boolean_t *link_state_changedp)
18037c478bd9Sstevel@tonic-gate {
18044deae11aSyz 	int rc = 0;
18057c478bd9Sstevel@tonic-gate 	aggr_port_t **pport;
18064deae11aSyz 	boolean_t mac_addr_changed = B_FALSE;
18074deae11aSyz 	boolean_t link_state_changed = B_FALSE;
1808da14cebeSEric Cheng 	mac_perim_handle_t mph;
18097c478bd9Sstevel@tonic-gate 	uint64_t val;
18107c478bd9Sstevel@tonic-gate 	uint_t i;
1811ba2e4443Sseb 	uint_t stat;
18127c478bd9Sstevel@tonic-gate 
1813da14cebeSEric Cheng 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
18147c478bd9Sstevel@tonic-gate 	ASSERT(grp->lg_nports > 1);
18154deae11aSyz 	ASSERT(!grp->lg_closing);
18167c478bd9Sstevel@tonic-gate 
18177c478bd9Sstevel@tonic-gate 	/* unlink port */
18187c478bd9Sstevel@tonic-gate 	for (pport = &grp->lg_ports; *pport != port;
18197c478bd9Sstevel@tonic-gate 	    pport = &(*pport)->lp_next) {
18204deae11aSyz 		if (*pport == NULL) {
18214deae11aSyz 			rc = ENOENT;
18224deae11aSyz 			goto done;
18234deae11aSyz 		}
18247c478bd9Sstevel@tonic-gate 	}
18257c478bd9Sstevel@tonic-gate 	*pport = port->lp_next;
18267c478bd9Sstevel@tonic-gate 
1827da14cebeSEric Cheng 	mac_perim_enter_by_mh(port->lp_mh, &mph);
18287c478bd9Sstevel@tonic-gate 
18297c478bd9Sstevel@tonic-gate 	/*
18307c478bd9Sstevel@tonic-gate 	 * If the MAC address of the port being removed was assigned
18317c478bd9Sstevel@tonic-gate 	 * to the group, update the group MAC address
18327c478bd9Sstevel@tonic-gate 	 * using the MAC address of a different port.
18337c478bd9Sstevel@tonic-gate 	 */
18347c478bd9Sstevel@tonic-gate 	if (!grp->lg_addr_fixed && grp->lg_mac_addr_port == port) {
18357c478bd9Sstevel@tonic-gate 		/*
18367c478bd9Sstevel@tonic-gate 		 * Set the MAC address of the group to the
18377c478bd9Sstevel@tonic-gate 		 * MAC address of its first port.
18387c478bd9Sstevel@tonic-gate 		 */
18397c478bd9Sstevel@tonic-gate 		bcopy(grp->lg_ports->lp_addr, grp->lg_addr, ETHERADDRL);
18407c478bd9Sstevel@tonic-gate 		grp->lg_mac_addr_port = grp->lg_ports;
18414deae11aSyz 		mac_addr_changed = B_TRUE;
18427c478bd9Sstevel@tonic-gate 	}
18437c478bd9Sstevel@tonic-gate 
1844da14cebeSEric Cheng 	link_state_changed = aggr_grp_detach_port(grp, port);
18457c478bd9Sstevel@tonic-gate 
18467c478bd9Sstevel@tonic-gate 	/*
1847ba2e4443Sseb 	 * Add the counter statistics of the ports while it was aggregated
1848ba2e4443Sseb 	 * to the group's residual statistics.  This is done by obtaining
1849ba2e4443Sseb 	 * the current counter from the underlying MAC then subtracting the
1850ba2e4443Sseb 	 * value of the counter at the moment it was added to the
1851ba2e4443Sseb 	 * aggregation.
18527c478bd9Sstevel@tonic-gate 	 */
1853da14cebeSEric Cheng 	for (i = 0; i < MAC_NSTAT; i++) {
1854ba2e4443Sseb 		stat = i + MAC_STAT_MIN;
1855ba2e4443Sseb 		if (!MAC_STAT_ISACOUNTER(stat))
18567c478bd9Sstevel@tonic-gate 			continue;
1857ba2e4443Sseb 		val = aggr_port_stat(port, stat);
18587c478bd9Sstevel@tonic-gate 		val -= port->lp_stat[i];
185984de666eSRyan Zezeski 		mutex_enter(&grp->lg_stat_lock);
18607c478bd9Sstevel@tonic-gate 		grp->lg_stat[i] += val;
186184de666eSRyan Zezeski 		mutex_exit(&grp->lg_stat_lock);
18627c478bd9Sstevel@tonic-gate 	}
1863da14cebeSEric Cheng 	for (i = 0; i < ETHER_NSTAT; i++) {
1864ba2e4443Sseb 		stat = i + MACTYPE_STAT_MIN;
1865ba2e4443Sseb 		if (!ETHER_STAT_ISACOUNTER(stat))
18661f8aaf0dSethindra 			continue;
1867ba2e4443Sseb 		val = aggr_port_stat(port, stat);
1868ba2e4443Sseb 		val -= port->lp_ether_stat[i];
186984de666eSRyan Zezeski 		mutex_enter(&grp->lg_stat_lock);
1870ba2e4443Sseb 		grp->lg_ether_stat[i] += val;
187184de666eSRyan Zezeski 		mutex_exit(&grp->lg_stat_lock);
1872ba2e4443Sseb 	}
18737c478bd9Sstevel@tonic-gate 
18747c478bd9Sstevel@tonic-gate 	grp->lg_nports--;
1875da14cebeSEric Cheng 	mac_perim_exit(mph);
18767c478bd9Sstevel@tonic-gate 
18770dc2366fSVenugopal Iyer 	aggr_rem_pseudo_tx_group(port, &grp->lg_tx_group);
18787c478bd9Sstevel@tonic-gate 	aggr_port_delete(port);
18797c478bd9Sstevel@tonic-gate 
18807c478bd9Sstevel@tonic-gate 	/*
18817c478bd9Sstevel@tonic-gate 	 * If the group MAC address has changed, update the MAC address of
1882d62bc4baSyz 	 * the remaining constituent ports according to the new MAC
18837c478bd9Sstevel@tonic-gate 	 * address of the group.
18847c478bd9Sstevel@tonic-gate 	 */
1885392b1d6eSyz 	if (mac_addr_changed && aggr_grp_update_ports_mac(grp))
1886392b1d6eSyz 		link_state_changed = B_TRUE;
18877c478bd9Sstevel@tonic-gate 
18884deae11aSyz done:
18894deae11aSyz 	if (mac_addr_changedp != NULL)
18904deae11aSyz 		*mac_addr_changedp = mac_addr_changed;
18914deae11aSyz 	if (link_state_changedp != NULL)
18924deae11aSyz 		*link_state_changedp = link_state_changed;
18937c478bd9Sstevel@tonic-gate 
18944deae11aSyz 	return (rc);
18957c478bd9Sstevel@tonic-gate }
18967c478bd9Sstevel@tonic-gate 
18977c478bd9Sstevel@tonic-gate /*
18987c478bd9Sstevel@tonic-gate  * Remove one or more ports from an existing link aggregation group.
18997c478bd9Sstevel@tonic-gate  */
19007c478bd9Sstevel@tonic-gate int
aggr_grp_rem_ports(datalink_id_t linkid,uint_t nports,laioc_port_t * ports)1901d62bc4baSyz aggr_grp_rem_ports(datalink_id_t linkid, uint_t nports, laioc_port_t *ports)
19027c478bd9Sstevel@tonic-gate {
19030a36db39SPaul Winder 	int rc = 0;
19040a36db39SPaul Winder 	uint_t i;
19057c478bd9Sstevel@tonic-gate 	aggr_grp_t *grp = NULL;
19067c478bd9Sstevel@tonic-gate 	aggr_port_t *port;
19074deae11aSyz 	boolean_t mac_addr_update = B_FALSE, mac_addr_changed;
19084deae11aSyz 	boolean_t link_state_update = B_FALSE, link_state_changed;
1909da14cebeSEric Cheng 	mac_perim_handle_t mph, pmph;
19107c478bd9Sstevel@tonic-gate 
1911d62bc4baSyz 	/* get group corresponding to linkid */
1912210db224Sericheng 	rw_enter(&aggr_grp_lock, RW_READER);
1913d62bc4baSyz 	if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
1914210db224Sericheng 	    (mod_hash_val_t *)&grp) != 0) {
1915210db224Sericheng 		rw_exit(&aggr_grp_lock);
1916210db224Sericheng 		return (ENOENT);
19177c478bd9Sstevel@tonic-gate 	}
19187c478bd9Sstevel@tonic-gate 	AGGR_GRP_REFHOLD(grp);
1919210db224Sericheng 
1920da14cebeSEric Cheng 	/*
1921da14cebeSEric Cheng 	 * Hold the perimeter so that the aggregation won't be destroyed.
1922da14cebeSEric Cheng 	 */
1923da14cebeSEric Cheng 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
1924da14cebeSEric Cheng 	rw_exit(&aggr_grp_lock);
19257c478bd9Sstevel@tonic-gate 
19267c478bd9Sstevel@tonic-gate 	/* we need to keep at least one port per group */
19277c478bd9Sstevel@tonic-gate 	if (nports >= grp->lg_nports) {
19287c478bd9Sstevel@tonic-gate 		rc = EINVAL;
19297c478bd9Sstevel@tonic-gate 		goto bail;
19307c478bd9Sstevel@tonic-gate 	}
19317c478bd9Sstevel@tonic-gate 
19327c478bd9Sstevel@tonic-gate 	/* first verify that all the groups are valid */
19337c478bd9Sstevel@tonic-gate 	for (i = 0; i < nports; i++) {
1934d62bc4baSyz 		if (aggr_grp_port_lookup(grp, ports[i].lp_linkid) == NULL) {
19357c478bd9Sstevel@tonic-gate 			/* port not found */
19367c478bd9Sstevel@tonic-gate 			rc = ENOENT;
19377c478bd9Sstevel@tonic-gate 			goto bail;
19387c478bd9Sstevel@tonic-gate 		}
19397c478bd9Sstevel@tonic-gate 	}
19407c478bd9Sstevel@tonic-gate 
1941da14cebeSEric Cheng 	/* clear the promiscous mode for the specified ports */
1942da14cebeSEric Cheng 	for (i = 0; i < nports && rc == 0; i++) {
1943da14cebeSEric Cheng 		/* lookup port */
1944da14cebeSEric Cheng 		port = aggr_grp_port_lookup(grp, ports[i].lp_linkid);
1945da14cebeSEric Cheng 		ASSERT(port != NULL);
1946da14cebeSEric Cheng 
1947da14cebeSEric Cheng 		mac_perim_enter_by_mh(port->lp_mh, &pmph);
1948da14cebeSEric Cheng 		rc = aggr_port_promisc(port, B_FALSE);
1949da14cebeSEric Cheng 		mac_perim_exit(pmph);
1950da14cebeSEric Cheng 	}
1951da14cebeSEric Cheng 	if (rc != 0) {
1952da14cebeSEric Cheng 		for (i = 0; i < nports; i++) {
1953da14cebeSEric Cheng 			port = aggr_grp_port_lookup(grp,
1954da14cebeSEric Cheng 			    ports[i].lp_linkid);
1955da14cebeSEric Cheng 			ASSERT(port != NULL);
1956da14cebeSEric Cheng 
1957da14cebeSEric Cheng 			/*
1958da14cebeSEric Cheng 			 * Turn the promiscuous mode back on if it is required
1959da14cebeSEric Cheng 			 * to receive the non-primary address over a port, or
1960da14cebeSEric Cheng 			 * the promiscous mode is enabled over the aggr.
1961da14cebeSEric Cheng 			 */
1962da14cebeSEric Cheng 			mac_perim_enter_by_mh(port->lp_mh, &pmph);
1963da14cebeSEric Cheng 			if (port->lp_started && (grp->lg_promisc ||
1964da14cebeSEric Cheng 			    port->lp_prom_addr != NULL)) {
1965da14cebeSEric Cheng 				(void) aggr_port_promisc(port, B_TRUE);
1966da14cebeSEric Cheng 			}
1967da14cebeSEric Cheng 			mac_perim_exit(pmph);
1968da14cebeSEric Cheng 		}
1969da14cebeSEric Cheng 		goto bail;
1970da14cebeSEric Cheng 	}
1971da14cebeSEric Cheng 
19727c478bd9Sstevel@tonic-gate 	/* remove the specified ports from group */
1973da14cebeSEric Cheng 	for (i = 0; i < nports; i++) {
19747c478bd9Sstevel@tonic-gate 		/* lookup port */
1975d62bc4baSyz 		port = aggr_grp_port_lookup(grp, ports[i].lp_linkid);
19767c478bd9Sstevel@tonic-gate 		ASSERT(port != NULL);
19777c478bd9Sstevel@tonic-gate 
19787c478bd9Sstevel@tonic-gate 		/* stop port if group has already been started */
19797c478bd9Sstevel@tonic-gate 		if (grp->lg_started) {
1980da14cebeSEric Cheng 			mac_perim_enter_by_mh(port->lp_mh, &pmph);
19817c478bd9Sstevel@tonic-gate 			aggr_port_stop(port);
1982da14cebeSEric Cheng 			mac_perim_exit(pmph);
19837c478bd9Sstevel@tonic-gate 		}
19847c478bd9Sstevel@tonic-gate 
19850dc2366fSVenugopal Iyer 		/*
19860dc2366fSVenugopal Iyer 		 * aggr_rem_pseudo_tx_group() is not called here. Instead
19870dc2366fSVenugopal Iyer 		 * it is called from inside aggr_grp_rem_port() after the
19880dc2366fSVenugopal Iyer 		 * port has been detached. The reason is that
19890dc2366fSVenugopal Iyer 		 * aggr_rem_pseudo_tx_group() removes one ring at a time
19900dc2366fSVenugopal Iyer 		 * and if there is still traffic going on, then there
19910dc2366fSVenugopal Iyer 		 * is the possibility of aggr_find_tx_ring() returning a
19920dc2366fSVenugopal Iyer 		 * removed ring for transmission. Once the port has been
19930dc2366fSVenugopal Iyer 		 * detached, that port will not be used and
19940dc2366fSVenugopal Iyer 		 * aggr_find_tx_ring() will not return any rings
19950dc2366fSVenugopal Iyer 		 * belonging to it.
19960dc2366fSVenugopal Iyer 		 */
19970a36db39SPaul Winder 		for (uint_t j = 0; j < grp->lg_rx_group_count; j++)
19980a36db39SPaul Winder 			aggr_rem_pseudo_rx_group(port, &grp->lg_rx_groups[j]);
19990dc2366fSVenugopal Iyer 
20007c478bd9Sstevel@tonic-gate 		/* remove port from group */
20014deae11aSyz 		rc = aggr_grp_rem_port(grp, port, &mac_addr_changed,
20024deae11aSyz 		    &link_state_changed);
20037c478bd9Sstevel@tonic-gate 		ASSERT(rc == 0);
20044deae11aSyz 		mac_addr_update = mac_addr_update || mac_addr_changed;
20054deae11aSyz 		link_state_update = link_state_update || link_state_changed;
20067c478bd9Sstevel@tonic-gate 	}
20077c478bd9Sstevel@tonic-gate 
20087c478bd9Sstevel@tonic-gate bail:
2009da14cebeSEric Cheng 	if (mac_addr_update)
2010da14cebeSEric Cheng 		mac_unicst_update(grp->lg_mh, grp->lg_addr);
2011da14cebeSEric Cheng 	if (link_state_update)
2012da14cebeSEric Cheng 		mac_link_update(grp->lg_mh, grp->lg_link_state);
2013da14cebeSEric Cheng 
2014da14cebeSEric Cheng 	mac_perim_exit(mph);
20157c478bd9Sstevel@tonic-gate 	AGGR_GRP_REFRELE(grp);
20167c478bd9Sstevel@tonic-gate 
20177c478bd9Sstevel@tonic-gate 	return (rc);
20187c478bd9Sstevel@tonic-gate }
20197c478bd9Sstevel@tonic-gate 
20207c478bd9Sstevel@tonic-gate int
aggr_grp_delete(datalink_id_t linkid,cred_t * cred)20212b24ab6bSSebastien Roy aggr_grp_delete(datalink_id_t linkid, cred_t *cred)
20227c478bd9Sstevel@tonic-gate {
2023210db224Sericheng 	aggr_grp_t *grp = NULL;
20247c478bd9Sstevel@tonic-gate 	aggr_port_t *port, *cport;
2025d62bc4baSyz 	datalink_id_t tmpid;
2026210db224Sericheng 	mod_hash_val_t val;
2027da14cebeSEric Cheng 	mac_perim_handle_t mph, pmph;
20280466663dSyz 	int err;
20290dc2366fSVenugopal Iyer 	kt_did_t tid = 0;
20307c478bd9Sstevel@tonic-gate 
2031210db224Sericheng 	rw_enter(&aggr_grp_lock, RW_WRITER);
20327c478bd9Sstevel@tonic-gate 
2033d62bc4baSyz 	if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
2034210db224Sericheng 	    (mod_hash_val_t *)&grp) != 0) {
2035210db224Sericheng 		rw_exit(&aggr_grp_lock);
2036210db224Sericheng 		return (ENOENT);
20377c478bd9Sstevel@tonic-gate 	}
2038490ed22dSyz 
2039d62bc4baSyz 	/*
2040d62bc4baSyz 	 * Note that dls_devnet_destroy() must be called before lg_lock is
2041d62bc4baSyz 	 * held. Otherwise, it will deadlock if another thread is in
2042d62bc4baSyz 	 * aggr_m_stat() and thus has a kstat_hold() on the kstats that
2043d62bc4baSyz 	 * dls_devnet_destroy() needs to delete.
2044d62bc4baSyz 	 */
2045da14cebeSEric Cheng 	if ((err = dls_devnet_destroy(grp->lg_mh, &tmpid, B_TRUE)) != 0) {
2046d62bc4baSyz 		rw_exit(&aggr_grp_lock);
2047d62bc4baSyz 		return (err);
2048d62bc4baSyz 	}
2049d62bc4baSyz 	ASSERT(linkid == tmpid);
2050d62bc4baSyz 
20517c478bd9Sstevel@tonic-gate 	/*
20527c478bd9Sstevel@tonic-gate 	 * Unregister from the MAC service module. Since this can
20537c478bd9Sstevel@tonic-gate 	 * fail if a client hasn't closed the MAC port, we gracefully
20547c478bd9Sstevel@tonic-gate 	 * fail the operation.
20557c478bd9Sstevel@tonic-gate 	 */
20560466663dSyz 	if ((err = mac_disable(grp->lg_mh)) != 0) {
20572b24ab6bSSebastien Roy 		(void) dls_devnet_create(grp->lg_mh, linkid, crgetzoneid(cred));
2058210db224Sericheng 		rw_exit(&aggr_grp_lock);
20590466663dSyz 		return (err);
20607c478bd9Sstevel@tonic-gate 	}
2061da14cebeSEric Cheng 	(void) mod_hash_remove(aggr_grp_hash, GRP_HASH_KEY(linkid), &val);
2062da14cebeSEric Cheng 	ASSERT(grp == (aggr_grp_t *)val);
2063da14cebeSEric Cheng 
2064da14cebeSEric Cheng 	ASSERT(aggr_grp_cnt > 0);
2065da14cebeSEric Cheng 	aggr_grp_cnt--;
2066da14cebeSEric Cheng 	rw_exit(&aggr_grp_lock);
20677c478bd9Sstevel@tonic-gate 
206819599311Sudpa 	/*
2069da14cebeSEric Cheng 	 * Inform the lacp_rx thread to exit.
207019599311Sudpa 	 */
2071da14cebeSEric Cheng 	mutex_enter(&grp->lg_lacp_lock);
2072da14cebeSEric Cheng 	grp->lg_lacp_done = B_TRUE;
2073da14cebeSEric Cheng 	cv_signal(&grp->lg_lacp_cv);
2074da14cebeSEric Cheng 	while (grp->lg_lacp_rx_thread != NULL)
2075da14cebeSEric Cheng 		cv_wait(&grp->lg_lacp_cv, &grp->lg_lacp_lock);
2076da14cebeSEric Cheng 	mutex_exit(&grp->lg_lacp_lock);
20770dc2366fSVenugopal Iyer 	/*
20780dc2366fSVenugopal Iyer 	 * Inform the tx_notify_thread to exit.
20790dc2366fSVenugopal Iyer 	 */
20800dc2366fSVenugopal Iyer 	mutex_enter(&grp->lg_tx_flowctl_lock);
20810dc2366fSVenugopal Iyer 	if (grp->lg_tx_notify_thread != NULL) {
20820dc2366fSVenugopal Iyer 		tid = grp->lg_tx_notify_thread->t_did;
20830dc2366fSVenugopal Iyer 		grp->lg_tx_notify_done = B_TRUE;
20840dc2366fSVenugopal Iyer 		cv_signal(&grp->lg_tx_flowctl_cv);
20850dc2366fSVenugopal Iyer 	}
20860dc2366fSVenugopal Iyer 	mutex_exit(&grp->lg_tx_flowctl_lock);
20870dc2366fSVenugopal Iyer 	if (tid != 0)
20880dc2366fSVenugopal Iyer 		thread_join(tid);
208919599311Sudpa 
2090da14cebeSEric Cheng 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
2091da14cebeSEric Cheng 
2092da14cebeSEric Cheng 	grp->lg_closing = B_TRUE;
20937c478bd9Sstevel@tonic-gate 	/* detach and free MAC ports associated with group */
20947c478bd9Sstevel@tonic-gate 	port = grp->lg_ports;
20957c478bd9Sstevel@tonic-gate 	while (port != NULL) {
20967c478bd9Sstevel@tonic-gate 		cport = port->lp_next;
2097da14cebeSEric Cheng 		mac_perim_enter_by_mh(port->lp_mh, &pmph);
20987c478bd9Sstevel@tonic-gate 		if (grp->lg_started)
20997c478bd9Sstevel@tonic-gate 			aggr_port_stop(port);
2100da14cebeSEric Cheng 		(void) aggr_grp_detach_port(grp, port);
2101da14cebeSEric Cheng 		mac_perim_exit(pmph);
21020dc2366fSVenugopal Iyer 		aggr_rem_pseudo_tx_group(port, &grp->lg_tx_group);
210345948e49SRyan Zezeski 		for (uint_t i = 0; i < grp->lg_rx_group_count; i++)
210445948e49SRyan Zezeski 			aggr_rem_pseudo_rx_group(port, &grp->lg_rx_groups[i]);
21057c478bd9Sstevel@tonic-gate 		aggr_port_delete(port);
21067c478bd9Sstevel@tonic-gate 		port = cport;
21077c478bd9Sstevel@tonic-gate 	}
21087c478bd9Sstevel@tonic-gate 
2109da14cebeSEric Cheng 	mac_perim_exit(mph);
21100466663dSyz 
21110dc2366fSVenugopal Iyer 	kmem_free(grp->lg_tx_blocked_rings,
21120dc2366fSVenugopal Iyer 	    (sizeof (mac_ring_handle_t *) * MAX_RINGS_PER_GROUP));
2113da14cebeSEric Cheng 	/*
2114da14cebeSEric Cheng 	 * Wait for the port's lacp timer thread and its notification callback
2115da14cebeSEric Cheng 	 * to exit before calling mac_unregister() since both needs to access
2116da14cebeSEric Cheng 	 * the mac perimeter of the grp.
2117da14cebeSEric Cheng 	 */
2118da14cebeSEric Cheng 	aggr_grp_port_wait(grp);
2119210db224Sericheng 
2120da14cebeSEric Cheng 	VERIFY(mac_unregister(grp->lg_mh) == 0);
2121da14cebeSEric Cheng 	grp->lg_mh = NULL;
21227c478bd9Sstevel@tonic-gate 
212345948e49SRyan Zezeski 	for (uint_t i = 0; i < MAX_GROUPS_PER_PORT; i++) {
212445948e49SRyan Zezeski 		list_destroy(&(grp->lg_rx_groups[i].arg_vlans));
212545948e49SRyan Zezeski 	}
212684de666eSRyan Zezeski 
21277c478bd9Sstevel@tonic-gate 	AGGR_GRP_REFRELE(grp);
21287c478bd9Sstevel@tonic-gate 	return (0);
21297c478bd9Sstevel@tonic-gate }
21307c478bd9Sstevel@tonic-gate 
21317c478bd9Sstevel@tonic-gate void
aggr_grp_free(aggr_grp_t * grp)21327c478bd9Sstevel@tonic-gate aggr_grp_free(aggr_grp_t *grp)
21337c478bd9Sstevel@tonic-gate {
21347c478bd9Sstevel@tonic-gate 	ASSERT(grp->lg_refs == 0);
2135da14cebeSEric Cheng 	ASSERT(grp->lg_port_ref == 0);
2136d62bc4baSyz 	if (grp->lg_key > AGGR_MAX_KEY) {
2137d62bc4baSyz 		id_free(key_ids, grp->lg_key);
2138d62bc4baSyz 		grp->lg_key = 0;
2139d62bc4baSyz 	}
21407c478bd9Sstevel@tonic-gate 	kmem_cache_free(aggr_grp_cache, grp);
21417c478bd9Sstevel@tonic-gate }
21427c478bd9Sstevel@tonic-gate 
2143d62bc4baSyz int
aggr_grp_info(datalink_id_t linkid,void * fn_arg,aggr_grp_info_new_grp_fn_t new_grp_fn,aggr_grp_info_new_port_fn_t new_port_fn,cred_t * cred)2144d62bc4baSyz aggr_grp_info(datalink_id_t linkid, void *fn_arg,
2145d62bc4baSyz     aggr_grp_info_new_grp_fn_t new_grp_fn,
21462b24ab6bSSebastien Roy     aggr_grp_info_new_port_fn_t new_port_fn, cred_t *cred)
21477c478bd9Sstevel@tonic-gate {
2148d62bc4baSyz 	aggr_grp_t	*grp;
2149d62bc4baSyz 	aggr_port_t	*port;
2150da14cebeSEric Cheng 	mac_perim_handle_t mph, pmph;
2151d62bc4baSyz 	int		rc = 0;
21527c478bd9Sstevel@tonic-gate 
21532b24ab6bSSebastien Roy 	/*
21542b24ab6bSSebastien Roy 	 * Make sure that the aggregation link is visible from the caller's
21552b24ab6bSSebastien Roy 	 * zone.
21562b24ab6bSSebastien Roy 	 */
21572b24ab6bSSebastien Roy 	if (!dls_devnet_islinkvisible(linkid, crgetzoneid(cred)))
21582b24ab6bSSebastien Roy 		return (ENOENT);
21592b24ab6bSSebastien Roy 
2160d62bc4baSyz 	rw_enter(&aggr_grp_lock, RW_READER);
21617c478bd9Sstevel@tonic-gate 
2162d62bc4baSyz 	if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
2163d62bc4baSyz 	    (mod_hash_val_t *)&grp) != 0) {
2164d62bc4baSyz 		rw_exit(&aggr_grp_lock);
2165d62bc4baSyz 		return (ENOENT);
2166d62bc4baSyz 	}
2167da14cebeSEric Cheng 	AGGR_GRP_REFHOLD(grp);
21687c478bd9Sstevel@tonic-gate 
2169da14cebeSEric Cheng 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
2170da14cebeSEric Cheng 	rw_exit(&aggr_grp_lock);
21717c478bd9Sstevel@tonic-gate 
2172d62bc4baSyz 	rc = new_grp_fn(fn_arg, grp->lg_linkid,
2173d62bc4baSyz 	    (grp->lg_key > AGGR_MAX_KEY) ? 0 : grp->lg_key, grp->lg_addr,
2174d62bc4baSyz 	    grp->lg_addr_fixed, grp->lg_force, grp->lg_tx_policy,
21757c478bd9Sstevel@tonic-gate 	    grp->lg_nports, grp->lg_lacp_mode, grp->aggr.PeriodicTimer);
21767c478bd9Sstevel@tonic-gate 
2177d62bc4baSyz 	if (rc != 0)
21787c478bd9Sstevel@tonic-gate 		goto bail;
21797c478bd9Sstevel@tonic-gate 
21807c478bd9Sstevel@tonic-gate 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
2181da14cebeSEric Cheng 		mac_perim_enter_by_mh(port->lp_mh, &pmph);
2182d62bc4baSyz 		rc = new_port_fn(fn_arg, port->lp_linkid, port->lp_addr,
2183d62bc4baSyz 		    port->lp_state, &port->lp_lacp.ActorOperPortState);
2184da14cebeSEric Cheng 		mac_perim_exit(pmph);
21857c478bd9Sstevel@tonic-gate 
2186d62bc4baSyz 		if (rc != 0)
21877c478bd9Sstevel@tonic-gate 			goto bail;
21887c478bd9Sstevel@tonic-gate 	}
21897c478bd9Sstevel@tonic-gate 
21907c478bd9Sstevel@tonic-gate bail:
2191da14cebeSEric Cheng 	mac_perim_exit(mph);
2192da14cebeSEric Cheng 	AGGR_GRP_REFRELE(grp);
21937c478bd9Sstevel@tonic-gate 	return (rc);
21947c478bd9Sstevel@tonic-gate }
21957c478bd9Sstevel@tonic-gate 
21967c478bd9Sstevel@tonic-gate /*ARGSUSED*/
21977c478bd9Sstevel@tonic-gate static void
aggr_m_ioctl(void * arg,queue_t * q,mblk_t * mp)21987c478bd9Sstevel@tonic-gate aggr_m_ioctl(void *arg, queue_t *q, mblk_t *mp)
21997c478bd9Sstevel@tonic-gate {
22007c478bd9Sstevel@tonic-gate 	miocnak(q, mp, 0, ENOTSUP);
22017c478bd9Sstevel@tonic-gate }
22027c478bd9Sstevel@tonic-gate 
2203ba2e4443Sseb static int
aggr_grp_stat(aggr_grp_t * grp,uint_t stat,uint64_t * val)2204ba2e4443Sseb aggr_grp_stat(aggr_grp_t *grp, uint_t stat, uint64_t *val)
22057c478bd9Sstevel@tonic-gate {
2206ba2e4443Sseb 	aggr_port_t	*port;
2207ba2e4443Sseb 	uint_t		stat_index;
2208ba2e4443Sseb 
220984de666eSRyan Zezeski 	ASSERT(MUTEX_HELD(&grp->lg_stat_lock));
221084de666eSRyan Zezeski 
2211ba2e4443Sseb 	/* We only aggregate counter statistics. */
2212ba2e4443Sseb 	if (IS_MAC_STAT(stat) && !MAC_STAT_ISACOUNTER(stat) ||
2213ba2e4443Sseb 	    IS_MACTYPE_STAT(stat) && !ETHER_STAT_ISACOUNTER(stat)) {
2214ba2e4443Sseb 		return (ENOTSUP);
2215ba2e4443Sseb 	}
2216ba2e4443Sseb 
2217ba2e4443Sseb 	/*
2218ba2e4443Sseb 	 * Counter statistics for a group are computed by aggregating the
2219ba2e4443Sseb 	 * counters of the members MACs while they were aggregated, plus
2220ba2e4443Sseb 	 * the residual counter of the group itself, which is updated each
2221ba2e4443Sseb 	 * time a MAC is removed from the group.
2222ba2e4443Sseb 	 */
2223ba2e4443Sseb 	*val = 0;
2224ba2e4443Sseb 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
2225ba2e4443Sseb 		/* actual port statistic */
2226ba2e4443Sseb 		*val += aggr_port_stat(port, stat);
2227ba2e4443Sseb 		/*
2228ba2e4443Sseb 		 * minus the port stat when it was added, plus any residual
2229d62bc4baSyz 		 * amount for the group.
2230ba2e4443Sseb 		 */
2231ba2e4443Sseb 		if (IS_MAC_STAT(stat)) {
2232ba2e4443Sseb 			stat_index = stat - MAC_STAT_MIN;
2233ba2e4443Sseb 			*val -= port->lp_stat[stat_index];
2234ba2e4443Sseb 			*val += grp->lg_stat[stat_index];
2235ba2e4443Sseb 		} else if (IS_MACTYPE_STAT(stat)) {
2236ba2e4443Sseb 			stat_index = stat - MACTYPE_STAT_MIN;
2237ba2e4443Sseb 			*val -= port->lp_ether_stat[stat_index];
2238ba2e4443Sseb 			*val += grp->lg_ether_stat[stat_index];
2239ba2e4443Sseb 		}
2240ba2e4443Sseb 	}
2241ba2e4443Sseb 	return (0);
2242ba2e4443Sseb }
2243ba2e4443Sseb 
22440dc2366fSVenugopal Iyer int
aggr_rx_ring_stat(mac_ring_driver_t rdriver,uint_t stat,uint64_t * val)22450dc2366fSVenugopal Iyer aggr_rx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, uint64_t *val)
22460dc2366fSVenugopal Iyer {
22470dc2366fSVenugopal Iyer 	aggr_pseudo_rx_ring_t   *rx_ring = (aggr_pseudo_rx_ring_t *)rdriver;
22480dc2366fSVenugopal Iyer 
22490dc2366fSVenugopal Iyer 	if (rx_ring->arr_hw_rh != NULL) {
22500dc2366fSVenugopal Iyer 		*val = mac_pseudo_rx_ring_stat_get(rx_ring->arr_hw_rh, stat);
22510dc2366fSVenugopal Iyer 	} else {
22520dc2366fSVenugopal Iyer 		aggr_port_t	*port = rx_ring->arr_port;
22530dc2366fSVenugopal Iyer 
22540dc2366fSVenugopal Iyer 		*val = mac_stat_get(port->lp_mh, stat);
22550dc2366fSVenugopal Iyer 
22560dc2366fSVenugopal Iyer 	}
22570dc2366fSVenugopal Iyer 	return (0);
22580dc2366fSVenugopal Iyer }
22590dc2366fSVenugopal Iyer 
22600dc2366fSVenugopal Iyer int
aggr_tx_ring_stat(mac_ring_driver_t rdriver,uint_t stat,uint64_t * val)22610dc2366fSVenugopal Iyer aggr_tx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, uint64_t *val)
22620dc2366fSVenugopal Iyer {
22630dc2366fSVenugopal Iyer 	aggr_pseudo_tx_ring_t   *tx_ring = (aggr_pseudo_tx_ring_t *)rdriver;
22640dc2366fSVenugopal Iyer 
22650dc2366fSVenugopal Iyer 	if (tx_ring->atr_hw_rh != NULL) {
22660dc2366fSVenugopal Iyer 		*val = mac_pseudo_tx_ring_stat_get(tx_ring->atr_hw_rh, stat);
22670dc2366fSVenugopal Iyer 	} else {
22680dc2366fSVenugopal Iyer 		aggr_port_t	*port = tx_ring->atr_port;
22690dc2366fSVenugopal Iyer 
22700dc2366fSVenugopal Iyer 		*val = mac_stat_get(port->lp_mh, stat);
22710dc2366fSVenugopal Iyer 	}
22720dc2366fSVenugopal Iyer 	return (0);
22730dc2366fSVenugopal Iyer }
22740dc2366fSVenugopal Iyer 
2275ba2e4443Sseb static int
aggr_m_stat(void * arg,uint_t stat,uint64_t * val)2276ba2e4443Sseb aggr_m_stat(void *arg, uint_t stat, uint64_t *val)
2277ba2e4443Sseb {
2278da14cebeSEric Cheng 	aggr_grp_t		*grp = arg;
2279da14cebeSEric Cheng 	int			rval = 0;
22807c478bd9Sstevel@tonic-gate 
228184de666eSRyan Zezeski 	mutex_enter(&grp->lg_stat_lock);
22827c478bd9Sstevel@tonic-gate 
22837c478bd9Sstevel@tonic-gate 	switch (stat) {
22847c478bd9Sstevel@tonic-gate 	case MAC_STAT_IFSPEED:
2285ba2e4443Sseb 		*val = grp->lg_ifspeed;
22867c478bd9Sstevel@tonic-gate 		break;
2287ba2e4443Sseb 
2288ba2e4443Sseb 	case ETHER_STAT_LINK_DUPLEX:
2289ba2e4443Sseb 		*val = grp->lg_link_duplex;
22907c478bd9Sstevel@tonic-gate 		break;
2291ba2e4443Sseb 
22927c478bd9Sstevel@tonic-gate 	default:
22937c478bd9Sstevel@tonic-gate 		/*
2294ba2e4443Sseb 		 * For all other statistics, we return the aggregated stat
2295ba2e4443Sseb 		 * from the underlying ports.  aggr_grp_stat() will set
2296ba2e4443Sseb 		 * rval appropriately if the statistic isn't a counter.
22977c478bd9Sstevel@tonic-gate 		 */
2298ba2e4443Sseb 		rval = aggr_grp_stat(grp, stat, val);
22997c478bd9Sstevel@tonic-gate 	}
23007c478bd9Sstevel@tonic-gate 
230184de666eSRyan Zezeski 	mutex_exit(&grp->lg_stat_lock);
2302ba2e4443Sseb 	return (rval);
23037c478bd9Sstevel@tonic-gate }
23047c478bd9Sstevel@tonic-gate 
23057c478bd9Sstevel@tonic-gate static int
aggr_m_start(void * arg)23067c478bd9Sstevel@tonic-gate aggr_m_start(void *arg)
23077c478bd9Sstevel@tonic-gate {
23087c478bd9Sstevel@tonic-gate 	aggr_grp_t *grp = arg;
23097c478bd9Sstevel@tonic-gate 	aggr_port_t *port;
2310da14cebeSEric Cheng 	mac_perim_handle_t mph, pmph;
23117c478bd9Sstevel@tonic-gate 
2312da14cebeSEric Cheng 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
23137c478bd9Sstevel@tonic-gate 
23147c478bd9Sstevel@tonic-gate 	/*
23157c478bd9Sstevel@tonic-gate 	 * Attempts to start all configured members of the group.
23167c478bd9Sstevel@tonic-gate 	 * Group members will be attached when their link-up notification
23177c478bd9Sstevel@tonic-gate 	 * is received.
23187c478bd9Sstevel@tonic-gate 	 */
23197c478bd9Sstevel@tonic-gate 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
2320da14cebeSEric Cheng 		mac_perim_enter_by_mh(port->lp_mh, &pmph);
23217c478bd9Sstevel@tonic-gate 		if (aggr_port_start(port) != 0) {
2322da14cebeSEric Cheng 			mac_perim_exit(pmph);
23237c478bd9Sstevel@tonic-gate 			continue;
23247c478bd9Sstevel@tonic-gate 		}
23257c478bd9Sstevel@tonic-gate 
2326da14cebeSEric Cheng 		/*
2327da14cebeSEric Cheng 		 * Turn on the promiscuous mode if it is required to receive
2328da14cebeSEric Cheng 		 * the non-primary address over a port, or the promiscous
2329da14cebeSEric Cheng 		 * mode is enabled over the aggr.
2330da14cebeSEric Cheng 		 */
2331da14cebeSEric Cheng 		if (grp->lg_promisc || port->lp_prom_addr != NULL) {
2332da14cebeSEric Cheng 			if (aggr_port_promisc(port, B_TRUE) != 0)
2333da14cebeSEric Cheng 				aggr_port_stop(port);
2334da14cebeSEric Cheng 		}
2335da14cebeSEric Cheng 		mac_perim_exit(pmph);
23367c478bd9Sstevel@tonic-gate 	}
23377c478bd9Sstevel@tonic-gate 
23387c478bd9Sstevel@tonic-gate 	grp->lg_started = B_TRUE;
23397c478bd9Sstevel@tonic-gate 
2340da14cebeSEric Cheng 	mac_perim_exit(mph);
23417c478bd9Sstevel@tonic-gate 	return (0);
23427c478bd9Sstevel@tonic-gate }
23437c478bd9Sstevel@tonic-gate 
23447c478bd9Sstevel@tonic-gate static void
aggr_m_stop(void * arg)23457c478bd9Sstevel@tonic-gate aggr_m_stop(void *arg)
23467c478bd9Sstevel@tonic-gate {
23477c478bd9Sstevel@tonic-gate 	aggr_grp_t *grp = arg;
23487c478bd9Sstevel@tonic-gate 	aggr_port_t *port;
2349da14cebeSEric Cheng 	mac_perim_handle_t mph, pmph;
23507c478bd9Sstevel@tonic-gate 
2351da14cebeSEric Cheng 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
23527c478bd9Sstevel@tonic-gate 
23537c478bd9Sstevel@tonic-gate 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
2354da14cebeSEric Cheng 		mac_perim_enter_by_mh(port->lp_mh, &pmph);
2355da14cebeSEric Cheng 
2356da14cebeSEric Cheng 		/* reset port promiscuous mode */
2357da14cebeSEric Cheng 		(void) aggr_port_promisc(port, B_FALSE);
2358da14cebeSEric Cheng 
23597c478bd9Sstevel@tonic-gate 		aggr_port_stop(port);
2360da14cebeSEric Cheng 		mac_perim_exit(pmph);
23617c478bd9Sstevel@tonic-gate 	}
23627c478bd9Sstevel@tonic-gate 
23637c478bd9Sstevel@tonic-gate 	grp->lg_started = B_FALSE;
2364da14cebeSEric Cheng 	mac_perim_exit(mph);
23657c478bd9Sstevel@tonic-gate }
23667c478bd9Sstevel@tonic-gate 
23677c478bd9Sstevel@tonic-gate static int
aggr_m_promisc(void * arg,boolean_t on)23687c478bd9Sstevel@tonic-gate aggr_m_promisc(void *arg, boolean_t on)
23697c478bd9Sstevel@tonic-gate {
23707c478bd9Sstevel@tonic-gate 	aggr_grp_t *grp = arg;
23717c478bd9Sstevel@tonic-gate 	aggr_port_t *port;
23724deae11aSyz 	boolean_t link_state_changed = B_FALSE;
2373da14cebeSEric Cheng 	mac_perim_handle_t mph, pmph;
23747c478bd9Sstevel@tonic-gate 
23757c478bd9Sstevel@tonic-gate 	AGGR_GRP_REFHOLD(grp);
2376da14cebeSEric Cheng 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
23777c478bd9Sstevel@tonic-gate 
23784deae11aSyz 	ASSERT(!grp->lg_closing);
23794deae11aSyz 
23807c478bd9Sstevel@tonic-gate 	if (on == grp->lg_promisc)
23817c478bd9Sstevel@tonic-gate 		goto bail;
23827c478bd9Sstevel@tonic-gate 
23837c478bd9Sstevel@tonic-gate 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
2384da14cebeSEric Cheng 		int	err = 0;
2385da14cebeSEric Cheng 
2386da14cebeSEric Cheng 		mac_perim_enter_by_mh(port->lp_mh, &pmph);
23877c478bd9Sstevel@tonic-gate 		AGGR_PORT_REFHOLD(port);
2388da14cebeSEric Cheng 		if (!on && (port->lp_prom_addr == NULL))
2389da14cebeSEric Cheng 			err = aggr_port_promisc(port, B_FALSE);
2390da14cebeSEric Cheng 		else if (on && port->lp_started)
2391da14cebeSEric Cheng 			err = aggr_port_promisc(port, B_TRUE);
2392da14cebeSEric Cheng 
2393da14cebeSEric Cheng 		if (err != 0) {
2394da14cebeSEric Cheng 			if (aggr_grp_detach_port(grp, port))
2395da14cebeSEric Cheng 				link_state_changed = B_TRUE;
2396da14cebeSEric Cheng 		} else {
2397da14cebeSEric Cheng 			/*
2398da14cebeSEric Cheng 			 * If a port was detached because of a previous
2399da14cebeSEric Cheng 			 * failure changing the promiscuity, the port
2400da14cebeSEric Cheng 			 * is reattached when it successfully changes
2401da14cebeSEric Cheng 			 * the promiscuity now, and this might cause
2402da14cebeSEric Cheng 			 * the link state of the aggregation to change.
2403da14cebeSEric Cheng 			 */
2404da14cebeSEric Cheng 			if (aggr_grp_attach_port(grp, port))
2405da14cebeSEric Cheng 				link_state_changed = B_TRUE;
24067c478bd9Sstevel@tonic-gate 		}
2407da14cebeSEric Cheng 		mac_perim_exit(pmph);
24087c478bd9Sstevel@tonic-gate 		AGGR_PORT_REFRELE(port);
24097c478bd9Sstevel@tonic-gate 	}
24107c478bd9Sstevel@tonic-gate 
24117c478bd9Sstevel@tonic-gate 	grp->lg_promisc = on;
24127c478bd9Sstevel@tonic-gate 
24134deae11aSyz 	if (link_state_changed)
2414ba2e4443Sseb 		mac_link_update(grp->lg_mh, grp->lg_link_state);
24154deae11aSyz 
24167c478bd9Sstevel@tonic-gate bail:
2417da14cebeSEric Cheng 	mac_perim_exit(mph);
24187c478bd9Sstevel@tonic-gate 	AGGR_GRP_REFRELE(grp);
24197c478bd9Sstevel@tonic-gate 
24207c478bd9Sstevel@tonic-gate 	return (0);
24217c478bd9Sstevel@tonic-gate }
24227c478bd9Sstevel@tonic-gate 
2423da14cebeSEric Cheng static void
aggr_grp_port_rename(const char * new_name,void * arg)2424da14cebeSEric Cheng aggr_grp_port_rename(const char *new_name, void *arg)
2425da14cebeSEric Cheng {
2426da14cebeSEric Cheng 	/*
2427da14cebeSEric Cheng 	 * aggr port's mac client name is the format of "aggr link name" plus
2428da14cebeSEric Cheng 	 * AGGR_PORT_NAME_DELIMIT plus "underneath link name".
2429da14cebeSEric Cheng 	 */
2430da14cebeSEric Cheng 	int aggr_len, link_len, clnt_name_len, i;
2431da14cebeSEric Cheng 	char *str_end, *str_st, *str_del;
2432da14cebeSEric Cheng 	char aggr_name[MAXNAMELEN];
2433da14cebeSEric Cheng 	char link_name[MAXNAMELEN];
2434da14cebeSEric Cheng 	char *clnt_name;
2435da14cebeSEric Cheng 	aggr_grp_t *aggr_grp = arg;
2436da14cebeSEric Cheng 	aggr_port_t *aggr_port = aggr_grp->lg_ports;
2437da14cebeSEric Cheng 
2438da14cebeSEric Cheng 	for (i = 0; i < aggr_grp->lg_nports; i++) {
2439da14cebeSEric Cheng 		clnt_name = mac_client_name(aggr_port->lp_mch);
2440da14cebeSEric Cheng 		clnt_name_len = strlen(clnt_name);
2441da14cebeSEric Cheng 		str_st = clnt_name;
2442da14cebeSEric Cheng 		str_end = &(clnt_name[clnt_name_len]);
2443da14cebeSEric Cheng 		str_del = strchr(str_st, AGGR_PORT_NAME_DELIMIT);
2444da14cebeSEric Cheng 		ASSERT(str_del != NULL);
2445da14cebeSEric Cheng 		aggr_len = (intptr_t)((uintptr_t)str_del - (uintptr_t)str_st);
2446da14cebeSEric Cheng 		link_len = (intptr_t)((uintptr_t)str_end - (uintptr_t)str_del);
2447da14cebeSEric Cheng 		bzero(aggr_name, MAXNAMELEN);
2448da14cebeSEric Cheng 		bzero(link_name, MAXNAMELEN);
2449da14cebeSEric Cheng 		bcopy(clnt_name, aggr_name, aggr_len);
2450da14cebeSEric Cheng 		bcopy(str_del, link_name, link_len + 1);
2451da14cebeSEric Cheng 		bzero(clnt_name, MAXNAMELEN);
2452da14cebeSEric Cheng 		(void) snprintf(clnt_name, MAXNAMELEN, "%s%s", new_name,
2453da14cebeSEric Cheng 		    link_name);
2454da14cebeSEric Cheng 
2455da14cebeSEric Cheng 		(void) mac_rename_primary(aggr_port->lp_mh, NULL);
2456da14cebeSEric Cheng 		aggr_port = aggr_port->lp_next;
2457da14cebeSEric Cheng 	}
2458da14cebeSEric Cheng }
2459da14cebeSEric Cheng 
2460ba2e4443Sseb /*
2461ba2e4443Sseb  * Initialize the capabilities that are advertised for the group
2462ba2e4443Sseb  * according to the capabilities of the constituent ports.
2463ba2e4443Sseb  */
2464ba2e4443Sseb static boolean_t
aggr_m_capab_get(void * arg,mac_capab_t cap,void * cap_data)2465ba2e4443Sseb aggr_m_capab_get(void *arg, mac_capab_t cap, void *cap_data)
2466ba2e4443Sseb {
2467ba2e4443Sseb 	aggr_grp_t *grp = arg;
2468ba2e4443Sseb 
2469ba2e4443Sseb 	switch (cap) {
2470ba2e4443Sseb 	case MAC_CAPAB_HCKSUM: {
2471ba2e4443Sseb 		uint32_t *hcksum_txflags = cap_data;
2472ba2e4443Sseb 		*hcksum_txflags = grp->lg_hcksum_txflags;
2473ba2e4443Sseb 		break;
2474ba2e4443Sseb 	}
247519c868a0SRoamer 	case MAC_CAPAB_LSO: {
247619c868a0SRoamer 		mac_capab_lso_t *cap_lso = cap_data;
247719c868a0SRoamer 
247819c868a0SRoamer 		if (grp->lg_lso) {
247919c868a0SRoamer 			*cap_lso = grp->lg_cap_lso;
248019c868a0SRoamer 			break;
248119c868a0SRoamer 		} else {
248219c868a0SRoamer 			return (B_FALSE);
248319c868a0SRoamer 		}
248419c868a0SRoamer 	}
2485d62bc4baSyz 	case MAC_CAPAB_NO_NATIVEVLAN:
2486d62bc4baSyz 		return (!grp->lg_vlan);
2487d62bc4baSyz 	case MAC_CAPAB_NO_ZCOPY:
2488d62bc4baSyz 		return (!grp->lg_zcopy);
2489da14cebeSEric Cheng 	case MAC_CAPAB_RINGS: {
2490da14cebeSEric Cheng 		mac_capab_rings_t *cap_rings = cap_data;
249145948e49SRyan Zezeski 		uint_t ring_cnt = 0;
249245948e49SRyan Zezeski 
249345948e49SRyan Zezeski 		for (uint_t i = 0; i < grp->lg_rx_group_count; i++)
249445948e49SRyan Zezeski 			ring_cnt += grp->lg_rx_groups[i].arg_ring_cnt;
2495da14cebeSEric Cheng 
2496da14cebeSEric Cheng 		if (cap_rings->mr_type == MAC_RING_TYPE_RX) {
2497da14cebeSEric Cheng 			cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;
249845948e49SRyan Zezeski 			cap_rings->mr_rnum = ring_cnt;
249945948e49SRyan Zezeski 			cap_rings->mr_gnum = grp->lg_rx_group_count;
2500da14cebeSEric Cheng 			cap_rings->mr_gaddring = NULL;
2501da14cebeSEric Cheng 			cap_rings->mr_gremring = NULL;
2502da14cebeSEric Cheng 		} else {
25030dc2366fSVenugopal Iyer 			cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;
25040dc2366fSVenugopal Iyer 			cap_rings->mr_rnum = grp->lg_tx_group.atg_ring_cnt;
25050dc2366fSVenugopal Iyer 			cap_rings->mr_gnum = 0;
2506da14cebeSEric Cheng 		}
25070dc2366fSVenugopal Iyer 		cap_rings->mr_rget = aggr_fill_ring;
25080dc2366fSVenugopal Iyer 		cap_rings->mr_gget = aggr_fill_group;
2509da14cebeSEric Cheng 		break;
2510da14cebeSEric Cheng 	}
2511da14cebeSEric Cheng 	case MAC_CAPAB_AGGR:
2512da14cebeSEric Cheng 	{
2513da14cebeSEric Cheng 		mac_capab_aggr_t *aggr_cap;
2514da14cebeSEric Cheng 
2515da14cebeSEric Cheng 		if (cap_data != NULL) {
2516da14cebeSEric Cheng 			aggr_cap = cap_data;
2517da14cebeSEric Cheng 			aggr_cap->mca_rename_fn = aggr_grp_port_rename;
2518da14cebeSEric Cheng 			aggr_cap->mca_unicst = aggr_m_unicst;
25190dc2366fSVenugopal Iyer 			aggr_cap->mca_find_tx_ring_fn = aggr_find_tx_ring;
25200dc2366fSVenugopal Iyer 			aggr_cap->mca_arg = arg;
2521da14cebeSEric Cheng 		}
2522da14cebeSEric Cheng 		return (B_TRUE);
2523da14cebeSEric Cheng 	}
2524ba2e4443Sseb 	default:
2525ba2e4443Sseb 		return (B_FALSE);
2526ba2e4443Sseb 	}
2527ba2e4443Sseb 	return (B_TRUE);
2528ba2e4443Sseb }
2529ba2e4443Sseb 
2530da14cebeSEric Cheng /*
253184de666eSRyan Zezeski  * Callback function for MAC layer to register groups.
2532da14cebeSEric Cheng  */
2533da14cebeSEric Cheng static void
aggr_fill_group(void * arg,mac_ring_type_t rtype,const int index,mac_group_info_t * infop,mac_group_handle_t gh)2534da14cebeSEric Cheng aggr_fill_group(void *arg, mac_ring_type_t rtype, const int index,
2535da14cebeSEric Cheng     mac_group_info_t *infop, mac_group_handle_t gh)
2536da14cebeSEric Cheng {
2537da14cebeSEric Cheng 	aggr_grp_t *grp = arg;
25380dc2366fSVenugopal Iyer 
25390dc2366fSVenugopal Iyer 	if (rtype == MAC_RING_TYPE_RX) {
254045948e49SRyan Zezeski 		aggr_pseudo_rx_group_t *rx_group = &grp->lg_rx_groups[index];
254145948e49SRyan Zezeski 
25420dc2366fSVenugopal Iyer 		rx_group->arg_gh = gh;
25430dc2366fSVenugopal Iyer 		rx_group->arg_grp = grp;
25440dc2366fSVenugopal Iyer 
25450dc2366fSVenugopal Iyer 		infop->mgi_driver = (mac_group_driver_t)rx_group;
25460dc2366fSVenugopal Iyer 		infop->mgi_start = NULL;
25470dc2366fSVenugopal Iyer 		infop->mgi_stop = NULL;
25480dc2366fSVenugopal Iyer 		infop->mgi_addmac = aggr_addmac;
25490dc2366fSVenugopal Iyer 		infop->mgi_remmac = aggr_remmac;
25500dc2366fSVenugopal Iyer 		infop->mgi_count = rx_group->arg_ring_cnt;
255184de666eSRyan Zezeski 
255284de666eSRyan Zezeski 		/*
255384de666eSRyan Zezeski 		 * Always set the HW VLAN callbacks. They are smart
255484de666eSRyan Zezeski 		 * enough to know when a port has HW VLAN filters to
255584de666eSRyan Zezeski 		 * program and when it doesn't.
255684de666eSRyan Zezeski 		 */
255784de666eSRyan Zezeski 		infop->mgi_addvlan = aggr_addvlan;
255884de666eSRyan Zezeski 		infop->mgi_remvlan = aggr_remvlan;
25590dc2366fSVenugopal Iyer 	} else {
256045948e49SRyan Zezeski 		aggr_pseudo_tx_group_t *tx_group = &grp->lg_tx_group;
256145948e49SRyan Zezeski 
256245948e49SRyan Zezeski 		ASSERT3S(index, ==, 0);
25630dc2366fSVenugopal Iyer 		tx_group->atg_gh = gh;
25640dc2366fSVenugopal Iyer 	}
2565da14cebeSEric Cheng }
2566da14cebeSEric Cheng 
2567da14cebeSEric Cheng /*
2568da14cebeSEric Cheng  * Callback funtion for MAC layer to register all rings.
2569da14cebeSEric Cheng  */
2570da14cebeSEric Cheng static void
aggr_fill_ring(void * arg,mac_ring_type_t rtype,const int rg_index,const int index,mac_ring_info_t * infop,mac_ring_handle_t rh)2571da14cebeSEric Cheng aggr_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index,
2572da14cebeSEric Cheng     const int index, mac_ring_info_t *infop, mac_ring_handle_t rh)
2573da14cebeSEric Cheng {
2574da14cebeSEric Cheng 	aggr_grp_t	*grp = arg;
2575da14cebeSEric Cheng 
2576da14cebeSEric Cheng 	switch (rtype) {
2577da14cebeSEric Cheng 	case MAC_RING_TYPE_RX: {
257845948e49SRyan Zezeski 		aggr_pseudo_rx_group_t	*rx_group;
2579da14cebeSEric Cheng 		aggr_pseudo_rx_ring_t	*rx_ring;
2580da14cebeSEric Cheng 		mac_intr_t		aggr_mac_intr;
2581da14cebeSEric Cheng 
258245948e49SRyan Zezeski 		rx_group = &grp->lg_rx_groups[rg_index];
258345948e49SRyan Zezeski 		ASSERT3S(index, >=, 0);
258445948e49SRyan Zezeski 		ASSERT3S(index, <, rx_group->arg_ring_cnt);
2585da14cebeSEric Cheng 		rx_ring = rx_group->arg_rings + index;
2586da14cebeSEric Cheng 		rx_ring->arr_rh = rh;
2587da14cebeSEric Cheng 
2588da14cebeSEric Cheng 		/*
2589da14cebeSEric Cheng 		 * Entrypoint to enable interrupt (disable poll) and
2590da14cebeSEric Cheng 		 * disable interrupt (enable poll).
2591da14cebeSEric Cheng 		 */
2592da14cebeSEric Cheng 		aggr_mac_intr.mi_handle = (mac_intr_handle_t)rx_ring;
2593da14cebeSEric Cheng 		aggr_mac_intr.mi_enable = aggr_pseudo_enable_intr;
2594da14cebeSEric Cheng 		aggr_mac_intr.mi_disable = aggr_pseudo_disable_intr;
25950dc2366fSVenugopal Iyer 		aggr_mac_intr.mi_ddi_handle = NULL;
2596da14cebeSEric Cheng 
2597da14cebeSEric Cheng 		infop->mri_driver = (mac_ring_driver_t)rx_ring;
259845948e49SRyan Zezeski 		infop->mri_start = aggr_pseudo_start_rx_ring;
259945948e49SRyan Zezeski 		infop->mri_stop = aggr_pseudo_stop_rx_ring;
2600da14cebeSEric Cheng 
2601da14cebeSEric Cheng 		infop->mri_intr = aggr_mac_intr;
2602da14cebeSEric Cheng 		infop->mri_poll = aggr_rx_poll;
26030dc2366fSVenugopal Iyer 
26040dc2366fSVenugopal Iyer 		infop->mri_stat = aggr_rx_ring_stat;
26050dc2366fSVenugopal Iyer 		break;
26060dc2366fSVenugopal Iyer 	}
26070dc2366fSVenugopal Iyer 	case MAC_RING_TYPE_TX: {
26080dc2366fSVenugopal Iyer 		aggr_pseudo_tx_group_t	*tx_group = &grp->lg_tx_group;
26090dc2366fSVenugopal Iyer 		aggr_pseudo_tx_ring_t	*tx_ring;
26100dc2366fSVenugopal Iyer 
26110dc2366fSVenugopal Iyer 		ASSERT(rg_index == -1);
26120dc2366fSVenugopal Iyer 		ASSERT(index < tx_group->atg_ring_cnt);
26130dc2366fSVenugopal Iyer 
26140dc2366fSVenugopal Iyer 		tx_ring = &tx_group->atg_rings[index];
26150dc2366fSVenugopal Iyer 		tx_ring->atr_rh = rh;
26160dc2366fSVenugopal Iyer 
26170dc2366fSVenugopal Iyer 		infop->mri_driver = (mac_ring_driver_t)tx_ring;
26180dc2366fSVenugopal Iyer 		infop->mri_start = NULL;
26190dc2366fSVenugopal Iyer 		infop->mri_stop = NULL;
26200dc2366fSVenugopal Iyer 		infop->mri_tx = aggr_ring_tx;
26210dc2366fSVenugopal Iyer 		infop->mri_stat = aggr_tx_ring_stat;
26220dc2366fSVenugopal Iyer 		/*
26230dc2366fSVenugopal Iyer 		 * Use the hw TX ring handle to find if the ring needs
26240dc2366fSVenugopal Iyer 		 * serialization or not. For NICs that do not expose
26250dc2366fSVenugopal Iyer 		 * Tx rings, atr_hw_rh will be NULL.
26260dc2366fSVenugopal Iyer 		 */
26270dc2366fSVenugopal Iyer 		if (tx_ring->atr_hw_rh != NULL) {
26280dc2366fSVenugopal Iyer 			infop->mri_flags =
26290dc2366fSVenugopal Iyer 			    mac_hwring_getinfo(tx_ring->atr_hw_rh);
26300dc2366fSVenugopal Iyer 		}
2631da14cebeSEric Cheng 		break;
2632da14cebeSEric Cheng 	}
2633da14cebeSEric Cheng 	default:
2634da14cebeSEric Cheng 		break;
2635da14cebeSEric Cheng 	}
2636da14cebeSEric Cheng }
2637da14cebeSEric Cheng 
2638da14cebeSEric Cheng static mblk_t *
aggr_rx_poll(void * arg,int bytes_to_pickup)2639da14cebeSEric Cheng aggr_rx_poll(void *arg, int bytes_to_pickup)
2640da14cebeSEric Cheng {
2641da14cebeSEric Cheng 	aggr_pseudo_rx_ring_t *rr_ring = arg;
2642da14cebeSEric Cheng 	aggr_port_t *port = rr_ring->arr_port;
2643da14cebeSEric Cheng 	aggr_grp_t *grp = port->lp_grp;
2644da14cebeSEric Cheng 	mblk_t *mp_chain, *mp, **mpp;
2645da14cebeSEric Cheng 
2646da14cebeSEric Cheng 	mp_chain = mac_hwring_poll(rr_ring->arr_hw_rh, bytes_to_pickup);
2647da14cebeSEric Cheng 
2648da14cebeSEric Cheng 	if (grp->lg_lacp_mode == AGGR_LACP_OFF)
2649da14cebeSEric Cheng 		return (mp_chain);
2650da14cebeSEric Cheng 
2651da14cebeSEric Cheng 	mpp = &mp_chain;
2652da14cebeSEric Cheng 	while ((mp = *mpp) != NULL) {
2653da14cebeSEric Cheng 		if (MBLKL(mp) >= sizeof (struct ether_header)) {
2654da14cebeSEric Cheng 			struct ether_header *ehp;
2655da14cebeSEric Cheng 
2656da14cebeSEric Cheng 			ehp = (struct ether_header *)mp->b_rptr;
2657da14cebeSEric Cheng 			if (ntohs(ehp->ether_type) == ETHERTYPE_SLOW) {
2658da14cebeSEric Cheng 				*mpp = mp->b_next;
2659da14cebeSEric Cheng 				mp->b_next = NULL;
2660da14cebeSEric Cheng 				aggr_recv_lacp(port,
2661da14cebeSEric Cheng 				    (mac_resource_handle_t)rr_ring, mp);
2662da14cebeSEric Cheng 				continue;
2663da14cebeSEric Cheng 			}
2664da14cebeSEric Cheng 		}
2665da14cebeSEric Cheng 
2666da14cebeSEric Cheng 		if (!port->lp_collector_enabled) {
2667da14cebeSEric Cheng 			*mpp = mp->b_next;
2668da14cebeSEric Cheng 			mp->b_next = NULL;
2669da14cebeSEric Cheng 			freemsg(mp);
2670da14cebeSEric Cheng 			continue;
2671da14cebeSEric Cheng 		}
2672da14cebeSEric Cheng 		mpp = &mp->b_next;
2673da14cebeSEric Cheng 	}
2674da14cebeSEric Cheng 	return (mp_chain);
2675da14cebeSEric Cheng }
2676da14cebeSEric Cheng 
267719599311Sudpa static int
aggr_addmac(void * arg,const uint8_t * mac_addr)2678da14cebeSEric Cheng aggr_addmac(void *arg, const uint8_t *mac_addr)
267919599311Sudpa {
2680da14cebeSEric Cheng 	aggr_pseudo_rx_group_t	*rx_group = (aggr_pseudo_rx_group_t *)arg;
2681da14cebeSEric Cheng 	aggr_unicst_addr_t	*addr, **pprev;
2682da14cebeSEric Cheng 	aggr_grp_t		*grp = rx_group->arg_grp;
2683da14cebeSEric Cheng 	aggr_port_t		*port, *p;
2684da14cebeSEric Cheng 	mac_perim_handle_t	mph;
2685da14cebeSEric Cheng 	int			err = 0;
268645948e49SRyan Zezeski 	uint_t			idx = rx_group->arg_index;
2687da14cebeSEric Cheng 
2688da14cebeSEric Cheng 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
2689da14cebeSEric Cheng 
2690da14cebeSEric Cheng 	if (bcmp(mac_addr, grp->lg_addr, ETHERADDRL) == 0) {
2691da14cebeSEric Cheng 		mac_perim_exit(mph);
2692da14cebeSEric Cheng 		return (0);
2693da14cebeSEric Cheng 	}
269419599311Sudpa 
2695da14cebeSEric Cheng 	/*
2696da14cebeSEric Cheng 	 * Insert this mac address into the list of mac addresses owned by
2697da14cebeSEric Cheng 	 * the aggregation pseudo group.
2698da14cebeSEric Cheng 	 */
2699da14cebeSEric Cheng 	pprev = &rx_group->arg_macaddr;
2700da14cebeSEric Cheng 	while ((addr = *pprev) != NULL) {
2701da14cebeSEric Cheng 		if (bcmp(mac_addr, addr->aua_addr, ETHERADDRL) == 0) {
2702da14cebeSEric Cheng 			mac_perim_exit(mph);
2703da14cebeSEric Cheng 			return (EEXIST);
2704da14cebeSEric Cheng 		}
2705da14cebeSEric Cheng 		pprev = &addr->aua_next;
2706da14cebeSEric Cheng 	}
2707da14cebeSEric Cheng 	addr = kmem_alloc(sizeof (aggr_unicst_addr_t), KM_SLEEP);
2708da14cebeSEric Cheng 	bcopy(mac_addr, addr->aua_addr, ETHERADDRL);
2709da14cebeSEric Cheng 	addr->aua_next = NULL;
2710da14cebeSEric Cheng 	*pprev = addr;
271119599311Sudpa 
2712da14cebeSEric Cheng 	for (port = grp->lg_ports; port != NULL; port = port->lp_next)
271345948e49SRyan Zezeski 		if ((err = aggr_port_addmac(port, idx, mac_addr)) != 0)
271419599311Sudpa 			break;
2715da14cebeSEric Cheng 
2716da14cebeSEric Cheng 	if (err != 0) {
2717da14cebeSEric Cheng 		for (p = grp->lg_ports; p != port; p = p->lp_next)
271845948e49SRyan Zezeski 			aggr_port_remmac(p, idx, mac_addr);
2719da14cebeSEric Cheng 
2720da14cebeSEric Cheng 		*pprev = NULL;
2721da14cebeSEric Cheng 		kmem_free(addr, sizeof (aggr_unicst_addr_t));
272219599311Sudpa 	}
272319599311Sudpa 
2724da14cebeSEric Cheng 	mac_perim_exit(mph);
2725da14cebeSEric Cheng 	return (err);
2726da14cebeSEric Cheng }
2727da14cebeSEric Cheng 
2728da14cebeSEric Cheng static int
aggr_remmac(void * arg,const uint8_t * mac_addr)2729da14cebeSEric Cheng aggr_remmac(void *arg, const uint8_t *mac_addr)
2730da14cebeSEric Cheng {
2731da14cebeSEric Cheng 	aggr_pseudo_rx_group_t	*rx_group = (aggr_pseudo_rx_group_t *)arg;
2732da14cebeSEric Cheng 	aggr_unicst_addr_t	*addr, **pprev;
2733da14cebeSEric Cheng 	aggr_grp_t		*grp = rx_group->arg_grp;
2734da14cebeSEric Cheng 	aggr_port_t		*port;
2735da14cebeSEric Cheng 	mac_perim_handle_t	mph;
2736da14cebeSEric Cheng 	int			err = 0;
2737da14cebeSEric Cheng 
2738da14cebeSEric Cheng 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
2739da14cebeSEric Cheng 
2740da14cebeSEric Cheng 	if (bcmp(mac_addr, grp->lg_addr, ETHERADDRL) == 0) {
2741da14cebeSEric Cheng 		mac_perim_exit(mph);
2742da14cebeSEric Cheng 		return (0);
274319599311Sudpa 	}
2744da14cebeSEric Cheng 
2745da14cebeSEric Cheng 	/*
2746da14cebeSEric Cheng 	 * Insert this mac address into the list of mac addresses owned by
2747da14cebeSEric Cheng 	 * the aggregation pseudo group.
2748da14cebeSEric Cheng 	 */
2749da14cebeSEric Cheng 	pprev = &rx_group->arg_macaddr;
2750da14cebeSEric Cheng 	while ((addr = *pprev) != NULL) {
2751da14cebeSEric Cheng 		if (bcmp(mac_addr, addr->aua_addr, ETHERADDRL) != 0) {
2752da14cebeSEric Cheng 			pprev = &addr->aua_next;
2753da14cebeSEric Cheng 			continue;
2754da14cebeSEric Cheng 		}
2755da14cebeSEric Cheng 		break;
2756da14cebeSEric Cheng 	}
2757da14cebeSEric Cheng 	if (addr == NULL) {
2758da14cebeSEric Cheng 		mac_perim_exit(mph);
2759da14cebeSEric Cheng 		return (EINVAL);
2760da14cebeSEric Cheng 	}
2761da14cebeSEric Cheng 
2762da14cebeSEric Cheng 	for (port = grp->lg_ports; port != NULL; port = port->lp_next)
276345948e49SRyan Zezeski 		aggr_port_remmac(port, rx_group->arg_index, mac_addr);
2764da14cebeSEric Cheng 
2765da14cebeSEric Cheng 	*pprev = addr->aua_next;
2766da14cebeSEric Cheng 	kmem_free(addr, sizeof (aggr_unicst_addr_t));
2767da14cebeSEric Cheng 
2768da14cebeSEric Cheng 	mac_perim_exit(mph);
2769da14cebeSEric Cheng 	return (err);
277019599311Sudpa }
277119599311Sudpa 
277284de666eSRyan Zezeski /*
277384de666eSRyan Zezeski  * Search for VID in the Rx group's list and return a pointer if
277484de666eSRyan Zezeski  * found. Otherwise return NULL.
277584de666eSRyan Zezeski  */
277684de666eSRyan Zezeski static aggr_vlan_t *
aggr_find_vlan(aggr_pseudo_rx_group_t * rx_group,uint16_t vid)277784de666eSRyan Zezeski aggr_find_vlan(aggr_pseudo_rx_group_t *rx_group, uint16_t vid)
277884de666eSRyan Zezeski {
277984de666eSRyan Zezeski 	ASSERT(MAC_PERIM_HELD(rx_group->arg_grp->lg_mh));
278084de666eSRyan Zezeski 	for (aggr_vlan_t *avp = list_head(&rx_group->arg_vlans); avp != NULL;
278184de666eSRyan Zezeski 	    avp = list_next(&rx_group->arg_vlans, avp)) {
278284de666eSRyan Zezeski 		if (avp->av_vid == vid)
278384de666eSRyan Zezeski 			return (avp);
278484de666eSRyan Zezeski 	}
278584de666eSRyan Zezeski 
278684de666eSRyan Zezeski 	return (NULL);
278784de666eSRyan Zezeski }
278884de666eSRyan Zezeski 
278984de666eSRyan Zezeski /*
279084de666eSRyan Zezeski  * Accept traffic on the specified VID.
279184de666eSRyan Zezeski  *
279284de666eSRyan Zezeski  * Persist VLAN state in the aggr so that ports added later will
279384de666eSRyan Zezeski  * receive the correct filters. In the future it would be nice to
279484de666eSRyan Zezeski  * allow aggr to iterate its clients instead of duplicating state.
279584de666eSRyan Zezeski  */
279684de666eSRyan Zezeski static int
aggr_addvlan(mac_group_driver_t gdriver,uint16_t vid)279784de666eSRyan Zezeski aggr_addvlan(mac_group_driver_t gdriver, uint16_t vid)
279884de666eSRyan Zezeski {
279945948e49SRyan Zezeski 	aggr_pseudo_rx_group_t	*rx_group = (aggr_pseudo_rx_group_t *)gdriver;
280084de666eSRyan Zezeski 	aggr_grp_t		*aggr = rx_group->arg_grp;
280184de666eSRyan Zezeski 	aggr_port_t		*port, *p;
280284de666eSRyan Zezeski 	mac_perim_handle_t	mph;
280384de666eSRyan Zezeski 	int			err = 0;
280484de666eSRyan Zezeski 	aggr_vlan_t		*avp = NULL;
280545948e49SRyan Zezeski 	uint_t			idx = rx_group->arg_index;
280684de666eSRyan Zezeski 
280784de666eSRyan Zezeski 	mac_perim_enter_by_mh(aggr->lg_mh, &mph);
280884de666eSRyan Zezeski 
280984de666eSRyan Zezeski 	if (vid == MAC_VLAN_UNTAGGED) {
281084de666eSRyan Zezeski 		/*
281184de666eSRyan Zezeski 		 * Aggr is both a MAC provider and MAC client. As a
281284de666eSRyan Zezeski 		 * MAC provider it is passed MAC_VLAN_UNTAGGED by its
281384de666eSRyan Zezeski 		 * client. As a client itself, it should pass
281484de666eSRyan Zezeski 		 * VLAN_ID_NONE to its ports.
281584de666eSRyan Zezeski 		 */
281684de666eSRyan Zezeski 		vid = VLAN_ID_NONE;
281784de666eSRyan Zezeski 		rx_group->arg_untagged++;
281884de666eSRyan Zezeski 		goto update_ports;
281984de666eSRyan Zezeski 	}
282084de666eSRyan Zezeski 
282184de666eSRyan Zezeski 	avp = aggr_find_vlan(rx_group, vid);
282284de666eSRyan Zezeski 
282384de666eSRyan Zezeski 	if (avp != NULL) {
282484de666eSRyan Zezeski 		avp->av_refs++;
282584de666eSRyan Zezeski 		mac_perim_exit(mph);
282684de666eSRyan Zezeski 		return (0);
282784de666eSRyan Zezeski 	}
282884de666eSRyan Zezeski 
282984de666eSRyan Zezeski 	avp = kmem_zalloc(sizeof (aggr_vlan_t), KM_SLEEP);
283084de666eSRyan Zezeski 	avp->av_vid = vid;
283184de666eSRyan Zezeski 	avp->av_refs = 1;
283284de666eSRyan Zezeski 
283384de666eSRyan Zezeski update_ports:
283484de666eSRyan Zezeski 	for (port = aggr->lg_ports; port != NULL; port = port->lp_next)
283545948e49SRyan Zezeski 		if ((err = aggr_port_addvlan(port, idx, vid)) != 0)
283684de666eSRyan Zezeski 			break;
283784de666eSRyan Zezeski 
283884de666eSRyan Zezeski 	if (err != 0) {
283984de666eSRyan Zezeski 		/*
284084de666eSRyan Zezeski 		 * If any of these calls fail then we are in a
284184de666eSRyan Zezeski 		 * situation where the ports have different HW state.
284284de666eSRyan Zezeski 		 * There's no reasonable action the MAC client can
284384de666eSRyan Zezeski 		 * take in this scenario to rectify the situation.
284484de666eSRyan Zezeski 		 */
284584de666eSRyan Zezeski 		for (p = aggr->lg_ports; p != port; p = p->lp_next) {
284684de666eSRyan Zezeski 			int err2;
284784de666eSRyan Zezeski 
284845948e49SRyan Zezeski 			if ((err2 = aggr_port_remvlan(p, idx, vid)) != 0) {
284984de666eSRyan Zezeski 				cmn_err(CE_WARN, "Failed to remove VLAN %u"
285084de666eSRyan Zezeski 				    " from port %s: errno %d.", vid,
285184de666eSRyan Zezeski 				    mac_client_name(p->lp_mch), err2);
285284de666eSRyan Zezeski 			}
285384de666eSRyan Zezeski 
285484de666eSRyan Zezeski 		}
285584de666eSRyan Zezeski 
285684de666eSRyan Zezeski 		if (vid == VLAN_ID_NONE)
285784de666eSRyan Zezeski 			rx_group->arg_untagged--;
285884de666eSRyan Zezeski 
285984de666eSRyan Zezeski 		if (avp != NULL) {
286084de666eSRyan Zezeski 			kmem_free(avp, sizeof (aggr_vlan_t));
286184de666eSRyan Zezeski 			avp = NULL;
286284de666eSRyan Zezeski 		}
286384de666eSRyan Zezeski 	}
286484de666eSRyan Zezeski 
286584de666eSRyan Zezeski 	if (avp != NULL)
286684de666eSRyan Zezeski 		list_insert_tail(&rx_group->arg_vlans, avp);
286784de666eSRyan Zezeski 
286884de666eSRyan Zezeski done:
286984de666eSRyan Zezeski 	mac_perim_exit(mph);
287084de666eSRyan Zezeski 	return (err);
287184de666eSRyan Zezeski }
287284de666eSRyan Zezeski 
287384de666eSRyan Zezeski /*
287484de666eSRyan Zezeski  * Stop accepting traffic on this VLAN if it's the last use of this VLAN.
287584de666eSRyan Zezeski  */
287684de666eSRyan Zezeski static int
aggr_remvlan(mac_group_driver_t gdriver,uint16_t vid)287784de666eSRyan Zezeski aggr_remvlan(mac_group_driver_t gdriver, uint16_t vid)
287884de666eSRyan Zezeski {
287945948e49SRyan Zezeski 	aggr_pseudo_rx_group_t	*rx_group = (aggr_pseudo_rx_group_t *)gdriver;
288084de666eSRyan Zezeski 	aggr_grp_t		*aggr = rx_group->arg_grp;
288184de666eSRyan Zezeski 	aggr_port_t		*port, *p;
288284de666eSRyan Zezeski 	mac_perim_handle_t	mph;
288384de666eSRyan Zezeski 	int			err = 0;
288484de666eSRyan Zezeski 	aggr_vlan_t		*avp = NULL;
288545948e49SRyan Zezeski 	uint_t			idx = rx_group->arg_index;
288684de666eSRyan Zezeski 
288784de666eSRyan Zezeski 	mac_perim_enter_by_mh(aggr->lg_mh, &mph);
288884de666eSRyan Zezeski 
288984de666eSRyan Zezeski 	/*
289084de666eSRyan Zezeski 	 * See the comment in aggr_addvlan().
289184de666eSRyan Zezeski 	 */
289284de666eSRyan Zezeski 	if (vid == MAC_VLAN_UNTAGGED) {
289384de666eSRyan Zezeski 		vid = VLAN_ID_NONE;
289484de666eSRyan Zezeski 		rx_group->arg_untagged--;
289584de666eSRyan Zezeski 
289684de666eSRyan Zezeski 		if (rx_group->arg_untagged > 0)
289784de666eSRyan Zezeski 			goto done;
289884de666eSRyan Zezeski 
289984de666eSRyan Zezeski 		goto update_ports;
290084de666eSRyan Zezeski 	}
290184de666eSRyan Zezeski 
290284de666eSRyan Zezeski 	avp = aggr_find_vlan(rx_group, vid);
290384de666eSRyan Zezeski 
290484de666eSRyan Zezeski 	if (avp == NULL) {
290584de666eSRyan Zezeski 		err = ENOENT;
290684de666eSRyan Zezeski 		goto done;
290784de666eSRyan Zezeski 	}
290884de666eSRyan Zezeski 
290984de666eSRyan Zezeski 	avp->av_refs--;
291084de666eSRyan Zezeski 
291184de666eSRyan Zezeski 	if (avp->av_refs > 0)
291284de666eSRyan Zezeski 		goto done;
291384de666eSRyan Zezeski 
291484de666eSRyan Zezeski update_ports:
291584de666eSRyan Zezeski 	for (port = aggr->lg_ports; port != NULL; port = port->lp_next)
291645948e49SRyan Zezeski 		if ((err = aggr_port_remvlan(port, idx, vid)) != 0)
291784de666eSRyan Zezeski 			break;
291884de666eSRyan Zezeski 
291984de666eSRyan Zezeski 	/*
292084de666eSRyan Zezeski 	 * See the comment in aggr_addvlan() for justification of the
292184de666eSRyan Zezeski 	 * use of VERIFY here.
292284de666eSRyan Zezeski 	 */
292384de666eSRyan Zezeski 	if (err != 0) {
292484de666eSRyan Zezeski 		for (p = aggr->lg_ports; p != port; p = p->lp_next) {
292584de666eSRyan Zezeski 			int err2;
292684de666eSRyan Zezeski 
292745948e49SRyan Zezeski 			if ((err2 = aggr_port_addvlan(p, idx, vid)) != 0) {
292884de666eSRyan Zezeski 				cmn_err(CE_WARN, "Failed to add VLAN %u"
292984de666eSRyan Zezeski 				    " to port %s: errno %d.", vid,
293084de666eSRyan Zezeski 				    mac_client_name(p->lp_mch), err2);
293184de666eSRyan Zezeski 			}
293284de666eSRyan Zezeski 		}
293384de666eSRyan Zezeski 
293484de666eSRyan Zezeski 		if (avp != NULL)
293584de666eSRyan Zezeski 			avp->av_refs++;
293684de666eSRyan Zezeski 
293784de666eSRyan Zezeski 		if (vid == VLAN_ID_NONE)
293884de666eSRyan Zezeski 			rx_group->arg_untagged++;
293984de666eSRyan Zezeski 
294084de666eSRyan Zezeski 		goto done;
294184de666eSRyan Zezeski 	}
294284de666eSRyan Zezeski 
294384de666eSRyan Zezeski 	if (err == 0 && avp != NULL) {
294484de666eSRyan Zezeski 		VERIFY3U(avp->av_refs, ==, 0);
294584de666eSRyan Zezeski 		list_remove(&rx_group->arg_vlans, avp);
294684de666eSRyan Zezeski 		kmem_free(avp, sizeof (aggr_vlan_t));
294784de666eSRyan Zezeski 	}
294884de666eSRyan Zezeski 
294984de666eSRyan Zezeski done:
295084de666eSRyan Zezeski 	mac_perim_exit(mph);
295184de666eSRyan Zezeski 	return (err);
295284de666eSRyan Zezeski }
295384de666eSRyan Zezeski 
29547c478bd9Sstevel@tonic-gate /*
29557c478bd9Sstevel@tonic-gate  * Add or remove the multicast addresses that are defined for the group
29567c478bd9Sstevel@tonic-gate  * to or from the specified port.
2957ae6aa22aSVenugopal Iyer  *
2958ae6aa22aSVenugopal Iyer  * Note that aggr_grp_multicst_port(..., B_TRUE) is called when the port
2959ae6aa22aSVenugopal Iyer  * is started and attached, and aggr_grp_multicst_port(..., B_FALSE) is
2960ae6aa22aSVenugopal Iyer  * called when the port is either stopped or detached.
29617c478bd9Sstevel@tonic-gate  */
29627c478bd9Sstevel@tonic-gate void
aggr_grp_multicst_port(aggr_port_t * port,boolean_t add)29637c478bd9Sstevel@tonic-gate aggr_grp_multicst_port(aggr_port_t *port, boolean_t add)
29647c478bd9Sstevel@tonic-gate {
29657c478bd9Sstevel@tonic-gate 	aggr_grp_t *grp = port->lp_grp;
29667c478bd9Sstevel@tonic-gate 
2967da14cebeSEric Cheng 	ASSERT(MAC_PERIM_HELD(port->lp_mh));
2968da14cebeSEric Cheng 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
29697c478bd9Sstevel@tonic-gate 
2970ae6aa22aSVenugopal Iyer 	if (!port->lp_started || port->lp_state != AGGR_PORT_STATE_ATTACHED)
29717c478bd9Sstevel@tonic-gate 		return;
29727c478bd9Sstevel@tonic-gate 
2973da14cebeSEric Cheng 	mac_multicast_refresh(grp->lg_mh, aggr_port_multicst, port, add);
29747c478bd9Sstevel@tonic-gate }
29757c478bd9Sstevel@tonic-gate 
29767c478bd9Sstevel@tonic-gate static int
aggr_m_multicst(void * arg,boolean_t add,const uint8_t * addrp)29777c478bd9Sstevel@tonic-gate aggr_m_multicst(void *arg, boolean_t add, const uint8_t *addrp)
29787c478bd9Sstevel@tonic-gate {
29797c478bd9Sstevel@tonic-gate 	aggr_grp_t *grp = arg;
29804a6df672SAnil udupa 	aggr_port_t *port = NULL, *errport = NULL;
2981da14cebeSEric Cheng 	mac_perim_handle_t mph;
29824a6df672SAnil udupa 	int err = 0;
29837c478bd9Sstevel@tonic-gate 
2984da14cebeSEric Cheng 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
29857c478bd9Sstevel@tonic-gate 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
2986ae6aa22aSVenugopal Iyer 		if (port->lp_state != AGGR_PORT_STATE_ATTACHED ||
2987ae6aa22aSVenugopal Iyer 		    !port->lp_started) {
29887c478bd9Sstevel@tonic-gate 			continue;
2989ae6aa22aSVenugopal Iyer 		}
29904a6df672SAnil udupa 		err = aggr_port_multicst(port, add, addrp);
29914a6df672SAnil udupa 		if (err != 0) {
29924a6df672SAnil udupa 			errport = port;
29934a6df672SAnil udupa 			break;
29944a6df672SAnil udupa 		}
29954a6df672SAnil udupa 	}
29964a6df672SAnil udupa 
29974a6df672SAnil udupa 	/*
29984a6df672SAnil udupa 	 * At least one port caused error return and this error is returned to
29994a6df672SAnil udupa 	 * mac, eventually a NAK would be sent upwards.
30004a6df672SAnil udupa 	 * Some ports have this multicast address listed now, and some don't.
30014a6df672SAnil udupa 	 * Treat this error as a whole aggr failure not individual port failure.
30024a6df672SAnil udupa 	 * Therefore remove this multicast address from other ports.
30034a6df672SAnil udupa 	 */
30044a6df672SAnil udupa 	if ((err != 0) && add) {
30054a6df672SAnil udupa 		for (port = grp->lg_ports; port != errport;
30064a6df672SAnil udupa 		    port = port->lp_next) {
30074a6df672SAnil udupa 			if (port->lp_state != AGGR_PORT_STATE_ATTACHED ||
30084a6df672SAnil udupa 			    !port->lp_started) {
30094a6df672SAnil udupa 				continue;
30104a6df672SAnil udupa 			}
30114a6df672SAnil udupa 			(void) aggr_port_multicst(port, B_FALSE, addrp);
30124a6df672SAnil udupa 		}
30137c478bd9Sstevel@tonic-gate 	}
3014da14cebeSEric Cheng 	mac_perim_exit(mph);
30157c478bd9Sstevel@tonic-gate 	return (err);
30167c478bd9Sstevel@tonic-gate }
30177c478bd9Sstevel@tonic-gate 
30187c478bd9Sstevel@tonic-gate static int
aggr_m_unicst(void * arg,const uint8_t * macaddr)30197c478bd9Sstevel@tonic-gate aggr_m_unicst(void *arg, const uint8_t *macaddr)
30207c478bd9Sstevel@tonic-gate {
30217c478bd9Sstevel@tonic-gate 	aggr_grp_t *grp = arg;
3022da14cebeSEric Cheng 	mac_perim_handle_t mph;
3023da14cebeSEric Cheng 	int err;
30247c478bd9Sstevel@tonic-gate 
3025da14cebeSEric Cheng 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
3026da14cebeSEric Cheng 	err = aggr_grp_modify_common(grp, AGGR_MODIFY_MAC, 0, B_TRUE, macaddr,
30277c478bd9Sstevel@tonic-gate 	    0, 0);
3028da14cebeSEric Cheng 	mac_perim_exit(mph);
3029da14cebeSEric Cheng 	return (err);
30307c478bd9Sstevel@tonic-gate }
30317c478bd9Sstevel@tonic-gate 
30327c478bd9Sstevel@tonic-gate /*
30337c478bd9Sstevel@tonic-gate  * Initialize the capabilities that are advertised for the group
30347c478bd9Sstevel@tonic-gate  * according to the capabilities of the constituent ports.
30357c478bd9Sstevel@tonic-gate  */
30367c478bd9Sstevel@tonic-gate static void
aggr_grp_capab_set(aggr_grp_t * grp)30377c478bd9Sstevel@tonic-gate aggr_grp_capab_set(aggr_grp_t *grp)
30387c478bd9Sstevel@tonic-gate {
3039020da793Sseb 	uint32_t cksum;
30407c478bd9Sstevel@tonic-gate 	aggr_port_t *port;
304119c868a0SRoamer 	mac_capab_lso_t cap_lso;
30427c478bd9Sstevel@tonic-gate 
3043da14cebeSEric Cheng 	ASSERT(grp->lg_mh == NULL);
30447c478bd9Sstevel@tonic-gate 	ASSERT(grp->lg_ports != NULL);
3045ba2e4443Sseb 
3046ba2e4443Sseb 	grp->lg_hcksum_txflags = (uint32_t)-1;
3047d62bc4baSyz 	grp->lg_zcopy = B_TRUE;
3048d62bc4baSyz 	grp->lg_vlan = B_TRUE;
3049ba2e4443Sseb 
305019c868a0SRoamer 	grp->lg_lso = B_TRUE;
305119c868a0SRoamer 	grp->lg_cap_lso.lso_flags = (t_uscalar_t)-1;
305219c868a0SRoamer 	grp->lg_cap_lso.lso_basic_tcp_ipv4.lso_max = (t_uscalar_t)-1;
305319c868a0SRoamer 
30547c478bd9Sstevel@tonic-gate 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
3055020da793Sseb 		if (!mac_capab_get(port->lp_mh, MAC_CAPAB_HCKSUM, &cksum))
3056020da793Sseb 			cksum = 0;
3057020da793Sseb 		grp->lg_hcksum_txflags &= cksum;
30587c478bd9Sstevel@tonic-gate 
3059d62bc4baSyz 		grp->lg_vlan &=
3060d62bc4baSyz 		    !mac_capab_get(port->lp_mh, MAC_CAPAB_NO_NATIVEVLAN, NULL);
3061d62bc4baSyz 
3062d62bc4baSyz 		grp->lg_zcopy &=
3063d62bc4baSyz 		    !mac_capab_get(port->lp_mh, MAC_CAPAB_NO_ZCOPY, NULL);
306419c868a0SRoamer 
306519c868a0SRoamer 		grp->lg_lso &=
306619c868a0SRoamer 		    mac_capab_get(port->lp_mh, MAC_CAPAB_LSO, &cap_lso);
306719c868a0SRoamer 		if (grp->lg_lso) {
306819c868a0SRoamer 			grp->lg_cap_lso.lso_flags &= cap_lso.lso_flags;
306919c868a0SRoamer 			if (grp->lg_cap_lso.lso_basic_tcp_ipv4.lso_max >
307019c868a0SRoamer 			    cap_lso.lso_basic_tcp_ipv4.lso_max)
307119c868a0SRoamer 				grp->lg_cap_lso.lso_basic_tcp_ipv4.lso_max =
307219c868a0SRoamer 				    cap_lso.lso_basic_tcp_ipv4.lso_max;
307319c868a0SRoamer 		}
3074ba2e4443Sseb 	}
30757c478bd9Sstevel@tonic-gate }
30767c478bd9Sstevel@tonic-gate 
30777c478bd9Sstevel@tonic-gate /*
3078ba2e4443Sseb  * Checks whether the capabilities of the port being added are compatible
30797c478bd9Sstevel@tonic-gate  * with the current capabilities of the aggregation.
30807c478bd9Sstevel@tonic-gate  */
30817c478bd9Sstevel@tonic-gate static boolean_t
aggr_grp_capab_check(aggr_grp_t * grp,aggr_port_t * port)30827c478bd9Sstevel@tonic-gate aggr_grp_capab_check(aggr_grp_t *grp, aggr_port_t *port)
30837c478bd9Sstevel@tonic-gate {
3084d62bc4baSyz 	uint32_t hcksum_txflags;
30857c478bd9Sstevel@tonic-gate 
30867c478bd9Sstevel@tonic-gate 	ASSERT(grp->lg_ports != NULL);
30877c478bd9Sstevel@tonic-gate 
3088d62bc4baSyz 	if (((!mac_capab_get(port->lp_mh, MAC_CAPAB_NO_NATIVEVLAN, NULL)) &
3089d62bc4baSyz 	    grp->lg_vlan) != grp->lg_vlan) {
3090d62bc4baSyz 		return (B_FALSE);
3091d62bc4baSyz 	}
3092d62bc4baSyz 
3093d62bc4baSyz 	if (((!mac_capab_get(port->lp_mh, MAC_CAPAB_NO_ZCOPY, NULL)) &
3094d62bc4baSyz 	    grp->lg_zcopy) != grp->lg_zcopy) {
3095d62bc4baSyz 		return (B_FALSE);
3096d62bc4baSyz 	}
3097d62bc4baSyz 
3098ba2e4443Sseb 	if (!mac_capab_get(port->lp_mh, MAC_CAPAB_HCKSUM, &hcksum_txflags)) {
3099ba2e4443Sseb 		if (grp->lg_hcksum_txflags != 0)
3100ba2e4443Sseb 			return (B_FALSE);
3101ba2e4443Sseb 	} else if ((hcksum_txflags & grp->lg_hcksum_txflags) !=
3102ba2e4443Sseb 	    grp->lg_hcksum_txflags) {
3103ba2e4443Sseb 		return (B_FALSE);
3104ba2e4443Sseb 	}
3105ba2e4443Sseb 
310619c868a0SRoamer 	if (grp->lg_lso) {
310719c868a0SRoamer 		mac_capab_lso_t cap_lso;
310819c868a0SRoamer 
310919c868a0SRoamer 		if (mac_capab_get(port->lp_mh, MAC_CAPAB_LSO, &cap_lso)) {
311019c868a0SRoamer 			if ((grp->lg_cap_lso.lso_flags & cap_lso.lso_flags) !=
311119c868a0SRoamer 			    grp->lg_cap_lso.lso_flags)
311219c868a0SRoamer 				return (B_FALSE);
311319c868a0SRoamer 			if (grp->lg_cap_lso.lso_basic_tcp_ipv4.lso_max >
311419c868a0SRoamer 			    cap_lso.lso_basic_tcp_ipv4.lso_max)
311519c868a0SRoamer 				return (B_FALSE);
311619c868a0SRoamer 		} else {
311719c868a0SRoamer 			return (B_FALSE);
311819c868a0SRoamer 		}
311919c868a0SRoamer 	}
312019c868a0SRoamer 
3121ba2e4443Sseb 	return (B_TRUE);
31227c478bd9Sstevel@tonic-gate }
3123f4420ae7Snd 
3124f4420ae7Snd /*
3125f4420ae7Snd  * Returns the maximum SDU according to the SDU of the constituent ports.
3126f4420ae7Snd  */
3127f4420ae7Snd static uint_t
aggr_grp_max_sdu(aggr_grp_t * grp)3128f4420ae7Snd aggr_grp_max_sdu(aggr_grp_t *grp)
3129f4420ae7Snd {
3130f4420ae7Snd 	uint_t max_sdu = (uint_t)-1;
3131f4420ae7Snd 	aggr_port_t *port;
3132f4420ae7Snd 
3133f4420ae7Snd 	ASSERT(grp->lg_ports != NULL);
3134f4420ae7Snd 
3135f4420ae7Snd 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
3136e7801d59Ssowmini 		uint_t port_sdu_max;
3137e7801d59Ssowmini 
3138e7801d59Ssowmini 		mac_sdu_get(port->lp_mh, NULL, &port_sdu_max);
3139e7801d59Ssowmini 		if (max_sdu > port_sdu_max)
3140e7801d59Ssowmini 			max_sdu = port_sdu_max;
3141f4420ae7Snd 	}
3142f4420ae7Snd 
3143f4420ae7Snd 	return (max_sdu);
3144f4420ae7Snd }
3145f4420ae7Snd 
3146f4420ae7Snd /*
3147f4420ae7Snd  * Checks if the maximum SDU of the specified port is compatible
3148f4420ae7Snd  * with the maximum SDU of the specified aggregation group, returns
3149f4420ae7Snd  * B_TRUE if it is, B_FALSE otherwise.
3150f4420ae7Snd  */
3151f4420ae7Snd static boolean_t
aggr_grp_sdu_check(aggr_grp_t * grp,aggr_port_t * port)3152f4420ae7Snd aggr_grp_sdu_check(aggr_grp_t *grp, aggr_port_t *port)
3153f4420ae7Snd {
3154e7801d59Ssowmini 	uint_t port_sdu_max;
3155f4420ae7Snd 
3156e7801d59Ssowmini 	mac_sdu_get(port->lp_mh, NULL, &port_sdu_max);
3157e7801d59Ssowmini 	return (port_sdu_max >= grp->lg_max_sdu);
3158f4420ae7Snd }
3159d62bc4baSyz 
3160d62bc4baSyz /*
3161d62bc4baSyz  * Returns the maximum margin according to the margin of the constituent ports.
3162d62bc4baSyz  */
3163d62bc4baSyz static uint32_t
aggr_grp_max_margin(aggr_grp_t * grp)3164d62bc4baSyz aggr_grp_max_margin(aggr_grp_t *grp)
3165d62bc4baSyz {
3166d62bc4baSyz 	uint32_t margin = UINT32_MAX;
3167d62bc4baSyz 	aggr_port_t *port;
3168d62bc4baSyz 
3169da14cebeSEric Cheng 	ASSERT(grp->lg_mh == NULL);
3170d62bc4baSyz 	ASSERT(grp->lg_ports != NULL);
3171d62bc4baSyz 
3172d62bc4baSyz 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
3173d62bc4baSyz 		if (margin > port->lp_margin)
3174d62bc4baSyz 			margin = port->lp_margin;
3175d62bc4baSyz 	}
3176d62bc4baSyz 
3177d62bc4baSyz 	grp->lg_margin = margin;
3178d62bc4baSyz 	return (margin);
3179d62bc4baSyz }
3180d62bc4baSyz 
3181d62bc4baSyz /*
3182d62bc4baSyz  * Checks if the maximum margin of the specified port is compatible
3183d62bc4baSyz  * with the maximum margin of the specified aggregation group, returns
3184d62bc4baSyz  * B_TRUE if it is, B_FALSE otherwise.
3185d62bc4baSyz  */
3186d62bc4baSyz static boolean_t
aggr_grp_margin_check(aggr_grp_t * grp,aggr_port_t * port)3187d62bc4baSyz aggr_grp_margin_check(aggr_grp_t *grp, aggr_port_t *port)
3188d62bc4baSyz {
3189d62bc4baSyz 	if (port->lp_margin >= grp->lg_margin)
3190d62bc4baSyz 		return (B_TRUE);
3191d62bc4baSyz 
3192d62bc4baSyz 	/*
3193d62bc4baSyz 	 * See whether the current margin value is allowed to be changed to
3194d62bc4baSyz 	 * the new value.
3195d62bc4baSyz 	 */
3196d62bc4baSyz 	if (!mac_margin_update(grp->lg_mh, port->lp_margin))
3197d62bc4baSyz 		return (B_FALSE);
3198d62bc4baSyz 
3199d62bc4baSyz 	grp->lg_margin = port->lp_margin;
3200d62bc4baSyz 	return (B_TRUE);
3201d62bc4baSyz }
3202986cab2cSGirish Moodalbail 
3203986cab2cSGirish Moodalbail /*
3204986cab2cSGirish Moodalbail  * Set MTU on individual ports of an aggregation group
3205986cab2cSGirish Moodalbail  */
3206986cab2cSGirish Moodalbail static int
aggr_set_port_sdu(aggr_grp_t * grp,aggr_port_t * port,uint32_t sdu,uint32_t * old_mtu)3207986cab2cSGirish Moodalbail aggr_set_port_sdu(aggr_grp_t *grp, aggr_port_t *port, uint32_t sdu,
3208986cab2cSGirish Moodalbail     uint32_t *old_mtu)
3209986cab2cSGirish Moodalbail {
3210666e8af9SRobert Mustacchi 	boolean_t		removed = B_FALSE;
3211986cab2cSGirish Moodalbail 	mac_perim_handle_t	mph;
3212986cab2cSGirish Moodalbail 	mac_diag_t		diag;
3213986cab2cSGirish Moodalbail 	int			err, rv, retry = 0;
3214986cab2cSGirish Moodalbail 
3215986cab2cSGirish Moodalbail 	if (port->lp_mah != NULL) {
3216986cab2cSGirish Moodalbail 		(void) mac_unicast_remove(port->lp_mch, port->lp_mah);
3217986cab2cSGirish Moodalbail 		port->lp_mah = NULL;
3218986cab2cSGirish Moodalbail 		removed = B_TRUE;
3219986cab2cSGirish Moodalbail 	}
3220986cab2cSGirish Moodalbail 	err = mac_set_mtu(port->lp_mh, sdu, old_mtu);
3221986cab2cSGirish Moodalbail try_again:
32224c91d6c6SVenugopal Iyer 	if (removed && (rv = mac_unicast_add(port->lp_mch, NULL,
32234c91d6c6SVenugopal Iyer 	    MAC_UNICAST_PRIMARY | MAC_UNICAST_DISABLE_TX_VID_CHECK,
32244c91d6c6SVenugopal Iyer 	    &port->lp_mah, 0, &diag)) != 0) {
3225986cab2cSGirish Moodalbail 		/*
3226986cab2cSGirish Moodalbail 		 * following is a workaround for a bug in 'bge' driver.
3227986cab2cSGirish Moodalbail 		 * See CR 6794654 for more information and this work around
3228986cab2cSGirish Moodalbail 		 * will be removed once the CR is fixed.
3229986cab2cSGirish Moodalbail 		 */
3230986cab2cSGirish Moodalbail 		if (rv == EIO && retry++ < 3) {
3231986cab2cSGirish Moodalbail 			delay(2 * hz);
3232986cab2cSGirish Moodalbail 			goto try_again;
3233986cab2cSGirish Moodalbail 		}
3234986cab2cSGirish Moodalbail 		/*
32354c91d6c6SVenugopal Iyer 		 * if mac_unicast_add() failed while setting the MTU,
3236986cab2cSGirish Moodalbail 		 * detach the port from the group.
3237986cab2cSGirish Moodalbail 		 */
3238986cab2cSGirish Moodalbail 		mac_perim_enter_by_mh(port->lp_mh, &mph);
3239986cab2cSGirish Moodalbail 		(void) aggr_grp_detach_port(grp, port);
3240986cab2cSGirish Moodalbail 		mac_perim_exit(mph);
3241986cab2cSGirish Moodalbail 		cmn_err(CE_WARN, "Unable to restart the port %s while "
3242986cab2cSGirish Moodalbail 		    "setting MTU. Detaching the port from the aggregation.",
3243986cab2cSGirish Moodalbail 		    mac_client_name(port->lp_mch));
3244986cab2cSGirish Moodalbail 	}
3245986cab2cSGirish Moodalbail 	return (err);
3246986cab2cSGirish Moodalbail }
3247986cab2cSGirish Moodalbail 
3248986cab2cSGirish Moodalbail static int
aggr_sdu_update(aggr_grp_t * grp,uint32_t sdu)3249986cab2cSGirish Moodalbail aggr_sdu_update(aggr_grp_t *grp, uint32_t sdu)
3250986cab2cSGirish Moodalbail {
3251986cab2cSGirish Moodalbail 	int			err = 0, i, rv;
3252986cab2cSGirish Moodalbail 	aggr_port_t		*port;
3253986cab2cSGirish Moodalbail 	uint32_t		*mtu;
3254986cab2cSGirish Moodalbail 
3255986cab2cSGirish Moodalbail 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
3256986cab2cSGirish Moodalbail 
3257986cab2cSGirish Moodalbail 	/*
3258986cab2cSGirish Moodalbail 	 * If the MTU being set is equal to aggr group's maximum
3259986cab2cSGirish Moodalbail 	 * allowable value, then there is nothing to change
3260986cab2cSGirish Moodalbail 	 */
3261986cab2cSGirish Moodalbail 	if (sdu == grp->lg_max_sdu)
3262986cab2cSGirish Moodalbail 		return (0);
3263986cab2cSGirish Moodalbail 
3264986cab2cSGirish Moodalbail 	/* 0 is aggr group's min sdu */
3265986cab2cSGirish Moodalbail 	if (sdu == 0)
3266986cab2cSGirish Moodalbail 		return (EINVAL);
3267986cab2cSGirish Moodalbail 
3268986cab2cSGirish Moodalbail 	mtu = kmem_alloc(sizeof (uint32_t) * grp->lg_nports, KM_SLEEP);
3269986cab2cSGirish Moodalbail 	for (port = grp->lg_ports, i = 0; port != NULL && err == 0;
3270986cab2cSGirish Moodalbail 	    port = port->lp_next, i++) {
3271986cab2cSGirish Moodalbail 		err = aggr_set_port_sdu(grp, port, sdu, mtu + i);
3272986cab2cSGirish Moodalbail 	}
3273986cab2cSGirish Moodalbail 	if (err != 0) {
3274986cab2cSGirish Moodalbail 		/* recover from error: reset the mtus of the ports */
3275986cab2cSGirish Moodalbail 		aggr_port_t *tmp;
3276986cab2cSGirish Moodalbail 
3277986cab2cSGirish Moodalbail 		for (tmp = grp->lg_ports, i = 0; tmp != port;
3278986cab2cSGirish Moodalbail 		    tmp = tmp->lp_next, i++) {
3279986cab2cSGirish Moodalbail 			(void) aggr_set_port_sdu(grp, tmp, *(mtu + i), NULL);
3280986cab2cSGirish Moodalbail 		}
3281986cab2cSGirish Moodalbail 		goto bail;
3282986cab2cSGirish Moodalbail 	}
3283986cab2cSGirish Moodalbail 	grp->lg_max_sdu = aggr_grp_max_sdu(grp);
3284986cab2cSGirish Moodalbail 	rv = mac_maxsdu_update(grp->lg_mh, grp->lg_max_sdu);
3285986cab2cSGirish Moodalbail 	ASSERT(rv == 0);
3286986cab2cSGirish Moodalbail bail:
3287986cab2cSGirish Moodalbail 	kmem_free(mtu, sizeof (uint32_t) * grp->lg_nports);
3288986cab2cSGirish Moodalbail 	return (err);
3289986cab2cSGirish Moodalbail }
3290986cab2cSGirish Moodalbail 
3291986cab2cSGirish Moodalbail /*
3292986cab2cSGirish Moodalbail  * Callback functions for set/get of properties
3293986cab2cSGirish Moodalbail  */
3294986cab2cSGirish Moodalbail /*ARGSUSED*/
3295986cab2cSGirish Moodalbail static int
aggr_m_setprop(void * m_driver,const char * pr_name,mac_prop_id_t pr_num,uint_t pr_valsize,const void * pr_val)3296986cab2cSGirish Moodalbail aggr_m_setprop(void *m_driver, const char *pr_name, mac_prop_id_t pr_num,
3297986cab2cSGirish Moodalbail     uint_t pr_valsize, const void *pr_val)
3298986cab2cSGirish Moodalbail {
3299666e8af9SRobert Mustacchi 	int		err = ENOTSUP;
3300666e8af9SRobert Mustacchi 	aggr_grp_t	*grp = m_driver;
3301986cab2cSGirish Moodalbail 
3302986cab2cSGirish Moodalbail 	switch (pr_num) {
3303986cab2cSGirish Moodalbail 	case MAC_PROP_MTU: {
3304666e8af9SRobert Mustacchi 		uint32_t	mtu;
3305986cab2cSGirish Moodalbail 
3306986cab2cSGirish Moodalbail 		if (pr_valsize < sizeof (mtu)) {
3307986cab2cSGirish Moodalbail 			err = EINVAL;
3308986cab2cSGirish Moodalbail 			break;
3309986cab2cSGirish Moodalbail 		}
3310986cab2cSGirish Moodalbail 		bcopy(pr_val, &mtu, sizeof (mtu));
3311986cab2cSGirish Moodalbail 		err = aggr_sdu_update(grp, mtu);
3312986cab2cSGirish Moodalbail 		break;
3313986cab2cSGirish Moodalbail 	}
3314986cab2cSGirish Moodalbail 	default:
3315986cab2cSGirish Moodalbail 		break;
3316986cab2cSGirish Moodalbail 	}
3317986cab2cSGirish Moodalbail 	return (err);
3318986cab2cSGirish Moodalbail }
3319986cab2cSGirish Moodalbail 
/*
 * One boundary (endpoint) of an MTU range, as collected and sorted by
 * aggr_mtu_range_intersection().
 */
typedef struct rboundary {
	uint32_t	bval;	/* value of the boundary */
	int		btype;	/* +1 for a range minimum, -1 for a maximum */
} rboundary_t;
33240591ddd0SPrakash Jalan 
33250591ddd0SPrakash Jalan /*
33260591ddd0SPrakash Jalan  * This function finds the intersection of mtu ranges stored in arrays -
33270591ddd0SPrakash Jalan  * mrange[0] ... mrange[mcount -1]. It returns the intersection in rval.
33280591ddd0SPrakash Jalan  * Individual arrays are assumed to contain non-overlapping ranges.
33290591ddd0SPrakash Jalan  * Algorithm:
33300591ddd0SPrakash Jalan  *   A range has two boundaries - min and max. We scan all arrays and store
33310591ddd0SPrakash Jalan  * each boundary as a separate element in a temporary array. We also store
33320591ddd0SPrakash Jalan  * the boundary types, min or max, as +1 or -1 respectively in the temporary
33330591ddd0SPrakash Jalan  * array. Then we sort the temporary array in ascending order. We scan the
33340591ddd0SPrakash Jalan  * sorted array from lower to higher values and keep a cumulative sum of
33350591ddd0SPrakash Jalan  * boundary types. Element in the temporary array for which the sum reaches
33360591ddd0SPrakash Jalan  * mcount is a min boundary of a range in the result and next element will be
33370591ddd0SPrakash Jalan  * max boundary.
33380591ddd0SPrakash Jalan  *
33390591ddd0SPrakash Jalan  * Example for mcount = 3,
33400591ddd0SPrakash Jalan  *
33410591ddd0SPrakash Jalan  *  ----|_________|-------|_______|----|__|------ mrange[0]
33420591ddd0SPrakash Jalan  *
33430591ddd0SPrakash Jalan  *  -------|________|--|____________|-----|___|-- mrange[1]
33440591ddd0SPrakash Jalan  *
33450591ddd0SPrakash Jalan  *  --------|________________|-------|____|------ mrange[2]
33460591ddd0SPrakash Jalan  *
33470591ddd0SPrakash Jalan  *                                      3 2 1
33480591ddd0SPrakash Jalan  *                                       \|/
33490591ddd0SPrakash Jalan  *      1  23     2 1  2  3  2    1 01 2  V   0  <- the sum
33500591ddd0SPrakash Jalan  *  ----|--||-----|-|--|--|--|----|-||-|--|---|-- sorted array
33510591ddd0SPrakash Jalan  *
33520591ddd0SPrakash Jalan  *                                 same min and max
33530591ddd0SPrakash Jalan  *                                        V
33540591ddd0SPrakash Jalan  *  --------|_____|-------|__|------------|------ intersecting ranges
33550591ddd0SPrakash Jalan  */
33560591ddd0SPrakash Jalan void
aggr_mtu_range_intersection(mac_propval_range_t ** mrange,int mcount,mac_propval_uint32_range_t ** prval,int * prmaxcnt,int * prcount)33570591ddd0SPrakash Jalan aggr_mtu_range_intersection(mac_propval_range_t **mrange, int mcount,
33580591ddd0SPrakash Jalan     mac_propval_uint32_range_t **prval, int *prmaxcnt, int *prcount)
33590591ddd0SPrakash Jalan {
33600591ddd0SPrakash Jalan 	mac_propval_uint32_range_t	*rval, *ur;
33610591ddd0SPrakash Jalan 	int				rmaxcnt, rcount;
33620591ddd0SPrakash Jalan 	size_t				sz_range32;
33630591ddd0SPrakash Jalan 	rboundary_t			*ta; /* temporary array */
33640591ddd0SPrakash Jalan 	rboundary_t			temp;
33650591ddd0SPrakash Jalan 	boolean_t			range_started = B_FALSE;
33660591ddd0SPrakash Jalan 	int				i, j, m, sum;
33670591ddd0SPrakash Jalan 
33680591ddd0SPrakash Jalan 	sz_range32 = sizeof (mac_propval_uint32_range_t);
33690591ddd0SPrakash Jalan 
33700591ddd0SPrakash Jalan 	for (i = 0, rmaxcnt = 0; i < mcount; i++)
33710591ddd0SPrakash Jalan 		rmaxcnt += mrange[i]->mpr_count;
33720591ddd0SPrakash Jalan 
33730591ddd0SPrakash Jalan 	/* Allocate enough space to store the results */
33740591ddd0SPrakash Jalan 	rval = kmem_alloc(rmaxcnt * sz_range32, KM_SLEEP);
33750591ddd0SPrakash Jalan 
33760591ddd0SPrakash Jalan 	/* Number of boundaries are twice as many as ranges */
33770591ddd0SPrakash Jalan 	ta = kmem_alloc(2 * rmaxcnt * sizeof (rboundary_t), KM_SLEEP);
33780591ddd0SPrakash Jalan 
33790591ddd0SPrakash Jalan 	for (i = 0, m = 0; i < mcount; i++) {
33800591ddd0SPrakash Jalan 		ur = &(mrange[i]->mpr_range_uint32[0]);
33810591ddd0SPrakash Jalan 		for (j = 0; j < mrange[i]->mpr_count; j++) {
33820591ddd0SPrakash Jalan 			ta[m].bval = ur[j].mpur_min;
33830591ddd0SPrakash Jalan 			ta[m++].btype = 1;
33840591ddd0SPrakash Jalan 			ta[m].bval = ur[j].mpur_max;
33850591ddd0SPrakash Jalan 			ta[m++].btype = -1;
33860591ddd0SPrakash Jalan 		}
33870591ddd0SPrakash Jalan 	}
33880591ddd0SPrakash Jalan 
33890591ddd0SPrakash Jalan 	/*
33900591ddd0SPrakash Jalan 	 * Sort the temporary array in ascending order of bval;
33910591ddd0SPrakash Jalan 	 * if boundary values are same then sort on btype.
33920591ddd0SPrakash Jalan 	 */
33930591ddd0SPrakash Jalan 	for (i = 0; i < m-1; i++) {
33940591ddd0SPrakash Jalan 		for (j = i+1; j < m; j++) {
33950591ddd0SPrakash Jalan 			if ((ta[i].bval > ta[j].bval) ||
33960591ddd0SPrakash Jalan 			    ((ta[i].bval == ta[j].bval) &&
33970591ddd0SPrakash Jalan 			    (ta[i].btype < ta[j].btype))) {
33980591ddd0SPrakash Jalan 				temp = ta[i];
33990591ddd0SPrakash Jalan 				ta[i] = ta[j];
34000591ddd0SPrakash Jalan 				ta[j] = temp;
34010591ddd0SPrakash Jalan 			}
34020591ddd0SPrakash Jalan 		}
34030591ddd0SPrakash Jalan 	}
34040591ddd0SPrakash Jalan 
34050591ddd0SPrakash Jalan 	/* Walk through temporary array to find all ranges in the results */
34060591ddd0SPrakash Jalan 	for (i = 0, sum = 0, rcount = 0; i < m; i++) {
34070591ddd0SPrakash Jalan 		sum += ta[i].btype;
34080591ddd0SPrakash Jalan 		if (sum == mcount) {
34090591ddd0SPrakash Jalan 			rval[rcount].mpur_min = ta[i].bval;
34100591ddd0SPrakash Jalan 			range_started = B_TRUE;
34110591ddd0SPrakash Jalan 		} else if (sum < mcount && range_started) {
34120591ddd0SPrakash Jalan 			rval[rcount++].mpur_max = ta[i].bval;
34130591ddd0SPrakash Jalan 			range_started = B_FALSE;
34140591ddd0SPrakash Jalan 		}
34150591ddd0SPrakash Jalan 	}
34160591ddd0SPrakash Jalan 
34170591ddd0SPrakash Jalan 	*prval = rval;
34180591ddd0SPrakash Jalan 	*prmaxcnt = rmaxcnt;
34190591ddd0SPrakash Jalan 	*prcount = rcount;
342084191983SPrakash Jalan 
342184191983SPrakash Jalan 	kmem_free(ta, 2 * rmaxcnt * sizeof (rboundary_t));
34220591ddd0SPrakash Jalan }
34230591ddd0SPrakash Jalan 
34240591ddd0SPrakash Jalan /*
34250591ddd0SPrakash Jalan  * Returns the mtu ranges which could be supported by aggr group.
34260591ddd0SPrakash Jalan  * prmaxcnt returns the size of the buffer prval, prcount returns
34270591ddd0SPrakash Jalan  * the number of valid entries in prval. Caller is responsible
34280591ddd0SPrakash Jalan  * for freeing up prval.
34290591ddd0SPrakash Jalan  */
3430f0f2c3a5SGirish Moodalbail int
aggr_grp_possible_mtu_range(aggr_grp_t * grp,mac_propval_uint32_range_t ** prval,int * prmaxcnt,int * prcount)34310591ddd0SPrakash Jalan aggr_grp_possible_mtu_range(aggr_grp_t *grp, mac_propval_uint32_range_t **prval,
34320591ddd0SPrakash Jalan     int *prmaxcnt, int *prcount)
3433f0f2c3a5SGirish Moodalbail {
34340591ddd0SPrakash Jalan 	mac_propval_range_t		**vals;
3435f0f2c3a5SGirish Moodalbail 	aggr_port_t			*port;
3436f0f2c3a5SGirish Moodalbail 	mac_perim_handle_t		mph;
3437666e8af9SRobert Mustacchi 	uint_t				i, numr;
3438666e8af9SRobert Mustacchi 	int				err = 0;
34390591ddd0SPrakash Jalan 	size_t				sz_propval, sz_range32;
34400591ddd0SPrakash Jalan 	size_t				size;
3441f0f2c3a5SGirish Moodalbail 
34420591ddd0SPrakash Jalan 	sz_propval = sizeof (mac_propval_range_t);
34430591ddd0SPrakash Jalan 	sz_range32 = sizeof (mac_propval_uint32_range_t);
3444f0f2c3a5SGirish Moodalbail 
34450591ddd0SPrakash Jalan 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
34460dc2366fSVenugopal Iyer 
34470591ddd0SPrakash Jalan 	vals = kmem_zalloc(sizeof (mac_propval_range_t *) * grp->lg_nports,
3448f0f2c3a5SGirish Moodalbail 	    KM_SLEEP);
3449f0f2c3a5SGirish Moodalbail 
3450f0f2c3a5SGirish Moodalbail 	for (port = grp->lg_ports, i = 0; port != NULL;
3451f0f2c3a5SGirish Moodalbail 	    port = port->lp_next, i++) {
34520591ddd0SPrakash Jalan 
34530591ddd0SPrakash Jalan 		size = sz_propval;
34540591ddd0SPrakash Jalan 		vals[i] = kmem_alloc(size, KM_SLEEP);
34550591ddd0SPrakash Jalan 		vals[i]->mpr_count = 1;
34560591ddd0SPrakash Jalan 
3457f0f2c3a5SGirish Moodalbail 		mac_perim_enter_by_mh(port->lp_mh, &mph);
34580591ddd0SPrakash Jalan 
34590dc2366fSVenugopal Iyer 		err = mac_prop_info(port->lp_mh, MAC_PROP_MTU, NULL,
34600591ddd0SPrakash Jalan 		    NULL, 0, vals[i], NULL);
34610591ddd0SPrakash Jalan 		if (err == ENOSPC) {
34620591ddd0SPrakash Jalan 			/*
34630591ddd0SPrakash Jalan 			 * Not enough space to hold all ranges.
34640591ddd0SPrakash Jalan 			 * Allocate extra space as indicated and retry.
34650591ddd0SPrakash Jalan 			 */
34660591ddd0SPrakash Jalan 			numr = vals[i]->mpr_count;
34670591ddd0SPrakash Jalan 			kmem_free(vals[i], sz_propval);
34680591ddd0SPrakash Jalan 			size = sz_propval + (numr - 1) * sz_range32;
34690591ddd0SPrakash Jalan 			vals[i] = kmem_alloc(size, KM_SLEEP);
34700591ddd0SPrakash Jalan 			vals[i]->mpr_count = numr;
34710591ddd0SPrakash Jalan 			err = mac_prop_info(port->lp_mh, MAC_PROP_MTU, NULL,
34720591ddd0SPrakash Jalan 			    NULL, 0, vals[i], NULL);
34730591ddd0SPrakash Jalan 			ASSERT(err != ENOSPC);
34740591ddd0SPrakash Jalan 		}
3475f0f2c3a5SGirish Moodalbail 		mac_perim_exit(mph);
34760591ddd0SPrakash Jalan 		if (err != 0) {
34770591ddd0SPrakash Jalan 			kmem_free(vals[i], size);
34780591ddd0SPrakash Jalan 			vals[i] = NULL;
3479f0f2c3a5SGirish Moodalbail 			break;
34800591ddd0SPrakash Jalan 		}
3481f0f2c3a5SGirish Moodalbail 	}
34820dc2366fSVenugopal Iyer 
3483f0f2c3a5SGirish Moodalbail 	/*
3484f0f2c3a5SGirish Moodalbail 	 * if any of the underlying ports does not support changing MTU then
3485f0f2c3a5SGirish Moodalbail 	 * just return ENOTSUP
3486f0f2c3a5SGirish Moodalbail 	 */
3487f0f2c3a5SGirish Moodalbail 	if (port != NULL) {
3488f0f2c3a5SGirish Moodalbail 		ASSERT(err != 0);
3489f0f2c3a5SGirish Moodalbail 		goto done;
3490f0f2c3a5SGirish Moodalbail 	}
34910dc2366fSVenugopal Iyer 
34920591ddd0SPrakash Jalan 	aggr_mtu_range_intersection(vals, grp->lg_nports, prval, prmaxcnt,
34930591ddd0SPrakash Jalan 	    prcount);
34940591ddd0SPrakash Jalan 
34950591ddd0SPrakash Jalan done:
3496f0f2c3a5SGirish Moodalbail 	for (i = 0; i < grp->lg_nports; i++) {
34970591ddd0SPrakash Jalan 		if (vals[i] != NULL) {
34980591ddd0SPrakash Jalan 			numr = vals[i]->mpr_count;
34990591ddd0SPrakash Jalan 			size = sz_propval + (numr - 1) * sz_range32;
35000591ddd0SPrakash Jalan 			kmem_free(vals[i], size);
35010591ddd0SPrakash Jalan 		}
3502f0f2c3a5SGirish Moodalbail 	}
35030dc2366fSVenugopal Iyer 
35040591ddd0SPrakash Jalan 	kmem_free(vals, sizeof (mac_propval_range_t *) * grp->lg_nports);
3505f0f2c3a5SGirish Moodalbail 	return (err);
3506f0f2c3a5SGirish Moodalbail }
3507f0f2c3a5SGirish Moodalbail 
35080dc2366fSVenugopal Iyer static void
aggr_m_propinfo(void * m_driver,const char * pr_name,mac_prop_id_t pr_num,mac_prop_info_handle_t prh)35090dc2366fSVenugopal Iyer aggr_m_propinfo(void *m_driver, const char *pr_name, mac_prop_id_t pr_num,
35100dc2366fSVenugopal Iyer     mac_prop_info_handle_t prh)
3511986cab2cSGirish Moodalbail {
35120591ddd0SPrakash Jalan 	aggr_grp_t			*grp = m_driver;
35130591ddd0SPrakash Jalan 	mac_propval_uint32_range_t	*rval = NULL;
35140591ddd0SPrakash Jalan 	int				i, rcount, rmaxcnt;
35150591ddd0SPrakash Jalan 	int				err = 0;
3516f0f2c3a5SGirish Moodalbail 
35170dc2366fSVenugopal Iyer 	_NOTE(ARGUNUSED(pr_name));
35180dc2366fSVenugopal Iyer 
3519f0f2c3a5SGirish Moodalbail 	switch (pr_num) {
35200591ddd0SPrakash Jalan 	case MAC_PROP_MTU:
35210dc2366fSVenugopal Iyer 
35220591ddd0SPrakash Jalan 		err = aggr_grp_possible_mtu_range(grp, &rval, &rmaxcnt,
35230591ddd0SPrakash Jalan 		    &rcount);
35240591ddd0SPrakash Jalan 		if (err != 0) {
35250591ddd0SPrakash Jalan 			ASSERT(rval == NULL);
35260dc2366fSVenugopal Iyer 			return;
35270591ddd0SPrakash Jalan 		}
35280591ddd0SPrakash Jalan 		for (i = 0; i < rcount; i++) {
35290591ddd0SPrakash Jalan 			mac_prop_info_set_range_uint32(prh,
35300591ddd0SPrakash Jalan 			    rval[i].mpur_min, rval[i].mpur_max);
35310591ddd0SPrakash Jalan 		}
35320591ddd0SPrakash Jalan 		kmem_free(rval, sizeof (mac_propval_uint32_range_t) * rmaxcnt);
35330dc2366fSVenugopal Iyer 		break;
35340dc2366fSVenugopal Iyer 	}
3535986cab2cSGirish Moodalbail }
3536