1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright 2019 Joyent, Inc.
25 * Copyright 2017 OmniTI Computer Consulting, Inc. All rights reserved.
26 * Copyright 2020 RackTop Systems, Inc.
27 */
28
29#include <sys/types.h>
30#include <sys/conf.h>
31#include <sys/id_space.h>
32#include <sys/esunddi.h>
33#include <sys/stat.h>
34#include <sys/mkdev.h>
35#include <sys/stream.h>
36#include <sys/strsubr.h>
37#include <sys/dlpi.h>
38#include <sys/modhash.h>
39#include <sys/mac.h>
40#include <sys/mac_provider.h>
41#include <sys/mac_impl.h>
42#include <sys/mac_client_impl.h>
43#include <sys/mac_client_priv.h>
44#include <sys/mac_soft_ring.h>
45#include <sys/mac_stat.h>
46#include <sys/dld.h>
47#include <sys/modctl.h>
48#include <sys/fs/dv_node.h>
49#include <sys/thread.h>
50#include <sys/proc.h>
51#include <sys/callb.h>
52#include <sys/cpuvar.h>
53#include <sys/atomic.h>
54#include <sys/sdt.h>
55#include <sys/mac_flow.h>
56#include <sys/ddi_intr_impl.h>
57#include <sys/disp.h>
59#include <sys/pattr.h>
60#include <sys/strsun.h>
61#include <sys/vlan.h>
62#include <inet/ip.h>
63#include <inet/tcp.h>
64#include <netinet/udp.h>
65#include <netinet/sctp.h>
66
67/*
68 * MAC Provider Interface.
69 *
70 * Interface for GLDv3 compatible NIC drivers.
71 */
72
73static void i_mac_notify_thread(void *);
74
75typedef void (*mac_notify_default_cb_fn_t)(mac_impl_t *);
76
77static const mac_notify_default_cb_fn_t mac_notify_cb_list[MAC_NNOTE] = {
78	mac_fanout_recompute,	/* MAC_NOTE_LINK */
79	NULL,		/* MAC_NOTE_UNICST */
80	NULL,		/* MAC_NOTE_TX */
81	NULL,		/* MAC_NOTE_DEVPROMISC */
82	NULL,		/* MAC_NOTE_FASTPATH_FLUSH */
83	NULL,		/* MAC_NOTE_SDU_SIZE */
84	NULL,		/* MAC_NOTE_MARGIN */
85	NULL,		/* MAC_NOTE_CAPAB_CHG */
86	NULL		/* MAC_NOTE_LOWLINK */
87};
88
89/*
90 * Driver support functions.
91 */
92
93/* REGISTRATION */
94
95mac_register_t *
96mac_alloc(uint_t mac_version)
97{
98	mac_register_t *mregp;
99
100	/*
101	 * Make sure there isn't a version mismatch between the driver and
102	 * the framework.  In the future, if multiple versions are
103	 * supported, this check could become more sophisticated.
104	 */
105	if (mac_version != MAC_VERSION)
106		return (NULL);
107
108	mregp = kmem_zalloc(sizeof (mac_register_t), KM_SLEEP);
109	mregp->m_version = mac_version;
110	return (mregp);
111}
112
113void
114mac_free(mac_register_t *mregp)
115{
116	kmem_free(mregp, sizeof (mac_register_t));
117}
118
119/*
120 * Convert a MAC's offload features into the equivalent DB_CKSUMFLAGS
121 * value.
122 */
123static uint16_t
124mac_features_to_flags(mac_handle_t mh)
125{
126	uint16_t flags = 0;
127	uint32_t cap_sum = 0;
128	mac_capab_lso_t cap_lso;
129
130	if (mac_capab_get(mh, MAC_CAPAB_HCKSUM, &cap_sum)) {
131		if (cap_sum & HCKSUM_IPHDRCKSUM)
132			flags |= HCK_IPV4_HDRCKSUM;
133
134		if (cap_sum & HCKSUM_INET_PARTIAL)
135			flags |= HCK_PARTIALCKSUM;
136		else if (cap_sum & (HCKSUM_INET_FULL_V4 | HCKSUM_INET_FULL_V6))
137			flags |= HCK_FULLCKSUM;
138	}
139
140	/*
141	 * We don't need the information stored in 'cap_lso', but we
142	 * need to pass a non-NULL pointer to appease the driver.
143	 */
144	if (mac_capab_get(mh, MAC_CAPAB_LSO, &cap_lso))
145		flags |= HW_LSO;
146
147	return (flags);
148}
149
150/*
151 * mac_register() is how drivers register new MACs with the GLDv3
152 * framework.  The mregp argument is allocated by drivers using the
153 * mac_alloc() function, and can be freed using mac_free() immediately upon
154 * return from mac_register().  Upon success (0 return value), the mhp
155 * opaque pointer becomes the driver's handle to its MAC interface, and is
156 * the argument to all other mac module entry points.
157 */
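/*
 * As a rough, hypothetical sketch (the "xx" names and field values are
 * illustrative assumptions, not taken from a real driver), a GLDv3 Ethernet
 * driver would typically drive the sequence described above from its
 * attach(9E) entry point:
 *
 *	mac_register_t *mregp;
 *	int err;
 *
 *	if ((mregp = mac_alloc(MAC_VERSION)) == NULL)
 *		return (DDI_FAILURE);
 *	mregp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
 *	mregp->m_driver = xxp;
 *	mregp->m_dip = dip;
 *	mregp->m_src_addr = xxp->xx_macaddr;
 *	mregp->m_callbacks = &xx_m_callbacks;
 *	mregp->m_min_sdu = 0;
 *	mregp->m_max_sdu = ETHERMTU;
 *	mregp->m_margin = VLAN_TAGSZ;
 *	err = mac_register(mregp, &xxp->xx_mh);
 *	mac_free(mregp);
 *	if (err != 0)
 *		return (DDI_FAILURE);
 */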
158/* ARGSUSED */
159int
160mac_register(mac_register_t *mregp, mac_handle_t *mhp)
161{
162	mac_impl_t		*mip;
163	mactype_t		*mtype;
164	int			err = EINVAL;
165	struct devnames		*dnp = NULL;
166	uint_t			instance;
167	boolean_t		style1_created = B_FALSE;
168	boolean_t		style2_created = B_FALSE;
169	char			*driver;
170	minor_t			minor = 0;
171
172	/* A successful call to mac_init_ops() sets the DN_GLDV3_DRIVER flag. */
173	if (!GLDV3_DRV(ddi_driver_major(mregp->m_dip)))
174		return (EINVAL);
175
176	/* Find the required MAC-Type plugin. */
177	if ((mtype = mactype_getplugin(mregp->m_type_ident)) == NULL)
178		return (EINVAL);
179
180	/* Create a mac_impl_t to represent this MAC. */
181	mip = kmem_cache_alloc(i_mac_impl_cachep, KM_SLEEP);
182
183	/*
184	 * The mac is not ready for open yet.
185	 */
186	mip->mi_state_flags |= MIS_DISABLED;
187
188	/*
189	 * When a mac is registered, the m_instance field can be set to:
190	 *
191	 *  0:	Get the mac's instance number from m_dip.
192	 *	This is usually used for physical device dips.
193	 *
194	 *  [1 .. MAC_MAX_MINOR-1]: Use the value as the mac's instance number.
195	 *	For example, when an aggregation is created with the key option,
196	 *	"key" will be used as the instance number.
197	 *
198	 *  -1: Assign an instance number from [MAC_MAX_MINOR .. MAXMIN-1].
199	 *	This is often used when a MAC of a virtual link is registered
200	 *	(e.g., aggregation when "key" is not specified, or vnic).
201	 *
202	 * Note that the instance number is used to derive the mi_minor field
203	 * of mac_impl_t, which will then be used to derive the name of kstats
204	 * and the devfs nodes.  The first 2 cases are needed to preserve
205	 * backward compatibility.
206	 */
207	switch (mregp->m_instance) {
208	case 0:
209		instance = ddi_get_instance(mregp->m_dip);
210		break;
211	case ((uint_t)-1):
212		minor = mac_minor_hold(B_TRUE);
213		if (minor == 0) {
214			err = ENOSPC;
215			goto fail;
216		}
217		instance = minor - 1;
218		break;
219	default:
220		instance = mregp->m_instance;
221		if (instance >= MAC_MAX_MINOR) {
222			err = EINVAL;
223			goto fail;
224		}
225		break;
226	}
227
228	mip->mi_minor = (minor_t)(instance + 1);
229	mip->mi_dip = mregp->m_dip;
230	mip->mi_clients_list = NULL;
231	mip->mi_nclients = 0;
232
233	/* Set the default IEEE Port VLAN Identifier */
234	mip->mi_pvid = 1;
235
236	/* Default bridge link learning protection values */
237	mip->mi_llimit = 1000;
238	mip->mi_ldecay = 200;
239
240	driver = (char *)ddi_driver_name(mip->mi_dip);
241
242	/* Construct the MAC name as <drvname><instance> */
243	(void) snprintf(mip->mi_name, sizeof (mip->mi_name), "%s%d",
244	    driver, instance);
245
246	mip->mi_driver = mregp->m_driver;
247
248	mip->mi_type = mtype;
249	mip->mi_margin = mregp->m_margin;
250	mip->mi_info.mi_media = mtype->mt_type;
251	mip->mi_info.mi_nativemedia = mtype->mt_nativetype;
252	if (mregp->m_max_sdu <= mregp->m_min_sdu)
253		goto fail;
254	if (mregp->m_multicast_sdu == 0)
255		mregp->m_multicast_sdu = mregp->m_max_sdu;
256	if (mregp->m_multicast_sdu < mregp->m_min_sdu ||
257	    mregp->m_multicast_sdu > mregp->m_max_sdu)
258		goto fail;
259	mip->mi_sdu_min = mregp->m_min_sdu;
260	mip->mi_sdu_max = mregp->m_max_sdu;
261	mip->mi_sdu_multicast = mregp->m_multicast_sdu;
262	mip->mi_info.mi_addr_length = mip->mi_type->mt_addr_length;
263	/*
264	 * If the media supports a broadcast address, cache a pointer to it
265	 * in the mac_info_t so that upper layers can use it.
266	 */
267	mip->mi_info.mi_brdcst_addr = mip->mi_type->mt_brdcst_addr;
268
269	mip->mi_v12n_level = mregp->m_v12n;
270
271	/*
272	 * Copy the unicast source address into the mac_info_t, but only if
273	 * the MAC-Type defines a non-zero address length.  We need to
274	 * handle MAC-Types that have an address length of 0
275	 * (point-to-point protocol MACs for example).
276	 */
277	if (mip->mi_type->mt_addr_length > 0) {
278		if (mregp->m_src_addr == NULL)
279			goto fail;
280		mip->mi_info.mi_unicst_addr =
281		    kmem_alloc(mip->mi_type->mt_addr_length, KM_SLEEP);
282		bcopy(mregp->m_src_addr, mip->mi_info.mi_unicst_addr,
283		    mip->mi_type->mt_addr_length);
284
285		/*
286		 * Copy the fixed 'factory' MAC address from the immutable
287		 * info.  This is taken to be the MAC address currently in
288		 * use.
289		 */
290		bcopy(mip->mi_info.mi_unicst_addr, mip->mi_addr,
291		    mip->mi_type->mt_addr_length);
292
293		/*
		 * At this point we should set up the classification rules
		 * and so on, but we delay that until mac_open() so that
		 * resource discovery has taken place and we know someone
		 * actually wants to use the device. Otherwise memory gets
		 * allocated for RX ring structures even during probe.
300		 */
301
302		/* Copy the destination address if one is provided. */
303		if (mregp->m_dst_addr != NULL) {
304			bcopy(mregp->m_dst_addr, mip->mi_dstaddr,
305			    mip->mi_type->mt_addr_length);
306			mip->mi_dstaddr_set = B_TRUE;
307		}
308	} else if (mregp->m_src_addr != NULL) {
309		goto fail;
310	}
311
312	/*
313	 * The format of the m_pdata is specific to the plugin.  It is
314	 * passed in as an argument to all of the plugin callbacks.  The
315	 * driver can update this information by calling
316	 * mac_pdata_update().
317	 */
318	if (mip->mi_type->mt_ops.mtops_ops & MTOPS_PDATA_VERIFY) {
319		/*
320		 * Verify if the supplied plugin data is valid.  Note that
321		 * even if the caller passed in a NULL pointer as plugin data,
322		 * we still need to verify if that's valid as the plugin may
323		 * require plugin data to function.
324		 */
325		if (!mip->mi_type->mt_ops.mtops_pdata_verify(mregp->m_pdata,
326		    mregp->m_pdata_size)) {
327			goto fail;
328		}
329		if (mregp->m_pdata != NULL) {
330			mip->mi_pdata =
331			    kmem_alloc(mregp->m_pdata_size, KM_SLEEP);
332			bcopy(mregp->m_pdata, mip->mi_pdata,
333			    mregp->m_pdata_size);
334			mip->mi_pdata_size = mregp->m_pdata_size;
335		}
336	} else if (mregp->m_pdata != NULL) {
337		/*
338		 * The caller supplied non-NULL plugin data, but the plugin
339		 * does not recognize plugin data.
340		 */
341		err = EINVAL;
342		goto fail;
343	}
344
345	/*
346	 * Register the private properties.
347	 */
348	mac_register_priv_prop(mip, mregp->m_priv_props);
349
350	/*
351	 * Stash the driver callbacks into the mac_impl_t, but first sanity
352	 * check to make sure all mandatory callbacks are set.
353	 */
354	if (mregp->m_callbacks->mc_getstat == NULL ||
355	    mregp->m_callbacks->mc_start == NULL ||
356	    mregp->m_callbacks->mc_stop == NULL ||
357	    mregp->m_callbacks->mc_setpromisc == NULL ||
358	    mregp->m_callbacks->mc_multicst == NULL) {
359		goto fail;
360	}
361	mip->mi_callbacks = mregp->m_callbacks;
362
363	if (mac_capab_get((mac_handle_t)mip, MAC_CAPAB_LEGACY,
364	    &mip->mi_capab_legacy)) {
365		mip->mi_state_flags |= MIS_LEGACY;
366		mip->mi_phy_dev = mip->mi_capab_legacy.ml_dev;
367	} else {
368		mip->mi_phy_dev = makedevice(ddi_driver_major(mip->mi_dip),
369		    mip->mi_minor);
370	}
371
372	/*
	 * Allocate a notification thread. thread_create() blocks for memory
	 * if needed; it never fails.
375	 */
376	mip->mi_notify_thread = thread_create(NULL, 0, i_mac_notify_thread,
377	    mip, 0, &p0, TS_RUN, minclsyspri);
378
379	/*
380	 * Cache the DB_CKSUMFLAGS that this MAC supports.
381	 */
382	mip->mi_tx_cksum_flags = mac_features_to_flags((mac_handle_t)mip);
383
384	/*
385	 * Initialize the capabilities
386	 */
387	bzero(&mip->mi_rx_rings_cap, sizeof (mac_capab_rings_t));
388	bzero(&mip->mi_tx_rings_cap, sizeof (mac_capab_rings_t));
389
390	if (i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_VNIC, NULL))
391		mip->mi_state_flags |= MIS_IS_VNIC;
392
393	if (i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_AGGR, NULL))
394		mip->mi_state_flags |= MIS_IS_AGGR;
395
396	mac_addr_factory_init(mip);
397
398	mac_transceiver_init(mip);
399
400	mac_led_init(mip);
401
402	/*
	 * Enforce the registered virtualization level.
404	 */
405	if (mip->mi_v12n_level & MAC_VIRT_LEVEL1) {
406		if (mac_init_rings(mip, MAC_RING_TYPE_RX) != 0 ||
407		    mac_init_rings(mip, MAC_RING_TYPE_TX) != 0)
408			goto fail;
409
410		/*
		 * The driver must register at least one RX group for this
		 * virtualization level.
413		 */
414		if (mip->mi_rx_groups == NULL)
415			goto fail;
416	}
417
418	/*
	 * The driver must set the mc_unicst entry point to NULL when it
	 * advertises MAC_CAPAB_RINGS for RX groups.
421	 */
422	if (mip->mi_rx_groups != NULL) {
423		if (mregp->m_callbacks->mc_unicst != NULL)
424			goto fail;
425	} else {
426		if (mregp->m_callbacks->mc_unicst == NULL)
427			goto fail;
428	}
429
430	/*
431	 * Initialize MAC addresses. Must be called after mac_init_rings().
432	 */
433	mac_init_macaddr(mip);
434
435	mip->mi_share_capab.ms_snum = 0;
436	if (mip->mi_v12n_level & MAC_VIRT_HIO) {
437		(void) mac_capab_get((mac_handle_t)mip, MAC_CAPAB_SHARES,
438		    &mip->mi_share_capab);
439	}
440
441	/*
442	 * Initialize the kstats for this device.
443	 */
444	mac_driver_stat_create(mip);
445
446	/* Zero out any properties. */
447	bzero(&mip->mi_resource_props, sizeof (mac_resource_props_t));
448
449	if (mip->mi_minor <= MAC_MAX_MINOR) {
450		/* Create a style-2 DLPI device */
451		if (ddi_create_minor_node(mip->mi_dip, driver, S_IFCHR, 0,
452		    DDI_NT_NET, CLONE_DEV) != DDI_SUCCESS)
453			goto fail;
454		style2_created = B_TRUE;
455
456		/* Create a style-1 DLPI device */
457		if (ddi_create_minor_node(mip->mi_dip, mip->mi_name, S_IFCHR,
458		    mip->mi_minor, DDI_NT_NET, 0) != DDI_SUCCESS)
459			goto fail;
460		style1_created = B_TRUE;
461	}
462
463	mac_flow_l2tab_create(mip, &mip->mi_flow_tab);
464
465	rw_enter(&i_mac_impl_lock, RW_WRITER);
466	if (mod_hash_insert(i_mac_impl_hash,
467	    (mod_hash_key_t)mip->mi_name, (mod_hash_val_t)mip) != 0) {
468		rw_exit(&i_mac_impl_lock);
469		err = EEXIST;
470		goto fail;
471	}
472
473	DTRACE_PROBE2(mac__register, struct devnames *, dnp,
474	    (mac_impl_t *), mip);
475
476	/*
477	 * Mark the MAC to be ready for open.
478	 */
479	mip->mi_state_flags &= ~MIS_DISABLED;
480	rw_exit(&i_mac_impl_lock);
481
482	atomic_inc_32(&i_mac_impl_count);
483
484	cmn_err(CE_NOTE, "!%s registered", mip->mi_name);
485	*mhp = (mac_handle_t)mip;
486	return (0);
487
488fail:
489	if (style1_created)
490		ddi_remove_minor_node(mip->mi_dip, mip->mi_name);
491
492	if (style2_created)
493		ddi_remove_minor_node(mip->mi_dip, driver);
494
495	mac_addr_factory_fini(mip);
496
497	/* Clean up registered MAC addresses */
498	mac_fini_macaddr(mip);
499
500	/* Clean up registered rings */
501	mac_free_rings(mip, MAC_RING_TYPE_RX);
502	mac_free_rings(mip, MAC_RING_TYPE_TX);
503
504	/* Clean up notification thread */
505	if (mip->mi_notify_thread != NULL)
506		i_mac_notify_exit(mip);
507
508	if (mip->mi_info.mi_unicst_addr != NULL) {
509		kmem_free(mip->mi_info.mi_unicst_addr,
510		    mip->mi_type->mt_addr_length);
511		mip->mi_info.mi_unicst_addr = NULL;
512	}
513
514	mac_driver_stat_delete(mip);
515
516	if (mip->mi_type != NULL) {
517		atomic_dec_32(&mip->mi_type->mt_ref);
518		mip->mi_type = NULL;
519	}
520
521	if (mip->mi_pdata != NULL) {
522		kmem_free(mip->mi_pdata, mip->mi_pdata_size);
523		mip->mi_pdata = NULL;
524		mip->mi_pdata_size = 0;
525	}
526
527	if (minor != 0) {
528		ASSERT(minor > MAC_MAX_MINOR);
529		mac_minor_rele(minor);
530	}
531
533	mac_unregister_priv_prop(mip);
534
535	/*
536	 * Clear the state before destroying the mac_impl_t
537	 */
538	mip->mi_state_flags = 0;
539
540	kmem_cache_free(i_mac_impl_cachep, mip);
541	return (err);
542}
543
544/*
545 * Unregister from the GLDv3 framework
546 */
547int
548mac_unregister(mac_handle_t mh)
549{
550	int			err;
551	mac_impl_t		*mip = (mac_impl_t *)mh;
552	mod_hash_val_t		val;
553	mac_margin_req_t	*mmr, *nextmmr;
554
555	/* Fail the unregister if there are any open references to this mac. */
556	if ((err = mac_disable_nowait(mh)) != 0)
557		return (err);
558
559	/*
560	 * Clean up notification thread and wait for it to exit.
561	 */
562	i_mac_notify_exit(mip);
563
564	/*
565	 * Prior to acquiring the MAC perimeter, remove the MAC instance from
566	 * the internal hash table. Such removal means table-walkers that
567	 * acquire the perimeter will not do so on behalf of what we are
568	 * unregistering, which prevents a deadlock.
569	 */
570	rw_enter(&i_mac_impl_lock, RW_WRITER);
571	(void) mod_hash_remove(i_mac_impl_hash,
572	    (mod_hash_key_t)mip->mi_name, &val);
573	rw_exit(&i_mac_impl_lock);
574	ASSERT(mip == (mac_impl_t *)val);
575
576	i_mac_perim_enter(mip);
577
578	/*
	 * If there are still resource properties configured on this mac,
	 * re-enable the fastpath.
580	 */
581	if (mip->mi_resource_props.mrp_mask != 0)
582		mac_fastpath_enable((mac_handle_t)mip);
583
	if (mip->mi_minor <= MAC_MAX_MINOR) {
585		ddi_remove_minor_node(mip->mi_dip, mip->mi_name);
586		ddi_remove_minor_node(mip->mi_dip,
587		    (char *)ddi_driver_name(mip->mi_dip));
588	}
589
590	ASSERT(mip->mi_nactiveclients == 0 && !(mip->mi_state_flags &
591	    MIS_EXCLUSIVE));
592
593	mac_driver_stat_delete(mip);
594
595	ASSERT(i_mac_impl_count > 0);
596	atomic_dec_32(&i_mac_impl_count);
597
598	if (mip->mi_pdata != NULL)
599		kmem_free(mip->mi_pdata, mip->mi_pdata_size);
600	mip->mi_pdata = NULL;
601	mip->mi_pdata_size = 0;
602
603	/*
	 * Free the list of margin requests.
605	 */
606	for (mmr = mip->mi_mmrp; mmr != NULL; mmr = nextmmr) {
607		nextmmr = mmr->mmr_nextp;
608		kmem_free(mmr, sizeof (mac_margin_req_t));
609	}
610	mip->mi_mmrp = NULL;
611
612	mip->mi_linkstate = mip->mi_lowlinkstate = LINK_STATE_UNKNOWN;
613	kmem_free(mip->mi_info.mi_unicst_addr, mip->mi_type->mt_addr_length);
614	mip->mi_info.mi_unicst_addr = NULL;
615
616	atomic_dec_32(&mip->mi_type->mt_ref);
617	mip->mi_type = NULL;
618
619	/*
620	 * Free the primary MAC address.
621	 */
622	mac_fini_macaddr(mip);
623
624	/*
625	 * free all rings
626	 */
627	mac_free_rings(mip, MAC_RING_TYPE_RX);
628	mac_free_rings(mip, MAC_RING_TYPE_TX);
629
630	mac_addr_factory_fini(mip);
631
632	bzero(mip->mi_addr, MAXMACADDRLEN);
633	bzero(mip->mi_dstaddr, MAXMACADDRLEN);
634	mip->mi_dstaddr_set = B_FALSE;
635
636	/* and the flows */
637	mac_flow_tab_destroy(mip->mi_flow_tab);
638	mip->mi_flow_tab = NULL;
639
640	if (mip->mi_minor > MAC_MAX_MINOR)
641		mac_minor_rele(mip->mi_minor);
642
643	cmn_err(CE_NOTE, "!%s unregistered", mip->mi_name);
644
645	/*
646	 * Reset the perim related fields to default values before
647	 * kmem_cache_free
648	 */
649	i_mac_perim_exit(mip);
650	mip->mi_state_flags = 0;
651
652	mac_unregister_priv_prop(mip);
653
654	ASSERT(mip->mi_bridge_link == NULL);
655	kmem_cache_free(i_mac_impl_cachep, mip);
656
657	return (0);
658}
659
660/* DATA RECEPTION */
661
662/*
663 * This function is invoked for packets received by the MAC driver in
664 * interrupt context. The ring generation number provided by the driver
665 * is matched with the ring generation number held in MAC. If they do not
666 * match, received packets are considered stale packets coming from an older
667 * assignment of the ring. Drop them.
668 */
669void
670mac_rx_ring(mac_handle_t mh, mac_ring_handle_t mrh, mblk_t *mp_chain,
671    uint64_t mr_gen_num)
672{
673	mac_ring_t		*mr = (mac_ring_t *)mrh;
674
675	if ((mr != NULL) && (mr->mr_gen_num != mr_gen_num)) {
676		DTRACE_PROBE2(mac__rx__rings__stale__packet, uint64_t,
677		    mr->mr_gen_num, uint64_t, mr_gen_num);
678		freemsgchain(mp_chain);
679		return;
680	}
681	mac_rx(mh, (mac_resource_handle_t)mrh, mp_chain);
682}
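
/*
 * A hedged illustration (driver structures and names are assumed): an RX
 * interrupt or poll path that has assembled a chain of mblks for a ring
 * would hand it to the framework along with the generation number that was
 * passed to the ring's mri_start entry point, so that stale chains can be
 * detected as described above:
 *
 *	mblk_t *mp_chain = xx_ring_rx(rxr, poll_bytes);
 *
 *	if (mp_chain != NULL) {
 *		mac_rx_ring(xxp->xx_mh, rxr->xr_ring_handle, mp_chain,
 *		    rxr->xr_gen_num);
 *	}
 */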
683
684/*
685 * This function is invoked for each packet received by the underlying driver.
686 */
687void
688mac_rx(mac_handle_t mh, mac_resource_handle_t mrh, mblk_t *mp_chain)
689{
690	mac_impl_t *mip = (mac_impl_t *)mh;
691
692	/*
693	 * Check if the link is part of a bridge.  If not, then we don't need
694	 * to take the lock to remain consistent.  Make this common case
695	 * lock-free and tail-call optimized.
696	 */
697	if (mip->mi_bridge_link == NULL) {
698		mac_rx_common(mh, mrh, mp_chain);
699	} else {
700		/*
701		 * Once we take a reference on the bridge link, the bridge
702		 * module itself can't unload, so the callback pointers are
703		 * stable.
704		 */
705		mutex_enter(&mip->mi_bridge_lock);
706		if ((mh = mip->mi_bridge_link) != NULL)
707			mac_bridge_ref_cb(mh, B_TRUE);
708		mutex_exit(&mip->mi_bridge_lock);
709		if (mh == NULL) {
710			mac_rx_common((mac_handle_t)mip, mrh, mp_chain);
711		} else {
712			mac_bridge_rx_cb(mh, mrh, mp_chain);
713			mac_bridge_ref_cb(mh, B_FALSE);
714		}
715	}
716}
717
718/*
719 * Special case function: this allows snooping of packets transmitted and
720 * received by TRILL. By design, they go directly into the TRILL module.
721 */
722void
723mac_trill_snoop(mac_handle_t mh, mblk_t *mp)
724{
725	mac_impl_t *mip = (mac_impl_t *)mh;
726
727	if (mip->mi_promisc_list != NULL)
728		mac_promisc_dispatch(mip, mp, NULL, B_FALSE);
729}
730
731/*
732 * This is the upward reentry point for packets arriving from the bridging
733 * module and from mac_rx for links not part of a bridge.
734 */
735void
736mac_rx_common(mac_handle_t mh, mac_resource_handle_t mrh, mblk_t *mp_chain)
737{
738	mac_impl_t		*mip = (mac_impl_t *)mh;
739	mac_ring_t		*mr = (mac_ring_t *)mrh;
740	mac_soft_ring_set_t	*mac_srs;
741	mblk_t			*bp = mp_chain;
742
743	/*
744	 * If there are any promiscuous mode callbacks defined for
745	 * this MAC, pass them a copy if appropriate.
746	 */
747	if (mip->mi_promisc_list != NULL)
748		mac_promisc_dispatch(mip, mp_chain, NULL, B_FALSE);
749
750	if (mr != NULL) {
751		/*
752		 * If the SRS teardown has started, just return. The 'mr'
753		 * continues to be valid until the driver unregisters the MAC.
754		 * Hardware classified packets will not make their way up
755		 * beyond this point once the teardown has started. The driver
756		 * is never passed a pointer to a flow entry or SRS or any
757		 * structure that can be freed much before mac_unregister.
758		 */
759		mutex_enter(&mr->mr_lock);
760		if ((mr->mr_state != MR_INUSE) || (mr->mr_flag &
761		    (MR_INCIPIENT | MR_CONDEMNED | MR_QUIESCE))) {
762			mutex_exit(&mr->mr_lock);
763			freemsgchain(mp_chain);
764			return;
765		}
766
767		/*
768		 * The ring is in passthru mode; pass the chain up to
769		 * the pseudo ring.
770		 */
771		if (mr->mr_classify_type == MAC_PASSTHRU_CLASSIFIER) {
772			MR_REFHOLD_LOCKED(mr);
773			mutex_exit(&mr->mr_lock);
774			mr->mr_pt_fn(mr->mr_pt_arg1, mr->mr_pt_arg2, mp_chain,
775			    B_FALSE);
776			MR_REFRELE(mr);
777			return;
778		}
779
780		/*
781		 * The passthru callback should only be set when in
782		 * MAC_PASSTHRU_CLASSIFIER mode.
783		 */
784		ASSERT3P(mr->mr_pt_fn, ==, NULL);
785
786		/*
		 * Check whether an SRS is controlling this ring.  If so, we
		 * can call the srs_lower_proc routine directly; otherwise we
		 * need to go through mac_rx_classify to reach the right
		 * place.
791		 */
792		if (mr->mr_classify_type == MAC_HW_CLASSIFIER) {
793			MR_REFHOLD_LOCKED(mr);
794			mutex_exit(&mr->mr_lock);
795			ASSERT3P(mr->mr_srs, !=, NULL);
796			mac_srs = mr->mr_srs;
797
798			/*
799			 * This is the fast path. All packets received
800			 * on this ring are hardware classified and
801			 * share the same MAC header info.
802			 */
803			mac_srs->srs_rx.sr_lower_proc(mh,
804			    (mac_resource_handle_t)mac_srs, mp_chain, B_FALSE);
805			MR_REFRELE(mr);
806			return;
807		}
808
809		mutex_exit(&mr->mr_lock);
810		/* We'll fall through to software classification */
811	} else {
812		flow_entry_t *flent;
813		int err;
814
815		rw_enter(&mip->mi_rw_lock, RW_READER);
816		if (mip->mi_single_active_client != NULL) {
817			flent = mip->mi_single_active_client->mci_flent_list;
818			FLOW_TRY_REFHOLD(flent, err);
819			rw_exit(&mip->mi_rw_lock);
820			if (err == 0) {
821				(flent->fe_cb_fn)(flent->fe_cb_arg1,
822				    flent->fe_cb_arg2, mp_chain, B_FALSE);
823				FLOW_REFRELE(flent);
824				return;
825			}
826		} else {
827			rw_exit(&mip->mi_rw_lock);
828		}
829	}
830
831	if (!FLOW_TAB_EMPTY(mip->mi_flow_tab)) {
832		if ((bp = mac_rx_flow(mh, mrh, bp)) == NULL)
833			return;
834	}
835
836	freemsgchain(bp);
837}
838
839/* DATA TRANSMISSION */
840
841/*
842 * A driver's notification to resume transmission, in case of a provider
843 * without TX rings.
844 */
845void
846mac_tx_update(mac_handle_t mh)
847{
848	mac_tx_ring_update(mh, NULL);
849}
850
851/*
852 * A driver's notification to resume transmission on the specified TX ring.
853 */
854void
855mac_tx_ring_update(mac_handle_t mh, mac_ring_handle_t rh)
856{
857	i_mac_tx_srs_notify((mac_impl_t *)mh, rh);
858}
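
/*
 * Sketch (hypothetical driver state): a driver that stopped accepting
 * packets on a TX ring because it ran out of descriptors would, once its
 * TX completion path has reclaimed enough of them, tell the framework to
 * resume transmission on that ring:
 *
 *	if (txr->xt_resched_needed && xx_tx_descs_avail(txr) > XX_TX_RESUME) {
 *		txr->xt_resched_needed = B_FALSE;
 *		mac_tx_ring_update(xxp->xx_mh, txr->xt_ring_handle);
 *	}
 *
 * A driver without TX rings would call mac_tx_update(xxp->xx_mh) instead.
 */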
859
860/* LINK STATE */
861/*
862 * Notify the MAC layer about a link state change
863 */
864void
865mac_link_update(mac_handle_t mh, link_state_t link)
866{
867	mac_impl_t	*mip = (mac_impl_t *)mh;
868
869	/*
870	 * Save the link state.
871	 */
872	mip->mi_lowlinkstate = link;
873
874	/*
875	 * Send a MAC_NOTE_LOWLINK notification.  This tells the notification
876	 * thread to deliver both lower and upper notifications.
877	 */
878	i_mac_notify(mip, MAC_NOTE_LOWLINK);
879}
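
/*
 * For illustration only (names assumed): a driver's link-state interrupt or
 * periodic link check would report transitions like this:
 *
 *	link_state_t new_state;
 *
 *	new_state = xx_phy_link_up(xxp) ? LINK_STATE_UP : LINK_STATE_DOWN;
 *	if (new_state != xxp->xx_link_state) {
 *		xxp->xx_link_state = new_state;
 *		mac_link_update(xxp->xx_mh, new_state);
 *	}
 */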
880
881/*
882 * Notify the MAC layer about a link state change due to bridging.
883 */
884void
885mac_link_redo(mac_handle_t mh, link_state_t link)
886{
887	mac_impl_t	*mip = (mac_impl_t *)mh;
888
889	/*
890	 * Save the link state.
891	 */
892	mip->mi_linkstate = link;
893
894	/*
895	 * Send a MAC_NOTE_LINK notification.  Only upper notifications are
896	 * made.
897	 */
898	i_mac_notify(mip, MAC_NOTE_LINK);
899}
900
901/* MINOR NODE HANDLING */
902
903/*
904 * Given a dev_t, return the instance number (PPA) associated with it.
905 * Drivers can use this in their getinfo(9e) implementation to lookup
906 * the instance number (i.e. PPA) of the device, to use as an index to
907 * their own array of soft state structures.
908 *
909 * Returns -1 on error.
910 */
911int
912mac_devt_to_instance(dev_t devt)
913{
914	return (dld_devt_to_instance(devt));
915}
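
/*
 * A minimal sketch of a getinfo(9E) implementation built on this (the "xx"
 * names are hypothetical; a real driver would also handle
 * DDI_INFO_DEVT2DEVINFO):
 *
 *	static int
 *	xx_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resp)
 *	{
 *		int inst = mac_devt_to_instance((dev_t)arg);
 *
 *		if (cmd != DDI_INFO_DEVT2INSTANCE || inst < 0)
 *			return (DDI_FAILURE);
 *		*resp = (void *)(uintptr_t)inst;
 *		return (DDI_SUCCESS);
 *	}
 */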
916
917/*
918 * This function returns the first minor number that is available for
919 * driver private use.  All minor numbers smaller than this are
920 * reserved for GLDv3 use.
921 */
922minor_t
923mac_private_minor(void)
924{
925	return (MAC_PRIVATE_MINOR);
926}
927
928/* OTHER CONTROL INFORMATION */
929
930/*
931 * A driver notified us that its primary MAC address has changed.
932 */
933void
934mac_unicst_update(mac_handle_t mh, const uint8_t *addr)
935{
936	mac_impl_t	*mip = (mac_impl_t *)mh;
937
938	if (mip->mi_type->mt_addr_length == 0)
939		return;
940
941	i_mac_perim_enter(mip);
942
943	/*
	 * If the address has changed, freshen the MAC address value and
	 * update all MAC clients that share it.
946	 */
947	if (bcmp(addr, mip->mi_addr, mip->mi_type->mt_addr_length) != 0) {
948		mac_freshen_macaddr(mac_find_macaddr(mip, mip->mi_addr),
949		    (uint8_t *)addr);
950	}
951
952	i_mac_perim_exit(mip);
953
954	/*
955	 * Send a MAC_NOTE_UNICST notification.
956	 */
957	i_mac_notify(mip, MAC_NOTE_UNICST);
958}
959
960void
961mac_dst_update(mac_handle_t mh, const uint8_t *addr)
962{
963	mac_impl_t	*mip = (mac_impl_t *)mh;
964
965	if (mip->mi_type->mt_addr_length == 0)
966		return;
967
968	i_mac_perim_enter(mip);
969	bcopy(addr, mip->mi_dstaddr, mip->mi_type->mt_addr_length);
970	i_mac_perim_exit(mip);
971	i_mac_notify(mip, MAC_NOTE_DEST);
972}
973
974/*
975 * MAC plugin information changed.
976 */
977int
978mac_pdata_update(mac_handle_t mh, void *mac_pdata, size_t dsize)
979{
980	mac_impl_t	*mip = (mac_impl_t *)mh;
981
982	/*
983	 * Verify that the plugin supports MAC plugin data and that the
984	 * supplied data is valid.
985	 */
986	if (!(mip->mi_type->mt_ops.mtops_ops & MTOPS_PDATA_VERIFY))
987		return (EINVAL);
988	if (!mip->mi_type->mt_ops.mtops_pdata_verify(mac_pdata, dsize))
989		return (EINVAL);
990
991	if (mip->mi_pdata != NULL)
992		kmem_free(mip->mi_pdata, mip->mi_pdata_size);
993
994	mip->mi_pdata = kmem_alloc(dsize, KM_SLEEP);
995	bcopy(mac_pdata, mip->mi_pdata, dsize);
996	mip->mi_pdata_size = dsize;
997
998	/*
	 * Since the MAC plugin data is used to construct MAC headers that
	 * are cached in fast-path headers, we need to flush the fast-path
	 * information for links associated with this mac.
1002	 */
1003	i_mac_notify(mip, MAC_NOTE_FASTPATH_FLUSH);
1004	return (0);
1005}
1006
1007/*
 * The MAC provider or MAC framework calls this function when it wants
 * to notify upstream consumers that the capabilities have changed and
 * that they should modify their own internal state accordingly.
 *
 * We currently have no regard for the fact that a provider could
 * decide to drop capabilities which would invalidate pending traffic.
 * For example, if one were to disable the Tx checksum offload while
 * TCP/IP traffic was being sent by mac clients relying on that
 * feature, then those packets would hit the wire with missing or
 * partial checksums. A proper solution involves not only providing
 * notification, but also performing client quiescing. That is, a capab
 * change should be treated as an atomic transaction that forms a
 * barrier between traffic relying on the current capabs and traffic
 * relying on the new capabs. In practice, simnet is currently the
 * only provider that could hit this, and it's an easily avoidable
 * situation (at worst it should only lead to some dropped packets).
 * But if we ever want better on-the-fly capab changes for actual
 * hardware providers, then we should give this update mechanism a
 * proper implementation.
1027 */
1028void
1029mac_capab_update(mac_handle_t mh)
1030{
1031	/*
1032	 * Send a MAC_NOTE_CAPAB_CHG notification to alert upstream
1033	 * clients to renegotiate capabilities.
1034	 */
1035	i_mac_notify((mac_impl_t *)mh, MAC_NOTE_CAPAB_CHG);
1036}
1037
1038/*
 * Used by normal drivers to update the max SDU size.
 * We need to handle the case of a smaller mi_sdu_multicast, since this is
 * called by mac_set_mtu() even for drivers that have different unicast and
 * multicast MTUs, and we don't want to accidentally increase the multicast
 * MTU in that case.
1044 */
1045int
1046mac_maxsdu_update(mac_handle_t mh, uint_t sdu_max)
1047{
1048	mac_impl_t	*mip = (mac_impl_t *)mh;
1049
1050	if (sdu_max == 0 || sdu_max < mip->mi_sdu_min)
1051		return (EINVAL);
1052	mip->mi_sdu_max = sdu_max;
1053	if (mip->mi_sdu_multicast > mip->mi_sdu_max)
1054		mip->mi_sdu_multicast = mip->mi_sdu_max;
1055
1056	/* Send a MAC_NOTE_SDU_SIZE notification. */
1057	i_mac_notify(mip, MAC_NOTE_SDU_SIZE);
1058	return (0);
1059}
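
/*
 * Sketch (assumed names): the MAC_PROP_MTU case of a driver's mc_setprop
 * entry point typically reprograms the hardware and then reports the new
 * maximum SDU to the framework:
 *
 *	case MAC_PROP_MTU: {
 *		uint32_t mtu;
 *
 *		bcopy(pr_val, &mtu, sizeof (mtu));
 *		if ((err = xx_set_hw_mtu(xxp, mtu)) == 0)
 *			err = mac_maxsdu_update(xxp->xx_mh, mtu);
 *		break;
 *	}
 */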
1060
1061/*
1062 * Version of the above function that is used by drivers that have a different
1063 * max sdu size for multicast/broadcast vs. unicast.
1064 */
1065int
1066mac_maxsdu_update2(mac_handle_t mh, uint_t sdu_max, uint_t sdu_multicast)
1067{
1068	mac_impl_t	*mip = (mac_impl_t *)mh;
1069
1070	if (sdu_max == 0 || sdu_max < mip->mi_sdu_min)
1071		return (EINVAL);
1072	if (sdu_multicast == 0)
1073		sdu_multicast = sdu_max;
1074	if (sdu_multicast > sdu_max || sdu_multicast < mip->mi_sdu_min)
1075		return (EINVAL);
1076	mip->mi_sdu_max = sdu_max;
1077	mip->mi_sdu_multicast = sdu_multicast;
1078
1079	/* Send a MAC_NOTE_SDU_SIZE notification. */
1080	i_mac_notify(mip, MAC_NOTE_SDU_SIZE);
1081	return (0);
1082}
1083
1084static void
1085mac_ring_intr_retarget(mac_group_t *group, mac_ring_t *ring)
1086{
1087	mac_client_impl_t *mcip;
1088	flow_entry_t *flent;
1089	mac_soft_ring_set_t *mac_rx_srs;
1090	mac_cpus_t *srs_cpu;
1091	int i;
1092
1093	if (((mcip = MAC_GROUP_ONLY_CLIENT(group)) != NULL) &&
1094	    (!ring->mr_info.mri_intr.mi_ddi_shared)) {
1095		/* interrupt can be re-targeted */
1096		ASSERT(group->mrg_state == MAC_GROUP_STATE_RESERVED);
1097		flent = mcip->mci_flent;
1098		if (ring->mr_type == MAC_RING_TYPE_RX) {
1099			for (i = 0; i < flent->fe_rx_srs_cnt; i++) {
1100				mac_rx_srs = flent->fe_rx_srs[i];
1101				if (mac_rx_srs->srs_ring != ring)
1102					continue;
1103				srs_cpu = &mac_rx_srs->srs_cpu;
1104				mutex_enter(&cpu_lock);
1105				mac_rx_srs_retarget_intr(mac_rx_srs,
1106				    srs_cpu->mc_rx_intr_cpu);
1107				mutex_exit(&cpu_lock);
1108				break;
1109			}
1110		} else {
1111			if (flent->fe_tx_srs != NULL) {
1112				mutex_enter(&cpu_lock);
1113				mac_tx_srs_retarget_intr(
1114				    flent->fe_tx_srs);
1115				mutex_exit(&cpu_lock);
1116			}
1117		}
1118	}
1119}
1120
1121/*
 * Clients like aggr create pseudo rings (mac_ring_t) and expose them to
 * their clients. There is a 1-1 mapping between a pseudo ring and the
 * underlying hardware ring. DDI interrupt handles are exported from the
 * hardware ring to the pseudo ring. Thus, when the interrupt handle
 * changes, clients of aggr that are using the handle need to switch to
 * the new handle and re-target their interrupts.
1128 */
1129static void
1130mac_pseudo_ring_intr_retarget(mac_impl_t *mip, mac_ring_t *ring,
1131    ddi_intr_handle_t ddh)
1132{
1133	mac_ring_t *pring;
1134	mac_group_t *pgroup;
1135	mac_impl_t *pmip;
1136	char macname[MAXNAMELEN];
1137	mac_perim_handle_t p_mph;
1138	uint64_t saved_gen_num;
1139
1140again:
1141	pring = (mac_ring_t *)ring->mr_prh;
1142	pgroup = (mac_group_t *)pring->mr_gh;
1143	pmip = (mac_impl_t *)pgroup->mrg_mh;
1144	saved_gen_num = ring->mr_gen_num;
1145	(void) strlcpy(macname, pmip->mi_name, MAXNAMELEN);
1146	/*
1147	 * We need to enter aggr's perimeter. The locking hierarchy
1148	 * dictates that aggr's perimeter should be entered first
1149	 * and then the port's perimeter. So drop the port's
1150	 * perimeter, enter aggr's and then re-enter port's
1151	 * perimeter.
1152	 */
1153	i_mac_perim_exit(mip);
1154	/*
1155	 * While we know pmip is the aggr's mip, there is a
1156	 * possibility that aggr could have unregistered by
1157	 * the time we exit port's perimeter (mip) and
1158	 * enter aggr's perimeter (pmip). To avoid that
1159	 * scenario, enter aggr's perimeter using its name.
1160	 */
1161	if (mac_perim_enter_by_macname(macname, &p_mph) != 0)
1162		return;
1163	i_mac_perim_enter(mip);
1164	/*
	 * Check if the ring got assigned to another aggregation before
	 * we could enter aggr's and the port's perimeter. When a ring
	 * gets deleted from an aggregation, it calls mac_stop_ring(),
	 * which increments the generation number, so checking the
	 * generation number is enough.
1170	 */
1171	if (ring->mr_gen_num != saved_gen_num && ring->mr_prh != NULL) {
1172		i_mac_perim_exit(mip);
1173		mac_perim_exit(p_mph);
1174		i_mac_perim_enter(mip);
1175		goto again;
1176	}
1177
1178	/* Check if pseudo ring is still present */
1179	if (ring->mr_prh != NULL) {
1180		pring->mr_info.mri_intr.mi_ddi_handle = ddh;
1181		pring->mr_info.mri_intr.mi_ddi_shared =
1182		    ring->mr_info.mri_intr.mi_ddi_shared;
1183		if (ddh != NULL)
1184			mac_ring_intr_retarget(pgroup, pring);
1185	}
1186	i_mac_perim_exit(mip);
1187	mac_perim_exit(p_mph);
}

/*
 * API called by a driver to provide a new interrupt handle for a TX/RX ring.
 * This usually happens when the IRM (Interrupt Resource Management)
 * framework either gives the driver more MSI-X interrupts or takes
 * MSI-X interrupts away from the driver.
1194 */
1195void
1196mac_ring_intr_set(mac_ring_handle_t mrh, ddi_intr_handle_t ddh)
1197{
1198	mac_ring_t	*ring = (mac_ring_t *)mrh;
1199	mac_group_t	*group = (mac_group_t *)ring->mr_gh;
1200	mac_impl_t	*mip = (mac_impl_t *)group->mrg_mh;
1201
1202	i_mac_perim_enter(mip);
1203	ring->mr_info.mri_intr.mi_ddi_handle = ddh;
1204	if (ddh == NULL) {
1205		/* Interrupts being reset */
1206		ring->mr_info.mri_intr.mi_ddi_shared = B_FALSE;
1207		if (ring->mr_prh != NULL) {
1208			mac_pseudo_ring_intr_retarget(mip, ring, ddh);
1209			return;
1210		}
1211	} else {
1212		/* New interrupt handle */
1213		mac_compare_ddi_handle(mip->mi_rx_groups,
1214		    mip->mi_rx_group_count, ring);
1215		if (!ring->mr_info.mri_intr.mi_ddi_shared) {
1216			mac_compare_ddi_handle(mip->mi_tx_groups,
1217			    mip->mi_tx_group_count, ring);
1218		}
1219		if (ring->mr_prh != NULL) {
1220			mac_pseudo_ring_intr_retarget(mip, ring, ddh);
1221			return;
1222		} else {
1223			mac_ring_intr_retarget(group, ring);
1224		}
1225	}
1226	i_mac_perim_exit(mip);
1227}
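
/*
 * Hypothetical usage (driver names assumed): a driver whose MSI-X
 * allocation is changed by IRM would typically clear the handles while
 * tearing down the old interrupts and then publish the new ones:
 *
 *	for (i = 0; i < xxp->xx_num_rx_rings; i++)
 *		mac_ring_intr_set(xxp->xx_rx_rings[i].xr_ring_handle, NULL);
 *
 *	(tear down old interrupts, allocate and enable the new ones)
 *
 *	for (i = 0; i < xxp->xx_num_rx_rings; i++) {
 *		mac_ring_intr_set(xxp->xx_rx_rings[i].xr_ring_handle,
 *		    xxp->xx_intr_htable[xxp->xx_rx_rings[i].xr_intr_vector]);
 *	}
 */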
1228
1229/* PRIVATE FUNCTIONS, FOR INTERNAL USE ONLY */
1230
1231/*
 * Log low-level link state changes and record the last reported state in
 * the mac_impl structure.
1233 */
1234static void
1235i_mac_log_link_state(mac_impl_t *mip)
1236{
1237	/*
1238	 * If no change, then it is not interesting.
1239	 */
1240	if (mip->mi_lastlowlinkstate == mip->mi_lowlinkstate)
1241		return;
1242
1243	switch (mip->mi_lowlinkstate) {
1244	case LINK_STATE_UP:
1245		if (mip->mi_type->mt_ops.mtops_ops & MTOPS_LINK_DETAILS) {
1246			char det[200];
1247
1248			mip->mi_type->mt_ops.mtops_link_details(det,
1249			    sizeof (det), (mac_handle_t)mip, mip->mi_pdata);
1250
1251			cmn_err(CE_NOTE, "!%s link up, %s", mip->mi_name, det);
1252		} else {
1253			cmn_err(CE_NOTE, "!%s link up", mip->mi_name);
1254		}
1255		break;
1256
1257	case LINK_STATE_DOWN:
1258		/*
1259		 * Only transitions from UP to DOWN are interesting
1260		 */
1261		if (mip->mi_lastlowlinkstate != LINK_STATE_UNKNOWN)
1262			cmn_err(CE_NOTE, "!%s link down", mip->mi_name);
1263		break;
1264
1265	case LINK_STATE_UNKNOWN:
1266		/*
1267		 * This case is normally not interesting.
1268		 */
1269		break;
1270	}
1271	mip->mi_lastlowlinkstate = mip->mi_lowlinkstate;
1272}
1273
1274/*
 * Main routine for the notification callback thread.
1276 */
1277static void
1278i_mac_notify_thread(void *arg)
1279{
1280	mac_impl_t	*mip = arg;
1281	callb_cpr_t	cprinfo;
1282	mac_cb_t	*mcb;
1283	mac_cb_info_t	*mcbi;
1284	mac_notify_cb_t	*mncb;
1285
1286	mcbi = &mip->mi_notify_cb_info;
1287	CALLB_CPR_INIT(&cprinfo, mcbi->mcbi_lockp, callb_generic_cpr,
1288	    "i_mac_notify_thread");
1289
1290	mutex_enter(mcbi->mcbi_lockp);
1291
1292	for (;;) {
1293		uint32_t	bits;
1294		uint32_t	type;
1295
1296		bits = mip->mi_notify_bits;
1297		if (bits == 0) {
1298			CALLB_CPR_SAFE_BEGIN(&cprinfo);
1299			cv_wait(&mcbi->mcbi_cv, mcbi->mcbi_lockp);
1300			CALLB_CPR_SAFE_END(&cprinfo, mcbi->mcbi_lockp);
1301			continue;
1302		}
1303		mip->mi_notify_bits = 0;
1304		if ((bits & (1 << MAC_NNOTE)) != 0) {
1305			/* request to quit */
1306			ASSERT(mip->mi_state_flags & MIS_DISABLED);
1307			break;
1308		}
1309
1310		mutex_exit(mcbi->mcbi_lockp);
1311
1312		/*
1313		 * Log link changes on the actual link, but then do reports on
1314		 * synthetic state (if part of a bridge).
1315		 */
1316		if ((bits & (1 << MAC_NOTE_LOWLINK)) != 0) {
1317			link_state_t newstate;
1318			mac_handle_t mh;
1319
1320			i_mac_log_link_state(mip);
1321			newstate = mip->mi_lowlinkstate;
1322			if (mip->mi_bridge_link != NULL) {
1323				mutex_enter(&mip->mi_bridge_lock);
1324				if ((mh = mip->mi_bridge_link) != NULL) {
1325					newstate = mac_bridge_ls_cb(mh,
1326					    newstate);
1327				}
1328				mutex_exit(&mip->mi_bridge_lock);
1329			}
1330			if (newstate != mip->mi_linkstate) {
1331				mip->mi_linkstate = newstate;
1332				bits |= 1 << MAC_NOTE_LINK;
1333			}
1334		}
1335
1336		/*
1337		 * Depending on which capabs have changed, the Tx
1338		 * checksum flags may also need to be updated.
1339		 */
1340		if ((bits & (1 << MAC_NOTE_CAPAB_CHG)) != 0) {
1341			mac_perim_handle_t mph;
1342			mac_handle_t mh = (mac_handle_t)mip;
1343
1344			mac_perim_enter_by_mh(mh, &mph);
1345			mip->mi_tx_cksum_flags = mac_features_to_flags(mh);
1346			mac_perim_exit(mph);
1347		}
1348
1349		/*
1350		 * Do notification callbacks for each notification type.
1351		 */
1352		for (type = 0; type < MAC_NNOTE; type++) {
1353			if ((bits & (1 << type)) == 0) {
1354				continue;
1355			}
1356
1357			if (mac_notify_cb_list[type] != NULL)
1358				(*mac_notify_cb_list[type])(mip);
1359
1360			/*
1361			 * Walk the list of notifications.
1362			 */
1363			MAC_CALLBACK_WALKER_INC(&mip->mi_notify_cb_info);
1364			for (mcb = mip->mi_notify_cb_list; mcb != NULL;
1365			    mcb = mcb->mcb_nextp) {
1366				mncb = (mac_notify_cb_t *)mcb->mcb_objp;
1367				mncb->mncb_fn(mncb->mncb_arg, type);
1368			}
1369			MAC_CALLBACK_WALKER_DCR(&mip->mi_notify_cb_info,
1370			    &mip->mi_notify_cb_list);
1371		}
1372
1373		mutex_enter(mcbi->mcbi_lockp);
1374	}
1375
1376	mip->mi_state_flags |= MIS_NOTIFY_DONE;
1377	cv_broadcast(&mcbi->mcbi_cv);
1378
1379	/* CALLB_CPR_EXIT drops the lock */
1380	CALLB_CPR_EXIT(&cprinfo);
1381	thread_exit();
1382}
1383
1384/*
1385 * Signal the i_mac_notify_thread asking it to quit.
1386 * Then wait till it is done.
1387 */
1388void
1389i_mac_notify_exit(mac_impl_t *mip)
1390{
1391	mac_cb_info_t	*mcbi;
1392
1393	mcbi = &mip->mi_notify_cb_info;
1394
1395	mutex_enter(mcbi->mcbi_lockp);
1396	mip->mi_notify_bits = (1 << MAC_NNOTE);
1397	cv_broadcast(&mcbi->mcbi_cv);
1398
1400	while ((mip->mi_notify_thread != NULL) &&
1401	    !(mip->mi_state_flags & MIS_NOTIFY_DONE)) {
1402		cv_wait(&mcbi->mcbi_cv, mcbi->mcbi_lockp);
1403	}
1404
1405	/* Necessary clean up before doing kmem_cache_free */
1406	mip->mi_state_flags &= ~MIS_NOTIFY_DONE;
1407	mip->mi_notify_bits = 0;
1408	mip->mi_notify_thread = NULL;
1409	mutex_exit(mcbi->mcbi_lockp);
1410}
1411
1412/*
1413 * Entry point invoked by drivers to dynamically add a ring to an
1414 * existing group.
1415 */
1416int
1417mac_group_add_ring(mac_group_handle_t gh, int index)
1418{
1419	mac_group_t *group = (mac_group_t *)gh;
1420	mac_impl_t *mip = (mac_impl_t *)group->mrg_mh;
1421	int ret;
1422
1423	i_mac_perim_enter(mip);
1424	ret = i_mac_group_add_ring(group, NULL, index);
1425	i_mac_perim_exit(mip);
1426	return (ret);
1427}
1428
1429/*
1430 * Entry point invoked by drivers to dynamically remove a ring
1431 * from an existing group. The specified ring handle must no longer
1432 * be used by the driver after a call to this function.
1433 */
1434void
1435mac_group_rem_ring(mac_group_handle_t gh, mac_ring_handle_t rh)
1436{
1437	mac_group_t *group = (mac_group_t *)gh;
1438	mac_impl_t *mip = (mac_impl_t *)group->mrg_mh;
1439
1440	i_mac_perim_enter(mip);
1441	i_mac_group_rem_ring(group, (mac_ring_t *)rh, B_TRUE);
1442	i_mac_perim_exit(mip);
1443}
1444
1445/*
1446 * mac_prop_info_*() callbacks called from the driver's prefix_propinfo()
1447 * entry points.
1448 */
1449
1450void
1451mac_prop_info_set_default_uint8(mac_prop_info_handle_t ph, uint8_t val)
1452{
1453	mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph;
1454
1455	/* nothing to do if the caller doesn't want the default value */
1456	if (pr->pr_default == NULL)
1457		return;
1458
1459	ASSERT(pr->pr_default_size >= sizeof (uint8_t));
1460
1461	*(uint8_t *)(pr->pr_default) = val;
1462	pr->pr_flags |= MAC_PROP_INFO_DEFAULT;
1463}
1464
1465void
1466mac_prop_info_set_default_uint64(mac_prop_info_handle_t ph, uint64_t val)
1467{
1468	mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph;
1469
1470	/* nothing to do if the caller doesn't want the default value */
1471	if (pr->pr_default == NULL)
1472		return;
1473
1474	ASSERT(pr->pr_default_size >= sizeof (uint64_t));
1475
1476	bcopy(&val, pr->pr_default, sizeof (val));
1477
1478	pr->pr_flags |= MAC_PROP_INFO_DEFAULT;
1479}
1480
1481void
1482mac_prop_info_set_default_uint32(mac_prop_info_handle_t ph, uint32_t val)
1483{
1484	mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph;
1485
1486	/* nothing to do if the caller doesn't want the default value */
1487	if (pr->pr_default == NULL)
1488		return;
1489
1490	ASSERT(pr->pr_default_size >= sizeof (uint32_t));
1491
1492	bcopy(&val, pr->pr_default, sizeof (val));
1493
1494	pr->pr_flags |= MAC_PROP_INFO_DEFAULT;
1495}
1496
1497void
1498mac_prop_info_set_default_str(mac_prop_info_handle_t ph, const char *str)
1499{
1500	mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph;
1501
1502	/* nothing to do if the caller doesn't want the default value */
1503	if (pr->pr_default == NULL)
1504		return;
1505
1506	if (strlen(str) >= pr->pr_default_size)
1507		pr->pr_errno = ENOBUFS;
1508	else
1509		(void) strlcpy(pr->pr_default, str, pr->pr_default_size);
1510	pr->pr_flags |= MAC_PROP_INFO_DEFAULT;
1511}
1512
1513void
1514mac_prop_info_set_default_link_flowctrl(mac_prop_info_handle_t ph,
1515    link_flowctrl_t val)
1516{
1517	mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph;
1518
1519	/* nothing to do if the caller doesn't want the default value */
1520	if (pr->pr_default == NULL)
1521		return;
1522
1523	ASSERT(pr->pr_default_size >= sizeof (link_flowctrl_t));
1524
1525	bcopy(&val, pr->pr_default, sizeof (val));
1526
1527	pr->pr_flags |= MAC_PROP_INFO_DEFAULT;
1528}
1529
1530void
1531mac_prop_info_set_default_fec(mac_prop_info_handle_t ph, link_fec_t val)
1532{
1533	mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph;
1534
1535	/* nothing to do if the caller doesn't want the default value */
1536	if (pr->pr_default == NULL)
1537		return;
1538
1539	ASSERT(pr->pr_default_size >= sizeof (link_fec_t));
1540
1541	bcopy(&val, pr->pr_default, sizeof (val));
1542
1543	pr->pr_flags |= MAC_PROP_INFO_DEFAULT;
1544}
1545
1546void
1547mac_prop_info_set_range_uint32(mac_prop_info_handle_t ph, uint32_t min,
1548    uint32_t max)
1549{
1550	mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph;
1551	mac_propval_range_t *range = pr->pr_range;
1552	mac_propval_uint32_range_t *range32;
1553
1554	/* nothing to do if the caller doesn't want the range info */
1555	if (range == NULL)
1556		return;
1557
1558	if (pr->pr_range_cur_count++ == 0) {
1559		/* first range */
1560		pr->pr_flags |= MAC_PROP_INFO_RANGE;
1561		range->mpr_type = MAC_PROPVAL_UINT32;
1562	} else {
1563		/* all ranges of a property should be of the same type */
1564		ASSERT(range->mpr_type == MAC_PROPVAL_UINT32);
1565		if (pr->pr_range_cur_count > range->mpr_count) {
1566			pr->pr_errno = ENOSPC;
1567			return;
1568		}
1569	}
1570
1571	range32 = range->mpr_range_uint32;
1572	range32[pr->pr_range_cur_count - 1].mpur_min = min;
1573	range32[pr->pr_range_cur_count - 1].mpur_max = max;
1574}
1575
1576void
1577mac_prop_info_set_perm(mac_prop_info_handle_t ph, uint8_t perm)
1578{
1579	mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph;
1580
1581	pr->pr_perm = perm;
1582	pr->pr_flags |= MAC_PROP_INFO_PERM;
1583}
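
/*
 * A short, hedged example of a driver's mc_propinfo entry point using these
 * helpers (the property selection and limits are assumptions, not taken
 * from a real device):
 *
 *	static void
 *	xx_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t pr_num,
 *	    mac_prop_info_handle_t prh)
 *	{
 *		switch (pr_num) {
 *		case MAC_PROP_MTU:
 *			mac_prop_info_set_range_uint32(prh, XX_MIN_MTU,
 *			    XX_MAX_MTU);
 *			break;
 *		case MAC_PROP_FLOWCTRL:
 *			mac_prop_info_set_default_link_flowctrl(prh,
 *			    LINK_FLOWCTRL_NONE);
 *			break;
 *		case MAC_PROP_DUPLEX:
 *			mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
 *			break;
 *		default:
 *			break;
 *		}
 *	}
 */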
1584
1585void
1586mac_hcksum_get(const mblk_t *mp, uint32_t *start, uint32_t *stuff,
1587    uint32_t *end, uint32_t *value, uint32_t *flags_ptr)
1588{
1589	uint32_t flags;
1590
1591	ASSERT(DB_TYPE(mp) == M_DATA);
1592
1593	flags = DB_CKSUMFLAGS(mp) & HCK_FLAGS;
1594	if ((flags & (HCK_PARTIALCKSUM | HCK_FULLCKSUM)) != 0) {
1595		if (value != NULL)
1596			*value = (uint32_t)DB_CKSUM16(mp);
1597		if ((flags & HCK_PARTIALCKSUM) != 0) {
1598			if (start != NULL)
1599				*start = (uint32_t)DB_CKSUMSTART(mp);
1600			if (stuff != NULL)
1601				*stuff = (uint32_t)DB_CKSUMSTUFF(mp);
1602			if (end != NULL)
1603				*end = (uint32_t)DB_CKSUMEND(mp);
1604		}
1605	}
1606
1607	if (flags_ptr != NULL)
1608		*flags_ptr = flags;
1609}
1610
1611void
1612mac_hcksum_set(mblk_t *mp, uint32_t start, uint32_t stuff, uint32_t end,
1613    uint32_t value, uint32_t flags)
1614{
1615	ASSERT(DB_TYPE(mp) == M_DATA);
1616
1617	DB_CKSUMSTART(mp) = (intptr_t)start;
1618	DB_CKSUMSTUFF(mp) = (intptr_t)stuff;
1619	DB_CKSUMEND(mp) = (intptr_t)end;
1620	DB_CKSUMFLAGS(mp) = (uint16_t)flags;
1621	DB_CKSUM16(mp) = (uint16_t)value;
1622}
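
/*
 * Illustrative RX usage (the descriptor status bit is a made-up name): a
 * driver whose hardware has fully verified the checksums of a received
 * frame marks the mblk so the stack can skip software verification:
 *
 *	if ((desc_status & XX_RX_L4_CKSUM_OK) != 0) {
 *		mac_hcksum_set(mp, 0, 0, 0, 0,
 *		    HCK_IPV4_HDRCKSUM_OK | HCK_FULLCKSUM_OK);
 *	}
 */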
1623
1624void
1625mac_hcksum_clone(const mblk_t *src, mblk_t *dst)
1626{
1627	ASSERT3U(DB_TYPE(src), ==, M_DATA);
1628	ASSERT3U(DB_TYPE(dst), ==, M_DATA);
1629
1630	/*
1631	 * Do these assignments unconditionally, rather than only when
	 * flags is non-zero. This protects against the case where zeroed
	 * hcksum data would otherwise fail to overwrite stale data in those
	 * fields on the destination mblk_t. It's important to copy all
1635	 * possible flags (HCK_* as well as HW_*) and not just the
1636	 * checksum specific flags. Dropping flags during a clone
1637	 * could result in dropped packets. If the caller has good
1638	 * reason to drop those flags then it should do it manually,
1639	 * after the clone.
1640	 */
1641	DB_CKSUMFLAGS(dst) = DB_CKSUMFLAGS(src);
1642	DB_CKSUMSTART(dst) = DB_CKSUMSTART(src);
1643	DB_CKSUMSTUFF(dst) = DB_CKSUMSTUFF(src);
1644	DB_CKSUMEND(dst) = DB_CKSUMEND(src);
1645	DB_CKSUM16(dst) = DB_CKSUM16(src);
1646	DB_LSOMSS(dst) = DB_LSOMSS(src);
1647}
1648
1649void
1650mac_lso_get(mblk_t *mp, uint32_t *mss, uint32_t *flags)
1651{
1652	ASSERT(DB_TYPE(mp) == M_DATA);
1653
1654	if (flags != NULL) {
1655		*flags = DB_CKSUMFLAGS(mp) & HW_LSO;
1656		if ((*flags != 0) && (mss != NULL))
1657			*mss = (uint32_t)DB_LSOMSS(mp);
1658	}
1659}
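
/*
 * Illustrative TX usage (assumed driver context): before programming a TX
 * descriptor, a driver asks which offloads the stack requested for this
 * packet and extracts the MSS when LSO was requested:
 *
 *	uint32_t cksum_flags, lso_flags, mss;
 *
 *	mac_hcksum_get(mp, NULL, NULL, NULL, NULL, &cksum_flags);
 *	mac_lso_get(mp, &mss, &lso_flags);
 *	if ((lso_flags & HW_LSO) != 0)
 *		xx_tx_setup_lso(txr, mp, mss);
 */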
1660
1661void
1662mac_transceiver_info_set_present(mac_transceiver_info_t *infop,
1663    boolean_t present)
1664{
1665	infop->mti_present = present;
1666}
1667
1668void
1669mac_transceiver_info_set_usable(mac_transceiver_info_t *infop,
1670    boolean_t usable)
1671{
1672	infop->mti_usable = usable;
1673}
1674
1675/*
1676 * We should really keep track of our offset and not walk everything every
1677 * time. I can't imagine that this will be kind to us at high packet rates;
1678 * however, for the moment, let's leave that.
1679 *
1680 * This walks a message block chain without pulling up to fill in the context
1681 * information. Note that the data we care about could be hidden across more
1682 * than one mblk_t.
1683 */
1684static int
1685mac_meoi_get_uint8(mblk_t *mp, off_t off, uint8_t *out)
1686{
1687	size_t mpsize;
1688	uint8_t *bp;
1689
1690	mpsize = msgsize(mp);
1691	/* Check for overflow */
	if (off + sizeof (uint8_t) > mpsize)
1693		return (-1);
1694
1695	mpsize = MBLKL(mp);
1696	while (off >= mpsize) {
1697		mp = mp->b_cont;
1698		off -= mpsize;
1699		mpsize = MBLKL(mp);
1700	}
1701
1702	bp = mp->b_rptr + off;
1703	*out = *bp;
1704	return (0);
1706}
1707
1708static int
1709mac_meoi_get_uint16(mblk_t *mp, off_t off, uint16_t *out)
1710{
1711	size_t mpsize;
1712	uint8_t *bp;
1713
1714	mpsize = msgsize(mp);
1715	/* Check for overflow */
1716	if (off + sizeof (uint16_t) > mpsize)
1717		return (-1);
1718
1719	mpsize = MBLKL(mp);
1720	while (off >= mpsize) {
1721		mp = mp->b_cont;
1722		off -= mpsize;
1723		mpsize = MBLKL(mp);
1724	}
1725
1726	/*
1727	 * Data is in network order. Note the second byte of data might be in
1728	 * the next mp.
1729	 */
1730	bp = mp->b_rptr + off;
1731	*out = *bp << 8;
1732	if (off + 1 == mpsize) {
1733		mp = mp->b_cont;
1734		bp = mp->b_rptr;
1735	} else {
1736		bp++;
1737	}
1738
1739	*out |= *bp;
1740	return (0);
1742}
1743
1745int
1746mac_ether_offload_info(mblk_t *mp, mac_ether_offload_info_t *meoi)
1747{
1748	size_t off;
1749	uint16_t ether;
1750	uint8_t ipproto, iplen, l4len, maclen;
1751
1752	bzero(meoi, sizeof (mac_ether_offload_info_t));
1753
1754	meoi->meoi_len = msgsize(mp);
1755	off = offsetof(struct ether_header, ether_type);
1756	if (mac_meoi_get_uint16(mp, off, &ether) != 0)
1757		return (-1);
1758
1759	if (ether == ETHERTYPE_VLAN) {
1760		off = offsetof(struct ether_vlan_header, ether_type);
1761		if (mac_meoi_get_uint16(mp, off, &ether) != 0)
1762			return (-1);
1763		meoi->meoi_flags |= MEOI_VLAN_TAGGED;
1764		maclen = sizeof (struct ether_vlan_header);
1765	} else {
1766		maclen = sizeof (struct ether_header);
1767	}
1768	meoi->meoi_flags |= MEOI_L2INFO_SET;
1769	meoi->meoi_l2hlen = maclen;
1770	meoi->meoi_l3proto = ether;
1771
1772	switch (ether) {
1773	case ETHERTYPE_IP:
1774		/*
1775		 * For IPv4 we need to get the length of the header, as it can
1776		 * be variable.
1777		 */
1778		off = offsetof(ipha_t, ipha_version_and_hdr_length) + maclen;
1779		if (mac_meoi_get_uint8(mp, off, &iplen) != 0)
1780			return (-1);
1781		iplen &= 0x0f;
1782		if (iplen < 5 || iplen > 0x0f)
1783			return (-1);
1784		iplen *= 4;
1785		off = offsetof(ipha_t, ipha_protocol) + maclen;
1786		if (mac_meoi_get_uint8(mp, off, &ipproto) == -1)
1787			return (-1);
1788		break;
1789	case ETHERTYPE_IPV6:
1790		iplen = 40;
1791		off = offsetof(ip6_t, ip6_nxt) + maclen;
1792		if (mac_meoi_get_uint8(mp, off, &ipproto) == -1)
1793			return (-1);
1794		break;
1795	default:
1796		return (0);
1797	}
1798	meoi->meoi_l3hlen = iplen;
1799	meoi->meoi_l4proto = ipproto;
1800	meoi->meoi_flags |= MEOI_L3INFO_SET;
1801
1802	switch (ipproto) {
1803	case IPPROTO_TCP:
1804		off = offsetof(tcph_t, th_offset_and_rsrvd) + maclen + iplen;
1805		if (mac_meoi_get_uint8(mp, off, &l4len) == -1)
1806			return (-1);
1807		l4len = (l4len & 0xf0) >> 4;
1808		if (l4len < 5 || l4len > 0xf)
1809			return (-1);
1810		l4len *= 4;
1811		break;
1812	case IPPROTO_UDP:
1813		l4len = sizeof (struct udphdr);
1814		break;
1815	case IPPROTO_SCTP:
1816		l4len = sizeof (sctp_hdr_t);
1817		break;
1818	default:
1819		return (0);
1820	}
1821
1822	meoi->meoi_l4hlen = l4len;
1823	meoi->meoi_flags |= MEOI_L4INFO_SET;
1824	return (0);
1825}
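
/*
 * Sketch of how a caller might consume the parsed information (assumed
 * context, not from a specific consumer): compute the total header length
 * of a TCP packet before separating headers from payload.
 *
 *	mac_ether_offload_info_t meoi;
 *	size_t hdrlen;
 *
 *	if (mac_ether_offload_info(mp, &meoi) != 0 ||
 *	    (meoi.meoi_flags & MEOI_L4INFO_SET) == 0 ||
 *	    meoi.meoi_l4proto != IPPROTO_TCP)
 *		return (B_FALSE);
 *	hdrlen = meoi.meoi_l2hlen + meoi.meoi_l3hlen + meoi.meoi_l4hlen;
 */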
1826