1da14cebEric Cheng/*
2da14cebEric Cheng * CDDL HEADER START
3da14cebEric Cheng *
4da14cebEric Cheng * The contents of this file are subject to the terms of the
5da14cebEric Cheng * Common Development and Distribution License (the "License").
6da14cebEric Cheng * You may not use this file except in compliance with the License.
7da14cebEric Cheng *
8da14cebEric Cheng * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9da14cebEric Cheng * or http://www.opensolaris.org/os/licensing.
10da14cebEric Cheng * See the License for the specific language governing permissions
11da14cebEric Cheng * and limitations under the License.
12da14cebEric Cheng *
13da14cebEric Cheng * When distributing Covered Code, include this CDDL HEADER in each
14da14cebEric Cheng * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15da14cebEric Cheng * If applicable, add the following below this CDDL HEADER, with the
16da14cebEric Cheng * fields enclosed by brackets "[]" replaced with your own identifying
17da14cebEric Cheng * information: Portions Copyright [yyyy] [name of copyright owner]
18da14cebEric Cheng *
19da14cebEric Cheng * CDDL HEADER END
20da14cebEric Cheng */
21da14cebEric Cheng
22da14cebEric Cheng/*
230591dddPrakash Jalan * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
24c61a165Ryan Zezeski * Copyright 2019 Joyent, Inc.
258241ccbDan McDonald * Copyright 2017 OmniTI Computer Consulting, Inc. All rights reserved.
26d77e6e0Paul Winder * Copyright 2020 RackTop Systems, Inc.
27da14cebEric Cheng */
28da14cebEric Cheng
29da14cebEric Cheng#include <sys/types.h>
30da14cebEric Cheng#include <sys/conf.h>
31da14cebEric Cheng#include <sys/id_space.h>
32da14cebEric Cheng#include <sys/esunddi.h>
33da14cebEric Cheng#include <sys/stat.h>
34da14cebEric Cheng#include <sys/mkdev.h>
35da14cebEric Cheng#include <sys/stream.h>
36da14cebEric Cheng#include <sys/strsubr.h>
37da14cebEric Cheng#include <sys/dlpi.h>
38da14cebEric Cheng#include <sys/modhash.h>
39da14cebEric Cheng#include <sys/mac.h>
40da14cebEric Cheng#include <sys/mac_provider.h>
41da14cebEric Cheng#include <sys/mac_impl.h>
42da14cebEric Cheng#include <sys/mac_client_impl.h>
43da14cebEric Cheng#include <sys/mac_client_priv.h>
44da14cebEric Cheng#include <sys/mac_soft_ring.h>
450dc2366Venugopal Iyer#include <sys/mac_stat.h>
4661af195Garrett D'Amore#include <sys/dld.h>
47da14cebEric Cheng#include <sys/modctl.h>
48da14cebEric Cheng#include <sys/fs/dv_node.h>
49da14cebEric Cheng#include <sys/thread.h>
50da14cebEric Cheng#include <sys/proc.h>
51da14cebEric Cheng#include <sys/callb.h>
52da14cebEric Cheng#include <sys/cpuvar.h>
53da14cebEric Cheng#include <sys/atomic.h>
54da14cebEric Cheng#include <sys/sdt.h>
55da14cebEric Cheng#include <sys/mac_flow.h>
56da14cebEric Cheng#include <sys/ddi_intr_impl.h>
57da14cebEric Cheng#include <sys/disp.h>
58da14cebEric Cheng#include <sys/sdt.h>
590dc2366Venugopal Iyer#include <sys/pattr.h>
600dc2366Venugopal Iyer#include <sys/strsun.h>
6184de666Ryan Zezeski#include <sys/vlan.h>
62d240edaRobert Mustacchi#include <inet/ip.h>
63d240edaRobert Mustacchi#include <inet/tcp.h>
64d240edaRobert Mustacchi#include <netinet/udp.h>
65d240edaRobert Mustacchi#include <netinet/sctp.h>
66da14cebEric Cheng
67da14cebEric Cheng/*
68da14cebEric Cheng * MAC Provider Interface.
69da14cebEric Cheng *
70da14cebEric Cheng * Interface for GLDv3 compatible NIC drivers.
71da14cebEric Cheng */
72da14cebEric Cheng
73da14cebEric Chengstatic void i_mac_notify_thread(void *);
74da14cebEric Cheng
75da14cebEric Chengtypedef void (*mac_notify_default_cb_fn_t)(mac_impl_t *);
76da14cebEric Cheng
774eaa471Rishi Srivatsavaistatic const mac_notify_default_cb_fn_t mac_notify_cb_list[MAC_NNOTE] = {
784eaa471Rishi Srivatsavai	mac_fanout_recompute,	/* MAC_NOTE_LINK */
794eaa471Rishi Srivatsavai	NULL,		/* MAC_NOTE_UNICST */
804eaa471Rishi Srivatsavai	NULL,		/* MAC_NOTE_TX */
814eaa471Rishi Srivatsavai	NULL,		/* MAC_NOTE_DEVPROMISC */
824eaa471Rishi Srivatsavai	NULL,		/* MAC_NOTE_FASTPATH_FLUSH */
834eaa471Rishi Srivatsavai	NULL,		/* MAC_NOTE_SDU_SIZE */
844eaa471Rishi Srivatsavai	NULL,		/* MAC_NOTE_MARGIN */
854eaa471Rishi Srivatsavai	NULL,		/* MAC_NOTE_CAPAB_CHG */
864eaa471Rishi Srivatsavai	NULL		/* MAC_NOTE_LOWLINK */
87da14cebEric Cheng};
88da14cebEric Cheng
89da14cebEric Cheng/*
90da14cebEric Cheng * Driver support functions.
91da14cebEric Cheng */
92da14cebEric Cheng
93da14cebEric Cheng/* REGISTRATION */
94da14cebEric Cheng
95da14cebEric Chengmac_register_t *
96da14cebEric Chengmac_alloc(uint_t mac_version)
97da14cebEric Cheng{
98da14cebEric Cheng	mac_register_t *mregp;
99da14cebEric Cheng
100da14cebEric Cheng	/*
101da14cebEric Cheng	 * Make sure there isn't a version mismatch between the driver and
102da14cebEric Cheng	 * the framework.  In the future, if multiple versions are
103da14cebEric Cheng	 * supported, this check could become more sophisticated.
104da14cebEric Cheng	 */
105da14cebEric Cheng	if (mac_version != MAC_VERSION)
106da14cebEric Cheng		return (NULL);
107da14cebEric Cheng
108da14cebEric Cheng	mregp = kmem_zalloc(sizeof (mac_register_t), KM_SLEEP);
109da14cebEric Cheng	mregp->m_version = mac_version;
110da14cebEric Cheng	return (mregp);
111da14cebEric Cheng}
112da14cebEric Cheng
113da14cebEric Chengvoid
114da14cebEric Chengmac_free(mac_register_t *mregp)
115da14cebEric Cheng{
116da14cebEric Cheng	kmem_free(mregp, sizeof (mac_register_t));
117da14cebEric Cheng}
118da14cebEric Cheng
119da14cebEric Cheng/*
120c61a165Ryan Zezeski * Convert a MAC's offload features into the equivalent DB_CKSUMFLAGS
121c61a165Ryan Zezeski * value.
122c61a165Ryan Zezeski */
123c61a165Ryan Zezeskistatic uint16_t
124c61a165Ryan Zezeskimac_features_to_flags(mac_handle_t mh)
125c61a165Ryan Zezeski{
126c61a165Ryan Zezeski	uint16_t flags = 0;
127c61a165Ryan Zezeski	uint32_t cap_sum = 0;
128c61a165Ryan Zezeski	mac_capab_lso_t cap_lso;
129c61a165Ryan Zezeski
130c61a165Ryan Zezeski	if (mac_capab_get(mh, MAC_CAPAB_HCKSUM, &cap_sum)) {
131c61a165Ryan Zezeski		if (cap_sum & HCKSUM_IPHDRCKSUM)
132c61a165Ryan Zezeski			flags |= HCK_IPV4_HDRCKSUM;
133c61a165Ryan Zezeski
134c61a165Ryan Zezeski		if (cap_sum & HCKSUM_INET_PARTIAL)
135c61a165Ryan Zezeski			flags |= HCK_PARTIALCKSUM;
136c61a165Ryan Zezeski		else if (cap_sum & (HCKSUM_INET_FULL_V4 | HCKSUM_INET_FULL_V6))
137c61a165Ryan Zezeski			flags |= HCK_FULLCKSUM;
138c61a165Ryan Zezeski	}
139c61a165Ryan Zezeski
140c61a165Ryan Zezeski	/*
141c61a165Ryan Zezeski	 * We don't need the information stored in 'cap_lso', but we
142c61a165Ryan Zezeski	 * need to pass a non-NULL pointer to appease the driver.
143c61a165Ryan Zezeski	 */
144c61a165Ryan Zezeski	if (mac_capab_get(mh, MAC_CAPAB_LSO, &cap_lso))
145c61a165Ryan Zezeski		flags |= HW_LSO;
146c61a165Ryan Zezeski
147c61a165Ryan Zezeski	return (flags);
148c61a165Ryan Zezeski}
149c61a165Ryan Zezeski
150c61a165Ryan Zezeski/*
151da14cebEric Cheng * mac_register() is how drivers register new MACs with the GLDv3
152da14cebEric Cheng * framework.  The mregp argument is allocated by drivers using the
153da14cebEric Cheng * mac_alloc() function, and can be freed using mac_free() immediately upon
154da14cebEric Cheng * return from mac_register().  Upon success (0 return value), the mhp
155da14cebEric Cheng * opaque pointer becomes the driver's handle to its MAC interface, and is
156da14cebEric Cheng * the argument to all other mac module entry points.
157da14cebEric Cheng */
158da14cebEric Cheng/* ARGSUSED */
159da14cebEric Chengint
160da14cebEric Chengmac_register(mac_register_t *mregp, mac_handle_t *mhp)
161da14cebEric Cheng{
162da14cebEric Cheng	mac_impl_t		*mip;
163da14cebEric Cheng	mactype_t		*mtype;
164da14cebEric Cheng	int			err = EINVAL;
165da14cebEric Cheng	struct devnames		*dnp = NULL;
166da14cebEric Cheng	uint_t			instance;
167da14cebEric Cheng	boolean_t		style1_created = B_FALSE;
168da14cebEric Cheng	boolean_t		style2_created = B_FALSE;
169da14cebEric Cheng	char			*driver;
170da14cebEric Cheng	minor_t			minor = 0;
171da14cebEric Cheng
172ee94b1cSebastien Roy	/* A successful call to mac_init_ops() sets the DN_GLDV3_DRIVER flag. */
173ee94b1cSebastien Roy	if (!GLDV3_DRV(ddi_driver_major(mregp->m_dip)))
174ee94b1cSebastien Roy		return (EINVAL);
175ee94b1cSebastien Roy
176da14cebEric Cheng	/* Find the required MAC-Type plugin. */
177da14cebEric Cheng	if ((mtype = mactype_getplugin(mregp->m_type_ident)) == NULL)
178da14cebEric Cheng		return (EINVAL);
179da14cebEric Cheng
180da14cebEric Cheng	/* Create a mac_impl_t to represent this MAC. */
181da14cebEric Cheng	mip = kmem_cache_alloc(i_mac_impl_cachep, KM_SLEEP);
182da14cebEric Cheng
183da14cebEric Cheng	/*
184da14cebEric Cheng	 * The mac is not ready for open yet.
185da14cebEric Cheng	 */
186da14cebEric Cheng	mip->mi_state_flags |= MIS_DISABLED;
187da14cebEric Cheng
188da14cebEric Cheng	/*
189da14cebEric Cheng	 * When a mac is registered, the m_instance field can be set to:
190da14cebEric Cheng	 *
191da14cebEric Cheng	 *  0:	Get the mac's instance number from m_dip.
192da14cebEric Cheng	 *	This is usually used for physical device dips.
193da14cebEric Cheng	 *
194da14cebEric Cheng	 *  [1 .. MAC_MAX_MINOR-1]: Use the value as the mac's instance number.
195da14cebEric Cheng	 *	For example, when an aggregation is created with the key option,
196da14cebEric Cheng	 *	"key" will be used as the instance number.
197da14cebEric Cheng	 *
198da14cebEric Cheng	 *  -1: Assign an instance number from [MAC_MAX_MINOR .. MAXMIN-1].
199da14cebEric Cheng	 *	This is often used when a MAC of a virtual link is registered
200da14cebEric Cheng	 *	(e.g., aggregation when "key" is not specified, or vnic).
201da14cebEric Cheng	 *
202da14cebEric Cheng	 * Note that the instance number is used to derive the mi_minor field
203da14cebEric Cheng	 * of mac_impl_t, which will then be used to derive the name of kstats
204da14cebEric Cheng	 * and the devfs nodes.  The first 2 cases are needed to preserve
205da14cebEric Cheng	 * backward compatibility.
206da14cebEric Cheng	 */
207da14cebEric Cheng	switch (mregp->m_instance) {
208da14cebEric Cheng	case 0:
209da14cebEric Cheng		instance = ddi_get_instance(mregp->m_dip);
210da14cebEric Cheng		break;
211da14cebEric Cheng	case ((uint_t)-1):
212da14cebEric Cheng		minor = mac_minor_hold(B_TRUE);
213da14cebEric Cheng		if (minor == 0) {
214da14cebEric Cheng			err = ENOSPC;
215da14cebEric Cheng			goto fail;
216da14cebEric Cheng		}
217da14cebEric Cheng		instance = minor - 1;
218da14cebEric Cheng		break;
219da14cebEric Cheng	default:
220da14cebEric Cheng		instance = mregp->m_instance;
221da14cebEric Cheng		if (instance >= MAC_MAX_MINOR) {
222da14cebEric Cheng			err = EINVAL;
223da14cebEric Cheng			goto fail;
224da14cebEric Cheng		}
225da14cebEric Cheng		break;
226da14cebEric Cheng	}
227da14cebEric Cheng
228da14cebEric Cheng	mip->mi_minor = (minor_t)(instance + 1);
229da14cebEric Cheng	mip->mi_dip = mregp->m_dip;
230da14cebEric Cheng	mip->mi_clients_list = NULL;
231da14cebEric Cheng	mip->mi_nclients = 0;
232da14cebEric Cheng
2334eaa471Rishi Srivatsavai	/* Set the default IEEE Port VLAN Identifier */
2344eaa471Rishi Srivatsavai	mip->mi_pvid = 1;
2354eaa471Rishi Srivatsavai
2364eaa471Rishi Srivatsavai	/* Default bridge link learning protection values */
2374eaa471Rishi Srivatsavai	mip->mi_llimit = 1000;
2384eaa471Rishi Srivatsavai	mip->mi_ldecay = 200;
2394eaa471Rishi Srivatsavai
240da14cebEric Cheng	driver = (char *)ddi_driver_name(mip->mi_dip);
241da14cebEric Cheng
242da14cebEric Cheng	/* Construct the MAC name as <drvname><instance> */
243da14cebEric Cheng	(void) snprintf(mip->mi_name, sizeof (mip->mi_name), "%s%d",
244da14cebEric Cheng	    driver, instance);
245da14cebEric Cheng
246da14cebEric Cheng	mip->mi_driver = mregp->m_driver;
247da14cebEric Cheng
248da14cebEric Cheng	mip->mi_type = mtype;
249da14cebEric Cheng	mip->mi_margin = mregp->m_margin;
250da14cebEric Cheng	mip->mi_info.mi_media = mtype->mt_type;
251da14cebEric Cheng	mip->mi_info.mi_nativemedia = mtype->mt_nativetype;
252da14cebEric Cheng	if (mregp->m_max_sdu <= mregp->m_min_sdu)
253da14cebEric Cheng		goto fail;
2541eee170Erik Nordmark	if (mregp->m_multicast_sdu == 0)
2551eee170Erik Nordmark		mregp->m_multicast_sdu = mregp->m_max_sdu;
2561eee170Erik Nordmark	if (mregp->m_multicast_sdu < mregp->m_min_sdu ||
2571eee170Erik Nordmark	    mregp->m_multicast_sdu > mregp->m_max_sdu)
2581eee170Erik Nordmark		goto fail;
259da14cebEric Cheng	mip->mi_sdu_min = mregp->m_min_sdu;
260da14cebEric Cheng	mip->mi_sdu_max = mregp->m_max_sdu;
2611eee170Erik Nordmark	mip->mi_sdu_multicast = mregp->m_multicast_sdu;
262da14cebEric Cheng	mip->mi_info.mi_addr_length = mip->mi_type->mt_addr_length;
263da14cebEric Cheng	/*
264da14cebEric Cheng	 * If the media supports a broadcast address, cache a pointer to it
265da14cebEric Cheng	 * in the mac_info_t so that upper layers can use it.
266da14cebEric Cheng	 */
267da14cebEric Cheng	mip->mi_info.mi_brdcst_addr = mip->mi_type->mt_brdcst_addr;
268da14cebEric Cheng
269da14cebEric Cheng	mip->mi_v12n_level = mregp->m_v12n;
270da14cebEric Cheng
271da14cebEric Cheng	/*
272da14cebEric Cheng	 * Copy the unicast source address into the mac_info_t, but only if
273da14cebEric Cheng	 * the MAC-Type defines a non-zero address length.  We need to
274da14cebEric Cheng	 * handle MAC-Types that have an address length of 0
275da14cebEric Cheng	 * (point-to-point protocol MACs for example).
276da14cebEric Cheng	 */
277da14cebEric Cheng	if (mip->mi_type->mt_addr_length > 0) {
278da14cebEric Cheng		if (mregp->m_src_addr == NULL)
279da14cebEric Cheng			goto fail;
280da14cebEric Cheng		mip->mi_info.mi_unicst_addr =
281da14cebEric Cheng		    kmem_alloc(mip->mi_type->mt_addr_length, KM_SLEEP);
282da14cebEric Cheng		bcopy(mregp->m_src_addr, mip->mi_info.mi_unicst_addr,
283da14cebEric Cheng		    mip->mi_type->mt_addr_length);
284da14cebEric Cheng
285da14cebEric Cheng		/*
286da14cebEric Cheng		 * Copy the fixed 'factory' MAC address from the immutable
287da14cebEric Cheng		 * info.  This is taken to be the MAC address currently in
288da14cebEric Cheng		 * use.
289da14cebEric Cheng		 */
290da14cebEric Cheng		bcopy(mip->mi_info.mi_unicst_addr, mip->mi_addr,
291da14cebEric Cheng		    mip->mi_type->mt_addr_length);
292da14cebEric Cheng
293da14cebEric Cheng		/*
294da14cebEric Cheng		 * At this point, we should set up the classification
295da14cebEric Cheng		 * rules etc but we delay it till mac_open() so that
296da14cebEric Cheng		 * the resource discovery has taken place and we
297da14cebEric Cheng		 * know someone wants to use the device. Otherwise
298da14cebEric Cheng		 * memory gets allocated for Rx ring structures even
299da14cebEric Cheng		 * during probe.
300da14cebEric Cheng		 */
301da14cebEric Cheng
302da14cebEric Cheng		/* Copy the destination address if one is provided. */
303da14cebEric Cheng		if (mregp->m_dst_addr != NULL) {
304da14cebEric Cheng			bcopy(mregp->m_dst_addr, mip->mi_dstaddr,
305da14cebEric Cheng			    mip->mi_type->mt_addr_length);
3062b24ab6Sebastien Roy			mip->mi_dstaddr_set = B_TRUE;
307da14cebEric Cheng		}
308da14cebEric Cheng	} else if (mregp->m_src_addr != NULL) {
309da14cebEric Cheng		goto fail;
310da14cebEric Cheng	}
311da14cebEric Cheng
312da14cebEric Cheng	/*
313da14cebEric Cheng	 * The format of the m_pdata is specific to the plugin.  It is
314da14cebEric Cheng	 * passed in as an argument to all of the plugin callbacks.  The
315da14cebEric Cheng	 * driver can update this information by calling
316da14cebEric Cheng	 * mac_pdata_update().
317da14cebEric Cheng	 */
3182b24ab6Sebastien Roy	if (mip->mi_type->mt_ops.mtops_ops & MTOPS_PDATA_VERIFY) {
319da14cebEric Cheng		/*
3202b24ab6Sebastien Roy		 * Verify if the supplied plugin data is valid.  Note that
3212b24ab6Sebastien Roy		 * even if the caller passed in a NULL pointer as plugin data,
3222b24ab6Sebastien Roy		 * we still need to verify if that's valid as the plugin may
3232b24ab6Sebastien Roy		 * require plugin data to function.
324da14cebEric Cheng		 */
325da14cebEric Cheng		if (!mip->mi_type->mt_ops.mtops_pdata_verify(mregp->m_pdata,
326da14cebEric Cheng		    mregp->m_pdata_size)) {
327da14cebEric Cheng			goto fail;
328da14cebEric Cheng		}
3292b24ab6Sebastien Roy		if (mregp->m_pdata != NULL) {
3302b24ab6Sebastien Roy			mip->mi_pdata =
3312b24ab6Sebastien Roy			    kmem_alloc(mregp->m_pdata_size, KM_SLEEP);
3322b24ab6Sebastien Roy			bcopy(mregp->m_pdata, mip->mi_pdata,
3332b24ab6Sebastien Roy			    mregp->m_pdata_size);
3342b24ab6Sebastien Roy			mip->mi_pdata_size = mregp->m_pdata_size;
3352b24ab6Sebastien Roy		}
3362b24ab6Sebastien Roy	} else if (mregp->m_pdata != NULL) {
3372b24ab6Sebastien Roy		/*
3382b24ab6Sebastien Roy		 * The caller supplied non-NULL plugin data, but the plugin
3392b24ab6Sebastien Roy		 * does not recognize plugin data.
3402b24ab6Sebastien Roy		 */
3412b24ab6Sebastien Roy		err = EINVAL;
3422b24ab6Sebastien Roy		goto fail;
343da14cebEric Cheng	}
344da14cebEric Cheng
345da14cebEric Cheng	/*
346da14cebEric Cheng	 * Register the private properties.
347da14cebEric Cheng	 */
3480dc2366Venugopal Iyer	mac_register_priv_prop(mip, mregp->m_priv_props);
349da14cebEric Cheng
350da14cebEric Cheng	/*
351da14cebEric Cheng	 * Stash the driver callbacks into the mac_impl_t, but first sanity
352da14cebEric Cheng	 * check to make sure all mandatory callbacks are set.
353