1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright 2019 Joyent, Inc.
25 * Copyright 2017 OmniTI Computer Consulting, Inc. All rights reserved.
26 * Copyright 2020 RackTop Systems, Inc.
27 */
28
29 #include <sys/types.h>
30 #include <sys/conf.h>
31 #include <sys/id_space.h>
32 #include <sys/esunddi.h>
33 #include <sys/stat.h>
34 #include <sys/mkdev.h>
35 #include <sys/stream.h>
36 #include <sys/strsubr.h>
37 #include <sys/dlpi.h>
38 #include <sys/modhash.h>
39 #include <sys/mac.h>
40 #include <sys/mac_provider.h>
41 #include <sys/mac_impl.h>
42 #include <sys/mac_client_impl.h>
43 #include <sys/mac_client_priv.h>
44 #include <sys/mac_soft_ring.h>
45 #include <sys/mac_stat.h>
46 #include <sys/dld.h>
47 #include <sys/modctl.h>
48 #include <sys/fs/dv_node.h>
49 #include <sys/thread.h>
50 #include <sys/proc.h>
51 #include <sys/callb.h>
52 #include <sys/cpuvar.h>
53 #include <sys/atomic.h>
54 #include <sys/sdt.h>
55 #include <sys/mac_flow.h>
56 #include <sys/ddi_intr_impl.h>
57 #include <sys/disp.h>
58 #include <sys/sdt.h>
59 #include <sys/pattr.h>
60 #include <sys/strsun.h>
61 #include <sys/vlan.h>
62 #include <inet/ip.h>
63 #include <inet/tcp.h>
64 #include <netinet/udp.h>
65 #include <netinet/sctp.h>
66
67 /*
68 * MAC Provider Interface.
69 *
70 * Interface for GLDv3 compatible NIC drivers.
71 */
72
73 static void i_mac_notify_thread(void *);
74
75 typedef void (*mac_notify_default_cb_fn_t)(mac_impl_t *);
76
77 static const mac_notify_default_cb_fn_t mac_notify_cb_list[MAC_NNOTE] = {
78 mac_fanout_recompute, /* MAC_NOTE_LINK */
79 NULL, /* MAC_NOTE_UNICST */
80 NULL, /* MAC_NOTE_TX */
81 NULL, /* MAC_NOTE_DEVPROMISC */
82 NULL, /* MAC_NOTE_FASTPATH_FLUSH */
83 NULL, /* MAC_NOTE_SDU_SIZE */
84 NULL, /* MAC_NOTE_MARGIN */
85 NULL, /* MAC_NOTE_CAPAB_CHG */
86 NULL /* MAC_NOTE_LOWLINK */
87 };
88
89 /*
90 * Driver support functions.
91 */
92
93 /* REGISTRATION */
94
95 mac_register_t *
mac_alloc(uint_t mac_version)96 mac_alloc(uint_t mac_version)
97 {
98 mac_register_t *mregp;
99
100 /*
101 * Make sure there isn't a version mismatch between the driver and
102 * the framework. In the future, if multiple versions are
103 * supported, this check could become more sophisticated.
104 */
105 if (mac_version != MAC_VERSION)
106 return (NULL);
107
108 mregp = kmem_zalloc(sizeof (mac_register_t), KM_SLEEP);
109 mregp->m_version = mac_version;
110 return (mregp);
111 }
112
113 void
mac_free(mac_register_t * mregp)114 mac_free(mac_register_t *mregp)
115 {
116 kmem_free(mregp, sizeof (mac_register_t));
117 }
118
119 /*
120 * Convert a MAC's offload features into the equivalent DB_CKSUMFLAGS
121 * value.
122 */
123 static uint16_t
mac_features_to_flags(mac_handle_t mh)124 mac_features_to_flags(mac_handle_t mh)
125 {
126 uint16_t flags = 0;
127 uint32_t cap_sum = 0;
128 mac_capab_lso_t cap_lso;
129
130 if (mac_capab_get(mh, MAC_CAPAB_HCKSUM, &cap_sum)) {
131 if (cap_sum & HCKSUM_IPHDRCKSUM)
132 flags |= HCK_IPV4_HDRCKSUM;
133
134 if (cap_sum & HCKSUM_INET_PARTIAL)
135 flags |= HCK_PARTIALCKSUM;
136 else if (cap_sum & (HCKSUM_INET_FULL_V4 | HCKSUM_INET_FULL_V6))
137 flags |= HCK_FULLCKSUM;
138 }
139
140 /*
141 * We don't need the information stored in 'cap_lso', but we
142 * need to pass a non-NULL pointer to appease the driver.
143 */
144 if (mac_capab_get(mh, MAC_CAPAB_LSO, &cap_lso))
145 flags |= HW_LSO;
146
147 return (flags);
148 }
149
150 /*
151 * mac_register() is how drivers register new MACs with the GLDv3
152 * framework. The mregp argument is allocated by drivers using the
153 * mac_alloc() function, and can be freed using mac_free() immediately upon
154 * return from mac_register(). Upon success (0 return value), the mhp
155 * opaque pointer becomes the driver's handle to its MAC interface, and is
156 * the argument to all other mac module entry points.
157 */
158 /* ARGSUSED */
159 int
mac_register(mac_register_t * mregp,mac_handle_t * mhp)160 mac_register(mac_register_t *mregp, mac_handle_t *mhp)
161 {
162 mac_impl_t *mip;
163 mactype_t *mtype;
164 int err = EINVAL;
165 struct devnames *dnp = NULL;
166 uint_t instance;
167 boolean_t style1_created = B_FALSE;
168 boolean_t style2_created = B_FALSE;
169 char *driver;
170 minor_t minor = 0;
171
172 /* A successful call to mac_init_ops() sets the DN_GLDV3_DRIVER flag. */
173 if (!GLDV3_DRV(ddi_driver_major(mregp->m_dip)))
174 return (EINVAL);
175
176 /* Find the required MAC-Type plugin. */
177 if ((mtype = mactype_getplugin(mregp->m_type_ident)) == NULL)
178 return (EINVAL);
179
180 /* Create a mac_impl_t to represent this MAC. */
181 mip = kmem_cache_alloc(i_mac_impl_cachep, KM_SLEEP);
182
183 /*
184 * The mac is not ready for open yet.
185 */
186 mip->mi_state_flags |= MIS_DISABLED;
187
188 /*
189 * When a mac is registered, the m_instance field can be set to:
190 *
191 * 0: Get the mac's instance number from m_dip.
192 * This is usually used for physical device dips.
193 *
194 * [1 .. MAC_MAX_MINOR-1]: Use the value as the mac's instance number.
195 * For example, when an aggregation is created with the key option,
196 * "key" will be used as the instance number.
197 *
198 * -1: Assign an instance number from [MAC_MAX_MINOR .. MAXMIN-1].
199 * This is often used when a MAC of a virtual link is registered
200 * (e.g., aggregation when "key" is not specified, or vnic).
201 *
202 * Note that the instance number is used to derive the mi_minor field
203 * of mac_impl_t, which will then be used to derive the name of kstats
204 * and the devfs nodes. The first 2 cases are needed to preserve
205 * backward compatibility.
206 */
207 switch (mregp->m_instance) {
208 case 0:
209 instance = ddi_get_instance(mregp->m_dip);
210 break;
211 case ((uint_t)-1):
212 minor = mac_minor_hold(B_TRUE);
213 if (minor == 0) {
214 err = ENOSPC;
215 goto fail;
216 }
217 instance = minor - 1;
218 break;
219 default:
220 instance = mregp->m_instance;
221 if (instance >= MAC_MAX_MINOR) {
222 err = EINVAL;
223 goto fail;
224 }
225 break;
226 }
227
228 mip->mi_minor = (minor_t)(instance + 1);
229 mip->mi_dip = mregp->m_dip;
230 mip->mi_clients_list = NULL;
231 mip->mi_nclients = 0;
232
233 /* Set the default IEEE Port VLAN Identifier */
234 mip->mi_pvid = 1;
235
236 /* Default bridge link learning protection values */
237 mip->mi_llimit = 1000;
238 mip->mi_ldecay = 200;
239
240 driver = (char *)ddi_driver_name(mip->mi_dip);
241
242 /* Construct the MAC name as <drvname><instance> */
243 (void) snprintf(mip->mi_name, sizeof (mip->mi_name), "%s%d",
244 driver, instance);
245
246 mip->mi_driver = mregp->m_driver;
247
248 mip->mi_type = mtype;
249 mip->mi_margin = mregp->m_margin;
250 mip->mi_info.mi_media = mtype->mt_type;
251 mip->mi_info.mi_nativemedia = mtype->mt_nativetype;
252 if (mregp->m_max_sdu <= mregp->m_min_sdu)
253 goto fail;
254 if (mregp->m_multicast_sdu == 0)
255 mregp->m_multicast_sdu = mregp->m_max_sdu;
256 if (mregp->m_multicast_sdu < mregp->m_min_sdu ||
257 mregp->m_multicast_sdu > mregp->m_max_sdu)
258 goto fail;
259 mip->mi_sdu_min = mregp->m_min_sdu;
260 mip->mi_sdu_max = mregp->m_max_sdu;
261 mip->mi_sdu_multicast = mregp->m_multicast_sdu;
262 mip->mi_info.mi_addr_length = mip->mi_type->mt_addr_length;
263 /*
264 * If the media supports a broadcast address, cache a pointer to it
265 * in the mac_info_t so that upper layers can use it.
266 */
267 mip->mi_info.mi_brdcst_addr = mip->mi_type->mt_brdcst_addr;
268
269 mip->mi_v12n_level = mregp->m_v12n;
270
271 /*
272 * Copy the unicast source address into the mac_info_t, but only if
273 * the MAC-Type defines a non-zero address length. We need to
274 * handle MAC-Types that have an address length of 0
275 * (point-to-point protocol MACs for example).
276 */
277 if (mip->mi_type->mt_addr_length > 0) {
278 if (mregp->m_src_addr == NULL)
279 goto fail;
280 mip->mi_info.mi_unicst_addr =
281 kmem_alloc(mip->mi_type->mt_addr_length, KM_SLEEP);
282 bcopy(mregp->m_src_addr, mip->mi_info.mi_unicst_addr,
283 mip->mi_type->mt_addr_length);
284
285 /*
286 * Copy the fixed 'factory' MAC address from the immutable
287 * info. This is taken to be the MAC address currently in
288 * use.
289 */
290 bcopy(mip->mi_info.mi_unicst_addr, mip->mi_addr,
291 mip->mi_type->mt_addr_length);
292
293 /*
294 * At this point, we should set up the classification
295 * rules etc but we delay it till mac_open() so that
296 * the resource discovery has taken place and we
297 * know someone wants to use the device. Otherwise
298 * memory gets allocated for Rx ring structures even
299 * during probe.
300 */
301
302 /* Copy the destination address if one is provided. */
303 if (mregp->m_dst_addr != NULL) {
304 bcopy(mregp->m_dst_addr, mip->mi_dstaddr,
305 mip->mi_type->mt_addr_length);
306 mip->mi_dstaddr_set = B_TRUE;
307 }
308 } else if (mregp->m_src_addr != NULL) {
309 goto fail;
310 }
311
312 /*
313 * The format of the m_pdata is specific to the plugin. It is
314 * passed in as an argument to all of the plugin callbacks. The
315 * driver can update this information by calling
316 * mac_pdata_update().
317 */
318 if (mip->mi_type->mt_ops.mtops_ops & MTOPS_PDATA_VERIFY) {
319 /*
320 * Verify if the supplied plugin data is valid. Note that
321 * even if the caller passed in a NULL pointer as plugin data,
322 * we still need to verify if that's valid as the plugin may
323 * require plugin data to function.
324 */
325 if (!mip->mi_type->mt_ops.mtops_pdata_verify(mregp->m_pdata,
326 mregp->m_pdata_size)) {
327 goto fail;
328 }
329 if (mregp->m_pdata != NULL) {
330 mip->mi_pdata =
331 kmem_alloc(mregp->m_pdata_size, KM_SLEEP);
332 bcopy(mregp->m_pdata, mip->mi_pdata,
333 mregp->m_pdata_size);
334 mip->mi_pdata_size = mregp->m_pdata_size;
335 }
336 } else if (mregp->m_pdata != NULL) {
337 /*
338 * The caller supplied non-NULL plugin data, but the plugin
339 * does not recognize plugin data.
340 */
341 err = EINVAL;
342 goto fail;
343 }
344
345 /*
346 * Register the private properties.
347 */
348 mac_register_priv_prop(mip, mregp->m_priv_props);
349
350 /*
351 * Stash the driver callbacks into the mac_impl_t, but first sanity
352 * check to make sure all mandatory callbacks are set.
353 */
354 if (mregp->m_callbacks->mc_getstat == NULL ||
355 mregp->m_callbacks->mc_start == NULL ||
356 mregp->m_callbacks->mc_stop == NULL ||
357 mregp->m_callbacks->mc_setpromisc == NULL ||
358 mregp->m_callbacks->mc_multicst == NULL) {
359 goto fail;
360 }
361 mip->mi_callbacks = mregp->m_callbacks;
362
363 if (mac_capab_get((mac_handle_t)mip, MAC_CAPAB_LEGACY,
364 &mip->mi_capab_legacy)) {
365 mip->mi_state_flags |= MIS_LEGACY;
366 mip->mi_phy_dev = mip->mi_capab_legacy.ml_dev;
367 } else {
368 mip->mi_phy_dev = makedevice(ddi_driver_major(mip->mi_dip),
369 mip->mi_minor);
370 }
371
372 /*
373 * Allocate a notification thread. thread_create blocks for memory
374 * if needed, it never fails.
375 */
376 mip->mi_notify_thread = thread_create(NULL, 0, i_mac_notify_thread,
377 mip, 0, &p0, TS_RUN, minclsyspri);
378
379 /*
380 * Cache the DB_CKSUMFLAGS that this MAC supports.
381 */
382 mip->mi_tx_cksum_flags = mac_features_to_flags((mac_handle_t)mip);
383
384 /*
385 * Initialize the capabilities
386 */
387 bzero(&mip->mi_rx_rings_cap, sizeof (mac_capab_rings_t));
388 bzero(&mip->mi_tx_rings_cap, sizeof (mac_capab_rings_t));
389
390 if (i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_VNIC, NULL))
391 mip->mi_state_flags |= MIS_IS_VNIC;
392
393 if (i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_AGGR, NULL))
394 mip->mi_state_flags |= MIS_IS_AGGR;
395
396 if (i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_OVERLAY, NULL))
397 mip->mi_state_flags |= MIS_IS_OVERLAY;
398
399 mac_addr_factory_init(mip);
400
401 mac_transceiver_init(mip);
402
403 mac_led_init(mip);
404
405 /*
406 * Enforce the virtrualization level registered.
407 */
408 if (mip->mi_v12n_level & MAC_VIRT_LEVEL1) {
409 if (mac_init_rings(mip, MAC_RING_TYPE_RX) != 0 ||
410 mac_init_rings(mip, MAC_RING_TYPE_TX) != 0)
411 goto fail;
412
413 /*
414 * The driver needs to register at least rx rings for this
415 * virtualization level.
416 */
417 if (mip->mi_rx_groups == NULL)
418 goto fail;
419 }
420
421 /*
422 * The driver must set mc_unicst entry point to NULL when it advertises
423 * CAP_RINGS for rx groups.
424 */
425 if (mip->mi_rx_groups != NULL) {
426 if (mregp->m_callbacks->mc_unicst != NULL)
427 goto fail;
428 } else {
429 if (mregp->m_callbacks->mc_unicst == NULL)
430 goto fail;
431 }
432
433 /*
434 * Initialize MAC addresses. Must be called after mac_init_rings().
435 */
436 mac_init_macaddr(mip);
437
438 mip->mi_share_capab.ms_snum = 0;
439 if (mip->mi_v12n_level & MAC_VIRT_HIO) {
440 (void) mac_capab_get((mac_handle_t)mip, MAC_CAPAB_SHARES,
441 &mip->mi_share_capab);
442 }
443
444 /*
445 * Initialize the kstats for this device.
446 */
447 mac_driver_stat_create(mip);
448
449 /* Zero out any properties. */
450 bzero(&mip->mi_resource_props, sizeof (mac_resource_props_t));
451
452 if (mip->mi_minor <= MAC_MAX_MINOR) {
453 /* Create a style-2 DLPI device */
454 if (ddi_create_minor_node(mip->mi_dip, driver, S_IFCHR, 0,
455 DDI_NT_NET, CLONE_DEV) != DDI_SUCCESS)
456 goto fail;
457 style2_created = B_TRUE;
458
459 /* Create a style-1 DLPI device */
460 if (ddi_create_minor_node(mip->mi_dip, mip->mi_name, S_IFCHR,
461 mip->mi_minor, DDI_NT_NET, 0) != DDI_SUCCESS)
462 goto fail;
463 style1_created = B_TRUE;
464 }
465
466 mac_flow_l2tab_create(mip, &mip->mi_flow_tab);
467
468 rw_enter(&i_mac_impl_lock, RW_WRITER);
469 if (mod_hash_insert(i_mac_impl_hash,
470 (mod_hash_key_t)mip->mi_name, (mod_hash_val_t)mip) != 0) {
471 rw_exit(&i_mac_impl_lock);
472 err = EEXIST;
473 goto fail;
474 }
475
476 DTRACE_PROBE2(mac__register, struct devnames *, dnp,
477 (mac_impl_t *), mip);
478
479 /*
480 * Mark the MAC to be ready for open.
481 */
482 mip->mi_state_flags &= ~MIS_DISABLED;
483 rw_exit(&i_mac_impl_lock);
484
485 atomic_inc_32(&i_mac_impl_count);
486
487 cmn_err(CE_NOTE, "!%s registered", mip->mi_name);
488 *mhp = (mac_handle_t)mip;
489 return (0);
490
491 fail:
492 if (style1_created)
493 ddi_remove_minor_node(mip->mi_dip, mip->mi_name);
494
495 if (style2_created)
496 ddi_remove_minor_node(mip->mi_dip, driver);
497
498 mac_addr_factory_fini(mip);
499
500 /* Clean up registered MAC addresses */
501 mac_fini_macaddr(mip);
502
503 /* Clean up registered rings */
504 mac_free_rings(mip, MAC_RING_TYPE_RX);
505 mac_free_rings(mip, MAC_RING_TYPE_TX);
506
507 /* Clean up notification thread */
508 if (mip->mi_notify_thread != NULL)
509 i_mac_notify_exit(mip);
510
511 if (mip->mi_info.mi_unicst_addr != NULL) {
512 kmem_free(mip->mi_info.mi_unicst_addr,
513 mip->mi_type->mt_addr_length);
514 mip->mi_info.mi_unicst_addr = NULL;
515 }
516
517 mac_driver_stat_delete(mip);
518
519 if (mip->mi_type != NULL) {
520 atomic_dec_32(&mip->mi_type->mt_ref);
521 mip->mi_type = NULL;
522 }
523
524 if (mip->mi_pdata != NULL) {
525 kmem_free(mip->mi_pdata, mip->mi_pdata_size);
526 mip->mi_pdata = NULL;
527 mip->mi_pdata_size = 0;
528 }
529
530 if (minor != 0) {
531 ASSERT(minor > MAC_MAX_MINOR);
532 mac_minor_rele(minor);
533 }
534
535 mip->mi_state_flags = 0;
536 mac_unregister_priv_prop(mip);
537
538 /*
539 * Clear the state before destroying the mac_impl_t
540 */
541 mip->mi_state_flags = 0;
542
543 kmem_cache_free(i_mac_impl_cachep, mip);
544 return (err);
545 }
546
547 /*
548 * Unregister from the GLDv3 framework
549 */
550 int
mac_unregister(mac_handle_t mh)551 mac_unregister(mac_handle_t mh)
552 {
553 int err;
554 mac_impl_t *mip = (mac_impl_t *)mh;
555 mod_hash_val_t val;
556 mac_margin_req_t *mmr, *nextmmr;
557
558 /* Fail the unregister if there are any open references to this mac. */
559 if ((err = mac_disable_nowait(mh)) != 0)
560 return (err);
561
562 /*
563 * Clean up notification thread and wait for it to exit.
564 */
565 i_mac_notify_exit(mip);
566
567 /*
568 * Prior to acquiring the MAC perimeter, remove the MAC instance from
569 * the internal hash table. Such removal means table-walkers that
570 * acquire the perimeter will not do so on behalf of what we are
571 * unregistering, which prevents a deadlock.
572 */
573 rw_enter(&i_mac_impl_lock, RW_WRITER);
574 (void) mod_hash_remove(i_mac_impl_hash,
575 (mod_hash_key_t)mip->mi_name, &val);
576 rw_exit(&i_mac_impl_lock);
577 ASSERT(mip == (mac_impl_t *)val);
578
579 i_mac_perim_enter(mip);
580
581 /*
582 * There is still resource properties configured over this mac.
583 */
584 if (mip->mi_resource_props.mrp_mask != 0)
585 mac_fastpath_enable((mac_handle_t)mip);
586
587 if (mip->mi_minor < MAC_MAX_MINOR + 1) {
588 ddi_remove_minor_node(mip->mi_dip, mip->mi_name);
589 ddi_remove_minor_node(mip->mi_dip,
590 (char *)ddi_driver_name(mip->mi_dip));
591 }
592
593 ASSERT(mip->mi_nactiveclients == 0 && !(mip->mi_state_flags &
594 MIS_EXCLUSIVE));
595
596 mac_driver_stat_delete(mip);
597
598 ASSERT(i_mac_impl_count > 0);
599 atomic_dec_32(&i_mac_impl_count);
600
601 if (mip->mi_pdata != NULL)
602 kmem_free(mip->mi_pdata, mip->mi_pdata_size);
603 mip->mi_pdata = NULL;
604 mip->mi_pdata_size = 0;
605
606 /*
607 * Free the list of margin request.
608 */
609 for (mmr = mip->mi_mmrp; mmr != NULL; mmr = nextmmr) {
610 nextmmr = mmr->mmr_nextp;
611 kmem_free(mmr, sizeof (mac_margin_req_t));
612 }
613 mip->mi_mmrp = NULL;
614
615 mip->mi_linkstate = mip->mi_lowlinkstate = LINK_STATE_UNKNOWN;
616 kmem_free(mip->mi_info.mi_unicst_addr, mip->mi_type->mt_addr_length);
617 mip->mi_info.mi_unicst_addr = NULL;
618
619 atomic_dec_32(&mip->mi_type->mt_ref);
620 mip->mi_type = NULL;
621
622 /*
623 * Free the primary MAC address.
624 */
625 mac_fini_macaddr(mip);
626
627 /*
628 * free all rings
629 */
630 mac_free_rings(mip, MAC_RING_TYPE_RX);
631 mac_free_rings(mip, MAC_RING_TYPE_TX);
632
633 mac_addr_factory_fini(mip);
634
635 bzero(mip->mi_addr, MAXMACADDRLEN);
636 bzero(mip->mi_dstaddr, MAXMACADDRLEN);
637 mip->mi_dstaddr_set = B_FALSE;
638
639 /* and the flows */
640 mac_flow_tab_destroy(mip->mi_flow_tab);
641 mip->mi_flow_tab = NULL;
642
643 if (mip->mi_minor > MAC_MAX_MINOR)
644 mac_minor_rele(mip->mi_minor);
645
646 cmn_err(CE_NOTE, "!%s unregistered", mip->mi_name);
647
648 /*
649 * Reset the perim related fields to default values before
650 * kmem_cache_free
651 */
652 i_mac_perim_exit(mip);
653 mip->mi_state_flags = 0;
654
655 mac_unregister_priv_prop(mip);
656
657 ASSERT(mip->mi_bridge_link == NULL);
658 kmem_cache_free(i_mac_impl_cachep, mip);
659
660 return (0);
661 }
662
663 /* DATA RECEPTION */
664
665 /*
666 * This function is invoked for packets received by the MAC driver in
667 * interrupt context. The ring generation number provided by the driver
668 * is matched with the ring generation number held in MAC. If they do not
669 * match, received packets are considered stale packets coming from an older
670 * assignment of the ring. Drop them.
671 */
672 void
mac_rx_ring(mac_handle_t mh,mac_ring_handle_t mrh,mblk_t * mp_chain,uint64_t mr_gen_num)673 mac_rx_ring(mac_handle_t mh, mac_ring_handle_t mrh, mblk_t *mp_chain,
674 uint64_t mr_gen_num)
675 {
676 mac_ring_t *mr = (mac_ring_t *)mrh;
677
678 if ((mr != NULL) && (mr->mr_gen_num != mr_gen_num)) {
679 DTRACE_PROBE2(mac__rx__rings__stale__packet, uint64_t,
680 mr->mr_gen_num, uint64_t, mr_gen_num);
681 freemsgchain(mp_chain);
682 return;
683 }
684 mac_rx(mh, (mac_resource_handle_t)mrh, mp_chain);
685 }
686
687 /*
688 * This function is invoked for each packet received by the underlying driver.
689 */
690 void
mac_rx(mac_handle_t mh,mac_resource_handle_t mrh,mblk_t * mp_chain)691 mac_rx(mac_handle_t mh, mac_resource_handle_t mrh, mblk_t *mp_chain)
692 {
693 mac_impl_t *mip = (mac_impl_t *)mh;
694
695 /*
696 * Check if the link is part of a bridge. If not, then we don't need
697 * to take the lock to remain consistent. Make this common case
698 * lock-free and tail-call optimized.
699 */
700 if (mip->mi_bridge_link == NULL) {
701 mac_rx_common(mh, mrh, mp_chain);
702 } else {
703 /*
704 * Once we take a reference on the bridge link, the bridge
705 * module itself can't unload, so the callback pointers are
706 * stable.
707 */
708 mutex_enter(&mip->mi_bridge_lock);
709 if ((mh = mip->mi_bridge_link) != NULL)
710 mac_bridge_ref_cb(mh, B_TRUE);
711 mutex_exit(&mip->mi_bridge_lock);
712 if (mh == NULL) {
713 mac_rx_common((mac_handle_t)mip, mrh, mp_chain);
714 } else {
715 mac_bridge_rx_cb(mh, mrh, mp_chain);
716 mac_bridge_ref_cb(mh, B_FALSE);
717 }
718 }
719 }
720
721 /*
722 * Special case function: this allows snooping of packets transmitted and
723 * received by TRILL. By design, they go directly into the TRILL module.
724 */
725 void
mac_trill_snoop(mac_handle_t mh,mblk_t * mp)726 mac_trill_snoop(mac_handle_t mh, mblk_t *mp)
727 {
728 mac_impl_t *mip = (mac_impl_t *)mh;
729
730 if (mip->mi_promisc_list != NULL)
731 mac_promisc_dispatch(mip, mp, NULL, B_FALSE);
732 }
733
734 /*
735 * This is the upward reentry point for packets arriving from the bridging
736 * module and from mac_rx for links not part of a bridge.
737 */
738 void
mac_rx_common(mac_handle_t mh,mac_resource_handle_t mrh,mblk_t * mp_chain)739 mac_rx_common(mac_handle_t mh, mac_resource_handle_t mrh, mblk_t *mp_chain)
740 {
741 mac_impl_t *mip = (mac_impl_t *)mh;
742 mac_ring_t *mr = (mac_ring_t *)mrh;
743 mac_soft_ring_set_t *mac_srs;
744 mblk_t *bp = mp_chain;
745
746 /*
747 * If there are any promiscuous mode callbacks defined for
748 * this MAC, pass them a copy if appropriate.
749 */
750 if (mip->mi_promisc_list != NULL)
751 mac_promisc_dispatch(mip, mp_chain, NULL, B_FALSE);
752
753 if (mr != NULL) {
754 /*
755 * If the SRS teardown has started, just return. The 'mr'
756 * continues to be valid until the driver unregisters the MAC.
757 * Hardware classified packets will not make their way up
758 * beyond this point once the teardown has started. The driver
759 * is never passed a pointer to a flow entry or SRS or any
760 * structure that can be freed much before mac_unregister.
761 */
762 mutex_enter(&mr->mr_lock);
763 if ((mr->mr_state != MR_INUSE) || (mr->mr_flag &
764 (MR_INCIPIENT | MR_CONDEMNED | MR_QUIESCE))) {
765 mutex_exit(&mr->mr_lock);
766 freemsgchain(mp_chain);
767 return;
768 }
769
770 /*
771 * The ring is in passthru mode; pass the chain up to
772 * the pseudo ring.
773 */
774 if (mr->mr_classify_type == MAC_PASSTHRU_CLASSIFIER) {
775 MR_REFHOLD_LOCKED(mr);
776 mutex_exit(&mr->mr_lock);
777 mr->mr_pt_fn(mr->mr_pt_arg1, mr->mr_pt_arg2, mp_chain,
778 B_FALSE);
779 MR_REFRELE(mr);
780 return;
781 }
782
783 /*
784 * The passthru callback should only be set when in
785 * MAC_PASSTHRU_CLASSIFIER mode.
786 */
787 ASSERT3P(mr->mr_pt_fn, ==, NULL);
788
789 /*
790 * We check if an SRS is controlling this ring.
791 * If so, we can directly call the srs_lower_proc
792 * routine otherwise we need to go through mac_rx_classify
793 * to reach the right place.
794 */
795 if (mr->mr_classify_type == MAC_HW_CLASSIFIER) {
796 MR_REFHOLD_LOCKED(mr);
797 mutex_exit(&mr->mr_lock);
798 ASSERT3P(mr->mr_srs, !=, NULL);
799 mac_srs = mr->mr_srs;
800
801 /*
802 * This is the fast path. All packets received
803 * on this ring are hardware classified and
804 * share the same MAC header info.
805 */
806 mac_srs->srs_rx.sr_lower_proc(mh,
807 (mac_resource_handle_t)mac_srs, mp_chain, B_FALSE);
808 MR_REFRELE(mr);
809 return;
810 }
811
812 mutex_exit(&mr->mr_lock);
813 /* We'll fall through to software classification */
814 } else {
815 flow_entry_t *flent;
816 int err;
817
818 rw_enter(&mip->mi_rw_lock, RW_READER);
819 if (mip->mi_single_active_client != NULL) {
820 flent = mip->mi_single_active_client->mci_flent_list;
821 FLOW_TRY_REFHOLD(flent, err);
822 rw_exit(&mip->mi_rw_lock);
823 if (err == 0) {
824 (flent->fe_cb_fn)(flent->fe_cb_arg1,
825 flent->fe_cb_arg2, mp_chain, B_FALSE);
826 FLOW_REFRELE(flent);
827 return;
828 }
829 } else {
830 rw_exit(&mip->mi_rw_lock);
831 }
832 }
833
834 if (!FLOW_TAB_EMPTY(mip->mi_flow_tab)) {
835 if ((bp = mac_rx_flow(mh, mrh, bp)) == NULL)
836 return;
837 }
838
839 freemsgchain(bp);
840 }
841
842 /* DATA TRANSMISSION */
843
844 /*
845 * A driver's notification to resume transmission, in case of a provider
846 * without TX rings.
847 */
848 void
mac_tx_update(mac_handle_t mh)849 mac_tx_update(mac_handle_t mh)
850 {
851 mac_tx_ring_update(mh, NULL);
852 }
853
854 /*
855 * A driver's notification to resume transmission on the specified TX ring.
856 */
857 void
mac_tx_ring_update(mac_handle_t mh,mac_ring_handle_t rh)858 mac_tx_ring_update(mac_handle_t mh, mac_ring_handle_t rh)
859 {
860 i_mac_tx_srs_notify((mac_impl_t *)mh, rh);
861 }
862
863 /* LINK STATE */
864 /*
865 * Notify the MAC layer about a link state change
866 */
867 void
mac_link_update(mac_handle_t mh,link_state_t link)868 mac_link_update(mac_handle_t mh, link_state_t link)
869 {
870 mac_impl_t *mip = (mac_impl_t *)mh;
871
872 /*
873 * Save the link state.
874 */
875 mip->mi_lowlinkstate = link;
876
877 /*
878 * Send a MAC_NOTE_LOWLINK notification. This tells the notification
879 * thread to deliver both lower and upper notifications.
880 */
881 i_mac_notify(mip, MAC_NOTE_LOWLINK);
882 }
883
884 /*
885 * Notify the MAC layer about a link state change due to bridging.
886 */
887 void
mac_link_redo(mac_handle_t mh,link_state_t link)888 mac_link_redo(mac_handle_t mh, link_state_t link)
889 {
890 mac_impl_t *mip = (mac_impl_t *)mh;
891
892 /*
893 * Save the link state.
894 */
895 mip->mi_linkstate = link;
896
897 /*
898 * Send a MAC_NOTE_LINK notification. Only upper notifications are
899 * made.
900 */
901 i_mac_notify(mip, MAC_NOTE_LINK);
902 }
903
904 /* MINOR NODE HANDLING */
905
906 /*
907 * Given a dev_t, return the instance number (PPA) associated with it.
908 * Drivers can use this in their getinfo(9e) implementation to lookup
909 * the instance number (i.e. PPA) of the device, to use as an index to
910 * their own array of soft state structures.
911 *
912 * Returns -1 on error.
913 */
914 int
mac_devt_to_instance(dev_t devt)915 mac_devt_to_instance(dev_t devt)
916 {
917 return (dld_devt_to_instance(devt));
918 }
919
920 /*
921 * This function returns the first minor number that is available for
922 * driver private use. All minor numbers smaller than this are
923 * reserved for GLDv3 use.
924 */
925 minor_t
mac_private_minor(void)926 mac_private_minor(void)
927 {
928 return (MAC_PRIVATE_MINOR);
929 }
930
931 /* OTHER CONTROL INFORMATION */
932
933 /*
934 * A driver notified us that its primary MAC address has changed.
935 */
936 void
mac_unicst_update(mac_handle_t mh,const uint8_t * addr)937 mac_unicst_update(mac_handle_t mh, const uint8_t *addr)
938 {
939 mac_impl_t *mip = (mac_impl_t *)mh;
940
941 if (mip->mi_type->mt_addr_length == 0)
942 return;
943
944 i_mac_perim_enter(mip);
945
946 /*
947 * If address changes, freshen the MAC address value and update
948 * all MAC clients that share this MAC address.
949 */
950 if (bcmp(addr, mip->mi_addr, mip->mi_type->mt_addr_length) != 0) {
951 mac_freshen_macaddr(mac_find_macaddr(mip, mip->mi_addr),
952 (uint8_t *)addr);
953 }
954
955 i_mac_perim_exit(mip);
956
957 /*
958 * Send a MAC_NOTE_UNICST notification.
959 */
960 i_mac_notify(mip, MAC_NOTE_UNICST);
961 }
962
963 void
mac_dst_update(mac_handle_t mh,const uint8_t * addr)964 mac_dst_update(mac_handle_t mh, const uint8_t *addr)
965 {
966 mac_impl_t *mip = (mac_impl_t *)mh;
967
968 if (mip->mi_type->mt_addr_length == 0)
969 return;
970
971 i_mac_perim_enter(mip);
972 bcopy(addr, mip->mi_dstaddr, mip->mi_type->mt_addr_length);
973 i_mac_perim_exit(mip);
974 i_mac_notify(mip, MAC_NOTE_DEST);
975 }
976
977 /*
978 * MAC plugin information changed.
979 */
980 int
mac_pdata_update(mac_handle_t mh,void * mac_pdata,size_t dsize)981 mac_pdata_update(mac_handle_t mh, void *mac_pdata, size_t dsize)
982 {
983 mac_impl_t *mip = (mac_impl_t *)mh;
984
985 /*
986 * Verify that the plugin supports MAC plugin data and that the
987 * supplied data is valid.
988 */
989 if (!(mip->mi_type->mt_ops.mtops_ops & MTOPS_PDATA_VERIFY))
990 return (EINVAL);
991 if (!mip->mi_type->mt_ops.mtops_pdata_verify(mac_pdata, dsize))
992 return (EINVAL);
993
994 if (mip->mi_pdata != NULL)
995 kmem_free(mip->mi_pdata, mip->mi_pdata_size);
996
997 mip->mi_pdata = kmem_alloc(dsize, KM_SLEEP);
998 bcopy(mac_pdata, mip->mi_pdata, dsize);
999 mip->mi_pdata_size = dsize;
1000
1001 /*
1002 * Since the MAC plugin data is used to construct MAC headers that
1003 * were cached in fast-path headers, we need to flush fast-path
1004 * information for links associated with this mac.
1005 */
1006 i_mac_notify(mip, MAC_NOTE_FASTPATH_FLUSH);
1007 return (0);
1008 }
1009
1010 /*
1011 * The mac provider or mac frameowrk calls this function when it wants
1012 * to notify upstream consumers that the capabilities have changed and
1013 * that they should modify their own internal state accordingly.
1014 *
1015 * We currently have no regard for the fact that a provider could
1016 * decide to drop capabilities which would invalidate pending traffic.
1017 * For example, if one was to disable the Tx checksum offload while
1018 * TCP/IP traffic was being sent by mac clients relying on that
1019 * feature, then those packets would hit the write with missing or
1020 * partial checksums. A proper solution involves not only providing
1021 * notfication, but also performing client quiescing. That is, a capab
1022 * change should be treated as an atomic transaction that forms a
1023 * barrier between traffic relying on the current capabs and traffic
1024 * relying on the new capabs. In practice, simnet is currently the
1025 * only provider that could hit this, and it's an easily avoidable
1026 * situation (and at worst it should only lead to some dropped
1027 * packets). But if we ever want better on-the-fly capab change to
1028 * actual hardware providers, then we should give this update
1029 * mechanism a proper implementation.
1030 */
1031 void
mac_capab_update(mac_handle_t mh)1032 mac_capab_update(mac_handle_t mh)
1033 {
1034 /*
1035 * Send a MAC_NOTE_CAPAB_CHG notification to alert upstream
1036 * clients to renegotiate capabilities.
1037 */
1038 i_mac_notify((mac_impl_t *)mh, MAC_NOTE_CAPAB_CHG);
1039 }
1040
1041 /*
1042 * Used by normal drivers to update the max sdu size.
1043 * We need to handle the case of a smaller mi_sdu_multicast
1044 * since this is called by mac_set_mtu() even for drivers that
1045 * have differing unicast and multicast mtu and we don't want to
1046 * increase the multicast mtu by accident in that case.
1047 */
1048 int
mac_maxsdu_update(mac_handle_t mh,uint_t sdu_max)1049 mac_maxsdu_update(mac_handle_t mh, uint_t sdu_max)
1050 {
1051 mac_impl_t *mip = (mac_impl_t *)mh;
1052
1053 if (sdu_max == 0 || sdu_max < mip->mi_sdu_min)
1054 return (EINVAL);
1055 mip->mi_sdu_max = sdu_max;
1056 if (mip->mi_sdu_multicast > mip->mi_sdu_max)
1057 mip->mi_sdu_multicast = mip->mi_sdu_max;
1058
1059 /* Send a MAC_NOTE_SDU_SIZE notification. */
1060 i_mac_notify(mip, MAC_NOTE_SDU_SIZE);
1061 return (0);
1062 }
1063
1064 /*
1065 * Version of the above function that is used by drivers that have a different
1066 * max sdu size for multicast/broadcast vs. unicast.
1067 */
1068 int
mac_maxsdu_update2(mac_handle_t mh,uint_t sdu_max,uint_t sdu_multicast)1069 mac_maxsdu_update2(mac_handle_t mh, uint_t sdu_max, uint_t sdu_multicast)
1070 {
1071 mac_impl_t *mip = (mac_impl_t *)mh;
1072
1073 if (sdu_max == 0 || sdu_max < mip->mi_sdu_min)
1074 return (EINVAL);
1075 if (sdu_multicast == 0)
1076 sdu_multicast = sdu_max;
1077 if (sdu_multicast > sdu_max || sdu_multicast < mip->mi_sdu_min)
1078 return (EINVAL);
1079 mip->mi_sdu_max = sdu_max;
1080 mip->mi_sdu_multicast = sdu_multicast;
1081
1082 /* Send a MAC_NOTE_SDU_SIZE notification. */
1083 i_mac_notify(mip, MAC_NOTE_SDU_SIZE);
1084 return (0);
1085 }
1086
1087 static void
mac_ring_intr_retarget(mac_group_t * group,mac_ring_t * ring)1088 mac_ring_intr_retarget(mac_group_t *group, mac_ring_t *ring)
1089 {
1090 mac_client_impl_t *mcip;
1091 flow_entry_t *flent;
1092 mac_soft_ring_set_t *mac_rx_srs;
1093 mac_cpus_t *srs_cpu;
1094 int i;
1095
1096 if (((mcip = MAC_GROUP_ONLY_CLIENT(group)) != NULL) &&
1097 (!ring->mr_info.mri_intr.mi_ddi_shared)) {
1098 /* interrupt can be re-targeted */
1099 ASSERT(group->mrg_state == MAC_GROUP_STATE_RESERVED);
1100 flent = mcip->mci_flent;
1101 if (ring->mr_type == MAC_RING_TYPE_RX) {
1102 for (i = 0; i < flent->fe_rx_srs_cnt; i++) {
1103 mac_rx_srs = flent->fe_rx_srs[i];
1104 if (mac_rx_srs->srs_ring != ring)
1105 continue;
1106 srs_cpu = &mac_rx_srs->srs_cpu;
1107 mutex_enter(&cpu_lock);
1108 mac_rx_srs_retarget_intr(mac_rx_srs,
1109 srs_cpu->mc_rx_intr_cpu);
1110 mutex_exit(&cpu_lock);
1111 break;
1112 }
1113 } else {
1114 if (flent->fe_tx_srs != NULL) {
1115 mutex_enter(&cpu_lock);
1116 mac_tx_srs_retarget_intr(
1117 flent->fe_tx_srs);
1118 mutex_exit(&cpu_lock);
1119 }
1120 }
1121 }
1122 }
1123
1124 /*
1125 * Clients like aggr create pseudo rings (mac_ring_t) and expose them to
1126 * their clients. There is a 1-1 mapping pseudo ring and the hardware
1127 * ring. ddi interrupt handles are exported from the hardware ring to
1128 * the pseudo ring. Thus when the interrupt handle changes, clients of
1129 * aggr that are using the handle need to use the new handle and
1130 * re-target their interrupts.
1131 */
1132 static void
mac_pseudo_ring_intr_retarget(mac_impl_t * mip,mac_ring_t * ring,ddi_intr_handle_t ddh)1133 mac_pseudo_ring_intr_retarget(mac_impl_t *mip, mac_ring_t *ring,
1134 ddi_intr_handle_t ddh)
1135 {
1136 mac_ring_t *pring;
1137 mac_group_t *pgroup;
1138 mac_impl_t *pmip;
1139 char macname[MAXNAMELEN];
1140 mac_perim_handle_t p_mph;
1141 uint64_t saved_gen_num;
1142
1143 again:
1144 pring = (mac_ring_t *)ring->mr_prh;
1145 pgroup = (mac_group_t *)pring->mr_gh;
1146 pmip = (mac_impl_t *)pgroup->mrg_mh;
1147 saved_gen_num = ring->mr_gen_num;
1148 (void) strlcpy(macname, pmip->mi_name, MAXNAMELEN);
1149 /*
1150 * We need to enter aggr's perimeter. The locking hierarchy
1151 * dictates that aggr's perimeter should be entered first
1152 * and then the port's perimeter. So drop the port's
1153 * perimeter, enter aggr's and then re-enter port's
1154 * perimeter.
1155 */
1156 i_mac_perim_exit(mip);
1157 /*
1158 * While we know pmip is the aggr's mip, there is a
1159 * possibility that aggr could have unregistered by
1160 * the time we exit port's perimeter (mip) and
1161 * enter aggr's perimeter (pmip). To avoid that
1162 * scenario, enter aggr's perimeter using its name.
1163 */
1164 if (mac_perim_enter_by_macname(macname, &p_mph) != 0)
1165 return;
1166 i_mac_perim_enter(mip);
1167 /*
1168 * Check if the ring got assigned to another aggregation before
1169 * be could enter aggr's and the port's perimeter. When a ring
1170 * gets deleted from an aggregation, it calls mac_stop_ring()
1171 * which increments the generation number. So checking
1172 * generation number will be enough.
1173 */
1174 if (ring->mr_gen_num != saved_gen_num && ring->mr_prh != NULL) {
1175 i_mac_perim_exit(mip);
1176 mac_perim_exit(p_mph);
1177 i_mac_perim_enter(mip);
1178 goto again;
1179 }
1180
1181 /* Check if pseudo ring is still present */
1182 if (ring->mr_prh != NULL) {
1183 pring->mr_info.mri_intr.mi_ddi_handle = ddh;
1184 pring->mr_info.mri_intr.mi_ddi_shared =
1185 ring->mr_info.mri_intr.mi_ddi_shared;
1186 if (ddh != NULL)
1187 mac_ring_intr_retarget(pgroup, pring);
1188 }
1189 i_mac_perim_exit(mip);
1190 mac_perim_exit(p_mph);
1191 }
1192 /*
1193 * API called by driver to provide new interrupt handle for TX/RX rings.
1194 * This usually happens when IRM (Interrupt Resource Manangement)
1195 * framework either gives the driver more MSI-x interrupts or takes
1196 * away MSI-x interrupts from the driver.
1197 */
1198 void
mac_ring_intr_set(mac_ring_handle_t mrh,ddi_intr_handle_t ddh)1199 mac_ring_intr_set(mac_ring_handle_t mrh, ddi_intr_handle_t ddh)
1200 {
1201 mac_ring_t *ring = (mac_ring_t *)mrh;
1202 mac_group_t *group = (mac_group_t *)ring->mr_gh;
1203 mac_impl_t *mip = (mac_impl_t *)group->mrg_mh;
1204
1205 i_mac_perim_enter(mip);
1206 ring->mr_info.mri_intr.mi_ddi_handle = ddh;
1207 if (ddh == NULL) {
1208 /* Interrupts being reset */
1209 ring->mr_info.mri_intr.mi_ddi_shared = B_FALSE;
1210 if (ring->mr_prh != NULL) {
1211 mac_pseudo_ring_intr_retarget(mip, ring, ddh);
1212 return;
1213 }
1214 } else {
1215 /* New interrupt handle */
1216 mac_compare_ddi_handle(mip->mi_rx_groups,
1217 mip->mi_rx_group_count, ring);
1218 if (!ring->mr_info.mri_intr.mi_ddi_shared) {
1219 mac_compare_ddi_handle(mip->mi_tx_groups,
1220 mip->mi_tx_group_count, ring);
1221 }
1222 if (ring->mr_prh != NULL) {
1223 mac_pseudo_ring_intr_retarget(mip, ring, ddh);
1224 return;
1225 } else {
1226 mac_ring_intr_retarget(group, ring);
1227 }
1228 }
1229 i_mac_perim_exit(mip);
1230 }
1231
1232 /* PRIVATE FUNCTIONS, FOR INTERNAL USE ONLY */
1233
1234 /*
1235 * Updates the mac_impl structure with the current state of the link
1236 */
1237 static void
i_mac_log_link_state(mac_impl_t * mip)1238 i_mac_log_link_state(mac_impl_t *mip)
1239 {
1240 /*
1241 * If no change, then it is not interesting.
1242 */
1243 if (mip->mi_lastlowlinkstate == mip->mi_lowlinkstate)
1244 return;
1245
1246 switch (mip->mi_lowlinkstate) {
1247 case LINK_STATE_UP:
1248 if (mip->mi_type->mt_ops.mtops_ops & MTOPS_LINK_DETAILS) {
1249 char det[200];
1250
1251 mip->mi_type->mt_ops.mtops_link_details(det,
1252 sizeof (det), (mac_handle_t)mip, mip->mi_pdata);
1253
1254 cmn_err(CE_NOTE, "!%s link up, %s", mip->mi_name, det);
1255 } else {
1256 cmn_err(CE_NOTE, "!%s link up", mip->mi_name);
1257 }
1258 break;
1259
1260 case LINK_STATE_DOWN:
1261 /*
1262 * Only transitions from UP to DOWN are interesting
1263 */
1264 if (mip->mi_lastlowlinkstate != LINK_STATE_UNKNOWN)
1265 cmn_err(CE_NOTE, "!%s link down", mip->mi_name);
1266 break;
1267
1268 case LINK_STATE_UNKNOWN:
1269 /*
1270 * This case is normally not interesting.
1271 */
1272 break;
1273 }
1274 mip->mi_lastlowlinkstate = mip->mi_lowlinkstate;
1275 }
1276
1277 /*
1278 * Main routine for the callbacks notifications thread
1279 */
1280 static void
i_mac_notify_thread(void * arg)1281 i_mac_notify_thread(void *arg)
1282 {
1283 mac_impl_t *mip = arg;
1284 callb_cpr_t cprinfo;
1285 mac_cb_t *mcb;
1286 mac_cb_info_t *mcbi;
1287 mac_notify_cb_t *mncb;
1288
1289 mcbi = &mip->mi_notify_cb_info;
1290 CALLB_CPR_INIT(&cprinfo, mcbi->mcbi_lockp, callb_generic_cpr,
1291 "i_mac_notify_thread");
1292
1293 mutex_enter(mcbi->mcbi_lockp);
1294
1295 for (;;) {
1296 uint32_t bits;
1297 uint32_t type;
1298
1299 bits = mip->mi_notify_bits;
1300 if (bits == 0) {
1301 CALLB_CPR_SAFE_BEGIN(&cprinfo);
1302 cv_wait(&mcbi->mcbi_cv, mcbi->mcbi_lockp);
1303 CALLB_CPR_SAFE_END(&cprinfo, mcbi->mcbi_lockp);
1304 continue;
1305 }
1306 mip->mi_notify_bits = 0;
1307 if ((bits & (1 << MAC_NNOTE)) != 0) {
1308 /* request to quit */
1309 ASSERT(mip->mi_state_flags & MIS_DISABLED);
1310 break;
1311 }
1312
1313 mutex_exit(mcbi->mcbi_lockp);
1314
1315 /*
1316 * Log link changes on the actual link, but then do reports on
1317 * synthetic state (if part of a bridge).
1318 */
1319 if ((bits & (1 << MAC_NOTE_LOWLINK)) != 0) {
1320 link_state_t newstate;
1321 mac_handle_t mh;
1322
1323 i_mac_log_link_state(mip);
1324 newstate = mip->mi_lowlinkstate;
1325 if (mip->mi_bridge_link != NULL) {
1326 mutex_enter(&mip->mi_bridge_lock);
1327 if ((mh = mip->mi_bridge_link) != NULL) {
1328 newstate = mac_bridge_ls_cb(mh,
1329 newstate);
1330 }
1331 mutex_exit(&mip->mi_bridge_lock);
1332 }
1333 if (newstate != mip->mi_linkstate) {
1334 mip->mi_linkstate = newstate;
1335 bits |= 1 << MAC_NOTE_LINK;
1336 }
1337 }
1338
1339 /*
1340 * Depending on which capabs have changed, the Tx
1341 * checksum flags may also need to be updated.
1342 */
1343 if ((bits & (1 << MAC_NOTE_CAPAB_CHG)) != 0) {
1344 mac_perim_handle_t mph;
1345 mac_handle_t mh = (mac_handle_t)mip;
1346
1347 mac_perim_enter_by_mh(mh, &mph);
1348 mip->mi_tx_cksum_flags = mac_features_to_flags(mh);
1349 mac_perim_exit(mph);
1350 }
1351
1352 /*
1353 * Do notification callbacks for each notification type.
1354 */
1355 for (type = 0; type < MAC_NNOTE; type++) {
1356 if ((bits & (1 << type)) == 0) {
1357 continue;
1358 }
1359
1360 if (mac_notify_cb_list[type] != NULL)
1361 (*mac_notify_cb_list[type])(mip);
1362
1363 /*
1364 * Walk the list of notifications.
1365 */
1366 MAC_CALLBACK_WALKER_INC(&mip->mi_notify_cb_info);
1367 for (mcb = mip->mi_notify_cb_list; mcb != NULL;
1368 mcb = mcb->mcb_nextp) {
1369 mncb = (mac_notify_cb_t *)mcb->mcb_objp;
1370 mncb->mncb_fn(mncb->mncb_arg, type);
1371 }
1372 MAC_CALLBACK_WALKER_DCR(&mip->mi_notify_cb_info,
1373 &mip->mi_notify_cb_list);
1374 }
1375
1376 mutex_enter(mcbi->mcbi_lockp);
1377 }
1378
1379 mip->mi_state_flags |= MIS_NOTIFY_DONE;
1380 cv_broadcast(&mcbi->mcbi_cv);
1381
1382 /* CALLB_CPR_EXIT drops the lock */
1383 CALLB_CPR_EXIT(&cprinfo);
1384 thread_exit();
1385 }
1386
1387 /*
1388 * Signal the i_mac_notify_thread asking it to quit.
1389 * Then wait till it is done.
1390 */
1391 void
i_mac_notify_exit(mac_impl_t * mip)1392 i_mac_notify_exit(mac_impl_t *mip)
1393 {
1394 mac_cb_info_t *mcbi;
1395
1396 mcbi = &mip->mi_notify_cb_info;
1397
1398 mutex_enter(mcbi->mcbi_lockp);
1399 mip->mi_notify_bits = (1 << MAC_NNOTE);
1400 cv_broadcast(&mcbi->mcbi_cv);
1401
1402
1403 while ((mip->mi_notify_thread != NULL) &&
1404 !(mip->mi_state_flags & MIS_NOTIFY_DONE)) {
1405 cv_wait(&mcbi->mcbi_cv, mcbi->mcbi_lockp);
1406 }
1407
1408 /* Necessary clean up before doing kmem_cache_free */
1409 mip->mi_state_flags &= ~MIS_NOTIFY_DONE;
1410 mip->mi_notify_bits = 0;
1411 mip->mi_notify_thread = NULL;
1412 mutex_exit(mcbi->mcbi_lockp);
1413 }
1414
1415 /*
1416 * Entry point invoked by drivers to dynamically add a ring to an
1417 * existing group.
1418 */
1419 int
mac_group_add_ring(mac_group_handle_t gh,int index)1420 mac_group_add_ring(mac_group_handle_t gh, int index)
1421 {
1422 mac_group_t *group = (mac_group_t *)gh;
1423 mac_impl_t *mip = (mac_impl_t *)group->mrg_mh;
1424 int ret;
1425
1426 i_mac_perim_enter(mip);
1427 ret = i_mac_group_add_ring(group, NULL, index);
1428 i_mac_perim_exit(mip);
1429 return (ret);
1430 }
1431
1432 /*
1433 * Entry point invoked by drivers to dynamically remove a ring
1434 * from an existing group. The specified ring handle must no longer
1435 * be used by the driver after a call to this function.
1436 */
1437 void
mac_group_rem_ring(mac_group_handle_t gh,mac_ring_handle_t rh)1438 mac_group_rem_ring(mac_group_handle_t gh, mac_ring_handle_t rh)
1439 {
1440 mac_group_t *group = (mac_group_t *)gh;
1441 mac_impl_t *mip = (mac_impl_t *)group->mrg_mh;
1442
1443 i_mac_perim_enter(mip);
1444 i_mac_group_rem_ring(group, (mac_ring_t *)rh, B_TRUE);
1445 i_mac_perim_exit(mip);
1446 }
1447
1448 /*
1449 * mac_prop_info_*() callbacks called from the driver's prefix_propinfo()
1450 * entry points.
1451 */
1452
1453 void
mac_prop_info_set_default_uint8(mac_prop_info_handle_t ph,uint8_t val)1454 mac_prop_info_set_default_uint8(mac_prop_info_handle_t ph, uint8_t val)
1455 {
1456 mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph;
1457
1458 /* nothing to do if the caller doesn't want the default value */
1459 if (pr->pr_default == NULL)
1460 return;
1461
1462 ASSERT(pr->pr_default_size >= sizeof (uint8_t));
1463
1464 *(uint8_t *)(pr->pr_default) = val;
1465 pr->pr_flags |= MAC_PROP_INFO_DEFAULT;
1466 }
1467
1468 void
mac_prop_info_set_default_uint64(mac_prop_info_handle_t ph,uint64_t val)1469 mac_prop_info_set_default_uint64(mac_prop_info_handle_t ph, uint64_t val)
1470 {
1471 mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph;
1472
1473 /* nothing to do if the caller doesn't want the default value */
1474 if (pr->pr_default == NULL)
1475 return;
1476
1477 ASSERT(pr->pr_default_size >= sizeof (uint64_t));
1478
1479 bcopy(&val, pr->pr_default, sizeof (val));
1480
1481 pr->pr_flags |= MAC_PROP_INFO_DEFAULT;
1482 }
1483
1484 void
mac_prop_info_set_default_uint32(mac_prop_info_handle_t ph,uint32_t val)1485 mac_prop_info_set_default_uint32(mac_prop_info_handle_t ph, uint32_t val)
1486 {
1487 mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph;
1488
1489 /* nothing to do if the caller doesn't want the default value */
1490 if (pr->pr_default == NULL)
1491 return;
1492
1493 ASSERT(pr->pr_default_size >= sizeof (uint32_t));
1494
1495 bcopy(&val, pr->pr_default, sizeof (val));
1496
1497 pr->pr_flags |= MAC_PROP_INFO_DEFAULT;
1498 }
1499
1500 void
mac_prop_info_set_default_str(mac_prop_info_handle_t ph,const char * str)1501 mac_prop_info_set_default_str(mac_prop_info_handle_t ph, const char *str)
1502 {
1503 mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph;
1504
1505 /* nothing to do if the caller doesn't want the default value */
1506 if (pr->pr_default == NULL)
1507 return;
1508
1509 if (strlen(str) >= pr->pr_default_size)
1510 pr->pr_errno = ENOBUFS;
1511 else
1512 (void) strlcpy(pr->pr_default, str, pr->pr_default_size);
1513 pr->pr_flags |= MAC_PROP_INFO_DEFAULT;
1514 }
1515
1516 void
mac_prop_info_set_default_link_flowctrl(mac_prop_info_handle_t ph,link_flowctrl_t val)1517 mac_prop_info_set_default_link_flowctrl(mac_prop_info_handle_t ph,
1518 link_flowctrl_t val)
1519 {
1520 mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph;
1521
1522 /* nothing to do if the caller doesn't want the default value */
1523 if (pr->pr_default == NULL)
1524 return;
1525
1526 ASSERT(pr->pr_default_size >= sizeof (link_flowctrl_t));
1527
1528 bcopy(&val, pr->pr_default, sizeof (val));
1529
1530 pr->pr_flags |= MAC_PROP_INFO_DEFAULT;
1531 }
1532
1533 void
mac_prop_info_set_default_fec(mac_prop_info_handle_t ph,link_fec_t val)1534 mac_prop_info_set_default_fec(mac_prop_info_handle_t ph, link_fec_t val)
1535 {
1536 mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph;
1537
1538 /* nothing to do if the caller doesn't want the default value */
1539 if (pr->pr_default == NULL)
1540 return;
1541
1542 ASSERT(pr->pr_default_size >= sizeof (link_fec_t));
1543
1544 bcopy(&val, pr->pr_default, sizeof (val));
1545
1546 pr->pr_flags |= MAC_PROP_INFO_DEFAULT;
1547 }
1548
1549 void
mac_prop_info_set_range_uint32(mac_prop_info_handle_t ph,uint32_t min,uint32_t max)1550 mac_prop_info_set_range_uint32(mac_prop_info_handle_t ph, uint32_t min,
1551 uint32_t max)
1552 {
1553 mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph;
1554 mac_propval_range_t *range = pr->pr_range;
1555 mac_propval_uint32_range_t *range32;
1556
1557 /* nothing to do if the caller doesn't want the range info */
1558 if (range == NULL)
1559 return;
1560
1561 if (pr->pr_range_cur_count++ == 0) {
1562 /* first range */
1563 pr->pr_flags |= MAC_PROP_INFO_RANGE;
1564 range->mpr_type = MAC_PROPVAL_UINT32;
1565 } else {
1566 /* all ranges of a property should be of the same type */
1567 ASSERT(range->mpr_type == MAC_PROPVAL_UINT32);
1568 if (pr->pr_range_cur_count > range->mpr_count) {
1569 pr->pr_errno = ENOSPC;
1570 return;
1571 }
1572 }
1573
1574 range32 = range->mpr_range_uint32;
1575 range32[pr->pr_range_cur_count - 1].mpur_min = min;
1576 range32[pr->pr_range_cur_count - 1].mpur_max = max;
1577 }
1578
1579 void
mac_prop_info_set_perm(mac_prop_info_handle_t ph,uint8_t perm)1580 mac_prop_info_set_perm(mac_prop_info_handle_t ph, uint8_t perm)
1581 {
1582 mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph;
1583
1584 pr->pr_perm = perm;
1585 pr->pr_flags |= MAC_PROP_INFO_PERM;
1586 }
1587
1588 void
mac_hcksum_get(const mblk_t * mp,uint32_t * start,uint32_t * stuff,uint32_t * end,uint32_t * value,uint32_t * flags_ptr)1589 mac_hcksum_get(const mblk_t *mp, uint32_t *start, uint32_t *stuff,
1590 uint32_t *end, uint32_t *value, uint32_t *flags_ptr)
1591 {
1592 uint32_t flags;
1593
1594 ASSERT(DB_TYPE(mp) == M_DATA);
1595
1596 flags = DB_CKSUMFLAGS(mp) & HCK_FLAGS;
1597 if ((flags & (HCK_PARTIALCKSUM | HCK_FULLCKSUM)) != 0) {
1598 if (value != NULL)
1599 *value = (uint32_t)DB_CKSUM16(mp);
1600 if ((flags & HCK_PARTIALCKSUM) != 0) {
1601 if (start != NULL)
1602 *start = (uint32_t)DB_CKSUMSTART(mp);
1603 if (stuff != NULL)
1604 *stuff = (uint32_t)DB_CKSUMSTUFF(mp);
1605 if (end != NULL)
1606 *end = (uint32_t)DB_CKSUMEND(mp);
1607 }
1608 }
1609
1610 if (flags_ptr != NULL)
1611 *flags_ptr = flags;
1612 }
1613
1614 void
mac_hcksum_set(mblk_t * mp,uint32_t start,uint32_t stuff,uint32_t end,uint32_t value,uint32_t flags)1615 mac_hcksum_set(mblk_t *mp, uint32_t start, uint32_t stuff, uint32_t end,
1616 uint32_t value, uint32_t flags)
1617 {
1618 ASSERT(DB_TYPE(mp) == M_DATA);
1619
1620 DB_CKSUMSTART(mp) = (intptr_t)start;
1621 DB_CKSUMSTUFF(mp) = (intptr_t)stuff;
1622 DB_CKSUMEND(mp) = (intptr_t)end;
1623 DB_CKSUMFLAGS(mp) = (uint16_t)flags;
1624 DB_CKSUM16(mp) = (uint16_t)value;
1625 }
1626
1627 void
mac_hcksum_clone(const mblk_t * src,mblk_t * dst)1628 mac_hcksum_clone(const mblk_t *src, mblk_t *dst)
1629 {
1630 ASSERT3U(DB_TYPE(src), ==, M_DATA);
1631 ASSERT3U(DB_TYPE(dst), ==, M_DATA);
1632
1633 /*
1634 * Do these assignments unconditionally, rather than only when
1635 * flags is non-zero. This protects a situation where zeroed
1636 * hcksum data does not make the jump onto an mblk_t with
1637 * stale data in those fields. It's important to copy all
1638 * possible flags (HCK_* as well as HW_*) and not just the
1639 * checksum specific flags. Dropping flags during a clone
1640 * could result in dropped packets. If the caller has good
1641 * reason to drop those flags then it should do it manually,
1642 * after the clone.
1643 */
1644 DB_CKSUMFLAGS(dst) = DB_CKSUMFLAGS(src);
1645 DB_CKSUMSTART(dst) = DB_CKSUMSTART(src);
1646 DB_CKSUMSTUFF(dst) = DB_CKSUMSTUFF(src);
1647 DB_CKSUMEND(dst) = DB_CKSUMEND(src);
1648 DB_CKSUM16(dst) = DB_CKSUM16(src);
1649 DB_LSOMSS(dst) = DB_LSOMSS(src);
1650 }
1651
1652 void
mac_lso_get(mblk_t * mp,uint32_t * mss,uint32_t * flags)1653 mac_lso_get(mblk_t *mp, uint32_t *mss, uint32_t *flags)
1654 {
1655 ASSERT(DB_TYPE(mp) == M_DATA);
1656
1657 if (flags != NULL) {
1658 *flags = DB_CKSUMFLAGS(mp) & HW_LSO;
1659 if ((*flags != 0) && (mss != NULL))
1660 *mss = (uint32_t)DB_LSOMSS(mp);
1661 }
1662 }
1663
1664 void
mac_transceiver_info_set_present(mac_transceiver_info_t * infop,boolean_t present)1665 mac_transceiver_info_set_present(mac_transceiver_info_t *infop,
1666 boolean_t present)
1667 {
1668 infop->mti_present = present;
1669 }
1670
1671 void
mac_transceiver_info_set_usable(mac_transceiver_info_t * infop,boolean_t usable)1672 mac_transceiver_info_set_usable(mac_transceiver_info_t *infop,
1673 boolean_t usable)
1674 {
1675 infop->mti_usable = usable;
1676 }
1677
1678 /*
1679 * We should really keep track of our offset and not walk everything every
1680 * time. I can't imagine that this will be kind to us at high packet rates;
1681 * however, for the moment, let's leave that.
1682 *
1683 * This walks a message block chain without pulling up to fill in the context
1684 * information. Note that the data we care about could be hidden across more
1685 * than one mblk_t.
1686 */
1687 static int
mac_meoi_get_uint8(mblk_t * mp,off_t off,uint8_t * out)1688 mac_meoi_get_uint8(mblk_t *mp, off_t off, uint8_t *out)
1689 {
1690 size_t mpsize;
1691 uint8_t *bp;
1692
1693 mpsize = msgsize(mp);
1694 /* Check for overflow */
1695 if (off + sizeof (uint16_t) > mpsize)
1696 return (-1);
1697
1698 mpsize = MBLKL(mp);
1699 while (off >= mpsize) {
1700 mp = mp->b_cont;
1701 off -= mpsize;
1702 mpsize = MBLKL(mp);
1703 }
1704
1705 bp = mp->b_rptr + off;
1706 *out = *bp;
1707 return (0);
1708
1709 }
1710
1711 static int
mac_meoi_get_uint16(mblk_t * mp,off_t off,uint16_t * out)1712 mac_meoi_get_uint16(mblk_t *mp, off_t off, uint16_t *out)
1713 {
1714 size_t mpsize;
1715 uint8_t *bp;
1716
1717 mpsize = msgsize(mp);
1718 /* Check for overflow */
1719 if (off + sizeof (uint16_t) > mpsize)
1720 return (-1);
1721
1722 mpsize = MBLKL(mp);
1723 while (off >= mpsize) {
1724 mp = mp->b_cont;
1725 off -= mpsize;
1726 mpsize = MBLKL(mp);
1727 }
1728
1729 /*
1730 * Data is in network order. Note the second byte of data might be in
1731 * the next mp.
1732 */
1733 bp = mp->b_rptr + off;
1734 *out = *bp << 8;
1735 if (off + 1 == mpsize) {
1736 mp = mp->b_cont;
1737 bp = mp->b_rptr;
1738 } else {
1739 bp++;
1740 }
1741
1742 *out |= *bp;
1743 return (0);
1744
1745 }
1746
1747
1748 int
mac_ether_offload_info(mblk_t * mp,mac_ether_offload_info_t * meoi)1749 mac_ether_offload_info(mblk_t *mp, mac_ether_offload_info_t *meoi)
1750 {
1751 size_t off;
1752 uint16_t ether;
1753 uint8_t ipproto, iplen, l4len, maclen;
1754
1755 bzero(meoi, sizeof (mac_ether_offload_info_t));
1756
1757 meoi->meoi_len = msgsize(mp);
1758 off = offsetof(struct ether_header, ether_type);
1759 if (mac_meoi_get_uint16(mp, off, ðer) != 0)
1760 return (-1);
1761
1762 if (ether == ETHERTYPE_VLAN) {
1763 off = offsetof(struct ether_vlan_header, ether_type);
1764 if (mac_meoi_get_uint16(mp, off, ðer) != 0)
1765 return (-1);
1766 meoi->meoi_flags |= MEOI_VLAN_TAGGED;
1767 maclen = sizeof (struct ether_vlan_header);
1768 } else {
1769 maclen = sizeof (struct ether_header);
1770 }
1771 meoi->meoi_flags |= MEOI_L2INFO_SET;
1772 meoi->meoi_l2hlen = maclen;
1773 meoi->meoi_l3proto = ether;
1774
1775 switch (ether) {
1776 case ETHERTYPE_IP:
1777 /*
1778 * For IPv4 we need to get the length of the header, as it can
1779 * be variable.
1780 */
1781 off = offsetof(ipha_t, ipha_version_and_hdr_length) + maclen;
1782 if (mac_meoi_get_uint8(mp, off, &iplen) != 0)
1783 return (-1);
1784 iplen &= 0x0f;
1785 if (iplen < 5 || iplen > 0x0f)
1786 return (-1);
1787 iplen *= 4;
1788 off = offsetof(ipha_t, ipha_protocol) + maclen;
1789 if (mac_meoi_get_uint8(mp, off, &ipproto) == -1)
1790 return (-1);
1791 break;
1792 case ETHERTYPE_IPV6:
1793 iplen = 40;
1794 off = offsetof(ip6_t, ip6_nxt) + maclen;
1795 if (mac_meoi_get_uint8(mp, off, &ipproto) == -1)
1796 return (-1);
1797 break;
1798 default:
1799 return (0);
1800 }
1801 meoi->meoi_l3hlen = iplen;
1802 meoi->meoi_l4proto = ipproto;
1803 meoi->meoi_flags |= MEOI_L3INFO_SET;
1804
1805 switch (ipproto) {
1806 case IPPROTO_TCP:
1807 off = offsetof(tcph_t, th_offset_and_rsrvd) + maclen + iplen;
1808 if (mac_meoi_get_uint8(mp, off, &l4len) == -1)
1809 return (-1);
1810 l4len = (l4len & 0xf0) >> 4;
1811 if (l4len < 5 || l4len > 0xf)
1812 return (-1);
1813 l4len *= 4;
1814 break;
1815 case IPPROTO_UDP:
1816 l4len = sizeof (struct udphdr);
1817 break;
1818 case IPPROTO_SCTP:
1819 l4len = sizeof (sctp_hdr_t);
1820 break;
1821 default:
1822 return (0);
1823 }
1824
1825 meoi->meoi_l4hlen = l4len;
1826 meoi->meoi_flags |= MEOI_L4INFO_SET;
1827 return (0);
1828 }
1829