1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. 24 * Copyright 2019 Joyent, Inc. 25 * Copyright 2017 OmniTI Computer Consulting, Inc. All rights reserved. 
 */

#include <sys/types.h>
#include <sys/conf.h>
#include <sys/id_space.h>
#include <sys/esunddi.h>
#include <sys/stat.h>
#include <sys/mkdev.h>
#include <sys/stream.h>
#include <sys/strsubr.h>
#include <sys/dlpi.h>
#include <sys/modhash.h>
#include <sys/mac.h>
#include <sys/mac_provider.h>
#include <sys/mac_impl.h>
#include <sys/mac_client_impl.h>
#include <sys/mac_client_priv.h>
#include <sys/mac_soft_ring.h>
#include <sys/mac_stat.h>
#include <sys/dld.h>
#include <sys/modctl.h>
#include <sys/fs/dv_node.h>
#include <sys/thread.h>
#include <sys/proc.h>
#include <sys/callb.h>
#include <sys/cpuvar.h>
#include <sys/atomic.h>
#include <sys/sdt.h>
#include <sys/mac_flow.h>
#include <sys/ddi_intr_impl.h>
#include <sys/disp.h>
#include <sys/sdt.h>
#include <sys/pattr.h>
#include <sys/strsun.h>
#include <sys/vlan.h>
#include <inet/ip.h>
#include <inet/tcp.h>
#include <netinet/udp.h>
#include <netinet/sctp.h>

/*
 * MAC Provider Interface.
 *
 * Interface for GLDv3 compatible NIC drivers.
 */

static void i_mac_notify_thread(void *);

typedef void (*mac_notify_default_cb_fn_t)(mac_impl_t *);

/*
 * Default callbacks run by the notification thread for each notification
 * type.  A NULL entry means no framework-internal default action; the
 * notification is still delivered to registered clients.  The array is
 * indexed by mac_notify_type_t, so the order here must match that enum.
 */
static const mac_notify_default_cb_fn_t mac_notify_cb_list[MAC_NNOTE] = {
        mac_fanout_recompute,   /* MAC_NOTE_LINK */
        NULL,                   /* MAC_NOTE_UNICST */
        NULL,                   /* MAC_NOTE_TX */
        NULL,                   /* MAC_NOTE_DEVPROMISC */
        NULL,                   /* MAC_NOTE_FASTPATH_FLUSH */
        NULL,                   /* MAC_NOTE_SDU_SIZE */
        NULL,                   /* MAC_NOTE_MARGIN */
        NULL,                   /* MAC_NOTE_CAPAB_CHG */
        NULL                    /* MAC_NOTE_LOWLINK */
};

/*
 * Driver support functions.
 */

/* REGISTRATION */

/*
 * Allocate a zeroed mac_register_t for a driver about to call
 * mac_register().  Returns NULL on a version mismatch between the
 * driver and the framework.  The caller frees it with mac_free().
 */
mac_register_t *
mac_alloc(uint_t mac_version)
{
        mac_register_t *mregp;

        /*
         * Make sure there isn't a version mismatch between the driver and
         * the framework.  In the future, if multiple versions are
         * supported, this check could become more sophisticated.
         */
        if (mac_version != MAC_VERSION)
                return (NULL);

        mregp = kmem_zalloc(sizeof (mac_register_t), KM_SLEEP);
        mregp->m_version = mac_version;
        return (mregp);
}

/*
 * Free a registration structure obtained from mac_alloc().  Safe to call
 * immediately after mac_register() returns; the framework keeps its own
 * references to the pieces it needs (see mac_register()).
 */
void
mac_free(mac_register_t *mregp)
{
        kmem_free(mregp, sizeof (mac_register_t));
}

/*
 * Convert a MAC's offload features into the equivalent DB_CKSUMFLAGS
 * value.
 */
static uint16_t
mac_features_to_flags(mac_handle_t mh)
{
        uint16_t flags = 0;
        uint32_t cap_sum = 0;
        mac_capab_lso_t cap_lso;

        if (mac_capab_get(mh, MAC_CAPAB_HCKSUM, &cap_sum)) {
                if (cap_sum & HCKSUM_IPHDRCKSUM)
                        flags |= HCK_IPV4_HDRCKSUM;

                if (cap_sum & HCKSUM_INET_PARTIAL)
                        flags |= HCK_PARTIALCKSUM;
                else if (cap_sum & (HCKSUM_INET_FULL_V4 | HCKSUM_INET_FULL_V6))
                        flags |= HCK_FULLCKSUM;
        }

        /*
         * We don't need the information stored in 'cap_lso', but we
         * need to pass a non-NULL pointer to appease the driver.
         */
        if (mac_capab_get(mh, MAC_CAPAB_LSO, &cap_lso))
                flags |= HW_LSO;

        return (flags);
}

/*
 * mac_register() is how drivers register new MACs with the GLDv3
 * framework.  The mregp argument is allocated by drivers using the
 * mac_alloc() function, and can be freed using mac_free() immediately upon
 * return from mac_register().  Upon success (0 return value), the mhp
 * opaque pointer becomes the driver's handle to its MAC interface, and is
 * the argument to all other mac module entry points.
 */
/* ARGSUSED */
int
mac_register(mac_register_t *mregp, mac_handle_t *mhp)
{
        mac_impl_t              *mip;
        mactype_t               *mtype;
        int                     err = EINVAL;
        struct devnames         *dnp = NULL;
        uint_t                  instance;
        boolean_t               style1_created = B_FALSE;
        boolean_t               style2_created = B_FALSE;
        char                    *driver;
        minor_t                 minor = 0;

        /*
         * A successful call to mac_init_ops() sets the DN_GLDV3_DRIVER flag.
         */
        if (!GLDV3_DRV(ddi_driver_major(mregp->m_dip)))
                return (EINVAL);

        /* Find the required MAC-Type plugin. */
        if ((mtype = mactype_getplugin(mregp->m_type_ident)) == NULL)
                return (EINVAL);

        /* Create a mac_impl_t to represent this MAC. */
        mip = kmem_cache_alloc(i_mac_impl_cachep, KM_SLEEP);

        /*
         * The mac is not ready for open yet.
         */
        mip->mi_state_flags |= MIS_DISABLED;

        /*
         * When a mac is registered, the m_instance field can be set to:
         *
         * 0:   Get the mac's instance number from m_dip.
         *      This is usually used for physical device dips.
         *
         * [1 .. MAC_MAX_MINOR-1]: Use the value as the mac's instance number.
         *      For example, when an aggregation is created with the key
         *      option, "key" will be used as the instance number.
         *
         * -1: Assign an instance number from [MAC_MAX_MINOR .. MAXMIN-1].
         *      This is often used when a MAC of a virtual link is registered
         *      (e.g., aggregation when "key" is not specified, or vnic).
         *
         * Note that the instance number is used to derive the mi_minor field
         * of mac_impl_t, which will then be used to derive the name of kstats
         * and the devfs nodes.  The first 2 cases are needed to preserve
         * backward compatibility.
         */
        switch (mregp->m_instance) {
        case 0:
                instance = ddi_get_instance(mregp->m_dip);
                break;
        case ((uint_t)-1):
                minor = mac_minor_hold(B_TRUE);
                if (minor == 0) {
                        err = ENOSPC;
                        goto fail;
                }
                instance = minor - 1;
                break;
        default:
                instance = mregp->m_instance;
                if (instance >= MAC_MAX_MINOR) {
                        err = EINVAL;
                        goto fail;
                }
                break;
        }

        mip->mi_minor = (minor_t)(instance + 1);
        mip->mi_dip = mregp->m_dip;
        mip->mi_clients_list = NULL;
        mip->mi_nclients = 0;

        /* Set the default IEEE Port VLAN Identifier */
        mip->mi_pvid = 1;

        /* Default bridge link learning protection values */
        mip->mi_llimit = 1000;
        mip->mi_ldecay = 200;

        driver = (char *)ddi_driver_name(mip->mi_dip);

        /* Construct the MAC name as <drvname><instance> */
        (void) snprintf(mip->mi_name, sizeof (mip->mi_name), "%s%d",
            driver, instance);

        mip->mi_driver = mregp->m_driver;

        mip->mi_type = mtype;
        mip->mi_margin = mregp->m_margin;
        mip->mi_info.mi_media = mtype->mt_type;
        mip->mi_info.mi_nativemedia = mtype->mt_nativetype;
        /*
         * Validate the SDU limits: max must exceed min, and the multicast
         * SDU (defaulting to max when unspecified) must fall in [min, max].
         */
        if (mregp->m_max_sdu <= mregp->m_min_sdu)
                goto fail;
        if (mregp->m_multicast_sdu == 0)
                mregp->m_multicast_sdu = mregp->m_max_sdu;
        if (mregp->m_multicast_sdu < mregp->m_min_sdu ||
            mregp->m_multicast_sdu > mregp->m_max_sdu)
                goto fail;
        mip->mi_sdu_min = mregp->m_min_sdu;
        mip->mi_sdu_max = mregp->m_max_sdu;
        mip->mi_sdu_multicast = mregp->m_multicast_sdu;
        mip->mi_info.mi_addr_length = mip->mi_type->mt_addr_length;
        /*
         * If the media supports a broadcast address, cache a pointer to it
         * in the mac_info_t so that upper layers can use it.
         */
        mip->mi_info.mi_brdcst_addr = mip->mi_type->mt_brdcst_addr;

        mip->mi_v12n_level = mregp->m_v12n;

        /*
         * Copy the unicast source address into the mac_info_t, but only if
         * the MAC-Type defines a non-zero address length.  We need to
         * handle MAC-Types that have an address length of 0
         * (point-to-point protocol MACs for example).
         */
        if (mip->mi_type->mt_addr_length > 0) {
                if (mregp->m_src_addr == NULL)
                        goto fail;
                mip->mi_info.mi_unicst_addr =
                    kmem_alloc(mip->mi_type->mt_addr_length, KM_SLEEP);
                bcopy(mregp->m_src_addr, mip->mi_info.mi_unicst_addr,
                    mip->mi_type->mt_addr_length);

                /*
                 * Copy the fixed 'factory' MAC address from the immutable
                 * info.  This is taken to be the MAC address currently in
                 * use.
                 */
                bcopy(mip->mi_info.mi_unicst_addr, mip->mi_addr,
                    mip->mi_type->mt_addr_length);

                /*
                 * At this point, we should set up the classification
                 * rules etc but we delay it till mac_open() so that
                 * the resource discovery has taken place and we
                 * know someone wants to use the device.  Otherwise
                 * memory gets allocated for Rx ring structures even
                 * during probe.
                 */

                /* Copy the destination address if one is provided. */
                if (mregp->m_dst_addr != NULL) {
                        bcopy(mregp->m_dst_addr, mip->mi_dstaddr,
                            mip->mi_type->mt_addr_length);
                        mip->mi_dstaddr_set = B_TRUE;
                }
        } else if (mregp->m_src_addr != NULL) {
                /* A source address makes no sense with a 0-length MAC-Type. */
                goto fail;
        }

        /*
         * The format of the m_pdata is specific to the plugin.  It is
         * passed in as an argument to all of the plugin callbacks.  The
         * driver can update this information by calling
         * mac_pdata_update().
         */
        if (mip->mi_type->mt_ops.mtops_ops & MTOPS_PDATA_VERIFY) {
                /*
                 * Verify if the supplied plugin data is valid.  Note that
                 * even if the caller passed in a NULL pointer as plugin data,
                 * we still need to verify if that's valid as the plugin may
                 * require plugin data to function.
                 */
                if (!mip->mi_type->mt_ops.mtops_pdata_verify(mregp->m_pdata,
                    mregp->m_pdata_size)) {
                        goto fail;
                }
                if (mregp->m_pdata != NULL) {
                        mip->mi_pdata =
                            kmem_alloc(mregp->m_pdata_size, KM_SLEEP);
                        bcopy(mregp->m_pdata, mip->mi_pdata,
                            mregp->m_pdata_size);
                        mip->mi_pdata_size = mregp->m_pdata_size;
                }
        } else if (mregp->m_pdata != NULL) {
                /*
                 * The caller supplied non-NULL plugin data, but the plugin
                 * does not recognize plugin data.
                 */
                err = EINVAL;
                goto fail;
        }

        /*
         * Register the private properties.
         */
        mac_register_priv_prop(mip, mregp->m_priv_props);

        /*
         * Stash the driver callbacks into the mac_impl_t, but first sanity
         * check to make sure all mandatory callbacks are set.
         */
        if (mregp->m_callbacks->mc_getstat == NULL ||
            mregp->m_callbacks->mc_start == NULL ||
            mregp->m_callbacks->mc_stop == NULL ||
            mregp->m_callbacks->mc_setpromisc == NULL ||
            mregp->m_callbacks->mc_multicst == NULL) {
                goto fail;
        }
        mip->mi_callbacks = mregp->m_callbacks;

        if (mac_capab_get((mac_handle_t)mip, MAC_CAPAB_LEGACY,
            &mip->mi_capab_legacy)) {
                mip->mi_state_flags |= MIS_LEGACY;
                mip->mi_phy_dev = mip->mi_capab_legacy.ml_dev;
        } else {
                mip->mi_phy_dev = makedevice(ddi_driver_major(mip->mi_dip),
                    mip->mi_minor);
        }

        /*
         * Allocate a notification thread.  thread_create blocks for memory
         * if needed, it never fails.
         */
        mip->mi_notify_thread = thread_create(NULL, 0, i_mac_notify_thread,
            mip, 0, &p0, TS_RUN, minclsyspri);

        /*
         * Cache the DB_CKSUMFLAGS that this MAC supports.
         */
        mip->mi_tx_cksum_flags = mac_features_to_flags((mac_handle_t)mip);

        /*
         * Initialize the capabilities
         */
        bzero(&mip->mi_rx_rings_cap, sizeof (mac_capab_rings_t));
        bzero(&mip->mi_tx_rings_cap, sizeof (mac_capab_rings_t));

        if (i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_VNIC, NULL))
                mip->mi_state_flags |= MIS_IS_VNIC;

        if (i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_AGGR, NULL))
                mip->mi_state_flags |= MIS_IS_AGGR;

        mac_addr_factory_init(mip);

        mac_transceiver_init(mip);

        mac_led_init(mip);

        /*
         * Enforce the virtualization level registered.
         */
        if (mip->mi_v12n_level & MAC_VIRT_LEVEL1) {
                if (mac_init_rings(mip, MAC_RING_TYPE_RX) != 0 ||
                    mac_init_rings(mip, MAC_RING_TYPE_TX) != 0)
                        goto fail;

                /*
                 * The driver needs to register at least rx rings for this
                 * virtualization level.
                 */
                if (mip->mi_rx_groups == NULL)
                        goto fail;
        }

        /*
         * The driver must set mc_unicst entry point to NULL when it advertises
         * CAP_RINGS for rx groups.
         */
        if (mip->mi_rx_groups != NULL) {
                if (mregp->m_callbacks->mc_unicst != NULL)
                        goto fail;
        } else {
                if (mregp->m_callbacks->mc_unicst == NULL)
                        goto fail;
        }

        /*
         * Initialize MAC addresses.  Must be called after mac_init_rings().
         */
        mac_init_macaddr(mip);

        mip->mi_share_capab.ms_snum = 0;
        if (mip->mi_v12n_level & MAC_VIRT_HIO) {
                (void) mac_capab_get((mac_handle_t)mip, MAC_CAPAB_SHARES,
                    &mip->mi_share_capab);
        }

        /*
         * Initialize the kstats for this device.
         */
        mac_driver_stat_create(mip);

        /* Zero out any properties. */
        bzero(&mip->mi_resource_props, sizeof (mac_resource_props_t));

        if (mip->mi_minor <= MAC_MAX_MINOR) {
                /* Create a style-2 DLPI device */
                if (ddi_create_minor_node(mip->mi_dip, driver, S_IFCHR, 0,
                    DDI_NT_NET, CLONE_DEV) != DDI_SUCCESS)
                        goto fail;
                style2_created = B_TRUE;

                /* Create a style-1 DLPI device */
                if (ddi_create_minor_node(mip->mi_dip, mip->mi_name, S_IFCHR,
                    mip->mi_minor, DDI_NT_NET, 0) != DDI_SUCCESS)
                        goto fail;
                style1_created = B_TRUE;
        }

        mac_flow_l2tab_create(mip, &mip->mi_flow_tab);

        rw_enter(&i_mac_impl_lock, RW_WRITER);
        if (mod_hash_insert(i_mac_impl_hash,
            (mod_hash_key_t)mip->mi_name, (mod_hash_val_t)mip) != 0) {
                rw_exit(&i_mac_impl_lock);
                err = EEXIST;
                goto fail;
        }

        DTRACE_PROBE2(mac__register, struct devnames *, dnp,
            (mac_impl_t *), mip);

        /*
         * Mark the MAC to be ready for open.
         */
        mip->mi_state_flags &= ~MIS_DISABLED;
        rw_exit(&i_mac_impl_lock);

        atomic_inc_32(&i_mac_impl_count);

        cmn_err(CE_NOTE, "!%s registered", mip->mi_name);
        *mhp = (mac_handle_t)mip;
        return (0);

fail:
        /*
         * Unwind everything set up above, in roughly the reverse order
         * of construction.  Cleanup routines tolerate partially
         * initialized state (e.g. mi_type may be NULL).
         */
        if (style1_created)
                ddi_remove_minor_node(mip->mi_dip, mip->mi_name);

        if (style2_created)
                ddi_remove_minor_node(mip->mi_dip, driver);

        mac_addr_factory_fini(mip);

        /* Clean up registered MAC addresses */
        mac_fini_macaddr(mip);

        /* Clean up registered rings */
        mac_free_rings(mip, MAC_RING_TYPE_RX);
        mac_free_rings(mip, MAC_RING_TYPE_TX);

        /* Clean up notification thread */
        if (mip->mi_notify_thread != NULL)
                i_mac_notify_exit(mip);

        if (mip->mi_info.mi_unicst_addr != NULL) {
                kmem_free(mip->mi_info.mi_unicst_addr,
                    mip->mi_type->mt_addr_length);
                mip->mi_info.mi_unicst_addr = NULL;
        }

        mac_driver_stat_delete(mip);

        if (mip->mi_type != NULL) {
                atomic_dec_32(&mip->mi_type->mt_ref);
                mip->mi_type = NULL;
        }

if (mip->mi_pdata != NULL) { 521 kmem_free(mip->mi_pdata, mip->mi_pdata_size); 522 mip->mi_pdata = NULL; 523 mip->mi_pdata_size = 0; 524 } 525 526 if (minor != 0) { 527 ASSERT(minor > MAC_MAX_MINOR); 528 mac_minor_rele(minor); 529 } 530 531 mip->mi_state_flags = 0; 532 mac_unregister_priv_prop(mip); 533 534 /* 535 * Clear the state before destroying the mac_impl_t 536 */ 537 mip->mi_state_flags = 0; 538 539 kmem_cache_free(i_mac_impl_cachep, mip); 540 return (err); 541 } 542 543 /* 544 * Unregister from the GLDv3 framework 545 */ 546 int 547 mac_unregister(mac_handle_t mh) 548 { 549 int err; 550 mac_impl_t *mip = (mac_impl_t *)mh; 551 mod_hash_val_t val; 552 mac_margin_req_t *mmr, *nextmmr; 553 554 /* Fail the unregister if there are any open references to this mac. */ 555 if ((err = mac_disable_nowait(mh)) != 0) 556 return (err); 557 558 /* 559 * Clean up notification thread and wait for it to exit. 560 */ 561 i_mac_notify_exit(mip); 562 563 /* 564 * Prior to acquiring the MAC perimeter, remove the MAC instance from 565 * the internal hash table. Such removal means table-walkers that 566 * acquire the perimeter will not do so on behalf of what we are 567 * unregistering, which prevents a deadlock. 568 */ 569 rw_enter(&i_mac_impl_lock, RW_WRITER); 570 (void) mod_hash_remove(i_mac_impl_hash, 571 (mod_hash_key_t)mip->mi_name, &val); 572 rw_exit(&i_mac_impl_lock); 573 ASSERT(mip == (mac_impl_t *)val); 574 575 i_mac_perim_enter(mip); 576 577 /* 578 * There is still resource properties configured over this mac. 
         */
        if (mip->mi_resource_props.mrp_mask != 0)
                mac_fastpath_enable((mac_handle_t)mip);

        if (mip->mi_minor < MAC_MAX_MINOR + 1) {
                /* Remove both the style-1 and style-2 DLPI minor nodes. */
                ddi_remove_minor_node(mip->mi_dip, mip->mi_name);
                ddi_remove_minor_node(mip->mi_dip,
                    (char *)ddi_driver_name(mip->mi_dip));
        }

        ASSERT(mip->mi_nactiveclients == 0 && !(mip->mi_state_flags &
            MIS_EXCLUSIVE));

        mac_driver_stat_delete(mip);

        ASSERT(i_mac_impl_count > 0);
        atomic_dec_32(&i_mac_impl_count);

        if (mip->mi_pdata != NULL)
                kmem_free(mip->mi_pdata, mip->mi_pdata_size);
        mip->mi_pdata = NULL;
        mip->mi_pdata_size = 0;

        /*
         * Free the list of margin requests.
         */
        for (mmr = mip->mi_mmrp; mmr != NULL; mmr = nextmmr) {
                nextmmr = mmr->mmr_nextp;
                kmem_free(mmr, sizeof (mac_margin_req_t));
        }
        mip->mi_mmrp = NULL;

        mip->mi_linkstate = mip->mi_lowlinkstate = LINK_STATE_UNKNOWN;
        kmem_free(mip->mi_info.mi_unicst_addr, mip->mi_type->mt_addr_length);
        mip->mi_info.mi_unicst_addr = NULL;

        atomic_dec_32(&mip->mi_type->mt_ref);
        mip->mi_type = NULL;

        /*
         * Free the primary MAC address.
         */
        mac_fini_macaddr(mip);

        /*
         * free all rings
         */
        mac_free_rings(mip, MAC_RING_TYPE_RX);
        mac_free_rings(mip, MAC_RING_TYPE_TX);

        mac_addr_factory_fini(mip);

        bzero(mip->mi_addr, MAXMACADDRLEN);
        bzero(mip->mi_dstaddr, MAXMACADDRLEN);
        mip->mi_dstaddr_set = B_FALSE;

        /* and the flows */
        mac_flow_tab_destroy(mip->mi_flow_tab);
        mip->mi_flow_tab = NULL;

        if (mip->mi_minor > MAC_MAX_MINOR)
                mac_minor_rele(mip->mi_minor);

        cmn_err(CE_NOTE, "!%s unregistered", mip->mi_name);

        /*
         * Reset the perim related fields to default values before
         * kmem_cache_free
         */
        i_mac_perim_exit(mip);
        mip->mi_state_flags = 0;

        mac_unregister_priv_prop(mip);

        ASSERT(mip->mi_bridge_link == NULL);
        kmem_cache_free(i_mac_impl_cachep, mip);

        return (0);
}

/* DATA RECEPTION */

/*
 * This function is invoked for packets received by the MAC driver in
 * interrupt context.  The ring generation number provided by the driver
 * is matched with the ring generation number held in MAC.  If they do not
 * match, received packets are considered stale packets coming from an older
 * assignment of the ring.  Drop them.
 */
void
mac_rx_ring(mac_handle_t mh, mac_ring_handle_t mrh, mblk_t *mp_chain,
    uint64_t mr_gen_num)
{
        mac_ring_t *mr = (mac_ring_t *)mrh;

        if ((mr != NULL) && (mr->mr_gen_num != mr_gen_num)) {
                DTRACE_PROBE2(mac__rx__rings__stale__packet, uint64_t,
                    mr->mr_gen_num, uint64_t, mr_gen_num);
                freemsgchain(mp_chain);
                return;
        }
        mac_rx(mh, (mac_resource_handle_t)mrh, mp_chain);
}

/*
 * This function is invoked for each packet received by the underlying driver.
 */
void
mac_rx(mac_handle_t mh, mac_resource_handle_t mrh, mblk_t *mp_chain)
{
        mac_impl_t *mip = (mac_impl_t *)mh;

        /*
         * Check if the link is part of a bridge.
         * If not, then we don't need
         * to take the lock to remain consistent.  Make this common case
         * lock-free and tail-call optimized.
         */
        if (mip->mi_bridge_link == NULL) {
                mac_rx_common(mh, mrh, mp_chain);
        } else {
                /*
                 * Once we take a reference on the bridge link, the bridge
                 * module itself can't unload, so the callback pointers are
                 * stable.
                 */
                mutex_enter(&mip->mi_bridge_lock);
                if ((mh = mip->mi_bridge_link) != NULL)
                        mac_bridge_ref_cb(mh, B_TRUE);
                mutex_exit(&mip->mi_bridge_lock);
                if (mh == NULL) {
                        /* The bridge link went away under us; deliver up. */
                        mac_rx_common((mac_handle_t)mip, mrh, mp_chain);
                } else {
                        mac_bridge_rx_cb(mh, mrh, mp_chain);
                        mac_bridge_ref_cb(mh, B_FALSE);
                }
        }
}

/*
 * Special case function: this allows snooping of packets transmitted and
 * received by TRILL.  By design, they go directly into the TRILL module.
 */
void
mac_trill_snoop(mac_handle_t mh, mblk_t *mp)
{
        mac_impl_t *mip = (mac_impl_t *)mh;

        if (mip->mi_promisc_list != NULL)
                mac_promisc_dispatch(mip, mp, NULL, B_FALSE);
}

/*
 * This is the upward reentry point for packets arriving from the bridging
 * module and from mac_rx for links not part of a bridge.
 */
void
mac_rx_common(mac_handle_t mh, mac_resource_handle_t mrh, mblk_t *mp_chain)
{
        mac_impl_t              *mip = (mac_impl_t *)mh;
        mac_ring_t              *mr = (mac_ring_t *)mrh;
        mac_soft_ring_set_t     *mac_srs;
        mblk_t                  *bp = mp_chain;

        /*
         * If there are any promiscuous mode callbacks defined for
         * this MAC, pass them a copy if appropriate.
         */
        if (mip->mi_promisc_list != NULL)
                mac_promisc_dispatch(mip, mp_chain, NULL, B_FALSE);

        if (mr != NULL) {
                /*
                 * If the SRS teardown has started, just return.  The 'mr'
                 * continues to be valid until the driver unregisters the MAC.
                 * Hardware classified packets will not make their way up
                 * beyond this point once the teardown has started.  The driver
                 * is never passed a pointer to a flow entry or SRS or any
                 * structure that can be freed much before mac_unregister.
                 */
                mutex_enter(&mr->mr_lock);
                if ((mr->mr_state != MR_INUSE) || (mr->mr_flag &
                    (MR_INCIPIENT | MR_CONDEMNED | MR_QUIESCE))) {
                        mutex_exit(&mr->mr_lock);
                        freemsgchain(mp_chain);
                        return;
                }

                /*
                 * The ring is in passthru mode; pass the chain up to
                 * the pseudo ring.
                 */
                if (mr->mr_classify_type == MAC_PASSTHRU_CLASSIFIER) {
                        MR_REFHOLD_LOCKED(mr);
                        mutex_exit(&mr->mr_lock);
                        mr->mr_pt_fn(mr->mr_pt_arg1, mr->mr_pt_arg2, mp_chain,
                            B_FALSE);
                        MR_REFRELE(mr);
                        return;
                }

                /*
                 * The passthru callback should only be set when in
                 * MAC_PASSTHRU_CLASSIFIER mode.
                 */
                ASSERT3P(mr->mr_pt_fn, ==, NULL);

                /*
                 * We check if an SRS is controlling this ring.
                 * If so, we can directly call the srs_lower_proc
                 * routine otherwise we need to go through mac_rx_classify
                 * to reach the right place.
                 */
                if (mr->mr_classify_type == MAC_HW_CLASSIFIER) {
                        MR_REFHOLD_LOCKED(mr);
                        mutex_exit(&mr->mr_lock);
                        ASSERT3P(mr->mr_srs, !=, NULL);
                        mac_srs = mr->mr_srs;

                        /*
                         * This is the fast path.  All packets received
                         * on this ring are hardware classified and
                         * share the same MAC header info.
                         */
                        mac_srs->srs_rx.sr_lower_proc(mh,
                            (mac_resource_handle_t)mac_srs, mp_chain, B_FALSE);
                        MR_REFRELE(mr);
                        return;
                }

                mutex_exit(&mr->mr_lock);
                /* We'll fall through to software classification */
        } else {
                flow_entry_t *flent;
                int err;

                rw_enter(&mip->mi_rw_lock, RW_READER);
                if (mip->mi_single_active_client != NULL) {
                        /*
                         * Single-client shortcut: deliver straight to the
                         * sole active client's flow entry, skipping the
                         * flow table lookup.
                         */
                        flent = mip->mi_single_active_client->mci_flent_list;
                        FLOW_TRY_REFHOLD(flent, err);
                        rw_exit(&mip->mi_rw_lock);
                        if (err == 0) {
                                (flent->fe_cb_fn)(flent->fe_cb_arg1,
                                    flent->fe_cb_arg2, mp_chain, B_FALSE);
                                FLOW_REFRELE(flent);
                                return;
                        }
                } else {
                        rw_exit(&mip->mi_rw_lock);
                }
        }

        if (!FLOW_TAB_EMPTY(mip->mi_flow_tab)) {
                /* mac_rx_flow() consumes what it delivers; bp is leftovers. */
                if ((bp = mac_rx_flow(mh, mrh, bp)) == NULL)
                        return;
        }

        freemsgchain(bp);
}

/* DATA TRANSMISSION */

/*
 * A driver's notification to resume transmission, in case of a provider
 * without TX rings.
 */
void
mac_tx_update(mac_handle_t mh)
{
        mac_tx_ring_update(mh, NULL);
}

/*
 * A driver's notification to resume transmission on the specified TX ring.
 */
void
mac_tx_ring_update(mac_handle_t mh, mac_ring_handle_t rh)
{
        i_mac_tx_srs_notify((mac_impl_t *)mh, rh);
}

/* LINK STATE */
/*
 * Notify the MAC layer about a link state change
 */
void
mac_link_update(mac_handle_t mh, link_state_t link)
{
        mac_impl_t *mip = (mac_impl_t *)mh;

        /*
         * Save the link state.
         */
        mip->mi_lowlinkstate = link;

        /*
         * Send a MAC_NOTE_LOWLINK notification.  This tells the notification
         * thread to deliver both lower and upper notifications.
         */
        i_mac_notify(mip, MAC_NOTE_LOWLINK);
}

/*
 * Notify the MAC layer about a link state change due to bridging.
 */
void
mac_link_redo(mac_handle_t mh, link_state_t link)
{
        mac_impl_t *mip = (mac_impl_t *)mh;

        /*
         * Save the link state.
         */
        mip->mi_linkstate = link;

        /*
         * Send a MAC_NOTE_LINK notification.  Only upper notifications are
         * made.
         */
        i_mac_notify(mip, MAC_NOTE_LINK);
}

/* MINOR NODE HANDLING */

/*
 * Given a dev_t, return the instance number (PPA) associated with it.
 * Drivers can use this in their getinfo(9e) implementation to lookup
 * the instance number (i.e. PPA) of the device, to use as an index to
 * their own array of soft state structures.
 *
 * Returns -1 on error.
 */
int
mac_devt_to_instance(dev_t devt)
{
        return (dld_devt_to_instance(devt));
}

/*
 * This function returns the first minor number that is available for
 * driver private use.  All minor numbers smaller than this are
 * reserved for GLDv3 use.
 */
minor_t
mac_private_minor(void)
{
        return (MAC_PRIVATE_MINOR);
}

/* OTHER CONTROL INFORMATION */

/*
 * A driver notified us that its primary MAC address has changed.
 */
void
mac_unicst_update(mac_handle_t mh, const uint8_t *addr)
{
        mac_impl_t *mip = (mac_impl_t *)mh;

        /* Nothing to do for MAC-Types without a link-layer address. */
        if (mip->mi_type->mt_addr_length == 0)
                return;

        i_mac_perim_enter(mip);

        /*
         * If address changes, freshen the MAC address value and update
         * all MAC clients that share this MAC address.
         */
        if (bcmp(addr, mip->mi_addr, mip->mi_type->mt_addr_length) != 0) {
                mac_freshen_macaddr(mac_find_macaddr(mip, mip->mi_addr),
                    (uint8_t *)addr);
        }

        i_mac_perim_exit(mip);

        /*
         * Send a MAC_NOTE_UNICST notification.
         */
        i_mac_notify(mip, MAC_NOTE_UNICST);
}

/*
 * A driver notified us that its destination address has changed
 * (e.g. a point-to-point peer).  Record it and send MAC_NOTE_DEST.
 */
void
mac_dst_update(mac_handle_t mh, const uint8_t *addr)
{
        mac_impl_t *mip = (mac_impl_t *)mh;

        if (mip->mi_type->mt_addr_length == 0)
                return;

        i_mac_perim_enter(mip);
        bcopy(addr, mip->mi_dstaddr, mip->mi_type->mt_addr_length);
        i_mac_perim_exit(mip);
        i_mac_notify(mip, MAC_NOTE_DEST);
}

/*
 * MAC plugin information changed.
 */
int
mac_pdata_update(mac_handle_t mh, void *mac_pdata, size_t dsize)
{
        mac_impl_t *mip = (mac_impl_t *)mh;

        /*
         * Verify that the plugin supports MAC plugin data and that the
         * supplied data is valid.
         */
        if (!(mip->mi_type->mt_ops.mtops_ops & MTOPS_PDATA_VERIFY))
                return (EINVAL);
        if (!mip->mi_type->mt_ops.mtops_pdata_verify(mac_pdata, dsize))
                return (EINVAL);

        if (mip->mi_pdata != NULL)
                kmem_free(mip->mi_pdata, mip->mi_pdata_size);

        mip->mi_pdata = kmem_alloc(dsize, KM_SLEEP);
        bcopy(mac_pdata, mip->mi_pdata, dsize);
        mip->mi_pdata_size = dsize;

        /*
         * Since the MAC plugin data is used to construct MAC headers that
         * were cached in fast-path headers, we need to flush fast-path
         * information for links associated with this mac.
         */
        i_mac_notify(mip, MAC_NOTE_FASTPATH_FLUSH);
        return (0);
}

/*
 * The mac provider or mac framework calls this function when it wants
 * to notify upstream consumers that the capabilities have changed and
 * that they should modify their own internal state accordingly.
 *
 * We currently have no regard for the fact that a provider could
 * decide to drop capabilities which would invalidate pending traffic.
 * For example, if one was to disable the Tx checksum offload while
 * TCP/IP traffic was being sent by mac clients relying on that
 * feature, then those packets would hit the wire with missing or
 * partial checksums.  A proper solution involves not only providing
 * notification, but also performing client quiescing.  That is, a capab
 * change should be treated as an atomic transaction that forms a
 * barrier between traffic relying on the current capabs and traffic
 * relying on the new capabs.  In practice, simnet is currently the
 * only provider that could hit this, and it's an easily avoidable
 * situation (and at worst it should only lead to some dropped
 * packets).  But if we ever want better on-the-fly capab change to
 * actual hardware providers, then we should give this update
 * mechanism a proper implementation.
 */
void
mac_capab_update(mac_handle_t mh)
{
        /*
         * Send a MAC_NOTE_CAPAB_CHG notification to alert upstream
         * clients to renegotiate capabilities.
         */
        i_mac_notify((mac_impl_t *)mh, MAC_NOTE_CAPAB_CHG);
}

/*
 * Used by normal drivers to update the max sdu size.
 * We need to handle the case of a smaller mi_sdu_multicast
 * since this is called by mac_set_mtu() even for drivers that
 * have differing unicast and multicast mtu and we don't want to
 * increase the multicast mtu by accident in that case.
 */
int
mac_maxsdu_update(mac_handle_t mh, uint_t sdu_max)
{
        mac_impl_t *mip = (mac_impl_t *)mh;

        if (sdu_max == 0 || sdu_max < mip->mi_sdu_min)
                return (EINVAL);
        mip->mi_sdu_max = sdu_max;
        /* Clamp the multicast SDU so it never exceeds the new maximum. */
        if (mip->mi_sdu_multicast > mip->mi_sdu_max)
                mip->mi_sdu_multicast = mip->mi_sdu_max;

        /* Send a MAC_NOTE_SDU_SIZE notification. */
        i_mac_notify(mip, MAC_NOTE_SDU_SIZE);
        return (0);
}

/*
 * Version of the above function that is used by drivers that have a different
 * max sdu size for multicast/broadcast vs. unicast.
1063 */ 1064 int 1065 mac_maxsdu_update2(mac_handle_t mh, uint_t sdu_max, uint_t sdu_multicast) 1066 { 1067 mac_impl_t *mip = (mac_impl_t *)mh; 1068 1069 if (sdu_max == 0 || sdu_max < mip->mi_sdu_min) 1070 return (EINVAL); 1071 if (sdu_multicast == 0) 1072 sdu_multicast = sdu_max; 1073 if (sdu_multicast > sdu_max || sdu_multicast < mip->mi_sdu_min) 1074 return (EINVAL); 1075 mip->mi_sdu_max = sdu_max; 1076 mip->mi_sdu_multicast = sdu_multicast; 1077 1078 /* Send a MAC_NOTE_SDU_SIZE notification. */ 1079 i_mac_notify(mip, MAC_NOTE_SDU_SIZE); 1080 return (0); 1081 } 1082 1083 static void 1084 mac_ring_intr_retarget(mac_group_t *group, mac_ring_t *ring) 1085 { 1086 mac_client_impl_t *mcip; 1087 flow_entry_t *flent; 1088 mac_soft_ring_set_t *mac_rx_srs; 1089 mac_cpus_t *srs_cpu; 1090 int i; 1091 1092 if (((mcip = MAC_GROUP_ONLY_CLIENT(group)) != NULL) && 1093 (!ring->mr_info.mri_intr.mi_ddi_shared)) { 1094 /* interrupt can be re-targeted */ 1095 ASSERT(group->mrg_state == MAC_GROUP_STATE_RESERVED); 1096 flent = mcip->mci_flent; 1097 if (ring->mr_type == MAC_RING_TYPE_RX) { 1098 for (i = 0; i < flent->fe_rx_srs_cnt; i++) { 1099 mac_rx_srs = flent->fe_rx_srs[i]; 1100 if (mac_rx_srs->srs_ring != ring) 1101 continue; 1102 srs_cpu = &mac_rx_srs->srs_cpu; 1103 mutex_enter(&cpu_lock); 1104 mac_rx_srs_retarget_intr(mac_rx_srs, 1105 srs_cpu->mc_rx_intr_cpu); 1106 mutex_exit(&cpu_lock); 1107 break; 1108 } 1109 } else { 1110 if (flent->fe_tx_srs != NULL) { 1111 mutex_enter(&cpu_lock); 1112 mac_tx_srs_retarget_intr( 1113 flent->fe_tx_srs); 1114 mutex_exit(&cpu_lock); 1115 } 1116 } 1117 } 1118 } 1119 1120 /* 1121 * Clients like aggr create pseudo rings (mac_ring_t) and expose them to 1122 * their clients. There is a 1-1 mapping pseudo ring and the hardware 1123 * ring. ddi interrupt handles are exported from the hardware ring to 1124 * the pseudo ring. 
Thus when the interrupt handle changes, clients of
 * aggr that are using the handle need to use the new handle and
 * re-target their interrupts.
 *
 * NOTE: this function exits 'mip's perimeter on behalf of its caller:
 * it drops it up front to respect the aggr-before-port lock ordering,
 * and does not re-acquire it before returning.
 */
static void
mac_pseudo_ring_intr_retarget(mac_impl_t *mip, mac_ring_t *ring,
    ddi_intr_handle_t ddh)
{
	mac_ring_t *pring;
	mac_group_t *pgroup;
	mac_impl_t *pmip;
	char macname[MAXNAMELEN];
	mac_perim_handle_t p_mph;
	uint64_t saved_gen_num;

again:
	pring = (mac_ring_t *)ring->mr_prh;
	pgroup = (mac_group_t *)pring->mr_gh;
	pmip = (mac_impl_t *)pgroup->mrg_mh;
	/* Snapshot the generation to detect ring reassignment below. */
	saved_gen_num = ring->mr_gen_num;
	(void) strlcpy(macname, pmip->mi_name, MAXNAMELEN);
	/*
	 * We need to enter aggr's perimeter. The locking hierarchy
	 * dictates that aggr's perimeter should be entered first
	 * and then the port's perimeter. So drop the port's
	 * perimeter, enter aggr's and then re-enter port's
	 * perimeter.
	 */
	i_mac_perim_exit(mip);
	/*
	 * While we know pmip is the aggr's mip, there is a
	 * possibility that aggr could have unregistered by
	 * the time we exit port's perimeter (mip) and
	 * enter aggr's perimeter (pmip). To avoid that
	 * scenario, enter aggr's perimeter using its name.
	 */
	if (mac_perim_enter_by_macname(macname, &p_mph) != 0)
		return;
	i_mac_perim_enter(mip);
	/*
	 * Check if the ring got assigned to another aggregation before
	 * we could enter aggr's and the port's perimeter. When a ring
	 * gets deleted from an aggregation, it calls mac_stop_ring()
	 * which increments the generation number. So checking
	 * generation number will be enough.
 */
	if (ring->mr_gen_num != saved_gen_num && ring->mr_prh != NULL) {
		/* Ring moved; drop both perimeters in order and retry. */
		i_mac_perim_exit(mip);
		mac_perim_exit(p_mph);
		i_mac_perim_enter(mip);
		goto again;
	}

	/* Check if pseudo ring is still present */
	if (ring->mr_prh != NULL) {
		/* Propagate the new handle and shared-ness upward. */
		pring->mr_info.mri_intr.mi_ddi_handle = ddh;
		pring->mr_info.mri_intr.mi_ddi_shared =
		    ring->mr_info.mri_intr.mi_ddi_shared;
		if (ddh != NULL)
			mac_ring_intr_retarget(pgroup, pring);
	}
	i_mac_perim_exit(mip);
	mac_perim_exit(p_mph);
}
/*
 * API called by driver to provide new interrupt handle for TX/RX rings.
 * This usually happens when IRM (Interrupt Resource Management)
 * framework either gives the driver more MSI-x interrupts or takes
 * away MSI-x interrupts from the driver.
 */
void
mac_ring_intr_set(mac_ring_handle_t mrh, ddi_intr_handle_t ddh)
{
	mac_ring_t *ring = (mac_ring_t *)mrh;
	mac_group_t *group = (mac_group_t *)ring->mr_gh;
	mac_impl_t *mip = (mac_impl_t *)group->mrg_mh;

	i_mac_perim_enter(mip);
	ring->mr_info.mri_intr.mi_ddi_handle = ddh;
	if (ddh == NULL) {
		/* Interrupts being reset */
		ring->mr_info.mri_intr.mi_ddi_shared = B_FALSE;
		if (ring->mr_prh != NULL) {
			/*
			 * mac_pseudo_ring_intr_retarget() exits mip's
			 * perimeter itself, hence the bare return.
			 */
			mac_pseudo_ring_intr_retarget(mip, ring, ddh);
			return;
		}
	} else {
		/* New interrupt handle */
		mac_compare_ddi_handle(mip->mi_rx_groups,
		    mip->mi_rx_group_count, ring);
		if (!ring->mr_info.mri_intr.mi_ddi_shared) {
			mac_compare_ddi_handle(mip->mi_tx_groups,
			    mip->mi_tx_group_count, ring);
		}
		if (ring->mr_prh != NULL) {
			/* Perimeter exit handled by the callee; see above. */
			mac_pseudo_ring_intr_retarget(mip, ring, ddh);
			return;
		} else {
			mac_ring_intr_retarget(group, ring);
		}
	}
	i_mac_perim_exit(mip);
}

/* PRIVATE FUNCTIONS, FOR INTERNAL USE ONLY */

/*
 * Updates the mac_impl structure with the current state of the link
 */
static
void
i_mac_log_link_state(mac_impl_t *mip)
{
	/*
	 * If no change, then it is not interesting.
	 */
	if (mip->mi_lastlowlinkstate == mip->mi_lowlinkstate)
		return;

	switch (mip->mi_lowlinkstate) {
	case LINK_STATE_UP:
		/* Let the media type append details (e.g. speed/duplex). */
		if (mip->mi_type->mt_ops.mtops_ops & MTOPS_LINK_DETAILS) {
			char det[200];

			mip->mi_type->mt_ops.mtops_link_details(det,
			    sizeof (det), (mac_handle_t)mip, mip->mi_pdata);

			cmn_err(CE_NOTE, "!%s link up, %s", mip->mi_name, det);
		} else {
			cmn_err(CE_NOTE, "!%s link up", mip->mi_name);
		}
		break;

	case LINK_STATE_DOWN:
		/*
		 * Only transitions from UP to DOWN are interesting
		 */
		if (mip->mi_lastlowlinkstate != LINK_STATE_UNKNOWN)
			cmn_err(CE_NOTE, "!%s link down", mip->mi_name);
		break;

	case LINK_STATE_UNKNOWN:
		/*
		 * This case is normally not interesting.
		 */
		break;
	}
	/* Remember the state we just reported so we only log changes. */
	mip->mi_lastlowlinkstate = mip->mi_lowlinkstate;
}

/*
 * Main routine for the callbacks notifications thread.
 * Sleeps on mcbi_cv until mi_notify_bits is non-zero, then dispatches
 * each pending notification; a set MAC_NNOTE bit is the quit request.
 */
static void
i_mac_notify_thread(void *arg)
{
	mac_impl_t *mip = arg;
	callb_cpr_t cprinfo;
	mac_cb_t *mcb;
	mac_cb_info_t *mcbi;
	mac_notify_cb_t *mncb;

	mcbi = &mip->mi_notify_cb_info;
	CALLB_CPR_INIT(&cprinfo, mcbi->mcbi_lockp, callb_generic_cpr,
	    "i_mac_notify_thread");

	mutex_enter(mcbi->mcbi_lockp);

	for (;;) {
		uint32_t bits;
		uint32_t type;

		bits = mip->mi_notify_bits;
		if (bits == 0) {
			/* Nothing pending; CPR-safe wait for a signal. */
			CALLB_CPR_SAFE_BEGIN(&cprinfo);
			cv_wait(&mcbi->mcbi_cv, mcbi->mcbi_lockp);
			CALLB_CPR_SAFE_END(&cprinfo, mcbi->mcbi_lockp);
			continue;
		}
		/* Consume the pending bits before dropping the lock. */
		mip->mi_notify_bits = 0;
		if ((bits & (1 << MAC_NNOTE)) != 0) {
			/* request to quit */
			ASSERT(mip->mi_state_flags & MIS_DISABLED);
			break;
		}

		mutex_exit(mcbi->mcbi_lockp);

		/*
		 * Log link changes on the actual link,
but then do reports on
		 * synthetic state (if part of a bridge).
		 */
		if ((bits & (1 << MAC_NOTE_LOWLINK)) != 0) {
			link_state_t newstate;
			mac_handle_t mh;

			i_mac_log_link_state(mip);
			newstate = mip->mi_lowlinkstate;
			/*
			 * If bridged, let the bridge translate the low-level
			 * link state into the state clients should observe.
			 */
			if (mip->mi_bridge_link != NULL) {
				mutex_enter(&mip->mi_bridge_lock);
				/* Re-check under mi_bridge_lock. */
				if ((mh = mip->mi_bridge_link) != NULL) {
					newstate = mac_bridge_ls_cb(mh,
					    newstate);
				}
				mutex_exit(&mip->mi_bridge_lock);
			}
			if (newstate != mip->mi_linkstate) {
				mip->mi_linkstate = newstate;
				/* Promote to a client-visible link change. */
				bits |= 1 << MAC_NOTE_LINK;
			}
		}

		/*
		 * Depending on which capabs have changed, the Tx
		 * checksum flags may also need to be updated.
		 */
		if ((bits & (1 << MAC_NOTE_CAPAB_CHG)) != 0) {
			mac_perim_handle_t mph;
			mac_handle_t mh = (mac_handle_t)mip;

			mac_perim_enter_by_mh(mh, &mph);
			mip->mi_tx_cksum_flags = mac_features_to_flags(mh);
			mac_perim_exit(mph);
		}

		/*
		 * Do notification callbacks for each notification type.
		 */
		for (type = 0; type < MAC_NNOTE; type++) {
			if ((bits & (1 << type)) == 0) {
				continue;
			}

			/* Framework-internal default handler first. */
			if (mac_notify_cb_list[type] != NULL)
				(*mac_notify_cb_list[type])(mip);

			/*
			 * Walk the list of notifications.
			 */
			MAC_CALLBACK_WALKER_INC(&mip->mi_notify_cb_info);
			for (mcb = mip->mi_notify_cb_list; mcb != NULL;
			    mcb = mcb->mcb_nextp) {
				mncb = (mac_notify_cb_t *)mcb->mcb_objp;
				mncb->mncb_fn(mncb->mncb_arg, type);
			}
			MAC_CALLBACK_WALKER_DCR(&mip->mi_notify_cb_info,
			    &mip->mi_notify_cb_list);
		}

		mutex_enter(mcbi->mcbi_lockp);
	}

	/* Tell i_mac_notify_exit() we are done and wake it up. */
	mip->mi_state_flags |= MIS_NOTIFY_DONE;
	cv_broadcast(&mcbi->mcbi_cv);

	/* CALLB_CPR_EXIT drops the lock */
	CALLB_CPR_EXIT(&cprinfo);
	thread_exit();
}

/*
 * Signal the i_mac_notify_thread asking it to quit.
 * Then wait till it is done.
 */
void
i_mac_notify_exit(mac_impl_t *mip)
{
	mac_cb_info_t *mcbi;

	mcbi = &mip->mi_notify_cb_info;

	mutex_enter(mcbi->mcbi_lockp);
	/* The MAC_NNOTE bit is the quit request; see i_mac_notify_thread. */
	mip->mi_notify_bits = (1 << MAC_NNOTE);
	cv_broadcast(&mcbi->mcbi_cv);

	/* Wait for the thread to acknowledge by setting MIS_NOTIFY_DONE. */
	while ((mip->mi_notify_thread != NULL) &&
	    !(mip->mi_state_flags & MIS_NOTIFY_DONE)) {
		cv_wait(&mcbi->mcbi_cv, mcbi->mcbi_lockp);
	}

	/* Necessary clean up before doing kmem_cache_free */
	mip->mi_state_flags &= ~MIS_NOTIFY_DONE;
	mip->mi_notify_bits = 0;
	mip->mi_notify_thread = NULL;
	mutex_exit(mcbi->mcbi_lockp);
}

/*
 * Entry point invoked by drivers to dynamically add a ring to an
 * existing group.  Returns the result of i_mac_group_add_ring().
 */
int
mac_group_add_ring(mac_group_handle_t gh, int index)
{
	mac_group_t *group = (mac_group_t *)gh;
	mac_impl_t *mip = (mac_impl_t *)group->mrg_mh;
	int ret;

	i_mac_perim_enter(mip);
	ret = i_mac_group_add_ring(group, NULL, index);
	i_mac_perim_exit(mip);
	return (ret);
}

/*
 * Entry point invoked by drivers to dynamically remove a ring
 * from an existing group. The specified ring handle must no longer
 * be used by the driver after a call to this function.
 */
void
mac_group_rem_ring(mac_group_handle_t gh, mac_ring_handle_t rh)
{
	mac_group_t *group = (mac_group_t *)gh;
	mac_impl_t *mip = (mac_impl_t *)group->mrg_mh;

	i_mac_perim_enter(mip);
	i_mac_group_rem_ring(group, (mac_ring_t *)rh, B_TRUE);
	i_mac_perim_exit(mip);
}

/*
 * mac_prop_info_*() callbacks called from the driver's prefix_propinfo()
 * entry points.
 */

/*
 * Record a uint8_t default value for the property being described.
 * No-op if the caller did not ask for a default (pr_default == NULL).
 */
void
mac_prop_info_set_default_uint8(mac_prop_info_handle_t ph, uint8_t val)
{
	mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph;

	/* nothing to do if the caller doesn't want the default value */
	if (pr->pr_default == NULL)
		return;

	ASSERT(pr->pr_default_size >= sizeof (uint8_t));

	/* A single byte can be stored directly, no alignment concern. */
	*(uint8_t *)(pr->pr_default) = val;
	pr->pr_flags |= MAC_PROP_INFO_DEFAULT;
}

/*
 * Record a uint64_t default value for the property being described.
 */
void
mac_prop_info_set_default_uint64(mac_prop_info_handle_t ph, uint64_t val)
{
	mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph;

	/* nothing to do if the caller doesn't want the default value */
	if (pr->pr_default == NULL)
		return;

	ASSERT(pr->pr_default_size >= sizeof (uint64_t));

	/* bcopy, not a direct store: pr_default may be unaligned. */
	bcopy(&val, pr->pr_default, sizeof (val));

	pr->pr_flags |= MAC_PROP_INFO_DEFAULT;
}

/*
 * Record a uint32_t default value for the property being described.
 */
void
mac_prop_info_set_default_uint32(mac_prop_info_handle_t ph, uint32_t val)
{
	mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph;

	/* nothing to do if the caller doesn't want the default value */
	if (pr->pr_default == NULL)
		return;

	ASSERT(pr->pr_default_size >= sizeof (uint32_t));

	/* bcopy, not a direct store: pr_default may be unaligned. */
	bcopy(&val, pr->pr_default, sizeof (val));

	pr->pr_flags |= MAC_PROP_INFO_DEFAULT;
}

/*
 * Record a string default value; sets pr_errno to ENOBUFS (instead of
 * truncating) if the string does not fit in the caller's buffer.
 */
void
mac_prop_info_set_default_str(mac_prop_info_handle_t ph, const char *str)
{
	mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph;

	/* nothing to do if the caller doesn't want the default value */
	if (pr->pr_default == NULL)
		return;

	if (strlen(str) >= pr->pr_default_size)
		pr->pr_errno = ENOBUFS;
	else
		(void) strlcpy(pr->pr_default, str, pr->pr_default_size);
	pr->pr_flags |= MAC_PROP_INFO_DEFAULT;
}

/*
 * Record a link_flowctrl_t default value for the property being described.
 */
void
mac_prop_info_set_default_link_flowctrl(mac_prop_info_handle_t ph,
    link_flowctrl_t val)
{
	mac_prop_info_state_t *pr =
(mac_prop_info_state_t *)ph;

	/* nothing to do if the caller doesn't want the default value */
	if (pr->pr_default == NULL)
		return;

	ASSERT(pr->pr_default_size >= sizeof (link_flowctrl_t));

	/* bcopy, not a direct store: pr_default may be unaligned. */
	bcopy(&val, pr->pr_default, sizeof (val));

	pr->pr_flags |= MAC_PROP_INFO_DEFAULT;
}

/*
 * Append one [min, max] uint32 range to the property's range list.
 * May be called repeatedly; all ranges of a property must share the
 * same type.  Sets pr_errno to ENOSPC when the caller-supplied
 * mac_propval_range_t cannot hold another range.
 */
void
mac_prop_info_set_range_uint32(mac_prop_info_handle_t ph, uint32_t min,
    uint32_t max)
{
	mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph;
	mac_propval_range_t *range = pr->pr_range;
	mac_propval_uint32_range_t *range32;

	/* nothing to do if the caller doesn't want the range info */
	if (range == NULL)
		return;

	if (pr->pr_range_cur_count++ == 0) {
		/* first range */
		pr->pr_flags |= MAC_PROP_INFO_RANGE;
		range->mpr_type = MAC_PROPVAL_UINT32;
		/* NOTE(review): assumes mpr_count >= 1 here — confirm. */
	} else {
		/* all ranges of a property should be of the same type */
		ASSERT(range->mpr_type == MAC_PROPVAL_UINT32);
		if (pr->pr_range_cur_count > range->mpr_count) {
			pr->pr_errno = ENOSPC;
			return;
		}
	}

	range32 = range->mpr_range_uint32;
	range32[pr->pr_range_cur_count - 1].mpur_min = min;
	range32[pr->pr_range_cur_count - 1].mpur_max = max;
}

/*
 * Record the property's permission (read-only/read-write) bits.
 */
void
mac_prop_info_set_perm(mac_prop_info_handle_t ph, uint8_t perm)
{
	mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph;

	pr->pr_perm = perm;
	pr->pr_flags |= MAC_PROP_INFO_PERM;
}

/*
 * Retrieve the hardware checksum metadata attached to 'mp'.  Any of the
 * output pointers may be NULL if the caller is not interested in that
 * field.  start/stuff/end are only filled in for HCK_PARTIALCKSUM.
 */
void
mac_hcksum_get(const mblk_t *mp, uint32_t *start, uint32_t *stuff,
    uint32_t *end, uint32_t *value, uint32_t *flags_ptr)
{
	uint32_t flags;

	ASSERT(DB_TYPE(mp) == M_DATA);

	flags = DB_CKSUMFLAGS(mp) & HCK_FLAGS;
	if ((flags & (HCK_PARTIALCKSUM | HCK_FULLCKSUM)) != 0) {
		if (value != NULL)
			*value = (uint32_t)DB_CKSUM16(mp);
		if ((flags & HCK_PARTIALCKSUM) != 0) {
			if (start != NULL)
				*start = (uint32_t)DB_CKSUMSTART(mp);
			if (stuff != NULL)
				*stuff = (uint32_t)DB_CKSUMSTUFF(mp);
			if (end != NULL)
				*end = (uint32_t)DB_CKSUMEND(mp);
		}
	}

	if (flags_ptr != NULL)
		*flags_ptr = flags;
}

/*
 * Attach hardware checksum metadata to 'mp'.  Counterpart of
 * mac_hcksum_get(); all five fields are stored unconditionally.
 */
void
mac_hcksum_set(mblk_t *mp, uint32_t start, uint32_t stuff, uint32_t end,
    uint32_t value, uint32_t flags)
{
	ASSERT(DB_TYPE(mp) == M_DATA);

	DB_CKSUMSTART(mp) = (intptr_t)start;
	DB_CKSUMSTUFF(mp) = (intptr_t)stuff;
	DB_CKSUMEND(mp) = (intptr_t)end;
	DB_CKSUMFLAGS(mp) = (uint16_t)flags;
	DB_CKSUM16(mp) = (uint16_t)value;
}

/*
 * Copy all checksum/LSO metadata from 'src' to 'dst'.
 */
void
mac_hcksum_clone(const mblk_t *src, mblk_t *dst)
{
	ASSERT3U(DB_TYPE(src), ==, M_DATA);
	ASSERT3U(DB_TYPE(dst), ==, M_DATA);

	/*
	 * Do these assignments unconditionally, rather than only when
	 * flags is non-zero. This protects a situation where zeroed
	 * hcksum data does not make the jump onto an mblk_t with
	 * stale data in those fields. It's important to copy all
	 * possible flags (HCK_* as well as HW_*) and not just the
	 * checksum specific flags. Dropping flags during a clone
	 * could result in dropped packets. If the caller has good
	 * reason to drop those flags then it should do it manually,
	 * after the clone.
	 */
	DB_CKSUMFLAGS(dst) = DB_CKSUMFLAGS(src);
	DB_CKSUMSTART(dst) = DB_CKSUMSTART(src);
	DB_CKSUMSTUFF(dst) = DB_CKSUMSTUFF(src);
	DB_CKSUMEND(dst) = DB_CKSUMEND(src);
	DB_CKSUM16(dst) = DB_CKSUM16(src);
	DB_LSOMSS(dst) = DB_LSOMSS(src);
}

/*
 * Retrieve LSO metadata from 'mp': *flags gets the HW_LSO bit (if any),
 * and *mss is only filled in when LSO is actually requested.
 */
void
mac_lso_get(mblk_t *mp, uint32_t *mss, uint32_t *flags)
{
	ASSERT(DB_TYPE(mp) == M_DATA);

	if (flags != NULL) {
		*flags = DB_CKSUMFLAGS(mp) & HW_LSO;
		if ((*flags != 0) && (mss != NULL))
			*mss = (uint32_t)DB_LSOMSS(mp);
	}
}

/*
 * Record whether a transceiver is physically present.
 */
void
mac_transceiver_info_set_present(mac_transceiver_info_t *infop,
    boolean_t present)
{
	infop->mti_present = present;
}

/*
 * Record whether a transceiver is usable by the driver.
 */
void
mac_transceiver_info_set_usable(mac_transceiver_info_t *infop,
    boolean_t usable)
{
	infop->mti_usable = usable;
}

/*
 * We should really keep track of our offset and not walk everything every
 * time. I can't imagine that this will be kind to us at high packet rates;
 * however, for the moment, let's leave that.
 *
 * This walks a message block chain without pulling up to fill in the context
 * information. Note that the data we care about could be hidden across more
 * than one mblk_t.
1666 */ 1667 static int 1668 mac_meoi_get_uint8(mblk_t *mp, off_t off, uint8_t *out) 1669 { 1670 size_t mpsize; 1671 uint8_t *bp; 1672 1673 mpsize = msgsize(mp); 1674 /* Check for overflow */ 1675 if (off + sizeof (uint16_t) > mpsize) 1676 return (-1); 1677 1678 mpsize = MBLKL(mp); 1679 while (off >= mpsize) { 1680 mp = mp->b_cont; 1681 off -= mpsize; 1682 mpsize = MBLKL(mp); 1683 } 1684 1685 bp = mp->b_rptr + off; 1686 *out = *bp; 1687 return (0); 1688 1689 } 1690 1691 static int 1692 mac_meoi_get_uint16(mblk_t *mp, off_t off, uint16_t *out) 1693 { 1694 size_t mpsize; 1695 uint8_t *bp; 1696 1697 mpsize = msgsize(mp); 1698 /* Check for overflow */ 1699 if (off + sizeof (uint16_t) > mpsize) 1700 return (-1); 1701 1702 mpsize = MBLKL(mp); 1703 while (off >= mpsize) { 1704 mp = mp->b_cont; 1705 off -= mpsize; 1706 mpsize = MBLKL(mp); 1707 } 1708 1709 /* 1710 * Data is in network order. Note the second byte of data might be in 1711 * the next mp. 1712 */ 1713 bp = mp->b_rptr + off; 1714 *out = *bp << 8; 1715 if (off + 1 == mpsize) { 1716 mp = mp->b_cont; 1717 bp = mp->b_rptr; 1718 } else { 1719 bp++; 1720 } 1721 1722 *out |= *bp; 1723 return (0); 1724 1725 } 1726 1727 1728 int 1729 mac_ether_offload_info(mblk_t *mp, mac_ether_offload_info_t *meoi) 1730 { 1731 size_t off; 1732 uint16_t ether; 1733 uint8_t ipproto, iplen, l4len, maclen; 1734 1735 bzero(meoi, sizeof (mac_ether_offload_info_t)); 1736 1737 meoi->meoi_len = msgsize(mp); 1738 off = offsetof(struct ether_header, ether_type); 1739 if (mac_meoi_get_uint16(mp, off, ðer) != 0) 1740 return (-1); 1741 1742 if (ether == ETHERTYPE_VLAN) { 1743 off = offsetof(struct ether_vlan_header, ether_type); 1744 if (mac_meoi_get_uint16(mp, off, ðer) != 0) 1745 return (-1); 1746 meoi->meoi_flags |= MEOI_VLAN_TAGGED; 1747 maclen = sizeof (struct ether_vlan_header); 1748 } else { 1749 maclen = sizeof (struct ether_header); 1750 } 1751 meoi->meoi_flags |= MEOI_L2INFO_SET; 1752 meoi->meoi_l2hlen = maclen; 1753 meoi->meoi_l3proto = 
ether;

	switch (ether) {
	case ETHERTYPE_IP:
		/*
		 * For IPv4 we need to get the length of the header, as it can
		 * be variable.
		 */
		off = offsetof(ipha_t, ipha_version_and_hdr_length) + maclen;
		if (mac_meoi_get_uint8(mp, off, &iplen) != 0)
			return (-1);
		iplen &= 0x0f;
		/*
		 * IHL is in 4-byte words; less than 5 is malformed.
		 * NOTE(review): iplen > 0x0f can never be true after the
		 * mask above — the upper-bound check is redundant.
		 */
		if (iplen < 5 || iplen > 0x0f)
			return (-1);
		iplen *= 4;
		off = offsetof(ipha_t, ipha_protocol) + maclen;
		if (mac_meoi_get_uint8(mp, off, &ipproto) == -1)
			return (-1);
		break;
	case ETHERTYPE_IPV6:
		/* The IPv6 base header has a fixed 40-byte length. */
		iplen = 40;
		off = offsetof(ip6_t, ip6_nxt) + maclen;
		if (mac_meoi_get_uint8(mp, off, &ipproto) == -1)
			return (-1);
		break;
	default:
		/* Unknown L3 protocol: report the L2 info we have. */
		return (0);
	}
	meoi->meoi_l3hlen = iplen;
	meoi->meoi_l4proto = ipproto;
	meoi->meoi_flags |= MEOI_L3INFO_SET;

	switch (ipproto) {
	case IPPROTO_TCP:
		/* TCP data offset is variable; read it from the header. */
		off = offsetof(tcph_t, th_offset_and_rsrvd) + maclen + iplen;
		if (mac_meoi_get_uint8(mp, off, &l4len) == -1)
			return (-1);
		l4len = (l4len & 0xf0) >> 4;
		/*
		 * Data offset is in 4-byte words; less than 5 is malformed.
		 * NOTE(review): l4len > 0xf is unreachable after the shift.
		 */
		if (l4len < 5 || l4len > 0xf)
			return (-1);
		l4len *= 4;
		break;
	case IPPROTO_UDP:
		l4len = sizeof (struct udphdr);
		break;
	case IPPROTO_SCTP:
		l4len = sizeof (sctp_hdr_t);
		break;
	default:
		/* Unknown L4 protocol: report the L2/L3 info we have. */
		return (0);
	}

	meoi->meoi_l4hlen = l4len;
	meoi->meoi_flags |= MEOI_L4INFO_SET;
	return (0);
}