1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright 2012, Nexenta Systems, Inc. All rights reserved.
24 * Copyright (c) 2018, Joyent, Inc.
25 */
26
27/*
28 * Data-Link Driver
29 */
30#include <sys/sysmacros.h>
31#include <sys/strsubr.h>
32#include <sys/strsun.h>
33#include <sys/vlan.h>
34#include <sys/dld_impl.h>
35#include <sys/mac_client.h>
36#include <sys/mac_client_impl.h>
37#include <sys/mac_client_priv.h>
38
39typedef void proto_reqfunc_t(dld_str_t *, mblk_t *);
40
41static proto_reqfunc_t proto_info_req, proto_attach_req, proto_detach_req,
42    proto_bind_req, proto_unbind_req, proto_promiscon_req, proto_promiscoff_req,
43    proto_enabmulti_req, proto_disabmulti_req, proto_physaddr_req,
44    proto_setphysaddr_req, proto_udqos_req, proto_req, proto_capability_req,
45    proto_notify_req, proto_passive_req;
46
47static void proto_capability_advertise(dld_str_t *, mblk_t *);
48static int dld_capab_poll_disable(dld_str_t *, dld_capab_poll_t *);
49static boolean_t check_mod_above(queue_t *, const char *);
50
/*
 * Evaluates to non-zero when `state' is one of the four *_PENDING DLPI
 * states (attach, detach, bind, unbind).  The argument is expanded more
 * than once, so it must not have side effects.
 */
#define	DL_ACK_PENDING(state)			\
	(((state) == DL_ATTACH_PENDING) ||	\
	((state) == DL_DETACH_PENDING) ||	\
	((state) == DL_BIND_PENDING) ||		\
	((state) == DL_UNBIND_PENDING))
56
57/*
58 * Process a DLPI protocol message.
59 * The primitives DL_BIND_REQ, DL_ENABMULTI_REQ, DL_PROMISCON_REQ,
60 * DL_SET_PHYS_ADDR_REQ put the data link below our dld_str_t into an
61 * 'active' state. The primitive DL_PASSIVE_REQ marks our dld_str_t
62 * as 'passive' and forbids it from being subsequently made 'active'
63 * by the above primitives.
64 */
65void
66dld_proto(dld_str_t *dsp, mblk_t *mp)
67{
68	t_uscalar_t		prim;
69
70	if (MBLKL(mp) < sizeof (t_uscalar_t)) {
71		freemsg(mp);
72		return;
73	}
74	prim = ((union DL_primitives *)mp->b_rptr)->dl_primitive;
75
76	switch (prim) {
77	case DL_INFO_REQ:
78		proto_info_req(dsp, mp);
79		break;
80	case DL_BIND_REQ:
81		proto_bind_req(dsp, mp);
82		break;
83	case DL_UNBIND_REQ:
84		proto_unbind_req(dsp, mp);
85		break;
86	case DL_UNITDATA_REQ:
87		proto_unitdata_req(dsp, mp);
88		break;
89	case DL_UDQOS_REQ:
90		proto_udqos_req(dsp, mp);
91		break;
92	case DL_ATTACH_REQ:
93		proto_attach_req(dsp, mp);
94		break;
95	case DL_DETACH_REQ:
96		proto_detach_req(dsp, mp);
97		break;
98	case DL_ENABMULTI_REQ:
99		proto_enabmulti_req(dsp, mp);
100		break;
101	case DL_DISABMULTI_REQ:
102		proto_disabmulti_req(dsp, mp);
103		break;
104	case DL_PROMISCON_REQ:
105		proto_promiscon_req(dsp, mp);
106		break;
107	case DL_PROMISCOFF_REQ:
108		proto_promiscoff_req(dsp, mp);
109		break;
110	case DL_PHYS_ADDR_REQ:
111		proto_physaddr_req(dsp, mp);
112		break;
113	case DL_SET_PHYS_ADDR_REQ:
114		proto_setphysaddr_req(dsp, mp);
115		break;
116	case DL_NOTIFY_REQ:
117		proto_notify_req(dsp, mp);
118		break;
119	case DL_CAPABILITY_REQ:
120		proto_capability_req(dsp, mp);
121		break;
122	case DL_PASSIVE_REQ:
123		proto_passive_req(dsp, mp);
124		break;
125	default:
126		proto_req(dsp, mp);
127		break;
128	}
129}
130
/*
 * Arithmetic negation of `x'.  The whole expansion is parenthesized so the
 * macro behaves as a single operand wherever it is used.
 */
#define	NEG(x)	(-(x))
132typedef struct dl_info_ack_wrapper {
133	dl_info_ack_t		dl_info;
134	uint8_t			dl_addr[MAXMACADDRLEN + sizeof (uint16_t)];
135	uint8_t			dl_brdcst_addr[MAXMACADDRLEN];
136	dl_qos_cl_range1_t	dl_qos_range1;
137	dl_qos_cl_sel1_t	dl_qos_sel1;
138} dl_info_ack_wrapper_t;
139
140/*
141 * DL_INFO_REQ
142 */
143static void
144proto_info_req(dld_str_t *dsp, mblk_t *mp)
145{
146	dl_info_ack_wrapper_t	*dlwp;
147	dl_info_ack_t		*dlp;
148	dl_qos_cl_sel1_t	*selp;
149	dl_qos_cl_range1_t	*rangep;
150	uint8_t			*addr;
151	uint8_t			*brdcst_addr;
152	uint_t			addr_length;
153	uint_t			sap_length;
154	mac_info_t		minfo;
155	mac_info_t		*minfop;
156	queue_t			*q = dsp->ds_wq;
157
158	/*
159	 * Swap the request message for one large enough to contain the
160	 * wrapper structure defined above.
161	 */
162	if ((mp = mexchange(q, mp, sizeof (dl_info_ack_wrapper_t),
163	    M_PCPROTO, 0)) == NULL)
164		return;
165
166	bzero(mp->b_rptr, sizeof (dl_info_ack_wrapper_t));
167	dlwp = (dl_info_ack_wrapper_t *)mp->b_rptr;
168
169	dlp = &(dlwp->dl_info);
170	ASSERT(dlp == (dl_info_ack_t *)mp->b_rptr);
171
172	dlp->dl_primitive = DL_INFO_ACK;
173
174	/*
175	 * Set up the sub-structure pointers.
176	 */
177	addr = dlwp->dl_addr;
178	brdcst_addr = dlwp->dl_brdcst_addr;
179	rangep = &(dlwp->dl_qos_range1);
180	selp = &(dlwp->dl_qos_sel1);
181
182	/*
183	 * This driver supports only version 2 connectionless DLPI provider
184	 * nodes.
185	 */
186	dlp->dl_service_mode = DL_CLDLS;
187	dlp->dl_version = DL_VERSION_2;
188
189	/*
190	 * Set the style of the provider
191	 */
192	dlp->dl_provider_style = dsp->ds_style;
193	ASSERT(dlp->dl_provider_style == DL_STYLE1 ||
194	    dlp->dl_provider_style == DL_STYLE2);
195
196	/*
197	 * Set the current DLPI state.
198	 */
199	dlp->dl_current_state = dsp->ds_dlstate;
200
201	/*
202	 * Gratuitously set the media type. This is to deal with modules
203	 * that assume the media type is known prior to DL_ATTACH_REQ
204	 * being completed.
205	 */
206	dlp->dl_mac_type = DL_ETHER;
207
208	/*
209	 * If the stream is not at least attached we try to retrieve the
210	 * mac_info using mac_info_get()
211	 */
212	if (dsp->ds_dlstate == DL_UNATTACHED ||
213	    dsp->ds_dlstate == DL_ATTACH_PENDING ||
214	    dsp->ds_dlstate == DL_DETACH_PENDING) {
215		if (!mac_info_get(ddi_major_to_name(dsp->ds_major), &minfo)) {
216			/*
217			 * Cannot find mac_info. giving up.
218			 */
219			goto done;
220		}
221		minfop = &minfo;
222	} else {
223		minfop = (mac_info_t *)dsp->ds_mip;
224		/* We can only get the sdu if we're attached. */
225		mac_sdu_get(dsp->ds_mh, &dlp->dl_min_sdu, &dlp->dl_max_sdu);
226	}
227
228	/*
229	 * Set the media type (properly this time).
230	 */
231	if (dsp->ds_native)
232		dlp->dl_mac_type = minfop->mi_nativemedia;
233	else
234		dlp->dl_mac_type = minfop->mi_media;
235
236	/*
237	 * Set the DLSAP length. We only support 16 bit values and they
238	 * appear after the MAC address portion of DLSAP addresses.
239	 */
240	sap_length = sizeof (uint16_t);
241	dlp->dl_sap_length = NEG(sap_length);
242
243	addr_length = minfop->mi_addr_length;
244
245	/*
246	 * Copy in the media broadcast address.
247	 */
248	if (minfop->mi_brdcst_addr != NULL) {
249		dlp->dl_brdcst_addr_offset =
250		    (uintptr_t)brdcst_addr - (uintptr_t)dlp;
251		bcopy(minfop->mi_brdcst_addr, brdcst_addr, addr_length);
252		dlp->dl_brdcst_addr_length = addr_length;
253	}
254
255	/* Only VLAN links and links that have a normal tag mode support QOS. */
256	if ((dsp->ds_mch != NULL &&
257	    mac_client_vid(dsp->ds_mch) != VLAN_ID_NONE) ||
258	    (dsp->ds_dlp != NULL &&
259	    dsp->ds_dlp->dl_tagmode == LINK_TAGMODE_NORMAL)) {
260		dlp->dl_qos_range_offset = (uintptr_t)rangep - (uintptr_t)dlp;
261		dlp->dl_qos_range_length = sizeof (dl_qos_cl_range1_t);
262
263		rangep->dl_qos_type = DL_QOS_CL_RANGE1;
264		rangep->dl_trans_delay.dl_target_value = DL_UNKNOWN;
265		rangep->dl_trans_delay.dl_accept_value = DL_UNKNOWN;
266		rangep->dl_protection.dl_min = DL_UNKNOWN;
267		rangep->dl_protection.dl_max = DL_UNKNOWN;
268		rangep->dl_residual_error = DL_UNKNOWN;
269
270		/*
271		 * Specify the supported range of priorities.
272		 */
273		rangep->dl_priority.dl_min = 0;
274		rangep->dl_priority.dl_max = (1 << VLAN_PRI_SIZE) - 1;
275
276		dlp->dl_qos_offset = (uintptr_t)selp - (uintptr_t)dlp;
277		dlp->dl_qos_length = sizeof (dl_qos_cl_sel1_t);
278
279		selp->dl_qos_type = DL_QOS_CL_SEL1;
280		selp->dl_trans_delay = DL_UNKNOWN;
281		selp->dl_protection = DL_UNKNOWN;
282		selp->dl_residual_error = DL_UNKNOWN;
283
284		/*
285		 * Specify the current priority (which can be changed by
286		 * the DL_UDQOS_REQ primitive).
287		 */
288		selp->dl_priority = dsp->ds_pri;
289	}
290
291	dlp->dl_addr_length = addr_length + sizeof (uint16_t);
292	if (dsp->ds_dlstate == DL_IDLE) {
293		/*
294		 * The stream is bound. Therefore we can formulate a valid
295		 * DLSAP address.
296		 */
297		dlp->dl_addr_offset = (uintptr_t)addr - (uintptr_t)dlp;
298		if (addr_length > 0)
299			mac_unicast_primary_get(dsp->ds_mh, addr);
300
301		*(uint16_t *)(addr + addr_length) = dsp->ds_sap;
302	}
303
304done:
305	IMPLY(dlp->dl_qos_offset != 0, dlp->dl_qos_length != 0);
306	IMPLY(dlp->dl_qos_range_offset != 0,
307	    dlp->dl_qos_range_length != 0);
308	IMPLY(dlp->dl_addr_offset != 0, dlp->dl_addr_length != 0);
309	IMPLY(dlp->dl_brdcst_addr_offset != 0,
310	    dlp->dl_brdcst_addr_length != 0);
311
312	qreply(q, mp);
313}
314
315/*
316 * DL_ATTACH_REQ
317 */
318static void
319proto_attach_req(dld_str_t *dsp, mblk_t *mp)
320{
321	dl_attach_req_t	*dlp = (dl_attach_req_t *)mp->b_rptr;
322	int		err = 0;
323	t_uscalar_t	dl_err;
324	queue_t		*q = dsp->ds_wq;
325
326	if (MBLKL(mp) < sizeof (dl_attach_req_t) ||
327	    dlp->dl_ppa < 0 || dsp->ds_style == DL_STYLE1) {
328		dl_err = DL_BADPRIM;
329		goto failed;
330	}
331
332	if (dsp->ds_dlstate != DL_UNATTACHED) {
333		dl_err = DL_OUTSTATE;
334		goto failed;
335	}
336
337	dsp->ds_dlstate = DL_ATTACH_PENDING;
338
339	err = dld_str_attach(dsp, dlp->dl_ppa);
340	if (err != 0) {
341		switch (err) {
342		case ENOENT:
343			dl_err = DL_BADPPA;
344			err = 0;
345			break;
346		default:
347			dl_err = DL_SYSERR;
348			break;
349		}
350		dsp->ds_dlstate = DL_UNATTACHED;
351		goto failed;
352	}
353	ASSERT(dsp->ds_dlstate == DL_UNBOUND);
354	dlokack(q, mp, DL_ATTACH_REQ);
355	return;
356
357failed:
358	dlerrorack(q, mp, DL_ATTACH_REQ, dl_err, (t_uscalar_t)err);
359}
360
361/*
362 * DL_DETACH_REQ
363 */
364static void
365proto_detach_req(dld_str_t *dsp, mblk_t *mp)
366{
367	queue_t		*q = dsp->ds_wq;
368	t_uscalar_t	dl_err;
369
370	if (MBLKL(mp) < sizeof (dl_detach_req_t)) {
371		dl_err = DL_BADPRIM;
372		goto failed;
373	}
374
375	if (dsp->ds_dlstate != DL_UNBOUND) {
376		dl_err = DL_OUTSTATE;
377		goto failed;
378	}
379
380	if (dsp->ds_style == DL_STYLE1) {
381		dl_err = DL_BADPRIM;
382		goto failed;
383	}
384
385	ASSERT(dsp->ds_datathr_cnt == 0);
386	dsp->ds_dlstate = DL_DETACH_PENDING;
387
388	dld_str_detach(dsp);
389	dlokack(dsp->ds_wq, mp, DL_DETACH_REQ);
390	return;
391
392failed:
393	dlerrorack(q, mp, DL_DETACH_REQ, dl_err, 0);
394}
395
396/*
397 * DL_BIND_REQ
398 */
399static void
400proto_bind_req(dld_str_t *dsp, mblk_t *mp)
401{
402	dl_bind_req_t	*dlp = (dl_bind_req_t *)mp->b_rptr;
403	int		err = 0;
404	uint8_t		dlsap_addr[MAXMACADDRLEN + sizeof (uint16_t)];
405	uint_t		dlsap_addr_length;
406	t_uscalar_t	dl_err;
407	t_scalar_t	sap;
408	queue_t		*q = dsp->ds_wq;
409	mac_perim_handle_t	mph;
410	void		*mdip;
411	int32_t		intr_cpu;
412
413	if (MBLKL(mp) < sizeof (dl_bind_req_t)) {
414		dl_err = DL_BADPRIM;
415		goto failed;
416	}
417
418	if (dlp->dl_xidtest_flg != 0) {
419		dl_err = DL_NOAUTO;
420		goto failed;
421	}
422
423	if (dlp->dl_service_mode != DL_CLDLS) {
424		dl_err = DL_UNSUPPORTED;
425		goto failed;
426	}
427
428	if (dsp->ds_dlstate != DL_UNBOUND) {
429		dl_err = DL_OUTSTATE;
430		goto failed;
431	}
432
433	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
434
435	if ((err = dls_active_set(dsp)) != 0) {
436		dl_err = DL_SYSERR;
437		goto failed2;
438	}
439
440	dsp->ds_dlstate = DL_BIND_PENDING;
441	/*
442	 * Set the receive callback.
443	 */
444	dls_rx_set(dsp, (dsp->ds_mode == DLD_RAW) ?
445	    dld_str_rx_raw : dld_str_rx_unitdata, dsp);
446
447	/*
448	 * Bind the channel such that it can receive packets.
449	 */
450	sap = dlp->dl_sap;
451	dsp->ds_nonip = !check_mod_above(dsp->ds_rq, "ip") &&
452	    !check_mod_above(dsp->ds_rq, "arp");
453
454	err = dls_bind(dsp, sap);
455	if (err != 0) {
456		switch (err) {
457		case EINVAL:
458			dl_err = DL_BADADDR;
459			err = 0;
460			break;
461		default:
462			dl_err = DL_SYSERR;
463			break;
464		}
465
466		dsp->ds_dlstate = DL_UNBOUND;
467		dls_active_clear(dsp, B_FALSE);
468		goto failed2;
469	}
470
471	intr_cpu = mac_client_intr_cpu(dsp->ds_mch);
472	mdip = mac_get_devinfo(dsp->ds_mh);
473	mac_perim_exit(mph);
474
475	/*
476	 * We do this after we get out of the perim to avoid deadlocks
477	 * etc. since part of mac_client_retarget_intr is to walk the
478	 * device tree in order to find and retarget the interrupts.
479	 */
480	if (intr_cpu != -1)
481		mac_client_set_intr_cpu(mdip, dsp->ds_mch, intr_cpu);
482
483	/*
484	 * Copy in MAC address.
485	 */
486	dlsap_addr_length = dsp->ds_mip->mi_addr_length;
487	mac_unicast_primary_get(dsp->ds_mh, dlsap_addr);
488
489	/*
490	 * Copy in the SAP.
491	 */
492	*(uint16_t *)(dlsap_addr + dlsap_addr_length) = sap;
493	dlsap_addr_length += sizeof (uint16_t);
494
495	dsp->ds_dlstate = DL_IDLE;
496	dlbindack(q, mp, sap, dlsap_addr, dlsap_addr_length, 0, 0);
497	return;
498
499failed2:
500	mac_perim_exit(mph);
501failed:
502	dlerrorack(q, mp, DL_BIND_REQ, dl_err, (t_uscalar_t)err);
503}
504
505/*
506 * DL_UNBIND_REQ
507 */
508static void
509proto_unbind_req(dld_str_t *dsp, mblk_t *mp)
510{
511	queue_t		*q = dsp->ds_wq;
512	t_uscalar_t	dl_err;
513	mac_perim_handle_t	mph;
514
515	if (MBLKL(mp) < sizeof (dl_unbind_req_t)) {
516		dl_err = DL_BADPRIM;
517		goto failed;
518	}
519
520	if (dsp->ds_dlstate != DL_IDLE) {
521		dl_err = DL_OUTSTATE;
522		goto failed;
523	}
524
525	mutex_enter(&dsp->ds_lock);
526	while (dsp->ds_datathr_cnt != 0)
527		cv_wait(&dsp->ds_datathr_cv, &dsp->ds_lock);
528
529	dsp->ds_dlstate = DL_UNBIND_PENDING;
530	mutex_exit(&dsp->ds_lock);
531
532	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
533	/*
534	 * Unbind the channel to stop packets being received.
535	 */
536	dls_unbind(dsp);
537
538	/*
539	 * Disable polling mode, if it is enabled.
540	 */
541	(void) dld_capab_poll_disable(dsp, NULL);
542
543	/*
544	 * Clear LSO flags.
545	 */
546	dsp->ds_lso = B_FALSE;
547	dsp->ds_lso_max = 0;
548
549	/*
550	 * Clear the receive callback.
551	 */
552	dls_rx_set(dsp, NULL, NULL);
553	dsp->ds_direct = B_FALSE;
554
555	/*
556	 * Set the mode back to the default (unitdata).
557	 */
558	dsp->ds_mode = DLD_UNITDATA;
559	dsp->ds_dlstate = DL_UNBOUND;
560
561	dls_active_clear(dsp, B_FALSE);
562	mac_perim_exit(mph);
563	dlokack(dsp->ds_wq, mp, DL_UNBIND_REQ);
564	return;
565failed:
566	dlerrorack(q, mp, DL_UNBIND_REQ, dl_err, 0);
567}
568
569/*
570 * DL_PROMISCON_REQ
571 */
572static void
573proto_promiscon_req(dld_str_t *dsp, mblk_t *mp)
574{
575	dl_promiscon_req_t *dlp = (dl_promiscon_req_t *)mp->b_rptr;
576	int		err = 0;
577	t_uscalar_t	dl_err;
578	uint32_t	new_flags, promisc_saved;
579	queue_t		*q = dsp->ds_wq;
580	mac_perim_handle_t	mph;
581
582	if (MBLKL(mp) < sizeof (dl_promiscon_req_t)) {
583		dl_err = DL_BADPRIM;
584		goto failed;
585	}
586
587	if (dsp->ds_dlstate == DL_UNATTACHED ||
588	    DL_ACK_PENDING(dsp->ds_dlstate)) {
589		dl_err = DL_OUTSTATE;
590		goto failed;
591	}
592
593	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
594
595	new_flags = promisc_saved = dsp->ds_promisc;
596	switch (dlp->dl_level) {
597	case DL_PROMISC_SAP:
598		new_flags |= DLS_PROMISC_SAP;
599		break;
600
601	case DL_PROMISC_MULTI:
602		new_flags |= DLS_PROMISC_MULTI;
603		break;
604
605	case DL_PROMISC_PHYS:
606		new_flags |= DLS_PROMISC_PHYS;
607		break;
608
609	default:
610		dl_err = DL_NOTSUPPORTED;
611		goto failed2;
612	}
613
614	if ((promisc_saved == 0) && (err = dls_active_set(dsp)) != 0) {
615		ASSERT(dsp->ds_promisc == promisc_saved);
616		dl_err = DL_SYSERR;
617		goto failed2;
618	}
619
620	/*
621	 * Adjust channel promiscuity.
622	 */
623	err = dls_promisc(dsp, new_flags);
624
625	if (err != 0) {
626		dl_err = DL_SYSERR;
627		dsp->ds_promisc = promisc_saved;
628		if (promisc_saved == 0)
629			dls_active_clear(dsp, B_FALSE);
630		goto failed2;
631	}
632
633	mac_perim_exit(mph);
634
635	dlokack(q, mp, DL_PROMISCON_REQ);
636	return;
637
638failed2:
639	mac_perim_exit(mph);
640failed:
641	dlerrorack(q, mp, DL_PROMISCON_REQ, dl_err, (t_uscalar_t)err);
642}
643
644/*
645 * DL_PROMISCOFF_REQ
646 */
647static void
648proto_promiscoff_req(dld_str_t *dsp, mblk_t *mp)
649{
650	dl_promiscoff_req_t *dlp = (dl_promiscoff_req_t *)mp->b_rptr;
651	int		err = 0;
652	t_uscalar_t	dl_err;
653	uint32_t	new_flags;
654	queue_t		*q = dsp->ds_wq;
655	mac_perim_handle_t	mph;
656
657	if (MBLKL(mp) < sizeof (dl_promiscoff_req_t)) {
658		dl_err = DL_BADPRIM;
659		goto failed;
660	}
661
662	if (dsp->ds_dlstate == DL_UNATTACHED ||
663	    DL_ACK_PENDING(dsp->ds_dlstate)) {
664		dl_err = DL_OUTSTATE;
665		goto failed;
666	}
667
668	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
669
670	new_flags = dsp->ds_promisc;
671	switch (dlp->dl_level) {
672	case DL_PROMISC_SAP:
673		if (!(dsp->ds_promisc & DLS_PROMISC_SAP)) {
674			dl_err = DL_NOTENAB;
675			goto failed2;
676		}
677		new_flags &= ~DLS_PROMISC_SAP;
678		break;
679
680	case DL_PROMISC_MULTI:
681		if (!(dsp->ds_promisc & DLS_PROMISC_MULTI)) {
682			dl_err = DL_NOTENAB;
683			goto failed2;
684		}
685		new_flags &= ~DLS_PROMISC_MULTI;
686		break;
687
688	case DL_PROMISC_PHYS:
689		if (!(dsp->ds_promisc & DLS_PROMISC_PHYS)) {
690			dl_err = DL_NOTENAB;
691			goto failed2;
692		}
693		new_flags &= ~DLS_PROMISC_PHYS;
694		break;
695
696	default:
697		dl_err = DL_NOTSUPPORTED;
698		goto failed2;
699	}
700
701	/*
702	 * Adjust channel promiscuity.
703	 */
704	err = dls_promisc(dsp, new_flags);
705
706	if (err != 0) {
707		dl_err = DL_SYSERR;
708		goto failed2;
709	}
710
711	ASSERT(dsp->ds_promisc == new_flags);
712	if (dsp->ds_promisc == 0)
713		dls_active_clear(dsp, B_FALSE);
714
715	mac_perim_exit(mph);
716
717	dlokack(q, mp, DL_PROMISCOFF_REQ);
718	return;
719failed2:
720	mac_perim_exit(mph);
721failed:
722	dlerrorack(q, mp, DL_PROMISCOFF_REQ, dl_err, (t_uscalar_t)err);
723}
724
725/*
726 * DL_ENABMULTI_REQ
727 */
728static void
729proto_enabmulti_req(dld_str_t *dsp, mblk_t *mp)
730{
731	dl_enabmulti_req_t *dlp = (dl_enabmulti_req_t *)mp->b_rptr;
732	int		err = 0;
733	t_uscalar_t	dl_err;
734	queue_t		*q = dsp->ds_wq;
735	mac_perim_handle_t	mph;
736
737	if (dsp->ds_dlstate == DL_UNATTACHED ||
738	    DL_ACK_PENDING(dsp->ds_dlstate)) {
739		dl_err = DL_OUTSTATE;
740		goto failed;
741	}
742
743	if (MBLKL(mp) < sizeof (dl_enabmulti_req_t) ||
744	    !MBLKIN(mp, dlp->dl_addr_offset, dlp->dl_addr_length) ||
745	    dlp->dl_addr_length != dsp->ds_mip->mi_addr_length) {
746		dl_err = DL_BADPRIM;
747		goto failed;
748	}
749
750	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
751
752	if ((dsp->ds_dmap == NULL) && (err = dls_active_set(dsp)) != 0) {
753		dl_err = DL_SYSERR;
754		goto failed2;
755	}
756
757	err = dls_multicst_add(dsp, mp->b_rptr + dlp->dl_addr_offset);
758	if (err != 0) {
759		switch (err) {
760		case EINVAL:
761			dl_err = DL_BADADDR;
762			err = 0;
763			break;
764		case ENOSPC:
765			dl_err = DL_TOOMANY;
766			err = 0;
767			break;
768		default:
769			dl_err = DL_SYSERR;
770			break;
771		}
772		if (dsp->ds_dmap == NULL)
773			dls_active_clear(dsp, B_FALSE);
774		goto failed2;
775	}
776
777	mac_perim_exit(mph);
778
779	dlokack(q, mp, DL_ENABMULTI_REQ);
780	return;
781
782failed2:
783	mac_perim_exit(mph);
784failed:
785	dlerrorack(q, mp, DL_ENABMULTI_REQ, dl_err, (t_uscalar_t)err);
786}
787
788/*
789 * DL_DISABMULTI_REQ
790 */
791static void
792proto_disabmulti_req(dld_str_t *dsp, mblk_t *mp)
793{
794	dl_disabmulti_req_t *dlp = (dl_disabmulti_req_t *)mp->b_rptr;
795	int		err = 0;
796	t_uscalar_t	dl_err;
797	queue_t		*q = dsp->ds_wq;
798	mac_perim_handle_t	mph;
799
800	if (dsp->ds_dlstate == DL_UNATTACHED ||
801	    DL_ACK_PENDING(dsp->ds_dlstate)) {
802		dl_err = DL_OUTSTATE;
803		goto failed;
804	}
805
806	if (MBLKL(mp) < sizeof (dl_disabmulti_req_t) ||
807	    !MBLKIN(mp, dlp->dl_addr_offset, dlp->dl_addr_length) ||
808	    dlp->dl_addr_length != dsp->ds_mip->mi_addr_length) {
809		dl_err = DL_BADPRIM;
810		goto failed;
811	}
812
813	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
814	err = dls_multicst_remove(dsp, mp->b_rptr + dlp->dl_addr_offset);
815	if ((err == 0) && (dsp->ds_dmap == NULL))
816		dls_active_clear(dsp, B_FALSE);
817	mac_perim_exit(mph);
818
819	if (err != 0) {
820		switch (err) {
821		case EINVAL:
822			dl_err = DL_BADADDR;
823			err = 0;
824			break;
825
826		case ENOENT:
827			dl_err = DL_NOTENAB;
828			err = 0;
829			break;
830
831		default:
832			dl_err = DL_SYSERR;
833			break;
834		}
835		goto failed;
836	}
837	dlokack(q, mp, DL_DISABMULTI_REQ);
838	return;
839failed:
840	dlerrorack(q, mp, DL_DISABMULTI_REQ, dl_err, (t_uscalar_t)err);
841}
842
843/*
844 * DL_PHYS_ADDR_REQ
845 */
846static void
847proto_physaddr_req(dld_str_t *dsp, mblk_t *mp)
848{
849	dl_phys_addr_req_t *dlp = (dl_phys_addr_req_t *)mp->b_rptr;
850	queue_t		*q = dsp->ds_wq;
851	t_uscalar_t	dl_err = 0;
852	char		*addr = NULL;
853	uint_t		addr_length;
854
855	if (MBLKL(mp) < sizeof (dl_phys_addr_req_t)) {
856		dl_err = DL_BADPRIM;
857		goto done;
858	}
859
860	if (dsp->ds_dlstate == DL_UNATTACHED ||
861	    DL_ACK_PENDING(dsp->ds_dlstate)) {
862		dl_err = DL_OUTSTATE;
863		goto done;
864	}
865
866	addr_length = dsp->ds_mip->mi_addr_length;
867	if (addr_length > 0) {
868		addr = kmem_alloc(addr_length, KM_SLEEP);
869		switch (dlp->dl_addr_type) {
870		case DL_CURR_PHYS_ADDR:
871			mac_unicast_primary_get(dsp->ds_mh, (uint8_t *)addr);
872			break;
873		case DL_FACT_PHYS_ADDR:
874			bcopy(dsp->ds_mip->mi_unicst_addr, addr, addr_length);
875			break;
876		case DL_CURR_DEST_ADDR:
877			if (!mac_dst_get(dsp->ds_mh, (uint8_t *)addr))
878				dl_err = DL_NOTSUPPORTED;
879			break;
880		default:
881			dl_err = DL_UNSUPPORTED;
882		}
883	}
884done:
885	if (dl_err == 0)
886		dlphysaddrack(q, mp, addr, (t_uscalar_t)addr_length);
887	else
888		dlerrorack(q, mp, DL_PHYS_ADDR_REQ, dl_err, 0);
889	if (addr != NULL)
890		kmem_free(addr, addr_length);
891}
892
893/*
894 * DL_SET_PHYS_ADDR_REQ
895 */
896static void
897proto_setphysaddr_req(dld_str_t *dsp, mblk_t *mp)
898{
899	dl_set_phys_addr_req_t *dlp = (dl_set_phys_addr_req_t *)mp->b_rptr;
900	int		err = 0;
901	t_uscalar_t	dl_err;
902	queue_t		*q = dsp->ds_wq;
903	mac_perim_handle_t	mph;
904
905	if (dsp->ds_dlstate == DL_UNATTACHED ||
906	    DL_ACK_PENDING(dsp->ds_dlstate)) {
907		dl_err = DL_OUTSTATE;
908		goto failed;
909	}
910
911	if (MBLKL(mp) < sizeof (dl_set_phys_addr_req_t) ||
912	    !MBLKIN(mp, dlp->dl_addr_offset, dlp->dl_addr_length) ||
913	    dlp->dl_addr_length != dsp->ds_mip->mi_addr_length) {
914		dl_err = DL_BADPRIM;
915		goto failed;
916	}
917
918	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
919
920	if ((err = dls_active_set(dsp)) != 0) {
921		dl_err = DL_SYSERR;
922		goto failed2;
923	}
924
925	/*
926	 * If mac-nospoof is enabled and the link is owned by a
927	 * non-global zone, changing the mac address is not allowed.
928	 */
929	if (dsp->ds_dlp->dl_zid != GLOBAL_ZONEID &&
930	    mac_protect_enabled(dsp->ds_mch, MPT_MACNOSPOOF)) {
931		dls_active_clear(dsp, B_FALSE);
932		err = EACCES;
933		goto failed2;
934	}
935
936	err = mac_unicast_primary_set(dsp->ds_mh,
937	    mp->b_rptr + dlp->dl_addr_offset);
938	if (err != 0) {
939		switch (err) {
940		case EINVAL:
941			dl_err = DL_BADADDR;
942			err = 0;
943			break;
944
945		default:
946			dl_err = DL_SYSERR;
947			break;
948		}
949		dls_active_clear(dsp, B_FALSE);
950		goto failed2;
951
952	}
953
954	mac_perim_exit(mph);
955
956	dlokack(q, mp, DL_SET_PHYS_ADDR_REQ);
957	return;
958
959failed2:
960	mac_perim_exit(mph);
961failed:
962	dlerrorack(q, mp, DL_SET_PHYS_ADDR_REQ, dl_err, (t_uscalar_t)err);
963}
964
965/*
966 * DL_UDQOS_REQ
967 */
968static void
969proto_udqos_req(dld_str_t *dsp, mblk_t *mp)
970{
971	dl_udqos_req_t *dlp = (dl_udqos_req_t *)mp->b_rptr;
972	dl_qos_cl_sel1_t *selp;
973	int		off, len;
974	t_uscalar_t	dl_err;
975	queue_t		*q = dsp->ds_wq;
976
977	off = dlp->dl_qos_offset;
978	len = dlp->dl_qos_length;
979
980	if (MBLKL(mp) < sizeof (dl_udqos_req_t) || !MBLKIN(mp, off, len)) {
981		dl_err = DL_BADPRIM;
982		goto failed;
983	}
984
985	selp = (dl_qos_cl_sel1_t *)(mp->b_rptr + off);
986	if (selp->dl_qos_type != DL_QOS_CL_SEL1) {
987		dl_err = DL_BADQOSTYPE;
988		goto failed;
989	}
990
991	if (selp->dl_priority > (1 << VLAN_PRI_SIZE) - 1 ||
992	    selp->dl_priority < 0) {
993		dl_err = DL_BADQOSPARAM;
994		goto failed;
995	}
996
997	dsp->ds_pri = selp->dl_priority;
998	dlokack(q, mp, DL_UDQOS_REQ);
999	return;
1000failed:
1001	dlerrorack(q, mp, DL_UDQOS_REQ, dl_err, 0);
1002}
1003
1004static boolean_t
1005check_mod_above(queue_t *q, const char *mod)
1006{
1007	queue_t		*next_q;
1008	boolean_t	ret = B_TRUE;
1009
1010	claimstr(q);
1011	next_q = q->q_next;
1012	if (strcmp(next_q->q_qinfo->qi_minfo->mi_idname, mod) != 0)
1013		ret = B_FALSE;
1014	releasestr(q);
1015	return (ret);
1016}
1017
1018/*
1019 * DL_CAPABILITY_REQ
1020 */
1021static void
1022proto_capability_req(dld_str_t *dsp, mblk_t *mp)
1023{
1024	dl_capability_req_t *dlp = (dl_capability_req_t *)mp->b_rptr;
1025	dl_capability_sub_t *sp;
1026	size_t		size, len;
1027	offset_t	off, end;
1028	t_uscalar_t	dl_err;
1029	queue_t		*q = dsp->ds_wq;
1030
1031	if (MBLKL(mp) < sizeof (dl_capability_req_t)) {
1032		dl_err = DL_BADPRIM;
1033		goto failed;
1034	}
1035
1036	if (dsp->ds_dlstate == DL_UNATTACHED ||
1037	    DL_ACK_PENDING(dsp->ds_dlstate)) {
1038		dl_err = DL_OUTSTATE;
1039		goto failed;
1040	}
1041
1042	/*
1043	 * This request is overloaded. If there are no requested capabilities
1044	 * then we just want to acknowledge with all the capabilities we
1045	 * support. Otherwise we enable the set of capabilities requested.
1046	 */
1047	if (dlp->dl_sub_length == 0) {
1048		proto_capability_advertise(dsp, mp);
1049		return;
1050	}
1051
1052	if (!MBLKIN(mp, dlp->dl_sub_offset, dlp->dl_sub_length)) {
1053		dl_err = DL_BADPRIM;
1054		goto failed;
1055	}
1056
1057	dlp->dl_primitive = DL_CAPABILITY_ACK;
1058
1059	off = dlp->dl_sub_offset;
1060	len = dlp->dl_sub_length;
1061
1062	/*
1063	 * Walk the list of capabilities to be enabled.
1064	 */
1065	for (end = off + len; off < end; ) {
1066		sp = (dl_capability_sub_t *)(mp->b_rptr + off);
1067		size = sizeof (dl_capability_sub_t) + sp->dl_length;
1068
1069		if (off + size > end ||
1070		    !IS_P2ALIGNED(off, sizeof (uint32_t))) {
1071			dl_err = DL_BADPRIM;
1072			goto failed;
1073		}
1074
1075		switch (sp->dl_cap) {
1076		/*
1077		 * TCP/IP checksum offload to hardware.
1078		 */
1079		case DL_CAPAB_HCKSUM: {
1080			dl_capab_hcksum_t *hcksump;
1081			dl_capab_hcksum_t hcksum;
1082
1083			hcksump = (dl_capab_hcksum_t *)&sp[1];
1084			/*
1085			 * Copy for alignment.
1086			 */
1087			bcopy(hcksump, &hcksum, sizeof (dl_capab_hcksum_t));
1088			dlcapabsetqid(&(hcksum.hcksum_mid), dsp->ds_rq);
1089			bcopy(&hcksum, hcksump, sizeof (dl_capab_hcksum_t));
1090			break;
1091		}
1092
1093		case DL_CAPAB_DLD: {
1094			dl_capab_dld_t	*dldp;
1095			dl_capab_dld_t	dld;
1096
1097			dldp = (dl_capab_dld_t *)&sp[1];
1098			/*
1099			 * Copy for alignment.
1100			 */
1101			bcopy(dldp, &dld, sizeof (dl_capab_dld_t));
1102			dlcapabsetqid(&(dld.dld_mid), dsp->ds_rq);
1103			bcopy(&dld, dldp, sizeof (dl_capab_dld_t));
1104			break;
1105		}
1106		default:
1107			break;
1108		}
1109		off += size;
1110	}
1111	qreply(q, mp);
1112	return;
1113failed:
1114	dlerrorack(q, mp, DL_CAPABILITY_REQ, dl_err, 0);
1115}
1116
1117/*
1118 * DL_NOTIFY_REQ
1119 */
1120static void
1121proto_notify_req(dld_str_t *dsp, mblk_t *mp)
1122{
1123	dl_notify_req_t	*dlp = (dl_notify_req_t *)mp->b_rptr;
1124	t_uscalar_t	dl_err;
1125	queue_t		*q = dsp->ds_wq;
1126	uint_t		note =
1127	    DL_NOTE_PROMISC_ON_PHYS |
1128	    DL_NOTE_PROMISC_OFF_PHYS |
1129	    DL_NOTE_PHYS_ADDR |
1130	    DL_NOTE_LINK_UP |
1131	    DL_NOTE_LINK_DOWN |
1132	    DL_NOTE_CAPAB_RENEG |
1133	    DL_NOTE_FASTPATH_FLUSH |
1134	    DL_NOTE_SPEED |
1135	    DL_NOTE_SDU_SIZE|
1136	    DL_NOTE_SDU_SIZE2|
1137	    DL_NOTE_ALLOWED_IPS;
1138
1139	if (MBLKL(mp) < sizeof (dl_notify_req_t)) {
1140		dl_err = DL_BADPRIM;
1141		goto failed;
1142	}
1143
1144	if (dsp->ds_dlstate == DL_UNATTACHED ||
1145	    DL_ACK_PENDING(dsp->ds_dlstate)) {
1146		dl_err = DL_OUTSTATE;
1147		goto failed;
1148	}
1149
1150	note &= ~(mac_no_notification(dsp->ds_mh));
1151
1152	/*
1153	 * Cache the notifications that are being enabled.
1154	 */
1155	dsp->ds_notifications = dlp->dl_notifications & note;
1156	/*
1157	 * The ACK carries all notifications regardless of which set is
1158	 * being enabled.
1159	 */
1160	dlnotifyack(q, mp, note);
1161
1162	/*
1163	 * Generate DL_NOTIFY_IND messages for each enabled notification.
1164	 */
1165	if (dsp->ds_notifications != 0) {
1166		dld_str_notify_ind(dsp);
1167	}
1168	return;
1169failed:
1170	dlerrorack(q, mp, DL_NOTIFY_REQ, dl_err, 0);
1171}
1172
1173/*
1174 * DL_UINTDATA_REQ
1175 */
1176void
1177proto_unitdata_req(dld_str_t *dsp, mblk_t *mp)
1178{
1179	queue_t			*q = dsp->ds_wq;
1180	dl_unitdata_req_t	*dlp = (dl_unitdata_req_t *)mp->b_rptr;
1181	off_t			off;
1182	size_t			len, size;
1183	const uint8_t		*addr;
1184	uint16_t		sap;
1185	uint_t			addr_length;
1186	mblk_t			*bp, *payload;
1187	t_uscalar_t		dl_err;
1188	uint_t			max_sdu;
1189
1190	if (MBLKL(mp) < sizeof (dl_unitdata_req_t) || mp->b_cont == NULL) {
1191		dlerrorack(q, mp, DL_UNITDATA_REQ, DL_BADPRIM, 0);
1192		return;
1193	}
1194
1195	mutex_enter(&dsp->ds_lock);
1196	if (dsp->ds_dlstate != DL_IDLE) {
1197		mutex_exit(&dsp->ds_lock);
1198		dlerrorack(q, mp, DL_UNITDATA_REQ, DL_OUTSTATE, 0);
1199		return;
1200	}
1201	DLD_DATATHR_INC(dsp);
1202	mutex_exit(&dsp->ds_lock);
1203
1204	addr_length = dsp->ds_mip->mi_addr_length;
1205
1206	off = dlp->dl_dest_addr_offset;
1207	len = dlp->dl_dest_addr_length;
1208
1209	if (!MBLKIN(mp, off, len) || !IS_P2ALIGNED(off, sizeof (uint16_t))) {
1210		dl_err = DL_BADPRIM;
1211		goto failed;
1212	}
1213
1214	if (len != addr_length + sizeof (uint16_t)) {
1215		dl_err = DL_BADADDR;
1216		goto failed;
1217	}
1218
1219	addr = mp->b_rptr + off;
1220	sap = *(uint16_t *)(mp->b_rptr + off + addr_length);
1221
1222	/*
1223	 * Check the length of the packet and the block types.
1224	 */
1225	size = 0;
1226	payload = mp->b_cont;
1227	for (bp = payload; bp != NULL; bp = bp->b_cont) {
1228		if (DB_TYPE(bp) != M_DATA)
1229			goto baddata;
1230
1231		size += MBLKL(bp);
1232	}
1233
1234	mac_sdu_get(dsp->ds_mh, NULL, &max_sdu);
1235	if (size > max_sdu)
1236		goto baddata;
1237
1238	/*
1239	 * Build a packet header.
1240	 */
1241	if ((bp = dls_header(dsp, addr, sap, dlp->dl_priority.dl_max,
1242	    &payload)) == NULL) {
1243		dl_err = DL_BADADDR;
1244		goto failed;
1245	}
1246
1247	/*
1248	 * We no longer need the M_PROTO header, so free it.
1249	 */
1250	freeb(mp);
1251
1252	/*
1253	 * Transfer the checksum offload information if it is present.
1254	 */
1255	mac_hcksum_clone(payload, bp);
1256
1257	/*
1258	 * Link the payload onto the new header.
1259	 */
1260	ASSERT(bp->b_cont == NULL);
1261	bp->b_cont = payload;
1262
1263	/*
1264	 * No lock can be held across modules and putnext()'s,
1265	 * which can happen here with the call from DLD_TX().
1266	 */
1267	if (DLD_TX(dsp, bp, 0, 0) != 0) {
1268		/* flow-controlled */
1269		DLD_SETQFULL(dsp);
1270	}
1271	DLD_DATATHR_DCR(dsp);
1272	return;
1273
1274failed:
1275	dlerrorack(q, mp, DL_UNITDATA_REQ, dl_err, 0);
1276	DLD_DATATHR_DCR(dsp);
1277	return;
1278
1279baddata:
1280	dluderrorind(q, mp, (void *)addr, len, DL_BADDATA, 0);
1281	DLD_DATATHR_DCR(dsp);
1282}
1283
1284/*
1285 * DL_PASSIVE_REQ
1286 */
1287static void
1288proto_passive_req(dld_str_t *dsp, mblk_t *mp)
1289{
1290	t_uscalar_t dl_err;
1291
1292	/*
1293	 * If we've already become active by issuing an active primitive,
1294	 * then it's too late to try to become passive.
1295	 */
1296	if (dsp->ds_passivestate == DLD_ACTIVE) {
1297		dl_err = DL_OUTSTATE;
1298		goto failed;
1299	}
1300
1301	if (MBLKL(mp) < sizeof (dl_passive_req_t)) {
1302		dl_err = DL_BADPRIM;
1303		goto failed;
1304	}
1305
1306	dsp->ds_passivestate = DLD_PASSIVE;
1307	dlokack(dsp->ds_wq, mp, DL_PASSIVE_REQ);
1308	return;
1309failed:
1310	dlerrorack(dsp->ds_wq, mp, DL_PASSIVE_REQ, dl_err, 0);
1311}
1312
1313
1314/*
1315 * Catch-all handler.
1316 */
1317static void
1318proto_req(dld_str_t *dsp, mblk_t *mp)
1319{
1320	union DL_primitives	*dlp = (union DL_primitives *)mp->b_rptr;
1321
1322	dlerrorack(dsp->ds_wq, mp, dlp->dl_primitive, DL_UNSUPPORTED, 0);
1323}
1324
1325static int
1326dld_capab_perim(dld_str_t *dsp, void *data, uint_t flags)
1327{
1328	switch (flags) {
1329	case DLD_ENABLE:
1330		mac_perim_enter_by_mh(dsp->ds_mh, (mac_perim_handle_t *)data);
1331		return (0);
1332
1333	case DLD_DISABLE:
1334		mac_perim_exit((mac_perim_handle_t)data);
1335		return (0);
1336
1337	case DLD_QUERY:
1338		return (mac_perim_held(dsp->ds_mh));
1339	}
1340	return (0);
1341}
1342
1343static int
1344dld_capab_direct(dld_str_t *dsp, void *data, uint_t flags)
1345{
1346	dld_capab_direct_t	*direct = data;
1347
1348	ASSERT(MAC_PERIM_HELD(dsp->ds_mh));
1349
1350	switch (flags) {
1351	case DLD_ENABLE:
1352		dls_rx_set(dsp, (dls_rx_t)direct->di_rx_cf,
1353		    direct->di_rx_ch);
1354
1355		direct->di_tx_df = (uintptr_t)str_mdata_fastpath_put;
1356		direct->di_tx_dh = dsp;
1357		direct->di_tx_cb_df = (uintptr_t)mac_client_tx_notify;
1358		direct->di_tx_cb_dh = dsp->ds_mch;
1359		direct->di_tx_fctl_df = (uintptr_t)mac_tx_is_flow_blocked;
1360		direct->di_tx_fctl_dh = dsp->ds_mch;
1361
1362		dsp->ds_direct = B_TRUE;
1363
1364		return (0);
1365
1366	case DLD_DISABLE:
1367		dls_rx_set(dsp, (dsp->ds_mode == DLD_FASTPATH) ?
1368		    dld_str_rx_fastpath : dld_str_rx_unitdata, (void *)dsp);
1369		dsp->ds_direct = B_FALSE;
1370
1371		return (0);
1372	}
1373	return (ENOTSUP);
1374}
1375
1376/*
1377 * This function is misnamed. All polling and fanouts are run out of
1378 * the lower MAC for VNICs and out of the MAC for NICs. The
1379 * availability of Rx rings and promiscous mode is taken care of
1380 * between the soft ring set (mac_srs), the Rx ring, and the SW
1381 * classifier. Fanout, if necessary, is done by the soft rings that
1382 * are part of the SRS. By default the SRS divvies up the packets
1383 * based on protocol: TCP, UDP, or Other (OTH).
1384 *
1385 * The SRS (or its associated soft rings) always store the ill_rx_ring
1386 * (the cookie returned when they registered with IP during plumb) as their
1387 * 2nd argument which is passed up as mac_resource_handle_t. The upcall
1388 * function and 1st argument is what the caller registered when they
1389 * called mac_rx_classify_flow_add() to register the flow. For VNIC,
1390 * the function is vnic_rx and argument is vnic_t. For regular NIC
1391 * case, it mac_rx_default and mac_handle_t. As explained above, the
1392 * SRS (or its soft ring) will add the ill_rx_ring (mac_resource_handle_t)
1393 * from its stored 2nd argument.
1394 */
1395static int
1396dld_capab_poll_enable(dld_str_t *dsp, dld_capab_poll_t *poll)
1397{
1398	if (dsp->ds_polling)
1399		return (EINVAL);
1400
1401	if ((dld_opt & DLD_OPT_NO_POLL) != 0 || dsp->ds_mode == DLD_RAW)
1402		return (ENOTSUP);
1403
1404	/*
1405	 * Enable client polling if and only if DLS bypass is
1406	 * possible. Some traffic requires DLS processing in the Rx
1407	 * data path. In such a case we can neither allow the client
1408	 * (IP) to directly poll the soft ring (since DLS processing
1409	 * hasn't been done) nor can we allow DLS bypass.
1410	 */
1411	if (!mac_rx_bypass_set(dsp->ds_mch, dsp->ds_rx, dsp->ds_rx_arg))
1412		return (ENOTSUP);
1413
1414	/*
1415	 * Register soft ring resources. This will come in handy later if
1416	 * the user decides to modify CPU bindings to use more CPUs for the
1417	 * device in which case we will switch to fanout using soft rings.
1418	 */
1419	mac_resource_set_common(dsp->ds_mch,
1420	    (mac_resource_add_t)poll->poll_ring_add_cf,
1421	    (mac_resource_remove_t)poll->poll_ring_remove_cf,
1422	    (mac_resource_quiesce_t)poll->poll_ring_quiesce_cf,
1423	    (mac_resource_restart_t)poll->poll_ring_restart_cf,
1424	    (mac_resource_bind_t)poll->poll_ring_bind_cf,
1425	    poll->poll_ring_ch);
1426
1427	mac_client_poll_enable(dsp->ds_mch);
1428
1429	dsp->ds_polling = B_TRUE;
1430	return (0);
1431}
1432
1433/* ARGSUSED */
1434static int
1435dld_capab_poll_disable(dld_str_t *dsp, dld_capab_poll_t *poll)
1436{
1437	if (!dsp->ds_polling)
1438		return (EINVAL);
1439
1440	mac_client_poll_disable(dsp->ds_mch);
1441	mac_resource_set(dsp->ds_mch, NULL, NULL);
1442
1443	dsp->ds_polling = B_FALSE;
1444	return (0);
1445}
1446
1447static int
1448dld_capab_poll(dld_str_t *dsp, void *data, uint_t flags)
1449{
1450	dld_capab_poll_t	*poll = data;
1451
1452	ASSERT(MAC_PERIM_HELD(dsp->ds_mh));
1453
1454	switch (flags) {
1455	case DLD_ENABLE:
1456		return (dld_capab_poll_enable(dsp, poll));
1457	case DLD_DISABLE:
1458		return (dld_capab_poll_disable(dsp, poll));
1459	}
1460	return (ENOTSUP);
1461}
1462
1463static int
1464dld_capab_lso(dld_str_t *dsp, void *data, uint_t flags)
1465{
1466	dld_capab_lso_t		*lso = data;
1467
1468	ASSERT(MAC_PERIM_HELD(dsp->ds_mh));
1469
1470	switch (flags) {
1471	case DLD_ENABLE: {
1472		mac_capab_lso_t		mac_lso;
1473
1474		/*
1475		 * Check if LSO is supported on this MAC & enable LSO
1476		 * accordingly.
1477		 */
1478		if (mac_capab_get(dsp->ds_mh, MAC_CAPAB_LSO, &mac_lso)) {
1479			lso->lso_max_tcpv4 = mac_lso.lso_basic_tcp_ipv4.lso_max;
1480			lso->lso_max_tcpv6 = mac_lso.lso_basic_tcp_ipv6.lso_max;
1481			lso->lso_flags = 0;
1482			/* translate the flag for mac clients */
1483			if ((mac_lso.lso_flags & LSO_TX_BASIC_TCP_IPV4) != 0)
1484				lso->lso_flags |= DLD_LSO_BASIC_TCP_IPV4;
1485			if ((mac_lso.lso_flags & LSO_TX_BASIC_TCP_IPV6) != 0)
1486				lso->lso_flags |= DLD_LSO_BASIC_TCP_IPV6;
1487			dsp->ds_lso = lso->lso_flags != 0;
1488			/*
1489			 * DLS uses this to try and make sure that a raw ioctl
1490			 * doesn't send too much data, but doesn't currently
1491			 * check the actual SAP that is sending this (or that
1492			 * it's TCP). So for now, just use the max value here.
1493			 */
1494			dsp->ds_lso_max = MAX(lso->lso_max_tcpv4,
1495			    lso->lso_max_tcpv6);
1496		} else {
1497			dsp->ds_lso = B_FALSE;
1498			dsp->ds_lso_max = 0;
1499			return (ENOTSUP);
1500		}
1501		return (0);
1502	}
1503	case DLD_DISABLE: {
1504		dsp->ds_lso = B_FALSE;
1505		dsp->ds_lso_max = 0;
1506		return (0);
1507	}
1508	}
1509	return (ENOTSUP);
1510}
1511
1512static int
1513dld_capab(dld_str_t *dsp, uint_t type, void *data, uint_t flags)
1514{
1515	int	err;
1516
1517	/*
1518	 * Don't enable direct callback capabilities unless the caller is
1519	 * the IP client. When a module is inserted in a stream (_I_INSERT)
1520	 * the stack initiates capability disable, but due to races, the
1521	 * module insertion may complete before the capability disable
1522	 * completes. So we limit the check to DLD_ENABLE case.
1523	 */
1524	if ((flags == DLD_ENABLE && type != DLD_CAPAB_PERIM) &&
1525	    (!(dsp->ds_sap == ETHERTYPE_IP || dsp->ds_sap == ETHERTYPE_IPV6) ||
1526	    !check_mod_above(dsp->ds_rq, "ip"))) {
1527		return (ENOTSUP);
1528	}
1529
1530	switch (type) {
1531	case DLD_CAPAB_DIRECT:
1532		if (dsp->ds_sap == ETHERTYPE_IPV6) {
1533			err = ENOTSUP;
1534			break;
1535		}
1536		err = dld_capab_direct(dsp, data, flags);
1537		break;
1538
1539	case DLD_CAPAB_POLL:
1540		if (dsp->ds_sap == ETHERTYPE_IPV6) {
1541			err = ENOTSUP;
1542			break;
1543		}
1544		err =  dld_capab_poll(dsp, data, flags);
1545		break;
1546
1547	case DLD_CAPAB_PERIM:
1548		err = dld_capab_perim(dsp, data, flags);
1549		break;
1550
1551	case DLD_CAPAB_LSO:
1552		err = dld_capab_lso(dsp, data, flags);
1553		break;
1554
1555	default:
1556		err = ENOTSUP;
1557		break;
1558	}
1559
1560	return (err);
1561}
1562
1563/*
1564 * DL_CAPABILITY_ACK/DL_ERROR_ACK
1565 */
1566static void
1567proto_capability_advertise(dld_str_t *dsp, mblk_t *mp)
1568{
1569	dl_capability_ack_t	*dlap;
1570	dl_capability_sub_t	*dlsp;
1571	size_t			subsize;
1572	dl_capab_dld_t		dld;
1573	dl_capab_hcksum_t	hcksum;
1574	dl_capab_zerocopy_t	zcopy;
1575	dl_capab_vrrp_t		vrrp;
1576	mac_capab_vrrp_t	vrrp_capab;
1577	uint8_t			*ptr;
1578	queue_t			*q = dsp->ds_wq;
1579	mblk_t			*mp1;
1580	boolean_t		hcksum_capable = B_FALSE;
1581	boolean_t		zcopy_capable = B_FALSE;
1582	boolean_t		dld_capable = B_FALSE;
1583	boolean_t		vrrp_capable = B_FALSE;
1584
1585	/*
1586	 * Initially assume no capabilities.
1587	 */
1588	subsize = 0;
1589
1590	/*
1591	 * Check if checksum offload is supported on this MAC.
1592	 */
1593	bzero(&hcksum, sizeof (dl_capab_hcksum_t));
1594	if (mac_capab_get(dsp->ds_mh, MAC_CAPAB_HCKSUM,
1595	    &hcksum.hcksum_txflags)) {
1596		if (hcksum.hcksum_txflags != 0) {
1597			hcksum_capable = B_TRUE;
1598			subsize += sizeof (dl_capability_sub_t) +
1599			    sizeof (dl_capab_hcksum_t);
1600		}
1601	}
1602
1603	/*
1604	 * Check if zerocopy is supported on this interface.
1605	 * If advertising DL_CAPAB_ZEROCOPY has not been explicitly disabled
1606	 * then reserve space for that capability.
1607	 */
1608	if (!mac_capab_get(dsp->ds_mh, MAC_CAPAB_NO_ZCOPY, NULL) &&
1609	    !(dld_opt & DLD_OPT_NO_ZEROCOPY)) {
1610		zcopy_capable = B_TRUE;
1611		subsize += sizeof (dl_capability_sub_t) +
1612		    sizeof (dl_capab_zerocopy_t);
1613	}
1614
1615	/*
1616	 * Direct capability negotiation interface between IP and DLD
1617	 */
1618	if ((dsp->ds_sap == ETHERTYPE_IP || dsp->ds_sap == ETHERTYPE_IPV6) &&
1619	    check_mod_above(dsp->ds_rq, "ip")) {
1620		dld_capable = B_TRUE;
1621		subsize += sizeof (dl_capability_sub_t) +
1622		    sizeof (dl_capab_dld_t);
1623	}
1624
1625	/*
1626	 * Check if vrrp is supported on this interface. If so, reserve
1627	 * space for that capability.
1628	 */
1629	if (mac_capab_get(dsp->ds_mh, MAC_CAPAB_VRRP, &vrrp_capab)) {
1630		vrrp_capable = B_TRUE;
1631		subsize += sizeof (dl_capability_sub_t) +
1632		    sizeof (dl_capab_vrrp_t);
1633	}
1634
1635	/*
1636	 * If there are no capabilities to advertise or if we
1637	 * can't allocate a response, send a DL_ERROR_ACK.
1638	 */
1639	if ((mp1 = reallocb(mp,
1640	    sizeof (dl_capability_ack_t) + subsize, 0)) == NULL) {
1641		dlerrorack(q, mp, DL_CAPABILITY_REQ, DL_NOTSUPPORTED, 0);
1642		return;
1643	}
1644
1645	mp = mp1;
1646	DB_TYPE(mp) = M_PROTO;
1647	mp->b_wptr = mp->b_rptr + sizeof (dl_capability_ack_t) + subsize;
1648	bzero(mp->b_rptr, MBLKL(mp));
1649	dlap = (dl_capability_ack_t *)mp->b_rptr;
1650	dlap->dl_primitive = DL_CAPABILITY_ACK;
1651	dlap->dl_sub_offset = sizeof (dl_capability_ack_t);
1652	dlap->dl_sub_length = subsize;
1653	ptr = (uint8_t *)&dlap[1];
1654
1655	/*
1656	 * TCP/IP checksum offload.
1657	 */
1658	if (hcksum_capable) {
1659		dlsp = (dl_capability_sub_t *)ptr;
1660
1661		dlsp->dl_cap = DL_CAPAB_HCKSUM;
1662		dlsp->dl_length = sizeof (dl_capab_hcksum_t);
1663		ptr += sizeof (dl_capability_sub_t);
1664
1665		hcksum.hcksum_version = HCKSUM_VERSION_1;
1666		dlcapabsetqid(&(hcksum.hcksum_mid), dsp->ds_rq);
1667		bcopy(&hcksum, ptr, sizeof (dl_capab_hcksum_t));
1668		ptr += sizeof (dl_capab_hcksum_t);
1669	}
1670
1671	/*
1672	 * Zero copy
1673	 */
1674	if (zcopy_capable) {
1675		dlsp = (dl_capability_sub_t *)ptr;
1676
1677		dlsp->dl_cap = DL_CAPAB_ZEROCOPY;
1678		dlsp->dl_length = sizeof (dl_capab_zerocopy_t);
1679		ptr += sizeof (dl_capability_sub_t);
1680
1681		bzero(&zcopy, sizeof (dl_capab_zerocopy_t));
1682		zcopy.zerocopy_version = ZEROCOPY_VERSION_1;
1683		zcopy.zerocopy_flags = DL_CAPAB_VMSAFE_MEM;
1684
1685		dlcapabsetqid(&(zcopy.zerocopy_mid), dsp->ds_rq);
1686		bcopy(&zcopy, ptr, sizeof (dl_capab_zerocopy_t));
1687		ptr += sizeof (dl_capab_zerocopy_t);
1688	}
1689
1690	/*
1691	 * VRRP capability negotiation
1692	 */
1693	if (vrrp_capable) {
1694		dlsp = (dl_capability_sub_t *)ptr;
1695		dlsp->dl_cap = DL_CAPAB_VRRP;
1696		dlsp->dl_length = sizeof (dl_capab_vrrp_t);
1697		ptr += sizeof (dl_capability_sub_t);
1698
1699		bzero(&vrrp, sizeof (dl_capab_vrrp_t));
1700		vrrp.vrrp_af = vrrp_capab.mcv_af;
1701		bcopy(&vrrp, ptr, sizeof (dl_capab_vrrp_t));
1702		ptr += sizeof (dl_capab_vrrp_t);
1703	}
1704
1705	/*
1706	 * Direct capability negotiation interface between IP and DLD.
1707	 * Refer to dld.h for details.
1708	 */
1709	if (dld_capable) {
1710		dlsp = (dl_capability_sub_t *)ptr;
1711		dlsp->dl_cap = DL_CAPAB_DLD;
1712		dlsp->dl_length = sizeof (dl_capab_dld_t);
1713		ptr += sizeof (dl_capability_sub_t);
1714
1715		bzero(&dld, sizeof (dl_capab_dld_t));
1716		dld.dld_version = DLD_CURRENT_VERSION;
1717		dld.dld_capab = (uintptr_t)dld_capab;
1718		dld.dld_capab_handle = (uintptr_t)dsp;
1719
1720		dlcapabsetqid(&(dld.dld_mid), dsp->ds_rq);
1721		bcopy(&dld, ptr, sizeof (dl_capab_dld_t));
1722		ptr += sizeof (dl_capab_dld_t);
1723	}
1724
1725	ASSERT(ptr == mp->b_rptr + sizeof (dl_capability_ack_t) + subsize);
1726	qreply(q, mp);
1727}
1728
1729/*
1730 * Disable any enabled capabilities.
1731 */
1732void
1733dld_capabilities_disable(dld_str_t *dsp)
1734{
1735	if (dsp->ds_polling)
1736		(void) dld_capab_poll_disable(dsp, NULL);
1737}
1738