xref: /illumos-gate/usr/src/uts/common/io/mac/mac_util.c (revision da14cebe)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * MAC Services Module - misc utilities
28  */
29 
30 #include <sys/types.h>
31 #include <sys/mac.h>
32 #include <sys/mac_impl.h>
33 #include <sys/mac_client_priv.h>
34 #include <sys/mac_client_impl.h>
35 #include <sys/mac_soft_ring.h>
36 #include <sys/strsubr.h>
37 #include <sys/strsun.h>
38 #include <sys/vlan.h>
39 #include <sys/pattr.h>
40 #include <sys/pci_tools.h>
41 #include <inet/ip.h>
42 #include <inet/ip_impl.h>
43 #include <inet/ip6.h>
44 #include <sys/vtrace.h>
45 #include <sys/dlpi.h>
46 #include <sys/sunndi.h>
47 
48 /*
49  * Copy an mblk, preserving its hardware checksum flags.
50  */
51 static mblk_t *
52 mac_copymsg_cksum(mblk_t *mp)
53 {
54 	mblk_t *mp1;
55 	uint32_t start, stuff, end, value, flags;
56 
57 	mp1 = copymsg(mp);
58 	if (mp1 == NULL)
59 		return (NULL);
60 
61 	hcksum_retrieve(mp, NULL, NULL, &start, &stuff, &end, &value, &flags);
62 	(void) hcksum_assoc(mp1, NULL, NULL, start, stuff, end, value,
63 	    flags, KM_NOSLEEP);
64 
65 	return (mp1);
66 }
67 
68 /*
69  * Copy an mblk chain, presenting the hardware checksum flags of the
70  * individual mblks.
71  */
72 mblk_t *
73 mac_copymsgchain_cksum(mblk_t *mp)
74 {
75 	mblk_t *nmp = NULL;
76 	mblk_t **nmpp = &nmp;
77 
78 	for (; mp != NULL; mp = mp->b_next) {
79 		if ((*nmpp = mac_copymsg_cksum(mp)) == NULL) {
80 			freemsgchain(nmp);
81 			return (NULL);
82 		}
83 
84 		nmpp = &((*nmpp)->b_next);
85 	}
86 
87 	return (nmp);
88 }
89 
90 /*
91  * Process the specified mblk chain for proper handling of hardware
92  * checksum offload. This routine is invoked for loopback traffic
93  * between MAC clients.
94  * The function handles a NULL mblk chain passed as argument.
95  */
96 mblk_t *
97 mac_fix_cksum(mblk_t *mp_chain)
98 {
99 	mblk_t *mp, *prev = NULL, *new_chain = mp_chain, *mp1;
100 	uint32_t flags, start, stuff, end, value;
101 
102 	for (mp = mp_chain; mp != NULL; prev = mp, mp = mp->b_next) {
103 		uint16_t len;
104 		uint32_t offset;
105 		struct ether_header *ehp;
106 		uint16_t sap;
107 
108 		hcksum_retrieve(mp, NULL, NULL, &start, &stuff, &end, &value,
109 		    &flags);
110 		if (flags == 0)
111 			continue;
112 
113 		/*
114 		 * Since the processing of checksum offload for loopback
115 		 * traffic requires modification of the packet contents,
116 		 * ensure sure that we are always modifying our own copy.
117 		 */
118 		if (DB_REF(mp) > 1) {
119 			mp1 = copymsg(mp);
120 			if (mp1 == NULL)
121 				continue;
122 			mp1->b_next = mp->b_next;
123 			mp->b_next = NULL;
124 			freemsg(mp);
125 			if (prev != NULL)
126 				prev->b_next = mp1;
127 			else
128 				new_chain = mp1;
129 			mp = mp1;
130 		}
131 
132 		/*
133 		 * Ethernet, and optionally VLAN header.
134 		 */
135 		/* LINTED: improper alignment cast */
136 		ehp = (struct ether_header *)mp->b_rptr;
137 		if (ntohs(ehp->ether_type) == VLAN_TPID) {
138 			struct ether_vlan_header *evhp;
139 
140 			ASSERT(MBLKL(mp) >= sizeof (struct ether_vlan_header));
141 			/* LINTED: improper alignment cast */
142 			evhp = (struct ether_vlan_header *)mp->b_rptr;
143 			sap = ntohs(evhp->ether_type);
144 			offset = sizeof (struct ether_vlan_header);
145 		} else {
146 			sap = ntohs(ehp->ether_type);
147 			offset = sizeof (struct ether_header);
148 		}
149 
150 		if (MBLKL(mp) <= offset) {
151 			offset -= MBLKL(mp);
152 			if (mp->b_cont == NULL) {
153 				/* corrupted packet, skip it */
154 				if (prev != NULL)
155 					prev->b_next = mp->b_next;
156 				else
157 					new_chain = mp->b_next;
158 				mp1 = mp->b_next;
159 				mp->b_next = NULL;
160 				freemsg(mp);
161 				mp = mp1;
162 				continue;
163 			}
164 			mp = mp->b_cont;
165 		}
166 
167 		if (flags & (HCK_FULLCKSUM | HCK_IPV4_HDRCKSUM)) {
168 			ipha_t *ipha = NULL;
169 
170 			/*
171 			 * In order to compute the full and header
172 			 * checksums, we need to find and parse
173 			 * the IP and/or ULP headers.
174 			 */
175 
176 			sap = (sap < ETHERTYPE_802_MIN) ? 0 : sap;
177 
178 			/*
179 			 * IP header.
180 			 */
181 			if (sap != ETHERTYPE_IP)
182 				continue;
183 
184 			ASSERT(MBLKL(mp) >= offset + sizeof (ipha_t));
185 			/* LINTED: improper alignment cast */
186 			ipha = (ipha_t *)(mp->b_rptr + offset);
187 
188 			if (flags & HCK_FULLCKSUM) {
189 				ipaddr_t src, dst;
190 				uint32_t cksum;
191 				uint16_t *up;
192 				uint8_t proto;
193 
194 				/*
195 				 * Pointer to checksum field in ULP header.
196 				 */
197 				proto = ipha->ipha_protocol;
198 				ASSERT(ipha->ipha_version_and_hdr_length ==
199 				    IP_SIMPLE_HDR_VERSION);
200 				if (proto == IPPROTO_TCP) {
201 					/* LINTED: improper alignment cast */
202 					up = IPH_TCPH_CHECKSUMP(ipha,
203 					    IP_SIMPLE_HDR_LENGTH);
204 				} else {
205 					ASSERT(proto == IPPROTO_UDP);
206 					/* LINTED: improper alignment cast */
207 					up = IPH_UDPH_CHECKSUMP(ipha,
208 					    IP_SIMPLE_HDR_LENGTH);
209 				}
210 
211 				/*
212 				 * Pseudo-header checksum.
213 				 */
214 				src = ipha->ipha_src;
215 				dst = ipha->ipha_dst;
216 				len = ntohs(ipha->ipha_length) -
217 				    IP_SIMPLE_HDR_LENGTH;
218 
219 				cksum = (dst >> 16) + (dst & 0xFFFF) +
220 				    (src >> 16) + (src & 0xFFFF);
221 				cksum += htons(len);
222 
223 				/*
224 				 * The checksum value stored in the packet needs
225 				 * to be correct. Compute it here.
226 				 */
227 				*up = 0;
228 				cksum += (((proto) == IPPROTO_UDP) ?
229 				    IP_UDP_CSUM_COMP : IP_TCP_CSUM_COMP);
230 				cksum = IP_CSUM(mp, IP_SIMPLE_HDR_LENGTH +
231 				    offset, cksum);
232 				*(up) = (uint16_t)(cksum ? cksum : ~cksum);
233 
234 				flags |= HCK_FULLCKSUM_OK;
235 				value = 0xffff;
236 			}
237 
238 			if (flags & HCK_IPV4_HDRCKSUM) {
239 				ASSERT(ipha != NULL);
240 				ipha->ipha_hdr_checksum =
241 				    (uint16_t)ip_csum_hdr(ipha);
242 			}
243 		}
244 
245 		if (flags & HCK_PARTIALCKSUM) {
246 			uint16_t *up, partial, cksum;
247 			uchar_t *ipp; /* ptr to beginning of IP header */
248 
249 			if (mp->b_cont != NULL) {
250 				mblk_t *mp1;
251 
252 				mp1 = msgpullup(mp, offset + end);
253 				if (mp1 == NULL)
254 					continue;
255 				mp1->b_next = mp->b_next;
256 				mp->b_next = NULL;
257 				freemsg(mp);
258 				if (prev != NULL)
259 					prev->b_next = mp1;
260 				else
261 					new_chain = mp1;
262 				mp = mp1;
263 			}
264 
265 			ipp = mp->b_rptr + offset;
266 			/* LINTED: cast may result in improper alignment */
267 			up = (uint16_t *)((uchar_t *)ipp + stuff);
268 			partial = *up;
269 			*up = 0;
270 
271 			cksum = IP_BCSUM_PARTIAL(mp->b_rptr + offset + start,
272 			    end - start, partial);
273 			cksum = ~cksum;
274 			*up = cksum ? cksum : ~cksum;
275 
276 			/*
277 			 * Since we already computed the whole checksum,
278 			 * indicate to the stack that it has already
279 			 * been verified by the hardware.
280 			 */
281 			flags &= ~HCK_PARTIALCKSUM;
282 			flags |= (HCK_FULLCKSUM | HCK_FULLCKSUM_OK);
283 			value = 0xffff;
284 		}
285 
286 		(void) hcksum_assoc(mp, NULL, NULL, start, stuff, end,
287 		    value, flags, KM_NOSLEEP);
288 	}
289 
290 	return (new_chain);
291 }
292 
293 /*
294  * Add VLAN tag to the specified mblk.
295  */
296 mblk_t *
297 mac_add_vlan_tag(mblk_t *mp, uint_t pri, uint16_t vid)
298 {
299 	mblk_t *hmp;
300 	struct ether_vlan_header *evhp;
301 	struct ether_header *ehp;
302 	uint32_t start, stuff, end, value, flags;
303 
304 	ASSERT(pri != 0 || vid != 0);
305 
306 	/*
307 	 * Allocate an mblk for the new tagged ethernet header,
308 	 * and copy the MAC addresses and ethertype from the
309 	 * original header.
310 	 */
311 
312 	hmp = allocb(sizeof (struct ether_vlan_header), BPRI_MED);
313 	if (hmp == NULL) {
314 		freemsg(mp);
315 		return (NULL);
316 	}
317 
318 	evhp = (struct ether_vlan_header *)hmp->b_rptr;
319 	ehp = (struct ether_header *)mp->b_rptr;
320 
321 	bcopy(ehp, evhp, (ETHERADDRL * 2));
322 	evhp->ether_type = ehp->ether_type;
323 	evhp->ether_tpid = htons(ETHERTYPE_VLAN);
324 
325 	hmp->b_wptr += sizeof (struct ether_vlan_header);
326 	mp->b_rptr += sizeof (struct ether_header);
327 
328 	/*
329 	 * Free the original message if it's now empty. Link the
330 	 * rest of messages to the header message.
331 	 */
332 	hcksum_retrieve(mp, NULL, NULL, &start, &stuff, &end, &value, &flags);
333 	(void) hcksum_assoc(hmp, NULL, NULL, start, stuff, end, value, flags,
334 	    KM_NOSLEEP);
335 	if (MBLKL(mp) == 0) {
336 		hmp->b_cont = mp->b_cont;
337 		freeb(mp);
338 	} else {
339 		hmp->b_cont = mp;
340 	}
341 	ASSERT(MBLKL(hmp) >= sizeof (struct ether_vlan_header));
342 
343 	/*
344 	 * Initialize the new TCI (Tag Control Information).
345 	 */
346 	evhp->ether_tci = htons(VLAN_TCI(pri, 0, vid));
347 
348 	return (hmp);
349 }
350 
351 /*
352  * Adds a VLAN tag with the specified VID and priority to each mblk of
353  * the specified chain.
354  */
355 mblk_t *
356 mac_add_vlan_tag_chain(mblk_t *mp_chain, uint_t pri, uint16_t vid)
357 {
358 	mblk_t *next_mp, **prev, *mp;
359 
360 	mp = mp_chain;
361 	prev = &mp_chain;
362 
363 	while (mp != NULL) {
364 		next_mp = mp->b_next;
365 		mp->b_next = NULL;
366 		if ((mp = mac_add_vlan_tag(mp, pri, vid)) == NULL) {
367 			freemsgchain(next_mp);
368 			break;
369 		}
370 		*prev = mp;
371 		prev = &mp->b_next;
372 		mp = mp->b_next = next_mp;
373 	}
374 
375 	return (mp_chain);
376 }
377 
378 /*
379  * Strip VLAN tag
380  */
381 mblk_t *
382 mac_strip_vlan_tag(mblk_t *mp)
383 {
384 	mblk_t *newmp;
385 	struct ether_vlan_header *evhp;
386 
387 	evhp = (struct ether_vlan_header *)mp->b_rptr;
388 	if (ntohs(evhp->ether_tpid) == ETHERTYPE_VLAN) {
389 		ASSERT(MBLKL(mp) >= sizeof (struct ether_vlan_header));
390 
391 		if (DB_REF(mp) > 1) {
392 			newmp = copymsg(mp);
393 			if (newmp == NULL)
394 				return (NULL);
395 			freemsg(mp);
396 			mp = newmp;
397 		}
398 
399 		evhp = (struct ether_vlan_header *)mp->b_rptr;
400 
401 		ovbcopy(mp->b_rptr, mp->b_rptr + VLAN_TAGSZ, 2 * ETHERADDRL);
402 		mp->b_rptr += VLAN_TAGSZ;
403 	}
404 	return (mp);
405 }
406 
407 /*
408  * Strip VLAN tag from each mblk of the chain.
409  */
410 mblk_t *
411 mac_strip_vlan_tag_chain(mblk_t *mp_chain)
412 {
413 	mblk_t *mp, *next_mp, **prev;
414 
415 	mp = mp_chain;
416 	prev = &mp_chain;
417 
418 	while (mp != NULL) {
419 		next_mp = mp->b_next;
420 		mp->b_next = NULL;
421 		if ((mp = mac_strip_vlan_tag(mp)) == NULL) {
422 			freemsgchain(next_mp);
423 			break;
424 		}
425 		*prev = mp;
426 		prev = &mp->b_next;
427 		mp = mp->b_next = next_mp;
428 	}
429 
430 	return (mp_chain);
431 }
432 
433 /*
434  * Default callback function. Used when the datapath is not yet initialized.
435  */
436 /* ARGSUSED */
437 void
438 mac_pkt_drop(void *arg, mac_resource_handle_t resource, mblk_t *mp,
439     boolean_t loopback)
440 {
441 	mblk_t	*mp1 = mp;
442 
443 	while (mp1 != NULL) {
444 		mp1->b_prev = NULL;
445 		mp1->b_queue = NULL;
446 		mp1 = mp1->b_next;
447 	}
448 	freemsgchain(mp);
449 }
450 
451 /*
452  * Determines the IPv6 header length accounting for all the optional IPv6
453  * headers (hop-by-hop, destination, routing and fragment). The header length
454  * and next header value (a transport header) is captured.
455  *
456  * Returns B_FALSE if all the IP headers are not in the same mblk otherwise
457  * returns B_TRUE.
458  */
459 boolean_t
460 mac_ip_hdr_length_v6(mblk_t *mp, ip6_t *ip6h, uint16_t *hdr_length,
461     uint8_t *next_hdr)
462 {
463 	uint16_t length;
464 	uint_t	ehdrlen;
465 	uint8_t *whereptr;
466 	uint8_t *endptr;
467 	uint8_t *nexthdrp;
468 	ip6_dest_t *desthdr;
469 	ip6_rthdr_t *rthdr;
470 	ip6_frag_t *fraghdr;
471 
472 	endptr = mp->b_wptr;
473 	if (((uchar_t *)ip6h + IPV6_HDR_LEN) > endptr)
474 		return (B_FALSE);
475 	ASSERT((IPH_HDR_VERSION(ip6h) & ~IP_FORWARD_PROG_BIT) == IPV6_VERSION);
476 	length = IPV6_HDR_LEN;
477 	whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */
478 
479 	nexthdrp = &ip6h->ip6_nxt;
480 	while (whereptr < endptr) {
481 		/* Is there enough left for len + nexthdr? */
482 		if (whereptr + MIN_EHDR_LEN > endptr)
483 			break;
484 
485 		switch (*nexthdrp) {
486 		case IPPROTO_HOPOPTS:
487 		case IPPROTO_DSTOPTS:
488 			/* Assumes the headers are identical for hbh and dst */
489 			desthdr = (ip6_dest_t *)whereptr;
490 			ehdrlen = 8 * (desthdr->ip6d_len + 1);
491 			if ((uchar_t *)desthdr +  ehdrlen > endptr)
492 				return (B_FALSE);
493 			nexthdrp = &desthdr->ip6d_nxt;
494 			break;
495 		case IPPROTO_ROUTING:
496 			rthdr = (ip6_rthdr_t *)whereptr;
497 			ehdrlen =  8 * (rthdr->ip6r_len + 1);
498 			if ((uchar_t *)rthdr +  ehdrlen > endptr)
499 				return (B_FALSE);
500 			nexthdrp = &rthdr->ip6r_nxt;
501 			break;
502 		case IPPROTO_FRAGMENT:
503 			fraghdr = (ip6_frag_t *)whereptr;
504 			ehdrlen = sizeof (ip6_frag_t);
505 			if ((uchar_t *)&fraghdr[1] > endptr)
506 				return (B_FALSE);
507 			nexthdrp = &fraghdr->ip6f_nxt;
508 			break;
509 		case IPPROTO_NONE:
510 			/* No next header means we're finished */
511 		default:
512 			*hdr_length = length;
513 			*next_hdr = *nexthdrp;
514 			return (B_TRUE);
515 		}
516 		length += ehdrlen;
517 		whereptr += ehdrlen;
518 		*hdr_length = length;
519 		*next_hdr = *nexthdrp;
520 	}
521 	switch (*nexthdrp) {
522 	case IPPROTO_HOPOPTS:
523 	case IPPROTO_DSTOPTS:
524 	case IPPROTO_ROUTING:
525 	case IPPROTO_FRAGMENT:
526 		/*
527 		 * If any know extension headers are still to be processed,
528 		 * the packet's malformed (or at least all the IP header(s) are
529 		 * not in the same mblk - and that should never happen.
530 		 */
531 		return (B_FALSE);
532 
533 	default:
534 		/*
535 		 * If we get here, we know that all of the IP headers were in
536 		 * the same mblk, even if the ULP header is in the next mblk.
537 		 */
538 		*hdr_length = length;
539 		*next_hdr = *nexthdrp;
540 		return (B_TRUE);
541 	}
542 }
543 
/*
 * Scratch state used while looking up and re-targeting the interrupt of
 * a device through its nexus.
 */
typedef struct mac_dladm_intr {
	/* ino of the matching interrupt, set by mac_search_intrinfo() */
	int	ino;
	/* cpu_id reported for that interrupt, set by mac_search_intrinfo() */
	int	cpu_id;
	/* path of the device, compared against "<path>:<driver><inst>" */
	char	driver_path[MAXPATHLEN];
	/* "/devices<nexus path>:intr", opened to issue PCITOOL ioctls */
	char	nexus_path[MAXPATHLEN];
} mac_dladm_intr_t;
550 
551 /* Bind the interrupt to cpu_num */
552 static int
553 mac_set_intr(ldi_handle_t lh, processorid_t cpu_num, int ino)
554 {
555 	pcitool_intr_set_t	iset;
556 	int			err;
557 
558 	iset.ino = ino;
559 	iset.cpu_id = cpu_num;
560 	iset.user_version = PCITOOL_VERSION;
561 	err = ldi_ioctl(lh, PCITOOL_DEVICE_SET_INTR, (intptr_t)&iset, FKIOCTL,
562 	    kcred, NULL);
563 
564 	return (err);
565 }
566 
567 /*
568  * Search interrupt information. iget is filled in with the info to search
569  */
570 static boolean_t
571 mac_search_intrinfo(pcitool_intr_get_t *iget_p, mac_dladm_intr_t *dln)
572 {
573 	int	i;
574 	char	driver_path[2 * MAXPATHLEN];
575 
576 	for (i = 0; i < iget_p->num_devs; i++) {
577 		(void) strlcpy(driver_path, iget_p->dev[i].path, MAXPATHLEN);
578 		(void) snprintf(&driver_path[strlen(driver_path)], MAXPATHLEN,
579 		    ":%s%d", iget_p->dev[i].driver_name,
580 		    iget_p->dev[i].dev_inst);
581 		/* Match the device path for the device path */
582 		if (strcmp(driver_path, dln->driver_path) == 0) {
583 			dln->ino = iget_p->ino;
584 			dln->cpu_id = iget_p->cpu_id;
585 			return (B_TRUE);
586 		}
587 	}
588 	return (B_FALSE);
589 }
590 
591 /*
592  * Get information about ino, i.e. if this is the interrupt for our
593  * device and where it is bound etc.
594  */
595 static boolean_t
596 mac_get_single_intr(ldi_handle_t lh, int ino, mac_dladm_intr_t *dln)
597 {
598 	pcitool_intr_get_t	*iget_p;
599 	int			ipsz;
600 	int			nipsz;
601 	int			err;
602 	uint8_t			inum;
603 
604 	/*
605 	 * Check if SLEEP is OK, i.e if could come here in response to
606 	 * changing the fanout due to some callback from the driver, say
607 	 * link speed changes.
608 	 */
609 	ipsz = PCITOOL_IGET_SIZE(0);
610 	iget_p = kmem_zalloc(ipsz, KM_SLEEP);
611 
612 	iget_p->num_devs_ret = 0;
613 	iget_p->user_version = PCITOOL_VERSION;
614 	iget_p->ino = ino;
615 
616 	err = ldi_ioctl(lh, PCITOOL_DEVICE_GET_INTR, (intptr_t)iget_p,
617 	    FKIOCTL, kcred, NULL);
618 	if (err != 0) {
619 		kmem_free(iget_p, ipsz);
620 		return (B_FALSE);
621 	}
622 	if (iget_p->num_devs == 0) {
623 		kmem_free(iget_p, ipsz);
624 		return (B_FALSE);
625 	}
626 	inum = iget_p->num_devs;
627 	if (iget_p->num_devs_ret < iget_p->num_devs) {
628 		/* Reallocate */
629 		nipsz = PCITOOL_IGET_SIZE(iget_p->num_devs);
630 
631 		kmem_free(iget_p, ipsz);
632 		ipsz = nipsz;
633 		iget_p = kmem_zalloc(ipsz, KM_SLEEP);
634 
635 		iget_p->num_devs_ret = inum;
636 		iget_p->ino = ino;
637 		iget_p->user_version = PCITOOL_VERSION;
638 		err = ldi_ioctl(lh, PCITOOL_DEVICE_GET_INTR, (intptr_t)iget_p,
639 		    FKIOCTL, kcred, NULL);
640 		if (err != 0) {
641 			kmem_free(iget_p, ipsz);
642 			return (B_FALSE);
643 		}
644 		/* defensive */
645 		if (iget_p->num_devs != iget_p->num_devs_ret) {
646 			kmem_free(iget_p, ipsz);
647 			return (B_FALSE);
648 		}
649 	}
650 
651 	if (mac_search_intrinfo(iget_p, dln)) {
652 		kmem_free(iget_p, ipsz);
653 		return (B_TRUE);
654 	}
655 	kmem_free(iget_p, ipsz);
656 	return (B_FALSE);
657 }
658 
659 /*
660  * Get the interrupts and check each one to see if it is for our device.
661  */
662 static int
663 mac_validate_intr(ldi_handle_t lh, mac_dladm_intr_t *dln, processorid_t cpuid)
664 {
665 	pcitool_intr_info_t	intr_info;
666 	int			err;
667 	int			ino;
668 
669 	err = ldi_ioctl(lh, PCITOOL_SYSTEM_INTR_INFO, (intptr_t)&intr_info,
670 	    FKIOCTL, kcred, NULL);
671 	if (err != 0)
672 		return (-1);
673 
674 	for (ino = 0; ino < intr_info.num_intr; ino++) {
675 		if (mac_get_single_intr(lh, ino, dln)) {
676 			if (dln->cpu_id == cpuid)
677 				return (0);
678 			return (1);
679 		}
680 	}
681 	return (-1);
682 }
683 
684 /*
685  * Obtain the nexus parent node info. for mdip.
686  */
687 static dev_info_t *
688 mac_get_nexus_node(dev_info_t *mdip, mac_dladm_intr_t *dln)
689 {
690 	struct dev_info		*tdip = (struct dev_info *)mdip;
691 	struct ddi_minor_data	*minordata;
692 	int			circ;
693 	dev_info_t		*pdip;
694 	char			pathname[MAXPATHLEN];
695 
696 	while (tdip != NULL) {
697 		ndi_devi_enter((dev_info_t *)tdip, &circ);
698 		for (minordata = tdip->devi_minor; minordata != NULL;
699 		    minordata = minordata->next) {
700 			if (strncmp(minordata->ddm_node_type, DDI_NT_INTRCTL,
701 			    strlen(DDI_NT_INTRCTL)) == 0) {
702 				pdip = minordata->dip;
703 				(void) ddi_pathname(pdip, pathname);
704 				(void) snprintf(dln->nexus_path, MAXPATHLEN,
705 				    "/devices%s:intr", pathname);
706 				(void) ddi_pathname_minor(minordata, pathname);
707 				ndi_devi_exit((dev_info_t *)tdip, circ);
708 				return (pdip);
709 			}
710 		}
711 		ndi_devi_exit((dev_info_t *)tdip, circ);
712 		tdip = tdip->devi_parent;
713 	}
714 	return (NULL);
715 }
716 
717 /*
718  * For a primary MAC client, if the user has set a list or CPUs or
719  * we have obtained it implicitly, we try to retarget the interrupt
720  * for that device on one of the CPUs in the list.
721  * We assign the interrupt to the same CPU as the poll thread.
722  */
723 static boolean_t
724 mac_check_interrupt_binding(dev_info_t *mdip, int32_t cpuid)
725 {
726 	ldi_handle_t		lh = NULL;
727 	ldi_ident_t		li = NULL;
728 	int			err;
729 	int			ret;
730 	mac_dladm_intr_t	dln;
731 	dev_info_t		*dip;
732 	struct ddi_minor_data	*minordata;
733 
734 	dln.nexus_path[0] = '\0';
735 	dln.driver_path[0] = '\0';
736 
737 	minordata = ((struct dev_info *)mdip)->devi_minor;
738 	while (minordata != NULL) {
739 		if (minordata->type == DDM_MINOR)
740 			break;
741 		minordata = minordata->next;
742 	}
743 	if (minordata == NULL)
744 		return (B_FALSE);
745 
746 	(void) ddi_pathname_minor(minordata, dln.driver_path);
747 
748 	dip = mac_get_nexus_node(mdip, &dln);
749 	/* defensive */
750 	if (dip == NULL)
751 		return (B_FALSE);
752 
753 	err = ldi_ident_from_major(ddi_driver_major(dip), &li);
754 	if (err != 0)
755 		return (B_FALSE);
756 
757 	err = ldi_open_by_name(dln.nexus_path, FREAD|FWRITE, kcred, &lh, li);
758 	if (err != 0)
759 		return (B_FALSE);
760 
761 	ret = mac_validate_intr(lh, &dln, cpuid);
762 	if (ret < 0) {
763 		(void) ldi_close(lh, FREAD|FWRITE, kcred);
764 		return (B_FALSE);
765 	}
766 	/* cmn_note? */
767 	if (ret != 0)
768 		if ((err = (mac_set_intr(lh, cpuid, dln.ino))) != 0) {
769 			(void) ldi_close(lh, FREAD|FWRITE, kcred);
770 			return (B_FALSE);
771 		}
772 	(void) ldi_close(lh, FREAD|FWRITE, kcred);
773 	return (B_TRUE);
774 }
775 
776 void
777 mac_client_set_intr_cpu(void *arg, mac_client_handle_t mch, int32_t cpuid)
778 {
779 	dev_info_t		*mdip = (dev_info_t *)arg;
780 	mac_client_impl_t	*mcip = (mac_client_impl_t *)mch;
781 	mac_resource_props_t	*mrp;
782 	mac_perim_handle_t	mph;
783 
784 	if (cpuid == -1 || !mac_check_interrupt_binding(mdip, cpuid))
785 		return;
786 
787 	mac_perim_enter_by_mh((mac_handle_t)mcip->mci_mip, &mph);
788 	mrp = MCIP_RESOURCE_PROPS(mcip);
789 	mrp->mrp_intr_cpu = cpuid;
790 	mac_perim_exit(mph);
791 }
792 
793 int32_t
794 mac_client_intr_cpu(mac_client_handle_t mch)
795 {
796 	mac_client_impl_t	*mcip = (mac_client_impl_t *)mch;
797 	mac_cpus_t		*srs_cpu;
798 	mac_soft_ring_set_t	*rx_srs;
799 	flow_entry_t		*flent = mcip->mci_flent;
800 	mac_resource_props_t	*mrp = MCIP_RESOURCE_PROPS(mcip);
801 
802 	/*
803 	 * Check if we need to retarget the interrupt. We do this only
804 	 * for the primary MAC client. We do this if we have the only
805 	 *  exclusive ring in the group.
806 	 */
807 	if (mac_is_primary_client(mcip) && flent->fe_rx_srs_cnt == 2) {
808 		rx_srs = flent->fe_rx_srs[1];
809 		srs_cpu = &rx_srs->srs_cpu;
810 		if (mrp->mrp_intr_cpu == srs_cpu->mc_pollid)
811 			return (-1);
812 		return (srs_cpu->mc_pollid);
813 	}
814 	return (-1);
815 }
816 
817 void *
818 mac_get_devinfo(mac_handle_t mh)
819 {
820 	mac_impl_t	*mip = (mac_impl_t *)mh;
821 
822 	return ((void *)mip->mi_dip);
823 }
824