xref: /illumos-gate/usr/src/uts/common/inet/ip/ip6.c (revision bd670b35)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 /*
26  * Copyright (c) 1990 Mentat Inc.
27  */
28 
29 #include <sys/types.h>
30 #include <sys/stream.h>
31 #include <sys/dlpi.h>
32 #include <sys/stropts.h>
33 #include <sys/sysmacros.h>
34 #include <sys/strsun.h>
35 #include <sys/strlog.h>
36 #include <sys/strsubr.h>
37 #define	_SUN_TPI_VERSION	2
38 #include <sys/tihdr.h>
39 #include <sys/ddi.h>
40 #include <sys/sunddi.h>
41 #include <sys/cmn_err.h>
42 #include <sys/debug.h>
43 #include <sys/sdt.h>
44 #include <sys/kobj.h>
45 #include <sys/zone.h>
46 #include <sys/neti.h>
47 #include <sys/hook.h>
48 
49 #include <sys/kmem.h>
50 #include <sys/systm.h>
51 #include <sys/param.h>
52 #include <sys/socket.h>
53 #include <sys/vtrace.h>
54 #include <sys/isa_defs.h>
55 #include <sys/atomic.h>
56 #include <sys/policy.h>
57 #include <sys/mac.h>
58 #include <net/if.h>
59 #include <net/if_types.h>
60 #include <net/route.h>
61 #include <net/if_dl.h>
62 #include <sys/sockio.h>
63 #include <netinet/in.h>
64 #include <netinet/ip6.h>
65 #include <netinet/icmp6.h>
66 #include <netinet/sctp.h>
67 
68 #include <inet/common.h>
69 #include <inet/mi.h>
70 #include <inet/optcom.h>
71 #include <inet/mib2.h>
72 #include <inet/nd.h>
73 #include <inet/arp.h>
74 
75 #include <inet/ip.h>
76 #include <inet/ip_impl.h>
77 #include <inet/ip6.h>
78 #include <inet/ip6_asp.h>
79 #include <inet/tcp.h>
80 #include <inet/tcp_impl.h>
81 #include <inet/udp_impl.h>
82 #include <inet/ipp_common.h>
83 
84 #include <inet/ip_multi.h>
85 #include <inet/ip_if.h>
86 #include <inet/ip_ire.h>
87 #include <inet/ip_rts.h>
88 #include <inet/ip_ndp.h>
89 #include <net/pfkeyv2.h>
90 #include <inet/sadb.h>
91 #include <inet/ipsec_impl.h>
92 #include <inet/iptun/iptun_impl.h>
93 #include <inet/sctp_ip.h>
94 #include <sys/pattr.h>
95 #include <inet/ipclassifier.h>
96 #include <inet/ipsecah.h>
97 #include <inet/rawip_impl.h>
98 #include <inet/rts_impl.h>
99 #include <sys/squeue_impl.h>
100 #include <sys/squeue.h>
101 
102 #include <sys/tsol/label.h>
103 #include <sys/tsol/tnet.h>
104 
105 #include <rpc/pmap_prot.h>
106 
107 /* Temporary; for CR 6451644 work-around */
108 #include <sys/ethernet.h>
109 
110 /*
111  * Naming conventions:
112  *      These rules should be judiciously applied
113  *	if there is a need to identify something as IPv6 versus IPv4
114  *	IPv6 functions will end with _v6 in the ip module.
115  *	IPv6 functions will end with _ipv6 in the transport modules.
116  *	IPv6 macros:
117  *		Some macros end with _V6; e.g. ILL_FRAG_HASH_V6
118  *		Some macros start with V6_; e.g. V6_OR_V4_INADDR_ANY
119  *		And then there are ..V4_PART_OF_V6.
120  *		The intent is that macros in the ip module end with _V6.
121  *	IPv6 global variables will start with ipv6_
122  *	IPv6 structures will start with ipv6
123  *	IPv6 defined constants should start with IPV6_
124  *		(but then there are NDP_DEFAULT_VERS_PRI_AND_FLOW, etc)
125  */
126 
127 /*
128  * ip6opt_ls is used to enable IPv6 (via /etc/system on TX systems).
129  * We need to do this because we didn't obtain the IP6OPT_LS (0x0a)
130  * from IANA. This mechanism will remain in effect until an official
131  * number is obtained.
132  */
133 uchar_t ip6opt_ls;
134 
135 const in6_addr_t ipv6_all_ones =
136 	{ 0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU };
137 const in6_addr_t ipv6_all_zeros = { 0, 0, 0, 0 };
138 
139 #ifdef	_BIG_ENDIAN
140 const in6_addr_t ipv6_unspecified_group = { 0xff000000U, 0, 0, 0 };
141 #else	/* _BIG_ENDIAN */
142 const in6_addr_t ipv6_unspecified_group = { 0x000000ffU, 0, 0, 0 };
143 #endif	/* _BIG_ENDIAN */
144 
145 #ifdef	_BIG_ENDIAN
146 const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x00000001U };
147 #else  /* _BIG_ENDIAN */
148 const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x01000000U };
149 #endif /* _BIG_ENDIAN */
150 
151 #ifdef _BIG_ENDIAN
152 const in6_addr_t ipv6_all_hosts_mcast = { 0xff020000U, 0, 0, 0x00000001U };
153 #else  /* _BIG_ENDIAN */
154 const in6_addr_t ipv6_all_hosts_mcast = { 0x000002ffU, 0, 0, 0x01000000U };
155 #endif /* _BIG_ENDIAN */
156 
157 #ifdef _BIG_ENDIAN
158 const in6_addr_t ipv6_all_rtrs_mcast = { 0xff020000U, 0, 0, 0x00000002U };
159 #else  /* _BIG_ENDIAN */
160 const in6_addr_t ipv6_all_rtrs_mcast = { 0x000002ffU, 0, 0, 0x02000000U };
161 #endif /* _BIG_ENDIAN */
162 
163 #ifdef _BIG_ENDIAN
164 const in6_addr_t ipv6_all_v2rtrs_mcast = { 0xff020000U, 0, 0, 0x00000016U };
165 #else  /* _BIG_ENDIAN */
166 const in6_addr_t ipv6_all_v2rtrs_mcast = { 0x000002ffU, 0, 0, 0x16000000U };
167 #endif /* _BIG_ENDIAN */
168 
169 #ifdef _BIG_ENDIAN
170 const in6_addr_t ipv6_solicited_node_mcast =
171 			{ 0xff020000U, 0, 0x00000001U, 0xff000000U };
172 #else  /* _BIG_ENDIAN */
173 const in6_addr_t ipv6_solicited_node_mcast =
174 			{ 0x000002ffU, 0, 0x01000000U, 0x000000ffU };
175 #endif /* _BIG_ENDIAN */
176 
177 static boolean_t icmp_inbound_verify_v6(mblk_t *, icmp6_t *, ip_recv_attr_t *);
178 static void	icmp_inbound_too_big_v6(icmp6_t *, ip_recv_attr_t *);
179 static void	icmp_pkt_v6(mblk_t *, void *, size_t, const in6_addr_t *,
180     ip_recv_attr_t *);
181 static void	icmp_redirect_v6(mblk_t *, ip6_t *, nd_redirect_t *,
182     ip_recv_attr_t *);
183 static void	icmp_send_redirect_v6(mblk_t *, in6_addr_t *,
184     in6_addr_t *, ip_recv_attr_t *);
185 static void	icmp_send_reply_v6(mblk_t *, ip6_t *, icmp6_t *,
186     ip_recv_attr_t *);
187 static boolean_t	ip_source_routed_v6(ip6_t *, mblk_t *, ip_stack_t *);
188 
189 /*
190  * icmp_inbound_v6 deals with ICMP messages that are handled by IP.
191  * If the ICMP message is consumed by IP, i.e., it should not be delivered
192  * to any IPPROTO_ICMP raw sockets, then it returns NULL.
193  * Likewise, if the ICMP error is misformed (too short, etc), then it
194  * returns NULL. The caller uses this to determine whether or not to send
195  * to raw sockets.
196  *
197  * All error messages are passed to the matching transport stream.
198  *
199  * See comment for icmp_inbound_v4() on how IPsec is handled.
200  */
201 mblk_t *
202 icmp_inbound_v6(mblk_t *mp, ip_recv_attr_t *ira)
203 {
204 	icmp6_t		*icmp6;
205 	ip6_t		*ip6h;		/* Outer header */
206 	int		ip_hdr_length;	/* Outer header length */
207 	boolean_t	interested;
208 	ill_t		*ill = ira->ira_ill;
209 	ip_stack_t	*ipst = ill->ill_ipst;
210 	mblk_t		*mp_ret = NULL;
211 
212 	ip6h = (ip6_t *)mp->b_rptr;
213 
214 	BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs);
215 
216 	/* Make sure ira_l2src is set for ndp_input */
217 	if (!(ira->ira_flags & IRAF_L2SRC_SET))
218 		ip_setl2src(mp, ira, ira->ira_rill);
219 
220 	ip_hdr_length = ira->ira_ip_hdr_length;
221 	if ((mp->b_wptr - mp->b_rptr) < (ip_hdr_length + ICMP6_MINLEN)) {
222 		if (ira->ira_pktlen < (ip_hdr_length + ICMP6_MINLEN)) {
223 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts);
224 			ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill);
225 			freemsg(mp);
226 			return (NULL);
227 		}
228 		ip6h = ip_pullup(mp, ip_hdr_length + ICMP6_MINLEN, ira);
229 		if (ip6h == NULL) {
230 			BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
231 			freemsg(mp);
232 			return (NULL);
233 		}
234 	}
235 
236 	icmp6 = (icmp6_t *)(&mp->b_rptr[ip_hdr_length]);
237 	DTRACE_PROBE2(icmp__inbound__v6, ip6_t *, ip6h, icmp6_t *, icmp6);
238 	ip2dbg(("icmp_inbound_v6: type %d code %d\n", icmp6->icmp6_type,
239 	    icmp6->icmp6_code));
240 
241 	/*
242 	 * We will set "interested" to "true" if we should pass a copy to
243 	 * the transport i.e., if it is an error message.
244 	 */
245 	interested = !(icmp6->icmp6_type & ICMP6_INFOMSG_MASK);
246 
247 	switch (icmp6->icmp6_type) {
248 	case ICMP6_DST_UNREACH:
249 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInDestUnreachs);
250 		if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN)
251 			BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInAdminProhibs);
252 		break;
253 
254 	case ICMP6_TIME_EXCEEDED:
255 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInTimeExcds);
256 		break;
257 
258 	case ICMP6_PARAM_PROB:
259 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInParmProblems);
260 		break;
261 
262 	case ICMP6_PACKET_TOO_BIG:
263 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInPktTooBigs);
264 		break;
265 
266 	case ICMP6_ECHO_REQUEST:
267 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchos);
268 		if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) &&
269 		    !ipst->ips_ipv6_resp_echo_mcast)
270 			break;
271 
272 		/*
273 		 * We must have exclusive use of the mblk to convert it to
274 		 * a response.
275 		 * If not, we copy it.
276 		 */
277 		if (mp->b_datap->db_ref > 1) {
278 			mblk_t	*mp1;
279 
280 			mp1 = copymsg(mp);
281 			if (mp1 == NULL) {
282 				BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
283 				ip_drop_input("ipIfStatsInDiscards - copymsg",
284 				    mp, ill);
285 				freemsg(mp);
286 				return (NULL);
287 			}
288 			freemsg(mp);
289 			mp = mp1;
290 			ip6h = (ip6_t *)mp->b_rptr;
291 			icmp6 = (icmp6_t *)(&mp->b_rptr[ip_hdr_length]);
292 		}
293 
294 		icmp6->icmp6_type = ICMP6_ECHO_REPLY;
295 		icmp_send_reply_v6(mp, ip6h, icmp6, ira);
296 		return (NULL);
297 
298 	case ICMP6_ECHO_REPLY:
299 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchoReplies);
300 		break;
301 
302 	case ND_ROUTER_SOLICIT:
303 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterSolicits);
304 		break;
305 
306 	case ND_ROUTER_ADVERT:
307 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterAdvertisements);
308 		break;
309 
310 	case ND_NEIGHBOR_SOLICIT:
311 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInNeighborSolicits);
312 		ndp_input(mp, ira);
313 		return (NULL);
314 
315 	case ND_NEIGHBOR_ADVERT:
316 		BUMP_MIB(ill->ill_icmp6_mib,
317 		    ipv6IfIcmpInNeighborAdvertisements);
318 		ndp_input(mp, ira);
319 		return (NULL);
320 
321 	case ND_REDIRECT:
322 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRedirects);
323 
324 		if (ipst->ips_ipv6_ignore_redirect)
325 			break;
326 
327 		/* We now allow a RAW socket to receive this. */
328 		interested = B_TRUE;
329 		break;
330 
331 	/*
332 	 * The next three icmp messages will be handled by MLD.
333 	 * Pass all valid MLD packets up to any process(es)
334 	 * listening on a raw ICMP socket.
335 	 */
336 	case MLD_LISTENER_QUERY:
337 	case MLD_LISTENER_REPORT:
338 	case MLD_LISTENER_REDUCTION:
339 		mp = mld_input(mp, ira);
340 		return (mp);
341 	default:
342 		break;
343 	}
344 	/*
345 	 * See if there is an ICMP client to avoid an extra copymsg/freemsg
346 	 * if there isn't one.
347 	 */
348 	if (ipst->ips_ipcl_proto_fanout_v6[IPPROTO_ICMPV6].connf_head != NULL) {
349 		/* If there is an ICMP client and we want one too, copy it. */
350 
351 		if (!interested) {
352 			/* Caller will deliver to RAW sockets */
353 			return (mp);
354 		}
355 		mp_ret = copymsg(mp);
356 		if (mp_ret == NULL) {
357 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
358 			ip_drop_input("ipIfStatsInDiscards - copymsg", mp, ill);
359 		}
360 	} else if (!interested) {
361 		/* Neither we nor raw sockets are interested. Drop packet now */
362 		freemsg(mp);
363 		return (NULL);
364 	}
365 
366 	/*
367 	 * ICMP error or redirect packet. Make sure we have enough of
368 	 * the header and that db_ref == 1 since we might end up modifying
369 	 * the packet.
370 	 */
371 	if (mp->b_cont != NULL) {
372 		if (ip_pullup(mp, -1, ira) == NULL) {
373 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
374 			ip_drop_input("ipIfStatsInDiscards - ip_pullup",
375 			    mp, ill);
376 			freemsg(mp);
377 			return (mp_ret);
378 		}
379 	}
380 
381 	if (mp->b_datap->db_ref > 1) {
382 		mblk_t	*mp1;
383 
384 		mp1 = copymsg(mp);
385 		if (mp1 == NULL) {
386 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
387 			ip_drop_input("ipIfStatsInDiscards - copymsg", mp, ill);
388 			freemsg(mp);
389 			return (mp_ret);
390 		}
391 		freemsg(mp);
392 		mp = mp1;
393 	}
394 
395 	/*
396 	 * In case mp has changed, verify the message before any further
397 	 * processes.
398 	 */
399 	ip6h = (ip6_t *)mp->b_rptr;
400 	icmp6 = (icmp6_t *)(&mp->b_rptr[ip_hdr_length]);
401 	if (!icmp_inbound_verify_v6(mp, icmp6, ira)) {
402 		freemsg(mp);
403 		return (mp_ret);
404 	}
405 
406 	switch (icmp6->icmp6_type) {
407 	case ND_REDIRECT:
408 		icmp_redirect_v6(mp, ip6h, (nd_redirect_t *)icmp6, ira);
409 		break;
410 	case ICMP6_PACKET_TOO_BIG:
411 		/* Update DCE and adjust MTU is icmp header if needed */
412 		icmp_inbound_too_big_v6(icmp6, ira);
413 		/* FALLTHRU */
414 	default:
415 		icmp_inbound_error_fanout_v6(mp, icmp6, ira);
416 		break;
417 	}
418 
419 	return (mp_ret);
420 }
421 
422 /*
423  * Send an ICMP echo reply.
424  * The caller has already updated the payload part of the packet.
425  * We handle the ICMP checksum, IP source address selection and feed
426  * the packet into ip_output_simple.
427  */
428 static void
429 icmp_send_reply_v6(mblk_t *mp, ip6_t *ip6h, icmp6_t *icmp6,
430     ip_recv_attr_t *ira)
431 {
432 	uint_t		ip_hdr_length = ira->ira_ip_hdr_length;
433 	ill_t		*ill = ira->ira_ill;
434 	ip_stack_t	*ipst = ill->ill_ipst;
435 	ip_xmit_attr_t	ixas;
436 	in6_addr_t	origsrc;
437 
438 	/*
439 	 * Remove any extension headers (do not reverse a source route)
440 	 * and clear the flow id (keep traffic class for now).
441 	 */
442 	if (ip_hdr_length != IPV6_HDR_LEN) {
443 		int	i;
444 
445 		for (i = 0; i < IPV6_HDR_LEN; i++) {
446 			mp->b_rptr[ip_hdr_length - i - 1] =
447 			    mp->b_rptr[IPV6_HDR_LEN - i - 1];
448 		}
449 		mp->b_rptr += (ip_hdr_length - IPV6_HDR_LEN);
450 		ip6h = (ip6_t *)mp->b_rptr;
451 		ip6h->ip6_nxt = IPPROTO_ICMPV6;
452 		i = ntohs(ip6h->ip6_plen);
453 		i -= (ip_hdr_length - IPV6_HDR_LEN);
454 		ip6h->ip6_plen = htons(i);
455 		ip_hdr_length = IPV6_HDR_LEN;
456 		ASSERT(ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN == msgdsize(mp));
457 	}
458 	ip6h->ip6_vcf &= ~IPV6_FLOWINFO_FLOWLABEL;
459 
460 	/* Reverse the source and destination addresses. */
461 	origsrc = ip6h->ip6_src;
462 	ip6h->ip6_src = ip6h->ip6_dst;
463 	ip6h->ip6_dst = origsrc;
464 
465 	/* set the hop limit */
466 	ip6h->ip6_hops = ipst->ips_ipv6_def_hops;
467 
468 	/*
469 	 * Prepare for checksum by putting icmp length in the icmp
470 	 * checksum field. The checksum is calculated in ip_output
471 	 */
472 	icmp6->icmp6_cksum = ip6h->ip6_plen;
473 
474 	bzero(&ixas, sizeof (ixas));
475 	ixas.ixa_flags = IXAF_BASIC_SIMPLE_V6;
476 	ixas.ixa_zoneid = ira->ira_zoneid;
477 	ixas.ixa_cred = kcred;
478 	ixas.ixa_cpid = NOPID;
479 	ixas.ixa_tsl = ira->ira_tsl;	/* Behave as a multi-level responder */
480 	ixas.ixa_ifindex = 0;
481 	ixas.ixa_ipst = ipst;
482 	ixas.ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
483 
484 	if (!(ira->ira_flags & IRAF_IPSEC_SECURE)) {
485 		/*
486 		 * This packet should go out the same way as it
487 		 * came in i.e in clear, independent of the IPsec
488 		 * policy for transmitting packets.
489 		 */
490 		ixas.ixa_flags |= IXAF_NO_IPSEC;
491 	} else {
492 		if (!ipsec_in_to_out(ira, &ixas, mp, NULL, ip6h)) {
493 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
494 			/* Note: mp already consumed and ip_drop_packet done */
495 			return;
496 		}
497 	}
498 
499 	/* Was the destination (now source) link-local? Send out same group */
500 	if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) {
501 		ixas.ixa_flags |= IXAF_SCOPEID_SET;
502 		if (IS_UNDER_IPMP(ill))
503 			ixas.ixa_scopeid = ill_get_upper_ifindex(ill);
504 		else
505 			ixas.ixa_scopeid = ill->ill_phyint->phyint_ifindex;
506 	}
507 
508 	if (ira->ira_flags & IRAF_MULTIBROADCAST) {
509 		/*
510 		 * Not one or our addresses (IRE_LOCALs), thus we let
511 		 * ip_output_simple pick the source.
512 		 */
513 		ip6h->ip6_src = ipv6_all_zeros;
514 		ixas.ixa_flags |= IXAF_SET_SOURCE;
515 	}
516 
517 	/* Should we send using dce_pmtu? */
518 	if (ipst->ips_ipv6_icmp_return_pmtu)
519 		ixas.ixa_flags |= IXAF_PMTU_DISCOVERY;
520 
521 	(void) ip_output_simple(mp, &ixas);
522 	ixa_cleanup(&ixas);
523 
524 }
525 
526 /*
527  * Verify the ICMP messages for either for ICMP error or redirect packet.
528  * The caller should have fully pulled up the message. If it's a redirect
529  * packet, only basic checks on IP header will be done; otherwise, verify
530  * the packet by looking at the included ULP header.
531  *
532  * Called before icmp_inbound_error_fanout_v6 is called.
533  */
534 static boolean_t
535 icmp_inbound_verify_v6(mblk_t *mp, icmp6_t *icmp6, ip_recv_attr_t *ira)
536 {
537 	ill_t		*ill = ira->ira_ill;
538 	uint16_t	hdr_length;
539 	uint8_t		*nexthdrp;
540 	uint8_t		nexthdr;
541 	ip_stack_t	*ipst = ill->ill_ipst;
542 	conn_t		*connp;
543 	ip6_t		*ip6h;	/* Inner header */
544 
545 	ip6h = (ip6_t *)&icmp6[1];
546 	if ((uchar_t *)ip6h + IPV6_HDR_LEN > mp->b_wptr)
547 		goto truncated;
548 
549 	if (icmp6->icmp6_type == ND_REDIRECT) {
550 		hdr_length = sizeof (nd_redirect_t);
551 	} else {
552 		if ((IPH_HDR_VERSION(ip6h) != IPV6_VERSION))
553 			goto discard_pkt;
554 		hdr_length = IPV6_HDR_LEN;
555 	}
556 
557 	if ((uchar_t *)ip6h + hdr_length > mp->b_wptr)
558 		goto truncated;
559 
560 	/*
561 	 * Stop here for ICMP_REDIRECT.
562 	 */
563 	if (icmp6->icmp6_type == ND_REDIRECT)
564 		return (B_TRUE);
565 
566 	/*
567 	 * ICMP errors only.
568 	 */
569 	if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp))
570 		goto discard_pkt;
571 	nexthdr = *nexthdrp;
572 
573 	/* Try to pass the ICMP message to clients who need it */
574 	switch (nexthdr) {
575 	case IPPROTO_UDP:
576 		/*
577 		 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of
578 		 * transport header.
579 		 */
580 		if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN >
581 		    mp->b_wptr)
582 			goto truncated;
583 		break;
584 	case IPPROTO_TCP: {
585 		tcpha_t		*tcpha;
586 
587 		/*
588 		 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of
589 		 * transport header.
590 		 */
591 		if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN >
592 		    mp->b_wptr)
593 			goto truncated;
594 
595 		tcpha = (tcpha_t *)((uchar_t *)ip6h + hdr_length);
596 		/*
597 		 * With IPMP we need to match across group, which we do
598 		 * since we have the upper ill from ira_ill.
599 		 */
600 		connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha, TCPS_LISTEN,
601 		    ill->ill_phyint->phyint_ifindex, ipst);
602 		if (connp == NULL)
603 			goto discard_pkt;
604 
605 		if ((connp->conn_verifyicmp != NULL) &&
606 		    !connp->conn_verifyicmp(connp, tcpha, NULL, icmp6, ira)) {
607 			CONN_DEC_REF(connp);
608 			goto discard_pkt;
609 		}
610 		CONN_DEC_REF(connp);
611 		break;
612 	}
613 	case IPPROTO_SCTP:
614 		/*
615 		 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of
616 		 * transport header.
617 		 */
618 		if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN >
619 		    mp->b_wptr)
620 			goto truncated;
621 		break;
622 	case IPPROTO_ESP:
623 	case IPPROTO_AH:
624 		break;
625 	case IPPROTO_ENCAP:
626 	case IPPROTO_IPV6: {
627 		/* Look for self-encapsulated packets that caused an error */
628 		ip6_t *in_ip6h;
629 
630 		in_ip6h = (ip6_t *)((uint8_t *)ip6h + hdr_length);
631 		if ((uint8_t *)in_ip6h + (nexthdr == IPPROTO_ENCAP ?
632 		    sizeof (ipha_t) : sizeof (ip6_t)) > mp->b_wptr)
633 			goto truncated;
634 		break;
635 	}
636 	default:
637 		break;
638 	}
639 
640 	return (B_TRUE);
641 
642 discard_pkt:
643 	/* Bogus ICMP error. */
644 	BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
645 	return (B_FALSE);
646 
647 truncated:
648 	/* We pulled up everthing already. Must be truncated */
649 	BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
650 	return (B_FALSE);
651 }
652 
653 /*
654  * Process received IPv6 ICMP Packet too big.
655  * The caller is responsible for validating the packet before passing it in
656  * and also to fanout the ICMP error to any matching transport conns. Assumes
657  * the message has been fully pulled up.
658  *
659  * Before getting here, the caller has called icmp_inbound_verify_v6()
660  * that should have verified with ULP to prevent undoing the changes we're
661  * going to make to DCE. For example, TCP might have verified that the packet
662  * which generated error is in the send window.
663  *
664  * In some cases modified this MTU in the ICMP header packet; the caller
665  * should pass to the matching ULP after this returns.
666  */
667 static void
668 icmp_inbound_too_big_v6(icmp6_t *icmp6, ip_recv_attr_t *ira)
669 {
670 	uint32_t	mtu;
671 	dce_t		*dce;
672 	ill_t		*ill = ira->ira_ill;	/* Upper ill if IPMP */
673 	ip_stack_t	*ipst = ill->ill_ipst;
674 	int		old_max_frag;
675 	in6_addr_t	final_dst;
676 	ip6_t		*ip6h;	/* Inner IP header */
677 
678 	/* Caller has already pulled up everything. */
679 	ip6h = (ip6_t *)&icmp6[1];
680 	final_dst = ip_get_dst_v6(ip6h, NULL, NULL);
681 
682 	/*
683 	 * For link local destinations matching simply on address is not
684 	 * sufficient. Same link local addresses for different ILL's is
685 	 * possible.
686 	 */
687 	if (IN6_IS_ADDR_LINKSCOPE(&final_dst)) {
688 		dce = dce_lookup_and_add_v6(&final_dst,
689 		    ill->ill_phyint->phyint_ifindex, ipst);
690 	} else {
691 		dce = dce_lookup_and_add_v6(&final_dst, 0, ipst);
692 	}
693 	if (dce == NULL) {
694 		/* Couldn't add a unique one - ENOMEM */
695 		if (ip_debug > 2) {
696 			/* ip1dbg */
697 			pr_addr_dbg("icmp_inbound_too_big_v6:"
698 			    "no dce for dst %s\n", AF_INET6,
699 			    &final_dst);
700 		}
701 		return;
702 	}
703 
704 	mtu = ntohl(icmp6->icmp6_mtu);
705 
706 	mutex_enter(&dce->dce_lock);
707 	if (dce->dce_flags & DCEF_PMTU)
708 		old_max_frag = dce->dce_pmtu;
709 	else
710 		old_max_frag = ill->ill_mtu;
711 
712 	if (mtu < IPV6_MIN_MTU) {
713 		ip1dbg(("Received mtu less than IPv6 "
714 		    "min mtu %d: %d\n", IPV6_MIN_MTU, mtu));
715 		mtu = IPV6_MIN_MTU;
716 		/*
717 		 * If an mtu less than IPv6 min mtu is received,
718 		 * we must include a fragment header in
719 		 * subsequent packets.
720 		 */
721 		dce->dce_flags |= DCEF_TOO_SMALL_PMTU;
722 	} else {
723 		dce->dce_flags &= ~DCEF_TOO_SMALL_PMTU;
724 	}
725 	ip1dbg(("Received mtu from router: %d\n", mtu));
726 	dce->dce_pmtu = MIN(old_max_frag, mtu);
727 
728 	/* Prepare to send the new max frag size for the ULP. */
729 	if (dce->dce_flags & DCEF_TOO_SMALL_PMTU) {
730 		/*
731 		 * If we need a fragment header in every packet
732 		 * (above case or multirouting), make sure the
733 		 * ULP takes it into account when computing the
734 		 * payload size.
735 		 */
736 		icmp6->icmp6_mtu = htonl(dce->dce_pmtu - sizeof (ip6_frag_t));
737 	} else {
738 		icmp6->icmp6_mtu = htonl(dce->dce_pmtu);
739 	}
740 	/* We now have a PMTU for sure */
741 	dce->dce_flags |= DCEF_PMTU;
742 	dce->dce_last_change_time = TICK_TO_SEC(lbolt64);
743 	mutex_exit(&dce->dce_lock);
744 	/*
745 	 * After dropping the lock the new value is visible to everyone.
746 	 * Then we bump the generation number so any cached values reinspect
747 	 * the dce_t.
748 	 */
749 	dce_increment_generation(dce);
750 	dce_refrele(dce);
751 }
752 
753 /*
754  * Fanout received ICMPv6 error packets to the transports.
755  * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else.
756  *
757  * The caller must have called icmp_inbound_verify_v6.
758  */
759 void
760 icmp_inbound_error_fanout_v6(mblk_t *mp, icmp6_t *icmp6, ip_recv_attr_t *ira)
761 {
762 	uint16_t	*up;	/* Pointer to ports in ULP header */
763 	uint32_t	ports;	/* reversed ports for fanout */
764 	ip6_t		rip6h;	/* With reversed addresses */
765 	ip6_t		*ip6h;	/* Inner IP header */
766 	uint16_t	hdr_length; /* Inner IP header length */
767 	uint8_t		*nexthdrp;
768 	uint8_t		nexthdr;
769 	tcpha_t		*tcpha;
770 	conn_t		*connp;
771 	ill_t		*ill = ira->ira_ill;	/* Upper in the case of IPMP */
772 	ip_stack_t	*ipst = ill->ill_ipst;
773 	ipsec_stack_t	*ipss = ipst->ips_netstack->netstack_ipsec;
774 
775 	/* Caller has already pulled up everything. */
776 	ip6h = (ip6_t *)&icmp6[1];
777 	ASSERT(mp->b_cont == NULL);
778 	ASSERT((uchar_t *)&ip6h[1] <= mp->b_wptr);
779 
780 	if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp))
781 		goto drop_pkt;
782 	nexthdr = *nexthdrp;
783 	ira->ira_protocol = nexthdr;
784 
785 	/*
786 	 * We need a separate IP header with the source and destination
787 	 * addresses reversed to do fanout/classification because the ip6h in
788 	 * the ICMPv6 error is in the form we sent it out.
789 	 */
790 	rip6h.ip6_src = ip6h->ip6_dst;
791 	rip6h.ip6_dst = ip6h->ip6_src;
792 	rip6h.ip6_nxt = nexthdr;
793 
794 	/* Try to pass the ICMP message to clients who need it */
795 	switch (nexthdr) {
796 	case IPPROTO_UDP: {
797 		/* Attempt to find a client stream based on port. */
798 		up = (uint16_t *)((uchar_t *)ip6h + hdr_length);
799 
800 		/* Note that we send error to all matches. */
801 		ira->ira_flags |= IRAF_ICMP_ERROR;
802 		ip_fanout_udp_multi_v6(mp, &rip6h, up[0], up[1], ira);
803 		ira->ira_flags &= ~IRAF_ICMP_ERROR;
804 		return;
805 	}
806 	case IPPROTO_TCP: {
807 		/*
808 		 * Attempt to find a client stream based on port.
809 		 * Note that we do a reverse lookup since the header is
810 		 * in the form we sent it out.
811 		 */
812 		tcpha = (tcpha_t *)((uchar_t *)ip6h + hdr_length);
813 		/*
814 		 * With IPMP we need to match across group, which we do
815 		 * since we have the upper ill from ira_ill.
816 		 */
817 		connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha,
818 		    TCPS_LISTEN, ill->ill_phyint->phyint_ifindex, ipst);
819 		if (connp == NULL) {
820 			goto drop_pkt;
821 		}
822 
823 		if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) ||
824 		    (ira->ira_flags & IRAF_IPSEC_SECURE)) {
825 			mp = ipsec_check_inbound_policy(mp, connp,
826 			    NULL, ip6h, ira);
827 			if (mp == NULL) {
828 				BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
829 				/* Note that mp is NULL */
830 				ip_drop_input("ipIfStatsInDiscards", mp, ill);
831 				CONN_DEC_REF(connp);
832 				return;
833 			}
834 		}
835 
836 		ira->ira_flags |= IRAF_ICMP_ERROR;
837 		if (IPCL_IS_TCP(connp)) {
838 			SQUEUE_ENTER_ONE(connp->conn_sqp, mp,
839 			    connp->conn_recvicmp, connp, ira, SQ_FILL,
840 			    SQTAG_TCP6_INPUT_ICMP_ERR);
841 		} else {
842 			/* Not TCP; must be SOCK_RAW, IPPROTO_TCP */
843 			ill_t *rill = ira->ira_rill;
844 
845 			ira->ira_ill = ira->ira_rill = NULL;
846 			(connp->conn_recv)(connp, mp, NULL, ira);
847 			CONN_DEC_REF(connp);
848 			ira->ira_ill = ill;
849 			ira->ira_rill = rill;
850 		}
851 		ira->ira_flags &= ~IRAF_ICMP_ERROR;
852 		return;
853 
854 	}
855 	case IPPROTO_SCTP:
856 		up = (uint16_t *)((uchar_t *)ip6h + hdr_length);
857 		/* Find a SCTP client stream for this packet. */
858 		((uint16_t *)&ports)[0] = up[1];
859 		((uint16_t *)&ports)[1] = up[0];
860 
861 		ira->ira_flags |= IRAF_ICMP_ERROR;
862 		ip_fanout_sctp(mp, NULL, &rip6h, ports, ira);
863 		ira->ira_flags &= ~IRAF_ICMP_ERROR;
864 		return;
865 
866 	case IPPROTO_ESP:
867 	case IPPROTO_AH:
868 		if (!ipsec_loaded(ipss)) {
869 			ip_proto_not_sup(mp, ira);
870 			return;
871 		}
872 
873 		if (nexthdr == IPPROTO_ESP)
874 			mp = ipsecesp_icmp_error(mp, ira);
875 		else
876 			mp = ipsecah_icmp_error(mp, ira);
877 		if (mp == NULL)
878 			return;
879 
880 		/* Just in case ipsec didn't preserve the NULL b_cont */
881 		if (mp->b_cont != NULL) {
882 			if (!pullupmsg(mp, -1))
883 				goto drop_pkt;
884 		}
885 
886 		/*
887 		 * If succesful, the mp has been modified to not include
888 		 * the ESP/AH header so we can fanout to the ULP's icmp
889 		 * error handler.
890 		 */
891 		if (mp->b_wptr - mp->b_rptr < IPV6_HDR_LEN)
892 			goto drop_pkt;
893 
894 		ip6h = (ip6_t *)mp->b_rptr;
895 		/* Don't call hdr_length_v6() unless you have to. */
896 		if (ip6h->ip6_nxt != IPPROTO_ICMPV6)
897 			hdr_length = ip_hdr_length_v6(mp, ip6h);
898 		else
899 			hdr_length = IPV6_HDR_LEN;
900 
901 		/* Verify the modified message before any further processes. */
902 		icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]);
903 		if (!icmp_inbound_verify_v6(mp, icmp6, ira)) {
904 			freemsg(mp);
905 			return;
906 		}
907 
908 		icmp_inbound_error_fanout_v6(mp, icmp6, ira);
909 		return;
910 
911 	case IPPROTO_IPV6: {
912 		/* Look for self-encapsulated packets that caused an error */
913 		ip6_t *in_ip6h;
914 
915 		in_ip6h = (ip6_t *)((uint8_t *)ip6h + hdr_length);
916 
917 		if (IN6_ARE_ADDR_EQUAL(&in_ip6h->ip6_src, &ip6h->ip6_src) &&
918 		    IN6_ARE_ADDR_EQUAL(&in_ip6h->ip6_dst, &ip6h->ip6_dst)) {
919 			/*
920 			 * Self-encapsulated case. As in the ipv4 case,
921 			 * we need to strip the 2nd IP header. Since mp
922 			 * is already pulled-up, we can simply bcopy
923 			 * the 3rd header + data over the 2nd header.
924 			 */
925 			uint16_t unused_len;
926 
927 			/*
928 			 * Make sure we don't do recursion more than once.
929 			 */
930 			if (!ip_hdr_length_nexthdr_v6(mp, in_ip6h,
931 			    &unused_len, &nexthdrp) ||
932 			    *nexthdrp == IPPROTO_IPV6) {
933 				goto drop_pkt;
934 			}
935 
936 			/*
937 			 * Copy the 3rd header + remaining data on top
938 			 * of the 2nd header.
939 			 */
940 			bcopy(in_ip6h, ip6h, mp->b_wptr - (uchar_t *)in_ip6h);
941 
942 			/*
943 			 * Subtract length of the 2nd header.
944 			 */
945 			mp->b_wptr -= hdr_length;
946 
947 			ip6h = (ip6_t *)mp->b_rptr;
948 			/* Don't call hdr_length_v6() unless you have to. */
949 			if (ip6h->ip6_nxt != IPPROTO_ICMPV6)
950 				hdr_length = ip_hdr_length_v6(mp, ip6h);
951 			else
952 				hdr_length = IPV6_HDR_LEN;
953 
954 			/*
955 			 * Verify the modified message before any further
956 			 * processes.
957 			 */
958 			icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]);
959 			if (!icmp_inbound_verify_v6(mp, icmp6, ira)) {
960 				freemsg(mp);
961 				return;
962 			}
963 
964 			/*
965 			 * Now recurse, and see what I _really_ should be
966 			 * doing here.
967 			 */
968 			icmp_inbound_error_fanout_v6(mp, icmp6, ira);
969 			return;
970 		}
971 		/* FALLTHRU */
972 	}
973 	case IPPROTO_ENCAP:
974 		if ((connp = ipcl_iptun_classify_v6(&rip6h.ip6_src,
975 		    &rip6h.ip6_dst, ipst)) != NULL) {
976 			ira->ira_flags |= IRAF_ICMP_ERROR;
977 			connp->conn_recvicmp(connp, mp, NULL, ira);
978 			CONN_DEC_REF(connp);
979 			ira->ira_flags &= ~IRAF_ICMP_ERROR;
980 			return;
981 		}
982 		/*
983 		 * No IP tunnel is interested, fallthrough and see
984 		 * if a raw socket will want it.
985 		 */
986 		/* FALLTHRU */
987 	default:
988 		ira->ira_flags |= IRAF_ICMP_ERROR;
989 		ASSERT(ira->ira_protocol == nexthdr);
990 		ip_fanout_proto_v6(mp, &rip6h, ira);
991 		ira->ira_flags &= ~IRAF_ICMP_ERROR;
992 		return;
993 	}
994 	/* NOTREACHED */
995 drop_pkt:
996 	BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
997 	ip1dbg(("icmp_inbound_error_fanout_v6: drop pkt\n"));
998 	freemsg(mp);
999 }
1000 
1001 /*
1002  * Process received IPv6 ICMP Redirect messages.
1003  * Assumes the caller has verified that the headers are in the pulled up mblk.
1004  * Consumes mp.
1005  */
1006 /* ARGSUSED */
1007 static void
1008 icmp_redirect_v6(mblk_t *mp, ip6_t *ip6h, nd_redirect_t *rd,
1009     ip_recv_attr_t *ira)
1010 {
1011 	ire_t		*ire, *nire;
1012 	ire_t		*prev_ire = NULL;
1013 	ire_t		*redir_ire;
1014 	in6_addr_t	*src, *dst, *gateway;
1015 	nd_opt_hdr_t	*opt;
1016 	nce_t		*nce;
1017 	int		ncec_flags = 0;
1018 	int		err = 0;
1019 	boolean_t	redirect_to_router = B_FALSE;
1020 	int		len;
1021 	int		optlen;
1022 	ill_t		*ill = ira->ira_rill;
1023 	ill_t		*rill = ira->ira_rill;
1024 	ip_stack_t	*ipst = ill->ill_ipst;
1025 
1026 	/*
1027 	 * Since ira_ill is where the IRE_LOCAL was hosted we use ira_rill
1028 	 * and make it be the IPMP upper so avoid being confused by a packet
1029 	 * addressed to a unicast address on a different ill.
1030 	 */
1031 	if (IS_UNDER_IPMP(rill)) {
1032 		rill = ipmp_ill_hold_ipmp_ill(rill);
1033 		if (rill == NULL) {
1034 			BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
1035 			ip_drop_input("ipv6IfIcmpInBadRedirects - IPMP ill",
1036 			    mp, ill);
1037 			freemsg(mp);
1038 			return;
1039 		}
1040 		ASSERT(rill != ira->ira_rill);
1041 	}
1042 
1043 	len = mp->b_wptr - (uchar_t *)rd;
1044 	src = &ip6h->ip6_src;
1045 	dst = &rd->nd_rd_dst;
1046 	gateway = &rd->nd_rd_target;
1047 
1048 	/* Verify if it is a valid redirect */
1049 	if (!IN6_IS_ADDR_LINKLOCAL(src) ||
1050 	    (ip6h->ip6_hops != IPV6_MAX_HOPS) ||
1051 	    (rd->nd_rd_code != 0) ||
1052 	    (len < sizeof (nd_redirect_t)) ||
1053 	    (IN6_IS_ADDR_V4MAPPED(dst)) ||
1054 	    (IN6_IS_ADDR_MULTICAST(dst))) {
1055 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
1056 		ip_drop_input("ipv6IfIcmpInBadRedirects - addr/len", mp, ill);
1057 		goto fail_redirect;
1058 	}
1059 
1060 	if (!(IN6_IS_ADDR_LINKLOCAL(gateway) ||
1061 	    IN6_ARE_ADDR_EQUAL(gateway, dst))) {
1062 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
1063 		ip_drop_input("ipv6IfIcmpInBadRedirects - bad gateway",
1064 		    mp, ill);
1065 		goto fail_redirect;
1066 	}
1067 
1068 	optlen = len - sizeof (nd_redirect_t);
1069 	if (optlen != 0) {
1070 		if (!ndp_verify_optlen((nd_opt_hdr_t *)&rd[1], optlen)) {
1071 			BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
1072 			ip_drop_input("ipv6IfIcmpInBadRedirects - options",
1073 			    mp, ill);
1074 			goto fail_redirect;
1075 		}
1076 	}
1077 
1078 	if (!IN6_ARE_ADDR_EQUAL(gateway, dst)) {
1079 		redirect_to_router = B_TRUE;
1080 		ncec_flags |= NCE_F_ISROUTER;
1081 	} else {
1082 		gateway = dst;	/* Add nce for dst */
1083 	}
1084 
1085 
1086 	/*
1087 	 * Verify that the IP source address of the redirect is
1088 	 * the same as the current first-hop router for the specified
1089 	 * ICMP destination address.
1090 	 * Also, Make sure we had a route for the dest in question and
1091 	 * that route was pointing to the old gateway (the source of the
1092 	 * redirect packet.)
1093 	 * Note: this merely says that there is some IRE which matches that
1094 	 * gateway; not that the longest match matches that gateway.
1095 	 */
1096 	prev_ire = ire_ftable_lookup_v6(dst, 0, src, 0, rill,
1097 	    ALL_ZONES, NULL, MATCH_IRE_GW | MATCH_IRE_ILL, 0, ipst, NULL);
1098 
1099 	/*
1100 	 * Check that
1101 	 *	the redirect was not from ourselves
1102 	 *	old gateway is still directly reachable
1103 	 */
1104 	if (prev_ire == NULL ||
1105 	    (prev_ire->ire_type & (IRE_LOCAL|IRE_LOOPBACK)) ||
1106 	    (prev_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE))) {
1107 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
1108 		ip_drop_input("ipv6IfIcmpInBadRedirects - ire", mp, ill);
1109 		goto fail_redirect;
1110 	}
1111 
1112 	ASSERT(prev_ire->ire_ill != NULL);
1113 	if (prev_ire->ire_ill->ill_flags & ILLF_NONUD)
1114 		ncec_flags |= NCE_F_NONUD;
1115 
1116 	opt = (nd_opt_hdr_t *)&rd[1];
1117 	opt = ndp_get_option(opt, optlen, ND_OPT_TARGET_LINKADDR);
1118 	if (opt != NULL) {
1119 		err = nce_lookup_then_add_v6(rill,
1120 		    (uchar_t *)&opt[1],		/* Link layer address */
1121 		    rill->ill_phys_addr_length,
1122 		    gateway, ncec_flags, ND_STALE, &nce);
1123 		switch (err) {
1124 		case 0:
1125 			nce_refrele(nce);
1126 			break;
1127 		case EEXIST:
1128 			/*
1129 			 * Check to see if link layer address has changed and
1130 			 * process the ncec_state accordingly.
1131 			 */
1132 			nce_process(nce->nce_common,
1133 			    (uchar_t *)&opt[1], 0, B_FALSE);
1134 			nce_refrele(nce);
1135 			break;
1136 		default:
1137 			ip1dbg(("icmp_redirect_v6: NCE create failed %d\n",
1138 			    err));
1139 			goto fail_redirect;
1140 		}
1141 	}
1142 	if (redirect_to_router) {
1143 		ASSERT(IN6_IS_ADDR_LINKLOCAL(gateway));
1144 
1145 		/*
1146 		 * Create a Route Association.  This will allow us to remember
1147 		 * a router told us to use the particular gateway.
1148 		 */
1149 		ire = ire_create_v6(
1150 		    dst,
1151 		    &ipv6_all_ones,		/* mask */
1152 		    gateway,			/* gateway addr */
1153 		    IRE_HOST,
1154 		    prev_ire->ire_ill,
1155 		    ALL_ZONES,
1156 		    (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST),
1157 		    NULL,
1158 		    ipst);
1159 	} else {
1160 		ipif_t *ipif;
1161 		in6_addr_t gw;
1162 
1163 		/*
1164 		 * Just create an on link entry, i.e. interface route.
1165 		 * The gateway field is our link-local on the ill.
1166 		 */
1167 		mutex_enter(&rill->ill_lock);
1168 		for (ipif = rill->ill_ipif; ipif != NULL;
1169 		    ipif = ipif->ipif_next) {
1170 			if (!(ipif->ipif_state_flags & IPIF_CONDEMNED) &&
1171 			    IN6_IS_ADDR_LINKLOCAL(&ipif->ipif_v6lcl_addr))
1172 				break;
1173 		}
1174 		if (ipif == NULL) {
1175 			/* We have no link-local address! */
1176 			mutex_exit(&rill->ill_lock);
1177 			goto fail_redirect;
1178 		}
1179 		gw = ipif->ipif_v6lcl_addr;
1180 		mutex_exit(&rill->ill_lock);
1181 
1182 		ire = ire_create_v6(
1183 		    dst,				/* gateway == dst */
1184 		    &ipv6_all_ones,			/* mask */
1185 		    &gw,				/* gateway addr */
1186 		    rill->ill_net_type,			/* IF_[NO]RESOLVER */
1187 		    prev_ire->ire_ill,
1188 		    ALL_ZONES,
1189 		    (RTF_DYNAMIC | RTF_HOST),
1190 		    NULL,
1191 		    ipst);
1192 	}
1193 
1194 	if (ire == NULL)
1195 		goto fail_redirect;
1196 
1197 	nire = ire_add(ire);
1198 	/* Check if it was a duplicate entry */
1199 	if (nire != NULL && nire != ire) {
1200 		ASSERT(nire->ire_identical_ref > 1);
1201 		ire_delete(nire);
1202 		ire_refrele(nire);
1203 		nire = NULL;
1204 	}
1205 	ire = nire;
1206 	if (ire != NULL) {
1207 		ire_refrele(ire);		/* Held in ire_add */
1208 
1209 		/* tell routing sockets that we received a redirect */
1210 		ip_rts_change_v6(RTM_REDIRECT,
1211 		    &rd->nd_rd_dst,
1212 		    &rd->nd_rd_target,
1213 		    &ipv6_all_ones, 0, src,
1214 		    (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 0,
1215 		    (RTA_DST | RTA_GATEWAY | RTA_NETMASK | RTA_AUTHOR), ipst);
1216 
1217 		/*
1218 		 * Delete any existing IRE_HOST type ires for this destination.
1219 		 * This together with the added IRE has the effect of
1220 		 * modifying an existing redirect.
1221 		 */
1222 		redir_ire = ire_ftable_lookup_v6(dst, 0, src, IRE_HOST,
1223 		    prev_ire->ire_ill, ALL_ZONES, NULL,
1224 		    (MATCH_IRE_GW | MATCH_IRE_TYPE | MATCH_IRE_ILL), 0, ipst,
1225 		    NULL);
1226 
1227 		if (redir_ire != NULL) {
1228 			if (redir_ire->ire_flags & RTF_DYNAMIC)
1229 				ire_delete(redir_ire);
1230 			ire_refrele(redir_ire);
1231 		}
1232 	}
1233 
1234 	ire_refrele(prev_ire);
1235 	prev_ire = NULL;
1236 
1237 fail_redirect:
1238 	if (prev_ire != NULL)
1239 		ire_refrele(prev_ire);
1240 	freemsg(mp);
1241 	if (rill != ira->ira_rill)
1242 		ill_refrele(rill);
1243 }
1244 
1245 /*
1246  * Build and ship an IPv6 ICMP message using the packet data in mp,
1247  * and the ICMP header pointed to by "stuff".  (May be called as
1248  * writer.)
1249  * Note: assumes that icmp_pkt_err_ok_v6 has been called to
1250  * verify that an icmp error packet can be sent.
1251  *
1252  * If v6src_ptr is set use it as a source. Otherwise select a reasonable
1253  * source address (see above function).
1254  */
1255 static void
1256 icmp_pkt_v6(mblk_t *mp, void *stuff, size_t len,
1257     const in6_addr_t *v6src_ptr, ip_recv_attr_t *ira)
1258 {
1259 	ip6_t		*ip6h;
1260 	in6_addr_t	v6dst;
1261 	size_t		len_needed;
1262 	size_t		msg_len;
1263 	mblk_t		*mp1;
1264 	icmp6_t		*icmp6;
1265 	in6_addr_t	v6src;
1266 	ill_t		*ill = ira->ira_ill;
1267 	ip_stack_t	*ipst = ill->ill_ipst;
1268 	ip_xmit_attr_t	ixas;
1269 
1270 	ip6h = (ip6_t *)mp->b_rptr;
1271 
1272 	bzero(&ixas, sizeof (ixas));
1273 	ixas.ixa_flags = IXAF_BASIC_SIMPLE_V6;
1274 	ixas.ixa_zoneid = ira->ira_zoneid;
1275 	ixas.ixa_ifindex = 0;
1276 	ixas.ixa_ipst = ipst;
1277 	ixas.ixa_cred = kcred;
1278 	ixas.ixa_cpid = NOPID;
1279 	ixas.ixa_tsl = ira->ira_tsl;	/* Behave as a multi-level responder */
1280 	ixas.ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1281 
1282 	/*
1283 	 * If the source of the original packet was link-local, then
1284 	 * make sure we send on the same ill (group) as we received it on.
1285 	 */
1286 	if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) {
1287 		ixas.ixa_flags |= IXAF_SCOPEID_SET;
1288 		if (IS_UNDER_IPMP(ill))
1289 			ixas.ixa_scopeid = ill_get_upper_ifindex(ill);
1290 		else
1291 			ixas.ixa_scopeid = ill->ill_phyint->phyint_ifindex;
1292 	}
1293 
1294 	if (ira->ira_flags & IRAF_IPSEC_SECURE) {
1295 		/*
1296 		 * Apply IPsec based on how IPsec was applied to
1297 		 * the packet that had the error.
1298 		 *
1299 		 * If it was an outbound packet that caused the ICMP
1300 		 * error, then the caller will have setup the IRA
1301 		 * appropriately.
1302 		 */
1303 		if (!ipsec_in_to_out(ira, &ixas, mp, NULL, ip6h)) {
1304 			BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards);
1305 			/* Note: mp already consumed and ip_drop_packet done */
1306 			return;
1307 		}
1308 	} else {
1309 		/*
1310 		 * This is in clear. The icmp message we are building
1311 		 * here should go out in clear, independent of our policy.
1312 		 */
1313 		ixas.ixa_flags |= IXAF_NO_IPSEC;
1314 	}
1315 
1316 	/*
1317 	 * If the caller specified the source we use that.
1318 	 * Otherwise, if the packet was for one of our unicast addresses, make
1319 	 * sure we respond with that as the source. Otherwise
1320 	 * have ip_output_simple pick the source address.
1321 	 */
1322 	if (v6src_ptr != NULL) {
1323 		v6src = *v6src_ptr;
1324 	} else {
1325 		ire_t *ire;
1326 		uint_t match_flags = MATCH_IRE_TYPE | MATCH_IRE_ZONEONLY;
1327 
1328 		if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src) ||
1329 		    IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst))
1330 			match_flags |= MATCH_IRE_ILL;
1331 
1332 		ire = ire_ftable_lookup_v6(&ip6h->ip6_dst, 0, 0,
1333 		    (IRE_LOCAL|IRE_LOOPBACK), ill, ira->ira_zoneid, NULL,
1334 		    match_flags, 0, ipst, NULL);
1335 		if (ire != NULL) {
1336 			v6src = ip6h->ip6_dst;
1337 			ire_refrele(ire);
1338 		} else {
1339 			v6src = ipv6_all_zeros;
1340 			ixas.ixa_flags |= IXAF_SET_SOURCE;
1341 		}
1342 	}
1343 	v6dst = ip6h->ip6_src;
1344 	len_needed = ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len;
1345 	msg_len = msgdsize(mp);
1346 	if (msg_len > len_needed) {
1347 		if (!adjmsg(mp, len_needed - msg_len)) {
1348 			BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors);
1349 			freemsg(mp);
1350 			return;
1351 		}
1352 		msg_len = len_needed;
1353 	}
1354 	mp1 = allocb(IPV6_HDR_LEN + len, BPRI_MED);
1355 	if (mp1 == NULL) {
1356 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors);
1357 		freemsg(mp);
1358 		return;
1359 	}
1360 	mp1->b_cont = mp;
1361 	mp = mp1;
1362 
1363 	/*
1364 	 * Set IXAF_TRUSTED_ICMP so we can let the ICMP messages this
1365 	 * node generates be accepted in peace by all on-host destinations.
1366 	 * If we do NOT assume that all on-host destinations trust
1367 	 * self-generated ICMP messages, then rework here, ip6.c, and spd.c.
1368 	 * (Look for IXAF_TRUSTED_ICMP).
1369 	 */
1370 	ixas.ixa_flags |= IXAF_TRUSTED_ICMP;
1371 
1372 	ip6h = (ip6_t *)mp->b_rptr;
1373 	mp1->b_wptr = (uchar_t *)ip6h + (IPV6_HDR_LEN + len);
1374 
1375 	ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
1376 	ip6h->ip6_nxt = IPPROTO_ICMPV6;
1377 	ip6h->ip6_hops = ipst->ips_ipv6_def_hops;
1378 	ip6h->ip6_dst = v6dst;
1379 	ip6h->ip6_src = v6src;
1380 	msg_len += IPV6_HDR_LEN + len;
1381 	if (msg_len > IP_MAXPACKET + IPV6_HDR_LEN) {
1382 		(void) adjmsg(mp, IP_MAXPACKET + IPV6_HDR_LEN - msg_len);
1383 		msg_len = IP_MAXPACKET + IPV6_HDR_LEN;
1384 	}
1385 	ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN));
1386 	icmp6 = (icmp6_t *)&ip6h[1];
1387 	bcopy(stuff, (char *)icmp6, len);
1388 	/*
1389 	 * Prepare for checksum by putting icmp length in the icmp
1390 	 * checksum field. The checksum is calculated in ip_output_wire_v6.
1391 	 */
1392 	icmp6->icmp6_cksum = ip6h->ip6_plen;
1393 	if (icmp6->icmp6_type == ND_REDIRECT) {
1394 		ip6h->ip6_hops = IPV6_MAX_HOPS;
1395 	}
1396 
1397 	(void) ip_output_simple(mp, &ixas);
1398 	ixa_cleanup(&ixas);
1399 }
1400 
1401 /*
1402  * Update the output mib when ICMPv6 packets are sent.
1403  */
1404 void
1405 icmp_update_out_mib_v6(ill_t *ill, icmp6_t *icmp6)
1406 {
1407 	BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutMsgs);
1408 
1409 	switch (icmp6->icmp6_type) {
1410 	case ICMP6_DST_UNREACH:
1411 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutDestUnreachs);
1412 		if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN)
1413 			BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutAdminProhibs);
1414 		break;
1415 
1416 	case ICMP6_TIME_EXCEEDED:
1417 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutTimeExcds);
1418 		break;
1419 
1420 	case ICMP6_PARAM_PROB:
1421 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutParmProblems);
1422 		break;
1423 
1424 	case ICMP6_PACKET_TOO_BIG:
1425 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutPktTooBigs);
1426 		break;
1427 
1428 	case ICMP6_ECHO_REQUEST:
1429 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchos);
1430 		break;
1431 
1432 	case ICMP6_ECHO_REPLY:
1433 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchoReplies);
1434 		break;
1435 
1436 	case ND_ROUTER_SOLICIT:
1437 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterSolicits);
1438 		break;
1439 
1440 	case ND_ROUTER_ADVERT:
1441 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterAdvertisements);
1442 		break;
1443 
1444 	case ND_NEIGHBOR_SOLICIT:
1445 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutNeighborSolicits);
1446 		break;
1447 
1448 	case ND_NEIGHBOR_ADVERT:
1449 		BUMP_MIB(ill->ill_icmp6_mib,
1450 		    ipv6IfIcmpOutNeighborAdvertisements);
1451 		break;
1452 
1453 	case ND_REDIRECT:
1454 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRedirects);
1455 		break;
1456 
1457 	case MLD_LISTENER_QUERY:
1458 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembQueries);
1459 		break;
1460 
1461 	case MLD_LISTENER_REPORT:
1462 	case MLD_V2_LISTENER_REPORT:
1463 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembResponses);
1464 		break;
1465 
1466 	case MLD_LISTENER_REDUCTION:
1467 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembReductions);
1468 		break;
1469 	}
1470 }
1471 
1472 /*
1473  * Check if it is ok to send an ICMPv6 error packet in
1474  * response to the IP packet in mp.
1475  * Free the message and return null if no
1476  * ICMP error packet should be sent.
1477  */
1478 static mblk_t *
1479 icmp_pkt_err_ok_v6(mblk_t *mp, boolean_t mcast_ok, ip_recv_attr_t *ira)
1480 {
1481 	ill_t		*ill = ira->ira_ill;
1482 	ip_stack_t	*ipst = ill->ill_ipst;
1483 	boolean_t	llbcast;
1484 	ip6_t		*ip6h;
1485 
1486 	if (!mp)
1487 		return (NULL);
1488 
1489 	/* We view multicast and broadcast as the same.. */
1490 	llbcast = (ira->ira_flags &
1491 	    (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST)) != 0;
1492 	ip6h = (ip6_t *)mp->b_rptr;
1493 
1494 	/* Check if source address uniquely identifies the host */
1495 
1496 	if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src) ||
1497 	    IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src) ||
1498 	    IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) {
1499 		freemsg(mp);
1500 		return (NULL);
1501 	}
1502 
1503 	if (ip6h->ip6_nxt == IPPROTO_ICMPV6) {
1504 		size_t	len_needed = IPV6_HDR_LEN + ICMP6_MINLEN;
1505 		icmp6_t		*icmp6;
1506 
1507 		if (mp->b_wptr - mp->b_rptr < len_needed) {
1508 			if (!pullupmsg(mp, len_needed)) {
1509 				BUMP_MIB(ill->ill_icmp6_mib,
1510 				    ipv6IfIcmpInErrors);
1511 				freemsg(mp);
1512 				return (NULL);
1513 			}
1514 			ip6h = (ip6_t *)mp->b_rptr;
1515 		}
1516 		icmp6 = (icmp6_t *)&ip6h[1];
1517 		/* Explicitly do not generate errors in response to redirects */
1518 		if (ICMP6_IS_ERROR(icmp6->icmp6_type) ||
1519 		    icmp6->icmp6_type == ND_REDIRECT) {
1520 			freemsg(mp);
1521 			return (NULL);
1522 		}
1523 	}
1524 	/*
1525 	 * Check that the destination is not multicast and that the packet
1526 	 * was not sent on link layer broadcast or multicast.  (Exception
1527 	 * is Packet too big message as per the draft - when mcast_ok is set.)
1528 	 */
1529 	if (!mcast_ok &&
1530 	    (llbcast || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) {
1531 		freemsg(mp);
1532 		return (NULL);
1533 	}
1534 	/*
1535 	 * If this is a labeled system, then check to see if we're allowed to
1536 	 * send a response to this particular sender.  If not, then just drop.
1537 	 */
1538 	if (is_system_labeled() && !tsol_can_reply_error(mp, ira)) {
1539 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors);
1540 		freemsg(mp);
1541 		return (NULL);
1542 	}
1543 
1544 	if (icmp_err_rate_limit(ipst)) {
1545 		/*
1546 		 * Only send ICMP error packets every so often.
1547 		 * This should be done on a per port/source basis,
1548 		 * but for now this will suffice.
1549 		 */
1550 		freemsg(mp);
1551 		return (NULL);
1552 	}
1553 	return (mp);
1554 }
1555 
1556 /*
1557  * Called when a packet was sent out the same link that it arrived on.
1558  * Check if it is ok to send a redirect and then send it.
1559  */
1560 void
1561 ip_send_potential_redirect_v6(mblk_t *mp, ip6_t *ip6h, ire_t *ire,
1562     ip_recv_attr_t *ira)
1563 {
1564 	ill_t		*ill = ira->ira_ill;
1565 	ip_stack_t	*ipst = ill->ill_ipst;
1566 	in6_addr_t	*v6targ;
1567 	ire_t		*src_ire_v6 = NULL;
1568 	mblk_t		*mp1;
1569 	ire_t		*nhop_ire = NULL;
1570 
1571 	/*
1572 	 * Don't send a redirect when forwarding a source
1573 	 * routed packet.
1574 	 */
1575 	if (ip_source_routed_v6(ip6h, mp, ipst))
1576 		return;
1577 
1578 	if (ire->ire_type & IRE_ONLINK) {
1579 		/* Target is directly connected */
1580 		v6targ = &ip6h->ip6_dst;
1581 	} else {
1582 		/* Determine the most specific IRE used to send the packets */
1583 		nhop_ire = ire_nexthop(ire);
1584 		if (nhop_ire == NULL)
1585 			return;
1586 
1587 		/*
1588 		 * We won't send redirects to a router
1589 		 * that doesn't have a link local
1590 		 * address, but will forward.
1591 		 */
1592 		if (!IN6_IS_ADDR_LINKLOCAL(&nhop_ire->ire_addr_v6)) {
1593 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
1594 			ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
1595 			ire_refrele(nhop_ire);
1596 			return;
1597 		}
1598 		v6targ = &nhop_ire->ire_addr_v6;
1599 	}
1600 	src_ire_v6 = ire_ftable_lookup_v6(&ip6h->ip6_src,
1601 	    NULL, NULL, IRE_INTERFACE, ire->ire_ill, ALL_ZONES, NULL,
1602 	    MATCH_IRE_ILL | MATCH_IRE_TYPE, 0, ipst, NULL);
1603 
1604 	if (src_ire_v6 == NULL) {
1605 		if (nhop_ire != NULL)
1606 			ire_refrele(nhop_ire);
1607 		return;
1608 	}
1609 
1610 	/*
1611 	 * The source is directly connected.
1612 	 */
1613 	mp1 = copymsg(mp);
1614 	if (mp1 != NULL)
1615 		icmp_send_redirect_v6(mp1, v6targ, &ip6h->ip6_dst, ira);
1616 
1617 	if (nhop_ire != NULL)
1618 		ire_refrele(nhop_ire);
1619 	ire_refrele(src_ire_v6);
1620 }
1621 
1622 /*
1623  * Generate an ICMPv6 redirect message.
1624  * Include target link layer address option if it exits.
1625  * Always include redirect header.
1626  */
1627 static void
1628 icmp_send_redirect_v6(mblk_t *mp, in6_addr_t *targetp, in6_addr_t *dest,
1629     ip_recv_attr_t *ira)
1630 {
1631 	nd_redirect_t	*rd;
1632 	nd_opt_rd_hdr_t	*rdh;
1633 	uchar_t		*buf;
1634 	ncec_t		*ncec = NULL;
1635 	nd_opt_hdr_t	*opt;
1636 	int		len;
1637 	int		ll_opt_len = 0;
1638 	int		max_redir_hdr_data_len;
1639 	int		pkt_len;
1640 	in6_addr_t	*srcp;
1641 	ill_t		*ill;
1642 	boolean_t	need_refrele;
1643 	ip_stack_t	*ipst = ira->ira_ill->ill_ipst;
1644 
1645 	mp = icmp_pkt_err_ok_v6(mp, B_FALSE, ira);
1646 	if (mp == NULL)
1647 		return;
1648 
1649 	if (IS_UNDER_IPMP(ira->ira_ill)) {
1650 		ill = ipmp_ill_hold_ipmp_ill(ira->ira_ill);
1651 		if (ill == NULL) {
1652 			ill = ira->ira_ill;
1653 			BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
1654 			ip_drop_output("no IPMP ill for sending redirect",
1655 			    mp, ill);
1656 			freemsg(mp);
1657 			return;
1658 		}
1659 		need_refrele = B_TRUE;
1660 	} else {
1661 		ill = ira->ira_ill;
1662 		need_refrele = B_FALSE;
1663 	}
1664 
1665 	ncec = ncec_lookup_illgrp_v6(ill, targetp);
1666 	if (ncec != NULL && ncec->ncec_state != ND_INCOMPLETE &&
1667 	    ncec->ncec_lladdr != NULL) {
1668 		ll_opt_len = (sizeof (nd_opt_hdr_t) +
1669 		    ill->ill_phys_addr_length + 7)/8 * 8;
1670 	}
1671 	len = sizeof (nd_redirect_t) + sizeof (nd_opt_rd_hdr_t) + ll_opt_len;
1672 	ASSERT(len % 4 == 0);
1673 	buf = kmem_alloc(len, KM_NOSLEEP);
1674 	if (buf == NULL) {
1675 		if (ncec != NULL)
1676 			ncec_refrele(ncec);
1677 		if (need_refrele)
1678 			ill_refrele(ill);
1679 		freemsg(mp);
1680 		return;
1681 	}
1682 
1683 	rd = (nd_redirect_t *)buf;
1684 	rd->nd_rd_type = (uint8_t)ND_REDIRECT;
1685 	rd->nd_rd_code = 0;
1686 	rd->nd_rd_reserved = 0;
1687 	rd->nd_rd_target = *targetp;
1688 	rd->nd_rd_dst = *dest;
1689 
1690 	opt = (nd_opt_hdr_t *)(buf + sizeof (nd_redirect_t));
1691 	if (ncec != NULL && ll_opt_len != 0) {
1692 		opt->nd_opt_type = ND_OPT_TARGET_LINKADDR;
1693 		opt->nd_opt_len = ll_opt_len/8;
1694 		bcopy((char *)ncec->ncec_lladdr, &opt[1],
1695 		    ill->ill_phys_addr_length);
1696 	}
1697 	if (ncec != NULL)
1698 		ncec_refrele(ncec);
1699 	rdh = (nd_opt_rd_hdr_t *)(buf + sizeof (nd_redirect_t) + ll_opt_len);
1700 	rdh->nd_opt_rh_type = (uint8_t)ND_OPT_REDIRECTED_HEADER;
1701 	/* max_redir_hdr_data_len and nd_opt_rh_len must be multiple of 8 */
1702 	max_redir_hdr_data_len =
1703 	    (ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len)/8*8;
1704 	pkt_len = msgdsize(mp);
1705 	/* Make sure mp is 8 byte aligned */
1706 	if (pkt_len > max_redir_hdr_data_len) {
1707 		rdh->nd_opt_rh_len = (max_redir_hdr_data_len +
1708 		    sizeof (nd_opt_rd_hdr_t))/8;
1709 		(void) adjmsg(mp, max_redir_hdr_data_len - pkt_len);
1710 	} else {
1711 		rdh->nd_opt_rh_len = (pkt_len + sizeof (nd_opt_rd_hdr_t))/8;
1712 		(void) adjmsg(mp, -(pkt_len % 8));
1713 	}
1714 	rdh->nd_opt_rh_reserved1 = 0;
1715 	rdh->nd_opt_rh_reserved2 = 0;
1716 	/* ipif_v6lcl_addr contains the link-local source address */
1717 	srcp = &ill->ill_ipif->ipif_v6lcl_addr;
1718 
1719 	/* Redirects sent by router, and router is global zone */
1720 	ASSERT(ira->ira_zoneid == ALL_ZONES);
1721 	ira->ira_zoneid = GLOBAL_ZONEID;
1722 	icmp_pkt_v6(mp, buf, len, srcp, ira);
1723 	kmem_free(buf, len);
1724 	if (need_refrele)
1725 		ill_refrele(ill);
1726 }
1727 
1728 
1729 /* Generate an ICMP time exceeded message.  (May be called as writer.) */
1730 void
1731 icmp_time_exceeded_v6(mblk_t *mp, uint8_t code, boolean_t mcast_ok,
1732     ip_recv_attr_t *ira)
1733 {
1734 	icmp6_t	icmp6;
1735 
1736 	mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira);
1737 	if (mp == NULL)
1738 		return;
1739 
1740 	bzero(&icmp6, sizeof (icmp6_t));
1741 	icmp6.icmp6_type = ICMP6_TIME_EXCEEDED;
1742 	icmp6.icmp6_code = code;
1743 	icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira);
1744 }
1745 
1746 /*
1747  * Generate an ICMP unreachable message.
1748  * When called from ip_output side a minimal ip_recv_attr_t needs to be
1749  * constructed by the caller.
1750  */
1751 void
1752 icmp_unreachable_v6(mblk_t *mp, uint8_t code, boolean_t mcast_ok,
1753     ip_recv_attr_t *ira)
1754 {
1755 	icmp6_t	icmp6;
1756 
1757 	mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira);
1758 	if (mp == NULL)
1759 		return;
1760 
1761 	bzero(&icmp6, sizeof (icmp6_t));
1762 	icmp6.icmp6_type = ICMP6_DST_UNREACH;
1763 	icmp6.icmp6_code = code;
1764 	icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira);
1765 }
1766 
1767 /*
1768  * Generate an ICMP pkt too big message.
1769  * When called from ip_output side a minimal ip_recv_attr_t needs to be
1770  * constructed by the caller.
1771  */
1772 void
1773 icmp_pkt2big_v6(mblk_t *mp, uint32_t mtu, boolean_t mcast_ok,
1774     ip_recv_attr_t *ira)
1775 {
1776 	icmp6_t	icmp6;
1777 
1778 	mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira);
1779 	if (mp == NULL)
1780 		return;
1781 
1782 	bzero(&icmp6, sizeof (icmp6_t));
1783 	icmp6.icmp6_type = ICMP6_PACKET_TOO_BIG;
1784 	icmp6.icmp6_code = 0;
1785 	icmp6.icmp6_mtu = htonl(mtu);
1786 
1787 	icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira);
1788 }
1789 
1790 /*
1791  * Generate an ICMP parameter problem message. (May be called as writer.)
1792  * 'offset' is the offset from the beginning of the packet in error.
1793  * When called from ip_output side a minimal ip_recv_attr_t needs to be
1794  * constructed by the caller.
1795  */
1796 static void
1797 icmp_param_problem_v6(mblk_t *mp, uint8_t code, uint32_t offset,
1798     boolean_t mcast_ok, ip_recv_attr_t *ira)
1799 {
1800 	icmp6_t	icmp6;
1801 
1802 	mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira);
1803 	if (mp == NULL)
1804 		return;
1805 
1806 	bzero((char *)&icmp6, sizeof (icmp6_t));
1807 	icmp6.icmp6_type = ICMP6_PARAM_PROB;
1808 	icmp6.icmp6_code = code;
1809 	icmp6.icmp6_pptr = htonl(offset);
1810 	icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira);
1811 }
1812 
1813 void
1814 icmp_param_problem_nexthdr_v6(mblk_t *mp, boolean_t mcast_ok,
1815     ip_recv_attr_t *ira)
1816 {
1817 	ip6_t		*ip6h = (ip6_t *)mp->b_rptr;
1818 	uint16_t	hdr_length;
1819 	uint8_t		*nexthdrp;
1820 	uint32_t	offset;
1821 	ill_t		*ill = ira->ira_ill;
1822 
1823 	/* Determine the offset of the bad nexthdr value */
1824 	if (!ip_hdr_length_nexthdr_v6(mp, ip6h,	&hdr_length, &nexthdrp)) {
1825 		/* Malformed packet */
1826 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
1827 		ip_drop_input("ipIfStatsInDiscards", mp, ill);
1828 		freemsg(mp);
1829 		return;
1830 	}
1831 
1832 	offset = nexthdrp - mp->b_rptr;
1833 	icmp_param_problem_v6(mp, ICMP6_PARAMPROB_NEXTHEADER, offset,
1834 	    mcast_ok, ira);
1835 }
1836 
1837 /*
1838  * Verify whether or not the IP address is a valid local address.
1839  * Could be a unicast, including one for a down interface.
1840  * If allow_mcbc then a multicast or broadcast address is also
1841  * acceptable.
1842  *
1843  * In the case of a multicast address, however, the
1844  * upper protocol is expected to reset the src address
1845  * to zero when we return IPVL_MCAST so that
1846  * no packets are emitted with multicast address as
1847  * source address.
1848  * The addresses valid for bind are:
1849  *	(1) - in6addr_any
1850  *	(2) - IP address of an UP interface
1851  *	(3) - IP address of a DOWN interface
1852  *	(4) - a multicast address. In this case
1853  *	the conn will only receive packets destined to
1854  *	the specified multicast address. Note: the
1855  *	application still has to issue an
1856  *	IPV6_JOIN_GROUP socket option.
1857  *
1858  * In all the above cases, the bound address must be valid in the current zone.
1859  * When the address is loopback or multicast, there might be many matching IREs
1860  * so bind has to look up based on the zone.
1861  */
1862 ip_laddr_t
1863 ip_laddr_verify_v6(const in6_addr_t *v6src, zoneid_t zoneid,
1864     ip_stack_t *ipst, boolean_t allow_mcbc, uint_t scopeid)
1865 {
1866 	ire_t		*src_ire;
1867 	uint_t		match_flags;
1868 	ill_t		*ill = NULL;
1869 
1870 	ASSERT(!IN6_IS_ADDR_V4MAPPED(v6src));
1871 	ASSERT(!IN6_IS_ADDR_UNSPECIFIED(v6src));
1872 
1873 	match_flags = MATCH_IRE_ZONEONLY;
1874 	if (scopeid != 0) {
1875 		ill = ill_lookup_on_ifindex(scopeid, B_TRUE, ipst);
1876 		if (ill == NULL)
1877 			return (IPVL_BAD);
1878 		match_flags |= MATCH_IRE_ILL;
1879 	}
1880 
1881 	src_ire = ire_ftable_lookup_v6(v6src, NULL, NULL, 0,
1882 	    ill, zoneid, NULL, match_flags, 0, ipst, NULL);
1883 	if (ill != NULL)
1884 		ill_refrele(ill);
1885 
1886 	/*
1887 	 * If an address other than in6addr_any is requested,
1888 	 * we verify that it is a valid address for bind
1889 	 * Note: Following code is in if-else-if form for
1890 	 * readability compared to a condition check.
1891 	 */
1892 	if (src_ire != NULL && (src_ire->ire_type & (IRE_LOCAL|IRE_LOOPBACK))) {
1893 		/*
1894 		 * (2) Bind to address of local UP interface
1895 		 */
1896 		ire_refrele(src_ire);
1897 		return (IPVL_UNICAST_UP);
1898 	} else if (IN6_IS_ADDR_MULTICAST(v6src)) {
1899 		/* (4) bind to multicast address. */
1900 		if (src_ire != NULL)
1901 			ire_refrele(src_ire);
1902 
1903 		/*
1904 		 * Note: caller should take IPV6_MULTICAST_IF
1905 		 * into account when selecting a real source address.
1906 		 */
1907 		if (allow_mcbc)
1908 			return (IPVL_MCAST);
1909 		else
1910 			return (IPVL_BAD);
1911 	} else {
1912 		ipif_t *ipif;
1913 
1914 		/*
1915 		 * (3) Bind to address of local DOWN interface?
1916 		 * (ipif_lookup_addr() looks up all interfaces
1917 		 * but we do not get here for UP interfaces
1918 		 * - case (2) above)
1919 		 */
1920 		if (src_ire != NULL)
1921 			ire_refrele(src_ire);
1922 
1923 		ipif = ipif_lookup_addr_v6(v6src, NULL, zoneid, ipst);
1924 		if (ipif == NULL)
1925 			return (IPVL_BAD);
1926 
1927 		/* Not a useful source? */
1928 		if (ipif->ipif_flags & (IPIF_NOLOCAL | IPIF_ANYCAST)) {
1929 			ipif_refrele(ipif);
1930 			return (IPVL_BAD);
1931 		}
1932 		ipif_refrele(ipif);
1933 		return (IPVL_UNICAST_DOWN);
1934 	}
1935 }
1936 
1937 /*
1938  * Verify that both the source and destination addresses are valid.  If
1939  * IPDF_VERIFY_DST is not set, then the destination address may be unreachable,
1940  * i.e. have no route to it.  Protocols like TCP want to verify destination
1941  * reachability, while tunnels do not.
1942  *
1943  * Determine the route, the interface, and (optionally) the source address
1944  * to use to reach a given destination.
1945  * Note that we allow connect to broadcast and multicast addresses when
1946  * IPDF_ALLOW_MCBC is set.
1947  * first_hop and dst_addr are normally the same, but if source routing
1948  * they will differ; in that case the first_hop is what we'll use for the
1949  * routing lookup but the dce and label checks will be done on dst_addr,
1950  *
1951  * If uinfo is set, then we fill in the best available information
1952  * we have for the destination. This is based on (in priority order) any
1953  * metrics and path MTU stored in a dce_t, route metrics, and finally the
1954  * ill_mtu.
1955  *
1956  * Tsol note: If we have a source route then dst_addr != firsthop. But we
1957  * always do the label check on dst_addr.
1958  *
1959  * Assumes that the caller has set ixa_scopeid for link-local communication.
1960  */
1961 int
1962 ip_set_destination_v6(in6_addr_t *src_addrp, const in6_addr_t *dst_addr,
1963     const in6_addr_t *firsthop, ip_xmit_attr_t *ixa, iulp_t *uinfo,
1964     uint32_t flags, uint_t mac_mode)
1965 {
1966 	ire_t		*ire;
1967 	int		error = 0;
1968 	in6_addr_t	setsrc;				/* RTF_SETSRC */
1969 	zoneid_t	zoneid = ixa->ixa_zoneid;	/* Honors SO_ALLZONES */
1970 	ip_stack_t	*ipst = ixa->ixa_ipst;
1971 	dce_t		*dce;
1972 	uint_t		pmtu;
1973 	uint_t		ifindex;
1974 	uint_t		generation;
1975 	nce_t		*nce;
1976 	ill_t		*ill = NULL;
1977 	boolean_t	multirt = B_FALSE;
1978 
1979 	ASSERT(!IN6_IS_ADDR_V4MAPPED(dst_addr));
1980 
1981 	ASSERT(!(ixa->ixa_flags & IXAF_IS_IPV4));
1982 
1983 	/*
1984 	 * We never send to zero; the ULPs map it to the loopback address.
1985 	 * We can't allow it since we use zero to mean uninitialized in some
1986 	 * places.
1987 	 */
1988 	ASSERT(!IN6_IS_ADDR_UNSPECIFIED(dst_addr));
1989 
1990 	if (is_system_labeled()) {
1991 		ts_label_t *tsl = NULL;
1992 
1993 		error = tsol_check_dest(ixa->ixa_tsl, dst_addr, IPV6_VERSION,
1994 		    mac_mode, (flags & IPDF_ZONE_IS_GLOBAL) != 0, &tsl);
1995 		if (error != 0)
1996 			return (error);
1997 		if (tsl != NULL) {
1998 			/* Update the label */
1999 			ip_xmit_attr_replace_tsl(ixa, tsl);
2000 		}
2001 	}
2002 
2003 	setsrc = ipv6_all_zeros;
2004 	/*
2005 	 * Select a route; For IPMP interfaces, we would only select
2006 	 * a "hidden" route (i.e., going through a specific under_ill)
2007 	 * if ixa_ifindex has been specified.
2008 	 */
2009 	ire = ip_select_route_v6(firsthop, ixa, &generation, &setsrc, &error,
2010 	    &multirt);
2011 	ASSERT(ire != NULL);	/* IRE_NOROUTE if none found */
2012 	if (error != 0)
2013 		goto bad_addr;
2014 
2015 	/*
2016 	 * ire can't be a broadcast or multicast unless IPDF_ALLOW_MCBC is set.
2017 	 * If IPDF_VERIFY_DST is set, the destination must be reachable.
2018 	 * Otherwise the destination needn't be reachable.
2019 	 *
2020 	 * If we match on a reject or black hole, then we've got a
2021 	 * local failure.  May as well fail out the connect() attempt,
2022 	 * since it's never going to succeed.
2023 	 */
2024 	if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
2025 		/*
2026 		 * If we're verifying destination reachability, we always want
2027 		 * to complain here.
2028 		 *
2029 		 * If we're not verifying destination reachability but the
2030 		 * destination has a route, we still want to fail on the
2031 		 * temporary address and broadcast address tests.
2032 		 *
2033 		 * In both cases do we let the code continue so some reasonable
2034 		 * information is returned to the caller. That enables the
2035 		 * caller to use (and even cache) the IRE. conn_ip_ouput will
2036 		 * use the generation mismatch path to check for the unreachable
2037 		 * case thereby avoiding any specific check in the main path.
2038 		 */
2039 		ASSERT(generation == IRE_GENERATION_VERIFY);
2040 		if (flags & IPDF_VERIFY_DST) {
2041 			/*
2042 			 * Set errno but continue to set up ixa_ire to be
2043 			 * the RTF_REJECT|RTF_BLACKHOLE IRE.
2044 			 * That allows callers to use ip_output to get an
2045 			 * ICMP error back.
2046 			 */
2047 			if (!(ire->ire_type & IRE_HOST))
2048 				error = ENETUNREACH;
2049 			else
2050 				error = EHOSTUNREACH;
2051 		}
2052 	}
2053 
2054 	if ((ire->ire_type & (IRE_BROADCAST|IRE_MULTICAST)) &&
2055 	    !(flags & IPDF_ALLOW_MCBC)) {
2056 		ire_refrele(ire);
2057 		ire = ire_reject(ipst, B_FALSE);
2058 		generation = IRE_GENERATION_VERIFY;
2059 		error = ENETUNREACH;
2060 	}
2061 
2062 	/* Cache things */
2063 	if (ixa->ixa_ire != NULL)
2064 		ire_refrele_notr(ixa->ixa_ire);
2065 #ifdef DEBUG
2066 	ire_refhold_notr(ire);
2067 	ire_refrele(ire);
2068 #endif
2069 	ixa->ixa_ire = ire;
2070 	ixa->ixa_ire_generation = generation;
2071 
2072 	/*
2073 	 * For multicast with multirt we have a flag passed back from
2074 	 * ire_lookup_multi_ill_v6 since we don't have an IRE for each
2075 	 * possible multicast address.
2076 	 * We also need a flag for multicast since we can't check
2077 	 * whether RTF_MULTIRT is set in ixa_ire for multicast.
2078 	 */
2079 	if (multirt) {
2080 		ixa->ixa_postfragfn = ip_postfrag_multirt_v6;
2081 		ixa->ixa_flags |= IXAF_MULTIRT_MULTICAST;
2082 	} else {
2083 		ixa->ixa_postfragfn = ire->ire_postfragfn;
2084 		ixa->ixa_flags &= ~IXAF_MULTIRT_MULTICAST;
2085 	}
2086 	if (!(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE))) {
2087 		/* Get an nce to cache. */
2088 		nce = ire_to_nce(ire, NULL, firsthop);
2089 		if (nce == NULL) {
2090 			/* Allocation failure? */
2091 			ixa->ixa_ire_generation = IRE_GENERATION_VERIFY;
2092 		} else {
2093 			if (ixa->ixa_nce != NULL)
2094 				nce_refrele(ixa->ixa_nce);
2095 			ixa->ixa_nce = nce;
2096 		}
2097 	}
2098 
2099 	/*
2100 	 * We use use ire_nexthop_ill to avoid the under ipmp
2101 	 * interface for source address selection. Note that for ipmp
2102 	 * probe packets, ixa_ifindex would have been specified, and
2103 	 * the ip_select_route() invocation would have picked an ire
2104 	 * will ire_ill pointing at an under interface.
2105 	 */
2106 	ill = ire_nexthop_ill(ire);
2107 
2108 	/*
2109 	 * If the source address is a loopback address, the
2110 	 * destination had best be local or multicast.
2111 	 * If we are sending to an IRE_LOCAL using a loopback source then
2112 	 * it had better be the same zoneid.
2113 	 */
2114 	if (IN6_IS_ADDR_LOOPBACK(src_addrp)) {
2115 		if ((ire->ire_type & IRE_LOCAL) && ire->ire_zoneid != zoneid) {
2116 			ire = NULL;	/* Stored in ixa_ire */
2117 			error = EADDRNOTAVAIL;
2118 			goto bad_addr;
2119 		}
2120 		if (!(ire->ire_type & (IRE_LOOPBACK|IRE_LOCAL|IRE_MULTICAST))) {
2121 			ire = NULL;	/* Stored in ixa_ire */
2122 			error = EADDRNOTAVAIL;
2123 			goto bad_addr;
2124 		}
2125 	}
2126 
2127 	/*
2128 	 * Does the caller want us to pick a source address?
2129 	 */
2130 	if (flags & IPDF_SELECT_SRC) {
2131 		in6_addr_t	src_addr;
2132 
2133 		/* If unreachable we have no ill but need some source */
2134 		if (ill == NULL) {
2135 			src_addr = ipv6_loopback;
2136 			/* Make sure we look for a better source address */
2137 			generation = SRC_GENERATION_VERIFY;
2138 		} else {
2139 			error = ip_select_source_v6(ill, &setsrc, dst_addr,
2140 			    zoneid, ipst, B_FALSE, ixa->ixa_src_preferences,
2141 			    &src_addr, &generation, NULL);
2142 			if (error != 0) {
2143 				ire = NULL;	/* Stored in ixa_ire */
2144 				goto bad_addr;
2145 			}
2146 		}
2147 
2148 		/*
2149 		 * We allow the source address to to down.
2150 		 * However, we check that we don't use the loopback address
2151 		 * as a source when sending out on the wire.
2152 		 */
2153 		if (IN6_IS_ADDR_LOOPBACK(&src_addr) &&
2154 		    !(ire->ire_type & (IRE_LOCAL|IRE_LOOPBACK|IRE_MULTICAST)) &&
2155 		    !(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE))) {
2156 			ire = NULL;	/* Stored in ixa_ire */
2157 			error = EADDRNOTAVAIL;
2158 			goto bad_addr;
2159 		}
2160 
2161 		*src_addrp = src_addr;
2162 		ixa->ixa_src_generation = generation;
2163 	}
2164 
2165 	/*
2166 	 * Make sure we don't leave an unreachable ixa_nce in place
2167 	 * since ip_select_route is used when we unplumb i.e., remove
2168 	 * references on ixa_ire, ixa_nce, and ixa_dce.
2169 	 */
2170 	nce = ixa->ixa_nce;
2171 	if (nce != NULL && nce->nce_is_condemned) {
2172 		nce_refrele(nce);
2173 		ixa->ixa_nce = NULL;
2174 		ixa->ixa_ire_generation = IRE_GENERATION_VERIFY;
2175 	}
2176 
2177 
2178 	ifindex = 0;
2179 	if (IN6_IS_ADDR_LINKSCOPE(dst_addr)) {
2180 		/* If we are creating a DCE we'd better have an ifindex */
2181 		if (ill != NULL)
2182 			ifindex = ill->ill_phyint->phyint_ifindex;
2183 		else
2184 			flags &= ~IPDF_UNIQUE_DCE;
2185 	}
2186 
2187 	if (flags & IPDF_UNIQUE_DCE) {
2188 		/* Fallback to the default dce if allocation fails */
2189 		dce = dce_lookup_and_add_v6(dst_addr, ifindex, ipst);
2190 		if (dce != NULL) {
2191 			generation = dce->dce_generation;
2192 		} else {
2193 			dce = dce_lookup_v6(dst_addr, ifindex, ipst,
2194 			    &generation);
2195 		}
2196 	} else {
2197 		dce = dce_lookup_v6(dst_addr, ifindex, ipst, &generation);
2198 	}
2199 	ASSERT(dce != NULL);
2200 	if (ixa->ixa_dce != NULL)
2201 		dce_refrele_notr(ixa->ixa_dce);
2202 #ifdef DEBUG
2203 	dce_refhold_notr(dce);
2204 	dce_refrele(dce);
2205 #endif
2206 	ixa->ixa_dce = dce;
2207 	ixa->ixa_dce_generation = generation;
2208 
2209 	/*
2210 	 * Note that IPv6 multicast supports PMTU discovery unlike IPv4
2211 	 * multicast. But pmtu discovery is only enabled for connected
2212 	 * sockets in general.
2213 	 */
2214 
2215 	/*
2216 	 * Set initial value for fragmentation limit.  Either conn_ip_output
2217 	 * or ULP might updates it when there are routing changes.
2218 	 * Handles a NULL ixa_ire->ire_ill or a NULL ixa_nce for RTF_REJECT.
2219 	 */
2220 	pmtu = ip_get_pmtu(ixa);
2221 	ixa->ixa_fragsize = pmtu;
2222 	/* Make sure ixa_fragsize and ixa_pmtu remain identical */
2223 	if (ixa->ixa_flags & IXAF_VERIFY_PMTU)
2224 		ixa->ixa_pmtu = pmtu;
2225 
2226 	/*
2227 	 * Extract information useful for some transports.
2228 	 * First we look for DCE metrics. Then we take what we have in
2229 	 * the metrics in the route, where the offlink is used if we have
2230 	 * one.
2231 	 */
2232 	if (uinfo != NULL) {
2233 		bzero(uinfo, sizeof (*uinfo));
2234 
2235 		if (dce->dce_flags & DCEF_UINFO)
2236 			*uinfo = dce->dce_uinfo;
2237 
2238 		rts_merge_metrics(uinfo, &ire->ire_metrics);
2239 
2240 		/* Allow ire_metrics to decrease the path MTU from above */
2241 		if (uinfo->iulp_mtu == 0 || uinfo->iulp_mtu > pmtu)
2242 			uinfo->iulp_mtu = pmtu;
2243 
2244 		uinfo->iulp_localnet = (ire->ire_type & IRE_ONLINK) != 0;
2245 		uinfo->iulp_loopback = (ire->ire_type & IRE_LOOPBACK) != 0;
2246 		uinfo->iulp_local = (ire->ire_type & IRE_LOCAL) != 0;
2247 	}
2248 
2249 	if (ill != NULL)
2250 		ill_refrele(ill);
2251 
2252 	return (error);
2253 
2254 bad_addr:
2255 	if (ire != NULL)
2256 		ire_refrele(ire);
2257 
2258 	if (ill != NULL)
2259 		ill_refrele(ill);
2260 
2261 	/*
2262 	 * Make sure we don't leave an unreachable ixa_nce in place
2263 	 * since ip_select_route is used when we unplumb i.e., remove
2264 	 * references on ixa_ire, ixa_nce, and ixa_dce.
2265 	 */
2266 	nce = ixa->ixa_nce;
2267 	if (nce != NULL && nce->nce_is_condemned) {
2268 		nce_refrele(nce);
2269 		ixa->ixa_nce = NULL;
2270 		ixa->ixa_ire_generation = IRE_GENERATION_VERIFY;
2271 	}
2272 
2273 	return (error);
2274 }
2275 
2276 /*
2277  * Handle protocols with which IP is less intimate.  There
2278  * can be more than one stream bound to a particular
2279  * protocol.  When this is the case, normally each one gets a copy
2280  * of any incoming packets.
2281  *
2282  * Zones notes:
2283  * Packets will be distributed to conns in all zones. This is really only
2284  * useful for ICMPv6 as only applications in the global zone can create raw
2285  * sockets for other protocols.
2286  */
2287 void
2288 ip_fanout_proto_v6(mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira)
2289 {
2290 	mblk_t		*mp1;
2291 	in6_addr_t	laddr = ip6h->ip6_dst;
2292 	conn_t		*connp, *first_connp, *next_connp;
2293 	connf_t		*connfp;
2294 	ill_t		*ill = ira->ira_ill;
2295 	ip_stack_t	*ipst = ill->ill_ipst;
2296 
2297 	connfp = &ipst->ips_ipcl_proto_fanout_v6[ira->ira_protocol];
2298 	mutex_enter(&connfp->connf_lock);
2299 	connp = connfp->connf_head;
2300 	for (connp = connfp->connf_head; connp != NULL;
2301 	    connp = connp->conn_next) {
2302 		/* Note: IPCL_PROTO_MATCH_V6 includes conn_wantpacket */
2303 		if (IPCL_PROTO_MATCH_V6(connp, ira, ip6h) &&
2304 		    (!(ira->ira_flags & IRAF_SYSTEM_LABELED) ||
2305 		    tsol_receive_local(mp, &laddr, IPV6_VERSION, ira, connp)))
2306 			break;
2307 	}
2308 
2309 	if (connp == NULL) {
2310 		/*
2311 		 * No one bound to this port.  Is
2312 		 * there a client that wants all
2313 		 * unclaimed datagrams?
2314 		 */
2315 		mutex_exit(&connfp->connf_lock);
2316 		ip_fanout_send_icmp_v6(mp, ICMP6_PARAM_PROB,
2317 		    ICMP6_PARAMPROB_NEXTHEADER, ira);
2318 		return;
2319 	}
2320 
2321 	ASSERT(IPCL_IS_NONSTR(connp) || connp->conn_rq != NULL);
2322 
2323 	CONN_INC_REF(connp);
2324 	first_connp = connp;
2325 
2326 	/*
2327 	 * XXX: Fix the multiple protocol listeners case. We should not
2328 	 * be walking the conn->conn_next list here.
2329 	 */
2330 	connp = connp->conn_next;
2331 	for (;;) {
2332 		while (connp != NULL) {
2333 			/* Note: IPCL_PROTO_MATCH_V6 includes conn_wantpacket */
2334 			if (IPCL_PROTO_MATCH_V6(connp, ira, ip6h) &&
2335 			    (!(ira->ira_flags & IRAF_SYSTEM_LABELED) ||
2336 			    tsol_receive_local(mp, &laddr, IPV6_VERSION,
2337 			    ira, connp)))
2338 				break;
2339 			connp = connp->conn_next;
2340 		}
2341 
2342 		if (connp == NULL) {
2343 			/* No more interested clients */
2344 			connp = first_connp;
2345 			break;
2346 		}
2347 		if (((mp1 = dupmsg(mp)) == NULL) &&
2348 		    ((mp1 = copymsg(mp)) == NULL)) {
2349 			/* Memory allocation failed */
2350 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2351 			ip_drop_input("ipIfStatsInDiscards", mp, ill);
2352 			connp = first_connp;
2353 			break;
2354 		}
2355 
2356 		CONN_INC_REF(connp);
2357 		mutex_exit(&connfp->connf_lock);
2358 
2359 		ip_fanout_proto_conn(connp, mp1, NULL, (ip6_t *)mp1->b_rptr,
2360 		    ira);
2361 
2362 		mutex_enter(&connfp->connf_lock);
2363 		/* Follow the next pointer before releasing the conn. */
2364 		next_connp = connp->conn_next;
2365 		CONN_DEC_REF(connp);
2366 		connp = next_connp;
2367 	}
2368 
2369 	/* Last one.  Send it upstream. */
2370 	mutex_exit(&connfp->connf_lock);
2371 
2372 	ip_fanout_proto_conn(connp, mp, NULL, ip6h, ira);
2373 
2374 	CONN_DEC_REF(connp);
2375 }
2376 
2377 /*
2378  * Called when it is conceptually a ULP that would sent the packet
2379  * e.g., port unreachable and nexthdr unknown. Check that the packet
2380  * would have passed the IPsec global policy before sending the error.
2381  *
2382  * Send an ICMP error after patching up the packet appropriately.
2383  * Uses ip_drop_input and bumps the appropriate MIB.
2384  * For ICMP6_PARAMPROB_NEXTHEADER we determine the offset to use.
2385  */
2386 void
2387 ip_fanout_send_icmp_v6(mblk_t *mp, uint_t icmp_type, uint8_t icmp_code,
2388     ip_recv_attr_t *ira)
2389 {
2390 	ip6_t		*ip6h;
2391 	boolean_t	secure;
2392 	ill_t		*ill = ira->ira_ill;
2393 	ip_stack_t	*ipst = ill->ill_ipst;
2394 	netstack_t	*ns = ipst->ips_netstack;
2395 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
2396 
2397 	secure = ira->ira_flags & IRAF_IPSEC_SECURE;
2398 
2399 	/*
2400 	 * We are generating an icmp error for some inbound packet.
2401 	 * Called from all ip_fanout_(udp, tcp, proto) functions.
2402 	 * Before we generate an error, check with global policy
2403 	 * to see whether this is allowed to enter the system. As
2404 	 * there is no "conn", we are checking with global policy.
2405 	 */
2406 	ip6h = (ip6_t *)mp->b_rptr;
2407 	if (secure || ipss->ipsec_inbound_v6_policy_present) {
2408 		mp = ipsec_check_global_policy(mp, NULL, NULL, ip6h, ira, ns);
2409 		if (mp == NULL)
2410 			return;
2411 	}
2412 
2413 	/* We never send errors for protocols that we do implement */
2414 	if (ira->ira_protocol == IPPROTO_ICMPV6) {
2415 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2416 		ip_drop_input("ip_fanout_send_icmp_v6", mp, ill);
2417 		freemsg(mp);
2418 		return;
2419 	}
2420 
2421 	switch (icmp_type) {
2422 	case ICMP6_DST_UNREACH:
2423 		ASSERT(icmp_code == ICMP6_DST_UNREACH_NOPORT);
2424 
2425 		BUMP_MIB(ill->ill_ip_mib, udpIfStatsNoPorts);
2426 		ip_drop_input("ipIfStatsNoPorts", mp, ill);
2427 
2428 		icmp_unreachable_v6(mp, icmp_code, B_FALSE, ira);
2429 		break;
2430 	case ICMP6_PARAM_PROB:
2431 		ASSERT(icmp_code == ICMP6_PARAMPROB_NEXTHEADER);
2432 
2433 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInUnknownProtos);
2434 		ip_drop_input("ipIfStatsInUnknownProtos", mp, ill);
2435 
2436 		/* Let the system determine the offset for this one */
2437 		icmp_param_problem_nexthdr_v6(mp, B_FALSE, ira);
2438 		break;
2439 	default:
2440 #ifdef DEBUG
2441 		panic("ip_fanout_send_icmp_v6: wrong type");
2442 		/*NOTREACHED*/
2443 #else
2444 		freemsg(mp);
2445 		break;
2446 #endif
2447 	}
2448 }
2449 
2450 /*
2451  * Fanout for UDP packets that are multicast or ICMP errors.
2452  * (Unicast fanout is handled in ip_input_v6.)
2453  *
2454  * If SO_REUSEADDR is set all multicast packets
2455  * will be delivered to all conns bound to the same port.
2456  *
2457  * Fanout for UDP packets.
2458  * The caller puts <fport, lport> in the ports parameter.
2459  * ire_type must be IRE_BROADCAST for multicast and broadcast packets.
2460  *
2461  * If SO_REUSEADDR is set all multicast and broadcast packets
2462  * will be delivered to all conns bound to the same port.
2463  *
2464  * Zones notes:
2465  * Earlier in ip_input on a system with multiple shared-IP zones we
2466  * duplicate the multicast and broadcast packets and send them up
2467  * with each explicit zoneid that exists on that ill.
2468  * This means that here we can match the zoneid with SO_ALLZONES being special.
2469  */
2470 void
2471 ip_fanout_udp_multi_v6(mblk_t *mp, ip6_t *ip6h, uint16_t lport, uint16_t fport,
2472     ip_recv_attr_t *ira)
2473 {
2474 	in6_addr_t	laddr;
2475 	conn_t		*connp;
2476 	connf_t		*connfp;
2477 	in6_addr_t	faddr;
2478 	ill_t		*ill = ira->ira_ill;
2479 	ip_stack_t	*ipst = ill->ill_ipst;
2480 
2481 	ASSERT(ira->ira_flags & (IRAF_MULTIBROADCAST|IRAF_ICMP_ERROR));
2482 
2483 	laddr = ip6h->ip6_dst;
2484 	faddr = ip6h->ip6_src;
2485 
2486 	/* Attempt to find a client stream based on destination port. */
2487 	connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)];
2488 	mutex_enter(&connfp->connf_lock);
2489 	connp = connfp->connf_head;
2490 	while (connp != NULL) {
2491 		if ((IPCL_UDP_MATCH_V6(connp, lport, laddr, fport, faddr)) &&
2492 		    conn_wantpacket_v6(connp, ira, ip6h) &&
2493 		    (!(ira->ira_flags & IRAF_SYSTEM_LABELED) ||
2494 		    tsol_receive_local(mp, &laddr, IPV6_VERSION, ira, connp)))
2495 			break;
2496 		connp = connp->conn_next;
2497 	}
2498 
2499 	if (connp == NULL)
2500 		goto notfound;
2501 
2502 	CONN_INC_REF(connp);
2503 
2504 	if (connp->conn_reuseaddr) {
2505 		conn_t		*first_connp = connp;
2506 		conn_t		*next_connp;
2507 		mblk_t		*mp1;
2508 
2509 		connp = connp->conn_next;
2510 		for (;;) {
2511 			while (connp != NULL) {
2512 				if (IPCL_UDP_MATCH_V6(connp, lport, laddr,
2513 				    fport, faddr) &&
2514 				    conn_wantpacket_v6(connp, ira, ip6h) &&
2515 				    (!(ira->ira_flags & IRAF_SYSTEM_LABELED) ||
2516 				    tsol_receive_local(mp, &laddr, IPV6_VERSION,
2517 				    ira, connp)))
2518 					break;
2519 				connp = connp->conn_next;
2520 			}
2521 			if (connp == NULL) {
2522 				/* No more interested clients */
2523 				connp = first_connp;
2524 				break;
2525 			}
2526 			if (((mp1 = dupmsg(mp)) == NULL) &&
2527 			    ((mp1 = copymsg(mp)) == NULL)) {
2528 				/* Memory allocation failed */
2529 				BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2530 				ip_drop_input("ipIfStatsInDiscards", mp, ill);
2531 				connp = first_connp;
2532 				break;
2533 			}
2534 
2535 			CONN_INC_REF(connp);
2536 			mutex_exit(&connfp->connf_lock);
2537 
2538 			IP6_STAT(ipst, ip6_udp_fanmb);
2539 			ip_fanout_udp_conn(connp, mp1, NULL,
2540 			    (ip6_t *)mp1->b_rptr, ira);
2541 
2542 			mutex_enter(&connfp->connf_lock);
2543 			/* Follow the next pointer before releasing the conn. */
2544 			next_connp = connp->conn_next;
2545 			IP6_STAT(ipst, ip6_udp_fanmb);
2546 			CONN_DEC_REF(connp);
2547 			connp = next_connp;
2548 		}
2549 	}
2550 
2551 	/* Last one.  Send it upstream. */
2552 	mutex_exit(&connfp->connf_lock);
2553 
2554 	IP6_STAT(ipst, ip6_udp_fanmb);
2555 	ip_fanout_udp_conn(connp, mp, NULL, ip6h, ira);
2556 	CONN_DEC_REF(connp);
2557 	return;
2558 
2559 notfound:
2560 	mutex_exit(&connfp->connf_lock);
2561 	/*
2562 	 * No one bound to this port.  Is
2563 	 * there a client that wants all
2564 	 * unclaimed datagrams?
2565 	 */
2566 	if (ipst->ips_ipcl_proto_fanout_v6[IPPROTO_UDP].connf_head != NULL) {
2567 		ASSERT(ira->ira_protocol == IPPROTO_UDP);
2568 		ip_fanout_proto_v6(mp, ip6h, ira);
2569 	} else {
2570 		ip_fanout_send_icmp_v6(mp, ICMP6_DST_UNREACH,
2571 		    ICMP6_DST_UNREACH_NOPORT, ira);
2572 	}
2573 }
2574 
2575 /*
2576  * int ip_find_hdr_v6()
2577  *
2578  * This routine is used by the upper layer protocols, iptun, and IPsec:
2579  * - Set extension header pointers to appropriate locations
2580  * - Determine IPv6 header length and return it
2581  * - Return a pointer to the last nexthdr value
2582  *
2583  * The caller must initialize ipp_fields.
2584  * The upper layer protocols normally set label_separate which makes the
2585  * routine put the TX label in ipp_label_v6. If this is not set then
2586  * the hop-by-hop options including the label are placed in ipp_hopopts.
2587  *
2588  * NOTE: If multiple extension headers of the same type are present,
2589  * ip_find_hdr_v6() will set the respective extension header pointers
2590  * to the first one that it encounters in the IPv6 header.  It also
2591  * skips fragment headers.  This routine deals with malformed packets
2592  * of various sorts in which case the returned length is up to the
2593  * malformed part.
2594  */
2595 int
2596 ip_find_hdr_v6(mblk_t *mp, ip6_t *ip6h, boolean_t label_separate, ip_pkt_t *ipp,
2597     uint8_t *nexthdrp)
2598 {
2599 	uint_t	length, ehdrlen;
2600 	uint8_t nexthdr;
2601 	uint8_t *whereptr, *endptr;
2602 	ip6_dest_t *tmpdstopts;
2603 	ip6_rthdr_t *tmprthdr;
2604 	ip6_hbh_t *tmphopopts;
2605 	ip6_frag_t *tmpfraghdr;
2606 
2607 	ipp->ipp_fields |= IPPF_HOPLIMIT | IPPF_TCLASS | IPPF_ADDR;
2608 	ipp->ipp_hoplimit = ip6h->ip6_hops;
2609 	ipp->ipp_tclass = IPV6_FLOW_TCLASS(ip6h->ip6_flow);
2610 	ipp->ipp_addr = ip6h->ip6_dst;
2611 
2612 	length = IPV6_HDR_LEN;
2613 	whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */
2614 	endptr = mp->b_wptr;
2615 
2616 	nexthdr = ip6h->ip6_nxt;
2617 	while (whereptr < endptr) {
2618 		/* Is there enough left for len + nexthdr? */
2619 		if (whereptr + MIN_EHDR_LEN > endptr)
2620 			goto done;
2621 
2622 		switch (nexthdr) {
2623 		case IPPROTO_HOPOPTS: {
2624 			/* We check for any CIPSO */
2625 			uchar_t *secopt;
2626 			boolean_t hbh_needed;
2627 			uchar_t *after_secopt;
2628 
2629 			tmphopopts = (ip6_hbh_t *)whereptr;
2630 			ehdrlen = 8 * (tmphopopts->ip6h_len + 1);
2631 			if ((uchar_t *)tmphopopts +  ehdrlen > endptr)
2632 				goto done;
2633 			nexthdr = tmphopopts->ip6h_nxt;
2634 
2635 			if (!label_separate) {
2636 				secopt = NULL;
2637 				after_secopt = whereptr;
2638 			} else {
2639 				/*
2640 				 * We have dropped packets with bad options in
2641 				 * ip6_input. No need to check return value
2642 				 * here.
2643 				 */
2644 				(void) tsol_find_secopt_v6(whereptr, ehdrlen,
2645 				    &secopt, &after_secopt, &hbh_needed);
2646 			}
2647 			if (secopt != NULL && after_secopt - whereptr > 0) {
2648 				ipp->ipp_fields |= IPPF_LABEL_V6;
2649 				ipp->ipp_label_v6 = secopt;
2650 				ipp->ipp_label_len_v6 = after_secopt - whereptr;
2651 			} else {
2652 				ipp->ipp_label_len_v6 = 0;
2653 				after_secopt = whereptr;
2654 				hbh_needed = B_TRUE;
2655 			}
2656 			/* return only 1st hbh */
2657 			if (hbh_needed && !(ipp->ipp_fields & IPPF_HOPOPTS)) {
2658 				ipp->ipp_fields |= IPPF_HOPOPTS;
2659 				ipp->ipp_hopopts = (ip6_hbh_t *)after_secopt;
2660 				ipp->ipp_hopoptslen = ehdrlen -
2661 				    ipp->ipp_label_len_v6;
2662 			}
2663 			break;
2664 		}
2665 		case IPPROTO_DSTOPTS:
2666 			tmpdstopts = (ip6_dest_t *)whereptr;
2667 			ehdrlen = 8 * (tmpdstopts->ip6d_len + 1);
2668 			if ((uchar_t *)tmpdstopts +  ehdrlen > endptr)
2669 				goto done;
2670 			nexthdr = tmpdstopts->ip6d_nxt;
2671 			/*
2672 			 * ipp_dstopts is set to the destination header after a
2673 			 * routing header.
2674 			 * Assume it is a post-rthdr destination header
2675 			 * and adjust when we find an rthdr.
2676 			 */
2677 			if (!(ipp->ipp_fields & IPPF_DSTOPTS)) {
2678 				ipp->ipp_fields |= IPPF_DSTOPTS;
2679 				ipp->ipp_dstopts = tmpdstopts;
2680 				ipp->ipp_dstoptslen = ehdrlen;
2681 			}
2682 			break;
2683 		case IPPROTO_ROUTING:
2684 			tmprthdr = (ip6_rthdr_t *)whereptr;
2685 			ehdrlen = 8 * (tmprthdr->ip6r_len + 1);
2686 			if ((uchar_t *)tmprthdr +  ehdrlen > endptr)
2687 				goto done;
2688 			nexthdr = tmprthdr->ip6r_nxt;
2689 			/* return only 1st rthdr */
2690 			if (!(ipp->ipp_fields & IPPF_RTHDR)) {
2691 				ipp->ipp_fields |= IPPF_RTHDR;
2692 				ipp->ipp_rthdr = tmprthdr;
2693 				ipp->ipp_rthdrlen = ehdrlen;
2694 			}
2695 			/*
2696 			 * Make any destination header we've seen be a
2697 			 * pre-rthdr destination header.
2698 			 */
2699 			if (ipp->ipp_fields & IPPF_DSTOPTS) {
2700 				ipp->ipp_fields &= ~IPPF_DSTOPTS;
2701 				ipp->ipp_fields |= IPPF_RTHDRDSTOPTS;
2702 				ipp->ipp_rthdrdstopts = ipp->ipp_dstopts;
2703 				ipp->ipp_dstopts = NULL;
2704 				ipp->ipp_rthdrdstoptslen = ipp->ipp_dstoptslen;
2705 				ipp->ipp_dstoptslen = 0;
2706 			}
2707 			break;
2708 		case IPPROTO_FRAGMENT:
2709 			tmpfraghdr = (ip6_frag_t *)whereptr;
2710 			ehdrlen = sizeof (ip6_frag_t);
2711 			if ((uchar_t *)tmpfraghdr + ehdrlen > endptr)
2712 				goto done;
2713 			nexthdr = tmpfraghdr->ip6f_nxt;
2714 			if (!(ipp->ipp_fields & IPPF_FRAGHDR)) {
2715 				ipp->ipp_fields |= IPPF_FRAGHDR;
2716 				ipp->ipp_fraghdr = tmpfraghdr;
2717 				ipp->ipp_fraghdrlen = ehdrlen;
2718 			}
2719 			break;
2720 		case IPPROTO_NONE:
2721 		default:
2722 			goto done;
2723 		}
2724 		length += ehdrlen;
2725 		whereptr += ehdrlen;
2726 	}
2727 done:
2728 	if (nexthdrp != NULL)
2729 		*nexthdrp = nexthdr;
2730 	return (length);
2731 }
2732 
2733 /*
2734  * Try to determine where and what are the IPv6 header length and
2735  * pointer to nexthdr value for the upper layer protocol (or an
2736  * unknown next hdr).
2737  *
2738  * Parameters returns a pointer to the nexthdr value;
2739  * Must handle malformed packets of various sorts.
2740  * Function returns failure for malformed cases.
2741  */
2742 boolean_t
2743 ip_hdr_length_nexthdr_v6(mblk_t *mp, ip6_t *ip6h, uint16_t *hdr_length_ptr,
2744     uint8_t **nexthdrpp)
2745 {
2746 	uint16_t length;
2747 	uint_t	ehdrlen;
2748 	uint8_t	*nexthdrp;
2749 	uint8_t *whereptr;
2750 	uint8_t *endptr;
2751 	ip6_dest_t *desthdr;
2752 	ip6_rthdr_t *rthdr;
2753 	ip6_frag_t *fraghdr;
2754 
2755 	ASSERT(IPH_HDR_VERSION(ip6h) == IPV6_VERSION);
2756 	length = IPV6_HDR_LEN;
2757 	whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */
2758 	endptr = mp->b_wptr;
2759 
2760 	nexthdrp = &ip6h->ip6_nxt;
2761 	while (whereptr < endptr) {
2762 		/* Is there enough left for len + nexthdr? */
2763 		if (whereptr + MIN_EHDR_LEN > endptr)
2764 			break;
2765 
2766 		switch (*nexthdrp) {
2767 		case IPPROTO_HOPOPTS:
2768 		case IPPROTO_DSTOPTS:
2769 			/* Assumes the headers are identical for hbh and dst */
2770 			desthdr = (ip6_dest_t *)whereptr;
2771 			ehdrlen = 8 * (desthdr->ip6d_len + 1);
2772 			if ((uchar_t *)desthdr +  ehdrlen > endptr)
2773 				return (B_FALSE);
2774 			nexthdrp = &desthdr->ip6d_nxt;
2775 			break;
2776 		case IPPROTO_ROUTING:
2777 			rthdr = (ip6_rthdr_t *)whereptr;
2778 			ehdrlen =  8 * (rthdr->ip6r_len + 1);
2779 			if ((uchar_t *)rthdr +  ehdrlen > endptr)
2780 				return (B_FALSE);
2781 			nexthdrp = &rthdr->ip6r_nxt;
2782 			break;
2783 		case IPPROTO_FRAGMENT:
2784 			fraghdr = (ip6_frag_t *)whereptr;
2785 			ehdrlen = sizeof (ip6_frag_t);
2786 			if ((uchar_t *)&fraghdr[1] > endptr)
2787 				return (B_FALSE);
2788 			nexthdrp = &fraghdr->ip6f_nxt;
2789 			break;
2790 		case IPPROTO_NONE:
2791 			/* No next header means we're finished */
2792 		default:
2793 			*hdr_length_ptr = length;
2794 			*nexthdrpp = nexthdrp;
2795 			return (B_TRUE);
2796 		}
2797 		length += ehdrlen;
2798 		whereptr += ehdrlen;
2799 		*hdr_length_ptr = length;
2800 		*nexthdrpp = nexthdrp;
2801 	}
2802 	switch (*nexthdrp) {
2803 	case IPPROTO_HOPOPTS:
2804 	case IPPROTO_DSTOPTS:
2805 	case IPPROTO_ROUTING:
2806 	case IPPROTO_FRAGMENT:
2807 		/*
2808 		 * If any know extension headers are still to be processed,
2809 		 * the packet's malformed (or at least all the IP header(s) are
2810 		 * not in the same mblk - and that should never happen.
2811 		 */
2812 		return (B_FALSE);
2813 
2814 	default:
2815 		/*
2816 		 * If we get here, we know that all of the IP headers were in
2817 		 * the same mblk, even if the ULP header is in the next mblk.
2818 		 */
2819 		*hdr_length_ptr = length;
2820 		*nexthdrpp = nexthdrp;
2821 		return (B_TRUE);
2822 	}
2823 }
2824 
2825 /*
2826  * Return the length of the IPv6 related headers (including extension headers)
2827  * Returns a length even if the packet is malformed.
2828  */
2829 int
2830 ip_hdr_length_v6(mblk_t *mp, ip6_t *ip6h)
2831 {
2832 	uint16_t hdr_len;
2833 	uint8_t	*nexthdrp;
2834 
2835 	(void) ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_len, &nexthdrp);
2836 	return (hdr_len);
2837 }
2838 
2839 /*
2840  * Parse and process any hop-by-hop or destination options.
2841  *
2842  * Assumes that q is an ill read queue so that ICMP errors for link-local
2843  * destinations are sent out the correct interface.
2844  *
2845  * Returns -1 if there was an error and mp has been consumed.
2846  * Returns 0 if no special action is needed.
2847  * Returns 1 if the packet contained a router alert option for this node
2848  * which is verified to be "interesting/known" for our implementation.
2849  *
2850  * XXX Note: In future as more hbh or dest options are defined,
2851  * it may be better to have different routines for hbh and dest
2852  * options as opt_type fields other than IP6OPT_PAD1 and IP6OPT_PADN
2853  * may have same value in different namespaces. Or is it same namespace ??
2854  * Current code checks for each opt_type (other than pads) if it is in
2855  * the expected  nexthdr (hbh or dest)
2856  */
2857 int
2858 ip_process_options_v6(mblk_t *mp, ip6_t *ip6h,
2859     uint8_t *optptr, uint_t optlen, uint8_t hdr_type, ip_recv_attr_t *ira)
2860 {
2861 	uint8_t opt_type;
2862 	uint_t optused;
2863 	int ret = 0;
2864 	const char *errtype;
2865 	ill_t		*ill = ira->ira_ill;
2866 	ip_stack_t	*ipst = ill->ill_ipst;
2867 
2868 	while (optlen != 0) {
2869 		opt_type = *optptr;
2870 		if (opt_type == IP6OPT_PAD1) {
2871 			optused = 1;
2872 		} else {
2873 			if (optlen < 2)
2874 				goto bad_opt;
2875 			errtype = "malformed";
2876 			if (opt_type == ip6opt_ls) {
2877 				optused = 2 + optptr[1];
2878 				if (optused > optlen)
2879 					goto bad_opt;
2880 			} else switch (opt_type) {
2881 			case IP6OPT_PADN:
2882 				/*
2883 				 * Note:We don't verify that (N-2) pad octets
2884 				 * are zero as required by spec. Adhere to
2885 				 * "be liberal in what you accept..." part of
2886 				 * implementation philosophy (RFC791,RFC1122)
2887 				 */
2888 				optused = 2 + optptr[1];
2889 				if (optused > optlen)
2890 					goto bad_opt;
2891 				break;
2892 
2893 			case IP6OPT_JUMBO:
2894 				if (hdr_type != IPPROTO_HOPOPTS)
2895 					goto opt_error;
2896 				goto opt_error; /* XXX Not implemented! */
2897 
2898 			case IP6OPT_ROUTER_ALERT: {
2899 				struct ip6_opt_router *or;
2900 
2901 				if (hdr_type != IPPROTO_HOPOPTS)
2902 					goto opt_error;
2903 				optused = 2 + optptr[1];
2904 				if (optused > optlen)
2905 					goto bad_opt;
2906 				or = (struct ip6_opt_router *)optptr;
2907 				/* Check total length and alignment */
2908 				if (optused != sizeof (*or) ||
2909 				    ((uintptr_t)or->ip6or_value & 0x1) != 0)
2910 					goto opt_error;
2911 				/* Check value */
2912 				switch (*((uint16_t *)or->ip6or_value)) {
2913 				case IP6_ALERT_MLD:
2914 				case IP6_ALERT_RSVP:
2915 					ret = 1;
2916 				}
2917 				break;
2918 			}
2919 			case IP6OPT_HOME_ADDRESS: {
2920 				/*
2921 				 * Minimal support for the home address option
2922 				 * (which is required by all IPv6 nodes).
2923 				 * Implement by just swapping the home address
2924 				 * and source address.
2925 				 * XXX Note: this has IPsec implications since
2926 				 * AH needs to take this into account.
2927 				 * Also, when IPsec is used we need to ensure
2928 				 * that this is only processed once
2929 				 * in the received packet (to avoid swapping
2930 				 * back and forth).
2931 				 * NOTE:This option processing is considered
2932 				 * to be unsafe and prone to a denial of
2933 				 * service attack.
2934 				 * The current processing is not safe even with
2935 				 * IPsec secured IP packets. Since the home
2936 				 * address option processing requirement still
2937 				 * is in the IETF draft and in the process of
2938 				 * being redefined for its usage, it has been
2939 				 * decided to turn off the option by default.
2940 				 * If this section of code needs to be executed,
2941 				 * ndd variable ip6_ignore_home_address_opt
2942 				 * should be set to 0 at the user's own risk.
2943 				 */
2944 				struct ip6_opt_home_address *oh;
2945 				in6_addr_t tmp;
2946 
2947 				if (ipst->ips_ipv6_ignore_home_address_opt)
2948 					goto opt_error;
2949 
2950 				if (hdr_type != IPPROTO_DSTOPTS)
2951 					goto opt_error;
2952 				optused = 2 + optptr[1];
2953 				if (optused > optlen)
2954 					goto bad_opt;
2955 
2956 				/*
2957 				 * We did this dest. opt the first time
2958 				 * around (i.e. before AH processing).
2959 				 * If we've done AH... stop now.
2960 				 */
2961 				if ((ira->ira_flags & IRAF_IPSEC_SECURE) &&
2962 				    ira->ira_ipsec_ah_sa != NULL)
2963 					break;
2964 
2965 				oh = (struct ip6_opt_home_address *)optptr;
2966 				/* Check total length and alignment */
2967 				if (optused < sizeof (*oh) ||
2968 				    ((uintptr_t)oh->ip6oh_addr & 0x7) != 0)
2969 					goto opt_error;
2970 				/* Swap ip6_src and the home address */
2971 				tmp = ip6h->ip6_src;
2972 				/* XXX Note: only 8 byte alignment option */
2973 				ip6h->ip6_src = *(in6_addr_t *)oh->ip6oh_addr;
2974 				*(in6_addr_t *)oh->ip6oh_addr = tmp;
2975 				break;
2976 			}
2977 
2978 			case IP6OPT_TUNNEL_LIMIT:
2979 				if (hdr_type != IPPROTO_DSTOPTS) {
2980 					goto opt_error;
2981 				}
2982 				optused = 2 + optptr[1];
2983 				if (optused > optlen) {
2984 					goto bad_opt;
2985 				}
2986 				if (optused != 3) {
2987 					goto opt_error;
2988 				}
2989 				break;
2990 
2991 			default:
2992 				errtype = "unknown";
2993 				/* FALLTHROUGH */
2994 			opt_error:
2995 				/* Determine which zone should send error */
2996 				switch (IP6OPT_TYPE(opt_type)) {
2997 				case IP6OPT_TYPE_SKIP:
2998 					optused = 2 + optptr[1];
2999 					if (optused > optlen)
3000 						goto bad_opt;
3001 					ip1dbg(("ip_process_options_v6: %s "
3002 					    "opt 0x%x skipped\n",
3003 					    errtype, opt_type));
3004 					break;
3005 				case IP6OPT_TYPE_DISCARD:
3006 					ip1dbg(("ip_process_options_v6: %s "
3007 					    "opt 0x%x; packet dropped\n",
3008 					    errtype, opt_type));
3009 					BUMP_MIB(ill->ill_ip_mib,
3010 					    ipIfStatsInHdrErrors);
3011 					ip_drop_input("ipIfStatsInHdrErrors",
3012 					    mp, ill);
3013 					freemsg(mp);
3014 					return (-1);
3015 				case IP6OPT_TYPE_ICMP:
3016 					BUMP_MIB(ill->ill_ip_mib,
3017 					    ipIfStatsInHdrErrors);
3018 					ip_drop_input("ipIfStatsInHdrErrors",
3019 					    mp, ill);
3020 					icmp_param_problem_v6(mp,
3021 					    ICMP6_PARAMPROB_OPTION,
3022 					    (uint32_t)(optptr -
3023 					    (uint8_t *)ip6h),
3024 					    B_FALSE, ira);
3025 					return (-1);
3026 				case IP6OPT_TYPE_FORCEICMP:
3027 					BUMP_MIB(ill->ill_ip_mib,
3028 					    ipIfStatsInHdrErrors);
3029 					ip_drop_input("ipIfStatsInHdrErrors",
3030 					    mp, ill);
3031 					icmp_param_problem_v6(mp,
3032 					    ICMP6_PARAMPROB_OPTION,
3033 					    (uint32_t)(optptr -
3034 					    (uint8_t *)ip6h),
3035 					    B_TRUE, ira);
3036 					return (-1);
3037 				default:
3038 					ASSERT(0);
3039 				}
3040 			}
3041 		}
3042 		optlen -= optused;
3043 		optptr += optused;
3044 	}
3045 	return (ret);
3046 
3047 bad_opt:
3048 	/* Determine which zone should send error */
3049 	ip_drop_input("ICMP_PARAM_PROBLEM", mp, ill);
3050 	icmp_param_problem_v6(mp, ICMP6_PARAMPROB_OPTION,
3051 	    (uint32_t)(optptr - (uint8_t *)ip6h),
3052 	    B_FALSE, ira);
3053 	return (-1);
3054 }
3055 
3056 /*
3057  * Process a routing header that is not yet empty.
3058  * Because of RFC 5095, we now reject all route headers.
3059  */
3060 void
3061 ip_process_rthdr(mblk_t *mp, ip6_t *ip6h, ip6_rthdr_t *rth,
3062     ip_recv_attr_t *ira)
3063 {
3064 	ill_t		*ill = ira->ira_ill;
3065 	ip_stack_t	*ipst = ill->ill_ipst;
3066 
3067 	ASSERT(rth->ip6r_segleft != 0);
3068 
3069 	if (!ipst->ips_ipv6_forward_src_routed) {
3070 		/* XXX Check for source routed out same interface? */
3071 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
3072 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
3073 		ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
3074 		freemsg(mp);
3075 		return;
3076 	}
3077 
3078 	ip_drop_input("ICMP_PARAM_PROBLEM", mp, ill);
3079 	icmp_param_problem_v6(mp, ICMP6_PARAMPROB_HEADER,
3080 	    (uint32_t)((uchar_t *)&rth->ip6r_type - (uchar_t *)ip6h),
3081 	    B_FALSE, ira);
3082 }
3083 
3084 /*
3085  * Read side put procedure for IPv6 module.
3086  */
3087 void
3088 ip_rput_v6(queue_t *q, mblk_t *mp)
3089 {
3090 	ill_t		*ill;
3091 
3092 	ill = (ill_t *)q->q_ptr;
3093 	if (ill->ill_state_flags & (ILL_CONDEMNED | ILL_LL_SUBNET_PENDING)) {
3094 		union DL_primitives *dl;
3095 
3096 		dl = (union DL_primitives *)mp->b_rptr;
3097 		/*
3098 		 * Things are opening or closing - only accept DLPI
3099 		 * ack messages. If the stream is closing and ip_wsrv
3100 		 * has completed, ip_close is out of the qwait, but has
3101 		 * not yet completed qprocsoff. Don't proceed any further
3102 		 * because the ill has been cleaned up and things hanging
3103 		 * off the ill have been freed.
3104 		 */
3105 		if ((mp->b_datap->db_type != M_PCPROTO) ||
3106 		    (dl->dl_primitive == DL_UNITDATA_IND)) {
3107 			inet_freemsg(mp);
3108 			return;
3109 		}
3110 	}
3111 	if (DB_TYPE(mp) == M_DATA) {
3112 		struct mac_header_info_s mhi;
3113 
3114 		ip_mdata_to_mhi(ill, mp, &mhi);
3115 		ip_input_v6(ill, NULL, mp, &mhi);
3116 	} else {
3117 		ip_rput_notdata(ill, mp);
3118 	}
3119 }
3120 
3121 /*
3122  * Walk through the IPv6 packet in mp and see if there's an AH header
3123  * in it.  See if the AH header needs to get done before other headers in
3124  * the packet.  (Worker function for ipsec_early_ah_v6().)
3125  */
3126 #define	IPSEC_HDR_DONT_PROCESS	0
3127 #define	IPSEC_HDR_PROCESS	1
3128 #define	IPSEC_MEMORY_ERROR	2 /* or malformed packet */
3129 static int
3130 ipsec_needs_processing_v6(mblk_t *mp, uint8_t *nexthdr)
3131 {
3132 	uint_t	length;
3133 	uint_t	ehdrlen;
3134 	uint8_t *whereptr;
3135 	uint8_t *endptr;
3136 	uint8_t *nexthdrp;
3137 	ip6_dest_t *desthdr;
3138 	ip6_rthdr_t *rthdr;
3139 	ip6_t	*ip6h;
3140 
3141 	/*
3142 	 * For now just pullup everything.  In general, the less pullups,
3143 	 * the better, but there's so much squirrelling through anyway,
3144 	 * it's just easier this way.
3145 	 */
3146 	if (!pullupmsg(mp, -1)) {
3147 		return (IPSEC_MEMORY_ERROR);
3148 	}
3149 
3150 	ip6h = (ip6_t *)mp->b_rptr;
3151 	length = IPV6_HDR_LEN;
3152 	whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */
3153 	endptr = mp->b_wptr;
3154 
3155 	/*
3156 	 * We can't just use the argument nexthdr in the place
3157 	 * of nexthdrp becaue we don't dereference nexthdrp
3158 	 * till we confirm whether it is a valid address.
3159 	 */
3160 	nexthdrp = &ip6h->ip6_nxt;
3161 	while (whereptr < endptr) {
3162 		/* Is there enough left for len + nexthdr? */
3163 		if (whereptr + MIN_EHDR_LEN > endptr)
3164 			return (IPSEC_MEMORY_ERROR);
3165 
3166 		switch (*nexthdrp) {
3167 		case IPPROTO_HOPOPTS:
3168 		case IPPROTO_DSTOPTS:
3169 			/* Assumes the headers are identical for hbh and dst */
3170 			desthdr = (ip6_dest_t *)whereptr;
3171 			ehdrlen = 8 * (desthdr->ip6d_len + 1);
3172 			if ((uchar_t *)desthdr +  ehdrlen > endptr)
3173 				return (IPSEC_MEMORY_ERROR);
3174 			/*
3175 			 * Return DONT_PROCESS because the destination
3176 			 * options header may be for each hop in a
3177 			 * routing-header, and we only want AH if we're
3178 			 * finished with routing headers.
3179 			 */
3180 			if (*nexthdrp == IPPROTO_DSTOPTS)
3181 				return (IPSEC_HDR_DONT_PROCESS);
3182 			nexthdrp = &desthdr->ip6d_nxt;
3183 			break;
3184 		case IPPROTO_ROUTING:
3185 			rthdr = (ip6_rthdr_t *)whereptr;
3186 
3187 			/*
3188 			 * If there's more hops left on the routing header,
3189 			 * return now with DON'T PROCESS.
3190 			 */
3191 			if (rthdr->ip6r_segleft > 0)
3192 				return (IPSEC_HDR_DONT_PROCESS);
3193 
3194 			ehdrlen =  8 * (rthdr->ip6r_len + 1);
3195 			if ((uchar_t *)rthdr +  ehdrlen > endptr)
3196 				return (IPSEC_MEMORY_ERROR);
3197 			nexthdrp = &rthdr->ip6r_nxt;
3198 			break;
3199 		case IPPROTO_FRAGMENT:
3200 			/* Wait for reassembly */
3201 			return (IPSEC_HDR_DONT_PROCESS);
3202 		case IPPROTO_AH:
3203 			*nexthdr = IPPROTO_AH;
3204 			return (IPSEC_HDR_PROCESS);
3205 		case IPPROTO_NONE:
3206 			/* No next header means we're finished */
3207 		default:
3208 			return (IPSEC_HDR_DONT_PROCESS);
3209 		}
3210 		length += ehdrlen;
3211 		whereptr += ehdrlen;
3212 	}
3213 	/*
3214 	 * Malformed/truncated packet.
3215 	 */
3216 	return (IPSEC_MEMORY_ERROR);
3217 }
3218 
3219 /*
3220  * Path for AH if options are present.
3221  * Returns NULL if the mblk was consumed.
3222  *
3223  * Sometimes AH needs to be done before other IPv6 headers for security
3224  * reasons.  This function (and its ipsec_needs_processing_v6() above)
3225  * indicates if that is so, and fans out to the appropriate IPsec protocol
3226  * for the datagram passed in.
3227  */
3228 mblk_t *
3229 ipsec_early_ah_v6(mblk_t *mp, ip_recv_attr_t *ira)
3230 {
3231 	uint8_t nexthdr;
3232 	ah_t *ah;
3233 	ill_t		*ill = ira->ira_ill;
3234 	ip_stack_t	*ipst = ill->ill_ipst;
3235 	ipsec_stack_t	*ipss = ipst->ips_netstack->netstack_ipsec;
3236 
3237 	switch (ipsec_needs_processing_v6(mp, &nexthdr)) {
3238 	case IPSEC_MEMORY_ERROR:
3239 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
3240 		ip_drop_input("ipIfStatsInDiscards", mp, ill);
3241 		freemsg(mp);
3242 		return (NULL);
3243 	case IPSEC_HDR_DONT_PROCESS:
3244 		return (mp);
3245 	}
3246 
3247 	/* Default means send it to AH! */
3248 	ASSERT(nexthdr == IPPROTO_AH);
3249 
3250 	if (!ipsec_loaded(ipss)) {
3251 		ip_proto_not_sup(mp, ira);
3252 		return (NULL);
3253 	}
3254 
3255 	mp = ipsec_inbound_ah_sa(mp, ira, &ah);
3256 	if (mp == NULL)
3257 		return (NULL);
3258 	ASSERT(ah != NULL);
3259 	ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE);
3260 	ASSERT(ira->ira_ipsec_ah_sa != NULL);
3261 	ASSERT(ira->ira_ipsec_ah_sa->ipsa_input_func != NULL);
3262 	mp = ira->ira_ipsec_ah_sa->ipsa_input_func(mp, ah, ira);
3263 
3264 	if (mp == NULL) {
3265 		/*
3266 		 * Either it failed or is pending. In the former case
3267 		 * ipIfStatsInDiscards was increased.
3268 		 */
3269 		return (NULL);
3270 	}
3271 
3272 	/* we're done with IPsec processing, send it up */
3273 	ip_input_post_ipsec(mp, ira);
3274 	return (NULL);
3275 }
3276 
3277 /*
3278  * Reassemble fragment.
3279  * When it returns a completed message the first mblk will only contain
3280  * the headers prior to the fragment header, with the nexthdr value updated
3281  * to be the header after the fragment header.
 *
 * Returns NULL whenever mp has been consumed: it was dropped, handed to
 * icmp_param_problem_v6(), or queued on the ill's reassembly hash table
 * to wait for more fragments.  When a complete message is returned the
 * receive attributes ira_pktlen, ira_ip_hdr_length and ira_protocol have
 * been updated and the accumulated hardware checksum state has been
 * stored in the returned mblk.
 *
 * NOTE(review): remlen is added to the fragment offset to form the end
 * offset of this fragment, so it is presumably the number of payload
 * bytes that follow the fragment header -- confirm against the caller.
3282  */
3283 mblk_t *
3284 ip_input_fragment_v6(mblk_t *mp, ip6_t *ip6h,
3285     ip6_frag_t *fraghdr, uint_t remlen, ip_recv_attr_t *ira)
3286 {
3287 	uint32_t	ident = ntohl(fraghdr->ip6f_ident);
3288 	uint16_t	offset;
3289 	boolean_t	more_frags;
3290 	uint8_t		nexthdr = fraghdr->ip6f_nxt;
3291 	in6_addr_t	*v6dst_ptr;
3292 	in6_addr_t	*v6src_ptr;
3293 	uint_t		end;
3294 	uint_t		hdr_length;
3295 	size_t		count;
3296 	ipf_t		*ipf;
3297 	ipf_t		**ipfp;
3298 	ipfb_t		*ipfb;
3299 	mblk_t		*mp1;
3300 	uint8_t		ecn_info = 0;
3301 	size_t		msg_len;
3302 	mblk_t		*tail_mp;
3303 	mblk_t		*t_mp;
3304 	boolean_t	pruned = B_FALSE;
3305 	uint32_t	sum_val;
3306 	uint16_t	sum_flags;
3307 	ill_t		*ill = ira->ira_ill;
3308 	ip_stack_t	*ipst = ill->ill_ipst;
3309 	uint_t		prev_nexthdr_offset;
3310 	uint8_t		prev_nexthdr;
3311 	uint8_t		*ptr;
3312 	uint32_t	packet_size;
3313
3314 	/*
3315 	 * We utilize hardware computed checksum info only for UDP since
3316 	 * IP fragmentation is a normal occurrence for the protocol.  In
3317 	 * addition, checksum offload support for IP fragments carrying
3318 	 * UDP payload is commonly implemented across network adapters.
3319 	 */
3320 	ASSERT(ira->ira_rill != NULL);
3321 	if (nexthdr == IPPROTO_UDP && dohwcksum &&
3322 	    ILL_HCKSUM_CAPABLE(ira->ira_rill) &&
3323 	    (DB_CKSUMFLAGS(mp) & (HCK_FULLCKSUM | HCK_PARTIALCKSUM))) {
		/* Note: this mp1 shadows the function-scope mp1 above. */
3324 		mblk_t *mp1 = mp->b_cont;
3325 		int32_t len;
3326
3327 		/* Record checksum information from the packet */
3328 		sum_val = (uint32_t)DB_CKSUM16(mp);
3329 		sum_flags = DB_CKSUMFLAGS(mp);
3330
3331 		/* fragmented payload offset from beginning of mblk */
3332 		offset = (uint16_t)((uchar_t *)&fraghdr[1] - mp->b_rptr);
3333
3334 		if ((sum_flags & HCK_PARTIALCKSUM) &&
3335 		    (mp1 == NULL || mp1->b_cont == NULL) &&
3336 		    offset >= DB_CKSUMSTART(mp) &&
3337 		    ((len = offset - DB_CKSUMSTART(mp)) & 1) == 0) {
3338 			uint32_t adj;
3339 			/*
3340 			 * Partial checksum has been calculated by hardware
3341 			 * and attached to the packet; in addition, any
3342 			 * prepended extraneous data is even byte aligned.
3343 			 * If any such data exists, we adjust the checksum;
3344 			 * this would also handle any postpended data.
3345 			 */
3346 			IP_ADJCKSUM_PARTIAL(mp->b_rptr + DB_CKSUMSTART(mp),
3347 			    mp, mp1, len, adj);
3348
3349 			/* One's complement subtract extraneous checksum */
3350 			if (adj >= sum_val)
3351 				sum_val = ~(adj - sum_val) & 0xFFFF;
3352 			else
3353 				sum_val -= adj;
3354 		}
3355 	} else {
3356 		sum_val = 0;
3357 		sum_flags = 0;
3358 	}
3359
3360 	/* Clear hardware checksumming flag */
3361 	DB_CKSUMFLAGS(mp) = 0;
3362
3363 	/*
3364 	 * Determine the offset (from the beginning of the IP header)
3365 	 * of the nexthdr value which has IPPROTO_FRAGMENT. We use
3366 	 * this when removing the fragment header from the packet.
3367 	 * This packet consists of the IPv6 header, a potential
3368 	 * hop-by-hop options header, a potential pre-routing-header
3369 	 * destination options header, and a potential routing header.
3370 	 */
3371 	prev_nexthdr_offset = (uint8_t *)&ip6h->ip6_nxt - (uint8_t *)ip6h;
3372 	prev_nexthdr = ip6h->ip6_nxt;
3373 	ptr = (uint8_t *)&ip6h[1];
3374
3375 	if (prev_nexthdr == IPPROTO_HOPOPTS) {
3376 		ip6_hbh_t	*hbh_hdr;
3377 		uint_t		hdr_len;
3378
3379 		hbh_hdr = (ip6_hbh_t *)ptr;
3380 		hdr_len = 8 * (hbh_hdr->ip6h_len + 1);
3381 		prev_nexthdr = hbh_hdr->ip6h_nxt;
3382 		prev_nexthdr_offset = (uint8_t *)&hbh_hdr->ip6h_nxt
3383 		    - (uint8_t *)ip6h;
3384 		ptr += hdr_len;
3385 	}
3386 	if (prev_nexthdr == IPPROTO_DSTOPTS) {
3387 		ip6_dest_t	*dest_hdr;
3388 		uint_t		hdr_len;
3389
3390 		dest_hdr = (ip6_dest_t *)ptr;
3391 		hdr_len = 8 * (dest_hdr->ip6d_len + 1);
3392 		prev_nexthdr = dest_hdr->ip6d_nxt;
3393 		prev_nexthdr_offset = (uint8_t *)&dest_hdr->ip6d_nxt
3394 		    - (uint8_t *)ip6h;
3395 		ptr += hdr_len;
3396 	}
3397 	if (prev_nexthdr == IPPROTO_ROUTING) {
3398 		ip6_rthdr_t	*rthdr;
3399 		uint_t		hdr_len;
3400
3401 		rthdr = (ip6_rthdr_t *)ptr;
3402 		prev_nexthdr = rthdr->ip6r_nxt;
3403 		prev_nexthdr_offset = (uint8_t *)&rthdr->ip6r_nxt
3404 		    - (uint8_t *)ip6h;
3405 		hdr_len = 8 * (rthdr->ip6r_len + 1);
3406 		ptr += hdr_len;
3407 	}
3408 	if (prev_nexthdr != IPPROTO_FRAGMENT) {
3409 		/* Can't handle other headers before the fragment header */
3410 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors);
3411 		ip_drop_input("ipIfStatsInHdrErrors", mp, ill);
3412 		freemsg(mp);
3413 		return (NULL);
3414 	}
3415
3416 	/*
3417 	 * Note: Fragment offset in header is in 8-octet units.
3418 	 * Clearing least significant 3 bits not only extracts
3419 	 * it but also gets it in units of octets.
3420 	 */
3421 	offset = ntohs(fraghdr->ip6f_offlg) & ~7;
	/*
	 * The flag is tested without byte-swapping the field, which
	 * assumes IP6F_MORE_FRAG is defined in network byte order --
	 * TODO confirm against the header that defines it.
	 */
3422 	more_frags = (fraghdr->ip6f_offlg & IP6F_MORE_FRAG);
3423
3424 	/*
3425 	 * Is the more frags flag on and the payload length not a multiple
3426 	 * of eight?
3427 	 */
3428 	if (more_frags && (ntohs(ip6h->ip6_plen) & 7)) {
3429 		ip_drop_input("ICMP_PARAM_PROBLEM", mp, ill);
3430 		icmp_param_problem_v6(mp, ICMP6_PARAMPROB_HEADER,
3431 		    (uint32_t)((char *)&ip6h->ip6_plen -
3432 		    (char *)ip6h), B_FALSE, ira);
3433 		return (NULL);
3434 	}
3435
3436 	v6src_ptr = &ip6h->ip6_src;
3437 	v6dst_ptr = &ip6h->ip6_dst;
3438 	end = remlen;
3439
3440 	hdr_length = (uint_t)((char *)&fraghdr[1] - (char *)ip6h);
3441 	end += offset;
3442
3443 	/*
3444 	 * Would fragment cause reassembled packet to have a payload length
3445 	 * greater than IP_MAXPACKET - the max payload size?
3446 	 */
3447 	if (end > IP_MAXPACKET) {
3448 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors);
3449 		ip_drop_input("Reassembled packet too large", mp, ill);
3450 		icmp_param_problem_v6(mp, ICMP6_PARAMPROB_HEADER,
3451 		    (uint32_t)((char *)&fraghdr->ip6f_offlg -
3452 		    (char *)ip6h), B_FALSE, ira);
3453 		return (NULL);
3454 	}
3455
3456 	/*
3457 	 * This packet just has one fragment. Reassembly not
3458 	 * needed.
3459 	 */
3460 	if (!more_frags && offset == 0) {
3461 		goto reass_done;
3462 	}
3463
3464 	/*
3465 	 * Drop the fragment as early as possible, if
3466 	 * we don't have resource(s) to re-assemble.
3467 	 */
3468 	if (ipst->ips_ip_reass_queue_bytes == 0) {
3469 		freemsg(mp);
3470 		return (NULL);
3471 	}
3472
3473 	/* Record the ECN field info. */
3474 	ecn_info = (uint8_t)(ntohl(ip6h->ip6_vcf & htonl(~0xFFCFFFFF)) >> 20);
3475 	/*
3476 	 * If this is not the first fragment, dump the unfragmentable
3477 	 * portion of the packet.
3478 	 */
3479 	if (offset)
3480 		mp->b_rptr = (uchar_t *)&fraghdr[1];
3481
3482 	/*
3483 	 * Fragmentation reassembly.  Each ILL has a hash table for
3484 	 * queueing packets undergoing reassembly for all IPIFs
3485 	 * associated with the ILL.  The hash is based on the packet
3486 	 * IP ident field.  The ILL frag hash table was allocated
3487 	 * as a timer block at the time the ILL was created.  Whenever
3488 	 * there is anything on the reassembly queue, the timer will
3489 	 * be running.
3490 	 */
3491 	/* Handle vnic loopback of fragments */
	/*
	 * msg_len is the byte count charged to the reassembly queue;
	 * mblks whose data block has db_ref > 2 are left out of it.
	 */
3492 	if (mp->b_datap->db_ref > 2)
3493 		msg_len = 0;
3494 	else
3495 		msg_len = MBLKSIZE(mp);
3496
3497 	tail_mp = mp;
3498 	while (tail_mp->b_cont != NULL) {
3499 		tail_mp = tail_mp->b_cont;
3500 		if (tail_mp->b_datap->db_ref <= 2)
3501 			msg_len += MBLKSIZE(tail_mp);
3502 	}
3503 	/*
3504 	 * If the reassembly list for this ILL will get too big
3505 	 * prune it.
3506 	 */
3507
3508 	if ((msg_len + sizeof (*ipf) + ill->ill_frag_count) >=
3509 	    ipst->ips_ip_reass_queue_bytes) {
3510 		DTRACE_PROBE3(ip_reass_queue_bytes, uint_t, msg_len,
3511 		    uint_t, ill->ill_frag_count,
3512 		    uint_t, ipst->ips_ip_reass_queue_bytes);
3513 		ill_frag_prune(ill,
3514 		    (ipst->ips_ip_reass_queue_bytes < msg_len) ? 0 :
3515 		    (ipst->ips_ip_reass_queue_bytes - msg_len));
3516 		pruned = B_TRUE;
3517 	}
3518
3519 	ipfb = &ill->ill_frag_hash_tbl[ILL_FRAG_HASH_V6(*v6src_ptr, ident)];
3520 	mutex_enter(&ipfb->ipfb_lock);
3521
3522 	ipfp = &ipfb->ipfb_ipf;
3523 	/* Try to find an existing fragment queue for this packet. */
	/*
	 * The loop is left with "break" only when a matching ipf was
	 * found.  When the end of the chain is reached a new ipf is
	 * set up instead and the function returns from inside the loop.
	 */
3524 	for (;;) {
3525 		ipf = ipfp[0];
3526 		if (ipf) {
3527 			/*
3528 			 * It has to match on ident, source address, and
3529 			 * dest address.
3530 			 */
3531 			if (ipf->ipf_ident == ident &&
3532 			    IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6src, v6src_ptr) &&
3533 			    IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6dst, v6dst_ptr)) {
3534
3535 				/*
3536 				 * If we have received too many
3537 				 * duplicate fragments for this packet
3538 				 * free it.
3539 				 */
3540 				if (ipf->ipf_num_dups > ip_max_frag_dups) {
3541 					ill_frag_free_pkts(ill, ipfb, ipf, 1);
3542 					freemsg(mp);
3543 					mutex_exit(&ipfb->ipfb_lock);
3544 					return (NULL);
3545 				}
3546
3547 				break;
3548 			}
3549 			ipfp = &ipf->ipf_hash_next;
3550 			continue;
3551 		}
3552
3553
3554 		/*
3555 		 * If we pruned the list, do we want to store this new
3556 		 * fragment?. We apply an optimization here based on the
3557 		 * fact that most fragments will be received in order.
3558 		 * So if the offset of this incoming fragment is zero,
3559 		 * it is the first fragment of a new packet. We will
3560 		 * keep it.  Otherwise drop the fragment, as we have
3561 		 * probably pruned the packet already (since the
3562 		 * packet cannot be found).
3563 		 */
3564
3565 		if (pruned && offset != 0) {
3566 			mutex_exit(&ipfb->ipfb_lock);
3567 			freemsg(mp);
3568 			return (NULL);
3569 		}
3570
3571 		/* New guy.  Allocate a frag message. */
3572 		mp1 = allocb(sizeof (*ipf), BPRI_MED);
3573 		if (!mp1) {
3574 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
3575 			ip_drop_input("ipIfStatsInDiscards", mp, ill);
3576 			freemsg(mp);
			/*
			 * Shared exit: every "goto partial_reass_done"
			 * arrives here with ipfb_lock held and with mp
			 * already freed or queued for reassembly.
			 */
3577 	partial_reass_done:
3578 			mutex_exit(&ipfb->ipfb_lock);
3579 			return (NULL);
3580 		}
3581
3582 		if (ipfb->ipfb_frag_pkts >= MAX_FRAG_PKTS(ipst))  {
3583 			/*
3584 			 * Too many fragmented packets in this hash bucket.
3585 			 * Free the oldest.
3586 			 */
3587 			ill_frag_free_pkts(ill, ipfb, ipfb->ipfb_ipf, 1);
3588 		}
3589
3590 		mp1->b_cont = mp;
3591
3592 		/* Initialize the fragment header. */
3593 		ipf = (ipf_t *)mp1->b_rptr;
3594 		ipf->ipf_mp = mp1;
3595 		ipf->ipf_ptphn = ipfp;
3596 		ipfp[0] = ipf;
3597 		ipf->ipf_hash_next = NULL;
3598 		ipf->ipf_ident = ident;
3599 		ipf->ipf_v6src = *v6src_ptr;
3600 		ipf->ipf_v6dst = *v6dst_ptr;
3601 		/* Record reassembly start time. */
3602 		ipf->ipf_timestamp = gethrestime_sec();
3603 		/* Record ipf generation and account for frag header */
3604 		ipf->ipf_gen = ill->ill_ipf_gen++;
3605 		ipf->ipf_count = MBLKSIZE(mp1);
3606 		ipf->ipf_protocol = nexthdr;
3607 		ipf->ipf_nf_hdr_len = 0;
3608 		ipf->ipf_prev_nexthdr_offset = 0;
3609 		ipf->ipf_last_frag_seen = B_FALSE;
3610 		ipf->ipf_ecn = ecn_info;
3611 		ipf->ipf_num_dups = 0;
3612 		ipfb->ipfb_frag_pkts++;
3613 		ipf->ipf_checksum = 0;
3614 		ipf->ipf_checksum_flags = 0;
3615
3616 		/* Store checksum value in fragment header */
3617 		if (sum_flags != 0) {
3618 			sum_val = (sum_val & 0xFFFF) + (sum_val >> 16);
3619 			sum_val = (sum_val & 0xFFFF) + (sum_val >> 16);
3620 			ipf->ipf_checksum = sum_val;
3621 			ipf->ipf_checksum_flags = sum_flags;
3622 		}
3623
3624 		/*
3625 		 * We handle reassembly two ways.  In the easy case,
3626 		 * where all the fragments show up in order, we do
3627 		 * minimal bookkeeping, and just clip new pieces on
3628 		 * the end.  If we ever see a hole, then we go off
3629 		 * to ip_reassemble which has to mark the pieces and
3630 		 * keep track of the number of holes, etc.  Obviously,
3631 		 * the point of having both mechanisms is so we can
3632 		 * handle the easy case as efficiently as possible.
3633 		 */
3634 		if (offset == 0) {
3635 			/* Easy case, in-order reassembly so far. */
3636 			/* Update the byte count */
3637 			ipf->ipf_count += msg_len;
3638 			ipf->ipf_tail_mp = tail_mp;
3639 			/*
3640 			 * Keep track of next expected offset in
3641 			 * ipf_end.
3642 			 */
3643 			ipf->ipf_end = end;
3644 			ipf->ipf_nf_hdr_len = hdr_length;
3645 			ipf->ipf_prev_nexthdr_offset = prev_nexthdr_offset;
3646 		} else {
3647 			/* Hard case, hole at the beginning. */
3648 			ipf->ipf_tail_mp = NULL;
3649 			/*
3650 			 * ipf_end == 0 means that we have given up
3651 			 * on easy reassembly.
3652 			 */
3653 			ipf->ipf_end = 0;
3654
3655 			/* Forget checksum offload from now on */
3656 			ipf->ipf_checksum_flags = 0;
3657
3658 			/*
3659 			 * ipf_hole_cnt is set by ip_reassemble.
3660 			 * ipf_count is updated by ip_reassemble.
3661 			 * No need to check for return value here
3662 			 * as we don't expect reassembly to complete or
3663 			 * fail for the first fragment itself.
3664 			 */
3665 			(void) ip_reassemble(mp, ipf, offset, more_frags, ill,
3666 			    msg_len);
3667 		}
3668 		/* Update per ipfb and ill byte counts */
3669 		ipfb->ipfb_count += ipf->ipf_count;
3670 		ASSERT(ipfb->ipfb_count > 0);	/* Wraparound */
3671 		atomic_add_32(&ill->ill_frag_count, ipf->ipf_count);
3672 		/* If the frag timer wasn't already going, start it. */
3673 		mutex_enter(&ill->ill_lock);
3674 		ill_frag_timer_start(ill);
3675 		mutex_exit(&ill->ill_lock);
3676 		goto partial_reass_done;
3677 	}
3678
3679 	/*
3680 	 * If the packet's flag has changed (it could be coming up
3681 	 * from an interface different than the previous, therefore
3682 	 * possibly different checksum capability), then forget about
3683 	 * any stored checksum states.  Otherwise add the value to
3684 	 * the existing one stored in the fragment header.
3685 	 */
3686 	if (sum_flags != 0 && sum_flags == ipf->ipf_checksum_flags) {
3687 		sum_val += ipf->ipf_checksum;
3688 		sum_val = (sum_val & 0xFFFF) + (sum_val >> 16);
3689 		sum_val = (sum_val & 0xFFFF) + (sum_val >> 16);
3690 		ipf->ipf_checksum = sum_val;
3691 	} else if (ipf->ipf_checksum_flags != 0) {
3692 		/* Forget checksum offload from now on */
3693 		ipf->ipf_checksum_flags = 0;
3694 	}
3695
3696 	/*
3697 	 * We have a new piece of a datagram which is already being
3698 	 * reassembled.  Update the ECN info if all IP fragments
3699 	 * are ECN capable.  If there is one which is not, clear
3700 	 * all the info.  If there is at least one which has CE
3701 	 * code point, IP needs to report that up to transport.
3702 	 */
3703 	if (ecn_info != IPH_ECN_NECT && ipf->ipf_ecn != IPH_ECN_NECT) {
3704 		if (ecn_info == IPH_ECN_CE)
3705 			ipf->ipf_ecn = IPH_ECN_CE;
3706 	} else {
3707 		ipf->ipf_ecn = IPH_ECN_NECT;
3708 	}
3709
3710 	if (offset && ipf->ipf_end == offset) {
3711 		/* The new fragment fits at the end */
3712 		ipf->ipf_tail_mp->b_cont = mp;
3713 		/* Update the byte count */
3714 		ipf->ipf_count += msg_len;
3715 		/* Update per ipfb and ill byte counts */
3716 		ipfb->ipfb_count += msg_len;
3717 		ASSERT(ipfb->ipfb_count > 0);	/* Wraparound */
3718 		atomic_add_32(&ill->ill_frag_count, msg_len);
3719 		if (more_frags) {
3720 			/* More to come. */
3721 			ipf->ipf_end = end;
3722 			ipf->ipf_tail_mp = tail_mp;
3723 			goto partial_reass_done;
3724 		}
3725 	} else {
3726 		/*
3727 		 * Go do the hard cases.
3728 		 * Call ip_reassemble().
3729 		 */
3730 		int ret;
3731
3732 		if (offset == 0) {
3733 			if (ipf->ipf_prev_nexthdr_offset == 0) {
3734 				ipf->ipf_nf_hdr_len = hdr_length;
3735 				ipf->ipf_prev_nexthdr_offset =
3736 				    prev_nexthdr_offset;
3737 			}
3738 		}
3739 		/* Save current byte count */
3740 		count = ipf->ipf_count;
3741 		ret = ip_reassemble(mp, ipf, offset, more_frags, ill, msg_len);
3742
3743 		/* Count of bytes added and subtracted (freeb()ed) */
3744 		count = ipf->ipf_count - count;
3745 		if (count) {
3746 			/* Update per ipfb and ill byte counts */
3747 			ipfb->ipfb_count += count;
3748 			ASSERT(ipfb->ipfb_count > 0);	/* Wraparound */
3749 			atomic_add_32(&ill->ill_frag_count, count);
3750 		}
3751 		if (ret == IP_REASS_PARTIAL) {
3752 			goto partial_reass_done;
3753 		} else if (ret == IP_REASS_FAILED) {
3754 			/* Reassembly failed. Free up all resources */
3755 			ill_frag_free_pkts(ill, ipfb, ipf, 1);
3756 			for (t_mp = mp; t_mp != NULL; t_mp = t_mp->b_cont) {
3757 				IP_REASS_SET_START(t_mp, 0);
3758 				IP_REASS_SET_END(t_mp, 0);
3759 			}
3760 			freemsg(mp);
3761 			goto partial_reass_done;
3762 		}
3763
3764 		/* We will reach here iff 'ret' is IP_REASS_COMPLETE */
3765 	}
3766 	/*
3767 	 * We have completed reassembly.  Unhook the frag header from
3768 	 * the reassembly list.
3769 	 *
3770 	 * Grab the unfragmentable header length next header value out
3771 	 * of the first fragment
3772 	 */
3773 	ASSERT(ipf->ipf_nf_hdr_len != 0);
3774 	hdr_length = ipf->ipf_nf_hdr_len;
3775
3776 	/*
3777 	 * Before we free the frag header, record the ECN info
3778 	 * to report back to the transport.
3779 	 */
3780 	ecn_info = ipf->ipf_ecn;
3781
3782 	/*
3783 	 * Store the nextheader field in the header preceding the fragment
3784 	 * header
3785 	 */
3786 	nexthdr = ipf->ipf_protocol;
3787 	prev_nexthdr_offset = ipf->ipf_prev_nexthdr_offset;
3788 	ipfp = ipf->ipf_ptphn;
3789
3790 	/* We need to supply these to caller */
3791 	if ((sum_flags = ipf->ipf_checksum_flags) != 0)
3792 		sum_val = ipf->ipf_checksum;
3793 	else
3794 		sum_val = 0;
3795
3796 	mp1 = ipf->ipf_mp;
3797 	count = ipf->ipf_count;
3798 	ipf = ipf->ipf_hash_next;
3799 	if (ipf)
3800 		ipf->ipf_ptphn = ipfp;
3801 	ipfp[0] = ipf;
3802 	atomic_add_32(&ill->ill_frag_count, -count);
3803 	ASSERT(ipfb->ipfb_count >= count);
3804 	ipfb->ipfb_count -= count;
3805 	ipfb->ipfb_frag_pkts--;
3806 	mutex_exit(&ipfb->ipfb_lock);
3807 	/* Ditch the frag header. */
3808 	mp = mp1->b_cont;
3809 	freeb(mp1);
3810
3811 	/*
3812 	 * Make sure the packet is good by doing some sanity
3813 	 * check. If bad we can silently drop the packet.
3814 	 */
3815 reass_done:
3816 	if (hdr_length < sizeof (ip6_frag_t)) {
3817 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors);
3818 		ip_drop_input("ipIfStatsInHdrErrors", mp, ill);
3819 		ip1dbg(("ip_input_fragment_v6: bad packet\n"));
3820 		freemsg(mp);
3821 		return (NULL);
3822 	}
3823
3824 	/*
3825 	 * Remove the fragment header from the initial header by
3826 	 * splitting the mblk into the non-fragmentable header and
3827 	 * everything after the fragment extension header.  This has the
3828 	 * side effect of putting all the headers that need destination
3829 	 * processing into the b_cont block-- on return this fact is
3830 	 * used in order to avoid having to look at the extensions
3831 	 * already processed.
3832 	 *
3833 	 * Note that this code assumes that the unfragmentable portion
3834 	 * of the header is in the first mblk and increments
3835 	 * the read pointer past it.  If this assumption is broken
3836 	 * this code fails badly.
3837 	 */
3838 	if (mp->b_rptr + hdr_length != mp->b_wptr) {
3839 		mblk_t *nmp;
3840
3841 		if (!(nmp = dupb(mp))) {
3842 			ip1dbg(("ip_input_fragment_v6: dupb failed\n"));
3843 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
3844 			ip_drop_input("ipIfStatsInDiscards", mp, ill);
3845 			freemsg(mp);
3846 			return (NULL);
3847 		}
3848 		nmp->b_cont = mp->b_cont;
3849 		mp->b_cont = nmp;
3850 		nmp->b_rptr += hdr_length;
3851 	}
3852 	mp->b_wptr = mp->b_rptr + hdr_length - sizeof (ip6_frag_t);
3853
3854 	ip6h = (ip6_t *)mp->b_rptr;
3855 	((char *)ip6h)[prev_nexthdr_offset] = nexthdr;
3856
3857 	/* Restore original IP length in header. */
3858 	packet_size = msgdsize(mp);
3859 	ip6h->ip6_plen = htons((uint16_t)(packet_size - IPV6_HDR_LEN));
3860 	/* Record the ECN info. */
3861 	ip6h->ip6_vcf &= htonl(0xFFCFFFFF);
3862 	ip6h->ip6_vcf |= htonl(ecn_info << 20);
3863
3864 	/* Update the receive attributes */
3865 	ira->ira_pktlen = packet_size;
3866 	ira->ira_ip_hdr_length = hdr_length - sizeof (ip6_frag_t);
3867 	ira->ira_protocol = nexthdr;
3868
3869 	/* Reassembly is successful; set checksum information in packet */
3870 	DB_CKSUM16(mp) = (uint16_t)sum_val;
3871 	DB_CKSUMFLAGS(mp) = sum_flags;
3872 	DB_CKSUMSTART(mp) = ira->ira_ip_hdr_length;
3873
3874 	return (mp);
3875 }
3876 
3877 /*
3878  * Given an mblk and a ptr, find the destination address in an IPv6 routing
3879  * header.
3880  */
3881 static in6_addr_t
3882 pluck_out_dst(const mblk_t *mp, uint8_t *whereptr, in6_addr_t oldrv)
3883 {
3884 	ip6_rthdr0_t *rt0;
3885 	int segleft, numaddr;
3886 	in6_addr_t *ap, rv = oldrv;
3887 
3888 	rt0 = (ip6_rthdr0_t *)whereptr;
3889 	if (rt0->ip6r0_type != 0 && rt0->ip6r0_type != 2) {
3890 		DTRACE_PROBE2(pluck_out_dst_unknown_type, mblk_t *, mp,
3891 		    uint8_t *, whereptr);
3892 		return (rv);
3893 	}
3894 	segleft = rt0->ip6r0_segleft;
3895 	numaddr = rt0->ip6r0_len / 2;
3896 
3897 	if ((rt0->ip6r0_len & 0x1) ||
3898 	    (mp != NULL && whereptr + (rt0->ip6r0_len + 1) * 8 > mp->b_wptr) ||
3899 	    (segleft > rt0->ip6r0_len / 2)) {
3900 		/*
3901 		 * Corrupt packet.  Either the routing header length is odd
3902 		 * (can't happen) or mismatched compared to the packet, or the
3903 		 * number of addresses is.  Return what we can.  This will
3904 		 * only be a problem on forwarded packets that get squeezed
3905 		 * through an outbound tunnel enforcing IPsec Tunnel Mode.
3906 		 */
3907 		DTRACE_PROBE2(pluck_out_dst_badpkt, mblk_t *, mp, uint8_t *,
3908 		    whereptr);
3909 		return (rv);
3910 	}
3911 
3912 	if (segleft != 0) {
3913 		ap = (in6_addr_t *)((char *)rt0 + sizeof (*rt0));
3914 		rv = ap[numaddr - 1];
3915 	}
3916 
3917 	return (rv);
3918 }
3919 
3920 /*
3921  * Walk through the options to see if there is a routing header.
3922  * If present get the destination which is the last address of
3923  * the option.
3924  * mp needs to be provided in cases when the extension headers might span
3925  * b_cont; mp is never modified by this function.
3926  */
3927 in6_addr_t
3928 ip_get_dst_v6(ip6_t *ip6h, const mblk_t *mp, boolean_t *is_fragment)
3929 {
3930 	const mblk_t *current_mp = mp;
3931 	uint8_t nexthdr;
3932 	uint8_t *whereptr;
3933 	int ehdrlen;
3934 	in6_addr_t rv;
3935 
3936 	whereptr = (uint8_t *)ip6h;
3937 	ehdrlen = sizeof (ip6_t);
3938 
3939 	/* We assume at least the IPv6 base header is within one mblk. */
3940 	ASSERT(mp == NULL ||
3941 	    (mp->b_rptr <= whereptr && mp->b_wptr >= whereptr + ehdrlen));
3942 
3943 	rv = ip6h->ip6_dst;
3944 	nexthdr = ip6h->ip6_nxt;
3945 	if (is_fragment != NULL)
3946 		*is_fragment = B_FALSE;
3947 
3948 	/*
3949 	 * We also assume (thanks to ipsec_tun_outbound()'s pullup) that
3950 	 * no extension headers will be split across mblks.
3951 	 */
3952 
3953 	while (nexthdr == IPPROTO_HOPOPTS || nexthdr == IPPROTO_DSTOPTS ||
3954 	    nexthdr == IPPROTO_ROUTING) {
3955 		if (nexthdr == IPPROTO_ROUTING)
3956 			rv = pluck_out_dst(current_mp, whereptr, rv);
3957 
3958 		/*
3959 		 * All IPv6 extension headers have the next-header in byte
3960 		 * 0, and the (length - 8) in 8-byte-words.
3961 		 */
3962 		while (current_mp != NULL &&
3963 		    whereptr + ehdrlen >= current_mp->b_wptr) {
3964 			ehdrlen -= (current_mp->b_wptr - whereptr);
3965 			current_mp = current_mp->b_cont;
3966 			if (current_mp == NULL) {
3967 				/* Bad packet.  Return what we can. */
3968 				DTRACE_PROBE3(ip_get_dst_v6_badpkt, mblk_t *,
3969 				    mp, mblk_t *, current_mp, ip6_t *, ip6h);
3970 				goto done;
3971 			}
3972 			whereptr = current_mp->b_rptr;
3973 		}
3974 		whereptr += ehdrlen;
3975 
3976 		nexthdr = *whereptr;
3977 		ASSERT(current_mp == NULL || whereptr + 1 < current_mp->b_wptr);
3978 		ehdrlen = (*(whereptr + 1) + 1) * 8;
3979 	}
3980 
3981 done:
3982 	if (nexthdr == IPPROTO_FRAGMENT && is_fragment != NULL)
3983 		*is_fragment = B_TRUE;
3984 	return (rv);
3985 }
3986 
3987 /*
3988  * ip_source_routed_v6:
3989  * This function is called by redirect code (called from ip_input_v6) to
3990  * know whether this packet is source routed through this node i.e
3991  * whether this node (router) is part of the journey. This
3992  * function is called under two cases :
3993  *
3994  * case 1 : Routing header was processed by this node and
3995  *	    ip_process_rthdr replaced ip6_dst with the next hop
3996  *	    and we are forwarding the packet to the next hop.
3997  *
3998  * case 2 : Routing header was not processed by this node and we
3999  *	    are just forwarding the packet.
4000  *
4001  * For case (1) we don't want to send redirects. For case(2) we
4002  * want to send redirects.
4003  */
4004 static boolean_t
4005 ip_source_routed_v6(ip6_t *ip6h, mblk_t *mp, ip_stack_t *ipst)
4006 {
4007 	uint8_t		nexthdr;
4008 	in6_addr_t	*addrptr;
4009 	ip6_rthdr0_t	*rthdr;
4010 	uint8_t		numaddr;
4011 	ip6_hbh_t	*hbhhdr;
4012 	uint_t		ehdrlen;
4013 	uint8_t		*byteptr;
4014 
4015 	ip2dbg(("ip_source_routed_v6\n"));
4016 	nexthdr = ip6h->ip6_nxt;
4017 	ehdrlen = IPV6_HDR_LEN;
4018 
4019 	/* if a routing hdr is preceeded by HOPOPT or DSTOPT */
4020 	while (nexthdr == IPPROTO_HOPOPTS ||
4021 	    nexthdr == IPPROTO_DSTOPTS) {
4022 		byteptr = (uint8_t *)ip6h + ehdrlen;
4023 		/*
4024 		 * Check if we have already processed
4025 		 * packets or we are just a forwarding
4026 		 * router which only pulled up msgs up
4027 		 * to IPV6HDR and  one HBH ext header
4028 		 */
4029 		if (byteptr + MIN_EHDR_LEN > mp->b_wptr) {
4030 			ip2dbg(("ip_source_routed_v6: Extension"
4031 			    " headers not processed\n"));
4032 			return (B_FALSE);
4033 		}
4034 		hbhhdr = (ip6_hbh_t *)byteptr;
4035 		nexthdr = hbhhdr->ip6h_nxt;
4036 		ehdrlen = ehdrlen + 8 * (hbhhdr->ip6h_len + 1);
4037 	}
4038 	switch (nexthdr) {
4039 	case IPPROTO_ROUTING:
4040 		byteptr = (uint8_t *)ip6h + ehdrlen;
4041 		/*
4042 		 * If for some reason, we haven't pulled up
4043 		 * the routing hdr data mblk, then we must
4044 		 * not have processed it at all. So for sure
4045 		 * we are not part of the source routed journey.
4046 		 */
4047 		if (byteptr + MIN_EHDR_LEN > mp->b_wptr) {
4048 			ip2dbg(("ip_source_routed_v6: Routing"
4049 			    " header not processed\n"));
4050 			return (B_FALSE);
4051 		}
4052 		rthdr = (ip6_rthdr0_t *)byteptr;
4053 		/*
4054 		 * Either we are an intermediate router or the
4055 		 * last hop before destination and we have
4056 		 * already processed the routing header.
4057 		 * If segment_left is greater than or equal to zero,
4058 		 * then we must be the (numaddr - segleft) entry
4059 		 * of the routing header. Although ip6r0_segleft
4060 		 * is a unit8_t variable, we still check for zero
4061 		 * or greater value, if in case the data type
4062 		 * is changed someday in future.
4063 		 */
4064 		if (rthdr->ip6r0_segleft > 0 ||
4065 		    rthdr->ip6r0_segleft == 0) {
4066 			numaddr = rthdr->ip6r0_len / 2;
4067 			addrptr = (in6_addr_t *)((char *)rthdr +
4068 			    sizeof (*rthdr));
4069 			addrptr += (numaddr - (rthdr->ip6r0_segleft + 1));
4070 			if (addrptr != NULL) {
4071 				if (ip_type_v6(addrptr, ipst) == IRE_LOCAL)
4072 					return (B_TRUE);
4073 				ip1dbg(("ip_source_routed_v6: Not local\n"));
4074 			}
4075 		}
4076 	/* FALLTHRU */
4077 	default:
4078 		ip2dbg(("ip_source_routed_v6: Not source routed here\n"));
4079 		return (B_FALSE);
4080 	}
4081 }
4082 
4083 /*
4084  * IPv6 fragmentation.  Essentially the same as IPv4 fragmentation.
4085  * We have not optimized this in terms of number of mblks
4086  * allocated. For instance, for each fragment sent we always allocate a
4087  * mblk to hold the IPv6 header and fragment header.
4088  *
4089  * Assumes that all the extension headers are contained in the first mblk
4090  * and that the fragment header has has already been added by calling
4091  * ip_fraghdr_add_v6.
4092  */
4093 int
4094 ip_fragment_v6(mblk_t *mp, nce_t *nce, iaflags_t ixaflags, uint_t pkt_len,
4095     uint32_t max_frag, uint32_t xmit_hint, zoneid_t szone, zoneid_t nolzid,
4096     pfirepostfrag_t postfragfn, uintptr_t *ixa_cookie)
4097 {
4098 	ip6_t		*ip6h = (ip6_t *)mp->b_rptr;
4099 	ip6_t		*fip6h;
4100 	mblk_t		*hmp;
4101 	mblk_t		*hmp0;
4102 	mblk_t		*dmp;
4103 	ip6_frag_t	*fraghdr;
4104 	size_t		unfragmentable_len;
4105 	size_t		mlen;
4106 	size_t		max_chunk;
4107 	uint16_t	off_flags;
4108 	uint16_t	offset = 0;
4109 	ill_t		*ill = nce->nce_ill;
4110 	uint8_t		nexthdr;
4111 	uint8_t		*ptr;
4112 	ip_stack_t	*ipst = ill->ill_ipst;
4113 	uint_t		priority = mp->b_band;
4114 	int		error = 0;
4115 
4116 	BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragReqds);
4117 	if (max_frag == 0) {
4118 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails);
4119 		ip_drop_output("FragFails: zero max_frag", mp, ill);
4120 		freemsg(mp);
4121 		return (EINVAL);
4122 	}
4123 
4124 	/*
4125 	 * Caller should have added fraghdr_t to pkt_len, and also
4126 	 * updated ip6_plen.
4127 	 */
4128 	ASSERT(ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN == pkt_len);
4129 	ASSERT(msgdsize(mp) == pkt_len);
4130 
4131 	/*
4132 	 * Determine the length of the unfragmentable portion of this
4133 	 * datagram.  This consists of the IPv6 header, a potential
4134 	 * hop-by-hop options header, a potential pre-routing-header
4135 	 * destination options header, and a potential routing header.
4136 	 */
4137 	nexthdr = ip6h->ip6_nxt;
4138 	ptr = (uint8_t *)&ip6h[1];
4139 
4140 	if (nexthdr == IPPROTO_HOPOPTS) {
4141 		ip6_hbh_t	*hbh_hdr;
4142 		uint_t		hdr_len;
4143 
4144 		hbh_hdr = (ip6_hbh_t *)ptr;
4145 		hdr_len = 8 * (hbh_hdr->ip6h_len + 1);
4146 		nexthdr = hbh_hdr->ip6h_nxt;
4147 		ptr += hdr_len;
4148 	}
4149 	if (nexthdr == IPPROTO_DSTOPTS) {
4150 		ip6_dest_t	*dest_hdr;
4151 		uint_t		hdr_len;
4152 
4153 		dest_hdr = (ip6_dest_t *)ptr;
4154 		if (dest_hdr->ip6d_nxt == IPPROTO_ROUTING) {
4155 			hdr_len = 8 * (dest_hdr->ip6d_len + 1);
4156 			nexthdr = dest_hdr->ip6d_nxt;
4157 			ptr += hdr_len;
4158 		}
4159 	}
4160 	if (nexthdr == IPPROTO_ROUTING) {
4161 		ip6_rthdr_t	*rthdr;
4162 		uint_t		hdr_len;
4163 
4164 		rthdr = (ip6_rthdr_t *)ptr;
4165 		nexthdr = rthdr->ip6r_nxt;
4166 		hdr_len = 8 * (rthdr->ip6r_len + 1);
4167 		ptr += hdr_len;
4168 	}
4169 	if (nexthdr != IPPROTO_FRAGMENT) {
4170 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails);
4171 		ip_drop_output("FragFails: bad nexthdr", mp, ill);
4172 		freemsg(mp);
4173 		return (EINVAL);
4174 	}
4175 	unfragmentable_len = (uint_t)(ptr - (uint8_t *)ip6h);
4176 	unfragmentable_len += sizeof (ip6_frag_t);
4177 
4178 	max_chunk = (max_frag - unfragmentable_len) & ~7;
4179 
4180 	/*
4181 	 * Allocate an mblk with enough room for the link-layer
4182 	 * header and the unfragmentable part of the datagram, which includes
4183 	 * the fragment header.  This (or a copy) will be used as the
4184 	 * first mblk for each fragment we send.
4185 	 */
4186 	hmp = allocb_tmpl(unfragmentable_len + ipst->ips_ip_wroff_extra, mp);
4187 	if (hmp == NULL) {
4188 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails);
4189 		ip_drop_output("FragFails: no hmp", mp, ill);
4190 		freemsg(mp);
4191 		return (ENOBUFS);
4192 	}
4193 	hmp->b_rptr += ipst->ips_ip_wroff_extra;
4194 	hmp->b_wptr = hmp->b_rptr + unfragmentable_len;
4195 
4196 	fip6h = (ip6_t *)hmp->b_rptr;
4197 	bcopy(ip6h, fip6h, unfragmentable_len);
4198 
4199 	/*
4200 	 * pkt_len is set to the total length of the fragmentable data in this
4201 	 * datagram.  For each fragment sent, we will decrement pkt_len
4202 	 * by the amount of fragmentable data sent in that fragment
4203 	 * until len reaches zero.
4204 	 */
4205 	pkt_len -= unfragmentable_len;
4206 
4207 	/*
4208 	 * Move read ptr past unfragmentable portion, we don't want this part
4209 	 * of the data in our fragments.
4210 	 */
4211 	mp->b_rptr += unfragmentable_len;
4212 	if (mp->b_rptr == mp->b_wptr) {
4213 		mblk_t *mp1 = mp->b_cont;
4214 		freeb(mp);
4215 		mp = mp1;
4216 	}
4217 
4218 	while (pkt_len != 0) {
4219 		mlen = MIN(pkt_len, max_chunk);
4220 		pkt_len -= mlen;
4221 		if (pkt_len != 0) {
4222 			/* Not last */
4223 			hmp0 = copyb(hmp);
4224 			if (hmp0 == NULL) {
4225 				BUMP_MIB(ill->ill_ip_mib,
4226 				    ipIfStatsOutFragFails);
4227 				ip_drop_output("FragFails: copyb failed",
4228 				    mp, ill);
4229 				freeb(hmp);
4230 				freemsg(mp);
4231 				ip1dbg(("ip_fragment_v6: copyb failed\n"));
4232 				return (ENOBUFS);
4233 			}
4234 			off_flags = IP6F_MORE_FRAG;
4235 		} else {
4236 			/* Last fragment */
4237 			hmp0 = hmp;
4238 			hmp = NULL;
4239 			off_flags = 0;
4240 		}
4241 		fip6h = (ip6_t *)(hmp0->b_rptr);
4242 		fraghdr = (ip6_frag_t *)(hmp0->b_rptr + unfragmentable_len -
4243 		    sizeof (ip6_frag_t));
4244 
4245 		fip6h->ip6_plen = htons((uint16_t)(mlen +
4246 		    unfragmentable_len - IPV6_HDR_LEN));
4247 		/*
4248 		 * Note: Optimization alert.
4249 		 * In IPv6 (and IPv4) protocol header, Fragment Offset
4250 		 * ("offset") is 13 bits wide and in 8-octet units.
4251 		 * In IPv6 protocol header (unlike IPv4) in a 16 bit field,
4252 		 * it occupies the most significant 13 bits.
4253 		 * (least significant 13 bits in IPv4).
4254 		 * We do not do any shifts here. Not shifting is same effect
4255 		 * as taking offset value in octet units, dividing by 8 and
4256 		 * then shifting 3 bits left to line it up in place in proper
4257 		 * place protocol header.
4258 		 */
4259 		fraghdr->ip6f_offlg = htons(offset) | off_flags;
4260 
4261 		if (!(dmp = ip_carve_mp(&mp, mlen))) {
4262 			/* mp has already been freed by ip_carve_mp() */
4263 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails);
4264 			ip_drop_output("FragFails: could not carve mp",
4265 			    hmp0, ill);
4266 			if (hmp != NULL)
4267 				freeb(hmp);
4268 			freeb(hmp0);
4269 			ip1dbg(("ip_carve_mp: failed\n"));
4270 			return (ENOBUFS);
4271 		}
4272 		hmp0->b_cont = dmp;
4273 		/* Get the priority marking, if any */
4274 		hmp0->b_band = priority;
4275 
4276 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragCreates);
4277 
4278 		error = postfragfn(hmp0, nce, ixaflags,
4279 		    mlen + unfragmentable_len, xmit_hint, szone, nolzid,
4280 		    ixa_cookie);
4281 		if (error != 0 && error != EWOULDBLOCK && hmp != NULL) {
4282 			/* No point in sending the other fragments */
4283 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails);
4284 			ip_drop_output("FragFails: postfragfn failed",
4285 			    hmp, ill);
4286 			freeb(hmp);
4287 			freemsg(mp);
4288 			return (error);
4289 		}
4290 		/* No need to redo state machine in loop */
4291 		ixaflags &= ~IXAF_REACH_CONF;
4292 
4293 		offset += mlen;
4294 	}
4295 	BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragOKs);
4296 	return (error);
4297 }
4298 
4299 /*
4300  * Add a fragment header to an IPv6 packet.
4301  * Assumes that all the extension headers are contained in the first mblk.
4302  *
4303  * The fragment header is inserted after an hop-by-hop options header
4304  * and after [an optional destinations header followed by] a routing header.
4305  */
4306 mblk_t *
4307 ip_fraghdr_add_v6(mblk_t *mp, uint32_t ident, ip_xmit_attr_t *ixa)
4308 {
4309 	ip6_t		*ip6h = (ip6_t *)mp->b_rptr;
4310 	ip6_t		*fip6h;
4311 	mblk_t		*hmp;
4312 	ip6_frag_t	*fraghdr;
4313 	size_t		unfragmentable_len;
4314 	uint8_t		nexthdr;
4315 	uint_t		prev_nexthdr_offset;
4316 	uint8_t		*ptr;
4317 	uint_t		priority = mp->b_band;
4318 	ip_stack_t	*ipst = ixa->ixa_ipst;
4319 
4320 	/*
4321 	 * Determine the length of the unfragmentable portion of this
4322 	 * datagram.  This consists of the IPv6 header, a potential
4323 	 * hop-by-hop options header, a potential pre-routing-header
4324 	 * destination options header, and a potential routing header.
4325 	 */
4326 	nexthdr = ip6h->ip6_nxt;
4327 	prev_nexthdr_offset = (uint8_t *)&ip6h->ip6_nxt - (uint8_t *)ip6h;
4328 	ptr = (uint8_t *)&ip6h[1];
4329 
4330 	if (nexthdr == IPPROTO_HOPOPTS) {
4331 		ip6_hbh_t	*hbh_hdr;
4332 		uint_t		hdr_len;
4333 
4334 		hbh_hdr = (ip6_hbh_t *)ptr;
4335 		hdr_len = 8 * (hbh_hdr->ip6h_len + 1);
4336 		nexthdr = hbh_hdr->ip6h_nxt;
4337 		prev_nexthdr_offset = (uint8_t *)&hbh_hdr->ip6h_nxt
4338 		    - (uint8_t *)ip6h;
4339 		ptr += hdr_len;
4340 	}
4341 	if (nexthdr == IPPROTO_DSTOPTS) {
4342 		ip6_dest_t	*dest_hdr;
4343 		uint_t		hdr_len;
4344 
4345 		dest_hdr = (ip6_dest_t *)ptr;
4346 		if (dest_hdr->ip6d_nxt == IPPROTO_ROUTING) {
4347 			hdr_len = 8 * (dest_hdr->ip6d_len + 1);
4348 			nexthdr = dest_hdr->ip6d_nxt;
4349 			prev_nexthdr_offset = (uint8_t *)&dest_hdr->ip6d_nxt
4350 			    - (uint8_t *)ip6h;
4351 			ptr += hdr_len;
4352 		}
4353 	}
4354 	if (nexthdr == IPPROTO_ROUTING) {
4355 		ip6_rthdr_t	*rthdr;
4356 		uint_t		hdr_len;
4357 
4358 		rthdr = (ip6_rthdr_t *)ptr;
4359 		nexthdr = rthdr->ip6r_nxt;
4360 		prev_nexthdr_offset = (uint8_t *)&rthdr->ip6r_nxt
4361 		    - (uint8_t *)ip6h;
4362 		hdr_len = 8 * (rthdr->ip6r_len + 1);
4363 		ptr += hdr_len;
4364 	}
4365 	unfragmentable_len = (uint_t)(ptr - (uint8_t *)ip6h);
4366 
4367 	/*
4368 	 * Allocate an mblk with enough room for the link-layer
4369 	 * header, the unfragmentable part of the datagram, and the
4370 	 * fragment header.
4371 	 */
4372 	hmp = allocb_tmpl(unfragmentable_len + sizeof (ip6_frag_t) +
4373 	    ipst->ips_ip_wroff_extra, mp);
4374 	if (hmp == NULL) {
4375 		ill_t *ill = ixa->ixa_nce->nce_ill;
4376 
4377 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
4378 		ip_drop_output("ipIfStatsOutDiscards: allocb failure", mp, ill);
4379 		freemsg(mp);
4380 		return (NULL);
4381 	}
4382 	hmp->b_rptr += ipst->ips_ip_wroff_extra;
4383 	hmp->b_wptr = hmp->b_rptr + unfragmentable_len + sizeof (ip6_frag_t);
4384 
4385 	fip6h = (ip6_t *)hmp->b_rptr;
4386 	fraghdr = (ip6_frag_t *)(hmp->b_rptr + unfragmentable_len);
4387 
4388 	bcopy(ip6h, fip6h, unfragmentable_len);
4389 	fip6h->ip6_plen = htons(ntohs(fip6h->ip6_plen) + sizeof (ip6_frag_t));
4390 	hmp->b_rptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT;
4391 
4392 	fraghdr->ip6f_nxt = nexthdr;
4393 	fraghdr->ip6f_reserved = 0;
4394 	fraghdr->ip6f_offlg = 0;
4395 	fraghdr->ip6f_ident = htonl(ident);
4396 
4397 	/* Get the priority marking, if any */
4398 	hmp->b_band = priority;
4399 
4400 	/*
4401 	 * Move read ptr past unfragmentable portion, we don't want this part
4402 	 * of the data in our fragments.
4403 	 */
4404 	mp->b_rptr += unfragmentable_len;
4405 	hmp->b_cont = mp;
4406 	return (hmp);
4407 }
4408 
4409 /*
4410  * Determine if the ill and multicast aspects of that packets
4411  * "matches" the conn.
4412  */
4413 boolean_t
4414 conn_wantpacket_v6(conn_t *connp, ip_recv_attr_t *ira, ip6_t *ip6h)
4415 {
4416 	ill_t		*ill = ira->ira_rill;
4417 	zoneid_t	zoneid = ira->ira_zoneid;
4418 	uint_t		in_ifindex;
4419 	in6_addr_t	*v6dst_ptr = &ip6h->ip6_dst;
4420 	in6_addr_t	*v6src_ptr = &ip6h->ip6_src;
4421 
4422 	/*
4423 	 * conn_incoming_ifindex is set by IPV6_BOUND_IF and as link-local
4424 	 * scopeid. This is used to limit
4425 	 * unicast and multicast reception to conn_incoming_ifindex.
4426 	 * conn_wantpacket_v6 is called both for unicast and
4427 	 * multicast packets.
4428 	 */
4429 	in_ifindex = connp->conn_incoming_ifindex;
4430 
4431 	/* mpathd can bind to the under IPMP interface, which we allow */
4432 	if (in_ifindex != 0 && in_ifindex != ill->ill_phyint->phyint_ifindex) {
4433 		if (!IS_UNDER_IPMP(ill))
4434 			return (B_FALSE);
4435 
4436 		if (in_ifindex != ipmp_ill_get_ipmp_ifindex(ill))
4437 			return (B_FALSE);
4438 	}
4439 
4440 	if (!IPCL_ZONE_MATCH(connp, zoneid))
4441 		return (B_FALSE);
4442 
4443 	if (!(ira->ira_flags & IRAF_MULTICAST))
4444 		return (B_TRUE);
4445 
4446 	if (connp->conn_multi_router)
4447 		return (B_TRUE);
4448 
4449 	if (ira->ira_protocol == IPPROTO_RSVP)
4450 		return (B_TRUE);
4451 
4452 	return (conn_hasmembers_ill_withsrc_v6(connp, v6dst_ptr, v6src_ptr,
4453 	    ira->ira_ill));
4454 }
4455 
4456 /*
4457  * pr_addr_dbg function provides the needed buffer space to call
4458  * inet_ntop() function's 3rd argument. This function should be
4459  * used by any kernel routine which wants to save INET6_ADDRSTRLEN
4460  * stack buffer space in it's own stack frame. This function uses
4461  * a buffer from it's own stack and prints the information.
4462  * Example: pr_addr_dbg("func: no route for %s\n ", AF_INET, addr)
4463  *
4464  * Note:    This function can call inet_ntop() once.
4465  */
4466 void
4467 pr_addr_dbg(char *fmt1, int af, const void *addr)
4468 {
4469 	char	buf[INET6_ADDRSTRLEN];
4470 
4471 	if (fmt1 == NULL) {
4472 		ip0dbg(("pr_addr_dbg: Wrong arguments\n"));
4473 		return;
4474 	}
4475 
4476 	/*
4477 	 * This does not compare debug level and just prints
4478 	 * out. Thus it is the responsibility of the caller
4479 	 * to check the appropriate debug-level before calling
4480 	 * this function.
4481 	 */
4482 	if (ip_debug > 0) {
4483 		printf(fmt1, inet_ntop(af, addr, buf, sizeof (buf)));
4484 	}
4485 
4486 
4487 }
4488 
4489 
4490 /*
4491  * Return the length in bytes of the IPv6 headers (base header
4492  * extension headers) that will be needed based on the
4493  * ip_pkt_t structure passed by the caller.
4494  *
4495  * The returned length does not include the length of the upper level
4496  * protocol (ULP) header.
4497  */
4498 int
4499 ip_total_hdrs_len_v6(const ip_pkt_t *ipp)
4500 {
4501 	int len;
4502 
4503 	len = IPV6_HDR_LEN;
4504 
4505 	/*
4506 	 * If there's a security label here, then we ignore any hop-by-hop
4507 	 * options the user may try to set.
4508 	 */
4509 	if (ipp->ipp_fields & IPPF_LABEL_V6) {
4510 		uint_t hopoptslen;
4511 		/*
4512 		 * Note that ipp_label_len_v6 is just the option - not
4513 		 * the hopopts extension header. It also needs to be padded
4514 		 * to a multiple of 8 bytes.
4515 		 */
4516 		ASSERT(ipp->ipp_label_len_v6 != 0);
4517 		hopoptslen = ipp->ipp_label_len_v6 + sizeof (ip6_hbh_t);
4518 		hopoptslen = (hopoptslen + 7)/8 * 8;
4519 		len += hopoptslen;
4520 	} else if (ipp->ipp_fields & IPPF_HOPOPTS) {
4521 		ASSERT(ipp->ipp_hopoptslen != 0);
4522 		len += ipp->ipp_hopoptslen;
4523 	}
4524 
4525 	/*
4526 	 * En-route destination options
4527 	 * Only do them if there's a routing header as well
4528 	 */
4529 	if ((ipp->ipp_fields & (IPPF_RTHDRDSTOPTS|IPPF_RTHDR)) ==
4530 	    (IPPF_RTHDRDSTOPTS|IPPF_RTHDR)) {
4531 		ASSERT(ipp->ipp_rthdrdstoptslen != 0);
4532 		len += ipp->ipp_rthdrdstoptslen;
4533 	}
4534 	if (ipp->ipp_fields & IPPF_RTHDR) {
4535 		ASSERT(ipp->ipp_rthdrlen != 0);
4536 		len += ipp->ipp_rthdrlen;
4537 	}
4538 	if (ipp->ipp_fields & IPPF_DSTOPTS) {
4539 		ASSERT(ipp->ipp_dstoptslen != 0);
4540 		len += ipp->ipp_dstoptslen;
4541 	}
4542 	return (len);
4543 }
4544 
4545 /*
4546  * All-purpose routine to build a header chain of an IPv6 header
4547  * followed by any required extension headers and a proto header.
4548  *
4549  * The caller has to set the source and destination address as well as
4550  * ip6_plen. The caller has to massage any routing header and compensate
4551  * for the ULP pseudo-header checksum due to the source route.
4552  *
4553  * The extension headers will all be fully filled in.
4554  */
4555 void
4556 ip_build_hdrs_v6(uchar_t *buf, uint_t buf_len, const ip_pkt_t *ipp,
4557     uint8_t protocol, uint32_t flowinfo)
4558 {
4559 	uint8_t *nxthdr_ptr;
4560 	uint8_t *cp;
4561 	ip6_t	*ip6h = (ip6_t *)buf;
4562 
4563 	/* Initialize IPv6 header */
4564 	ip6h->ip6_vcf =
4565 	    (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) |
4566 	    (flowinfo & ~IPV6_VERS_AND_FLOW_MASK);
4567 
4568 	if (ipp->ipp_fields & IPPF_TCLASS) {
4569 		/* Overrides the class part of flowinfo */
4570 		ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf,
4571 		    ipp->ipp_tclass);
4572 	}
4573 
4574 	if (ipp->ipp_fields & IPPF_HOPLIMIT)
4575 		ip6h->ip6_hops = ipp->ipp_hoplimit;
4576 	else
4577 		ip6h->ip6_hops = ipp->ipp_unicast_hops;
4578 
4579 	if ((ipp->ipp_fields & IPPF_ADDR) &&
4580 	    !IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr))
4581 		ip6h->ip6_src = ipp->ipp_addr;
4582 
4583 	nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt;
4584 	cp = (uint8_t *)&ip6h[1];
4585 	/*
4586 	 * Here's where we have to start stringing together
4587 	 * any extension headers in the right order:
4588 	 * Hop-by-hop, destination, routing, and final destination opts.
4589 	 */
4590 	/*
4591 	 * If there's a security label here, then we ignore any hop-by-hop
4592 	 * options the user may try to set.
4593 	 */
4594 	if (ipp->ipp_fields & IPPF_LABEL_V6) {
4595 		/*
4596 		 * Hop-by-hop options with the label.
4597 		 * Note that ipp_label_v6 is just the option - not
4598 		 * the hopopts extension header. It also needs to be padded
4599 		 * to a multiple of 8 bytes.
4600 		 */
4601 		ip6_hbh_t *hbh = (ip6_hbh_t *)cp;
4602 		uint_t hopoptslen;
4603 		uint_t padlen;
4604 
4605 		padlen = ipp->ipp_label_len_v6 + sizeof (ip6_hbh_t);
4606 		hopoptslen = (padlen + 7)/8 * 8;
4607 		padlen = hopoptslen - padlen;
4608 
4609 		*nxthdr_ptr = IPPROTO_HOPOPTS;
4610 		nxthdr_ptr = &hbh->ip6h_nxt;
4611 		hbh->ip6h_len = hopoptslen/8 - 1;
4612 		cp += sizeof (ip6_hbh_t);
4613 		bcopy(ipp->ipp_label_v6, cp, ipp->ipp_label_len_v6);
4614 		cp += ipp->ipp_label_len_v6;
4615 
4616 		ASSERT(padlen <= 7);
4617 		switch (padlen) {
4618 		case 0:
4619 			break;
4620 		case 1:
4621 			cp[0] = IP6OPT_PAD1;
4622 			break;
4623 		default:
4624 			cp[0] = IP6OPT_PADN;
4625 			cp[1] = padlen - 2;
4626 			bzero(&cp[2], padlen - 2);
4627 			break;
4628 		}
4629 		cp += padlen;
4630 	} else if (ipp->ipp_fields & IPPF_HOPOPTS) {
4631 		/* Hop-by-hop options */
4632 		ip6_hbh_t *hbh = (ip6_hbh_t *)cp;
4633 
4634 		*nxthdr_ptr = IPPROTO_HOPOPTS;
4635 		nxthdr_ptr = &hbh->ip6h_nxt;
4636 
4637 		bcopy(ipp->ipp_hopopts, cp, ipp->ipp_hopoptslen);
4638 		cp += ipp->ipp_hopoptslen;
4639 	}
4640 	/*
4641 	 * En-route destination options
4642 	 * Only do them if there's a routing header as well
4643 	 */
4644 	if ((ipp->ipp_fields & (IPPF_RTHDRDSTOPTS|IPPF_RTHDR)) ==
4645 	    (IPPF_RTHDRDSTOPTS|IPPF_RTHDR)) {
4646 		ip6_dest_t *dst = (ip6_dest_t *)cp;
4647 
4648 		*nxthdr_ptr = IPPROTO_DSTOPTS;
4649 		nxthdr_ptr = &dst->ip6d_nxt;
4650 
4651 		bcopy(ipp->ipp_rthdrdstopts, cp, ipp->ipp_rthdrdstoptslen);
4652 		cp += ipp->ipp_rthdrdstoptslen;
4653 	}
4654 	/*
4655 	 * Routing header next
4656 	 */
4657 	if (ipp->ipp_fields & IPPF_RTHDR) {
4658 		ip6_rthdr_t *rt = (ip6_rthdr_t *)cp;
4659 
4660 		*nxthdr_ptr = IPPROTO_ROUTING;
4661 		nxthdr_ptr = &rt->ip6r_nxt;
4662 
4663 		bcopy(ipp->ipp_rthdr, cp, ipp->ipp_rthdrlen);
4664 		cp += ipp->ipp_rthdrlen;
4665 	}
4666 	/*
4667 	 * Do ultimate destination options
4668 	 */
4669 	if (ipp->ipp_fields & IPPF_DSTOPTS) {
4670 		ip6_dest_t *dest = (ip6_dest_t *)cp;
4671 
4672 		*nxthdr_ptr = IPPROTO_DSTOPTS;
4673 		nxthdr_ptr = &dest->ip6d_nxt;
4674 
4675 		bcopy(ipp->ipp_dstopts, cp, ipp->ipp_dstoptslen);
4676 		cp += ipp->ipp_dstoptslen;
4677 	}
4678 	/*
4679 	 * Now set the last header pointer to the proto passed in
4680 	 */
4681 	*nxthdr_ptr = protocol;
4682 	ASSERT((int)(cp - buf) == buf_len);
4683 }
4684 
4685 /*
4686  * Return a pointer to the routing header extension header
4687  * in the IPv6 header(s) chain passed in.
4688  * If none found, return NULL
4689  * Assumes that all extension headers are in same mblk as the v6 header
4690  */
4691 ip6_rthdr_t *
4692 ip_find_rthdr_v6(ip6_t *ip6h, uint8_t *endptr)
4693 {
4694 	ip6_dest_t	*desthdr;
4695 	ip6_frag_t	*fraghdr;
4696 	uint_t		hdrlen;
4697 	uint8_t		nexthdr;
4698 	uint8_t		*ptr = (uint8_t *)&ip6h[1];
4699 
4700 	if (ip6h->ip6_nxt == IPPROTO_ROUTING)
4701 		return ((ip6_rthdr_t *)ptr);
4702 
4703 	/*
4704 	 * The routing header will precede all extension headers
4705 	 * other than the hop-by-hop and destination options
4706 	 * extension headers, so if we see anything other than those,
4707 	 * we're done and didn't find it.
4708 	 * We could see a destination options header alone but no
4709 	 * routing header, in which case we'll return NULL as soon as
4710 	 * we see anything after that.
4711 	 * Hop-by-hop and destination option headers are identical,
4712 	 * so we can use either one we want as a template.
4713 	 */
4714 	nexthdr = ip6h->ip6_nxt;
4715 	while (ptr < endptr) {
4716 		/* Is there enough left for len + nexthdr? */
4717 		if (ptr + MIN_EHDR_LEN > endptr)
4718 			return (NULL);
4719 
4720 		switch (nexthdr) {
4721 		case IPPROTO_HOPOPTS:
4722 		case IPPROTO_DSTOPTS:
4723 			/* Assumes the headers are identical for hbh and dst */
4724 			desthdr = (ip6_dest_t *)ptr;
4725 			hdrlen = 8 * (desthdr->ip6d_len + 1);
4726 			nexthdr = desthdr->ip6d_nxt;
4727 			break;
4728 
4729 		case IPPROTO_ROUTING:
4730 			return ((ip6_rthdr_t *)ptr);
4731 
4732 		case IPPROTO_FRAGMENT:
4733 			fraghdr = (ip6_frag_t *)ptr;
4734 			hdrlen = sizeof (ip6_frag_t);
4735 			nexthdr = fraghdr->ip6f_nxt;
4736 			break;
4737 
4738 		default:
4739 			return (NULL);
4740 		}
4741 		ptr += hdrlen;
4742 	}
4743 	return (NULL);
4744 }
4745 
4746 /*
4747  * Called for source-routed packets originating on this node.
4748  * Manipulates the original routing header by moving every entry up
4749  * one slot, placing the first entry in the v6 header's v6_dst field,
4750  * and placing the ultimate destination in the routing header's last
4751  * slot.
4752  *
4753  * Returns the checksum diference between the ultimate destination
4754  * (last hop in the routing header when the packet is sent) and
4755  * the first hop (ip6_dst when the packet is sent)
4756  */
4757 /* ARGSUSED2 */
4758 uint32_t
4759 ip_massage_options_v6(ip6_t *ip6h, ip6_rthdr_t *rth, netstack_t *ns)
4760 {
4761 	uint_t		numaddr;
4762 	uint_t		i;
4763 	in6_addr_t	*addrptr;
4764 	in6_addr_t	tmp;
4765 	ip6_rthdr0_t	*rthdr = (ip6_rthdr0_t *)rth;
4766 	uint32_t	cksm;
4767 	uint32_t	addrsum = 0;
4768 	uint16_t	*ptr;
4769 
4770 	/*
4771 	 * Perform any processing needed for source routing.
4772 	 * We know that all extension headers will be in the same mblk
4773 	 * as the IPv6 header.
4774 	 */
4775 
4776 	/*
4777 	 * If no segments left in header, or the header length field is zero,
4778 	 * don't move hop addresses around;
4779 	 * Checksum difference is zero.
4780 	 */
4781 	if ((rthdr->ip6r0_segleft == 0) || (rthdr->ip6r0_len == 0))
4782 		return (0);
4783 
4784 	ptr = (uint16_t *)&ip6h->ip6_dst;
4785 	cksm = 0;
4786 	for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) {
4787 		cksm += ptr[i];
4788 	}
4789 	cksm = (cksm & 0xFFFF) + (cksm >> 16);
4790 
4791 	/*
4792 	 * Here's where the fun begins - we have to
4793 	 * move all addresses up one spot, take the
4794 	 * first hop and make it our first ip6_dst,
4795 	 * and place the ultimate destination in the
4796 	 * newly-opened last slot.
4797 	 */
4798 	addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr));
4799 	numaddr = rthdr->ip6r0_len / 2;
4800 	tmp = *addrptr;
4801 	for (i = 0; i < (numaddr - 1); addrptr++, i++) {
4802 		*addrptr = addrptr[1];
4803 	}
4804 	*addrptr = ip6h->ip6_dst;
4805 	ip6h->ip6_dst = tmp;
4806 
4807 	/*
4808 	 * From the checksummed ultimate destination subtract the checksummed
4809 	 * current ip6_dst (the first hop address). Return that number.
4810 	 * (In the v4 case, the second part of this is done in each routine
4811 	 *  that calls ip_massage_options(). We do it all in this one place
4812 	 *  for v6).
4813 	 */
4814 	ptr = (uint16_t *)&ip6h->ip6_dst;
4815 	for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) {
4816 		addrsum += ptr[i];
4817 	}
4818 	cksm -= ((addrsum >> 16) + (addrsum & 0xFFFF));
4819 	if ((int)cksm < 0)
4820 		cksm--;
4821 	cksm = (cksm & 0xFFFF) + (cksm >> 16);
4822 
4823 	return (cksm);
4824 }
4825 
4826 void
4827 *ip6_kstat_init(netstackid_t stackid, ip6_stat_t *ip6_statisticsp)
4828 {
4829 	kstat_t *ksp;
4830 
4831 	ip6_stat_t template = {
4832 		{ "ip6_udp_fannorm", 	KSTAT_DATA_UINT64 },
4833 		{ "ip6_udp_fanmb", 	KSTAT_DATA_UINT64 },
4834 		{ "ip6_recv_pullup", 		KSTAT_DATA_UINT64 },
4835 		{ "ip6_db_ref",			KSTAT_DATA_UINT64 },
4836 		{ "ip6_notaligned",		KSTAT_DATA_UINT64 },
4837 		{ "ip6_multimblk",		KSTAT_DATA_UINT64 },
4838 		{ "ipsec_proto_ahesp",		KSTAT_DATA_UINT64 },
4839 		{ "ip6_out_sw_cksum",			KSTAT_DATA_UINT64 },
4840 		{ "ip6_out_sw_cksum_bytes",		KSTAT_DATA_UINT64 },
4841 		{ "ip6_in_sw_cksum",			KSTAT_DATA_UINT64 },
4842 		{ "ip6_tcp_in_full_hw_cksum_err",	KSTAT_DATA_UINT64 },
4843 		{ "ip6_tcp_in_part_hw_cksum_err",	KSTAT_DATA_UINT64 },
4844 		{ "ip6_tcp_in_sw_cksum_err",		KSTAT_DATA_UINT64 },
4845 		{ "ip6_udp_in_full_hw_cksum_err",	KSTAT_DATA_UINT64 },
4846 		{ "ip6_udp_in_part_hw_cksum_err",	KSTAT_DATA_UINT64 },
4847 		{ "ip6_udp_in_sw_cksum_err",		KSTAT_DATA_UINT64 },
4848 	};
4849 	ksp = kstat_create_netstack("ip", 0, "ip6stat", "net",
4850 	    KSTAT_TYPE_NAMED, sizeof (template) / sizeof (kstat_named_t),
4851 	    KSTAT_FLAG_VIRTUAL, stackid);
4852 
4853 	if (ksp == NULL)
4854 		return (NULL);
4855 
4856 	bcopy(&template, ip6_statisticsp, sizeof (template));
4857 	ksp->ks_data = (void *)ip6_statisticsp;
4858 	ksp->ks_private = (void *)(uintptr_t)stackid;
4859 
4860 	kstat_install(ksp);
4861 	return (ksp);
4862 }
4863 
4864 void
4865 ip6_kstat_fini(netstackid_t stackid, kstat_t *ksp)
4866 {
4867 	if (ksp != NULL) {
4868 		ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private);
4869 		kstat_delete_netstack(ksp, stackid);
4870 	}
4871 }
4872 
4873 /*
4874  * The following two functions set and get the value for the
4875  * IPV6_SRC_PREFERENCES socket option.
4876  */
4877 int
4878 ip6_set_src_preferences(ip_xmit_attr_t *ixa, uint32_t prefs)
4879 {
4880 	/*
4881 	 * We only support preferences that are covered by
4882 	 * IPV6_PREFER_SRC_MASK.
4883 	 */
4884 	if (prefs & ~IPV6_PREFER_SRC_MASK)
4885 		return (EINVAL);
4886 
4887 	/*
4888 	 * Look for conflicting preferences or default preferences.  If
4889 	 * both bits of a related pair are clear, the application wants the
4890 	 * system's default value for that pair.  Both bits in a pair can't
4891 	 * be set.
4892 	 */
4893 	if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 0) {
4894 		prefs |= IPV6_PREFER_SRC_MIPDEFAULT;
4895 	} else if ((prefs & IPV6_PREFER_SRC_MIPMASK) ==
4896 	    IPV6_PREFER_SRC_MIPMASK) {
4897 		return (EINVAL);
4898 	}
4899 	if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 0) {
4900 		prefs |= IPV6_PREFER_SRC_TMPDEFAULT;
4901 	} else if ((prefs & IPV6_PREFER_SRC_TMPMASK) ==
4902 	    IPV6_PREFER_SRC_TMPMASK) {
4903 		return (EINVAL);
4904 	}
4905 	if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 0) {
4906 		prefs |= IPV6_PREFER_SRC_CGADEFAULT;
4907 	} else if ((prefs & IPV6_PREFER_SRC_CGAMASK) ==
4908 	    IPV6_PREFER_SRC_CGAMASK) {
4909 		return (EINVAL);
4910 	}
4911 
4912 	ixa->ixa_src_preferences = prefs;
4913 	return (0);
4914 }
4915 
4916 size_t
4917 ip6_get_src_preferences(ip_xmit_attr_t *ixa, uint32_t *val)
4918 {
4919 	*val = ixa->ixa_src_preferences;
4920 	return (sizeof (ixa->ixa_src_preferences));
4921 }
4922 
4923 /*
4924  * Get the size of the IP options (including the IP headers size)
4925  * without including the AH header's size. If till_ah is B_FALSE,
4926  * and if AH header is present, dest options beyond AH header will
4927  * also be included in the returned size.
4928  */
4929 int
4930 ipsec_ah_get_hdr_size_v6(mblk_t *mp, boolean_t till_ah)
4931 {
4932 	ip6_t *ip6h;
4933 	uint8_t nexthdr;
4934 	uint8_t *whereptr;
4935 	ip6_hbh_t *hbhhdr;
4936 	ip6_dest_t *dsthdr;
4937 	ip6_rthdr_t *rthdr;
4938 	int ehdrlen;
4939 	int size;
4940 	ah_t *ah;
4941 
4942 	ip6h = (ip6_t *)mp->b_rptr;
4943 	size = IPV6_HDR_LEN;
4944 	nexthdr = ip6h->ip6_nxt;
4945 	whereptr = (uint8_t *)&ip6h[1];
4946 	for (;;) {
4947 		/* Assume IP has already stripped it */
4948 		ASSERT(nexthdr != IPPROTO_FRAGMENT);
4949 		switch (nexthdr) {
4950 		case IPPROTO_HOPOPTS:
4951 			hbhhdr = (ip6_hbh_t *)whereptr;
4952 			nexthdr = hbhhdr->ip6h_nxt;
4953 			ehdrlen = 8 * (hbhhdr->ip6h_len + 1);
4954 			break;
4955 		case IPPROTO_DSTOPTS:
4956 			dsthdr = (ip6_dest_t *)whereptr;
4957 			nexthdr = dsthdr->ip6d_nxt;
4958 			ehdrlen = 8 * (dsthdr->ip6d_len + 1);
4959 			break;
4960 		case IPPROTO_ROUTING:
4961 			rthdr = (ip6_rthdr_t *)whereptr;
4962 			nexthdr = rthdr->ip6r_nxt;
4963 			ehdrlen = 8 * (rthdr->ip6r_len + 1);
4964 			break;
4965 		default :
4966 			if (till_ah) {
4967 				ASSERT(nexthdr == IPPROTO_AH);
4968 				return (size);
4969 			}
4970 			/*
4971 			 * If we don't have a AH header to traverse,
4972 			 * return now. This happens normally for
4973 			 * outbound datagrams where we have not inserted
4974 			 * the AH header.
4975 			 */
4976 			if (nexthdr != IPPROTO_AH) {
4977 				return (size);
4978 			}
4979 
4980 			/*
4981 			 * We don't include the AH header's size
4982 			 * to be symmetrical with other cases where
4983 			 * we either don't have a AH header (outbound)
4984 			 * or peek into the AH header yet (inbound and
4985 			 * not pulled up yet).
4986 			 */
4987 			ah = (ah_t *)whereptr;
4988 			nexthdr = ah->ah_nexthdr;
4989 			ehdrlen = (ah->ah_length << 2) + 8;
4990 
4991 			if (nexthdr == IPPROTO_DSTOPTS) {
4992 				if (whereptr + ehdrlen >= mp->b_wptr) {
4993 					/*
4994 					 * The destination options header
4995 					 * is not part of the first mblk.
4996 					 */
4997 					whereptr = mp->b_cont->b_rptr;
4998 				} else {
4999 					whereptr += ehdrlen;
5000 				}
5001 
5002 				dsthdr = (ip6_dest_t *)whereptr;
5003 				ehdrlen = 8 * (dsthdr->ip6d_len + 1);
5004 				size += ehdrlen;
5005 			}
5006 			return (size);
5007 		}
5008 		whereptr += ehdrlen;
5009 		size += ehdrlen;
5010 	}
5011 }
5012 
5013 /*
5014  * Utility routine that checks if `v6srcp' is a valid address on underlying
5015  * interface `ill'.  If `ipifp' is non-NULL, it's set to a held ipif
5016  * associated with `v6srcp' on success.  NOTE: if this is not called from
5017  * inside the IPSQ (ill_g_lock is not held), `ill' may be removed from the
5018  * group during or after this lookup.
5019  */
5020 boolean_t
5021 ipif_lookup_testaddr_v6(ill_t *ill, const in6_addr_t *v6srcp, ipif_t **ipifp)
5022 {
5023 	ipif_t *ipif;
5024 
5025 
5026 	ipif = ipif_lookup_addr_exact_v6(v6srcp, ill, ill->ill_ipst);
5027 	if (ipif != NULL) {
5028 		if (ipifp != NULL)
5029 			*ipifp = ipif;
5030 		else
5031 			ipif_refrele(ipif);
5032 		return (B_TRUE);
5033 	}
5034 
5035 	if (ip_debug > 2) {
5036 		pr_addr_dbg("ipif_lookup_testaddr_v6: cannot find ipif for "
5037 		    "src %s\n", AF_INET6, v6srcp);
5038 	}
5039 	return (B_FALSE);
5040 }
5041