xref: /illumos-gate/usr/src/uts/common/inet/ip/ip6.c (revision 9495f63e)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright (c) 1990 Mentat Inc.
24  * Copyright 2017 OmniTI Computer Consulting, Inc. All rights reserved.
25  * Copyright 2021 Joyent, Inc.
26  */
27 
28 #include <sys/types.h>
29 #include <sys/stream.h>
30 #include <sys/dlpi.h>
31 #include <sys/stropts.h>
32 #include <sys/sysmacros.h>
33 #include <sys/strsun.h>
34 #include <sys/strlog.h>
35 #include <sys/strsubr.h>
36 #define	_SUN_TPI_VERSION	2
37 #include <sys/tihdr.h>
38 #include <sys/ddi.h>
39 #include <sys/sunddi.h>
40 #include <sys/cmn_err.h>
41 #include <sys/debug.h>
42 #include <sys/sdt.h>
43 #include <sys/kobj.h>
44 #include <sys/zone.h>
45 #include <sys/neti.h>
46 #include <sys/hook.h>
47 
48 #include <sys/kmem.h>
49 #include <sys/systm.h>
50 #include <sys/param.h>
51 #include <sys/socket.h>
52 #include <sys/vtrace.h>
53 #include <sys/isa_defs.h>
54 #include <sys/atomic.h>
55 #include <sys/policy.h>
56 #include <sys/mac.h>
57 #include <net/if.h>
58 #include <net/if_types.h>
59 #include <net/route.h>
60 #include <net/if_dl.h>
61 #include <sys/sockio.h>
62 #include <netinet/in.h>
63 #include <netinet/ip6.h>
64 #include <netinet/icmp6.h>
65 #include <netinet/sctp.h>
66 
67 #include <inet/common.h>
68 #include <inet/mi.h>
69 #include <inet/optcom.h>
70 #include <inet/mib2.h>
71 #include <inet/nd.h>
72 #include <inet/arp.h>
73 
74 #include <inet/ip.h>
75 #include <inet/ip_impl.h>
76 #include <inet/ip6.h>
77 #include <inet/ip6_asp.h>
78 #include <inet/tcp.h>
79 #include <inet/tcp_impl.h>
80 #include <inet/udp_impl.h>
81 #include <inet/ipp_common.h>
82 
83 #include <inet/ip_multi.h>
84 #include <inet/ip_if.h>
85 #include <inet/ip_ire.h>
86 #include <inet/ip_rts.h>
87 #include <inet/ip_ndp.h>
88 #include <net/pfkeyv2.h>
89 #include <inet/sadb.h>
90 #include <inet/ipsec_impl.h>
91 #include <inet/iptun/iptun_impl.h>
92 #include <inet/sctp_ip.h>
93 #include <sys/pattr.h>
94 #include <inet/ipclassifier.h>
95 #include <inet/ipsecah.h>
96 #include <inet/rawip_impl.h>
97 #include <inet/rts_impl.h>
98 #include <sys/squeue_impl.h>
99 #include <sys/squeue.h>
100 
101 #include <sys/tsol/label.h>
102 #include <sys/tsol/tnet.h>
103 
104 /* Temporary; for CR 6451644 work-around */
105 #include <sys/ethernet.h>
106 
107 /*
108  * Naming conventions:
109  *      These rules should be judiciously applied
110  *	if there is a need to identify something as IPv6 versus IPv4
111  *	IPv6 funcions will end with _v6 in the ip module.
112  *	IPv6 funcions will end with _ipv6 in the transport modules.
113  *	IPv6 macros:
114  *		Some macros end with _V6; e.g. ILL_FRAG_HASH_V6
115  *		Some macros start with V6_; e.g. V6_OR_V4_INADDR_ANY
116  *		And then there are ..V4_PART_OF_V6.
117  *		The intent is that macros in the ip module end with _V6.
118  *	IPv6 global variables will start with ipv6_
119  *	IPv6 structures will start with ipv6
120  *	IPv6 defined constants should start with IPV6_
121  *		(but then there are NDP_DEFAULT_VERS_PRI_AND_FLOW, etc)
122  */
123 
124 /*
125  * ip6opt_ls is used to enable IPv6 (via /etc/system on TX systems).
126  * We need to do this because we didn't obtain the IP6OPT_LS (0x0a)
127  * from IANA. This mechanism will remain in effect until an official
128  * number is obtained.
129  */
130 uchar_t ip6opt_ls;
131 
132 const in6_addr_t ipv6_all_ones =
133 	{ 0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU };
134 const in6_addr_t ipv6_all_zeros = { 0, 0, 0, 0 };
135 
136 #ifdef	_BIG_ENDIAN
137 const in6_addr_t ipv6_unspecified_group = { 0xff000000U, 0, 0, 0 };
138 #else	/* _BIG_ENDIAN */
139 const in6_addr_t ipv6_unspecified_group = { 0x000000ffU, 0, 0, 0 };
140 #endif	/* _BIG_ENDIAN */
141 
142 #ifdef	_BIG_ENDIAN
143 const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x00000001U };
144 #else  /* _BIG_ENDIAN */
145 const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x01000000U };
146 #endif /* _BIG_ENDIAN */
147 
148 #ifdef _BIG_ENDIAN
149 const in6_addr_t ipv6_all_hosts_mcast = { 0xff020000U, 0, 0, 0x00000001U };
150 #else  /* _BIG_ENDIAN */
151 const in6_addr_t ipv6_all_hosts_mcast = { 0x000002ffU, 0, 0, 0x01000000U };
152 #endif /* _BIG_ENDIAN */
153 
154 #ifdef _BIG_ENDIAN
155 const in6_addr_t ipv6_all_rtrs_mcast = { 0xff020000U, 0, 0, 0x00000002U };
156 #else  /* _BIG_ENDIAN */
157 const in6_addr_t ipv6_all_rtrs_mcast = { 0x000002ffU, 0, 0, 0x02000000U };
158 #endif /* _BIG_ENDIAN */
159 
160 #ifdef _BIG_ENDIAN
161 const in6_addr_t ipv6_all_v2rtrs_mcast = { 0xff020000U, 0, 0, 0x00000016U };
162 #else  /* _BIG_ENDIAN */
163 const in6_addr_t ipv6_all_v2rtrs_mcast = { 0x000002ffU, 0, 0, 0x16000000U };
164 #endif /* _BIG_ENDIAN */
165 
166 #ifdef _BIG_ENDIAN
167 const in6_addr_t ipv6_solicited_node_mcast =
168 			{ 0xff020000U, 0, 0x00000001U, 0xff000000U };
169 #else  /* _BIG_ENDIAN */
170 const in6_addr_t ipv6_solicited_node_mcast =
171 			{ 0x000002ffU, 0, 0x01000000U, 0x000000ffU };
172 #endif /* _BIG_ENDIAN */
173 
174 static boolean_t icmp_inbound_verify_v6(mblk_t *, icmp6_t *, ip_recv_attr_t *);
175 static void	icmp_inbound_too_big_v6(icmp6_t *, ip_recv_attr_t *);
176 static void	icmp_pkt_v6(mblk_t *, void *, size_t, const in6_addr_t *,
177     ip_recv_attr_t *);
178 static void	icmp_redirect_v6(mblk_t *, ip6_t *, nd_redirect_t *,
179     ip_recv_attr_t *);
180 static void	icmp_send_redirect_v6(mblk_t *, in6_addr_t *,
181     in6_addr_t *, ip_recv_attr_t *);
182 static void	icmp_send_reply_v6(mblk_t *, ip6_t *, icmp6_t *,
183     ip_recv_attr_t *);
184 static boolean_t	ip_source_routed_v6(ip6_t *, mblk_t *, ip_stack_t *);
185 
186 /*
187  * icmp_inbound_v6 deals with ICMP messages that are handled by IP.
188  * If the ICMP message is consumed by IP, i.e., it should not be delivered
189  * to any IPPROTO_ICMP raw sockets, then it returns NULL.
190  * Likewise, if the ICMP error is misformed (too short, etc), then it
191  * returns NULL. The caller uses this to determine whether or not to send
192  * to raw sockets.
193  *
194  * All error messages are passed to the matching transport stream.
195  *
196  * See comment for icmp_inbound_v4() on how IPsec is handled.
197  */
198 mblk_t *
icmp_inbound_v6(mblk_t * mp,ip_recv_attr_t * ira)199 icmp_inbound_v6(mblk_t *mp, ip_recv_attr_t *ira)
200 {
201 	icmp6_t		*icmp6;
202 	ip6_t		*ip6h;		/* Outer header */
203 	int		ip_hdr_length;	/* Outer header length */
204 	boolean_t	interested;
205 	ill_t		*ill = ira->ira_ill;
206 	ip_stack_t	*ipst = ill->ill_ipst;
207 	mblk_t		*mp_ret = NULL;
208 
209 	ip6h = (ip6_t *)mp->b_rptr;
210 
211 	BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs);
212 
213 	/* Check for Martian packets  */
214 	if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src)) {
215 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
216 		ip_drop_input("ipIfStatsInAddrErrors: mcast src", mp, ill);
217 		freemsg(mp);
218 		return (NULL);
219 	}
220 
221 	/* Make sure ira_l2src is set for ndp_input */
222 	if (!(ira->ira_flags & IRAF_L2SRC_SET))
223 		ip_setl2src(mp, ira, ira->ira_rill);
224 
225 	ip_hdr_length = ira->ira_ip_hdr_length;
226 	if ((mp->b_wptr - mp->b_rptr) < (ip_hdr_length + ICMP6_MINLEN)) {
227 		if (ira->ira_pktlen < (ip_hdr_length + ICMP6_MINLEN)) {
228 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts);
229 			ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill);
230 			freemsg(mp);
231 			return (NULL);
232 		}
233 		ip6h = ip_pullup(mp, ip_hdr_length + ICMP6_MINLEN, ira);
234 		if (ip6h == NULL) {
235 			BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
236 			freemsg(mp);
237 			return (NULL);
238 		}
239 	}
240 
241 	icmp6 = (icmp6_t *)(&mp->b_rptr[ip_hdr_length]);
242 	DTRACE_PROBE2(icmp__inbound__v6, ip6_t *, ip6h, icmp6_t *, icmp6);
243 	ip2dbg(("icmp_inbound_v6: type %d code %d\n", icmp6->icmp6_type,
244 	    icmp6->icmp6_code));
245 
246 	/*
247 	 * We will set "interested" to "true" if we should pass a copy to
248 	 * the transport i.e., if it is an error message.
249 	 */
250 	interested = !(icmp6->icmp6_type & ICMP6_INFOMSG_MASK);
251 
252 	switch (icmp6->icmp6_type) {
253 	case ICMP6_DST_UNREACH:
254 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInDestUnreachs);
255 		if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN)
256 			BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInAdminProhibs);
257 		break;
258 
259 	case ICMP6_TIME_EXCEEDED:
260 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInTimeExcds);
261 		break;
262 
263 	case ICMP6_PARAM_PROB:
264 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInParmProblems);
265 		break;
266 
267 	case ICMP6_PACKET_TOO_BIG:
268 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInPktTooBigs);
269 		break;
270 
271 	case ICMP6_ECHO_REQUEST:
272 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchos);
273 		if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) &&
274 		    !ipst->ips_ipv6_resp_echo_mcast)
275 			break;
276 
277 		/*
278 		 * We must have exclusive use of the mblk to convert it to
279 		 * a response.
280 		 * If not, we copy it.
281 		 */
282 		if (mp->b_datap->db_ref > 1) {
283 			mblk_t	*mp1;
284 
285 			mp1 = copymsg(mp);
286 			if (mp1 == NULL) {
287 				BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
288 				ip_drop_input("ipIfStatsInDiscards - copymsg",
289 				    mp, ill);
290 				freemsg(mp);
291 				return (NULL);
292 			}
293 			freemsg(mp);
294 			mp = mp1;
295 			ip6h = (ip6_t *)mp->b_rptr;
296 			icmp6 = (icmp6_t *)(&mp->b_rptr[ip_hdr_length]);
297 		}
298 
299 		icmp6->icmp6_type = ICMP6_ECHO_REPLY;
300 		icmp_send_reply_v6(mp, ip6h, icmp6, ira);
301 		return (NULL);
302 
303 	case ICMP6_ECHO_REPLY:
304 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchoReplies);
305 		break;
306 
307 	case ND_ROUTER_SOLICIT:
308 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterSolicits);
309 		break;
310 
311 	case ND_ROUTER_ADVERT:
312 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterAdvertisements);
313 		break;
314 
315 	case ND_NEIGHBOR_SOLICIT:
316 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInNeighborSolicits);
317 		ndp_input(mp, ira);
318 		return (NULL);
319 
320 	case ND_NEIGHBOR_ADVERT:
321 		BUMP_MIB(ill->ill_icmp6_mib,
322 		    ipv6IfIcmpInNeighborAdvertisements);
323 		ndp_input(mp, ira);
324 		return (NULL);
325 
326 	case ND_REDIRECT:
327 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRedirects);
328 
329 		if (ipst->ips_ipv6_ignore_redirect)
330 			break;
331 
332 		/* We now allow a RAW socket to receive this. */
333 		interested = B_TRUE;
334 		break;
335 
336 	/*
337 	 * The next three icmp messages will be handled by MLD.
338 	 * Pass all valid MLD packets up to any process(es)
339 	 * listening on a raw ICMP socket.
340 	 */
341 	case MLD_LISTENER_QUERY:
342 	case MLD_LISTENER_REPORT:
343 	case MLD_LISTENER_REDUCTION:
344 		mp = mld_input(mp, ira);
345 		return (mp);
346 	default:
347 		break;
348 	}
349 	/*
350 	 * See if there is an ICMP client to avoid an extra copymsg/freemsg
351 	 * if there isn't one.
352 	 */
353 	if (ipst->ips_ipcl_proto_fanout_v6[IPPROTO_ICMPV6].connf_head != NULL) {
354 		/* If there is an ICMP client and we want one too, copy it. */
355 
356 		if (!interested) {
357 			/* Caller will deliver to RAW sockets */
358 			return (mp);
359 		}
360 		mp_ret = copymsg(mp);
361 		if (mp_ret == NULL) {
362 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
363 			ip_drop_input("ipIfStatsInDiscards - copymsg", mp, ill);
364 		}
365 	} else if (!interested) {
366 		/* Neither we nor raw sockets are interested. Drop packet now */
367 		freemsg(mp);
368 		return (NULL);
369 	}
370 
371 	/*
372 	 * ICMP error or redirect packet. Make sure we have enough of
373 	 * the header and that db_ref == 1 since we might end up modifying
374 	 * the packet.
375 	 */
376 	if (mp->b_cont != NULL) {
377 		if (ip_pullup(mp, -1, ira) == NULL) {
378 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
379 			ip_drop_input("ipIfStatsInDiscards - ip_pullup",
380 			    mp, ill);
381 			freemsg(mp);
382 			return (mp_ret);
383 		}
384 	}
385 
386 	if (mp->b_datap->db_ref > 1) {
387 		mblk_t	*mp1;
388 
389 		mp1 = copymsg(mp);
390 		if (mp1 == NULL) {
391 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
392 			ip_drop_input("ipIfStatsInDiscards - copymsg", mp, ill);
393 			freemsg(mp);
394 			return (mp_ret);
395 		}
396 		freemsg(mp);
397 		mp = mp1;
398 	}
399 
400 	/*
401 	 * In case mp has changed, verify the message before any further
402 	 * processes.
403 	 */
404 	ip6h = (ip6_t *)mp->b_rptr;
405 	icmp6 = (icmp6_t *)(&mp->b_rptr[ip_hdr_length]);
406 	if (!icmp_inbound_verify_v6(mp, icmp6, ira)) {
407 		freemsg(mp);
408 		return (mp_ret);
409 	}
410 
411 	switch (icmp6->icmp6_type) {
412 	case ND_REDIRECT:
413 		icmp_redirect_v6(mp, ip6h, (nd_redirect_t *)icmp6, ira);
414 		break;
415 	case ICMP6_PACKET_TOO_BIG:
416 		/* Update DCE and adjust MTU is icmp header if needed */
417 		icmp_inbound_too_big_v6(icmp6, ira);
418 		/* FALLTHROUGH */
419 	default:
420 		icmp_inbound_error_fanout_v6(mp, icmp6, ira);
421 		break;
422 	}
423 
424 	return (mp_ret);
425 }
426 
427 /*
428  * Send an ICMP echo reply.
429  * The caller has already updated the payload part of the packet.
430  * We handle the ICMP checksum, IP source address selection and feed
431  * the packet into ip_output_simple.
432  */
433 static void
icmp_send_reply_v6(mblk_t * mp,ip6_t * ip6h,icmp6_t * icmp6,ip_recv_attr_t * ira)434 icmp_send_reply_v6(mblk_t *mp, ip6_t *ip6h, icmp6_t *icmp6,
435     ip_recv_attr_t *ira)
436 {
437 	uint_t		ip_hdr_length = ira->ira_ip_hdr_length;
438 	ill_t		*ill = ira->ira_ill;
439 	ip_stack_t	*ipst = ill->ill_ipst;
440 	ip_xmit_attr_t	ixas;
441 	in6_addr_t	origsrc;
442 
443 	/*
444 	 * Remove any extension headers (do not reverse a source route)
445 	 * and clear the flow id (keep traffic class for now).
446 	 */
447 	if (ip_hdr_length != IPV6_HDR_LEN) {
448 		int	i;
449 
450 		for (i = 0; i < IPV6_HDR_LEN; i++) {
451 			mp->b_rptr[ip_hdr_length - i - 1] =
452 			    mp->b_rptr[IPV6_HDR_LEN - i - 1];
453 		}
454 		mp->b_rptr += (ip_hdr_length - IPV6_HDR_LEN);
455 		ip6h = (ip6_t *)mp->b_rptr;
456 		ip6h->ip6_nxt = IPPROTO_ICMPV6;
457 		i = ntohs(ip6h->ip6_plen);
458 		i -= (ip_hdr_length - IPV6_HDR_LEN);
459 		ip6h->ip6_plen = htons(i);
460 		ip_hdr_length = IPV6_HDR_LEN;
461 		ASSERT(ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN == msgdsize(mp));
462 	}
463 	ip6h->ip6_vcf &= ~IPV6_FLOWINFO_FLOWLABEL;
464 
465 	/* Reverse the source and destination addresses. */
466 	origsrc = ip6h->ip6_src;
467 	ip6h->ip6_src = ip6h->ip6_dst;
468 	ip6h->ip6_dst = origsrc;
469 
470 	/* set the hop limit */
471 	ip6h->ip6_hops = ipst->ips_ipv6_def_hops;
472 
473 	/*
474 	 * Prepare for checksum by putting icmp length in the icmp
475 	 * checksum field. The checksum is calculated in ip_output
476 	 */
477 	icmp6->icmp6_cksum = ip6h->ip6_plen;
478 
479 	bzero(&ixas, sizeof (ixas));
480 	ixas.ixa_flags = IXAF_BASIC_SIMPLE_V6;
481 	ixas.ixa_zoneid = ira->ira_zoneid;
482 	ixas.ixa_cred = kcred;
483 	ixas.ixa_cpid = NOPID;
484 	ixas.ixa_tsl = ira->ira_tsl;	/* Behave as a multi-level responder */
485 	ixas.ixa_ifindex = 0;
486 	ixas.ixa_ipst = ipst;
487 	ixas.ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
488 
489 	if (!(ira->ira_flags & IRAF_IPSEC_SECURE)) {
490 		/*
491 		 * This packet should go out the same way as it
492 		 * came in i.e in clear, independent of the IPsec
493 		 * policy for transmitting packets.
494 		 */
495 		ixas.ixa_flags |= IXAF_NO_IPSEC;
496 	} else {
497 		if (!ipsec_in_to_out(ira, &ixas, mp, NULL, ip6h)) {
498 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
499 			/* Note: mp already consumed and ip_drop_packet done */
500 			return;
501 		}
502 	}
503 
504 	/* Was the destination (now source) link-local? Send out same group */
505 	if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) {
506 		ixas.ixa_flags |= IXAF_SCOPEID_SET;
507 		if (IS_UNDER_IPMP(ill))
508 			ixas.ixa_scopeid = ill_get_upper_ifindex(ill);
509 		else
510 			ixas.ixa_scopeid = ill->ill_phyint->phyint_ifindex;
511 	}
512 
513 	if (ira->ira_flags & IRAF_MULTIBROADCAST) {
514 		/*
515 		 * Not one or our addresses (IRE_LOCALs), thus we let
516 		 * ip_output_simple pick the source.
517 		 */
518 		ip6h->ip6_src = ipv6_all_zeros;
519 		ixas.ixa_flags |= IXAF_SET_SOURCE;
520 	}
521 
522 	/* Should we send using dce_pmtu? */
523 	if (ipst->ips_ipv6_icmp_return_pmtu)
524 		ixas.ixa_flags |= IXAF_PMTU_DISCOVERY;
525 
526 	(void) ip_output_simple(mp, &ixas);
527 	ixa_cleanup(&ixas);
528 
529 }
530 
531 /*
532  * Verify the ICMP messages for either for ICMP error or redirect packet.
533  * The caller should have fully pulled up the message. If it's a redirect
534  * packet, only basic checks on IP header will be done; otherwise, verify
535  * the packet by looking at the included ULP header.
536  *
537  * Called before icmp_inbound_error_fanout_v6 is called.
538  */
539 static boolean_t
icmp_inbound_verify_v6(mblk_t * mp,icmp6_t * icmp6,ip_recv_attr_t * ira)540 icmp_inbound_verify_v6(mblk_t *mp, icmp6_t *icmp6, ip_recv_attr_t *ira)
541 {
542 	ill_t		*ill = ira->ira_ill;
543 	uint16_t	hdr_length;
544 	uint8_t		*nexthdrp;
545 	uint8_t		nexthdr;
546 	ip_stack_t	*ipst = ill->ill_ipst;
547 	conn_t		*connp;
548 	ip6_t		*ip6h;	/* Inner header */
549 
550 	ip6h = (ip6_t *)&icmp6[1];
551 	if ((uchar_t *)ip6h + IPV6_HDR_LEN > mp->b_wptr)
552 		goto truncated;
553 
554 	if (icmp6->icmp6_type == ND_REDIRECT) {
555 		hdr_length = sizeof (nd_redirect_t);
556 	} else {
557 		if ((IPH_HDR_VERSION(ip6h) != IPV6_VERSION))
558 			goto discard_pkt;
559 		hdr_length = IPV6_HDR_LEN;
560 	}
561 
562 	if ((uchar_t *)ip6h + hdr_length > mp->b_wptr)
563 		goto truncated;
564 
565 	/*
566 	 * Stop here for ICMP_REDIRECT.
567 	 */
568 	if (icmp6->icmp6_type == ND_REDIRECT)
569 		return (B_TRUE);
570 
571 	/*
572 	 * ICMP errors only.
573 	 */
574 	if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp))
575 		goto discard_pkt;
576 	nexthdr = *nexthdrp;
577 
578 	/* Try to pass the ICMP message to clients who need it */
579 	switch (nexthdr) {
580 	case IPPROTO_UDP:
581 		/*
582 		 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of
583 		 * transport header.
584 		 */
585 		if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN >
586 		    mp->b_wptr)
587 			goto truncated;
588 		break;
589 	case IPPROTO_TCP: {
590 		tcpha_t		*tcpha;
591 
592 		/*
593 		 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of
594 		 * transport header.
595 		 */
596 		if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN >
597 		    mp->b_wptr)
598 			goto truncated;
599 
600 		tcpha = (tcpha_t *)((uchar_t *)ip6h + hdr_length);
601 		/*
602 		 * With IPMP we need to match across group, which we do
603 		 * since we have the upper ill from ira_ill.
604 		 */
605 		connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha, TCPS_LISTEN,
606 		    ill->ill_phyint->phyint_ifindex, ipst);
607 		if (connp == NULL)
608 			goto discard_pkt;
609 
610 		if ((connp->conn_verifyicmp != NULL) &&
611 		    !connp->conn_verifyicmp(connp, tcpha, NULL, icmp6, ira)) {
612 			CONN_DEC_REF(connp);
613 			goto discard_pkt;
614 		}
615 		CONN_DEC_REF(connp);
616 		break;
617 	}
618 	case IPPROTO_SCTP:
619 		/*
620 		 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of
621 		 * transport header.
622 		 */
623 		if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN >
624 		    mp->b_wptr)
625 			goto truncated;
626 		break;
627 	case IPPROTO_ESP:
628 	case IPPROTO_AH:
629 		break;
630 	case IPPROTO_ENCAP:
631 	case IPPROTO_IPV6: {
632 		/* Look for self-encapsulated packets that caused an error */
633 		ip6_t *in_ip6h;
634 
635 		in_ip6h = (ip6_t *)((uint8_t *)ip6h + hdr_length);
636 		if ((uint8_t *)in_ip6h + (nexthdr == IPPROTO_ENCAP ?
637 		    sizeof (ipha_t) : sizeof (ip6_t)) > mp->b_wptr)
638 			goto truncated;
639 		break;
640 	}
641 	default:
642 		break;
643 	}
644 
645 	return (B_TRUE);
646 
647 discard_pkt:
648 	/* Bogus ICMP error. */
649 	BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
650 	return (B_FALSE);
651 
652 truncated:
653 	/* We pulled up everthing already. Must be truncated */
654 	BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
655 	return (B_FALSE);
656 }
657 
658 /*
659  * Process received IPv6 ICMP Packet too big.
660  * The caller is responsible for validating the packet before passing it in
661  * and also to fanout the ICMP error to any matching transport conns. Assumes
662  * the message has been fully pulled up.
663  *
664  * Before getting here, the caller has called icmp_inbound_verify_v6()
665  * that should have verified with ULP to prevent undoing the changes we're
666  * going to make to DCE. For example, TCP might have verified that the packet
667  * which generated error is in the send window.
668  *
669  * In some cases modified this MTU in the ICMP header packet; the caller
670  * should pass to the matching ULP after this returns.
671  */
672 static void
icmp_inbound_too_big_v6(icmp6_t * icmp6,ip_recv_attr_t * ira)673 icmp_inbound_too_big_v6(icmp6_t *icmp6, ip_recv_attr_t *ira)
674 {
675 	uint32_t	mtu;
676 	dce_t		*dce;
677 	ill_t		*ill = ira->ira_ill;	/* Upper ill if IPMP */
678 	ip_stack_t	*ipst = ill->ill_ipst;
679 	int		old_max_frag;
680 	in6_addr_t	final_dst;
681 	ip6_t		*ip6h;	/* Inner IP header */
682 
683 	/* Caller has already pulled up everything. */
684 	ip6h = (ip6_t *)&icmp6[1];
685 	final_dst = ip_get_dst_v6(ip6h, NULL, NULL);
686 
687 	mtu = ntohl(icmp6->icmp6_mtu);
688 	if (mtu < IPV6_MIN_MTU) {
689 		/*
690 		 * RFC 8021 suggests to ignore messages where mtu is
691 		 * less than the IPv6 minimum.
692 		 */
693 		ip1dbg(("Received mtu less than IPv6 "
694 		    "min mtu %d: %d\n", IPV6_MIN_MTU, mtu));
695 		DTRACE_PROBE1(icmp6__too__small__mtu, uint32_t, mtu);
696 		return;
697 	}
698 
699 	/*
700 	 * For link local destinations matching simply on address is not
701 	 * sufficient. Same link local addresses for different ILL's is
702 	 * possible.
703 	 */
704 	if (IN6_IS_ADDR_LINKSCOPE(&final_dst)) {
705 		dce = dce_lookup_and_add_v6(&final_dst,
706 		    ill->ill_phyint->phyint_ifindex, ipst);
707 	} else {
708 		dce = dce_lookup_and_add_v6(&final_dst, 0, ipst);
709 	}
710 	if (dce == NULL) {
711 		/* Couldn't add a unique one - ENOMEM */
712 		if (ip_debug > 2) {
713 			/* ip1dbg */
714 			pr_addr_dbg("icmp_inbound_too_big_v6:"
715 			    "no dce for dst %s\n", AF_INET6,
716 			    &final_dst);
717 		}
718 		return;
719 	}
720 
721 	mutex_enter(&dce->dce_lock);
722 	if (dce->dce_flags & DCEF_PMTU)
723 		old_max_frag = dce->dce_pmtu;
724 	else if (IN6_IS_ADDR_MULTICAST(&final_dst))
725 		old_max_frag = ill->ill_mc_mtu;
726 	else
727 		old_max_frag = ill->ill_mtu;
728 
729 	ip1dbg(("Received mtu from router: %d\n", mtu));
730 	DTRACE_PROBE1(icmp6__received__mtu, uint32_t, mtu);
731 	dce->dce_pmtu = MIN(old_max_frag, mtu);
732 	icmp6->icmp6_mtu = htonl(dce->dce_pmtu);
733 
734 	/* We now have a PMTU for sure */
735 	dce->dce_flags |= DCEF_PMTU;
736 	dce->dce_last_change_time = TICK_TO_SEC(ddi_get_lbolt64());
737 
738 	mutex_exit(&dce->dce_lock);
739 	/*
740 	 * After dropping the lock the new value is visible to everyone.
741 	 * Then we bump the generation number so any cached values reinspect
742 	 * the dce_t.
743 	 */
744 	dce_increment_generation(dce);
745 	dce_refrele(dce);
746 }
747 
748 /*
749  * Fanout received ICMPv6 error packets to the transports.
750  * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else.
751  *
752  * The caller must have called icmp_inbound_verify_v6.
753  */
754 void
icmp_inbound_error_fanout_v6(mblk_t * mp,icmp6_t * icmp6,ip_recv_attr_t * ira)755 icmp_inbound_error_fanout_v6(mblk_t *mp, icmp6_t *icmp6, ip_recv_attr_t *ira)
756 {
757 	uint16_t	*up;	/* Pointer to ports in ULP header */
758 	uint32_t	ports;	/* reversed ports for fanout */
759 	ip6_t		rip6h;	/* With reversed addresses */
760 	ip6_t		*ip6h;	/* Inner IP header */
761 	uint16_t	hdr_length; /* Inner IP header length */
762 	uint8_t		*nexthdrp;
763 	uint8_t		nexthdr;
764 	tcpha_t		*tcpha;
765 	conn_t		*connp;
766 	ill_t		*ill = ira->ira_ill;	/* Upper in the case of IPMP */
767 	ip_stack_t	*ipst = ill->ill_ipst;
768 	ipsec_stack_t	*ipss = ipst->ips_netstack->netstack_ipsec;
769 
770 	/* Caller has already pulled up everything. */
771 	ip6h = (ip6_t *)&icmp6[1];
772 	ASSERT(mp->b_cont == NULL);
773 	ASSERT((uchar_t *)&ip6h[1] <= mp->b_wptr);
774 
775 	if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp))
776 		goto drop_pkt;
777 	nexthdr = *nexthdrp;
778 	ira->ira_protocol = nexthdr;
779 
780 	/*
781 	 * We need a separate IP header with the source and destination
782 	 * addresses reversed to do fanout/classification because the ip6h in
783 	 * the ICMPv6 error is in the form we sent it out.
784 	 */
785 	rip6h.ip6_src = ip6h->ip6_dst;
786 	rip6h.ip6_dst = ip6h->ip6_src;
787 	rip6h.ip6_nxt = nexthdr;
788 
789 	/* Try to pass the ICMP message to clients who need it */
790 	switch (nexthdr) {
791 	case IPPROTO_UDP: {
792 		/* Attempt to find a client stream based on port. */
793 		up = (uint16_t *)((uchar_t *)ip6h + hdr_length);
794 
795 		/* Note that we send error to all matches. */
796 		ira->ira_flags |= IRAF_ICMP_ERROR;
797 		ip_fanout_udp_multi_v6(mp, &rip6h, up[0], up[1], ira);
798 		ira->ira_flags &= ~IRAF_ICMP_ERROR;
799 		return;
800 	}
801 	case IPPROTO_TCP: {
802 		/*
803 		 * Attempt to find a client stream based on port.
804 		 * Note that we do a reverse lookup since the header is
805 		 * in the form we sent it out.
806 		 */
807 		tcpha = (tcpha_t *)((uchar_t *)ip6h + hdr_length);
808 		/*
809 		 * With IPMP we need to match across group, which we do
810 		 * since we have the upper ill from ira_ill.
811 		 */
812 		connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha,
813 		    TCPS_LISTEN, ill->ill_phyint->phyint_ifindex, ipst);
814 		if (connp == NULL) {
815 			goto drop_pkt;
816 		}
817 
818 		if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) ||
819 		    (ira->ira_flags & IRAF_IPSEC_SECURE)) {
820 			mp = ipsec_check_inbound_policy(mp, connp,
821 			    NULL, ip6h, ira);
822 			if (mp == NULL) {
823 				BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
824 				/* Note that mp is NULL */
825 				ip_drop_input("ipIfStatsInDiscards", mp, ill);
826 				CONN_DEC_REF(connp);
827 				return;
828 			}
829 		}
830 
831 		ira->ira_flags |= IRAF_ICMP_ERROR;
832 		if (IPCL_IS_TCP(connp)) {
833 			SQUEUE_ENTER_ONE(connp->conn_sqp, mp,
834 			    connp->conn_recvicmp, connp, ira, SQ_FILL,
835 			    SQTAG_TCP6_INPUT_ICMP_ERR);
836 		} else {
837 			/* Not TCP; must be SOCK_RAW, IPPROTO_TCP */
838 			ill_t *rill = ira->ira_rill;
839 
840 			ira->ira_ill = ira->ira_rill = NULL;
841 			(connp->conn_recv)(connp, mp, NULL, ira);
842 			CONN_DEC_REF(connp);
843 			ira->ira_ill = ill;
844 			ira->ira_rill = rill;
845 		}
846 		ira->ira_flags &= ~IRAF_ICMP_ERROR;
847 		return;
848 
849 	}
850 	case IPPROTO_SCTP:
851 		up = (uint16_t *)((uchar_t *)ip6h + hdr_length);
852 		/* Find a SCTP client stream for this packet. */
853 		((uint16_t *)&ports)[0] = up[1];
854 		((uint16_t *)&ports)[1] = up[0];
855 
856 		ira->ira_flags |= IRAF_ICMP_ERROR;
857 		ip_fanout_sctp(mp, NULL, &rip6h, ports, ira);
858 		ira->ira_flags &= ~IRAF_ICMP_ERROR;
859 		return;
860 
861 	case IPPROTO_ESP:
862 	case IPPROTO_AH:
863 		if (!ipsec_loaded(ipss)) {
864 			ip_proto_not_sup(mp, ira);
865 			return;
866 		}
867 
868 		if (nexthdr == IPPROTO_ESP)
869 			mp = ipsecesp_icmp_error(mp, ira);
870 		else
871 			mp = ipsecah_icmp_error(mp, ira);
872 		if (mp == NULL)
873 			return;
874 
875 		/* Just in case ipsec didn't preserve the NULL b_cont */
876 		if (mp->b_cont != NULL) {
877 			if (!pullupmsg(mp, -1))
878 				goto drop_pkt;
879 		}
880 
881 		/*
882 		 * If succesful, the mp has been modified to not include
883 		 * the ESP/AH header so we can fanout to the ULP's icmp
884 		 * error handler.
885 		 */
886 		if (mp->b_wptr - mp->b_rptr < IPV6_HDR_LEN)
887 			goto drop_pkt;
888 
889 		ip6h = (ip6_t *)mp->b_rptr;
890 		/* Don't call hdr_length_v6() unless you have to. */
891 		if (ip6h->ip6_nxt != IPPROTO_ICMPV6)
892 			hdr_length = ip_hdr_length_v6(mp, ip6h);
893 		else
894 			hdr_length = IPV6_HDR_LEN;
895 
896 		/* Verify the modified message before any further processes. */
897 		icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]);
898 		if (!icmp_inbound_verify_v6(mp, icmp6, ira)) {
899 			freemsg(mp);
900 			return;
901 		}
902 
903 		icmp_inbound_error_fanout_v6(mp, icmp6, ira);
904 		return;
905 
906 	case IPPROTO_IPV6: {
907 		/* Look for self-encapsulated packets that caused an error */
908 		ip6_t *in_ip6h;
909 
910 		in_ip6h = (ip6_t *)((uint8_t *)ip6h + hdr_length);
911 
912 		if (IN6_ARE_ADDR_EQUAL(&in_ip6h->ip6_src, &ip6h->ip6_src) &&
913 		    IN6_ARE_ADDR_EQUAL(&in_ip6h->ip6_dst, &ip6h->ip6_dst)) {
914 			/*
915 			 * Self-encapsulated case. As in the ipv4 case,
916 			 * we need to strip the 2nd IP header. Since mp
917 			 * is already pulled-up, we can simply bcopy
918 			 * the 3rd header + data over the 2nd header.
919 			 */
920 			uint16_t unused_len;
921 
922 			/*
923 			 * Make sure we don't do recursion more than once.
924 			 */
925 			if (!ip_hdr_length_nexthdr_v6(mp, in_ip6h,
926 			    &unused_len, &nexthdrp) ||
927 			    *nexthdrp == IPPROTO_IPV6) {
928 				goto drop_pkt;
929 			}
930 
931 			/*
932 			 * Copy the 3rd header + remaining data on top
933 			 * of the 2nd header.
934 			 */
935 			bcopy(in_ip6h, ip6h, mp->b_wptr - (uchar_t *)in_ip6h);
936 
937 			/*
938 			 * Subtract length of the 2nd header.
939 			 */
940 			mp->b_wptr -= hdr_length;
941 
942 			ip6h = (ip6_t *)mp->b_rptr;
943 			/* Don't call hdr_length_v6() unless you have to. */
944 			if (ip6h->ip6_nxt != IPPROTO_ICMPV6)
945 				hdr_length = ip_hdr_length_v6(mp, ip6h);
946 			else
947 				hdr_length = IPV6_HDR_LEN;
948 
949 			/*
950 			 * Verify the modified message before any further
951 			 * processes.
952 			 */
953 			icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]);
954 			if (!icmp_inbound_verify_v6(mp, icmp6, ira)) {
955 				freemsg(mp);
956 				return;
957 			}
958 
959 			/*
960 			 * Now recurse, and see what I _really_ should be
961 			 * doing here.
962 			 */
963 			icmp_inbound_error_fanout_v6(mp, icmp6, ira);
964 			return;
965 		}
966 	}
967 	/* FALLTHROUGH */
968 	case IPPROTO_ENCAP:
969 		if ((connp = ipcl_iptun_classify_v6(&rip6h.ip6_src,
970 		    &rip6h.ip6_dst, ipst)) != NULL) {
971 			ira->ira_flags |= IRAF_ICMP_ERROR;
972 			connp->conn_recvicmp(connp, mp, NULL, ira);
973 			CONN_DEC_REF(connp);
974 			ira->ira_flags &= ~IRAF_ICMP_ERROR;
975 			return;
976 		}
977 		/*
978 		 * No IP tunnel is interested, fallthrough and see
979 		 * if a raw socket will want it.
980 		 */
981 		/* FALLTHROUGH */
982 	default:
983 		ira->ira_flags |= IRAF_ICMP_ERROR;
984 		ASSERT(ira->ira_protocol == nexthdr);
985 		ip_fanout_proto_v6(mp, &rip6h, ira);
986 		ira->ira_flags &= ~IRAF_ICMP_ERROR;
987 		return;
988 	}
989 	/* NOTREACHED */
990 drop_pkt:
991 	BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
992 	ip1dbg(("icmp_inbound_error_fanout_v6: drop pkt\n"));
993 	freemsg(mp);
994 }
995 
996 /*
997  * Process received IPv6 ICMP Redirect messages.
998  * Assumes the caller has verified that the headers are in the pulled up mblk.
999  * Consumes mp.
1000  */
1001 /* ARGSUSED */
1002 static void
icmp_redirect_v6(mblk_t * mp,ip6_t * ip6h,nd_redirect_t * rd,ip_recv_attr_t * ira)1003 icmp_redirect_v6(mblk_t *mp, ip6_t *ip6h, nd_redirect_t *rd,
1004     ip_recv_attr_t *ira)
1005 {
1006 	ire_t		*ire, *nire;
1007 	ire_t		*prev_ire = NULL;
1008 	ire_t		*redir_ire;
1009 	in6_addr_t	*src, *dst, *gateway;
1010 	nd_opt_hdr_t	*opt;
1011 	nce_t		*nce;
1012 	int		ncec_flags = 0;
1013 	int		err = 0;
1014 	boolean_t	redirect_to_router = B_FALSE;
1015 	int		len;
1016 	int		optlen;
1017 	ill_t		*ill = ira->ira_rill;
1018 	ill_t		*rill = ira->ira_rill;
1019 	ip_stack_t	*ipst = ill->ill_ipst;
1020 
1021 	/*
1022 	 * Since ira_ill is where the IRE_LOCAL was hosted we use ira_rill
1023 	 * and make it be the IPMP upper so avoid being confused by a packet
1024 	 * addressed to a unicast address on a different ill.
1025 	 */
1026 	if (IS_UNDER_IPMP(rill)) {
1027 		rill = ipmp_ill_hold_ipmp_ill(rill);
1028 		if (rill == NULL) {
1029 			BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
1030 			ip_drop_input("ipv6IfIcmpInBadRedirects - IPMP ill",
1031 			    mp, ill);
1032 			freemsg(mp);
1033 			return;
1034 		}
1035 		ASSERT(rill != ira->ira_rill);
1036 	}
1037 
1038 	len = mp->b_wptr - (uchar_t *)rd;
1039 	src = &ip6h->ip6_src;
1040 	dst = &rd->nd_rd_dst;
1041 	gateway = &rd->nd_rd_target;
1042 
1043 	/* Verify if it is a valid redirect */
1044 	if (!IN6_IS_ADDR_LINKLOCAL(src) ||
1045 	    (ip6h->ip6_hops != IPV6_MAX_HOPS) ||
1046 	    (rd->nd_rd_code != 0) ||
1047 	    (len < sizeof (nd_redirect_t)) ||
1048 	    (IN6_IS_ADDR_V4MAPPED(dst)) ||
1049 	    (IN6_IS_ADDR_MULTICAST(dst))) {
1050 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
1051 		ip_drop_input("ipv6IfIcmpInBadRedirects - addr/len", mp, ill);
1052 		goto fail_redirect;
1053 	}
1054 
1055 	if (!(IN6_IS_ADDR_LINKLOCAL(gateway) ||
1056 	    IN6_ARE_ADDR_EQUAL(gateway, dst))) {
1057 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
1058 		ip_drop_input("ipv6IfIcmpInBadRedirects - bad gateway",
1059 		    mp, ill);
1060 		goto fail_redirect;
1061 	}
1062 
1063 	optlen = len - sizeof (nd_redirect_t);
1064 	if (optlen != 0) {
1065 		if (!ndp_verify_optlen((nd_opt_hdr_t *)&rd[1], optlen)) {
1066 			BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
1067 			ip_drop_input("ipv6IfIcmpInBadRedirects - options",
1068 			    mp, ill);
1069 			goto fail_redirect;
1070 		}
1071 	}
1072 
1073 	if (!IN6_ARE_ADDR_EQUAL(gateway, dst)) {
1074 		redirect_to_router = B_TRUE;
1075 		ncec_flags |= NCE_F_ISROUTER;
1076 	} else {
1077 		gateway = dst;	/* Add nce for dst */
1078 	}
1079 
1080 
1081 	/*
1082 	 * Verify that the IP source address of the redirect is
1083 	 * the same as the current first-hop router for the specified
1084 	 * ICMP destination address.
1085 	 * Also, Make sure we had a route for the dest in question and
1086 	 * that route was pointing to the old gateway (the source of the
1087 	 * redirect packet.)
1088 	 * We do longest match and then compare ire_gateway_addr_v6 below.
1089 	 */
1090 	prev_ire = ire_ftable_lookup_v6(dst, 0, 0, 0, rill,
1091 	    ALL_ZONES, NULL, MATCH_IRE_ILL, 0, ipst, NULL);
1092 
1093 	/*
1094 	 * Check that
1095 	 *	the redirect was not from ourselves
1096 	 *	old gateway is still directly reachable
1097 	 */
1098 	if (prev_ire == NULL ||
1099 	    (prev_ire->ire_type & (IRE_LOCAL|IRE_LOOPBACK)) ||
1100 	    (prev_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) ||
1101 	    !IN6_ARE_ADDR_EQUAL(src, &prev_ire->ire_gateway_addr_v6)) {
1102 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
1103 		ip_drop_input("ipv6IfIcmpInBadRedirects - ire", mp, ill);
1104 		goto fail_redirect;
1105 	}
1106 
1107 	ASSERT(prev_ire->ire_ill != NULL);
1108 	if (prev_ire->ire_ill->ill_flags & ILLF_NONUD)
1109 		ncec_flags |= NCE_F_NONUD;
1110 
1111 	opt = (nd_opt_hdr_t *)&rd[1];
1112 	opt = ndp_get_option(opt, optlen, ND_OPT_TARGET_LINKADDR);
1113 	if (opt != NULL) {
1114 		err = nce_lookup_then_add_v6(rill,
1115 		    (uchar_t *)&opt[1],		/* Link layer address */
1116 		    rill->ill_phys_addr_length,
1117 		    gateway, ncec_flags, ND_STALE, &nce);
1118 		switch (err) {
1119 		case 0:
1120 			nce_refrele(nce);
1121 			break;
1122 		case EEXIST:
1123 			/*
1124 			 * Check to see if link layer address has changed and
1125 			 * process the ncec_state accordingly.
1126 			 */
1127 			nce_process(nce->nce_common,
1128 			    (uchar_t *)&opt[1], 0, B_FALSE);
1129 			nce_refrele(nce);
1130 			break;
1131 		default:
1132 			ip1dbg(("icmp_redirect_v6: NCE create failed %d\n",
1133 			    err));
1134 			goto fail_redirect;
1135 		}
1136 	}
1137 	if (redirect_to_router) {
1138 		ASSERT(IN6_IS_ADDR_LINKLOCAL(gateway));
1139 
1140 		/*
1141 		 * Create a Route Association.  This will allow us to remember
1142 		 * a router told us to use the particular gateway.
1143 		 */
1144 		ire = ire_create_v6(
1145 		    dst,
1146 		    &ipv6_all_ones,		/* mask */
1147 		    gateway,			/* gateway addr */
1148 		    IRE_HOST,
1149 		    prev_ire->ire_ill,
1150 		    ALL_ZONES,
1151 		    (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST),
1152 		    NULL,
1153 		    ipst);
1154 	} else {
1155 		ipif_t *ipif;
1156 		in6_addr_t gw;
1157 
1158 		/*
1159 		 * Just create an on link entry, i.e. interface route.
1160 		 * The gateway field is our link-local on the ill.
1161 		 */
1162 		mutex_enter(&rill->ill_lock);
1163 		for (ipif = rill->ill_ipif; ipif != NULL;
1164 		    ipif = ipif->ipif_next) {
1165 			if (!(ipif->ipif_state_flags & IPIF_CONDEMNED) &&
1166 			    IN6_IS_ADDR_LINKLOCAL(&ipif->ipif_v6lcl_addr))
1167 				break;
1168 		}
1169 		if (ipif == NULL) {
1170 			/* We have no link-local address! */
1171 			mutex_exit(&rill->ill_lock);
1172 			goto fail_redirect;
1173 		}
1174 		gw = ipif->ipif_v6lcl_addr;
1175 		mutex_exit(&rill->ill_lock);
1176 
1177 		ire = ire_create_v6(
1178 		    dst,				/* gateway == dst */
1179 		    &ipv6_all_ones,			/* mask */
1180 		    &gw,				/* gateway addr */
1181 		    rill->ill_net_type,			/* IF_[NO]RESOLVER */
1182 		    prev_ire->ire_ill,
1183 		    ALL_ZONES,
1184 		    (RTF_DYNAMIC | RTF_HOST),
1185 		    NULL,
1186 		    ipst);
1187 	}
1188 
1189 	if (ire == NULL)
1190 		goto fail_redirect;
1191 
1192 	nire = ire_add(ire);
1193 	/* Check if it was a duplicate entry */
1194 	if (nire != NULL && nire != ire) {
1195 		ASSERT(nire->ire_identical_ref > 1);
1196 		ire_delete(nire);
1197 		ire_refrele(nire);
1198 		nire = NULL;
1199 	}
1200 	ire = nire;
1201 	if (ire != NULL) {
1202 		ire_refrele(ire);		/* Held in ire_add */
1203 
1204 		/* tell routing sockets that we received a redirect */
1205 		ip_rts_change_v6(RTM_REDIRECT,
1206 		    &rd->nd_rd_dst,
1207 		    &rd->nd_rd_target,
1208 		    &ipv6_all_ones, 0, src,
1209 		    (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 0,
1210 		    (RTA_DST | RTA_GATEWAY | RTA_NETMASK | RTA_AUTHOR), ipst);
1211 
1212 		/*
1213 		 * Delete any existing IRE_HOST type ires for this destination.
1214 		 * This together with the added IRE has the effect of
1215 		 * modifying an existing redirect.
1216 		 */
1217 		redir_ire = ire_ftable_lookup_v6(dst, 0, src, IRE_HOST,
1218 		    prev_ire->ire_ill, ALL_ZONES, NULL,
1219 		    (MATCH_IRE_GW | MATCH_IRE_TYPE | MATCH_IRE_ILL), 0, ipst,
1220 		    NULL);
1221 
1222 		if (redir_ire != NULL) {
1223 			if (redir_ire->ire_flags & RTF_DYNAMIC)
1224 				ire_delete(redir_ire);
1225 			ire_refrele(redir_ire);
1226 		}
1227 	}
1228 
1229 	ire_refrele(prev_ire);
1230 	prev_ire = NULL;
1231 
1232 fail_redirect:
1233 	if (prev_ire != NULL)
1234 		ire_refrele(prev_ire);
1235 	freemsg(mp);
1236 	if (rill != ira->ira_rill)
1237 		ill_refrele(rill);
1238 }
1239 
1240 /*
1241  * Build and ship an IPv6 ICMP message using the packet data in mp,
1242  * and the ICMP header pointed to by "stuff".  (May be called as
1243  * writer.)
1244  * Note: assumes that icmp_pkt_err_ok_v6 has been called to
1245  * verify that an icmp error packet can be sent.
1246  *
1247  * If v6src_ptr is set use it as a source. Otherwise select a reasonable
1248  * source address (see above function).
1249  */
1250 static void
icmp_pkt_v6(mblk_t * mp,void * stuff,size_t len,const in6_addr_t * v6src_ptr,ip_recv_attr_t * ira)1251 icmp_pkt_v6(mblk_t *mp, void *stuff, size_t len,
1252     const in6_addr_t *v6src_ptr, ip_recv_attr_t *ira)
1253 {
1254 	ip6_t		*ip6h;
1255 	in6_addr_t	v6dst;
1256 	size_t		len_needed;
1257 	size_t		msg_len;
1258 	mblk_t		*mp1;
1259 	icmp6_t		*icmp6;
1260 	in6_addr_t	v6src;
1261 	ill_t		*ill = ira->ira_ill;
1262 	ip_stack_t	*ipst = ill->ill_ipst;
1263 	ip_xmit_attr_t	ixas;
1264 
1265 	ip6h = (ip6_t *)mp->b_rptr;
1266 
1267 	bzero(&ixas, sizeof (ixas));
1268 	ixas.ixa_flags = IXAF_BASIC_SIMPLE_V6;
1269 	ixas.ixa_zoneid = ira->ira_zoneid;
1270 	ixas.ixa_ifindex = 0;
1271 	ixas.ixa_ipst = ipst;
1272 	ixas.ixa_cred = kcred;
1273 	ixas.ixa_cpid = NOPID;
1274 	ixas.ixa_tsl = ira->ira_tsl;	/* Behave as a multi-level responder */
1275 	ixas.ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1276 
1277 	/*
1278 	 * If the source of the original packet was link-local, then
1279 	 * make sure we send on the same ill (group) as we received it on.
1280 	 */
1281 	if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) {
1282 		ixas.ixa_flags |= IXAF_SCOPEID_SET;
1283 		if (IS_UNDER_IPMP(ill))
1284 			ixas.ixa_scopeid = ill_get_upper_ifindex(ill);
1285 		else
1286 			ixas.ixa_scopeid = ill->ill_phyint->phyint_ifindex;
1287 	}
1288 
1289 	if (ira->ira_flags & IRAF_IPSEC_SECURE) {
1290 		/*
1291 		 * Apply IPsec based on how IPsec was applied to
1292 		 * the packet that had the error.
1293 		 *
1294 		 * If it was an outbound packet that caused the ICMP
1295 		 * error, then the caller will have setup the IRA
1296 		 * appropriately.
1297 		 */
1298 		if (!ipsec_in_to_out(ira, &ixas, mp, NULL, ip6h)) {
1299 			BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards);
1300 			/* Note: mp already consumed and ip_drop_packet done */
1301 			return;
1302 		}
1303 	} else {
1304 		/*
1305 		 * This is in clear. The icmp message we are building
1306 		 * here should go out in clear, independent of our policy.
1307 		 */
1308 		ixas.ixa_flags |= IXAF_NO_IPSEC;
1309 	}
1310 
1311 	/*
1312 	 * If the caller specified the source we use that.
1313 	 * Otherwise, if the packet was for one of our unicast addresses, make
1314 	 * sure we respond with that as the source. Otherwise
1315 	 * have ip_output_simple pick the source address.
1316 	 */
1317 	if (v6src_ptr != NULL) {
1318 		v6src = *v6src_ptr;
1319 	} else {
1320 		ire_t *ire;
1321 		uint_t match_flags = MATCH_IRE_TYPE | MATCH_IRE_ZONEONLY;
1322 
1323 		if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src) ||
1324 		    IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst))
1325 			match_flags |= MATCH_IRE_ILL;
1326 
1327 		ire = ire_ftable_lookup_v6(&ip6h->ip6_dst, 0, 0,
1328 		    (IRE_LOCAL|IRE_LOOPBACK), ill, ira->ira_zoneid, NULL,
1329 		    match_flags, 0, ipst, NULL);
1330 		if (ire != NULL) {
1331 			v6src = ip6h->ip6_dst;
1332 			ire_refrele(ire);
1333 		} else {
1334 			v6src = ipv6_all_zeros;
1335 			ixas.ixa_flags |= IXAF_SET_SOURCE;
1336 		}
1337 	}
1338 	v6dst = ip6h->ip6_src;
1339 	len_needed = ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len;
1340 	msg_len = msgdsize(mp);
1341 	if (msg_len > len_needed) {
1342 		if (!adjmsg(mp, len_needed - msg_len)) {
1343 			BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors);
1344 			freemsg(mp);
1345 			return;
1346 		}
1347 		msg_len = len_needed;
1348 	}
1349 	mp1 = allocb(IPV6_HDR_LEN + len, BPRI_MED);
1350 	if (mp1 == NULL) {
1351 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors);
1352 		freemsg(mp);
1353 		return;
1354 	}
1355 	mp1->b_cont = mp;
1356 	mp = mp1;
1357 
1358 	/*
1359 	 * Set IXAF_TRUSTED_ICMP so we can let the ICMP messages this
1360 	 * node generates be accepted in peace by all on-host destinations.
1361 	 * If we do NOT assume that all on-host destinations trust
1362 	 * self-generated ICMP messages, then rework here, ip6.c, and spd.c.
1363 	 * (Look for IXAF_TRUSTED_ICMP).
1364 	 */
1365 	ixas.ixa_flags |= IXAF_TRUSTED_ICMP;
1366 
1367 	ip6h = (ip6_t *)mp->b_rptr;
1368 	mp1->b_wptr = (uchar_t *)ip6h + (IPV6_HDR_LEN + len);
1369 
1370 	ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
1371 	ip6h->ip6_nxt = IPPROTO_ICMPV6;
1372 	ip6h->ip6_hops = ipst->ips_ipv6_def_hops;
1373 	ip6h->ip6_dst = v6dst;
1374 	ip6h->ip6_src = v6src;
1375 	msg_len += IPV6_HDR_LEN + len;
1376 	if (msg_len > IP_MAXPACKET + IPV6_HDR_LEN) {
1377 		(void) adjmsg(mp, IP_MAXPACKET + IPV6_HDR_LEN - msg_len);
1378 		msg_len = IP_MAXPACKET + IPV6_HDR_LEN;
1379 	}
1380 	ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN));
1381 	icmp6 = (icmp6_t *)&ip6h[1];
1382 	bcopy(stuff, (char *)icmp6, len);
1383 	/*
1384 	 * Prepare for checksum by putting icmp length in the icmp
1385 	 * checksum field. The checksum is calculated in ip_output_wire_v6.
1386 	 */
1387 	icmp6->icmp6_cksum = ip6h->ip6_plen;
1388 	if (icmp6->icmp6_type == ND_REDIRECT) {
1389 		ip6h->ip6_hops = IPV6_MAX_HOPS;
1390 	}
1391 
1392 	(void) ip_output_simple(mp, &ixas);
1393 	ixa_cleanup(&ixas);
1394 }
1395 
1396 /*
1397  * Update the output mib when ICMPv6 packets are sent.
1398  */
1399 void
icmp_update_out_mib_v6(ill_t * ill,icmp6_t * icmp6)1400 icmp_update_out_mib_v6(ill_t *ill, icmp6_t *icmp6)
1401 {
1402 	BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutMsgs);
1403 
1404 	switch (icmp6->icmp6_type) {
1405 	case ICMP6_DST_UNREACH:
1406 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutDestUnreachs);
1407 		if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN)
1408 			BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutAdminProhibs);
1409 		break;
1410 
1411 	case ICMP6_TIME_EXCEEDED:
1412 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutTimeExcds);
1413 		break;
1414 
1415 	case ICMP6_PARAM_PROB:
1416 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutParmProblems);
1417 		break;
1418 
1419 	case ICMP6_PACKET_TOO_BIG:
1420 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutPktTooBigs);
1421 		break;
1422 
1423 	case ICMP6_ECHO_REQUEST:
1424 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchos);
1425 		break;
1426 
1427 	case ICMP6_ECHO_REPLY:
1428 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchoReplies);
1429 		break;
1430 
1431 	case ND_ROUTER_SOLICIT:
1432 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterSolicits);
1433 		break;
1434 
1435 	case ND_ROUTER_ADVERT:
1436 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterAdvertisements);
1437 		break;
1438 
1439 	case ND_NEIGHBOR_SOLICIT:
1440 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutNeighborSolicits);
1441 		break;
1442 
1443 	case ND_NEIGHBOR_ADVERT:
1444 		BUMP_MIB(ill->ill_icmp6_mib,
1445 		    ipv6IfIcmpOutNeighborAdvertisements);
1446 		break;
1447 
1448 	case ND_REDIRECT:
1449 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRedirects);
1450 		break;
1451 
1452 	case MLD_LISTENER_QUERY:
1453 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembQueries);
1454 		break;
1455 
1456 	case MLD_LISTENER_REPORT:
1457 	case MLD_V2_LISTENER_REPORT:
1458 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembResponses);
1459 		break;
1460 
1461 	case MLD_LISTENER_REDUCTION:
1462 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembReductions);
1463 		break;
1464 	}
1465 }
1466 
1467 /*
1468  * Check if it is ok to send an ICMPv6 error packet in
1469  * response to the IP packet in mp.
1470  * Free the message and return null if no
1471  * ICMP error packet should be sent.
1472  */
1473 static mblk_t *
icmp_pkt_err_ok_v6(mblk_t * mp,boolean_t mcast_ok,ip_recv_attr_t * ira)1474 icmp_pkt_err_ok_v6(mblk_t *mp, boolean_t mcast_ok, ip_recv_attr_t *ira)
1475 {
1476 	ill_t		*ill = ira->ira_ill;
1477 	ip_stack_t	*ipst = ill->ill_ipst;
1478 	boolean_t	llbcast;
1479 	ip6_t		*ip6h;
1480 
1481 	if (!mp)
1482 		return (NULL);
1483 
1484 	/* We view multicast and broadcast as the same.. */
1485 	llbcast = (ira->ira_flags &
1486 	    (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST)) != 0;
1487 	ip6h = (ip6_t *)mp->b_rptr;
1488 
1489 	/* Check if source address uniquely identifies the host */
1490 
1491 	if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src) ||
1492 	    IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src) ||
1493 	    IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) {
1494 		freemsg(mp);
1495 		return (NULL);
1496 	}
1497 
1498 	if (ip6h->ip6_nxt == IPPROTO_ICMPV6) {
1499 		size_t	len_needed = IPV6_HDR_LEN + ICMP6_MINLEN;
1500 		icmp6_t		*icmp6;
1501 
1502 		if (mp->b_wptr - mp->b_rptr < len_needed) {
1503 			if (!pullupmsg(mp, len_needed)) {
1504 				BUMP_MIB(ill->ill_icmp6_mib,
1505 				    ipv6IfIcmpInErrors);
1506 				freemsg(mp);
1507 				return (NULL);
1508 			}
1509 			ip6h = (ip6_t *)mp->b_rptr;
1510 		}
1511 		icmp6 = (icmp6_t *)&ip6h[1];
1512 		/* Explicitly do not generate errors in response to redirects */
1513 		if (ICMP6_IS_ERROR(icmp6->icmp6_type) ||
1514 		    icmp6->icmp6_type == ND_REDIRECT) {
1515 			freemsg(mp);
1516 			return (NULL);
1517 		}
1518 	}
1519 	/*
1520 	 * Check that the destination is not multicast and that the packet
1521 	 * was not sent on link layer broadcast or multicast.  (Exception
1522 	 * is Packet too big message as per the draft - when mcast_ok is set.)
1523 	 */
1524 	if (!mcast_ok &&
1525 	    (llbcast || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) {
1526 		freemsg(mp);
1527 		return (NULL);
1528 	}
1529 	/*
1530 	 * If this is a labeled system, then check to see if we're allowed to
1531 	 * send a response to this particular sender.  If not, then just drop.
1532 	 */
1533 	if (is_system_labeled() && !tsol_can_reply_error(mp, ira)) {
1534 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors);
1535 		freemsg(mp);
1536 		return (NULL);
1537 	}
1538 
1539 	if (icmp_err_rate_limit(ipst)) {
1540 		/*
1541 		 * Only send ICMP error packets every so often.
1542 		 * This should be done on a per port/source basis,
1543 		 * but for now this will suffice.
1544 		 */
1545 		freemsg(mp);
1546 		return (NULL);
1547 	}
1548 	return (mp);
1549 }
1550 
1551 /*
1552  * Called when a packet was sent out the same link that it arrived on.
1553  * Check if it is ok to send a redirect and then send it.
1554  */
1555 void
ip_send_potential_redirect_v6(mblk_t * mp,ip6_t * ip6h,ire_t * ire,ip_recv_attr_t * ira)1556 ip_send_potential_redirect_v6(mblk_t *mp, ip6_t *ip6h, ire_t *ire,
1557     ip_recv_attr_t *ira)
1558 {
1559 	ill_t		*ill = ira->ira_ill;
1560 	ip_stack_t	*ipst = ill->ill_ipst;
1561 	in6_addr_t	*v6targ;
1562 	ire_t		*src_ire_v6 = NULL;
1563 	mblk_t		*mp1;
1564 	ire_t		*nhop_ire = NULL;
1565 
1566 	/*
1567 	 * Don't send a redirect when forwarding a source
1568 	 * routed packet.
1569 	 */
1570 	if (ip_source_routed_v6(ip6h, mp, ipst))
1571 		return;
1572 
1573 	if (ire->ire_type & IRE_ONLINK) {
1574 		/* Target is directly connected */
1575 		v6targ = &ip6h->ip6_dst;
1576 	} else {
1577 		/* Determine the most specific IRE used to send the packets */
1578 		nhop_ire = ire_nexthop(ire);
1579 		if (nhop_ire == NULL)
1580 			return;
1581 
1582 		/*
1583 		 * We won't send redirects to a router
1584 		 * that doesn't have a link local
1585 		 * address, but will forward.
1586 		 */
1587 		if (!IN6_IS_ADDR_LINKLOCAL(&nhop_ire->ire_addr_v6)) {
1588 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
1589 			ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
1590 			ire_refrele(nhop_ire);
1591 			return;
1592 		}
1593 		v6targ = &nhop_ire->ire_addr_v6;
1594 	}
1595 	src_ire_v6 = ire_ftable_lookup_v6(&ip6h->ip6_src,
1596 	    NULL, NULL, IRE_INTERFACE, ire->ire_ill, ALL_ZONES, NULL,
1597 	    MATCH_IRE_ILL | MATCH_IRE_TYPE, 0, ipst, NULL);
1598 
1599 	if (src_ire_v6 == NULL) {
1600 		if (nhop_ire != NULL)
1601 			ire_refrele(nhop_ire);
1602 		return;
1603 	}
1604 
1605 	/*
1606 	 * The source is directly connected.
1607 	 */
1608 	mp1 = copymsg(mp);
1609 	if (mp1 != NULL)
1610 		icmp_send_redirect_v6(mp1, v6targ, &ip6h->ip6_dst, ira);
1611 
1612 	if (nhop_ire != NULL)
1613 		ire_refrele(nhop_ire);
1614 	ire_refrele(src_ire_v6);
1615 }
1616 
1617 /*
1618  * Generate an ICMPv6 redirect message.
1619  * Include target link layer address option if it exits.
1620  * Always include redirect header.
1621  */
1622 static void
icmp_send_redirect_v6(mblk_t * mp,in6_addr_t * targetp,in6_addr_t * dest,ip_recv_attr_t * ira)1623 icmp_send_redirect_v6(mblk_t *mp, in6_addr_t *targetp, in6_addr_t *dest,
1624     ip_recv_attr_t *ira)
1625 {
1626 	nd_redirect_t	*rd;
1627 	nd_opt_rd_hdr_t	*rdh;
1628 	uchar_t		*buf;
1629 	ncec_t		*ncec = NULL;
1630 	nd_opt_hdr_t	*opt;
1631 	int		len;
1632 	int		ll_opt_len = 0;
1633 	int		max_redir_hdr_data_len;
1634 	int		pkt_len;
1635 	in6_addr_t	*srcp;
1636 	ill_t		*ill;
1637 	boolean_t	need_refrele;
1638 	ip_stack_t	*ipst = ira->ira_ill->ill_ipst;
1639 
1640 	mp = icmp_pkt_err_ok_v6(mp, B_FALSE, ira);
1641 	if (mp == NULL)
1642 		return;
1643 
1644 	if (IS_UNDER_IPMP(ira->ira_ill)) {
1645 		ill = ipmp_ill_hold_ipmp_ill(ira->ira_ill);
1646 		if (ill == NULL) {
1647 			ill = ira->ira_ill;
1648 			BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
1649 			ip_drop_output("no IPMP ill for sending redirect",
1650 			    mp, ill);
1651 			freemsg(mp);
1652 			return;
1653 		}
1654 		need_refrele = B_TRUE;
1655 	} else {
1656 		ill = ira->ira_ill;
1657 		need_refrele = B_FALSE;
1658 	}
1659 
1660 	ncec = ncec_lookup_illgrp_v6(ill, targetp);
1661 	if (ncec != NULL && ncec->ncec_state != ND_INCOMPLETE &&
1662 	    ncec->ncec_lladdr != NULL) {
1663 		ll_opt_len = (sizeof (nd_opt_hdr_t) +
1664 		    ill->ill_phys_addr_length + 7)/8 * 8;
1665 	}
1666 	len = sizeof (nd_redirect_t) + sizeof (nd_opt_rd_hdr_t) + ll_opt_len;
1667 	ASSERT(len % 4 == 0);
1668 	buf = kmem_alloc(len, KM_NOSLEEP);
1669 	if (buf == NULL) {
1670 		if (ncec != NULL)
1671 			ncec_refrele(ncec);
1672 		if (need_refrele)
1673 			ill_refrele(ill);
1674 		freemsg(mp);
1675 		return;
1676 	}
1677 
1678 	rd = (nd_redirect_t *)buf;
1679 	rd->nd_rd_type = (uint8_t)ND_REDIRECT;
1680 	rd->nd_rd_code = 0;
1681 	rd->nd_rd_reserved = 0;
1682 	rd->nd_rd_target = *targetp;
1683 	rd->nd_rd_dst = *dest;
1684 
1685 	opt = (nd_opt_hdr_t *)(buf + sizeof (nd_redirect_t));
1686 	if (ncec != NULL && ll_opt_len != 0) {
1687 		opt->nd_opt_type = ND_OPT_TARGET_LINKADDR;
1688 		opt->nd_opt_len = ll_opt_len/8;
1689 		bcopy((char *)ncec->ncec_lladdr, &opt[1],
1690 		    ill->ill_phys_addr_length);
1691 	}
1692 	if (ncec != NULL)
1693 		ncec_refrele(ncec);
1694 	rdh = (nd_opt_rd_hdr_t *)(buf + sizeof (nd_redirect_t) + ll_opt_len);
1695 	rdh->nd_opt_rh_type = (uint8_t)ND_OPT_REDIRECTED_HEADER;
1696 	/* max_redir_hdr_data_len and nd_opt_rh_len must be multiple of 8 */
1697 	max_redir_hdr_data_len =
1698 	    (ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len)/8*8;
1699 	pkt_len = msgdsize(mp);
1700 	/* Make sure mp is 8 byte aligned */
1701 	if (pkt_len > max_redir_hdr_data_len) {
1702 		rdh->nd_opt_rh_len = (max_redir_hdr_data_len +
1703 		    sizeof (nd_opt_rd_hdr_t))/8;
1704 		(void) adjmsg(mp, max_redir_hdr_data_len - pkt_len);
1705 	} else {
1706 		rdh->nd_opt_rh_len = (pkt_len + sizeof (nd_opt_rd_hdr_t))/8;
1707 		(void) adjmsg(mp, -(pkt_len % 8));
1708 	}
1709 	rdh->nd_opt_rh_reserved1 = 0;
1710 	rdh->nd_opt_rh_reserved2 = 0;
1711 	/* ipif_v6lcl_addr contains the link-local source address */
1712 	srcp = &ill->ill_ipif->ipif_v6lcl_addr;
1713 
1714 	/* Redirects sent by router, and router is global zone */
1715 	ASSERT(ira->ira_zoneid == ALL_ZONES);
1716 	ira->ira_zoneid = GLOBAL_ZONEID;
1717 	icmp_pkt_v6(mp, buf, len, srcp, ira);
1718 	kmem_free(buf, len);
1719 	if (need_refrele)
1720 		ill_refrele(ill);
1721 }
1722 
1723 
1724 /* Generate an ICMP time exceeded message.  (May be called as writer.) */
1725 void
icmp_time_exceeded_v6(mblk_t * mp,uint8_t code,boolean_t mcast_ok,ip_recv_attr_t * ira)1726 icmp_time_exceeded_v6(mblk_t *mp, uint8_t code, boolean_t mcast_ok,
1727     ip_recv_attr_t *ira)
1728 {
1729 	icmp6_t	icmp6;
1730 
1731 	mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira);
1732 	if (mp == NULL)
1733 		return;
1734 
1735 	bzero(&icmp6, sizeof (icmp6_t));
1736 	icmp6.icmp6_type = ICMP6_TIME_EXCEEDED;
1737 	icmp6.icmp6_code = code;
1738 	icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira);
1739 }
1740 
1741 /*
1742  * Generate an ICMP unreachable message.
1743  * When called from ip_output side a minimal ip_recv_attr_t needs to be
1744  * constructed by the caller.
1745  */
1746 void
icmp_unreachable_v6(mblk_t * mp,uint8_t code,boolean_t mcast_ok,ip_recv_attr_t * ira)1747 icmp_unreachable_v6(mblk_t *mp, uint8_t code, boolean_t mcast_ok,
1748     ip_recv_attr_t *ira)
1749 {
1750 	icmp6_t	icmp6;
1751 
1752 	mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira);
1753 	if (mp == NULL)
1754 		return;
1755 
1756 	bzero(&icmp6, sizeof (icmp6_t));
1757 	icmp6.icmp6_type = ICMP6_DST_UNREACH;
1758 	icmp6.icmp6_code = code;
1759 	icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira);
1760 }
1761 
1762 /*
1763  * Generate an ICMP pkt too big message.
1764  * When called from ip_output side a minimal ip_recv_attr_t needs to be
1765  * constructed by the caller.
1766  */
1767 void
icmp_pkt2big_v6(mblk_t * mp,uint32_t mtu,boolean_t mcast_ok,ip_recv_attr_t * ira)1768 icmp_pkt2big_v6(mblk_t *mp, uint32_t mtu, boolean_t mcast_ok,
1769     ip_recv_attr_t *ira)
1770 {
1771 	icmp6_t	icmp6;
1772 
1773 	mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira);
1774 	if (mp == NULL)
1775 		return;
1776 
1777 	bzero(&icmp6, sizeof (icmp6_t));
1778 	icmp6.icmp6_type = ICMP6_PACKET_TOO_BIG;
1779 	icmp6.icmp6_code = 0;
1780 	icmp6.icmp6_mtu = htonl(mtu);
1781 
1782 	icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira);
1783 }
1784 
1785 /*
1786  * Generate an ICMP parameter problem message. (May be called as writer.)
1787  * 'offset' is the offset from the beginning of the packet in error.
1788  * When called from ip_output side a minimal ip_recv_attr_t needs to be
1789  * constructed by the caller.
1790  */
1791 static void
icmp_param_problem_v6(mblk_t * mp,uint8_t code,uint32_t offset,boolean_t mcast_ok,ip_recv_attr_t * ira)1792 icmp_param_problem_v6(mblk_t *mp, uint8_t code, uint32_t offset,
1793     boolean_t mcast_ok, ip_recv_attr_t *ira)
1794 {
1795 	icmp6_t	icmp6;
1796 
1797 	mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira);
1798 	if (mp == NULL)
1799 		return;
1800 
1801 	bzero((char *)&icmp6, sizeof (icmp6_t));
1802 	icmp6.icmp6_type = ICMP6_PARAM_PROB;
1803 	icmp6.icmp6_code = code;
1804 	icmp6.icmp6_pptr = htonl(offset);
1805 	icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira);
1806 }
1807 
1808 void
icmp_param_problem_nexthdr_v6(mblk_t * mp,boolean_t mcast_ok,ip_recv_attr_t * ira)1809 icmp_param_problem_nexthdr_v6(mblk_t *mp, boolean_t mcast_ok,
1810     ip_recv_attr_t *ira)
1811 {
1812 	ip6_t		*ip6h = (ip6_t *)mp->b_rptr;
1813 	uint16_t	hdr_length;
1814 	uint8_t		*nexthdrp;
1815 	uint32_t	offset;
1816 	ill_t		*ill = ira->ira_ill;
1817 
1818 	/* Determine the offset of the bad nexthdr value */
1819 	if (!ip_hdr_length_nexthdr_v6(mp, ip6h,	&hdr_length, &nexthdrp)) {
1820 		/* Malformed packet */
1821 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
1822 		ip_drop_input("ipIfStatsInDiscards", mp, ill);
1823 		freemsg(mp);
1824 		return;
1825 	}
1826 
1827 	offset = nexthdrp - mp->b_rptr;
1828 	icmp_param_problem_v6(mp, ICMP6_PARAMPROB_NEXTHEADER, offset,
1829 	    mcast_ok, ira);
1830 }
1831 
1832 /*
1833  * Verify whether or not the IP address is a valid local address.
1834  * Could be a unicast, including one for a down interface.
1835  * If allow_mcbc then a multicast or broadcast address is also
1836  * acceptable.
1837  *
1838  * In the case of a multicast address, however, the
1839  * upper protocol is expected to reset the src address
1840  * to zero when we return IPVL_MCAST so that
1841  * no packets are emitted with multicast address as
1842  * source address.
1843  * The addresses valid for bind are:
1844  *	(1) - in6addr_any
1845  *	(2) - IP address of an UP interface
1846  *	(3) - IP address of a DOWN interface
1847  *	(4) - a multicast address. In this case
1848  *	the conn will only receive packets destined to
1849  *	the specified multicast address. Note: the
1850  *	application still has to issue an
1851  *	IPV6_JOIN_GROUP socket option.
1852  *
1853  * In all the above cases, the bound address must be valid in the current zone.
1854  * When the address is loopback or multicast, there might be many matching IREs
1855  * so bind has to look up based on the zone.
1856  */
1857 ip_laddr_t
ip_laddr_verify_v6(const in6_addr_t * v6src,zoneid_t zoneid,ip_stack_t * ipst,boolean_t allow_mcbc,uint_t scopeid)1858 ip_laddr_verify_v6(const in6_addr_t *v6src, zoneid_t zoneid,
1859     ip_stack_t *ipst, boolean_t allow_mcbc, uint_t scopeid)
1860 {
1861 	ire_t		*src_ire;
1862 	uint_t		match_flags;
1863 	ill_t		*ill = NULL;
1864 
1865 	ASSERT(!IN6_IS_ADDR_V4MAPPED(v6src));
1866 	ASSERT(!IN6_IS_ADDR_UNSPECIFIED(v6src));
1867 
1868 	match_flags = MATCH_IRE_ZONEONLY;
1869 	if (scopeid != 0) {
1870 		ill = ill_lookup_on_ifindex(scopeid, B_TRUE, ipst);
1871 		if (ill == NULL)
1872 			return (IPVL_BAD);
1873 		match_flags |= MATCH_IRE_ILL;
1874 	}
1875 
1876 	src_ire = ire_ftable_lookup_v6(v6src, NULL, NULL, 0,
1877 	    ill, zoneid, NULL, match_flags, 0, ipst, NULL);
1878 	if (ill != NULL)
1879 		ill_refrele(ill);
1880 
1881 	/*
1882 	 * If an address other than in6addr_any is requested,
1883 	 * we verify that it is a valid address for bind
1884 	 * Note: Following code is in if-else-if form for
1885 	 * readability compared to a condition check.
1886 	 */
1887 	if (src_ire != NULL && (src_ire->ire_type & (IRE_LOCAL|IRE_LOOPBACK))) {
1888 		/*
1889 		 * (2) Bind to address of local UP interface
1890 		 */
1891 		ire_refrele(src_ire);
1892 		return (IPVL_UNICAST_UP);
1893 	} else if (IN6_IS_ADDR_MULTICAST(v6src)) {
1894 		/* (4) bind to multicast address. */
1895 		if (src_ire != NULL)
1896 			ire_refrele(src_ire);
1897 
1898 		/*
1899 		 * Note: caller should take IPV6_MULTICAST_IF
1900 		 * into account when selecting a real source address.
1901 		 */
1902 		if (allow_mcbc)
1903 			return (IPVL_MCAST);
1904 		else
1905 			return (IPVL_BAD);
1906 	} else {
1907 		ipif_t *ipif;
1908 
1909 		/*
1910 		 * (3) Bind to address of local DOWN interface?
1911 		 * (ipif_lookup_addr() looks up all interfaces
1912 		 * but we do not get here for UP interfaces
1913 		 * - case (2) above)
1914 		 */
1915 		if (src_ire != NULL)
1916 			ire_refrele(src_ire);
1917 
1918 		ipif = ipif_lookup_addr_v6(v6src, NULL, zoneid, ipst);
1919 		if (ipif == NULL)
1920 			return (IPVL_BAD);
1921 
1922 		/* Not a useful source? */
1923 		if (ipif->ipif_flags & (IPIF_NOLOCAL | IPIF_ANYCAST)) {
1924 			ipif_refrele(ipif);
1925 			return (IPVL_BAD);
1926 		}
1927 		ipif_refrele(ipif);
1928 		return (IPVL_UNICAST_DOWN);
1929 	}
1930 }
1931 
1932 /*
1933  * Verify that both the source and destination addresses are valid.  If
1934  * IPDF_VERIFY_DST is not set, then the destination address may be unreachable,
1935  * i.e. have no route to it.  Protocols like TCP want to verify destination
1936  * reachability, while tunnels do not.
1937  *
1938  * Determine the route, the interface, and (optionally) the source address
1939  * to use to reach a given destination.
1940  * Note that we allow connect to broadcast and multicast addresses when
1941  * IPDF_ALLOW_MCBC is set.
1942  * first_hop and dst_addr are normally the same, but if source routing
1943  * they will differ; in that case the first_hop is what we'll use for the
1944  * routing lookup but the dce and label checks will be done on dst_addr,
1945  *
1946  * If uinfo is set, then we fill in the best available information
1947  * we have for the destination. This is based on (in priority order) any
1948  * metrics and path MTU stored in a dce_t, route metrics, and finally the
1949  * ill_mtu/ill_mc_mtu.
1950  *
1951  * Tsol note: If we have a source route then dst_addr != firsthop. But we
1952  * always do the label check on dst_addr.
1953  *
1954  * Assumes that the caller has set ixa_scopeid for link-local communication.
1955  */
1956 int
ip_set_destination_v6(in6_addr_t * src_addrp,const in6_addr_t * dst_addr,const in6_addr_t * firsthop,ip_xmit_attr_t * ixa,iulp_t * uinfo,uint32_t flags,uint_t mac_mode)1957 ip_set_destination_v6(in6_addr_t *src_addrp, const in6_addr_t *dst_addr,
1958     const in6_addr_t *firsthop, ip_xmit_attr_t *ixa, iulp_t *uinfo,
1959     uint32_t flags, uint_t mac_mode)
1960 {
1961 	ire_t		*ire;
1962 	int		error = 0;
1963 	in6_addr_t	setsrc;				/* RTF_SETSRC */
1964 	zoneid_t	zoneid = ixa->ixa_zoneid;	/* Honors SO_ALLZONES */
1965 	ip_stack_t	*ipst = ixa->ixa_ipst;
1966 	dce_t		*dce;
1967 	uint_t		pmtu;
1968 	uint_t		ifindex;
1969 	uint_t		generation;
1970 	nce_t		*nce;
1971 	ill_t		*ill = NULL;
1972 	boolean_t	multirt = B_FALSE;
1973 
1974 	ASSERT(!IN6_IS_ADDR_V4MAPPED(dst_addr));
1975 
1976 	ASSERT(!(ixa->ixa_flags & IXAF_IS_IPV4));
1977 
1978 	/*
1979 	 * We never send to zero; the ULPs map it to the loopback address.
1980 	 * We can't allow it since we use zero to mean unitialized in some
1981 	 * places.
1982 	 */
1983 	ASSERT(!IN6_IS_ADDR_UNSPECIFIED(dst_addr));
1984 
1985 	if (is_system_labeled()) {
1986 		ts_label_t *tsl = NULL;
1987 
1988 		error = tsol_check_dest(ixa->ixa_tsl, dst_addr, IPV6_VERSION,
1989 		    mac_mode, (flags & IPDF_ZONE_IS_GLOBAL) != 0, &tsl);
1990 		if (error != 0)
1991 			return (error);
1992 		if (tsl != NULL) {
1993 			/* Update the label */
1994 			ip_xmit_attr_replace_tsl(ixa, tsl);
1995 		}
1996 	}
1997 
1998 	setsrc = ipv6_all_zeros;
1999 	/*
2000 	 * Select a route; For IPMP interfaces, we would only select
2001 	 * a "hidden" route (i.e., going through a specific under_ill)
2002 	 * if ixa_ifindex has been specified.
2003 	 */
2004 	ire = ip_select_route_v6(firsthop, *src_addrp, ixa, &generation,
2005 	    &setsrc, &error, &multirt);
2006 	ASSERT(ire != NULL);	/* IRE_NOROUTE if none found */
2007 	if (error != 0)
2008 		goto bad_addr;
2009 
2010 	/*
2011 	 * ire can't be a broadcast or multicast unless IPDF_ALLOW_MCBC is set.
2012 	 * If IPDF_VERIFY_DST is set, the destination must be reachable.
2013 	 * Otherwise the destination needn't be reachable.
2014 	 *
2015 	 * If we match on a reject or black hole, then we've got a
2016 	 * local failure.  May as well fail out the connect() attempt,
2017 	 * since it's never going to succeed.
2018 	 */
2019 	if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
2020 		/*
2021 		 * If we're verifying destination reachability, we always want
2022 		 * to complain here.
2023 		 *
2024 		 * If we're not verifying destination reachability but the
2025 		 * destination has a route, we still want to fail on the
2026 		 * temporary address and broadcast address tests.
2027 		 *
2028 		 * In both cases do we let the code continue so some reasonable
2029 		 * information is returned to the caller. That enables the
2030 		 * caller to use (and even cache) the IRE. conn_ip_ouput will
2031 		 * use the generation mismatch path to check for the unreachable
2032 		 * case thereby avoiding any specific check in the main path.
2033 		 */
2034 		ASSERT(generation == IRE_GENERATION_VERIFY);
2035 		if (flags & IPDF_VERIFY_DST) {
2036 			/*
2037 			 * Set errno but continue to set up ixa_ire to be
2038 			 * the RTF_REJECT|RTF_BLACKHOLE IRE.
2039 			 * That allows callers to use ip_output to get an
2040 			 * ICMP error back.
2041 			 */
2042 			if (!(ire->ire_type & IRE_HOST))
2043 				error = ENETUNREACH;
2044 			else
2045 				error = EHOSTUNREACH;
2046 		}
2047 	}
2048 
2049 	if ((ire->ire_type & (IRE_BROADCAST|IRE_MULTICAST)) &&
2050 	    !(flags & IPDF_ALLOW_MCBC)) {
2051 		ire_refrele(ire);
2052 		ire = ire_reject(ipst, B_FALSE);
2053 		generation = IRE_GENERATION_VERIFY;
2054 		error = ENETUNREACH;
2055 	}
2056 
2057 	/* Cache things */
2058 	if (ixa->ixa_ire != NULL)
2059 		ire_refrele_notr(ixa->ixa_ire);
2060 #ifdef DEBUG
2061 	ire_refhold_notr(ire);
2062 	ire_refrele(ire);
2063 #endif
2064 	ixa->ixa_ire = ire;
2065 	ixa->ixa_ire_generation = generation;
2066 
2067 	/*
2068 	 * Ensure that ixa_dce is always set any time that ixa_ire is set,
2069 	 * since some callers will send a packet to conn_ip_output() even if
2070 	 * there's an error.
2071 	 */
2072 	ifindex = 0;
2073 	if (IN6_IS_ADDR_LINKSCOPE(dst_addr)) {
2074 		/* If we are creating a DCE we'd better have an ifindex */
2075 		if (ill != NULL)
2076 			ifindex = ill->ill_phyint->phyint_ifindex;
2077 		else
2078 			flags &= ~IPDF_UNIQUE_DCE;
2079 	}
2080 
2081 	if (flags & IPDF_UNIQUE_DCE) {
2082 		/* Fallback to the default dce if allocation fails */
2083 		dce = dce_lookup_and_add_v6(dst_addr, ifindex, ipst);
2084 		if (dce != NULL) {
2085 			generation = dce->dce_generation;
2086 		} else {
2087 			dce = dce_lookup_v6(dst_addr, ifindex, ipst,
2088 			    &generation);
2089 		}
2090 	} else {
2091 		dce = dce_lookup_v6(dst_addr, ifindex, ipst, &generation);
2092 	}
2093 	ASSERT(dce != NULL);
2094 	if (ixa->ixa_dce != NULL)
2095 		dce_refrele_notr(ixa->ixa_dce);
2096 #ifdef DEBUG
2097 	dce_refhold_notr(dce);
2098 	dce_refrele(dce);
2099 #endif
2100 	ixa->ixa_dce = dce;
2101 	ixa->ixa_dce_generation = generation;
2102 
2103 
2104 	/*
2105 	 * For multicast with multirt we have a flag passed back from
2106 	 * ire_lookup_multi_ill_v6 since we don't have an IRE for each
2107 	 * possible multicast address.
2108 	 * We also need a flag for multicast since we can't check
2109 	 * whether RTF_MULTIRT is set in ixa_ire for multicast.
2110 	 */
2111 	if (multirt) {
2112 		ixa->ixa_postfragfn = ip_postfrag_multirt_v6;
2113 		ixa->ixa_flags |= IXAF_MULTIRT_MULTICAST;
2114 	} else {
2115 		ixa->ixa_postfragfn = ire->ire_postfragfn;
2116 		ixa->ixa_flags &= ~IXAF_MULTIRT_MULTICAST;
2117 	}
2118 	if (!(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE))) {
2119 		/* Get an nce to cache. */
2120 		nce = ire_to_nce(ire, 0, firsthop);
2121 		if (nce == NULL) {
2122 			/* Allocation failure? */
2123 			ixa->ixa_ire_generation = IRE_GENERATION_VERIFY;
2124 		} else {
2125 			if (ixa->ixa_nce != NULL)
2126 				nce_refrele(ixa->ixa_nce);
2127 			ixa->ixa_nce = nce;
2128 		}
2129 	}
2130 
2131 	/*
2132 	 * If the source address is a loopback address, the
2133 	 * destination had best be local or multicast.
2134 	 * If we are sending to an IRE_LOCAL using a loopback source then
2135 	 * it had better be the same zoneid.
2136 	 */
2137 	if (IN6_IS_ADDR_LOOPBACK(src_addrp)) {
2138 		if ((ire->ire_type & IRE_LOCAL) && ire->ire_zoneid != zoneid) {
2139 			ire = NULL;	/* Stored in ixa_ire */
2140 			error = EADDRNOTAVAIL;
2141 			goto bad_addr;
2142 		}
2143 		if (!(ire->ire_type & (IRE_LOOPBACK|IRE_LOCAL|IRE_MULTICAST))) {
2144 			ire = NULL;	/* Stored in ixa_ire */
2145 			error = EADDRNOTAVAIL;
2146 			goto bad_addr;
2147 		}
2148 	}
2149 
2150 	/*
2151 	 * Does the caller want us to pick a source address?
2152 	 */
2153 	if (flags & IPDF_SELECT_SRC) {
2154 		in6_addr_t	src_addr;
2155 
2156 		/*
2157 		 * We use use ire_nexthop_ill to avoid the under ipmp
2158 		 * interface for source address selection. Note that for ipmp
2159 		 * probe packets, ixa_ifindex would have been specified, and
2160 		 * the ip_select_route() invocation would have picked an ire
2161 		 * will ire_ill pointing at an under interface.
2162 		 */
2163 		ill = ire_nexthop_ill(ire);
2164 
2165 		/* If unreachable we have no ill but need some source */
2166 		if (ill == NULL) {
2167 			src_addr = ipv6_loopback;
2168 			/* Make sure we look for a better source address */
2169 			generation = SRC_GENERATION_VERIFY;
2170 		} else {
2171 			error = ip_select_source_v6(ill, &setsrc, dst_addr,
2172 			    zoneid, ipst, B_FALSE, ixa->ixa_src_preferences,
2173 			    &src_addr, &generation, NULL);
2174 			if (error != 0) {
2175 				ire = NULL;	/* Stored in ixa_ire */
2176 				goto bad_addr;
2177 			}
2178 		}
2179 
2180 		/*
2181 		 * We allow the source address to to down.
2182 		 * However, we check that we don't use the loopback address
2183 		 * as a source when sending out on the wire.
2184 		 */
2185 		if (IN6_IS_ADDR_LOOPBACK(&src_addr) &&
2186 		    !(ire->ire_type & (IRE_LOCAL|IRE_LOOPBACK|IRE_MULTICAST)) &&
2187 		    !(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE))) {
2188 			ire = NULL;	/* Stored in ixa_ire */
2189 			error = EADDRNOTAVAIL;
2190 			goto bad_addr;
2191 		}
2192 
2193 		*src_addrp = src_addr;
2194 		ixa->ixa_src_generation = generation;
2195 	}
2196 
2197 	/*
2198 	 * Make sure we don't leave an unreachable ixa_nce in place
2199 	 * since ip_select_route is used when we unplumb i.e., remove
2200 	 * references on ixa_ire, ixa_nce, and ixa_dce.
2201 	 */
2202 	nce = ixa->ixa_nce;
2203 	if (nce != NULL && nce->nce_is_condemned) {
2204 		nce_refrele(nce);
2205 		ixa->ixa_nce = NULL;
2206 		ixa->ixa_ire_generation = IRE_GENERATION_VERIFY;
2207 	}
2208 
2209 	/*
2210 	 * Note that IPv6 multicast supports PMTU discovery unlike IPv4
2211 	 * multicast. But pmtu discovery is only enabled for connected
2212 	 * sockets in general.
2213 	 */
2214 
2215 	/*
2216 	 * Set initial value for fragmentation limit.  Either conn_ip_output
2217 	 * or ULP might updates it when there are routing changes.
2218 	 * Handles a NULL ixa_ire->ire_ill or a NULL ixa_nce for RTF_REJECT.
2219 	 */
2220 	pmtu = ip_get_pmtu(ixa);
2221 	ixa->ixa_fragsize = pmtu;
2222 	/* Make sure ixa_fragsize and ixa_pmtu remain identical */
2223 	if (ixa->ixa_flags & IXAF_VERIFY_PMTU)
2224 		ixa->ixa_pmtu = pmtu;
2225 
2226 	/*
2227 	 * Extract information useful for some transports.
2228 	 * First we look for DCE metrics. Then we take what we have in
2229 	 * the metrics in the route, where the offlink is used if we have
2230 	 * one.
2231 	 */
2232 	if (uinfo != NULL) {
2233 		bzero(uinfo, sizeof (*uinfo));
2234 
2235 		if (dce->dce_flags & DCEF_UINFO)
2236 			*uinfo = dce->dce_uinfo;
2237 
2238 		rts_merge_metrics(uinfo, &ire->ire_metrics);
2239 
2240 		/* Allow ire_metrics to decrease the path MTU from above */
2241 		if (uinfo->iulp_mtu == 0 || uinfo->iulp_mtu > pmtu)
2242 			uinfo->iulp_mtu = pmtu;
2243 
2244 		uinfo->iulp_localnet = (ire->ire_type & IRE_ONLINK) != 0;
2245 		uinfo->iulp_loopback = (ire->ire_type & IRE_LOOPBACK) != 0;
2246 		uinfo->iulp_local = (ire->ire_type & IRE_LOCAL) != 0;
2247 	}
2248 
2249 	if (ill != NULL)
2250 		ill_refrele(ill);
2251 
2252 	return (error);
2253 
2254 bad_addr:
2255 	if (ire != NULL)
2256 		ire_refrele(ire);
2257 
2258 	if (ill != NULL)
2259 		ill_refrele(ill);
2260 
2261 	/*
2262 	 * Make sure we don't leave an unreachable ixa_nce in place
2263 	 * since ip_select_route is used when we unplumb i.e., remove
2264 	 * references on ixa_ire, ixa_nce, and ixa_dce.
2265 	 */
2266 	nce = ixa->ixa_nce;
2267 	if (nce != NULL && nce->nce_is_condemned) {
2268 		nce_refrele(nce);
2269 		ixa->ixa_nce = NULL;
2270 		ixa->ixa_ire_generation = IRE_GENERATION_VERIFY;
2271 	}
2272 
2273 	return (error);
2274 }
2275 
2276 /*
2277  * Handle protocols with which IP is less intimate.  There
2278  * can be more than one stream bound to a particular
2279  * protocol.  When this is the case, normally each one gets a copy
2280  * of any incoming packets.
2281  *
2282  * Zones notes:
2283  * Packets will be distributed to conns in all zones. This is really only
2284  * useful for ICMPv6 as only applications in the global zone can create raw
2285  * sockets for other protocols.
2286  */
2287 void
ip_fanout_proto_v6(mblk_t * mp,ip6_t * ip6h,ip_recv_attr_t * ira)2288 ip_fanout_proto_v6(mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira)
2289 {
2290 	mblk_t		*mp1;
2291 	in6_addr_t	laddr = ip6h->ip6_dst;
2292 	conn_t		*connp, *first_connp, *next_connp;
2293 	connf_t		*connfp;
2294 	ill_t		*ill = ira->ira_ill;
2295 	ip_stack_t	*ipst = ill->ill_ipst;
2296 
2297 	connfp = &ipst->ips_ipcl_proto_fanout_v6[ira->ira_protocol];
2298 	mutex_enter(&connfp->connf_lock);
2299 	connp = connfp->connf_head;
2300 	for (connp = connfp->connf_head; connp != NULL;
2301 	    connp = connp->conn_next) {
2302 		/* Note: IPCL_PROTO_MATCH_V6 includes conn_wantpacket */
2303 		if (IPCL_PROTO_MATCH_V6(connp, ira, ip6h) &&
2304 		    (!(ira->ira_flags & IRAF_SYSTEM_LABELED) ||
2305 		    tsol_receive_local(mp, &laddr, IPV6_VERSION, ira, connp)))
2306 			break;
2307 	}
2308 
2309 	if (connp == NULL) {
2310 		/*
2311 		 * No one bound to this port.  Is
2312 		 * there a client that wants all
2313 		 * unclaimed datagrams?
2314 		 */
2315 		mutex_exit(&connfp->connf_lock);
2316 		ip_fanout_send_icmp_v6(mp, ICMP6_PARAM_PROB,
2317 		    ICMP6_PARAMPROB_NEXTHEADER, ira);
2318 		return;
2319 	}
2320 
2321 	ASSERT(IPCL_IS_NONSTR(connp) || connp->conn_rq != NULL);
2322 
2323 	CONN_INC_REF(connp);
2324 	first_connp = connp;
2325 
2326 	/*
2327 	 * XXX: Fix the multiple protocol listeners case. We should not
2328 	 * be walking the conn->conn_next list here.
2329 	 */
2330 	connp = connp->conn_next;
2331 	for (;;) {
2332 		while (connp != NULL) {
2333 			/* Note: IPCL_PROTO_MATCH_V6 includes conn_wantpacket */
2334 			if (IPCL_PROTO_MATCH_V6(connp, ira, ip6h) &&
2335 			    (!(ira->ira_flags & IRAF_SYSTEM_LABELED) ||
2336 			    tsol_receive_local(mp, &laddr, IPV6_VERSION,
2337 			    ira, connp)))
2338 				break;
2339 			connp = connp->conn_next;
2340 		}
2341 
2342 		if (connp == NULL) {
2343 			/* No more interested clients */
2344 			connp = first_connp;
2345 			break;
2346 		}
2347 		if (((mp1 = dupmsg(mp)) == NULL) &&
2348 		    ((mp1 = copymsg(mp)) == NULL)) {
2349 			/* Memory allocation failed */
2350 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2351 			ip_drop_input("ipIfStatsInDiscards", mp, ill);
2352 			connp = first_connp;
2353 			break;
2354 		}
2355 
2356 		CONN_INC_REF(connp);
2357 		mutex_exit(&connfp->connf_lock);
2358 
2359 		ip_fanout_proto_conn(connp, mp1, NULL, (ip6_t *)mp1->b_rptr,
2360 		    ira);
2361 
2362 		mutex_enter(&connfp->connf_lock);
2363 		/* Follow the next pointer before releasing the conn. */
2364 		next_connp = connp->conn_next;
2365 		CONN_DEC_REF(connp);
2366 		connp = next_connp;
2367 	}
2368 
2369 	/* Last one.  Send it upstream. */
2370 	mutex_exit(&connfp->connf_lock);
2371 
2372 	ip_fanout_proto_conn(connp, mp, NULL, ip6h, ira);
2373 
2374 	CONN_DEC_REF(connp);
2375 }
2376 
2377 /*
2378  * Called when it is conceptually a ULP that would sent the packet
2379  * e.g., port unreachable and nexthdr unknown. Check that the packet
2380  * would have passed the IPsec global policy before sending the error.
2381  *
2382  * Send an ICMP error after patching up the packet appropriately.
2383  * Uses ip_drop_input and bumps the appropriate MIB.
2384  * For ICMP6_PARAMPROB_NEXTHEADER we determine the offset to use.
2385  */
2386 void
ip_fanout_send_icmp_v6(mblk_t * mp,uint_t icmp_type,uint8_t icmp_code,ip_recv_attr_t * ira)2387 ip_fanout_send_icmp_v6(mblk_t *mp, uint_t icmp_type, uint8_t icmp_code,
2388     ip_recv_attr_t *ira)
2389 {
2390 	ip6_t		*ip6h;
2391 	boolean_t	secure;
2392 	ill_t		*ill = ira->ira_ill;
2393 	ip_stack_t	*ipst = ill->ill_ipst;
2394 	netstack_t	*ns = ipst->ips_netstack;
2395 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
2396 
2397 	secure = ira->ira_flags & IRAF_IPSEC_SECURE;
2398 
2399 	/*
2400 	 * We are generating an icmp error for some inbound packet.
2401 	 * Called from all ip_fanout_(udp, tcp, proto) functions.
2402 	 * Before we generate an error, check with global policy
2403 	 * to see whether this is allowed to enter the system. As
2404 	 * there is no "conn", we are checking with global policy.
2405 	 */
2406 	ip6h = (ip6_t *)mp->b_rptr;
2407 	if (secure || ipss->ipsec_inbound_v6_policy_present) {
2408 		mp = ipsec_check_global_policy(mp, NULL, NULL, ip6h, ira, ns);
2409 		if (mp == NULL)
2410 			return;
2411 	}
2412 
2413 	/* We never send errors for protocols that we do implement */
2414 	if (ira->ira_protocol == IPPROTO_ICMPV6) {
2415 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2416 		ip_drop_input("ip_fanout_send_icmp_v6", mp, ill);
2417 		freemsg(mp);
2418 		return;
2419 	}
2420 
2421 	switch (icmp_type) {
2422 	case ICMP6_DST_UNREACH:
2423 		ASSERT(icmp_code == ICMP6_DST_UNREACH_NOPORT);
2424 
2425 		BUMP_MIB(ill->ill_ip_mib, udpIfStatsNoPorts);
2426 		ip_drop_input("ipIfStatsNoPorts", mp, ill);
2427 
2428 		icmp_unreachable_v6(mp, icmp_code, B_FALSE, ira);
2429 		break;
2430 	case ICMP6_PARAM_PROB:
2431 		ASSERT(icmp_code == ICMP6_PARAMPROB_NEXTHEADER);
2432 
2433 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInUnknownProtos);
2434 		ip_drop_input("ipIfStatsInUnknownProtos", mp, ill);
2435 
2436 		/* Let the system determine the offset for this one */
2437 		icmp_param_problem_nexthdr_v6(mp, B_FALSE, ira);
2438 		break;
2439 	default:
2440 #ifdef DEBUG
2441 		panic("ip_fanout_send_icmp_v6: wrong type");
2442 		/*NOTREACHED*/
2443 #else
2444 		freemsg(mp);
2445 		break;
2446 #endif
2447 	}
2448 }
2449 
2450 /*
2451  * Fanout for UDP packets that are multicast or ICMP errors.
2452  * (Unicast fanout is handled in ip_input_v6.)
2453  *
2454  * If SO_REUSEADDR is set all multicast packets
2455  * will be delivered to all conns bound to the same port.
2456  *
2457  * Fanout for UDP packets.
2458  * The caller puts <fport, lport> in the ports parameter.
2459  * ire_type must be IRE_BROADCAST for multicast and broadcast packets.
2460  *
2461  * If SO_REUSEADDR is set all multicast and broadcast packets
2462  * will be delivered to all conns bound to the same port.
2463  *
2464  * Zones notes:
2465  * Earlier in ip_input on a system with multiple shared-IP zones we
2466  * duplicate the multicast and broadcast packets and send them up
2467  * with each explicit zoneid that exists on that ill.
2468  * This means that here we can match the zoneid with SO_ALLZONES being special.
2469  */
2470 void
ip_fanout_udp_multi_v6(mblk_t * mp,ip6_t * ip6h,uint16_t lport,uint16_t fport,ip_recv_attr_t * ira)2471 ip_fanout_udp_multi_v6(mblk_t *mp, ip6_t *ip6h, uint16_t lport, uint16_t fport,
2472     ip_recv_attr_t *ira)
2473 {
2474 	in6_addr_t	laddr;
2475 	conn_t		*connp;
2476 	connf_t		*connfp;
2477 	in6_addr_t	faddr;
2478 	ill_t		*ill = ira->ira_ill;
2479 	ip_stack_t	*ipst = ill->ill_ipst;
2480 
2481 	ASSERT(ira->ira_flags & (IRAF_MULTIBROADCAST|IRAF_ICMP_ERROR));
2482 
2483 	laddr = ip6h->ip6_dst;
2484 	faddr = ip6h->ip6_src;
2485 
2486 	/* Attempt to find a client stream based on destination port. */
2487 	connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)];
2488 	mutex_enter(&connfp->connf_lock);
2489 	connp = connfp->connf_head;
2490 	while (connp != NULL) {
2491 		if ((IPCL_UDP_MATCH_V6(connp, lport, laddr, fport, faddr)) &&
2492 		    conn_wantpacket_v6(connp, ira, ip6h) &&
2493 		    (!(ira->ira_flags & IRAF_SYSTEM_LABELED) ||
2494 		    tsol_receive_local(mp, &laddr, IPV6_VERSION, ira, connp)))
2495 			break;
2496 		connp = connp->conn_next;
2497 	}
2498 
2499 	if (connp == NULL)
2500 		goto notfound;
2501 
2502 	CONN_INC_REF(connp);
2503 
2504 	if (connp->conn_reuseaddr) {
2505 		conn_t		*first_connp = connp;
2506 		conn_t		*next_connp;
2507 		mblk_t		*mp1;
2508 
2509 		connp = connp->conn_next;
2510 		for (;;) {
2511 			while (connp != NULL) {
2512 				if (IPCL_UDP_MATCH_V6(connp, lport, laddr,
2513 				    fport, faddr) &&
2514 				    conn_wantpacket_v6(connp, ira, ip6h) &&
2515 				    (!(ira->ira_flags & IRAF_SYSTEM_LABELED) ||
2516 				    tsol_receive_local(mp, &laddr, IPV6_VERSION,
2517 				    ira, connp)))
2518 					break;
2519 				connp = connp->conn_next;
2520 			}
2521 			if (connp == NULL) {
2522 				/* No more interested clients */
2523 				connp = first_connp;
2524 				break;
2525 			}
2526 			if (((mp1 = dupmsg(mp)) == NULL) &&
2527 			    ((mp1 = copymsg(mp)) == NULL)) {
2528 				/* Memory allocation failed */
2529 				BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2530 				ip_drop_input("ipIfStatsInDiscards", mp, ill);
2531 				connp = first_connp;
2532 				break;
2533 			}
2534 
2535 			CONN_INC_REF(connp);
2536 			mutex_exit(&connfp->connf_lock);
2537 
2538 			IP6_STAT(ipst, ip6_udp_fanmb);
2539 			ip_fanout_udp_conn(connp, mp1, NULL,
2540 			    (ip6_t *)mp1->b_rptr, ira);
2541 
2542 			mutex_enter(&connfp->connf_lock);
2543 			/* Follow the next pointer before releasing the conn. */
2544 			next_connp = connp->conn_next;
2545 			IP6_STAT(ipst, ip6_udp_fanmb);
2546 			CONN_DEC_REF(connp);
2547 			connp = next_connp;
2548 		}
2549 	}
2550 
2551 	/* Last one.  Send it upstream. */
2552 	mutex_exit(&connfp->connf_lock);
2553 
2554 	IP6_STAT(ipst, ip6_udp_fanmb);
2555 	ip_fanout_udp_conn(connp, mp, NULL, ip6h, ira);
2556 	CONN_DEC_REF(connp);
2557 	return;
2558 
2559 notfound:
2560 	mutex_exit(&connfp->connf_lock);
2561 	/*
2562 	 * No one bound to this port.  Is
2563 	 * there a client that wants all
2564 	 * unclaimed datagrams?
2565 	 */
2566 	if (ipst->ips_ipcl_proto_fanout_v6[IPPROTO_UDP].connf_head != NULL) {
2567 		ASSERT(ira->ira_protocol == IPPROTO_UDP);
2568 		ip_fanout_proto_v6(mp, ip6h, ira);
2569 	} else {
2570 		ip_fanout_send_icmp_v6(mp, ICMP6_DST_UNREACH,
2571 		    ICMP6_DST_UNREACH_NOPORT, ira);
2572 	}
2573 }
2574 
2575 /*
2576  * int ip_find_hdr_v6()
2577  *
2578  * This routine is used by the upper layer protocols, iptun, and IPsec:
2579  * - Set extension header pointers to appropriate locations
2580  * - Determine IPv6 header length and return it
2581  * - Return a pointer to the last nexthdr value
2582  *
2583  * The caller must initialize ipp_fields.
2584  * The upper layer protocols normally set label_separate which makes the
2585  * routine put the TX label in ipp_label_v6. If this is not set then
2586  * the hop-by-hop options including the label are placed in ipp_hopopts.
2587  *
2588  * NOTE: If multiple extension headers of the same type are present,
2589  * ip_find_hdr_v6() will set the respective extension header pointers
2590  * to the first one that it encounters in the IPv6 header.  It also
2591  * skips fragment headers.  This routine deals with malformed packets
2592  * of various sorts in which case the returned length is up to the
2593  * malformed part.
2594  */
2595 int
ip_find_hdr_v6(mblk_t * mp,ip6_t * ip6h,boolean_t label_separate,ip_pkt_t * ipp,uint8_t * nexthdrp)2596 ip_find_hdr_v6(mblk_t *mp, ip6_t *ip6h, boolean_t label_separate, ip_pkt_t *ipp,
2597     uint8_t *nexthdrp)
2598 {
2599 	uint_t	length, ehdrlen;
2600 	uint8_t nexthdr;
2601 	uint8_t *whereptr, *endptr;
2602 	ip6_dest_t *tmpdstopts;
2603 	ip6_rthdr_t *tmprthdr;
2604 	ip6_hbh_t *tmphopopts;
2605 	ip6_frag_t *tmpfraghdr;
2606 
2607 	ipp->ipp_fields |= IPPF_HOPLIMIT | IPPF_TCLASS | IPPF_ADDR;
2608 	ipp->ipp_hoplimit = ip6h->ip6_hops;
2609 	ipp->ipp_tclass = IPV6_FLOW_TCLASS(ip6h->ip6_flow);
2610 	ipp->ipp_addr = ip6h->ip6_dst;
2611 
2612 	length = IPV6_HDR_LEN;
2613 	whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */
2614 	endptr = mp->b_wptr;
2615 
2616 	nexthdr = ip6h->ip6_nxt;
2617 	while (whereptr < endptr) {
2618 		/* Is there enough left for len + nexthdr? */
2619 		if (whereptr + MIN_EHDR_LEN > endptr)
2620 			goto done;
2621 
2622 		switch (nexthdr) {
2623 		case IPPROTO_HOPOPTS: {
2624 			/* We check for any CIPSO */
2625 			uchar_t *secopt;
2626 			boolean_t hbh_needed;
2627 			uchar_t *after_secopt;
2628 
2629 			tmphopopts = (ip6_hbh_t *)whereptr;
2630 			ehdrlen = 8 * (tmphopopts->ip6h_len + 1);
2631 			if ((uchar_t *)tmphopopts +  ehdrlen > endptr)
2632 				goto done;
2633 			nexthdr = tmphopopts->ip6h_nxt;
2634 
2635 			if (!label_separate) {
2636 				secopt = NULL;
2637 				after_secopt = whereptr;
2638 			} else {
2639 				/*
2640 				 * We have dropped packets with bad options in
2641 				 * ip6_input. No need to check return value
2642 				 * here.
2643 				 */
2644 				(void) tsol_find_secopt_v6(whereptr, ehdrlen,
2645 				    &secopt, &after_secopt, &hbh_needed);
2646 			}
2647 			if (secopt != NULL && after_secopt - whereptr > 0) {
2648 				ipp->ipp_fields |= IPPF_LABEL_V6;
2649 				ipp->ipp_label_v6 = secopt;
2650 				ipp->ipp_label_len_v6 = after_secopt - whereptr;
2651 			} else {
2652 				ipp->ipp_label_len_v6 = 0;
2653 				after_secopt = whereptr;
2654 				hbh_needed = B_TRUE;
2655 			}
2656 			/* return only 1st hbh */
2657 			if (hbh_needed && !(ipp->ipp_fields & IPPF_HOPOPTS)) {
2658 				ipp->ipp_fields |= IPPF_HOPOPTS;
2659 				ipp->ipp_hopopts = (ip6_hbh_t *)after_secopt;
2660 				ipp->ipp_hopoptslen = ehdrlen -
2661 				    ipp->ipp_label_len_v6;
2662 			}
2663 			break;
2664 		}
2665 		case IPPROTO_DSTOPTS:
2666 			tmpdstopts = (ip6_dest_t *)whereptr;
2667 			ehdrlen = 8 * (tmpdstopts->ip6d_len + 1);
2668 			if ((uchar_t *)tmpdstopts +  ehdrlen > endptr)
2669 				goto done;
2670 			nexthdr = tmpdstopts->ip6d_nxt;
2671 			/*
2672 			 * ipp_dstopts is set to the destination header after a
2673 			 * routing header.
2674 			 * Assume it is a post-rthdr destination header
2675 			 * and adjust when we find an rthdr.
2676 			 */
2677 			if (!(ipp->ipp_fields & IPPF_DSTOPTS)) {
2678 				ipp->ipp_fields |= IPPF_DSTOPTS;
2679 				ipp->ipp_dstopts = tmpdstopts;
2680 				ipp->ipp_dstoptslen = ehdrlen;
2681 			}
2682 			break;
2683 		case IPPROTO_ROUTING:
2684 			tmprthdr = (ip6_rthdr_t *)whereptr;
2685 			ehdrlen = 8 * (tmprthdr->ip6r_len + 1);
2686 			if ((uchar_t *)tmprthdr +  ehdrlen > endptr)
2687 				goto done;
2688 			nexthdr = tmprthdr->ip6r_nxt;
2689 			/* return only 1st rthdr */
2690 			if (!(ipp->ipp_fields & IPPF_RTHDR)) {
2691 				ipp->ipp_fields |= IPPF_RTHDR;
2692 				ipp->ipp_rthdr = tmprthdr;
2693 				ipp->ipp_rthdrlen = ehdrlen;
2694 			}
2695 			/*
2696 			 * Make any destination header we've seen be a
2697 			 * pre-rthdr destination header.
2698 			 */
2699 			if (ipp->ipp_fields & IPPF_DSTOPTS) {
2700 				ipp->ipp_fields &= ~IPPF_DSTOPTS;
2701 				ipp->ipp_fields |= IPPF_RTHDRDSTOPTS;
2702 				ipp->ipp_rthdrdstopts = ipp->ipp_dstopts;
2703 				ipp->ipp_dstopts = NULL;
2704 				ipp->ipp_rthdrdstoptslen = ipp->ipp_dstoptslen;
2705 				ipp->ipp_dstoptslen = 0;
2706 			}
2707 			break;
2708 		case IPPROTO_FRAGMENT:
2709 			tmpfraghdr = (ip6_frag_t *)whereptr;
2710 			ehdrlen = sizeof (ip6_frag_t);
2711 			if ((uchar_t *)tmpfraghdr + ehdrlen > endptr)
2712 				goto done;
2713 			nexthdr = tmpfraghdr->ip6f_nxt;
2714 			if (!(ipp->ipp_fields & IPPF_FRAGHDR)) {
2715 				ipp->ipp_fields |= IPPF_FRAGHDR;
2716 				ipp->ipp_fraghdr = tmpfraghdr;
2717 				ipp->ipp_fraghdrlen = ehdrlen;
2718 			}
2719 			break;
2720 		case IPPROTO_NONE:
2721 		default:
2722 			goto done;
2723 		}
2724 		length += ehdrlen;
2725 		whereptr += ehdrlen;
2726 	}
2727 done:
2728 	if (nexthdrp != NULL)
2729 		*nexthdrp = nexthdr;
2730 	return (length);
2731 }
2732 
2733 /*
2734  * Return the length of the IPv6 related headers (including extension headers)
2735  * If the packet is malformed, this returns the simple IPv6 header length.
2736  */
2737 uint16_t
ip_hdr_length_v6(mblk_t * mp,ip6_t * ip6h)2738 ip_hdr_length_v6(mblk_t *mp, ip6_t *ip6h)
2739 {
2740 	uint16_t hdr_len;
2741 
2742 	if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_len, NULL))
2743 		hdr_len = sizeof (*ip6h);
2744 	return (hdr_len);
2745 }
2746 
2747 /*
2748  * Parse and process any hop-by-hop or destination options.
2749  *
2750  * Assumes that q is an ill read queue so that ICMP errors for link-local
2751  * destinations are sent out the correct interface.
2752  *
2753  * Returns -1 if there was an error and mp has been consumed.
2754  * Returns 0 if no special action is needed.
2755  * Returns 1 if the packet contained a router alert option for this node
2756  * which is verified to be "interesting/known" for our implementation.
2757  *
2758  * XXX Note: In future as more hbh or dest options are defined,
2759  * it may be better to have different routines for hbh and dest
2760  * options as opt_type fields other than IP6OPT_PAD1 and IP6OPT_PADN
2761  * may have same value in different namespaces. Or is it same namespace ??
2762  * Current code checks for each opt_type (other than pads) if it is in
2763  * the expected  nexthdr (hbh or dest)
2764  */
2765 int
ip_process_options_v6(mblk_t * mp,ip6_t * ip6h,uint8_t * optptr,uint_t optlen,uint8_t hdr_type,ip_recv_attr_t * ira)2766 ip_process_options_v6(mblk_t *mp, ip6_t *ip6h,
2767     uint8_t *optptr, uint_t optlen, uint8_t hdr_type, ip_recv_attr_t *ira)
2768 {
2769 	uint8_t opt_type;
2770 	uint_t optused = 0;
2771 	int ret = 0;
2772 	const char *errtype;
2773 	ill_t		*ill = ira->ira_ill;
2774 	ip_stack_t	*ipst = ill->ill_ipst;
2775 
2776 	while (optlen != 0) {
2777 		opt_type = *optptr;
2778 		if (opt_type == IP6OPT_PAD1) {
2779 			optused = 1;
2780 		} else {
2781 			if (optlen < 2)
2782 				goto bad_opt;
2783 			errtype = "malformed";
2784 			if (opt_type == ip6opt_ls) {
2785 				optused = 2 + optptr[1];
2786 				if (optused > optlen)
2787 					goto bad_opt;
2788 			} else switch (opt_type) {
2789 			case IP6OPT_PADN:
2790 				/*
2791 				 * Note:We don't verify that (N-2) pad octets
2792 				 * are zero as required by spec. Adhere to
2793 				 * "be liberal in what you accept..." part of
2794 				 * implementation philosophy (RFC791,RFC1122)
2795 				 */
2796 				optused = 2 + optptr[1];
2797 				if (optused > optlen)
2798 					goto bad_opt;
2799 				break;
2800 
2801 			case IP6OPT_JUMBO:
2802 				if (hdr_type != IPPROTO_HOPOPTS)
2803 					goto opt_error;
2804 				goto opt_error; /* XXX Not implemented! */
2805 
2806 			case IP6OPT_ROUTER_ALERT: {
2807 				struct ip6_opt_router *or;
2808 
2809 				if (hdr_type != IPPROTO_HOPOPTS)
2810 					goto opt_error;
2811 				optused = 2 + optptr[1];
2812 				if (optused > optlen)
2813 					goto bad_opt;
2814 				or = (struct ip6_opt_router *)optptr;
2815 				/* Check total length and alignment */
2816 				if (optused != sizeof (*or) ||
2817 				    ((uintptr_t)or->ip6or_value & 0x1) != 0)
2818 					goto opt_error;
2819 				/* Check value */
2820 				switch (*((uint16_t *)or->ip6or_value)) {
2821 				case IP6_ALERT_MLD:
2822 				case IP6_ALERT_RSVP:
2823 					ret = 1;
2824 				}
2825 				break;
2826 			}
2827 			case IP6OPT_HOME_ADDRESS: {
2828 				/*
2829 				 * Minimal support for the home address option
2830 				 * (which is required by all IPv6 nodes).
2831 				 * Implement by just swapping the home address
2832 				 * and source address.
2833 				 * XXX Note: this has IPsec implications since
2834 				 * AH needs to take this into account.
2835 				 * Also, when IPsec is used we need to ensure
2836 				 * that this is only processed once
2837 				 * in the received packet (to avoid swapping
2838 				 * back and forth).
2839 				 * NOTE:This option processing is considered
2840 				 * to be unsafe and prone to a denial of
2841 				 * service attack.
2842 				 * The current processing is not safe even with
2843 				 * IPsec secured IP packets. Since the home
2844 				 * address option processing requirement still
2845 				 * is in the IETF draft and in the process of
2846 				 * being redefined for its usage, it has been
2847 				 * decided to turn off the option by default.
2848 				 * If this section of code needs to be executed,
2849 				 * ndd variable ip6_ignore_home_address_opt
2850 				 * should be set to 0 at the user's own risk.
2851 				 */
2852 				struct ip6_opt_home_address *oh;
2853 				in6_addr_t tmp;
2854 
2855 				if (ipst->ips_ipv6_ignore_home_address_opt)
2856 					goto opt_error;
2857 
2858 				if (hdr_type != IPPROTO_DSTOPTS)
2859 					goto opt_error;
2860 				optused = 2 + optptr[1];
2861 				if (optused > optlen)
2862 					goto bad_opt;
2863 
2864 				/*
2865 				 * We did this dest. opt the first time
2866 				 * around (i.e. before AH processing).
2867 				 * If we've done AH... stop now.
2868 				 */
2869 				if ((ira->ira_flags & IRAF_IPSEC_SECURE) &&
2870 				    ira->ira_ipsec_ah_sa != NULL)
2871 					break;
2872 
2873 				oh = (struct ip6_opt_home_address *)optptr;
2874 				/* Check total length and alignment */
2875 				if (optused < sizeof (*oh) ||
2876 				    ((uintptr_t)oh->ip6oh_addr & 0x7) != 0)
2877 					goto opt_error;
2878 				/* Swap ip6_src and the home address */
2879 				tmp = ip6h->ip6_src;
2880 				/* XXX Note: only 8 byte alignment option */
2881 				ip6h->ip6_src = *(in6_addr_t *)oh->ip6oh_addr;
2882 				*(in6_addr_t *)oh->ip6oh_addr = tmp;
2883 				break;
2884 			}
2885 
2886 			case IP6OPT_TUNNEL_LIMIT:
2887 				if (hdr_type != IPPROTO_DSTOPTS) {
2888 					goto opt_error;
2889 				}
2890 				optused = 2 + optptr[1];
2891 				if (optused > optlen) {
2892 					goto bad_opt;
2893 				}
2894 				if (optused != 3) {
2895 					goto opt_error;
2896 				}
2897 				break;
2898 
2899 			default:
2900 				errtype = "unknown";
2901 				/* FALLTHROUGH */
2902 			opt_error:
2903 				/* Determine which zone should send error */
2904 				switch (IP6OPT_TYPE(opt_type)) {
2905 				case IP6OPT_TYPE_SKIP:
2906 					optused = 2 + optptr[1];
2907 					if (optused > optlen)
2908 						goto bad_opt;
2909 					ip1dbg(("ip_process_options_v6: %s "
2910 					    "opt 0x%x skipped\n",
2911 					    errtype, opt_type));
2912 					break;
2913 				case IP6OPT_TYPE_DISCARD:
2914 					ip1dbg(("ip_process_options_v6: %s "
2915 					    "opt 0x%x; packet dropped\n",
2916 					    errtype, opt_type));
2917 					BUMP_MIB(ill->ill_ip_mib,
2918 					    ipIfStatsInHdrErrors);
2919 					ip_drop_input("ipIfStatsInHdrErrors",
2920 					    mp, ill);
2921 					freemsg(mp);
2922 					return (-1);
2923 				case IP6OPT_TYPE_ICMP:
2924 					BUMP_MIB(ill->ill_ip_mib,
2925 					    ipIfStatsInHdrErrors);
2926 					ip_drop_input("ipIfStatsInHdrErrors",
2927 					    mp, ill);
2928 					icmp_param_problem_v6(mp,
2929 					    ICMP6_PARAMPROB_OPTION,
2930 					    (uint32_t)(optptr -
2931 					    (uint8_t *)ip6h),
2932 					    B_FALSE, ira);
2933 					return (-1);
2934 				case IP6OPT_TYPE_FORCEICMP:
2935 					BUMP_MIB(ill->ill_ip_mib,
2936 					    ipIfStatsInHdrErrors);
2937 					ip_drop_input("ipIfStatsInHdrErrors",
2938 					    mp, ill);
2939 					icmp_param_problem_v6(mp,
2940 					    ICMP6_PARAMPROB_OPTION,
2941 					    (uint32_t)(optptr -
2942 					    (uint8_t *)ip6h),
2943 					    B_TRUE, ira);
2944 					return (-1);
2945 				default:
2946 					ASSERT(0);
2947 				}
2948 			}
2949 		}
2950 		optlen -= optused;
2951 		optptr += optused;
2952 	}
2953 	return (ret);
2954 
2955 bad_opt:
2956 	/* Determine which zone should send error */
2957 	ip_drop_input("ICMP_PARAM_PROBLEM", mp, ill);
2958 	icmp_param_problem_v6(mp, ICMP6_PARAMPROB_OPTION,
2959 	    (uint32_t)(optptr - (uint8_t *)ip6h),
2960 	    B_FALSE, ira);
2961 	return (-1);
2962 }
2963 
2964 /*
2965  * Process a routing header that is not yet empty.
2966  * Because of RFC 5095, we now reject all route headers.
2967  */
2968 void
ip_process_rthdr(mblk_t * mp,ip6_t * ip6h,ip6_rthdr_t * rth,ip_recv_attr_t * ira)2969 ip_process_rthdr(mblk_t *mp, ip6_t *ip6h, ip6_rthdr_t *rth,
2970     ip_recv_attr_t *ira)
2971 {
2972 	ill_t		*ill = ira->ira_ill;
2973 	ip_stack_t	*ipst = ill->ill_ipst;
2974 
2975 	ASSERT(rth->ip6r_segleft != 0);
2976 
2977 	if (!ipst->ips_ipv6_forward_src_routed) {
2978 		/* XXX Check for source routed out same interface? */
2979 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
2980 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
2981 		ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
2982 		freemsg(mp);
2983 		return;
2984 	}
2985 
2986 	ip_drop_input("ICMP_PARAM_PROBLEM", mp, ill);
2987 	icmp_param_problem_v6(mp, ICMP6_PARAMPROB_HEADER,
2988 	    (uint32_t)((uchar_t *)&rth->ip6r_type - (uchar_t *)ip6h),
2989 	    B_FALSE, ira);
2990 }
2991 
2992 /*
2993  * Read side put procedure for IPv6 module.
2994  */
2995 int
ip_rput_v6(queue_t * q,mblk_t * mp)2996 ip_rput_v6(queue_t *q, mblk_t *mp)
2997 {
2998 	ill_t		*ill;
2999 
3000 	ill = (ill_t *)q->q_ptr;
3001 	if (ill->ill_state_flags & (ILL_CONDEMNED | ILL_LL_SUBNET_PENDING)) {
3002 		union DL_primitives *dl;
3003 
3004 		dl = (union DL_primitives *)mp->b_rptr;
3005 		/*
3006 		 * Things are opening or closing - only accept DLPI
3007 		 * ack messages. If the stream is closing and ip_wsrv
3008 		 * has completed, ip_close is out of the qwait, but has
3009 		 * not yet completed qprocsoff. Don't proceed any further
3010 		 * because the ill has been cleaned up and things hanging
3011 		 * off the ill have been freed.
3012 		 */
3013 		if ((mp->b_datap->db_type != M_PCPROTO) ||
3014 		    (dl->dl_primitive == DL_UNITDATA_IND)) {
3015 			inet_freemsg(mp);
3016 			return (0);
3017 		}
3018 	}
3019 	if (DB_TYPE(mp) == M_DATA) {
3020 		struct mac_header_info_s mhi;
3021 
3022 		ip_mdata_to_mhi(ill, mp, &mhi);
3023 		ip_input_v6(ill, NULL, mp, &mhi);
3024 	} else {
3025 		ip_rput_notdata(ill, mp);
3026 	}
3027 	return (0);
3028 }
3029 
3030 /*
3031  * Walk through the IPv6 packet in mp and see if there's an AH header
3032  * in it.  See if the AH header needs to get done before other headers in
3033  * the packet.  (Worker function for ipsec_early_ah_v6().)
3034  */
3035 #define	IPSEC_HDR_DONT_PROCESS	0
3036 #define	IPSEC_HDR_PROCESS	1
3037 #define	IPSEC_MEMORY_ERROR	2 /* or malformed packet */
3038 static int
ipsec_needs_processing_v6(mblk_t * mp,uint8_t * nexthdr)3039 ipsec_needs_processing_v6(mblk_t *mp, uint8_t *nexthdr)
3040 {
3041 	uint_t	length;
3042 	uint_t	ehdrlen;
3043 	uint8_t *whereptr;
3044 	uint8_t *endptr;
3045 	uint8_t *nexthdrp;
3046 	ip6_dest_t *desthdr;
3047 	ip6_rthdr_t *rthdr;
3048 	ip6_t	*ip6h;
3049 
3050 	/*
3051 	 * For now just pullup everything.  In general, the less pullups,
3052 	 * the better, but there's so much squirrelling through anyway,
3053 	 * it's just easier this way.
3054 	 */
3055 	if (!pullupmsg(mp, -1)) {
3056 		return (IPSEC_MEMORY_ERROR);
3057 	}
3058 
3059 	ip6h = (ip6_t *)mp->b_rptr;
3060 	length = IPV6_HDR_LEN;
3061 	whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */
3062 	endptr = mp->b_wptr;
3063 
3064 	/*
3065 	 * We can't just use the argument nexthdr in the place
3066 	 * of nexthdrp becaue we don't dereference nexthdrp
3067 	 * till we confirm whether it is a valid address.
3068 	 */
3069 	nexthdrp = &ip6h->ip6_nxt;
3070 	while (whereptr < endptr) {
3071 		/* Is there enough left for len + nexthdr? */
3072 		if (whereptr + MIN_EHDR_LEN > endptr)
3073 			return (IPSEC_MEMORY_ERROR);
3074 
3075 		switch (*nexthdrp) {
3076 		case IPPROTO_HOPOPTS:
3077 		case IPPROTO_DSTOPTS:
3078 			/* Assumes the headers are identical for hbh and dst */
3079 			desthdr = (ip6_dest_t *)whereptr;
3080 			ehdrlen = 8 * (desthdr->ip6d_len + 1);
3081 			if ((uchar_t *)desthdr +  ehdrlen > endptr)
3082 				return (IPSEC_MEMORY_ERROR);
3083 			/*
3084 			 * Return DONT_PROCESS because the destination
3085 			 * options header may be for each hop in a
3086 			 * routing-header, and we only want AH if we're
3087 			 * finished with routing headers.
3088 			 */
3089 			if (*nexthdrp == IPPROTO_DSTOPTS)
3090 				return (IPSEC_HDR_DONT_PROCESS);
3091 			nexthdrp = &desthdr->ip6d_nxt;
3092 			break;
3093 		case IPPROTO_ROUTING:
3094 			rthdr = (ip6_rthdr_t *)whereptr;
3095 
3096 			/*
3097 			 * If there's more hops left on the routing header,
3098 			 * return now with DON'T PROCESS.
3099 			 */
3100 			if (rthdr->ip6r_segleft > 0)
3101 				return (IPSEC_HDR_DONT_PROCESS);
3102 
3103 			ehdrlen =  8 * (rthdr->ip6r_len + 1);
3104 			if ((uchar_t *)rthdr +  ehdrlen > endptr)
3105 				return (IPSEC_MEMORY_ERROR);
3106 			nexthdrp = &rthdr->ip6r_nxt;
3107 			break;
3108 		case IPPROTO_FRAGMENT:
3109 			/* Wait for reassembly */
3110 			return (IPSEC_HDR_DONT_PROCESS);
3111 		case IPPROTO_AH:
3112 			*nexthdr = IPPROTO_AH;
3113 			return (IPSEC_HDR_PROCESS);
3114 		case IPPROTO_NONE:
3115 			/* No next header means we're finished */
3116 		default:
3117 			return (IPSEC_HDR_DONT_PROCESS);
3118 		}
3119 		length += ehdrlen;
3120 		whereptr += ehdrlen;
3121 	}
3122 	/*
3123 	 * Malformed/truncated packet.
3124 	 */
3125 	return (IPSEC_MEMORY_ERROR);
3126 }
3127 
3128 /*
3129  * Path for AH if options are present.
3130  * Returns NULL if the mblk was consumed.
3131  *
3132  * Sometimes AH needs to be done before other IPv6 headers for security
3133  * reasons.  This function (and its ipsec_needs_processing_v6() above)
3134  * indicates if that is so, and fans out to the appropriate IPsec protocol
3135  * for the datagram passed in.
3136  */
3137 mblk_t *
ipsec_early_ah_v6(mblk_t * mp,ip_recv_attr_t * ira)3138 ipsec_early_ah_v6(mblk_t *mp, ip_recv_attr_t *ira)
3139 {
3140 	uint8_t nexthdr;
3141 	ah_t *ah;
3142 	ill_t		*ill = ira->ira_ill;
3143 	ip_stack_t	*ipst = ill->ill_ipst;
3144 	ipsec_stack_t	*ipss = ipst->ips_netstack->netstack_ipsec;
3145 
3146 	switch (ipsec_needs_processing_v6(mp, &nexthdr)) {
3147 	case IPSEC_MEMORY_ERROR:
3148 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
3149 		ip_drop_input("ipIfStatsInDiscards", mp, ill);
3150 		freemsg(mp);
3151 		return (NULL);
3152 	case IPSEC_HDR_DONT_PROCESS:
3153 		return (mp);
3154 	}
3155 
3156 	/* Default means send it to AH! */
3157 	ASSERT(nexthdr == IPPROTO_AH);
3158 
3159 	if (!ipsec_loaded(ipss)) {
3160 		ip_proto_not_sup(mp, ira);
3161 		return (NULL);
3162 	}
3163 
3164 	mp = ipsec_inbound_ah_sa(mp, ira, &ah);
3165 	if (mp == NULL)
3166 		return (NULL);
3167 	ASSERT(ah != NULL);
3168 	ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE);
3169 	ASSERT(ira->ira_ipsec_ah_sa != NULL);
3170 	ASSERT(ira->ira_ipsec_ah_sa->ipsa_input_func != NULL);
3171 	mp = ira->ira_ipsec_ah_sa->ipsa_input_func(mp, ah, ira);
3172 
3173 	if (mp == NULL) {
3174 		/*
3175 		 * Either it failed or is pending. In the former case
3176 		 * ipIfStatsInDiscards was increased.
3177 		 */
3178 		return (NULL);
3179 	}
3180 
3181 	/* we're done with IPsec processing, send it up */
3182 	ip_input_post_ipsec(mp, ira);
3183 	return (NULL);
3184 }
3185 
3186 /*
3187  * Reassemble fragment.
3188  * When it returns a completed message the first mblk will only contain
3189  * the headers prior to the fragment header, with the nexthdr value updated
3190  * to be the header after the fragment header.
3191  */
3192 mblk_t *
ip_input_fragment_v6(mblk_t * mp,ip6_t * ip6h,ip6_frag_t * fraghdr,uint_t remlen,ip_recv_attr_t * ira)3193 ip_input_fragment_v6(mblk_t *mp, ip6_t *ip6h,
3194     ip6_frag_t *fraghdr, uint_t remlen, ip_recv_attr_t *ira)
3195 {
3196 	uint32_t	ident = ntohl(fraghdr->ip6f_ident);
3197 	uint16_t	offset;
3198 	boolean_t	more_frags;
3199 	uint8_t		nexthdr = fraghdr->ip6f_nxt;
3200 	in6_addr_t	*v6dst_ptr;
3201 	in6_addr_t	*v6src_ptr;
3202 	uint_t		end;
3203 	uint_t		hdr_length;
3204 	size_t		count;
3205 	ipf_t		*ipf;
3206 	ipf_t		**ipfp;
3207 	ipfb_t		*ipfb;
3208 	mblk_t		*mp1;
3209 	uint8_t		ecn_info = 0;
3210 	size_t		msg_len;
3211 	mblk_t		*tail_mp;
3212 	mblk_t		*t_mp;
3213 	boolean_t	pruned = B_FALSE;
3214 	uint32_t	sum_val;
3215 	uint16_t	sum_flags;
3216 	ill_t		*ill = ira->ira_ill;
3217 	ip_stack_t	*ipst = ill->ill_ipst;
3218 	uint_t		prev_nexthdr_offset;
3219 	uint8_t		prev_nexthdr;
3220 	uint8_t		*ptr;
3221 	uint32_t	packet_size;
3222 
3223 	/*
3224 	 * We utilize hardware computed checksum info only for UDP since
3225 	 * IP fragmentation is a normal occurence for the protocol.  In
3226 	 * addition, checksum offload support for IP fragments carrying
3227 	 * UDP payload is commonly implemented across network adapters.
3228 	 */
3229 	ASSERT(ira->ira_rill != NULL);
3230 	if (nexthdr == IPPROTO_UDP && dohwcksum &&
3231 	    ILL_HCKSUM_CAPABLE(ira->ira_rill) &&
3232 	    (DB_CKSUMFLAGS(mp) & (HCK_FULLCKSUM | HCK_PARTIALCKSUM))) {
3233 		mblk_t *mp1 = mp->b_cont;
3234 		int32_t len;
3235 
3236 		/* Record checksum information from the packet */
3237 		sum_val = (uint32_t)DB_CKSUM16(mp);
3238 		sum_flags = DB_CKSUMFLAGS(mp);
3239 
3240 		/* fragmented payload offset from beginning of mblk */
3241 		offset = (uint16_t)((uchar_t *)&fraghdr[1] - mp->b_rptr);
3242 
3243 		if ((sum_flags & HCK_PARTIALCKSUM) &&
3244 		    (mp1 == NULL || mp1->b_cont == NULL) &&
3245 		    offset >= DB_CKSUMSTART(mp) &&
3246 		    ((len = offset - DB_CKSUMSTART(mp)) & 1) == 0) {
3247 			uint32_t adj;
3248 			/*
3249 			 * Partial checksum has been calculated by hardware
3250 			 * and attached to the packet; in addition, any
3251 			 * prepended extraneous data is even byte aligned.
3252 			 * If any such data exists, we adjust the checksum;
3253 			 * this would also handle any postpended data.
3254 			 */
3255 			IP_ADJCKSUM_PARTIAL(mp->b_rptr + DB_CKSUMSTART(mp),
3256 			    mp, mp1, len, adj);
3257 
3258 			/* One's complement subtract extraneous checksum */
3259 			if (adj >= sum_val)
3260 				sum_val = ~(adj - sum_val) & 0xFFFF;
3261 			else
3262 				sum_val -= adj;
3263 		}
3264 	} else {
3265 		sum_val = 0;
3266 		sum_flags = 0;
3267 	}
3268 
3269 	/* Clear hardware checksumming flag */
3270 	DB_CKSUMFLAGS(mp) = 0;
3271 
3272 	/*
3273 	 * Determine the offset (from the begining of the IP header)
3274 	 * of the nexthdr value which has IPPROTO_FRAGMENT. We use
3275 	 * this when removing the fragment header from the packet.
3276 	 * This packet consists of the IPv6 header, a potential
3277 	 * hop-by-hop options header, a potential pre-routing-header
3278 	 * destination options header, and a potential routing header.
3279 	 */
3280 	prev_nexthdr_offset = (uint8_t *)&ip6h->ip6_nxt - (uint8_t *)ip6h;
3281 	prev_nexthdr = ip6h->ip6_nxt;
3282 	ptr = (uint8_t *)&ip6h[1];
3283 
3284 	if (prev_nexthdr == IPPROTO_HOPOPTS) {
3285 		ip6_hbh_t	*hbh_hdr;
3286 		uint_t		hdr_len;
3287 
3288 		hbh_hdr = (ip6_hbh_t *)ptr;
3289 		hdr_len = 8 * (hbh_hdr->ip6h_len + 1);
3290 		prev_nexthdr = hbh_hdr->ip6h_nxt;
3291 		prev_nexthdr_offset = (uint8_t *)&hbh_hdr->ip6h_nxt
3292 		    - (uint8_t *)ip6h;
3293 		ptr += hdr_len;
3294 	}
3295 	if (prev_nexthdr == IPPROTO_DSTOPTS) {
3296 		ip6_dest_t	*dest_hdr;
3297 		uint_t		hdr_len;
3298 
3299 		dest_hdr = (ip6_dest_t *)ptr;
3300 		hdr_len = 8 * (dest_hdr->ip6d_len + 1);
3301 		prev_nexthdr = dest_hdr->ip6d_nxt;
3302 		prev_nexthdr_offset = (uint8_t *)&dest_hdr->ip6d_nxt
3303 		    - (uint8_t *)ip6h;
3304 		ptr += hdr_len;
3305 	}
3306 	if (prev_nexthdr == IPPROTO_ROUTING) {
3307 		ip6_rthdr_t	*rthdr;
3308 		uint_t		hdr_len;
3309 
3310 		rthdr = (ip6_rthdr_t *)ptr;
3311 		prev_nexthdr = rthdr->ip6r_nxt;
3312 		prev_nexthdr_offset = (uint8_t *)&rthdr->ip6r_nxt
3313 		    - (uint8_t *)ip6h;
3314 		hdr_len = 8 * (rthdr->ip6r_len + 1);
3315 		ptr += hdr_len;
3316 	}
3317 	if (prev_nexthdr != IPPROTO_FRAGMENT) {
3318 		/* Can't handle other headers before the fragment header */
3319 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors);
3320 		ip_drop_input("ipIfStatsInHdrErrors", mp, ill);
3321 		freemsg(mp);
3322 		return (NULL);
3323 	}
3324 
3325 	/*
3326 	 * Note: Fragment offset in header is in 8-octet units.
3327 	 * Clearing least significant 3 bits not only extracts
3328 	 * it but also gets it in units of octets.
3329 	 */
3330 	offset = ntohs(fraghdr->ip6f_offlg) & ~7;
3331 	more_frags = (fraghdr->ip6f_offlg & IP6F_MORE_FRAG);
3332 
3333 	/*
3334 	 * Is the more frags flag on and the payload length not a multiple
3335 	 * of eight?
3336 	 */
3337 	if (more_frags && (ntohs(ip6h->ip6_plen) & 7)) {
3338 		ip_drop_input("ICMP_PARAM_PROBLEM", mp, ill);
3339 		icmp_param_problem_v6(mp, ICMP6_PARAMPROB_HEADER,
3340 		    (uint32_t)((char *)&ip6h->ip6_plen -
3341 		    (char *)ip6h), B_FALSE, ira);
3342 		return (NULL);
3343 	}
3344 
3345 	v6src_ptr = &ip6h->ip6_src;
3346 	v6dst_ptr = &ip6h->ip6_dst;
3347 	end = remlen;
3348 
3349 	hdr_length = (uint_t)((char *)&fraghdr[1] - (char *)ip6h);
3350 	end += offset;
3351 
3352 	/*
3353 	 * Would fragment cause reassembled packet to have a payload length
3354 	 * greater than IP_MAXPACKET - the max payload size?
3355 	 */
3356 	if (end > IP_MAXPACKET) {
3357 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors);
3358 		ip_drop_input("Reassembled packet too large", mp, ill);
3359 		icmp_param_problem_v6(mp, ICMP6_PARAMPROB_HEADER,
3360 		    (uint32_t)((char *)&fraghdr->ip6f_offlg -
3361 		    (char *)ip6h), B_FALSE, ira);
3362 		return (NULL);
3363 	}
3364 
3365 	/*
3366 	 * This packet just has one fragment. Reassembly not
3367 	 * needed.
3368 	 */
3369 	if (!more_frags && offset == 0) {
3370 		goto reass_done;
3371 	}
3372 
3373 	/*
3374 	 * Drop the fragmented as early as possible, if
3375 	 * we don't have resource(s) to re-assemble.
3376 	 */
3377 	if (ipst->ips_ip_reass_queue_bytes == 0) {
3378 		freemsg(mp);
3379 		return (NULL);
3380 	}
3381 
3382 	/* Record the ECN field info. */
3383 	ecn_info = (uint8_t)(ntohl(ip6h->ip6_vcf & htonl(~0xFFCFFFFF)) >> 20);
3384 	/*
3385 	 * If this is not the first fragment, dump the unfragmentable
3386 	 * portion of the packet.
3387 	 */
3388 	if (offset)
3389 		mp->b_rptr = (uchar_t *)&fraghdr[1];
3390 
3391 	/*
3392 	 * Fragmentation reassembly.  Each ILL has a hash table for
3393 	 * queueing packets undergoing reassembly for all IPIFs
3394 	 * associated with the ILL.  The hash is based on the packet
3395 	 * IP ident field.  The ILL frag hash table was allocated
3396 	 * as a timer block at the time the ILL was created.  Whenever
3397 	 * there is anything on the reassembly queue, the timer will
3398 	 * be running.
3399 	 */
3400 	/* Handle vnic loopback of fragments */
3401 	if (mp->b_datap->db_ref > 2)
3402 		msg_len = 0;
3403 	else
3404 		msg_len = MBLKSIZE(mp);
3405 
3406 	tail_mp = mp;
3407 	while (tail_mp->b_cont != NULL) {
3408 		tail_mp = tail_mp->b_cont;
3409 		if (tail_mp->b_datap->db_ref <= 2)
3410 			msg_len += MBLKSIZE(tail_mp);
3411 	}
3412 	/*
3413 	 * If the reassembly list for this ILL will get too big
3414 	 * prune it.
3415 	 */
3416 
3417 	if ((msg_len + sizeof (*ipf) + ill->ill_frag_count) >=
3418 	    ipst->ips_ip_reass_queue_bytes) {
3419 		DTRACE_PROBE3(ip_reass_queue_bytes, uint_t, msg_len,
3420 		    uint_t, ill->ill_frag_count,
3421 		    uint_t, ipst->ips_ip_reass_queue_bytes);
3422 		ill_frag_prune(ill,
3423 		    (ipst->ips_ip_reass_queue_bytes < msg_len) ? 0 :
3424 		    (ipst->ips_ip_reass_queue_bytes - msg_len));
3425 		pruned = B_TRUE;
3426 	}
3427 
3428 	ipfb = &ill->ill_frag_hash_tbl[ILL_FRAG_HASH_V6(*v6src_ptr, ident)];
3429 	mutex_enter(&ipfb->ipfb_lock);
3430 
3431 	ipfp = &ipfb->ipfb_ipf;
3432 	/* Try to find an existing fragment queue for this packet. */
3433 	for (;;) {
3434 		ipf = ipfp[0];
3435 		if (ipf) {
3436 			/*
3437 			 * It has to match on ident, source address, and
3438 			 * dest address.
3439 			 */
3440 			if (ipf->ipf_ident == ident &&
3441 			    IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6src, v6src_ptr) &&
3442 			    IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6dst, v6dst_ptr)) {
3443 
3444 				/*
3445 				 * If we have received too many
3446 				 * duplicate fragments for this packet
3447 				 * free it.
3448 				 */
3449 				if (ipf->ipf_num_dups > ip_max_frag_dups) {
3450 					ill_frag_free_pkts(ill, ipfb, ipf, 1);
3451 					freemsg(mp);
3452 					mutex_exit(&ipfb->ipfb_lock);
3453 					return (NULL);
3454 				}
3455 
3456 				break;
3457 			}
3458 			ipfp = &ipf->ipf_hash_next;
3459 			continue;
3460 		}
3461 
3462 
3463 		/*
3464 		 * If we pruned the list, do we want to store this new
3465 		 * fragment?. We apply an optimization here based on the