xref: /illumos-gate/usr/src/uts/common/inet/ip/conn_opt.c (revision 1cb875ae)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 /* Copyright (c) 1990 Mentat Inc. */
27 
28 #include <sys/types.h>
29 #include <sys/stream.h>
30 #include <sys/strsun.h>
31 #define	_SUN_TPI_VERSION 2
32 #include <sys/tihdr.h>
33 #include <sys/xti_inet.h>
34 #include <sys/ucred.h>
35 #include <sys/zone.h>
36 #include <sys/ddi.h>
37 #include <sys/sunddi.h>
38 #include <sys/cmn_err.h>
39 #include <sys/debug.h>
40 #include <sys/atomic.h>
41 #include <sys/policy.h>
42 
43 #include <sys/systm.h>
44 #include <sys/param.h>
45 #include <sys/kmem.h>
46 #include <sys/sdt.h>
47 #include <sys/socket.h>
48 #include <sys/ethernet.h>
49 #include <sys/mac.h>
50 #include <net/if.h>
51 #include <net/if_types.h>
52 #include <net/if_arp.h>
53 #include <net/route.h>
54 #include <sys/sockio.h>
55 #include <netinet/in.h>
56 #include <net/if_dl.h>
57 
58 #include <inet/common.h>
59 #include <inet/mi.h>
60 #include <inet/mib2.h>
61 #include <inet/nd.h>
62 #include <inet/arp.h>
63 #include <inet/snmpcom.h>
64 #include <inet/kstatcom.h>
65 
66 #include <netinet/igmp_var.h>
67 #include <netinet/ip6.h>
68 #include <netinet/icmp6.h>
69 #include <netinet/sctp.h>
70 
71 #include <inet/ip.h>
72 #include <inet/ip_impl.h>
73 #include <inet/ip6.h>
74 #include <inet/ip6_asp.h>
75 #include <inet/tcp.h>
76 #include <inet/ip_multi.h>
77 #include <inet/ip_if.h>
78 #include <inet/ip_ire.h>
79 #include <inet/ip_ftable.h>
80 #include <inet/ip_rts.h>
81 #include <inet/optcom.h>
82 #include <inet/ip_ndp.h>
83 #include <inet/ip_listutils.h>
84 #include <netinet/igmp.h>
85 #include <netinet/ip_mroute.h>
86 #include <netinet/udp.h>
87 #include <inet/ipp_common.h>
88 
89 #include <net/pfkeyv2.h>
90 #include <inet/sadb.h>
91 #include <inet/ipsec_impl.h>
92 #include <inet/ipdrop.h>
93 #include <inet/ip_netinfo.h>
94 
95 #include <inet/ipclassifier.h>
96 #include <inet/sctp_ip.h>
97 #include <inet/sctp/sctp_impl.h>
98 #include <inet/udp_impl.h>
99 #include <sys/sunddi.h>
100 
101 #include <sys/tsol/label.h>
102 #include <sys/tsol/tnet.h>
103 
104 static	sin_t	sin_null;	/* Zero address for quick clears */
105 static	sin6_t	sin6_null;	/* Zero address for quick clears */
106 
107 /*
108  * Return how much size is needed for the different ancillary data items
109  */
110 uint_t
111 conn_recvancillary_size(conn_t *connp, crb_t recv_ancillary,
112     ip_recv_attr_t *ira, mblk_t *mp, ip_pkt_t *ipp)
113 {
114 	uint_t		ancil_size;
115 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
116 
117 	/*
118 	 * If IP_RECVDSTADDR is set we include the destination IP
119 	 * address as an option. With IP_RECVOPTS we include all
120 	 * the IP options.
121 	 */
122 	ancil_size = 0;
123 	if (recv_ancillary.crb_recvdstaddr &&
124 	    (ira->ira_flags & IRAF_IS_IPV4)) {
125 		ancil_size += sizeof (struct T_opthdr) +
126 		    sizeof (struct in_addr);
127 		IP_STAT(ipst, conn_in_recvdstaddr);
128 	}
129 
130 	/*
131 	 * ip_recvpktinfo is used for both AF_INET and AF_INET6 but
132 	 * are different
133 	 */
134 	if (recv_ancillary.crb_ip_recvpktinfo &&
135 	    connp->conn_family == AF_INET) {
136 		ancil_size += sizeof (struct T_opthdr) +
137 		    sizeof (struct in_pktinfo);
138 		IP_STAT(ipst, conn_in_recvpktinfo);
139 	}
140 
141 	if ((recv_ancillary.crb_recvopts) &&
142 	    (ipp->ipp_fields & IPPF_IPV4_OPTIONS)) {
143 		ancil_size += sizeof (struct T_opthdr) +
144 		    ipp->ipp_ipv4_options_len;
145 		IP_STAT(ipst, conn_in_recvopts);
146 	}
147 
148 	if (recv_ancillary.crb_recvslla) {
149 		ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
150 		ill_t *ill;
151 
152 		/* Make sure ira_l2src is setup if not already */
153 		if (!(ira->ira_flags & IRAF_L2SRC_SET)) {
154 			ill = ill_lookup_on_ifindex(ira->ira_rifindex, B_FALSE,
155 			    ipst);
156 			if (ill != NULL) {
157 				ip_setl2src(mp, ira, ill);
158 				ill_refrele(ill);
159 			}
160 		}
161 		ancil_size += sizeof (struct T_opthdr) +
162 		    sizeof (struct sockaddr_dl);
163 		IP_STAT(ipst, conn_in_recvslla);
164 	}
165 
166 	if (recv_ancillary.crb_recvif) {
167 		ancil_size += sizeof (struct T_opthdr) + sizeof (uint_t);
168 		IP_STAT(ipst, conn_in_recvif);
169 	}
170 
171 	/*
172 	 * ip_recvpktinfo is used for both AF_INET and AF_INET6 but
173 	 * are different
174 	 */
175 	if (recv_ancillary.crb_ip_recvpktinfo &&
176 	    connp->conn_family == AF_INET6) {
177 		ancil_size += sizeof (struct T_opthdr) +
178 		    sizeof (struct in6_pktinfo);
179 		IP_STAT(ipst, conn_in_recvpktinfo);
180 	}
181 
182 	if (recv_ancillary.crb_ipv6_recvhoplimit) {
183 		ancil_size += sizeof (struct T_opthdr) + sizeof (int);
184 		IP_STAT(ipst, conn_in_recvhoplimit);
185 	}
186 
187 	if (recv_ancillary.crb_ipv6_recvtclass) {
188 		ancil_size += sizeof (struct T_opthdr) + sizeof (int);
189 		IP_STAT(ipst, conn_in_recvtclass);
190 	}
191 
192 	if (recv_ancillary.crb_ipv6_recvhopopts &&
193 	    (ipp->ipp_fields & IPPF_HOPOPTS)) {
194 		ancil_size += sizeof (struct T_opthdr) + ipp->ipp_hopoptslen;
195 		IP_STAT(ipst, conn_in_recvhopopts);
196 	}
197 	/*
198 	 * To honor RFC3542 when an application asks for both IPV6_RECVDSTOPTS
199 	 * and IPV6_RECVRTHDR, we pass up the item rthdrdstopts (the destination
200 	 * options that appear before a routing header.
201 	 * We also pass them up if IPV6_RECVRTHDRDSTOPTS is set.
202 	 */
203 	if (ipp->ipp_fields & IPPF_RTHDRDSTOPTS) {
204 		if (recv_ancillary.crb_ipv6_recvrthdrdstopts ||
205 		    (recv_ancillary.crb_ipv6_recvdstopts &&
206 		    recv_ancillary.crb_ipv6_recvrthdr)) {
207 			ancil_size += sizeof (struct T_opthdr) +
208 			    ipp->ipp_rthdrdstoptslen;
209 			IP_STAT(ipst, conn_in_recvrthdrdstopts);
210 		}
211 	}
212 	if ((recv_ancillary.crb_ipv6_recvrthdr) &&
213 	    (ipp->ipp_fields & IPPF_RTHDR)) {
214 		ancil_size += sizeof (struct T_opthdr) + ipp->ipp_rthdrlen;
215 		IP_STAT(ipst, conn_in_recvrthdr);
216 	}
217 	if ((recv_ancillary.crb_ipv6_recvdstopts ||
218 	    recv_ancillary.crb_old_ipv6_recvdstopts) &&
219 	    (ipp->ipp_fields & IPPF_DSTOPTS)) {
220 		ancil_size += sizeof (struct T_opthdr) + ipp->ipp_dstoptslen;
221 		IP_STAT(ipst, conn_in_recvdstopts);
222 	}
223 	if (recv_ancillary.crb_recvucred && ira->ira_cred != NULL) {
224 		ancil_size += sizeof (struct T_opthdr) + ucredsize;
225 		IP_STAT(ipst, conn_in_recvucred);
226 	}
227 
228 	/*
229 	 * If SO_TIMESTAMP is set allocate the appropriate sized
230 	 * buffer. Since gethrestime() expects a pointer aligned
231 	 * argument, we allocate space necessary for extra
232 	 * alignment (even though it might not be used).
233 	 */
234 	if (recv_ancillary.crb_timestamp) {
235 		ancil_size += sizeof (struct T_opthdr) +
236 		    sizeof (timestruc_t) + _POINTER_ALIGNMENT;
237 		IP_STAT(ipst, conn_in_timestamp);
238 	}
239 
240 	/*
241 	 * If IP_RECVTTL is set allocate the appropriate sized buffer
242 	 */
243 	if (recv_ancillary.crb_recvttl &&
244 	    (ira->ira_flags & IRAF_IS_IPV4)) {
245 		ancil_size += sizeof (struct T_opthdr) + sizeof (uint8_t);
246 		IP_STAT(ipst, conn_in_recvttl);
247 	}
248 
249 	return (ancil_size);
250 }
251 
252 /*
253  * Lay down the ancillary data items at "ancil_buf".
254  * Assumes caller has used conn_recvancillary_size to allocate a sufficiently
255  * large buffer - ancil_size.
256  */
257 void
258 conn_recvancillary_add(conn_t *connp, crb_t recv_ancillary,
259     ip_recv_attr_t *ira, ip_pkt_t *ipp, uchar_t *ancil_buf, uint_t ancil_size)
260 {
261 	/*
262 	 * Copy in destination address before options to avoid
263 	 * any padding issues.
264 	 */
265 	if (recv_ancillary.crb_recvdstaddr &&
266 	    (ira->ira_flags & IRAF_IS_IPV4)) {
267 		struct T_opthdr *toh;
268 		ipaddr_t *dstptr;
269 
270 		toh = (struct T_opthdr *)ancil_buf;
271 		toh->level = IPPROTO_IP;
272 		toh->name = IP_RECVDSTADDR;
273 		toh->len = sizeof (struct T_opthdr) + sizeof (ipaddr_t);
274 		toh->status = 0;
275 		ancil_buf += sizeof (struct T_opthdr);
276 		dstptr = (ipaddr_t *)ancil_buf;
277 		*dstptr = ipp->ipp_addr_v4;
278 		ancil_buf += sizeof (ipaddr_t);
279 		ancil_size -= toh->len;
280 	}
281 
282 	/*
283 	 * ip_recvpktinfo is used for both AF_INET and AF_INET6 but
284 	 * are different
285 	 */
286 	if (recv_ancillary.crb_ip_recvpktinfo &&
287 	    connp->conn_family == AF_INET) {
288 		ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
289 		struct T_opthdr *toh;
290 		struct in_pktinfo *pktinfop;
291 		ill_t *ill;
292 		ipif_t *ipif;
293 
294 		toh = (struct T_opthdr *)ancil_buf;
295 		toh->level = IPPROTO_IP;
296 		toh->name = IP_PKTINFO;
297 		toh->len = sizeof (struct T_opthdr) + sizeof (*pktinfop);
298 		toh->status = 0;
299 		ancil_buf += sizeof (struct T_opthdr);
300 		pktinfop = (struct in_pktinfo *)ancil_buf;
301 
302 		pktinfop->ipi_ifindex = ira->ira_ruifindex;
303 		pktinfop->ipi_spec_dst.s_addr = INADDR_ANY;
304 
305 		/* Find a good address to report */
306 		ill = ill_lookup_on_ifindex(ira->ira_ruifindex, B_FALSE, ipst);
307 		if (ill != NULL) {
308 			ipif = ipif_good_addr(ill, IPCL_ZONEID(connp));
309 			if (ipif != NULL) {
310 				pktinfop->ipi_spec_dst.s_addr =
311 				    ipif->ipif_lcl_addr;
312 				ipif_refrele(ipif);
313 			}
314 			ill_refrele(ill);
315 		}
316 		pktinfop->ipi_addr.s_addr = ipp->ipp_addr_v4;
317 		ancil_buf += sizeof (struct in_pktinfo);
318 		ancil_size -= toh->len;
319 	}
320 
321 	if ((recv_ancillary.crb_recvopts) &&
322 	    (ipp->ipp_fields & IPPF_IPV4_OPTIONS)) {
323 		struct T_opthdr *toh;
324 
325 		toh = (struct T_opthdr *)ancil_buf;
326 		toh->level = IPPROTO_IP;
327 		toh->name = IP_RECVOPTS;
328 		toh->len = sizeof (struct T_opthdr) + ipp->ipp_ipv4_options_len;
329 		toh->status = 0;
330 		ancil_buf += sizeof (struct T_opthdr);
331 		bcopy(ipp->ipp_ipv4_options, ancil_buf,
332 		    ipp->ipp_ipv4_options_len);
333 		ancil_buf += ipp->ipp_ipv4_options_len;
334 		ancil_size -= toh->len;
335 	}
336 
337 	if (recv_ancillary.crb_recvslla) {
338 		ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
339 		struct T_opthdr *toh;
340 		struct sockaddr_dl *dstptr;
341 		ill_t *ill;
342 		int alen = 0;
343 
344 		ill = ill_lookup_on_ifindex(ira->ira_rifindex, B_FALSE, ipst);
345 		if (ill != NULL)
346 			alen = ill->ill_phys_addr_length;
347 
348 		/*
349 		 * For loopback multicast and broadcast the packet arrives
350 		 * with ira_ruifdex being the physical interface, but
351 		 * ira_l2src is all zero since ip_postfrag_loopback doesn't
352 		 * know our l2src. We don't report the address in that case.
353 		 */
354 		if (ira->ira_flags & IRAF_LOOPBACK)
355 			alen = 0;
356 
357 		toh = (struct T_opthdr *)ancil_buf;
358 		toh->level = IPPROTO_IP;
359 		toh->name = IP_RECVSLLA;
360 		toh->len = sizeof (struct T_opthdr) +
361 		    sizeof (struct sockaddr_dl);
362 		toh->status = 0;
363 		ancil_buf += sizeof (struct T_opthdr);
364 		dstptr = (struct sockaddr_dl *)ancil_buf;
365 		dstptr->sdl_family = AF_LINK;
366 		dstptr->sdl_index = ira->ira_ruifindex;
367 		if (ill != NULL)
368 			dstptr->sdl_type = ill->ill_type;
369 		else
370 			dstptr->sdl_type = 0;
371 		dstptr->sdl_nlen = 0;
372 		dstptr->sdl_alen = alen;
373 		dstptr->sdl_slen = 0;
374 		bcopy(ira->ira_l2src, dstptr->sdl_data, alen);
375 		ancil_buf += sizeof (struct sockaddr_dl);
376 		ancil_size -= toh->len;
377 		if (ill != NULL)
378 			ill_refrele(ill);
379 	}
380 
381 	if (recv_ancillary.crb_recvif) {
382 		struct T_opthdr *toh;
383 		uint_t		*dstptr;
384 
385 		toh = (struct T_opthdr *)ancil_buf;
386 		toh->level = IPPROTO_IP;
387 		toh->name = IP_RECVIF;
388 		toh->len = sizeof (struct T_opthdr) + sizeof (uint_t);
389 		toh->status = 0;
390 		ancil_buf += sizeof (struct T_opthdr);
391 		dstptr = (uint_t *)ancil_buf;
392 		*dstptr = ira->ira_ruifindex;
393 		ancil_buf += sizeof (uint_t);
394 		ancil_size -= toh->len;
395 	}
396 
397 	/*
398 	 * ip_recvpktinfo is used for both AF_INET and AF_INET6 but
399 	 * are different
400 	 */
401 	if (recv_ancillary.crb_ip_recvpktinfo &&
402 	    connp->conn_family == AF_INET6) {
403 		struct T_opthdr *toh;
404 		struct in6_pktinfo *pkti;
405 
406 		toh = (struct T_opthdr *)ancil_buf;
407 		toh->level = IPPROTO_IPV6;
408 		toh->name = IPV6_PKTINFO;
409 		toh->len = sizeof (struct T_opthdr) + sizeof (*pkti);
410 		toh->status = 0;
411 		ancil_buf += sizeof (struct T_opthdr);
412 		pkti = (struct in6_pktinfo *)ancil_buf;
413 		if (ira->ira_flags & IRAF_IS_IPV4) {
414 			IN6_IPADDR_TO_V4MAPPED(ipp->ipp_addr_v4,
415 			    &pkti->ipi6_addr);
416 		} else {
417 			pkti->ipi6_addr = ipp->ipp_addr;
418 		}
419 		pkti->ipi6_ifindex = ira->ira_ruifindex;
420 
421 		ancil_buf += sizeof (*pkti);
422 		ancil_size -= toh->len;
423 	}
424 	if (recv_ancillary.crb_ipv6_recvhoplimit) {
425 		struct T_opthdr *toh;
426 
427 		toh = (struct T_opthdr *)ancil_buf;
428 		toh->level = IPPROTO_IPV6;
429 		toh->name = IPV6_HOPLIMIT;
430 		toh->len = sizeof (struct T_opthdr) + sizeof (uint_t);
431 		toh->status = 0;
432 		ancil_buf += sizeof (struct T_opthdr);
433 		*(uint_t *)ancil_buf = ipp->ipp_hoplimit;
434 		ancil_buf += sizeof (uint_t);
435 		ancil_size -= toh->len;
436 	}
437 	if (recv_ancillary.crb_ipv6_recvtclass) {
438 		struct T_opthdr *toh;
439 
440 		toh = (struct T_opthdr *)ancil_buf;
441 		toh->level = IPPROTO_IPV6;
442 		toh->name = IPV6_TCLASS;
443 		toh->len = sizeof (struct T_opthdr) + sizeof (uint_t);
444 		toh->status = 0;
445 		ancil_buf += sizeof (struct T_opthdr);
446 
447 		if (ira->ira_flags & IRAF_IS_IPV4)
448 			*(uint_t *)ancil_buf = ipp->ipp_type_of_service;
449 		else
450 			*(uint_t *)ancil_buf = ipp->ipp_tclass;
451 		ancil_buf += sizeof (uint_t);
452 		ancil_size -= toh->len;
453 	}
454 	if (recv_ancillary.crb_ipv6_recvhopopts &&
455 	    (ipp->ipp_fields & IPPF_HOPOPTS)) {
456 		struct T_opthdr *toh;
457 
458 		toh = (struct T_opthdr *)ancil_buf;
459 		toh->level = IPPROTO_IPV6;
460 		toh->name = IPV6_HOPOPTS;
461 		toh->len = sizeof (struct T_opthdr) + ipp->ipp_hopoptslen;
462 		toh->status = 0;
463 		ancil_buf += sizeof (struct T_opthdr);
464 		bcopy(ipp->ipp_hopopts, ancil_buf, ipp->ipp_hopoptslen);
465 		ancil_buf += ipp->ipp_hopoptslen;
466 		ancil_size -= toh->len;
467 	}
468 	/*
469 	 * To honor RFC3542 when an application asks for both IPV6_RECVDSTOPTS
470 	 * and IPV6_RECVRTHDR, we pass up the item rthdrdstopts (the destination
471 	 * options that appear before a routing header.
472 	 * We also pass them up if IPV6_RECVRTHDRDSTOPTS is set.
473 	 */
474 	if (ipp->ipp_fields & IPPF_RTHDRDSTOPTS) {
475 		if (recv_ancillary.crb_ipv6_recvrthdrdstopts ||
476 		    (recv_ancillary.crb_ipv6_recvdstopts &&
477 		    recv_ancillary.crb_ipv6_recvrthdr)) {
478 			struct T_opthdr *toh;
479 
480 			toh = (struct T_opthdr *)ancil_buf;
481 			toh->level = IPPROTO_IPV6;
482 			toh->name = IPV6_DSTOPTS;
483 			toh->len = sizeof (struct T_opthdr) +
484 			    ipp->ipp_rthdrdstoptslen;
485 			toh->status = 0;
486 			ancil_buf += sizeof (struct T_opthdr);
487 			bcopy(ipp->ipp_rthdrdstopts, ancil_buf,
488 			    ipp->ipp_rthdrdstoptslen);
489 			ancil_buf += ipp->ipp_rthdrdstoptslen;
490 			ancil_size -= toh->len;
491 		}
492 	}
493 	if (recv_ancillary.crb_ipv6_recvrthdr &&
494 	    (ipp->ipp_fields & IPPF_RTHDR)) {
495 		struct T_opthdr *toh;
496 
497 		toh = (struct T_opthdr *)ancil_buf;
498 		toh->level = IPPROTO_IPV6;
499 		toh->name = IPV6_RTHDR;
500 		toh->len = sizeof (struct T_opthdr) + ipp->ipp_rthdrlen;
501 		toh->status = 0;
502 		ancil_buf += sizeof (struct T_opthdr);
503 		bcopy(ipp->ipp_rthdr, ancil_buf, ipp->ipp_rthdrlen);
504 		ancil_buf += ipp->ipp_rthdrlen;
505 		ancil_size -= toh->len;
506 	}
507 	if ((recv_ancillary.crb_ipv6_recvdstopts ||
508 	    recv_ancillary.crb_old_ipv6_recvdstopts) &&
509 	    (ipp->ipp_fields & IPPF_DSTOPTS)) {
510 		struct T_opthdr *toh;
511 
512 		toh = (struct T_opthdr *)ancil_buf;
513 		toh->level = IPPROTO_IPV6;
514 		toh->name = IPV6_DSTOPTS;
515 		toh->len = sizeof (struct T_opthdr) + ipp->ipp_dstoptslen;
516 		toh->status = 0;
517 		ancil_buf += sizeof (struct T_opthdr);
518 		bcopy(ipp->ipp_dstopts, ancil_buf, ipp->ipp_dstoptslen);
519 		ancil_buf += ipp->ipp_dstoptslen;
520 		ancil_size -= toh->len;
521 	}
522 
523 	if (recv_ancillary.crb_recvucred && ira->ira_cred != NULL) {
524 		struct T_opthdr *toh;
525 		cred_t		*rcr = connp->conn_cred;
526 
527 		toh = (struct T_opthdr *)ancil_buf;
528 		toh->level = SOL_SOCKET;
529 		toh->name = SCM_UCRED;
530 		toh->len = sizeof (struct T_opthdr) + ucredsize;
531 		toh->status = 0;
532 		(void) cred2ucred(ira->ira_cred, ira->ira_cpid, &toh[1], rcr);
533 		ancil_buf += toh->len;
534 		ancil_size -= toh->len;
535 	}
536 	if (recv_ancillary.crb_timestamp) {
537 		struct	T_opthdr *toh;
538 
539 		toh = (struct T_opthdr *)ancil_buf;
540 		toh->level = SOL_SOCKET;
541 		toh->name = SCM_TIMESTAMP;
542 		toh->len = sizeof (struct T_opthdr) +
543 		    sizeof (timestruc_t) + _POINTER_ALIGNMENT;
544 		toh->status = 0;
545 		ancil_buf += sizeof (struct T_opthdr);
546 		/* Align for gethrestime() */
547 		ancil_buf = (uchar_t *)P2ROUNDUP((intptr_t)ancil_buf,
548 		    sizeof (intptr_t));
549 		gethrestime((timestruc_t *)ancil_buf);
550 		ancil_buf = (uchar_t *)toh + toh->len;
551 		ancil_size -= toh->len;
552 	}
553 
554 	/*
555 	 * CAUTION:
556 	 * Due to aligment issues
557 	 * Processing of IP_RECVTTL option
558 	 * should always be the last. Adding
559 	 * any option processing after this will
560 	 * cause alignment panic.
561 	 */
562 	if (recv_ancillary.crb_recvttl &&
563 	    (ira->ira_flags & IRAF_IS_IPV4)) {
564 		struct	T_opthdr *toh;
565 		uint8_t	*dstptr;
566 
567 		toh = (struct T_opthdr *)ancil_buf;
568 		toh->level = IPPROTO_IP;
569 		toh->name = IP_RECVTTL;
570 		toh->len = sizeof (struct T_opthdr) + sizeof (uint8_t);
571 		toh->status = 0;
572 		ancil_buf += sizeof (struct T_opthdr);
573 		dstptr = (uint8_t *)ancil_buf;
574 		*dstptr = ipp->ipp_hoplimit;
575 		ancil_buf += sizeof (uint8_t);
576 		ancil_size -= toh->len;
577 	}
578 
579 	/* Consumed all of allocated space */
580 	ASSERT(ancil_size == 0);
581 
582 }
583 
584 /*
585  * This routine retrieves the current status of socket options.
586  * It returns the size of the option retrieved, or -1.
587  */
588 int
589 conn_opt_get(conn_opt_arg_t *coa, t_scalar_t level, t_scalar_t name,
590     uchar_t *ptr)
591 {
592 	int		*i1 = (int *)ptr;
593 	conn_t		*connp = coa->coa_connp;
594 	ip_xmit_attr_t	*ixa = coa->coa_ixa;
595 	ip_pkt_t	*ipp = coa->coa_ipp;
596 	ip_stack_t	*ipst = ixa->ixa_ipst;
597 	uint_t		len;
598 
599 	ASSERT(MUTEX_HELD(&coa->coa_connp->conn_lock));
600 
601 	switch (level) {
602 	case SOL_SOCKET:
603 		switch (name) {
604 		case SO_DEBUG:
605 			*i1 = connp->conn_debug ? SO_DEBUG : 0;
606 			break;	/* goto sizeof (int) option return */
607 		case SO_KEEPALIVE:
608 			*i1 = connp->conn_keepalive ? SO_KEEPALIVE : 0;
609 			break;
610 		case SO_LINGER:	{
611 			struct linger *lgr = (struct linger *)ptr;
612 
613 			lgr->l_onoff = connp->conn_linger ? SO_LINGER : 0;
614 			lgr->l_linger = connp->conn_lingertime;
615 			}
616 			return (sizeof (struct linger));
617 
618 		case SO_OOBINLINE:
619 			*i1 = connp->conn_oobinline ? SO_OOBINLINE : 0;
620 			break;
621 		case SO_REUSEADDR:
622 			*i1 = connp->conn_reuseaddr ? SO_REUSEADDR : 0;
623 			break;	/* goto sizeof (int) option return */
624 		case SO_TYPE:
625 			*i1 = connp->conn_so_type;
626 			break;	/* goto sizeof (int) option return */
627 		case SO_DONTROUTE:
628 			*i1 = (ixa->ixa_flags & IXAF_DONTROUTE) ?
629 			    SO_DONTROUTE : 0;
630 			break;	/* goto sizeof (int) option return */
631 		case SO_USELOOPBACK:
632 			*i1 = connp->conn_useloopback ? SO_USELOOPBACK : 0;
633 			break;	/* goto sizeof (int) option return */
634 		case SO_BROADCAST:
635 			*i1 = connp->conn_broadcast ? SO_BROADCAST : 0;
636 			break;	/* goto sizeof (int) option return */
637 
638 		case SO_SNDBUF:
639 			*i1 = connp->conn_sndbuf;
640 			break;	/* goto sizeof (int) option return */
641 		case SO_RCVBUF:
642 			*i1 = connp->conn_rcvbuf;
643 			break;	/* goto sizeof (int) option return */
644 		case SO_RCVTIMEO:
645 		case SO_SNDTIMEO:
646 			/*
647 			 * Pass these two options in order for third part
648 			 * protocol usage. Here just return directly.
649 			 */
650 			*i1 = 0;
651 			break;
652 		case SO_DGRAM_ERRIND:
653 			*i1 = connp->conn_dgram_errind ? SO_DGRAM_ERRIND : 0;
654 			break;	/* goto sizeof (int) option return */
655 		case SO_RECVUCRED:
656 			*i1 = connp->conn_recv_ancillary.crb_recvucred;
657 			break;	/* goto sizeof (int) option return */
658 		case SO_TIMESTAMP:
659 			*i1 = connp->conn_recv_ancillary.crb_timestamp;
660 			break;	/* goto sizeof (int) option return */
661 		case SO_VRRP:
662 			*i1 = connp->conn_isvrrp;
663 			break;	/* goto sizeof (int) option return */
664 		case SO_ANON_MLP:
665 			*i1 = connp->conn_anon_mlp;
666 			break;	/* goto sizeof (int) option return */
667 		case SO_MAC_EXEMPT:
668 			*i1 = (connp->conn_mac_mode == CONN_MAC_AWARE);
669 			break;	/* goto sizeof (int) option return */
670 		case SO_MAC_IMPLICIT:
671 			*i1 = (connp->conn_mac_mode == CONN_MAC_IMPLICIT);
672 			break;	/* goto sizeof (int) option return */
673 		case SO_ALLZONES:
674 			*i1 = connp->conn_allzones;
675 			break;	/* goto sizeof (int) option return */
676 		case SO_EXCLBIND:
677 			*i1 = connp->conn_exclbind ? SO_EXCLBIND : 0;
678 			break;
679 		case SO_PROTOTYPE:
680 			*i1 = connp->conn_proto;
681 			break;
682 
683 		case SO_DOMAIN:
684 			*i1 = connp->conn_family;
685 			break;
686 		default:
687 			return (-1);
688 		}
689 		break;
690 	case IPPROTO_IP:
691 		if (connp->conn_family != AF_INET)
692 			return (-1);
693 		switch (name) {
694 		case IP_OPTIONS:
695 		case T_IP_OPTIONS:
696 			if (!(ipp->ipp_fields & IPPF_IPV4_OPTIONS))
697 				return (0);
698 
699 			len = ipp->ipp_ipv4_options_len;
700 			if (len > 0) {
701 				bcopy(ipp->ipp_ipv4_options, ptr, len);
702 			}
703 			return (len);
704 
705 		case IP_PKTINFO: {
706 			/*
707 			 * This also handles IP_RECVPKTINFO.
708 			 * IP_PKTINFO and IP_RECVPKTINFO have same value.
709 			 * Differentiation is based on the size of the
710 			 * argument passed in.
711 			 */
712 			struct in_pktinfo *pktinfo;
713 
714 #ifdef notdef
715 			/* optcom doesn't provide a length with "get" */
716 			if (inlen == sizeof (int)) {
717 				/* This is IP_RECVPKTINFO option. */
718 				*i1 = connp->conn_recv_ancillary.
719 				    crb_ip_recvpktinfo;
720 				return (sizeof (int));
721 			}
722 #endif
723 			/* XXX assumes that caller has room for max size! */
724 
725 			pktinfo = (struct in_pktinfo *)ptr;
726 			pktinfo->ipi_ifindex = ixa->ixa_ifindex;
727 			if (ipp->ipp_fields & IPPF_ADDR)
728 				pktinfo->ipi_spec_dst.s_addr = ipp->ipp_addr_v4;
729 			else
730 				pktinfo->ipi_spec_dst.s_addr = INADDR_ANY;
731 			return (sizeof (struct in_pktinfo));
732 		}
733 		case IP_DONTFRAG:
734 			*i1 = (ixa->ixa_flags & IXAF_DONTFRAG) != 0;
735 			return (sizeof (int));
736 		case IP_TOS:
737 		case T_IP_TOS:
738 			*i1 = (int)ipp->ipp_type_of_service;
739 			break;	/* goto sizeof (int) option return */
740 		case IP_TTL:
741 			*i1 = (int)ipp->ipp_unicast_hops;
742 			break;	/* goto sizeof (int) option return */
743 		case IP_DHCPINIT_IF:
744 			return (-1);
745 		case IP_NEXTHOP:
746 			if (ixa->ixa_flags & IXAF_NEXTHOP_SET) {
747 				*(ipaddr_t *)ptr = ixa->ixa_nexthop_v4;
748 				return (sizeof (ipaddr_t));
749 			} else {
750 				return (0);
751 			}
752 
753 		case IP_MULTICAST_IF:
754 			/* 0 address if not set */
755 			*(ipaddr_t *)ptr = ixa->ixa_multicast_ifaddr;
756 			return (sizeof (ipaddr_t));
757 		case IP_MULTICAST_TTL:
758 			*(uchar_t *)ptr = ixa->ixa_multicast_ttl;
759 			return (sizeof (uchar_t));
760 		case IP_MULTICAST_LOOP:
761 			*ptr = (ixa->ixa_flags & IXAF_MULTICAST_LOOP) ? 1 : 0;
762 			return (sizeof (uint8_t));
763 		case IP_RECVOPTS:
764 			*i1 = connp->conn_recv_ancillary.crb_recvopts;
765 			break;	/* goto sizeof (int) option return */
766 		case IP_RECVDSTADDR:
767 			*i1 = connp->conn_recv_ancillary.crb_recvdstaddr;
768 			break;	/* goto sizeof (int) option return */
769 		case IP_RECVIF:
770 			*i1 = connp->conn_recv_ancillary.crb_recvif;
771 			break;	/* goto sizeof (int) option return */
772 		case IP_RECVSLLA:
773 			*i1 = connp->conn_recv_ancillary.crb_recvslla;
774 			break;	/* goto sizeof (int) option return */
775 		case IP_RECVTTL:
776 			*i1 = connp->conn_recv_ancillary.crb_recvttl;
777 			break;	/* goto sizeof (int) option return */
778 		case IP_ADD_MEMBERSHIP:
779 		case IP_DROP_MEMBERSHIP:
780 		case MCAST_JOIN_GROUP:
781 		case MCAST_LEAVE_GROUP:
782 		case IP_BLOCK_SOURCE:
783 		case IP_UNBLOCK_SOURCE:
784 		case IP_ADD_SOURCE_MEMBERSHIP:
785 		case IP_DROP_SOURCE_MEMBERSHIP:
786 		case MCAST_BLOCK_SOURCE:
787 		case MCAST_UNBLOCK_SOURCE:
788 		case MCAST_JOIN_SOURCE_GROUP:
789 		case MCAST_LEAVE_SOURCE_GROUP:
790 		case MRT_INIT:
791 		case MRT_DONE:
792 		case MRT_ADD_VIF:
793 		case MRT_DEL_VIF:
794 		case MRT_ADD_MFC:
795 		case MRT_DEL_MFC:
796 			/* cannot "get" the value for these */
797 			return (-1);
798 		case MRT_VERSION:
799 		case MRT_ASSERT:
800 			(void) ip_mrouter_get(name, connp, ptr);
801 			return (sizeof (int));
802 		case IP_SEC_OPT:
803 			return (ipsec_req_from_conn(connp, (ipsec_req_t	*)ptr,
804 			    IPSEC_AF_V4));
805 		case IP_BOUND_IF:
806 			/* Zero if not set */
807 			*i1 = connp->conn_bound_if;
808 			break;	/* goto sizeof (int) option return */
809 		case IP_UNSPEC_SRC:
810 			*i1 = connp->conn_unspec_src;
811 			break;	/* goto sizeof (int) option return */
812 		case IP_BROADCAST_TTL:
813 			if (ixa->ixa_flags & IXAF_BROADCAST_TTL_SET)
814 				*(uchar_t *)ptr = ixa->ixa_broadcast_ttl;
815 			else
816 				*(uchar_t *)ptr = ipst->ips_ip_broadcast_ttl;
817 			return (sizeof (uchar_t));
818 		default:
819 			return (-1);
820 		}
821 		break;
822 	case IPPROTO_IPV6:
823 		if (connp->conn_family != AF_INET6)
824 			return (-1);
825 		switch (name) {
826 		case IPV6_UNICAST_HOPS:
827 			*i1 = (int)ipp->ipp_unicast_hops;
828 			break;	/* goto sizeof (int) option return */
829 		case IPV6_MULTICAST_IF:
830 			/* 0 index if not set */
831 			*i1 = ixa->ixa_multicast_ifindex;
832 			break;	/* goto sizeof (int) option return */
833 		case IPV6_MULTICAST_HOPS:
834 			*i1 = ixa->ixa_multicast_ttl;
835 			break;	/* goto sizeof (int) option return */
836 		case IPV6_MULTICAST_LOOP:
837 			*i1 = (ixa->ixa_flags & IXAF_MULTICAST_LOOP) ? 1 : 0;
838 			break;	/* goto sizeof (int) option return */
839 		case IPV6_JOIN_GROUP:
840 		case IPV6_LEAVE_GROUP:
841 		case MCAST_JOIN_GROUP:
842 		case MCAST_LEAVE_GROUP:
843 		case MCAST_BLOCK_SOURCE:
844 		case MCAST_UNBLOCK_SOURCE:
845 		case MCAST_JOIN_SOURCE_GROUP:
846 		case MCAST_LEAVE_SOURCE_GROUP:
847 			/* cannot "get" the value for these */
848 			return (-1);
849 		case IPV6_BOUND_IF:
850 			/* Zero if not set */
851 			*i1 = connp->conn_bound_if;
852 			break;	/* goto sizeof (int) option return */
853 		case IPV6_UNSPEC_SRC:
854 			*i1 = connp->conn_unspec_src;
855 			break;	/* goto sizeof (int) option return */
856 		case IPV6_RECVPKTINFO:
857 			*i1 = connp->conn_recv_ancillary.crb_ip_recvpktinfo;
858 			break;	/* goto sizeof (int) option return */
859 		case IPV6_RECVTCLASS:
860 			*i1 = connp->conn_recv_ancillary.crb_ipv6_recvtclass;
861 			break;	/* goto sizeof (int) option return */
862 		case IPV6_RECVPATHMTU:
863 			*i1 = connp->conn_ipv6_recvpathmtu;
864 			break;	/* goto sizeof (int) option return */
865 		case IPV6_RECVHOPLIMIT:
866 			*i1 = connp->conn_recv_ancillary.crb_ipv6_recvhoplimit;
867 			break;	/* goto sizeof (int) option return */
868 		case IPV6_RECVHOPOPTS:
869 			*i1 = connp->conn_recv_ancillary.crb_ipv6_recvhopopts;
870 			break;	/* goto sizeof (int) option return */
871 		case IPV6_RECVDSTOPTS:
872 			*i1 = connp->conn_recv_ancillary.crb_ipv6_recvdstopts;
873 			break;	/* goto sizeof (int) option return */
874 		case _OLD_IPV6_RECVDSTOPTS:
875 			*i1 =
876 			    connp->conn_recv_ancillary.crb_old_ipv6_recvdstopts;
877 			break;	/* goto sizeof (int) option return */
878 		case IPV6_RECVRTHDRDSTOPTS:
879 			*i1 = connp->conn_recv_ancillary.
880 			    crb_ipv6_recvrthdrdstopts;
881 			break;	/* goto sizeof (int) option return */
882 		case IPV6_RECVRTHDR:
883 			*i1 = connp->conn_recv_ancillary.crb_ipv6_recvrthdr;
884 			break;	/* goto sizeof (int) option return */
885 		case IPV6_PKTINFO: {
886 			/* XXX assumes that caller has room for max size! */
887 			struct in6_pktinfo *pkti;
888 
889 			pkti = (struct in6_pktinfo *)ptr;
890 			pkti->ipi6_ifindex = ixa->ixa_ifindex;
891 			if (ipp->ipp_fields & IPPF_ADDR)
892 				pkti->ipi6_addr = ipp->ipp_addr;
893 			else
894 				pkti->ipi6_addr = ipv6_all_zeros;
895 			return (sizeof (struct in6_pktinfo));
896 		}
897 		case IPV6_TCLASS:
898 			*i1 = ipp->ipp_tclass;
899 			break;	/* goto sizeof (int) option return */
900 		case IPV6_NEXTHOP: {
901 			sin6_t *sin6 = (sin6_t *)ptr;
902 
903 			if (ixa->ixa_flags & IXAF_NEXTHOP_SET)
904 				return (0);
905 
906 			*sin6 = sin6_null;
907 			sin6->sin6_family = AF_INET6;
908 			sin6->sin6_addr = ixa->ixa_nexthop_v6;
909 
910 			return (sizeof (sin6_t));
911 		}
912 		case IPV6_HOPOPTS:
913 			if (!(ipp->ipp_fields & IPPF_HOPOPTS))
914 				return (0);
915 			bcopy(ipp->ipp_hopopts, ptr,
916 			    ipp->ipp_hopoptslen);
917 			return (ipp->ipp_hopoptslen);
918 		case IPV6_RTHDRDSTOPTS:
919 			if (!(ipp->ipp_fields & IPPF_RTHDRDSTOPTS))
920 				return (0);
921 			bcopy(ipp->ipp_rthdrdstopts, ptr,
922 			    ipp->ipp_rthdrdstoptslen);
923 			return (ipp->ipp_rthdrdstoptslen);
924 		case IPV6_RTHDR:
925 			if (!(ipp->ipp_fields & IPPF_RTHDR))
926 				return (0);
927 			bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen);
928 			return (ipp->ipp_rthdrlen);
929 		case IPV6_DSTOPTS:
930 			if (!(ipp->ipp_fields & IPPF_DSTOPTS))
931 				return (0);
932 			bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen);
933 			return (ipp->ipp_dstoptslen);
934 		case IPV6_PATHMTU:
935 			return (ip_fill_mtuinfo(connp, ixa,
936 			    (struct ip6_mtuinfo *)ptr));
937 		case IPV6_SEC_OPT:
938 			return (ipsec_req_from_conn(connp, (ipsec_req_t	*)ptr,
939 			    IPSEC_AF_V6));
940 		case IPV6_SRC_PREFERENCES:
941 			return (ip6_get_src_preferences(ixa, (uint32_t *)ptr));
942 		case IPV6_DONTFRAG:
943 			*i1 = (ixa->ixa_flags & IXAF_DONTFRAG) != 0;
944 			return (sizeof (int));
945 		case IPV6_USE_MIN_MTU:
946 			if (ixa->ixa_flags & IXAF_USE_MIN_MTU)
947 				*i1 = ixa->ixa_use_min_mtu;
948 			else
949 				*i1 = IPV6_USE_MIN_MTU_MULTICAST;
950 			break;
951 		case IPV6_V6ONLY:
952 			*i1 = connp->conn_ipv6_v6only;
953 			return (sizeof (int));
954 		default:
955 			return (-1);
956 		}
957 		break;
958 	case IPPROTO_UDP:
959 		switch (name) {
960 		case UDP_ANONPRIVBIND:
961 			*i1 = connp->conn_anon_priv_bind;
962 			break;
963 		case UDP_EXCLBIND:
964 			*i1 = connp->conn_exclbind ? UDP_EXCLBIND : 0;
965 			break;
966 		default:
967 			return (-1);
968 		}
969 		break;
970 	case IPPROTO_TCP:
971 		switch (name) {
972 		case TCP_RECVDSTADDR:
973 			*i1 = connp->conn_recv_ancillary.crb_recvdstaddr;
974 			break;
975 		case TCP_ANONPRIVBIND:
976 			*i1 = connp->conn_anon_priv_bind;
977 			break;
978 		case TCP_EXCLBIND:
979 			*i1 = connp->conn_exclbind ? TCP_EXCLBIND : 0;
980 			break;
981 		default:
982 			return (-1);
983 		}
984 		break;
985 	default:
986 		return (-1);
987 	}
988 	return (sizeof (int));
989 }
990 
991 static int conn_opt_set_socket(conn_opt_arg_t *coa, t_scalar_t name,
992     uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
993 static int conn_opt_set_ip(conn_opt_arg_t *coa, t_scalar_t name,
994     uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
995 static int conn_opt_set_ipv6(conn_opt_arg_t *coa, t_scalar_t name,
996     uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
997 static int conn_opt_set_udp(conn_opt_arg_t *coa, t_scalar_t name,
998     uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
999 static int conn_opt_set_tcp(conn_opt_arg_t *coa, t_scalar_t name,
1000     uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
1001 
1002 /*
1003  * This routine sets the most common socket options including some
1004  * that are transport/ULP specific.
1005  * It returns errno or zero.
1006  *
1007  * For fixed length options, there is no sanity check
1008  * of passed in length is done. It is assumed *_optcom_req()
1009  * routines do the right thing.
1010  */
1011 int
1012 conn_opt_set(conn_opt_arg_t *coa, t_scalar_t level, t_scalar_t name,
1013     uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1014 {
1015 	ASSERT(MUTEX_NOT_HELD(&coa->coa_connp->conn_lock));
1016 
1017 	/* We have different functions for different levels */
1018 	switch (level) {
1019 	case SOL_SOCKET:
1020 		return (conn_opt_set_socket(coa, name, inlen, invalp,
1021 		    checkonly, cr));
1022 	case IPPROTO_IP:
1023 		return (conn_opt_set_ip(coa, name, inlen, invalp,
1024 		    checkonly, cr));
1025 	case IPPROTO_IPV6:
1026 		return (conn_opt_set_ipv6(coa, name, inlen, invalp,
1027 		    checkonly, cr));
1028 	case IPPROTO_UDP:
1029 		return (conn_opt_set_udp(coa, name, inlen, invalp,
1030 		    checkonly, cr));
1031 	case IPPROTO_TCP:
1032 		return (conn_opt_set_tcp(coa, name, inlen, invalp,
1033 		    checkonly, cr));
1034 	default:
1035 		return (0);
1036 	}
1037 }
1038 
1039 /*
1040  * Handle SOL_SOCKET
1041  * Note that we do not handle SO_PROTOTYPE here. The ULPs that support
1042  * it implement their own checks and setting of conn_proto.
1043  */
1044 /* ARGSUSED1 */
1045 static int
1046 conn_opt_set_socket(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
1047     uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1048 {
1049 	conn_t		*connp = coa->coa_connp;
1050 	ip_xmit_attr_t	*ixa = coa->coa_ixa;
1051 	int		*i1 = (int *)invalp;
1052 	boolean_t	onoff = (*i1 == 0) ? 0 : 1;
1053 
1054 	switch (name) {
1055 	case SO_ALLZONES:
1056 		if (IPCL_IS_BOUND(connp))
1057 			return (EINVAL);
1058 		break;
1059 	case SO_VRRP:
1060 		if (secpolicy_ip_config(cr, checkonly) != 0)
1061 			return (EACCES);
1062 		break;
1063 	case SO_MAC_EXEMPT:
1064 		if (secpolicy_net_mac_aware(cr) != 0)
1065 			return (EACCES);
1066 		if (IPCL_IS_BOUND(connp))
1067 			return (EINVAL);
1068 		break;
1069 	case SO_MAC_IMPLICIT:
1070 		if (secpolicy_net_mac_implicit(cr) != 0)
1071 			return (EACCES);
1072 		break;
1073 	}
1074 	if (checkonly)
1075 		return (0);
1076 
1077 	mutex_enter(&connp->conn_lock);
1078 	/* Here we set the actual option value */
1079 	switch (name) {
1080 	case SO_DEBUG:
1081 		connp->conn_debug = onoff;
1082 		break;
1083 	case SO_KEEPALIVE:
1084 		connp->conn_keepalive = onoff;
1085 		break;
1086 	case SO_LINGER: {
1087 		struct linger *lgr = (struct linger *)invalp;
1088 
1089 		if (lgr->l_onoff) {
1090 			connp->conn_linger = 1;
1091 			connp->conn_lingertime = lgr->l_linger;
1092 		} else {
1093 			connp->conn_linger = 0;
1094 			connp->conn_lingertime = 0;
1095 		}
1096 		break;
1097 	}
1098 	case SO_OOBINLINE:
1099 		connp->conn_oobinline = onoff;
1100 		coa->coa_changed |= COA_OOBINLINE_CHANGED;
1101 		break;
1102 	case SO_REUSEADDR:
1103 		connp->conn_reuseaddr = onoff;
1104 		break;
1105 	case SO_DONTROUTE:
1106 		if (onoff)
1107 			ixa->ixa_flags |= IXAF_DONTROUTE;
1108 		else
1109 			ixa->ixa_flags &= ~IXAF_DONTROUTE;
1110 		coa->coa_changed |= COA_ROUTE_CHANGED;
1111 		break;
1112 	case SO_USELOOPBACK:
1113 		connp->conn_useloopback = onoff;
1114 		break;
1115 	case SO_BROADCAST:
1116 		connp->conn_broadcast = onoff;
1117 		break;
1118 	case SO_SNDBUF:
1119 		/* ULP has range checked the value */
1120 		connp->conn_sndbuf = *i1;
1121 		coa->coa_changed |= COA_SNDBUF_CHANGED;
1122 		break;
1123 	case SO_RCVBUF:
1124 		/* ULP has range checked the value */
1125 		connp->conn_rcvbuf = *i1;
1126 		coa->coa_changed |= COA_RCVBUF_CHANGED;
1127 		break;
1128 	case SO_RCVTIMEO:
1129 	case SO_SNDTIMEO:
1130 		/*
1131 		 * Pass these two options in order for third part
1132 		 * protocol usage.
1133 		 */
1134 		break;
1135 	case SO_DGRAM_ERRIND:
1136 		connp->conn_dgram_errind = onoff;
1137 		break;
1138 	case SO_RECVUCRED:
1139 		connp->conn_recv_ancillary.crb_recvucred = onoff;
1140 		break;
1141 	case SO_ALLZONES:
1142 		connp->conn_allzones = onoff;
1143 		coa->coa_changed |= COA_ROUTE_CHANGED;
1144 		if (onoff)
1145 			ixa->ixa_zoneid = ALL_ZONES;
1146 		else
1147 			ixa->ixa_zoneid = connp->conn_zoneid;
1148 		break;
1149 	case SO_TIMESTAMP:
1150 		connp->conn_recv_ancillary.crb_timestamp = onoff;
1151 		break;
1152 	case SO_VRRP:
1153 		connp->conn_isvrrp = onoff;
1154 		break;
1155 	case SO_ANON_MLP:
1156 		connp->conn_anon_mlp = onoff;
1157 		break;
1158 	case SO_MAC_EXEMPT:
1159 		connp->conn_mac_mode = onoff ?
1160 		    CONN_MAC_AWARE : CONN_MAC_DEFAULT;
1161 		break;
1162 	case SO_MAC_IMPLICIT:
1163 		connp->conn_mac_mode = onoff ?
1164 		    CONN_MAC_IMPLICIT : CONN_MAC_DEFAULT;
1165 		break;
1166 	case SO_EXCLBIND:
1167 		connp->conn_exclbind = onoff;
1168 		break;
1169 	}
1170 	mutex_exit(&connp->conn_lock);
1171 	return (0);
1172 }
1173 
1174 /* Handle IPPROTO_IP */
1175 static int
1176 conn_opt_set_ip(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
1177     uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1178 {
1179 	conn_t		*connp = coa->coa_connp;
1180 	ip_xmit_attr_t	*ixa = coa->coa_ixa;
1181 	ip_pkt_t	*ipp = coa->coa_ipp;
1182 	int		*i1 = (int *)invalp;
1183 	boolean_t	onoff = (*i1 == 0) ? 0 : 1;
1184 	ipaddr_t	addr = (ipaddr_t)*i1;
1185 	uint_t		ifindex;
1186 	zoneid_t	zoneid = IPCL_ZONEID(connp);
1187 	ipif_t		*ipif;
1188 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
1189 	int		error;
1190 
1191 	if (connp->conn_family != AF_INET)
1192 		return (EINVAL);
1193 
1194 	switch (name) {
1195 	case IP_TTL:
1196 		/* Don't allow zero */
1197 		if (*i1 < 1 || *i1 > 255)
1198 			return (EINVAL);
1199 		break;
1200 	case IP_MULTICAST_IF:
1201 		if (addr == INADDR_ANY) {
1202 			/* Clear */
1203 			ifindex = 0;
1204 			break;
1205 		}
1206 		ipif = ipif_lookup_addr(addr, NULL, zoneid, ipst);
1207 		if (ipif == NULL)
1208 			return (EHOSTUNREACH);
1209 		/* not supported by the virtual network iface */
1210 		if (IS_VNI(ipif->ipif_ill)) {
1211 			ipif_refrele(ipif);
1212 			return (EINVAL);
1213 		}
1214 		ifindex = ipif->ipif_ill->ill_phyint->phyint_ifindex;
1215 		ipif_refrele(ipif);
1216 		break;
1217 	case IP_NEXTHOP: {
1218 		ire_t	*ire;
1219 
1220 		if (addr == INADDR_ANY) {
1221 			/* Clear */
1222 			break;
1223 		}
1224 		/* Verify that the next-hop is on-link */
1225 		ire = ire_ftable_lookup_v4(addr, 0, 0, IRE_ONLINK, NULL, zoneid,
1226 		    NULL, MATCH_IRE_TYPE, 0, ipst, NULL);
1227 		if (ire == NULL)
1228 			return (EHOSTUNREACH);
1229 		ire_refrele(ire);
1230 		break;
1231 	}
1232 	case IP_OPTIONS:
1233 	case T_IP_OPTIONS: {
1234 		uint_t newlen;
1235 
1236 		if (ipp->ipp_fields & IPPF_LABEL_V4)
1237 			newlen = inlen + (ipp->ipp_label_len_v4 + 3) & ~3;
1238 		else
1239 			newlen = inlen;
1240 		if ((inlen & 0x3) || newlen > IP_MAX_OPT_LENGTH) {
1241 			return (EINVAL);
1242 		}
1243 		break;
1244 	}
1245 	case IP_PKTINFO: {
1246 		struct in_pktinfo *pktinfo;
1247 
1248 		/* Two different valid lengths */
1249 		if (inlen != sizeof (int) &&
1250 		    inlen != sizeof (struct in_pktinfo))
1251 			return (EINVAL);
1252 		if (inlen == sizeof (int))
1253 			break;
1254 
1255 		pktinfo = (struct in_pktinfo *)invalp;
1256 		if (pktinfo->ipi_spec_dst.s_addr != INADDR_ANY) {
1257 			switch (ip_laddr_verify_v4(pktinfo->ipi_spec_dst.s_addr,
1258 			    zoneid, ipst, B_FALSE)) {
1259 			case IPVL_UNICAST_UP:
1260 			case IPVL_UNICAST_DOWN:
1261 				break;
1262 			default:
1263 				return (EADDRNOTAVAIL);
1264 			}
1265 		}
1266 		if (!ip_ifindex_valid(pktinfo->ipi_ifindex, B_FALSE, ipst))
1267 			return (ENXIO);
1268 		break;
1269 	}
1270 	case IP_BOUND_IF:
1271 		ifindex = *(uint_t *)i1;
1272 
1273 		/* Just check it is ok. */
1274 		if (!ip_ifindex_valid(ifindex, B_FALSE, ipst))
1275 			return (ENXIO);
1276 		break;
1277 	}
1278 	if (checkonly)
1279 		return (0);
1280 
1281 	/* Here we set the actual option value */
1282 	/*
1283 	 * conn_lock protects the bitfields, and is used to
1284 	 * set the fields atomically. Not needed for ixa settings since
1285 	 * the caller has an exclusive copy of the ixa.
1286 	 * We can not hold conn_lock across the multicast options though.
1287 	 */
1288 	switch (name) {
1289 	case IP_OPTIONS:
1290 	case T_IP_OPTIONS:
1291 		/* Save options for use by IP. */
1292 		mutex_enter(&connp->conn_lock);
1293 		error = optcom_pkt_set(invalp, inlen,
1294 		    (uchar_t **)&ipp->ipp_ipv4_options,
1295 		    &ipp->ipp_ipv4_options_len);
1296 		if (error != 0) {
1297 			mutex_exit(&connp->conn_lock);
1298 			return (error);
1299 		}
1300 		if (ipp->ipp_ipv4_options_len == 0) {
1301 			ipp->ipp_fields &= ~IPPF_IPV4_OPTIONS;
1302 		} else {
1303 			ipp->ipp_fields |= IPPF_IPV4_OPTIONS;
1304 		}
1305 		mutex_exit(&connp->conn_lock);
1306 		coa->coa_changed |= COA_HEADER_CHANGED;
1307 		coa->coa_changed |= COA_WROFF_CHANGED;
1308 		break;
1309 
1310 	case IP_TTL:
1311 		mutex_enter(&connp->conn_lock);
1312 		ipp->ipp_unicast_hops = *i1;
1313 		mutex_exit(&connp->conn_lock);
1314 		coa->coa_changed |= COA_HEADER_CHANGED;
1315 		break;
1316 	case IP_TOS:
1317 	case T_IP_TOS:
1318 		mutex_enter(&connp->conn_lock);
1319 		if (*i1 == -1) {
1320 			ipp->ipp_type_of_service = 0;
1321 		} else {
1322 			ipp->ipp_type_of_service = *i1;
1323 		}
1324 		mutex_exit(&connp->conn_lock);
1325 		coa->coa_changed |= COA_HEADER_CHANGED;
1326 		break;
1327 	case IP_MULTICAST_IF:
1328 		ixa->ixa_multicast_ifindex = ifindex;
1329 		ixa->ixa_multicast_ifaddr = addr;
1330 		coa->coa_changed |= COA_ROUTE_CHANGED;
1331 		break;
1332 	case IP_MULTICAST_TTL:
1333 		ixa->ixa_multicast_ttl = *invalp;
1334 		/* Handled automatically by ip_output */
1335 		break;
1336 	case IP_MULTICAST_LOOP:
1337 		if (*invalp != 0)
1338 			ixa->ixa_flags |= IXAF_MULTICAST_LOOP;
1339 		else
1340 			ixa->ixa_flags &= ~IXAF_MULTICAST_LOOP;
1341 		/* Handled automatically by ip_output */
1342 		break;
1343 	case IP_RECVOPTS:
1344 		mutex_enter(&connp->conn_lock);
1345 		connp->conn_recv_ancillary.crb_recvopts = onoff;
1346 		mutex_exit(&connp->conn_lock);
1347 		break;
1348 	case IP_RECVDSTADDR:
1349 		mutex_enter(&connp->conn_lock);
1350 		connp->conn_recv_ancillary.crb_recvdstaddr = onoff;
1351 		mutex_exit(&connp->conn_lock);
1352 		break;
1353 	case IP_RECVIF:
1354 		mutex_enter(&connp->conn_lock);
1355 		connp->conn_recv_ancillary.crb_recvif = onoff;
1356 		mutex_exit(&connp->conn_lock);
1357 		break;
1358 	case IP_RECVSLLA:
1359 		mutex_enter(&connp->conn_lock);
1360 		connp->conn_recv_ancillary.crb_recvslla = onoff;
1361 		mutex_exit(&connp->conn_lock);
1362 		break;
1363 	case IP_RECVTTL:
1364 		mutex_enter(&connp->conn_lock);
1365 		connp->conn_recv_ancillary.crb_recvttl = onoff;
1366 		mutex_exit(&connp->conn_lock);
1367 		break;
1368 	case IP_PKTINFO: {
1369 		/*
1370 		 * This also handles IP_RECVPKTINFO.
1371 		 * IP_PKTINFO and IP_RECVPKTINFO have same value.
1372 		 * Differentiation is based on the size of the
1373 		 * argument passed in.
1374 		 */
1375 		struct in_pktinfo *pktinfo;
1376 
1377 		if (inlen == sizeof (int)) {
1378 			/* This is IP_RECVPKTINFO option. */
1379 			mutex_enter(&connp->conn_lock);
1380 			connp->conn_recv_ancillary.crb_ip_recvpktinfo =
1381 			    onoff;
1382 			mutex_exit(&connp->conn_lock);
1383 			break;
1384 		}
1385 
1386 		/* This is IP_PKTINFO option. */
1387 		mutex_enter(&connp->conn_lock);
1388 		pktinfo = (struct in_pktinfo *)invalp;
1389 		if (ipp->ipp_addr_v4 != INADDR_ANY) {
1390 			ipp->ipp_fields |= IPPF_ADDR;
1391 			IN6_INADDR_TO_V4MAPPED(&pktinfo->ipi_spec_dst,
1392 			    &ipp->ipp_addr);
1393 		} else {
1394 			ipp->ipp_fields &= ~IPPF_ADDR;
1395 			ipp->ipp_addr = ipv6_all_zeros;
1396 		}
1397 		mutex_exit(&connp->conn_lock);
1398 		ixa->ixa_ifindex = pktinfo->ipi_ifindex;
1399 		coa->coa_changed |= COA_ROUTE_CHANGED;
1400 		coa->coa_changed |= COA_HEADER_CHANGED;
1401 		break;
1402 	}
1403 	case IP_DONTFRAG:
1404 		if (onoff) {
1405 			ixa->ixa_flags |= (IXAF_DONTFRAG | IXAF_PMTU_IPV4_DF);
1406 			ixa->ixa_flags &= ~IXAF_PMTU_DISCOVERY;
1407 		} else {
1408 			ixa->ixa_flags &= ~(IXAF_DONTFRAG | IXAF_PMTU_IPV4_DF);
1409 			ixa->ixa_flags |= IXAF_PMTU_DISCOVERY;
1410 		}
1411 		/* Need to redo ip_attr_connect */
1412 		coa->coa_changed |= COA_ROUTE_CHANGED;
1413 		break;
1414 	case IP_ADD_MEMBERSHIP:
1415 	case IP_DROP_MEMBERSHIP:
1416 	case MCAST_JOIN_GROUP:
1417 	case MCAST_LEAVE_GROUP:
1418 		return (ip_opt_set_multicast_group(connp, name,
1419 		    invalp, B_FALSE, checkonly));
1420 
1421 	case IP_BLOCK_SOURCE:
1422 	case IP_UNBLOCK_SOURCE:
1423 	case IP_ADD_SOURCE_MEMBERSHIP:
1424 	case IP_DROP_SOURCE_MEMBERSHIP:
1425 	case MCAST_BLOCK_SOURCE:
1426 	case MCAST_UNBLOCK_SOURCE:
1427 	case MCAST_JOIN_SOURCE_GROUP:
1428 	case MCAST_LEAVE_SOURCE_GROUP:
1429 		return (ip_opt_set_multicast_sources(connp, name,
1430 		    invalp, B_FALSE, checkonly));
1431 
1432 	case IP_SEC_OPT:
1433 		mutex_enter(&connp->conn_lock);
1434 		error = ipsec_set_req(cr, connp, (ipsec_req_t *)invalp);
1435 		mutex_exit(&connp->conn_lock);
1436 		if (error != 0) {
1437 			return (error);
1438 		}
1439 		/* This is an IPsec policy change - redo ip_attr_connect */
1440 		coa->coa_changed |= COA_ROUTE_CHANGED;
1441 		break;
1442 	case IP_NEXTHOP:
1443 		ixa->ixa_nexthop_v4 = addr;
1444 		if (addr != INADDR_ANY)
1445 			ixa->ixa_flags |= IXAF_NEXTHOP_SET;
1446 		else
1447 			ixa->ixa_flags &= ~IXAF_NEXTHOP_SET;
1448 		coa->coa_changed |= COA_ROUTE_CHANGED;
1449 		break;
1450 
1451 	case IP_BOUND_IF:
1452 		ixa->ixa_ifindex = ifindex;		/* Send */
1453 		mutex_enter(&connp->conn_lock);
1454 		connp->conn_incoming_ifindex = ifindex;	/* Receive */
1455 		connp->conn_bound_if = ifindex;		/* getsockopt */
1456 		mutex_exit(&connp->conn_lock);
1457 		coa->coa_changed |= COA_ROUTE_CHANGED;
1458 		break;
1459 	case IP_UNSPEC_SRC:
1460 		mutex_enter(&connp->conn_lock);
1461 		connp->conn_unspec_src = onoff;
1462 		if (onoff)
1463 			ixa->ixa_flags &= ~IXAF_VERIFY_SOURCE;
1464 		else
1465 			ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
1466 
1467 		mutex_exit(&connp->conn_lock);
1468 		break;
1469 	case IP_BROADCAST_TTL:
1470 		ixa->ixa_broadcast_ttl = *invalp;
1471 		ixa->ixa_flags |= IXAF_BROADCAST_TTL_SET;
1472 		/* Handled automatically by ip_output */
1473 		break;
1474 	case MRT_INIT:
1475 	case MRT_DONE:
1476 	case MRT_ADD_VIF:
1477 	case MRT_DEL_VIF:
1478 	case MRT_ADD_MFC:
1479 	case MRT_DEL_MFC:
1480 	case MRT_ASSERT:
1481 		if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) {
1482 			return (error);
1483 		}
1484 		error = ip_mrouter_set((int)name, connp, checkonly,
1485 		    (uchar_t *)invalp, inlen);
1486 		if (error) {
1487 			return (error);
1488 		}
1489 		return (0);
1490 
1491 	}
1492 	return (0);
1493 }
1494 
1495 /* Handle IPPROTO_IPV6 */
1496 static int
1497 conn_opt_set_ipv6(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
1498     uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1499 {
1500 	conn_t		*connp = coa->coa_connp;
1501 	ip_xmit_attr_t	*ixa = coa->coa_ixa;
1502 	ip_pkt_t	*ipp = coa->coa_ipp;
1503 	int		*i1 = (int *)invalp;
1504 	boolean_t	onoff = (*i1 == 0) ? 0 : 1;
1505 	uint_t		ifindex;
1506 	zoneid_t	zoneid = IPCL_ZONEID(connp);
1507 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
1508 	int		error;
1509 
1510 	if (connp->conn_family != AF_INET6)
1511 		return (EINVAL);
1512 
1513 	switch (name) {
1514 	case IPV6_MULTICAST_IF:
1515 		/*
1516 		 * The only possible error is EINVAL.
1517 		 * We call this option on both V4 and V6
1518 		 * If both fail, then this call returns
1519 		 * EINVAL. If at least one of them succeeds we
1520 		 * return success.
1521 		 */
1522 		ifindex = *(uint_t *)i1;
1523 
1524 		if (!ip_ifindex_valid(ifindex, B_TRUE, ipst) &&
1525 		    !ip_ifindex_valid(ifindex, B_FALSE, ipst))
1526 			return (EINVAL);
1527 		break;
1528 	case IPV6_UNICAST_HOPS:
1529 		/* Don't allow zero. -1 means to use default */
1530 		if (*i1 < -1 || *i1 == 0 || *i1 > IPV6_MAX_HOPS)
1531 			return (EINVAL);
1532 		break;
1533 	case IPV6_MULTICAST_HOPS:
1534 		/* -1 means use default */
1535 		if (*i1 < -1 || *i1 > IPV6_MAX_HOPS)
1536 			return (EINVAL);
1537 		break;
1538 	case IPV6_MULTICAST_LOOP:
1539 		if (*i1 != 0 && *i1 != 1)
1540 			return (EINVAL);
1541 		break;
1542 	case IPV6_BOUND_IF:
1543 		ifindex = *(uint_t *)i1;
1544 
1545 		if (!ip_ifindex_valid(ifindex, B_TRUE, ipst))
1546 			return (ENXIO);
1547 		break;
1548 	case IPV6_PKTINFO: {
1549 		struct in6_pktinfo *pkti;
1550 		boolean_t isv6;
1551 
1552 		if (inlen != 0 && inlen != sizeof (struct in6_pktinfo))
1553 			return (EINVAL);
1554 		if (inlen == 0)
1555 			break;	/* Clear values below */
1556 
1557 		/*
1558 		 * Verify the source address and ifindex. Privileged users
1559 		 * can use any source address.
1560 		 */
1561 		pkti = (struct in6_pktinfo *)invalp;
1562 
1563 		/*
1564 		 * For link-local addresses we use the ipi6_ifindex when
1565 		 * we verify the local address.
1566 		 * If net_rawaccess then any source address can be used.
1567 		 */
1568 		if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr) &&
1569 		    secpolicy_net_rawaccess(cr) != 0) {
1570 			uint_t scopeid = 0;
1571 			in6_addr_t *v6src = &pkti->ipi6_addr;
1572 			ipaddr_t v4src;
1573 			ip_laddr_t laddr_type = IPVL_UNICAST_UP;
1574 
1575 			if (IN6_IS_ADDR_V4MAPPED(v6src)) {
1576 				IN6_V4MAPPED_TO_IPADDR(v6src, v4src);
1577 				if (v4src != INADDR_ANY) {
1578 					laddr_type = ip_laddr_verify_v4(v4src,
1579 					    zoneid, ipst, B_FALSE);
1580 				}
1581 			} else {
1582 				if (IN6_IS_ADDR_LINKSCOPE(v6src))
1583 					scopeid = pkti->ipi6_ifindex;
1584 
1585 				laddr_type = ip_laddr_verify_v6(v6src, zoneid,
1586 				    ipst, B_FALSE, scopeid);
1587 			}
1588 			switch (laddr_type) {
1589 			case IPVL_UNICAST_UP:
1590 			case IPVL_UNICAST_DOWN:
1591 				break;
1592 			default:
1593 				return (EADDRNOTAVAIL);
1594 			}
1595 			ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
1596 		} else if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr)) {
1597 			/* Allow any source */
1598 			ixa->ixa_flags &= ~IXAF_VERIFY_SOURCE;
1599 		}
1600 		isv6 = !(IN6_IS_ADDR_V4MAPPED(&pkti->ipi6_addr));
1601 		if (!ip_ifindex_valid(pkti->ipi6_ifindex, isv6, ipst))
1602 			return (ENXIO);
1603 		break;
1604 	}
1605 	case IPV6_HOPLIMIT:
1606 		/* It is only allowed as ancilary data */
1607 		if (!coa->coa_ancillary)
1608 			return (EINVAL);
1609 
1610 		if (inlen != 0 && inlen != sizeof (int))
1611 			return (EINVAL);
1612 		if (inlen == sizeof (int)) {
1613 			if (*i1 > 255 || *i1 < -1 || *i1 == 0)
1614 				return (EINVAL);
1615 		}
1616 		break;
1617 	case IPV6_TCLASS:
1618 		if (inlen != 0 && inlen != sizeof (int))
1619 			return (EINVAL);
1620 		if (inlen == sizeof (int)) {
1621 			if (*i1 > 255 || *i1 < -1)
1622 				return (EINVAL);
1623 		}
1624 		break;
1625 	case IPV6_NEXTHOP:
1626 		if (inlen != 0 && inlen != sizeof (sin6_t))
1627 			return (EINVAL);
1628 		if (inlen == sizeof (sin6_t)) {
1629 			sin6_t *sin6 = (sin6_t *)invalp;
1630 			ire_t	*ire;
1631 
1632 			if (sin6->sin6_family != AF_INET6)
1633 				return (EAFNOSUPPORT);
1634 			if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr))
1635 				return (EADDRNOTAVAIL);
1636 
1637 			/* Verify that the next-hop is on-link */
1638 			ire = ire_ftable_lookup_v6(&sin6->sin6_addr,
1639 			    0, 0, IRE_ONLINK, NULL, zoneid,
1640 			    NULL, MATCH_IRE_TYPE, 0, ipst, NULL);
1641 			if (ire == NULL)
1642 				return (EHOSTUNREACH);
1643 			ire_refrele(ire);
1644 			break;
1645 		}
1646 		break;
1647 	case IPV6_RTHDR:
1648 	case IPV6_DSTOPTS:
1649 	case IPV6_RTHDRDSTOPTS:
1650 	case IPV6_HOPOPTS: {
1651 		/* All have the length field in the same place */
1652 		ip6_hbh_t *hopts = (ip6_hbh_t *)invalp;
1653 		/*
1654 		 * Sanity checks - minimum size, size a multiple of
1655 		 * eight bytes, and matching size passed in.
1656 		 */
1657 		if (inlen != 0 &&
1658 		    inlen != (8 * (hopts->ip6h_len + 1)))
1659 			return (EINVAL);
1660 		break;
1661 	}
1662 	case IPV6_PATHMTU:
1663 		/* Can't be set */
1664 		return (EINVAL);
1665 
1666 	case IPV6_USE_MIN_MTU:
1667 		if (inlen != sizeof (int))
1668 			return (EINVAL);
1669 		if (*i1 < -1 || *i1 > 1)
1670 			return (EINVAL);
1671 		break;
1672 	case IPV6_SRC_PREFERENCES:
1673 		if (inlen != sizeof (uint32_t))
1674 			return (EINVAL);
1675 		break;
1676 	case IPV6_V6ONLY:
1677 		if (*i1 < 0 || *i1 > 1) {
1678 			return (EINVAL);
1679 		}
1680 		break;
1681 	}
1682 	if (checkonly)
1683 		return (0);
1684 
1685 	/* Here we set the actual option value */
1686 	/*
1687 	 * conn_lock protects the bitfields, and is used to
1688 	 * set the fields atomically. Not needed for ixa settings since
1689 	 * the caller has an exclusive copy of the ixa.
1690 	 * We can not hold conn_lock across the multicast options though.
1691 	 */
1692 	ASSERT(MUTEX_NOT_HELD(&coa->coa_connp->conn_lock));
1693 	switch (name) {
1694 	case IPV6_MULTICAST_IF:
1695 		ixa->ixa_multicast_ifindex = ifindex;
1696 		/* Need to redo ip_attr_connect */
1697 		coa->coa_changed |= COA_ROUTE_CHANGED;
1698 		break;
1699 	case IPV6_UNICAST_HOPS:
1700 		/* -1 means use default */
1701 		mutex_enter(&connp->conn_lock);
1702 		if (*i1 == -1) {
1703 			ipp->ipp_unicast_hops = connp->conn_default_ttl;
1704 		} else {
1705 			ipp->ipp_unicast_hops = (uint8_t)*i1;
1706 		}
1707 		mutex_exit(&connp->conn_lock);
1708 		coa->coa_changed |= COA_HEADER_CHANGED;
1709 		break;
1710 	case IPV6_MULTICAST_HOPS:
1711 		/* -1 means use default */
1712 		if (*i1 == -1) {
1713 			ixa->ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1714 		} else {
1715 			ixa->ixa_multicast_ttl = (uint8_t)*i1;
1716 		}
1717 		/* Handled automatically by ip_output */
1718 		break;
1719 	case IPV6_MULTICAST_LOOP:
1720 		if (*i1 != 0)
1721 			ixa->ixa_flags |= IXAF_MULTICAST_LOOP;
1722 		else
1723 			ixa->ixa_flags &= ~IXAF_MULTICAST_LOOP;
1724 		/* Handled automatically by ip_output */
1725 		break;
1726 	case IPV6_JOIN_GROUP:
1727 	case IPV6_LEAVE_GROUP:
1728 	case MCAST_JOIN_GROUP:
1729 	case MCAST_LEAVE_GROUP:
1730 		return (ip_opt_set_multicast_group(connp, name,
1731 		    invalp, B_TRUE, checkonly));
1732 
1733 	case MCAST_BLOCK_SOURCE:
1734 	case MCAST_UNBLOCK_SOURCE:
1735 	case MCAST_JOIN_SOURCE_GROUP:
1736 	case MCAST_LEAVE_SOURCE_GROUP:
1737 		return (ip_opt_set_multicast_sources(connp, name,
1738 		    invalp, B_TRUE, checkonly));
1739 
1740 	case IPV6_BOUND_IF:
1741 		ixa->ixa_ifindex = ifindex;		/* Send */
1742 		mutex_enter(&connp->conn_lock);
1743 		connp->conn_incoming_ifindex = ifindex;	/* Receive */
1744 		connp->conn_bound_if = ifindex;		/* getsockopt */
1745 		mutex_exit(&connp->conn_lock);
1746 		coa->coa_changed |= COA_ROUTE_CHANGED;
1747 		break;
1748 	case IPV6_UNSPEC_SRC:
1749 		mutex_enter(&connp->conn_lock);
1750 		connp->conn_unspec_src = onoff;
1751 		if (onoff)
1752 			ixa->ixa_flags &= ~IXAF_VERIFY_SOURCE;
1753 		else
1754 			ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
1755 		mutex_exit(&connp->conn_lock);
1756 		break;
1757 	case IPV6_RECVPKTINFO:
1758 		mutex_enter(&connp->conn_lock);
1759 		connp->conn_recv_ancillary.crb_ip_recvpktinfo = onoff;
1760 		mutex_exit(&connp->conn_lock);
1761 		break;
1762 	case IPV6_RECVTCLASS:
1763 		mutex_enter(&connp->conn_lock);
1764 		connp->conn_recv_ancillary.crb_ipv6_recvtclass = onoff;
1765 		mutex_exit(&connp->conn_lock);
1766 		break;
1767 	case IPV6_RECVPATHMTU:
1768 		mutex_enter(&connp->conn_lock);
1769 		connp->conn_ipv6_recvpathmtu = onoff;
1770 		mutex_exit(&connp->conn_lock);
1771 		break;
1772 	case IPV6_RECVHOPLIMIT:
1773 		mutex_enter(&connp->conn_lock);
1774 		connp->conn_recv_ancillary.crb_ipv6_recvhoplimit =
1775 		    onoff;
1776 		mutex_exit(&connp->conn_lock);
1777 		break;
1778 	case IPV6_RECVHOPOPTS:
1779 		mutex_enter(&connp->conn_lock);
1780 		connp->conn_recv_ancillary.crb_ipv6_recvhopopts = onoff;
1781 		mutex_exit(&connp->conn_lock);
1782 		break;
1783 	case IPV6_RECVDSTOPTS:
1784 		mutex_enter(&connp->conn_lock);
1785 		connp->conn_recv_ancillary.crb_ipv6_recvdstopts = onoff;
1786 		mutex_exit(&connp->conn_lock);
1787 		break;
1788 	case _OLD_IPV6_RECVDSTOPTS:
1789 		mutex_enter(&connp->conn_lock);
1790 		connp->conn_recv_ancillary.crb_old_ipv6_recvdstopts =
1791 		    onoff;
1792 		mutex_exit(&connp->conn_lock);
1793 		break;
1794 	case IPV6_RECVRTHDRDSTOPTS:
1795 		mutex_enter(&connp->conn_lock);
1796 		connp->conn_recv_ancillary.crb_ipv6_recvrthdrdstopts =
1797 		    onoff;
1798 		mutex_exit(&connp->conn_lock);
1799 		break;
1800 	case IPV6_RECVRTHDR:
1801 		mutex_enter(&connp->conn_lock);
1802 		connp->conn_recv_ancillary.crb_ipv6_recvrthdr = onoff;
1803 		mutex_exit(&connp->conn_lock);
1804 		break;
1805 	case IPV6_PKTINFO:
1806 		mutex_enter(&connp->conn_lock);
1807 		if (inlen == 0) {
1808 			ipp->ipp_fields &= ~IPPF_ADDR;
1809 			ipp->ipp_addr = ipv6_all_zeros;
1810 			ixa->ixa_ifindex = 0;
1811 		} else {
1812 			struct in6_pktinfo *pkti;
1813 
1814 			pkti = (struct in6_pktinfo *)invalp;
1815 			ipp->ipp_addr = pkti->ipi6_addr;
1816 			if (!IN6_IS_ADDR_UNSPECIFIED(&ipp->ipp_addr))
1817 				ipp->ipp_fields |= IPPF_ADDR;
1818 			else
1819 				ipp->ipp_fields &= ~IPPF_ADDR;
1820 			ixa->ixa_ifindex = pkti->ipi6_ifindex;
1821 		}
1822 		mutex_exit(&connp->conn_lock);
1823 		/* Source and ifindex might have changed */
1824 		coa->coa_changed |= COA_HEADER_CHANGED;
1825 		coa->coa_changed |= COA_ROUTE_CHANGED;
1826 		break;
1827 	case IPV6_HOPLIMIT:
1828 		mutex_enter(&connp->conn_lock);
1829 		if (inlen == 0 || *i1 == -1) {
1830 			/* Revert to default */
1831 			ipp->ipp_fields &= ~IPPF_HOPLIMIT;
1832 			ixa->ixa_flags &= ~IXAF_NO_TTL_CHANGE;
1833 		} else {
1834 			ipp->ipp_hoplimit = *i1;
1835 			ipp->ipp_fields |= IPPF_HOPLIMIT;
1836 			/* Ensure that it sticks for multicast packets */
1837 			ixa->ixa_flags |= IXAF_NO_TTL_CHANGE;
1838 		}
1839 		mutex_exit(&connp->conn_lock);
1840 		coa->coa_changed |= COA_HEADER_CHANGED;
1841 		break;
1842 	case IPV6_TCLASS:
1843 		/*
1844 		 * IPV6_TCLASS accepts -1 as use kernel default
1845 		 * and [0, 255] as the actualy traffic class.
1846 		 */
1847 		mutex_enter(&connp->conn_lock);
1848 		if (inlen == 0 || *i1 == -1) {
1849 			ipp->ipp_tclass = 0;
1850 			ipp->ipp_fields &= ~IPPF_TCLASS;
1851 		} else {
1852 			ipp->ipp_tclass = *i1;
1853 			ipp->ipp_fields |= IPPF_TCLASS;
1854 		}
1855 		mutex_exit(&connp->conn_lock);
1856 		coa->coa_changed |= COA_HEADER_CHANGED;
1857 		break;
1858 	case IPV6_NEXTHOP:
1859 		if (inlen == 0) {
1860 			ixa->ixa_flags &= ~IXAF_NEXTHOP_SET;
1861 		} else {
1862 			sin6_t *sin6 = (sin6_t *)invalp;
1863 
1864 			ixa->ixa_nexthop_v6 = sin6->sin6_addr;
1865 			if (!IN6_IS_ADDR_UNSPECIFIED(&ixa->ixa_nexthop_v6))
1866 				ixa->ixa_flags |= IXAF_NEXTHOP_SET;
1867 			else
1868 				ixa->ixa_flags &= ~IXAF_NEXTHOP_SET;
1869 		}
1870 		coa->coa_changed |= COA_ROUTE_CHANGED;
1871 		break;
1872 	case IPV6_HOPOPTS:
1873 		mutex_enter(&connp->conn_lock);
1874 		error = optcom_pkt_set(invalp, inlen,
1875 		    (uchar_t **)&ipp->ipp_hopopts, &ipp->ipp_hopoptslen);
1876 		if (error != 0) {
1877 			mutex_exit(&connp->conn_lock);
1878 			return (error);
1879 		}
1880 		if (ipp->ipp_hopoptslen == 0) {
1881 			ipp->ipp_fields &= ~IPPF_HOPOPTS;
1882 		} else {
1883 			ipp->ipp_fields |= IPPF_HOPOPTS;
1884 		}
1885 		mutex_exit(&connp->conn_lock);
1886 		coa->coa_changed |= COA_HEADER_CHANGED;
1887 		coa->coa_changed |= COA_WROFF_CHANGED;
1888 		break;
1889 	case IPV6_RTHDRDSTOPTS:
1890 		mutex_enter(&connp->conn_lock);
1891 		error = optcom_pkt_set(invalp, inlen,
1892 		    (uchar_t **)&ipp->ipp_rthdrdstopts,
1893 		    &ipp->ipp_rthdrdstoptslen);
1894 		if (error != 0) {
1895 			mutex_exit(&connp->conn_lock);
1896 			return (error);
1897 		}
1898 		if (ipp->ipp_rthdrdstoptslen == 0) {
1899 			ipp->ipp_fields &= ~IPPF_RTHDRDSTOPTS;
1900 		} else {
1901 			ipp->ipp_fields |= IPPF_RTHDRDSTOPTS;
1902 		}
1903 		mutex_exit(&connp->conn_lock);
1904 		coa->coa_changed |= COA_HEADER_CHANGED;
1905 		coa->coa_changed |= COA_WROFF_CHANGED;
1906 		break;
1907 	case IPV6_DSTOPTS:
1908 		mutex_enter(&connp->conn_lock);
1909 		error = optcom_pkt_set(invalp, inlen,
1910 		    (uchar_t **)&ipp->ipp_dstopts, &ipp->ipp_dstoptslen);
1911 		if (error != 0) {
1912 			mutex_exit(&connp->conn_lock);
1913 			return (error);
1914 		}
1915 		if (ipp->ipp_dstoptslen == 0) {
1916 			ipp->ipp_fields &= ~IPPF_DSTOPTS;
1917 		} else {
1918 			ipp->ipp_fields |= IPPF_DSTOPTS;
1919 		}
1920 		mutex_exit(&connp->conn_lock);
1921 		coa->coa_changed |= COA_HEADER_CHANGED;
1922 		coa->coa_changed |= COA_WROFF_CHANGED;
1923 		break;
1924 	case IPV6_RTHDR:
1925 		mutex_enter(&connp->conn_lock);
1926 		error = optcom_pkt_set(invalp, inlen,
1927 		    (uchar_t **)&ipp->ipp_rthdr, &ipp->ipp_rthdrlen);
1928 		if (error != 0) {
1929 			mutex_exit(&connp->conn_lock);
1930 			return (error);
1931 		}
1932 		if (ipp->ipp_rthdrlen == 0) {
1933 			ipp->ipp_fields &= ~IPPF_RTHDR;
1934 		} else {
1935 			ipp->ipp_fields |= IPPF_RTHDR;
1936 		}
1937 		mutex_exit(&connp->conn_lock);
1938 		coa->coa_changed |= COA_HEADER_CHANGED;
1939 		coa->coa_changed |= COA_WROFF_CHANGED;
1940 		break;
1941 
1942 	case IPV6_DONTFRAG:
1943 		if (onoff) {
1944 			ixa->ixa_flags |= IXAF_DONTFRAG;
1945 			ixa->ixa_flags &= ~IXAF_PMTU_DISCOVERY;
1946 		} else {
1947 			ixa->ixa_flags &= ~IXAF_DONTFRAG;
1948 			ixa->ixa_flags |= IXAF_PMTU_DISCOVERY;
1949 		}
1950 		/* Need to redo ip_attr_connect */
1951 		coa->coa_changed |= COA_ROUTE_CHANGED;
1952 		break;
1953 
1954 	case IPV6_USE_MIN_MTU:
1955 		ixa->ixa_flags |= IXAF_USE_MIN_MTU;
1956 		ixa->ixa_use_min_mtu = *i1;
1957 		/* Need to redo ip_attr_connect */
1958 		coa->coa_changed |= COA_ROUTE_CHANGED;
1959 		break;
1960 
1961 	case IPV6_SEC_OPT:
1962 		mutex_enter(&connp->conn_lock);
1963 		error = ipsec_set_req(cr, connp, (ipsec_req_t *)invalp);
1964 		mutex_exit(&connp->conn_lock);
1965 		if (error != 0) {
1966 			return (error);
1967 		}
1968 		/* This is an IPsec policy change - redo ip_attr_connect */
1969 		coa->coa_changed |= COA_ROUTE_CHANGED;
1970 		break;
1971 	case IPV6_SRC_PREFERENCES:
1972 		/*
1973 		 * This socket option only affects connected
1974 		 * sockets that haven't already bound to a specific
1975 		 * IPv6 address.  In other words, sockets that
1976 		 * don't call bind() with an address other than the
1977 		 * unspecified address and that call connect().
1978 		 * ip_set_destination_v6() passes these preferences
1979 		 * to the ipif_select_source_v6() function.
1980 		 */
1981 		mutex_enter(&connp->conn_lock);
1982 		error = ip6_set_src_preferences(ixa, *(uint32_t *)invalp);
1983 		mutex_exit(&connp->conn_lock);
1984 		if (error != 0) {
1985 			return (error);
1986 		}
1987 		break;
1988 	case IPV6_V6ONLY:
1989 		mutex_enter(&connp->conn_lock);
1990 		connp->conn_ipv6_v6only = onoff;
1991 		mutex_exit(&connp->conn_lock);
1992 		break;
1993 	}
1994 	return (0);
1995 }
1996 
1997 /* Handle IPPROTO_UDP */
1998 /* ARGSUSED1 */
1999 static int
2000 conn_opt_set_udp(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
2001     uchar_t *invalp, boolean_t checkonly, cred_t *cr)
2002 {
2003 	conn_t		*connp = coa->coa_connp;
2004 	int		*i1 = (int *)invalp;
2005 	boolean_t	onoff = (*i1 == 0) ? 0 : 1;
2006 	int		error;
2007 
2008 	switch (name) {
2009 	case UDP_ANONPRIVBIND:
2010 		if ((error = secpolicy_net_privaddr(cr, 0, IPPROTO_UDP)) != 0) {
2011 			return (error);
2012 		}
2013 		break;
2014 	}
2015 	if (checkonly)
2016 		return (0);
2017 
2018 	/* Here we set the actual option value */
2019 	mutex_enter(&connp->conn_lock);
2020 	switch (name) {
2021 	case UDP_ANONPRIVBIND:
2022 		connp->conn_anon_priv_bind = onoff;
2023 		break;
2024 	case UDP_EXCLBIND:
2025 		connp->conn_exclbind = onoff;
2026 		break;
2027 	}
2028 	mutex_exit(&connp->conn_lock);
2029 	return (0);
2030 }
2031 
2032 /* Handle IPPROTO_TCP */
2033 /* ARGSUSED1 */
2034 static int
2035 conn_opt_set_tcp(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
2036     uchar_t *invalp, boolean_t checkonly, cred_t *cr)
2037 {
2038 	conn_t		*connp = coa->coa_connp;
2039 	int		*i1 = (int *)invalp;
2040 	boolean_t	onoff = (*i1 == 0) ? 0 : 1;
2041 	int		error;
2042 
2043 	switch (name) {
2044 	case TCP_ANONPRIVBIND:
2045 		if ((error = secpolicy_net_privaddr(cr, 0, IPPROTO_TCP)) != 0) {
2046 			return (error);
2047 		}
2048 		break;
2049 	}
2050 	if (checkonly)
2051 		return (0);
2052 
2053 	/* Here we set the actual option value */
2054 	mutex_enter(&connp->conn_lock);
2055 	switch (name) {
2056 	case TCP_ANONPRIVBIND:
2057 		connp->conn_anon_priv_bind = onoff;
2058 		break;
2059 	case TCP_EXCLBIND:
2060 		connp->conn_exclbind = onoff;
2061 		break;
2062 	case TCP_RECVDSTADDR:
2063 		connp->conn_recv_ancillary.crb_recvdstaddr = onoff;
2064 		break;
2065 	}
2066 	mutex_exit(&connp->conn_lock);
2067 	return (0);
2068 }
2069 
2070 int
2071 conn_getsockname(conn_t *connp, struct sockaddr *sa, uint_t *salenp)
2072 {
2073 	sin_t		*sin;
2074 	sin6_t		*sin6;
2075 
2076 	if (connp->conn_family == AF_INET) {
2077 		if (*salenp < sizeof (sin_t))
2078 			return (EINVAL);
2079 
2080 		*salenp = sizeof (sin_t);
2081 		/* Fill zeroes and then initialize non-zero fields */
2082 		sin = (sin_t *)sa;
2083 		*sin = sin_null;
2084 		sin->sin_family = AF_INET;
2085 		if (!IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_saddr_v6) &&
2086 		    !IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) {
2087 			sin->sin_addr.s_addr = connp->conn_saddr_v4;
2088 		} else {
2089 			/*
2090 			 * INADDR_ANY
2091 			 * conn_saddr is not set, we might be bound to
2092 			 * broadcast/multicast. Use conn_bound_addr as
2093 			 * local address instead (that could
2094 			 * also still be INADDR_ANY)
2095 			 */
2096 			sin->sin_addr.s_addr = connp->conn_bound_addr_v4;
2097 		}
2098 		sin->sin_port = connp->conn_lport;
2099 	} else {
2100 		if (*salenp < sizeof (sin6_t))
2101 			return (EINVAL);
2102 
2103 		*salenp = sizeof (sin6_t);
2104 		/* Fill zeroes and then initialize non-zero fields */
2105 		sin6 = (sin6_t *)sa;
2106 		*sin6 = sin6_null;
2107 		sin6->sin6_family = AF_INET6;
2108 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) {
2109 			sin6->sin6_addr = connp->conn_saddr_v6;
2110 		} else {
2111 			/*
2112 			 * conn_saddr is not set, we might be bound to
2113 			 * broadcast/multicast. Use conn_bound_addr as
2114 			 * local address instead (which could
2115 			 * also still be unspecified)
2116 			 */
2117 			sin6->sin6_addr = connp->conn_bound_addr_v6;
2118 		}
2119 		sin6->sin6_port = connp->conn_lport;
2120 		if (IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr) &&
2121 		    (connp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET))
2122 			sin6->sin6_scope_id = connp->conn_ixa->ixa_scopeid;
2123 	}
2124 	return (0);
2125 }
2126 
2127 int
2128 conn_getpeername(conn_t *connp, struct sockaddr *sa, uint_t *salenp)
2129 {
2130 	struct sockaddr_in	*sin;
2131 	struct sockaddr_in6	*sin6;
2132 
2133 	if (connp->conn_family == AF_INET) {
2134 		if (*salenp < sizeof (sin_t))
2135 			return (EINVAL);
2136 
2137 		*salenp = sizeof (sin_t);
2138 		/* initialize */
2139 		sin = (sin_t *)sa;
2140 		*sin = sin_null;
2141 		sin->sin_family = AF_INET;
2142 		sin->sin_addr.s_addr = connp->conn_faddr_v4;
2143 		sin->sin_port = connp->conn_fport;
2144 	} else {
2145 		if (*salenp < sizeof (sin6_t))
2146 			return (EINVAL);
2147 
2148 		*salenp = sizeof (sin6_t);
2149 		/* initialize */
2150 		sin6 = (sin6_t *)sa;
2151 		*sin6 = sin6_null;
2152 		sin6->sin6_family = AF_INET6;
2153 		sin6->sin6_addr = connp->conn_faddr_v6;
2154 		sin6->sin6_port =  connp->conn_fport;
2155 		sin6->sin6_flowinfo = connp->conn_flowinfo;
2156 		if (IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr) &&
2157 		    (connp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET))
2158 			sin6->sin6_scope_id = connp->conn_ixa->ixa_scopeid;
2159 	}
2160 	return (0);
2161 }
2162 
2163 static uint32_t	cksum_massage_options_v4(ipha_t *, netstack_t *);
2164 static uint32_t cksum_massage_options_v6(ip6_t *, uint_t, netstack_t *);
2165 
2166 /*
2167  * Allocate and fill in conn_ht_iphc based on the current information
2168  * in the conn.
2169  * Normally used when we bind() and connect().
2170  * Returns failure if can't allocate memory, or if there is a problem
2171  * with a routing header/option.
2172  *
2173  * We allocate space for the transport header (ulp_hdr_len + extra) and
2174  * indicate the offset of the ulp header by setting ixa_ip_hdr_length.
2175  * The extra is there for transports that want some spare room for future
2176  * options. conn_ht_iphc_allocated is what was allocated; conn_ht_iphc_len
2177  * excludes the extra part.
2178  *
2179  * We massage an routing option/header and store the ckecksum difference
2180  * in conn_sum.
2181  *
2182  * Caller needs to update conn_wroff if desired.
2183  */
2184 int
2185 conn_build_hdr_template(conn_t *connp, uint_t ulp_hdr_length, uint_t extra,
2186     const in6_addr_t *v6src, const in6_addr_t *v6dst, uint32_t flowinfo)
2187 {
2188 	ip_xmit_attr_t	*ixa = connp->conn_ixa;
2189 	ip_pkt_t	*ipp = &connp->conn_xmit_ipp;
2190 	uint_t		ip_hdr_length;
2191 	uchar_t		*hdrs;
2192 	uint_t		hdrs_len;
2193 
2194 	ASSERT(MUTEX_HELD(&connp->conn_lock));
2195 
2196 	if (ixa->ixa_flags & IXAF_IS_IPV4) {
2197 		ip_hdr_length = ip_total_hdrs_len_v4(ipp);
2198 		/* In case of TX label and IP options it can be too much */
2199 		if (ip_hdr_length > IP_MAX_HDR_LENGTH) {
2200 			/* Preserves existing TX errno for this */
2201 			return (EHOSTUNREACH);
2202 		}
2203 	} else {
2204 		ip_hdr_length = ip_total_hdrs_len_v6(ipp);
2205 	}
2206 	ixa->ixa_ip_hdr_length = ip_hdr_length;
2207 	hdrs_len = ip_hdr_length + ulp_hdr_length + extra;
2208 	ASSERT(hdrs_len != 0);
2209 
2210 	if (hdrs_len != connp->conn_ht_iphc_allocated) {
2211 		/* Allocate new before we free any old */
2212 		hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP);
2213 		if (hdrs == NULL)
2214 			return (ENOMEM);
2215 
2216 		if (connp->conn_ht_iphc != NULL) {
2217 			kmem_free(connp->conn_ht_iphc,
2218 			    connp->conn_ht_iphc_allocated);
2219 		}
2220 		connp->conn_ht_iphc = hdrs;
2221 		connp->conn_ht_iphc_allocated = hdrs_len;
2222 	} else {
2223 		hdrs = connp->conn_ht_iphc;
2224 	}
2225 	hdrs_len -= extra;
2226 	connp->conn_ht_iphc_len = hdrs_len;
2227 
2228 	connp->conn_ht_ulp = hdrs + ip_hdr_length;
2229 	connp->conn_ht_ulp_len = ulp_hdr_length;
2230 
2231 	if (ixa->ixa_flags & IXAF_IS_IPV4) {
2232 		ipha_t	*ipha = (ipha_t *)hdrs;
2233 
2234 		IN6_V4MAPPED_TO_IPADDR(v6src, ipha->ipha_src);
2235 		IN6_V4MAPPED_TO_IPADDR(v6dst, ipha->ipha_dst);
2236 		ip_build_hdrs_v4(hdrs, ip_hdr_length, ipp, connp->conn_proto);
2237 		ipha->ipha_length = htons(hdrs_len);
2238 		if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF)
2239 			ipha->ipha_fragment_offset_and_flags |= IPH_DF_HTONS;
2240 		else
2241 			ipha->ipha_fragment_offset_and_flags &= ~IPH_DF_HTONS;
2242 
2243 		if (ipp->ipp_fields & IPPF_IPV4_OPTIONS) {
2244 			connp->conn_sum = cksum_massage_options_v4(ipha,
2245 			    connp->conn_netstack);
2246 		} else {
2247 			connp->conn_sum = 0;
2248 		}
2249 	} else {
2250 		ip6_t	*ip6h = (ip6_t *)hdrs;
2251 
2252 		ip6h->ip6_src = *v6src;
2253 		ip6h->ip6_dst = *v6dst;
2254 		ip_build_hdrs_v6(hdrs, ip_hdr_length, ipp, connp->conn_proto,
2255 		    flowinfo);
2256 		ip6h->ip6_plen = htons(hdrs_len - IPV6_HDR_LEN);
2257 
2258 		if (ipp->ipp_fields & IPPF_RTHDR) {
2259 			connp->conn_sum = cksum_massage_options_v6(ip6h,
2260 			    ip_hdr_length, connp->conn_netstack);
2261 
2262 			/*
2263 			 * Verify that the first hop isn't a mapped address.
2264 			 * Routers along the path need to do this verification
2265 			 * for subsequent hops.
2266 			 */
2267 			if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst))
2268 				return (EADDRNOTAVAIL);
2269 
2270 		} else {
2271 			connp->conn_sum = 0;
2272 		}
2273 	}
2274 	return (0);
2275 }
2276 
2277 /*
2278  * Prepend a header template to data_mp based on the ip_pkt_t
2279  * and the passed in source, destination and protocol.
2280  *
2281  * Returns failure if can't allocate memory, in which case data_mp is freed.
2282  * We allocate space for the transport header (ulp_hdr_len) and
2283  * indicate the offset of the ulp header by setting ixa_ip_hdr_length.
2284  *
2285  * We massage an routing option/header and return the ckecksum difference
2286  * in *sump. This is in host byte order.
2287  *
2288  * Caller needs to update conn_wroff if desired.
2289  */
2290 mblk_t *
2291 conn_prepend_hdr(ip_xmit_attr_t *ixa, const ip_pkt_t *ipp,
2292     const in6_addr_t *v6src, const in6_addr_t *v6dst,
2293     uint8_t protocol, uint32_t flowinfo, uint_t ulp_hdr_length, mblk_t *data_mp,
2294     uint_t data_length, uint_t wroff_extra, uint32_t *sump, int *errorp)
2295 {
2296 	uint_t		ip_hdr_length;
2297 	uchar_t		*hdrs;
2298 	uint_t		hdrs_len;
2299 	mblk_t		*mp;
2300 
2301 	if (ixa->ixa_flags & IXAF_IS_IPV4) {
2302 		ip_hdr_length = ip_total_hdrs_len_v4(ipp);
2303 		ASSERT(ip_hdr_length <= IP_MAX_HDR_LENGTH);
2304 	} else {
2305 		ip_hdr_length = ip_total_hdrs_len_v6(ipp);
2306 	}
2307 	hdrs_len = ip_hdr_length + ulp_hdr_length;
2308 	ASSERT(hdrs_len != 0);
2309 
2310 	ixa->ixa_ip_hdr_length = ip_hdr_length;
2311 
2312 	/* Can we prepend to data_mp? */
2313 	if (data_mp != NULL &&
2314 	    data_mp->b_rptr - data_mp->b_datap->db_base >= hdrs_len &&
2315 	    data_mp->b_datap->db_ref == 1) {
2316 		hdrs = data_mp->b_rptr - hdrs_len;
2317 		data_mp->b_rptr = hdrs;
2318 		mp = data_mp;
2319 	} else {
2320 		mp = allocb(hdrs_len + wroff_extra, BPRI_MED);
2321 		if (mp == NULL) {
2322 			freemsg(data_mp);
2323 			*errorp = ENOMEM;
2324 			return (NULL);
2325 		}
2326 		mp->b_wptr = mp->b_datap->db_lim;
2327 		hdrs = mp->b_rptr = mp->b_wptr - hdrs_len;
2328 		mp->b_cont = data_mp;
2329 	}
2330 
2331 	/*
2332 	 * Set the source in the header. ip_build_hdrs_v4/v6 will overwrite it
2333 	 * if PKTINFO (aka IPPF_ADDR) was set.
2334 	 */
2335 	if (ixa->ixa_flags & IXAF_IS_IPV4) {
2336 		ipha_t *ipha = (ipha_t *)hdrs;
2337 
2338 		ASSERT(IN6_IS_ADDR_V4MAPPED(v6dst));
2339 		IN6_V4MAPPED_TO_IPADDR(v6src, ipha->ipha_src);
2340 		IN6_V4MAPPED_TO_IPADDR(v6dst, ipha->ipha_dst);
2341 		ip_build_hdrs_v4(hdrs, ip_hdr_length, ipp, protocol);
2342 		ipha->ipha_length = htons(hdrs_len + data_length);
2343 		if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF)
2344 			ipha->ipha_fragment_offset_and_flags |= IPH_DF_HTONS;
2345 		else
2346 			ipha->ipha_fragment_offset_and_flags &= ~IPH_DF_HTONS;
2347 
2348 		if (ipp->ipp_fields & IPPF_IPV4_OPTIONS) {
2349 			*sump = cksum_massage_options_v4(ipha,
2350 			    ixa->ixa_ipst->ips_netstack);
2351 		} else {
2352 			*sump = 0;
2353 		}
2354 	} else {
2355 		ip6_t *ip6h = (ip6_t *)hdrs;
2356 
2357 		ip6h->ip6_src = *v6src;
2358 		ip6h->ip6_dst = *v6dst;
2359 		ip_build_hdrs_v6(hdrs, ip_hdr_length, ipp, protocol, flowinfo);
2360 		ip6h->ip6_plen = htons(hdrs_len + data_length - IPV6_HDR_LEN);
2361 
2362 		if (ipp->ipp_fields & IPPF_RTHDR) {
2363 			*sump = cksum_massage_options_v6(ip6h,
2364 			    ip_hdr_length, ixa->ixa_ipst->ips_netstack);
2365 
2366 			/*
2367 			 * Verify that the first hop isn't a mapped address.
2368 			 * Routers along the path need to do this verification
2369 			 * for subsequent hops.
2370 			 */
2371 			if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) {
2372 				*errorp = EADDRNOTAVAIL;
2373 				freemsg(mp);
2374 				return (NULL);
2375 			}
2376 		} else {
2377 			*sump = 0;
2378 		}
2379 	}
2380 	return (mp);
2381 }
2382 
2383 /*
2384  * Massage a source route if any putting the first hop
2385  * in ipha_dst. Compute a starting value for the checksum which
2386  * takes into account that the original ipha_dst should be
2387  * included in the checksum but that IP will include the
2388  * first hop from the source route in the tcp checksum.
2389  */
2390 static uint32_t
2391 cksum_massage_options_v4(ipha_t *ipha, netstack_t *ns)
2392 {
2393 	in_addr_t	dst;
2394 	uint32_t	cksum;
2395 
2396 	/* Get last hop then diff against first hop */
2397 	cksum = ip_massage_options(ipha, ns);
2398 	cksum = (cksum & 0xFFFF) + (cksum >> 16);
2399 	dst = ipha->ipha_dst;
2400 	cksum -= ((dst >> 16) + (dst & 0xffff));
2401 	if ((int)cksum < 0)
2402 		cksum--;
2403 	cksum = (cksum & 0xFFFF) + (cksum >> 16);
2404 	cksum = (cksum & 0xFFFF) + (cksum >> 16);
2405 	ASSERT(cksum < 0x10000);
2406 	return (ntohs(cksum));
2407 }
2408 
2409 static uint32_t
2410 cksum_massage_options_v6(ip6_t *ip6h, uint_t ip_hdr_len, netstack_t *ns)
2411 {
2412 	uint8_t		*end;
2413 	ip6_rthdr_t	*rth;
2414 	uint32_t	cksum;
2415 
2416 	end = (uint8_t *)ip6h + ip_hdr_len;
2417 	rth = ip_find_rthdr_v6(ip6h, end);
2418 	if (rth == NULL)
2419 		return (0);
2420 
2421 	cksum = ip_massage_options_v6(ip6h, rth, ns);
2422 	cksum = (cksum & 0xFFFF) + (cksum >> 16);
2423 	ASSERT(cksum < 0x10000);
2424 	return (ntohs(cksum));
2425 }
2426 
2427 /*
2428  * ULPs that change the destination address need to call this for each
2429  * change to discard any state about a previous destination that might
2430  * have been multicast or multirt.
2431  */
2432 void
2433 ip_attr_newdst(ip_xmit_attr_t *ixa)
2434 {
2435 	ixa->ixa_flags &= ~(IXAF_LOOPBACK_COPY | IXAF_NO_HW_CKSUM |
2436 	    IXAF_NO_TTL_CHANGE | IXAF_IPV6_ADD_FRAGHDR |
2437 	    IXAF_NO_LOOP_ZONEID_SET);
2438 }
2439 
2440 /*
2441  * Determine the nexthop which will be used.
2442  * Normally this is just the destination, but if a IPv4 source route, or
2443  * IPv6 routing header, is in the ip_pkt_t then we extract the nexthop from
2444  * there.
2445  */
2446 void
2447 ip_attr_nexthop(const ip_pkt_t *ipp, const ip_xmit_attr_t *ixa,
2448     const in6_addr_t *dst, in6_addr_t *nexthop)
2449 {
2450 	if (ixa->ixa_flags & IXAF_IS_IPV4) {
2451 		ipaddr_t v4dst;
2452 		ipaddr_t v4nexthop;
2453 
2454 		IN6_V4MAPPED_TO_IPADDR(dst, v4dst);
2455 		v4nexthop = ip_pkt_source_route_v4(ipp);
2456 		if (v4nexthop == INADDR_ANY)
2457 			v4nexthop = v4dst;
2458 
2459 		IN6_IPADDR_TO_V4MAPPED(v4nexthop, nexthop);
2460 	} else {
2461 		const in6_addr_t *v6nexthop;
2462 
2463 		v6nexthop = ip_pkt_source_route_v6(ipp);
2464 		if (v6nexthop == NULL)
2465 			v6nexthop = dst;
2466 
2467 		*nexthop = *v6nexthop;
2468 	}
2469 }
2470 
2471 /*
2472  * Update the ip_xmit_attr_t based the addresses, conn_xmit_ipp and conn_ixa.
2473  * If IPDF_IPSEC is set we cache the IPsec policy to handle the unconnected
2474  * case (connected latching is done in conn_connect).
2475  * Note that IPsec policy lookup requires conn_proto and conn_laddr to be
2476  * set, but doesn't otherwise use the conn_t.
2477  *
2478  * Caller must set/clear IXAF_IS_IPV4 as appropriately.
2479  * Caller must use ip_attr_nexthop() to determine the nexthop argument.
2480  *
2481  * The caller must NOT hold conn_lock (to avoid problems with ill_refrele
2482  * causing the squeue to run doing ipcl_walk grabbing conn_lock.)
2483  *
2484  * Updates laddrp and uinfo if they are non-NULL.
2485  *
2486  * TSOL notes: The callers if ip_attr_connect must check if the destination
2487  * is different than before and in that case redo conn_update_label.
2488  * The callers of conn_connect do not need that since conn_connect
2489  * performs the conn_update_label.
2490  */
2491 int
2492 ip_attr_connect(const conn_t *connp, ip_xmit_attr_t *ixa,
2493     const in6_addr_t *v6src, const in6_addr_t *v6dst,
2494     const in6_addr_t *v6nexthop, in_port_t dstport, in6_addr_t *laddrp,
2495     iulp_t *uinfo, uint32_t flags)
2496 {
2497 	in6_addr_t		laddr = *v6src;
2498 	int			error;
2499 
2500 	ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
2501 
2502 	if (connp->conn_zone_is_global)
2503 		flags |= IPDF_ZONE_IS_GLOBAL;
2504 	else
2505 		flags &= ~IPDF_ZONE_IS_GLOBAL;
2506 
2507 	/*
2508 	 * Lookup the route to determine a source address and the uinfo.
2509 	 * If the ULP has a source route option then the caller will
2510 	 * have set v6nexthop to be the first hop.
2511 	 */
2512 	if (ixa->ixa_flags & IXAF_IS_IPV4) {
2513 		ipaddr_t v4dst;
2514 		ipaddr_t v4src, v4nexthop;
2515 
2516 		IN6_V4MAPPED_TO_IPADDR(v6dst, v4dst);
2517 		IN6_V4MAPPED_TO_IPADDR(v6nexthop, v4nexthop);
2518 		IN6_V4MAPPED_TO_IPADDR(v6src, v4src);
2519 
2520 		if (connp->conn_unspec_src || v4src != INADDR_ANY)
2521 			flags &= ~IPDF_SELECT_SRC;
2522 		else
2523 			flags |= IPDF_SELECT_SRC;
2524 
2525 		error = ip_set_destination_v4(&v4src, v4dst, v4nexthop, ixa,
2526 		    uinfo, flags, connp->conn_mac_mode);
2527 		IN6_IPADDR_TO_V4MAPPED(v4src, &laddr);
2528 	} else {
2529 		if (connp->conn_unspec_src || !IN6_IS_ADDR_UNSPECIFIED(v6src))
2530 			flags &= ~IPDF_SELECT_SRC;
2531 		else
2532 			flags |= IPDF_SELECT_SRC;
2533 
2534 		error = ip_set_destination_v6(&laddr, v6dst, v6nexthop, ixa,
2535 		    uinfo, flags, connp->conn_mac_mode);
2536 	}
2537 	/* Pass out some address even if we hit a RTF_REJECT etc */
2538 	if (laddrp != NULL)
2539 		*laddrp = laddr;
2540 
2541 	if (error != 0)
2542 		return (error);
2543 
2544 	if (flags & IPDF_IPSEC) {
2545 		/*
2546 		 * Set any IPsec policy in ixa. Routine also looks at ULP
2547 		 * ports.
2548 		 */
2549 		ipsec_cache_outbound_policy(connp, v6src, v6dst, dstport, ixa);
2550 	}
2551 	return (0);
2552 }
2553 
2554 /*
2555  * Connect the conn based on the addresses, conn_xmit_ipp and conn_ixa.
2556  * Assumes that conn_faddr and conn_fport are already set. As such it is not
2557  * usable for SCTP, since SCTP has multiple faddrs.
2558  *
2559  * Caller must hold conn_lock to provide atomic constency between the
2560  * conn_t's addresses and the ixa.
2561  * NOTE: this function drops and reaquires conn_lock since it can't be
2562  * held across ip_attr_connect/ip_set_destination.
2563  *
2564  * The caller needs to handle inserting in the receive-side fanout when
2565  * appropriate after conn_connect returns.
2566  */
2567 int
2568 conn_connect(conn_t *connp, iulp_t *uinfo, uint32_t flags)
2569 {
2570 	ip_xmit_attr_t	*ixa = connp->conn_ixa;
2571 	in6_addr_t	nexthop;
2572 	in6_addr_t	saddr, faddr;
2573 	in_port_t	fport;
2574 	int		error;
2575 
2576 	ASSERT(MUTEX_HELD(&connp->conn_lock));
2577 
2578 	if (connp->conn_ipversion == IPV4_VERSION)
2579 		ixa->ixa_flags |= IXAF_IS_IPV4;
2580 	else
2581 		ixa->ixa_flags &= ~IXAF_IS_IPV4;
2582 
2583 	/* We do IPsec latching below - hence no caching in ip_attr_connect */
2584 	flags &= ~IPDF_IPSEC;
2585 
2586 	/* In case we had previously done an ip_attr_connect */
2587 	ip_attr_newdst(ixa);
2588 
2589 	/*
2590 	 * Determine the nexthop and copy the addresses before dropping
2591 	 * conn_lock.
2592 	 */
2593 	ip_attr_nexthop(&connp->conn_xmit_ipp, connp->conn_ixa,
2594 	    &connp->conn_faddr_v6, &nexthop);
2595 	saddr = connp->conn_saddr_v6;
2596 	faddr = connp->conn_faddr_v6;
2597 	fport = connp->conn_fport;
2598 
2599 	mutex_exit(&connp->conn_lock);
2600 	error = ip_attr_connect(connp, ixa, &saddr, &faddr, &nexthop, fport,
2601 	    &saddr, uinfo, flags | IPDF_VERIFY_DST);
2602 	mutex_enter(&connp->conn_lock);
2603 
2604 	/* Could have changed even if an error */
2605 	connp->conn_saddr_v6 = saddr;
2606 	if (error != 0)
2607 		return (error);
2608 
2609 	/*
2610 	 * Check whether Trusted Solaris policy allows communication with this
2611 	 * host, and pretend that the destination is unreachable if not.
2612 	 * Compute any needed label and place it in ipp_label_v4/v6.
2613 	 *
2614 	 * Later conn_build_hdr_template() takes ipp_label_v4/v6 to form
2615 	 * the packet.
2616 	 *
2617 	 * TSOL Note: Any concurrent threads would pick a different ixa
2618 	 * (and ipp if they are to change the ipp)  so we
2619 	 * don't have to worry about concurrent threads.
2620 	 */
2621 	if (is_system_labeled()) {
2622 		if (connp->conn_mlp_type != mlptSingle)
2623 			return (ECONNREFUSED);
2624 
2625 		/*
2626 		 * conn_update_label will set ipp_label* which will later
2627 		 * be used by conn_build_hdr_template.
2628 		 */
2629 		error = conn_update_label(connp, ixa,
2630 		    &connp->conn_faddr_v6, &connp->conn_xmit_ipp);
2631 		if (error != 0)
2632 			return (error);
2633 	}
2634 
2635 	/*
2636 	 * Ensure that we match on the selected local address.
2637 	 * This overrides conn_laddr in the case we had earlier bound to a
2638 	 * multicast or broadcast address.
2639 	 */
2640 	connp->conn_laddr_v6 = connp->conn_saddr_v6;
2641 
2642 	/*
2643 	 * Allow setting new policies.
2644 	 * The addresses/ports are already set, thus the IPsec policy calls
2645 	 * can handle their passed-in conn's.
2646 	 */
2647 	connp->conn_policy_cached = B_FALSE;
2648 
2649 	/*
2650 	 * Cache IPsec policy in this conn.  If we have per-socket policy,
2651 	 * we'll cache that.  If we don't, we'll inherit global policy.
2652 	 *
2653 	 * This is done before the caller inserts in the receive-side fanout.
2654 	 * Note that conn_policy_cached is set by ipsec_conn_cache_policy() even
2655 	 * for connections where we don't have a policy. This is to prevent
2656 	 * global policy lookups in the inbound path.
2657 	 *
2658 	 * If we insert before we set conn_policy_cached,
2659 	 * CONN_INBOUND_POLICY_PRESENT() check can still evaluate true
2660 	 * because global policy cound be non-empty. We normally call
2661 	 * ipsec_check_policy() for conn_policy_cached connections only if
2662 	 * conn_in_enforce_policy is set. But in this case,
2663 	 * conn_policy_cached can get set anytime since we made the
2664 	 * CONN_INBOUND_POLICY_PRESENT() check and ipsec_check_policy() is
2665 	 * called, which will make the above assumption false.  Thus, we
2666 	 * need to insert after we set conn_policy_cached.
2667 	 */
2668 	error = ipsec_conn_cache_policy(connp,
2669 	    connp->conn_ipversion == IPV4_VERSION);
2670 	if (error != 0)
2671 		return (error);
2672 
2673 	/*
2674 	 * We defer to do LSO check until here since now we have better idea
2675 	 * whether IPsec is present. If the underlying ill is LSO capable,
2676 	 * copy its capability in so the ULP can decide whether to enable LSO
2677 	 * on this connection. So far, only TCP/IPv4 is implemented, so won't
2678 	 * claim LSO for IPv6.
2679 	 *
2680 	 * Currently, won't enable LSO for IRE_LOOPBACK or IRE_LOCAL, because
2681 	 * the receiver can not handle it. Also not to enable LSO for MULTIRT.
2682 	 */
2683 	ixa->ixa_flags &= ~IXAF_LSO_CAPAB;
2684 
2685 	ASSERT(ixa->ixa_ire != NULL);
2686 	if (ixa->ixa_ipst->ips_ip_lso_outbound && (flags & IPDF_LSO) &&
2687 	    !(ixa->ixa_flags & IXAF_IPSEC_SECURE) &&
2688 	    !(ixa->ixa_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK)) &&
2689 	    !(ixa->ixa_ire->ire_flags & RTF_MULTIRT) &&
2690 	    (ixa->ixa_nce != NULL) &&
2691 	    ((ixa->ixa_flags & IXAF_IS_IPV4) ?
2692 	    ILL_LSO_TCP_IPV4_USABLE(ixa->ixa_nce->nce_ill) :
2693 	    ILL_LSO_TCP_IPV6_USABLE(ixa->ixa_nce->nce_ill))) {
2694 		ixa->ixa_lso_capab = *ixa->ixa_nce->nce_ill->ill_lso_capab;
2695 		ixa->ixa_flags |= IXAF_LSO_CAPAB;
2696 	}
2697 
2698 	/* Check whether ZEROCOPY capability is usable for this connection. */
2699 	ixa->ixa_flags &= ~IXAF_ZCOPY_CAPAB;
2700 
2701 	if ((flags & IPDF_ZCOPY) &&
2702 	    !(ixa->ixa_flags & IXAF_IPSEC_SECURE) &&
2703 	    !(ixa->ixa_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK)) &&
2704 	    !(ixa->ixa_ire->ire_flags & RTF_MULTIRT) &&
2705 	    (ixa->ixa_nce != NULL) &&
2706 	    ILL_ZCOPY_USABLE(ixa->ixa_nce->nce_ill)) {
2707 		ixa->ixa_flags |= IXAF_ZCOPY_CAPAB;
2708 	}
2709 	return (0);
2710 }
2711 
2712 /*
2713  * Predicates to check if the addresses match conn_last*
2714  */
2715 
2716 /*
2717  * Compare the conn against an address.
2718  * If using mapped addresses on AF_INET6 sockets, use the _v6 function
2719  */
2720 boolean_t
2721 conn_same_as_last_v4(conn_t *connp, sin_t *sin)
2722 {
2723 	ASSERT(connp->conn_family == AF_INET);
2724 	return (sin->sin_addr.s_addr == connp->conn_v4lastdst &&
2725 	    sin->sin_port == connp->conn_lastdstport);
2726 }
2727 
2728 /*
2729  * Compare, including for mapped addresses
2730  */
2731 boolean_t
2732 conn_same_as_last_v6(conn_t *connp, sin6_t *sin6)
2733 {
2734 	return (IN6_ARE_ADDR_EQUAL(&connp->conn_v6lastdst, &sin6->sin6_addr) &&
2735 	    sin6->sin6_port == connp->conn_lastdstport &&
2736 	    sin6->sin6_flowinfo == connp->conn_lastflowinfo &&
2737 	    sin6->sin6_scope_id == connp->conn_lastscopeid);
2738 }
2739 
2740 /*
2741  * Compute a label and place it in the ip_packet_t.
2742  * Handles IPv4 and IPv6.
2743  * The caller should have a correct ixa_tsl and ixa_zoneid and have
2744  * already called conn_connect or ip_attr_connect to ensure that tsol_check_dest
2745  * has been called.
2746  */
2747 int
2748 conn_update_label(const conn_t *connp, const ip_xmit_attr_t *ixa,
2749     const in6_addr_t *v6dst, ip_pkt_t *ipp)
2750 {
2751 	int		err;
2752 	ipaddr_t	v4dst;
2753 
2754 	if (IN6_IS_ADDR_V4MAPPED(v6dst)) {
2755 		uchar_t		opt_storage[IP_MAX_OPT_LENGTH];
2756 
2757 		IN6_V4MAPPED_TO_IPADDR(v6dst, v4dst);
2758 
2759 		err = tsol_compute_label_v4(ixa->ixa_tsl, ixa->ixa_zoneid,
2760 		    v4dst, opt_storage, ixa->ixa_ipst);
2761 		if (err == 0) {
2762 			/* Length contained in opt_storage[IPOPT_OLEN] */
2763 			err = optcom_pkt_set(opt_storage,
2764 			    opt_storage[IPOPT_OLEN],
2765 			    (uchar_t **)&ipp->ipp_label_v4,
2766 			    &ipp->ipp_label_len_v4);
2767 		}
2768 		if (err != 0) {
2769 			DTRACE_PROBE4(tx__ip__log__info__updatelabel,
2770 			    char *, "conn(1) failed to update options(2) "
2771 			    "on ixa(3)",
2772 			    conn_t *, connp, char *, opt_storage,
2773 			    ip_xmit_attr_t *, ixa);
2774 		}
2775 		if (ipp->ipp_label_len_v4 != 0)
2776 			ipp->ipp_fields |= IPPF_LABEL_V4;
2777 		else
2778 			ipp->ipp_fields &= ~IPPF_LABEL_V4;
2779 	} else {
2780 		uchar_t		opt_storage[TSOL_MAX_IPV6_OPTION];
2781 		uint_t		optlen;
2782 
2783 		err = tsol_compute_label_v6(ixa->ixa_tsl, ixa->ixa_zoneid,
2784 		    v6dst, opt_storage, ixa->ixa_ipst);
2785 		if (err == 0) {
2786 			/*
2787 			 * Note that ipp_label_v6 is just the option - not
2788 			 * the hopopts extension header.
2789 			 *
2790 			 * Length contained in opt_storage[IPOPT_OLEN], but
2791 			 * that doesn't include the two byte options header.
2792 			 */
2793 			optlen = opt_storage[IPOPT_OLEN];
2794 			if (optlen != 0)
2795 				optlen += 2;
2796 
2797 			err = optcom_pkt_set(opt_storage, optlen,
2798 			    (uchar_t **)&ipp->ipp_label_v6,
2799 			    &ipp->ipp_label_len_v6);
2800 		}
2801 		if (err != 0) {
2802 			DTRACE_PROBE4(tx__ip__log__info__updatelabel,
2803 			    char *, "conn(1) failed to update options(2) "
2804 			    "on ixa(3)",
2805 			    conn_t *, connp, char *, opt_storage,
2806 			    ip_xmit_attr_t *, ixa);
2807 		}
2808 		if (ipp->ipp_label_len_v6 != 0)
2809 			ipp->ipp_fields |= IPPF_LABEL_V6;
2810 		else
2811 			ipp->ipp_fields &= ~IPPF_LABEL_V6;
2812 	}
2813 	return (err);
2814 }
2815 
2816 /*
2817  * Inherit all options settings from the parent/listener to the eager.
2818  * Returns zero on success; ENOMEM if memory allocation failed.
2819  *
2820  * We assume that the eager has not had any work done i.e., the conn_ixa
2821  * and conn_xmit_ipp are all zero.
2822  * Furthermore we assume that no other thread can access the eager (because
2823  * it isn't inserted in any fanout list).
2824  */
2825 int
2826 conn_inherit_parent(conn_t *lconnp, conn_t *econnp)
2827 {
2828 	cred_t	*credp;
2829 	int	err;
2830 	void	*notify_cookie;
2831 
2832 	econnp->conn_family = lconnp->conn_family;
2833 	econnp->conn_ipv6_v6only = lconnp->conn_ipv6_v6only;
2834 	econnp->conn_wq = lconnp->conn_wq;
2835 	econnp->conn_rq = lconnp->conn_rq;
2836 
2837 	/*
2838 	 * Make a safe copy of the transmit attributes.
2839 	 * conn_connect will later be used by the caller to setup the ire etc.
2840 	 */
2841 	ASSERT(econnp->conn_ixa->ixa_refcnt == 1);
2842 	ASSERT(econnp->conn_ixa->ixa_ire == NULL);
2843 	ASSERT(econnp->conn_ixa->ixa_dce == NULL);
2844 	ASSERT(econnp->conn_ixa->ixa_nce == NULL);
2845 
2846 	/* Preserve ixa_notify_cookie */
2847 	notify_cookie = econnp->conn_ixa->ixa_notify_cookie;
2848 	ixa_safe_copy(lconnp->conn_ixa, econnp->conn_ixa);
2849 	econnp->conn_ixa->ixa_notify_cookie = notify_cookie;
2850 
2851 	econnp->conn_bound_if = lconnp->conn_bound_if;
2852 	econnp->conn_incoming_ifindex = lconnp->conn_incoming_ifindex;
2853 
2854 	/* Inherit all RECV options */
2855 	econnp->conn_recv_ancillary = lconnp->conn_recv_ancillary;
2856 
2857 	err = ip_pkt_copy(&lconnp->conn_xmit_ipp, &econnp->conn_xmit_ipp,
2858 	    KM_NOSLEEP);
2859 	if (err != 0)
2860 		return (err);
2861 
2862 	econnp->conn_zoneid = lconnp->conn_zoneid;
2863 	econnp->conn_allzones = lconnp->conn_allzones;
2864 
2865 	/* This is odd. Pick a flowlabel for each connection instead? */
2866 	econnp->conn_flowinfo = lconnp->conn_flowinfo;
2867 
2868 	econnp->conn_default_ttl = lconnp->conn_default_ttl;
2869 
2870 	/*
2871 	 * TSOL: tsol_input_proc() needs the eager's cred before the
2872 	 * eager is accepted
2873 	 */
2874 	ASSERT(lconnp->conn_cred != NULL);
2875 	econnp->conn_cred = credp = lconnp->conn_cred;
2876 	crhold(credp);
2877 	econnp->conn_cpid = lconnp->conn_cpid;
2878 	econnp->conn_open_time = ddi_get_lbolt64();
2879 
2880 	/*
2881 	 * Cache things in the ixa without any refhold.
2882 	 * Listener might not have set up ixa_cred
2883 	 */
2884 	econnp->conn_ixa->ixa_cred = econnp->conn_cred;
2885 	econnp->conn_ixa->ixa_cpid = econnp->conn_cpid;
2886 	if (is_system_labeled())
2887 		econnp->conn_ixa->ixa_tsl = crgetlabel(econnp->conn_cred);
2888 
2889 	/*
2890 	 * If the caller has the process-wide flag set, then default to MAC
2891 	 * exempt mode.  This allows read-down to unlabeled hosts.
2892 	 */
2893 	if (getpflags(NET_MAC_AWARE, credp) != 0)
2894 		econnp->conn_mac_mode = CONN_MAC_AWARE;
2895 
2896 	econnp->conn_zone_is_global = lconnp->conn_zone_is_global;
2897 
2898 	/*
2899 	 * We eliminate the need for sockfs to send down a T_SVR4_OPTMGMT_REQ
2900 	 * via soaccept()->soinheritoptions() which essentially applies
2901 	 * all the listener options to the new connection. The options that we
2902 	 * need to take care of are:
2903 	 * SO_DEBUG, SO_REUSEADDR, SO_KEEPALIVE, SO_DONTROUTE, SO_BROADCAST,
2904 	 * SO_USELOOPBACK, SO_OOBINLINE, SO_DGRAM_ERRIND, SO_LINGER,
2905 	 * SO_SNDBUF, SO_RCVBUF.
2906 	 *
2907 	 * SO_RCVBUF:	conn_rcvbuf is set.
2908 	 * SO_SNDBUF:	conn_sndbuf is set.
2909 	 */
2910 
2911 	econnp->conn_sndbuf = lconnp->conn_sndbuf;
2912 	econnp->conn_rcvbuf = lconnp->conn_rcvbuf;
2913 	econnp->conn_sndlowat = lconnp->conn_sndlowat;
2914 	econnp->conn_rcvlowat = lconnp->conn_rcvlowat;
2915 	econnp->conn_dgram_errind = lconnp->conn_dgram_errind;
2916 	econnp->conn_oobinline = lconnp->conn_oobinline;
2917 	econnp->conn_debug = lconnp->conn_debug;
2918 	econnp->conn_keepalive = lconnp->conn_keepalive;
2919 	econnp->conn_linger = lconnp->conn_linger;
2920 	econnp->conn_lingertime = lconnp->conn_lingertime;
2921 
2922 	/* Set the IP options */
2923 	econnp->conn_broadcast = lconnp->conn_broadcast;
2924 	econnp->conn_useloopback = lconnp->conn_useloopback;
2925 	econnp->conn_reuseaddr = lconnp->conn_reuseaddr;
2926 	return (0);
2927 }
2928