1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved.
24  * Copyright 2019 Joyent, Inc.
25  * Copyright (c) 2016 by Delphix. All rights reserved.
26  * Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
27  */
28 
29 #include <sys/types.h>
30 #include <sys/stream.h>
31 #define	_SUN_TPI_VERSION 2
32 #include <sys/tihdr.h>
33 #include <sys/socket.h>
34 #include <sys/xti_xtiopt.h>
35 #include <sys/xti_inet.h>
36 #include <sys/policy.h>
37 
38 #include <inet/cc.h>
39 #include <inet/common.h>
40 #include <netinet/ip6.h>
41 #include <inet/ip.h>
42 
43 #include <netinet/in.h>
44 #include <netinet/tcp.h>
45 #include <inet/optcom.h>
46 #include <inet/proto_set.h>
47 #include <inet/tcp_impl.h>
48 
49 static int	tcp_opt_default(queue_t *, int, int, uchar_t *);
50 
51 /*
52  * Table of all known options handled on a TCP protocol stack.
53  *
54  * Note: This table contains options processed by both TCP and IP levels
55  *       and is the superset of options that can be performed on a TCP over IP
56  *       stack.
57  */
58 opdes_t	tcp_opt_arr[] = {
59 
60 { SO_LINGER,	SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0,
61 	sizeof (struct linger), 0 },
62 
63 { SO_DEBUG,	SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
64 { SO_KEEPALIVE,	SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
65 { SO_DONTROUTE,	SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
66 { SO_USELOOPBACK, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0
67 	},
68 { SO_BROADCAST,	SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
69 { SO_REUSEADDR, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
70 { SO_OOBINLINE, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
71 { SO_TYPE,	SOL_SOCKET, OA_R, OA_R, OP_NP, 0, sizeof (int), 0 },
72 { SO_SNDBUF,	SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
73 { SO_RCVBUF,	SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
74 { SO_SNDTIMEO,	SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0,
75 	sizeof (struct timeval), 0 },
76 { SO_RCVTIMEO,	SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0,
77 	sizeof (struct timeval), 0 },
78 { SO_DGRAM_ERRIND, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0
79 	},
80 { SO_SND_COPYAVOID, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
81 { SO_ANON_MLP, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int),
82 	0 },
83 { SO_MAC_EXEMPT, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int),
84 	0 },
85 { SO_MAC_IMPLICIT, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int),
86 	0 },
87 { SO_ALLZONES, SOL_SOCKET, OA_R, OA_RW, OP_CONFIG, 0, sizeof (int),
88 	0 },
89 { SO_EXCLBIND, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
90 
91 { SO_DOMAIN,	SOL_SOCKET, OA_R, OA_R, OP_NP, 0, sizeof (int), 0 },
92 
93 { SO_PROTOTYPE,	SOL_SOCKET, OA_R, OA_R, OP_NP, 0, sizeof (int), 0 },
94 
95 { TCP_NODELAY,	IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0
96 	},
97 { TCP_MAXSEG,	IPPROTO_TCP, OA_R, OA_R, OP_NP, 0, sizeof (uint_t),
98 	536 },
99 
100 { TCP_NOTIFY_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP,
101 	OP_DEF_FN, sizeof (int), -1 /* not initialized */ },
102 
103 { TCP_ABORT_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP,
104 	OP_DEF_FN, sizeof (int), -1 /* not initialized */ },
105 
106 { TCP_CONN_NOTIFY_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP,
107 	OP_DEF_FN, sizeof (int), -1 /* not initialized */ },
108 
109 { TCP_CONN_ABORT_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP,
110 	OP_DEF_FN, sizeof (int), -1 /* not initialized */ },
111 
112 { TCP_RECVDSTADDR, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int),
113 	0 },
114 
115 { TCP_ANONPRIVBIND, IPPROTO_TCP, OA_R, OA_RW, OP_PRIVPORT, 0,
116 	sizeof (int), 0 },
117 
118 { TCP_EXCLBIND, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0
119 	},
120 
121 { TCP_INIT_CWND, IPPROTO_TCP, OA_RW, OA_RW, OP_CONFIG, 0,
122 	sizeof (int), 0 },
123 
124 { TCP_KEEPALIVE_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0,
125 	sizeof (int), 0	},
126 
127 { TCP_KEEPIDLE, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
128 
129 { TCP_KEEPCNT, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
130 
131 { TCP_KEEPINTVL, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
132 
133 { TCP_KEEPALIVE_ABORT_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0,
134 	sizeof (int), 0	},
135 
136 { TCP_CORK, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
137 
138 { TCP_RTO_INITIAL, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (uint32_t), 0 },
139 
140 { TCP_RTO_MIN, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (uint32_t), 0 },
141 
142 { TCP_RTO_MAX, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (uint32_t), 0 },
143 
144 { TCP_LINGER2, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
145 
146 { TCP_CONGESTION, IPPROTO_TCP, OA_RW, OA_RW, OP_NP,
147 	OP_VARLEN, CC_ALGO_NAME_MAX, 0 },
148 
149 { IP_OPTIONS,	IPPROTO_IP, OA_RW, OA_RW, OP_NP,
150 	(OP_VARLEN|OP_NODEFAULT),
151 	IP_MAX_OPT_LENGTH + IP_ADDR_LEN, -1 /* not initialized */ },
152 { T_IP_OPTIONS,	IPPROTO_IP, OA_RW, OA_RW, OP_NP,
153 	(OP_VARLEN|OP_NODEFAULT),
154 	IP_MAX_OPT_LENGTH + IP_ADDR_LEN, -1 /* not initialized */ },
155 
156 { IP_TOS,	IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
157 { T_IP_TOS,	IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
158 { IP_TTL,	IPPROTO_IP, OA_RW, OA_RW, OP_NP, OP_DEF_FN,
159 	sizeof (int), -1 /* not initialized */ },
160 { IP_RECVTOS,	IPPROTO_IP,  OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
161 
162 { IP_SEC_OPT, IPPROTO_IP, OA_RW, OA_RW, OP_NP, OP_NODEFAULT,
163 	sizeof (ipsec_req_t), -1 /* not initialized */ },
164 
165 { IP_BOUND_IF, IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0,
166 	sizeof (int),	0 /* no ifindex */ },
167 
168 { IP_UNSPEC_SRC, IPPROTO_IP, OA_R, OA_RW, OP_RAW, 0,
169 	sizeof (int), 0 },
170 
171 { IPV6_UNICAST_HOPS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, OP_DEF_FN,
172 	sizeof (int), -1 /* not initialized */ },
173 
174 { IPV6_BOUND_IF, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
175 	sizeof (int),	0 /* no ifindex */ },
176 
177 { IP_DONTFRAG, IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
178 
179 { IP_NEXTHOP, IPPROTO_IP, OA_R, OA_RW, OP_CONFIG, 0,
180 	sizeof (in_addr_t),	-1 /* not initialized  */ },
181 
182 { IPV6_UNSPEC_SRC, IPPROTO_IPV6, OA_R, OA_RW, OP_RAW, 0,
183 	sizeof (int), 0 },
184 
185 { IPV6_PKTINFO, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
186 	(OP_NODEFAULT|OP_VARLEN),
187 	sizeof (struct in6_pktinfo), -1 /* not initialized */ },
188 { IPV6_NEXTHOP, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
189 	OP_NODEFAULT,
190 	sizeof (sin6_t), -1 /* not initialized */ },
191 { IPV6_HOPOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
192 	(OP_VARLEN|OP_NODEFAULT), 255*8,
193 	-1 /* not initialized */ },
194 { IPV6_DSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
195 	(OP_VARLEN|OP_NODEFAULT), 255*8,
196 	-1 /* not initialized */ },
197 { IPV6_RTHDRDSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
198 	(OP_VARLEN|OP_NODEFAULT), 255*8,
199 	-1 /* not initialized */ },
200 { IPV6_RTHDR, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
201 	(OP_VARLEN|OP_NODEFAULT), 255*8,
202 	-1 /* not initialized */ },
203 { IPV6_TCLASS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
204 	OP_NODEFAULT,
205 	sizeof (int), -1 /* not initialized */ },
206 { IPV6_PATHMTU, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
207 	OP_NODEFAULT,
208 	sizeof (struct ip6_mtuinfo), -1 /* not initialized */ },
209 { IPV6_DONTFRAG, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
210 	sizeof (int), 0 },
211 { IPV6_USE_MIN_MTU, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
212 	sizeof (int), 0 },
213 { IPV6_V6ONLY, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
214 	sizeof (int), 0 },
215 
216 /* Enable receipt of ancillary data */
217 { IPV6_RECVPKTINFO, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
218 	sizeof (int), 0 },
219 { IPV6_RECVHOPLIMIT, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
220 	sizeof (int), 0 },
221 { IPV6_RECVHOPOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
222 	sizeof (int), 0 },
223 { _OLD_IPV6_RECVDSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
224 	sizeof (int), 0 },
225 { IPV6_RECVDSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
226 	sizeof (int), 0 },
227 { IPV6_RECVRTHDR, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
228 	sizeof (int), 0 },
229 { IPV6_RECVRTHDRDSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
230 	sizeof (int), 0 },
231 { IPV6_RECVTCLASS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
232 	sizeof (int), 0 },
233 
234 { IPV6_SEC_OPT, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, OP_NODEFAULT,
235 	sizeof (ipsec_req_t), -1 /* not initialized */ },
236 { IPV6_SRC_PREFERENCES, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
237 	sizeof (uint32_t), IPV6_PREFER_SRC_DEFAULT },
238 };
239 
240 /*
241  * Table of all supported levels
242  * Note: Some levels (e.g. XTI_GENERIC) may be valid but may not have
243  * any supported options so we need this info separately.
244  *
245  * This is needed only for topmost tpi providers and is used only by
246  * XTI interfaces.
247  */
248 optlevel_t	tcp_valid_levels_arr[] = {
249 	XTI_GENERIC,
250 	SOL_SOCKET,
251 	IPPROTO_TCP,
252 	IPPROTO_IP,
253 	IPPROTO_IPV6
254 };
255 
256 
257 #define	TCP_OPT_ARR_CNT		A_CNT(tcp_opt_arr)
258 #define	TCP_VALID_LEVELS_CNT	A_CNT(tcp_valid_levels_arr)
259 
260 uint_t tcp_max_optsize; /* initialized when TCP driver is loaded */
261 
262 /*
263  * Initialize option database object for TCP
264  *
265  * This object represents database of options to search passed to
266  * {sock,tpi}optcom_req() interface routine to take care of option
267  * management and associated methods.
268  */
269 
270 optdb_obj_t tcp_opt_obj = {
271 	tcp_opt_default,	/* TCP default value function pointer */
272 	tcp_tpi_opt_get,	/* TCP get function pointer */
273 	tcp_tpi_opt_set,	/* TCP set function pointer */
274 	TCP_OPT_ARR_CNT,	/* TCP option database count of entries */
275 	tcp_opt_arr,		/* TCP option database */
276 	TCP_VALID_LEVELS_CNT,	/* TCP valid level count of entries */
277 	tcp_valid_levels_arr	/* TCP valid level array */
278 };
279 
280 static int tcp_max_init_cwnd = TCP_MAX_INIT_CWND;
281 
282 /*
283  * Some TCP options can be "set" by requesting them in the option
284  * buffer. This is needed for XTI feature test though we do not
285  * allow it in general. We interpret that this mechanism is more
286  * applicable to OSI protocols and need not be allowed in general.
287  * This routine filters out options for which it is not allowed (most)
288  * and lets through those (few) for which it is. [ The XTI interface
289  * test suite specifics will imply that any XTI_GENERIC level XTI_* if
290  * ever implemented will have to be allowed here ].
291  */
292 static boolean_t
293 tcp_allow_connopt_set(int level, int name)
294 {
295 
296 	switch (level) {
297 	case IPPROTO_TCP:
298 		switch (name) {
299 		case TCP_NODELAY:
300 			return (B_TRUE);
301 		default:
302 			return (B_FALSE);
303 		}
304 		/*NOTREACHED*/
305 	default:
306 		return (B_FALSE);
307 	}
308 	/*NOTREACHED*/
309 }
310 
311 /*
312  * This routine gets default values of certain options whose default
313  * values are maintained by protocol specific code
314  */
315 /* ARGSUSED */
316 static int
317 tcp_opt_default(queue_t *q, int level, int name, uchar_t *ptr)
318 {
319 	int32_t	*i1 = (int32_t *)ptr;
320 	tcp_stack_t	*tcps = Q_TO_TCP(q)->tcp_tcps;
321 
322 	switch (level) {
323 	case IPPROTO_TCP:
324 		switch (name) {
325 		case TCP_NOTIFY_THRESHOLD:
326 			*i1 = tcps->tcps_ip_notify_interval;
327 			break;
328 		case TCP_ABORT_THRESHOLD:
329 			*i1 = tcps->tcps_ip_abort_interval;
330 			break;
331 		case TCP_CONN_NOTIFY_THRESHOLD:
332 			*i1 = tcps->tcps_ip_notify_cinterval;
333 			break;
334 		case TCP_CONN_ABORT_THRESHOLD:
335 			*i1 = tcps->tcps_ip_abort_cinterval;
336 			break;
337 		default:
338 			return (-1);
339 		}
340 		break;
341 	case IPPROTO_IP:
342 		switch (name) {
343 		case IP_TTL:
344 			*i1 = tcps->tcps_ipv4_ttl;
345 			break;
346 		default:
347 			return (-1);
348 		}
349 		break;
350 	case IPPROTO_IPV6:
351 		switch (name) {
352 		case IPV6_UNICAST_HOPS:
353 			*i1 = tcps->tcps_ipv6_hoplimit;
354 			break;
355 		default:
356 			return (-1);
357 		}
358 		break;
359 	default:
360 		return (-1);
361 	}
362 	return (sizeof (int));
363 }
364 
365 /*
366  * TCP routine to get the values of options.
367  */
368 int
369 tcp_opt_get(conn_t *connp, int level, int name, uchar_t *ptr)
370 {
371 	int		*i1 = (int *)ptr;
372 	tcp_t		*tcp = connp->conn_tcp;
373 	conn_opt_arg_t	coas;
374 	int		retval;
375 
376 	coas.coa_connp = connp;
377 	coas.coa_ixa = connp->conn_ixa;
378 	coas.coa_ipp = &connp->conn_xmit_ipp;
379 	coas.coa_ancillary = B_FALSE;
380 	coas.coa_changed = 0;
381 
382 	switch (level) {
383 	case SOL_SOCKET:
384 		switch (name) {
385 		case SO_SND_COPYAVOID:
386 			*i1 = tcp->tcp_snd_zcopy_on ?
387 			    SO_SND_COPYAVOID : 0;
388 			return (sizeof (int));
389 		case SO_ACCEPTCONN:
390 			*i1 = (tcp->tcp_state == TCPS_LISTEN);
391 			return (sizeof (int));
392 		}
393 		break;
394 	case IPPROTO_TCP:
395 		switch (name) {
396 		case TCP_NODELAY:
397 			*i1 = (tcp->tcp_naglim == 1) ? TCP_NODELAY : 0;
398 			return (sizeof (int));
399 		case TCP_MAXSEG:
400 			*i1 = tcp->tcp_mss;
401 			return (sizeof (int));
402 		case TCP_NOTIFY_THRESHOLD:
403 			*i1 = (int)tcp->tcp_first_timer_threshold;
404 			return (sizeof (int));
405 		case TCP_ABORT_THRESHOLD:
406 			*i1 = tcp->tcp_second_timer_threshold;
407 			return (sizeof (int));
408 		case TCP_CONN_NOTIFY_THRESHOLD:
409 			*i1 = tcp->tcp_first_ctimer_threshold;
410 			return (sizeof (int));
411 		case TCP_CONN_ABORT_THRESHOLD:
412 			*i1 = tcp->tcp_second_ctimer_threshold;
413 			return (sizeof (int));
414 		case TCP_INIT_CWND:
415 			*i1 = tcp->tcp_init_cwnd;
416 			return (sizeof (int));
417 		case TCP_KEEPALIVE_THRESHOLD:
418 			*i1 = tcp->tcp_ka_interval;
419 			return (sizeof (int));
420 
421 		/*
422 		 * TCP_KEEPIDLE expects value in seconds, but
423 		 * tcp_ka_interval is in milliseconds.
424 		 */
425 		case TCP_KEEPIDLE:
426 			*i1 = tcp->tcp_ka_interval / 1000;
427 			return (sizeof (int));
428 		case TCP_KEEPCNT:
429 			*i1 = tcp->tcp_ka_cnt;
430 			return (sizeof (int));
431 
432 		/*
433 		 * TCP_KEEPINTVL expects value in seconds, but
434 		 * tcp_ka_rinterval is in milliseconds.
435 		 */
436 		case TCP_KEEPINTVL:
437 			*i1 = tcp->tcp_ka_rinterval / 1000;
438 			return (sizeof (int));
439 		case TCP_KEEPALIVE_ABORT_THRESHOLD:
440 			*i1 = tcp->tcp_ka_abort_thres;
441 			return (sizeof (int));
442 		case TCP_CONGESTION: {
443 			size_t len = strlcpy((char *)ptr, CC_ALGO(tcp)->name,
444 			    CC_ALGO_NAME_MAX);
445 			if (len >= CC_ALGO_NAME_MAX)
446 				return (-1);
447 			return (len + 1);
448 		}
449 		case TCP_CORK:
450 			*i1 = tcp->tcp_cork;
451 			return (sizeof (int));
452 		case TCP_RTO_INITIAL:
453 			*i1 = tcp->tcp_rto_initial;
454 			return (sizeof (uint32_t));
455 		case TCP_RTO_MIN:
456 			*i1 = tcp->tcp_rto_min;
457 			return (sizeof (uint32_t));
458 		case TCP_RTO_MAX:
459 			*i1 = tcp->tcp_rto_max;
460 			return (sizeof (uint32_t));
461 		case TCP_LINGER2:
462 			*i1 = tcp->tcp_fin_wait_2_flush_interval / SECONDS;
463 			return (sizeof (int));
464 		}
465 		break;
466 	case IPPROTO_IP:
467 		if (connp->conn_family != AF_INET)
468 			return (-1);
469 		switch (name) {
470 		case IP_OPTIONS:
471 		case T_IP_OPTIONS:
472 			/* Caller ensures enough space */
473 			return (ip_opt_get_user(connp, ptr));
474 		default:
475 			break;
476 		}
477 		break;
478 
479 	case IPPROTO_IPV6:
480 		/*
481 		 * IPPROTO_IPV6 options are only supported for sockets
482 		 * that are using IPv6 on the wire.
483 		 */
484 		if (connp->conn_ipversion != IPV6_VERSION) {
485 			return (-1);
486 		}
487 		switch (name) {
488 		case IPV6_PATHMTU:
489 			if (tcp->tcp_state < TCPS_ESTABLISHED)
490 				return (-1);
491 			break;
492 		}
493 		break;
494 	}
495 	mutex_enter(&connp->conn_lock);
496 	retval = conn_opt_get(&coas, level, name, ptr);
497 	mutex_exit(&connp->conn_lock);
498 	return (retval);
499 }
500 
501 /*
502  * We declare as 'int' rather than 'void' to satisfy pfi_t arg requirements.
503  * Parameters are assumed to be verified by the caller.
504  */
505 /* ARGSUSED */
506 int
507 tcp_opt_set(conn_t *connp, uint_t optset_context, int level, int name,
508     uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
509     void *thisdg_attrs, cred_t *cr)
510 {
511 	tcp_t	*tcp = connp->conn_tcp;
512 	int	*i1 = (int *)invalp;
513 	boolean_t onoff = (*i1 == 0) ? 0 : 1;
514 	boolean_t checkonly;
515 	int	reterr;
516 	tcp_stack_t	*tcps = tcp->tcp_tcps;
517 	conn_opt_arg_t	coas;
518 	uint32_t	val = *((uint32_t *)invalp);
519 
520 	coas.coa_connp = connp;
521 	coas.coa_ixa = connp->conn_ixa;
522 	coas.coa_ipp = &connp->conn_xmit_ipp;
523 	coas.coa_ancillary = B_FALSE;
524 	coas.coa_changed = 0;
525 
526 	switch (optset_context) {
527 	case SETFN_OPTCOM_CHECKONLY:
528 		checkonly = B_TRUE;
529 		/*
530 		 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ
531 		 * inlen != 0 implies value supplied and
532 		 *	we have to "pretend" to set it.
533 		 * inlen == 0 implies that there is no
534 		 *	value part in T_CHECK request and just validation
535 		 * done elsewhere should be enough, we just return here.
536 		 */
537 		if (inlen == 0) {
538 			*outlenp = 0;
539 			return (0);
540 		}
541 		break;
542 	case SETFN_OPTCOM_NEGOTIATE:
543 		checkonly = B_FALSE;
544 		break;
545 	case SETFN_UD_NEGOTIATE: /* error on conn-oriented transports ? */
546 	case SETFN_CONN_NEGOTIATE:
547 		checkonly = B_FALSE;
548 		/*
549 		 * Negotiating local and "association-related" options
550 		 * from other (T_CONN_REQ, T_CONN_RES,T_UNITDATA_REQ)
551 		 * primitives is allowed by XTI, but we choose
552 		 * to not implement this style negotiation for Internet
553 		 * protocols (We interpret it is a must for OSI world but
554 		 * optional for Internet protocols) for all options.
555 		 * [ Will do only for the few options that enable test
556 		 * suites that our XTI implementation of this feature
557 		 * works for transports that do allow it ]
558 		 */
559 		if (!tcp_allow_connopt_set(level, name)) {
560 			*outlenp = 0;
561 			return (EINVAL);
562 		}
563 		break;
564 	default:
565 		/*
566 		 * We should never get here
567 		 */
568 		*outlenp = 0;
569 		return (EINVAL);
570 	}
571 
572 	ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) ||
573 	    (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0));
574 
575 	/*
576 	 * For TCP, we should have no ancillary data sent down
577 	 * (sendmsg isn't supported for SOCK_STREAM), so thisdg_attrs
578 	 * has to be zero.
579 	 */
580 	ASSERT(thisdg_attrs == NULL);
581 
582 	/*
583 	 * For fixed length options, no sanity check
584 	 * of passed in length is done. It is assumed *_optcom_req()
585 	 * routines do the right thing.
586 	 */
587 	switch (level) {
588 	case SOL_SOCKET:
589 		switch (name) {
590 		case SO_KEEPALIVE:
591 			if (checkonly) {
592 				/* check only case */
593 				break;
594 			}
595 
596 			if (!onoff) {
597 				if (connp->conn_keepalive) {
598 					if (tcp->tcp_ka_tid != 0) {
599 						(void) TCP_TIMER_CANCEL(tcp,
600 						    tcp->tcp_ka_tid);
601 						tcp->tcp_ka_tid = 0;
602 					}
603 					connp->conn_keepalive = 0;
604 				}
605 				break;
606 			}
607 			if (!connp->conn_keepalive) {
608 				/* Crank up the keepalive timer */
609 				tcp->tcp_ka_last_intrvl = 0;
610 				tcp->tcp_ka_tid = TCP_TIMER(tcp,
611 				    tcp_keepalive_timer, tcp->tcp_ka_interval);
612 				connp->conn_keepalive = 1;
613 			}
614 			break;
615 		case SO_SNDBUF: {
616 			if (*i1 > tcps->tcps_max_buf) {
617 				*outlenp = 0;
618 				return (ENOBUFS);
619 			}
620 			if (checkonly)
621 				break;
622 
623 			connp->conn_sndbuf = *i1;
624 			if (tcps->tcps_snd_lowat_fraction != 0) {
625 				connp->conn_sndlowat = connp->conn_sndbuf /
626 				    tcps->tcps_snd_lowat_fraction;
627 			}
628 			(void) tcp_maxpsz_set(tcp, B_TRUE);
629 			/*
630 			 * If we are flow-controlled, recheck the condition.
631 			 * There are apps that increase SO_SNDBUF size when
632 			 * flow-controlled (EWOULDBLOCK), and expect the flow
633 			 * control condition to be lifted right away.
634 			 */
635 			mutex_enter(&tcp->tcp_non_sq_lock);
636 			if (tcp->tcp_flow_stopped &&
637 			    TCP_UNSENT_BYTES(tcp) < connp->conn_sndbuf) {
638 				tcp_clrqfull(tcp);
639 			}
640 			mutex_exit(&tcp->tcp_non_sq_lock);
641 			*outlenp = inlen;
642 			return (0);
643 		}
644 		case SO_RCVBUF:
645 			if (*i1 > tcps->tcps_max_buf) {
646 				*outlenp = 0;
647 				return (ENOBUFS);
648 			}
649 			/* Silently ignore zero */
650 			if (!checkonly && *i1 != 0) {
651 				*i1 = MSS_ROUNDUP(*i1, tcp->tcp_mss);
652 				(void) tcp_rwnd_set(tcp, *i1);
653 			}
654 			/*
655 			 * XXX should we return the rwnd here
656 			 * and tcp_opt_get ?
657 			 */
658 			*outlenp = inlen;
659 			return (0);
660 		case SO_SND_COPYAVOID:
661 			if (!checkonly) {
662 				if (tcp->tcp_loopback ||
663 				    (onoff != 1) || !tcp_zcopy_check(tcp)) {
664 					*outlenp = 0;
665 					return (EOPNOTSUPP);
666 				}
667 				tcp->tcp_snd_zcopy_aware = 1;
668 			}
669 			*outlenp = inlen;
670 			return (0);
671 		}
672 		break;
673 	case IPPROTO_TCP:
674 		switch (name) {
675 		case TCP_NODELAY:
676 			if (!checkonly)
677 				tcp->tcp_naglim = *i1 ? 1 : tcp->tcp_mss;
678 			break;
679 		case TCP_NOTIFY_THRESHOLD:
680 			if (!checkonly)
681 				tcp->tcp_first_timer_threshold = *i1;
682 			break;
683 		case TCP_ABORT_THRESHOLD:
684 			if (!checkonly)
685 				tcp->tcp_second_timer_threshold = *i1;
686 			break;
687 		case TCP_CONN_NOTIFY_THRESHOLD:
688 			if (!checkonly)
689 				tcp->tcp_first_ctimer_threshold = *i1;
690 			break;
691 		case TCP_CONN_ABORT_THRESHOLD:
692 			if (!checkonly)
693 				tcp->tcp_second_ctimer_threshold = *i1;
694 			break;
695 		case TCP_RECVDSTADDR:
696 			if (tcp->tcp_state > TCPS_LISTEN) {
697 				*outlenp = 0;
698 				return (EOPNOTSUPP);
699 			}
700 			/* Setting done in conn_opt_set */
701 			break;
702 		case TCP_INIT_CWND:
703 			if (checkonly)
704 				break;
705 
706 			/*
707 			 * Only allow socket with network configuration
708 			 * privilege to set the initial cwnd to be larger
709 			 * than allowed by RFC 3390.
710 			 */
711 			if (val > MIN(4, MAX(2, 4380 / tcp->tcp_mss))) {
712 				if ((reterr = secpolicy_ip_config(cr, B_TRUE))
713 				    != 0) {
714 					*outlenp = 0;
715 					return (reterr);
716 				}
717 				if (val > tcp_max_init_cwnd) {
718 					*outlenp = 0;
719 					return (EINVAL);
720 				}
721 			}
722 
723 			tcp->tcp_init_cwnd = val;
724 
725 			/*
726 			 * If the socket is connected, AND no outbound data
727 			 * has been sent, reset the actual cwnd values.
728 			 */
729 			if (tcp->tcp_state == TCPS_ESTABLISHED &&
730 			    tcp->tcp_iss == tcp->tcp_snxt - 1) {
731 				tcp->tcp_cwnd =
732 				    MIN(tcp->tcp_rwnd, val * tcp->tcp_mss);
733 			}
734 			break;
735 
736 		/*
737 		 * TCP_KEEPIDLE is in seconds but TCP_KEEPALIVE_THRESHOLD
738 		 * is in milliseconds. TCP_KEEPIDLE is introduced for
739 		 * compatibility with other Unix flavors.
740 		 * We can fall through TCP_KEEPALIVE_THRESHOLD logic after
741 		 * converting the input to milliseconds.
742 		 */
743 		case TCP_KEEPIDLE:
744 			*i1 *= 1000;
745 			/* FALLTHRU */
746 
747 		case TCP_KEEPALIVE_THRESHOLD:
748 			if (checkonly)
749 				break;
750 
751 			if (*i1 < tcps->tcps_keepalive_interval_low ||
752 			    *i1 > tcps->tcps_keepalive_interval_high) {
753 				*outlenp = 0;
754 				return (EINVAL);
755 			}
756 			if (*i1 != tcp->tcp_ka_interval) {
757 				tcp->tcp_ka_interval = *i1;
758 				/*
759 				 * Check if we need to restart the
760 				 * keepalive timer.
761 				 */
762 				if (tcp->tcp_ka_tid != 0) {
763 					ASSERT(connp->conn_keepalive);
764 					(void) TCP_TIMER_CANCEL(tcp,
765 					    tcp->tcp_ka_tid);
766 					tcp->tcp_ka_last_intrvl = 0;
767 					tcp->tcp_ka_tid = TCP_TIMER(tcp,
768 					    tcp_keepalive_timer,
769 					    tcp->tcp_ka_interval);
770 				}
771 			}
772 			break;
773 
774 		/*
775 		 * tcp_ka_abort_thres = tcp_ka_rinterval * tcp_ka_cnt.
776 		 * So setting TCP_KEEPCNT or TCP_KEEPINTVL can affect all the
777 		 * three members - tcp_ka_abort_thres, tcp_ka_rinterval and
778 		 * tcp_ka_cnt.
779 		 */
780 		case TCP_KEEPCNT:
781 			if (checkonly)
782 				break;
783 
784 			if (*i1 == 0) {
785 				return (EINVAL);
786 			} else if (tcp->tcp_ka_rinterval == 0) {
787 				/*
788 				 * When TCP_KEEPCNT is specified without first
789 				 * specifying a TCP_KEEPINTVL, we infer an
790 				 * interval based on a tunable specific to our
791 				 * stack: the tcp_keepalive_abort_interval.
792 				 * (Or the TCP_KEEPALIVE_ABORT_THRESHOLD, in
793 				 * the unlikely event that that has been set.)
794 				 * Given the abort interval's default value of
795 				 * 480 seconds, low TCP_KEEPCNT values can
796 				 * result in intervals that exceed the default
797 				 * maximum RTO of 60 seconds.  Rather than
798 				 * fail in these cases, we (implicitly) clamp
799 				 * the interval at the maximum RTO; if the
800 				 * TCP_KEEPCNT is shortly followed by a
801 				 * TCP_KEEPINTVL (as we expect), the abort
802 				 * threshold will be recalculated correctly --
803 				 * and if a TCP_KEEPINTVL is not forthcoming,
804 				 * keep-alive will at least operate reasonably
805 				 * given the underconfigured state.
806 				 */
807 				uint32_t interval;
808 
809 				interval = tcp->tcp_ka_abort_thres / *i1;
810 
811 				if (interval < tcp->tcp_rto_min)
812 					interval = tcp->tcp_rto_min;
813 
814 				if (interval > tcp->tcp_rto_max)
815 					interval = tcp->tcp_rto_max;
816 
817 				tcp->tcp_ka_rinterval = interval;
818 			} else {
819 				if ((*i1 * tcp->tcp_ka_rinterval) <
820 				    tcps->tcps_keepalive_abort_interval_low ||
821 				    (*i1 * tcp->tcp_ka_rinterval) >
822 				    tcps->tcps_keepalive_abort_interval_high)
823 					return (EINVAL);
824 				tcp->tcp_ka_abort_thres =
825 				    (*i1 * tcp->tcp_ka_rinterval);
826 			}
827 			tcp->tcp_ka_cnt = *i1;
828 			break;
829 		case TCP_KEEPINTVL:
830 			/*
831 			 * TCP_KEEPINTVL is specified in seconds, but
832 			 * tcp_ka_rinterval is in milliseconds.
833 			 */
834 
835 			if (checkonly)
836 				break;
837 
838 			if ((*i1 * 1000) < tcp->tcp_rto_min ||
839 			    (*i1 * 1000) > tcp->tcp_rto_max)
840 				return (EINVAL);
841 
842 			if (tcp->tcp_ka_cnt == 0) {
843 				tcp->tcp_ka_cnt =
844 				    tcp->tcp_ka_abort_thres / (*i1 * 1000);
845 			} else {
846 				if ((*i1 * tcp->tcp_ka_cnt * 1000) <
847 				    tcps->tcps_keepalive_abort_interval_low ||
848 				    (*i1 * tcp->tcp_ka_cnt * 1000) >
849 				    tcps->tcps_keepalive_abort_interval_high)
850 					return (EINVAL);
851 				tcp->tcp_ka_abort_thres =
852 				    (*i1 * tcp->tcp_ka_cnt * 1000);
853 			}
854 			tcp->tcp_ka_rinterval = *i1 * 1000;
855 			break;
856 		case TCP_KEEPALIVE_ABORT_THRESHOLD:
857 			if (!checkonly) {
858 				if (*i1 <
859 				    tcps->tcps_keepalive_abort_interval_low ||
860 				    *i1 >
861 				    tcps->tcps_keepalive_abort_interval_high) {
862 					*outlenp = 0;
863 					return (EINVAL);
864 				}
865 				tcp->tcp_ka_abort_thres = *i1;
866 				tcp->tcp_ka_cnt = 0;
867 				tcp->tcp_ka_rinterval = 0;
868 			}
869 			break;
870 		case TCP_CONGESTION: {
871 			struct cc_algo *algo;
872 
873 			if (checkonly) {
874 				break;
875 			}
876 
877 			/*
878 			 * Make sure the string is NUL-terminated. Some
879 			 * consumers pass only the number of characters
880 			 * in the string, and don't include the NUL
881 			 * terminator, so we set it for them.
882 			 */
883 			if (inlen < CC_ALGO_NAME_MAX) {
884 				invalp[inlen] = '\0';
885 			}
886 			invalp[CC_ALGO_NAME_MAX - 1] = '\0';
887 
888 			if ((algo = cc_load_algo((char *)invalp)) == NULL) {
889 				return (ENOENT);
890 			}
891 
892 			if (CC_ALGO(tcp)->cb_destroy != NULL) {
893 				CC_ALGO(tcp)->cb_destroy(&tcp->tcp_ccv);
894 			}
895 
896 			CC_DATA(tcp) = NULL;
897 			CC_ALGO(tcp) = algo;
898 
899 			if (CC_ALGO(tcp)->cb_init != NULL) {
900 				VERIFY0(CC_ALGO(tcp)->cb_init(&tcp->tcp_ccv));
901 			}
902 
903 			break;
904 		}
905 		case TCP_CORK:
906 			if (!checkonly) {
907 				/*
908 				 * if tcp->tcp_cork was set and is now
909 				 * being unset, we have to make sure that
910 				 * the remaining data gets sent out. Also
911 				 * unset tcp->tcp_cork so that tcp_wput_data()
912 				 * can send data even if it is less than mss
913 				 */
914 				if (tcp->tcp_cork && onoff == 0 &&
915 				    tcp->tcp_unsent > 0) {
916 					tcp->tcp_cork = B_FALSE;
917 					tcp_wput_data(tcp, NULL, B_FALSE);
918 				}
919 				tcp->tcp_cork = onoff;
920 			}
921 			break;
922 		case TCP_RTO_INITIAL:
923 			if (checkonly || val == 0)
924 				break;
925 
926 			/*
927 			 * Sanity checks
928 			 *
929 			 * The initial RTO should be bounded by the minimum
930 			 * and maximum RTO.  And it should also be smaller
931 			 * than the connect attempt abort timeout.  Otherwise,
932 			 * the connection won't be aborted in a period
933 			 * reasonably close to that timeout.
934 			 */
935 			if (val < tcp->tcp_rto_min || val > tcp->tcp_rto_max ||
936 			    val > tcp->tcp_second_ctimer_threshold ||
937 			    val < tcps->tcps_rexmit_interval_initial_low ||
938 			    val > tcps->tcps_rexmit_interval_initial_high) {
939 				*outlenp = 0;
940 				return (EINVAL);
941 			}
942 			tcp->tcp_rto_initial = val;
943 
944 			/*
945 			 * If TCP has not sent anything, need to re-calculate
946 			 * tcp_rto.  Otherwise, this option change does not
947 			 * really affect anything.
948 			 */
949 			if (tcp->tcp_state >= TCPS_SYN_SENT)
950 				break;
951 
952 			tcp->tcp_rtt_sa = MSEC2NSEC(tcp->tcp_rto_initial) << 2;
953 			tcp->tcp_rtt_sd = MSEC2NSEC(tcp->tcp_rto_initial) >> 1;
954 			tcp->tcp_rto = tcp_calculate_rto(tcp, tcps,
955 			    tcps->tcps_conn_grace_period);
956 			break;
957 		case TCP_RTO_MIN:
958 			if (checkonly || val == 0)
959 				break;
960 
961 			if (val < tcps->tcps_rexmit_interval_min_low ||
962 			    val > tcps->tcps_rexmit_interval_min_high ||
963 			    val > tcp->tcp_rto_max) {
964 				*outlenp = 0;
965 				return (EINVAL);
966 			}
967 			tcp->tcp_rto_min = val;
968 			if (tcp->tcp_rto < val)
969 				tcp->tcp_rto = val;
970 			break;
971 		case TCP_RTO_MAX:
972 			if (checkonly || val == 0)
973 				break;
974 
975 			/*
976 			 * Sanity checks
977 			 *
978 			 * The maximum RTO should not be larger than the
979 			 * connection abort timeout.  Otherwise, the
980 			 * connection won't be aborted in a period reasonably
981 			 * close to that timeout.
982 			 */
983 			if (val < tcps->tcps_rexmit_interval_max_low ||
984 			    val > tcps->tcps_rexmit_interval_max_high ||
985 			    val < tcp->tcp_rto_min ||
986 			    val > tcp->tcp_second_timer_threshold) {
987 				*outlenp = 0;
988 				return (EINVAL);
989 			}
990 			tcp->tcp_rto_max = val;
991 			if (tcp->tcp_rto > val)
992 				tcp->tcp_rto = val;
993 			break;
994 		case TCP_LINGER2:
995 			if (checkonly || *i1 == 0)
996 				break;
997 
998 			/*
			 * Note that the option value is in seconds.  It must
			 * be no smaller than the lower bound of the private
			 * parameter tcp_fin_wait_2_flush_interval and no
			 * larger than the current value of that parameter.
			 * Capping it at the current value prevents an app
			 * from setting TCP_LINGER2 to a big value and holding
			 * resources too long in the FIN-WAIT-2 state.
1007 			 */
1008 			if (*i1 < 0 ||
1009 			    tcps->tcps_fin_wait_2_flush_interval_low/SECONDS >
1010 			    *i1 ||
1011 			    tcps->tcps_fin_wait_2_flush_interval/SECONDS <
1012 			    *i1) {
1013 				*outlenp = 0;
1014 				return (EINVAL);
1015 			}
1016 			tcp->tcp_fin_wait_2_flush_interval = *i1 * SECONDS;
1017 			break;
1018 		default:
1019 			break;
1020 		}
1021 		break;
1022 	case IPPROTO_IP:
1023 		if (connp->conn_family != AF_INET) {
1024 			*outlenp = 0;
1025 			return (EINVAL);
1026 		}
1027 		switch (name) {
1028 		case IP_SEC_OPT:
1029 			/*
1030 			 * We should not allow policy setting after
1031 			 * we start listening for connections.
1032 			 */
1033 			if (tcp->tcp_state == TCPS_LISTEN) {
1034 				return (EINVAL);
1035 			}
1036 			break;
1037 		case IP_RECVTOS:
1038 			if (!checkonly) {
1039 				/*
				 * Force it to be sent up with the next msg
				 * by setting it to a value which cannot
				 * appear in a packet (TOS is only 8 bits).
1043 				 */
1044 				tcp->tcp_recvtos = 0xffffffffU;
1045 			}
1046 			break;
1047 		}
1048 		break;
1049 	case IPPROTO_IPV6:
1050 		/*
1051 		 * IPPROTO_IPV6 options are only supported for sockets
1052 		 * that are using IPv6 on the wire.
1053 		 */
1054 		if (connp->conn_ipversion != IPV6_VERSION) {
1055 			*outlenp = 0;
1056 			return (EINVAL);
1057 		}
1058 
1059 		switch (name) {
1060 		case IPV6_RECVPKTINFO:
1061 			if (!checkonly) {
1062 				/* Force it to be sent up with the next msg */
1063 				tcp->tcp_recvifindex = 0;
1064 			}
1065 			break;
1066 		case IPV6_RECVTCLASS:
1067 			if (!checkonly) {
1068 				/* Force it to be sent up with the next msg */
1069 				tcp->tcp_recvtclass = 0xffffffffU;
1070 			}
1071 			break;
1072 		case IPV6_RECVHOPLIMIT:
1073 			if (!checkonly) {
1074 				/* Force it to be sent up with the next msg */
1075 				tcp->tcp_recvhops = 0xffffffffU;
1076 			}
1077 			break;
1078 		case IPV6_PKTINFO:
1079 			/* This is an extra check for TCP */
1080 			if (inlen == sizeof (struct in6_pktinfo)) {
1081 				struct in6_pktinfo *pkti;
1082 
1083 				pkti = (struct in6_pktinfo *)invalp;
1084 				/*
1085 				 * RFC 3542 states that ipi6_addr must be
1086 				 * the unspecified address when setting the
1087 				 * IPV6_PKTINFO sticky socket option on a
1088 				 * TCP socket.
1089 				 */
1090 				if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr))
1091 					return (EINVAL);
1092 			}
1093 			break;
1094 		case IPV6_SEC_OPT:
1095 			/*
1096 			 * We should not allow policy setting after
1097 			 * we start listening for connections.
1098 			 */
1099 			if (tcp->tcp_state == TCPS_LISTEN) {
1100 				return (EINVAL);
1101 			}
1102 			break;
1103 		}
1104 		break;
1105 	}
1106 	reterr = conn_opt_set(&coas, level, name, inlen, invalp,
1107 	    checkonly, cr);
1108 	if (reterr != 0) {
1109 		*outlenp = 0;
1110 		return (reterr);
1111 	}
1112 
1113 	/*
1114 	 * Common case of OK return with outval same as inval
1115 	 */
1116 	if (invalp != outvalp) {
1117 		/* don't trust bcopy for identical src/dst */
1118 		(void) bcopy(invalp, outvalp, inlen);
1119 	}
1120 	*outlenp = inlen;
1121 
1122 	if (coas.coa_changed & COA_HEADER_CHANGED) {
		/* If we are connected we rebuild the headers */
1124 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) &&
1125 		    !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
1126 			reterr = tcp_build_hdrs(tcp);
1127 			if (reterr != 0)
1128 				return (reterr);
1129 		}
1130 	}
1131 	if (coas.coa_changed & COA_ROUTE_CHANGED) {
1132 		in6_addr_t nexthop;
1133 
1134 		/*
1135 		 * If we are connected we re-cache the information.
1136 		 * We ignore errors to preserve BSD behavior.
1137 		 * Note that we don't redo IPsec policy lookup here
1138 		 * since the final destination (or source) didn't change.
1139 		 */
1140 		ip_attr_nexthop(&connp->conn_xmit_ipp, connp->conn_ixa,
1141 		    &connp->conn_faddr_v6, &nexthop);
1142 
1143 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) &&
1144 		    !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
1145 			(void) ip_attr_connect(connp, connp->conn_ixa,
1146 			    &connp->conn_laddr_v6, &connp->conn_faddr_v6,
1147 			    &nexthop, connp->conn_fport, NULL, NULL,
1148 			    IPDF_VERIFY_DST);
1149 		}
1150 	}
1151 	if ((coas.coa_changed & COA_SNDBUF_CHANGED) && !IPCL_IS_NONSTR(connp)) {
1152 		connp->conn_wq->q_hiwat = connp->conn_sndbuf;
1153 	}
1154 	if (coas.coa_changed & COA_WROFF_CHANGED) {
1155 		connp->conn_wroff = connp->conn_ht_iphc_allocated +
1156 		    tcps->tcps_wroff_xtra;
1157 		(void) proto_set_tx_wroff(connp->conn_rq, connp,
1158 		    connp->conn_wroff);
1159 	}
1160 	if (coas.coa_changed & COA_OOBINLINE_CHANGED) {
1161 		if (IPCL_IS_NONSTR(connp))
1162 			proto_set_rx_oob_opt(connp, onoff);
1163 	}
1164 	return (0);
1165 }
1166