/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved.
 * Copyright 2019 Joyent, Inc.
 * Copyright (c) 2016 by Delphix. All rights reserved.
 * Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
 * Copyright 2024 Oxide Computer Company
 */
297c478bd9Sstevel@tonic-gate 
307c478bd9Sstevel@tonic-gate #include <sys/types.h>
317c478bd9Sstevel@tonic-gate #include <sys/stream.h>
327c478bd9Sstevel@tonic-gate #define	_SUN_TPI_VERSION 2
337c478bd9Sstevel@tonic-gate #include <sys/tihdr.h>
347c478bd9Sstevel@tonic-gate #include <sys/socket.h>
357c478bd9Sstevel@tonic-gate #include <sys/xti_xtiopt.h>
367c478bd9Sstevel@tonic-gate #include <sys/xti_inet.h>
37721fffe3SKacheong Poon #include <sys/policy.h>
387c478bd9Sstevel@tonic-gate 
39d4994511SCody Peter Mello #include <inet/cc.h>
407c478bd9Sstevel@tonic-gate #include <inet/common.h>
417c478bd9Sstevel@tonic-gate #include <netinet/ip6.h>
427c478bd9Sstevel@tonic-gate #include <inet/ip.h>
437c478bd9Sstevel@tonic-gate 
447c478bd9Sstevel@tonic-gate #include <netinet/in.h>
457c478bd9Sstevel@tonic-gate #include <netinet/tcp.h>
467c478bd9Sstevel@tonic-gate #include <inet/optcom.h>
47721fffe3SKacheong Poon #include <inet/proto_set.h>
48bd670b35SErik Nordmark #include <inet/tcp_impl.h>
497c478bd9Sstevel@tonic-gate 
50ca3c8f41SDavid Höppner static int	tcp_opt_default(queue_t *, int, int, uchar_t *);
51ca3c8f41SDavid Höppner 
527c478bd9Sstevel@tonic-gate /*
537c478bd9Sstevel@tonic-gate  * Table of all known options handled on a TCP protocol stack.
547c478bd9Sstevel@tonic-gate  *
557c478bd9Sstevel@tonic-gate  * Note: This table contains options processed by both TCP and IP levels
567c478bd9Sstevel@tonic-gate  *       and is the superset of options that can be performed on a TCP over IP
577c478bd9Sstevel@tonic-gate  *       stack.
587c478bd9Sstevel@tonic-gate  */
597c478bd9Sstevel@tonic-gate opdes_t	tcp_opt_arr[] = {
607c478bd9Sstevel@tonic-gate 
61bd670b35SErik Nordmark { SO_LINGER,	SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0,
627c478bd9Sstevel@tonic-gate 	sizeof (struct linger), 0 },
637c478bd9Sstevel@tonic-gate 
64bd670b35SErik Nordmark { SO_DEBUG,	SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
65bd670b35SErik Nordmark { SO_KEEPALIVE,	SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
66bd670b35SErik Nordmark { SO_DONTROUTE,	SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
67bd670b35SErik Nordmark { SO_USELOOPBACK, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0
687c478bd9Sstevel@tonic-gate 	},
69bd670b35SErik Nordmark { SO_BROADCAST,	SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
70bd670b35SErik Nordmark { SO_REUSEADDR, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
71bd670b35SErik Nordmark { SO_OOBINLINE, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
72bd670b35SErik Nordmark { SO_TYPE,	SOL_SOCKET, OA_R, OA_R, OP_NP, 0, sizeof (int), 0 },
73bd670b35SErik Nordmark { SO_SNDBUF,	SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
74bd670b35SErik Nordmark { SO_RCVBUF,	SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
75bd670b35SErik Nordmark { SO_SNDTIMEO,	SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0,
763986c91eSanders 	sizeof (struct timeval), 0 },
77bd670b35SErik Nordmark { SO_RCVTIMEO,	SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0,
783986c91eSanders 	sizeof (struct timeval), 0 },
79bd670b35SErik Nordmark { SO_DGRAM_ERRIND, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0
807c478bd9Sstevel@tonic-gate 	},
817c478bd9Sstevel@tonic-gate { SO_SND_COPYAVOID, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
82bd670b35SErik Nordmark { SO_ANON_MLP, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int),
835d0bc3edSsommerfe 	0 },
84bd670b35SErik Nordmark { SO_MAC_EXEMPT, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int),
855d0bc3edSsommerfe 	0 },
86bd670b35SErik Nordmark { SO_MAC_IMPLICIT, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int),
875d3b8cb7SBill Sommerfeld 	0 },
88bd670b35SErik Nordmark { SO_ALLZONES, SOL_SOCKET, OA_R, OA_RW, OP_CONFIG, 0, sizeof (int),
895d0bc3edSsommerfe 	0 },
90bd670b35SErik Nordmark { SO_EXCLBIND, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
91ae347574Skcpoon 
92bd670b35SErik Nordmark { SO_DOMAIN,	SOL_SOCKET, OA_R, OA_R, OP_NP, 0, sizeof (int), 0 },
9388cda078Skcpoon 
94bd670b35SErik Nordmark { SO_PROTOTYPE,	SOL_SOCKET, OA_R, OA_R, OP_NP, 0, sizeof (int), 0 },
9588cda078Skcpoon 
96bd670b35SErik Nordmark { TCP_NODELAY,	IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0
977c478bd9Sstevel@tonic-gate 	},
98bd670b35SErik Nordmark { TCP_MAXSEG,	IPPROTO_TCP, OA_R, OA_R, OP_NP, 0, sizeof (uint_t),
997c478bd9Sstevel@tonic-gate 	536 },
1007c478bd9Sstevel@tonic-gate 
1017c478bd9Sstevel@tonic-gate { TCP_NOTIFY_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP,
102bd670b35SErik Nordmark 	OP_DEF_FN, sizeof (int), -1 /* not initialized */ },
1037c478bd9Sstevel@tonic-gate 
1047c478bd9Sstevel@tonic-gate { TCP_ABORT_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP,
105bd670b35SErik Nordmark 	OP_DEF_FN, sizeof (int), -1 /* not initialized */ },
1067c478bd9Sstevel@tonic-gate 
1077c478bd9Sstevel@tonic-gate { TCP_CONN_NOTIFY_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP,
108bd670b35SErik Nordmark 	OP_DEF_FN, sizeof (int), -1 /* not initialized */ },
1097c478bd9Sstevel@tonic-gate 
1107c478bd9Sstevel@tonic-gate { TCP_CONN_ABORT_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP,
111bd670b35SErik Nordmark 	OP_DEF_FN, sizeof (int), -1 /* not initialized */ },
1127c478bd9Sstevel@tonic-gate 
113bd670b35SErik Nordmark { TCP_RECVDSTADDR, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int),
1147c478bd9Sstevel@tonic-gate 	0 },
1157c478bd9Sstevel@tonic-gate 
116bd670b35SErik Nordmark { TCP_ANONPRIVBIND, IPPROTO_TCP, OA_R, OA_RW, OP_PRIVPORT, 0,
1177c478bd9Sstevel@tonic-gate 	sizeof (int), 0 },
1187c478bd9Sstevel@tonic-gate 
119bd670b35SErik Nordmark { TCP_EXCLBIND, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0
1207c478bd9Sstevel@tonic-gate 	},
1217c478bd9Sstevel@tonic-gate 
122bd670b35SErik Nordmark { TCP_INIT_CWND, IPPROTO_TCP, OA_RW, OA_RW, OP_CONFIG, 0,
1237c478bd9Sstevel@tonic-gate 	sizeof (int), 0 },
1247c478bd9Sstevel@tonic-gate 
125bd670b35SErik Nordmark { TCP_KEEPALIVE_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0,
1267c478bd9Sstevel@tonic-gate 	sizeof (int), 0	},
1277c478bd9Sstevel@tonic-gate 
1283d0a255cSGarrett D'Amore { TCP_KEEPIDLE, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
1293d0a255cSGarrett D'Amore 
1303d0a255cSGarrett D'Amore { TCP_KEEPCNT, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
1313d0a255cSGarrett D'Amore 
1323d0a255cSGarrett D'Amore { TCP_KEEPINTVL, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
1333d0a255cSGarrett D'Amore 
134bd670b35SErik Nordmark { TCP_KEEPALIVE_ABORT_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0,
1357c478bd9Sstevel@tonic-gate 	sizeof (int), 0	},
1367c478bd9Sstevel@tonic-gate 
137bd670b35SErik Nordmark { TCP_CORK, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
1387c478bd9Sstevel@tonic-gate 
139861fa149SNils Nieuwejaar { TCP_QUICKACK, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
140861fa149SNils Nieuwejaar 
141707e74bcSKacheong Poon { TCP_RTO_INITIAL, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (uint32_t), 0 },
142707e74bcSKacheong Poon 
143707e74bcSKacheong Poon { TCP_RTO_MIN, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (uint32_t), 0 },
144707e74bcSKacheong Poon 
145707e74bcSKacheong Poon { TCP_RTO_MAX, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (uint32_t), 0 },
146707e74bcSKacheong Poon 
147707e74bcSKacheong Poon { TCP_LINGER2, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
148707e74bcSKacheong Poon 
149d4994511SCody Peter Mello { TCP_CONGESTION, IPPROTO_TCP, OA_RW, OA_RW, OP_NP,
150d4994511SCody Peter Mello 	OP_VARLEN, CC_ALGO_NAME_MAX, 0 },
151d4994511SCody Peter Mello 
1527c478bd9Sstevel@tonic-gate { IP_OPTIONS,	IPPROTO_IP, OA_RW, OA_RW, OP_NP,
153bd670b35SErik Nordmark 	(OP_VARLEN|OP_NODEFAULT),
1540f1702c5SYu Xiangning 	IP_MAX_OPT_LENGTH + IP_ADDR_LEN, -1 /* not initialized */ },
1557c478bd9Sstevel@tonic-gate { T_IP_OPTIONS,	IPPROTO_IP, OA_RW, OA_RW, OP_NP,
156bd670b35SErik Nordmark 	(OP_VARLEN|OP_NODEFAULT),
1570f1702c5SYu Xiangning 	IP_MAX_OPT_LENGTH + IP_ADDR_LEN, -1 /* not initialized */ },
1587c478bd9Sstevel@tonic-gate 
159bd670b35SErik Nordmark { IP_TOS,	IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
160bd670b35SErik Nordmark { T_IP_TOS,	IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
161bd670b35SErik Nordmark { IP_TTL,	IPPROTO_IP, OA_RW, OA_RW, OP_NP, OP_DEF_FN,
1627c478bd9Sstevel@tonic-gate 	sizeof (int), -1 /* not initialized */ },
163221e47fbSAndy Fiddaman { IP_RECVTOS,	IPPROTO_IP,  OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
1647c478bd9Sstevel@tonic-gate 
165bd670b35SErik Nordmark { IP_SEC_OPT, IPPROTO_IP, OA_RW, OA_RW, OP_NP, OP_NODEFAULT,
1667c478bd9Sstevel@tonic-gate 	sizeof (ipsec_req_t), -1 /* not initialized */ },
1677c478bd9Sstevel@tonic-gate 
168bd670b35SErik Nordmark { IP_BOUND_IF, IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0,
1697c478bd9Sstevel@tonic-gate 	sizeof (int),	0 /* no ifindex */ },
1707c478bd9Sstevel@tonic-gate 
171bd670b35SErik Nordmark { IP_UNSPEC_SRC, IPPROTO_IP, OA_R, OA_RW, OP_RAW, 0,
1727c478bd9Sstevel@tonic-gate 	sizeof (int), 0 },
1737c478bd9Sstevel@tonic-gate 
174*e8249070SRobert Mustacchi { IP_MINTTL,	IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
175*e8249070SRobert Mustacchi 
176bd670b35SErik Nordmark { IPV6_UNICAST_HOPS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, OP_DEF_FN,
1777c478bd9Sstevel@tonic-gate 	sizeof (int), -1 /* not initialized */ },
1787c478bd9Sstevel@tonic-gate 
179bd670b35SErik Nordmark { IPV6_BOUND_IF, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
1807c478bd9Sstevel@tonic-gate 	sizeof (int),	0 /* no ifindex */ },
1817c478bd9Sstevel@tonic-gate 
182bd670b35SErik Nordmark { IP_DONTFRAG, IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
183bd670b35SErik Nordmark 
184bd670b35SErik Nordmark { IP_NEXTHOP, IPPROTO_IP, OA_R, OA_RW, OP_CONFIG, 0,
18543d18f1cSpriyanka 	sizeof (in_addr_t),	-1 /* not initialized  */ },
18643d18f1cSpriyanka 
187bd670b35SErik Nordmark { IPV6_UNSPEC_SRC, IPPROTO_IPV6, OA_R, OA_RW, OP_RAW, 0,
1887c478bd9Sstevel@tonic-gate 	sizeof (int), 0 },
1897c478bd9Sstevel@tonic-gate 
1907c478bd9Sstevel@tonic-gate { IPV6_PKTINFO, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
191bd670b35SErik Nordmark 	(OP_NODEFAULT|OP_VARLEN),
1927c478bd9Sstevel@tonic-gate 	sizeof (struct in6_pktinfo), -1 /* not initialized */ },
1937c478bd9Sstevel@tonic-gate { IPV6_NEXTHOP, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
194bd670b35SErik Nordmark 	OP_NODEFAULT,
1957c478bd9Sstevel@tonic-gate 	sizeof (sin6_t), -1 /* not initialized */ },
1967c478bd9Sstevel@tonic-gate { IPV6_HOPOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
197bd670b35SErik Nordmark 	(OP_VARLEN|OP_NODEFAULT), 255*8,
1987c478bd9Sstevel@tonic-gate 	-1 /* not initialized */ },
1997c478bd9Sstevel@tonic-gate { IPV6_DSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
200bd670b35SErik Nordmark 	(OP_VARLEN|OP_NODEFAULT), 255*8,
2017c478bd9Sstevel@tonic-gate 	-1 /* not initialized */ },
2027c478bd9Sstevel@tonic-gate { IPV6_RTHDRDSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
203bd670b35SErik Nordmark 	(OP_VARLEN|OP_NODEFAULT), 255*8,
2047c478bd9Sstevel@tonic-gate 	-1 /* not initialized */ },
2057c478bd9Sstevel@tonic-gate { IPV6_RTHDR, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
206bd670b35SErik Nordmark 	(OP_VARLEN|OP_NODEFAULT), 255*8,
2077c478bd9Sstevel@tonic-gate 	-1 /* not initialized */ },
2087c478bd9Sstevel@tonic-gate { IPV6_TCLASS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
209bd670b35SErik Nordmark 	OP_NODEFAULT,
2107c478bd9Sstevel@tonic-gate 	sizeof (int), -1 /* not initialized */ },
2117c478bd9Sstevel@tonic-gate { IPV6_PATHMTU, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
212bd670b35SErik Nordmark 	OP_NODEFAULT,
2137c478bd9Sstevel@tonic-gate 	sizeof (struct ip6_mtuinfo), -1 /* not initialized */ },
214bd670b35SErik Nordmark { IPV6_DONTFRAG, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
215bd670b35SErik Nordmark 	sizeof (int), 0 },
216bd670b35SErik Nordmark { IPV6_USE_MIN_MTU, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
2177c478bd9Sstevel@tonic-gate 	sizeof (int), 0 },
218bd670b35SErik Nordmark { IPV6_V6ONLY, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
2197c478bd9Sstevel@tonic-gate 	sizeof (int), 0 },
2207c478bd9Sstevel@tonic-gate 
2217c478bd9Sstevel@tonic-gate /* Enable receipt of ancillary data */
222bd670b35SErik Nordmark { IPV6_RECVPKTINFO, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
2237c478bd9Sstevel@tonic-gate 	sizeof (int), 0 },
224bd670b35SErik Nordmark { IPV6_RECVHOPLIMIT, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
2257c478bd9Sstevel@tonic-gate 	sizeof (int), 0 },
226bd670b35SErik Nordmark { IPV6_RECVHOPOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
2277c478bd9Sstevel@tonic-gate 	sizeof (int), 0 },
228bd670b35SErik Nordmark { _OLD_IPV6_RECVDSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
2297c478bd9Sstevel@tonic-gate 	sizeof (int), 0 },
230bd670b35SErik Nordmark { IPV6_RECVDSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
2317c478bd9Sstevel@tonic-gate 	sizeof (int), 0 },
232bd670b35SErik Nordmark { IPV6_RECVRTHDR, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
2337c478bd9Sstevel@tonic-gate 	sizeof (int), 0 },
234bd670b35SErik Nordmark { IPV6_RECVRTHDRDSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
2357c478bd9Sstevel@tonic-gate 	sizeof (int), 0 },
236bd670b35SErik Nordmark { IPV6_RECVTCLASS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
2377c478bd9Sstevel@tonic-gate 	sizeof (int), 0 },
2387c478bd9Sstevel@tonic-gate 
239bd670b35SErik Nordmark { IPV6_SEC_OPT, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, OP_NODEFAULT,
2407c478bd9Sstevel@tonic-gate 	sizeof (ipsec_req_t), -1 /* not initialized */ },
241bd670b35SErik Nordmark { IPV6_SRC_PREFERENCES, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
2427c478bd9Sstevel@tonic-gate 	sizeof (uint32_t), IPV6_PREFER_SRC_DEFAULT },
243*e8249070SRobert Mustacchi 
244*e8249070SRobert Mustacchi { IPV6_MINHOPCOUNT, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
245*e8249070SRobert Mustacchi 	sizeof (int), 0 },
2467c478bd9Sstevel@tonic-gate };
2477c478bd9Sstevel@tonic-gate 
2487c478bd9Sstevel@tonic-gate /*
2497c478bd9Sstevel@tonic-gate  * Table of all supported levels
2507c478bd9Sstevel@tonic-gate  * Note: Some levels (e.g. XTI_GENERIC) may be valid but may not have
2517c478bd9Sstevel@tonic-gate  * any supported options so we need this info separately.
2527c478bd9Sstevel@tonic-gate  *
2537c478bd9Sstevel@tonic-gate  * This is needed only for topmost tpi providers and is used only by
2547c478bd9Sstevel@tonic-gate  * XTI interfaces.
2557c478bd9Sstevel@tonic-gate  */
2567c478bd9Sstevel@tonic-gate optlevel_t	tcp_valid_levels_arr[] = {
2577c478bd9Sstevel@tonic-gate 	XTI_GENERIC,
2587c478bd9Sstevel@tonic-gate 	SOL_SOCKET,
2597c478bd9Sstevel@tonic-gate 	IPPROTO_TCP,
2607c478bd9Sstevel@tonic-gate 	IPPROTO_IP,
2617c478bd9Sstevel@tonic-gate 	IPPROTO_IPV6
2627c478bd9Sstevel@tonic-gate };
2637c478bd9Sstevel@tonic-gate 
2647c478bd9Sstevel@tonic-gate 
2657c478bd9Sstevel@tonic-gate #define	TCP_OPT_ARR_CNT		A_CNT(tcp_opt_arr)
2667c478bd9Sstevel@tonic-gate #define	TCP_VALID_LEVELS_CNT	A_CNT(tcp_valid_levels_arr)
2677c478bd9Sstevel@tonic-gate 
2687c478bd9Sstevel@tonic-gate uint_t tcp_max_optsize; /* initialized when TCP driver is loaded */
2697c478bd9Sstevel@tonic-gate 
2707c478bd9Sstevel@tonic-gate /*
2717c478bd9Sstevel@tonic-gate  * Initialize option database object for TCP
2727c478bd9Sstevel@tonic-gate  *
2737c478bd9Sstevel@tonic-gate  * This object represents database of options to search passed to
2747c478bd9Sstevel@tonic-gate  * {sock,tpi}optcom_req() interface routine to take care of option
2757c478bd9Sstevel@tonic-gate  * management and associated methods.
2767c478bd9Sstevel@tonic-gate  */
2777c478bd9Sstevel@tonic-gate 
2787c478bd9Sstevel@tonic-gate optdb_obj_t tcp_opt_obj = {
2797c478bd9Sstevel@tonic-gate 	tcp_opt_default,	/* TCP default value function pointer */
2800f1702c5SYu Xiangning 	tcp_tpi_opt_get,	/* TCP get function pointer */
2810f1702c5SYu Xiangning 	tcp_tpi_opt_set,	/* TCP set function pointer */
2827c478bd9Sstevel@tonic-gate 	TCP_OPT_ARR_CNT,	/* TCP option database count of entries */
2837c478bd9Sstevel@tonic-gate 	tcp_opt_arr,		/* TCP option database */
2847c478bd9Sstevel@tonic-gate 	TCP_VALID_LEVELS_CNT,	/* TCP valid level count of entries */
2857c478bd9Sstevel@tonic-gate 	tcp_valid_levels_arr	/* TCP valid level array */
2867c478bd9Sstevel@tonic-gate };
287721fffe3SKacheong Poon 
288721fffe3SKacheong Poon static int tcp_max_init_cwnd = TCP_MAX_INIT_CWND;
289721fffe3SKacheong Poon 
290721fffe3SKacheong Poon /*
291721fffe3SKacheong Poon  * Some TCP options can be "set" by requesting them in the option
292721fffe3SKacheong Poon  * buffer. This is needed for XTI feature test though we do not
293721fffe3SKacheong Poon  * allow it in general. We interpret that this mechanism is more
294721fffe3SKacheong Poon  * applicable to OSI protocols and need not be allowed in general.
295721fffe3SKacheong Poon  * This routine filters out options for which it is not allowed (most)
296721fffe3SKacheong Poon  * and lets through those (few) for which it is. [ The XTI interface
297721fffe3SKacheong Poon  * test suite specifics will imply that any XTI_GENERIC level XTI_* if
298721fffe3SKacheong Poon  * ever implemented will have to be allowed here ].
299721fffe3SKacheong Poon  */
300721fffe3SKacheong Poon static boolean_t
tcp_allow_connopt_set(int level,int name)301721fffe3SKacheong Poon tcp_allow_connopt_set(int level, int name)
302721fffe3SKacheong Poon {
303721fffe3SKacheong Poon 
304721fffe3SKacheong Poon 	switch (level) {
305721fffe3SKacheong Poon 	case IPPROTO_TCP:
306721fffe3SKacheong Poon 		switch (name) {
307721fffe3SKacheong Poon 		case TCP_NODELAY:
308721fffe3SKacheong Poon 			return (B_TRUE);
309721fffe3SKacheong Poon 		default:
310721fffe3SKacheong Poon 			return (B_FALSE);
311721fffe3SKacheong Poon 		}
312721fffe3SKacheong Poon 		/*NOTREACHED*/
313721fffe3SKacheong Poon 	default:
314721fffe3SKacheong Poon 		return (B_FALSE);
315721fffe3SKacheong Poon 	}
316721fffe3SKacheong Poon 	/*NOTREACHED*/
317721fffe3SKacheong Poon }
318721fffe3SKacheong Poon 
319721fffe3SKacheong Poon /*
320721fffe3SKacheong Poon  * This routine gets default values of certain options whose default
321721fffe3SKacheong Poon  * values are maintained by protocol specific code
322721fffe3SKacheong Poon  */
323721fffe3SKacheong Poon /* ARGSUSED */
324ca3c8f41SDavid Höppner static int
tcp_opt_default(queue_t * q,int level,int name,uchar_t * ptr)325721fffe3SKacheong Poon tcp_opt_default(queue_t *q, int level, int name, uchar_t *ptr)
326721fffe3SKacheong Poon {
327721fffe3SKacheong Poon 	int32_t	*i1 = (int32_t *)ptr;
328721fffe3SKacheong Poon 	tcp_stack_t	*tcps = Q_TO_TCP(q)->tcp_tcps;
329721fffe3SKacheong Poon 
330721fffe3SKacheong Poon 	switch (level) {
331721fffe3SKacheong Poon 	case IPPROTO_TCP:
332721fffe3SKacheong Poon 		switch (name) {
333721fffe3SKacheong Poon 		case TCP_NOTIFY_THRESHOLD:
334721fffe3SKacheong Poon 			*i1 = tcps->tcps_ip_notify_interval;
335721fffe3SKacheong Poon 			break;
336721fffe3SKacheong Poon 		case TCP_ABORT_THRESHOLD:
337721fffe3SKacheong Poon 			*i1 = tcps->tcps_ip_abort_interval;
338721fffe3SKacheong Poon 			break;
339721fffe3SKacheong Poon 		case TCP_CONN_NOTIFY_THRESHOLD:
340721fffe3SKacheong Poon 			*i1 = tcps->tcps_ip_notify_cinterval;
341721fffe3SKacheong Poon 			break;
342721fffe3SKacheong Poon 		case TCP_CONN_ABORT_THRESHOLD:
343721fffe3SKacheong Poon 			*i1 = tcps->tcps_ip_abort_cinterval;
344721fffe3SKacheong Poon 			break;
345721fffe3SKacheong Poon 		default:
346721fffe3SKacheong Poon 			return (-1);
347721fffe3SKacheong Poon 		}
348721fffe3SKacheong Poon 		break;
349721fffe3SKacheong Poon 	case IPPROTO_IP:
350721fffe3SKacheong Poon 		switch (name) {
351721fffe3SKacheong Poon 		case IP_TTL:
352721fffe3SKacheong Poon 			*i1 = tcps->tcps_ipv4_ttl;
353721fffe3SKacheong Poon 			break;
354721fffe3SKacheong Poon 		default:
355721fffe3SKacheong Poon 			return (-1);
356721fffe3SKacheong Poon 		}
357721fffe3SKacheong Poon 		break;
358721fffe3SKacheong Poon 	case IPPROTO_IPV6:
359721fffe3SKacheong Poon 		switch (name) {
360721fffe3SKacheong Poon 		case IPV6_UNICAST_HOPS:
361721fffe3SKacheong Poon 			*i1 = tcps->tcps_ipv6_hoplimit;
362721fffe3SKacheong Poon 			break;
363721fffe3SKacheong Poon 		default:
364721fffe3SKacheong Poon 			return (-1);
365721fffe3SKacheong Poon 		}
366721fffe3SKacheong Poon 		break;
367721fffe3SKacheong Poon 	default:
368721fffe3SKacheong Poon 		return (-1);
369721fffe3SKacheong Poon 	}
370721fffe3SKacheong Poon 	return (sizeof (int));
371721fffe3SKacheong Poon }
372721fffe3SKacheong Poon 
373721fffe3SKacheong Poon /*
374721fffe3SKacheong Poon  * TCP routine to get the values of options.
375721fffe3SKacheong Poon  */
376721fffe3SKacheong Poon int
tcp_opt_get(conn_t * connp,int level,int name,uchar_t * ptr)377721fffe3SKacheong Poon tcp_opt_get(conn_t *connp, int level, int name, uchar_t *ptr)
378721fffe3SKacheong Poon {
379721fffe3SKacheong Poon 	int		*i1 = (int *)ptr;
380721fffe3SKacheong Poon 	tcp_t		*tcp = connp->conn_tcp;
381721fffe3SKacheong Poon 	conn_opt_arg_t	coas;
382721fffe3SKacheong Poon 	int		retval;
383721fffe3SKacheong Poon 
384721fffe3SKacheong Poon 	coas.coa_connp = connp;
385721fffe3SKacheong Poon 	coas.coa_ixa = connp->conn_ixa;
386721fffe3SKacheong Poon 	coas.coa_ipp = &connp->conn_xmit_ipp;
387721fffe3SKacheong Poon 	coas.coa_ancillary = B_FALSE;
388721fffe3SKacheong Poon 	coas.coa_changed = 0;
389721fffe3SKacheong Poon 
390721fffe3SKacheong Poon 	switch (level) {
391721fffe3SKacheong Poon 	case SOL_SOCKET:
392721fffe3SKacheong Poon 		switch (name) {
393721fffe3SKacheong Poon 		case SO_SND_COPYAVOID:
394721fffe3SKacheong Poon 			*i1 = tcp->tcp_snd_zcopy_on ?
395721fffe3SKacheong Poon 			    SO_SND_COPYAVOID : 0;
396721fffe3SKacheong Poon 			return (sizeof (int));
397721fffe3SKacheong Poon 		case SO_ACCEPTCONN:
398721fffe3SKacheong Poon 			*i1 = (tcp->tcp_state == TCPS_LISTEN);
399721fffe3SKacheong Poon 			return (sizeof (int));
400721fffe3SKacheong Poon 		}
401721fffe3SKacheong Poon 		break;
402721fffe3SKacheong Poon 	case IPPROTO_TCP:
403721fffe3SKacheong Poon 		switch (name) {
404721fffe3SKacheong Poon 		case TCP_NODELAY:
405721fffe3SKacheong Poon 			*i1 = (tcp->tcp_naglim == 1) ? TCP_NODELAY : 0;
406721fffe3SKacheong Poon 			return (sizeof (int));
407721fffe3SKacheong Poon 		case TCP_MAXSEG:
408721fffe3SKacheong Poon 			*i1 = tcp->tcp_mss;
409721fffe3SKacheong Poon 			return (sizeof (int));
410721fffe3SKacheong Poon 		case TCP_NOTIFY_THRESHOLD:
411721fffe3SKacheong Poon 			*i1 = (int)tcp->tcp_first_timer_threshold;
412721fffe3SKacheong Poon 			return (sizeof (int));
413721fffe3SKacheong Poon 		case TCP_ABORT_THRESHOLD:
414721fffe3SKacheong Poon 			*i1 = tcp->tcp_second_timer_threshold;
415721fffe3SKacheong Poon 			return (sizeof (int));
416721fffe3SKacheong Poon 		case TCP_CONN_NOTIFY_THRESHOLD:
417721fffe3SKacheong Poon 			*i1 = tcp->tcp_first_ctimer_threshold;
418721fffe3SKacheong Poon 			return (sizeof (int));
419721fffe3SKacheong Poon 		case TCP_CONN_ABORT_THRESHOLD:
420721fffe3SKacheong Poon 			*i1 = tcp->tcp_second_ctimer_threshold;
421721fffe3SKacheong Poon 			return (sizeof (int));
422721fffe3SKacheong Poon 		case TCP_INIT_CWND:
423721fffe3SKacheong Poon 			*i1 = tcp->tcp_init_cwnd;
424721fffe3SKacheong Poon 			return (sizeof (int));
425721fffe3SKacheong Poon 		case TCP_KEEPALIVE_THRESHOLD:
426721fffe3SKacheong Poon 			*i1 = tcp->tcp_ka_interval;
427721fffe3SKacheong Poon 			return (sizeof (int));
4283d0a255cSGarrett D'Amore 
4293d0a255cSGarrett D'Amore 		/*
4303d0a255cSGarrett D'Amore 		 * TCP_KEEPIDLE expects value in seconds, but
4313d0a255cSGarrett D'Amore 		 * tcp_ka_interval is in milliseconds.
4323d0a255cSGarrett D'Amore 		 */
4333d0a255cSGarrett D'Amore 		case TCP_KEEPIDLE:
4343d0a255cSGarrett D'Amore 			*i1 = tcp->tcp_ka_interval / 1000;
4353d0a255cSGarrett D'Amore 			return (sizeof (int));
4363d0a255cSGarrett D'Amore 		case TCP_KEEPCNT:
4373d0a255cSGarrett D'Amore 			*i1 = tcp->tcp_ka_cnt;
4383d0a255cSGarrett D'Amore 			return (sizeof (int));
4393d0a255cSGarrett D'Amore 
4403d0a255cSGarrett D'Amore 		/*
4413d0a255cSGarrett D'Amore 		 * TCP_KEEPINTVL expects value in seconds, but
4423d0a255cSGarrett D'Amore 		 * tcp_ka_rinterval is in milliseconds.
4433d0a255cSGarrett D'Amore 		 */
4443d0a255cSGarrett D'Amore 		case TCP_KEEPINTVL:
4453d0a255cSGarrett D'Amore 			*i1 = tcp->tcp_ka_rinterval / 1000;
4463d0a255cSGarrett D'Amore 			return (sizeof (int));
447721fffe3SKacheong Poon 		case TCP_KEEPALIVE_ABORT_THRESHOLD:
448721fffe3SKacheong Poon 			*i1 = tcp->tcp_ka_abort_thres;
449721fffe3SKacheong Poon 			return (sizeof (int));
450d4994511SCody Peter Mello 		case TCP_CONGESTION: {
451d4994511SCody Peter Mello 			size_t len = strlcpy((char *)ptr, CC_ALGO(tcp)->name,
452d4994511SCody Peter Mello 			    CC_ALGO_NAME_MAX);
453d4994511SCody Peter Mello 			if (len >= CC_ALGO_NAME_MAX)
454d4994511SCody Peter Mello 				return (-1);
455d4994511SCody Peter Mello 			return (len + 1);
456d4994511SCody Peter Mello 		}
457721fffe3SKacheong Poon 		case TCP_CORK:
458721fffe3SKacheong Poon 			*i1 = tcp->tcp_cork;
459721fffe3SKacheong Poon 			return (sizeof (int));
460861fa149SNils Nieuwejaar 		case TCP_QUICKACK:
461861fa149SNils Nieuwejaar 			*i1 = tcp->tcp_quickack;
462861fa149SNils Nieuwejaar 			return (sizeof (int));
463707e74bcSKacheong Poon 		case TCP_RTO_INITIAL:
464707e74bcSKacheong Poon 			*i1 = tcp->tcp_rto_initial;
465707e74bcSKacheong Poon 			return (sizeof (uint32_t));
466707e74bcSKacheong Poon 		case TCP_RTO_MIN:
467707e74bcSKacheong Poon 			*i1 = tcp->tcp_rto_min;
468707e74bcSKacheong Poon 			return (sizeof (uint32_t));
469707e74bcSKacheong Poon 		case TCP_RTO_MAX:
470707e74bcSKacheong Poon 			*i1 = tcp->tcp_rto_max;
471707e74bcSKacheong Poon 			return (sizeof (uint32_t));
472707e74bcSKacheong Poon 		case TCP_LINGER2:
473707e74bcSKacheong Poon 			*i1 = tcp->tcp_fin_wait_2_flush_interval / SECONDS;
474707e74bcSKacheong Poon 			return (sizeof (int));
475721fffe3SKacheong Poon 		}
476721fffe3SKacheong Poon 		break;
477721fffe3SKacheong Poon 	case IPPROTO_IP:
478721fffe3SKacheong Poon 		if (connp->conn_family != AF_INET)
479721fffe3SKacheong Poon 			return (-1);
480721fffe3SKacheong Poon 		switch (name) {
481721fffe3SKacheong Poon 		case IP_OPTIONS:
482721fffe3SKacheong Poon 		case T_IP_OPTIONS:
483721fffe3SKacheong Poon 			/* Caller ensures enough space */
484721fffe3SKacheong Poon 			return (ip_opt_get_user(connp, ptr));
485721fffe3SKacheong Poon 		default:
486721fffe3SKacheong Poon 			break;
487721fffe3SKacheong Poon 		}
488721fffe3SKacheong Poon 		break;
489721fffe3SKacheong Poon 
490721fffe3SKacheong Poon 	case IPPROTO_IPV6:
491721fffe3SKacheong Poon 		/*
492721fffe3SKacheong Poon 		 * IPPROTO_IPV6 options are only supported for sockets
493721fffe3SKacheong Poon 		 * that are using IPv6 on the wire.
494721fffe3SKacheong Poon 		 */
495721fffe3SKacheong Poon 		if (connp->conn_ipversion != IPV6_VERSION) {
496721fffe3SKacheong Poon 			return (-1);
497721fffe3SKacheong Poon 		}
498721fffe3SKacheong Poon 		switch (name) {
499721fffe3SKacheong Poon 		case IPV6_PATHMTU:
500721fffe3SKacheong Poon 			if (tcp->tcp_state < TCPS_ESTABLISHED)
501721fffe3SKacheong Poon 				return (-1);
502721fffe3SKacheong Poon 			break;
503721fffe3SKacheong Poon 		}
504721fffe3SKacheong Poon 		break;
505721fffe3SKacheong Poon 	}
506721fffe3SKacheong Poon 	mutex_enter(&connp->conn_lock);
507721fffe3SKacheong Poon 	retval = conn_opt_get(&coas, level, name, ptr);
508721fffe3SKacheong Poon 	mutex_exit(&connp->conn_lock);
509721fffe3SKacheong Poon 	return (retval);
510721fffe3SKacheong Poon }
511721fffe3SKacheong Poon 
512721fffe3SKacheong Poon /*
513721fffe3SKacheong Poon  * We declare as 'int' rather than 'void' to satisfy pfi_t arg requirements.
514721fffe3SKacheong Poon  * Parameters are assumed to be verified by the caller.
515721fffe3SKacheong Poon  */
516721fffe3SKacheong Poon /* ARGSUSED */
517721fffe3SKacheong Poon int
tcp_opt_set(conn_t * connp,uint_t optset_context,int level,int name,uint_t inlen,uchar_t * invalp,uint_t * outlenp,uchar_t * outvalp,void * thisdg_attrs,cred_t * cr)518721fffe3SKacheong Poon tcp_opt_set(conn_t *connp, uint_t optset_context, int level, int name,
519721fffe3SKacheong Poon     uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
520721fffe3SKacheong Poon     void *thisdg_attrs, cred_t *cr)
521721fffe3SKacheong Poon {
522721fffe3SKacheong Poon 	tcp_t	*tcp = connp->conn_tcp;
523721fffe3SKacheong Poon 	int	*i1 = (int *)invalp;
524721fffe3SKacheong Poon 	boolean_t onoff = (*i1 == 0) ? 0 : 1;
525721fffe3SKacheong Poon 	boolean_t checkonly;
526721fffe3SKacheong Poon 	int	reterr;
527721fffe3SKacheong Poon 	tcp_stack_t	*tcps = tcp->tcp_tcps;
528721fffe3SKacheong Poon 	conn_opt_arg_t	coas;
529707e74bcSKacheong Poon 	uint32_t	val = *((uint32_t *)invalp);
530721fffe3SKacheong Poon 
531721fffe3SKacheong Poon 	coas.coa_connp = connp;
532721fffe3SKacheong Poon 	coas.coa_ixa = connp->conn_ixa;
533721fffe3SKacheong Poon 	coas.coa_ipp = &connp->conn_xmit_ipp;
534721fffe3SKacheong Poon 	coas.coa_ancillary = B_FALSE;
535721fffe3SKacheong Poon 	coas.coa_changed = 0;
536721fffe3SKacheong Poon 
537721fffe3SKacheong Poon 	switch (optset_context) {
538721fffe3SKacheong Poon 	case SETFN_OPTCOM_CHECKONLY:
539721fffe3SKacheong Poon 		checkonly = B_TRUE;
540721fffe3SKacheong Poon 		/*
541721fffe3SKacheong Poon 		 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ
542721fffe3SKacheong Poon 		 * inlen != 0 implies value supplied and
543221e47fbSAndy Fiddaman 		 *	we have to "pretend" to set it.
544721fffe3SKacheong Poon 		 * inlen == 0 implies that there is no
545221e47fbSAndy Fiddaman 		 *	value part in T_CHECK request and just validation
546721fffe3SKacheong Poon 		 * done elsewhere should be enough, we just return here.
547721fffe3SKacheong Poon 		 */
548721fffe3SKacheong Poon 		if (inlen == 0) {
549721fffe3SKacheong Poon 			*outlenp = 0;
550721fffe3SKacheong Poon 			return (0);
551721fffe3SKacheong Poon 		}
552721fffe3SKacheong Poon 		break;
553721fffe3SKacheong Poon 	case SETFN_OPTCOM_NEGOTIATE:
554721fffe3SKacheong Poon 		checkonly = B_FALSE;
555721fffe3SKacheong Poon 		break;
556721fffe3SKacheong Poon 	case SETFN_UD_NEGOTIATE: /* error on conn-oriented transports ? */
557721fffe3SKacheong Poon 	case SETFN_CONN_NEGOTIATE:
558721fffe3SKacheong Poon 		checkonly = B_FALSE;
559721fffe3SKacheong Poon 		/*
560721fffe3SKacheong Poon 		 * Negotiating local and "association-related" options
561721fffe3SKacheong Poon 		 * from other (T_CONN_REQ, T_CONN_RES,T_UNITDATA_REQ)
562721fffe3SKacheong Poon 		 * primitives is allowed by XTI, but we choose
563721fffe3SKacheong Poon 		 * to not implement this style negotiation for Internet
564721fffe3SKacheong Poon 		 * protocols (We interpret it as a must for OSI world but
565721fffe3SKacheong Poon 		 * optional for Internet protocols) for all options.
566721fffe3SKacheong Poon 		 * [ Will do only for the few options that enable test
567721fffe3SKacheong Poon 		 * suites that our XTI implementation of this feature
568721fffe3SKacheong Poon 		 * works for transports that do allow it ]
569721fffe3SKacheong Poon 		 */
570721fffe3SKacheong Poon 		if (!tcp_allow_connopt_set(level, name)) {
571721fffe3SKacheong Poon 			*outlenp = 0;
572721fffe3SKacheong Poon 			return (EINVAL);
573721fffe3SKacheong Poon 		}
574721fffe3SKacheong Poon 		break;
575721fffe3SKacheong Poon 	default:
576721fffe3SKacheong Poon 		/*
577721fffe3SKacheong Poon 		 * We should never get here
578721fffe3SKacheong Poon 		 */
579721fffe3SKacheong Poon 		*outlenp = 0;
580721fffe3SKacheong Poon 		return (EINVAL);
581721fffe3SKacheong Poon 	}
582721fffe3SKacheong Poon 
583721fffe3SKacheong Poon 	ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) ||
584721fffe3SKacheong Poon 	    (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0));
585721fffe3SKacheong Poon 
586721fffe3SKacheong Poon 	/*
587721fffe3SKacheong Poon 	 * For TCP, we should have no ancillary data sent down
588721fffe3SKacheong Poon 	 * (sendmsg isn't supported for SOCK_STREAM), so thisdg_attrs
589721fffe3SKacheong Poon 	 * has to be zero.
590721fffe3SKacheong Poon 	 */
591721fffe3SKacheong Poon 	ASSERT(thisdg_attrs == NULL);
592721fffe3SKacheong Poon 
593721fffe3SKacheong Poon 	/*
594721fffe3SKacheong Poon 	 * For fixed length options, no sanity check
595721fffe3SKacheong Poon 	 * of passed in length is done. It is assumed *_optcom_req()
596721fffe3SKacheong Poon 	 * routines do the right thing.
597721fffe3SKacheong Poon 	 */
598721fffe3SKacheong Poon 	switch (level) {
599721fffe3SKacheong Poon 	case SOL_SOCKET:
600721fffe3SKacheong Poon 		switch (name) {
601721fffe3SKacheong Poon 		case SO_KEEPALIVE:
602721fffe3SKacheong Poon 			if (checkonly) {
603721fffe3SKacheong Poon 				/* check only case */
604721fffe3SKacheong Poon 				break;
605721fffe3SKacheong Poon 			}
606721fffe3SKacheong Poon 
607721fffe3SKacheong Poon 			if (!onoff) {
608721fffe3SKacheong Poon 				if (connp->conn_keepalive) {
609721fffe3SKacheong Poon 					if (tcp->tcp_ka_tid != 0) {
610721fffe3SKacheong Poon 						(void) TCP_TIMER_CANCEL(tcp,
611721fffe3SKacheong Poon 						    tcp->tcp_ka_tid);
612721fffe3SKacheong Poon 						tcp->tcp_ka_tid = 0;
613721fffe3SKacheong Poon 					}
614721fffe3SKacheong Poon 					connp->conn_keepalive = 0;
615721fffe3SKacheong Poon 				}
616721fffe3SKacheong Poon 				break;
617721fffe3SKacheong Poon 			}
618721fffe3SKacheong Poon 			if (!connp->conn_keepalive) {
619721fffe3SKacheong Poon 				/* Crank up the keepalive timer */
620721fffe3SKacheong Poon 				tcp->tcp_ka_last_intrvl = 0;
621721fffe3SKacheong Poon 				tcp->tcp_ka_tid = TCP_TIMER(tcp,
62266cd0f60SKacheong Poon 				    tcp_keepalive_timer, tcp->tcp_ka_interval);
623721fffe3SKacheong Poon 				connp->conn_keepalive = 1;
624721fffe3SKacheong Poon 			}
625721fffe3SKacheong Poon 			break;
626721fffe3SKacheong Poon 		case SO_SNDBUF: {
627721fffe3SKacheong Poon 			if (*i1 > tcps->tcps_max_buf) {
628721fffe3SKacheong Poon 				*outlenp = 0;
629721fffe3SKacheong Poon 				return (ENOBUFS);
630721fffe3SKacheong Poon 			}
631721fffe3SKacheong Poon 			if (checkonly)
632721fffe3SKacheong Poon 				break;
633721fffe3SKacheong Poon 
634721fffe3SKacheong Poon 			connp->conn_sndbuf = *i1;
635721fffe3SKacheong Poon 			if (tcps->tcps_snd_lowat_fraction != 0) {
636721fffe3SKacheong Poon 				connp->conn_sndlowat = connp->conn_sndbuf /
637721fffe3SKacheong Poon 				    tcps->tcps_snd_lowat_fraction;
638721fffe3SKacheong Poon 			}
639721fffe3SKacheong Poon 			(void) tcp_maxpsz_set(tcp, B_TRUE);
640721fffe3SKacheong Poon 			/*
641721fffe3SKacheong Poon 			 * If we are flow-controlled, recheck the condition.
642721fffe3SKacheong Poon 			 * There are apps that increase SO_SNDBUF size when
643721fffe3SKacheong Poon 			 * flow-controlled (EWOULDBLOCK), and expect the flow
644721fffe3SKacheong Poon 			 * control condition to be lifted right away.
645721fffe3SKacheong Poon 			 */
646721fffe3SKacheong Poon 			mutex_enter(&tcp->tcp_non_sq_lock);
647721fffe3SKacheong Poon 			if (tcp->tcp_flow_stopped &&
648721fffe3SKacheong Poon 			    TCP_UNSENT_BYTES(tcp) < connp->conn_sndbuf) {
649721fffe3SKacheong Poon 				tcp_clrqfull(tcp);
650721fffe3SKacheong Poon 			}
651721fffe3SKacheong Poon 			mutex_exit(&tcp->tcp_non_sq_lock);
652721fffe3SKacheong Poon 			*outlenp = inlen;
653721fffe3SKacheong Poon 			return (0);
654721fffe3SKacheong Poon 		}
655721fffe3SKacheong Poon 		case SO_RCVBUF:
656721fffe3SKacheong Poon 			if (*i1 > tcps->tcps_max_buf) {
657721fffe3SKacheong Poon 				*outlenp = 0;
658721fffe3SKacheong Poon 				return (ENOBUFS);
659721fffe3SKacheong Poon 			}
660721fffe3SKacheong Poon 			/* Silently ignore zero */
661721fffe3SKacheong Poon 			if (!checkonly && *i1 != 0) {
662721fffe3SKacheong Poon 				*i1 = MSS_ROUNDUP(*i1, tcp->tcp_mss);
663721fffe3SKacheong Poon 				(void) tcp_rwnd_set(tcp, *i1);
664721fffe3SKacheong Poon 			}
665721fffe3SKacheong Poon 			/*
666721fffe3SKacheong Poon 			 * XXX should we return the rwnd here
667721fffe3SKacheong Poon 			 * and tcp_opt_get ?
668721fffe3SKacheong Poon 			 */
669721fffe3SKacheong Poon 			*outlenp = inlen;
670721fffe3SKacheong Poon 			return (0);
671721fffe3SKacheong Poon 		case SO_SND_COPYAVOID:
672721fffe3SKacheong Poon 			if (!checkonly) {
673721fffe3SKacheong Poon 				if (tcp->tcp_loopback ||
674721fffe3SKacheong Poon 				    (onoff != 1) || !tcp_zcopy_check(tcp)) {
675721fffe3SKacheong Poon 					*outlenp = 0;
676721fffe3SKacheong Poon 					return (EOPNOTSUPP);
677721fffe3SKacheong Poon 				}
678721fffe3SKacheong Poon 				tcp->tcp_snd_zcopy_aware = 1;
679721fffe3SKacheong Poon 			}
680721fffe3SKacheong Poon 			*outlenp = inlen;
681721fffe3SKacheong Poon 			return (0);
682721fffe3SKacheong Poon 		}
683721fffe3SKacheong Poon 		break;
684721fffe3SKacheong Poon 	case IPPROTO_TCP:
685721fffe3SKacheong Poon 		switch (name) {
686721fffe3SKacheong Poon 		case TCP_NODELAY:
687721fffe3SKacheong Poon 			if (!checkonly)
688721fffe3SKacheong Poon 				tcp->tcp_naglim = *i1 ? 1 : tcp->tcp_mss;
689721fffe3SKacheong Poon 			break;
690721fffe3SKacheong Poon 		case TCP_NOTIFY_THRESHOLD:
691721fffe3SKacheong Poon 			if (!checkonly)
692721fffe3SKacheong Poon 				tcp->tcp_first_timer_threshold = *i1;
693721fffe3SKacheong Poon 			break;
694721fffe3SKacheong Poon 		case TCP_ABORT_THRESHOLD:
695721fffe3SKacheong Poon 			if (!checkonly)
696721fffe3SKacheong Poon 				tcp->tcp_second_timer_threshold = *i1;
697721fffe3SKacheong Poon 			break;
698721fffe3SKacheong Poon 		case TCP_CONN_NOTIFY_THRESHOLD:
699721fffe3SKacheong Poon 			if (!checkonly)
700721fffe3SKacheong Poon 				tcp->tcp_first_ctimer_threshold = *i1;
701721fffe3SKacheong Poon 			break;
702721fffe3SKacheong Poon 		case TCP_CONN_ABORT_THRESHOLD:
703721fffe3SKacheong Poon 			if (!checkonly)
704721fffe3SKacheong Poon 				tcp->tcp_second_ctimer_threshold = *i1;
705721fffe3SKacheong Poon 			break;
706721fffe3SKacheong Poon 		case TCP_RECVDSTADDR:
707721fffe3SKacheong Poon 			if (tcp->tcp_state > TCPS_LISTEN) {
708721fffe3SKacheong Poon 				*outlenp = 0;
709721fffe3SKacheong Poon 				return (EOPNOTSUPP);
710721fffe3SKacheong Poon 			}
711721fffe3SKacheong Poon 			/* Setting done in conn_opt_set */
712721fffe3SKacheong Poon 			break;
713707e74bcSKacheong Poon 		case TCP_INIT_CWND:
714721fffe3SKacheong Poon 			if (checkonly)
715721fffe3SKacheong Poon 				break;
716721fffe3SKacheong Poon 
717721fffe3SKacheong Poon 			/*
718721fffe3SKacheong Poon 			 * Only allow socket with network configuration
719721fffe3SKacheong Poon 			 * privilege to set the initial cwnd to be larger
720721fffe3SKacheong Poon 			 * than allowed by RFC 3390.
721721fffe3SKacheong Poon 			 */
7229ebe787fSDan McDonald 			if (val > MIN(4, MAX(2, 4380 / tcp->tcp_mss))) {
7239ebe787fSDan McDonald 				if ((reterr = secpolicy_ip_config(cr, B_TRUE))
7249ebe787fSDan McDonald 				    != 0) {
7259ebe787fSDan McDonald 					*outlenp = 0;
7269ebe787fSDan McDonald 					return (reterr);
7279ebe787fSDan McDonald 				}
7289ebe787fSDan McDonald 				if (val > tcp_max_init_cwnd) {
7299ebe787fSDan McDonald 					*outlenp = 0;
7309ebe787fSDan McDonald 					return (EINVAL);
7319ebe787fSDan McDonald 				}
732721fffe3SKacheong Poon 			}
7339ebe787fSDan McDonald 
734707e74bcSKacheong Poon 			tcp->tcp_init_cwnd = val;
7359ebe787fSDan McDonald 
7369ebe787fSDan McDonald 			/*
7379ebe787fSDan McDonald 			 * If the socket is connected, AND no outbound data
7389ebe787fSDan McDonald 			 * has been sent, reset the actual cwnd values.
7399ebe787fSDan McDonald 			 */
7409ebe787fSDan McDonald 			if (tcp->tcp_state == TCPS_ESTABLISHED &&
7419ebe787fSDan McDonald 			    tcp->tcp_iss == tcp->tcp_snxt - 1) {
7429ebe787fSDan McDonald 				tcp->tcp_cwnd =
7439ebe787fSDan McDonald 				    MIN(tcp->tcp_rwnd, val * tcp->tcp_mss);
7449ebe787fSDan McDonald 			}
745721fffe3SKacheong Poon 			break;
7463d0a255cSGarrett D'Amore 
7473d0a255cSGarrett D'Amore 		/*
7483d0a255cSGarrett D'Amore 		 * TCP_KEEPIDLE is in seconds but TCP_KEEPALIVE_THRESHOLD
7493d0a255cSGarrett D'Amore 		 * is in milliseconds. TCP_KEEPIDLE is introduced for
7503d0a255cSGarrett D'Amore 		 * compatibility with other Unix flavors.
7513d0a255cSGarrett D'Amore 		 * We can fall through TCP_KEEPALIVE_THRESHOLD logic after
7523d0a255cSGarrett D'Amore 		 * converting the input to milliseconds.
7533d0a255cSGarrett D'Amore 		 */
7543d0a255cSGarrett D'Amore 		case TCP_KEEPIDLE:
7553d0a255cSGarrett D'Amore 			*i1 *= 1000;
7568ed55813SVineeth Pillai 			/* FALLTHRU */
7573d0a255cSGarrett D'Amore 
758721fffe3SKacheong Poon 		case TCP_KEEPALIVE_THRESHOLD:
759721fffe3SKacheong Poon 			if (checkonly)
760721fffe3SKacheong Poon 				break;
761721fffe3SKacheong Poon 
762721fffe3SKacheong Poon 			if (*i1 < tcps->tcps_keepalive_interval_low ||
763721fffe3SKacheong Poon 			    *i1 > tcps->tcps_keepalive_interval_high) {
764721fffe3SKacheong Poon 				*outlenp = 0;
765721fffe3SKacheong Poon 				return (EINVAL);
766721fffe3SKacheong Poon 			}
767721fffe3SKacheong Poon 			if (*i1 != tcp->tcp_ka_interval) {
768721fffe3SKacheong Poon 				tcp->tcp_ka_interval = *i1;
769721fffe3SKacheong Poon 				/*
770721fffe3SKacheong Poon 				 * Check if we need to restart the
771721fffe3SKacheong Poon 				 * keepalive timer.
772721fffe3SKacheong Poon 				 */
773721fffe3SKacheong Poon 				if (tcp->tcp_ka_tid != 0) {
774721fffe3SKacheong Poon 					ASSERT(connp->conn_keepalive);
775721fffe3SKacheong Poon 					(void) TCP_TIMER_CANCEL(tcp,
776721fffe3SKacheong Poon 					    tcp->tcp_ka_tid);
777721fffe3SKacheong Poon 					tcp->tcp_ka_last_intrvl = 0;
778721fffe3SKacheong Poon 					tcp->tcp_ka_tid = TCP_TIMER(tcp,
779721fffe3SKacheong Poon 					    tcp_keepalive_timer,
78066cd0f60SKacheong Poon 					    tcp->tcp_ka_interval);
781721fffe3SKacheong Poon 				}
782721fffe3SKacheong Poon 			}
783721fffe3SKacheong Poon 			break;
7843d0a255cSGarrett D'Amore 
7853d0a255cSGarrett D'Amore 		/*
7863d0a255cSGarrett D'Amore 		 * tcp_ka_abort_thres = tcp_ka_rinterval * tcp_ka_cnt.
7873d0a255cSGarrett D'Amore 		 * So setting TCP_KEEPCNT or TCP_KEEPINTVL can affect all the
7883d0a255cSGarrett D'Amore 		 * three members - tcp_ka_abort_thres, tcp_ka_rinterval and
7893d0a255cSGarrett D'Amore 		 * tcp_ka_cnt.
7903d0a255cSGarrett D'Amore 		 */
7913d0a255cSGarrett D'Amore 		case TCP_KEEPCNT:
7923d0a255cSGarrett D'Amore 			if (checkonly)
7933d0a255cSGarrett D'Amore 				break;
7943d0a255cSGarrett D'Amore 
7953d0a255cSGarrett D'Amore 			if (*i1 == 0) {
7963d0a255cSGarrett D'Amore 				return (EINVAL);
7973d0a255cSGarrett D'Amore 			} else if (tcp->tcp_ka_rinterval == 0) {
798a41f965aSBryan Cantrill 				/*
799a41f965aSBryan Cantrill 				 * When TCP_KEEPCNT is specified without first
800a41f965aSBryan Cantrill 				 * specifying a TCP_KEEPINTVL, we infer an
801a41f965aSBryan Cantrill 				 * interval based on a tunable specific to our
802a41f965aSBryan Cantrill 				 * stack: the tcp_keepalive_abort_interval.
803a41f965aSBryan Cantrill 				 * (Or the TCP_KEEPALIVE_ABORT_THRESHOLD, in
804a41f965aSBryan Cantrill 				 * the unlikely event that that has been set.)
805a41f965aSBryan Cantrill 				 * Given the abort interval's default value of
806a41f965aSBryan Cantrill 				 * 480 seconds, low TCP_KEEPCNT values can
807a41f965aSBryan Cantrill 				 * result in intervals that exceed the default
808a41f965aSBryan Cantrill 				 * maximum RTO of 60 seconds.  Rather than
809a41f965aSBryan Cantrill 				 * fail in these cases, we (implicitly) clamp
810a41f965aSBryan Cantrill 				 * the interval at the maximum RTO; if the
811a41f965aSBryan Cantrill 				 * TCP_KEEPCNT is shortly followed by a
812a41f965aSBryan Cantrill 				 * TCP_KEEPINTVL (as we expect), the abort
813a41f965aSBryan Cantrill 				 * threshold will be recalculated correctly --
814a41f965aSBryan Cantrill 				 * and if a TCP_KEEPINTVL is not forthcoming,
815a41f965aSBryan Cantrill 				 * keep-alive will at least operate reasonably
816a41f965aSBryan Cantrill 				 * given the underconfigured state.
817a41f965aSBryan Cantrill 				 */
818a41f965aSBryan Cantrill 				uint32_t interval;
819a41f965aSBryan Cantrill 
820a41f965aSBryan Cantrill 				interval = tcp->tcp_ka_abort_thres / *i1;
821a41f965aSBryan Cantrill 
822a41f965aSBryan Cantrill 				if (interval < tcp->tcp_rto_min)
823a41f965aSBryan Cantrill 					interval = tcp->tcp_rto_min;
824a41f965aSBryan Cantrill 
825a41f965aSBryan Cantrill 				if (interval > tcp->tcp_rto_max)
826a41f965aSBryan Cantrill 					interval = tcp->tcp_rto_max;
8273d0a255cSGarrett D'Amore 
828a41f965aSBryan Cantrill 				tcp->tcp_ka_rinterval = interval;
8293d0a255cSGarrett D'Amore 			} else {
8303d0a255cSGarrett D'Amore 				if ((*i1 * tcp->tcp_ka_rinterval) <
8313d0a255cSGarrett D'Amore 				    tcps->tcps_keepalive_abort_interval_low ||
8323d0a255cSGarrett D'Amore 				    (*i1 * tcp->tcp_ka_rinterval) >
8333d0a255cSGarrett D'Amore 				    tcps->tcps_keepalive_abort_interval_high)
8343d0a255cSGarrett D'Amore 					return (EINVAL);
8353d0a255cSGarrett D'Amore 				tcp->tcp_ka_abort_thres =
8363d0a255cSGarrett D'Amore 				    (*i1 * tcp->tcp_ka_rinterval);
8373d0a255cSGarrett D'Amore 			}
8383d0a255cSGarrett D'Amore 			tcp->tcp_ka_cnt = *i1;
8393d0a255cSGarrett D'Amore 			break;
8403d0a255cSGarrett D'Amore 		case TCP_KEEPINTVL:
8413d0a255cSGarrett D'Amore 			/*
8423d0a255cSGarrett D'Amore 			 * TCP_KEEPINTVL is specified in seconds, but
8433d0a255cSGarrett D'Amore 			 * tcp_ka_rinterval is in milliseconds.
8443d0a255cSGarrett D'Amore 			 */
8453d0a255cSGarrett D'Amore 
8463d0a255cSGarrett D'Amore 			if (checkonly)
8473d0a255cSGarrett D'Amore 				break;
8483d0a255cSGarrett D'Amore 
8493d0a255cSGarrett D'Amore 			if ((*i1 * 1000) < tcp->tcp_rto_min ||
8503d0a255cSGarrett D'Amore 			    (*i1 * 1000) > tcp->tcp_rto_max)
8513d0a255cSGarrett D'Amore 				return (EINVAL);
8523d0a255cSGarrett D'Amore 
8533d0a255cSGarrett D'Amore 			if (tcp->tcp_ka_cnt == 0) {
8543d0a255cSGarrett D'Amore 				tcp->tcp_ka_cnt =
8553d0a255cSGarrett D'Amore 				    tcp->tcp_ka_abort_thres / (*i1 * 1000);
8563d0a255cSGarrett D'Amore 			} else {
8573d0a255cSGarrett D'Amore 				if ((*i1 * tcp->tcp_ka_cnt * 1000) <
8583d0a255cSGarrett D'Amore 				    tcps->tcps_keepalive_abort_interval_low ||
8593d0a255cSGarrett D'Amore 				    (*i1 * tcp->tcp_ka_cnt * 1000) >
8603d0a255cSGarrett D'Amore 				    tcps->tcps_keepalive_abort_interval_high)
8613d0a255cSGarrett D'Amore 					return (EINVAL);
8623d0a255cSGarrett D'Amore 				tcp->tcp_ka_abort_thres =
8633d0a255cSGarrett D'Amore 				    (*i1 * tcp->tcp_ka_cnt * 1000);
8643d0a255cSGarrett D'Amore 			}
8653d0a255cSGarrett D'Amore 			tcp->tcp_ka_rinterval = *i1 * 1000;
8663d0a255cSGarrett D'Amore 			break;
867721fffe3SKacheong Poon 		case TCP_KEEPALIVE_ABORT_THRESHOLD:
868721fffe3SKacheong Poon 			if (!checkonly) {
869721fffe3SKacheong Poon 				if (*i1 <
870721fffe3SKacheong Poon 				    tcps->tcps_keepalive_abort_interval_low ||
871721fffe3SKacheong Poon 				    *i1 >
872721fffe3SKacheong Poon 				    tcps->tcps_keepalive_abort_interval_high) {
873721fffe3SKacheong Poon 					*outlenp = 0;
874721fffe3SKacheong Poon 					return (EINVAL);
875721fffe3SKacheong Poon 				}
876721fffe3SKacheong Poon 				tcp->tcp_ka_abort_thres = *i1;
8773d0a255cSGarrett D'Amore 				tcp->tcp_ka_cnt = 0;
8783d0a255cSGarrett D'Amore 				tcp->tcp_ka_rinterval = 0;
879721fffe3SKacheong Poon 			}
880721fffe3SKacheong Poon 			break;
881d4994511SCody Peter Mello 		case TCP_CONGESTION: {
882d4994511SCody Peter Mello 			struct cc_algo *algo;
883d4994511SCody Peter Mello 
884d4994511SCody Peter Mello 			if (checkonly) {
885d4994511SCody Peter Mello 				break;
886d4994511SCody Peter Mello 			}
887d4994511SCody Peter Mello 
888d4994511SCody Peter Mello 			/*
889d4994511SCody Peter Mello 			 * Make sure the string is NUL-terminated. Some
890d4994511SCody Peter Mello 			 * consumers pass only the number of characters
891d4994511SCody Peter Mello 			 * in the string, and don't include the NUL
892d4994511SCody Peter Mello 			 * terminator, so we set it for them.
893d4994511SCody Peter Mello 			 */
894d4994511SCody Peter Mello 			if (inlen < CC_ALGO_NAME_MAX) {
895d4994511SCody Peter Mello 				invalp[inlen] = '\0';
896d4994511SCody Peter Mello 			}
897d4994511SCody Peter Mello 			invalp[CC_ALGO_NAME_MAX - 1] = '\0';
898d4994511SCody Peter Mello 
899d4994511SCody Peter Mello 			if ((algo = cc_load_algo((char *)invalp)) == NULL) {
900d4994511SCody Peter Mello 				return (ENOENT);
901d4994511SCody Peter Mello 			}
902d4994511SCody Peter Mello 
903d4994511SCody Peter Mello 			if (CC_ALGO(tcp)->cb_destroy != NULL) {
904d4994511SCody Peter Mello 				CC_ALGO(tcp)->cb_destroy(&tcp->tcp_ccv);
905d4994511SCody Peter Mello 			}
906d4994511SCody Peter Mello 
907d4994511SCody Peter Mello 			CC_DATA(tcp) = NULL;
908d4994511SCody Peter Mello 			CC_ALGO(tcp) = algo;
909d4994511SCody Peter Mello 
910d4994511SCody Peter Mello 			if (CC_ALGO(tcp)->cb_init != NULL) {
911d4994511SCody Peter Mello 				VERIFY0(CC_ALGO(tcp)->cb_init(&tcp->tcp_ccv));
912d4994511SCody Peter Mello 			}
913d4994511SCody Peter Mello 
914d4994511SCody Peter Mello 			break;
915d4994511SCody Peter Mello 		}
916721fffe3SKacheong Poon 		case TCP_CORK:
917721fffe3SKacheong Poon 			if (!checkonly) {
918721fffe3SKacheong Poon 				/*
919721fffe3SKacheong Poon 				 * if tcp->tcp_cork was set and is now
920721fffe3SKacheong Poon 				 * being unset, we have to make sure that
921721fffe3SKacheong Poon 				 * the remaining data gets sent out. Also
922721fffe3SKacheong Poon 				 * unset tcp->tcp_cork so that tcp_wput_data()
923721fffe3SKacheong Poon 				 * can send data even if it is less than mss
924721fffe3SKacheong Poon 				 */
925721fffe3SKacheong Poon 				if (tcp->tcp_cork && onoff == 0 &&
926721fffe3SKacheong Poon 				    tcp->tcp_unsent > 0) {
927721fffe3SKacheong Poon 					tcp->tcp_cork = B_FALSE;
928721fffe3SKacheong Poon 					tcp_wput_data(tcp, NULL, B_FALSE);
929721fffe3SKacheong Poon 				}
930721fffe3SKacheong Poon 				tcp->tcp_cork = onoff;
931721fffe3SKacheong Poon 			}
932721fffe3SKacheong Poon 			break;
933861fa149SNils Nieuwejaar 		case TCP_QUICKACK:
934861fa149SNils Nieuwejaar 			if (!checkonly) {
935861fa149SNils Nieuwejaar 				tcp->tcp_quickack = onoff;
936861fa149SNils Nieuwejaar 			}
937861fa149SNils Nieuwejaar 			break;
938c12492cfSSebastien Roy 		case TCP_RTO_INITIAL:
939707e74bcSKacheong Poon 			if (checkonly || val == 0)
940707e74bcSKacheong Poon 				break;
941707e74bcSKacheong Poon 
942707e74bcSKacheong Poon 			/*
943707e74bcSKacheong Poon 			 * Sanity checks
944707e74bcSKacheong Poon 			 *
945707e74bcSKacheong Poon 			 * The initial RTO should be bounded by the minimum
946707e74bcSKacheong Poon 			 * and maximum RTO.  And it should also be smaller
947707e74bcSKacheong Poon 			 * than the connect attempt abort timeout.  Otherwise,
948707e74bcSKacheong Poon 			 * the connection won't be aborted in a period
949707e74bcSKacheong Poon 			 * reasonably close to that timeout.
950707e74bcSKacheong Poon 			 */
951707e74bcSKacheong Poon 			if (val < tcp->tcp_rto_min || val > tcp->tcp_rto_max ||
952707e74bcSKacheong Poon 			    val > tcp->tcp_second_ctimer_threshold ||
953707e74bcSKacheong Poon 			    val < tcps->tcps_rexmit_interval_initial_low ||
954707e74bcSKacheong Poon 			    val > tcps->tcps_rexmit_interval_initial_high) {
955707e74bcSKacheong Poon 				*outlenp = 0;
956707e74bcSKacheong Poon 				return (EINVAL);
957707e74bcSKacheong Poon 			}
958707e74bcSKacheong Poon 			tcp->tcp_rto_initial = val;
959707e74bcSKacheong Poon 
960707e74bcSKacheong Poon 			/*
961707e74bcSKacheong Poon 			 * If TCP has not sent anything, need to re-calculate
962707e74bcSKacheong Poon 			 * tcp_rto.  Otherwise, this option change does not
963707e74bcSKacheong Poon 			 * really affect anything.
964707e74bcSKacheong Poon 			 */
965707e74bcSKacheong Poon 			if (tcp->tcp_state >= TCPS_SYN_SENT)
966707e74bcSKacheong Poon 				break;
967707e74bcSKacheong Poon 
968c12492cfSSebastien Roy 			tcp->tcp_rtt_sa = MSEC2NSEC(tcp->tcp_rto_initial) << 2;
969c12492cfSSebastien Roy 			tcp->tcp_rtt_sd = MSEC2NSEC(tcp->tcp_rto_initial) >> 1;
970c12492cfSSebastien Roy 			tcp->tcp_rto = tcp_calculate_rto(tcp, tcps,
971c12492cfSSebastien Roy 			    tcps->tcps_conn_grace_period);
972707e74bcSKacheong Poon 			break;
973707e74bcSKacheong Poon 		case TCP_RTO_MIN:
974707e74bcSKacheong Poon 			if (checkonly || val == 0)
975707e74bcSKacheong Poon 				break;
976707e74bcSKacheong Poon 
977707e74bcSKacheong Poon 			if (val < tcps->tcps_rexmit_interval_min_low ||
978707e74bcSKacheong Poon 			    val > tcps->tcps_rexmit_interval_min_high ||
979707e74bcSKacheong Poon 			    val > tcp->tcp_rto_max) {
980707e74bcSKacheong Poon 				*outlenp = 0;
981707e74bcSKacheong Poon 				return (EINVAL);
982707e74bcSKacheong Poon 			}
983707e74bcSKacheong Poon 			tcp->tcp_rto_min = val;
984707e74bcSKacheong Poon 			if (tcp->tcp_rto < val)
985707e74bcSKacheong Poon 				tcp->tcp_rto = val;
986707e74bcSKacheong Poon 			break;
987707e74bcSKacheong Poon 		case TCP_RTO_MAX:
988707e74bcSKacheong Poon 			if (checkonly || val == 0)
989707e74bcSKacheong Poon 				break;
990707e74bcSKacheong Poon 
991707e74bcSKacheong Poon 			/*
992707e74bcSKacheong Poon 			 * Sanity checks
993707e74bcSKacheong Poon 			 *
994707e74bcSKacheong Poon 			 * The maximum RTO should not be larger than the
995707e74bcSKacheong Poon 			 * connection abort timeout.  Otherwise, the
996707e74bcSKacheong Poon 			 * connection won't be aborted in a period reasonably
997707e74bcSKacheong Poon 			 * close to that timeout.
998707e74bcSKacheong Poon 			 */
999707e74bcSKacheong Poon 			if (val < tcps->tcps_rexmit_interval_max_low ||
1000707e74bcSKacheong Poon 			    val > tcps->tcps_rexmit_interval_max_high ||
1001707e74bcSKacheong Poon 			    val < tcp->tcp_rto_min ||
1002707e74bcSKacheong Poon 			    val > tcp->tcp_second_timer_threshold) {
1003707e74bcSKacheong Poon 				*outlenp = 0;
1004707e74bcSKacheong Poon 				return (EINVAL);
1005707e74bcSKacheong Poon 			}
1006707e74bcSKacheong Poon 			tcp->tcp_rto_max = val;
1007707e74bcSKacheong Poon 			if (tcp->tcp_rto > val)
1008707e74bcSKacheong Poon 				tcp->tcp_rto = val;
1009707e74bcSKacheong Poon 			break;
1010707e74bcSKacheong Poon 		case TCP_LINGER2:
1011707e74bcSKacheong Poon 			if (checkonly || *i1 == 0)
1012707e74bcSKacheong Poon 				break;
1013707e74bcSKacheong Poon 
1014707e74bcSKacheong Poon 			/*
1015707e74bcSKacheong Poon 			 * Note that the option value's unit is second.  And
1016707e74bcSKacheong Poon 			 * the value should be bigger than the private
1017707e74bcSKacheong Poon 			 * parameter tcp_fin_wait_2_flush_interval's lower
1018707e74bcSKacheong Poon 			 * bound and smaller than the current value of that
1019707e74bcSKacheong Poon 			 * parameter.  It should be smaller than the current
1020707e74bcSKacheong Poon 			 * value to avoid an app setting TCP_LINGER2 to a big
1021707e74bcSKacheong Poon 			 * value, causing resource to be held up too long in
1022707e74bcSKacheong Poon 			 * FIN-WAIT-2 state.
1023707e74bcSKacheong Poon 			 */
1024707e74bcSKacheong Poon 			if (*i1 < 0 ||
1025707e74bcSKacheong Poon 			    tcps->tcps_fin_wait_2_flush_interval_low/SECONDS >
1026707e74bcSKacheong Poon 			    *i1 ||
1027707e74bcSKacheong Poon 			    tcps->tcps_fin_wait_2_flush_interval/SECONDS <
1028707e74bcSKacheong Poon 			    *i1) {
1029707e74bcSKacheong Poon 				*outlenp = 0;
1030707e74bcSKacheong Poon 				return (EINVAL);
1031707e74bcSKacheong Poon 			}
1032707e74bcSKacheong Poon 			tcp->tcp_fin_wait_2_flush_interval = *i1 * SECONDS;
1033707e74bcSKacheong Poon 			break;
1034721fffe3SKacheong Poon 		default:
1035721fffe3SKacheong Poon 			break;
1036721fffe3SKacheong Poon 		}
1037721fffe3SKacheong Poon 		break;
1038721fffe3SKacheong Poon 	case IPPROTO_IP:
1039721fffe3SKacheong Poon 		if (connp->conn_family != AF_INET) {
1040721fffe3SKacheong Poon 			*outlenp = 0;
1041721fffe3SKacheong Poon 			return (EINVAL);
1042721fffe3SKacheong Poon 		}
1043721fffe3SKacheong Poon 		switch (name) {
1044721fffe3SKacheong Poon 		case IP_SEC_OPT:
1045721fffe3SKacheong Poon 			/*
1046721fffe3SKacheong Poon 			 * We should not allow policy setting after
1047721fffe3SKacheong Poon 			 * we start listening for connections.
1048721fffe3SKacheong Poon 			 */
1049721fffe3SKacheong Poon 			if (tcp->tcp_state == TCPS_LISTEN) {
1050721fffe3SKacheong Poon 				return (EINVAL);
1051721fffe3SKacheong Poon 			}
1052721fffe3SKacheong Poon 			break;
1053221e47fbSAndy Fiddaman 		case IP_RECVTOS:
1054221e47fbSAndy Fiddaman 			if (!checkonly) {
1055221e47fbSAndy Fiddaman 				/*
1056221e47fbSAndy Fiddaman 				 * Force it to be sent up with the next msg
1057221e47fbSAndy Fiddaman 				 * by setting it to a value which cannot
1058221e47fbSAndy Fiddaman 				 * appear in a packet (TOS is only 8-bits)
1059221e47fbSAndy Fiddaman 				 */
1060221e47fbSAndy Fiddaman 				tcp->tcp_recvtos = 0xffffffffU;
1061221e47fbSAndy Fiddaman 			}
1062221e47fbSAndy Fiddaman 			break;
1063721fffe3SKacheong Poon 		}
1064721fffe3SKacheong Poon 		break;
1065721fffe3SKacheong Poon 	case IPPROTO_IPV6:
1066721fffe3SKacheong Poon 		/*
1067721fffe3SKacheong Poon 		 * IPPROTO_IPV6 options are only supported for sockets
1068721fffe3SKacheong Poon 		 * that are using IPv6 on the wire.
1069721fffe3SKacheong Poon 		 */
1070721fffe3SKacheong Poon 		if (connp->conn_ipversion != IPV6_VERSION) {
1071721fffe3SKacheong Poon 			*outlenp = 0;
1072721fffe3SKacheong Poon 			return (EINVAL);
1073721fffe3SKacheong Poon 		}
1074721fffe3SKacheong Poon 
1075721fffe3SKacheong Poon 		switch (name) {
1076721fffe3SKacheong Poon 		case IPV6_RECVPKTINFO:
1077721fffe3SKacheong Poon 			if (!checkonly) {
1078721fffe3SKacheong Poon 				/* Force it to be sent up with the next msg */
1079721fffe3SKacheong Poon 				tcp->tcp_recvifindex = 0;
1080721fffe3SKacheong Poon 			}
1081721fffe3SKacheong Poon 			break;
1082721fffe3SKacheong Poon 		case IPV6_RECVTCLASS:
1083721fffe3SKacheong Poon 			if (!checkonly) {
1084721fffe3SKacheong Poon 				/* Force it to be sent up with the next msg */
1085721fffe3SKacheong Poon 				tcp->tcp_recvtclass = 0xffffffffU;
1086721fffe3SKacheong Poon 			}
1087721fffe3SKacheong Poon 			break;
1088721fffe3SKacheong Poon 		case IPV6_RECVHOPLIMIT:
1089721fffe3SKacheong Poon 			if (!checkonly) {
1090721fffe3SKacheong Poon 				/* Force it to be sent up with the next msg */
1091721fffe3SKacheong Poon 				tcp->tcp_recvhops = 0xffffffffU;
1092721fffe3SKacheong Poon 			}
1093721fffe3SKacheong Poon 			break;
1094721fffe3SKacheong Poon 		case IPV6_PKTINFO:
1095721fffe3SKacheong Poon 			/* This is an extra check for TCP */
1096721fffe3SKacheong Poon 			if (inlen == sizeof (struct in6_pktinfo)) {
1097721fffe3SKacheong Poon 				struct in6_pktinfo *pkti;
1098721fffe3SKacheong Poon 
1099721fffe3SKacheong Poon 				pkti = (struct in6_pktinfo *)invalp;
1100721fffe3SKacheong Poon 				/*
1101721fffe3SKacheong Poon 				 * RFC 3542 states that ipi6_addr must be
1102721fffe3SKacheong Poon 				 * the unspecified address when setting the
1103721fffe3SKacheong Poon 				 * IPV6_PKTINFO sticky socket option on a
1104721fffe3SKacheong Poon 				 * TCP socket.
1105721fffe3SKacheong Poon 				 */
1106721fffe3SKacheong Poon 				if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr))
1107721fffe3SKacheong Poon 					return (EINVAL);
1108721fffe3SKacheong Poon 			}
1109721fffe3SKacheong Poon 			break;
1110721fffe3SKacheong Poon 		case IPV6_SEC_OPT:
1111721fffe3SKacheong Poon 			/*
1112721fffe3SKacheong Poon 			 * We should not allow policy setting after
1113721fffe3SKacheong Poon 			 * we start listening for connections.
1114721fffe3SKacheong Poon 			 */
1115721fffe3SKacheong Poon 			if (tcp->tcp_state == TCPS_LISTEN) {
1116721fffe3SKacheong Poon 				return (EINVAL);
1117721fffe3SKacheong Poon 			}
1118721fffe3SKacheong Poon 			break;
1119721fffe3SKacheong Poon 		}
1120721fffe3SKacheong Poon 		break;
1121721fffe3SKacheong Poon 	}
1122721fffe3SKacheong Poon 	reterr = conn_opt_set(&coas, level, name, inlen, invalp,
1123721fffe3SKacheong Poon 	    checkonly, cr);
1124721fffe3SKacheong Poon 	if (reterr != 0) {
1125721fffe3SKacheong Poon 		*outlenp = 0;
1126721fffe3SKacheong Poon 		return (reterr);
1127721fffe3SKacheong Poon 	}
1128721fffe3SKacheong Poon 
1129721fffe3SKacheong Poon 	/*
1130721fffe3SKacheong Poon 	 * Common case of OK return with outval same as inval
1131721fffe3SKacheong Poon 	 */
1132721fffe3SKacheong Poon 	if (invalp != outvalp) {
1133721fffe3SKacheong Poon 		/* don't trust bcopy for identical src/dst */
1134721fffe3SKacheong Poon 		(void) bcopy(invalp, outvalp, inlen);
1135721fffe3SKacheong Poon 	}
1136721fffe3SKacheong Poon 	*outlenp = inlen;
1137721fffe3SKacheong Poon 
1138721fffe3SKacheong Poon 	if (coas.coa_changed & COA_HEADER_CHANGED) {
1139721fffe3SKacheong Poon 		/* If we are connected we rebuild the headers */
1140721fffe3SKacheong Poon 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) &&
1141721fffe3SKacheong Poon 		    !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
1142721fffe3SKacheong Poon 			reterr = tcp_build_hdrs(tcp);
1143721fffe3SKacheong Poon 			if (reterr != 0)
1144721fffe3SKacheong Poon 				return (reterr);
1145721fffe3SKacheong Poon 		}
1146721fffe3SKacheong Poon 	}
1147721fffe3SKacheong Poon 	if (coas.coa_changed & COA_ROUTE_CHANGED) {
1148721fffe3SKacheong Poon 		in6_addr_t nexthop;
1149721fffe3SKacheong Poon 
1150721fffe3SKacheong Poon 		/*
1151721fffe3SKacheong Poon 		 * If we are connected we re-cache the information.
1152721fffe3SKacheong Poon 		 * We ignore errors to preserve BSD behavior.
1153721fffe3SKacheong Poon 		 * Note that we don't redo IPsec policy lookup here
1154721fffe3SKacheong Poon 		 * since the final destination (or source) didn't change.
1155721fffe3SKacheong Poon 		 */
1156721fffe3SKacheong Poon 		ip_attr_nexthop(&connp->conn_xmit_ipp, connp->conn_ixa,
1157721fffe3SKacheong Poon 		    &connp->conn_faddr_v6, &nexthop);
1158721fffe3SKacheong Poon 
1159721fffe3SKacheong Poon 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) &&
1160721fffe3SKacheong Poon 		    !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
1161721fffe3SKacheong Poon 			(void) ip_attr_connect(connp, connp->conn_ixa,
1162721fffe3SKacheong Poon 			    &connp->conn_laddr_v6, &connp->conn_faddr_v6,
1163721fffe3SKacheong Poon 			    &nexthop, connp->conn_fport, NULL, NULL,
1164721fffe3SKacheong Poon 			    IPDF_VERIFY_DST);
1165721fffe3SKacheong Poon 		}
1166721fffe3SKacheong Poon 	}
1167721fffe3SKacheong Poon 	if ((coas.coa_changed & COA_SNDBUF_CHANGED) && !IPCL_IS_NONSTR(connp)) {
1168721fffe3SKacheong Poon 		connp->conn_wq->q_hiwat = connp->conn_sndbuf;
1169721fffe3SKacheong Poon 	}
1170721fffe3SKacheong Poon 	if (coas.coa_changed & COA_WROFF_CHANGED) {
1171721fffe3SKacheong Poon 		connp->conn_wroff = connp->conn_ht_iphc_allocated +
1172721fffe3SKacheong Poon 		    tcps->tcps_wroff_xtra;
1173721fffe3SKacheong Poon 		(void) proto_set_tx_wroff(connp->conn_rq, connp,
1174721fffe3SKacheong Poon 		    connp->conn_wroff);
1175721fffe3SKacheong Poon 	}
1176721fffe3SKacheong Poon 	if (coas.coa_changed & COA_OOBINLINE_CHANGED) {
1177721fffe3SKacheong Poon 		if (IPCL_IS_NONSTR(connp))
1178721fffe3SKacheong Poon 			proto_set_rx_oob_opt(connp, onoff);
1179721fffe3SKacheong Poon 	}
1180721fffe3SKacheong Poon 	return (0);
1181721fffe3SKacheong Poon }
1182