xref: /illumos-gate/usr/src/uts/common/inet/tcp/tcp_opt_data.c (revision d49945110829673d27d215f4db010ac1d22a68de)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
545916cd2Sjpk  * Common Development and Distribution License (the "License").
645916cd2Sjpk  * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate  *
87c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate  * and limitations under the License.
127c478bd9Sstevel@tonic-gate  *
137c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate  *
197c478bd9Sstevel@tonic-gate  * CDDL HEADER END
207c478bd9Sstevel@tonic-gate  */
217c478bd9Sstevel@tonic-gate /*
2266cd0f60SKacheong Poon  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
233d0a255cSGarrett D'Amore  * Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved.
24*d4994511SCody Peter Mello  * Copyright 2019 Joyent, Inc.
25c12492cfSSebastien Roy  * Copyright (c) 2016 by Delphix. All rights reserved.
267c478bd9Sstevel@tonic-gate  */
277c478bd9Sstevel@tonic-gate 
287c478bd9Sstevel@tonic-gate #include <sys/types.h>
297c478bd9Sstevel@tonic-gate #include <sys/stream.h>
307c478bd9Sstevel@tonic-gate #define	_SUN_TPI_VERSION 2
317c478bd9Sstevel@tonic-gate #include <sys/tihdr.h>
327c478bd9Sstevel@tonic-gate #include <sys/socket.h>
337c478bd9Sstevel@tonic-gate #include <sys/xti_xtiopt.h>
347c478bd9Sstevel@tonic-gate #include <sys/xti_inet.h>
35721fffe3SKacheong Poon #include <sys/policy.h>
367c478bd9Sstevel@tonic-gate 
37*d4994511SCody Peter Mello #include <inet/cc.h>
387c478bd9Sstevel@tonic-gate #include <inet/common.h>
397c478bd9Sstevel@tonic-gate #include <netinet/ip6.h>
407c478bd9Sstevel@tonic-gate #include <inet/ip.h>
417c478bd9Sstevel@tonic-gate 
427c478bd9Sstevel@tonic-gate #include <netinet/in.h>
437c478bd9Sstevel@tonic-gate #include <netinet/tcp.h>
447c478bd9Sstevel@tonic-gate #include <inet/optcom.h>
45721fffe3SKacheong Poon #include <inet/proto_set.h>
46bd670b35SErik Nordmark #include <inet/tcp_impl.h>
477c478bd9Sstevel@tonic-gate 
48ca3c8f41SDavid Höppner static int	tcp_opt_default(queue_t *, int, int, uchar_t *);
49ca3c8f41SDavid Höppner 
507c478bd9Sstevel@tonic-gate /*
517c478bd9Sstevel@tonic-gate  * Table of all known options handled on a TCP protocol stack.
527c478bd9Sstevel@tonic-gate  *
537c478bd9Sstevel@tonic-gate  * Note: This table contains options processed by both TCP and IP levels
547c478bd9Sstevel@tonic-gate  *       and is the superset of options that can be performed on a TCP over IP
557c478bd9Sstevel@tonic-gate  *       stack.
567c478bd9Sstevel@tonic-gate  */
577c478bd9Sstevel@tonic-gate opdes_t	tcp_opt_arr[] = {
587c478bd9Sstevel@tonic-gate 
59bd670b35SErik Nordmark { SO_LINGER,	SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0,
607c478bd9Sstevel@tonic-gate 	sizeof (struct linger), 0 },
617c478bd9Sstevel@tonic-gate 
62bd670b35SErik Nordmark { SO_DEBUG,	SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
63bd670b35SErik Nordmark { SO_KEEPALIVE,	SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
64bd670b35SErik Nordmark { SO_DONTROUTE,	SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
65bd670b35SErik Nordmark { SO_USELOOPBACK, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0
667c478bd9Sstevel@tonic-gate 	},
67bd670b35SErik Nordmark { SO_BROADCAST,	SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
68bd670b35SErik Nordmark { SO_REUSEADDR, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
69bd670b35SErik Nordmark { SO_OOBINLINE, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
70bd670b35SErik Nordmark { SO_TYPE,	SOL_SOCKET, OA_R, OA_R, OP_NP, 0, sizeof (int), 0 },
71bd670b35SErik Nordmark { SO_SNDBUF,	SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
72bd670b35SErik Nordmark { SO_RCVBUF,	SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
73bd670b35SErik Nordmark { SO_SNDTIMEO,	SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0,
743986c91eSanders 	sizeof (struct timeval), 0 },
75bd670b35SErik Nordmark { SO_RCVTIMEO,	SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0,
763986c91eSanders 	sizeof (struct timeval), 0 },
77bd670b35SErik Nordmark { SO_DGRAM_ERRIND, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0
787c478bd9Sstevel@tonic-gate 	},
797c478bd9Sstevel@tonic-gate { SO_SND_COPYAVOID, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
80bd670b35SErik Nordmark { SO_ANON_MLP, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int),
815d0bc3edSsommerfe 	0 },
82bd670b35SErik Nordmark { SO_MAC_EXEMPT, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int),
835d0bc3edSsommerfe 	0 },
84bd670b35SErik Nordmark { SO_MAC_IMPLICIT, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int),
855d3b8cb7SBill Sommerfeld 	0 },
86bd670b35SErik Nordmark { SO_ALLZONES, SOL_SOCKET, OA_R, OA_RW, OP_CONFIG, 0, sizeof (int),
875d0bc3edSsommerfe 	0 },
88bd670b35SErik Nordmark { SO_EXCLBIND, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
89ae347574Skcpoon 
90bd670b35SErik Nordmark { SO_DOMAIN,	SOL_SOCKET, OA_R, OA_R, OP_NP, 0, sizeof (int), 0 },
9188cda078Skcpoon 
92bd670b35SErik Nordmark { SO_PROTOTYPE,	SOL_SOCKET, OA_R, OA_R, OP_NP, 0, sizeof (int), 0 },
9388cda078Skcpoon 
94bd670b35SErik Nordmark { TCP_NODELAY,	IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0
957c478bd9Sstevel@tonic-gate 	},
96bd670b35SErik Nordmark { TCP_MAXSEG,	IPPROTO_TCP, OA_R, OA_R, OP_NP, 0, sizeof (uint_t),
977c478bd9Sstevel@tonic-gate 	536 },
987c478bd9Sstevel@tonic-gate 
997c478bd9Sstevel@tonic-gate { TCP_NOTIFY_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP,
100bd670b35SErik Nordmark 	OP_DEF_FN, sizeof (int), -1 /* not initialized */ },
1017c478bd9Sstevel@tonic-gate 
1027c478bd9Sstevel@tonic-gate { TCP_ABORT_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP,
103bd670b35SErik Nordmark 	OP_DEF_FN, sizeof (int), -1 /* not initialized */ },
1047c478bd9Sstevel@tonic-gate 
1057c478bd9Sstevel@tonic-gate { TCP_CONN_NOTIFY_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP,
106bd670b35SErik Nordmark 	OP_DEF_FN, sizeof (int), -1 /* not initialized */ },
1077c478bd9Sstevel@tonic-gate 
1087c478bd9Sstevel@tonic-gate { TCP_CONN_ABORT_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP,
109bd670b35SErik Nordmark 	OP_DEF_FN, sizeof (int), -1 /* not initialized */ },
1107c478bd9Sstevel@tonic-gate 
111bd670b35SErik Nordmark { TCP_RECVDSTADDR, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int),
1127c478bd9Sstevel@tonic-gate 	0 },
1137c478bd9Sstevel@tonic-gate 
114bd670b35SErik Nordmark { TCP_ANONPRIVBIND, IPPROTO_TCP, OA_R, OA_RW, OP_PRIVPORT, 0,
1157c478bd9Sstevel@tonic-gate 	sizeof (int), 0 },
1167c478bd9Sstevel@tonic-gate 
117bd670b35SErik Nordmark { TCP_EXCLBIND, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0
1187c478bd9Sstevel@tonic-gate 	},
1197c478bd9Sstevel@tonic-gate 
120bd670b35SErik Nordmark { TCP_INIT_CWND, IPPROTO_TCP, OA_RW, OA_RW, OP_CONFIG, 0,
1217c478bd9Sstevel@tonic-gate 	sizeof (int), 0 },
1227c478bd9Sstevel@tonic-gate 
123bd670b35SErik Nordmark { TCP_KEEPALIVE_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0,
1247c478bd9Sstevel@tonic-gate 	sizeof (int), 0	},
1257c478bd9Sstevel@tonic-gate 
1263d0a255cSGarrett D'Amore { TCP_KEEPIDLE, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
1273d0a255cSGarrett D'Amore 
1283d0a255cSGarrett D'Amore { TCP_KEEPCNT, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
1293d0a255cSGarrett D'Amore 
1303d0a255cSGarrett D'Amore { TCP_KEEPINTVL, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
1313d0a255cSGarrett D'Amore 
132bd670b35SErik Nordmark { TCP_KEEPALIVE_ABORT_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0,
1337c478bd9Sstevel@tonic-gate 	sizeof (int), 0	},
1347c478bd9Sstevel@tonic-gate 
135bd670b35SErik Nordmark { TCP_CORK, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
1367c478bd9Sstevel@tonic-gate 
137707e74bcSKacheong Poon { TCP_RTO_INITIAL, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (uint32_t), 0 },
138707e74bcSKacheong Poon 
139707e74bcSKacheong Poon { TCP_RTO_MIN, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (uint32_t), 0 },
140707e74bcSKacheong Poon 
141707e74bcSKacheong Poon { TCP_RTO_MAX, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (uint32_t), 0 },
142707e74bcSKacheong Poon 
143707e74bcSKacheong Poon { TCP_LINGER2, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
144707e74bcSKacheong Poon 
145*d4994511SCody Peter Mello { TCP_CONGESTION, IPPROTO_TCP, OA_RW, OA_RW, OP_NP,
146*d4994511SCody Peter Mello 	OP_VARLEN, CC_ALGO_NAME_MAX, 0 },
147*d4994511SCody Peter Mello 
1487c478bd9Sstevel@tonic-gate { IP_OPTIONS,	IPPROTO_IP, OA_RW, OA_RW, OP_NP,
149bd670b35SErik Nordmark 	(OP_VARLEN|OP_NODEFAULT),
1500f1702c5SYu Xiangning 	IP_MAX_OPT_LENGTH + IP_ADDR_LEN, -1 /* not initialized */ },
1517c478bd9Sstevel@tonic-gate { T_IP_OPTIONS,	IPPROTO_IP, OA_RW, OA_RW, OP_NP,
152bd670b35SErik Nordmark 	(OP_VARLEN|OP_NODEFAULT),
1530f1702c5SYu Xiangning 	IP_MAX_OPT_LENGTH + IP_ADDR_LEN, -1 /* not initialized */ },
1547c478bd9Sstevel@tonic-gate 
155bd670b35SErik Nordmark { IP_TOS,	IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
156bd670b35SErik Nordmark { T_IP_TOS,	IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
157bd670b35SErik Nordmark { IP_TTL,	IPPROTO_IP, OA_RW, OA_RW, OP_NP, OP_DEF_FN,
1587c478bd9Sstevel@tonic-gate 	sizeof (int), -1 /* not initialized */ },
1597c478bd9Sstevel@tonic-gate 
160bd670b35SErik Nordmark { IP_SEC_OPT, IPPROTO_IP, OA_RW, OA_RW, OP_NP, OP_NODEFAULT,
1617c478bd9Sstevel@tonic-gate 	sizeof (ipsec_req_t), -1 /* not initialized */ },
1627c478bd9Sstevel@tonic-gate 
163bd670b35SErik Nordmark { IP_BOUND_IF, IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0,
1647c478bd9Sstevel@tonic-gate 	sizeof (int),	0 /* no ifindex */ },
1657c478bd9Sstevel@tonic-gate 
166bd670b35SErik Nordmark { IP_UNSPEC_SRC, IPPROTO_IP, OA_R, OA_RW, OP_RAW, 0,
1677c478bd9Sstevel@tonic-gate 	sizeof (int), 0 },
1687c478bd9Sstevel@tonic-gate 
169bd670b35SErik Nordmark { IPV6_UNICAST_HOPS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, OP_DEF_FN,
1707c478bd9Sstevel@tonic-gate 	sizeof (int), -1 /* not initialized */ },
1717c478bd9Sstevel@tonic-gate 
172bd670b35SErik Nordmark { IPV6_BOUND_IF, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
1737c478bd9Sstevel@tonic-gate 	sizeof (int),	0 /* no ifindex */ },
1747c478bd9Sstevel@tonic-gate 
175bd670b35SErik Nordmark { IP_DONTFRAG, IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
176bd670b35SErik Nordmark 
177bd670b35SErik Nordmark { IP_NEXTHOP, IPPROTO_IP, OA_R, OA_RW, OP_CONFIG, 0,
17843d18f1cSpriyanka 	sizeof (in_addr_t),	-1 /* not initialized  */ },
17943d18f1cSpriyanka 
180bd670b35SErik Nordmark { IPV6_UNSPEC_SRC, IPPROTO_IPV6, OA_R, OA_RW, OP_RAW, 0,
1817c478bd9Sstevel@tonic-gate 	sizeof (int), 0 },
1827c478bd9Sstevel@tonic-gate 
1837c478bd9Sstevel@tonic-gate { IPV6_PKTINFO, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
184bd670b35SErik Nordmark 	(OP_NODEFAULT|OP_VARLEN),
1857c478bd9Sstevel@tonic-gate 	sizeof (struct in6_pktinfo), -1 /* not initialized */ },
1867c478bd9Sstevel@tonic-gate { IPV6_NEXTHOP, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
187bd670b35SErik Nordmark 	OP_NODEFAULT,
1887c478bd9Sstevel@tonic-gate 	sizeof (sin6_t), -1 /* not initialized */ },
1897c478bd9Sstevel@tonic-gate { IPV6_HOPOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
190bd670b35SErik Nordmark 	(OP_VARLEN|OP_NODEFAULT), 255*8,
1917c478bd9Sstevel@tonic-gate 	-1 /* not initialized */ },
1927c478bd9Sstevel@tonic-gate { IPV6_DSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
193bd670b35SErik Nordmark 	(OP_VARLEN|OP_NODEFAULT), 255*8,
1947c478bd9Sstevel@tonic-gate 	-1 /* not initialized */ },
1957c478bd9Sstevel@tonic-gate { IPV6_RTHDRDSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
196bd670b35SErik Nordmark 	(OP_VARLEN|OP_NODEFAULT), 255*8,
1977c478bd9Sstevel@tonic-gate 	-1 /* not initialized */ },
1987c478bd9Sstevel@tonic-gate { IPV6_RTHDR, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
199bd670b35SErik Nordmark 	(OP_VARLEN|OP_NODEFAULT), 255*8,
2007c478bd9Sstevel@tonic-gate 	-1 /* not initialized */ },
2017c478bd9Sstevel@tonic-gate { IPV6_TCLASS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
202bd670b35SErik Nordmark 	OP_NODEFAULT,
2037c478bd9Sstevel@tonic-gate 	sizeof (int), -1 /* not initialized */ },
2047c478bd9Sstevel@tonic-gate { IPV6_PATHMTU, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
205bd670b35SErik Nordmark 	OP_NODEFAULT,
2067c478bd9Sstevel@tonic-gate 	sizeof (struct ip6_mtuinfo), -1 /* not initialized */ },
207bd670b35SErik Nordmark { IPV6_DONTFRAG, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
208bd670b35SErik Nordmark 	sizeof (int), 0 },
209bd670b35SErik Nordmark { IPV6_USE_MIN_MTU, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
2107c478bd9Sstevel@tonic-gate 	sizeof (int), 0 },
211bd670b35SErik Nordmark { IPV6_V6ONLY, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
2127c478bd9Sstevel@tonic-gate 	sizeof (int), 0 },
2137c478bd9Sstevel@tonic-gate 
2147c478bd9Sstevel@tonic-gate /* Enable receipt of ancillary data */
215bd670b35SErik Nordmark { IPV6_RECVPKTINFO, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
2167c478bd9Sstevel@tonic-gate 	sizeof (int), 0 },
217bd670b35SErik Nordmark { IPV6_RECVHOPLIMIT, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
2187c478bd9Sstevel@tonic-gate 	sizeof (int), 0 },
219bd670b35SErik Nordmark { IPV6_RECVHOPOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
2207c478bd9Sstevel@tonic-gate 	sizeof (int), 0 },
221bd670b35SErik Nordmark { _OLD_IPV6_RECVDSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
2227c478bd9Sstevel@tonic-gate 	sizeof (int), 0 },
223bd670b35SErik Nordmark { IPV6_RECVDSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
2247c478bd9Sstevel@tonic-gate 	sizeof (int), 0 },
225bd670b35SErik Nordmark { IPV6_RECVRTHDR, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
2267c478bd9Sstevel@tonic-gate 	sizeof (int), 0 },
227bd670b35SErik Nordmark { IPV6_RECVRTHDRDSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
2287c478bd9Sstevel@tonic-gate 	sizeof (int), 0 },
229bd670b35SErik Nordmark { IPV6_RECVTCLASS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
2307c478bd9Sstevel@tonic-gate 	sizeof (int), 0 },
2317c478bd9Sstevel@tonic-gate 
232bd670b35SErik Nordmark { IPV6_SEC_OPT, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, OP_NODEFAULT,
2337c478bd9Sstevel@tonic-gate 	sizeof (ipsec_req_t), -1 /* not initialized */ },
234bd670b35SErik Nordmark { IPV6_SRC_PREFERENCES, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
2357c478bd9Sstevel@tonic-gate 	sizeof (uint32_t), IPV6_PREFER_SRC_DEFAULT },
2367c478bd9Sstevel@tonic-gate };
2377c478bd9Sstevel@tonic-gate 
2387c478bd9Sstevel@tonic-gate /*
2397c478bd9Sstevel@tonic-gate  * Table of all supported levels
2407c478bd9Sstevel@tonic-gate  * Note: Some levels (e.g. XTI_GENERIC) may be valid but may not have
2417c478bd9Sstevel@tonic-gate  * any supported options so we need this info separately.
2427c478bd9Sstevel@tonic-gate  *
2437c478bd9Sstevel@tonic-gate  * This is needed only for topmost tpi providers and is used only by
2447c478bd9Sstevel@tonic-gate  * XTI interfaces.
2457c478bd9Sstevel@tonic-gate  */
2467c478bd9Sstevel@tonic-gate optlevel_t	tcp_valid_levels_arr[] = {
2477c478bd9Sstevel@tonic-gate 	XTI_GENERIC,
2487c478bd9Sstevel@tonic-gate 	SOL_SOCKET,
2497c478bd9Sstevel@tonic-gate 	IPPROTO_TCP,
2507c478bd9Sstevel@tonic-gate 	IPPROTO_IP,
2517c478bd9Sstevel@tonic-gate 	IPPROTO_IPV6
2527c478bd9Sstevel@tonic-gate };
2537c478bd9Sstevel@tonic-gate 
2547c478bd9Sstevel@tonic-gate 
2557c478bd9Sstevel@tonic-gate #define	TCP_OPT_ARR_CNT		A_CNT(tcp_opt_arr)
2567c478bd9Sstevel@tonic-gate #define	TCP_VALID_LEVELS_CNT	A_CNT(tcp_valid_levels_arr)
2577c478bd9Sstevel@tonic-gate 
2587c478bd9Sstevel@tonic-gate uint_t tcp_max_optsize; /* initialized when TCP driver is loaded */
2597c478bd9Sstevel@tonic-gate 
2607c478bd9Sstevel@tonic-gate /*
2617c478bd9Sstevel@tonic-gate  * Initialize option database object for TCP
2627c478bd9Sstevel@tonic-gate  *
2637c478bd9Sstevel@tonic-gate  * This object represents database of options to search passed to
2647c478bd9Sstevel@tonic-gate  * {sock,tpi}optcom_req() interface routine to take care of option
2657c478bd9Sstevel@tonic-gate  * management and associated methods.
2667c478bd9Sstevel@tonic-gate  */
2677c478bd9Sstevel@tonic-gate 
2687c478bd9Sstevel@tonic-gate optdb_obj_t tcp_opt_obj = {
2697c478bd9Sstevel@tonic-gate 	tcp_opt_default,	/* TCP default value function pointer */
2700f1702c5SYu Xiangning 	tcp_tpi_opt_get,	/* TCP get function pointer */
2710f1702c5SYu Xiangning 	tcp_tpi_opt_set,	/* TCP set function pointer */
2727c478bd9Sstevel@tonic-gate 	TCP_OPT_ARR_CNT,	/* TCP option database count of entries */
2737c478bd9Sstevel@tonic-gate 	tcp_opt_arr,		/* TCP option database */
2747c478bd9Sstevel@tonic-gate 	TCP_VALID_LEVELS_CNT,	/* TCP valid level count of entries */
2757c478bd9Sstevel@tonic-gate 	tcp_valid_levels_arr	/* TCP valid level array */
2767c478bd9Sstevel@tonic-gate };
277721fffe3SKacheong Poon 
278721fffe3SKacheong Poon static int tcp_max_init_cwnd = TCP_MAX_INIT_CWND;
279721fffe3SKacheong Poon 
280721fffe3SKacheong Poon /*
281721fffe3SKacheong Poon  * Some TCP options can be "set" by requesting them in the option
282721fffe3SKacheong Poon  * buffer. This is needed for XTI feature test though we do not
283721fffe3SKacheong Poon  * allow it in general. We interpret that this mechanism is more
284721fffe3SKacheong Poon  * applicable to OSI protocols and need not be allowed in general.
285721fffe3SKacheong Poon  * This routine filters out options for which it is not allowed (most)
286721fffe3SKacheong Poon  * and lets through those (few) for which it is. [ The XTI interface
287721fffe3SKacheong Poon  * test suite specifics will imply that any XTI_GENERIC level XTI_* if
288721fffe3SKacheong Poon  * ever implemented will have to be allowed here ].
289721fffe3SKacheong Poon  */
290721fffe3SKacheong Poon static boolean_t
291721fffe3SKacheong Poon tcp_allow_connopt_set(int level, int name)
292721fffe3SKacheong Poon {
293721fffe3SKacheong Poon 
294721fffe3SKacheong Poon 	switch (level) {
295721fffe3SKacheong Poon 	case IPPROTO_TCP:
296721fffe3SKacheong Poon 		switch (name) {
297721fffe3SKacheong Poon 		case TCP_NODELAY:
298721fffe3SKacheong Poon 			return (B_TRUE);
299721fffe3SKacheong Poon 		default:
300721fffe3SKacheong Poon 			return (B_FALSE);
301721fffe3SKacheong Poon 		}
302721fffe3SKacheong Poon 		/*NOTREACHED*/
303721fffe3SKacheong Poon 	default:
304721fffe3SKacheong Poon 		return (B_FALSE);
305721fffe3SKacheong Poon 	}
306721fffe3SKacheong Poon 	/*NOTREACHED*/
307721fffe3SKacheong Poon }
308721fffe3SKacheong Poon 
309721fffe3SKacheong Poon /*
310721fffe3SKacheong Poon  * This routine gets default values of certain options whose default
311721fffe3SKacheong Poon  * values are maintained by protocol specific code
312721fffe3SKacheong Poon  */
313721fffe3SKacheong Poon /* ARGSUSED */
314ca3c8f41SDavid Höppner static int
315721fffe3SKacheong Poon tcp_opt_default(queue_t *q, int level, int name, uchar_t *ptr)
316721fffe3SKacheong Poon {
317721fffe3SKacheong Poon 	int32_t	*i1 = (int32_t *)ptr;
318721fffe3SKacheong Poon 	tcp_stack_t	*tcps = Q_TO_TCP(q)->tcp_tcps;
319721fffe3SKacheong Poon 
320721fffe3SKacheong Poon 	switch (level) {
321721fffe3SKacheong Poon 	case IPPROTO_TCP:
322721fffe3SKacheong Poon 		switch (name) {
323721fffe3SKacheong Poon 		case TCP_NOTIFY_THRESHOLD:
324721fffe3SKacheong Poon 			*i1 = tcps->tcps_ip_notify_interval;
325721fffe3SKacheong Poon 			break;
326721fffe3SKacheong Poon 		case TCP_ABORT_THRESHOLD:
327721fffe3SKacheong Poon 			*i1 = tcps->tcps_ip_abort_interval;
328721fffe3SKacheong Poon 			break;
329721fffe3SKacheong Poon 		case TCP_CONN_NOTIFY_THRESHOLD:
330721fffe3SKacheong Poon 			*i1 = tcps->tcps_ip_notify_cinterval;
331721fffe3SKacheong Poon 			break;
332721fffe3SKacheong Poon 		case TCP_CONN_ABORT_THRESHOLD:
333721fffe3SKacheong Poon 			*i1 = tcps->tcps_ip_abort_cinterval;
334721fffe3SKacheong Poon 			break;
335721fffe3SKacheong Poon 		default:
336721fffe3SKacheong Poon 			return (-1);
337721fffe3SKacheong Poon 		}
338721fffe3SKacheong Poon 		break;
339721fffe3SKacheong Poon 	case IPPROTO_IP:
340721fffe3SKacheong Poon 		switch (name) {
341721fffe3SKacheong Poon 		case IP_TTL:
342721fffe3SKacheong Poon 			*i1 = tcps->tcps_ipv4_ttl;
343721fffe3SKacheong Poon 			break;
344721fffe3SKacheong Poon 		default:
345721fffe3SKacheong Poon 			return (-1);
346721fffe3SKacheong Poon 		}
347721fffe3SKacheong Poon 		break;
348721fffe3SKacheong Poon 	case IPPROTO_IPV6:
349721fffe3SKacheong Poon 		switch (name) {
350721fffe3SKacheong Poon 		case IPV6_UNICAST_HOPS:
351721fffe3SKacheong Poon 			*i1 = tcps->tcps_ipv6_hoplimit;
352721fffe3SKacheong Poon 			break;
353721fffe3SKacheong Poon 		default:
354721fffe3SKacheong Poon 			return (-1);
355721fffe3SKacheong Poon 		}
356721fffe3SKacheong Poon 		break;
357721fffe3SKacheong Poon 	default:
358721fffe3SKacheong Poon 		return (-1);
359721fffe3SKacheong Poon 	}
360721fffe3SKacheong Poon 	return (sizeof (int));
361721fffe3SKacheong Poon }
362721fffe3SKacheong Poon 
363721fffe3SKacheong Poon /*
364721fffe3SKacheong Poon  * TCP routine to get the values of options.
365721fffe3SKacheong Poon  */
366721fffe3SKacheong Poon int
367721fffe3SKacheong Poon tcp_opt_get(conn_t *connp, int level, int name, uchar_t *ptr)
368721fffe3SKacheong Poon {
369721fffe3SKacheong Poon 	int		*i1 = (int *)ptr;
370721fffe3SKacheong Poon 	tcp_t		*tcp = connp->conn_tcp;
371721fffe3SKacheong Poon 	conn_opt_arg_t	coas;
372721fffe3SKacheong Poon 	int		retval;
373721fffe3SKacheong Poon 
374721fffe3SKacheong Poon 	coas.coa_connp = connp;
375721fffe3SKacheong Poon 	coas.coa_ixa = connp->conn_ixa;
376721fffe3SKacheong Poon 	coas.coa_ipp = &connp->conn_xmit_ipp;
377721fffe3SKacheong Poon 	coas.coa_ancillary = B_FALSE;
378721fffe3SKacheong Poon 	coas.coa_changed = 0;
379721fffe3SKacheong Poon 
380721fffe3SKacheong Poon 	switch (level) {
381721fffe3SKacheong Poon 	case SOL_SOCKET:
382721fffe3SKacheong Poon 		switch (name) {
383721fffe3SKacheong Poon 		case SO_SND_COPYAVOID:
384721fffe3SKacheong Poon 			*i1 = tcp->tcp_snd_zcopy_on ?
385721fffe3SKacheong Poon 			    SO_SND_COPYAVOID : 0;
386721fffe3SKacheong Poon 			return (sizeof (int));
387721fffe3SKacheong Poon 		case SO_ACCEPTCONN:
388721fffe3SKacheong Poon 			*i1 = (tcp->tcp_state == TCPS_LISTEN);
389721fffe3SKacheong Poon 			return (sizeof (int));
390721fffe3SKacheong Poon 		}
391721fffe3SKacheong Poon 		break;
392721fffe3SKacheong Poon 	case IPPROTO_TCP:
393721fffe3SKacheong Poon 		switch (name) {
394721fffe3SKacheong Poon 		case TCP_NODELAY:
395721fffe3SKacheong Poon 			*i1 = (tcp->tcp_naglim == 1) ? TCP_NODELAY : 0;
396721fffe3SKacheong Poon 			return (sizeof (int));
397721fffe3SKacheong Poon 		case TCP_MAXSEG:
398721fffe3SKacheong Poon 			*i1 = tcp->tcp_mss;
399721fffe3SKacheong Poon 			return (sizeof (int));
400721fffe3SKacheong Poon 		case TCP_NOTIFY_THRESHOLD:
401721fffe3SKacheong Poon 			*i1 = (int)tcp->tcp_first_timer_threshold;
402721fffe3SKacheong Poon 			return (sizeof (int));
403721fffe3SKacheong Poon 		case TCP_ABORT_THRESHOLD:
404721fffe3SKacheong Poon 			*i1 = tcp->tcp_second_timer_threshold;
405721fffe3SKacheong Poon 			return (sizeof (int));
406721fffe3SKacheong Poon 		case TCP_CONN_NOTIFY_THRESHOLD:
407721fffe3SKacheong Poon 			*i1 = tcp->tcp_first_ctimer_threshold;
408721fffe3SKacheong Poon 			return (sizeof (int));
409721fffe3SKacheong Poon 		case TCP_CONN_ABORT_THRESHOLD:
410721fffe3SKacheong Poon 			*i1 = tcp->tcp_second_ctimer_threshold;
411721fffe3SKacheong Poon 			return (sizeof (int));
412721fffe3SKacheong Poon 		case TCP_INIT_CWND:
413721fffe3SKacheong Poon 			*i1 = tcp->tcp_init_cwnd;
414721fffe3SKacheong Poon 			return (sizeof (int));
415721fffe3SKacheong Poon 		case TCP_KEEPALIVE_THRESHOLD:
416721fffe3SKacheong Poon 			*i1 = tcp->tcp_ka_interval;
417721fffe3SKacheong Poon 			return (sizeof (int));
4183d0a255cSGarrett D'Amore 
4193d0a255cSGarrett D'Amore 		/*
4203d0a255cSGarrett D'Amore 		 * TCP_KEEPIDLE expects value in seconds, but
4213d0a255cSGarrett D'Amore 		 * tcp_ka_interval is in milliseconds.
4223d0a255cSGarrett D'Amore 		 */
4233d0a255cSGarrett D'Amore 		case TCP_KEEPIDLE:
4243d0a255cSGarrett D'Amore 			*i1 = tcp->tcp_ka_interval / 1000;
4253d0a255cSGarrett D'Amore 			return (sizeof (int));
4263d0a255cSGarrett D'Amore 		case TCP_KEEPCNT:
4273d0a255cSGarrett D'Amore 			*i1 = tcp->tcp_ka_cnt;
4283d0a255cSGarrett D'Amore 			return (sizeof (int));
4293d0a255cSGarrett D'Amore 
4303d0a255cSGarrett D'Amore 		/*
4313d0a255cSGarrett D'Amore 		 * TCP_KEEPINTVL expects value in seconds, but
4323d0a255cSGarrett D'Amore 		 * tcp_ka_rinterval is in milliseconds.
4333d0a255cSGarrett D'Amore 		 */
4343d0a255cSGarrett D'Amore 		case TCP_KEEPINTVL:
4353d0a255cSGarrett D'Amore 			*i1 = tcp->tcp_ka_rinterval / 1000;
4363d0a255cSGarrett D'Amore 			return (sizeof (int));
437721fffe3SKacheong Poon 		case TCP_KEEPALIVE_ABORT_THRESHOLD:
438721fffe3SKacheong Poon 			*i1 = tcp->tcp_ka_abort_thres;
439721fffe3SKacheong Poon 			return (sizeof (int));
440*d4994511SCody Peter Mello 		case TCP_CONGESTION: {
441*d4994511SCody Peter Mello 			size_t len = strlcpy((char *)ptr, CC_ALGO(tcp)->name,
442*d4994511SCody Peter Mello 			    CC_ALGO_NAME_MAX);
443*d4994511SCody Peter Mello 			if (len >= CC_ALGO_NAME_MAX)
444*d4994511SCody Peter Mello 				return (-1);
445*d4994511SCody Peter Mello 			return (len + 1);
446*d4994511SCody Peter Mello 		}
447721fffe3SKacheong Poon 		case TCP_CORK:
448721fffe3SKacheong Poon 			*i1 = tcp->tcp_cork;
449721fffe3SKacheong Poon 			return (sizeof (int));
450707e74bcSKacheong Poon 		case TCP_RTO_INITIAL:
451707e74bcSKacheong Poon 			*i1 = tcp->tcp_rto_initial;
452707e74bcSKacheong Poon 			return (sizeof (uint32_t));
453707e74bcSKacheong Poon 		case TCP_RTO_MIN:
454707e74bcSKacheong Poon 			*i1 = tcp->tcp_rto_min;
455707e74bcSKacheong Poon 			return (sizeof (uint32_t));
456707e74bcSKacheong Poon 		case TCP_RTO_MAX:
457707e74bcSKacheong Poon 			*i1 = tcp->tcp_rto_max;
458707e74bcSKacheong Poon 			return (sizeof (uint32_t));
459707e74bcSKacheong Poon 		case TCP_LINGER2:
460707e74bcSKacheong Poon 			*i1 = tcp->tcp_fin_wait_2_flush_interval / SECONDS;
461707e74bcSKacheong Poon 			return (sizeof (int));
462721fffe3SKacheong Poon 		}
463721fffe3SKacheong Poon 		break;
464721fffe3SKacheong Poon 	case IPPROTO_IP:
465721fffe3SKacheong Poon 		if (connp->conn_family != AF_INET)
466721fffe3SKacheong Poon 			return (-1);
467721fffe3SKacheong Poon 		switch (name) {
468721fffe3SKacheong Poon 		case IP_OPTIONS:
469721fffe3SKacheong Poon 		case T_IP_OPTIONS:
470721fffe3SKacheong Poon 			/* Caller ensures enough space */
471721fffe3SKacheong Poon 			return (ip_opt_get_user(connp, ptr));
472721fffe3SKacheong Poon 		default:
473721fffe3SKacheong Poon 			break;
474721fffe3SKacheong Poon 		}
475721fffe3SKacheong Poon 		break;
476721fffe3SKacheong Poon 
477721fffe3SKacheong Poon 	case IPPROTO_IPV6:
478721fffe3SKacheong Poon 		/*
479721fffe3SKacheong Poon 		 * IPPROTO_IPV6 options are only supported for sockets
480721fffe3SKacheong Poon 		 * that are using IPv6 on the wire.
481721fffe3SKacheong Poon 		 */
482721fffe3SKacheong Poon 		if (connp->conn_ipversion != IPV6_VERSION) {
483721fffe3SKacheong Poon 			return (-1);
484721fffe3SKacheong Poon 		}
485721fffe3SKacheong Poon 		switch (name) {
486721fffe3SKacheong Poon 		case IPV6_PATHMTU:
487721fffe3SKacheong Poon 			if (tcp->tcp_state < TCPS_ESTABLISHED)
488721fffe3SKacheong Poon 				return (-1);
489721fffe3SKacheong Poon 			break;
490721fffe3SKacheong Poon 		}
491721fffe3SKacheong Poon 		break;
492721fffe3SKacheong Poon 	}
493721fffe3SKacheong Poon 	mutex_enter(&connp->conn_lock);
494721fffe3SKacheong Poon 	retval = conn_opt_get(&coas, level, name, ptr);
495721fffe3SKacheong Poon 	mutex_exit(&connp->conn_lock);
496721fffe3SKacheong Poon 	return (retval);
497721fffe3SKacheong Poon }
498721fffe3SKacheong Poon 
499721fffe3SKacheong Poon /*
500721fffe3SKacheong Poon  * We declare as 'int' rather than 'void' to satisfy pfi_t arg requirements.
501721fffe3SKacheong Poon  * Parameters are assumed to be verified by the caller.
502721fffe3SKacheong Poon  */
503721fffe3SKacheong Poon /* ARGSUSED */
504721fffe3SKacheong Poon int
505721fffe3SKacheong Poon tcp_opt_set(conn_t *connp, uint_t optset_context, int level, int name,
506721fffe3SKacheong Poon     uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
507721fffe3SKacheong Poon     void *thisdg_attrs, cred_t *cr)
508721fffe3SKacheong Poon {
509721fffe3SKacheong Poon 	tcp_t	*tcp = connp->conn_tcp;
510721fffe3SKacheong Poon 	int	*i1 = (int *)invalp;
511721fffe3SKacheong Poon 	boolean_t onoff = (*i1 == 0) ? 0 : 1;
512721fffe3SKacheong Poon 	boolean_t checkonly;
513721fffe3SKacheong Poon 	int	reterr;
514721fffe3SKacheong Poon 	tcp_stack_t	*tcps = tcp->tcp_tcps;
515721fffe3SKacheong Poon 	conn_opt_arg_t	coas;
516707e74bcSKacheong Poon 	uint32_t	val = *((uint32_t *)invalp);
517721fffe3SKacheong Poon 
518721fffe3SKacheong Poon 	coas.coa_connp = connp;
519721fffe3SKacheong Poon 	coas.coa_ixa = connp->conn_ixa;
520721fffe3SKacheong Poon 	coas.coa_ipp = &connp->conn_xmit_ipp;
521721fffe3SKacheong Poon 	coas.coa_ancillary = B_FALSE;
522721fffe3SKacheong Poon 	coas.coa_changed = 0;
523721fffe3SKacheong Poon 
524721fffe3SKacheong Poon 	switch (optset_context) {
525721fffe3SKacheong Poon 	case SETFN_OPTCOM_CHECKONLY:
526721fffe3SKacheong Poon 		checkonly = B_TRUE;
527721fffe3SKacheong Poon 		/*
528721fffe3SKacheong Poon 		 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ
529721fffe3SKacheong Poon 		 * inlen != 0 implies value supplied and
530721fffe3SKacheong Poon 		 * 	we have to "pretend" to set it.
531721fffe3SKacheong Poon 		 * inlen == 0 implies that there is no
532721fffe3SKacheong Poon 		 * 	value part in T_CHECK request and just validation
533721fffe3SKacheong Poon 		 * done elsewhere should be enough, we just return here.
534721fffe3SKacheong Poon 		 */
535721fffe3SKacheong Poon 		if (inlen == 0) {
536721fffe3SKacheong Poon 			*outlenp = 0;
537721fffe3SKacheong Poon 			return (0);
538721fffe3SKacheong Poon 		}
539721fffe3SKacheong Poon 		break;
540721fffe3SKacheong Poon 	case SETFN_OPTCOM_NEGOTIATE:
541721fffe3SKacheong Poon 		checkonly = B_FALSE;
542721fffe3SKacheong Poon 		break;
543721fffe3SKacheong Poon 	case SETFN_UD_NEGOTIATE: /* error on conn-oriented transports ? */
544721fffe3SKacheong Poon 	case SETFN_CONN_NEGOTIATE:
545721fffe3SKacheong Poon 		checkonly = B_FALSE;
546721fffe3SKacheong Poon 		/*
547721fffe3SKacheong Poon 		 * Negotiating local and "association-related" options
548721fffe3SKacheong Poon 		 * from other (T_CONN_REQ, T_CONN_RES,T_UNITDATA_REQ)
549721fffe3SKacheong Poon 		 * primitives is allowed by XTI, but we choose
550721fffe3SKacheong Poon 		 * to not implement this style negotiation for Internet
551721fffe3SKacheong Poon 		 * protocols (We interpret it is a must for OSI world but
552721fffe3SKacheong Poon 		 * optional for Internet protocols) for all options.
553721fffe3SKacheong Poon 		 * [ Will do only for the few options that enable test
554721fffe3SKacheong Poon 		 * suites that our XTI implementation of this feature
555721fffe3SKacheong Poon 		 * works for transports that do allow it ]
556721fffe3SKacheong Poon 		 */
557721fffe3SKacheong Poon 		if (!tcp_allow_connopt_set(level, name)) {
558721fffe3SKacheong Poon 			*outlenp = 0;
559721fffe3SKacheong Poon 			return (EINVAL);
560721fffe3SKacheong Poon 		}
561721fffe3SKacheong Poon 		break;
562721fffe3SKacheong Poon 	default:
563721fffe3SKacheong Poon 		/*
564721fffe3SKacheong Poon 		 * We should never get here
565721fffe3SKacheong Poon 		 */
566721fffe3SKacheong Poon 		*outlenp = 0;
567721fffe3SKacheong Poon 		return (EINVAL);
568721fffe3SKacheong Poon 	}
569721fffe3SKacheong Poon 
570721fffe3SKacheong Poon 	ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) ||
571721fffe3SKacheong Poon 	    (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0));
572721fffe3SKacheong Poon 
573721fffe3SKacheong Poon 	/*
574721fffe3SKacheong Poon 	 * For TCP, we should have no ancillary data sent down
575721fffe3SKacheong Poon 	 * (sendmsg isn't supported for SOCK_STREAM), so thisdg_attrs
576721fffe3SKacheong Poon 	 * has to be zero.
577721fffe3SKacheong Poon 	 */
578721fffe3SKacheong Poon 	ASSERT(thisdg_attrs == NULL);
579721fffe3SKacheong Poon 
580721fffe3SKacheong Poon 	/*
581721fffe3SKacheong Poon 	 * For fixed length options, no sanity check
582721fffe3SKacheong Poon 	 * of passed in length is done. It is assumed *_optcom_req()
583721fffe3SKacheong Poon 	 * routines do the right thing.
584721fffe3SKacheong Poon 	 */
585721fffe3SKacheong Poon 	switch (level) {
586721fffe3SKacheong Poon 	case SOL_SOCKET:
587721fffe3SKacheong Poon 		switch (name) {
588721fffe3SKacheong Poon 		case SO_KEEPALIVE:
589721fffe3SKacheong Poon 			if (checkonly) {
590721fffe3SKacheong Poon 				/* check only case */
591721fffe3SKacheong Poon 				break;
592721fffe3SKacheong Poon 			}
593721fffe3SKacheong Poon 
594721fffe3SKacheong Poon 			if (!onoff) {
595721fffe3SKacheong Poon 				if (connp->conn_keepalive) {
596721fffe3SKacheong Poon 					if (tcp->tcp_ka_tid != 0) {
597721fffe3SKacheong Poon 						(void) TCP_TIMER_CANCEL(tcp,
598721fffe3SKacheong Poon 						    tcp->tcp_ka_tid);
599721fffe3SKacheong Poon 						tcp->tcp_ka_tid = 0;
600721fffe3SKacheong Poon 					}
601721fffe3SKacheong Poon 					connp->conn_keepalive = 0;
602721fffe3SKacheong Poon 				}
603721fffe3SKacheong Poon 				break;
604721fffe3SKacheong Poon 			}
605721fffe3SKacheong Poon 			if (!connp->conn_keepalive) {
606721fffe3SKacheong Poon 				/* Crank up the keepalive timer */
607721fffe3SKacheong Poon 				tcp->tcp_ka_last_intrvl = 0;
608721fffe3SKacheong Poon 				tcp->tcp_ka_tid = TCP_TIMER(tcp,
60966cd0f60SKacheong Poon 				    tcp_keepalive_timer, tcp->tcp_ka_interval);
610721fffe3SKacheong Poon 				connp->conn_keepalive = 1;
611721fffe3SKacheong Poon 			}
612721fffe3SKacheong Poon 			break;
613721fffe3SKacheong Poon 		case SO_SNDBUF: {
614721fffe3SKacheong Poon 			if (*i1 > tcps->tcps_max_buf) {
615721fffe3SKacheong Poon 				*outlenp = 0;
616721fffe3SKacheong Poon 				return (ENOBUFS);
617721fffe3SKacheong Poon 			}
618721fffe3SKacheong Poon 			if (checkonly)
619721fffe3SKacheong Poon 				break;
620721fffe3SKacheong Poon 
621721fffe3SKacheong Poon 			connp->conn_sndbuf = *i1;
622721fffe3SKacheong Poon 			if (tcps->tcps_snd_lowat_fraction != 0) {
623721fffe3SKacheong Poon 				connp->conn_sndlowat = connp->conn_sndbuf /
624721fffe3SKacheong Poon 				    tcps->tcps_snd_lowat_fraction;
625721fffe3SKacheong Poon 			}
626721fffe3SKacheong Poon 			(void) tcp_maxpsz_set(tcp, B_TRUE);
627721fffe3SKacheong Poon 			/*
628721fffe3SKacheong Poon 			 * If we are flow-controlled, recheck the condition.
629721fffe3SKacheong Poon 			 * There are apps that increase SO_SNDBUF size when
630721fffe3SKacheong Poon 			 * flow-controlled (EWOULDBLOCK), and expect the flow
631721fffe3SKacheong Poon 			 * control condition to be lifted right away.
632721fffe3SKacheong Poon 			 */
633721fffe3SKacheong Poon 			mutex_enter(&tcp->tcp_non_sq_lock);
634721fffe3SKacheong Poon 			if (tcp->tcp_flow_stopped &&
635721fffe3SKacheong Poon 			    TCP_UNSENT_BYTES(tcp) < connp->conn_sndbuf) {
636721fffe3SKacheong Poon 				tcp_clrqfull(tcp);
637721fffe3SKacheong Poon 			}
638721fffe3SKacheong Poon 			mutex_exit(&tcp->tcp_non_sq_lock);
639721fffe3SKacheong Poon 			*outlenp = inlen;
640721fffe3SKacheong Poon 			return (0);
641721fffe3SKacheong Poon 		}
642721fffe3SKacheong Poon 		case SO_RCVBUF:
643721fffe3SKacheong Poon 			if (*i1 > tcps->tcps_max_buf) {
644721fffe3SKacheong Poon 				*outlenp = 0;
645721fffe3SKacheong Poon 				return (ENOBUFS);
646721fffe3SKacheong Poon 			}
647721fffe3SKacheong Poon 			/* Silently ignore zero */
648721fffe3SKacheong Poon 			if (!checkonly && *i1 != 0) {
649721fffe3SKacheong Poon 				*i1 = MSS_ROUNDUP(*i1, tcp->tcp_mss);
650721fffe3SKacheong Poon 				(void) tcp_rwnd_set(tcp, *i1);
651721fffe3SKacheong Poon 			}
652721fffe3SKacheong Poon 			/*
653721fffe3SKacheong Poon 			 * XXX should we return the rwnd here
654721fffe3SKacheong Poon 			 * and tcp_opt_get ?
655721fffe3SKacheong Poon 			 */
656721fffe3SKacheong Poon 			*outlenp = inlen;
657721fffe3SKacheong Poon 			return (0);
658721fffe3SKacheong Poon 		case SO_SND_COPYAVOID:
659721fffe3SKacheong Poon 			if (!checkonly) {
660721fffe3SKacheong Poon 				if (tcp->tcp_loopback ||
661721fffe3SKacheong Poon 				    (onoff != 1) || !tcp_zcopy_check(tcp)) {
662721fffe3SKacheong Poon 					*outlenp = 0;
663721fffe3SKacheong Poon 					return (EOPNOTSUPP);
664721fffe3SKacheong Poon 				}
665721fffe3SKacheong Poon 				tcp->tcp_snd_zcopy_aware = 1;
666721fffe3SKacheong Poon 			}
667721fffe3SKacheong Poon 			*outlenp = inlen;
668721fffe3SKacheong Poon 			return (0);
669721fffe3SKacheong Poon 		}
670721fffe3SKacheong Poon 		break;
671721fffe3SKacheong Poon 	case IPPROTO_TCP:
672721fffe3SKacheong Poon 		switch (name) {
673721fffe3SKacheong Poon 		case TCP_NODELAY:
674721fffe3SKacheong Poon 			if (!checkonly)
675721fffe3SKacheong Poon 				tcp->tcp_naglim = *i1 ? 1 : tcp->tcp_mss;
676721fffe3SKacheong Poon 			break;
677721fffe3SKacheong Poon 		case TCP_NOTIFY_THRESHOLD:
678721fffe3SKacheong Poon 			if (!checkonly)
679721fffe3SKacheong Poon 				tcp->tcp_first_timer_threshold = *i1;
680721fffe3SKacheong Poon 			break;
681721fffe3SKacheong Poon 		case TCP_ABORT_THRESHOLD:
682721fffe3SKacheong Poon 			if (!checkonly)
683721fffe3SKacheong Poon 				tcp->tcp_second_timer_threshold = *i1;
684721fffe3SKacheong Poon 			break;
685721fffe3SKacheong Poon 		case TCP_CONN_NOTIFY_THRESHOLD:
686721fffe3SKacheong Poon 			if (!checkonly)
687721fffe3SKacheong Poon 				tcp->tcp_first_ctimer_threshold = *i1;
688721fffe3SKacheong Poon 			break;
689721fffe3SKacheong Poon 		case TCP_CONN_ABORT_THRESHOLD:
690721fffe3SKacheong Poon 			if (!checkonly)
691721fffe3SKacheong Poon 				tcp->tcp_second_ctimer_threshold = *i1;
692721fffe3SKacheong Poon 			break;
693721fffe3SKacheong Poon 		case TCP_RECVDSTADDR:
694721fffe3SKacheong Poon 			if (tcp->tcp_state > TCPS_LISTEN) {
695721fffe3SKacheong Poon 				*outlenp = 0;
696721fffe3SKacheong Poon 				return (EOPNOTSUPP);
697721fffe3SKacheong Poon 			}
698721fffe3SKacheong Poon 			/* Setting done in conn_opt_set */
699721fffe3SKacheong Poon 			break;
700707e74bcSKacheong Poon 		case TCP_INIT_CWND:
701721fffe3SKacheong Poon 			if (checkonly)
702721fffe3SKacheong Poon 				break;
703721fffe3SKacheong Poon 
704721fffe3SKacheong Poon 			/*
705721fffe3SKacheong Poon 			 * Only allow socket with network configuration
706721fffe3SKacheong Poon 			 * privilege to set the initial cwnd to be larger
707721fffe3SKacheong Poon 			 * than allowed by RFC 3390.
708721fffe3SKacheong Poon 			 */
7099ebe787fSDan McDonald 			if (val > MIN(4, MAX(2, 4380 / tcp->tcp_mss))) {
7109ebe787fSDan McDonald 				if ((reterr = secpolicy_ip_config(cr, B_TRUE))
7119ebe787fSDan McDonald 				    != 0) {
7129ebe787fSDan McDonald 					*outlenp = 0;
7139ebe787fSDan McDonald 					return (reterr);
7149ebe787fSDan McDonald 				}
7159ebe787fSDan McDonald 				if (val > tcp_max_init_cwnd) {
7169ebe787fSDan McDonald 					*outlenp = 0;
7179ebe787fSDan McDonald 					return (EINVAL);
7189ebe787fSDan McDonald 				}
719721fffe3SKacheong Poon 			}
7209ebe787fSDan McDonald 
721707e74bcSKacheong Poon 			tcp->tcp_init_cwnd = val;
7229ebe787fSDan McDonald 
7239ebe787fSDan McDonald 			/*
7249ebe787fSDan McDonald 			 * If the socket is connected, AND no outbound data
7259ebe787fSDan McDonald 			 * has been sent, reset the actual cwnd values.
7269ebe787fSDan McDonald 			 */
7279ebe787fSDan McDonald 			if (tcp->tcp_state == TCPS_ESTABLISHED &&
7289ebe787fSDan McDonald 			    tcp->tcp_iss == tcp->tcp_snxt - 1) {
7299ebe787fSDan McDonald 				tcp->tcp_cwnd =
7309ebe787fSDan McDonald 				    MIN(tcp->tcp_rwnd, val * tcp->tcp_mss);
7319ebe787fSDan McDonald 			}
732721fffe3SKacheong Poon 			break;
7333d0a255cSGarrett D'Amore 
7343d0a255cSGarrett D'Amore 		/*
7353d0a255cSGarrett D'Amore 		 * TCP_KEEPIDLE is in seconds but TCP_KEEPALIVE_THRESHOLD
7363d0a255cSGarrett D'Amore 		 * is in milliseconds. TCP_KEEPIDLE is introduced for
7373d0a255cSGarrett D'Amore 		 * compatibility with other Unix flavors.
7383d0a255cSGarrett D'Amore 		 * We can fall through TCP_KEEPALIVE_THRESHOLD logic after
7393d0a255cSGarrett D'Amore 		 * converting the input to milliseconds.
7403d0a255cSGarrett D'Amore 		 */
7413d0a255cSGarrett D'Amore 		case TCP_KEEPIDLE:
7423d0a255cSGarrett D'Amore 			*i1 *= 1000;
7438ed55813SVineeth Pillai 			/* FALLTHRU */
7443d0a255cSGarrett D'Amore 
745721fffe3SKacheong Poon 		case TCP_KEEPALIVE_THRESHOLD:
746721fffe3SKacheong Poon 			if (checkonly)
747721fffe3SKacheong Poon 				break;
748721fffe3SKacheong Poon 
749721fffe3SKacheong Poon 			if (*i1 < tcps->tcps_keepalive_interval_low ||
750721fffe3SKacheong Poon 			    *i1 > tcps->tcps_keepalive_interval_high) {
751721fffe3SKacheong Poon 				*outlenp = 0;
752721fffe3SKacheong Poon 				return (EINVAL);
753721fffe3SKacheong Poon 			}
754721fffe3SKacheong Poon 			if (*i1 != tcp->tcp_ka_interval) {
755721fffe3SKacheong Poon 				tcp->tcp_ka_interval = *i1;
756721fffe3SKacheong Poon 				/*
757721fffe3SKacheong Poon 				 * Check if we need to restart the
758721fffe3SKacheong Poon 				 * keepalive timer.
759721fffe3SKacheong Poon 				 */
760721fffe3SKacheong Poon 				if (tcp->tcp_ka_tid != 0) {
761721fffe3SKacheong Poon 					ASSERT(connp->conn_keepalive);
762721fffe3SKacheong Poon 					(void) TCP_TIMER_CANCEL(tcp,
763721fffe3SKacheong Poon 					    tcp->tcp_ka_tid);
764721fffe3SKacheong Poon 					tcp->tcp_ka_last_intrvl = 0;
765721fffe3SKacheong Poon 					tcp->tcp_ka_tid = TCP_TIMER(tcp,
766721fffe3SKacheong Poon 					    tcp_keepalive_timer,
76766cd0f60SKacheong Poon 					    tcp->tcp_ka_interval);
768721fffe3SKacheong Poon 				}
769721fffe3SKacheong Poon 			}
770721fffe3SKacheong Poon 			break;
7713d0a255cSGarrett D'Amore 
7723d0a255cSGarrett D'Amore 		/*
7733d0a255cSGarrett D'Amore 		 * tcp_ka_abort_thres = tcp_ka_rinterval * tcp_ka_cnt.
7743d0a255cSGarrett D'Amore 		 * So setting TCP_KEEPCNT or TCP_KEEPINTVL can affect all the
7753d0a255cSGarrett D'Amore 		 * three members - tcp_ka_abort_thres, tcp_ka_rinterval and
7763d0a255cSGarrett D'Amore 		 * tcp_ka_cnt.
7773d0a255cSGarrett D'Amore 		 */
7783d0a255cSGarrett D'Amore 		case TCP_KEEPCNT:
7793d0a255cSGarrett D'Amore 			if (checkonly)
7803d0a255cSGarrett D'Amore 				break;
7813d0a255cSGarrett D'Amore 
7823d0a255cSGarrett D'Amore 			if (*i1 == 0) {
7833d0a255cSGarrett D'Amore 				return (EINVAL);
7843d0a255cSGarrett D'Amore 			} else if (tcp->tcp_ka_rinterval == 0) {
785a41f965aSBryan Cantrill 				/*
786a41f965aSBryan Cantrill 				 * When TCP_KEEPCNT is specified without first
787a41f965aSBryan Cantrill 				 * specifying a TCP_KEEPINTVL, we infer an
788a41f965aSBryan Cantrill 				 * interval based on a tunable specific to our
789a41f965aSBryan Cantrill 				 * stack: the tcp_keepalive_abort_interval.
790a41f965aSBryan Cantrill 				 * (Or the TCP_KEEPALIVE_ABORT_THRESHOLD, in
791a41f965aSBryan Cantrill 				 * the unlikely event that that has been set.)
792a41f965aSBryan Cantrill 				 * Given the abort interval's default value of
793a41f965aSBryan Cantrill 				 * 480 seconds, low TCP_KEEPCNT values can
794a41f965aSBryan Cantrill 				 * result in intervals that exceed the default
795a41f965aSBryan Cantrill 				 * maximum RTO of 60 seconds.  Rather than
796a41f965aSBryan Cantrill 				 * fail in these cases, we (implicitly) clamp
797a41f965aSBryan Cantrill 				 * the interval at the maximum RTO; if the
798a41f965aSBryan Cantrill 				 * TCP_KEEPCNT is shortly followed by a
799a41f965aSBryan Cantrill 				 * TCP_KEEPINTVL (as we expect), the abort
800a41f965aSBryan Cantrill 				 * threshold will be recalculated correctly --
801a41f965aSBryan Cantrill 				 * and if a TCP_KEEPINTVL is not forthcoming,
802a41f965aSBryan Cantrill 				 * keep-alive will at least operate reasonably
803a41f965aSBryan Cantrill 				 * given the underconfigured state.
804a41f965aSBryan Cantrill 				 */
805a41f965aSBryan Cantrill 				uint32_t interval;
806a41f965aSBryan Cantrill 
807a41f965aSBryan Cantrill 				interval = tcp->tcp_ka_abort_thres / *i1;
808a41f965aSBryan Cantrill 
809a41f965aSBryan Cantrill 				if (interval < tcp->tcp_rto_min)
810a41f965aSBryan Cantrill 					interval = tcp->tcp_rto_min;
811a41f965aSBryan Cantrill 
812a41f965aSBryan Cantrill 				if (interval > tcp->tcp_rto_max)
813a41f965aSBryan Cantrill 					interval = tcp->tcp_rto_max;
8143d0a255cSGarrett D'Amore 
815a41f965aSBryan Cantrill 				tcp->tcp_ka_rinterval = interval;
8163d0a255cSGarrett D'Amore 			} else {
8173d0a255cSGarrett D'Amore 				if ((*i1 * tcp->tcp_ka_rinterval) <
8183d0a255cSGarrett D'Amore 				    tcps->tcps_keepalive_abort_interval_low ||
8193d0a255cSGarrett D'Amore 				    (*i1 * tcp->tcp_ka_rinterval) >
8203d0a255cSGarrett D'Amore 				    tcps->tcps_keepalive_abort_interval_high)
8213d0a255cSGarrett D'Amore 					return (EINVAL);
8223d0a255cSGarrett D'Amore 				tcp->tcp_ka_abort_thres =
8233d0a255cSGarrett D'Amore 				    (*i1 * tcp->tcp_ka_rinterval);
8243d0a255cSGarrett D'Amore 			}
8253d0a255cSGarrett D'Amore 			tcp->tcp_ka_cnt = *i1;
8263d0a255cSGarrett D'Amore 			break;
8273d0a255cSGarrett D'Amore 		case TCP_KEEPINTVL:
8283d0a255cSGarrett D'Amore 			/*
8293d0a255cSGarrett D'Amore 			 * TCP_KEEPINTVL is specified in seconds, but
8303d0a255cSGarrett D'Amore 			 * tcp_ka_rinterval is in milliseconds.
8313d0a255cSGarrett D'Amore 			 */
8323d0a255cSGarrett D'Amore 
8333d0a255cSGarrett D'Amore 			if (checkonly)
8343d0a255cSGarrett D'Amore 				break;
8353d0a255cSGarrett D'Amore 
8363d0a255cSGarrett D'Amore 			if ((*i1 * 1000) < tcp->tcp_rto_min ||
8373d0a255cSGarrett D'Amore 			    (*i1 * 1000) > tcp->tcp_rto_max)
8383d0a255cSGarrett D'Amore 				return (EINVAL);
8393d0a255cSGarrett D'Amore 
8403d0a255cSGarrett D'Amore 			if (tcp->tcp_ka_cnt == 0) {
8413d0a255cSGarrett D'Amore 				tcp->tcp_ka_cnt =
8423d0a255cSGarrett D'Amore 				    tcp->tcp_ka_abort_thres / (*i1 * 1000);
8433d0a255cSGarrett D'Amore 			} else {
8443d0a255cSGarrett D'Amore 				if ((*i1 * tcp->tcp_ka_cnt * 1000) <
8453d0a255cSGarrett D'Amore 				    tcps->tcps_keepalive_abort_interval_low ||
8463d0a255cSGarrett D'Amore 				    (*i1 * tcp->tcp_ka_cnt * 1000) >
8473d0a255cSGarrett D'Amore 				    tcps->tcps_keepalive_abort_interval_high)
8483d0a255cSGarrett D'Amore 					return (EINVAL);
8493d0a255cSGarrett D'Amore 				tcp->tcp_ka_abort_thres =
8503d0a255cSGarrett D'Amore 				    (*i1 * tcp->tcp_ka_cnt * 1000);
8513d0a255cSGarrett D'Amore 			}
8523d0a255cSGarrett D'Amore 			tcp->tcp_ka_rinterval = *i1 * 1000;
8533d0a255cSGarrett D'Amore 			break;
854721fffe3SKacheong Poon 		case TCP_KEEPALIVE_ABORT_THRESHOLD:
855721fffe3SKacheong Poon 			if (!checkonly) {
856721fffe3SKacheong Poon 				if (*i1 <
857721fffe3SKacheong Poon 				    tcps->tcps_keepalive_abort_interval_low ||
858721fffe3SKacheong Poon 				    *i1 >
859721fffe3SKacheong Poon 				    tcps->tcps_keepalive_abort_interval_high) {
860721fffe3SKacheong Poon 					*outlenp = 0;
861721fffe3SKacheong Poon 					return (EINVAL);
862721fffe3SKacheong Poon 				}
863721fffe3SKacheong Poon 				tcp->tcp_ka_abort_thres = *i1;
8643d0a255cSGarrett D'Amore 				tcp->tcp_ka_cnt = 0;
8653d0a255cSGarrett D'Amore 				tcp->tcp_ka_rinterval = 0;
866721fffe3SKacheong Poon 			}
867721fffe3SKacheong Poon 			break;
868*d4994511SCody Peter Mello 		case TCP_CONGESTION: {
869*d4994511SCody Peter Mello 			struct cc_algo *algo;
870*d4994511SCody Peter Mello 
871*d4994511SCody Peter Mello 			if (checkonly) {
872*d4994511SCody Peter Mello 				break;
873*d4994511SCody Peter Mello 			}
874*d4994511SCody Peter Mello 
875*d4994511SCody Peter Mello 			/*
876*d4994511SCody Peter Mello 			 * Make sure the string is NUL-terminated. Some
877*d4994511SCody Peter Mello 			 * consumers pass only the number of characters
878*d4994511SCody Peter Mello 			 * in the string, and don't include the NUL
879*d4994511SCody Peter Mello 			 * terminator, so we set it for them.
880*d4994511SCody Peter Mello 			 */
881*d4994511SCody Peter Mello 			if (inlen < CC_ALGO_NAME_MAX) {
882*d4994511SCody Peter Mello 				invalp[inlen] = '\0';
883*d4994511SCody Peter Mello 			}
884*d4994511SCody Peter Mello 			invalp[CC_ALGO_NAME_MAX - 1] = '\0';
885*d4994511SCody Peter Mello 
886*d4994511SCody Peter Mello 			if ((algo = cc_load_algo((char *)invalp)) == NULL) {
887*d4994511SCody Peter Mello 				return (ENOENT);
888*d4994511SCody Peter Mello 			}
889*d4994511SCody Peter Mello 
890*d4994511SCody Peter Mello 			if (CC_ALGO(tcp)->cb_destroy != NULL) {
891*d4994511SCody Peter Mello 				CC_ALGO(tcp)->cb_destroy(&tcp->tcp_ccv);
892*d4994511SCody Peter Mello 			}
893*d4994511SCody Peter Mello 
894*d4994511SCody Peter Mello 			CC_DATA(tcp) = NULL;
895*d4994511SCody Peter Mello 			CC_ALGO(tcp) = algo;
896*d4994511SCody Peter Mello 
897*d4994511SCody Peter Mello 			if (CC_ALGO(tcp)->cb_init != NULL) {
898*d4994511SCody Peter Mello 				VERIFY0(CC_ALGO(tcp)->cb_init(&tcp->tcp_ccv));
899*d4994511SCody Peter Mello 			}
900*d4994511SCody Peter Mello 
901*d4994511SCody Peter Mello 			break;
902*d4994511SCody Peter Mello 		}
903721fffe3SKacheong Poon 		case TCP_CORK:
904721fffe3SKacheong Poon 			if (!checkonly) {
905721fffe3SKacheong Poon 				/*
906721fffe3SKacheong Poon 				 * if tcp->tcp_cork was set and is now
907721fffe3SKacheong Poon 				 * being unset, we have to make sure that
908721fffe3SKacheong Poon 				 * the remaining data gets sent out. Also
909721fffe3SKacheong Poon 				 * unset tcp->tcp_cork so that tcp_wput_data()
910721fffe3SKacheong Poon 				 * can send data even if it is less than mss
911721fffe3SKacheong Poon 				 */
912721fffe3SKacheong Poon 				if (tcp->tcp_cork && onoff == 0 &&
913721fffe3SKacheong Poon 				    tcp->tcp_unsent > 0) {
914721fffe3SKacheong Poon 					tcp->tcp_cork = B_FALSE;
915721fffe3SKacheong Poon 					tcp_wput_data(tcp, NULL, B_FALSE);
916721fffe3SKacheong Poon 				}
917721fffe3SKacheong Poon 				tcp->tcp_cork = onoff;
918721fffe3SKacheong Poon 			}
919721fffe3SKacheong Poon 			break;
920c12492cfSSebastien Roy 		case TCP_RTO_INITIAL:
921707e74bcSKacheong Poon 			if (checkonly || val == 0)
922707e74bcSKacheong Poon 				break;
923707e74bcSKacheong Poon 
924707e74bcSKacheong Poon 			/*
925707e74bcSKacheong Poon 			 * Sanity checks
926707e74bcSKacheong Poon 			 *
927707e74bcSKacheong Poon 			 * The initial RTO should be bounded by the minimum
928707e74bcSKacheong Poon 			 * and maximum RTO.  And it should also be smaller
929707e74bcSKacheong Poon 			 * than the connect attempt abort timeout.  Otherwise,
930707e74bcSKacheong Poon 			 * the connection won't be aborted in a period
931707e74bcSKacheong Poon 			 * reasonably close to that timeout.
932707e74bcSKacheong Poon 			 */
933707e74bcSKacheong Poon 			if (val < tcp->tcp_rto_min || val > tcp->tcp_rto_max ||
934707e74bcSKacheong Poon 			    val > tcp->tcp_second_ctimer_threshold ||
935707e74bcSKacheong Poon 			    val < tcps->tcps_rexmit_interval_initial_low ||
936707e74bcSKacheong Poon 			    val > tcps->tcps_rexmit_interval_initial_high) {
937707e74bcSKacheong Poon 				*outlenp = 0;
938707e74bcSKacheong Poon 				return (EINVAL);
939707e74bcSKacheong Poon 			}
940707e74bcSKacheong Poon 			tcp->tcp_rto_initial = val;
941707e74bcSKacheong Poon 
942707e74bcSKacheong Poon 			/*
943707e74bcSKacheong Poon 			 * If TCP has not sent anything, need to re-calculate
944707e74bcSKacheong Poon 			 * tcp_rto.  Otherwise, this option change does not
945707e74bcSKacheong Poon 			 * really affect anything.
946707e74bcSKacheong Poon 			 */
947707e74bcSKacheong Poon 			if (tcp->tcp_state >= TCPS_SYN_SENT)
948707e74bcSKacheong Poon 				break;
949707e74bcSKacheong Poon 
950c12492cfSSebastien Roy 			tcp->tcp_rtt_sa = MSEC2NSEC(tcp->tcp_rto_initial) << 2;
951c12492cfSSebastien Roy 			tcp->tcp_rtt_sd = MSEC2NSEC(tcp->tcp_rto_initial) >> 1;
952c12492cfSSebastien Roy 			tcp->tcp_rto = tcp_calculate_rto(tcp, tcps,
953c12492cfSSebastien Roy 			    tcps->tcps_conn_grace_period);
954707e74bcSKacheong Poon 			break;
955707e74bcSKacheong Poon 		case TCP_RTO_MIN:
956707e74bcSKacheong Poon 			if (checkonly || val == 0)
957707e74bcSKacheong Poon 				break;
958707e74bcSKacheong Poon 
959707e74bcSKacheong Poon 			if (val < tcps->tcps_rexmit_interval_min_low ||
960707e74bcSKacheong Poon 			    val > tcps->tcps_rexmit_interval_min_high ||
961707e74bcSKacheong Poon 			    val > tcp->tcp_rto_max) {
962707e74bcSKacheong Poon 				*outlenp = 0;
963707e74bcSKacheong Poon 				return (EINVAL);
964707e74bcSKacheong Poon 			}
965707e74bcSKacheong Poon 			tcp->tcp_rto_min = val;
966707e74bcSKacheong Poon 			if (tcp->tcp_rto < val)
967707e74bcSKacheong Poon 				tcp->tcp_rto = val;
968707e74bcSKacheong Poon 			break;
969707e74bcSKacheong Poon 		case TCP_RTO_MAX:
970707e74bcSKacheong Poon 			if (checkonly || val == 0)
971707e74bcSKacheong Poon 				break;
972707e74bcSKacheong Poon 
973707e74bcSKacheong Poon 			/*
974707e74bcSKacheong Poon 			 * Sanity checks
975707e74bcSKacheong Poon 			 *
976707e74bcSKacheong Poon 			 * The maximum RTO should not be larger than the
977707e74bcSKacheong Poon 			 * connection abort timeout.  Otherwise, the
978707e74bcSKacheong Poon 			 * connection won't be aborted in a period reasonably
979707e74bcSKacheong Poon 			 * close to that timeout.
980707e74bcSKacheong Poon 			 */
981707e74bcSKacheong Poon 			if (val < tcps->tcps_rexmit_interval_max_low ||
982707e74bcSKacheong Poon 			    val > tcps->tcps_rexmit_interval_max_high ||
983707e74bcSKacheong Poon 			    val < tcp->tcp_rto_min ||
984707e74bcSKacheong Poon 			    val > tcp->tcp_second_timer_threshold) {
985707e74bcSKacheong Poon 				*outlenp = 0;
986707e74bcSKacheong Poon 				return (EINVAL);
987707e74bcSKacheong Poon 			}
988707e74bcSKacheong Poon 			tcp->tcp_rto_max = val;
989707e74bcSKacheong Poon 			if (tcp->tcp_rto > val)
990707e74bcSKacheong Poon 				tcp->tcp_rto = val;
991707e74bcSKacheong Poon 			break;
992707e74bcSKacheong Poon 		case TCP_LINGER2:
993707e74bcSKacheong Poon 			if (checkonly || *i1 == 0)
994707e74bcSKacheong Poon 				break;
995707e74bcSKacheong Poon 
996707e74bcSKacheong Poon 			/*
997707e74bcSKacheong Poon 			 * Note that the option value is in seconds.  And
998707e74bcSKacheong Poon 			 * the value should be bigger than the private
999707e74bcSKacheong Poon 			 * parameter tcp_fin_wait_2_flush_interval's lower
1000707e74bcSKacheong Poon 			 * bound and smaller than the current value of that
1001707e74bcSKacheong Poon 			 * parameter.  It should be smaller than the current
1002707e74bcSKacheong Poon 			 * value to avoid an app setting TCP_LINGER2 to a big
1003707e74bcSKacheong Poon 			 * value, causing resource to be held up too long in
1004707e74bcSKacheong Poon 			 * FIN-WAIT-2 state.
1005707e74bcSKacheong Poon 			 */
1006707e74bcSKacheong Poon 			if (*i1 < 0 ||
1007707e74bcSKacheong Poon 			    tcps->tcps_fin_wait_2_flush_interval_low/SECONDS >
1008707e74bcSKacheong Poon 			    *i1 ||
1009707e74bcSKacheong Poon 			    tcps->tcps_fin_wait_2_flush_interval/SECONDS <
1010707e74bcSKacheong Poon 			    *i1) {
1011707e74bcSKacheong Poon 				*outlenp = 0;
1012707e74bcSKacheong Poon 				return (EINVAL);
1013707e74bcSKacheong Poon 			}
1014707e74bcSKacheong Poon 			tcp->tcp_fin_wait_2_flush_interval = *i1 * SECONDS;
1015707e74bcSKacheong Poon 			break;
1016721fffe3SKacheong Poon 		default:
1017721fffe3SKacheong Poon 			break;
1018721fffe3SKacheong Poon 		}
1019721fffe3SKacheong Poon 		break;
1020721fffe3SKacheong Poon 	case IPPROTO_IP:
1021721fffe3SKacheong Poon 		if (connp->conn_family != AF_INET) {
1022721fffe3SKacheong Poon 			*outlenp = 0;
1023721fffe3SKacheong Poon 			return (EINVAL);
1024721fffe3SKacheong Poon 		}
1025721fffe3SKacheong Poon 		switch (name) {
1026721fffe3SKacheong Poon 		case IP_SEC_OPT:
1027721fffe3SKacheong Poon 			/*
1028721fffe3SKacheong Poon 			 * We should not allow policy setting after
1029721fffe3SKacheong Poon 			 * we start listening for connections.
1030721fffe3SKacheong Poon 			 */
1031721fffe3SKacheong Poon 			if (tcp->tcp_state == TCPS_LISTEN) {
1032721fffe3SKacheong Poon 				return (EINVAL);
1033721fffe3SKacheong Poon 			}
1034721fffe3SKacheong Poon 			break;
1035721fffe3SKacheong Poon 		}
1036721fffe3SKacheong Poon 		break;
1037721fffe3SKacheong Poon 	case IPPROTO_IPV6:
1038721fffe3SKacheong Poon 		/*
1039721fffe3SKacheong Poon 		 * IPPROTO_IPV6 options are only supported for sockets
1040721fffe3SKacheong Poon 		 * that are using IPv6 on the wire.
1041721fffe3SKacheong Poon 		 */
1042721fffe3SKacheong Poon 		if (connp->conn_ipversion != IPV6_VERSION) {
1043721fffe3SKacheong Poon 			*outlenp = 0;
1044721fffe3SKacheong Poon 			return (EINVAL);
1045721fffe3SKacheong Poon 		}
1046721fffe3SKacheong Poon 
1047721fffe3SKacheong Poon 		switch (name) {
1048721fffe3SKacheong Poon 		case IPV6_RECVPKTINFO:
1049721fffe3SKacheong Poon 			if (!checkonly) {
1050721fffe3SKacheong Poon 				/* Force it to be sent up with the next msg */
1051721fffe3SKacheong Poon 				tcp->tcp_recvifindex = 0;
1052721fffe3SKacheong Poon 			}
1053721fffe3SKacheong Poon 			break;
1054721fffe3SKacheong Poon 		case IPV6_RECVTCLASS:
1055721fffe3SKacheong Poon 			if (!checkonly) {
1056721fffe3SKacheong Poon 				/* Force it to be sent up with the next msg */
1057721fffe3SKacheong Poon 				tcp->tcp_recvtclass = 0xffffffffU;
1058721fffe3SKacheong Poon 			}
1059721fffe3SKacheong Poon 			break;
1060721fffe3SKacheong Poon 		case IPV6_RECVHOPLIMIT:
1061721fffe3SKacheong Poon 			if (!checkonly) {
1062721fffe3SKacheong Poon 				/* Force it to be sent up with the next msg */
1063721fffe3SKacheong Poon 				tcp->tcp_recvhops = 0xffffffffU;
1064721fffe3SKacheong Poon 			}
1065721fffe3SKacheong Poon 			break;
1066721fffe3SKacheong Poon 		case IPV6_PKTINFO:
1067721fffe3SKacheong Poon 			/* This is an extra check for TCP */
1068721fffe3SKacheong Poon 			if (inlen == sizeof (struct in6_pktinfo)) {
1069721fffe3SKacheong Poon 				struct in6_pktinfo *pkti;
1070721fffe3SKacheong Poon 
1071721fffe3SKacheong Poon 				pkti = (struct in6_pktinfo *)invalp;
1072721fffe3SKacheong Poon 				/*
1073721fffe3SKacheong Poon 				 * RFC 3542 states that ipi6_addr must be
1074721fffe3SKacheong Poon 				 * the unspecified address when setting the
1075721fffe3SKacheong Poon 				 * IPV6_PKTINFO sticky socket option on a
1076721fffe3SKacheong Poon 				 * TCP socket.
1077721fffe3SKacheong Poon 				 */
1078721fffe3SKacheong Poon 				if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr))
1079721fffe3SKacheong Poon 					return (EINVAL);
1080721fffe3SKacheong Poon 			}
1081721fffe3SKacheong Poon 			break;
1082721fffe3SKacheong Poon 		case IPV6_SEC_OPT:
1083721fffe3SKacheong Poon 			/*
1084721fffe3SKacheong Poon 			 * We should not allow policy setting after
1085721fffe3SKacheong Poon 			 * we start listening for connections.
1086721fffe3SKacheong Poon 			 */
1087721fffe3SKacheong Poon 			if (tcp->tcp_state == TCPS_LISTEN) {
1088721fffe3SKacheong Poon 				return (EINVAL);
1089721fffe3SKacheong Poon 			}
1090721fffe3SKacheong Poon 			break;
1091721fffe3SKacheong Poon 		}
1092721fffe3SKacheong Poon 		break;
1093721fffe3SKacheong Poon 	}
1094721fffe3SKacheong Poon 	reterr = conn_opt_set(&coas, level, name, inlen, invalp,
1095721fffe3SKacheong Poon 	    checkonly, cr);
1096721fffe3SKacheong Poon 	if (reterr != 0) {
1097721fffe3SKacheong Poon 		*outlenp = 0;
1098721fffe3SKacheong Poon 		return (reterr);
1099721fffe3SKacheong Poon 	}
1100721fffe3SKacheong Poon 
1101721fffe3SKacheong Poon 	/*
1102721fffe3SKacheong Poon 	 * Common case of OK return with outval same as inval
1103721fffe3SKacheong Poon 	 */
1104721fffe3SKacheong Poon 	if (invalp != outvalp) {
1105721fffe3SKacheong Poon 		/* don't trust bcopy for identical src/dst */
1106721fffe3SKacheong Poon 		(void) bcopy(invalp, outvalp, inlen);
1107721fffe3SKacheong Poon 	}
1108721fffe3SKacheong Poon 	*outlenp = inlen;
1109721fffe3SKacheong Poon 
1110721fffe3SKacheong Poon 	if (coas.coa_changed & COA_HEADER_CHANGED) {
1111721fffe3SKacheong Poon 		/* If we are connected we rebuild the headers */
1112721fffe3SKacheong Poon 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) &&
1113721fffe3SKacheong Poon 		    !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
1114721fffe3SKacheong Poon 			reterr = tcp_build_hdrs(tcp);
1115721fffe3SKacheong Poon 			if (reterr != 0)
1116721fffe3SKacheong Poon 				return (reterr);
1117721fffe3SKacheong Poon 		}
1118721fffe3SKacheong Poon 	}
1119721fffe3SKacheong Poon 	if (coas.coa_changed & COA_ROUTE_CHANGED) {
1120721fffe3SKacheong Poon 		in6_addr_t nexthop;
1121721fffe3SKacheong Poon 
1122721fffe3SKacheong Poon 		/*
1123721fffe3SKacheong Poon 		 * If we are connected we re-cache the information.
1124721fffe3SKacheong Poon 		 * We ignore errors to preserve BSD behavior.
1125721fffe3SKacheong Poon 		 * Note that we don't redo IPsec policy lookup here
1126721fffe3SKacheong Poon 		 * since the final destination (or source) didn't change.
1127721fffe3SKacheong Poon 		 */
1128721fffe3SKacheong Poon 		ip_attr_nexthop(&connp->conn_xmit_ipp, connp->conn_ixa,
1129721fffe3SKacheong Poon 		    &connp->conn_faddr_v6, &nexthop);
1130721fffe3SKacheong Poon 
1131721fffe3SKacheong Poon 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) &&
1132721fffe3SKacheong Poon 		    !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
1133721fffe3SKacheong Poon 			(void) ip_attr_connect(connp, connp->conn_ixa,
1134721fffe3SKacheong Poon 			    &connp->conn_laddr_v6, &connp->conn_faddr_v6,
1135721fffe3SKacheong Poon 			    &nexthop, connp->conn_fport, NULL, NULL,
1136721fffe3SKacheong Poon 			    IPDF_VERIFY_DST);
1137721fffe3SKacheong Poon 		}
1138721fffe3SKacheong Poon 	}
1139721fffe3SKacheong Poon 	if ((coas.coa_changed & COA_SNDBUF_CHANGED) && !IPCL_IS_NONSTR(connp)) {
1140721fffe3SKacheong Poon 		connp->conn_wq->q_hiwat = connp->conn_sndbuf;
1141721fffe3SKacheong Poon 	}
1142721fffe3SKacheong Poon 	if (coas.coa_changed & COA_WROFF_CHANGED) {
1143721fffe3SKacheong Poon 		connp->conn_wroff = connp->conn_ht_iphc_allocated +
1144721fffe3SKacheong Poon 		    tcps->tcps_wroff_xtra;
1145721fffe3SKacheong Poon 		(void) proto_set_tx_wroff(connp->conn_rq, connp,
1146721fffe3SKacheong Poon 		    connp->conn_wroff);
1147721fffe3SKacheong Poon 	}
1148721fffe3SKacheong Poon 	if (coas.coa_changed & COA_OOBINLINE_CHANGED) {
1149721fffe3SKacheong Poon 		if (IPCL_IS_NONSTR(connp))
1150721fffe3SKacheong Poon 			proto_set_rx_oob_opt(connp, onoff);
1151721fffe3SKacheong Poon 	}
1152721fffe3SKacheong Poon 	return (0);
1153721fffe3SKacheong Poon }
1154