xref: /illumos-gate/usr/src/uts/common/inet/tcp/tcp_opt_data.c (revision 721fffe35d40e548a5a58dc53a2ec9c6762172d9)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
257c478bd9Sstevel@tonic-gate 
267c478bd9Sstevel@tonic-gate #include <sys/types.h>
277c478bd9Sstevel@tonic-gate #include <sys/stream.h>
287c478bd9Sstevel@tonic-gate #define	_SUN_TPI_VERSION 2
297c478bd9Sstevel@tonic-gate #include <sys/tihdr.h>
307c478bd9Sstevel@tonic-gate #include <sys/socket.h>
317c478bd9Sstevel@tonic-gate #include <sys/xti_xtiopt.h>
327c478bd9Sstevel@tonic-gate #include <sys/xti_inet.h>
33*721fffe3SKacheong Poon #include <sys/policy.h>
347c478bd9Sstevel@tonic-gate 
357c478bd9Sstevel@tonic-gate #include <inet/common.h>
367c478bd9Sstevel@tonic-gate #include <netinet/ip6.h>
377c478bd9Sstevel@tonic-gate #include <inet/ip.h>
387c478bd9Sstevel@tonic-gate 
397c478bd9Sstevel@tonic-gate #include <netinet/in.h>
407c478bd9Sstevel@tonic-gate #include <netinet/tcp.h>
417c478bd9Sstevel@tonic-gate #include <inet/optcom.h>
42*721fffe3SKacheong Poon #include <inet/proto_set.h>
43bd670b35SErik Nordmark #include <inet/tcp_impl.h>
447c478bd9Sstevel@tonic-gate 
457c478bd9Sstevel@tonic-gate /*
467c478bd9Sstevel@tonic-gate  * Table of all known options handled on a TCP protocol stack.
477c478bd9Sstevel@tonic-gate  *
487c478bd9Sstevel@tonic-gate  * Note: This table contains options processed by both TCP and IP levels
497c478bd9Sstevel@tonic-gate  *       and is the superset of options that can be performed on a TCP over IP
507c478bd9Sstevel@tonic-gate  *       stack.
517c478bd9Sstevel@tonic-gate  */
527c478bd9Sstevel@tonic-gate opdes_t	tcp_opt_arr[] = {
537c478bd9Sstevel@tonic-gate 
54bd670b35SErik Nordmark { SO_LINGER,	SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0,
557c478bd9Sstevel@tonic-gate 	sizeof (struct linger), 0 },
567c478bd9Sstevel@tonic-gate 
57bd670b35SErik Nordmark { SO_DEBUG,	SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
58bd670b35SErik Nordmark { SO_KEEPALIVE,	SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
59bd670b35SErik Nordmark { SO_DONTROUTE,	SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
60bd670b35SErik Nordmark { SO_USELOOPBACK, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0
617c478bd9Sstevel@tonic-gate 	},
62bd670b35SErik Nordmark { SO_BROADCAST,	SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
63bd670b35SErik Nordmark { SO_REUSEADDR, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
64bd670b35SErik Nordmark { SO_OOBINLINE, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
65bd670b35SErik Nordmark { SO_TYPE,	SOL_SOCKET, OA_R, OA_R, OP_NP, 0, sizeof (int), 0 },
66bd670b35SErik Nordmark { SO_SNDBUF,	SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
67bd670b35SErik Nordmark { SO_RCVBUF,	SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
68bd670b35SErik Nordmark { SO_SNDTIMEO,	SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0,
693986c91eSanders 	sizeof (struct timeval), 0 },
70bd670b35SErik Nordmark { SO_RCVTIMEO,	SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0,
713986c91eSanders 	sizeof (struct timeval), 0 },
72bd670b35SErik Nordmark { SO_DGRAM_ERRIND, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0
737c478bd9Sstevel@tonic-gate 	},
747c478bd9Sstevel@tonic-gate { SO_SND_COPYAVOID, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
75bd670b35SErik Nordmark { SO_ANON_MLP, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int),
765d0bc3edSsommerfe 	0 },
77bd670b35SErik Nordmark { SO_MAC_EXEMPT, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int),
785d0bc3edSsommerfe 	0 },
79bd670b35SErik Nordmark { SO_MAC_IMPLICIT, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int),
805d3b8cb7SBill Sommerfeld 	0 },
81bd670b35SErik Nordmark { SO_ALLZONES, SOL_SOCKET, OA_R, OA_RW, OP_CONFIG, 0, sizeof (int),
825d0bc3edSsommerfe 	0 },
83bd670b35SErik Nordmark { SO_EXCLBIND, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
84ae347574Skcpoon 
85bd670b35SErik Nordmark { SO_DOMAIN,	SOL_SOCKET, OA_R, OA_R, OP_NP, 0, sizeof (int), 0 },
8688cda078Skcpoon 
87bd670b35SErik Nordmark { SO_PROTOTYPE,	SOL_SOCKET, OA_R, OA_R, OP_NP, 0, sizeof (int), 0 },
8888cda078Skcpoon 
89bd670b35SErik Nordmark { TCP_NODELAY,	IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0
907c478bd9Sstevel@tonic-gate 	},
91bd670b35SErik Nordmark { TCP_MAXSEG,	IPPROTO_TCP, OA_R, OA_R, OP_NP, 0, sizeof (uint_t),
927c478bd9Sstevel@tonic-gate 	536 },
937c478bd9Sstevel@tonic-gate 
947c478bd9Sstevel@tonic-gate { TCP_NOTIFY_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP,
95bd670b35SErik Nordmark 	OP_DEF_FN, sizeof (int), -1 /* not initialized */ },
967c478bd9Sstevel@tonic-gate 
977c478bd9Sstevel@tonic-gate { TCP_ABORT_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP,
98bd670b35SErik Nordmark 	OP_DEF_FN, sizeof (int), -1 /* not initialized */ },
997c478bd9Sstevel@tonic-gate 
1007c478bd9Sstevel@tonic-gate { TCP_CONN_NOTIFY_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP,
101bd670b35SErik Nordmark 	OP_DEF_FN, sizeof (int), -1 /* not initialized */ },
1027c478bd9Sstevel@tonic-gate 
1037c478bd9Sstevel@tonic-gate { TCP_CONN_ABORT_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP,
104bd670b35SErik Nordmark 	OP_DEF_FN, sizeof (int), -1 /* not initialized */ },
1057c478bd9Sstevel@tonic-gate 
106bd670b35SErik Nordmark { TCP_RECVDSTADDR, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int),
1077c478bd9Sstevel@tonic-gate 	0 },
1087c478bd9Sstevel@tonic-gate 
109bd670b35SErik Nordmark { TCP_ANONPRIVBIND, IPPROTO_TCP, OA_R, OA_RW, OP_PRIVPORT, 0,
1107c478bd9Sstevel@tonic-gate 	sizeof (int), 0 },
1117c478bd9Sstevel@tonic-gate 
112bd670b35SErik Nordmark { TCP_EXCLBIND, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0
1137c478bd9Sstevel@tonic-gate 	},
1147c478bd9Sstevel@tonic-gate 
115bd670b35SErik Nordmark { TCP_INIT_CWND, IPPROTO_TCP, OA_RW, OA_RW, OP_CONFIG, 0,
1167c478bd9Sstevel@tonic-gate 	sizeof (int), 0 },
1177c478bd9Sstevel@tonic-gate 
118bd670b35SErik Nordmark { TCP_KEEPALIVE_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0,
1197c478bd9Sstevel@tonic-gate 	sizeof (int), 0	},
1207c478bd9Sstevel@tonic-gate 
121bd670b35SErik Nordmark { TCP_KEEPALIVE_ABORT_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0,
1227c478bd9Sstevel@tonic-gate 	sizeof (int), 0	},
1237c478bd9Sstevel@tonic-gate 
124bd670b35SErik Nordmark { TCP_CORK, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
1257c478bd9Sstevel@tonic-gate 
1267c478bd9Sstevel@tonic-gate { IP_OPTIONS,	IPPROTO_IP, OA_RW, OA_RW, OP_NP,
127bd670b35SErik Nordmark 	(OP_VARLEN|OP_NODEFAULT),
1280f1702c5SYu Xiangning 	IP_MAX_OPT_LENGTH + IP_ADDR_LEN, -1 /* not initialized */ },
1297c478bd9Sstevel@tonic-gate { T_IP_OPTIONS,	IPPROTO_IP, OA_RW, OA_RW, OP_NP,
130bd670b35SErik Nordmark 	(OP_VARLEN|OP_NODEFAULT),
1310f1702c5SYu Xiangning 	IP_MAX_OPT_LENGTH + IP_ADDR_LEN, -1 /* not initialized */ },
1327c478bd9Sstevel@tonic-gate 
133bd670b35SErik Nordmark { IP_TOS,	IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
134bd670b35SErik Nordmark { T_IP_TOS,	IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
135bd670b35SErik Nordmark { IP_TTL,	IPPROTO_IP, OA_RW, OA_RW, OP_NP, OP_DEF_FN,
1367c478bd9Sstevel@tonic-gate 	sizeof (int), -1 /* not initialized */ },
1377c478bd9Sstevel@tonic-gate 
138bd670b35SErik Nordmark { IP_SEC_OPT, IPPROTO_IP, OA_RW, OA_RW, OP_NP, OP_NODEFAULT,
1397c478bd9Sstevel@tonic-gate 	sizeof (ipsec_req_t), -1 /* not initialized */ },
1407c478bd9Sstevel@tonic-gate 
141bd670b35SErik Nordmark { IP_BOUND_IF, IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0,
1427c478bd9Sstevel@tonic-gate 	sizeof (int),	0 /* no ifindex */ },
1437c478bd9Sstevel@tonic-gate 
144bd670b35SErik Nordmark { IP_UNSPEC_SRC, IPPROTO_IP, OA_R, OA_RW, OP_RAW, 0,
1457c478bd9Sstevel@tonic-gate 	sizeof (int), 0 },
1467c478bd9Sstevel@tonic-gate 
147bd670b35SErik Nordmark { IPV6_UNICAST_HOPS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, OP_DEF_FN,
1487c478bd9Sstevel@tonic-gate 	sizeof (int), -1 /* not initialized */ },
1497c478bd9Sstevel@tonic-gate 
150bd670b35SErik Nordmark { IPV6_BOUND_IF, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
1517c478bd9Sstevel@tonic-gate 	sizeof (int),	0 /* no ifindex */ },
1527c478bd9Sstevel@tonic-gate 
153bd670b35SErik Nordmark { IP_DONTFRAG, IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
154bd670b35SErik Nordmark 
155bd670b35SErik Nordmark { IP_NEXTHOP, IPPROTO_IP, OA_R, OA_RW, OP_CONFIG, 0,
15643d18f1cSpriyanka 	sizeof (in_addr_t),	-1 /* not initialized  */ },
15743d18f1cSpriyanka 
158bd670b35SErik Nordmark { IPV6_UNSPEC_SRC, IPPROTO_IPV6, OA_R, OA_RW, OP_RAW, 0,
1597c478bd9Sstevel@tonic-gate 	sizeof (int), 0 },
1607c478bd9Sstevel@tonic-gate 
1617c478bd9Sstevel@tonic-gate { IPV6_PKTINFO, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
162bd670b35SErik Nordmark 	(OP_NODEFAULT|OP_VARLEN),
1637c478bd9Sstevel@tonic-gate 	sizeof (struct in6_pktinfo), -1 /* not initialized */ },
1647c478bd9Sstevel@tonic-gate { IPV6_NEXTHOP, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
165bd670b35SErik Nordmark 	OP_NODEFAULT,
1667c478bd9Sstevel@tonic-gate 	sizeof (sin6_t), -1 /* not initialized */ },
1677c478bd9Sstevel@tonic-gate { IPV6_HOPOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
168bd670b35SErik Nordmark 	(OP_VARLEN|OP_NODEFAULT), 255*8,
1697c478bd9Sstevel@tonic-gate 	-1 /* not initialized */ },
1707c478bd9Sstevel@tonic-gate { IPV6_DSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
171bd670b35SErik Nordmark 	(OP_VARLEN|OP_NODEFAULT), 255*8,
1727c478bd9Sstevel@tonic-gate 	-1 /* not initialized */ },
1737c478bd9Sstevel@tonic-gate { IPV6_RTHDRDSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
174bd670b35SErik Nordmark 	(OP_VARLEN|OP_NODEFAULT), 255*8,
1757c478bd9Sstevel@tonic-gate 	-1 /* not initialized */ },
1767c478bd9Sstevel@tonic-gate { IPV6_RTHDR, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
177bd670b35SErik Nordmark 	(OP_VARLEN|OP_NODEFAULT), 255*8,
1787c478bd9Sstevel@tonic-gate 	-1 /* not initialized */ },
1797c478bd9Sstevel@tonic-gate { IPV6_TCLASS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
180bd670b35SErik Nordmark 	OP_NODEFAULT,
1817c478bd9Sstevel@tonic-gate 	sizeof (int), -1 /* not initialized */ },
1827c478bd9Sstevel@tonic-gate { IPV6_PATHMTU, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
183bd670b35SErik Nordmark 	OP_NODEFAULT,
1847c478bd9Sstevel@tonic-gate 	sizeof (struct ip6_mtuinfo), -1 /* not initialized */ },
185bd670b35SErik Nordmark { IPV6_DONTFRAG, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
186bd670b35SErik Nordmark 	sizeof (int), 0 },
187bd670b35SErik Nordmark { IPV6_USE_MIN_MTU, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
1887c478bd9Sstevel@tonic-gate 	sizeof (int), 0 },
189bd670b35SErik Nordmark { IPV6_V6ONLY, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
1907c478bd9Sstevel@tonic-gate 	sizeof (int), 0 },
1917c478bd9Sstevel@tonic-gate 
1927c478bd9Sstevel@tonic-gate /* Enable receipt of ancillary data */
193bd670b35SErik Nordmark { IPV6_RECVPKTINFO, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
1947c478bd9Sstevel@tonic-gate 	sizeof (int), 0 },
195bd670b35SErik Nordmark { IPV6_RECVHOPLIMIT, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
1967c478bd9Sstevel@tonic-gate 	sizeof (int), 0 },
197bd670b35SErik Nordmark { IPV6_RECVHOPOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
1987c478bd9Sstevel@tonic-gate 	sizeof (int), 0 },
199bd670b35SErik Nordmark { _OLD_IPV6_RECVDSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
2007c478bd9Sstevel@tonic-gate 	sizeof (int), 0 },
201bd670b35SErik Nordmark { IPV6_RECVDSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
2027c478bd9Sstevel@tonic-gate 	sizeof (int), 0 },
203bd670b35SErik Nordmark { IPV6_RECVRTHDR, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
2047c478bd9Sstevel@tonic-gate 	sizeof (int), 0 },
205bd670b35SErik Nordmark { IPV6_RECVRTHDRDSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
2067c478bd9Sstevel@tonic-gate 	sizeof (int), 0 },
207bd670b35SErik Nordmark { IPV6_RECVTCLASS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
2087c478bd9Sstevel@tonic-gate 	sizeof (int), 0 },
2097c478bd9Sstevel@tonic-gate 
210bd670b35SErik Nordmark { IPV6_SEC_OPT, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, OP_NODEFAULT,
2117c478bd9Sstevel@tonic-gate 	sizeof (ipsec_req_t), -1 /* not initialized */ },
212bd670b35SErik Nordmark { IPV6_SRC_PREFERENCES, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
2137c478bd9Sstevel@tonic-gate 	sizeof (uint32_t), IPV6_PREFER_SRC_DEFAULT },
2147c478bd9Sstevel@tonic-gate };
2157c478bd9Sstevel@tonic-gate 
2167c478bd9Sstevel@tonic-gate /*
2177c478bd9Sstevel@tonic-gate  * Table of all supported levels
2187c478bd9Sstevel@tonic-gate  * Note: Some levels (e.g. XTI_GENERIC) may be valid but may not have
2197c478bd9Sstevel@tonic-gate  * any supported options so we need this info separately.
2207c478bd9Sstevel@tonic-gate  *
2217c478bd9Sstevel@tonic-gate  * This is needed only for topmost tpi providers and is used only by
2227c478bd9Sstevel@tonic-gate  * XTI interfaces.
2237c478bd9Sstevel@tonic-gate  */
2247c478bd9Sstevel@tonic-gate optlevel_t	tcp_valid_levels_arr[] = {
2257c478bd9Sstevel@tonic-gate 	XTI_GENERIC,
2267c478bd9Sstevel@tonic-gate 	SOL_SOCKET,
2277c478bd9Sstevel@tonic-gate 	IPPROTO_TCP,
2287c478bd9Sstevel@tonic-gate 	IPPROTO_IP,
2297c478bd9Sstevel@tonic-gate 	IPPROTO_IPV6
2307c478bd9Sstevel@tonic-gate };
2317c478bd9Sstevel@tonic-gate 
2327c478bd9Sstevel@tonic-gate 
2337c478bd9Sstevel@tonic-gate #define	TCP_OPT_ARR_CNT		A_CNT(tcp_opt_arr)
2347c478bd9Sstevel@tonic-gate #define	TCP_VALID_LEVELS_CNT	A_CNT(tcp_valid_levels_arr)
2357c478bd9Sstevel@tonic-gate 
2367c478bd9Sstevel@tonic-gate uint_t tcp_max_optsize; /* initialized when TCP driver is loaded */
2377c478bd9Sstevel@tonic-gate 
2387c478bd9Sstevel@tonic-gate /*
2397c478bd9Sstevel@tonic-gate  * Initialize option database object for TCP
2407c478bd9Sstevel@tonic-gate  *
2417c478bd9Sstevel@tonic-gate  * This object represents database of options to search passed to
2427c478bd9Sstevel@tonic-gate  * {sock,tpi}optcom_req() interface routine to take care of option
2437c478bd9Sstevel@tonic-gate  * management and associated methods.
2447c478bd9Sstevel@tonic-gate  */
2457c478bd9Sstevel@tonic-gate 
2467c478bd9Sstevel@tonic-gate optdb_obj_t tcp_opt_obj = {
2477c478bd9Sstevel@tonic-gate 	tcp_opt_default,	/* TCP default value function pointer */
2480f1702c5SYu Xiangning 	tcp_tpi_opt_get,	/* TCP get function pointer */
2490f1702c5SYu Xiangning 	tcp_tpi_opt_set,	/* TCP set function pointer */
2507c478bd9Sstevel@tonic-gate 	TCP_OPT_ARR_CNT,	/* TCP option database count of entries */
2517c478bd9Sstevel@tonic-gate 	tcp_opt_arr,		/* TCP option database */
2527c478bd9Sstevel@tonic-gate 	TCP_VALID_LEVELS_CNT,	/* TCP valid level count of entries */
2537c478bd9Sstevel@tonic-gate 	tcp_valid_levels_arr	/* TCP valid level array */
2547c478bd9Sstevel@tonic-gate };
255*721fffe3SKacheong Poon 
/* Upper bound on the TCP initial cwin (used at start and restart). */
#define	TCP_MAX_INIT_CWND	16

static int tcp_max_init_cwnd = TCP_MAX_INIT_CWND;
260*721fffe3SKacheong Poon 
261*721fffe3SKacheong Poon /*
262*721fffe3SKacheong Poon  * Some TCP options can be "set" by requesting them in the option
263*721fffe3SKacheong Poon  * buffer. This is needed for XTI feature test though we do not
264*721fffe3SKacheong Poon  * allow it in general. We interpret that this mechanism is more
265*721fffe3SKacheong Poon  * applicable to OSI protocols and need not be allowed in general.
266*721fffe3SKacheong Poon  * This routine filters out options for which it is not allowed (most)
267*721fffe3SKacheong Poon  * and lets through those (few) for which it is. [ The XTI interface
268*721fffe3SKacheong Poon  * test suite specifics will imply that any XTI_GENERIC level XTI_* if
269*721fffe3SKacheong Poon  * ever implemented will have to be allowed here ].
270*721fffe3SKacheong Poon  */
271*721fffe3SKacheong Poon static boolean_t
272*721fffe3SKacheong Poon tcp_allow_connopt_set(int level, int name)
273*721fffe3SKacheong Poon {
274*721fffe3SKacheong Poon 
275*721fffe3SKacheong Poon 	switch (level) {
276*721fffe3SKacheong Poon 	case IPPROTO_TCP:
277*721fffe3SKacheong Poon 		switch (name) {
278*721fffe3SKacheong Poon 		case TCP_NODELAY:
279*721fffe3SKacheong Poon 			return (B_TRUE);
280*721fffe3SKacheong Poon 		default:
281*721fffe3SKacheong Poon 			return (B_FALSE);
282*721fffe3SKacheong Poon 		}
283*721fffe3SKacheong Poon 		/*NOTREACHED*/
284*721fffe3SKacheong Poon 	default:
285*721fffe3SKacheong Poon 		return (B_FALSE);
286*721fffe3SKacheong Poon 	}
287*721fffe3SKacheong Poon 	/*NOTREACHED*/
288*721fffe3SKacheong Poon }
289*721fffe3SKacheong Poon 
290*721fffe3SKacheong Poon /*
291*721fffe3SKacheong Poon  * This routine gets default values of certain options whose default
292*721fffe3SKacheong Poon  * values are maintained by protocol specific code
293*721fffe3SKacheong Poon  */
294*721fffe3SKacheong Poon /* ARGSUSED */
295*721fffe3SKacheong Poon int
296*721fffe3SKacheong Poon tcp_opt_default(queue_t *q, int level, int name, uchar_t *ptr)
297*721fffe3SKacheong Poon {
298*721fffe3SKacheong Poon 	int32_t	*i1 = (int32_t *)ptr;
299*721fffe3SKacheong Poon 	tcp_stack_t	*tcps = Q_TO_TCP(q)->tcp_tcps;
300*721fffe3SKacheong Poon 
301*721fffe3SKacheong Poon 	switch (level) {
302*721fffe3SKacheong Poon 	case IPPROTO_TCP:
303*721fffe3SKacheong Poon 		switch (name) {
304*721fffe3SKacheong Poon 		case TCP_NOTIFY_THRESHOLD:
305*721fffe3SKacheong Poon 			*i1 = tcps->tcps_ip_notify_interval;
306*721fffe3SKacheong Poon 			break;
307*721fffe3SKacheong Poon 		case TCP_ABORT_THRESHOLD:
308*721fffe3SKacheong Poon 			*i1 = tcps->tcps_ip_abort_interval;
309*721fffe3SKacheong Poon 			break;
310*721fffe3SKacheong Poon 		case TCP_CONN_NOTIFY_THRESHOLD:
311*721fffe3SKacheong Poon 			*i1 = tcps->tcps_ip_notify_cinterval;
312*721fffe3SKacheong Poon 			break;
313*721fffe3SKacheong Poon 		case TCP_CONN_ABORT_THRESHOLD:
314*721fffe3SKacheong Poon 			*i1 = tcps->tcps_ip_abort_cinterval;
315*721fffe3SKacheong Poon 			break;
316*721fffe3SKacheong Poon 		default:
317*721fffe3SKacheong Poon 			return (-1);
318*721fffe3SKacheong Poon 		}
319*721fffe3SKacheong Poon 		break;
320*721fffe3SKacheong Poon 	case IPPROTO_IP:
321*721fffe3SKacheong Poon 		switch (name) {
322*721fffe3SKacheong Poon 		case IP_TTL:
323*721fffe3SKacheong Poon 			*i1 = tcps->tcps_ipv4_ttl;
324*721fffe3SKacheong Poon 			break;
325*721fffe3SKacheong Poon 		default:
326*721fffe3SKacheong Poon 			return (-1);
327*721fffe3SKacheong Poon 		}
328*721fffe3SKacheong Poon 		break;
329*721fffe3SKacheong Poon 	case IPPROTO_IPV6:
330*721fffe3SKacheong Poon 		switch (name) {
331*721fffe3SKacheong Poon 		case IPV6_UNICAST_HOPS:
332*721fffe3SKacheong Poon 			*i1 = tcps->tcps_ipv6_hoplimit;
333*721fffe3SKacheong Poon 			break;
334*721fffe3SKacheong Poon 		default:
335*721fffe3SKacheong Poon 			return (-1);
336*721fffe3SKacheong Poon 		}
337*721fffe3SKacheong Poon 		break;
338*721fffe3SKacheong Poon 	default:
339*721fffe3SKacheong Poon 		return (-1);
340*721fffe3SKacheong Poon 	}
341*721fffe3SKacheong Poon 	return (sizeof (int));
342*721fffe3SKacheong Poon }
343*721fffe3SKacheong Poon 
344*721fffe3SKacheong Poon /*
345*721fffe3SKacheong Poon  * TCP routine to get the values of options.
346*721fffe3SKacheong Poon  */
347*721fffe3SKacheong Poon int
348*721fffe3SKacheong Poon tcp_opt_get(conn_t *connp, int level, int name, uchar_t *ptr)
349*721fffe3SKacheong Poon {
350*721fffe3SKacheong Poon 	int		*i1 = (int *)ptr;
351*721fffe3SKacheong Poon 	tcp_t		*tcp = connp->conn_tcp;
352*721fffe3SKacheong Poon 	conn_opt_arg_t	coas;
353*721fffe3SKacheong Poon 	int		retval;
354*721fffe3SKacheong Poon 
355*721fffe3SKacheong Poon 	coas.coa_connp = connp;
356*721fffe3SKacheong Poon 	coas.coa_ixa = connp->conn_ixa;
357*721fffe3SKacheong Poon 	coas.coa_ipp = &connp->conn_xmit_ipp;
358*721fffe3SKacheong Poon 	coas.coa_ancillary = B_FALSE;
359*721fffe3SKacheong Poon 	coas.coa_changed = 0;
360*721fffe3SKacheong Poon 
361*721fffe3SKacheong Poon 	switch (level) {
362*721fffe3SKacheong Poon 	case SOL_SOCKET:
363*721fffe3SKacheong Poon 		switch (name) {
364*721fffe3SKacheong Poon 		case SO_SND_COPYAVOID:
365*721fffe3SKacheong Poon 			*i1 = tcp->tcp_snd_zcopy_on ?
366*721fffe3SKacheong Poon 			    SO_SND_COPYAVOID : 0;
367*721fffe3SKacheong Poon 			return (sizeof (int));
368*721fffe3SKacheong Poon 		case SO_ACCEPTCONN:
369*721fffe3SKacheong Poon 			*i1 = (tcp->tcp_state == TCPS_LISTEN);
370*721fffe3SKacheong Poon 			return (sizeof (int));
371*721fffe3SKacheong Poon 		}
372*721fffe3SKacheong Poon 		break;
373*721fffe3SKacheong Poon 	case IPPROTO_TCP:
374*721fffe3SKacheong Poon 		switch (name) {
375*721fffe3SKacheong Poon 		case TCP_NODELAY:
376*721fffe3SKacheong Poon 			*i1 = (tcp->tcp_naglim == 1) ? TCP_NODELAY : 0;
377*721fffe3SKacheong Poon 			return (sizeof (int));
378*721fffe3SKacheong Poon 		case TCP_MAXSEG:
379*721fffe3SKacheong Poon 			*i1 = tcp->tcp_mss;
380*721fffe3SKacheong Poon 			return (sizeof (int));
381*721fffe3SKacheong Poon 		case TCP_NOTIFY_THRESHOLD:
382*721fffe3SKacheong Poon 			*i1 = (int)tcp->tcp_first_timer_threshold;
383*721fffe3SKacheong Poon 			return (sizeof (int));
384*721fffe3SKacheong Poon 		case TCP_ABORT_THRESHOLD:
385*721fffe3SKacheong Poon 			*i1 = tcp->tcp_second_timer_threshold;
386*721fffe3SKacheong Poon 			return (sizeof (int));
387*721fffe3SKacheong Poon 		case TCP_CONN_NOTIFY_THRESHOLD:
388*721fffe3SKacheong Poon 			*i1 = tcp->tcp_first_ctimer_threshold;
389*721fffe3SKacheong Poon 			return (sizeof (int));
390*721fffe3SKacheong Poon 		case TCP_CONN_ABORT_THRESHOLD:
391*721fffe3SKacheong Poon 			*i1 = tcp->tcp_second_ctimer_threshold;
392*721fffe3SKacheong Poon 			return (sizeof (int));
393*721fffe3SKacheong Poon 		case TCP_INIT_CWND:
394*721fffe3SKacheong Poon 			*i1 = tcp->tcp_init_cwnd;
395*721fffe3SKacheong Poon 			return (sizeof (int));
396*721fffe3SKacheong Poon 		case TCP_KEEPALIVE_THRESHOLD:
397*721fffe3SKacheong Poon 			*i1 = tcp->tcp_ka_interval;
398*721fffe3SKacheong Poon 			return (sizeof (int));
399*721fffe3SKacheong Poon 		case TCP_KEEPALIVE_ABORT_THRESHOLD:
400*721fffe3SKacheong Poon 			*i1 = tcp->tcp_ka_abort_thres;
401*721fffe3SKacheong Poon 			return (sizeof (int));
402*721fffe3SKacheong Poon 		case TCP_CORK:
403*721fffe3SKacheong Poon 			*i1 = tcp->tcp_cork;
404*721fffe3SKacheong Poon 			return (sizeof (int));
405*721fffe3SKacheong Poon 		}
406*721fffe3SKacheong Poon 		break;
407*721fffe3SKacheong Poon 	case IPPROTO_IP:
408*721fffe3SKacheong Poon 		if (connp->conn_family != AF_INET)
409*721fffe3SKacheong Poon 			return (-1);
410*721fffe3SKacheong Poon 		switch (name) {
411*721fffe3SKacheong Poon 		case IP_OPTIONS:
412*721fffe3SKacheong Poon 		case T_IP_OPTIONS:
413*721fffe3SKacheong Poon 			/* Caller ensures enough space */
414*721fffe3SKacheong Poon 			return (ip_opt_get_user(connp, ptr));
415*721fffe3SKacheong Poon 		default:
416*721fffe3SKacheong Poon 			break;
417*721fffe3SKacheong Poon 		}
418*721fffe3SKacheong Poon 		break;
419*721fffe3SKacheong Poon 
420*721fffe3SKacheong Poon 	case IPPROTO_IPV6:
421*721fffe3SKacheong Poon 		/*
422*721fffe3SKacheong Poon 		 * IPPROTO_IPV6 options are only supported for sockets
423*721fffe3SKacheong Poon 		 * that are using IPv6 on the wire.
424*721fffe3SKacheong Poon 		 */
425*721fffe3SKacheong Poon 		if (connp->conn_ipversion != IPV6_VERSION) {
426*721fffe3SKacheong Poon 			return (-1);
427*721fffe3SKacheong Poon 		}
428*721fffe3SKacheong Poon 		switch (name) {
429*721fffe3SKacheong Poon 		case IPV6_PATHMTU:
430*721fffe3SKacheong Poon 			if (tcp->tcp_state < TCPS_ESTABLISHED)
431*721fffe3SKacheong Poon 				return (-1);
432*721fffe3SKacheong Poon 			break;
433*721fffe3SKacheong Poon 		}
434*721fffe3SKacheong Poon 		break;
435*721fffe3SKacheong Poon 	}
436*721fffe3SKacheong Poon 	mutex_enter(&connp->conn_lock);
437*721fffe3SKacheong Poon 	retval = conn_opt_get(&coas, level, name, ptr);
438*721fffe3SKacheong Poon 	mutex_exit(&connp->conn_lock);
439*721fffe3SKacheong Poon 	return (retval);
440*721fffe3SKacheong Poon }
441*721fffe3SKacheong Poon 
442*721fffe3SKacheong Poon /*
443*721fffe3SKacheong Poon  * We declare as 'int' rather than 'void' to satisfy pfi_t arg requirements.
444*721fffe3SKacheong Poon  * Parameters are assumed to be verified by the caller.
445*721fffe3SKacheong Poon  */
446*721fffe3SKacheong Poon /* ARGSUSED */
447*721fffe3SKacheong Poon int
448*721fffe3SKacheong Poon tcp_opt_set(conn_t *connp, uint_t optset_context, int level, int name,
449*721fffe3SKacheong Poon     uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
450*721fffe3SKacheong Poon     void *thisdg_attrs, cred_t *cr)
451*721fffe3SKacheong Poon {
452*721fffe3SKacheong Poon 	tcp_t	*tcp = connp->conn_tcp;
453*721fffe3SKacheong Poon 	int	*i1 = (int *)invalp;
454*721fffe3SKacheong Poon 	boolean_t onoff = (*i1 == 0) ? 0 : 1;
455*721fffe3SKacheong Poon 	boolean_t checkonly;
456*721fffe3SKacheong Poon 	int	reterr;
457*721fffe3SKacheong Poon 	tcp_stack_t	*tcps = tcp->tcp_tcps;
458*721fffe3SKacheong Poon 	conn_opt_arg_t	coas;
459*721fffe3SKacheong Poon 
460*721fffe3SKacheong Poon 	coas.coa_connp = connp;
461*721fffe3SKacheong Poon 	coas.coa_ixa = connp->conn_ixa;
462*721fffe3SKacheong Poon 	coas.coa_ipp = &connp->conn_xmit_ipp;
463*721fffe3SKacheong Poon 	coas.coa_ancillary = B_FALSE;
464*721fffe3SKacheong Poon 	coas.coa_changed = 0;
465*721fffe3SKacheong Poon 
466*721fffe3SKacheong Poon 	switch (optset_context) {
467*721fffe3SKacheong Poon 	case SETFN_OPTCOM_CHECKONLY:
468*721fffe3SKacheong Poon 		checkonly = B_TRUE;
469*721fffe3SKacheong Poon 		/*
470*721fffe3SKacheong Poon 		 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ
471*721fffe3SKacheong Poon 		 * inlen != 0 implies value supplied and
472*721fffe3SKacheong Poon 		 * 	we have to "pretend" to set it.
473*721fffe3SKacheong Poon 		 * inlen == 0 implies that there is no
474*721fffe3SKacheong Poon 		 * 	value part in T_CHECK request and just validation
475*721fffe3SKacheong Poon 		 * done elsewhere should be enough, we just return here.
476*721fffe3SKacheong Poon 		 */
477*721fffe3SKacheong Poon 		if (inlen == 0) {
478*721fffe3SKacheong Poon 			*outlenp = 0;
479*721fffe3SKacheong Poon 			return (0);
480*721fffe3SKacheong Poon 		}
481*721fffe3SKacheong Poon 		break;
482*721fffe3SKacheong Poon 	case SETFN_OPTCOM_NEGOTIATE:
483*721fffe3SKacheong Poon 		checkonly = B_FALSE;
484*721fffe3SKacheong Poon 		break;
485*721fffe3SKacheong Poon 	case SETFN_UD_NEGOTIATE: /* error on conn-oriented transports ? */
486*721fffe3SKacheong Poon 	case SETFN_CONN_NEGOTIATE:
487*721fffe3SKacheong Poon 		checkonly = B_FALSE;
488*721fffe3SKacheong Poon 		/*
489*721fffe3SKacheong Poon 		 * Negotiating local and "association-related" options
490*721fffe3SKacheong Poon 		 * from other (T_CONN_REQ, T_CONN_RES,T_UNITDATA_REQ)
491*721fffe3SKacheong Poon 		 * primitives is allowed by XTI, but we choose
492*721fffe3SKacheong Poon 		 * to not implement this style negotiation for Internet
493*721fffe3SKacheong Poon 		 * protocols (We interpret it is a must for OSI world but
494*721fffe3SKacheong Poon 		 * optional for Internet protocols) for all options.
495*721fffe3SKacheong Poon 		 * [ Will do only for the few options that enable test
496*721fffe3SKacheong Poon 		 * suites that our XTI implementation of this feature
497*721fffe3SKacheong Poon 		 * works for transports that do allow it ]
498*721fffe3SKacheong Poon 		 */
499*721fffe3SKacheong Poon 		if (!tcp_allow_connopt_set(level, name)) {
500*721fffe3SKacheong Poon 			*outlenp = 0;
501*721fffe3SKacheong Poon 			return (EINVAL);
502*721fffe3SKacheong Poon 		}
503*721fffe3SKacheong Poon 		break;
504*721fffe3SKacheong Poon 	default:
505*721fffe3SKacheong Poon 		/*
506*721fffe3SKacheong Poon 		 * We should never get here
507*721fffe3SKacheong Poon 		 */
508*721fffe3SKacheong Poon 		*outlenp = 0;
509*721fffe3SKacheong Poon 		return (EINVAL);
510*721fffe3SKacheong Poon 	}
511*721fffe3SKacheong Poon 
512*721fffe3SKacheong Poon 	ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) ||
513*721fffe3SKacheong Poon 	    (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0));
514*721fffe3SKacheong Poon 
515*721fffe3SKacheong Poon 	/*
516*721fffe3SKacheong Poon 	 * For TCP, we should have no ancillary data sent down
517*721fffe3SKacheong Poon 	 * (sendmsg isn't supported for SOCK_STREAM), so thisdg_attrs
518*721fffe3SKacheong Poon 	 * has to be zero.
519*721fffe3SKacheong Poon 	 */
520*721fffe3SKacheong Poon 	ASSERT(thisdg_attrs == NULL);
521*721fffe3SKacheong Poon 
522*721fffe3SKacheong Poon 	/*
523*721fffe3SKacheong Poon 	 * For fixed length options, no sanity check
524*721fffe3SKacheong Poon 	 * of passed in length is done. It is assumed *_optcom_req()
525*721fffe3SKacheong Poon 	 * routines do the right thing.
526*721fffe3SKacheong Poon 	 */
527*721fffe3SKacheong Poon 	switch (level) {
528*721fffe3SKacheong Poon 	case SOL_SOCKET:
529*721fffe3SKacheong Poon 		switch (name) {
530*721fffe3SKacheong Poon 		case SO_KEEPALIVE:
531*721fffe3SKacheong Poon 			if (checkonly) {
532*721fffe3SKacheong Poon 				/* check only case */
533*721fffe3SKacheong Poon 				break;
534*721fffe3SKacheong Poon 			}
535*721fffe3SKacheong Poon 
536*721fffe3SKacheong Poon 			if (!onoff) {
537*721fffe3SKacheong Poon 				if (connp->conn_keepalive) {
538*721fffe3SKacheong Poon 					if (tcp->tcp_ka_tid != 0) {
539*721fffe3SKacheong Poon 						(void) TCP_TIMER_CANCEL(tcp,
540*721fffe3SKacheong Poon 						    tcp->tcp_ka_tid);
541*721fffe3SKacheong Poon 						tcp->tcp_ka_tid = 0;
542*721fffe3SKacheong Poon 					}
543*721fffe3SKacheong Poon 					connp->conn_keepalive = 0;
544*721fffe3SKacheong Poon 				}
545*721fffe3SKacheong Poon 				break;
546*721fffe3SKacheong Poon 			}
547*721fffe3SKacheong Poon 			if (!connp->conn_keepalive) {
548*721fffe3SKacheong Poon 				/* Crank up the keepalive timer */
549*721fffe3SKacheong Poon 				tcp->tcp_ka_last_intrvl = 0;
550*721fffe3SKacheong Poon 				tcp->tcp_ka_tid = TCP_TIMER(tcp,
551*721fffe3SKacheong Poon 				    tcp_keepalive_timer,
552*721fffe3SKacheong Poon 				    MSEC_TO_TICK(tcp->tcp_ka_interval));
553*721fffe3SKacheong Poon 				connp->conn_keepalive = 1;
554*721fffe3SKacheong Poon 			}
555*721fffe3SKacheong Poon 			break;
556*721fffe3SKacheong Poon 		case SO_SNDBUF: {
557*721fffe3SKacheong Poon 			if (*i1 > tcps->tcps_max_buf) {
558*721fffe3SKacheong Poon 				*outlenp = 0;
559*721fffe3SKacheong Poon 				return (ENOBUFS);
560*721fffe3SKacheong Poon 			}
561*721fffe3SKacheong Poon 			if (checkonly)
562*721fffe3SKacheong Poon 				break;
563*721fffe3SKacheong Poon 
564*721fffe3SKacheong Poon 			connp->conn_sndbuf = *i1;
565*721fffe3SKacheong Poon 			if (tcps->tcps_snd_lowat_fraction != 0) {
566*721fffe3SKacheong Poon 				connp->conn_sndlowat = connp->conn_sndbuf /
567*721fffe3SKacheong Poon 				    tcps->tcps_snd_lowat_fraction;
568*721fffe3SKacheong Poon 			}
569*721fffe3SKacheong Poon 			(void) tcp_maxpsz_set(tcp, B_TRUE);
570*721fffe3SKacheong Poon 			/*
571*721fffe3SKacheong Poon 			 * If we are flow-controlled, recheck the condition.
572*721fffe3SKacheong Poon 			 * There are apps that increase SO_SNDBUF size when
573*721fffe3SKacheong Poon 			 * flow-controlled (EWOULDBLOCK), and expect the flow
574*721fffe3SKacheong Poon 			 * control condition to be lifted right away.
575*721fffe3SKacheong Poon 			 */
576*721fffe3SKacheong Poon 			mutex_enter(&tcp->tcp_non_sq_lock);
577*721fffe3SKacheong Poon 			if (tcp->tcp_flow_stopped &&
578*721fffe3SKacheong Poon 			    TCP_UNSENT_BYTES(tcp) < connp->conn_sndbuf) {
579*721fffe3SKacheong Poon 				tcp_clrqfull(tcp);
580*721fffe3SKacheong Poon 			}
581*721fffe3SKacheong Poon 			mutex_exit(&tcp->tcp_non_sq_lock);
582*721fffe3SKacheong Poon 			*outlenp = inlen;
583*721fffe3SKacheong Poon 			return (0);
584*721fffe3SKacheong Poon 		}
585*721fffe3SKacheong Poon 		case SO_RCVBUF:
586*721fffe3SKacheong Poon 			if (*i1 > tcps->tcps_max_buf) {
587*721fffe3SKacheong Poon 				*outlenp = 0;
588*721fffe3SKacheong Poon 				return (ENOBUFS);
589*721fffe3SKacheong Poon 			}
590*721fffe3SKacheong Poon 			/* Silently ignore zero */
591*721fffe3SKacheong Poon 			if (!checkonly && *i1 != 0) {
592*721fffe3SKacheong Poon 				*i1 = MSS_ROUNDUP(*i1, tcp->tcp_mss);
593*721fffe3SKacheong Poon 				(void) tcp_rwnd_set(tcp, *i1);
594*721fffe3SKacheong Poon 			}
595*721fffe3SKacheong Poon 			/*
596*721fffe3SKacheong Poon 			 * XXX should we return the rwnd here
597*721fffe3SKacheong Poon 			 * and tcp_opt_get ?
598*721fffe3SKacheong Poon 			 */
599*721fffe3SKacheong Poon 			*outlenp = inlen;
600*721fffe3SKacheong Poon 			return (0);
601*721fffe3SKacheong Poon 		case SO_SND_COPYAVOID:
602*721fffe3SKacheong Poon 			if (!checkonly) {
603*721fffe3SKacheong Poon 				if (tcp->tcp_loopback ||
604*721fffe3SKacheong Poon 				    (tcp->tcp_kssl_ctx != NULL) ||
605*721fffe3SKacheong Poon 				    (onoff != 1) || !tcp_zcopy_check(tcp)) {
606*721fffe3SKacheong Poon 					*outlenp = 0;
607*721fffe3SKacheong Poon 					return (EOPNOTSUPP);
608*721fffe3SKacheong Poon 				}
609*721fffe3SKacheong Poon 				tcp->tcp_snd_zcopy_aware = 1;
610*721fffe3SKacheong Poon 			}
611*721fffe3SKacheong Poon 			*outlenp = inlen;
612*721fffe3SKacheong Poon 			return (0);
613*721fffe3SKacheong Poon 		}
614*721fffe3SKacheong Poon 		break;
615*721fffe3SKacheong Poon 	case IPPROTO_TCP:
616*721fffe3SKacheong Poon 		switch (name) {
617*721fffe3SKacheong Poon 		case TCP_NODELAY:
618*721fffe3SKacheong Poon 			if (!checkonly)
619*721fffe3SKacheong Poon 				tcp->tcp_naglim = *i1 ? 1 : tcp->tcp_mss;
620*721fffe3SKacheong Poon 			break;
621*721fffe3SKacheong Poon 		case TCP_NOTIFY_THRESHOLD:
622*721fffe3SKacheong Poon 			if (!checkonly)
623*721fffe3SKacheong Poon 				tcp->tcp_first_timer_threshold = *i1;
624*721fffe3SKacheong Poon 			break;
625*721fffe3SKacheong Poon 		case TCP_ABORT_THRESHOLD:
626*721fffe3SKacheong Poon 			if (!checkonly)
627*721fffe3SKacheong Poon 				tcp->tcp_second_timer_threshold = *i1;
628*721fffe3SKacheong Poon 			break;
629*721fffe3SKacheong Poon 		case TCP_CONN_NOTIFY_THRESHOLD:
630*721fffe3SKacheong Poon 			if (!checkonly)
631*721fffe3SKacheong Poon 				tcp->tcp_first_ctimer_threshold = *i1;
632*721fffe3SKacheong Poon 			break;
633*721fffe3SKacheong Poon 		case TCP_CONN_ABORT_THRESHOLD:
634*721fffe3SKacheong Poon 			if (!checkonly)
635*721fffe3SKacheong Poon 				tcp->tcp_second_ctimer_threshold = *i1;
636*721fffe3SKacheong Poon 			break;
637*721fffe3SKacheong Poon 		case TCP_RECVDSTADDR:
638*721fffe3SKacheong Poon 			if (tcp->tcp_state > TCPS_LISTEN) {
639*721fffe3SKacheong Poon 				*outlenp = 0;
640*721fffe3SKacheong Poon 				return (EOPNOTSUPP);
641*721fffe3SKacheong Poon 			}
642*721fffe3SKacheong Poon 			/* Setting done in conn_opt_set */
643*721fffe3SKacheong Poon 			break;
644*721fffe3SKacheong Poon 		case TCP_INIT_CWND: {
645*721fffe3SKacheong Poon 			uint32_t init_cwnd = *((uint32_t *)invalp);
646*721fffe3SKacheong Poon 
647*721fffe3SKacheong Poon 			if (checkonly)
648*721fffe3SKacheong Poon 				break;
649*721fffe3SKacheong Poon 
650*721fffe3SKacheong Poon 			/*
651*721fffe3SKacheong Poon 			 * Only allow socket with network configuration
652*721fffe3SKacheong Poon 			 * privilege to set the initial cwnd to be larger
653*721fffe3SKacheong Poon 			 * than allowed by RFC 3390.
654*721fffe3SKacheong Poon 			 */
655*721fffe3SKacheong Poon 			if (init_cwnd <= MIN(4, MAX(2, 4380 / tcp->tcp_mss))) {
656*721fffe3SKacheong Poon 				tcp->tcp_init_cwnd = init_cwnd;
657*721fffe3SKacheong Poon 				break;
658*721fffe3SKacheong Poon 			}
659*721fffe3SKacheong Poon 			if ((reterr = secpolicy_ip_config(cr, B_TRUE)) != 0) {
660*721fffe3SKacheong Poon 				*outlenp = 0;
661*721fffe3SKacheong Poon 				return (reterr);
662*721fffe3SKacheong Poon 			}
663*721fffe3SKacheong Poon 			if (init_cwnd > tcp_max_init_cwnd) {
664*721fffe3SKacheong Poon 				*outlenp = 0;
665*721fffe3SKacheong Poon 				return (EINVAL);
666*721fffe3SKacheong Poon 			}
667*721fffe3SKacheong Poon 			tcp->tcp_init_cwnd = init_cwnd;
668*721fffe3SKacheong Poon 			break;
669*721fffe3SKacheong Poon 		}
670*721fffe3SKacheong Poon 		case TCP_KEEPALIVE_THRESHOLD:
671*721fffe3SKacheong Poon 			if (checkonly)
672*721fffe3SKacheong Poon 				break;
673*721fffe3SKacheong Poon 
674*721fffe3SKacheong Poon 			if (*i1 < tcps->tcps_keepalive_interval_low ||
675*721fffe3SKacheong Poon 			    *i1 > tcps->tcps_keepalive_interval_high) {
676*721fffe3SKacheong Poon 				*outlenp = 0;
677*721fffe3SKacheong Poon 				return (EINVAL);
678*721fffe3SKacheong Poon 			}
679*721fffe3SKacheong Poon 			if (*i1 != tcp->tcp_ka_interval) {
680*721fffe3SKacheong Poon 				tcp->tcp_ka_interval = *i1;
681*721fffe3SKacheong Poon 				/*
682*721fffe3SKacheong Poon 				 * Check if we need to restart the
683*721fffe3SKacheong Poon 				 * keepalive timer.
684*721fffe3SKacheong Poon 				 */
685*721fffe3SKacheong Poon 				if (tcp->tcp_ka_tid != 0) {
686*721fffe3SKacheong Poon 					ASSERT(connp->conn_keepalive);
687*721fffe3SKacheong Poon 					(void) TCP_TIMER_CANCEL(tcp,
688*721fffe3SKacheong Poon 					    tcp->tcp_ka_tid);
689*721fffe3SKacheong Poon 					tcp->tcp_ka_last_intrvl = 0;
690*721fffe3SKacheong Poon 					tcp->tcp_ka_tid = TCP_TIMER(tcp,
691*721fffe3SKacheong Poon 					    tcp_keepalive_timer,
692*721fffe3SKacheong Poon 					    MSEC_TO_TICK(tcp->tcp_ka_interval));
693*721fffe3SKacheong Poon 				}
694*721fffe3SKacheong Poon 			}
695*721fffe3SKacheong Poon 			break;
696*721fffe3SKacheong Poon 		case TCP_KEEPALIVE_ABORT_THRESHOLD:
697*721fffe3SKacheong Poon 			if (!checkonly) {
698*721fffe3SKacheong Poon 				if (*i1 <
699*721fffe3SKacheong Poon 				    tcps->tcps_keepalive_abort_interval_low ||
700*721fffe3SKacheong Poon 				    *i1 >
701*721fffe3SKacheong Poon 				    tcps->tcps_keepalive_abort_interval_high) {
702*721fffe3SKacheong Poon 					*outlenp = 0;
703*721fffe3SKacheong Poon 					return (EINVAL);
704*721fffe3SKacheong Poon 				}
705*721fffe3SKacheong Poon 				tcp->tcp_ka_abort_thres = *i1;
706*721fffe3SKacheong Poon 			}
707*721fffe3SKacheong Poon 			break;
708*721fffe3SKacheong Poon 		case TCP_CORK:
709*721fffe3SKacheong Poon 			if (!checkonly) {
710*721fffe3SKacheong Poon 				/*
711*721fffe3SKacheong Poon 				 * if tcp->tcp_cork was set and is now
712*721fffe3SKacheong Poon 				 * being unset, we have to make sure that
713*721fffe3SKacheong Poon 				 * the remaining data gets sent out. Also
714*721fffe3SKacheong Poon 				 * unset tcp->tcp_cork so that tcp_wput_data()
715*721fffe3SKacheong Poon 				 * can send data even if it is less than mss
716*721fffe3SKacheong Poon 				 */
717*721fffe3SKacheong Poon 				if (tcp->tcp_cork && onoff == 0 &&
718*721fffe3SKacheong Poon 				    tcp->tcp_unsent > 0) {
719*721fffe3SKacheong Poon 					tcp->tcp_cork = B_FALSE;
720*721fffe3SKacheong Poon 					tcp_wput_data(tcp, NULL, B_FALSE);
721*721fffe3SKacheong Poon 				}
722*721fffe3SKacheong Poon 				tcp->tcp_cork = onoff;
723*721fffe3SKacheong Poon 			}
724*721fffe3SKacheong Poon 			break;
725*721fffe3SKacheong Poon 		default:
726*721fffe3SKacheong Poon 			break;
727*721fffe3SKacheong Poon 		}
728*721fffe3SKacheong Poon 		break;
729*721fffe3SKacheong Poon 	case IPPROTO_IP:
730*721fffe3SKacheong Poon 		if (connp->conn_family != AF_INET) {
731*721fffe3SKacheong Poon 			*outlenp = 0;
732*721fffe3SKacheong Poon 			return (EINVAL);
733*721fffe3SKacheong Poon 		}
734*721fffe3SKacheong Poon 		switch (name) {
735*721fffe3SKacheong Poon 		case IP_SEC_OPT:
736*721fffe3SKacheong Poon 			/*
737*721fffe3SKacheong Poon 			 * We should not allow policy setting after
738*721fffe3SKacheong Poon 			 * we start listening for connections.
739*721fffe3SKacheong Poon 			 */
740*721fffe3SKacheong Poon 			if (tcp->tcp_state == TCPS_LISTEN) {
741*721fffe3SKacheong Poon 				return (EINVAL);
742*721fffe3SKacheong Poon 			}
743*721fffe3SKacheong Poon 			break;
744*721fffe3SKacheong Poon 		}
745*721fffe3SKacheong Poon 		break;
746*721fffe3SKacheong Poon 	case IPPROTO_IPV6:
747*721fffe3SKacheong Poon 		/*
748*721fffe3SKacheong Poon 		 * IPPROTO_IPV6 options are only supported for sockets
749*721fffe3SKacheong Poon 		 * that are using IPv6 on the wire.
750*721fffe3SKacheong Poon 		 */
751*721fffe3SKacheong Poon 		if (connp->conn_ipversion != IPV6_VERSION) {
752*721fffe3SKacheong Poon 			*outlenp = 0;
753*721fffe3SKacheong Poon 			return (EINVAL);
754*721fffe3SKacheong Poon 		}
755*721fffe3SKacheong Poon 
756*721fffe3SKacheong Poon 		switch (name) {
757*721fffe3SKacheong Poon 		case IPV6_RECVPKTINFO:
758*721fffe3SKacheong Poon 			if (!checkonly) {
759*721fffe3SKacheong Poon 				/* Force it to be sent up with the next msg */
760*721fffe3SKacheong Poon 				tcp->tcp_recvifindex = 0;
761*721fffe3SKacheong Poon 			}
762*721fffe3SKacheong Poon 			break;
763*721fffe3SKacheong Poon 		case IPV6_RECVTCLASS:
764*721fffe3SKacheong Poon 			if (!checkonly) {
765*721fffe3SKacheong Poon 				/* Force it to be sent up with the next msg */
766*721fffe3SKacheong Poon 				tcp->tcp_recvtclass = 0xffffffffU;
767*721fffe3SKacheong Poon 			}
768*721fffe3SKacheong Poon 			break;
769*721fffe3SKacheong Poon 		case IPV6_RECVHOPLIMIT:
770*721fffe3SKacheong Poon 			if (!checkonly) {
771*721fffe3SKacheong Poon 				/* Force it to be sent up with the next msg */
772*721fffe3SKacheong Poon 				tcp->tcp_recvhops = 0xffffffffU;
773*721fffe3SKacheong Poon 			}
774*721fffe3SKacheong Poon 			break;
775*721fffe3SKacheong Poon 		case IPV6_PKTINFO:
776*721fffe3SKacheong Poon 			/* This is an extra check for TCP */
777*721fffe3SKacheong Poon 			if (inlen == sizeof (struct in6_pktinfo)) {
778*721fffe3SKacheong Poon 				struct in6_pktinfo *pkti;
779*721fffe3SKacheong Poon 
780*721fffe3SKacheong Poon 				pkti = (struct in6_pktinfo *)invalp;
781*721fffe3SKacheong Poon 				/*
782*721fffe3SKacheong Poon 				 * RFC 3542 states that ipi6_addr must be
783*721fffe3SKacheong Poon 				 * the unspecified address when setting the
784*721fffe3SKacheong Poon 				 * IPV6_PKTINFO sticky socket option on a
785*721fffe3SKacheong Poon 				 * TCP socket.
786*721fffe3SKacheong Poon 				 */
787*721fffe3SKacheong Poon 				if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr))
788*721fffe3SKacheong Poon 					return (EINVAL);
789*721fffe3SKacheong Poon 			}
790*721fffe3SKacheong Poon 			break;
791*721fffe3SKacheong Poon 		case IPV6_SEC_OPT:
792*721fffe3SKacheong Poon 			/*
793*721fffe3SKacheong Poon 			 * We should not allow policy setting after
794*721fffe3SKacheong Poon 			 * we start listening for connections.
795*721fffe3SKacheong Poon 			 */
796*721fffe3SKacheong Poon 			if (tcp->tcp_state == TCPS_LISTEN) {
797*721fffe3SKacheong Poon 				return (EINVAL);
798*721fffe3SKacheong Poon 			}
799*721fffe3SKacheong Poon 			break;
800*721fffe3SKacheong Poon 		}
801*721fffe3SKacheong Poon 		break;
802*721fffe3SKacheong Poon 	}
803*721fffe3SKacheong Poon 	reterr = conn_opt_set(&coas, level, name, inlen, invalp,
804*721fffe3SKacheong Poon 	    checkonly, cr);
805*721fffe3SKacheong Poon 	if (reterr != 0) {
806*721fffe3SKacheong Poon 		*outlenp = 0;
807*721fffe3SKacheong Poon 		return (reterr);
808*721fffe3SKacheong Poon 	}
809*721fffe3SKacheong Poon 
810*721fffe3SKacheong Poon 	/*
811*721fffe3SKacheong Poon 	 * Common case of OK return with outval same as inval
812*721fffe3SKacheong Poon 	 */
813*721fffe3SKacheong Poon 	if (invalp != outvalp) {
814*721fffe3SKacheong Poon 		/* don't trust bcopy for identical src/dst */
815*721fffe3SKacheong Poon 		(void) bcopy(invalp, outvalp, inlen);
816*721fffe3SKacheong Poon 	}
817*721fffe3SKacheong Poon 	*outlenp = inlen;
818*721fffe3SKacheong Poon 
819*721fffe3SKacheong Poon 	if (coas.coa_changed & COA_HEADER_CHANGED) {
820*721fffe3SKacheong Poon 		/* If we are connected we rebuild the headers */
821*721fffe3SKacheong Poon 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) &&
822*721fffe3SKacheong Poon 		    !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
823*721fffe3SKacheong Poon 			reterr = tcp_build_hdrs(tcp);
824*721fffe3SKacheong Poon 			if (reterr != 0)
825*721fffe3SKacheong Poon 				return (reterr);
826*721fffe3SKacheong Poon 		}
827*721fffe3SKacheong Poon 	}
828*721fffe3SKacheong Poon 	if (coas.coa_changed & COA_ROUTE_CHANGED) {
829*721fffe3SKacheong Poon 		in6_addr_t nexthop;
830*721fffe3SKacheong Poon 
831*721fffe3SKacheong Poon 		/*
832*721fffe3SKacheong Poon 		 * If we are connected we re-cache the information.
833*721fffe3SKacheong Poon 		 * We ignore errors to preserve BSD behavior.
834*721fffe3SKacheong Poon 		 * Note that we don't redo IPsec policy lookup here
835*721fffe3SKacheong Poon 		 * since the final destination (or source) didn't change.
836*721fffe3SKacheong Poon 		 */
837*721fffe3SKacheong Poon 		ip_attr_nexthop(&connp->conn_xmit_ipp, connp->conn_ixa,
838*721fffe3SKacheong Poon 		    &connp->conn_faddr_v6, &nexthop);
839*721fffe3SKacheong Poon 
840*721fffe3SKacheong Poon 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) &&
841*721fffe3SKacheong Poon 		    !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
842*721fffe3SKacheong Poon 			(void) ip_attr_connect(connp, connp->conn_ixa,
843*721fffe3SKacheong Poon 			    &connp->conn_laddr_v6, &connp->conn_faddr_v6,
844*721fffe3SKacheong Poon 			    &nexthop, connp->conn_fport, NULL, NULL,
845*721fffe3SKacheong Poon 			    IPDF_VERIFY_DST);
846*721fffe3SKacheong Poon 		}
847*721fffe3SKacheong Poon 	}
848*721fffe3SKacheong Poon 	if ((coas.coa_changed & COA_SNDBUF_CHANGED) && !IPCL_IS_NONSTR(connp)) {
849*721fffe3SKacheong Poon 		connp->conn_wq->q_hiwat = connp->conn_sndbuf;
850*721fffe3SKacheong Poon 	}
851*721fffe3SKacheong Poon 	if (coas.coa_changed & COA_WROFF_CHANGED) {
852*721fffe3SKacheong Poon 		connp->conn_wroff = connp->conn_ht_iphc_allocated +
853*721fffe3SKacheong Poon 		    tcps->tcps_wroff_xtra;
854*721fffe3SKacheong Poon 		(void) proto_set_tx_wroff(connp->conn_rq, connp,
855*721fffe3SKacheong Poon 		    connp->conn_wroff);
856*721fffe3SKacheong Poon 	}
857*721fffe3SKacheong Poon 	if (coas.coa_changed & COA_OOBINLINE_CHANGED) {
858*721fffe3SKacheong Poon 		if (IPCL_IS_NONSTR(connp))
859*721fffe3SKacheong Poon 			proto_set_rx_oob_opt(connp, onoff);
860*721fffe3SKacheong Poon 	}
861*721fffe3SKacheong Poon 	return (0);
862*721fffe3SKacheong Poon }
863