xref: /illumos-gate/usr/src/uts/common/inet/tcp/tcp_opt_data.c (revision 66cd0f60c3182913d379abb730ae755bf6367126)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
545916cd2Sjpk  * Common Development and Distribution License (the "License").
645916cd2Sjpk  * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate  *
87c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate  * and limitations under the License.
127c478bd9Sstevel@tonic-gate  *
137c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate  *
197c478bd9Sstevel@tonic-gate  * CDDL HEADER END
207c478bd9Sstevel@tonic-gate  */
217c478bd9Sstevel@tonic-gate /*
22*66cd0f60SKacheong Poon  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
237c478bd9Sstevel@tonic-gate  */
247c478bd9Sstevel@tonic-gate 
257c478bd9Sstevel@tonic-gate #include <sys/types.h>
267c478bd9Sstevel@tonic-gate #include <sys/stream.h>
277c478bd9Sstevel@tonic-gate #define	_SUN_TPI_VERSION 2
287c478bd9Sstevel@tonic-gate #include <sys/tihdr.h>
297c478bd9Sstevel@tonic-gate #include <sys/socket.h>
307c478bd9Sstevel@tonic-gate #include <sys/xti_xtiopt.h>
317c478bd9Sstevel@tonic-gate #include <sys/xti_inet.h>
32721fffe3SKacheong Poon #include <sys/policy.h>
337c478bd9Sstevel@tonic-gate 
347c478bd9Sstevel@tonic-gate #include <inet/common.h>
357c478bd9Sstevel@tonic-gate #include <netinet/ip6.h>
367c478bd9Sstevel@tonic-gate #include <inet/ip.h>
377c478bd9Sstevel@tonic-gate 
387c478bd9Sstevel@tonic-gate #include <netinet/in.h>
397c478bd9Sstevel@tonic-gate #include <netinet/tcp.h>
407c478bd9Sstevel@tonic-gate #include <inet/optcom.h>
41721fffe3SKacheong Poon #include <inet/proto_set.h>
42bd670b35SErik Nordmark #include <inet/tcp_impl.h>
437c478bd9Sstevel@tonic-gate 
447c478bd9Sstevel@tonic-gate /*
457c478bd9Sstevel@tonic-gate  * Table of all known options handled on a TCP protocol stack.
467c478bd9Sstevel@tonic-gate  *
477c478bd9Sstevel@tonic-gate  * Note: This table contains options processed by both TCP and IP levels
487c478bd9Sstevel@tonic-gate  *       and is the superset of options that can be performed on a TCP over IP
497c478bd9Sstevel@tonic-gate  *       stack.
507c478bd9Sstevel@tonic-gate  */
517c478bd9Sstevel@tonic-gate opdes_t	tcp_opt_arr[] = {
527c478bd9Sstevel@tonic-gate 
53bd670b35SErik Nordmark { SO_LINGER,	SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0,
547c478bd9Sstevel@tonic-gate 	sizeof (struct linger), 0 },
557c478bd9Sstevel@tonic-gate 
56bd670b35SErik Nordmark { SO_DEBUG,	SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
57bd670b35SErik Nordmark { SO_KEEPALIVE,	SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
58bd670b35SErik Nordmark { SO_DONTROUTE,	SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
59bd670b35SErik Nordmark { SO_USELOOPBACK, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0
607c478bd9Sstevel@tonic-gate 	},
61bd670b35SErik Nordmark { SO_BROADCAST,	SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
62bd670b35SErik Nordmark { SO_REUSEADDR, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
63bd670b35SErik Nordmark { SO_OOBINLINE, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
64bd670b35SErik Nordmark { SO_TYPE,	SOL_SOCKET, OA_R, OA_R, OP_NP, 0, sizeof (int), 0 },
65bd670b35SErik Nordmark { SO_SNDBUF,	SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
66bd670b35SErik Nordmark { SO_RCVBUF,	SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
67bd670b35SErik Nordmark { SO_SNDTIMEO,	SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0,
683986c91eSanders 	sizeof (struct timeval), 0 },
69bd670b35SErik Nordmark { SO_RCVTIMEO,	SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0,
703986c91eSanders 	sizeof (struct timeval), 0 },
71bd670b35SErik Nordmark { SO_DGRAM_ERRIND, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0
727c478bd9Sstevel@tonic-gate 	},
737c478bd9Sstevel@tonic-gate { SO_SND_COPYAVOID, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
74bd670b35SErik Nordmark { SO_ANON_MLP, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int),
755d0bc3edSsommerfe 	0 },
76bd670b35SErik Nordmark { SO_MAC_EXEMPT, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int),
775d0bc3edSsommerfe 	0 },
78bd670b35SErik Nordmark { SO_MAC_IMPLICIT, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int),
795d3b8cb7SBill Sommerfeld 	0 },
80bd670b35SErik Nordmark { SO_ALLZONES, SOL_SOCKET, OA_R, OA_RW, OP_CONFIG, 0, sizeof (int),
815d0bc3edSsommerfe 	0 },
82bd670b35SErik Nordmark { SO_EXCLBIND, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
83ae347574Skcpoon 
84bd670b35SErik Nordmark { SO_DOMAIN,	SOL_SOCKET, OA_R, OA_R, OP_NP, 0, sizeof (int), 0 },
8588cda078Skcpoon 
86bd670b35SErik Nordmark { SO_PROTOTYPE,	SOL_SOCKET, OA_R, OA_R, OP_NP, 0, sizeof (int), 0 },
8788cda078Skcpoon 
88bd670b35SErik Nordmark { TCP_NODELAY,	IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0
897c478bd9Sstevel@tonic-gate 	},
90bd670b35SErik Nordmark { TCP_MAXSEG,	IPPROTO_TCP, OA_R, OA_R, OP_NP, 0, sizeof (uint_t),
917c478bd9Sstevel@tonic-gate 	536 },
927c478bd9Sstevel@tonic-gate 
937c478bd9Sstevel@tonic-gate { TCP_NOTIFY_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP,
94bd670b35SErik Nordmark 	OP_DEF_FN, sizeof (int), -1 /* not initialized */ },
957c478bd9Sstevel@tonic-gate 
967c478bd9Sstevel@tonic-gate { TCP_ABORT_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP,
97bd670b35SErik Nordmark 	OP_DEF_FN, sizeof (int), -1 /* not initialized */ },
987c478bd9Sstevel@tonic-gate 
997c478bd9Sstevel@tonic-gate { TCP_CONN_NOTIFY_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP,
100bd670b35SErik Nordmark 	OP_DEF_FN, sizeof (int), -1 /* not initialized */ },
1017c478bd9Sstevel@tonic-gate 
1027c478bd9Sstevel@tonic-gate { TCP_CONN_ABORT_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP,
103bd670b35SErik Nordmark 	OP_DEF_FN, sizeof (int), -1 /* not initialized */ },
1047c478bd9Sstevel@tonic-gate 
105bd670b35SErik Nordmark { TCP_RECVDSTADDR, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int),
1067c478bd9Sstevel@tonic-gate 	0 },
1077c478bd9Sstevel@tonic-gate 
108bd670b35SErik Nordmark { TCP_ANONPRIVBIND, IPPROTO_TCP, OA_R, OA_RW, OP_PRIVPORT, 0,
1097c478bd9Sstevel@tonic-gate 	sizeof (int), 0 },
1107c478bd9Sstevel@tonic-gate 
111bd670b35SErik Nordmark { TCP_EXCLBIND, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0
1127c478bd9Sstevel@tonic-gate 	},
1137c478bd9Sstevel@tonic-gate 
114bd670b35SErik Nordmark { TCP_INIT_CWND, IPPROTO_TCP, OA_RW, OA_RW, OP_CONFIG, 0,
1157c478bd9Sstevel@tonic-gate 	sizeof (int), 0 },
1167c478bd9Sstevel@tonic-gate 
117bd670b35SErik Nordmark { TCP_KEEPALIVE_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0,
1187c478bd9Sstevel@tonic-gate 	sizeof (int), 0	},
1197c478bd9Sstevel@tonic-gate 
120bd670b35SErik Nordmark { TCP_KEEPALIVE_ABORT_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0,
1217c478bd9Sstevel@tonic-gate 	sizeof (int), 0	},
1227c478bd9Sstevel@tonic-gate 
123bd670b35SErik Nordmark { TCP_CORK, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
1247c478bd9Sstevel@tonic-gate 
1257c478bd9Sstevel@tonic-gate { IP_OPTIONS,	IPPROTO_IP, OA_RW, OA_RW, OP_NP,
126bd670b35SErik Nordmark 	(OP_VARLEN|OP_NODEFAULT),
1270f1702c5SYu Xiangning 	IP_MAX_OPT_LENGTH + IP_ADDR_LEN, -1 /* not initialized */ },
1287c478bd9Sstevel@tonic-gate { T_IP_OPTIONS,	IPPROTO_IP, OA_RW, OA_RW, OP_NP,
129bd670b35SErik Nordmark 	(OP_VARLEN|OP_NODEFAULT),
1300f1702c5SYu Xiangning 	IP_MAX_OPT_LENGTH + IP_ADDR_LEN, -1 /* not initialized */ },
1317c478bd9Sstevel@tonic-gate 
132bd670b35SErik Nordmark { IP_TOS,	IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
133bd670b35SErik Nordmark { T_IP_TOS,	IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
134bd670b35SErik Nordmark { IP_TTL,	IPPROTO_IP, OA_RW, OA_RW, OP_NP, OP_DEF_FN,
1357c478bd9Sstevel@tonic-gate 	sizeof (int), -1 /* not initialized */ },
1367c478bd9Sstevel@tonic-gate 
137bd670b35SErik Nordmark { IP_SEC_OPT, IPPROTO_IP, OA_RW, OA_RW, OP_NP, OP_NODEFAULT,
1387c478bd9Sstevel@tonic-gate 	sizeof (ipsec_req_t), -1 /* not initialized */ },
1397c478bd9Sstevel@tonic-gate 
140bd670b35SErik Nordmark { IP_BOUND_IF, IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0,
1417c478bd9Sstevel@tonic-gate 	sizeof (int),	0 /* no ifindex */ },
1427c478bd9Sstevel@tonic-gate 
143bd670b35SErik Nordmark { IP_UNSPEC_SRC, IPPROTO_IP, OA_R, OA_RW, OP_RAW, 0,
1447c478bd9Sstevel@tonic-gate 	sizeof (int), 0 },
1457c478bd9Sstevel@tonic-gate 
146bd670b35SErik Nordmark { IPV6_UNICAST_HOPS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, OP_DEF_FN,
1477c478bd9Sstevel@tonic-gate 	sizeof (int), -1 /* not initialized */ },
1487c478bd9Sstevel@tonic-gate 
149bd670b35SErik Nordmark { IPV6_BOUND_IF, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
1507c478bd9Sstevel@tonic-gate 	sizeof (int),	0 /* no ifindex */ },
1517c478bd9Sstevel@tonic-gate 
152bd670b35SErik Nordmark { IP_DONTFRAG, IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
153bd670b35SErik Nordmark 
154bd670b35SErik Nordmark { IP_NEXTHOP, IPPROTO_IP, OA_R, OA_RW, OP_CONFIG, 0,
15543d18f1cSpriyanka 	sizeof (in_addr_t),	-1 /* not initialized  */ },
15643d18f1cSpriyanka 
157bd670b35SErik Nordmark { IPV6_UNSPEC_SRC, IPPROTO_IPV6, OA_R, OA_RW, OP_RAW, 0,
1587c478bd9Sstevel@tonic-gate 	sizeof (int), 0 },
1597c478bd9Sstevel@tonic-gate 
1607c478bd9Sstevel@tonic-gate { IPV6_PKTINFO, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
161bd670b35SErik Nordmark 	(OP_NODEFAULT|OP_VARLEN),
1627c478bd9Sstevel@tonic-gate 	sizeof (struct in6_pktinfo), -1 /* not initialized */ },
1637c478bd9Sstevel@tonic-gate { IPV6_NEXTHOP, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
164bd670b35SErik Nordmark 	OP_NODEFAULT,
1657c478bd9Sstevel@tonic-gate 	sizeof (sin6_t), -1 /* not initialized */ },
1667c478bd9Sstevel@tonic-gate { IPV6_HOPOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
167bd670b35SErik Nordmark 	(OP_VARLEN|OP_NODEFAULT), 255*8,
1687c478bd9Sstevel@tonic-gate 	-1 /* not initialized */ },
1697c478bd9Sstevel@tonic-gate { IPV6_DSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
170bd670b35SErik Nordmark 	(OP_VARLEN|OP_NODEFAULT), 255*8,
1717c478bd9Sstevel@tonic-gate 	-1 /* not initialized */ },
1727c478bd9Sstevel@tonic-gate { IPV6_RTHDRDSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
173bd670b35SErik Nordmark 	(OP_VARLEN|OP_NODEFAULT), 255*8,
1747c478bd9Sstevel@tonic-gate 	-1 /* not initialized */ },
1757c478bd9Sstevel@tonic-gate { IPV6_RTHDR, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
176bd670b35SErik Nordmark 	(OP_VARLEN|OP_NODEFAULT), 255*8,
1777c478bd9Sstevel@tonic-gate 	-1 /* not initialized */ },
1787c478bd9Sstevel@tonic-gate { IPV6_TCLASS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
179bd670b35SErik Nordmark 	OP_NODEFAULT,
1807c478bd9Sstevel@tonic-gate 	sizeof (int), -1 /* not initialized */ },
1817c478bd9Sstevel@tonic-gate { IPV6_PATHMTU, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
182bd670b35SErik Nordmark 	OP_NODEFAULT,
1837c478bd9Sstevel@tonic-gate 	sizeof (struct ip6_mtuinfo), -1 /* not initialized */ },
184bd670b35SErik Nordmark { IPV6_DONTFRAG, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
185bd670b35SErik Nordmark 	sizeof (int), 0 },
186bd670b35SErik Nordmark { IPV6_USE_MIN_MTU, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
1877c478bd9Sstevel@tonic-gate 	sizeof (int), 0 },
188bd670b35SErik Nordmark { IPV6_V6ONLY, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
1897c478bd9Sstevel@tonic-gate 	sizeof (int), 0 },
1907c478bd9Sstevel@tonic-gate 
1917c478bd9Sstevel@tonic-gate /* Enable receipt of ancillary data */
192bd670b35SErik Nordmark { IPV6_RECVPKTINFO, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
1937c478bd9Sstevel@tonic-gate 	sizeof (int), 0 },
194bd670b35SErik Nordmark { IPV6_RECVHOPLIMIT, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
1957c478bd9Sstevel@tonic-gate 	sizeof (int), 0 },
196bd670b35SErik Nordmark { IPV6_RECVHOPOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
1977c478bd9Sstevel@tonic-gate 	sizeof (int), 0 },
198bd670b35SErik Nordmark { _OLD_IPV6_RECVDSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
1997c478bd9Sstevel@tonic-gate 	sizeof (int), 0 },
200bd670b35SErik Nordmark { IPV6_RECVDSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
2017c478bd9Sstevel@tonic-gate 	sizeof (int), 0 },
202bd670b35SErik Nordmark { IPV6_RECVRTHDR, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
2037c478bd9Sstevel@tonic-gate 	sizeof (int), 0 },
204bd670b35SErik Nordmark { IPV6_RECVRTHDRDSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
2057c478bd9Sstevel@tonic-gate 	sizeof (int), 0 },
206bd670b35SErik Nordmark { IPV6_RECVTCLASS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
2077c478bd9Sstevel@tonic-gate 	sizeof (int), 0 },
2087c478bd9Sstevel@tonic-gate 
209bd670b35SErik Nordmark { IPV6_SEC_OPT, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, OP_NODEFAULT,
2107c478bd9Sstevel@tonic-gate 	sizeof (ipsec_req_t), -1 /* not initialized */ },
211bd670b35SErik Nordmark { IPV6_SRC_PREFERENCES, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
2127c478bd9Sstevel@tonic-gate 	sizeof (uint32_t), IPV6_PREFER_SRC_DEFAULT },
2137c478bd9Sstevel@tonic-gate };
2147c478bd9Sstevel@tonic-gate 
2157c478bd9Sstevel@tonic-gate /*
2167c478bd9Sstevel@tonic-gate  * Table of all supported levels
2177c478bd9Sstevel@tonic-gate  * Note: Some levels (e.g. XTI_GENERIC) may be valid but may not have
2187c478bd9Sstevel@tonic-gate  * any supported options so we need this info separately.
2197c478bd9Sstevel@tonic-gate  *
2207c478bd9Sstevel@tonic-gate  * This is needed only for topmost tpi providers and is used only by
2217c478bd9Sstevel@tonic-gate  * XTI interfaces.
2227c478bd9Sstevel@tonic-gate  */
2237c478bd9Sstevel@tonic-gate optlevel_t	tcp_valid_levels_arr[] = {
2247c478bd9Sstevel@tonic-gate 	XTI_GENERIC,
2257c478bd9Sstevel@tonic-gate 	SOL_SOCKET,
2267c478bd9Sstevel@tonic-gate 	IPPROTO_TCP,
2277c478bd9Sstevel@tonic-gate 	IPPROTO_IP,
2287c478bd9Sstevel@tonic-gate 	IPPROTO_IPV6
2297c478bd9Sstevel@tonic-gate };
2307c478bd9Sstevel@tonic-gate 
2317c478bd9Sstevel@tonic-gate 
2327c478bd9Sstevel@tonic-gate #define	TCP_OPT_ARR_CNT		A_CNT(tcp_opt_arr)
2337c478bd9Sstevel@tonic-gate #define	TCP_VALID_LEVELS_CNT	A_CNT(tcp_valid_levels_arr)
2347c478bd9Sstevel@tonic-gate 
2357c478bd9Sstevel@tonic-gate uint_t tcp_max_optsize; /* initialized when TCP driver is loaded */
2367c478bd9Sstevel@tonic-gate 
2377c478bd9Sstevel@tonic-gate /*
2387c478bd9Sstevel@tonic-gate  * Initialize option database object for TCP
2397c478bd9Sstevel@tonic-gate  *
2407c478bd9Sstevel@tonic-gate  * This object represents database of options to search passed to
2417c478bd9Sstevel@tonic-gate  * {sock,tpi}optcom_req() interface routine to take care of option
2427c478bd9Sstevel@tonic-gate  * management and associated methods.
2437c478bd9Sstevel@tonic-gate  */
2447c478bd9Sstevel@tonic-gate 
2457c478bd9Sstevel@tonic-gate optdb_obj_t tcp_opt_obj = {
2467c478bd9Sstevel@tonic-gate 	tcp_opt_default,	/* TCP default value function pointer */
2470f1702c5SYu Xiangning 	tcp_tpi_opt_get,	/* TCP get function pointer */
2480f1702c5SYu Xiangning 	tcp_tpi_opt_set,	/* TCP set function pointer */
2497c478bd9Sstevel@tonic-gate 	TCP_OPT_ARR_CNT,	/* TCP option database count of entries */
2507c478bd9Sstevel@tonic-gate 	tcp_opt_arr,		/* TCP option database */
2517c478bd9Sstevel@tonic-gate 	TCP_VALID_LEVELS_CNT,	/* TCP valid level count of entries */
2527c478bd9Sstevel@tonic-gate 	tcp_valid_levels_arr	/* TCP valid level array */
2537c478bd9Sstevel@tonic-gate };
254721fffe3SKacheong Poon 
/* Upper bound on the TCP initial congestion window (start/restart). */
#define	TCP_MAX_INIT_CWND	16

/* File-private variable holding the limit; starts at TCP_MAX_INIT_CWND. */
static int tcp_max_init_cwnd = TCP_MAX_INIT_CWND;
259721fffe3SKacheong Poon 
260721fffe3SKacheong Poon /*
261721fffe3SKacheong Poon  * Some TCP options can be "set" by requesting them in the option
262721fffe3SKacheong Poon  * buffer. This is needed for XTI feature test though we do not
263721fffe3SKacheong Poon  * allow it in general. We interpret that this mechanism is more
264721fffe3SKacheong Poon  * applicable to OSI protocols and need not be allowed in general.
265721fffe3SKacheong Poon  * This routine filters out options for which it is not allowed (most)
266721fffe3SKacheong Poon  * and lets through those (few) for which it is. [ The XTI interface
267721fffe3SKacheong Poon  * test suite specifics will imply that any XTI_GENERIC level XTI_* if
268721fffe3SKacheong Poon  * ever implemented will have to be allowed here ].
269721fffe3SKacheong Poon  */
270721fffe3SKacheong Poon static boolean_t
271721fffe3SKacheong Poon tcp_allow_connopt_set(int level, int name)
272721fffe3SKacheong Poon {
273721fffe3SKacheong Poon 
274721fffe3SKacheong Poon 	switch (level) {
275721fffe3SKacheong Poon 	case IPPROTO_TCP:
276721fffe3SKacheong Poon 		switch (name) {
277721fffe3SKacheong Poon 		case TCP_NODELAY:
278721fffe3SKacheong Poon 			return (B_TRUE);
279721fffe3SKacheong Poon 		default:
280721fffe3SKacheong Poon 			return (B_FALSE);
281721fffe3SKacheong Poon 		}
282721fffe3SKacheong Poon 		/*NOTREACHED*/
283721fffe3SKacheong Poon 	default:
284721fffe3SKacheong Poon 		return (B_FALSE);
285721fffe3SKacheong Poon 	}
286721fffe3SKacheong Poon 	/*NOTREACHED*/
287721fffe3SKacheong Poon }
288721fffe3SKacheong Poon 
289721fffe3SKacheong Poon /*
290721fffe3SKacheong Poon  * This routine gets default values of certain options whose default
291721fffe3SKacheong Poon  * values are maintained by protocol specific code
292721fffe3SKacheong Poon  */
293721fffe3SKacheong Poon /* ARGSUSED */
294721fffe3SKacheong Poon int
295721fffe3SKacheong Poon tcp_opt_default(queue_t *q, int level, int name, uchar_t *ptr)
296721fffe3SKacheong Poon {
297721fffe3SKacheong Poon 	int32_t	*i1 = (int32_t *)ptr;
298721fffe3SKacheong Poon 	tcp_stack_t	*tcps = Q_TO_TCP(q)->tcp_tcps;
299721fffe3SKacheong Poon 
300721fffe3SKacheong Poon 	switch (level) {
301721fffe3SKacheong Poon 	case IPPROTO_TCP:
302721fffe3SKacheong Poon 		switch (name) {
303721fffe3SKacheong Poon 		case TCP_NOTIFY_THRESHOLD:
304721fffe3SKacheong Poon 			*i1 = tcps->tcps_ip_notify_interval;
305721fffe3SKacheong Poon 			break;
306721fffe3SKacheong Poon 		case TCP_ABORT_THRESHOLD:
307721fffe3SKacheong Poon 			*i1 = tcps->tcps_ip_abort_interval;
308721fffe3SKacheong Poon 			break;
309721fffe3SKacheong Poon 		case TCP_CONN_NOTIFY_THRESHOLD:
310721fffe3SKacheong Poon 			*i1 = tcps->tcps_ip_notify_cinterval;
311721fffe3SKacheong Poon 			break;
312721fffe3SKacheong Poon 		case TCP_CONN_ABORT_THRESHOLD:
313721fffe3SKacheong Poon 			*i1 = tcps->tcps_ip_abort_cinterval;
314721fffe3SKacheong Poon 			break;
315721fffe3SKacheong Poon 		default:
316721fffe3SKacheong Poon 			return (-1);
317721fffe3SKacheong Poon 		}
318721fffe3SKacheong Poon 		break;
319721fffe3SKacheong Poon 	case IPPROTO_IP:
320721fffe3SKacheong Poon 		switch (name) {
321721fffe3SKacheong Poon 		case IP_TTL:
322721fffe3SKacheong Poon 			*i1 = tcps->tcps_ipv4_ttl;
323721fffe3SKacheong Poon 			break;
324721fffe3SKacheong Poon 		default:
325721fffe3SKacheong Poon 			return (-1);
326721fffe3SKacheong Poon 		}
327721fffe3SKacheong Poon 		break;
328721fffe3SKacheong Poon 	case IPPROTO_IPV6:
329721fffe3SKacheong Poon 		switch (name) {
330721fffe3SKacheong Poon 		case IPV6_UNICAST_HOPS:
331721fffe3SKacheong Poon 			*i1 = tcps->tcps_ipv6_hoplimit;
332721fffe3SKacheong Poon 			break;
333721fffe3SKacheong Poon 		default:
334721fffe3SKacheong Poon 			return (-1);
335721fffe3SKacheong Poon 		}
336721fffe3SKacheong Poon 		break;
337721fffe3SKacheong Poon 	default:
338721fffe3SKacheong Poon 		return (-1);
339721fffe3SKacheong Poon 	}
340721fffe3SKacheong Poon 	return (sizeof (int));
341721fffe3SKacheong Poon }
342721fffe3SKacheong Poon 
343721fffe3SKacheong Poon /*
344721fffe3SKacheong Poon  * TCP routine to get the values of options.
345721fffe3SKacheong Poon  */
346721fffe3SKacheong Poon int
347721fffe3SKacheong Poon tcp_opt_get(conn_t *connp, int level, int name, uchar_t *ptr)
348721fffe3SKacheong Poon {
349721fffe3SKacheong Poon 	int		*i1 = (int *)ptr;
350721fffe3SKacheong Poon 	tcp_t		*tcp = connp->conn_tcp;
351721fffe3SKacheong Poon 	conn_opt_arg_t	coas;
352721fffe3SKacheong Poon 	int		retval;
353721fffe3SKacheong Poon 
354721fffe3SKacheong Poon 	coas.coa_connp = connp;
355721fffe3SKacheong Poon 	coas.coa_ixa = connp->conn_ixa;
356721fffe3SKacheong Poon 	coas.coa_ipp = &connp->conn_xmit_ipp;
357721fffe3SKacheong Poon 	coas.coa_ancillary = B_FALSE;
358721fffe3SKacheong Poon 	coas.coa_changed = 0;
359721fffe3SKacheong Poon 
360721fffe3SKacheong Poon 	switch (level) {
361721fffe3SKacheong Poon 	case SOL_SOCKET:
362721fffe3SKacheong Poon 		switch (name) {
363721fffe3SKacheong Poon 		case SO_SND_COPYAVOID:
364721fffe3SKacheong Poon 			*i1 = tcp->tcp_snd_zcopy_on ?
365721fffe3SKacheong Poon 			    SO_SND_COPYAVOID : 0;
366721fffe3SKacheong Poon 			return (sizeof (int));
367721fffe3SKacheong Poon 		case SO_ACCEPTCONN:
368721fffe3SKacheong Poon 			*i1 = (tcp->tcp_state == TCPS_LISTEN);
369721fffe3SKacheong Poon 			return (sizeof (int));
370721fffe3SKacheong Poon 		}
371721fffe3SKacheong Poon 		break;
372721fffe3SKacheong Poon 	case IPPROTO_TCP:
373721fffe3SKacheong Poon 		switch (name) {
374721fffe3SKacheong Poon 		case TCP_NODELAY:
375721fffe3SKacheong Poon 			*i1 = (tcp->tcp_naglim == 1) ? TCP_NODELAY : 0;
376721fffe3SKacheong Poon 			return (sizeof (int));
377721fffe3SKacheong Poon 		case TCP_MAXSEG:
378721fffe3SKacheong Poon 			*i1 = tcp->tcp_mss;
379721fffe3SKacheong Poon 			return (sizeof (int));
380721fffe3SKacheong Poon 		case TCP_NOTIFY_THRESHOLD:
381721fffe3SKacheong Poon 			*i1 = (int)tcp->tcp_first_timer_threshold;
382721fffe3SKacheong Poon 			return (sizeof (int));
383721fffe3SKacheong Poon 		case TCP_ABORT_THRESHOLD:
384721fffe3SKacheong Poon 			*i1 = tcp->tcp_second_timer_threshold;
385721fffe3SKacheong Poon 			return (sizeof (int));
386721fffe3SKacheong Poon 		case TCP_CONN_NOTIFY_THRESHOLD:
387721fffe3SKacheong Poon 			*i1 = tcp->tcp_first_ctimer_threshold;
388721fffe3SKacheong Poon 			return (sizeof (int));
389721fffe3SKacheong Poon 		case TCP_CONN_ABORT_THRESHOLD:
390721fffe3SKacheong Poon 			*i1 = tcp->tcp_second_ctimer_threshold;
391721fffe3SKacheong Poon 			return (sizeof (int));
392721fffe3SKacheong Poon 		case TCP_INIT_CWND:
393721fffe3SKacheong Poon 			*i1 = tcp->tcp_init_cwnd;
394721fffe3SKacheong Poon 			return (sizeof (int));
395721fffe3SKacheong Poon 		case TCP_KEEPALIVE_THRESHOLD:
396721fffe3SKacheong Poon 			*i1 = tcp->tcp_ka_interval;
397721fffe3SKacheong Poon 			return (sizeof (int));
398721fffe3SKacheong Poon 		case TCP_KEEPALIVE_ABORT_THRESHOLD:
399721fffe3SKacheong Poon 			*i1 = tcp->tcp_ka_abort_thres;
400721fffe3SKacheong Poon 			return (sizeof (int));
401721fffe3SKacheong Poon 		case TCP_CORK:
402721fffe3SKacheong Poon 			*i1 = tcp->tcp_cork;
403721fffe3SKacheong Poon 			return (sizeof (int));
404721fffe3SKacheong Poon 		}
405721fffe3SKacheong Poon 		break;
406721fffe3SKacheong Poon 	case IPPROTO_IP:
407721fffe3SKacheong Poon 		if (connp->conn_family != AF_INET)
408721fffe3SKacheong Poon 			return (-1);
409721fffe3SKacheong Poon 		switch (name) {
410721fffe3SKacheong Poon 		case IP_OPTIONS:
411721fffe3SKacheong Poon 		case T_IP_OPTIONS:
412721fffe3SKacheong Poon 			/* Caller ensures enough space */
413721fffe3SKacheong Poon 			return (ip_opt_get_user(connp, ptr));
414721fffe3SKacheong Poon 		default:
415721fffe3SKacheong Poon 			break;
416721fffe3SKacheong Poon 		}
417721fffe3SKacheong Poon 		break;
418721fffe3SKacheong Poon 
419721fffe3SKacheong Poon 	case IPPROTO_IPV6:
420721fffe3SKacheong Poon 		/*
421721fffe3SKacheong Poon 		 * IPPROTO_IPV6 options are only supported for sockets
422721fffe3SKacheong Poon 		 * that are using IPv6 on the wire.
423721fffe3SKacheong Poon 		 */
424721fffe3SKacheong Poon 		if (connp->conn_ipversion != IPV6_VERSION) {
425721fffe3SKacheong Poon 			return (-1);
426721fffe3SKacheong Poon 		}
427721fffe3SKacheong Poon 		switch (name) {
428721fffe3SKacheong Poon 		case IPV6_PATHMTU:
429721fffe3SKacheong Poon 			if (tcp->tcp_state < TCPS_ESTABLISHED)
430721fffe3SKacheong Poon 				return (-1);
431721fffe3SKacheong Poon 			break;
432721fffe3SKacheong Poon 		}
433721fffe3SKacheong Poon 		break;
434721fffe3SKacheong Poon 	}
435721fffe3SKacheong Poon 	mutex_enter(&connp->conn_lock);
436721fffe3SKacheong Poon 	retval = conn_opt_get(&coas, level, name, ptr);
437721fffe3SKacheong Poon 	mutex_exit(&connp->conn_lock);
438721fffe3SKacheong Poon 	return (retval);
439721fffe3SKacheong Poon }
440721fffe3SKacheong Poon 
441721fffe3SKacheong Poon /*
442721fffe3SKacheong Poon  * We declare as 'int' rather than 'void' to satisfy pfi_t arg requirements.
443721fffe3SKacheong Poon  * Parameters are assumed to be verified by the caller.
444721fffe3SKacheong Poon  */
445721fffe3SKacheong Poon /* ARGSUSED */
446721fffe3SKacheong Poon int
447721fffe3SKacheong Poon tcp_opt_set(conn_t *connp, uint_t optset_context, int level, int name,
448721fffe3SKacheong Poon     uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
449721fffe3SKacheong Poon     void *thisdg_attrs, cred_t *cr)
450721fffe3SKacheong Poon {
451721fffe3SKacheong Poon 	tcp_t	*tcp = connp->conn_tcp;
452721fffe3SKacheong Poon 	int	*i1 = (int *)invalp;
453721fffe3SKacheong Poon 	boolean_t onoff = (*i1 == 0) ? 0 : 1;
454721fffe3SKacheong Poon 	boolean_t checkonly;
455721fffe3SKacheong Poon 	int	reterr;
456721fffe3SKacheong Poon 	tcp_stack_t	*tcps = tcp->tcp_tcps;
457721fffe3SKacheong Poon 	conn_opt_arg_t	coas;
458721fffe3SKacheong Poon 
459721fffe3SKacheong Poon 	coas.coa_connp = connp;
460721fffe3SKacheong Poon 	coas.coa_ixa = connp->conn_ixa;
461721fffe3SKacheong Poon 	coas.coa_ipp = &connp->conn_xmit_ipp;
462721fffe3SKacheong Poon 	coas.coa_ancillary = B_FALSE;
463721fffe3SKacheong Poon 	coas.coa_changed = 0;
464721fffe3SKacheong Poon 
465721fffe3SKacheong Poon 	switch (optset_context) {
466721fffe3SKacheong Poon 	case SETFN_OPTCOM_CHECKONLY:
467721fffe3SKacheong Poon 		checkonly = B_TRUE;
468721fffe3SKacheong Poon 		/*
469721fffe3SKacheong Poon 		 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ
470721fffe3SKacheong Poon 		 * inlen != 0 implies value supplied and
471721fffe3SKacheong Poon 		 * 	we have to "pretend" to set it.
472721fffe3SKacheong Poon 		 * inlen == 0 implies that there is no
473721fffe3SKacheong Poon 		 * 	value part in T_CHECK request and just validation
474721fffe3SKacheong Poon 		 * done elsewhere should be enough, we just return here.
475721fffe3SKacheong Poon 		 */
476721fffe3SKacheong Poon 		if (inlen == 0) {
477721fffe3SKacheong Poon 			*outlenp = 0;
478721fffe3SKacheong Poon 			return (0);
479721fffe3SKacheong Poon 		}
480721fffe3SKacheong Poon 		break;
481721fffe3SKacheong Poon 	case SETFN_OPTCOM_NEGOTIATE:
482721fffe3SKacheong Poon 		checkonly = B_FALSE;
483721fffe3SKacheong Poon 		break;
484721fffe3SKacheong Poon 	case SETFN_UD_NEGOTIATE: /* error on conn-oriented transports ? */
485721fffe3SKacheong Poon 	case SETFN_CONN_NEGOTIATE:
486721fffe3SKacheong Poon 		checkonly = B_FALSE;
487721fffe3SKacheong Poon 		/*
488721fffe3SKacheong Poon 		 * Negotiating local and "association-related" options
489721fffe3SKacheong Poon 		 * from other (T_CONN_REQ, T_CONN_RES,T_UNITDATA_REQ)
490721fffe3SKacheong Poon 		 * primitives is allowed by XTI, but we choose
491721fffe3SKacheong Poon 		 * to not implement this style negotiation for Internet
492721fffe3SKacheong Poon 		 * protocols (We interpret it is a must for OSI world but
493721fffe3SKacheong Poon 		 * optional for Internet protocols) for all options.
494721fffe3SKacheong Poon 		 * [ Will do only for the few options that enable test
495721fffe3SKacheong Poon 		 * suites that our XTI implementation of this feature
496721fffe3SKacheong Poon 		 * works for transports that do allow it ]
497721fffe3SKacheong Poon 		 */
498721fffe3SKacheong Poon 		if (!tcp_allow_connopt_set(level, name)) {
499721fffe3SKacheong Poon 			*outlenp = 0;
500721fffe3SKacheong Poon 			return (EINVAL);
501721fffe3SKacheong Poon 		}
502721fffe3SKacheong Poon 		break;
503721fffe3SKacheong Poon 	default:
504721fffe3SKacheong Poon 		/*
505721fffe3SKacheong Poon 		 * We should never get here
506721fffe3SKacheong Poon 		 */
507721fffe3SKacheong Poon 		*outlenp = 0;
508721fffe3SKacheong Poon 		return (EINVAL);
509721fffe3SKacheong Poon 	}
510721fffe3SKacheong Poon 
511721fffe3SKacheong Poon 	ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) ||
512721fffe3SKacheong Poon 	    (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0));
513721fffe3SKacheong Poon 
514721fffe3SKacheong Poon 	/*
515721fffe3SKacheong Poon 	 * For TCP, we should have no ancillary data sent down
516721fffe3SKacheong Poon 	 * (sendmsg isn't supported for SOCK_STREAM), so thisdg_attrs
517721fffe3SKacheong Poon 	 * has to be zero.
518721fffe3SKacheong Poon 	 */
519721fffe3SKacheong Poon 	ASSERT(thisdg_attrs == NULL);
520721fffe3SKacheong Poon 
521721fffe3SKacheong Poon 	/*
522721fffe3SKacheong Poon 	 * For fixed length options, no sanity check
523721fffe3SKacheong Poon 	 * of passed in length is done. It is assumed *_optcom_req()
524721fffe3SKacheong Poon 	 * routines do the right thing.
525721fffe3SKacheong Poon 	 */
526721fffe3SKacheong Poon 	switch (level) {
527721fffe3SKacheong Poon 	case SOL_SOCKET:
528721fffe3SKacheong Poon 		switch (name) {
529721fffe3SKacheong Poon 		case SO_KEEPALIVE:
530721fffe3SKacheong Poon 			if (checkonly) {
531721fffe3SKacheong Poon 				/* check only case */
532721fffe3SKacheong Poon 				break;
533721fffe3SKacheong Poon 			}
534721fffe3SKacheong Poon 
535721fffe3SKacheong Poon 			if (!onoff) {
536721fffe3SKacheong Poon 				if (connp->conn_keepalive) {
537721fffe3SKacheong Poon 					if (tcp->tcp_ka_tid != 0) {
538721fffe3SKacheong Poon 						(void) TCP_TIMER_CANCEL(tcp,
539721fffe3SKacheong Poon 						    tcp->tcp_ka_tid);
540721fffe3SKacheong Poon 						tcp->tcp_ka_tid = 0;
541721fffe3SKacheong Poon 					}
542721fffe3SKacheong Poon 					connp->conn_keepalive = 0;
543721fffe3SKacheong Poon 				}
544721fffe3SKacheong Poon 				break;
545721fffe3SKacheong Poon 			}
546721fffe3SKacheong Poon 			if (!connp->conn_keepalive) {
547721fffe3SKacheong Poon 				/* Crank up the keepalive timer */
548721fffe3SKacheong Poon 				tcp->tcp_ka_last_intrvl = 0;
549721fffe3SKacheong Poon 				tcp->tcp_ka_tid = TCP_TIMER(tcp,
550*66cd0f60SKacheong Poon 				    tcp_keepalive_timer, tcp->tcp_ka_interval);
551721fffe3SKacheong Poon 				connp->conn_keepalive = 1;
552721fffe3SKacheong Poon 			}
553721fffe3SKacheong Poon 			break;
554721fffe3SKacheong Poon 		case SO_SNDBUF: {
555721fffe3SKacheong Poon 			if (*i1 > tcps->tcps_max_buf) {
556721fffe3SKacheong Poon 				*outlenp = 0;
557721fffe3SKacheong Poon 				return (ENOBUFS);
558721fffe3SKacheong Poon 			}
559721fffe3SKacheong Poon 			if (checkonly)
560721fffe3SKacheong Poon 				break;
561721fffe3SKacheong Poon 
562721fffe3SKacheong Poon 			connp->conn_sndbuf = *i1;
563721fffe3SKacheong Poon 			if (tcps->tcps_snd_lowat_fraction != 0) {
564721fffe3SKacheong Poon 				connp->conn_sndlowat = connp->conn_sndbuf /
565721fffe3SKacheong Poon 				    tcps->tcps_snd_lowat_fraction;
566721fffe3SKacheong Poon 			}
567721fffe3SKacheong Poon 			(void) tcp_maxpsz_set(tcp, B_TRUE);
568721fffe3SKacheong Poon 			/*
569721fffe3SKacheong Poon 			 * If we are flow-controlled, recheck the condition.
570721fffe3SKacheong Poon 			 * There are apps that increase SO_SNDBUF size when
571721fffe3SKacheong Poon 			 * flow-controlled (EWOULDBLOCK), and expect the flow
572721fffe3SKacheong Poon 			 * control condition to be lifted right away.
573721fffe3SKacheong Poon 			 */
574721fffe3SKacheong Poon 			mutex_enter(&tcp->tcp_non_sq_lock);
575721fffe3SKacheong Poon 			if (tcp->tcp_flow_stopped &&
576721fffe3SKacheong Poon 			    TCP_UNSENT_BYTES(tcp) < connp->conn_sndbuf) {
577721fffe3SKacheong Poon 				tcp_clrqfull(tcp);
578721fffe3SKacheong Poon 			}
579721fffe3SKacheong Poon 			mutex_exit(&tcp->tcp_non_sq_lock);
580721fffe3SKacheong Poon 			*outlenp = inlen;
581721fffe3SKacheong Poon 			return (0);
582721fffe3SKacheong Poon 		}
583721fffe3SKacheong Poon 		case SO_RCVBUF:
584721fffe3SKacheong Poon 			if (*i1 > tcps->tcps_max_buf) {
585721fffe3SKacheong Poon 				*outlenp = 0;
586721fffe3SKacheong Poon 				return (ENOBUFS);
587721fffe3SKacheong Poon 			}
588721fffe3SKacheong Poon 			/* Silently ignore zero */
589721fffe3SKacheong Poon 			if (!checkonly && *i1 != 0) {
590721fffe3SKacheong Poon 				*i1 = MSS_ROUNDUP(*i1, tcp->tcp_mss);
591721fffe3SKacheong Poon 				(void) tcp_rwnd_set(tcp, *i1);
592721fffe3SKacheong Poon 			}
593721fffe3SKacheong Poon 			/*
594721fffe3SKacheong Poon 			 * XXX should we return the rwnd here
595721fffe3SKacheong Poon 			 * and tcp_opt_get ?
596721fffe3SKacheong Poon 			 */
597721fffe3SKacheong Poon 			*outlenp = inlen;
598721fffe3SKacheong Poon 			return (0);
599721fffe3SKacheong Poon 		case SO_SND_COPYAVOID:
600721fffe3SKacheong Poon 			if (!checkonly) {
601721fffe3SKacheong Poon 				if (tcp->tcp_loopback ||
602721fffe3SKacheong Poon 				    (tcp->tcp_kssl_ctx != NULL) ||
603721fffe3SKacheong Poon 				    (onoff != 1) || !tcp_zcopy_check(tcp)) {
604721fffe3SKacheong Poon 					*outlenp = 0;
605721fffe3SKacheong Poon 					return (EOPNOTSUPP);
606721fffe3SKacheong Poon 				}
607721fffe3SKacheong Poon 				tcp->tcp_snd_zcopy_aware = 1;
608721fffe3SKacheong Poon 			}
609721fffe3SKacheong Poon 			*outlenp = inlen;
610721fffe3SKacheong Poon 			return (0);
611721fffe3SKacheong Poon 		}
612721fffe3SKacheong Poon 		break;
613721fffe3SKacheong Poon 	case IPPROTO_TCP:
614721fffe3SKacheong Poon 		switch (name) {
615721fffe3SKacheong Poon 		case TCP_NODELAY:
616721fffe3SKacheong Poon 			if (!checkonly)
617721fffe3SKacheong Poon 				tcp->tcp_naglim = *i1 ? 1 : tcp->tcp_mss;
618721fffe3SKacheong Poon 			break;
619721fffe3SKacheong Poon 		case TCP_NOTIFY_THRESHOLD:
620721fffe3SKacheong Poon 			if (!checkonly)
621721fffe3SKacheong Poon 				tcp->tcp_first_timer_threshold = *i1;
622721fffe3SKacheong Poon 			break;
623721fffe3SKacheong Poon 		case TCP_ABORT_THRESHOLD:
624721fffe3SKacheong Poon 			if (!checkonly)
625721fffe3SKacheong Poon 				tcp->tcp_second_timer_threshold = *i1;
626721fffe3SKacheong Poon 			break;
627721fffe3SKacheong Poon 		case TCP_CONN_NOTIFY_THRESHOLD:
628721fffe3SKacheong Poon 			if (!checkonly)
629721fffe3SKacheong Poon 				tcp->tcp_first_ctimer_threshold = *i1;
630721fffe3SKacheong Poon 			break;
631721fffe3SKacheong Poon 		case TCP_CONN_ABORT_THRESHOLD:
632721fffe3SKacheong Poon 			if (!checkonly)
633721fffe3SKacheong Poon 				tcp->tcp_second_ctimer_threshold = *i1;
634721fffe3SKacheong Poon 			break;
635721fffe3SKacheong Poon 		case TCP_RECVDSTADDR:
636721fffe3SKacheong Poon 			if (tcp->tcp_state > TCPS_LISTEN) {
637721fffe3SKacheong Poon 				*outlenp = 0;
638721fffe3SKacheong Poon 				return (EOPNOTSUPP);
639721fffe3SKacheong Poon 			}
640721fffe3SKacheong Poon 			/* Setting done in conn_opt_set */
641721fffe3SKacheong Poon 			break;
642721fffe3SKacheong Poon 		case TCP_INIT_CWND: {
643721fffe3SKacheong Poon 			uint32_t init_cwnd = *((uint32_t *)invalp);
644721fffe3SKacheong Poon 
645721fffe3SKacheong Poon 			if (checkonly)
646721fffe3SKacheong Poon 				break;
647721fffe3SKacheong Poon 
648721fffe3SKacheong Poon 			/*
649721fffe3SKacheong Poon 			 * Only allow socket with network configuration
650721fffe3SKacheong Poon 			 * privilege to set the initial cwnd to be larger
651721fffe3SKacheong Poon 			 * than allowed by RFC 3390.
652721fffe3SKacheong Poon 			 */
653721fffe3SKacheong Poon 			if (init_cwnd <= MIN(4, MAX(2, 4380 / tcp->tcp_mss))) {
654721fffe3SKacheong Poon 				tcp->tcp_init_cwnd = init_cwnd;
655721fffe3SKacheong Poon 				break;
656721fffe3SKacheong Poon 			}
657721fffe3SKacheong Poon 			if ((reterr = secpolicy_ip_config(cr, B_TRUE)) != 0) {
658721fffe3SKacheong Poon 				*outlenp = 0;
659721fffe3SKacheong Poon 				return (reterr);
660721fffe3SKacheong Poon 			}
661721fffe3SKacheong Poon 			if (init_cwnd > tcp_max_init_cwnd) {
662721fffe3SKacheong Poon 				*outlenp = 0;
663721fffe3SKacheong Poon 				return (EINVAL);
664721fffe3SKacheong Poon 			}
665721fffe3SKacheong Poon 			tcp->tcp_init_cwnd = init_cwnd;
666721fffe3SKacheong Poon 			break;
667721fffe3SKacheong Poon 		}
668721fffe3SKacheong Poon 		case TCP_KEEPALIVE_THRESHOLD:
669721fffe3SKacheong Poon 			if (checkonly)
670721fffe3SKacheong Poon 				break;
671721fffe3SKacheong Poon 
672721fffe3SKacheong Poon 			if (*i1 < tcps->tcps_keepalive_interval_low ||
673721fffe3SKacheong Poon 			    *i1 > tcps->tcps_keepalive_interval_high) {
674721fffe3SKacheong Poon 				*outlenp = 0;
675721fffe3SKacheong Poon 				return (EINVAL);
676721fffe3SKacheong Poon 			}
677721fffe3SKacheong Poon 			if (*i1 != tcp->tcp_ka_interval) {
678721fffe3SKacheong Poon 				tcp->tcp_ka_interval = *i1;
679721fffe3SKacheong Poon 				/*
680721fffe3SKacheong Poon 				 * Check if we need to restart the
681721fffe3SKacheong Poon 				 * keepalive timer.
682721fffe3SKacheong Poon 				 */
683721fffe3SKacheong Poon 				if (tcp->tcp_ka_tid != 0) {
684721fffe3SKacheong Poon 					ASSERT(connp->conn_keepalive);
685721fffe3SKacheong Poon 					(void) TCP_TIMER_CANCEL(tcp,
686721fffe3SKacheong Poon 					    tcp->tcp_ka_tid);
687721fffe3SKacheong Poon 					tcp->tcp_ka_last_intrvl = 0;
688721fffe3SKacheong Poon 					tcp->tcp_ka_tid = TCP_TIMER(tcp,
689721fffe3SKacheong Poon 					    tcp_keepalive_timer,
690*66cd0f60SKacheong Poon 					    tcp->tcp_ka_interval);
691721fffe3SKacheong Poon 				}
692721fffe3SKacheong Poon 			}
693721fffe3SKacheong Poon 			break;
694721fffe3SKacheong Poon 		case TCP_KEEPALIVE_ABORT_THRESHOLD:
695721fffe3SKacheong Poon 			if (!checkonly) {
696721fffe3SKacheong Poon 				if (*i1 <
697721fffe3SKacheong Poon 				    tcps->tcps_keepalive_abort_interval_low ||
698721fffe3SKacheong Poon 				    *i1 >
699721fffe3SKacheong Poon 				    tcps->tcps_keepalive_abort_interval_high) {
700721fffe3SKacheong Poon 					*outlenp = 0;
701721fffe3SKacheong Poon 					return (EINVAL);
702721fffe3SKacheong Poon 				}
703721fffe3SKacheong Poon 				tcp->tcp_ka_abort_thres = *i1;
704721fffe3SKacheong Poon 			}
705721fffe3SKacheong Poon 			break;
706721fffe3SKacheong Poon 		case TCP_CORK:
707721fffe3SKacheong Poon 			if (!checkonly) {
708721fffe3SKacheong Poon 				/*
709721fffe3SKacheong Poon 				 * if tcp->tcp_cork was set and is now
710721fffe3SKacheong Poon 				 * being unset, we have to make sure that
711721fffe3SKacheong Poon 				 * the remaining data gets sent out. Also
712721fffe3SKacheong Poon 				 * unset tcp->tcp_cork so that tcp_wput_data()
713721fffe3SKacheong Poon 				 * can send data even if it is less than mss
714721fffe3SKacheong Poon 				 */
715721fffe3SKacheong Poon 				if (tcp->tcp_cork && onoff == 0 &&
716721fffe3SKacheong Poon 				    tcp->tcp_unsent > 0) {
717721fffe3SKacheong Poon 					tcp->tcp_cork = B_FALSE;
718721fffe3SKacheong Poon 					tcp_wput_data(tcp, NULL, B_FALSE);
719721fffe3SKacheong Poon 				}
720721fffe3SKacheong Poon 				tcp->tcp_cork = onoff;
721721fffe3SKacheong Poon 			}
722721fffe3SKacheong Poon 			break;
723721fffe3SKacheong Poon 		default:
724721fffe3SKacheong Poon 			break;
725721fffe3SKacheong Poon 		}
726721fffe3SKacheong Poon 		break;
727721fffe3SKacheong Poon 	case IPPROTO_IP:
728721fffe3SKacheong Poon 		if (connp->conn_family != AF_INET) {
729721fffe3SKacheong Poon 			*outlenp = 0;
730721fffe3SKacheong Poon 			return (EINVAL);
731721fffe3SKacheong Poon 		}
732721fffe3SKacheong Poon 		switch (name) {
733721fffe3SKacheong Poon 		case IP_SEC_OPT:
734721fffe3SKacheong Poon 			/*
735721fffe3SKacheong Poon 			 * We should not allow policy setting after
736721fffe3SKacheong Poon 			 * we start listening for connections.
737721fffe3SKacheong Poon 			 */
738721fffe3SKacheong Poon 			if (tcp->tcp_state == TCPS_LISTEN) {
739721fffe3SKacheong Poon 				return (EINVAL);
740721fffe3SKacheong Poon 			}
741721fffe3SKacheong Poon 			break;
742721fffe3SKacheong Poon 		}
743721fffe3SKacheong Poon 		break;
744721fffe3SKacheong Poon 	case IPPROTO_IPV6:
745721fffe3SKacheong Poon 		/*
746721fffe3SKacheong Poon 		 * IPPROTO_IPV6 options are only supported for sockets
747721fffe3SKacheong Poon 		 * that are using IPv6 on the wire.
748721fffe3SKacheong Poon 		 */
749721fffe3SKacheong Poon 		if (connp->conn_ipversion != IPV6_VERSION) {
750721fffe3SKacheong Poon 			*outlenp = 0;
751721fffe3SKacheong Poon 			return (EINVAL);
752721fffe3SKacheong Poon 		}
753721fffe3SKacheong Poon 
754721fffe3SKacheong Poon 		switch (name) {
755721fffe3SKacheong Poon 		case IPV6_RECVPKTINFO:
756721fffe3SKacheong Poon 			if (!checkonly) {
757721fffe3SKacheong Poon 				/* Force it to be sent up with the next msg */
758721fffe3SKacheong Poon 				tcp->tcp_recvifindex = 0;
759721fffe3SKacheong Poon 			}
760721fffe3SKacheong Poon 			break;
761721fffe3SKacheong Poon 		case IPV6_RECVTCLASS:
762721fffe3SKacheong Poon 			if (!checkonly) {
763721fffe3SKacheong Poon 				/* Force it to be sent up with the next msg */
764721fffe3SKacheong Poon 				tcp->tcp_recvtclass = 0xffffffffU;
765721fffe3SKacheong Poon 			}
766721fffe3SKacheong Poon 			break;
767721fffe3SKacheong Poon 		case IPV6_RECVHOPLIMIT:
768721fffe3SKacheong Poon 			if (!checkonly) {
769721fffe3SKacheong Poon 				/* Force it to be sent up with the next msg */
770721fffe3SKacheong Poon 				tcp->tcp_recvhops = 0xffffffffU;
771721fffe3SKacheong Poon 			}
772721fffe3SKacheong Poon 			break;
773721fffe3SKacheong Poon 		case IPV6_PKTINFO:
774721fffe3SKacheong Poon 			/* This is an extra check for TCP */
775721fffe3SKacheong Poon 			if (inlen == sizeof (struct in6_pktinfo)) {
776721fffe3SKacheong Poon 				struct in6_pktinfo *pkti;
777721fffe3SKacheong Poon 
778721fffe3SKacheong Poon 				pkti = (struct in6_pktinfo *)invalp;
779721fffe3SKacheong Poon 				/*
780721fffe3SKacheong Poon 				 * RFC 3542 states that ipi6_addr must be
781721fffe3SKacheong Poon 				 * the unspecified address when setting the
782721fffe3SKacheong Poon 				 * IPV6_PKTINFO sticky socket option on a
783721fffe3SKacheong Poon 				 * TCP socket.
784721fffe3SKacheong Poon 				 */
785721fffe3SKacheong Poon 				if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr))
786721fffe3SKacheong Poon 					return (EINVAL);
787721fffe3SKacheong Poon 			}
788721fffe3SKacheong Poon 			break;
789721fffe3SKacheong Poon 		case IPV6_SEC_OPT:
790721fffe3SKacheong Poon 			/*
791721fffe3SKacheong Poon 			 * We should not allow policy setting after
792721fffe3SKacheong Poon 			 * we start listening for connections.
793721fffe3SKacheong Poon 			 */
794721fffe3SKacheong Poon 			if (tcp->tcp_state == TCPS_LISTEN) {
795721fffe3SKacheong Poon 				return (EINVAL);
796721fffe3SKacheong Poon 			}
797721fffe3SKacheong Poon 			break;
798721fffe3SKacheong Poon 		}
799721fffe3SKacheong Poon 		break;
800721fffe3SKacheong Poon 	}
801721fffe3SKacheong Poon 	reterr = conn_opt_set(&coas, level, name, inlen, invalp,
802721fffe3SKacheong Poon 	    checkonly, cr);
803721fffe3SKacheong Poon 	if (reterr != 0) {
804721fffe3SKacheong Poon 		*outlenp = 0;
805721fffe3SKacheong Poon 		return (reterr);
806721fffe3SKacheong Poon 	}
807721fffe3SKacheong Poon 
808721fffe3SKacheong Poon 	/*
809721fffe3SKacheong Poon 	 * Common case of OK return with outval same as inval
810721fffe3SKacheong Poon 	 */
811721fffe3SKacheong Poon 	if (invalp != outvalp) {
812721fffe3SKacheong Poon 		/* don't trust bcopy for identical src/dst */
813721fffe3SKacheong Poon 		(void) bcopy(invalp, outvalp, inlen);
814721fffe3SKacheong Poon 	}
815721fffe3SKacheong Poon 	*outlenp = inlen;
816721fffe3SKacheong Poon 
817721fffe3SKacheong Poon 	if (coas.coa_changed & COA_HEADER_CHANGED) {
818721fffe3SKacheong Poon 		/* If we are connected we rebuilt the headers */
819721fffe3SKacheong Poon 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) &&
820721fffe3SKacheong Poon 		    !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
821721fffe3SKacheong Poon 			reterr = tcp_build_hdrs(tcp);
822721fffe3SKacheong Poon 			if (reterr != 0)
823721fffe3SKacheong Poon 				return (reterr);
824721fffe3SKacheong Poon 		}
825721fffe3SKacheong Poon 	}
826721fffe3SKacheong Poon 	if (coas.coa_changed & COA_ROUTE_CHANGED) {
827721fffe3SKacheong Poon 		in6_addr_t nexthop;
828721fffe3SKacheong Poon 
829721fffe3SKacheong Poon 		/*
830721fffe3SKacheong Poon 		 * If we are connected we re-cache the information.
831721fffe3SKacheong Poon 		 * We ignore errors to preserve BSD behavior.
832721fffe3SKacheong Poon 		 * Note that we don't redo IPsec policy lookup here
833721fffe3SKacheong Poon 		 * since the final destination (or source) didn't change.
834721fffe3SKacheong Poon 		 */
835721fffe3SKacheong Poon 		ip_attr_nexthop(&connp->conn_xmit_ipp, connp->conn_ixa,
836721fffe3SKacheong Poon 		    &connp->conn_faddr_v6, &nexthop);
837721fffe3SKacheong Poon 
838721fffe3SKacheong Poon 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) &&
839721fffe3SKacheong Poon 		    !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
840721fffe3SKacheong Poon 			(void) ip_attr_connect(connp, connp->conn_ixa,
841721fffe3SKacheong Poon 			    &connp->conn_laddr_v6, &connp->conn_faddr_v6,
842721fffe3SKacheong Poon 			    &nexthop, connp->conn_fport, NULL, NULL,
843721fffe3SKacheong Poon 			    IPDF_VERIFY_DST);
844721fffe3SKacheong Poon 		}
845721fffe3SKacheong Poon 	}
846721fffe3SKacheong Poon 	if ((coas.coa_changed & COA_SNDBUF_CHANGED) && !IPCL_IS_NONSTR(connp)) {
847721fffe3SKacheong Poon 		connp->conn_wq->q_hiwat = connp->conn_sndbuf;
848721fffe3SKacheong Poon 	}
849721fffe3SKacheong Poon 	if (coas.coa_changed & COA_WROFF_CHANGED) {
850721fffe3SKacheong Poon 		connp->conn_wroff = connp->conn_ht_iphc_allocated +
851721fffe3SKacheong Poon 		    tcps->tcps_wroff_xtra;
852721fffe3SKacheong Poon 		(void) proto_set_tx_wroff(connp->conn_rq, connp,
853721fffe3SKacheong Poon 		    connp->conn_wroff);
854721fffe3SKacheong Poon 	}
855721fffe3SKacheong Poon 	if (coas.coa_changed & COA_OOBINLINE_CHANGED) {
856721fffe3SKacheong Poon 		if (IPCL_IS_NONSTR(connp))
857721fffe3SKacheong Poon 			proto_set_rx_oob_opt(connp, onoff);
858721fffe3SKacheong Poon 	}
859721fffe3SKacheong Poon 	return (0);
860721fffe3SKacheong Poon }
861