/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2016 Joyent, Inc.
 * Copyright (c) 2016 by Delphix. All rights reserved.
 */

/*
 * This file contains functions related to TCP time wait processing. Also
 * refer to the time wait handling comments in tcp_impl.h.
 */

#include <sys/types.h>
#include <sys/strsun.h>
#include <sys/squeue_impl.h>
#include <sys/squeue.h>
#include <sys/callo.h>

#include <inet/common.h>
#include <inet/ip.h>
#include <inet/tcp.h>
#include <inet/tcp_impl.h>
#include <inet/tcp_cluster.h>

452404c9e6SPatrick Mooney static void tcp_time_wait_purge(tcp_t *, tcp_squeue_priv_t *);
462404c9e6SPatrick Mooney
472404c9e6SPatrick Mooney #define TW_BUCKET(t) \
482404c9e6SPatrick Mooney (((t) / MSEC_TO_TICK(TCP_TIME_WAIT_DELAY)) % TCP_TIME_WAIT_BUCKETS)
492404c9e6SPatrick Mooney
502404c9e6SPatrick Mooney #define TW_BUCKET_NEXT(b) (((b) + 1) % TCP_TIME_WAIT_BUCKETS)
51721fffe3SKacheong Poon
52721fffe3SKacheong Poon
53721fffe3SKacheong Poon /*
54721fffe3SKacheong Poon * Remove a connection from the list of detached TIME_WAIT connections.
55721fffe3SKacheong Poon * It returns B_FALSE if it can't remove the connection from the list
56721fffe3SKacheong Poon * as the connection has already been removed from the list due to an
57721fffe3SKacheong Poon * earlier call to tcp_time_wait_remove(); otherwise it returns B_TRUE.
58721fffe3SKacheong Poon */
59721fffe3SKacheong Poon boolean_t
tcp_time_wait_remove(tcp_t * tcp,tcp_squeue_priv_t * tsp)602404c9e6SPatrick Mooney tcp_time_wait_remove(tcp_t *tcp, tcp_squeue_priv_t *tsp)
61721fffe3SKacheong Poon {
62721fffe3SKacheong Poon boolean_t locked = B_FALSE;
63721fffe3SKacheong Poon
642404c9e6SPatrick Mooney if (tsp == NULL) {
652404c9e6SPatrick Mooney tsp = *((tcp_squeue_priv_t **)
66721fffe3SKacheong Poon squeue_getprivate(tcp->tcp_connp->conn_sqp, SQPRIVATE_TCP));
672404c9e6SPatrick Mooney mutex_enter(&tsp->tcp_time_wait_lock);
68721fffe3SKacheong Poon locked = B_TRUE;
69721fffe3SKacheong Poon } else {
702404c9e6SPatrick Mooney ASSERT(MUTEX_HELD(&tsp->tcp_time_wait_lock));
71721fffe3SKacheong Poon }
72721fffe3SKacheong Poon
73721fffe3SKacheong Poon /* 0 means that the tcp_t has not been added to the time wait list. */
74721fffe3SKacheong Poon if (tcp->tcp_time_wait_expire == 0) {
75721fffe3SKacheong Poon ASSERT(tcp->tcp_time_wait_next == NULL);
76721fffe3SKacheong Poon ASSERT(tcp->tcp_time_wait_prev == NULL);
77721fffe3SKacheong Poon if (locked)
782404c9e6SPatrick Mooney mutex_exit(&tsp->tcp_time_wait_lock);
79721fffe3SKacheong Poon return (B_FALSE);
80721fffe3SKacheong Poon }
81721fffe3SKacheong Poon ASSERT(TCP_IS_DETACHED(tcp));
82721fffe3SKacheong Poon ASSERT(tcp->tcp_state == TCPS_TIME_WAIT);
832404c9e6SPatrick Mooney ASSERT(tsp->tcp_time_wait_cnt > 0);
84721fffe3SKacheong Poon
852404c9e6SPatrick Mooney if (tcp->tcp_time_wait_next != NULL) {
86721fffe3SKacheong Poon tcp->tcp_time_wait_next->tcp_time_wait_prev =
87721fffe3SKacheong Poon tcp->tcp_time_wait_prev;
88721fffe3SKacheong Poon }
892404c9e6SPatrick Mooney if (tcp->tcp_time_wait_prev != NULL) {
902404c9e6SPatrick Mooney tcp->tcp_time_wait_prev->tcp_time_wait_next =
912404c9e6SPatrick Mooney tcp->tcp_time_wait_next;
922404c9e6SPatrick Mooney } else {
932404c9e6SPatrick Mooney unsigned int bucket;
942404c9e6SPatrick Mooney
952404c9e6SPatrick Mooney bucket = TW_BUCKET(tcp->tcp_time_wait_expire);
962404c9e6SPatrick Mooney ASSERT(tsp->tcp_time_wait_bucket[bucket] == tcp);
972404c9e6SPatrick Mooney tsp->tcp_time_wait_bucket[bucket] = tcp->tcp_time_wait_next;
982404c9e6SPatrick Mooney }
99721fffe3SKacheong Poon tcp->tcp_time_wait_next = NULL;
100721fffe3SKacheong Poon tcp->tcp_time_wait_prev = NULL;
101721fffe3SKacheong Poon tcp->tcp_time_wait_expire = 0;
1022404c9e6SPatrick Mooney tsp->tcp_time_wait_cnt--;
103721fffe3SKacheong Poon
104721fffe3SKacheong Poon if (locked)
1052404c9e6SPatrick Mooney mutex_exit(&tsp->tcp_time_wait_lock);
106721fffe3SKacheong Poon return (B_TRUE);
107721fffe3SKacheong Poon }
108721fffe3SKacheong Poon
/*
 * Constants used for fast checking of a localhost address.
 *
 * The mask keeps three of the four address bytes, so the IPv4 test below
 * matches 127.0.0.x only, not all of 127.0.0.0/8 (this presumes
 * conn_laddr_v4 is held in network byte order, which is what the
 * endian-specific values imply).
 */
#if defined(_BIG_ENDIAN)
#define	IPv4_LOCALHOST	0x7f000000U
#define	IPv4_LH_MASK	0xffffff00U
#else
#define	IPv4_LOCALHOST	0x0000007fU
#define	IPv4_LH_MASK	0x00ffffffU
#endif

/*
 * True if the local address of tcp_t 'x' is an IPv4 127.0.0.x address or
 * the IPv6 loopback address.  Note that 'x' is evaluated more than once.
 */
#define	IS_LOCAL_HOST(x)	( \
	((x)->tcp_connp->conn_ipversion == IPV4_VERSION && \
	((x)->tcp_connp->conn_laddr_v4 & IPv4_LH_MASK) == IPv4_LOCALHOST) || \
	((x)->tcp_connp->conn_ipversion == IPV6_VERSION && \
	IN6_IS_ADDR_LOOPBACK(&(x)->tcp_connp->conn_laddr_v6)))

125721fffe3SKacheong Poon /*
126721fffe3SKacheong Poon * Add a connection to the list of detached TIME_WAIT connections
127721fffe3SKacheong Poon * and set its time to expire.
128721fffe3SKacheong Poon */
129721fffe3SKacheong Poon void
tcp_time_wait_append(tcp_t * tcp)130721fffe3SKacheong Poon tcp_time_wait_append(tcp_t *tcp)
131721fffe3SKacheong Poon {
132721fffe3SKacheong Poon tcp_stack_t *tcps = tcp->tcp_tcps;
13366cd0f60SKacheong Poon squeue_t *sqp = tcp->tcp_connp->conn_sqp;
1342404c9e6SPatrick Mooney tcp_squeue_priv_t *tsp =
13566cd0f60SKacheong Poon *((tcp_squeue_priv_t **)squeue_getprivate(sqp, SQPRIVATE_TCP));
1362404c9e6SPatrick Mooney int64_t now, schedule;
1372404c9e6SPatrick Mooney unsigned int bucket;
138721fffe3SKacheong Poon
139721fffe3SKacheong Poon tcp_timers_stop(tcp);
140721fffe3SKacheong Poon
141721fffe3SKacheong Poon /* Freed above */
142721fffe3SKacheong Poon ASSERT(tcp->tcp_timer_tid == 0);
143721fffe3SKacheong Poon ASSERT(tcp->tcp_ack_tid == 0);
144721fffe3SKacheong Poon
145721fffe3SKacheong Poon /* must have happened at the time of detaching the tcp */
1462404c9e6SPatrick Mooney ASSERT(TCP_IS_DETACHED(tcp));
1472404c9e6SPatrick Mooney ASSERT(tcp->tcp_state == TCPS_TIME_WAIT);
148721fffe3SKacheong Poon ASSERT(tcp->tcp_ptpahn == NULL);
149721fffe3SKacheong Poon ASSERT(tcp->tcp_flow_stopped == 0);
150721fffe3SKacheong Poon ASSERT(tcp->tcp_time_wait_next == NULL);
151721fffe3SKacheong Poon ASSERT(tcp->tcp_time_wait_prev == NULL);
15266cd0f60SKacheong Poon ASSERT(tcp->tcp_time_wait_expire == 0);
153721fffe3SKacheong Poon ASSERT(tcp->tcp_listener == NULL);
154721fffe3SKacheong Poon
1552404c9e6SPatrick Mooney TCP_DBGSTAT(tcps, tcp_time_wait);
1562404c9e6SPatrick Mooney mutex_enter(&tsp->tcp_time_wait_lock);
1572404c9e6SPatrick Mooney
1582404c9e6SPatrick Mooney /*
1592404c9e6SPatrick Mooney * Immediately expire loopback connections. Since there is no worry
1602404c9e6SPatrick Mooney * about packets on the local host showing up after a long network
1612404c9e6SPatrick Mooney * delay, this is safe and allows much higher rates of connection churn
1622404c9e6SPatrick Mooney * for applications operating locally.
1632404c9e6SPatrick Mooney *
1642404c9e6SPatrick Mooney * This typically bypasses the tcp_free_list fast path due to squeue
1652404c9e6SPatrick Mooney * re-entry for the loopback close operation.
1662404c9e6SPatrick Mooney */
1672404c9e6SPatrick Mooney if (tcp->tcp_loopback) {
1682404c9e6SPatrick Mooney tcp_time_wait_purge(tcp, tsp);
1692404c9e6SPatrick Mooney mutex_exit(&tsp->tcp_time_wait_lock);
1702404c9e6SPatrick Mooney return;
17181b60dacSJerry Jelinek }
172721fffe3SKacheong Poon
1732404c9e6SPatrick Mooney /*
1742404c9e6SPatrick Mooney * In order to reap TIME_WAITs reliably, we should use a source of time
1752404c9e6SPatrick Mooney * that is not adjustable by the user. While it would be more accurate
1762404c9e6SPatrick Mooney * to grab this timestamp before (potentially) sleeping on the
1772404c9e6SPatrick Mooney * tcp_time_wait_lock, doing so complicates bucket addressing later.
1782404c9e6SPatrick Mooney */
1792404c9e6SPatrick Mooney now = ddi_get_lbolt64();
1802404c9e6SPatrick Mooney
1812404c9e6SPatrick Mooney /*
1822404c9e6SPatrick Mooney * Each squeue uses an arbitrary time offset when scheduling
1832404c9e6SPatrick Mooney * expiration timers. This prevents the bucketing from forcing
1842404c9e6SPatrick Mooney * tcp_time_wait_collector to run in locksetup across squeues.
1852404c9e6SPatrick Mooney *
1862404c9e6SPatrick Mooney * This offset is (re)initialized when a new TIME_WAIT connection is
1872404c9e6SPatrick Mooney * added to an squeue which has no connections waiting to expire.
1882404c9e6SPatrick Mooney */
1892404c9e6SPatrick Mooney if (tsp->tcp_time_wait_tid == 0) {
1902404c9e6SPatrick Mooney ASSERT(tsp->tcp_time_wait_cnt == 0);
1912404c9e6SPatrick Mooney tsp->tcp_time_wait_offset =
1922404c9e6SPatrick Mooney now % MSEC_TO_TICK(TCP_TIME_WAIT_DELAY);
1932404c9e6SPatrick Mooney }
1942404c9e6SPatrick Mooney now -= tsp->tcp_time_wait_offset;
195721fffe3SKacheong Poon
1962404c9e6SPatrick Mooney /*
1972404c9e6SPatrick Mooney * Use the netstack-defined timeout, rounded up to the minimum
1982404c9e6SPatrick Mooney * time_wait_collector interval.
1992404c9e6SPatrick Mooney */
2002404c9e6SPatrick Mooney schedule = now + MSEC_TO_TICK(tcps->tcps_time_wait_interval);
2012404c9e6SPatrick Mooney tcp->tcp_time_wait_expire = schedule;
2022404c9e6SPatrick Mooney
2032404c9e6SPatrick Mooney /*
2042404c9e6SPatrick Mooney * Append the connection into the appropriate bucket.
2052404c9e6SPatrick Mooney */
2062404c9e6SPatrick Mooney bucket = TW_BUCKET(tcp->tcp_time_wait_expire);
2072404c9e6SPatrick Mooney tcp->tcp_time_wait_next = tsp->tcp_time_wait_bucket[bucket];
2082404c9e6SPatrick Mooney tsp->tcp_time_wait_bucket[bucket] = tcp;
2092404c9e6SPatrick Mooney if (tcp->tcp_time_wait_next != NULL) {
2102404c9e6SPatrick Mooney ASSERT(tcp->tcp_time_wait_next->tcp_time_wait_prev == NULL);
2112404c9e6SPatrick Mooney tcp->tcp_time_wait_next->tcp_time_wait_prev = tcp;
2122404c9e6SPatrick Mooney }
2132404c9e6SPatrick Mooney tsp->tcp_time_wait_cnt++;
2142404c9e6SPatrick Mooney
2152404c9e6SPatrick Mooney /*
2162404c9e6SPatrick Mooney * Round delay up to the nearest bucket boundary.
2172404c9e6SPatrick Mooney */
2182404c9e6SPatrick Mooney schedule += MSEC_TO_TICK(TCP_TIME_WAIT_DELAY);
2192404c9e6SPatrick Mooney schedule -= schedule % MSEC_TO_TICK(TCP_TIME_WAIT_DELAY);
2202404c9e6SPatrick Mooney
2212404c9e6SPatrick Mooney /*
2222404c9e6SPatrick Mooney * The newly inserted entry may require a tighter schedule for the
2232404c9e6SPatrick Mooney * expiration timer.
2242404c9e6SPatrick Mooney */
2252404c9e6SPatrick Mooney if (schedule < tsp->tcp_time_wait_schedule) {
2262404c9e6SPatrick Mooney callout_id_t old_tid = tsp->tcp_time_wait_tid;
2272404c9e6SPatrick Mooney
2282404c9e6SPatrick Mooney tsp->tcp_time_wait_schedule = schedule;
2292404c9e6SPatrick Mooney tsp->tcp_time_wait_tid =
2302404c9e6SPatrick Mooney timeout_generic(CALLOUT_NORMAL,
2312404c9e6SPatrick Mooney tcp_time_wait_collector, sqp,
2322404c9e6SPatrick Mooney TICK_TO_NSEC(schedule - now),
2332404c9e6SPatrick Mooney CALLOUT_TCP_RESOLUTION, CALLOUT_FLAG_ROUNDUP);
23466cd0f60SKacheong Poon
23566cd0f60SKacheong Poon /*
2362404c9e6SPatrick Mooney * It is possible for the timer to fire before the untimeout
2372404c9e6SPatrick Mooney * action is able to complete. In that case, the exclusion
2382404c9e6SPatrick Mooney * offered by the tcp_time_wait_collector_active flag will
2392404c9e6SPatrick Mooney * prevent multiple collector threads from processing records
2402404c9e6SPatrick Mooney * simultaneously from the same squeue.
2410870f17bSKacheong Poon */
2422404c9e6SPatrick Mooney mutex_exit(&tsp->tcp_time_wait_lock);
2432404c9e6SPatrick Mooney (void) untimeout_default(old_tid, 0);
2442404c9e6SPatrick Mooney return;
2452404c9e6SPatrick Mooney }
2462404c9e6SPatrick Mooney
2472404c9e6SPatrick Mooney /*
2482404c9e6SPatrick Mooney * Start a fresh timer if none exists.
2492404c9e6SPatrick Mooney */
2502404c9e6SPatrick Mooney if (tsp->tcp_time_wait_schedule == 0) {
2512404c9e6SPatrick Mooney ASSERT(tsp->tcp_time_wait_tid == 0);
2522404c9e6SPatrick Mooney
2532404c9e6SPatrick Mooney tsp->tcp_time_wait_schedule = schedule;
2542404c9e6SPatrick Mooney tsp->tcp_time_wait_tid =
2552404c9e6SPatrick Mooney timeout_generic(CALLOUT_NORMAL,
2562404c9e6SPatrick Mooney tcp_time_wait_collector, sqp,
2572404c9e6SPatrick Mooney TICK_TO_NSEC(schedule - now),
2582404c9e6SPatrick Mooney CALLOUT_TCP_RESOLUTION, CALLOUT_FLAG_ROUNDUP);
259721fffe3SKacheong Poon }
2602404c9e6SPatrick Mooney mutex_exit(&tsp->tcp_time_wait_lock);
261721fffe3SKacheong Poon }
262721fffe3SKacheong Poon
263721fffe3SKacheong Poon /*
264721fffe3SKacheong Poon * Wrapper to call tcp_close_detached() via squeue to clean up TIME-WAIT
265721fffe3SKacheong Poon * tcp_t. Used in tcp_time_wait_collector().
266721fffe3SKacheong Poon */
267721fffe3SKacheong Poon /* ARGSUSED */
268721fffe3SKacheong Poon static void
tcp_timewait_close(void * arg,mblk_t * mp,void * arg2,ip_recv_attr_t * dummy)269721fffe3SKacheong Poon tcp_timewait_close(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *dummy)
270721fffe3SKacheong Poon {
271721fffe3SKacheong Poon conn_t *connp = (conn_t *)arg;
272721fffe3SKacheong Poon tcp_t *tcp = connp->conn_tcp;
273721fffe3SKacheong Poon
274721fffe3SKacheong Poon ASSERT(tcp != NULL);
275721fffe3SKacheong Poon if (tcp->tcp_state == TCPS_CLOSED) {
276721fffe3SKacheong Poon return;
277721fffe3SKacheong Poon }
278721fffe3SKacheong Poon
279721fffe3SKacheong Poon ASSERT((connp->conn_family == AF_INET &&
280721fffe3SKacheong Poon connp->conn_ipversion == IPV4_VERSION) ||
281721fffe3SKacheong Poon (connp->conn_family == AF_INET6 &&
282721fffe3SKacheong Poon (connp->conn_ipversion == IPV4_VERSION ||
283721fffe3SKacheong Poon connp->conn_ipversion == IPV6_VERSION)));
284721fffe3SKacheong Poon ASSERT(!tcp->tcp_listener);
285721fffe3SKacheong Poon
286721fffe3SKacheong Poon ASSERT(TCP_IS_DETACHED(tcp));
287721fffe3SKacheong Poon
288721fffe3SKacheong Poon /*
289721fffe3SKacheong Poon * Because they have no upstream client to rebind or tcp_close()
290721fffe3SKacheong Poon * them later, we axe the connection here and now.
291721fffe3SKacheong Poon */
292721fffe3SKacheong Poon tcp_close_detached(tcp);
293721fffe3SKacheong Poon }
294721fffe3SKacheong Poon
2952404c9e6SPatrick Mooney
2962404c9e6SPatrick Mooney static void
tcp_time_wait_purge(tcp_t * tcp,tcp_squeue_priv_t * tsp)2972404c9e6SPatrick Mooney tcp_time_wait_purge(tcp_t *tcp, tcp_squeue_priv_t *tsp)
2982404c9e6SPatrick Mooney {
2992404c9e6SPatrick Mooney mblk_t *mp;
3002404c9e6SPatrick Mooney conn_t *connp = tcp->tcp_connp;
3012404c9e6SPatrick Mooney kmutex_t *lock;
3022404c9e6SPatrick Mooney
3032404c9e6SPatrick Mooney ASSERT(MUTEX_HELD(&tsp->tcp_time_wait_lock));
3042404c9e6SPatrick Mooney ASSERT(connp->conn_fanout != NULL);
3052404c9e6SPatrick Mooney
3062404c9e6SPatrick Mooney lock = &connp->conn_fanout->connf_lock;
3072404c9e6SPatrick Mooney
3082404c9e6SPatrick Mooney /*
3092404c9e6SPatrick Mooney * This is essentially a TIME_WAIT reclaim fast path optimization for
3102404c9e6SPatrick Mooney * performance where the connection is checked under the fanout lock
3112404c9e6SPatrick Mooney * (so that no one else can get access to the conn_t) that the refcnt
3122404c9e6SPatrick Mooney * is 2 (one each for TCP and the classifier hash list). That is the
3132404c9e6SPatrick Mooney * case and clustering callbacks are not enabled, the conn can be
3142404c9e6SPatrick Mooney * removed under the fanout lock and avoid clean-up under the squeue.
3152404c9e6SPatrick Mooney *
3162404c9e6SPatrick Mooney * This optimization is forgone when clustering is enabled since the
3172404c9e6SPatrick Mooney * clustering callback must be made before setting the CONDEMNED flag
3182404c9e6SPatrick Mooney * and after dropping all locks
3192404c9e6SPatrick Mooney *
3202404c9e6SPatrick Mooney * See the comments in tcp_closei_local for additional information
3212404c9e6SPatrick Mooney * regarding the refcnt logic.
3222404c9e6SPatrick Mooney */
3232404c9e6SPatrick Mooney if (mutex_tryenter(lock)) {
3242404c9e6SPatrick Mooney mutex_enter(&connp->conn_lock);
3252404c9e6SPatrick Mooney if (connp->conn_ref == 2 && cl_inet_disconnect == NULL) {
3262404c9e6SPatrick Mooney ipcl_hash_remove_locked(connp, connp->conn_fanout);
3272404c9e6SPatrick Mooney /*
3282404c9e6SPatrick Mooney * Set the CONDEMNED flag now itself so that the refcnt
3292404c9e6SPatrick Mooney * cannot increase due to any walker.
3302404c9e6SPatrick Mooney */
3312404c9e6SPatrick Mooney connp->conn_state_flags |= CONN_CONDEMNED;
3322404c9e6SPatrick Mooney mutex_exit(&connp->conn_lock);
3332404c9e6SPatrick Mooney mutex_exit(lock);
3342404c9e6SPatrick Mooney if (tsp->tcp_free_list_cnt < tcp_free_list_max_cnt) {
3352404c9e6SPatrick Mooney /*
3362404c9e6SPatrick Mooney * Add to head of tcp_free_list
3372404c9e6SPatrick Mooney */
3382404c9e6SPatrick Mooney tcp_cleanup(tcp);
3392404c9e6SPatrick Mooney ASSERT(connp->conn_latch == NULL);
3402404c9e6SPatrick Mooney ASSERT(connp->conn_policy == NULL);
3412404c9e6SPatrick Mooney ASSERT(tcp->tcp_tcps == NULL);
3422404c9e6SPatrick Mooney ASSERT(connp->conn_netstack == NULL);
3432404c9e6SPatrick Mooney
3442404c9e6SPatrick Mooney tcp->tcp_time_wait_next = tsp->tcp_free_list;
3452404c9e6SPatrick Mooney tcp->tcp_in_free_list = B_TRUE;
3462404c9e6SPatrick Mooney tsp->tcp_free_list = tcp;
3472404c9e6SPatrick Mooney tsp->tcp_free_list_cnt++;
3482404c9e6SPatrick Mooney } else {
3492404c9e6SPatrick Mooney /*
3502404c9e6SPatrick Mooney * Do not add to tcp_free_list
3512404c9e6SPatrick Mooney */
3522404c9e6SPatrick Mooney tcp_bind_hash_remove(tcp);
3532404c9e6SPatrick Mooney ixa_cleanup(tcp->tcp_connp->conn_ixa);
3542404c9e6SPatrick Mooney tcp_ipsec_cleanup(tcp);
3552404c9e6SPatrick Mooney CONN_DEC_REF(tcp->tcp_connp);
3562404c9e6SPatrick Mooney }
3572404c9e6SPatrick Mooney
3582404c9e6SPatrick Mooney /*
3592404c9e6SPatrick Mooney * With the fast-path complete, we can bail.
3602404c9e6SPatrick Mooney */
3612404c9e6SPatrick Mooney return;
3622404c9e6SPatrick Mooney } else {
3632404c9e6SPatrick Mooney /*
3642404c9e6SPatrick Mooney * Fall back to slow path.
3652404c9e6SPatrick Mooney */
3662404c9e6SPatrick Mooney CONN_INC_REF_LOCKED(connp);
3672404c9e6SPatrick Mooney mutex_exit(&connp->conn_lock);
3682404c9e6SPatrick Mooney mutex_exit(lock);
3692404c9e6SPatrick Mooney }
3702404c9e6SPatrick Mooney } else {
3712404c9e6SPatrick Mooney CONN_INC_REF(connp);
3722404c9e6SPatrick Mooney }
3732404c9e6SPatrick Mooney
3742404c9e6SPatrick Mooney /*
3752404c9e6SPatrick Mooney * We can reuse the closemp here since conn has detached (otherwise we
3762404c9e6SPatrick Mooney * wouldn't even be in time_wait list). It is safe to change
3772404c9e6SPatrick Mooney * tcp_closemp_used without taking a lock as no other thread can
3782404c9e6SPatrick Mooney * concurrently access it at this point in the connection lifecycle.
3792404c9e6SPatrick Mooney */
3802404c9e6SPatrick Mooney if (tcp->tcp_closemp.b_prev == NULL) {
3812404c9e6SPatrick Mooney tcp->tcp_closemp_used = B_TRUE;
3822404c9e6SPatrick Mooney } else {
3832404c9e6SPatrick Mooney cmn_err(CE_PANIC,
3842404c9e6SPatrick Mooney "tcp_timewait_collector: concurrent use of tcp_closemp: "
3852404c9e6SPatrick Mooney "connp %p tcp %p\n", (void *)connp, (void *)tcp);
3862404c9e6SPatrick Mooney }
3872404c9e6SPatrick Mooney
3882404c9e6SPatrick Mooney TCP_DEBUG_GETPCSTACK(tcp->tcmp_stk, 15);
3892404c9e6SPatrick Mooney mp = &tcp->tcp_closemp;
3902404c9e6SPatrick Mooney mutex_exit(&tsp->tcp_time_wait_lock);
3912404c9e6SPatrick Mooney SQUEUE_ENTER_ONE(connp->conn_sqp, mp, tcp_timewait_close, connp, NULL,
3922404c9e6SPatrick Mooney SQ_FILL, SQTAG_TCP_TIMEWAIT);
3932404c9e6SPatrick Mooney mutex_enter(&tsp->tcp_time_wait_lock);
3942404c9e6SPatrick Mooney }
3952404c9e6SPatrick Mooney
396721fffe3SKacheong Poon /*
3972404c9e6SPatrick Mooney * Purge any tcp_t instances associated with this squeue which have expired
3982404c9e6SPatrick Mooney * from the TIME_WAIT state.
399721fffe3SKacheong Poon */
400721fffe3SKacheong Poon void
tcp_time_wait_collector(void * arg)401721fffe3SKacheong Poon tcp_time_wait_collector(void *arg)
402721fffe3SKacheong Poon {
403721fffe3SKacheong Poon tcp_t *tcp;
404c79a72d7SPatrick Mooney int64_t now, sched_active, sched_cur, sched_new;
4052404c9e6SPatrick Mooney unsigned int idx;
406721fffe3SKacheong Poon
407721fffe3SKacheong Poon squeue_t *sqp = (squeue_t *)arg;
4082404c9e6SPatrick Mooney tcp_squeue_priv_t *tsp =
409721fffe3SKacheong Poon *((tcp_squeue_priv_t **)squeue_getprivate(sqp, SQPRIVATE_TCP));
410721fffe3SKacheong Poon
4112404c9e6SPatrick Mooney mutex_enter(&tsp->tcp_time_wait_lock);
4122404c9e6SPatrick Mooney
4132404c9e6SPatrick Mooney /*
4142404c9e6SPatrick Mooney * Because of timer scheduling complexity and the fact that the
4152404c9e6SPatrick Mooney * tcp_time_wait_lock is dropped during tcp_time_wait_purge, it is
4162404c9e6SPatrick Mooney * possible for multiple tcp_time_wait_collector threads to run against
4172404c9e6SPatrick Mooney * the same squeue. This flag is used to exclude other collectors from
4182404c9e6SPatrick Mooney * the squeue during execution.
4192404c9e6SPatrick Mooney */
4202404c9e6SPatrick Mooney if (tsp->tcp_time_wait_collector_active) {
4212404c9e6SPatrick Mooney mutex_exit(&tsp->tcp_time_wait_lock);
4222404c9e6SPatrick Mooney return;
4232404c9e6SPatrick Mooney }
4242404c9e6SPatrick Mooney tsp->tcp_time_wait_collector_active = B_TRUE;
425721fffe3SKacheong Poon
426c79a72d7SPatrick Mooney /*
427c79a72d7SPatrick Mooney * After its assignment here, the value of sched_active must not be
428c79a72d7SPatrick Mooney * altered as it is used to validate the state of the
429c79a72d7SPatrick Mooney * tcp_time_wait_collector callout schedule for this squeue.
430c79a72d7SPatrick Mooney *
431c79a72d7SPatrick Mooney * The same does not hold true of sched_cur, which holds the timestamp
432c79a72d7SPatrick Mooney * of the bucket undergoing processing. While it is initially equal to
433c79a72d7SPatrick Mooney * sched_active, certain conditions below can walk it forward,
434c79a72d7SPatrick Mooney * triggering the retry loop.
435c79a72d7SPatrick Mooney */
436c79a72d7SPatrick Mooney sched_cur = sched_active = tsp->tcp_time_wait_schedule;
437c79a72d7SPatrick Mooney
4382404c9e6SPatrick Mooney /*
4392404c9e6SPatrick Mooney * Purge the free list if necessary
4402404c9e6SPatrick Mooney */
4412404c9e6SPatrick Mooney if (tsp->tcp_free_list != NULL) {
442721fffe3SKacheong Poon TCP_G_STAT(tcp_freelist_cleanup);
4432404c9e6SPatrick Mooney while ((tcp = tsp->tcp_free_list) != NULL) {
4442404c9e6SPatrick Mooney tsp->tcp_free_list = tcp->tcp_time_wait_next;
445721fffe3SKacheong Poon tcp->tcp_time_wait_next = NULL;
4462404c9e6SPatrick Mooney tsp->tcp_free_list_cnt--;
447721fffe3SKacheong Poon ASSERT(tcp->tcp_tcps == NULL);
448721fffe3SKacheong Poon CONN_DEC_REF(tcp->tcp_connp);
449721fffe3SKacheong Poon }
4502404c9e6SPatrick Mooney ASSERT(tsp->tcp_free_list_cnt == 0);
451721fffe3SKacheong Poon }
452721fffe3SKacheong Poon
453721fffe3SKacheong Poon /*
4542404c9e6SPatrick Mooney * If there are no connections pending, clear timer-related state to be
4552404c9e6SPatrick Mooney * reinitialized by the next caller.
456721fffe3SKacheong Poon */
4572404c9e6SPatrick Mooney if (tsp->tcp_time_wait_cnt == 0) {
4582404c9e6SPatrick Mooney tsp->tcp_time_wait_offset = 0;
4592404c9e6SPatrick Mooney tsp->tcp_time_wait_schedule = 0;
4602404c9e6SPatrick Mooney tsp->tcp_time_wait_tid = 0;
4612404c9e6SPatrick Mooney tsp->tcp_time_wait_collector_active = B_FALSE;
4622404c9e6SPatrick Mooney mutex_exit(&tsp->tcp_time_wait_lock);
4632404c9e6SPatrick Mooney return;
4642404c9e6SPatrick Mooney }
4652404c9e6SPatrick Mooney
466c79a72d7SPatrick Mooney retry:
4672404c9e6SPatrick Mooney /*
4682404c9e6SPatrick Mooney * Grab the bucket which we were scheduled to cleanse.
4692404c9e6SPatrick Mooney */
470c79a72d7SPatrick Mooney idx = TW_BUCKET(sched_cur - 1);
4712404c9e6SPatrick Mooney now = ddi_get_lbolt64() - tsp->tcp_time_wait_offset;
4722404c9e6SPatrick Mooney tcp = tsp->tcp_time_wait_bucket[idx];
4732404c9e6SPatrick Mooney
4742404c9e6SPatrick Mooney while (tcp != NULL) {
475721fffe3SKacheong Poon /*
4762404c9e6SPatrick Mooney * Since the bucket count is sized to prevent wrap-around
4772404c9e6SPatrick Mooney * during typical operation and timers are schedule to process
4782404c9e6SPatrick Mooney * buckets with only expired connections, there is only one
4792404c9e6SPatrick Mooney * reason to encounter a connection expiring in the future:
4802404c9e6SPatrick Mooney * The tcp_time_wait_collector thread has been so delayed in
4812404c9e6SPatrick Mooney * its processing that connections have wrapped around the
4822404c9e6SPatrick Mooney * timing wheel into this bucket.
4832404c9e6SPatrick Mooney *
4842404c9e6SPatrick Mooney * In that case, the remaining entires in the bucket can be
4852404c9e6SPatrick Mooney * ignored since, being appended sequentially, they should all
4862404c9e6SPatrick Mooney * expire in the future.
487721fffe3SKacheong Poon */
4882404c9e6SPatrick Mooney if (now < tcp->tcp_time_wait_expire) {
489721fffe3SKacheong Poon break;
4902404c9e6SPatrick Mooney }
491721fffe3SKacheong Poon
4922404c9e6SPatrick Mooney /*
4932404c9e6SPatrick Mooney * Pull the connection out of the bucket.
4942404c9e6SPatrick Mooney */
4952404c9e6SPatrick Mooney VERIFY(tcp_time_wait_remove(tcp, tsp));
496721fffe3SKacheong Poon
497721fffe3SKacheong Poon /*
4982404c9e6SPatrick Mooney * Purge the connection.
499721fffe3SKacheong Poon *
5002404c9e6SPatrick Mooney * While tcp_time_wait_lock will be temporarily dropped as part
5012404c9e6SPatrick Mooney * of the process, there is no risk of the timer being
5022404c9e6SPatrick Mooney * (re)scheduled while the collector is running since a value
5032404c9e6SPatrick Mooney * corresponding to the past is left in tcp_time_wait_schedule.
504721fffe3SKacheong Poon */
5052404c9e6SPatrick Mooney tcp_time_wait_purge(tcp, tsp);
506721fffe3SKacheong Poon
5072404c9e6SPatrick Mooney /*
5082404c9e6SPatrick Mooney * Because tcp_time_wait_remove clears the tcp_time_wait_next
5092404c9e6SPatrick Mooney * field, the next item must be grabbed directly from the
5102404c9e6SPatrick Mooney * bucket itself.
5112404c9e6SPatrick Mooney */
5122404c9e6SPatrick Mooney tcp = tsp->tcp_time_wait_bucket[idx];
5132404c9e6SPatrick Mooney }
514721fffe3SKacheong Poon
5152404c9e6SPatrick Mooney if (tsp->tcp_time_wait_cnt == 0) {
5162404c9e6SPatrick Mooney /*
5172404c9e6SPatrick Mooney * There is not a need for the collector to schedule a new
5182404c9e6SPatrick Mooney * timer if no pending items remain. The timer state can be
5192404c9e6SPatrick Mooney * cleared only if it was untouched while the collector dropped
5202404c9e6SPatrick Mooney * its locks during tcp_time_wait_purge.
5212404c9e6SPatrick Mooney */
522c79a72d7SPatrick Mooney if (tsp->tcp_time_wait_schedule == sched_active) {
5232404c9e6SPatrick Mooney tsp->tcp_time_wait_offset = 0;
5242404c9e6SPatrick Mooney tsp->tcp_time_wait_schedule = 0;
5252404c9e6SPatrick Mooney tsp->tcp_time_wait_tid = 0;
5262404c9e6SPatrick Mooney }
5272404c9e6SPatrick Mooney tsp->tcp_time_wait_collector_active = B_FALSE;
5282404c9e6SPatrick Mooney mutex_exit(&tsp->tcp_time_wait_lock);
5292404c9e6SPatrick Mooney return;
5302404c9e6SPatrick Mooney } else {
5312404c9e6SPatrick Mooney unsigned int nidx;
5322404c9e6SPatrick Mooney
5332404c9e6SPatrick Mooney /*
5342404c9e6SPatrick Mooney * Locate the next bucket containing entries.
5352404c9e6SPatrick Mooney */
536c79a72d7SPatrick Mooney sched_new = sched_cur + MSEC_TO_TICK(TCP_TIME_WAIT_DELAY);
5372404c9e6SPatrick Mooney nidx = TW_BUCKET_NEXT(idx);
5382404c9e6SPatrick Mooney while (tsp->tcp_time_wait_bucket[nidx] == NULL) {
5392404c9e6SPatrick Mooney if (nidx == idx) {
5402404c9e6SPatrick Mooney break;
5412404c9e6SPatrick Mooney }
5422404c9e6SPatrick Mooney nidx = TW_BUCKET_NEXT(nidx);
543c79a72d7SPatrick Mooney sched_new += MSEC_TO_TICK(TCP_TIME_WAIT_DELAY);
544721fffe3SKacheong Poon }
5452404c9e6SPatrick Mooney ASSERT(tsp->tcp_time_wait_bucket[nidx] != NULL);
546721fffe3SKacheong Poon }
547721fffe3SKacheong Poon
5482404c9e6SPatrick Mooney /*
5492404c9e6SPatrick Mooney * It is possible that the system is under such dire load that between
5502404c9e6SPatrick Mooney * the timer scheduling and TIME_WAIT processing delay, execution
5512404c9e6SPatrick Mooney * overran the interval allocated to this bucket.
5522404c9e6SPatrick Mooney */
5532404c9e6SPatrick Mooney now = ddi_get_lbolt64() - tsp->tcp_time_wait_offset;
554c79a72d7SPatrick Mooney if (sched_new <= now) {
5552404c9e6SPatrick Mooney /*
5562404c9e6SPatrick Mooney * Attempt to right the situation by immediately performing a
5572404c9e6SPatrick Mooney * purge on the next bucket. This loop will continue as needed
5582404c9e6SPatrick Mooney * until the schedule can be pushed out ahead of the clock.
5592404c9e6SPatrick Mooney */
560c79a72d7SPatrick Mooney sched_cur = sched_new;
561c79a72d7SPatrick Mooney DTRACE_PROBE3(tcp__time__wait__overrun,
562c79a72d7SPatrick Mooney tcp_squeue_priv_t *, tsp, int64_t, sched_new, int64_t, now);
5632404c9e6SPatrick Mooney goto retry;
5642404c9e6SPatrick Mooney }
565721fffe3SKacheong Poon
56666cd0f60SKacheong Poon /*
5672404c9e6SPatrick Mooney * Another thread may have snuck in to reschedule the timer while locks
5682404c9e6SPatrick Mooney * were dropped during tcp_time_wait_purge. Defer to the running timer
5692404c9e6SPatrick Mooney * if that is the case.
57066cd0f60SKacheong Poon */
571c79a72d7SPatrick Mooney if (tsp->tcp_time_wait_schedule != sched_active) {
5722404c9e6SPatrick Mooney tsp->tcp_time_wait_collector_active = B_FALSE;
5732404c9e6SPatrick Mooney mutex_exit(&tsp->tcp_time_wait_lock);
5742404c9e6SPatrick Mooney return;
57566cd0f60SKacheong Poon }
5762404c9e6SPatrick Mooney
5772404c9e6SPatrick Mooney /*
5782404c9e6SPatrick Mooney * Schedule the next timer.
5792404c9e6SPatrick Mooney */
580c79a72d7SPatrick Mooney tsp->tcp_time_wait_schedule = sched_new;
5812404c9e6SPatrick Mooney tsp->tcp_time_wait_tid =
5822404c9e6SPatrick Mooney timeout_generic(CALLOUT_NORMAL,
5832404c9e6SPatrick Mooney tcp_time_wait_collector, sqp,
584c79a72d7SPatrick Mooney TICK_TO_NSEC(sched_new - now),
5852404c9e6SPatrick Mooney CALLOUT_TCP_RESOLUTION, CALLOUT_FLAG_ROUNDUP);
5862404c9e6SPatrick Mooney tsp->tcp_time_wait_collector_active = B_FALSE;
5872404c9e6SPatrick Mooney mutex_exit(&tsp->tcp_time_wait_lock);
588721fffe3SKacheong Poon }
589721fffe3SKacheong Poon
590721fffe3SKacheong Poon /*
591721fffe3SKacheong Poon * tcp_time_wait_processing() handles processing of incoming packets when
592721fffe3SKacheong Poon * the tcp_t is in the TIME_WAIT state.
593721fffe3SKacheong Poon *
594721fffe3SKacheong Poon * A TIME_WAIT tcp_t that has an associated open TCP end point (not in
595721fffe3SKacheong Poon * detached state) is never put on the time wait list.
596721fffe3SKacheong Poon */
597721fffe3SKacheong Poon void
tcp_time_wait_processing(tcp_t * tcp,mblk_t * mp,uint32_t seg_seq,uint32_t seg_ack,int seg_len,tcpha_t * tcpha,ip_recv_attr_t * ira)598721fffe3SKacheong Poon tcp_time_wait_processing(tcp_t *tcp, mblk_t *mp, uint32_t seg_seq,
599721fffe3SKacheong Poon uint32_t seg_ack, int seg_len, tcpha_t *tcpha, ip_recv_attr_t *ira)
600721fffe3SKacheong Poon {
601721fffe3SKacheong Poon int32_t bytes_acked;
602721fffe3SKacheong Poon int32_t gap;
603721fffe3SKacheong Poon int32_t rgap;
604721fffe3SKacheong Poon tcp_opt_t tcpopt;
605721fffe3SKacheong Poon uint_t flags;
606721fffe3SKacheong Poon uint32_t new_swnd = 0;
607721fffe3SKacheong Poon conn_t *nconnp;
608721fffe3SKacheong Poon conn_t *connp = tcp->tcp_connp;
609721fffe3SKacheong Poon tcp_stack_t *tcps = tcp->tcp_tcps;
610721fffe3SKacheong Poon
611*a2f04351SSebastien Roy TCPS_BUMP_MIB(tcps, tcpHCInSegs);
612721fffe3SKacheong Poon DTRACE_PROBE2(tcp__trace__recv, mblk_t *, mp, tcp_t *, tcp);
613721fffe3SKacheong Poon
614721fffe3SKacheong Poon flags = (unsigned int)tcpha->tha_flags & 0xFF;
615721fffe3SKacheong Poon new_swnd = ntohs(tcpha->tha_win) <<
616721fffe3SKacheong Poon ((tcpha->tha_flags & TH_SYN) ? 0 : tcp->tcp_snd_ws);
6171f183ba0SLauri Tirkkonen
618143b26e0SSteve Gonczi boolean_t keepalive = (seg_len == 0 || seg_len == 1) &&
619143b26e0SSteve Gonczi (seg_seq + 1 == tcp->tcp_rnxt);
620143b26e0SSteve Gonczi if (tcp->tcp_snd_ts_ok && !(flags & TH_RST) && !keepalive) {
6211f183ba0SLauri Tirkkonen int options;
6221f183ba0SLauri Tirkkonen if (tcp->tcp_snd_sack_ok)
6231f183ba0SLauri Tirkkonen tcpopt.tcp = tcp;
6241f183ba0SLauri Tirkkonen else
6251f183ba0SLauri Tirkkonen tcpopt.tcp = NULL;
6261f183ba0SLauri Tirkkonen options = tcp_parse_options(tcpha, &tcpopt);
6271f183ba0SLauri Tirkkonen if (!(options & TCP_OPT_TSTAMP_PRESENT)) {
6281f183ba0SLauri Tirkkonen DTRACE_TCP1(droppedtimestamp, tcp_t *, tcp);
6291f183ba0SLauri Tirkkonen goto done;
6301f183ba0SLauri Tirkkonen } else if (!tcp_paws_check(tcp, &tcpopt)) {
6311f183ba0SLauri Tirkkonen tcp_xmit_ctl(NULL, tcp, tcp->tcp_snxt, tcp->tcp_rnxt,
6321f183ba0SLauri Tirkkonen TH_ACK);
633721fffe3SKacheong Poon goto done;
634721fffe3SKacheong Poon }
635721fffe3SKacheong Poon }
636721fffe3SKacheong Poon gap = seg_seq - tcp->tcp_rnxt;
637721fffe3SKacheong Poon rgap = tcp->tcp_rwnd - (gap + seg_len);
638721fffe3SKacheong Poon if (gap < 0) {
639721fffe3SKacheong Poon TCPS_BUMP_MIB(tcps, tcpInDataDupSegs);
640721fffe3SKacheong Poon TCPS_UPDATE_MIB(tcps, tcpInDataDupBytes,
641721fffe3SKacheong Poon (seg_len > -gap ? -gap : seg_len));
642721fffe3SKacheong Poon seg_len += gap;
643721fffe3SKacheong Poon if (seg_len < 0 || (seg_len == 0 && !(flags & TH_FIN))) {
644721fffe3SKacheong Poon if (flags & TH_RST) {
645721fffe3SKacheong Poon goto done;
646721fffe3SKacheong Poon }
647721fffe3SKacheong Poon if ((flags & TH_FIN) && seg_len == -1) {
648721fffe3SKacheong Poon /*
649721fffe3SKacheong Poon * When TCP receives a duplicate FIN in
650721fffe3SKacheong Poon * TIME_WAIT state, restart the 2 MSL timer.
651721fffe3SKacheong Poon * See page 73 in RFC 793. Make sure this TCP
652721fffe3SKacheong Poon * is already on the TIME_WAIT list. If not,
653721fffe3SKacheong Poon * just restart the timer.
654721fffe3SKacheong Poon */
655721fffe3SKacheong Poon if (TCP_IS_DETACHED(tcp)) {
656721fffe3SKacheong Poon if (tcp_time_wait_remove(tcp, NULL) ==
657721fffe3SKacheong Poon B_TRUE) {
658721fffe3SKacheong Poon tcp_time_wait_append(tcp);
659721fffe3SKacheong Poon TCP_DBGSTAT(tcps,
660721fffe3SKacheong Poon tcp_rput_time_wait);
661721fffe3SKacheong Poon }
662721fffe3SKacheong Poon } else {
663721fffe3SKacheong Poon ASSERT(tcp != NULL);
664721fffe3SKacheong Poon TCP_TIMER_RESTART(tcp,
665721fffe3SKacheong Poon tcps->tcps_time_wait_interval);
666721fffe3SKacheong Poon }
667721fffe3SKacheong Poon tcp_xmit_ctl(NULL, tcp, tcp->tcp_snxt,
668721fffe3SKacheong Poon tcp->tcp_rnxt, TH_ACK);
669721fffe3SKacheong Poon goto done;
670721fffe3SKacheong Poon }
671721fffe3SKacheong Poon flags |= TH_ACK_NEEDED;
672721fffe3SKacheong Poon seg_len = 0;
673721fffe3SKacheong Poon goto process_ack;
674721fffe3SKacheong Poon }
675721fffe3SKacheong Poon
676721fffe3SKacheong Poon /* Fix seg_seq, and chew the gap off the front. */
677721fffe3SKacheong Poon seg_seq = tcp->tcp_rnxt;
678721fffe3SKacheong Poon }
679721fffe3SKacheong Poon
680721fffe3SKacheong Poon if ((flags & TH_SYN) && gap > 0 && rgap < 0) {
681721fffe3SKacheong Poon /*
682721fffe3SKacheong Poon * Make sure that when we accept the connection, pick
683c0e6663fSJerry Jelinek * an ISS greater than (tcp_snxt + tcp_iss_incr/2) for the
684721fffe3SKacheong Poon * old connection.
685721fffe3SKacheong Poon *
686721fffe3SKacheong Poon * The next ISS generated is equal to tcp_iss_incr_extra
687c0e6663fSJerry Jelinek * + tcp_iss_incr/2 + other components depending on the
688721fffe3SKacheong Poon * value of tcp_strong_iss. We pre-calculate the new
689721fffe3SKacheong Poon * ISS here and compare with tcp_snxt to determine if
690721fffe3SKacheong Poon * we need to make adjustment to tcp_iss_incr_extra.
691721fffe3SKacheong Poon *
692721fffe3SKacheong Poon * The above calculation is ugly and is a
693721fffe3SKacheong Poon * waste of CPU cycles...
694721fffe3SKacheong Poon */
695721fffe3SKacheong Poon uint32_t new_iss = tcps->tcps_iss_incr_extra;
696721fffe3SKacheong Poon int32_t adj;
697721fffe3SKacheong Poon ip_stack_t *ipst = tcps->tcps_netstack->netstack_ip;
698721fffe3SKacheong Poon
699721fffe3SKacheong Poon switch (tcps->tcps_strong_iss) {
700721fffe3SKacheong Poon case 2: {
701721fffe3SKacheong Poon /* Add time and MD5 components. */
702721fffe3SKacheong Poon uint32_t answer[4];
703721fffe3SKacheong Poon struct {
704721fffe3SKacheong Poon uint32_t ports;
705721fffe3SKacheong Poon in6_addr_t src;
706721fffe3SKacheong Poon in6_addr_t dst;
707721fffe3SKacheong Poon } arg;
708721fffe3SKacheong Poon MD5_CTX context;
709721fffe3SKacheong Poon
710721fffe3SKacheong Poon mutex_enter(&tcps->tcps_iss_key_lock);
711721fffe3SKacheong Poon context = tcps->tcps_iss_key;
712721fffe3SKacheong Poon mutex_exit(&tcps->tcps_iss_key_lock);
713721fffe3SKacheong Poon arg.ports = connp->conn_ports;
714721fffe3SKacheong Poon /* We use MAPPED addresses in tcp_iss_init */
715721fffe3SKacheong Poon arg.src = connp->conn_laddr_v6;
716721fffe3SKacheong Poon arg.dst = connp->conn_faddr_v6;
717721fffe3SKacheong Poon MD5Update(&context, (uchar_t *)&arg,
718721fffe3SKacheong Poon sizeof (arg));
719721fffe3SKacheong Poon MD5Final((uchar_t *)answer, &context);
720721fffe3SKacheong Poon answer[0] ^= answer[1] ^ answer[2] ^ answer[3];
721721fffe3SKacheong Poon new_iss += (gethrtime() >> ISS_NSEC_SHT) + answer[0];
722721fffe3SKacheong Poon break;
723721fffe3SKacheong Poon }
724721fffe3SKacheong Poon case 1:
725721fffe3SKacheong Poon /* Add time component and min random (i.e. 1). */
726721fffe3SKacheong Poon new_iss += (gethrtime() >> ISS_NSEC_SHT) + 1;
727721fffe3SKacheong Poon break;
728721fffe3SKacheong Poon default:
729721fffe3SKacheong Poon /* Add only time component. */
730c0e6663fSJerry Jelinek new_iss += (uint32_t)gethrestime_sec() *
731c0e6663fSJerry Jelinek tcps->tcps_iss_incr;
732721fffe3SKacheong Poon break;
733721fffe3SKacheong Poon }
734721fffe3SKacheong Poon if ((adj = (int32_t)(tcp->tcp_snxt - new_iss)) > 0) {
735721fffe3SKacheong Poon /*
736c0e6663fSJerry Jelinek * New ISS not guaranteed to be tcp_iss_incr/2
737721fffe3SKacheong Poon * ahead of the current tcp_snxt, so add the
738721fffe3SKacheong Poon * difference to tcp_iss_incr_extra.
739721fffe3SKacheong Poon */
740721fffe3SKacheong Poon tcps->tcps_iss_incr_extra += adj;
741721fffe3SKacheong Poon }
742721fffe3SKacheong Poon /*
743721fffe3SKacheong Poon * If tcp_clean_death() can not perform the task now,
744721fffe3SKacheong Poon * drop the SYN packet and let the other side re-xmit.
745721fffe3SKacheong Poon * Otherwise pass the SYN packet back in, since the
746721fffe3SKacheong Poon * old tcp state has been cleaned up or freed.
747721fffe3SKacheong Poon */
748721fffe3SKacheong Poon if (tcp_clean_death(tcp, 0) == -1)
749721fffe3SKacheong Poon goto done;
750721fffe3SKacheong Poon nconnp = ipcl_classify(mp, ira, ipst);
751721fffe3SKacheong Poon if (nconnp != NULL) {
752721fffe3SKacheong Poon TCP_STAT(tcps, tcp_time_wait_syn_success);
753721fffe3SKacheong Poon /* Drops ref on nconnp */
754721fffe3SKacheong Poon tcp_reinput(nconnp, mp, ira, ipst);
755721fffe3SKacheong Poon return;
756721fffe3SKacheong Poon }
757721fffe3SKacheong Poon goto done;
758721fffe3SKacheong Poon }
759721fffe3SKacheong Poon
760721fffe3SKacheong Poon /*
761721fffe3SKacheong Poon * rgap is the amount of stuff received out of window. A negative
762721fffe3SKacheong Poon * value is the amount out of window.
763721fffe3SKacheong Poon */
764721fffe3SKacheong Poon if (rgap < 0) {
765721fffe3SKacheong Poon TCPS_BUMP_MIB(tcps, tcpInDataPastWinSegs);
766721fffe3SKacheong Poon TCPS_UPDATE_MIB(tcps, tcpInDataPastWinBytes, -rgap);
767721fffe3SKacheong Poon /* Fix seg_len and make sure there is something left. */
768721fffe3SKacheong Poon seg_len += rgap;
769721fffe3SKacheong Poon if (seg_len <= 0) {
770721fffe3SKacheong Poon if (flags & TH_RST) {
771721fffe3SKacheong Poon goto done;
772721fffe3SKacheong Poon }
773721fffe3SKacheong Poon flags |= TH_ACK_NEEDED;
774721fffe3SKacheong Poon seg_len = 0;
775721fffe3SKacheong Poon goto process_ack;
776721fffe3SKacheong Poon }
777721fffe3SKacheong Poon }
778721fffe3SKacheong Poon /*
7791f183ba0SLauri Tirkkonen * Check whether we can update tcp_ts_recent. This test is from RFC
7801f183ba0SLauri Tirkkonen * 7323, section 5.3.
781721fffe3SKacheong Poon */
7821f183ba0SLauri Tirkkonen if (tcp->tcp_snd_ts_ok && !(flags & TH_RST) &&
783721fffe3SKacheong Poon TSTMP_GEQ(tcpopt.tcp_opt_ts_val, tcp->tcp_ts_recent) &&
784721fffe3SKacheong Poon SEQ_LEQ(seg_seq, tcp->tcp_rack)) {
785721fffe3SKacheong Poon tcp->tcp_ts_recent = tcpopt.tcp_opt_ts_val;
786721fffe3SKacheong Poon tcp->tcp_last_rcv_lbolt = ddi_get_lbolt64();
787721fffe3SKacheong Poon }
788721fffe3SKacheong Poon
789721fffe3SKacheong Poon if (seg_seq != tcp->tcp_rnxt && seg_len > 0) {
790721fffe3SKacheong Poon /* Always ack out of order packets */
791721fffe3SKacheong Poon flags |= TH_ACK_NEEDED;
792721fffe3SKacheong Poon seg_len = 0;
793721fffe3SKacheong Poon } else if (seg_len > 0) {
794721fffe3SKacheong Poon TCPS_BUMP_MIB(tcps, tcpInClosed);
795721fffe3SKacheong Poon TCPS_BUMP_MIB(tcps, tcpInDataInorderSegs);
796721fffe3SKacheong Poon TCPS_UPDATE_MIB(tcps, tcpInDataInorderBytes, seg_len);
797*a2f04351SSebastien Roy tcp->tcp_cs.tcp_in_data_inorder_segs++;
798*a2f04351SSebastien Roy tcp->tcp_cs.tcp_in_data_inorder_bytes += seg_len;
799721fffe3SKacheong Poon }
800721fffe3SKacheong Poon if (flags & TH_RST) {
801721fffe3SKacheong Poon (void) tcp_clean_death(tcp, 0);
802721fffe3SKacheong Poon goto done;
803721fffe3SKacheong Poon }
804721fffe3SKacheong Poon if (flags & TH_SYN) {
805721fffe3SKacheong Poon tcp_xmit_ctl("TH_SYN", tcp, seg_ack, seg_seq + 1,
806721fffe3SKacheong Poon TH_RST|TH_ACK);
807721fffe3SKacheong Poon /*
808721fffe3SKacheong Poon * Do not delete the TCP structure if it is in
809721fffe3SKacheong Poon * TIME_WAIT state. Refer to RFC 1122, 4.2.2.13.
810721fffe3SKacheong Poon */
811721fffe3SKacheong Poon goto done;
812721fffe3SKacheong Poon }
813721fffe3SKacheong Poon process_ack:
814721fffe3SKacheong Poon if (flags & TH_ACK) {
815721fffe3SKacheong Poon bytes_acked = (int)(seg_ack - tcp->tcp_suna);
816721fffe3SKacheong Poon if (bytes_acked <= 0) {
817721fffe3SKacheong Poon if (bytes_acked == 0 && seg_len == 0 &&
818721fffe3SKacheong Poon new_swnd == tcp->tcp_swnd)
819721fffe3SKacheong Poon TCPS_BUMP_MIB(tcps, tcpInDupAck);
820721fffe3SKacheong Poon } else {
821721fffe3SKacheong Poon /* Acks something not sent */
822721fffe3SKacheong Poon flags |= TH_ACK_NEEDED;
823721fffe3SKacheong Poon }
824721fffe3SKacheong Poon }
825721fffe3SKacheong Poon if (flags & TH_ACK_NEEDED) {
826721fffe3SKacheong Poon /*
827721fffe3SKacheong Poon * Time to send an ack for some reason.
828721fffe3SKacheong Poon */
829721fffe3SKacheong Poon tcp_xmit_ctl(NULL, tcp, tcp->tcp_snxt,
830721fffe3SKacheong Poon tcp->tcp_rnxt, TH_ACK);
831721fffe3SKacheong Poon }
832721fffe3SKacheong Poon done:
833721fffe3SKacheong Poon freemsg(mp);
834721fffe3SKacheong Poon }
835