1721fffe3SKacheong Poon /*
2721fffe3SKacheong Poon  * CDDL HEADER START
3721fffe3SKacheong Poon  *
4721fffe3SKacheong Poon  * The contents of this file are subject to the terms of the
5721fffe3SKacheong Poon  * Common Development and Distribution License (the "License").
6721fffe3SKacheong Poon  * You may not use this file except in compliance with the License.
7721fffe3SKacheong Poon  *
8721fffe3SKacheong Poon  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9721fffe3SKacheong Poon  * or http://www.opensolaris.org/os/licensing.
10721fffe3SKacheong Poon  * See the License for the specific language governing permissions
11721fffe3SKacheong Poon  * and limitations under the License.
12721fffe3SKacheong Poon  *
13721fffe3SKacheong Poon  * When distributing Covered Code, include this CDDL HEADER in each
14721fffe3SKacheong Poon  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15721fffe3SKacheong Poon  * If applicable, add the following below this CDDL HEADER, with the
16721fffe3SKacheong Poon  * fields enclosed by brackets "[]" replaced with your own identifying
17721fffe3SKacheong Poon  * information: Portions Copyright [yyyy] [name of copyright owner]
18721fffe3SKacheong Poon  *
19721fffe3SKacheong Poon  * CDDL HEADER END
20721fffe3SKacheong Poon  */
21721fffe3SKacheong Poon 
22721fffe3SKacheong Poon /*
2366cd0f60SKacheong Poon  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
243d0a255cSGarrett D'Amore  * Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved.
257f2dc2cfSBryan Cantrill  * Copyright 2011 Joyent, Inc.  All rights reserved.
2645a4b79dSSebastien Roy  * Copyright (c) 2014, 2017 by Delphix. All rights reserved.
27721fffe3SKacheong Poon  */
28721fffe3SKacheong Poon 
29721fffe3SKacheong Poon #include <sys/types.h>
30721fffe3SKacheong Poon #include <sys/strlog.h>
31721fffe3SKacheong Poon #include <sys/strsun.h>
32721fffe3SKacheong Poon #include <sys/squeue_impl.h>
33721fffe3SKacheong Poon #include <sys/squeue.h>
34721fffe3SKacheong Poon #include <sys/callo.h>
35721fffe3SKacheong Poon #include <sys/strsubr.h>
36721fffe3SKacheong Poon 
37721fffe3SKacheong Poon #include <inet/common.h>
38721fffe3SKacheong Poon #include <inet/ip.h>
39721fffe3SKacheong Poon #include <inet/ip_ire.h>
40721fffe3SKacheong Poon #include <inet/ip_rts.h>
41721fffe3SKacheong Poon #include <inet/tcp.h>
42721fffe3SKacheong Poon #include <inet/tcp_impl.h>
43721fffe3SKacheong Poon 
44721fffe3SKacheong Poon /*
45721fffe3SKacheong Poon  * Implementation of TCP Timers.
46721fffe3SKacheong Poon  * =============================
47721fffe3SKacheong Poon  *
48721fffe3SKacheong Poon  * INTERFACE:
49721fffe3SKacheong Poon  *
 * There are two basic functions and one macro dealing with tcp timers:
51721fffe3SKacheong Poon  *
52721fffe3SKacheong Poon  *	timeout_id_t	tcp_timeout(connp, func, time)
53*57d1724dSToomas Soome  *	clock_t		tcp_timeout_cancel(connp, timeout_id)
54721fffe3SKacheong Poon  *	TCP_TIMER_RESTART(tcp, intvl)
55721fffe3SKacheong Poon  *
 * tcp_timeout() starts a timer for the 'tcp' instance, arranging to call
 * 'func' after 'time' milliseconds have passed (tcp_timeout() takes its
 * interval in milliseconds). The function called by timeout() must adhere to
58721fffe3SKacheong Poon  * the same restrictions as a driver soft interrupt handler - it must not sleep
59721fffe3SKacheong Poon  * or call other functions that might sleep. The value returned is the opaque
60721fffe3SKacheong Poon  * non-zero timeout identifier that can be passed to tcp_timeout_cancel() to
61721fffe3SKacheong Poon  * cancel the request. The call to tcp_timeout() may fail in which case it
62721fffe3SKacheong Poon  * returns zero. This is different from the timeout(9F) function which never
63721fffe3SKacheong Poon  * fails.
64721fffe3SKacheong Poon  *
65721fffe3SKacheong Poon  * The call-back function 'func' always receives 'connp' as its single
66721fffe3SKacheong Poon  * argument. It is always executed in the squeue corresponding to the tcp
67721fffe3SKacheong Poon  * structure. The tcp structure is guaranteed to be present at the time the
68721fffe3SKacheong Poon  * call-back is called.
69721fffe3SKacheong Poon  *
70721fffe3SKacheong Poon  * NOTE: The call-back function 'func' is never called if tcp is in
71*57d1724dSToomas Soome  *	the TCPS_CLOSED state.
72721fffe3SKacheong Poon  *
73721fffe3SKacheong Poon  * tcp_timeout_cancel() attempts to cancel a pending tcp_timeout()
 * request. Locks acquired by the call-back routine should not be held across
75721fffe3SKacheong Poon  * the call to tcp_timeout_cancel() or a deadlock may result.
76721fffe3SKacheong Poon  *
777f2dc2cfSBryan Cantrill  * tcp_timeout_cancel() returns -1 if the timeout request is invalid.
787f2dc2cfSBryan Cantrill  * Otherwise, it returns an integer value greater than or equal to 0.
79721fffe3SKacheong Poon  *
80721fffe3SKacheong Poon  * NOTE: both tcp_timeout() and tcp_timeout_cancel() should always be called
81*57d1724dSToomas Soome  *	within squeue context corresponding to the tcp instance. Since the
82721fffe3SKacheong Poon  *	call-back is also called via the same squeue, there are no race
83721fffe3SKacheong Poon  *	conditions described in untimeout(9F) manual page since all calls are
84721fffe3SKacheong Poon  *	strictly serialized.
85721fffe3SKacheong Poon  *
86721fffe3SKacheong Poon  *      TCP_TIMER_RESTART() is a macro that attempts to cancel a pending timeout
87721fffe3SKacheong Poon  *	stored in tcp_timer_tid and starts a new one using
88721fffe3SKacheong Poon  *	MSEC_TO_TICK(intvl). It always uses tcp_timer() function as a call-back
89721fffe3SKacheong Poon  *	and stores the return value of tcp_timeout() in the tcp->tcp_timer_tid
90721fffe3SKacheong Poon  *	field.
91721fffe3SKacheong Poon  *
92721fffe3SKacheong Poon  * IMPLEMENTATION:
93721fffe3SKacheong Poon  *
 * TCP timers are implemented using a three-stage process. The call to
95721fffe3SKacheong Poon  * tcp_timeout() uses timeout(9F) function to call tcp_timer_callback() function
96721fffe3SKacheong Poon  * when the timer expires. The tcp_timer_callback() arranges the call of the
97721fffe3SKacheong Poon  * tcp_timer_handler() function via squeue corresponding to the tcp
98721fffe3SKacheong Poon  * instance. The tcp_timer_handler() calls actual requested timeout call-back
99721fffe3SKacheong Poon  * and passes tcp instance as an argument to it. Information is passed between
100721fffe3SKacheong Poon  * stages using the tcp_timer_t structure which contains the connp pointer, the
101721fffe3SKacheong Poon  * tcp call-back to call and the timeout id returned by the timeout(9F).
102721fffe3SKacheong Poon  *
103721fffe3SKacheong Poon  * The tcp_timer_t structure is not used directly, it is embedded in an mblk_t -
104721fffe3SKacheong Poon  * like structure that is used to enter an squeue. The mp->b_rptr of this pseudo
105721fffe3SKacheong Poon  * mblk points to the beginning of tcp_timer_t structure. The tcp_timeout()
106721fffe3SKacheong Poon  * returns the pointer to this mblk.
107721fffe3SKacheong Poon  *
108721fffe3SKacheong Poon  * The pseudo mblk is allocated from a special tcp_timer_cache kmem cache. It
109721fffe3SKacheong Poon  * looks like a normal mblk without actual dblk attached to it.
110721fffe3SKacheong Poon  *
111721fffe3SKacheong Poon  * To optimize performance each tcp instance holds a small cache of timer
112721fffe3SKacheong Poon  * mblocks. In the current implementation it caches up to two timer mblocks per
113721fffe3SKacheong Poon  * tcp instance. The cache is preserved over tcp frees and is only freed when
114721fffe3SKacheong Poon  * the whole tcp structure is destroyed by its kmem destructor. Since all tcp
115721fffe3SKacheong Poon  * timer processing happens on a corresponding squeue, the cache manipulation
116721fffe3SKacheong Poon  * does not require any locks. Experiments show that majority of timer mblocks
117721fffe3SKacheong Poon  * allocations are satisfied from the tcp cache and do not involve kmem calls.
118721fffe3SKacheong Poon  *
119721fffe3SKacheong Poon  * The tcp_timeout() places a refhold on the connp instance which guarantees
120721fffe3SKacheong Poon  * that it will be present at the time the call-back function fires. The
121721fffe3SKacheong Poon  * tcp_timer_handler() drops the reference after calling the call-back, so the
122721fffe3SKacheong Poon  * call-back function does not need to manipulate the references explicitly.
123721fffe3SKacheong Poon  */
124721fffe3SKacheong Poon 
125721fffe3SKacheong Poon kmem_cache_t *tcp_timercache;
126721fffe3SKacheong Poon 
127721fffe3SKacheong Poon static void	tcp_ip_notify(tcp_t *);
128721fffe3SKacheong Poon static void	tcp_timer_callback(void *);
129721fffe3SKacheong Poon static void	tcp_timer_free(tcp_t *, mblk_t *);
130721fffe3SKacheong Poon static void	tcp_timer_handler(void *, mblk_t *, void *, ip_recv_attr_t *);
131721fffe3SKacheong Poon 
13266cd0f60SKacheong Poon /*
13366cd0f60SKacheong Poon  * tim is in millisec.
13466cd0f60SKacheong Poon  */
135721fffe3SKacheong Poon timeout_id_t
tcp_timeout(conn_t * connp,void (* f)(void *),hrtime_t tim)13666cd0f60SKacheong Poon tcp_timeout(conn_t *connp, void (*f)(void *), hrtime_t tim)
137721fffe3SKacheong Poon {
138721fffe3SKacheong Poon 	mblk_t *mp;
139721fffe3SKacheong Poon 	tcp_timer_t *tcpt;
140721fffe3SKacheong Poon 	tcp_t *tcp = connp->conn_tcp;
141721fffe3SKacheong Poon 
142721fffe3SKacheong Poon 	ASSERT(connp->conn_sqp != NULL);
143721fffe3SKacheong Poon 
144721fffe3SKacheong Poon 	TCP_DBGSTAT(tcp->tcp_tcps, tcp_timeout_calls);
145721fffe3SKacheong Poon 
146721fffe3SKacheong Poon 	if (tcp->tcp_timercache == NULL) {
147721fffe3SKacheong Poon 		mp = tcp_timermp_alloc(KM_NOSLEEP | KM_PANIC);
148721fffe3SKacheong Poon 	} else {
149721fffe3SKacheong Poon 		TCP_DBGSTAT(tcp->tcp_tcps, tcp_timeout_cached_alloc);
150721fffe3SKacheong Poon 		mp = tcp->tcp_timercache;
151721fffe3SKacheong Poon 		tcp->tcp_timercache = mp->b_next;
152721fffe3SKacheong Poon 		mp->b_next = NULL;
153721fffe3SKacheong Poon 		ASSERT(mp->b_wptr == NULL);
154721fffe3SKacheong Poon 	}
155721fffe3SKacheong Poon 
156721fffe3SKacheong Poon 	CONN_INC_REF(connp);
157721fffe3SKacheong Poon 	tcpt = (tcp_timer_t *)mp->b_rptr;
158721fffe3SKacheong Poon 	tcpt->connp = connp;
159721fffe3SKacheong Poon 	tcpt->tcpt_proc = f;
160721fffe3SKacheong Poon 	/*
161721fffe3SKacheong Poon 	 * TCP timers are normal timeouts. Plus, they do not require more than
162721fffe3SKacheong Poon 	 * a 10 millisecond resolution. By choosing a coarser resolution and by
163721fffe3SKacheong Poon 	 * rounding up the expiration to the next resolution boundary, we can
164721fffe3SKacheong Poon 	 * batch timers in the callout subsystem to make TCP timers more
165721fffe3SKacheong Poon 	 * efficient. The roundup also protects short timers from expiring too
166721fffe3SKacheong Poon 	 * early before they have a chance to be cancelled.
167721fffe3SKacheong Poon 	 */
168721fffe3SKacheong Poon 	tcpt->tcpt_tid = timeout_generic(CALLOUT_NORMAL, tcp_timer_callback, mp,
16966cd0f60SKacheong Poon 	    tim * MICROSEC, CALLOUT_TCP_RESOLUTION, CALLOUT_FLAG_ROUNDUP);
1707f2dc2cfSBryan Cantrill 	VERIFY(!(tcpt->tcpt_tid & CALLOUT_ID_FREE));
171721fffe3SKacheong Poon 
172721fffe3SKacheong Poon 	return ((timeout_id_t)mp);
173721fffe3SKacheong Poon }
174721fffe3SKacheong Poon 
175721fffe3SKacheong Poon static void
tcp_timer_callback(void * arg)176721fffe3SKacheong Poon tcp_timer_callback(void *arg)
177721fffe3SKacheong Poon {
178721fffe3SKacheong Poon 	mblk_t *mp = (mblk_t *)arg;
179721fffe3SKacheong Poon 	tcp_timer_t *tcpt;
180721fffe3SKacheong Poon 	conn_t	*connp;
181721fffe3SKacheong Poon 
182721fffe3SKacheong Poon 	tcpt = (tcp_timer_t *)mp->b_rptr;
183721fffe3SKacheong Poon 	connp = tcpt->connp;
184721fffe3SKacheong Poon 	SQUEUE_ENTER_ONE(connp->conn_sqp, mp, tcp_timer_handler, connp,
185721fffe3SKacheong Poon 	    NULL, SQ_FILL, SQTAG_TCP_TIMER);
186721fffe3SKacheong Poon }
187721fffe3SKacheong Poon 
188721fffe3SKacheong Poon /* ARGSUSED */
189721fffe3SKacheong Poon static void
tcp_timer_handler(void * arg,mblk_t * mp,void * arg2,ip_recv_attr_t * dummy)190721fffe3SKacheong Poon tcp_timer_handler(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *dummy)
191721fffe3SKacheong Poon {
192721fffe3SKacheong Poon 	tcp_timer_t *tcpt;
193721fffe3SKacheong Poon 	conn_t *connp = (conn_t *)arg;
194721fffe3SKacheong Poon 	tcp_t *tcp = connp->conn_tcp;
195721fffe3SKacheong Poon 
196721fffe3SKacheong Poon 	tcpt = (tcp_timer_t *)mp->b_rptr;
197721fffe3SKacheong Poon 	ASSERT(connp == tcpt->connp);
198721fffe3SKacheong Poon 	ASSERT((squeue_t *)arg2 == connp->conn_sqp);
199721fffe3SKacheong Poon 
2007f2dc2cfSBryan Cantrill 	if (tcpt->tcpt_tid & CALLOUT_ID_FREE) {
2017f2dc2cfSBryan Cantrill 		/*
2027f2dc2cfSBryan Cantrill 		 * This timeout was cancelled after it was enqueued to the
2037f2dc2cfSBryan Cantrill 		 * squeue; free the timer and return.
2047f2dc2cfSBryan Cantrill 		 */
2057f2dc2cfSBryan Cantrill 		tcp_timer_free(connp->conn_tcp, mp);
2067f2dc2cfSBryan Cantrill 		return;
2077f2dc2cfSBryan Cantrill 	}
2087f2dc2cfSBryan Cantrill 
209721fffe3SKacheong Poon 	/*
210721fffe3SKacheong Poon 	 * If the TCP has reached the closed state, don't proceed any
211721fffe3SKacheong Poon 	 * further. This TCP logically does not exist on the system.
212721fffe3SKacheong Poon 	 * tcpt_proc could for example access queues, that have already
213721fffe3SKacheong Poon 	 * been qprocoff'ed off.
214721fffe3SKacheong Poon 	 */
215721fffe3SKacheong Poon 	if (tcp->tcp_state != TCPS_CLOSED) {
216721fffe3SKacheong Poon 		(*tcpt->tcpt_proc)(connp);
217721fffe3SKacheong Poon 	} else {
218721fffe3SKacheong Poon 		tcp->tcp_timer_tid = 0;
219721fffe3SKacheong Poon 	}
2207f2dc2cfSBryan Cantrill 
221721fffe3SKacheong Poon 	tcp_timer_free(connp->conn_tcp, mp);
222721fffe3SKacheong Poon }
223721fffe3SKacheong Poon 
224721fffe3SKacheong Poon /*
225721fffe3SKacheong Poon  * There is potential race with untimeout and the handler firing at the same
226721fffe3SKacheong Poon  * time. The mblock may be freed by the handler while we are trying to use
227721fffe3SKacheong Poon  * it. But since both should execute on the same squeue, this race should not
228721fffe3SKacheong Poon  * occur.
229721fffe3SKacheong Poon  */
230721fffe3SKacheong Poon clock_t
tcp_timeout_cancel(conn_t * connp,timeout_id_t id)231721fffe3SKacheong Poon tcp_timeout_cancel(conn_t *connp, timeout_id_t id)
232721fffe3SKacheong Poon {
233721fffe3SKacheong Poon 	mblk_t	*mp = (mblk_t *)id;
234721fffe3SKacheong Poon 	tcp_timer_t *tcpt;
235721fffe3SKacheong Poon 	clock_t delta;
236721fffe3SKacheong Poon 
237721fffe3SKacheong Poon 	TCP_DBGSTAT(connp->conn_tcp->tcp_tcps, tcp_timeout_cancel_reqs);
238721fffe3SKacheong Poon 
239721fffe3SKacheong Poon 	if (mp == NULL)
240721fffe3SKacheong Poon 		return (-1);
241721fffe3SKacheong Poon 
242721fffe3SKacheong Poon 	tcpt = (tcp_timer_t *)mp->b_rptr;
243721fffe3SKacheong Poon 	ASSERT(tcpt->connp == connp);
244721fffe3SKacheong Poon 
245721fffe3SKacheong Poon 	delta = untimeout_default(tcpt->tcpt_tid, 0);
246721fffe3SKacheong Poon 
247721fffe3SKacheong Poon 	if (delta >= 0) {
248721fffe3SKacheong Poon 		TCP_DBGSTAT(connp->conn_tcp->tcp_tcps, tcp_timeout_canceled);
249721fffe3SKacheong Poon 		tcp_timer_free(connp->conn_tcp, mp);
250721fffe3SKacheong Poon 		CONN_DEC_REF(connp);
2517f2dc2cfSBryan Cantrill 	} else {
2527f2dc2cfSBryan Cantrill 		/*
2537f2dc2cfSBryan Cantrill 		 * If we were unable to untimeout successfully, it has already
2547f2dc2cfSBryan Cantrill 		 * been enqueued on the squeue; mark the ID with the free
2557f2dc2cfSBryan Cantrill 		 * bit.	 This bit can never be set in a valid identifier, and
2567f2dc2cfSBryan Cantrill 		 * we'll use it to prevent the timeout from being executed.
2577f2dc2cfSBryan Cantrill 		 * And note that we're within the squeue perimeter here, so
2587f2dc2cfSBryan Cantrill 		 * we don't need to worry about racing with timer handling
2597f2dc2cfSBryan Cantrill 		 * (which also executes within the perimeter).
2607f2dc2cfSBryan Cantrill 		 */
2617f2dc2cfSBryan Cantrill 		tcpt->tcpt_tid |= CALLOUT_ID_FREE;
2627f2dc2cfSBryan Cantrill 		delta = 0;
263721fffe3SKacheong Poon 	}
264721fffe3SKacheong Poon 
26566cd0f60SKacheong Poon 	return (TICK_TO_MSEC(delta));
266721fffe3SKacheong Poon }
267721fffe3SKacheong Poon 
268721fffe3SKacheong Poon /*
269721fffe3SKacheong Poon  * Allocate space for the timer event. The allocation looks like mblk, but it is
270721fffe3SKacheong Poon  * not a proper mblk. To avoid confusion we set b_wptr to NULL.
271721fffe3SKacheong Poon  *
272721fffe3SKacheong Poon  * Dealing with failures: If we can't allocate from the timer cache we try
273721fffe3SKacheong Poon  * allocating from dblock caches using allocb_tryhard(). In this case b_wptr
274721fffe3SKacheong Poon  * points to b_rptr.
275721fffe3SKacheong Poon  * If we can't allocate anything using allocb_tryhard(), we perform a last
276721fffe3SKacheong Poon  * attempt and use kmem_alloc_tryhard(). In this case we set b_wptr to -1 and
277721fffe3SKacheong Poon  * save the actual allocation size in b_datap.
278721fffe3SKacheong Poon  */
279721fffe3SKacheong Poon mblk_t *
tcp_timermp_alloc(int kmflags)280721fffe3SKacheong Poon tcp_timermp_alloc(int kmflags)
281721fffe3SKacheong Poon {
282721fffe3SKacheong Poon 	mblk_t *mp = (mblk_t *)kmem_cache_alloc(tcp_timercache,
283721fffe3SKacheong Poon 	    kmflags & ~KM_PANIC);
284721fffe3SKacheong Poon 
285721fffe3SKacheong Poon 	if (mp != NULL) {
286721fffe3SKacheong Poon 		mp->b_next = mp->b_prev = NULL;
287721fffe3SKacheong Poon 		mp->b_rptr = (uchar_t *)(&mp[1]);
288721fffe3SKacheong Poon 		mp->b_wptr = NULL;
289721fffe3SKacheong Poon 		mp->b_datap = NULL;
290721fffe3SKacheong Poon 		mp->b_queue = NULL;
291721fffe3SKacheong Poon 		mp->b_cont = NULL;
292721fffe3SKacheong Poon 	} else if (kmflags & KM_PANIC) {
293721fffe3SKacheong Poon 		/*
294721fffe3SKacheong Poon 		 * Failed to allocate memory for the timer. Try allocating from
295721fffe3SKacheong Poon 		 * dblock caches.
296721fffe3SKacheong Poon 		 */
297721fffe3SKacheong Poon 		/* ipclassifier calls this from a constructor - hence no tcps */
298721fffe3SKacheong Poon 		TCP_G_STAT(tcp_timermp_allocfail);
299721fffe3SKacheong Poon 		mp = allocb_tryhard(sizeof (tcp_timer_t));
300721fffe3SKacheong Poon 		if (mp == NULL) {
301721fffe3SKacheong Poon 			size_t size = 0;
302721fffe3SKacheong Poon 			/*
303721fffe3SKacheong Poon 			 * Memory is really low. Try tryhard allocation.
304721fffe3SKacheong Poon 			 *
305721fffe3SKacheong Poon 			 * ipclassifier calls this from a constructor -
306721fffe3SKacheong Poon 			 * hence no tcps
307721fffe3SKacheong Poon 			 */
308721fffe3SKacheong Poon 			TCP_G_STAT(tcp_timermp_allocdblfail);
309721fffe3SKacheong Poon 			mp = kmem_alloc_tryhard(sizeof (mblk_t) +
310721fffe3SKacheong Poon 			    sizeof (tcp_timer_t), &size, kmflags);
311721fffe3SKacheong Poon 			mp->b_rptr = (uchar_t *)(&mp[1]);
312721fffe3SKacheong Poon 			mp->b_next = mp->b_prev = NULL;
313721fffe3SKacheong Poon 			mp->b_wptr = (uchar_t *)-1;
314721fffe3SKacheong Poon 			mp->b_datap = (dblk_t *)size;
315721fffe3SKacheong Poon 			mp->b_queue = NULL;
316721fffe3SKacheong Poon 			mp->b_cont = NULL;
317721fffe3SKacheong Poon 		}
318721fffe3SKacheong Poon 		ASSERT(mp->b_wptr != NULL);
319721fffe3SKacheong Poon 	}
320721fffe3SKacheong Poon 	/* ipclassifier calls this from a constructor - hence no tcps */
321721fffe3SKacheong Poon 	TCP_G_DBGSTAT(tcp_timermp_alloced);
322721fffe3SKacheong Poon 
323721fffe3SKacheong Poon 	return (mp);
324721fffe3SKacheong Poon }
325721fffe3SKacheong Poon 
326721fffe3SKacheong Poon /*
327721fffe3SKacheong Poon  * Free per-tcp timer cache.
328721fffe3SKacheong Poon  * It can only contain entries from tcp_timercache.
329721fffe3SKacheong Poon  */
330721fffe3SKacheong Poon void
tcp_timermp_free(tcp_t * tcp)331721fffe3SKacheong Poon tcp_timermp_free(tcp_t *tcp)
332721fffe3SKacheong Poon {
333721fffe3SKacheong Poon 	mblk_t *mp;
334721fffe3SKacheong Poon 
335721fffe3SKacheong Poon 	while ((mp = tcp->tcp_timercache) != NULL) {
336721fffe3SKacheong Poon 		ASSERT(mp->b_wptr == NULL);
337721fffe3SKacheong Poon 		tcp->tcp_timercache = tcp->tcp_timercache->b_next;
338721fffe3SKacheong Poon 		kmem_cache_free(tcp_timercache, mp);
339721fffe3SKacheong Poon 	}
340721fffe3SKacheong Poon }
341721fffe3SKacheong Poon 
342721fffe3SKacheong Poon /*
343721fffe3SKacheong Poon  * Free timer event. Put it on the per-tcp timer cache if there is not too many
344721fffe3SKacheong Poon  * events there already (currently at most two events are cached).
345721fffe3SKacheong Poon  * If the event is not allocated from the timer cache, free it right away.
346721fffe3SKacheong Poon  */
347721fffe3SKacheong Poon static void
tcp_timer_free(tcp_t * tcp,mblk_t * mp)348721fffe3SKacheong Poon tcp_timer_free(tcp_t *tcp, mblk_t *mp)
349721fffe3SKacheong Poon {
350721fffe3SKacheong Poon 	mblk_t *mp1 = tcp->tcp_timercache;
351721fffe3SKacheong Poon 
352721fffe3SKacheong Poon 	if (mp->b_wptr != NULL) {
353721fffe3SKacheong Poon 		/*
354721fffe3SKacheong Poon 		 * This allocation is not from a timer cache, free it right
355721fffe3SKacheong Poon 		 * away.
356721fffe3SKacheong Poon 		 */
357721fffe3SKacheong Poon 		if (mp->b_wptr != (uchar_t *)-1)
358721fffe3SKacheong Poon 			freeb(mp);
359721fffe3SKacheong Poon 		else
360721fffe3SKacheong Poon 			kmem_free(mp, (size_t)mp->b_datap);
361721fffe3SKacheong Poon 	} else if (mp1 == NULL || mp1->b_next == NULL) {
362721fffe3SKacheong Poon 		/* Cache this timer block for future allocations */
363721fffe3SKacheong Poon 		mp->b_rptr = (uchar_t *)(&mp[1]);
364721fffe3SKacheong Poon 		mp->b_next = mp1;
365721fffe3SKacheong Poon 		tcp->tcp_timercache = mp;
366721fffe3SKacheong Poon 	} else {
367721fffe3SKacheong Poon 		kmem_cache_free(tcp_timercache, mp);
368721fffe3SKacheong Poon 		TCP_DBGSTAT(tcp->tcp_tcps, tcp_timermp_freed);
369721fffe3SKacheong Poon 	}
370721fffe3SKacheong Poon }
371721fffe3SKacheong Poon 
372721fffe3SKacheong Poon /*
373721fffe3SKacheong Poon  * Stop all TCP timers.
374721fffe3SKacheong Poon  */
375721fffe3SKacheong Poon void
tcp_timers_stop(tcp_t * tcp)376721fffe3SKacheong Poon tcp_timers_stop(tcp_t *tcp)
377721fffe3SKacheong Poon {
378721fffe3SKacheong Poon 	if (tcp->tcp_timer_tid != 0) {
379721fffe3SKacheong Poon 		(void) TCP_TIMER_CANCEL(tcp, tcp->tcp_timer_tid);
380721fffe3SKacheong Poon 		tcp->tcp_timer_tid = 0;
381721fffe3SKacheong Poon 	}
382721fffe3SKacheong Poon 	if (tcp->tcp_ka_tid != 0) {
383721fffe3SKacheong Poon 		(void) TCP_TIMER_CANCEL(tcp, tcp->tcp_ka_tid);
384721fffe3SKacheong Poon 		tcp->tcp_ka_tid = 0;
385721fffe3SKacheong Poon 	}
386721fffe3SKacheong Poon 	if (tcp->tcp_ack_tid != 0) {
387721fffe3SKacheong Poon 		(void) TCP_TIMER_CANCEL(tcp, tcp->tcp_ack_tid);
388721fffe3SKacheong Poon 		tcp->tcp_ack_tid = 0;
389721fffe3SKacheong Poon 	}
390721fffe3SKacheong Poon 	if (tcp->tcp_push_tid != 0) {
391721fffe3SKacheong Poon 		(void) TCP_TIMER_CANCEL(tcp, tcp->tcp_push_tid);
392721fffe3SKacheong Poon 		tcp->tcp_push_tid = 0;
393721fffe3SKacheong Poon 	}
394721fffe3SKacheong Poon 	if (tcp->tcp_reass_tid != 0) {
395721fffe3SKacheong Poon 		(void) TCP_TIMER_CANCEL(tcp, tcp->tcp_reass_tid);
396721fffe3SKacheong Poon 		tcp->tcp_reass_tid = 0;
397721fffe3SKacheong Poon 	}
398721fffe3SKacheong Poon }
399721fffe3SKacheong Poon 
400721fffe3SKacheong Poon /*
401721fffe3SKacheong Poon  * Timer callback routine for keepalive probe.  We do a fake resend of
402721fffe3SKacheong Poon  * last ACKed byte.  Then set a timer using RTO.  When the timer expires,
403721fffe3SKacheong Poon  * check to see if we have heard anything from the other end for the last
404721fffe3SKacheong Poon  * RTO period.  If we have, set the timer to expire for another
405721fffe3SKacheong Poon  * tcp_keepalive_intrvl and check again.  If we have not, set a timer using
406721fffe3SKacheong Poon  * RTO << 1 and check again when it expires.  Keep exponentially increasing
407721fffe3SKacheong Poon  * the timeout if we have not heard from the other side.  If for more than
408721fffe3SKacheong Poon  * (tcp_ka_interval + tcp_ka_abort_thres) we have not heard anything,
409721fffe3SKacheong Poon  * kill the connection unless the keepalive abort threshold is 0.  In
410721fffe3SKacheong Poon  * that case, we will probe "forever."
4113d0a255cSGarrett D'Amore  * If tcp_ka_cnt and tcp_ka_rinterval are non-zero, then we do not follow
4123d0a255cSGarrett D'Amore  * the exponential backoff, but send probes tcp_ka_cnt times in regular
4133d0a255cSGarrett D'Amore  * intervals of tcp_ka_rinterval milliseconds until we hear back from peer.
4143d0a255cSGarrett D'Amore  * Kill the connection if we don't hear back from peer after tcp_ka_cnt
4153d0a255cSGarrett D'Amore  * probes are sent.
416721fffe3SKacheong Poon  */
417721fffe3SKacheong Poon void
tcp_keepalive_timer(void * arg)418721fffe3SKacheong Poon tcp_keepalive_timer(void *arg)
419721fffe3SKacheong Poon {
420721fffe3SKacheong Poon 	mblk_t	*mp;
421721fffe3SKacheong Poon 	conn_t	*connp = (conn_t *)arg;
422*57d1724dSToomas Soome 	tcp_t	*tcp = connp->conn_tcp;
423721fffe3SKacheong Poon 	int32_t	firetime;
424721fffe3SKacheong Poon 	int32_t	idletime;
425721fffe3SKacheong Poon 	int32_t	ka_intrvl;
426721fffe3SKacheong Poon 	tcp_stack_t	*tcps = tcp->tcp_tcps;
427721fffe3SKacheong Poon 
428721fffe3SKacheong Poon 	tcp->tcp_ka_tid = 0;
429721fffe3SKacheong Poon 
430721fffe3SKacheong Poon 	if (tcp->tcp_fused)
431721fffe3SKacheong Poon 		return;
432721fffe3SKacheong Poon 
433721fffe3SKacheong Poon 	TCPS_BUMP_MIB(tcps, tcpTimKeepalive);
434721fffe3SKacheong Poon 	ka_intrvl = tcp->tcp_ka_interval;
435721fffe3SKacheong Poon 
436721fffe3SKacheong Poon 	/*
437721fffe3SKacheong Poon 	 * Keepalive probe should only be sent if the application has not
438721fffe3SKacheong Poon 	 * done a close on the connection.
439721fffe3SKacheong Poon 	 */
440721fffe3SKacheong Poon 	if (tcp->tcp_state > TCPS_CLOSE_WAIT) {
441721fffe3SKacheong Poon 		return;
442721fffe3SKacheong Poon 	}
443721fffe3SKacheong Poon 	/* Timer fired too early, restart it. */
444721fffe3SKacheong Poon 	if (tcp->tcp_state < TCPS_ESTABLISHED) {
445721fffe3SKacheong Poon 		tcp->tcp_ka_tid = TCP_TIMER(tcp, tcp_keepalive_timer,
44666cd0f60SKacheong Poon 		    ka_intrvl);
447721fffe3SKacheong Poon 		return;
448721fffe3SKacheong Poon 	}
449721fffe3SKacheong Poon 
450721fffe3SKacheong Poon 	idletime = TICK_TO_MSEC(ddi_get_lbolt() - tcp->tcp_last_recv_time);
451721fffe3SKacheong Poon 	/*
452721fffe3SKacheong Poon 	 * If we have not heard from the other side for a long
453721fffe3SKacheong Poon 	 * time, kill the connection unless the keepalive abort
454721fffe3SKacheong Poon 	 * threshold is 0.  In that case, we will probe "forever."
455721fffe3SKacheong Poon 	 */
456721fffe3SKacheong Poon 	if (tcp->tcp_ka_abort_thres != 0 &&
457721fffe3SKacheong Poon 	    idletime > (ka_intrvl + tcp->tcp_ka_abort_thres)) {
458721fffe3SKacheong Poon 		TCPS_BUMP_MIB(tcps, tcpTimKeepaliveDrop);
459721fffe3SKacheong Poon 		(void) tcp_clean_death(tcp, tcp->tcp_client_errno ?
460721fffe3SKacheong Poon 		    tcp->tcp_client_errno : ETIMEDOUT);
461721fffe3SKacheong Poon 		return;
462721fffe3SKacheong Poon 	}
463721fffe3SKacheong Poon 
464721fffe3SKacheong Poon 	if (tcp->tcp_snxt == tcp->tcp_suna &&
465721fffe3SKacheong Poon 	    idletime >= ka_intrvl) {
466721fffe3SKacheong Poon 		/* Fake resend of last ACKed byte. */
467721fffe3SKacheong Poon 		mblk_t	*mp1 = allocb(1, BPRI_LO);
468721fffe3SKacheong Poon 
469721fffe3SKacheong Poon 		if (mp1 != NULL) {
470721fffe3SKacheong Poon 			*mp1->b_wptr++ = '\0';
471721fffe3SKacheong Poon 			mp = tcp_xmit_mp(tcp, mp1, 1, NULL, NULL,
472721fffe3SKacheong Poon 			    tcp->tcp_suna - 1, B_FALSE, NULL, B_TRUE);
473721fffe3SKacheong Poon 			freeb(mp1);
474721fffe3SKacheong Poon 			/*
475721fffe3SKacheong Poon 			 * if allocation failed, fall through to start the
476721fffe3SKacheong Poon 			 * timer back.
477721fffe3SKacheong Poon 			 */
478721fffe3SKacheong Poon 			if (mp != NULL) {
479721fffe3SKacheong Poon 				tcp_send_data(tcp, mp);
480721fffe3SKacheong Poon 				TCPS_BUMP_MIB(tcps, tcpTimKeepaliveProbe);
4813d0a255cSGarrett D'Amore 				if (tcp->tcp_ka_rinterval) {
4823d0a255cSGarrett D'Amore 					firetime = tcp->tcp_ka_rinterval;
4833d0a255cSGarrett D'Amore 				} else if (tcp->tcp_ka_last_intrvl != 0) {
484721fffe3SKacheong Poon 					int max;
485721fffe3SKacheong Poon 					/*
486721fffe3SKacheong Poon 					 * We should probe again at least
487721fffe3SKacheong Poon 					 * in ka_intrvl, but not more than
488707e74bcSKacheong Poon 					 * tcp_rto_max.
489721fffe3SKacheong Poon 					 */
490707e74bcSKacheong Poon 					max = tcp->tcp_rto_max;
491721fffe3SKacheong Poon 					firetime = MIN(ka_intrvl - 1,
492721fffe3SKacheong Poon 					    tcp->tcp_ka_last_intrvl << 1);
493721fffe3SKacheong Poon 					if (firetime > max)
494721fffe3SKacheong Poon 						firetime = max;
495721fffe3SKacheong Poon 				} else {
496721fffe3SKacheong Poon 					firetime = tcp->tcp_rto;
497721fffe3SKacheong Poon 				}
498721fffe3SKacheong Poon 				tcp->tcp_ka_tid = TCP_TIMER(tcp,
49966cd0f60SKacheong Poon 				    tcp_keepalive_timer, firetime);
500721fffe3SKacheong Poon 				tcp->tcp_ka_last_intrvl = firetime;
501721fffe3SKacheong Poon 				return;
502721fffe3SKacheong Poon 			}
503721fffe3SKacheong Poon 		}
504721fffe3SKacheong Poon 	} else {
505721fffe3SKacheong Poon 		tcp->tcp_ka_last_intrvl = 0;
506721fffe3SKacheong Poon 	}
507721fffe3SKacheong Poon 
508721fffe3SKacheong Poon 	/* firetime can be negative if (mp1 == NULL || mp == NULL) */
509721fffe3SKacheong Poon 	if ((firetime = ka_intrvl - idletime) < 0) {
510721fffe3SKacheong Poon 		firetime = ka_intrvl;
511721fffe3SKacheong Poon 	}
51266cd0f60SKacheong Poon 	tcp->tcp_ka_tid = TCP_TIMER(tcp, tcp_keepalive_timer, firetime);
513721fffe3SKacheong Poon }
514721fffe3SKacheong Poon 
515721fffe3SKacheong Poon void
tcp_reass_timer(void * arg)516721fffe3SKacheong Poon tcp_reass_timer(void *arg)
517721fffe3SKacheong Poon {
518721fffe3SKacheong Poon 	conn_t *connp = (conn_t *)arg;
519721fffe3SKacheong Poon 	tcp_t *tcp = connp->conn_tcp;
520721fffe3SKacheong Poon 
521721fffe3SKacheong Poon 	tcp->tcp_reass_tid = 0;
522721fffe3SKacheong Poon 	if (tcp->tcp_reass_head == NULL)
523721fffe3SKacheong Poon 		return;
524721fffe3SKacheong Poon 	ASSERT(tcp->tcp_reass_tail != NULL);
525721fffe3SKacheong Poon 	if (tcp->tcp_snd_sack_ok && tcp->tcp_num_sack_blk > 0) {
526721fffe3SKacheong Poon 		tcp_sack_remove(tcp->tcp_sack_list,
527721fffe3SKacheong Poon 		    TCP_REASS_END(tcp->tcp_reass_tail), &tcp->tcp_num_sack_blk);
528721fffe3SKacheong Poon 	}
529721fffe3SKacheong Poon 	tcp_close_mpp(&tcp->tcp_reass_head);
530721fffe3SKacheong Poon 	tcp->tcp_reass_tail = NULL;
531721fffe3SKacheong Poon 	TCP_STAT(tcp->tcp_tcps, tcp_reass_timeout);
532721fffe3SKacheong Poon }
533721fffe3SKacheong Poon 
534721fffe3SKacheong Poon /* This function handles the push timeout. */
535721fffe3SKacheong Poon void
tcp_push_timer(void * arg)536721fffe3SKacheong Poon tcp_push_timer(void *arg)
537721fffe3SKacheong Poon {
538721fffe3SKacheong Poon 	conn_t	*connp = (conn_t *)arg;
539721fffe3SKacheong Poon 	tcp_t *tcp = connp->conn_tcp;
540721fffe3SKacheong Poon 
541721fffe3SKacheong Poon 	TCP_DBGSTAT(tcp->tcp_tcps, tcp_push_timer_cnt);
542721fffe3SKacheong Poon 
543721fffe3SKacheong Poon 	ASSERT(tcp->tcp_listener == NULL);
544721fffe3SKacheong Poon 
545721fffe3SKacheong Poon 	ASSERT(!IPCL_IS_NONSTR(connp));
546721fffe3SKacheong Poon 
547721fffe3SKacheong Poon 	tcp->tcp_push_tid = 0;
548721fffe3SKacheong Poon 
549721fffe3SKacheong Poon 	if (tcp->tcp_rcv_list != NULL &&
550721fffe3SKacheong Poon 	    tcp_rcv_drain(tcp) == TH_ACK_NEEDED)
551721fffe3SKacheong Poon 		tcp_xmit_ctl(NULL, tcp, tcp->tcp_snxt, tcp->tcp_rnxt, TH_ACK);
552721fffe3SKacheong Poon }
553721fffe3SKacheong Poon 
554721fffe3SKacheong Poon /*
555721fffe3SKacheong Poon  * This function handles delayed ACK timeout.
556721fffe3SKacheong Poon  */
557721fffe3SKacheong Poon void
tcp_ack_timer(void * arg)558721fffe3SKacheong Poon tcp_ack_timer(void *arg)
559721fffe3SKacheong Poon {
560721fffe3SKacheong Poon 	conn_t	*connp = (conn_t *)arg;
561721fffe3SKacheong Poon 	tcp_t *tcp = connp->conn_tcp;
562721fffe3SKacheong Poon 	mblk_t *mp;
563721fffe3SKacheong Poon 	tcp_stack_t	*tcps = tcp->tcp_tcps;
564721fffe3SKacheong Poon 
565721fffe3SKacheong Poon 	TCP_DBGSTAT(tcps, tcp_ack_timer_cnt);
566721fffe3SKacheong Poon 
567721fffe3SKacheong Poon 	tcp->tcp_ack_tid = 0;
568721fffe3SKacheong Poon 
569721fffe3SKacheong Poon 	if (tcp->tcp_fused)
570721fffe3SKacheong Poon 		return;
571721fffe3SKacheong Poon 
572721fffe3SKacheong Poon 	/*
573721fffe3SKacheong Poon 	 * Do not send ACK if there is no outstanding unack'ed data.
574721fffe3SKacheong Poon 	 */
575721fffe3SKacheong Poon 	if (tcp->tcp_rnxt == tcp->tcp_rack) {
576721fffe3SKacheong Poon 		return;
577721fffe3SKacheong Poon 	}
578721fffe3SKacheong Poon 
579721fffe3SKacheong Poon 	if ((tcp->tcp_rnxt - tcp->tcp_rack) > tcp->tcp_mss) {
580721fffe3SKacheong Poon 		/*
581721fffe3SKacheong Poon 		 * Make sure we don't allow deferred ACKs to result in
582721fffe3SKacheong Poon 		 * timer-based ACKing.  If we have held off an ACK
583721fffe3SKacheong Poon 		 * when there was more than an mss here, and the timer
584721fffe3SKacheong Poon 		 * goes off, we have to worry about the possibility
585721fffe3SKacheong Poon 		 * that the sender isn't doing slow-start, or is out
586721fffe3SKacheong Poon 		 * of step with us for some other reason.  We fall
587721fffe3SKacheong Poon 		 * permanently back in the direction of
588721fffe3SKacheong Poon 		 * ACK-every-other-packet as suggested in RFC 1122.
589721fffe3SKacheong Poon 		 */
590721fffe3SKacheong Poon 		if (tcp->tcp_rack_abs_max > 2)
591721fffe3SKacheong Poon 			tcp->tcp_rack_abs_max--;
592721fffe3SKacheong Poon 		tcp->tcp_rack_cur_max = 2;
593721fffe3SKacheong Poon 	}
594721fffe3SKacheong Poon 	mp = tcp_ack_mp(tcp);
595721fffe3SKacheong Poon 
596721fffe3SKacheong Poon 	if (mp != NULL) {
597a2f04351SSebastien Roy 		TCPS_BUMP_MIB(tcps, tcpHCOutSegs);
598721fffe3SKacheong Poon 		TCPS_BUMP_MIB(tcps, tcpOutAck);
599721fffe3SKacheong Poon 		TCPS_BUMP_MIB(tcps, tcpOutAckDelayed);
600721fffe3SKacheong Poon 		tcp_send_data(tcp, mp);
601721fffe3SKacheong Poon 	}
602721fffe3SKacheong Poon }
603721fffe3SKacheong Poon 
604721fffe3SKacheong Poon /*
605721fffe3SKacheong Poon  * Notify IP that we are having trouble with this connection.  IP should
606721fffe3SKacheong Poon  * make note so it can potentially use a different IRE.
607721fffe3SKacheong Poon  */
608721fffe3SKacheong Poon static void
tcp_ip_notify(tcp_t * tcp)609721fffe3SKacheong Poon tcp_ip_notify(tcp_t *tcp)
610721fffe3SKacheong Poon {
611721fffe3SKacheong Poon 	conn_t		*connp = tcp->tcp_connp;
612721fffe3SKacheong Poon 	ire_t		*ire;
613721fffe3SKacheong Poon 
614721fffe3SKacheong Poon 	/*
615721fffe3SKacheong Poon 	 * Note: in the case of source routing we want to blow away the
616721fffe3SKacheong Poon 	 * route to the first source route hop.
617721fffe3SKacheong Poon 	 */
618721fffe3SKacheong Poon 	ire = connp->conn_ixa->ixa_ire;
619721fffe3SKacheong Poon 	if (ire != NULL && !(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE))) {
620721fffe3SKacheong Poon 		if (ire->ire_ipversion == IPV4_VERSION) {
621721fffe3SKacheong Poon 			/*
622721fffe3SKacheong Poon 			 * As per RFC 1122, we send an RTM_LOSING to inform
623721fffe3SKacheong Poon 			 * routing protocols.
624721fffe3SKacheong Poon 			 */
625721fffe3SKacheong Poon 			ip_rts_change(RTM_LOSING, ire->ire_addr,
626721fffe3SKacheong Poon 			    ire->ire_gateway_addr, ire->ire_mask,
627721fffe3SKacheong Poon 			    connp->conn_laddr_v4,  0, 0, 0,
628721fffe3SKacheong Poon 			    (RTA_DST | RTA_GATEWAY | RTA_NETMASK | RTA_IFA),
629721fffe3SKacheong Poon 			    ire->ire_ipst);
630721fffe3SKacheong Poon 		}
631721fffe3SKacheong Poon 		(void) ire_no_good(ire);
632721fffe3SKacheong Poon 	}
633721fffe3SKacheong Poon }
634721fffe3SKacheong Poon 
635721fffe3SKacheong Poon /*
636721fffe3SKacheong Poon  * tcp_timer is the timer service routine.  It handles the retransmission,
637721fffe3SKacheong Poon  * FIN_WAIT_2 flush, and zero window probe timeout events.  It figures out
638721fffe3SKacheong Poon  * from the state of the tcp instance what kind of action needs to be done
639721fffe3SKacheong Poon  * at the time it is called.
640721fffe3SKacheong Poon  */
641721fffe3SKacheong Poon void
tcp_timer(void * arg)642721fffe3SKacheong Poon tcp_timer(void *arg)
643721fffe3SKacheong Poon {
644721fffe3SKacheong Poon 	mblk_t		*mp;
645721fffe3SKacheong Poon 	clock_t		first_threshold;
646721fffe3SKacheong Poon 	clock_t		second_threshold;
647721fffe3SKacheong Poon 	clock_t		ms;
648721fffe3SKacheong Poon 	uint32_t	mss;
649721fffe3SKacheong Poon 	conn_t		*connp = (conn_t *)arg;
650721fffe3SKacheong Poon 	tcp_t		*tcp = connp->conn_tcp;
651721fffe3SKacheong Poon 	tcp_stack_t	*tcps = tcp->tcp_tcps;
652707e74bcSKacheong Poon 	boolean_t	dont_timeout = B_FALSE;
653721fffe3SKacheong Poon 
654721fffe3SKacheong Poon 	tcp->tcp_timer_tid = 0;
655721fffe3SKacheong Poon 
656721fffe3SKacheong Poon 	if (tcp->tcp_fused)
657721fffe3SKacheong Poon 		return;
658721fffe3SKacheong Poon 
659721fffe3SKacheong Poon 	first_threshold =  tcp->tcp_first_timer_threshold;
660721fffe3SKacheong Poon 	second_threshold = tcp->tcp_second_timer_threshold;
661721fffe3SKacheong Poon 	switch (tcp->tcp_state) {
662721fffe3SKacheong Poon 	case TCPS_IDLE:
663721fffe3SKacheong Poon 	case TCPS_BOUND:
664721fffe3SKacheong Poon 	case TCPS_LISTEN:
665721fffe3SKacheong Poon 		return;
666721fffe3SKacheong Poon 	case TCPS_SYN_RCVD: {
667721fffe3SKacheong Poon 		tcp_t	*listener = tcp->tcp_listener;
668721fffe3SKacheong Poon 
669721fffe3SKacheong Poon 		if (tcp->tcp_syn_rcvd_timeout == 0 && (listener != NULL)) {
670721fffe3SKacheong Poon 			/* it's our first timeout */
671721fffe3SKacheong Poon 			tcp->tcp_syn_rcvd_timeout = 1;
672721fffe3SKacheong Poon 			mutex_enter(&listener->tcp_eager_lock);
673721fffe3SKacheong Poon 			listener->tcp_syn_rcvd_timeout++;
674721fffe3SKacheong Poon 			if (!tcp->tcp_dontdrop && !tcp->tcp_closemp_used) {
675721fffe3SKacheong Poon 				/*
676721fffe3SKacheong Poon 				 * Make this eager available for drop if we
677721fffe3SKacheong Poon 				 * need to drop one to accomodate a new
678721fffe3SKacheong Poon 				 * incoming SYN request.
679721fffe3SKacheong Poon 				 */
680721fffe3SKacheong Poon 				MAKE_DROPPABLE(listener, tcp);
681721fffe3SKacheong Poon 			}
682721fffe3SKacheong Poon 			if (!listener->tcp_syn_defense &&
683721fffe3SKacheong Poon 			    (listener->tcp_syn_rcvd_timeout >
684721fffe3SKacheong Poon 			    (tcps->tcps_conn_req_max_q0 >> 2)) &&
685721fffe3SKacheong Poon 			    (tcps->tcps_conn_req_max_q0 > 200)) {
686721fffe3SKacheong Poon 				/* We may be under attack. Put on a defense. */
687721fffe3SKacheong Poon 				listener->tcp_syn_defense = B_TRUE;
688721fffe3SKacheong Poon 				cmn_err(CE_WARN, "High TCP connect timeout "
689721fffe3SKacheong Poon 				    "rate! System (port %d) may be under a "
690721fffe3SKacheong Poon 				    "SYN flood attack!",
691721fffe3SKacheong Poon 				    ntohs(listener->tcp_connp->conn_lport));
692721fffe3SKacheong Poon 
693721fffe3SKacheong Poon 				listener->tcp_ip_addr_cache = kmem_zalloc(
694721fffe3SKacheong Poon 				    IP_ADDR_CACHE_SIZE * sizeof (ipaddr_t),
695721fffe3SKacheong Poon 				    KM_NOSLEEP);
696721fffe3SKacheong Poon 			}
697721fffe3SKacheong Poon 			mutex_exit(&listener->tcp_eager_lock);
698721fffe3SKacheong Poon 		} else if (listener != NULL) {
699721fffe3SKacheong Poon 			mutex_enter(&listener->tcp_eager_lock);
700721fffe3SKacheong Poon 			tcp->tcp_syn_rcvd_timeout++;
701721fffe3SKacheong Poon 			if (tcp->tcp_syn_rcvd_timeout > 1 &&
702721fffe3SKacheong Poon 			    !tcp->tcp_closemp_used) {
703721fffe3SKacheong Poon 				/*
704721fffe3SKacheong Poon 				 * This is our second timeout. Put the tcp in
705721fffe3SKacheong Poon 				 * the list of droppable eagers to allow it to
706721fffe3SKacheong Poon 				 * be dropped, if needed. We don't check
707721fffe3SKacheong Poon 				 * whether tcp_dontdrop is set or not to
708721fffe3SKacheong Poon 				 * protect ourselve from a SYN attack where a
709721fffe3SKacheong Poon 				 * remote host can spoof itself as one of the
710721fffe3SKacheong Poon 				 * good IP source and continue to hold
711721fffe3SKacheong Poon 				 * resources too long.
712721fffe3SKacheong Poon 				 */
713721fffe3SKacheong Poon 				MAKE_DROPPABLE(listener, tcp);
714721fffe3SKacheong Poon 			}
715721fffe3SKacheong Poon 			mutex_exit(&listener->tcp_eager_lock);
716721fffe3SKacheong Poon 		}
717721fffe3SKacheong Poon 	}
718721fffe3SKacheong Poon 		/* FALLTHRU */
719721fffe3SKacheong Poon 	case TCPS_SYN_SENT:
720721fffe3SKacheong Poon 		first_threshold =  tcp->tcp_first_ctimer_threshold;
721721fffe3SKacheong Poon 		second_threshold = tcp->tcp_second_ctimer_threshold;
722707e74bcSKacheong Poon 
7235dd46ab5SKacheong Poon 		/*
7245dd46ab5SKacheong Poon 		 * If an app has set the second_threshold to 0, it means that
7255dd46ab5SKacheong Poon 		 * we need to retransmit forever, unless this is a passive
7265dd46ab5SKacheong Poon 		 * open.  We need to set second_threshold back to a normal
7275dd46ab5SKacheong Poon 		 * value such that later comparison with it still makes
7285dd46ab5SKacheong Poon 		 * sense.  But we set dont_timeout to B_TRUE so that we will
7295dd46ab5SKacheong Poon 		 * never time out.
7305dd46ab5SKacheong Poon 		 */
731707e74bcSKacheong Poon 		if (second_threshold == 0) {
7325dd46ab5SKacheong Poon 			second_threshold = tcps->tcps_ip_abort_linterval;
7335dd46ab5SKacheong Poon 			if (tcp->tcp_active_open)
734707e74bcSKacheong Poon 				dont_timeout = B_TRUE;
735707e74bcSKacheong Poon 		}
736721fffe3SKacheong Poon 		break;
737721fffe3SKacheong Poon 	case TCPS_ESTABLISHED:
738707e74bcSKacheong Poon 	case TCPS_CLOSE_WAIT:
739707e74bcSKacheong Poon 		/*
740707e74bcSKacheong Poon 		 * If the end point has not been closed, TCP can retransmit
741707e74bcSKacheong Poon 		 * forever.  But if the end point is closed, the normal
742707e74bcSKacheong Poon 		 * timeout applies.
743707e74bcSKacheong Poon 		 */
7445dd46ab5SKacheong Poon 		if (second_threshold == 0) {
7455dd46ab5SKacheong Poon 			second_threshold = tcps->tcps_ip_abort_linterval;
746707e74bcSKacheong Poon 			dont_timeout = B_TRUE;
7475dd46ab5SKacheong Poon 		}
748707e74bcSKacheong Poon 		/* FALLTHRU */
749721fffe3SKacheong Poon 	case TCPS_FIN_WAIT_1:
750721fffe3SKacheong Poon 	case TCPS_CLOSING:
751721fffe3SKacheong Poon 	case TCPS_LAST_ACK:
752721fffe3SKacheong Poon 		/* If we have data to rexmit */
753721fffe3SKacheong Poon 		if (tcp->tcp_suna != tcp->tcp_snxt) {
754c12492cfSSebastien Roy 			clock_t time_to_wait;
755721fffe3SKacheong Poon 
756721fffe3SKacheong Poon 			TCPS_BUMP_MIB(tcps, tcpTimRetrans);
757721fffe3SKacheong Poon 			if (!tcp->tcp_xmit_head)
758721fffe3SKacheong Poon 				break;
759c12492cfSSebastien Roy 			time_to_wait = NSEC2MSEC(gethrtime() -
760c12492cfSSebastien Roy 			    (hrtime_t)(intptr_t)tcp->tcp_xmit_head->b_prev);
761c12492cfSSebastien Roy 			time_to_wait = tcp->tcp_rto - time_to_wait;
762721fffe3SKacheong Poon 			/*
763721fffe3SKacheong Poon 			 * If the timer fires too early, 1 clock tick earlier,
764721fffe3SKacheong Poon 			 * restart the timer.
765721fffe3SKacheong Poon 			 */
766721fffe3SKacheong Poon 			if (time_to_wait > msec_per_tick) {
767721fffe3SKacheong Poon 				TCP_STAT(tcps, tcp_timer_fire_early);
768721fffe3SKacheong Poon 				TCP_TIMER_RESTART(tcp, time_to_wait);
769721fffe3SKacheong Poon 				return;
770721fffe3SKacheong Poon 			}
771721fffe3SKacheong Poon 			/*
772721fffe3SKacheong Poon 			 * When we probe zero windows, we force the swnd open.
773721fffe3SKacheong Poon 			 * If our peer acks with a closed window swnd will be
774721fffe3SKacheong Poon 			 * set to zero by tcp_rput(). As long as we are
775721fffe3SKacheong Poon 			 * receiving acks tcp_rput will
776721fffe3SKacheong Poon 			 * reset 'tcp_ms_we_have_waited' so as not to trip the
777721fffe3SKacheong Poon 			 * first and second interval actions.  NOTE: the timer
778721fffe3SKacheong Poon 			 * interval is allowed to continue its exponential
779721fffe3SKacheong Poon 			 * backoff.
780721fffe3SKacheong Poon 			 */
781721fffe3SKacheong Poon 			if (tcp->tcp_swnd == 0 || tcp->tcp_zero_win_probe) {
782721fffe3SKacheong Poon 				if (connp->conn_debug) {
783721fffe3SKacheong Poon 					(void) strlog(TCP_MOD_ID, 0, 1,
784721fffe3SKacheong Poon 					    SL_TRACE, "tcp_timer: zero win");
785721fffe3SKacheong Poon 				}
786721fffe3SKacheong Poon 			} else {
787*57d1724dSToomas Soome 				cc_cong_signal(tcp, 0, CC_RTO);
788721fffe3SKacheong Poon 			}
789721fffe3SKacheong Poon 			break;
790721fffe3SKacheong Poon 		}
791721fffe3SKacheong Poon 		/*
792721fffe3SKacheong Poon 		 * We have something to send yet we cannot send.  The
793721fffe3SKacheong Poon 		 * reason can be:
794721fffe3SKacheong Poon 		 *
795721fffe3SKacheong Poon 		 * 1. Zero send window: we need to do zero window probe.
796721fffe3SKacheong Poon 		 * 2. Zero cwnd: because of ECN, we need to "clock out
797721fffe3SKacheong Poon 		 * segments.
798721fffe3SKacheong Poon 		 * 3. SWS avoidance: receiver may have shrunk window,
799721fffe3SKacheong Poon 		 * reset our knowledge.
800721fffe3SKacheong Poon 		 *
801721fffe3SKacheong Poon 		 * Note that condition 2 can happen with either 1 or
802721fffe3SKacheong Poon 		 * 3.  But 1 and 3 are exclusive.
803721fffe3SKacheong Poon 		 */
804721fffe3SKacheong Poon 		if (tcp->tcp_unsent != 0) {
805721fffe3SKacheong Poon 			/*
806721fffe3SKacheong Poon 			 * Should not hold the zero-copy messages for too long.
807721fffe3SKacheong Poon 			 */
808721fffe3SKacheong Poon 			if (tcp->tcp_snd_zcopy_aware && !tcp->tcp_xmit_zc_clean)
809721fffe3SKacheong Poon 				tcp->tcp_xmit_head = tcp_zcopy_backoff(tcp,
810721fffe3SKacheong Poon 				    tcp->tcp_xmit_head, B_TRUE);
811721fffe3SKacheong Poon 
812721fffe3SKacheong Poon 			if (tcp->tcp_cwnd == 0) {
813721fffe3SKacheong Poon 				/*
814721fffe3SKacheong Poon 				 * Set tcp_cwnd to 1 MSS so that a
815721fffe3SKacheong Poon 				 * new segment can be sent out.  We
816721fffe3SKacheong Poon 				 * are "clocking out" new data when
817721fffe3SKacheong Poon 				 * the network is really congested.
818721fffe3SKacheong Poon 				 */
819721fffe3SKacheong Poon 				ASSERT(tcp->tcp_ecn_ok);
820721fffe3SKacheong Poon 				tcp->tcp_cwnd = tcp->tcp_mss;
821721fffe3SKacheong Poon 			}
822721fffe3SKacheong Poon 			if (tcp->tcp_swnd == 0) {
823721fffe3SKacheong Poon 				/* Extend window for zero window probe */
824721fffe3SKacheong Poon 				tcp->tcp_swnd++;
825721fffe3SKacheong Poon 				tcp->tcp_zero_win_probe = B_TRUE;
826721fffe3SKacheong Poon 				TCPS_BUMP_MIB(tcps, tcpOutWinProbe);
827a2f04351SSebastien Roy 				tcp->tcp_cs.tcp_out_zwnd_probes++;
828721fffe3SKacheong Poon 			} else {
829721fffe3SKacheong Poon 				/*
830721fffe3SKacheong Poon 				 * Handle timeout from sender SWS avoidance.
831721fffe3SKacheong Poon 				 * Reset our knowledge of the max send window
832721fffe3SKacheong Poon 				 * since the receiver might have reduced its
833721fffe3SKacheong Poon 				 * receive buffer.  Avoid setting tcp_max_swnd
834721fffe3SKacheong Poon 				 * to one since that will essentially disable
835721fffe3SKacheong Poon 				 * the SWS checks.
836721fffe3SKacheong Poon 				 *
837721fffe3SKacheong Poon 				 * Note that since we don't have a SWS
838721fffe3SKacheong Poon 				 * state variable, if the timeout is set
839721fffe3SKacheong Poon 				 * for ECN but not for SWS, this
840721fffe3SKacheong Poon 				 * code will also be executed.  This is
841721fffe3SKacheong Poon 				 * fine as tcp_max_swnd is updated
842721fffe3SKacheong Poon 				 * constantly and it will not affect
843721fffe3SKacheong Poon 				 * anything.
844721fffe3SKacheong Poon 				 */
845721fffe3SKacheong Poon 				tcp->tcp_max_swnd = MAX(tcp->tcp_swnd, 2);
846721fffe3SKacheong Poon 			}
847721fffe3SKacheong Poon 			tcp_wput_data(tcp, NULL, B_FALSE);
848721fffe3SKacheong Poon 			return;
849721fffe3SKacheong Poon 		}
850721fffe3SKacheong Poon 		/* Is there a FIN that needs to be to re retransmitted? */
851721fffe3SKacheong Poon 		if ((tcp->tcp_valid_bits & TCP_FSS_VALID) &&
852721fffe3SKacheong Poon 		    !tcp->tcp_fin_acked)
853721fffe3SKacheong Poon 			break;
854721fffe3SKacheong Poon 		/* Nothing to do, return without restarting timer. */
855721fffe3SKacheong Poon 		TCP_STAT(tcps, tcp_timer_fire_miss);
856721fffe3SKacheong Poon 		return;
857721fffe3SKacheong Poon 	case TCPS_FIN_WAIT_2:
858721fffe3SKacheong Poon 		/*
859721fffe3SKacheong Poon 		 * User closed the TCP endpoint and peer ACK'ed our FIN.
860721fffe3SKacheong Poon 		 * We waited some time for for peer's FIN, but it hasn't
861721fffe3SKacheong Poon 		 * arrived.  We flush the connection now to avoid
862721fffe3SKacheong Poon 		 * case where the peer has rebooted.
863721fffe3SKacheong Poon 		 */
864721fffe3SKacheong Poon 		if (TCP_IS_DETACHED(tcp)) {
865721fffe3SKacheong Poon 			(void) tcp_clean_death(tcp, 0);
866721fffe3SKacheong Poon 		} else {
867721fffe3SKacheong Poon 			TCP_TIMER_RESTART(tcp,
868707e74bcSKacheong Poon 			    tcp->tcp_fin_wait_2_flush_interval);
869721fffe3SKacheong Poon 		}
870721fffe3SKacheong Poon 		return;
871721fffe3SKacheong Poon 	case TCPS_TIME_WAIT:
872721fffe3SKacheong Poon 		(void) tcp_clean_death(tcp, 0);
873721fffe3SKacheong Poon 		return;
874721fffe3SKacheong Poon 	default:
875721fffe3SKacheong Poon 		if (connp->conn_debug) {
876721fffe3SKacheong Poon 			(void) strlog(TCP_MOD_ID, 0, 1, SL_TRACE|SL_ERROR,
877721fffe3SKacheong Poon 			    "tcp_timer: strange state (%d) %s",
878721fffe3SKacheong Poon 			    tcp->tcp_state, tcp_display(tcp, NULL,
879721fffe3SKacheong Poon 			    DISP_PORT_ONLY));
880721fffe3SKacheong Poon 		}
881721fffe3SKacheong Poon 		return;
882721fffe3SKacheong Poon 	}
883721fffe3SKacheong Poon 
884721fffe3SKacheong Poon 	/*
885721fffe3SKacheong Poon 	 * If the system is under memory pressure or the max number of
886721fffe3SKacheong Poon 	 * connections have been established for the listener, be more
887721fffe3SKacheong Poon 	 * aggressive in aborting connections.
888721fffe3SKacheong Poon 	 */
889721fffe3SKacheong Poon 	if (tcps->tcps_reclaim || (tcp->tcp_listen_cnt != NULL &&
890721fffe3SKacheong Poon 	    tcp->tcp_listen_cnt->tlc_cnt > tcp->tcp_listen_cnt->tlc_max)) {
891721fffe3SKacheong Poon 		second_threshold = tcp_early_abort * SECONDS;
892707e74bcSKacheong Poon 
893707e74bcSKacheong Poon 		/* We will ignore the never timeout promise in this case... */
894707e74bcSKacheong Poon 		dont_timeout = B_FALSE;
895721fffe3SKacheong Poon 	}
896721fffe3SKacheong Poon 
8975dd46ab5SKacheong Poon 	ASSERT(second_threshold != 0);
898707e74bcSKacheong Poon 
899721fffe3SKacheong Poon 	if ((ms = tcp->tcp_ms_we_have_waited) > second_threshold) {
900721fffe3SKacheong Poon 		/*
901721fffe3SKacheong Poon 		 * Should not hold the zero-copy messages for too long.
902721fffe3SKacheong Poon 		 */
903721fffe3SKacheong Poon 		if (tcp->tcp_snd_zcopy_aware && !tcp->tcp_xmit_zc_clean)
904721fffe3SKacheong Poon 			tcp->tcp_xmit_head = tcp_zcopy_backoff(tcp,
905721fffe3SKacheong Poon 			    tcp->tcp_xmit_head, B_TRUE);
906721fffe3SKacheong Poon 
9075dd46ab5SKacheong Poon 		if (dont_timeout) {
9085dd46ab5SKacheong Poon 			/*
9095dd46ab5SKacheong Poon 			 * Reset tcp_ms_we_have_waited to avoid overflow since
9105dd46ab5SKacheong Poon 			 * we are going to retransmit forever.
9115dd46ab5SKacheong Poon 			 */
9125dd46ab5SKacheong Poon 			tcp->tcp_ms_we_have_waited = second_threshold;
913707e74bcSKacheong Poon 			goto timer_rexmit;
9145dd46ab5SKacheong Poon 		}
915707e74bcSKacheong Poon 
916721fffe3SKacheong Poon 		/*
917721fffe3SKacheong Poon 		 * For zero window probe, we need to send indefinitely,
918721fffe3SKacheong Poon 		 * unless we have not heard from the other side for some
919721fffe3SKacheong Poon 		 * time...
920721fffe3SKacheong Poon 		 */
921721fffe3SKacheong Poon 		if ((tcp->tcp_zero_win_probe == 0) ||
922721fffe3SKacheong Poon 		    (TICK_TO_MSEC(ddi_get_lbolt() - tcp->tcp_last_recv_time) >
923721fffe3SKacheong Poon 		    second_threshold)) {
924721fffe3SKacheong Poon 			TCPS_BUMP_MIB(tcps, tcpTimRetransDrop);
925721fffe3SKacheong Poon 			/*
926721fffe3SKacheong Poon 			 * If TCP is in SYN_RCVD state, send back a
927721fffe3SKacheong Poon 			 * RST|ACK as BSD does.  Note that tcp_zero_win_probe
928721fffe3SKacheong Poon 			 * should be zero in TCPS_SYN_RCVD state.
929721fffe3SKacheong Poon 			 */
930721fffe3SKacheong Poon 			if (tcp->tcp_state == TCPS_SYN_RCVD) {
931721fffe3SKacheong Poon 				tcp_xmit_ctl("tcp_timer: RST sent on timeout "
932721fffe3SKacheong Poon 				    "in SYN_RCVD",
933721fffe3SKacheong Poon 				    tcp, tcp->tcp_snxt,
934721fffe3SKacheong Poon 				    tcp->tcp_rnxt, TH_RST | TH_ACK);
935721fffe3SKacheong Poon 			}
936721fffe3SKacheong Poon 			(void) tcp_clean_death(tcp,
937721fffe3SKacheong Poon 			    tcp->tcp_client_errno ?
938721fffe3SKacheong Poon 			    tcp->tcp_client_errno : ETIMEDOUT);
939721fffe3SKacheong Poon 			return;
940721fffe3SKacheong Poon 		} else {
941721fffe3SKacheong Poon 			/*
942721fffe3SKacheong Poon 			 * If the system is under memory pressure, we also
943721fffe3SKacheong Poon 			 * abort connection in zero window probing.
944721fffe3SKacheong Poon 			 */
945721fffe3SKacheong Poon 			if (tcps->tcps_reclaim) {
946721fffe3SKacheong Poon 				(void) tcp_clean_death(tcp,
947721fffe3SKacheong Poon 				    tcp->tcp_client_errno ?
948721fffe3SKacheong Poon 				    tcp->tcp_client_errno : ETIMEDOUT);
949721fffe3SKacheong Poon 				TCP_STAT(tcps, tcp_zwin_mem_drop);
950721fffe3SKacheong Poon 				return;
951721fffe3SKacheong Poon 			}
952721fffe3SKacheong Poon 			/*
953721fffe3SKacheong Poon 			 * Set tcp_ms_we_have_waited to second_threshold
954721fffe3SKacheong Poon 			 * so that in next timeout, we will do the above
955721fffe3SKacheong Poon 			 * check (ddi_get_lbolt() - tcp_last_recv_time).
956721fffe3SKacheong Poon 			 * This is also to avoid overflow.
957721fffe3SKacheong Poon 			 *
958721fffe3SKacheong Poon 			 * We don't need to decrement tcp_timer_backoff
959721fffe3SKacheong Poon 			 * to avoid overflow because it will be decremented
960721fffe3SKacheong Poon 			 * later if new timeout value is greater than
961707e74bcSKacheong Poon 			 * tcp_rto_max.  In the case when tcp_rto_max is
962707e74bcSKacheong Poon 			 * greater than second_threshold, it means that we
963707e74bcSKacheong Poon 			 * will wait longer than second_threshold to send
964707e74bcSKacheong Poon 			 * the next
965721fffe3SKacheong Poon 			 * window probe.
966721fffe3SKacheong Poon 			 */
967721fffe3SKacheong Poon 			tcp->tcp_ms_we_have_waited = second_threshold;
968721fffe3SKacheong Poon 		}
969721fffe3SKacheong Poon 	} else if (ms > first_threshold) {
970721fffe3SKacheong Poon 		/*
971721fffe3SKacheong Poon 		 * Should not hold the zero-copy messages for too long.
972721fffe3SKacheong Poon 		 */
973721fffe3SKacheong Poon 		if (tcp->tcp_snd_zcopy_aware && !tcp->tcp_xmit_zc_clean)
974721fffe3SKacheong Poon 			tcp->tcp_xmit_head = tcp_zcopy_backoff(tcp,
975721fffe3SKacheong Poon 			    tcp->tcp_xmit_head, B_TRUE);
976721fffe3SKacheong Poon 
977721fffe3SKacheong Poon 		/*
978721fffe3SKacheong Poon 		 * We have been retransmitting for too long...  The RTT
979721fffe3SKacheong Poon 		 * we calculated is probably incorrect.  Reinitialize it.
980721fffe3SKacheong Poon 		 * Need to compensate for 0 tcp_rtt_sa.  Reset
981721fffe3SKacheong Poon 		 * tcp_rtt_update so that we won't accidentally cache a
982721fffe3SKacheong Poon 		 * bad value.  But only do this if this is not a zero
983721fffe3SKacheong Poon 		 * window probe.
984721fffe3SKacheong Poon 		 */
985721fffe3SKacheong Poon 		if (tcp->tcp_rtt_sa != 0 && tcp->tcp_zero_win_probe == 0) {
986c12492cfSSebastien Roy 			tcp->tcp_rtt_sd += tcp->tcp_rtt_sa >> 3 +
987c12492cfSSebastien Roy 			    tcp->tcp_rtt_sa >> 5;
988721fffe3SKacheong Poon 			tcp->tcp_rtt_sa = 0;
989721fffe3SKacheong Poon 			tcp_ip_notify(tcp);
990721fffe3SKacheong Poon 			tcp->tcp_rtt_update = 0;
991721fffe3SKacheong Poon 		}
992721fffe3SKacheong Poon 	}
993707e74bcSKacheong Poon 
994707e74bcSKacheong Poon timer_rexmit:
995721fffe3SKacheong Poon 	tcp->tcp_timer_backoff++;
996c12492cfSSebastien Roy 	/*
997c12492cfSSebastien Roy 	 * Calculate the backed off retransmission timeout. If the shift brings
998c12492cfSSebastien Roy 	 * us back over the max, then we repin the value, and decrement the
999c12492cfSSebastien Roy 	 * backoff to avoid overflow.
1000c12492cfSSebastien Roy 	 */
1001c12492cfSSebastien Roy 	ms = tcp_calculate_rto(tcp, tcps, 0) << tcp->tcp_timer_backoff;
1002707e74bcSKacheong Poon 	if (ms > tcp->tcp_rto_max) {
1003707e74bcSKacheong Poon 		ms = tcp->tcp_rto_max;
1004721fffe3SKacheong Poon 		tcp->tcp_timer_backoff--;
1005721fffe3SKacheong Poon 	}
1006721fffe3SKacheong Poon 	tcp->tcp_ms_we_have_waited += ms;
1007721fffe3SKacheong Poon 	if (tcp->tcp_zero_win_probe == 0) {
1008721fffe3SKacheong Poon 		tcp->tcp_rto = ms;
1009721fffe3SKacheong Poon 	}
1010721fffe3SKacheong Poon 	TCP_TIMER_RESTART(tcp, ms);
1011721fffe3SKacheong Poon 	/*
1012721fffe3SKacheong Poon 	 * This is after a timeout and tcp_rto is backed off.  Set
1013721fffe3SKacheong Poon 	 * tcp_set_timer to 1 so that next time RTO is updated, we will
1014721fffe3SKacheong Poon 	 * restart the timer with a correct value.
1015721fffe3SKacheong Poon 	 */
1016721fffe3SKacheong Poon 	tcp->tcp_set_timer = 1;
1017721fffe3SKacheong Poon 	mss = tcp->tcp_snxt - tcp->tcp_suna;
1018721fffe3SKacheong Poon 	if (mss > tcp->tcp_mss)
1019721fffe3SKacheong Poon 		mss = tcp->tcp_mss;
1020721fffe3SKacheong Poon 	if (mss > tcp->tcp_swnd && tcp->tcp_swnd != 0)
1021721fffe3SKacheong Poon 		mss = tcp->tcp_swnd;
1022721fffe3SKacheong Poon 
1023c12492cfSSebastien Roy 	if ((mp = tcp->tcp_xmit_head) != NULL) {
1024c12492cfSSebastien Roy 		mp->b_prev = (mblk_t *)(intptr_t)gethrtime();
1025c12492cfSSebastien Roy 	}
1026721fffe3SKacheong Poon 	mp = tcp_xmit_mp(tcp, mp, mss, NULL, NULL, tcp->tcp_suna, B_TRUE, &mss,
1027721fffe3SKacheong Poon 	    B_TRUE);
1028721fffe3SKacheong Poon 
1029721fffe3SKacheong Poon 	/*
1030721fffe3SKacheong Poon 	 * When slow start after retransmission begins, start with
1031721fffe3SKacheong Poon 	 * this seq no.  tcp_rexmit_max marks the end of special slow
1032633fc3a6SSebastien Roy 	 * start phase.
1033721fffe3SKacheong Poon 	 */
1034721fffe3SKacheong Poon 	tcp->tcp_rexmit_nxt = tcp->tcp_suna;
1035721fffe3SKacheong Poon 	if ((tcp->tcp_valid_bits & TCP_FSS_VALID) &&
1036721fffe3SKacheong Poon 	    (tcp->tcp_unsent == 0)) {
1037721fffe3SKacheong Poon 		tcp->tcp_rexmit_max = tcp->tcp_fss;
1038721fffe3SKacheong Poon 	} else {
1039721fffe3SKacheong Poon 		tcp->tcp_rexmit_max = tcp->tcp_snxt;
1040721fffe3SKacheong Poon 	}
1041721fffe3SKacheong Poon 	tcp->tcp_rexmit = B_TRUE;
1042721fffe3SKacheong Poon 	tcp->tcp_dupack_cnt = 0;
1043721fffe3SKacheong Poon 
1044721fffe3SKacheong Poon 	/*
1045721fffe3SKacheong Poon 	 * Remove all rexmit SACK blk to start from fresh.
1046721fffe3SKacheong Poon 	 */
104766cd0f60SKacheong Poon 	if (tcp->tcp_snd_sack_ok)
1048721fffe3SKacheong Poon 		TCP_NOTSACK_REMOVE_ALL(tcp->tcp_notsack_list, tcp);
1049721fffe3SKacheong Poon 	if (mp == NULL) {
1050721fffe3SKacheong Poon 		return;
1051721fffe3SKacheong Poon 	}
1052721fffe3SKacheong Poon 
1053721fffe3SKacheong Poon 	tcp->tcp_csuna = tcp->tcp_snxt;
1054721fffe3SKacheong Poon 	TCPS_BUMP_MIB(tcps, tcpRetransSegs);
1055721fffe3SKacheong Poon 	TCPS_UPDATE_MIB(tcps, tcpRetransBytes, mss);
1056a2f04351SSebastien Roy 	tcp->tcp_cs.tcp_out_retrans_segs++;
1057a2f04351SSebastien Roy 	tcp->tcp_cs.tcp_out_retrans_bytes += mss;
1058721fffe3SKacheong Poon 	tcp_send_data(tcp, mp);
1059721fffe3SKacheong Poon 
1060721fffe3SKacheong Poon }
1061721fffe3SKacheong Poon 
1062721fffe3SKacheong Poon /*
1063721fffe3SKacheong Poon  * Handle lingering timeouts. This function is called when the SO_LINGER timeout
1064721fffe3SKacheong Poon  * expires.
1065721fffe3SKacheong Poon  */
1066721fffe3SKacheong Poon void
tcp_close_linger_timeout(void * arg)1067721fffe3SKacheong Poon tcp_close_linger_timeout(void *arg)
1068721fffe3SKacheong Poon {
1069721fffe3SKacheong Poon 	conn_t	*connp = (conn_t *)arg;
1070*57d1724dSToomas Soome 	tcp_t	*tcp = connp->conn_tcp;
1071721fffe3SKacheong Poon 
1072721fffe3SKacheong Poon 	tcp->tcp_client_errno = ETIMEDOUT;
1073721fffe3SKacheong Poon 	tcp_stop_lingering(tcp);
1074721fffe3SKacheong Poon }
1075