xref: /illumos-gate/usr/src/uts/common/inet/cc/cc_newreno.c (revision 45a4b79d042e642c2ed7090ec290469ccf8fc563)
1 /*
2  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995
3  *	The Regents of the University of California.
4  * Copyright (c) 2007-2008,2010
5  *	Swinburne University of Technology, Melbourne, Australia.
6  * Copyright (c) 2009-2010 Lawrence Stewart <lstewart@freebsd.org>
7  * Copyright (c) 2010 The FreeBSD Foundation
8  * All rights reserved.
9  * Copyright (c) 2017 by Delphix. All rights reserved.
10  *
11  * This software was developed at the Centre for Advanced Internet
12  * Architectures, Swinburne University of Technology, by Lawrence Stewart, James
13  * Healy and David Hayes, made possible in part by a grant from the Cisco
14  * University Research Program Fund at Community Foundation Silicon Valley.
15  *
16  * Portions of this software were developed at the Centre for Advanced
17  * Internet Architectures, Swinburne University of Technology, Melbourne,
18  * Australia by David Hayes under sponsorship from the FreeBSD Foundation.
19  *
20  * Redistribution and use in source and binary forms, with or without
21  * modification, are permitted provided that the following conditions
22  * are met:
23  * 1. Redistributions of source code must retain the above copyright
24  *    notice, this list of conditions and the following disclaimer.
25  * 2. Redistributions in binary form must reproduce the above copyright
26  *    notice, this list of conditions and the following disclaimer in the
27  *    documentation and/or other materials provided with the distribution.
28  *
29  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
30  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
33  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39  * SUCH DAMAGE.
40  */
41 
42 /*
43  * This software was first released in 2007 by James Healy and Lawrence Stewart
44  * whilst working on the NewTCP research project at Swinburne University of
45  * Technology's Centre for Advanced Internet Architectures, Melbourne,
46  * Australia, which was made possible in part by a grant from the Cisco
47  * University Research Program Fund at Community Foundation Silicon Valley.
48  * More details are available at:
49  *   http://caia.swin.edu.au/urp/newtcp/
50  */
51 
52 #include <sys/errno.h>
53 #include <inet/tcp.h>
54 #include <inet/tcp_impl.h>
55 #include <inet/cc.h>
56 #include <inet/cc/cc_module.h>
57 
58 static void	newreno_ack_received(struct cc_var *ccv, uint16_t type);
59 static void	newreno_after_idle(struct cc_var *ccv);
60 static void	newreno_cong_signal(struct cc_var *ccv, uint32_t type);
61 static void	newreno_post_recovery(struct cc_var *ccv);
62 
/*
 * Module linkage: this CC algorithm exports no device/filesystem entry
 * points, so it loads as a miscellaneous module with just a description.
 */
static struct modlmisc cc_newreno_modlmisc = {
	&mod_miscops,
	"New Reno Congestion Control"
};
67 
/* Overall module linkage: a single misc linkage, NULL-terminated. */
static struct modlinkage cc_newreno_modlinkage = {
	MODREV_1,
	&cc_newreno_modlmisc,
	NULL
};
73 
/*
 * NewReno algorithm descriptor handed to the congestion-control
 * framework via cc_register_algo().  Hooks not listed here are
 * zero-initialized (unset).
 */
struct cc_algo newreno_cc_algo = {
	.name = "newreno",
	.ack_received = newreno_ack_received,	/* per-ACK cwnd growth */
	.after_idle = newreno_after_idle,	/* restart window after idle */
	.cong_signal = newreno_cong_signal,	/* dupack/ECN/RTO handling */
	.post_recovery = newreno_post_recovery,	/* leave fast recovery */
};
81 
82 int
83 _init(void)
84 {
85 	int err;
86 
87 	if ((err = cc_register_algo(&newreno_cc_algo)) == 0) {
88 		if ((err = mod_install(&cc_newreno_modlinkage)) != 0)
89 			(void) cc_deregister_algo(&newreno_cc_algo);
90 	}
91 	return (err);
92 }
93 
94 int
95 _fini(void)
96 {
97 	/* XXX Not unloadable for now */
98 	return (EBUSY);
99 }
100 
101 int
102 _info(struct modinfo *modinfop)
103 {
104 	return (mod_info(&cc_newreno_modlinkage, modinfop));
105 }
106 
107 static void
108 newreno_ack_received(struct cc_var *ccv, uint16_t type)
109 {
110 	if (type == CC_ACK && !IN_RECOVERY(ccv->flags) &&
111 	    (ccv->flags & CCF_CWND_LIMITED)) {
112 		uint_t cw = CCV(ccv, tcp_cwnd);
113 		uint_t incr = CCV(ccv, tcp_mss);
114 
115 		/*
116 		 * Regular in-order ACK, open the congestion window.
117 		 * Method depends on which congestion control state we're
118 		 * in (slow start or cong avoid) and if ABC (RFC 3465) is
119 		 * enabled.
120 		 *
121 		 * slow start: cwnd <= ssthresh
122 		 * cong avoid: cwnd > ssthresh
123 		 *
124 		 * slow start and ABC (RFC 3465):
125 		 *   Grow cwnd exponentially by the amount of data
126 		 *   ACKed capping the max increment per ACK to
127 		 *   (abc_l_var * maxseg) bytes.
128 		 *
129 		 * slow start without ABC (RFC 5681):
130 		 *   Grow cwnd exponentially by maxseg per ACK.
131 		 *
132 		 * cong avoid and ABC (RFC 3465):
133 		 *   Grow cwnd linearly by maxseg per RTT for each
134 		 *   cwnd worth of ACKed data.
135 		 *
136 		 * cong avoid without ABC (RFC 5681):
137 		 *   Grow cwnd linearly by approximately maxseg per RTT using
138 		 *   maxseg^2 / cwnd per ACK as the increment.
139 		 *   If cwnd > maxseg^2, fix the cwnd increment at 1 byte to
140 		 *   avoid capping cwnd.
141 		 */
142 		if (cw > CCV(ccv, tcp_cwnd_ssthresh)) {
143 			if (CC_ABC(ccv)) {
144 				if (ccv->flags & CCF_ABC_SENTAWND)
145 					ccv->flags &= ~CCF_ABC_SENTAWND;
146 				else
147 					incr = 0;
148 			} else
149 				incr = max((incr * incr / cw), 1);
150 		} else if (CC_ABC(ccv)) {
151 			/*
152 			 * In slow-start with ABC enabled and no RTO in sight?
153 			 * (Must not use abc_l_var > 1 if slow starting after
154 			 * an RTO.
155 			 */
156 			if (ccv->flags & CCF_RTO) {
157 				incr = min(ccv->bytes_this_ack,
158 				    CCV(ccv, tcp_mss));
159 			} else {
160 				incr = min(ccv->bytes_this_ack,
161 				    CC_ABC_L_VAR(ccv) * CCV(ccv, tcp_mss));
162 			}
163 
164 		}
165 		/* ABC is on by default, so incr equals 0 frequently. */
166 		if (incr > 0)
167 			CCV(ccv, tcp_cwnd) = min(cw + incr,
168 			    TCP_MAXWIN << CCV(ccv, tcp_snd_ws));
169 	}
170 }
171 
172 static void
173 newreno_after_idle(struct cc_var *ccv)
174 {
175 	int rw;
176 
177 	/*
178 	 * If we've been idle for more than one retransmit timeout the old
179 	 * congestion window is no longer current and we have to reduce it to
180 	 * the restart window before we can transmit again.
181 	 *
182 	 * The restart window is the initial window or the last CWND, whichever
183 	 * is smaller.
184 	 *
185 	 * This is done to prevent us from flooding the path with a full CWND at
186 	 * wirespeed, overloading router and switch buffers along the way.
187 	 *
188 	 * See RFC5681 Section 4.1. "Restarting Idle Connections".
189 	 */
190 	if (CCV(ccv, tcp_init_cwnd) != 0) {
191 		/*
192 		 * The TCP_INIT_CWND socket option was used to override the
193 		 * default.
194 		 */
195 		rw = CCV(ccv, tcp_init_cwnd) * CCV(ccv, tcp_mss);
196 	} else if (CCSV(ccv, tcps_slow_start_initial) != 0) {
197 		/* The _slow_start_initial tunable was explicitly set. */
198 		rw = min(TCP_MAX_INIT_CWND, CCSV(ccv, tcps_slow_start_initial))
199 		    * CCV(ccv, tcp_mss);
200 	} else {
201 		/* Do RFC 3390 */
202 		rw = min(4 * CCV(ccv, tcp_mss),
203 		    max(2 * CCV(ccv, tcp_mss), 4380));
204 	}
205 
206 	CCV(ccv, tcp_cwnd) = min(rw, CCV(ccv, tcp_cwnd));
207 }
208 
209 /*
210  * Perform any necessary tasks before we enter congestion recovery.
211  */
212 static void
213 newreno_cong_signal(struct cc_var *ccv, uint32_t type)
214 {
215 	uint32_t cwin, ssthresh_on_loss;
216 	uint32_t mss;
217 
218 	cwin = CCV(ccv, tcp_cwnd);
219 	mss = CCV(ccv, tcp_mss);
220 	ssthresh_on_loss =
221 	    max((CCV(ccv, tcp_snxt) - CCV(ccv, tcp_suna)) / 2 / mss, 2)
222 	    * mss;
223 
224 	/* Catch algos which mistakenly leak private signal types. */
225 	ASSERT((type & CC_SIGPRIVMASK) == 0);
226 
227 	cwin = max(cwin / 2 / mss, 2) * mss;
228 
229 	switch (type) {
230 	case CC_NDUPACK:
231 		if (!IN_FASTRECOVERY(ccv->flags)) {
232 			if (!IN_CONGRECOVERY(ccv->flags)) {
233 				CCV(ccv, tcp_cwnd_ssthresh) = ssthresh_on_loss;
234 				CCV(ccv, tcp_cwnd) = cwin;
235 			}
236 			ENTER_RECOVERY(ccv->flags);
237 		}
238 		break;
239 	case CC_ECN:
240 		if (!IN_CONGRECOVERY(ccv->flags)) {
241 			CCV(ccv, tcp_cwnd_ssthresh) = ssthresh_on_loss;
242 			CCV(ccv, tcp_cwnd) = cwin;
243 			ENTER_CONGRECOVERY(ccv->flags);
244 		}
245 		break;
246 	case CC_RTO:
247 		CCV(ccv, tcp_cwnd_ssthresh) = ssthresh_on_loss;
248 		CCV(ccv, tcp_cwnd) = mss;
249 		break;
250 	}
251 }
252 
253 /*
254  * Perform any necessary tasks before we exit congestion recovery.
255  */
256 static void
257 newreno_post_recovery(struct cc_var *ccv)
258 {
259 	if (IN_FASTRECOVERY(ccv->flags)) {
260 		/*
261 		 * Fast recovery will conclude after returning from this
262 		 * function.
263 		 */
264 		if (CCV(ccv, tcp_cwnd) > CCV(ccv, tcp_cwnd_ssthresh)) {
265 			CCV(ccv, tcp_cwnd) = CCV(ccv, tcp_cwnd_ssthresh);
266 		}
267 	}
268 }
269