xref: /illumos-gate/usr/src/uts/common/inet/cc/cc_cubic.c (revision f1ee6ec3)
/*
 * Copyright (c) 2008-2010 Lawrence Stewart <lstewart@freebsd.org>
 * Copyright (c) 2010 The FreeBSD Foundation
 * All rights reserved.
 * Copyright (c) 2017 by Delphix. All rights reserved.
 * Copyright 2019 Joyent, Inc.
 * Copyright 2020 RackTop Systems, Inc.
 *
 * This software was developed by Lawrence Stewart while studying at the Centre
 * for Advanced Internet Architectures, Swinburne University of Technology, made
 * possible in part by a grant from the Cisco University Research Program Fund
 * at Community Foundation Silicon Valley.
 *
 * Portions of this software were developed at the Centre for Advanced
 * Internet Architectures, Swinburne University of Technology, Melbourne,
 * Australia by David Hayes under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * An implementation of the CUBIC congestion control algorithm for FreeBSD,
 * based on the Internet Draft "draft-rhee-tcpm-cubic-02" by Rhee, Xu and Ha.
 * Originally released as part of the NewTCP research project at Swinburne
 * University of Technology's Centre for Advanced Internet Architectures,
 * Melbourne, Australia, which was made possible in part by a grant from the
 * Cisco University Research Program Fund at Community Foundation Silicon
 * Valley. More details are available at:
 *   http://caia.swin.edu.au/urp/newtcp/
 */
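
/*
 * In brief (see the I-D referenced above for the details): after a congestion
 * event reduces cwnd from its previous maximum W_max, the window is regrown
 * along a cubic curve
 *
 *	W_cubic(t) = C * (t - K)^3 + W_max
 *
 * where t is the time since the congestion event and K is the time the curve
 * takes to return to W_max. A parallel "TCP-friendly" estimate of the window
 * standard TCP would have achieved over the same interval is also tracked,
 * and cwnd follows whichever of the two windows is larger.
 */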

#include <sys/errno.h>
#include <sys/types.h>
#include <sys/kmem.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/modctl.h>
#include <sys/time.h>

#include <inet/tcp_impl.h>
#include <inet/cc.h>
#include <inet/cc/cc_cubic.h>
#include <inet/cc/cc_module.h>

static struct modlmisc cc_cubic_modlmisc = {
	&mod_miscops,
	"Cubic Congestion Control"
};

static struct modlinkage cc_cubic_modlinkage = {
	MODREV_1,
	&cc_cubic_modlmisc,
	NULL
};

/*
 * cubic uses the NewReno implementation of after_idle (indirectly, via
 * cubic_after_idle) and uses NewReno's ack_received callback during slow
 * start.
 */
static struct cc_algo *newreno_cc_algo;

static void	cubic_ack_received(struct cc_var *ccv, uint16_t type);
static void	cubic_cb_destroy(struct cc_var *ccv);
static int	cubic_cb_init(struct cc_var *ccv);
static void	cubic_cong_signal(struct cc_var *ccv, uint32_t type);
static void	cubic_conn_init(struct cc_var *ccv);
static void	cubic_post_recovery(struct cc_var *ccv);
static void	cubic_record_rtt(struct cc_var *ccv);
static void	cubic_ssthresh_update(struct cc_var *ccv);
static void	cubic_after_idle(struct cc_var *ccv);

struct cubic {
	/* Cubic K in fixed point form with CUBIC_SHIFT worth of precision. */
	int64_t		K;
	/* Sum of RTT samples across an epoch in nanoseconds. */
	hrtime_t	sum_rtt_nsecs;
	/* cwnd at the most recent congestion event. */
	uint32_t	max_cwnd;
	/* cwnd at the previous congestion event. */
	uint32_t	prev_max_cwnd;
	/* Number of congestion events. */
	uint32_t	num_cong_events;
	/* Minimum observed rtt in nanoseconds. */
	hrtime_t	min_rtt_nsecs;
	/* Mean observed rtt between congestion epochs. */
	hrtime_t	mean_rtt_nsecs;
	/* ACKs since last congestion event. */
	int		epoch_ack_count;
	/* Time of last congestion event in nanoseconds. */
	hrtime_t	t_last_cong;
};

struct cc_algo cubic_cc_algo = {
	.name = "cubic",
	.ack_received = cubic_ack_received,
	.cb_destroy = cubic_cb_destroy,
	.cb_init = cubic_cb_init,
	.cong_signal = cubic_cong_signal,
	.conn_init = cubic_conn_init,
	.post_recovery = cubic_post_recovery,
	.after_idle = cubic_after_idle,
};

int
_init(void)
{
	int err;

	if ((newreno_cc_algo = cc_load_algo("newreno")) == NULL)
		return (EINVAL);

	if ((err = cc_register_algo(&cubic_cc_algo)) == 0) {
		if ((err = mod_install(&cc_cubic_modlinkage)) != 0)
			(void) cc_deregister_algo(&cubic_cc_algo);
	}

	return (err);
}

int
_fini(void)
{
	/* XXX Not unloadable for now */
	return (EBUSY);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&cc_cubic_modlinkage, modinfop));
}

static void
cubic_ack_received(struct cc_var *ccv, uint16_t type)
{
	struct cubic *cubic_data;
	uint32_t w_tf, w_cubic_next;
	hrtime_t nsecs_since_cong;

	cubic_data = ccv->cc_data;
	cubic_record_rtt(ccv);

	/*
	 * Process a regular ACK when we are not in congestion/fast recovery,
	 * we are cwnd limited, and either ABC is disabled, we are still in
	 * slow start, or ABC is enabled and a cwnd's worth of bytes has been
	 * ACKed.
	 */
	if (type == CC_ACK && !IN_RECOVERY(ccv->flags) &&
	    (ccv->flags & CCF_CWND_LIMITED) && (!CC_ABC(ccv) ||
	    CCV(ccv, tcp_cwnd) <= CCV(ccv, tcp_cwnd_ssthresh) ||
	    (CC_ABC(ccv) && (ccv->flags & CCF_ABC_SENTAWND)))) {
		/* Use the logic in NewReno ack_received() for slow start. */
		if (CCV(ccv, tcp_cwnd) <= CCV(ccv, tcp_cwnd_ssthresh) ||
		    cubic_data->min_rtt_nsecs == TCPTV_SRTTBASE)
			newreno_cc_algo->ack_received(ccv, type);
		else {
			nsecs_since_cong = gethrtime() -
			    cubic_data->t_last_cong;

			/*
			 * The mean RTT is used to best reflect the equations in
			 * the I-D. Using min_rtt in the tf_cwnd calculation
			 * causes w_tf to grow much faster than it should if the
			 * RTT is dominated by network buffering rather than
			 * propagation delay.
			 */
			w_tf = tf_cwnd(nsecs_since_cong,
			    cubic_data->mean_rtt_nsecs, cubic_data->max_cwnd,
			    CCV(ccv, tcp_mss));

			w_cubic_next = cubic_cwnd(nsecs_since_cong +
			    cubic_data->mean_rtt_nsecs, cubic_data->max_cwnd,
			    CCV(ccv, tcp_mss), cubic_data->K);
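			/*
			 * w_tf approximates the window standard TCP would
			 * have reached since the last congestion event (the
			 * I-D's TCP-friendly estimate), while w_cubic_next
			 * evaluates the cubic function one mean RTT into the
			 * future so that cwnd is grown toward the next RTT's
			 * target.
			 */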

			ccv->flags &= ~CCF_ABC_SENTAWND;

			if (w_cubic_next < w_tf) {
				/*
				 * TCP-friendly region, follow tf
				 * cwnd growth.
				 */
				if (CCV(ccv, tcp_cwnd) < w_tf)
					CCV(ccv, tcp_cwnd) = w_tf;
			} else if (CCV(ccv, tcp_cwnd) < w_cubic_next) {
				/*
				 * Concave or convex region, follow CUBIC
				 * cwnd growth.
				 */
				if (CC_ABC(ccv))
					CCV(ccv, tcp_cwnd) = MIN(w_cubic_next,
					    INT_MAX);
				else
					CCV(ccv, tcp_cwnd) += MAX(1,
					    ((MIN(w_cubic_next, INT_MAX) -
					    CCV(ccv, tcp_cwnd)) *
					    CCV(ccv, tcp_mss)) /
					    CCV(ccv, tcp_cwnd));
			}

			/*
			 * If we're not in slow start and we're probing for a
			 * new cwnd limit at the start of a connection
			 * (happens when hostcache has a relevant entry),
			 * keep updating our current estimate of the
			 * max_cwnd.
			 */
			if (cubic_data->num_cong_events == 0 &&
			    cubic_data->max_cwnd < CCV(ccv, tcp_cwnd)) {
				cubic_data->max_cwnd = CCV(ccv, tcp_cwnd);
				cubic_data->K = cubic_k(cubic_data->max_cwnd /
				    CCV(ccv, tcp_mss));
			}
		}
	}
}

/*
 * This is a Cubic specific implementation of after_idle.
 *   - Update max_cwnd with the current cwnd and recompute K.
 *   - Reset cwnd by calling the New Reno implementation of after_idle.
 *   - Reset t_last_cong.
 */
static void
cubic_after_idle(struct cc_var *ccv)
{
	struct cubic *cubic_data;

	cubic_data = ccv->cc_data;

	cubic_data->max_cwnd = max(cubic_data->max_cwnd, CCV(ccv, tcp_cwnd));
	cubic_data->K = cubic_k(cubic_data->max_cwnd / CCV(ccv, tcp_mss));

	newreno_cc_algo->after_idle(ccv);
	cubic_data->t_last_cong = gethrtime();
}

static void
cubic_cb_destroy(struct cc_var *ccv)
{
	if (ccv->cc_data != NULL)
		kmem_free(ccv->cc_data, sizeof (struct cubic));
}

static int
cubic_cb_init(struct cc_var *ccv)
{
	struct cubic *cubic_data;

	cubic_data = kmem_zalloc(sizeof (struct cubic), KM_NOSLEEP);

	if (cubic_data == NULL)
		return (ENOMEM);

	/* Init some key variables with sensible defaults. */
	cubic_data->t_last_cong = gethrtime();
	cubic_data->min_rtt_nsecs = TCPTV_SRTTBASE;
	cubic_data->mean_rtt_nsecs = 1;

	ccv->cc_data = cubic_data;

	return (0);
}

/*
 * Perform any necessary tasks before we enter congestion recovery.
 */
static void
cubic_cong_signal(struct cc_var *ccv, uint32_t type)
{
	struct cubic *cubic_data;
	uint32_t cwin;
	uint32_t mss;

	cubic_data = ccv->cc_data;
	cwin = CCV(ccv, tcp_cwnd);
	mss = CCV(ccv, tcp_mss);

	switch (type) {
	case CC_NDUPACK:
		if (!IN_FASTRECOVERY(ccv->flags)) {
			if (!IN_CONGRECOVERY(ccv->flags)) {
				cubic_ssthresh_update(ccv);
				cubic_data->num_cong_events++;
				cubic_data->prev_max_cwnd =
				    cubic_data->max_cwnd;
				cubic_data->max_cwnd = cwin;
				CCV(ccv, tcp_cwnd) =
				    CCV(ccv, tcp_cwnd_ssthresh);
			}
			ENTER_RECOVERY(ccv->flags);
		}
		break;

	case CC_ECN:
		if (!IN_CONGRECOVERY(ccv->flags)) {
			cubic_ssthresh_update(ccv);
			cubic_data->num_cong_events++;
			cubic_data->prev_max_cwnd = cubic_data->max_cwnd;
			cubic_data->max_cwnd = cwin;
			cubic_data->t_last_cong = gethrtime();
			CCV(ccv, tcp_cwnd) = CCV(ccv, tcp_cwnd_ssthresh);
			ENTER_CONGRECOVERY(ccv->flags);
		}
		break;

	case CC_RTO:
		/*
		 * Note the congestion event and record the current time so
		 * we know when the most recent congestion event was. Update
		 * ssthresh and max_cwnd, then collapse cwnd to a single
		 * segment, as required after a retransmission timeout.
		 */
		cubic_data->num_cong_events++;
		cubic_data->t_last_cong = gethrtime();
		cubic_ssthresh_update(ccv);
		cubic_data->max_cwnd = cwin;
		CCV(ccv, tcp_cwnd) = mss;
		break;
	}
}

static void
cubic_conn_init(struct cc_var *ccv)
{
	struct cubic *cubic_data;

	cubic_data = ccv->cc_data;

	/*
	 * Ensure we have a sane initial value for max_cwnd recorded. Without
	 * it, the cwnd calculations misbehave when entries from the TCP
	 * hostcache seed the connection.
	 */
	cubic_data->max_cwnd = CCV(ccv, tcp_cwnd);
}

/*
 * Perform any necessary tasks before we exit congestion recovery.
 */
static void
cubic_post_recovery(struct cc_var *ccv)
{
	struct cubic *cubic_data;
	uint32_t mss, pipe;

	cubic_data = ccv->cc_data;

	/* Fast convergence heuristic. */
	if (cubic_data->max_cwnd < cubic_data->prev_max_cwnd) {
		cubic_data->max_cwnd = (cubic_data->max_cwnd * CUBIC_FC_FACTOR)
		    >> CUBIC_SHIFT;
	}
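
	/*
	 * This heuristic implements the I-D's fast convergence: a flow that
	 * suffers another loss before regaining its previous maximum
	 * remembers a slightly reduced W_max, releasing bandwidth so that
	 * newer flows can converge on their share more quickly.
	 */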

	/*
	 * There is a risk that if the cwnd becomes less than mss, and
	 * we do not get enough acks to drive it back up beyond mss,
	 * we will stop transmitting data altogether.
	 *
	 * The Cubic RFC defines values in terms of units of mss. Therefore
	 * we must make sure we have at least 1 mss to make progress,
	 * since the algorithm is written that way.
	 */
	mss = CCV(ccv, tcp_mss);

	if (IN_FASTRECOVERY(ccv->flags)) {
		/*
		 * If inflight data is less than ssthresh, set cwnd
		 * conservatively to avoid a burst of data, as suggested in
		 * the NewReno RFC. Otherwise, use the CUBIC method.
		 */
		pipe = CCV(ccv, tcp_snxt) - CCV(ccv, tcp_suna);
		if (pipe < CCV(ccv, tcp_cwnd_ssthresh)) {
			/*
			 * Ensure that cwnd does not collapse to 1 MSS under
			 * adverse conditions, as recommended by RFC 6582.
			 */
			CCV(ccv, tcp_cwnd) = MAX(pipe, mss) + mss;
		} else {
			/* Update cwnd based on beta and adjusted max_cwnd. */
			CCV(ccv, tcp_cwnd) = max(mss, ((CUBIC_BETA *
			    cubic_data->max_cwnd) >> CUBIC_SHIFT));
		}
	} else {
		CCV(ccv, tcp_cwnd) = max(mss, CCV(ccv, tcp_cwnd));
	}

	cubic_data->t_last_cong = gethrtime();

	/* Calculate the average RTT between congestion epochs. */
	if (cubic_data->epoch_ack_count > 0 &&
	    cubic_data->sum_rtt_nsecs >= cubic_data->epoch_ack_count) {
		cubic_data->mean_rtt_nsecs =
		    (cubic_data->sum_rtt_nsecs / cubic_data->epoch_ack_count);
	}

	cubic_data->epoch_ack_count = 0;
	cubic_data->sum_rtt_nsecs = 0;
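
	/*
	 * Recompute K from the (possibly reduced) max_cwnd, expressed in
	 * units of mss, so that the next epoch's cubic curve is anchored at
	 * the new W_max.
	 */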
	cubic_data->K = cubic_k(cubic_data->max_cwnd / mss);
}

/*
 * Record the min RTT and sum samples for the epoch average RTT calculation.
 */
static void
cubic_record_rtt(struct cc_var *ccv)
{
	struct cubic *cubic_data;
	int t_srtt_nsecs;

	/* Ignore srtt until a min number of samples have been taken. */
	if (CCV(ccv, tcp_rtt_update) >= CUBIC_MIN_RTT_SAMPLES) {
		cubic_data = ccv->cc_data;
		/* tcp_rtt_sa is 8 * smoothed RTT in nanoseconds */
		t_srtt_nsecs = CCV(ccv, tcp_rtt_sa) >> 3;

		/*
		 * Record the current SRTT as our minrtt if it's the smallest
		 * we've seen or minrtt is currently equal to its initialized
		 * value.
		 *
		 * XXXLAS: Should there be some hysteresis for minrtt?
		 */
		if ((t_srtt_nsecs < cubic_data->min_rtt_nsecs ||
		    cubic_data->min_rtt_nsecs == TCPTV_SRTTBASE)) {
			cubic_data->min_rtt_nsecs = max(1, t_srtt_nsecs);

			/*
			 * If the connection is within its first congestion
			 * epoch, ensure we prime mean_rtt_nsecs with a
			 * reasonable value until the epoch average RTT is
			 * calculated in cubic_post_recovery().
			 */
			if (cubic_data->min_rtt_nsecs >
			    cubic_data->mean_rtt_nsecs)
				cubic_data->mean_rtt_nsecs =
				    cubic_data->min_rtt_nsecs;
		}

		/* Sum samples for epoch average RTT calculation. */
		cubic_data->sum_rtt_nsecs += t_srtt_nsecs;
		cubic_data->epoch_ack_count++;
	}
}

/*
 * Update the ssthresh in the event of congestion.
 */
static void
cubic_ssthresh_update(struct cc_var *ccv)
{
	struct cubic *cubic_data;

	cubic_data = ccv->cc_data;

	/*
	 * On the first congestion event, set ssthresh to cwnd * 0.5; on
	 * subsequent congestion events, set it to cwnd * beta.
	 */
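	/*
	 * CUBIC_BETA (defined with the other CUBIC constants in cc_cubic.h)
	 * is the algorithm's multiplicative decrease factor, beta, in fixed
	 * point form with CUBIC_SHIFT worth of precision; scaling cwnd by it
	 * yields the fraction of the window retained after a congestion
	 * event.
	 */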
	if (cubic_data->num_cong_events == 0)
		CCV(ccv, tcp_cwnd_ssthresh) = CCV(ccv, tcp_cwnd) >> 1;
	else
		CCV(ccv, tcp_cwnd_ssthresh) =
		    (CCV(ccv, tcp_cwnd) * CUBIC_BETA) >> CUBIC_SHIFT;
}