/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
21*45a4b79dSSebastien Roy 
/*
 * Copyright (c) 2017 by Delphix. All rights reserved.
 */
25*45a4b79dSSebastien Roy 
/*
 * The TCP congestion control algorithm extracted from the pre-framework
 * implementation of TCP congestion control.
 */
30*45a4b79dSSebastien Roy 
31*45a4b79dSSebastien Roy #include <sys/errno.h>
32*45a4b79dSSebastien Roy #include <inet/tcp.h>
33*45a4b79dSSebastien Roy #include <inet/tcp_impl.h>
34*45a4b79dSSebastien Roy #include <inet/cc.h>
35*45a4b79dSSebastien Roy #include <inet/cc/cc_module.h>
36*45a4b79dSSebastien Roy 
/* Name under which this algorithm is published in sunreno_cc_algo. */
#define	CC_SUNRENO_ALGO_NAME "sunreno"

/* Hooks implemented below and referenced from sunreno_cc_algo. */
static void	sunreno_ack_received(struct cc_var *ccv, uint16_t type);
static void	sunreno_after_idle(struct cc_var *ccv);
static void	sunreno_cong_signal(struct cc_var *ccv, uint32_t type);
static void	sunreno_post_recovery(struct cc_var *ccv);
43*45a4b79dSSebastien Roy 
44*45a4b79dSSebastien Roy static struct modlmisc cc_sunreno_modlmisc = {
45*45a4b79dSSebastien Roy 	&mod_miscops,
46*45a4b79dSSebastien Roy 	"SUNReno Congestion Control"
47*45a4b79dSSebastien Roy };
48*45a4b79dSSebastien Roy 
49*45a4b79dSSebastien Roy static struct modlinkage cc_sunreno_modlinkage = {
50*45a4b79dSSebastien Roy 	MODREV_1,
51*45a4b79dSSebastien Roy 	&cc_sunreno_modlmisc,
52*45a4b79dSSebastien Roy 	NULL
53*45a4b79dSSebastien Roy };
54*45a4b79dSSebastien Roy 
55*45a4b79dSSebastien Roy struct cc_algo sunreno_cc_algo = {
56*45a4b79dSSebastien Roy 	.name = CC_SUNRENO_ALGO_NAME,
57*45a4b79dSSebastien Roy 	.ack_received = sunreno_ack_received,
58*45a4b79dSSebastien Roy 	.after_idle = sunreno_after_idle,
59*45a4b79dSSebastien Roy 	.cong_signal = sunreno_cong_signal,
60*45a4b79dSSebastien Roy 	.post_recovery = sunreno_post_recovery,
61*45a4b79dSSebastien Roy };
62*45a4b79dSSebastien Roy 
63*45a4b79dSSebastien Roy int
_init(void)64*45a4b79dSSebastien Roy _init(void)
65*45a4b79dSSebastien Roy {
66*45a4b79dSSebastien Roy 	int err;
67*45a4b79dSSebastien Roy 
68*45a4b79dSSebastien Roy 	if ((err = cc_register_algo(&sunreno_cc_algo)) == 0) {
69*45a4b79dSSebastien Roy 		if ((err = mod_install(&cc_sunreno_modlinkage)) != 0)
70*45a4b79dSSebastien Roy 			(void) cc_deregister_algo(&sunreno_cc_algo);
71*45a4b79dSSebastien Roy 	}
72*45a4b79dSSebastien Roy 	return (err);
73*45a4b79dSSebastien Roy }
74*45a4b79dSSebastien Roy 
/*
 * Unconditionally reports EBUSY.  Nothing is torn down here: the algorithm
 * registered by _init() is never deregistered on this path.
 */
int
_fini(void)
{
	return (EBUSY);
}
80*45a4b79dSSebastien Roy 
81*45a4b79dSSebastien Roy int
_info(struct modinfo * modinfop)82*45a4b79dSSebastien Roy _info(struct modinfo *modinfop)
83*45a4b79dSSebastien Roy {
84*45a4b79dSSebastien Roy 	return (mod_info(&cc_sunreno_modlinkage, modinfop));
85*45a4b79dSSebastien Roy }
86*45a4b79dSSebastien Roy 
87*45a4b79dSSebastien Roy static void
sunreno_ack_received(struct cc_var * ccv,uint16_t type)88*45a4b79dSSebastien Roy sunreno_ack_received(struct cc_var *ccv, uint16_t type)
89*45a4b79dSSebastien Roy {
90*45a4b79dSSebastien Roy 	uint32_t add;
91*45a4b79dSSebastien Roy 	uint32_t cwnd;
92*45a4b79dSSebastien Roy 	int mss;
93*45a4b79dSSebastien Roy 
94*45a4b79dSSebastien Roy 	if (type == CC_ACK && !IN_RECOVERY(ccv->flags)) {
95*45a4b79dSSebastien Roy 		mss = CCV(ccv, tcp_mss);
96*45a4b79dSSebastien Roy 		cwnd = CCV(ccv, tcp_cwnd);
97*45a4b79dSSebastien Roy 		add = mss;
98*45a4b79dSSebastien Roy 
99*45a4b79dSSebastien Roy 		if (cwnd >= CCV(ccv, tcp_cwnd_ssthresh)) {
100*45a4b79dSSebastien Roy 			/*
101*45a4b79dSSebastien Roy 			 * This is to prevent an increase of less than 1 MSS of
102*45a4b79dSSebastien Roy 			 * tcp_cwnd.  With partial increase, tcp_wput_data()
103*45a4b79dSSebastien Roy 			 * may send out tinygrams in order to preserve mblk
104*45a4b79dSSebastien Roy 			 * boundaries.
105*45a4b79dSSebastien Roy 			 *
106*45a4b79dSSebastien Roy 			 * By initializing tcp_cwnd_cnt to new tcp_cwnd and
107*45a4b79dSSebastien Roy 			 * decrementing it by 1 MSS for every ACKs, tcp_cwnd is
108*45a4b79dSSebastien Roy 			 * increased by 1 MSS for every RTTs.
109*45a4b79dSSebastien Roy 			 */
110*45a4b79dSSebastien Roy 			if (CCV(ccv, tcp_cwnd_cnt) <= 0) {
111*45a4b79dSSebastien Roy 				CCV(ccv, tcp_cwnd_cnt) = cwnd + add;
112*45a4b79dSSebastien Roy 			} else {
113*45a4b79dSSebastien Roy 				CCV(ccv, tcp_cwnd_cnt) -= add;
114*45a4b79dSSebastien Roy 				add = 0;
115*45a4b79dSSebastien Roy 			}
116*45a4b79dSSebastien Roy 		}
117*45a4b79dSSebastien Roy 		CCV(ccv, tcp_cwnd) = MIN(cwnd + add, CCV(ccv, tcp_cwnd_max));
118*45a4b79dSSebastien Roy 	}
119*45a4b79dSSebastien Roy }
120*45a4b79dSSebastien Roy 
121*45a4b79dSSebastien Roy static void
sunreno_after_idle(struct cc_var * ccv)122*45a4b79dSSebastien Roy sunreno_after_idle(struct cc_var *ccv)
123*45a4b79dSSebastien Roy {
124*45a4b79dSSebastien Roy 	int32_t	num_sack_blk = 0;
125*45a4b79dSSebastien Roy 	int mss;
126*45a4b79dSSebastien Roy 
127*45a4b79dSSebastien Roy 	if (CCV(ccv, tcp_snd_sack_ok) && CCV(ccv, tcp_num_sack_blk) > 0) {
128*45a4b79dSSebastien Roy 		int32_t	opt_len;
129*45a4b79dSSebastien Roy 
130*45a4b79dSSebastien Roy 		num_sack_blk = MIN(CCV(ccv, tcp_max_sack_blk),
131*45a4b79dSSebastien Roy 		    CCV(ccv, tcp_num_sack_blk));
132*45a4b79dSSebastien Roy 		opt_len = num_sack_blk * sizeof (sack_blk_t) + TCPOPT_NOP_LEN *
133*45a4b79dSSebastien Roy 		    2 + TCPOPT_HEADER_LEN;
134*45a4b79dSSebastien Roy 		mss = CCV(ccv, tcp_mss) - opt_len;
135*45a4b79dSSebastien Roy 	} else {
136*45a4b79dSSebastien Roy 		mss = CCV(ccv, tcp_mss);
137*45a4b79dSSebastien Roy 	}
138*45a4b79dSSebastien Roy 
139*45a4b79dSSebastien Roy 	TCP_SET_INIT_CWND(CCV_PROTO(ccv), mss,
140*45a4b79dSSebastien Roy 	    CCSV(ccv, tcps_slow_start_after_idle));
141*45a4b79dSSebastien Roy }
142*45a4b79dSSebastien Roy 
143*45a4b79dSSebastien Roy /*
144*45a4b79dSSebastien Roy  * Perform any necessary tasks before we enter congestion recovery.
145*45a4b79dSSebastien Roy  */
146*45a4b79dSSebastien Roy static void
sunreno_cong_signal(struct cc_var * ccv,uint32_t type)147*45a4b79dSSebastien Roy sunreno_cong_signal(struct cc_var *ccv, uint32_t type)
148*45a4b79dSSebastien Roy {
149*45a4b79dSSebastien Roy 	int npkt;
150*45a4b79dSSebastien Roy 	int mss;
151*45a4b79dSSebastien Roy 
152*45a4b79dSSebastien Roy 	/* Catch algos which mistakenly leak private signal types. */
153*45a4b79dSSebastien Roy 	ASSERT((type & CC_SIGPRIVMASK) == 0);
154*45a4b79dSSebastien Roy 
155*45a4b79dSSebastien Roy 	mss = CCV(ccv, tcp_mss);
156*45a4b79dSSebastien Roy 	npkt = ((CCV(ccv, tcp_snxt) - CCV(ccv, tcp_suna)) >> 1) / mss;
157*45a4b79dSSebastien Roy 
158*45a4b79dSSebastien Roy 	switch (type) {
159*45a4b79dSSebastien Roy 	case CC_NDUPACK:
160*45a4b79dSSebastien Roy 		if (!IN_FASTRECOVERY(ccv->flags)) {
161*45a4b79dSSebastien Roy 			if (!IN_CONGRECOVERY(ccv->flags)) {
162*45a4b79dSSebastien Roy 				CCV(ccv, tcp_cwnd_ssthresh) = MAX(npkt, 2) *
163*45a4b79dSSebastien Roy 				    mss;
164*45a4b79dSSebastien Roy 				CCV(ccv, tcp_cwnd) = (npkt +
165*45a4b79dSSebastien Roy 				    CCV(ccv, tcp_dupack_cnt)) * mss;
166*45a4b79dSSebastien Roy 			}
167*45a4b79dSSebastien Roy 			ENTER_RECOVERY(ccv->flags);
168*45a4b79dSSebastien Roy 		}
169*45a4b79dSSebastien Roy 		break;
170*45a4b79dSSebastien Roy 	case CC_ECN:
171*45a4b79dSSebastien Roy 		if (!IN_CONGRECOVERY(ccv->flags) && !CCV(ccv, tcp_cwr)) {
172*45a4b79dSSebastien Roy 			CCV(ccv, tcp_cwnd_ssthresh) = MAX(npkt, 2) * mss;
173*45a4b79dSSebastien Roy 			CCV(ccv, tcp_cwnd) = npkt * mss;
174*45a4b79dSSebastien Roy 			if (CCV(ccv, tcp_cwnd) == 0) {
175*45a4b79dSSebastien Roy 				/*
176*45a4b79dSSebastien Roy 				 * This makes sure that when the ACK comes
177*45a4b79dSSebastien Roy 				 * back, we will increase tcp_cwnd by 1 MSS.
178*45a4b79dSSebastien Roy 				 */
179*45a4b79dSSebastien Roy 				CCV(ccv, tcp_cwnd_cnt) = 0;
180*45a4b79dSSebastien Roy 			}
181*45a4b79dSSebastien Roy 			ENTER_CONGRECOVERY(ccv->flags);
182*45a4b79dSSebastien Roy 		}
183*45a4b79dSSebastien Roy 		break;
184*45a4b79dSSebastien Roy 	case CC_RTO:
185*45a4b79dSSebastien Roy 		/*
186*45a4b79dSSebastien Roy 		 * After retransmission, we need to do slow start.  Set the
187*45a4b79dSSebastien Roy 		 * ssthresh to one half of current effective window and cwnd to
188*45a4b79dSSebastien Roy 		 * one MSS.  Also reset tcp_cwnd_cnt.
189*45a4b79dSSebastien Roy 		 *
190*45a4b79dSSebastien Roy 		 * Note that if tcp_ssthresh is reduced because of ECN, do not
191*45a4b79dSSebastien Roy 		 * reduce it again unless it is already one window of data away
192*45a4b79dSSebastien Roy 		 * (tcp_cwr should then be cleared) or this is a timeout for a
193*45a4b79dSSebastien Roy 		 * retransmitted segment.
194*45a4b79dSSebastien Roy 		 */
195*45a4b79dSSebastien Roy 		if (!CCV(ccv, tcp_cwr) || CCV(ccv, tcp_rexmit)) {
196*45a4b79dSSebastien Roy 			if (CCV(ccv, tcp_timer_backoff) != 0)
197*45a4b79dSSebastien Roy 				npkt = CCV(ccv, tcp_cwnd_ssthresh) / 2 / mss;
198*45a4b79dSSebastien Roy 			CCV(ccv, tcp_cwnd_ssthresh) = MAX(npkt, 2) * mss;
199*45a4b79dSSebastien Roy 		}
200*45a4b79dSSebastien Roy 		CCV(ccv, tcp_cwnd) = mss;
201*45a4b79dSSebastien Roy 		CCV(ccv, tcp_cwnd_cnt) = 0;
202*45a4b79dSSebastien Roy 		break;
203*45a4b79dSSebastien Roy 	}
204*45a4b79dSSebastien Roy }
205*45a4b79dSSebastien Roy 
206*45a4b79dSSebastien Roy /*
207*45a4b79dSSebastien Roy  * Perform any necessary tasks before we exit congestion recovery.
208*45a4b79dSSebastien Roy  */
209*45a4b79dSSebastien Roy static void
sunreno_post_recovery(struct cc_var * ccv)210*45a4b79dSSebastien Roy sunreno_post_recovery(struct cc_var *ccv)
211*45a4b79dSSebastien Roy {
212*45a4b79dSSebastien Roy 	/*
213*45a4b79dSSebastien Roy 	 * Restore the congestion window back to ssthresh as per RFC 5681
214*45a4b79dSSebastien Roy 	 * section 3.2.
215*45a4b79dSSebastien Roy 	 */
216*45a4b79dSSebastien Roy 	if (IN_FASTRECOVERY(ccv->flags)) {
217*45a4b79dSSebastien Roy 		if (CCV(ccv, tcp_cwnd) > CCV(ccv, tcp_cwnd_ssthresh)) {
218*45a4b79dSSebastien Roy 			CCV(ccv, tcp_cwnd) = CCV(ccv, tcp_cwnd_ssthresh);
219*45a4b79dSSebastien Roy 		}
220*45a4b79dSSebastien Roy 	}
221*45a4b79dSSebastien Roy 	CCV(ccv, tcp_cwnd_cnt) = 0;
222*45a4b79dSSebastien Roy }
223