1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2017 by Delphix. All rights reserved.
24  */
25 
26 /*
27  * The TCP congestion control algorithm extracted from the pre-framework
28  * implementation of TCP congestion control.
29  */
30 
31 #include <sys/errno.h>
32 #include <inet/tcp.h>
33 #include <inet/tcp_impl.h>
34 #include <inet/cc.h>
35 #include <inet/cc/cc_module.h>
36 
/*
 * Forward declarations of the file-local callbacks that are installed in
 * the sunreno_cc_algo table.
 */
static void	sunreno_ack_received(struct cc_var *ccv, uint16_t type);
static void	sunreno_after_idle(struct cc_var *ccv);
static void	sunreno_cong_signal(struct cc_var *ccv, uint32_t type);
static void	sunreno_post_recovery(struct cc_var *ccv);
41 
/* Name stored in the .name member of sunreno_cc_algo. */
#define	CC_SUNRENO_ALGO_NAME "sunreno"
43 
/*
 * Linkage record for this module: references the mod_miscops operations
 * vector and supplies the module's description string.
 */
static struct modlmisc cc_sunreno_modlmisc = {
	&mod_miscops,
	"SUNReno Congestion Control"
};
48 
/*
 * Module linkage handed to mod_install() and mod_info().  It carries the
 * MODREV_1 revision and the single modlmisc record above; the trailing
 * NULL presumably terminates the linkage list.
 */
static struct modlinkage cc_sunreno_modlinkage = {
	MODREV_1,
	&cc_sunreno_modlmisc,
	NULL
};
54 
/*
 * Algorithm descriptor registered with cc_register_algo() in _init().
 * Only the name and the four callbacks implemented in this file are set;
 * all other members are left zero-initialized.
 */
struct cc_algo sunreno_cc_algo = {
	.name = CC_SUNRENO_ALGO_NAME,
	.ack_received = sunreno_ack_received,
	.after_idle = sunreno_after_idle,
	.cong_signal = sunreno_cong_signal,
	.post_recovery = sunreno_post_recovery,
};
62 
63 int
_init(void)64 _init(void)
65 {
66 	int err;
67 
68 	if ((err = cc_register_algo(&sunreno_cc_algo)) == 0) {
69 		if ((err = mod_install(&cc_sunreno_modlinkage)) != 0)
70 			(void) cc_deregister_algo(&sunreno_cc_algo);
71 	}
72 	return (err);
73 }
74 
/*
 * Module unload entry point.  Unconditionally returns EBUSY and never
 * deregisters the algorithm, so presumably the module is meant to stay
 * resident once it has been loaded.
 */
int
_fini(void)
{
	return (EBUSY);
}
80 
/*
 * Module information entry point.  Passes this module's linkage and the
 * caller-supplied modinfop to mod_info() and returns its result unchanged.
 */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&cc_sunreno_modlinkage, modinfop));
}
86 
87 static void
sunreno_ack_received(struct cc_var * ccv,uint16_t type)88 sunreno_ack_received(struct cc_var *ccv, uint16_t type)
89 {
90 	uint32_t add;
91 	uint32_t cwnd;
92 	int mss;
93 
94 	if (type == CC_ACK && !IN_RECOVERY(ccv->flags)) {
95 		mss = CCV(ccv, tcp_mss);
96 		cwnd = CCV(ccv, tcp_cwnd);
97 		add = mss;
98 
99 		if (cwnd >= CCV(ccv, tcp_cwnd_ssthresh)) {
100 			/*
101 			 * This is to prevent an increase of less than 1 MSS of
102 			 * tcp_cwnd.  With partial increase, tcp_wput_data()
103 			 * may send out tinygrams in order to preserve mblk
104 			 * boundaries.
105 			 *
106 			 * By initializing tcp_cwnd_cnt to new tcp_cwnd and
107 			 * decrementing it by 1 MSS for every ACKs, tcp_cwnd is
108 			 * increased by 1 MSS for every RTTs.
109 			 */
110 			if (CCV(ccv, tcp_cwnd_cnt) <= 0) {
111 				CCV(ccv, tcp_cwnd_cnt) = cwnd + add;
112 			} else {
113 				CCV(ccv, tcp_cwnd_cnt) -= add;
114 				add = 0;
115 			}
116 		}
117 		CCV(ccv, tcp_cwnd) = MIN(cwnd + add, CCV(ccv, tcp_cwnd_max));
118 	}
119 }
120 
121 static void
sunreno_after_idle(struct cc_var * ccv)122 sunreno_after_idle(struct cc_var *ccv)
123 {
124 	int32_t	num_sack_blk = 0;
125 	int mss;
126 
127 	if (CCV(ccv, tcp_snd_sack_ok) && CCV(ccv, tcp_num_sack_blk) > 0) {
128 		int32_t	opt_len;
129 
130 		num_sack_blk = MIN(CCV(ccv, tcp_max_sack_blk),
131 		    CCV(ccv, tcp_num_sack_blk));
132 		opt_len = num_sack_blk * sizeof (sack_blk_t) + TCPOPT_NOP_LEN *
133 		    2 + TCPOPT_HEADER_LEN;
134 		mss = CCV(ccv, tcp_mss) - opt_len;
135 	} else {
136 		mss = CCV(ccv, tcp_mss);
137 	}
138 
139 	TCP_SET_INIT_CWND(CCV_PROTO(ccv), mss,
140 	    CCSV(ccv, tcps_slow_start_after_idle));
141 }
142 
143 /*
144  * Perform any necessary tasks before we enter congestion recovery.
145  */
146 static void
sunreno_cong_signal(struct cc_var * ccv,uint32_t type)147 sunreno_cong_signal(struct cc_var *ccv, uint32_t type)
148 {
149 	int npkt;
150 	int mss;
151 
152 	/* Catch algos which mistakenly leak private signal types. */
153 	ASSERT((type & CC_SIGPRIVMASK) == 0);
154 
155 	mss = CCV(ccv, tcp_mss);
156 	npkt = ((CCV(ccv, tcp_snxt) - CCV(ccv, tcp_suna)) >> 1) / mss;
157 
158 	switch (type) {
159 	case CC_NDUPACK:
160 		if (!IN_FASTRECOVERY(ccv->flags)) {
161 			if (!IN_CONGRECOVERY(ccv->flags)) {
162 				CCV(ccv, tcp_cwnd_ssthresh) = MAX(npkt, 2) *
163 				    mss;
164 				CCV(ccv, tcp_cwnd) = (npkt +
165 				    CCV(ccv, tcp_dupack_cnt)) * mss;
166 			}
167 			ENTER_RECOVERY(ccv->flags);
168 		}
169 		break;
170 	case CC_ECN:
171 		if (!IN_CONGRECOVERY(ccv->flags) && !CCV(ccv, tcp_cwr)) {
172 			CCV(ccv, tcp_cwnd_ssthresh) = MAX(npkt, 2) * mss;
173 			CCV(ccv, tcp_cwnd) = npkt * mss;
174 			if (CCV(ccv, tcp_cwnd) == 0) {
175 				/*
176 				 * This makes sure that when the ACK comes
177 				 * back, we will increase tcp_cwnd by 1 MSS.
178 				 */
179 				CCV(ccv, tcp_cwnd_cnt) = 0;
180 			}
181 			ENTER_CONGRECOVERY(ccv->flags);
182 		}
183 		break;
184 	case CC_RTO:
185 		/*
186 		 * After retransmission, we need to do slow start.  Set the
187 		 * ssthresh to one half of current effective window and cwnd to
188 		 * one MSS.  Also reset tcp_cwnd_cnt.
189 		 *
190 		 * Note that if tcp_ssthresh is reduced because of ECN, do not
191 		 * reduce it again unless it is already one window of data away
192 		 * (tcp_cwr should then be cleared) or this is a timeout for a
193 		 * retransmitted segment.
194 		 */
195 		if (!CCV(ccv, tcp_cwr) || CCV(ccv, tcp_rexmit)) {
196 			if (CCV(ccv, tcp_timer_backoff) != 0)
197 				npkt = CCV(ccv, tcp_cwnd_ssthresh) / 2 / mss;
198 			CCV(ccv, tcp_cwnd_ssthresh) = MAX(npkt, 2) * mss;
199 		}
200 		CCV(ccv, tcp_cwnd) = mss;
201 		CCV(ccv, tcp_cwnd_cnt) = 0;
202 		break;
203 	}
204 }
205 
206 /*
207  * Perform any necessary tasks before we exit congestion recovery.
208  */
209 static void
sunreno_post_recovery(struct cc_var * ccv)210 sunreno_post_recovery(struct cc_var *ccv)
211 {
212 	/*
213 	 * Restore the congestion window back to ssthresh as per RFC 5681
214 	 * section 3.2.
215 	 */
216 	if (IN_FASTRECOVERY(ccv->flags)) {
217 		if (CCV(ccv, tcp_cwnd) > CCV(ccv, tcp_cwnd_ssthresh)) {
218 			CCV(ccv, tcp_cwnd) = CCV(ccv, tcp_cwnd_ssthresh);
219 		}
220 	}
221 	CCV(ccv, tcp_cwnd_cnt) = 0;
222 }
223