/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2017 by Delphix. All rights reserved.
 */

/*
 * The TCP congestion control algorithm extracted from the pre-framework
 * implementation of TCP congestion control.
 */

#include <sys/errno.h>
#include <inet/tcp.h>
#include <inet/tcp_impl.h>
#include <inet/cc.h>
#include <inet/cc/cc_module.h>

/* Forward declarations of the callbacks installed in sunreno_cc_algo. */
static void sunreno_ack_received(struct cc_var *ccv, uint16_t type);
static void sunreno_after_idle(struct cc_var *ccv);
static void sunreno_cong_signal(struct cc_var *ccv, uint32_t type);
static void sunreno_post_recovery(struct cc_var *ccv);

/* Algorithm name stored in the .name member of sunreno_cc_algo. */
#define	CC_SUNRENO_ALGO_NAME	"sunreno"

44*45a4b79dSSebastien Roy static struct modlmisc cc_sunreno_modlmisc = {
45*45a4b79dSSebastien Roy &mod_miscops,
46*45a4b79dSSebastien Roy "SUNReno Congestion Control"
47*45a4b79dSSebastien Roy };
48*45a4b79dSSebastien Roy
49*45a4b79dSSebastien Roy static struct modlinkage cc_sunreno_modlinkage = {
50*45a4b79dSSebastien Roy MODREV_1,
51*45a4b79dSSebastien Roy &cc_sunreno_modlmisc,
52*45a4b79dSSebastien Roy NULL
53*45a4b79dSSebastien Roy };
54*45a4b79dSSebastien Roy
55*45a4b79dSSebastien Roy struct cc_algo sunreno_cc_algo = {
56*45a4b79dSSebastien Roy .name = CC_SUNRENO_ALGO_NAME,
57*45a4b79dSSebastien Roy .ack_received = sunreno_ack_received,
58*45a4b79dSSebastien Roy .after_idle = sunreno_after_idle,
59*45a4b79dSSebastien Roy .cong_signal = sunreno_cong_signal,
60*45a4b79dSSebastien Roy .post_recovery = sunreno_post_recovery,
61*45a4b79dSSebastien Roy };
62*45a4b79dSSebastien Roy
63*45a4b79dSSebastien Roy int
_init(void)64*45a4b79dSSebastien Roy _init(void)
65*45a4b79dSSebastien Roy {
66*45a4b79dSSebastien Roy int err;
67*45a4b79dSSebastien Roy
68*45a4b79dSSebastien Roy if ((err = cc_register_algo(&sunreno_cc_algo)) == 0) {
69*45a4b79dSSebastien Roy if ((err = mod_install(&cc_sunreno_modlinkage)) != 0)
70*45a4b79dSSebastien Roy (void) cc_deregister_algo(&sunreno_cc_algo);
71*45a4b79dSSebastien Roy }
72*45a4b79dSSebastien Roy return (err);
73*45a4b79dSSebastien Roy }
74*45a4b79dSSebastien Roy
/*
 * Module removal routine.
 *
 * Unconditionally returns EBUSY without deregistering the algorithm or
 * removing the module (presumably so that the module is never unloaded
 * once it has been installed).
 */
int
_fini(void)
{
	return (EBUSY);
}

81*45a4b79dSSebastien Roy int
_info(struct modinfo * modinfop)82*45a4b79dSSebastien Roy _info(struct modinfo *modinfop)
83*45a4b79dSSebastien Roy {
84*45a4b79dSSebastien Roy return (mod_info(&cc_sunreno_modlinkage, modinfop));
85*45a4b79dSSebastien Roy }
86*45a4b79dSSebastien Roy
87*45a4b79dSSebastien Roy static void
sunreno_ack_received(struct cc_var * ccv,uint16_t type)88*45a4b79dSSebastien Roy sunreno_ack_received(struct cc_var *ccv, uint16_t type)
89*45a4b79dSSebastien Roy {
90*45a4b79dSSebastien Roy uint32_t add;
91*45a4b79dSSebastien Roy uint32_t cwnd;
92*45a4b79dSSebastien Roy int mss;
93*45a4b79dSSebastien Roy
94*45a4b79dSSebastien Roy if (type == CC_ACK && !IN_RECOVERY(ccv->flags)) {
95*45a4b79dSSebastien Roy mss = CCV(ccv, tcp_mss);
96*45a4b79dSSebastien Roy cwnd = CCV(ccv, tcp_cwnd);
97*45a4b79dSSebastien Roy add = mss;
98*45a4b79dSSebastien Roy
99*45a4b79dSSebastien Roy if (cwnd >= CCV(ccv, tcp_cwnd_ssthresh)) {
100*45a4b79dSSebastien Roy /*
101*45a4b79dSSebastien Roy * This is to prevent an increase of less than 1 MSS of
102*45a4b79dSSebastien Roy * tcp_cwnd. With partial increase, tcp_wput_data()
103*45a4b79dSSebastien Roy * may send out tinygrams in order to preserve mblk
104*45a4b79dSSebastien Roy * boundaries.
105*45a4b79dSSebastien Roy *
106*45a4b79dSSebastien Roy * By initializing tcp_cwnd_cnt to new tcp_cwnd and
107*45a4b79dSSebastien Roy * decrementing it by 1 MSS for every ACKs, tcp_cwnd is
108*45a4b79dSSebastien Roy * increased by 1 MSS for every RTTs.
109*45a4b79dSSebastien Roy */
110*45a4b79dSSebastien Roy if (CCV(ccv, tcp_cwnd_cnt) <= 0) {
111*45a4b79dSSebastien Roy CCV(ccv, tcp_cwnd_cnt) = cwnd + add;
112*45a4b79dSSebastien Roy } else {
113*45a4b79dSSebastien Roy CCV(ccv, tcp_cwnd_cnt) -= add;
114*45a4b79dSSebastien Roy add = 0;
115*45a4b79dSSebastien Roy }
116*45a4b79dSSebastien Roy }
117*45a4b79dSSebastien Roy CCV(ccv, tcp_cwnd) = MIN(cwnd + add, CCV(ccv, tcp_cwnd_max));
118*45a4b79dSSebastien Roy }
119*45a4b79dSSebastien Roy }
120*45a4b79dSSebastien Roy
121*45a4b79dSSebastien Roy static void
sunreno_after_idle(struct cc_var * ccv)122*45a4b79dSSebastien Roy sunreno_after_idle(struct cc_var *ccv)
123*45a4b79dSSebastien Roy {
124*45a4b79dSSebastien Roy int32_t num_sack_blk = 0;
125*45a4b79dSSebastien Roy int mss;
126*45a4b79dSSebastien Roy
127*45a4b79dSSebastien Roy if (CCV(ccv, tcp_snd_sack_ok) && CCV(ccv, tcp_num_sack_blk) > 0) {
128*45a4b79dSSebastien Roy int32_t opt_len;
129*45a4b79dSSebastien Roy
130*45a4b79dSSebastien Roy num_sack_blk = MIN(CCV(ccv, tcp_max_sack_blk),
131*45a4b79dSSebastien Roy CCV(ccv, tcp_num_sack_blk));
132*45a4b79dSSebastien Roy opt_len = num_sack_blk * sizeof (sack_blk_t) + TCPOPT_NOP_LEN *
133*45a4b79dSSebastien Roy 2 + TCPOPT_HEADER_LEN;
134*45a4b79dSSebastien Roy mss = CCV(ccv, tcp_mss) - opt_len;
135*45a4b79dSSebastien Roy } else {
136*45a4b79dSSebastien Roy mss = CCV(ccv, tcp_mss);
137*45a4b79dSSebastien Roy }
138*45a4b79dSSebastien Roy
139*45a4b79dSSebastien Roy TCP_SET_INIT_CWND(CCV_PROTO(ccv), mss,
140*45a4b79dSSebastien Roy CCSV(ccv, tcps_slow_start_after_idle));
141*45a4b79dSSebastien Roy }
142*45a4b79dSSebastien Roy
143*45a4b79dSSebastien Roy /*
144*45a4b79dSSebastien Roy * Perform any necessary tasks before we enter congestion recovery.
145*45a4b79dSSebastien Roy */
146*45a4b79dSSebastien Roy static void
sunreno_cong_signal(struct cc_var * ccv,uint32_t type)147*45a4b79dSSebastien Roy sunreno_cong_signal(struct cc_var *ccv, uint32_t type)
148*45a4b79dSSebastien Roy {
149*45a4b79dSSebastien Roy int npkt;
150*45a4b79dSSebastien Roy int mss;
151*45a4b79dSSebastien Roy
152*45a4b79dSSebastien Roy /* Catch algos which mistakenly leak private signal types. */
153*45a4b79dSSebastien Roy ASSERT((type & CC_SIGPRIVMASK) == 0);
154*45a4b79dSSebastien Roy
155*45a4b79dSSebastien Roy mss = CCV(ccv, tcp_mss);
156*45a4b79dSSebastien Roy npkt = ((CCV(ccv, tcp_snxt) - CCV(ccv, tcp_suna)) >> 1) / mss;
157*45a4b79dSSebastien Roy
158*45a4b79dSSebastien Roy switch (type) {
159*45a4b79dSSebastien Roy case CC_NDUPACK:
160*45a4b79dSSebastien Roy if (!IN_FASTRECOVERY(ccv->flags)) {
161*45a4b79dSSebastien Roy if (!IN_CONGRECOVERY(ccv->flags)) {
162*45a4b79dSSebastien Roy CCV(ccv, tcp_cwnd_ssthresh) = MAX(npkt, 2) *
163*45a4b79dSSebastien Roy mss;
164*45a4b79dSSebastien Roy CCV(ccv, tcp_cwnd) = (npkt +
165*45a4b79dSSebastien Roy CCV(ccv, tcp_dupack_cnt)) * mss;
166*45a4b79dSSebastien Roy }
167*45a4b79dSSebastien Roy ENTER_RECOVERY(ccv->flags);
168*45a4b79dSSebastien Roy }
169*45a4b79dSSebastien Roy break;
170*45a4b79dSSebastien Roy case CC_ECN:
171*45a4b79dSSebastien Roy if (!IN_CONGRECOVERY(ccv->flags) && !CCV(ccv, tcp_cwr)) {
172*45a4b79dSSebastien Roy CCV(ccv, tcp_cwnd_ssthresh) = MAX(npkt, 2) * mss;
173*45a4b79dSSebastien Roy CCV(ccv, tcp_cwnd) = npkt * mss;
174*45a4b79dSSebastien Roy if (CCV(ccv, tcp_cwnd) == 0) {
175*45a4b79dSSebastien Roy /*
176*45a4b79dSSebastien Roy * This makes sure that when the ACK comes
177*45a4b79dSSebastien Roy * back, we will increase tcp_cwnd by 1 MSS.
178*45a4b79dSSebastien Roy */
179*45a4b79dSSebastien Roy CCV(ccv, tcp_cwnd_cnt) = 0;
180*45a4b79dSSebastien Roy }
181*45a4b79dSSebastien Roy ENTER_CONGRECOVERY(ccv->flags);
182*45a4b79dSSebastien Roy }
183*45a4b79dSSebastien Roy break;
184*45a4b79dSSebastien Roy case CC_RTO:
185*45a4b79dSSebastien Roy /*
186*45a4b79dSSebastien Roy * After retransmission, we need to do slow start. Set the
187*45a4b79dSSebastien Roy * ssthresh to one half of current effective window and cwnd to
188*45a4b79dSSebastien Roy * one MSS. Also reset tcp_cwnd_cnt.
189*45a4b79dSSebastien Roy *
190*45a4b79dSSebastien Roy * Note that if tcp_ssthresh is reduced because of ECN, do not
191*45a4b79dSSebastien Roy * reduce it again unless it is already one window of data away
192*45a4b79dSSebastien Roy * (tcp_cwr should then be cleared) or this is a timeout for a
193*45a4b79dSSebastien Roy * retransmitted segment.
194*45a4b79dSSebastien Roy */
195*45a4b79dSSebastien Roy if (!CCV(ccv, tcp_cwr) || CCV(ccv, tcp_rexmit)) {
196*45a4b79dSSebastien Roy if (CCV(ccv, tcp_timer_backoff) != 0)
197*45a4b79dSSebastien Roy npkt = CCV(ccv, tcp_cwnd_ssthresh) / 2 / mss;
198*45a4b79dSSebastien Roy CCV(ccv, tcp_cwnd_ssthresh) = MAX(npkt, 2) * mss;
199*45a4b79dSSebastien Roy }
200*45a4b79dSSebastien Roy CCV(ccv, tcp_cwnd) = mss;
201*45a4b79dSSebastien Roy CCV(ccv, tcp_cwnd_cnt) = 0;
202*45a4b79dSSebastien Roy break;
203*45a4b79dSSebastien Roy }
204*45a4b79dSSebastien Roy }
205*45a4b79dSSebastien Roy
206*45a4b79dSSebastien Roy /*
207*45a4b79dSSebastien Roy * Perform any necessary tasks before we exit congestion recovery.
208*45a4b79dSSebastien Roy */
209*45a4b79dSSebastien Roy static void
sunreno_post_recovery(struct cc_var * ccv)210*45a4b79dSSebastien Roy sunreno_post_recovery(struct cc_var *ccv)
211*45a4b79dSSebastien Roy {
212*45a4b79dSSebastien Roy /*
213*45a4b79dSSebastien Roy * Restore the congestion window back to ssthresh as per RFC 5681
214*45a4b79dSSebastien Roy * section 3.2.
215*45a4b79dSSebastien Roy */
216*45a4b79dSSebastien Roy if (IN_FASTRECOVERY(ccv->flags)) {
217*45a4b79dSSebastien Roy if (CCV(ccv, tcp_cwnd) > CCV(ccv, tcp_cwnd_ssthresh)) {
218*45a4b79dSSebastien Roy CCV(ccv, tcp_cwnd) = CCV(ccv, tcp_cwnd_ssthresh);
219*45a4b79dSSebastien Roy }
220*45a4b79dSSebastien Roy }
221*45a4b79dSSebastien Roy CCV(ccv, tcp_cwnd_cnt) = 0;
222*45a4b79dSSebastien Roy }
223