/*
 * Copyright (c) 2007-2008
 * Swinburne University of Technology, Melbourne, Australia.
 * Copyright (c) 2009-2010 Lawrence Stewart <lstewart@freebsd.org>
 * Copyright (c) 2010 The FreeBSD Foundation
 * All rights reserved.
 * Copyright (c) 2017 by Delphix. All rights reserved.
 *
 * This software was developed at the Centre for Advanced Internet
 * Architectures, Swinburne University of Technology, by Lawrence Stewart and
 * James Healy, made possible in part by a grant from the Cisco University
 * Research Program Fund at Community Foundation Silicon Valley.
 *
 * Portions of this software were developed at the Centre for Advanced
 * Internet Architectures, Swinburne University of Technology, Melbourne,
 * Australia by David Hayes under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

/*
 * This software was first released in 2007 by James Healy and Lawrence Stewart
 * whilst working on the NewTCP research project at Swinburne University of
 * Technology's Centre for Advanced Internet Architectures, Melbourne,
 * Australia, which was made possible in part by a grant from the Cisco
 * University Research Program Fund at Community Foundation Silicon Valley.
 * More details are available at:
 *   http://caia.swin.edu.au/urp/newtcp/
 */

#ifndef _NETINET_CC_H_
#define	_NETINET_CC_H_

#if (defined(_KERNEL) || defined(_KMEMUSER))

/*
 * Dependencies are pulled in ahead of the extern "C" region so that the
 * included headers are not forced into C linkage by this file.
 */
#include <netinet/tcp.h>
#include <sys/queue.h>
#include <sys/rwlock.h>

#ifdef __cplusplus
extern "C" {
#endif

#define	CC_ALGO_NAME_MAX	16	/* max congestion control name length */

#define	CC_DEFAULT_ALGO_NAME	"sunreno"

struct tcp_s;
struct sctp_s;

/* CC housekeeping functions. */
extern struct cc_algo *cc_load_algo(const char *name);
extern int cc_register_algo(struct cc_algo *add_cc);
extern int cc_deregister_algo(struct cc_algo *remove_cc);

/*
 * Wrapper around transport structs that contain same-named congestion
 * control variables. Allows algos to be shared amongst multiple CC aware
 * transports.
 *
 * In theory, this code (from FreeBSD) can be used to support pluggable
 * congestion control for sctp as well as tcp. However, the support for sctp
 * in FreeBSD is incomplete, and in practice "type" is ignored. cc_module.h
 * provides a CCV macro which implementations can use to get a variable out of
 * the protocol-appropriate structure.
 *
 * If FreeBSD eventually does extend support for pluggable congestion control
 * to sctp, we'll need to make sure we're setting "type" appropriately or use
 * a definition of CCV that ignores it.
 */
struct cc_var {
	void		*cc_data; /* Per-connection private algorithm data. */
	int		bytes_this_ack; /* # bytes acked by the current ACK. */
	int		t_bytes_acked; /* # bytes acked during current RTT */
	tcp_seq		curack; /* Most recent ACK. */
	uint32_t	flags; /* Flags for cc_var (see below) */
	int		type; /* Indicates which ptr is valid in ccvc. */
	union ccv_container {
		struct tcp_s		*tcp;
		struct sctp_s		*sctp;
	} ccvc;
	uint16_t	nsegs; /* # segments coalesced into current chain. */
};

/*
 * cc_var flags.
 *
 * CCF_ABC_SENTAWND is set when a full congestion window of data has been ACKed
 * according to the Appropriate Byte Counting spec, defined in RFC 3465.
 */
#define	CCF_ABC_SENTAWND	0x0001	/* ABC counted cwnd worth of bytes? */
#define	CCF_CWND_LIMITED	0x0002	/* Are we currently cwnd limited? */
#define	CCF_FASTRECOVERY	0x0004	/* in NewReno Fast Recovery */
#define	CCF_WASFRECOVERY	0x0008	/* was in NewReno Fast Recovery */
#define	CCF_CONGRECOVERY	0x0010	/* congestion recovery mode */
#define	CCF_WASCRECOVERY	0x0020	/* was in congestion recovery */
/*
 * In slow-start due to a retransmission timeout. This flag is enabled for the
 * duration of the slow-start phase.
 */
#define	CCF_RTO			0x0040	/* in slow-start due to timeout */

/*
 * Recovery-state helpers operating on a flags word built from the CCF_*
 * bits above.
 *
 * IN_*    evaluates to non-zero when the corresponding bit(s) are set.
 * ENTER_* sets the corresponding bit(s) in its argument.
 * EXIT_*  clears the corresponding bit(s) in its argument.
 *
 * The argument and the whole expansion are parenthesized so the macros
 * behave as single expressions whatever operand is passed and wherever they
 * are used. ENTER_* and EXIT_* assign to their argument, so it must be a
 * modifiable lvalue.
 */
#define	IN_FASTRECOVERY(flags)		((flags) & CCF_FASTRECOVERY)
#define	ENTER_FASTRECOVERY(flags)	((flags) |= CCF_FASTRECOVERY)
#define	EXIT_FASTRECOVERY(flags)	((flags) &= ~CCF_FASTRECOVERY)

#define	IN_CONGRECOVERY(flags)		((flags) & CCF_CONGRECOVERY)
#define	ENTER_CONGRECOVERY(flags)	((flags) |= CCF_CONGRECOVERY)
#define	EXIT_CONGRECOVERY(flags)	((flags) &= ~CCF_CONGRECOVERY)

#define	IN_RECOVERY(flags)	\
	((flags) & (CCF_CONGRECOVERY | CCF_FASTRECOVERY))
#define	ENTER_RECOVERY(flags)	\
	((flags) |= (CCF_CONGRECOVERY | CCF_FASTRECOVERY))
#define	EXIT_RECOVERY(flags)	\
	((flags) &= ~(CCF_CONGRECOVERY | CCF_FASTRECOVERY))

/*
 * ACK types passed to the ack_received() hook.
 *
 * CC_ACK is passed when an ACK acknowledges previously unACKed data.
 * CC_DUPACK is passed when a duplicate ACK is received. The conditions under
 * which an ACK is considered a duplicate ACK are defined in RFC 5681.
 */
#define	CC_ACK		0x0001	/* Regular in sequence ACK. */
#define	CC_DUPACK	0x0002	/* Duplicate ACK. */
#define	CC_PARTIALACK	0x0004	/* Not yet. */
#define	CC_SACK		0x0008	/* Not yet. */

/*
 * Congestion signal types passed to the cong_signal() hook. The highest order 8
 * bits (0x01000000 - 0x80000000) are reserved for CC algos to declare their own
 * congestion signal types.
 *
 * The congestion signals defined here cover the following situations:
 * CC_ECN: A packet with an Explicit Congestion Notification was received
 *   See RFC 3168.
 * CC_RTO: A round-trip timeout occurred.
 * CC_RTO_ERR: An ACK was received for a sequence number after we fired an RTO
 *   for that sequence number
 * CC_NDUPACK: Trigger fast retransmit based on the assumption that receiving
 *   N duplicate ACKs indicates packet loss rather than reordering. Fast
 *   retransmit is followed by fast recovery. Fast retransmit and recovery
 *   were originally described in RFC 2581 and were updated by RFC3782
 *   (NewReno). In both RFC2581 and RFC3782, N is 3.
 */
#define	CC_ECN		0x00000001	/* ECN marked packet received. */
#define	CC_RTO		0x00000002	/* RTO fired. */
#define	CC_RTO_ERR	0x00000004	/* RTO fired in error. */
#define	CC_NDUPACK	0x00000008	/* Threshold of dupack's reached. */

#define	CC_SIGPRIVMASK	0xFF000000	/* Mask to check if sig is private. */

/*
 * Structure to hold data and function pointers that together represent a
 * congestion control algorithm.
 */
struct cc_algo {
	char	name[CC_ALGO_NAME_MAX];

	/* Init CC state for a new control block. */
	int	(*cb_init)(struct cc_var *ccv);

	/* Cleanup CC state for a terminating control block. */
	void	(*cb_destroy)(struct cc_var *ccv);

	/* Init variables for a newly established connection. */
	void	(*conn_init)(struct cc_var *ccv);

	/* Called on receipt of an ack; type is one of the CC_*ACK values. */
	void	(*ack_received)(struct cc_var *ccv, uint16_t type);

	/* Called on detection of a congestion signal. */
	void	(*cong_signal)(struct cc_var *ccv, uint32_t type);

	/* Called after exiting congestion recovery. */
	void	(*post_recovery)(struct cc_var *ccv);

	/* Called when data transfer resumes after an idle period. */
	void	(*after_idle)(struct cc_var *ccv);

	/* Queue linkage (see <sys/queue.h>) for chaining cc_algo structs. */
	STAILQ_ENTRY(cc_algo) entries;
};

/*
 * Callback type accepted by cc_walk_algos(), together with an opaque
 * caller-supplied pointer.
 * NOTE(review): the meaning of the callback's return value and the argument
 * order contract are defined by the cc_walk_algos() implementation, which is
 * not visible from this header -- confirm there.
 */
typedef int cc_walk_func_t(void *, struct cc_algo *);
extern int cc_walk_algos(cc_walk_func_t *, void *);

/* Macro to obtain the CC algo's struct ptr. */
#define	CC_ALGO(tp)	((tp)->tcp_cc_algo)

/* Macro to obtain the CC algo's data ptr. */
#define	CC_DATA(tp)	((tp)->tcp_ccv.cc_data)

#ifdef __cplusplus
}
#endif

#endif /* (defined(_KERNEL) || defined(_KMEMUSER)) */

#endif /* _NETINET_CC_H_ */