/*
 * Repository path: usr/src/uts/common/inet/cc.h
 * Blob: d92535cd48152a053437e5ff8d731ed82c7ae0c7
 */
/*
 * Copyright (c) 2007-2008
 *	Swinburne University of Technology, Melbourne, Australia.
 * Copyright (c) 2009-2010 Lawrence Stewart <lstewart@freebsd.org>
 * Copyright (c) 2010 The FreeBSD Foundation
 * All rights reserved.
 * Copyright (c) 2017 by Delphix. All rights reserved.
 *
 * This software was developed at the Centre for Advanced Internet
 * Architectures, Swinburne University of Technology, by Lawrence Stewart and
 * James Healy, made possible in part by a grant from the Cisco University
 * Research Program Fund at Community Foundation Silicon Valley.
 *
 * Portions of this software were developed at the Centre for Advanced
 * Internet Architectures, Swinburne University of Technology, Melbourne,
 * Australia by David Hayes under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

/*
 * This software was first released in 2007 by James Healy and Lawrence Stewart
 * whilst working on the NewTCP research project at Swinburne University of
 * Technology's Centre for Advanced Internet Architectures, Melbourne,
 * Australia, which was made possible in part by a grant from the Cisco
 * University Research Program Fund at Community Foundation Silicon Valley.
 * More details are available at:
 *   http://caia.swin.edu.au/urp/newtcp/
 */

#ifndef _NETINET_CC_H_
#define	_NETINET_CC_H_

#if (defined(_KERNEL) || defined(_KMEMUSER))

#ifdef	__cplusplus
extern "C" {
#endif

#include <netinet/tcp.h>
#include <sys/queue.h>
#include <sys/rwlock.h>

/*
 * Size of the name[] array in struct cc_algo.
 * NOTE(review): whether this count includes the terminating NUL is not
 * visible in this header -- confirm against the code that fills in name[].
 */
#define	CC_ALGO_NAME_MAX	16	/* max congestion control name length */

/* Name of the default congestion control algorithm. */
#define	CC_DEFAULT_ALGO_NAME	"sunreno"

/*
 * Forward declarations of the transport structures.  This header only stores
 * pointers to them (in the ccvc union of struct cc_var), so the complete
 * definitions are not needed here.
 */
struct tcp_s;
struct sctp_s;

/*
 * CC housekeeping functions.
 *
 * NOTE(review): the implementations are not part of this header, so the
 * summaries below are inferred from the names and signatures only -- confirm
 * against the definitions.
 *
 * cc_load_algo() takes an algorithm name and returns a struct cc_algo
 *   pointer (presumably NULL when no algorithm of that name is available).
 * cc_register_algo() and cc_deregister_algo() take the struct cc_algo to add
 *   or remove and return an int status (presumably 0 on success).
 */
extern struct cc_algo *cc_load_algo(const char *name);
extern int	cc_register_algo(struct cc_algo *add_cc);
extern int	cc_deregister_algo(struct cc_algo *remove_cc);

/*
 * Wrapper around transport structs that contain same-named congestion
 * control variables. Allows algos to be shared amongst multiple CC aware
 * transports.
 *
 * In theory, this code (from FreeBSD) can be used to support pluggable
 * congestion control for sctp as well as tcp.  However, the support for sctp
 * in FreeBSD is incomplete, and in practice "type" is ignored.  cc_module.h
 * provides a CCV macro which implementations can use to get a variable out of
 * the protocol-appropriate structure.
 *
 * If FreeBSD eventually does extend support for pluggable congestion control
 * to sctp, we'll need to make sure we're setting "type" appropriately or use
 * a definition of CCV that ignores it.
 *
 * A pointer to this structure is the first argument of every hook in
 * struct cc_algo.  "flags" holds the CCF_* bits defined below.
 */
struct cc_var {
	void		*cc_data; /* Per-connection private algorithm data. */
	int		bytes_this_ack; /* # bytes acked by the current ACK. */
	int		t_bytes_acked; /* # bytes acked during current RTT */
	tcp_seq		curack; /* Most recent ACK. */
	uint32_t	flags; /* Flags for cc_var (see below) */
	int		type; /* Indicates which ptr is valid in ccvc. */
	/*
	 * Pointer to the transport structure this cc_var wraps.  "type" is
	 * meant to say which member is meaningful, but is currently ignored
	 * in practice (see the block comment above).
	 */
	union ccv_container {
		struct tcp_s	*tcp;
		struct sctp_s	*sctp;
	} ccvc;
	uint16_t	nsegs; /* # segments coalesced into current chain. */
};

/*
 * cc_var flags.
 *
 * CCF_ABC_SENTAWND is set when a full congestion window of data has been ACKed
 *   according to the Appropriate Byte Counting spec, defined in RFC 3465.
 */
#define	CCF_ABC_SENTAWND	0x0001	/* ABC counted cwnd worth of bytes? */
#define	CCF_CWND_LIMITED	0x0002	/* Are we currently cwnd limited? */
#define	CCF_FASTRECOVERY	0x0004	/* in NewReno Fast Recovery */
#define	CCF_WASFRECOVERY	0x0008	/* was in NewReno Fast Recovery */
#define	CCF_CONGRECOVERY	0x0010	/* congestion recovery mode */
#define	CCF_WASCRECOVERY	0x0020	/* was in congestion recovery */
/*
 * In slow-start due to a retransmission timeout. This flag is enabled for the
 * duration of the slow-start phase.
 */
#define	CCF_RTO			0x0040	/* in slow-start due to timeout */

/*
 * Helpers for testing and updating the recovery bits of a flags word.
 *
 * The IN_ macros evaluate to a non-zero value when the named recovery bit
 * (or, for IN_RECOVERY, either recovery bit) is set, and to zero otherwise.
 * The ENTER_ and EXIT_ macros set or clear the bit(s) in place, so their
 * argument must be a modifiable lvalue; they evaluate to the updated value.
 *
 * Every argument and every expansion is parenthesized so that the macros
 * group as a single unit no matter what expression is passed in or what
 * expression surrounds the macro.  Without this, IN_FASTRECOVERY(a | b)
 * would expand to (a | b & CCF_FASTRECOVERY), which tests the wrong thing.
 */
#define	IN_FASTRECOVERY(flags)		((flags) & CCF_FASTRECOVERY)
#define	ENTER_FASTRECOVERY(flags)	((flags) |= CCF_FASTRECOVERY)
#define	EXIT_FASTRECOVERY(flags)	((flags) &= ~CCF_FASTRECOVERY)

#define	IN_CONGRECOVERY(flags)		((flags) & CCF_CONGRECOVERY)
#define	ENTER_CONGRECOVERY(flags)	((flags) |= CCF_CONGRECOVERY)
#define	EXIT_CONGRECOVERY(flags)	((flags) &= ~CCF_CONGRECOVERY)

#define	IN_RECOVERY(flags)	\
	((flags) & (CCF_CONGRECOVERY | CCF_FASTRECOVERY))
#define	ENTER_RECOVERY(flags)	\
	((flags) |= (CCF_CONGRECOVERY | CCF_FASTRECOVERY))
#define	EXIT_RECOVERY(flags)	\
	((flags) &= ~(CCF_CONGRECOVERY | CCF_FASTRECOVERY))

/*
 * ACK types passed to the ack_received() hook.
 *
 * CC_ACK is passed when an ACK acknowledges previously unACKed data.
 * CC_DUPACK is passed when a duplicate ACK is received.  The conditions under
 *   which an ACK is considered a duplicate ACK are defined in RFC 5681.
 *
 * Each value is a distinct bit, and all of them fit in the uint16_t "type"
 * argument of the ack_received() hook in struct cc_algo.
 */
#define	CC_ACK		0x0001	/* Regular in sequence ACK. */
#define	CC_DUPACK	0x0002	/* Duplicate ACK. */
#define	CC_PARTIALACK	0x0004	/* Not yet. */
#define	CC_SACK		0x0008	/* Not yet. */

/*
 * Congestion signal types passed to the cong_signal() hook. The highest order 8
 * bits (0x01000000 - 0x80000000) are reserved for CC algos to declare their own
 * congestion signal types.
 *
 * The congestion signals defined here cover the following situations:
 * CC_ECN: A packet with an Explicit Congestion Notification was received.
 *   See RFC 3168.
 * CC_RTO: A retransmission timeout (RTO) occurred.
 * CC_RTO_ERR: An ACK was received for a sequence number after we fired an RTO
 *   for that sequence number.
 * CC_NDUPACK: Trigger fast retransmit based on the assumption that receiving
 *   N duplicate ACKs indicates packet loss rather than reordering.  Fast
 *   retransmit is followed by fast recovery.  Fast retransmit and recovery
 *   were originally described in RFC 2581 and were updated by RFC 3782
 *   (NewReno).  In both RFC 2581 and RFC 3782, N is 3.
 *
 * CC_SIGPRIVMASK selects the 8 reserved high-order bits: a signal type with
 * any of those bits set is private to a CC algo rather than one of the types
 * defined here.
 */
#define	CC_ECN		0x00000001	/* ECN marked packet received. */
#define	CC_RTO		0x00000002	/* RTO fired. */
#define	CC_RTO_ERR	0x00000004	/* RTO fired in error. */
#define	CC_NDUPACK	0x00000008	/* Threshold of dupack's reached. */

#define	CC_SIGPRIVMASK	0xFF000000	/* Mask to check if sig is private. */

/*
 * Structure to hold data and function pointers that together represent a
 * congestion control algorithm.
 *
 * Every hook receives the struct cc_var that the call applies to.
 * NOTE(review): this header does not show whether a hook may be left NULL;
 * check the callers before relying on either answer.
 */
struct cc_algo {
	/* Algorithm name; the array is CC_ALGO_NAME_MAX bytes long. */
	char	name[CC_ALGO_NAME_MAX];

	/*
	 * Init CC state for a new control block.  Returns an int status
	 * (presumably 0 on success -- confirm against the callers).
	 */
	int	(*cb_init)(struct cc_var *ccv);

	/* Cleanup CC state for a terminating control block. */
	void	(*cb_destroy)(struct cc_var *ccv);

	/* Init variables for a newly established connection. */
	void	(*conn_init)(struct cc_var *ccv);

	/* Called on receipt of an ack; type is one of the ACK types above. */
	void	(*ack_received)(struct cc_var *ccv, uint16_t type);

	/*
	 * Called on detection of a congestion signal; type is one of the
	 * congestion signal types above or an algo-private one (see
	 * CC_SIGPRIVMASK).
	 */
	void	(*cong_signal)(struct cc_var *ccv, uint32_t type);

	/* Called after exiting congestion recovery. */
	void	(*post_recovery)(struct cc_var *ccv);

	/* Called when data transfer resumes after an idle period. */
	void	(*after_idle)(struct cc_var *ccv);

	/*
	 * Linkage for a <sys/queue.h> singly-linked tail queue of cc_algo
	 * structures (presumably the list of registered algorithms --
	 * confirm against the registration code).
	 */
	STAILQ_ENTRY(cc_algo) entries;
};

/*
 * cc_walk_func_t is the callback type accepted by cc_walk_algos(): it takes a
 * void * and a struct cc_algo * and returns an int.
 *
 * NOTE(review): the implementation is not visible here.  Presumably the
 * callback is invoked for each known algorithm with the void * given to
 * cc_walk_algos(), and its return value controls or becomes the result of
 * the walk -- confirm against the definition.
 */
typedef int cc_walk_func_t(void *, struct cc_algo *);
extern int	cc_walk_algos(cc_walk_func_t *, void *);

/*
 * Macro to obtain the CC algo's struct ptr.  Expands to the tcp_cc_algo
 * member of the structure tp points to; tp is evaluated exactly once.
 */
#define	CC_ALGO(tp)	((tp)->tcp_cc_algo)

/*
 * Macro to obtain the CC algo's data ptr.  Expands to the cc_data member of
 * the tcp_ccv member of the structure tp points to; tp is evaluated exactly
 * once.
 */
#define	CC_DATA(tp)	((tp)->tcp_ccv.cc_data)

#ifdef	__cplusplus
}
#endif

#endif	/* (defined(_KERNEL) || defined(_KMEMUSER)) */

#endif /* _NETINET_CC_H_ */