summaryrefslogtreecommitdiff
path: root/usr/src/uts/common/inet/cc.h
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src/uts/common/inet/cc.h')
-rw-r--r--usr/src/uts/common/inet/cc.h214
1 files changed, 214 insertions, 0 deletions
diff --git a/usr/src/uts/common/inet/cc.h b/usr/src/uts/common/inet/cc.h
new file mode 100644
index 0000000000..170d0e7f8b
--- /dev/null
+++ b/usr/src/uts/common/inet/cc.h
@@ -0,0 +1,214 @@
+/*
+ * Copyright (c) 2007-2008
+ * Swinburne University of Technology, Melbourne, Australia.
+ * Copyright (c) 2009-2010 Lawrence Stewart <lstewart@freebsd.org>
+ * Copyright (c) 2010 The FreeBSD Foundation
+ * All rights reserved.
+ * Copyright (c) 2017 by Delphix. All rights reserved.
+ *
+ * This software was developed at the Centre for Advanced Internet
+ * Architectures, Swinburne University of Technology, by Lawrence Stewart and
+ * James Healy, made possible in part by a grant from the Cisco University
+ * Research Program Fund at Community Foundation Silicon Valley.
+ *
+ * Portions of this software were developed at the Centre for Advanced
+ * Internet Architectures, Swinburne University of Technology, Melbourne,
+ * Australia by David Hayes under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+/*
+ * This software was first released in 2007 by James Healy and Lawrence Stewart
+ * whilst working on the NewTCP research project at Swinburne University of
+ * Technology's Centre for Advanced Internet Architectures, Melbourne,
+ * Australia, which was made possible in part by a grant from the Cisco
+ * University Research Program Fund at Community Foundation Silicon Valley.
+ * More details are available at:
+ * http://caia.swin.edu.au/urp/newtcp/
+ */
+
+#ifndef _NETINET_CC_H_
+#define _NETINET_CC_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <netinet/tcp.h>
+#include <sys/queue.h>
+#include <sys/rwlock.h>
+
+#define CC_ALGO_NAME_MAX 16 /* max CC name length; sizes name[] in struct cc_algo */
+
+#define CC_DEFAULT_ALGO_NAME "sunreno" /* name of the default CC algorithm */
+
+struct tcp_s;
+struct sctp_s;
+
+/* CC housekeeping functions (defined elsewhere; per-function notes are presumed from names -- confirm). */
+extern struct cc_algo *cc_load_algo(const char *name); /* presumably finds an algo by name */
+extern int cc_register_algo(struct cc_algo *add_cc); /* presumably adds add_cc to the known algos */
+extern int cc_deregister_algo(struct cc_algo *remove_cc); /* presumably removes remove_cc again */
+
+/*
+ * Wrapper around transport structs that contain same-named congestion
+ * control variables. Allows algos to be shared amongst multiple CC aware
+ * transports.
+ *
+ * In theory, this code (from FreeBSD) can be used to support pluggable
+ * congestion control for sctp as well as tcp. However, the support for sctp
+ * in FreeBSD is incomplete, and in practice "type" is ignored. cc_module.h
+ * provides a CCV macro which implementations can use to get a variable out of
+ * the protocol-appropriate structure.
+ *
+ * If FreeBSD eventually does extend support for pluggable congestion control
+ * to sctp, we'll need to make sure we're setting "type" appropriately or use
+ * a definition of CCV that ignores it.
+ */
+struct cc_var {
+ void *cc_data; /* Per-connection private algorithm data. */
+ int bytes_this_ack; /* # bytes acked by the current ACK. */
+ int t_bytes_acked; /* # bytes acked during current RTT. */
+ tcp_seq curack; /* Most recent ACK. */
+ uint32_t flags; /* CCF_* flag bits for this cc_var (defined below). */
+ int type; /* Which ptr in ccvc is valid; ignored in practice. */
+ union ccv_container {
+ struct tcp_s *tcp; /* TCP transport struct. */
+ struct sctp_s *sctp; /* SCTP transport struct. */
+ } ccvc;
+ uint16_t nsegs; /* # segments coalesced into current chain. */
+};
+
+/*
+ * cc_var flags.
+ *
+ * CCF_ABC_SENTAWND is set when a full congestion window of data has been ACKed
+ * according to the Appropriate Byte Counting spec, defined in RFC 3465.
+ */
+#define CCF_ABC_SENTAWND 0x0001 /* ABC counted cwnd worth of bytes? */
+#define CCF_CWND_LIMITED 0x0002 /* Are we currently cwnd limited? */
+#define CCF_FASTRECOVERY 0x0004 /* in NewReno Fast Recovery; see *_FASTRECOVERY() */
+#define CCF_WASFRECOVERY 0x0008 /* was in NewReno Fast Recovery */
+#define CCF_CONGRECOVERY 0x0010 /* congestion recovery mode; see *_CONGRECOVERY() */
+#define CCF_WASCRECOVERY 0x0020 /* was in congestion recovery */
+/*
+ * In slow-start due to a retransmission timeout. This flag is enabled for the
+ * duration of the slow-start phase.
+ */
+#define CCF_RTO 0x0040 /* in slow-start due to timeout */
+
+#define IN_FASTRECOVERY(flags) ((flags) & CCF_FASTRECOVERY) /* nonzero if bit set */
+#define ENTER_FASTRECOVERY(flags) ((flags) |= CCF_FASTRECOVERY) /* set bit; flags is an lvalue */
+#define EXIT_FASTRECOVERY(flags) ((flags) &= ~CCF_FASTRECOVERY) /* clear bit; flags is an lvalue */
+
+#define IN_CONGRECOVERY(flags) ((flags) & CCF_CONGRECOVERY) /* nonzero if bit set */
+#define ENTER_CONGRECOVERY(flags) ((flags) |= CCF_CONGRECOVERY) /* set bit; flags is an lvalue */
+#define EXIT_CONGRECOVERY(flags) ((flags) &= ~CCF_CONGRECOVERY) /* clear bit; flags is an lvalue */
+
+#define IN_RECOVERY(flags) ((flags) & (CCF_CONGRECOVERY | CCF_FASTRECOVERY)) /* nonzero if either bit set */
+#define ENTER_RECOVERY(flags) ((flags) |= (CCF_CONGRECOVERY | CCF_FASTRECOVERY)) /* set both bits */
+#define EXIT_RECOVERY(flags) ((flags) &= ~(CCF_CONGRECOVERY | CCF_FASTRECOVERY)) /* clear both bits */
+
+/*
+ * ACK types passed to the ack_received() hook.
+ *
+ * CC_ACK is passed when an ACK acknowledges previously unACKed data.
+ * CC_DUPACK is passed when a duplicate ACK is received. The conditions under
+ * which an ACK is considered a duplicate ACK are defined in RFC 5681.
+ */
+#define CC_ACK 0x0001 /* Regular in-sequence ACK of previously unACKed data. */
+#define CC_DUPACK 0x0002 /* Duplicate ACK (as defined in RFC 5681). */
+#define CC_PARTIALACK 0x0004 /* Partial ACK; "not yet" (presumably unused -- confirm). */
+#define CC_SACK 0x0008 /* SACK; "not yet" (presumably unused -- confirm). */
+
+/*
+ * Congestion signal types passed to the cong_signal() hook. The highest order 8
+ * bits (0x01000000 - 0x80000000) are reserved for CC algos to declare their own
+ * congestion signal types.
+ *
+ * The congestion signals defined here cover the following situations:
+ * CC_ECN: A packet with an Explicit Congestion Notification was received
+ * See RFC 3168.
+ * CC_RTO: A retransmission timeout (RTO) occurred.
+ * CC_RTO_ERR: An ACK was received for a sequence number after we fired an RTO
+ * for that sequence number
+ * CC_NDUPACK: Trigger fast retransmit based on the assumption that receiving
+ * N duplicate ACKs indicates packet loss rather than reordering. Fast
+ * retransmit is followed by fast recovery. Fast retransmit and recovery
+ * were originally described in RFC 2581 and were updated by RFC 3782
+ * (NewReno). In both RFC 2581 and RFC 3782, N is 3.
+ */
+#define CC_ECN 0x00000001 /* ECN marked packet received. */
+#define CC_RTO 0x00000002 /* RTO fired. */
+#define CC_RTO_ERR 0x00000004 /* RTO fired in error. */
+#define CC_NDUPACK 0x00000008 /* Threshold of dupacks reached. */
+
+#define CC_SIGPRIVMASK 0xFF000000 /* Top 8 bits: signal types private to a CC algo. */
+
+/*
+ * Structure to hold data and function pointers that together represent a
+ * congestion control algorithm.
+ */
+struct cc_algo {
+ char name[CC_ALGO_NAME_MAX]; /* Congestion control algorithm name. */
+
+ /* Init CC state for a new control block (int result; presumably 0 on success -- confirm). */
+ int (*cb_init)(struct cc_var *ccv);
+
+ /* Cleanup CC state for a terminating control block. */
+ void (*cb_destroy)(struct cc_var *ccv);
+
+ /* Init variables for a newly established connection. */
+ void (*conn_init)(struct cc_var *ccv);
+
+ /* Called on receipt of an ack; type is an ACK type (CC_ACK, CC_DUPACK, ...). */
+ void (*ack_received)(struct cc_var *ccv, uint16_t type);
+
+ /* Called on detection of a congestion signal; type is CC_ECN, CC_RTO, ... or algo-private. */
+ void (*cong_signal)(struct cc_var *ccv, uint32_t type);
+
+ /* Called after exiting congestion recovery. */
+ void (*post_recovery)(struct cc_var *ccv);
+
+ /* Called when data transfer resumes after an idle period. */
+ void (*after_idle)(struct cc_var *ccv);
+
+ STAILQ_ENTRY(cc_algo) entries; /* Tail-queue linkage (sys/queue.h STAILQ). */
+};
+
+typedef int cc_walk_func_t(void *, struct cc_algo *); /* callback type taken by cc_walk_algos() */
+extern int cc_walk_algos(cc_walk_func_t *, void *); /* presumably runs the callback over the known algos -- confirm */
+
+/* Expands to the tcp_cc_algo member of *tcpp: the CC algo's struct ptr. */
+#define CC_ALGO(tcpp) ((tcpp)->tcp_cc_algo)
+
+/* Expands to tcp_ccv.cc_data of *tcpp: the CC algo's data ptr. */
+#define CC_DATA(tcpp) ((tcpp)->tcp_ccv.cc_data)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _NETINET_CC_H_ */