author      Paul Winder <pwinder@racktopsystems.com>    2020-04-16 17:58:59 +0100
committer   Paul Winder <paul@winders.demon.co.uk>      2020-04-24 08:09:30 +0100
commit      3b0b0a4e9ef34199357e064b48f2dd2e9ff1c9a2 (patch)
tree        869a055061a92278887e0b9e9846e6b1a038b385 /usr/src
parent      1e609378b9a5ee694d9a9d3d87e1e9dee3e0ab18 (diff)
12583 Import FreeBSD congestion control updates
Portions contributed by: Richard Scheffenegger
Reviewed by: Hans Rosenfeld <rosenfeld@grumpf.hope-2000.org>
Reviewed by: Dan McDonald <danmcd@joyent.com>
Approved by: Garrett D'Amore <garrett@damore.org>
Diffstat (limited to 'usr/src')
-rw-r--r--   usr/src/uts/common/inet/cc/cc_cubic.c     66
-rw-r--r--   usr/src/uts/common/inet/cc/cc_cubic.h     17
-rw-r--r--   usr/src/uts/common/inet/cc/cc_newreno.c   18
3 files changed, 88 insertions, 13 deletions
diff --git a/usr/src/uts/common/inet/cc/cc_cubic.c b/usr/src/uts/common/inet/cc/cc_cubic.c
index 11c238afd8..6071409e30 100644
--- a/usr/src/uts/common/inet/cc/cc_cubic.c
+++ b/usr/src/uts/common/inet/cc/cc_cubic.c
@@ -4,6 +4,7 @@
* All rights reserved.
* Copyright (c) 2017 by Delphix. All rights reserved.
* Copyright 2019 Joyent, Inc.
+ * Copyright 2020 RackTop Systems, Inc.
*
* This software was developed by Lawrence Stewart while studying at the Centre
* for Advanced Internet Architectures, Swinburne University of Technology, made
@@ -85,6 +86,7 @@ static void cubic_conn_init(struct cc_var *ccv);
static void cubic_post_recovery(struct cc_var *ccv);
static void cubic_record_rtt(struct cc_var *ccv);
static void cubic_ssthresh_update(struct cc_var *ccv);
+static void cubic_after_idle(struct cc_var *ccv);
struct cubic {
/* Cubic K in fixed point form with CUBIC_SHIFT worth of precision. */
@@ -115,6 +117,7 @@ struct cc_algo cubic_cc_algo = {
.cong_signal = cubic_cong_signal,
.conn_init = cubic_conn_init,
.post_recovery = cubic_post_recovery,
+ .after_idle = cubic_after_idle,
};
int
@@ -129,7 +132,7 @@ _init(void)
if ((err = mod_install(&cc_cubic_modlinkage)) != 0)
(void) cc_deregister_algo(&cubic_cc_algo);
}
- cubic_cc_algo.after_idle = newreno_cc_algo->after_idle;
+
return (err);
}
@@ -195,19 +198,22 @@ cubic_ack_received(struct cc_var *ccv, uint16_t type)
* TCP-friendly region, follow tf
* cwnd growth.
*/
- CCV(ccv, tcp_cwnd) = w_tf;
+ if (CCV(ccv, tcp_cwnd) < w_tf)
+ CCV(ccv, tcp_cwnd) = w_tf;
} else if (CCV(ccv, tcp_cwnd) < w_cubic_next) {
/*
* Concave or convex region, follow CUBIC
* cwnd growth.
*/
if (CC_ABC(ccv))
- CCV(ccv, tcp_cwnd) = w_cubic_next;
+ CCV(ccv, tcp_cwnd) = MIN(w_cubic_next,
+ INT_MAX);
else
- CCV(ccv, tcp_cwnd) += ((w_cubic_next -
+ CCV(ccv, tcp_cwnd) += MAX(1,
+ ((MIN(w_cubic_next, INT_MAX) -
CCV(ccv, tcp_cwnd)) *
CCV(ccv, tcp_mss)) /
- CCV(ccv, tcp_cwnd);
+ CCV(ccv, tcp_cwnd));
}
/*
@@ -218,12 +224,34 @@ cubic_ack_received(struct cc_var *ccv, uint16_t type)
* max_cwnd.
*/
if (cubic_data->num_cong_events == 0 &&
- cubic_data->max_cwnd < CCV(ccv, tcp_cwnd))
+ cubic_data->max_cwnd < CCV(ccv, tcp_cwnd)) {
cubic_data->max_cwnd = CCV(ccv, tcp_cwnd);
+ cubic_data->K = cubic_k(cubic_data->max_cwnd /
+ CCV(ccv, tcp_mss));
+ }
}
}
}
+/*
+ * This is a Cubic specific implementation of after_idle.
+ * - Reset cwnd by calling New Reno implementation of after_idle.
+ * - Reset t_last_cong.
+ */
+static void
+cubic_after_idle(struct cc_var *ccv)
+{
+ struct cubic *cubic_data;
+
+ cubic_data = ccv->cc_data;
+
+ cubic_data->max_cwnd = max(cubic_data->max_cwnd, CCV(ccv, tcp_cwnd));
+ cubic_data->K = cubic_k(cubic_data->max_cwnd / CCV(ccv, tcp_mss));
+
+ newreno_cc_algo->after_idle(ccv);
+ cubic_data->t_last_cong = gethrtime();
+}
+
static void
cubic_cb_destroy(struct cc_var *ccv)
{
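
The cubic_ack_received() changes above make the TCP-friendly estimate a floor rather than an unconditional assignment, cap the CUBIC target at INT_MAX, and guarantee at least one byte of growth per ACK when ABC is disabled. A minimal userland sketch of that decision follows; the names are illustrative only, the kernel code works on struct cc_var and struct cubic instead of plain integers.

#include <limits.h>
#include <stdint.h>

/*
 * Simplified sketch of the per-ACK growth decision in the hunk above.
 * Illustrative names; not the kernel interfaces.
 */
static uint32_t
cubic_update_cwnd(uint32_t cwnd, uint32_t mss, int64_t w_tf,
    int64_t w_cubic_next, int abc_enabled)
{
	if (w_cubic_next < w_tf) {
		/* TCP-friendly region: only ever raise cwnd to w_tf. */
		if (cwnd < w_tf)
			cwnd = (uint32_t)w_tf;
	} else if (cwnd < w_cubic_next) {
		/* Concave/convex region: cap the CUBIC target at INT_MAX. */
		int64_t target = (w_cubic_next < INT_MAX) ?
		    w_cubic_next : INT_MAX;

		if (abc_enabled) {
			cwnd = (uint32_t)target;
		} else {
			/* Grow by at least one byte per ACK. */
			int64_t step = ((target - cwnd) * mss) / cwnd;
			cwnd += (step > 1) ? step : 1;
		}
	}
	return (cwnd);
}

The INT_MAX clamp pairs with the new overflow guard added to cubic_cwnd() further down, so an extreme w_cubic_next can no longer wrap the window.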
@@ -330,6 +358,7 @@ static void
cubic_post_recovery(struct cc_var *ccv)
{
struct cubic *cubic_data;
+ uint32_t mss, pipe;
cubic_data = ccv->cc_data;
@@ -339,11 +368,28 @@ cubic_post_recovery(struct cc_var *ccv)
>> CUBIC_SHIFT;
}
+ mss = CCV(ccv, tcp_mss);
+
if (IN_FASTRECOVERY(ccv->flags)) {
- /* Update cwnd based on beta and adjusted max_cwnd. */
- CCV(ccv, tcp_cwnd) = max(1, ((CUBIC_BETA *
- cubic_data->max_cwnd) >> CUBIC_SHIFT));
+ /*
+ * If inflight data is less than ssthresh, set cwnd
+ * conservatively to avoid a burst of data, as suggested in
+ * the NewReno RFC. Otherwise, use the CUBIC method.
+ */
+ pipe = CCV(ccv, tcp_snxt) - CCV(ccv, tcp_suna);
+ if (pipe < CCV(ccv, tcp_cwnd_ssthresh)) {
+ /*
+ * Ensure that cwnd does not collapse to 1 MSS under
+ * adverse conditions. Implements RFC6582
+ */
+ CCV(ccv, tcp_cwnd) = MAX(pipe, mss) + mss;
+ } else {
+ /* Update cwnd based on beta and adjusted max_cwnd. */
+ CCV(ccv, tcp_cwnd) = max(1, ((CUBIC_BETA *
+ cubic_data->max_cwnd) >> CUBIC_SHIFT));
+ }
}
+
cubic_data->t_last_cong = gethrtime();
/* Calculate the average RTT between congestion epochs. */
@@ -355,7 +401,7 @@ cubic_post_recovery(struct cc_var *ccv)
cubic_data->epoch_ack_count = 0;
cubic_data->sum_rtt_nsecs = 0;
- cubic_data->K = cubic_k(cubic_data->max_cwnd / CCV(ccv, tcp_mss));
+ cubic_data->K = cubic_k(cubic_data->max_cwnd / mss);
}
/*
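
Several hunks above now refresh K via cubic_k(max_cwnd / mss) whenever max_cwnd moves: on a new slow-start maximum, in cubic_after_idle(), and at the end of recovery. K is the time offset at which the cubic curve regains the pre-congestion window. A floating-point reference is sketched below, assuming the standard CUBIC constants C = 0.4 and beta = 0.7 (RFC 8312) that the kernel's fixed-point cubic_k() approximates.

#include <math.h>

/*
 * Floating-point reference for the K recomputed above; the kernel's
 * cubic_k() produces a fixed-point approximation of the same value.
 * Assumes the standard CUBIC constants C = 0.4 and beta = 0.7.
 */
static double
cubic_k_ref(double wmax_segments)
{
	const double beta = 0.7;	/* multiplicative decrease factor */
	const double c = 0.4;		/* CUBIC scaling constant */

	/* K = cbrt(W_max * (1 - beta) / C), W_max in segments. */
	return (cbrt(wmax_segments * (1.0 - beta) / c));
}

Keeping K in step with max_cwnd matters because cubic_cwnd() measures elapsed time against K; a stale K after an idle period or a new window maximum would start the curve from the wrong point.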
diff --git a/usr/src/uts/common/inet/cc/cc_cubic.h b/usr/src/uts/common/inet/cc/cc_cubic.h
index c87751d257..cc6e6e459a 100644
--- a/usr/src/uts/common/inet/cc/cc_cubic.h
+++ b/usr/src/uts/common/inet/cc/cc_cubic.h
@@ -4,6 +4,7 @@
* All rights reserved.
* Copyright (c) 2017 by Delphix. All rights reserved.
* Copyright 2019 Joyent, Inc.
+ * Copyright 2020 RackTop Systems, Inc.
*
* This software was developed by Lawrence Stewart while studying at the Centre
* for Advanced Internet Architectures, Swinburne University of Technology, made
@@ -70,6 +71,12 @@
/* Don't trust s_rtt until this many rtt samples have been taken. */
#define CUBIC_MIN_RTT_SAMPLES 8
+/*
+ * (2^21)^3 is long max. Dividing (2^63) by Cubic_C_factor
+ * and taking cube-root yields 448845 as the effective useful limit
+ */
+#define CUBED_ROOT_MAX_ULONG 448845
+
/* Userland only bits. */
#ifndef _KERNEL
@@ -188,6 +195,11 @@ cubic_cwnd(hrtime_t nsecs_since_cong, uint32_t wmax, uint32_t smss, int64_t K)
*/
cwnd = (t - K * MILLISEC) / MILLISEC;
+ if (cwnd > CUBED_ROOT_MAX_ULONG)
+ return (INT_MAX);
+ if (cwnd < -CUBED_ROOT_MAX_ULONG)
+ return (0);
+
/* cwnd = (t - K)^3, with CUBIC_SHIFT^3 worth of precision. */
cwnd *= (cwnd * cwnd);
@@ -199,7 +211,10 @@ cubic_cwnd(hrtime_t nsecs_since_cong, uint32_t wmax, uint32_t smss, int64_t K)
*/
cwnd = ((cwnd * CUBIC_C_FACTOR * smss) >> CUBIC_SHIFT_4) + wmax;
- return ((uint32_t)cwnd);
+ /*
+ * for negative cwnd, limiting to zero as lower bound
+ */
+ return (max(0, cwnd));
}
/*
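
The new CUBED_ROOT_MAX_ULONG guard bounds (t - K) before it is cubed, so the later multiplication by the CUBIC C factor stays inside a signed 64-bit value. The header comment implies CUBIC_C_FACTOR is roughly 102 (0.4 in CUBIC_SHIFT = 8 fixed point), since cbrt(2^63 / 102) is approximately 448845. A small standalone check under that assumption is sketched below (requires a compiler with __int128).

#include <stdint.h>
#include <stdio.h>

/*
 * Standalone check of the overflow bound above, assuming
 * CUBIC_C_FACTOR == 102 (0.4 with CUBIC_SHIFT == 8 fixed point).
 * 448845 is the largest magnitude whose cube, scaled by that factor,
 * still fits in a signed 64-bit value.
 */
int
main(void)
{
	const __int128 limit = 448845;		/* CUBED_ROOT_MAX_ULONG */
	const __int128 c_factor = 102;		/* assumed CUBIC_C_FACTOR */
	__int128 fits = limit * limit * limit * c_factor;
	__int128 over = (limit + 1) * (limit + 1) * (limit + 1) * c_factor;

	printf("448845^3 * 102 fits in int64:   %d\n",
	    fits <= INT64_MAX);			/* expect 1 */
	printf("448846^3 * 102 overflows int64: %d\n",
	    over > INT64_MAX);			/* expect 1 */
	return (0);
}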
diff --git a/usr/src/uts/common/inet/cc/cc_newreno.c b/usr/src/uts/common/inet/cc/cc_newreno.c
index ceb76d8643..5cb1c32534 100644
--- a/usr/src/uts/common/inet/cc/cc_newreno.c
+++ b/usr/src/uts/common/inet/cc/cc_newreno.c
@@ -7,6 +7,7 @@
* Copyright (c) 2010 The FreeBSD Foundation
* All rights reserved.
* Copyright (c) 2017 by Delphix. All rights reserved.
+ * Copyright 2020 RackTop Systems, Inc.
*
* This software was developed at the Centre for Advanced Internet
* Architectures, Swinburne University of Technology, by Lawrence Stewart, James
@@ -256,12 +257,25 @@ newreno_cong_signal(struct cc_var *ccv, uint32_t type)
static void
newreno_post_recovery(struct cc_var *ccv)
{
+ uint32_t pipe;
+
if (IN_FASTRECOVERY(ccv->flags)) {
/*
* Fast recovery will conclude after returning from this
- * function.
+ * function. Window inflation should have left us with
+ * approximately cwnd_ssthresh outstanding data. But in case we
+ * would be inclined to send a burst, better to do it via the
+ * slow start mechanism.
*/
- if (CCV(ccv, tcp_cwnd) > CCV(ccv, tcp_cwnd_ssthresh)) {
+ pipe = CCV(ccv, tcp_snxt) - CCV(ccv, tcp_suna);
+ if (pipe < CCV(ccv, tcp_cwnd_ssthresh)) {
+ /*
+ * Ensure that cwnd does not collapse to 1 MSS under
+ * adverse conditions. Implements RFC6582
+ */
+ CCV(ccv, tcp_cwnd) = MAX(pipe, CCV(ccv, tcp_mss)) +
+ CCV(ccv, tcp_mss);
+ } else if (CCV(ccv, tcp_cwnd) > CCV(ccv, tcp_cwnd_ssthresh)) {
CCV(ccv, tcp_cwnd) = CCV(ccv, tcp_cwnd_ssthresh);
}
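
Both post_recovery changes apply the same RFC 6582 idea: if the data still in flight ("pipe") is below ssthresh when fast recovery ends, leaving cwnd at a full ssthresh could release a burst, so cwnd is instead pinned just above pipe and never below two MSS. A worked example with made-up numbers follows; the else branch is a simplification, since NewReno caps cwnd at ssthresh there while CUBIC uses the beta-scaled max_cwnd.

#include <stdint.h>
#include <stdio.h>

#define	MAX(a, b)	((a) > (b) ? (a) : (b))

/*
 * Worked example of the burst-avoidance exit from fast recovery above.
 * The sequence numbers and MSS are made up purely for illustration.
 */
int
main(void)
{
	uint32_t mss = 1460;
	uint32_t snxt = 1000000;	/* next sequence to be sent */
	uint32_t suna = 997000;		/* oldest unacknowledged sequence */
	uint32_t ssthresh = 14600;	/* 10 * mss */
	uint32_t pipe = snxt - suna;	/* 3000 bytes still in flight */
	uint32_t cwnd;

	if (pipe < ssthresh) {
		/* Little in flight: restart near pipe, never below 2 * mss. */
		cwnd = MAX(pipe, mss) + mss;	/* 3000 + 1460 = 4460 */
	} else {
		/*
		 * Plenty in flight: the algorithm-specific path applies
		 * (NewReno caps cwnd at ssthresh, CUBIC scales max_cwnd
		 * by beta); ssthresh stands in for that here.
		 */
		cwnd = ssthresh;
	}
	printf("cwnd after recovery: %u bytes\n", cwnd);
	return (0);
}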
}