author      Paul Winder <pwinder@racktopsystems.com>    2020-04-16 17:58:59 +0100
committer   Paul Winder <paul@winders.demon.co.uk>      2020-04-24 08:09:30 +0100
commit      3b0b0a4e9ef34199357e064b48f2dd2e9ff1c9a2 (patch)
tree        869a055061a92278887e0b9e9846e6b1a038b385 /usr/src
parent      1e609378b9a5ee694d9a9d3d87e1e9dee3e0ab18 (diff)
download    illumos-joyent-3b0b0a4e9ef34199357e064b48f2dd2e9ff1c9a2.tar.gz
12583 Import FreeBSD congestion control updates
Portions contributed by: Richard Scheffenegger
Reviewed by: Hans Rosenfeld <rosenfeld@grumpf.hope-2000.org>
Reviewed by: Dan McDonald <danmcd@joyent.com>
Approved by: Garrett D'Amore <garrett@damore.org>
Diffstat (limited to 'usr/src')
-rw-r--r--   usr/src/uts/common/inet/cc/cc_cubic.c     66
-rw-r--r--   usr/src/uts/common/inet/cc/cc_cubic.h     17
-rw-r--r--   usr/src/uts/common/inet/cc/cc_newreno.c   18
3 files changed, 88 insertions(+), 13 deletions(-)
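Both cc_cubic.c and cc_newreno.c in the diff below apply the same rule when fast recovery ends: if the data still in flight ("pipe", i.e. tcp_snxt - tcp_suna) is below ssthresh, cwnd is set to pipe plus one MSS, so the sender builds back up through slow start instead of bursting, and cwnd can never collapse below one MSS (the RFC 6582 behaviour). The sketch below only illustrates that rule with simplified names and plain integer arguments; the helper is not one of the kernel interfaces touched by this commit.

#include <stdint.h>
#include <stdio.h>

/*
 * Illustrative only: the post-recovery cwnd rule adopted in the diff below,
 * with simplified names and types (not the illumos kernel interfaces).
 */
static uint32_t
post_recovery_cwnd(uint32_t pipe, uint32_t ssthresh, uint32_t mss,
    uint32_t cwnd)
{
	if (pipe < ssthresh) {
		/*
		 * Less data than ssthresh is outstanding: restart from just
		 * above what is in flight, never below one MSS (RFC 6582).
		 */
		return (((pipe > mss) ? pipe : mss) + mss);
	}
	/* Otherwise clamp cwnd at ssthresh, as before. */
	return ((cwnd > ssthresh) ? ssthresh : cwnd);
}

int
main(void)
{
	/* Example: 10 segments in flight, ssthresh of 20 segments, MSS 1460. */
	printf("cwnd -> %u\n",
	    post_recovery_cwnd(10 * 1460, 20 * 1460, 1460, 40 * 1460));
	return (0);
}

Note that the CUBIC variant keeps its beta * max_cwnd computation in the else branch rather than the plain ssthresh clamp shown here.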
diff --git a/usr/src/uts/common/inet/cc/cc_cubic.c b/usr/src/uts/common/inet/cc/cc_cubic.c
index 11c238afd8..6071409e30 100644
--- a/usr/src/uts/common/inet/cc/cc_cubic.c
+++ b/usr/src/uts/common/inet/cc/cc_cubic.c
@@ -4,6 +4,7 @@
  * All rights reserved.
  * Copyright (c) 2017 by Delphix. All rights reserved.
  * Copyright 2019 Joyent, Inc.
+ * Copyright 2020 RackTop Systems, Inc.
  *
  * This software was developed by Lawrence Stewart while studying at the Centre
  * for Advanced Internet Architectures, Swinburne University of Technology, made
@@ -85,6 +86,7 @@ static void	cubic_conn_init(struct cc_var *ccv);
 static void	cubic_post_recovery(struct cc_var *ccv);
 static void	cubic_record_rtt(struct cc_var *ccv);
 static void	cubic_ssthresh_update(struct cc_var *ccv);
+static void	cubic_after_idle(struct cc_var *ccv);

 struct cubic {
 	/* Cubic K in fixed point form with CUBIC_SHIFT worth of precision. */
@@ -115,6 +117,7 @@ struct cc_algo cubic_cc_algo = {
 	.cong_signal = cubic_cong_signal,
 	.conn_init = cubic_conn_init,
 	.post_recovery = cubic_post_recovery,
+	.after_idle = cubic_after_idle,
 };

 int
@@ -129,7 +132,7 @@ _init(void)
 		if ((err = mod_install(&cc_cubic_modlinkage)) != 0)
 			(void) cc_deregister_algo(&cubic_cc_algo);
 	}
-	cubic_cc_algo.after_idle = newreno_cc_algo->after_idle;
+
 	return (err);
 }

@@ -195,19 +198,22 @@ cubic_ack_received(struct cc_var *ccv, uint16_t type)
 				 * TCP-friendly region, follow tf
 				 * cwnd growth.
 				 */
-				CCV(ccv, tcp_cwnd) = w_tf;
+				if (CCV(ccv, tcp_cwnd) < w_tf)
+					CCV(ccv, tcp_cwnd) = w_tf;
 			} else if (CCV(ccv, tcp_cwnd) < w_cubic_next) {
 				/*
 				 * Concave or convex region, follow CUBIC
 				 * cwnd growth.
 				 */
 				if (CC_ABC(ccv))
-					CCV(ccv, tcp_cwnd) = w_cubic_next;
+					CCV(ccv, tcp_cwnd) = MIN(w_cubic_next,
+					    INT_MAX);
 				else
-					CCV(ccv, tcp_cwnd) += ((w_cubic_next -
+					CCV(ccv, tcp_cwnd) += MAX(1,
+					    ((MIN(w_cubic_next, INT_MAX) -
 					    CCV(ccv, tcp_cwnd)) *
 					    CCV(ccv, tcp_mss)) /
-					    CCV(ccv, tcp_cwnd);
+					    CCV(ccv, tcp_cwnd));
 			}

 			/*
@@ -218,12 +224,34 @@ cubic_ack_received(struct cc_var *ccv, uint16_t type)
 			 * max_cwnd.
 			 */
 			if (cubic_data->num_cong_events == 0 &&
-			    cubic_data->max_cwnd < CCV(ccv, tcp_cwnd))
+			    cubic_data->max_cwnd < CCV(ccv, tcp_cwnd)) {
 				cubic_data->max_cwnd = CCV(ccv, tcp_cwnd);
+				cubic_data->K = cubic_k(cubic_data->max_cwnd /
+				    CCV(ccv, tcp_mss));
+			}
 		}
 	}
 }

+/*
+ * This is a Cubic specific implementation of after_idle.
+ *   - Reset cwnd by calling New Reno implementation of after_idle.
+ *   - Reset t_last_cong.
+ */
+static void
+cubic_after_idle(struct cc_var *ccv)
+{
+	struct cubic *cubic_data;
+
+	cubic_data = ccv->cc_data;
+
+	cubic_data->max_cwnd = max(cubic_data->max_cwnd, CCV(ccv, tcp_cwnd));
+	cubic_data->K = cubic_k(cubic_data->max_cwnd / CCV(ccv, tcp_mss));
+
+	newreno_cc_algo->after_idle(ccv);
+	cubic_data->t_last_cong = gethrtime();
+}
+
 static void
 cubic_cb_destroy(struct cc_var *ccv)
 {
@@ -330,6 +358,7 @@
 static void
 cubic_post_recovery(struct cc_var *ccv)
 {
 	struct cubic *cubic_data;
+	uint32_t mss, pipe;

 	cubic_data = ccv->cc_data;
@@ -339,11 +368,28 @@ cubic_post_recovery(struct cc_var *ccv)
 		    >> CUBIC_SHIFT;
 	}

+	mss = CCV(ccv, tcp_mss);
+
 	if (IN_FASTRECOVERY(ccv->flags)) {
-		/* Update cwnd based on beta and adjusted max_cwnd. */
-		CCV(ccv, tcp_cwnd) = max(1, ((CUBIC_BETA *
-		    cubic_data->max_cwnd) >> CUBIC_SHIFT));
+		/*
+		 * If inflight data is less than ssthresh, set cwnd
+		 * conservatively to avoid a burst of data, as suggested in
+		 * the NewReno RFC. Otherwise, use the CUBIC method.
+		 */
+		pipe = CCV(ccv, tcp_snxt) - CCV(ccv, tcp_suna);
+		if (pipe < CCV(ccv, tcp_cwnd_ssthresh)) {
+			/*
+			 * Ensure that cwnd does not collapse to 1 MSS under
+			 * adverse conditions. Implements RFC6582
+			 */
+			CCV(ccv, tcp_cwnd) = MAX(pipe, mss) + mss;
+		} else {
+			/* Update cwnd based on beta and adjusted max_cwnd. */
+			CCV(ccv, tcp_cwnd) = max(1, ((CUBIC_BETA *
+			    cubic_data->max_cwnd) >> CUBIC_SHIFT));
+		}
 	}
+
 	cubic_data->t_last_cong = gethrtime();

 	/* Calculate the average RTT between congestion epochs. */
@@ -355,7 +401,7 @@ cubic_post_recovery(struct cc_var *ccv)

 	cubic_data->epoch_ack_count = 0;
 	cubic_data->sum_rtt_nsecs = 0;
-	cubic_data->K = cubic_k(cubic_data->max_cwnd / CCV(ccv, tcp_mss));
+	cubic_data->K = cubic_k(cubic_data->max_cwnd / mss);
 }

 /*
diff --git a/usr/src/uts/common/inet/cc/cc_cubic.h b/usr/src/uts/common/inet/cc/cc_cubic.h
index c87751d257..cc6e6e459a 100644
--- a/usr/src/uts/common/inet/cc/cc_cubic.h
+++ b/usr/src/uts/common/inet/cc/cc_cubic.h
@@ -4,6 +4,7 @@
  * All rights reserved.
  * Copyright (c) 2017 by Delphix. All rights reserved.
  * Copyright 2019 Joyent, Inc.
+ * Copyright 2020 RackTop Systems, Inc.
  *
  * This software was developed by Lawrence Stewart while studying at the Centre
  * for Advanced Internet Architectures, Swinburne University of Technology, made
@@ -70,6 +71,12 @@
 /* Don't trust s_rtt until this many rtt samples have been taken. */
 #define	CUBIC_MIN_RTT_SAMPLES	8

+/*
+ * (2^21)^3 is long max. Dividing (2^63) by Cubic_C_factor
+ * and taking cube-root yields 448845 as the effective useful limit
+ */
+#define	CUBED_ROOT_MAX_ULONG	448845
+
 /* Userland only bits. */
 #ifndef _KERNEL

@@ -188,6 +195,11 @@ cubic_cwnd(hrtime_t nsecs_since_cong, uint32_t wmax, uint32_t smss, int64_t K)
 	 */
 	cwnd = (t - K * MILLISEC) / MILLISEC;

+	if (cwnd > CUBED_ROOT_MAX_ULONG)
+		return (INT_MAX);
+	if (cwnd < -CUBED_ROOT_MAX_ULONG)
+		return (0);
+
 	/* cwnd = (t - K)^3, with CUBIC_SHIFT^3 worth of precision. */
 	cwnd *= (cwnd * cwnd);

@@ -199,7 +211,10 @@ cubic_cwnd(hrtime_t nsecs_since_cong, uint32_t wmax, uint32_t smss, int64_t K)
 	 */
 	cwnd = ((cwnd * CUBIC_C_FACTOR * smss) >> CUBIC_SHIFT_4) + wmax;

-	return ((uint32_t)cwnd);
+	/*
+	 * for negative cwnd, limiting to zero as lower bound
+	 */
+	return (max(0, cwnd));
 }

 /*
diff --git a/usr/src/uts/common/inet/cc/cc_newreno.c b/usr/src/uts/common/inet/cc/cc_newreno.c
index ceb76d8643..5cb1c32534 100644
--- a/usr/src/uts/common/inet/cc/cc_newreno.c
+++ b/usr/src/uts/common/inet/cc/cc_newreno.c
@@ -7,6 +7,7 @@
  * Copyright (c) 2010 The FreeBSD Foundation
  * All rights reserved.
  * Copyright (c) 2017 by Delphix. All rights reserved.
+ * Copyright 2020 RackTop Systems, Inc.
  *
  * This software was developed at the Centre for Advanced Internet
  * Architectures, Swinburne University of Technology, by Lawrence Stewart, James
@@ -256,12 +257,25 @@ newreno_cong_signal(struct cc_var *ccv, uint32_t type)
 static void
 newreno_post_recovery(struct cc_var *ccv)
 {
+	uint32_t pipe;
+
 	if (IN_FASTRECOVERY(ccv->flags)) {
 		/*
 		 * Fast recovery will conclude after returning from this
-		 * function.
+		 * function. Window inflation should have left us with
+		 * approximately cwnd_ssthresh outstanding data. But in case we
+		 * would be inclined to send a burst, better to do it via the
+		 * slow start mechanism.
 		 */
-		if (CCV(ccv, tcp_cwnd) > CCV(ccv, tcp_cwnd_ssthresh)) {
+		pipe = CCV(ccv, tcp_snxt) - CCV(ccv, tcp_suna);
+		if (pipe < CCV(ccv, tcp_cwnd_ssthresh)) {
+			/*
+			 * Ensure that cwnd does not collapse to 1 MSS under
+			 * adverse conditions. Implements RFC6582
+			 */
+			CCV(ccv, tcp_cwnd) = MAX(pipe, CCV(ccv, tcp_mss)) +
+			    CCV(ccv, tcp_mss);
+		} else if (CCV(ccv, tcp_cwnd) > CCV(ccv, tcp_cwnd_ssthresh)) {
 			CCV(ccv, tcp_cwnd) = CCV(ccv, tcp_cwnd_ssthresh);
 		}
 	}
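The cc_cubic.h change guards the fixed-point cube in cubic_cwnd(): once the elapsed-time term exceeds CUBED_ROOT_MAX_ULONG, cubing it and multiplying by the CUBIC C factor would overflow a signed 64-bit value, so the function returns INT_MAX early (and 0 for the mirrored negative case). A quick standalone check of the constant is sketched below; it assumes the usual fixed-point C factor of 102 (0.4 shifted by CUBIC_SHIFT = 8), a value taken from the FreeBSD header rather than from this diff.

#include <stdint.h>
#include <stdio.h>

#define	CUBIC_C_FACTOR		102	/* assumed: 0.4 << CUBIC_SHIFT (8) */
#define	CUBED_ROOT_MAX_ULONG	448845

int
main(void)
{
	int64_t t = CUBED_ROOT_MAX_ULONG;

	/* t^3 * C still fits in int64_t; (t + 1)^3 * C would not. */
	printf("%lld^3 * %d = %lld\n", (long long)t, CUBIC_C_FACTOR,
	    (long long)(t * t * t * CUBIC_C_FACTOR));
	printf("INT64_MAX       = %lld\n", (long long)INT64_MAX);
	return (0);
}

Under that assumption, 448845 is the largest value whose cube times the C factor stays below INT64_MAX, which matches the comment added above the CUBED_ROOT_MAX_ULONG definition.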