summary | refs | log | tree | commit | diff
path: root/usr/src/uts/common/inet/tcp/tcp.c
diff options
context:
space:
mode:
author Sebastien Roy <seb@delphix.com> 2017-08-01 13:21:40 -0400
committer Robert Mustacchi <rm@joyent.com> 2019-08-23 18:42:52 +0000
commit 45a4b79d042e642c2ed7090ec290469ccf8fc563 (patch)
tree 3a2b9b0104d34bf6063ec1875142e69c1bc7a296 /usr/src/uts/common/inet/tcp/tcp.c
parent 867a2ce85cd3f659cb7bc187ba93a095fe1df597 (diff)
download illumos-joyent-45a4b79d042e642c2ed7090ec290469ccf8fc563.tar.gz
11553 Want pluggable TCP congestion control algorithms
Portions contributed by: Cody Peter Mello <cody.mello@joyent.com>
Reviewed by: Dan McDonald <danmcd@joyent.com>
Reviewed by: Robert Mustacchi <robert.mustacchi@joyent.com>
Approved by: Richard Lowe <richlowe@richlowe.net>
Diffstat (limited to 'usr/src/uts/common/inet/tcp/tcp.c')
-rw-r--r-- usr/src/uts/common/inet/tcp/tcp.c 33
1 files changed, 28 insertions, 5 deletions
diff --git a/usr/src/uts/common/inet/tcp/tcp.c b/usr/src/uts/common/inet/tcp/tcp.c
index d7458c8eee..bfa08ada8c 100644
--- a/usr/src/uts/common/inet/tcp/tcp.c
+++ b/usr/src/uts/common/inet/tcp/tcp.c
@@ -23,7 +23,7 @@
* Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, Joyent Inc. All rights reserved.
* Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved.
- * Copyright (c) 2013, 2016 by Delphix. All rights reserved.
+ * Copyright (c) 2013, 2017 by Delphix. All rights reserved.
* Copyright 2014, OmniTI Computer Consulting, Inc. All rights reserved.
*/
/* Copyright (c) 1990 Mentat Inc. */
@@ -74,6 +74,7 @@
#include <inet/ipsec_impl.h>
#include <inet/common.h>
+#include <inet/cc.h>
#include <inet/ip.h>
#include <inet/ip_impl.h>
#include <inet/ip6.h>
@@ -1409,6 +1410,10 @@ tcp_free(tcp_t *tcp)
*/
tcp_close_mpp(&tcp->tcp_conn.tcp_eager_conn_ind);
+ /* Allow the CC algorithm to clean up after itself. */
+ if (tcp->tcp_cc_algo != NULL && tcp->tcp_cc_algo->cb_destroy != NULL)
+ tcp->tcp_cc_algo->cb_destroy(&tcp->tcp_ccv);
+
/*
* If this is a non-STREAM socket still holding on to an upper
* handle, release it. As a result of fallback we might also see
@@ -1455,7 +1460,7 @@ tcp_free(tcp_t *tcp)
* collector will free up the freelist if the connection ends up sitting
* there for too long.
*/
-void *
+conn_t *
tcp_get_conn(void *arg, tcp_stack_t *tcps)
{
tcp_t *tcp = NULL;
@@ -1494,7 +1499,7 @@ tcp_get_conn(void *arg, tcp_stack_t *tcps)
connp->conn_recv = tcp_input_data;
ASSERT(connp->conn_recvicmp == tcp_icmp_input);
ASSERT(connp->conn_verifyicmp == tcp_verifyicmp);
- return ((void *)connp);
+ return (connp);
}
mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
/*
@@ -1529,7 +1534,7 @@ tcp_get_conn(void *arg, tcp_stack_t *tcps)
connp->conn_ixa->ixa_notify = tcp_notify;
connp->conn_ixa->ixa_notify_cookie = tcp;
- return ((void *)connp);
+ return (connp);
}
/*
@@ -2298,6 +2303,11 @@ tcp_reinit_values(tcp_t *tcp)
ASSERT(tcp->tcp_listen_cnt == NULL);
ASSERT(tcp->tcp_reass_tid == 0);
+ /* Allow the CC algorithm to clean up after itself. */
+ if (tcp->tcp_cc_algo->cb_destroy != NULL)
+ tcp->tcp_cc_algo->cb_destroy(&tcp->tcp_ccv);
+ tcp->tcp_cc_algo = NULL;
+
#undef DONTCARE
#undef PRESERVE
}
@@ -2318,7 +2328,12 @@ tcp_init_values(tcp_t *tcp, tcp_t *parent)
(connp->conn_ipversion == IPV4_VERSION ||
connp->conn_ipversion == IPV6_VERSION)));
+ tcp->tcp_ccv.type = IPPROTO_TCP;
+ tcp->tcp_ccv.ccvc.tcp = tcp;
+
if (parent == NULL) {
+ tcp->tcp_cc_algo = tcps->tcps_default_cc_algo;
+
tcp->tcp_naglim = tcps->tcps_naglim_def;
tcp->tcp_rto_initial = tcps->tcps_rexmit_interval_initial;
@@ -2346,6 +2361,8 @@ tcp_init_values(tcp_t *tcp, tcp_t *parent)
*/
} else {
/* Inherit various TCP parameters from the parent. */
+ tcp->tcp_cc_algo = parent->tcp_cc_algo;
+
tcp->tcp_naglim = parent->tcp_naglim;
tcp->tcp_rto_initial = parent->tcp_rto_initial;
@@ -2372,6 +2389,9 @@ tcp_init_values(tcp_t *tcp, tcp_t *parent)
tcp->tcp_init_cwnd = parent->tcp_init_cwnd;
}
+ if (tcp->tcp_cc_algo->cb_init != NULL)
+ VERIFY(tcp->tcp_cc_algo->cb_init(&tcp->tcp_ccv) == 0);
+
/*
* Initialize tcp_rtt_sa and tcp_rtt_sd so that the calculated RTO
* will be close to tcp_rexmit_interval_initial. By doing this, we
@@ -2616,7 +2636,7 @@ tcp_create_common(cred_t *credp, boolean_t isv6, boolean_t issocket,
}
sqp = IP_SQUEUE_GET((uint_t)gethrtime());
- connp = (conn_t *)tcp_get_conn(sqp, tcps);
+ connp = tcp_get_conn(sqp, tcps);
/*
* Both tcp_get_conn and netstack_find_by_cred incremented refcnt,
* so we drop it by one.
@@ -3807,6 +3827,9 @@ tcp_stack_init(netstackid_t stackid, netstack_t *ns)
list_create(&tcps->tcps_listener_conf, sizeof (tcp_listener_t),
offsetof(tcp_listener_t, tl_link));
+ tcps->tcps_default_cc_algo = cc_load_algo(CC_DEFAULT_ALGO_NAME);
+ VERIFY3P(tcps->tcps_default_cc_algo, !=, NULL);
+
return (tcps);
}