summary | refs | log | tree | commit | diff
path: root/usr/src/uts/common/inet/tcp/tcp.c
diff options
context:
space:
mode:
author: Sebastien Roy <seb@delphix.com> 2017-08-01 13:21:40 -0400
committer: Cody Peter Mello <melloc@writev.io> 2019-08-09 22:54:08 +0000
commit: 9cba20af356f81a5f469ca8aa89ed17ee827b6a6 (patch)
tree: bbd75aad827348f90befdc813aac7f66f4a9565a /usr/src/uts/common/inet/tcp/tcp.c
parent: b456ecdeb57b9ceefc89cd60f374f8fff83acf91 (diff)
download: illumos-joyent-9cba20af356f81a5f469ca8aa89ed17ee827b6a6.tar.gz
OS-7329 Want pluggable TCP congestion control algorithms
Reviewed by: Dan McDonald <danmcd@joyent.com>
Reviewed by: Robert Mustacchi <robert.mustacchi@joyent.com>
Approved by: Robert Mustacchi <robert.mustacchi@joyent.com>
Diffstat (limited to 'usr/src/uts/common/inet/tcp/tcp.c')
-rw-r--r-- usr/src/uts/common/inet/tcp/tcp.c 33
1 files changed, 28 insertions, 5 deletions
diff --git a/usr/src/uts/common/inet/tcp/tcp.c b/usr/src/uts/common/inet/tcp/tcp.c
index ba66be0b2b..1f4025a50c 100644
--- a/usr/src/uts/common/inet/tcp/tcp.c
+++ b/usr/src/uts/common/inet/tcp/tcp.c
@@ -23,7 +23,7 @@
* Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2017 Joyent, Inc.
* Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved.
- * Copyright (c) 2013, 2016 by Delphix. All rights reserved.
+ * Copyright (c) 2013, 2017 by Delphix. All rights reserved.
* Copyright 2014, OmniTI Computer Consulting, Inc. All rights reserved.
*/
/* Copyright (c) 1990 Mentat Inc. */
@@ -74,6 +74,7 @@
#include <inet/ipsec_impl.h>
#include <inet/common.h>
+#include <inet/cc.h>
#include <inet/ip.h>
#include <inet/ip_impl.h>
#include <inet/ip6.h>
@@ -1408,6 +1409,10 @@ tcp_free(tcp_t *tcp)
*/
tcp_close_mpp(&tcp->tcp_conn.tcp_eager_conn_ind);
+ /* Allow the CC algorithm to clean up after itself. */
+ if (tcp->tcp_cc_algo != NULL && tcp->tcp_cc_algo->cb_destroy != NULL)
+ tcp->tcp_cc_algo->cb_destroy(&tcp->tcp_ccv);
+
/*
* Destroy any association with SO_REUSEPORT group.
*/
@@ -1469,7 +1474,7 @@ tcp_free(tcp_t *tcp)
* collector will free up the freelist if the connection ends up sitting
* there for too long.
*/
-void *
+conn_t *
tcp_get_conn(void *arg, tcp_stack_t *tcps)
{
tcp_t *tcp = NULL;
@@ -1508,7 +1513,7 @@ tcp_get_conn(void *arg, tcp_stack_t *tcps)
connp->conn_recv = tcp_input_data;
ASSERT(connp->conn_recvicmp == tcp_icmp_input);
ASSERT(connp->conn_verifyicmp == tcp_verifyicmp);
- return ((void *)connp);
+ return (connp);
}
mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
/*
@@ -1543,7 +1548,7 @@ tcp_get_conn(void *arg, tcp_stack_t *tcps)
connp->conn_ixa->ixa_notify = tcp_notify;
connp->conn_ixa->ixa_notify_cookie = tcp;
- return ((void *)connp);
+ return (connp);
}
/*
@@ -2312,6 +2317,11 @@ tcp_reinit_values(tcp_t *tcp)
ASSERT(tcp->tcp_listen_cnt == NULL);
ASSERT(tcp->tcp_reass_tid == 0);
+ /* Allow the CC algorithm to clean up after itself. */
+ if (tcp->tcp_cc_algo->cb_destroy != NULL)
+ tcp->tcp_cc_algo->cb_destroy(&tcp->tcp_ccv);
+ tcp->tcp_cc_algo = NULL;
+
#undef DONTCARE
#undef PRESERVE
}
@@ -2332,7 +2342,12 @@ tcp_init_values(tcp_t *tcp, tcp_t *parent)
(connp->conn_ipversion == IPV4_VERSION ||
connp->conn_ipversion == IPV6_VERSION)));
+ tcp->tcp_ccv.type = IPPROTO_TCP;
+ tcp->tcp_ccv.ccvc.tcp = tcp;
+
if (parent == NULL) {
+ tcp->tcp_cc_algo = tcps->tcps_default_cc_algo;
+
tcp->tcp_naglim = tcps->tcps_naglim_def;
tcp->tcp_rto_initial = tcps->tcps_rexmit_interval_initial;
@@ -2360,6 +2375,8 @@ tcp_init_values(tcp_t *tcp, tcp_t *parent)
*/
} else {
/* Inherit various TCP parameters from the parent. */
+ tcp->tcp_cc_algo = parent->tcp_cc_algo;
+
tcp->tcp_naglim = parent->tcp_naglim;
tcp->tcp_rto_initial = parent->tcp_rto_initial;
@@ -2386,6 +2403,9 @@ tcp_init_values(tcp_t *tcp, tcp_t *parent)
tcp->tcp_init_cwnd = parent->tcp_init_cwnd;
}
+ if (tcp->tcp_cc_algo->cb_init != NULL)
+ VERIFY(tcp->tcp_cc_algo->cb_init(&tcp->tcp_ccv) == 0);
+
/*
* Initialize tcp_rtt_sa and tcp_rtt_sd so that the calculated RTO
* will be close to tcp_rexmit_interval_initial. By doing this, we
@@ -2633,7 +2653,7 @@ tcp_create_common(cred_t *credp, boolean_t isv6, boolean_t issocket,
}
sqp = IP_SQUEUE_GET((uint_t)gethrtime());
- connp = (conn_t *)tcp_get_conn(sqp, tcps);
+ connp = tcp_get_conn(sqp, tcps);
/*
* Both tcp_get_conn and netstack_find_by_cred incremented refcnt,
* so we drop it by one.
@@ -3822,6 +3842,9 @@ tcp_stack_init(netstackid_t stackid, netstack_t *ns)
list_create(&tcps->tcps_listener_conf, sizeof (tcp_listener_t),
offsetof(tcp_listener_t, tl_link));
+ tcps->tcps_default_cc_algo = cc_load_algo(CC_DEFAULT_ALGO_NAME);
+ VERIFY3P(tcps->tcps_default_cc_algo, !=, NULL);
+
return (tcps);
}