summary refs log tree commit diff
path: root/usr/src/uts/common
diff options
context:
space:
mode:
author Garrett D'Amore <garrett@nexenta.com> 2011-08-17 16:31:10 -0700
committer Garrett D'Amore <garrett@nexenta.com> 2011-08-17 16:31:10 -0700
commit 3d0a255c417cf2e7b69e770de43f195b0eeffacb (patch)
tree 9fbbacfdcae24cab7914edc9825da1708191153b /usr/src/uts/common
parent 49ba5bc4e7d0a3e56108ec7cbf1d8cf2483ccca9 (diff)
download illumos-joyent-3d0a255c417cf2e7b69e770de43f195b0eeffacb.tar.gz
1361 Add support for socket options TCP_KEEPCNT, TCP_KEEPIDLE, TCP_KEEPINTVL
Reviewed by: Pavan <pavan.tc@gmail.com>
Reviewed by: Dan McDonald <danmcd at nexenta.com>
Reviewed by: Garrett D'Amore <garrett@damore.org>
Approved by: Garrett D'Amore <garrett@damore.org>
Diffstat (limited to 'usr/src/uts/common')
-rw-r--r-- usr/src/uts/common/inet/tcp.h 18
-rw-r--r-- usr/src/uts/common/inet/tcp/tcp.c 3
-rw-r--r-- usr/src/uts/common/inet/tcp/tcp_opt_data.c 100
-rw-r--r-- usr/src/uts/common/inet/tcp/tcp_timers.c 10
-rw-r--r-- usr/src/uts/common/netinet/tcp.h 4
5 files changed, 133 insertions, 2 deletions
diff --git a/usr/src/uts/common/inet/tcp.h b/usr/src/uts/common/inet/tcp.h
index 460f183884..d95f2559c6 100644
--- a/usr/src/uts/common/inet/tcp.h
+++ b/usr/src/uts/common/inet/tcp.h
@@ -21,6 +21,7 @@
/*
* Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved.
*/
/* Copyright (c) 1990 Mentat Inc. */
@@ -334,11 +335,26 @@ typedef struct tcp_s {
} tcp_conn;
uint32_t tcp_syn_rcvd_timeout; /* How many SYN_RCVD timeout in q0 */
- /* TCP Keepalive Timer members */
+ /*
+ * TCP Keepalive Timer members.
+ * All keepalive timer intervals are in milliseconds.
+ */
int32_t tcp_ka_last_intrvl; /* Last probe interval */
timeout_id_t tcp_ka_tid; /* Keepalive timer ID */
uint32_t tcp_ka_interval; /* Keepalive interval */
+
+ /*
+ * TCP connection is terminated if we don't hear back from the peer
+ * for tcp_ka_abort_thres milliseconds after the first keepalive probe.
+ * tcp_ka_rinterval is the interval in milliseconds between successive
+ * keepalive probes. tcp_ka_cnt is the number of keepalive probes to
+ * be sent before terminating the connection, if we don't hear back from
+ * peer.
+ * tcp_ka_abort_thres = tcp_ka_rinterval * tcp_ka_cnt
+ */
+ uint32_t tcp_ka_rinterval; /* keepalive retransmit interval */
uint32_t tcp_ka_abort_thres; /* Keepalive abort threshold */
+ uint32_t tcp_ka_cnt; /* count of keepalive probes */
int32_t tcp_client_errno; /* How the client screwed up */
diff --git a/usr/src/uts/common/inet/tcp/tcp.c b/usr/src/uts/common/inet/tcp/tcp.c
index 8d3dacf35b..0734468ea0 100644
--- a/usr/src/uts/common/inet/tcp/tcp.c
+++ b/usr/src/uts/common/inet/tcp/tcp.c
@@ -22,6 +22,7 @@
/*
* Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, Joyent Inc. All rights reserved.
+ * Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved.
*/
/* Copyright (c) 1990 Mentat Inc. */
@@ -2354,6 +2355,8 @@ tcp_init_values(tcp_t *tcp, tcp_t *parent)
tcp->tcp_ka_interval = tcps->tcps_keepalive_interval;
tcp->tcp_ka_abort_thres = tcps->tcps_keepalive_abort_interval;
+ tcp->tcp_ka_cnt = 0;
+ tcp->tcp_ka_rinterval = 0;
/*
* Default value of tcp_init_cwnd is 0, so no need to set here
diff --git a/usr/src/uts/common/inet/tcp/tcp_opt_data.c b/usr/src/uts/common/inet/tcp/tcp_opt_data.c
index c1614463c2..960c3d8902 100644
--- a/usr/src/uts/common/inet/tcp/tcp_opt_data.c
+++ b/usr/src/uts/common/inet/tcp/tcp_opt_data.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved.
*/
#include <sys/types.h>
@@ -117,6 +118,12 @@ opdes_t tcp_opt_arr[] = {
{ TCP_KEEPALIVE_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0,
sizeof (int), 0 },
+{ TCP_KEEPIDLE, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
+
+{ TCP_KEEPCNT, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
+
+{ TCP_KEEPINTVL, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
+
{ TCP_KEEPALIVE_ABORT_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0,
sizeof (int), 0 },
@@ -403,6 +410,25 @@ tcp_opt_get(conn_t *connp, int level, int name, uchar_t *ptr)
case TCP_KEEPALIVE_THRESHOLD:
*i1 = tcp->tcp_ka_interval;
return (sizeof (int));
+
+ /*
+ * TCP_KEEPIDLE expects value in seconds, but
+ * tcp_ka_interval is in milliseconds.
+ */
+ case TCP_KEEPIDLE:
+ *i1 = tcp->tcp_ka_interval / 1000;
+ return (sizeof (int));
+ case TCP_KEEPCNT:
+ *i1 = tcp->tcp_ka_cnt;
+ return (sizeof (int));
+
+ /*
+ * TCP_KEEPINTVL expects value in seconds, but
+ * tcp_ka_rinterval is in milliseconds.
+ */
+ case TCP_KEEPINTVL:
+ *i1 = tcp->tcp_ka_rinterval / 1000;
+ return (sizeof (int));
case TCP_KEEPALIVE_ABORT_THRESHOLD:
*i1 = tcp->tcp_ka_abort_thres;
return (sizeof (int));
@@ -682,6 +708,18 @@ tcp_opt_set(conn_t *connp, uint_t optset_context, int level, int name,
}
tcp->tcp_init_cwnd = val;
break;
+
+ /*
+ * TCP_KEEPIDLE is in seconds but TCP_KEEPALIVE_THRESHOLD
+ * is in milliseconds. TCP_KEEPIDLE is introduced for
+ * compatibility with other Unix flavors.
+ * We can fall through TCP_KEEPALIVE_THRESHOLD logic after
+ * converting the input to milliseconds.
+ */
+ case TCP_KEEPIDLE:
+ *i1 *= 1000;
+ /* fall through */
+
case TCP_KEEPALIVE_THRESHOLD:
if (checkonly)
break;
@@ -708,6 +746,66 @@ tcp_opt_set(conn_t *connp, uint_t optset_context, int level, int name,
}
}
break;
+
+ /*
+ * tcp_ka_abort_thres = tcp_ka_rinterval * tcp_ka_cnt.
+ * So setting TCP_KEEPCNT or TCP_KEEPINTVL can affect all the
+ * three members - tcp_ka_abort_thres, tcp_ka_rinterval and
+ * tcp_ka_cnt.
+ */
+ case TCP_KEEPCNT:
+ if (checkonly)
+ break;
+
+ if (*i1 == 0) {
+ return (EINVAL);
+ } else if (tcp->tcp_ka_rinterval == 0) {
+ if ((tcp->tcp_ka_abort_thres / *i1) <
+ tcp->tcp_rto_min ||
+ (tcp->tcp_ka_abort_thres / *i1) >
+ tcp->tcp_rto_max)
+ return (EINVAL);
+
+ tcp->tcp_ka_rinterval =
+ tcp->tcp_ka_abort_thres / *i1;
+ } else {
+ if ((*i1 * tcp->tcp_ka_rinterval) <
+ tcps->tcps_keepalive_abort_interval_low ||
+ (*i1 * tcp->tcp_ka_rinterval) >
+ tcps->tcps_keepalive_abort_interval_high)
+ return (EINVAL);
+ tcp->tcp_ka_abort_thres =
+ (*i1 * tcp->tcp_ka_rinterval);
+ }
+ tcp->tcp_ka_cnt = *i1;
+ break;
+ case TCP_KEEPINTVL:
+ /*
+ * TCP_KEEPINTVL is specified in seconds, but
+ * tcp_ka_rinterval is in milliseconds.
+ */
+
+ if (checkonly)
+ break;
+
+ if ((*i1 * 1000) < tcp->tcp_rto_min ||
+ (*i1 * 1000) > tcp->tcp_rto_max)
+ return (EINVAL);
+
+ if (tcp->tcp_ka_cnt == 0) {
+ tcp->tcp_ka_cnt =
+ tcp->tcp_ka_abort_thres / (*i1 * 1000);
+ } else {
+ if ((*i1 * tcp->tcp_ka_cnt * 1000) <
+ tcps->tcps_keepalive_abort_interval_low ||
+ (*i1 * tcp->tcp_ka_cnt * 1000) >
+ tcps->tcps_keepalive_abort_interval_high)
+ return (EINVAL);
+ tcp->tcp_ka_abort_thres =
+ (*i1 * tcp->tcp_ka_cnt * 1000);
+ }
+ tcp->tcp_ka_rinterval = *i1 * 1000;
+ break;
case TCP_KEEPALIVE_ABORT_THRESHOLD:
if (!checkonly) {
if (*i1 <
@@ -718,6 +816,8 @@ tcp_opt_set(conn_t *connp, uint_t optset_context, int level, int name,
return (EINVAL);
}
tcp->tcp_ka_abort_thres = *i1;
+ tcp->tcp_ka_cnt = 0;
+ tcp->tcp_ka_rinterval = 0;
}
break;
case TCP_CORK:
diff --git a/usr/src/uts/common/inet/tcp/tcp_timers.c b/usr/src/uts/common/inet/tcp/tcp_timers.c
index c883be8cfd..90e1c9178c 100644
--- a/usr/src/uts/common/inet/tcp/tcp_timers.c
+++ b/usr/src/uts/common/inet/tcp/tcp_timers.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved.
*/
#include <sys/types.h>
@@ -390,6 +391,11 @@ tcp_timers_stop(tcp_t *tcp)
* (tcp_ka_interval + tcp_ka_abort_thres) we have not heard anything,
* kill the connection unless the keepalive abort threshold is 0. In
* that case, we will probe "forever."
+ * If tcp_ka_cnt and tcp_ka_rinterval are non-zero, then we do not follow
+ * the exponential backoff, but send probes tcp_ka_cnt times in regular
+ * intervals of tcp_ka_rinterval milliseconds until we hear back from peer.
+ * Kill the connection if we don't hear back from peer after tcp_ka_cnt
+ * probes are sent.
*/
void
tcp_keepalive_timer(void *arg)
@@ -455,7 +461,9 @@ tcp_keepalive_timer(void *arg)
if (mp != NULL) {
tcp_send_data(tcp, mp);
TCPS_BUMP_MIB(tcps, tcpTimKeepaliveProbe);
- if (tcp->tcp_ka_last_intrvl != 0) {
+ if (tcp->tcp_ka_rinterval) {
+ firetime = tcp->tcp_ka_rinterval;
+ } else if (tcp->tcp_ka_last_intrvl != 0) {
int max;
/*
* We should probe again at least
diff --git a/usr/src/uts/common/netinet/tcp.h b/usr/src/uts/common/netinet/tcp.h
index 9a08545ab7..f6c2fc160b 100644
--- a/usr/src/uts/common/netinet/tcp.h
+++ b/usr/src/uts/common/netinet/tcp.h
@@ -21,6 +21,7 @@
/*
* Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved.
*/
/*
@@ -125,6 +126,9 @@ struct tcphdr {
/* gap for expansion of ``standard'' options */
#define TCP_ANONPRIVBIND 0x20 /* for internal use only */
#define TCP_EXCLBIND 0x21 /* for internal use only */
+#define TCP_KEEPIDLE 0x22
+#define TCP_KEEPCNT 0x23
+#define TCP_KEEPINTVL 0x24
#ifdef __cplusplus
}