diff options
author | Garrett D'Amore <garrett@nexenta.com> | 2011-08-17 16:31:10 -0700 |
---|---|---|
committer | Garrett D'Amore <garrett@nexenta.com> | 2011-08-17 16:31:10 -0700 |
commit | 3d0a255c417cf2e7b69e770de43f195b0eeffacb (patch) | |
tree | 9fbbacfdcae24cab7914edc9825da1708191153b /usr/src/uts/common | |
parent | 49ba5bc4e7d0a3e56108ec7cbf1d8cf2483ccca9 (diff) | |
download | illumos-joyent-3d0a255c417cf2e7b69e770de43f195b0eeffacb.tar.gz |
1361 Add support for socket options TCP_KEEPCNT, TCP_KEEPIDLE, TCP_KEEPINTVL
Reviewed by: Pavan <pavan.tc@gmail.com>
Reviewed by: Dan McDonald <danmcd at nexenta.com>
Reviewed by: Garrett D'Amore <garrett@damore.org>
Approved by: Garrett D'Amore <garrett@damore.org>
Diffstat (limited to 'usr/src/uts/common')
-rw-r--r-- | usr/src/uts/common/inet/tcp.h | 18 |
-rw-r--r-- | usr/src/uts/common/inet/tcp/tcp.c | 3 |
-rw-r--r-- | usr/src/uts/common/inet/tcp/tcp_opt_data.c | 100 |
-rw-r--r-- | usr/src/uts/common/inet/tcp/tcp_timers.c | 10 |
-rw-r--r-- | usr/src/uts/common/netinet/tcp.h | 4 |
5 files changed, 133 insertions, 2 deletions
diff --git a/usr/src/uts/common/inet/tcp.h b/usr/src/uts/common/inet/tcp.h index 460f183884..d95f2559c6 100644 --- a/usr/src/uts/common/inet/tcp.h +++ b/usr/src/uts/common/inet/tcp.h @@ -21,6 +21,7 @@ /* * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011, Joyent, Inc. All rights reserved. + * Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved. */ /* Copyright (c) 1990 Mentat Inc. */ @@ -334,11 +335,26 @@ typedef struct tcp_s { } tcp_conn; uint32_t tcp_syn_rcvd_timeout; /* How many SYN_RCVD timeout in q0 */ - /* TCP Keepalive Timer members */ + /* + * TCP Keepalive Timer members. + * All keepalive timer intervals are in milliseconds. + */ int32_t tcp_ka_last_intrvl; /* Last probe interval */ timeout_id_t tcp_ka_tid; /* Keepalive timer ID */ uint32_t tcp_ka_interval; /* Keepalive interval */ + + /* + * TCP connection is terminated if we don't hear back from the peer + * for tcp_ka_abort_thres milliseconds after the first keepalive probe. + * tcp_ka_rinterval is the interval in milliseconds between successive + * keepalive probes. tcp_ka_cnt is the number of keepalive probes to + * be sent before terminating the connection, if we don't hear back from + * peer. + * tcp_ka_abort_thres = tcp_ka_rinterval * tcp_ka_cnt + */ + uint32_t tcp_ka_rinterval; /* keepalive retransmit interval */ uint32_t tcp_ka_abort_thres; /* Keepalive abort threshold */ + uint32_t tcp_ka_cnt; /* count of keepalive probes */ int32_t tcp_client_errno; /* How the client screwed up */ diff --git a/usr/src/uts/common/inet/tcp/tcp.c b/usr/src/uts/common/inet/tcp/tcp.c index 8d3dacf35b..0734468ea0 100644 --- a/usr/src/uts/common/inet/tcp/tcp.c +++ b/usr/src/uts/common/inet/tcp/tcp.c @@ -22,6 +22,7 @@ /* * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011, Joyent Inc. All rights reserved. + * Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved. */ /* Copyright (c) 1990 Mentat Inc. 
*/ @@ -2354,6 +2355,8 @@ tcp_init_values(tcp_t *tcp, tcp_t *parent) tcp->tcp_ka_interval = tcps->tcps_keepalive_interval; tcp->tcp_ka_abort_thres = tcps->tcps_keepalive_abort_interval; + tcp->tcp_ka_cnt = 0; + tcp->tcp_ka_rinterval = 0; /* * Default value of tcp_init_cwnd is 0, so no need to set here diff --git a/usr/src/uts/common/inet/tcp/tcp_opt_data.c b/usr/src/uts/common/inet/tcp/tcp_opt_data.c index c1614463c2..960c3d8902 100644 --- a/usr/src/uts/common/inet/tcp/tcp_opt_data.c +++ b/usr/src/uts/common/inet/tcp/tcp_opt_data.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved. */ #include <sys/types.h> @@ -117,6 +118,12 @@ opdes_t tcp_opt_arr[] = { { TCP_KEEPALIVE_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 }, +{ TCP_KEEPIDLE, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 }, + +{ TCP_KEEPCNT, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 }, + +{ TCP_KEEPINTVL, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 }, + { TCP_KEEPALIVE_ABORT_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 }, @@ -403,6 +410,25 @@ tcp_opt_get(conn_t *connp, int level, int name, uchar_t *ptr) case TCP_KEEPALIVE_THRESHOLD: *i1 = tcp->tcp_ka_interval; return (sizeof (int)); + + /* + * TCP_KEEPIDLE expects value in seconds, but + * tcp_ka_interval is in milliseconds. + */ + case TCP_KEEPIDLE: + *i1 = tcp->tcp_ka_interval / 1000; + return (sizeof (int)); + case TCP_KEEPCNT: + *i1 = tcp->tcp_ka_cnt; + return (sizeof (int)); + + /* + * TCP_KEEPINTVL expects value in seconds, but + * tcp_ka_rinterval is in milliseconds. 
+ */ + case TCP_KEEPINTVL: + *i1 = tcp->tcp_ka_rinterval / 1000; + return (sizeof (int)); case TCP_KEEPALIVE_ABORT_THRESHOLD: *i1 = tcp->tcp_ka_abort_thres; return (sizeof (int)); @@ -682,6 +708,18 @@ tcp_opt_set(conn_t *connp, uint_t optset_context, int level, int name, } tcp->tcp_init_cwnd = val; break; + + /* + * TCP_KEEPIDLE is in seconds but TCP_KEEPALIVE_THRESHOLD + * is in milliseconds. TCP_KEEPIDLE is introduced for + * compatibility with other Unix flavors. + * We can fall through TCP_KEEPALIVE_THRESHOLD logic after + * converting the input to milliseconds. + */ + case TCP_KEEPIDLE: + *i1 *= 1000; + /* fall through */ + case TCP_KEEPALIVE_THRESHOLD: if (checkonly) break; @@ -708,6 +746,66 @@ tcp_opt_set(conn_t *connp, uint_t optset_context, int level, int name, } } break; + + /* + * tcp_ka_abort_thres = tcp_ka_rinterval * tcp_ka_cnt. + * So setting TCP_KEEPCNT or TCP_KEEPINTVL can affect all the + * three members - tcp_ka_abort_thres, tcp_ka_rinterval and + * tcp_ka_cnt. + */ + case TCP_KEEPCNT: + if (checkonly) + break; + + if (*i1 == 0) { + return (EINVAL); + } else if (tcp->tcp_ka_rinterval == 0) { + if ((tcp->tcp_ka_abort_thres / *i1) < + tcp->tcp_rto_min || + (tcp->tcp_ka_abort_thres / *i1) > + tcp->tcp_rto_max) + return (EINVAL); + + tcp->tcp_ka_rinterval = + tcp->tcp_ka_abort_thres / *i1; + } else { + if ((*i1 * tcp->tcp_ka_rinterval) < + tcps->tcps_keepalive_abort_interval_low || + (*i1 * tcp->tcp_ka_rinterval) > + tcps->tcps_keepalive_abort_interval_high) + return (EINVAL); + tcp->tcp_ka_abort_thres = + (*i1 * tcp->tcp_ka_rinterval); + } + tcp->tcp_ka_cnt = *i1; + break; + case TCP_KEEPINTVL: + /* + * TCP_KEEPINTVL is specified in seconds, but + * tcp_ka_rinterval is in milliseconds. 
+ */ + + if (checkonly) + break; + + if ((*i1 * 1000) < tcp->tcp_rto_min || + (*i1 * 1000) > tcp->tcp_rto_max) + return (EINVAL); + + if (tcp->tcp_ka_cnt == 0) { + tcp->tcp_ka_cnt = + tcp->tcp_ka_abort_thres / (*i1 * 1000); + } else { + if ((*i1 * tcp->tcp_ka_cnt * 1000) < + tcps->tcps_keepalive_abort_interval_low || + (*i1 * tcp->tcp_ka_cnt * 1000) > + tcps->tcps_keepalive_abort_interval_high) + return (EINVAL); + tcp->tcp_ka_abort_thres = + (*i1 * tcp->tcp_ka_cnt * 1000); + } + tcp->tcp_ka_rinterval = *i1 * 1000; + break; case TCP_KEEPALIVE_ABORT_THRESHOLD: if (!checkonly) { if (*i1 < @@ -718,6 +816,8 @@ tcp_opt_set(conn_t *connp, uint_t optset_context, int level, int name, return (EINVAL); } tcp->tcp_ka_abort_thres = *i1; + tcp->tcp_ka_cnt = 0; + tcp->tcp_ka_rinterval = 0; } break; case TCP_CORK: diff --git a/usr/src/uts/common/inet/tcp/tcp_timers.c b/usr/src/uts/common/inet/tcp/tcp_timers.c index c883be8cfd..90e1c9178c 100644 --- a/usr/src/uts/common/inet/tcp/tcp_timers.c +++ b/usr/src/uts/common/inet/tcp/tcp_timers.c @@ -21,6 +21,7 @@ /* * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved. */ #include <sys/types.h> @@ -390,6 +391,11 @@ tcp_timers_stop(tcp_t *tcp) * (tcp_ka_interval + tcp_ka_abort_thres) we have not heard anything, * kill the connection unless the keepalive abort threshold is 0. In * that case, we will probe "forever." + * If tcp_ka_cnt and tcp_ka_rinterval are non-zero, then we do not follow + * the exponential backoff, but send probes tcp_ka_cnt times in regular + * intervals of tcp_ka_rinterval milliseconds until we hear back from peer. + * Kill the connection if we don't hear back from peer after tcp_ka_cnt + * probes are sent. 
*/ void tcp_keepalive_timer(void *arg) @@ -455,7 +461,9 @@ tcp_keepalive_timer(void *arg) if (mp != NULL) { tcp_send_data(tcp, mp); TCPS_BUMP_MIB(tcps, tcpTimKeepaliveProbe); - if (tcp->tcp_ka_last_intrvl != 0) { + if (tcp->tcp_ka_rinterval) { + firetime = tcp->tcp_ka_rinterval; + } else if (tcp->tcp_ka_last_intrvl != 0) { int max; /* * We should probe again at least diff --git a/usr/src/uts/common/netinet/tcp.h b/usr/src/uts/common/netinet/tcp.h index 9a08545ab7..f6c2fc160b 100644 --- a/usr/src/uts/common/netinet/tcp.h +++ b/usr/src/uts/common/netinet/tcp.h @@ -21,6 +21,7 @@ /* * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved. */ /* @@ -125,6 +126,9 @@ struct tcphdr { /* gap for expansion of ``standard'' options */ #define TCP_ANONPRIVBIND 0x20 /* for internal use only */ #define TCP_EXCLBIND 0x21 /* for internal use only */ +#define TCP_KEEPIDLE 0x22 +#define TCP_KEEPCNT 0x23 +#define TCP_KEEPINTVL 0x24 #ifdef __cplusplus } |