summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Patrick Mooney <pmooney@pfmooney.com> 2016-08-23 18:55:09 +0000
committer Patrick Mooney <pmooney@pfmooney.com> 2016-08-31 16:08:12 +0000
commit 4a663ad4eabd38f9c6397c5205cc12f083b474ca (patch)
tree c2d4420853113fab3c17d6f78d4a1599f8bfc3c7
parent ee5b9c83a7ae2c239b08a811615a37a77687e3f2 (diff)
download illumos-joyent-4a663ad4eabd38f9c6397c5205cc12f083b474ca.tar.gz
OS-5613 SO_REUSEPORT needs better state-change coverage
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Approved by: Jerry Jelinek <jerry.jelinek@joyent.com>
-rw-r--r-- usr/src/uts/common/inet/tcp/tcp_bind.c 13
-rw-r--r-- usr/src/uts/common/inet/tcp/tcp_opt_data.c 83
2 files changed, 81 insertions, 15 deletions
diff --git a/usr/src/uts/common/inet/tcp/tcp_bind.c b/usr/src/uts/common/inet/tcp/tcp_bind.c
index adc201eebb..7ea9dc3413 100644
--- a/usr/src/uts/common/inet/tcp/tcp_bind.c
+++ b/usr/src/uts/common/inet/tcp/tcp_bind.c
@@ -22,7 +22,7 @@
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2013 Nexenta Systems, Inc. All rights reserved.
- * Copyright 2015 Joyent, Inc.
+ * Copyright 2016 Joyent, Inc.
*/
#include <sys/types.h>
@@ -853,9 +853,10 @@ tcp_bindi(tcp_t *tcp, in_port_t port, const in6_addr_t *laddr,
* This entry is bound to the exact same
* address and port. If SO_REUSEPORT is set on
* the calling socket, attempt to reuse this
- * binding if it too appears to be willing.
+ * binding if it too had SO_REUSEPORT enabled
+ * when it was bound.
*/
- attempt_reuse = B_TRUE;
+ attempt_reuse = (ltcp->tcp_rg_bind != NULL);
break;
}
@@ -905,6 +906,7 @@ tcp_bindi(tcp_t *tcp, in_port_t port, const in6_addr_t *laddr,
} else {
if (attempt_reuse) {
int err;
+ struct tcp_rg_s *rg;
ASSERT(ltcp != NULL);
ASSERT(ltcp->tcp_rg_bind != NULL);
@@ -921,9 +923,10 @@ tcp_bindi(tcp_t *tcp, in_port_t port, const in6_addr_t *laddr,
* the existing reuseport group on ltcp, it
* should clean up its own (empty) group.
*/
- VERIFY(tcp_rg_remove(tcp->tcp_rg_bind, tcp));
- tcp_rg_destroy(tcp->tcp_rg_bind);
+ rg = tcp->tcp_rg_bind;
tcp->tcp_rg_bind = ltcp->tcp_rg_bind;
+ VERIFY(tcp_rg_remove(rg, tcp));
+ tcp_rg_destroy(rg);
}
/*
diff --git a/usr/src/uts/common/inet/tcp/tcp_opt_data.c b/usr/src/uts/common/inet/tcp/tcp_opt_data.c
index 835acd1b12..7ea4fdaf2f 100644
--- a/usr/src/uts/common/inet/tcp/tcp_opt_data.c
+++ b/usr/src/uts/common/inet/tcp/tcp_opt_data.c
@@ -485,17 +485,46 @@ tcp_opt_get(conn_t *connp, int level, int name, uchar_t *ptr)
return (retval);
}
+/*
+ * Set a TCP connection's participation in SO_REUSEPORT. This operation is
+ * performed under the protection of the squeue via tcp_setsockopt.
+ * The manipulation of tcp_rg_bind, as part of this operation, is subject to
+ * these constraints:
+ * 1. Prior to bind(), tcp_rg_bind can be set/cleared in tcp_set_reuseport
+ * under the protection of the squeue.
+ * 2. Once the connection has been bound, the tcp_rg_bind pointer must not be
+ * altered until such time as tcp_free() cleans up the connection.
+ * 3. A connection undergoing bind, which matches to a connection participating
+ * in port-reuse, will switch its tcp_rg_bind pointer when it joins the
+ * group of an existing connection in tcp_bindi().
+ */
static int
tcp_set_reuseport(conn_t *connp, boolean_t do_enable)
{
tcp_t *tcp = connp->conn_tcp;
struct tcp_rg_s *rg;
- if (do_enable && !IPCL_IS_NONSTR(connp)) {
- /*
- * SO_REUSEPORT cannot be enabled on sockets which have fallen
- * back to the STREAMS API.
- */
+ if (!IPCL_IS_NONSTR(connp)) {
+ if (do_enable) {
+ /*
+ * SO_REUSEPORT cannot be enabled on sockets which have
+ * fallen back to the STREAMS API.
+ */
+ return (EINVAL);
+ } else {
+ /*
+ * A connection with SO_REUSEPORT enabled should be
+ * prevented from falling back to STREAMS mode via
+ * logic in tcp_fallback. It is legal, however, for
+ * fallen-back connections to affirm the disabled state
+ * of SO_REUSEPORT.
+ */
+ ASSERT(connp->conn_reuseport == 0);
+ ASSERT(tcp->tcp_rg_bind == NULL);
+ return (0);
+ }
+ }
+ if (tcp->tcp_state <= TCPS_CLOSED) {
return (EINVAL);
}
if (connp->conn_reuseport == 0 && do_enable) {
@@ -503,17 +532,51 @@ tcp_set_reuseport(conn_t *connp, boolean_t do_enable)
if (tcp->tcp_rg_bind != NULL) {
tcp_rg_setactive(tcp->tcp_rg_bind, do_enable);
} else {
- if (tcp->tcp_state >= TCPS_BOUND ||
- tcp->tcp_state <= TCPS_CLOSED)
- return (EINVAL);
- if ((rg = tcp_rg_init(tcp)) == NULL)
+ /*
+ * Connection state is not a concern when initially
+ * populating tcp_rg_bind. Setting it to non-NULL on a
+ * bound or listening connection would only mean that
+ * new reused-port binds become a possibility.
+ */
+ if ((rg = tcp_rg_init(tcp)) == NULL) {
return (ENOMEM);
+ }
tcp->tcp_rg_bind = rg;
}
connp->conn_reuseport = 1;
} else if (connp->conn_reuseport != 0 && !do_enable) {
/* enabled -> disabled */
- if (tcp->tcp_rg_bind != NULL) {
+ ASSERT(tcp->tcp_rg_bind != NULL);
+ if (tcp->tcp_state == TCPS_IDLE) {
+ /*
+ * If the connection has not been bound yet, discard
+ * the reuse group state. Since disabling SO_REUSEPORT
+ * on a bound socket will _not_ prevent others from
+ * reusing the port, the presence of tcp_rg_bind is
+ * used to determine reuse availability, not
+ * conn_reuseport.
+ *
+ * This allows proper behavior for examples such as:
+ *
+ * setsockopt(fd1, ... SO_REUSEPORT, &on_val...);
+ * bind(fd1, &myaddr, ...);
+ * setsockopt(fd1, ... SO_REUSEPORT, &off_val...);
+ *
+ * setsockopt(fd2, ... SO_REUSEPORT, &on_val...);
+ * bind(fd2, &myaddr, ...); // <- SHOULD SUCCEED
+ *
+ */
+ rg = tcp->tcp_rg_bind;
+ tcp->tcp_rg_bind = NULL;
+ VERIFY(tcp_rg_remove(rg, tcp));
+ tcp_rg_destroy(rg);
+ } else {
+ /*
+ * If a connection has been bound, it's no longer safe
+ * to manipulate tcp_rg_bind until connection clean-up
+ * during tcp_free. Just mark the member status of the
+ * connection as inactive.
+ */
tcp_rg_setactive(tcp->tcp_rg_bind, do_enable);
}
connp->conn_reuseport = 0;