diff options
author | Patrick Mooney <pmooney@pfmooney.com> | 2016-08-23 18:55:09 +0000 |
---|---|---|
committer | Patrick Mooney <pmooney@pfmooney.com> | 2016-08-31 16:08:12 +0000 |
commit | 4a663ad4eabd38f9c6397c5205cc12f083b474ca (patch) | |
tree | c2d4420853113fab3c17d6f78d4a1599f8bfc3c7 | |
parent | ee5b9c83a7ae2c239b08a811615a37a77687e3f2 (diff) | |
download | illumos-joyent-4a663ad4eabd38f9c6397c5205cc12f083b474ca.tar.gz |
OS-5613 SO_REUSEPORT needs better state-change coverage
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Approved by: Jerry Jelinek <jerry.jelinek@joyent.com>
-rw-r--r-- | usr/src/uts/common/inet/tcp/tcp_bind.c | 13 |
-rw-r--r-- | usr/src/uts/common/inet/tcp/tcp_opt_data.c | 83 |
2 files changed, 81 insertions, 15 deletions
diff --git a/usr/src/uts/common/inet/tcp/tcp_bind.c b/usr/src/uts/common/inet/tcp/tcp_bind.c index adc201eebb..7ea9dc3413 100644 --- a/usr/src/uts/common/inet/tcp/tcp_bind.c +++ b/usr/src/uts/common/inet/tcp/tcp_bind.c @@ -22,7 +22,7 @@ /* * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2013 Nexenta Systems, Inc. All rights reserved. - * Copyright 2015 Joyent, Inc. + * Copyright 2016 Joyent, Inc. */ #include <sys/types.h> @@ -853,9 +853,10 @@ tcp_bindi(tcp_t *tcp, in_port_t port, const in6_addr_t *laddr, * This entry is bound to the exact same * address and port. If SO_REUSEPORT is set on * the calling socket, attempt to reuse this - * binding if it too appears to be willing. + * binding if it too had SO_REUSEPORT enabled + * when it was bound. */ - attempt_reuse = B_TRUE; + attempt_reuse = (ltcp->tcp_rg_bind != NULL); break; } @@ -905,6 +906,7 @@ tcp_bindi(tcp_t *tcp, in_port_t port, const in6_addr_t *laddr, } else { if (attempt_reuse) { int err; + struct tcp_rg_s *rg; ASSERT(ltcp != NULL); ASSERT(ltcp->tcp_rg_bind != NULL); @@ -921,9 +923,10 @@ tcp_bindi(tcp_t *tcp, in_port_t port, const in6_addr_t *laddr, * the existing reuseport group on ltcp, it * should clean up its own (empty) group. */ - VERIFY(tcp_rg_remove(tcp->tcp_rg_bind, tcp)); - tcp_rg_destroy(tcp->tcp_rg_bind); + rg = tcp->tcp_rg_bind; tcp->tcp_rg_bind = ltcp->tcp_rg_bind; + VERIFY(tcp_rg_remove(rg, tcp)); + tcp_rg_destroy(rg); } /* diff --git a/usr/src/uts/common/inet/tcp/tcp_opt_data.c b/usr/src/uts/common/inet/tcp/tcp_opt_data.c index 835acd1b12..7ea4fdaf2f 100644 --- a/usr/src/uts/common/inet/tcp/tcp_opt_data.c +++ b/usr/src/uts/common/inet/tcp/tcp_opt_data.c @@ -485,17 +485,46 @@ tcp_opt_get(conn_t *connp, int level, int name, uchar_t *ptr) return (retval); } +/* + * Set a TCP connection's participation in SO_REUSEPORT. This operation is + * performed under the protection of the squeue via tcp_setsockopt. 
+ * The manipulation of tcp_rg_bind, as part of this operation, is subject to + * these constraints: + * 1. Prior to bind(), tcp_rg_bind can be set/cleared in tcp_set_reuseport + * under the protection of the squeue. + * 2. Once the connection has been bound, the tcp_rg_bind pointer must not be + * altered until such time as tcp_free() cleans up the connection. + * 3. A connection undergoing bind, which matches to a connection participating + * in port-reuse, will switch its tcp_rg_bind pointer when it joins the + * group of an existing connection in tcp_bindi(). + */ static int tcp_set_reuseport(conn_t *connp, boolean_t do_enable) { tcp_t *tcp = connp->conn_tcp; struct tcp_rg_s *rg; - if (do_enable && !IPCL_IS_NONSTR(connp)) { - /* - * SO_REUSEPORT cannot be enabled on sockets which have fallen - * back to the STREAMS API. - */ + if (!IPCL_IS_NONSTR(connp)) { + if (do_enable) { + /* + * SO_REUSEPORT cannot be enabled on sockets which have + * fallen back to the STREAMS API. + */ + return (EINVAL); + } else { + /* + * A connection with SO_REUSEPORT enabled should be + * prevented from falling back to STREAMS mode via + * logic in tcp_fallback. It is legal, however, for + * fallen-back connections to affirm the disabled state + * of SO_REUSEPORT. + */ + ASSERT(connp->conn_reuseport == 0); + ASSERT(tcp->tcp_conn_rg_bind == NULL); + return (0); + } + } + if (tcp->tcp_state <= TCPS_CLOSED) { return (EINVAL); } if (connp->conn_reuseport == 0 && do_enable) { @@ -503,17 +532,51 @@ tcp_set_reuseport(conn_t *connp, boolean_t do_enable) if (tcp->tcp_rg_bind != NULL) { tcp_rg_setactive(tcp->tcp_rg_bind, do_enable); } else { - if (tcp->tcp_state >= TCPS_BOUND || - tcp->tcp_state <= TCPS_CLOSED) - return (EINVAL); - if ((rg = tcp_rg_init(tcp)) == NULL) + /* + * Connection state is not a concern when initially + * populating tcp_rg_bind. Setting it to non-NULL on a + * bound or listening connection would only mean that + * new reused-port binds become a possibility. 
+ */ + if ((rg = tcp_rg_init(tcp)) == NULL) { return (ENOMEM); + } tcp->tcp_rg_bind = rg; } connp->conn_reuseport = 1; } else if (connp->conn_reuseport != 0 && !do_enable) { /* enabled -> disabled */ - if (tcp->tcp_rg_bind != NULL) { + ASSERT(tcp->tcp_rg_bind != NULL); + if (tcp->tcp_state == TCPS_IDLE) { + /* + * If the connection has not been bound yet, discard + * the reuse group state. Since disabling SO_REUSEPORT + * on a bound socket will _not_ prevent others from + * reusing the port, the presence of tcp_rg_bind is + * used to determine reuse availability, not + * conn_reuseport. + * + * This allows proper behavior for examples such as: + * + * setsockopt(fd1, ... SO_REUSEPORT, &on_val...); + * bind(fd1, &myaddr, ...); + * setsockopt(fd1, ... SO_REUSEPORT, &off_val...); + * + * setsockopt(fd2, ... SO_REUSEPORT, &on_val...); + * bind(fd2, &myaddr, ...); // <- SHOULD SUCCEED + * + */ + rg = tcp->tcp_rg_bind; + tcp->tcp_rg_bind = NULL; + VERIFY(tcp_rg_remove(rg, tcp)); + tcp_rg_destroy(rg); + } else { + /* + * If a connection has been bound, it's no longer safe + * to manipulate tcp_rg_bind until connection clean-up + * during tcp_free. Just mark the member status of the + * connection as inactive. + */ tcp_rg_setactive(tcp->tcp_rg_bind, do_enable); } connp->conn_reuseport = 0; |