diff options
author | Dan McDonald <danmcd@joyent.com> | 2019-07-23 14:40:58 -0400 |
---|---|---|
committer | Dan McDonald <danmcd@joyent.com> | 2019-07-31 17:40:11 -0400 |
commit | ec6335ff2d187135730376f34f2fe1cae4189572 (patch) | |
tree | 0e332ae6f8456cd76bc0b042112d550144ef643b | |
parent | 25bc4d84f6e57bd873e9b5135cd49fc329aa0876 (diff) | |
download | illumos-joyent-ec6335ff2d187135730376f34f2fe1cae4189572.tar.gz |
OS-7899 LX handing of SO_REUSEADDR needs to be closer to Linuxrelease-20190801
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
Approved by: Jerry Jelinek <jerry.jelinek@joyent.com>
-rw-r--r-- | usr/src/uts/common/brand/lx/syscall/lx_socket.c | 232 |
1 files changed, 227 insertions, 5 deletions
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_socket.c b/usr/src/uts/common/brand/lx/syscall/lx_socket.c index a95e220ea2..896f65e0cd 100644 --- a/usr/src/uts/common/brand/lx/syscall/lx_socket.c +++ b/usr/src/uts/common/brand/lx/syscall/lx_socket.c @@ -22,7 +22,7 @@ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. - * Copyright 2018 Joyent, Inc. + * Copyright 2019 Joyent, Inc. * Copyright 2019 OmniOS Community Edition (OmniOSce) Association. */ @@ -111,6 +111,9 @@ typedef struct lx_group_req32 { /* lxsad_flags */ #define LXSAD_FL_STRCRED 0x1 #define LXSAD_FL_EMULSEQPKT 0x2 +/* These two work together to implement Linux SO_REUSEADDR semantics. */ +#define LXSAD_FL_EMULRUADDR 0x4 +#define LXSAD_FL_EMULRUPORT 0x8 static lx_socket_aux_data_t *lx_sad_acquire(vnode_t *); @@ -3018,6 +3021,210 @@ lx_mcast_common(sonode_t *so, int level, int optname, void *optval, return (error); } +/* + * So in Linux, the SO_REUSEADDR includes, essentially, SO_REUSEPORT as part + * of its functionality. Experiments on CentOS 7 with a 3.10-ish kernel show + * that querying on SO_REUSEPORT show it's "off" if SO_REUSEADDR gets set. + * This means we can't count on directly querying the native socket state. We + * munge things here in LX-land to essentially turn on both REUSEADDR and + * REUSEPORT in native conn_t state for LX processes that set SO_REUSEADDR. + * + * We also keep track if the wily Linux app sends BOTH REUSEADDR and REUSEPORT + * down. We can return that both are on, or if it uses just REUSEADDR, we + * don't return yes for a check of REUSEPORT. This means our conn_t state may + * be different than what an LX process will see. "REUSEPORT" for LX may be + * off, but internally it will be on. + * + * BEGIN CSTYLED + * State table for internal conn_reuse{addr,port}: + * + * LX ADDR,PORT Int. ADDR,PORT New ADDR New LX New Int. LXchg? Intchg? + * ============ ============== ======== ====== ======== ====== ======= + * + * off,off off,off off off,off off,off NO NO + * + * off,off off,off on on,off on,on YES YES(2) + * + * off,on off,on off off,on off,on NO NO + * + * off,on off,on on on,on on,on YES YES + * + * on,off on,on off off,off off,off YES YES(2) + * + * on,off on,on on on,off on,on NO NO + * + * on,on on,on off off,on off,on YES YES + * + * on,on on,on on on,on on,on NO NO + * + * + * LX ADDR,PORT Int. ADDR,PORT New PORT New LX New Int. LXchg? Intchg? + * ============ ============== ======== ====== ======== ====== ======= + * + * off,off off,off off off,off off,off NO NO + * + * off,off off,off on off,on off,on YES YES + * + * off,on off,on off off,off off,off YES YES + * + * off,on off,on on off,on off,on NO NO + * + * on,off on,on off on,off on,on NO NO + * + * on,off on,on on on,on on,on YES NO + * + * on,on on,on off on,off on,on YES NO + * + * on,on on,on on on,on on,on NO NO + * + * END CSTYLED + * + * For setting these options, we need to obey the state table above. + * For getting REUSEADDR, the native stack handles it already. + * For getting REUSEPORT, we'll have to track the auxiliary data's flags. + */ +static int +lx_set_reuse_handler(sonode_t *so, int optname, void *optval, socklen_t optlen) +{ + lx_socket_aux_data_t *sad; + boolean_t enable; + int error; + + if (optlen != sizeof (int)) + return (EINVAL); + enable = (*((int *)optval) != 0); + + ASSERT(optname == LX_SO_REUSEADDR || optname == LX_SO_REUSEPORT); + sad = lx_sad_acquire(SOTOV(so)); + + /* + * lx_sad_acquire() holds its mutex for us. This protects us + * against racing option-setters on the same socket. + */ + if (optname == LX_SO_REUSEADDR) { + /* Check if already set to what we want! */ + if (enable == + ((sad->lxsad_flags & LXSAD_FL_EMULRUADDR) != 0)) { + mutex_exit(&sad->lxsad_lock); + return (0); + } + + /* + * At this point, we know we need to change SO_REUSEADDR, + * Linux-style. We know these are supported options too, + * so we don't bother with any lookup. + */ + error = socket_setsockopt(so, SOL_SOCKET, SO_REUSEADDR, + optval, optlen, CRED()); + if (error != 0) { + mutex_exit(&sad->lxsad_lock); + return (error); + } + if (enable) + sad->lxsad_flags |= LXSAD_FL_EMULRUADDR; + else + sad->lxsad_flags &= ~LXSAD_FL_EMULRUADDR; + + /* + * At THIS point, we need to figure out if we ALSO need to + * toggle the native-side SO_REUSEPORT state because Linux's + * SO_REUSEADDR ALSO include the moral equivalent of + * SO_REUSEPORT. There may be further subtleties, but for now + * assume a Linux app that uses SO_REUSEADDR wants that + * SO_REUSEPORT functionality thrown in for free. + * + * Check for SO_REUSEPORT already enabled first. + */ + if ((sad->lxsad_flags & LXSAD_FL_EMULRUPORT) != 0) { + /* Someone turned on REUSEPORT first, we're good. */ + mutex_exit(&sad->lxsad_lock); + return (0); + } + + /* + * Fall through to REUSEPORT setting, it'll know it's a + * supplement based on (optname == SO_REUSEADDR). + */ + } else if (enable == + ((sad->lxsad_flags & LXSAD_FL_EMULRUPORT) != 0)) { + /* + * If we reach here, we're setting REUSEPORT to what it's + * already set. + */ + ASSERT3U(optname, ==, LX_SO_REUSEPORT); + mutex_exit(&sad->lxsad_lock); + return (0); + } + + if (optname == LX_SO_REUSEPORT && + ((sad->lxsad_flags & LXSAD_FL_EMULRUADDR) != 0)) { + /* + * Corner case: REUSEPORT change *but* REUSEADDR is still + * enabled. We must not alter conn_t/native state here, as + * REUSEADDR *needs* REUSEPORT enabled on conn_t/native state. + * If we want to enable REUSEPORT, the setsockopt would be a + * NOP. If want to disable it, we MUST NOT turn off native + * REUSEPORT lest we break Linux-like behavior, and instead + * merely turn off the LXSAD_FL_EMULRUPORT flag. + */ + error = 0; + } else { + /* + * At this point, we need to change REUSEPORT. We may be + * doing it for an actual REUSEPORT change, OR for Linux + * REUSEADDR semantics. As earlier, we know the option map + * lookup is superfluous. + */ + error = socket_setsockopt(so, SOL_SOCKET, SO_REUSEPORT, optval, + optlen, CRED()); + } + + if (error != 0 && optname == LX_SO_REUSEADDR) { + int addr_error, revert_to_optval; + + ASSERT0(sad->lxsad_flags & LXSAD_FL_EMULRUPORT); + /* + * We need more cleanup if the REUSEPORT change fails during + * an actual REUSEADDR set. + */ + if (enable) { + sad->lxsad_flags &= ~LXSAD_FL_EMULRUADDR; + revert_to_optval = 0; + } else { + sad->lxsad_flags |= LXSAD_FL_EMULRUADDR; + revert_to_optval = 1; + } + + /* Just hardwire it, we're in trouble! */ + addr_error = socket_setsockopt(so, SOL_SOCKET, SO_REUSEADDR, + &revert_to_optval, optlen, CRED()); + if (addr_error != 0) { + /* + * Well this sucks, we really shot ourselves in the + * foot. We should somehow signal a catastrophic + * error. For now, just return the one we had earlier. + */ + DTRACE_PROBE1(lx__reuse__seconderr, int, addr_error); + mutex_exit(&sad->lxsad_lock); + return (error); + } + /* + * Else we managed successfully to clean up and can fall + * through the normal error path. + */ + } else if (error == 0 && optname == LX_SO_REUSEPORT) { + /* We successfully changed REUSEPORT explicitly. */ + if (enable) + sad->lxsad_flags |= LXSAD_FL_EMULRUPORT; + else + sad->lxsad_flags &= ~LXSAD_FL_EMULRUPORT; + } + /* Else it's an error for an explicit REUSEPORT, just return. */ + + mutex_exit(&sad->lxsad_lock); + return (error); +} + static int lx_setsockopt_ip(sonode_t *so, int optname, void *optval, socklen_t optlen) { @@ -3402,6 +3609,11 @@ lx_setsockopt_socket(sonode_t *so, int optname, void *optval, socklen_t optlen) */ return (0); + case LX_SO_REUSEADDR: + case LX_SO_REUSEPORT: + /* See the function called below for the oddness of REUSE*. */ + return (lx_set_reuse_handler(so, optname, optval, optlen)); + case LX_SO_PASSCRED: /* * In many cases, the Linux SO_PASSCRED is mapped to the SunOS @@ -3707,6 +3919,7 @@ lx_getsockopt_socket(sonode_t *so, int optname, void *optval, int error = 0; int *intval = (int *)optval; lx_proto_opts_t sockopts_tbl = PROTO_SOCKOPTS(ltos_socket_sockopts); + lx_socket_aux_data_t *sad; switch (optname) { case LX_SO_TYPE: @@ -3716,8 +3929,6 @@ lx_getsockopt_socket(sonode_t *so, int optname, void *optval, */ if (so->so_family == AF_UNIX && (so->so_mode & SM_CONNREQUIRED) == 0) { - lx_socket_aux_data_t *sad; - if (*optlen < sizeof (int)) return (EINVAL); sad = lx_sad_acquire(SOTOV(so)); @@ -3751,8 +3962,6 @@ lx_getsockopt_socket(sonode_t *so, int optname, void *optval, */ if (so->so_family == AF_UNIX && (so->so_mode & SM_CONNREQUIRED) != 0) { - lx_socket_aux_data_t *sad; - if (*optlen < sizeof (int)) { return (EINVAL); } @@ -3765,6 +3974,19 @@ lx_getsockopt_socket(sonode_t *so, int optname, void *optval, } break; + case LX_SO_REUSEPORT: + /* See lx_set_reuse_handler() for the sordid details. */ + if (so->so_family != AF_INET && so->so_family != AF_INET6) + break; + if (*optlen < sizeof (int)) + return (EINVAL); + sad = lx_sad_acquire(SOTOV(so)); + *optlen = sizeof (int); + *intval = + (sad->lxsad_flags & LXSAD_FL_EMULRUPORT) == 0 ? 0 : 1; + mutex_exit(&sad->lxsad_lock); + return (0); + case LX_SO_PEERCRED: if (*optlen < sizeof (struct lx_ucred)) { error = EINVAL; |