diff options
Diffstat (limited to 'usr/src/lib/brand/lx/lx_brand/common/capabilities.c')
-rw-r--r-- | usr/src/lib/brand/lx/lx_brand/common/capabilities.c | 516 |
1 files changed, 516 insertions, 0 deletions
diff --git a/usr/src/lib/brand/lx/lx_brand/common/capabilities.c b/usr/src/lib/brand/lx/lx_brand/common/capabilities.c new file mode 100644 index 0000000000..e034560452 --- /dev/null +++ b/usr/src/lib/brand/lx/lx_brand/common/capabilities.c @@ -0,0 +1,516 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2019 Joyent, Inc. + */ + +/* + * LX Brand emulation of capget/capset syscalls + */ + +#include <sys/types.h> +#include <sys/errno.h> +#include <sys/debug.h> +#include <sys/lx_types.h> +#include <sys/lx_syscall.h> +#include <sys/syscall.h> +#include <alloca.h> +#include <errno.h> +#include <unistd.h> +#include <string.h> +#include <sys/lx_misc.h> +#include <priv.h> + +typedef struct { + uint32_t version; + int pid; +} lx_cap_user_header_t; + +typedef struct { + uint32_t effective; + uint32_t permitted; + uint32_t inheritable; +} lx_cap_user_data_t; + +typedef struct { + priv_set_t *p_effective; + priv_set_t *p_permitted; + priv_set_t *p_inheritable; +} lx_cap_privs_t; + +#define LX_CAP_UPDATE_PERMITTED 0x1 +#define LX_CAP_UPDATE_EFFECTIVE 0x2 +#define LX_CAP_UPDATE_INHERITABLE 0x4 + + +#define LX_CAP_MAXLEN 2 + +typedef struct { + uint32_t effective[LX_CAP_MAXLEN]; + uint32_t permitted[LX_CAP_MAXLEN]; + uint32_t inheritable[LX_CAP_MAXLEN]; +} lx_cap_data_t; + +#define LX_CAP_VERSION_1 0x19980330 +#define LX_CAP_VERSION_2 0x20071026 /* deprecated by Linux */ +#define LX_CAP_VERSION_3 0x20080522 + +#define LX_CAP_SETPCAP 8 + +/* + * Even though we lack mappings for capabilities higher than LX_CAP_MAX_VALID, + * it's valuable to test all the way out to the end of the second field. This + * ensures that new capabilities we lack support for are not silently accepted. + */ +#define LX_CAP_MAX_CHECK 63 + +#define LX_CAP_CAPISSET(id, cap) \ + (((id < 32) && (((0x1 << id) & cap[0]) != 0)) || \ + ((id >= 32) && (((0x1 << (id - 32) & cap[1]) != 0)))) + +#define LX_CAP_CAPSET(id, cap) \ + if (id < 32) { cap[0] |= (0x1 << id); } \ + else { cap[1] |= (0x1 << (id - 32)); } + +static const char *lx_cap_map_chown[] = { + PRIV_FILE_CHOWN, + PRIV_FILE_CHOWN_SELF, + NULL +}; +static const char *lx_cap_map_dac_override[] = { + PRIV_FILE_DAC_READ, + PRIV_FILE_DAC_WRITE, + PRIV_FILE_DAC_EXECUTE, + NULL +}; +static const char *lx_cap_map_dac_read_search[] = { + PRIV_FILE_DAC_SEARCH, + PRIV_FILE_DAC_READ, + NULL +}; +static const char *lx_cap_map_fowner[] = { PRIV_FILE_OWNER, NULL }; +static const char *lx_cap_map_fsetid[] = { PRIV_FILE_SETID, NULL }; +static const char *lx_cap_map_kill[] = { PRIV_PROC_OWNER, NULL }; +/* + * One way that Linux capabilities(7) differs from Illumos privileges(5) is + * that it distinguishes between setuid and setgroups rights. This will be a + * problem if an lx-branded process requests to drop only CAP_SETUID but not + * CAP_SETGID. + * + * In that case, CAP_SETUID will be maintained. + */ +static const char *lx_cap_map_setgid[] = { PRIV_PROC_SETID, NULL }; +static const char *lx_cap_map_setuid[] = { PRIV_PROC_SETID, NULL }; +static const char *lx_cap_map_linux_immutable[] = { PRIV_FILE_FLAG_SET, NULL }; +static const char *lx_cap_map_bind_service[] = { PRIV_NET_PRIVADDR, NULL }; +static const char *lx_cap_map_net_admin[] = { + PRIV_SYS_IP_CONFIG, + NULL + /* + * It would probably make sense to include PRIV_SYS_DL_CONFIG, but that + * privilege is not extended to non-global zones by default. A more + * sophisticated capabilities translation layer could make it optional. + */ +}; +static const char *lx_cap_map_net_raw[] = { + PRIV_NET_RAWACCESS, + PRIV_NET_ICMPACCESS, + NULL +}; +static const char *lx_cap_map_ipc_lock[] = { PRIV_PROC_LOCK_MEMORY, NULL }; +static const char *lx_cap_map_ipc_owner[] = { + PRIV_IPC_DAC_READ, + PRIV_IPC_DAC_WRITE, + PRIV_IPC_OWNER, + NULL +}; +static const char *lx_cap_map_sys_chroot[] = { PRIV_PROC_CHROOT, NULL }; +static const char *lx_cap_map_sys_admin[] = { + PRIV_SYS_MOUNT, + PRIV_SYS_ADMIN, + NULL +}; +static const char *lx_cap_map_sys_nice[] = { PRIV_PROC_PRIOUP, NULL }; +static const char *lx_cap_map_sys_resource[] = { PRIV_SYS_RESOURCE, NULL }; +static const char *lx_cap_map_audit_write[] = { PRIV_PROC_AUDIT, NULL }; +static const char *lx_cap_map_audit_control[] = { PRIV_SYS_AUDIT, NULL }; + +/* + * Mapping of Linux capabilities -> Illumos privileges + * The ID definitions can be found in the Linux sources here: + * include/uapi/linux/capability.h + * + * Order is critical. + */ +static const char ** lx_cap_mapping[LX_CAP_MAX_VALID + 1] = { + lx_cap_map_chown, /* CAP_CHOWN */ + lx_cap_map_dac_override, /* CAP_DAC_OVERRIDE */ + lx_cap_map_dac_read_search, /* CAP_DAC_READ_SEARCH */ + lx_cap_map_fowner, /* CAP_FOWNER */ + lx_cap_map_fsetid, /* CAP_FSETID */ + lx_cap_map_kill, /* CAP_KILL */ + lx_cap_map_setgid, /* CAP_SETGID */ + lx_cap_map_setuid, /* CAP_SETUID */ + NULL, /* CAP_SETPCAP */ + lx_cap_map_linux_immutable, /* CAP_LINUX_IMMUTABLE */ + lx_cap_map_bind_service, /* CAP_BIND_SERVICE */ + NULL, /* CAP_BROADCAST */ + lx_cap_map_net_admin, /* CAP_NET_ADMIN */ + lx_cap_map_net_raw, /* CAP_NET_RAW */ + lx_cap_map_ipc_lock, /* CAP_IPC_LOCK */ + lx_cap_map_ipc_owner, /* CAP_IPC_OWNER */ + NULL, /* CAP_MODULE */ + NULL, /* CAP_RAWIO */ + lx_cap_map_sys_chroot, /* CAP_SYS_CHROOT */ + NULL, /* CAP_PTRACE */ + NULL, /* CAP_PACCT */ + lx_cap_map_sys_admin, /* CAP_SYS_ADMIN */ + NULL, /* CAP_BOOT */ + lx_cap_map_sys_nice, /* CAP_SYS_NICE */ + lx_cap_map_sys_resource, /* CAP_SYS_RESOURCE */ + NULL, /* CAP_SYS_TIME */ + NULL, /* CAP_SYS_TTY_CONFIG */ + NULL, /* CAP_MKNOD */ + NULL, /* CAP_LEASE */ + lx_cap_map_audit_write, /* CAP_AUDIT_WRITE */ + lx_cap_map_audit_control, /* CAP_AUDIT_CONTROL */ + NULL, /* CAP_SETFCAP */ + NULL, /* CAP_MAC_OVERRIDE */ + NULL, /* CAP_MAC_ADMIN */ + NULL, /* CAP_SYSLOG */ + NULL, /* CAP_WAKE_ALARM */ + NULL /* CAP_BLOCK_SUSPEND */ +}; + +/* track priv_set_t size, set on entry to lx_capset/lx_capget */ +static unsigned int lx_cap_priv_size = 0; + +/* safely allocate priv_set_t triplet on the stack */ +#define LX_CAP_ALLOC_PRIVS(ptr) \ + { \ + ptr = SAFE_ALLOCA(sizeof (lx_cap_privs_t) + \ + (3 * lx_cap_priv_size)); \ + if (ptr != NULL) { \ + ptr->p_effective = (priv_set_t *) \ + ((caddr_t)ptr + sizeof (lx_cap_privs_t)); \ + ptr->p_permitted = (priv_set_t *) \ + ((caddr_t)ptr + sizeof (lx_cap_privs_t) + \ + lx_cap_priv_size);\ + ptr->p_inheritable = (priv_set_t *) \ + ((caddr_t)ptr + sizeof (lx_cap_privs_t) + \ + 2 * lx_cap_priv_size); \ + } \ + } + +static long +lx_cap_update_priv(priv_set_t *priv, const uint32_t cap[]) +{ + int i, j; + boolean_t cap_set; + boolean_t priv_set; + boolean_t updated = B_FALSE; + for (i = 0; i <= LX_CAP_MAX_CHECK; i++) { + cap_set = LX_CAP_CAPISSET(i, cap); + if (i > LX_CAP_MAX_VALID || lx_cap_mapping[i] == NULL) { + /* don't allow setting unsupported caps */ + if (cap_set) { + /* + * CAP_SETPCAP is a special capability, with + * varying behavior, that can be used to + * control if the process can change other + * process's capabilities, or to control moving + * capabilities between sets. For now we ignore + * this if its passed in. + */ + if (i == LX_CAP_SETPCAP) { + continue; + } + lx_unsupported("set unsupported capability %d", + i); + return (-1); + } else { + continue; + } + } + for (j = 0; lx_cap_mapping[i][j] != NULL; j++) { + priv_set = priv_ismember(priv, lx_cap_mapping[i][j]); + if (priv_set && !cap_set) { + VERIFY0(priv_delset(priv, + lx_cap_mapping[i][j])); + updated = B_TRUE; + } else if (!priv_set && cap_set) { + VERIFY0(priv_addset(priv, + lx_cap_mapping[i][j])); + updated = B_TRUE; + } + } + } + if (updated) + return (1); + else + return (0); +} + +static long +lx_cap_to_priv(lx_cap_data_t *cap, lx_cap_privs_t *priv) +{ + long changes = 0; + long result; + + result = lx_cap_update_priv(priv->p_permitted, cap->permitted); + if (result < 0) + return (-1); + else if (result > 0) + changes |= LX_CAP_UPDATE_PERMITTED; + + result = lx_cap_update_priv(priv->p_effective, cap->effective); + if (result < 0) + return (-1); + else if (result > 0) + changes |= LX_CAP_UPDATE_EFFECTIVE; + + result = lx_cap_update_priv(priv->p_inheritable, cap->inheritable); + if (result < 0) + return (-1); + else if (result > 0) + changes |= LX_CAP_UPDATE_INHERITABLE; + + return (changes); +} + +static void +lx_cap_from_priv(const priv_set_t *priv, uint32_t cap[]) +{ + int i, j; + boolean_t valid; + (void) memset(cap, '\0', sizeof (uint32_t) * LX_CAP_MAXLEN); + for (i = 0; i <= LX_CAP_MAX_VALID; i++) { + if (lx_cap_mapping[i] == NULL) { + continue; + } + valid = B_TRUE; + for (j = 0; lx_cap_mapping[i][j] != NULL; j++) { + if (!priv_ismember(priv, + lx_cap_mapping[i][j])) { + valid = B_FALSE; + } + } + if (valid) { + LX_CAP_CAPSET(i, cap); + } + } +} + +static long +lx_cap_read_cap(const lx_cap_user_header_t *uhp, const lx_cap_user_data_t *udp, + lx_cap_data_t *cd) +{ + lx_cap_user_header_t uh; + lx_cap_user_data_t ud_buf; + int cap_count; + int i; + + if (uucopy(uhp, &uh, sizeof (uh)) != 0) + return (-errno); + + switch (uh.version) { + case LX_CAP_VERSION_1: + cap_count = 1; + break; + case LX_CAP_VERSION_2: + case LX_CAP_VERSION_3: + cap_count = 2; + break; + default: + return (-EINVAL); + } + + /* Only allow capset on calling process */ + if (uh.pid != 0 && uh.pid != getpid()) + return (-EPERM); + + /* zero the struct in case cap_count < 2 */ + (void) memset(cd, '\0', sizeof (lx_cap_data_t)); + + for (i = 0; i < cap_count; i++) { + if (uucopy(udp + i, &ud_buf, sizeof (ud_buf)) != 0) + return (-errno); + cd->permitted[i] = ud_buf.permitted; + cd->effective[i] = ud_buf.effective; + cd->inheritable[i] = ud_buf.inheritable; + } + return (0); +} + +long +lx_capget(uintptr_t p1, uintptr_t p2) +{ + const priv_impl_info_t *impl; + lx_cap_user_header_t *uhp = (lx_cap_user_header_t *)p1; + lx_cap_user_data_t *udp = (lx_cap_user_data_t *)p2; + lx_cap_user_header_t uh; + lx_cap_privs_t *privs; + lx_cap_data_t cd_result; + lx_cap_user_data_t cd_buf; + int cap_count; + int i; + + if (lx_cap_priv_size == 0) { + impl = getprivimplinfo(); + lx_cap_priv_size = sizeof (priv_chunk_t) * impl->priv_setsize; + } + + if (uucopy(uhp, &uh, sizeof (uh)) != 0) + return (-errno); + + switch (uh.version) { + case LX_CAP_VERSION_1: + cap_count = 1; + break; + case LX_CAP_VERSION_2: + case LX_CAP_VERSION_3: + cap_count = 2; + break; + default: + /* + * As per the man page: call will fail with EINVAL and set the + * version field of the header to the kernel preferred version + * when an unsupported version value is provided. + */ + uh.version = LX_CAP_VERSION_3; + if (uucopy(&uh, uhp, sizeof (uh)) != 0) + return (-errno); + return (-EINVAL); + } + + /* + * Only allow capget on the calling process. + * If a pid is specified, lie about being able to locate it. + */ + if (uh.pid > 0 && uh.pid != getpid()) + return (-ESRCH); + if (uh.pid < 0) + return (-EINVAL); + + LX_CAP_ALLOC_PRIVS(privs) + + if (privs == NULL) + return (-ENOMEM); + + if (getppriv(PRIV_PERMITTED, privs->p_permitted) != 0) + return (-errno); + if (getppriv(PRIV_EFFECTIVE, privs->p_effective) != 0) + return (-errno); + if (getppriv(PRIV_INHERITABLE, privs->p_inheritable) != 0) + return (-errno); + + lx_cap_from_priv(privs->p_permitted, cd_result.permitted); + lx_cap_from_priv(privs->p_effective, cd_result.effective); + lx_cap_from_priv(privs->p_inheritable, cd_result.inheritable); + + /* convert to output format */ + for (i = 0; i < cap_count; i++) { + cd_buf.effective = cd_result.effective[i]; + cd_buf.permitted = cd_result.permitted[i]; + cd_buf.inheritable = cd_result.inheritable[i]; + if (uucopy(&cd_buf, udp + i, sizeof (cd_buf)) != 0) + return (-errno); + } + + return (0); +} + +long +lx_capset(uintptr_t p1, uintptr_t p2) +{ + const priv_impl_info_t *impl; + lx_cap_data_t cd; + lx_cap_privs_t *privs; + long result; + + if (lx_cap_priv_size == 0) { + impl = getprivimplinfo(); + lx_cap_priv_size = sizeof (priv_chunk_t) * impl->priv_setsize; + } + + /* verify header and read in desired capabilities */ + result = lx_cap_read_cap((lx_cap_user_header_t *)p1, + (lx_cap_user_data_t *)p2, &cd); + if (result != 0) + return (result); + + LX_CAP_ALLOC_PRIVS(privs) + + if (privs == NULL) + return (-ENOMEM); + + /* fetch current privs to compare against */ + if (getppriv(PRIV_PERMITTED, privs->p_permitted) != 0) + return (-errno); + if (getppriv(PRIV_EFFECTIVE, privs->p_effective) != 0) + return (-errno); + if (getppriv(PRIV_INHERITABLE, privs->p_inheritable) != 0) + return (-errno); + + + result = lx_cap_to_priv(&cd, privs); + if (result < 0) + return (-EPERM); + + /* report success if no changes needed */ + if (result == 0) + return (0); + + /* Ensure the effective/inheritable caps aren't > permitted */ + if (!priv_issubset(privs->p_effective, privs->p_permitted) || + !priv_issubset(privs->p_inheritable, privs->p_permitted)) + return (-EPERM); + + /* + * Here is where things become racy. Linux updates all three + * capability sets simultaneously in the capset syscall. In order to + * emulate capabilities via privileges, three setppriv operations are + * required in sequence. If one or two should fail, there is not a + * mechanism to convey the incomplete operation to the caller. + * + * We do two things to make this less risky: + * 1. Verify that both the desired effective and inheritable + * sets are subsets of the desired permitted set. + * 2. Perform the setppriv of the permitted set first. + * + * Should the setppriv(permitted) fail, we can safely bail out with an + * error. If it succeeds, the setppriv of effective and inheritable + * are likely to succeed given that they've been verified legal. + * + * If the partial error does happen, we'll be forced to report failure + * even though the privileges were altered. + */ + + if ((result & LX_CAP_UPDATE_PERMITTED) != 0) { + /* failure here is totally safe */ + if (setppriv(PRIV_SET, PRIV_PERMITTED, privs->p_permitted) != 0) + return (-errno); + } + if ((result & LX_CAP_UPDATE_EFFECTIVE) != 0) { + /* failure here is a bummer */ + if (setppriv(PRIV_SET, PRIV_EFFECTIVE, privs->p_effective) != 0) + return (-errno); + } + if ((result & LX_CAP_UPDATE_EFFECTIVE) != 0) { + /* failure here is a major bummer */ + if (setppriv(PRIV_SET, PRIV_INHERITABLE, + privs->p_inheritable) != 0) + return (-errno); + } + + return (0); +} |