diff options
author | Jerry Jelinek <jerry.jelinek@joyent.com> | 2017-08-11 17:23:35 +0000 |
---|---|---|
committer | Jerry Jelinek <jerry.jelinek@joyent.com> | 2017-08-11 17:31:08 +0000 |
commit | 7c9cf15db394630449591dfb9e3186a6908030bd (patch) | |
tree | 6dbd5dff5c6c9c080f03a2187d73a5bbd5187d69 | |
parent | 35d5f63341902524053682b3ec36ff1d269932b6 (diff) | |
download | illumos-joyent-7c9cf15db394630449591dfb9e3186a6908030bd.tar.gz |
OS-5725 allow swap to be "disabled" in LX
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
Approved by: Patrick Mooney <patrick.mooney@joyent.com>
-rw-r--r-- | usr/src/lib/brand/lx/testing/ltp_skiplist | 2 | ||||
-rw-r--r-- | usr/src/uts/common/brand/lx/os/lx_syscall.c | 8 | ||||
-rw-r--r-- | usr/src/uts/common/brand/lx/procfs/lx_prvnops.c | 59 | ||||
-rw-r--r-- | usr/src/uts/common/brand/lx/sys/lx_brand.h | 1 | ||||
-rw-r--r-- | usr/src/uts/common/brand/lx/sys/lx_syscalls.h | 2 | ||||
-rw-r--r-- | usr/src/uts/common/brand/lx/syscall/lx_miscsys.c | 64 |
6 files changed, 111 insertions, 25 deletions
diff --git a/usr/src/lib/brand/lx/testing/ltp_skiplist b/usr/src/lib/brand/lx/testing/ltp_skiplist index d4a732c33d..571717b23a 100644 --- a/usr/src/lib/brand/lx/testing/ltp_skiplist +++ b/usr/src/lib/brand/lx/testing/ltp_skiplist @@ -72,9 +72,7 @@ setfsuid04 setfsuid04_16 settimeofday01 stime01 -swapoff01 swapoff02 -swapon01 swapon02 swapon03 switch01 diff --git a/usr/src/uts/common/brand/lx/os/lx_syscall.c b/usr/src/uts/common/brand/lx/os/lx_syscall.c index 750b81eed2..832686f17a 100644 --- a/usr/src/uts/common/brand/lx/os/lx_syscall.c +++ b/usr/src/uts/common/brand/lx/os/lx_syscall.c @@ -606,7 +606,7 @@ lx_sysent_t lx_sysent32[] = { {"oldlstat", NULL, NOSYS_OBSOLETE, 0}, /* 84 */ {"readlink", lx_readlink, 0, 3}, /* 85 */ {"uselib", NULL, NOSYS_KERNEL, 0}, /* 86 */ - {"swapon", NULL, NOSYS_KERNEL, 0}, /* 87 */ + {"swapon", lx_swapon, 0, 2}, /* 87 */ {"reboot", lx_reboot, 0, 4}, /* 88 */ {"readdir", NULL, 0, 3}, /* 89 */ {"mmap", NULL, 0, 6}, /* 90 */ @@ -634,7 +634,7 @@ lx_sysent_t lx_sysent32[] = { {"idle", NULL, NOSYS_NO_EQUIV, 0}, /* 112 */ {"vm86old", NULL, NOSYS_OBSOLETE, 0}, /* 113 */ {"wait4", lx_wait4, 0, 4}, /* 114 */ - {"swapoff", NULL, NOSYS_KERNEL, 0}, /* 115 */ + {"swapoff", lx_swapoff, 0, 1}, /* 115 */ {"sysinfo", lx_sysinfo32, 0, 1}, /* 116 */ {"ipc", NULL, 0, 5}, /* 117 */ {"fsync", NULL, 0, 1}, /* 118 */ @@ -1057,8 +1057,8 @@ lx_sysent_t lx_sysent64[] = { {"settimeofday", NULL, 0, 2}, /* 164 */ {"mount", lx_mount, 0, 5}, /* 165 */ {"umount2", lx_umount2, 0, 2}, /* 166 */ - {"swapon", NULL, NOSYS_KERNEL, 0}, /* 167 */ - {"swapoff", NULL, NOSYS_KERNEL, 0}, /* 168 */ + {"swapon", lx_swapon, 0, 2}, /* 167 */ + {"swapoff", lx_swapoff, 0, 1}, /* 168 */ {"reboot", lx_reboot, 0, 4}, /* 169 */ {"sethostname", lx_sethostname, 0, 2}, /* 170 */ {"setdomainname", lx_setdomainname, 0, 2}, /* 171 */ diff --git a/usr/src/uts/common/brand/lx/procfs/lx_prvnops.c b/usr/src/uts/common/brand/lx/procfs/lx_prvnops.c index ea518f646b..5f85c1440e 100644 --- a/usr/src/uts/common/brand/lx/procfs/lx_prvnops.c +++ b/usr/src/uts/common/brand/lx/procfs/lx_prvnops.c @@ -3713,12 +3713,16 @@ static void lxpr_read_meminfo(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) { zone_t *zone = LXPTOZ(lxpnp); - int global = zone == global_zone; + lx_zone_data_t *lxzd = ztolxzd(zone); long total_mem, free_mem, total_swap; + boolean_t swap_disabled; ASSERT(lxpnp->lxpr_type == LXPR_MEMINFO); + ASSERT(zone->zone_brand == &lx_brand); + ASSERT(lxzd != NULL); + swap_disabled = lxzd->lxzd_swap_disabled; - if (global || zone->zone_phys_mem_ctl == UINT64_MAX) { + if (zone->zone_phys_mem_ctl == UINT64_MAX) { total_mem = physmem * PAGESIZE; free_mem = freemem * PAGESIZE; } else { @@ -3728,12 +3732,16 @@ lxpr_read_meminfo(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) free_mem = 0; } - if (global || zone->zone_max_swap_ctl == UINT64_MAX) { - total_swap = k_anoninfo.ani_max * PAGESIZE; + if (swap_disabled) { + total_swap = 0; } else { - mutex_enter(&zone->zone_mem_lock); - total_swap = zone->zone_max_swap_ctl; - mutex_exit(&zone->zone_mem_lock); + if (zone->zone_max_swap_ctl == UINT64_MAX) { + total_swap = k_anoninfo.ani_max * PAGESIZE; + } else { + mutex_enter(&zone->zone_mem_lock); + total_swap = zone->zone_max_swap_ctl; + mutex_exit(&zone->zone_mem_lock); + } } /* @@ -4318,6 +4326,9 @@ lxpr_read_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) * our entire swap cap as one swap partition. See lxpr_read_meminfo for an * explanation on why we report 0 used swap. * + * The zone's lxzd_swap_disabled boolean controls whether or not we pretend + * swap space is configured. + * * It is important to use formatting identical to the Linux implementation * so that consumers do not break. See swap_show() in mm/swapfile.c. */ @@ -4326,22 +4337,32 @@ static void lxpr_read_swaps(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) { zone_t *zone = LXPTOZ(lxpnp); - uint64_t totswap, usedswap; + boolean_t swap_enabled; + lx_zone_data_t *lxzd = ztolxzd(zone); - if (zone->zone_max_swap_ctl == UINT64_MAX) { - totswap = (k_anoninfo.ani_max * PAGESIZE) >> 10; - } else { - mutex_enter(&zone->zone_mem_lock); - /* Uses units of 1 kb (2^10). */ - totswap = zone->zone_max_swap_ctl >> 10; - mutex_exit(&zone->zone_mem_lock); - } - usedswap = 0; + ASSERT(zone->zone_brand == &lx_brand); + ASSERT(lxzd != NULL); + swap_enabled = !lxzd->lxzd_swap_disabled; lxpr_uiobuf_printf(uiobuf, "Filename\t\t\t\tType\t\tSize\tUsed\tPriority\n"); - lxpr_uiobuf_printf(uiobuf, "%-40s%s\t%llu\t%llu\t%d\n", - "/dev/swap", "partition", totswap, usedswap, -1); + + if (swap_enabled) { + uint64_t totswap, usedswap; + + if (zone->zone_max_swap_ctl == UINT64_MAX) { + totswap = (k_anoninfo.ani_max * PAGESIZE) >> 10; + } else { + mutex_enter(&zone->zone_mem_lock); + /* Uses units of 1 kb (2^10). */ + totswap = zone->zone_max_swap_ctl >> 10; + mutex_exit(&zone->zone_mem_lock); + } + usedswap = 0; + + lxpr_uiobuf_printf(uiobuf, "%-40s%s\t%llu\t%llu\t%d\n", + "/dev/swap", "partition", totswap, usedswap, -1); + } } /* ARGSUSED */ diff --git a/usr/src/uts/common/brand/lx/sys/lx_brand.h b/usr/src/uts/common/brand/lx/sys/lx_brand.h index ec5fedcff8..9230876254 100644 --- a/usr/src/uts/common/brand/lx/sys/lx_brand.h +++ b/usr/src/uts/common/brand/lx/sys/lx_brand.h @@ -622,6 +622,7 @@ typedef struct lx_zone_data { dev_t lxzd_zfs_dev; /* major num for zfs */ uint_t lxzd_aio_nr; /* see lx_aio.c */ uint_t lxzd_pipe_max_sz; /* pipe-max-size sysctl val */ + boolean_t lxzd_swap_disabled; /* no fake swap in zone? */ } lx_zone_data_t; /* LWP br_lwp_flags values */ diff --git a/usr/src/uts/common/brand/lx/sys/lx_syscalls.h b/usr/src/uts/common/brand/lx/sys/lx_syscalls.h index 31ceb0da08..c998adbca7 100644 --- a/usr/src/uts/common/brand/lx/sys/lx_syscalls.h +++ b/usr/src/uts/common/brand/lx/sys/lx_syscalls.h @@ -242,6 +242,8 @@ extern long lx_socketpair(); extern long lx_stat32(); extern long lx_stat64(); extern long lx_stime(); +extern long lx_swapoff(); +extern long lx_swapon(); extern long lx_sync(); extern long lx_sync_file_range(); extern long lx_syncfs(); diff --git a/usr/src/uts/common/brand/lx/syscall/lx_miscsys.c b/usr/src/uts/common/brand/lx/syscall/lx_miscsys.c index 23c9d78628..0a24ef9678 100644 --- a/usr/src/uts/common/brand/lx/syscall/lx_miscsys.c +++ b/usr/src/uts/common/brand/lx/syscall/lx_miscsys.c @@ -40,6 +40,17 @@ #define LX_RUSAGE_BOTH (-2) #define LX_RUSAGE_THREAD 1 +#define LX_SWAP_PRIOMASK 0x7fff +#define LX_SWAP_PREFER 0x8000 +#define LX_SWAP_DISCARD 0x10000 +#define LX_SWAP_DISCARD_ONCE 0x20000 +#define LX_SWAP_DISCARD_PAGES 0x40000 + +#define LX_SWAP_ALL (LX_SWAP_DISCARD_PAGES | \ + LX_SWAP_DISCARD_ONCE | \ + LX_SWAP_DISCARD | \ + LX_SWAP_PREFER | LX_SWAP_PRIOMASK) + /* From uts/common/fs/vfs.c */ extern void vfs_sync(int); /* From uts/common/os/grow.c */ @@ -435,3 +446,56 @@ lx_unshare(int flags) { return (set_errno(EPERM)); } + +/* + * The whole idea of "swap space" within a zone is a complete fabrication. + * However, some apps expect to be able to see swap space data in the /proc + * files, while other apps actually don't want there to be any swap space + * configured. We use the swapon/off syscalls to allow this visibility to be + * controlled from within the zone iself. Note that the "swapon" CLI tends to + * do a lot of additional validation which will fail within a zone. + * + * Once we have better Linux capabilities(7) support we should check + * CAP_SYS_ADMIN instead of uid == 0. + */ +long +lx_swapoff(char *path) +{ + char buf[MAXPATHLEN]; + size_t len; + lx_zone_data_t *lxzd; + + /* Simple validaton of the argument */ + if (copyinstr(path, buf, sizeof (buf), &len) != 0) + return (set_errno(EFAULT)); + if (crgetuid(CRED()) != 0) + return (set_errno(EPERM)); + + lxzd = ztolxzd(curzone); + ASSERT(lxzd != NULL); + + lxzd->lxzd_swap_disabled = B_TRUE; + return (0); +} + +long +lx_swapon(char *path, int flags) +{ + char buf[MAXPATHLEN]; + size_t len; + lx_zone_data_t *lxzd; + + /* Simple validaton of the arguments */ + if (copyinstr(path, buf, sizeof (buf), &len) != 0) + return (set_errno(EFAULT)); + if (flags & ~LX_SWAP_ALL) + return (set_errno(EINVAL)); + if (crgetuid(CRED()) != 0) + return (set_errno(EPERM)); + + lxzd = ztolxzd(curzone); + ASSERT(lxzd != NULL); + + lxzd->lxzd_swap_disabled = B_FALSE; + return (0); +} |