summaryrefslogtreecommitdiff
path: root/usr/src
diff options
context:
space:
mode:
authorPatrick Mooney <pmooney@pfmooney.com>2017-08-03 18:19:56 +0000
committerPatrick Mooney <pmooney@pfmooney.com>2017-09-28 15:02:34 +0000
commitdd35f9f41aba6f51178986d36ec29206d6025757 (patch)
treec4fb07b811c268cd15e87093cf51597a64f8a460 /usr/src
parent5ae84a5233b723c890288b775cb5317db2e54d61 (diff)
downloadillumos-joyent-dd35f9f41aba6f51178986d36ec29206d6025757.tar.gz
OS-6323 want stack-clash mitigation
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com> Reviewed by: Alex Wilson <alex.wilson@joyent.com> Approved by: Alex Wilson <alex.wilson@joyent.com>
Diffstat (limited to 'usr/src')
-rw-r--r--usr/src/cmd/mdb/common/modules/genunix/memory.c43
-rw-r--r--usr/src/uts/common/Makefile.files1
-rw-r--r--usr/src/uts/common/brand/lx/procfs/lx_prvnops.c4
-rw-r--r--usr/src/uts/common/exec/elf/elf.c6
-rw-r--r--usr/src/uts/common/fs/lxproc/lxpr_vnops.c4
-rw-r--r--usr/src/uts/common/fs/proc/prioctl.c26
-rw-r--r--usr/src/uts/common/fs/proc/prsubr.c35
-rw-r--r--usr/src/uts/common/os/exec.c62
-rw-r--r--usr/src/uts/common/os/grow.c33
-rw-r--r--usr/src/uts/common/sys/proc.h11
-rw-r--r--usr/src/uts/common/vm/seg.h3
-rw-r--r--usr/src/uts/common/vm/seg_hole.c305
-rw-r--r--usr/src/uts/common/vm/seg_hole.h40
-rw-r--r--usr/src/uts/common/vm/vm_as.c48
-rw-r--r--usr/src/uts/i86pc/vm/vm_machdep.c23
15 files changed, 602 insertions, 42 deletions
diff --git a/usr/src/cmd/mdb/common/modules/genunix/memory.c b/usr/src/cmd/mdb/common/modules/genunix/memory.c
index 34e746f36c..fa4918b9b8 100644
--- a/usr/src/cmd/mdb/common/modules/genunix/memory.c
+++ b/usr/src/cmd/mdb/common/modules/genunix/memory.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2015 Joyent, Inc.
+ * Copyright 2017 Joyent, Inc.
*/
#include <mdb/mdb_param.h>
@@ -40,6 +40,7 @@
#include <sys/vnode.h>
#include <vm/seg_map.h>
#include <vm/seg_vn.h>
+#include <vm/seg_hole.h>
#if defined(__i386) || defined(__amd64)
#include <sys/balloon_impl.h>
#endif
@@ -975,6 +976,11 @@ seg(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
return (DCMD_OK);
}
+/*
+ * Addresses of the seg_ops vectors that the ::pmap segment callbacks compare
+ * against seg->s_ops to decide how a segment should be printed.
+ */
+typedef struct pmap_walk_types {
+ uintptr_t pwt_segvn; /* compared with s_ops to detect seg_vn */
+ uintptr_t pwt_seghole; /* compared with s_ops to detect seg_hole */
+} pmap_walk_types_t;
+
/*ARGSUSED*/
static int
pmap_walk_count_pages(uintptr_t addr, const void *data, void *out)
@@ -987,12 +993,14 @@ pmap_walk_count_pages(uintptr_t addr, const void *data, void *out)
}
static int
-pmap_walk_seg(uintptr_t addr, const struct seg *seg, uintptr_t segvn)
+pmap_walk_seg(uintptr_t addr, const struct seg *seg,
+ const pmap_walk_types_t *types)
{
+ const uintptr_t ops = (uintptr_t)seg->s_ops;
mdb_printf("%0?p %0?p %7dk", addr, seg->s_base, seg->s_size / 1024);
- if (segvn == (uintptr_t)seg->s_ops && seg->s_data != NULL) {
+ if (ops == types->pwt_segvn && seg->s_data != NULL) {
struct segvn_data svn;
pgcnt_t nres = 0;
@@ -1018,6 +1026,18 @@ pmap_walk_seg(uintptr_t addr, const struct seg *seg, uintptr_t segvn)
} else {
mdb_printf(" [ anon ]");
}
+ } else if (ops == types->pwt_seghole && seg->s_data != NULL) {
+ /*
+ * Hole segment: print its optional name.  Any failure to read
+ * the private data or the name string (both mdb_vread() and
+ * mdb_readstr() return -1 on error), a NULL name pointer, or an
+ * empty string is treated as "no name" so that 'name' is never
+ * printed uninitialized.
+ */
+ seghole_data_t shd;
+ char name[16];
+
+ if (mdb_vread(&shd, sizeof (shd),
+ (uintptr_t)seg->s_data) == -1 || shd.shd_name == NULL ||
+ mdb_readstr(name, sizeof (name),
+ (uintptr_t)shd.shd_name) <= 0) {
+ name[0] = '\0';
+ }
+
+ /* Only emit the ':' separator when a name follows it. */
+ mdb_printf(" %8s [ hole%s%s ]", "-",
+ name[0] == '\0' ? "" : ":", name);
} else {
mdb_printf(" %8s [ &%a ]", "?", seg->s_ops);
}
@@ -1027,11 +1047,14 @@ pmap_walk_seg(uintptr_t addr, const struct seg *seg, uintptr_t segvn)
}
static int
-pmap_walk_seg_quick(uintptr_t addr, const struct seg *seg, uintptr_t segvn)
+pmap_walk_seg_quick(uintptr_t addr, const struct seg *seg,
+ const pmap_walk_types_t *types)
{
+ const uintptr_t ops = (uintptr_t)seg->s_ops;
+
mdb_printf("%0?p %0?p %7dk", addr, seg->s_base, seg->s_size / 1024);
- if (segvn == (uintptr_t)seg->s_ops && seg->s_data != NULL) {
+ if (ops == types->pwt_segvn && seg->s_data != NULL) {
struct segvn_data svn;
svn.vp = NULL;
@@ -1054,10 +1077,10 @@ pmap_walk_seg_quick(uintptr_t addr, const struct seg *seg, uintptr_t segvn)
int
pmap(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
- uintptr_t segvn;
proc_t proc;
uint_t quick = FALSE;
mdb_walk_cb_t cb = (mdb_walk_cb_t)pmap_walk_seg;
+ pmap_walk_types_t wtypes = { 0 };
GElf_Sym sym;
@@ -1074,9 +1097,9 @@ pmap(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
}
if (mdb_lookup_by_name("segvn_ops", &sym) == 0)
- segvn = (uintptr_t)sym.st_value;
- else
- segvn = NULL;
+ wtypes.pwt_segvn = (uintptr_t)sym.st_value;
+ if (mdb_lookup_by_name("seghole_ops", &sym) == 0)
+ wtypes.pwt_seghole = (uintptr_t)sym.st_value;
mdb_printf("%?s %?s %8s ", "SEG", "BASE", "SIZE");
@@ -1087,7 +1110,7 @@ pmap(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
mdb_printf("%8s %s\n", "RES", "PATH");
}
- if (mdb_pwalk("seg", cb, (void *)segvn, (uintptr_t)proc.p_as) == -1) {
+ if (mdb_pwalk("seg", cb, (void *)&wtypes, (uintptr_t)proc.p_as) == -1) {
mdb_warn("failed to walk segments of as %p", proc.p_as);
return (DCMD_ERR);
}
diff --git a/usr/src/uts/common/Makefile.files b/usr/src/uts/common/Makefile.files
index 7b30741fb0..8220f91f03 100644
--- a/usr/src/uts/common/Makefile.files
+++ b/usr/src/uts/common/Makefile.files
@@ -304,6 +304,7 @@ GENUNIX_OBJS += \
sctp_crc32.o \
secflags.o \
seg_dev.o \
+ seg_hole.o \
seg_kp.o \
seg_kpm.o \
seg_map.o \
diff --git a/usr/src/uts/common/brand/lx/procfs/lx_prvnops.c b/usr/src/uts/common/brand/lx/procfs/lx_prvnops.c
index 08a817396b..5dfb8ce093 100644
--- a/usr/src/uts/common/brand/lx/procfs/lx_prvnops.c
+++ b/usr/src/uts/common/brand/lx/procfs/lx_prvnops.c
@@ -1759,6 +1759,10 @@ lxpr_read_pid_maps(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
vnode_t *vp;
uint_t protbits;
+ if ((seg->s_flags & S_HOLE) != 0) {
+ continue;
+ }
+
pbuf = kmem_alloc(sizeof (*pbuf), KM_SLEEP);
pbuf->saddr = (uintptr_t)seg->s_base;
diff --git a/usr/src/uts/common/exec/elf/elf.c b/usr/src/uts/common/exec/elf/elf.c
index 23c198897d..023d027789 100644
--- a/usr/src/uts/common/exec/elf/elf.c
+++ b/usr/src/uts/common/exec/elf/elf.c
@@ -26,7 +26,7 @@
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
/*
- * Copyright 2016 Joyent, Inc.
+ * Copyright 2017 Joyent, Inc.
*/
#include <sys/types.h>
@@ -2355,6 +2355,10 @@ top:
void *tmp = NULL;
extern struct seg_ops segspt_shmops;
+ if ((seg->s_flags & S_HOLE) != 0) {
+ continue;
+ }
+
for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
uint_t prot;
size_t size;
diff --git a/usr/src/uts/common/fs/lxproc/lxpr_vnops.c b/usr/src/uts/common/fs/lxproc/lxpr_vnops.c
index 85ef7b4b9b..1f7f3074d6 100644
--- a/usr/src/uts/common/fs/lxproc/lxpr_vnops.c
+++ b/usr/src/uts/common/fs/lxproc/lxpr_vnops.c
@@ -766,6 +766,10 @@ lxpr_read_pid_maps(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
vnode_t *vp;
uint_t protbits;
+ if ((seg->s_flags & S_HOLE) != 0) {
+ continue;
+ }
+
pbuf = kmem_alloc(sizeof (*pbuf), KM_SLEEP);
pbuf->saddr = seg->s_base;
diff --git a/usr/src/uts/common/fs/proc/prioctl.c b/usr/src/uts/common/fs/proc/prioctl.c
index 7b7fae7557..470c66362b 100644
--- a/usr/src/uts/common/fs/proc/prioctl.c
+++ b/usr/src/uts/common/fs/proc/prioctl.c
@@ -22,7 +22,7 @@
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
- * Copyright 2016 Joyent, Inc.
+ * Copyright 2017 Joyent, Inc.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -3521,6 +3521,10 @@ oprgetmap(proc_t *p, list_t *iolhead)
caddr_t saddr, naddr;
void *tmp = NULL;
+ if ((seg->s_flags & S_HOLE) != 0) {
+ continue;
+ }
+
for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
if (saddr == naddr)
@@ -3581,6 +3585,10 @@ oprgetmap32(proc_t *p, list_t *iolhead)
caddr_t saddr, naddr;
void *tmp = NULL;
+ if ((seg->s_flags & S_HOLE) != 0) {
+ continue;
+ }
+
for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
if (saddr == naddr)
@@ -3634,6 +3642,10 @@ oprpdsize(struct as *as)
void *tmp = NULL;
size_t npage;
+ if ((seg->s_flags & S_HOLE) != 0) {
+ continue;
+ }
+
for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
(void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
if ((npage = (naddr - saddr) / PAGESIZE) != 0)
@@ -3664,6 +3676,10 @@ oprpdsize32(struct as *as)
void *tmp = NULL;
size_t npage;
+ if ((seg->s_flags & S_HOLE) != 0) {
+ continue;
+ }
+
for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
(void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
if ((npage = (naddr - saddr) / PAGESIZE) != 0)
@@ -3714,6 +3730,10 @@ again:
caddr_t saddr, naddr;
void *tmp = NULL;
+ if ((seg->s_flags & S_HOLE) != 0) {
+ continue;
+ }
+
for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
size_t len;
size_t npage;
@@ -3821,6 +3841,10 @@ again:
caddr_t saddr, naddr;
void *tmp = NULL;
+ if ((seg->s_flags & S_HOLE) != 0) {
+ continue;
+ }
+
for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
size_t len;
size_t npage;
diff --git a/usr/src/uts/common/fs/proc/prsubr.c b/usr/src/uts/common/fs/proc/prsubr.c
index 0645a91de1..2062970885 100644
--- a/usr/src/uts/common/fs/proc/prsubr.c
+++ b/usr/src/uts/common/fs/proc/prsubr.c
@@ -1416,6 +1416,10 @@ prnsegs(struct as *as, int reserved)
caddr_t saddr, naddr;
void *tmp = NULL;
+ if ((seg->s_flags & S_HOLE) != 0) {
+ continue;
+ }
+
for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
(void) pr_getprot(seg, reserved, &tmp,
&saddr, &naddr, eaddr);
@@ -1671,6 +1675,10 @@ prgetmap(proc_t *p, int reserved, list_t *iolhead)
caddr_t saddr, naddr;
void *tmp = NULL;
+ if ((seg->s_flags & S_HOLE) != 0) {
+ continue;
+ }
+
for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
prot = pr_getprot(seg, reserved, &tmp,
&saddr, &naddr, eaddr);
@@ -1782,6 +1790,10 @@ prgetmap32(proc_t *p, int reserved, list_t *iolhead)
caddr_t saddr, naddr;
void *tmp = NULL;
+ if ((seg->s_flags & S_HOLE) != 0) {
+ continue;
+ }
+
for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
prot = pr_getprot(seg, reserved, &tmp,
&saddr, &naddr, eaddr);
@@ -1885,6 +1897,10 @@ prpdsize(struct as *as)
void *tmp = NULL;
size_t npage;
+ if ((seg->s_flags & S_HOLE) != 0) {
+ continue;
+ }
+
for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
(void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
if ((npage = (naddr - saddr) / PAGESIZE) != 0)
@@ -1915,6 +1931,10 @@ prpdsize32(struct as *as)
void *tmp = NULL;
size_t npage;
+ if ((seg->s_flags & S_HOLE) != 0) {
+ continue;
+ }
+
for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
(void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
if ((npage = (naddr - saddr) / PAGESIZE) != 0)
@@ -1966,6 +1986,10 @@ again:
caddr_t saddr, naddr;
void *tmp = NULL;
+ if ((seg->s_flags & S_HOLE) != 0) {
+ continue;
+ }
+
for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
struct vnode *vp;
struct vattr vattr;
@@ -2113,6 +2137,10 @@ again:
caddr_t saddr, naddr;
void *tmp = NULL;
+ if ((seg->s_flags & S_HOLE) != 0) {
+ continue;
+ }
+
for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
struct vnode *vp;
struct vattr vattr;
@@ -4064,6 +4092,9 @@ prgetxmap(proc_t *p, list_t *iolhead)
uint64_t npages;
uint64_t pagenum;
+ if ((seg->s_flags & S_HOLE) != 0) {
+ continue;
+ }
/*
* Segment loop part one: iterate from the base of the segment
* to its end, pausing at each address boundary (baddr) between
@@ -4260,6 +4291,10 @@ prgetxmap32(proc_t *p, list_t *iolhead)
uint64_t npages;
uint64_t pagenum;
+ if ((seg->s_flags & S_HOLE) != 0) {
+ continue;
+ }
+
/*
* Segment loop part one: iterate from the base of the segment
* to its end, pausing at each address boundary (baddr) between
diff --git a/usr/src/uts/common/os/exec.c b/usr/src/uts/common/os/exec.c
index 2ab4d1f023..96b6081489 100644
--- a/usr/src/uts/common/os/exec.c
+++ b/usr/src/uts/common/os/exec.c
@@ -26,7 +26,7 @@
/* Copyright (c) 1988 AT&T */
/* All Rights Reserved */
/*
- * Copyright 2016 Joyent, Inc.
+ * Copyright 2017 Joyent, Inc.
*/
#include <sys/types.h>
@@ -78,6 +78,7 @@
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_vn.h>
+#include <vm/seg_hole.h>
#define PRIV_RESET 0x01 /* needs to reset privs */
#define PRIV_SETID 0x02 /* needs to change uids */
@@ -116,6 +117,14 @@ size_t aslr_max_brk_skew = 16 * 1024 * 1024; /* 16MB */
size_t aslr_max_stack_skew = 64 * 1024; /* 64KB */
/*
+ * Size of guard segment for 64-bit processes and minimum size it can be shrunk
+ * to in the case of grow() operations. These are kept as variables in case
+ * they need to be tuned in an emergency.
+ */
+size_t stack_guard_seg_sz = 256 * 1024 * 1024;
+size_t stack_guard_min_sz = 64 * 1024 * 1024;
+
+/*
* exece() - system call wrapper around exec_common()
*/
int
@@ -1948,6 +1957,15 @@ exec_get_spslew(void)
* The initial user stack layout is as follows:
*
* User Stack
+ * +---------------+
+ * | |
+ * | stack guard |
+ * | (64-bit only) |
+ * | |
+ * +...............+ <--- stack limit (base - curproc->p_stk_ctl)
+ * . .
+ * . .
+ * . .
* +---------------+ <--- curproc->p_usrstack
* | |
* | slew |
@@ -1989,6 +2007,11 @@ exec_get_spslew(void)
* +---------------+ <--- argv[]
* | argc |
* +---------------+ <--- stack base
+ *
+ * In 64-bit processes, a stack guard segment is allocated at the address
+ * immediately below where the stack limit ends. This protects new library
+ * mappings (such as the linker) from being placed in relatively dangerous
+ * proximity to the stack.
*/
int
exec_args(execa_t *uap, uarg_t *args, intpdata_t *intp, void **auxvpp)
@@ -2002,6 +2025,9 @@ exec_args(execa_t *uap, uarg_t *args, intpdata_t *intp, void **auxvpp)
struct as *as;
extern int use_stk_lpg;
size_t sp_slew;
+#if defined(_LP64)
+ const size_t sg_sz = (stack_guard_seg_sz & PAGEMASK);
+#endif /* defined(_LP64) */
args->from_model = p->p_model;
if (p->p_model == DATAMODEL_NATIVE) {
@@ -2153,6 +2179,8 @@ exec_args(execa_t *uap, uarg_t *args, intpdata_t *intp, void **auxvpp)
p->p_brkpageszc = 0;
p->p_stksize = 0;
p->p_stkpageszc = 0;
+ p->p_stkg_start = 0;
+ p->p_stkg_end = 0;
p->p_model = args->to_model;
p->p_usrstack = usrstack;
p->p_stkprot = args->stk_prot;
@@ -2190,10 +2218,36 @@ exec_args(execa_t *uap, uarg_t *args, intpdata_t *intp, void **auxvpp)
(void) hat_setup(as->a_hat, HAT_ALLOC);
hat_join_srd(as->a_hat, args->ex_vp);
- /*
- * Finally, write out the contents of the new stack.
- */
+ /* Write out the contents of the new stack. */
error = stk_copyout(args, usrstack - sp_slew, auxvpp, up);
kmem_free(args->stk_base, args->stk_size);
+
+#if defined(_LP64)
+ /* Add stack guard segment (if needed) after successful copyout */
+ if (error == 0 && p->p_model == DATAMODEL_LP64 && sg_sz != 0) {
+ seghole_crargs_t sca;
+ caddr_t addr_end = (caddr_t)(((uintptr_t)usrstack -
+ p->p_stk_ctl) & PAGEMASK);
+ caddr_t addr_start = addr_end - sg_sz;
+
+ DTRACE_PROBE4(stack__guard__chk, proc_t *, p,
+ caddr_t, addr_start, caddr_t, addr_end, size_t, sg_sz);
+
+ if (addr_end >= usrstack || addr_start >= addr_end ||
+ valid_usr_range(addr_start, sg_sz, PROT_NONE, as,
+ as->a_userlimit) != RANGE_OKAY) {
+ return (E2BIG);
+ }
+
+ /* Create un-mappable area in AS with seg_hole */
+ sca.name = "stack_guard";
+ error = as_map(as, addr_start, sg_sz, seghole_create, &sca);
+ if (error == 0) {
+ p->p_stkg_start = (uintptr_t)addr_start;
+ p->p_stkg_end = (uintptr_t)addr_start + sg_sz;
+ }
+ }
+#endif /* defined(_LP64) */
+
return (error);
}
diff --git a/usr/src/uts/common/os/grow.c b/usr/src/uts/common/os/grow.c
index a3de80259f..9d40f93da1 100644
--- a/usr/src/uts/common/os/grow.c
+++ b/usr/src/uts/common/os/grow.c
@@ -21,7 +21,7 @@
/*
* Copyright 2013 OmniTI Computer Consulting, Inc. All rights reserved.
- * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ * Copyright 2017 Joyent, Inc.
*/
/*
@@ -333,9 +333,10 @@ grow(caddr_t sp)
} else {
err = grow_internal(sp, p->p_stkpageszc);
}
+ newsize = p->p_stksize;
as_rangeunlock(as);
- if (err == 0 && (newsize = p->p_stksize) > oldsize) {
+ if (err == 0 && newsize > oldsize) {
ASSERT(IS_P2ALIGNED(oldsize, PAGESIZE));
ASSERT(IS_P2ALIGNED(newsize, PAGESIZE));
/*
@@ -428,6 +429,7 @@ grow_internal(caddr_t sp, uint_t growszc)
struct proc *p = curproc;
size_t newsize;
size_t oldsize;
+ uintptr_t new_start;
int error;
size_t pgsz;
uint_t szc;
@@ -498,7 +500,32 @@ grow_internal(caddr_t sp, uint_t growszc)
}
crargs.lgrp_mem_policy_flags = LGRP_MP_FLAG_EXTEND_DOWN;
- if ((error = as_map(p->p_as, p->p_usrstack - newsize, newsize - oldsize,
+ /*
+ * The stack is about to grow into its guard. This can be acceptable
+ * if the size restriction on the stack has been expanded since its
+ * initialization during exec(). In such cases, the guard segment will
+ * be shrunk, provided the new size is reasonable.
+ */
+ new_start = (uintptr_t)p->p_usrstack - newsize;
+ if (p->p_stkg_start != 0 && new_start > p->p_stkg_start &&
+ new_start < p->p_stkg_end) {
+ const size_t unmap_sz = p->p_stkg_end - new_start;
+ const size_t remain_sz = new_start - p->p_stkg_start;
+ extern size_t stack_guard_min_sz;
+
+ /* Do not allow the guard to shrink below minimum size */
+ if (remain_sz < stack_guard_min_sz) {
+ return (ENOMEM);
+ }
+
+ error = as_unmap(p->p_as, (caddr_t)new_start, unmap_sz);
+ if (error != 0) {
+ return (error);
+ }
+ p->p_stkg_end -= unmap_sz;
+ }
+
+ if ((error = as_map(p->p_as, (caddr_t)new_start, newsize - oldsize,
segvn_create, &crargs)) != 0) {
if (error == EAGAIN) {
cmn_err(CE_WARN, "Sorry, no swap space to grow stack "
diff --git a/usr/src/uts/common/sys/proc.h b/usr/src/uts/common/sys/proc.h
index d5be7943ef..a7fff4e5ab 100644
--- a/usr/src/uts/common/sys/proc.h
+++ b/usr/src/uts/common/sys/proc.h
@@ -251,8 +251,15 @@ typedef struct proc {
kmutex_t p_maplock; /* lock for pr_mappage() */
struct proc *p_rlink; /* linked list for server */
kcondvar_t p_srwchan_cv;
- size_t p_stksize; /* process stack size in bytes */
- uint_t p_stkpageszc; /* preferred stack max page size code */
+
+ /*
+ * Stack sizing and guard information.
+ * Generally protected by as_rangelock()
+ */
+ size_t p_stksize; /* process stack size in bytes */
+ uint_t p_stkpageszc; /* preferred stack max page size code */
+ uintptr_t p_stkg_start; /* start of stack guard */
+ uintptr_t p_stkg_end; /* end of stack guard */
/*
* Microstate accounting, resource usage, and real-time profiling
diff --git a/usr/src/uts/common/vm/seg.h b/usr/src/uts/common/vm/seg.h
index be1c9514e9..9dde7028c4 100644
--- a/usr/src/uts/common/vm/seg.h
+++ b/usr/src/uts/common/vm/seg.h
@@ -21,7 +21,7 @@
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
- * Copyright (c) 2015, Joyent, Inc.
+ * Copyright 2017 Joyent, Inc.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -113,6 +113,7 @@ typedef struct seg {
} seg_t;
#define S_PURGE (0x01) /* seg should be purged in as_gap() */
+#define S_HOLE (0x02) /* seg represents hole in AS */
struct seg_ops {
int (*dup)(struct seg *, struct seg *);
diff --git a/usr/src/uts/common/vm/seg_hole.c b/usr/src/uts/common/vm/seg_hole.c
new file mode 100644
index 0000000000..a716c270cf
--- /dev/null
+++ b/usr/src/uts/common/vm/seg_hole.c
@@ -0,0 +1,305 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2017 Joyent, Inc.
+ */
+
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/errno.h>
+#include <sys/cred.h>
+#include <sys/kmem.h>
+#include <sys/lgrp.h>
+#include <sys/mman.h>
+
+#include <vm/hat.h>
+#include <vm/as.h>
+#include <vm/seg.h>
+#include <vm/seg_hole.h>
+
+
+static int seghole_dup(struct seg *, struct seg *);
+static int seghole_unmap(struct seg *, caddr_t, size_t);
+static void seghole_free(struct seg *);
+static faultcode_t seghole_fault(struct hat *, struct seg *, caddr_t, size_t,
+ enum fault_type, enum seg_rw);
+static faultcode_t seghole_faulta(struct seg *, caddr_t);
+static int seghole_setprot(struct seg *, caddr_t, size_t, uint_t);
+static int seghole_checkprot(struct seg *, caddr_t, size_t, uint_t);
+static int seghole_sync(struct seg *, caddr_t, size_t, int, uint_t);
+static size_t seghole_incore(struct seg *, caddr_t, size_t, char *);
+static int seghole_lockop(struct seg *, caddr_t, size_t, int, int, ulong_t *,
+ size_t);
+static int seghole_getprot(struct seg *, caddr_t, size_t, uint_t *);
+static u_offset_t seghole_getoffset(struct seg *, caddr_t);
+static int seghole_gettype(struct seg *, caddr_t);
+static int seghole_getvp(struct seg *, caddr_t, struct vnode **);
+static int seghole_advise(struct seg *, caddr_t, size_t, uint_t);
+static void seghole_dump(struct seg *);
+static int seghole_pagelock(struct seg *, caddr_t, size_t, struct page ***,
+ enum lock_type, enum seg_rw);
+static int seghole_setpagesize(struct seg *, caddr_t, size_t, uint_t);
+static int seghole_capable(struct seg *, segcapability_t);
+
+/*
+ * Segment operations vector for hole segments.  This is a positional
+ * initializer, so the entries must stay in the member order of struct
+ * seg_ops.  Operations marked "disabled" below are left NULL.
+ */
+static struct seg_ops seghole_ops = {
+ seghole_dup,
+ seghole_unmap,
+ seghole_free,
+ seghole_fault,
+ seghole_faulta,
+ seghole_setprot,
+ seghole_checkprot,
+ NULL, /* kluster: disabled */
+ NULL, /* swapout: disabled */
+ seghole_sync,
+ seghole_incore,
+ seghole_lockop,
+ seghole_getprot,
+ seghole_getoffset,
+ seghole_gettype,
+ seghole_getvp,
+ seghole_advise,
+ seghole_dump,
+ seghole_pagelock,
+ seghole_setpagesize,
+ NULL, /* getmemid: disabled */
+ NULL, /* getpolicy: disabled */
+ seghole_capable,
+ seg_inherit_notsup
+};
+
+/*
+ * Create a hole in the AS.
+ *
+ * argsp must point to a seghole_crargs_t.  Only the name pointer is stored,
+ * not a copy of the string, so presumably the caller must supply a string
+ * that outlives the segment (TODO confirm against callers).  The private
+ * data is allocated with KM_SLEEP and the result is not checked; the
+ * function always returns 0.
+ */
+int
+seghole_create(struct seg *seg, void *argsp)
+{
+ seghole_crargs_t *crargs = argsp;
+ seghole_data_t *data;
+
+ data = kmem_alloc(sizeof (seghole_data_t), KM_SLEEP);
+ data->shd_name = crargs->name;
+
+ seg->s_ops = &seghole_ops;
+ seg->s_data = data;
+ /* Mark the segment so that AS consumers can recognize it as a hole. */
+ seg->s_flags = S_HOLE;
+
+ return (0);
+}
+
+/*
+ * Duplicate a hole segment into newseg.  The new segment gets its own
+ * seghole_data_t which shares the original's name pointer, uses the same ops
+ * vector and is flagged S_HOLE.  Asserts that the source AS is write-held.
+ * Always returns 0.
+ */
+static int
+seghole_dup(struct seg *seg, struct seg *newseg)
+{
+ seghole_data_t *shd = (seghole_data_t *)seg->s_data;
+ seghole_data_t *newshd;
+
+ ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));
+
+ newshd = kmem_zalloc(sizeof (seghole_data_t), KM_SLEEP);
+ newshd->shd_name = shd->shd_name;
+
+ newseg->s_ops = seg->s_ops;
+ newseg->s_data = newshd;
+ newseg->s_flags = S_HOLE;
+
+ return (0);
+}
+
+/*
+ * Unmap [addr, addr + len) from a hole segment.
+ *
+ * The whole segment may be removed, or it may be trimmed from either its low
+ * or its high end.  A range strictly inside the segment would split the hole
+ * in two, which is refused with EINVAL.  Returns 0 on success.  Asserts that
+ * the AS is write-held.
+ */
+static int
+seghole_unmap(struct seg *seg, caddr_t addr, size_t len)
+{
+ ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));
+
+ /* Entire segment is being unmapped */
+ if (addr == seg->s_base && len == seg->s_size) {
+ seg_free(seg);
+ return (0);
+ }
+
+ /* Shrinking from low address side */
+ if (addr == seg->s_base) {
+ seg->s_base += len;
+ seg->s_size -= len;
+ return (0);
+ }
+
+ /* Shrinking from high address side */
+ if ((addr + len) == (seg->s_base + seg->s_size)) {
+ seg->s_size -= len;
+ return (0);
+ }
+
+ /* Do not tolerate splitting the segment */
+ return (EINVAL);
+}
+
+/*
+ * Release the seghole_data_t attached to the segment and clear s_data.
+ * The private data is asserted to be non-NULL.
+ */
+static void
+seghole_free(struct seg *seg)
+{
+ seghole_data_t *data = (seghole_data_t *)seg->s_data;
+
+ ASSERT(data != NULL);
+
+ kmem_free(data, sizeof (*data));
+ seg->s_data = NULL;
+}
+
+/*
+ * Faults can never be resolved within a hole: always returns FC_NOMAP.
+ * Asserts that the AS lock is held.
+ */
+/* ARGSUSED */
+static faultcode_t
+seghole_fault(struct hat *hat, struct seg *seg, caddr_t addr, size_t len,
+ enum fault_type type, enum seg_rw tw)
+{
+ ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
+
+ return (FC_NOMAP);
+}
+
+/* The faulta operation on a hole always returns FC_NOMAP. */
+/* ARGSUSED */
+static faultcode_t
+seghole_faulta(struct seg *seg, caddr_t addr)
+{
+ return (FC_NOMAP);
+}
+
+/*
+ * Protections cannot be set on a hole: always returns ENOMEM.
+ * Asserts that the AS lock is held.
+ */
+/* ARGSUSED */
+static int
+seghole_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
+{
+ ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
+
+ return (ENOMEM);
+}
+
+/*
+ * No protection check can succeed against a hole: always returns ENOMEM.
+ * Asserts that the AS lock is held.
+ */
+/* ARGSUSED */
+static int
+seghole_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
+{
+ ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
+
+ return (ENOMEM);
+}
+
+/* ARGSUSED */
+static int
+seghole_sync(struct seg *seg, caddr_t addr, size_t len, int attr, uint_t flags)
+{
+ /* Always succeed since there is no backing store to sync */
+ return (0);
+}
+
+/*
+ * Always returns 0 and never writes to vec.
+ * Asserts that the AS lock is held.
+ */
+/* ARGSUSED */
+static size_t
+seghole_incore(struct seg *seg, caddr_t addr, size_t len, char *vec)
+{
+ ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
+
+ return (0);
+}
+
+/* Memory locking operations on a hole always fail with ENOMEM. */
+/* ARGSUSED */
+static int
+seghole_lockop(struct seg *seg, caddr_t addr, size_t len, int attr, int op,
+ ulong_t *lockmap, size_t pos)
+{
+ /*
+ * Emit an error consistent with there being no segment in this hole in
+ * the AS. The MC_LOCKAS and MC_UNLOCKAS commands will explicitly skip
+ * hole segments, allowing such operations to proceed as expected.
+ */
+ return (ENOMEM);
+}
+
+/*
+ * Report protections for [addr, addr + len).  Every entry of protv covering
+ * the request is set to 0 and ENOMEM is returned.  The number of entries
+ * written is seg_page(addr + len) - seg_page(addr) + 1, so protv must be at
+ * least that large.  Asserts that the AS lock is held.
+ */
+static int
+seghole_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv)
+{
+ size_t pgno;
+
+ ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
+
+ /*
+ * Few SEGOP_GETPROT callers actually check for an error, so it's
+ * necessary to report zeroed protection for the length of the request.
+ */
+ pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1;
+ while (pgno > 0) {
+ protv[--pgno] = 0;
+ }
+
+ return (ENOMEM);
+}
+
+/* Always returns an offset of 0, regardless of addr. */
+/* ARGSUSED */
+static u_offset_t
+seghole_getoffset(struct seg *seg, caddr_t addr)
+{
+ /*
+ * To avoid leaking information about the layout of the kernel address
+ * space, always report '0' as the offset.
+ */
+ return (0);
+}
+
+/* Always reports the mapping type as MAP_PRIVATE. */
+/* ARGSUSED */
+static int
+seghole_gettype(struct seg *seg, caddr_t addr)
+{
+ return (MAP_PRIVATE);
+}
+
+/*
+ * Always returns ENOMEM and leaves *vpp untouched.
+ * Asserts that the AS lock is held.
+ */
+/* ARGSUSED */
+static int
+seghole_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp)
+{
+ ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
+
+ return (ENOMEM);
+}
+
+/* Advice cannot be applied to a hole: always returns ENOMEM. */
+/* ARGSUSED */
+static int
+seghole_advise(struct seg *seg, caddr_t addr, size_t len, uint_t behav)
+{
+ return (ENOMEM);
+}
+
+/* Dump operation for hole segments: intentionally a no-op. */
+/* ARGSUSED */
+static void
+seghole_dump(struct seg *seg)
+{
+ /* There's nothing to dump from a hole in the AS */
+}
+
+/* Pages cannot be locked within a hole: always returns EFAULT. */
+/* ARGSUSED */
+static int
+seghole_pagelock(struct seg *seg, caddr_t addr, size_t len, struct page ***ppp,
+ enum lock_type type, enum seg_rw rw)
+{
+ return (EFAULT);
+}
+
+/* Page size cannot be changed for a hole: always returns ENOMEM. */
+/* ARGSUSED */
+static int
+seghole_setpagesize(struct seg *seg, caddr_t addr, size_t len, uint_t szc)
+{
+ return (ENOMEM);
+}
+
+/* ARGSUSED */
+static int
+seghole_capable(struct seg *seg, segcapability_t capability)
+{
+ /* no special capabilities */
+ return (0);
+}
diff --git a/usr/src/uts/common/vm/seg_hole.h b/usr/src/uts/common/vm/seg_hole.h
new file mode 100644
index 0000000000..fb48a057e0
--- /dev/null
+++ b/usr/src/uts/common/vm/seg_hole.h
@@ -0,0 +1,40 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2017 Joyent, Inc.
+ */
+
+#ifndef _VM_SEG_HOLE_H
+#define _VM_SEG_HOLE_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Creation arguments handed to seghole_create() through its void * argument. */
+typedef struct seghole_crargs {
+ const char *name; /* label for the hole */
+} seghole_crargs_t;
+
+/* Private data for a hole segment. */
+typedef struct seghole_data {
+ const char *shd_name; /* label for the hole */
+} seghole_data_t;
+
+extern int seghole_create(struct seg *, void *);
+
+/*
+ * Evaluates to non-zero when the segment creation function pointer crfp is
+ * seghole_create, i.e. when the segment about to be created is a hole.
+ */
+#define AS_MAP_CHECK_SEGHOLE(crfp) \
+ ((crfp) == (int (*)())seghole_create)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _VM_SEG_HOLE_H */
diff --git a/usr/src/uts/common/vm/vm_as.c b/usr/src/uts/common/vm/vm_as.c
index 0becd0f81c..502fb8f386 100644
--- a/usr/src/uts/common/vm/vm_as.c
+++ b/usr/src/uts/common/vm/vm_as.c
@@ -21,7 +21,7 @@
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
- * Copyright 2016 Joyent, Inc.
+ * Copyright 2017 Joyent, Inc.
* Copyright (c) 2016 by Delphix. All rights reserved.
*/
@@ -68,6 +68,7 @@
#include <vm/seg_kmem.h>
#include <vm/seg_map.h>
#include <vm/seg_spt.h>
+#include <vm/seg_hole.h>
#include <vm/page.h>
clock_t deadlk_wait = 1; /* number of ticks to wait before retrying */
@@ -819,7 +820,9 @@ as_dup(struct as *as, struct proc *forkedproc)
as_free(newas);
return (error);
}
- newas->a_size += seg->s_size;
+ if ((newseg->s_flags & S_HOLE) == 0) {
+ newas->a_size += seg->s_size;
+ }
}
newas->a_resvsize = as->a_resvsize - purgesize;
@@ -1330,6 +1333,8 @@ top:
as_clearwatchprot(as, raddr, eaddr - raddr);
for (seg = as_findseg(as, raddr, 0); seg != NULL; seg = seg_next) {
+ const boolean_t is_hole = ((seg->s_flags & S_HOLE) != 0);
+
if (eaddr <= seg->s_base)
break; /* eaddr was in a gap; all done */
@@ -1434,9 +1439,11 @@ retry:
return (-1);
}
- as->a_size -= ssize;
- if (rsize)
- as->a_resvsize -= rsize;
+ if (!is_hole) {
+ as->a_size -= ssize;
+ if (rsize)
+ as->a_resvsize -= rsize;
+ }
raddr += ssize;
}
AS_LOCK_EXIT(as);
@@ -1686,6 +1693,7 @@ as_map_locked(struct as *as, caddr_t addr, size_t size, int (*crfp)(),
size_t rsize; /* rounded up size */
int error;
int unmap = 0;
+ boolean_t is_hole = B_FALSE;
/*
* The use of a_proc is preferred to handle the case where curproc is
* a door_call server and is allocating memory in the client's (a_proc)
@@ -1712,7 +1720,14 @@ as_map_locked(struct as *as, caddr_t addr, size_t size, int (*crfp)(),
gethrestime(&as->a_updatetime);
if (as != &kas) {
- if (as->a_size + rsize > (size_t)p->p_vmem_ctl) {
+ /*
+ * Ensure that the virtual size of the process will not exceed
+ * the configured limit. Since seg_hole segments will later
+ * set the S_HOLE flag indicating their status as a hole in the
+ * AS, they are excluded from this check.
+ */
+ if (as->a_size + rsize > (size_t)p->p_vmem_ctl &&
+ !AS_MAP_CHECK_SEGHOLE(crfp)) {
AS_LOCK_EXIT(as);
(void) rctl_action(rctlproc_legacy[RLIMIT_VMEM],
@@ -1770,19 +1785,24 @@ as_map_locked(struct as *as, caddr_t addr, size_t size, int (*crfp)(),
}
/*
* Add size now so as_unmap will work if as_ctl fails.
+ * Not applicable to explicit hole segments.
*/
- as->a_size += rsize;
- as->a_resvsize += rsize;
+ if ((seg->s_flags & S_HOLE) == 0) {
+ as->a_size += rsize;
+ as->a_resvsize += rsize;
+ } else {
+ is_hole = B_TRUE;
+ }
}
as_setwatch(as);
/*
- * If the address space is locked,
- * establish memory locks for the new segment.
+ * Establish memory locks for the segment if the address space is
+ * locked, provided it's not an explicit hole in the AS.
*/
mutex_enter(&as->a_contents);
- if (AS_ISPGLCK(as)) {
+ if (AS_ISPGLCK(as) && !is_hole) {
mutex_exit(&as->a_contents);
AS_LOCK_EXIT(as);
error = as_ctl(as, addr, size, MC_LOCK, 0, 0, NULL, 0);
@@ -2310,6 +2330,9 @@ retry:
}
for (seg = AS_SEGFIRST(as); seg; seg = AS_SEGNEXT(as, seg)) {
+ /*
+ * Skip explicit holes.  The mask must be parenthesized:
+ * '!=' binds tighter than '&', so "s_flags & S_HOLE != 0"
+ * would test bit 0x01 (S_PURGE) rather than S_HOLE.
+ */
+ if ((seg->s_flags & S_HOLE) != 0) {
+ continue;
+ }
error = SEGOP_LOCKOP(seg, seg->s_base,
seg->s_size, attr, MC_LOCK, mlock_map, pos);
if (error != 0)
@@ -2339,6 +2362,9 @@ retry:
mutex_exit(&as->a_contents);
for (seg = AS_SEGFIRST(as); seg; seg = AS_SEGNEXT(as, seg)) {
+ /*
+ * Skip explicit holes.  The mask must be parenthesized:
+ * '!=' binds tighter than '&', so "s_flags & S_HOLE != 0"
+ * would test bit 0x01 (S_PURGE) rather than S_HOLE.
+ */
+ if ((seg->s_flags & S_HOLE) != 0) {
+ continue;
+ }
error = SEGOP_LOCKOP(seg, seg->s_base,
seg->s_size, attr, MC_UNLOCK, NULL, 0);
if (error != 0)
diff --git a/usr/src/uts/i86pc/vm/vm_machdep.c b/usr/src/uts/i86pc/vm/vm_machdep.c
index 0625e37bbf..152a717ad0 100644
--- a/usr/src/uts/i86pc/vm/vm_machdep.c
+++ b/usr/src/uts/i86pc/vm/vm_machdep.c
@@ -24,7 +24,7 @@
/*
* Copyright (c) 2010, Intel Corporation.
* All rights reserved.
- * Copyright 2016 Joyent, Inc.
+ * Copyright 2017 Joyent, Inc.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -689,9 +689,6 @@ map_addr_proc(
base = p->p_brkbase;
#if defined(__amd64)
- /*
- * XX64 Yes, this needs more work.
- */
if (p->p_model == DATAMODEL_NATIVE) {
if (userlimit < as->a_userlimit) {
/*
@@ -711,16 +708,24 @@ map_addr_proc(
}
} else {
/*
- * XX64 This layout is probably wrong .. but in
- * the event we make the amd64 address space look
- * like sparcv9 i.e. with the stack -above- the
- * heap, this bit of code might even be correct.
+ * With the stack positioned at a higher address than
+ * the heap for 64-bit processes, it is necessary to be
+ * mindful of its location and potential size.
+ *
+ * Unallocated space above the top of the stack (that
+ * is, at a lower address) but still within the bounds
+ * of the stack limit should be considered unavailable.
+ *
+ * As the 64-bit stack guard is mapped in immediately
+ * adjacent to the stack limit boundary, this prevents
+ * new mappings from having accidentally dangerous
+ * proximity to the stack.
*/
slen = p->p_usrstack - base -
((p->p_stk_ctl + PAGEOFFSET) & PAGEMASK);
}
} else
-#endif
+#endif /* defined(__amd64) */
slen = userlimit - base;
/* Make len be a multiple of PAGESIZE */