diff options
Diffstat (limited to 'usr/src')
25 files changed, 721 insertions, 122 deletions
diff --git a/usr/src/cmd/mdb/common/modules/genunix/memory.c b/usr/src/cmd/mdb/common/modules/genunix/memory.c index 34e746f36c..fa4918b9b8 100644 --- a/usr/src/cmd/mdb/common/modules/genunix/memory.c +++ b/usr/src/cmd/mdb/common/modules/genunix/memory.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2015 Joyent, Inc. + * Copyright 2017 Joyent, Inc. */ #include <mdb/mdb_param.h> @@ -40,6 +40,7 @@ #include <sys/vnode.h> #include <vm/seg_map.h> #include <vm/seg_vn.h> +#include <vm/seg_hole.h> #if defined(__i386) || defined(__amd64) #include <sys/balloon_impl.h> #endif @@ -975,6 +976,11 @@ seg(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) return (DCMD_OK); } +typedef struct pmap_walk_types { + uintptr_t pwt_segvn; + uintptr_t pwt_seghole; +} pmap_walk_types_t; + /*ARGSUSED*/ static int pmap_walk_count_pages(uintptr_t addr, const void *data, void *out) @@ -987,12 +993,14 @@ pmap_walk_count_pages(uintptr_t addr, const void *data, void *out) } static int -pmap_walk_seg(uintptr_t addr, const struct seg *seg, uintptr_t segvn) +pmap_walk_seg(uintptr_t addr, const struct seg *seg, + const pmap_walk_types_t *types) { + const uintptr_t ops = (uintptr_t)seg->s_ops; mdb_printf("%0?p %0?p %7dk", addr, seg->s_base, seg->s_size / 1024); - if (segvn == (uintptr_t)seg->s_ops && seg->s_data != NULL) { + if (ops == types->pwt_segvn && seg->s_data != NULL) { struct segvn_data svn; pgcnt_t nres = 0; @@ -1018,6 +1026,18 @@ pmap_walk_seg(uintptr_t addr, const struct seg *seg, uintptr_t segvn) } else { mdb_printf(" [ anon ]"); } + } else if (ops == types->pwt_seghole && seg->s_data != NULL) { + seghole_data_t shd; + char name[16]; + + (void) mdb_vread(&shd, sizeof (shd), (uintptr_t)seg->s_data); + if (shd.shd_name == NULL || mdb_readstr(name, sizeof (name), + (uintptr_t)shd.shd_name) == 0) { + name[0] = '\0'; + } + + mdb_printf(" %8s [ hole%s%s ]", "-", + name[0] == '\0' ? 
"" : ":", name); } else { mdb_printf(" %8s [ &%a ]", "?", seg->s_ops); } @@ -1027,11 +1047,14 @@ pmap_walk_seg(uintptr_t addr, const struct seg *seg, uintptr_t segvn) } static int -pmap_walk_seg_quick(uintptr_t addr, const struct seg *seg, uintptr_t segvn) +pmap_walk_seg_quick(uintptr_t addr, const struct seg *seg, + const pmap_walk_types_t *types) { + const uintptr_t ops = (uintptr_t)seg->s_ops; + mdb_printf("%0?p %0?p %7dk", addr, seg->s_base, seg->s_size / 1024); - if (segvn == (uintptr_t)seg->s_ops && seg->s_data != NULL) { + if (ops == types->pwt_segvn && seg->s_data != NULL) { struct segvn_data svn; svn.vp = NULL; @@ -1054,10 +1077,10 @@ pmap_walk_seg_quick(uintptr_t addr, const struct seg *seg, uintptr_t segvn) int pmap(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) { - uintptr_t segvn; proc_t proc; uint_t quick = FALSE; mdb_walk_cb_t cb = (mdb_walk_cb_t)pmap_walk_seg; + pmap_walk_types_t wtypes = { 0 }; GElf_Sym sym; @@ -1074,9 +1097,9 @@ pmap(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) } if (mdb_lookup_by_name("segvn_ops", &sym) == 0) - segvn = (uintptr_t)sym.st_value; - else - segvn = NULL; + wtypes.pwt_segvn = (uintptr_t)sym.st_value; + if (mdb_lookup_by_name("seghole_ops", &sym) == 0) + wtypes.pwt_seghole = (uintptr_t)sym.st_value; mdb_printf("%?s %?s %8s ", "SEG", "BASE", "SIZE"); @@ -1087,7 +1110,7 @@ pmap(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) mdb_printf("%8s %s\n", "RES", "PATH"); } - if (mdb_pwalk("seg", cb, (void *)segvn, (uintptr_t)proc.p_as) == -1) { + if (mdb_pwalk("seg", cb, (void *)&wtypes, (uintptr_t)proc.p_as) == -1) { mdb_warn("failed to walk segments of as %p", proc.p_as); return (DCMD_ERR); } diff --git a/usr/src/uts/common/Makefile.files b/usr/src/uts/common/Makefile.files index 0071fb9b35..f06d10c527 100644 --- a/usr/src/uts/common/Makefile.files +++ b/usr/src/uts/common/Makefile.files @@ -303,6 +303,7 @@ GENUNIX_OBJS += \ sctp_crc32.o \ secflags.o \ seg_dev.o \ + seg_hole.o \ 
seg_kp.o \ seg_kpm.o \ seg_map.o \ diff --git a/usr/src/uts/common/exec/elf/elf.c b/usr/src/uts/common/exec/elf/elf.c index 141baa4aeb..d3cc0b8f0d 100644 --- a/usr/src/uts/common/exec/elf/elf.c +++ b/usr/src/uts/common/exec/elf/elf.c @@ -2075,6 +2075,10 @@ top: void *tmp = NULL; extern struct seg_ops segspt_shmops; + if ((seg->s_flags & S_HOLE) != 0) { + continue; + } + for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) { uint_t prot; size_t size; diff --git a/usr/src/uts/common/fs/proc/prioctl.c b/usr/src/uts/common/fs/proc/prioctl.c index f1f39ed3be..3ed40c5d96 100644 --- a/usr/src/uts/common/fs/proc/prioctl.c +++ b/usr/src/uts/common/fs/proc/prioctl.c @@ -22,6 +22,7 @@ /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2017 Joyent, Inc. */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ @@ -3542,6 +3543,10 @@ oprgetmap(proc_t *p, list_t *iolhead) caddr_t saddr, naddr; void *tmp = NULL; + if ((seg->s_flags & S_HOLE) != 0) { + continue; + } + for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) { prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr); if (saddr == naddr) @@ -3602,6 +3607,10 @@ oprgetmap32(proc_t *p, list_t *iolhead) caddr_t saddr, naddr; void *tmp = NULL; + if ((seg->s_flags & S_HOLE) != 0) { + continue; + } + for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) { prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr); if (saddr == naddr) @@ -3655,6 +3664,10 @@ oprpdsize(struct as *as) void *tmp = NULL; size_t npage; + if ((seg->s_flags & S_HOLE) != 0) { + continue; + } + for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) { (void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr); if ((npage = (naddr - saddr) / PAGESIZE) != 0) @@ -3685,6 +3698,10 @@ oprpdsize32(struct as *as) void *tmp = NULL; size_t npage; + if ((seg->s_flags & S_HOLE) != 0) { + continue; + } + for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) { (void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, 
eaddr); if ((npage = (naddr - saddr) / PAGESIZE) != 0) @@ -3735,6 +3752,10 @@ again: caddr_t saddr, naddr; void *tmp = NULL; + if ((seg->s_flags & S_HOLE) != 0) { + continue; + } + for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) { size_t len; size_t npage; @@ -3842,6 +3863,10 @@ again: caddr_t saddr, naddr; void *tmp = NULL; + if ((seg->s_flags & S_HOLE) != 0) { + continue; + } + for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) { size_t len; size_t npage; diff --git a/usr/src/uts/common/fs/proc/prsubr.c b/usr/src/uts/common/fs/proc/prsubr.c index 4781fbaa77..a2ab06d769 100644 --- a/usr/src/uts/common/fs/proc/prsubr.c +++ b/usr/src/uts/common/fs/proc/prsubr.c @@ -1383,6 +1383,10 @@ prnsegs(struct as *as, int reserved) caddr_t saddr, naddr; void *tmp = NULL; + if ((seg->s_flags & S_HOLE) != 0) { + continue; + } + for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) { (void) pr_getprot(seg, reserved, &tmp, &saddr, &naddr, eaddr); @@ -1638,6 +1642,10 @@ prgetmap(proc_t *p, int reserved, list_t *iolhead) caddr_t saddr, naddr; void *tmp = NULL; + if ((seg->s_flags & S_HOLE) != 0) { + continue; + } + for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) { prot = pr_getprot(seg, reserved, &tmp, &saddr, &naddr, eaddr); @@ -1749,6 +1757,10 @@ prgetmap32(proc_t *p, int reserved, list_t *iolhead) caddr_t saddr, naddr; void *tmp = NULL; + if ((seg->s_flags & S_HOLE) != 0) { + continue; + } + for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) { prot = pr_getprot(seg, reserved, &tmp, &saddr, &naddr, eaddr); @@ -1852,6 +1864,10 @@ prpdsize(struct as *as) void *tmp = NULL; size_t npage; + if ((seg->s_flags & S_HOLE) != 0) { + continue; + } + for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) { (void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr); if ((npage = (naddr - saddr) / PAGESIZE) != 0) @@ -1882,6 +1898,10 @@ prpdsize32(struct as *as) void *tmp = NULL; size_t npage; + if ((seg->s_flags & S_HOLE) != 0) { + continue; + } + for (saddr = 
seg->s_base; saddr < eaddr; saddr = naddr) { (void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr); if ((npage = (naddr - saddr) / PAGESIZE) != 0) @@ -1933,6 +1953,10 @@ again: caddr_t saddr, naddr; void *tmp = NULL; + if ((seg->s_flags & S_HOLE) != 0) { + continue; + } + for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) { struct vnode *vp; struct vattr vattr; @@ -2080,6 +2104,10 @@ again: caddr_t saddr, naddr; void *tmp = NULL; + if ((seg->s_flags & S_HOLE) != 0) { + continue; + } + for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) { struct vnode *vp; struct vattr vattr; @@ -4032,6 +4060,9 @@ prgetxmap(proc_t *p, list_t *iolhead) uint64_t npages; uint64_t pagenum; + if ((seg->s_flags & S_HOLE) != 0) { + continue; + } /* * Segment loop part one: iterate from the base of the segment * to its end, pausing at each address boundary (baddr) between @@ -4228,6 +4259,10 @@ prgetxmap32(proc_t *p, list_t *iolhead) uint64_t npages; uint64_t pagenum; + if ((seg->s_flags & S_HOLE) != 0) { + continue; + } + /* * Segment loop part one: iterate from the base of the segment * to its end, pausing at each address boundary (baddr) between diff --git a/usr/src/uts/common/fs/ufs/ufs_vnops.c b/usr/src/uts/common/fs/ufs/ufs_vnops.c index 801e6f26fc..91efa776cc 100644 --- a/usr/src/uts/common/fs/ufs/ufs_vnops.c +++ b/usr/src/uts/common/fs/ufs/ufs_vnops.c @@ -21,7 +21,7 @@ /* * Copyright (c) 1984, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2017 Joyent, Inc. + * Copyright 2018 Joyent, Inc. * Copyright (c) 2016 by Delphix. All rights reserved. */ @@ -183,7 +183,6 @@ static int ufs_setsecattr(struct vnode *, vsecattr_t *, int, struct cred *, static int ufs_priv_access(void *, int, struct cred *); static int ufs_eventlookup(struct vnode *, char *, struct cred *, struct vnode **); -extern int as_map_locked(struct as *, caddr_t, size_t, int ((*)()), void *); /* * For lockfs: ulockfs begin/end is now inlined in the ufs_xxx functions. 
diff --git a/usr/src/uts/common/os/exec.c b/usr/src/uts/common/os/exec.c index 0065b4945b..53c552f135 100644 --- a/usr/src/uts/common/os/exec.c +++ b/usr/src/uts/common/os/exec.c @@ -26,7 +26,7 @@ /* Copyright (c) 1988 AT&T */ /* All Rights Reserved */ /* - * Copyright 2016 Joyent, Inc. + * Copyright 2017 Joyent, Inc. */ #include <sys/types.h> @@ -78,6 +78,7 @@ #include <vm/as.h> #include <vm/seg.h> #include <vm/seg_vn.h> +#include <vm/seg_hole.h> #define PRIV_RESET 0x01 /* needs to reset privs */ #define PRIV_SETID 0x02 /* needs to change uids */ @@ -115,6 +116,14 @@ size_t aslr_max_brk_skew = 16 * 1024 * 1024; /* 16MB */ size_t aslr_max_stack_skew = 64 * 1024; /* 64KB */ /* + * Size of guard segment for 64-bit processes and minimum size it can be shrunk + * to in the case of grow() operations. These are kept as variables in case + * they need to be tuned in an emergency. + */ +size_t stack_guard_seg_sz = 256 * 1024 * 1024; +size_t stack_guard_min_sz = 64 * 1024 * 1024; + +/* * exece() - system call wrapper around exec_common() */ int @@ -1858,6 +1867,15 @@ exec_get_spslew(void) * The initial user stack layout is as follows: * * User Stack + * +---------------+ + * | | + * | stack guard | + * | (64-bit only) | + * | | + * +...............+ <--- stack limit (base - curproc->p_stk_ctl) + * . . + * . . + * . . * +---------------+ <--- curproc->p_usrstack * | | * | slew | @@ -1899,6 +1917,11 @@ exec_get_spslew(void) * +---------------+ <--- argv[] * | argc | * +---------------+ <--- stack base + * + * In 64-bit processes, a stack guard segment is allocated at the address + * immediately below where the stack limit ends. This protects new library + * mappings (such as the linker) from being placed in relatively dangerous + * proximity to the stack. 
*/ int exec_args(execa_t *uap, uarg_t *args, intpdata_t *intp, void **auxvpp) @@ -1912,6 +1935,9 @@ exec_args(execa_t *uap, uarg_t *args, intpdata_t *intp, void **auxvpp) struct as *as; extern int use_stk_lpg; size_t sp_slew; +#if defined(_LP64) + const size_t sg_sz = (stack_guard_seg_sz & PAGEMASK); +#endif /* defined(_LP64) */ args->from_model = p->p_model; if (p->p_model == DATAMODEL_NATIVE) { @@ -2060,6 +2086,8 @@ exec_args(execa_t *uap, uarg_t *args, intpdata_t *intp, void **auxvpp) p->p_brkpageszc = 0; p->p_stksize = 0; p->p_stkpageszc = 0; + p->p_stkg_start = 0; + p->p_stkg_end = 0; p->p_model = args->to_model; p->p_usrstack = usrstack; p->p_stkprot = args->stk_prot; @@ -2097,10 +2125,36 @@ exec_args(execa_t *uap, uarg_t *args, intpdata_t *intp, void **auxvpp) (void) hat_setup(as->a_hat, HAT_ALLOC); hat_join_srd(as->a_hat, args->ex_vp); - /* - * Finally, write out the contents of the new stack. - */ + /* Write out the contents of the new stack. */ error = stk_copyout(args, usrstack - sp_slew, auxvpp, up); kmem_free(args->stk_base, args->stk_size); + +#if defined(_LP64) + /* Add stack guard segment (if needed) after successful copyout */ + if (error == 0 && p->p_model == DATAMODEL_LP64 && sg_sz != 0) { + seghole_crargs_t sca; + caddr_t addr_end = (caddr_t)(((uintptr_t)usrstack - + p->p_stk_ctl) & PAGEMASK); + caddr_t addr_start = addr_end - sg_sz; + + DTRACE_PROBE4(stack__guard__chk, proc_t *, p, + caddr_t, addr_start, caddr_t, addr_end, size_t, sg_sz); + + if (addr_end >= usrstack || addr_start >= addr_end || + valid_usr_range(addr_start, sg_sz, PROT_NONE, as, + as->a_userlimit) != RANGE_OKAY) { + return (E2BIG); + } + + /* Create un-mappable area in AS with seg_hole */ + sca.name = "stack_guard"; + error = as_map(as, addr_start, sg_sz, seghole_create, &sca); + if (error == 0) { + p->p_stkg_start = (uintptr_t)addr_start; + p->p_stkg_end = (uintptr_t)addr_start + sg_sz; + } + } +#endif /* defined(_LP64) */ + return (error); } diff --git 
a/usr/src/uts/common/os/grow.c b/usr/src/uts/common/os/grow.c index 647bca2542..de2a4f26c4 100644 --- a/usr/src/uts/common/os/grow.c +++ b/usr/src/uts/common/os/grow.c @@ -19,7 +19,10 @@ * CDDL HEADER END */ -/* Copyright 2013 OmniTI Computer Consulting, Inc. All rights reserved. */ +/* + * Copyright 2013 OmniTI Computer Consulting, Inc. All rights reserved. + * Copyright 2017 Joyent, Inc. + */ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. @@ -329,9 +332,10 @@ grow(caddr_t sp) } else { err = grow_internal(sp, p->p_stkpageszc); } + newsize = p->p_stksize; as_rangeunlock(as); - if (err == 0 && (newsize = p->p_stksize) > oldsize) { + if (err == 0 && newsize > oldsize) { ASSERT(IS_P2ALIGNED(oldsize, PAGESIZE)); ASSERT(IS_P2ALIGNED(newsize, PAGESIZE)); /* @@ -424,6 +428,7 @@ grow_internal(caddr_t sp, uint_t growszc) struct proc *p = curproc; size_t newsize; size_t oldsize; + uintptr_t new_start; int error; size_t pgsz; uint_t szc; @@ -494,7 +499,32 @@ grow_internal(caddr_t sp, uint_t growszc) } crargs.lgrp_mem_policy_flags = LGRP_MP_FLAG_EXTEND_DOWN; - if ((error = as_map(p->p_as, p->p_usrstack - newsize, newsize - oldsize, + /* + * The stack is about to grow into its guard. This can be acceptable + * if the size restriction on the stack has been expanded since its + * initialization during exec(). In such cases, the guard segment will + * be shrunk, provided the new size is reasonable. 
+ */ + new_start = (uintptr_t)p->p_usrstack - newsize; + if (p->p_stkg_start != 0 && new_start > p->p_stkg_start && + new_start < p->p_stkg_end) { + const size_t unmap_sz = p->p_stkg_end - new_start; + const size_t remain_sz = new_start - p->p_stkg_start; + extern size_t stack_guard_min_sz; + + /* Do not allow the guard to shrink below minimum size */ + if (remain_sz < stack_guard_min_sz) { + return (ENOMEM); + } + + error = as_unmap(p->p_as, (caddr_t)new_start, unmap_sz); + if (error != 0) { + return (error); + } + p->p_stkg_end -= unmap_sz; + } + + if ((error = as_map(p->p_as, (caddr_t)new_start, newsize - oldsize, segvn_create, &crargs)) != 0) { if (error == EAGAIN) { cmn_err(CE_WARN, "Sorry, no swap space to grow stack " diff --git a/usr/src/uts/common/sys/proc.h b/usr/src/uts/common/sys/proc.h index e1b1a2289f..712bd7cb24 100644 --- a/usr/src/uts/common/sys/proc.h +++ b/usr/src/uts/common/sys/proc.h @@ -251,8 +251,15 @@ typedef struct proc { kmutex_t p_maplock; /* lock for pr_mappage() */ struct proc *p_rlink; /* linked list for server */ kcondvar_t p_srwchan_cv; - size_t p_stksize; /* process stack size in bytes */ - uint_t p_stkpageszc; /* preferred stack max page size code */ + + /* + * Stack sizing and guard information. + * Generally protected by as_rangelock() + */ + size_t p_stksize; /* process stack size in bytes */ + uint_t p_stkpageszc; /* preferred stack max page size code */ + uintptr_t p_stkg_start; /* start of stack guard */ + uintptr_t p_stkg_end; /* end of stack guard */ /* * Microstate accounting, resource usage, and real-time profiling diff --git a/usr/src/uts/common/vm/as.h b/usr/src/uts/common/vm/as.h index e910db1ddc..83bd7b52ba 100644 --- a/usr/src/uts/common/vm/as.h +++ b/usr/src/uts/common/vm/as.h @@ -24,7 +24,7 @@ */ /* - * Copyright (c) 2013, Joyent, Inc. All rights reserved. + * Copyright 2018 Joyent, Inc. 
*/ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ @@ -256,6 +256,8 @@ extern struct as kas; /* kernel's address space */ #define AS_SEGNEXT(as, seg) AVL_NEXT(&(as)->a_segtree, (seg)) #define AS_SEGPREV(as, seg) AVL_PREV(&(as)->a_segtree, (seg)) +typedef int (*segcreate_func_t)(struct seg **, void *); + void as_init(void); void as_avlinit(struct as *); struct seg *as_segat(struct as *as, caddr_t addr); @@ -273,8 +275,10 @@ faultcode_t as_faulta(struct as *as, caddr_t addr, size_t size); int as_setprot(struct as *as, caddr_t addr, size_t size, uint_t prot); int as_checkprot(struct as *as, caddr_t addr, size_t size, uint_t prot); int as_unmap(struct as *as, caddr_t addr, size_t size); -int as_map(struct as *as, caddr_t addr, size_t size, int ((*crfp)()), - void *argsp); +int as_map(struct as *as, caddr_t addr, size_t size, segcreate_func_t crfp, + void *argsp); +int as_map_locked(struct as *as, caddr_t addr, size_t size, + segcreate_func_t crfp, void *argsp); void as_purge(struct as *as); int as_gap(struct as *as, size_t minlen, caddr_t *basep, size_t *lenp, uint_t flags, caddr_t addr); diff --git a/usr/src/uts/common/vm/seg.h b/usr/src/uts/common/vm/seg.h index be1c9514e9..9dde7028c4 100644 --- a/usr/src/uts/common/vm/seg.h +++ b/usr/src/uts/common/vm/seg.h @@ -21,7 +21,7 @@ /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. - * Copyright (c) 2015, Joyent, Inc. + * Copyright 2017 Joyent, Inc. 
*/ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ @@ -113,6 +113,7 @@ typedef struct seg { } seg_t; #define S_PURGE (0x01) /* seg should be purged in as_gap() */ +#define S_HOLE (0x02) /* seg represents hole in AS */ struct seg_ops { int (*dup)(struct seg *, struct seg *); diff --git a/usr/src/uts/common/vm/seg_dev.c b/usr/src/uts/common/vm/seg_dev.c index f43a288cec..89e6461bbf 100644 --- a/usr/src/uts/common/vm/seg_dev.c +++ b/usr/src/uts/common/vm/seg_dev.c @@ -22,6 +22,7 @@ /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2018 Joyent, Inc. */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ @@ -357,8 +358,9 @@ devmap_ctxto(void *data) * Create a device segment. */ int -segdev_create(struct seg *seg, void *argsp) +segdev_create(struct seg **segpp, void *argsp) { + struct seg *seg = *segpp; struct segdev_data *sdp; struct segdev_crargs *a = (struct segdev_crargs *)argsp; devmap_handle_t *dhp = (devmap_handle_t *)a->devmap_data; diff --git a/usr/src/uts/common/vm/seg_dev.h b/usr/src/uts/common/vm/seg_dev.h index 6240125489..07a15afc6b 100644 --- a/usr/src/uts/common/vm/seg_dev.h +++ b/usr/src/uts/common/vm/seg_dev.h @@ -21,6 +21,7 @@ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2018 Joyent, Inc. 
*/ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ @@ -122,7 +123,7 @@ struct devmap_pmem_cookie { extern void segdev_init(void); -extern int segdev_create(struct seg *, void *); +extern int segdev_create(struct seg **, void *); extern int segdev_copyto(struct seg *, caddr_t, const void *, void *, size_t); extern int segdev_copyfrom(struct seg *, caddr_t, const void *, void *, size_t); diff --git a/usr/src/uts/common/vm/seg_hole.c b/usr/src/uts/common/vm/seg_hole.c new file mode 100644 index 0000000000..14b2153718 --- /dev/null +++ b/usr/src/uts/common/vm/seg_hole.c @@ -0,0 +1,304 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2018 Joyent, Inc. 
+ */ + + +#include <sys/types.h> +#include <sys/param.h> +#include <sys/errno.h> +#include <sys/cred.h> +#include <sys/kmem.h> +#include <sys/lgrp.h> +#include <sys/mman.h> + +#include <vm/hat.h> +#include <vm/as.h> +#include <vm/seg.h> +#include <vm/seg_hole.h> + + +static int seghole_dup(struct seg *, struct seg *); +static int seghole_unmap(struct seg *, caddr_t, size_t); +static void seghole_free(struct seg *); +static faultcode_t seghole_fault(struct hat *, struct seg *, caddr_t, size_t, + enum fault_type, enum seg_rw); +static faultcode_t seghole_faulta(struct seg *, caddr_t); +static int seghole_setprot(struct seg *, caddr_t, size_t, uint_t); +static int seghole_checkprot(struct seg *, caddr_t, size_t, uint_t); +static int seghole_sync(struct seg *, caddr_t, size_t, int, uint_t); +static size_t seghole_incore(struct seg *, caddr_t, size_t, char *); +static int seghole_lockop(struct seg *, caddr_t, size_t, int, int, ulong_t *, + size_t); +static int seghole_getprot(struct seg *, caddr_t, size_t, uint_t *); +static u_offset_t seghole_getoffset(struct seg *, caddr_t); +static int seghole_gettype(struct seg *, caddr_t); +static int seghole_getvp(struct seg *, caddr_t, struct vnode **); +static int seghole_advise(struct seg *, caddr_t, size_t, uint_t); +static void seghole_dump(struct seg *); +static int seghole_pagelock(struct seg *, caddr_t, size_t, struct page ***, + enum lock_type, enum seg_rw); +static int seghole_setpagesize(struct seg *, caddr_t, size_t, uint_t); +static int seghole_capable(struct seg *, segcapability_t); + +static struct seg_ops seghole_ops = { + seghole_dup, + seghole_unmap, + seghole_free, + seghole_fault, + seghole_faulta, + seghole_setprot, + seghole_checkprot, + NULL, /* kluster: disabled */ + NULL, /* swapout: disabled */ + seghole_sync, + seghole_incore, + seghole_lockop, + seghole_getprot, + seghole_getoffset, + seghole_gettype, + seghole_getvp, + seghole_advise, + seghole_dump, + seghole_pagelock, + seghole_setpagesize, + NULL, 
/* getmemid: disabled */ + NULL, /* getpolicy: disabled */ + seghole_capable, + seg_inherit_notsup +}; + +/* + * Create a hole in the AS. + */ +int +seghole_create(struct seg **segpp, void *argsp) +{ + struct seg *seg = *segpp; + seghole_crargs_t *crargs = argsp; + seghole_data_t *data; + + data = kmem_alloc(sizeof (seghole_data_t), KM_SLEEP); + data->shd_name = crargs->name; + + seg->s_ops = &seghole_ops; + seg->s_data = data; + seg->s_flags = S_HOLE; + + return (0); +} + +static int +seghole_dup(struct seg *seg, struct seg *newseg) +{ + seghole_data_t *shd = (seghole_data_t *)seg->s_data; + seghole_data_t *newshd; + + ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as)); + + newshd = kmem_zalloc(sizeof (seghole_data_t), KM_SLEEP); + newshd->shd_name = shd->shd_name; + + newseg->s_ops = seg->s_ops; + newseg->s_data = newshd; + newseg->s_flags = S_HOLE; + + return (0); +} + +static int +seghole_unmap(struct seg *seg, caddr_t addr, size_t len) +{ + ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as)); + + /* Entire segment is being unmapped */ + if (addr == seg->s_base && len == seg->s_size) { + seg_free(seg); + return (0); + } + + /* Shrinking from low address side */ + if (addr == seg->s_base) { + seg->s_base += len; + seg->s_size -= len; + return (0); + } + + /* Shrinking from high address side */ + if ((addr + len) == (seg->s_base + seg->s_size)) { + seg->s_size -= len; + return (0); + } + + /* Do not tolerate splitting the segment */ + return (EINVAL); +} + +static void +seghole_free(struct seg *seg) +{ + seghole_data_t *data = (seghole_data_t *)seg->s_data; + + ASSERT(data != NULL); + + kmem_free(data, sizeof (*data)); + seg->s_data = NULL; +} + +/* ARGSUSED */ +static faultcode_t +seghole_fault(struct hat *hat, struct seg *seg, caddr_t addr, size_t len, + enum fault_type type, enum seg_rw tw) +{ + ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as)); + + return (FC_NOMAP); +} + +/* ARGSUSED */ +static faultcode_t +seghole_faulta(struct seg *seg, caddr_t addr) +{ + return 
(FC_NOMAP); +} + +/* ARGSUSED */ +static int +seghole_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot) +{ + ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as)); + + return (ENOMEM); +} + +/* ARGSUSED */ +static int +seghole_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot) +{ + ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as)); + + return (ENOMEM); +} + +/* ARGSUSED */ +static int +seghole_sync(struct seg *seg, caddr_t addr, size_t len, int attr, uint_t flags) +{ + /* Always succeed since there are no backing store to sync */ + return (0); +} + +/* ARGSUSED */ +static size_t +seghole_incore(struct seg *seg, caddr_t addr, size_t len, char *vec) +{ + ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as)); + + return (0); +} + +/* ARGSUSED */ +static int +seghole_lockop(struct seg *seg, caddr_t addr, size_t len, int attr, int op, + ulong_t *lockmap, size_t pos) +{ + /* + * Emit an error consistent with there being no segment in this hole in + * the AS. The MC_LOCKAS and MC_UNLOCKAS commands will explicitly skip + * hole segments, allowing such operations to proceed as expected. + */ + return (ENOMEM); +} + +static int +seghole_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv) +{ + size_t pgno; + + ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as)); + + /* + * Few SEGOP_GETPROT callers actually check for an error, so it's + * necessary to report zeroed protection for the length of the request. + */ + pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1; + while (pgno > 0) { + protv[--pgno] = 0; + } + + return (ENOMEM); +} + +/* ARGSUSED */ +static u_offset_t +seghole_getoffset(struct seg *seg, caddr_t addr) +{ + /* + * To avoid leaking information about the layout of the kernel address + * space, always report '0' as the offset. 
+ */ + return (0); +} + +/* ARGSUSED */ +static int +seghole_gettype(struct seg *seg, caddr_t addr) +{ + return (MAP_PRIVATE); +} + +/* ARGSUSED */ +static int +seghole_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp) +{ + ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as)); + + return (ENOMEM); +} + +/* ARGSUSED */ +static int +seghole_advise(struct seg *seg, caddr_t addr, size_t len, uint_t behav) +{ + return (ENOMEM); +} + +/* ARGSUSED */ +static void +seghole_dump(struct seg *seg) +{ + /* There's nothing to dump from a hole in the AS */ +} + +/* ARGSUSED */ +static int +seghole_pagelock(struct seg *seg, caddr_t addr, size_t len, struct page ***ppp, + enum lock_type type, enum seg_rw rw) +{ + return (EFAULT); +} + +/* ARGSUSED */ +static int +seghole_setpagesize(struct seg *seg, caddr_t addr, size_t len, uint_t szc) +{ + return (ENOMEM); +} + +/* ARGSUSED */ +static int +seghole_capable(struct seg *seg, segcapability_t capability) +{ + /* no special capablities */ + return (0); +} diff --git a/usr/src/uts/common/vm/seg_hole.h b/usr/src/uts/common/vm/seg_hole.h new file mode 100644 index 0000000000..2bff880f4f --- /dev/null +++ b/usr/src/uts/common/vm/seg_hole.h @@ -0,0 +1,40 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2018 Joyent, Inc. 
+ */ + +#ifndef _VM_SEG_HOLE_H +#define _VM_SEG_HOLE_H + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct seghole_crargs { + const char *name; +} seghole_crargs_t; + +typedef struct seghole_data { + const char *shd_name; +} seghole_data_t; + +extern int seghole_create(struct seg **, void *); + +#define AS_MAP_CHECK_SEGHOLE(crfp) \ + ((crfp) == (segcreate_func_t)seghole_create) + +#ifdef __cplusplus +} +#endif + +#endif /* _VM_SEG_HOLE_H */ diff --git a/usr/src/uts/common/vm/seg_spt.c b/usr/src/uts/common/vm/seg_spt.c index b0f992b7a6..cc00e16333 100644 --- a/usr/src/uts/common/vm/seg_spt.c +++ b/usr/src/uts/common/vm/seg_spt.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 1993, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2015, Joyent, Inc. All rights reserved. + * Copyright 2018 Joyent, Inc. * Copyright (c) 2016 by Delphix. All rights reserved. */ @@ -72,7 +72,7 @@ size_t spt_used; */ pgcnt_t segspt_minfree = 0; -static int segspt_create(struct seg *seg, caddr_t argsp); +static int segspt_create(struct seg **segpp, void *argsp); static int segspt_unmap(struct seg *seg, caddr_t raddr, size_t ssize); static void segspt_free(struct seg *seg); static void segspt_free_pages(struct seg *seg, caddr_t addr, size_t len); @@ -369,8 +369,9 @@ segspt_unmap(struct seg *seg, caddr_t raddr, size_t ssize) } int -segspt_create(struct seg *seg, caddr_t argsp) +segspt_create(struct seg **segpp, void *argsp) { + struct seg *seg = *segpp; int err; caddr_t addr = seg->s_base; struct spt_data *sptd; @@ -1671,8 +1672,9 @@ softlock_decrement: } int -segspt_shmattach(struct seg *seg, caddr_t *argsp) +segspt_shmattach(struct seg **segpp, void *argsp) { + struct seg *seg = *segpp; struct shm_data *shmd_arg = (struct shm_data *)argsp; struct shm_data *shmd; struct anon_map *shm_amp = shmd_arg->shm_amp; diff --git a/usr/src/uts/common/vm/seg_spt.h b/usr/src/uts/common/vm/seg_spt.h index ebc2ebf465..d8cea0a4d0 100644 --- a/usr/src/uts/common/vm/seg_spt.h +++ 
b/usr/src/uts/common/vm/seg_spt.h @@ -21,13 +21,12 @@ /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2018 Joyent, Inc. */ #ifndef _VM_SEG_SPT_H #define _VM_SEG_SPT_H -#pragma ident "%Z%%M% %I% %E% SMI" - #ifdef __cplusplus extern "C" { #endif @@ -101,7 +100,7 @@ typedef struct shm_data { int sptcreate(size_t size, struct seg **sptseg, struct anon_map *amp, uint_t prot, uint_t flags, uint_t szc); void sptdestroy(struct as *, struct anon_map *); -int segspt_shmattach(struct seg *, caddr_t *); +int segspt_shmattach(struct seg **, void *); #define isspt(sp) ((sp)->shm_sptinfo ? (sp)->shm_sptinfo->sptas : NULL) #define spt_locked(a) ((a) & SHM_SHARE_MMU) diff --git a/usr/src/uts/common/vm/seg_umap.c b/usr/src/uts/common/vm/seg_umap.c index 3b4bb04f69..985cb51759 100644 --- a/usr/src/uts/common/vm/seg_umap.c +++ b/usr/src/uts/common/vm/seg_umap.c @@ -10,7 +10,7 @@ */ /* - * Copyright 2016 Joyent, Inc. + * Copyright 2018 Joyent, Inc. */ /* @@ -99,8 +99,9 @@ static struct seg_ops segumap_ops = { * Create a kernel/user-mapped segment. */ int -segumap_create(struct seg *seg, void *argsp) +segumap_create(struct seg **segpp, void *argsp) { + struct seg *seg = *segpp; segumap_crargs_t *a = (struct segumap_crargs *)argsp; segumap_data_t *data; diff --git a/usr/src/uts/common/vm/seg_umap.h b/usr/src/uts/common/vm/seg_umap.h index 8db23723ed..c348bf471a 100644 --- a/usr/src/uts/common/vm/seg_umap.h +++ b/usr/src/uts/common/vm/seg_umap.h @@ -10,7 +10,7 @@ */ /* - * Copyright 2016 Joyent, Inc. + * Copyright 2018 Joyent, Inc. 
*/ #ifndef _VM_SEG_UMAP_H @@ -33,7 +33,7 @@ typedef struct segumap_data { size_t sud_softlockcnt; } segumap_data_t; -extern int segumap_create(struct seg *, void *); +extern int segumap_create(struct seg **, void *); #ifdef __cplusplus } diff --git a/usr/src/uts/common/vm/seg_vn.c b/usr/src/uts/common/vm/seg_vn.c index 875dec7fe9..deb08c24e6 100644 --- a/usr/src/uts/common/vm/seg_vn.c +++ b/usr/src/uts/common/vm/seg_vn.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 1986, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2015, Joyent, Inc. All rights reserved. + * Copyright 2018 Joyent, Inc. * Copyright 2015 Nexenta Systems, Inc. All rights reserved. */ @@ -538,8 +538,9 @@ segvn_setvnode_mpss(vnode_t *vp) } int -segvn_create(struct seg *seg, void *argsp) +segvn_create(struct seg **segpp, void *argsp) { + struct seg *seg = *segpp; extern lgrp_mem_policy_t lgrp_mem_default_policy; struct segvn_crargs *a = (struct segvn_crargs *)argsp; struct segvn_data *svd; @@ -758,6 +759,11 @@ segvn_create(struct seg *seg, void *argsp) (a->szc == pseg->s_szc && IS_P2ALIGNED(pseg->s_base, pgsz) && IS_P2ALIGNED(pseg->s_size, pgsz))); + /* + * Communicate out the newly concatenated + * segment as part of the result. + */ + *segpp = pseg; return (0); } } @@ -797,6 +803,11 @@ segvn_create(struct seg *seg, void *argsp) (a->szc == nseg->s_szc && IS_P2ALIGNED(nseg->s_base, pgsz) && IS_P2ALIGNED(nseg->s_size, pgsz))); + /* + * Communicate out the newly concatenated + * segment as part of the result. + */ + *segpp = nseg; return (0); } } @@ -1253,10 +1264,8 @@ segvn_concat(struct seg *seg1, struct seg *seg2, int amp_cat) * Return 0 on success. 
*/ static int -segvn_extend_prev(seg1, seg2, a, swresv) - struct seg *seg1, *seg2; - struct segvn_crargs *a; - size_t swresv; +segvn_extend_prev(struct seg *seg1, struct seg *seg2, struct segvn_crargs *a, + size_t swresv) { struct segvn_data *svd1 = (struct segvn_data *)seg1->s_data; size_t size; @@ -1333,7 +1342,7 @@ segvn_extend_prev(seg1, seg2, a, swresv) struct vpage *vp, *evp; new_vpage = kmem_zalloc(vpgtob(seg_pages(seg1) + seg_pages(seg2)), - KM_NOSLEEP); + KM_NOSLEEP); if (new_vpage == NULL) return (-1); bcopy(svd1->vpage, new_vpage, vpgtob(seg_pages(seg1))); @@ -1373,11 +1382,8 @@ segvn_extend_prev(seg1, seg2, a, swresv) * Return 0 on success. */ static int -segvn_extend_next( - struct seg *seg1, - struct seg *seg2, - struct segvn_crargs *a, - size_t swresv) +segvn_extend_next(struct seg *seg1, struct seg *seg2, struct segvn_crargs *a, + size_t swresv) { struct segvn_data *svd2 = (struct segvn_data *)seg2->s_data; size_t size; @@ -3357,7 +3363,6 @@ static int segvn_fill_vp_pages(struct segvn_data *svd, vnode_t *vp, u_offset_t off, uint_t szc, page_t **ppa, page_t **ppplist, uint_t *ret_pszc, int *downsize) - { page_t *pplist = *ppplist; size_t pgsz = page_get_pagesize(szc); @@ -3498,7 +3503,7 @@ segvn_fill_vp_pages(struct segvn_data *svd, vnode_t *vp, u_offset_t off, goto out; } io_err = VOP_PAGEIO(vp, io_pplist, io_off, io_len, - B_READ, svd->cred, NULL); + B_READ, svd->cred, NULL); if (io_err) { VM_STAT_ADD(segvnvmstats.fill_vp_pages[8]); page_unlock(targpp); @@ -9456,7 +9461,7 @@ segvn_purge(struct seg *seg) /*ARGSUSED*/ static int segvn_reclaim(void *ptag, caddr_t addr, size_t len, struct page **pplist, - enum seg_rw rw, int async) + enum seg_rw rw, int async) { struct seg *seg = (struct seg *)ptag; struct segvn_data *svd = (struct segvn_data *)seg->s_data; @@ -9533,7 +9538,7 @@ segvn_reclaim(void *ptag, caddr_t addr, size_t len, struct page **pplist, /*ARGSUSED*/ static int shamp_reclaim(void *ptag, caddr_t addr, size_t len, struct page **pplist, - 
enum seg_rw rw, int async) + enum seg_rw rw, int async) { amp_t *amp = (amp_t *)ptag; pgcnt_t np, npages; @@ -10206,10 +10211,8 @@ segvn_trupdate(void) } static void -segvn_trupdate_seg(struct seg *seg, - segvn_data_t *svd, - svntr_t *svntrp, - ulong_t hash) +segvn_trupdate_seg(struct seg *seg, segvn_data_t *svd, svntr_t *svntrp, + ulong_t hash) { proc_t *p; lgrp_id_t lgrp_id; diff --git a/usr/src/uts/common/vm/seg_vn.h b/usr/src/uts/common/vm/seg_vn.h index 6fef7d678d..97a0db012d 100644 --- a/usr/src/uts/common/vm/seg_vn.h +++ b/usr/src/uts/common/vm/seg_vn.h @@ -21,7 +21,7 @@ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. - * Copyright (c) 2015, Joyent, Inc. All rights reserved. + * Copyright 2018 Joyent, Inc. */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ @@ -162,14 +162,14 @@ typedef struct segvn_data { { NULL, NULL, 0, MAP_PRIVATE, prot, max, 0, NULL, 0, 0 } #define AS_MAP_CHECK_VNODE_LPOOB(crfp, argsp) \ - ((crfp) == (int (*)())segvn_create && \ + ((crfp) == (segcreate_func_t)segvn_create && \ (((struct segvn_crargs *)(argsp))->flags & \ (MAP_TEXT | MAP_INITDATA)) && \ ((struct segvn_crargs *)(argsp))->szc == 0 && \ ((struct segvn_crargs *)(argsp))->vp != NULL) #define AS_MAP_CHECK_ANON_LPOOB(crfp, argsp) \ - ((crfp) == (int (*)())segvn_create && \ + ((crfp) == (segcreate_func_t)segvn_create && \ (((struct segvn_crargs *)(argsp))->szc == 0 || \ ((struct segvn_crargs *)(argsp))->szc == AS_MAP_HEAP || \ ((struct segvn_crargs *)(argsp))->szc == AS_MAP_STACK) && \ @@ -228,7 +228,7 @@ typedef struct svntr_stats { } svntr_stats_t; extern void segvn_init(void); -extern int segvn_create(struct seg *, void *); +extern int segvn_create(struct seg **, void *); extern struct seg_ops segvn_ops; diff --git a/usr/src/uts/common/vm/vm_as.c b/usr/src/uts/common/vm/vm_as.c index ea28b0814d..853b092e6d 100644 --- a/usr/src/uts/common/vm/vm_as.c +++ b/usr/src/uts/common/vm/vm_as.c @@ -21,7 +21,7 @@ /* * Copyright 2010 
Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. - * Copyright 2015, Joyent, Inc. All rights reserved. + * Copyright 2018 Joyent, Inc. * Copyright (c) 2016 by Delphix. All rights reserved. */ @@ -67,6 +67,7 @@ #include <vm/seg_kmem.h> #include <vm/seg_map.h> #include <vm/seg_spt.h> +#include <vm/seg_hole.h> #include <vm/page.h> clock_t deadlk_wait = 1; /* number of ticks to wait before retrying */ @@ -75,7 +76,6 @@ static struct kmem_cache *as_cache; static void as_setwatchprot(struct as *, caddr_t, size_t, uint_t); static void as_clearwatchprot(struct as *, caddr_t, size_t); -int as_map_locked(struct as *, caddr_t, size_t, int ((*)()), void *); /* @@ -816,7 +816,9 @@ as_dup(struct as *as, struct proc *forkedproc) as_free(newas); return (error); } - newas->a_size += seg->s_size; + if ((newseg->s_flags & S_HOLE) == 0) { + newas->a_size += seg->s_size; + } } newas->a_resvsize = as->a_resvsize - purgesize; @@ -1312,6 +1314,8 @@ top: as_clearwatchprot(as, raddr, eaddr - raddr); for (seg = as_findseg(as, raddr, 0); seg != NULL; seg = seg_next) { + const boolean_t is_hole = ((seg->s_flags & S_HOLE) != 0); + if (eaddr <= seg->s_base) break; /* eaddr was in a gap; all done */ @@ -1416,9 +1420,11 @@ retry: return (-1); } - as->a_size -= ssize; - if (rsize) - as->a_resvsize -= rsize; + if (!is_hole) { + as->a_size -= ssize; + if (rsize) + as->a_resvsize -= rsize; + } raddr += ssize; } AS_LOCK_EXIT(as); @@ -1427,35 +1433,34 @@ retry: static int as_map_segvn_segs(struct as *as, caddr_t addr, size_t size, uint_t szcvec, - int (*crfp)(), struct segvn_crargs *vn_a, int *segcreated) + segcreate_func_t crfp, struct segvn_crargs *vn_a, boolean_t *segcreated) { - uint_t szc; - uint_t nszc; + uint_t szc, nszc, save_szcvec; int error; - caddr_t a; - caddr_t eaddr; - size_t segsize; - struct seg *seg; + caddr_t a, eaddr; size_t pgsz; - int do_off = (vn_a->vp != NULL || vn_a->amp != NULL); - uint_t save_szcvec; + const boolean_t do_off = (vn_a->vp != NULL || 
vn_a->amp != NULL); ASSERT(AS_WRITE_HELD(as)); ASSERT(IS_P2ALIGNED(addr, PAGESIZE)); ASSERT(IS_P2ALIGNED(size, PAGESIZE)); ASSERT(vn_a->vp == NULL || vn_a->amp == NULL); + if (!do_off) { vn_a->offset = 0; } if (szcvec <= 1) { - seg = seg_alloc(as, addr, size); + struct seg *seg, *segref; + + seg = segref = seg_alloc(as, addr, size); if (seg == NULL) { return (ENOMEM); } vn_a->szc = 0; - error = (*crfp)(seg, vn_a); + error = (*crfp)(&seg, vn_a); if (error != 0) { + VERIFY3P(seg, ==, segref); seg_free(seg); } else { as->a_size += size; @@ -1479,21 +1484,26 @@ as_map_segvn_segs(struct as *as, caddr_t addr, size_t size, uint_t szcvec, pgsz = page_get_pagesize(nszc); a = (caddr_t)P2ROUNDUP((uintptr_t)addr, pgsz); if (a != addr) { + struct seg *seg, *segref; + size_t segsize; + ASSERT(a < eaddr); + segsize = a - addr; - seg = seg_alloc(as, addr, segsize); + seg = segref = seg_alloc(as, addr, segsize); if (seg == NULL) { return (ENOMEM); } vn_a->szc = szc; - error = (*crfp)(seg, vn_a); + error = (*crfp)(&seg, vn_a); if (error != 0) { + VERIFY3P(seg, ==, segref); seg_free(seg); return (error); } as->a_size += segsize; as->a_resvsize += segsize; - *segcreated = 1; + *segcreated = B_TRUE; if (do_off) { vn_a->offset += segsize; } @@ -1509,20 +1519,24 @@ as_map_segvn_segs(struct as *as, caddr_t addr, size_t size, uint_t szcvec, a = (caddr_t)P2ALIGN((uintptr_t)eaddr, pgsz); ASSERT(a >= addr); if (a != addr) { + struct seg *seg, *segref; + size_t segsize; + segsize = a - addr; - seg = seg_alloc(as, addr, segsize); + seg = segref = seg_alloc(as, addr, segsize); if (seg == NULL) { return (ENOMEM); } vn_a->szc = szc; - error = (*crfp)(seg, vn_a); + error = (*crfp)(&seg, vn_a); if (error != 0) { + VERIFY3P(seg, ==, segref); seg_free(seg); return (error); } as->a_size += segsize; as->a_resvsize += segsize; - *segcreated = 1; + *segcreated = B_TRUE; if (do_off) { vn_a->offset += segsize; } @@ -1541,14 +1555,13 @@ as_map_segvn_segs(struct as *as, caddr_t addr, size_t size, uint_t 
szcvec, static int as_map_vnsegs(struct as *as, caddr_t addr, size_t size, - int (*crfp)(), struct segvn_crargs *vn_a, int *segcreated) + segcreate_func_t crfp, struct segvn_crargs *vn_a, boolean_t *segcreated) { uint_t mapflags = vn_a->flags & (MAP_TEXT | MAP_INITDATA); int type = (vn_a->type == MAP_SHARED) ? MAPPGSZC_SHM : MAPPGSZC_PRIVM; uint_t szcvec = map_pgszcvec(addr, size, (uintptr_t)addr, mapflags, type, 0); int error; - struct seg *seg; struct vattr va; u_offset_t eoff; size_t save_size = 0; @@ -1562,13 +1575,16 @@ as_map_vnsegs(struct as *as, caddr_t addr, size_t size, again: if (szcvec <= 1) { - seg = seg_alloc(as, addr, size); + struct seg *seg, *segref; + + seg = segref = seg_alloc(as, addr, size); if (seg == NULL) { return (ENOMEM); } vn_a->szc = 0; - error = (*crfp)(seg, vn_a); + error = (*crfp)(&seg, vn_a); if (error != 0) { + VERIFY3P(seg, ==, segref); seg_free(seg); } else { as->a_size += size; @@ -1623,7 +1639,7 @@ again: */ static int as_map_ansegs(struct as *as, caddr_t addr, size_t size, - int (*crfp)(), struct segvn_crargs *vn_a, int *segcreated) + segcreate_func_t crfp, struct segvn_crargs *vn_a, boolean_t *segcreated) { uint_t szcvec; uchar_t type; @@ -1653,21 +1669,21 @@ as_map_ansegs(struct as *as, caddr_t addr, size_t size, } int -as_map(struct as *as, caddr_t addr, size_t size, int (*crfp)(), void *argsp) +as_map(struct as *as, caddr_t addr, size_t size, segcreate_func_t crfp, + void *argsp) { AS_LOCK_ENTER(as, RW_WRITER); return (as_map_locked(as, addr, size, crfp, argsp)); } int -as_map_locked(struct as *as, caddr_t addr, size_t size, int (*crfp)(), +as_map_locked(struct as *as, caddr_t addr, size_t size, segcreate_func_t crfp, void *argsp) { - struct seg *seg = NULL; caddr_t raddr; /* rounded down addr */ size_t rsize; /* rounded up size */ int error; - int unmap = 0; + boolean_t is_hole = B_FALSE; /* * The use of a_proc is preferred to handle the case where curproc is * a door_call server and is allocating memory in the client's 
(a_proc) @@ -1693,63 +1709,97 @@ as_map_locked(struct as *as, caddr_t addr, size_t size, int (*crfp)(), as->a_updatedir = 1; /* inform /proc */ gethrestime(&as->a_updatetime); - if (as != &kas && as->a_size + rsize > (size_t)p->p_vmem_ctl) { - AS_LOCK_EXIT(as); - - (void) rctl_action(rctlproc_legacy[RLIMIT_VMEM], p->p_rctls, p, - RCA_UNSAFE_ALL); + if (as != &kas) { + /* + * Ensure that the virtual size of the process will not exceed + * the configured limit. Since seg_hole segments will later + * set the S_HOLE flag indicating their status as a hole in the + * AS, they are excluded from this check. + */ + if (as->a_size + rsize > (size_t)p->p_vmem_ctl && + !AS_MAP_CHECK_SEGHOLE(crfp)) { + AS_LOCK_EXIT(as); - return (ENOMEM); + (void) rctl_action(rctlproc_legacy[RLIMIT_VMEM], + p->p_rctls, p, RCA_UNSAFE_ALL); + return (ENOMEM); + } } if (AS_MAP_CHECK_VNODE_LPOOB(crfp, argsp)) { + boolean_t do_unmap = B_FALSE; + crargs = *(struct segvn_crargs *)argsp; - error = as_map_vnsegs(as, raddr, rsize, crfp, &crargs, &unmap); + error = as_map_vnsegs(as, raddr, rsize, crfp, &crargs, + &do_unmap); if (error != 0) { AS_LOCK_EXIT(as); - if (unmap) { + if (do_unmap) { (void) as_unmap(as, addr, size); } return (error); } } else if (AS_MAP_CHECK_ANON_LPOOB(crfp, argsp)) { + boolean_t do_unmap = B_FALSE; + crargs = *(struct segvn_crargs *)argsp; - error = as_map_ansegs(as, raddr, rsize, crfp, &crargs, &unmap); + error = as_map_ansegs(as, raddr, rsize, crfp, &crargs, + &do_unmap); if (error != 0) { AS_LOCK_EXIT(as); - if (unmap) { + if (do_unmap) { (void) as_unmap(as, addr, size); } return (error); } } else { - seg = seg_alloc(as, addr, size); + struct seg *seg, *segref; + + seg = segref = seg_alloc(as, addr, size); if (seg == NULL) { AS_LOCK_EXIT(as); return (ENOMEM); } - error = (*crfp)(seg, argsp); + /* + * It is possible that the segment creation routine will free + * 'seg' as part of a more advanced operation, such as when + * segvn concatenates adjacent segments together. 
When this + * occurs, the seg*_create routine must communicate the + * resulting segment out via the 'struct seg **' parameter. + * + * If segment creation fails, it must not free the passed-in + * segment, nor alter the argument pointer. + */ + error = (*crfp)(&seg, argsp); if (error != 0) { + VERIFY3P(seg, ==, segref); seg_free(seg); AS_LOCK_EXIT(as); return (error); } + /* - * Add size now so as_unmap will work if as_ctl fails. + * Check if the resulting segment represents a hole in the + * address space, rather than contributing to the AS size. */ - as->a_size += rsize; - as->a_resvsize += rsize; + is_hole = ((seg->s_flags & S_HOLE) != 0); + + /* Add size now so as_unmap will work if as_ctl fails. */ + if (!is_hole) { + as->a_size += rsize; + as->a_resvsize += rsize; + } } as_setwatch(as); /* - * If the address space is locked, - * establish memory locks for the new segment. + * Establish memory locks for the segment if the address space is + * locked, provided it's not an explicit hole in the AS. 
*/ mutex_enter(&as->a_contents); - if (AS_ISPGLCK(as)) { + if (AS_ISPGLCK(as) && !is_hole) { mutex_exit(&as->a_contents); AS_LOCK_EXIT(as); error = as_ctl(as, addr, size, MC_LOCK, 0, 0, NULL, 0); @@ -2277,6 +2327,9 @@ retry: } for (seg = AS_SEGFIRST(as); seg; seg = AS_SEGNEXT(as, seg)) { + if ((seg->s_flags & S_HOLE) != 0) { + continue; + } error = SEGOP_LOCKOP(seg, seg->s_base, seg->s_size, attr, MC_LOCK, mlock_map, pos); if (error != 0) @@ -2306,6 +2359,9 @@ retry: mutex_exit(&as->a_contents); for (seg = AS_SEGFIRST(as); seg; seg = AS_SEGNEXT(as, seg)) { + if ((seg->s_flags & S_HOLE) != 0) { + continue; + } error = SEGOP_LOCKOP(seg, seg->s_base, seg->s_size, attr, MC_UNLOCK, NULL, 0); if (error != 0) diff --git a/usr/src/uts/i86pc/vm/vm_machdep.c b/usr/src/uts/i86pc/vm/vm_machdep.c index 4fa587ce7a..cb46bba853 100644 --- a/usr/src/uts/i86pc/vm/vm_machdep.c +++ b/usr/src/uts/i86pc/vm/vm_machdep.c @@ -776,9 +776,6 @@ map_addr_proc( base = p->p_brkbase; #if defined(__amd64) - /* - * XX64 Yes, this needs more work. - */ if (p->p_model == DATAMODEL_NATIVE) { if (userlimit < as->a_userlimit) { /* @@ -798,16 +795,24 @@ map_addr_proc( } } else { /* - * XX64 This layout is probably wrong .. but in - * the event we make the amd64 address space look - * like sparcv9 i.e. with the stack -above- the - * heap, this bit of code might even be correct. + * With the stack positioned at a higher address than + * the heap for 64-bit processes, it is necessary to be + * mindful of its location and potential size. + * + * Unallocated space above the top of the stack (that + * is, at a lower address) but still within the bounds + * of the stack limit should be considered unavailable. + * + * As the 64-bit stack guard is mapped in immediately + * adjacent to the stack limit boundary, this prevents + * new mappings from having accidentally dangerous + * proximity to the stack. 
*/ slen = p->p_usrstack - base - ((p->p_stk_ctl + PAGEOFFSET) & PAGEMASK); } } else -#endif +#endif /* defined(__amd64) */ slen = userlimit - base; /* Make len be a multiple of PAGESIZE */ diff --git a/usr/src/uts/i86xpv/vm/seg_mf.c b/usr/src/uts/i86xpv/vm/seg_mf.c index 081ee85b15..4ce5f3777a 100644 --- a/usr/src/uts/i86xpv/vm/seg_mf.c +++ b/usr/src/uts/i86xpv/vm/seg_mf.c @@ -22,6 +22,7 @@ /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2018 Joyent, Inc. */ /* @@ -115,8 +116,9 @@ segmf_data_zalloc(struct seg *seg) } int -segmf_create(struct seg *seg, void *args) +segmf_create(struct seg **segpp, void *args) { + struct seg *seg = *segpp; struct segmf_crargs *a = args; struct segmf_data *data; struct as *as = seg->s_as; diff --git a/usr/src/uts/i86xpv/vm/seg_mf.h b/usr/src/uts/i86xpv/vm/seg_mf.h index 316a1f51bd..bc6aaf425d 100644 --- a/usr/src/uts/i86xpv/vm/seg_mf.h +++ b/usr/src/uts/i86xpv/vm/seg_mf.h @@ -22,6 +22,7 @@ /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2018 Joyent, Inc. */ #ifndef _VM_SEG_MF_H @@ -42,7 +43,7 @@ struct segmf_crargs { uchar_t maxprot; }; -extern int segmf_create(struct seg *, void *); +extern int segmf_create(struct seg **, void *); extern int segmf_add_mfns(struct seg *, caddr_t, mfn_t, pgcnt_t, domid_t); |