diff options
Diffstat (limited to 'usr/src/uts/common')
-rw-r--r-- | usr/src/uts/common/Makefile.files | 1 | ||||
-rw-r--r-- | usr/src/uts/common/crypto/api/kcf_random.c | 11 | ||||
-rw-r--r-- | usr/src/uts/common/os/sysent.c | 5 | ||||
-rw-r--r-- | usr/src/uts/common/sys/mman.h | 11 | ||||
-rw-r--r-- | usr/src/uts/common/sys/random.h | 10 | ||||
-rw-r--r-- | usr/src/uts/common/sys/syscall.h | 1 | ||||
-rw-r--r-- | usr/src/uts/common/syscall/getrandom.c | 80 | ||||
-rw-r--r-- | usr/src/uts/common/syscall/memcntl.c | 8 | ||||
-rw-r--r-- | usr/src/uts/common/vm/seg.h | 10 | ||||
-rw-r--r-- | usr/src/uts/common/vm/seg_dev.c | 1 | ||||
-rw-r--r-- | usr/src/uts/common/vm/seg_kmem.c | 1 | ||||
-rw-r--r-- | usr/src/uts/common/vm/seg_kp.c | 1 | ||||
-rw-r--r-- | usr/src/uts/common/vm/seg_kpm.c | 9 | ||||
-rw-r--r-- | usr/src/uts/common/vm/seg_map.c | 1 | ||||
-rw-r--r-- | usr/src/uts/common/vm/seg_spt.c | 2 | ||||
-rw-r--r-- | usr/src/uts/common/vm/seg_vn.c | 236 | ||||
-rw-r--r-- | usr/src/uts/common/vm/seg_vn.h | 9 | ||||
-rw-r--r-- | usr/src/uts/common/vm/vm_as.c | 15 | ||||
-rw-r--r-- | usr/src/uts/common/vm/vm_seg.c | 11 | ||||
-rw-r--r-- | usr/src/uts/common/vm/vpage.h | 15 |
20 files changed, 357 insertions, 81 deletions
diff --git a/usr/src/uts/common/Makefile.files b/usr/src/uts/common/Makefile.files index 4de1edf971..f022dc69e0 100644 --- a/usr/src/uts/common/Makefile.files +++ b/usr/src/uts/common/Makefile.files @@ -277,6 +277,7 @@ GENUNIX_OBJS += \ profil.o \ project.o \ qsort.o \ + getrandom.o \ rctl.o \ rctlsys.o \ readlink.o \ diff --git a/usr/src/uts/common/crypto/api/kcf_random.c b/usr/src/uts/common/crypto/api/kcf_random.c index 7766d8ba7a..75072fb686 100644 --- a/usr/src/uts/common/crypto/api/kcf_random.c +++ b/usr/src/uts/common/crypto/api/kcf_random.c @@ -21,6 +21,7 @@ /* * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2012 Nexenta Systems, Inc. All rights reserved. + * Copyright (c) 2015, Joyent, Inc. */ /* @@ -1096,3 +1097,13 @@ random_get_bytes(uint8_t *ptr, size_t len) return (0); return (kcf_rnd_get_bytes(ptr, len, B_TRUE)); } + +int +random_get_blocking_bytes(uint8_t *ptr, size_t len) +{ + ASSERT(!mutex_owned(&rndpool_lock)); + + if (len < 1) + return (0); + return (kcf_rnd_get_bytes(ptr, len, B_FALSE)); +} diff --git a/usr/src/uts/common/os/sysent.c b/usr/src/uts/common/os/sysent.c index edcc85774e..aa44ccf788 100644 --- a/usr/src/uts/common/os/sysent.c +++ b/usr/src/uts/common/os/sysent.c @@ -328,6 +328,7 @@ int getsockopt(int, int, int, void *, socklen_t *, int); int setsockopt(int, int, int, void *, socklen_t *, int); int sockconfig(int, void *, void *, void *, void *); ssize_t sendfilev(int, int, const struct sendfilevec *, int, size_t *); +int getrandom(void *, size_t, int); typedef int64_t (*llfcn_t)(); /* for casting one-word returns */ @@ -582,7 +583,7 @@ struct sysent sysent[NSYSCALL] = /* 123 */ SYSENT_CL("preadv", preadv, 5), /* 124 */ SYSENT_CL("pwritev", pwritev, 5), /* 125 */ SYSENT_LOADABLE(), /* (was fxstat) */ - /* 126 */ SYSENT_LOADABLE(), /* (was xmknod) */ + /* 126 */ SYSENT_CI("getrandom", getrandom, 3), /* 127 */ SYSENT_CI("mmapobj", mmapobjsys, 5), /* 128 */ IF_LP64( SYSENT_CI("setrlimit", 
setrlimit64, 2), @@ -947,7 +948,7 @@ struct sysent sysent32[NSYSCALL] = /* 123 */ SYSENT_CI("preadv", preadv, 5), /* 124 */ SYSENT_CI("pwritev", pwritev, 5), /* 125 */ SYSENT_LOADABLE32(), /* was fxstat32 */ - /* 126 */ SYSENT_LOADABLE32(), /* was xmknod */ + /* 126 */ SYSENT_CI("getrandom", getrandom, 3), /* 127 */ SYSENT_CI("mmapobj", mmapobjsys, 5), /* 128 */ SYSENT_CI("setrlimit", setrlimit32, 2), /* 129 */ SYSENT_CI("getrlimit", getrlimit32, 2), diff --git a/usr/src/uts/common/sys/mman.h b/usr/src/uts/common/sys/mman.h index 8f4cd1639f..e9ac3a37cc 100644 --- a/usr/src/uts/common/sys/mman.h +++ b/usr/src/uts/common/sys/mman.h @@ -25,7 +25,7 @@ * * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. - * Copyright 2012 Joyent, Inc. All rights reserved. + * Copyright 2015 Joyent, Inc. All rights reserved. */ /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ @@ -303,7 +303,12 @@ struct memcntl_mha32 { #endif /* !defined(__XOPEN_OR_POSIX) || defined(__EXTENSIONS__) */ #if (_POSIX_C_SOURCE <= 2) && !defined(_XPG4_2) || defined(__EXTENSIONS__) -/* advice to madvise */ +/* + * advice to madvise + * + * Note, if more than 4 bits worth of advice (eg. 16) are specified then + * changes will be necessary to the struct vpage. + */ #define MADV_NORMAL 0 /* no further special treatment */ #define MADV_RANDOM 1 /* expect random page references */ #define MADV_SEQUENTIAL 2 /* expect sequential page references */ @@ -313,6 +318,7 @@ struct memcntl_mha32 { #define MADV_ACCESS_DEFAULT 6 /* default access */ #define MADV_ACCESS_LWP 7 /* next LWP to access heavily */ #define MADV_ACCESS_MANY 8 /* many processes to access heavily */ + #endif /* (_POSIX_C_SOURCE <= 2) && !defined(_XPG4_2) ... 
*/ #if !defined(__XOPEN_OR_POSIX) || defined(_XPG6) || defined(__EXTENSIONS__) @@ -342,6 +348,7 @@ struct memcntl_mha32 { #define MC_LOCKAS 5 /* lock address space in memory */ #define MC_UNLOCKAS 6 /* unlock address space from memory */ #define MC_HAT_ADVISE 7 /* advise hat map size */ +#define MC_INHERIT_ZERO 8 /* zero out regions on fork() */ /* sub-commands for MC_HAT_ADVISE */ #define MHA_MAPSIZE_VA 0x1 /* set preferred page size */ diff --git a/usr/src/uts/common/sys/random.h b/usr/src/uts/common/sys/random.h index a38201456f..b835d2f5ac 100644 --- a/usr/src/uts/common/sys/random.h +++ b/usr/src/uts/common/sys/random.h @@ -24,6 +24,7 @@ */ /* * Copyright 2010 Nexenta Systems, Inc. All rights reserved. + * Copyright (c) 2015, Joyent, Inc. */ #ifndef _SYS_RANDOM_H @@ -61,10 +62,19 @@ typedef struct swrand_stats { extern int random_add_entropy(uint8_t *, size_t, uint_t); extern int random_get_bytes(uint8_t *, size_t); +extern int random_get_blocking_bytes(uint8_t *, size_t); extern int random_get_pseudo_bytes(uint8_t *, size_t); #endif /* _KERNEL */ +/* + * Flags for the getrandom system call. Note, we may want to move these + * definitions if we expose getrandom(2) into a public system call. 
+ */ +#define GRND_NONBLOCK 0x0001 /* O_NONBLOCK equiv */ +#define GRND_RANDOM 0x0002 /* Use /dev/random, not /dev/urandom */ +extern int getrandom(void *, size_t, int); + #ifdef __cplusplus } #endif diff --git a/usr/src/uts/common/sys/syscall.h b/usr/src/uts/common/sys/syscall.h index 5523f08552..7d86565564 100644 --- a/usr/src/uts/common/sys/syscall.h +++ b/usr/src/uts/common/sys/syscall.h @@ -296,6 +296,7 @@ extern "C" { #define SYS_writev 122 #define SYS_preadv 123 #define SYS_pwritev 124 +#define SYS_getrandom 126 #define SYS_mmapobj 127 #define SYS_setrlimit 128 #define SYS_getrlimit 129 diff --git a/usr/src/uts/common/syscall/getrandom.c b/usr/src/uts/common/syscall/getrandom.c new file mode 100644 index 0000000000..46b650b0dc --- /dev/null +++ b/usr/src/uts/common/syscall/getrandom.c @@ -0,0 +1,80 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright (c) 2015, Joyent, Inc. + */ + +/* + * getrandom system call implementation + */ + +#include <sys/types.h> +#include <sys/errno.h> +#include <sys/systm.h> +#include <sys/random.h> +#include <sys/ddi.h> +#include <sys/sunddi.h> +#include <sys/sysmacros.h> + +#include <sys/random.h> + +/* + * Impose a maximum upper bound on the number of bytes that we'll read in one + * go, ala a read of /dev/random. For /dev/urandom, we clamp it based on our + * return value, because the system call returns an int, we can't handle more + * than INT_MAX. 
+ */
+#define	MAXRANDBYTES	1024
+#define	MAXURANDBYTES	INT_MAX
+
+/*
+ * getrandom system call: fill the user buffer bufp with up to buflen random
+ * bytes.
+ *
+ * flags may contain GRND_RANDOM and/or GRND_NONBLOCK; any other bit fails
+ * with EINVAL. With GRND_RANDOM the request is clamped to MAXRANDBYTES and
+ * is served by random_get_bytes() when GRND_NONBLOCK is also set, or by
+ * random_get_blocking_bytes() otherwise. Without GRND_RANDOM the request is
+ * clamped to MAXURANDBYTES and is served by random_get_pseudo_bytes().
+ *
+ * Returns the number of bytes copied out to bufp. Failures are reported
+ * through set_errno(): EFAULT when the copyout fails, otherwise the error
+ * handed back by the random routine. An EAGAIN that arrives after some bytes
+ * have already been copied out ends the call with that short count instead
+ * of an error.
+ */
+int
+getrandom(void *bufp, size_t buflen, int flags)
+{
+	int out = 0;
+	uint8_t rbytes[128];
+	uint8_t *buf = bufp;
+
+	if (flags & ~(GRND_NONBLOCK | GRND_RANDOM))
+		return (set_errno(EINVAL));
+
+	if ((flags & GRND_RANDOM) && buflen > MAXRANDBYTES) {
+		buflen = MAXRANDBYTES;
+	} else if (buflen > MAXURANDBYTES) {
+		buflen = MAXURANDBYTES;
+	}
+
+	while (buflen > out) {
+		int err;
+		/*
+		 * Size each chunk from the space that is still left in the
+		 * caller's buffer. Sizing it from buflen alone would let the
+		 * final copyout run past the end of the user buffer and
+		 * return a count larger than what was asked for.
+		 */
+		size_t len = MIN(sizeof (rbytes), buflen - out);
+
+		if (flags & GRND_RANDOM) {
+			if (flags & GRND_NONBLOCK)
+				err = random_get_bytes(rbytes, len);
+			else
+				err = random_get_blocking_bytes(rbytes, len);
+		} else {
+			err = random_get_pseudo_bytes(rbytes, len);
+		}
+
+		if (err == 0) {
+			if (ddi_copyout(rbytes, buf + out, len, 0) != 0)
+				return (set_errno(EFAULT));
+			out += len;
+		} else if (err == EAGAIN && out > 0) {
+			/* Partial success: hand back what we already have. */
+			break;
+		} else {
+			return (set_errno(err));
+		}
+	}
+
+	return (out);
+}
diff --git a/usr/src/uts/common/syscall/memcntl.c b/usr/src/uts/common/syscall/memcntl.c
index 63c8b64ad0..4f220bbca5 100644
--- a/usr/src/uts/common/syscall/memcntl.c
+++ b/usr/src/uts/common/syscall/memcntl.c
@@ -21,15 +21,13 @@
 /*
  * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
  * Use is subject to license terms.
- * Copyright 2012 Joyent, Inc. All rights reserved.
+ * Copyright (c) 2015 Joyent, Inc.
*/ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ /* All Rights Reserved */ -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/types.h> #include <sys/bitmap.h> #include <sys/sysmacros.h> @@ -389,6 +387,10 @@ memcntl(caddr_t addr, size_t len, int cmd, caddr_t arg, int attr, int mask) return (error); } break; + case MC_INHERIT_ZERO: + if (arg != 0 || attr != NULL || mask != 0) + return (set_errno(EINVAL)); + break; default: return (set_errno(EINVAL)); } diff --git a/usr/src/uts/common/vm/seg.h b/usr/src/uts/common/vm/seg.h index 2e1e6a77de..343e308a82 100644 --- a/usr/src/uts/common/vm/seg.h +++ b/usr/src/uts/common/vm/seg.h @@ -21,6 +21,7 @@ /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright (c) 2015, Joyent, Inc. */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ @@ -39,8 +40,6 @@ #ifndef _VM_SEG_H #define _VM_SEG_H -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/vnode.h> #include <sys/avl.h> #include <vm/seg_enum.h> @@ -142,6 +141,7 @@ struct seg_ops { int (*getmemid)(struct seg *, caddr_t, memid_t *); struct lgrp_mem_policy_info *(*getpolicy)(struct seg *, caddr_t); int (*capable)(struct seg *, segcapability_t); + int (*inherit)(struct seg *, caddr_t, size_t, uint_t); }; #ifdef _KERNEL @@ -238,6 +238,7 @@ extern segadvstat_t segadvstat; #define SEGOP_GETMEMID(s, a, mp) (*(s)->s_ops->getmemid)((s), (a), (mp)) #define SEGOP_GETPOLICY(s, a) (*(s)->s_ops->getpolicy)((s), (a)) #define SEGOP_CAPABLE(s, c) (*(s)->s_ops->capable)((s), (c)) +#define SEGOP_INHERIT(s, a, l, b) (*(s)->s_ops->inherit)((s), (a), (l), (b)) #define seg_page(seg, addr) \ (((uintptr_t)((addr) - (seg)->s_base)) >> PAGESHIFT) @@ -249,6 +250,11 @@ extern segadvstat_t segadvstat; #define IE_RETRY -2 /* internal to seg layer */ #define IE_REATTACH -3 /* internal to seg layer */ +/* Values for SEGOP_INHERIT */ +#define SEGP_INH_ZERO 0x01 + +int seg_inherit_notsup(struct seg *, caddr_t, size_t, uint_t); + /* Delay/retry 
factors for seg_p_mem_config_pre_del */ #define SEGP_PREDEL_DELAY_FACTOR 4 /* diff --git a/usr/src/uts/common/vm/seg_dev.c b/usr/src/uts/common/vm/seg_dev.c index 9d214024a5..6cf938a007 100644 --- a/usr/src/uts/common/vm/seg_dev.c +++ b/usr/src/uts/common/vm/seg_dev.c @@ -215,6 +215,7 @@ struct seg_ops segdev_ops = { segdev_getmemid, segdev_getpolicy, segdev_capable, + seg_inherit_notsup }; /* diff --git a/usr/src/uts/common/vm/seg_kmem.c b/usr/src/uts/common/vm/seg_kmem.c index 205aac9ded..90e1b73b70 100644 --- a/usr/src/uts/common/vm/seg_kmem.c +++ b/usr/src/uts/common/vm/seg_kmem.c @@ -797,6 +797,7 @@ static struct seg_ops segkmem_ops = { segkmem_getmemid, segkmem_getpolicy, /* getpolicy */ segkmem_capable, /* capable */ + seg_inherit_notsup /* inherit */ }; int diff --git a/usr/src/uts/common/vm/seg_kp.c b/usr/src/uts/common/vm/seg_kp.c index 2fe1e5f17d..d33ff004f1 100644 --- a/usr/src/uts/common/vm/seg_kp.c +++ b/usr/src/uts/common/vm/seg_kp.c @@ -169,6 +169,7 @@ static struct seg_ops segkp_ops = { segkp_getmemid, /* getmemid */ segkp_getpolicy, /* getpolicy */ segkp_capable, /* capable */ + seg_inherit_notsup /* inherit */ }; diff --git a/usr/src/uts/common/vm/seg_kpm.c b/usr/src/uts/common/vm/seg_kpm.c index 4f764588b1..0886513183 100644 --- a/usr/src/uts/common/vm/seg_kpm.c +++ b/usr/src/uts/common/vm/seg_kpm.c @@ -24,8 +24,6 @@ * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - /* * Kernel Physical Mapping (kpm) segment driver (segkpm). 
* @@ -136,6 +134,7 @@ static struct seg_ops segkpm_ops = { SEGKPM_BADOP(int), /* getmemid */ SEGKPM_BADOP(lgrp_mem_policy_info_t *), /* getpolicy */ segkpm_capable, /* capable */ + seg_inherit_notsup /* inherit */ }; /* @@ -160,8 +159,8 @@ segkpm_create(struct seg *seg, void *argsp) ASSERT(seg->s_as && RW_WRITE_HELD(&seg->s_as->a_lock)); ASSERT(btokpmp(seg->s_size) >= 1 && - kpmpageoff((uintptr_t)seg->s_base) == 0 && - kpmpageoff((uintptr_t)seg->s_base + seg->s_size) == 0); + kpmpageoff((uintptr_t)seg->s_base) == 0 && + kpmpageoff((uintptr_t)seg->s_base + seg->s_size) == 0); skd = kmem_zalloc(sizeof (struct segkpm_data), KM_SLEEP); @@ -193,7 +192,7 @@ segkpm_create(struct seg *seg, void *argsp) skd->skd_nvcolors = b->nvcolors; p = skd->skd_va_select = - kmem_zalloc(NCPU * b->nvcolors * sizeof (ushort_t), KM_SLEEP); + kmem_zalloc(NCPU * b->nvcolors * sizeof (ushort_t), KM_SLEEP); for (i = 0; i < NCPU; i++) for (j = 0; j < b->nvcolors; j++, p++) diff --git a/usr/src/uts/common/vm/seg_map.c b/usr/src/uts/common/vm/seg_map.c index a57d202a6a..1edb92e892 100644 --- a/usr/src/uts/common/vm/seg_map.c +++ b/usr/src/uts/common/vm/seg_map.c @@ -124,6 +124,7 @@ static struct seg_ops segmap_ops = { segmap_getmemid, /* getmemid */ segmap_getpolicy, /* getpolicy */ segmap_capable, /* capable */ + seg_inherit_notsup /* inherit */ }; /* diff --git a/usr/src/uts/common/vm/seg_spt.c b/usr/src/uts/common/vm/seg_spt.c index f087d5fc30..8d85fbaef7 100644 --- a/usr/src/uts/common/vm/seg_spt.c +++ b/usr/src/uts/common/vm/seg_spt.c @@ -109,6 +109,7 @@ struct seg_ops segspt_ops = { SEGSPT_BADOP(int), /* getmemid */ segspt_getpolicy, /* getpolicy */ SEGSPT_BADOP(int), /* capable */ + seg_inherit_notsup /* inherit */ }; static int segspt_shmdup(struct seg *seg, struct seg *newseg); @@ -168,6 +169,7 @@ struct seg_ops segspt_shmops = { segspt_shmgetmemid, segspt_shmgetpolicy, segspt_shmcapable, + seg_inherit_notsup }; static void segspt_purge(struct seg *seg); diff --git 
a/usr/src/uts/common/vm/seg_vn.c b/usr/src/uts/common/vm/seg_vn.c index 0e07ce22f5..b33832c157 100644 --- a/usr/src/uts/common/vm/seg_vn.c +++ b/usr/src/uts/common/vm/seg_vn.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 1986, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2014, Joyent, Inc. All rights reserved. + * Copyright 2015, Joyent, Inc. All rights reserved. */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ @@ -113,6 +113,7 @@ static int segvn_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp); static lgrp_mem_policy_info_t *segvn_getpolicy(struct seg *, caddr_t); static int segvn_capable(struct seg *seg, segcapability_t capable); +static int segvn_inherit(struct seg *, caddr_t, size_t, uint_t); struct seg_ops segvn_ops = { segvn_dup, @@ -138,6 +139,7 @@ struct seg_ops segvn_ops = { segvn_getmemid, segvn_getpolicy, segvn_capable, + segvn_inherit }; /* @@ -807,6 +809,7 @@ segvn_create(struct seg *seg, void *argsp) svd->softlockcnt = 0; svd->softlockcnt_sbase = 0; svd->softlockcnt_send = 0; + svd->svn_inz = 0; svd->rcookie = HAT_INVALID_REGION_COOKIE; svd->pageswap = 0; @@ -1465,6 +1468,81 @@ segvn_extend_next( return (0); } +/* + * Duplicate all the pages in the segment. This may break COW sharing for a + * given page. If the page is marked with inherit zero set, then instead of + * duplicating the page, we zero the page. + */ +static int +segvn_dup_pages(struct seg *seg, struct seg *newseg) +{ + int error; + uint_t prot; + page_t *pp; + struct anon *ap, *newap; + size_t i; + caddr_t addr; + + struct segvn_data *svd = (struct segvn_data *)seg->s_data; + struct segvn_data *newsvd = (struct segvn_data *)newseg->s_data; + ulong_t old_idx = svd->anon_index; + ulong_t new_idx = 0; + + i = btopr(seg->s_size); + addr = seg->s_base; + + /* + * XXX break cow sharing using PAGESIZE + * pages. They will be relocated into larger + * pages at fault time. 
+ */ + while (i-- > 0) { + if ((ap = anon_get_ptr(svd->amp->ahp, old_idx)) != NULL) { + struct vpage *vpp; + + vpp = &svd->vpage[seg_page(seg, addr)]; + + /* + * prot need not be computed below 'cause anon_private + * is going to ignore it anyway as child doesn't inherit + * pagelock from parent. + */ + prot = svd->pageprot ? VPP_PROT(vpp) : svd->prot; + + /* + * Check whether we should zero this or dup it. + */ + if (svd->svn_inz == SEGVN_INZ_ALL || + (svd->svn_inz == SEGVN_INZ_VPP && + VPP_ISINHZERO(vpp))) { + pp = anon_zero(newseg, addr, &newap, + newsvd->cred); + } else { + page_t *anon_pl[1+1]; + uint_t vpprot; + error = anon_getpage(&ap, &vpprot, anon_pl, + PAGESIZE, seg, addr, S_READ, svd->cred); + if (error != 0) + return (error); + + pp = anon_private(&newap, newseg, addr, prot, + anon_pl[0], 0, newsvd->cred); + } + if (pp == NULL) { + return (ENOMEM); + } + (void) anon_set_ptr(newsvd->amp->ahp, new_idx, newap, + ANON_SLEEP); + page_unlock(pp); + } + addr += PAGESIZE; + old_idx++; + new_idx++; + } + + return (0); +} + static int segvn_dup(struct seg *seg, struct seg *newseg) { @@ -1472,7 +1550,6 @@ segvn_dup(struct seg *seg, struct seg *newseg) struct segvn_data *newsvd; pgcnt_t npages = seg_pages(seg); int error = 0; - uint_t prot; size_t len; struct anon_map *amp; @@ -1516,6 +1593,7 @@ segvn_dup(struct seg *seg, struct seg *newseg) crhold(newsvd->cred); newsvd->advice = svd->advice; newsvd->pageadvice = svd->pageadvice; + newsvd->svn_inz = svd->svn_inz; newsvd->swresv = svd->swresv; newsvd->pageswap = svd->pageswap; newsvd->flags = svd->flags; @@ -1545,6 +1623,7 @@ segvn_dup(struct seg *seg, struct seg *newseg) ASSERT(svd->tr_state == SEGVN_TR_OFF); newsvd->tr_state = SEGVN_TR_OFF; if (svd->type == MAP_SHARED) { + ASSERT(svd->svn_inz == SEGVN_INZ_NONE); newsvd->amp = amp; ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER); amp->refcnt++; @@ -1560,6 +1639,9 @@ segvn_dup(struct seg *seg, struct seg *newseg) ANON_SLEEP); newsvd->amp->a_szc = newseg->s_szc; 
newsvd->anon_index = 0; + ASSERT(svd->svn_inz == SEGVN_INZ_NONE || + svd->svn_inz == SEGVN_INZ_ALL || + svd->svn_inz == SEGVN_INZ_VPP); /* * We don't have to acquire the anon_map lock @@ -1583,17 +1665,16 @@ segvn_dup(struct seg *seg, struct seg *newseg) * The strategy here is to just break the * sharing on pages that could possibly be * softlocked. + * + * In addition, if any pages have been marked that they + * should be inherited as zero, then we immediately go + * ahead and break COW and zero them. In the case of a + * softlocked page that should be inherited zero, we + * break COW and just get a zero page. */ retry: - if (svd->softlockcnt) { - struct anon *ap, *newap; - size_t i; - uint_t vpprot; - page_t *anon_pl[1+1], *pp; - caddr_t addr; - ulong_t old_idx = svd->anon_index; - ulong_t new_idx = 0; - + if (svd->softlockcnt || + svd->svn_inz != SEGVN_INZ_NONE) { /* * The softlock count might be non zero * because some pages are still stuck in the @@ -1603,59 +1684,16 @@ retry: * pages]. Note, we have the writers lock so * nothing gets inserted during the flush. */ - if (reclaim == 1) { + if (svd->softlockcnt && reclaim == 1) { segvn_purge(seg); reclaim = 0; goto retry; } - i = btopr(seg->s_size); - addr = seg->s_base; - /* - * XXX break cow sharing using PAGESIZE - * pages. They will be relocated into larger - * pages at fault time. - */ - while (i-- > 0) { - if (ap = anon_get_ptr(amp->ahp, - old_idx)) { - error = anon_getpage(&ap, - &vpprot, anon_pl, PAGESIZE, - seg, addr, S_READ, - svd->cred); - if (error) { - newsvd->vpage = NULL; - goto out; - } - /* - * prot need not be computed - * below 'cause anon_private is - * going to ignore it anyway - * as child doesn't inherit - * pagelock from parent. - */ - prot = svd->pageprot ? 
- VPP_PROT( - &svd->vpage[ - seg_page(seg, addr)]) - : svd->prot; - pp = anon_private(&newap, - newseg, addr, prot, - anon_pl[0], 0, - newsvd->cred); - if (pp == NULL) { - /* no mem abort */ - newsvd->vpage = NULL; - error = ENOMEM; - goto out; - } - (void) anon_set_ptr( - newsvd->amp->ahp, new_idx, - newap, ANON_SLEEP); - page_unlock(pp); - } - addr += PAGESIZE; - old_idx++; - new_idx++; + + error = segvn_dup_pages(seg, newseg); + if (error != 0) { + newsvd->vpage = NULL; + goto out; } } else { /* common case */ if (seg->s_szc != 0) { @@ -2192,6 +2230,7 @@ retry: nsvd->softlockcnt = 0; nsvd->softlockcnt_sbase = 0; nsvd->softlockcnt_send = 0; + nsvd->svn_inz = svd->svn_inz; ASSERT(nsvd->rcookie == HAT_INVALID_REGION_COOKIE); if (svd->vp != NULL) { @@ -8004,7 +8043,7 @@ out: /* * Set advice from user for specified pages - * There are 5 types of advice: + * There are 9 types of advice: * MADV_NORMAL - Normal (default) behavior (whatever that is) * MADV_RANDOM - Random page references * do not allow readahead or 'klustering' @@ -8486,6 +8525,81 @@ segvn_advise(struct seg *seg, caddr_t addr, size_t len, uint_t behav) } /* + * There is one kind of inheritance that can be specified for pages: + * + * SEGP_INH_ZERO - Pages should be zeroed in the child + */ +static int +segvn_inherit(struct seg *seg, caddr_t addr, size_t len, uint_t behav) +{ + struct segvn_data *svd = (struct segvn_data *)seg->s_data; + struct vpage *bvpp, *evpp; + size_t page; + int ret = 0; + + ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock)); + + /* Can't support something we don't know about */ + if (behav != SEGP_INH_ZERO) + return (ENOTSUP); + + SEGVN_LOCK_ENTER(seg->s_as, &svd->lock, RW_WRITER); + + /* + * This must be a straightforward anonymous segment that is mapped + * privately and is not backed by a vnode. 
+ */ + if (svd->tr_state != SEGVN_TR_OFF || + svd->type != MAP_PRIVATE || + svd->vp != NULL) { + ret = EINVAL; + goto out; + } + + /* + * If the entire segment has been marked as inherit zero, then no reason + * to do anything else. + */ + if (svd->svn_inz == SEGVN_INZ_ALL) { + ret = 0; + goto out; + } + + /* + * If this applies to the entire segment, simply mark it and we're done. + */ + if ((addr == seg->s_base) && (len == seg->s_size)) { + svd->svn_inz = SEGVN_INZ_ALL; + ret = 0; + goto out; + } + + /* + * We've been asked to mark a subset of this segment as inherit zero, + * therefore we need to manipulate its vpages. + */ + if (svd->vpage == NULL) { + segvn_vpage(seg); + if (svd->vpage == NULL) { + ret = ENOMEM; + goto out; + } + } + + svd->svn_inz = SEGVN_INZ_VPP; + page = seg_page(seg, addr); + bvpp = &svd->vpage[page]; + evpp = &svd->vpage[page + (len >> PAGESHIFT)]; + for (; bvpp < evpp; bvpp++) + VPP_SETINHZERO(bvpp); + ret = 0; + +out: + SEGVN_LOCK_EXIT(seg->s_as, &svd->lock); + return (ret); +} + +/* * Create a vpage structure for this seg. */ static void diff --git a/usr/src/uts/common/vm/seg_vn.h b/usr/src/uts/common/vm/seg_vn.h index f94e0cb873..51ebda3a84 100644 --- a/usr/src/uts/common/vm/seg_vn.h +++ b/usr/src/uts/common/vm/seg_vn.h @@ -21,6 +21,7 @@ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright (c) 2015, Joyent, Inc. All rights reserved. 
*/ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ @@ -98,6 +99,7 @@ typedef struct segvn_data { size_t swresv; /* swap space reserved for this segment */ uchar_t advice; /* madvise flags for segment */ uchar_t pageadvice; /* true if per page advice set */ + uchar_t svn_inz; /* true if pages marked as inherit zero */ ushort_t flags; /* flags - from sys/mman.h */ spgcnt_t softlockcnt; /* # of pages SOFTLOCKED in seg */ lgrp_mem_policy_info_t policy_info; /* memory allocation policy */ @@ -122,6 +124,13 @@ typedef struct segvn_data { #define SEGVN_TR_OFF (2) /* Text replication is disabled */ /* + * Inherit zero states + */ +#define SEGVN_INZ_NONE (0) /* Nothing in the segment is inherit zero */ +#define SEGVN_INZ_ALL (1) /* Everything in the segment is inherit zero */ +#define SEGVN_INZ_VPP (2) /* Check struct vpages for inherit zero */ + +/* * Macros for segvn segment driver locking. */ #define SEGVN_LOCK_ENTER(as, lock, type) rw_enter((lock), (type)) diff --git a/usr/src/uts/common/vm/vm_as.c b/usr/src/uts/common/vm/vm_as.c index 8caa257486..992254938f 100644 --- a/usr/src/uts/common/vm/vm_as.c +++ b/usr/src/uts/common/vm/vm_as.c @@ -21,7 +21,7 @@ /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. - * Copyright 2012, Joyent, Inc. All rights reserved. + * Copyright 2015, Joyent, Inc. All rights reserved. */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ @@ -2566,6 +2566,19 @@ retry: } break; + case MC_INHERIT_ZERO: + if (seg->s_ops->inherit == NULL) { + error = ENOTSUP; + } else { + error = SEGOP_INHERIT(seg, raddr, ssize, + SEGP_INH_ZERO); + } + if (error != 0) { + AS_LOCK_EXIT(as, &as->a_lock); + return (error); + } + break; + /* * Can't happen. */ diff --git a/usr/src/uts/common/vm/vm_seg.c b/usr/src/uts/common/vm/vm_seg.c index 65c6c5ecdc..e54401ddeb 100644 --- a/usr/src/uts/common/vm/vm_seg.c +++ b/usr/src/uts/common/vm/vm_seg.c @@ -21,6 +21,7 @@ /* * Copyright 2009 Sun Microsystems, Inc. 
All rights reserved. * Use is subject to license terms. + * Copyright (c) 2015, Joyent, Inc. */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ @@ -1851,3 +1852,13 @@ seg_swresv(struct seg *seg) } return (swap); } + +/* + * General not supported function for SEGOP_INHERIT + */ +/* ARGSUSED */ +int +seg_inherit_notsup(struct seg *seg, caddr_t addr, size_t len, uint_t op) +{ + return (ENOTSUP); +} diff --git a/usr/src/uts/common/vm/vpage.h b/usr/src/uts/common/vm/vpage.h index 368bc629ff..5eaefb9738 100644 --- a/usr/src/uts/common/vm/vpage.h +++ b/usr/src/uts/common/vm/vpage.h @@ -21,6 +21,7 @@ /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright (c) 2015, Joyent, Inc. All rights reserved. */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ @@ -39,8 +40,6 @@ #ifndef _VM_VPAGE_H #define _VM_VPAGE_H -#pragma ident "%Z%%M% %I% %E% SMI" - #ifdef __cplusplus extern "C" { #endif @@ -57,9 +56,10 @@ struct vpage { * This was changed from a bitfield to flags/macros in order * to conserve space (uchar_t bitfields are not ANSI). This could * have been condensed to a uchar_t, but at the expense of complexity. - * We've stolen two bits from the top of nvp_advice: the first to store - * pplock, and the second to identify pages for which we have reserved - * swap space, but have not necessarily allocated anon slots. + * We've stolen three bits from the top of nvp_advice: the first to store + * pplock, the second to identify pages for which we have reserved + * swap space, but have not necessarily allocated anon slots, and the third to + * indicate that the page should be zeroed on fork. * * WARNING: VPP_SETADVICE(vpp, x) evaluates vpp twice, and VPP_PLOCK(vpp) * returns a positive integer when the lock is held, not necessarily (1). 
@@ -69,6 +69,7 @@ struct vpage { #define VP_PPLOCK_SHIFT (0x07) /* offset of lock hiding inside nvp_advice */ #define VP_SWAPRES_MASK (0x40) /* Swap space has been reserved, but we */ /* might not have allocated an anon slot */ +#define VP_INHZERO_MASK (0x20) /* zero page on fork() */ #define VPP_PROT(vpp) ((vpp)->nvp_prot) #define VPP_ADVICE(vpp) ((vpp)->nvp_advice & VP_ADVICE_MASK) @@ -76,6 +77,8 @@ struct vpage { ((uchar_t)((vpp)->nvp_advice & VP_PPLOCK_MASK)) #define VPP_ISSWAPRES(vpp) \ ((uchar_t)((vpp)->nvp_advice & VP_SWAPRES_MASK)) +#define VPP_ISINHZERO(vpp) \ + ((uchar_t)((vpp)->nvp_advice & VP_INHZERO_MASK)) #define VPP_SETPROT(vpp, x) ((vpp)->nvp_prot = (x)) #define VPP_SETADVICE(vpp, x) \ @@ -85,6 +88,8 @@ struct vpage { #define VPP_CLRPPLOCK(vpp) ((vpp)->nvp_advice &= ~VP_PPLOCK_MASK) #define VPP_SETSWAPRES(vpp) ((vpp)->nvp_advice |= VP_SWAPRES_MASK) #define VPP_CLRSWAPRES(vpp) ((vpp)->nvp_advice &= ~VP_SWAPRES_MASK) +#define VPP_SETINHZERO(vpp) ((vpp)->nvp_advice |= VP_INHZERO_MASK) +#define VPP_CLRINHZERO(vpp) ((vpp)->nvp_advice &= ~VP_INHZERO_MASK) #ifdef __cplusplus } |