summaryrefslogtreecommitdiff
path: root/usr/src/uts/common
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src/uts/common')
-rw-r--r--usr/src/uts/common/Makefile.files1
-rw-r--r--usr/src/uts/common/crypto/api/kcf_random.c11
-rw-r--r--usr/src/uts/common/os/sysent.c5
-rw-r--r--usr/src/uts/common/sys/mman.h11
-rw-r--r--usr/src/uts/common/sys/random.h10
-rw-r--r--usr/src/uts/common/sys/syscall.h1
-rw-r--r--usr/src/uts/common/syscall/getrandom.c80
-rw-r--r--usr/src/uts/common/syscall/memcntl.c8
-rw-r--r--usr/src/uts/common/vm/seg.h10
-rw-r--r--usr/src/uts/common/vm/seg_dev.c1
-rw-r--r--usr/src/uts/common/vm/seg_kmem.c1
-rw-r--r--usr/src/uts/common/vm/seg_kp.c1
-rw-r--r--usr/src/uts/common/vm/seg_kpm.c9
-rw-r--r--usr/src/uts/common/vm/seg_map.c1
-rw-r--r--usr/src/uts/common/vm/seg_spt.c2
-rw-r--r--usr/src/uts/common/vm/seg_vn.c236
-rw-r--r--usr/src/uts/common/vm/seg_vn.h9
-rw-r--r--usr/src/uts/common/vm/vm_as.c15
-rw-r--r--usr/src/uts/common/vm/vm_seg.c11
-rw-r--r--usr/src/uts/common/vm/vpage.h15
20 files changed, 357 insertions, 81 deletions
diff --git a/usr/src/uts/common/Makefile.files b/usr/src/uts/common/Makefile.files
index 4de1edf971..f022dc69e0 100644
--- a/usr/src/uts/common/Makefile.files
+++ b/usr/src/uts/common/Makefile.files
@@ -277,6 +277,7 @@ GENUNIX_OBJS += \
profil.o \
project.o \
qsort.o \
+ getrandom.o \
rctl.o \
rctlsys.o \
readlink.o \
diff --git a/usr/src/uts/common/crypto/api/kcf_random.c b/usr/src/uts/common/crypto/api/kcf_random.c
index 7766d8ba7a..75072fb686 100644
--- a/usr/src/uts/common/crypto/api/kcf_random.c
+++ b/usr/src/uts/common/crypto/api/kcf_random.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012 Nexenta Systems, Inc. All rights reserved.
+ * Copyright (c) 2015, Joyent, Inc.
*/
/*
@@ -1096,3 +1097,13 @@ random_get_bytes(uint8_t *ptr, size_t len)
return (0);
return (kcf_rnd_get_bytes(ptr, len, B_TRUE));
}
+
+/*
+ * Fill ptr[0 .. len) from kcf_rnd_get_bytes(), passing B_FALSE as its
+ * third argument where random_get_bytes() passes B_TRUE.
+ * NOTE(review): presumably that argument selects non-blocking behaviour,
+ * so this variant may wait for entropy -- confirm against
+ * kcf_rnd_get_bytes().
+ *
+ * The caller must not hold rndpool_lock. A zero-length request returns 0
+ * without calling kcf_rnd_get_bytes(); otherwise its result is returned
+ * unchanged.
+ */
+int
+random_get_blocking_bytes(uint8_t *ptr, size_t len)
+{
+ ASSERT(!mutex_owned(&rndpool_lock));
+
+ return (len == 0 ? 0 : kcf_rnd_get_bytes(ptr, len, B_FALSE));
+}
diff --git a/usr/src/uts/common/os/sysent.c b/usr/src/uts/common/os/sysent.c
index edcc85774e..aa44ccf788 100644
--- a/usr/src/uts/common/os/sysent.c
+++ b/usr/src/uts/common/os/sysent.c
@@ -328,6 +328,7 @@ int getsockopt(int, int, int, void *, socklen_t *, int);
int setsockopt(int, int, int, void *, socklen_t *, int);
int sockconfig(int, void *, void *, void *, void *);
ssize_t sendfilev(int, int, const struct sendfilevec *, int, size_t *);
+int getrandom(void *, size_t, int);
typedef int64_t (*llfcn_t)(); /* for casting one-word returns */
@@ -582,7 +583,7 @@ struct sysent sysent[NSYSCALL] =
/* 123 */ SYSENT_CL("preadv", preadv, 5),
/* 124 */ SYSENT_CL("pwritev", pwritev, 5),
/* 125 */ SYSENT_LOADABLE(), /* (was fxstat) */
- /* 126 */ SYSENT_LOADABLE(), /* (was xmknod) */
+ /* 126 */ SYSENT_CI("getrandom", getrandom, 3),
/* 127 */ SYSENT_CI("mmapobj", mmapobjsys, 5),
/* 128 */ IF_LP64(
SYSENT_CI("setrlimit", setrlimit64, 2),
@@ -947,7 +948,7 @@ struct sysent sysent32[NSYSCALL] =
/* 123 */ SYSENT_CI("preadv", preadv, 5),
/* 124 */ SYSENT_CI("pwritev", pwritev, 5),
/* 125 */ SYSENT_LOADABLE32(), /* was fxstat32 */
- /* 126 */ SYSENT_LOADABLE32(), /* was xmknod */
+ /* 126 */ SYSENT_CI("getrandom", getrandom, 3),
/* 127 */ SYSENT_CI("mmapobj", mmapobjsys, 5),
/* 128 */ SYSENT_CI("setrlimit", setrlimit32, 2),
/* 129 */ SYSENT_CI("getrlimit", getrlimit32, 2),
diff --git a/usr/src/uts/common/sys/mman.h b/usr/src/uts/common/sys/mman.h
index 8f4cd1639f..e9ac3a37cc 100644
--- a/usr/src/uts/common/sys/mman.h
+++ b/usr/src/uts/common/sys/mman.h
@@ -25,7 +25,7 @@
*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
- * Copyright 2012 Joyent, Inc. All rights reserved.
+ * Copyright 2015 Joyent, Inc. All rights reserved.
*/
/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
@@ -303,7 +303,12 @@ struct memcntl_mha32 {
#endif /* !defined(__XOPEN_OR_POSIX) || defined(__EXTENSIONS__) */
#if (_POSIX_C_SOURCE <= 2) && !defined(_XPG4_2) || defined(__EXTENSIONS__)
-/* advice to madvise */
+/*
+ * advice to madvise
+ *
+ * Note, if more than 4 bits worth of advice (eg. 16) are specified then
+ * changes will be necessary to the struct vpage.
+ */
#define MADV_NORMAL 0 /* no further special treatment */
#define MADV_RANDOM 1 /* expect random page references */
#define MADV_SEQUENTIAL 2 /* expect sequential page references */
@@ -313,6 +318,7 @@ struct memcntl_mha32 {
#define MADV_ACCESS_DEFAULT 6 /* default access */
#define MADV_ACCESS_LWP 7 /* next LWP to access heavily */
#define MADV_ACCESS_MANY 8 /* many processes to access heavily */
+
#endif /* (_POSIX_C_SOURCE <= 2) && !defined(_XPG4_2) ... */
#if !defined(__XOPEN_OR_POSIX) || defined(_XPG6) || defined(__EXTENSIONS__)
@@ -342,6 +348,7 @@ struct memcntl_mha32 {
#define MC_LOCKAS 5 /* lock address space in memory */
#define MC_UNLOCKAS 6 /* unlock address space from memory */
#define MC_HAT_ADVISE 7 /* advise hat map size */
+#define MC_INHERIT_ZERO 8 /* zero out regions on fork() */
/* sub-commands for MC_HAT_ADVISE */
#define MHA_MAPSIZE_VA 0x1 /* set preferred page size */
diff --git a/usr/src/uts/common/sys/random.h b/usr/src/uts/common/sys/random.h
index a38201456f..b835d2f5ac 100644
--- a/usr/src/uts/common/sys/random.h
+++ b/usr/src/uts/common/sys/random.h
@@ -24,6 +24,7 @@
*/
/*
* Copyright 2010 Nexenta Systems, Inc. All rights reserved.
+ * Copyright (c) 2015, Joyent, Inc.
*/
#ifndef _SYS_RANDOM_H
@@ -61,10 +62,19 @@ typedef struct swrand_stats {
extern int random_add_entropy(uint8_t *, size_t, uint_t);
extern int random_get_bytes(uint8_t *, size_t);
+extern int random_get_blocking_bytes(uint8_t *, size_t);
extern int random_get_pseudo_bytes(uint8_t *, size_t);
#endif /* _KERNEL */
+/*
+ * Flags for the getrandom system call. Note, we may want to move these
+ * definitions if we expose getrandom(2) into a public system call.
+ */
+#define GRND_NONBLOCK 0x0001 /* O_NONBLOCK equiv */
+#define GRND_RANDOM 0x0002 /* Use /dev/random, not /dev/urandom */
+extern int getrandom(void *, size_t, int);
+
#ifdef __cplusplus
}
#endif
diff --git a/usr/src/uts/common/sys/syscall.h b/usr/src/uts/common/sys/syscall.h
index 5523f08552..7d86565564 100644
--- a/usr/src/uts/common/sys/syscall.h
+++ b/usr/src/uts/common/sys/syscall.h
@@ -296,6 +296,7 @@ extern "C" {
#define SYS_writev 122
#define SYS_preadv 123
#define SYS_pwritev 124
+#define SYS_getrandom 126
#define SYS_mmapobj 127
#define SYS_setrlimit 128
#define SYS_getrlimit 129
diff --git a/usr/src/uts/common/syscall/getrandom.c b/usr/src/uts/common/syscall/getrandom.c
new file mode 100644
index 0000000000..46b650b0dc
--- /dev/null
+++ b/usr/src/uts/common/syscall/getrandom.c
@@ -0,0 +1,80 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2015, Joyent, Inc.
+ */
+
+/*
+ * getrandom system call implementation
+ */
+
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/systm.h>
+#include <sys/random.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/sysmacros.h>
+
+#include <sys/random.h>
+
+/*
+ * Upper bounds on the number of bytes a single getrandom() call will
+ * produce.
+ *
+ * MAXRANDBYTES applies when GRND_RANDOM is set, a la a read of /dev/random.
+ * MAXURANDBYTES applies otherwise (the /dev/urandom case): the system call
+ * returns the byte count as an int, so we can't report more than INT_MAX.
+ */
+#define MAXRANDBYTES 1024
+#define MAXURANDBYTES INT_MAX
+
+/*
+ * getrandom system call: copy up to buflen random bytes out to the user
+ * buffer bufp.
+ *
+ * flags may contain GRND_RANDOM and/or GRND_NONBLOCK; any other bit fails
+ * with EINVAL. With GRND_RANDOM the request is clamped to MAXRANDBYTES and
+ * is served by random_get_bytes() (GRND_NONBLOCK) or
+ * random_get_blocking_bytes(); without it the request is clamped to
+ * MAXURANDBYTES and served by random_get_pseudo_bytes().
+ *
+ * Returns the number of bytes copied out. If the source reports EAGAIN
+ * after some bytes have already been delivered, the short count is
+ * returned; any other failure (or EAGAIN with nothing delivered) is
+ * reported through set_errno(). A failed copyout yields EFAULT.
+ */
+int
+getrandom(void *bufp, size_t buflen, int flags)
+{
+ size_t out = 0;
+ uint8_t rbytes[128];
+ uint8_t *buf = bufp;
+
+ if (flags & ~(GRND_NONBLOCK | GRND_RANDOM))
+ return (set_errno(EINVAL));
+
+ if ((flags & GRND_RANDOM) && buflen > MAXRANDBYTES) {
+ buflen = MAXRANDBYTES;
+ } else if (buflen > MAXURANDBYTES) {
+ buflen = MAXURANDBYTES;
+ }
+
+ while (out < buflen) {
+ int err;
+ /*
+ * Only generate what is still missing. Sizing the chunk from
+ * buflen alone would make the final copyout run past the end
+ * of the caller's buffer whenever buflen is not a multiple of
+ * sizeof (rbytes).
+ */
+ size_t len = MIN(sizeof (rbytes), buflen - out);
+
+ if (flags & GRND_RANDOM) {
+ if (flags & GRND_NONBLOCK)
+ err = random_get_bytes(rbytes, len);
+ else
+ err = random_get_blocking_bytes(rbytes, len);
+ } else {
+ err = random_get_pseudo_bytes(rbytes, len);
+ }
+
+ if (err == 0) {
+ if (ddi_copyout(rbytes, buf + out, len, 0) != 0)
+ return (set_errno(EFAULT));
+ out += len;
+ } else if (err == EAGAIN && out > 0) {
+ /* Partial success: report what was delivered. */
+ break;
+ } else {
+ return (set_errno(err));
+ }
+ }
+
+ /* buflen was clamped to at most INT_MAX above, so this fits. */
+ return ((int)out);
+}
diff --git a/usr/src/uts/common/syscall/memcntl.c b/usr/src/uts/common/syscall/memcntl.c
index 63c8b64ad0..4f220bbca5 100644
--- a/usr/src/uts/common/syscall/memcntl.c
+++ b/usr/src/uts/common/syscall/memcntl.c
@@ -21,15 +21,13 @@
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
- * Copyright 2012 Joyent, Inc. All rights reserved.
+ * Copyright (c) 2015 Joyent, Inc.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/types.h>
#include <sys/bitmap.h>
#include <sys/sysmacros.h>
@@ -389,6 +387,10 @@ memcntl(caddr_t addr, size_t len, int cmd, caddr_t arg, int attr, int mask)
return (error);
}
break;
+ case MC_INHERIT_ZERO:
+ if (arg != 0 || attr != NULL || mask != 0)
+ return (set_errno(EINVAL));
+ break;
default:
return (set_errno(EINVAL));
}
diff --git a/usr/src/uts/common/vm/seg.h b/usr/src/uts/common/vm/seg.h
index 2e1e6a77de..343e308a82 100644
--- a/usr/src/uts/common/vm/seg.h
+++ b/usr/src/uts/common/vm/seg.h
@@ -21,6 +21,7 @@
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright (c) 2015, Joyent, Inc.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -39,8 +40,6 @@
#ifndef _VM_SEG_H
#define _VM_SEG_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/vnode.h>
#include <sys/avl.h>
#include <vm/seg_enum.h>
@@ -142,6 +141,7 @@ struct seg_ops {
int (*getmemid)(struct seg *, caddr_t, memid_t *);
struct lgrp_mem_policy_info *(*getpolicy)(struct seg *, caddr_t);
int (*capable)(struct seg *, segcapability_t);
+ int (*inherit)(struct seg *, caddr_t, size_t, uint_t);
};
#ifdef _KERNEL
@@ -238,6 +238,7 @@ extern segadvstat_t segadvstat;
#define SEGOP_GETMEMID(s, a, mp) (*(s)->s_ops->getmemid)((s), (a), (mp))
#define SEGOP_GETPOLICY(s, a) (*(s)->s_ops->getpolicy)((s), (a))
#define SEGOP_CAPABLE(s, c) (*(s)->s_ops->capable)((s), (c))
+#define SEGOP_INHERIT(s, a, l, b) (*(s)->s_ops->inherit)((s), (a), (l), (b))
#define seg_page(seg, addr) \
(((uintptr_t)((addr) - (seg)->s_base)) >> PAGESHIFT)
@@ -249,6 +250,11 @@ extern segadvstat_t segadvstat;
#define IE_RETRY -2 /* internal to seg layer */
#define IE_REATTACH -3 /* internal to seg layer */
+/* Values for SEGOP_INHERIT */
+#define SEGP_INH_ZERO 0x01
+
+int seg_inherit_notsup(struct seg *, caddr_t, size_t, uint_t);
+
/* Delay/retry factors for seg_p_mem_config_pre_del */
#define SEGP_PREDEL_DELAY_FACTOR 4
/*
diff --git a/usr/src/uts/common/vm/seg_dev.c b/usr/src/uts/common/vm/seg_dev.c
index 9d214024a5..6cf938a007 100644
--- a/usr/src/uts/common/vm/seg_dev.c
+++ b/usr/src/uts/common/vm/seg_dev.c
@@ -215,6 +215,7 @@ struct seg_ops segdev_ops = {
segdev_getmemid,
segdev_getpolicy,
segdev_capable,
+ seg_inherit_notsup
};
/*
diff --git a/usr/src/uts/common/vm/seg_kmem.c b/usr/src/uts/common/vm/seg_kmem.c
index 205aac9ded..90e1b73b70 100644
--- a/usr/src/uts/common/vm/seg_kmem.c
+++ b/usr/src/uts/common/vm/seg_kmem.c
@@ -797,6 +797,7 @@ static struct seg_ops segkmem_ops = {
segkmem_getmemid,
segkmem_getpolicy, /* getpolicy */
segkmem_capable, /* capable */
+ seg_inherit_notsup /* inherit */
};
int
diff --git a/usr/src/uts/common/vm/seg_kp.c b/usr/src/uts/common/vm/seg_kp.c
index 2fe1e5f17d..d33ff004f1 100644
--- a/usr/src/uts/common/vm/seg_kp.c
+++ b/usr/src/uts/common/vm/seg_kp.c
@@ -169,6 +169,7 @@ static struct seg_ops segkp_ops = {
segkp_getmemid, /* getmemid */
segkp_getpolicy, /* getpolicy */
segkp_capable, /* capable */
+ seg_inherit_notsup /* inherit */
};
diff --git a/usr/src/uts/common/vm/seg_kpm.c b/usr/src/uts/common/vm/seg_kpm.c
index 4f764588b1..0886513183 100644
--- a/usr/src/uts/common/vm/seg_kpm.c
+++ b/usr/src/uts/common/vm/seg_kpm.c
@@ -24,8 +24,6 @@
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
/*
* Kernel Physical Mapping (kpm) segment driver (segkpm).
*
@@ -136,6 +134,7 @@ static struct seg_ops segkpm_ops = {
SEGKPM_BADOP(int), /* getmemid */
SEGKPM_BADOP(lgrp_mem_policy_info_t *), /* getpolicy */
segkpm_capable, /* capable */
+ seg_inherit_notsup /* inherit */
};
/*
@@ -160,8 +159,8 @@ segkpm_create(struct seg *seg, void *argsp)
ASSERT(seg->s_as && RW_WRITE_HELD(&seg->s_as->a_lock));
ASSERT(btokpmp(seg->s_size) >= 1 &&
- kpmpageoff((uintptr_t)seg->s_base) == 0 &&
- kpmpageoff((uintptr_t)seg->s_base + seg->s_size) == 0);
+ kpmpageoff((uintptr_t)seg->s_base) == 0 &&
+ kpmpageoff((uintptr_t)seg->s_base + seg->s_size) == 0);
skd = kmem_zalloc(sizeof (struct segkpm_data), KM_SLEEP);
@@ -193,7 +192,7 @@ segkpm_create(struct seg *seg, void *argsp)
skd->skd_nvcolors = b->nvcolors;
p = skd->skd_va_select =
- kmem_zalloc(NCPU * b->nvcolors * sizeof (ushort_t), KM_SLEEP);
+ kmem_zalloc(NCPU * b->nvcolors * sizeof (ushort_t), KM_SLEEP);
for (i = 0; i < NCPU; i++)
for (j = 0; j < b->nvcolors; j++, p++)
diff --git a/usr/src/uts/common/vm/seg_map.c b/usr/src/uts/common/vm/seg_map.c
index a57d202a6a..1edb92e892 100644
--- a/usr/src/uts/common/vm/seg_map.c
+++ b/usr/src/uts/common/vm/seg_map.c
@@ -124,6 +124,7 @@ static struct seg_ops segmap_ops = {
segmap_getmemid, /* getmemid */
segmap_getpolicy, /* getpolicy */
segmap_capable, /* capable */
+ seg_inherit_notsup /* inherit */
};
/*
diff --git a/usr/src/uts/common/vm/seg_spt.c b/usr/src/uts/common/vm/seg_spt.c
index f087d5fc30..8d85fbaef7 100644
--- a/usr/src/uts/common/vm/seg_spt.c
+++ b/usr/src/uts/common/vm/seg_spt.c
@@ -109,6 +109,7 @@ struct seg_ops segspt_ops = {
SEGSPT_BADOP(int), /* getmemid */
segspt_getpolicy, /* getpolicy */
SEGSPT_BADOP(int), /* capable */
+ seg_inherit_notsup /* inherit */
};
static int segspt_shmdup(struct seg *seg, struct seg *newseg);
@@ -168,6 +169,7 @@ struct seg_ops segspt_shmops = {
segspt_shmgetmemid,
segspt_shmgetpolicy,
segspt_shmcapable,
+ seg_inherit_notsup
};
static void segspt_purge(struct seg *seg);
diff --git a/usr/src/uts/common/vm/seg_vn.c b/usr/src/uts/common/vm/seg_vn.c
index 0e07ce22f5..b33832c157 100644
--- a/usr/src/uts/common/vm/seg_vn.c
+++ b/usr/src/uts/common/vm/seg_vn.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 1986, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2014, Joyent, Inc. All rights reserved.
+ * Copyright 2015, Joyent, Inc. All rights reserved.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -113,6 +113,7 @@ static int segvn_getmemid(struct seg *seg, caddr_t addr,
memid_t *memidp);
static lgrp_mem_policy_info_t *segvn_getpolicy(struct seg *, caddr_t);
static int segvn_capable(struct seg *seg, segcapability_t capable);
+static int segvn_inherit(struct seg *, caddr_t, size_t, uint_t);
struct seg_ops segvn_ops = {
segvn_dup,
@@ -138,6 +139,7 @@ struct seg_ops segvn_ops = {
segvn_getmemid,
segvn_getpolicy,
segvn_capable,
+ segvn_inherit
};
/*
@@ -807,6 +809,7 @@ segvn_create(struct seg *seg, void *argsp)
svd->softlockcnt = 0;
svd->softlockcnt_sbase = 0;
svd->softlockcnt_send = 0;
+ svd->svn_inz = 0;
svd->rcookie = HAT_INVALID_REGION_COOKIE;
svd->pageswap = 0;
@@ -1465,6 +1468,81 @@ segvn_extend_next(
return (0);
}
+/*
+ * Duplicate all the pages in the segment. This may break COW sharing for a
+ * given page. If the page is marked with inherit zero set, then instead of
+ * duplicating the page, we zero the page.
+ *
+ * seg is the segment being duplicated and newseg is the copy being built.
+ * Both svd->amp and newsvd->amp are dereferenced unconditionally, so they
+ * must already be set up. Slots are read from seg's anon map starting at
+ * svd->anon_index and written to newseg's anon map starting at index 0;
+ * slots with no anon in seg are skipped.
+ *
+ * Returns 0 on success, the error from anon_getpage() if a page of seg
+ * cannot be obtained, or ENOMEM if no page could be obtained for newseg.
+ * On failure the walk stops at the offending page.
+ */
+static int
+segvn_dup_pages(struct seg *seg, struct seg *newseg)
+{
+ int error;
+ uint_t prot;
+ page_t *pp;
+ struct anon *ap, *newap;
+ size_t i;
+ caddr_t addr;
+
+ struct segvn_data *svd = (struct segvn_data *)seg->s_data;
+ struct segvn_data *newsvd = (struct segvn_data *)newseg->s_data;
+ ulong_t old_idx = svd->anon_index;
+ ulong_t new_idx = 0;
+
+ i = btopr(seg->s_size);
+ addr = seg->s_base;
+
+ /*
+ * XXX break cow sharing using PAGESIZE
+ * pages. They will be relocated into larger
+ * pages at fault time.
+ */
+ while (i-- > 0) {
+ if ((ap = anon_get_ptr(svd->amp->ahp, old_idx)) != NULL) {
+ struct vpage *vpp;
+
+ vpp = &svd->vpage[seg_page(seg, addr)];
+
+ /*
+ * prot need not be computed below 'cause anon_private
+ * is going to ignore it anyway as child doesn't inherit
+ * pagelock from parent.
+ *
+ * Note that vpp is only dereferenced when pageprot is
+ * set here, or when svn_inz is SEGVN_INZ_VPP below.
+ */
+ prot = svd->pageprot ? VPP_PROT(vpp) : svd->prot;
+
+ /*
+ * Check whether we should zero this or dup it.
+ * SEGVN_INZ_ALL covers every page of the segment;
+ * SEGVN_INZ_VPP defers to the per-page flag.
+ */
+ if (svd->svn_inz == SEGVN_INZ_ALL ||
+ (svd->svn_inz == SEGVN_INZ_VPP &&
+ VPP_ISINHZERO(vpp))) {
+ pp = anon_zero(newseg, addr, &newap,
+ newsvd->cred);
+ } else {
+ page_t *anon_pl[1+1];
+ uint_t vpprot;
+ error = anon_getpage(&ap, &vpprot, anon_pl,
+ PAGESIZE, seg, addr, S_READ, svd->cred);
+ if (error != 0)
+ return (error);
+
+ pp = anon_private(&newap, newseg, addr, prot,
+ anon_pl[0], 0, newsvd->cred);
+ }
+ if (pp == NULL) {
+ return (ENOMEM);
+ }
+ (void) anon_set_ptr(newsvd->amp->ahp, new_idx, newap,
+ ANON_SLEEP);
+ page_unlock(pp);
+ }
+ addr += PAGESIZE;
+ old_idx++;
+ new_idx++;
+ }
+
+ return (0);
+}
+
static int
segvn_dup(struct seg *seg, struct seg *newseg)
{
@@ -1472,7 +1550,6 @@ segvn_dup(struct seg *seg, struct seg *newseg)
struct segvn_data *newsvd;
pgcnt_t npages = seg_pages(seg);
int error = 0;
- uint_t prot;
size_t len;
struct anon_map *amp;
@@ -1516,6 +1593,7 @@ segvn_dup(struct seg *seg, struct seg *newseg)
crhold(newsvd->cred);
newsvd->advice = svd->advice;
newsvd->pageadvice = svd->pageadvice;
+ newsvd->svn_inz = svd->svn_inz;
newsvd->swresv = svd->swresv;
newsvd->pageswap = svd->pageswap;
newsvd->flags = svd->flags;
@@ -1545,6 +1623,7 @@ segvn_dup(struct seg *seg, struct seg *newseg)
ASSERT(svd->tr_state == SEGVN_TR_OFF);
newsvd->tr_state = SEGVN_TR_OFF;
if (svd->type == MAP_SHARED) {
+ ASSERT(svd->svn_inz == SEGVN_INZ_NONE);
newsvd->amp = amp;
ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER);
amp->refcnt++;
@@ -1560,6 +1639,9 @@ segvn_dup(struct seg *seg, struct seg *newseg)
ANON_SLEEP);
newsvd->amp->a_szc = newseg->s_szc;
newsvd->anon_index = 0;
+ ASSERT(svd->svn_inz == SEGVN_INZ_NONE ||
+ svd->svn_inz == SEGVN_INZ_ALL ||
+ svd->svn_inz == SEGVN_INZ_VPP);
/*
* We don't have to acquire the anon_map lock
@@ -1583,17 +1665,16 @@ segvn_dup(struct seg *seg, struct seg *newseg)
* The strategy here is to just break the
* sharing on pages that could possibly be
* softlocked.
+ *
+ * In addition, if any pages have been marked that they
+ * should be inherited as zero, then we immediately go
+ * ahead and break COW and zero them. In the case of a
+ * softlocked page that should be inherited zero, we
+ * break COW and just get a zero page.
*/
retry:
- if (svd->softlockcnt) {
- struct anon *ap, *newap;
- size_t i;
- uint_t vpprot;
- page_t *anon_pl[1+1], *pp;
- caddr_t addr;
- ulong_t old_idx = svd->anon_index;
- ulong_t new_idx = 0;
-
+ if (svd->softlockcnt ||
+ svd->svn_inz != SEGVN_INZ_NONE) {
/*
* The softlock count might be non zero
* because some pages are still stuck in the
@@ -1603,59 +1684,16 @@ retry:
* pages]. Note, we have the writers lock so
* nothing gets inserted during the flush.
*/
- if (reclaim == 1) {
+ if (svd->softlockcnt && reclaim == 1) {
segvn_purge(seg);
reclaim = 0;
goto retry;
}
- i = btopr(seg->s_size);
- addr = seg->s_base;
- /*
- * XXX break cow sharing using PAGESIZE
- * pages. They will be relocated into larger
- * pages at fault time.
- */
- while (i-- > 0) {
- if (ap = anon_get_ptr(amp->ahp,
- old_idx)) {
- error = anon_getpage(&ap,
- &vpprot, anon_pl, PAGESIZE,
- seg, addr, S_READ,
- svd->cred);
- if (error) {
- newsvd->vpage = NULL;
- goto out;
- }
- /*
- * prot need not be computed
- * below 'cause anon_private is
- * going to ignore it anyway
- * as child doesn't inherit
- * pagelock from parent.
- */
- prot = svd->pageprot ?
- VPP_PROT(
- &svd->vpage[
- seg_page(seg, addr)])
- : svd->prot;
- pp = anon_private(&newap,
- newseg, addr, prot,
- anon_pl[0], 0,
- newsvd->cred);
- if (pp == NULL) {
- /* no mem abort */
- newsvd->vpage = NULL;
- error = ENOMEM;
- goto out;
- }
- (void) anon_set_ptr(
- newsvd->amp->ahp, new_idx,
- newap, ANON_SLEEP);
- page_unlock(pp);
- }
- addr += PAGESIZE;
- old_idx++;
- new_idx++;
+
+ error = segvn_dup_pages(seg, newseg);
+ if (error != 0) {
+ newsvd->vpage = NULL;
+ goto out;
}
} else { /* common case */
if (seg->s_szc != 0) {
@@ -2192,6 +2230,7 @@ retry:
nsvd->softlockcnt = 0;
nsvd->softlockcnt_sbase = 0;
nsvd->softlockcnt_send = 0;
+ nsvd->svn_inz = svd->svn_inz;
ASSERT(nsvd->rcookie == HAT_INVALID_REGION_COOKIE);
if (svd->vp != NULL) {
@@ -8004,7 +8043,7 @@ out:
/*
* Set advice from user for specified pages
- * There are 5 types of advice:
+ * There are 9 types of advice:
* MADV_NORMAL - Normal (default) behavior (whatever that is)
* MADV_RANDOM - Random page references
* do not allow readahead or 'klustering'
@@ -8486,6 +8525,81 @@ segvn_advise(struct seg *seg, caddr_t addr, size_t len, uint_t behav)
}
/*
+ * There is one kind of inheritance that can be specified for pages:
+ *
+ * SEGP_INH_ZERO - Pages should be zeroed in the child
+ *
+ * Marks the range [addr, addr + len) of seg as inherit zero. The caller
+ * must hold the address space lock; the segment's own lock is taken as
+ * writer for the duration of the update.
+ *
+ * A request covering the whole segment sets svn_inz to SEGVN_INZ_ALL. A
+ * partial request sets svn_inz to SEGVN_INZ_VPP and flags each struct
+ * vpage in the range, creating the vpage array first if the segment has
+ * none. Once the segment is SEGVN_INZ_ALL, further requests are no-ops.
+ * This function only ever sets the marking; it never clears it.
+ *
+ * Returns 0 on success, ENOTSUP for any behav other than SEGP_INH_ZERO,
+ * EINVAL if the segment is not a private, non-vnode-backed mapping with
+ * text replication off, and ENOMEM if the vpage array is still NULL after
+ * segvn_vpage().
+ */
+static int
+segvn_inherit(struct seg *seg, caddr_t addr, size_t len, uint_t behav)
+{
+ struct segvn_data *svd = (struct segvn_data *)seg->s_data;
+ struct vpage *bvpp, *evpp;
+ size_t page;
+ int ret = 0;
+
+ ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
+
+ /* Can't support something we don't know about */
+ if (behav != SEGP_INH_ZERO)
+ return (ENOTSUP);
+
+ SEGVN_LOCK_ENTER(seg->s_as, &svd->lock, RW_WRITER);
+
+ /*
+ * This must be a straightforward anonymous segment that is mapped
+ * privately and is not backed by a vnode.
+ */
+ if (svd->tr_state != SEGVN_TR_OFF ||
+ svd->type != MAP_PRIVATE ||
+ svd->vp != NULL) {
+ ret = EINVAL;
+ goto out;
+ }
+
+ /*
+ * If the entire segment has been marked as inherit zero, then no reason
+ * to do anything else.
+ */
+ if (svd->svn_inz == SEGVN_INZ_ALL) {
+ ret = 0;
+ goto out;
+ }
+
+ /*
+ * If this applies to the entire segment, simply mark it and we're done.
+ */
+ if ((addr == seg->s_base) && (len == seg->s_size)) {
+ svd->svn_inz = SEGVN_INZ_ALL;
+ ret = 0;
+ goto out;
+ }
+
+ /*
+ * We've been asked to mark a subset of this segment as inherit zero,
+ * therefore we need to manipulate its vpages.
+ */
+ if (svd->vpage == NULL) {
+ segvn_vpage(seg);
+ if (svd->vpage == NULL) {
+ ret = ENOMEM;
+ goto out;
+ }
+ }
+
+ svd->svn_inz = SEGVN_INZ_VPP;
+ page = seg_page(seg, addr);
+ bvpp = &svd->vpage[page];
+ evpp = &svd->vpage[page + (len >> PAGESHIFT)];
+ for (; bvpp < evpp; bvpp++)
+ VPP_SETINHZERO(bvpp);
+ ret = 0;
+
+out:
+ SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
+ return (ret);
+}
+
+/*
* Create a vpage structure for this seg.
*/
static void
diff --git a/usr/src/uts/common/vm/seg_vn.h b/usr/src/uts/common/vm/seg_vn.h
index f94e0cb873..51ebda3a84 100644
--- a/usr/src/uts/common/vm/seg_vn.h
+++ b/usr/src/uts/common/vm/seg_vn.h
@@ -21,6 +21,7 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright (c) 2015, Joyent, Inc. All rights reserved.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -98,6 +99,7 @@ typedef struct segvn_data {
size_t swresv; /* swap space reserved for this segment */
uchar_t advice; /* madvise flags for segment */
uchar_t pageadvice; /* true if per page advice set */
+ uchar_t svn_inz; /* true if pages marked as inherit zero */
ushort_t flags; /* flags - from sys/mman.h */
spgcnt_t softlockcnt; /* # of pages SOFTLOCKED in seg */
lgrp_mem_policy_info_t policy_info; /* memory allocation policy */
@@ -122,6 +124,13 @@ typedef struct segvn_data {
#define SEGVN_TR_OFF (2) /* Text replication is disabled */
/*
+ * Inherit zero states
+ */
+#define SEGVN_INZ_NONE (0) /* Nothing in the segment is inherit zero */
+#define SEGVN_INZ_ALL (1) /* Everything in the segment is inherit zero */
+#define SEGVN_INZ_VPP (2) /* Check struct vpages for inherit zero */
+
+/*
* Macros for segvn segment driver locking.
*/
#define SEGVN_LOCK_ENTER(as, lock, type) rw_enter((lock), (type))
diff --git a/usr/src/uts/common/vm/vm_as.c b/usr/src/uts/common/vm/vm_as.c
index 8caa257486..992254938f 100644
--- a/usr/src/uts/common/vm/vm_as.c
+++ b/usr/src/uts/common/vm/vm_as.c
@@ -21,7 +21,7 @@
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
- * Copyright 2012, Joyent, Inc. All rights reserved.
+ * Copyright 2015, Joyent, Inc. All rights reserved.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -2566,6 +2566,19 @@ retry:
}
break;
+ case MC_INHERIT_ZERO:
+ if (seg->s_ops->inherit == NULL) {
+ error = ENOTSUP;
+ } else {
+ error = SEGOP_INHERIT(seg, raddr, ssize,
+ SEGP_INH_ZERO);
+ }
+ if (error != 0) {
+ AS_LOCK_EXIT(as, &as->a_lock);
+ return (error);
+ }
+ break;
+
/*
* Can't happen.
*/
diff --git a/usr/src/uts/common/vm/vm_seg.c b/usr/src/uts/common/vm/vm_seg.c
index 65c6c5ecdc..e54401ddeb 100644
--- a/usr/src/uts/common/vm/vm_seg.c
+++ b/usr/src/uts/common/vm/vm_seg.c
@@ -21,6 +21,7 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright (c) 2015, Joyent, Inc.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -1851,3 +1852,13 @@ seg_swresv(struct seg *seg)
}
return (swap);
}
+
+/*
+ * General not supported function for SEGOP_INHERIT
+ *
+ * Segment drivers that cannot honor inheritance requests install this as
+ * the inherit entry of their seg_ops. All arguments are ignored and the
+ * call always fails with ENOTSUP.
+ */
+/* ARGSUSED */
+int
+seg_inherit_notsup(struct seg *seg, caddr_t addr, size_t len, uint_t op)
+{
+ return (ENOTSUP);
+}
diff --git a/usr/src/uts/common/vm/vpage.h b/usr/src/uts/common/vm/vpage.h
index 368bc629ff..5eaefb9738 100644
--- a/usr/src/uts/common/vm/vpage.h
+++ b/usr/src/uts/common/vm/vpage.h
@@ -21,6 +21,7 @@
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright (c) 2015, Joyent, Inc. All rights reserved.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -39,8 +40,6 @@
#ifndef _VM_VPAGE_H
#define _VM_VPAGE_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#ifdef __cplusplus
extern "C" {
#endif
@@ -57,9 +56,10 @@ struct vpage {
* This was changed from a bitfield to flags/macros in order
* to conserve space (uchar_t bitfields are not ANSI). This could
* have been condensed to a uchar_t, but at the expense of complexity.
- * We've stolen two bits from the top of nvp_advice: the first to store
- * pplock, and the second to identify pages for which we have reserved
- * swap space, but have not necessarily allocated anon slots.
+ * We've stolen three bits from the top of nvp_advice: the first to store
+ * pplock, the second to identify pages for which we have reserved
+ * swap space, but have not necessarily allocated anon slots, and the third to
+ * indicate that the page should be zeroed on fork.
*
* WARNING: VPP_SETADVICE(vpp, x) evaluates vpp twice, and VPP_PLOCK(vpp)
* returns a positive integer when the lock is held, not necessarily (1).
@@ -69,6 +69,7 @@ struct vpage {
#define VP_PPLOCK_SHIFT (0x07) /* offset of lock hiding inside nvp_advice */
#define VP_SWAPRES_MASK (0x40) /* Swap space has been reserved, but we */
/* might not have allocated an anon slot */
+#define VP_INHZERO_MASK (0x20) /* zero page on fork() */
#define VPP_PROT(vpp) ((vpp)->nvp_prot)
#define VPP_ADVICE(vpp) ((vpp)->nvp_advice & VP_ADVICE_MASK)
@@ -76,6 +77,8 @@ struct vpage {
((uchar_t)((vpp)->nvp_advice & VP_PPLOCK_MASK))
#define VPP_ISSWAPRES(vpp) \
((uchar_t)((vpp)->nvp_advice & VP_SWAPRES_MASK))
+#define VPP_ISINHZERO(vpp) \
+ ((uchar_t)((vpp)->nvp_advice & VP_INHZERO_MASK))
#define VPP_SETPROT(vpp, x) ((vpp)->nvp_prot = (x))
#define VPP_SETADVICE(vpp, x) \
@@ -85,6 +88,8 @@ struct vpage {
#define VPP_CLRPPLOCK(vpp) ((vpp)->nvp_advice &= ~VP_PPLOCK_MASK)
#define VPP_SETSWAPRES(vpp) ((vpp)->nvp_advice |= VP_SWAPRES_MASK)
#define VPP_CLRSWAPRES(vpp) ((vpp)->nvp_advice &= ~VP_SWAPRES_MASK)
+#define VPP_SETINHZERO(vpp) ((vpp)->nvp_advice |= VP_INHZERO_MASK)
+#define VPP_CLRINHZERO(vpp) ((vpp)->nvp_advice &= ~VP_INHZERO_MASK)
#ifdef __cplusplus
}