Diffstat (limited to 'usr/src/uts/common/syscall')
-rw-r--r--  usr/src/uts/common/syscall/brandsys.c   |    8
-rw-r--r--  usr/src/uts/common/syscall/fcntl.c      |    3
-rw-r--r--  usr/src/uts/common/syscall/memcntl.c    |    8
-rw-r--r--  usr/src/uts/common/syscall/open.c       |    8
-rw-r--r--  usr/src/uts/common/syscall/poll.c       |  328
-rw-r--r--  usr/src/uts/common/syscall/rusagesys.c  |   14
-rw-r--r--  usr/src/uts/common/syscall/rw.c         |  222
-rw-r--r--  usr/src/uts/common/syscall/sendfile.c   |   19
-rw-r--r--  usr/src/uts/common/syscall/stat.c       |    2
-rw-r--r--  usr/src/uts/common/syscall/sysconfig.c  |   24
-rw-r--r--  usr/src/uts/common/syscall/uadmin.c     |    6
11 files changed, 415 insertions(+), 227 deletions(-)
diff --git a/usr/src/uts/common/syscall/brandsys.c b/usr/src/uts/common/syscall/brandsys.c
index 9b4bd38baa..8ee5511fd0 100644
--- a/usr/src/uts/common/syscall/brandsys.c
+++ b/usr/src/uts/common/syscall/brandsys.c
@@ -23,7 +23,9 @@
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
+/*
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ */
#include <sys/brand.h>
#include <sys/systm.h>
@@ -35,7 +37,7 @@
*/
int64_t
brandsys(int cmd, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3,
- uintptr_t arg4, uintptr_t arg5, uintptr_t arg6)
+ uintptr_t arg4, uintptr_t arg5)
{
struct proc *p = curthread->t_procp;
int64_t rval = 0;
@@ -49,7 +51,7 @@ brandsys(int cmd, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3,
return (set_errno(ENOSYS));
if ((err = ZBROP(p->p_zone)->b_brandsys(cmd, &rval, arg1, arg2, arg3,
- arg4, arg5, arg6)) != 0)
+ arg4, arg5)) != 0)
return (set_errno(err));
return (rval);
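The hunks above drop the unused sixth argument from both brandsys() and the brand's b_brandsys hook. As a minimal sketch, a brand module's hook would now match the five-argument shape below; the brand name and command constant are hypothetical, only the signature comes from the diff:

    /*
     * Hypothetical brand hook matching the narrowed b_brandsys signature:
     * five uintptr_t arguments after the command and the result pointer.
     */
    static int
    mybrand_brandsys(int cmd, int64_t *rval, uintptr_t arg1, uintptr_t arg2,
        uintptr_t arg3, uintptr_t arg4, uintptr_t arg5)
    {
            switch (cmd) {
            case MYBRAND_EXAMPLE_CMD:       /* hypothetical command */
                    *rval = 0;
                    return (0);
            default:
                    return (ENOSYS);
            }
    }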
diff --git a/usr/src/uts/common/syscall/fcntl.c b/usr/src/uts/common/syscall/fcntl.c
index 371bc83c29..d631fe62f6 100644
--- a/usr/src/uts/common/syscall/fcntl.c
+++ b/usr/src/uts/common/syscall/fcntl.c
@@ -54,7 +54,8 @@
#include <sys/cmn_err.h>
-static int flock_check(vnode_t *, flock64_t *, offset_t, offset_t);
+/* This is global so that it can be used by brand emulation. */
+int flock_check(vnode_t *, flock64_t *, offset_t, offset_t);
static int flock_get_start(vnode_t *, flock64_t *, offset_t, u_offset_t *);
static void fd_too_big(proc_t *);
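With flock_check() exported, a brand emulation module can validate an emulated record-lock request the same way fcntl() itself does. A hedged sketch; the surrounding brand code and the MAXOFFSET_T bound are illustrative, only the prototype comes from the hunk above:

    extern int flock_check(vnode_t *, flock64_t *, offset_t, offset_t);

            /* Validate the emulated flock64_t before acting on it. */
            if ((error = flock_check(vp, &bf, offset, MAXOFFSET_T)) != 0)
                    return (error);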
diff --git a/usr/src/uts/common/syscall/memcntl.c b/usr/src/uts/common/syscall/memcntl.c
index 1ee4b6a395..721f884a7e 100644
--- a/usr/src/uts/common/syscall/memcntl.c
+++ b/usr/src/uts/common/syscall/memcntl.c
@@ -115,13 +115,17 @@ memcntl(caddr_t addr, size_t len, int cmd, caddr_t arg, int attr, int mask)
* MS_SYNC used to be defined to be zero but is now non-zero.
* For binary compatibility we still accept zero
* (the absence of MS_ASYNC) to mean the same thing.
+ * Binary compatibility is not an issue for MS_INVALCURPROC.
*/
iarg = (uintptr_t)arg;
if ((iarg & ~MS_INVALIDATE) == 0)
iarg |= MS_SYNC;
- if (((iarg & ~(MS_SYNC|MS_ASYNC|MS_INVALIDATE)) != 0) ||
- ((iarg & (MS_SYNC|MS_ASYNC)) == (MS_SYNC|MS_ASYNC))) {
+ if (((iarg &
+ ~(MS_SYNC|MS_ASYNC|MS_INVALIDATE|MS_INVALCURPROC)) != 0) ||
+ ((iarg & (MS_SYNC|MS_ASYNC)) == (MS_SYNC|MS_ASYNC)) ||
+ ((iarg & (MS_INVALIDATE|MS_INVALCURPROC)) ==
+ (MS_INVALIDATE|MS_INVALCURPROC))) {
error = set_errno(EINVAL);
} else {
error = as_ctl(as, addr, len, cmd, attr, iarg, NULL, 0);
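The new check admits MS_INVALCURPROC (its semantics are suggested by the HAT_CURPROC_PGUNLOAD reference in the rusagesys.c hunk below) while keeping it mutually exclusive with MS_INVALIDATE. In caller terms, a sketch of the combinations the check now accepts or rejects; MS_INVALCURPROC is a private flag and the calls are shown for illustration only:

    /* Accepted: classic invalidation across all processes. */
    (void) memcntl(addr, len, MC_SYNC,
        (caddr_t)(MS_ASYNC | MS_INVALIDATE), 0, 0);

    /* Accepted: the new current-process-only invalidation. */
    (void) memcntl(addr, len, MC_SYNC,
        (caddr_t)(MS_ASYNC | MS_INVALCURPROC), 0, 0);

    /* Rejected with EINVAL: the two invalidation modes are exclusive. */
    (void) memcntl(addr, len, MC_SYNC,
        (caddr_t)(MS_INVALIDATE | MS_INVALCURPROC), 0, 0);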
diff --git a/usr/src/uts/common/syscall/open.c b/usr/src/uts/common/syscall/open.c
index edb04c824b..874e31869c 100644
--- a/usr/src/uts/common/syscall/open.c
+++ b/usr/src/uts/common/syscall/open.c
@@ -74,12 +74,12 @@ copen(int startfd, char *fname, int filemode, int createmode)
if (filemode & (FSEARCH|FEXEC)) {
/*
- * Must be one or the other and neither FREAD nor FWRITE
+ * Must be one or the other.
* Must not be any of FAPPEND FCREAT FTRUNC FXATTR FXATTRDIROPEN
- * XXX: Should these just be silently ignored?
+ * XXX: Should these just be silently ignored like we
+ * silently ignore FREAD|FWRITE?
*/
- if ((filemode & (FREAD|FWRITE)) ||
- (filemode & (FSEARCH|FEXEC)) == (FSEARCH|FEXEC) ||
+ if ((filemode & (FSEARCH|FEXEC)) == (FSEARCH|FEXEC) ||
(filemode & (FAPPEND|FCREAT|FTRUNC|FXATTR|FXATTRDIROPEN)))
return (set_errno(EINVAL));
}
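In userland terms (O_SEARCH and O_EXEC map onto FSEARCH and FEXEC), the relaxed check means roughly the following; a hedged sketch, not an exhaustive statement of the flag rules:

    #include <fcntl.h>

    /*
     * Now accepted: access-mode bits alongside O_SEARCH are silently
     * ignored, where they previously drew EINVAL.
     */
    int dfd = open("/some/dir", O_SEARCH | O_RDWR);

    /*
     * Still rejected with EINVAL: O_CREAT (likewise append, truncate,
     * and the xattr flags) remains incompatible with O_SEARCH/O_EXEC.
     */
    int bad = open("/some/path", O_SEARCH | O_CREAT, 0644);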
diff --git a/usr/src/uts/common/syscall/poll.c b/usr/src/uts/common/syscall/poll.c
index cc125f127a..3d0a5cc04b 100644
--- a/usr/src/uts/common/syscall/poll.c
+++ b/usr/src/uts/common/syscall/poll.c
@@ -29,7 +29,7 @@
/*
* Copyright (c) 2012 by Delphix. All rights reserved.
- * Copyright 2015, Joyent, Inc.
+ * Copyright 2016, Joyent, Inc.
*/
/*
@@ -317,20 +317,58 @@ polllock(pollhead_t *php, kmutex_t *lp)
return (0);
}
-static int
-poll_common(pollfd_t *fds, nfds_t nfds, timespec_t *tsp, k_sigset_t *ksetp)
+int
+poll_copyin(pollstate_t *ps, pollfd_t *fds, nfds_t nfds)
+{
+ pollfd_t *pollfdp;
+ nfds_t old_nfds;
+
+ /*
+ * NOTE: for performance, buffers are saved across poll() calls.
+ * The theory is that if a process polls heavily, it tends to poll
+ * on the same set of descriptors. Therefore, we only reallocate
+ * buffers when nfds changes. There is no hysteresis control,
+ * because there is no data to suggest that this is necessary;
+ * the penalty of reallocating is not *that* great in any event.
+ */
+ old_nfds = ps->ps_nfds;
+ if (nfds != old_nfds) {
+ kmem_free(ps->ps_pollfd, old_nfds * sizeof (pollfd_t));
+ pollfdp = kmem_alloc(nfds * sizeof (pollfd_t), KM_SLEEP);
+ ps->ps_pollfd = pollfdp;
+ ps->ps_nfds = nfds;
+ }
+
+ pollfdp = ps->ps_pollfd;
+ if (copyin(fds, pollfdp, nfds * sizeof (pollfd_t))) {
+ return (EFAULT);
+ }
+
+ if (fds == NULL) {
+ /*
+ * If the process has page 0 mapped, then the copyin() above
+ * will succeed even if fds is NULL. However, our cached
+ * poll lists are keyed by the address of the passed-in fds
+ * structure, and we use the value NULL to indicate an unused
+ * poll cache list entry. As such, we elect not to support
+ * NULL as a valid (user) memory address and fail the poll()
+ * call.
+ */
+ return (EFAULT);
+ }
+ return (0);
+}
+
+int
+poll_common(pollstate_t *ps, pollfd_t *fds, nfds_t nfds, timespec_t *tsp,
+ int *fdcnt)
{
kthread_t *t = curthread;
- klwp_t *lwp = ttolwp(t);
proc_t *p = ttoproc(t);
- int fdcnt = 0;
- int i;
hrtime_t deadline; /* hrtime value when we want to return */
pollfd_t *pollfdp;
- pollstate_t *ps;
pollcache_t *pcp;
int error = 0;
- nfds_t old_nfds;
int cacheindex = 0; /* which cache set is used */
/*
@@ -348,32 +386,11 @@ poll_common(pollfd_t *fds, nfds_t nfds, timespec_t *tsp, k_sigset_t *ksetp)
}
/*
- * Reset our signal mask, if requested.
- */
- if (ksetp != NULL) {
- mutex_enter(&p->p_lock);
- schedctl_finish_sigblock(t);
- lwp->lwp_sigoldmask = t->t_hold;
- t->t_hold = *ksetp;
- t->t_flag |= T_TOMASK;
- /*
- * Call cv_reltimedwait_sig() just to check for signals.
- * We will return immediately with either 0 or -1.
- */
- if (!cv_reltimedwait_sig(&t->t_delay_cv, &p->p_lock, 0,
- TR_CLOCK_TICK)) {
- mutex_exit(&p->p_lock);
- error = EINTR;
- goto pollout;
- }
- mutex_exit(&p->p_lock);
- }
-
- /*
- * Check to see if this guy just wants to use poll() as a timeout.
+ * Check to see if the caller just wants to use poll() as a timeout.
* If yes then bypass all the other stuff and make him sleep.
*/
if (nfds == 0) {
+ *fdcnt = 0;
/*
* Sleep until we have passed the requested future
* time or until interrupted by a signal.
@@ -385,66 +402,14 @@ poll_common(pollfd_t *fds, nfds_t nfds, timespec_t *tsp, k_sigset_t *ksetp)
&t->t_delay_lock, deadline)) > 0)
continue;
mutex_exit(&t->t_delay_lock);
- error = (error == 0) ? EINTR : 0;
+ return ((error == 0) ? EINTR : 0);
}
- goto pollout;
- }
-
- if (nfds > p->p_fno_ctl) {
- mutex_enter(&p->p_lock);
- (void) rctl_action(rctlproc_legacy[RLIMIT_NOFILE],
- p->p_rctls, p, RCA_SAFE);
- mutex_exit(&p->p_lock);
- error = EINVAL;
- goto pollout;
- }
-
- /*
- * Need to allocate memory for pollstate before anything because
- * the mutex and cv are created in this space
- */
- ps = pollstate_create();
-
- if (ps->ps_pcache == NULL)
- ps->ps_pcache = pcache_alloc();
- pcp = ps->ps_pcache;
-
- /*
- * NOTE: for performance, buffers are saved across poll() calls.
- * The theory is that if a process polls heavily, it tends to poll
- * on the same set of descriptors. Therefore, we only reallocate
- * buffers when nfds changes. There is no hysteresis control,
- * because there is no data to suggest that this is necessary;
- * the penalty of reallocating is not *that* great in any event.
- */
- old_nfds = ps->ps_nfds;
- if (nfds != old_nfds) {
-
- kmem_free(ps->ps_pollfd, old_nfds * sizeof (pollfd_t));
- pollfdp = kmem_alloc(nfds * sizeof (pollfd_t), KM_SLEEP);
- ps->ps_pollfd = pollfdp;
- ps->ps_nfds = nfds;
+ return (0);
}
+ VERIFY(ps != NULL);
pollfdp = ps->ps_pollfd;
- if (copyin(fds, pollfdp, nfds * sizeof (pollfd_t))) {
- error = EFAULT;
- goto pollout;
- }
-
- if (fds == NULL) {
- /*
- * If the process has page 0 mapped, then the copyin() above
- * will succeed even if fds is NULL. However, our cached
- * poll lists are keyed by the address of the passed-in fds
- * structure, and we use the value NULL to indicate an unused
- * poll cache list entry. As such, we elect not to support
- * NULL as a valid (user) memory address and fail the poll()
- * call.
- */
- error = EINVAL;
- goto pollout;
- }
+ VERIFY(pollfdp != NULL);
/*
* If this thread polls for the first time, allocate ALL poll
@@ -460,10 +425,10 @@ poll_common(pollfd_t *fds, nfds_t nfds, timespec_t *tsp, k_sigset_t *ksetp)
/*
* poll and cache this poll fd list in ps_pcacheset[0].
*/
- error = pcacheset_cache_list(ps, fds, &fdcnt, cacheindex);
- if (fdcnt || error) {
+ error = pcacheset_cache_list(ps, fds, fdcnt, cacheindex);
+ if (error || *fdcnt) {
mutex_exit(&ps->ps_lock);
- goto pollout;
+ return (error);
}
} else {
pollcacheset_t *pcset = ps->ps_pcacheset;
@@ -488,11 +453,11 @@ poll_common(pollfd_t *fds, nfds_t nfds, timespec_t *tsp, k_sigset_t *ksetp)
* the callee will guarantee the consistency
* of cached poll list and cache content.
*/
- error = pcacheset_resolve(ps, nfds, &fdcnt,
+ error = pcacheset_resolve(ps, nfds, fdcnt,
cacheindex);
if (error) {
mutex_exit(&ps->ps_lock);
- goto pollout;
+ return (error);
}
break;
}
@@ -509,11 +474,11 @@ poll_common(pollfd_t *fds, nfds_t nfds, timespec_t *tsp, k_sigset_t *ksetp)
* found an unused entry. Use it to cache
* this poll list.
*/
- error = pcacheset_cache_list(ps, fds, &fdcnt,
+ error = pcacheset_cache_list(ps, fds, fdcnt,
cacheindex);
- if (fdcnt || error) {
+ if (error || *fdcnt) {
mutex_exit(&ps->ps_lock);
- goto pollout;
+ return (error);
}
break;
}
@@ -527,10 +492,10 @@ poll_common(pollfd_t *fds, nfds_t nfds, timespec_t *tsp, k_sigset_t *ksetp)
cacheindex = pcacheset_replace(ps);
ASSERT(cacheindex < ps->ps_nsets);
pcset[cacheindex].pcs_usradr = (uintptr_t)fds;
- error = pcacheset_resolve(ps, nfds, &fdcnt, cacheindex);
+ error = pcacheset_resolve(ps, nfds, fdcnt, cacheindex);
if (error) {
mutex_exit(&ps->ps_lock);
- goto pollout;
+ return (error);
}
}
}
@@ -548,8 +513,8 @@ poll_common(pollfd_t *fds, nfds_t nfds, timespec_t *tsp, k_sigset_t *ksetp)
mutex_enter(&pcp->pc_lock);
for (;;) {
pcp->pc_flag = 0;
- error = pcache_poll(pollfdp, ps, nfds, &fdcnt, cacheindex);
- if (fdcnt || error) {
+ error = pcache_poll(pollfdp, ps, nfds, fdcnt, cacheindex);
+ if (error || *fdcnt) {
mutex_exit(&pcp->pc_lock);
mutex_exit(&ps->ps_lock);
break;
@@ -595,13 +560,116 @@ poll_common(pollfd_t *fds, nfds_t nfds, timespec_t *tsp, k_sigset_t *ksetp)
mutex_enter(&pcp->pc_lock);
}
+ return (error);
+}
+
+/*
+ * This is the system call trap that poll(),
+ * select() and pselect() are built upon.
+ * It is a private interface between libc and the kernel.
+ */
+int
+pollsys(pollfd_t *fds, nfds_t nfds, timespec_t *timeoutp, sigset_t *setp)
+{
+ kthread_t *t = curthread;
+ klwp_t *lwp = ttolwp(t);
+ proc_t *p = ttoproc(t);
+ timespec_t ts;
+ timespec_t *tsp;
+ k_sigset_t kset;
+ pollstate_t *ps = NULL;
+ pollfd_t *pollfdp = NULL;
+ int error = 0, fdcnt = 0;
+
+ /*
+ * Copy in timeout
+ */
+ if (timeoutp == NULL) {
+ tsp = NULL;
+ } else {
+ if (get_udatamodel() == DATAMODEL_NATIVE) {
+ if (copyin(timeoutp, &ts, sizeof (ts)))
+ return (set_errno(EFAULT));
+ } else {
+ timespec32_t ts32;
+
+ if (copyin(timeoutp, &ts32, sizeof (ts32)))
+ return (set_errno(EFAULT));
+ TIMESPEC32_TO_TIMESPEC(&ts, &ts32)
+ }
+
+ if (itimerspecfix(&ts))
+ return (set_errno(EINVAL));
+ tsp = &ts;
+ }
+
+ /*
+ * Copy in and reset signal mask, if requested.
+ */
+ if (setp != NULL) {
+ sigset_t set;
+
+ if (copyin(setp, &set, sizeof (set)))
+ return (set_errno(EFAULT));
+ sigutok(&set, &kset);
+
+ mutex_enter(&p->p_lock);
+ schedctl_finish_sigblock(t);
+ lwp->lwp_sigoldmask = t->t_hold;
+ t->t_hold = kset;
+ t->t_flag |= T_TOMASK;
+ /*
+ * Call cv_reltimedwait_sig() just to check for signals.
+ * We will return immediately with either 0 or -1.
+ */
+ if (!cv_reltimedwait_sig(&t->t_delay_cv, &p->p_lock, 0,
+ TR_CLOCK_TICK)) {
+ mutex_exit(&p->p_lock);
+ error = EINTR;
+ goto pollout;
+ }
+ mutex_exit(&p->p_lock);
+ }
+
+ /*
+ * Initialize pollstate and copy in pollfd data if present.
+ * If nfds == 0, we will skip all of the copying and check steps and
+ * proceed directly into poll_common to process the supplied timeout.
+ */
+ if (nfds != 0) {
+ if (nfds > p->p_fno_ctl) {
+ mutex_enter(&p->p_lock);
+ (void) rctl_action(rctlproc_legacy[RLIMIT_NOFILE],
+ p->p_rctls, p, RCA_SAFE);
+ mutex_exit(&p->p_lock);
+ error = EINVAL;
+ goto pollout;
+ }
+
+ /*
+ * Need to allocate memory for pollstate before anything
+ * because the mutex and cv are created in this space
+ */
+ ps = pollstate_create();
+ if (ps->ps_pcache == NULL)
+ ps->ps_pcache = pcache_alloc();
+
+ if ((error = poll_copyin(ps, fds, nfds)) != 0)
+ goto pollout;
+ pollfdp = ps->ps_pollfd;
+ }
+
+ /*
+ * Perform the actual poll.
+ */
+ error = poll_common(ps, fds, nfds, tsp, &fdcnt);
+
pollout:
/*
- * If we changed the signal mask but we received
- * no signal then restore the signal mask.
- * Otherwise psig() will deal with the signal mask.
+ * If we changed the signal mask but we received no signal then restore
+ * the signal mask. Otherwise psig() will deal with the signal mask.
*/
- if (ksetp != NULL) {
+ if (setp != NULL) {
mutex_enter(&p->p_lock);
if (lwp->lwp_cursig == 0) {
t->t_hold = lwp->lwp_sigoldmask;
@@ -612,12 +680,10 @@ pollout:
if (error)
return (set_errno(error));
-
/*
* Copy out the events and return the fdcnt to the user.
*/
- if (nfds != 0 &&
- copyout(pollfdp, fds, nfds * sizeof (pollfd_t)))
+ if (nfds != 0 && copyout(pollfdp, fds, nfds * sizeof (pollfd_t)))
return (set_errno(EFAULT));
#ifdef DEBUG
@@ -625,7 +691,7 @@ pollout:
* Another sanity check:
*/
if (fdcnt) {
- int reventcnt = 0;
+ int i, reventcnt = 0;
for (i = 0; i < nfds; i++) {
if (pollfdp[i].fd < 0) {
@@ -638,6 +704,8 @@ pollout:
}
ASSERT(fdcnt == reventcnt);
} else {
+ int i;
+
for (i = 0; i < nfds; i++) {
ASSERT(pollfdp[i].revents == 0);
}
@@ -648,52 +716,6 @@ pollout:
}
/*
- * This is the system call trap that poll(),
- * select() and pselect() are built upon.
- * It is a private interface between libc and the kernel.
- */
-int
-pollsys(pollfd_t *fds, nfds_t nfds, timespec_t *timeoutp, sigset_t *setp)
-{
- timespec_t ts;
- timespec_t *tsp;
- sigset_t set;
- k_sigset_t kset;
- k_sigset_t *ksetp;
- model_t datamodel = get_udatamodel();
-
- if (timeoutp == NULL)
- tsp = NULL;
- else {
- if (datamodel == DATAMODEL_NATIVE) {
- if (copyin(timeoutp, &ts, sizeof (ts)))
- return (set_errno(EFAULT));
- } else {
- timespec32_t ts32;
-
- if (copyin(timeoutp, &ts32, sizeof (ts32)))
- return (set_errno(EFAULT));
- TIMESPEC32_TO_TIMESPEC(&ts, &ts32)
- }
-
- if (itimerspecfix(&ts))
- return (set_errno(EINVAL));
- tsp = &ts;
- }
-
- if (setp == NULL)
- ksetp = NULL;
- else {
- if (copyin(setp, &set, sizeof (set)))
- return (set_errno(EFAULT));
- sigutok(&set, &kset);
- ksetp = &kset;
- }
-
- return (poll_common(fds, nfds, tsp, ksetp));
-}
-
-/*
* Clean up any state left around by poll(2). Called when a thread exits.
*/
void
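The refactor leaves the userland contract untouched: pollsys() is still the one trap beneath poll(), select(), and pselect(), now doing all copyin/copyout and signal-mask handling itself before handing a ready pollstate_t to poll_common(). For orientation, a sketch of the libc-level view; illustrative only, the timeout conversion happens inside libc:

    #include <poll.h>

    /*
     * poll(fds, nfds, 5000) is roughly pollsys(fds, nfds, &ts, NULL)
     * with ts = { 5, 0 }; pselect() additionally passes its signal
     * mask as the fourth argument.
     */
    struct pollfd pfd = { .fd = sock_fd, .events = POLLIN };
    int n = poll(&pfd, 1, 5000);
    if (n > 0 && (pfd.revents & POLLIN)) {
            /* descriptor is readable */
    }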
diff --git a/usr/src/uts/common/syscall/rusagesys.c b/usr/src/uts/common/syscall/rusagesys.c
index 3e0e63f4c0..417c629168 100644
--- a/usr/src/uts/common/syscall/rusagesys.c
+++ b/usr/src/uts/common/syscall/rusagesys.c
@@ -21,6 +21,7 @@
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
*/
/*
@@ -257,6 +258,19 @@ rusagesys(int code, void *arg1, void *arg2, void *arg3, void *arg4)
case _RUSAGESYS_GETVMUSAGE:
return (vm_getusage((uint_t)(uintptr_t)arg1, (time_t)arg2,
(vmusage_t *)arg3, (size_t *)arg4, 0));
+ case _RUSAGESYS_INVALMAP:
+ /*
+ * SPARC sfmmu hat does not support HAT_CURPROC_PGUNLOAD
+ * handling so callers on SPARC should get simple sync
+ * handling with invalidation to all processes.
+ */
+#if defined(__sparc)
+ return (memcntl((caddr_t)arg2, (size_t)arg3, MC_SYNC,
+ (caddr_t)(MS_ASYNC | MS_INVALIDATE), 0, 0));
+#else
+ return (vm_map_inval((pid_t)(uintptr_t)arg1, (caddr_t)arg2,
+ (size_t)arg3));
+#endif
default:
return (set_errno(EINVAL));
}
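The practical effect of the platform split, sketched for a hypothetical direct invocation; the subcode is a private interface and real callers would go through library wrappers:

    /*
     * x86 and others: unload only the target process's mappings over
     * [addr, addr + len) via vm_map_inval().  SPARC sfmmu lacks
     * HAT_CURPROC_PGUNLOAD, so the same request degrades to
     * MS_ASYNC | MS_INVALIDATE, touching every process's mappings.
     */
    (void) syscall(SYS_rusagesys, _RUSAGESYS_INVALMAP, pid, addr, len);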
diff --git a/usr/src/uts/common/syscall/rw.c b/usr/src/uts/common/syscall/rw.c
index a28894b2c9..943b7d244e 100644
--- a/usr/src/uts/common/syscall/rw.c
+++ b/usr/src/uts/common/syscall/rw.c
@@ -22,7 +22,7 @@
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
- * Copyright (c) 2015, Joyent, Inc. All rights reserved.
+ * Copyright 2015, Joyent, Inc. All rights reserved.
*/
/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
@@ -50,6 +50,7 @@
#include <sys/debug.h>
#include <sys/rctl.h>
#include <sys/nbmlock.h>
+#include <sys/limits.h>
#define COPYOUT_MAX_CACHE (1<<17) /* 128K */
@@ -607,19 +608,12 @@ out:
return (bcount);
}
-/*
- * XXX -- The SVID refers to IOV_MAX, but doesn't define it. Grrrr....
- * XXX -- However, SVVS expects readv() and writev() to fail if
- * XXX -- iovcnt > 16 (yes, it's hard-coded in the SVVS source),
- * XXX -- so I guess that's the "interface".
- */
-#define DEF_IOV_MAX 16
-
ssize_t
readv(int fdes, struct iovec *iovp, int iovcnt)
{
struct uio auio;
- struct iovec aiov[DEF_IOV_MAX];
+ struct iovec buf[IOV_MAX_STACK], *aiov = buf;
+ int aiovlen = 0;
file_t *fp;
register vnode_t *vp;
struct cpu *cp;
@@ -630,9 +624,14 @@ readv(int fdes, struct iovec *iovp, int iovcnt)
u_offset_t fileoff;
int in_crit = 0;
- if (iovcnt <= 0 || iovcnt > DEF_IOV_MAX)
+ if (iovcnt <= 0 || iovcnt > IOV_MAX)
return (set_errno(EINVAL));
+ if (iovcnt > IOV_MAX_STACK) {
+ aiovlen = iovcnt * sizeof (iovec_t);
+ aiov = kmem_alloc(aiovlen, KM_SLEEP);
+ }
+
#ifdef _SYSCALL32_IMPL
/*
* 32-bit callers need to have their iovec expanded,
@@ -640,36 +639,63 @@ readv(int fdes, struct iovec *iovp, int iovcnt)
* of data in a single call.
*/
if (get_udatamodel() == DATAMODEL_ILP32) {
- struct iovec32 aiov32[DEF_IOV_MAX];
+ struct iovec32 buf32[IOV_MAX_STACK], *aiov32 = buf32;
+ int aiov32len;
ssize32_t count32;
- if (copyin(iovp, aiov32, iovcnt * sizeof (struct iovec32)))
+ aiov32len = iovcnt * sizeof (iovec32_t);
+ if (aiovlen != 0)
+ aiov32 = kmem_alloc(aiov32len, KM_SLEEP);
+
+ if (copyin(iovp, aiov32, aiov32len)) {
+ if (aiovlen != 0) {
+ kmem_free(aiov32, aiov32len);
+ kmem_free(aiov, aiovlen);
+ }
return (set_errno(EFAULT));
+ }
count32 = 0;
for (i = 0; i < iovcnt; i++) {
ssize32_t iovlen32 = aiov32[i].iov_len;
count32 += iovlen32;
- if (iovlen32 < 0 || count32 < 0)
+ if (iovlen32 < 0 || count32 < 0) {
+ if (aiovlen != 0) {
+ kmem_free(aiov32, aiov32len);
+ kmem_free(aiov, aiovlen);
+ }
return (set_errno(EINVAL));
+ }
aiov[i].iov_len = iovlen32;
aiov[i].iov_base =
(caddr_t)(uintptr_t)aiov32[i].iov_base;
}
+
+ if (aiovlen != 0)
+ kmem_free(aiov32, aiov32len);
} else
#endif
- if (copyin(iovp, aiov, iovcnt * sizeof (struct iovec)))
+ if (copyin(iovp, aiov, iovcnt * sizeof (iovec_t))) {
+ if (aiovlen != 0)
+ kmem_free(aiov, aiovlen);
return (set_errno(EFAULT));
+ }
count = 0;
for (i = 0; i < iovcnt; i++) {
ssize_t iovlen = aiov[i].iov_len;
count += iovlen;
- if (iovlen < 0 || count < 0)
+ if (iovlen < 0 || count < 0) {
+ if (aiovlen != 0)
+ kmem_free(aiov, aiovlen);
return (set_errno(EINVAL));
+ }
}
- if ((fp = getf(fdes)) == NULL)
+ if ((fp = getf(fdes)) == NULL) {
+ if (aiovlen != 0)
+ kmem_free(aiov, aiovlen);
return (set_errno(EBADF));
+ }
if (((fflag = fp->f_flag) & FREAD) == 0) {
error = EBADF;
goto out;
@@ -768,6 +794,8 @@ out:
if (in_crit)
nbl_end_crit(vp);
releasef(fdes);
+ if (aiovlen != 0)
+ kmem_free(aiov, aiovlen);
if (error)
return (set_errno(error));
return (count);
@@ -777,7 +805,8 @@ ssize_t
writev(int fdes, struct iovec *iovp, int iovcnt)
{
struct uio auio;
- struct iovec aiov[DEF_IOV_MAX];
+ struct iovec buf[IOV_MAX_STACK], *aiov = buf;
+ int aiovlen = 0;
file_t *fp;
register vnode_t *vp;
struct cpu *cp;
@@ -788,9 +817,14 @@ writev(int fdes, struct iovec *iovp, int iovcnt)
u_offset_t fileoff;
int in_crit = 0;
- if (iovcnt <= 0 || iovcnt > DEF_IOV_MAX)
+ if (iovcnt <= 0 || iovcnt > IOV_MAX)
return (set_errno(EINVAL));
+ if (iovcnt > IOV_MAX_STACK) {
+ aiovlen = iovcnt * sizeof (iovec_t);
+ aiov = kmem_alloc(aiovlen, KM_SLEEP);
+ }
+
#ifdef _SYSCALL32_IMPL
/*
* 32-bit callers need to have their iovec expanded,
@@ -798,36 +832,62 @@ writev(int fdes, struct iovec *iovp, int iovcnt)
* of data in a single call.
*/
if (get_udatamodel() == DATAMODEL_ILP32) {
- struct iovec32 aiov32[DEF_IOV_MAX];
+ struct iovec32 buf32[IOV_MAX_STACK], *aiov32 = buf32;
+ int aiov32len;
ssize32_t count32;
- if (copyin(iovp, aiov32, iovcnt * sizeof (struct iovec32)))
+ aiov32len = iovcnt * sizeof (iovec32_t);
+ if (aiovlen != 0)
+ aiov32 = kmem_alloc(aiov32len, KM_SLEEP);
+
+ if (copyin(iovp, aiov32, aiov32len)) {
+ if (aiovlen != 0) {
+ kmem_free(aiov32, aiov32len);
+ kmem_free(aiov, aiovlen);
+ }
return (set_errno(EFAULT));
+ }
count32 = 0;
for (i = 0; i < iovcnt; i++) {
ssize32_t iovlen = aiov32[i].iov_len;
count32 += iovlen;
- if (iovlen < 0 || count32 < 0)
+ if (iovlen < 0 || count32 < 0) {
+ if (aiovlen != 0) {
+ kmem_free(aiov32, aiov32len);
+ kmem_free(aiov, aiovlen);
+ }
return (set_errno(EINVAL));
+ }
aiov[i].iov_len = iovlen;
aiov[i].iov_base =
(caddr_t)(uintptr_t)aiov32[i].iov_base;
}
+ if (aiovlen != 0)
+ kmem_free(aiov32, aiov32len);
} else
#endif
- if (copyin(iovp, aiov, iovcnt * sizeof (struct iovec)))
+ if (copyin(iovp, aiov, iovcnt * sizeof (iovec_t))) {
+ if (aiovlen != 0)
+ kmem_free(aiov, aiovlen);
return (set_errno(EFAULT));
+ }
count = 0;
for (i = 0; i < iovcnt; i++) {
ssize_t iovlen = aiov[i].iov_len;
count += iovlen;
- if (iovlen < 0 || count < 0)
+ if (iovlen < 0 || count < 0) {
+ if (aiovlen != 0)
+ kmem_free(aiov, aiovlen);
return (set_errno(EINVAL));
+ }
}
- if ((fp = getf(fdes)) == NULL)
+ if ((fp = getf(fdes)) == NULL) {
+ if (aiovlen != 0)
+ kmem_free(aiov, aiovlen);
return (set_errno(EBADF));
+ }
if (((fflag = fp->f_flag) & FWRITE) == 0) {
error = EBADF;
goto out;
@@ -917,6 +977,8 @@ out:
if (in_crit)
nbl_end_crit(vp);
releasef(fdes);
+ if (aiovlen != 0)
+ kmem_free(aiov, aiovlen);
if (error)
return (set_errno(error));
return (count);
@@ -927,7 +989,8 @@ preadv(int fdes, struct iovec *iovp, int iovcnt, off_t offset,
off_t extended_offset)
{
struct uio auio;
- struct iovec aiov[DEF_IOV_MAX];
+ struct iovec buf[IOV_MAX_STACK], *aiov = buf;
+ int aiovlen = 0;
file_t *fp;
register vnode_t *vp;
struct cpu *cp;
@@ -952,9 +1015,14 @@ preadv(int fdes, struct iovec *iovp, int iovcnt, off_t offset,
int in_crit = 0;
- if (iovcnt <= 0 || iovcnt > DEF_IOV_MAX)
+ if (iovcnt <= 0 || iovcnt > IOV_MAX)
return (set_errno(EINVAL));
+ if (iovcnt > IOV_MAX_STACK) {
+ aiovlen = iovcnt * sizeof (iovec_t);
+ aiov = kmem_alloc(aiovlen, KM_SLEEP);
+ }
+
#ifdef _SYSCALL32_IMPL
/*
* 32-bit callers need to have their iovec expanded,
@@ -962,39 +1030,68 @@ preadv(int fdes, struct iovec *iovp, int iovcnt, off_t offset,
* of data in a single call.
*/
if (get_udatamodel() == DATAMODEL_ILP32) {
- struct iovec32 aiov32[DEF_IOV_MAX];
+ struct iovec32 buf32[IOV_MAX_STACK], *aiov32 = buf32;
+ int aiov32len;
ssize32_t count32;
- if (copyin(iovp, aiov32, iovcnt * sizeof (struct iovec32)))
+ aiov32len = iovcnt * sizeof (iovec32_t);
+ if (aiovlen != 0)
+ aiov32 = kmem_alloc(aiov32len, KM_SLEEP);
+
+ if (copyin(iovp, aiov32, aiov32len)) {
+ if (aiovlen != 0) {
+ kmem_free(aiov32, aiov32len);
+ kmem_free(aiov, aiovlen);
+ }
return (set_errno(EFAULT));
+ }
count32 = 0;
for (i = 0; i < iovcnt; i++) {
ssize32_t iovlen32 = aiov32[i].iov_len;
count32 += iovlen32;
- if (iovlen32 < 0 || count32 < 0)
+ if (iovlen32 < 0 || count32 < 0) {
+ if (aiovlen != 0) {
+ kmem_free(aiov32, aiov32len);
+ kmem_free(aiov, aiovlen);
+ }
return (set_errno(EINVAL));
+ }
aiov[i].iov_len = iovlen32;
aiov[i].iov_base =
(caddr_t)(uintptr_t)aiov32[i].iov_base;
}
+ if (aiovlen != 0)
+ kmem_free(aiov32, aiov32len);
} else
#endif /* _SYSCALL32_IMPL */
- if (copyin(iovp, aiov, iovcnt * sizeof (struct iovec)))
+ if (copyin(iovp, aiov, iovcnt * sizeof (iovec_t))) {
+ if (aiovlen != 0)
+ kmem_free(aiov, aiovlen);
return (set_errno(EFAULT));
+ }
count = 0;
for (i = 0; i < iovcnt; i++) {
ssize_t iovlen = aiov[i].iov_len;
count += iovlen;
- if (iovlen < 0 || count < 0)
+ if (iovlen < 0 || count < 0) {
+ if (aiovlen != 0)
+ kmem_free(aiov, aiovlen);
return (set_errno(EINVAL));
+ }
}
- if ((bcount = (ssize_t)count) < 0)
+ if ((bcount = (ssize_t)count) < 0) {
+ if (aiovlen != 0)
+ kmem_free(aiov, aiovlen);
return (set_errno(EINVAL));
- if ((fp = getf(fdes)) == NULL)
+ }
+ if ((fp = getf(fdes)) == NULL) {
+ if (aiovlen != 0)
+ kmem_free(aiov, aiovlen);
return (set_errno(EBADF));
+ }
if (((fflag = fp->f_flag) & FREAD) == 0) {
error = EBADF;
goto out;
@@ -1099,6 +1196,8 @@ out:
if (in_crit)
nbl_end_crit(vp);
releasef(fdes);
+ if (aiovlen != 0)
+ kmem_free(aiov, aiovlen);
if (error)
return (set_errno(error));
return (count);
@@ -1109,7 +1208,8 @@ pwritev(int fdes, struct iovec *iovp, int iovcnt, off_t offset,
off_t extended_offset)
{
struct uio auio;
- struct iovec aiov[DEF_IOV_MAX];
+ struct iovec buf[IOV_MAX_STACK], *aiov = buf;
+ int aiovlen = 0;
file_t *fp;
register vnode_t *vp;
struct cpu *cp;
@@ -1134,9 +1234,14 @@ pwritev(int fdes, struct iovec *iovp, int iovcnt, off_t offset,
int in_crit = 0;
- if (iovcnt <= 0 || iovcnt > DEF_IOV_MAX)
+ if (iovcnt <= 0 || iovcnt > IOV_MAX)
return (set_errno(EINVAL));
+ if (iovcnt > IOV_MAX_STACK) {
+ aiovlen = iovcnt * sizeof (iovec_t);
+ aiov = kmem_alloc(aiovlen, KM_SLEEP);
+ }
+
#ifdef _SYSCALL32_IMPL
/*
* 32-bit callers need to have their iovec expanded,
@@ -1144,39 +1249,68 @@ pwritev(int fdes, struct iovec *iovp, int iovcnt, off_t offset,
* of data in a single call.
*/
if (get_udatamodel() == DATAMODEL_ILP32) {
- struct iovec32 aiov32[DEF_IOV_MAX];
+ struct iovec32 buf32[IOV_MAX_STACK], *aiov32 = buf32;
+ int aiov32len;
ssize32_t count32;
- if (copyin(iovp, aiov32, iovcnt * sizeof (struct iovec32)))
+ aiov32len = iovcnt * sizeof (iovec32_t);
+ if (aiovlen != 0)
+ aiov32 = kmem_alloc(aiov32len, KM_SLEEP);
+
+ if (copyin(iovp, aiov32, aiov32len)) {
+ if (aiovlen != 0) {
+ kmem_free(aiov32, aiov32len);
+ kmem_free(aiov, aiovlen);
+ }
return (set_errno(EFAULT));
+ }
count32 = 0;
for (i = 0; i < iovcnt; i++) {
ssize32_t iovlen32 = aiov32[i].iov_len;
count32 += iovlen32;
- if (iovlen32 < 0 || count32 < 0)
+ if (iovlen32 < 0 || count32 < 0) {
+ if (aiovlen != 0) {
+ kmem_free(aiov32, aiov32len);
+ kmem_free(aiov, aiovlen);
+ }
return (set_errno(EINVAL));
+ }
aiov[i].iov_len = iovlen32;
aiov[i].iov_base =
(caddr_t)(uintptr_t)aiov32[i].iov_base;
}
+ if (aiovlen != 0)
+ kmem_free(aiov32, aiov32len);
} else
#endif /* _SYSCALL32_IMPL */
- if (copyin(iovp, aiov, iovcnt * sizeof (struct iovec)))
+ if (copyin(iovp, aiov, iovcnt * sizeof (iovec_t))) {
+ if (aiovlen != 0)
+ kmem_free(aiov, aiovlen);
return (set_errno(EFAULT));
+ }
count = 0;
for (i = 0; i < iovcnt; i++) {
ssize_t iovlen = aiov[i].iov_len;
count += iovlen;
- if (iovlen < 0 || count < 0)
+ if (iovlen < 0 || count < 0) {
+ if (aiovlen != 0)
+ kmem_free(aiov, aiovlen);
return (set_errno(EINVAL));
+ }
}
- if ((bcount = (ssize_t)count) < 0)
+ if ((bcount = (ssize_t)count) < 0) {
+ if (aiovlen != 0)
+ kmem_free(aiov, aiovlen);
return (set_errno(EINVAL));
- if ((fp = getf(fdes)) == NULL)
+ }
+ if ((fp = getf(fdes)) == NULL) {
+ if (aiovlen != 0)
+ kmem_free(aiov, aiovlen);
return (set_errno(EBADF));
+ }
if (((fflag = fp->f_flag) & FWRITE) == 0) {
error = EBADF;
goto out;
@@ -1308,6 +1442,8 @@ out:
if (in_crit)
nbl_end_crit(vp);
releasef(fdes);
+ if (aiovlen != 0)
+ kmem_free(aiov, aiovlen);
if (error)
return (set_errno(error));
return (count);
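All four vectored-I/O entry points now share one allocation strategy, raising the limit from the old hard-coded 16 (DEF_IOV_MAX) to IOV_MAX while keeping the common case off the heap. Distilled from the hunks above; IOV_MAX_STACK comes from the newly included <sys/limits.h>:

    struct iovec buf[IOV_MAX_STACK], *aiov = buf;
    int aiovlen = 0;

    if (iovcnt <= 0 || iovcnt > IOV_MAX)
            return (set_errno(EINVAL));

    /* Small vectors stay on the stack; larger ones move to the heap. */
    if (iovcnt > IOV_MAX_STACK) {
            aiovlen = iovcnt * sizeof (iovec_t);
            aiov = kmem_alloc(aiovlen, KM_SLEEP);
    }

    /* ... use aiov ...; then on every exit path, including errors: */
    if (aiovlen != 0)
            kmem_free(aiov, aiovlen);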
diff --git a/usr/src/uts/common/syscall/sendfile.c b/usr/src/uts/common/syscall/sendfile.c
index cb8246f584..ccceca7c6d 100644
--- a/usr/src/uts/common/syscall/sendfile.c
+++ b/usr/src/uts/common/syscall/sendfile.c
@@ -82,7 +82,7 @@ extern sotpi_info_t *sotpi_sototpi(struct sonode *);
* 64 bit kernel or 32 bit kernel. For 32 bit apps, we can't transfer
* more than 2GB of data.
*/
-int
+static int
sendvec_chunk64(file_t *fp, u_offset_t *fileoff, struct ksendfilevec64 *sfv,
int copy_cnt, ssize32_t *count)
{
@@ -343,7 +343,7 @@ sendvec_chunk64(file_t *fp, u_offset_t *fileoff, struct ksendfilevec64 *sfv,
return (0);
}
-ssize32_t
+static ssize32_t
sendvec64(file_t *fp, const struct ksendfilevec64 *vec, int sfvcnt,
size32_t *xferred, int fildes)
{
@@ -390,7 +390,7 @@ sendvec64(file_t *fp, const struct ksendfilevec64 *vec, int sfvcnt,
}
#endif
-int
+static int
sendvec_small_chunk(file_t *fp, u_offset_t *fileoff, struct sendfilevec *sfv,
int copy_cnt, ssize_t total_size, int maxblk, ssize_t *count)
{
@@ -680,7 +680,7 @@ sendvec_small_chunk(file_t *fp, u_offset_t *fileoff, struct sendfilevec *sfv,
}
-int
+static int
sendvec_chunk(file_t *fp, u_offset_t *fileoff, struct sendfilevec *sfv,
int copy_cnt, ssize_t *count)
{
@@ -1160,6 +1160,17 @@ sendfilev(int opcode, int fildes, const struct sendfilevec *vec, int sfvcnt,
} else {
maxblk = (int)vp->v_stream->sd_maxblk;
}
+
+ /*
+ * We need to make sure that the socket that we're sending on
+ * supports sendfile behavior. sockfs doesn't know that the APIs
+ * we want to use are coming from sendfile, so we can't rely on
+ * it to check for us.
+ */
+ if ((so->so_mode & SM_SENDFILESUPP) == 0) {
+ error = EOPNOTSUPP;
+ goto err;
+ }
break;
case VREG:
break;
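From the caller's side, the SM_SENDFILESUPP check surfaces as an early EOPNOTSUPP instead of a failure deeper in the transfer. A hedged userland sketch:

    #include <sys/sendfile.h>
    #include <errno.h>

    sendfilevec_t vec = {
            .sfv_fd = file_fd, .sfv_flag = 0, .sfv_off = 0, .sfv_len = len
    };
    size_t xferred = 0;

    if (sendfilev(sock_fd, &vec, 1, &xferred) == -1 &&
        errno == EOPNOTSUPP) {
            /* Transport lacks sendfile support; fall back to read()/write(). */
    }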
diff --git a/usr/src/uts/common/syscall/stat.c b/usr/src/uts/common/syscall/stat.c
index 4085104cc7..93f26121bc 100644
--- a/usr/src/uts/common/syscall/stat.c
+++ b/usr/src/uts/common/syscall/stat.c
@@ -61,7 +61,7 @@
* to VOP_GETATTR
*/
-static int
+int
cstatat_getvp(int fd, char *name, int follow, vnode_t **vp, cred_t **cred)
{
vnode_t *startvp;
diff --git a/usr/src/uts/common/syscall/sysconfig.c b/usr/src/uts/common/syscall/sysconfig.c
index 03f2fabe13..26ea859224 100644
--- a/usr/src/uts/common/syscall/sysconfig.c
+++ b/usr/src/uts/common/syscall/sysconfig.c
@@ -22,6 +22,7 @@
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2012 Joyent, Inc. All rights reserved.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -170,8 +171,8 @@ sysconfig(int which)
* even though rcapd can be used on the global zone too.
*/
if (!INGLOBALZONE(curproc) &&
- curproc->p_zone->zone_phys_mcap != 0)
- return (MIN(btop(curproc->p_zone->zone_phys_mcap),
+ curproc->p_zone->zone_phys_mem_ctl != UINT64_MAX)
+ return (MIN(btop(curproc->p_zone->zone_phys_mem_ctl),
physinstalled));
return (physinstalled);
@@ -179,26 +180,23 @@ sysconfig(int which)
case _CONFIG_AVPHYS_PAGES:
/*
* If the non-global zone has a phys. memory cap, use
- * the phys. memory cap - zone's current rss. We always
+ * the phys. memory cap - zone's rss. We always
* report the system-wide value for the global zone, even
- * though rcapd can be used on the global zone too.
+ * though memory capping can be used on the global zone too.
+ * We use the cached value for the RSS since vm_getusage()
+ * is so expensive and we don't need this value to be exact.
*/
if (!INGLOBALZONE(curproc) &&
- curproc->p_zone->zone_phys_mcap != 0) {
+ curproc->p_zone->zone_phys_mem_ctl != UINT64_MAX) {
pgcnt_t cap, rss, free;
- vmusage_t in_use;
- size_t cnt = 1;
- cap = btop(curproc->p_zone->zone_phys_mcap);
+ cap = btop(curproc->p_zone->zone_phys_mem_ctl);
if (cap > physinstalled)
return (freemem);
- if (vm_getusage(VMUSAGE_ZONE, 1, &in_use, &cnt,
- FKIOCTL) != 0)
- in_use.vmu_rss_all = 0;
- rss = btop(in_use.vmu_rss_all);
+ rss = btop(curproc->p_zone->zone_phys_mem);
/*
- * Because rcapd implements a soft cap, it is possible
+ * Because this is a soft cap, it is possible
* for rss to be temporarily over the cap.
*/
if (cap > rss)
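The hunk is truncated above at the final comparison; assuming the tail mirrors the pre-existing rcapd logic, the capped-zone arithmetic for _CONFIG_AVPHYS_PAGES reduces to:

    /* available = cap - cached RSS, clamped to zero and to freemem */
    cap = btop(zone->zone_phys_mem_ctl);
    rss = btop(zone->zone_phys_mem);    /* cached; may briefly exceed cap */
    free = (cap > rss) ? cap - rss : 0;
    return (MIN(free, freemem));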
diff --git a/usr/src/uts/common/syscall/uadmin.c b/usr/src/uts/common/syscall/uadmin.c
index 2dda4001bf..68aa1a95f5 100644
--- a/usr/src/uts/common/syscall/uadmin.c
+++ b/usr/src/uts/common/syscall/uadmin.c
@@ -78,7 +78,7 @@ volatile int fastreboot_dryrun = 0;
* system with many zones.
*/
void
-killall(zoneid_t zoneid)
+killall(zoneid_t zoneid, boolean_t force)
{
proc_t *p;
@@ -108,7 +108,7 @@ killall(zoneid_t zoneid)
p->p_stat != SIDL &&
p->p_stat != SZOMB) {
mutex_enter(&p->p_lock);
- if (sigismember(&p->p_sig, SIGKILL)) {
+ if (!force && sigismember(&p->p_sig, SIGKILL)) {
mutex_exit(&p->p_lock);
p = p->p_next;
} else {
@@ -245,7 +245,7 @@ kadmin(int cmd, int fcn, void *mdep, cred_t *credp)
*/
zone_shutdown_global();
- killall(ALL_ZONES);
+ killall(ALL_ZONES, B_FALSE);
/*
* If we are calling kadmin() from a kernel context then we
* do not release these resources.
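The new force flag lets a second sweep re-signal processes that already have SIGKILL pending, which the first sweep would skip. A sketch of how a caller might use it; the staged-shutdown caller here is hypothetical, and the kadmin() hunk above always passes B_FALSE:

    /* Polite pass: leave already-signaled processes alone. */
    killall(zoneid, B_FALSE);

    /* If shutdown stalls, force another SIGKILL at the stragglers. */
    killall(zoneid, B_TRUE);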