Diffstat (limited to 'usr/src/uts/common/syscall')
-rw-r--r--	usr/src/uts/common/syscall/brandsys.c	|   8
-rw-r--r--	usr/src/uts/common/syscall/fcntl.c	|   3
-rw-r--r--	usr/src/uts/common/syscall/memcntl.c	|   8
-rw-r--r--	usr/src/uts/common/syscall/open.c	|   8
-rw-r--r--	usr/src/uts/common/syscall/poll.c	| 328
-rw-r--r--	usr/src/uts/common/syscall/rusagesys.c	|  14
-rw-r--r--	usr/src/uts/common/syscall/rw.c	| 222
-rw-r--r--	usr/src/uts/common/syscall/sendfile.c	|  19
-rw-r--r--	usr/src/uts/common/syscall/stat.c	|   2
-rw-r--r--	usr/src/uts/common/syscall/sysconfig.c	|  24
-rw-r--r--	usr/src/uts/common/syscall/uadmin.c	|   6
11 files changed, 415 insertions, 227 deletions
diff --git a/usr/src/uts/common/syscall/brandsys.c b/usr/src/uts/common/syscall/brandsys.c
index 9b4bd38baa..8ee5511fd0 100644
--- a/usr/src/uts/common/syscall/brandsys.c
+++ b/usr/src/uts/common/syscall/brandsys.c
@@ -23,7 +23,9 @@
  * Use is subject to license terms.
  */
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
+/*
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ */
 
 #include <sys/brand.h>
 #include <sys/systm.h>
@@ -35,7 +37,7 @@
  */
 int64_t
 brandsys(int cmd, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3,
-    uintptr_t arg4, uintptr_t arg5, uintptr_t arg6)
+    uintptr_t arg4, uintptr_t arg5)
 {
 	struct proc *p = curthread->t_procp;
 	int64_t rval = 0;
@@ -49,7 +51,7 @@ brandsys(int cmd, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3,
 		return (set_errno(ENOSYS));
 
 	if ((err = ZBROP(p->p_zone)->b_brandsys(cmd, &rval, arg1, arg2, arg3,
-	    arg4, arg5, arg6)) != 0)
+	    arg4, arg5)) != 0)
 		return (set_errno(err));
 
 	return (rval);
diff --git a/usr/src/uts/common/syscall/fcntl.c b/usr/src/uts/common/syscall/fcntl.c
index 371bc83c29..d631fe62f6 100644
--- a/usr/src/uts/common/syscall/fcntl.c
+++ b/usr/src/uts/common/syscall/fcntl.c
@@ -54,7 +54,8 @@
 
 #include <sys/cmn_err.h>
 
-static int flock_check(vnode_t *, flock64_t *, offset_t, offset_t);
+/* This is global so that it can be used by brand emulation. */
+int flock_check(vnode_t *, flock64_t *, offset_t, offset_t);
 static int flock_get_start(vnode_t *, flock64_t *, offset_t, u_offset_t *);
 static void fd_too_big(proc_t *);
 
diff --git a/usr/src/uts/common/syscall/memcntl.c b/usr/src/uts/common/syscall/memcntl.c
index 1ee4b6a395..721f884a7e 100644
--- a/usr/src/uts/common/syscall/memcntl.c
+++ b/usr/src/uts/common/syscall/memcntl.c
@@ -115,13 +115,17 @@ memcntl(caddr_t addr, size_t len, int cmd, caddr_t arg, int attr, int mask)
 		 * MS_SYNC used to be defined to be zero but is now non-zero.
 		 * For binary compatibility we still accept zero
 		 * (the absence of MS_ASYNC) to mean the same thing.
+		 * Binary compatibility is not an issue for MS_INVALCURPROC.
 		 */
 		iarg = (uintptr_t)arg;
 		if ((iarg & ~MS_INVALIDATE) == 0)
 			iarg |= MS_SYNC;
 
-		if (((iarg & ~(MS_SYNC|MS_ASYNC|MS_INVALIDATE)) != 0) ||
-		    ((iarg & (MS_SYNC|MS_ASYNC)) == (MS_SYNC|MS_ASYNC))) {
+		if (((iarg &
+		    ~(MS_SYNC|MS_ASYNC|MS_INVALIDATE|MS_INVALCURPROC)) != 0) ||
+		    ((iarg & (MS_SYNC|MS_ASYNC)) == (MS_SYNC|MS_ASYNC)) ||
+		    ((iarg & (MS_INVALIDATE|MS_INVALCURPROC)) ==
+		    (MS_INVALIDATE|MS_INVALCURPROC))) {
 			error = set_errno(EINVAL);
 		} else {
 			error = as_ctl(as, addr, len, cmd, attr, iarg, NULL, 0);
diff --git a/usr/src/uts/common/syscall/open.c b/usr/src/uts/common/syscall/open.c
index edb04c824b..874e31869c 100644
--- a/usr/src/uts/common/syscall/open.c
+++ b/usr/src/uts/common/syscall/open.c
@@ -74,12 +74,12 @@ copen(int startfd, char *fname, int filemode, int createmode)
 
 	if (filemode & (FSEARCH|FEXEC)) {
 		/*
-		 * Must be one or the other and neither FREAD nor FWRITE
+		 * Must be one or the other.
 		 * Must not be any of FAPPEND FCREAT FTRUNC FXATTR FXATTRDIROPEN
-		 * XXX: Should these just be silently ignored?
+		 * XXX: Should these just be silently ignored like we
+		 * silently ignore FREAD|FWRITE?
 		 */
-		if ((filemode & (FREAD|FWRITE)) ||
-		    (filemode & (FSEARCH|FEXEC)) == (FSEARCH|FEXEC) ||
+		if ((filemode & (FSEARCH|FEXEC)) == (FSEARCH|FEXEC) ||
 		    (filemode & (FAPPEND|FCREAT|FTRUNC|FXATTR|FXATTRDIROPEN)))
 			return (set_errno(EINVAL));
 	}
diff --git a/usr/src/uts/common/syscall/poll.c b/usr/src/uts/common/syscall/poll.c
index cc125f127a..3d0a5cc04b 100644
--- a/usr/src/uts/common/syscall/poll.c
+++ b/usr/src/uts/common/syscall/poll.c
@@ -29,7 +29,7 @@
 
 /*
  * Copyright (c) 2012 by Delphix. All rights reserved.
- * Copyright 2015, Joyent, Inc.
+ * Copyright 2016, Joyent, Inc.
  */
 
 /*
@@ -317,20 +317,58 @@ polllock(pollhead_t *php, kmutex_t *lp)
 	return (0);
 }
 
-static int
-poll_common(pollfd_t *fds, nfds_t nfds, timespec_t *tsp, k_sigset_t *ksetp)
+int
+poll_copyin(pollstate_t *ps, pollfd_t *fds, nfds_t nfds)
+{
+	pollfd_t *pollfdp;
+	nfds_t old_nfds;
+
+	/*
+	 * NOTE: for performance, buffers are saved across poll() calls.
+	 * The theory is that if a process polls heavily, it tends to poll
+	 * on the same set of descriptors.  Therefore, we only reallocate
+	 * buffers when nfds changes.  There is no hysteresis control,
+	 * because there is no data to suggest that this is necessary;
+	 * the penalty of reallocating is not *that* great in any event.
+	 */
+	old_nfds = ps->ps_nfds;
+	if (nfds != old_nfds) {
+		kmem_free(ps->ps_pollfd, old_nfds * sizeof (pollfd_t));
+		pollfdp = kmem_alloc(nfds * sizeof (pollfd_t), KM_SLEEP);
+		ps->ps_pollfd = pollfdp;
+		ps->ps_nfds = nfds;
+	}
+
+	pollfdp = ps->ps_pollfd;
+	if (copyin(fds, pollfdp, nfds * sizeof (pollfd_t))) {
+		return (EFAULT);
+	}
+
+	if (fds == NULL) {
+		/*
+		 * If the process has page 0 mapped, then the copyin() above
+		 * will succeed even if fds is NULL.  However, our cached
+		 * poll lists are keyed by the address of the passed-in fds
+		 * structure, and we use the value NULL to indicate an unused
+		 * poll cache list entry.  As such, we elect not to support
+		 * NULL as a valid (user) memory address and fail the poll()
+		 * call.
+		 */
+		return (EFAULT);
+	}
+	return (0);
+}
+
+int
+poll_common(pollstate_t *ps, pollfd_t *fds, nfds_t nfds, timespec_t *tsp,
+    int *fdcnt)
 {
 	kthread_t *t = curthread;
-	klwp_t *lwp = ttolwp(t);
 	proc_t *p = ttoproc(t);
-	int fdcnt = 0;
-	int i;
 	hrtime_t deadline; /* hrtime value when we want to return */
 	pollfd_t *pollfdp;
-	pollstate_t *ps;
 	pollcache_t *pcp;
 	int error = 0;
-	nfds_t old_nfds;
 	int cacheindex = 0;	/* which cache set is used */
 
 	/*
@@ -348,32 +386,11 @@ poll_common(pollfd_t *fds, nfds_t nfds, timespec_t *tsp, k_sigset_t *ksetp)
 	}
 
 	/*
-	 * Reset our signal mask, if requested.
-	 */
-	if (ksetp != NULL) {
-		mutex_enter(&p->p_lock);
-		schedctl_finish_sigblock(t);
-		lwp->lwp_sigoldmask = t->t_hold;
-		t->t_hold = *ksetp;
-		t->t_flag |= T_TOMASK;
-		/*
-		 * Call cv_reltimedwait_sig() just to check for signals.
-		 * We will return immediately with either 0 or -1.
-		 */
-		if (!cv_reltimedwait_sig(&t->t_delay_cv, &p->p_lock, 0,
-		    TR_CLOCK_TICK)) {
-			mutex_exit(&p->p_lock);
-			error = EINTR;
-			goto pollout;
-		}
-		mutex_exit(&p->p_lock);
-	}
-
-	/*
-	 * Check to see if this guy just wants to use poll() as a timeout.
+	 * Check to see if the caller just wants to use poll() as a timeout.
 	 * If yes then bypass all the other stuff and make him sleep.
 	 */
 	if (nfds == 0) {
+		*fdcnt = 0;
 		/*
 		 * Sleep until we have passed the requested future
 		 * time or until interrupted by a signal.
@@ -385,66 +402,14 @@ poll_common(pollfd_t *fds, nfds_t nfds, timespec_t *tsp, k_sigset_t *ksetp)
 			    &t->t_delay_lock, deadline)) > 0)
 				continue;
 			mutex_exit(&t->t_delay_lock);
-			error = (error == 0) ? EINTR : 0;
+			return ((error == 0) ? EINTR : 0);
 		}
-		goto pollout;
-	}
-
-	if (nfds > p->p_fno_ctl) {
-		mutex_enter(&p->p_lock);
-		(void) rctl_action(rctlproc_legacy[RLIMIT_NOFILE],
-		    p->p_rctls, p, RCA_SAFE);
-		mutex_exit(&p->p_lock);
-		error = EINVAL;
-		goto pollout;
-	}
-
-	/*
-	 * Need to allocate memory for pollstate before anything because
-	 * the mutex and cv are created in this space
-	 */
-	ps = pollstate_create();
-
-	if (ps->ps_pcache == NULL)
-		ps->ps_pcache = pcache_alloc();
-	pcp = ps->ps_pcache;
-
-	/*
-	 * NOTE: for performance, buffers are saved across poll() calls.
-	 * The theory is that if a process polls heavily, it tends to poll
-	 * on the same set of descriptors.  Therefore, we only reallocate
-	 * buffers when nfds changes.  There is no hysteresis control,
-	 * because there is no data to suggest that this is necessary;
-	 * the penalty of reallocating is not *that* great in any event.
-	 */
-	old_nfds = ps->ps_nfds;
-	if (nfds != old_nfds) {
-
-		kmem_free(ps->ps_pollfd, old_nfds * sizeof (pollfd_t));
-		pollfdp = kmem_alloc(nfds * sizeof (pollfd_t), KM_SLEEP);
-		ps->ps_pollfd = pollfdp;
-		ps->ps_nfds = nfds;
+		return (0);
 	}
 
+	VERIFY(ps != NULL);
 	pollfdp = ps->ps_pollfd;
-	if (copyin(fds, pollfdp, nfds * sizeof (pollfd_t))) {
-		error = EFAULT;
-		goto pollout;
-	}
-
-	if (fds == NULL) {
-		/*
-		 * If the process has page 0 mapped, then the copyin() above
-		 * will succeed even if fds is NULL.  However, our cached
-		 * poll lists are keyed by the address of the passed-in fds
-		 * structure, and we use the value NULL to indicate an unused
-		 * poll cache list entry.  As such, we elect not to support
-		 * NULL as a valid (user) memory address and fail the poll()
-		 * call.
-		 */
-		error = EINVAL;
-		goto pollout;
-	}
+	VERIFY(pollfdp != NULL);
 
 	/*
 	 * If this thread polls for the first time, allocate ALL poll
@@ -460,10 +425,10 @@ poll_common(pollfd_t *fds, nfds_t nfds, timespec_t *tsp, k_sigset_t *ksetp)
 		/*
 		 * poll and cache this poll fd list in ps_pcacheset[0].
 		 */
-		error = pcacheset_cache_list(ps, fds, &fdcnt, cacheindex);
-		if (fdcnt || error) {
+		error = pcacheset_cache_list(ps, fds, fdcnt, cacheindex);
+		if (error || *fdcnt) {
 			mutex_exit(&ps->ps_lock);
-			goto pollout;
+			return (error);
 		}
 	} else {
 		pollcacheset_t *pcset = ps->ps_pcacheset;
@@ -488,11 +453,11 @@ poll_common(pollfd_t *fds, nfds_t nfds, timespec_t *tsp, k_sigset_t *ksetp)
 				 * the callee will guarantee the consistency
 				 * of cached poll list and cache content.
 				 */
-				error = pcacheset_resolve(ps, nfds, &fdcnt,
+				error = pcacheset_resolve(ps, nfds, fdcnt,
 				    cacheindex);
 				if (error) {
 					mutex_exit(&ps->ps_lock);
-					goto pollout;
+					return (error);
 				}
 				break;
 			}
@@ -509,11 +474,11 @@ poll_common(pollfd_t *fds, nfds_t nfds, timespec_t *tsp, k_sigset_t *ksetp)
 				 * found an unused entry. Use it to cache
 				 * this poll list.
 				 */
-				error = pcacheset_cache_list(ps, fds, &fdcnt,
+				error = pcacheset_cache_list(ps, fds, fdcnt,
 				    cacheindex);
-				if (fdcnt || error) {
+				if (error || *fdcnt) {
 					mutex_exit(&ps->ps_lock);
-					goto pollout;
+					return (error);
 				}
 				break;
 			}
@@ -527,10 +492,10 @@ poll_common(pollfd_t *fds, nfds_t nfds, timespec_t *tsp, k_sigset_t *ksetp)
 			cacheindex = pcacheset_replace(ps);
 			ASSERT(cacheindex < ps->ps_nsets);
 			pcset[cacheindex].pcs_usradr = (uintptr_t)fds;
-			error = pcacheset_resolve(ps, nfds, &fdcnt, cacheindex);
+			error = pcacheset_resolve(ps, nfds, fdcnt, cacheindex);
 			if (error) {
 				mutex_exit(&ps->ps_lock);
-				goto pollout;
+				return (error);
 			}
 		}
 	}
@@ -548,8 +513,8 @@ poll_common(pollfd_t *fds, nfds_t nfds, timespec_t *tsp, k_sigset_t *ksetp)
 	mutex_enter(&pcp->pc_lock);
 	for (;;) {
 		pcp->pc_flag = 0;
-		error = pcache_poll(pollfdp, ps, nfds, &fdcnt, cacheindex);
-		if (fdcnt || error) {
+		error = pcache_poll(pollfdp, ps, nfds, fdcnt, cacheindex);
+		if (error || *fdcnt) {
 			mutex_exit(&pcp->pc_lock);
 			mutex_exit(&ps->ps_lock);
 			break;
@@ -595,13 +560,116 @@ poll_common(pollfd_t *fds, nfds_t nfds, timespec_t *tsp, k_sigset_t *ksetp)
 		mutex_enter(&pcp->pc_lock);
 	}
 
+	return (error);
+}
+
+/*
+ * This is the system call trap that poll(),
+ * select() and pselect() are built upon.
+ * It is a private interface between libc and the kernel.
+ */
+int
+pollsys(pollfd_t *fds, nfds_t nfds, timespec_t *timeoutp, sigset_t *setp)
+{
+	kthread_t *t = curthread;
+	klwp_t *lwp = ttolwp(t);
+	proc_t *p = ttoproc(t);
+	timespec_t ts;
+	timespec_t *tsp;
+	k_sigset_t kset;
+	pollstate_t *ps = NULL;
+	pollfd_t *pollfdp = NULL;
+	int error = 0, fdcnt = 0;
+
+	/*
+	 * Copy in timeout
+	 */
+	if (timeoutp == NULL) {
+		tsp = NULL;
+	} else {
+		if (get_udatamodel() == DATAMODEL_NATIVE) {
+			if (copyin(timeoutp, &ts, sizeof (ts)))
+				return (set_errno(EFAULT));
+		} else {
+			timespec32_t ts32;
+
+			if (copyin(timeoutp, &ts32, sizeof (ts32)))
+				return (set_errno(EFAULT));
+			TIMESPEC32_TO_TIMESPEC(&ts, &ts32)
+		}
+
+		if (itimerspecfix(&ts))
+			return (set_errno(EINVAL));
+		tsp = &ts;
+	}
+
+	/*
+	 * Copy in and reset signal mask, if requested.
+	 */
+	if (setp != NULL) {
+		sigset_t set;
+
+		if (copyin(setp, &set, sizeof (set)))
+			return (set_errno(EFAULT));
+		sigutok(&set, &kset);
+
+		mutex_enter(&p->p_lock);
+		schedctl_finish_sigblock(t);
+		lwp->lwp_sigoldmask = t->t_hold;
+		t->t_hold = kset;
+		t->t_flag |= T_TOMASK;
+		/*
+		 * Call cv_reltimedwait_sig() just to check for signals.
+		 * We will return immediately with either 0 or -1.
+		 */
+		if (!cv_reltimedwait_sig(&t->t_delay_cv, &p->p_lock, 0,
+		    TR_CLOCK_TICK)) {
+			mutex_exit(&p->p_lock);
+			error = EINTR;
+			goto pollout;
+		}
+		mutex_exit(&p->p_lock);
+	}
+
+	/*
+	 * Initialize pollstate and copy in pollfd data if present.
+	 * If nfds == 0, we will skip all of the copying and check steps and
+	 * proceed directly into poll_common to process the supplied timeout.
+	 */
+	if (nfds != 0) {
+		if (nfds > p->p_fno_ctl) {
+			mutex_enter(&p->p_lock);
+			(void) rctl_action(rctlproc_legacy[RLIMIT_NOFILE],
+			    p->p_rctls, p, RCA_SAFE);
+			mutex_exit(&p->p_lock);
+			error = EINVAL;
+			goto pollout;
+		}
+
+		/*
+		 * Need to allocate memory for pollstate before anything
+		 * because the mutex and cv are created in this space
+		 */
+		ps = pollstate_create();
+		if (ps->ps_pcache == NULL)
+			ps->ps_pcache = pcache_alloc();
+
+		if ((error = poll_copyin(ps, fds, nfds)) != 0)
+			goto pollout;
+		pollfdp = ps->ps_pollfd;
+	}
+
+	/*
+	 * Perform the actual poll.
+	 */
+	error = poll_common(ps, fds, nfds, tsp, &fdcnt);
+
 pollout:
 	/*
-	 * If we changed the signal mask but we received
-	 * no signal then restore the signal mask.
-	 * Otherwise psig() will deal with the signal mask.
+	 * If we changed the signal mask but we received no signal then restore
+	 * the signal mask.  Otherwise psig() will deal with the signal mask.
 	 */
-	if (ksetp != NULL) {
+	if (setp != NULL) {
 		mutex_enter(&p->p_lock);
 		if (lwp->lwp_cursig == 0) {
 			t->t_hold = lwp->lwp_sigoldmask;
@@ -612,12 +680,10 @@ pollout:
 
 	if (error)
 		return (set_errno(error));
-
 	/*
 	 * Copy out the events and return the fdcnt to the user.
 	 */
-	if (nfds != 0 &&
-	    copyout(pollfdp, fds, nfds * sizeof (pollfd_t)))
+	if (nfds != 0 && copyout(pollfdp, fds, nfds * sizeof (pollfd_t)))
 		return (set_errno(EFAULT));
 
 #ifdef DEBUG
@@ -625,7 +691,7 @@ pollout:
 	 * Another sanity check:
 	 */
 	if (fdcnt) {
-		int reventcnt = 0;
+		int i, reventcnt = 0;
 
 		for (i = 0; i < nfds; i++) {
 			if (pollfdp[i].fd < 0) {
@@ -638,6 +704,8 @@ pollout:
 		}
 		ASSERT(fdcnt == reventcnt);
 	} else {
+		int i;
+
 		for (i = 0; i < nfds; i++) {
 			ASSERT(pollfdp[i].revents == 0);
 		}
@@ -648,52 +716,6 @@ pollout:
 }
 
 /*
- * This is the system call trap that poll(),
- * select() and pselect() are built upon.
- * It is a private interface between libc and the kernel.
- */
-int
-pollsys(pollfd_t *fds, nfds_t nfds, timespec_t *timeoutp, sigset_t *setp)
-{
-	timespec_t ts;
-	timespec_t *tsp;
-	sigset_t set;
-	k_sigset_t kset;
-	k_sigset_t *ksetp;
-	model_t datamodel = get_udatamodel();
-
-	if (timeoutp == NULL)
-		tsp = NULL;
-	else {
-		if (datamodel == DATAMODEL_NATIVE) {
-			if (copyin(timeoutp, &ts, sizeof (ts)))
-				return (set_errno(EFAULT));
-		} else {
-			timespec32_t ts32;
-
-			if (copyin(timeoutp, &ts32, sizeof (ts32)))
-				return (set_errno(EFAULT));
-			TIMESPEC32_TO_TIMESPEC(&ts, &ts32)
-		}
-
-		if (itimerspecfix(&ts))
-			return (set_errno(EINVAL));
-		tsp = &ts;
-	}
-
-	if (setp == NULL)
-		ksetp = NULL;
-	else {
-		if (copyin(setp, &set, sizeof (set)))
-			return (set_errno(EFAULT));
-		sigutok(&set, &kset);
-		ksetp = &kset;
-	}
-
-	return (poll_common(fds, nfds, tsp, ksetp));
-}
-
-/*
  * Clean up any state left around by poll(2). Called when a thread exits.
  */
 void
diff --git a/usr/src/uts/common/syscall/rusagesys.c b/usr/src/uts/common/syscall/rusagesys.c
index 3e0e63f4c0..417c629168 100644
--- a/usr/src/uts/common/syscall/rusagesys.c
+++ b/usr/src/uts/common/syscall/rusagesys.c
@@ -21,6 +21,7 @@
 /*
  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc.  All rights reserved.
  */
 
 /*
@@ -257,6 +258,19 @@ rusagesys(int code, void *arg1, void *arg2, void *arg3, void *arg4)
 	case _RUSAGESYS_GETVMUSAGE:
 		return (vm_getusage((uint_t)(uintptr_t)arg1, (time_t)arg2,
 		    (vmusage_t *)arg3, (size_t *)arg4, 0));
+	case _RUSAGESYS_INVALMAP:
+		/*
+		 * SPARC sfmmu hat does not support HAT_CURPROC_PGUNLOAD
+		 * handling so callers on SPARC should get simple sync
+		 * handling with invalidation to all processes.
+		 */
+#if defined(__sparc)
+		return (memcntl((caddr_t)arg2, (size_t)arg3, MC_SYNC,
+		    (caddr_t)(MS_ASYNC | MS_INVALIDATE), 0, 0));
+#else
+		return (vm_map_inval((pid_t)(uintptr_t)arg1, (caddr_t)arg2,
+		    (size_t)arg3));
+#endif
 	default:
 		return (set_errno(EINVAL));
 	}
diff --git a/usr/src/uts/common/syscall/rw.c b/usr/src/uts/common/syscall/rw.c
index a28894b2c9..943b7d244e 100644
--- a/usr/src/uts/common/syscall/rw.c
+++ b/usr/src/uts/common/syscall/rw.c
@@ -22,7 +22,7 @@
 /*
  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
- * Copyright (c) 2015, Joyent, Inc.  All rights reserved.
+ * Copyright 2015, Joyent, Inc.  All rights reserved.
 */
 
 /*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989  AT&T	*/
@@ -50,6 +50,7 @@
 #include <sys/debug.h>
 #include <sys/rctl.h>
 #include <sys/nbmlock.h>
+#include <sys/limits.h>
 
 #define	COPYOUT_MAX_CACHE	(1<<17)		/* 128K */
 
@@ -607,19 +608,12 @@ out:
 	return (bcount);
 }
 
-/*
- * XXX -- The SVID refers to IOV_MAX, but doesn't define it.  Grrrr....
- * XXX -- However, SVVS expects readv() and writev() to fail if
- * XXX -- iovcnt > 16 (yes, it's hard-coded in the SVVS source),
- * XXX -- so I guess that's the "interface".
- */
-#define	DEF_IOV_MAX	16
-
 ssize_t
 readv(int fdes, struct iovec *iovp, int iovcnt)
 {
 	struct uio auio;
-	struct iovec aiov[DEF_IOV_MAX];
+	struct iovec buf[IOV_MAX_STACK], *aiov = buf;
+	int aiovlen = 0;
 	file_t *fp;
 	register vnode_t *vp;
 	struct cpu *cp;
@@ -630,9 +624,14 @@ readv(int fdes, struct iovec *iovp, int iovcnt)
 	u_offset_t fileoff;
 	int in_crit = 0;
 
-	if (iovcnt <= 0 || iovcnt > DEF_IOV_MAX)
+	if (iovcnt <= 0 || iovcnt > IOV_MAX)
 		return (set_errno(EINVAL));
 
+	if (iovcnt > IOV_MAX_STACK) {
+		aiovlen = iovcnt * sizeof (iovec_t);
+		aiov = kmem_alloc(aiovlen, KM_SLEEP);
+	}
+
 #ifdef _SYSCALL32_IMPL
 	/*
 	 * 32-bit callers need to have their iovec expanded,
@@ -640,36 +639,63 @@ readv(int fdes, struct iovec *iovp, int iovcnt)
 	 * of data in a single call.
 	 */
 	if (get_udatamodel() == DATAMODEL_ILP32) {
-		struct iovec32 aiov32[DEF_IOV_MAX];
+		struct iovec32 buf32[IOV_MAX_STACK], *aiov32 = buf32;
+		int aiov32len;
 		ssize32_t count32;
 
-		if (copyin(iovp, aiov32, iovcnt * sizeof (struct iovec32)))
+		aiov32len = iovcnt * sizeof (iovec32_t);
+		if (aiovlen != 0)
+			aiov32 = kmem_alloc(aiov32len, KM_SLEEP);
+
+		if (copyin(iovp, aiov32, aiov32len)) {
+			if (aiovlen != 0) {
+				kmem_free(aiov32, aiov32len);
+				kmem_free(aiov, aiovlen);
+			}
 			return (set_errno(EFAULT));
+		}
 
 		count32 = 0;
 		for (i = 0; i < iovcnt; i++) {
 			ssize32_t iovlen32 = aiov32[i].iov_len;
 			count32 += iovlen32;
-			if (iovlen32 < 0 || count32 < 0)
+			if (iovlen32 < 0 || count32 < 0) {
+				if (aiovlen != 0) {
+					kmem_free(aiov32, aiov32len);
+					kmem_free(aiov, aiovlen);
+				}
 				return (set_errno(EINVAL));
+			}
 			aiov[i].iov_len = iovlen32;
 			aiov[i].iov_base =
 			    (caddr_t)(uintptr_t)aiov32[i].iov_base;
 		}
+
+		if (aiovlen != 0)
+			kmem_free(aiov32, aiov32len);
 	} else
 #endif
-	if (copyin(iovp, aiov, iovcnt * sizeof (struct iovec)))
+	if (copyin(iovp, aiov, iovcnt * sizeof (iovec_t))) {
+		if (aiovlen != 0)
+			kmem_free(aiov, aiovlen);
 		return (set_errno(EFAULT));
+	}
 
 	count = 0;
 	for (i = 0; i < iovcnt; i++) {
 		ssize_t iovlen = aiov[i].iov_len;
 		count += iovlen;
-		if (iovlen < 0 || count < 0)
+		if (iovlen < 0 || count < 0) {
+			if (aiovlen != 0)
+				kmem_free(aiov, aiovlen);
 			return (set_errno(EINVAL));
+		}
 	}
 
-	if ((fp = getf(fdes)) == NULL)
+	if ((fp = getf(fdes)) == NULL) {
+		if (aiovlen != 0)
+			kmem_free(aiov, aiovlen);
 		return (set_errno(EBADF));
+	}
 	if (((fflag = fp->f_flag) & FREAD) == 0) {
 		error = EBADF;
 		goto out;
@@ -768,6 +794,8 @@ out:
 	if (in_crit)
 		nbl_end_crit(vp);
 	releasef(fdes);
+	if (aiovlen != 0)
+		kmem_free(aiov, aiovlen);
 	if (error)
 		return (set_errno(error));
 	return (count);
@@ -777,7 +805,8 @@ out:
 ssize_t
 writev(int fdes, struct iovec *iovp, int iovcnt)
 {
 	struct uio auio;
-	struct iovec aiov[DEF_IOV_MAX];
+	struct iovec buf[IOV_MAX_STACK], *aiov = buf;
+	int aiovlen = 0;
 	file_t *fp;
 	register vnode_t *vp;
 	struct cpu *cp;
@@ -788,9 +817,14 @@ writev(int fdes, struct iovec *iovp, int iovcnt)
 	u_offset_t fileoff;
 	int in_crit = 0;
 
-	if (iovcnt <= 0 || iovcnt > DEF_IOV_MAX)
+	if (iovcnt <= 0 || iovcnt > IOV_MAX)
 		return (set_errno(EINVAL));
 
+	if (iovcnt > IOV_MAX_STACK) {
+		aiovlen = iovcnt * sizeof (iovec_t);
+		aiov = kmem_alloc(aiovlen, KM_SLEEP);
+	}
+
 #ifdef _SYSCALL32_IMPL
 	/*
 	 * 32-bit callers need to have their iovec expanded,
@@ -798,36 +832,62 @@ writev(int fdes, struct iovec *iovp, int iovcnt)
 	 * of data in a single call.
 	 */
 	if (get_udatamodel() == DATAMODEL_ILP32) {
-		struct iovec32 aiov32[DEF_IOV_MAX];
+		struct iovec32 buf32[IOV_MAX_STACK], *aiov32 = buf32;
+		int aiov32len;
 		ssize32_t count32;
 
-		if (copyin(iovp, aiov32, iovcnt * sizeof (struct iovec32)))
+		aiov32len = iovcnt * sizeof (iovec32_t);
+		if (aiovlen != 0)
+			aiov32 = kmem_alloc(aiov32len, KM_SLEEP);
+
+		if (copyin(iovp, aiov32, aiov32len)) {
+			if (aiovlen != 0) {
+				kmem_free(aiov32, aiov32len);
+				kmem_free(aiov, aiovlen);
+			}
 			return (set_errno(EFAULT));
+		}
 
 		count32 = 0;
 		for (i = 0; i < iovcnt; i++) {
 			ssize32_t iovlen = aiov32[i].iov_len;
 			count32 += iovlen;
-			if (iovlen < 0 || count32 < 0)
+			if (iovlen < 0 || count32 < 0) {
+				if (aiovlen != 0) {
+					kmem_free(aiov32, aiov32len);
+					kmem_free(aiov, aiovlen);
+				}
 				return (set_errno(EINVAL));
+			}
 			aiov[i].iov_len = iovlen;
 			aiov[i].iov_base =
 			    (caddr_t)(uintptr_t)aiov32[i].iov_base;
 		}
+		if (aiovlen != 0)
+			kmem_free(aiov32, aiov32len);
 	} else
 #endif
-	if (copyin(iovp, aiov, iovcnt * sizeof (struct iovec)))
+	if (copyin(iovp, aiov, iovcnt * sizeof (iovec_t))) {
+		if (aiovlen != 0)
+			kmem_free(aiov, aiovlen);
 		return (set_errno(EFAULT));
+	}
 
 	count = 0;
 	for (i = 0; i < iovcnt; i++) {
 		ssize_t iovlen = aiov[i].iov_len;
 		count += iovlen;
-		if (iovlen < 0 || count < 0)
+		if (iovlen < 0 || count < 0) {
+			if (aiovlen != 0)
+				kmem_free(aiov, aiovlen);
 			return (set_errno(EINVAL));
+		}
 	}
 
-	if ((fp = getf(fdes)) == NULL)
+	if ((fp = getf(fdes)) == NULL) {
+		if (aiovlen != 0)
+			kmem_free(aiov, aiovlen);
 		return (set_errno(EBADF));
+	}
 	if (((fflag = fp->f_flag) & FWRITE) == 0) {
 		error = EBADF;
 		goto out;
@@ -917,6 +977,8 @@ out:
 	if (in_crit)
 		nbl_end_crit(vp);
 	releasef(fdes);
+	if (aiovlen != 0)
+		kmem_free(aiov, aiovlen);
 	if (error)
 		return (set_errno(error));
 	return (count);
@@ -927,7 +989,8 @@ out:
 ssize_t
 preadv(int fdes, struct iovec *iovp, int iovcnt, off_t offset,
     off_t extended_offset)
 {
 	struct uio auio;
-	struct iovec aiov[DEF_IOV_MAX];
+	struct iovec buf[IOV_MAX_STACK], *aiov = buf;
+	int aiovlen = 0;
 	file_t *fp;
 	register vnode_t *vp;
 	struct cpu *cp;
@@ -952,9 +1015,14 @@ preadv(int fdes, struct iovec *iovp, int iovcnt, off_t offset,
 	int in_crit = 0;
 
-	if (iovcnt <= 0 || iovcnt > DEF_IOV_MAX)
+	if (iovcnt <= 0 || iovcnt > IOV_MAX)
 		return (set_errno(EINVAL));
 
+	if (iovcnt > IOV_MAX_STACK) {
+		aiovlen = iovcnt * sizeof (iovec_t);
+		aiov = kmem_alloc(aiovlen, KM_SLEEP);
+	}
+
 #ifdef _SYSCALL32_IMPL
 	/*
 	 * 32-bit callers need to have their iovec expanded,
@@ -962,39 +1030,68 @@ preadv(int fdes, struct iovec *iovp, int iovcnt, off_t offset,
 	 * of data in a single call.
 	 */
 	if (get_udatamodel() == DATAMODEL_ILP32) {
-		struct iovec32 aiov32[DEF_IOV_MAX];
+		struct iovec32 buf32[IOV_MAX_STACK], *aiov32 = buf32;
+		int aiov32len;
 		ssize32_t count32;
 
-		if (copyin(iovp, aiov32, iovcnt * sizeof (struct iovec32)))
+		aiov32len = iovcnt * sizeof (iovec32_t);
+		if (aiovlen != 0)
+			aiov32 = kmem_alloc(aiov32len, KM_SLEEP);
+
+		if (copyin(iovp, aiov32, aiov32len)) {
+			if (aiovlen != 0) {
+				kmem_free(aiov32, aiov32len);
+				kmem_free(aiov, aiovlen);
+			}
 			return (set_errno(EFAULT));
+		}
 
 		count32 = 0;
 		for (i = 0; i < iovcnt; i++) {
 			ssize32_t iovlen32 = aiov32[i].iov_len;
 			count32 += iovlen32;
-			if (iovlen32 < 0 || count32 < 0)
+			if (iovlen32 < 0 || count32 < 0) {
+				if (aiovlen != 0) {
+					kmem_free(aiov32, aiov32len);
+					kmem_free(aiov, aiovlen);
+				}
 				return (set_errno(EINVAL));
+			}
 			aiov[i].iov_len = iovlen32;
 			aiov[i].iov_base =
 			    (caddr_t)(uintptr_t)aiov32[i].iov_base;
 		}
+		if (aiovlen != 0)
+			kmem_free(aiov32, aiov32len);
 	} else
#endif	/* _SYSCALL32_IMPL */
-	if (copyin(iovp, aiov, iovcnt * sizeof (struct iovec)))
+	if (copyin(iovp, aiov, iovcnt * sizeof (iovec_t))) {
+		if (aiovlen != 0)
+			kmem_free(aiov, aiovlen);
 		return (set_errno(EFAULT));
+	}
 
 	count = 0;
 	for (i = 0; i < iovcnt; i++) {
 		ssize_t iovlen = aiov[i].iov_len;
 		count += iovlen;
-		if (iovlen < 0 || count < 0)
+		if (iovlen < 0 || count < 0) {
+			if (aiovlen != 0)
+				kmem_free(aiov, aiovlen);
 			return (set_errno(EINVAL));
+		}
 	}
 
-	if ((bcount = (ssize_t)count) < 0)
+	if ((bcount = (ssize_t)count) < 0) {
+		if (aiovlen != 0)
+			kmem_free(aiov, aiovlen);
 		return (set_errno(EINVAL));
-	if ((fp = getf(fdes)) == NULL)
+	}
+	if ((fp = getf(fdes)) == NULL) {
+		if (aiovlen != 0)
+			kmem_free(aiov, aiovlen);
 		return (set_errno(EBADF));
+	}
 	if (((fflag = fp->f_flag) & FREAD) == 0) {
 		error = EBADF;
 		goto out;
@@ -1099,6 +1196,8 @@ out:
 	if (in_crit)
 		nbl_end_crit(vp);
 	releasef(fdes);
+	if (aiovlen != 0)
+		kmem_free(aiov, aiovlen);
 	if (error)
 		return (set_errno(error));
 	return (count);
@@ -1109,7 +1208,8 @@ out:
 ssize_t
 pwritev(int fdes, struct iovec *iovp, int iovcnt, off_t offset,
     off_t extended_offset)
 {
 	struct uio auio;
-	struct iovec aiov[DEF_IOV_MAX];
+	struct iovec buf[IOV_MAX_STACK], *aiov = buf;
+	int aiovlen = 0;
 	file_t *fp;
 	register vnode_t *vp;
 	struct cpu *cp;
@@ -1134,9 +1234,14 @@ pwritev(int fdes, struct iovec *iovp, int iovcnt, off_t offset,
 	int in_crit = 0;
 
-	if (iovcnt <= 0 || iovcnt > DEF_IOV_MAX)
+	if (iovcnt <= 0 || iovcnt > IOV_MAX)
 		return (set_errno(EINVAL));
 
+	if (iovcnt > IOV_MAX_STACK) {
+		aiovlen = iovcnt * sizeof (iovec_t);
+		aiov = kmem_alloc(aiovlen, KM_SLEEP);
+	}
+
 #ifdef _SYSCALL32_IMPL
 	/*
 	 * 32-bit callers need to have their iovec expanded,
@@ -1144,39 +1249,68 @@ pwritev(int fdes, struct iovec *iovp, int iovcnt, off_t offset,
 	 * of data in a single call.
 	 */
 	if (get_udatamodel() == DATAMODEL_ILP32) {
-		struct iovec32 aiov32[DEF_IOV_MAX];
+		struct iovec32 buf32[IOV_MAX_STACK], *aiov32 = buf32;
+		int aiov32len;
 		ssize32_t count32;
 
-		if (copyin(iovp, aiov32, iovcnt * sizeof (struct iovec32)))
+		aiov32len = iovcnt * sizeof (iovec32_t);
+		if (aiovlen != 0)
+			aiov32 = kmem_alloc(aiov32len, KM_SLEEP);
+
+		if (copyin(iovp, aiov32, aiov32len)) {
+			if (aiovlen != 0) {
+				kmem_free(aiov32, aiov32len);
+				kmem_free(aiov, aiovlen);
+			}
 			return (set_errno(EFAULT));
+		}
 
 		count32 = 0;
 		for (i = 0; i < iovcnt; i++) {
 			ssize32_t iovlen32 = aiov32[i].iov_len;
 			count32 += iovlen32;
-			if (iovlen32 < 0 || count32 < 0)
+			if (iovlen32 < 0 || count32 < 0) {
+				if (aiovlen != 0) {
+					kmem_free(aiov32, aiov32len);
+					kmem_free(aiov, aiovlen);
+				}
 				return (set_errno(EINVAL));
+			}
 			aiov[i].iov_len = iovlen32;
 			aiov[i].iov_base =
 			    (caddr_t)(uintptr_t)aiov32[i].iov_base;
 		}
+		if (aiovlen != 0)
+			kmem_free(aiov32, aiov32len);
 	} else
#endif	/* _SYSCALL32_IMPL */
-	if (copyin(iovp, aiov, iovcnt * sizeof (struct iovec)))
+	if (copyin(iovp, aiov, iovcnt * sizeof (iovec_t))) {
+		if (aiovlen != 0)
+			kmem_free(aiov, aiovlen);
 		return (set_errno(EFAULT));
+	}
 
 	count = 0;
 	for (i = 0; i < iovcnt; i++) {
 		ssize_t iovlen = aiov[i].iov_len;
 		count += iovlen;
-		if (iovlen < 0 || count < 0)
+		if (iovlen < 0 || count < 0) {
+			if (aiovlen != 0)
+				kmem_free(aiov, aiovlen);
 			return (set_errno(EINVAL));
+		}
 	}
 
-	if ((bcount = (ssize_t)count) < 0)
+	if ((bcount = (ssize_t)count) < 0) {
+		if (aiovlen != 0)
+			kmem_free(aiov, aiovlen);
 		return (set_errno(EINVAL));
-	if ((fp = getf(fdes)) == NULL)
+	}
+	if ((fp = getf(fdes)) == NULL) {
+		if (aiovlen != 0)
+			kmem_free(aiov, aiovlen);
 		return (set_errno(EBADF));
+	}
 	if (((fflag = fp->f_flag) & FWRITE) == 0) {
 		error = EBADF;
 		goto out;
@@ -1308,6 +1442,8 @@ out:
 	if (in_crit)
 		nbl_end_crit(vp);
 	releasef(fdes);
+	if (aiovlen != 0)
+		kmem_free(aiov, aiovlen);
 	if (error)
 		return (set_errno(error));
 	return (count);
diff --git a/usr/src/uts/common/syscall/sendfile.c b/usr/src/uts/common/syscall/sendfile.c
index cb8246f584..ccceca7c6d 100644
--- a/usr/src/uts/common/syscall/sendfile.c
+++ b/usr/src/uts/common/syscall/sendfile.c
@@ -82,7 +82,7 @@ extern sotpi_info_t *sotpi_sototpi(struct sonode *);
  * 64 bit kernel or 32 bit kernel. For 32 bit apps, we can't transfer
  * more than 2GB of data.
  */
-int
+static int
 sendvec_chunk64(file_t *fp, u_offset_t *fileoff, struct ksendfilevec64 *sfv,
     int copy_cnt, ssize32_t *count)
 {
@@ -343,7 +343,7 @@ sendvec_chunk64(file_t *fp, u_offset_t *fileoff, struct ksendfilevec64 *sfv,
 	return (0);
 }
 
-ssize32_t
+static ssize32_t
 sendvec64(file_t *fp, const struct ksendfilevec64 *vec, int sfvcnt,
     size32_t *xferred, int fildes)
 {
@@ -390,7 +390,7 @@ sendvec64(file_t *fp, const struct ksendfilevec64 *vec, int sfvcnt,
 }
 #endif
 
-int
+static int
 sendvec_small_chunk(file_t *fp, u_offset_t *fileoff, struct sendfilevec *sfv,
     int copy_cnt, ssize_t total_size, int maxblk, ssize_t *count)
 {
@@ -680,7 +680,7 @@ sendvec_small_chunk(file_t *fp, u_offset_t *fileoff, struct sendfilevec *sfv,
 }
 
 
-int
+static int
 sendvec_chunk(file_t *fp, u_offset_t *fileoff, struct sendfilevec *sfv,
     int copy_cnt, ssize_t *count)
 {
@@ -1160,6 +1160,17 @@ sendfilev(int opcode, int fildes, const struct sendfilevec *vec, int sfvcnt,
 		} else {
 			maxblk = (int)vp->v_stream->sd_maxblk;
 		}
+
+		/*
+		 * We need to make sure that the socket that we're sending on
+		 * supports sendfile behavior. sockfs doesn't know that the APIs
+		 * we want to use are coming from sendfile, so we can't rely on
+		 * it to check for us.
+		 */
+		if ((so->so_mode & SM_SENDFILESUPP) == 0) {
+			error = EOPNOTSUPP;
+			goto err;
+		}
 		break;
 	case VREG:
 		break;
diff --git a/usr/src/uts/common/syscall/stat.c b/usr/src/uts/common/syscall/stat.c
index 4085104cc7..93f26121bc 100644
--- a/usr/src/uts/common/syscall/stat.c
+++ b/usr/src/uts/common/syscall/stat.c
@@ -61,7 +61,7 @@
  * to VOP_GETATTR
  */
 
-static int
+int
 cstatat_getvp(int fd, char *name, int follow, vnode_t **vp, cred_t **cred)
 {
 	vnode_t *startvp;
diff --git a/usr/src/uts/common/syscall/sysconfig.c b/usr/src/uts/common/syscall/sysconfig.c
index 03f2fabe13..26ea859224 100644
--- a/usr/src/uts/common/syscall/sysconfig.c
+++ b/usr/src/uts/common/syscall/sysconfig.c
@@ -22,6 +22,7 @@
 /*
  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
+ * Copyright 2012 Joyent, Inc.  All rights reserved.
 */
 
 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989  AT&T	*/
@@ -170,8 +171,8 @@ sysconfig(int which)
 		 * even though rcapd can be used on the global zone too.
 		 */
 		if (!INGLOBALZONE(curproc) &&
-		    curproc->p_zone->zone_phys_mcap != 0)
-			return (MIN(btop(curproc->p_zone->zone_phys_mcap),
+		    curproc->p_zone->zone_phys_mem_ctl != UINT64_MAX)
+			return (MIN(btop(curproc->p_zone->zone_phys_mem_ctl),
 			    physinstalled));
 
 		return (physinstalled);
@@ -179,26 +180,23 @@ sysconfig(int which)
 	case _CONFIG_AVPHYS_PAGES:
 		/*
 		 * If the non-global zone has a phys. memory cap, use
-		 * the phys. memory cap - zone's current rss.  We always
+		 * the phys. memory cap - zone's rss.  We always
 		 * report the system-wide value for the global zone, even
-		 * though rcapd can be used on the global zone too.
+		 * though memory capping can be used on the global zone too.
+		 * We use the cached value for the RSS since vm_getusage()
+		 * is so expensive and we don't need this value to be exact.
 		 */
 		if (!INGLOBALZONE(curproc) &&
-		    curproc->p_zone->zone_phys_mcap != 0) {
+		    curproc->p_zone->zone_phys_mem_ctl != UINT64_MAX) {
 			pgcnt_t cap, rss, free;
-			vmusage_t in_use;
-			size_t cnt = 1;
 
-			cap = btop(curproc->p_zone->zone_phys_mcap);
+			cap = btop(curproc->p_zone->zone_phys_mem_ctl);
 			if (cap > physinstalled)
 				return (freemem);
 
-			if (vm_getusage(VMUSAGE_ZONE, 1, &in_use, &cnt,
-			    FKIOCTL) != 0)
-				in_use.vmu_rss_all = 0;
-			rss = btop(in_use.vmu_rss_all);
+			rss = btop(curproc->p_zone->zone_phys_mem);
 			/*
-			 * Because rcapd implements a soft cap, it is possible
+			 * Because this is a soft cap, it is possible
 			 * for rss to be temporarily over the cap.
 			 */
 			if (cap > rss)
diff --git a/usr/src/uts/common/syscall/uadmin.c b/usr/src/uts/common/syscall/uadmin.c
index 2dda4001bf..68aa1a95f5 100644
--- a/usr/src/uts/common/syscall/uadmin.c
+++ b/usr/src/uts/common/syscall/uadmin.c
@@ -78,7 +78,7 @@ volatile int fastreboot_dryrun = 0;
  * system with many zones.
 */
 void
-killall(zoneid_t zoneid)
+killall(zoneid_t zoneid, boolean_t force)
 {
 	proc_t *p;
 
@@ -108,7 +108,7 @@ killall(zoneid_t zoneid)
 		    p->p_stat != SIDL &&
 		    p->p_stat != SZOMB) {
 			mutex_enter(&p->p_lock);
-			if (sigismember(&p->p_sig, SIGKILL)) {
+			if (!force && sigismember(&p->p_sig, SIGKILL)) {
 				mutex_exit(&p->p_lock);
 				p = p->p_next;
 			} else {
@@ -245,7 +245,7 @@ kadmin(int cmd, int fcn, void *mdep, cred_t *credp)
 		 */
 		zone_shutdown_global();
 
-		killall(ALL_ZONES);
+		killall(ALL_ZONES, B_FALSE);
 
 		/*
 		 * If we are calling kadmin() from a kernel context then we
 		 * do not release these resources.
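The rw.c portion of this diff removes the old hard-coded 16-entry limit (DEF_IOV_MAX) on readv()/writev()/preadv()/pwritev() and accepts up to IOV_MAX iovecs, spilling from the IOV_MAX_STACK on-stack buffer to kmem_alloc() for larger requests. A minimal userland sketch of what this enables (a hypothetical demo, not part of the patch; it uses only standard POSIX interfaces):

	/*
	 * Hypothetical demo: call writev() with 32 iovecs.  On a kernel
	 * with this change the call succeeds for any iovcnt up to IOV_MAX;
	 * on an older kernel anything over 16 failed with EINVAL.
	 */
	#include <sys/uio.h>
	#include <unistd.h>
	#include <stdio.h>

	int
	main(void)
	{
		char c[32];
		struct iovec iov[32];
		int i;

		for (i = 0; i < 32; i++) {
			c[i] = '.';
			iov[i].iov_base = &c[i];
			iov[i].iov_len = 1;
		}

		/* 32 > 16, so this returned EINVAL before the change. */
		if (writev(STDOUT_FILENO, iov, 32) == -1)
			perror("writev");
		return (0);
	}

The poll.c refactor likewise preserves the documented fast path where pollsys() is used purely as a timer: with nfds == 0, the new poll_common() skips pollstate setup entirely and just sleeps on the supplied timeout. Again as a hypothetical usage sketch:

	#include <poll.h>

	int
	main(void)
	{
		/* nfds == 0: poll() as a pure ~250ms timeout (fast path). */
		(void) poll(NULL, 0, 250);
		return (0);
	}

Note that the diff also tightens the related edge case: a non-NULL nfds with fds == NULL now fails with EFAULT in poll_copyin(), since NULL is reserved as the unused-entry key in the poll cache.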