summaryrefslogtreecommitdiff
path: root/usr/src/uts/common/syscall/rw.c
diff options
context:
space:
mode:
authorstevel@tonic-gate <none@none>2005-06-14 00:00:00 -0700
committerstevel@tonic-gate <none@none>2005-06-14 00:00:00 -0700
commit7c478bd95313f5f23a4c958a745db2134aa03244 (patch)
treec871e58545497667cbb4b0a4f2daf204743e1fe7 /usr/src/uts/common/syscall/rw.c
downloadillumos-joyent-7c478bd95313f5f23a4c958a745db2134aa03244.tar.gz
OpenSolaris Launch
Diffstat (limited to 'usr/src/uts/common/syscall/rw.c')
-rw-r--r--usr/src/uts/common/syscall/rw.c1223
1 files changed, 1223 insertions, 0 deletions
diff --git a/usr/src/uts/common/syscall/rw.c b/usr/src/uts/common/syscall/rw.c
new file mode 100644
index 0000000000..d2f35e2051
--- /dev/null
+++ b/usr/src/uts/common/syscall/rw.c
@@ -0,0 +1,1223 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
+/* All Rights Reserved */
+
+/*
+ * Portions of this source code were derived from Berkeley 4.3 BSD
+ * under license from the Regents of the University of California.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/param.h>
+#include <sys/isa_defs.h>
+#include <sys/types.h>
+#include <sys/inttypes.h>
+#include <sys/sysmacros.h>
+#include <sys/cred.h>
+#include <sys/user.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+#include <sys/vnode.h>
+#include <sys/file.h>
+#include <sys/proc.h>
+#include <sys/cpuvar.h>
+#include <sys/uio.h>
+#include <sys/ioreq.h>
+#include <sys/debug.h>
+#include <sys/rctl.h>
+#include <sys/nbmlock.h>
+
+#define COPYOUT_MIN_SIZE (1<<17) /* 128K; initial value of copyout_min_size */
+
+static size_t copyout_min_size = COPYOUT_MIN_SIZE; /* read()/readv(): smaller transfers use UIO_COPY_CACHED */
+
+/*
+ * read, write, pread, pwrite, readv, and writev syscalls.
+ *
+ * 64-bit open: all open's are large file opens.
+ * Large Files: the behaviour of read depends on whether the fd
+ * corresponds to large open or not.
+ * 32-bit open: FOFFMAX flag not set.
+ * read until MAXOFF32_T - 1 and read at MAXOFF32_T returns
+ * EOVERFLOW if count is non-zero and if size of file
+ * is > MAXOFF32_T. If size of file is <= MAXOFF32_T read
+ * at >= MAXOFF32_T returns EOF.
+ */
+
+/*
+ * Native system call
+ *
+ * read() transfers up to count bytes from the open file fdes into the
+ * user buffer cbuf, starting at the file's current offset (fp->f_offset).
+ *
+ * Returns the number of bytes transferred, or the value of set_errno():
+ * EINVAL - count is negative when converted to ssize_t
+ * EBADF - fdes is not open (getf() fails) or lacks FREAD
+ * EACCES - nbl_conflict() reports a conflict with the requested range
+ * EOVERFLOW - regular file, offset >= OFFSET_MAX(fp), and the file
+ * extends past that offset
+ * Errors from nbl_svmand(), VOP_GETATTR() and VOP_READ() are passed
+ * through, except that EINTR is dropped once any data was transferred.
+ *
+ * A zero-length read of a regular file returns 0 without calling
+ * VOP_READ().
+ */
+ssize_t
+read(int fdes, void *cbuf, size_t count)
+{
+ struct uio auio;
+ struct iovec aiov;
+ file_t *fp;
+ register vnode_t *vp;
+ struct cpu *cp;
+ int fflag, ioflag, rwflag;
+ ssize_t cnt, bcount;
+ int error = 0;
+ u_offset_t fileoff;
+ int in_crit = 0; /* set once nbl_start_crit() has been called */
+
+ if ((cnt = (ssize_t)count) < 0)
+ return (set_errno(EINVAL));
+ if ((fp = getf(fdes)) == NULL)
+ return (set_errno(EBADF));
+ if (((fflag = fp->f_flag) & FREAD) == 0) {
+ error = EBADF;
+ goto out;
+ }
+ vp = fp->f_vnode;
+
+ if (vp->v_type == VREG && cnt == 0) { /* nothing to do; returns 0 */
+ goto out;
+ }
+
+ rwflag = 0; /* mode passed to VOP_RWLOCK/VOP_RWUNLOCK; write() uses 1 */
+ aiov.iov_base = cbuf;
+ aiov.iov_len = cnt;
+
+ /*
+ * We have to enter the critical region before calling VOP_RWLOCK
+ * to avoid a deadlock with write() calls.
+ */
+ if (nbl_need_check(vp)) {
+ int svmand;
+
+ nbl_start_crit(vp, RW_READER);
+ in_crit = 1;
+ error = nbl_svmand(vp, fp->f_cred, &svmand);
+ if (error != 0)
+ goto out;
+ if (nbl_conflict(vp, NBL_READ, fp->f_offset, cnt, svmand)) {
+ error = EACCES;
+ goto out;
+ }
+ }
+
+ (void) VOP_RWLOCK(vp, rwflag, NULL);
+
+ /*
+ * We do the following checks inside VOP_RWLOCK so as to
+ * prevent file size from changing while these checks are
+ * being done. Also, we load fp's offset to the local
+ * variable fileoff because we can have a parallel lseek
+ * going on (f_offset is not protected by any lock) which
+ * could change f_offset. We need to see the value only
+ * once here and take a decision. Seeing it more than once
+ * can lead to incorrect functionality.
+ */
+
+ fileoff = (u_offset_t)fp->f_offset;
+ if (fileoff >= OFFSET_MAX(fp) && (vp->v_type == VREG)) {
+ struct vattr va;
+ va.va_mask = AT_SIZE; /* only the file size is needed */
+ if ((error = VOP_GETATTR(vp, &va, 0, fp->f_cred))) {
+ VOP_RWUNLOCK(vp, rwflag, NULL);
+ goto out;
+ }
+ if (fileoff >= va.va_size) { /* at or past end of file: EOF */
+ cnt = 0;
+ VOP_RWUNLOCK(vp, rwflag, NULL);
+ goto out;
+ } else { /* data exists beyond the offset maximum */
+ error = EOVERFLOW;
+ VOP_RWUNLOCK(vp, rwflag, NULL);
+ goto out;
+ }
+ }
+ if ((vp->v_type == VREG) &&
+ (fileoff + cnt > OFFSET_MAX(fp))) {
+ cnt = (ssize_t)(OFFSET_MAX(fp) - fileoff); /* clip at offset maximum */
+ }
+ auio.uio_loffset = fileoff;
+ auio.uio_iov = &aiov;
+ auio.uio_iovcnt = 1;
+ auio.uio_resid = bcount = cnt; /* bcount keeps the (clipped) request size */
+ auio.uio_segflg = UIO_USERSPACE;
+ auio.uio_llimit = MAXOFFSET_T;
+ auio.uio_fmode = fflag;
+ /*
+ * Only use bypass caches when the count is large enough
+ * (at least copyout_min_size bytes).
+ */
+ if (bcount < copyout_min_size)
+ auio.uio_extflg = UIO_COPY_CACHED;
+ else
+ auio.uio_extflg = UIO_COPY_DEFAULT;
+
+ ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);
+
+ /* If read sync is not asked for, filter sync flags */
+ if ((ioflag & FRSYNC) == 0)
+ ioflag &= ~(FSYNC|FDSYNC);
+ error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL);
+ cnt -= auio.uio_resid; /* bytes actually transferred */
+ CPU_STATS_ENTER_K();
+ cp = CPU;
+ CPU_STATS_ADDQ(cp, sys, sysread, 1);
+ CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)cnt);
+ CPU_STATS_EXIT_K();
+ ttolwp(curthread)->lwp_ru.ioch += (ulong_t)cnt;
+
+ if (vp->v_type == VFIFO) /* Backward compatibility */
+ fp->f_offset = cnt;
+ else if (((fp->f_flag & FAPPEND) == 0) ||
+ (vp->v_type != VREG) || (bcount != 0)) /* POSIX */
+ fp->f_offset = auio.uio_loffset;
+ VOP_RWUNLOCK(vp, rwflag, NULL);
+
+ if (error == EINTR && cnt != 0) /* a partial transfer wins over EINTR */
+ error = 0;
+out:
+ if (in_crit)
+ nbl_end_crit(vp);
+ releasef(fdes);
+ if (error)
+ return (set_errno(error));
+ return (cnt);
+}
+
+/*
+ * Native system call
+ *
+ * write() transfers up to count bytes from the user buffer cbuf to the
+ * open file fdes, starting at the file's current offset (fp->f_offset).
+ *
+ * Returns the number of bytes transferred, or the value of set_errno():
+ * EINVAL - count is negative when converted to ssize_t
+ * EBADF - fdes is not open (getf() fails) or lacks FWRITE
+ * EACCES - nbl_conflict() reports a conflict with the requested range
+ * EFBIG - regular file and the offset is at or beyond either
+ * curproc->p_fsz_ctl (the RLIMIT_FSIZE rctl action is also
+ * triggered) or OFFSET_MAX(fp)
+ * Errors from nbl_svmand() and VOP_WRITE() are passed through, except
+ * that EINTR is dropped once any data was transferred.
+ *
+ * A zero-length write to a regular file returns 0 without calling
+ * VOP_WRITE().
+ */
+ssize_t
+write(int fdes, void *cbuf, size_t count)
+{
+ struct uio auio;
+ struct iovec aiov;
+ file_t *fp;
+ register vnode_t *vp;
+ struct cpu *cp;
+ int fflag, ioflag, rwflag;
+ ssize_t cnt, bcount;
+ int error = 0;
+ u_offset_t fileoff;
+ int in_crit = 0; /* set once nbl_start_crit() has been called */
+
+ if ((cnt = (ssize_t)count) < 0)
+ return (set_errno(EINVAL));
+ if ((fp = getf(fdes)) == NULL)
+ return (set_errno(EBADF));
+ if (((fflag = fp->f_flag) & FWRITE) == 0) {
+ error = EBADF;
+ goto out;
+ }
+ vp = fp->f_vnode;
+
+ if (vp->v_type == VREG && cnt == 0) { /* nothing to do; returns 0 */
+ goto out;
+ }
+
+ rwflag = 1; /* mode passed to VOP_RWLOCK/VOP_RWUNLOCK; read() uses 0 */
+ aiov.iov_base = cbuf;
+ aiov.iov_len = cnt;
+
+ /*
+ * We have to enter the critical region before calling VOP_RWLOCK
+ * to avoid a deadlock with ufs.
+ */
+ if (nbl_need_check(vp)) {
+ int svmand;
+
+ nbl_start_crit(vp, RW_READER);
+ in_crit = 1;
+ error = nbl_svmand(vp, fp->f_cred, &svmand);
+ if (error != 0)
+ goto out;
+ if (nbl_conflict(vp, NBL_WRITE, fp->f_offset, cnt, svmand)) {
+ error = EACCES;
+ goto out;
+ }
+ }
+
+ (void) VOP_RWLOCK(vp, rwflag, NULL);
+
+ fileoff = fp->f_offset; /* sampled once, while holding the rwlock */
+ if (vp->v_type == VREG) {
+
+ /*
+ * We raise psignal if write for >0 bytes causes
+ * it to exceed the ulimit. The rwlock is dropped before
+ * p_lock is taken for the rctl action.
+ */
+ if (fileoff >= curproc->p_fsz_ctl) {
+ VOP_RWUNLOCK(vp, rwflag, NULL);
+
+ mutex_enter(&curproc->p_lock);
+ (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE],
+ curproc->p_rctls, curproc, RCA_UNSAFE_SIGINFO);
+ mutex_exit(&curproc->p_lock);
+
+ error = EFBIG;
+ goto out;
+ }
+ /*
+ * We return EFBIG if write is done at an offset
+ * greater than or equal to the offset maximum for this
+ * file structure.
+ */
+
+ if (fileoff >= OFFSET_MAX(fp)) {
+ VOP_RWUNLOCK(vp, rwflag, NULL);
+ error = EFBIG;
+ goto out;
+ }
+ /*
+ * Limit the bytes to be written up to the offset maximum for
+ * this open file structure.
+ */
+ if (fileoff + cnt > OFFSET_MAX(fp))
+ cnt = (ssize_t)(OFFSET_MAX(fp) - fileoff);
+ }
+ auio.uio_loffset = fileoff;
+ auio.uio_iov = &aiov;
+ auio.uio_iovcnt = 1;
+ auio.uio_resid = bcount = cnt; /* bcount keeps the (clipped) request size */
+ auio.uio_segflg = UIO_USERSPACE;
+ auio.uio_llimit = curproc->p_fsz_ctl;
+ auio.uio_fmode = fflag;
+ auio.uio_extflg = UIO_COPY_DEFAULT;
+
+ ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);
+
+ error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL);
+ cnt -= auio.uio_resid; /* bytes actually transferred */
+ CPU_STATS_ENTER_K();
+ cp = CPU;
+ CPU_STATS_ADDQ(cp, sys, syswrite, 1);
+ CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)cnt);
+ CPU_STATS_EXIT_K();
+ ttolwp(curthread)->lwp_ru.ioch += (ulong_t)cnt;
+
+ if (vp->v_type == VFIFO) /* Backward compatibility */
+ fp->f_offset = cnt;
+ else if (((fp->f_flag & FAPPEND) == 0) ||
+ (vp->v_type != VREG) || (bcount != 0)) /* POSIX */
+ fp->f_offset = auio.uio_loffset;
+ VOP_RWUNLOCK(vp, rwflag, NULL);
+
+ if (error == EINTR && cnt != 0) /* a partial transfer wins over EINTR */
+ error = 0;
+out:
+ if (in_crit)
+ nbl_end_crit(vp);
+ releasef(fdes);
+ if (error)
+ return (set_errno(error));
+ return (cnt);
+}
+
+/*
+ * pread() reads up to count bytes from the open file fdes into the user
+ * buffer cbuf, starting at the caller-supplied offset. Unlike read(),
+ * fp->f_offset is neither consulted nor updated.
+ *
+ * Returns the number of bytes transferred, or the value of set_errno():
+ *	EINVAL		count is negative as an ssize_t, or (regular file)
+ *			the offset exceeds the largest offset for the
+ *			caller's data model
+ *	EBADF		fdes is not open or lacks FREAD
+ *	ESPIPE		fdes refers to a FIFO
+ *	EACCES		nbl_conflict() reports a conflict with the range
+ *	EOVERFLOW	(regular file) the offset equals the largest offset
+ *			and the file extends beyond it
+ * Errors from nbl_svmand(), VOP_GETATTR() and VOP_READ() are passed
+ * through, except that EINTR is dropped once data was transferred.
+ */
+ssize_t
+pread(int fdes, void *cbuf, size_t count, off_t offset)
+{
+	struct uio io;
+	struct iovec vec;
+	file_t *fp;
+	vnode_t *vp;
+	struct cpu *cp;
+	int fflag, ioflag;
+	int rwflag = 0;
+	int nbl_held = 0;
+	int error = 0;
+	ssize_t nbytes = (ssize_t)count;
+	u_offset_t off = (u_offset_t)(ulong_t)offset;
+#ifdef _SYSCALL32_IMPL
+	u_offset_t limit = get_udatamodel() == DATAMODEL_ILP32 ?
+	    MAXOFF32_T : MAXOFFSET_T;
+#else
+	const u_offset_t limit = MAXOFF32_T;
+#endif
+
+	if (nbytes < 0)
+		return (set_errno(EINVAL));
+
+	fp = getf(fdes);
+	if (fp == NULL)
+		return (set_errno(EBADF));
+
+	fflag = fp->f_flag;
+	if ((fflag & (FREAD)) == 0) {
+		error = EBADF;
+		goto done;
+	}
+
+	vp = fp->f_vnode;
+
+	switch (vp->v_type) {
+	case VREG:
+		if (nbytes == 0)
+			goto done;
+
+		/* A negative user offset shows up here as a huge value. */
+		if (off > limit) {
+			error = EINVAL;
+			goto done;
+		}
+
+		/* Never transfer past the largest representable offset. */
+		if (off + nbytes > limit)
+			nbytes = (ssize_t)((offset_t)limit - off);
+		break;
+
+	case VFIFO:
+		error = ESPIPE;
+		goto done;
+
+	default:
+		break;
+	}
+
+	/*
+	 * The nbl critical region must be entered before VOP_RWLOCK
+	 * to avoid a deadlock with ufs.
+	 */
+	if (nbl_need_check(vp)) {
+		int svmand;
+
+		nbl_start_crit(vp, RW_READER);
+		nbl_held = 1;
+		if ((error = nbl_svmand(vp, fp->f_cred, &svmand)) != 0)
+			goto done;
+		if (nbl_conflict(vp, NBL_READ, off, nbytes, svmand)) {
+			error = EACCES;
+			goto done;
+		}
+	}
+
+	vec.iov_base = cbuf;
+	vec.iov_len = nbytes;
+	(void) VOP_RWLOCK(vp, rwflag, NULL);
+
+	if (vp->v_type == VREG && off == limit) {
+		struct vattr va;
+
+		/*
+		 * Reading exactly at the limit: EOF if the file ends at or
+		 * before it, EOVERFLOW if the file extends beyond it.
+		 */
+		va.va_mask = AT_SIZE;
+		error = VOP_GETATTR(vp, &va, 0, fp->f_cred);
+		VOP_RWUNLOCK(vp, rwflag, NULL);
+		if (error == 0) {
+			if (off >= va.va_size)
+				nbytes = 0;
+			else
+				error = EOVERFLOW;
+		}
+		goto done;
+	}
+
+	io.uio_iov = &vec;
+	io.uio_iovcnt = 1;
+	io.uio_loffset = off;
+	io.uio_resid = nbytes;
+	io.uio_segflg = UIO_USERSPACE;
+	io.uio_llimit = MAXOFFSET_T;
+	io.uio_fmode = fflag;
+	io.uio_extflg = UIO_COPY_CACHED;
+
+	ioflag = io.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);
+
+	/* Sync flags only apply to reads when FRSYNC was requested. */
+	if (!(ioflag & FRSYNC))
+		ioflag &= ~(FSYNC|FDSYNC);
+
+	error = VOP_READ(vp, &io, ioflag, fp->f_cred, NULL);
+	nbytes -= io.uio_resid;
+
+	CPU_STATS_ENTER_K();
+	cp = CPU;
+	CPU_STATS_ADDQ(cp, sys, sysread, 1);
+	CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)nbytes);
+	CPU_STATS_EXIT_K();
+	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)nbytes;
+	VOP_RWUNLOCK(vp, rwflag, NULL);
+
+	/* A partial transfer takes precedence over EINTR. */
+	if (error == EINTR && nbytes != 0)
+		error = 0;
+done:
+	if (nbl_held)
+		nbl_end_crit(vp);
+	releasef(fdes);
+	if (error)
+		return (set_errno(error));
+	return (nbytes);
+}
+
+ssize_t
+pwrite(int fdes, void *cbuf, size_t count, off_t offset)
+{
+	/*
+	 * pwrite() writes up to count bytes from the user buffer cbuf to
+	 * the open file fdes at the caller-supplied offset; fp->f_offset
+	 * is neither consulted nor updated.
+	 *
+	 * Returns the number of bytes transferred, or set_errno() of:
+	 *	EINVAL	count is negative as an ssize_t, or (regular file)
+	 *		the offset exceeds the data model's largest offset
+	 *	EBADF	fdes is not open or lacks FWRITE
+	 *	ESPIPE	fdes refers to a FIFO
+	 *	EFBIG	(regular file) the offset is at or beyond
+	 *		curproc->p_fsz_ctl, or equals the largest offset
+	 *	EACCES	nbl_conflict() reports a conflict with the range
+	 * Errors from nbl_svmand() and VOP_WRITE() are passed through,
+	 * except that EINTR is dropped once data was transferred.
+	 */
+	struct uio io;
+	struct iovec vec;
+	file_t *fp;
+	vnode_t *vp;
+	struct cpu *cp;
+	int fflag, ioflag;
+	int rwflag = 1;
+	int nbl_held = 0;
+	int error = 0;
+	ssize_t nbytes = (ssize_t)count;
+	u_offset_t off = (u_offset_t)(ulong_t)offset;
+#ifdef _SYSCALL32_IMPL
+	u_offset_t limit = get_udatamodel() == DATAMODEL_ILP32 ?
+	    MAXOFF32_T : MAXOFFSET_T;
+#else
+	const u_offset_t limit = MAXOFF32_T;
+#endif
+
+	if (nbytes < 0)
+		return (set_errno(EINVAL));
+
+	fp = getf(fdes);
+	if (fp == NULL)
+		return (set_errno(EBADF));
+
+	fflag = fp->f_flag;
+	if ((fflag & (FWRITE)) == 0) {
+		error = EBADF;
+		goto done;
+	}
+
+	vp = fp->f_vnode;
+
+	switch (vp->v_type) {
+	case VREG:
+		if (nbytes == 0)
+			goto done;
+
+		/* Offsets that cannot be represented in an off_t. */
+		if (off > limit) {
+			error = EINVAL;
+			goto done;
+		}
+
+		/* Writing at or above the file size resource limit. */
+		if (off >= curproc->p_fsz_ctl) {
+			mutex_enter(&curproc->p_lock);
+			(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE],
+			    curproc->p_rctls, curproc, RCA_UNSAFE_SIGINFO);
+			mutex_exit(&curproc->p_lock);
+
+			error = EFBIG;
+			goto done;
+		}
+
+		/* The file may not grow beyond the largest offset. */
+		if (off == limit) {
+			error = EFBIG;
+			goto done;
+		}
+		if (off + count > limit)
+			nbytes = (ssize_t)((u_offset_t)limit - off);
+		break;
+
+	case VFIFO:
+		error = ESPIPE;
+		goto done;
+
+	default:
+		break;
+	}
+
+	/*
+	 * The nbl critical region must be entered before VOP_RWLOCK
+	 * to avoid a deadlock with ufs.
+	 */
+	if (nbl_need_check(vp)) {
+		int svmand;
+
+		nbl_start_crit(vp, RW_READER);
+		nbl_held = 1;
+		if ((error = nbl_svmand(vp, fp->f_cred, &svmand)) != 0)
+			goto done;
+		if (nbl_conflict(vp, NBL_WRITE, off, nbytes, svmand)) {
+			error = EACCES;
+			goto done;
+		}
+	}
+
+	vec.iov_base = cbuf;
+	vec.iov_len = nbytes;
+	(void) VOP_RWLOCK(vp, rwflag, NULL);
+
+	io.uio_iov = &vec;
+	io.uio_iovcnt = 1;
+	io.uio_loffset = off;
+	io.uio_resid = nbytes;
+	io.uio_segflg = UIO_USERSPACE;
+	io.uio_llimit = curproc->p_fsz_ctl;
+	io.uio_fmode = fflag;
+	io.uio_extflg = UIO_COPY_CACHED;
+
+	ioflag = io.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);
+
+	error = VOP_WRITE(vp, &io, ioflag, fp->f_cred, NULL);
+	nbytes -= io.uio_resid;
+
+	CPU_STATS_ENTER_K();
+	cp = CPU;
+	CPU_STATS_ADDQ(cp, sys, syswrite, 1);
+	CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)nbytes);
+	CPU_STATS_EXIT_K();
+	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)nbytes;
+	VOP_RWUNLOCK(vp, rwflag, NULL);
+
+	/* A partial transfer takes precedence over EINTR. */
+	if (error == EINTR && nbytes != 0)
+		error = 0;
+done:
+	if (nbl_held)
+		nbl_end_crit(vp);
+	releasef(fdes);
+	if (error)
+		return (set_errno(error));
+	return (nbytes);
+}
+
+/*
+ * XXX -- The SVID refers to IOV_MAX, but doesn't define it. Grrrr....
+ * XXX -- However, SVVS expects readv() and writev() to fail if
+ * XXX -- iovcnt > 16 (yes, it's hard-coded in the SVVS source),
+ * XXX -- so I guess that's the "interface".
+ *
+ * DEF_IOV_MAX bounds iovcnt in readv() and writev() (larger values get
+ * EINVAL) and sizes their on-stack iovec arrays.
+ */
+#define DEF_IOV_MAX 16
+
+/*
+ * readv() reads from the open file fdes into the iovcnt user buffers
+ * described by the user-space array iovp, starting at the file's current
+ * offset (fp->f_offset).
+ *
+ * Returns the number of bytes transferred, or the value of set_errno():
+ *	EINVAL		iovcnt is outside [1, DEF_IOV_MAX], an iov_len is
+ *			negative, or the lengths sum to more than fits in
+ *			an ssize_t (ssize32_t for ILP32 callers)
+ *	EFAULT		the iovec array cannot be copied in
+ *	EBADF		fdes is not open or lacks FREAD
+ *	EACCES		nbl_conflict() reports a conflict with the range
+ *	EOVERFLOW	regular file, offset >= OFFSET_MAX(fp), and the file
+ *			extends past that offset
+ * Errors from nbl_svmand(), VOP_GETATTR() and VOP_READ() are passed
+ * through, except that EINTR is dropped once data was transferred.
+ */
+ssize_t
+readv(int fdes, struct iovec *iovp, int iovcnt)
+{
+	struct uio auio;
+	struct iovec aiov[DEF_IOV_MAX];
+	file_t *fp;
+	register vnode_t *vp;
+	struct cpu *cp;
+	int fflag, ioflag, rwflag;
+	ssize_t count, bcount;
+	/* Largest ssize_t value, derived from size_t without overflow. */
+	const ssize_t ssize_max = (ssize_t)(~(size_t)0 >> 1);
+	int error = 0;
+	int i;
+	u_offset_t fileoff;
+	int in_crit = 0;
+
+	if (iovcnt <= 0 || iovcnt > DEF_IOV_MAX)
+		return (set_errno(EINVAL));
+
+#ifdef _SYSCALL32_IMPL
+	/*
+	 * 32-bit callers need to have their iovec expanded,
+	 * while ensuring that they can't move more than 2Gbytes
+	 * of data in a single call.
+	 */
+	if (get_udatamodel() == DATAMODEL_ILP32) {
+		struct iovec32 aiov32[DEF_IOV_MAX];
+		ssize32_t count32;
+
+		if (copyin(iovp, aiov32, iovcnt * sizeof (struct iovec32)))
+			return (set_errno(EFAULT));
+
+		count32 = 0;
+		for (i = 0; i < iovcnt; i++) {
+			ssize32_t iovlen32 = aiov32[i].iov_len;
+
+			/*
+			 * Range-check before adding: signed overflow is
+			 * undefined, so it cannot be detected afterwards.
+			 */
+			if (iovlen32 < 0 || iovlen32 > INT32_MAX - count32)
+				return (set_errno(EINVAL));
+			count32 += iovlen32;
+			aiov[i].iov_len = iovlen32;
+			aiov[i].iov_base =
+			    (caddr_t)(uintptr_t)aiov32[i].iov_base;
+		}
+	} else
+#endif
+	if (copyin(iovp, aiov, iovcnt * sizeof (struct iovec)))
+		return (set_errno(EFAULT));
+
+	count = 0;
+	for (i = 0; i < iovcnt; i++) {
+		ssize_t iovlen = aiov[i].iov_len;
+
+		/* Same pre-addition range check as above, for ssize_t. */
+		if (iovlen < 0 || iovlen > ssize_max - count)
+			return (set_errno(EINVAL));
+		count += iovlen;
+	}
+	if ((fp = getf(fdes)) == NULL)
+		return (set_errno(EBADF));
+	if (((fflag = fp->f_flag) & FREAD) == 0) {
+		error = EBADF;
+		goto out;
+	}
+	vp = fp->f_vnode;
+	if (vp->v_type == VREG && count == 0) {
+		goto out;
+	}
+
+	rwflag = 0;
+
+	/*
+	 * We have to enter the critical region before calling VOP_RWLOCK
+	 * to avoid a deadlock with ufs.
+	 */
+	if (nbl_need_check(vp)) {
+		int svmand;
+
+		nbl_start_crit(vp, RW_READER);
+		in_crit = 1;
+		error = nbl_svmand(vp, fp->f_cred, &svmand);
+		if (error != 0)
+			goto out;
+		if (nbl_conflict(vp, NBL_READ, fp->f_offset, count, svmand)) {
+			error = EACCES;
+			goto out;
+		}
+	}
+
+	(void) VOP_RWLOCK(vp, rwflag, NULL);
+	fileoff = fp->f_offset;
+
+	/*
+	 * Behaviour is same as read. Please see comments in read.
+	 */
+
+	if ((vp->v_type == VREG) && (fileoff >= OFFSET_MAX(fp))) {
+		struct vattr va;
+		va.va_mask = AT_SIZE;
+		if ((error = VOP_GETATTR(vp, &va, 0, fp->f_cred))) {
+			VOP_RWUNLOCK(vp, rwflag, NULL);
+			goto out;
+		}
+		if (fileoff >= va.va_size) {
+			VOP_RWUNLOCK(vp, rwflag, NULL);
+			count = 0;
+			goto out;
+		} else {
+			VOP_RWUNLOCK(vp, rwflag, NULL);
+			error = EOVERFLOW;
+			goto out;
+		}
+	}
+	if ((vp->v_type == VREG) && (fileoff + count > OFFSET_MAX(fp))) {
+		count = (ssize_t)(OFFSET_MAX(fp) - fileoff);
+	}
+	auio.uio_loffset = fileoff;
+	auio.uio_iov = aiov;
+	auio.uio_iovcnt = iovcnt;
+	auio.uio_resid = bcount = count;
+	auio.uio_segflg = UIO_USERSPACE;
+	auio.uio_llimit = MAXOFFSET_T;
+	auio.uio_fmode = fflag;
+	/* Only transfers of at least copyout_min_size skip CACHED mode. */
+	if (bcount < copyout_min_size)
+		auio.uio_extflg = UIO_COPY_CACHED;
+	else
+		auio.uio_extflg = UIO_COPY_DEFAULT;
+
+
+	ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);
+
+	/* If read sync is not asked for, filter sync flags */
+	if ((ioflag & FRSYNC) == 0)
+		ioflag &= ~(FSYNC|FDSYNC);
+	error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL);
+	count -= auio.uio_resid;	/* bytes actually transferred */
+	CPU_STATS_ENTER_K();
+	cp = CPU;
+	CPU_STATS_ADDQ(cp, sys, sysread, 1);
+	CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)count);
+	CPU_STATS_EXIT_K();
+	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)count;
+
+	if (vp->v_type == VFIFO)	/* Backward compatibility */
+		fp->f_offset = count;
+	else if (((fp->f_flag & FAPPEND) == 0) ||
+	    (vp->v_type != VREG) || (bcount != 0))	/* POSIX */
+		fp->f_offset = auio.uio_loffset;
+
+	VOP_RWUNLOCK(vp, rwflag, NULL);
+
+	/* A partial transfer takes precedence over EINTR. */
+	if (error == EINTR && count != 0)
+		error = 0;
+out:
+	if (in_crit)
+		nbl_end_crit(vp);
+	releasef(fdes);
+	if (error)
+		return (set_errno(error));
+	return (count);
+}
+
+/*
+ * writev() writes the iovcnt user buffers described by the user-space
+ * array iovp to the open file fdes, starting at the file's current
+ * offset (fp->f_offset).
+ *
+ * Returns the number of bytes transferred, or the value of set_errno():
+ *	EINVAL		iovcnt is outside [1, DEF_IOV_MAX], an iov_len is
+ *			negative, or the lengths sum to more than fits in
+ *			an ssize_t (ssize32_t for ILP32 callers)
+ *	EFAULT		the iovec array cannot be copied in
+ *	EBADF		fdes is not open or lacks FWRITE
+ *	EACCES		nbl_conflict() reports a conflict with the range
+ *	EFBIG		regular file and the offset is at or beyond either
+ *			curproc->p_fsz_ctl (the RLIMIT_FSIZE rctl action is
+ *			also triggered) or OFFSET_MAX(fp)
+ * Errors from nbl_svmand() and VOP_WRITE() are passed through, except
+ * that EINTR is dropped once data was transferred.
+ */
+ssize_t
+writev(int fdes, struct iovec *iovp, int iovcnt)
+{
+	struct uio auio;
+	struct iovec aiov[DEF_IOV_MAX];
+	file_t *fp;
+	register vnode_t *vp;
+	struct cpu *cp;
+	int fflag, ioflag, rwflag;
+	ssize_t count, bcount;
+	/* Largest ssize_t value, derived from size_t without overflow. */
+	const ssize_t ssize_max = (ssize_t)(~(size_t)0 >> 1);
+	int error = 0;
+	int i;
+	u_offset_t fileoff;
+	int in_crit = 0;
+
+	if (iovcnt <= 0 || iovcnt > DEF_IOV_MAX)
+		return (set_errno(EINVAL));
+
+#ifdef _SYSCALL32_IMPL
+	/*
+	 * 32-bit callers need to have their iovec expanded,
+	 * while ensuring that they can't move more than 2Gbytes
+	 * of data in a single call.
+	 */
+	if (get_udatamodel() == DATAMODEL_ILP32) {
+		struct iovec32 aiov32[DEF_IOV_MAX];
+		ssize32_t count32;
+
+		if (copyin(iovp, aiov32, iovcnt * sizeof (struct iovec32)))
+			return (set_errno(EFAULT));
+
+		count32 = 0;
+		for (i = 0; i < iovcnt; i++) {
+			ssize32_t iovlen = aiov32[i].iov_len;
+
+			/*
+			 * Range-check before adding: signed overflow is
+			 * undefined, so it cannot be detected afterwards.
+			 */
+			if (iovlen < 0 || iovlen > INT32_MAX - count32)
+				return (set_errno(EINVAL));
+			count32 += iovlen;
+			aiov[i].iov_len = iovlen;
+			aiov[i].iov_base =
+			    (caddr_t)(uintptr_t)aiov32[i].iov_base;
+		}
+	} else
+#endif
+	if (copyin(iovp, aiov, iovcnt * sizeof (struct iovec)))
+		return (set_errno(EFAULT));
+
+	count = 0;
+	for (i = 0; i < iovcnt; i++) {
+		ssize_t iovlen = aiov[i].iov_len;
+
+		/* Same pre-addition range check as above, for ssize_t. */
+		if (iovlen < 0 || iovlen > ssize_max - count)
+			return (set_errno(EINVAL));
+		count += iovlen;
+	}
+	if ((fp = getf(fdes)) == NULL)
+		return (set_errno(EBADF));
+	if (((fflag = fp->f_flag) & FWRITE) == 0) {
+		error = EBADF;
+		goto out;
+	}
+	vp = fp->f_vnode;
+	if (vp->v_type == VREG && count == 0) {
+		goto out;
+	}
+
+	rwflag = 1;
+
+	/*
+	 * We have to enter the critical region before calling VOP_RWLOCK
+	 * to avoid a deadlock with ufs.
+	 */
+	if (nbl_need_check(vp)) {
+		int svmand;
+
+		nbl_start_crit(vp, RW_READER);
+		in_crit = 1;
+		error = nbl_svmand(vp, fp->f_cred, &svmand);
+		if (error != 0)
+			goto out;
+		if (nbl_conflict(vp, NBL_WRITE, fp->f_offset, count, svmand)) {
+			error = EACCES;
+			goto out;
+		}
+	}
+
+	(void) VOP_RWLOCK(vp, rwflag, NULL);
+
+	fileoff = fp->f_offset;
+
+	/*
+	 * Behaviour is same as write. Please see comments for write.
+	 */
+
+	if (vp->v_type == VREG) {
+		if (fileoff >= curproc->p_fsz_ctl) {
+			VOP_RWUNLOCK(vp, rwflag, NULL);
+			mutex_enter(&curproc->p_lock);
+			(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE],
+			    curproc->p_rctls, curproc, RCA_UNSAFE_SIGINFO);
+			mutex_exit(&curproc->p_lock);
+			error = EFBIG;
+			goto out;
+		}
+		if (fileoff >= OFFSET_MAX(fp)) {
+			VOP_RWUNLOCK(vp, rwflag, NULL);
+			error = EFBIG;
+			goto out;
+		}
+		if (fileoff + count > OFFSET_MAX(fp))
+			count = (ssize_t)(OFFSET_MAX(fp) - fileoff);
+	}
+	auio.uio_loffset = fileoff;
+	auio.uio_iov = aiov;
+	auio.uio_iovcnt = iovcnt;
+	auio.uio_resid = bcount = count;
+	auio.uio_segflg = UIO_USERSPACE;
+	auio.uio_llimit = curproc->p_fsz_ctl;
+	auio.uio_fmode = fflag;
+	auio.uio_extflg = UIO_COPY_DEFAULT;
+
+	ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);
+
+	error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL);
+	count -= auio.uio_resid;	/* bytes actually transferred */
+	CPU_STATS_ENTER_K();
+	cp = CPU;
+	CPU_STATS_ADDQ(cp, sys, syswrite, 1);
+	CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)count);
+	CPU_STATS_EXIT_K();
+	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)count;
+
+	if (vp->v_type == VFIFO)	/* Backward compatibility */
+		fp->f_offset = count;
+	else if (((fp->f_flag & FAPPEND) == 0) ||
+	    (vp->v_type != VREG) || (bcount != 0))	/* POSIX */
+		fp->f_offset = auio.uio_loffset;
+	VOP_RWUNLOCK(vp, rwflag, NULL);
+
+	/* A partial transfer takes precedence over EINTR. */
+	if (error == EINTR && count != 0)
+		error = 0;
+out:
+	if (in_crit)
+		nbl_end_crit(vp);
+	releasef(fdes);
+	if (error)
+		return (set_errno(error));
+	return (count);
+}
+
+#if defined(_SYSCALL32_IMPL) || defined(_ILP32)
+
+/*
+ * This syscall supplies 64-bit file offsets to 32-bit applications only.
+ *
+ * The offset arrives as two 32-bit halves: with _LITTLE_ENDIAN defined
+ * offset_1 is the low half and offset_2 the high half, otherwise the
+ * roles are swapped. Up to count bytes are read from fdes into cbuf at
+ * that offset; fp->f_offset is neither consulted nor updated.
+ *
+ * Returns the number of bytes transferred, or the value of set_errno():
+ * EINVAL - count is negative as an ssize_t or exceeds INT32_MAX, or
+ * (regular file) the offset exceeds MAXOFFSET_T
+ * EBADF - fdes is not open (getf() fails) or lacks FREAD
+ * ESPIPE - fdes refers to a FIFO
+ * EACCES - nbl_conflict() reports a conflict with the requested range
+ * Errors from nbl_svmand() and VOP_READ() are passed through, except
+ * that EINTR is dropped once any data was transferred.
+ */
+ssize32_t
+pread64(int fdes, void *cbuf, size32_t count, uint32_t offset_1,
+ uint32_t offset_2)
+{
+ struct uio auio;
+ struct iovec aiov;
+ file_t *fp;
+ register vnode_t *vp;
+ struct cpu *cp;
+ int fflag, ioflag, rwflag;
+ ssize_t bcount;
+ int error = 0;
+ u_offset_t fileoff;
+ int in_crit = 0; /* set once nbl_start_crit() has been called */
+
+#if defined(_LITTLE_ENDIAN)
+ fileoff = ((u_offset_t)offset_2 << 32) | (u_offset_t)offset_1;
+#else
+ fileoff = ((u_offset_t)offset_1 << 32) | (u_offset_t)offset_2;
+#endif
+
+ if ((bcount = (ssize_t)count) < 0 || bcount > INT32_MAX)
+ return (set_errno(EINVAL));
+
+ if ((fp = getf(fdes)) == NULL)
+ return (set_errno(EBADF));
+ if (((fflag = fp->f_flag) & (FREAD)) == 0) {
+ error = EBADF;
+ goto out;
+ }
+
+ rwflag = 0; /* mode passed to VOP_RWLOCK/VOP_RWUNLOCK; pwrite64() uses 1 */
+ vp = fp->f_vnode;
+
+ if (vp->v_type == VREG) {
+
+ if (bcount == 0) /* nothing to do; returns 0 */
+ goto out;
+
+ /*
+ * Same as pread. See comments in pread.
+ */
+
+ if (fileoff > MAXOFFSET_T) {
+ error = EINVAL;
+ goto out;
+ }
+ if (fileoff + bcount > MAXOFFSET_T)
+ bcount = (ssize_t)(MAXOFFSET_T - fileoff); /* clip at MAXOFFSET_T */
+ } else if (vp->v_type == VFIFO) {
+ error = ESPIPE;
+ goto out;
+ }
+
+ /*
+ * We have to enter the critical region before calling VOP_RWLOCK
+ * to avoid a deadlock with ufs.
+ */
+ if (nbl_need_check(vp)) {
+ int svmand;
+
+ nbl_start_crit(vp, RW_READER);
+ in_crit = 1;
+ error = nbl_svmand(vp, fp->f_cred, &svmand);
+ if (error != 0)
+ goto out;
+ if (nbl_conflict(vp, NBL_READ, fileoff, bcount, svmand)) {
+ error = EACCES;
+ goto out;
+ }
+ }
+
+ aiov.iov_base = cbuf;
+ aiov.iov_len = bcount;
+ (void) VOP_RWLOCK(vp, rwflag, NULL);
+ auio.uio_loffset = fileoff;
+
+ /*
+ * Note: File size can never be greater than MAXOFFSET_T.
+ * If ever we start supporting 128 bit files the code
+ * similar to the one in pread at this place should be here.
+ * Here we avoid the unnecessary VOP_GETATTR() when we
+ * know that fileoff == MAXOFFSET_T implies that it is always
+ * greater than or equal to file size.
+ */
+ auio.uio_iov = &aiov;
+ auio.uio_iovcnt = 1;
+ auio.uio_resid = bcount;
+ auio.uio_segflg = UIO_USERSPACE;
+ auio.uio_llimit = MAXOFFSET_T;
+ auio.uio_fmode = fflag;
+ auio.uio_extflg = UIO_COPY_CACHED;
+
+ ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);
+
+ /* If read sync is not asked for, filter sync flags */
+ if ((ioflag & FRSYNC) == 0)
+ ioflag &= ~(FSYNC|FDSYNC);
+ error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL);
+ bcount -= auio.uio_resid; /* bytes actually transferred */
+ CPU_STATS_ENTER_K();
+ cp = CPU;
+ CPU_STATS_ADDQ(cp, sys, sysread, 1);
+ CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)bcount);
+ CPU_STATS_EXIT_K();
+ ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount;
+ VOP_RWUNLOCK(vp, rwflag, NULL);
+
+ if (error == EINTR && bcount != 0) /* a partial transfer wins over EINTR */
+ error = 0;
+out:
+ if (in_crit)
+ nbl_end_crit(vp);
+ releasef(fdes);
+ if (error)
+ return (set_errno(error));
+ return (bcount);
+}
+
+/*
+ * This syscall supplies 64-bit file offsets to 32-bit applications only.
+ *
+ * The offset arrives as two 32-bit halves: with _LITTLE_ENDIAN defined
+ * offset_1 is the low half and offset_2 the high half, otherwise the
+ * roles are swapped. Up to count bytes are written from cbuf to fdes at
+ * that offset; fp->f_offset is neither consulted nor updated.
+ *
+ * Returns the number of bytes transferred, or the value of set_errno():
+ * EINVAL - count is negative as an ssize_t or exceeds INT32_MAX, or
+ * (regular file) the offset exceeds MAXOFFSET_T
+ * EBADF - fdes is not open (getf() fails) or lacks FWRITE
+ * ESPIPE - fdes refers to a FIFO
+ * EFBIG - (regular file) the offset is at or beyond
+ * curproc->p_fsz_ctl, or equals MAXOFFSET_T
+ * EACCES - nbl_conflict() reports a conflict with the requested range
+ * Errors from nbl_svmand() and VOP_WRITE() are passed through, except
+ * that EINTR is dropped once any data was transferred.
+ */
+ssize32_t
+pwrite64(int fdes, void *cbuf, size32_t count, uint32_t offset_1,
+ uint32_t offset_2)
+{
+ struct uio auio;
+ struct iovec aiov;
+ file_t *fp;
+ register vnode_t *vp;
+ struct cpu *cp;
+ int fflag, ioflag, rwflag;
+ ssize_t bcount;
+ int error = 0;
+ u_offset_t fileoff;
+ int in_crit = 0; /* set once nbl_start_crit() has been called */
+
+#if defined(_LITTLE_ENDIAN)
+ fileoff = ((u_offset_t)offset_2 << 32) | (u_offset_t)offset_1;
+#else
+ fileoff = ((u_offset_t)offset_1 << 32) | (u_offset_t)offset_2;
+#endif
+
+ if ((bcount = (ssize_t)count) < 0 || bcount > INT32_MAX)
+ return (set_errno(EINVAL));
+ if ((fp = getf(fdes)) == NULL)
+ return (set_errno(EBADF));
+ if (((fflag = fp->f_flag) & (FWRITE)) == 0) {
+ error = EBADF;
+ goto out;
+ }
+
+ rwflag = 1; /* mode passed to VOP_RWLOCK/VOP_RWUNLOCK; pread64() uses 0 */
+ vp = fp->f_vnode;
+
+ if (vp->v_type == VREG) {
+
+ if (bcount == 0) /* nothing to do; returns 0 */
+ goto out;
+
+ /*
+ * See comments in pwrite.
+ *
+ * NOTE(review): pwrite() passes RCA_UNSAFE_SIGINFO to
+ * rctl_action() where this function passes RCA_SAFE -
+ * confirm that the difference is intended.
+ */
+ if (fileoff > MAXOFFSET_T) {
+ error = EINVAL;
+ goto out;
+ }
+ if (fileoff >= curproc->p_fsz_ctl) {
+ mutex_enter(&curproc->p_lock);
+ (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE],
+ curproc->p_rctls, curproc, RCA_SAFE);
+ mutex_exit(&curproc->p_lock);
+ error = EFBIG;
+ goto out;
+ }
+ if (fileoff == MAXOFFSET_T) {
+ error = EFBIG;
+ goto out;
+ }
+ if (fileoff + bcount > MAXOFFSET_T)
+ bcount = (ssize_t)((u_offset_t)MAXOFFSET_T - fileoff); /* clip at MAXOFFSET_T */
+ } else if (vp->v_type == VFIFO) {
+ error = ESPIPE;
+ goto out;
+ }
+
+ /*
+ * We have to enter the critical region before calling VOP_RWLOCK
+ * to avoid a deadlock with ufs.
+ */
+ if (nbl_need_check(vp)) {
+ int svmand;
+
+ nbl_start_crit(vp, RW_READER);
+ in_crit = 1;
+ error = nbl_svmand(vp, fp->f_cred, &svmand);
+ if (error != 0)
+ goto out;
+ if (nbl_conflict(vp, NBL_WRITE, fileoff, bcount, svmand)) {
+ error = EACCES;
+ goto out;
+ }
+ }
+
+ aiov.iov_base = cbuf;
+ aiov.iov_len = bcount;
+ (void) VOP_RWLOCK(vp, rwflag, NULL);
+ auio.uio_loffset = fileoff;
+ auio.uio_iov = &aiov;
+ auio.uio_iovcnt = 1;
+ auio.uio_resid = bcount;
+ auio.uio_segflg = UIO_USERSPACE;
+ auio.uio_llimit = curproc->p_fsz_ctl;
+ auio.uio_fmode = fflag;
+ auio.uio_extflg = UIO_COPY_CACHED;
+
+ ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);
+
+ error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL);
+ bcount -= auio.uio_resid; /* bytes actually transferred */
+ CPU_STATS_ENTER_K();
+ cp = CPU;
+ CPU_STATS_ADDQ(cp, sys, syswrite, 1);
+ CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)bcount);
+ CPU_STATS_EXIT_K();
+ ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount;
+ VOP_RWUNLOCK(vp, rwflag, NULL);
+
+ if (error == EINTR && bcount != 0) /* a partial transfer wins over EINTR */
+ error = 0;
+out:
+ if (in_crit)
+ nbl_end_crit(vp);
+ releasef(fdes);
+ if (error)
+ return (set_errno(error));
+ return (bcount);
+}
+
+#endif /* _SYSCALL32_IMPL || _ILP32 */
+
+#ifdef _SYSCALL32_IMPL
+/*
+ * Tail-call elimination of xxx32() down to xxx()
+ *
+ * A number of xxx32 system calls take a len (or count) argument and
+ * return a number in the range [0,len] or -1 on error.
+ * Given an ssize32_t input len, the downcall xxx() will return
+ * a 64-bit value that is -1 or in the range [0,len] which actually
+ * is a proper return value for the xxx32 call. So even if the xxx32
+ * calls can be considered as returning a ssize32_t, they are currently
+ * declared as returning a ssize_t as this enables tail-call elimination.
+ *
+ * The cast of len (or count) to ssize32_t is needed to ensure we pass
+ * down negative input values as such and let the downcall handle error
+ * reporting. Functions covered by this comment are:
+ *
+ * rw.c: read32, write32, pread32, pwrite32, readv32, writev32.
+ * socksyscall.c: recv32, recvfrom32, send32, sendto32.
+ * readlink.c: readlink32.
+ */
+
+/*
+ * 32-bit entry point for read(). The count goes through ssize32_t so a
+ * negative 32-bit count is still negative when read() inspects it.
+ */
+ssize_t
+read32(int32_t fdes, caddr32_t cbuf, size32_t count)
+{
+	void *ubuf = (void *)(uintptr_t)cbuf;
+	ssize32_t scount = (ssize32_t)count;
+
+	return (read(fdes, ubuf, scount));
+}
+
+/*
+ * 32-bit entry point for write(). The count goes through ssize32_t so a
+ * negative 32-bit count is still negative when write() inspects it.
+ */
+ssize_t
+write32(int32_t fdes, caddr32_t cbuf, size32_t count)
+{
+	void *ubuf = (void *)(uintptr_t)cbuf;
+	ssize32_t scount = (ssize32_t)count;
+
+	return (write(fdes, ubuf, scount));
+}
+
+/*
+ * 32-bit entry point for pread(). The count is passed as an ssize32_t;
+ * the offset is converted through uint32_t before widening to off_t.
+ */
+ssize_t
+pread32(int32_t fdes, caddr32_t cbuf, size32_t count, off32_t offset)
+{
+	void *ubuf = (void *)(uintptr_t)cbuf;
+	ssize32_t scount = (ssize32_t)count;
+	off_t uoff = (off_t)(uint32_t)offset;
+
+	return (pread(fdes, ubuf, scount, uoff));
+}
+
+/*
+ * 32-bit entry point for pwrite(). The count is passed as an ssize32_t;
+ * the offset is converted through uint32_t before widening to off_t.
+ */
+ssize_t
+pwrite32(int32_t fdes, caddr32_t cbuf, size32_t count, off32_t offset)
+{
+	void *ubuf = (void *)(uintptr_t)cbuf;
+	ssize32_t scount = (ssize32_t)count;
+	off_t uoff = (off_t)(uint32_t)offset;
+
+	return (pwrite(fdes, ubuf, scount, uoff));
+}
+
+/*
+ * 32-bit entry point for readv(): widens the 32-bit user address of the
+ * iovec array to a pointer and hands everything to readv().
+ */
+ssize_t
+readv32(int32_t fdes, caddr32_t iovp, int32_t iovcnt)
+{
+	void *uiov = (void *)(uintptr_t)iovp;
+
+	return (readv(fdes, uiov, iovcnt));
+}
+
+/*
+ * 32-bit entry point for writev(): widens the 32-bit user address of the
+ * iovec array to a pointer and hands everything to writev().
+ */
+ssize_t
+writev32(int32_t fdes, caddr32_t iovp, int32_t iovcnt)
+{
+	void *uiov = (void *)(uintptr_t)iovp;
+
+	return (writev(fdes, uiov, iovcnt));
+}
+
+#endif /* _SYSCALL32_IMPL */