diff options
author | stevel@tonic-gate <none@none> | 2005-06-14 00:00:00 -0700 |
---|---|---|
committer | stevel@tonic-gate <none@none> | 2005-06-14 00:00:00 -0700 |
commit | 7c478bd95313f5f23a4c958a745db2134aa03244 (patch) | |
tree | c871e58545497667cbb4b0a4f2daf204743e1fe7 /usr/src/uts/common/syscall/rw.c | |
download | illumos-joyent-7c478bd95313f5f23a4c958a745db2134aa03244.tar.gz |
OpenSolaris Launch
Diffstat (limited to 'usr/src/uts/common/syscall/rw.c')
-rw-r--r-- | usr/src/uts/common/syscall/rw.c | 1223 |
1 files changed, 1223 insertions, 0 deletions
diff --git a/usr/src/uts/common/syscall/rw.c b/usr/src/uts/common/syscall/rw.c new file mode 100644 index 0000000000..d2f35e2051 --- /dev/null +++ b/usr/src/uts/common/syscall/rw.c @@ -0,0 +1,1223 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ +/* All Rights Reserved */ + +/* + * Portions of this source code were derived from Berkeley 4.3 BSD + * under license from the Regents of the University of California. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/param.h> +#include <sys/isa_defs.h> +#include <sys/types.h> +#include <sys/inttypes.h> +#include <sys/sysmacros.h> +#include <sys/cred.h> +#include <sys/user.h> +#include <sys/systm.h> +#include <sys/errno.h> +#include <sys/vnode.h> +#include <sys/file.h> +#include <sys/proc.h> +#include <sys/cpuvar.h> +#include <sys/uio.h> +#include <sys/ioreq.h> +#include <sys/debug.h> +#include <sys/rctl.h> +#include <sys/nbmlock.h> + +#define COPYOUT_MIN_SIZE (1<<17) /* 128K */ + +static size_t copyout_min_size = COPYOUT_MIN_SIZE; + +/* + * read, write, pread, pwrite, readv, and writev syscalls. + * + * 64-bit open: all open's are large file opens. + * Large Files: the behaviour of read depends on whether the fd + * corresponds to large open or not. + * 32-bit open: FOFFMAX flag not set. + * read until MAXOFF32_T - 1 and read at MAXOFF32_T returns + * EOVERFLOW if count is non-zero and if size of file + * is > MAXOFF32_T. If size of file is <= MAXOFF32_T read + * at >= MAXOFF32_T returns EOF. + */ + +/* + * Native system call + */ +ssize_t +read(int fdes, void *cbuf, size_t count) +{ + struct uio auio; + struct iovec aiov; + file_t *fp; + register vnode_t *vp; + struct cpu *cp; + int fflag, ioflag, rwflag; + ssize_t cnt, bcount; + int error = 0; + u_offset_t fileoff; + int in_crit = 0; + + if ((cnt = (ssize_t)count) < 0) + return (set_errno(EINVAL)); + if ((fp = getf(fdes)) == NULL) + return (set_errno(EBADF)); + if (((fflag = fp->f_flag) & FREAD) == 0) { + error = EBADF; + goto out; + } + vp = fp->f_vnode; + + if (vp->v_type == VREG && cnt == 0) { + goto out; + } + + rwflag = 0; + aiov.iov_base = cbuf; + aiov.iov_len = cnt; + + /* + * We have to enter the critical region before calling VOP_RWLOCK + * to avoid a deadlock with write() calls. + */ + if (nbl_need_check(vp)) { + int svmand; + + nbl_start_crit(vp, RW_READER); + in_crit = 1; + error = nbl_svmand(vp, fp->f_cred, &svmand); + if (error != 0) + goto out; + if (nbl_conflict(vp, NBL_READ, fp->f_offset, cnt, svmand)) { + error = EACCES; + goto out; + } + } + + (void) VOP_RWLOCK(vp, rwflag, NULL); + + /* + * We do the following checks inside VOP_RWLOCK so as to + * prevent file size from changing while these checks are + * being done. Also, we load fp's offset to the local + * variable fileoff because we can have a parallel lseek + * going on (f_offset is not protected by any lock) which + * could change f_offset. We need to see the value only + * once here and take a decision. Seeing it more than once + * can lead to incorrect functionality. + */ + + fileoff = (u_offset_t)fp->f_offset; + if (fileoff >= OFFSET_MAX(fp) && (vp->v_type == VREG)) { + struct vattr va; + va.va_mask = AT_SIZE; + if ((error = VOP_GETATTR(vp, &va, 0, fp->f_cred))) { + VOP_RWUNLOCK(vp, rwflag, NULL); + goto out; + } + if (fileoff >= va.va_size) { + cnt = 0; + VOP_RWUNLOCK(vp, rwflag, NULL); + goto out; + } else { + error = EOVERFLOW; + VOP_RWUNLOCK(vp, rwflag, NULL); + goto out; + } + } + if ((vp->v_type == VREG) && + (fileoff + cnt > OFFSET_MAX(fp))) { + cnt = (ssize_t)(OFFSET_MAX(fp) - fileoff); + } + auio.uio_loffset = fileoff; + auio.uio_iov = &aiov; + auio.uio_iovcnt = 1; + auio.uio_resid = bcount = cnt; + auio.uio_segflg = UIO_USERSPACE; + auio.uio_llimit = MAXOFFSET_T; + auio.uio_fmode = fflag; + /* + * Only use bypass caches when the count is large enough + */ + if (bcount < copyout_min_size) + auio.uio_extflg = UIO_COPY_CACHED; + else + auio.uio_extflg = UIO_COPY_DEFAULT; + + ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC); + + /* If read sync is not asked for, filter sync flags */ + if ((ioflag & FRSYNC) == 0) + ioflag &= ~(FSYNC|FDSYNC); + error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL); + cnt -= auio.uio_resid; + CPU_STATS_ENTER_K(); + cp = CPU; + CPU_STATS_ADDQ(cp, sys, sysread, 1); + CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)cnt); + CPU_STATS_EXIT_K(); + ttolwp(curthread)->lwp_ru.ioch += (ulong_t)cnt; + + if (vp->v_type == VFIFO) /* Backward compatibility */ + fp->f_offset = cnt; + else if (((fp->f_flag & FAPPEND) == 0) || + (vp->v_type != VREG) || (bcount != 0)) /* POSIX */ + fp->f_offset = auio.uio_loffset; + VOP_RWUNLOCK(vp, rwflag, NULL); + + if (error == EINTR && cnt != 0) + error = 0; +out: + if (in_crit) + nbl_end_crit(vp); + releasef(fdes); + if (error) + return (set_errno(error)); + return (cnt); +} + +/* + * Native system call + */ +ssize_t +write(int fdes, void *cbuf, size_t count) +{ + struct uio auio; + struct iovec aiov; + file_t *fp; + register vnode_t *vp; + struct cpu *cp; + int fflag, ioflag, rwflag; + ssize_t cnt, bcount; + int error = 0; + u_offset_t fileoff; + int in_crit = 0; + + if ((cnt = (ssize_t)count) < 0) + return (set_errno(EINVAL)); + if ((fp = getf(fdes)) == NULL) + return (set_errno(EBADF)); + if (((fflag = fp->f_flag) & FWRITE) == 0) { + error = EBADF; + goto out; + } + vp = fp->f_vnode; + + if (vp->v_type == VREG && cnt == 0) { + goto out; + } + + rwflag = 1; + aiov.iov_base = cbuf; + aiov.iov_len = cnt; + + /* + * We have to enter the critical region before calling VOP_RWLOCK + * to avoid a deadlock with ufs. + */ + if (nbl_need_check(vp)) { + int svmand; + + nbl_start_crit(vp, RW_READER); + in_crit = 1; + error = nbl_svmand(vp, fp->f_cred, &svmand); + if (error != 0) + goto out; + if (nbl_conflict(vp, NBL_WRITE, fp->f_offset, cnt, svmand)) { + error = EACCES; + goto out; + } + } + + (void) VOP_RWLOCK(vp, rwflag, NULL); + + fileoff = fp->f_offset; + if (vp->v_type == VREG) { + + /* + * We raise psignal if write for >0 bytes causes + * it to exceed the ulimit. + */ + if (fileoff >= curproc->p_fsz_ctl) { + VOP_RWUNLOCK(vp, rwflag, NULL); + + mutex_enter(&curproc->p_lock); + (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], + curproc->p_rctls, curproc, RCA_UNSAFE_SIGINFO); + mutex_exit(&curproc->p_lock); + + error = EFBIG; + goto out; + } + /* + * We return EFBIG if write is done at an offset + * greater than the offset maximum for this file structure. + */ + + if (fileoff >= OFFSET_MAX(fp)) { + VOP_RWUNLOCK(vp, rwflag, NULL); + error = EFBIG; + goto out; + } + /* + * Limit the bytes to be written upto offset maximum for + * this open file structure. + */ + if (fileoff + cnt > OFFSET_MAX(fp)) + cnt = (ssize_t)(OFFSET_MAX(fp) - fileoff); + } + auio.uio_loffset = fileoff; + auio.uio_iov = &aiov; + auio.uio_iovcnt = 1; + auio.uio_resid = bcount = cnt; + auio.uio_segflg = UIO_USERSPACE; + auio.uio_llimit = curproc->p_fsz_ctl; + auio.uio_fmode = fflag; + auio.uio_extflg = UIO_COPY_DEFAULT; + + ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC); + + error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL); + cnt -= auio.uio_resid; + CPU_STATS_ENTER_K(); + cp = CPU; + CPU_STATS_ADDQ(cp, sys, syswrite, 1); + CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)cnt); + CPU_STATS_EXIT_K(); + ttolwp(curthread)->lwp_ru.ioch += (ulong_t)cnt; + + if (vp->v_type == VFIFO) /* Backward compatibility */ + fp->f_offset = cnt; + else if (((fp->f_flag & FAPPEND) == 0) || + (vp->v_type != VREG) || (bcount != 0)) /* POSIX */ + fp->f_offset = auio.uio_loffset; + VOP_RWUNLOCK(vp, rwflag, NULL); + + if (error == EINTR && cnt != 0) + error = 0; +out: + if (in_crit) + nbl_end_crit(vp); + releasef(fdes); + if (error) + return (set_errno(error)); + return (cnt); +} + +ssize_t +pread(int fdes, void *cbuf, size_t count, off_t offset) +{ + struct uio auio; + struct iovec aiov; + file_t *fp; + register vnode_t *vp; + struct cpu *cp; + int fflag, ioflag, rwflag; + ssize_t bcount; + int error = 0; + u_offset_t fileoff = (u_offset_t)(ulong_t)offset; +#ifdef _SYSCALL32_IMPL + u_offset_t maxoff = get_udatamodel() == DATAMODEL_ILP32 ? + MAXOFF32_T : MAXOFFSET_T; +#else + const u_offset_t maxoff = MAXOFF32_T; +#endif + int in_crit = 0; + + if ((bcount = (ssize_t)count) < 0) + return (set_errno(EINVAL)); + + if ((fp = getf(fdes)) == NULL) + return (set_errno(EBADF)); + if (((fflag = fp->f_flag) & (FREAD)) == 0) { + error = EBADF; + goto out; + } + + rwflag = 0; + vp = fp->f_vnode; + + if (vp->v_type == VREG) { + + if (bcount == 0) + goto out; + + /* + * Return EINVAL if an invalid offset comes to pread. + * Negative offset from user will cause this error. + */ + + if (fileoff > maxoff) { + error = EINVAL; + goto out; + } + /* + * Limit offset such that we don't read or write + * a file beyond the maximum offset representable in + * an off_t structure. + */ + if (fileoff + bcount > maxoff) + bcount = (ssize_t)((offset_t)maxoff - fileoff); + } else if (vp->v_type == VFIFO) { + error = ESPIPE; + goto out; + } + + /* + * We have to enter the critical region before calling VOP_RWLOCK + * to avoid a deadlock with ufs. + */ + if (nbl_need_check(vp)) { + int svmand; + + nbl_start_crit(vp, RW_READER); + in_crit = 1; + error = nbl_svmand(vp, fp->f_cred, &svmand); + if (error != 0) + goto out; + if (nbl_conflict(vp, NBL_READ, fileoff, bcount, svmand)) { + error = EACCES; + goto out; + } + } + + aiov.iov_base = cbuf; + aiov.iov_len = bcount; + (void) VOP_RWLOCK(vp, rwflag, NULL); + if (vp->v_type == VREG && fileoff == (u_offset_t)maxoff) { + struct vattr va; + va.va_mask = AT_SIZE; + if ((error = VOP_GETATTR(vp, &va, 0, fp->f_cred))) { + VOP_RWUNLOCK(vp, rwflag, NULL); + goto out; + } + VOP_RWUNLOCK(vp, rwflag, NULL); + + /* + * We have to return EOF if fileoff is >= file size. + */ + if (fileoff >= va.va_size) { + bcount = 0; + goto out; + } + + /* + * File is greater than or equal to maxoff and therefore + * we return EOVERFLOW. + */ + error = EOVERFLOW; + goto out; + } + auio.uio_loffset = fileoff; + auio.uio_iov = &aiov; + auio.uio_iovcnt = 1; + auio.uio_resid = bcount; + auio.uio_segflg = UIO_USERSPACE; + auio.uio_llimit = MAXOFFSET_T; + auio.uio_fmode = fflag; + auio.uio_extflg = UIO_COPY_CACHED; + + ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC); + + /* If read sync is not asked for, filter sync flags */ + if ((ioflag & FRSYNC) == 0) + ioflag &= ~(FSYNC|FDSYNC); + error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL); + bcount -= auio.uio_resid; + CPU_STATS_ENTER_K(); + cp = CPU; + CPU_STATS_ADDQ(cp, sys, sysread, 1); + CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)bcount); + CPU_STATS_EXIT_K(); + ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount; + VOP_RWUNLOCK(vp, rwflag, NULL); + + if (error == EINTR && bcount != 0) + error = 0; +out: + if (in_crit) + nbl_end_crit(vp); + releasef(fdes); + if (error) + return (set_errno(error)); + return (bcount); +} + +ssize_t +pwrite(int fdes, void *cbuf, size_t count, off_t offset) +{ + struct uio auio; + struct iovec aiov; + file_t *fp; + register vnode_t *vp; + struct cpu *cp; + int fflag, ioflag, rwflag; + ssize_t bcount; + int error = 0; + u_offset_t fileoff = (u_offset_t)(ulong_t)offset; +#ifdef _SYSCALL32_IMPL + u_offset_t maxoff = get_udatamodel() == DATAMODEL_ILP32 ? + MAXOFF32_T : MAXOFFSET_T; +#else + const u_offset_t maxoff = MAXOFF32_T; +#endif + int in_crit = 0; + + if ((bcount = (ssize_t)count) < 0) + return (set_errno(EINVAL)); + if ((fp = getf(fdes)) == NULL) + return (set_errno(EBADF)); + if (((fflag = fp->f_flag) & (FWRITE)) == 0) { + error = EBADF; + goto out; + } + + rwflag = 1; + vp = fp->f_vnode; + + if (vp->v_type == VREG) { + + if (bcount == 0) + goto out; + + /* + * return EINVAL for offsets that cannot be + * represented in an off_t. + */ + if (fileoff > maxoff) { + error = EINVAL; + goto out; + } + /* + * Take appropriate action if we are trying to write above the + * resource limit. + */ + if (fileoff >= curproc->p_fsz_ctl) { + mutex_enter(&curproc->p_lock); + (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], + curproc->p_rctls, curproc, RCA_UNSAFE_SIGINFO); + mutex_exit(&curproc->p_lock); + + error = EFBIG; + goto out; + } + /* + * Don't allow pwrite to cause file sizes to exceed + * maxoff. + */ + if (fileoff == maxoff) { + error = EFBIG; + goto out; + } + if (fileoff + count > maxoff) + bcount = (ssize_t)((u_offset_t)maxoff - fileoff); + } else if (vp->v_type == VFIFO) { + error = ESPIPE; + goto out; + } + + /* + * We have to enter the critical region before calling VOP_RWLOCK + * to avoid a deadlock with ufs. + */ + if (nbl_need_check(vp)) { + int svmand; + + nbl_start_crit(vp, RW_READER); + in_crit = 1; + error = nbl_svmand(vp, fp->f_cred, &svmand); + if (error != 0) + goto out; + if (nbl_conflict(vp, NBL_WRITE, fileoff, bcount, svmand)) { + error = EACCES; + goto out; + } + } + + aiov.iov_base = cbuf; + aiov.iov_len = bcount; + (void) VOP_RWLOCK(vp, rwflag, NULL); + auio.uio_loffset = fileoff; + auio.uio_iov = &aiov; + auio.uio_iovcnt = 1; + auio.uio_resid = bcount; + auio.uio_segflg = UIO_USERSPACE; + auio.uio_llimit = curproc->p_fsz_ctl; + auio.uio_fmode = fflag; + auio.uio_extflg = UIO_COPY_CACHED; + + ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC); + + error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL); + bcount -= auio.uio_resid; + CPU_STATS_ENTER_K(); + cp = CPU; + CPU_STATS_ADDQ(cp, sys, syswrite, 1); + CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)bcount); + CPU_STATS_EXIT_K(); + ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount; + VOP_RWUNLOCK(vp, rwflag, NULL); + + if (error == EINTR && bcount != 0) + error = 0; +out: + if (in_crit) + nbl_end_crit(vp); + releasef(fdes); + if (error) + return (set_errno(error)); + return (bcount); +} + +/* + * XXX -- The SVID refers to IOV_MAX, but doesn't define it. Grrrr.... + * XXX -- However, SVVS expects readv() and writev() to fail if + * XXX -- iovcnt > 16 (yes, it's hard-coded in the SVVS source), + * XXX -- so I guess that's the "interface". + */ +#define DEF_IOV_MAX 16 + +ssize_t +readv(int fdes, struct iovec *iovp, int iovcnt) +{ + struct uio auio; + struct iovec aiov[DEF_IOV_MAX]; + file_t *fp; + register vnode_t *vp; + struct cpu *cp; + int fflag, ioflag, rwflag; + ssize_t count, bcount; + int error = 0; + int i; + u_offset_t fileoff; + int in_crit = 0; + + if (iovcnt <= 0 || iovcnt > DEF_IOV_MAX) + return (set_errno(EINVAL)); + +#ifdef _SYSCALL32_IMPL + /* + * 32-bit callers need to have their iovec expanded, + * while ensuring that they can't move more than 2Gbytes + * of data in a single call. + */ + if (get_udatamodel() == DATAMODEL_ILP32) { + struct iovec32 aiov32[DEF_IOV_MAX]; + ssize32_t count32; + + if (copyin(iovp, aiov32, iovcnt * sizeof (struct iovec32))) + return (set_errno(EFAULT)); + + count32 = 0; + for (i = 0; i < iovcnt; i++) { + ssize32_t iovlen32 = aiov32[i].iov_len; + count32 += iovlen32; + if (iovlen32 < 0 || count32 < 0) + return (set_errno(EINVAL)); + aiov[i].iov_len = iovlen32; + aiov[i].iov_base = + (caddr_t)(uintptr_t)aiov32[i].iov_base; + } + } else +#endif + if (copyin(iovp, aiov, iovcnt * sizeof (struct iovec))) + return (set_errno(EFAULT)); + + count = 0; + for (i = 0; i < iovcnt; i++) { + ssize_t iovlen = aiov[i].iov_len; + count += iovlen; + if (iovlen < 0 || count < 0) + return (set_errno(EINVAL)); + } + if ((fp = getf(fdes)) == NULL) + return (set_errno(EBADF)); + if (((fflag = fp->f_flag) & FREAD) == 0) { + error = EBADF; + goto out; + } + vp = fp->f_vnode; + if (vp->v_type == VREG && count == 0) { + goto out; + } + + rwflag = 0; + + /* + * We have to enter the critical region before calling VOP_RWLOCK + * to avoid a deadlock with ufs. + */ + if (nbl_need_check(vp)) { + int svmand; + + nbl_start_crit(vp, RW_READER); + in_crit = 1; + error = nbl_svmand(vp, fp->f_cred, &svmand); + if (error != 0) + goto out; + if (nbl_conflict(vp, NBL_READ, fp->f_offset, count, svmand)) { + error = EACCES; + goto out; + } + } + + (void) VOP_RWLOCK(vp, rwflag, NULL); + fileoff = fp->f_offset; + + /* + * Behaviour is same as read. Please see comments in read. + */ + + if ((vp->v_type == VREG) && (fileoff >= OFFSET_MAX(fp))) { + struct vattr va; + va.va_mask = AT_SIZE; + if ((error = VOP_GETATTR(vp, &va, 0, fp->f_cred))) { + VOP_RWUNLOCK(vp, rwflag, NULL); + goto out; + } + if (fileoff >= va.va_size) { + VOP_RWUNLOCK(vp, rwflag, NULL); + count = 0; + goto out; + } else { + VOP_RWUNLOCK(vp, rwflag, NULL); + error = EOVERFLOW; + goto out; + } + } + if ((vp->v_type == VREG) && (fileoff + count > OFFSET_MAX(fp))) { + count = (ssize_t)(OFFSET_MAX(fp) - fileoff); + } + auio.uio_loffset = fileoff; + auio.uio_iov = aiov; + auio.uio_iovcnt = iovcnt; + auio.uio_resid = bcount = count; + auio.uio_segflg = UIO_USERSPACE; + auio.uio_llimit = MAXOFFSET_T; + auio.uio_fmode = fflag; + if (bcount < copyout_min_size) + auio.uio_extflg = UIO_COPY_CACHED; + else + auio.uio_extflg = UIO_COPY_DEFAULT; + + + ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC); + + /* If read sync is not asked for, filter sync flags */ + if ((ioflag & FRSYNC) == 0) + ioflag &= ~(FSYNC|FDSYNC); + error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL); + count -= auio.uio_resid; + CPU_STATS_ENTER_K(); + cp = CPU; + CPU_STATS_ADDQ(cp, sys, sysread, 1); + CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)count); + CPU_STATS_EXIT_K(); + ttolwp(curthread)->lwp_ru.ioch += (ulong_t)count; + + if (vp->v_type == VFIFO) /* Backward compatibility */ + fp->f_offset = count; + else if (((fp->f_flag & FAPPEND) == 0) || + (vp->v_type != VREG) || (bcount != 0)) /* POSIX */ + fp->f_offset = auio.uio_loffset; + + VOP_RWUNLOCK(vp, rwflag, NULL); + + if (error == EINTR && count != 0) + error = 0; +out: + if (in_crit) + nbl_end_crit(vp); + releasef(fdes); + if (error) + return (set_errno(error)); + return (count); +} + +ssize_t +writev(int fdes, struct iovec *iovp, int iovcnt) +{ + struct uio auio; + struct iovec aiov[DEF_IOV_MAX]; + file_t *fp; + register vnode_t *vp; + struct cpu *cp; + int fflag, ioflag, rwflag; + ssize_t count, bcount; + int error = 0; + int i; + u_offset_t fileoff; + int in_crit = 0; + + if (iovcnt <= 0 || iovcnt > DEF_IOV_MAX) + return (set_errno(EINVAL)); + +#ifdef _SYSCALL32_IMPL + /* + * 32-bit callers need to have their iovec expanded, + * while ensuring that they can't move more than 2Gbytes + * of data in a single call. + */ + if (get_udatamodel() == DATAMODEL_ILP32) { + struct iovec32 aiov32[DEF_IOV_MAX]; + ssize32_t count32; + + if (copyin(iovp, aiov32, iovcnt * sizeof (struct iovec32))) + return (set_errno(EFAULT)); + + count32 = 0; + for (i = 0; i < iovcnt; i++) { + ssize32_t iovlen = aiov32[i].iov_len; + count32 += iovlen; + if (iovlen < 0 || count32 < 0) + return (set_errno(EINVAL)); + aiov[i].iov_len = iovlen; + aiov[i].iov_base = + (caddr_t)(uintptr_t)aiov32[i].iov_base; + } + } else +#endif + if (copyin(iovp, aiov, iovcnt * sizeof (struct iovec))) + return (set_errno(EFAULT)); + + count = 0; + for (i = 0; i < iovcnt; i++) { + ssize_t iovlen = aiov[i].iov_len; + count += iovlen; + if (iovlen < 0 || count < 0) + return (set_errno(EINVAL)); + } + if ((fp = getf(fdes)) == NULL) + return (set_errno(EBADF)); + if (((fflag = fp->f_flag) & FWRITE) == 0) { + error = EBADF; + goto out; + } + vp = fp->f_vnode; + if (vp->v_type == VREG && count == 0) { + goto out; + } + + rwflag = 1; + + /* + * We have to enter the critical region before calling VOP_RWLOCK + * to avoid a deadlock with ufs. + */ + if (nbl_need_check(vp)) { + int svmand; + + nbl_start_crit(vp, RW_READER); + in_crit = 1; + error = nbl_svmand(vp, fp->f_cred, &svmand); + if (error != 0) + goto out; + if (nbl_conflict(vp, NBL_WRITE, fp->f_offset, count, svmand)) { + error = EACCES; + goto out; + } + } + + (void) VOP_RWLOCK(vp, rwflag, NULL); + + fileoff = fp->f_offset; + + /* + * Behaviour is same as write. Please see comments for write. + */ + + if (vp->v_type == VREG) { + if (fileoff >= curproc->p_fsz_ctl) { + VOP_RWUNLOCK(vp, rwflag, NULL); + mutex_enter(&curproc->p_lock); + (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], + curproc->p_rctls, curproc, RCA_UNSAFE_SIGINFO); + mutex_exit(&curproc->p_lock); + error = EFBIG; + goto out; + } + if (fileoff >= OFFSET_MAX(fp)) { + VOP_RWUNLOCK(vp, rwflag, NULL); + error = EFBIG; + goto out; + } + if (fileoff + count > OFFSET_MAX(fp)) + count = (ssize_t)(OFFSET_MAX(fp) - fileoff); + } + auio.uio_loffset = fileoff; + auio.uio_iov = aiov; + auio.uio_iovcnt = iovcnt; + auio.uio_resid = bcount = count; + auio.uio_segflg = UIO_USERSPACE; + auio.uio_llimit = curproc->p_fsz_ctl; + auio.uio_fmode = fflag; + auio.uio_extflg = UIO_COPY_DEFAULT; + + ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC); + + error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL); + count -= auio.uio_resid; + CPU_STATS_ENTER_K(); + cp = CPU; + CPU_STATS_ADDQ(cp, sys, syswrite, 1); + CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)count); + CPU_STATS_EXIT_K(); + ttolwp(curthread)->lwp_ru.ioch += (ulong_t)count; + + if (vp->v_type == VFIFO) /* Backward compatibility */ + fp->f_offset = count; + else if (((fp->f_flag & FAPPEND) == 0) || + (vp->v_type != VREG) || (bcount != 0)) /* POSIX */ + fp->f_offset = auio.uio_loffset; + VOP_RWUNLOCK(vp, rwflag, NULL); + + if (error == EINTR && count != 0) + error = 0; +out: + if (in_crit) + nbl_end_crit(vp); + releasef(fdes); + if (error) + return (set_errno(error)); + return (count); +} + +#if defined(_SYSCALL32_IMPL) || defined(_ILP32) + +/* + * This syscall supplies 64-bit file offsets to 32-bit applications only. + */ +ssize32_t +pread64(int fdes, void *cbuf, size32_t count, uint32_t offset_1, + uint32_t offset_2) +{ + struct uio auio; + struct iovec aiov; + file_t *fp; + register vnode_t *vp; + struct cpu *cp; + int fflag, ioflag, rwflag; + ssize_t bcount; + int error = 0; + u_offset_t fileoff; + int in_crit = 0; + +#if defined(_LITTLE_ENDIAN) + fileoff = ((u_offset_t)offset_2 << 32) | (u_offset_t)offset_1; +#else + fileoff = ((u_offset_t)offset_1 << 32) | (u_offset_t)offset_2; +#endif + + if ((bcount = (ssize_t)count) < 0 || bcount > INT32_MAX) + return (set_errno(EINVAL)); + + if ((fp = getf(fdes)) == NULL) + return (set_errno(EBADF)); + if (((fflag = fp->f_flag) & (FREAD)) == 0) { + error = EBADF; + goto out; + } + + rwflag = 0; + vp = fp->f_vnode; + + if (vp->v_type == VREG) { + + if (bcount == 0) + goto out; + + /* + * Same as pread. See comments in pread. + */ + + if (fileoff > MAXOFFSET_T) { + error = EINVAL; + goto out; + } + if (fileoff + bcount > MAXOFFSET_T) + bcount = (ssize_t)(MAXOFFSET_T - fileoff); + } else if (vp->v_type == VFIFO) { + error = ESPIPE; + goto out; + } + + /* + * We have to enter the critical region before calling VOP_RWLOCK + * to avoid a deadlock with ufs. + */ + if (nbl_need_check(vp)) { + int svmand; + + nbl_start_crit(vp, RW_READER); + in_crit = 1; + error = nbl_svmand(vp, fp->f_cred, &svmand); + if (error != 0) + goto out; + if (nbl_conflict(vp, NBL_READ, fileoff, bcount, svmand)) { + error = EACCES; + goto out; + } + } + + aiov.iov_base = cbuf; + aiov.iov_len = bcount; + (void) VOP_RWLOCK(vp, rwflag, NULL); + auio.uio_loffset = fileoff; + + /* + * Note: File size can never be greater than MAXOFFSET_T. + * If ever we start supporting 128 bit files the code + * similar to the one in pread at this place should be here. + * Here we avoid the unnecessary VOP_GETATTR() when we + * know that fileoff == MAXOFFSET_T implies that it is always + * greater than or equal to file size. + */ + auio.uio_iov = &aiov; + auio.uio_iovcnt = 1; + auio.uio_resid = bcount; + auio.uio_segflg = UIO_USERSPACE; + auio.uio_llimit = MAXOFFSET_T; + auio.uio_fmode = fflag; + auio.uio_extflg = UIO_COPY_CACHED; + + ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC); + + /* If read sync is not asked for, filter sync flags */ + if ((ioflag & FRSYNC) == 0) + ioflag &= ~(FSYNC|FDSYNC); + error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL); + bcount -= auio.uio_resid; + CPU_STATS_ENTER_K(); + cp = CPU; + CPU_STATS_ADDQ(cp, sys, sysread, 1); + CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)bcount); + CPU_STATS_EXIT_K(); + ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount; + VOP_RWUNLOCK(vp, rwflag, NULL); + + if (error == EINTR && bcount != 0) + error = 0; +out: + if (in_crit) + nbl_end_crit(vp); + releasef(fdes); + if (error) + return (set_errno(error)); + return (bcount); +} + +/* + * This syscall supplies 64-bit file offsets to 32-bit applications only. + */ +ssize32_t +pwrite64(int fdes, void *cbuf, size32_t count, uint32_t offset_1, + uint32_t offset_2) +{ + struct uio auio; + struct iovec aiov; + file_t *fp; + register vnode_t *vp; + struct cpu *cp; + int fflag, ioflag, rwflag; + ssize_t bcount; + int error = 0; + u_offset_t fileoff; + int in_crit = 0; + +#if defined(_LITTLE_ENDIAN) + fileoff = ((u_offset_t)offset_2 << 32) | (u_offset_t)offset_1; +#else + fileoff = ((u_offset_t)offset_1 << 32) | (u_offset_t)offset_2; +#endif + + if ((bcount = (ssize_t)count) < 0 || bcount > INT32_MAX) + return (set_errno(EINVAL)); + if ((fp = getf(fdes)) == NULL) + return (set_errno(EBADF)); + if (((fflag = fp->f_flag) & (FWRITE)) == 0) { + error = EBADF; + goto out; + } + + rwflag = 1; + vp = fp->f_vnode; + + if (vp->v_type == VREG) { + + if (bcount == 0) + goto out; + + /* + * See comments in pwrite. + */ + if (fileoff > MAXOFFSET_T) { + error = EINVAL; + goto out; + } + if (fileoff >= curproc->p_fsz_ctl) { + mutex_enter(&curproc->p_lock); + (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], + curproc->p_rctls, curproc, RCA_SAFE); + mutex_exit(&curproc->p_lock); + error = EFBIG; + goto out; + } + if (fileoff == MAXOFFSET_T) { + error = EFBIG; + goto out; + } + if (fileoff + bcount > MAXOFFSET_T) + bcount = (ssize_t)((u_offset_t)MAXOFFSET_T - fileoff); + } else if (vp->v_type == VFIFO) { + error = ESPIPE; + goto out; + } + + /* + * We have to enter the critical region before calling VOP_RWLOCK + * to avoid a deadlock with ufs. + */ + if (nbl_need_check(vp)) { + int svmand; + + nbl_start_crit(vp, RW_READER); + in_crit = 1; + error = nbl_svmand(vp, fp->f_cred, &svmand); + if (error != 0) + goto out; + if (nbl_conflict(vp, NBL_WRITE, fileoff, bcount, svmand)) { + error = EACCES; + goto out; + } + } + + aiov.iov_base = cbuf; + aiov.iov_len = bcount; + (void) VOP_RWLOCK(vp, rwflag, NULL); + auio.uio_loffset = fileoff; + auio.uio_iov = &aiov; + auio.uio_iovcnt = 1; + auio.uio_resid = bcount; + auio.uio_segflg = UIO_USERSPACE; + auio.uio_llimit = curproc->p_fsz_ctl; + auio.uio_fmode = fflag; + auio.uio_extflg = UIO_COPY_CACHED; + + ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC); + + error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL); + bcount -= auio.uio_resid; + CPU_STATS_ENTER_K(); + cp = CPU; + CPU_STATS_ADDQ(cp, sys, syswrite, 1); + CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)bcount); + CPU_STATS_EXIT_K(); + ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount; + VOP_RWUNLOCK(vp, rwflag, NULL); + + if (error == EINTR && bcount != 0) + error = 0; +out: + if (in_crit) + nbl_end_crit(vp); + releasef(fdes); + if (error) + return (set_errno(error)); + return (bcount); +} + +#endif /* _SYSCALL32_IMPL || _ILP32 */ + +#ifdef _SYSCALL32_IMPL +/* + * Tail-call elimination of xxx32() down to xxx() + * + * A number of xxx32 system calls take a len (or count) argument and + * return a number in the range [0,len] or -1 on error. + * Given an ssize32_t input len, the downcall xxx() will return + * a 64-bit value that is -1 or in the range [0,len] which actually + * is a proper return value for the xxx32 call. So even if the xxx32 + * calls can be considered as returning a ssize32_t, they are currently + * declared as returning a ssize_t as this enables tail-call elimination. + * + * The cast of len (or count) to ssize32_t is needed to ensure we pass + * down negative input values as such and let the downcall handle error + * reporting. Functions covered by this comments are: + * + * rw.c: read32, write32, pread32, pwrite32, readv32, writev32. + * socksyscall.c: recv32, recvfrom32, send32, sendto32. + * readlink.c: readlink32. + */ + +ssize_t +read32(int32_t fdes, caddr32_t cbuf, size32_t count) +{ + return (read(fdes, + (void *)(uintptr_t)cbuf, (ssize32_t)count)); +} + +ssize_t +write32(int32_t fdes, caddr32_t cbuf, size32_t count) +{ + return (write(fdes, + (void *)(uintptr_t)cbuf, (ssize32_t)count)); +} + +ssize_t +pread32(int32_t fdes, caddr32_t cbuf, size32_t count, off32_t offset) +{ + return (pread(fdes, + (void *)(uintptr_t)cbuf, (ssize32_t)count, + (off_t)(uint32_t)offset)); +} + +ssize_t +pwrite32(int32_t fdes, caddr32_t cbuf, size32_t count, off32_t offset) +{ + return (pwrite(fdes, + (void *)(uintptr_t)cbuf, (ssize32_t)count, + (off_t)(uint32_t)offset)); +} + +ssize_t +readv32(int32_t fdes, caddr32_t iovp, int32_t iovcnt) +{ + return (readv(fdes, (void *)(uintptr_t)iovp, iovcnt)); +} + +ssize_t +writev32(int32_t fdes, caddr32_t iovp, int32_t iovcnt) +{ + return (writev(fdes, (void *)(uintptr_t)iovp, iovcnt)); +} + +#endif /* _SYSCALL32_IMPL */ |