diff options
Diffstat (limited to 'usr/src/uts/common/syscall/rw.c')
-rw-r--r-- | usr/src/uts/common/syscall/rw.c | 375 |
1 files changed, 235 insertions, 140 deletions
diff --git a/usr/src/uts/common/syscall/rw.c b/usr/src/uts/common/syscall/rw.c index a28894b2c9..23f03e841d 100644 --- a/usr/src/uts/common/syscall/rw.c +++ b/usr/src/uts/common/syscall/rw.c @@ -22,7 +22,7 @@ /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. - * Copyright (c) 2015, Joyent, Inc. All rights reserved. + * Copyright 2017, Joyent, Inc. */ /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ @@ -50,6 +50,7 @@ #include <sys/debug.h> #include <sys/rctl.h> #include <sys/nbmlock.h> +#include <sys/limits.h> #define COPYOUT_MAX_CACHE (1<<17) /* 128K */ @@ -607,19 +608,12 @@ out: return (bcount); } -/* - * XXX -- The SVID refers to IOV_MAX, but doesn't define it. Grrrr.... - * XXX -- However, SVVS expects readv() and writev() to fail if - * XXX -- iovcnt > 16 (yes, it's hard-coded in the SVVS source), - * XXX -- so I guess that's the "interface". - */ -#define DEF_IOV_MAX 16 - ssize_t readv(int fdes, struct iovec *iovp, int iovcnt) { struct uio auio; - struct iovec aiov[DEF_IOV_MAX]; + struct iovec buf[IOV_MAX_STACK], *aiov = buf; + int aiovlen = 0; file_t *fp; register vnode_t *vp; struct cpu *cp; @@ -630,9 +624,14 @@ readv(int fdes, struct iovec *iovp, int iovcnt) u_offset_t fileoff; int in_crit = 0; - if (iovcnt <= 0 || iovcnt > DEF_IOV_MAX) + if (iovcnt <= 0 || iovcnt > IOV_MAX) return (set_errno(EINVAL)); + if (iovcnt > IOV_MAX_STACK) { + aiovlen = iovcnt * sizeof (iovec_t); + aiov = kmem_alloc(aiovlen, KM_SLEEP); + } + #ifdef _SYSCALL32_IMPL /* * 32-bit callers need to have their iovec expanded, @@ -640,36 +639,63 @@ readv(int fdes, struct iovec *iovp, int iovcnt) * of data in a single call. */ if (get_udatamodel() == DATAMODEL_ILP32) { - struct iovec32 aiov32[DEF_IOV_MAX]; + struct iovec32 buf32[IOV_MAX_STACK], *aiov32 = buf32; + int aiov32len; ssize32_t count32; - if (copyin(iovp, aiov32, iovcnt * sizeof (struct iovec32))) + aiov32len = iovcnt * sizeof (iovec32_t); + if (aiovlen != 0) + aiov32 = kmem_alloc(aiov32len, KM_SLEEP); + + if (copyin(iovp, aiov32, aiov32len)) { + if (aiovlen != 0) { + kmem_free(aiov32, aiov32len); + kmem_free(aiov, aiovlen); + } return (set_errno(EFAULT)); + } count32 = 0; for (i = 0; i < iovcnt; i++) { ssize32_t iovlen32 = aiov32[i].iov_len; count32 += iovlen32; - if (iovlen32 < 0 || count32 < 0) + if (iovlen32 < 0 || count32 < 0) { + if (aiovlen != 0) { + kmem_free(aiov32, aiov32len); + kmem_free(aiov, aiovlen); + } return (set_errno(EINVAL)); + } aiov[i].iov_len = iovlen32; aiov[i].iov_base = (caddr_t)(uintptr_t)aiov32[i].iov_base; } + + if (aiovlen != 0) + kmem_free(aiov32, aiov32len); } else #endif - if (copyin(iovp, aiov, iovcnt * sizeof (struct iovec))) + if (copyin(iovp, aiov, iovcnt * sizeof (iovec_t))) { + if (aiovlen != 0) + kmem_free(aiov, aiovlen); return (set_errno(EFAULT)); + } count = 0; for (i = 0; i < iovcnt; i++) { ssize_t iovlen = aiov[i].iov_len; count += iovlen; - if (iovlen < 0 || count < 0) + if (iovlen < 0 || count < 0) { + if (aiovlen != 0) + kmem_free(aiov, aiovlen); return (set_errno(EINVAL)); + } } - if ((fp = getf(fdes)) == NULL) + if ((fp = getf(fdes)) == NULL) { + if (aiovlen != 0) + kmem_free(aiov, aiovlen); return (set_errno(EBADF)); + } if (((fflag = fp->f_flag) & FREAD) == 0) { error = EBADF; goto out; @@ -768,6 +794,8 @@ out: if (in_crit) nbl_end_crit(vp); releasef(fdes); + if (aiovlen != 0) + kmem_free(aiov, aiovlen); if (error) return (set_errno(error)); return (count); @@ -777,7 +805,8 @@ ssize_t writev(int fdes, struct iovec *iovp, int iovcnt) { struct uio auio; - struct iovec aiov[DEF_IOV_MAX]; + struct iovec buf[IOV_MAX_STACK], *aiov = buf; + int aiovlen = 0; file_t *fp; register vnode_t *vp; struct cpu *cp; @@ -788,9 +817,14 @@ writev(int fdes, struct iovec *iovp, int iovcnt) u_offset_t fileoff; int in_crit = 0; - if (iovcnt <= 0 || iovcnt > DEF_IOV_MAX) + if (iovcnt <= 0 || iovcnt > IOV_MAX) return (set_errno(EINVAL)); + if (iovcnt > IOV_MAX_STACK) { + aiovlen = iovcnt * sizeof (iovec_t); + aiov = kmem_alloc(aiovlen, KM_SLEEP); + } + #ifdef _SYSCALL32_IMPL /* * 32-bit callers need to have their iovec expanded, @@ -798,36 +832,62 @@ writev(int fdes, struct iovec *iovp, int iovcnt) * of data in a single call. */ if (get_udatamodel() == DATAMODEL_ILP32) { - struct iovec32 aiov32[DEF_IOV_MAX]; + struct iovec32 buf32[IOV_MAX_STACK], *aiov32 = buf32; + int aiov32len; ssize32_t count32; - if (copyin(iovp, aiov32, iovcnt * sizeof (struct iovec32))) + aiov32len = iovcnt * sizeof (iovec32_t); + if (aiovlen != 0) + aiov32 = kmem_alloc(aiov32len, KM_SLEEP); + + if (copyin(iovp, aiov32, aiov32len)) { + if (aiovlen != 0) { + kmem_free(aiov32, aiov32len); + kmem_free(aiov, aiovlen); + } return (set_errno(EFAULT)); + } count32 = 0; for (i = 0; i < iovcnt; i++) { ssize32_t iovlen = aiov32[i].iov_len; count32 += iovlen; - if (iovlen < 0 || count32 < 0) + if (iovlen < 0 || count32 < 0) { + if (aiovlen != 0) { + kmem_free(aiov32, aiov32len); + kmem_free(aiov, aiovlen); + } return (set_errno(EINVAL)); + } aiov[i].iov_len = iovlen; aiov[i].iov_base = (caddr_t)(uintptr_t)aiov32[i].iov_base; } + if (aiovlen != 0) + kmem_free(aiov32, aiov32len); } else #endif - if (copyin(iovp, aiov, iovcnt * sizeof (struct iovec))) + if (copyin(iovp, aiov, iovcnt * sizeof (iovec_t))) { + if (aiovlen != 0) + kmem_free(aiov, aiovlen); return (set_errno(EFAULT)); + } count = 0; for (i = 0; i < iovcnt; i++) { ssize_t iovlen = aiov[i].iov_len; count += iovlen; - if (iovlen < 0 || count < 0) + if (iovlen < 0 || count < 0) { + if (aiovlen != 0) + kmem_free(aiov, aiovlen); return (set_errno(EINVAL)); + } } - if ((fp = getf(fdes)) == NULL) + if ((fp = getf(fdes)) == NULL) { + if (aiovlen != 0) + kmem_free(aiov, aiovlen); return (set_errno(EBADF)); + } if (((fflag = fp->f_flag) & FWRITE) == 0) { error = EBADF; goto out; @@ -917,6 +977,8 @@ out: if (in_crit) nbl_end_crit(vp); releasef(fdes); + if (aiovlen != 0) + kmem_free(aiov, aiovlen); if (error) return (set_errno(error)); return (count); @@ -927,7 +989,8 @@ preadv(int fdes, struct iovec *iovp, int iovcnt, off_t offset, off_t extended_offset) { struct uio auio; - struct iovec aiov[DEF_IOV_MAX]; + struct iovec buf[IOV_MAX_STACK], *aiov = buf; + int aiovlen = 0; file_t *fp; register vnode_t *vp; struct cpu *cp; @@ -936,25 +999,35 @@ preadv(int fdes, struct iovec *iovp, int iovcnt, off_t offset, int error = 0; int i; + /* + * In a 64-bit kernel, this interface supports native 64-bit + * applications as well as 32-bit applications using both standard and + * large-file access. For 32-bit large-file aware applications, the + * offset is passed as two parameters which are joined into the actual + * offset used. The 64-bit libc always passes 0 for the extended_offset. + * Note that off_t is a signed value, but the preadv/pwritev API treats + * the offset as a position in the file for the operation, so passing + * a negative value will likely fail the maximum offset checks below + * because we convert it to an unsigned value which will be larger than + * the maximum valid offset. + */ #if defined(_SYSCALL32_IMPL) || defined(_ILP32) u_offset_t fileoff = ((u_offset_t)extended_offset << 32) | (u_offset_t)offset; #else /* _SYSCALL32_IMPL || _ILP32 */ u_offset_t fileoff = (u_offset_t)(ulong_t)offset; #endif /* _SYSCALL32_IMPR || _ILP32 */ -#ifdef _SYSCALL32_IMPL - const u_offset_t maxoff = get_udatamodel() == DATAMODEL_ILP32 && - extended_offset == 0? - MAXOFF32_T : MAXOFFSET_T; -#else /* _SYSCALL32_IMPL */ - const u_offset_t maxoff = MAXOFF32_T; -#endif /* _SYSCALL32_IMPL */ int in_crit = 0; - if (iovcnt <= 0 || iovcnt > DEF_IOV_MAX) + if (iovcnt <= 0 || iovcnt > IOV_MAX) return (set_errno(EINVAL)); + if (iovcnt > IOV_MAX_STACK) { + aiovlen = iovcnt * sizeof (iovec_t); + aiov = kmem_alloc(aiovlen, KM_SLEEP); + } + #ifdef _SYSCALL32_IMPL /* * 32-bit callers need to have their iovec expanded, @@ -962,61 +1035,104 @@ preadv(int fdes, struct iovec *iovp, int iovcnt, off_t offset, * of data in a single call. */ if (get_udatamodel() == DATAMODEL_ILP32) { - struct iovec32 aiov32[DEF_IOV_MAX]; + struct iovec32 buf32[IOV_MAX_STACK], *aiov32 = buf32; + int aiov32len; ssize32_t count32; - if (copyin(iovp, aiov32, iovcnt * sizeof (struct iovec32))) + aiov32len = iovcnt * sizeof (iovec32_t); + if (aiovlen != 0) + aiov32 = kmem_alloc(aiov32len, KM_SLEEP); + + if (copyin(iovp, aiov32, aiov32len)) { + if (aiovlen != 0) { + kmem_free(aiov32, aiov32len); + kmem_free(aiov, aiovlen); + } return (set_errno(EFAULT)); + } count32 = 0; for (i = 0; i < iovcnt; i++) { ssize32_t iovlen32 = aiov32[i].iov_len; count32 += iovlen32; - if (iovlen32 < 0 || count32 < 0) + if (iovlen32 < 0 || count32 < 0) { + if (aiovlen != 0) { + kmem_free(aiov32, aiov32len); + kmem_free(aiov, aiovlen); + } return (set_errno(EINVAL)); + } aiov[i].iov_len = iovlen32; aiov[i].iov_base = (caddr_t)(uintptr_t)aiov32[i].iov_base; } + if (aiovlen != 0) + kmem_free(aiov32, aiov32len); } else #endif /* _SYSCALL32_IMPL */ - if (copyin(iovp, aiov, iovcnt * sizeof (struct iovec))) + if (copyin(iovp, aiov, iovcnt * sizeof (iovec_t))) { + if (aiovlen != 0) + kmem_free(aiov, aiovlen); return (set_errno(EFAULT)); + } count = 0; for (i = 0; i < iovcnt; i++) { ssize_t iovlen = aiov[i].iov_len; count += iovlen; - if (iovlen < 0 || count < 0) + if (iovlen < 0 || count < 0) { + if (aiovlen != 0) + kmem_free(aiov, aiovlen); return (set_errno(EINVAL)); + } } - if ((bcount = (ssize_t)count) < 0) + if ((bcount = count) < 0) { + if (aiovlen != 0) + kmem_free(aiov, aiovlen); return (set_errno(EINVAL)); - if ((fp = getf(fdes)) == NULL) + } + if ((fp = getf(fdes)) == NULL) { + if (aiovlen != 0) + kmem_free(aiov, aiovlen); return (set_errno(EBADF)); + } if (((fflag = fp->f_flag) & FREAD) == 0) { error = EBADF; goto out; } vp = fp->f_vnode; rwflag = 0; - if (vp->v_type == VREG) { + /* + * Behaviour is same as read(2). Please see comments in read(2). + */ + if (vp->v_type == VREG) { if (bcount == 0) goto out; - /* - * return EINVAL for offsets that cannot be - * represented in an off_t. - */ - if (fileoff > maxoff) { - error = EINVAL; + /* Handle offset past maximum offset allowed for file. */ + if (fileoff >= OFFSET_MAX(fp)) { + struct vattr va; + va.va_mask = AT_SIZE; + + error = VOP_GETATTR(vp, &va, 0, fp->f_cred, NULL); + if (error == 0) { + if (fileoff >= va.va_size) { + count = 0; + } else { + error = EOVERFLOW; + } + } goto out; } - if (fileoff + bcount > maxoff) - bcount = (ssize_t)((u_offset_t)maxoff - fileoff); + ASSERT(bcount == count); + + /* Note: modified count used in nbl_conflict() call below. */ + if ((fileoff + count) > OFFSET_MAX(fp)) + count = (ssize_t)(OFFSET_MAX(fp) - fileoff); + } else if (vp->v_type == VFIFO) { error = ESPIPE; goto out; @@ -1033,8 +1149,7 @@ preadv(int fdes, struct iovec *iovp, int iovcnt, off_t offset, error = nbl_svmand(vp, fp->f_cred, &svmand); if (error != 0) goto out; - if (nbl_conflict(vp, NBL_WRITE, fileoff, count, svmand, - NULL)) { + if (nbl_conflict(vp, NBL_WRITE, fileoff, count, svmand, NULL)) { error = EACCES; goto out; } @@ -1042,33 +1157,6 @@ preadv(int fdes, struct iovec *iovp, int iovcnt, off_t offset, (void) VOP_RWLOCK(vp, rwflag, NULL); - /* - * Behaviour is same as read(2). Please see comments in - * read(2). - */ - - if ((vp->v_type == VREG) && (fileoff >= OFFSET_MAX(fp))) { - struct vattr va; - va.va_mask = AT_SIZE; - if ((error = - VOP_GETATTR(vp, &va, 0, fp->f_cred, NULL))) { - VOP_RWUNLOCK(vp, rwflag, NULL); - goto out; - } - if (fileoff >= va.va_size) { - VOP_RWUNLOCK(vp, rwflag, NULL); - count = 0; - goto out; - } else { - VOP_RWUNLOCK(vp, rwflag, NULL); - error = EOVERFLOW; - goto out; - } - } - if ((vp->v_type == VREG) && - (fileoff + count > OFFSET_MAX(fp))) { - count = (ssize_t)(OFFSET_MAX(fp) - fileoff); - } auio.uio_loffset = fileoff; auio.uio_iov = aiov; auio.uio_iovcnt = iovcnt; @@ -1099,6 +1187,8 @@ out: if (in_crit) nbl_end_crit(vp); releasef(fdes); + if (aiovlen != 0) + kmem_free(aiov, aiovlen); if (error) return (set_errno(error)); return (count); @@ -1109,7 +1199,8 @@ pwritev(int fdes, struct iovec *iovp, int iovcnt, off_t offset, off_t extended_offset) { struct uio auio; - struct iovec aiov[DEF_IOV_MAX]; + struct iovec buf[IOV_MAX_STACK], *aiov = buf; + int aiovlen = 0; file_t *fp; register vnode_t *vp; struct cpu *cp; @@ -1118,25 +1209,26 @@ pwritev(int fdes, struct iovec *iovp, int iovcnt, off_t offset, int error = 0; int i; + /* + * See the comment in preadv for how the offset is handled. + */ #if defined(_SYSCALL32_IMPL) || defined(_ILP32) u_offset_t fileoff = ((u_offset_t)extended_offset << 32) | (u_offset_t)offset; #else /* _SYSCALL32_IMPL || _ILP32 */ u_offset_t fileoff = (u_offset_t)(ulong_t)offset; #endif /* _SYSCALL32_IMPR || _ILP32 */ -#ifdef _SYSCALL32_IMPL - const u_offset_t maxoff = get_udatamodel() == DATAMODEL_ILP32 && - extended_offset == 0? - MAXOFF32_T : MAXOFFSET_T; -#else /* _SYSCALL32_IMPL */ - const u_offset_t maxoff = MAXOFF32_T; -#endif /* _SYSCALL32_IMPL */ int in_crit = 0; - if (iovcnt <= 0 || iovcnt > DEF_IOV_MAX) + if (iovcnt <= 0 || iovcnt > IOV_MAX) return (set_errno(EINVAL)); + if (iovcnt > IOV_MAX_STACK) { + aiovlen = iovcnt * sizeof (iovec_t); + aiov = kmem_alloc(aiovlen, KM_SLEEP); + } + #ifdef _SYSCALL32_IMPL /* * 32-bit callers need to have their iovec expanded, @@ -1144,58 +1236,92 @@ pwritev(int fdes, struct iovec *iovp, int iovcnt, off_t offset, * of data in a single call. */ if (get_udatamodel() == DATAMODEL_ILP32) { - struct iovec32 aiov32[DEF_IOV_MAX]; + struct iovec32 buf32[IOV_MAX_STACK], *aiov32 = buf32; + int aiov32len; ssize32_t count32; - if (copyin(iovp, aiov32, iovcnt * sizeof (struct iovec32))) + aiov32len = iovcnt * sizeof (iovec32_t); + if (aiovlen != 0) + aiov32 = kmem_alloc(aiov32len, KM_SLEEP); + + if (copyin(iovp, aiov32, aiov32len)) { + if (aiovlen != 0) { + kmem_free(aiov32, aiov32len); + kmem_free(aiov, aiovlen); + } return (set_errno(EFAULT)); + } count32 = 0; for (i = 0; i < iovcnt; i++) { ssize32_t iovlen32 = aiov32[i].iov_len; count32 += iovlen32; - if (iovlen32 < 0 || count32 < 0) + if (iovlen32 < 0 || count32 < 0) { + if (aiovlen != 0) { + kmem_free(aiov32, aiov32len); + kmem_free(aiov, aiovlen); + } return (set_errno(EINVAL)); + } aiov[i].iov_len = iovlen32; aiov[i].iov_base = (caddr_t)(uintptr_t)aiov32[i].iov_base; } + if (aiovlen != 0) + kmem_free(aiov32, aiov32len); } else #endif /* _SYSCALL32_IMPL */ - if (copyin(iovp, aiov, iovcnt * sizeof (struct iovec))) + if (copyin(iovp, aiov, iovcnt * sizeof (iovec_t))) { + if (aiovlen != 0) + kmem_free(aiov, aiovlen); return (set_errno(EFAULT)); + } count = 0; for (i = 0; i < iovcnt; i++) { ssize_t iovlen = aiov[i].iov_len; count += iovlen; - if (iovlen < 0 || count < 0) + if (iovlen < 0 || count < 0) { + if (aiovlen != 0) + kmem_free(aiov, aiovlen); return (set_errno(EINVAL)); + } } - if ((bcount = (ssize_t)count) < 0) + if ((bcount = count) < 0) { + if (aiovlen != 0) + kmem_free(aiov, aiovlen); return (set_errno(EINVAL)); - if ((fp = getf(fdes)) == NULL) + } + if ((fp = getf(fdes)) == NULL) { + if (aiovlen != 0) + kmem_free(aiov, aiovlen); return (set_errno(EBADF)); + } if (((fflag = fp->f_flag) & FWRITE) == 0) { error = EBADF; goto out; } vp = fp->f_vnode; rwflag = 1; - if (vp->v_type == VREG) { + /* + * The kernel's write(2) code checks the rctl & OFFSET_MAX and returns + * EFBIG when fileoff exceeds either limit. We do the same. + */ + if (vp->v_type == VREG) { if (bcount == 0) goto out; /* - * return EINVAL for offsets that cannot be - * represented in an off_t. + * Don't allow pwritev to cause file size to exceed the proper + * offset limit. */ - if (fileoff > maxoff) { - error = EINVAL; + if (fileoff >= OFFSET_MAX(fp)) { + error = EFBIG; goto out; } + /* * Take appropriate action if we are trying * to write above the resource limit. @@ -1218,17 +1344,13 @@ pwritev(int fdes, struct iovec *iovp, int iovcnt, off_t offset, error = EFBIG; goto out; } - /* - * Don't allow pwritev to cause file sizes to exceed - * maxoff. - */ - if (fileoff == maxoff) { - error = EFBIG; - goto out; - } - if (fileoff + bcount > maxoff) - bcount = (ssize_t)((u_offset_t)maxoff - fileoff); + ASSERT(bcount == count); + + /* Note: modified count used in nbl_conflict() call below. */ + if ((fileoff + count) > OFFSET_MAX(fp)) + count = (ssize_t)(OFFSET_MAX(fp) - fileoff); + } else if (vp->v_type == VFIFO) { error = ESPIPE; goto out; @@ -1245,8 +1367,7 @@ pwritev(int fdes, struct iovec *iovp, int iovcnt, off_t offset, error = nbl_svmand(vp, fp->f_cred, &svmand); if (error != 0) goto out; - if (nbl_conflict(vp, NBL_WRITE, fileoff, count, svmand, - NULL)) { + if (nbl_conflict(vp, NBL_WRITE, fileoff, count, svmand, NULL)) { error = EACCES; goto out; } @@ -1254,34 +1375,6 @@ pwritev(int fdes, struct iovec *iovp, int iovcnt, off_t offset, (void) VOP_RWLOCK(vp, rwflag, NULL); - - /* - * Behaviour is same as write(2). Please see comments for - * write(2). - */ - - if (vp->v_type == VREG) { - if (fileoff >= curproc->p_fsz_ctl) { - VOP_RWUNLOCK(vp, rwflag, NULL); - mutex_enter(&curproc->p_lock); - /* see above rctl_action comment */ - (void) rctl_action( - rctlproc_legacy[RLIMIT_FSIZE], - curproc->p_rctls, - curproc, RCA_UNSAFE_SIGINFO); - mutex_exit(&curproc->p_lock); - error = EFBIG; - goto out; - } - if (fileoff >= OFFSET_MAX(fp)) { - VOP_RWUNLOCK(vp, rwflag, NULL); - error = EFBIG; - goto out; - } - if (fileoff + count > OFFSET_MAX(fp)) - count = (ssize_t)(OFFSET_MAX(fp) - fileoff); - } - auio.uio_loffset = fileoff; auio.uio_iov = aiov; auio.uio_iovcnt = iovcnt; @@ -1308,6 +1401,8 @@ out: if (in_crit) nbl_end_crit(vp); releasef(fdes); + if (aiovlen != 0) + kmem_free(aiov, aiovlen); if (error) return (set_errno(error)); return (count); |