From 81c3d08501e57b65e096ec6a2b1d62f87cf8ced7 Mon Sep 17 00:00:00 2001 From: Jerry Jelinek Date: Tue, 24 Jan 2017 16:03:35 +0000 Subject: 12365 pwritev64 can't write at offsets between [2 GiB, 4 GiB) Portions contributed by: John Levon Reviewed by: Patrick Mooney Reviewed by: Robert Mustacchi Approved by: Dan McDonald --- usr/src/pkg/manifests/system-test-ostest.mf | 4 +- usr/src/test/os-tests/runfiles/default.run | 5 +- usr/src/test/os-tests/tests/Makefile | 9 +- usr/src/test/os-tests/tests/writev.c | 109 +++++++++++++++++++ usr/src/uts/common/syscall/rw.c | 161 +++++++++++----------------- 5 files changed, 183 insertions(+), 105 deletions(-) create mode 100644 usr/src/test/os-tests/tests/writev.c diff --git a/usr/src/pkg/manifests/system-test-ostest.mf b/usr/src/pkg/manifests/system-test-ostest.mf index 07d6b57fc0..3aca714511 100644 --- a/usr/src/pkg/manifests/system-test-ostest.mf +++ b/usr/src/pkg/manifests/system-test-ostest.mf @@ -12,7 +12,7 @@ # # Copyright (c) 2012, 2016 by Delphix. All rights reserved. # Copyright 2014, OmniTI Computer Consulting, Inc. All rights reserved. -# Copyright 2019 Joyent, Inc. +# Copyright 2020 Joyent, Inc. # set name=pkg.fmri value=pkg:/system/test/ostest@$(PKGVERS) @@ -80,6 +80,8 @@ file path=opt/os-tests/tests/sockfs/nosignal mode=0555 file path=opt/os-tests/tests/sockfs/sockpair mode=0555 file path=opt/os-tests/tests/spoof-ras mode=0555 file path=opt/os-tests/tests/stress/dladm-kstat mode=0555 +file path=opt/os-tests/tests/writev.32 mode=0555 +file path=opt/os-tests/tests/writev.64 mode=0555 license cr_Sun license=cr_Sun license lic_CDDL license=lic_CDDL depend fmri=system/io/tests type=require diff --git a/usr/src/test/os-tests/runfiles/default.run b/usr/src/test/os-tests/runfiles/default.run index 46c8bf68dd..e34a035e23 100644 --- a/usr/src/test/os-tests/runfiles/default.run +++ b/usr/src/test/os-tests/runfiles/default.run @@ -11,7 +11,7 @@ # # Copyright (c) 2012 by Delphix. All rights reserved. 
-# Copyright 2019 Joyent, Inc. +# Copyright 2020 Joyent, Inc. # [DEFAULT] @@ -76,3 +76,6 @@ user = root arch = i86pc timeout = 300 tests = ['ldt', 'badseg'] + +[/opt/os-tests/tests/writev.32] +[/opt/os-tests/tests/writev.64] diff --git a/usr/src/test/os-tests/tests/Makefile b/usr/src/test/os-tests/tests/Makefile index 53bfd1b5a1..309df786b0 100644 --- a/usr/src/test/os-tests/tests/Makefile +++ b/usr/src/test/os-tests/tests/Makefile @@ -11,7 +11,7 @@ # # Copyright (c) 2012, 2016 by Delphix. All rights reserved. -# Copyright 2019 Joyent, Inc. +# Copyright 2020 Joyent, Inc. # SUBDIRS_i386 = i386 @@ -30,7 +30,8 @@ SUBDIRS = \ $(SUBDIRS_$(MACH)) PROGS = \ - odirectory + odirectory \ + writev CPPFLAGS += -D_REENTRANT PROGS32 = $(PROGS:%=%.32) @@ -44,6 +45,10 @@ ROOTOPTPROGS = $(PROGS32:%=$(ROOTOPTDIR)/%) \ odirectory.32 := LDLIBS += -lsocket odirectory.64 := LDLIBS64 += -lsocket +writev.32 := CPPFLAGS += -D_FILE_OFFSET_BITS=64 +writev.32 := CSTD = $(CSTD_GNU99) +writev.64 := CSTD = $(CSTD_GNU99) + include $(SRC)/cmd/Makefile.cmd all := TARGET = all diff --git a/usr/src/test/os-tests/tests/writev.c b/usr/src/test/os-tests/tests/writev.c new file mode 100644 index 0000000000..e4f4bf0a66 --- /dev/null +++ b/usr/src/test/os-tests/tests/writev.c @@ -0,0 +1,109 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2020 Joyent, Inc. + */ + +/* + * Some simple testing of the read/writev() family: specifically we're checking + * IOV_MAX == 1024, and that a large-file compiled 32-bit binary can correctly + * access certain offsets. 
+ */ + +#include <sys/uio.h> +#include <err.h> +#include <errno.h> +#include <limits.h> +#include <stdlib.h> +#include <string.h> +#include <strings.h> +#include <unistd.h> + +#define ONE_GIG ((off_t)1024 * 1024 * 1024) + +#define DATA_LEN (sizeof ("data")) + +char path[] = "/var/tmp/writev_test.XXXXXX"; + +static void +cleanup(void) +{ + (void) unlink(path); +} + +int +main(int argc, char *argv[]) +{ + char data[(IOV_MAX + 1) * DATA_LEN] = ""; + struct iovec iov[IOV_MAX + 1]; + + if (IOV_MAX != 1024) + errx(EXIT_FAILURE, "IOV_MAX != 1024"); + + int fd = mkstemp(path); + + if (fd == -1) + err(EXIT_FAILURE, "failed to create file"); + + (void) atexit(cleanup); + + int ret = ftruncate(fd, ONE_GIG * 8); + + if (ret != 0) + err(EXIT_FAILURE, "failed to truncate file"); + + for (int i = 0; i < IOV_MAX + 1; i++) { + (void) strcpy(data + i * DATA_LEN, "data"); + iov[i].iov_base = data + i * 5; + iov[i].iov_len = DATA_LEN; + } + + ssize_t written = writev(fd, iov, IOV_MAX + 1); + + if (written != -1 || errno != EINVAL) + errx(EXIT_FAILURE, "writev(IOV_MAX + 1) didn't fail properly"); + + written = writev(fd, iov, IOV_MAX); + + if (written == -1) + err(EXIT_FAILURE, "writev failed"); + + bzero(data, sizeof (data)); + + ssize_t read = preadv(fd, iov, IOV_MAX, 0); + + if (read != DATA_LEN * IOV_MAX) + err(EXIT_FAILURE, "preadv failed"); + + for (int i = 0; i < IOV_MAX; i++) { + if (strcmp(data + i * DATA_LEN, "data") != 0) + errx(EXIT_FAILURE, "bad read at 0x%lx", i * DATA_LEN); + } + + /* + * Now test various "interesting" offsets. 
+ */ + + for (off_t off = 0; off < ONE_GIG * 8; off += ONE_GIG) { + if ((written = pwritev(fd, iov, 1, off)) != DATA_LEN) + err(EXIT_FAILURE, "pwritev(0x%lx) failed", off); + } + + for (off_t off = 0; off < ONE_GIG * 8; off += ONE_GIG) { + if ((read = preadv(fd, iov, 1, off)) != DATA_LEN) + err(EXIT_FAILURE, "preadv(0x%lx) failed", off); + if (strcmp(data, "data") != 0) + errx(EXIT_FAILURE, "bad read at 0x%lx", off); + } + + return (EXIT_SUCCESS); +} diff --git a/usr/src/uts/common/syscall/rw.c b/usr/src/uts/common/syscall/rw.c index 943b7d244e..14f6d278a1 100644 --- a/usr/src/uts/common/syscall/rw.c +++ b/usr/src/uts/common/syscall/rw.c @@ -22,11 +22,11 @@ /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. - * Copyright 2015, Joyent, Inc. All rights reserved. + * Copyright 2020, Joyent, Inc. */ /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ -/* All Rights Reserved */ +/* All Rights Reserved */ /* * Portions of this source code were derived from Berkeley 4.3 BSD @@ -999,19 +999,24 @@ preadv(int fdes, struct iovec *iovp, int iovcnt, off_t offset, int error = 0; int i; + /* + * In a 64-bit kernel, this interface supports native 64-bit + * applications as well as 32-bit applications using both standard and + * large-file access. For 32-bit large-file aware applications, the + * offset is passed as two parameters which are joined into the actual + * offset used. The 64-bit libc always passes 0 for the extended_offset. + * Note that off_t is a signed value, but the preadv/pwritev API treats + * the offset as a position in the file for the operation, so passing + * a negative value will likely fail the maximum offset checks below + * because we convert it to an unsigned value which will be larger than + * the maximum valid offset. 
+ */ #if defined(_SYSCALL32_IMPL) || defined(_ILP32) u_offset_t fileoff = ((u_offset_t)extended_offset << 32) | (u_offset_t)offset; #else /* _SYSCALL32_IMPL || _ILP32 */ u_offset_t fileoff = (u_offset_t)(ulong_t)offset; #endif /* _SYSCALL32_IMPR || _ILP32 */ -#ifdef _SYSCALL32_IMPL - const u_offset_t maxoff = get_udatamodel() == DATAMODEL_ILP32 && - extended_offset == 0? - MAXOFF32_T : MAXOFFSET_T; -#else /* _SYSCALL32_IMPL */ - const u_offset_t maxoff = MAXOFF32_T; -#endif /* _SYSCALL32_IMPL */ int in_crit = 0; @@ -1082,7 +1087,7 @@ preadv(int fdes, struct iovec *iovp, int iovcnt, off_t offset, } } - if ((bcount = (ssize_t)count) < 0) { + if ((bcount = count) < 0) { if (aiovlen != 0) kmem_free(aiov, aiovlen); return (set_errno(EINVAL)); @@ -1098,22 +1103,36 @@ preadv(int fdes, struct iovec *iovp, int iovcnt, off_t offset, } vp = fp->f_vnode; rwflag = 0; - if (vp->v_type == VREG) { + /* + * Behaviour is same as read(2). Please see comments in read above. + */ + if (vp->v_type == VREG) { if (bcount == 0) goto out; - /* - * return EINVAL for offsets that cannot be - * represented in an off_t. - */ - if (fileoff > maxoff) { - error = EINVAL; + /* Handle offset past maximum offset allowed for file. */ + if (fileoff >= OFFSET_MAX(fp)) { + struct vattr va; + va.va_mask = AT_SIZE; + + error = VOP_GETATTR(vp, &va, 0, fp->f_cred, NULL); + if (error == 0) { + if (fileoff >= va.va_size) { + count = 0; + } else { + error = EOVERFLOW; + } + } goto out; } - if (fileoff + bcount > maxoff) - bcount = (ssize_t)((u_offset_t)maxoff - fileoff); + ASSERT(bcount == count); + + /* Note: modified count used in nbl_conflict() call below. 
*/ + if ((fileoff + count) > OFFSET_MAX(fp)) + count = (ssize_t)(OFFSET_MAX(fp) - fileoff); + } else if (vp->v_type == VFIFO) { error = ESPIPE; goto out; @@ -1130,8 +1149,7 @@ preadv(int fdes, struct iovec *iovp, int iovcnt, off_t offset, error = nbl_svmand(vp, fp->f_cred, &svmand); if (error != 0) goto out; - if (nbl_conflict(vp, NBL_WRITE, fileoff, count, svmand, - NULL)) { + if (nbl_conflict(vp, NBL_WRITE, fileoff, count, svmand, NULL)) { error = EACCES; goto out; } @@ -1139,33 +1157,6 @@ preadv(int fdes, struct iovec *iovp, int iovcnt, off_t offset, (void) VOP_RWLOCK(vp, rwflag, NULL); - /* - * Behaviour is same as read(2). Please see comments in - * read(2). - */ - - if ((vp->v_type == VREG) && (fileoff >= OFFSET_MAX(fp))) { - struct vattr va; - va.va_mask = AT_SIZE; - if ((error = - VOP_GETATTR(vp, &va, 0, fp->f_cred, NULL))) { - VOP_RWUNLOCK(vp, rwflag, NULL); - goto out; - } - if (fileoff >= va.va_size) { - VOP_RWUNLOCK(vp, rwflag, NULL); - count = 0; - goto out; - } else { - VOP_RWUNLOCK(vp, rwflag, NULL); - error = EOVERFLOW; - goto out; - } - } - if ((vp->v_type == VREG) && - (fileoff + count > OFFSET_MAX(fp))) { - count = (ssize_t)(OFFSET_MAX(fp) - fileoff); - } auio.uio_loffset = fileoff; auio.uio_iov = aiov; auio.uio_iovcnt = iovcnt; @@ -1218,19 +1209,15 @@ pwritev(int fdes, struct iovec *iovp, int iovcnt, off_t offset, int error = 0; int i; + /* + * See the comment in preadv for how the offset is handled. + */ #if defined(_SYSCALL32_IMPL) || defined(_ILP32) u_offset_t fileoff = ((u_offset_t)extended_offset << 32) | (u_offset_t)offset; #else /* _SYSCALL32_IMPL || _ILP32 */ u_offset_t fileoff = (u_offset_t)(ulong_t)offset; #endif /* _SYSCALL32_IMPR || _ILP32 */ -#ifdef _SYSCALL32_IMPL - const u_offset_t maxoff = get_udatamodel() == DATAMODEL_ILP32 && - extended_offset == 0? 
- MAXOFF32_T : MAXOFFSET_T; -#else /* _SYSCALL32_IMPL */ - const u_offset_t maxoff = MAXOFF32_T; -#endif /* _SYSCALL32_IMPL */ int in_crit = 0; @@ -1301,7 +1288,7 @@ pwritev(int fdes, struct iovec *iovp, int iovcnt, off_t offset, } } - if ((bcount = (ssize_t)count) < 0) { + if ((bcount = count) < 0) { if (aiovlen != 0) kmem_free(aiov, aiovlen); return (set_errno(EINVAL)); @@ -1317,19 +1304,24 @@ pwritev(int fdes, struct iovec *iovp, int iovcnt, off_t offset, } vp = fp->f_vnode; rwflag = 1; - if (vp->v_type == VREG) { + /* + * The kernel's write(2) code checks OFFSET_MAX and the rctl, and + * returns EFBIG when fileoff exceeds either limit. We do the same. + */ + if (vp->v_type == VREG) { if (bcount == 0) goto out; /* - * return EINVAL for offsets that cannot be - * represented in an off_t. + * Don't allow pwritev to cause file size to exceed the proper + * offset limit. */ - if (fileoff > maxoff) { - error = EINVAL; + if (fileoff >= OFFSET_MAX(fp)) { + error = EFBIG; goto out; } + /* * Take appropriate action if we are trying * to write above the resource limit. @@ -1352,17 +1344,13 @@ pwritev(int fdes, struct iovec *iovp, int iovcnt, off_t offset, error = EFBIG; goto out; } - /* - * Don't allow pwritev to cause file sizes to exceed - * maxoff. - */ - if (fileoff == maxoff) { - error = EFBIG; - goto out; - } - if (fileoff + bcount > maxoff) - bcount = (ssize_t)((u_offset_t)maxoff - fileoff); + ASSERT(bcount == count); + + /* Note: modified count used in nbl_conflict() call below. 
*/ + if ((fileoff + count) > OFFSET_MAX(fp)) + count = (ssize_t)(OFFSET_MAX(fp) - fileoff); + } else if (vp->v_type == VFIFO) { error = ESPIPE; goto out; @@ -1379,8 +1367,7 @@ pwritev(int fdes, struct iovec *iovp, int iovcnt, off_t offset, error = nbl_svmand(vp, fp->f_cred, &svmand); if (error != 0) goto out; - if (nbl_conflict(vp, NBL_WRITE, fileoff, count, svmand, - NULL)) { + if (nbl_conflict(vp, NBL_WRITE, fileoff, count, svmand, NULL)) { error = EACCES; goto out; } @@ -1388,34 +1375,6 @@ pwritev(int fdes, struct iovec *iovp, int iovcnt, off_t offset, (void) VOP_RWLOCK(vp, rwflag, NULL); - - /* - * Behaviour is same as write(2). Please see comments for - * write(2). - */ - - if (vp->v_type == VREG) { - if (fileoff >= curproc->p_fsz_ctl) { - VOP_RWUNLOCK(vp, rwflag, NULL); - mutex_enter(&curproc->p_lock); - /* see above rctl_action comment */ - (void) rctl_action( - rctlproc_legacy[RLIMIT_FSIZE], - curproc->p_rctls, - curproc, RCA_UNSAFE_SIGINFO); - mutex_exit(&curproc->p_lock); - error = EFBIG; - goto out; - } - if (fileoff >= OFFSET_MAX(fp)) { - VOP_RWUNLOCK(vp, rwflag, NULL); - error = EFBIG; - goto out; - } - if (fileoff + count > OFFSET_MAX(fp)) - count = (ssize_t)(OFFSET_MAX(fp) - fileoff); - } - auio.uio_loffset = fileoff; auio.uio_iov = aiov; auio.uio_iovcnt = iovcnt; -- cgit v1.2.3