diff options
| author | Robert Mustacchi <rm@joyent.com> | 2016-07-21 21:31:37 +0000 |
|---|---|---|
| committer | Robert Mustacchi <rm@joyent.com> | 2016-08-03 22:24:53 +0000 |
| commit | 08280dbb7d32cb16511f00c4ea7e1902b2412609 (patch) | |
| tree | c4f7b770f4f0ef56d82b201bf8b35020ca3fec8f | |
| parent | 5216f207fca0ab66e4f96f2fdaea9154b1317359 (diff) | |
| download | illumos-joyent-08280dbb7d32cb16511f00c4ea7e1902b2412609.tar.gz | |
OS-5539 tmpfs space accounting needs improvement
Reviewed by: Alex Wilson <alex.wilson@joyent.com>
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
Approved by: Patrick Mooney <patrick.mooney@joyent.com>
| -rw-r--r-- | usr/src/test/os-tests/runfiles/default.run | 4 | ||||
| -rw-r--r-- | usr/src/test/os-tests/tests/Makefile | 2 | ||||
| -rw-r--r-- | usr/src/test/os-tests/tests/tmpfs/Makefile | 52 | ||||
| -rw-r--r-- | usr/src/test/os-tests/tests/tmpfs/tmpfs_badmount.ksh | 114 | ||||
| -rw-r--r-- | usr/src/test/os-tests/tests/tmpfs/tmpfs_enospc.ksh | 74 | ||||
| -rw-r--r-- | usr/src/test/os-tests/tests/tmpfs/tmpfs_full.c | 94 | ||||
| -rw-r--r-- | usr/src/uts/common/fs/tmpfs/tmp_dir.c | 61 | ||||
| -rw-r--r-- | usr/src/uts/common/fs/tmpfs/tmp_subr.c | 78 | ||||
| -rw-r--r-- | usr/src/uts/common/fs/tmpfs/tmp_tnode.c | 67 | ||||
| -rw-r--r-- | usr/src/uts/common/fs/tmpfs/tmp_vfsops.c | 41 | ||||
| -rw-r--r-- | usr/src/uts/common/fs/tmpfs/tmp_vnops.c | 30 | ||||
| -rw-r--r-- | usr/src/uts/common/sys/fs/tmp.h | 16 |
12 files changed, 567 insertions, 66 deletions
diff --git a/usr/src/test/os-tests/runfiles/default.run b/usr/src/test/os-tests/runfiles/default.run index 4e73ebc8b3..3971c5b74c 100644 --- a/usr/src/test/os-tests/runfiles/default.run +++ b/usr/src/test/os-tests/runfiles/default.run @@ -26,3 +26,7 @@ user = root [/opt/os-tests/tests/sigqueue] tests = ['sigqueue_queue_size'] + +[/opt/os-tests/tests/tmpfs] +user = root +tests = ['tmpfs_badmount', 'tmpfs_enospc'] diff --git a/usr/src/test/os-tests/tests/Makefile b/usr/src/test/os-tests/tests/Makefile index cd4104500c..77f1a7a0ec 100644 --- a/usr/src/test/os-tests/tests/Makefile +++ b/usr/src/test/os-tests/tests/Makefile @@ -13,6 +13,6 @@ # Copyright (c) 2012 by Delphix. All rights reserved. # -SUBDIRS = poll sigqueue spoof-ras +SUBDIRS = poll sigqueue spoof-ras tmpfs include $(SRC)/test/Makefile.com diff --git a/usr/src/test/os-tests/tests/tmpfs/Makefile b/usr/src/test/os-tests/tests/tmpfs/Makefile new file mode 100644 index 0000000000..d6515b38fa --- /dev/null +++ b/usr/src/test/os-tests/tests/tmpfs/Makefile @@ -0,0 +1,52 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2016 Joyent, Inc. 
+# + +include $(SRC)/Makefile.master + +ROOTOPTPKG = $(ROOT)/opt/os-tests +TESTDIR = $(ROOTOPTPKG)/tests/tmpfs + +PROGS = tmpfs_full +SCRIPTS = tmpfs_badmount \ + tmpfs_enospc + +include $(SRC)/cmd/Makefile.cmd +include $(SRC)/test/Makefile.com + +CMDS = $(PROGS:%=$(TESTDIR)/%) $(SCRIPTS:%=$(TESTDIR)/%) +$(CMDS) := FILEMODE = 0555 + +all: $(PROGS) + +install: all $(CMDS) + +lint: + +clobber: clean + -$(RM) $(PROGS) + +clean: + -$(RM) *.o + +$(CMDS): $(TESTDIR) $(PROGS) + +$(TESTDIR): + $(INS.dir) + +$(TESTDIR)/%: %.ksh + $(INS.rename) + +$(TESTDIR)/%: % + $(INS.file) diff --git a/usr/src/test/os-tests/tests/tmpfs/tmpfs_badmount.ksh b/usr/src/test/os-tests/tests/tmpfs/tmpfs_badmount.ksh new file mode 100644 index 0000000000..7e2c4a6095 --- /dev/null +++ b/usr/src/test/os-tests/tests/tmpfs/tmpfs_badmount.ksh @@ -0,0 +1,114 @@ +#!/usr/bin/ksh +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2016 Joyent, Inc. +# + +# +# Test various options to try and mount a tmpfs. Aside from the first to +# verify that we can mount tmpfs at all, these should all fail. 
+# + +tb_arg0=$(basename $0) +tb_mountpoint="/var/tmp/$0.$$" +tb_mount=/usr/sbin/mount +tb_umount=/usr/sbin/umount + +function fatal +{ + rmdir $tb_mountpoint + typeset msg="$*" + [[ -z "$msg" ]] && msg="test failed" + echo "$tb_arg0: test failed $msg" >&2 + exit 1 +} + +function check_mount +{ + mkdir -p $tb_mountpoint || fatal \ + "failed to make mountpoint $tb_mountpoint" + $tb_mount -F tmpfs swap $tb_mountpoint || fatal \ + "failed to mount tmpfs, check user perms" + $tb_umount $tb_mountpoint || fatal \ + "failed to unmount test point" +} + +function test_one +{ + typeset opts=$1 + + [[ -z "$opts" ]] && fatal "missing required opts" + $tb_mount -F tmpfs -o $opts swap $tb_mountpoint 2>/dev/null + if [[ $? -eq 0 ]]; then + $tb_umount $tb_mountpoint + fatal "successfully mounted with opts $opts, expected failure" + fi +} + +check_mount + +# +# Test invalid percentages. +# +test_one "size=-5%" +test_one "size=200%" +test_one "size=55.55555%" +test_one "size=100.0%" +test_one "size=bad%" +test_one "size=30g%" +test_one "size=%" +test_one "size=%wat" + +# +# Test invalid sizes. Only k, m, and g are valid suffixes. +# +test_one "size=hello;world" +test_one "size=0xnope" +test_one "size=3.14g" +test_one "size=3;14" +test_one "size=thisisanormalsize" +test_one "size=" +test_one "size=100mtry" + +# +# Now, we need to try and trigger a bunch of overflow. We're going to do +# this assuming we're on a 64-bit kernel (which will always overflow a +# 32-bit kernel). +# +test_one "size=20000000000000000000" +test_one "size=1ggggggggggggggggggg" +test_one "size=1mmmmmmmmmmmmmmmmmmm" +test_one "size=1kkkkkkkkkkkkkkkkkkk" +test_one "size=1kkkkkkkkkkkkkkkkkkk" +test_one "size=18014398509481983k" +test_one "size=17592186044416m" +test_one "size=17179869185g" +test_one "size=17179869184g" + +# +# Let's throw a couple bad modes around while we're here. 
+# +test_one "mode=17777" +test_one "mode=27777" +test_one "mode=37777" +test_one "mode=47777" +test_one "mode=57777" +test_one "mode=67777" +test_one "mode=77777" +test_one "mode=87777" +test_one "mode=97777" +test_one "mode=asdf" +test_one "mode=deadbeef" +test_one "mode=kefka" + +rmdir $tb_mountpoint diff --git a/usr/src/test/os-tests/tests/tmpfs/tmpfs_enospc.ksh b/usr/src/test/os-tests/tests/tmpfs/tmpfs_enospc.ksh new file mode 100644 index 0000000000..a285f306e2 --- /dev/null +++ b/usr/src/test/os-tests/tests/tmpfs/tmpfs_enospc.ksh @@ -0,0 +1,74 @@ +#!/usr/bin/ksh +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2016 Joyent, Inc. +# + +# +# Verify that if we fill up a tmpfs that we can't then perform +# additional things to it that would result in the creation or use of +# kernel memory. 
+# + +te_arg0=$(basename $0) +te_root=$(dirname $0) +te_bin=$te_root/tmpfs_full +te_mountpoint="/var/tmp/$0.$$" +te_mount=/usr/sbin/mount +te_umount=/usr/sbin/umount +te_testfile=1m +te_mounted= +te_exit=1 + +function fatal +{ + [[ -n "$te_mounted" ]] && $te_umount $te_mountpoint + rmdir $te_mountpoint + typeset msg="$*" + [[ -z "$msg" ]] && msg="test failed" + echo "$te_arg0: test failed $msg" >&2 + exit 1 +} + +function setup +{ + typeset ofile=$te_mountpoint/$te_testfile + + mkdir -p $te_mountpoint || fatal \ + "failed to make mountpoint $te_mountpoint" + $te_mount -F tmpfs swap $te_mountpoint || fatal \ + "failed to mount tmpfs, check user perms" + te_mounted=1 + dd if=/dev/zero of=$ofile bs=1M count=1 2>/dev/null || fatal \ + "failed to create a 1 MB file" + $te_mount -F tmpfs -o remount,size=512k swap $te_mountpoint || + fatal "failed to remount tmpfs" +} + +function run_test +{ + $te_bin $te_mountpoint $te_testfile || fatal "$te_bin failed" +} + +function cleanup +{ + te_mounted= + $te_umount $te_mountpoint || fatal "failed to unmount $te_mountpoint" + rmdir $te_mountpoint || fatal "failed to remove $te_mountpoint" +} + +setup +run_test +cleanup + +exit 0 diff --git a/usr/src/test/os-tests/tests/tmpfs/tmpfs_full.c b/usr/src/test/os-tests/tests/tmpfs/tmpfs_full.c new file mode 100644 index 0000000000..6c6037710b --- /dev/null +++ b/usr/src/test/os-tests/tests/tmpfs/tmpfs_full.c @@ -0,0 +1,94 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2016 Joyent, Inc. 
+ */ + +/* + * Given a path to a tmpfs that has already been marked as full, attempt to + * perform certain activities on it, all of which should fail with ENOSPC. + */ + +#include <stdio.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/statvfs.h> +#include <fcntl.h> +#include <errno.h> +#include <strings.h> +#include <sys/debug.h> +#include <unistd.h> + +int +main(int argc, const char *argv[]) +{ + int fd, ret; + struct statvfs vfs; + + if (argc != 3) { + fprintf(stderr, "test failed: missing path or file\n"); + return (1); + } + + if ((fd = open(argv[1], O_RDONLY)) < 0) { + fprintf(stderr, "test failed: failed to open root %s: %s\n", + argv[1], strerror(errno)); + return (1); + } + + if (fstatvfs(fd, &vfs) != 0) { + fprintf(stderr, "test failed: failed to stat vfs for %s: %s\n", + argv[1], strerror(errno)); + return (1); + } + + if (strncmp("tmpfs", vfs.f_basetype, FSTYPSZ) != 0) { + fprintf(stderr, "test failed: asked to run on non-tmpfs\n"); + return (1); + } + + /* + * Once a few additional bugs in tmpfs are fixed, we should double check + * and make sure that the free space here is actually zero before + * continuing. + */ + + /* + * Go through operations that would create nodes and make sure that they + * all fail. + */ + + ret = openat(fd, "Mnemosyne", O_RDWR | O_CREAT, 0755); + VERIFY3S(ret, ==, -1); + VERIFY3S(errno, ==, ENOSPC); + + ret = mkdirat(fd, "Euterpe", 0775); + VERIFY3S(ret, ==, -1); + VERIFY3S(errno, ==, ENOSPC); + + ret = symlinkat("/dev/null", fd, "Melpomene"); + VERIFY3S(ret, ==, -1); + VERIFY3S(errno, ==, ENOSPC); + + ret = linkat(fd, argv[2], fd, "Urania", 0); + VERIFY3S(ret, ==, -1); + VERIFY3S(errno, ==, ENOSPC); + + /* + * Make sure we can't create extended attributes. 
+ */ + ret = openat(fd, "Lethe", O_RDWR | O_CREAT | O_XATTR); + VERIFY3S(ret, ==, -1); + VERIFY3S(errno, ==, ENOSPC); + + return (0); +} diff --git a/usr/src/uts/common/fs/tmpfs/tmp_dir.c b/usr/src/uts/common/fs/tmpfs/tmp_dir.c index 387cc6ae54..1a620642cc 100644 --- a/usr/src/uts/common/fs/tmpfs/tmp_dir.c +++ b/usr/src/uts/common/fs/tmpfs/tmp_dir.c @@ -21,10 +21,9 @@ /* * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2016 Joyent, Inc. */ -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/types.h> #include <sys/param.h> #include <sys/sysmacros.h> @@ -383,20 +382,7 @@ tdirenter( /* * Unmake the inode we just made. */ - rw_enter(&tp->tn_rwlock, RW_WRITER); - if ((tp->tn_type) == VDIR) { - ASSERT(tdp == NULL); - /* - * cleanup allocs made by tdirinit() - */ - tdirtrunc(tp); - } - mutex_enter(&tp->tn_tlock); - tp->tn_nlink = 0; - mutex_exit(&tp->tn_tlock); - gethrestime(&tp->tn_ctime); - rw_exit(&tp->tn_rwlock); - tmpnode_rele(tp); + tmpnode_cleanup(tp); tp = NULL; } } else if (tpp) { @@ -431,6 +417,7 @@ tdirdelete( enum dr_op op, struct cred *cred) { + struct tmount *tm; struct tdirent *tpdp; int error; size_t namelen; @@ -516,7 +503,8 @@ tdirdelete( */ namelen = strlen(tpdp->td_name) + 1; - kmem_free(tpdp, sizeof (struct tdirent) + namelen); + tm = TNTOTM(dir); + tmp_kmem_free(tm, tpdp, sizeof (struct tdirent) + namelen); dir->tn_size -= (sizeof (struct tdirent) + namelen); dir->tn_dirents--; @@ -538,19 +526,27 @@ tdirdelete( * tdirinit is used internally to initialize a directory (dir) * with '.' and '..' 
entries without checking permissions and locking */ -void +int tdirinit( struct tmpnode *parent, /* parent of directory to initialize */ struct tmpnode *dir) /* the new directory */ { + struct tmount *tm; struct tdirent *dot, *dotdot; timestruc_t now; ASSERT(RW_WRITE_HELD(&parent->tn_rwlock)); ASSERT(dir->tn_type == VDIR); - dot = kmem_zalloc(sizeof (struct tdirent) + 2, KM_SLEEP); - dotdot = kmem_zalloc(sizeof (struct tdirent) + 3, KM_SLEEP); + tm = TNTOTM(parent); + dot = tmp_kmem_zalloc(tm, sizeof (struct tdirent) + 2, KM_SLEEP); + if (dot == NULL) + return (ENOSPC); + dotdot = tmp_kmem_zalloc(tm, sizeof (struct tdirent) + 3, KM_SLEEP); + if (dotdot == NULL) { + tmp_kmem_free(tm, dot, sizeof (struct tdirent) + 2); + return (ENOSPC); + } /* * Initialize the entries @@ -601,6 +597,8 @@ tdirinit( dir->tn_size = 2 * sizeof (struct tdirent) + 5; /* dot and dotdot */ dir->tn_dirents = 2; dir->tn_nlink = 2; + + return (0); } @@ -612,6 +610,7 @@ tdirtrunc(struct tmpnode *dir) { struct tdirent *tdp; struct tmpnode *tp; + struct tmount *tm; size_t namelen; timestruc_t now; int isvattrdir, isdotdot, skip_decr; @@ -619,6 +618,8 @@ tdirtrunc(struct tmpnode *dir) ASSERT(RW_WRITE_HELD(&dir->tn_rwlock)); ASSERT(dir->tn_type == VDIR); + tm = TNTOTM(dir); + isvattrdir = (dir->tn_vnode->v_flag & V_XATTRDIR) ? 
1 : 0; for (tdp = dir->tn_dir; tdp; tdp = dir->tn_dir) { ASSERT(tdp->td_next != tdp); @@ -650,7 +651,7 @@ tdirtrunc(struct tmpnode *dir) tmpfs_hash_out(tdp); - kmem_free(tdp, sizeof (struct tdirent) + namelen); + tmp_kmem_free(tm, tdp, sizeof (struct tdirent) + namelen); dir->tn_size -= (sizeof (struct tdirent) + namelen); dir->tn_dirents--; } @@ -903,6 +904,7 @@ tdiraddentry( enum de_op op, struct tmpnode *fromtp) { + struct tmount *tm; struct tdirent *tdp, *tpdp; size_t namelen, alloc_size; timestruc_t now; @@ -923,9 +925,10 @@ tdiraddentry( /* * Allocate and initialize directory entry */ + tm = TNTOTM(dir); namelen = strlen(name) + 1; alloc_size = namelen + sizeof (struct tdirent); - tdp = kmem_zalloc(alloc_size, KM_NOSLEEP | KM_NORMALPRI); + tdp = tmp_kmem_zalloc(tm, alloc_size, KM_NOSLEEP | KM_NORMALPRI); if (tdp == NULL) return (ENOSPC); @@ -1025,7 +1028,10 @@ tdirmaketnode( ((va->va_mask & AT_MTIME) && TIMESPEC_OVERFLOW(&va->va_mtime))) return (EOVERFLOW); type = va->va_type; - tp = kmem_zalloc(sizeof (struct tmpnode), KM_SLEEP); + tp = tmp_kmem_zalloc(tm, sizeof (struct tmpnode), KM_SLEEP); + if (tp == NULL) { + return (ENOSPC); + } tmpnode_init(tm, tp, va, cred); /* setup normal file/dir's extended attribute directory */ @@ -1087,8 +1093,13 @@ tdirmaketnode( if (va->va_mask & AT_MTIME) tp->tn_mtime = va->va_mtime; - if (op == DE_MKDIR) - tdirinit(dir, tp); + if (op == DE_MKDIR) { + int ret; + if ((ret = tdirinit(dir, tp)) != 0) { + tmpnode_cleanup(tp); + return (ret); + } + } *newnode = tp; return (0); diff --git a/usr/src/uts/common/fs/tmpfs/tmp_subr.c b/usr/src/uts/common/fs/tmpfs/tmp_subr.c index e6e2b392fe..cea31fd3ff 100644 --- a/usr/src/uts/common/fs/tmpfs/tmp_subr.c +++ b/usr/src/uts/common/fs/tmpfs/tmp_subr.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2015 Joyent, Inc. + * Copyright 2016 Joyent, Inc. 
*/ #include <sys/types.h> @@ -43,6 +43,7 @@ #include <sys/fs/tmpnode.h> #include <sys/ddi.h> #include <sys/sunddi.h> +#include <vm/anon.h> #define KILOBYTE 1024 #define MEGABYTE (1024 * KILOBYTE) @@ -54,6 +55,48 @@ extern pgcnt_t swapfs_minfree; +void * +tmp_kmem_zalloc(struct tmount *tm, size_t size, int flag) +{ + void *buf; + zone_t *zone; + + zone = tm->tm_vfsp->vfs_zone; + mutex_enter(&tm->tm_contents); + if (tm->tm_anonmem + size > tm->tm_anonmax || + tm->tm_anonmem + size < tm->tm_anonmem || + size + ptob(tmpfs_minfree) <= size || + !anon_checkspace(size + ptob(tmpfs_minfree), zone) || + anon_try_resv_zone(size, zone) == 0) { + mutex_exit(&tm->tm_contents); + return (NULL); + } + tm->tm_anonmem += size; + mutex_exit(&tm->tm_contents); + + buf = kmem_zalloc(size, flag); + if (buf == NULL) { + mutex_enter(&tm->tm_contents); + ASSERT(tm->tm_anonmem > tm->tm_anonmem - size); + tm->tm_anonmem -= size; + mutex_exit(&tm->tm_contents); + anon_unresv_zone(size, tm->tm_vfsp->vfs_zone); + } + + return (buf); +} + +void +tmp_kmem_free(struct tmount *tm, void *buf, size_t size) +{ + kmem_free(buf, size); + mutex_enter(&tm->tm_contents); + ASSERT(tm->tm_anonmem > tm->tm_anonmem - size); + tm->tm_anonmem -= size; + mutex_exit(&tm->tm_contents); + anon_unresv_zone(size, tm->tm_vfsp->vfs_zone); +} + int tmp_taccess(void *vtp, int mode, struct cred *cred) { @@ -99,8 +142,8 @@ tmp_sticky_remove_access(struct tmpnode *dir, struct tmpnode *entry, } /* - * Convert a string containing a number (number of bytes) to a pgcnt_t, - * containing the corresponding number of pages. On 32-bit kernels, the + * Convert a string containing a number (number of bytes) to a size_t, + * containing the corresponding number of bytes. On 32-bit kernels, the * maximum value encoded in 'str' is PAGESIZE * ULONG_MAX, while the value * returned in 'maxpg' is at most ULONG_MAX. * @@ -118,14 +161,12 @@ tmp_sticky_remove_access(struct tmpnode *dir, struct tmpnode *entry, * error. 
*/ int -tmp_convnum(char *str, pgcnt_t *maxpg) +tmp_convnum(char *str, size_t *maxbytes) { u_longlong_t num = 0; -#ifdef _LP64 - u_longlong_t max_bytes = ULONG_MAX; -#else - u_longlong_t max_bytes = PAGESIZE * (uint64_t)ULONG_MAX; -#endif + u_longlong_t max_bytes = (uint64_t)SIZE_MAX; + size_t pages; + char *c; const struct convchar { char *cc_char; @@ -215,14 +256,23 @@ valid_char: } done: + /* - * Since btopr() rounds up to page granularity, this round-up can - * cause an overflow only if 'num' is between (max_bytes - PAGESIZE) - * and (max_bytes). In this case the resulting number is zero, which - * is what we check for below. + * We've been given a size in bytes; however, we want to make sure that + * we have at least one page worth no matter what. Therefore we use + * btopr to round up. However, this may cause an overflow only if 'num' + * is between (max_bytes - PAGESIZE) and (max_bytes). In this case the + * resulting number is zero, which is what we check for below. Note, we + * require at least one page, so if pages is zero, well, it wasn't going + * to work anyways. */ - if ((*maxpg = (pgcnt_t)btopr(num)) == 0 && num != 0) + pages = btopr(num); + if (pages == 0) { return (EINVAL); + } + + *maxbytes = ptob(pages); + return (0); } diff --git a/usr/src/uts/common/fs/tmpfs/tmp_tnode.c b/usr/src/uts/common/fs/tmpfs/tmp_tnode.c index 51e57b2611..91ff8bb7fa 100644 --- a/usr/src/uts/common/fs/tmpfs/tmp_tnode.c +++ b/usr/src/uts/common/fs/tmpfs/tmp_tnode.c @@ -21,6 +21,7 @@ /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2016 Joyent, Inc. 
*/ #include <sys/types.h> @@ -64,21 +65,35 @@ tmp_resv( int pagecreate) /* call anon_resv if set */ { pgcnt_t pages = btopr(delta); + size_t pbytes = ptob(pages); zone_t *zone; ASSERT(RW_WRITE_HELD(&tp->tn_rwlock)); ASSERT(tp->tn_type == VREG); + /* - * pagecreate is set only if we actually need to call anon_resv - * to reserve an additional page of anonymous memory. - * Since anon_resv always reserves a page at a time, - * it should only get called when we know we're growing the - * file into a new page or filling a hole. + * pagecreate is set only if we actually need to call anon_resv to + * reserve an additional page of anonymous memory. Since anon_resv + * always reserves a page at a time, it should only get called when we + * know we're growing the file into a new page or filling a hole. This + * is why we transform delta into a number of pages. However, because we + * track bytes and not pages, we convert that back to a number of bytes + * that we allocate against. * - * Deny if trying to reserve more than tmpfs can allocate + * Deny if trying to reserve more than tmpfs can allocate, the + * allocation causes an overflow, or the delta round up overflowed. + * Note, that btopr rounds up, so we need to catch the unsigned + * overflow. Note, rounding up when we are within a page of SIZE_MAX is + * done by adding a page, overflowing, which will then be rounded back + * to zero. Hence the following check. 
*/ + if (pages == 0 && delta != 0) + return (1); + zone = tm->tm_vfsp->vfs_zone; - if (pagecreate && ((tm->tm_anonmem + pages > tm->tm_anonmax) || + if (pagecreate && ((tm->tm_anonmem + pbytes > tm->tm_anonmax) || + (tm->tm_anonmem + pbytes < tm->tm_anonmem) || + (ptob(pages + tmpfs_minfree) <= pbytes) || (!anon_checkspace(ptob(pages + tmpfs_minfree), zone)) || (anon_try_resv_zone(delta, zone) == 0))) { return (1); @@ -89,7 +104,7 @@ tmp_resv( */ if (pagecreate) { mutex_enter(&tm->tm_contents); - tm->tm_anonmem += pages; + tm->tm_anonmem += pbytes; mutex_exit(&tm->tm_contents); TRACE_2(TR_FAC_VM, TR_ANON_TMPFS, "anon tmpfs:%p %lu", @@ -110,13 +125,27 @@ tmp_unresv( struct tmpnode *tp, size_t delta) { + size_t pages, pbytes; + ASSERT(RW_WRITE_HELD(&tp->tn_rwlock)); ASSERT(tp->tn_type == VREG); + /* + * If this is true, we have a grievous overflow bug and some size + * accounting has been messed with as having an amount to truncate at + * this size would imply that all of memory was used for this file. No + * matter how small the kernel, it will always need at least one page. + */ + pages = btopr(delta); + if (pages == 0 && delta != 0) + panic("tmpfs unsigned overflow detected"); + pbytes = ptob(pages); + anon_unresv_zone(delta, tm->tm_vfsp->vfs_zone); mutex_enter(&tm->tm_contents); - tm->tm_anonmem -= btopr(delta); + ASSERT(tm->tm_anonmem > tm->tm_anonmem - pbytes); + tm->tm_anonmem -= pbytes; mutex_exit(&tm->tm_contents); TRACE_2(TR_FAC_VM, TR_ANON_TMPFS, "anon tmpfs:%p %lu", tp, delta); @@ -154,6 +183,26 @@ tmpnode_growmap(struct tmpnode *tp, ulong_t newsize) } /* + * This is used to clean up a tmpnode that hasn't made it out the door. In other + * words, we allocated it and did a tmpnode_init; however, before it could get + * fully inserted into a directory, bad things happened and it failed. 
+ */ +void +tmpnode_cleanup(struct tmpnode *tp) +{ + rw_enter(&tp->tn_rwlock, RW_WRITER); + if ((tp->tn_type) == VDIR) { + tdirtrunc(tp); + } + mutex_enter(&tp->tn_tlock); + tp->tn_nlink = 0; + mutex_exit(&tp->tn_tlock); + gethrestime(&tp->tn_ctime); + rw_exit(&tp->tn_rwlock); + tmpnode_rele(tp); +} + +/* * Initialize a tmpnode and add it to file list under mount point. */ void diff --git a/usr/src/uts/common/fs/tmpfs/tmp_vfsops.c b/usr/src/uts/common/fs/tmpfs/tmp_vfsops.c index 3c088c442c..3d73364bcd 100644 --- a/usr/src/uts/common/fs/tmpfs/tmp_vfsops.c +++ b/usr/src/uts/common/fs/tmpfs/tmp_vfsops.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2015 Joyent, Inc. + * Copyright 2016 Joyent, Inc. */ #include <sys/types.h> @@ -237,7 +237,7 @@ tmp_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr) struct tmpnode *tp; struct pathname dpn; int error; - pgcnt_t anonmax; + size_t anonmax; struct vattr rattr; int got_attrs; boolean_t mode_arg = B_FALSE; @@ -281,7 +281,7 @@ tmp_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr) if ((error = tmp_convnum(argstr, &anonmax)) != 0) goto out; } else { - anonmax = ULONG_MAX; + anonmax = SIZE_MAX; } /* @@ -357,7 +357,17 @@ tmp_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr) rattr.va_mode = (mode_t)(S_IFDIR | root_mode); rattr.va_type = VDIR; rattr.va_rdev = 0; - tp = kmem_zalloc(sizeof (struct tmpnode), KM_SLEEP); + tp = tmp_kmem_zalloc(tm, sizeof (struct tmpnode), KM_SLEEP); + if (tp == NULL) { + kmem_free(tm->tm_mntpath, strlen(tm->tm_mntpath) + 1); + mutex_destroy(&tm->tm_contents); + mutex_destroy(&tm->tm_renamelck); + kmem_free(tm, sizeof (struct tmount)); + + pn_free(&dpn); + error = ENOMEM; + goto out; + } tmpnode_init(tm, tp, &rattr, cr); /* @@ -396,7 +406,28 @@ tmp_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr) tp->tn_nlink = 0; tm->tm_rootnode = tp; - tdirinit(tp, tp); + if (tdirinit(tp, tp) 
!= 0) { + /* + * While we would normally let our VOP_INACTIVE function take + * care of cleaning up here, we're in a bit of a delicate + * situation, so we do so manually. While it's tempting to try + * and rely upon tmpfs_freevfs() and others, it's probably safer + * for the time being to do this manually at the cost of duplication. + */ + vn_invalid(TNTOV(tp)); + rw_destroy(&tp->tn_rwlock); + mutex_destroy(&tp->tn_tlock); + vn_free(TNTOV(tp)); + tmp_kmem_free(tm, tp, sizeof (struct tmpnode)); + + kmem_free(tm->tm_mntpath, strlen(tm->tm_mntpath) + 1); + mutex_destroy(&tm->tm_contents); + mutex_destroy(&tm->tm_renamelck); + kmem_free(tm, sizeof (struct tmount)); + pn_free(&dpn); + error = ENOMEM; + goto out; + } rw_exit(&tp->tn_rwlock); diff --git a/usr/src/uts/common/fs/tmpfs/tmp_vnops.c b/usr/src/uts/common/fs/tmpfs/tmp_vnops.c index 98951ed751..9087454e32 100644 --- a/usr/src/uts/common/fs/tmpfs/tmp_vnops.c +++ b/usr/src/uts/common/fs/tmpfs/tmp_vnops.c @@ -25,7 +25,7 @@ */ /* - * Copyright 2016, Joyent, Inc. + * Copyright 2016 Joyent, Inc. * Copyright 2015 Nexenta Systems, Inc. All rights reserved. * Copyright 2016 RackTop Systems. */ @@ -868,6 +868,8 @@ tmp_lookup( rw_enter(&tp->tn_rwlock, RW_WRITER); if (tp->tn_xattrdp == NULL) { + int err; + if (!(flags & CREATE_XATTR_DIR)) { rw_exit(&tp->tn_rwlock); return (ENOENT); @@ -888,8 +890,13 @@ tmp_lookup( return (error); } - xdp = kmem_zalloc(sizeof (struct tmpnode), KM_SLEEP); tm = VTOTM(dvp); + xdp = tmp_kmem_zalloc(tm, sizeof (struct tmpnode), + KM_SLEEP); + if (xdp == NULL) { + rw_exit(&tp->tn_rwlock); + return (ENOSPC); + } tmpnode_init(tm, xdp, &tp->tn_attr, NULL); /* * Fix-up fields unique to attribute directories. @@ -907,7 +914,16 @@ tmp_lookup( } xdp->tn_vnode->v_type = VDIR; xdp->tn_vnode->v_flag |= V_XATTRDIR; - tdirinit(tp, xdp); + if ((err = tdirinit(tp, xdp)) != 0) { + rw_exit(&tp->tn_rwlock); + /* + * This never got properly initialized so we can + * just clean it up. 
+ */ + xdp->tn_vnode->v_flag &= V_XATTRDIR; + tmpnode_cleanup(tp); + return (err); + } tp->tn_xattrdp = xdp; } else { VN_HOLD(tp->tn_xattrdp->tn_vnode); @@ -1626,12 +1642,12 @@ tmp_symlink( rw_exit(&parent->tn_rwlock); if (error) { - if (self) + if (self != NULL) tmpnode_rele(self); return (error); } len = strlen(tnm) + 1; - cp = kmem_alloc(len, KM_NOSLEEP | KM_NORMALPRI); + cp = tmp_kmem_zalloc(tm, len, KM_NOSLEEP | KM_NORMALPRI); if (cp == NULL) { tmpnode_rele(self); return (ENOSPC); @@ -1741,7 +1757,7 @@ top: goto top; } if (tp->tn_type == VLNK) - kmem_free(tp->tn_symlink, tp->tn_size + 1); + tmp_kmem_free(tm, tp->tn_symlink, tp->tn_size + 1); } /* @@ -1775,7 +1791,7 @@ top: rw_destroy(&tp->tn_rwlock); mutex_destroy(&tp->tn_tlock); vn_free(TNTOV(tp)); - kmem_free(tp, sizeof (struct tmpnode)); + tmp_kmem_free(tm, tp, sizeof (struct tmpnode)); /* If the filesystem was umounted by force, rele the vfs ref */ if (tm->tm_vfsp->vfs_flag & VFS_UNMOUNTED) diff --git a/usr/src/uts/common/sys/fs/tmp.h b/usr/src/uts/common/sys/fs/tmp.h index f8740e8873..bd5a0c1f4f 100644 --- a/usr/src/uts/common/sys/fs/tmp.h +++ b/usr/src/uts/common/sys/fs/tmp.h @@ -23,7 +23,7 @@ * All rights reserved. Use is subject to license terms. */ /* - * Copyright 2015 Joyent, Inc. + * Copyright 2016 Joyent, Inc. 
*/ #ifndef _SYS_FS_TMP_H @@ -43,8 +43,9 @@ struct tmount { struct vfs *tm_vfsp; /* filesystem's vfs struct */ struct tmpnode *tm_rootnode; /* root tmpnode */ char *tm_mntpath; /* name of tmpfs mount point */ - ulong_t tm_anonmax; /* file system max anon reservation */ - pgcnt_t tm_anonmem; /* pages of reserved anon memory */ + size_t tm_anonmax; /* file system max anon reservation */ + size_t tm_anonmem; /* bytes of reserved anon memory */ + /* and allocated kmem for the fs */ dev_t tm_dev; /* unique dev # of mounted `device' */ uint_t tm_gen; /* pseudo generation number for files */ kmutex_t tm_contents; /* lock for tmount structure */ @@ -58,6 +59,7 @@ struct tmount { #define VTOTM(vp) ((struct tmount *)(vp)->v_vfsp->vfs_data) #define VTOTN(vp) ((struct tmpnode *)(vp)->v_data) #define TNTOV(tp) ((tp)->tn_vnode) +#define TNTOTM(tp) (VTOTM(TNTOV(tp))) #define tmpnode_hold(tp) VN_HOLD(TNTOV(tp)) #define tmpnode_rele(tp) VN_RELE(TNTOV(tp)) @@ -93,24 +95,28 @@ extern size_t tmpfs_minfree; /* Anonymous memory in pages */ extern void tmpnode_init(struct tmount *, struct tmpnode *, struct vattr *, struct cred *); +extern void tmpnode_cleanup(struct tmpnode *tp); extern int tmpnode_trunc(struct tmount *, struct tmpnode *, ulong_t); extern void tmpnode_growmap(struct tmpnode *, ulong_t); extern int tdirlookup(struct tmpnode *, char *, struct tmpnode **, struct cred *); extern int tdirdelete(struct tmpnode *, struct tmpnode *, char *, enum dr_op, struct cred *); -extern void tdirinit(struct tmpnode *, struct tmpnode *); +extern int tdirinit(struct tmpnode *, struct tmpnode *); extern void tdirtrunc(struct tmpnode *); extern int tmp_resv(struct tmount *, struct tmpnode *, size_t, int); extern int tmp_taccess(void *, int, struct cred *); extern int tmp_sticky_remove_access(struct tmpnode *, struct tmpnode *, struct cred *); -extern int tmp_convnum(char *, pgcnt_t *); +extern int tmp_convnum(char *, size_t *); extern int tmp_convmode(char *, mode_t *); extern int 
tdirenter(struct tmount *, struct tmpnode *, char *, enum de_op, struct tmpnode *, struct tmpnode *, struct vattr *, struct tmpnode **, struct cred *, caller_context_t *); +extern void *tmp_kmem_zalloc(struct tmount *, size_t, int); +extern void tmp_kmem_free(struct tmount *, void *, size_t); + #define TMP_MUSTHAVE 0x01 #ifdef __cplusplus |
