summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRobert Mustacchi <rm@joyent.com>2016-07-21 21:31:37 +0000
committerRobert Mustacchi <rm@joyent.com>2016-08-03 22:24:53 +0000
commit08280dbb7d32cb16511f00c4ea7e1902b2412609 (patch)
treec4f7b770f4f0ef56d82b201bf8b35020ca3fec8f
parent5216f207fca0ab66e4f96f2fdaea9154b1317359 (diff)
downloadillumos-joyent-08280dbb7d32cb16511f00c4ea7e1902b2412609.tar.gz
OS-5539 tmpfs space accounting needs improvement
Reviewed by: Alex Wilson <alex.wilson@joyent.com> Reviewed by: Patrick Mooney <patrick.mooney@joyent.com> Approved by: Patrick Mooney <patrick.mooney@joyent.com>
-rw-r--r--usr/src/test/os-tests/runfiles/default.run4
-rw-r--r--usr/src/test/os-tests/tests/Makefile2
-rw-r--r--usr/src/test/os-tests/tests/tmpfs/Makefile52
-rw-r--r--usr/src/test/os-tests/tests/tmpfs/tmpfs_badmount.ksh114
-rw-r--r--usr/src/test/os-tests/tests/tmpfs/tmpfs_enospc.ksh74
-rw-r--r--usr/src/test/os-tests/tests/tmpfs/tmpfs_full.c94
-rw-r--r--usr/src/uts/common/fs/tmpfs/tmp_dir.c61
-rw-r--r--usr/src/uts/common/fs/tmpfs/tmp_subr.c78
-rw-r--r--usr/src/uts/common/fs/tmpfs/tmp_tnode.c67
-rw-r--r--usr/src/uts/common/fs/tmpfs/tmp_vfsops.c41
-rw-r--r--usr/src/uts/common/fs/tmpfs/tmp_vnops.c30
-rw-r--r--usr/src/uts/common/sys/fs/tmp.h16
12 files changed, 567 insertions, 66 deletions
diff --git a/usr/src/test/os-tests/runfiles/default.run b/usr/src/test/os-tests/runfiles/default.run
index 4e73ebc8b3..3971c5b74c 100644
--- a/usr/src/test/os-tests/runfiles/default.run
+++ b/usr/src/test/os-tests/runfiles/default.run
@@ -26,3 +26,7 @@ user = root
[/opt/os-tests/tests/sigqueue]
tests = ['sigqueue_queue_size']
+
+[/opt/os-tests/tests/tmpfs]
+user = root
+tests = ['tmpfs_badmount', 'tmpfs_enospc']
diff --git a/usr/src/test/os-tests/tests/Makefile b/usr/src/test/os-tests/tests/Makefile
index cd4104500c..77f1a7a0ec 100644
--- a/usr/src/test/os-tests/tests/Makefile
+++ b/usr/src/test/os-tests/tests/Makefile
@@ -13,6 +13,6 @@
# Copyright (c) 2012 by Delphix. All rights reserved.
#
-SUBDIRS = poll sigqueue spoof-ras
+SUBDIRS = poll sigqueue spoof-ras tmpfs
include $(SRC)/test/Makefile.com
diff --git a/usr/src/test/os-tests/tests/tmpfs/Makefile b/usr/src/test/os-tests/tests/tmpfs/Makefile
new file mode 100644
index 0000000000..d6515b38fa
--- /dev/null
+++ b/usr/src/test/os-tests/tests/tmpfs/Makefile
@@ -0,0 +1,52 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2016 Joyent, Inc.
+#
+
+include $(SRC)/Makefile.master
+
+ROOTOPTPKG = $(ROOT)/opt/os-tests
+TESTDIR = $(ROOTOPTPKG)/tests/tmpfs
+
+PROGS = tmpfs_full
+SCRIPTS = tmpfs_badmount \
+ tmpfs_enospc
+
+include $(SRC)/cmd/Makefile.cmd
+include $(SRC)/test/Makefile.com
+
+CMDS = $(PROGS:%=$(TESTDIR)/%) $(SCRIPTS:%=$(TESTDIR)/%)
+$(CMDS) := FILEMODE = 0555
+
+all: $(PROGS)
+
+install: all $(CMDS)
+
+lint:
+
+clobber: clean
+ -$(RM) $(PROGS)
+
+clean:
+ -$(RM) *.o
+
+$(CMDS): $(TESTDIR) $(PROGS)
+
+$(TESTDIR):
+ $(INS.dir)
+
+$(TESTDIR)/%: %.ksh
+ $(INS.rename)
+
+$(TESTDIR)/%: %
+ $(INS.file)
diff --git a/usr/src/test/os-tests/tests/tmpfs/tmpfs_badmount.ksh b/usr/src/test/os-tests/tests/tmpfs/tmpfs_badmount.ksh
new file mode 100644
index 0000000000..7e2c4a6095
--- /dev/null
+++ b/usr/src/test/os-tests/tests/tmpfs/tmpfs_badmount.ksh
@@ -0,0 +1,114 @@
+#!/usr/bin/ksh
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2016 Joyent, Inc.
+#
+
+#
+# Test various options to try and mount a tmpfs. Aside from the first to
+# verify that we can mount tmpfs at all, these should all fail.
+#
+
+tb_arg0=$(basename $0)
+tb_mountpoint="/var/tmp/$0.$$"
+tb_mount=/usr/sbin/mount
+tb_umount=/usr/sbin/umount
+
+function fatal
+{
+ rmdir $tb_mountpoint
+ typeset msg="$*"
+ [[ -z "$msg" ]] && msg="test failed"
+ echo "$tb_arg0: test failed $msg" >&2
+ exit 1
+}
+
+function check_mount
+{
+ mkdir -p $tb_mountpoint || fatal \
+ "failed to make mountpoint $tb_mountpoint"
+ $tb_mount -F tmpfs swap $tb_mountpoint || fatal \
+ "failed to mount tmpfs, check user perms"
+ $tb_umount $tb_mountpoint || fatal \
+ "failed to unmount test point"
+}
+
+function test_one
+{
+ typeset opts=$1
+
+ [[ -z "$opts" ]] && fatal "missing required opts"
+ $tb_mount -F tmpfs -o $opts swap $tb_mountpoint 2>/dev/null
+ if [[ $? -eq 0 ]]; then
+ $tb_umount $tb_mountpoint
+ fatal "successfully mounted with opts $opts, expected failure"
+ fi
+}
+
+check_mount
+
+#
+# Test invalid percentages.
+#
+test_one "size=-5%"
+test_one "size=200%"
+test_one "size=55.55555%"
+test_one "size=100.0%"
+test_one "size=bad%"
+test_one "size=30g%"
+test_one "size=%"
+test_one "size=%wat"
+
+#
+# Test invalid sizes. Only kmg are valid prefixes.
+#
+test_one "size=hello;world"
+test_one "size=0xnope"
+test_one "size=3.14g"
+test_one "size=3;14"
+test_one "size=thisisanormalsize"
+test_one "size="
+test_one "size=100mtry"
+
+#
+# Now, we need to try and trigger a bunch of overflow. We're going to do
+# this assuming we're on a 64-bit kernel (which will always overflow a
+# 32-bit kernel).
+#
+test_one "size=20000000000000000000"
+test_one "size=1ggggggggggggggggggg"
+test_one "size=1mmmmmmmmmmmmmmmmmmm"
+test_one "size=1kkkkkkkkkkkkkkkkkkk"
+test_one "size=1kkkkkkkkkkkkkkkkkkk"
+test_one "size=18014398509481983k"
+test_one "size=17592186044416m"
+test_one "size=17179869185g"
+test_one "size=17179869184g"
+
+#
+# Let's throw a couple bad modes around while we're here.
+#
+test_one "mode=17777"
+test_one "mode=27777"
+test_one "mode=37777"
+test_one "mode=47777"
+test_one "mode=57777"
+test_one "mode=67777"
+test_one "mode=77777"
+test_one "mode=87777"
+test_one "mode=97777"
+test_one "mode=asdf"
+test_one "mode=deadbeef"
+test_one "mode=kefka"
+
+rmdir $tb_mountpoint
diff --git a/usr/src/test/os-tests/tests/tmpfs/tmpfs_enospc.ksh b/usr/src/test/os-tests/tests/tmpfs/tmpfs_enospc.ksh
new file mode 100644
index 0000000000..a285f306e2
--- /dev/null
+++ b/usr/src/test/os-tests/tests/tmpfs/tmpfs_enospc.ksh
@@ -0,0 +1,74 @@
+#!/usr/bin/ksh
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2016 Joyent, Inc.
+#
+
+#
+# Verify that if we fill up a tmpfs that we can't then perform
+# additional things to it that would result in the creation or use of
+# kernel memory.
+#
+
+te_arg0=$(basename $0)
+te_root=$(dirname $0)
+te_bin=$te_root/tmpfs_full
+te_mountpoint="/var/tmp/$0.$$"
+te_mount=/usr/sbin/mount
+te_umount=/usr/sbin/umount
+te_testfile=1m
+te_mounted=
+te_exit=1
+
+function fatal
+{
+ [[ -n "$te_mounted" ]] && $te_umount $te_mountpoint
+ rmdir $te_mountpoint
+ typeset msg="$*"
+ [[ -z "$msg" ]] && msg="test failed"
+ echo "$te_arg0: test failed $msg" >&2
+ exit 1
+}
+
+function setup
+{
+ typeset ofile=$te_mountpoint/$te_testfile
+
+ mkdir -p $te_mountpoint || fatal \
+ "failed to make mountpoint $te_mountpoint"
+ $te_mount -F tmpfs swap $te_mountpoint || fatal \
+ "failed to mount tmpfs, check user perms"
+ te_mounted=1
+ dd if=/dev/zero of=$ofile bs=1M count=1 2>/dev/null || fatal \
+ "failed to create a 1 MB file"
+ $te_mount -F tmpfs -o remount,size=512k swap $te_mountpoint ||
+ fatal "failed to remount tmpfs"
+}
+
+function run_test
+{
+ $te_bin $te_mountpoint $te_testfile || fatal "$te_bin failed"
+}
+
+function cleanup
+{
+ te_mounted=
+ $te_umount $te_mountpoint || fatal "failed to unmount $te_mountpoint"
+ rmdir $te_mountpoint || fatal "failed to remove $te_mountpoint"
+}
+
+setup
+run_test
+cleanup
+
+exit 0
diff --git a/usr/src/test/os-tests/tests/tmpfs/tmpfs_full.c b/usr/src/test/os-tests/tests/tmpfs/tmpfs_full.c
new file mode 100644
index 0000000000..6c6037710b
--- /dev/null
+++ b/usr/src/test/os-tests/tests/tmpfs/tmpfs_full.c
@@ -0,0 +1,94 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2016 Joyent, Inc.
+ */
+
+/*
+ * Given a path to a tmpfs that has already been marked as full, attempt to
+ * perform certain activities on it, all of which should fail with ENOSPC.
+ */
+
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/statvfs.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <strings.h>
+#include <sys/debug.h>
+#include <unistd.h>
+
+int
+main(int argc, const char *argv[])
+{
+ int fd, ret;
+ struct statvfs vfs;
+
+ if (argc != 3) {
+ fprintf(stderr, "test failed: missing path or file\n");
+ return (1);
+ }
+
+ if ((fd = open(argv[1], O_RDONLY)) < 0) {
+ fprintf(stderr, "test failed: failed to open root %s: %s\n",
+ argv[1], strerror(errno));
+ return (1);
+ }
+
+ if (fstatvfs(fd, &vfs) != 0) {
+ fprintf(stderr, "test failed: failed to stat vfs for %s: %s\n",
+ argv[1], strerror(errno));
+ return (1);
+ }
+
+ if (strncmp("tmpfs", vfs.f_basetype, FSTYPSZ) != 0) {
+ fprintf(stderr, "test failed: asked to run on non-tmpfs\n");
+ return (1);
+ }
+
+ /*
+ * Once a few additional bugs in tmpfs are fixed, we should double check
+ * and make sure that the free space here is actually zero before
+ * continuing.
+ */
+
+ /*
+ * Go through operations that would create nodes and make sure that they
+ * all fail.
+ */
+
+ ret = openat(fd, "Mnemosyne", O_RDWR | O_CREAT, 0755);
+ VERIFY3S(ret, ==, -1);
+ VERIFY3S(errno, ==, ENOSPC);
+
+ ret = mkdirat(fd, "Euterpe", 0775);
+ VERIFY3S(ret, ==, -1);
+ VERIFY3S(errno, ==, ENOSPC);
+
+ ret = symlinkat("/dev/null", fd, "Melpomene");
+ VERIFY3S(ret, ==, -1);
+ VERIFY3S(errno, ==, ENOSPC);
+
+ ret = linkat(fd, argv[2], fd, "Urania", 0);
+ VERIFY3S(ret, ==, -1);
+ VERIFY3S(errno, ==, ENOSPC);
+
+ /*
+ * Make sure we can't create open extended attributes.
+ */
+ ret = openat(fd, "Lethe", O_RDWR | O_CREAT | O_XATTR);
+ VERIFY3S(ret, ==, -1);
+ VERIFY3S(errno, ==, ENOSPC);
+
+ return (0);
+}
diff --git a/usr/src/uts/common/fs/tmpfs/tmp_dir.c b/usr/src/uts/common/fs/tmpfs/tmp_dir.c
index 387cc6ae54..1a620642cc 100644
--- a/usr/src/uts/common/fs/tmpfs/tmp_dir.c
+++ b/usr/src/uts/common/fs/tmpfs/tmp_dir.c
@@ -21,10 +21,9 @@
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2016 Joyent, Inc.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/types.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
@@ -383,20 +382,7 @@ tdirenter(
/*
* Unmake the inode we just made.
*/
- rw_enter(&tp->tn_rwlock, RW_WRITER);
- if ((tp->tn_type) == VDIR) {
- ASSERT(tdp == NULL);
- /*
- * cleanup allocs made by tdirinit()
- */
- tdirtrunc(tp);
- }
- mutex_enter(&tp->tn_tlock);
- tp->tn_nlink = 0;
- mutex_exit(&tp->tn_tlock);
- gethrestime(&tp->tn_ctime);
- rw_exit(&tp->tn_rwlock);
- tmpnode_rele(tp);
+ tmpnode_cleanup(tp);
tp = NULL;
}
} else if (tpp) {
@@ -431,6 +417,7 @@ tdirdelete(
enum dr_op op,
struct cred *cred)
{
+ struct tmount *tm;
struct tdirent *tpdp;
int error;
size_t namelen;
@@ -516,7 +503,8 @@ tdirdelete(
*/
namelen = strlen(tpdp->td_name) + 1;
- kmem_free(tpdp, sizeof (struct tdirent) + namelen);
+ tm = TNTOTM(dir);
+ tmp_kmem_free(tm, tpdp, sizeof (struct tdirent) + namelen);
dir->tn_size -= (sizeof (struct tdirent) + namelen);
dir->tn_dirents--;
@@ -538,19 +526,27 @@ tdirdelete(
* tdirinit is used internally to initialize a directory (dir)
* with '.' and '..' entries without checking permissions and locking
*/
-void
+int
tdirinit(
struct tmpnode *parent, /* parent of directory to initialize */
struct tmpnode *dir) /* the new directory */
{
+ struct tmount *tm;
struct tdirent *dot, *dotdot;
timestruc_t now;
ASSERT(RW_WRITE_HELD(&parent->tn_rwlock));
ASSERT(dir->tn_type == VDIR);
- dot = kmem_zalloc(sizeof (struct tdirent) + 2, KM_SLEEP);
- dotdot = kmem_zalloc(sizeof (struct tdirent) + 3, KM_SLEEP);
+ tm = TNTOTM(parent);
+ dot = tmp_kmem_zalloc(tm, sizeof (struct tdirent) + 2, KM_SLEEP);
+ if (dot == NULL)
+ return (ENOSPC);
+ dotdot = tmp_kmem_zalloc(tm, sizeof (struct tdirent) + 3, KM_SLEEP);
+ if (dotdot == NULL) {
+ tmp_kmem_free(tm, dot, sizeof (struct tdirent) + 2);
+ return (ENOSPC);
+ }
/*
* Initialize the entries
@@ -601,6 +597,8 @@ tdirinit(
dir->tn_size = 2 * sizeof (struct tdirent) + 5; /* dot and dotdot */
dir->tn_dirents = 2;
dir->tn_nlink = 2;
+
+ return (0);
}
@@ -612,6 +610,7 @@ tdirtrunc(struct tmpnode *dir)
{
struct tdirent *tdp;
struct tmpnode *tp;
+ struct tmount *tm;
size_t namelen;
timestruc_t now;
int isvattrdir, isdotdot, skip_decr;
@@ -619,6 +618,8 @@ tdirtrunc(struct tmpnode *dir)
ASSERT(RW_WRITE_HELD(&dir->tn_rwlock));
ASSERT(dir->tn_type == VDIR);
+ tm = TNTOTM(dir);
+
isvattrdir = (dir->tn_vnode->v_flag & V_XATTRDIR) ? 1 : 0;
for (tdp = dir->tn_dir; tdp; tdp = dir->tn_dir) {
ASSERT(tdp->td_next != tdp);
@@ -650,7 +651,7 @@ tdirtrunc(struct tmpnode *dir)
tmpfs_hash_out(tdp);
- kmem_free(tdp, sizeof (struct tdirent) + namelen);
+ tmp_kmem_free(tm, tdp, sizeof (struct tdirent) + namelen);
dir->tn_size -= (sizeof (struct tdirent) + namelen);
dir->tn_dirents--;
}
@@ -903,6 +904,7 @@ tdiraddentry(
enum de_op op,
struct tmpnode *fromtp)
{
+ struct tmount *tm;
struct tdirent *tdp, *tpdp;
size_t namelen, alloc_size;
timestruc_t now;
@@ -923,9 +925,10 @@ tdiraddentry(
/*
* Allocate and initialize directory entry
*/
+ tm = TNTOTM(dir);
namelen = strlen(name) + 1;
alloc_size = namelen + sizeof (struct tdirent);
- tdp = kmem_zalloc(alloc_size, KM_NOSLEEP | KM_NORMALPRI);
+ tdp = tmp_kmem_zalloc(tm, alloc_size, KM_NOSLEEP | KM_NORMALPRI);
if (tdp == NULL)
return (ENOSPC);
@@ -1025,7 +1028,10 @@ tdirmaketnode(
((va->va_mask & AT_MTIME) && TIMESPEC_OVERFLOW(&va->va_mtime)))
return (EOVERFLOW);
type = va->va_type;
- tp = kmem_zalloc(sizeof (struct tmpnode), KM_SLEEP);
+ tp = tmp_kmem_zalloc(tm, sizeof (struct tmpnode), KM_SLEEP);
+ if (tp == NULL) {
+ return (ENOSPC);
+ }
tmpnode_init(tm, tp, va, cred);
/* setup normal file/dir's extended attribute directory */
@@ -1087,8 +1093,13 @@ tdirmaketnode(
if (va->va_mask & AT_MTIME)
tp->tn_mtime = va->va_mtime;
- if (op == DE_MKDIR)
- tdirinit(dir, tp);
+ if (op == DE_MKDIR) {
+ int ret;
+ if ((ret = tdirinit(dir, tp)) != 0) {
+ tmpnode_cleanup(tp);
+ return (ret);
+ }
+ }
*newnode = tp;
return (0);
diff --git a/usr/src/uts/common/fs/tmpfs/tmp_subr.c b/usr/src/uts/common/fs/tmpfs/tmp_subr.c
index e6e2b392fe..cea31fd3ff 100644
--- a/usr/src/uts/common/fs/tmpfs/tmp_subr.c
+++ b/usr/src/uts/common/fs/tmpfs/tmp_subr.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2015 Joyent, Inc.
+ * Copyright 2016 Joyent, Inc.
*/
#include <sys/types.h>
@@ -43,6 +43,7 @@
#include <sys/fs/tmpnode.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
+#include <vm/anon.h>
#define KILOBYTE 1024
#define MEGABYTE (1024 * KILOBYTE)
@@ -54,6 +55,48 @@
extern pgcnt_t swapfs_minfree;
+void *
+tmp_kmem_zalloc(struct tmount *tm, size_t size, int flag)
+{
+ void *buf;
+ zone_t *zone;
+
+ zone = tm->tm_vfsp->vfs_zone;
+ mutex_enter(&tm->tm_contents);
+ if (tm->tm_anonmem + size > tm->tm_anonmax ||
+ tm->tm_anonmem + size < tm->tm_anonmem ||
+ size + ptob(tmpfs_minfree) <= size ||
+ !anon_checkspace(size + ptob(tmpfs_minfree), zone) ||
+ anon_try_resv_zone(size, zone) == 0) {
+ mutex_exit(&tm->tm_contents);
+ return (NULL);
+ }
+ tm->tm_anonmem += size;
+ mutex_exit(&tm->tm_contents);
+
+ buf = kmem_zalloc(size, flag);
+ if (buf == NULL) {
+ mutex_enter(&tm->tm_contents);
+ ASSERT(tm->tm_anonmem > tm->tm_anonmem - size);
+ tm->tm_anonmem -= size;
+ mutex_exit(&tm->tm_contents);
+ anon_unresv_zone(size, tm->tm_vfsp->vfs_zone);
+ }
+
+ return (buf);
+}
+
+void
+tmp_kmem_free(struct tmount *tm, void *buf, size_t size)
+{
+ kmem_free(buf, size);
+ mutex_enter(&tm->tm_contents);
+ ASSERT(tm->tm_anonmem > tm->tm_anonmem - size);
+ tm->tm_anonmem -= size;
+ mutex_exit(&tm->tm_contents);
+ anon_unresv_zone(size, tm->tm_vfsp->vfs_zone);
+}
+
int
tmp_taccess(void *vtp, int mode, struct cred *cred)
{
@@ -99,8 +142,8 @@ tmp_sticky_remove_access(struct tmpnode *dir, struct tmpnode *entry,
}
/*
- * Convert a string containing a number (number of bytes) to a pgcnt_t,
- * containing the corresponding number of pages. On 32-bit kernels, the
+ * Convert a string containing a number (number of bytes) to a size_t,
+ * containing the corresponding number of bytes. On 32-bit kernels, the
* maximum value encoded in 'str' is PAGESIZE * ULONG_MAX, while the value
* returned in 'maxpg' is at most ULONG_MAX.
*
@@ -118,14 +161,12 @@ tmp_sticky_remove_access(struct tmpnode *dir, struct tmpnode *entry,
* error.
*/
int
-tmp_convnum(char *str, pgcnt_t *maxpg)
+tmp_convnum(char *str, size_t *maxbytes)
{
u_longlong_t num = 0;
-#ifdef _LP64
- u_longlong_t max_bytes = ULONG_MAX;
-#else
- u_longlong_t max_bytes = PAGESIZE * (uint64_t)ULONG_MAX;
-#endif
+ u_longlong_t max_bytes = (uint64_t)SIZE_MAX;
+ size_t pages;
+
char *c;
const struct convchar {
char *cc_char;
@@ -215,14 +256,23 @@ valid_char:
}
done:
+
/*
- * Since btopr() rounds up to page granularity, this round-up can
- * cause an overflow only if 'num' is between (max_bytes - PAGESIZE)
- * and (max_bytes). In this case the resulting number is zero, which
- * is what we check for below.
+ * We've been given a size in bytes; however, we want to make sure that
+ * we have at least one page worth no matter what. Therefore we use
+ * btopr to round up. However, this may cause an overflow only if 'num'
+ * is between (max_bytes - PAGESIZE) and (max_bytes). In this case the
+ * resulting number is zero, which is what we check for below. Note, we
+ * require at least one page, so if pages is zero, well, it wasn't going
+ * to work anyways.
*/
- if ((*maxpg = (pgcnt_t)btopr(num)) == 0 && num != 0)
+ pages = btopr(num);
+ if (pages == 0) {
return (EINVAL);
+ }
+
+ *maxbytes = ptob(pages);
+
return (0);
}
diff --git a/usr/src/uts/common/fs/tmpfs/tmp_tnode.c b/usr/src/uts/common/fs/tmpfs/tmp_tnode.c
index 51e57b2611..91ff8bb7fa 100644
--- a/usr/src/uts/common/fs/tmpfs/tmp_tnode.c
+++ b/usr/src/uts/common/fs/tmpfs/tmp_tnode.c
@@ -21,6 +21,7 @@
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2016 Joyent, Inc.
*/
#include <sys/types.h>
@@ -64,21 +65,35 @@ tmp_resv(
int pagecreate) /* call anon_resv if set */
{
pgcnt_t pages = btopr(delta);
+ size_t pbytes = ptob(pages);
zone_t *zone;
ASSERT(RW_WRITE_HELD(&tp->tn_rwlock));
ASSERT(tp->tn_type == VREG);
+
/*
- * pagecreate is set only if we actually need to call anon_resv
- * to reserve an additional page of anonymous memory.
- * Since anon_resv always reserves a page at a time,
- * it should only get called when we know we're growing the
- * file into a new page or filling a hole.
+ * pagecreate is set only if we actually need to call anon_resv to
+ * reserve an additional page of anonymous memory. Since anon_resv
+ * always reserves a page at a time, it should only get called when we
+ * know we're growing the file into a new page or filling a hole. This
+ * is why we transform delta into a number of pages. However, because we
+ * track bytes and not pages, we convert that back to a number of bytes
+ * that we allocate against.
*
- * Deny if trying to reserve more than tmpfs can allocate
+ * Deny if trying to reserve more than tmpfs can allocate, the
+ * allocation causes an overflow, or the delta round up overflowed.
+ * Note, that btopr rounds up, so we need to catch the unsigned
+ * overflow. Note, rounding up when we are within a page of SIZE_MAX is
+ * done by adding a page, overflowing, which will then be rounded back
+ * to zero. Hence the following check.
*/
+ if (pages == 0 && delta != 0)
+ return (1);
+
zone = tm->tm_vfsp->vfs_zone;
- if (pagecreate && ((tm->tm_anonmem + pages > tm->tm_anonmax) ||
+ if (pagecreate && ((tm->tm_anonmem + pbytes > tm->tm_anonmax) ||
+ (tm->tm_anonmem + pbytes < tm->tm_anonmem) ||
+ (ptob(pages + tmpfs_minfree) <= pbytes) ||
(!anon_checkspace(ptob(pages + tmpfs_minfree), zone)) ||
(anon_try_resv_zone(delta, zone) == 0))) {
return (1);
@@ -89,7 +104,7 @@ tmp_resv(
*/
if (pagecreate) {
mutex_enter(&tm->tm_contents);
- tm->tm_anonmem += pages;
+ tm->tm_anonmem += pbytes;
mutex_exit(&tm->tm_contents);
TRACE_2(TR_FAC_VM, TR_ANON_TMPFS, "anon tmpfs:%p %lu",
@@ -110,13 +125,27 @@ tmp_unresv(
struct tmpnode *tp,
size_t delta)
{
+ size_t pages, pbytes;
+
ASSERT(RW_WRITE_HELD(&tp->tn_rwlock));
ASSERT(tp->tn_type == VREG);
+ /*
+ * If this is true, we have a grevious overflow bug and some size
+ * accounting has been messed with as having an amount to truncate at
+ * this size would imply that all of memory was used for this file. No
+ * matter how small the kernel, it will always need at least one page.
+ */
+ pages = btopr(delta);
+ if (pages == 0 && delta != 0)
+ panic("tmpfs unsigned overflow detected");
+ pbytes = ptob(pages);
+
anon_unresv_zone(delta, tm->tm_vfsp->vfs_zone);
mutex_enter(&tm->tm_contents);
- tm->tm_anonmem -= btopr(delta);
+ ASSERT(tm->tm_anonmem > tm->tm_anonmem - pbytes);
+ tm->tm_anonmem -= pbytes;
mutex_exit(&tm->tm_contents);
TRACE_2(TR_FAC_VM, TR_ANON_TMPFS, "anon tmpfs:%p %lu", tp, delta);
@@ -154,6 +183,26 @@ tmpnode_growmap(struct tmpnode *tp, ulong_t newsize)
}
/*
+ * This is used to clean up a tmpnode that hasn't made it out the door. In other
+ * words, we allocated it and did a tmpnode_init; however, before it could get
+ * fully inserted into a directory, bad things happened and it failed.
+ */
+void
+tmpnode_cleanup(struct tmpnode *tp)
+{
+ rw_enter(&tp->tn_rwlock, RW_WRITER);
+ if ((tp->tn_type) == VDIR) {
+ tdirtrunc(tp);
+ }
+ mutex_enter(&tp->tn_tlock);
+ tp->tn_nlink = 0;
+ mutex_exit(&tp->tn_tlock);
+ gethrestime(&tp->tn_ctime);
+ rw_exit(&tp->tn_rwlock);
+ tmpnode_rele(tp);
+}
+
+/*
* Initialize a tmpnode and add it to file list under mount point.
*/
void
diff --git a/usr/src/uts/common/fs/tmpfs/tmp_vfsops.c b/usr/src/uts/common/fs/tmpfs/tmp_vfsops.c
index 3c088c442c..3d73364bcd 100644
--- a/usr/src/uts/common/fs/tmpfs/tmp_vfsops.c
+++ b/usr/src/uts/common/fs/tmpfs/tmp_vfsops.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2015 Joyent, Inc.
+ * Copyright 2016 Joyent, Inc.
*/
#include <sys/types.h>
@@ -237,7 +237,7 @@ tmp_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
struct tmpnode *tp;
struct pathname dpn;
int error;
- pgcnt_t anonmax;
+ size_t anonmax;
struct vattr rattr;
int got_attrs;
boolean_t mode_arg = B_FALSE;
@@ -281,7 +281,7 @@ tmp_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
if ((error = tmp_convnum(argstr, &anonmax)) != 0)
goto out;
} else {
- anonmax = ULONG_MAX;
+ anonmax = SIZE_MAX;
}
/*
@@ -357,7 +357,17 @@ tmp_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
rattr.va_mode = (mode_t)(S_IFDIR | root_mode);
rattr.va_type = VDIR;
rattr.va_rdev = 0;
- tp = kmem_zalloc(sizeof (struct tmpnode), KM_SLEEP);
+ tp = tmp_kmem_zalloc(tm, sizeof (struct tmpnode), KM_SLEEP);
+ if (tp == NULL) {
+ kmem_free(tm->tm_mntpath, strlen(tm->tm_mntpath) + 1);
+ mutex_destroy(&tm->tm_contents);
+ mutex_destroy(&tm->tm_renamelck);
+ kmem_free(tm, sizeof (struct tmount));
+
+ pn_free(&dpn);
+ error = ENOMEM;
+ goto out;
+ }
tmpnode_init(tm, tp, &rattr, cr);
/*
@@ -396,7 +406,28 @@ tmp_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
tp->tn_nlink = 0;
tm->tm_rootnode = tp;
- tdirinit(tp, tp);
+ if (tdirinit(tp, tp) != 0) {
+ /*
+ * While we would normally let our VOP_INACTIVE function take
+ * care of cleaning up here, we're in a bit of a delicate
+ * situation, so we do so manually. While it's tempting to try
+ * and rely upon tmpfs_freevfs() and others, it's probably safer
+ * for the time to do this manually at the cost of duplication.
+ */
+ vn_invalid(TNTOV(tp));
+ rw_destroy(&tp->tn_rwlock);
+ mutex_destroy(&tp->tn_tlock);
+ vn_free(TNTOV(tp));
+ tmp_kmem_free(tm, tp, sizeof (struct tmpnode));
+
+ kmem_free(tm->tm_mntpath, strlen(tm->tm_mntpath) + 1);
+ mutex_destroy(&tm->tm_contents);
+ mutex_destroy(&tm->tm_renamelck);
+ kmem_free(tm, sizeof (struct tmount));
+ pn_free(&dpn);
+ error = ENOMEM;
+ goto out;
+ }
rw_exit(&tp->tn_rwlock);
diff --git a/usr/src/uts/common/fs/tmpfs/tmp_vnops.c b/usr/src/uts/common/fs/tmpfs/tmp_vnops.c
index 98951ed751..9087454e32 100644
--- a/usr/src/uts/common/fs/tmpfs/tmp_vnops.c
+++ b/usr/src/uts/common/fs/tmpfs/tmp_vnops.c
@@ -25,7 +25,7 @@
*/
/*
- * Copyright 2016, Joyent, Inc.
+ * Copyright 2016 Joyent, Inc.
* Copyright 2015 Nexenta Systems, Inc. All rights reserved.
* Copyright 2016 RackTop Systems.
*/
@@ -868,6 +868,8 @@ tmp_lookup(
rw_enter(&tp->tn_rwlock, RW_WRITER);
if (tp->tn_xattrdp == NULL) {
+ int err;
+
if (!(flags & CREATE_XATTR_DIR)) {
rw_exit(&tp->tn_rwlock);
return (ENOENT);
@@ -888,8 +890,13 @@ tmp_lookup(
return (error);
}
- xdp = kmem_zalloc(sizeof (struct tmpnode), KM_SLEEP);
tm = VTOTM(dvp);
+ xdp = tmp_kmem_zalloc(tm, sizeof (struct tmpnode),
+ KM_SLEEP);
+ if (xdp == NULL) {
+ rw_exit(&tp->tn_rwlock);
+ return (ENOSPC);
+ }
tmpnode_init(tm, xdp, &tp->tn_attr, NULL);
/*
* Fix-up fields unique to attribute directories.
@@ -907,7 +914,16 @@ tmp_lookup(
}
xdp->tn_vnode->v_type = VDIR;
xdp->tn_vnode->v_flag |= V_XATTRDIR;
- tdirinit(tp, xdp);
+ if ((err = tdirinit(tp, xdp)) != 0) {
+ rw_exit(&tp->tn_rwlock);
+ /*
+ * This never got properly initialized so we can
+ * just clean it up.
+ */
+ xdp->tn_vnode->v_flag &= V_XATTRDIR;
+ tmpnode_cleanup(tp);
+ return (err);
+ }
tp->tn_xattrdp = xdp;
} else {
VN_HOLD(tp->tn_xattrdp->tn_vnode);
@@ -1626,12 +1642,12 @@ tmp_symlink(
rw_exit(&parent->tn_rwlock);
if (error) {
- if (self)
+ if (self != NULL)
tmpnode_rele(self);
return (error);
}
len = strlen(tnm) + 1;
- cp = kmem_alloc(len, KM_NOSLEEP | KM_NORMALPRI);
+ cp = tmp_kmem_zalloc(tm, len, KM_NOSLEEP | KM_NORMALPRI);
if (cp == NULL) {
tmpnode_rele(self);
return (ENOSPC);
@@ -1741,7 +1757,7 @@ top:
goto top;
}
if (tp->tn_type == VLNK)
- kmem_free(tp->tn_symlink, tp->tn_size + 1);
+ tmp_kmem_free(tm, tp->tn_symlink, tp->tn_size + 1);
}
/*
@@ -1775,7 +1791,7 @@ top:
rw_destroy(&tp->tn_rwlock);
mutex_destroy(&tp->tn_tlock);
vn_free(TNTOV(tp));
- kmem_free(tp, sizeof (struct tmpnode));
+ tmp_kmem_free(tm, tp, sizeof (struct tmpnode));
/* If the filesystem was umounted by force, rele the vfs ref */
if (tm->tm_vfsp->vfs_flag & VFS_UNMOUNTED)
diff --git a/usr/src/uts/common/sys/fs/tmp.h b/usr/src/uts/common/sys/fs/tmp.h
index f8740e8873..bd5a0c1f4f 100644
--- a/usr/src/uts/common/sys/fs/tmp.h
+++ b/usr/src/uts/common/sys/fs/tmp.h
@@ -23,7 +23,7 @@
* All rights reserved. Use is subject to license terms.
*/
/*
- * Copyright 2015 Joyent, Inc.
+ * Copyright 2016 Joyent, Inc.
*/
#ifndef _SYS_FS_TMP_H
@@ -43,8 +43,9 @@ struct tmount {
struct vfs *tm_vfsp; /* filesystem's vfs struct */
struct tmpnode *tm_rootnode; /* root tmpnode */
char *tm_mntpath; /* name of tmpfs mount point */
- ulong_t tm_anonmax; /* file system max anon reservation */
- pgcnt_t tm_anonmem; /* pages of reserved anon memory */
+ size_t tm_anonmax; /* file system max anon reservation */
+ size_t tm_anonmem; /* bytes of reserved anon memory */
+ /* and allocated kmem for the fs */
dev_t tm_dev; /* unique dev # of mounted `device' */
uint_t tm_gen; /* pseudo generation number for files */
kmutex_t tm_contents; /* lock for tmount structure */
@@ -58,6 +59,7 @@ struct tmount {
#define VTOTM(vp) ((struct tmount *)(vp)->v_vfsp->vfs_data)
#define VTOTN(vp) ((struct tmpnode *)(vp)->v_data)
#define TNTOV(tp) ((tp)->tn_vnode)
+#define TNTOTM(tp) (VTOTM(TNTOV(tp)))
#define tmpnode_hold(tp) VN_HOLD(TNTOV(tp))
#define tmpnode_rele(tp) VN_RELE(TNTOV(tp))
@@ -93,24 +95,28 @@ extern size_t tmpfs_minfree; /* Anonymous memory in pages */
extern void tmpnode_init(struct tmount *, struct tmpnode *,
struct vattr *, struct cred *);
+extern void tmpnode_cleanup(struct tmpnode *tp);
extern int tmpnode_trunc(struct tmount *, struct tmpnode *, ulong_t);
extern void tmpnode_growmap(struct tmpnode *, ulong_t);
extern int tdirlookup(struct tmpnode *, char *, struct tmpnode **,
struct cred *);
extern int tdirdelete(struct tmpnode *, struct tmpnode *, char *,
enum dr_op, struct cred *);
-extern void tdirinit(struct tmpnode *, struct tmpnode *);
+extern int tdirinit(struct tmpnode *, struct tmpnode *);
extern void tdirtrunc(struct tmpnode *);
extern int tmp_resv(struct tmount *, struct tmpnode *, size_t, int);
extern int tmp_taccess(void *, int, struct cred *);
extern int tmp_sticky_remove_access(struct tmpnode *, struct tmpnode *,
struct cred *);
-extern int tmp_convnum(char *, pgcnt_t *);
+extern int tmp_convnum(char *, size_t *);
extern int tmp_convmode(char *, mode_t *);
extern int tdirenter(struct tmount *, struct tmpnode *, char *,
enum de_op, struct tmpnode *, struct tmpnode *, struct vattr *,
struct tmpnode **, struct cred *, caller_context_t *);
+extern void *tmp_kmem_zalloc(struct tmount *, size_t, int);
+extern void tmp_kmem_free(struct tmount *, void *, size_t);
+
#define TMP_MUSTHAVE 0x01
#ifdef __cplusplus