summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJason King <jasonbking@users.noreply.github.com>2020-06-03 15:03:49 -0500
committerGitHub <noreply@github.com>2020-06-03 15:03:49 -0500
commit71b43f2a12f58ef8bc5a1965a3b742749bb49231 (patch)
tree4369300a9f43681a8ea759142d919982a0db2a31
parent1522aad90cd52cad5f5eb1fae5521a9630ae60f0 (diff)
downloadillumos-joyent-release-20200604.tar.gz
OS-8054 inotify watches lead to EBUSY during zfs mount (#305)release-20200604
Portions contributed by: Mike Gerdts <mike.gerdts@joyent.com> Reviewed by: John Levon <john.levon@joyent.com> Reviewed by: Dan McDonald <danmcd@joyent.com> Approved by: Dan McDonald <danmcd@joyent.com>
-rw-r--r--usr/src/pkg/manifests/system-test-zfstest.mf7
-rw-r--r--usr/src/test/zfs-tests/cmd/watch_dir/Makefile19
-rw-r--r--usr/src/test/zfs-tests/cmd/watch_dir/watch_dir.c151
-rw-r--r--usr/src/test/zfs-tests/include/commands.cfg3
-rw-r--r--usr/src/test/zfs-tests/runfiles/smartos.run3
-rw-r--r--usr/src/test/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_watched_inotify.ksh74
-rw-r--r--usr/src/test/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_watched_portfs.ksh74
-rw-r--r--usr/src/uts/common/fs/portfs/port_fop.c101
-rw-r--r--usr/src/uts/common/fs/vnode.c35
-rw-r--r--usr/src/uts/common/fs/zfs/zfs_vfsops.c4
-rw-r--r--usr/src/uts/common/io/inotify.c18
-rw-r--r--usr/src/uts/common/sys/vnode.h46
12 files changed, 471 insertions, 64 deletions
diff --git a/usr/src/pkg/manifests/system-test-zfstest.mf b/usr/src/pkg/manifests/system-test-zfstest.mf
index 0f05230a9b..9f78c29bb5 100644
--- a/usr/src/pkg/manifests/system-test-zfstest.mf
+++ b/usr/src/pkg/manifests/system-test-zfstest.mf
@@ -203,6 +203,7 @@ file path=opt/zfs-tests/bin/randwritecomp mode=0555
file path=opt/zfs-tests/bin/readmmap mode=0555
file path=opt/zfs-tests/bin/rename_dir mode=0555
file path=opt/zfs-tests/bin/rm_lnkcnt_zero_file mode=0555
+file path=opt/zfs-tests/bin/watch_dir mode=0555
file path=opt/zfs-tests/bin/zfstest mode=0555
file path=opt/zfs-tests/callbacks/zfs_dbgmsg mode=0555
file path=opt/zfs-tests/callbacks/zfs_mmp mode=0555
@@ -1027,6 +1028,12 @@ file \
file \
path=opt/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_encrypted \
mode=0555
+file \
+ path=opt/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_watched_inotify \
+ mode=0555
+file \
+ path=opt/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_watched_portfs \
+ mode=0555
file path=opt/zfs-tests/tests/functional/cli_root/zfs_program/cleanup \
mode=0555
file path=opt/zfs-tests/tests/functional/cli_root/zfs_program/setup mode=0555
diff --git a/usr/src/test/zfs-tests/cmd/watch_dir/Makefile b/usr/src/test/zfs-tests/cmd/watch_dir/Makefile
new file mode 100644
index 0000000000..517dff64af
--- /dev/null
+++ b/usr/src/test/zfs-tests/cmd/watch_dir/Makefile
@@ -0,0 +1,19 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2020 Joyent, Inc.
+#
+
+PROG = watch_dir
+
+include $(SRC)/cmd/Makefile.cmd
+include ../Makefile.subdirs
diff --git a/usr/src/test/zfs-tests/cmd/watch_dir/watch_dir.c b/usr/src/test/zfs-tests/cmd/watch_dir/watch_dir.c
new file mode 100644
index 0000000000..f6d8445162
--- /dev/null
+++ b/usr/src/test/zfs-tests/cmd/watch_dir/watch_dir.c
@@ -0,0 +1,151 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2020 Joyent, Inc.
+ */
+
+/*
+ * This program watches a directory with portfs or inotify, exiting when the
+ * directory is removed. It is useful in tests that ensure that watching a
+ * directory does not prevent it from being used as a mount point.
+ */
+#include <limits.h>
+#include <port.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/inotify.h>
+#include <unistd.h>
+
+void
+fail_usage(void)
+{
+ (void) fprintf(stderr, "Usage: watch <portfs|inotify> directory\n");
+ exit(1);
+}
+
+#define MAX_PES 8
+
+void
+watch_port(const char *path)
+{
+ int port;
+ struct file_obj fobj = {0};
+
+ if ((port = port_create()) < 0) {
+ perror("port_create");
+ exit(1);
+ }
+
+ fobj.fo_name = (char *)path;
+ for (;;) {
+ timespec_t ts = {300, 0};
+ port_event_t pe;
+
+ if (port_associate(port, PORT_SOURCE_FILE, (uintptr_t)&fobj,
+ 0, (char *)path) != 0) {
+ perror("port_associate");
+ exit(1);
+ }
+
+ if (port_get(port, &pe, &ts) != 0) {
+ perror("port_get");
+ exit(1);
+ }
+
+ if (pe.portev_events & FILE_DELETE) {
+ (void) printf("DELETE\t%s\n", path);
+ exit(0);
+ }
+ if (pe.portev_events & MOUNTEDOVER) {
+ (void) printf("MOUNTEDOVER\t%s\n", path);
+ }
+ }
+}
+
+void
+watch_inotify(const char *path)
+{
+ int in, wd;
+ struct inotify_event ev;
+
+ if ((in = inotify_init()) < 0) {
+ perror("inotify_init");
+ exit(1);
+ }
+ if ((wd = inotify_add_watch(in, path, IN_DELETE_SELF)) == -1) {
+ perror("inotify_add_watch");
+ exit(1);
+ }
+
+ for (;;) {
+ ssize_t cnt;
+ char evpath[PATH_MAX];
+
+ cnt = read(in, &ev, sizeof (ev));
+ if (cnt != sizeof (ev)) {
+ (void) fprintf(stderr,
+ "read: expected %ld bytes got %ld\n",
+ sizeof (ev), cnt);
+ exit(1);
+ }
+ if (ev.len != 0) {
+ if (ev.len > sizeof (evpath)) {
+ (void) fprintf(stderr, "read: oversize "
+ "path (%u bytes)\n", ev.len);
+ exit(1);
+ }
+ cnt = read(in, evpath, ev.len);
+ if (cnt != ev.len) {
+ (void) fprintf(stderr, "read: expected %ld "
+ "bytes for path, got %ld\n", ev.len, cnt);
+ exit(1);
+ }
+ evpath[ev.len - 1] = '\0';
+ } else {
+ evpath[0] = '\0';
+ }
+ if (ev.mask & IN_DELETE_SELF) {
+ /*
+ * IN_DELETE_SELF events don't appear to include
+ * the path in the event.
+ */
+ (void) printf("DELETE_SELF\n");
+ exit(0);
+ } else {
+ (void) printf("EVENT_%08x\t%s\n", ev.mask, evpath);
+ }
+
+ }
+}
+
+int
+main(int argc, char **argv)
+{
+ const char *watcher, *path;
+
+ if (argc != 3) {
+ fail_usage();
+ }
+ watcher = argv[1];
+ path = argv[2];
+
+ if (strcmp(watcher, "portfs") == 0) {
+ watch_port(path);
+ } else if (strcmp(watcher, "inotify") == 0) {
+ watch_inotify(path);
+ } else {
+ fail_usage();
+ }
+
+ return (0);
+}
diff --git a/usr/src/test/zfs-tests/include/commands.cfg b/usr/src/test/zfs-tests/include/commands.cfg
index f9b0bdf7ac..0ad892e2b2 100644
--- a/usr/src/test/zfs-tests/include/commands.cfg
+++ b/usr/src/test/zfs-tests/include/commands.cfg
@@ -196,4 +196,5 @@ export ZFSTEST_FILES='chg_usr_exec
randwritecomp
readmmap
rename_dir
- rm_lnkcnt_zero_file'
+ rm_lnkcnt_zero_file
+ watch_dir'
diff --git a/usr/src/test/zfs-tests/runfiles/smartos.run b/usr/src/test/zfs-tests/runfiles/smartos.run
index 47059b0b52..18e819a301 100644
--- a/usr/src/test/zfs-tests/runfiles/smartos.run
+++ b/usr/src/test/zfs-tests/runfiles/smartos.run
@@ -130,7 +130,8 @@ tests = ['zfs_mount_001_pos', 'zfs_mount_002_pos', 'zfs_mount_003_pos',
'zfs_mount_007_pos', 'zfs_mount_008_pos', 'zfs_mount_009_neg',
'zfs_mount_010_neg', 'zfs_mount_011_neg', 'zfs_mount_012_neg',
'zfs_mount_all_001_pos', 'zfs_mount_all_fail', 'zfs_mount_all_mountpoints',
- 'zfs_mount_encrypted']
+ 'zfs_mount_encrypted', 'zfs_mount_watched_inotify',
+ 'zfs_mount_watched_portfs' ]
[/opt/zfs-tests/tests/functional/cli_root/zfs_program]
tests = ['zfs_program_json']
diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_watched_inotify.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_watched_inotify.ksh
new file mode 100644
index 0000000000..0f181621e8
--- /dev/null
+++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_watched_inotify.ksh
@@ -0,0 +1,74 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2020 Joyent, Inc.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+# 'zfs mount' should not get EBUSY due to inotify(5) watching a directory
+#
+# STRATEGY:
+# 1. Create a directory
+# 2. Start watching the directory with inotify(5).
+# 3. Create a filesystem
+# 4. Mount the filesystem at the directory created in step 1
+# 5. Destroy the filesystem
+# 6. Remove the directory
+# 7. Verify the watcher saw the directory removal
+#
+
+verify_runnable "both"
+
+function cleanup
+{
+ datasetexists $TESTPOOL/$TESTFS1 && \
+ log_must zfs destroy -f $TESTPOOL/$TESTFS1
+ log_must rm -rf "$TESTDIR/mntpt" "$TESTDIR/watch_dir.log"
+}
+
+log_onexit cleanup
+
+log_assert "'zfs mount' should not get EBUSY due to inotify(5) watching a directory"
+
+# 1. Create a directory.
+log_must mkdir -p "$TESTDIR/mntpt"
+
+# 2. Start watching the directory with inotify(5).
+watch_dir inotify $TESTDIR/mntpt > $TESTDIR/watch_dir.log &
+
+# 3. Create a filesystem
+log_must zfs create $TESTPOOL/$TESTFS1
+
+# 4. Mount the file system at the directory created in step 1
+log_must zfs set mountpoint=$TESTDIR/mntpt $TESTPOOL/$TESTFS1
+
+# 5. Destroy the filesystem
+log_must zfs destroy $TESTPOOL/$TESTFS1
+
+# 6. Remove the directory. The corresponding inotify event will cause the
+# watcher to exit.
+log_must rmdir $TESTDIR/mntpt
+
+# 7. Verify the watcher saw the directory removal. This ensures that the watcher
+# was watching the directory we are interested in.
+wait
+log_must grep -q DELETE_SELF $TESTDIR/watch_dir.log
+
+log_pass "'zfs mount' should not get EBUSY due to inotify(5) watching a directory"
diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_watched_portfs.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_watched_portfs.ksh
new file mode 100644
index 0000000000..3135203973
--- /dev/null
+++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_watched_portfs.ksh
@@ -0,0 +1,74 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2020 Joyent, Inc.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+# 'zfs mount' should not get EBUSY due to portfs watching a directory
+#
+# STRATEGY:
+# 1. Create a directory
+# 2. Start watching the directory with port_associate
+# 3. Create a filesystem
+# 4. Mount the filesystem at the directory created in step 1
+# 5. Destroy the filesystem
+# 6. Remove the directory
+# 7. Verify the watcher saw the directory removal
+#
+
+verify_runnable "both"
+
+function cleanup
+{
+ datasetexists $TESTPOOL/$TESTFS1 && \
+ log_must zfs destroy -f $TESTPOOL/$TESTFS1
+ log_must rm -rf "$TESTDIR/mntpt" "$TESTDIR/watch_dir.log"
+}
+
+log_onexit cleanup
+
+log_assert "'zfs mount' should not get EBUSY due to portfs watching a directory"
+
+# 1. Create a directory.
+log_must mkdir -p "$TESTDIR/mntpt"
+
+# 2. Start watching the directory with port_associate
+watch_dir portfs $TESTDIR/mntpt > $TESTDIR/watch_dir.log &
+
+# 3. Create a filesystem
+log_must zfs create $TESTPOOL/$TESTFS1
+
+# 4. Mount the file system at the directory created in step 1
+log_must zfs set mountpoint=$TESTDIR/mntpt $TESTPOOL/$TESTFS1
+
+# 5. Destroy the filesystem
+log_must zfs destroy $TESTPOOL/$TESTFS1
+
+# 6. Remove the directory. The corresponding portfs event will cause the
+# watcher to exit.
+log_must rmdir $TESTDIR/mntpt
+
+# 7. Verify the watcher saw the directory removal. This ensures that the watcher
+# was watching the directory we are interested in.
+wait
+log_must grep -q DELETE.$TESTDIR/mntpt $TESTDIR/watch_dir.log
+
+log_pass "'zfs mount' should not get EBUSY due to portfs watching a directory"
diff --git a/usr/src/uts/common/fs/portfs/port_fop.c b/usr/src/uts/common/fs/portfs/port_fop.c
index 019de0540a..252ff2d9ac 100644
--- a/usr/src/uts/common/fs/portfs/port_fop.c
+++ b/usr/src/uts/common/fs/portfs/port_fop.c
@@ -24,7 +24,7 @@
*/
/*
- * Copyright (c) 2018, Joyent, Inc.
+ * Copyright 2020 Joyent, Inc.
*/
/*
@@ -257,7 +257,7 @@ const fs_operation_def_t port_vnodesrc_template[] = {
VOPNAME_READ, { .femop_read = port_fop_read },
VOPNAME_WRITE, { .femop_write = port_fop_write },
VOPNAME_MAP, { .femop_map = port_fop_map },
- VOPNAME_SETATTR, { .femop_setattr = port_fop_setattr },
+ VOPNAME_SETATTR, { .femop_setattr = port_fop_setattr },
VOPNAME_CREATE, { .femop_create = port_fop_create },
VOPNAME_REMOVE, { .femop_remove = port_fop_remove },
VOPNAME_LINK, { .femop_link = port_fop_link },
@@ -266,7 +266,7 @@ const fs_operation_def_t port_vnodesrc_template[] = {
VOPNAME_RMDIR, { .femop_rmdir = port_fop_rmdir },
VOPNAME_READDIR, { .femop_readdir = port_fop_readdir },
VOPNAME_SYMLINK, { .femop_symlink = port_fop_symlink },
- VOPNAME_SETSECATTR, { .femop_setsecattr = port_fop_setsecattr },
+ VOPNAME_SETSECATTR, { .femop_setsecattr = port_fop_setsecattr },
VOPNAME_VNEVENT, { .femop_vnevent = port_fop_vnevent },
NULL, NULL
};
@@ -275,7 +275,7 @@ const fs_operation_def_t port_vnodesrc_template[] = {
* Fsem - vfs ops hooks
*/
const fs_operation_def_t port_vfssrc_template[] = {
- VFSNAME_UNMOUNT, { .fsemop_unmount = port_fop_unmount },
+ VFSNAME_UNMOUNT, { .fsemop_unmount = port_fop_unmount },
NULL, NULL
};
@@ -539,14 +539,14 @@ port_fop_trimpfplist(vnode_t *vp)
port_pcache_remove_fop(pfcp, pfp);
mutex_exit(&pfcp->pfc_lock);
if (tdvp != NULL)
- VN_RELE(tdvp);
+ VN_PHANTOM_RELE(tdvp);
}
}
}
/*
* This routine returns 1, if the vnode can be rele'ed by the caller.
- * The caller has to VN_RELE the vnode with out holding any
+ * The caller has to VN_PHANTOM_RELE the vnode with out holding any
* locks.
*/
int
@@ -616,7 +616,7 @@ port_fop_femuninstall(vnode_t *vp)
* able to remove it from the port's queue).
*
* vpp and dvpp will point to the vnode and directory vnode which the caller
- * is required to VN_RELE without holding any locks.
+ * is required to VN_PHANTOM_RELE without holding any locks.
*/
int
port_remove_fop(portfop_t *pfp, portfop_cache_t *pfcp, int cleanup,
@@ -726,12 +726,12 @@ port_cache_lookup_fop(portfop_cache_t *pfcp, pid_t pid, uintptr_t obj)
/*
* Given the file name, get the vnode and also the directory vnode
- * On return, the vnodes are held (VN_HOLD). The caller has to VN_RELE
- * the vnode(s).
+ * On return, the vnodes are held with phantom holds (VN_PHANTOM_HOLD). The
+ * caller has to VN_PHANTOM_RELE the vnode(s).
*/
int
port_fop_getdvp(void *objptr, vnode_t **vp, vnode_t **dvp,
- char **cname, int *len, int follow)
+ char **cname, int *len, int follow)
{
int error = 0;
struct pathname pn;
@@ -777,6 +777,17 @@ port_fop_getdvp(void *objptr, vnode_t **vp, vnode_t **dvp,
}
}
+ /* Trade VN_HOLD()s from lookuppn with VN_PHANTOM_HOLD()s */
+ if (dvp != NULL) {
+ VN_PHANTOM_HOLD(*dvp);
+ VN_RELE(*dvp);
+ }
+
+ if (vp != NULL) {
+ VN_PHANTOM_HOLD(*vp);
+ VN_RELE(*vp);
+ }
+
pn_free(&pn);
return (error);
}
@@ -815,7 +826,7 @@ port_getsrc(port_t *pp, int source)
*/
static void
port_check_timestamp(portfop_cache_t *pfcp, vnode_t *vp, vnode_t *dvp,
- portfop_t *pfp, void *objptr, uintptr_t object)
+ portfop_t *pfp, void *objptr, uintptr_t object)
{
vattr_t vatt;
portfop_vp_t *pvp = vp->v_fopdata;
@@ -1102,8 +1113,8 @@ port_install_fopdata(vnode_t *vp)
*/
int
port_pfp_setup(portfop_t **pfpp, port_t *pp, vnode_t *vp, portfop_cache_t *pfcp,
- uintptr_t object, int events, void *user, char *cname, int clen,
- vnode_t *dvp)
+ uintptr_t object, int events, void *user, char *cname, int clen,
+ vnode_t *dvp)
{
portfop_t *pfp = NULL;
port_kevent_t *pkevp;
@@ -1176,7 +1187,7 @@ port_pfp_setup(portfop_t **pfpp, port_t *pp, vnode_t *vp, portfop_cache_t *pfcp,
* Hold a reference to the vnode since
* we successfully installed the hooks.
*/
- VN_HOLD(vp);
+ VN_PHANTOM_HOLD(vp);
} else {
(void) fem_uninstall(vp, femp, vp);
pvp->pvp_femp = NULL;
@@ -1209,7 +1220,7 @@ port_pfp_setup(portfop_t **pfpp, port_t *pp, vnode_t *vp, portfop_cache_t *pfcp,
* Hold the directory vnode since we have a reference now.
*/
if (dvp != NULL)
- VN_HOLD(dvp);
+ VN_PHANTOM_HOLD(dvp);
*pfpp = pfp;
return (0);
}
@@ -1224,9 +1235,9 @@ port_resolve_vp(vnode_t *vp)
*/
if (vfs_mntdummyvp && mntfstype != 0 &&
vp->v_vfsp->vfs_fstype == mntfstype) {
- VN_RELE(vp);
+ VN_PHANTOM_RELE(vp);
vp = vfs_mntdummyvp;
- VN_HOLD(vfs_mntdummyvp);
+ VN_PHANTOM_HOLD(vfs_mntdummyvp);
}
/*
@@ -1234,8 +1245,8 @@ port_resolve_vp(vnode_t *vp)
* hardlinks.
*/
if ((VOP_REALVP(vp, &rvp, NULL) == 0) && vp != rvp) {
- VN_HOLD(rvp);
- VN_RELE(vp);
+ VN_PHANTOM_HOLD(rvp);
+ VN_PHANTOM_RELE(vp);
vp = rvp;
}
return (vp);
@@ -1247,10 +1258,10 @@ port_resolve_vp(vnode_t *vp)
* The association is identified by the object pointer and the pid.
* The events argument contains the events to be monitored for.
*
- * The vnode will have a VN_HOLD once the fem hooks are installed.
+ * The vnode will have a VN_PHANTOM_HOLD once the fem hooks are installed.
*
- * Every reference(pfp) to the directory vnode will have a VN_HOLD to ensure
- * that the directory vnode pointer does not change.
+ * Every reference(pfp) to the directory vnode will have a VN_PHANTOM_HOLD to
+ * ensure that the directory vnode pointer does not change.
*/
int
port_associate_fop(port_t *pp, int source, uintptr_t object, int events,
@@ -1330,7 +1341,7 @@ port_associate_fop(port_t *pp, int source, uintptr_t object, int events,
*/
if (dvp != NULL && dvp->v_vfsp != vp->v_vfsp &&
!(orig->v_type == VPROC && vp != NULL && vp->v_type != VPROC)) {
- VN_RELE(dvp);
+ VN_PHANTOM_RELE(dvp);
dvp = NULL;
}
@@ -1350,8 +1361,8 @@ port_associate_fop(port_t *pp, int source, uintptr_t object, int events,
pfp = port_cache_lookup_fop(pfcp, curproc->p_pid, object);
/*
- * If it is not the same vnode, just discard it. VN_RELE needs to be
- * called with no locks held, therefore save vnode pointers and
+ * If it is not the same vnode, just discard it. VN_PHANTOM_RELE needs
+ * to be called with no locks held, therefore save vnode pointers and
* vn_rele them later.
*/
if (pfp != NULL && (pfp->pfop_vp != vp || pfp->pfop_dvp != dvp)) {
@@ -1404,7 +1415,7 @@ port_associate_fop(port_t *pp, int source, uintptr_t object, int events,
* This vnode pointer is just used
* for comparison, so rele it
*/
- VN_RELE(tvp);
+ VN_PHANTOM_RELE(tvp);
}
}
@@ -1437,8 +1448,8 @@ port_associate_fop(port_t *pp, int source, uintptr_t object, int events,
* active and it is not being removed from
* the vnode list. This is checked in
* port_remove_fop with the vnode lock held.
- * The vnode returned is VN_RELE'ed after dropping
- * the locks.
+ * The vnode returned is VN_PHANTOM_RELE'ed after
+ * dropping the locks.
*/
tdvp = tvp = NULL;
if (port_remove_fop(pfp, pfcp, 0, NULL, &tvp, &tdvp)) {
@@ -1451,9 +1462,9 @@ port_associate_fop(port_t *pp, int source, uintptr_t object, int events,
}
mutex_exit(&pfcp->pfc_lock);
if (tvp != NULL)
- VN_RELE(tvp);
+ VN_PHANTOM_RELE(tvp);
if (tdvp != NULL)
- VN_RELE(tdvp);
+ VN_PHANTOM_RELE(tdvp);
goto errout;
}
} else {
@@ -1519,14 +1530,14 @@ errout:
* Release the hold acquired due to the lookup operation.
*/
if (vp != NULL)
- VN_RELE(vp);
+ VN_PHANTOM_RELE(vp);
if (dvp != NULL)
- VN_RELE(dvp);
+ VN_PHANTOM_RELE(dvp);
if (oldvp != NULL)
- VN_RELE(oldvp);
+ VN_PHANTOM_RELE(oldvp);
if (olddvp != NULL)
- VN_RELE(olddvp);
+ VN_PHANTOM_RELE(olddvp);
/*
* copied file name not used, free it.
@@ -1587,9 +1598,9 @@ port_dissociate_fop(port_t *pp, uintptr_t object)
(void) port_remove_fop(pfp, pfcp, 1, &active, &tvp, &tdvp);
mutex_exit(&pfcp->pfc_lock);
if (tvp != NULL)
- VN_RELE(tvp);
+ VN_PHANTOM_RELE(tvp);
if (tdvp != NULL)
- VN_RELE(tdvp);
+ VN_PHANTOM_RELE(tdvp);
return (active ? 0 : ENOENT);
}
@@ -1610,7 +1621,7 @@ port_close_fop(void *arg, int port, pid_t pid, int lastclose)
portfop_t *pfpnext;
int index, i;
port_source_t *pse;
- vnode_t *tdvp = NULL;
+ vnode_t *tdvp = NULL;
vnode_t *vpl[PORTFOP_NVP];
pse = port_getsrc(pp, PORT_SOURCE_FILE);
@@ -1627,7 +1638,7 @@ port_close_fop(void *arg, int port, pid_t pid, int lastclose)
* be possible as the port is being closed.
*
* The common case is that the port is not shared and all the entries
- * are of this pid and have to be freed. Since VN_RELE has to be
+ * are of this pid and have to be freed. Since VN_PHANTOM_RELE has to be
* called outside the lock, we do it in batches.
*/
hashtbl = (portfop_t **)pfcp->pfc_hash;
@@ -1654,14 +1665,14 @@ port_close_fop(void *arg, int port, pid_t pid, int lastclose)
if (pfp == NULL)
index++;
/*
- * Now call VN_RELE if we have collected enough vnodes or
- * we have reached the end of the hash table.
+ * Now call VN_PHANTOM_RELE if we have collected enough vnodes
+ * or we have reached the end of the hash table.
*/
if (i >= (PORTFOP_NVP - 1) ||
(i > 0 && index == PORTFOP_HASHSIZE)) {
mutex_exit(&pfcp->pfc_lock);
while (i > 0) {
- VN_RELE(vpl[--i]);
+ VN_PHANTOM_RELE(vpl[--i]);
vpl[i] = NULL;
}
mutex_enter(&pfcp->pfc_lock);
@@ -1769,7 +1780,7 @@ port_fop_excep(list_t *tlist, int op)
port_pcache_remove_fop(pfcp, pfp);
mutex_exit(&pfcp->pfc_lock);
if (tdvp != NULL)
- VN_RELE(tdvp);
+ VN_PHANTOM_RELE(tdvp);
}
}
@@ -1933,7 +1944,7 @@ port_fop_sendevent(vnode_t *vp, int events, vnode_t *dvp, char *cname)
* that may be attempting to remove an object from the vnode's.
*/
if (port_fop_femuninstall(vp))
- VN_RELE(vp);
+ VN_PHANTOM_RELE(vp);
/*
* Send exception events and discard the watch entries.
@@ -1980,7 +1991,7 @@ port_fop(vnode_t *vp, int op, int retval)
event |= FILE_TRUNC;
}
if (event) {
- port_fop_sendevent(vp, event, NULL, NULL);
+ port_fop_sendevent(vp, event, NULL, NULL);
}
}
@@ -2068,7 +2079,7 @@ port_fop_unmount(fsemarg_t *vf, int flag, cred_t *cr)
* unmount is in process.
*/
port_fop_sendevent(pvp->pvp_vp, UNMOUNTED, NULL, NULL);
- VN_RELE(pvp->pvp_vp);
+ VN_PHANTOM_RELE(pvp->pvp_vp);
}
error = vfsnext_unmount(vf, flag, cr);
diff --git a/usr/src/uts/common/fs/vnode.c b/usr/src/uts/common/fs/vnode.c
index 9e0b071999..c57884a940 100644
--- a/usr/src/uts/common/fs/vnode.c
+++ b/usr/src/uts/common/fs/vnode.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2018, Joyent, Inc.
+ * Copyright 2020 Joyent, Inc.
* Copyright 2016 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2011, 2017 by Delphix. All rights reserved.
*/
@@ -852,6 +852,37 @@ vn_rele(vnode_t *vp)
mutex_exit(&vp->v_lock);
}
+void
+vn_phantom_rele(vnode_t *vp)
+{
+ VERIFY(vp->v_count > 0);
+
+ mutex_enter(&vp->v_lock);
+ VERIFY3U(vp->v_count, >=, vp->v_phantom_count);
+ vp->v_phantom_count--;
+ DTRACE_PROBE1(vn__phantom_rele, vnode_t *, vp);
+ if (vp->v_count == 1) {
+ ASSERT0(vp->v_phantom_count);
+ mutex_exit(&vp->v_lock);
+ VOP_INACTIVE(vp, CRED(), NULL);
+ return;
+ }
+ VN_RELE_LOCKED(vp);
+ mutex_exit(&vp->v_lock);
+}
+
+/*
+ * Return the number of non-phantom holds. Things such as portfs will use
+ * phantom holds to prevent it from blocking filesystems from mounting over
+ * watched directories.
+ */
+uint_t
+vn_count(vnode_t *vp)
+{
+ ASSERT(MUTEX_HELD(&vp->v_lock));
+ return (vp->v_count - vp->v_phantom_count);
+}
+
/*
* Release a vnode referenced by the DNLC. Multiple DNLC references are treated
* as a single reference, so v_count is not decremented until the last DNLC hold
@@ -2428,6 +2459,7 @@ vn_reinit(vnode_t *vp)
{
vp->v_count = 1;
vp->v_count_dnlc = 0;
+ vp->v_phantom_count = 0;
vp->v_vfsp = NULL;
vp->v_stream = NULL;
vp->v_vfsmountedhere = NULL;
@@ -2484,6 +2516,7 @@ vn_free(vnode_t *vp)
*/
ASSERT((vp->v_count == 0) || (vp->v_count == 1));
ASSERT(vp->v_count_dnlc == 0);
+ ASSERT0(vp->v_phantom_count);
VERIFY(vp->v_path != NULL);
if (vp->v_path != vn_vpath_empty) {
kmem_free(vp->v_path, strlen(vp->v_path) + 1);
diff --git a/usr/src/uts/common/fs/zfs/zfs_vfsops.c b/usr/src/uts/common/fs/zfs/zfs_vfsops.c
index d06cb875cf..e78907b5a6 100644
--- a/usr/src/uts/common/fs/zfs/zfs_vfsops.c
+++ b/usr/src/uts/common/fs/zfs/zfs_vfsops.c
@@ -24,7 +24,7 @@
* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
* Copyright (c) 2014 Integros [integros.com]
* Copyright 2016 Nexenta Systems, Inc. All rights reserved.
- * Copyright 2019 Joyent, Inc.
+ * Copyright 2020 Joyent, Inc.
* Copyright 2020 Joshua M. Clulow <josh@sysmgr.org>
* Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
*/
@@ -1893,7 +1893,7 @@ zfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
mutex_enter(&mvp->v_lock);
if ((uap->flags & MS_REMOUNT) == 0 &&
(uap->flags & MS_OVERLAY) == 0 &&
- (mvp->v_count != 1 || (mvp->v_flag & VROOT))) {
+ (vn_count(mvp) != 1 || (mvp->v_flag & VROOT))) {
mutex_exit(&mvp->v_lock);
return (SET_ERROR(EBUSY));
}
diff --git a/usr/src/uts/common/io/inotify.c b/usr/src/uts/common/io/inotify.c
index b20c63481f..67bf55f213 100644
--- a/usr/src/uts/common/io/inotify.c
+++ b/usr/src/uts/common/io/inotify.c
@@ -10,7 +10,7 @@
*/
/*
- * Copyright 2019 Joyent, Inc.
+ * Copyright 2020 Joyent, Inc.
* Copyright (c) 2015 The MathWorks, Inc. All rights reserved.
*/
@@ -728,7 +728,7 @@ inotify_watch_remove(inotify_state_t *state, inotify_watch_t *watch)
crfree(child->inw_cred);
}
- VN_RELE(child->inw_vp);
+ VN_PHANTOM_RELE(child->inw_vp);
/*
* We're down (or should be down) to a single reference to
@@ -738,7 +738,7 @@ inotify_watch_remove(inotify_state_t *state, inotify_watch_t *watch)
}
inotify_watch_event(watch, IN_IGNORED | IN_REMOVAL, NULL);
- VN_RELE(watch->inw_vp);
+ VN_PHANTOM_RELE(watch->inw_vp);
/*
* It's now safe to zombify the watch -- we know that the only reference
@@ -830,7 +830,7 @@ inotify_watch_delete(inotify_watch_t *watch, uint32_t event)
err = inotify_fem_uninstall(watch->inw_vp, watch);
VERIFY(err == 0);
- VN_RELE(watch->inw_vp);
+ VN_PHANTOM_RELE(watch->inw_vp);
/*
* It's now safe to zombify the watch -- which won't actually delete
@@ -865,7 +865,7 @@ inotify_watch_insert(inotify_watch_t *watch, vnode_t *vp, char *name)
return;
}
- VN_HOLD(vp);
+ VN_PHANTOM_HOLD(vp);
watch = inotify_watch_add(state, watch, name, vp, watch->inw_mask);
VERIFY(watch != NULL);
@@ -897,7 +897,7 @@ inotify_add_watch(inotify_state_t *state, vnode_t *vp, uint32_t mask,
return (ENOSPC);
}
- VN_HOLD(vp);
+ VN_PHANTOM_HOLD(vp);
watch = inotify_watch_add(state, NULL, NULL, vp, set);
*wdp = watch->inw_wd;
mutex_exit(&state->ins_lock);
@@ -978,6 +978,10 @@ inotify_add_child(inotify_state_t *state, vnode_t *vp, char *name)
return (0);
}
+ /* Trade the plain hold from lookupnameat() for a phantom hold */
+ VN_PHANTOM_HOLD(cvp);
+ VN_RELE(cvp);
+
watch = inotify_watch_add(state, watch, name, cvp, watch->inw_mask);
VERIFY(watch != NULL);
mutex_exit(&state->ins_lock);
@@ -1065,7 +1069,7 @@ inotify_clean(void *arg)
*/
savecred = curthread->t_cred;
curthread->t_cred = watch->inw_cred;
- VN_RELE(watch->inw_vp);
+ VN_PHANTOM_RELE(watch->inw_vp);
crfree(watch->inw_cred);
curthread->t_cred = savecred;
diff --git a/usr/src/uts/common/sys/vnode.h b/usr/src/uts/common/sys/vnode.h
index 494264731b..df5da6c2e7 100644
--- a/usr/src/uts/common/sys/vnode.h
+++ b/usr/src/uts/common/sys/vnode.h
@@ -21,7 +21,7 @@
/*
* Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2018, Joyent, Inc.
+ * Copyright 2020 Joyent, Inc.
* Copyright (c) 2011, 2017 by Delphix. All rights reserved.
* Copyright 2017 RackTop Systems.
*/
@@ -197,6 +197,7 @@ struct vsd_node {
* v_count
* v_shrlocks
* v_path
+ * v_phantom_count
* v_vsd
* v_xattrdir
*
@@ -214,6 +215,7 @@ struct vsd_node {
* v_lock
* v_flag
* v_count
+ * v_phantom_count
* v_data
* v_vfsp
* v_stream
@@ -285,6 +287,8 @@ typedef struct vnode {
kmutex_t v_lock; /* protects vnode fields */
uint_t v_flag; /* vnode flags (see below) */
uint_t v_count; /* reference count */
+ /* non vn_count() ref count (see below) */
+ uint_t v_phantom_count;
void *v_data; /* private data for fs */
struct vfs *v_vfsp; /* ptr to containing VFS */
struct stdata *v_stream; /* associated stream */
@@ -811,9 +815,9 @@ typedef enum vnevent {
VE_REMOVE = 3, /* Remove of vnode's name */
VE_RMDIR = 4, /* Remove of directory vnode's name */
VE_CREATE = 5, /* Create with vnode's name which exists */
- VE_LINK = 6, /* Link with vnode's name as source */
+ VE_LINK = 6, /* Link with vnode's name as source */
VE_RENAME_DEST_DIR = 7, /* Rename with vnode as target dir */
- VE_MOUNTEDOVER = 8, /* File or Filesystem got mounted over vnode */
+ VE_MOUNTEDOVER = 8, /* File or Filesystem got mounted over vnode */
VE_TRUNCATE = 9, /* Truncate */
VE_PRE_RENAME_SRC = 10, /* Pre-rename, with vnode as source */
VE_PRE_RENAME_DEST = 11, /* Pre-rename, with vnode as target/dest. */
@@ -1294,9 +1298,9 @@ void vn_recycle(vnode_t *);
void vn_free(vnode_t *);
int vn_is_readonly(vnode_t *);
-int vn_is_opened(vnode_t *, v_mode_t);
-int vn_is_mapped(vnode_t *, v_mode_t);
-int vn_has_other_opens(vnode_t *, v_mode_t);
+int vn_is_opened(vnode_t *, v_mode_t);
+int vn_is_mapped(vnode_t *, v_mode_t);
+int vn_has_other_opens(vnode_t *, v_mode_t);
void vn_open_upgrade(vnode_t *, int);
void vn_open_downgrade(vnode_t *, int);
@@ -1335,10 +1339,12 @@ int vn_createat(char *pnamep, enum uio_seg seg, struct vattr *vap,
int vn_rdwr(enum uio_rw rw, struct vnode *vp, caddr_t base, ssize_t len,
offset_t offset, enum uio_seg seg, int ioflag, rlim64_t ulimit,
cred_t *cr, ssize_t *residp);
+uint_t vn_count(struct vnode *vp);
void vn_rele(struct vnode *vp);
void vn_rele_async(struct vnode *vp, struct taskq *taskq);
void vn_rele_dnlc(struct vnode *vp);
void vn_rele_stream(struct vnode *vp);
+void vn_phantom_rele(struct vnode *vp);
int vn_link(char *from, char *to, enum uio_seg seg);
int vn_linkat(vnode_t *fstartvp, char *from, enum symfollow follow,
vnode_t *tstartvp, char *to, enum uio_seg seg);
@@ -1443,6 +1449,16 @@ extern uint_t pvn_vmodsort_supported;
* this->vp->v_path == NULL ? "NULL" : stringof(this->vp->v_path),
* this->vp->v_count)
* }'
+ *
+ * There are some situations where we don't want a hold to make the vnode
+ * 'busy'. For example, watching a directory via port events or inotify
+ * should not prevent a filesystem from mounting on a watched directory.
+ * For those instances, a phantom hold is used via VN_PHANTOM_HOLD().
+ *
+ * A phantom hold works identically to regular hold, except that those holds
+ * are excluded from the return value of vn_count().
+ *
+ * A phantom hold must be released by VN_PHANTOM_RELE().
*/
#define VN_HOLD_LOCKED(vp) { \
ASSERT(mutex_owned(&(vp)->v_lock)); \
@@ -1471,6 +1487,22 @@ extern uint_t pvn_vmodsort_supported;
DTRACE_PROBE1(vn__rele, vnode_t *, vp); \
}
+#define VN_PHANTOM_HOLD_LOCKED(vp) { \
+ VN_HOLD_LOCKED(vp); \
+ (vp)->v_phantom_count++; \
+ DTRACE_PROBE1(vn__phantom_hold, vnode_t *, vp); \
+}
+
+#define VN_PHANTOM_HOLD(vp) { \
+ mutex_enter(&(vp)->v_lock); \
+ VN_PHANTOM_HOLD_LOCKED(vp); \
+ mutex_exit(&(vp)->v_lock); \
+}
+
+#define VN_PHANTOM_RELE(vp) { \
+ vn_phantom_rele(vp); \
+}
+
#define VN_SET_VFS_TYPE_DEV(vp, vfsp, type, dev) { \
(vp)->v_vfsp = (vfsp); \
(vp)->v_type = (type); \
@@ -1481,7 +1513,7 @@ extern uint_t pvn_vmodsort_supported;
* Compare two vnodes for equality. In general this macro should be used
* in preference to calling VOP_CMP directly.
*/
-#define VN_CMP(VP1, VP2) ((VP1) == (VP2) ? 1 : \
+#define VN_CMP(VP1, VP2) ((VP1) == (VP2) ? 1 : \
((VP1) && (VP2) && (vn_getops(VP1) == vn_getops(VP2)) ? \
VOP_CMP(VP1, VP2, NULL) : 0))