summaryrefslogtreecommitdiff
path: root/usr/src
diff options
context:
space:
mode:
authorThomas Haynes <Thomas.Haynes@Sun.COM>2008-08-13 07:44:04 -0700
committerThomas Haynes <Thomas.Haynes@Sun.COM>2008-08-13 07:44:04 -0700
commitd3a1459128b677cee1a84512ca49eef4bffd392d (patch)
tree3b5092228cc526ffc05b7873c7c2d9db4275a9ee /usr/src
parentf1c2346531c82ed357422b37e6c09f7b692e0c45 (diff)
downloadillumos-joyent-d3a1459128b677cee1a84512ca49eef4bffd392d.tar.gz
6684208 mirror mounted mountpoints don't want to umount when idle
6722697 snv92 dies often dereferencing a null ptr in nfs4_record_ephemeral_mount()
6722777 snv_92 dies often with Deadlock: cycle in blocking chain, out of nfs4_ephemeral_harvest_forest()
Diffstat (limited to 'usr/src')
-rw-r--r--usr/src/uts/common/fs/nfs/nfs4_stub_vnops.c313
-rw-r--r--usr/src/uts/common/fs/nfs/nfs4_vfsops.c16
-rw-r--r--usr/src/uts/common/nfs/nfs4_clnt.h8
3 files changed, 219 insertions, 118 deletions
diff --git a/usr/src/uts/common/fs/nfs/nfs4_stub_vnops.c b/usr/src/uts/common/fs/nfs/nfs4_stub_vnops.c
index 073d6dcc8c..72c63b8525 100644
--- a/usr/src/uts/common/fs/nfs/nfs4_stub_vnops.c
+++ b/usr/src/uts/common/fs/nfs/nfs4_stub_vnops.c
@@ -24,8 +24,6 @@
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
/*
* Support for ephemeral mounts, e.g. mirror-mounts. These mounts are
* triggered from a "stub" rnode via a special set of vnodeops.
@@ -290,6 +288,35 @@ const fs_operation_def_t nfs4_trigger_vnodeops_template[] = {
NULL, NULL
};
+static void
+nfs4_ephemeral_tree_hold(nfs4_ephemeral_tree_t *net) /* take one ref on net */
+{
+ mutex_enter(&net->net_cnt_lock); /* net_refcnt is only changed under it */
+ net->net_refcnt++;
+ ASSERT(net->net_refcnt != 0); /* trips if the ++ left the count at zero */
+ mutex_exit(&net->net_cnt_lock);
+}
+
+/*
+ * Drop one reference on the tree. The caller must already own
+ * net_cnt_lock (asserted below); this routine never takes it.
+ */
+static void
+nfs4_ephemeral_tree_decr(nfs4_ephemeral_tree_t *net)
+{
+ ASSERT(mutex_owned(&net->net_cnt_lock));
+ ASSERT(net->net_refcnt != 0); /* the count must not already be zero */
+ net->net_refcnt--;
+}
+
+static void
+nfs4_ephemeral_tree_rele(nfs4_ephemeral_tree_t *net) /* drop one ref on net */
+{
+ mutex_enter(&net->net_cnt_lock); /* _decr asserts this lock is owned */
+ nfs4_ephemeral_tree_decr(net);
+ mutex_exit(&net->net_cnt_lock);
+}
+
/*
* Trigger ops for stub vnodes; for mirror mounts, etc.
*
@@ -701,17 +728,23 @@ nfs4_trigger_mount(vnode_t *vp, vnode_t **newvpp)
net = mi->mi_ephemeral_tree;
mutex_exit(&mi->mi_lock);
- mutex_enter(&net->net_cnt_lock);
- net->net_refcnt++;
- mutex_exit(&net->net_cnt_lock);
+ nfs4_ephemeral_tree_hold(net);
+
+ mutex_enter(&net->net_tree_lock);
/*
- * Note that we do not do any checks to
- * see if the parent has been nuked.
- * We count on the vfs layer having protected
- * us from feet shooters.
+ * We can only proceed if the tree is neither locked
+ * nor being torn down.
*/
- mutex_enter(&net->net_tree_lock);
+ mutex_enter(&net->net_cnt_lock);
+ if (net->net_status & NFS4_EPHEMERAL_TREE_PROCESSING) {
+ nfs4_ephemeral_tree_decr(net);
+ mutex_exit(&net->net_cnt_lock);
+ mutex_exit(&net->net_tree_lock);
+
+ return (EIO);
+ }
+ mutex_exit(&net->net_cnt_lock);
}
mutex_enter(&net->net_cnt_lock);
@@ -752,7 +785,7 @@ done:
net->net_status &= ~NFS4_EPHEMERAL_TREE_MOUNTING;
if (is_building)
net->net_status &= ~NFS4_EPHEMERAL_TREE_BUILDING;
- net->net_refcnt--;
+ nfs4_ephemeral_tree_decr(net);
mutex_exit(&net->net_cnt_lock);
mutex_exit(&net->net_tree_lock);
@@ -1358,7 +1391,7 @@ nfs4_trigger_nargs_destroy(struct nfs_args *nargs)
*
* This is called from nfs4_mount().
*/
-void
+int
nfs4_record_ephemeral_mount(mntinfo4_t *mi, vnode_t *mvp)
{
mntinfo4_t *mi_parent;
@@ -1373,6 +1406,8 @@ nfs4_record_ephemeral_mount(mntinfo4_t *mi, vnode_t *mvp)
nfs4_trigger_globals_t *ntg;
zone_t *zone = curproc->p_zone;
+ int rc = 0;
+
mi_parent = VTOMI4(mvp);
/*
@@ -1386,6 +1421,28 @@ nfs4_record_ephemeral_mount(mntinfo4_t *mi, vnode_t *mvp)
mutex_enter(&mi_parent->mi_lock);
mutex_enter(&mi->mi_lock);
+ net = mi->mi_ephemeral_tree =
+ mi_parent->mi_ephemeral_tree;
+
+ /*
+ * If the mi_ephemeral_tree is NULL, then it
+ * means that either the harvester or a manual
+ * umount has cleared the tree out right before
+ * we got here.
+ *
+ * There is nothing we can do here, so return
+ * to the caller and let them decide whether they
+ * try again.
+ */
+ if (net == NULL) {
+ mutex_exit(&mi->mi_lock);
+ mutex_exit(&mi_parent->mi_lock);
+
+ return (EBUSY);
+ }
+
+ nfs4_ephemeral_tree_hold(net);
+
/*
* We need to tack together the ephemeral mount
* with this new mntinfo.
@@ -1403,10 +1460,6 @@ nfs4_record_ephemeral_mount(mntinfo4_t *mi, vnode_t *mvp)
mi->mi_flags |= MI4_EPHEMERAL;
mi->mi_ephemeral = eph;
- net = mi->mi_ephemeral_tree =
- mi_parent->mi_ephemeral_tree;
- ASSERT(net != NULL);
-
/*
* If the enclosing mntinfo4 is also ephemeral,
* then we need to point to its enclosing parent.
@@ -1423,19 +1476,29 @@ nfs4_record_ephemeral_mount(mntinfo4_t *mi, vnode_t *mvp)
* branch.
*/
prior = mi_parent->mi_ephemeral;
- ASSERT(prior != NULL);
- if (prior->ne_child == NULL) {
- prior->ne_child = eph;
+ if (prior == NULL) {
+ /*
+ * Race condition, clean up, and
+ * let caller handle mntinfo.
+ */
+ mi->mi_flags &= ~MI4_EPHEMERAL;
+ mi->mi_ephemeral = NULL;
+ kmem_free(eph, sizeof (*eph));
+ rc = EBUSY;
} else {
- child = prior->ne_child;
+ if (prior->ne_child == NULL) {
+ prior->ne_child = eph;
+ } else {
+ child = prior->ne_child;
- prior->ne_child = eph;
- eph->ne_peer = child;
+ prior->ne_child = eph;
+ eph->ne_peer = child;
- child->ne_prior = eph;
- }
+ child->ne_prior = eph;
+ }
- eph->ne_prior = prior;
+ eph->ne_prior = prior;
+ }
} else {
/*
* The parent mntinfo4 is the non-ephemeral
@@ -1460,8 +1523,12 @@ nfs4_record_ephemeral_mount(mntinfo4_t *mi, vnode_t *mvp)
eph->ne_prior = NULL;
}
+ nfs4_ephemeral_tree_rele(net);
+
mutex_exit(&mi->mi_lock);
mutex_exit(&mi_parent->mi_lock);
+
+ return (rc);
}
/*
@@ -1639,8 +1706,8 @@ nfs4_ephemeral_umount_unlock(bool_t *pmust_unlock,
if (*pmust_unlock) {
mutex_enter(&net->net_cnt_lock);
- net->net_refcnt--;
net->net_status &= ~NFS4_EPHEMERAL_TREE_UMOUNTING;
+ nfs4_ephemeral_tree_decr(net);
mutex_exit(&net->net_cnt_lock);
mutex_exit(&net->net_tree_lock);
@@ -1694,6 +1761,13 @@ nfs4_ephemeral_umount(mntinfo4_t *mi, int flag, cred_t *cr,
nfs4_ephemeral_tree_t *net;
int is_derooting = FALSE;
int is_recursed = FALSE;
+ int was_locked = FALSE;
+
+ /*
+ * Make sure to set the default state for cleaning
+ * up the tree in the caller (and on the way out).
+ */
+ *pmust_unlock = FALSE;
/*
* The active vnodes on this file system may be ephemeral
@@ -1744,13 +1818,16 @@ nfs4_ephemeral_umount(mntinfo4_t *mi, int flag, cred_t *cr,
mutex_enter(&net->net_cnt_lock);
if (net->net_status &
NFS4_EPHEMERAL_TREE_LOCKED) {
- mutex_exit(&net->net_cnt_lock);
- mutex_exit(&mi->mi_lock);
-
/*
- * Someone is already working on
- * it. We need to back off and
- * let them proceed.
+ * If the tree is locked, we need
+ * to decide whether we are the
+ * harvester or some explicit call
+ * for a umount. The only way that
+ * we are the harvester is if
+ * MS_SYSSPACE is set.
+ *
+ * We only let the harvester through
+ * at this point.
*
* We return EBUSY so that the
* caller knows something is
@@ -1758,81 +1835,94 @@ nfs4_ephemeral_umount(mntinfo4_t *mi, int flag, cred_t *cr,
* time, the umount in the other
* thread may have already occured.
*/
- return (EBUSY);
- } else
+ if (!(flag & MS_SYSSPACE)) {
+ mutex_exit(&net->net_cnt_lock);
+ mutex_exit(&mi->mi_lock);
+
+ return (EBUSY);
+ }
+
+ was_locked = TRUE;
+ } else {
net->net_refcnt++;
+ ASSERT(net->net_refcnt != 0);
+ }
+
mutex_exit(&net->net_cnt_lock);
}
mutex_exit(&mi->mi_lock);
/*
- * If we grab the lock, it means that no other
- * operation is working on the tree. If we don't
- * grab it, we need to decide if this is because
- * we are a recursive call or a new operation.
- *
- * If we are a recursive call, we proceed without
- * the lock.
- *
- * Else we have to wait until the lock becomes free.
+ * If we are not the harvester, we need to check
+ * to see if we need to grab the tree lock.
*/
- if (!mutex_tryenter(&net->net_tree_lock)) {
- if (!is_recursed) {
- mutex_enter(&net->net_cnt_lock);
- if (net->net_status &
- (NFS4_EPHEMERAL_TREE_DEROOTING
- | NFS4_EPHEMERAL_TREE_INVALID)) {
- net->net_refcnt--;
- mutex_exit(&net->net_cnt_lock);
- goto is_busy;
- }
- mutex_exit(&net->net_cnt_lock);
-
+ if (was_locked == FALSE) {
+ /*
+ * If we grab the lock, it means that no other
+ * operation is working on the tree. If we don't
+ * grab it, we need to decide if this is because
+ * we are a recursive call or a new operation.
+ */
+ if (mutex_tryenter(&net->net_tree_lock)) {
+ *pmust_unlock = TRUE;
+ } else {
/*
- * We can't hold any other locks whilst
- * we wait on this to free up.
+ * If we are a recursive call, we can
+ * proceed without the lock.
+ * Otherwise we have to wait until
+ * the lock becomes free.
*/
- mutex_enter(&net->net_tree_lock);
+ if (!is_recursed) {
+ mutex_enter(&net->net_cnt_lock);
+ if (net->net_status &
+ (NFS4_EPHEMERAL_TREE_DEROOTING
+ | NFS4_EPHEMERAL_TREE_INVALID)) {
+ nfs4_ephemeral_tree_decr(net);
+ mutex_exit(&net->net_cnt_lock);
+ goto is_busy;
+ }
+ mutex_exit(&net->net_cnt_lock);
- /*
- * Note that while mi->mi_ephemeral
- * may change and thus we have to
- * update eph, it is the case that
- * we have tied down net and
- * do not care if mi->mi_ephemeral_tree
- * has changed.
- */
- mutex_enter(&mi->mi_lock);
- eph = mi->mi_ephemeral;
- mutex_exit(&mi->mi_lock);
+ /*
+ * We can't hold any other locks whilst
+ * we wait on this to free up.
+ */
+ mutex_enter(&net->net_tree_lock);
- /*
- * Okay, we need to see if either the
- * tree got nuked or the current node
- * got nuked. Both of which will cause
- * an error.
- *
- * Note that a subsequent retry of the
- * umount shall work.
- */
- mutex_enter(&net->net_cnt_lock);
- if (net->net_status &
- NFS4_EPHEMERAL_TREE_INVALID ||
- (!is_derooting && eph == NULL)) {
- net->net_refcnt--;
+ /*
+ * Note that while mi->mi_ephemeral
+ * may change and thus we have to
+ * update eph, it is the case that
+ * we have tied down net and
+ * do not care if mi->mi_ephemeral_tree
+ * has changed.
+ */
+ mutex_enter(&mi->mi_lock);
+ eph = mi->mi_ephemeral;
+ mutex_exit(&mi->mi_lock);
+
+ /*
+ * Okay, we need to see if either the
+ * tree got nuked or the current node
+ * got nuked. Both of which will cause
+ * an error.
+ *
+ * Note that a subsequent retry of the
+ * umount shall work.
+ */
+ mutex_enter(&net->net_cnt_lock);
+ if (net->net_status &
+ NFS4_EPHEMERAL_TREE_INVALID ||
+ (!is_derooting && eph == NULL)) {
+ nfs4_ephemeral_tree_decr(net);
+ mutex_exit(&net->net_cnt_lock);
+ mutex_exit(&net->net_tree_lock);
+ goto is_busy;
+ }
mutex_exit(&net->net_cnt_lock);
- mutex_exit(&net->net_tree_lock);
- goto is_busy;
+ *pmust_unlock = TRUE;
}
- mutex_exit(&net->net_cnt_lock);
- *pmust_unlock = TRUE;
}
- } else {
- /*
- * If we grab it right away, everything must
- * be great!
- */
- *pmust_unlock = TRUE;
}
/*
@@ -1892,14 +1982,24 @@ nfs4_ephemeral_umount(mntinfo4_t *mi, int flag, cred_t *cr,
/*
* We have derooted and we have caused the tree to be
- * invalid.
+ * invalidated.
*/
mutex_enter(&net->net_cnt_lock);
net->net_status &= ~NFS4_EPHEMERAL_TREE_DEROOTING;
net->net_status |= NFS4_EPHEMERAL_TREE_INVALID;
- net->net_refcnt--;
+ nfs4_ephemeral_tree_decr(net);
mutex_exit(&net->net_cnt_lock);
+ if (was_locked == FALSE)
+ mutex_exit(&net->net_tree_lock);
+
+ /*
+ * We have just blown away any notion of this
+ * tree being locked. We can't let the caller
+ * try to clean things up.
+ */
+ *pmust_unlock = FALSE;
+
/*
* At this point, the tree should no
* longer be associated with the
@@ -1991,17 +2091,15 @@ nfs4_ephemeral_harvest_forest(nfs4_trigger_globals_t *ntg,
vfs_t *vfsp;
if (force)
- flag = MS_FORCE;
+ flag = MS_FORCE | MS_SYSSPACE;
else
- flag = 0;
+ flag = MS_SYSSPACE;
mutex_enter(&ntg->ntg_forest_lock);
for (net = ntg->ntg_forest; net != NULL; net = next) {
next = net->net_next;
- mutex_enter(&net->net_cnt_lock);
- net->net_refcnt++;
- mutex_exit(&net->net_cnt_lock);
+ nfs4_ephemeral_tree_hold(net);
mutex_enter(&net->net_tree_lock);
@@ -2152,12 +2250,21 @@ nfs4_ephemeral_harvest_forest(nfs4_trigger_globals_t *ntg,
check_done:
/*
- * Are we done with this tree?
+ * At this point we are done processing this tree.
+ *
+ * If the tree is invalid and we are the only reference
+ * to it, then we push it on the local linked list
+ * to remove it at the end. We avoid that action now
+ * to keep the tree processing going along at a fair clip.
+ *
+ * Else, even if we are the only reference, we drop
+ * our hold on the current tree and allow it to be
+ * reused as needed.
*/
mutex_enter(&net->net_cnt_lock);
if (net->net_refcnt == 1 &&
net->net_status & NFS4_EPHEMERAL_TREE_INVALID) {
- net->net_refcnt--;
+ nfs4_ephemeral_tree_decr(net);
net->net_status &= ~NFS4_EPHEMERAL_TREE_LOCKED;
mutex_exit(&net->net_cnt_lock);
mutex_exit(&net->net_tree_lock);
@@ -2172,7 +2279,7 @@ check_done:
continue;
}
- net->net_refcnt--;
+ nfs4_ephemeral_tree_decr(net);
net->net_status &= ~NFS4_EPHEMERAL_TREE_LOCKED;
mutex_exit(&net->net_cnt_lock);
mutex_exit(&net->net_tree_lock);
diff --git a/usr/src/uts/common/fs/nfs/nfs4_vfsops.c b/usr/src/uts/common/fs/nfs/nfs4_vfsops.c
index b1d120eef6..bcf024f19d 100644
--- a/usr/src/uts/common/fs/nfs/nfs4_vfsops.c
+++ b/usr/src/uts/common/fs/nfs/nfs4_vfsops.c
@@ -28,8 +28,6 @@
* All Rights Reserved
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/param.h>
#include <sys/types.h>
#include <sys/systm.h>
@@ -1152,7 +1150,7 @@ proceed:
* Time to tie in the mirror mount info at last!
*/
if (flags & NFSMNT_EPHEMERAL)
- nfs4_record_ephemeral_mount(mi, mvp);
+ error = nfs4_record_ephemeral_mount(mi, mvp);
errout:
if (error) {
@@ -2331,7 +2329,7 @@ nfs4_unmount(vfs_t *vfsp, int flag, cred_t *cr)
ushort_t omax;
int removed;
- bool_t must_unlock = FALSE;
+ bool_t must_unlock;
nfs4_ephemeral_tree_t *eph_tree;
@@ -2384,13 +2382,7 @@ nfs4_unmount(vfs_t *vfsp, int flag, cred_t *cr)
*/
if (nfs4_ephemeral_umount(mi, flag, cr,
&must_unlock, &eph_tree)) {
-
- /*
- * Note that we ignore must_unlock
- * because it is garbage at this point.
- * I.e., it only has meaning upon
- * success.
- */
+ ASSERT(must_unlock == FALSE);
mutex_enter(&mi->mi_async_lock);
mi->mi_max_threads = omax;
mutex_exit(&mi->mi_async_lock);
@@ -3984,7 +3976,7 @@ nfs4_free_mount(vfs_t *vfsp, int flag, cred_t *cr)
boolean_t async_thread;
int removed;
- bool_t must_unlock = FALSE;
+ bool_t must_unlock;
nfs4_ephemeral_tree_t *eph_tree;
/*
diff --git a/usr/src/uts/common/nfs/nfs4_clnt.h b/usr/src/uts/common/nfs/nfs4_clnt.h
index 9c3d912e76..14a0fe190e 100644
--- a/usr/src/uts/common/nfs/nfs4_clnt.h
+++ b/usr/src/uts/common/nfs/nfs4_clnt.h
@@ -34,8 +34,6 @@
#ifndef _NFS4_CLNT_H
#define _NFS4_CLNT_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/errno.h>
#include <sys/types.h>
#include <sys/kstat.h>
@@ -1398,6 +1396,10 @@ typedef struct nfs4_ephemeral_tree {
#define NFS4_EPHEMERAL_TREE_UMOUNTING 0x10
#define NFS4_EPHEMERAL_TREE_LOCKED 0x20
+#define NFS4_EPHEMERAL_TREE_PROCESSING (NFS4_EPHEMERAL_TREE_DEROOTING | \
+ NFS4_EPHEMERAL_TREE_INVALID | NFS4_EPHEMERAL_TREE_UMOUNTING | \
+ NFS4_EPHEMERAL_TREE_LOCKED) /* net_status bits that bar a new mount */
+
/*
* This macro evaluates to non-zero if the given op releases state at the
* server.
@@ -1982,7 +1984,7 @@ extern int nfs4_ephemeral_umount(mntinfo4_t *, int, cred_t *,
extern void nfs4_ephemeral_umount_unlock(bool_t *,
nfs4_ephemeral_tree_t **);
-extern void nfs4_record_ephemeral_mount(mntinfo4_t *mi, vnode_t *mvp);
+extern int nfs4_record_ephemeral_mount(mntinfo4_t *mi, vnode_t *mvp);
extern int wait_for_recall(vnode_t *, vnode_t *, nfs4_op_hint_t,
nfs4_recov_state_t *);