summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGeorge Wilson <George.Wilson@Sun.COM>2010-03-19 13:34:41 -0700
committerGeorge Wilson <George.Wilson@Sun.COM>2010-03-19 13:34:41 -0700
commitdcba9f3fbefe06ad19972b4de0351924601e5767 (patch)
tree7f8431ca8d2371417b806919a00c48d181c786d8
parent3228339cea1b23699cb9832992ee764dec04b2f1 (diff)
downloadillumos-joyent-dcba9f3fbefe06ad19972b4de0351924601e5767.tar.gz
6923585 deadlock while booting OpenSolaris build 132 from mirrored rpool with removed submirror
-rw-r--r--usr/src/uts/common/fs/zfs/dnode.c9
-rw-r--r--usr/src/uts/common/fs/zfs/spa_misc.c20
-rw-r--r--usr/src/uts/common/fs/zfs/sys/vdev.h3
-rw-r--r--usr/src/uts/common/fs/zfs/sys/vdev_impl.h6
-rw-r--r--usr/src/uts/common/fs/zfs/vdev.c29
-rw-r--r--usr/src/uts/common/fs/zfs/vdev_disk.c65
-rw-r--r--usr/src/uts/common/fs/zfs/vdev_file.c18
-rw-r--r--usr/src/uts/common/fs/zfs/vdev_mirror.c8
-rw-r--r--usr/src/uts/common/fs/zfs/vdev_missing.c6
-rw-r--r--usr/src/uts/common/fs/zfs/vdev_raidz.c2
-rw-r--r--usr/src/uts/common/fs/zfs/vdev_root.c4
-rw-r--r--usr/src/uts/common/os/driver_lyr.c6
-rw-r--r--usr/src/uts/common/sys/sunldi.h6
13 files changed, 167 insertions, 15 deletions
diff --git a/usr/src/uts/common/fs/zfs/dnode.c b/usr/src/uts/common/fs/zfs/dnode.c
index 77c8d616b9..64f0287f7c 100644
--- a/usr/src/uts/common/fs/zfs/dnode.c
+++ b/usr/src/uts/common/fs/zfs/dnode.c
@@ -591,9 +591,14 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag,
/*
* If you are holding the spa config lock as writer, you shouldn't
- * be asking the DMU to do *anything*.
+ * be asking the DMU to do *anything* unless it's the root pool
+ * which may require us to read from the root filesystem while
+ * holding some (not all) of the locks as writer.
*/
- ASSERT(spa_config_held(os->os_spa, SCL_ALL, RW_WRITER) == 0);
+ ASSERT(spa_config_held(os->os_spa, SCL_ALL, RW_WRITER) == 0 ||
+ (spa_is_root(os->os_spa) &&
+ spa_config_held(os->os_spa, SCL_STATE, RW_WRITER) &&
+ !spa_config_held(os->os_spa, SCL_ZIO, RW_WRITER)));
if (object == DMU_USERUSED_OBJECT || object == DMU_GROUPUSED_OBJECT) {
dn = (object == DMU_USERUSED_OBJECT) ?
diff --git a/usr/src/uts/common/fs/zfs/spa_misc.c b/usr/src/uts/common/fs/zfs/spa_misc.c
index c98e82c90e..f1929a513a 100644
--- a/usr/src/uts/common/fs/zfs/spa_misc.c
+++ b/usr/src/uts/common/fs/zfs/spa_misc.c
@@ -959,7 +959,22 @@ spa_vdev_state_enter(spa_t *spa, int oplocks)
{
int locks = SCL_STATE_ALL | oplocks;
- spa_config_enter(spa, locks, spa, RW_WRITER);
+ /*
+ * Root pools may need to read of the underlying devfs filesystem
+ * when opening up a vdev. Unfortunately if we're holding the
+ * SCL_ZIO lock it will result in a deadlock when we try to issue
+ * the read from the root filesystem. Instead we "prefetch"
+ * the associated vnodes that we need prior to opening the
+ * underlying devices and cache them so that we can prevent
+ * any I/O when we are doing the actual open.
+ */
+ if (spa_is_root(spa)) {
+ spa_config_enter(spa, SCL_STATE | SCL_L2ARC, spa, RW_WRITER);
+ vdev_hold(spa->spa_root_vdev);
+ spa_config_enter(spa, SCL_ZIO | oplocks, spa, RW_WRITER);
+ } else {
+ spa_config_enter(spa, locks, spa, RW_WRITER);
+ }
spa->spa_vdev_locks = locks;
}
@@ -978,6 +993,9 @@ spa_vdev_state_exit(spa_t *spa, vdev_t *vd, int error)
spa->spa_config_generation++;
}
+ if (spa_is_root(spa))
+ vdev_rele(spa->spa_root_vdev);
+
ASSERT3U(spa->spa_vdev_locks, >=, SCL_STATE_ALL);
spa_config_exit(spa, spa->spa_vdev_locks, spa);
diff --git a/usr/src/uts/common/fs/zfs/sys/vdev.h b/usr/src/uts/common/fs/zfs/sys/vdev.h
index 3bf5ba8042..b37516a984 100644
--- a/usr/src/uts/common/fs/zfs/sys/vdev.h
+++ b/usr/src/uts/common/fs/zfs/sys/vdev.h
@@ -70,6 +70,9 @@ extern boolean_t vdev_dtl_required(vdev_t *vd);
extern boolean_t vdev_resilver_needed(vdev_t *vd,
uint64_t *minp, uint64_t *maxp);
+extern void vdev_hold(vdev_t *);
+extern void vdev_rele(vdev_t *);
+
extern int vdev_metaslab_init(vdev_t *vd, uint64_t txg);
extern void vdev_metaslab_fini(vdev_t *vd);
extern void vdev_metaslab_set_size(vdev_t *);
diff --git a/usr/src/uts/common/fs/zfs/sys/vdev_impl.h b/usr/src/uts/common/fs/zfs/sys/vdev_impl.h
index 238b9610f5..ce56986235 100644
--- a/usr/src/uts/common/fs/zfs/sys/vdev_impl.h
+++ b/usr/src/uts/common/fs/zfs/sys/vdev_impl.h
@@ -62,6 +62,8 @@ typedef uint64_t vdev_asize_func_t(vdev_t *vd, uint64_t psize);
typedef int vdev_io_start_func_t(zio_t *zio);
typedef void vdev_io_done_func_t(zio_t *zio);
typedef void vdev_state_change_func_t(vdev_t *vd, int, int);
+typedef void vdev_hold_func_t(vdev_t *vd);
+typedef void vdev_rele_func_t(vdev_t *vd);
typedef struct vdev_ops {
vdev_open_func_t *vdev_op_open;
@@ -70,6 +72,8 @@ typedef struct vdev_ops {
vdev_io_start_func_t *vdev_op_io_start;
vdev_io_done_func_t *vdev_op_io_done;
vdev_state_change_func_t *vdev_op_state_change;
+ vdev_hold_func_t *vdev_op_hold;
+ vdev_rele_func_t *vdev_op_rele;
char vdev_op_type[16];
boolean_t vdev_op_leaf;
} vdev_ops_t;
@@ -121,6 +125,8 @@ struct vdev {
vdev_ops_t *vdev_ops; /* vdev operations */
spa_t *vdev_spa; /* spa for this vdev */
void *vdev_tsd; /* type-specific data */
+ vnode_t *vdev_name_vp; /* vnode for pathname */
+ vnode_t *vdev_devid_vp; /* vnode for devid */
vdev_t *vdev_top; /* top-level vdev */
vdev_t *vdev_parent; /* parent vdev */
vdev_t **vdev_child; /* array of children */
diff --git a/usr/src/uts/common/fs/zfs/vdev.c b/usr/src/uts/common/fs/zfs/vdev.c
index 3c043f2ec6..3e587aeca2 100644
--- a/usr/src/uts/common/fs/zfs/vdev.c
+++ b/usr/src/uts/common/fs/zfs/vdev.c
@@ -1420,6 +1420,35 @@ vdev_close(vdev_t *vd)
vd->vdev_stat.vs_aux = VDEV_AUX_NONE;
}
+void
+vdev_hold(vdev_t *vd)
+{
+ spa_t *spa = vd->vdev_spa;
+
+ ASSERT(spa_is_root(spa));
+ if (spa->spa_state == POOL_STATE_UNINITIALIZED)
+ return;
+
+ for (int c = 0; c < vd->vdev_children; c++)
+ vdev_hold(vd->vdev_child[c]);
+
+ if (vd->vdev_ops->vdev_op_leaf)
+ vd->vdev_ops->vdev_op_hold(vd);
+}
+
+void
+vdev_rele(vdev_t *vd)
+{
+ spa_t *spa = vd->vdev_spa;
+
+ ASSERT(spa_is_root(spa));
+ for (int c = 0; c < vd->vdev_children; c++)
+ vdev_rele(vd->vdev_child[c]);
+
+ if (vd->vdev_ops->vdev_op_leaf)
+ vd->vdev_ops->vdev_op_rele(vd);
+}
+
/*
* Reopen all interior vdevs and any unopened leaves. We don't actually
* reopen leaf vdevs which had previously been opened as they might deadlock
diff --git a/usr/src/uts/common/fs/zfs/vdev_disk.c b/usr/src/uts/common/fs/zfs/vdev_disk.c
index 08e28b2749..4a26c7e82b 100644
--- a/usr/src/uts/common/fs/zfs/vdev_disk.c
+++ b/usr/src/uts/common/fs/zfs/vdev_disk.c
@@ -19,12 +19,12 @@
* CDDL HEADER END
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include <sys/zfs_context.h>
-#include <sys/spa.h>
+#include <sys/spa_impl.h>
#include <sys/refcount.h>
#include <sys/vdev_disk.h>
#include <sys/vdev_impl.h>
@@ -44,6 +44,65 @@ typedef struct vdev_disk_buf {
zio_t *vdb_io;
} vdev_disk_buf_t;
+static void
+vdev_disk_hold(vdev_t *vd)
+{
+ ddi_devid_t devid;
+ char *minor;
+
+ ASSERT(spa_config_held(vd->vdev_spa, SCL_STATE, RW_WRITER));
+
+ /*
+ * We must have a pathname, and it must be absolute.
+ */
+ if (vd->vdev_path == NULL || vd->vdev_path[0] != '/')
+ return;
+
+ /*
+ * Only prefetch path and devid info if the device has
+ * never been opened.
+ */
+ if (vd->vdev_tsd != NULL)
+ return;
+
+ if (vd->vdev_wholedisk == -1ULL) {
+ size_t len = strlen(vd->vdev_path) + 3;
+ char *buf = kmem_alloc(len, KM_SLEEP);
+
+ (void) snprintf(buf, len, "%ss0", vd->vdev_path);
+
+ (void) ldi_vp_from_name(buf, &vd->vdev_name_vp);
+ kmem_free(buf, len);
+ }
+
+ if (vd->vdev_name_vp == NULL)
+ (void) ldi_vp_from_name(vd->vdev_path, &vd->vdev_name_vp);
+
+ if (vd->vdev_devid != NULL &&
+ ddi_devid_str_decode(vd->vdev_devid, &devid, &minor) == 0) {
+ (void) ldi_vp_from_devid(devid, minor, &vd->vdev_devid_vp);
+ ddi_devid_str_free(minor);
+ ddi_devid_free(devid);
+ }
+}
+
+static void
+vdev_disk_rele(vdev_t *vd)
+{
+ ASSERT(spa_config_held(vd->vdev_spa, SCL_STATE, RW_WRITER));
+
+ if (vd->vdev_name_vp) {
+ VN_RELE_ASYNC(vd->vdev_name_vp,
+ dsl_pool_vnrele_taskq(vd->vdev_spa->spa_dsl_pool));
+ vd->vdev_name_vp = NULL;
+ }
+ if (vd->vdev_devid_vp) {
+ VN_RELE_ASYNC(vd->vdev_devid_vp,
+ dsl_pool_vnrele_taskq(vd->vdev_spa->spa_dsl_pool));
+ vd->vdev_devid_vp = NULL;
+ }
+}
+
static int
vdev_disk_open(vdev_t *vd, uint64_t *psize, uint64_t *ashift)
{
@@ -463,6 +522,8 @@ vdev_ops_t vdev_disk_ops = {
vdev_disk_io_start,
vdev_disk_io_done,
NULL,
+ vdev_disk_hold,
+ vdev_disk_rele,
VDEV_TYPE_DISK, /* name of this vdev type */
B_TRUE /* leaf vdev */
};
diff --git a/usr/src/uts/common/fs/zfs/vdev_file.c b/usr/src/uts/common/fs/zfs/vdev_file.c
index 779e88edb9..c7a47487a6 100644
--- a/usr/src/uts/common/fs/zfs/vdev_file.c
+++ b/usr/src/uts/common/fs/zfs/vdev_file.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -35,6 +35,18 @@
* Virtual device vector for files.
*/
+static void
+vdev_file_hold(vdev_t *vd)
+{
+ ASSERT(vd->vdev_path != NULL);
+}
+
+static void
+vdev_file_rele(vdev_t *vd)
+{
+ ASSERT(vd->vdev_path != NULL);
+}
+
static int
vdev_file_open(vdev_t *vd, uint64_t *psize, uint64_t *ashift)
{
@@ -178,6 +190,8 @@ vdev_ops_t vdev_file_ops = {
vdev_file_io_start,
vdev_file_io_done,
NULL,
+ vdev_file_hold,
+ vdev_file_rele,
VDEV_TYPE_FILE, /* name of this vdev type */
B_TRUE /* leaf vdev */
};
@@ -194,6 +208,8 @@ vdev_ops_t vdev_disk_ops = {
vdev_file_io_start,
vdev_file_io_done,
NULL,
+ vdev_file_hold,
+ vdev_file_rele,
VDEV_TYPE_DISK, /* name of this vdev type */
B_TRUE /* leaf vdev */
};
diff --git a/usr/src/uts/common/fs/zfs/vdev_mirror.c b/usr/src/uts/common/fs/zfs/vdev_mirror.c
index ac2a9b0f4d..698c0275d3 100644
--- a/usr/src/uts/common/fs/zfs/vdev_mirror.c
+++ b/usr/src/uts/common/fs/zfs/vdev_mirror.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -452,6 +452,8 @@ vdev_ops_t vdev_mirror_ops = {
vdev_mirror_io_start,
vdev_mirror_io_done,
vdev_mirror_state_change,
+ NULL,
+ NULL,
VDEV_TYPE_MIRROR, /* name of this vdev type */
B_FALSE /* not a leaf vdev */
};
@@ -463,6 +465,8 @@ vdev_ops_t vdev_replacing_ops = {
vdev_mirror_io_start,
vdev_mirror_io_done,
vdev_mirror_state_change,
+ NULL,
+ NULL,
VDEV_TYPE_REPLACING, /* name of this vdev type */
B_FALSE /* not a leaf vdev */
};
@@ -474,6 +478,8 @@ vdev_ops_t vdev_spare_ops = {
vdev_mirror_io_start,
vdev_mirror_io_done,
vdev_mirror_state_change,
+ NULL,
+ NULL,
VDEV_TYPE_SPARE, /* name of this vdev type */
B_FALSE /* not a leaf vdev */
};
diff --git a/usr/src/uts/common/fs/zfs/vdev_missing.c b/usr/src/uts/common/fs/zfs/vdev_missing.c
index e1bf7d86a3..6a5588d592 100644
--- a/usr/src/uts/common/fs/zfs/vdev_missing.c
+++ b/usr/src/uts/common/fs/zfs/vdev_missing.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -80,6 +80,8 @@ vdev_ops_t vdev_missing_ops = {
vdev_missing_io_start,
vdev_missing_io_done,
NULL,
+ NULL,
+ NULL,
VDEV_TYPE_MISSING, /* name of this vdev type */
B_TRUE /* leaf vdev */
};
@@ -91,6 +93,8 @@ vdev_ops_t vdev_hole_ops = {
vdev_missing_io_start,
vdev_missing_io_done,
NULL,
+ NULL,
+ NULL,
VDEV_TYPE_HOLE, /* name of this vdev type */
B_TRUE /* leaf vdev */
};
diff --git a/usr/src/uts/common/fs/zfs/vdev_raidz.c b/usr/src/uts/common/fs/zfs/vdev_raidz.c
index aa031dd25b..30415c8abb 100644
--- a/usr/src/uts/common/fs/zfs/vdev_raidz.c
+++ b/usr/src/uts/common/fs/zfs/vdev_raidz.c
@@ -2139,6 +2139,8 @@ vdev_ops_t vdev_raidz_ops = {
vdev_raidz_io_start,
vdev_raidz_io_done,
vdev_raidz_state_change,
+ NULL,
+ NULL,
VDEV_TYPE_RAIDZ, /* name of this vdev type */
B_FALSE /* not a leaf vdev */
};
diff --git a/usr/src/uts/common/fs/zfs/vdev_root.c b/usr/src/uts/common/fs/zfs/vdev_root.c
index 524c8e6060..879f78f3a5 100644
--- a/usr/src/uts/common/fs/zfs/vdev_root.c
+++ b/usr/src/uts/common/fs/zfs/vdev_root.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -109,6 +109,8 @@ vdev_ops_t vdev_root_ops = {
NULL, /* io_start - not applicable to the root */
NULL, /* io_done - not applicable to the root */
vdev_root_state_change,
+ NULL,
+ NULL,
VDEV_TYPE_ROOT, /* name of this vdev type */
B_FALSE /* not a leaf vdev */
};
diff --git a/usr/src/uts/common/os/driver_lyr.c b/usr/src/uts/common/os/driver_lyr.c
index fafb02ad89..a669c68336 100644
--- a/usr/src/uts/common/os/driver_lyr.c
+++ b/usr/src/uts/common/os/driver_lyr.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -533,7 +533,7 @@ ldi_vp_from_dev(dev_t dev, int otyp, vnode_t **vpp)
}
/* get a vnode to a device by pathname */
-static int
+int
ldi_vp_from_name(char *path, vnode_t **vpp)
{
vnode_t *vp = NULL;
@@ -668,7 +668,7 @@ ldi_devid_match(ddi_devid_t devid, dev_info_t *dip, dev_t dev)
}
/* get a handle to a device by devid and minor name */
-static int
+int
ldi_vp_from_devid(ddi_devid_t devid, char *minor_name, vnode_t **vpp)
{
dev_info_t *dip;
diff --git a/usr/src/uts/common/sys/sunldi.h b/usr/src/uts/common/sys/sunldi.h
index 71e9d9a7da..2a27a03ef4 100644
--- a/usr/src/uts/common/sys/sunldi.h
+++ b/usr/src/uts/common/sys/sunldi.h
@@ -19,15 +19,13 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_SUNLDI_H
#define _SYS_SUNLDI_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/modctl.h>
#include <sys/stream.h>
#include <sys/open.h>
@@ -101,6 +99,8 @@ extern int ldi_open_by_name(char *, int, cred_t *,
ldi_handle_t *, ldi_ident_t);
extern int ldi_open_by_devid(ddi_devid_t, char *, int, cred_t *,
ldi_handle_t *, ldi_ident_t);
+extern int ldi_vp_from_name(char *, vnode_t **);
+extern int ldi_vp_from_devid(ddi_devid_t, char *, vnode_t **);
extern int ldi_close(ldi_handle_t, int flag, cred_t *);