diff options
author | George Wilson <George.Wilson@Sun.COM> | 2010-03-19 13:34:41 -0700 |
---|---|---|
committer | George Wilson <George.Wilson@Sun.COM> | 2010-03-19 13:34:41 -0700 |
commit | dcba9f3fbefe06ad19972b4de0351924601e5767 (patch) | |
tree | 7f8431ca8d2371417b806919a00c48d181c786d8 | |
parent | 3228339cea1b23699cb9832992ee764dec04b2f1 (diff) | |
download | illumos-joyent-dcba9f3fbefe06ad19972b4de0351924601e5767.tar.gz |
6923585 deadlock while booting OpenSolaris build 132 from mirrored rpool with removed submirror
-rw-r--r-- | usr/src/uts/common/fs/zfs/dnode.c | 9 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/spa_misc.c | 20 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/sys/vdev.h | 3 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/sys/vdev_impl.h | 6 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/vdev.c | 29 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/vdev_disk.c | 65 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/vdev_file.c | 18 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/vdev_mirror.c | 8 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/vdev_missing.c | 6 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/vdev_raidz.c | 2 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/vdev_root.c | 4 | ||||
-rw-r--r-- | usr/src/uts/common/os/driver_lyr.c | 6 | ||||
-rw-r--r-- | usr/src/uts/common/sys/sunldi.h | 6 |
13 files changed, 167 insertions, 15 deletions
diff --git a/usr/src/uts/common/fs/zfs/dnode.c b/usr/src/uts/common/fs/zfs/dnode.c index 77c8d616b9..64f0287f7c 100644 --- a/usr/src/uts/common/fs/zfs/dnode.c +++ b/usr/src/uts/common/fs/zfs/dnode.c @@ -591,9 +591,14 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, /* * If you are holding the spa config lock as writer, you shouldn't - * be asking the DMU to do *anything*. + * be asking the DMU to do *anything* unless it's the root pool + * which may require us to read from the root filesystem while + * holding some (not all) of the locks as writer. */ - ASSERT(spa_config_held(os->os_spa, SCL_ALL, RW_WRITER) == 0); + ASSERT(spa_config_held(os->os_spa, SCL_ALL, RW_WRITER) == 0 || + (spa_is_root(os->os_spa) && + spa_config_held(os->os_spa, SCL_STATE, RW_WRITER) && + !spa_config_held(os->os_spa, SCL_ZIO, RW_WRITER))); if (object == DMU_USERUSED_OBJECT || object == DMU_GROUPUSED_OBJECT) { dn = (object == DMU_USERUSED_OBJECT) ? diff --git a/usr/src/uts/common/fs/zfs/spa_misc.c b/usr/src/uts/common/fs/zfs/spa_misc.c index c98e82c90e..f1929a513a 100644 --- a/usr/src/uts/common/fs/zfs/spa_misc.c +++ b/usr/src/uts/common/fs/zfs/spa_misc.c @@ -959,7 +959,22 @@ spa_vdev_state_enter(spa_t *spa, int oplocks) { int locks = SCL_STATE_ALL | oplocks; - spa_config_enter(spa, locks, spa, RW_WRITER); + /* + * Root pools may need to read from the underlying devfs filesystem + * when opening up a vdev. Unfortunately if we're holding the + * SCL_ZIO lock it will result in a deadlock when we try to issue + * the read from the root filesystem. Instead we "prefetch" + * the associated vnodes that we need prior to opening the + * underlying devices and cache them so that we can prevent + * any I/O when we are doing the actual open. 
+ */ + if (spa_is_root(spa)) { + spa_config_enter(spa, SCL_STATE | SCL_L2ARC, spa, RW_WRITER); + vdev_hold(spa->spa_root_vdev); + spa_config_enter(spa, SCL_ZIO | oplocks, spa, RW_WRITER); + } else { + spa_config_enter(spa, locks, spa, RW_WRITER); + } spa->spa_vdev_locks = locks; } @@ -978,6 +993,9 @@ spa_vdev_state_exit(spa_t *spa, vdev_t *vd, int error) spa->spa_config_generation++; } + if (spa_is_root(spa)) + vdev_rele(spa->spa_root_vdev); + ASSERT3U(spa->spa_vdev_locks, >=, SCL_STATE_ALL); spa_config_exit(spa, spa->spa_vdev_locks, spa); diff --git a/usr/src/uts/common/fs/zfs/sys/vdev.h b/usr/src/uts/common/fs/zfs/sys/vdev.h index 3bf5ba8042..b37516a984 100644 --- a/usr/src/uts/common/fs/zfs/sys/vdev.h +++ b/usr/src/uts/common/fs/zfs/sys/vdev.h @@ -70,6 +70,9 @@ extern boolean_t vdev_dtl_required(vdev_t *vd); extern boolean_t vdev_resilver_needed(vdev_t *vd, uint64_t *minp, uint64_t *maxp); +extern void vdev_hold(vdev_t *); +extern void vdev_rele(vdev_t *); + extern int vdev_metaslab_init(vdev_t *vd, uint64_t txg); extern void vdev_metaslab_fini(vdev_t *vd); extern void vdev_metaslab_set_size(vdev_t *); diff --git a/usr/src/uts/common/fs/zfs/sys/vdev_impl.h b/usr/src/uts/common/fs/zfs/sys/vdev_impl.h index 238b9610f5..ce56986235 100644 --- a/usr/src/uts/common/fs/zfs/sys/vdev_impl.h +++ b/usr/src/uts/common/fs/zfs/sys/vdev_impl.h @@ -62,6 +62,8 @@ typedef uint64_t vdev_asize_func_t(vdev_t *vd, uint64_t psize); typedef int vdev_io_start_func_t(zio_t *zio); typedef void vdev_io_done_func_t(zio_t *zio); typedef void vdev_state_change_func_t(vdev_t *vd, int, int); +typedef void vdev_hold_func_t(vdev_t *vd); +typedef void vdev_rele_func_t(vdev_t *vd); typedef struct vdev_ops { vdev_open_func_t *vdev_op_open; @@ -70,6 +72,8 @@ typedef struct vdev_ops { vdev_io_start_func_t *vdev_op_io_start; vdev_io_done_func_t *vdev_op_io_done; vdev_state_change_func_t *vdev_op_state_change; + vdev_hold_func_t *vdev_op_hold; + vdev_rele_func_t *vdev_op_rele; char vdev_op_type[16]; 
boolean_t vdev_op_leaf; } vdev_ops_t; @@ -121,6 +125,8 @@ struct vdev { vdev_ops_t *vdev_ops; /* vdev operations */ spa_t *vdev_spa; /* spa for this vdev */ void *vdev_tsd; /* type-specific data */ + vnode_t *vdev_name_vp; /* vnode for pathname */ + vnode_t *vdev_devid_vp; /* vnode for devid */ vdev_t *vdev_top; /* top-level vdev */ vdev_t *vdev_parent; /* parent vdev */ vdev_t **vdev_child; /* array of children */ diff --git a/usr/src/uts/common/fs/zfs/vdev.c b/usr/src/uts/common/fs/zfs/vdev.c index 3c043f2ec6..3e587aeca2 100644 --- a/usr/src/uts/common/fs/zfs/vdev.c +++ b/usr/src/uts/common/fs/zfs/vdev.c @@ -1420,6 +1420,35 @@ vdev_close(vdev_t *vd) vd->vdev_stat.vs_aux = VDEV_AUX_NONE; } +void +vdev_hold(vdev_t *vd) +{ + spa_t *spa = vd->vdev_spa; + + ASSERT(spa_is_root(spa)); + if (spa->spa_state == POOL_STATE_UNINITIALIZED) + return; + + for (int c = 0; c < vd->vdev_children; c++) + vdev_hold(vd->vdev_child[c]); + + if (vd->vdev_ops->vdev_op_leaf) + vd->vdev_ops->vdev_op_hold(vd); +} + +void +vdev_rele(vdev_t *vd) +{ + spa_t *spa = vd->vdev_spa; + + ASSERT(spa_is_root(spa)); + for (int c = 0; c < vd->vdev_children; c++) + vdev_rele(vd->vdev_child[c]); + + if (vd->vdev_ops->vdev_op_leaf) + vd->vdev_ops->vdev_op_rele(vd); +} + /* * Reopen all interior vdevs and any unopened leaves. We don't actually * reopen leaf vdevs which had previously been opened as they might deadlock diff --git a/usr/src/uts/common/fs/zfs/vdev_disk.c b/usr/src/uts/common/fs/zfs/vdev_disk.c index 08e28b2749..4a26c7e82b 100644 --- a/usr/src/uts/common/fs/zfs/vdev_disk.c +++ b/usr/src/uts/common/fs/zfs/vdev_disk.c @@ -19,12 +19,12 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ #include <sys/zfs_context.h> -#include <sys/spa.h> +#include <sys/spa_impl.h> #include <sys/refcount.h> #include <sys/vdev_disk.h> #include <sys/vdev_impl.h> @@ -44,6 +44,65 @@ typedef struct vdev_disk_buf { zio_t *vdb_io; } vdev_disk_buf_t; +static void +vdev_disk_hold(vdev_t *vd) +{ + ddi_devid_t devid; + char *minor; + + ASSERT(spa_config_held(vd->vdev_spa, SCL_STATE, RW_WRITER)); + + /* + * We must have a pathname, and it must be absolute. + */ + if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') + return; + + /* + * Only prefetch path and devid info if the device has + * never been opened. + */ + if (vd->vdev_tsd != NULL) + return; + + if (vd->vdev_wholedisk == -1ULL) { + size_t len = strlen(vd->vdev_path) + 3; + char *buf = kmem_alloc(len, KM_SLEEP); + + (void) snprintf(buf, len, "%ss0", vd->vdev_path); + + (void) ldi_vp_from_name(buf, &vd->vdev_name_vp); + kmem_free(buf, len); + } + + if (vd->vdev_name_vp == NULL) + (void) ldi_vp_from_name(vd->vdev_path, &vd->vdev_name_vp); + + if (vd->vdev_devid != NULL && + ddi_devid_str_decode(vd->vdev_devid, &devid, &minor) == 0) { + (void) ldi_vp_from_devid(devid, minor, &vd->vdev_devid_vp); + ddi_devid_str_free(minor); + ddi_devid_free(devid); + } +} + +static void +vdev_disk_rele(vdev_t *vd) +{ + ASSERT(spa_config_held(vd->vdev_spa, SCL_STATE, RW_WRITER)); + + if (vd->vdev_name_vp) { + VN_RELE_ASYNC(vd->vdev_name_vp, + dsl_pool_vnrele_taskq(vd->vdev_spa->spa_dsl_pool)); + vd->vdev_name_vp = NULL; + } + if (vd->vdev_devid_vp) { + VN_RELE_ASYNC(vd->vdev_devid_vp, + dsl_pool_vnrele_taskq(vd->vdev_spa->spa_dsl_pool)); + vd->vdev_devid_vp = NULL; + } +} + static int vdev_disk_open(vdev_t *vd, uint64_t *psize, uint64_t *ashift) { @@ -463,6 +522,8 @@ vdev_ops_t vdev_disk_ops = { vdev_disk_io_start, vdev_disk_io_done, NULL, + vdev_disk_hold, + vdev_disk_rele, VDEV_TYPE_DISK, /* name of this vdev type */ B_TRUE /* leaf vdev */ }; diff --git a/usr/src/uts/common/fs/zfs/vdev_file.c b/usr/src/uts/common/fs/zfs/vdev_file.c 
index 779e88edb9..c7a47487a6 100644 --- a/usr/src/uts/common/fs/zfs/vdev_file.c +++ b/usr/src/uts/common/fs/zfs/vdev_file.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -35,6 +35,18 @@ * Virtual device vector for files. */ +static void +vdev_file_hold(vdev_t *vd) +{ + ASSERT(vd->vdev_path != NULL); +} + +static void +vdev_file_rele(vdev_t *vd) +{ + ASSERT(vd->vdev_path != NULL); +} + static int vdev_file_open(vdev_t *vd, uint64_t *psize, uint64_t *ashift) { @@ -178,6 +190,8 @@ vdev_ops_t vdev_file_ops = { vdev_file_io_start, vdev_file_io_done, NULL, + vdev_file_hold, + vdev_file_rele, VDEV_TYPE_FILE, /* name of this vdev type */ B_TRUE /* leaf vdev */ }; @@ -194,6 +208,8 @@ vdev_ops_t vdev_disk_ops = { vdev_file_io_start, vdev_file_io_done, NULL, + vdev_file_hold, + vdev_file_rele, VDEV_TYPE_DISK, /* name of this vdev type */ B_TRUE /* leaf vdev */ }; diff --git a/usr/src/uts/common/fs/zfs/vdev_mirror.c b/usr/src/uts/common/fs/zfs/vdev_mirror.c index ac2a9b0f4d..698c0275d3 100644 --- a/usr/src/uts/common/fs/zfs/vdev_mirror.c +++ b/usr/src/uts/common/fs/zfs/vdev_mirror.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -452,6 +452,8 @@ vdev_ops_t vdev_mirror_ops = { vdev_mirror_io_start, vdev_mirror_io_done, vdev_mirror_state_change, + NULL, + NULL, VDEV_TYPE_MIRROR, /* name of this vdev type */ B_FALSE /* not a leaf vdev */ }; @@ -463,6 +465,8 @@ vdev_ops_t vdev_replacing_ops = { vdev_mirror_io_start, vdev_mirror_io_done, vdev_mirror_state_change, + NULL, + NULL, VDEV_TYPE_REPLACING, /* name of this vdev type */ B_FALSE /* not a leaf vdev */ }; @@ -474,6 +478,8 @@ vdev_ops_t vdev_spare_ops = { vdev_mirror_io_start, vdev_mirror_io_done, vdev_mirror_state_change, + NULL, + NULL, VDEV_TYPE_SPARE, /* name of this vdev type */ B_FALSE /* not a leaf vdev */ }; diff --git a/usr/src/uts/common/fs/zfs/vdev_missing.c b/usr/src/uts/common/fs/zfs/vdev_missing.c index e1bf7d86a3..6a5588d592 100644 --- a/usr/src/uts/common/fs/zfs/vdev_missing.c +++ b/usr/src/uts/common/fs/zfs/vdev_missing.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -80,6 +80,8 @@ vdev_ops_t vdev_missing_ops = { vdev_missing_io_start, vdev_missing_io_done, NULL, + NULL, + NULL, VDEV_TYPE_MISSING, /* name of this vdev type */ B_TRUE /* leaf vdev */ }; @@ -91,6 +93,8 @@ vdev_ops_t vdev_hole_ops = { vdev_missing_io_start, vdev_missing_io_done, NULL, + NULL, + NULL, VDEV_TYPE_HOLE, /* name of this vdev type */ B_TRUE /* leaf vdev */ }; diff --git a/usr/src/uts/common/fs/zfs/vdev_raidz.c b/usr/src/uts/common/fs/zfs/vdev_raidz.c index aa031dd25b..30415c8abb 100644 --- a/usr/src/uts/common/fs/zfs/vdev_raidz.c +++ b/usr/src/uts/common/fs/zfs/vdev_raidz.c @@ -2139,6 +2139,8 @@ vdev_ops_t vdev_raidz_ops = { vdev_raidz_io_start, vdev_raidz_io_done, vdev_raidz_state_change, + NULL, + NULL, VDEV_TYPE_RAIDZ, /* name of this vdev type */ B_FALSE /* not a leaf vdev */ }; diff --git a/usr/src/uts/common/fs/zfs/vdev_root.c b/usr/src/uts/common/fs/zfs/vdev_root.c index 524c8e6060..879f78f3a5 100644 --- a/usr/src/uts/common/fs/zfs/vdev_root.c +++ b/usr/src/uts/common/fs/zfs/vdev_root.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -109,6 +109,8 @@ vdev_ops_t vdev_root_ops = { NULL, /* io_start - not applicable to the root */ NULL, /* io_done - not applicable to the root */ vdev_root_state_change, + NULL, + NULL, VDEV_TYPE_ROOT, /* name of this vdev type */ B_FALSE /* not a leaf vdev */ }; diff --git a/usr/src/uts/common/os/driver_lyr.c b/usr/src/uts/common/os/driver_lyr.c index fafb02ad89..a669c68336 100644 --- a/usr/src/uts/common/os/driver_lyr.c +++ b/usr/src/uts/common/os/driver_lyr.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -533,7 +533,7 @@ ldi_vp_from_dev(dev_t dev, int otyp, vnode_t **vpp) } /* get a vnode to a device by pathname */ -static int +int ldi_vp_from_name(char *path, vnode_t **vpp) { vnode_t *vp = NULL; @@ -668,7 +668,7 @@ ldi_devid_match(ddi_devid_t devid, dev_info_t *dip, dev_t dev) } /* get a handle to a device by devid and minor name */ -static int +int ldi_vp_from_devid(ddi_devid_t devid, char *minor_name, vnode_t **vpp) { dev_info_t *dip; diff --git a/usr/src/uts/common/sys/sunldi.h b/usr/src/uts/common/sys/sunldi.h index 71e9d9a7da..2a27a03ef4 100644 --- a/usr/src/uts/common/sys/sunldi.h +++ b/usr/src/uts/common/sys/sunldi.h @@ -19,15 +19,13 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #ifndef _SYS_SUNLDI_H #define _SYS_SUNLDI_H -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/modctl.h> #include <sys/stream.h> #include <sys/open.h> @@ -101,6 +99,8 @@ extern int ldi_open_by_name(char *, int, cred_t *, ldi_handle_t *, ldi_ident_t); extern int ldi_open_by_devid(ddi_devid_t, char *, int, cred_t *, ldi_handle_t *, ldi_ident_t); +extern int ldi_vp_from_name(char *, vnode_t **); +extern int ldi_vp_from_devid(ddi_devid_t, char *, vnode_t **); extern int ldi_close(ldi_handle_t, int flag, cred_t *); |