diff options
author | eschrock <none@none> | 2006-05-16 11:20:11 -0700 |
---|---|---|
committer | eschrock <none@none> | 2006-05-16 11:20:11 -0700 |
commit | 560e6e964ab70b838deafc93f6d9da97f690ec25 (patch) | |
tree | 7c3288ecf822441f65e8af0700d62fbce4250bc4 /usr/src | |
parent | 870e5f5a88924152aec599e51ebb5ebbb899560e (diff) | |
download | illumos-gate-560e6e964ab70b838deafc93f6d9da97f690ec25.tar.gz |
6424405 zpool import destroyed_pool can damage existing pool using same devices
Diffstat (limited to 'usr/src')
-rw-r--r-- | usr/src/uts/common/fs/zfs/spa.c | 40 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/sys/vdev.h | 1 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/sys/vdev_impl.h | 3 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/vdev.c | 225 |
4 files changed, 135 insertions, 134 deletions
diff --git a/usr/src/uts/common/fs/zfs/spa.c b/usr/src/uts/common/fs/zfs/spa.c index 8de9585e2d..2fe82c2e80 100644 --- a/usr/src/uts/common/fs/zfs/spa.c +++ b/usr/src/uts/common/fs/zfs/spa.c @@ -313,6 +313,25 @@ spa_load(spa_t *spa, nvlist_t *config, spa_load_state_t state, int mosconfig) } /* + * Validate the labels for all leaf vdevs. We need to grab the config + * lock because all label I/O is done with the ZIO_FLAG_CONFIG_HELD + * flag. + */ + spa_config_enter(spa, RW_READER, FTAG); + error = vdev_validate(rvd); + spa_config_exit(spa, FTAG); + + if (error != 0) { + error = EBADF; + goto out; + } + + if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) { + error = ENXIO; + goto out; + } + + /* * Find the best uberblock. */ bzero(ub, sizeof (uberblock_t)); @@ -444,16 +463,9 @@ spa_load(spa_t *spa, nvlist_t *config, spa_load_state_t state, int mosconfig) } /* - * Load the vdev state for all top level vdevs. We need to grab the - * config lock because all label I/O is done with the - * ZIO_FLAG_CONFIG_HELD flag. + * Load the vdev state for all toplevel vdevs. */ - spa_config_enter(spa, RW_READER, FTAG); - error = vdev_load(rvd); - spa_config_exit(spa, FTAG); - - if (error) - goto out; + vdev_load(rvd); /* * Propagate the leaf DTLs we just loaded all the way up the tree. @@ -569,11 +581,11 @@ spa_open_common(const char *pool, spa_t **spapp, void *tag, nvlist_t **config) if (error == EBADF) { /* - * If vdev_load() returns EBADF, it indicates that one - * of the vdevs indicates that the pool has been - * exported or destroyed. If this is the case, the - * config cache is out of sync and we should remove the - * pool from the namespace. + * If vdev_validate() returns failure (indicated by + * EBADF), it indicates that one of the vdevs indicates + * that the pool has been exported or destroyed. If + * this is the case, the config cache is out of sync and + * we should remove the pool from the namespace. */ spa_unload(spa); spa_deactivate(spa); diff --git a/usr/src/uts/common/fs/zfs/sys/vdev.h b/usr/src/uts/common/fs/zfs/sys/vdev.h index c8d5db50f5..5a2e6750a0 100644 --- a/usr/src/uts/common/fs/zfs/sys/vdev.h +++ b/usr/src/uts/common/fs/zfs/sys/vdev.h @@ -58,6 +58,7 @@ typedef struct vdev_knob { #define VDEV_FAULT_COUNT 2 extern int vdev_open(vdev_t *); +extern int vdev_validate(vdev_t *); extern void vdev_close(vdev_t *); extern int vdev_create(vdev_t *, uint64_t txg); extern void vdev_init(vdev_t *, uint64_t txg); diff --git a/usr/src/uts/common/fs/zfs/sys/vdev_impl.h b/usr/src/uts/common/fs/zfs/sys/vdev_impl.h index 9ea2ca5373..1b18df8cda 100644 --- a/usr/src/uts/common/fs/zfs/sys/vdev_impl.h +++ b/usr/src/uts/common/fs/zfs/sys/vdev_impl.h @@ -128,6 +128,7 @@ struct vdev { uint64_t vdev_asize; /* allocatable device capacity */ uint64_t vdev_ashift; /* block alignment shift */ uint64_t vdev_state; /* see VDEV_STATE_* #defines */ + uint64_t vdev_prevstate; /* used when reopening a vdev */ vdev_ops_t *vdev_ops; /* vdev operations */ spa_t *vdev_spa; /* spa for this vdev */ void *vdev_tsd; /* type-specific data */ @@ -264,7 +265,7 @@ extern void vdev_remove_parent(vdev_t *cvd); /* * vdev sync load and sync */ -extern int vdev_load(vdev_t *vd); +extern void vdev_load(vdev_t *vd); extern void vdev_sync(vdev_t *vd, uint64_t txg); extern void vdev_sync_done(vdev_t *vd, uint64_t txg); extern void vdev_dirty(vdev_t *vd, int flags, void *arg, uint64_t txg); diff --git a/usr/src/uts/common/fs/zfs/vdev.c b/usr/src/uts/common/fs/zfs/vdev.c index 1914d8d903..726852cb4d 100644 --- a/usr/src/uts/common/fs/zfs/vdev.c +++ b/usr/src/uts/common/fs/zfs/vdev.c @@ -801,13 +801,6 @@ vdev_open(vdev_t *vd) } /* - * If we were able to open a vdev that was marked permanently - * unavailable, clear that state now. - */ - if (vd->vdev_not_present) - vd->vdev_not_present = 0; - - /* * This allows the ZFS DE to close cases appropriately. If a device * goes away and later returns, we want to close the associated case. * But it's not enough to simply post this only when a device goes from @@ -823,6 +816,79 @@ vdev_open(vdev_t *vd) } /* + * Called once the vdevs are all opened, this routine validates the label + * contents. This needs to be done before vdev_load() so that we don't + * inadvertently do repair I/Os to the wrong device, and so that vdev_reopen() + * won't succeed if the device has been changed underneath. + * + * This function will only return failure if one of the vdevs indicates that it + * has since been destroyed or exported. This is only possible if + * /etc/zfs/zpool.cache was readonly at the time. Otherwise, the vdev state + * will be updated but the function will return 0. + */ +int +vdev_validate(vdev_t *vd) +{ + spa_t *spa = vd->vdev_spa; + int c; + nvlist_t *label; + uint64_t guid; + uint64_t state; + + for (c = 0; c < vd->vdev_children; c++) + if (vdev_validate(vd->vdev_child[c]) != 0) + return (-1); + + if (vd->vdev_ops->vdev_op_leaf) { + + if ((label = vdev_label_read_config(vd)) == NULL) { + vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN, + VDEV_AUX_BAD_LABEL); + return (0); + } + + if (nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_GUID, + &guid) != 0 || guid != spa_guid(spa)) { + vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN, + VDEV_AUX_CORRUPT_DATA); + nvlist_free(label); + return (0); + } + + if (nvlist_lookup_uint64(label, ZPOOL_CONFIG_GUID, + &guid) != 0 || guid != vd->vdev_guid) { + vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN, + VDEV_AUX_CORRUPT_DATA); + nvlist_free(label); + return (0); + } + + if (nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_STATE, + &state) != 0) { + vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN, + VDEV_AUX_CORRUPT_DATA); + nvlist_free(label); + return (0); + } + + nvlist_free(label); + + if (spa->spa_load_state == SPA_LOAD_OPEN && + state != POOL_STATE_ACTIVE) + return (-1); + } + + /* + * If we were able to open and validate a vdev that was previously + * marked permanently unavailable, clear that state now. + */ + if (vd->vdev_not_present) + vd->vdev_not_present = 0; + + return (0); +} + +/* * Close a virtual device. */ void @@ -836,6 +902,13 @@ vdev_close(vdev_t *vd) vd->vdev_cache_active = B_FALSE; } + /* + * We record the previous state before we close it, so that if we are + * doing a reopen(), we don't generate FMA ereports if we notice that + * it's still faulted. + */ + vd->vdev_prevstate = vd->vdev_state; + if (vd->vdev_offline) vd->vdev_state = VDEV_STATE_OFFLINE; else @@ -1101,125 +1174,32 @@ vdev_dtl_sync(vdev_t *vd, uint64_t txg) dmu_tx_commit(tx); } -int +void vdev_load(vdev_t *vd) { - spa_t *spa = vd->vdev_spa; - int c, error; - nvlist_t *label; - uint64_t guid, state; - - dprintf("loading %s\n", vdev_description(vd)); + int c; /* * Recursively load all children. */ for (c = 0; c < vd->vdev_children; c++) - if ((error = vdev_load(vd->vdev_child[c])) != 0) - return (error); - - /* - * If this is a leaf vdev, make sure its agrees with its disk labels. - */ - if (vd->vdev_ops->vdev_op_leaf) { - - if (vdev_is_dead(vd)) - return (0); - - /* - * XXX state transitions don't propagate to parent here. - * Also, merely setting the state isn't sufficient because - * it's not persistent; a vdev_reopen() would make us - * forget all about it. - */ - if ((label = vdev_label_read_config(vd)) == NULL) { - dprintf("can't load label config\n"); - vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN, - VDEV_AUX_CORRUPT_DATA); - return (0); - } - - if (nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_GUID, - &guid) != 0 || guid != spa_guid(spa)) { - dprintf("bad or missing pool GUID (%llu)\n", guid); - vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN, - VDEV_AUX_CORRUPT_DATA); - nvlist_free(label); - return (0); - } - - if (nvlist_lookup_uint64(label, ZPOOL_CONFIG_GUID, &guid) || - guid != vd->vdev_guid) { - dprintf("bad or missing vdev guid (%llu != %llu)\n", - guid, vd->vdev_guid); - vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN, - VDEV_AUX_CORRUPT_DATA); - nvlist_free(label); - return (0); - } - - /* - * If we find a vdev with a matching pool guid and vdev guid, - * but the pool state is not active, it indicates that the user - * exported or destroyed the pool without affecting the config - * cache (if / was mounted readonly, for example). In this - * case, immediately return EBADF so the caller can remove it - * from the config. - */ - if (nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_STATE, - &state)) { - dprintf("missing pool state\n"); - vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN, - VDEV_AUX_CORRUPT_DATA); - nvlist_free(label); - return (0); - } - - if (state != POOL_STATE_ACTIVE && - (spa->spa_load_state == SPA_LOAD_OPEN || - (state != POOL_STATE_EXPORTED && - state != POOL_STATE_DESTROYED))) { - dprintf("pool state not active (%llu)\n", state); - nvlist_free(label); - return (EBADF); - } - - nvlist_free(label); - } + vdev_load(vd->vdev_child[c]); /* * If this is a top-level vdev, initialize its metaslabs. */ - if (vd == vd->vdev_top) { - - if (vd->vdev_ashift == 0 || vd->vdev_asize == 0) { - vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN, - VDEV_AUX_CORRUPT_DATA); - return (0); - } - - if ((error = vdev_metaslab_init(vd, 0)) != 0) { - vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN, - VDEV_AUX_CORRUPT_DATA); - return (0); - } - } + if (vd == vd->vdev_top && + (vd->vdev_ashift == 0 || vd->vdev_asize == 0 || + vdev_metaslab_init(vd, 0) != 0)) + vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN, + VDEV_AUX_CORRUPT_DATA); /* * If this is a leaf vdev, load its DTL. */ - if (vd->vdev_ops->vdev_op_leaf) { - error = vdev_dtl_load(vd); - if (error) { - dprintf("can't load DTL for %s, error %d\n", - vdev_description(vd), error); - vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN, - VDEV_AUX_CORRUPT_DATA); - return (0); - } - } - - return (0); + if (vd->vdev_ops->vdev_op_leaf && vdev_dtl_load(vd) != 0) + vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN, + VDEV_AUX_CORRUPT_DATA); } void @@ -1770,14 +1750,14 @@ vdev_propagate_state(vdev_t *vd) void vdev_set_state(vdev_t *vd, boolean_t isopen, vdev_state_t state, vdev_aux_t aux) { - uint64_t prev_state; + uint64_t save_state; if (state == vd->vdev_state) { vd->vdev_stat.vs_aux = aux; return; } - prev_state = vd->vdev_state; + save_state = vd->vdev_state; vd->vdev_state = state; vd->vdev_stat.vs_aux = aux; @@ -1789,7 +1769,18 @@ vdev_set_state(vdev_t *vd, boolean_t isopen, vdev_state_t state, vdev_aux_t aux) * begin with. Failure to open such a device is not considered * an error. */ - if (!vd->vdev_not_present && + if (vd->vdev_spa->spa_load_state == SPA_LOAD_IMPORT && + vd->vdev_ops->vdev_op_leaf) + vd->vdev_not_present = 1; + + /* + * Post the appropriate ereport. If the 'prevstate' field is + * set to something other than VDEV_STATE_UNKNOWN, it indicates + * that this is part of a vdev_reopen(). In this case, we don't + * want to post the ereport if the device was already in the + * CANT_OPEN state beforehand. + */ + if (vd->vdev_prevstate != state && !vd->vdev_not_present && vd != vd->vdev_spa->spa_root_vdev) { const char *class; @@ -1817,12 +1808,8 @@ vdev_set_state(vdev_t *vd, boolean_t isopen, vdev_state_t state, vdev_aux_t aux) } zfs_ereport_post(class, vd->vdev_spa, - vd, NULL, prev_state, 0); + vd, NULL, save_state, 0); } - - if (vd->vdev_spa->spa_load_state == SPA_LOAD_IMPORT && - vd->vdev_ops->vdev_op_leaf) - vd->vdev_not_present = 1; } if (isopen) |