Diffstat (limited to 'usr/src/uts/common/fs/zfs/vdev.c'):
-rw-r--r--  usr/src/uts/common/fs/zfs/vdev.c | 110
1 file changed, 81 insertions(+), 29 deletions(-)
diff --git a/usr/src/uts/common/fs/zfs/vdev.c b/usr/src/uts/common/fs/zfs/vdev.c
index 01e892f4c4..9773ec7960 100644
--- a/usr/src/uts/common/fs/zfs/vdev.c
+++ b/usr/src/uts/common/fs/zfs/vdev.c
@@ -27,6 +27,7 @@
* Copyright 2016 Toomas Soome <tsoome@me.com>
* Copyright 2019 Joyent, Inc.
* Copyright (c) 2017, Intel Corporation.
+ * Copyright (c) 2019, Datto Inc. All rights reserved.
*/
#include <sys/zfs_context.h>
@@ -98,6 +99,12 @@ boolean_t vdev_validate_skip = B_FALSE;
int zfs_vdev_dtl_sm_blksz = (1 << 12);
/*
+ * Ignore errors during scrub/resilver. This allows working around a
+ * resilver triggered on import when the pool has errors.
+ */
+int zfs_scan_ignore_errors = 0;
+
+/*
* vdev-wide space maps that have lots of entries written to them at
* the end of each transaction can benefit from a higher I/O bandwidth
* (e.g. vdev_obsolete_sm), thus we default their block size to 128K.
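For reference, on illumos a tunable such as zfs_scan_ignore_errors is
typically set persistently in /etc/system, or flipped on a live system
with mdb; an illustrative sketch, not part of this change:

    # /etc/system (persistent across reboots):
    set zfs:zfs_scan_ignore_errors = 1

    # Live kernel, 32-bit write via mdb (use with care):
    echo 'zfs_scan_ignore_errors/W 1' | mdb -kw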
@@ -772,7 +779,7 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id,
&vd->vdev_resilver_txg);
if (nvlist_exists(nv, ZPOOL_CONFIG_RESILVER_DEFER))
- vdev_set_deferred_resilver(spa, vd);
+ vdev_defer_resilver(vd);
/*
* When importing a pool, we want to ignore the persistent fault
@@ -1764,18 +1771,12 @@ vdev_open(vdev_t *vd)
}
/*
- * If a leaf vdev has a DTL, and seems healthy, then kick off a
- * resilver. But don't do this if we are doing a reopen for a scrub,
- * since this would just restart the scrub we are already doing.
+ * If this is a leaf vdev, assess whether a resilver is needed.
+ * But don't do this if we are doing a reopen for a scrub, since
+ * this would just restart the scrub we are already doing.
*/
- if (vd->vdev_ops->vdev_op_leaf && !spa->spa_scrub_reopen &&
- vdev_resilver_needed(vd, NULL, NULL)) {
- if (dsl_scan_resilvering(spa->spa_dsl_pool) &&
- spa_feature_is_enabled(spa, SPA_FEATURE_RESILVER_DEFER))
- vdev_set_deferred_resilver(spa, vd);
- else
- spa_async_request(spa, SPA_ASYNC_RESILVER);
- }
+ if (vd->vdev_ops->vdev_op_leaf && !spa->spa_scrub_reopen)
+ dsl_scan_assess_vdev(spa->spa_dsl_pool, vd);
return (0);
}
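The defer-vs-async decision deleted above is assumed to move behind
dsl_scan_assess_vdev() in dsl_scan.c. A minimal sketch reconstructed
from the deleted lines; the real body in dsl_scan.c may differ:

    /*
     * Sketch only: centralize the choice between deferring a resilver
     * and requesting one asynchronously. Callers pass leaf vdevs.
     */
    void
    dsl_scan_assess_vdev(dsl_pool_t *dp, vdev_t *vd)
    {
            spa_t *spa = vd->vdev_spa;

            if (!vdev_resilver_needed(vd, NULL, NULL))
                    return;

            if (dsl_scan_resilvering(dp) &&
                spa_feature_is_enabled(spa, SPA_FEATURE_RESILVER_DEFER))
                    vdev_defer_resilver(vd);
            else
                    spa_async_request(spa, SPA_ASYNC_RESILVER);
    }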
@@ -2470,7 +2471,6 @@ vdev_dtl_should_excise(vdev_t *vd)
spa_t *spa = vd->vdev_spa;
dsl_scan_t *scn = spa->spa_dsl_pool->dp_scan;
- ASSERT0(scn->scn_phys.scn_errors);
ASSERT0(vd->vdev_children);
if (vd->vdev_state < VDEV_STATE_DEGRADED)
@@ -2520,10 +2520,29 @@ vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg, int scrub_done)
if (vd->vdev_ops->vdev_op_leaf) {
dsl_scan_t *scn = spa->spa_dsl_pool->dp_scan;
+ boolean_t wasempty = B_TRUE;
mutex_enter(&vd->vdev_dtl_lock);
/*
+ * If requested, pretend the scan completed cleanly.
+ */
+ if (zfs_scan_ignore_errors && scn)
+ scn->scn_phys.scn_errors = 0;
+
+ if (scrub_txg != 0 &&
+ !range_tree_is_empty(vd->vdev_dtl[DTL_MISSING])) {
+ wasempty = B_FALSE;
+ zfs_dbgmsg("guid:%llu txg:%llu scrub:%llu started:%d "
+ "dtl:%llu/%llu errors:%llu",
+ (u_longlong_t)vd->vdev_guid, (u_longlong_t)txg,
+ (u_longlong_t)scrub_txg, spa->spa_scrub_started,
+ (u_longlong_t)vdev_dtl_min(vd),
+ (u_longlong_t)vdev_dtl_max(vd),
+ (u_longlong_t)(scn ? scn->scn_phys.scn_errors : 0));
+ }
+
+ /*
* If we've completed a scan cleanly then determine
* if this vdev should remove any DTLs. We only want to
* excise regions on vdevs that were available during
@@ -2559,6 +2578,14 @@ vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg, int scrub_done)
space_reftree_generate_map(&reftree,
vd->vdev_dtl[DTL_MISSING], 1);
space_reftree_destroy(&reftree);
+
+ if (!range_tree_is_empty(vd->vdev_dtl[DTL_MISSING])) {
+ zfs_dbgmsg("update DTL_MISSING:%llu/%llu",
+ (u_longlong_t)vdev_dtl_min(vd),
+ (u_longlong_t)vdev_dtl_max(vd));
+ } else if (!wasempty) {
+ zfs_dbgmsg("DTL_MISSING is now empty");
+ }
}
range_tree_vacate(vd->vdev_dtl[DTL_PARTIAL], NULL, NULL);
range_tree_walk(vd->vdev_dtl[DTL_MISSING],
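The zfs_dbgmsg() records added above land in the kernel's ZFS debug
buffer; on illumos they can be read with the ::zfs_dbgmsg dcmd
(illustrative invocation):

    echo ::zfs_dbgmsg | mdb -k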
@@ -3543,14 +3570,11 @@ vdev_clear(spa_t *spa, vdev_t *vd)
if (vd != rvd && vdev_writeable(vd->vdev_top))
vdev_state_dirty(vd->vdev_top);
- if (vd->vdev_aux == NULL && !vdev_is_dead(vd)) {
- if (dsl_scan_resilvering(spa->spa_dsl_pool) &&
- spa_feature_is_enabled(spa,
- SPA_FEATURE_RESILVER_DEFER))
- vdev_set_deferred_resilver(spa, vd);
- else
- spa_async_request(spa, SPA_ASYNC_RESILVER);
- }
+ /* If a resilver isn't required, check if vdevs can be culled */
+ if (vd->vdev_aux == NULL && !vdev_is_dead(vd) &&
+ !dsl_scan_resilvering(spa->spa_dsl_pool) &&
+ !dsl_scan_resilver_scheduled(spa->spa_dsl_pool))
+ spa_async_request(spa, SPA_ASYNC_RESILVER_DONE);
spa_event_notify(spa, vd, NULL, ESC_ZFS_VDEV_CLEAR);
}
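SPA_ASYNC_RESILVER_DONE is serviced by the spa async thread, which runs
the resilver-done hook that detaches spare and replacing vdevs whose
resilver has completed. A sketch of the dispatch, assuming the existing
handling in spa.c:

    /* Sketch: inside spa_async_thread() in spa.c. */
    if (tasks & SPA_ASYNC_RESILVER_DONE)
            spa_vdev_resilver_done(spa);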
@@ -4559,18 +4583,46 @@ vdev_deadman(vdev_t *vd)
}
void
-vdev_set_deferred_resilver(spa_t *spa, vdev_t *vd)
+vdev_defer_resilver(vdev_t *vd)
{
- for (uint64_t i = 0; i < vd->vdev_children; i++)
- vdev_set_deferred_resilver(spa, vd->vdev_child[i]);
+ ASSERT(vd->vdev_ops->vdev_op_leaf);
- if (!vd->vdev_ops->vdev_op_leaf || !vdev_writeable(vd) ||
- range_tree_is_empty(vd->vdev_dtl[DTL_MISSING])) {
- return;
+ vd->vdev_resilver_deferred = B_TRUE;
+ vd->vdev_spa->spa_resilver_deferred = B_TRUE;
+}
+
+/*
+ * Clears the resilver deferred flag on all leaf devs under vd. Returns
+ * B_TRUE if we have devices that need to be resilvered and are available to
+ * accept resilver I/Os.
+ */
+boolean_t
+vdev_clear_resilver_deferred(vdev_t *vd, dmu_tx_t *tx)
+{
+ boolean_t resilver_needed = B_FALSE;
+ spa_t *spa = vd->vdev_spa;
+
+ for (int c = 0; c < vd->vdev_children; c++) {
+ vdev_t *cvd = vd->vdev_child[c];
+ resilver_needed |= vdev_clear_resilver_deferred(cvd, tx);
}
- vd->vdev_resilver_deferred = B_TRUE;
- spa->spa_resilver_deferred = B_TRUE;
+ if (vd == spa->spa_root_vdev &&
+ spa_feature_is_active(spa, SPA_FEATURE_RESILVER_DEFER)) {
+ spa_feature_decr(spa, SPA_FEATURE_RESILVER_DEFER, tx);
+ vdev_config_dirty(vd);
+ spa->spa_resilver_deferred = B_FALSE;
+ return (resilver_needed);
+ }
+
+ if (!vdev_is_concrete(vd) || vd->vdev_aux ||
+ !vd->vdev_ops->vdev_op_leaf)
+ return (resilver_needed);
+
+ vd->vdev_resilver_deferred = B_FALSE;
+
+ return (!vdev_is_dead(vd) && !vd->vdev_offline &&
+ vdev_resilver_needed(vd, NULL, NULL));
}
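A hedged sketch of how a syncing-context caller is expected to use the
return value, per the contract above; the exact call site lives in
dsl_scan.c and may differ:

    /*
     * Sketch only: once the current scan completes, drop the deferred
     * flags and start a fresh resilver if any leaf still needs one.
     */
    if (spa->spa_resilver_deferred &&
        vdev_clear_resilver_deferred(spa->spa_root_vdev, tx))
            spa_async_request(spa, SPA_ASYNC_RESILVER);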
/*