author     John Levon <john.levon@joyent.com>	2020-05-26 13:57:13 +0000
committer  John Levon <john.levon@joyent.com>	2020-05-26 13:57:13 +0000
commit     5b2acc0949194447bba6e45a0fa44d0b5f42f208 (patch)
tree       7ea9eb87bc68fee386dd39035ce715e87a0e673c /usr/src/uts/common/fs
parent     8ca018083101bf1cb175869679bc123187fb1bab (diff)
parent     2a1277d3064386cd5c4e372301007aa330bf1d5e (diff)
merge gcc9
Diffstat (limited to 'usr/src/uts/common/fs')
-rw-r--r--  usr/src/uts/common/fs/zfs/dsl_scan.c       | 131
-rw-r--r--  usr/src/uts/common/fs/zfs/spa.c            |  14
-rw-r--r--  usr/src/uts/common/fs/zfs/sys/dsl_scan.h   |   6
-rw-r--r--  usr/src/uts/common/fs/zfs/sys/spa.h        |   3
-rw-r--r--  usr/src/uts/common/fs/zfs/sys/vdev.h       |   3
-rw-r--r--  usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h  |   5
-rw-r--r--  usr/src/uts/common/fs/zfs/vdev.c           | 110
-rw-r--r--  usr/src/uts/common/fs/zfs/zio_inject.c     |  36
8 files changed, 214 insertions(+), 94 deletions(-)
diff --git a/usr/src/uts/common/fs/zfs/dsl_scan.c b/usr/src/uts/common/fs/zfs/dsl_scan.c
index b619719ba9..fa7b9fb2fc 100644
--- a/usr/src/uts/common/fs/zfs/dsl_scan.c
+++ b/usr/src/uts/common/fs/zfs/dsl_scan.c
@@ -24,7 +24,7 @@
* Copyright 2016 Gary Mills
* Copyright (c) 2011, 2017 by Delphix. All rights reserved.
* Copyright 2019 Joyent, Inc.
- * Copyright (c) 2017 Datto Inc.
+ * Copyright (c) 2017, 2019, Datto Inc. All rights reserved.
*/
#include <sys/dsl_scan.h>
@@ -549,6 +549,22 @@ dsl_scan_init(dsl_pool_t *dp, uint64_t txg)
zfs_dbgmsg("new-style scrub was modified "
"by old software; restarting in txg %llu",
(longlong_t)scn->scn_restart_txg);
+ } else if (dsl_scan_resilvering(dp)) {
+ /*
+ * If a resilver is in progress and there are already
+ * errors, restart it instead of finishing this scan and
+ * then restarting it. If there haven't been any errors
+ * then remember that the incore DTL is valid.
+ */
+ if (scn->scn_phys.scn_errors > 0) {
+ scn->scn_restart_txg = txg;
+ zfs_dbgmsg("resilver can't excise DTL_MISSING "
+ "when finished; restarting in txg %llu",
+ (u_longlong_t)scn->scn_restart_txg);
+ } else {
+ /* it's safe to excise DTL when finished */
+ spa->spa_scrub_started = B_TRUE;
+ }
}
}
@@ -599,6 +615,13 @@ dsl_scan_restarting(dsl_scan_t *scn, dmu_tx_t *tx)
}
boolean_t
+dsl_scan_resilver_scheduled(dsl_pool_t *dp)
+{
+ return ((dp->dp_scan && dp->dp_scan->scn_restart_txg != 0) ||
+ (spa_async_tasks(dp->dp_spa) & SPA_ASYNC_RESILVER));
+}
+
+boolean_t
dsl_scan_scrubbing(const dsl_pool_t *dp)
{
dsl_scan_phys_t *scn_phys = &dp->dp_scan->scn_phys;
@@ -794,7 +817,7 @@ dsl_scan(dsl_pool_t *dp, pool_scan_func_t func)
(void) spa_vdev_state_exit(spa, NULL, 0);
if (func == POOL_SCAN_RESILVER) {
- dsl_resilver_restart(spa->spa_dsl_pool, 0);
+ dsl_scan_restart_resilver(spa->spa_dsl_pool, 0);
return (0);
}
@@ -813,41 +836,6 @@ dsl_scan(dsl_pool_t *dp, pool_scan_func_t func)
dsl_scan_setup_sync, &func, 0, ZFS_SPACE_CHECK_EXTRA_RESERVED));
}
-/*
- * Sets the resilver defer flag to B_FALSE on all leaf devs under vd. Returns
- * B_TRUE if we have devices that need to be resilvered and are available to
- * accept resilver I/Os.
- */
-static boolean_t
-dsl_scan_clear_deferred(vdev_t *vd, dmu_tx_t *tx)
-{
- boolean_t resilver_needed = B_FALSE;
- spa_t *spa = vd->vdev_spa;
-
- for (int c = 0; c < vd->vdev_children; c++) {
- resilver_needed |=
- dsl_scan_clear_deferred(vd->vdev_child[c], tx);
- }
-
- if (vd == spa->spa_root_vdev &&
- spa_feature_is_active(spa, SPA_FEATURE_RESILVER_DEFER)) {
- spa_feature_decr(spa, SPA_FEATURE_RESILVER_DEFER, tx);
- vdev_config_dirty(vd);
- spa->spa_resilver_deferred = B_FALSE;
- return (resilver_needed);
- }
-
- if (!vdev_is_concrete(vd) || vd->vdev_aux ||
- !vd->vdev_ops->vdev_op_leaf)
- return (resilver_needed);
-
- if (vd->vdev_resilver_deferred)
- vd->vdev_resilver_deferred = B_FALSE;
-
- return (!vdev_is_dead(vd) && !vd->vdev_offline &&
- vdev_resilver_needed(vd, NULL, NULL));
-}
-
/* ARGSUSED */
static void
dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx)
@@ -915,7 +903,6 @@ dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx)
"errors=%llu", spa_get_errlog_size(spa));
if (DSL_SCAN_IS_SCRUB_RESILVER(scn)) {
- spa->spa_scrub_started = B_FALSE;
spa->spa_scrub_active = B_FALSE;
/*
@@ -943,30 +930,33 @@ dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx)
spa_errlog_rotate(spa);
/*
+ * Don't clear flag until after vdev_dtl_reassess to ensure that
+ * DTL_MISSING will get updated when possible.
+ */
+ spa->spa_scrub_started = B_FALSE;
+
+ /*
* We may have finished replacing a device.
* Let the async thread assess this and handle the detach.
*/
spa_async_request(spa, SPA_ASYNC_RESILVER_DONE);
/*
- * Clear any deferred_resilver flags in the config.
+ * Clear any resilver_deferred flags in the config.
* If there are drives that need resilvering, kick
* off an asynchronous request to start resilver.
- * dsl_scan_clear_deferred() may update the config
+ * vdev_clear_resilver_deferred() may update the config
* before the resilver can restart. In the event of
* a crash during this period, the spa loading code
* will find the drives that need to be resilvered
- * when the machine reboots and start the resilver then.
+ * and start the resilver then.
*/
- if (spa_feature_is_enabled(spa, SPA_FEATURE_RESILVER_DEFER)) {
- boolean_t resilver_needed =
- dsl_scan_clear_deferred(spa->spa_root_vdev, tx);
- if (resilver_needed) {
- spa_history_log_internal(spa,
- "starting deferred resilver", tx,
- "errors=%llu", spa_get_errlog_size(spa));
- spa_async_request(spa, SPA_ASYNC_RESILVER);
- }
+ if (spa_feature_is_enabled(spa, SPA_FEATURE_RESILVER_DEFER) &&
+ vdev_clear_resilver_deferred(spa->spa_root_vdev, tx)) {
+ spa_history_log_internal(spa,
+ "starting deferred resilver", tx, "errors=%llu",
+ (u_longlong_t)spa_get_errlog_size(spa));
+ spa_async_request(spa, SPA_ASYNC_RESILVER);
}
}
@@ -1073,7 +1063,7 @@ dsl_scrub_set_pause_resume(const dsl_pool_t *dp, pool_scrub_cmd_t cmd)
/* start a new scan, or restart an existing one. */
void
-dsl_resilver_restart(dsl_pool_t *dp, uint64_t txg)
+dsl_scan_restart_resilver(dsl_pool_t *dp, uint64_t txg)
{
if (txg == 0) {
dmu_tx_t *tx;
@@ -1221,10 +1211,13 @@ scan_ds_queue_sync(dsl_scan_t *scn, dmu_tx_t *tx)
static boolean_t
dsl_scan_should_clear(dsl_scan_t *scn)
{
+ spa_t *spa = scn->scn_dp->dp_spa;
vdev_t *rvd = scn->scn_dp->dp_spa->spa_root_vdev;
- uint64_t mlim_hard, mlim_soft, mused;
- uint64_t alloc = metaslab_class_get_alloc(spa_normal_class(
- scn->scn_dp->dp_spa));
+ uint64_t alloc, mlim_hard, mlim_soft, mused;
+
+ alloc = metaslab_class_get_alloc(spa_normal_class(spa));
+ alloc += metaslab_class_get_alloc(spa_special_class(spa));
+ alloc += metaslab_class_get_alloc(spa_dedup_class(spa));
mlim_hard = MAX((physmem / zfs_scan_mem_lim_fact) * PAGESIZE,
zfs_scan_mem_lim_min);
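
For a sense of scale, the hard limit above works out as follows. This is a
hedged, standalone arithmetic sketch; the tunable values (the 1/20 divisor
and the 16 MiB floor) are illustrative assumptions, not confirmed defaults.

/*
 * Illustrative only: compute the scan-queue hard memory limit the way
 * dsl_scan_should_clear() does above, with assumed tunable values.
 */
#include <stdio.h>
#include <stdint.h>

#define	MAX(a, b)	((a) > (b) ? (a) : (b))

int
main(void)
{
	uint64_t pagesize = 4096;			/* 4 KiB pages */
	uint64_t physmem = (16ULL << 30) / pagesize;	/* 16 GiB, in pages */
	uint64_t mem_lim_fact = 20;		/* assumed: 1/20 of memory */
	uint64_t mem_lim_min = 16ULL << 20;	/* assumed: 16 MiB floor */

	uint64_t mlim_hard = MAX((physmem / mem_lim_fact) * pagesize,
	    mem_lim_min);

	/* 16 GiB / 20 = ~819 MiB of queued scan data before clearing */
	printf("mlim_hard = %llu MiB\n",
	    (unsigned long long)(mlim_hard >> 20));
	return (0);
}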
@@ -4208,3 +4201,33 @@ dsl_scan_freed(spa_t *spa, const blkptr_t *bp)
for (int i = 0; i < BP_GET_NDVAS(bp); i++)
dsl_scan_freed_dva(spa, bp, i);
}
+
+/*
+ * Check if a vdev needs resilvering (i.e. has a non-empty DTL); if so, and
+ * a resilver is not already underway, start one. Otherwise, restart the
+ * existing resilver only if the max txg in the DTL range exceeds the max
+ * txg of the current scan. If the DTL max is below the scan max, the vdev
+ * has not missed any new data since the resilver started, so no restart is
+ * needed.
+ */
+void
+dsl_scan_assess_vdev(dsl_pool_t *dp, vdev_t *vd)
+{
+ uint64_t min, max;
+
+ if (!vdev_resilver_needed(vd, &min, &max))
+ return;
+
+ if (!dsl_scan_resilvering(dp)) {
+ spa_async_request(dp->dp_spa, SPA_ASYNC_RESILVER);
+ return;
+ }
+
+ if (max <= dp->dp_scan->scn_phys.scn_max_txg)
+ return;
+
+ /* restart is needed, check if it can be deferred */
+ if (spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_RESILVER_DEFER))
+ vdev_defer_resilver(vd);
+ else
+ spa_async_request(dp->dp_spa, SPA_ASYNC_RESILVER);
+}
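
To summarize the control flow that dsl_scan_assess_vdev() implements, here
is a condensed standalone sketch. The stub_* functions are hypothetical
stand-ins (with canned answers) for vdev_resilver_needed(),
dsl_scan_resilvering(), and the SPA async machinery; none of this is the
kernel code itself.

/*
 * Hedged sketch of the dsl_scan_assess_vdev() decision flow.
 */
#include <stdio.h>
#include <stdint.h>

typedef int boolean_t;
#define	B_FALSE	0
#define	B_TRUE	1

static boolean_t
stub_resilver_needed(uint64_t *min, uint64_t *max)
{
	*min = 100; *max = 250;		/* pretend txgs 100-250 are missing */
	return (B_TRUE);
}

static boolean_t stub_resilver_in_progress(void) { return (B_TRUE); }
static uint64_t stub_scan_max_txg(void) { return (200); }
static boolean_t stub_defer_feature_enabled(void) { return (B_TRUE); }
static void stub_request_resilver(void) { printf("request resilver\n"); }
static void stub_defer_resilver(void) { printf("defer resilver\n"); }

static void
assess_vdev(void)
{
	uint64_t min, max;

	if (!stub_resilver_needed(&min, &max))
		return;				/* DTL empty: nothing to do */

	if (!stub_resilver_in_progress()) {
		stub_request_resilver();	/* kick off a new resilver */
		return;
	}

	if (max <= stub_scan_max_txg())
		return;		/* the running scan already covers the DTL */

	/* a restart is needed; defer it when the pool supports deferral */
	if (stub_defer_feature_enabled())
		stub_defer_resilver();
	else
		stub_request_resilver();
}

int
main(void)
{
	assess_vdev();		/* DTL max 250 > scan max 200 -> "defer" */
	return (0);
}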
diff --git a/usr/src/uts/common/fs/zfs/spa.c b/usr/src/uts/common/fs/zfs/spa.c
index 547fa1e2bb..fc08eebbc0 100644
--- a/usr/src/uts/common/fs/zfs/spa.c
+++ b/usr/src/uts/common/fs/zfs/spa.c
@@ -27,9 +27,9 @@
* Copyright 2013 Saso Kiselkov. All rights reserved.
* Copyright (c) 2014 Integros [integros.com]
* Copyright 2016 Toomas Soome <tsoome@me.com>
+ * Copyright (c) 2017, 2019, Datto Inc. All rights reserved.
* Copyright 2019 Joyent, Inc.
* Copyright (c) 2017, Intel Corporation.
- * Copyright (c) 2017 Datto Inc.
* Copyright 2018 OmniOS Community Edition (OmniOSce) Association.
* Copyright 2020 Joshua M. Clulow <josh@sysmgr.org>
*/
@@ -6397,9 +6397,9 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing)
*/
if (dsl_scan_resilvering(spa_get_dsl(spa)) &&
spa_feature_is_enabled(spa, SPA_FEATURE_RESILVER_DEFER))
- vdev_set_deferred_resilver(spa, newvd);
+ vdev_defer_resilver(newvd);
else
- dsl_resilver_restart(spa->spa_dsl_pool, dtl_max_txg);
+ dsl_scan_restart_resilver(spa->spa_dsl_pool, dtl_max_txg);
if (spa->spa_bootfs)
spa_event_notify(spa, newvd, NULL, ESC_ZFS_BOOTFS_VDEV_ATTACH);
@@ -7637,7 +7637,7 @@ spa_async_thread(void *arg)
if (tasks & SPA_ASYNC_RESILVER &&
(!dsl_scan_resilvering(dp) ||
!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_RESILVER_DEFER)))
- dsl_resilver_restart(dp, 0);
+ dsl_scan_restart_resilver(dp, 0);
if (tasks & SPA_ASYNC_INITIALIZE_RESTART) {
mutex_enter(&spa_namespace_lock);
@@ -7753,6 +7753,12 @@ spa_async_request(spa_t *spa, int task)
mutex_exit(&spa->spa_async_lock);
}
+int
+spa_async_tasks(spa_t *spa)
+{
+ return (spa->spa_async_tasks);
+}
+
/*
* ==========================================================================
* SPA syncing routines
diff --git a/usr/src/uts/common/fs/zfs/sys/dsl_scan.h b/usr/src/uts/common/fs/zfs/sys/dsl_scan.h
index 1b600405ae..4693293290 100644
--- a/usr/src/uts/common/fs/zfs/sys/dsl_scan.h
+++ b/usr/src/uts/common/fs/zfs/sys/dsl_scan.h
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2017 by Delphix. All rights reserved.
- * Copyright (c) 2017 Datto Inc.
+ * Copyright (c) 2017, 2019, Datto Inc. All rights reserved.
*/
#ifndef _SYS_DSL_SCAN_H
@@ -164,10 +164,12 @@ void dsl_scan_fini(struct dsl_pool *dp);
void dsl_scan_sync(struct dsl_pool *, dmu_tx_t *);
int dsl_scan_cancel(struct dsl_pool *);
int dsl_scan(struct dsl_pool *, pool_scan_func_t);
+void dsl_scan_assess_vdev(struct dsl_pool *dp, vdev_t *vd);
boolean_t dsl_scan_scrubbing(const struct dsl_pool *dp);
int dsl_scrub_set_pause_resume(const struct dsl_pool *dp, pool_scrub_cmd_t cmd);
-void dsl_resilver_restart(struct dsl_pool *, uint64_t txg);
+void dsl_scan_restart_resilver(struct dsl_pool *, uint64_t txg);
boolean_t dsl_scan_resilvering(struct dsl_pool *dp);
+boolean_t dsl_scan_resilver_scheduled(struct dsl_pool *dp);
boolean_t dsl_dataset_unstable(struct dsl_dataset *ds);
void dsl_scan_ddt_entry(dsl_scan_t *scn, enum zio_checksum checksum,
ddt_entry_t *dde, dmu_tx_t *tx);
diff --git a/usr/src/uts/common/fs/zfs/sys/spa.h b/usr/src/uts/common/fs/zfs/sys/spa.h
index 31faac4f77..33cdfbeb4b 100644
--- a/usr/src/uts/common/fs/zfs/sys/spa.h
+++ b/usr/src/uts/common/fs/zfs/sys/spa.h
@@ -26,7 +26,7 @@
* Copyright 2013 Saso Kiselkov. All rights reserved.
* Copyright (c) 2014 Integros [integros.com]
* Copyright 2019 Joyent, Inc.
- * Copyright (c) 2017 Datto Inc.
+ * Copyright (c) 2017, 2019, Datto Inc. All rights reserved.
* Copyright (c) 2017, Intel Corporation.
* Copyright 2020 Joshua M. Clulow <josh@sysmgr.org>
*/
@@ -775,6 +775,7 @@ extern void spa_async_request(spa_t *spa, int flag);
extern void spa_async_unrequest(spa_t *spa, int flag);
extern void spa_async_suspend(spa_t *spa);
extern void spa_async_resume(spa_t *spa);
+extern int spa_async_tasks(spa_t *spa);
extern spa_t *spa_inject_addref(char *pool);
extern void spa_inject_delref(spa_t *spa);
extern void spa_scan_stat_init(spa_t *spa);
diff --git a/usr/src/uts/common/fs/zfs/sys/vdev.h b/usr/src/uts/common/fs/zfs/sys/vdev.h
index a6de7e6f2c..b8c2ee5c9e 100644
--- a/usr/src/uts/common/fs/zfs/sys/vdev.h
+++ b/usr/src/uts/common/fs/zfs/sys/vdev.h
@@ -23,6 +23,7 @@
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2017 by Delphix. All rights reserved.
* Copyright (c) 2017, Intel Corporation.
+ * Copyright (c) 2019, Datto Inc. All rights reserved.
*/
#ifndef _SYS_VDEV_H
@@ -153,6 +154,8 @@ extern void vdev_state_dirty(vdev_t *vd);
extern void vdev_state_clean(vdev_t *vd);
extern void vdev_set_deferred_resilver(spa_t *spa, vdev_t *vd);
+extern void vdev_defer_resilver(vdev_t *vd);
+extern boolean_t vdev_clear_resilver_deferred(vdev_t *vd, dmu_tx_t *tx);
typedef enum vdev_config_flag {
VDEV_CONFIG_SPARE = 1 << 0,
diff --git a/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h b/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h
index 9947bedf54..60d4d6805f 100644
--- a/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h
+++ b/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h
@@ -24,6 +24,7 @@
* Copyright (c) 2012, 2017 by Delphix. All rights reserved.
* Copyright 2016 RackTop Systems.
* Copyright (c) 2014 Integros [integros.com]
+ * Copyright (c) 2017, Intel Corporation.
*/
#ifndef _SYS_ZFS_IOCTL_H
@@ -389,6 +390,10 @@ typedef struct zinject_record {
#define ZI_NO_DVA (-1)
+/* scaled frequency ranges */
+#define ZI_PERCENTAGE_MIN 4294UL
+#define ZI_PERCENTAGE_MAX UINT32_MAX
+
typedef enum zinject_type {
ZINJECT_UNINITIALIZED,
ZINJECT_DATA_FAULT,
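
The scaled range above maps percentages onto the full 32-bit space:
ZI_PERCENTAGE_MAX stands for 100%, and ZI_PERCENTAGE_MIN (4294, roughly
UINT32_MAX / 10^6) for 0.0001%. A hedged sketch of how a userland injector
might encode a percentage follows; translate_percent() is a hypothetical
helper introduced for illustration, not part of this change.

/*
 * Hypothetical encoder: map a human-readable percentage onto the scaled
 * zi_freq range defined above. Values at or below 100 would alias the
 * legacy (unscaled) encoding, so results are clamped into
 * [ZI_PERCENTAGE_MIN, ZI_PERCENTAGE_MAX].
 */
#include <stdio.h>
#include <stdint.h>

#define	ZI_PERCENTAGE_MIN	4294UL
#define	ZI_PERCENTAGE_MAX	UINT32_MAX

static uint32_t
translate_percent(double percent)	/* expected range: 0.0001 .. 100.0 */
{
	uint32_t freq = (uint32_t)(percent / 100.0 * ZI_PERCENTAGE_MAX);

	return (freq < ZI_PERCENTAGE_MIN ? (uint32_t)ZI_PERCENTAGE_MIN : freq);
}

int
main(void)
{
	/* 0.0001% maps to ZI_PERCENTAGE_MIN, 100% to ZI_PERCENTAGE_MAX */
	printf("0.0001%% -> %u\n", translate_percent(0.0001));
	printf("1%%      -> %u\n", translate_percent(1.0));
	printf("100%%    -> %u\n", translate_percent(100.0));
	return (0);
}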
diff --git a/usr/src/uts/common/fs/zfs/vdev.c b/usr/src/uts/common/fs/zfs/vdev.c
index 01e892f4c4..9773ec7960 100644
--- a/usr/src/uts/common/fs/zfs/vdev.c
+++ b/usr/src/uts/common/fs/zfs/vdev.c
@@ -27,6 +27,7 @@
* Copyright 2016 Toomas Soome <tsoome@me.com>
* Copyright 2019 Joyent, Inc.
* Copyright (c) 2017, Intel Corporation.
+ * Copyright (c) 2019, Datto Inc. All rights reserved.
*/
#include <sys/zfs_context.h>
@@ -98,6 +99,12 @@ boolean_t vdev_validate_skip = B_FALSE;
int zfs_vdev_dtl_sm_blksz = (1 << 12);
/*
+ * Ignore errors during scrub/resilver. This allows working around a
+ * resilver upon import when there are pool errors.
+ */
+int zfs_scan_ignore_errors = 0;
+
+/*
* vdev-wide space maps that have lots of entries written to them at
* the end of each transaction can benefit from a higher I/O bandwidth
* (e.g. vdev_obsolete_sm), thus we default their block size to 128K.
@@ -772,7 +779,7 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id,
&vd->vdev_resilver_txg);
if (nvlist_exists(nv, ZPOOL_CONFIG_RESILVER_DEFER))
- vdev_set_deferred_resilver(spa, vd);
+ vdev_defer_resilver(vd);
/*
* When importing a pool, we want to ignore the persistent fault
@@ -1764,18 +1771,12 @@ vdev_open(vdev_t *vd)
}
/*
- * If a leaf vdev has a DTL, and seems healthy, then kick off a
- * resilver. But don't do this if we are doing a reopen for a scrub,
- * since this would just restart the scrub we are already doing.
+ * If this is a leaf vdev, assess whether a resilver is needed.
+ * But don't do this if we are doing a reopen for a scrub, since
+ * this would just restart the scrub we are already doing.
*/
- if (vd->vdev_ops->vdev_op_leaf && !spa->spa_scrub_reopen &&
- vdev_resilver_needed(vd, NULL, NULL)) {
- if (dsl_scan_resilvering(spa->spa_dsl_pool) &&
- spa_feature_is_enabled(spa, SPA_FEATURE_RESILVER_DEFER))
- vdev_set_deferred_resilver(spa, vd);
- else
- spa_async_request(spa, SPA_ASYNC_RESILVER);
- }
+ if (vd->vdev_ops->vdev_op_leaf && !spa->spa_scrub_reopen)
+ dsl_scan_assess_vdev(spa->spa_dsl_pool, vd);
return (0);
}
@@ -2470,7 +2471,6 @@ vdev_dtl_should_excise(vdev_t *vd)
spa_t *spa = vd->vdev_spa;
dsl_scan_t *scn = spa->spa_dsl_pool->dp_scan;
- ASSERT0(scn->scn_phys.scn_errors);
ASSERT0(vd->vdev_children);
if (vd->vdev_state < VDEV_STATE_DEGRADED)
@@ -2520,10 +2520,29 @@ vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg, int scrub_done)
if (vd->vdev_ops->vdev_op_leaf) {
dsl_scan_t *scn = spa->spa_dsl_pool->dp_scan;
+ boolean_t wasempty = B_TRUE;
mutex_enter(&vd->vdev_dtl_lock);
/*
+ * If requested, pretend the scan completed cleanly.
+ */
+ if (zfs_scan_ignore_errors && scn)
+ scn->scn_phys.scn_errors = 0;
+
+ if (scrub_txg != 0 &&
+ !range_tree_is_empty(vd->vdev_dtl[DTL_MISSING])) {
+ wasempty = B_FALSE;
+ zfs_dbgmsg("guid:%llu txg:%llu scrub:%llu started:%d "
+ "dtl:%llu/%llu errors:%llu",
+ (u_longlong_t)vd->vdev_guid, (u_longlong_t)txg,
+ (u_longlong_t)scrub_txg, spa->spa_scrub_started,
+ (u_longlong_t)vdev_dtl_min(vd),
+ (u_longlong_t)vdev_dtl_max(vd),
+ (u_longlong_t)(scn ? scn->scn_phys.scn_errors : 0));
+ }
+
+ /*
* If we've completed a scan cleanly then determine
* if this vdev should remove any DTLs. We only want to
* excise regions on vdevs that were available during
@@ -2559,6 +2578,14 @@ vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg, int scrub_done)
space_reftree_generate_map(&reftree,
vd->vdev_dtl[DTL_MISSING], 1);
space_reftree_destroy(&reftree);
+
+ if (!range_tree_is_empty(vd->vdev_dtl[DTL_MISSING])) {
+ zfs_dbgmsg("update DTL_MISSING:%llu/%llu",
+ (u_longlong_t)vdev_dtl_min(vd),
+ (u_longlong_t)vdev_dtl_max(vd));
+ } else if (!wasempty) {
+ zfs_dbgmsg("DTL_MISSING is now empty");
+ }
}
range_tree_vacate(vd->vdev_dtl[DTL_PARTIAL], NULL, NULL);
range_tree_walk(vd->vdev_dtl[DTL_MISSING],
@@ -3543,14 +3570,11 @@ vdev_clear(spa_t *spa, vdev_t *vd)
if (vd != rvd && vdev_writeable(vd->vdev_top))
vdev_state_dirty(vd->vdev_top);
- if (vd->vdev_aux == NULL && !vdev_is_dead(vd)) {
- if (dsl_scan_resilvering(spa->spa_dsl_pool) &&
- spa_feature_is_enabled(spa,
- SPA_FEATURE_RESILVER_DEFER))
- vdev_set_deferred_resilver(spa, vd);
- else
- spa_async_request(spa, SPA_ASYNC_RESILVER);
- }
+ /* If a resilver isn't required, check if vdevs can be culled */
+ if (vd->vdev_aux == NULL && !vdev_is_dead(vd) &&
+ !dsl_scan_resilvering(spa->spa_dsl_pool) &&
+ !dsl_scan_resilver_scheduled(spa->spa_dsl_pool))
+ spa_async_request(spa, SPA_ASYNC_RESILVER_DONE);
spa_event_notify(spa, vd, NULL, ESC_ZFS_VDEV_CLEAR);
}
@@ -4559,18 +4583,46 @@ vdev_deadman(vdev_t *vd)
}
void
-vdev_set_deferred_resilver(spa_t *spa, vdev_t *vd)
+vdev_defer_resilver(vdev_t *vd)
{
- for (uint64_t i = 0; i < vd->vdev_children; i++)
- vdev_set_deferred_resilver(spa, vd->vdev_child[i]);
+ ASSERT(vd->vdev_ops->vdev_op_leaf);
- if (!vd->vdev_ops->vdev_op_leaf || !vdev_writeable(vd) ||
- range_tree_is_empty(vd->vdev_dtl[DTL_MISSING])) {
- return;
+ vd->vdev_resilver_deferred = B_TRUE;
+ vd->vdev_spa->spa_resilver_deferred = B_TRUE;
+}
+
+/*
+ * Clears the resilver deferred flag on all leaf devs under vd. Returns
+ * B_TRUE if we have devices that need to be resilvered and are available to
+ * accept resilver I/Os.
+ */
+boolean_t
+vdev_clear_resilver_deferred(vdev_t *vd, dmu_tx_t *tx)
+{
+ boolean_t resilver_needed = B_FALSE;
+ spa_t *spa = vd->vdev_spa;
+
+ for (int c = 0; c < vd->vdev_children; c++) {
+ vdev_t *cvd = vd->vdev_child[c];
+ resilver_needed |= vdev_clear_resilver_deferred(cvd, tx);
}
- vd->vdev_resilver_deferred = B_TRUE;
- spa->spa_resilver_deferred = B_TRUE;
+ if (vd == spa->spa_root_vdev &&
+ spa_feature_is_active(spa, SPA_FEATURE_RESILVER_DEFER)) {
+ spa_feature_decr(spa, SPA_FEATURE_RESILVER_DEFER, tx);
+ vdev_config_dirty(vd);
+ spa->spa_resilver_deferred = B_FALSE;
+ return (resilver_needed);
+ }
+
+ if (!vdev_is_concrete(vd) || vd->vdev_aux ||
+ !vd->vdev_ops->vdev_op_leaf)
+ return (resilver_needed);
+
+ vd->vdev_resilver_deferred = B_FALSE;
+
+ return (!vdev_is_dead(vd) && !vd->vdev_offline &&
+ vdev_resilver_needed(vd, NULL, NULL));
}
/*
diff --git a/usr/src/uts/common/fs/zfs/zio_inject.c b/usr/src/uts/common/fs/zfs/zio_inject.c
index a65721d175..e332da9672 100644
--- a/usr/src/uts/common/fs/zfs/zio_inject.c
+++ b/usr/src/uts/common/fs/zfs/zio_inject.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
+ * Copyright (c) 2017, Intel Corporation.
*/
/*
@@ -100,6 +101,26 @@ static kmutex_t inject_delay_mtx;
static int inject_next_id = 1;
/*
+ * Test if the requested frequency was triggered
+ */
+static boolean_t
+freq_triggered(uint32_t frequency)
+{
+ /*
+ * zero implies always (100%)
+ */
+ if (frequency == 0)
+ return (B_TRUE);
+
+ /*
+ * Note: we still handle legacy (unscaled) frequency values
+ */
+ uint32_t maximum = (frequency <= 100) ? 100 : ZI_PERCENTAGE_MAX;
+
+ return (spa_get_random(maximum) < frequency);
+}
+
+/*
* Returns true if the given record matches the I/O in progress.
*/
static boolean_t
@@ -114,8 +135,7 @@ zio_match_handler(zbookmark_phys_t *zb, uint64_t type, int dva,
record->zi_object == DMU_META_DNODE_OBJECT) {
if (record->zi_type == DMU_OT_NONE ||
type == record->zi_type)
- return (record->zi_freq == 0 ||
- spa_get_random(100) < record->zi_freq);
+ return (freq_triggered(record->zi_freq));
else
return (B_FALSE);
}
@@ -130,8 +150,7 @@ zio_match_handler(zbookmark_phys_t *zb, uint64_t type, int dva,
zb->zb_blkid <= record->zi_end &&
(record->zi_dvas == 0 || (record->zi_dvas & (1ULL << dva))) &&
error == record->zi_error) {
- return (record->zi_freq == 0 ||
- spa_get_random(100) < record->zi_freq);
+ return (freq_triggered(record->zi_freq));
}
return (B_FALSE);
@@ -360,6 +379,12 @@ zio_handle_device_injection(vdev_t *vd, zio_t *zio, int error)
if (handler->zi_record.zi_error == error) {
/*
+ * limit error injection if requested
+ */
+ if (!freq_triggered(handler->zi_record.zi_freq))
+ continue;
+
+ /*
* For a failed open, pretend like the device
* has gone away.
*/
@@ -527,6 +552,9 @@ zio_handle_io_delay(zio_t *zio)
if (handler->zi_record.zi_cmd != ZINJECT_DELAY_IO)
continue;
+ if (!freq_triggered(handler->zi_record.zi_freq))
+ continue;
+
if (vd->vdev_guid != handler->zi_record.zi_guid)
continue;
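
Because freq_triggered() (added above) compares a uniform random draw
against the handler frequency, its hit rate can be sanity-checked in
userland. The following simulation is a hedged sketch: fake_random() is a
crude stand-in for the kernel's spa_get_random(), so the measured rates are
only approximate.

/*
 * Userland simulation of freq_triggered(): approximate the trigger rate
 * for a legacy (1-100) and a scaled (> 100) frequency. Illustrative only.
 */
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

#define	ZI_PERCENTAGE_MAX	UINT32_MAX

static uint32_t
fake_random(uint32_t range)	/* crude stand-in for spa_get_random() */
{
	uint64_t r = ((uint64_t)rand() << 31) ^ (uint64_t)rand();
	return ((uint32_t)(r % range));
}

static int
freq_triggered(uint32_t frequency)
{
	if (frequency == 0)	/* zero implies always (100%) */
		return (1);

	/* legacy values are percentages; larger values are scaled */
	uint32_t maximum = (frequency <= 100) ? 100 : ZI_PERCENTAGE_MAX;

	return (fake_random(maximum) < frequency);
}

int
main(void)
{
	int trials = 1000000, legacy = 0, scaled = 0;

	for (int i = 0; i < trials; i++) {
		legacy += freq_triggered(25);		/* legacy: ~25% */
		scaled += freq_triggered(42949673);	/* scaled: ~1% */
	}
	printf("legacy 25 -> %.2f%%, scaled 42949673 -> %.2f%%\n",
	    100.0 * legacy / trials, 100.0 * scaled / trials);
	return (0);
}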