summaryrefslogtreecommitdiff
path: root/usr/src
diff options
context:
space:
mode:
authorJohn Poduska <jpoduska@datto.com>2020-05-21 14:01:04 -0500
committerJason King <jason.king@joyent.com>2020-05-22 14:16:38 -0500
commit165c5c6fe7d6c7a95878c8a3aae7da65d1da1d90 (patch)
treea3c526b366e720565eac524a198596796d2d898d /usr/src
parent9c3024a3457d2d1269be18124a1ac69e33000da7 (diff)
downloadillumos-joyent-165c5c6fe7d6c7a95878c8a3aae7da65d1da1d90.tar.gz
12774 Resilver restarts unnecessarily when it encounters errors
Portions contributed by: Tim Chase <tim@chase2k.com> Portions contributed by: Jason King <jason.king@joyent.com> Reviewed by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed by: Paul Zuchowski <pzuchowski@datto.com> Reviewed by: Toomas Soome <tsoome@me.com>
Diffstat (limited to 'usr/src')
-rw-r--r--usr/src/pkg/manifests/system-test-zfstest.mf2
-rw-r--r--usr/src/test/zfs-tests/runfiles/omnios.run2
-rw-r--r--usr/src/test/zfs-tests/runfiles/openindiana.run2
-rw-r--r--usr/src/test/zfs-tests/runfiles/smartos.run2
-rw-r--r--usr/src/test/zfs-tests/tests/functional/resilver/Makefile3
-rwxr-xr-xusr/src/test/zfs-tests/tests/functional/resilver/resilver_restart_001.ksh2
-rw-r--r--usr/src/test/zfs-tests/tests/functional/resilver/resilver_restart_002.ksh110
-rw-r--r--usr/src/test/zfs-tests/tests/functional/resilver/sysevent.c45
-rw-r--r--usr/src/uts/common/fs/zfs/dsl_scan.c23
-rw-r--r--usr/src/uts/common/fs/zfs/vdev.c34
10 files changed, 204 insertions, 21 deletions
diff --git a/usr/src/pkg/manifests/system-test-zfstest.mf b/usr/src/pkg/manifests/system-test-zfstest.mf
index 233da7a9ad..3a4d20d9bf 100644
--- a/usr/src/pkg/manifests/system-test-zfstest.mf
+++ b/usr/src/pkg/manifests/system-test-zfstest.mf
@@ -2933,6 +2933,8 @@ file path=opt/zfs-tests/tests/functional/resilver/cleanup mode=0555
file path=opt/zfs-tests/tests/functional/resilver/resilver.cfg mode=0444
file path=opt/zfs-tests/tests/functional/resilver/resilver_restart_001 \
mode=0555
+file path=opt/zfs-tests/tests/functional/resilver/resilver_restart_002 \
+ mode=0555
file path=opt/zfs-tests/tests/functional/resilver/setup mode=0555
file path=opt/zfs-tests/tests/functional/resilver/sysevent mode=0555
file path=opt/zfs-tests/tests/functional/rootpool/cleanup mode=0555
diff --git a/usr/src/test/zfs-tests/runfiles/omnios.run b/usr/src/test/zfs-tests/runfiles/omnios.run
index 9a3722aa6a..f287933c2f 100644
--- a/usr/src/test/zfs-tests/runfiles/omnios.run
+++ b/usr/src/test/zfs-tests/runfiles/omnios.run
@@ -205,7 +205,7 @@ tests = ['zfs_rename_001_pos', 'zfs_rename_002_pos', 'zfs_rename_003_pos',
tests = ['zfs_reservation_001_pos', 'zfs_reservation_002_pos']
[/opt/zfs-tests/tests/functional/resilver]
-tests = ['resilver_restart_001']
+tests = ['resilver_restart_001', 'resilver_restart_002']
tags = ['functional', 'resilver']
[/opt/zfs-tests/tests/functional/cli_root/zfs_rollback]
diff --git a/usr/src/test/zfs-tests/runfiles/openindiana.run b/usr/src/test/zfs-tests/runfiles/openindiana.run
index ad0615047c..21d2055a8c 100644
--- a/usr/src/test/zfs-tests/runfiles/openindiana.run
+++ b/usr/src/test/zfs-tests/runfiles/openindiana.run
@@ -205,7 +205,7 @@ tests = ['zfs_rename_001_pos', 'zfs_rename_002_pos', 'zfs_rename_003_pos',
tests = ['zfs_reservation_001_pos', 'zfs_reservation_002_pos']
[/opt/zfs-tests/tests/functional/resilver]
-tests = ['resilver_restart_001']
+tests = ['resilver_restart_001', 'resilver_restart_002']
tags = ['functional', 'resilver']
[/opt/zfs-tests/tests/functional/cli_root/zfs_rollback]
diff --git a/usr/src/test/zfs-tests/runfiles/smartos.run b/usr/src/test/zfs-tests/runfiles/smartos.run
index a9ee33ac4f..92b9b18c57 100644
--- a/usr/src/test/zfs-tests/runfiles/smartos.run
+++ b/usr/src/test/zfs-tests/runfiles/smartos.run
@@ -164,7 +164,7 @@ tests = ['zfs_rename_001_pos', 'zfs_rename_002_pos', 'zfs_rename_003_pos',
tests = ['zfs_reservation_001_pos', 'zfs_reservation_002_pos']
[/opt/zfs-tests/tests/functional/resilver]
-tests = ['resilver_restart_001']
+tests = ['resilver_restart_001', 'resilver_restart_002']
tags = ['functional', 'resilver']
[/opt/zfs-tests/tests/functional/cli_root/zfs_rollback]
diff --git a/usr/src/test/zfs-tests/tests/functional/resilver/Makefile b/usr/src/test/zfs-tests/tests/functional/resilver/Makefile
index 85ee34a135..dd564d19c2 100644
--- a/usr/src/test/zfs-tests/tests/functional/resilver/Makefile
+++ b/usr/src/test/zfs-tests/tests/functional/resilver/Makefile
@@ -19,6 +19,7 @@ PROG = sysevent
SCRIPTS = cleanup \
resilver_restart_001 \
+ resilver_restart_002 \
setup
include $(SRC)/cmd/Makefile.cmd
@@ -40,6 +41,8 @@ $(FILES) := FILEMODE = 0444
CPPFLAGS += -D__EXTENSIONS__
LDLIBS += -lsysevent
+CSTD = $(CSTD_GNU99)
+
all: $(PROG)
$(PROG): $(OBJS)
diff --git a/usr/src/test/zfs-tests/tests/functional/resilver/resilver_restart_001.ksh b/usr/src/test/zfs-tests/tests/functional/resilver/resilver_restart_001.ksh
index 87e0e68cff..6333f4197a 100755
--- a/usr/src/test/zfs-tests/tests/functional/resilver/resilver_restart_001.ksh
+++ b/usr/src/test/zfs-tests/tests/functional/resilver/resilver_restart_001.ksh
@@ -105,7 +105,7 @@ log_onexit cleanup
# Monitor for resilver start events and log them to $EVTFILE as they occur
EVTFILE=$(mktemp /tmp/resilver_events.XXXXXX)
-EVTPID=$($SYSEVENT $EVTFILE)
+EVTPID=$($SYSEVENT -o $EVTFILE ESC_ZFS_resilver_start)
log_must test -n "$EVTPID"
log_must truncate -s $VDEV_FILE_SIZE ${VDEV_FILES[@]} $SPARE_VDEV_FILE
diff --git a/usr/src/test/zfs-tests/tests/functional/resilver/resilver_restart_002.ksh b/usr/src/test/zfs-tests/tests/functional/resilver/resilver_restart_002.ksh
new file mode 100644
index 0000000000..da111da9fd
--- /dev/null
+++ b/usr/src/test/zfs-tests/tests/functional/resilver/resilver_restart_002.ksh
@@ -0,0 +1,110 @@
+#!/bin/ksh -p
+
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2020, Datto Inc. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/resilver/resilver.cfg
+
+SYSEVENT=$STF_SUITE/tests/functional/resilver/sysevent
+
+#
+# DESCRIPTION:
+# Testing resilver completes when scan errors are encountered, but relevant
+# DTL's have not been lost.
+#
+# STRATEGY:
+# 1. Create a pool (1k recordsize)
+# 2. Create a 32m file (32k records)
+# 3. Inject an error halfway through the file
+# 4. Start a resilver, ensure the error is triggered and that the resilver
+# does not restart after finishing
+#
+# NB: use legacy scanning to ensure scan of specific block causes error
+#
+
+function cleanup
+{
+ log_must zinject -c all
+ destroy_pool $TESTPOOL
+ rm -f ${VDEV_FILES[@]} $SPARE_VDEV_FILE
+ log_must set_tunable32 zfs_scan_legacy $ORIG_SCAN_LEGACY
+ [[ -n "$EVTPID" ]] && kill "$EVTPID"
+ [[ -n "$EVTFILE" ]] && rm -f "$EVTFILE"
+}
+
+log_assert "Check for resilver restarts caused by scan errors"
+
+ORIG_SCAN_LEGACY=$(get_tunable zfs_scan_legacy)
+
+log_onexit cleanup
+
+# use legacy scan to ensure injected error will be triggered
+log_must set_tunable32 zfs_scan_legacy 1
+
+ # create the pool and a 32M file (32k blocks)
+log_must truncate -s $VDEV_FILE_SIZE ${VDEV_FILES[0]} $SPARE_VDEV_FILE
+log_must zpool create -f -O recordsize=1k $TESTPOOL ${VDEV_FILES[0]}
+log_must dd if=/dev/urandom of=/$TESTPOOL/file bs=1M count=32 > /dev/null 2>&1
+
+# determine objset/object
+objset=$(zdb -d $TESTPOOL/ | sed -ne 's/.*ID \([0-9]*\).*/\1/p')
+object=$(ls -i /$TESTPOOL/file | awk '{print $1}')
+
+# inject event to cause error during resilver
+log_must zinject -b `printf "%x:%x:0:3fff" $objset $object` $TESTPOOL
+
+
+EVTFILE=$(mktemp /tmp/resilver_events.XXXXXX)
+EVTPID=$($SYSEVENT -o $EVTFILE ESC_ZFS_resilver_start ESC_ZFS_resilver_finish)
+log_must test -n "$EVTPID"
+
+# start resilver
+log_must zpool attach $TESTPOOL ${VDEV_FILES[0]} $SPARE_VDEV_FILE
+
+log_note "waiting for read errors to start showing up"
+for iter in {0..59}
+do
+ zpool sync $TESTPOOL
+ err=$(zpool status $TESTPOOL | grep ${VDEV_FILES[0]} | awk '{print $3}')
+ (( $err > 0 )) && break
+ sleep 1
+done
+
+(( $err == 0 )) && log_fail "Unable to induce errors in resilver"
+
+log_note "waiting for resilver to finish"
+for iter in {0..59}
+do
+ finish=$(grep "ESC_ZFS_resilver_finish" $EVTFILE | wc -l)
+ (( $finish > 0 )) && break
+ sleep 1
+done
+
+(( $finish == 0 )) && log_fail "resilver took too long to finish"
+
+# wait a few syncs to ensure that zfs does not restart the resilver
+log_must zpool sync $TESTPOOL
+log_must zpool sync $TESTPOOL
+
+# check if resilver was restarted
+start=$(grep "ESC_ZFS_resilver_start" $EVTFILE | wc -l)
+(( $start != 1 )) && log_fail "resilver restarted unnecessarily"
+
+log_pass "Resilver did not restart unnecessarily from scan errors"
diff --git a/usr/src/test/zfs-tests/tests/functional/resilver/sysevent.c b/usr/src/test/zfs-tests/tests/functional/resilver/sysevent.c
index 1310c07f90..62a8b2faa2 100644
--- a/usr/src/test/zfs-tests/tests/functional/resilver/sysevent.c
+++ b/usr/src/test/zfs-tests/tests/functional/resilver/sysevent.c
@@ -52,7 +52,6 @@ process_event(sysevent_t *ev)
errx(EXIT_FAILURE, "failed to retrieve sysevent metadata");
VERIFY0(strcmp(class, EC_ZFS));
- VERIFY0(strcmp(subclass, ESC_ZFS_RESILVER_START));
flockfile(out);
(void) fprintf(out, "Received %s.%s event\n", class, subclass);
@@ -77,11 +76,8 @@ child_fatal(int fd, const char *msg, ...)
}
static void
-do_child(int fd)
+do_child(int fd, char * const subclasses[], size_t n)
{
- const char *subclasses[] = {
- ESC_ZFS_RESILVER_START,
- };
sysevent_handle_t *handle;
int ret = 0;
@@ -90,8 +86,8 @@ do_child(int fd)
strerror(errno));
}
- if (sysevent_subscribe_event(handle, EC_ZFS, subclasses,
- ARRAY_SIZE(subclasses)) != 0) {
+ if (sysevent_subscribe_event(handle, EC_ZFS,
+ (const char **)subclasses, n) != 0) {
child_fatal(fd, "failed to subscribe to sysevents: %s",
strerror(errno));
}
@@ -107,20 +103,39 @@ do_child(int fd)
(void) pause();
}
+static void
+usage(const char *name)
+{
+ (void) fprintf(stderr, "Usage: %s [-o outfile] zfs_event...\n", name);
+ exit(2);
+}
+
int
-main(int argc, char **argv)
+main(int argc, char * const argv[])
{
+ const char *outfile = NULL;
pid_t child;
int fds[2];
int ret = 0;
-
- if (argc < 2) {
- (void) fprintf(stderr, "Usage: %s outfile\n", argv[0]);
- exit(EXIT_FAILURE);
+ int c;
+
+ while ((c = getopt(argc, argv, "o:")) != -1) {
+ switch (c) {
+ case 'o':
+ outfile = optarg;
+ break;
+ case '?':
+ (void) fprintf(stderr, "Invalid option -%c\n", optopt);
+ usage(argv[0]);
+ }
}
- if ((out = fopen(argv[1], "w")) == NULL)
- err(EXIT_FAILURE, "unable to open %s", argv[1]);
+ if (outfile != NULL) {
+ if ((out = fopen(optarg, "w")) == NULL)
+ err(EXIT_FAILURE, "unable to open %s", optarg);
+ } else {
+ out = stdout;
+ }
VERIFY0(pipe(fds));
@@ -128,7 +143,7 @@ main(int argc, char **argv)
case -1:
err(EXIT_FAILURE, "unable to fork");
case 0:
- do_child(fds[1]);
+ do_child(fds[1], argv + optind, (size_t)(argc - optind));
break;
default:
break;
diff --git a/usr/src/uts/common/fs/zfs/dsl_scan.c b/usr/src/uts/common/fs/zfs/dsl_scan.c
index 427ed961bb..fa7b9fb2fc 100644
--- a/usr/src/uts/common/fs/zfs/dsl_scan.c
+++ b/usr/src/uts/common/fs/zfs/dsl_scan.c
@@ -549,6 +549,22 @@ dsl_scan_init(dsl_pool_t *dp, uint64_t txg)
zfs_dbgmsg("new-style scrub was modified "
"by old software; restarting in txg %llu",
(longlong_t)scn->scn_restart_txg);
+ } else if (dsl_scan_resilvering(dp)) {
+ /*
+ * If a resilver is in progress and there are already
+ * errors, restart it instead of finishing this scan and
+ * then restarting it. If there haven't been any errors
+ * then remember that the incore DTL is valid.
+ */
+ if (scn->scn_phys.scn_errors > 0) {
+ scn->scn_restart_txg = txg;
+ zfs_dbgmsg("resilver can't excise DTL_MISSING "
+ "when finished; restarting in txg %llu",
+ (u_longlong_t)scn->scn_restart_txg);
+ } else {
+ /* it's safe to excise DTL when finished */
+ spa->spa_scrub_started = B_TRUE;
+ }
}
}
@@ -887,7 +903,6 @@ dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx)
"errors=%llu", spa_get_errlog_size(spa));
if (DSL_SCAN_IS_SCRUB_RESILVER(scn)) {
- spa->spa_scrub_started = B_FALSE;
spa->spa_scrub_active = B_FALSE;
/*
@@ -915,6 +930,12 @@ dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx)
spa_errlog_rotate(spa);
/*
+ * Don't clear flag until after vdev_dtl_reassess to ensure that
+ * DTL_MISSING will get updated when possible.
+ */
+ spa->spa_scrub_started = B_FALSE;
+
+ /*
* We may have finished replacing a device.
* Let the async thread assess this and handle the detach.
*/
diff --git a/usr/src/uts/common/fs/zfs/vdev.c b/usr/src/uts/common/fs/zfs/vdev.c
index f824490255..9773ec7960 100644
--- a/usr/src/uts/common/fs/zfs/vdev.c
+++ b/usr/src/uts/common/fs/zfs/vdev.c
@@ -99,6 +99,12 @@ boolean_t vdev_validate_skip = B_FALSE;
int zfs_vdev_dtl_sm_blksz = (1 << 12);
/*
+ * Ignore errors during scrub/resilver. Allows to work around resilver
+ * upon import when there are pool errors.
+ */
+int zfs_scan_ignore_errors = 0;
+
+/*
* vdev-wide space maps that have lots of entries written to them at
* the end of each transaction can benefit from a higher I/O bandwidth
* (e.g. vdev_obsolete_sm), thus we default their block size to 128K.
@@ -2465,7 +2471,6 @@ vdev_dtl_should_excise(vdev_t *vd)
spa_t *spa = vd->vdev_spa;
dsl_scan_t *scn = spa->spa_dsl_pool->dp_scan;
- ASSERT0(scn->scn_phys.scn_errors);
ASSERT0(vd->vdev_children);
if (vd->vdev_state < VDEV_STATE_DEGRADED)
@@ -2515,10 +2520,29 @@ vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg, int scrub_done)
if (vd->vdev_ops->vdev_op_leaf) {
dsl_scan_t *scn = spa->spa_dsl_pool->dp_scan;
+ boolean_t wasempty = B_TRUE;
mutex_enter(&vd->vdev_dtl_lock);
/*
+ * If requested, pretend the scan completed cleanly.
+ */
+ if (zfs_scan_ignore_errors && scn)
+ scn->scn_phys.scn_errors = 0;
+
+ if (scrub_txg != 0 &&
+ !range_tree_is_empty(vd->vdev_dtl[DTL_MISSING])) {
+ wasempty = B_FALSE;
+ zfs_dbgmsg("guid:%llu txg:%llu scrub:%llu started:%d "
+ "dtl:%llu/%llu errors:%llu",
+ (u_longlong_t)vd->vdev_guid, (u_longlong_t)txg,
+ (u_longlong_t)scrub_txg, spa->spa_scrub_started,
+ (u_longlong_t)vdev_dtl_min(vd),
+ (u_longlong_t)vdev_dtl_max(vd),
+ (u_longlong_t)(scn ? scn->scn_phys.scn_errors : 0));
+ }
+
+ /*
* If we've completed a scan cleanly then determine
* if this vdev should remove any DTLs. We only want to
* excise regions on vdevs that were available during
@@ -2554,6 +2578,14 @@ vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg, int scrub_done)
space_reftree_generate_map(&reftree,
vd->vdev_dtl[DTL_MISSING], 1);
space_reftree_destroy(&reftree);
+
+ if (!range_tree_is_empty(vd->vdev_dtl[DTL_MISSING])) {
+ zfs_dbgmsg("update DTL_MISSING:%llu/%llu",
+ (u_longlong_t)vdev_dtl_min(vd),
+ (u_longlong_t)vdev_dtl_max(vd));
+ } else if (!wasempty) {
+ zfs_dbgmsg("DTL_MISSING is now empty");
+ }
}
range_tree_vacate(vd->vdev_dtl[DTL_PARTIAL], NULL, NULL);
range_tree_walk(vd->vdev_dtl[DTL_MISSING],