diff options
author | John Poduska <jpoduska@datto.com> | 2020-05-21 14:01:04 -0500 |
---|---|---|
committer | Jason King <jason.king@joyent.com> | 2020-05-22 14:16:38 -0500 |
commit | 165c5c6fe7d6c7a95878c8a3aae7da65d1da1d90 (patch) | |
tree | a3c526b366e720565eac524a198596796d2d898d /usr/src | |
parent | 9c3024a3457d2d1269be18124a1ac69e33000da7 (diff) | |
download | illumos-joyent-165c5c6fe7d6c7a95878c8a3aae7da65d1da1d90.tar.gz |
12774 Resilver restarts unnecessarily when it encounters errors
Portions contributed by: Tim Chase <tim@chase2k.com>
Portions contributed by: Jason King <jason.king@joyent.com>
Reviewed by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed by: Paul Zuchowski <pzuchowski@datto.com>
Reviewed by: Toomas Soome <tsoome@me.com>
Diffstat (limited to 'usr/src')
10 files changed, 204 insertions, 21 deletions
diff --git a/usr/src/pkg/manifests/system-test-zfstest.mf b/usr/src/pkg/manifests/system-test-zfstest.mf index 233da7a9ad..3a4d20d9bf 100644 --- a/usr/src/pkg/manifests/system-test-zfstest.mf +++ b/usr/src/pkg/manifests/system-test-zfstest.mf @@ -2933,6 +2933,8 @@ file path=opt/zfs-tests/tests/functional/resilver/cleanup mode=0555 file path=opt/zfs-tests/tests/functional/resilver/resilver.cfg mode=0444 file path=opt/zfs-tests/tests/functional/resilver/resilver_restart_001 \ mode=0555 +file path=opt/zfs-tests/tests/functional/resilver/resilver_restart_002 \ + mode=0555 file path=opt/zfs-tests/tests/functional/resilver/setup mode=0555 file path=opt/zfs-tests/tests/functional/resilver/sysevent mode=0555 file path=opt/zfs-tests/tests/functional/rootpool/cleanup mode=0555 diff --git a/usr/src/test/zfs-tests/runfiles/omnios.run b/usr/src/test/zfs-tests/runfiles/omnios.run index 9a3722aa6a..f287933c2f 100644 --- a/usr/src/test/zfs-tests/runfiles/omnios.run +++ b/usr/src/test/zfs-tests/runfiles/omnios.run @@ -205,7 +205,7 @@ tests = ['zfs_rename_001_pos', 'zfs_rename_002_pos', 'zfs_rename_003_pos', tests = ['zfs_reservation_001_pos', 'zfs_reservation_002_pos'] [/opt/zfs-tests/tests/functional/resilver] -tests = ['resilver_restart_001'] +tests = ['resilver_restart_001', 'resilver_restart_002'] tags = ['functional', 'resilver'] [/opt/zfs-tests/tests/functional/cli_root/zfs_rollback] diff --git a/usr/src/test/zfs-tests/runfiles/openindiana.run b/usr/src/test/zfs-tests/runfiles/openindiana.run index ad0615047c..21d2055a8c 100644 --- a/usr/src/test/zfs-tests/runfiles/openindiana.run +++ b/usr/src/test/zfs-tests/runfiles/openindiana.run @@ -205,7 +205,7 @@ tests = ['zfs_rename_001_pos', 'zfs_rename_002_pos', 'zfs_rename_003_pos', tests = ['zfs_reservation_001_pos', 'zfs_reservation_002_pos'] [/opt/zfs-tests/tests/functional/resilver] -tests = ['resilver_restart_001'] +tests = ['resilver_restart_001', 'resilver_restart_002'] tags = ['functional', 'resilver'] [/opt/zfs-tests/tests/functional/cli_root/zfs_rollback] diff --git a/usr/src/test/zfs-tests/runfiles/smartos.run b/usr/src/test/zfs-tests/runfiles/smartos.run index a9ee33ac4f..92b9b18c57 100644 --- a/usr/src/test/zfs-tests/runfiles/smartos.run +++ b/usr/src/test/zfs-tests/runfiles/smartos.run @@ -164,7 +164,7 @@ tests = ['zfs_rename_001_pos', 'zfs_rename_002_pos', 'zfs_rename_003_pos', tests = ['zfs_reservation_001_pos', 'zfs_reservation_002_pos'] [/opt/zfs-tests/tests/functional/resilver] -tests = ['resilver_restart_001'] +tests = ['resilver_restart_001', 'resilver_restart_002'] tags = ['functional', 'resilver'] [/opt/zfs-tests/tests/functional/cli_root/zfs_rollback] diff --git a/usr/src/test/zfs-tests/tests/functional/resilver/Makefile b/usr/src/test/zfs-tests/tests/functional/resilver/Makefile index 85ee34a135..dd564d19c2 100644 --- a/usr/src/test/zfs-tests/tests/functional/resilver/Makefile +++ b/usr/src/test/zfs-tests/tests/functional/resilver/Makefile @@ -19,6 +19,7 @@ PROG = sysevent SCRIPTS = cleanup \ resilver_restart_001 \ + resilver_restart_002 \ setup include $(SRC)/cmd/Makefile.cmd @@ -40,6 +41,8 @@ $(FILES) := FILEMODE = 0444 CPPFLAGS += -D__EXTENSIONS__ LDLIBS += -lsysevent +CSTD = $(CSTD_GNU99) + all: $(PROG) $(PROG): $(OBJS) diff --git a/usr/src/test/zfs-tests/tests/functional/resilver/resilver_restart_001.ksh b/usr/src/test/zfs-tests/tests/functional/resilver/resilver_restart_001.ksh index 87e0e68cff..6333f4197a 100755 --- a/usr/src/test/zfs-tests/tests/functional/resilver/resilver_restart_001.ksh +++ b/usr/src/test/zfs-tests/tests/functional/resilver/resilver_restart_001.ksh @@ -105,7 +105,7 @@ log_onexit cleanup # Monitor for resilver start events and log them to $EVTFILE as they occur EVTFILE=$(mktemp /tmp/resilver_events.XXXXXX) -EVTPID=$($SYSEVENT $EVTFILE) +EVTPID=$($SYSEVENT -o $EVTFILE ESC_ZFS_resilver_start) log_must test -n "$EVTPID" log_must truncate -s $VDEV_FILE_SIZE ${VDEV_FILES[@]} $SPARE_VDEV_FILE diff --git a/usr/src/test/zfs-tests/tests/functional/resilver/resilver_restart_002.ksh b/usr/src/test/zfs-tests/tests/functional/resilver/resilver_restart_002.ksh new file mode 100644 index 0000000000..da111da9fd --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/resilver/resilver_restart_002.ksh @@ -0,0 +1,110 @@ +#!/bin/ksh -p + +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2020, Datto Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/resilver/resilver.cfg + +SYSEVENT=$STF_SUITE/tests/functional/resilver/sysevent + +# +# DESCRIPTION: +# Testing resilver completes when scan errors are encountered, but relevant +# DTL's have not been lost. +# +# STRATEGY: +# 1. Create a pool (1k recordsize) +# 2. Create a 32m file (32k records) +# 3. Inject an error halfway through the file +# 4. Start a resilver, ensure the error is triggered and that the resilver +# does not restart after finishing +# +# NB: use legacy scanning to ensure scan of specific block causes error +# + +function cleanup +{ + log_must zinject -c all + destroy_pool $TESTPOOL + rm -f ${VDEV_FILES[@]} $SPARE_VDEV_FILE + log_must set_tunable32 zfs_scan_legacy $ORIG_SCAN_LEGACY + [[ -n "$EVTPID" ]] && kill "$EVTPID" + [[ -n "$EVTFILE" ]] && rm -f "$EVTFILE" +} + +log_assert "Check for resilver restarts caused by scan errors" + +ORIG_SCAN_LEGACY=$(get_tunable zfs_scan_legacy) + +log_onexit cleanup + +# use legacy scan to ensure injected error will be triggered +log_must set_tunable32 zfs_scan_legacy 1 + + # create the pool and a 32M file (32k blocks) +log_must truncate -s $VDEV_FILE_SIZE ${VDEV_FILES[0]} $SPARE_VDEV_FILE +log_must zpool create -f -O recordsize=1k $TESTPOOL ${VDEV_FILES[0]} +log_must dd if=/dev/urandom of=/$TESTPOOL/file bs=1M count=32 > /dev/null 2>&1 + +# determine objset/object +objset=$(zdb -d $TESTPOOL/ | sed -ne 's/.*ID \([0-9]*\).*/\1/p') +object=$(ls -i /$TESTPOOL/file | awk '{print $1}') + +# inject event to cause error during resilver +log_must zinject -b `printf "%x:%x:0:3fff" $objset $object` $TESTPOOL + + +EVTFILE=$(mktemp /tmp/resilver_events.XXXXXX) +EVTPID=$($SYSEVENT -o $EVTFILE ESC_ZFS_resilver_start ESC_ZFS_resilver_finish) +log_must test -n "$EVTPID" + +# start resilver +log_must zpool attach $TESTPOOL ${VDEV_FILES[0]} $SPARE_VDEV_FILE + +log_note "waiting for read errors to start showing up" +for iter in {0..59} +do + zpool sync $TESTPOOL + err=$(zpool status $TESTPOOL | grep ${VDEV_FILES[0]} | awk '{print $3}') + (( $err > 0 )) && break + sleep 1 +done + +(( $err == 0 )) && log_fail "Unable to induce errors in resilver" + +log_note "waiting for resilver to finish" +for iter in {0..59} +do + finish=$(grep "ESC_ZFS_resilver_finish" $EVTFILE | wc -l) + (( $finish > 0 )) && break + sleep 1 +done + +(( $finish == 0 )) && log_fail "resilver took too long to finish" + +# wait a few syncs to ensure that zfs does not restart the resilver +log_must zpool sync $TESTPOOL +log_must zpool sync $TESTPOOL + +# check if resilver was restarted +start=$(grep "ESC_ZFS_resilver_start" $EVTFILE | wc -l) +(( $start != 1 )) && log_fail "resilver restarted unnecessarily" + +log_pass "Resilver did not restart unnecessarily from scan errors" diff --git a/usr/src/test/zfs-tests/tests/functional/resilver/sysevent.c b/usr/src/test/zfs-tests/tests/functional/resilver/sysevent.c index 1310c07f90..62a8b2faa2 100644 --- a/usr/src/test/zfs-tests/tests/functional/resilver/sysevent.c +++ b/usr/src/test/zfs-tests/tests/functional/resilver/sysevent.c @@ -52,7 +52,6 @@ process_event(sysevent_t *ev) errx(EXIT_FAILURE, "failed to retrieve sysevent metadata"); VERIFY0(strcmp(class, EC_ZFS)); - VERIFY0(strcmp(subclass, ESC_ZFS_RESILVER_START)); flockfile(out); (void) fprintf(out, "Received %s.%s event\n", class, subclass); @@ -77,11 +76,8 @@ child_fatal(int fd, const char *msg, ...) } static void -do_child(int fd) +do_child(int fd, char * const subclasses[], size_t n) { - const char *subclasses[] = { - ESC_ZFS_RESILVER_START, - }; sysevent_handle_t *handle; int ret = 0; @@ -90,8 +86,8 @@ do_child(int fd) strerror(errno)); } - if (sysevent_subscribe_event(handle, EC_ZFS, subclasses, - ARRAY_SIZE(subclasses)) != 0) { + if (sysevent_subscribe_event(handle, EC_ZFS, + (const char **)subclasses, n) != 0) { child_fatal(fd, "failed to subscribe to sysevents: %s", strerror(errno)); } @@ -107,20 +103,39 @@ do_child(int fd) (void) pause(); } +static void +usage(const char *name) +{ + (void) fprintf(stderr, "Usage: %s [-o outfile] zfs_event...\n", name); + exit(2); +} + int -main(int argc, char **argv) +main(int argc, char * const argv[]) { + const char *outfile = NULL; pid_t child; int fds[2]; int ret = 0; - - if (argc < 2) { - (void) fprintf(stderr, "Usage: %s outfile\n", argv[0]); - exit(EXIT_FAILURE); + int c; + + while ((c = getopt(argc, argv, "o:")) != -1) { + switch (c) { + case 'o': + outfile = optarg; + break; + case '?': + (void) fprintf(stderr, "Invalid option -%c\n", optopt); + usage(argv[0]); + } } - if ((out = fopen(argv[1], "w")) == NULL) - err(EXIT_FAILURE, "unable to open %s", argv[1]); + if (outfile != NULL) { + if ((out = fopen(optarg, "w")) == NULL) + err(EXIT_FAILURE, "unable to open %s", optarg); + } else { + out = stdout; + } VERIFY0(pipe(fds)); @@ -128,7 +143,7 @@ main(int argc, char **argv) case -1: err(EXIT_FAILURE, "unable to fork"); case 0: - do_child(fds[1]); + do_child(fds[1], argv + optind, (size_t)(argc - optind)); break; default: break; diff --git a/usr/src/uts/common/fs/zfs/dsl_scan.c b/usr/src/uts/common/fs/zfs/dsl_scan.c index 427ed961bb..fa7b9fb2fc 100644 --- a/usr/src/uts/common/fs/zfs/dsl_scan.c +++ b/usr/src/uts/common/fs/zfs/dsl_scan.c @@ -549,6 +549,22 @@ dsl_scan_init(dsl_pool_t *dp, uint64_t txg) zfs_dbgmsg("new-style scrub was modified " "by old software; restarting in txg %llu", (longlong_t)scn->scn_restart_txg); + } else if (dsl_scan_resilvering(dp)) { + /* + * If a resilver is in progress and there are already + * errors, restart it instead of finishing this scan and + * then restarting it. If there haven't been any errors + * then remember that the incore DTL is valid. + */ + if (scn->scn_phys.scn_errors > 0) { + scn->scn_restart_txg = txg; + zfs_dbgmsg("resilver can't excise DTL_MISSING " + "when finished; restarting in txg %llu", + (u_longlong_t)scn->scn_restart_txg); + } else { + /* it's safe to excise DTL when finished */ + spa->spa_scrub_started = B_TRUE; + } } } @@ -887,7 +903,6 @@ dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx) "errors=%llu", spa_get_errlog_size(spa)); if (DSL_SCAN_IS_SCRUB_RESILVER(scn)) { - spa->spa_scrub_started = B_FALSE; spa->spa_scrub_active = B_FALSE; /* @@ -915,6 +930,12 @@ dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx) spa_errlog_rotate(spa); /* + * Don't clear flag until after vdev_dtl_reassess to ensure that + * DTL_MISSING will get updated when possible. + */ + spa->spa_scrub_started = B_FALSE; + + /* * We may have finished replacing a device. * Let the async thread assess this and handle the detach. */ diff --git a/usr/src/uts/common/fs/zfs/vdev.c b/usr/src/uts/common/fs/zfs/vdev.c index f824490255..9773ec7960 100644 --- a/usr/src/uts/common/fs/zfs/vdev.c +++ b/usr/src/uts/common/fs/zfs/vdev.c @@ -99,6 +99,12 @@ boolean_t vdev_validate_skip = B_FALSE; int zfs_vdev_dtl_sm_blksz = (1 << 12); /* + * Ignore errors during scrub/resilver. Allows to work around resilver + * upon import when there are pool errors. + */ +int zfs_scan_ignore_errors = 0; + +/* * vdev-wide space maps that have lots of entries written to them at * the end of each transaction can benefit from a higher I/O bandwidth * (e.g. vdev_obsolete_sm), thus we default their block size to 128K. @@ -2465,7 +2471,6 @@ vdev_dtl_should_excise(vdev_t *vd) spa_t *spa = vd->vdev_spa; dsl_scan_t *scn = spa->spa_dsl_pool->dp_scan; - ASSERT0(scn->scn_phys.scn_errors); ASSERT0(vd->vdev_children); if (vd->vdev_state < VDEV_STATE_DEGRADED) @@ -2515,10 +2520,29 @@ vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg, int scrub_done) if (vd->vdev_ops->vdev_op_leaf) { dsl_scan_t *scn = spa->spa_dsl_pool->dp_scan; + boolean_t wasempty = B_TRUE; mutex_enter(&vd->vdev_dtl_lock); /* + * If requested, pretend the scan completed cleanly. + */ + if (zfs_scan_ignore_errors && scn) + scn->scn_phys.scn_errors = 0; + + if (scrub_txg != 0 && + !range_tree_is_empty(vd->vdev_dtl[DTL_MISSING])) { + wasempty = B_FALSE; + zfs_dbgmsg("guid:%llu txg:%llu scrub:%llu started:%d " + "dtl:%llu/%llu errors:%llu", + (u_longlong_t)vd->vdev_guid, (u_longlong_t)txg, + (u_longlong_t)scrub_txg, spa->spa_scrub_started, + (u_longlong_t)vdev_dtl_min(vd), + (u_longlong_t)vdev_dtl_max(vd), + (u_longlong_t)(scn ? scn->scn_phys.scn_errors : 0)); + } + + /* * If we've completed a scan cleanly then determine * if this vdev should remove any DTLs. We only want to * excise regions on vdevs that were available during @@ -2554,6 +2578,14 @@ vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg, int scrub_done) space_reftree_generate_map(&reftree, vd->vdev_dtl[DTL_MISSING], 1); space_reftree_destroy(&reftree); + + if (!range_tree_is_empty(vd->vdev_dtl[DTL_MISSING])) { + zfs_dbgmsg("update DTL_MISSING:%llu/%llu", + (u_longlong_t)vdev_dtl_min(vd), + (u_longlong_t)vdev_dtl_max(vd)); + } else if (!wasempty) { + zfs_dbgmsg("DTL_MISSING is now empty"); + } } range_tree_vacate(vd->vdev_dtl[DTL_PARTIAL], NULL, NULL); range_tree_walk(vd->vdev_dtl[DTL_MISSING], |