From 0c06d385ea5bbe11d20ecea2e02cdc78733d5359 Mon Sep 17 00:00:00 2001 From: jwpoduska Date: Fri, 24 Apr 2020 12:36:43 -0500 Subject: 12636 Prevent unnecessary resilver restarts Reviewed by: Brian Behlendorf Reviewed by: John Gallagher Reviewed by: Kjeld Schouten Reviewed by: John Kennedy Reviewed by: Jerry Jelinek Portions contributed by: Alexander Motin Portions contributed by: Jason King Approved by: Robert Mustacchi --- usr/src/pkg/manifests/system-test-zfstest.mf | 7 + usr/src/test/zfs-tests/runfiles/delphix.run | 4 + usr/src/test/zfs-tests/runfiles/omnios.run | 4 + usr/src/test/zfs-tests/runfiles/openindiana.run | 4 + usr/src/test/zfs-tests/runfiles/smartos.run | 4 + .../zfs-tests/tests/functional/resilver/Makefile | 71 ++++++++ .../tests/functional/resilver/cleanup.ksh | 31 ++++ .../tests/functional/resilver/resilver.cfg | 32 ++++ .../functional/resilver/resilver_restart_001.ksh | 196 +++++++++++++++++++++ .../zfs-tests/tests/functional/resilver/setup.ksh | 31 ++++ .../zfs-tests/tests/functional/resilver/sysevent.c | 148 ++++++++++++++++ usr/src/uts/common/fs/zfs/dsl_scan.c | 108 ++++++------ usr/src/uts/common/fs/zfs/spa.c | 14 +- usr/src/uts/common/fs/zfs/sys/dsl_scan.h | 6 +- usr/src/uts/common/fs/zfs/sys/spa.h | 3 +- usr/src/uts/common/fs/zfs/sys/vdev.h | 3 + usr/src/uts/common/fs/zfs/vdev.c | 76 +++++--- 17 files changed, 654 insertions(+), 88 deletions(-) create mode 100644 usr/src/test/zfs-tests/tests/functional/resilver/Makefile create mode 100755 usr/src/test/zfs-tests/tests/functional/resilver/cleanup.ksh create mode 100644 usr/src/test/zfs-tests/tests/functional/resilver/resilver.cfg create mode 100755 usr/src/test/zfs-tests/tests/functional/resilver/resilver_restart_001.ksh create mode 100755 usr/src/test/zfs-tests/tests/functional/resilver/setup.ksh create mode 100644 usr/src/test/zfs-tests/tests/functional/resilver/sysevent.c diff --git a/usr/src/pkg/manifests/system-test-zfstest.mf b/usr/src/pkg/manifests/system-test-zfstest.mf index 
09d431d538..233da7a9ad 100644 --- a/usr/src/pkg/manifests/system-test-zfstest.mf +++ b/usr/src/pkg/manifests/system-test-zfstest.mf @@ -150,6 +150,7 @@ dir path=opt/zfs-tests/tests/functional/removal dir path=opt/zfs-tests/tests/functional/rename_dirs dir path=opt/zfs-tests/tests/functional/replacement dir path=opt/zfs-tests/tests/functional/reservation +dir path=opt/zfs-tests/tests/functional/resilver dir path=opt/zfs-tests/tests/functional/rootpool dir path=opt/zfs-tests/tests/functional/rsend dir path=opt/zfs-tests/tests/functional/scrub_mirror @@ -2928,6 +2929,12 @@ file path=opt/zfs-tests/tests/functional/reservation/reservation_021_neg \ file path=opt/zfs-tests/tests/functional/reservation/reservation_022_pos \ mode=0555 file path=opt/zfs-tests/tests/functional/reservation/setup mode=0555 +file path=opt/zfs-tests/tests/functional/resilver/cleanup mode=0555 +file path=opt/zfs-tests/tests/functional/resilver/resilver.cfg mode=0444 +file path=opt/zfs-tests/tests/functional/resilver/resilver_restart_001 \ + mode=0555 +file path=opt/zfs-tests/tests/functional/resilver/setup mode=0555 +file path=opt/zfs-tests/tests/functional/resilver/sysevent mode=0555 file path=opt/zfs-tests/tests/functional/rootpool/cleanup mode=0555 file path=opt/zfs-tests/tests/functional/rootpool/rootpool_002_neg mode=0555 file path=opt/zfs-tests/tests/functional/rootpool/rootpool_003_neg mode=0555 diff --git a/usr/src/test/zfs-tests/runfiles/delphix.run b/usr/src/test/zfs-tests/runfiles/delphix.run index ef1c80efcc..8acd2710bf 100644 --- a/usr/src/test/zfs-tests/runfiles/delphix.run +++ b/usr/src/test/zfs-tests/runfiles/delphix.run @@ -370,6 +370,10 @@ tests = ['zpool_replace_001_neg', 'zpool_replace_002_neg', 'replace-o_ashift', tests = ['zpool_resilver_bad_args', 'zpool_resilver_restart'] tags = ['functional', 'cli_root', 'zpool_resilver'] +[/opt/zfs-tests/tests/functional/resilver] +tests = ['resilver_restart_001'] +tags = ['functional', 'resilver'] + 
[/opt/zfs-tests/tests/functional/cli_root/zpool_scrub] tests = ['zpool_scrub_001_neg', 'zpool_scrub_002_pos', 'zpool_scrub_003_pos', 'zpool_scrub_004_pos', 'zpool_scrub_005_pos', 'zpool_scrub_print_repairing', diff --git a/usr/src/test/zfs-tests/runfiles/omnios.run b/usr/src/test/zfs-tests/runfiles/omnios.run index dd8f0738de..9a3722aa6a 100644 --- a/usr/src/test/zfs-tests/runfiles/omnios.run +++ b/usr/src/test/zfs-tests/runfiles/omnios.run @@ -204,6 +204,10 @@ tests = ['zfs_rename_001_pos', 'zfs_rename_002_pos', 'zfs_rename_003_pos', [/opt/zfs-tests/tests/functional/cli_root/zfs_reservation] tests = ['zfs_reservation_001_pos', 'zfs_reservation_002_pos'] +[/opt/zfs-tests/tests/functional/resilver] +tests = ['resilver_restart_001'] +tags = ['functional', 'resilver'] + [/opt/zfs-tests/tests/functional/cli_root/zfs_rollback] tests = ['zfs_rollback_001_pos', 'zfs_rollback_002_pos', 'zfs_rollback_003_neg', 'zfs_rollback_004_neg'] diff --git a/usr/src/test/zfs-tests/runfiles/openindiana.run b/usr/src/test/zfs-tests/runfiles/openindiana.run index 031bd8bf0f..ad0615047c 100644 --- a/usr/src/test/zfs-tests/runfiles/openindiana.run +++ b/usr/src/test/zfs-tests/runfiles/openindiana.run @@ -204,6 +204,10 @@ tests = ['zfs_rename_001_pos', 'zfs_rename_002_pos', 'zfs_rename_003_pos', [/opt/zfs-tests/tests/functional/cli_root/zfs_reservation] tests = ['zfs_reservation_001_pos', 'zfs_reservation_002_pos'] +[/opt/zfs-tests/tests/functional/resilver] +tests = ['resilver_restart_001'] +tags = ['functional', 'resilver'] + [/opt/zfs-tests/tests/functional/cli_root/zfs_rollback] tests = ['zfs_rollback_001_pos', 'zfs_rollback_002_pos', 'zfs_rollback_003_neg', 'zfs_rollback_004_neg'] diff --git a/usr/src/test/zfs-tests/runfiles/smartos.run b/usr/src/test/zfs-tests/runfiles/smartos.run index 1a3f3b7bae..a9ee33ac4f 100644 --- a/usr/src/test/zfs-tests/runfiles/smartos.run +++ b/usr/src/test/zfs-tests/runfiles/smartos.run @@ -163,6 +163,10 @@ tests = ['zfs_rename_001_pos', 
'zfs_rename_002_pos', 'zfs_rename_003_pos', [/opt/zfs-tests/tests/functional/cli_root/zfs_reservation] tests = ['zfs_reservation_001_pos', 'zfs_reservation_002_pos'] +[/opt/zfs-tests/tests/functional/resilver] +tests = ['resilver_restart_001'] +tags = ['functional', 'resilver'] + [/opt/zfs-tests/tests/functional/cli_root/zfs_rollback] tests = ['zfs_rollback_001_pos', 'zfs_rollback_002_pos', 'zfs_rollback_003_neg', 'zfs_rollback_004_neg'] diff --git a/usr/src/test/zfs-tests/tests/functional/resilver/Makefile b/usr/src/test/zfs-tests/tests/functional/resilver/Makefile new file mode 100644 index 0000000000..85ee34a135 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/resilver/Makefile @@ -0,0 +1,71 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2020 Joyent, Inc. 
+# + +include $(SRC)/Makefile.master + +PROG = sysevent + +SCRIPTS = cleanup \ + resilver_restart_001 \ + setup + +include $(SRC)/cmd/Makefile.cmd +include $(SRC)/test/Makefile.com + +ROOTOPTPKG = $(ROOT)/opt/zfs-tests +TARGETDIR = $(ROOTOPTPKG)/tests/functional/resilver + +OBJS = $(PROG:%=%.o) +SRCS = $(OBJS:%.o=%.c) +SRCFILES = resilver.cfg + +CMDS = $(PROG:%=$(TARGETDIR)/%) $(SCRIPTS:%=$(TARGETDIR)/%) +$(CMDS) := FILEMODE = 0555 + +FILES = $(SRCFILES:%=$(TARGETDIR)/%) +$(FILES) := FILEMODE = 0444 + +CPPFLAGS += -D__EXTENSIONS__ +LDLIBS += -lsysevent + +all: $(PROG) + +$(PROG): $(OBJS) + $(LINK.c) $(OBJS) -o $@ $(LDFLAGS) $(LDLIBS) + $(POST_PROCESS) + +%.o: %.c + $(COMPILE.c) $< + +install: all $(CMDS) $(FILES) + +clobber: clean + -$(RM) $(PROG) + +clean: + -$(RM) $(OBJS) + +$(CMDS): $(TARGETDIR) $(PROG) + +$(FILES): $(SRCFILES) + +$(TARGETDIR): + $(INS.dir) + +$(TARGETDIR)/%: % + $(INS.file) + +$(TARGETDIR)/%: %.ksh + $(INS.rename) diff --git a/usr/src/test/zfs-tests/tests/functional/resilver/cleanup.ksh b/usr/src/test/zfs-tests/tests/functional/resilver/cleanup.ksh new file mode 100755 index 0000000000..4dfa814245 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/resilver/cleanup.ksh @@ -0,0 +1,31 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END + +# +# Copyright (c) 2019, Datto Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/resilver/resilver.cfg + +verify_runnable "global" + +log_pass diff --git a/usr/src/test/zfs-tests/tests/functional/resilver/resilver.cfg b/usr/src/test/zfs-tests/tests/functional/resilver/resilver.cfg new file mode 100644 index 0000000000..88dfd24aed --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/resilver/resilver.cfg @@ -0,0 +1,32 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END + +# +# Copyright (c) 2019, Datto Inc. All rights reserved. +# + +. 
$STF_SUITE/include/libtest.shlib + +verify_runnable "global" + +set -A VDEV_FILES $TEST_BASE_DIR/file-{1..4} +SPARE_VDEV_FILE=$TEST_BASE_DIR/spare-1 + +VDEV_FILE_SIZE=$(( $SPA_MINDEVSIZE * 2 )) diff --git a/usr/src/test/zfs-tests/tests/functional/resilver/resilver_restart_001.ksh b/usr/src/test/zfs-tests/tests/functional/resilver/resilver_restart_001.ksh new file mode 100755 index 0000000000..87e0e68cff --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/resilver/resilver_restart_001.ksh @@ -0,0 +1,196 @@ +#!/bin/ksh -p + +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2019, Datto Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/resilver/resilver.cfg + +SYSEVENT=$STF_SUITE/tests/functional/resilver/sysevent + +# +# DESCRIPTION: +# Testing resilver restart logic both with and without the deferred resilver +# feature enabled, verifying that resilver is not restarted when it is +# unnecessary. +# +# STRATEGY: +# 1. Create a pool +# 2. Create four filesystems with the primary cache disabled to force reads +# 3. Write four files simultaneously, one to each filesystem +# 4. Do with and without deferred resilvers enabled +# a. Replace a vdev with a spare & suspend resilver immediately +# b. Verify resilver starts properly +# c. Offline / online another vdev to introduce a new DTL range +# d. Verify resilver restarts or is deferred +# e. Inject read errors on vdev that was offlined / onlined +# f. Verify that resilver did not restart +# g. Unsuspend resilver and wait for it to finish +# h.
Verify that there are two resilvers and nothing is deferred +# + +function cleanup +{ + log_must set_tunable32 zfs_resilver_min_time_ms $ORIG_RESILVER_MIN_TIME + log_must set_tunable32 zfs_scan_suspend_progress \ + $ORIG_SCAN_SUSPEND_PROGRESS + log_must zinject -c all + destroy_pool $TESTPOOL + rm -f ${VDEV_FILES[@]} $SPARE_VDEV_FILE + [[ -n "$EVTFILE" ]] && rm -f "$EVTFILE" + [[ -n "$EVTPID" ]] && kill "$EVTPID" +} + +# count resilver events in zpool and number of deferred resilvers on vdevs +function verify_restarts # <msg> <cnt> <defer> +{ + msg=$1 + cnt=$2 + defer=$3 + + # check the number of resilver starts in the events log + RESILVERS=$(wc -l $EVTFILE | awk '{ print $1 }') + log_note "expected $cnt resilver start(s)$msg, found $RESILVERS" + [[ "$RESILVERS" -ne "$cnt" ]] && + log_fail "expected $cnt resilver start(s)$msg, found $RESILVERS" + + [[ -z "$defer" ]] && return + + # use zdb to find which vdevs have the resilver defer flag + VDEV_DEFERS=$(zdb -C $TESTPOOL | awk ' + /children/ { gsub(/[^0-9]/, ""); child = $0 } + /com\.datto:resilver_defer$/ { print child } + ') + + if [[ "$defer" == "-" ]] + then + [[ -n $VDEV_DEFERS ]] && + log_fail "didn't expect any vdevs to have resilver deferred" + return + fi + + [[ $VDEV_DEFERS -eq $defer ]] || + log_fail "resilver deferred set on unexpected vdev: $VDEV_DEFERS" +} + +log_assert "Check for unnecessary resilver restarts" + +ORIG_RESILVER_MIN_TIME=$(get_tunable zfs_resilver_min_time_ms) +ORIG_SCAN_SUSPEND_PROGRESS=$(get_tunable zfs_scan_suspend_progress) + +set -A RESTARTS -- '1' '2' '2' '2' +set -A VDEVS -- '' '' '' '' +set -A DEFER_RESTARTS -- '1' '1' '1' '2' +set -A DEFER_VDEVS -- '-' '2' '2' '-' + +VDEV_REPLACE="${VDEV_FILES[1]} $SPARE_VDEV_FILE" + +log_onexit cleanup + +# Monitor for resilver start events and log them to $EVTFILE as they occur +EVTFILE=$(mktemp /tmp/resilver_events.XXXXXX) +EVTPID=$($SYSEVENT $EVTFILE) +log_must test -n "$EVTPID" + +log_must truncate -s $VDEV_FILE_SIZE ${VDEV_FILES[@]} $SPARE_VDEV_FILE + +log_must
zpool create -f -o feature@resilver_defer=disabled $TESTPOOL \ + raidz ${VDEV_FILES[@]} + +# create 4 filesystems +for fs in fs{0..3} +do + log_must zfs create -o primarycache=none -o recordsize=1k $TESTPOOL/$fs +done + +# simultaneously write 16M to each of them +set -A DATAPATHS /$TESTPOOL/fs{0..3}/dat.0 +log_note "Writing data files" +for path in ${DATAPATHS[@]} +do + dd if=/dev/urandom of=$path bs=1M count=16 > /dev/null 2>&1 & +done +wait + +# test without and with deferred resilver feature enabled +for test in "without" "with" +do + log_note "Testing $test deferred resilvers" + + if [[ $test == "with" ]] + then + log_must zpool set feature@resilver_defer=enabled $TESTPOOL + RESTARTS=( "${DEFER_RESTARTS[@]}" ) + VDEVS=( "${DEFER_VDEVS[@]}" ) + VDEV_REPLACE="$SPARE_VDEV_FILE ${VDEV_FILES[1]}" + fi + + # clear the events + cp /dev/null $EVTFILE + + # limit scanning time + log_must set_tunable32 zfs_resilver_min_time_ms 50 + + # initiate a resilver and suspend the scan as soon as possible + log_must zpool replace $TESTPOOL $VDEV_REPLACE + log_must set_tunable32 zfs_scan_suspend_progress 1 + + # there should only be 1 resilver start + verify_restarts '' "${RESTARTS[0]}" "${VDEVS[0]}" + + # offline then online a vdev to introduce a new DTL range after current + # scan, which should restart (or defer) the resilver + log_must zpool offline $TESTPOOL ${VDEV_FILES[2]} + log_must zpool sync $TESTPOOL + log_must zpool online $TESTPOOL ${VDEV_FILES[2]} + log_must zpool sync $TESTPOOL + + # there should now be 2 resilver starts w/o defer, 1 with defer + verify_restarts ' after offline/online' "${RESTARTS[1]}" "${VDEVS[1]}" + + # inject read io errors on vdev and verify resilver does not restart + log_must zinject -a -d ${VDEV_FILES[2]} -e io -T read -f 0.25 $TESTPOOL + log_must cat ${DATAPATHS[1]} > /dev/null + log_must zinject -c all + + # there should still be 2 resilver starts w/o defer, 1 with defer + verify_restarts ' after zinject' "${RESTARTS[2]}" "${VDEVS[2]}" + + #
unsuspend resilver + log_must set_tunable32 zfs_scan_suspend_progress 0 + log_must set_tunable32 zfs_resilver_min_time_ms 3000 + + # wait for resilver to finish + for iter in {0..59} + do + is_pool_resilvered $TESTPOOL && break + sleep 1 + done + is_pool_resilvered $TESTPOOL || + log_fail "resilver timed out" + + # wait for a few txg's to see if a resilver happens + log_must zpool sync $TESTPOOL + log_must zpool sync $TESTPOOL + + # there should now be 2 resilver starts + verify_restarts ' after resilver' "${RESTARTS[3]}" "${VDEVS[3]}" +done + +log_pass "Resilver did not restart unnecessarily" diff --git a/usr/src/test/zfs-tests/tests/functional/resilver/setup.ksh b/usr/src/test/zfs-tests/tests/functional/resilver/setup.ksh new file mode 100755 index 0000000000..4dfa814245 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/resilver/setup.ksh @@ -0,0 +1,31 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END + +# +# Copyright (c) 2019, Datto Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. 
$STF_SUITE/tests/functional/resilver/resilver.cfg + +verify_runnable "global" + +log_pass diff --git a/usr/src/test/zfs-tests/tests/functional/resilver/sysevent.c b/usr/src/test/zfs-tests/tests/functional/resilver/sysevent.c new file mode 100644 index 0000000000..1310c07f90 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/resilver/sysevent.c @@ -0,0 +1,148 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at http://smartos.org/CDDL + * + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file. + * + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + * + * Copyright 2020 Joyent, Inc. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +FILE *out; + +static void +process_event(sysevent_t *ev) +{ + char *class = NULL; + char *subclass = NULL; + + /* get sysevent metadata and add to the nvlist */ + class = sysevent_get_class_name(ev); + subclass = sysevent_get_subclass_name(ev); + + if (class == NULL || subclass == NULL) + errx(EXIT_FAILURE, "failed to retrieve sysevent metadata"); + + VERIFY0(strcmp(class, EC_ZFS)); + VERIFY0(strcmp(subclass, ESC_ZFS_RESILVER_START)); + + flockfile(out); + (void) fprintf(out, "Received %s.%s event\n", class, subclass); + (void) fflush(out); + funlockfile(out); +} + +static void +child_fatal(int fd, const char *msg, ...) 
+{ + va_list ap; + int fail = EXIT_FAILURE; + + va_start(ap, msg); + (void) vfprintf(stderr, msg, ap); + va_end(ap); + (void) fputc('\n', stderr); + + (void) write(fd, &fail, sizeof (fail)); + (void) close(fd); + exit(EXIT_FAILURE); +} + +static void +do_child(int fd) +{ + const char *subclasses[] = { + ESC_ZFS_RESILVER_START, + }; + sysevent_handle_t *handle; + int ret = 0; + + if ((handle = sysevent_bind_handle(process_event)) == NULL) { + child_fatal(fd, "sysevent_bind_handle() failed: %s", + strerror(errno)); + } + + if (sysevent_subscribe_event(handle, EC_ZFS, subclasses, + ARRAY_SIZE(subclasses)) != 0) { + child_fatal(fd, "failed to subscribe to sysevents: %s", + strerror(errno)); + } + + (void) write(fd, &ret, sizeof (ret)); + (void) close(fd); + + /* leave stderr open so any errors get captured by test harness */ + (void) fclose(stdin); + (void) fclose(stdout); + + for (;;) + (void) pause(); +} + +int +main(int argc, char **argv) +{ + pid_t child; + int fds[2]; + int ret = 0; + + if (argc < 2) { + (void) fprintf(stderr, "Usage: %s outfile\n", argv[0]); + exit(EXIT_FAILURE); + } + + if ((out = fopen(argv[1], "w")) == NULL) + err(EXIT_FAILURE, "unable to open %s", argv[1]); + + VERIFY0(pipe(fds)); + + switch (child = fork()) { + case -1: + err(EXIT_FAILURE, "unable to fork"); + case 0: + do_child(fds[1]); + break; + default: + break; + } + + (void) close(fds[1]); + + if (read(fds[0], &ret, sizeof (ret)) < 0) + err(EXIT_FAILURE, "failure waiting on child"); + + if (ret != 0) + return (ret); + + (void) close(fds[0]); + (void) printf("%d\n", child); + return (0); +} diff --git a/usr/src/uts/common/fs/zfs/dsl_scan.c b/usr/src/uts/common/fs/zfs/dsl_scan.c index b619719ba9..427ed961bb 100644 --- a/usr/src/uts/common/fs/zfs/dsl_scan.c +++ b/usr/src/uts/common/fs/zfs/dsl_scan.c @@ -24,7 +24,7 @@ * Copyright 2016 Gary Mills * Copyright (c) 2011, 2017 by Delphix. All rights reserved. * Copyright 2019 Joyent, Inc. - * Copyright (c) 2017 Datto Inc. 
+ * Copyright (c) 2017, 2019, Datto Inc. All rights reserved. */ #include @@ -598,6 +598,13 @@ dsl_scan_restarting(dsl_scan_t *scn, dmu_tx_t *tx) scn->scn_restart_txg <= tx->tx_txg); } +boolean_t +dsl_scan_resilver_scheduled(dsl_pool_t *dp) +{ + return ((dp->dp_scan && dp->dp_scan->scn_restart_txg != 0) || + (spa_async_tasks(dp->dp_spa) & SPA_ASYNC_RESILVER)); +} + boolean_t dsl_scan_scrubbing(const dsl_pool_t *dp) { @@ -794,7 +801,7 @@ dsl_scan(dsl_pool_t *dp, pool_scan_func_t func) (void) spa_vdev_state_exit(spa, NULL, 0); if (func == POOL_SCAN_RESILVER) { - dsl_resilver_restart(spa->spa_dsl_pool, 0); + dsl_scan_restart_resilver(spa->spa_dsl_pool, 0); return (0); } @@ -813,41 +820,6 @@ dsl_scan(dsl_pool_t *dp, pool_scan_func_t func) dsl_scan_setup_sync, &func, 0, ZFS_SPACE_CHECK_EXTRA_RESERVED)); } -/* - * Sets the resilver defer flag to B_FALSE on all leaf devs under vd. Returns - * B_TRUE if we have devices that need to be resilvered and are available to - * accept resilver I/Os. - */ -static boolean_t -dsl_scan_clear_deferred(vdev_t *vd, dmu_tx_t *tx) -{ - boolean_t resilver_needed = B_FALSE; - spa_t *spa = vd->vdev_spa; - - for (int c = 0; c < vd->vdev_children; c++) { - resilver_needed |= - dsl_scan_clear_deferred(vd->vdev_child[c], tx); - } - - if (vd == spa->spa_root_vdev && - spa_feature_is_active(spa, SPA_FEATURE_RESILVER_DEFER)) { - spa_feature_decr(spa, SPA_FEATURE_RESILVER_DEFER, tx); - vdev_config_dirty(vd); - spa->spa_resilver_deferred = B_FALSE; - return (resilver_needed); - } - - if (!vdev_is_concrete(vd) || vd->vdev_aux || - !vd->vdev_ops->vdev_op_leaf) - return (resilver_needed); - - if (vd->vdev_resilver_deferred) - vd->vdev_resilver_deferred = B_FALSE; - - return (!vdev_is_dead(vd) && !vd->vdev_offline && - vdev_resilver_needed(vd, NULL, NULL)); -} - /* ARGSUSED */ static void dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx) @@ -949,24 +921,21 @@ dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx) 
spa_async_request(spa, SPA_ASYNC_RESILVER_DONE); /* - * Clear any deferred_resilver flags in the config. + * Clear any resilver_deferred flags in the config. * If there are drives that need resilvering, kick * off an asynchronous request to start resilver. - * dsl_scan_clear_deferred() may update the config + * vdev_clear_resilver_deferred() may update the config * before the resilver can restart. In the event of * a crash during this period, the spa loading code * will find the drives that need to be resilvered - * when the machine reboots and start the resilver then. + * and start the resilver then. */ - if (spa_feature_is_enabled(spa, SPA_FEATURE_RESILVER_DEFER)) { - boolean_t resilver_needed = - dsl_scan_clear_deferred(spa->spa_root_vdev, tx); - if (resilver_needed) { - spa_history_log_internal(spa, - "starting deferred resilver", tx, - "errors=%llu", spa_get_errlog_size(spa)); - spa_async_request(spa, SPA_ASYNC_RESILVER); - } + if (spa_feature_is_enabled(spa, SPA_FEATURE_RESILVER_DEFER) && + vdev_clear_resilver_deferred(spa->spa_root_vdev, tx)) { + spa_history_log_internal(spa, + "starting deferred resilver", tx, "errors=%llu", + (u_longlong_t)spa_get_errlog_size(spa)); + spa_async_request(spa, SPA_ASYNC_RESILVER); } } @@ -1073,7 +1042,7 @@ dsl_scrub_set_pause_resume(const dsl_pool_t *dp, pool_scrub_cmd_t cmd) /* start a new scan, or restart an existing one. 
*/ void -dsl_resilver_restart(dsl_pool_t *dp, uint64_t txg) +dsl_scan_restart_resilver(dsl_pool_t *dp, uint64_t txg) { if (txg == 0) { dmu_tx_t *tx; @@ -1221,10 +1190,13 @@ scan_ds_queue_sync(dsl_scan_t *scn, dmu_tx_t *tx) static boolean_t dsl_scan_should_clear(dsl_scan_t *scn) { + spa_t *spa = scn->scn_dp->dp_spa; vdev_t *rvd = scn->scn_dp->dp_spa->spa_root_vdev; - uint64_t mlim_hard, mlim_soft, mused; - uint64_t alloc = metaslab_class_get_alloc(spa_normal_class( - scn->scn_dp->dp_spa)); + uint64_t alloc, mlim_hard, mlim_soft, mused; + + alloc = metaslab_class_get_alloc(spa_normal_class(spa)); + alloc += metaslab_class_get_alloc(spa_special_class(spa)); + alloc += metaslab_class_get_alloc(spa_dedup_class(spa)); mlim_hard = MAX((physmem / zfs_scan_mem_lim_fact) * PAGESIZE, zfs_scan_mem_lim_min); @@ -4208,3 +4180,33 @@ dsl_scan_freed(spa_t *spa, const blkptr_t *bp) for (int i = 0; i < BP_GET_NDVAS(bp); i++) dsl_scan_freed_dva(spa, bp, i); } + +/* + * Check if a vdev needs resilvering (non-empty DTL), if so, and resilver has + * not started, start it. Otherwise, only restart if max txg in DTL range is + * greater than the max txg in the current scan. If the DTL max is less than + * the scan max, then the vdev has not missed any new data since the resilver + * started, so a restart is not needed. 
+ */ +void +dsl_scan_assess_vdev(dsl_pool_t *dp, vdev_t *vd) +{ + uint64_t min, max; + + if (!vdev_resilver_needed(vd, &min, &max)) + return; + + if (!dsl_scan_resilvering(dp)) { + spa_async_request(dp->dp_spa, SPA_ASYNC_RESILVER); + return; + } + + if (max <= dp->dp_scan->scn_phys.scn_max_txg) + return; + + /* restart is needed, check if it can be deferred */ + if (spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_RESILVER_DEFER)) + vdev_defer_resilver(vd); + else + spa_async_request(dp->dp_spa, SPA_ASYNC_RESILVER); +} diff --git a/usr/src/uts/common/fs/zfs/spa.c b/usr/src/uts/common/fs/zfs/spa.c index d94b96cd57..67c9784f75 100644 --- a/usr/src/uts/common/fs/zfs/spa.c +++ b/usr/src/uts/common/fs/zfs/spa.c @@ -27,9 +27,9 @@ * Copyright 2013 Saso Kiselkov. All rights reserved. * Copyright (c) 2014 Integros [integros.com] * Copyright 2016 Toomas Soome + * Copyright (c) 2017, 2019, Datto Inc. All rights reserved. * Copyright 2019 Joyent, Inc. * Copyright (c) 2017, Intel Corporation. - * Copyright (c) 2017 Datto Inc. * Copyright 2018 OmniOS Community Edition (OmniOSce) Association. * Copyright 2020 Joshua M. 
Clulow */ @@ -6380,9 +6380,9 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing) */ if (dsl_scan_resilvering(spa_get_dsl(spa)) && spa_feature_is_enabled(spa, SPA_FEATURE_RESILVER_DEFER)) - vdev_set_deferred_resilver(spa, newvd); + vdev_defer_resilver(newvd); else - dsl_resilver_restart(spa->spa_dsl_pool, dtl_max_txg); + dsl_scan_restart_resilver(spa->spa_dsl_pool, dtl_max_txg); if (spa->spa_bootfs) spa_event_notify(spa, newvd, NULL, ESC_ZFS_BOOTFS_VDEV_ATTACH); @@ -7620,7 +7620,7 @@ spa_async_thread(void *arg) if (tasks & SPA_ASYNC_RESILVER && (!dsl_scan_resilvering(dp) || !spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_RESILVER_DEFER))) - dsl_resilver_restart(dp, 0); + dsl_scan_restart_resilver(dp, 0); if (tasks & SPA_ASYNC_INITIALIZE_RESTART) { mutex_enter(&spa_namespace_lock); @@ -7736,6 +7736,12 @@ spa_async_request(spa_t *spa, int task) mutex_exit(&spa->spa_async_lock); } +int +spa_async_tasks(spa_t *spa) +{ + return (spa->spa_async_tasks); +} + /* * ========================================================================== * SPA syncing routines diff --git a/usr/src/uts/common/fs/zfs/sys/dsl_scan.h b/usr/src/uts/common/fs/zfs/sys/dsl_scan.h index 1b600405ae..4693293290 100644 --- a/usr/src/uts/common/fs/zfs/sys/dsl_scan.h +++ b/usr/src/uts/common/fs/zfs/sys/dsl_scan.h @@ -21,7 +21,7 @@ /* * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012, 2017 by Delphix. All rights reserved. - * Copyright (c) 2017 Datto Inc. + * Copyright (c) 2017, 2019, Datto Inc. All rights reserved. 
*/ #ifndef _SYS_DSL_SCAN_H @@ -164,10 +164,12 @@ void dsl_scan_fini(struct dsl_pool *dp); void dsl_scan_sync(struct dsl_pool *, dmu_tx_t *); int dsl_scan_cancel(struct dsl_pool *); int dsl_scan(struct dsl_pool *, pool_scan_func_t); +void dsl_scan_assess_vdev(struct dsl_pool *dp, vdev_t *vd); boolean_t dsl_scan_scrubbing(const struct dsl_pool *dp); int dsl_scrub_set_pause_resume(const struct dsl_pool *dp, pool_scrub_cmd_t cmd); -void dsl_resilver_restart(struct dsl_pool *, uint64_t txg); +void dsl_scan_restart_resilver(struct dsl_pool *, uint64_t txg); boolean_t dsl_scan_resilvering(struct dsl_pool *dp); +boolean_t dsl_scan_resilver_scheduled(struct dsl_pool *dp); boolean_t dsl_dataset_unstable(struct dsl_dataset *ds); void dsl_scan_ddt_entry(dsl_scan_t *scn, enum zio_checksum checksum, ddt_entry_t *dde, dmu_tx_t *tx); diff --git a/usr/src/uts/common/fs/zfs/sys/spa.h b/usr/src/uts/common/fs/zfs/sys/spa.h index 31faac4f77..33cdfbeb4b 100644 --- a/usr/src/uts/common/fs/zfs/sys/spa.h +++ b/usr/src/uts/common/fs/zfs/sys/spa.h @@ -26,7 +26,7 @@ * Copyright 2013 Saso Kiselkov. All rights reserved. * Copyright (c) 2014 Integros [integros.com] * Copyright 2019 Joyent, Inc. - * Copyright (c) 2017 Datto Inc. + * Copyright (c) 2017, 2019, Datto Inc. All rights reserved. * Copyright (c) 2017, Intel Corporation. * Copyright 2020 Joshua M. 
Clulow */ @@ -775,6 +775,7 @@ extern void spa_async_request(spa_t *spa, int flag); extern void spa_async_unrequest(spa_t *spa, int flag); extern void spa_async_suspend(spa_t *spa); extern void spa_async_resume(spa_t *spa); +extern int spa_async_tasks(spa_t *spa); extern spa_t *spa_inject_addref(char *pool); extern void spa_inject_delref(spa_t *spa); extern void spa_scan_stat_init(spa_t *spa); diff --git a/usr/src/uts/common/fs/zfs/sys/vdev.h b/usr/src/uts/common/fs/zfs/sys/vdev.h index a6de7e6f2c..b8c2ee5c9e 100644 --- a/usr/src/uts/common/fs/zfs/sys/vdev.h +++ b/usr/src/uts/common/fs/zfs/sys/vdev.h @@ -23,6 +23,7 @@ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011, 2017 by Delphix. All rights reserved. * Copyright (c) 2017, Intel Corporation. + * Copyright (c) 2019, Datto Inc. All rights reserved. */ #ifndef _SYS_VDEV_H @@ -153,6 +154,8 @@ extern void vdev_state_dirty(vdev_t *vd); extern void vdev_state_clean(vdev_t *vd); extern void vdev_set_deferred_resilver(spa_t *spa, vdev_t *vd); +extern void vdev_defer_resilver(vdev_t *vd); +extern boolean_t vdev_clear_resilver_deferred(vdev_t *vd, dmu_tx_t *tx); typedef enum vdev_config_flag { VDEV_CONFIG_SPARE = 1 << 0, diff --git a/usr/src/uts/common/fs/zfs/vdev.c b/usr/src/uts/common/fs/zfs/vdev.c index 01e892f4c4..f824490255 100644 --- a/usr/src/uts/common/fs/zfs/vdev.c +++ b/usr/src/uts/common/fs/zfs/vdev.c @@ -27,6 +27,7 @@ * Copyright 2016 Toomas Soome * Copyright 2019 Joyent, Inc. * Copyright (c) 2017, Intel Corporation. + * Copyright (c) 2019, Datto Inc. All rights reserved. 
*/ #include <sys/zfs_context.h> @@ -772,7 +773,7 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id, &vd->vdev_resilver_txg); if (nvlist_exists(nv, ZPOOL_CONFIG_RESILVER_DEFER)) - vdev_set_deferred_resilver(spa, vd); + vdev_defer_resilver(vd); /* * When importing a pool, we want to ignore the persistent fault @@ -1764,18 +1765,12 @@ vdev_open(vdev_t *vd) } /* - * If a leaf vdev has a DTL, and seems healthy, then kick off a - * resilver. But don't do this if we are doing a reopen for a scrub, - * since this would just restart the scrub we are already doing. + * If this is a leaf vdev, assess whether a resilver is needed. + * But don't do this if we are doing a reopen for a scrub, since + * this would just restart the scrub we are already doing. */ - if (vd->vdev_ops->vdev_op_leaf && !spa->spa_scrub_reopen && - vdev_resilver_needed(vd, NULL, NULL)) { - if (dsl_scan_resilvering(spa->spa_dsl_pool) && - spa_feature_is_enabled(spa, SPA_FEATURE_RESILVER_DEFER)) - vdev_set_deferred_resilver(spa, vd); - else - spa_async_request(spa, SPA_ASYNC_RESILVER); - } + if (vd->vdev_ops->vdev_op_leaf && !spa->spa_scrub_reopen) + dsl_scan_assess_vdev(spa->spa_dsl_pool, vd); return (0); } @@ -3543,14 +3538,11 @@ vdev_clear(spa_t *spa, vdev_t *vd) if (vd != rvd && vdev_writeable(vd->vdev_top)) vdev_state_dirty(vd->vdev_top); - if (vd->vdev_aux == NULL && !vdev_is_dead(vd)) { - if (dsl_scan_resilvering(spa->spa_dsl_pool) && - spa_feature_is_enabled(spa, - SPA_FEATURE_RESILVER_DEFER)) - vdev_set_deferred_resilver(spa, vd); - else - spa_async_request(spa, SPA_ASYNC_RESILVER); - } + /* If a resilver isn't required, check if vdevs can be culled */ + if (vd->vdev_aux == NULL && !vdev_is_dead(vd) && + !dsl_scan_resilvering(spa->spa_dsl_pool) && + !dsl_scan_resilver_scheduled(spa->spa_dsl_pool)) + spa_async_request(spa, SPA_ASYNC_RESILVER_DONE); spa_event_notify(spa, vd, NULL, ESC_ZFS_VDEV_CLEAR); } @@ -4559,18 +4551,46 @@ vdev_deadman(vdev_t *vd) } void
-vdev_set_deferred_resilver(spa_t *spa, vdev_t *vd) +vdev_defer_resilver(vdev_t *vd) { - for (uint64_t i = 0; i < vd->vdev_children; i++) - vdev_set_deferred_resilver(spa, vd->vdev_child[i]); + ASSERT(vd->vdev_ops->vdev_op_leaf); - if (!vd->vdev_ops->vdev_op_leaf || !vdev_writeable(vd) || - range_tree_is_empty(vd->vdev_dtl[DTL_MISSING])) { - return; + vd->vdev_resilver_deferred = B_TRUE; + vd->vdev_spa->spa_resilver_deferred = B_TRUE; +} + +/* + * Clears the resilver deferred flag on all leaf devs under vd. Returns + * B_TRUE if we have devices that need to be resilvered and are available to + * accept resilver I/Os. + */ +boolean_t +vdev_clear_resilver_deferred(vdev_t *vd, dmu_tx_t *tx) +{ + boolean_t resilver_needed = B_FALSE; + spa_t *spa = vd->vdev_spa; + + for (int c = 0; c < vd->vdev_children; c++) { + vdev_t *cvd = vd->vdev_child[c]; + resilver_needed |= vdev_clear_resilver_deferred(cvd, tx); } - vd->vdev_resilver_deferred = B_TRUE; - spa->spa_resilver_deferred = B_TRUE; + if (vd == spa->spa_root_vdev && + spa_feature_is_active(spa, SPA_FEATURE_RESILVER_DEFER)) { + spa_feature_decr(spa, SPA_FEATURE_RESILVER_DEFER, tx); + vdev_config_dirty(vd); + spa->spa_resilver_deferred = B_FALSE; + return (resilver_needed); + } + + if (!vdev_is_concrete(vd) || vd->vdev_aux || + !vd->vdev_ops->vdev_op_leaf) + return (resilver_needed); + + vd->vdev_resilver_deferred = B_FALSE; + + return (!vdev_is_dead(vd) && !vd->vdev_offline && + vdev_resilver_needed(vd, NULL, NULL)); } /* -- cgit v1.2.3