summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorjwpoduska <jpoduska@datto.com>2020-04-24 12:36:43 -0500
committerJason King <jason.king@joyent.com>2020-05-12 15:42:50 -0500
commit0c06d385ea5bbe11d20ecea2e02cdc78733d5359 (patch)
tree65a89a62f96b8226ef13a3ac6cac87ecf97b1fc9
parent8291b3b94350ddd6df6ecd55435b59079f7a3dd2 (diff)
downloadillumos-joyent-0c06d385ea5bbe11d20ecea2e02cdc78733d5359.tar.gz
12636 Prevent unnecessary resilver restarts
Reviewed by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed by: John Gallagher <john.gallagher@delphix.com> Reviewed by: Kjeld Schouten <kjeld@schouten-lebbing.nl> Reviewed by: John Kennedy <john.kennedy@delphix.com> Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com> Portions contributed by: Alexander Motin <mav@FreeBSD.org> Portions contributed by: Jason King <jason.king@joyent.com> Approved by: Robert Mustacchi <rm@fingolfin.org>
-rw-r--r--usr/src/pkg/manifests/system-test-zfstest.mf7
-rw-r--r--usr/src/test/zfs-tests/runfiles/delphix.run4
-rw-r--r--usr/src/test/zfs-tests/runfiles/omnios.run4
-rw-r--r--usr/src/test/zfs-tests/runfiles/openindiana.run4
-rw-r--r--usr/src/test/zfs-tests/runfiles/smartos.run4
-rw-r--r--usr/src/test/zfs-tests/tests/functional/resilver/Makefile71
-rwxr-xr-xusr/src/test/zfs-tests/tests/functional/resilver/cleanup.ksh31
-rw-r--r--usr/src/test/zfs-tests/tests/functional/resilver/resilver.cfg32
-rwxr-xr-xusr/src/test/zfs-tests/tests/functional/resilver/resilver_restart_001.ksh196
-rwxr-xr-xusr/src/test/zfs-tests/tests/functional/resilver/setup.ksh31
-rw-r--r--usr/src/test/zfs-tests/tests/functional/resilver/sysevent.c148
-rw-r--r--usr/src/uts/common/fs/zfs/dsl_scan.c108
-rw-r--r--usr/src/uts/common/fs/zfs/spa.c14
-rw-r--r--usr/src/uts/common/fs/zfs/sys/dsl_scan.h6
-rw-r--r--usr/src/uts/common/fs/zfs/sys/spa.h3
-rw-r--r--usr/src/uts/common/fs/zfs/sys/vdev.h3
-rw-r--r--usr/src/uts/common/fs/zfs/vdev.c76
17 files changed, 654 insertions, 88 deletions
diff --git a/usr/src/pkg/manifests/system-test-zfstest.mf b/usr/src/pkg/manifests/system-test-zfstest.mf
index 09d431d538..233da7a9ad 100644
--- a/usr/src/pkg/manifests/system-test-zfstest.mf
+++ b/usr/src/pkg/manifests/system-test-zfstest.mf
@@ -150,6 +150,7 @@ dir path=opt/zfs-tests/tests/functional/removal
dir path=opt/zfs-tests/tests/functional/rename_dirs
dir path=opt/zfs-tests/tests/functional/replacement
dir path=opt/zfs-tests/tests/functional/reservation
+dir path=opt/zfs-tests/tests/functional/resilver
dir path=opt/zfs-tests/tests/functional/rootpool
dir path=opt/zfs-tests/tests/functional/rsend
dir path=opt/zfs-tests/tests/functional/scrub_mirror
@@ -2928,6 +2929,12 @@ file path=opt/zfs-tests/tests/functional/reservation/reservation_021_neg \
file path=opt/zfs-tests/tests/functional/reservation/reservation_022_pos \
mode=0555
file path=opt/zfs-tests/tests/functional/reservation/setup mode=0555
+file path=opt/zfs-tests/tests/functional/resilver/cleanup mode=0555
+file path=opt/zfs-tests/tests/functional/resilver/resilver.cfg mode=0444
+file path=opt/zfs-tests/tests/functional/resilver/resilver_restart_001 \
+ mode=0555
+file path=opt/zfs-tests/tests/functional/resilver/setup mode=0555
+file path=opt/zfs-tests/tests/functional/resilver/sysevent mode=0555
file path=opt/zfs-tests/tests/functional/rootpool/cleanup mode=0555
file path=opt/zfs-tests/tests/functional/rootpool/rootpool_002_neg mode=0555
file path=opt/zfs-tests/tests/functional/rootpool/rootpool_003_neg mode=0555
diff --git a/usr/src/test/zfs-tests/runfiles/delphix.run b/usr/src/test/zfs-tests/runfiles/delphix.run
index ef1c80efcc..8acd2710bf 100644
--- a/usr/src/test/zfs-tests/runfiles/delphix.run
+++ b/usr/src/test/zfs-tests/runfiles/delphix.run
@@ -370,6 +370,10 @@ tests = ['zpool_replace_001_neg', 'zpool_replace_002_neg', 'replace-o_ashift',
tests = ['zpool_resilver_bad_args', 'zpool_resilver_restart']
tags = ['functional', 'cli_root', 'zpool_resilver']
+[/opt/zfs-tests/tests/functional/resilver]
+tests = ['resilver_restart_001']
+tags = ['functional', 'resilver']
+
[/opt/zfs-tests/tests/functional/cli_root/zpool_scrub]
tests = ['zpool_scrub_001_neg', 'zpool_scrub_002_pos', 'zpool_scrub_003_pos',
'zpool_scrub_004_pos', 'zpool_scrub_005_pos', 'zpool_scrub_print_repairing',
diff --git a/usr/src/test/zfs-tests/runfiles/omnios.run b/usr/src/test/zfs-tests/runfiles/omnios.run
index dd8f0738de..9a3722aa6a 100644
--- a/usr/src/test/zfs-tests/runfiles/omnios.run
+++ b/usr/src/test/zfs-tests/runfiles/omnios.run
@@ -204,6 +204,10 @@ tests = ['zfs_rename_001_pos', 'zfs_rename_002_pos', 'zfs_rename_003_pos',
[/opt/zfs-tests/tests/functional/cli_root/zfs_reservation]
tests = ['zfs_reservation_001_pos', 'zfs_reservation_002_pos']
+[/opt/zfs-tests/tests/functional/resilver]
+tests = ['resilver_restart_001']
+tags = ['functional', 'resilver']
+
[/opt/zfs-tests/tests/functional/cli_root/zfs_rollback]
tests = ['zfs_rollback_001_pos', 'zfs_rollback_002_pos',
'zfs_rollback_003_neg', 'zfs_rollback_004_neg']
diff --git a/usr/src/test/zfs-tests/runfiles/openindiana.run b/usr/src/test/zfs-tests/runfiles/openindiana.run
index 031bd8bf0f..ad0615047c 100644
--- a/usr/src/test/zfs-tests/runfiles/openindiana.run
+++ b/usr/src/test/zfs-tests/runfiles/openindiana.run
@@ -204,6 +204,10 @@ tests = ['zfs_rename_001_pos', 'zfs_rename_002_pos', 'zfs_rename_003_pos',
[/opt/zfs-tests/tests/functional/cli_root/zfs_reservation]
tests = ['zfs_reservation_001_pos', 'zfs_reservation_002_pos']
+[/opt/zfs-tests/tests/functional/resilver]
+tests = ['resilver_restart_001']
+tags = ['functional', 'resilver']
+
[/opt/zfs-tests/tests/functional/cli_root/zfs_rollback]
tests = ['zfs_rollback_001_pos', 'zfs_rollback_002_pos',
'zfs_rollback_003_neg', 'zfs_rollback_004_neg']
diff --git a/usr/src/test/zfs-tests/runfiles/smartos.run b/usr/src/test/zfs-tests/runfiles/smartos.run
index 1a3f3b7bae..a9ee33ac4f 100644
--- a/usr/src/test/zfs-tests/runfiles/smartos.run
+++ b/usr/src/test/zfs-tests/runfiles/smartos.run
@@ -163,6 +163,10 @@ tests = ['zfs_rename_001_pos', 'zfs_rename_002_pos', 'zfs_rename_003_pos',
[/opt/zfs-tests/tests/functional/cli_root/zfs_reservation]
tests = ['zfs_reservation_001_pos', 'zfs_reservation_002_pos']
+[/opt/zfs-tests/tests/functional/resilver]
+tests = ['resilver_restart_001']
+tags = ['functional', 'resilver']
+
[/opt/zfs-tests/tests/functional/cli_root/zfs_rollback]
tests = ['zfs_rollback_001_pos', 'zfs_rollback_002_pos',
'zfs_rollback_003_neg', 'zfs_rollback_004_neg']
diff --git a/usr/src/test/zfs-tests/tests/functional/resilver/Makefile b/usr/src/test/zfs-tests/tests/functional/resilver/Makefile
new file mode 100644
index 0000000000..85ee34a135
--- /dev/null
+++ b/usr/src/test/zfs-tests/tests/functional/resilver/Makefile
@@ -0,0 +1,71 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2020 Joyent, Inc.
+#
+
+include $(SRC)/Makefile.master
+
+PROG = sysevent
+
+SCRIPTS = cleanup \
+ resilver_restart_001 \
+ setup
+
+include $(SRC)/cmd/Makefile.cmd
+include $(SRC)/test/Makefile.com
+
+ROOTOPTPKG = $(ROOT)/opt/zfs-tests
+TARGETDIR = $(ROOTOPTPKG)/tests/functional/resilver
+
+OBJS = $(PROG:%=%.o)
+SRCS = $(OBJS:%.o=%.c)
+SRCFILES = resilver.cfg
+
+CMDS = $(PROG:%=$(TARGETDIR)/%) $(SCRIPTS:%=$(TARGETDIR)/%)
+$(CMDS) := FILEMODE = 0555
+
+FILES = $(SRCFILES:%=$(TARGETDIR)/%)
+$(FILES) := FILEMODE = 0444
+
+CPPFLAGS += -D__EXTENSIONS__
+LDLIBS += -lsysevent
+
+all: $(PROG)
+
+$(PROG): $(OBJS)
+ $(LINK.c) $(OBJS) -o $@ $(LDFLAGS) $(LDLIBS)
+ $(POST_PROCESS)
+
+%.o: %.c
+ $(COMPILE.c) $<
+
+install: all $(CMDS) $(FILES)
+
+clobber: clean
+ -$(RM) $(PROG)
+
+clean:
+ -$(RM) $(OBJS)
+
+$(CMDS): $(TARGETDIR) $(PROG)
+
+$(FILES): $(SRCFILES)
+
+$(TARGETDIR):
+ $(INS.dir)
+
+$(TARGETDIR)/%: %
+ $(INS.file)
+
+$(TARGETDIR)/%: %.ksh
+ $(INS.rename)
diff --git a/usr/src/test/zfs-tests/tests/functional/resilver/cleanup.ksh b/usr/src/test/zfs-tests/tests/functional/resilver/cleanup.ksh
new file mode 100755
index 0000000000..4dfa814245
--- /dev/null
+++ b/usr/src/test/zfs-tests/tests/functional/resilver/cleanup.ksh
@@ -0,0 +1,31 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+
+#
+# Copyright (c) 2019, Datto Inc. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/resilver/resilver.cfg
+
+verify_runnable "global"
+
+log_pass
diff --git a/usr/src/test/zfs-tests/tests/functional/resilver/resilver.cfg b/usr/src/test/zfs-tests/tests/functional/resilver/resilver.cfg
new file mode 100644
index 0000000000..88dfd24aed
--- /dev/null
+++ b/usr/src/test/zfs-tests/tests/functional/resilver/resilver.cfg
@@ -0,0 +1,32 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+
+#
+# Copyright (c) 2019, Datto Inc. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+verify_runnable "global"
+
+set -A VDEV_FILES $TEST_BASE_DIR/file-{1..4}
+SPARE_VDEV_FILE=$TEST_BASE_DIR/spare-1
+
+VDEV_FILE_SIZE=$(( $SPA_MINDEVSIZE * 2 ))
diff --git a/usr/src/test/zfs-tests/tests/functional/resilver/resilver_restart_001.ksh b/usr/src/test/zfs-tests/tests/functional/resilver/resilver_restart_001.ksh
new file mode 100755
index 0000000000..87e0e68cff
--- /dev/null
+++ b/usr/src/test/zfs-tests/tests/functional/resilver/resilver_restart_001.ksh
@@ -0,0 +1,196 @@
+#!/bin/ksh -p
+
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2019, Datto Inc. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/resilver/resilver.cfg
+
+SYSEVENT=$STF_SUITE/tests/functional/resilver/sysevent
+
+#
+# DESCRIPTION:
+# Testing resilver restart logic both with and without the deferred resilver
+# feature enabled, verifying that resilver is not restarted when it is
+# unnecessary.
+#
+# STRATEGY:
+# 1. Create a pool
+# 2. Create four filesystems with the primary cache disabled to force reads
+# 3. Write four files simultaneously, one to each filesystem
+# 4. Do with and without deferred resilvers enabled
+# a. Replace a vdev with a spare & suspend resilver immediately
+# b. Verify resilver starts properly
+# c. Offline / online another vdev to introduce a new DTL range
+# d. Verify that the resilver restarts or is deferred
+# e. Inject read errors on vdev that was offlined / onlined
+# f. Verify that resilver did not restart
+# g. Unsuspend resilver and wait for it to finish
+# h. Verify that there are two resilvers and nothing is deferred
+#
+
+# Restore the tunables saved at the start of the test, clear any injected
+# faults, destroy the test pool, remove the backing files and shut down the
+# resilver event monitor.
+function cleanup
+{
+	log_must set_tunable32 zfs_resilver_min_time_ms $ORIG_RESILVER_MIN_TIME
+	log_must set_tunable32 zfs_scan_suspend_progress \
+	    $ORIG_SCAN_SUSPEND_PROGRESS
+	log_must zinject -c all
+	destroy_pool $TESTPOOL
+	rm -f ${VDEV_FILES[@]} $SPARE_VDEV_FILE
+
+	# stop the event monitor before removing the file it writes to
+	if [[ -n "$EVTPID" ]]
+	then
+		kill "$EVTPID"
+	fi
+
+	if [[ -n "$EVTFILE" ]]
+	then
+		rm -f "$EVTFILE"
+	fi
+}
+
+# Count the resilver start events logged in $EVTFILE and, optionally, check
+# which vdevs carry the resilver deferred flag.
+#   <msg>    suffix appended to the log messages
+#   <cnt>    expected number of resilver start events (lines in $EVTFILE)
+#   <defer>  empty to skip the deferred check, '-' to require that no vdev
+#            is deferred, otherwise the children[] index (as printed by
+#            zdb -C) that is expected to be deferred
+# Fails the test via log_fail on any mismatch; returns 0 otherwise.
+function verify_restarts # <msg> <cnt> <defer>
+{
+	typeset msg=$1
+	typeset cnt=$2
+	typeset defer=$3
+
+	# check the number of resilver starts in the events log
+	RESILVERS=$(wc -l $EVTFILE | awk '{ print $1 }')
+	log_note "expected $cnt resilver start(s)$msg, found $RESILVERS"
+	if [[ "$RESILVERS" -ne "$cnt" ]]
+	then
+		log_fail "expected $cnt resilver start(s)$msg, found $RESILVERS"
+	fi
+
+	# no deferred-vdev expectation given, nothing more to check
+	if [[ -z "$defer" ]]
+	then
+		return 0
+	fi
+
+	# use zdb to find which vdevs have the resilver defer flag
+	VDEV_DEFERS=$(zdb -C $TESTPOOL | awk '
+ /children/ { gsub(/[^0-9]/, ""); child = $0 }
+ /com\.datto:resilver_defer$/ { print child }
+ ')
+
+	if [[ "$defer" == "-" ]]
+	then
+		if [[ -n $VDEV_DEFERS ]]
+		then
+			log_fail "didn't expect any vdevs to have resilver deferred"
+		fi
+		# explicit status: a bare return would pass on the failed test above
+		return 0
+	fi
+
+	if [[ $VDEV_DEFERS -ne $defer ]]
+	then
+		log_fail "resilver deferred set on unexpected vdev: $VDEV_DEFERS"
+	fi
+
+	return 0
+}
+
+log_assert "Check for unnecessary resilver restarts"
+
+ORIG_RESILVER_MIN_TIME=$(get_tunable zfs_resilver_min_time_ms)
+ORIG_SCAN_SUSPEND_PROGRESS=$(get_tunable zfs_scan_suspend_progress)
+
+set -A RESTARTS -- '1' '2' '2' '2'
+set -A VDEVS -- '' '' '' ''
+set -A DEFER_RESTARTS -- '1' '1' '1' '2'
+set -A DEFER_VDEVS -- '-' '2' '2' '-'
+
+VDEV_REPLACE="${VDEV_FILES[1]} $SPARE_VDEV_FILE"
+
+log_onexit cleanup
+
+# Monitor for resilver start events and log them to $EVTFILE as they occur
+EVTFILE=$(mktemp /tmp/resilver_events.XXXXXX)
+EVTPID=$($SYSEVENT $EVTFILE)
+log_must test -n "$EVTPID"
+
+log_must truncate -s $VDEV_FILE_SIZE ${VDEV_FILES[@]} $SPARE_VDEV_FILE
+
+log_must zpool create -f -o feature@resilver_defer=disabled $TESTPOOL \
+ raidz ${VDEV_FILES[@]}
+
+# create 4 filesystems
+for fs in fs{0..3}
+do
+ log_must zfs create -o primarycache=none -o recordsize=1k $TESTPOOL/$fs
+done
+
+# simultaneously write 16M to each of them
+set -A DATAPATHS /$TESTPOOL/fs{0..3}/dat.0
+log_note "Writing data files"
+for path in ${DATAPATHS[@]}
+do
+ dd if=/dev/urandom of=$path bs=1M count=16 > /dev/null 2>&1 &
+done
+wait
+
+# test without and with the deferred resilver feature enabled
+for test in "without" "with"
+do
+ log_note "Testing $test deferred resilvers"
+
+ if [[ $test == "with" ]]
+ then
+ log_must zpool set feature@resilver_defer=enabled $TESTPOOL
+ RESTARTS=( "${DEFER_RESTARTS[@]}" )
+ VDEVS=( "${DEFER_VDEVS[@]}" )
+ VDEV_REPLACE="$SPARE_VDEV_FILE ${VDEV_FILES[1]}"
+ fi
+
+ # clear the events
+ cp /dev/null $EVTFILE
+
+ # limit scanning time
+ log_must set_tunable32 zfs_resilver_min_time_ms 50
+
+ # initiate a resilver and suspend the scan as soon as possible
+ log_must zpool replace $TESTPOOL $VDEV_REPLACE
+ log_must set_tunable32 zfs_scan_suspend_progress 1
+
+ # there should only be 1 resilver start
+ verify_restarts '' "${RESTARTS[0]}" "${VDEVS[0]}"
+
+ # offline then online a vdev to introduce a new DTL range after current
+ # scan, which should restart (or defer) the resilver
+ log_must zpool offline $TESTPOOL ${VDEV_FILES[2]}
+ log_must zpool sync $TESTPOOL
+ log_must zpool online $TESTPOOL ${VDEV_FILES[2]}
+ log_must zpool sync $TESTPOOL
+
+ # there should now be 2 resilver starts w/o defer, 1 with defer
+ verify_restarts ' after offline/online' "${RESTARTS[1]}" "${VDEVS[1]}"
+
+ # inject read io errors on vdev and verify resilver does not restart
+ log_must zinject -a -d ${VDEV_FILES[2]} -e io -T read -f 0.25 $TESTPOOL
+ log_must cat ${DATAPATHS[1]} > /dev/null
+ log_must zinject -c all
+
+ # there should still be 2 resilver starts w/o defer, 1 with defer
+ verify_restarts ' after zinject' "${RESTARTS[2]}" "${VDEVS[2]}"
+
+ # unsuspend resilver
+ log_must set_tunable32 zfs_scan_suspend_progress 0
+ log_must set_tunable32 zfs_resilver_min_time_ms 3000
+
+ # wait for resilver to finish
+ for iter in {0..59}
+ do
+ is_pool_resilvered $TESTPOOL && break
+ sleep 1
+ done
+ is_pool_resilvered $TESTPOOL ||
+ log_fail "resilver timed out"
+
+ # wait for a few txg's to see if a resilver happens
+ log_must zpool sync $TESTPOOL
+ log_must zpool sync $TESTPOOL
+
+ # there should now be 2 resilver starts
+ verify_restarts ' after resilver' "${RESTARTS[3]}" "${VDEVS[3]}"
+done
+
+log_pass "Resilver did not restart unnecessarily"
diff --git a/usr/src/test/zfs-tests/tests/functional/resilver/setup.ksh b/usr/src/test/zfs-tests/tests/functional/resilver/setup.ksh
new file mode 100755
index 0000000000..4dfa814245
--- /dev/null
+++ b/usr/src/test/zfs-tests/tests/functional/resilver/setup.ksh
@@ -0,0 +1,31 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+
+#
+# Copyright (c) 2019, Datto Inc. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/resilver/resilver.cfg
+
+verify_runnable "global"
+
+log_pass
diff --git a/usr/src/test/zfs-tests/tests/functional/resilver/sysevent.c b/usr/src/test/zfs-tests/tests/functional/resilver/sysevent.c
new file mode 100644
index 0000000000..1310c07f90
--- /dev/null
+++ b/usr/src/test/zfs-tests/tests/functional/resilver/sysevent.c
@@ -0,0 +1,148 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at http://smartos.org/CDDL
+ *
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file.
+ *
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ *
+ * Copyright 2020 Joyent, Inc.
+ *
+ */
+
+#include <err.h>
+#include <errno.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/debug.h>
+#include <sys/sysmacros.h>
+#include <sys/types.h>
+#include <libsysevent.h>
+#include <sys/sysevent/eventdefs.h>
+
+FILE *out;
+
+/*
+ * Sysevent callback registered by do_child(): append one line to the output
+ * file for every event delivered.  The class and subclass are asserted (via
+ * VERIFY0) to be EC_ZFS and ESC_ZFS_RESILVER_START respectively.
+ */
+static void
+process_event(sysevent_t *ev)
+{
+	char *ev_class = sysevent_get_class_name(ev);
+	char *ev_subclass = sysevent_get_subclass_name(ev);
+
+	if (ev_class == NULL || ev_subclass == NULL)
+		errx(EXIT_FAILURE, "failed to retrieve sysevent metadata");
+
+	VERIFY0(strcmp(ev_class, EC_ZFS));
+	VERIFY0(strcmp(ev_subclass, ESC_ZFS_RESILVER_START));
+
+	/* keep the write and its flush together under the stream lock */
+	flockfile(out);
+	(void) fprintf(out, "Received %s.%s event\n", ev_class, ev_subclass);
+	(void) fflush(out);
+	funlockfile(out);
+}
+
+/*
+ * Fatal error path for the child: print the printf-style message followed
+ * by a newline on stderr, send EXIT_FAILURE to the parent over the pipe
+ * descriptor fd, close it, and exit.  Does not return.
+ */
+static void
+child_fatal(int fd, const char *msg, ...)
+{
+	const int status = EXIT_FAILURE;
+	va_list args;
+
+	va_start(args, msg);
+	(void) vfprintf(stderr, msg, args);
+	va_end(args);
+	(void) fputs("\n", stderr);
+
+	/* best effort: main() reads this status word from the pipe */
+	(void) write(fd, &status, sizeof (status));
+	(void) close(fd);
+	exit(status);
+}
+
+/*
+ * Child side of the fork: bind a sysevent handle that delivers events to
+ * process_event(), subscribe to EC_ZFS / ESC_ZFS_RESILVER_START, report the
+ * outcome to the parent over the pipe descriptor fd, then wait forever.
+ * Any setup failure goes through child_fatal().  Does not return.
+ */
+static void
+do_child(int fd)
+{
+	const char *subclasses[] = { ESC_ZFS_RESILVER_START };
+	const int ok = 0;
+	sysevent_handle_t *hdl;
+
+	hdl = sysevent_bind_handle(process_event);
+	if (hdl == NULL) {
+		child_fatal(fd, "sysevent_bind_handle() failed: %s",
+		    strerror(errno));
+	}
+
+	if (sysevent_subscribe_event(hdl, EC_ZFS, subclasses,
+	    ARRAY_SIZE(subclasses)) != 0) {
+		child_fatal(fd, "failed to subscribe to sysevents: %s",
+		    strerror(errno));
+	}
+
+	/* tell the parent that setup succeeded, then drop the pipe */
+	(void) write(fd, &ok, sizeof (ok));
+	(void) close(fd);
+
+	/* stderr stays open so the test harness can capture any errors */
+	(void) fclose(stdin);
+	(void) fclose(stdout);
+
+	/* nothing left to do here; events are handled in process_event() */
+	for (;;) {
+		(void) pause();
+	}
+}
+
+/*
+ * Usage: sysevent outfile
+ *
+ * Opens outfile for writing, forks a child that logs resilver start events
+ * to it (see do_child()), and waits for the child to report over a pipe
+ * whether its setup succeeded.  On success the child's pid is printed on
+ * stdout and 0 is returned; the child keeps running.  If the child reports
+ * a failure its status value is returned.  If the child goes away without
+ * reporting a complete status word, that is treated as a failure too.
+ */
+int
+main(int argc, char **argv)
+{
+	pid_t child;
+	int fds[2];
+	int ret = 0;
+	ssize_t nread;
+
+	if (argc < 2) {
+		(void) fprintf(stderr, "Usage: %s outfile\n", argv[0]);
+		exit(EXIT_FAILURE);
+	}
+
+	if ((out = fopen(argv[1], "w")) == NULL)
+		err(EXIT_FAILURE, "unable to open %s", argv[1]);
+
+	VERIFY0(pipe(fds));
+
+	switch (child = fork()) {
+	case -1:
+		err(EXIT_FAILURE, "unable to fork");
+		/* NOTREACHED */
+	case 0:
+		/* the child only writes to the pipe */
+		(void) close(fds[0]);
+		do_child(fds[1]);
+		/* NOTREACHED */
+		break;
+	default:
+		break;
+	}
+
+	(void) close(fds[1]);
+
+	nread = read(fds[0], &ret, sizeof (ret));
+	if (nread < 0)
+		err(EXIT_FAILURE, "failure waiting on child");
+
+	/*
+	 * A short read (including EOF) means the child went away before it
+	 * could report its status; do not mistake that for success.
+	 */
+	if ((size_t)nread != sizeof (ret))
+		errx(EXIT_FAILURE, "child exited without reporting status");
+
+	(void) close(fds[0]);
+
+	if (ret != 0)
+		return (ret);
+
+	(void) printf("%d\n", (int)child);
+	return (0);
+}
diff --git a/usr/src/uts/common/fs/zfs/dsl_scan.c b/usr/src/uts/common/fs/zfs/dsl_scan.c
index b619719ba9..427ed961bb 100644
--- a/usr/src/uts/common/fs/zfs/dsl_scan.c
+++ b/usr/src/uts/common/fs/zfs/dsl_scan.c
@@ -24,7 +24,7 @@
* Copyright 2016 Gary Mills
* Copyright (c) 2011, 2017 by Delphix. All rights reserved.
* Copyright 2019 Joyent, Inc.
- * Copyright (c) 2017 Datto Inc.
+ * Copyright (c) 2017, 2019, Datto Inc. All rights reserved.
*/
#include <sys/dsl_scan.h>
@@ -599,6 +599,13 @@ dsl_scan_restarting(dsl_scan_t *scn, dmu_tx_t *tx)
}
boolean_t
+dsl_scan_resilver_scheduled(dsl_pool_t *dp)
+{
+ return ((dp->dp_scan && dp->dp_scan->scn_restart_txg != 0) ||
+ (spa_async_tasks(dp->dp_spa) & SPA_ASYNC_RESILVER));
+}
+
+boolean_t
dsl_scan_scrubbing(const dsl_pool_t *dp)
{
dsl_scan_phys_t *scn_phys = &dp->dp_scan->scn_phys;
@@ -794,7 +801,7 @@ dsl_scan(dsl_pool_t *dp, pool_scan_func_t func)
(void) spa_vdev_state_exit(spa, NULL, 0);
if (func == POOL_SCAN_RESILVER) {
- dsl_resilver_restart(spa->spa_dsl_pool, 0);
+ dsl_scan_restart_resilver(spa->spa_dsl_pool, 0);
return (0);
}
@@ -813,41 +820,6 @@ dsl_scan(dsl_pool_t *dp, pool_scan_func_t func)
dsl_scan_setup_sync, &func, 0, ZFS_SPACE_CHECK_EXTRA_RESERVED));
}
-/*
- * Sets the resilver defer flag to B_FALSE on all leaf devs under vd. Returns
- * B_TRUE if we have devices that need to be resilvered and are available to
- * accept resilver I/Os.
- */
-static boolean_t
-dsl_scan_clear_deferred(vdev_t *vd, dmu_tx_t *tx)
-{
- boolean_t resilver_needed = B_FALSE;
- spa_t *spa = vd->vdev_spa;
-
- for (int c = 0; c < vd->vdev_children; c++) {
- resilver_needed |=
- dsl_scan_clear_deferred(vd->vdev_child[c], tx);
- }
-
- if (vd == spa->spa_root_vdev &&
- spa_feature_is_active(spa, SPA_FEATURE_RESILVER_DEFER)) {
- spa_feature_decr(spa, SPA_FEATURE_RESILVER_DEFER, tx);
- vdev_config_dirty(vd);
- spa->spa_resilver_deferred = B_FALSE;
- return (resilver_needed);
- }
-
- if (!vdev_is_concrete(vd) || vd->vdev_aux ||
- !vd->vdev_ops->vdev_op_leaf)
- return (resilver_needed);
-
- if (vd->vdev_resilver_deferred)
- vd->vdev_resilver_deferred = B_FALSE;
-
- return (!vdev_is_dead(vd) && !vd->vdev_offline &&
- vdev_resilver_needed(vd, NULL, NULL));
-}
-
/* ARGSUSED */
static void
dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx)
@@ -949,24 +921,21 @@ dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx)
spa_async_request(spa, SPA_ASYNC_RESILVER_DONE);
/*
- * Clear any deferred_resilver flags in the config.
+ * Clear any resilver_deferred flags in the config.
* If there are drives that need resilvering, kick
* off an asynchronous request to start resilver.
- * dsl_scan_clear_deferred() may update the config
+ * vdev_clear_resilver_deferred() may update the config
* before the resilver can restart. In the event of
* a crash during this period, the spa loading code
* will find the drives that need to be resilvered
- * when the machine reboots and start the resilver then.
+ * and start the resilver then.
*/
- if (spa_feature_is_enabled(spa, SPA_FEATURE_RESILVER_DEFER)) {
- boolean_t resilver_needed =
- dsl_scan_clear_deferred(spa->spa_root_vdev, tx);
- if (resilver_needed) {
- spa_history_log_internal(spa,
- "starting deferred resilver", tx,
- "errors=%llu", spa_get_errlog_size(spa));
- spa_async_request(spa, SPA_ASYNC_RESILVER);
- }
+ if (spa_feature_is_enabled(spa, SPA_FEATURE_RESILVER_DEFER) &&
+ vdev_clear_resilver_deferred(spa->spa_root_vdev, tx)) {
+ spa_history_log_internal(spa,
+ "starting deferred resilver", tx, "errors=%llu",
+ (u_longlong_t)spa_get_errlog_size(spa));
+ spa_async_request(spa, SPA_ASYNC_RESILVER);
}
}
@@ -1073,7 +1042,7 @@ dsl_scrub_set_pause_resume(const dsl_pool_t *dp, pool_scrub_cmd_t cmd)
/* start a new scan, or restart an existing one. */
void
-dsl_resilver_restart(dsl_pool_t *dp, uint64_t txg)
+dsl_scan_restart_resilver(dsl_pool_t *dp, uint64_t txg)
{
if (txg == 0) {
dmu_tx_t *tx;
@@ -1221,10 +1190,13 @@ scan_ds_queue_sync(dsl_scan_t *scn, dmu_tx_t *tx)
static boolean_t
dsl_scan_should_clear(dsl_scan_t *scn)
{
+ spa_t *spa = scn->scn_dp->dp_spa;
vdev_t *rvd = scn->scn_dp->dp_spa->spa_root_vdev;
- uint64_t mlim_hard, mlim_soft, mused;
- uint64_t alloc = metaslab_class_get_alloc(spa_normal_class(
- scn->scn_dp->dp_spa));
+ uint64_t alloc, mlim_hard, mlim_soft, mused;
+
+ alloc = metaslab_class_get_alloc(spa_normal_class(spa));
+ alloc += metaslab_class_get_alloc(spa_special_class(spa));
+ alloc += metaslab_class_get_alloc(spa_dedup_class(spa));
mlim_hard = MAX((physmem / zfs_scan_mem_lim_fact) * PAGESIZE,
zfs_scan_mem_lim_min);
@@ -4208,3 +4180,33 @@ dsl_scan_freed(spa_t *spa, const blkptr_t *bp)
for (int i = 0; i < BP_GET_NDVAS(bp); i++)
dsl_scan_freed_dva(spa, bp, i);
}
+
+/*
+ * Decide what to do about a vdev that may need resilvering.
+ *
+ * - DTL empty (no resilver needed): do nothing.
+ * - No resilver in progress: request one asynchronously.
+ * - Resilver in progress and the DTL's max txg does not exceed the max txg
+ *   of the current scan: do nothing, since the vdev has not missed any new
+ *   data since the resilver started.
+ * - Otherwise a restart is needed: defer it on this vdev when the
+ *   resilver_defer feature is enabled, else request the restart
+ *   asynchronously.
+ */
+void
+dsl_scan_assess_vdev(dsl_pool_t *dp, vdev_t *vd)
+{
+	spa_t *spa = dp->dp_spa;
+	uint64_t dtl_min, dtl_max;
+
+	if (!vdev_resilver_needed(vd, &dtl_min, &dtl_max))
+		return;
+
+	if (dsl_scan_resilvering(dp)) {
+		/* the running scan already covers this vdev's DTL */
+		if (dtl_max <= dp->dp_scan->scn_phys.scn_max_txg)
+			return;
+
+		/* restart is needed, check if it can be deferred */
+		if (spa_feature_is_enabled(spa, SPA_FEATURE_RESILVER_DEFER)) {
+			vdev_defer_resilver(vd);
+			return;
+		}
+	}
+
+	spa_async_request(spa, SPA_ASYNC_RESILVER);
+}
diff --git a/usr/src/uts/common/fs/zfs/spa.c b/usr/src/uts/common/fs/zfs/spa.c
index d94b96cd57..67c9784f75 100644
--- a/usr/src/uts/common/fs/zfs/spa.c
+++ b/usr/src/uts/common/fs/zfs/spa.c
@@ -27,9 +27,9 @@
* Copyright 2013 Saso Kiselkov. All rights reserved.
* Copyright (c) 2014 Integros [integros.com]
* Copyright 2016 Toomas Soome <tsoome@me.com>
+ * Copyright (c) 2017, 2019, Datto Inc. All rights reserved.
* Copyright 2019 Joyent, Inc.
* Copyright (c) 2017, Intel Corporation.
- * Copyright (c) 2017 Datto Inc.
* Copyright 2018 OmniOS Community Edition (OmniOSce) Association.
* Copyright 2020 Joshua M. Clulow <josh@sysmgr.org>
*/
@@ -6380,9 +6380,9 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing)
*/
if (dsl_scan_resilvering(spa_get_dsl(spa)) &&
spa_feature_is_enabled(spa, SPA_FEATURE_RESILVER_DEFER))
- vdev_set_deferred_resilver(spa, newvd);
+ vdev_defer_resilver(newvd);
else
- dsl_resilver_restart(spa->spa_dsl_pool, dtl_max_txg);
+ dsl_scan_restart_resilver(spa->spa_dsl_pool, dtl_max_txg);
if (spa->spa_bootfs)
spa_event_notify(spa, newvd, NULL, ESC_ZFS_BOOTFS_VDEV_ATTACH);
@@ -7620,7 +7620,7 @@ spa_async_thread(void *arg)
if (tasks & SPA_ASYNC_RESILVER &&
(!dsl_scan_resilvering(dp) ||
!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_RESILVER_DEFER)))
- dsl_resilver_restart(dp, 0);
+ dsl_scan_restart_resilver(dp, 0);
if (tasks & SPA_ASYNC_INITIALIZE_RESTART) {
mutex_enter(&spa_namespace_lock);
@@ -7736,6 +7736,12 @@ spa_async_request(spa_t *spa, int task)
mutex_exit(&spa->spa_async_lock);
}
+/*
+ * Return the pool's current spa_async_tasks flag word.
+ * NOTE(review): the field is read here without taking spa_async_lock, so
+ * the result is only a snapshot -- confirm callers are fine with that.
+ */
+int
+spa_async_tasks(spa_t *spa)
+{
+	int tasks = spa->spa_async_tasks;
+
+	return (tasks);
+}
+
/*
* ==========================================================================
* SPA syncing routines
diff --git a/usr/src/uts/common/fs/zfs/sys/dsl_scan.h b/usr/src/uts/common/fs/zfs/sys/dsl_scan.h
index 1b600405ae..4693293290 100644
--- a/usr/src/uts/common/fs/zfs/sys/dsl_scan.h
+++ b/usr/src/uts/common/fs/zfs/sys/dsl_scan.h
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2017 by Delphix. All rights reserved.
- * Copyright (c) 2017 Datto Inc.
+ * Copyright (c) 2017, 2019, Datto Inc. All rights reserved.
*/
#ifndef _SYS_DSL_SCAN_H
@@ -164,10 +164,12 @@ void dsl_scan_fini(struct dsl_pool *dp);
void dsl_scan_sync(struct dsl_pool *, dmu_tx_t *);
int dsl_scan_cancel(struct dsl_pool *);
int dsl_scan(struct dsl_pool *, pool_scan_func_t);
+void dsl_scan_assess_vdev(struct dsl_pool *dp, vdev_t *vd);
boolean_t dsl_scan_scrubbing(const struct dsl_pool *dp);
int dsl_scrub_set_pause_resume(const struct dsl_pool *dp, pool_scrub_cmd_t cmd);
-void dsl_resilver_restart(struct dsl_pool *, uint64_t txg);
+void dsl_scan_restart_resilver(struct dsl_pool *, uint64_t txg);
boolean_t dsl_scan_resilvering(struct dsl_pool *dp);
+boolean_t dsl_scan_resilver_scheduled(struct dsl_pool *dp);
boolean_t dsl_dataset_unstable(struct dsl_dataset *ds);
void dsl_scan_ddt_entry(dsl_scan_t *scn, enum zio_checksum checksum,
ddt_entry_t *dde, dmu_tx_t *tx);
diff --git a/usr/src/uts/common/fs/zfs/sys/spa.h b/usr/src/uts/common/fs/zfs/sys/spa.h
index 31faac4f77..33cdfbeb4b 100644
--- a/usr/src/uts/common/fs/zfs/sys/spa.h
+++ b/usr/src/uts/common/fs/zfs/sys/spa.h
@@ -26,7 +26,7 @@
* Copyright 2013 Saso Kiselkov. All rights reserved.
* Copyright (c) 2014 Integros [integros.com]
* Copyright 2019 Joyent, Inc.
- * Copyright (c) 2017 Datto Inc.
+ * Copyright (c) 2017, 2019, Datto Inc. All rights reserved.
* Copyright (c) 2017, Intel Corporation.
* Copyright 2020 Joshua M. Clulow <josh@sysmgr.org>
*/
@@ -775,6 +775,7 @@ extern void spa_async_request(spa_t *spa, int flag);
extern void spa_async_unrequest(spa_t *spa, int flag);
extern void spa_async_suspend(spa_t *spa);
extern void spa_async_resume(spa_t *spa);
+extern int spa_async_tasks(spa_t *spa);
extern spa_t *spa_inject_addref(char *pool);
extern void spa_inject_delref(spa_t *spa);
extern void spa_scan_stat_init(spa_t *spa);
diff --git a/usr/src/uts/common/fs/zfs/sys/vdev.h b/usr/src/uts/common/fs/zfs/sys/vdev.h
index a6de7e6f2c..b8c2ee5c9e 100644
--- a/usr/src/uts/common/fs/zfs/sys/vdev.h
+++ b/usr/src/uts/common/fs/zfs/sys/vdev.h
@@ -23,6 +23,7 @@
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2017 by Delphix. All rights reserved.
* Copyright (c) 2017, Intel Corporation.
+ * Copyright (c) 2019, Datto Inc. All rights reserved.
*/
#ifndef _SYS_VDEV_H
@@ -153,6 +154,8 @@ extern void vdev_state_dirty(vdev_t *vd);
extern void vdev_state_clean(vdev_t *vd);
extern void vdev_set_deferred_resilver(spa_t *spa, vdev_t *vd);
+extern void vdev_defer_resilver(vdev_t *vd);
+extern boolean_t vdev_clear_resilver_deferred(vdev_t *vd, dmu_tx_t *tx);
typedef enum vdev_config_flag {
VDEV_CONFIG_SPARE = 1 << 0,
diff --git a/usr/src/uts/common/fs/zfs/vdev.c b/usr/src/uts/common/fs/zfs/vdev.c
index 01e892f4c4..f824490255 100644
--- a/usr/src/uts/common/fs/zfs/vdev.c
+++ b/usr/src/uts/common/fs/zfs/vdev.c
@@ -27,6 +27,7 @@
* Copyright 2016 Toomas Soome <tsoome@me.com>
* Copyright 2019 Joyent, Inc.
* Copyright (c) 2017, Intel Corporation.
+ * Copyright (c) 2019, Datto Inc. All rights reserved.
*/
#include <sys/zfs_context.h>
@@ -772,7 +773,7 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id,
&vd->vdev_resilver_txg);
if (nvlist_exists(nv, ZPOOL_CONFIG_RESILVER_DEFER))
- vdev_set_deferred_resilver(spa, vd);
+ vdev_defer_resilver(vd);
/*
* When importing a pool, we want to ignore the persistent fault
@@ -1764,18 +1765,12 @@ vdev_open(vdev_t *vd)
}
/*
- * If a leaf vdev has a DTL, and seems healthy, then kick off a
- * resilver. But don't do this if we are doing a reopen for a scrub,
- * since this would just restart the scrub we are already doing.
+ * If this is a leaf vdev, assess whether a resilver is needed.
+ * But don't do this if we are doing a reopen for a scrub, since
+ * this would just restart the scrub we are already doing.
*/
- if (vd->vdev_ops->vdev_op_leaf && !spa->spa_scrub_reopen &&
- vdev_resilver_needed(vd, NULL, NULL)) {
- if (dsl_scan_resilvering(spa->spa_dsl_pool) &&
- spa_feature_is_enabled(spa, SPA_FEATURE_RESILVER_DEFER))
- vdev_set_deferred_resilver(spa, vd);
- else
- spa_async_request(spa, SPA_ASYNC_RESILVER);
- }
+ if (vd->vdev_ops->vdev_op_leaf && !spa->spa_scrub_reopen)
+ dsl_scan_assess_vdev(spa->spa_dsl_pool, vd);
return (0);
}
@@ -3543,14 +3538,11 @@ vdev_clear(spa_t *spa, vdev_t *vd)
if (vd != rvd && vdev_writeable(vd->vdev_top))
vdev_state_dirty(vd->vdev_top);
- if (vd->vdev_aux == NULL && !vdev_is_dead(vd)) {
- if (dsl_scan_resilvering(spa->spa_dsl_pool) &&
- spa_feature_is_enabled(spa,
- SPA_FEATURE_RESILVER_DEFER))
- vdev_set_deferred_resilver(spa, vd);
- else
- spa_async_request(spa, SPA_ASYNC_RESILVER);
- }
+ /* If a resilver isn't required, check if vdevs can be culled */
+ if (vd->vdev_aux == NULL && !vdev_is_dead(vd) &&
+ !dsl_scan_resilvering(spa->spa_dsl_pool) &&
+ !dsl_scan_resilver_scheduled(spa->spa_dsl_pool))
+ spa_async_request(spa, SPA_ASYNC_RESILVER_DONE);
spa_event_notify(spa, vd, NULL, ESC_ZFS_VDEV_CLEAR);
}
@@ -4559,18 +4551,46 @@ vdev_deadman(vdev_t *vd)
}
void
-vdev_set_deferred_resilver(spa_t *spa, vdev_t *vd)
+vdev_defer_resilver(vdev_t *vd)
{
- for (uint64_t i = 0; i < vd->vdev_children; i++)
- vdev_set_deferred_resilver(spa, vd->vdev_child[i]);
+ ASSERT(vd->vdev_ops->vdev_op_leaf);
- if (!vd->vdev_ops->vdev_op_leaf || !vdev_writeable(vd) ||
- range_tree_is_empty(vd->vdev_dtl[DTL_MISSING])) {
- return;
+ vd->vdev_resilver_deferred = B_TRUE;
+ vd->vdev_spa->spa_resilver_deferred = B_TRUE;
+}
+
+/*
+ * Clears the resilver deferred flag on all leaf vdevs under vd. Returns
+ * B_TRUE if any of those leaf vdevs needs to be resilvered and is available
+ * to accept resilver I/Os.
+ */
+boolean_t
+vdev_clear_resilver_deferred(vdev_t *vd, dmu_tx_t *tx)
+{
+ boolean_t resilver_needed = B_FALSE;
+ spa_t *spa = vd->vdev_spa;
+
+ for (int c = 0; c < vd->vdev_children; c++) {
+ vdev_t *cvd = vd->vdev_child[c];
+ resilver_needed |= vdev_clear_resilver_deferred(cvd, tx);
}
- vd->vdev_resilver_deferred = B_TRUE;
- spa->spa_resilver_deferred = B_TRUE;
+ if (vd == spa->spa_root_vdev &&
+ spa_feature_is_active(spa, SPA_FEATURE_RESILVER_DEFER)) {
+ spa_feature_decr(spa, SPA_FEATURE_RESILVER_DEFER, tx);
+ vdev_config_dirty(vd);
+ spa->spa_resilver_deferred = B_FALSE;
+ return (resilver_needed);
+ }
+
+ if (!vdev_is_concrete(vd) || vd->vdev_aux ||
+ !vd->vdev_ops->vdev_op_leaf)
+ return (resilver_needed);
+
+ vd->vdev_resilver_deferred = B_FALSE;
+
+ return (!vdev_is_dead(vd) && !vd->vdev_offline &&
+ vdev_resilver_needed(vd, NULL, NULL));
}
/*