author     Alek Pinchuk <apinchuk@datto.com>           2017-07-11 15:17:02 -0400
committer  Prakash Surya <prakash.surya@delphix.com>   2017-08-21 12:29:56 -0700
commit     1702cce751c5cb7ead878d0205a6c90b027e3de8 (patch)
tree       4128f9cf802c8cdb929eb5e0f9468a60aae00609 /usr/src/uts/common
parent     8f9a8cb713c7aacb6b1ec423716315b430386f88 (diff)
download   illumos-joyent-1702cce751c5cb7ead878d0205a6c90b027e3de8.tar.gz
8414 Implemented zpool scrub pause/resume
Reviewed by: George Melikov <mail@gmelikov.ru>
Reviewed by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed by: Brad Lewis <brad.lewis@delphix.com>
Reviewed by: Serapheim Dimitropoulos <serapheim@delphix.com>
Reviewed by: Matt Ahrens <mahrens@delphix.com>
Approved by: Dan McDonald <danmcd@joyent.com>
Diffstat (limited to 'usr/src/uts/common')
-rw-r--r--  usr/src/uts/common/fs/zfs/bpobj.c            4
-rw-r--r--  usr/src/uts/common/fs/zfs/dsl_scan.c       172
-rw-r--r--  usr/src/uts/common/fs/zfs/spa.c             11
-rw-r--r--  usr/src/uts/common/fs/zfs/spa_misc.c         8
-rw-r--r--  usr/src/uts/common/fs/zfs/sys/dsl_scan.h    13
-rw-r--r--  usr/src/uts/common/fs/zfs/sys/spa.h          2
-rw-r--r--  usr/src/uts/common/fs/zfs/sys/spa_impl.h     3
-rw-r--r--  usr/src/uts/common/fs/zfs/zfs_ioctl.c        9
-rw-r--r--  usr/src/uts/common/sys/fs/zfs.h             14
9 files changed, 197 insertions, 39 deletions
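
For orientation before the individual hunks: the kernel-visible surface added by this commit consists of a new pool_scrub_cmd_t enum, a persistent DSF_SCRUB_PAUSED flag in the on-disk scan state, and the following entry points (prototypes collected from the headers changed below):

    extern int spa_scrub_pause_resume(spa_t *spa, pool_scrub_cmd_t flag);              /* spa.h / spa.c */
    int dsl_scrub_set_pause_resume(const struct dsl_pool *dp, pool_scrub_cmd_t cmd);   /* dsl_scan.h / dsl_scan.c */
    boolean_t dsl_scan_scrubbing(const struct dsl_pool *dp);                           /* dsl_scan.h / dsl_scan.c */
    boolean_t dsl_scan_is_paused_scrub(const dsl_scan_t *scn);                         /* dsl_scan.h / dsl_scan.c */

zfs_ioc_pool_scan() dispatches a POOL_SCRUB_PAUSE request to spa_scrub_pause_resume(), which refuses to act while a resilver is running and otherwise runs dsl_scrub_pause_resume_check()/dsl_scrub_pause_resume_sync() as a DSL sync task to set or clear DSF_SCRUB_PAUSED.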
diff --git a/usr/src/uts/common/fs/zfs/bpobj.c b/usr/src/uts/common/fs/zfs/bpobj.c
index 19e97cbabf..0bcfc00313 100644
--- a/usr/src/uts/common/fs/zfs/bpobj.c
+++ b/usr/src/uts/common/fs/zfs/bpobj.c
@@ -22,6 +22,7 @@
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2016 by Delphix. All rights reserved.
* Copyright (c) 2014 Integros [integros.com]
+ * Copyright (c) 2017 Datto Inc.
*/
#include <sys/bpobj.h>
@@ -212,6 +213,9 @@ bpobj_iterate_impl(bpobj_t *bpo, bpobj_itor_t func, void *arg, dmu_tx_t *tx,
mutex_enter(&bpo->bpo_lock);
+ if (!bpobj_hasentries(bpo))
+ goto out;
+
if (free)
dmu_buf_will_dirty(bpo->bpo_dbuf, tx);
diff --git a/usr/src/uts/common/fs/zfs/dsl_scan.c b/usr/src/uts/common/fs/zfs/dsl_scan.c
index ccd9eb4c39..67ee4d48cd 100644
--- a/usr/src/uts/common/fs/zfs/dsl_scan.c
+++ b/usr/src/uts/common/fs/zfs/dsl_scan.c
@@ -23,6 +23,7 @@
* Copyright 2016 Gary Mills
* Copyright (c) 2011, 2016 by Delphix. All rights reserved.
* Copyright 2017 Joyent, Inc.
+ * Copyright (c) 2017 Datto Inc.
*/
#include <sys/dsl_scan.h>
@@ -285,6 +286,8 @@ dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx)
scn->scn_phys.scn_queue_obj = 0;
}
+ scn->scn_phys.scn_flags &= ~DSF_SCRUB_PAUSED;
+
/*
* If we were "restarted" from a stopped state, don't bother
* with anything else.
@@ -369,6 +372,91 @@ dsl_scan_cancel(dsl_pool_t *dp)
dsl_scan_cancel_sync, NULL, 3, ZFS_SPACE_CHECK_RESERVED));
}
+boolean_t
+dsl_scan_is_paused_scrub(const dsl_scan_t *scn)
+{
+ if (dsl_scan_scrubbing(scn->scn_dp) &&
+ scn->scn_phys.scn_flags & DSF_SCRUB_PAUSED)
+ return (B_TRUE);
+
+ return (B_FALSE);
+}
+
+static int
+dsl_scrub_pause_resume_check(void *arg, dmu_tx_t *tx)
+{
+ pool_scrub_cmd_t *cmd = arg;
+ dsl_pool_t *dp = dmu_tx_pool(tx);
+ dsl_scan_t *scn = dp->dp_scan;
+
+ if (*cmd == POOL_SCRUB_PAUSE) {
+ /* can't pause a scrub when there is no in-progress scrub */
+ if (!dsl_scan_scrubbing(dp))
+ return (SET_ERROR(ENOENT));
+
+ /* can't pause a paused scrub */
+ if (dsl_scan_is_paused_scrub(scn))
+ return (SET_ERROR(EBUSY));
+ } else if (*cmd != POOL_SCRUB_NORMAL) {
+ return (SET_ERROR(ENOTSUP));
+ }
+
+ return (0);
+}
+
+static void
+dsl_scrub_pause_resume_sync(void *arg, dmu_tx_t *tx)
+{
+ pool_scrub_cmd_t *cmd = arg;
+ dsl_pool_t *dp = dmu_tx_pool(tx);
+ spa_t *spa = dp->dp_spa;
+ dsl_scan_t *scn = dp->dp_scan;
+
+ if (*cmd == POOL_SCRUB_PAUSE) {
+ /* pause request was already validated in dsl_scrub_pause_resume_check() */
+ spa->spa_scan_pass_scrub_pause = gethrestime_sec();
+ scn->scn_phys.scn_flags |= DSF_SCRUB_PAUSED;
+ dsl_scan_sync_state(scn, tx);
+ } else {
+ ASSERT3U(*cmd, ==, POOL_SCRUB_NORMAL);
+ if (dsl_scan_is_paused_scrub(scn)) {
+ /*
+ * We need to keep track of how much time we spend
+ * paused per pass so that we can adjust the scrub rate
+ * shown in the output of 'zpool status'
+ */
+ spa->spa_scan_pass_scrub_spent_paused +=
+ gethrestime_sec() - spa->spa_scan_pass_scrub_pause;
+ spa->spa_scan_pass_scrub_pause = 0;
+ scn->scn_phys.scn_flags &= ~DSF_SCRUB_PAUSED;
+ dsl_scan_sync_state(scn, tx);
+ }
+ }
+}
+
+/*
+ * Set scrub pause/resume state if it makes sense to do so
+ */
+int
+dsl_scrub_set_pause_resume(const dsl_pool_t *dp, pool_scrub_cmd_t cmd)
+{
+ return (dsl_sync_task(spa_name(dp->dp_spa),
+ dsl_scrub_pause_resume_check, dsl_scrub_pause_resume_sync, &cmd, 3,
+ ZFS_SPACE_CHECK_RESERVED));
+}
+
+boolean_t
+dsl_scan_scrubbing(const dsl_pool_t *dp)
+{
+ dsl_scan_t *scn = dp->dp_scan;
+
+ if (scn->scn_phys.scn_state == DSS_SCANNING &&
+ scn->scn_phys.scn_func == POOL_SCAN_SCRUB)
+ return (B_TRUE);
+
+ return (B_FALSE);
+}
+
static void dsl_scan_visitbp(blkptr_t *bp, const zbookmark_phys_t *zb,
dnode_phys_t *dnp, dsl_dataset_t *ds, dsl_scan_t *scn,
dmu_objset_type_t ostype, dmu_tx_t *tx);
@@ -410,14 +498,14 @@ dsl_scan_sync_state(dsl_scan_t *scn, dmu_tx_t *tx)
extern int zfs_vdev_async_write_active_min_dirty_percent;
static boolean_t
-dsl_scan_check_pause(dsl_scan_t *scn, const zbookmark_phys_t *zb)
+dsl_scan_check_suspend(dsl_scan_t *scn, const zbookmark_phys_t *zb)
{
/* we never skip user/group accounting objects */
if (zb && (int64_t)zb->zb_object < 0)
return (B_FALSE);
- if (scn->scn_pausing)
- return (B_TRUE); /* we're already pausing */
+ if (scn->scn_suspending)
+ return (B_TRUE); /* we're already suspending */
if (!ZB_IS_ZERO(&scn->scn_phys.scn_bookmark))
return (B_FALSE); /* we're resuming */
@@ -427,7 +515,7 @@ dsl_scan_check_pause(dsl_scan_t *scn, const zbookmark_phys_t *zb)
return (B_FALSE);
/*
- * We pause if:
+ * We suspend if:
* - we have scanned for the maximum time: an entire txg
* timeout (default 5 sec)
* or
@@ -450,19 +538,19 @@ dsl_scan_check_pause(dsl_scan_t *scn, const zbookmark_phys_t *zb)
dirty_pct >= zfs_vdev_async_write_active_min_dirty_percent)) ||
spa_shutting_down(scn->scn_dp->dp_spa)) {
if (zb) {
- dprintf("pausing at bookmark %llx/%llx/%llx/%llx\n",
+ dprintf("suspending at bookmark %llx/%llx/%llx/%llx\n",
(longlong_t)zb->zb_objset,
(longlong_t)zb->zb_object,
(longlong_t)zb->zb_level,
(longlong_t)zb->zb_blkid);
scn->scn_phys.scn_bookmark = *zb;
}
- dprintf("pausing at DDT bookmark %llx/%llx/%llx/%llx\n",
+ dprintf("suspending at DDT bookmark %llx/%llx/%llx/%llx\n",
(longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_class,
(longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_type,
(longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_checksum,
(longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_cursor);
- scn->scn_pausing = B_TRUE;
+ scn->scn_suspending = B_TRUE;
return (B_TRUE);
}
return (B_FALSE);
@@ -600,7 +688,7 @@ dsl_scan_check_resume(dsl_scan_t *scn, const dnode_phys_t *dnp,
/*
* If we found the block we're trying to resume from, or
* we went past it to a different object, zero it out to
- * indicate that it's OK to start checking for pausing
+ * indicate that it's OK to start checking for suspending
* again.
*/
if (bcmp(zb, &scn->scn_phys.scn_bookmark, sizeof (*zb)) == 0 ||
@@ -703,7 +791,7 @@ dsl_scan_recurse(dsl_scan_t *scn, dsl_dataset_t *ds, dmu_objset_type_t ostype,
/*
* We also always visit user/group accounting
* objects, and never skip them, even if we are
- * pausing. This is necessary so that the space
+ * suspending. This is necessary so that the space
* deltas from this txg get integrated.
*/
dsl_scan_visitdnode(scn, ds, osp->os_type,
@@ -759,7 +847,7 @@ dsl_scan_visitbp(blkptr_t *bp, const zbookmark_phys_t *zb,
/* ASSERT(pbuf == NULL || arc_released(pbuf)); */
- if (dsl_scan_check_pause(scn, zb))
+ if (dsl_scan_check_suspend(scn, zb))
return;
if (dsl_scan_check_resume(scn, dnp, zb))
@@ -1096,14 +1184,14 @@ dsl_scan_visitds(dsl_scan_t *scn, uint64_t dsobj, dmu_tx_t *tx)
char *dsname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
dsl_dataset_name(ds, dsname);
zfs_dbgmsg("scanned dataset %llu (%s) with min=%llu max=%llu; "
- "pausing=%u",
+ "suspending=%u",
(longlong_t)dsobj, dsname,
(longlong_t)scn->scn_phys.scn_cur_min_txg,
(longlong_t)scn->scn_phys.scn_cur_max_txg,
- (int)scn->scn_pausing);
+ (int)scn->scn_suspending);
kmem_free(dsname, ZFS_MAX_DATASET_NAME_LEN);
- if (scn->scn_pausing)
+ if (scn->scn_suspending)
goto out;
/*
@@ -1267,13 +1355,13 @@ dsl_scan_ddt(dsl_scan_t *scn, dmu_tx_t *tx)
dsl_scan_ddt_entry(scn, ddb->ddb_checksum, &dde, tx);
n++;
- if (dsl_scan_check_pause(scn, NULL))
+ if (dsl_scan_check_suspend(scn, NULL))
break;
}
- zfs_dbgmsg("scanned %llu ddt entries with class_max = %u; pausing=%u",
- (longlong_t)n, (int)scn->scn_phys.scn_ddt_class_max,
- (int)scn->scn_pausing);
+ zfs_dbgmsg("scanned %llu ddt entries with class_max = %u; "
+ "suspending=%u", (longlong_t)n,
+ (int)scn->scn_phys.scn_ddt_class_max, (int)scn->scn_suspending);
ASSERT(error == 0 || error == ENOENT);
ASSERT(error != ENOENT ||
@@ -1316,7 +1404,7 @@ dsl_scan_visit(dsl_scan_t *scn, dmu_tx_t *tx)
scn->scn_phys.scn_cur_min_txg = scn->scn_phys.scn_min_txg;
scn->scn_phys.scn_cur_max_txg = scn->scn_phys.scn_max_txg;
dsl_scan_ddt(scn, tx);
- if (scn->scn_pausing)
+ if (scn->scn_suspending)
return;
}
@@ -1328,7 +1416,7 @@ dsl_scan_visit(dsl_scan_t *scn, dmu_tx_t *tx)
dsl_scan_visit_rootbp(scn, NULL,
&dp->dp_meta_rootbp, tx);
spa_set_rootblkptr(dp->dp_spa, &dp->dp_meta_rootbp);
- if (scn->scn_pausing)
+ if (scn->scn_suspending)
return;
if (spa_version(dp->dp_spa) < SPA_VERSION_DSL_SCRUB) {
@@ -1338,22 +1426,22 @@ dsl_scan_visit(dsl_scan_t *scn, dmu_tx_t *tx)
dsl_scan_visitds(scn,
dp->dp_origin_snap->ds_object, tx);
}
- ASSERT(!scn->scn_pausing);
+ ASSERT(!scn->scn_suspending);
} else if (scn->scn_phys.scn_bookmark.zb_objset !=
ZB_DESTROYED_OBJSET) {
/*
- * If we were paused, continue from here. Note if the
- * ds we were paused on was deleted, the zb_objset may
+ * If we were suspended, continue from here. Note if the
+ * ds we were suspended on was deleted, the zb_objset may
* be -1, so we will skip this and find a new objset
* below.
*/
dsl_scan_visitds(scn, scn->scn_phys.scn_bookmark.zb_objset, tx);
- if (scn->scn_pausing)
+ if (scn->scn_suspending)
return;
}
/*
- * In case we were paused right at the end of the ds, zero the
+ * In case we were suspended right at the end of the ds, zero the
* bookmark so we don't think that we're still trying to resume.
*/
bzero(&scn->scn_phys.scn_bookmark, sizeof (zbookmark_phys_t));
@@ -1385,14 +1473,14 @@ dsl_scan_visit(dsl_scan_t *scn, dmu_tx_t *tx)
dsl_scan_visitds(scn, dsobj, tx);
zap_cursor_fini(&zc);
- if (scn->scn_pausing)
+ if (scn->scn_suspending)
return;
}
zap_cursor_fini(&zc);
}
static boolean_t
-dsl_scan_free_should_pause(dsl_scan_t *scn)
+dsl_scan_free_should_suspend(dsl_scan_t *scn)
{
uint64_t elapsed_nanosecs;
@@ -1416,7 +1504,7 @@ dsl_scan_free_block_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
if (!scn->scn_is_bptree ||
(BP_GET_LEVEL(bp) == 0 && BP_GET_TYPE(bp) != DMU_OT_OBJSET)) {
- if (dsl_scan_free_should_pause(scn))
+ if (dsl_scan_free_should_suspend(scn))
return (SET_ERROR(ERESTART));
}
@@ -1439,7 +1527,8 @@ dsl_scan_active(dsl_scan_t *scn)
return (B_FALSE);
if (spa_shutting_down(spa))
return (B_FALSE);
- if (scn->scn_phys.scn_state == DSS_SCANNING ||
+ if ((scn->scn_phys.scn_state == DSS_SCANNING &&
+ !dsl_scan_is_paused_scrub(scn)) ||
(scn->scn_async_destroying && !scn->scn_async_stalled))
return (B_TRUE);
@@ -1494,12 +1583,12 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
return;
scn->scn_visited_this_txg = 0;
- scn->scn_pausing = B_FALSE;
+ scn->scn_suspending = B_FALSE;
scn->scn_sync_start_time = gethrtime();
spa->spa_scrub_active = B_TRUE;
/*
- * First process the async destroys. If we pause, don't do
+ * First process the async destroys. If we suspend, don't do
* any scrubbing or resilvering. This ensures that there are no
* async destroys while we are scanning, so the scan code doesn't
* have to worry about traversing it. It is also faster to free the
@@ -1616,7 +1705,7 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
return;
if (scn->scn_done_txg == tx->tx_txg) {
- ASSERT(!scn->scn_pausing);
+ ASSERT(!scn->scn_suspending);
/* finished with scan. */
zfs_dbgmsg("txg %llu scan complete", tx->tx_txg);
dsl_scan_done(scn, B_TRUE, tx);
@@ -1625,6 +1714,9 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
return;
}
+ if (dsl_scan_is_paused_scrub(scn))
+ return;
+
if (scn->scn_phys.scn_ddt_bookmark.ddb_class <=
scn->scn_phys.scn_ddt_class_max) {
zfs_dbgmsg("doing scan sync txg %llu; "
@@ -1659,7 +1751,7 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
(longlong_t)scn->scn_visited_this_txg,
(longlong_t)NSEC2MSEC(gethrtime() - scn->scn_sync_start_time));
- if (!scn->scn_pausing) {
+ if (!scn->scn_suspending) {
scn->scn_done_txg = tx->tx_txg + 1;
zfs_dbgmsg("txg %llu traversal complete, waiting till txg %llu",
tx->tx_txg, scn->scn_done_txg);
@@ -1867,11 +1959,15 @@ dsl_scan_scrub_cb(dsl_pool_t *dp,
return (0);
}
-/* Called by the ZFS_IOC_POOL_SCAN ioctl to start a scrub or resilver */
+/*
+ * Called by the ZFS_IOC_POOL_SCAN ioctl to start a scrub or resilver.
+ * Can also be called to resume a paused scrub.
+ */
int
dsl_scan(dsl_pool_t *dp, pool_scan_func_t func)
{
spa_t *spa = dp->dp_spa;
+ dsl_scan_t *scn = dp->dp_scan;
/*
* Purge all vdev caches and probe all devices. We do this here
@@ -1886,6 +1982,16 @@ dsl_scan(dsl_pool_t *dp, pool_scan_func_t func)
spa->spa_scrub_reopen = B_FALSE;
(void) spa_vdev_state_exit(spa, NULL, 0);
+ if (func == POOL_SCAN_SCRUB && dsl_scan_is_paused_scrub(scn)) {
+ /* got scrub start cmd, resume paused scrub */
+ int err = dsl_scrub_set_pause_resume(scn->scn_dp,
+ POOL_SCRUB_NORMAL);
+ if (err == 0)
+ return (ECANCELED);
+
+ return (SET_ERROR(err));
+ }
+
return (dsl_sync_task(spa_name(spa), dsl_scan_setup_check,
dsl_scan_setup_sync, &func, 0, ZFS_SPACE_CHECK_NONE));
}
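
Note the resume convention in dsl_scan() above: when a scrub-start request arrives while a scrub is paused, the scan is not restarted from scratch; the pause is simply lifted and ECANCELED is returned so the caller can tell a resume apart from a fresh scrub. A minimal, hypothetical in-kernel sketch of a caller honoring that convention (example_start_or_resume_scrub is not part of this change):

    /* Hypothetical helper: start a scrub, treating a resume of a paused scrub as success. */
    static int
    example_start_or_resume_scrub(spa_t *spa)
    {
    	int err = spa_scan(spa, POOL_SCAN_SCRUB);

    	/*
    	 * dsl_scan() reports a successful resume of a paused scrub as
    	 * ECANCELED (see the hunk above); treat that as success here.
    	 */
    	if (err == ECANCELED)
    		return (0);

    	return (err);
    }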
diff --git a/usr/src/uts/common/fs/zfs/spa.c b/usr/src/uts/common/fs/zfs/spa.c
index 4fa6a6c79a..576ef1525c 100644
--- a/usr/src/uts/common/fs/zfs/spa.c
+++ b/usr/src/uts/common/fs/zfs/spa.c
@@ -28,6 +28,7 @@
* Copyright (c) 2014 Integros [integros.com]
* Copyright 2016 Toomas Soome <tsoome@me.com>
* Copyright 2017 Joyent, Inc.
+ * Copyright (c) 2017 Datto Inc.
*/
/*
@@ -5748,6 +5749,16 @@ spa_vdev_setfru(spa_t *spa, uint64_t guid, const char *newfru)
* SPA Scanning
* ==========================================================================
*/
+int
+spa_scrub_pause_resume(spa_t *spa, pool_scrub_cmd_t cmd)
+{
+ ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0);
+
+ if (dsl_scan_resilvering(spa->spa_dsl_pool))
+ return (SET_ERROR(EBUSY));
+
+ return (dsl_scrub_set_pause_resume(spa->spa_dsl_pool, cmd));
+}
int
spa_scan_stop(spa_t *spa)
diff --git a/usr/src/uts/common/fs/zfs/spa_misc.c b/usr/src/uts/common/fs/zfs/spa_misc.c
index 6555d4eee5..22d69b185b 100644
--- a/usr/src/uts/common/fs/zfs/spa_misc.c
+++ b/usr/src/uts/common/fs/zfs/spa_misc.c
@@ -25,6 +25,7 @@
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
* Copyright 2013 Saso Kiselkov. All rights reserved.
* Copyright (c) 2014 Integros [integros.com]
+ * Copyright (c) 2017 Datto Inc.
*/
#include <sys/zfs_context.h>
@@ -2008,6 +2009,11 @@ spa_scan_stat_init(spa_t *spa)
{
/* data not stored on disk */
spa->spa_scan_pass_start = gethrestime_sec();
+ if (dsl_scan_is_paused_scrub(spa->spa_dsl_pool->dp_scan))
+ spa->spa_scan_pass_scrub_pause = spa->spa_scan_pass_start;
+ else
+ spa->spa_scan_pass_scrub_pause = 0;
+ spa->spa_scan_pass_scrub_spent_paused = 0;
spa->spa_scan_pass_exam = 0;
vdev_scan_stat_init(spa->spa_root_vdev);
}
@@ -2038,6 +2044,8 @@ spa_scan_get_stats(spa_t *spa, pool_scan_stat_t *ps)
/* data not stored on disk */
ps->pss_pass_start = spa->spa_scan_pass_start;
ps->pss_pass_exam = spa->spa_scan_pass_exam;
+ ps->pss_pass_scrub_pause = spa->spa_scan_pass_scrub_pause;
+ ps->pss_pass_scrub_spent_paused = spa->spa_scan_pass_scrub_spent_paused;
return (0);
}
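
The two pass-level counters exported in spa_scan_get_stats() above exist so that time spent paused can be excluded from the scrub rate shown by 'zpool status' (see the comment in dsl_scrub_pause_resume_sync()). A rough sketch of how a stats consumer could use them; the real userland code is outside this kernel-only diff, so this is an assumption about its shape:

    /*
     * Sketch: effective scan rate in bytes/sec for the current pass,
     * excluding time the scrub spent paused. 'now' is the current time
     * in seconds (e.g. from time(NULL)); ps comes from the pool's
     * pool_scan_stat_t.
     */
    static uint64_t
    example_scan_rate(const pool_scan_stat_t *ps, uint64_t now)
    {
    	uint64_t elapsed = now - ps->pss_pass_start -
    	    ps->pss_pass_scrub_spent_paused;

    	/* if the scrub is paused right now, stop the clock at the pause time */
    	if (ps->pss_pass_scrub_pause != 0)
    		elapsed = ps->pss_pass_scrub_pause - ps->pss_pass_start -
    		    ps->pss_pass_scrub_spent_paused;

    	if (elapsed == 0)
    		elapsed = 1;	/* avoid division by zero */

    	return (ps->pss_pass_exam / elapsed);
    }

This mirrors the fields' semantics as set in spa_scan_stat_init() and dsl_scrub_pause_resume_sync(): pss_pass_scrub_pause holds the wall-clock second the current pause began (0 when not paused), and pss_pass_scrub_spent_paused accumulates completed pause intervals for the pass.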
diff --git a/usr/src/uts/common/fs/zfs/sys/dsl_scan.h b/usr/src/uts/common/fs/zfs/sys/dsl_scan.h
index ee8512c07d..fd950cc014 100644
--- a/usr/src/uts/common/fs/zfs/sys/dsl_scan.h
+++ b/usr/src/uts/common/fs/zfs/sys/dsl_scan.h
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2017 Datto Inc.
*/
#ifndef _SYS_DSL_SCAN_H
@@ -70,6 +71,7 @@ typedef struct dsl_scan_phys {
typedef enum dsl_scan_flags {
DSF_VISIT_DS_AGAIN = 1<<0,
+ DSF_SCRUB_PAUSED = 1<<1,
} dsl_scan_flags_t;
/*
@@ -82,8 +84,8 @@ typedef enum dsl_scan_flags {
*
* The following members of this structure direct the behavior of the scan:
*
- * scn_pausing - a scan that cannot be completed in a single txg or
- * has exceeded its allotted time will need to pause.
+ * scn_suspending - a scan that cannot be completed in a single txg or
+ * has exceeded its allotted time will need to suspend.
* When this flag is set the scanner will stop traversing
* the pool and write out the current state to disk.
*
@@ -105,7 +107,7 @@ typedef enum dsl_scan_flags {
typedef struct dsl_scan {
struct dsl_pool *scn_dp;
- boolean_t scn_pausing;
+ boolean_t scn_suspending;
uint64_t scn_restart_txg;
uint64_t scn_done_txg;
uint64_t scn_sync_start_time;
@@ -115,8 +117,6 @@ typedef struct dsl_scan {
boolean_t scn_is_bptree;
boolean_t scn_async_destroying;
boolean_t scn_async_stalled;
-
- /* for debugging / information */
uint64_t scn_visited_this_txg;
dsl_scan_phys_t scn_phys;
@@ -127,6 +127,8 @@ void dsl_scan_fini(struct dsl_pool *dp);
void dsl_scan_sync(struct dsl_pool *, dmu_tx_t *);
int dsl_scan_cancel(struct dsl_pool *);
int dsl_scan(struct dsl_pool *, pool_scan_func_t);
+boolean_t dsl_scan_scrubbing(const struct dsl_pool *dp);
+int dsl_scrub_set_pause_resume(const struct dsl_pool *dp, pool_scrub_cmd_t cmd);
void dsl_resilver_restart(struct dsl_pool *, uint64_t txg);
boolean_t dsl_scan_resilvering(struct dsl_pool *dp);
boolean_t dsl_dataset_unstable(struct dsl_dataset *ds);
@@ -137,6 +139,7 @@ void dsl_scan_ds_snapshotted(struct dsl_dataset *ds, struct dmu_tx *tx);
void dsl_scan_ds_clone_swapped(struct dsl_dataset *ds1, struct dsl_dataset *ds2,
struct dmu_tx *tx);
boolean_t dsl_scan_active(dsl_scan_t *scn);
+boolean_t dsl_scan_is_paused_scrub(const dsl_scan_t *scn);
#ifdef __cplusplus
}
diff --git a/usr/src/uts/common/fs/zfs/sys/spa.h b/usr/src/uts/common/fs/zfs/sys/spa.h
index 0243c3effd..1d267c971e 100644
--- a/usr/src/uts/common/fs/zfs/sys/spa.h
+++ b/usr/src/uts/common/fs/zfs/sys/spa.h
@@ -26,6 +26,7 @@
* Copyright 2013 Saso Kiselkov. All rights reserved.
* Copyright (c) 2014 Integros [integros.com]
* Copyright 2017 Joyent, Inc.
+ * Copyright (c) 2017 Datto Inc.
*/
#ifndef _SYS_SPA_H
@@ -676,6 +677,7 @@ extern void spa_l2cache_drop(spa_t *spa);
/* scanning */
extern int spa_scan(spa_t *spa, pool_scan_func_t func);
extern int spa_scan_stop(spa_t *spa);
+extern int spa_scrub_pause_resume(spa_t *spa, pool_scrub_cmd_t flag);
/* spa syncing */
extern void spa_sync(spa_t *spa, uint64_t txg); /* only for DMU use */
diff --git a/usr/src/uts/common/fs/zfs/sys/spa_impl.h b/usr/src/uts/common/fs/zfs/sys/spa_impl.h
index 73d2df0168..bca9131261 100644
--- a/usr/src/uts/common/fs/zfs/sys/spa_impl.h
+++ b/usr/src/uts/common/fs/zfs/sys/spa_impl.h
@@ -24,6 +24,7 @@
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
* Copyright 2013 Saso Kiselkov. All rights reserved.
+ * Copyright (c) 2017 Datto Inc.
*/
#ifndef _SYS_SPA_IMPL_H
@@ -192,6 +193,8 @@ struct spa {
uint8_t spa_scrub_started; /* started since last boot */
uint8_t spa_scrub_reopen; /* scrub doing vdev_reopen */
uint64_t spa_scan_pass_start; /* start time per pass/reboot */
+ uint64_t spa_scan_pass_scrub_pause; /* scrub pause time */
+ uint64_t spa_scan_pass_scrub_spent_paused; /* total paused */
uint64_t spa_scan_pass_exam; /* examined bytes per pass */
kmutex_t spa_async_lock; /* protect async state */
kthread_t *spa_async_thread; /* thread doing async task */
diff --git a/usr/src/uts/common/fs/zfs/zfs_ioctl.c b/usr/src/uts/common/fs/zfs/zfs_ioctl.c
index 5ffda31f24..418c736221 100644
--- a/usr/src/uts/common/fs/zfs/zfs_ioctl.c
+++ b/usr/src/uts/common/fs/zfs/zfs_ioctl.c
@@ -32,6 +32,7 @@
* Copyright (c) 2014 Integros [integros.com]
* Copyright 2016 Toomas Soome <tsoome@me.com>
* Copyright 2017 RackTop Systems.
+ * Copyright (c) 2017 Datto Inc.
*/
/*
@@ -1683,6 +1684,7 @@ zfs_ioc_pool_tryimport(zfs_cmd_t *zc)
* inputs:
* zc_name name of the pool
* zc_cookie scan func (pool_scan_func_t)
+ * zc_flags scrub pause/resume flag (pool_scrub_cmd_t)
*/
static int
zfs_ioc_pool_scan(zfs_cmd_t *zc)
@@ -1693,7 +1695,12 @@ zfs_ioc_pool_scan(zfs_cmd_t *zc)
if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
return (error);
- if (zc->zc_cookie == POOL_SCAN_NONE)
+ if (zc->zc_flags >= POOL_SCRUB_FLAGS_END)
+ return (SET_ERROR(EINVAL));
+
+ if (zc->zc_flags == POOL_SCRUB_PAUSE)
+ error = spa_scrub_pause_resume(spa, POOL_SCRUB_PAUSE);
+ else if (zc->zc_cookie == POOL_SCAN_NONE)
error = spa_scan_stop(spa);
else
error = spa_scan(spa, zc->zc_cookie);
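
Per the updated input comment above, the ioctl now takes the scan function in zc_cookie and the pause/resume command in zc_flags. A hypothetical userland sketch that pauses a scrub by driving the ioctl directly; real consumers go through libzfs, and the helper name, pool argument, and error handling here are illustrative only:

    #include <fcntl.h>
    #include <string.h>
    #include <unistd.h>
    #include <stropts.h>
    #include <sys/fs/zfs.h>
    #include <sys/zfs_ioctl.h>

    int
    example_pause_scrub(const char *pool)
    {
    	zfs_cmd_t zc = { 0 };
    	int fd, err;

    	if ((fd = open("/dev/zfs", O_RDWR)) < 0)
    		return (-1);

    	(void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name));
    	zc.zc_cookie = POOL_SCAN_SCRUB;	/* scan func */
    	zc.zc_flags = POOL_SCRUB_PAUSE;	/* new pause command */

    	err = ioctl(fd, ZFS_IOC_POOL_SCAN, &zc);
    	(void) close(fd);
    	return (err);
    }

Resuming goes through the normal scrub-start path: issue the same ioctl with zc_flags = POOL_SCRUB_NORMAL and zc_cookie = POOL_SCAN_SCRUB; if a paused scrub exists, dsl_scan() lifts the pause and returns ECANCELED instead of restarting the scan.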
diff --git a/usr/src/uts/common/sys/fs/zfs.h b/usr/src/uts/common/sys/fs/zfs.h
index 389e9b59fa..c12cb65084 100644
--- a/usr/src/uts/common/sys/fs/zfs.h
+++ b/usr/src/uts/common/sys/fs/zfs.h
@@ -25,6 +25,7 @@
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2014 Integros [integros.com]
* Copyright 2017 Joyent, Inc.
+ * Copyright (c) 2017 Datto Inc.
*/
/* Portions Copyright 2010 Robert Milkowski */
@@ -694,6 +695,16 @@ typedef enum pool_scan_func {
} pool_scan_func_t;
/*
+ * Used to control scrub pause and resume.
+ */
+typedef enum pool_scrub_cmd {
+ POOL_SCRUB_NORMAL = 0,
+ POOL_SCRUB_PAUSE,
+ POOL_SCRUB_FLAGS_END
+} pool_scrub_cmd_t;
+
+
+/*
* ZIO types. Needed to interpret vdev statistics below.
*/
typedef enum zio_type {
@@ -725,6 +736,9 @@ typedef struct pool_scan_stat {
/* values not stored on disk */
uint64_t pss_pass_exam; /* examined bytes per scan pass */
uint64_t pss_pass_start; /* start time of a scan pass */
+ uint64_t pss_pass_scrub_pause; /* pause time of a scrub pass */
+ /* cumulative time scrub spent paused, needed for rate calculation */
+ uint64_t pss_pass_scrub_spent_paused;
} pool_scan_stat_t;
typedef enum dsl_scan_state {