diff options
author | Alek Pinchuk <apinchuk@datto.com> | 2017-07-11 15:17:02 -0400 |
---|---|---|
committer | Prakash Surya <prakash.surya@delphix.com> | 2017-08-21 12:29:56 -0700 |
commit | 1702cce751c5cb7ead878d0205a6c90b027e3de8 (patch) | |
tree | 4128f9cf802c8cdb929eb5e0f9468a60aae00609 /usr/src/uts/common | |
parent | 8f9a8cb713c7aacb6b1ec423716315b430386f88 (diff) | |
download | illumos-joyent-1702cce751c5cb7ead878d0205a6c90b027e3de8.tar.gz |
8414 Implemented zpool scrub pause/resume
Reviewed by: George Melikov <mail@gmelikov.ru>
Reviewed by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed by: Brad Lewis <brad.lewis@delphix.com>
Reviewed by: Serapheim Dimitropoulos <serapheim@delphix.com>
Reviewed by: Matt Ahrens <mahrens@delphix.com>
Approved by: Dan McDonald <danmcd@joyent.com>
Diffstat (limited to 'usr/src/uts/common')
-rw-r--r-- | usr/src/uts/common/fs/zfs/bpobj.c | 4 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/dsl_scan.c | 172 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/spa.c | 11 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/spa_misc.c | 8 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/sys/dsl_scan.h | 13 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/sys/spa.h | 2 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/sys/spa_impl.h | 3 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/zfs_ioctl.c | 9 | ||||
-rw-r--r-- | usr/src/uts/common/sys/fs/zfs.h | 14 |
9 files changed, 197 insertions, 39 deletions
diff --git a/usr/src/uts/common/fs/zfs/bpobj.c b/usr/src/uts/common/fs/zfs/bpobj.c index 19e97cbabf..0bcfc00313 100644 --- a/usr/src/uts/common/fs/zfs/bpobj.c +++ b/usr/src/uts/common/fs/zfs/bpobj.c @@ -22,6 +22,7 @@ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011, 2016 by Delphix. All rights reserved. * Copyright (c) 2014 Integros [integros.com] + * Copyright (c) 2017 Datto Inc. */ #include <sys/bpobj.h> @@ -212,6 +213,9 @@ bpobj_iterate_impl(bpobj_t *bpo, bpobj_itor_t func, void *arg, dmu_tx_t *tx, mutex_enter(&bpo->bpo_lock); + if (!bpobj_hasentries(bpo)) + goto out; + if (free) dmu_buf_will_dirty(bpo->bpo_dbuf, tx); diff --git a/usr/src/uts/common/fs/zfs/dsl_scan.c b/usr/src/uts/common/fs/zfs/dsl_scan.c index ccd9eb4c39..67ee4d48cd 100644 --- a/usr/src/uts/common/fs/zfs/dsl_scan.c +++ b/usr/src/uts/common/fs/zfs/dsl_scan.c @@ -23,6 +23,7 @@ * Copyright 2016 Gary Mills * Copyright (c) 2011, 2016 by Delphix. All rights reserved. * Copyright 2017 Joyent, Inc. + * Copyright (c) 2017 Datto Inc. */ #include <sys/dsl_scan.h> @@ -285,6 +286,8 @@ dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx) scn->scn_phys.scn_queue_obj = 0; } + scn->scn_phys.scn_flags &= ~DSF_SCRUB_PAUSED; + /* * If we were "restarted" from a stopped state, don't bother * with anything else. @@ -369,6 +372,91 @@ dsl_scan_cancel(dsl_pool_t *dp) dsl_scan_cancel_sync, NULL, 3, ZFS_SPACE_CHECK_RESERVED)); } +boolean_t +dsl_scan_is_paused_scrub(const dsl_scan_t *scn) +{ + if (dsl_scan_scrubbing(scn->scn_dp) && + scn->scn_phys.scn_flags & DSF_SCRUB_PAUSED) + return (B_TRUE); + + return (B_FALSE); +} + +static int +dsl_scrub_pause_resume_check(void *arg, dmu_tx_t *tx) +{ + pool_scrub_cmd_t *cmd = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + dsl_scan_t *scn = dp->dp_scan; + + if (*cmd == POOL_SCRUB_PAUSE) { + /* can't pause a scrub when there is no in-progress scrub */ + if (!dsl_scan_scrubbing(dp)) + return (SET_ERROR(ENOENT)); + + /* can't pause a paused scrub */ + if (dsl_scan_is_paused_scrub(scn)) + return (SET_ERROR(EBUSY)); + } else if (*cmd != POOL_SCRUB_NORMAL) { + return (SET_ERROR(ENOTSUP)); + } + + return (0); +} + +static void +dsl_scrub_pause_resume_sync(void *arg, dmu_tx_t *tx) +{ + pool_scrub_cmd_t *cmd = arg; + dsl_pool_t *dp = dmu_tx_pool(tx); + spa_t *spa = dp->dp_spa; + dsl_scan_t *scn = dp->dp_scan; + + if (*cmd == POOL_SCRUB_PAUSE) { + /* can't pause a scrub when there is no in-progress scrub */ + spa->spa_scan_pass_scrub_pause = gethrestime_sec(); + scn->scn_phys.scn_flags |= DSF_SCRUB_PAUSED; + dsl_scan_sync_state(scn, tx); + } else { + ASSERT3U(*cmd, ==, POOL_SCRUB_NORMAL); + if (dsl_scan_is_paused_scrub(scn)) { + /* + * We need to keep track of how much time we spend + * paused per pass so that we can adjust the scrub rate + * shown in the output of 'zpool status' + */ + spa->spa_scan_pass_scrub_spent_paused += + gethrestime_sec() - spa->spa_scan_pass_scrub_pause; + spa->spa_scan_pass_scrub_pause = 0; + scn->scn_phys.scn_flags &= ~DSF_SCRUB_PAUSED; + dsl_scan_sync_state(scn, tx); + } + } +} + +/* + * Set scrub pause/resume state if it makes sense to do so + */ +int +dsl_scrub_set_pause_resume(const dsl_pool_t *dp, pool_scrub_cmd_t cmd) +{ + return (dsl_sync_task(spa_name(dp->dp_spa), + dsl_scrub_pause_resume_check, dsl_scrub_pause_resume_sync, &cmd, 3, + ZFS_SPACE_CHECK_RESERVED)); +} + +boolean_t +dsl_scan_scrubbing(const dsl_pool_t *dp) +{ + dsl_scan_t *scn = dp->dp_scan; + + if (scn->scn_phys.scn_state == DSS_SCANNING && + scn->scn_phys.scn_func == POOL_SCAN_SCRUB) + return (B_TRUE); + + return (B_FALSE); +} + static void dsl_scan_visitbp(blkptr_t *bp, const zbookmark_phys_t *zb, dnode_phys_t *dnp, dsl_dataset_t *ds, dsl_scan_t *scn, dmu_objset_type_t ostype, dmu_tx_t *tx); @@ -410,14 +498,14 @@ dsl_scan_sync_state(dsl_scan_t *scn, dmu_tx_t *tx) extern int zfs_vdev_async_write_active_min_dirty_percent; static boolean_t -dsl_scan_check_pause(dsl_scan_t *scn, const zbookmark_phys_t *zb) +dsl_scan_check_suspend(dsl_scan_t *scn, const zbookmark_phys_t *zb) { /* we never skip user/group accounting objects */ if (zb && (int64_t)zb->zb_object < 0) return (B_FALSE); - if (scn->scn_pausing) - return (B_TRUE); /* we're already pausing */ + if (scn->scn_suspending) + return (B_TRUE); /* we're already suspending */ if (!ZB_IS_ZERO(&scn->scn_phys.scn_bookmark)) return (B_FALSE); /* we're resuming */ @@ -427,7 +515,7 @@ dsl_scan_check_pause(dsl_scan_t *scn, const zbookmark_phys_t *zb) return (B_FALSE); /* - * We pause if: + * We suspend if: * - we have scanned for the maximum time: an entire txg * timeout (default 5 sec) * or @@ -450,19 +538,19 @@ dsl_scan_check_pause(dsl_scan_t *scn, const zbookmark_phys_t *zb) dirty_pct >= zfs_vdev_async_write_active_min_dirty_percent)) || spa_shutting_down(scn->scn_dp->dp_spa)) { if (zb) { - dprintf("pausing at bookmark %llx/%llx/%llx/%llx\n", + dprintf("suspending at bookmark %llx/%llx/%llx/%llx\n", (longlong_t)zb->zb_objset, (longlong_t)zb->zb_object, (longlong_t)zb->zb_level, (longlong_t)zb->zb_blkid); scn->scn_phys.scn_bookmark = *zb; } - dprintf("pausing at DDT bookmark %llx/%llx/%llx/%llx\n", + dprintf("suspending at DDT bookmark %llx/%llx/%llx/%llx\n", (longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_class, (longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_type, (longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_checksum, (longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_cursor); - scn->scn_pausing = B_TRUE; + scn->scn_suspending = B_TRUE; return (B_TRUE); } return (B_FALSE); @@ -600,7 +688,7 @@ dsl_scan_check_resume(dsl_scan_t *scn, const dnode_phys_t *dnp, /* * If we found the block we're trying to resume from, or * we went past it to a different object, zero it out to - * indicate that it's OK to start checking for pausing + * indicate that it's OK to start checking for suspending * again. */ if (bcmp(zb, &scn->scn_phys.scn_bookmark, sizeof (*zb)) == 0 || @@ -703,7 +791,7 @@ dsl_scan_recurse(dsl_scan_t *scn, dsl_dataset_t *ds, dmu_objset_type_t ostype, /* * We also always visit user/group accounting * objects, and never skip them, even if we are - * pausing. This is necessary so that the space + * suspending. This is necessary so that the space * deltas from this txg get integrated. */ dsl_scan_visitdnode(scn, ds, osp->os_type, @@ -759,7 +847,7 @@ dsl_scan_visitbp(blkptr_t *bp, const zbookmark_phys_t *zb, /* ASSERT(pbuf == NULL || arc_released(pbuf)); */ - if (dsl_scan_check_pause(scn, zb)) + if (dsl_scan_check_suspend(scn, zb)) return; if (dsl_scan_check_resume(scn, dnp, zb)) @@ -1096,14 +1184,14 @@ dsl_scan_visitds(dsl_scan_t *scn, uint64_t dsobj, dmu_tx_t *tx) char *dsname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); dsl_dataset_name(ds, dsname); zfs_dbgmsg("scanned dataset %llu (%s) with min=%llu max=%llu; " - "pausing=%u", + "suspending=%u", (longlong_t)dsobj, dsname, (longlong_t)scn->scn_phys.scn_cur_min_txg, (longlong_t)scn->scn_phys.scn_cur_max_txg, - (int)scn->scn_pausing); + (int)scn->scn_suspending); kmem_free(dsname, ZFS_MAX_DATASET_NAME_LEN); - if (scn->scn_pausing) + if (scn->scn_suspending) goto out; /* @@ -1267,13 +1355,13 @@ dsl_scan_ddt(dsl_scan_t *scn, dmu_tx_t *tx) dsl_scan_ddt_entry(scn, ddb->ddb_checksum, &dde, tx); n++; - if (dsl_scan_check_pause(scn, NULL)) + if (dsl_scan_check_suspend(scn, NULL)) break; } - zfs_dbgmsg("scanned %llu ddt entries with class_max = %u; pausing=%u", - (longlong_t)n, (int)scn->scn_phys.scn_ddt_class_max, - (int)scn->scn_pausing); + zfs_dbgmsg("scanned %llu ddt entries with class_max = %u; " + "suspending=%u", (longlong_t)n, + (int)scn->scn_phys.scn_ddt_class_max, (int)scn->scn_suspending); ASSERT(error == 0 || error == ENOENT); ASSERT(error != ENOENT || @@ -1316,7 +1404,7 @@ dsl_scan_visit(dsl_scan_t *scn, dmu_tx_t *tx) scn->scn_phys.scn_cur_min_txg = scn->scn_phys.scn_min_txg; scn->scn_phys.scn_cur_max_txg = scn->scn_phys.scn_max_txg; dsl_scan_ddt(scn, tx); - if (scn->scn_pausing) + if (scn->scn_suspending) return; } @@ -1328,7 +1416,7 @@ dsl_scan_visit(dsl_scan_t *scn, dmu_tx_t *tx) dsl_scan_visit_rootbp(scn, NULL, &dp->dp_meta_rootbp, tx); spa_set_rootblkptr(dp->dp_spa, &dp->dp_meta_rootbp); - if (scn->scn_pausing) + if (scn->scn_suspending) return; if (spa_version(dp->dp_spa) < SPA_VERSION_DSL_SCRUB) { @@ -1338,22 +1426,22 @@ dsl_scan_visit(dsl_scan_t *scn, dmu_tx_t *tx) dsl_scan_visitds(scn, dp->dp_origin_snap->ds_object, tx); } - ASSERT(!scn->scn_pausing); + ASSERT(!scn->scn_suspending); } else if (scn->scn_phys.scn_bookmark.zb_objset != ZB_DESTROYED_OBJSET) { /* - * If we were paused, continue from here. Note if the - * ds we were paused on was deleted, the zb_objset may + * If we were suspended, continue from here. Note if the + * ds we were suspended on was deleted, the zb_objset may * be -1, so we will skip this and find a new objset * below. */ dsl_scan_visitds(scn, scn->scn_phys.scn_bookmark.zb_objset, tx); - if (scn->scn_pausing) + if (scn->scn_suspending) return; } /* - * In case we were paused right at the end of the ds, zero the + * In case we were suspended right at the end of the ds, zero the * bookmark so we don't think that we're still trying to resume. */ bzero(&scn->scn_phys.scn_bookmark, sizeof (zbookmark_phys_t)); @@ -1385,14 +1473,14 @@ dsl_scan_visit(dsl_scan_t *scn, dmu_tx_t *tx) dsl_scan_visitds(scn, dsobj, tx); zap_cursor_fini(&zc); - if (scn->scn_pausing) + if (scn->scn_suspending) return; } zap_cursor_fini(&zc); } static boolean_t -dsl_scan_free_should_pause(dsl_scan_t *scn) +dsl_scan_free_should_suspend(dsl_scan_t *scn) { uint64_t elapsed_nanosecs; @@ -1416,7 +1504,7 @@ dsl_scan_free_block_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) if (!scn->scn_is_bptree || (BP_GET_LEVEL(bp) == 0 && BP_GET_TYPE(bp) != DMU_OT_OBJSET)) { - if (dsl_scan_free_should_pause(scn)) + if (dsl_scan_free_should_suspend(scn)) return (SET_ERROR(ERESTART)); } @@ -1439,7 +1527,8 @@ dsl_scan_active(dsl_scan_t *scn) return (B_FALSE); if (spa_shutting_down(spa)) return (B_FALSE); - if (scn->scn_phys.scn_state == DSS_SCANNING || + if ((scn->scn_phys.scn_state == DSS_SCANNING && + !dsl_scan_is_paused_scrub(scn)) || (scn->scn_async_destroying && !scn->scn_async_stalled)) return (B_TRUE); @@ -1494,12 +1583,12 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx) return; scn->scn_visited_this_txg = 0; - scn->scn_pausing = B_FALSE; + scn->scn_suspending = B_FALSE; scn->scn_sync_start_time = gethrtime(); spa->spa_scrub_active = B_TRUE; /* - * First process the async destroys. If we pause, don't do + * First process the async destroys. If we suspend, don't do * any scrubbing or resilvering. This ensures that there are no * async destroys while we are scanning, so the scan code doesn't * have to worry about traversing it. It is also faster to free the @@ -1616,7 +1705,7 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx) return; if (scn->scn_done_txg == tx->tx_txg) { - ASSERT(!scn->scn_pausing); + ASSERT(!scn->scn_suspending); /* finished with scan. */ zfs_dbgmsg("txg %llu scan complete", tx->tx_txg); dsl_scan_done(scn, B_TRUE, tx); @@ -1625,6 +1714,9 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx) return; } + if (dsl_scan_is_paused_scrub(scn)) + return; + if (scn->scn_phys.scn_ddt_bookmark.ddb_class <= scn->scn_phys.scn_ddt_class_max) { zfs_dbgmsg("doing scan sync txg %llu; " @@ -1659,7 +1751,7 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx) (longlong_t)scn->scn_visited_this_txg, (longlong_t)NSEC2MSEC(gethrtime() - scn->scn_sync_start_time)); - if (!scn->scn_pausing) { + if (!scn->scn_suspending) { scn->scn_done_txg = tx->tx_txg + 1; zfs_dbgmsg("txg %llu traversal complete, waiting till txg %llu", tx->tx_txg, scn->scn_done_txg); @@ -1867,11 +1959,15 @@ dsl_scan_scrub_cb(dsl_pool_t *dp, return (0); } -/* Called by the ZFS_IOC_POOL_SCAN ioctl to start a scrub or resilver */ +/* + * Called by the ZFS_IOC_POOL_SCAN ioctl to start a scrub or resilver. + * Can also be called to resume a paused scrub. + */ int dsl_scan(dsl_pool_t *dp, pool_scan_func_t func) { spa_t *spa = dp->dp_spa; + dsl_scan_t *scn = dp->dp_scan; /* * Purge all vdev caches and probe all devices. We do this here @@ -1886,6 +1982,16 @@ dsl_scan(dsl_pool_t *dp, pool_scan_func_t func) spa->spa_scrub_reopen = B_FALSE; (void) spa_vdev_state_exit(spa, NULL, 0); + if (func == POOL_SCAN_SCRUB && dsl_scan_is_paused_scrub(scn)) { + /* got scrub start cmd, resume paused scrub */ + int err = dsl_scrub_set_pause_resume(scn->scn_dp, + POOL_SCRUB_NORMAL); + if (err == 0) + return (ECANCELED); + + return (SET_ERROR(err)); + } + return (dsl_sync_task(spa_name(spa), dsl_scan_setup_check, dsl_scan_setup_sync, &func, 0, ZFS_SPACE_CHECK_NONE)); } diff --git a/usr/src/uts/common/fs/zfs/spa.c b/usr/src/uts/common/fs/zfs/spa.c index 4fa6a6c79a..576ef1525c 100644 --- a/usr/src/uts/common/fs/zfs/spa.c +++ b/usr/src/uts/common/fs/zfs/spa.c @@ -28,6 +28,7 @@ * Copyright (c) 2014 Integros [integros.com] * Copyright 2016 Toomas Soome <tsoome@me.com> * Copyright 2017 Joyent, Inc. + * Copyright (c) 2017 Datto Inc. */ /* @@ -5748,6 +5749,16 @@ spa_vdev_setfru(spa_t *spa, uint64_t guid, const char *newfru) * SPA Scanning * ========================================================================== */ +int +spa_scrub_pause_resume(spa_t *spa, pool_scrub_cmd_t cmd) +{ + ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0); + + if (dsl_scan_resilvering(spa->spa_dsl_pool)) + return (SET_ERROR(EBUSY)); + + return (dsl_scrub_set_pause_resume(spa->spa_dsl_pool, cmd)); +} int spa_scan_stop(spa_t *spa) diff --git a/usr/src/uts/common/fs/zfs/spa_misc.c b/usr/src/uts/common/fs/zfs/spa_misc.c index 6555d4eee5..22d69b185b 100644 --- a/usr/src/uts/common/fs/zfs/spa_misc.c +++ b/usr/src/uts/common/fs/zfs/spa_misc.c @@ -25,6 +25,7 @@ * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. * Copyright 2013 Saso Kiselkov. All rights reserved. * Copyright (c) 2014 Integros [integros.com] + * Copyright (c) 2017 Datto Inc. */ #include <sys/zfs_context.h> @@ -2008,6 +2009,11 @@ spa_scan_stat_init(spa_t *spa) { /* data not stored on disk */ spa->spa_scan_pass_start = gethrestime_sec(); + if (dsl_scan_is_paused_scrub(spa->spa_dsl_pool->dp_scan)) + spa->spa_scan_pass_scrub_pause = spa->spa_scan_pass_start; + else + spa->spa_scan_pass_scrub_pause = 0; + spa->spa_scan_pass_scrub_spent_paused = 0; spa->spa_scan_pass_exam = 0; vdev_scan_stat_init(spa->spa_root_vdev); } @@ -2038,6 +2044,8 @@ spa_scan_get_stats(spa_t *spa, pool_scan_stat_t *ps) /* data not stored on disk */ ps->pss_pass_start = spa->spa_scan_pass_start; ps->pss_pass_exam = spa->spa_scan_pass_exam; + ps->pss_pass_scrub_pause = spa->spa_scan_pass_scrub_pause; + ps->pss_pass_scrub_spent_paused = spa->spa_scan_pass_scrub_spent_paused; return (0); } diff --git a/usr/src/uts/common/fs/zfs/sys/dsl_scan.h b/usr/src/uts/common/fs/zfs/sys/dsl_scan.h index ee8512c07d..fd950cc014 100644 --- a/usr/src/uts/common/fs/zfs/sys/dsl_scan.h +++ b/usr/src/uts/common/fs/zfs/sys/dsl_scan.h @@ -21,6 +21,7 @@ /* * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012, 2014 by Delphix. All rights reserved. + * Copyright (c) 2017 Datto Inc. */ #ifndef _SYS_DSL_SCAN_H @@ -70,6 +71,7 @@ typedef struct dsl_scan_phys { typedef enum dsl_scan_flags { DSF_VISIT_DS_AGAIN = 1<<0, + DSF_SCRUB_PAUSED = 1<<1, } dsl_scan_flags_t; /* @@ -82,8 +84,8 @@ typedef enum dsl_scan_flags { * * The following members of this structure direct the behavior of the scan: * - * scn_pausing - a scan that cannot be completed in a single txg or - * has exceeded its allotted time will need to pause. + * scn_suspending - a scan that cannot be completed in a single txg or + * has exceeded its allotted time will need to suspend. * When this flag is set the scanner will stop traversing * the pool and write out the current state to disk. * @@ -105,7 +107,7 @@ typedef enum dsl_scan_flags { typedef struct dsl_scan { struct dsl_pool *scn_dp; - boolean_t scn_pausing; + boolean_t scn_suspending; uint64_t scn_restart_txg; uint64_t scn_done_txg; uint64_t scn_sync_start_time; @@ -115,8 +117,6 @@ typedef struct dsl_scan { boolean_t scn_is_bptree; boolean_t scn_async_destroying; boolean_t scn_async_stalled; - - /* for debugging / information */ uint64_t scn_visited_this_txg; dsl_scan_phys_t scn_phys; @@ -127,6 +127,8 @@ void dsl_scan_fini(struct dsl_pool *dp); void dsl_scan_sync(struct dsl_pool *, dmu_tx_t *); int dsl_scan_cancel(struct dsl_pool *); int dsl_scan(struct dsl_pool *, pool_scan_func_t); +boolean_t dsl_scan_scrubbing(const struct dsl_pool *dp); +int dsl_scrub_set_pause_resume(const struct dsl_pool *dp, pool_scrub_cmd_t cmd); void dsl_resilver_restart(struct dsl_pool *, uint64_t txg); boolean_t dsl_scan_resilvering(struct dsl_pool *dp); boolean_t dsl_dataset_unstable(struct dsl_dataset *ds); @@ -137,6 +139,7 @@ void dsl_scan_ds_snapshotted(struct dsl_dataset *ds, struct dmu_tx *tx); void dsl_scan_ds_clone_swapped(struct dsl_dataset *ds1, struct dsl_dataset *ds2, struct dmu_tx *tx); boolean_t dsl_scan_active(dsl_scan_t *scn); +boolean_t dsl_scan_is_paused_scrub(const dsl_scan_t *scn); #ifdef __cplusplus } diff --git a/usr/src/uts/common/fs/zfs/sys/spa.h b/usr/src/uts/common/fs/zfs/sys/spa.h index 0243c3effd..1d267c971e 100644 --- a/usr/src/uts/common/fs/zfs/sys/spa.h +++ b/usr/src/uts/common/fs/zfs/sys/spa.h @@ -26,6 +26,7 @@ * Copyright 2013 Saso Kiselkov. All rights reserved. * Copyright (c) 2014 Integros [integros.com] * Copyright 2017 Joyent, Inc. + * Copyright (c) 2017 Datto Inc. */ #ifndef _SYS_SPA_H @@ -676,6 +677,7 @@ extern void spa_l2cache_drop(spa_t *spa); /* scanning */ extern int spa_scan(spa_t *spa, pool_scan_func_t func); extern int spa_scan_stop(spa_t *spa); +extern int spa_scrub_pause_resume(spa_t *spa, pool_scrub_cmd_t flag); /* spa syncing */ extern void spa_sync(spa_t *spa, uint64_t txg); /* only for DMU use */ diff --git a/usr/src/uts/common/fs/zfs/sys/spa_impl.h b/usr/src/uts/common/fs/zfs/sys/spa_impl.h index 73d2df0168..bca9131261 100644 --- a/usr/src/uts/common/fs/zfs/sys/spa_impl.h +++ b/usr/src/uts/common/fs/zfs/sys/spa_impl.h @@ -24,6 +24,7 @@ * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. * Copyright 2013 Saso Kiselkov. All rights reserved. + * Copyright (c) 2017 Datto Inc. */ #ifndef _SYS_SPA_IMPL_H @@ -192,6 +193,8 @@ struct spa { uint8_t spa_scrub_started; /* started since last boot */ uint8_t spa_scrub_reopen; /* scrub doing vdev_reopen */ uint64_t spa_scan_pass_start; /* start time per pass/reboot */ + uint64_t spa_scan_pass_scrub_pause; /* scrub pause time */ + uint64_t spa_scan_pass_scrub_spent_paused; /* total paused */ uint64_t spa_scan_pass_exam; /* examined bytes per pass */ kmutex_t spa_async_lock; /* protect async state */ kthread_t *spa_async_thread; /* thread doing async task */ diff --git a/usr/src/uts/common/fs/zfs/zfs_ioctl.c b/usr/src/uts/common/fs/zfs/zfs_ioctl.c index 5ffda31f24..418c736221 100644 --- a/usr/src/uts/common/fs/zfs/zfs_ioctl.c +++ b/usr/src/uts/common/fs/zfs/zfs_ioctl.c @@ -32,6 +32,7 @@ * Copyright (c) 2014 Integros [integros.com] * Copyright 2016 Toomas Soome <tsoome@me.com> * Copyright 2017 RackTop Systems. + * Copyright (c) 2017 Datto Inc. */ /* @@ -1683,6 +1684,7 @@ zfs_ioc_pool_tryimport(zfs_cmd_t *zc) * inputs: * zc_name name of the pool * zc_cookie scan func (pool_scan_func_t) + * zc_flags scrub pause/resume flag (pool_scrub_cmd_t) */ static int zfs_ioc_pool_scan(zfs_cmd_t *zc) @@ -1693,7 +1695,12 @@ zfs_ioc_pool_scan(zfs_cmd_t *zc) if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) return (error); - if (zc->zc_cookie == POOL_SCAN_NONE) + if (zc->zc_flags >= POOL_SCRUB_FLAGS_END) + return (SET_ERROR(EINVAL)); + + if (zc->zc_flags == POOL_SCRUB_PAUSE) + error = spa_scrub_pause_resume(spa, POOL_SCRUB_PAUSE); + else if (zc->zc_cookie == POOL_SCAN_NONE) error = spa_scan_stop(spa); else error = spa_scan(spa, zc->zc_cookie); diff --git a/usr/src/uts/common/sys/fs/zfs.h b/usr/src/uts/common/sys/fs/zfs.h index 389e9b59fa..c12cb65084 100644 --- a/usr/src/uts/common/sys/fs/zfs.h +++ b/usr/src/uts/common/sys/fs/zfs.h @@ -25,6 +25,7 @@ * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2014 Integros [integros.com] * Copyright 2017 Joyent, Inc. + * Copyright (c) 2017 Datto Inc. */ /* Portions Copyright 2010 Robert Milkowski */ @@ -694,6 +695,16 @@ typedef enum pool_scan_func { } pool_scan_func_t; /* + * Used to control scrub pause and resume. + */ +typedef enum pool_scrub_cmd { + POOL_SCRUB_NORMAL = 0, + POOL_SCRUB_PAUSE, + POOL_SCRUB_FLAGS_END +} pool_scrub_cmd_t; + + +/* * ZIO types. Needed to interpret vdev statistics below. */ typedef enum zio_type { @@ -725,6 +736,9 @@ typedef struct pool_scan_stat { /* values not stored on disk */ uint64_t pss_pass_exam; /* examined bytes per scan pass */ uint64_t pss_pass_start; /* start time of a scan pass */ + uint64_t pss_pass_scrub_pause; /* pause time of a scurb pass */ + /* cumulative time scrub spent paused, needed for rate calculation */ + uint64_t pss_pass_scrub_spent_paused; } pool_scan_stat_t; typedef enum dsl_scan_state { |