aboutsummaryrefslogtreecommitdiff
path: root/sys/cddl/contrib/opensolaris
diff options
context:
space:
mode:
authorAndriy Gapon <avg@FreeBSD.org>2017-09-09 11:00:07 +0000
committerAndriy Gapon <avg@FreeBSD.org>2017-09-09 11:00:07 +0000
commit90354c32001b90916abfd050c2a210f8b81bb140 (patch)
tree64504b92b63d862ce1a1b1cc24c7e9b53b1e4e31 /sys/cddl/contrib/opensolaris
parent705f4b2ceb763333b1d256baabc4ca9ee718bd77 (diff)
parentefd3d79ea33392718095ebe6efcd726bcc945760 (diff)
downloadsrc-90354c32001b90916abfd050c2a210f8b81bb140.tar.gz
src-90354c32001b90916abfd050c2a210f8b81bb140.zip
MFV r323107: 8414 Implemented zpool scrub pause/resume
illumos/illumos-gate@1702cce751c5cb7ead878d0205a6c90b027e3de8 https://github.com/illumos/illumos-gate/commit/1702cce751c5cb7ead878d0205a6c90b027e3de8 FreeBSD note: rather than merging the zpool.8 update I copied the zpool scrub section from the illumos zpool.1m to FreeBSD zpool.8 almost verbatim. Now that the illumos page uses the mdoc format, it was an easier option. Perhaps the change is not in perfect compliance with the FreeBSD style, but I think that it is acceptible. https://www.illumos.org/issues/8414 This issue tracks the port of scrub pause from ZoL: https://github.com/zfsonlinux/zfs/pull/6167 Currently, there is no way to pause a scrub. Pausing may be useful when the pool is busy with other I/O to preserve bandwidth. Description This patch adds the ability to pause and resume scrubbing. This is achieved by maintaining a persistent on-disk scrub state. While the state is 'paused' we do not scrub any more blocks. We do however perform regular scan housekeeping such as freeing async destroyed and deadlist blocks while paused. Motivation and Context Scrub pausing can be an I/O intensive operation and people have been asking for the ability to pause a scrub for a while. This allows one to preserve scrub progress while freeing up bandwidth for other I/O. Reviewed by: George Melikov <mail@gmelikov.ru> Reviewed by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed by: Brad Lewis <brad.lewis@delphix.com> Reviewed by: Serapheim Dimitropoulos <serapheim@delphix.com> Reviewed by: Matt Ahrens <mahrens@delphix.com> Approved by: Dan McDonald <danmcd@joyent.com> Author: Alek Pinchuk <apinchuk@datto.com> MFC after: 2 weeks
Notes
Notes: svn path=/head/; revision=323355
Diffstat (limited to 'sys/cddl/contrib/opensolaris')
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bpobj.c4
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c172
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c11
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c8
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_scan.h13
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h2
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h3
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c9
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h14
9 files changed, 197 insertions, 39 deletions
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bpobj.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bpobj.c
index 19e97cbabf30..0bcfc0031311 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bpobj.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bpobj.c
@@ -22,6 +22,7 @@
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2016 by Delphix. All rights reserved.
* Copyright (c) 2014 Integros [integros.com]
+ * Copyright (c) 2017 Datto Inc.
*/
#include <sys/bpobj.h>
@@ -212,6 +213,9 @@ bpobj_iterate_impl(bpobj_t *bpo, bpobj_itor_t func, void *arg, dmu_tx_t *tx,
mutex_enter(&bpo->bpo_lock);
+ if (!bpobj_hasentries(bpo))
+ goto out;
+
if (free)
dmu_buf_will_dirty(bpo->bpo_dbuf, tx);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c
index 639eefe8da64..ac1ac50751ce 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c
@@ -22,6 +22,7 @@
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2016 Gary Mills
* Copyright (c) 2011, 2016 by Delphix. All rights reserved.
+ * Copyright (c) 2017 Datto Inc.
*/
#include <sys/dsl_scan.h>
@@ -310,6 +311,8 @@ dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx)
scn->scn_phys.scn_queue_obj = 0;
}
+ scn->scn_phys.scn_flags &= ~DSF_SCRUB_PAUSED;
+
/*
* If we were "restarted" from a stopped state, don't bother
* with anything else.
@@ -393,6 +396,91 @@ dsl_scan_cancel(dsl_pool_t *dp)
dsl_scan_cancel_sync, NULL, 3, ZFS_SPACE_CHECK_RESERVED));
}
+boolean_t
+dsl_scan_is_paused_scrub(const dsl_scan_t *scn)
+{
+ if (dsl_scan_scrubbing(scn->scn_dp) &&
+ scn->scn_phys.scn_flags & DSF_SCRUB_PAUSED)
+ return (B_TRUE);
+
+ return (B_FALSE);
+}
+
+static int
+dsl_scrub_pause_resume_check(void *arg, dmu_tx_t *tx)
+{
+ pool_scrub_cmd_t *cmd = arg;
+ dsl_pool_t *dp = dmu_tx_pool(tx);
+ dsl_scan_t *scn = dp->dp_scan;
+
+ if (*cmd == POOL_SCRUB_PAUSE) {
+ /* can't pause a scrub when there is no in-progress scrub */
+ if (!dsl_scan_scrubbing(dp))
+ return (SET_ERROR(ENOENT));
+
+ /* can't pause a paused scrub */
+ if (dsl_scan_is_paused_scrub(scn))
+ return (SET_ERROR(EBUSY));
+ } else if (*cmd != POOL_SCRUB_NORMAL) {
+ return (SET_ERROR(ENOTSUP));
+ }
+
+ return (0);
+}
+
+static void
+dsl_scrub_pause_resume_sync(void *arg, dmu_tx_t *tx)
+{
+ pool_scrub_cmd_t *cmd = arg;
+ dsl_pool_t *dp = dmu_tx_pool(tx);
+ spa_t *spa = dp->dp_spa;
+ dsl_scan_t *scn = dp->dp_scan;
+
+ if (*cmd == POOL_SCRUB_PAUSE) {
+ /* can't pause a scrub when there is no in-progress scrub */
+ spa->spa_scan_pass_scrub_pause = gethrestime_sec();
+ scn->scn_phys.scn_flags |= DSF_SCRUB_PAUSED;
+ dsl_scan_sync_state(scn, tx);
+ } else {
+ ASSERT3U(*cmd, ==, POOL_SCRUB_NORMAL);
+ if (dsl_scan_is_paused_scrub(scn)) {
+ /*
+ * We need to keep track of how much time we spend
+ * paused per pass so that we can adjust the scrub rate
+ * shown in the output of 'zpool status'
+ */
+ spa->spa_scan_pass_scrub_spent_paused +=
+ gethrestime_sec() - spa->spa_scan_pass_scrub_pause;
+ spa->spa_scan_pass_scrub_pause = 0;
+ scn->scn_phys.scn_flags &= ~DSF_SCRUB_PAUSED;
+ dsl_scan_sync_state(scn, tx);
+ }
+ }
+}
+
+/*
+ * Set scrub pause/resume state if it makes sense to do so
+ */
+int
+dsl_scrub_set_pause_resume(const dsl_pool_t *dp, pool_scrub_cmd_t cmd)
+{
+ return (dsl_sync_task(spa_name(dp->dp_spa),
+ dsl_scrub_pause_resume_check, dsl_scrub_pause_resume_sync, &cmd, 3,
+ ZFS_SPACE_CHECK_RESERVED));
+}
+
+boolean_t
+dsl_scan_scrubbing(const dsl_pool_t *dp)
+{
+ dsl_scan_t *scn = dp->dp_scan;
+
+ if (scn->scn_phys.scn_state == DSS_SCANNING &&
+ scn->scn_phys.scn_func == POOL_SCAN_SCRUB)
+ return (B_TRUE);
+
+ return (B_FALSE);
+}
+
static void dsl_scan_visitbp(blkptr_t *bp, const zbookmark_phys_t *zb,
dnode_phys_t *dnp, dsl_dataset_t *ds, dsl_scan_t *scn,
dmu_objset_type_t ostype, dmu_tx_t *tx);
@@ -435,14 +523,14 @@ dsl_scan_sync_state(dsl_scan_t *scn, dmu_tx_t *tx)
extern int zfs_vdev_async_write_active_min_dirty_percent;
static boolean_t
-dsl_scan_check_pause(dsl_scan_t *scn, const zbookmark_phys_t *zb)
+dsl_scan_check_suspend(dsl_scan_t *scn, const zbookmark_phys_t *zb)
{
/* we never skip user/group accounting objects */
if (zb && (int64_t)zb->zb_object < 0)
return (B_FALSE);
- if (scn->scn_pausing)
- return (B_TRUE); /* we're already pausing */
+ if (scn->scn_suspending)
+ return (B_TRUE); /* we're already suspending */
if (!ZB_IS_ZERO(&scn->scn_phys.scn_bookmark))
return (B_FALSE); /* we're resuming */
@@ -452,7 +540,7 @@ dsl_scan_check_pause(dsl_scan_t *scn, const zbookmark_phys_t *zb)
return (B_FALSE);
/*
- * We pause if:
+ * We suspend if:
* - we have scanned for the maximum time: an entire txg
* timeout (default 5 sec)
* or
@@ -475,19 +563,19 @@ dsl_scan_check_pause(dsl_scan_t *scn, const zbookmark_phys_t *zb)
dirty_pct >= zfs_vdev_async_write_active_min_dirty_percent)) ||
spa_shutting_down(scn->scn_dp->dp_spa)) {
if (zb) {
- dprintf("pausing at bookmark %llx/%llx/%llx/%llx\n",
+ dprintf("suspending at bookmark %llx/%llx/%llx/%llx\n",
(longlong_t)zb->zb_objset,
(longlong_t)zb->zb_object,
(longlong_t)zb->zb_level,
(longlong_t)zb->zb_blkid);
scn->scn_phys.scn_bookmark = *zb;
}
- dprintf("pausing at DDT bookmark %llx/%llx/%llx/%llx\n",
+ dprintf("suspending at DDT bookmark %llx/%llx/%llx/%llx\n",
(longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_class,
(longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_type,
(longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_checksum,
(longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_cursor);
- scn->scn_pausing = B_TRUE;
+ scn->scn_suspending = B_TRUE;
return (B_TRUE);
}
return (B_FALSE);
@@ -625,7 +713,7 @@ dsl_scan_check_resume(dsl_scan_t *scn, const dnode_phys_t *dnp,
/*
* If we found the block we're trying to resume from, or
* we went past it to a different object, zero it out to
- * indicate that it's OK to start checking for pausing
+ * indicate that it's OK to start checking for suspending
* again.
*/
if (bcmp(zb, &scn->scn_phys.scn_bookmark, sizeof (*zb)) == 0 ||
@@ -728,7 +816,7 @@ dsl_scan_recurse(dsl_scan_t *scn, dsl_dataset_t *ds, dmu_objset_type_t ostype,
/*
* We also always visit user/group accounting
* objects, and never skip them, even if we are
- * pausing. This is necessary so that the space
+ * suspending. This is necessary so that the space
* deltas from this txg get integrated.
*/
dsl_scan_visitdnode(scn, ds, osp->os_type,
@@ -784,7 +872,7 @@ dsl_scan_visitbp(blkptr_t *bp, const zbookmark_phys_t *zb,
/* ASSERT(pbuf == NULL || arc_released(pbuf)); */
- if (dsl_scan_check_pause(scn, zb))
+ if (dsl_scan_check_suspend(scn, zb))
return;
if (dsl_scan_check_resume(scn, dnp, zb))
@@ -1121,14 +1209,14 @@ dsl_scan_visitds(dsl_scan_t *scn, uint64_t dsobj, dmu_tx_t *tx)
char *dsname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
dsl_dataset_name(ds, dsname);
zfs_dbgmsg("scanned dataset %llu (%s) with min=%llu max=%llu; "
- "pausing=%u",
+ "suspending=%u",
(longlong_t)dsobj, dsname,
(longlong_t)scn->scn_phys.scn_cur_min_txg,
(longlong_t)scn->scn_phys.scn_cur_max_txg,
- (int)scn->scn_pausing);
+ (int)scn->scn_suspending);
kmem_free(dsname, ZFS_MAX_DATASET_NAME_LEN);
- if (scn->scn_pausing)
+ if (scn->scn_suspending)
goto out;
/*
@@ -1292,13 +1380,13 @@ dsl_scan_ddt(dsl_scan_t *scn, dmu_tx_t *tx)
dsl_scan_ddt_entry(scn, ddb->ddb_checksum, &dde, tx);
n++;
- if (dsl_scan_check_pause(scn, NULL))
+ if (dsl_scan_check_suspend(scn, NULL))
break;
}
- zfs_dbgmsg("scanned %llu ddt entries with class_max = %u; pausing=%u",
- (longlong_t)n, (int)scn->scn_phys.scn_ddt_class_max,
- (int)scn->scn_pausing);
+ zfs_dbgmsg("scanned %llu ddt entries with class_max = %u; "
+ "suspending=%u", (longlong_t)n,
+ (int)scn->scn_phys.scn_ddt_class_max, (int)scn->scn_suspending);
ASSERT(error == 0 || error == ENOENT);
ASSERT(error != ENOENT ||
@@ -1341,7 +1429,7 @@ dsl_scan_visit(dsl_scan_t *scn, dmu_tx_t *tx)
scn->scn_phys.scn_cur_min_txg = scn->scn_phys.scn_min_txg;
scn->scn_phys.scn_cur_max_txg = scn->scn_phys.scn_max_txg;
dsl_scan_ddt(scn, tx);
- if (scn->scn_pausing)
+ if (scn->scn_suspending)
return;
}
@@ -1353,7 +1441,7 @@ dsl_scan_visit(dsl_scan_t *scn, dmu_tx_t *tx)
dsl_scan_visit_rootbp(scn, NULL,
&dp->dp_meta_rootbp, tx);
spa_set_rootblkptr(dp->dp_spa, &dp->dp_meta_rootbp);
- if (scn->scn_pausing)
+ if (scn->scn_suspending)
return;
if (spa_version(dp->dp_spa) < SPA_VERSION_DSL_SCRUB) {
@@ -1363,22 +1451,22 @@ dsl_scan_visit(dsl_scan_t *scn, dmu_tx_t *tx)
dsl_scan_visitds(scn,
dp->dp_origin_snap->ds_object, tx);
}
- ASSERT(!scn->scn_pausing);
+ ASSERT(!scn->scn_suspending);
} else if (scn->scn_phys.scn_bookmark.zb_objset !=
ZB_DESTROYED_OBJSET) {
/*
- * If we were paused, continue from here. Note if the
- * ds we were paused on was deleted, the zb_objset may
+ * If we were suspended, continue from here. Note if the
+ * ds we were suspended on was deleted, the zb_objset may
* be -1, so we will skip this and find a new objset
* below.
*/
dsl_scan_visitds(scn, scn->scn_phys.scn_bookmark.zb_objset, tx);
- if (scn->scn_pausing)
+ if (scn->scn_suspending)
return;
}
/*
- * In case we were paused right at the end of the ds, zero the
+ * In case we were suspended right at the end of the ds, zero the
* bookmark so we don't think that we're still trying to resume.
*/
bzero(&scn->scn_phys.scn_bookmark, sizeof (zbookmark_phys_t));
@@ -1410,14 +1498,14 @@ dsl_scan_visit(dsl_scan_t *scn, dmu_tx_t *tx)
dsl_scan_visitds(scn, dsobj, tx);
zap_cursor_fini(&zc);
- if (scn->scn_pausing)
+ if (scn->scn_suspending)
return;
}
zap_cursor_fini(&zc);
}
static boolean_t
-dsl_scan_free_should_pause(dsl_scan_t *scn)
+dsl_scan_free_should_suspend(dsl_scan_t *scn)
{
uint64_t elapsed_nanosecs;
@@ -1441,7 +1529,7 @@ dsl_scan_free_block_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
if (!scn->scn_is_bptree ||
(BP_GET_LEVEL(bp) == 0 && BP_GET_TYPE(bp) != DMU_OT_OBJSET)) {
- if (dsl_scan_free_should_pause(scn))
+ if (dsl_scan_free_should_suspend(scn))
return (SET_ERROR(ERESTART));
}
@@ -1464,7 +1552,8 @@ dsl_scan_active(dsl_scan_t *scn)
return (B_FALSE);
if (spa_shutting_down(spa))
return (B_FALSE);
- if (scn->scn_phys.scn_state == DSS_SCANNING ||
+ if ((scn->scn_phys.scn_state == DSS_SCANNING &&
+ !dsl_scan_is_paused_scrub(scn)) ||
(scn->scn_async_destroying && !scn->scn_async_stalled))
return (B_TRUE);
@@ -1519,12 +1608,12 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
return;
scn->scn_visited_this_txg = 0;
- scn->scn_pausing = B_FALSE;
+ scn->scn_suspending = B_FALSE;
scn->scn_sync_start_time = gethrtime();
spa->spa_scrub_active = B_TRUE;
/*
- * First process the async destroys. If we pause, don't do
+ * First process the async destroys. If we suspend, don't do
* any scrubbing or resilvering. This ensures that there are no
* async destroys while we are scanning, so the scan code doesn't
* have to worry about traversing it. It is also faster to free the
@@ -1641,7 +1730,7 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
return;
if (scn->scn_done_txg == tx->tx_txg) {
- ASSERT(!scn->scn_pausing);
+ ASSERT(!scn->scn_suspending);
/* finished with scan. */
zfs_dbgmsg("txg %llu scan complete", tx->tx_txg);
dsl_scan_done(scn, B_TRUE, tx);
@@ -1650,6 +1739,9 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
return;
}
+ if (dsl_scan_is_paused_scrub(scn))
+ return;
+
if (scn->scn_phys.scn_ddt_bookmark.ddb_class <=
scn->scn_phys.scn_ddt_class_max) {
zfs_dbgmsg("doing scan sync txg %llu; "
@@ -1684,7 +1776,7 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
(longlong_t)scn->scn_visited_this_txg,
(longlong_t)NSEC2MSEC(gethrtime() - scn->scn_sync_start_time));
- if (!scn->scn_pausing) {
+ if (!scn->scn_suspending) {
scn->scn_done_txg = tx->tx_txg + 1;
zfs_dbgmsg("txg %llu traversal complete, waiting till txg %llu",
tx->tx_txg, scn->scn_done_txg);
@@ -1893,11 +1985,15 @@ dsl_scan_scrub_cb(dsl_pool_t *dp,
return (0);
}
-/* Called by the ZFS_IOC_POOL_SCAN ioctl to start a scrub or resilver */
+/*
+ * Called by the ZFS_IOC_POOL_SCAN ioctl to start a scrub or resilver.
+ * Can also be called to resume a paused scrub.
+ */
int
dsl_scan(dsl_pool_t *dp, pool_scan_func_t func)
{
spa_t *spa = dp->dp_spa;
+ dsl_scan_t *scn = dp->dp_scan;
/*
* Purge all vdev caches and probe all devices. We do this here
@@ -1912,6 +2008,16 @@ dsl_scan(dsl_pool_t *dp, pool_scan_func_t func)
spa->spa_scrub_reopen = B_FALSE;
(void) spa_vdev_state_exit(spa, NULL, 0);
+ if (func == POOL_SCAN_SCRUB && dsl_scan_is_paused_scrub(scn)) {
+ /* got scrub start cmd, resume paused scrub */
+ int err = dsl_scrub_set_pause_resume(scn->scn_dp,
+ POOL_SCRUB_NORMAL);
+ if (err == 0)
+ return (ECANCELED);
+
+ return (SET_ERROR(err));
+ }
+
return (dsl_sync_task(spa_name(spa), dsl_scan_setup_check,
dsl_scan_setup_sync, &func, 0, ZFS_SPACE_CHECK_NONE));
}
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c
index 3b2b227d0cce..f2235cecca51 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c
@@ -28,6 +28,7 @@
* Copyright 2013 Saso Kiselkov. All rights reserved.
* Copyright (c) 2014 Integros [integros.com]
* Copyright 2016 Toomas Soome <tsoome@me.com>
+ * Copyright (c) 2017 Datto Inc.
*/
/*
@@ -6048,6 +6049,16 @@ spa_vdev_setfru(spa_t *spa, uint64_t guid, const char *newfru)
* SPA Scanning
* ==========================================================================
*/
+int
+spa_scrub_pause_resume(spa_t *spa, pool_scrub_cmd_t cmd)
+{
+ ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0);
+
+ if (dsl_scan_resilvering(spa->spa_dsl_pool))
+ return (SET_ERROR(EBUSY));
+
+ return (dsl_scrub_set_pause_resume(spa->spa_dsl_pool, cmd));
+}
int
spa_scan_stop(spa_t *spa)
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c
index c700a0ffd74f..b6b0e2189508 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c
@@ -26,6 +26,7 @@
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
* Copyright 2013 Saso Kiselkov. All rights reserved.
* Copyright (c) 2014 Integros [integros.com]
+ * Copyright (c) 2017 Datto Inc.
*/
#include <sys/zfs_context.h>
@@ -2145,6 +2146,11 @@ spa_scan_stat_init(spa_t *spa)
{
/* data not stored on disk */
spa->spa_scan_pass_start = gethrestime_sec();
+ if (dsl_scan_is_paused_scrub(spa->spa_dsl_pool->dp_scan))
+ spa->spa_scan_pass_scrub_pause = spa->spa_scan_pass_start;
+ else
+ spa->spa_scan_pass_scrub_pause = 0;
+ spa->spa_scan_pass_scrub_spent_paused = 0;
spa->spa_scan_pass_exam = 0;
vdev_scan_stat_init(spa->spa_root_vdev);
}
@@ -2175,6 +2181,8 @@ spa_scan_get_stats(spa_t *spa, pool_scan_stat_t *ps)
/* data not stored on disk */
ps->pss_pass_start = spa->spa_scan_pass_start;
ps->pss_pass_exam = spa->spa_scan_pass_exam;
+ ps->pss_pass_scrub_pause = spa->spa_scan_pass_scrub_pause;
+ ps->pss_pass_scrub_spent_paused = spa->spa_scan_pass_scrub_spent_paused;
return (0);
}
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_scan.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_scan.h
index ee8512c07dac..fd950cc01476 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_scan.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_scan.h
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2017 Datto Inc.
*/
#ifndef _SYS_DSL_SCAN_H
@@ -70,6 +71,7 @@ typedef struct dsl_scan_phys {
typedef enum dsl_scan_flags {
DSF_VISIT_DS_AGAIN = 1<<0,
+ DSF_SCRUB_PAUSED = 1<<1,
} dsl_scan_flags_t;
/*
@@ -82,8 +84,8 @@ typedef enum dsl_scan_flags {
*
* The following members of this structure direct the behavior of the scan:
*
- * scn_pausing - a scan that cannot be completed in a single txg or
- * has exceeded its allotted time will need to pause.
+ * scn_suspending - a scan that cannot be completed in a single txg or
+ * has exceeded its allotted time will need to suspend.
* When this flag is set the scanner will stop traversing
* the pool and write out the current state to disk.
*
@@ -105,7 +107,7 @@ typedef enum dsl_scan_flags {
typedef struct dsl_scan {
struct dsl_pool *scn_dp;
- boolean_t scn_pausing;
+ boolean_t scn_suspending;
uint64_t scn_restart_txg;
uint64_t scn_done_txg;
uint64_t scn_sync_start_time;
@@ -115,8 +117,6 @@ typedef struct dsl_scan {
boolean_t scn_is_bptree;
boolean_t scn_async_destroying;
boolean_t scn_async_stalled;
-
- /* for debugging / information */
uint64_t scn_visited_this_txg;
dsl_scan_phys_t scn_phys;
@@ -127,6 +127,8 @@ void dsl_scan_fini(struct dsl_pool *dp);
void dsl_scan_sync(struct dsl_pool *, dmu_tx_t *);
int dsl_scan_cancel(struct dsl_pool *);
int dsl_scan(struct dsl_pool *, pool_scan_func_t);
+boolean_t dsl_scan_scrubbing(const struct dsl_pool *dp);
+int dsl_scrub_set_pause_resume(const struct dsl_pool *dp, pool_scrub_cmd_t cmd);
void dsl_resilver_restart(struct dsl_pool *, uint64_t txg);
boolean_t dsl_scan_resilvering(struct dsl_pool *dp);
boolean_t dsl_dataset_unstable(struct dsl_dataset *ds);
@@ -137,6 +139,7 @@ void dsl_scan_ds_snapshotted(struct dsl_dataset *ds, struct dmu_tx *tx);
void dsl_scan_ds_clone_swapped(struct dsl_dataset *ds1, struct dsl_dataset *ds2,
struct dmu_tx *tx);
boolean_t dsl_scan_active(dsl_scan_t *scn);
+boolean_t dsl_scan_is_paused_scrub(const dsl_scan_t *scn);
#ifdef __cplusplus
}
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h
index c66867bef165..71ffc5b12f9a 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h
@@ -25,6 +25,7 @@
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
* Copyright 2013 Saso Kiselkov. All rights reserved.
* Copyright (c) 2014 Integros [integros.com]
+ * Copyright (c) 2017 Datto Inc.
*/
#ifndef _SYS_SPA_H
@@ -695,6 +696,7 @@ extern void spa_l2cache_drop(spa_t *spa);
/* scanning */
extern int spa_scan(spa_t *spa, pool_scan_func_t func);
extern int spa_scan_stop(spa_t *spa);
+extern int spa_scrub_pause_resume(spa_t *spa, pool_scrub_cmd_t flag);
/* spa syncing */
extern void spa_sync(spa_t *spa, uint64_t txg); /* only for DMU use */
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h
index f308e7387430..78cdaecf496f 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h
@@ -25,6 +25,7 @@
* Copyright 2013 Martin Matuska <mm@FreeBSD.org>. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
* Copyright 2013 Saso Kiselkov. All rights reserved.
+ * Copyright (c) 2017 Datto Inc.
*/
#ifndef _SYS_SPA_IMPL_H
@@ -193,6 +194,8 @@ struct spa {
uint8_t spa_scrub_started; /* started since last boot */
uint8_t spa_scrub_reopen; /* scrub doing vdev_reopen */
uint64_t spa_scan_pass_start; /* start time per pass/reboot */
+ uint64_t spa_scan_pass_scrub_pause; /* scrub pause time */
+ uint64_t spa_scan_pass_scrub_spent_paused; /* total paused */
uint64_t spa_scan_pass_exam; /* examined bytes per pass */
kmutex_t spa_async_lock; /* protect async state */
kthread_t *spa_async_thread; /* thread doing async task */
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c
index 1befdae31d77..425aed567404 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c
@@ -33,6 +33,7 @@
* Copyright (c) 2014 Integros [integros.com]
* Copyright 2016 Toomas Soome <tsoome@me.com>
* Copyright 2017 RackTop Systems.
+ * Copyright (c) 2017 Datto Inc.
*/
/*
@@ -1727,6 +1728,7 @@ zfs_ioc_pool_tryimport(zfs_cmd_t *zc)
* inputs:
* zc_name name of the pool
* zc_cookie scan func (pool_scan_func_t)
+ * zc_flags scrub pause/resume flag (pool_scrub_cmd_t)
*/
static int
zfs_ioc_pool_scan(zfs_cmd_t *zc)
@@ -1737,7 +1739,12 @@ zfs_ioc_pool_scan(zfs_cmd_t *zc)
if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
return (error);
- if (zc->zc_cookie == POOL_SCAN_NONE)
+ if (zc->zc_flags >= POOL_SCRUB_FLAGS_END)
+ return (SET_ERROR(EINVAL));
+
+ if (zc->zc_flags == POOL_SCRUB_PAUSE)
+ error = spa_scrub_pause_resume(spa, POOL_SCRUB_PAUSE);
+ else if (zc->zc_cookie == POOL_SCAN_NONE)
error = spa_scan_stop(spa);
else
error = spa_scan(spa, zc->zc_cookie);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h b/sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h
index 7c72c887d419..676e5ca85729 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h
@@ -26,6 +26,7 @@
* Copyright (c) 2013, Joyent, Inc. All rights reserved.
* Copyright (c) 2012, Martin Matuska <mm@FreeBSD.org>. All rights reserved.
* Copyright (c) 2014 Integros [integros.com]
+ * Copyright (c) 2017 Datto Inc.
*/
/* Portions Copyright 2010 Robert Milkowski */
@@ -697,6 +698,16 @@ typedef enum pool_scan_func {
} pool_scan_func_t;
/*
+ * Used to control scrub pause and resume.
+ */
+typedef enum pool_scrub_cmd {
+ POOL_SCRUB_NORMAL = 0,
+ POOL_SCRUB_PAUSE,
+ POOL_SCRUB_FLAGS_END
+} pool_scrub_cmd_t;
+
+
+/*
* ZIO types. Needed to interpret vdev statistics below.
*/
typedef enum zio_type {
@@ -728,6 +739,9 @@ typedef struct pool_scan_stat {
/* values not stored on disk */
uint64_t pss_pass_exam; /* examined bytes per scan pass */
uint64_t pss_pass_start; /* start time of a scan pass */
+ uint64_t pss_pass_scrub_pause; /* pause time of a scurb pass */
+ /* cumulative time scrub spent paused, needed for rate calculation */
+ uint64_t pss_pass_scrub_spent_paused;
} pool_scan_stat_t;
typedef enum dsl_scan_state {