summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSerapheim Dimitropoulos <serapheim@delphix.com>2017-12-05 09:45:46 -0800
committerPrakash Surya <prakash.surya@delphix.com>2018-05-17 06:50:48 -0700
commitfa41d87de9ec9000964c605eb01d6dc19e4a1abe (patch)
tree705158eafbc22e72a6aa75267d9c313647a2c989
parent221813c13b43ef48330b03725e00edee85108cf1 (diff)
downloadillumos-joyent-fa41d87de9ec9000964c605eb01d6dc19e4a1abe.tar.gz
9464 txg_kick() fails to see that we are quiescing, forcing transactions to their next stages without leaving them accumulate changes
Reviewed by: Matt Ahrens <matt@delphix.com> Reviewed by: Brad Lewis <brad.lewis@delphix.com> Reviewed by: Andriy Gapon <avg@FreeBSD.org> Approved by: Dan McDonald <danmcd@joyent.com>
-rw-r--r--usr/src/uts/common/fs/zfs/dmu_tx.c7
-rw-r--r--usr/src/uts/common/fs/zfs/sys/txg_impl.h3
-rw-r--r--usr/src/uts/common/fs/zfs/txg.c37
3 files changed, 41 insertions, 6 deletions
diff --git a/usr/src/uts/common/fs/zfs/dmu_tx.c b/usr/src/uts/common/fs/zfs/dmu_tx.c
index e39e5c4f42..557f7f2e81 100644
--- a/usr/src/uts/common/fs/zfs/dmu_tx.c
+++ b/usr/src/uts/common/fs/zfs/dmu_tx.c
@@ -1085,7 +1085,12 @@ dmu_tx_wait(dmu_tx_t *tx)
mutex_exit(&dn->dn_mtx);
tx->tx_needassign_txh = NULL;
} else {
- txg_wait_open(tx->tx_pool, tx->tx_lasttried_txg + 1);
+ /*
+ * If we have a lot of dirty data just wait until we sync
+ * out a TXG at which point we'll hopefully have synced
+ * a portion of the changes.
+ */
+ txg_wait_synced(dp, spa_last_synced_txg(spa) + 1);
}
}
diff --git a/usr/src/uts/common/fs/zfs/sys/txg_impl.h b/usr/src/uts/common/fs/zfs/sys/txg_impl.h
index e583d61eac..bf3b269d70 100644
--- a/usr/src/uts/common/fs/zfs/sys/txg_impl.h
+++ b/usr/src/uts/common/fs/zfs/sys/txg_impl.h
@@ -25,7 +25,7 @@
*/
/*
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2013, 2017 by Delphix. All rights reserved.
*/
#ifndef _SYS_TXG_IMPL_H
@@ -92,6 +92,7 @@ typedef struct tx_state {
kmutex_t tx_sync_lock; /* protects the rest of this struct */
uint64_t tx_open_txg; /* currently open txg id */
+ uint64_t tx_quiescing_txg; /* currently quiescing txg id */
uint64_t tx_quiesced_txg; /* quiesced txg waiting for sync */
uint64_t tx_syncing_txg; /* currently syncing txg id */
uint64_t tx_synced_txg; /* last synced txg id */
diff --git a/usr/src/uts/common/fs/zfs/txg.c b/usr/src/uts/common/fs/zfs/txg.c
index 06e7536962..743b914a20 100644
--- a/usr/src/uts/common/fs/zfs/txg.c
+++ b/usr/src/uts/common/fs/zfs/txg.c
@@ -444,6 +444,30 @@ txg_dispatch_callbacks(dsl_pool_t *dp, uint64_t txg)
}
}
+static boolean_t
+txg_is_syncing(dsl_pool_t *dp)
+{
+ tx_state_t *tx = &dp->dp_tx;
+ ASSERT(MUTEX_HELD(&tx->tx_sync_lock));
+ return (tx->tx_syncing_txg != 0);
+}
+
+static boolean_t
+txg_is_quiescing(dsl_pool_t *dp)
+{
+ tx_state_t *tx = &dp->dp_tx;
+ ASSERT(MUTEX_HELD(&tx->tx_sync_lock));
+ return (tx->tx_quiescing_txg != 0);
+}
+
+static boolean_t
+txg_has_quiesced_to_sync(dsl_pool_t *dp)
+{
+ tx_state_t *tx = &dp->dp_tx;
+ ASSERT(MUTEX_HELD(&tx->tx_sync_lock));
+ return (tx->tx_quiesced_txg != 0);
+}
+
static void
txg_sync_thread(void *arg)
{
@@ -470,7 +494,7 @@ txg_sync_thread(void *arg)
while (!dsl_scan_active(dp->dp_scan) &&
!tx->tx_exiting && timer > 0 &&
tx->tx_synced_txg >= tx->tx_sync_txg_waiting &&
- tx->tx_quiesced_txg == 0 &&
+ !txg_has_quiesced_to_sync(dp) &&
dp->dp_dirty_total < zfs_dirty_data_sync) {
dprintf("waiting; tx_synced=%llu waiting=%llu dp=%p\n",
tx->tx_synced_txg, tx->tx_sync_txg_waiting, dp);
@@ -483,7 +507,7 @@ txg_sync_thread(void *arg)
* Wait until the quiesce thread hands off a txg to us,
* prompting it to do so if necessary.
*/
- while (!tx->tx_exiting && tx->tx_quiesced_txg == 0) {
+ while (!tx->tx_exiting && !txg_has_quiesced_to_sync(dp)) {
if (tx->tx_quiesce_txg_waiting < tx->tx_open_txg+1)
tx->tx_quiesce_txg_waiting = tx->tx_open_txg+1;
cv_broadcast(&tx->tx_quiesce_more_cv);
@@ -498,6 +522,7 @@ txg_sync_thread(void *arg)
* us. This may cause the quiescing thread to now be
* able to quiesce another txg, so we must signal it.
*/
+ ASSERT(tx->tx_quiesced_txg != 0);
txg = tx->tx_quiesced_txg;
tx->tx_quiesced_txg = 0;
tx->tx_syncing_txg = txg;
@@ -546,7 +571,7 @@ txg_quiesce_thread(void *arg)
*/
while (!tx->tx_exiting &&
(tx->tx_open_txg >= tx->tx_quiesce_txg_waiting ||
- tx->tx_quiesced_txg != 0))
+ txg_has_quiesced_to_sync(dp)))
txg_thread_wait(tx, &cpr, &tx->tx_quiesce_more_cv, 0);
if (tx->tx_exiting)
@@ -556,6 +581,8 @@ txg_quiesce_thread(void *arg)
dprintf("txg=%llu quiesce_txg=%llu sync_txg=%llu\n",
txg, tx->tx_quiesce_txg_waiting,
tx->tx_sync_txg_waiting);
+ tx->tx_quiescing_txg = txg;
+
mutex_exit(&tx->tx_sync_lock);
txg_quiesce(dp, txg);
mutex_enter(&tx->tx_sync_lock);
@@ -564,6 +591,7 @@ txg_quiesce_thread(void *arg)
* Hand this txg off to the sync thread.
*/
dprintf("quiesce done, handing off txg %llu\n", txg);
+ tx->tx_quiescing_txg = 0;
tx->tx_quiesced_txg = txg;
DTRACE_PROBE2(txg__quiesced, dsl_pool_t *, dp, uint64_t, txg);
cv_broadcast(&tx->tx_sync_more_cv);
@@ -661,7 +689,8 @@ txg_kick(dsl_pool_t *dp)
ASSERT(!dsl_pool_config_held(dp));
mutex_enter(&tx->tx_sync_lock);
- if (tx->tx_syncing_txg == 0 &&
+ if (!txg_is_syncing(dp) &&
+ !txg_is_quiescing(dp) &&
tx->tx_quiesce_txg_waiting <= tx->tx_open_txg &&
tx->tx_sync_txg_waiting <= tx->tx_synced_txg &&
tx->tx_quiesced_txg <= tx->tx_synced_txg) {