summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Jerry Jelinek <jerry.jelinek@joyent.com> 2018-11-30 21:32:37 +0000
committer Jerry Jelinek <jerry.jelinek@joyent.com> 2018-11-30 21:32:37 +0000
commit b738b17750f6942c4c4e0d907d75c86465e05646 (patch)
tree b7987c63eb7cdd0f6be596e69bb528254babcaef
parent 0ab0c69ec646299a62509ae4f83cd6fd04829575 (diff)
parent cab3a55e158118937e07d059c46f1bc14d1f254d (diff)
download illumos-joyent-b738b17750f6942c4c4e0d907d75c86465e05646.tar.gz
[illumos-gate merge]
commit cab3a55e158118937e07d059c46f1bc14d1f254d 9962 zil_commit should omit cache thrash
-rw-r--r-- usr/src/cmd/ztest/ztest.c 4
-rw-r--r-- usr/src/uts/common/fs/zfs/dmu.c 30
-rw-r--r-- usr/src/uts/common/fs/zfs/sys/zil_impl.h 14
-rw-r--r-- usr/src/uts/common/fs/zfs/zfs_vnops.c 10
-rw-r--r-- usr/src/uts/common/fs/zfs/zil.c 217
-rw-r--r-- usr/src/uts/common/fs/zfs/zvol.c 4
6 files changed, 203 insertions, 76 deletions
diff --git a/usr/src/cmd/ztest/ztest.c b/usr/src/cmd/ztest/ztest.c
index ca1c7a3cc0..46a8dd2c4e 100644
--- a/usr/src/cmd/ztest/ztest.c
+++ b/usr/src/cmd/ztest/ztest.c
@@ -1840,6 +1840,7 @@ zil_replay_func_t *ztest_replay_vector[TX_MAX_TYPE] = {
* ZIL get_data callbacks
*/
+/* ARGSUSED */
static void
ztest_get_done(zgd_t *zgd, int error)
{
@@ -1852,9 +1853,6 @@ ztest_get_done(zgd_t *zgd, int error)
ztest_range_unlock(zgd->zgd_rl);
ztest_object_unlock(zd, object);
- if (error == 0 && zgd->zgd_bp)
- zil_lwb_add_block(zgd->zgd_lwb, zgd->zgd_bp);
-
umem_free(zgd, sizeof (*zgd));
}
diff --git a/usr/src/uts/common/fs/zfs/dmu.c b/usr/src/uts/common/fs/zfs/dmu.c
index ce0c42d1ee..44157a3f62 100644
--- a/usr/src/uts/common/fs/zfs/dmu.c
+++ b/usr/src/uts/common/fs/zfs/dmu.c
@@ -1706,6 +1706,15 @@ dmu_sync_done(zio_t *zio, arc_buf_t *buf, void *varg)
dmu_sync_arg_t *dsa = varg;
dbuf_dirty_record_t *dr = dsa->dsa_dr;
dmu_buf_impl_t *db = dr->dr_dbuf;
+ zgd_t *zgd = dsa->dsa_zgd;
+
+ /*
+ * Record the vdev(s) backing this blkptr so they can be flushed after
+ * the writes for the lwb have completed.
+ */
+ if (zio->io_error == 0) {
+ zil_lwb_add_block(zgd->zgd_lwb, zgd->zgd_bp);
+ }
mutex_enter(&db->db_mtx);
ASSERT(dr->dt.dl.dr_override_state == DR_IN_DMU_SYNC);
@@ -1756,13 +1765,22 @@ dmu_sync_late_arrival_done(zio_t *zio)
blkptr_t *bp = zio->io_bp;
dmu_sync_arg_t *dsa = zio->io_private;
blkptr_t *bp_orig = &zio->io_bp_orig;
+ zgd_t *zgd = dsa->dsa_zgd;
- if (zio->io_error == 0 && !BP_IS_HOLE(bp)) {
- ASSERT(!(zio->io_flags & ZIO_FLAG_NOPWRITE));
- ASSERT(BP_IS_HOLE(bp_orig) || !BP_EQUAL(bp, bp_orig));
- ASSERT(zio->io_bp->blk_birth == zio->io_txg);
- ASSERT(zio->io_txg > spa_syncing_txg(zio->io_spa));
- zio_free(zio->io_spa, zio->io_txg, zio->io_bp);
+ if (zio->io_error == 0) {
+ /*
+ * Record the vdev(s) backing this blkptr so they can be
+ * flushed after the writes for the lwb have completed.
+ */
+ zil_lwb_add_block(zgd->zgd_lwb, zgd->zgd_bp);
+
+ if (!BP_IS_HOLE(bp)) {
+ ASSERT(!(zio->io_flags & ZIO_FLAG_NOPWRITE));
+ ASSERT(BP_IS_HOLE(bp_orig) || !BP_EQUAL(bp, bp_orig));
+ ASSERT(zio->io_bp->blk_birth == zio->io_txg);
+ ASSERT(zio->io_txg > spa_syncing_txg(zio->io_spa));
+ zio_free(zio->io_spa, zio->io_txg, zio->io_bp);
+ }
}
dmu_tx_commit(dsa->dsa_tx);
diff --git a/usr/src/uts/common/fs/zfs/sys/zil_impl.h b/usr/src/uts/common/fs/zfs/sys/zil_impl.h
index af71cab7f1..e2eb4025a6 100644
--- a/usr/src/uts/common/fs/zfs/sys/zil_impl.h
+++ b/usr/src/uts/common/fs/zfs/sys/zil_impl.h
@@ -48,10 +48,11 @@ extern "C" {
* via zil_lwb_write_issue(). Again, the zilog's "zl_issuer_lock" must
* be held when making this transition.
*
- * After the lwb's zio completes, and the vdev's are flushed, the lwb
- * will transition into the "done" state via zil_lwb_write_done(). When
- * transitioning from "issued" to "done", the zilog's "zl_lock" must be
- * held, *not* the "zl_issuer_lock".
+ * After the lwb's write zio completes, it transitions into the "write
+ * done" state via zil_lwb_write_done(); and then into the "flush done"
+ * state via zil_lwb_flush_vdevs_done(). When transitioning from
+ * "issued" to "write done", and then from "write done" to "flush done",
+ * the zilog's "zl_lock" must be held, *not* the "zl_issuer_lock".
*
* The zilog's "zl_issuer_lock" can become heavily contended in certain
* workloads, so we specifically avoid acquiring that lock when
@@ -68,13 +69,14 @@ extern "C" {
* "zl_issuer_lock" will prevent a concurrent thread from transitioning
* that lwb to the "issued" state. Likewise, if an lwb is already in the
* "issued" state, holding the "zl_lock" will prevent a concurrent
- * thread from transitioning that lwb to the "done" state.
+ * thread from transitioning that lwb to the "write done" state.
*/
typedef enum {
LWB_STATE_CLOSED,
LWB_STATE_OPENED,
LWB_STATE_ISSUED,
- LWB_STATE_DONE,
+ LWB_STATE_WRITE_DONE,
+ LWB_STATE_FLUSH_DONE,
LWB_NUM_STATES
} lwb_state_t;
diff --git a/usr/src/uts/common/fs/zfs/zfs_vnops.c b/usr/src/uts/common/fs/zfs/zfs_vnops.c
index 792dbb7bac..475020a20a 100644
--- a/usr/src/uts/common/fs/zfs/zfs_vnops.c
+++ b/usr/src/uts/common/fs/zfs/zfs_vnops.c
@@ -1017,6 +1017,7 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
return (0);
}
+/* ARGSUSED */
void
zfs_get_done(zgd_t *zgd, int error)
{
@@ -1034,9 +1035,6 @@ zfs_get_done(zgd_t *zgd, int error)
*/
VN_RELE_ASYNC(ZTOV(zp), dsl_pool_vnrele_taskq(dmu_objset_pool(os)));
- if (error == 0 && zgd->zgd_bp)
- zil_lwb_add_block(zgd->zgd_lwb, zgd->zgd_bp);
-
kmem_free(zgd, sizeof (zgd_t));
}
@@ -1160,11 +1158,7 @@ zfs_get_data(void *arg, lr_write_t *lr, char *buf, struct lwb *lwb, zio_t *zio)
* TX_WRITE2 relies on the data previously
* written by the TX_WRITE that caused
* EALREADY. We zero out the BP because
- * it is the old, currently-on-disk BP,
- * so there's no need to zio_flush() its
- * vdevs (flushing would needlesly hurt
- * performance, and doesn't work on
- * indirect vdevs).
+ * it is the old, currently-on-disk BP.
*/
zgd->zgd_bp = NULL;
BP_ZERO(bp);
diff --git a/usr/src/uts/common/fs/zfs/zil.c b/usr/src/uts/common/fs/zfs/zil.c
index 07d67a0816..05c988a3bc 100644
--- a/usr/src/uts/common/fs/zfs/zil.c
+++ b/usr/src/uts/common/fs/zfs/zil.c
@@ -550,7 +550,7 @@ zil_free_lwb(zilog_t *zilog, lwb_t *lwb)
ASSERT3P(lwb->lwb_root_zio, ==, NULL);
ASSERT3U(lwb->lwb_max_txg, <=, spa_syncing_txg(zilog->zl_spa));
ASSERT(lwb->lwb_state == LWB_STATE_CLOSED ||
- lwb->lwb_state == LWB_STATE_DONE);
+ lwb->lwb_state == LWB_STATE_FLUSH_DONE);
/*
* Clear the zilog's field to indicate this lwb is no longer
@@ -962,7 +962,8 @@ zil_commit_waiter_link_lwb(zil_commit_waiter_t *zcw, lwb_t *lwb)
ASSERT3P(zcw->zcw_lwb, ==, NULL);
ASSERT3P(lwb, !=, NULL);
ASSERT(lwb->lwb_state == LWB_STATE_OPENED ||
- lwb->lwb_state == LWB_STATE_ISSUED);
+ lwb->lwb_state == LWB_STATE_ISSUED ||
+ lwb->lwb_state == LWB_STATE_WRITE_DONE);
list_insert_tail(&lwb->lwb_waiters, zcw);
zcw->zcw_lwb = lwb;
@@ -1008,6 +1009,42 @@ zil_lwb_add_block(lwb_t *lwb, const blkptr_t *bp)
mutex_exit(&lwb->lwb_vdev_lock);
}
+static void
+zil_lwb_flush_defer(lwb_t *lwb, lwb_t *nlwb)
+{
+ avl_tree_t *src = &lwb->lwb_vdev_tree;
+ avl_tree_t *dst = &nlwb->lwb_vdev_tree;
+ void *cookie = NULL;
+ zil_vdev_node_t *zv;
+
+ ASSERT3S(lwb->lwb_state, ==, LWB_STATE_WRITE_DONE);
+ ASSERT3S(nlwb->lwb_state, !=, LWB_STATE_WRITE_DONE);
+ ASSERT3S(nlwb->lwb_state, !=, LWB_STATE_FLUSH_DONE);
+
+ /*
+ * 'lwb' is at a point in its lifetime where lwb_vdev_tree does not
+ * need the protection of lwb_vdev_lock (it will only be modified
+ * while holding zilog->zl_lock), as its writes and those of its
+ * children have all completed. The younger 'nlwb', however, may
+ * still be waiting on future writes to additional vdevs.
+ */
+ mutex_enter(&nlwb->lwb_vdev_lock);
+ /*
+ * Tear down the 'lwb' vdev tree, ensuring that entries which do not
+ * exist in 'nlwb' are moved to it, freeing any would-be duplicates.
+ */
+ while ((zv = avl_destroy_nodes(src, &cookie)) != NULL) {
+ avl_index_t where;
+
+ if (avl_find(dst, zv, &where) == NULL) {
+ avl_insert(dst, zv, where);
+ } else {
+ kmem_free(zv, sizeof (*zv));
+ }
+ }
+ mutex_exit(&nlwb->lwb_vdev_lock);
+}
+
void
zil_lwb_add_txg(lwb_t *lwb, uint64_t txg)
{
@@ -1015,9 +1052,13 @@ zil_lwb_add_txg(lwb_t *lwb, uint64_t txg)
}
/*
- * This function is a called after all VDEVs associated with a given lwb
+ * This function is called after all vdevs associated with a given lwb
* write have completed their DKIOCFLUSHWRITECACHE command; or as soon
- * as the lwb write completes, if "zil_nocacheflush" is set.
+ * as the lwb write completes, if "zil_nocacheflush" is set. Further,
+ * all "previous" lwb's will have completed before this function is
+ * called; i.e. this function is called for all previous lwbs before
+ * it's called for "this" lwb (enforced via the zio dependencies
+ * configured in zil_lwb_set_zio_dependency()).
*
* The intention is for this function to be called as soon as the
* contents of an lwb are considered "stable" on disk, and will survive
@@ -1054,7 +1095,9 @@ zil_lwb_flush_vdevs_done(zio_t *zio)
zilog->zl_last_lwb_latency = gethrtime() - lwb->lwb_issued_timestamp;
lwb->lwb_root_zio = NULL;
- lwb->lwb_state = LWB_STATE_DONE;
+
+ ASSERT3S(lwb->lwb_state, ==, LWB_STATE_WRITE_DONE);
+ lwb->lwb_state = LWB_STATE_FLUSH_DONE;
if (zilog->zl_last_lwb_opened == lwb) {
/*
@@ -1095,14 +1138,17 @@ zil_lwb_flush_vdevs_done(zio_t *zio)
}
/*
- * This is called when an lwb write completes. This means, this specific
- * lwb was written to disk, and all dependent lwb have also been
- * written to disk.
- *
- * At this point, a DKIOCFLUSHWRITECACHE command hasn't been issued to
- * the VDEVs involved in writing out this specific lwb. The lwb will be
- * "done" once zil_lwb_flush_vdevs_done() is called, which occurs in the
- * zio completion callback for the lwb's root zio.
+ * This is called when an lwb's write zio completes. The callback's
+ * purpose is to issue the DKIOCFLUSHWRITECACHE commands for the vdevs
+ * in the lwb's lwb_vdev_tree. The tree will contain the vdevs involved
+ * in writing out this specific lwb's data, and in the case that cache
+ * flushes have been deferred, vdevs involved in writing the data for
+ * previous lwbs. The writes corresponding to all the vdevs in the
+ * lwb_vdev_tree will have completed by the time this is called, due to
+ * the zio dependencies configured in zil_lwb_set_zio_dependency(),
+ * which takes deferred flushes into account. The lwb will be "flush
+ * done" once zil_lwb_flush_vdevs_done() is called, which occurs in the
+ * zio completion callback for the lwb's root zio.
*/
static void
zil_lwb_write_done(zio_t *zio)
@@ -1113,6 +1159,7 @@ zil_lwb_write_done(zio_t *zio)
avl_tree_t *t = &lwb->lwb_vdev_tree;
void *cookie = NULL;
zil_vdev_node_t *zv;
+ lwb_t *nlwb;
ASSERT3S(spa_config_held(spa, SCL_STATE, RW_READER), !=, 0);
@@ -1126,10 +1173,11 @@ zil_lwb_write_done(zio_t *zio)
abd_put(zio->io_abd);
- ASSERT3S(lwb->lwb_state, ==, LWB_STATE_ISSUED);
-
mutex_enter(&zilog->zl_lock);
+ ASSERT3S(lwb->lwb_state, ==, LWB_STATE_ISSUED);
+ lwb->lwb_state = LWB_STATE_WRITE_DONE;
lwb->lwb_write_zio = NULL;
+ nlwb = list_next(&zilog->zl_lwb_list, lwb);
mutex_exit(&zilog->zl_lock);
if (avl_numnodes(t) == 0)
@@ -1148,6 +1196,27 @@ zil_lwb_write_done(zio_t *zio)
return;
}
+ /*
+ * If this lwb does not have any threads waiting for it to
+ * complete, we want to defer issuing the DKIOCFLUSHWRITECACHE
+ * command to the vdevs written to by "this" lwb, and instead
+ * rely on the "next" lwb to handle the DKIOCFLUSHWRITECACHE
+ * command for those vdevs. Thus, we merge the vdev tree of
+ * "this" lwb with the vdev tree of the "next" lwb in the list,
+ * and assume the "next" lwb will handle flushing the vdevs (or
+ * deferring the flush(s) again).
+ *
+ * This is a useful performance optimization, especially for
+ * workloads with lots of async write activity and little sync
+ * write and/or fsync activity, as it has the potential to
+ * coalesce multiple flush commands to a vdev into one.
+ */
+ if (list_head(&lwb->lwb_waiters) == NULL && nlwb != NULL) {
+ zil_lwb_flush_defer(lwb, nlwb);
+ ASSERT(avl_is_empty(&lwb->lwb_vdev_tree));
+ return;
+ }
+
while ((zv = avl_destroy_nodes(t, &cookie)) != NULL) {
vdev_t *vd = vdev_lookup_top(spa, zv->zv_vdev);
if (vd != NULL)
@@ -1156,6 +1225,73 @@ zil_lwb_write_done(zio_t *zio)
}
}
+static void
+zil_lwb_set_zio_dependency(zilog_t *zilog, lwb_t *lwb)
+{
+ lwb_t *last_lwb_opened = zilog->zl_last_lwb_opened;
+
+ ASSERT(MUTEX_HELD(&zilog->zl_issuer_lock));
+ ASSERT(MUTEX_HELD(&zilog->zl_lock));
+
+ /*
+ * The zilog's "zl_last_lwb_opened" field is used to build the
+ * lwb/zio dependency chain, which is used to preserve the
+ * ordering of lwb completions that is required by the semantics
+ * of the ZIL. Each new lwb zio becomes a parent of the
+ * "previous" lwb zio, such that the new lwb's zio cannot
+ * complete until the "previous" lwb's zio completes.
+ *
+ * This is required by the semantics of zil_commit(); the commit
+ * waiters attached to the lwbs will be woken in the lwb zio's
+ * completion callback, so this zio dependency graph ensures the
+ * waiters are woken in the correct order (the same order the
+ * lwbs were created).
+ */
+ if (last_lwb_opened != NULL &&
+ last_lwb_opened->lwb_state != LWB_STATE_FLUSH_DONE) {
+ ASSERT(last_lwb_opened->lwb_state == LWB_STATE_OPENED ||
+ last_lwb_opened->lwb_state == LWB_STATE_ISSUED ||
+ last_lwb_opened->lwb_state == LWB_STATE_WRITE_DONE);
+
+ ASSERT3P(last_lwb_opened->lwb_root_zio, !=, NULL);
+ zio_add_child(lwb->lwb_root_zio,
+ last_lwb_opened->lwb_root_zio);
+
+ /*
+ * If the previous lwb's write hasn't already completed,
+ * we also want to order the completion of the lwb write
+ * zios (above, we only order the completion of the lwb
+ * root zios). This is required because of how we can
+ * defer the DKIOCFLUSHWRITECACHE commands for each lwb.
+ *
+ * When the DKIOCFLUSHWRITECACHE commands are deferred,
+ * the previous lwb will rely on this lwb to flush the
+ * vdevs written to by that previous lwb. Thus, we need
+ * to ensure this lwb doesn't issue the flush until
+ * after the previous lwb's write completes. We ensure
+ * this ordering by setting the zio parent/child
+ * relationship here.
+ *
+ * Without this relationship on the lwb's write zio,
+ * it's possible for this lwb's write to complete prior
+ * to the previous lwb's write completing; and thus, the
+ * vdevs for the previous lwb would be flushed prior to
+ * that lwb's data being written to those vdevs (the
+ * vdevs are flushed in the lwb write zio's completion
+ * handler, zil_lwb_write_done()).
+ */
+ if (last_lwb_opened->lwb_state != LWB_STATE_WRITE_DONE) {
+ ASSERT(last_lwb_opened->lwb_state == LWB_STATE_OPENED ||
+ last_lwb_opened->lwb_state == LWB_STATE_ISSUED);
+
+ ASSERT3P(last_lwb_opened->lwb_write_zio, !=, NULL);
+ zio_add_child(lwb->lwb_write_zio,
+ last_lwb_opened->lwb_write_zio);
+ }
+ }
+}
+
+
/*
* This function's purpose is to "open" an lwb such that it is ready to
* accept new itxs being committed to it. To do this, the lwb's zio
@@ -1200,33 +1336,8 @@ zil_lwb_write_open(zilog_t *zilog, lwb_t *lwb)
lwb->lwb_state = LWB_STATE_OPENED;
mutex_enter(&zilog->zl_lock);
-
- /*
- * The zilog's "zl_last_lwb_opened" field is used to
- * build the lwb/zio dependency chain, which is used to
- * preserve the ordering of lwb completions that is
- * required by the semantics of the ZIL. Each new lwb
- * zio becomes a parent of the "previous" lwb zio, such
- * that the new lwb's zio cannot complete until the
- * "previous" lwb's zio completes.
- *
- * This is required by the semantics of zil_commit();
- * the commit waiters attached to the lwbs will be woken
- * in the lwb zio's completion callback, so this zio
- * dependency graph ensures the waiters are woken in the
- * correct order (the same order the lwbs were created).
- */
- lwb_t *last_lwb_opened = zilog->zl_last_lwb_opened;
- if (last_lwb_opened != NULL &&
- last_lwb_opened->lwb_state != LWB_STATE_DONE) {
- ASSERT(last_lwb_opened->lwb_state == LWB_STATE_OPENED ||
- last_lwb_opened->lwb_state == LWB_STATE_ISSUED);
- ASSERT3P(last_lwb_opened->lwb_root_zio, !=, NULL);
- zio_add_child(lwb->lwb_root_zio,
- last_lwb_opened->lwb_root_zio);
- }
+ zil_lwb_set_zio_dependency(zilog, lwb);
zilog->zl_last_lwb_opened = lwb;
-
mutex_exit(&zilog->zl_lock);
}
@@ -1927,7 +2038,8 @@ zil_prune_commit_list(zilog_t *zilog)
mutex_enter(&zilog->zl_lock);
lwb_t *last_lwb = zilog->zl_last_lwb_opened;
- if (last_lwb == NULL || last_lwb->lwb_state == LWB_STATE_DONE) {
+ if (last_lwb == NULL ||
+ last_lwb->lwb_state == LWB_STATE_FLUSH_DONE) {
/*
* All of the itxs this waiter was waiting on
* must have already completed (or there were
@@ -2008,7 +2120,8 @@ zil_process_commit_list(zilog_t *zilog)
lwb = zil_create(zilog);
} else {
ASSERT3S(lwb->lwb_state, !=, LWB_STATE_ISSUED);
- ASSERT3S(lwb->lwb_state, !=, LWB_STATE_DONE);
+ ASSERT3S(lwb->lwb_state, !=, LWB_STATE_WRITE_DONE);
+ ASSERT3S(lwb->lwb_state, !=, LWB_STATE_FLUSH_DONE);
}
while (itx = list_head(&zilog->zl_itx_commit_list)) {
@@ -2110,7 +2223,8 @@ zil_process_commit_list(zilog_t *zilog)
ASSERT(list_is_empty(&nolwb_waiters));
ASSERT3P(lwb, !=, NULL);
ASSERT3S(lwb->lwb_state, !=, LWB_STATE_ISSUED);
- ASSERT3S(lwb->lwb_state, !=, LWB_STATE_DONE);
+ ASSERT3S(lwb->lwb_state, !=, LWB_STATE_WRITE_DONE);
+ ASSERT3S(lwb->lwb_state, !=, LWB_STATE_FLUSH_DONE);
/*
* At this point, the ZIL block pointed at by the "lwb"
@@ -2231,7 +2345,8 @@ zil_commit_waiter_timeout(zilog_t *zilog, zil_commit_waiter_t *zcw)
* acquiring it when it's not necessary to do so.
*/
if (lwb->lwb_state == LWB_STATE_ISSUED ||
- lwb->lwb_state == LWB_STATE_DONE)
+ lwb->lwb_state == LWB_STATE_WRITE_DONE ||
+ lwb->lwb_state == LWB_STATE_FLUSH_DONE)
return;
/*
@@ -2279,7 +2394,8 @@ zil_commit_waiter_timeout(zilog_t *zilog, zil_commit_waiter_t *zcw)
* more details on the lwb states, and locking requirements.
*/
if (lwb->lwb_state == LWB_STATE_ISSUED ||
- lwb->lwb_state == LWB_STATE_DONE)
+ lwb->lwb_state == LWB_STATE_WRITE_DONE ||
+ lwb->lwb_state == LWB_STATE_FLUSH_DONE)
goto out;
ASSERT3S(lwb->lwb_state, ==, LWB_STATE_OPENED);
@@ -2452,7 +2568,8 @@ zil_commit_waiter(zilog_t *zilog, zil_commit_waiter_t *zcw)
IMPLY(lwb != NULL,
lwb->lwb_state == LWB_STATE_ISSUED ||
- lwb->lwb_state == LWB_STATE_DONE);
+ lwb->lwb_state == LWB_STATE_WRITE_DONE ||
+ lwb->lwb_state == LWB_STATE_FLUSH_DONE);
cv_wait(&zcw->zcw_cv, &zcw->zcw_lock);
}
}
@@ -3094,13 +3211,13 @@ zil_suspend(const char *osname, void **cookiep)
* to disk before proceeding. If we used zil_commit instead, it
* would just call txg_wait_synced(), because zl_suspend is set.
* txg_wait_synced() doesn't wait for these lwb's to be
- * LWB_STATE_DONE before returning.
+ * LWB_STATE_FLUSH_DONE before returning.
*/
zil_commit_impl(zilog, 0);
/*
- * Now that we've ensured all lwb's are LWB_STATE_DONE, we use
- * txg_wait_synced() to ensure the data from the zilog has
+ * Now that we've ensured all lwb's are LWB_STATE_FLUSH_DONE, we
+ * use txg_wait_synced() to ensure the data from the zilog has
* migrated to the main pool before calling zil_destroy().
*/
txg_wait_synced(zilog->zl_dmu_pool, 0);
diff --git a/usr/src/uts/common/fs/zfs/zvol.c b/usr/src/uts/common/fs/zfs/zvol.c
index b58cf54a17..1e787f31b2 100644
--- a/usr/src/uts/common/fs/zfs/zvol.c
+++ b/usr/src/uts/common/fs/zfs/zvol.c
@@ -980,6 +980,7 @@ zvol_close(dev_t dev, int flag, int otyp, cred_t *cr)
return (error);
}
+/* ARGSUSED */
static void
zvol_get_done(zgd_t *zgd, int error)
{
@@ -988,9 +989,6 @@ zvol_get_done(zgd_t *zgd, int error)
zfs_range_unlock(zgd->zgd_rl);
- if (error == 0 && zgd->zgd_bp)
- zil_lwb_add_block(zgd->zgd_lwb, zgd->zgd_bp);
-
kmem_free(zgd, sizeof (zgd_t));
}