summaryrefslogtreecommitdiff
path: root/usr/src/uts/common/fs/zfs/dmu.c
diff options
context:
space:
mode:
authorMatthew Ahrens <mahrens@delphix.com>2017-04-14 12:59:18 -0700
committerPrakash Surya <prakash.surya@delphix.com>2017-06-29 16:10:41 -0700
commitb7edcb940884114e61382937505433c4c38c0278 (patch)
treebe15a04ce63e756567ba3c15bafcece9f0a0810a /usr/src/uts/common/fs/zfs/dmu.c
parent16a7e5ac116c85d965007a5f201104b564e82210 (diff)
downloadillumos-gate-b7edcb940884114e61382937505433c4c38c0278.tar.gz
8378 crash due to bp in-memory modification of nopwrite block
Reviewed by: Prakash Surya <prakash.surya@delphix.com> Reviewed by: George Wilson <george.wilson@delphix.com> Approved by: Robert Mustacchi <rm@joyent.com>
Diffstat (limited to 'usr/src/uts/common/fs/zfs/dmu.c')
-rw-r--r--usr/src/uts/common/fs/zfs/dmu.c60
1 files changed, 45 insertions, 15 deletions
diff --git a/usr/src/uts/common/fs/zfs/dmu.c b/usr/src/uts/common/fs/zfs/dmu.c
index 5fafb95c79..1d4d921c83 100644
--- a/usr/src/uts/common/fs/zfs/dmu.c
+++ b/usr/src/uts/common/fs/zfs/dmu.c
@@ -1574,6 +1574,7 @@ dmu_sync_done(zio_t *zio, arc_buf_t *buf, void *varg)
uint8_t chksum = BP_GET_CHECKSUM(bp_orig);
ASSERT(BP_EQUAL(bp, bp_orig));
+ VERIFY(BP_EQUAL(bp, db->db_blkptr));
ASSERT(zio->io_prop.zp_compress != ZIO_COMPRESS_OFF);
ASSERT(zio_checksum_table[chksum].ci_flags &
ZCHECKSUM_FLAG_NOPWRITE);
@@ -1614,19 +1615,11 @@ dmu_sync_late_arrival_done(zio_t *zio)
blkptr_t *bp_orig = &zio->io_bp_orig;
if (zio->io_error == 0 && !BP_IS_HOLE(bp)) {
- /*
- * If we didn't allocate a new block (i.e. ZIO_FLAG_NOPWRITE)
- * then there is nothing to do here. Otherwise, free the
- * newly allocated block in this txg.
- */
- if (zio->io_flags & ZIO_FLAG_NOPWRITE) {
- ASSERT(BP_EQUAL(bp, bp_orig));
- } else {
- ASSERT(BP_IS_HOLE(bp_orig) || !BP_EQUAL(bp, bp_orig));
- ASSERT(zio->io_bp->blk_birth == zio->io_txg);
- ASSERT(zio->io_txg > spa_syncing_txg(zio->io_spa));
- zio_free(zio->io_spa, zio->io_txg, zio->io_bp);
- }
+ ASSERT(!(zio->io_flags & ZIO_FLAG_NOPWRITE));
+ ASSERT(BP_IS_HOLE(bp_orig) || !BP_EQUAL(bp, bp_orig));
+ ASSERT(zio->io_bp->blk_birth == zio->io_txg);
+ ASSERT(zio->io_txg > spa_syncing_txg(zio->io_spa));
+ zio_free(zio->io_spa, zio->io_txg, zio->io_bp);
}
dmu_tx_commit(dsa->dsa_tx);
@@ -1658,6 +1651,29 @@ dmu_sync_late_arrival(zio_t *pio, objset_t *os, dmu_sync_cb_t *done, zgd_t *zgd,
dsa->dsa_zgd = zgd;
dsa->dsa_tx = tx;
+ /*
+ * Since we are currently syncing this txg, it's nontrivial to
+ * determine what BP to nopwrite against, so we disable nopwrite.
+ *
+ * When syncing, the db_blkptr is initially the BP of the previous
+ * txg. We can not nopwrite against it because it will be changed
+ * (this is similar to the non-late-arrival case where the dbuf is
+ * dirty in a future txg).
+ *
+ * Then dbuf_write_ready() sets bp_blkptr to the location we will write.
+ * We can not nopwrite against it because although the BP will not
+ * (typically) be changed, the data has not yet been persisted to this
+ * location.
+ *
+ * Finally, when dbuf_write_done() is called, it is theoretically
+ * possible to always nopwrite, because the data that was written in
+ * this txg is the same data that we are trying to write. However we
+ * would need to check that this dbuf is not dirty in any future
+ * txg's (as we do in the normal dmu_sync() path). For simplicity, we
+ * don't nopwrite in this case.
+ */
+ zp->zp_nopwrite = B_FALSE;
+
zio_nowait(zio_write(pio, os->os_spa, dmu_tx_get_txg(tx), zgd->zgd_bp,
abd_get_from_buf(zgd->zgd_db->db_data, zgd->zgd_db->db_size),
zgd->zgd_db->db_size, zgd->zgd_db->db_size, zp,
@@ -1695,7 +1711,6 @@ dmu_sync_late_arrival(zio_t *pio, objset_t *os, dmu_sync_cb_t *done, zgd_t *zgd,
int
dmu_sync(zio_t *pio, uint64_t txg, dmu_sync_cb_t *done, zgd_t *zgd)
{
- blkptr_t *bp = zgd->zgd_bp;
dmu_buf_impl_t *db = (dmu_buf_impl_t *)zgd->zgd_db;
objset_t *os = db->db_objset;
dsl_dataset_t *ds = os->os_dsl_dataset;
@@ -1762,6 +1777,21 @@ dmu_sync(zio_t *pio, uint64_t txg, dmu_sync_cb_t *done, zgd_t *zgd)
ASSERT(dr->dr_next == NULL || dr->dr_next->dr_txg < txg);
+ if (db->db_blkptr != NULL) {
+ /*
+ * We need to fill in zgd_bp with the current blkptr so that
+ * the nopwrite code can check if we're writing the same
+ * data that's already on disk. We can only nopwrite if we
+ * are sure that after making the copy, db_blkptr will not
+ * change until our i/o completes. We ensure this by
+ * holding the db_mtx, and only allowing nopwrite if the
+ * block is not already dirty (see below). This is verified
+ * by dmu_sync_done(), which VERIFYs that the db_blkptr has
+ * not changed.
+ */
+ *zgd->zgd_bp = *db->db_blkptr;
+ }
+
/*
* Assume the on-disk data is X, the current syncing data (in
* txg - 1) is Y, and the current in-memory data is Z (currently
@@ -1813,7 +1843,7 @@ dmu_sync(zio_t *pio, uint64_t txg, dmu_sync_cb_t *done, zgd_t *zgd)
dsa->dsa_tx = NULL;
zio_nowait(arc_write(pio, os->os_spa, txg,
- bp, dr->dt.dl.dr_data, DBUF_IS_L2CACHEABLE(db),
+ zgd->zgd_bp, dr->dt.dl.dr_data, DBUF_IS_L2CACHEABLE(db),
&zp, dmu_sync_ready, NULL, NULL, dmu_sync_done, dsa,
ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL, &zb));