Diffstat (limited to 'usr/src/uts/common/fs/zfs/dmu_send.c')
-rw-r--r--	usr/src/uts/common/fs/zfs/dmu_send.c	471
1 file changed, 378 insertions(+), 93 deletions(-)
diff --git a/usr/src/uts/common/fs/zfs/dmu_send.c b/usr/src/uts/common/fs/zfs/dmu_send.c
index 6d65086079..bfc0a6f585 100644
--- a/usr/src/uts/common/fs/zfs/dmu_send.c
+++ b/usr/src/uts/common/fs/zfs/dmu_send.c
@@ -61,6 +61,8 @@ int zfs_send_corrupt_data = B_FALSE;
int zfs_send_queue_length = 16 * 1024 * 1024;
/* Set this tunable to FALSE to disable setting of DRR_FLAG_FREERECORDS */
int zfs_send_set_freerecords_bit = B_TRUE;
+/* Set this tunable to FALSE to disable sending unmodified spill blocks. */
+int zfs_send_unmodified_spill_blocks = B_TRUE;
/*
* Use this to override the recordsize calculation for fast zfs send estimates.
@@ -90,6 +92,8 @@ struct send_block_record {
bqueue_node_t ln;
};
+static int do_dump(dmu_sendarg_t *dsa, struct send_block_record *data);
+
static int
dump_bytes(dmu_sendarg_t *dsp, void *buf, int len)
{
@@ -97,18 +101,17 @@ dump_bytes(dmu_sendarg_t *dsp, void *buf, int len)
ssize_t resid; /* have to get resid to get detailed errno */
/*
- * The code does not rely on this (len being a multiple of 8). We keep
+ * The code does not rely on len being a multiple of 8. We keep
* this assertion because of the corresponding assertion in
* receive_read(). Keeping this assertion ensures that we do not
* inadvertently break backwards compatibility (causing the assertion
- * in receive_read() to trigger on old software).
- *
- * Removing the assertions could be rolled into a new feature that uses
- * data that isn't 8-byte aligned; if the assertions were removed, a
- * feature flag would have to be added.
+ * in receive_read() to trigger on old software). Newer feature flags
+ * (such as raw send) may break this assertion since they were
+ * introduced after the requirement was made obsolete.
*/
- ASSERT0(len % 8);
+ ASSERT(len % 8 == 0 ||
+ (dsp->dsa_featureflags & DMU_BACKUP_FEATURE_RAW) != 0);
dsp->dsa_err = vn_rdwr(UIO_WRITE, dsp->dsa_vp,
(caddr_t)buf, len,
@@ -189,9 +192,6 @@ dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset,
(object == dsp->dsa_last_data_object &&
offset > dsp->dsa_last_data_offset));
- if (length != -1ULL && offset + length < offset)
- length = -1ULL;
-
/*
* If there is a pending op, but it's not PENDING_FREE, push it out,
* since free block aggregation can only be done for blocks of the
@@ -208,19 +208,22 @@ dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset,
if (dsp->dsa_pending_op == PENDING_FREE) {
/*
- * There should never be a PENDING_FREE if length is -1
- * (because dump_dnode is the only place where this
- * function is called with a -1, and only after flushing
- * any pending record).
+ * There should never be a PENDING_FREE if length is
+ * DMU_OBJECT_END (because dump_dnode is the only place where
+ * this function is called with a DMU_OBJECT_END, and only after
+ * flushing any pending record).
*/
- ASSERT(length != -1ULL);
+ ASSERT(length != DMU_OBJECT_END);
/*
* Check to see whether this free block can be aggregated
* with pending one.
*/
if (drrf->drr_object == object && drrf->drr_offset +
drrf->drr_length == offset) {
- drrf->drr_length += length;
+ if (offset + length < offset)
+ drrf->drr_length = DMU_OBJECT_END;
+ else
+ drrf->drr_length += length;
return (0);
} else {
/* not a continuation. Push out pending record */
@@ -234,9 +237,12 @@ dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset,
dsp->dsa_drr->drr_type = DRR_FREE;
drrf->drr_object = object;
drrf->drr_offset = offset;
- drrf->drr_length = length;
+ if (offset + length < offset)
+ drrf->drr_length = DMU_OBJECT_END;
+ else
+ drrf->drr_length = length;
drrf->drr_toguid = dsp->dsa_toguid;
- if (length == -1ULL) {
+ if (length == DMU_OBJECT_END) {
if (dump_record(dsp, NULL, 0) != 0)
return (SET_ERROR(EINTR));
} else {
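
A minimal standalone sketch of the wraparound clamp these dump_free() hunks introduce; DMU_OBJECT_END is defined locally to its ZFS value of -1ULL, and everything else here is illustrative rather than the kernel code:

#include <stdint.h>
#include <stdio.h>

#define DMU_OBJECT_END	(-1ULL)	/* sentinel: "to the end of the object" */

/*
 * Unsigned addition wraps, so (offset + length < offset) is exactly the
 * overflow test. A free record whose range would wrap past UINT64_MAX
 * is clamped to the DMU_OBJECT_END sentinel instead.
 */
static uint64_t
clamp_free_length(uint64_t offset, uint64_t length)
{
	if (offset + length < offset)
		return (DMU_OBJECT_END);
	return (length);
}

int
main(void)
{
	/* no overflow: length passes through unchanged (prints 8192) */
	printf("%llu\n", (unsigned long long)clamp_free_length(4096, 8192));
	/* wraps: clamped to DMU_OBJECT_END (prints 18446744073709551615) */
	printf("%llu\n", (unsigned long long)clamp_free_length(-9ULL, 64));
	return (0);
}
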
@@ -247,11 +253,11 @@ dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset,
}
static int
-dump_write(dmu_sendarg_t *dsp, dmu_object_type_t type,
- uint64_t object, uint64_t offset, int lsize, int psize, const blkptr_t *bp,
- void *data)
+dump_write(dmu_sendarg_t *dsp, dmu_object_type_t type, uint64_t object,
+ uint64_t offset, int lsize, int psize, const blkptr_t *bp, void *data)
{
uint64_t payload_size;
+ boolean_t raw = (dsp->dsa_featureflags & DMU_BACKUP_FEATURE_RAW);
struct drr_write *drrw = &(dsp->dsa_drr->drr_u.drr_write);
/*
@@ -284,16 +290,36 @@ dump_write(dmu_sendarg_t *dsp, dmu_object_type_t type,
drrw->drr_toguid = dsp->dsa_toguid;
drrw->drr_logical_size = lsize;
- /* only set the compression fields if the buf is compressed */
- if (lsize != psize) {
- ASSERT(dsp->dsa_featureflags & DMU_BACKUP_FEATURE_COMPRESSED);
+ /* only set the compression fields if the buf is compressed or raw */
+ if (raw || lsize != psize) {
ASSERT(!BP_IS_EMBEDDED(bp));
- ASSERT(!BP_SHOULD_BYTESWAP(bp));
- ASSERT(!DMU_OT_IS_METADATA(BP_GET_TYPE(bp)));
- ASSERT3U(BP_GET_COMPRESS(bp), !=, ZIO_COMPRESS_OFF);
ASSERT3S(psize, >, 0);
- ASSERT3S(lsize, >=, psize);
+ if (raw) {
+ ASSERT(BP_IS_PROTECTED(bp));
+
+ /*
+ * This is a raw protected block so we need to pass
+ * along everything the receiving side will need to
+ * interpret this block, including the byteswap, salt,
+ * IV, and MAC.
+ */
+ if (BP_SHOULD_BYTESWAP(bp))
+ drrw->drr_flags |= DRR_RAW_BYTESWAP;
+ zio_crypt_decode_params_bp(bp, drrw->drr_salt,
+ drrw->drr_iv);
+ zio_crypt_decode_mac_bp(bp, drrw->drr_mac);
+ } else {
+ /* this is a compressed block */
+ ASSERT(dsp->dsa_featureflags &
+ DMU_BACKUP_FEATURE_COMPRESSED);
+ ASSERT(!BP_SHOULD_BYTESWAP(bp));
+ ASSERT(!DMU_OT_IS_METADATA(BP_GET_TYPE(bp)));
+ ASSERT3U(BP_GET_COMPRESS(bp), !=, ZIO_COMPRESS_OFF);
+ ASSERT3S(lsize, >=, psize);
+ }
+
+ /* set fields common to compressed and raw sends */
drrw->drr_compressiontype = BP_GET_COMPRESS(bp);
drrw->drr_compressed_size = psize;
payload_size = drrw->drr_compressed_size;
@@ -301,22 +327,23 @@ dump_write(dmu_sendarg_t *dsp, dmu_object_type_t type,
payload_size = drrw->drr_logical_size;
}
- if (bp == NULL || BP_IS_EMBEDDED(bp)) {
+ if (bp == NULL || BP_IS_EMBEDDED(bp) || (BP_IS_PROTECTED(bp) && !raw)) {
/*
- * There's no pre-computed checksum for partial-block
- * writes or embedded BP's, so (like
- * fletcher4-checkummed blocks) userland will have to
- * compute a dedup-capable checksum itself.
+ * There's no pre-computed checksum for partial-block writes,
+ * embedded BP's, or encrypted BP's that are being sent as
+ * plaintext, so (like fletcher4-checksummed blocks) userland
+ * will have to compute a dedup-capable checksum itself.
*/
drrw->drr_checksumtype = ZIO_CHECKSUM_OFF;
} else {
drrw->drr_checksumtype = BP_GET_CHECKSUM(bp);
if (zio_checksum_table[drrw->drr_checksumtype].ci_flags &
ZCHECKSUM_FLAG_DEDUP)
- drrw->drr_checksumflags |= DRR_CHECKSUM_DEDUP;
+ drrw->drr_flags |= DRR_CHECKSUM_DEDUP;
DDK_SET_LSIZE(&drrw->drr_key, BP_GET_LSIZE(bp));
DDK_SET_PSIZE(&drrw->drr_key, BP_GET_PSIZE(bp));
DDK_SET_COMPRESS(&drrw->drr_key, BP_GET_COMPRESS(bp));
+ DDK_SET_CRYPT(&drrw->drr_key, BP_IS_PROTECTED(bp));
drrw->drr_key.ddk_cksum = bp->blk_cksum;
}
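
A sketch of the payload-size rule dump_write() now applies, with stand-in types rather than the real struct drr_write; raw and compressed records carry the physical size, plain records the logical size (for a raw send the salt, IV, and MAC from the block pointer ride along in the record header, as the hunk above shows):

#include <stdbool.h>
#include <stdint.h>

/*
 * Raw (encrypted) and compressed blocks are sent exactly as stored on
 * disk, so the wire payload is the physical size; otherwise the block
 * is sent as plain logical data.
 */
static uint64_t
drr_write_payload_size(bool raw, uint64_t lsize, uint64_t psize)
{
	if (raw || lsize != psize)
		return (psize);
	return (lsize);
}
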
@@ -360,9 +387,11 @@ dump_write_embedded(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset,
}
static int
-dump_spill(dmu_sendarg_t *dsp, uint64_t object, int blksz, void *data)
+dump_spill(dmu_sendarg_t *dsp, const blkptr_t *bp, uint64_t object, void *data)
{
struct drr_spill *drrs = &(dsp->dsa_drr->drr_u.drr_spill);
+ uint64_t blksz = BP_GET_LSIZE(bp);
+ uint64_t payload_size = blksz;
if (dsp->dsa_pending_op != PENDING_NONE) {
if (dump_record(dsp, NULL, 0) != 0)
@@ -377,7 +406,26 @@ dump_spill(dmu_sendarg_t *dsp, uint64_t object, int blksz, void *data)
drrs->drr_length = blksz;
drrs->drr_toguid = dsp->dsa_toguid;
- if (dump_record(dsp, data, blksz) != 0)
+ /* See comment in dump_dnode() for full details */
+ if (zfs_send_unmodified_spill_blocks &&
+ (bp->blk_birth <= dsp->dsa_fromtxg)) {
+ drrs->drr_flags |= DRR_SPILL_UNMODIFIED;
+ }
+
+ /* handle raw send fields */
+ if (dsp->dsa_featureflags & DMU_BACKUP_FEATURE_RAW) {
+ ASSERT(BP_IS_PROTECTED(bp));
+
+ if (BP_SHOULD_BYTESWAP(bp))
+ drrs->drr_flags |= DRR_RAW_BYTESWAP;
+ drrs->drr_compressiontype = BP_GET_COMPRESS(bp);
+ drrs->drr_compressed_size = BP_GET_PSIZE(bp);
+ zio_crypt_decode_params_bp(bp, drrs->drr_salt, drrs->drr_iv);
+ zio_crypt_decode_mac_bp(bp, drrs->drr_mac);
+ payload_size = drrs->drr_compressed_size;
+ }
+
+ if (dump_record(dsp, data, payload_size) != 0)
return (SET_ERROR(EINTR));
return (0);
}
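
A standalone sketch of the test dump_spill() uses to tag a spill block as unmodified; the parameter names are stand-ins for bp->blk_birth and dsa->dsa_fromtxg:

#include <stdbool.h>
#include <stdint.h>

/*
 * A spill block is "unmodified" relative to an incremental source when
 * its birth txg is at or before the from-txg, i.e. the block already
 * existed unchanged in the snapshot the stream is being sent from.
 */
static bool
spill_is_unmodified(uint64_t blk_birth, uint64_t fromtxg)
{
	return (blk_birth <= fromtxg);
}
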
@@ -429,9 +477,11 @@ dump_freeobjects(dmu_sendarg_t *dsp, uint64_t firstobj, uint64_t numobjs)
}
static int
-dump_dnode(dmu_sendarg_t *dsp, uint64_t object, dnode_phys_t *dnp)
+dump_dnode(dmu_sendarg_t *dsp, const blkptr_t *bp, uint64_t object,
+ dnode_phys_t *dnp)
{
struct drr_object *drro = &(dsp->dsa_drr->drr_u.drr_object);
+ int bonuslen;
if (object < dsp->dsa_resume_object) {
/*
@@ -472,20 +522,111 @@ dump_dnode(dmu_sendarg_t *dsp, uint64_t object, dnode_phys_t *dnp)
drro->drr_blksz > SPA_OLD_MAXBLOCKSIZE)
drro->drr_blksz = SPA_OLD_MAXBLOCKSIZE;
- if (dump_record(dsp, DN_BONUS(dnp),
- P2ROUNDUP(dnp->dn_bonuslen, 8)) != 0) {
- return (SET_ERROR(EINTR));
+ bonuslen = P2ROUNDUP(dnp->dn_bonuslen, 8);
+
+ if ((dsp->dsa_featureflags & DMU_BACKUP_FEATURE_RAW)) {
+ ASSERT(BP_IS_ENCRYPTED(bp));
+
+ if (BP_SHOULD_BYTESWAP(bp))
+ drro->drr_flags |= DRR_RAW_BYTESWAP;
+
+ /* needed for reconstructing dnp on recv side */
+ drro->drr_maxblkid = dnp->dn_maxblkid;
+ drro->drr_indblkshift = dnp->dn_indblkshift;
+ drro->drr_nlevels = dnp->dn_nlevels;
+ drro->drr_nblkptr = dnp->dn_nblkptr;
+
+ /*
+ * Since we encrypt the entire bonus area, the (raw) part
+ * beyond the bonuslen is actually nonzero, so we need
+ * to send it.
+ */
+ if (bonuslen != 0) {
+ drro->drr_raw_bonuslen = DN_MAX_BONUS_LEN(dnp);
+ bonuslen = drro->drr_raw_bonuslen;
+ }
}
+ /*
+ * DRR_OBJECT_SPILL is set for every dnode which references a
+ * spill block. This allows the receiving pool to definitively
+ * determine when a spill block should be kept or freed.
+ */
+ if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR)
+ drro->drr_flags |= DRR_OBJECT_SPILL;
+
+ if (dump_record(dsp, DN_BONUS(dnp), bonuslen) != 0)
+ return (SET_ERROR(EINTR));
+
/* Free anything past the end of the file. */
if (dump_free(dsp, object, (dnp->dn_maxblkid + 1) *
- (dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT), -1ULL) != 0)
+ (dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT), DMU_OBJECT_END) != 0)
return (SET_ERROR(EINTR));
+
+ /*
+ * Send DRR_SPILL records for unmodified spill blocks. This is useful
+ * because changing certain attributes of the object (e.g. blocksize)
+ * can cause old versions of ZFS to incorrectly remove a spill block.
+ * Including these records in the stream forces an up-to-date version
+ * to always be written, ensuring they're never lost. Current versions
+ * of the code which understand the DRR_FLAG_SPILL_BLOCK feature can
+ * ignore these unmodified spill blocks.
+ */
+ if (zfs_send_unmodified_spill_blocks &&
+ (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) &&
+ (DN_SPILL_BLKPTR(dnp)->blk_birth <= dsp->dsa_fromtxg)) {
+ struct send_block_record record;
+
+ bzero(&record, sizeof (struct send_block_record));
+ record.eos_marker = B_FALSE;
+ record.bp = *DN_SPILL_BLKPTR(dnp);
+ SET_BOOKMARK(&(record.zb), dmu_objset_id(dsp->dsa_os),
+ object, 0, DMU_SPILL_BLKID);
+
+ if (do_dump(dsp, &record) != 0)
+ return (SET_ERROR(EINTR));
+ }
+
if (dsp->dsa_err != 0)
return (SET_ERROR(EINTR));
return (0);
}
+static int
+dump_object_range(dmu_sendarg_t *dsp, const blkptr_t *bp, uint64_t firstobj,
+ uint64_t numslots)
+{
+ struct drr_object_range *drror =
+ &(dsp->dsa_drr->drr_u.drr_object_range);
+
+ /* we only use this record type for raw sends */
+ ASSERT(BP_IS_PROTECTED(bp));
+ ASSERT(dsp->dsa_featureflags & DMU_BACKUP_FEATURE_RAW);
+ ASSERT3U(BP_GET_COMPRESS(bp), ==, ZIO_COMPRESS_OFF);
+ ASSERT3U(BP_GET_TYPE(bp), ==, DMU_OT_DNODE);
+ ASSERT0(BP_GET_LEVEL(bp));
+
+ if (dsp->dsa_pending_op != PENDING_NONE) {
+ if (dump_record(dsp, NULL, 0) != 0)
+ return (SET_ERROR(EINTR));
+ dsp->dsa_pending_op = PENDING_NONE;
+ }
+
+ bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t));
+ dsp->dsa_drr->drr_type = DRR_OBJECT_RANGE;
+ drror->drr_firstobj = firstobj;
+ drror->drr_numslots = numslots;
+ drror->drr_toguid = dsp->dsa_toguid;
+ if (BP_SHOULD_BYTESWAP(bp))
+ drror->drr_flags |= DRR_RAW_BYTESWAP;
+ zio_crypt_decode_params_bp(bp, drror->drr_salt, drror->drr_iv);
+ zio_crypt_decode_mac_bp(bp, drror->drr_mac);
+
+ if (dump_record(dsp, NULL, 0) != 0)
+ return (SET_ERROR(EINTR));
+ return (0);
+}
+
static boolean_t
backup_do_embed(dmu_sendarg_t *dsp, const blkptr_t *bp)
{
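
dump_dnode() normally sends dn_bonuslen rounded up to 8 bytes, but a raw send must cover the whole encrypted bonus area, since the ciphertext past dn_bonuslen is not zero-filled. A runnable sketch of the rounding macro involved (P2ROUNDUP as defined in illumos sysmacros.h):

#include <stdint.h>
#include <stdio.h>

/* illumos sysmacros.h: round x up to the next multiple of a power of two */
#define P2ROUNDUP(x, align)	(-(-(x) & -(align)))

int
main(void)
{
	/* a 44-byte bonus buffer is padded to 48 bytes on the wire */
	printf("%llu\n", (unsigned long long)P2ROUNDUP(44ULL, 8ULL));
	/* already-aligned lengths are unchanged (prints 64) */
	printf("%llu\n", (unsigned long long)P2ROUNDUP(64ULL, 8ULL));
	return (0);
}
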
@@ -529,6 +670,7 @@ send_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
ASSERT(zb->zb_object == DMU_META_DNODE_OBJECT ||
zb->zb_object >= sta->resume.zb_object);
+ ASSERT3P(sta->ds, !=, NULL);
if (sta->cancel)
return (SET_ERROR(EINTR));
@@ -601,6 +743,18 @@ do_dump(dmu_sendarg_t *dsa, struct send_block_record *data)
ASSERT(zb->zb_object == DMU_META_DNODE_OBJECT ||
zb->zb_object >= dsa->dsa_resume_object);
+ /*
+ * All bps of an encrypted os should have the encryption bit set.
+ * If this is not true it indicates tampering and we report an error.
+ */
+ if (dsa->dsa_os->os_encrypted &&
+ !BP_IS_HOLE(bp) && !BP_USES_CRYPT(bp)) {
+ spa_log_error(spa, zb);
+ zfs_panic_recover("unencrypted block in encrypted "
+ "object set %llu", ds->ds_object);
+ return (SET_ERROR(EIO));
+ }
+
if (zb->zb_object != DMU_META_DNODE_OBJECT &&
DMU_OBJECT_IS_SPECIAL(zb->zb_object)) {
return (0);
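
A sketch of the tamper check do_dump() now performs, with stand-in fields for the BP_IS_HOLE()/BP_USES_CRYPT() macros; in an encrypted objset every allocated block pointer must carry the encryption bit:

#include <stdbool.h>

struct bp_state {
	bool is_hole;		/* stand-in for BP_IS_HOLE(bp) */
	bool uses_crypt;	/* stand-in for BP_USES_CRYPT(bp) */
};

/*
 * A plaintext, allocated bp inside an encrypted objset can only mean
 * on-disk tampering; do_dump() logs it and fails the send with EIO.
 */
static bool
crypt_invariant_violated(bool os_encrypted, const struct bp_state *bp)
{
	return (os_encrypted && !bp->is_hole && !bp->uses_crypt);
}
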
@@ -612,40 +766,66 @@ do_dump(dmu_sendarg_t *dsa, struct send_block_record *data)
} else if (BP_IS_HOLE(bp)) {
uint64_t span = BP_SPAN(dblkszsec, indblkshift, zb->zb_level);
uint64_t offset = zb->zb_blkid * span;
- err = dump_free(dsa, zb->zb_object, offset, span);
+ /* Don't dump free records for offsets > DMU_OBJECT_END */
+ if (zb->zb_blkid == 0 || span <= DMU_OBJECT_END / zb->zb_blkid)
+ err = dump_free(dsa, zb->zb_object, offset, span);
} else if (zb->zb_level > 0 || type == DMU_OT_OBJSET) {
return (0);
} else if (type == DMU_OT_DNODE) {
int epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT;
arc_flags_t aflags = ARC_FLAG_WAIT;
arc_buf_t *abuf;
+ enum zio_flag zioflags = ZIO_FLAG_CANFAIL;
+
+ if (dsa->dsa_featureflags & DMU_BACKUP_FEATURE_RAW) {
+ ASSERT(BP_IS_ENCRYPTED(bp));
+ ASSERT3U(BP_GET_COMPRESS(bp), ==, ZIO_COMPRESS_OFF);
+ zioflags |= ZIO_FLAG_RAW;
+ }
ASSERT0(zb->zb_level);
if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf,
- ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL,
- &aflags, zb) != 0)
+ ZIO_PRIORITY_ASYNC_READ, zioflags, &aflags, zb) != 0)
return (SET_ERROR(EIO));
dnode_phys_t *blk = abuf->b_data;
uint64_t dnobj = zb->zb_blkid * epb;
- for (int i = 0; i < epb; i += blk[i].dn_extra_slots + 1) {
- err = dump_dnode(dsa, dnobj + i, blk + i);
- if (err != 0)
- break;
+
+ /*
+ * Raw sends require sending encryption parameters for the
+ * block of dnodes. Regular sends do not need to send this
+ * info.
+ */
+ if (dsa->dsa_featureflags & DMU_BACKUP_FEATURE_RAW) {
+ ASSERT(arc_is_encrypted(abuf));
+ err = dump_object_range(dsa, bp, dnobj, epb);
+ }
+
+ if (err == 0) {
+ for (int i = 0; i < epb;
+ i += blk[i].dn_extra_slots + 1) {
+ err = dump_dnode(dsa, bp, dnobj + i, blk + i);
+ if (err != 0)
+ break;
+ }
}
arc_buf_destroy(abuf, &abuf);
} else if (type == DMU_OT_SA) {
arc_flags_t aflags = ARC_FLAG_WAIT;
arc_buf_t *abuf;
- int blksz = BP_GET_LSIZE(bp);
+ enum zio_flag zioflags = ZIO_FLAG_CANFAIL;
+
+ if (dsa->dsa_featureflags & DMU_BACKUP_FEATURE_RAW) {
+ ASSERT(BP_IS_PROTECTED(bp));
+ zioflags |= ZIO_FLAG_RAW;
+ }
if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf,
- ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL,
- &aflags, zb) != 0)
+ ZIO_PRIORITY_ASYNC_READ, zioflags, &aflags, zb) != 0)
return (SET_ERROR(EIO));
- err = dump_spill(dsa, zb->zb_object, blksz, abuf->b_data);
+ err = dump_spill(dsa, bp, zb->zb_object, abuf->b_data);
arc_buf_destroy(abuf, &abuf);
} else if (backup_do_embed(dsa, bp)) {
/* it's an embedded level-0 block of a regular object */
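
The hole-record guard above avoids computing zb_blkid * span when the product would exceed DMU_OBJECT_END. A standalone sketch of the divide-first overflow test, with UINT64_MAX standing in for DMU_OBJECT_END:

#include <stdbool.h>
#include <stdint.h>

/*
 * blkid * span fits in 64 bits iff blkid == 0 or span <= MAX / blkid;
 * dividing first sidesteps the multiplication that could overflow.
 */
static bool
offset_fits(uint64_t blkid, uint64_t span)
{
	return (blkid == 0 || span <= UINT64_MAX / blkid);
}
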
@@ -667,6 +847,14 @@ do_dump(dmu_sendarg_t *dsa, struct send_block_record *data)
*/
boolean_t split_large_blocks = blksz > SPA_OLD_MAXBLOCKSIZE &&
!(dsa->dsa_featureflags & DMU_BACKUP_FEATURE_LARGE_BLOCKS);
+
+ /*
+ * Raw sends require that we always get raw data as it exists
+ * on disk, so we assert that we are not splitting blocks here.
+ */
+ boolean_t request_raw =
+ (dsa->dsa_featureflags & DMU_BACKUP_FEATURE_RAW) != 0;
+
/*
* We should only request compressed data from the ARC if all
* the following are true:
@@ -682,6 +870,8 @@ do_dump(dmu_sendarg_t *dsa, struct send_block_record *data)
!split_large_blocks && !BP_SHOULD_BYTESWAP(bp) &&
!BP_IS_EMBEDDED(bp) && !DMU_OT_IS_METADATA(BP_GET_TYPE(bp));
+ IMPLY(request_raw, !split_large_blocks);
+ IMPLY(request_raw, BP_IS_PROTECTED(bp));
ASSERT0(zb->zb_level);
ASSERT(zb->zb_object > dsa->dsa_resume_object ||
(zb->zb_object == dsa->dsa_resume_object &&
@@ -695,8 +885,11 @@ do_dump(dmu_sendarg_t *dsa, struct send_block_record *data)
ASSERT3U(blksz, ==, BP_GET_LSIZE(bp));
enum zio_flag zioflags = ZIO_FLAG_CANFAIL;
- if (request_compressed)
+ if (request_raw)
zioflags |= ZIO_FLAG_RAW;
+ else if (request_compressed)
+ zioflags |= ZIO_FLAG_RAW_COMPRESS;
+
if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf,
ZIO_PRIORITY_ASYNC_READ, zioflags, &aflags, zb) != 0) {
if (zfs_send_corrupt_data) {
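
A sketch of the read-flag ladder used here, with illustrative flag values rather than the real zio.h bits; raw must win over compressed because an encrypted block can only be replicated as the exact ciphertext stored on disk:

#include <stdbool.h>
#include <stdint.h>

#define F_CANFAIL	(1u << 0)	/* stand-in for ZIO_FLAG_CANFAIL */
#define F_RAW		(1u << 1)	/* stand-in for ZIO_FLAG_RAW */
#define F_RAW_COMPRESS	(1u << 2)	/* stand-in for ZIO_FLAG_RAW_COMPRESS */

static uint32_t
send_read_flags(bool request_raw, bool request_compressed)
{
	uint32_t flags = F_CANFAIL;

	if (request_raw)
		flags |= F_RAW;			/* ciphertext, as on disk */
	else if (request_compressed)
		flags |= F_RAW_COMPRESS;	/* compressed but decrypted */
	return (flags);
}
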
@@ -716,6 +909,7 @@ do_dump(dmu_sendarg_t *dsa, struct send_block_record *data)
offset = zb->zb_blkid * blksz;
if (split_large_blocks) {
+ ASSERT0(arc_is_encrypted(abuf));
ASSERT3U(arc_get_compression(abuf), ==,
ZIO_COMPRESS_OFF);
char *buf = abuf->b_data;
@@ -758,7 +952,7 @@ static int
dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds,
zfs_bookmark_phys_t *ancestor_zb, boolean_t is_clone,
boolean_t embedok, boolean_t large_block_ok, boolean_t compressok,
- int outfd, uint64_t resumeobj, uint64_t resumeoff,
+ boolean_t rawok, int outfd, uint64_t resumeobj, uint64_t resumeoff,
vnode_t *vp, offset_t *off)
{
objset_t *os;
@@ -775,6 +969,28 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds,
return (err);
}
+ /*
+ * If this is a non-raw send of an encrypted ds, we can ensure that
+ * the objset_phys_t is authenticated. This is safe because this is
+ * either a snapshot or we have owned the dataset, ensuring that
+ * it can't be modified.
+ */
+ if (!rawok && os->os_encrypted &&
+ arc_is_unauthenticated(os->os_phys_buf)) {
+ zbookmark_phys_t zb;
+
+ SET_BOOKMARK(&zb, to_ds->ds_object, ZB_ROOT_OBJECT,
+ ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
+ err = arc_untransform(os->os_phys_buf, os->os_spa,
+ &zb, B_FALSE);
+ if (err != 0) {
+ dsl_pool_rele(dp, tag);
+ return (err);
+ }
+
+ ASSERT0(arc_is_unauthenticated(os->os_phys_buf));
+ }
+
drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP);
drr->drr_type = DRR_BEGIN;
drr->drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC;
@@ -795,22 +1011,29 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds,
}
#endif
- if (large_block_ok && to_ds->ds_feature_inuse[SPA_FEATURE_LARGE_BLOCKS])
+ /* raw sends imply large_block_ok */
+ if ((large_block_ok || rawok) &&
+ to_ds->ds_feature_inuse[SPA_FEATURE_LARGE_BLOCKS])
featureflags |= DMU_BACKUP_FEATURE_LARGE_BLOCKS;
if (to_ds->ds_feature_inuse[SPA_FEATURE_LARGE_DNODE])
featureflags |= DMU_BACKUP_FEATURE_LARGE_DNODE;
- if (embedok &&
+
+ /* encrypted datasets will not have embedded blocks */
+ if ((embedok || rawok) && !os->os_encrypted &&
spa_feature_is_active(dp->dp_spa, SPA_FEATURE_EMBEDDED_DATA)) {
featureflags |= DMU_BACKUP_FEATURE_EMBED_DATA;
- if (spa_feature_is_active(dp->dp_spa, SPA_FEATURE_LZ4_COMPRESS))
- featureflags |= DMU_BACKUP_FEATURE_LZ4;
}
- if (compressok) {
+
+ /* raw send implies compressok */
+ if (compressok || rawok)
featureflags |= DMU_BACKUP_FEATURE_COMPRESSED;
- }
+ if (rawok && os->os_encrypted)
+ featureflags |= DMU_BACKUP_FEATURE_RAW;
+
if ((featureflags &
- (DMU_BACKUP_FEATURE_EMBED_DATA | DMU_BACKUP_FEATURE_COMPRESSED)) !=
- 0 && spa_feature_is_active(dp->dp_spa, SPA_FEATURE_LZ4_COMPRESS)) {
+ (DMU_BACKUP_FEATURE_EMBED_DATA | DMU_BACKUP_FEATURE_COMPRESSED |
+ DMU_BACKUP_FEATURE_RAW)) != 0 &&
+ spa_feature_is_active(dp->dp_spa, SPA_FEATURE_LZ4_COMPRESS)) {
featureflags |= DMU_BACKUP_FEATURE_LZ4;
}
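
A standalone sketch of the implications this hunk encodes (flag values are illustrative, not the real DMU_BACKUP_FEATURE_* bits): rawok implies compressok, embedded blocks are never used for encrypted datasets, and RAW is only set when the source objset is actually encrypted:

#include <stdbool.h>
#include <stdint.h>

#define F_EMBED_DATA	(1u << 0)
#define F_COMPRESSED	(1u << 1)
#define F_RAW		(1u << 2)

static uint32_t
derive_featureflags(bool embedok, bool compressok, bool rawok,
    bool os_encrypted, bool embed_feature_active)
{
	uint32_t flags = 0;

	/* encrypted datasets will not have embedded blocks */
	if ((embedok || rawok) && !os_encrypted && embed_feature_active)
		flags |= F_EMBED_DATA;
	/* raw send implies compressok */
	if (compressok || rawok)
		flags |= F_COMPRESSED;
	/* raw only applies when the source is encrypted */
	if (rawok && os_encrypted)
		flags |= F_RAW;
	return (flags);
}
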
@@ -832,6 +1055,8 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds,
if (zfs_send_set_freerecords_bit)
drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_FREERECORDS;
+ drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_SPILL_BLOCK;
+
if (ancestor_zb != NULL) {
drr->drr_u.drr_begin.drr_fromguid =
ancestor_zb->zbm_guid;
@@ -852,6 +1077,7 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds,
dsp->dsa_os = os;
dsp->dsa_off = off;
dsp->dsa_toguid = dsl_dataset_phys(to_ds)->ds_guid;
+ dsp->dsa_fromtxg = fromtxg;
dsp->dsa_pending_op = PENDING_NONE;
dsp->dsa_featureflags = featureflags;
dsp->dsa_resume_object = resumeobj;
@@ -866,19 +1092,47 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds,
void *payload = NULL;
size_t payload_len = 0;
- if (resumeobj != 0 || resumeoff != 0) {
- dmu_object_info_t to_doi;
- err = dmu_object_info(os, resumeobj, &to_doi);
- if (err != 0)
- goto out;
- SET_BOOKMARK(&to_arg.resume, to_ds->ds_object, resumeobj, 0,
- resumeoff / to_doi.doi_data_block_size);
-
+ /* handle features that require a DRR_BEGIN payload */
+ if (featureflags &
+ (DMU_BACKUP_FEATURE_RESUMING | DMU_BACKUP_FEATURE_RAW)) {
+ nvlist_t *keynvl = NULL;
nvlist_t *nvl = fnvlist_alloc();
- fnvlist_add_uint64(nvl, "resume_object", resumeobj);
- fnvlist_add_uint64(nvl, "resume_offset", resumeoff);
+
+ if (featureflags & DMU_BACKUP_FEATURE_RESUMING) {
+ dmu_object_info_t to_doi;
+ err = dmu_object_info(os, resumeobj, &to_doi);
+ if (err != 0) {
+ fnvlist_free(nvl);
+ goto out;
+ }
+
+ SET_BOOKMARK(&to_arg.resume, to_ds->ds_object,
+ resumeobj, 0,
+ resumeoff / to_doi.doi_data_block_size);
+
+ fnvlist_add_uint64(nvl, "resume_object", resumeobj);
+ fnvlist_add_uint64(nvl, "resume_offset", resumeoff);
+ }
+
+ if (featureflags & DMU_BACKUP_FEATURE_RAW) {
+ uint64_t ivset_guid = (ancestor_zb != NULL) ?
+ ancestor_zb->zbm_ivset_guid : 0;
+
+ ASSERT(os->os_encrypted);
+
+ err = dsl_crypto_populate_key_nvlist(to_ds,
+ ivset_guid, &keynvl);
+ if (err != 0) {
+ fnvlist_free(nvl);
+ goto out;
+ }
+
+ fnvlist_add_nvlist(nvl, "crypt_keydata", keynvl);
+ }
+
payload = fnvlist_pack(nvl, &payload_len);
drr->drr_payloadlen = payload_len;
+ fnvlist_free(keynvl);
fnvlist_free(nvl);
}
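
A userland sketch (link with -lnvpair) of how the DRR_BEGIN payload is packed; the key names match the hunk above, and the values here are placeholders:

#include <libnvpair.h>
#include <stdio.h>

int
main(void)
{
	nvlist_t *nvl = fnvlist_alloc();
	size_t len;

	/* resume state, as written for DMU_BACKUP_FEATURE_RESUMING */
	fnvlist_add_uint64(nvl, "resume_object", 42);
	fnvlist_add_uint64(nvl, "resume_offset", 131072);
	/*
	 * A raw send would also add:
	 * fnvlist_add_nvlist(nvl, "crypt_keydata", keynvl);
	 */

	char *payload = fnvlist_pack(nvl, &len);
	printf("DRR_BEGIN payload length: %zu\n", len);

	fnvlist_pack_free(payload, len);
	fnvlist_free(nvl);
	return (0);
}
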
@@ -896,6 +1150,8 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds,
to_arg.ds = to_ds;
to_arg.fromtxg = fromtxg;
to_arg.flags = TRAVERSE_PRE | TRAVERSE_PREFETCH;
+ if (rawok)
+ to_arg.flags |= TRAVERSE_NO_DECRYPT;
(void) thread_create(NULL, 0, send_traverse_thread, &to_arg, 0, curproc,
TS_RUN, minclsyspri);
@@ -942,7 +1198,6 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds,
if (dump_record(dsp, NULL, 0) != 0)
err = dsp->dsa_err;
-
out:
mutex_enter(&to_ds->ds_sendstream_lock);
list_remove(&to_ds->ds_sendstreams, dsp);
@@ -961,60 +1216,77 @@ out:
int
dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap,
boolean_t embedok, boolean_t large_block_ok, boolean_t compressok,
- int outfd, vnode_t *vp, offset_t *off)
+ boolean_t rawok, int outfd, vnode_t *vp, offset_t *off)
{
dsl_pool_t *dp;
dsl_dataset_t *ds;
dsl_dataset_t *fromds = NULL;
+ ds_hold_flags_t dsflags = (rawok) ? 0 : DS_HOLD_FLAG_DECRYPT;
int err;
err = dsl_pool_hold(pool, FTAG, &dp);
if (err != 0)
return (err);
- err = dsl_dataset_hold_obj(dp, tosnap, FTAG, &ds);
+ err = dsl_dataset_hold_obj_flags(dp, tosnap, dsflags, FTAG, &ds);
if (err != 0) {
dsl_pool_rele(dp, FTAG);
return (err);
}
if (fromsnap != 0) {
- zfs_bookmark_phys_t zb;
+ zfs_bookmark_phys_t zb = { 0 };
boolean_t is_clone;
err = dsl_dataset_hold_obj(dp, fromsnap, FTAG, &fromds);
if (err != 0) {
- dsl_dataset_rele(ds, FTAG);
+ dsl_dataset_rele_flags(ds, dsflags, FTAG);
dsl_pool_rele(dp, FTAG);
return (err);
}
- if (!dsl_dataset_is_before(ds, fromds, 0))
+ if (!dsl_dataset_is_before(ds, fromds, 0)) {
err = SET_ERROR(EXDEV);
+ dsl_dataset_rele(fromds, FTAG);
+ dsl_dataset_rele_flags(ds, dsflags, FTAG);
+ dsl_pool_rele(dp, FTAG);
+ return (err);
+ }
+
zb.zbm_creation_time =
dsl_dataset_phys(fromds)->ds_creation_time;
zb.zbm_creation_txg = dsl_dataset_phys(fromds)->ds_creation_txg;
zb.zbm_guid = dsl_dataset_phys(fromds)->ds_guid;
+
+ if (dsl_dataset_is_zapified(fromds)) {
+ (void) zap_lookup(dp->dp_meta_objset,
+ fromds->ds_object, DS_FIELD_IVSET_GUID, 8, 1,
+ &zb.zbm_ivset_guid);
+ }
+
is_clone = (fromds->ds_dir != ds->ds_dir);
dsl_dataset_rele(fromds, FTAG);
err = dmu_send_impl(FTAG, dp, ds, &zb, is_clone,
- embedok, large_block_ok, compressok, outfd, 0, 0, vp, off);
+ embedok, large_block_ok, compressok, rawok, outfd,
+ 0, 0, vp, off);
} else {
err = dmu_send_impl(FTAG, dp, ds, NULL, B_FALSE,
- embedok, large_block_ok, compressok, outfd, 0, 0, vp, off);
+ embedok, large_block_ok, compressok, rawok, outfd,
+ 0, 0, vp, off);
}
- dsl_dataset_rele(ds, FTAG);
+ dsl_dataset_rele_flags(ds, dsflags, FTAG);
return (err);
}
int
dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok,
- boolean_t large_block_ok, boolean_t compressok, int outfd,
- uint64_t resumeobj, uint64_t resumeoff,
- vnode_t *vp, offset_t *off)
+ boolean_t large_block_ok, boolean_t compressok, boolean_t rawok,
+ int outfd, uint64_t resumeobj, uint64_t resumeoff, vnode_t *vp,
+ offset_t *off)
{
dsl_pool_t *dp;
dsl_dataset_t *ds;
int err;
+ ds_hold_flags_t dsflags = (rawok) ? 0 : DS_HOLD_FLAG_DECRYPT;
boolean_t owned = B_FALSE;
if (fromsnap != NULL && strpbrk(fromsnap, "@#") == NULL)
@@ -1029,10 +1301,10 @@ dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok,
* We are sending a filesystem or volume. Ensure
* that it doesn't change by owning the dataset.
*/
- err = dsl_dataset_own(dp, tosnap, FTAG, &ds);
+ err = dsl_dataset_own(dp, tosnap, dsflags, FTAG, &ds);
owned = B_TRUE;
} else {
- err = dsl_dataset_hold(dp, tosnap, FTAG, &ds);
+ err = dsl_dataset_hold_flags(dp, tosnap, dsflags, FTAG, &ds);
}
if (err != 0) {
dsl_pool_rele(dp, FTAG);
@@ -1040,7 +1312,7 @@ dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok,
}
if (fromsnap != NULL) {
- zfs_bookmark_phys_t zb;
+ zfs_bookmark_phys_t zb = { 0 };
boolean_t is_clone = B_FALSE;
int fsnamelen = strchr(tosnap, '@') - tosnap;
@@ -1066,28 +1338,40 @@ dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok,
dsl_dataset_phys(fromds)->ds_creation_txg;
zb.zbm_guid = dsl_dataset_phys(fromds)->ds_guid;
is_clone = (ds->ds_dir != fromds->ds_dir);
+
+ if (dsl_dataset_is_zapified(fromds)) {
+ (void) zap_lookup(dp->dp_meta_objset,
+ fromds->ds_object,
+ DS_FIELD_IVSET_GUID, 8, 1,
+ &zb.zbm_ivset_guid);
+ }
dsl_dataset_rele(fromds, FTAG);
}
} else {
err = dsl_bookmark_lookup(dp, fromsnap, ds, &zb);
}
if (err != 0) {
- dsl_dataset_rele(ds, FTAG);
+ if (owned)
+ dsl_dataset_disown(ds, dsflags, FTAG);
+ else
+ dsl_dataset_rele_flags(ds, dsflags, FTAG);
+
dsl_pool_rele(dp, FTAG);
return (err);
}
err = dmu_send_impl(FTAG, dp, ds, &zb, is_clone,
- embedok, large_block_ok, compressok,
+ embedok, large_block_ok, compressok, rawok,
outfd, resumeobj, resumeoff, vp, off);
} else {
err = dmu_send_impl(FTAG, dp, ds, NULL, B_FALSE,
- embedok, large_block_ok, compressok,
+ embedok, large_block_ok, compressok, rawok,
outfd, resumeobj, resumeoff, vp, off);
}
if (owned)
- dsl_dataset_disown(ds, FTAG);
+ dsl_dataset_disown(ds, dsflags, FTAG);
else
- dsl_dataset_rele(ds, FTAG);
+ dsl_dataset_rele_flags(ds, dsflags, FTAG);
+
return (err);
}
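
Both entry points now derive their dataset-hold flags the same way; a sketch with an illustrative flag value (the real ds_hold_flags_t lives in dsl_dataset.h): a raw send never loads keys, while a non-raw send of an encrypted dataset needs decryption.

#include <stdint.h>

#define DS_HOLD_FLAG_DECRYPT	(1u << 0)	/* illustrative value */

/*
 * Raw sends replicate ciphertext and therefore hold the dataset
 * without loading its encryption keys; all other sends request
 * decryption so block contents can be read as plaintext.
 */
static uint32_t
send_hold_flags(int rawok)
{
	return (rawok ? 0 : DS_HOLD_FLAG_DECRYPT);
}
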
@@ -1242,7 +1526,8 @@ dmu_send_estimate_from_txg(dsl_dataset_t *ds, uint64_t from_txg,
* traverse the blocks of the snapshot with birth times after
* from_txg, summing their uncompressed size
*/
- err = traverse_dataset(ds, from_txg, TRAVERSE_POST,
+ err = traverse_dataset(ds, from_txg,
+ TRAVERSE_POST | TRAVERSE_NO_DECRYPT,
dmu_calculate_send_traversal, &size);
if (err)
return (err);