diff options
| author | Tom Caputi <tcaputi@datto.com> | 2019-06-25 19:39:35 +0000 |
|---|---|---|
| committer | Jerry Jelinek <jerry.jelinek@joyent.com> | 2019-06-25 19:40:06 +0000 |
| commit | eb633035c80613ec93d62f90482837adaaf21a0a (patch) | |
| tree | 67f2e3e15231d06a3525ce3958bbce24aa3de7e8 /usr/src/uts/common/fs/zfs/zio.c | |
| parent | 07eb1aef88b873c5c1036d9cf69820c1ef6a32fb (diff) | |
| download | illumos-joyent-eb633035c80613ec93d62f90482837adaaf21a0a.tar.gz | |
8727 Native data and metadata encryption for zfs
Portions contributed by: Jorgen Lundman <lundman@lundman.net>
Portions contributed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Portions contributed by: Paul Zuchowski <pzuchowski@datto.com>
Portions contributed by: Tim Chase <tim@chase2k.com>
Portions contributed by: Matthew Ahrens <mahrens@delphix.com>
Portions contributed by: ab-oe <arkadiusz.bubala@open-e.com>
Portions contributed by: Brian Behlendorf <behlendorf1@llnl.gov>
Portions contributed by: loli10K <ezomori.nozomu@gmail.com>
Portions contributed by: Igor K <igor@dilos.org>
Portions contributed by: Richard Laager <rlaager@wiktel.com>
Reviewed by: Jason Cohen <jwittlincohen@gmail.com>
Reviewed by: Allan Jude <allanjude@freebsd.org>
Reviewed by: George Melikov <mail@gmelikov.ru>
Reviewed by: Paul Dagnelie <pcd@delphix.com>
Reviewed by: RageLtMan <rageltman@sempervictus>
Reviewed by: Matthew Thode <prometheanfire@gentoo.org>
Reviewed by: Giuseppe Di Natale <dinatale2@llnl.gov>
Reviewed by: Kash Pande <kash@tripleback.net>
Reviewed by: Alek Pinchuk <apinchuk@datto.com>
Reviewed by: Dan Kimmel <dan.kimmel@delphix.com>
Reviewed by: David Quigley <david.quigley@intel.com>
Reviewed by: Jorgen Lundman <lundman@lundman.net>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed by: Toomas Soome <tsoome@me.com>
Reviewed by: C Fraire <cfraire@me.com>
Reviewed by: Jason King <jason.king@joyent.com>
Reviewed by: Andy Stormont <astormont@racktopsystems.com>
Approved by: Garrett D'Amore <garrett@damore.org>
Diffstat (limited to 'usr/src/uts/common/fs/zfs/zio.c')
| -rw-r--r-- | usr/src/uts/common/fs/zfs/zio.c | 479 |
1 files changed, 432 insertions, 47 deletions
diff --git a/usr/src/uts/common/fs/zfs/zio.c b/usr/src/uts/common/fs/zfs/zio.c index 76312e6a74..2e44d59daf 100644 --- a/usr/src/uts/common/fs/zfs/zio.c +++ b/usr/src/uts/common/fs/zfs/zio.c @@ -45,6 +45,7 @@ #include <sys/metaslab_impl.h> #include <sys/abd.h> #include <sys/cityhash.h> +#include <sys/dsl_crypt.h> /* * ========================================================================== @@ -270,6 +271,13 @@ zio_data_buf_free(void *buf, size_t size) kmem_cache_free(zio_data_buf_cache[c], buf); } +/* ARGSUSED */ +static void +zio_abd_free(void *abd, size_t size) +{ + abd_free((abd_t *)abd); +} + /* * ========================================================================== * Push and pop I/O transform buffers @@ -322,7 +330,7 @@ zio_pop_transforms(zio_t *zio) /* * ========================================================================== - * I/O transform callbacks for subblocks and decompression + * I/O transform callbacks for subblocks, decompression, and decryption * ========================================================================== */ static void @@ -348,6 +356,132 @@ zio_decompress(zio_t *zio, abd_t *data, uint64_t size) } } +static void +zio_decrypt(zio_t *zio, abd_t *data, uint64_t size) +{ + int ret; + void *tmp; + blkptr_t *bp = zio->io_bp; + spa_t *spa = zio->io_spa; + uint64_t dsobj = zio->io_bookmark.zb_objset; + uint64_t lsize = BP_GET_LSIZE(bp); + dmu_object_type_t ot = BP_GET_TYPE(bp); + uint8_t salt[ZIO_DATA_SALT_LEN]; + uint8_t iv[ZIO_DATA_IV_LEN]; + uint8_t mac[ZIO_DATA_MAC_LEN]; + boolean_t no_crypt = B_FALSE; + + ASSERT(BP_USES_CRYPT(bp)); + ASSERT3U(size, !=, 0); + + if (zio->io_error != 0) + return; + + /* + * Verify the cksum of MACs stored in an indirect bp. It will always + * be possible to verify this since it does not require an encryption + * key. + */ + if (BP_HAS_INDIRECT_MAC_CKSUM(bp)) { + zio_crypt_decode_mac_bp(bp, mac); + + if (BP_GET_COMPRESS(bp) != ZIO_COMPRESS_OFF) { + /* + * We haven't decompressed the data yet, but + * zio_crypt_do_indirect_mac_checksum() requires + * decompressed data to be able to parse out the MACs + * from the indirect block. We decompress it now and + * throw away the result after we are finished. + */ + tmp = zio_buf_alloc(lsize); + ret = zio_decompress_data(BP_GET_COMPRESS(bp), + zio->io_abd, tmp, zio->io_size, lsize); + if (ret != 0) { + ret = SET_ERROR(EIO); + goto error; + } + ret = zio_crypt_do_indirect_mac_checksum(B_FALSE, + tmp, lsize, BP_SHOULD_BYTESWAP(bp), mac); + zio_buf_free(tmp, lsize); + } else { + ret = zio_crypt_do_indirect_mac_checksum_abd(B_FALSE, + zio->io_abd, size, BP_SHOULD_BYTESWAP(bp), mac); + } + abd_copy(data, zio->io_abd, size); + + if (ret != 0) + goto error; + + return; + } + + /* + * If this is an authenticated block, just check the MAC. It would be + * nice to separate this out into its own flag, but for the moment + * enum zio_flag is out of bits. + */ + if (BP_IS_AUTHENTICATED(bp)) { + if (ot == DMU_OT_OBJSET) { + ret = spa_do_crypt_objset_mac_abd(B_FALSE, spa, + dsobj, zio->io_abd, size, BP_SHOULD_BYTESWAP(bp)); + } else { + zio_crypt_decode_mac_bp(bp, mac); + ret = spa_do_crypt_mac_abd(B_FALSE, spa, dsobj, + zio->io_abd, size, mac); + } + abd_copy(data, zio->io_abd, size); + + if (zio_injection_enabled && ot != DMU_OT_DNODE && ret == 0) { + ret = zio_handle_decrypt_injection(spa, + &zio->io_bookmark, ot, ECKSUM); + } + if (ret != 0) + goto error; + + return; + } + + zio_crypt_decode_params_bp(bp, salt, iv); + + if (ot == DMU_OT_INTENT_LOG) { + tmp = abd_borrow_buf_copy(zio->io_abd, sizeof (zil_chain_t)); + zio_crypt_decode_mac_zil(tmp, mac); + abd_return_buf(zio->io_abd, tmp, sizeof (zil_chain_t)); + } else { + zio_crypt_decode_mac_bp(bp, mac); + } + + ret = spa_do_crypt_abd(B_FALSE, spa, &zio->io_bookmark, BP_GET_TYPE(bp), + BP_GET_DEDUP(bp), BP_SHOULD_BYTESWAP(bp), salt, iv, mac, size, data, + zio->io_abd, &no_crypt); + if (no_crypt) + abd_copy(data, zio->io_abd, size); + + if (ret != 0) + goto error; + + return; + +error: + /* assert that the key was found unless this was speculative */ + ASSERT(ret != EACCES || (zio->io_flags & ZIO_FLAG_SPECULATIVE)); + + /* + * If there was a decryption / authentication error return EIO as + * the io_error. If this was not a speculative zio, create an ereport. + */ + if (ret == ECKSUM) { + zio->io_error = SET_ERROR(EIO); + if ((zio->io_flags & ZIO_FLAG_SPECULATIVE) == 0) { + spa_log_error(spa, &zio->io_bookmark); + zfs_ereport_post(FM_EREPORT_ZFS_AUTHENTICATION, + spa, NULL, &zio->io_bookmark, zio, 0, 0); + } + } else { + zio->io_error = ret; + } +} + /* * ========================================================================== * I/O parent/child relationships and pipeline interlocks @@ -565,7 +699,7 @@ zio_create(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp, ASSERT(!bp || !(flags & ZIO_FLAG_CONFIG_WRITER)); ASSERT(vd || stage == ZIO_STAGE_OPEN); - IMPLY(lsize != psize, (flags & ZIO_FLAG_RAW) != 0); + IMPLY(lsize != psize, (flags & ZIO_FLAG_RAW_COMPRESS) != 0); zio = kmem_cache_alloc(zio_cache, KM_SLEEP); bzero(zio, sizeof (zio_t)); @@ -836,9 +970,12 @@ zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, * Data can be NULL if we are going to call zio_write_override() to * provide the already-allocated BP. But we may need the data to * verify a dedup hit (if requested). In this case, don't try to - * dedup (just take the already-allocated BP verbatim). + * dedup (just take the already-allocated BP verbatim). Encrypted + * dedup blocks need data as well so we also disable dedup in this + * case. */ - if (data == NULL && zio->io_prop.zp_dedup_verify) { + if (data == NULL && + (zio->io_prop.zp_dedup_verify || zio->io_prop.zp_encrypt)) { zio->io_prop.zp_dedup = zio->io_prop.zp_dedup_verify = B_FALSE; } @@ -1189,23 +1326,30 @@ static int zio_read_bp_init(zio_t *zio) { blkptr_t *bp = zio->io_bp; + uint64_t psize = + BP_IS_EMBEDDED(bp) ? BPE_GET_PSIZE(bp) : BP_GET_PSIZE(bp); ASSERT3P(zio->io_bp, ==, &zio->io_bp_copy); if (BP_GET_COMPRESS(bp) != ZIO_COMPRESS_OFF && zio->io_child_type == ZIO_CHILD_LOGICAL && - !(zio->io_flags & ZIO_FLAG_RAW)) { - uint64_t psize = - BP_IS_EMBEDDED(bp) ? BPE_GET_PSIZE(bp) : BP_GET_PSIZE(bp); + !(zio->io_flags & ZIO_FLAG_RAW_COMPRESS)) { zio_push_transform(zio, abd_alloc_sametype(zio->io_abd, psize), psize, psize, zio_decompress); } - if (BP_IS_EMBEDDED(bp) && BPE_GET_ETYPE(bp) == BP_EMBEDDED_TYPE_DATA) { - zio->io_pipeline = ZIO_INTERLOCK_PIPELINE; + if (((BP_IS_PROTECTED(bp) && !(zio->io_flags & ZIO_FLAG_RAW_ENCRYPT)) || + BP_HAS_INDIRECT_MAC_CKSUM(bp)) && + zio->io_child_type == ZIO_CHILD_LOGICAL) { + zio_push_transform(zio, abd_alloc_sametype(zio->io_abd, psize), + psize, psize, zio_decrypt); + } + if (BP_IS_EMBEDDED(bp) && BPE_GET_ETYPE(bp) == BP_EMBEDDED_TYPE_DATA) { int psize = BPE_GET_PSIZE(bp); void *data = abd_borrow_buf(zio->io_abd, psize); + + zio->io_pipeline = ZIO_INTERLOCK_PIPELINE; decode_embedded_bp_compressed(bp, data); abd_return_buf_copy(zio->io_abd, data, psize); } else { @@ -1266,7 +1410,8 @@ zio_write_bp_init(zio_t *zio) ASSERT((zio_checksum_table[zp->zp_checksum].ci_flags & ZCHECKSUM_FLAG_DEDUP) || zp->zp_dedup_verify); - if (BP_GET_CHECKSUM(bp) == zp->zp_checksum) { + if (BP_GET_CHECKSUM(bp) == zp->zp_checksum && + !zp->zp_encrypt) { BP_SET_DEDUP(bp, 1); zio->io_pipeline |= ZIO_STAGE_DDT_WRITE; return (ZIO_PIPELINE_CONTINUE); @@ -1295,8 +1440,6 @@ zio_write_compress(zio_t *zio) uint64_t psize = zio->io_size; int pass = 1; - EQUIV(lsize != psize, (zio->io_flags & ZIO_FLAG_RAW) != 0); - /* * If our children haven't all reached the ready stage, * wait for them and then repeat this pipeline stage. @@ -1347,13 +1490,15 @@ zio_write_compress(zio_t *zio) } /* If it's a compressed write that is not raw, compress the buffer. */ - if (compress != ZIO_COMPRESS_OFF && psize == lsize) { + if (compress != ZIO_COMPRESS_OFF && + !(zio->io_flags & ZIO_FLAG_RAW_COMPRESS)) { void *cbuf = zio_buf_alloc(lsize); psize = zio_compress_data(compress, zio->io_abd, cbuf, lsize); if (psize == 0 || psize == lsize) { compress = ZIO_COMPRESS_OFF; zio_buf_free(cbuf, lsize); - } else if (!zp->zp_dedup && psize <= BPE_PAYLOAD_SIZE && + } else if (!zp->zp_dedup && !zp->zp_encrypt && + psize <= BPE_PAYLOAD_SIZE && zp->zp_level == 0 && !DMU_OT_HAS_FILL(zp->zp_type) && spa_feature_is_enabled(spa, SPA_FEATURE_EMBEDDED_DATA)) { encode_embedded_bp_compressed(bp, @@ -1400,6 +1545,20 @@ zio_write_compress(zio_t *zio) zio->io_bp_override = NULL; *bp = zio->io_bp_orig; zio->io_pipeline = zio->io_orig_pipeline; + + } else if ((zio->io_flags & ZIO_FLAG_RAW_ENCRYPT) != 0 && + zp->zp_type == DMU_OT_DNODE) { + /* + * The DMU actually relies on the zio layer's compression + * to free metadnode blocks that have had all contained + * dnodes freed. As a result, even when doing a raw + * receive, we must check whether the block can be compressed + * to a hole. + */ + psize = zio_compress_data(ZIO_COMPRESS_EMPTY, + zio->io_abd, NULL, lsize); + if (psize == 0) + compress = ZIO_COMPRESS_OFF; } else { ASSERT3U(psize, !=, 0); } @@ -1417,7 +1576,6 @@ zio_write_compress(zio_t *zio) pass >= zfs_sync_pass_rewrite) { VERIFY3U(psize, !=, 0); enum zio_stage gang_stages = zio->io_pipeline & ZIO_GANG_STAGES; - zio->io_pipeline = ZIO_REWRITE_PIPELINE | gang_stages; zio->io_flags |= ZIO_FLAG_IO_REWRITE; } else { @@ -1447,6 +1605,8 @@ zio_write_compress(zio_t *zio) if (zp->zp_dedup) { ASSERT(zio->io_child_type == ZIO_CHILD_LOGICAL); ASSERT(!(zio->io_flags & ZIO_FLAG_IO_REWRITE)); + ASSERT(!zp->zp_encrypt || + DMU_OT_IS_ENCRYPTED(zp->zp_type)); zio->io_pipeline = ZIO_DDT_WRITE_PIPELINE; } if (zp->zp_nopwrite) { @@ -1794,7 +1954,8 @@ zio_suspend(spa_t *spa, zio_t *zio, zio_suspend_reason_t reason) "failure and the failure mode property for this pool " "is set to panic.", spa_name(spa)); - zfs_ereport_post(FM_EREPORT_ZFS_IO_FAILURE, spa, NULL, NULL, 0, 0); + zfs_ereport_post(FM_EREPORT_ZFS_IO_FAILURE, spa, NULL, + NULL, NULL, 0, 0); mutex_enter(&spa->spa_suspend_lock); @@ -2231,6 +2392,13 @@ zio_write_gang_block(zio_t *pio) int error; boolean_t has_data = !(pio->io_flags & ZIO_FLAG_NODATA); + /* + * encrypted blocks need DVA[2] free so encrypted gang headers can't + * have a third copy. + */ + if (gio->io_prop.zp_encrypt && gbh_copies >= SPA_DVAS_PER_BP) + gbh_copies = SPA_DVAS_PER_BP - 1; + int flags = METASLAB_HINTBP_FAVOR | METASLAB_GANG_HEADER; if (pio->io_flags & ZIO_FLAG_IO_ALLOCATING) { ASSERT(pio->io_priority == ZIO_PRIORITY_ASYNC_WRITE); @@ -2309,6 +2477,11 @@ zio_write_gang_block(zio_t *pio) zp.zp_dedup = B_FALSE; zp.zp_dedup_verify = B_FALSE; zp.zp_nopwrite = B_FALSE; + zp.zp_encrypt = gio->io_prop.zp_encrypt; + zp.zp_byteorder = gio->io_prop.zp_byteorder; + bzero(zp.zp_salt, ZIO_DATA_SALT_LEN); + bzero(zp.zp_iv, ZIO_DATA_IV_LEN); + bzero(zp.zp_mac, ZIO_DATA_MAC_LEN); zio_t *cio = zio_write(zio, spa, txg, &gbh->zg_blkptr[g], has_data ? abd_get_offset(pio->io_abd, pio->io_size - @@ -2383,6 +2556,7 @@ zio_nop_write(zio_t *zio) if (BP_IS_HOLE(bp_orig) || !(zio_checksum_table[BP_GET_CHECKSUM(bp)].ci_flags & ZCHECKSUM_FLAG_NOPWRITE) || + BP_IS_ENCRYPTED(bp) || BP_IS_ENCRYPTED(bp_orig) || BP_GET_CHECKSUM(bp) != BP_GET_CHECKSUM(bp_orig) || BP_GET_COMPRESS(bp) != BP_GET_COMPRESS(bp_orig) || BP_GET_DEDUP(bp) != BP_GET_DEDUP(bp_orig) || @@ -2521,7 +2695,7 @@ static boolean_t zio_ddt_collision(zio_t *zio, ddt_t *ddt, ddt_entry_t *dde) { spa_t *spa = zio->io_spa; - boolean_t do_raw = (zio->io_flags & ZIO_FLAG_RAW); + boolean_t do_raw = !!(zio->io_flags & ZIO_FLAG_RAW); /* We should never get a raw, override zio */ ASSERT(!(zio->io_bp_override && do_raw)); @@ -2531,11 +2705,21 @@ zio_ddt_collision(zio_t *zio, ddt_t *ddt, ddt_entry_t *dde) * because when zio->io_bp is an override bp, we will not have * pushed the I/O transforms. That's an important optimization * because otherwise we'd compress/encrypt all dmu_sync() data twice. + * However, we should never get a raw, override zio so in these + * cases we can compare the io_data directly. This is useful because + * it allows us to do dedup verification even if we don't have access + * to the original data (for instance, if the encryption keys aren't + * loaded). */ + for (int p = DDT_PHYS_SINGLE; p <= DDT_PHYS_TRIPLE; p++) { zio_t *lio = dde->dde_lead_zio[p]; - if (lio != NULL) { + if (lio != NULL && do_raw) { + return (lio->io_size != zio->io_size || + abd_cmp(zio->io_abd, lio->io_abd, + zio->io_size) != 0); + } else if (lio != NULL) { return (lio->io_orig_size != zio->io_orig_size || abd_cmp(zio->io_orig_abd, lio->io_orig_abd, zio->io_orig_size) != 0); @@ -2545,7 +2729,36 @@ zio_ddt_collision(zio_t *zio, ddt_t *ddt, ddt_entry_t *dde) for (int p = DDT_PHYS_SINGLE; p <= DDT_PHYS_TRIPLE; p++) { ddt_phys_t *ddp = &dde->dde_phys[p]; - if (ddp->ddp_phys_birth != 0) { + if (ddp->ddp_phys_birth != 0 && do_raw) { + blkptr_t blk = *zio->io_bp; + uint64_t psize; + abd_t *tmpabd; + int error; + + ddt_bp_fill(ddp, &blk, ddp->ddp_phys_birth); + psize = BP_GET_PSIZE(&blk); + + if (psize != zio->io_size) + return (B_TRUE); + + ddt_exit(ddt); + + tmpabd = abd_alloc_for_io(psize, B_TRUE); + + error = zio_wait(zio_read(NULL, spa, &blk, tmpabd, + psize, NULL, NULL, ZIO_PRIORITY_SYNC_READ, + ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | + ZIO_FLAG_RAW, &zio->io_bookmark)); + + if (error == 0) { + if (abd_cmp(tmpabd, zio->io_abd, psize) != 0) + error = SET_ERROR(ENOENT); + } + + abd_free(tmpabd); + ddt_enter(ddt); + return (error != 0); + } else if (ddp->ddp_phys_birth != 0) { arc_buf_t *abuf = NULL; arc_flags_t aflags = ARC_FLAG_WAIT; int zio_flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE; @@ -2554,6 +2767,9 @@ zio_ddt_collision(zio_t *zio, ddt_t *ddt, ddt_entry_t *dde) ddt_bp_fill(ddp, &blk, ddp->ddp_phys_birth); + if (BP_GET_LSIZE(&blk) != zio->io_orig_size) + return (B_TRUE); + ddt_exit(ddt); /* @@ -2578,10 +2794,9 @@ zio_ddt_collision(zio_t *zio, ddt_t *ddt, ddt_entry_t *dde) zio_flags, &aflags, &zio->io_bookmark); if (error == 0) { - if (arc_buf_size(abuf) != zio->io_orig_size || - abd_cmp_buf(zio->io_orig_abd, abuf->b_data, + if (abd_cmp_buf(zio->io_orig_abd, abuf->b_data, zio->io_orig_size) != 0) - error = SET_ERROR(EEXIST); + error = SET_ERROR(ENOENT); arc_buf_destroy(abuf, &abuf); } @@ -3048,7 +3263,7 @@ zio_dva_unallocate(zio_t *zio, zio_gang_node_t *gn, blkptr_t *bp) * Try to allocate an intent log block. Return 0 on success, errno on failure. */ int -zio_alloc_zil(spa_t *spa, uint64_t objset, uint64_t txg, blkptr_t *new_bp, +zio_alloc_zil(spa_t *spa, objset_t *os, uint64_t txg, blkptr_t *new_bp, blkptr_t *old_bp, uint64_t size, boolean_t *slog) { int error = 1; @@ -3074,14 +3289,15 @@ zio_alloc_zil(spa_t *spa, uint64_t objset, uint64_t txg, blkptr_t *new_bp, */ error = metaslab_alloc(spa, spa_log_class(spa), size, new_bp, 1, txg, old_bp, METASLAB_HINTBP_AVOID, &io_alloc_list, NULL, - cityhash4(0, 0, 0, objset) % spa->spa_alloc_count); + cityhash4(0, 0, 0, + os->os_dsl_dataset->ds_object) % spa->spa_alloc_count); if (error == 0) { *slog = TRUE; } else { error = metaslab_alloc(spa, spa_normal_class(spa), size, new_bp, 1, txg, old_bp, METASLAB_HINTBP_AVOID, - &io_alloc_list, NULL, cityhash4(0, 0, 0, objset) % - spa->spa_alloc_count); + &io_alloc_list, NULL, cityhash4(0, 0, 0, + os->os_dsl_dataset->ds_object) % spa->spa_alloc_count); if (error == 0) *slog = FALSE; } @@ -3098,6 +3314,23 @@ zio_alloc_zil(spa_t *spa, uint64_t objset, uint64_t txg, blkptr_t *new_bp, BP_SET_LEVEL(new_bp, 0); BP_SET_DEDUP(new_bp, 0); BP_SET_BYTEORDER(new_bp, ZFS_HOST_BYTEORDER); + + /* + * encrypted blocks will require an IV and salt. We generate + * these now since we will not be rewriting the bp at + * rewrite time. + */ + if (os->os_encrypted) { + uint8_t iv[ZIO_DATA_IV_LEN]; + uint8_t salt[ZIO_DATA_SALT_LEN]; + + BP_SET_CRYPT(new_bp, B_TRUE); + VERIFY0(spa_crypt_get_salt(spa, + dmu_objset_id(os), salt)); + VERIFY0(zio_crypt_generate_iv(iv)); + + zio_crypt_encode_params_bp(new_bp, salt, iv); + } } else { zfs_dbgmsg("%s: zil block allocation failure: " "size %llu, error %d", spa_name(spa), size, error); @@ -3332,7 +3565,7 @@ zio_change_priority(zio_t *pio, zio_priority_t priority) */ static void zio_vsd_default_cksum_finish(zio_cksum_report_t *zcr, - const void *good_buf) + const abd_t *good_buf) { /* no processing needed */ zfs_ereport_finish_checksum(zcr, good_buf, zcr->zcr_cbdata, B_FALSE); @@ -3342,14 +3575,14 @@ zio_vsd_default_cksum_finish(zio_cksum_report_t *zcr, void zio_vsd_default_cksum_report(zio_t *zio, zio_cksum_report_t *zcr, void *ignored) { - void *buf = zio_buf_alloc(zio->io_size); + void *abd = abd_alloc_sametype(zio->io_abd, zio->io_size); - abd_copy_to_buf(buf, zio->io_abd, zio->io_size); + abd_copy(abd, zio->io_abd, zio->io_size); zcr->zcr_cbinfo = zio->io_size; - zcr->zcr_cbdata = buf; + zcr->zcr_cbdata = abd; zcr->zcr_finish = zio_vsd_default_cksum_finish; - zcr->zcr_free = zio_buf_free; + zcr->zcr_free = zio_abd_free; } static int @@ -3460,6 +3693,164 @@ zio_vdev_io_bypass(zio_t *zio) /* * ========================================================================== + * Encrypt and store encryption parameters + * ========================================================================== + */ + + +/* + * This function is used for ZIO_STAGE_ENCRYPT. It is responsible for + * managing the storage of encryption parameters and passing them to the + * lower-level encryption functions. + */ +static int +zio_encrypt(zio_t *zio) +{ + zio_prop_t *zp = &zio->io_prop; + spa_t *spa = zio->io_spa; + blkptr_t *bp = zio->io_bp; + uint64_t psize = BP_GET_PSIZE(bp); + uint64_t dsobj = zio->io_bookmark.zb_objset; + dmu_object_type_t ot = BP_GET_TYPE(bp); + void *enc_buf = NULL; + abd_t *eabd = NULL; + uint8_t salt[ZIO_DATA_SALT_LEN]; + uint8_t iv[ZIO_DATA_IV_LEN]; + uint8_t mac[ZIO_DATA_MAC_LEN]; + boolean_t no_crypt = B_FALSE; + + /* the root zio already encrypted the data */ + if (zio->io_child_type == ZIO_CHILD_GANG) + return (ZIO_PIPELINE_CONTINUE); + + /* only ZIL blocks are re-encrypted on rewrite */ + if (!IO_IS_ALLOCATING(zio) && ot != DMU_OT_INTENT_LOG) + return (ZIO_PIPELINE_CONTINUE); + + if (!(zp->zp_encrypt || BP_IS_ENCRYPTED(bp))) { + BP_SET_CRYPT(bp, B_FALSE); + return (ZIO_PIPELINE_CONTINUE); + } + + /* if we are doing raw encryption set the provided encryption params */ + if (zio->io_flags & ZIO_FLAG_RAW_ENCRYPT) { + ASSERT0(BP_GET_LEVEL(bp)); + BP_SET_CRYPT(bp, B_TRUE); + BP_SET_BYTEORDER(bp, zp->zp_byteorder); + if (ot != DMU_OT_OBJSET) + zio_crypt_encode_mac_bp(bp, zp->zp_mac); + + /* dnode blocks must be written out in the provided byteorder */ + if (zp->zp_byteorder != ZFS_HOST_BYTEORDER && + ot == DMU_OT_DNODE) { + void *bswap_buf = zio_buf_alloc(psize); + abd_t *babd = abd_get_from_buf(bswap_buf, psize); + + ASSERT3U(BP_GET_COMPRESS(bp), ==, ZIO_COMPRESS_OFF); + abd_copy_to_buf(bswap_buf, zio->io_abd, psize); + dmu_ot_byteswap[DMU_OT_BYTESWAP(ot)].ob_func(bswap_buf, + psize); + + abd_take_ownership_of_buf(babd, B_TRUE); + zio_push_transform(zio, babd, psize, psize, NULL); + } + + if (DMU_OT_IS_ENCRYPTED(ot)) + zio_crypt_encode_params_bp(bp, zp->zp_salt, zp->zp_iv); + return (ZIO_PIPELINE_CONTINUE); + } + + /* indirect blocks only maintain a cksum of the lower level MACs */ + if (BP_GET_LEVEL(bp) > 0) { + BP_SET_CRYPT(bp, B_TRUE); + VERIFY0(zio_crypt_do_indirect_mac_checksum_abd(B_TRUE, + zio->io_orig_abd, BP_GET_LSIZE(bp), BP_SHOULD_BYTESWAP(bp), + mac)); + zio_crypt_encode_mac_bp(bp, mac); + return (ZIO_PIPELINE_CONTINUE); + } + + /* + * Objset blocks are a special case since they have 2 256-bit MACs + * embedded within them. + */ + if (ot == DMU_OT_OBJSET) { + ASSERT0(DMU_OT_IS_ENCRYPTED(ot)); + ASSERT3U(BP_GET_COMPRESS(bp), ==, ZIO_COMPRESS_OFF); + BP_SET_CRYPT(bp, B_TRUE); + VERIFY0(spa_do_crypt_objset_mac_abd(B_TRUE, spa, dsobj, + zio->io_abd, psize, BP_SHOULD_BYTESWAP(bp))); + return (ZIO_PIPELINE_CONTINUE); + } + + /* unencrypted object types are only authenticated with a MAC */ + if (!DMU_OT_IS_ENCRYPTED(ot)) { + BP_SET_CRYPT(bp, B_TRUE); + VERIFY0(spa_do_crypt_mac_abd(B_TRUE, spa, dsobj, + zio->io_abd, psize, mac)); + zio_crypt_encode_mac_bp(bp, mac); + return (ZIO_PIPELINE_CONTINUE); + } + + /* + * Later passes of sync-to-convergence may decide to rewrite data + * in place to avoid more disk reallocations. This presents a problem + * for encryption because this consitutes rewriting the new data with + * the same encryption key and IV. However, this only applies to blocks + * in the MOS (particularly the spacemaps) and we do not encrypt the + * MOS. We assert that the zio is allocating or an intent log write + * to enforce this. + */ + ASSERT(IO_IS_ALLOCATING(zio) || ot == DMU_OT_INTENT_LOG); + ASSERT(BP_GET_LEVEL(bp) == 0 || ot == DMU_OT_INTENT_LOG); + ASSERT(spa_feature_is_active(spa, SPA_FEATURE_ENCRYPTION)); + ASSERT3U(psize, !=, 0); + + enc_buf = zio_buf_alloc(psize); + eabd = abd_get_from_buf(enc_buf, psize); + abd_take_ownership_of_buf(eabd, B_TRUE); + + /* + * For an explanation of what encryption parameters are stored + * where, see the block comment in zio_crypt.c. + */ + if (ot == DMU_OT_INTENT_LOG) { + zio_crypt_decode_params_bp(bp, salt, iv); + } else { + BP_SET_CRYPT(bp, B_TRUE); + } + + /* Perform the encryption. This should not fail */ + VERIFY0(spa_do_crypt_abd(B_TRUE, spa, &zio->io_bookmark, + BP_GET_TYPE(bp), BP_GET_DEDUP(bp), BP_SHOULD_BYTESWAP(bp), + salt, iv, mac, psize, zio->io_abd, eabd, &no_crypt)); + + /* encode encryption metadata into the bp */ + if (ot == DMU_OT_INTENT_LOG) { + /* + * ZIL blocks store the MAC in the embedded checksum, so the + * transform must always be applied. + */ + zio_crypt_encode_mac_zil(enc_buf, mac); + zio_push_transform(zio, eabd, psize, psize, NULL); + } else { + BP_SET_CRYPT(bp, B_TRUE); + zio_crypt_encode_params_bp(bp, salt, iv); + zio_crypt_encode_mac_bp(bp, mac); + + if (no_crypt) { + ASSERT3U(ot, ==, DMU_OT_DNODE); + abd_free(eabd); + } else { + zio_push_transform(zio, eabd, psize, psize, NULL); + } + } + + return (ZIO_PIPELINE_CONTINUE); +} + +/* + * ========================================================================== * Generate and verify checksums * ========================================================================== */ @@ -3519,8 +3910,8 @@ zio_checksum_verify(zio_t *zio) if (error == ECKSUM && !(zio->io_flags & ZIO_FLAG_SPECULATIVE)) { zfs_ereport_start_checksum(zio->io_spa, - zio->io_vd, zio, zio->io_offset, - zio->io_size, NULL, &info); + zio->io_vd, &zio->io_bookmark, zio, + zio->io_offset, zio->io_size, NULL, &info); } } @@ -3765,7 +4156,6 @@ zio_done(zio_t *zio) if (zio->io_type == ZIO_TYPE_WRITE && !BP_IS_HOLE(bp) && zio->io_bp_override == NULL && !(zio->io_flags & ZIO_FLAG_IO_REPAIR)) { - ASSERT(!BP_SHOULD_BYTESWAP(bp)); ASSERT3U(zio->io_prop.zp_copies, <=, BP_GET_NDVAS(bp)); ASSERT(BP_COUNT_GANG(bp) == 0 || (BP_COUNT_GANG(bp) == BP_GET_NDVAS(bp))); @@ -3790,26 +4180,19 @@ zio_done(zio_t *zio) zio_cksum_report_t *zcr = zio->io_cksum_report; uint64_t align = zcr->zcr_align; uint64_t asize = P2ROUNDUP(psize, align); - char *abuf = NULL; abd_t *adata = zio->io_abd; if (asize != psize) { - adata = abd_alloc_linear(asize, B_TRUE); + adata = abd_alloc(asize, B_TRUE); abd_copy(adata, zio->io_abd, psize); abd_zero_off(adata, psize, asize - psize); } - if (adata != NULL) - abuf = abd_borrow_buf_copy(adata, asize); - zio->io_cksum_report = zcr->zcr_next; zcr->zcr_next = NULL; - zcr->zcr_finish(zcr, abuf); + zcr->zcr_finish(zcr, adata); zfs_ereport_free_checksum(zcr); - if (adata != NULL) - abd_return_buf(adata, abuf, asize); - if (asize != psize) abd_free(adata); } @@ -3827,7 +4210,8 @@ zio_done(zio_t *zio) * device is currently unavailable. */ if (zio->io_error != ECKSUM && vd != NULL && !vdev_is_dead(vd)) - zfs_ereport_post(FM_EREPORT_ZFS_IO, spa, vd, zio, 0, 0); + zfs_ereport_post(FM_EREPORT_ZFS_IO, spa, vd, + &zio->io_bookmark, zio, 0, 0); if ((zio->io_error == EIO || !(zio->io_flags & (ZIO_FLAG_SPECULATIVE | ZIO_FLAG_DONT_PROPAGATE))) && @@ -3836,9 +4220,9 @@ zio_done(zio_t *zio) * For logical I/O requests, tell the SPA to log the * error and generate a logical data ereport. */ - spa_log_error(spa, zio); - zfs_ereport_post(FM_EREPORT_ZFS_DATA, spa, NULL, zio, - 0, 0); + spa_log_error(spa, &zio->io_bookmark); + zfs_ereport_post(FM_EREPORT_ZFS_DATA, spa, NULL, + &zio->io_bookmark, zio, 0, 0); } } @@ -4029,6 +4413,7 @@ static zio_pipe_stage_t *zio_pipeline[] = { zio_free_bp_init, zio_issue_async, zio_write_compress, + zio_encrypt, zio_checksum_generate, zio_nop_write, zio_ddt_read_start, |
