diff options
200 files changed, 19953 insertions, 1513 deletions
diff --git a/usr/src/cmd/availdevs/availdevs.c b/usr/src/cmd/availdevs/availdevs.c index 6c39f394d1..ed0e2b2eed 100644 --- a/usr/src/cmd/availdevs/availdevs.c +++ b/usr/src/cmd/availdevs/availdevs.c @@ -169,7 +169,7 @@ add_pool_to_xml(nvlist_t *config, void *data) (xmlChar *)zjni_pool_state_to_str(state)); (void) xmlSetProp(pool, (xmlChar *)ATTR_POOL_STATUS, (xmlChar *) - zjni_pool_status_to_str(zpool_import_status(config, &c))); + zjni_pool_status_to_str(zpool_import_status(config, &c, NULL))); return (0); } diff --git a/usr/src/cmd/mdb/common/modules/zfs/zfs.c b/usr/src/cmd/mdb/common/modules/zfs/zfs.c index 271ce782ba..89b3a4873e 100644 --- a/usr/src/cmd/mdb/common/modules/zfs/zfs.c +++ b/usr/src/cmd/mdb/common/modules/zfs/zfs.c @@ -27,6 +27,17 @@ /* Portions Copyright 2010 Robert Milkowski */ +/* + * ZFS_MDB lets dmu.h know that we don't have dmu_ot, and we will define our + * own macros to access the target's dmu_ot. Therefore it must be defined + * before including any ZFS headers. Note that we don't define + * DMU_OT_IS_ENCRYPTED_IMPL() or DMU_OT_BYTESWAP_IMPL(), therefore using them + * will result in a compilation error. If they are needed in the future, we + * can implement them similarly to mdb_dmu_ot_is_encrypted_impl(). 
+ */ +#define ZFS_MDB +#define DMU_OT_IS_ENCRYPTED_IMPL(ot) mdb_dmu_ot_is_encrypted_impl(ot) + #include <mdb/mdb_ctf.h> #include <sys/zfs_context.h> #include <sys/mdb_modapi.h> @@ -567,6 +578,30 @@ dva(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) return (DCMD_OK); } +typedef struct mdb_dmu_object_type_info { + boolean_t ot_encrypt; +} mdb_dmu_object_type_info_t; + +static boolean_t +mdb_dmu_ot_is_encrypted_impl(dmu_object_type_t ot) +{ + mdb_dmu_object_type_info_t mdoti; + GElf_Sym sym; + size_t sz = mdb_ctf_sizeof_by_name("dmu_object_type_info_t"); + + if (mdb_lookup_by_obj(ZFS_OBJ_NAME, "dmu_ot", &sym)) { + mdb_warn("failed to find " ZFS_OBJ_NAME "`dmu_ot"); + return (B_FALSE); + } + + if (mdb_ctf_vread(&mdoti, "dmu_object_type_info_t", + "mdb_dmu_object_type_info_t", sym.st_value + sz * ot, 0) != 0) { + return (B_FALSE); + } + + return (mdoti.ot_encrypt); +} + /* ARGSUSED */ static int blkptr(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) @@ -2839,13 +2874,6 @@ zfs_blkstats(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) dmu_object_type_t t; zfs_blkstat_t *tzb; uint64_t ditto; - dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES + 10]; - /* +10 in case it grew */ - - if (mdb_readvar(&dmu_ot, "dmu_ot") == -1) { - mdb_warn("failed to read 'dmu_ot'"); - return (DCMD_ERR); - } if (mdb_getopts(argc, argv, 'v', MDB_OPT_SETBITS, TRUE, &verbose, @@ -2894,8 +2922,8 @@ zfs_blkstats(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) strcpy(typename, "other"); else if (t == DMU_OT_TOTAL) strcpy(typename, "Total"); - else if (mdb_readstr(typename, sizeof (typename), - (uintptr_t)dmu_ot[t].ot_name) == -1) { + else if (enum_lookup("enum dmu_object_type", + t, "DMU_OT_", sizeof (typename), typename) == -1) { mdb_warn("failed to read type name"); return (DCMD_ERR); } diff --git a/usr/src/cmd/mdb/intel/amd64/libzpool/Makefile b/usr/src/cmd/mdb/intel/amd64/libzpool/Makefile index b0ebe4c7c8..b5bbd8467b 100644 --- 
a/usr/src/cmd/mdb/intel/amd64/libzpool/Makefile +++ b/usr/src/cmd/mdb/intel/amd64/libzpool/Makefile @@ -42,7 +42,8 @@ GENUNIX_DIR = ../../../common/modules/genunix CPPFLAGS.first = -I$(SRC)/lib/libfakekernel/common -D_FAKE_KERNEL CPPFLAGS += -I../../../../../lib/libzpool/common \ - -I../../../../../uts/common/fs/zfs + -I../../../../../uts/common/fs/zfs \ + -I../../../../../common/zfs CSTD= $(CSTD_GNU99) C99LMODE= -Xc99=%all diff --git a/usr/src/cmd/mdb/intel/amd64/zfs/Makefile b/usr/src/cmd/mdb/intel/amd64/zfs/Makefile index 8224d7593d..4ffa954410 100644 --- a/usr/src/cmd/mdb/intel/amd64/zfs/Makefile +++ b/usr/src/cmd/mdb/intel/amd64/zfs/Makefile @@ -39,6 +39,7 @@ include ../../../common/modules/zfs/Makefile.zfs CPPFLAGS += -I../../../../../uts/common/fs/zfs CPPFLAGS += -I../../../../../uts/common/fs/zfs/lua +CPPFLAGS += -I../../../../../common/zfs CSTD= $(CSTD_GNU99) C99LMODE= -Xc99=%all diff --git a/usr/src/cmd/mdb/intel/ia32/libzpool/Makefile b/usr/src/cmd/mdb/intel/ia32/libzpool/Makefile index 0ed9a6d470..c19d9c56a5 100644 --- a/usr/src/cmd/mdb/intel/ia32/libzpool/Makefile +++ b/usr/src/cmd/mdb/intel/ia32/libzpool/Makefile @@ -43,7 +43,8 @@ GENUNIX_DIR = ../../../common/modules/genunix CPPFLAGS.first = -I$(SRC)/lib/libfakekernel/common -D_FAKE_KERNEL CPPFLAGS += -I../../../../../lib/libzpool/common \ -I../../../../../uts/common/fs/zfs \ - -I../../../../../uts/common/fs/zfs/lua + -I../../../../../uts/common/fs/zfs/lua \ + -I../../../../../common/zfs CSTD= $(CSTD_GNU99) C99LMODE= -Xc99=%all diff --git a/usr/src/cmd/mdb/sparc/v7/libzpool/Makefile b/usr/src/cmd/mdb/sparc/v7/libzpool/Makefile index f8f2b3fae3..5b24c538e4 100644 --- a/usr/src/cmd/mdb/sparc/v7/libzpool/Makefile +++ b/usr/src/cmd/mdb/sparc/v7/libzpool/Makefile @@ -41,7 +41,8 @@ GENUNIX_DIR = ../../../common/modules/genunix CPPFLAGS += -I../../../../../lib/libzpool/common \ -I../../../../../uts/common/fs/zfs \ - -I../../../../../uts/common/fs/zfs/lua + -I../../../../../uts/common/fs/zfs/lua \ + 
-I../../../../../common/zfs CSTD= $(CSTD_GNU99) C99LMODE= -Xc99=%all diff --git a/usr/src/cmd/mdb/sparc/v9/libzpool/Makefile b/usr/src/cmd/mdb/sparc/v9/libzpool/Makefile index 6b50bacc96..f512dc4c10 100644 --- a/usr/src/cmd/mdb/sparc/v9/libzpool/Makefile +++ b/usr/src/cmd/mdb/sparc/v9/libzpool/Makefile @@ -42,7 +42,8 @@ GENUNIX_DIR = ../../../common/modules/genunix CPPFLAGS += -I../../../../../lib/libzpool/common \ -I../../../../../uts/common/fs/zfs \ - -I../../../../../uts/common/fs/zfs/lua + -I../../../../../uts/common/fs/zfs/lua \ + -I../../../../../common/zfs CSTD= $(CSTD_GNU99) C99LMODE= -Xc99=%all diff --git a/usr/src/cmd/mdb/sparc/v9/zfs/Makefile b/usr/src/cmd/mdb/sparc/v9/zfs/Makefile index 116cc938d8..e2407effa9 100644 --- a/usr/src/cmd/mdb/sparc/v9/zfs/Makefile +++ b/usr/src/cmd/mdb/sparc/v9/zfs/Makefile @@ -39,6 +39,7 @@ include ../../../common/modules/zfs/Makefile.zfs CPPFLAGS += -I../../../../../uts/common/fs/zfs CPPFLAGS += -I../../../../../uts/common/fs/zfs/lua +CPPFLAGS += -I../../../../../common/zfs CSTD= $(CSTD_GNU99) C99LMODE= -Xc99=%all diff --git a/usr/src/cmd/truss/codes.c b/usr/src/cmd/truss/codes.c index ac49896686..888e6c9caf 100644 --- a/usr/src/cmd/truss/codes.c +++ b/usr/src/cmd/truss/codes.c @@ -1294,6 +1294,12 @@ const struct ioc { "zfs_cmd_t" }, { (uint_t)ZFS_IOC_POOL_SYNC, "ZFS_IOC_POOL_SYNC", "zfs_cmd_t" }, + { (uint_t)ZFS_IOC_LOAD_KEY, "ZFS_IOC_LOAD_KEY", + "zfs_cmd_t" }, + { (uint_t)ZFS_IOC_UNLOAD_KEY, "ZFS_IOC_UNLOAD_KEY", + "zfs_cmd_t" }, + { (uint_t)ZFS_IOC_CHANGE_KEY, "ZFS_IOC_CHANGE_KEY", + "zfs_cmd_t" }, /* kssl ioctls */ { (uint_t)KSSL_ADD_ENTRY, "KSSL_ADD_ENTRY", diff --git a/usr/src/cmd/zdb/Makefile.com b/usr/src/cmd/zdb/Makefile.com index be85c6dbb5..1834e9b2f8 100644 --- a/usr/src/cmd/zdb/Makefile.com +++ b/usr/src/cmd/zdb/Makefile.com @@ -65,6 +65,9 @@ LINTFLAGS64 += -xerroff=E_NAME_DEF_NOT_USED2 LINTFLAGS += -erroff=E_STATIC_UNUSED LINTFLAGS64 += -erroff=E_STATIC_UNUSED +LINTFLAGS += -erroff=E_BAD_PTR_CAST_ALIGN 
+LINTFLAGS64 += -erroff=E_BAD_PTR_CAST_ALIGN + .KEEP_STATE: all: $(PROG) diff --git a/usr/src/cmd/zdb/zdb.c b/usr/src/cmd/zdb/zdb.c index 16c8c60df1..26ed6dacf8 100644 --- a/usr/src/cmd/zdb/zdb.c +++ b/usr/src/cmd/zdb/zdb.c @@ -65,6 +65,7 @@ #include <sys/abd.h> #include <sys/blkptr.h> #include <sys/dsl_scan.h> +#include <sys/dsl_crypt.h> #include <zfs_comutil.h> #include <libcmdutils.h> #undef verify @@ -1388,7 +1389,7 @@ static void snprintf_blkptr_compact(char *blkbuf, size_t buflen, const blkptr_t *bp) { const dva_t *dva = bp->blk_dva; - int ndvas = dump_opt['d'] > 5 ? BP_GET_NDVAS(bp) : 1; + unsigned int ndvas = dump_opt['d'] > 5 ? BP_GET_NDVAS(bp) : 1; if (dump_opt['b'] >= 6) { snprintf_blkptr(blkbuf, buflen, bp); @@ -1406,7 +1407,7 @@ snprintf_blkptr_compact(char *blkbuf, size_t buflen, const blkptr_t *bp) } blkbuf[0] = '\0'; - for (int i = 0; i < ndvas; i++) + for (unsigned int i = 0; i < ndvas; i++) (void) snprintf(blkbuf + strlen(blkbuf), buflen - strlen(blkbuf), "%llu:%llx:%llx ", (u_longlong_t)DVA_GET_VDEV(&dva[i]), @@ -1871,14 +1872,14 @@ open_objset(const char *path, dmu_objset_type_t type, void *tag, objset_t **osp) uint64_t version = 0; VERIFY3P(sa_os, ==, NULL); - err = dmu_objset_own(path, type, B_TRUE, tag, osp); + err = dmu_objset_own(path, type, B_TRUE, B_FALSE, tag, osp); if (err != 0) { (void) fprintf(stderr, "failed to own dataset '%s': %s\n", path, strerror(err)); return (err); } - if (dmu_objset_type(*osp) == DMU_OST_ZFS) { + if (dmu_objset_type(*osp) == DMU_OST_ZFS && !(*osp)->os_encrypted) { (void) zap_lookup(*osp, MASTER_NODE_OBJ, ZPL_VERSION_STR, 8, 1, &version); if (version >= ZPL_VERSION_SA) { @@ -1890,7 +1891,7 @@ open_objset(const char *path, dmu_objset_type_t type, void *tag, objset_t **osp) if (err != 0) { (void) fprintf(stderr, "sa_setup failed: %s\n", strerror(err)); - dmu_objset_disown(*osp, tag); + dmu_objset_disown(*osp, B_FALSE, tag); *osp = NULL; } } @@ -1905,7 +1906,7 @@ close_objset(objset_t *os, void *tag) VERIFY3P(os, 
==, sa_os); if (os->os_sa != NULL) sa_tear_down(os); - dmu_objset_disown(os, tag); + dmu_objset_disown(os, B_FALSE, tag); sa_attr_table = NULL; sa_os = NULL; } @@ -2061,6 +2062,7 @@ dump_dmu_objset(objset_t *os, uint64_t object, void *data, size_t size) { } + static object_viewer_t *object_viewer[DMU_OT_NUMTYPES + 1] = { dump_none, /* unallocated */ dump_zap, /* object directory */ @@ -2126,6 +2128,7 @@ dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header, dmu_buf_t *db = NULL; dmu_object_info_t doi; dnode_t *dn; + boolean_t dnode_held = B_FALSE; void *bonus = NULL; size_t bsize = 0; char iblk[32], dblk[32], lsize[32], asize[32], fill[32], dnsize[32]; @@ -2149,16 +2152,33 @@ dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header, if (object == 0) { dn = DMU_META_DNODE(os); + dmu_object_info_from_dnode(dn, &doi); } else { - error = dmu_bonus_hold(os, object, FTAG, &db); + /* + * Encrypted datasets will have sensitive bonus buffers + * encrypted. Therefore we cannot hold the bonus buffer and + * must hold the dnode itself instead. 
+ */ + error = dmu_object_info(os, object, &doi); if (error) - fatal("dmu_bonus_hold(%llu) failed, errno %u", - object, error); - bonus = db->db_data; - bsize = db->db_size; - dn = DB_DNODE((dmu_buf_impl_t *)db); + fatal("dmu_object_info() failed, errno %u", error); + + if (os->os_encrypted && + DMU_OT_IS_ENCRYPTED(doi.doi_bonus_type)) { + error = dnode_hold(os, object, FTAG, &dn); + if (error) + fatal("dnode_hold() failed, errno %u", error); + dnode_held = B_TRUE; + } else { + error = dmu_bonus_hold(os, object, FTAG, &db); + if (error) + fatal("dmu_bonus_hold(%llu) failed, errno %u", + object, error); + bonus = db->db_data; + bsize = db->db_size; + dn = DB_DNODE((dmu_buf_impl_t *)db); + } } - dmu_object_info_from_dnode(dn, &doi); if (dnode_slots_used != NULL) *dnode_slots_used = doi.doi_dnodesize / DNODE_MIN_SIZE; @@ -2207,9 +2227,20 @@ dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header, (void) printf("\tdnode maxblkid: %llu\n", (longlong_t)dn->dn_phys->dn_maxblkid); - object_viewer[ZDB_OT_TYPE(doi.doi_bonus_type)](os, object, - bonus, bsize); - object_viewer[ZDB_OT_TYPE(doi.doi_type)](os, object, NULL, 0); + if (!dnode_held) { + object_viewer[ZDB_OT_TYPE(doi.doi_bonus_type)](os, + object, bonus, bsize); + } else { + (void) printf("\t\t(bonus encrypted)\n"); + } + + if (!os->os_encrypted || !DMU_OT_IS_ENCRYPTED(doi.doi_type)) { + object_viewer[ZDB_OT_TYPE(doi.doi_type)](os, object, + NULL, 0); + } else { + (void) printf("\t\t(object encrypted)\n"); + } + *print_header = 1; } @@ -2253,6 +2284,8 @@ dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header, if (db != NULL) dmu_buf_rele(db, FTAG); + if (dnode_held) + dnode_rele(dn, FTAG); } static void @@ -2631,7 +2664,7 @@ dump_path(char *ds, char *path) if (err != 0) { (void) fprintf(stderr, "can't lookup root znode: %s\n", strerror(err)); - dmu_objset_disown(os, FTAG); + dmu_objset_disown(os, B_FALSE, FTAG); return (EINVAL); } @@ -3729,7 +3762,8 @@ dump_block_stats(spa_t 
*spa) zdb_cb_t zcb; zdb_blkstats_t *zb, *tzb; uint64_t norm_alloc, norm_space, total_alloc, total_found; - int flags = TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA | TRAVERSE_HARD; + int flags = TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA | + TRAVERSE_NO_DECRYPT | TRAVERSE_HARD; boolean_t leaks = B_FALSE; int err; @@ -4106,8 +4140,8 @@ dump_simulated_ddt(spa_t *spa) spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); - (void) traverse_pool(spa, 0, TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA, - zdb_ddt_add_cb, &t); + (void) traverse_pool(spa, 0, TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA | + TRAVERSE_NO_DECRYPT, zdb_ddt_add_cb, &t); spa_config_exit(spa, SCL_CONFIG, FTAG); diff --git a/usr/src/cmd/zdb/zdb_il.c b/usr/src/cmd/zdb/zdb_il.c index 9daf9a7000..7439cdbae9 100644 --- a/usr/src/cmd/zdb/zdb_il.c +++ b/usr/src/cmd/zdb/zdb_il.c @@ -325,8 +325,13 @@ print_log_record(zilog_t *zilog, lr_t *lr, void *arg, uint64_t claim_txg) (u_longlong_t)lr->lrc_txg, (u_longlong_t)lr->lrc_seq); - if (txtype && verbose >= 3) - zil_rec_info[txtype].zri_print(zilog, txtype, lr); + if (txtype && verbose >= 3) { + if (!zilog->zl_os->os_encrypted) { + zil_rec_info[txtype].zri_print(zilog, txtype, lr); + } else { + (void) printf("%s(encrypted)\n", tab_prefix); + } + } zil_rec_info[txtype].zri_count++; zil_rec_info[0].zri_count++; @@ -418,7 +423,7 @@ dump_intent_log(zilog_t *zilog) if (verbose >= 2) { (void) printf("\n"); (void) zil_parse(zilog, print_log_block, print_log_record, NULL, - zh->zh_claim_txg); + zh->zh_claim_txg, B_FALSE); print_log_stats(verbose); } } diff --git a/usr/src/cmd/zfs/zfs_main.c b/usr/src/cmd/zfs/zfs_main.c index 283f3ff044..6265d4ad4e 100644 --- a/usr/src/cmd/zfs/zfs_main.c +++ b/usr/src/cmd/zfs/zfs_main.c @@ -111,6 +111,9 @@ static int zfs_do_diff(int argc, char **argv); static int zfs_do_bookmark(int argc, char **argv); static int zfs_do_remap(int argc, char **argv); static int zfs_do_channel_program(int argc, char **argv); +static int zfs_do_load_key(int argc, char 
**argv); +static int zfs_do_unload_key(int argc, char **argv); +static int zfs_do_change_key(int argc, char **argv); /* * Enable a reasonable set of defaults for libumem debugging on DEBUG builds. @@ -160,6 +163,9 @@ typedef enum { HELP_REMAP, HELP_BOOKMARK, HELP_CHANNEL_PROGRAM, + HELP_LOAD_KEY, + HELP_UNLOAD_KEY, + HELP_CHANGE_KEY, } zfs_help_t; typedef struct zfs_command { @@ -215,6 +221,9 @@ static zfs_command_t command_table[] = { { "release", zfs_do_release, HELP_RELEASE }, { "diff", zfs_do_diff, HELP_DIFF }, { "remap", zfs_do_remap, HELP_REMAP }, + { "load-key", zfs_do_load_key, HELP_LOAD_KEY }, + { "unload-key", zfs_do_unload_key, HELP_UNLOAD_KEY }, + { "change-key", zfs_do_change_key, HELP_CHANGE_KEY }, }; #define NCOMMAND (sizeof (command_table) / sizeof (command_table[0])) @@ -256,7 +265,7 @@ get_usage(zfs_help_t idx) "[filesystem|volume|snapshot] ...\n")); case HELP_MOUNT: return (gettext("\tmount\n" - "\tmount [-vO] [-o opts] <-a | filesystem>\n")); + "\tmount [-lvO] [-o opts] <-a | filesystem>\n")); case HELP_PROMOTE: return (gettext("\tpromote <clone-filesystem>\n")); case HELP_RECEIVE: @@ -273,16 +282,16 @@ get_usage(zfs_help_t idx) case HELP_ROLLBACK: return (gettext("\trollback [-rRf] <snapshot>\n")); case HELP_SEND: - return (gettext("\tsend [-DnPpRvLec] [-[iI] snapshot] " + return (gettext("\tsend [-DnPpRvLecr] [-[iI] snapshot] " "<snapshot>\n" - "\tsend [-Le] [-i snapshot|bookmark] " + "\tsend [-Lecr] [-i snapshot|bookmark] " "<filesystem|volume|snapshot>\n" "\tsend [-nvPe] -t <receive_resume_token>\n")); case HELP_SET: return (gettext("\tset <property=value> ... " "<filesystem|volume|snapshot> ...\n")); case HELP_SHARE: - return (gettext("\tshare <-a | filesystem>\n")); + return (gettext("\tshare [-l] <-a | filesystem>\n")); case HELP_SNAPSHOT: return (gettext("\tsnapshot [-r] [-o property=value] ... 
" "<filesystem|volume>@<snap> ...\n")); @@ -339,6 +348,17 @@ get_usage(zfs_help_t idx) return (gettext("\tprogram [-jn] [-t <instruction limit>] " "[-m <memory limit (b)>] <pool> <program file> " "[lua args...]\n")); + case HELP_LOAD_KEY: + return (gettext("\tload-key [-rn] [-L <keylocation>] " + "<-a | filesystem|volume>\n")); + case HELP_UNLOAD_KEY: + return (gettext("\tunload-key [-r] " + "<-a | filesystem|volume>\n")); + case HELP_CHANGE_KEY: + return (gettext("\tchange-key [-l] [-o keyformat=<value>]\n" + "\t [-o keylocation=<value>] [-o pbkfd2iters=<value>]\n" + "\t <filesystem|volume>\n" + "\tchange-key -i [-l] <filesystem|volume>\n")); } abort(); @@ -872,7 +892,7 @@ zfs_do_create(int argc, char **argv) (void) snprintf(msg, sizeof (msg), gettext("cannot create '%s'"), argv[0]); if (props && (real_props = zfs_valid_proplist(g_zfs, type, - props, 0, NULL, zpool_handle, msg)) == NULL) { + props, 0, NULL, zpool_handle, B_TRUE, msg)) == NULL) { zpool_close(zpool_handle); goto error; } @@ -3745,11 +3765,12 @@ zfs_do_send(int argc, char **argv) {"embed", no_argument, NULL, 'e'}, {"resume", required_argument, NULL, 't'}, {"compressed", no_argument, NULL, 'c'}, + {"raw", no_argument, NULL, 'w'}, {0, 0, 0, 0} }; /* check options */ - while ((c = getopt_long(argc, argv, ":i:I:RbDpvnPLet:c", long_options, + while ((c = getopt_long(argc, argv, ":i:I:RbDpvnPLet:cw", long_options, NULL)) != -1) { switch (c) { case 'i': @@ -3797,6 +3818,12 @@ zfs_do_send(int argc, char **argv) case 'c': flags.compress = B_TRUE; break; + case 'w': + flags.raw = B_TRUE; + flags.compress = B_TRUE; + flags.embed_data = B_TRUE; + flags.largeblock = B_TRUE; + break; case ':': /* * If a parameter was not passed, optopt contains the @@ -3904,6 +3931,8 @@ zfs_do_send(int argc, char **argv) lzc_flags |= LZC_SEND_FLAG_EMBED_DATA; if (flags.compress) lzc_flags |= LZC_SEND_FLAG_COMPRESS; + if (flags.raw) + lzc_flags |= LZC_SEND_FLAG_RAW; if (fromname != NULL && (fromname[0] == '#' || fromname[0] == 
'@')) { @@ -4140,6 +4169,8 @@ zfs_do_receive(int argc, char **argv) #define ZFS_DELEG_PERM_DIFF "diff" #define ZFS_DELEG_PERM_BOOKMARK "bookmark" #define ZFS_DELEG_PERM_REMAP "remap" +#define ZFS_DELEG_PERM_LOAD_KEY "load-key" +#define ZFS_DELEG_PERM_CHANGE_KEY "change-key" #define ZFS_NUM_DELEG_NOTES ZFS_DELEG_NOTE_NONE @@ -4161,6 +4192,8 @@ static zfs_deleg_perm_tab_t zfs_deleg_perm_tbl[] = { { ZFS_DELEG_PERM_SNAPSHOT, ZFS_DELEG_NOTE_SNAPSHOT }, { ZFS_DELEG_PERM_BOOKMARK, ZFS_DELEG_NOTE_BOOKMARK }, { ZFS_DELEG_PERM_REMAP, ZFS_DELEG_NOTE_REMAP }, + { ZFS_DELEG_PERM_LOAD_KEY, ZFS_DELEG_NOTE_LOAD_KEY }, + { ZFS_DELEG_PERM_CHANGE_KEY, ZFS_DELEG_NOTE_CHANGE_KEY }, { ZFS_DELEG_PERM_GROUPQUOTA, ZFS_DELEG_NOTE_GROUPQUOTA }, { ZFS_DELEG_PERM_GROUPUSED, ZFS_DELEG_NOTE_GROUPUSED }, @@ -4728,6 +4761,12 @@ deleg_perm_comment(zfs_deleg_note_t note) case ZFS_DELEG_NOTE_SNAPSHOT: str = gettext(""); break; + case ZFS_DELEG_NOTE_LOAD_KEY: + str = gettext("Allows loading or unloading an encryption key"); + break; + case ZFS_DELEG_NOTE_CHANGE_KEY: + str = gettext("Allows changing or adding an encryption key"); + break; /* * case ZFS_DELEG_NOTE_VSCAN: * str = gettext(""); @@ -5963,6 +6002,22 @@ share_mount_one(zfs_handle_t *zhp, int op, int flags, char *protocol, } /* + * If this filesystem is encrypted and does not have + * a loaded key, we can not mount it. + */ + if ((flags & MS_CRYPT) == 0 && + zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION) != ZIO_CRYPT_OFF && + zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS) == + ZFS_KEYSTATUS_UNAVAILABLE) { + if (!explicit) + return (0); + + (void) fprintf(stderr, gettext("cannot %s '%s': " + "encryption key not loaded\n"), cmdname, zfs_get_name(zhp)); + return (1); + } + + /* * If this filesystem is inconsistent and has a receive resume * token, we can not mount it. 
*/ @@ -6008,7 +6063,7 @@ share_mount_one(zfs_handle_t *zhp, int op, int flags, char *protocol, } if (!zfs_is_mounted(zhp, NULL) && - zfs_mount(zhp, NULL, 0) != 0) + zfs_mount(zhp, NULL, flags) != 0) return (1); if (protocol == NULL) { @@ -6135,7 +6190,7 @@ share_mount(int op, int argc, char **argv) int flags = 0; /* check options */ - while ((c = getopt(argc, argv, op == OP_MOUNT ? ":avo:O" : "a")) + while ((c = getopt(argc, argv, op == OP_MOUNT ? ":alvo:O" : "al")) != -1) { switch (c) { case 'a': @@ -6144,6 +6199,9 @@ share_mount(int op, int argc, char **argv) case 'v': verbose = B_TRUE; break; + case 'l': + flags |= MS_CRYPT; + break; case 'o': if (*optarg == '\0') { (void) fprintf(stderr, gettext("empty mount " @@ -7289,6 +7347,229 @@ usage: return (-1); } +typedef struct loadkey_cbdata { + boolean_t cb_loadkey; + boolean_t cb_recursive; + boolean_t cb_noop; + char *cb_keylocation; + uint64_t cb_numfailed; + uint64_t cb_numattempted; +} loadkey_cbdata_t; + +static int +load_key_callback(zfs_handle_t *zhp, void *data) +{ + int ret; + boolean_t is_encroot; + loadkey_cbdata_t *cb = data; + uint64_t keystatus = zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS); + + /* + * If we are working recursively, we want to skip loading / unloading + * keys for non-encryption roots and datasets whose keys are already + * in the desired end-state. 
+ */ + if (cb->cb_recursive) { + ret = zfs_crypto_get_encryption_root(zhp, &is_encroot, NULL); + if (ret != 0) + return (ret); + if (!is_encroot) + return (0); + + if ((cb->cb_loadkey && keystatus == ZFS_KEYSTATUS_AVAILABLE) || + (!cb->cb_loadkey && keystatus == ZFS_KEYSTATUS_UNAVAILABLE)) + return (0); + } + + cb->cb_numattempted++; + + if (cb->cb_loadkey) + ret = zfs_crypto_load_key(zhp, cb->cb_noop, cb->cb_keylocation); + else + ret = zfs_crypto_unload_key(zhp); + + if (ret != 0) { + cb->cb_numfailed++; + return (ret); + } + + return (0); +} + +static int +load_unload_keys(int argc, char **argv, boolean_t loadkey) +{ + int c, ret = 0, flags = 0; + boolean_t do_all = B_FALSE; + loadkey_cbdata_t cb = { 0 }; + + cb.cb_loadkey = loadkey; + + while ((c = getopt(argc, argv, "anrL:")) != -1) { + /* noop and alternate keylocations only apply to zfs load-key */ + if (loadkey) { + switch (c) { + case 'n': + cb.cb_noop = B_TRUE; + continue; + case 'L': + cb.cb_keylocation = optarg; + continue; + default: + break; + } + } + + switch (c) { + case 'a': + do_all = B_TRUE; + cb.cb_recursive = B_TRUE; + break; + case 'r': + flags |= ZFS_ITER_RECURSE; + cb.cb_recursive = B_TRUE; + break; + default: + (void) fprintf(stderr, + gettext("invalid option '%c'\n"), optopt); + usage(B_FALSE); + } + } + + argc -= optind; + argv += optind; + + if (!do_all && argc == 0) { + (void) fprintf(stderr, + gettext("Missing dataset argument or -a option\n")); + usage(B_FALSE); + } + + if (do_all && argc != 0) { + (void) fprintf(stderr, + gettext("Cannot specify dataset with -a option\n")); + usage(B_FALSE); + } + + if (cb.cb_recursive && cb.cb_keylocation != NULL && + strcmp(cb.cb_keylocation, "prompt") != 0) { + (void) fprintf(stderr, gettext("alternate keylocation may only " + "be 'prompt' with -r or -a\n")); + usage(B_FALSE); + } + + ret = zfs_for_each(argc, argv, flags, + ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, NULL, NULL, 0, + load_key_callback, &cb); + + if (cb.cb_noop || (cb.cb_recursive && 
cb.cb_numattempted != 0)) { + (void) printf(gettext("%llu / %llu key(s) successfully %s\n"), + (u_longlong_t)(cb.cb_numattempted - cb.cb_numfailed), + (u_longlong_t)cb.cb_numattempted, + loadkey ? (cb.cb_noop ? "verified" : "loaded") : + "unloaded"); + } + + if (cb.cb_numfailed != 0) + ret = -1; + + return (ret); +} + +static int +zfs_do_load_key(int argc, char **argv) +{ + return (load_unload_keys(argc, argv, B_TRUE)); +} + + +static int +zfs_do_unload_key(int argc, char **argv) +{ + return (load_unload_keys(argc, argv, B_FALSE)); +} + +static int +zfs_do_change_key(int argc, char **argv) +{ + int c, ret; + uint64_t keystatus; + boolean_t loadkey = B_FALSE, inheritkey = B_FALSE; + zfs_handle_t *zhp = NULL; + nvlist_t *props = fnvlist_alloc(); + + while ((c = getopt(argc, argv, "lio:")) != -1) { + switch (c) { + case 'l': + loadkey = B_TRUE; + break; + case 'i': + inheritkey = B_TRUE; + break; + case 'o': + if (parseprop(props, optarg) != 0) { + nvlist_free(props); + return (1); + } + break; + default: + (void) fprintf(stderr, + gettext("invalid option '%c'\n"), optopt); + usage(B_FALSE); + } + } + + if (inheritkey && !nvlist_empty(props)) { + (void) fprintf(stderr, + gettext("Properties not allowed for inheriting\n")); + usage(B_FALSE); + } + + argc -= optind; + argv += optind; + + if (argc < 1) { + (void) fprintf(stderr, gettext("Missing dataset argument\n")); + usage(B_FALSE); + } + + if (argc > 1) { + (void) fprintf(stderr, gettext("Too many arguments\n")); + usage(B_FALSE); + } + + zhp = zfs_open(g_zfs, argv[argc - 1], + ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); + if (zhp == NULL) + usage(B_FALSE); + + if (loadkey) { + keystatus = zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS); + if (keystatus != ZFS_KEYSTATUS_AVAILABLE) { + ret = zfs_crypto_load_key(zhp, B_FALSE, NULL); + if (ret != 0) { + nvlist_free(props); + zfs_close(zhp); + return (-1); + } + } + + /* refresh the properties so the new keystatus is visible */ + zfs_refresh_properties(zhp); + } + + ret = 
zfs_crypto_rewrap(zhp, props, inheritkey); + if (ret != 0) { + nvlist_free(props); + zfs_close(zhp); + return (-1); + } + + nvlist_free(props); + zfs_close(zhp); + return (0); +} + int main(int argc, char **argv) { diff --git a/usr/src/cmd/zinject/translate.c b/usr/src/cmd/zinject/translate.c index 53a38e1ea8..090f2448b0 100644 --- a/usr/src/cmd/zinject/translate.c +++ b/usr/src/cmd/zinject/translate.c @@ -175,7 +175,7 @@ object_from_path(const char *dataset, const char *path, struct stat64 *statbuf, */ sync(); - err = dmu_objset_own(dataset, DMU_OST_ZFS, B_TRUE, FTAG, &os); + err = dmu_objset_own(dataset, DMU_OST_ZFS, B_TRUE, B_FALSE, FTAG, &os); if (err != 0) { (void) fprintf(stderr, "cannot open dataset '%s': %s\n", dataset, strerror(err)); @@ -185,7 +185,7 @@ object_from_path(const char *dataset, const char *path, struct stat64 *statbuf, record->zi_objset = dmu_objset_id(os); record->zi_object = statbuf->st_ino; - dmu_objset_disown(os, FTAG); + dmu_objset_disown(os, B_FALSE, FTAG); return (0); } @@ -261,7 +261,7 @@ calculate_range(const char *dataset, err_type_t type, int level, char *range, * size. */ if ((err = dmu_objset_own(dataset, DMU_OST_ANY, - B_TRUE, FTAG, &os)) != 0) { + B_TRUE, B_FALSE, FTAG, &os)) != 0) { (void) fprintf(stderr, "cannot open dataset '%s': %s\n", dataset, strerror(err)); goto out; @@ -323,7 +323,7 @@ out: dnode_rele(dn, FTAG); } if (os) - dmu_objset_disown(os, FTAG); + dmu_objset_disown(os, B_FALSE, FTAG); return (ret); } diff --git a/usr/src/cmd/zinject/zinject.c b/usr/src/cmd/zinject/zinject.c index 1c0b3199bd..fc836f11e5 100644 --- a/usr/src/cmd/zinject/zinject.c +++ b/usr/src/cmd/zinject/zinject.c @@ -112,9 +112,9 @@ * specified. * * The '-e' option takes a string describing the errno to simulate. This must - * be either 'io' or 'checksum'. In most cases this will result in the same - * behavior, but RAID-Z will produce a different set of ereports for this - * situation. + * be one of 'io', 'checksum', or 'decrypt'. 
In most cases this will result + * in the same behavior, but RAID-Z will produce a different set of ereports + * for this situation. * * The '-a', '-u', and '-m' flags toggle internal flush behavior. If '-a' is * specified, then the ARC cache is flushed appropriately. If '-u' is @@ -296,8 +296,9 @@ usage(void) "\t\tinterperted depending on the '-t' option.\n" "\n" "\t\t-q\tQuiet mode. Only print out the handler number added.\n" - "\t\t-e\tInject a specific error. Must be either 'io' or\n" - "\t\t\t'checksum', or 'decompress'. Default is 'io'.\n" + "\t\t-e\tInject a specific error. Must be one of 'io', " + "'checksum',\n" + "\t\t\t'decompress', or decrypt. Default is 'io'.\n" "\t\t-C\tInject the given error only into specific DVAs. The\n" "\t\t\tDVAs should be specified as a list of 0-indexed DVAs\n" "\t\t\tseparated by commas (ex. '0,2').\n" @@ -817,6 +818,8 @@ main(int argc, char **argv) error = EIO; } else if (strcasecmp(optarg, "checksum") == 0) { error = ECKSUM; + } else if (strcasecmp(optarg, "decrypt") == 0) { + error = EACCES; } else if (strcasecmp(optarg, "nxio") == 0) { error = ENXIO; } else if (strcasecmp(optarg, "dtl") == 0) { @@ -1144,14 +1147,29 @@ main(int argc, char **argv) (void) fprintf(stderr, "the '-C' option may " "not be used with logical data errors " "'decrypt' and 'decompress'\n"); + record.zi_dvas = dvas; + } + } + + record.zi_cmd = ZINJECT_DATA_FAULT; + + if (error == EACCES) { + if (type != TYPE_DATA) { + (void) fprintf(stderr, "decryption errors " + "may only be injected for 'data' types\n"); libzfs_fini(g_zfs); return (1); } - record.zi_dvas = dvas; + record.zi_cmd = ZINJECT_DECRYPT_FAULT; + /* + * Internally, ZFS actually uses ECKSUM for decryption + * errors since EACCES is used to indicate the key was + * not found. 
+ */ + error = ECKSUM; } - record.zi_cmd = ZINJECT_DATA_FAULT; if (translate_record(type, argv[0], range, level, &record, pool, dataset) != 0) return (1); diff --git a/usr/src/cmd/zoneadm/Makefile b/usr/src/cmd/zoneadm/Makefile index 2b01078aec..23584bbb5b 100644 --- a/usr/src/cmd/zoneadm/Makefile +++ b/usr/src/cmd/zoneadm/Makefile @@ -38,6 +38,8 @@ POFILES= $(OBJS:%.o=%.po) LDLIBS += -lzonecfg -lsocket -lgen -lpool -lzfs -luuid -lnvpair -lbrand -ldladm -lsecdb +INCS += -I../../common/zfs + CERRWARN += -_gcc=-Wno-uninitialized .KEEP_STATE: diff --git a/usr/src/cmd/zpool/zpool_main.c b/usr/src/cmd/zpool/zpool_main.c index 86d924fba9..dc77a21341 100644 --- a/usr/src/cmd/zpool/zpool_main.c +++ b/usr/src/cmd/zpool/zpool_main.c @@ -247,11 +247,13 @@ get_usage(zpool_help_t idx) return (gettext("\thistory [-il] [<pool>] ...\n")); case HELP_IMPORT: return (gettext("\timport [-d dir] [-D]\n" + "\timport [-d dir | -c cachefile] [-F [-n]] [-l] " + "<pool | id>\n" "\timport [-o mntopts] [-o property=value] ... \n" - "\t [-d dir | -c cachefile] [-D] [-f] [-m] [-N] " + "\t [-d dir | -c cachefile] [-D] [-l] [-f] [-m] [-N] " "[-R root] [-F [-n]] -a\n" "\timport [-o mntopts] [-o property=value] ... 
\n" - "\t [-d dir | -c cachefile] [-D] [-f] [-m] [-N] " + "\t [-d dir | -c cachefile] [-D] [-l] [-f] [-m] [-N] " "[-R root] [-F [-n]] [-t]\n" "\t [--rewind-to-checkpoint] <pool | id> [newpool]\n")); case HELP_IOSTAT: @@ -292,7 +294,7 @@ get_usage(zpool_help_t idx) case HELP_SET: return (gettext("\tset <property=value> <pool> \n")); case HELP_SPLIT: - return (gettext("\tsplit [-gLnP] [-R altroot] [-o mntopts]\n" + return (gettext("\tsplit [-gLlnP] [-R altroot] [-o mntopts]\n" "\t [-o property=value] <pool> <newpool> " "[<device> ...]\n")); case HELP_REGUID: @@ -1955,6 +1957,7 @@ show_import(nvlist_t *config) char *hostname = "unknown"; nvlist_t *nvroot, *nvinfo; int reason; + zpool_errata_t errata; const char *health; uint_t vsc; char *comment; @@ -1973,7 +1976,7 @@ show_import(nvlist_t *config) (uint64_t **)&vs, &vsc) == 0); health = zpool_state_to_name(vs->vs_state, vs->vs_aux); - reason = zpool_import_status(config, &msgid); + reason = zpool_import_status(config, &msgid, &errata); (void) printf(gettext(" pool: %s\n"), name); (void) printf(gettext(" id: %llu\n"), (u_longlong_t)guid); @@ -2072,6 +2075,11 @@ show_import(nvlist_t *config) "resilvered.\n")); break; + case ZPOOL_STATUS_ERRATA: + (void) printf(gettext(" status: Errata #%d detected.\n"), + errata); + break; + default: /* * No other status can be seen when importing pools. 
@@ -2093,6 +2101,55 @@ show_import(nvlist_t *config) (void) printf(gettext(" action: The pool can be " "imported using its name or numeric " "identifier and\n\tthe '-f' flag.\n")); + } else if (reason == ZPOOL_STATUS_ERRATA) { + switch (errata) { + case ZPOOL_ERRATA_NONE: + break; + + case ZPOOL_ERRATA_ZOL_2094_SCRUB: + (void) printf(gettext(" action: The pool can " + "be imported using its name or numeric " + "identifier,\n\thowever there is a compat" + "ibility issue which should be corrected" + "\n\tby running 'zpool scrub'\n")); + break; + + case ZPOOL_ERRATA_ZOL_2094_ASYNC_DESTROY: + (void) printf(gettext(" action: The pool can" + "not be imported with this version of ZFS " + "due to\n\tan active asynchronous destroy. " + "Revert to an earlier version\n\tand " + "allow the destroy to complete before " + "updating.\n")); + break; + + case ZPOOL_ERRATA_ZOL_6845_ENCRYPTION: + (void) printf(gettext(" action: Existing " + "encrypted datasets contain an on-disk " + "incompatibility, which\n\tneeds to be " + "corrected. Backup these datasets to new " + "encrypted datasets\n\tand destroy the " + "old ones.\n")); + break; + case ZPOOL_ERRATA_ZOL_8308_ENCRYPTION: + (void) printf(gettext(" action: Any existing " + "encrypted datasets contain an on-disk " + "incompatibility which\n\tmay cause " + "on-disk corruption with 'zfs recv' and " + "which needs to be\n\tcorrected. Enable " + "the bookmark_v2 feature and backup " + "these datasets to new encrypted " + "datasets and\n\tdestroy the old ones. " + "If this pool does not contain any " + "encrypted datasets, simply enable\n\t" + "the bookmark_v2 feature.\n")); + break; + default: + /* + * All errata must contain an action message. 
+ */ + assert(0); + } } else { (void) printf(gettext(" action: The pool can be " "imported using its name or numeric " @@ -2235,6 +2292,7 @@ static int do_import(nvlist_t *config, const char *newname, const char *mntopts, nvlist_t *props, int flags) { + int ret = 0; zpool_handle_t *zhp; char *name; uint64_t version; @@ -2315,6 +2373,16 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts, if ((zhp = zpool_open_canfail(g_zfs, name)) == NULL) return (1); + /* + * Loading keys is best effort. We don't want to return immediately + * if it fails but we do want to give the error to the caller. + */ + if (flags & ZFS_IMPORT_LOAD_KEYS) { + ret = zfs_crypto_attempt_load_keys(g_zfs, name); + if (ret != 0) + ret = 1; + } + if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL && !(flags & ZFS_IMPORT_ONLY) && zpool_enable_datasets(zhp, mntopts, 0) != 0) { @@ -2323,7 +2391,7 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts, } zpool_close(zhp); - return (0); + return (ret); } /* @@ -2401,9 +2469,9 @@ zpool_do_checkpoint(int argc, char **argv) /* * zpool import [-d dir] [-D] - * import [-o mntopts] [-o prop=value] ... [-R root] [-D] + * import [-o mntopts] [-o prop=value] ... [-R root] [-D] [-l] * [-d dir | -c cachefile] [-f] -a - * import [-o mntopts] [-o prop=value] ... [-R root] [-D] + * import [-o mntopts] [-o prop=value] ... [-R root] [-D] [-l] * [-d dir | -c cachefile] [-f] [-n] [-F] [-t] * <pool | id> [newpool] * @@ -2443,6 +2511,7 @@ zpool_do_checkpoint(int argc, char **argv) * -a Import all pools found. * * -o Set property=value and/or temporary mount options (without '='). + * -l Load encryption keys while importing. * * --rewind-to-checkpoint * Import the pool and revert back to the checkpoint. 
@@ -2487,7 +2556,7 @@ zpool_do_import(int argc, char **argv) }; /* check options */ - while ((c = getopt_long(argc, argv, ":aCc:d:DEfFmnNo:rR:tT:VX", + while ((c = getopt_long(argc, argv, ":aCc:d:DEfFlmnNo:rR:tT:VX", long_options, NULL)) != -1) { switch (c) { case 'a': @@ -2518,6 +2587,9 @@ zpool_do_import(int argc, char **argv) case 'F': do_rewind = B_TRUE; break; + case 'l': + flags |= ZFS_IMPORT_LOAD_KEYS; + break; case 'm': flags |= ZFS_IMPORT_MISSING_LOG; break; @@ -2591,6 +2663,17 @@ zpool_do_import(int argc, char **argv) usage(B_FALSE); } + if ((flags & ZFS_IMPORT_LOAD_KEYS) && (flags & ZFS_IMPORT_ONLY)) { + (void) fprintf(stderr, gettext("-l is incompatible with -N\n")); + usage(B_FALSE); + } + + if ((flags & ZFS_IMPORT_LOAD_KEYS) && !do_all && argc == 0) { + (void) fprintf(stderr, gettext("-l is only meaningful during " + "an import\n")); + usage(B_FALSE); + } + if ((dryrun || xtreme_rewind) && !do_rewind) { (void) fprintf(stderr, gettext("-n or -X only meaningful with -F\n")); @@ -4034,6 +4117,7 @@ zpool_do_detach(int argc, char **argv) * -o Set property=value, or set mount options. * -P Display full path for vdev name. * -R Mount the split-off pool under an alternate root. + * -l Load encryption keys while importing. * * Splits the named pool and gives it the new pool name. 
Devices to be split * off may be listed, provided that no more than one device is specified @@ -4051,6 +4135,7 @@ zpool_do_split(int argc, char **argv) char *mntopts = NULL; splitflags_t flags; int c, ret = 0; + boolean_t loadkeys = B_FALSE; zpool_handle_t *zhp; nvlist_t *config, *props = NULL; @@ -4059,7 +4144,7 @@ zpool_do_split(int argc, char **argv) flags.name_flags = 0; /* check options */ - while ((c = getopt(argc, argv, ":gLR:no:P")) != -1) { + while ((c = getopt(argc, argv, ":gLR:lno:P")) != -1) { switch (c) { case 'g': flags.name_flags |= VDEV_NAME_GUID; @@ -4076,6 +4161,9 @@ zpool_do_split(int argc, char **argv) usage(B_FALSE); } break; + case 'l': + loadkeys = B_TRUE; + break; case 'n': flags.dryrun = B_TRUE; break; @@ -4114,6 +4202,12 @@ zpool_do_split(int argc, char **argv) usage(B_FALSE); } + if (!flags.import && loadkeys) { + (void) fprintf(stderr, gettext("loading keys is only " + "valid when importing the pool\n")); + usage(B_FALSE); + } + argc -= optind; argv += optind; @@ -4159,6 +4253,13 @@ zpool_do_split(int argc, char **argv) */ if ((zhp = zpool_open_canfail(g_zfs, newpool)) == NULL) return (1); + + if (loadkeys) { + ret = zfs_crypto_attempt_load_keys(g_zfs, newpool); + if (ret != 0) + ret = 1; + } + if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL && zpool_enable_datasets(zhp, mntopts, 0) != 0) { ret = 1; @@ -5203,12 +5304,13 @@ status_callback(zpool_handle_t *zhp, void *data) nvlist_t *config, *nvroot; char *msgid; int reason; + zpool_errata_t errata; const char *health; uint_t c; vdev_stat_t *vs; config = zpool_get_config(zhp, NULL); - reason = zpool_get_status(zhp, &msgid); + reason = zpool_get_status(zhp, &msgid, &errata); cbp->cb_count++; @@ -5434,6 +5536,52 @@ status_callback(zpool_handle_t *zhp, void *data) "'zpool clear'.\n")); break; + case ZPOOL_STATUS_ERRATA: + (void) printf(gettext("status: Errata #%d detected.\n"), + errata); + + switch (errata) { + case ZPOOL_ERRATA_NONE: + break; + + case ZPOOL_ERRATA_ZOL_2094_SCRUB: + (void) 
printf(gettext("action: To correct the issue " + "run 'zpool scrub'.\n")); + break; + + case ZPOOL_ERRATA_ZOL_6845_ENCRYPTION: + (void) printf(gettext("\tExisting encrypted datasets " + "contain an on-disk incompatibility\n\twhich " + "needs to be corrected.\n")); + (void) printf(gettext("action: To correct the issue " + "backup existing encrypted datasets to new\n\t" + "encrypted datasets and destroy the old ones. " + "'zfs mount -o ro' can\n\tbe used to temporarily " + "mount existing encrypted datasets readonly.\n")); + break; + + case ZPOOL_ERRATA_ZOL_8308_ENCRYPTION: + (void) printf(gettext("\tExisting encrypted datasets " + "contain an on-disk incompatibility\n\twhich " + "needs to be corrected.\n")); + (void) printf(gettext("action: To correct the issue " + "enable the bookmark_v2 feature and " + "backup\n\tany existing encrypted datasets to " + "new encrypted datasets and\n\tdestroy the old " + "ones. If this pool does not contain any\n\t" + "encrypted datasets, simply enable the " + "bookmark_v2 feature\n")); + break; + + default: + /* + * All errata which allow the pool to be imported + * must contain an action message. + */ + assert(0); + } + break; + default: /* * The remaining errors can't actually be generated, yet. diff --git a/usr/src/cmd/zstreamdump/zstreamdump.c b/usr/src/cmd/zstreamdump/zstreamdump.c index 987f4fdf12..3f14c08179 100644 --- a/usr/src/cmd/zstreamdump/zstreamdump.c +++ b/usr/src/cmd/zstreamdump/zstreamdump.c @@ -196,12 +196,33 @@ print_block(char *buf, int length) } } +/* + * Print an array of bytes to stdout as hexidecimal characters. str must + * have buf_len * 2 + 1 bytes of space. 
+ */ +static void +sprintf_bytes(char *str, uint8_t *buf, uint_t buf_len) +{ + int i, n; + + for (i = 0; i < buf_len; i++) { + n = sprintf(str, "%02x", buf[i] & 0xff); + str += n; + } + + str[0] = '\0'; +} + int main(int argc, char *argv[]) { char *buf = safe_malloc(SPA_MAXBLOCKSIZE); uint64_t drr_record_count[DRR_NUMTYPES] = { 0 }; + char salt[ZIO_DATA_SALT_LEN * 2 + 1]; + char iv[ZIO_DATA_IV_LEN * 2 + 1]; + char mac[ZIO_DATA_MAC_LEN * 2 + 1]; uint64_t total_records = 0; + uint64_t payload_size; dmu_replay_record_t thedrr; dmu_replay_record_t *drr = &thedrr; struct drr_begin *drrb = &thedrr.drr_u.drr_begin; @@ -213,6 +234,7 @@ main(int argc, char *argv[]) struct drr_free *drrf = &thedrr.drr_u.drr_free; struct drr_spill *drrs = &thedrr.drr_u.drr_spill; struct drr_write_embedded *drrwe = &thedrr.drr_u.drr_write_embedded; + struct drr_object_range *drror = &thedrr.drr_u.drr_object_range; struct drr_checksum *drrc = &thedrr.drr_u.drr_checksum; char c; boolean_t verbose = B_FALSE; @@ -412,26 +434,37 @@ main(int argc, char *argv[]) drro->drr_blksz = BSWAP_32(drro->drr_blksz); drro->drr_bonuslen = BSWAP_32(drro->drr_bonuslen); + drro->drr_raw_bonuslen = + BSWAP_32(drro->drr_raw_bonuslen); drro->drr_toguid = BSWAP_64(drro->drr_toguid); } + + payload_size = DRR_OBJECT_PAYLOAD_SIZE(drro); + if (verbose) { (void) printf("OBJECT object = %" PRIu64 - " type = %u bonustype = %u blksz = %u" - " bonuslen = %u dn_slots = %u\n", - drro->drr_object, + "type = %u bonustype = %u blksz = %u " + "bonuslen = %u " + "raw_bonuslen = %u flags = %u maxblkid " + "= %llu " + "indblkshift = %u nlevels = %u " + "nblkptr = %u\n", + (u_longlong_t)drro->drr_object, drro->drr_type, drro->drr_bonustype, drro->drr_blksz, drro->drr_bonuslen, - drro->drr_dn_slots); + drro->drr_raw_bonuslen, + drro->drr_flags, + (u_longlong_t)drro->drr_maxblkid, + drro->drr_indblkshift, + drro->drr_nlevels, + drro->drr_nblkptr); } if (drro->drr_bonuslen > 0) { - (void) ssread(buf, - P2ROUNDUP(drro->drr_bonuslen, 8), 
&zc); - if (dump) { - print_block(buf, - P2ROUNDUP(drro->drr_bonuslen, 8)); - } + (void) ssread(buf, payload_size, &zc); + if (dump) + print_block(buf, payload_size); } break; @@ -465,28 +498,40 @@ main(int argc, char *argv[]) BSWAP_64(drrw->drr_compressed_size); } - uint64_t payload_size = DRR_WRITE_PAYLOAD_SIZE(drrw); + payload_size = DRR_WRITE_PAYLOAD_SIZE(drrw); /* * If this is verbose and/or dump output, * print info on the modified block */ if (verbose) { + sprintf_bytes(salt, drrw->drr_salt, + ZIO_DATA_SALT_LEN); + sprintf_bytes(iv, drrw->drr_iv, + ZIO_DATA_IV_LEN); + sprintf_bytes(mac, drrw->drr_mac, + ZIO_DATA_MAC_LEN); + (void) printf("WRITE object = %llu type = %u " "checksum type = %u compression type = %u\n" - " offset = %llu logical_size = %llu " + " flags = %u offset = %llu " + "logical_size = %llu " "compressed_size = %llu " - "payload_size = %llu " - "props = %llx\n", + "payload_size = %llu props = %llx " + "salt = %s iv = %s mac = %s\n", (u_longlong_t)drrw->drr_object, drrw->drr_type, drrw->drr_checksumtype, drrw->drr_compressiontype, + drrw->drr_flags, (u_longlong_t)drrw->drr_offset, (u_longlong_t)drrw->drr_logical_size, (u_longlong_t)drrw->drr_compressed_size, (u_longlong_t)payload_size, - (u_longlong_t)drrw->drr_key.ddk_prop); + (u_longlong_t)drrw->drr_key.ddk_prop, + salt, + iv, + mac); } /* @@ -557,15 +602,40 @@ main(int argc, char *argv[]) if (do_byteswap) { drrs->drr_object = BSWAP_64(drrs->drr_object); drrs->drr_length = BSWAP_64(drrs->drr_length); + drrs->drr_compressed_size = + BSWAP_64(drrs->drr_compressed_size); + drrs->drr_type = BSWAP_32(drrs->drr_type); } + + payload_size = DRR_SPILL_PAYLOAD_SIZE(drrs); + if (verbose) { + sprintf_bytes(salt, drrs->drr_salt, + ZIO_DATA_SALT_LEN); + sprintf_bytes(iv, drrs->drr_iv, + ZIO_DATA_IV_LEN); + sprintf_bytes(mac, drrs->drr_mac, + ZIO_DATA_MAC_LEN); + (void) printf("SPILL block for object = %llu " - "length = %llu\n", drrs->drr_object, - drrs->drr_length); + "length = %llu flags = %u " + 
"compression type = %u " + "compressed_size = %llu " + "payload_size = %llu " + "salt = %s iv = %s mac = %s\n", + (u_longlong_t)drrs->drr_object, + (u_longlong_t)drrs->drr_length, + drrs->drr_flags, + drrs->drr_compressiontype, + (u_longlong_t)drrs->drr_compressed_size, + (u_longlong_t)payload_size, + salt, + iv, + mac); } - (void) ssread(buf, drrs->drr_length, &zc); + (void) ssread(buf, payload_size, &zc); if (dump) { - print_block(buf, drrs->drr_length); + print_block(buf, payload_size); } break; case DRR_WRITE_EMBEDDED: @@ -600,6 +670,36 @@ main(int argc, char *argv[]) (void) ssread(buf, P2ROUNDUP(drrwe->drr_psize, 8), &zc); break; + case DRR_OBJECT_RANGE: + if (do_byteswap) { + drror->drr_firstobj = + BSWAP_64(drror->drr_firstobj); + drror->drr_numslots = + BSWAP_64(drror->drr_numslots); + drror->drr_toguid = BSWAP_64(drror->drr_toguid); + } + if (verbose) { + sprintf_bytes(salt, drror->drr_salt, + ZIO_DATA_SALT_LEN); + sprintf_bytes(iv, drror->drr_iv, + ZIO_DATA_IV_LEN); + sprintf_bytes(mac, drror->drr_mac, + ZIO_DATA_MAC_LEN); + + (void) printf("OBJECT_RANGE firstobj = %llu " + "numslots = %llu flags = %u " + "salt = %s iv = %s mac = %s\n", + (u_longlong_t)drror->drr_firstobj, + (u_longlong_t)drror->drr_numslots, + drror->drr_flags, + salt, + iv, + mac); + } + break; + case DRR_NUMTYPES: + /* should never be reached */ + exit(1); } if (drr->drr_type != DRR_BEGIN && very_verbose) { (void) printf(" checksum = %llx/%llx/%llx/%llx\n", diff --git a/usr/src/cmd/ztest/ztest.c b/usr/src/cmd/ztest/ztest.c index c0054cddbe..39cdf261b9 100644 --- a/usr/src/cmd/ztest/ztest.c +++ b/usr/src/cmd/ztest/ztest.c @@ -212,6 +212,7 @@ extern unsigned long zfs_reconstruct_indirect_damage_fraction; static ztest_shared_opts_t *ztest_shared_opts; static ztest_shared_opts_t ztest_opts; +static char *ztest_wkeydata = "abcdefghijklmnopqrstuvwxyz012345"; typedef struct ztest_shared_ds { uint64_t zd_seq; @@ -1213,6 +1214,42 @@ ztest_spa_prop_set_uint64(zpool_prop_t prop, uint64_t value) 
return (error); } +static int +ztest_dmu_objset_own(const char *name, dmu_objset_type_t type, + boolean_t readonly, boolean_t decrypt, void *tag, objset_t **osp) +{ + int err; + + err = dmu_objset_own(name, type, readonly, decrypt, tag, osp); + if (decrypt && err == EACCES) { + char ddname[ZFS_MAX_DATASET_NAME_LEN]; + dsl_crypto_params_t *dcp; + nvlist_t *crypto_args = fnvlist_alloc(); + char *cp = NULL; + + /* spa_keystore_load_wkey() expects a dsl dir name */ + (void) strcpy(ddname, name); + cp = strchr(ddname, '@'); + if (cp != NULL) + *cp = '\0'; + + fnvlist_add_uint8_array(crypto_args, "wkeydata", + (uint8_t *)ztest_wkeydata, WRAPPING_KEY_LEN); + VERIFY0(dsl_crypto_params_create_nvlist(DCP_CMD_NONE, NULL, + crypto_args, &dcp)); + err = spa_keystore_load_wkey(ddname, dcp, B_FALSE); + dsl_crypto_params_free(dcp, B_FALSE); + fnvlist_free(crypto_args); + + if (err != 0) + return (err); + + err = dmu_objset_own(name, type, readonly, decrypt, tag, osp); + } + + return (err); +} + static void ztest_rll_init(rll_t *rll) { @@ -1856,7 +1893,7 @@ ztest_replay_write(void *arg1, void *arg2, boolean_t byteswap) dmu_write(os, lr->lr_foid, offset, length, data, tx); } else { bcopy(data, abuf->b_data, length); - dmu_assign_arcbuf(db, offset, abuf, tx); + dmu_assign_arcbuf_by_dbuf(db, offset, abuf, tx); } (void) ztest_log_write(zd, tx, lr); @@ -2563,7 +2600,7 @@ ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id) */ nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, NULL, 0, 0, 1); VERIFY3U(ENOENT, ==, - spa_create("ztest_bad_file", nvroot, NULL, NULL)); + spa_create("ztest_bad_file", nvroot, NULL, NULL, NULL)); nvlist_free(nvroot); /* @@ -2571,7 +2608,7 @@ ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id) */ nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, NULL, 0, 2, 1); VERIFY3U(ENOENT, ==, - spa_create("ztest_bad_mirror", nvroot, NULL, NULL)); + spa_create("ztest_bad_mirror", nvroot, NULL, NULL, NULL)); nvlist_free(nvroot); /* @@ -2580,7 +2617,8 @@ 
ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id) */ rw_enter(&ztest_name_lock, RW_READER); nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, NULL, 0, 0, 1); - VERIFY3U(EEXIST, ==, spa_create(zo->zo_pool, nvroot, NULL, NULL)); + VERIFY3U(EEXIST, ==, + spa_create(zo->zo_pool, nvroot, NULL, NULL, NULL)); nvlist_free(nvroot); VERIFY3U(0, ==, spa_open(zo->zo_pool, &spa, FTAG)); VERIFY3U(EBUSY, ==, spa_destroy(zo->zo_pool)); @@ -2691,7 +2729,7 @@ ztest_spa_upgrade(ztest_ds_t *zd, uint64_t id) props = fnvlist_alloc(); fnvlist_add_uint64(props, zpool_prop_to_name(ZPOOL_PROP_VERSION), version); - VERIFY0(spa_create(name, nvroot, props, NULL)); + VERIFY0(spa_create(name, nvroot, props, NULL, NULL)); fnvlist_free(nvroot); fnvlist_free(props); @@ -3713,11 +3751,65 @@ ztest_objset_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx) static int ztest_dataset_create(char *dsname) { - uint64_t zilset = ztest_random(100); - int err = dmu_objset_create(dsname, DMU_OST_OTHER, 0, + int err; + uint64_t rand; + dsl_crypto_params_t *dcp = NULL; + + /* + * 50% of the time, we create encrypted datasets + * using a random cipher suite and a hard-coded + * wrapping key. + */ +#ifdef WITHCRYPTO + /* + * Until the crypto framework is compiled in userland, the ztest using + * crypto will not work. + */ + rand = ztest_random(2); +#else + rand = 0; +#endif + if (rand != 0) { + nvlist_t *crypto_args = fnvlist_alloc(); + nvlist_t *props = fnvlist_alloc(); + + /* slight bias towards the default cipher suite */ + rand = ztest_random(ZIO_CRYPT_FUNCTIONS); + if (rand < ZIO_CRYPT_AES_128_CCM) + rand = ZIO_CRYPT_ON; + + fnvlist_add_uint64(props, + zfs_prop_to_name(ZFS_PROP_ENCRYPTION), rand); + fnvlist_add_uint8_array(crypto_args, "wkeydata", + (uint8_t *)ztest_wkeydata, WRAPPING_KEY_LEN); + + /* + * These parameters aren't really used by the kernel. They + * are simply stored so that userspace knows how to load + * the wrapping key. 
+ */ + fnvlist_add_uint64(props, + zfs_prop_to_name(ZFS_PROP_KEYFORMAT), ZFS_KEYFORMAT_RAW); + fnvlist_add_string(props, + zfs_prop_to_name(ZFS_PROP_KEYLOCATION), "prompt"); + fnvlist_add_uint64(props, + zfs_prop_to_name(ZFS_PROP_PBKDF2_SALT), 0ULL); + fnvlist_add_uint64(props, + zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS), 0ULL); + + VERIFY0(dsl_crypto_params_create_nvlist(DCP_CMD_NONE, props, + crypto_args, &dcp)); + + fnvlist_free(crypto_args); + fnvlist_free(props); + } + + err = dmu_objset_create(dsname, DMU_OST_OTHER, 0, dcp, ztest_objset_create_cb, NULL); + dsl_crypto_params_free(dcp, !!err); - if (err || zilset < 80) + rand = ztest_random(100); + if (err || rand < 80) return (err); if (ztest_opts.zo_verbose >= 6) @@ -3737,7 +3829,8 @@ ztest_objset_destroy_cb(const char *name, void *arg) /* * Verify that the dataset contains a directory object. */ - VERIFY0(dmu_objset_own(name, DMU_OST_OTHER, B_TRUE, FTAG, &os)); + VERIFY0(ztest_dmu_objset_own(name, DMU_OST_OTHER, B_TRUE, + B_TRUE, FTAG, &os)); error = dmu_object_info(os, ZTEST_DIROBJ, &doi); if (error != ENOENT) { /* We could have crashed in the middle of destroying it */ @@ -3745,7 +3838,7 @@ ztest_objset_destroy_cb(const char *name, void *arg) ASSERT3U(doi.doi_type, ==, DMU_OT_ZAP_OTHER); ASSERT3S(doi.doi_physical_blocks_512, >=, 0); } - dmu_objset_disown(os, FTAG); + dmu_objset_disown(os, B_TRUE, FTAG); /* * Destroy the dataset. @@ -3818,11 +3911,12 @@ ztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id) * (invoked from ztest_objset_destroy_cb()) should just throw it away. 
*/ if (ztest_random(2) == 0 && - dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os) == 0) { + ztest_dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, + B_TRUE, FTAG, &os) == 0) { ztest_zd_init(&zdtmp, NULL, os); zil_replay(os, &zdtmp, ztest_replay_vector); ztest_zd_fini(&zdtmp); - dmu_objset_disown(os, FTAG); + dmu_objset_disown(os, B_TRUE, FTAG); } /* @@ -3836,8 +3930,8 @@ ztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id) /* * Verify that the destroyed dataset is no longer in the namespace. */ - VERIFY3U(ENOENT, ==, dmu_objset_own(name, DMU_OST_OTHER, B_TRUE, - FTAG, &os)); + VERIFY3U(ENOENT, ==, ztest_dmu_objset_own(name, DMU_OST_OTHER, B_TRUE, + B_TRUE, FTAG, &os)); /* * Verify that we can create a new dataset. @@ -3852,7 +3946,8 @@ ztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id) fatal(0, "dmu_objset_create(%s) = %d", name, error); } - VERIFY0(dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os)); + VERIFY0(ztest_dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, B_TRUE, + FTAG, &os)); ztest_zd_init(&zdtmp, NULL, os); @@ -3876,7 +3971,7 @@ ztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id) * Verify that we cannot create an existing dataset. */ VERIFY3U(EEXIST, ==, - dmu_objset_create(name, DMU_OST_OTHER, 0, NULL, NULL)); + dmu_objset_create(name, DMU_OST_OTHER, 0, NULL, NULL, NULL)); /* * Verify that we can hold an objset that is also owned. @@ -3887,11 +3982,11 @@ ztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id) /* * Verify that we cannot own an objset that is already owned. 
*/ - VERIFY3U(EBUSY, ==, - dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os2)); + VERIFY3U(EBUSY, ==, ztest_dmu_objset_own(name, DMU_OST_OTHER, + B_FALSE, B_TRUE, FTAG, &os2)); zil_close(zilog); - dmu_objset_disown(os, FTAG); + dmu_objset_disown(os, B_TRUE, FTAG); ztest_zd_fini(&zdtmp); rw_exit(&ztest_name_lock); @@ -4025,19 +4120,20 @@ ztest_dsl_dataset_promote_busy(ztest_ds_t *zd, uint64_t id) fatal(0, "dmu_objset_create(%s) = %d", clone2name, error); } - error = dmu_objset_own(snap2name, DMU_OST_ANY, B_TRUE, FTAG, &os); + error = ztest_dmu_objset_own(snap2name, DMU_OST_ANY, B_TRUE, B_TRUE, + FTAG, &os); if (error) fatal(0, "dmu_objset_own(%s) = %d", snap2name, error); error = dsl_dataset_promote(clone2name, NULL); if (error == ENOSPC) { - dmu_objset_disown(os, FTAG); + dmu_objset_disown(os, B_TRUE, FTAG); ztest_record_enospc(FTAG); goto out; } if (error != EBUSY) fatal(0, "dsl_dataset_promote(%s), %d, not EBUSY", clone2name, error); - dmu_objset_disown(os, FTAG); + dmu_objset_disown(os, B_TRUE, FTAG); out: ztest_dsl_dataset_cleanup(osname, id); @@ -4403,7 +4499,7 @@ ztest_dmu_read_write_zcopy(ztest_ds_t *zd, uint64_t id) * bigobj, at the tail of the nth chunk * * The chunk size is set equal to bigobj block size so that - * dmu_assign_arcbuf() can be tested for object updates. + * dmu_assign_arcbuf_by_dbuf() can be tested for object updates. */ /* @@ -4463,7 +4559,7 @@ ztest_dmu_read_write_zcopy(ztest_ds_t *zd, uint64_t id) /* * In iteration 5 (i == 5) use arcbufs * that don't match bigobj blksz to test - * dmu_assign_arcbuf() when it can't directly + * dmu_assign_arcbuf_by_dbuf() when it can't directly * assign an arcbuf to a dbuf. */ for (j = 0; j < s; j++) { @@ -4508,8 +4604,8 @@ ztest_dmu_read_write_zcopy(ztest_ds_t *zd, uint64_t id) /* * 50% of the time don't read objects in the 1st iteration to - * test dmu_assign_arcbuf() for the case when there're no - * existing dbufs for the specified offsets. 
+ * test dmu_assign_arcbuf_by_dbuf() for the case when there are + * no existing dbufs for the specified offsets. */ if (i != 0 || ztest_random(2) != 0) { error = dmu_read(os, packobj, packoff, @@ -4554,12 +4650,12 @@ ztest_dmu_read_write_zcopy(ztest_ds_t *zd, uint64_t id) FTAG, &dbt, DMU_READ_NO_PREFETCH) == 0); } if (i != 5 || chunksize < (SPA_MINBLOCKSIZE * 2)) { - dmu_assign_arcbuf(bonus_db, off, + dmu_assign_arcbuf_by_dbuf(bonus_db, off, bigbuf_arcbufs[j], tx); } else { - dmu_assign_arcbuf(bonus_db, off, + dmu_assign_arcbuf_by_dbuf(bonus_db, off, bigbuf_arcbufs[2 * j], tx); - dmu_assign_arcbuf(bonus_db, + dmu_assign_arcbuf_by_dbuf(bonus_db, off + chunksize / 2, bigbuf_arcbufs[2 * j + 1], tx); } @@ -6273,7 +6369,8 @@ ztest_dataset_open(int d) } ASSERT(error == 0 || error == EEXIST); - VERIFY0(dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, zd, &os)); + VERIFY0(ztest_dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, + B_TRUE, zd, &os)); rw_exit(&ztest_name_lock); ztest_zd_init(zd, ZTEST_GET_SHARED_DS(d), os); @@ -6314,7 +6411,7 @@ ztest_dataset_close(int d) ztest_ds_t *zd = &ztest_ds[d]; zil_close(zd->zd_zilog); - dmu_objset_disown(zd->zd_os, zd); + dmu_objset_disown(zd->zd_os, B_TRUE, zd); ztest_zd_fini(zd); } @@ -6364,13 +6461,13 @@ ztest_run(ztest_shared_t *zs) ztest_spa = spa; dmu_objset_stats_t dds; - VERIFY0(dmu_objset_own(ztest_opts.zo_pool, - DMU_OST_ANY, B_TRUE, FTAG, &os)); + VERIFY0(ztest_dmu_objset_own(ztest_opts.zo_pool, + DMU_OST_ANY, B_TRUE, B_TRUE, FTAG, &os)); dsl_pool_config_enter(dmu_objset_pool(os), FTAG); dmu_objset_fast_stat(os, &dds); dsl_pool_config_exit(dmu_objset_pool(os), FTAG); zs->zs_guid = dds.dds_guid; - dmu_objset_disown(os, FTAG); + dmu_objset_disown(os, B_TRUE, FTAG); spa->spa_dedup_ditto = 2 * ZIO_DEDUPDITTO_MIN; @@ -6584,10 +6681,9 @@ ztest_freeze(void) VERIFY3U(0, ==, spa_open(ztest_opts.zo_pool, &spa, FTAG)); ASSERT(spa_freeze_txg(spa) == UINT64_MAX); VERIFY3U(0, ==, ztest_dataset_open(0)); - ztest_dataset_close(0); - ztest_spa = 
spa; txg_wait_synced(spa_get_dsl(spa), 0); + ztest_dataset_close(0); ztest_reguid(NULL, 0); spa_close(spa, FTAG); @@ -6715,7 +6811,8 @@ ztest_init(ztest_shared_t *zs) spa_feature_table[i].fi_uname); VERIFY3U(0, ==, nvlist_add_uint64(props, buf, 0)); } - VERIFY3U(0, ==, spa_create(ztest_opts.zo_pool, nvroot, props, NULL)); + VERIFY3U(0, ==, + spa_create(ztest_opts.zo_pool, nvroot, props, NULL, NULL)); nvlist_free(nvroot); nvlist_free(props); @@ -6748,7 +6845,6 @@ setup_data_fd(void) (void) unlink(ztest_name_data); } - static int shared_data_size(ztest_shared_hdr_t *hdr) { diff --git a/usr/src/common/crypto/modes/ccm.c b/usr/src/common/crypto/modes/ccm.c index d03a73ab04..09d9ece462 100644 --- a/usr/src/common/crypto/modes/ccm.c +++ b/usr/src/common/crypto/modes/ccm.c @@ -889,7 +889,7 @@ ccm_init_ctx(ccm_ctx_t *ccm_ctx, char *param, int kmflag, rv = CRYPTO_MECHANISM_PARAM_INVALID; goto out; } - if (!is_encrypt_init) { + if (!is_encrypt_init && ccm_ctx->ccm_data_len != 0) { /* allocate buffer for storing decrypted plaintext */ #ifdef _KERNEL ccm_ctx->ccm_pt_buf = kmem_alloc(ccm_ctx->ccm_data_len, diff --git a/usr/src/common/crypto/modes/modes.c b/usr/src/common/crypto/modes/modes.c index fbf66c0531..212374e152 100644 --- a/usr/src/common/crypto/modes/modes.c +++ b/usr/src/common/crypto/modes/modes.c @@ -264,7 +264,7 @@ crypto_uio_data(crypto_data_t *data, uchar_t *buf, int len, cmd_type_t cmd, offset -= uiop->uio_iov[vec_idx++].iov_len) ; - if (vec_idx == uiop->uio_iovcnt) { + if (vec_idx == uiop->uio_iovcnt && length > 0) { /* * The caller specified an offset that is larger than * the total size of the buffers it provided. 
diff --git a/usr/src/common/zfs/zfeature_common.c b/usr/src/common/zfs/zfeature_common.c index feab5145c0..0eb681b723 100644 --- a/usr/src/common/zfs/zfeature_common.c +++ b/usr/src/common/zfs/zfeature_common.c @@ -262,6 +262,18 @@ zpool_feature_init(void) ZFEATURE_FLAG_PER_DATASET, large_blocks_deps); { + static const spa_feature_t bookmark_v2_deps[] = { + SPA_FEATURE_EXTENSIBLE_DATASET, + SPA_FEATURE_BOOKMARKS, + SPA_FEATURE_NONE + }; + zfeature_register(SPA_FEATURE_BOOKMARK_V2, + "com.datto:bookmark_v2", "bookmark_v2", + "Support for larger bookmarks", + ZFEATURE_FLAG_PER_DATASET, bookmark_v2_deps); + } + + { static const spa_feature_t large_dnode_deps[] = { SPA_FEATURE_EXTENSIBLE_DATASET, SPA_FEATURE_NONE @@ -324,4 +336,14 @@ zpool_feature_init(void) "com.datto:resilver_defer", "resilver_defer", "Support for defering new resilvers when one is already running.", ZFEATURE_FLAG_READONLY_COMPAT, NULL); + + static const spa_feature_t encryption_deps[] = { + SPA_FEATURE_EXTENSIBLE_DATASET, + SPA_FEATURE_BOOKMARK_V2, + SPA_FEATURE_NONE + }; + zfeature_register(SPA_FEATURE_ENCRYPTION, + "com.datto:encryption", "encryption", + "Support for dataset level encryption", + ZFEATURE_FLAG_PER_DATASET, encryption_deps); } diff --git a/usr/src/common/zfs/zfeature_common.h b/usr/src/common/zfs/zfeature_common.h index ebe9626caf..adaa782f98 100644 --- a/usr/src/common/zfs/zfeature_common.h +++ b/usr/src/common/zfs/zfeature_common.h @@ -64,6 +64,8 @@ typedef enum spa_feature { SPA_FEATURE_SPACEMAP_V2, SPA_FEATURE_ALLOCATION_CLASSES, SPA_FEATURE_RESILVER_DEFER, + SPA_FEATURE_ENCRYPTION, + SPA_FEATURE_BOOKMARK_V2, SPA_FEATURES } spa_feature_t; diff --git a/usr/src/common/zfs/zfs_deleg.c b/usr/src/common/zfs/zfs_deleg.c index a3383f4ccf..76248de87d 100644 --- a/usr/src/common/zfs/zfs_deleg.c +++ b/usr/src/common/zfs/zfs_deleg.c @@ -66,6 +66,8 @@ zfs_deleg_perm_tab_t zfs_deleg_perm_tab[] = { {ZFS_DELEG_PERM_GROUPUSED}, {ZFS_DELEG_PERM_HOLD}, {ZFS_DELEG_PERM_RELEASE}, + 
{ZFS_DELEG_PERM_LOAD_KEY}, + {ZFS_DELEG_PERM_CHANGE_KEY}, {NULL} }; diff --git a/usr/src/common/zfs/zfs_deleg.h b/usr/src/common/zfs/zfs_deleg.h index 06d2df9bb8..e97b1dae22 100644 --- a/usr/src/common/zfs/zfs_deleg.h +++ b/usr/src/common/zfs/zfs_deleg.h @@ -68,6 +68,8 @@ typedef enum { ZFS_DELEG_NOTE_DIFF, ZFS_DELEG_NOTE_BOOKMARK, ZFS_DELEG_NOTE_REMAP, + ZFS_DELEG_NOTE_LOAD_KEY, + ZFS_DELEG_NOTE_CHANGE_KEY, ZFS_DELEG_NOTE_NONE } zfs_deleg_note_t; diff --git a/usr/src/common/zfs/zfs_prop.c b/usr/src/common/zfs/zfs_prop.c index 3e01d1ce49..ffe94997ed 100644 --- a/usr/src/common/zfs/zfs_prop.c +++ b/usr/src/common/zfs/zfs_prop.c @@ -34,6 +34,7 @@ #include <sys/zfs_acl.h> #include <sys/zfs_ioctl.h> #include <sys/zfs_znode.h> +#include <sys/dsl_crypt.h> #include "zfs_prop.h" #include "zfs_deleg.h" @@ -115,6 +116,26 @@ zfs_prop_init(void) { NULL } }; + static zprop_index_t crypto_table[] = { + { "on", ZIO_CRYPT_ON }, + { "off", ZIO_CRYPT_OFF }, + { "aes-128-ccm", ZIO_CRYPT_AES_128_CCM }, + { "aes-192-ccm", ZIO_CRYPT_AES_192_CCM }, + { "aes-256-ccm", ZIO_CRYPT_AES_256_CCM }, + { "aes-128-gcm", ZIO_CRYPT_AES_128_GCM }, + { "aes-192-gcm", ZIO_CRYPT_AES_192_GCM }, + { "aes-256-gcm", ZIO_CRYPT_AES_256_GCM }, + { NULL } + }; + + static zprop_index_t keyformat_table[] = { + { "none", ZFS_KEYFORMAT_NONE }, + { "raw", ZFS_KEYFORMAT_RAW }, + { "hex", ZFS_KEYFORMAT_HEX }, + { "passphrase", ZFS_KEYFORMAT_PASSPHRASE }, + { NULL } + }; + static zprop_index_t snapdir_table[] = { { "hidden", ZFS_SNAPDIR_HIDDEN }, { "visible", ZFS_SNAPDIR_VISIBLE }, @@ -183,6 +204,13 @@ zfs_prop_init(void) { NULL } }; + static zprop_index_t keystatus_table[] = { + { "none", ZFS_KEYSTATUS_NONE }, + { "unavailable", ZFS_KEYSTATUS_UNAVAILABLE }, + { "available", ZFS_KEYSTATUS_AVAILABLE }, + { NULL } + }; + static zprop_index_t logbias_table[] = { { "latency", ZFS_LOGBIAS_LATENCY }, { "throughput", ZFS_LOGBIAS_THROUGHPUT }, @@ -316,12 +344,16 @@ zfs_prop_init(void) PROP_DEFAULT, ZFS_TYPE_FILESYSTEM, "on | 
off | noauto", "CANMOUNT", canmount_table); - /* readonly index (boolean) properties */ + /* readonly index properties */ zprop_register_index(ZFS_PROP_MOUNTED, "mounted", 0, PROP_READONLY, ZFS_TYPE_FILESYSTEM, "yes | no", "MOUNTED", boolean_table); zprop_register_index(ZFS_PROP_DEFER_DESTROY, "defer_destroy", 0, PROP_READONLY, ZFS_TYPE_SNAPSHOT, "yes | no", "DEFER_DESTROY", boolean_table); + zprop_register_index(ZFS_PROP_KEYSTATUS, "keystatus", + ZFS_KEYSTATUS_NONE, PROP_READONLY, ZFS_TYPE_DATASET, + "none | unavailable | available", + "KEYSTATUS", keystatus_table); /* set once index properties */ zprop_register_index(ZFS_PROP_NORMALIZE, "normalization", 0, @@ -332,6 +364,15 @@ zfs_prop_init(void) ZFS_CASE_SENSITIVE, PROP_ONETIME, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "sensitive | insensitive | mixed", "CASE", case_table); + zprop_register_index(ZFS_PROP_KEYFORMAT, "keyformat", + ZFS_KEYFORMAT_NONE, PROP_ONETIME_DEFAULT, + ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, + "none | raw | hex | passphrase", "KEYFORMAT", keyformat_table); + zprop_register_index(ZFS_PROP_ENCRYPTION, "encryption", + ZIO_CRYPT_DEFAULT, PROP_ONETIME, ZFS_TYPE_DATASET, + "on | off | aes-128-ccm | aes-192-ccm | aes-256-ccm | " + "aes-128-gcm | aes-192-gcm | aes-256-gcm", "ENCRYPTION", + crypto_table); /* set once index (boolean) properties */ zprop_register_index(ZFS_PROP_UTF8ONLY, "utf8only", 0, PROP_ONETIME, @@ -362,6 +403,12 @@ zfs_prop_init(void) "receive_resume_token", NULL, PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "<string token>", "RESUMETOK"); + zprop_register_string(ZFS_PROP_ENCRYPTION_ROOT, "encryptionroot", NULL, + PROP_READONLY, ZFS_TYPE_DATASET, "<filesystem | volume>", + "ENCROOT"); + zprop_register_string(ZFS_PROP_KEYLOCATION, "keylocation", + "none", PROP_DEFAULT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, + "prompt | <file URI>", "KEYLOCATION"); /* readonly number properties */ zprop_register_number(ZFS_PROP_USED, "used", 0, PROP_READONLY, @@ -410,6 +457,9 @@ 
zfs_prop_init(void) ZFS_TYPE_DATASET | ZFS_TYPE_BOOKMARK, "<uint64>", "GUID"); zprop_register_number(ZFS_PROP_CREATETXG, "createtxg", 0, PROP_READONLY, ZFS_TYPE_DATASET | ZFS_TYPE_BOOKMARK, "<uint64>", "CREATETXG"); + zprop_register_number(ZFS_PROP_PBKDF2_ITERS, "pbkdf2iters", + 0, PROP_ONETIME_DEFAULT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, + "<iters>", "PBKDF2ITERS"); /* default number properties */ zprop_register_number(ZFS_PROP_QUOTA, "quota", 0, PROP_DEFAULT, @@ -460,8 +510,16 @@ zfs_prop_init(void) PROP_READONLY, ZFS_TYPE_DATASET, "OBJSETID"); zprop_register_hidden(ZFS_PROP_INCONSISTENT, "inconsistent", PROP_TYPE_NUMBER, PROP_READONLY, ZFS_TYPE_DATASET, "INCONSISTENT"); + zprop_register_hidden(ZFS_PROP_IVSET_GUID, "ivsetguid", + PROP_TYPE_NUMBER, PROP_READONLY, + ZFS_TYPE_DATASET | ZFS_TYPE_BOOKMARK, "IVSETGUID"); zprop_register_hidden(ZFS_PROP_PREV_SNAP, "prevsnap", PROP_TYPE_STRING, PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "PREVSNAP"); + zprop_register_hidden(ZFS_PROP_PBKDF2_SALT, "pbkdf2salt", + PROP_TYPE_NUMBER, PROP_ONETIME_DEFAULT, + ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "PBKDF2SALT"); + zprop_register_hidden(ZFS_PROP_KEY_GUID, "keyguid", PROP_TYPE_NUMBER, + PROP_READONLY, ZFS_TYPE_DATASET, "KEYGUID"); /* oddball properties */ zprop_register_impl(ZFS_PROP_CREATION, "creation", PROP_TYPE_NUMBER, 0, @@ -602,7 +660,8 @@ boolean_t zfs_prop_readonly(zfs_prop_t prop) { return (zfs_prop_table[prop].pd_attr == PROP_READONLY || - zfs_prop_table[prop].pd_attr == PROP_ONETIME); + zfs_prop_table[prop].pd_attr == PROP_ONETIME || + zfs_prop_table[prop].pd_attr == PROP_ONETIME_DEFAULT); } /* @@ -620,7 +679,8 @@ zfs_prop_visible(zfs_prop_t prop) boolean_t zfs_prop_setonce(zfs_prop_t prop) { - return (zfs_prop_table[prop].pd_attr == PROP_ONETIME); + return (zfs_prop_table[prop].pd_attr == PROP_ONETIME || + zfs_prop_table[prop].pd_attr == PROP_ONETIME_DEFAULT); } const char * @@ -655,6 +715,40 @@ zfs_prop_inheritable(zfs_prop_t prop) 
zfs_prop_table[prop].pd_attr == PROP_ONETIME); } +/* + * Returns TRUE if property is one of the encryption properties that requires + * a loaded encryption key to modify. + */ +boolean_t +zfs_prop_encryption_key_param(zfs_prop_t prop) +{ + /* + * keylocation does not count as an encryption property. It can be + * changed at will without needing the master keys. + */ + return (prop == ZFS_PROP_PBKDF2_SALT || prop == ZFS_PROP_PBKDF2_ITERS || + prop == ZFS_PROP_KEYFORMAT); +} + +/* + * Helper function used by both kernelspace and userspace to check the + * keylocation property. If encrypted is set, the keylocation must be valid + * for an encrypted dataset. + */ +boolean_t +zfs_prop_valid_keylocation(const char *str, boolean_t encrypted) +{ + if (strcmp("none", str) == 0) + return (!encrypted); + else if (strcmp("prompt", str) == 0) + return (B_TRUE); + else if (strlen(str) > 8 && strncmp("file:///", str, 8) == 0) + return (B_TRUE); + + return (B_FALSE); +} + + #ifndef _KERNEL /* diff --git a/usr/src/common/zfs/zfs_prop.h b/usr/src/common/zfs/zfs_prop.h index a63262311b..45423cc72f 100644 --- a/usr/src/common/zfs/zfs_prop.h +++ b/usr/src/common/zfs/zfs_prop.h @@ -51,9 +51,12 @@ typedef enum { * ONETIME properties are a sort of conglomeration of READONLY * and INHERIT. They can be set only during object creation, * after that they are READONLY. If not explicitly set during - * creation, they can be inherited. + * creation, they can be inherited. ONETIME_DEFAULT properties + * work the same way, but they will default instead of + * inheriting a value. 
*/ - PROP_ONETIME + PROP_ONETIME, + PROP_ONETIME_DEFAULT } zprop_attr_t; typedef struct zfs_index { diff --git a/usr/src/lib/Makefile b/usr/src/lib/Makefile index 7931ddba10..a5b8ab9e57 100644 --- a/usr/src/lib/Makefile +++ b/usr/src/lib/Makefile @@ -676,7 +676,7 @@ libvolmgt: libadm libvrrpadm: libdladm libscf libvscan: libscf libsecdb libzfs: libdevid libgen libuutil libadm libavl libefi libidmap \ - libumem libtsol libzfs_core + libumem libtsol libzfs_core libcryptoutil pkcs11 libmd libcmdutils libzfs_jni: libdiskmgt libzfs libzonecfg: libuuid libsysevent libsec libbrand libpool libscf libproc \ libuutil libbsm libsecdb diff --git a/usr/src/lib/libuutil/common/libuutil.h b/usr/src/lib/libuutil/common/libuutil.h index ec1bf907c1..a6e11ff054 100644 --- a/usr/src/lib/libuutil/common/libuutil.h +++ b/usr/src/lib/libuutil/common/libuutil.h @@ -245,7 +245,7 @@ void uu_list_pool_destroy(uu_list_pool_t *); * usage: * * foo_t *a; - * a = malloc(sizeof(*a)); + * a = malloc(sizeof (*a)); * uu_list_node_init(a, &a->foo_list, pool); * ... * uu_list_node_fini(a, &a->foo_list, pool); @@ -348,7 +348,7 @@ void uu_avl_pool_destroy(uu_avl_pool_t *); * usage: * * foo_t *a; - * a = malloc(sizeof(*a)); + * a = malloc(sizeof (*a)); * uu_avl_node_init(a, &a->foo_avl, pool); * ... 
* uu_avl_node_fini(a, &a->foo_avl, pool); diff --git a/usr/src/lib/libzfs/Makefile.com b/usr/src/lib/libzfs/Makefile.com index 996087bc0a..355923acae 100644 --- a/usr/src/lib/libzfs/Makefile.com +++ b/usr/src/lib/libzfs/Makefile.com @@ -41,6 +41,7 @@ OBJS_SHARED= \ OBJS_COMMON= \ libzfs_changelist.o \ libzfs_config.o \ + libzfs_crypto.o \ libzfs_dataset.o \ libzfs_diff.o \ libzfs_fru.o \ @@ -72,7 +73,8 @@ INCS += -I../../libc/inc CSTD= $(CSTD_GNU99) C99LMODE= -Xc99=%all LDLIBS += -lc -lm -ldevid -lgen -lnvpair -luutil -lavl -lefi \ - -ladm -lidmap -ltsol -lmd -lumem -lzfs_core -lcmdutils + -ladm -lidmap -ltsol -lcryptoutil -lpkcs11 -lmd -lumem -lzfs_core \ + -lcmdutils CPPFLAGS += $(INCS) -D_LARGEFILE64_SOURCE=1 -D_REENTRANT $(NOT_RELEASE_BUILD)CPPFLAGS += -DDEBUG @@ -89,6 +91,12 @@ SRCS= $(OBJS_COMMON:%.o=$(SRCDIR)/%.c) \ $(OBJS_SHARED:%.o=$(SRC)/common/zfs/%.c) $(LINTLIB) := SRCS= $(SRCDIR)/$(LINTSRC) +# lint complains about unused inline functions, even though +# they are "inline", not "static inline", with "extern inline" +# implementations and usage in libzpool. 
+LINTFLAGS += -erroff=E_STATIC_UNUSED +LINTFLAGS64 += -erroff=E_STATIC_UNUSED + .KEEP_STATE: all: $(LIBS) diff --git a/usr/src/lib/libzfs/common/libzfs.h b/usr/src/lib/libzfs/common/libzfs.h index af5e5c35d5..ca3bb76797 100644 --- a/usr/src/lib/libzfs/common/libzfs.h +++ b/usr/src/lib/libzfs/common/libzfs.h @@ -141,6 +141,7 @@ typedef enum zfs_error { EZFS_INITIALIZING, /* currently initializing */ EZFS_NO_INITIALIZE, /* no active initialize */ EZFS_NO_RESILVER_DEFER, /* pool doesn't support resilver_defer */ + EZFS_CRYPTOFAILED, /* failed to setup encryption */ EZFS_UNKNOWN } zfs_error_t; @@ -336,6 +337,7 @@ typedef enum { ZPOOL_STATUS_IO_FAILURE_CONTINUE, /* failed I/O, failmode 'continue' */ ZPOOL_STATUS_IO_FAILURE_MMP, /* failed MMP, failmode not 'panic' */ ZPOOL_STATUS_BAD_LOG, /* cannot read log chain(s) */ + ZPOOL_STATUS_ERRATA, /* informational errata available */ /* * If the pool has unsupported features but can still be opened in @@ -371,8 +373,10 @@ typedef enum { ZPOOL_STATUS_OK } zpool_status_t; -extern zpool_status_t zpool_get_status(zpool_handle_t *, char **); -extern zpool_status_t zpool_import_status(nvlist_t *, char **); +extern zpool_status_t zpool_get_status(zpool_handle_t *, char **, + zpool_errata_t *); +extern zpool_status_t zpool_import_status(nvlist_t *, char **, + zpool_errata_t *); extern void zpool_dump_ddt(const ddt_stat_t *dds, const ddt_histogram_t *ddh); /* @@ -474,8 +478,8 @@ extern uint64_t zfs_prop_default_numeric(zfs_prop_t); extern const char *zfs_prop_column_name(zfs_prop_t); extern boolean_t zfs_prop_align_right(zfs_prop_t); -extern nvlist_t *zfs_valid_proplist(libzfs_handle_t *, zfs_type_t, - nvlist_t *, uint64_t, zfs_handle_t *, zpool_handle_t *, const char *); +extern nvlist_t *zfs_valid_proplist(libzfs_handle_t *, zfs_type_t, nvlist_t *, + uint64_t, zfs_handle_t *, zpool_handle_t *, boolean_t, const char *); extern const char *zfs_prop_to_name(zfs_prop_t); extern int zfs_prop_set(zfs_handle_t *, const char *, const char 
*); @@ -505,6 +509,19 @@ extern nvlist_t *zfs_get_recvd_props(zfs_handle_t *); extern nvlist_t *zfs_get_clones_nvl(zfs_handle_t *); +/* + * zfs encryption management + */ +extern int zfs_crypto_get_encryption_root(zfs_handle_t *, boolean_t *, char *); +extern int zfs_crypto_create(libzfs_handle_t *, char *, nvlist_t *, nvlist_t *, + uint8_t **, uint_t *); +extern int zfs_crypto_clone_check(libzfs_handle_t *, zfs_handle_t *, char *, + nvlist_t *); +extern int zfs_crypto_attempt_load_keys(libzfs_handle_t *, char *); +extern int zfs_crypto_load_key(zfs_handle_t *, boolean_t, char *); +extern int zfs_crypto_unload_key(zfs_handle_t *); +extern int zfs_crypto_rewrap(zfs_handle_t *, nvlist_t *, boolean_t); + typedef struct zprop_list { int pl_prop; char *pl_user_prop; @@ -653,6 +670,9 @@ typedef struct sendflags { /* compressed WRITE records are permitted */ boolean_t compress; + + /* raw encrypted records are permitted */ + boolean_t raw; } sendflags_t; typedef boolean_t (snapfilter_cb_t)(zfs_handle_t *, void *); @@ -737,6 +757,7 @@ extern const char *zfs_type_to_name(zfs_type_t); extern void zfs_refresh_properties(zfs_handle_t *); extern int zfs_name_valid(const char *, zfs_type_t); extern zfs_handle_t *zfs_path_to_zhandle(libzfs_handle_t *, char *, zfs_type_t); +extern int zfs_parent_name(zfs_handle_t *, char *, size_t); extern boolean_t zfs_dataset_exists(libzfs_handle_t *, const char *, zfs_type_t); extern int zfs_spa_version(zfs_handle_t *, int *); diff --git a/usr/src/lib/libzfs/common/libzfs_changelist.c b/usr/src/lib/libzfs/common/libzfs_changelist.c index af5cb35f9d..99d226019f 100644 --- a/usr/src/lib/libzfs/common/libzfs_changelist.c +++ b/usr/src/lib/libzfs/common/libzfs_changelist.c @@ -225,6 +225,7 @@ changelist_postfix(prop_changelist_t *clp) boolean_t sharenfs; boolean_t sharesmb; boolean_t mounted; + boolean_t needs_key; /* * If we are in the global zone, but this dataset is exported @@ -253,9 +254,12 @@ changelist_postfix(prop_changelist_t *clp) 
shareopts, sizeof (shareopts), NULL, NULL, 0, B_FALSE) == 0) && (strcmp(shareopts, "off") != 0)); + needs_key = (zfs_prop_get_int(cn->cn_handle, + ZFS_PROP_KEYSTATUS) == ZFS_KEYSTATUS_UNAVAILABLE); + mounted = zfs_is_mounted(cn->cn_handle, NULL); - if (!mounted && (cn->cn_mounted || + if (!mounted && !needs_key && (cn->cn_mounted || ((sharenfs || sharesmb || clp->cl_waslegacy) && (zfs_prop_get_int(cn->cn_handle, ZFS_PROP_CANMOUNT) == ZFS_CANMOUNT_ON)))) { diff --git a/usr/src/lib/libzfs/common/libzfs_crypto.c b/usr/src/lib/libzfs/common/libzfs_crypto.c new file mode 100644 index 0000000000..4533ed8111 --- /dev/null +++ b/usr/src/lib/libzfs/common/libzfs_crypto.c @@ -0,0 +1,1529 @@ +/* + * CDDL HEADER START + * + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2017, Datto, Inc. All rights reserved. + */ + +#include <string.h> +#include <strings.h> +#include <ctype.h> +#include <unistd.h> +#include <sys/zfs_context.h> +#include <sys/fs/zfs.h> +#include <sys/dsl_crypt.h> +#ifdef sun +#include <kmfapi.h> +#include <security/pkcs11.h> +#include <cryptoutil.h> +#else +#include <sys/crypto/icp.h> +#endif +#include <libintl.h> +#include <termios.h> +#include <signal.h> +#include <errno.h> +#include <libzfs.h> +#include "libzfs_impl.h" +#include "zfeature_common.h" + +/* + * User keys are used to decrypt the master encryption keys of a dataset. This + * indirection allows a user to change his / her access key without having to + * re-encrypt the entire dataset. User keys can be provided in one of several + * ways. Raw keys are simply given to the kernel as is. 
Similarly, hex keys + * are converted to binary and passed into the kernel. Password based keys are + * a bit more complicated. Passwords alone do not provide suitable entropy for + * encryption and may be too short or too long to be used. In order to derive + * a more appropriate key we use a PBKDF2 function. This function is designed + * to take a (relatively) long time to calculate in order to discourage + * attackers from guessing from a list of common passwords. PBKDF2 requires + * 2 additional parameters. The first is the number of iterations to run, which + * will ultimately determine how long it takes to derive the resulting key from + * the password. The second parameter is a salt that is randomly generated for + * each dataset. The salt is used to "tweak" PBKDF2 such that a group of + * attackers cannot reasonably generate a table of commonly known passwords to + * their output keys and expect it work for all past and future PBKDF2 users. + * We store the salt as a hidden property of the dataset (although it is + * technically ok if the salt is known to the attacker). 
+ */ + +typedef enum key_locator { + KEY_LOCATOR_NONE, + KEY_LOCATOR_PROMPT, + KEY_LOCATOR_URI +} key_locator_t; + +#define MIN_PASSPHRASE_LEN 8 +#define MAX_PASSPHRASE_LEN 512 +#define MAX_KEY_PROMPT_ATTEMPTS 3 + +static int caught_interrupt; + +static zfs_keylocation_t +zfs_prop_parse_keylocation(const char *str) +{ + if (strcmp("prompt", str) == 0) + return (ZFS_KEYLOCATION_PROMPT); + else if (strlen(str) > 8 && strncmp("file:///", str, 8) == 0) + return (ZFS_KEYLOCATION_URI); + + return (ZFS_KEYLOCATION_NONE); +} + +static int +hex_key_to_raw(char *hex, int hexlen, uint8_t *out) +{ + int ret, i; + unsigned int c; + + for (i = 0; i < hexlen; i += 2) { + if (!isxdigit(hex[i]) || !isxdigit(hex[i + 1])) { + ret = EINVAL; + goto error; + } + + ret = sscanf(&hex[i], "%02x", &c); + if (ret != 1) { + ret = EINVAL; + goto error; + } + + out[i / 2] = c; + } + + return (0); + +error: + return (ret); +} + + +static void +catch_signal(int sig) +{ + caught_interrupt = sig; +} + +static char * +get_format_prompt_string(zfs_keyformat_t format) +{ + switch (format) { + case ZFS_KEYFORMAT_RAW: + return ("raw key"); + case ZFS_KEYFORMAT_HEX: + return ("hex key"); + case ZFS_KEYFORMAT_PASSPHRASE: + return ("passphrase"); + default: + /* shouldn't happen */ + return (NULL); + } +} + +static int +get_key_material_raw(FILE *fd, const char *fsname, zfs_keyformat_t keyformat, + boolean_t again, boolean_t newkey, uint8_t **buf, size_t *len_out) +{ + int ret = 0, bytes; + size_t buflen = 0; + struct termios old_term, new_term; + struct sigaction act, osigint, osigtstp; + + *len_out = 0; + + if (isatty(fileno(fd))) { + /* + * handle SIGINT and ignore SIGSTP. This is necessary to + * restore the state of the terminal. 
+ */ + caught_interrupt = 0; + act.sa_flags = 0; + (void) sigemptyset(&act.sa_mask); + act.sa_handler = catch_signal; + + (void) sigaction(SIGINT, &act, &osigint); + act.sa_handler = SIG_IGN; + (void) sigaction(SIGTSTP, &act, &osigtstp); + + /* prompt for the key */ + if (fsname != NULL) { + (void) printf("%s %s%s for '%s': ", + (again) ? "Re-enter" : "Enter", + (newkey) ? "new " : "", + get_format_prompt_string( + (zfs_keyformat_t)keyformat), + fsname); + } else { + (void) printf("%s %s%s: ", + (again) ? "Re-enter" : "Enter", + (newkey) ? "new " : "", + get_format_prompt_string( + (zfs_keyformat_t)keyformat)); + + } + (void) fflush(stdout); + + /* disable the terminal echo for key input */ + (void) tcgetattr(fileno(fd), &old_term); + + new_term = old_term; + new_term.c_lflag &= ~(ECHO | ECHOE | ECHOK | ECHONL); + + ret = tcsetattr(fileno(fd), TCSAFLUSH, &new_term); + if (ret != 0) { + ret = errno; + errno = 0; + goto out; + } + } + + /* read the key material */ + if (keyformat != ZFS_KEYFORMAT_RAW) { + bytes = getline((char **)buf, &buflen, fd); + if (bytes < 0) { + ret = errno; + errno = 0; + goto out; + } + + /* trim the ending newline if it exists */ + if ((*buf)[bytes - 1] == '\n') { + (*buf)[bytes - 1] = '\0'; + bytes--; + } + } else { + /* + * Raw keys may have newline characters in them and so can't + * use getline(). Here we attempt to read 33 bytes so that we + * can properly check the key length (the file should only have + * 32 bytes). 
+ */ + *buf = malloc((WRAPPING_KEY_LEN + 1) * sizeof (char)); + if (*buf == NULL) { + ret = ENOMEM; + goto out; + } + + bytes = fread(*buf, 1, WRAPPING_KEY_LEN + 1, fd); + if (bytes < 0) { + /* size errors are handled by the calling function */ + free(*buf); + *buf = NULL; + ret = errno; + errno = 0; + goto out; + } + } + + *len_out = bytes; + +out: + if (isatty(fileno(fd))) { + /* reset the teminal */ + (void) tcsetattr(fileno(fd), TCSAFLUSH, &old_term); + (void) sigaction(SIGINT, &osigint, NULL); + (void) sigaction(SIGTSTP, &osigtstp, NULL); + + /* if we caught a signal, re-throw it now */ + if (caught_interrupt != 0) { + (void) kill(getpid(), caught_interrupt); + } + + /* print the newline that was not echo'd */ + (void) printf("\n"); + } + + return (ret); + +} + +/* + * Attempts to fetch key material, no matter where it might live. The key + * material is allocated and returned in km_out. *can_retry_out will be set + * to B_TRUE if the user is providing the key material interactively, allowing + * for re-entry attempts. 
+ */ +static int +get_key_material(libzfs_handle_t *hdl, boolean_t do_verify, boolean_t newkey, + zfs_keyformat_t keyformat, char *keylocation, const char *fsname, + uint8_t **km_out, size_t *kmlen_out, boolean_t *can_retry_out) +{ + int ret, i; + zfs_keylocation_t keyloc = ZFS_KEYLOCATION_NONE; + FILE *fd = NULL; + uint8_t *km = NULL, *km2 = NULL; + size_t kmlen, kmlen2; + boolean_t can_retry = B_FALSE; + + /* verify and parse the keylocation */ + keyloc = zfs_prop_parse_keylocation(keylocation); + + /* open the appropriate file descriptor */ + switch (keyloc) { + case ZFS_KEYLOCATION_PROMPT: + fd = stdin; + if (isatty(fileno(fd))) { + can_retry = B_TRUE; + + /* raw keys cannot be entered on the terminal */ + if (keyformat == ZFS_KEYFORMAT_RAW) { + ret = EINVAL; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Cannot enter raw keys on the terminal")); + goto error; + } + } + break; + case ZFS_KEYLOCATION_URI: + fd = fopen(&keylocation[7], "r"); + if (!fd) { + ret = errno; + errno = 0; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Failed to open key material file")); + goto error; + } + break; + default: + ret = EINVAL; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Invalid keylocation.")); + goto error; + } + + /* fetch the key material into the buffer */ + ret = get_key_material_raw(fd, fsname, keyformat, B_FALSE, newkey, + &km, &kmlen); + if (ret != 0) + goto error; + + /* do basic validation of the key material */ + switch (keyformat) { + case ZFS_KEYFORMAT_RAW: + /* verify the key length is correct */ + if (kmlen < WRAPPING_KEY_LEN) { + ret = EINVAL; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Raw key too short (expected %u)."), + WRAPPING_KEY_LEN); + goto error; + } + + if (kmlen > WRAPPING_KEY_LEN) { + ret = EINVAL; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Raw key too long (expected %u)."), + WRAPPING_KEY_LEN); + goto error; + } + break; + case ZFS_KEYFORMAT_HEX: + /* verify the key length is correct */ + if (kmlen < WRAPPING_KEY_LEN * 2) { + ret = 
EINVAL; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Hex key too short (expected %u)."), + WRAPPING_KEY_LEN * 2); + goto error; + } + + if (kmlen > WRAPPING_KEY_LEN * 2) { + ret = EINVAL; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Hex key too long (expected %u)."), + WRAPPING_KEY_LEN * 2); + goto error; + } + + /* check for invalid hex digits */ + for (i = 0; i < WRAPPING_KEY_LEN * 2; i++) { + if (!isxdigit((char)km[i])) { + ret = EINVAL; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Invalid hex character detected.")); + goto error; + } + } + break; + case ZFS_KEYFORMAT_PASSPHRASE: + /* verify the length is within bounds */ + if (kmlen > MAX_PASSPHRASE_LEN) { + ret = EINVAL; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Passphrase too long (max %u)."), + MAX_PASSPHRASE_LEN); + goto error; + } + + if (kmlen < MIN_PASSPHRASE_LEN) { + ret = EINVAL; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Passphrase too short (min %u)."), + MIN_PASSPHRASE_LEN); + goto error; + } + break; + default: + /* can't happen, checked above */ + break; + } + + if (do_verify && isatty(fileno(fd))) { + ret = get_key_material_raw(fd, fsname, keyformat, B_TRUE, + newkey, &km2, &kmlen2); + if (ret != 0) + goto error; + + if (kmlen2 != kmlen || + (memcmp((char *)km, (char *)km2, kmlen) != 0)) { + ret = EINVAL; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Provided keys do not match.")); + goto error; + } + } + + if (fd != stdin) + (void) fclose(fd); + + if (km2 != NULL) + free(km2); + + *km_out = km; + *kmlen_out = kmlen; + if (can_retry_out != NULL) + *can_retry_out = can_retry; + + return (0); + +error: + if (km != NULL) + free(km); + + if (km2 != NULL) + free(km2); + + if (fd != NULL && fd != stdin) + (void) fclose(fd); + + *km_out = NULL; + *kmlen_out = 0; + + if (can_retry_out != NULL) + *can_retry_out = can_retry; + + return (ret); +} + +/* This needs to be fixed to be compatible with other platforms */ + +static int +pbkdf2(uint8_t *passphrase, size_t passphraselen, uint8_t *salt, + 
size_t saltlen, uint64_t iterations, uint8_t *output, + size_t outputlen) +{ + int ret = 0; + CK_SESSION_HANDLE session; + char *tmpkeydata = NULL; + size_t tmpkeydatalen = 0; + CK_OBJECT_HANDLE obj; + + /* initialize output */ + (void) memset(output, 0, outputlen); + + ret = SUNW_C_GetMechSession(CKM_PKCS5_PBKD2, &session); + if (ret) { + (void) fprintf(stderr, "failed to connect to pkcs5: %s\n", + pkcs11_strerror(ret)); + return (ret); + } + + ret = pkcs11_PasswdToPBKD2Object(session, (char *)passphrase, + passphraselen, salt, saltlen, iterations, CKK_AES, outputlen, 0, + &obj); + + if (ret == CKR_OK) + ret = pkcs11_ObjectToKey(session, obj, (void **)&tmpkeydata, + &tmpkeydatalen, B_TRUE); + + (void) C_CloseSession(session); + if (ret) { + (void) fprintf(stderr, "unable to generate key: %s\n", + pkcs11_strerror(ret)); + return (ret); + } + + /* + * Because it allocates an area for the passphrase, we copy it out + * then zero the original + */ + (void) memcpy(output, tmpkeydata, tmpkeydatalen); + (void) memset(tmpkeydata, 0, tmpkeydatalen); + free(tmpkeydata); + + return (ret); +} + +/* ARGSUSED */ +static int +derive_key(libzfs_handle_t *hdl, zfs_keyformat_t format, uint64_t iters, + uint8_t *key_material, size_t key_material_len, uint64_t salt, + uint8_t **key_out) +{ + int ret; + uint8_t *key; + + *key_out = NULL; + + key = zfs_alloc(hdl, WRAPPING_KEY_LEN); + if (!key) + return (ENOMEM); + + switch (format) { + case ZFS_KEYFORMAT_RAW: + bcopy(key_material, key, WRAPPING_KEY_LEN); + break; + case ZFS_KEYFORMAT_HEX: + ret = hex_key_to_raw((char *)key_material, + WRAPPING_KEY_LEN * 2, key); + if (ret != 0) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Invalid hex key provided.")); + goto error; + } + break; + case ZFS_KEYFORMAT_PASSPHRASE: + salt = LE_64(salt); + ret = pbkdf2(key_material, strlen((char *)key_material), + ((uint8_t *)&salt), sizeof (uint64_t), iters, + key, WRAPPING_KEY_LEN); + if (ret != 0) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Failed 
to generate key from passphrase.")); + goto error; + } + break; + default: + ret = EINVAL; + goto error; + } + + *key_out = key; + return (0); + +error: + free(key); + + *key_out = NULL; + return (ret); +} + +static boolean_t +encryption_feature_is_enabled(zpool_handle_t *zph) +{ + nvlist_t *features; + uint64_t feat_refcount; + + /* check that features can be enabled */ + if (zpool_get_prop_int(zph, ZPOOL_PROP_VERSION, NULL) + < SPA_VERSION_FEATURES) + return (B_FALSE); + + /* check for crypto feature */ + features = zpool_get_features(zph); + if (!features || nvlist_lookup_uint64(features, + spa_feature_table[SPA_FEATURE_ENCRYPTION].fi_guid, + &feat_refcount) != 0) + return (B_FALSE); + + return (B_TRUE); +} + +static int +populate_create_encryption_params_nvlists(libzfs_handle_t *hdl, + zfs_handle_t *zhp, boolean_t newkey, zfs_keyformat_t keyformat, + char *keylocation, nvlist_t *props, uint8_t **wkeydata, uint_t *wkeylen) +{ + int ret; + uint64_t iters = 0, salt = 0; + uint8_t *key_material = NULL; + size_t key_material_len = 0; + uint8_t *key_data = NULL; + const char *fsname = (zhp) ? 
zfs_get_name(zhp) : NULL; + + /* get key material from keyformat and keylocation */ + ret = get_key_material(hdl, B_TRUE, newkey, keyformat, keylocation, + fsname, &key_material, &key_material_len, NULL); + if (ret != 0) + goto error; + + /* passphrase formats require a salt and pbkdf2 iters property */ + if (keyformat == ZFS_KEYFORMAT_PASSPHRASE) { +#ifdef sun + /* always generate a new salt */ + ret = pkcs11_get_random(&salt, sizeof (uint64_t)); + if (ret != 0) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Failed to generate salt.")); + goto error; + } +#else + random_init(); + + ret = random_get_bytes((uint8_t *)&salt, sizeof (uint64_t)); + if (ret != 0) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Failed to generate salt.")); + goto error; + } + + random_fini(); +#endif + + ret = nvlist_add_uint64(props, + zfs_prop_to_name(ZFS_PROP_PBKDF2_SALT), salt); + if (ret != 0) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Failed to add salt to properties.")); + goto error; + } + + /* + * If not otherwise specified, use the default number of + * pbkdf2 iterations. If specified, we have already checked + * that the given value is greater than MIN_PBKDF2_ITERATIONS + * during zfs_valid_proplist(). 
+ */ + ret = nvlist_lookup_uint64(props, + zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS), &iters); + if (ret == ENOENT) { + iters = DEFAULT_PBKDF2_ITERATIONS; + ret = nvlist_add_uint64(props, + zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS), iters); + if (ret != 0) + goto error; + } else if (ret != 0) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Failed to get pbkdf2 iterations.")); + goto error; + } + } else { + /* check that pbkdf2iters was not specified by the user */ + ret = nvlist_lookup_uint64(props, + zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS), &iters); + if (ret == 0) { + ret = EINVAL; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Cannot specify pbkdf2iters with a non-passphrase " + "keyformat.")); + goto error; + } + } + + /* derive a key from the key material */ + ret = derive_key(hdl, (zfs_keyformat_t)keyformat, iters, key_material, + key_material_len, salt, &key_data); + if (ret != 0) + goto error; + + free(key_material); + + *wkeydata = key_data; + *wkeylen = WRAPPING_KEY_LEN; + return (0); + +error: + if (key_material != NULL) + free(key_material); + if (key_data != NULL) + free(key_data); + + *wkeydata = NULL; + *wkeylen = 0; + return (ret); +} + +static boolean_t +proplist_has_encryption_props(nvlist_t *props) +{ + int ret; + uint64_t intval; + char *strval; + + ret = nvlist_lookup_uint64(props, + zfs_prop_to_name(ZFS_PROP_ENCRYPTION), &intval); + if (ret == 0 && intval != ZIO_CRYPT_OFF) + return (B_TRUE); + + ret = nvlist_lookup_string(props, + zfs_prop_to_name(ZFS_PROP_KEYLOCATION), &strval); + if (ret == 0 && strcmp(strval, "none") != 0) + return (B_TRUE); + + ret = nvlist_lookup_uint64(props, + zfs_prop_to_name(ZFS_PROP_KEYFORMAT), &intval); + if (ret == 0) + return (B_TRUE); + + ret = nvlist_lookup_uint64(props, + zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS), &intval); + if (ret == 0) + return (B_TRUE); + + return (B_FALSE); +} + +int +zfs_crypto_get_encryption_root(zfs_handle_t *zhp, boolean_t *is_encroot, + char *buf) +{ + int ret; + char 
prop_encroot[MAXNAMELEN]; + + /* if the dataset isn't encrypted, just return */ + if (zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION) == ZIO_CRYPT_OFF) { + *is_encroot = B_FALSE; + if (buf != NULL) + buf[0] = '\0'; + return (0); + } + + ret = zfs_prop_get(zhp, ZFS_PROP_ENCRYPTION_ROOT, prop_encroot, + sizeof (prop_encroot), NULL, NULL, 0, B_TRUE); + if (ret != 0) { + *is_encroot = B_FALSE; + if (buf != NULL) + buf[0] = '\0'; + return (ret); + } + + *is_encroot = strcmp(prop_encroot, zfs_get_name(zhp)) == 0; + if (buf != NULL) + (void) strcpy(buf, prop_encroot); + + return (0); +} + +int +zfs_crypto_create(libzfs_handle_t *hdl, char *parent_name, nvlist_t *props, + nvlist_t *pool_props, uint8_t **wkeydata_out, uint_t *wkeylen_out) +{ + int ret; + uint64_t crypt = ZIO_CRYPT_INHERIT, pcrypt = ZIO_CRYPT_INHERIT; + uint64_t keyformat = ZFS_KEYFORMAT_NONE; + char *keylocation = NULL; + zfs_handle_t *pzhp = NULL; + uint8_t *wkeydata = NULL; + uint_t wkeylen = 0; + boolean_t local_crypt = B_TRUE; + + /* lookup crypt from props */ + ret = nvlist_lookup_uint64(props, + zfs_prop_to_name(ZFS_PROP_ENCRYPTION), &crypt); + if (ret != 0) + local_crypt = B_FALSE; + + /* lookup key location and format from props */ + (void) nvlist_lookup_uint64(props, + zfs_prop_to_name(ZFS_PROP_KEYFORMAT), &keyformat); + (void) nvlist_lookup_string(props, + zfs_prop_to_name(ZFS_PROP_KEYLOCATION), &keylocation); + + if (parent_name != NULL) { + /* get a reference to parent dataset */ + pzhp = make_dataset_handle(hdl, parent_name); + if (pzhp == NULL) { + ret = ENOENT; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Failed to lookup parent.")); + goto out; + } + + /* Lookup parent's crypt */ + pcrypt = zfs_prop_get_int(pzhp, ZFS_PROP_ENCRYPTION); + + /* Params require the encryption feature */ + if (!encryption_feature_is_enabled(pzhp->zpool_hdl)) { + if (proplist_has_encryption_props(props)) { + ret = EINVAL; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Encryption feature not enabled.")); + goto out; + } 
+ + ret = 0; + goto out; + } + } else { + /* + * special case for root dataset where encryption feature + * feature won't be on disk yet + */ + if (!nvlist_exists(pool_props, "feature@encryption")) { + if (proplist_has_encryption_props(props)) { + ret = EINVAL; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Encryption feature not enabled.")); + goto out; + } + + ret = 0; + goto out; + } + + pcrypt = ZIO_CRYPT_OFF; + } + + /* Check for encryption being explicitly truned off */ + if (crypt == ZIO_CRYPT_OFF && pcrypt != ZIO_CRYPT_OFF) { + ret = EINVAL; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Invalid encryption value. Dataset must be encrypted.")); + goto out; + } + + /* Get the inherited encryption property if we don't have it locally */ + if (!local_crypt) + crypt = pcrypt; + + /* + * At this point crypt should be the actual encryption value. If + * encryption is off just verify that no encryption properties have + * been specified and return. + */ + if (crypt == ZIO_CRYPT_OFF) { + if (proplist_has_encryption_props(props)) { + ret = EINVAL; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Encryption must be turned on to set encryption " + "properties.")); + goto out; + } + + ret = 0; + goto out; + } + + /* + * If we have a parent crypt it is valid to specify encryption alone. + * This will result in a child that is encrypted with the chosen + * encryption suite that will also inherit the parent's key. If + * the parent is not encrypted we need an encryption suite provided. + */ + if (pcrypt == ZIO_CRYPT_OFF && keylocation == NULL && + keyformat == ZFS_KEYFORMAT_NONE) { + ret = EINVAL; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Keyformat required for new encryption root.")); + goto out; + } + + /* + * Specifying a keylocation implies this will be a new encryption root. + * Check that a keyformat is also specified. 
+ */ + if (keylocation != NULL && keyformat == ZFS_KEYFORMAT_NONE) { + ret = EINVAL; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Keyformat required for new encryption root.")); + goto out; + } + + /* default to prompt if no keylocation is specified */ + if (keyformat != ZFS_KEYFORMAT_NONE && keylocation == NULL) { + keylocation = "prompt"; + ret = nvlist_add_string(props, + zfs_prop_to_name(ZFS_PROP_KEYLOCATION), keylocation); + if (ret != 0) + goto out; + } + + /* + * If a local key is provided, this dataset will be a new + * encryption root. Populate the encryption params. + */ + if (keylocation != NULL) { + ret = populate_create_encryption_params_nvlists(hdl, NULL, + B_FALSE, keyformat, keylocation, props, &wkeydata, + &wkeylen); + if (ret != 0) + goto out; + } + + if (pzhp != NULL) + zfs_close(pzhp); + + *wkeydata_out = wkeydata; + *wkeylen_out = wkeylen; + return (0); + +out: + if (pzhp != NULL) + zfs_close(pzhp); + if (wkeydata != NULL) + free(wkeydata); + + *wkeydata_out = NULL; + *wkeylen_out = 0; + return (ret); +} + +int +zfs_crypto_clone_check(libzfs_handle_t *hdl, zfs_handle_t *origin_zhp, + char *parent_name, nvlist_t *props) +{ + int ret; + zfs_handle_t *pzhp = NULL; + uint64_t pcrypt, ocrypt; + + /* + * No encryption properties should be specified. They will all be + * inherited from the origin dataset. 
+ */ + if (nvlist_exists(props, zfs_prop_to_name(ZFS_PROP_KEYFORMAT)) || + nvlist_exists(props, zfs_prop_to_name(ZFS_PROP_KEYLOCATION)) || + nvlist_exists(props, zfs_prop_to_name(ZFS_PROP_ENCRYPTION)) || + nvlist_exists(props, zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS))) { + ret = EINVAL; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Encryption properties must inherit from origin dataset.")); + goto out; + } + + /* get a reference to parent dataset, should never be NULL */ + pzhp = make_dataset_handle(hdl, parent_name); + if (pzhp == NULL) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Failed to lookup parent.")); + return (ENOENT); + } + + /* Lookup parent's crypt */ + pcrypt = zfs_prop_get_int(pzhp, ZFS_PROP_ENCRYPTION); + ocrypt = zfs_prop_get_int(origin_zhp, ZFS_PROP_ENCRYPTION); + + /* all children of encrypted parents must be encrypted */ + if (pcrypt != ZIO_CRYPT_OFF && ocrypt == ZIO_CRYPT_OFF) { + ret = EINVAL; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Cannot create unencrypted clone as a child " + "of encrypted parent.")); + goto out; + } + + zfs_close(pzhp); + return (0); + +out: + if (pzhp != NULL) + zfs_close(pzhp); + return (ret); +} + +typedef struct loadkeys_cbdata { + uint64_t cb_numfailed; + uint64_t cb_numattempted; +} loadkey_cbdata_t; + +static int +load_keys_cb(zfs_handle_t *zhp, void *arg) +{ + int ret; + boolean_t is_encroot; + loadkey_cbdata_t *cb = arg; + uint64_t keystatus = zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS); + + /* only attempt to load keys for encryption roots */ + ret = zfs_crypto_get_encryption_root(zhp, &is_encroot, NULL); + if (ret != 0 || !is_encroot) + goto out; + + /* don't attempt to load already loaded keys */ + if (keystatus == ZFS_KEYSTATUS_AVAILABLE) + goto out; + + /* Attempt to load the key. Record status in cb. 
*/ + cb->cb_numattempted++; + + ret = zfs_crypto_load_key(zhp, B_FALSE, NULL); + if (ret) + cb->cb_numfailed++; + +out: + (void) zfs_iter_filesystems(zhp, load_keys_cb, cb); + zfs_close(zhp); + + /* always return 0, since this function is best effort */ + return (0); +} + +/* + * This function is best effort. It attempts to load all the keys for the given + * filesystem and all of its children. + */ +int +zfs_crypto_attempt_load_keys(libzfs_handle_t *hdl, char *fsname) +{ + int ret; + zfs_handle_t *zhp = NULL; + loadkey_cbdata_t cb = { 0 }; + + zhp = zfs_open(hdl, fsname, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); + if (zhp == NULL) { + ret = ENOENT; + goto error; + } + + ret = load_keys_cb(zfs_handle_dup(zhp), &cb); + if (ret) + goto error; + + (void) printf(gettext("%llu / %llu keys successfully loaded\n"), + (u_longlong_t)(cb.cb_numattempted - cb.cb_numfailed), + (u_longlong_t)cb.cb_numattempted); + + if (cb.cb_numfailed != 0) { + ret = -1; + goto error; + } + + zfs_close(zhp); + return (0); + +error: + if (zhp != NULL) + zfs_close(zhp); + return (ret); +} + +int +zfs_crypto_load_key(zfs_handle_t *zhp, boolean_t noop, char *alt_keylocation) +{ + int ret, attempts = 0; + char errbuf[1024]; + uint64_t keystatus, iters = 0, salt = 0; + uint64_t keyformat = ZFS_KEYFORMAT_NONE; + char prop_keylocation[MAXNAMELEN]; + char prop_encroot[MAXNAMELEN]; + char *keylocation = NULL; + uint8_t *key_material = NULL, *key_data = NULL; + size_t key_material_len; + boolean_t is_encroot, can_retry = B_FALSE, correctible = B_FALSE; + + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, "Key load error")); + + /* check that encryption is enabled for the pool */ + if (!encryption_feature_is_enabled(zhp->zpool_hdl)) { + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Encryption feature not enabled.")); + ret = EINVAL; + goto error; + } + + /* Fetch the keyformat. Check that the dataset is encrypted. 
*/ + keyformat = zfs_prop_get_int(zhp, ZFS_PROP_KEYFORMAT); + if (keyformat == ZFS_KEYFORMAT_NONE) { + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "'%s' is not encrypted."), zfs_get_name(zhp)); + ret = EINVAL; + goto error; + } + + /* + * Fetch the key location. Check that we are working with an + * encryption root. + */ + ret = zfs_crypto_get_encryption_root(zhp, &is_encroot, prop_encroot); + if (ret != 0) { + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Failed to get encryption root for '%s'."), + zfs_get_name(zhp)); + goto error; + } else if (!is_encroot) { + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Keys must be loaded for encryption root of '%s' (%s)."), + zfs_get_name(zhp), prop_encroot); + ret = EINVAL; + goto error; + } + + /* + * if the caller has elected to override the keylocation property + * use that instead + */ + if (alt_keylocation != NULL) { + keylocation = alt_keylocation; + } else { + ret = zfs_prop_get(zhp, ZFS_PROP_KEYLOCATION, prop_keylocation, + sizeof (prop_keylocation), NULL, NULL, 0, B_TRUE); + if (ret != 0) { + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Failed to get keylocation for '%s'."), + zfs_get_name(zhp)); + goto error; + } + + keylocation = prop_keylocation; + } + + /* check that the key is unloaded unless this is a noop */ + if (!noop) { + keystatus = zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS); + if (keystatus == ZFS_KEYSTATUS_AVAILABLE) { + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Key already loaded for '%s'."), zfs_get_name(zhp)); + ret = EEXIST; + goto error; + } + } + + /* passphrase formats require a salt and pbkdf2_iters property */ + if (keyformat == ZFS_KEYFORMAT_PASSPHRASE) { + salt = zfs_prop_get_int(zhp, ZFS_PROP_PBKDF2_SALT); + iters = zfs_prop_get_int(zhp, ZFS_PROP_PBKDF2_ITERS); + } + +try_again: + /* fetching and deriving the key are correctible errors. 
set the flag */ + correctible = B_TRUE; + + /* get key material from key format and location */ + ret = get_key_material(zhp->zfs_hdl, B_FALSE, B_FALSE, keyformat, + keylocation, zfs_get_name(zhp), &key_material, &key_material_len, + &can_retry); + if (ret != 0) + goto error; + + /* derive a key from the key material */ + ret = derive_key(zhp->zfs_hdl, keyformat, iters, key_material, + key_material_len, salt, &key_data); + if (ret != 0) + goto error; + + correctible = B_FALSE; + + /* pass the wrapping key and noop flag to the ioctl */ + ret = lzc_load_key(zhp->zfs_name, noop, key_data, WRAPPING_KEY_LEN); + if (ret != 0) { + switch (ret) { + case EINVAL: + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Invalid parameters provided for %s."), + zfs_get_name(zhp)); + break; + case EEXIST: + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Key already loaded for '%s'."), zfs_get_name(zhp)); + break; + case EBUSY: + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "'%s' is busy."), zfs_get_name(zhp)); + break; + case EACCES: + correctible = B_TRUE; + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Incorrect key provided for '%s'."), + zfs_get_name(zhp)); + break; + } + goto error; + } + + free(key_material); + free(key_data); + + return (0); + +error: + (void) zfs_error(zhp->zfs_hdl, EZFS_CRYPTOFAILED, errbuf); + if (key_material != NULL) { + free(key_material); + key_material = NULL; + } + if (key_data != NULL) { + free(key_data); + key_data = NULL; + } + + /* + * Here we decide if it is ok to allow the user to retry entering their + * key. The can_retry flag will be set if the user is entering their + * key from an interactive prompt. The correctible flag will only be + * set if an error that occured could be corrected by retrying. 
Both + * flags are needed to allow the user to attempt key entry again + */ + if (can_retry && correctible && attempts <= MAX_KEY_PROMPT_ATTEMPTS) { + attempts++; + goto try_again; + } + + return (ret); +} + +int +zfs_crypto_unload_key(zfs_handle_t *zhp) +{ + int ret; + char errbuf[1024]; + char prop_encroot[MAXNAMELEN]; + uint64_t keystatus, keyformat; + boolean_t is_encroot; + + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, "Key unload error")); + + /* check that encryption is enabled for the pool */ + if (!encryption_feature_is_enabled(zhp->zpool_hdl)) { + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Encryption feature not enabled.")); + ret = EINVAL; + goto error; + } + + /* Fetch the keyformat. Check that the dataset is encrypted. */ + keyformat = zfs_prop_get_int(zhp, ZFS_PROP_KEYFORMAT); + if (keyformat == ZFS_KEYFORMAT_NONE) { + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "'%s' is not encrypted."), zfs_get_name(zhp)); + ret = EINVAL; + goto error; + } + + /* + * Fetch the key location. Check that we are working with an + * encryption root. 
+ */ + ret = zfs_crypto_get_encryption_root(zhp, &is_encroot, prop_encroot); + if (ret != 0) { + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Failed to get encryption root for '%s'."), + zfs_get_name(zhp)); + goto error; + } else if (!is_encroot) { + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Keys must be unloaded for encryption root of '%s' (%s)."), + zfs_get_name(zhp), prop_encroot); + ret = EINVAL; + goto error; + } + + /* check that the key is loaded */ + keystatus = zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS); + if (keystatus == ZFS_KEYSTATUS_UNAVAILABLE) { + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Key already unloaded for '%s'."), zfs_get_name(zhp)); + ret = EACCES; + goto error; + } + + /* call the ioctl */ + ret = lzc_unload_key(zhp->zfs_name); + + if (ret != 0) { + switch (ret) { + case EACCES: + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Key already unloaded for '%s'."), + zfs_get_name(zhp)); + break; + case EBUSY: + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "'%s' is busy."), zfs_get_name(zhp)); + break; + } + (void) zfs_error(zhp->zfs_hdl, EZFS_CRYPTOFAILED, errbuf); + } + + return (ret); + +error: + (void) zfs_error(zhp->zfs_hdl, EZFS_CRYPTOFAILED, errbuf); + return (ret); +} + +static int +zfs_crypto_verify_rewrap_nvlist(zfs_handle_t *zhp, nvlist_t *props, + nvlist_t **props_out, char *errbuf) +{ + int ret; + nvpair_t *elem = NULL; + zfs_prop_t prop; + nvlist_t *new_props = NULL; + + new_props = fnvlist_alloc(); + + /* + * loop through all provided properties, we should only have + * keyformat, keylocation and pbkdf2iters. The actual validation of + * values is done by zfs_valid_proplist(). 
+ */ + while ((elem = nvlist_next_nvpair(props, elem)) != NULL) { + const char *propname = nvpair_name(elem); + prop = zfs_name_to_prop(propname); + + switch (prop) { + case ZFS_PROP_PBKDF2_ITERS: + case ZFS_PROP_KEYFORMAT: + case ZFS_PROP_KEYLOCATION: + break; + default: + ret = EINVAL; + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Only keyformat, keylocation and pbkdf2iters may " + "be set with this command.")); + goto error; + } + } + + new_props = zfs_valid_proplist(zhp->zfs_hdl, zhp->zfs_type, props, + zfs_prop_get_int(zhp, ZFS_PROP_ZONED), NULL, zhp->zpool_hdl, + B_TRUE, errbuf); + if (new_props == NULL) + goto error; + + *props_out = new_props; + return (0); + +error: + nvlist_free(new_props); + *props_out = NULL; + return (ret); +} + +int +zfs_crypto_rewrap(zfs_handle_t *zhp, nvlist_t *raw_props, boolean_t inheritkey) +{ + int ret; + char errbuf[1024]; + boolean_t is_encroot; + nvlist_t *props = NULL; + uint8_t *wkeydata = NULL; + uint_t wkeylen = 0; + dcp_cmd_t cmd = (inheritkey) ? 
DCP_CMD_INHERIT : DCP_CMD_NEW_KEY; + uint64_t crypt, pcrypt, keystatus, pkeystatus; + uint64_t keyformat = ZFS_KEYFORMAT_NONE; + zfs_handle_t *pzhp = NULL; + char *keylocation = NULL; + char origin_name[MAXNAMELEN]; + char prop_keylocation[MAXNAMELEN]; + char parent_name[ZFS_MAX_DATASET_NAME_LEN]; + + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, "Key change error")); + + /* check that encryption is enabled for the pool */ + if (!encryption_feature_is_enabled(zhp->zpool_hdl)) { + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Encryption feature not enabled.")); + ret = EINVAL; + goto error; + } + + /* get crypt from dataset */ + crypt = zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION); + if (crypt == ZIO_CRYPT_OFF) { + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Dataset not encrypted.")); + ret = EINVAL; + goto error; + } + + /* get the encryption root of the dataset */ + ret = zfs_crypto_get_encryption_root(zhp, &is_encroot, NULL); + if (ret != 0) { + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Failed to get encryption root for '%s'."), + zfs_get_name(zhp)); + goto error; + } + + /* Clones use their origin's key and cannot rewrap it */ + ret = zfs_prop_get(zhp, ZFS_PROP_ORIGIN, origin_name, + sizeof (origin_name), NULL, NULL, 0, B_TRUE); + if (ret == 0 && strcmp(origin_name, "") != 0) { + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Keys cannot be changed on clones.")); + ret = EINVAL; + goto error; + } + + /* + * If the user wants to use the inheritkey variant of this function + * we don't need to collect any crypto arguments. + */ + if (!inheritkey) { + /* validate the provided properties */ + ret = zfs_crypto_verify_rewrap_nvlist(zhp, raw_props, &props, + errbuf); + if (ret != 0) + goto error; + + /* + * Load keyformat and keylocation from the nvlist. Fetch from + * the dataset properties if not specified. 
+ */ + (void) nvlist_lookup_uint64(props, + zfs_prop_to_name(ZFS_PROP_KEYFORMAT), &keyformat); + (void) nvlist_lookup_string(props, + zfs_prop_to_name(ZFS_PROP_KEYLOCATION), &keylocation); + + if (is_encroot) { + /* + * If this is already an ecryption root, just keep + * any properties not set by the user. + */ + if (keyformat == ZFS_KEYFORMAT_NONE) { + keyformat = zfs_prop_get_int(zhp, + ZFS_PROP_KEYFORMAT); + ret = nvlist_add_uint64(props, + zfs_prop_to_name(ZFS_PROP_KEYFORMAT), + keyformat); + } + + if (keylocation == NULL) { + ret = zfs_prop_get(zhp, ZFS_PROP_KEYLOCATION, + prop_keylocation, sizeof (prop_keylocation), + NULL, NULL, 0, B_TRUE); + if (ret != 0) { + zfs_error_aux(zhp->zfs_hdl, + dgettext(TEXT_DOMAIN, "Failed to " + "get existing keylocation " + "property.")); + goto error; + } + + keylocation = prop_keylocation; + } + } else { + /* need a new key for non-encryption roots */ + if (keyformat == ZFS_KEYFORMAT_NONE) { + ret = EINVAL; + zfs_error_aux(zhp->zfs_hdl, + dgettext(TEXT_DOMAIN, "Keyformat required " + "for new encryption root.")); + goto error; + } + + /* default to prompt if no keylocation is specified */ + if (keylocation == NULL) { + keylocation = "prompt"; + ret = nvlist_add_string(props, + zfs_prop_to_name(ZFS_PROP_KEYLOCATION), + keylocation); + if (ret != 0) + goto error; + } + } + + /* fetch the new wrapping key and associated properties */ + ret = populate_create_encryption_params_nvlists(zhp->zfs_hdl, + zhp, B_TRUE, keyformat, keylocation, props, &wkeydata, + &wkeylen); + if (ret != 0) + goto error; + } else { + /* check that zhp is an encryption root */ + if (!is_encroot) { + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Key inheriting can only be performed on " + "encryption roots.")); + ret = EINVAL; + goto error; + } + + /* get the parent's name */ + ret = zfs_parent_name(zhp, parent_name, sizeof (parent_name)); + if (ret != 0) { + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Root dataset cannot inherit key.")); + 
ret = EINVAL; + goto error; + } + + /* get a handle to the parent */ + pzhp = make_dataset_handle(zhp->zfs_hdl, parent_name); + if (pzhp == NULL) { + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Failed to lookup parent.")); + ret = ENOENT; + goto error; + } + + /* parent must be encrypted */ + pcrypt = zfs_prop_get_int(pzhp, ZFS_PROP_ENCRYPTION); + if (pcrypt == ZIO_CRYPT_OFF) { + zfs_error_aux(pzhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Parent must be encrypted.")); + ret = EINVAL; + goto error; + } + + /* check that the parent's key is loaded */ + pkeystatus = zfs_prop_get_int(pzhp, ZFS_PROP_KEYSTATUS); + if (pkeystatus == ZFS_KEYSTATUS_UNAVAILABLE) { + zfs_error_aux(pzhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Parent key must be loaded.")); + ret = EACCES; + goto error; + } + } + + /* check that the key is loaded */ + keystatus = zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS); + if (keystatus == ZFS_KEYSTATUS_UNAVAILABLE) { + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Key must be loaded.")); + ret = EACCES; + goto error; + } + + /* call the ioctl */ + ret = lzc_change_key(zhp->zfs_name, cmd, props, wkeydata, wkeylen); + if (ret != 0) { + switch (ret) { + case EINVAL: + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Invalid properties for key change.")); + break; + case EACCES: + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Key is not currently loaded.")); + break; + } + (void) zfs_error(zhp->zfs_hdl, EZFS_CRYPTOFAILED, errbuf); + } + + if (pzhp != NULL) + zfs_close(pzhp); + if (props != NULL) + nvlist_free(props); + if (wkeydata != NULL) + free(wkeydata); + + return (ret); + +error: + if (pzhp != NULL) + zfs_close(pzhp); + if (props != NULL) + nvlist_free(props); + if (wkeydata != NULL) + free(wkeydata); + + (void) zfs_error(zhp->zfs_hdl, EZFS_CRYPTOFAILED, errbuf); + return (ret); +} diff --git a/usr/src/lib/libzfs/common/libzfs_dataset.c b/usr/src/lib/libzfs/common/libzfs_dataset.c index 2ca09e51d4..117e414a9a 100644 --- 
a/usr/src/lib/libzfs/common/libzfs_dataset.c +++ b/usr/src/lib/libzfs/common/libzfs_dataset.c @@ -59,6 +59,7 @@ #include <sys/dnode.h> #include <sys/spa.h> #include <sys/zap.h> +#include <sys/dsl_crypt.h> #include <libzfs.h> #include "zfs_namecheck.h" @@ -951,7 +952,7 @@ zfs_which_resv_prop(zfs_handle_t *zhp, zfs_prop_t *resv_prop) nvlist_t * zfs_valid_proplist(libzfs_handle_t *hdl, zfs_type_t type, nvlist_t *nvl, uint64_t zoned, zfs_handle_t *zhp, zpool_handle_t *zpool_hdl, - const char *errbuf) + boolean_t key_params_ok, const char *errbuf) { nvpair_t *elem; uint64_t intval; @@ -1108,7 +1109,8 @@ zfs_valid_proplist(libzfs_handle_t *hdl, zfs_type_t type, nvlist_t *nvl, } if (zfs_prop_readonly(prop) && - (!zfs_prop_setonce(prop) || zhp != NULL)) { + !(zfs_prop_setonce(prop) && zhp == NULL) && + !(zfs_prop_encryption_key_param(prop) && key_params_ok)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' is readonly"), propname); @@ -1403,6 +1405,48 @@ badlabel: break; + case ZFS_PROP_KEYLOCATION: + if (!zfs_prop_valid_keylocation(strval, B_FALSE)) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "invalid keylocation")); + (void) zfs_error(hdl, EZFS_BADPROP, errbuf); + goto error; + } + + if (zhp != NULL) { + uint64_t crypt = + zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION); + + if (crypt == ZIO_CRYPT_OFF && + strcmp(strval, "none") != 0) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "keylocation must be 'none' " + "for unencrypted datasets")); + (void) zfs_error(hdl, EZFS_BADPROP, + errbuf); + goto error; + } else if (crypt != ZIO_CRYPT_OFF && + strcmp(strval, "none") == 0) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "keylocation must not be 'none' " + "for encrypted datasets")); + (void) zfs_error(hdl, EZFS_BADPROP, + errbuf); + goto error; + } + } + break; + + case ZFS_PROP_PBKDF2_ITERS: + if (intval < MIN_PBKDF2_ITERATIONS) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "minimum pbkdf2 iterations is %u"), + MIN_PBKDF2_ITERATIONS); + (void) zfs_error(hdl, EZFS_BADPROP, 
errbuf); + goto error; + } + break; + case ZFS_PROP_UTF8ONLY: chosen_utf = (int)intval; break; @@ -1476,6 +1520,27 @@ badlabel: break; } } + + /* check encryption properties */ + if (zhp != NULL) { + int64_t crypt = zfs_prop_get_int(zhp, + ZFS_PROP_ENCRYPTION); + + switch (prop) { + case ZFS_PROP_COPIES: + if (crypt != ZIO_CRYPT_OFF && intval > 2) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "encrypted datasets cannot have " + "3 copies")); + (void) zfs_error(hdl, EZFS_BADPROP, + errbuf); + goto error; + } + break; + default: + break; + } + } } /* @@ -1688,6 +1753,16 @@ zfs_setprop_error(libzfs_handle_t *hdl, zfs_prop_t prop, int err, } break; + case EACCES: + if (prop == ZFS_PROP_KEYLOCATION) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "keylocation may only be set on encryption roots")); + (void) zfs_error(hdl, EZFS_BADPROP, errbuf); + } else { + (void) zfs_standard_error(hdl, err, errbuf); + } + break; + case EOVERFLOW: /* * This platform can't address a volume this big. @@ -1757,7 +1832,7 @@ zfs_prop_set_list(zfs_handle_t *zhp, nvlist_t *props) if ((nvl = zfs_valid_proplist(hdl, zhp->zfs_type, props, zfs_prop_get_int(zhp, ZFS_PROP_ZONED), zhp, zhp->zpool_hdl, - errbuf)) == NULL) + B_FALSE, errbuf)) == NULL) goto error; /* @@ -3254,6 +3329,12 @@ parent_name(const char *path, char *buf, size_t buflen) return (0); } +int +zfs_parent_name(zfs_handle_t *zhp, char *buf, size_t buflen) +{ + return (parent_name(zfs_get_name(zhp), buf, buflen)); +} + /* * If accept_ancestor is false, then check to make sure that the given path has * a parent, and that it exists. 
If accept_ancestor is true, then find the @@ -3486,7 +3567,10 @@ zfs_create(libzfs_handle_t *hdl, const char *path, zfs_type_t type, int ret; uint64_t size = 0; uint64_t blocksize = zfs_prop_default_numeric(ZFS_PROP_VOLBLOCKSIZE); + uint8_t *wkeydata = NULL; + uint_t wkeylen = 0; char errbuf[1024]; + char parent[MAXNAMELEN]; uint64_t zoned; enum lzc_dataset_type ost; zpool_handle_t *zpool_handle; @@ -3539,7 +3623,7 @@ zfs_create(libzfs_handle_t *hdl, const char *path, zfs_type_t type, return (-1); if (props && (props = zfs_valid_proplist(hdl, type, props, - zoned, NULL, zpool_handle, errbuf)) == 0) { + zoned, NULL, zpool_handle, B_TRUE, errbuf)) == 0) { zpool_close(zpool_handle); return (-1); } @@ -3591,15 +3675,21 @@ zfs_create(libzfs_handle_t *hdl, const char *path, zfs_type_t type, } } + (void) parent_name(path, parent, sizeof (parent)); + if (zfs_crypto_create(hdl, parent, props, NULL, &wkeydata, + &wkeylen) != 0) { + nvlist_free(props); + return (zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf)); + } + /* create the dataset */ - ret = lzc_create(path, ost, props); + ret = lzc_create(path, ost, props, wkeydata, wkeylen); nvlist_free(props); + if (wkeydata != NULL) + free(wkeydata); /* check for failure */ if (ret != 0) { - char parent[ZFS_MAX_DATASET_NAME_LEN]; - (void) parent_name(path, parent, sizeof (parent)); - switch (errno) { case ENOENT: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, @@ -3620,6 +3710,12 @@ zfs_create(libzfs_handle_t *hdl, const char *path, zfs_type_t type, zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid property value(s) specified")); return (zfs_error(hdl, EZFS_BADPROP, errbuf)); + case EACCES: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "encryption root's key is not loaded " + "or provided")); + return (zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf)); + #ifdef _ILP32 case EOVERFLOW: /* @@ -3815,7 +3911,7 @@ zfs_clone(zfs_handle_t *zhp, const char *target, nvlist_t *props) type = ZFS_TYPE_FILESYSTEM; } if ((props = zfs_valid_proplist(hdl, type, props, 
zoned, - zhp, zhp->zpool_hdl, errbuf)) == NULL) + zhp, zhp->zpool_hdl, B_TRUE, errbuf)) == NULL) return (-1); if (zfs_fix_auto_resv(zhp, props) == -1) { nvlist_free(props); @@ -3823,6 +3919,11 @@ zfs_clone(zfs_handle_t *zhp, const char *target, nvlist_t *props) } } + if (zfs_crypto_clone_check(hdl, zhp, parent, props) != 0) { + nvlist_free(props); + return (zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf)); + } + ret = lzc_clone(target, zhp->zfs_name, props); nvlist_free(props); @@ -4001,7 +4102,7 @@ zfs_snapshot_nvl(libzfs_handle_t *hdl, nvlist_t *snaps, nvlist_t *props) if (props != NULL && (props = zfs_valid_proplist(hdl, ZFS_TYPE_SNAPSHOT, - props, B_FALSE, NULL, zpool_hdl, errbuf)) == NULL) { + props, B_FALSE, NULL, zpool_hdl, B_FALSE, errbuf)) == NULL) { zpool_close(zpool_hdl); return (-1); } @@ -4392,6 +4493,18 @@ zfs_rename(zfs_handle_t *zhp, const char *target, boolean_t recursive, "a child dataset already has a snapshot " "with the new name")); (void) zfs_error(hdl, EZFS_EXISTS, errbuf); + } else if (errno == EACCES) { + if (zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION) == + ZIO_CRYPT_OFF) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "cannot rename an unencrypted dataset to " + "be a decendent of an encrypted one")); + } else { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "cannot move encryption child outside of " + "its encryption root")); + } + (void) zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf); } else { (void) zfs_standard_error(zhp->zfs_hdl, errno, errbuf); } diff --git a/usr/src/lib/libzfs/common/libzfs_diff.c b/usr/src/lib/libzfs/common/libzfs_diff.c index ad1fa67f1f..4d2bd5156d 100644 --- a/usr/src/lib/libzfs/common/libzfs_diff.c +++ b/usr/src/lib/libzfs/common/libzfs_diff.c @@ -112,6 +112,11 @@ get_stats_for_obj(differ_info_t *di, const char *dsname, uint64_t obj, "The sys_config privilege or diff delegated permission " "is needed\nto discover path names")); return (-1); + } else if (di->zerr == EACCES) { + (void) snprintf(di->errbuf, sizeof (di->errbuf), + 
dgettext(TEXT_DOMAIN, + "Key must be loaded to discover path names")); + return (-1); } else { (void) snprintf(di->errbuf, sizeof (di->errbuf), dgettext(TEXT_DOMAIN, diff --git a/usr/src/lib/libzfs/common/libzfs_iter.c b/usr/src/lib/libzfs/common/libzfs_iter.c index bdef9757ef..cb5ed6b005 100644 --- a/usr/src/lib/libzfs/common/libzfs_iter.c +++ b/usr/src/lib/libzfs/common/libzfs_iter.c @@ -191,6 +191,7 @@ zfs_iter_bookmarks(zfs_handle_t *zhp, zfs_iter_f func, void *data) fnvlist_add_boolean(props, zfs_prop_to_name(ZFS_PROP_GUID)); fnvlist_add_boolean(props, zfs_prop_to_name(ZFS_PROP_CREATETXG)); fnvlist_add_boolean(props, zfs_prop_to_name(ZFS_PROP_CREATION)); + fnvlist_add_boolean(props, zfs_prop_to_name(ZFS_PROP_IVSET_GUID)); if ((err = lzc_get_bookmarks(zhp->zfs_name, props, &bmarks)) != 0) goto out; diff --git a/usr/src/lib/libzfs/common/libzfs_mount.c b/usr/src/lib/libzfs/common/libzfs_mount.c index 9bbf4d2233..44c0ec89e9 100644 --- a/usr/src/lib/libzfs/common/libzfs_mount.c +++ b/usr/src/lib/libzfs/common/libzfs_mount.c @@ -80,6 +80,7 @@ #include <sys/mount.h> #include <sys/stat.h> #include <sys/statvfs.h> +#include <sys/dsl_crypt.h> #include <libzfs.h> @@ -339,6 +340,8 @@ zfs_mount(zfs_handle_t *zhp, const char *options, int flags) char mountpoint[ZFS_MAXPROPLEN]; char mntopts[MNT_LINE_MAX]; libzfs_handle_t *hdl = zhp->zfs_hdl; + uint64_t keystatus; + int rc; if (options == NULL) mntopts[0] = '\0'; @@ -354,6 +357,39 @@ zfs_mount(zfs_handle_t *zhp, const char *options, int flags) if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint), NULL)) return (0); + /* + * If the filesystem is encrypted the key must be loaded in order to + * mount. If the key isn't loaded, the MS_CRYPT flag decides whether + * or not we attempt to load the keys. Note: we must call + * zfs_refresh_properties() here since some callers of this function + * (most notably zpool_enable_datasets()) may implicitly load our key + * by loading the parent's key first. 
+ */ + if (zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION) != ZIO_CRYPT_OFF) { + zfs_refresh_properties(zhp); + keystatus = zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS); + + /* + * If the key is unavailable and MS_CRYPT is set give the + * user a chance to enter the key. Otherwise just fail + * immediately. + */ + if (keystatus == ZFS_KEYSTATUS_UNAVAILABLE) { + if (flags & MS_CRYPT) { + rc = zfs_crypto_load_key(zhp, B_FALSE, NULL); + if (rc != 0) + return (rc); + } else { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "encryption key not loaded")); + return (zfs_error_fmt(hdl, EZFS_MOUNTFAILED, + dgettext(TEXT_DOMAIN, "cannot mount '%s'"), + mountpoint)); + } + } + + } + /* Create the directory if it doesn't already exist */ if (lstat(mountpoint, &buf) != 0) { if (mkdirp(mountpoint, 0755) != 0) { @@ -1121,6 +1157,12 @@ zfs_iter_cb(zfs_handle_t *zhp, void *data) return (0); } + if (zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS) == + ZFS_KEYSTATUS_UNAVAILABLE) { + zfs_close(zhp); + return (0); + } + /* * If this filesystem is inconsistent and has a receive resume * token, we can not mount it. 
@@ -1313,6 +1355,10 @@ zfs_mount_one(zfs_handle_t *zhp, void *arg) mount_state_t *ms = arg; int ret = 0; + if (zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS) == + ZFS_KEYSTATUS_UNAVAILABLE) + return (0); + if (zfs_mount(zhp, ms->ms_mntopts, ms->ms_mntflags) != 0) ret = ms->ms_mntstatus = -1; return (ret); diff --git a/usr/src/lib/libzfs/common/libzfs_pool.c b/usr/src/lib/libzfs/common/libzfs_pool.c index 1f636dd147..f82518d86b 100644 --- a/usr/src/lib/libzfs/common/libzfs_pool.c +++ b/usr/src/lib/libzfs/common/libzfs_pool.c @@ -1162,6 +1162,9 @@ zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot, zfs_cmd_t zc = { 0 }; nvlist_t *zc_fsprops = NULL; nvlist_t *zc_props = NULL; + nvlist_t *hidden_args = NULL; + uint8_t *wkeydata = NULL; + uint_t wkeylen = 0; char msg[1024]; int ret = -1; @@ -1192,7 +1195,7 @@ zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot, strcmp(zonestr, "on") == 0); if ((zc_fsprops = zfs_valid_proplist(hdl, ZFS_TYPE_FILESYSTEM, - fsprops, zoned, NULL, NULL, msg)) == NULL) { + fsprops, zoned, NULL, NULL, B_TRUE, msg)) == NULL) { goto create_failed; } @@ -1210,10 +1213,27 @@ zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot, (nvlist_alloc(&zc_props, NV_UNIQUE_NAME, 0) != 0)) { goto create_failed; } + if (zfs_crypto_create(hdl, NULL, zc_fsprops, props, + &wkeydata, &wkeylen) != 0) { + (void) zfs_error(hdl, EZFS_CRYPTOFAILED, msg); + goto create_failed; + } if (nvlist_add_nvlist(zc_props, ZPOOL_ROOTFS_PROPS, zc_fsprops) != 0) { goto create_failed; } + if (wkeydata != NULL) { + if (nvlist_alloc(&hidden_args, NV_UNIQUE_NAME, 0) != 0) + goto create_failed; + + if (nvlist_add_uint8_array(hidden_args, "wkeydata", + wkeydata, wkeylen) != 0) + goto create_failed; + + if (nvlist_add_nvlist(zc_props, ZPOOL_HIDDEN_ARGS, + hidden_args) != 0) + goto create_failed; + } } if (zc_props && zcmd_write_src_nvlist(hdl, &zc, zc_props) != 0) @@ -1226,6 +1246,9 @@ zpool_create(libzfs_handle_t *hdl, const char *pool, 
nvlist_t *nvroot, zcmd_free_nvlists(&zc); nvlist_free(zc_props); nvlist_free(zc_fsprops); + nvlist_free(hidden_args); + if (wkeydata != NULL) + free(wkeydata); switch (errno) { case EBUSY: @@ -1286,6 +1309,9 @@ create_failed: zcmd_free_nvlists(&zc); nvlist_free(zc_props); nvlist_free(zc_fsprops); + nvlist_free(hidden_args); + if (wkeydata != NULL) + free(wkeydata); return (ret); } diff --git a/usr/src/lib/libzfs/common/libzfs_sendrecv.c b/usr/src/lib/libzfs/common/libzfs_sendrecv.c index 7ed81fd0d1..c933a24e89 100644 --- a/usr/src/lib/libzfs/common/libzfs_sendrecv.c +++ b/usr/src/lib/libzfs/common/libzfs_sendrecv.c @@ -55,6 +55,7 @@ #include <zlib.h> #include <sha2.h> #include <sys/zio_checksum.h> +#include <sys/dsl_crypt.h> #include <sys/ddt.h> /* in libzfs_dataset.c */ @@ -324,11 +325,9 @@ cksummer(void *arg) struct drr_object *drro = &drr->drr_u.drr_object; if (drro->drr_bonuslen > 0) { (void) ssread(buf, - P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8), - ofp); + DRR_OBJECT_PAYLOAD_SIZE(drro), ofp); } - if (dump_record(drr, buf, - P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8), + if (dump_record(drr, buf, DRR_OBJECT_PAYLOAD_SIZE(drro), &stream_cksum, outfd) != 0) goto out; break; @@ -337,8 +336,8 @@ cksummer(void *arg) case DRR_SPILL: { struct drr_spill *drrs = &drr->drr_u.drr_spill; - (void) ssread(buf, drrs->drr_length, ofp); - if (dump_record(drr, buf, drrs->drr_length, + (void) ssread(buf, DRR_SPILL_PAYLOAD_SIZE(drrs), ofp); + if (dump_record(drr, buf, DRR_SPILL_PAYLOAD_SIZE(drrs), &stream_cksum, outfd) != 0) goto out; break; @@ -368,7 +367,7 @@ cksummer(void *arg) if (ZIO_CHECKSUM_EQUAL(drrw->drr_key.ddk_cksum, zero_cksum) || - !DRR_IS_DEDUP_CAPABLE(drrw->drr_checksumflags)) { + !DRR_IS_DEDUP_CAPABLE(drrw->drr_flags)) { SHA256_CTX ctx; zio_cksum_t tmpsha256; @@ -384,7 +383,7 @@ cksummer(void *arg) drrw->drr_key.ddk_cksum.zc_word[3] = BE_64(tmpsha256.zc_word[3]); drrw->drr_checksumtype = ZIO_CHECKSUM_SHA256; - drrw->drr_checksumflags = DRR_CHECKSUM_DEDUP; + 
drrw->drr_flags |= DRR_CHECKSUM_DEDUP; } dataref.ref_guid = drrw->drr_toguid; @@ -413,8 +412,7 @@ cksummer(void *arg) wbr_drrr->drr_checksumtype = drrw->drr_checksumtype; - wbr_drrr->drr_checksumflags = - drrw->drr_checksumtype; + wbr_drrr->drr_flags = drrw->drr_flags; wbr_drrr->drr_key.ddk_cksum = drrw->drr_key.ddk_cksum; wbr_drrr->drr_key.ddk_prop = @@ -453,6 +451,14 @@ cksummer(void *arg) break; } + case DRR_OBJECT_RANGE: + { + if (dump_record(drr, NULL, 0, &stream_cksum, + outfd) != 0) + goto out; + break; + } + default: (void) fprintf(stderr, "INVALID record type 0x%x\n", drr->drr_type); @@ -601,6 +607,7 @@ typedef struct send_data { const char *fsname; const char *fromsnap; const char *tosnap; + boolean_t raw; boolean_t recursive; boolean_t verbose; @@ -620,6 +627,7 @@ typedef struct send_data { * "snapprops" -> { name (lastname) -> { name -> value } } * * "origin" -> number (guid) (if clone) + * "is_encroot" -> boolean * "sent" -> boolean (not on-disk) * } * } @@ -778,7 +786,7 @@ static int send_iterate_fs(zfs_handle_t *zhp, void *arg) { send_data_t *sd = arg; - nvlist_t *nvfs, *nv; + nvlist_t *nvfs = NULL, *nv = NULL; int rv = 0; uint64_t parent_fromsnap_guid_save = sd->parent_fromsnap_guid; uint64_t fromsnap_txg_save = sd->fromsnap_txg; @@ -842,8 +850,37 @@ send_iterate_fs(zfs_handle_t *zhp, void *arg) /* iterate over props */ VERIFY(0 == nvlist_alloc(&nv, NV_UNIQUE_NAME, 0)); send_iterate_prop(zhp, nv); + + if (zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION) != ZIO_CRYPT_OFF) { + boolean_t encroot; + + /* determine if this dataset is an encryption root */ + if (zfs_crypto_get_encryption_root(zhp, &encroot, NULL) != 0) { + rv = -1; + goto out; + } + + if (encroot) + VERIFY(0 == nvlist_add_boolean(nvfs, "is_encroot")); + + /* + * Encrypted datasets can only be sent with properties if + * the raw flag is specified because the receive side doesn't + * currently have a mechanism for recursively asking the user + * for new encryption parameters. 
+ */ + if (!sd->raw) { + (void) fprintf(stderr, dgettext(TEXT_DOMAIN, + "cannot send %s@%s: encrypted dataset %s may not " + "be sent with properties without the raw flag\n"), + sd->fsname, sd->tosnap, zhp->zfs_name); + rv = -1; + goto out; + } + + } + VERIFY(0 == nvlist_add_nvlist(nvfs, "props", nv)); - nvlist_free(nv); /* iterate over snaps, and set sd->parent_fromsnap_guid */ sd->parent_fromsnap_guid = 0; @@ -859,7 +896,6 @@ send_iterate_fs(zfs_handle_t *zhp, void *arg) (void) snprintf(guidstring, sizeof (guidstring), "0x%llx", (longlong_t)guid); VERIFY(0 == nvlist_add_nvlist(sd->fss, guidstring, nvfs)); - nvlist_free(nvfs); /* iterate over children */ if (sd->recursive) @@ -869,6 +905,8 @@ out: sd->parent_fromsnap_guid = parent_fromsnap_guid_save; sd->fromsnap_txg = fromsnap_txg_save; sd->tosnap_txg = tosnap_txg_save; + nvlist_free(nv); + nvlist_free(nvfs); zfs_close(zhp); return (rv); @@ -876,7 +914,7 @@ out: static int gather_nvlist(libzfs_handle_t *hdl, const char *fsname, const char *fromsnap, - const char *tosnap, boolean_t recursive, boolean_t verbose, + const char *tosnap, boolean_t recursive, boolean_t raw, boolean_t verbose, nvlist_t **nvlp, avl_tree_t **avlp) { zfs_handle_t *zhp; @@ -892,6 +930,7 @@ gather_nvlist(libzfs_handle_t *hdl, const char *fsname, const char *fromsnap, sd.fromsnap = fromsnap; sd.tosnap = tosnap; sd.recursive = recursive; + sd.raw = raw; sd.verbose = verbose; if ((error = send_iterate_fs(zhp, &sd)) != 0) { @@ -923,7 +962,7 @@ typedef struct send_dump_data { uint64_t prevsnap_obj; boolean_t seenfrom, seento, replicate, doall, fromorigin; boolean_t verbose, dryrun, parsable, progress, embed_data, std_out; - boolean_t large_block, compress; + boolean_t large_block, compress, raw; int outfd; boolean_t err; nvlist_t *fss; @@ -965,6 +1004,11 @@ estimate_ioctl(zfs_handle_t *zhp, uint64_t fromsnap_obj, "not an earlier snapshot from the same fs")); return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf)); + case EACCES: + zfs_error_aux(hdl, 
dgettext(TEXT_DOMAIN, + "source key must be loaded")); + return (zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf)); + case ENOENT: if (zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_SNAPSHOT)) { @@ -1045,6 +1089,11 @@ dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, uint64_t fromsnap_obj, "not an earlier snapshot from the same fs")); return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf)); + case EACCES: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "source key must be loaded")); + return (zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf)); + case ENOENT: if (zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_SNAPSHOT)) { @@ -1226,6 +1275,8 @@ dump_snapshot(zfs_handle_t *zhp, void *arg) flags |= LZC_SEND_FLAG_EMBED_DATA; if (sdd->compress) flags |= LZC_SEND_FLAG_COMPRESS; + if (sdd->raw) + flags |= LZC_SEND_FLAG_RAW; if (!sdd->doall && !isfromsnap && !istosnap) { if (sdd->replicate) { @@ -1610,6 +1661,8 @@ zfs_send_resume(libzfs_handle_t *hdl, sendflags_t *flags, int outfd, lzc_flags |= LZC_SEND_FLAG_EMBED_DATA; if (flags->compress || nvlist_exists(resume_nvl, "compressok")) lzc_flags |= LZC_SEND_FLAG_COMPRESS; + if (flags->raw || nvlist_exists(resume_nvl, "rawok")) + lzc_flags |= LZC_SEND_FLAG_RAW; if (guid_to_name(hdl, toname, toguid, B_FALSE, name) != 0) { if (zfs_dataset_exists(hdl, toname, ZFS_TYPE_DATASET)) { @@ -1687,6 +1740,11 @@ zfs_send_resume(libzfs_handle_t *hdl, sendflags_t *flags, int outfd, switch (error) { case 0: return (0); + case EACCES: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "source key must be loaded")); + return (zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf)); + case EXDEV: case ENOENT: case EDQUOT: @@ -1765,7 +1823,14 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap, } } - if (flags->dedup && !flags->dryrun) { + /* + * Start the dedup thread if this is a dedup stream. We do not bother + * doing this if this a raw send of an encrypted dataset with dedup off + * because normal encrypted blocks won't dedup. 
+ */ + if (flags->dedup && !flags->dryrun && !(flags->raw && + zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION) != ZIO_CRYPT_OFF && + zfs_prop_get_int(zhp, ZFS_PROP_DEDUP) == ZIO_CHECKSUM_OFF)) { featureflags |= (DMU_BACKUP_FEATURE_DEDUP | DMU_BACKUP_FEATURE_DEDUPPROPS); if ((err = pipe(pipefd)) != 0) { @@ -1804,10 +1869,13 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap, VERIFY(0 == nvlist_add_boolean(hdrnv, "not_recursive")); } + if (flags->raw) { + VERIFY(0 == nvlist_add_boolean(hdrnv, "raw")); + } err = gather_nvlist(zhp->zfs_hdl, zhp->zfs_name, - fromsnap, tosnap, flags->replicate, flags->verbose, - &fss, &fsavl); + fromsnap, tosnap, flags->replicate, flags->raw, + flags->verbose, &fss, &fsavl); if (err) goto err_out; VERIFY(0 == nvlist_add_nvlist(hdrnv, "fss", fss)); @@ -1872,6 +1940,7 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap, sdd.large_block = flags->largeblock; sdd.embed_data = flags->embed_data; sdd.compress = flags->compress; + sdd.raw = flags->raw; sdd.filter_cb = filter_func; sdd.filter_cb_arg = cb_arg; if (debugnvp) @@ -2033,6 +2102,11 @@ zfs_send_one(zfs_handle_t *zhp, const char *from, int fd, } return (zfs_error(hdl, EZFS_NOENT, errbuf)); + case EACCES: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "dataset key must be loaded")); + return (zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf)); + case EBUSY: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "target is busy; if a filesystem, " @@ -2123,26 +2197,86 @@ recv_read_nvlist(libzfs_handle_t *hdl, int fd, int len, nvlist_t **nvp, return (0); } +/* + * Returns the grand origin (origin of origin of origin...) of a given handle. + * If this dataset is not a clone, it simply returns a copy of the original + * handle. 
+ */ +static zfs_handle_t * +recv_open_grand_origin(zfs_handle_t *zhp) +{ + char origin[ZFS_MAX_DATASET_NAME_LEN]; + zprop_source_t src; + zfs_handle_t *ozhp = zfs_handle_dup(zhp); + + while (ozhp != NULL) { + if (zfs_prop_get(ozhp, ZFS_PROP_ORIGIN, origin, + sizeof (origin), &src, NULL, 0, B_FALSE) != 0) + break; + + (void) zfs_close(ozhp); + ozhp = zfs_open(zhp->zfs_hdl, origin, ZFS_TYPE_FILESYSTEM); + } + + return (ozhp); +} + +static int +recv_rename_impl(zfs_handle_t *zhp, const char *source, const char *target) +{ + int err; + zfs_handle_t *ozhp = NULL; + + /* + * Attempt to rename the dataset. If it fails with EACCES we have + * attempted to rename the dataset outside of its encryption root. + * Force the dataset to become an encryption root and try again. + */ + err = lzc_rename(source, target); + if (err == EACCES) { + ozhp = recv_open_grand_origin(zhp); + if (ozhp == NULL) { + err = ENOENT; + goto out; + } + + err = lzc_change_key(ozhp->zfs_name, DCP_CMD_FORCE_NEW_KEY, + NULL, NULL, 0); + if (err != 0) + goto out; + + err = lzc_rename(source, target); + } + +out: + if (ozhp != NULL) + zfs_close(ozhp); + return (err); +} + static int recv_rename(libzfs_handle_t *hdl, const char *name, const char *tryname, int baselen, char *newname, recvflags_t *flags) { static int seq; int err; - prop_changelist_t *clp; - zfs_handle_t *zhp; + prop_changelist_t *clp = NULL; + zfs_handle_t *zhp = NULL; zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET); - if (zhp == NULL) - return (-1); + if (zhp == NULL) { + err = -1; + goto out; + } clp = changelist_gather(zhp, ZFS_PROP_NAME, 0, flags->force ? 
MS_FORCE : 0); - zfs_close(zhp); - if (clp == NULL) - return (-1); + if (clp == NULL) { + err = -1; + goto out; + } err = changelist_prefix(clp); if (err) - return (err); + goto out; if (tryname) { (void) strcpy(newname, tryname); @@ -2150,7 +2284,7 @@ recv_rename(libzfs_handle_t *hdl, const char *name, const char *tryname, (void) printf("attempting rename %s to %s\n", name, newname); } - err = lzc_rename(name, newname); + err = recv_rename_impl(zhp, name, newname); if (err == 0) changelist_rename(clp, name, tryname); } else { @@ -2166,7 +2300,7 @@ recv_rename(libzfs_handle_t *hdl, const char *name, const char *tryname, (void) printf("failed - trying rename %s to %s\n", name, newname); } - err = lzc_rename(name, newname); + err = recv_rename_impl(zhp, name, newname); if (err == 0) changelist_rename(clp, name, newname); if (err && flags->verbose) { @@ -2182,7 +2316,62 @@ recv_rename(libzfs_handle_t *hdl, const char *name, const char *tryname, } (void) changelist_postfix(clp); - changelist_free(clp); + +out: + if (clp != NULL) + changelist_free(clp); + if (zhp != NULL) + zfs_close(zhp); + + return (err); +} + +static int +recv_promote(libzfs_handle_t *hdl, const char *fsname, + const char *origin_fsname, recvflags_t *flags) +{ + int err; + zfs_cmd_t zc = {"\0"}; + zfs_handle_t *zhp = NULL, *ozhp = NULL; + + if (flags->verbose) + (void) printf("promoting %s\n", fsname); + + (void) strlcpy(zc.zc_value, origin_fsname, sizeof (zc.zc_value)); + (void) strlcpy(zc.zc_name, fsname, sizeof (zc.zc_name)); + + /* + * Attempt to promote the dataset. If it fails with EACCES the + * promotion would cause this dataset to leave its encryption root. + * Force the origin to become an encryption root and try again. 
+ */ + err = zfs_ioctl(hdl, ZFS_IOC_PROMOTE, &zc); + if (err == EACCES) { + zhp = zfs_open(hdl, fsname, ZFS_TYPE_DATASET); + if (zhp == NULL) { + err = -1; + goto out; + } + + ozhp = recv_open_grand_origin(zhp); + if (ozhp == NULL) { + err = -1; + goto out; + } + + err = lzc_change_key(ozhp->zfs_name, DCP_CMD_FORCE_NEW_KEY, + NULL, NULL, 0); + if (err != 0) + goto out; + + err = zfs_ioctl(hdl, ZFS_IOC_PROMOTE, &zc); + } + +out: + if (zhp != NULL) + zfs_close(zhp); + if (ozhp != NULL) + zfs_close(ozhp); return (err); } @@ -2386,6 +2575,150 @@ created_before(libzfs_handle_t *hdl, avl_tree_t *avl, return (rv); } +/* + * This function reestablishes the heirarchy of encryption roots after a + * recursive incremental receive has completed. This must be done after the + * second call to recv_incremental_replication() has renamed and promoted all + * sent datasets to their final locations in the dataset heriarchy. + */ +/* ARGSUSED */ +static int +recv_fix_encryption_hierarchy(libzfs_handle_t *hdl, const char *destname, + nvlist_t *stream_nv, avl_tree_t *stream_avl) +{ + int err; + nvpair_t *fselem = NULL; + nvlist_t *stream_fss; + char *cp; + char top_zfs[ZFS_MAX_DATASET_NAME_LEN]; + + (void) strcpy(top_zfs, destname); + cp = strrchr(top_zfs, '@'); + if (cp != NULL) + *cp = '\0'; + + VERIFY(0 == nvlist_lookup_nvlist(stream_nv, "fss", &stream_fss)); + + while ((fselem = nvlist_next_nvpair(stream_fss, fselem)) != NULL) { + zfs_handle_t *zhp = NULL; + uint64_t crypt; + nvlist_t *snaps, *props, *stream_nvfs = NULL; + nvpair_t *snapel = NULL; + boolean_t is_encroot, is_clone, stream_encroot; + char *cp; + char *stream_keylocation = NULL; + char keylocation[MAXNAMELEN]; + char fsname[ZFS_MAX_DATASET_NAME_LEN]; + + keylocation[0] = '\0'; + VERIFY(0 == nvpair_value_nvlist(fselem, &stream_nvfs)); + VERIFY(0 == nvlist_lookup_nvlist(stream_nvfs, "snaps", &snaps)); + VERIFY(0 == nvlist_lookup_nvlist(stream_nvfs, "props", &props)); + stream_encroot = nvlist_exists(stream_nvfs, 
"is_encroot"); + + /* find a snapshot from the stream that exists locally */ + err = ENOENT; + while ((snapel = nvlist_next_nvpair(snaps, snapel)) != NULL) { + uint64_t guid; + + VERIFY(0 == nvpair_value_uint64(snapel, &guid)); + err = guid_to_name(hdl, destname, guid, B_FALSE, + fsname); + if (err == 0) + break; + } + + if (err != 0) + continue; + + cp = strchr(fsname, '@'); + if (cp != NULL) + *cp = '\0'; + + zhp = zfs_open(hdl, fsname, ZFS_TYPE_DATASET); + if (zhp == NULL) { + err = ENOENT; + goto error; + } + + crypt = zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION); + is_clone = zhp->zfs_dmustats.dds_origin[0] != '\0'; + (void) zfs_crypto_get_encryption_root(zhp, &is_encroot, NULL); + + /* we don't need to do anything for unencrypted filesystems */ + if (crypt == ZIO_CRYPT_OFF) { + zfs_close(zhp); + continue; + } + + /* + * If the dataset is flagged as an encryption root, was not + * received as a clone and is not currently an encryption root, + * force it to become one. Fixup the keylocation if necessary. + */ + if (stream_encroot) { + if (!is_clone && !is_encroot) { + err = lzc_change_key(fsname, + DCP_CMD_FORCE_NEW_KEY, NULL, NULL, 0); + if (err != 0) { + zfs_close(zhp); + goto error; + } + } + + VERIFY(0 == nvlist_lookup_string(props, + zfs_prop_to_name(ZFS_PROP_KEYLOCATION), + &stream_keylocation)); + + /* + * Refresh the properties in case the call to + * lzc_change_key() changed the value. + */ + zfs_refresh_properties(zhp); + err = zfs_prop_get(zhp, ZFS_PROP_KEYLOCATION, + keylocation, sizeof (keylocation), NULL, NULL, + 0, B_TRUE); + if (err != 0) { + zfs_close(zhp); + goto error; + } + + if (strcmp(keylocation, stream_keylocation) != 0) { + err = zfs_prop_set(zhp, + zfs_prop_to_name(ZFS_PROP_KEYLOCATION), + stream_keylocation); + if (err != 0) { + zfs_close(zhp); + goto error; + } + } + } + + /* + * If the dataset is not flagged as an encryption root and is + * currently an encryption root, force it to inherit from its + * parent. 
The root of a raw send should never be + * force-inherited. + */ + if (!stream_encroot && is_encroot && + strcmp(top_zfs, fsname) != 0) { + err = lzc_change_key(fsname, DCP_CMD_FORCE_INHERIT, + NULL, NULL, 0); + if (err != 0) { + zfs_close(zhp); + goto error; + } + } + + zfs_close(zhp); + } + + return (0); + +error: + return (err); +} + static int recv_incremental_replication(libzfs_handle_t *hdl, const char *tofs, recvflags_t *flags, nvlist_t *stream_nv, avl_tree_t *stream_avl, @@ -2412,7 +2745,7 @@ again: needagain = progress = B_FALSE; if ((error = gather_nvlist(hdl, tofs, fromsnap, NULL, - recursive, B_FALSE, &local_nv, &local_avl)) != 0) + recursive, B_TRUE, B_FALSE, &local_nv, &local_avl)) != 0) return (error); /* @@ -2461,22 +2794,15 @@ again: stream_originguid, originguid)) { case 1: { /* promote it! */ - zfs_cmd_t zc = { 0 }; nvlist_t *origin_nvfs; char *origin_fsname; - if (flags->verbose) - (void) printf("promoting %s\n", fsname); - origin_nvfs = fsavl_find(local_avl, originguid, NULL); VERIFY(0 == nvlist_lookup_string(origin_nvfs, "name", &origin_fsname)); - (void) strlcpy(zc.zc_value, origin_fsname, - sizeof (zc.zc_value)); - (void) strlcpy(zc.zc_name, fsname, - sizeof (zc.zc_name)); - error = zfs_ioctl(hdl, ZFS_IOC_PROMOTE, &zc); + error = recv_promote(hdl, fsname, origin_fsname, + flags); if (error == 0) progress = B_TRUE; break; @@ -2665,7 +2991,7 @@ again: goto again; } - return (needagain); + return (needagain || error != 0); } static int @@ -2685,7 +3011,7 @@ zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname, int error; boolean_t anyerr = B_FALSE; boolean_t softerr = B_FALSE; - boolean_t recursive; + boolean_t recursive, raw; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot receive")); @@ -2709,6 +3035,7 @@ zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname, recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") == ENOENT); + raw = (nvlist_lookup_boolean(stream_nv, "raw") == 
0); if (recursive && strchr(destname, '@')) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, @@ -2864,6 +3191,11 @@ zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname, stream_nv, stream_avl, NULL); } + if (raw && softerr == 0) { + softerr = recv_fix_encryption_hierarchy(hdl, destname, + stream_nv, stream_avl); + } + out: fsavl_destroy(stream_avl); nvlist_free(stream_nv); @@ -3030,14 +3362,18 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, const char *chopprefix; boolean_t newfs = B_FALSE; boolean_t stream_wantsnewfs; + boolean_t newprops = B_FALSE; uint64_t parent_snapguid = 0; prop_changelist_t *clp = NULL; nvlist_t *snapprops_nvlist = NULL; zprop_errflags_t prop_errflags; boolean_t recursive; char *snapname = NULL; + nvlist_t *props = NULL; + char tmp_keylocation[MAXNAMELEN]; begin_time = time(NULL); + bzero(tmp_keylocation, MAXNAMELEN); (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot receive")); @@ -3046,32 +3382,50 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, ENOENT); if (stream_avl != NULL) { + char *keylocation = NULL; + nvlist_t *lookup = NULL; nvlist_t *fs = fsavl_find(stream_avl, drrb->drr_toguid, &snapname); - nvlist_t *props; int ret; (void) nvlist_lookup_uint64(fs, "parentfromsnap", &parent_snapguid); err = nvlist_lookup_nvlist(fs, "props", &props); - if (err) + if (err) { VERIFY(0 == nvlist_alloc(&props, NV_UNIQUE_NAME, 0)); + newprops = B_TRUE; + } + /* + * The keylocation property may only be set on encryption roots, + * but this dataset might not become an encryption root until + * recv_fix_encryption_hierarchy() is called. That function + * will fixup the keylocation anyway, so we temporarily unset + * the keylocation for now to avoid any errors from the receive + * ioctl. 
+ */ + err = nvlist_lookup_string(props, + zfs_prop_to_name(ZFS_PROP_KEYLOCATION), &keylocation); + if (err == 0) { + (void) strcpy(tmp_keylocation, keylocation); + (void) nvlist_remove_all(props, + zfs_prop_to_name(ZFS_PROP_KEYLOCATION)); + } if (flags->canmountoff) { VERIFY(0 == nvlist_add_uint64(props, zfs_prop_to_name(ZFS_PROP_CANMOUNT), 0)); } ret = zcmd_write_src_nvlist(hdl, &zc, props); - if (err) - nvlist_free(props); - if (0 == nvlist_lookup_nvlist(fs, "snapprops", &props)) { - VERIFY(0 == nvlist_lookup_nvlist(props, + if (0 == nvlist_lookup_nvlist(fs, "snapprops", &lookup)) { + VERIFY(0 == nvlist_lookup_nvlist(lookup, snapname, &snapprops_nvlist)); } - if (ret != 0) - return (-1); + if (ret != 0) { + err = -1; + goto out; + } } cp = NULL; @@ -3092,7 +3446,8 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, if (strchr(tosnap, '@')) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid " "argument - snapshot not allowed with -e")); - return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); + err = zfs_error(hdl, EZFS_INVALIDNAME, errbuf); + goto out; } chopprefix = strrchr(sendfs, '/'); @@ -3119,7 +3474,8 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, if (strchr(tosnap, '@')) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid " "argument - snapshot not allowed with -d")); - return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); + err = zfs_error(hdl, EZFS_INVALIDNAME, errbuf); + goto out; } chopprefix = strchr(drrb->drr_toname, '/'); @@ -3137,7 +3493,8 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot specify snapshot name for multi-snapshot " "stream")); - return (zfs_error(hdl, EZFS_BADSTREAM, errbuf)); + err = zfs_error(hdl, EZFS_BADSTREAM, errbuf); + goto out; } chopprefix = drrb->drr_toname + strlen(drrb->drr_toname); } @@ -3156,7 +3513,8 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, free(cp); if (!zfs_name_valid(zc.zc_value, 
ZFS_TYPE_SNAPSHOT)) { zcmd_free_nvlists(&zc); - return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); + err = zfs_error(hdl, EZFS_INVALIDNAME, errbuf); + goto out; } /* @@ -3174,7 +3532,8 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "local origin for clone %s does not exist"), zc.zc_value); - return (zfs_error(hdl, EZFS_NOENT, errbuf)); + err = zfs_error(hdl, EZFS_NOENT, errbuf); + goto out; } if (flags->verbose) (void) printf("found clone origin %s\n", zc.zc_string); @@ -3182,6 +3541,10 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, boolean_t resuming = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) & DMU_BACKUP_FEATURE_RESUMING; + boolean_t raw = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) & + DMU_BACKUP_FEATURE_RAW; + boolean_t embedded = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) & + DMU_BACKUP_FEATURE_EMBED_DATA; stream_wantsnewfs = (drrb->drr_fromguid == NULL || (drrb->drr_flags & DRR_FLAG_CLONE) || originsnap) && !resuming; @@ -3241,6 +3604,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, if (zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) { zfs_handle_t *zhp; + boolean_t encrypted; /* * Destination fs exists. It must be one of these cases: @@ -3257,7 +3621,8 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, "destination '%s' exists\n" "must specify -F to overwrite it"), zc.zc_name); - return (zfs_error(hdl, EZFS_EXISTS, errbuf)); + err = zfs_error(hdl, EZFS_EXISTS, errbuf); + goto out; } if (ioctl(hdl->libzfs_fd, ZFS_IOC_SNAPSHOT_LIST_NEXT, &zc) == 0) { @@ -3266,14 +3631,16 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, "destination has snapshots (eg. 
%s)\n" "must destroy them to overwrite it"), zc.zc_name); - return (zfs_error(hdl, EZFS_EXISTS, errbuf)); + err = zfs_error(hdl, EZFS_EXISTS, errbuf); + goto out; } } if ((zhp = zfs_open(hdl, zc.zc_name, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME)) == NULL) { zcmd_free_nvlists(&zc); - return (-1); + err = -1; + goto out; } if (stream_wantsnewfs && @@ -3284,7 +3651,42 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, "destination '%s' is a clone\n" "must destroy it to overwrite it"), zc.zc_name); - return (zfs_error(hdl, EZFS_EXISTS, errbuf)); + err = zfs_error(hdl, EZFS_EXISTS, errbuf); + goto out; + } + + /* + * Raw sends can not be performed as an incremental on top + * of existing unencrypted datasets. zfs recv -F cant be + * used to blow away an existing encrypted filesystem. This + * is because it would require the dsl dir to point to the + * new key (or lack of a key) and the old key at the same + * time. The -F flag may still be used for deleting + * intermediate snapshots that would otherwise prevent the + * receive from working. 
+ */ + encrypted = zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION) != + ZIO_CRYPT_OFF; + if (!stream_wantsnewfs && !encrypted && raw) { + zfs_close(zhp); + zcmd_free_nvlists(&zc); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "cannot perform raw receive on top of " + "existing unencrypted dataset")); + err = zfs_error(hdl, EZFS_BADRESTORE, errbuf); + goto out; + } + + if (stream_wantsnewfs && flags->force && + ((raw && !encrypted) || encrypted)) { + zfs_close(zhp); + zcmd_free_nvlists(&zc); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "zfs receive -F cannot be used to destroy an " + "encrypted filesystem or overwrite an " + "unencrypted one with an encrypted one")); + err = zfs_error(hdl, EZFS_BADRESTORE, errbuf); + goto out; } if (!flags->dryrun && zhp->zfs_type == ZFS_TYPE_FILESYSTEM && @@ -3294,13 +3696,15 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, if (clp == NULL) { zfs_close(zhp); zcmd_free_nvlists(&zc); - return (-1); + err = -1; + goto out; } if (changelist_prefix(clp) != 0) { changelist_free(clp); zfs_close(zhp); zcmd_free_nvlists(&zc); - return (-1); + err = -1; + goto out; } } @@ -3317,6 +3721,8 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, zfs_close(zhp); } else { + zfs_handle_t *zhp; + /* * Destination filesystem does not exist. 
Therefore we better * be creating a new filesystem (either from a full backup, or @@ -3329,7 +3735,8 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, zcmd_free_nvlists(&zc); zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "destination '%s' does not exist"), zc.zc_name); - return (zfs_error(hdl, EZFS_NOENT, errbuf)); + err = zfs_error(hdl, EZFS_NOENT, errbuf); + goto out; } /* @@ -3341,10 +3748,45 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, if (flags->isprefix && !flags->istail && !flags->dryrun && create_parents(hdl, zc.zc_value, strlen(tosnap)) != 0) { zcmd_free_nvlists(&zc); - return (zfs_error(hdl, EZFS_BADRESTORE, errbuf)); + err = zfs_error(hdl, EZFS_BADRESTORE, errbuf); + goto out; + } + + /* + * It is invalid to receive a properties stream that was + * unencrypted on the send side as a child of an encrypted + * parent. Technically there is nothing preventing this, but + * it would mean that the encryption=off property which is + * locally set on the send side would not be received correctly. + * We can infer encryption=off if the stream is not raw and + * properties were included since the send side will only ever + * send the encryption property in a raw nvlist header. 
+ */ + if (!raw && props != NULL) { + uint64_t crypt; + + zhp = zfs_open(hdl, zc.zc_name, ZFS_TYPE_DATASET); + if (zhp == NULL) { + err = zfs_error(hdl, EZFS_BADRESTORE, + errbuf); + goto out; + } + + crypt = zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION); + zfs_close(zhp); + + if (crypt != ZIO_CRYPT_OFF) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "parent '%s' must not be encrypted to " + "receive unencrypted property"), + zc.zc_name); + err = zfs_error(hdl, EZFS_BADPROP, errbuf); + goto out; + } } newfs = B_TRUE; + *cp = '/'; } zc.zc_begin_record = *drr_noswap; @@ -3361,7 +3803,8 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, if (flags->dryrun) { zcmd_free_nvlists(&zc); - return (recv_skip(hdl, infd, flags->byteswap)); + err = recv_skip(hdl, infd, flags->byteswap); + goto out; } zc.zc_nvlist_dst = (uint64_t)(uintptr_t)prop_errbuf; @@ -3448,7 +3891,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, * get a strange "does not exist" error message. 
*/ *cp = '\0'; - if (gather_nvlist(hdl, zc.zc_value, NULL, NULL, B_FALSE, + if (gather_nvlist(hdl, zc.zc_value, NULL, NULL, B_FALSE, B_TRUE, B_FALSE, &local_nv, &local_avl) == 0) { *cp = '@'; fs = fsavl_find(local_avl, drrb->drr_toguid, NULL); @@ -3484,6 +3927,20 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, "since most recent snapshot"), zc.zc_name); (void) zfs_error(hdl, EZFS_BADRESTORE, errbuf); break; + case EACCES: + if (raw && stream_wantsnewfs) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "failed to create encryption key")); + } else if (raw && !stream_wantsnewfs) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "encryption key does not match " + "existing key")); + } else { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "inherited key must be loaded")); + } + (void) zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf); + break; case EEXIST: cp = strchr(zc.zc_value, '@'); if (newfs) { @@ -3498,6 +3955,10 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, *cp = '@'; break; case EINVAL: + if (embedded && !raw) + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "incompatible embedded data stream " + "feature with encrypted receive.")); (void) zfs_error(hdl, EZFS_BADSTREAM, errbuf); break; case ECKSUM: @@ -3514,6 +3975,20 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, "destination %s space quota exceeded"), zc.zc_name); (void) zfs_error(hdl, EZFS_NOSPC, errbuf); break; + case ZFS_ERR_FROM_IVSET_GUID_MISSING: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "IV set guid missing. See errata %u at" + "http://zfsonlinux.org/msg/ZFS-8000-ER"), + ZPOOL_ERRATA_ZOL_8308_ENCRYPTION); + (void) zfs_error(hdl, EZFS_BADSTREAM, errbuf); + break; + case ZFS_ERR_FROM_IVSET_GUID_MISMATCH: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "IV set guid mismatch. 
See the 'zfs receive' " + "man page section\n discussing the limitations " + "of raw encrypted send streams.")); + (void) zfs_error(hdl, EZFS_BADSTREAM, errbuf); + break; default: (void) zfs_standard_error(hdl, ioctl_errno, errbuf); } @@ -3566,8 +4041,10 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, (void) fprintf(stderr, "\n"); } - if (err || ioctl_err) - return (-1); + if (err || ioctl_err) { + err = -1; + goto out; + } *action_handlep = zc.zc_action_handle; @@ -3585,7 +4062,18 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, buf1, delta, buf2); } - return (0); + err = 0; +out: + + if (tmp_keylocation[0] != '\0') { + VERIFY(0 == nvlist_add_string(props, + zfs_prop_to_name(ZFS_PROP_KEYLOCATION), tmp_keylocation)); + } + + if (newprops) + nvlist_free(props); + + return (err); } static int diff --git a/usr/src/lib/libzfs/common/libzfs_status.c b/usr/src/lib/libzfs/common/libzfs_status.c index 975309c423..46ea7f944f 100644 --- a/usr/src/lib/libzfs/common/libzfs_status.c +++ b/usr/src/lib/libzfs/common/libzfs_status.c @@ -198,7 +198,7 @@ find_vdev_problem(nvlist_t *vdev, int (*func)(uint64_t, uint64_t, uint64_t)) * only picks the most damaging of all the current errors to report. */ static zpool_status_t -check_status(nvlist_t *config, boolean_t isimport) +check_status(nvlist_t *config, boolean_t isimport, zpool_errata_t *erratap) { nvlist_t *nvroot; vdev_stat_t *vs; @@ -209,6 +209,7 @@ check_status(nvlist_t *config, boolean_t isimport) uint64_t stateval; uint64_t suspended; uint64_t hostid = 0; + uint64_t errata = 0; unsigned long system_hostid = get_system_hostid(); verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, @@ -369,6 +370,15 @@ check_status(nvlist_t *config, boolean_t isimport) return (ZPOOL_STATUS_REMOVED_DEV); /* + * Informational errata available. 
+ */ + (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_ERRATA, &errata); + if (errata) { + *erratap = errata; + return (ZPOOL_STATUS_ERRATA); + } + + /* * Outdated, but usable, version */ if (SPA_VERSION_IS_SUPPORTED(version) && version != SPA_VERSION) @@ -403,9 +413,9 @@ check_status(nvlist_t *config, boolean_t isimport) } zpool_status_t -zpool_get_status(zpool_handle_t *zhp, char **msgid) +zpool_get_status(zpool_handle_t *zhp, char **msgid, zpool_errata_t *errata) { - zpool_status_t ret = check_status(zhp->zpool_config, B_FALSE); + zpool_status_t ret = check_status(zhp->zpool_config, B_FALSE, errata); if (ret >= NMSGID) *msgid = NULL; @@ -416,9 +426,9 @@ zpool_get_status(zpool_handle_t *zhp, char **msgid) } zpool_status_t -zpool_import_status(nvlist_t *config, char **msgid) +zpool_import_status(nvlist_t *config, char **msgid, zpool_errata_t *errata) { - zpool_status_t ret = check_status(config, B_TRUE); + zpool_status_t ret = check_status(config, B_TRUE, errata); if (ret >= NMSGID) *msgid = NULL; diff --git a/usr/src/lib/libzfs/common/libzfs_util.c b/usr/src/lib/libzfs/common/libzfs_util.c index 246874b9f9..b56c394b59 100644 --- a/usr/src/lib/libzfs/common/libzfs_util.c +++ b/usr/src/lib/libzfs/common/libzfs_util.c @@ -263,6 +263,8 @@ libzfs_error_description(libzfs_handle_t *hdl) case EZFS_NO_RESILVER_DEFER: return (dgettext(TEXT_DOMAIN, "this action requires the " "resilver_defer feature")); + case EZFS_CRYPTOFAILED: + return (dgettext(TEXT_DOMAIN, "encryption failure")); case EZFS_UNKNOWN: return (dgettext(TEXT_DOMAIN, "unknown error")); default: diff --git a/usr/src/lib/libzfs/common/mapfile-vers b/usr/src/lib/libzfs/common/mapfile-vers index 26a68259af..f4412bbd9a 100644 --- a/usr/src/lib/libzfs/common/mapfile-vers +++ b/usr/src/lib/libzfs/common/mapfile-vers @@ -71,6 +71,11 @@ SYMBOL_VERSION SUNWprivate_1.1 { zfs_close; zfs_create; zfs_create_ancestors; + zfs_crypto_attempt_load_keys; + zfs_crypto_get_encryption_root; + zfs_crypto_load_key; + 
zfs_crypto_rewrap; + zfs_crypto_unload_key; zfs_dataset_exists; zfs_deleg_share_nfs; zfs_destroy; diff --git a/usr/src/lib/libzfs_core/common/libzfs_core.c b/usr/src/lib/libzfs_core/common/libzfs_core.c index 61259846a8..df973b532d 100644 --- a/usr/src/lib/libzfs_core/common/libzfs_core.c +++ b/usr/src/lib/libzfs_core/common/libzfs_core.c @@ -189,34 +189,49 @@ lzc_ioctl(zfs_ioc_t ioc, const char *name, } out: - fnvlist_pack_free(packed, size); + if (packed != NULL) + fnvlist_pack_free(packed, size); free((void *)(uintptr_t)zc.zc_nvlist_dst); return (error); } int -lzc_create(const char *fsname, enum lzc_dataset_type type, nvlist_t *props) +lzc_create(const char *fsname, enum lzc_dataset_type type, nvlist_t *props, + uint8_t *wkeydata, uint_t wkeylen) { int error; + nvlist_t *hidden_args = NULL; nvlist_t *args = fnvlist_alloc(); + fnvlist_add_int32(args, "type", (dmu_objset_type_t)type); if (props != NULL) fnvlist_add_nvlist(args, "props", props); + + if (wkeydata != NULL) { + hidden_args = fnvlist_alloc(); + fnvlist_add_uint8_array(hidden_args, "wkeydata", wkeydata, + wkeylen); + fnvlist_add_nvlist(args, ZPOOL_HIDDEN_ARGS, hidden_args); + } + error = lzc_ioctl(ZFS_IOC_CREATE, fsname, args, NULL); + nvlist_free(hidden_args); nvlist_free(args); return (error); } int -lzc_clone(const char *fsname, const char *origin, - nvlist_t *props) +lzc_clone(const char *fsname, const char *origin, nvlist_t *props) { int error; + nvlist_t *hidden_args = NULL; nvlist_t *args = fnvlist_alloc(); + fnvlist_add_string(args, "origin", origin); if (props != NULL) fnvlist_add_nvlist(args, "props", props); error = lzc_ioctl(ZFS_IOC_CLONE, fsname, args, NULL); + nvlist_free(hidden_args); nvlist_free(args); return (error); } @@ -584,6 +599,8 @@ lzc_send_resume(const char *snapname, const char *from, int fd, fnvlist_add_boolean(args, "embedok"); if (flags & LZC_SEND_FLAG_COMPRESS) fnvlist_add_boolean(args, "compressok"); + if (flags & LZC_SEND_FLAG_RAW) + fnvlist_add_boolean(args, "rawok"); 
if (resumeobj != 0 || resumeoff != 0) { fnvlist_add_uint64(args, "resume_object", resumeobj); fnvlist_add_uint64(args, "resume_offset", resumeoff); @@ -654,7 +671,7 @@ recv_read(int fd, void *buf, int ilen) static int recv_impl(const char *snapname, nvlist_t *props, const char *origin, - boolean_t force, boolean_t resumable, int fd, + boolean_t force, boolean_t resumable, boolean_t raw, int fd, const dmu_replay_record_t *begin_record) { /* @@ -747,9 +764,10 @@ out: */ int lzc_receive(const char *snapname, nvlist_t *props, const char *origin, - boolean_t force, int fd) + boolean_t raw, boolean_t force, int fd) { - return (recv_impl(snapname, props, origin, force, B_FALSE, fd, NULL)); + return (recv_impl(snapname, props, origin, force, B_FALSE, raw, fd, + NULL)); } /* @@ -760,9 +778,10 @@ lzc_receive(const char *snapname, nvlist_t *props, const char *origin, */ int lzc_receive_resumable(const char *snapname, nvlist_t *props, const char *origin, - boolean_t force, int fd) + boolean_t force, boolean_t raw, int fd) { - return (recv_impl(snapname, props, origin, force, B_TRUE, fd, NULL)); + return (recv_impl(snapname, props, origin, force, B_TRUE, raw, fd, + NULL)); } /* @@ -778,12 +797,12 @@ lzc_receive_resumable(const char *snapname, nvlist_t *props, const char *origin, */ int lzc_receive_with_header(const char *snapname, nvlist_t *props, - const char *origin, boolean_t force, boolean_t resumable, int fd, - const dmu_replay_record_t *begin_record) + const char *origin, boolean_t force, boolean_t resumable, boolean_t raw, + int fd, const dmu_replay_record_t *begin_record) { if (begin_record == NULL) return (EINVAL); - return (recv_impl(snapname, props, origin, force, resumable, fd, + return (recv_impl(snapname, props, origin, force, resumable, raw, fd, begin_record)); } @@ -882,6 +901,7 @@ lzc_bookmark(nvlist_t *bookmarks, nvlist_t **errlist) * "guid" - globally unique identifier of the snapshot it refers to * "createtxg" - txg when the snapshot it refers to was created 
* "creation" - timestamp when the snapshot it refers to was created + * "ivsetguid" - IVset guid for identifying encrypted snapshots * * The format of the returned nvlist as follows: * <short name of bookmark> -> { @@ -1115,3 +1135,66 @@ lzc_initialize(const char *poolname, pool_initialize_func_t cmd_type, return (error); } + +/* + * Performs key management functions + * + * crypto_cmd should be a value from zfs_ioc_crypto_cmd_t. If the command + * specifies to load or change a wrapping key, the key should be specified in + * the hidden_args nvlist so that it is not logged + */ +int +lzc_load_key(const char *fsname, boolean_t noop, uint8_t *wkeydata, + uint_t wkeylen) +{ + int error; + nvlist_t *ioc_args; + nvlist_t *hidden_args; + + if (wkeydata == NULL) + return (EINVAL); + + ioc_args = fnvlist_alloc(); + hidden_args = fnvlist_alloc(); + fnvlist_add_uint8_array(hidden_args, "wkeydata", wkeydata, wkeylen); + fnvlist_add_nvlist(ioc_args, ZPOOL_HIDDEN_ARGS, hidden_args); + if (noop) + fnvlist_add_boolean(ioc_args, "noop"); + error = lzc_ioctl(ZFS_IOC_LOAD_KEY, fsname, ioc_args, NULL); + nvlist_free(hidden_args); + nvlist_free(ioc_args); + + return (error); +} + +int +lzc_unload_key(const char *fsname) +{ + return (lzc_ioctl(ZFS_IOC_UNLOAD_KEY, fsname, NULL, NULL)); +} + +int +lzc_change_key(const char *fsname, uint64_t crypt_cmd, nvlist_t *props, + uint8_t *wkeydata, uint_t wkeylen) +{ + int error; + nvlist_t *ioc_args = fnvlist_alloc(); + nvlist_t *hidden_args = NULL; + + fnvlist_add_uint64(ioc_args, "crypt_cmd", crypt_cmd); + + if (wkeydata != NULL) { + hidden_args = fnvlist_alloc(); + fnvlist_add_uint8_array(hidden_args, "wkeydata", wkeydata, + wkeylen); + fnvlist_add_nvlist(ioc_args, ZPOOL_HIDDEN_ARGS, hidden_args); + } + + if (props != NULL) + fnvlist_add_nvlist(ioc_args, "props", props); + + error = lzc_ioctl(ZFS_IOC_CHANGE_KEY, fsname, ioc_args, NULL); + nvlist_free(hidden_args); + nvlist_free(ioc_args); + return (error); +} diff --git 
a/usr/src/lib/libzfs_core/common/libzfs_core.h b/usr/src/lib/libzfs_core/common/libzfs_core.h index cbc0c68af8..f1905f9968 100644 --- a/usr/src/lib/libzfs_core/common/libzfs_core.h +++ b/usr/src/lib/libzfs_core/common/libzfs_core.h @@ -52,7 +52,8 @@ enum lzc_dataset_type { int lzc_remap(const char *fsname); int lzc_snapshot(nvlist_t *, nvlist_t *, nvlist_t **); -int lzc_create(const char *, enum lzc_dataset_type, nvlist_t *); +int lzc_create(const char *, enum lzc_dataset_type, nvlist_t *, uint8_t *, + uint_t); int lzc_clone(const char *, const char *, nvlist_t *); int lzc_promote(const char *, char *, int); int lzc_destroy_snaps(nvlist_t *, boolean_t, nvlist_t **); @@ -61,6 +62,9 @@ int lzc_get_bookmarks(const char *, nvlist_t *, nvlist_t **); int lzc_destroy_bookmarks(nvlist_t *, nvlist_t **); int lzc_initialize(const char *, pool_initialize_func_t, nvlist_t *, nvlist_t **); +int lzc_load_key(const char *, boolean_t, uint8_t *, uint_t); +int lzc_unload_key(const char *); +int lzc_change_key(const char *, uint64_t, nvlist_t *, uint8_t *, uint_t); int lzc_snaprange_space(const char *, const char *, uint64_t *); @@ -71,7 +75,8 @@ int lzc_get_holds(const char *, nvlist_t **); enum lzc_send_flags { LZC_SEND_FLAG_EMBED_DATA = 1 << 0, LZC_SEND_FLAG_LARGE_BLOCK = 1 << 1, - LZC_SEND_FLAG_COMPRESS = 1 << 2 + LZC_SEND_FLAG_COMPRESS = 1 << 2, + LZC_SEND_FLAG_RAW = 1 << 3 }; int lzc_send(const char *, const char *, int, enum lzc_send_flags); @@ -81,11 +86,12 @@ int lzc_send_space(const char *, const char *, enum lzc_send_flags, uint64_t *); struct dmu_replay_record; -int lzc_receive(const char *, nvlist_t *, const char *, boolean_t, int); -int lzc_receive_resumable(const char *, nvlist_t *, const char *, +int lzc_receive(const char *, nvlist_t *, const char *, boolean_t, boolean_t, int); +int lzc_receive_resumable(const char *, nvlist_t *, const char *, + boolean_t, boolean_t, int); int lzc_receive_with_header(const char *, nvlist_t *, const char *, boolean_t, - boolean_t, 
int, const struct dmu_replay_record *); + boolean_t, boolean_t, int, const struct dmu_replay_record *); boolean_t lzc_exists(const char *); diff --git a/usr/src/lib/libzfs_core/common/mapfile-vers b/usr/src/lib/libzfs_core/common/mapfile-vers index 93a7334fb1..c14ec17a43 100644 --- a/usr/src/lib/libzfs_core/common/mapfile-vers +++ b/usr/src/lib/libzfs_core/common/mapfile-vers @@ -74,6 +74,9 @@ SYMBOL_VERSION ILLUMOS_0.1 { lzc_channel_program_nosync; lzc_clone; lzc_create; + lzc_load_key; + lzc_unload_key; + lzc_change_key; lzc_destroy_bookmarks; lzc_destroy_snaps; lzc_exists; diff --git a/usr/src/lib/libzfs_jni/common/libzfs_jni_dataset.c b/usr/src/lib/libzfs_jni/common/libzfs_jni_dataset.c index 2aa11ba23b..0e2d1d3370 100644 --- a/usr/src/lib/libzfs_jni/common/libzfs_jni_dataset.c +++ b/usr/src/lib/libzfs_jni/common/libzfs_jni_dataset.c @@ -382,7 +382,7 @@ populate_PoolBean(JNIEnv *env, zpool_handle_t *zphp, zfs_handle_t *zhp, (*env)->CallVoidMethod(env, object->object, pool_stats->method_setPoolStatus, zjni_pool_status_to_obj(env, - zpool_get_status(zphp, &msgid))); + zpool_get_status(zphp, &msgid, NULL))); (*env)->CallVoidMethod(env, object->object, pool_stats->method_setPoolVersion, diff --git a/usr/src/lib/libzfs_jni/common/libzfs_jni_pool.c b/usr/src/lib/libzfs_jni/common/libzfs_jni_pool.c index ebc800af80..2eb36c0145 100644 --- a/usr/src/lib/libzfs_jni/common/libzfs_jni_pool.c +++ b/usr/src/lib/libzfs_jni/common/libzfs_jni_pool.c @@ -407,7 +407,8 @@ populate_ImportablePoolBean(JNIEnv *env, ImportablePoolBean_t *bean, (*env)->CallVoidMethod(env, object->object, pool_stats->method_setPoolStatus, - zjni_pool_status_to_obj(env, zpool_import_status(config, &c))); + zjni_pool_status_to_obj(env, zpool_import_status(config, &c, + NULL))); (*env)->CallVoidMethod(env, object->object, pool_stats->method_setPoolVersion, (jlong)version); diff --git a/usr/src/lib/libzpool/common/kernel.c b/usr/src/lib/libzpool/common/kernel.c index fb14f88817..95a1c54622 100644 --- 
a/usr/src/lib/libzpool/common/kernel.c +++ b/usr/src/lib/libzpool/common/kernel.c @@ -42,6 +42,11 @@ #include <sys/utsname.h> #include <sys/systeminfo.h> #include <libzfs.h> +#include <sys/crypto/common.h> +#include <sys/crypto/impl.h> +#include <sys/crypto/api.h> +#include <sys/sha2.h> +#include <crypto/aes/aes_impl.h> extern void system_taskq_init(void); extern void system_taskq_fini(void); @@ -595,3 +600,87 @@ geterror(struct buf *bp) } return (error); } + +int +crypto_create_ctx_template(crypto_mechanism_t *mech, + crypto_key_t *key, crypto_ctx_template_t *tmpl, int kmflag) +{ + return (0); +} + +crypto_mech_type_t +crypto_mech2id(crypto_mech_name_t name) +{ + return (CRYPTO_MECH_INVALID); +} + +int +crypto_mac(crypto_mechanism_t *mech, crypto_data_t *data, + crypto_key_t *key, crypto_ctx_template_t impl, + crypto_data_t *mac, crypto_call_req_t *cr) +{ + return (0); +} + +int +crypto_encrypt(crypto_mechanism_t *mech, crypto_data_t *plaintext, + crypto_key_t *key, crypto_ctx_template_t tmpl, + crypto_data_t *ciphertext, crypto_call_req_t *cr) +{ + return (0); +} + +/* This could probably be a weak reference */ +int +crypto_decrypt(crypto_mechanism_t *mech, crypto_data_t *plaintext, + crypto_key_t *key, crypto_ctx_template_t tmpl, + crypto_data_t *ciphertext, crypto_call_req_t *cr) +{ + return (0); +} + + +int +crypto_digest_final(crypto_context_t context, crypto_data_t *digest, + crypto_call_req_t *cr) +{ + return (0); +} + +int +crypto_digest_update(crypto_context_t context, crypto_data_t *data, + crypto_call_req_t *cr) +{ + return (0); +} + +int +crypto_digest_init(crypto_mechanism_t *mech, crypto_context_t *ctxp, + crypto_call_req_t *crq) +{ + return (0); +} + +void +crypto_destroy_ctx_template(crypto_ctx_template_t tmpl) +{ +} + +extern int crypto_mac_init(crypto_mechanism_t *mech, crypto_key_t *key, + crypto_ctx_template_t tmpl, crypto_context_t *ctxp, + crypto_call_req_t *cr) +{ + return (0); +} + +extern int crypto_mac_update(crypto_context_t ctx, 
crypto_data_t *data, + crypto_call_req_t *cr) +{ + return (0); +} + +extern int crypto_mac_final(crypto_context_t ctx, crypto_data_t *data, + crypto_call_req_t *cr) +{ + return (0); +} diff --git a/usr/src/man/man1m/zfs.1m b/usr/src/man/man1m/zfs.1m index d80006ddeb..c9b25085cd 100644 --- a/usr/src/man/man1m/zfs.1m +++ b/usr/src/man/man1m/zfs.1m @@ -151,7 +151,7 @@ .Cm mount .Nm .Cm mount -.Op Fl Ov +.Op Fl Olv .Op Fl o Ar options .Fl a | Ar filesystem .Nm @@ -169,7 +169,7 @@ .Ar snapshot bookmark .Nm .Cm send -.Op Fl DLPRcenpv +.Op Fl DLPRcenpvw .Op Oo Fl I Ns | Ns Fl i Oc Ar snapshot .Ar snapshot .Nm @@ -276,6 +276,22 @@ .Op Fl m Ar memory_limit .Ar pool script .Op Ar arg1 No ... +.Nm +.Cm load-key +.Op Fl rn +.Op Fl L Ar keylocation +.Op Fl a Ns | Ns Ar filesystem +.Nm +.Cm unload-key +.Op Fl r +.Op Fl a Ns | Ns Ar filesystem +.Nm +.Cm change-key +.Op Fl l +.Op Fl o Sy keylocation Ns = Ns Ar value +.Op Fl o Sy keyformat Ns = Ns Ar value +.Op Fl o Sy pbkdf2iters Ns = Ns Ar value +.Ar filesystem .Sh DESCRIPTION The .Nm @@ -582,6 +598,21 @@ if the snapshot has been marked for deferred destroy by using the command. Otherwise, the property is .Sy off . +.It Sy encryptionroot +For encrypted datasets, indicates where the dataset is currently inheriting its +encryption key from. +Loading or unloading a key for the +.Sy encryptionroot +will implicitly load / unload the key for any inheriting datasets +.Po see +.Nm zfs Cm load-key +and +.Nm zfs Cm unload-key +.Pc . +Clones will always share an encryption key with their origin. +See the +.Sy Encryption +section for details. .It Sy filesystem_count The total number of filesystems and volumes that exist under this location in the dataset tree. @@ -596,6 +627,16 @@ GUID. Thus, the .Sy guid is suitable to identify a snapshot across pools. +.It Sy keystatus +Indicates if an encryption key is currently loaded into ZFS. +The possible values are +.Sy none , available , +and +.Sy unavailable . 
+See +.Nm Cm load-key +and +.Nm Cm unload-key . .It Sy logicalreferenced The amount of space that is .Qq logically @@ -1111,6 +1152,78 @@ option. Controls whether device nodes can be opened on this file system. The default value is .Sy on . +.It Xo +.Sy encryption Ns = Ns Sy on Ns | Ns Sy off Ns | Ns Sy aes-128-ccm Ns | Ns +.Sy aes-192-ccm Ns | Ns Sy aes-256-ccm Ns | Ns Sy aes-128-gcm Ns | Ns +.Sy aes-192-gcm Ns | Ns Sy aes-256-gcm +.Xc +Controls the encryption cipher suite +.Pq block cipher, key length, and mode +used for this dataset. +Requires the encryption feature to be enabled on the pool. +Requires a +.Sy keyformat +to be set at dataset creation time. +.Pp +Selecting +.Sy encryption Ns = Ns Sy on +when creating a dataset indicates that the default encryption suite will be +selected, which is currently +.Sy aes-256-ccm . +In order to provide consistent data protection, encryption must be specified at +dataset creation time and it cannot be changed afterwards. +.Pp +For more details and caveats about encryption see the +.Sx Encryption +section. +.It Sy keyformat Ns = Ns Sy raw Ns | Ns Sy hex Ns | Ns Sy passphrase +Controls what format the user's encryption key will be provided as. +This property is only set for encrypted datasets which are encryption roots. +.Pp +Raw keys and hex keys must be 32 bytes long +.Pq regardless of the chosen encryption suite +and must be randomly generated. +A raw key can be generated with the following command: +.Bd -literal +# dd if=/dev/urandom of=/path/to/output/key bs=32 count=1 +.Ed +.Pp +Passphrases must be between 8 and 512 bytes long and will be processed through +PBKDF2 before being used +.Po see the +.Nm pbkdf2iters +property +.Pc . +Even though the encryption suite cannot be changed after dataset creation, the +keyformat can be with +.Nm Cm change-key . 
+.It Sy keylocation Ns = Ns Sy prompt Ns | Ns Ar file://<absolute file path> +Controls where the user's encryption key will be loaded from by default for +commands such as +.Nm Cm load-key +and +.Nm Cm mount Fl l . +This property is only set for encrypted datasets which are encryption roots. +If unspecified, the default is +.Sy prompt . +.Pp +Even though the encryption suite cannot be changed after dataset creation, the +keylocation can be with either +.Nm Cm set +or +.Nm Cm change-key . +If +.Sy prompt +is selected ZFS will ask for the key at the command prompt when +it is required to access the encrypted data +.Po see +.Nm Cm load-key +.Pc . +This setting will also allow the key to be passed in via STDIN, but users +should be careful not to place keys which should be kept secret on the +command line. +If a file URI is selected, the key will be loaded from the specified absolute +file path. .It Sy exec Ns = Ns Sy on Ns | Ns Sy off Controls whether processes can be executed from within this file system. The default value is @@ -1179,6 +1292,24 @@ See for more information on .Sy nbmand mounts. +.It Sy pbkdf2iters Ns = Ns Ar iterations +Controls the number of PBKDF2 iterations that a +.Sy passphrase +encryption key should be run through when processing it into an encryption key. +This property is only defined when encryption is enabled and a keyformat of +.Sy passphrase +is selected. +The goal of PBKDF2 is to significantly increase the computational difficulty +needed to brute force a user's passphrase. +This is accomplished by forcing the attacker to run each passphrase through a +computationally expensive hashing function many times before they arrive at the +resulting key. +A user who actually knows the passphrase will only have to pay this cost once. +As CPUs become better at processing, this number should be raised to ensure that +a brute force attack is still not possible. +The current default is 350000 and the minimum is 100000. 
+This property may be changed with +.Nm Cm change-key . .It Sy primarycache Ns = Ns Sy all Ns | Ns Sy none Ns | Ns Sy metadata Controls what is cached in the primary cache .Pq ARC . @@ -1568,7 +1699,7 @@ Controls the behavior of synchronous requests .Pq e.g. fsync, O_DSYNC . .Sy standard is the -.Tn POSIX +POSIX specified behavior of ensuring all synchronous requests are written to stable storage and all devices are flushed to ensure data is not cached by device controllers @@ -1646,7 +1777,7 @@ when the pool is low on space. For a sparse volume, changes to .Sy volsize are not reflected in the -.Sy refreservation. +.Sy refreservation . A volume that is not sparse is said to be .Qq thick provisioned . A sparse volume can become thick provisioned by setting @@ -1698,7 +1829,7 @@ property is Traditionally, .Ux and -.Tn POSIX +POSIX file systems have case-sensitive file names. .Pp The @@ -1857,6 +1988,83 @@ installed or upgraded, use the and .Xr dumpadm 1M commands. +.Ss "Encryption" +Enabling the +.Sy encryption +feature allows for the creation of encrypted filesystems and volumes. +ZFS will encrypt all user data including file and zvol data, file attributes, +ACLs, permission bits, directory listings, FUID mappings, and userused/groupused +data. +ZFS +will not encrypt metadata related to the pool structure, including dataset +names, dataset hierarchy, file size, file holes, and dedup tables. +Key rotation is managed internally by the ZFS kernel module and changing the +user's key does not require re-encrypting the entire dataset. +Datasets can be scrubbed, resilvered, renamed, and deleted without the +encryption keys being loaded +.Po see the +.Nm Cm load-key +subcommand for more info on key loading +.Pc . +.Pp +Creating an encrypted dataset requires specifying the +.Sy encryption +and +.Sy keyformat +properties at creation time, along with an optional +.Sy keylocation +and +.Sy pbkdf2iters . 
+After entering an encryption key, the created +dataset will become an encryption root. +Any descendant datasets will inherit their encryption key from the encryption +root by default, meaning that loading, unloading, or changing the key for the +encryption root will implicitly do the same for all inheriting datasets. +If this inheritance is not desired, simply supply a +.Sy keyformat +when creating the child dataset or use +.Nm Cm change-key +to break an existing relationship, creating a new encryption root on the child. +Note that the child's +.Sy keyformat +may match that of the parent while still creating a new encryption root, and +that changing the +.Sy encryption +property alone does not create a new encryption root; this would simply use a +different cipher suite with the same key as its encryption root. +The one exception is that clones will always use their origin's encryption key. +As a result of this exception, some encryption-related properties (namely +.Sy keystatus , +.Sy keyformat , +.Sy keylocation , +and +.Sy pbkdf2iters ) +do not inherit like other ZFS properties and instead use the value determined +by their encryption root. +Encryption root inheritance can be tracked via the read-only +.Sy encryptionroot +property. +.Pp +Encryption changes the behavior of a few ZFS operations. +Encryption is applied after compression so compression ratios are preserved. +Normally checksums in ZFS are 256 bits long, but for encrypted data the checksum +is 128 bits of the user-chosen checksum and 128 bits of MAC from the encryption +suite, which provides additional protection against maliciously altered data. +Deduplication is still possible with encryption enabled but for security, +datasets will only dedup against themselves, their snapshots, and their clones. +.Pp +There are a few limitations on encrypted datasets. +Encrypted data cannot be embedded via the +.Sy embedded_data +feature. 
+Encrypted datasets may not have +.Sy copies Ns = Ns Sy 3 +since the implementation stores some encryption metadata where the third copy +would normally be. +Since compression is applied before encryption datasets may be vulnerable to a +CRIME-like attack if applications accessing the data allow for it. +Deduplication with encryption will leak information about which blocks are +equivalent in a dataset and will incur an extra CPU cost per block written. .Sh SUBCOMMANDS All subcommands that modify state are logged persistently to the pool in their original form. @@ -2621,7 +2829,7 @@ Displays all ZFS file systems currently mounted. .It Xo .Nm .Cm mount -.Op Fl Ov +.Op Fl Olv .Op Fl o Ar options .Fl a | Ar filesystem .Xc @@ -2635,6 +2843,16 @@ for more information. .It Fl a Mount all available ZFS file systems. Invoked automatically as part of the boot process. +.It Fl l +Load keys for encrypted filesystems as they are being mounted. +This is equivalent to executing +.Nm Cm load-key +on each encryption root before mounting it. +Note that if a filesystem has a +.Sy keylocation +of +.Sy prompt +this will cause the terminal to interactively block after asking for the key. .It Ar filesystem Mount the specified filesystem. .It Fl o Ar options @@ -2720,7 +2938,7 @@ feature. .It Xo .Nm .Cm send -.Op Fl DLPRcenpv +.Op Fl DLPRcenpvw .Op Oo Fl I Ns | Ns Fl i Oc Ar snapshot .Ar snapshot .Xc @@ -2733,7 +2951,7 @@ The output can be redirected to a file or to a different system .Pc . By default, a full stream is generated. .Bl -tag -width "-D" -.It Fl D, -dedup +.It Fl D , -dedup Generate a deduplicated stream. Blocks which would have been sent multiple times in the send stream will only be sent once. @@ -2756,7 +2974,7 @@ is similar to The incremental source may be specified as with the .Fl i option. -.It Fl L, -large-block +.It Fl L , -large-block Generate a stream which may contain blocks larger than 128KB. 
This flag has no effect if the .Sy large_blocks @@ -2771,9 +2989,9 @@ See for details on ZFS feature flags and the .Sy large_blocks feature. -.It Fl P, -parsable +.It Fl P , -parsable Print machine-parsable verbose information about the stream package generated. -.It Fl R, -replicate +.It Fl R , -replicate Generate a replication stream package, which will replicate the specified file system, and all descendent file systems, up to the named snapshot. When received, all properties, snapshots, descendent file systems, and clones @@ -2792,7 +3010,12 @@ If the .Fl F flag is specified when this stream is received, snapshots and file systems that do not exist on the sending side are destroyed. -.It Fl e, -embed +If the +.Fl R +flag is used to send encrypted datasets, then +.Fl w +must also be specified. +.It Fl e , -embed Generate a more compact stream by using .Sy WRITE_EMBEDDED records for blocks which are stored more compactly on disk by the @@ -2808,12 +3031,16 @@ If the .Sy lz4_compress feature is active on the sending system, then the receiving system must have that feature enabled as well. +Datasets that are sent with this flag may not be received as an encrypted +dataset, since encrypted datasets cannot use the +.Sy embedded_data +feature. See .Xr zpool-features 5 for details on ZFS feature flags and the .Sy embedded_data feature. -.It Fl c, -compressed +.It Fl c , -compressed Generate a more compact stream by using compressed WRITE records for blocks which are compressed on disk and in memory .Po see the @@ -2854,7 +3081,7 @@ be fully specified not just .Em @origin .Pc . -.It Fl n, -dryrun +.It Fl n , -dryrun Do a dry-run .Pq Qq No-op send. @@ -2868,13 +3095,35 @@ In this case, the verbose output will be written to standard output .Po contrast with a non-dry-run, where the stream is written to standard output and the verbose output goes to standard error .Pc . -.It Fl p, -props +.It Fl p , -props Include the dataset's properties in the stream. 
This flag is implicit when .Fl R is specified. The receiving system must also support this feature. -.It Fl v, -verbose +Sends of encrypted datasets must use +.Fl w +when using this flag. +.It Fl w , -raw +For encrypted datasets, send data exactly as it exists on disk. +This allows backups to be taken even if encryption keys are not currently +loaded. +The backup may then be received on an untrusted machine since that machine will +not have the encryption keys to read the protected data or alter it without +being detected. +Upon being received, the dataset will have the same encryption keys as it did +on the send side, although the +.Sy keylocation +property will be defaulted to +.Sy prompt +if not otherwise provided. +For unencrypted datasets, this flag will be equivalent to +.Fl Lec . +Note that if you do not use this flag for sending encrypted datasets, +data will be sent unencrypted and may be re-encrypted with a different +encryption key on the receiving system, which will disable the ability +to do a raw send to that system for incrementals. +.It Fl v , -verbose Print verbose information about the stream package generated. This information includes a per-second report of how much data has been sent. .Pp @@ -2884,7 +3133,7 @@ You will be able to receive your streams on future versions of ZFS . .It Xo .Nm .Cm send -.Op Fl Lce +.Op Fl Lcew .Op Fl i Ar snapshot Ns | Ns Ar bookmark .Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot .Xc @@ -2896,7 +3145,7 @@ When the stream generated from a filesystem or volume is received, the default snapshot name will be .Qq --head-- . .Bl -tag -width "-L" -.It Fl L, -large-block +.It Fl L , -large-block Generate a stream which may contain blocks larger than 128KB. This flag has no effect if the .Sy large_blocks @@ -2911,7 +3160,7 @@ See for details on ZFS feature flags and the .Sy large_blocks feature. 
-.It Fl c, -compressed +.It Fl c , -compressed Generate a more compact stream by using compressed WRITE records for blocks which are compressed on disk and in memory .Po see the @@ -2930,7 +3179,7 @@ option is not supplied in conjunction with .Fl c , then the data will be decompressed before sending so it can be split into smaller block sizes. -.It Fl e, -embed +.It Fl e , -embed Generate a more compact stream by using .Sy WRITE_EMBEDDED records for blocks which are stored more compactly on disk by the @@ -2946,6 +3195,10 @@ If the .Sy lz4_compress feature is active on the sending system, then the receiving system must have that feature enabled as well. +Datasets that are sent with this flag may not be received as an encrypted +dataset, since encrypted datasets cannot use the +.Sy embedded_data +feature. See .Xr zpool-features 5 for details on ZFS feature flags and the @@ -2966,6 +3219,25 @@ character and following If the incremental target is a clone, the incremental source can be the origin snapshot, or an earlier snapshot in the origin's filesystem, or the origin's origin, etc. +.It Fl w , -raw +For encrypted datasets, send data exactly as it exists on disk. +This allows backups to be taken even if encryption keys are not currently +loaded. +The backup may then be received on an untrusted machine since that machine will +not have the encryption keys to read the protected data or alter it without +being detected. +Upon being received, the dataset will have the same encryption keys as it did +on the send side, although the +.Sy keylocation +property will be defaulted to +.Sy prompt +if not otherwise provided. +For unencrypted datasets, this flag will be equivalent to +.Fl Lec . +Note that if you do not use this flag for sending encrypted datasets, +data will be sent unencrypted and may be re-encrypted with a different +encryption key on the receiving system, which will disable the ability +to do a raw send to that system for incrementals. 
.El .It Xo .Nm @@ -3005,7 +3277,7 @@ Streams are created using the subcommand, which by default creates a full stream. .Nm zfs Cm recv can be used as an alias for -.Nm zfs Cm receive. +.Nm zfs Cm receive . .Pp If an incremental stream is received, then the destination file system must already exist, and its most recent snapshot must match the incremental stream's @@ -3025,6 +3297,55 @@ destroyed by using the .Nm zfs Cm destroy Fl d command. .Pp +Raw encrypted send streams (created with +.Nm zfs Cm send Fl w +) may only be received as is, and cannot be re-encrypted, decrypted, or +recompressed by the receive process. +Unencrypted streams can be received as encrypted datasets, either through +inheritance or by specifying encryption parameters with the +.Fl o +options. +Note that the +.Sy keylocation +property cannot be overridden to +.Sy prompt +during a receive. +This is because the receive process itself is already using +stdin for the send stream. +Instead, the property can be overridden after the receive completes. +.Pp +The added security provided by raw sends adds some restrictions to the send +and receive process. +ZFS will not allow a mix of raw receives and non-raw receives. +Specifically, any raw incremental receives that are attempted after +a non-raw receive will fail. +Non-raw receives do not have this restriction and, therefore, are always +possible. +Because of this, it is best practice to always use either raw sends for +their security benefits or non-raw sends for their flexibility when working +with encrypted datasets, but not a combination. +.Pp +The reason for this restriction stems from the inherent restrictions of the +AEAD ciphers that ZFS uses to encrypt data. +When using ZFS native encryption, each block of data is encrypted against +a randomly generated number known as the "initialization vector" (IV), +which is stored in the filesystem metadata. +This number is required by the encryption algorithms whenever the data is to +be decrypted. 
+Together, all of the IVs provided for all of the blocks in a given snapshot +are collectively called an "IV set". +When ZFS performs a raw send, the IV set is transferred from the source to +the destination in the send stream. +When ZFS performs a non-raw send, the data is decrypted by the source +system and re-encrypted by the destination system, creating a snapshot with +effectively the same data, but a different IV set. +In order for decryption to work after a raw send, ZFS must ensure that the +IV set used on both the source and destination side match. +When an incremental raw receive is performed on top of an existing snapshot, +ZFS will check to confirm that the "from" snapshot on both the source and +destination were using the same IV set, ensuring the new IV set is consistent. +.Pp The name of the snapshot .Pq and file system, if a full stream is received that this subcommand creates depends on the argument type and the use of the @@ -3236,6 +3557,10 @@ diff subcommand Allows lookup of paths within a dataset given an object number, and the ability to create snapshots necessary to 'zfs diff'. +load-key subcommand Allows loading and unloading of encryption key + (see 'zfs load-key' and 'zfs unload-key'). +change-key subcommand Allows changing an encryption key via + 'zfs change-key'. mount subcommand Allows mount/umount of ZFS datasets promote subcommand Must also have the 'mount' and 'promote' ability in the origin file system @@ -3541,6 +3866,105 @@ See .Xr zfs-program 1M for more information. .El +.It Xo +.Nm Cm load-key +.Op Fl nr +.Op Fl L Ar keylocation +.Fl a Ns | Ns Ar filesystem +.Xc +Use +.Ar keylocation +instead of the +.Sy keylocation +property. +This will not change the value of the property on the dataset. +Note that if used with either +.Fl r +or +.Fl a +.Ar keylocation +may only be given as +.Sy prompt . +.Bl -tag -width Ds +.It Fl a +Loads the keys for all encryption roots in all imported pools. +.It Fl n +Do a dry-run +.Cm load-key . 
+This will cause zfs to simply check that the provided key is correct. +This command may be run even if the key is already loaded. +.It Fl r +Recursively loads the keys for the specified filesystem and all descendent +encryption roots. +.El +.It Xo +.Nm +.Cm unload-key +.Op Fl r +.Fl a Ns | Ns Ar filesystem +.Xc +Unloads a key from ZFS, removing the ability to access the dataset and all of +its children that inherit the +.Sy encryption +property. +This requires that the dataset is not currently open or mounted. +Once the key is unloaded the +.Sy keystatus +property will be set to +.Sy unavailable . +.Bl -tag -width Ds +.It Fl a +Unloads the keys for all encryption roots in all imported pools. +.It Fl r +Recursively unloads the keys for the specified filesystem and all descendent +encryption roots. +.El +.It Xo +.Nm +.Cm change-key +.Op Fl il +.Op Fl o Sy keylocation Ns = Ns Ar value +.Op Fl o Sy keyformat Ns = Ns Ar value +.Op Fl o Sy pbkdf2iters Ns = Ns Ar value +.Ar filesystem +.Xc +Allows a user to change the encryption key used to access a dataset. +This command requires that the existing key for the dataset is already loaded +into ZFS. +This command may also be used to change the +.Sy keylocation , keyformat , +and +.Sy pbkdf2iters +properties as needed. +If the dataset was not previously an encryption root it will become one. +Alternatively, the +.Fl i +flag may be provided to cause an encryption root to inherit the +parent's key instead. +.Bl -tag -width Ds +.It Fl i +Indicates that ZFS should make +.Ar filesystem +inherit the key of its parent. +Note that this command can only be run on an encryption root that has an +encrypted parent. +.It Fl l +Ensures the key is loaded before attempting to change the key. +This is effectively equivalent to +.Qq Nm Cm load-key Ar filesystem ; Nm Cm change-key Ar filesystem . 
+.It Fl o Sy property Ns = Ns Ar value +Allows the user to set encryption key properties +.Pq +.Sy keyformat , keylocation , +and +.Sy pbkdf2iters +while changing the key. +This is the only way to alter +.Sy keyformat +and +.Sy pbkdf2iters +after the dataset has been created. +.El .El .Sh EXIT STATUS The diff --git a/usr/src/man/man1m/zpool.1m b/usr/src/man/man1m/zpool.1m index ac30394a52..d0bf65c577 100644 --- a/usr/src/man/man1m/zpool.1m +++ b/usr/src/man/man1m/zpool.1m @@ -89,7 +89,7 @@ .Nm .Cm import .Fl a -.Op Fl DfmN +.Op Fl DflmN .Op Fl F Op Fl n .Op Fl c Ar cachefile Ns | Ns Fl d Ar dir .Op Fl o Ar mntopts @@ -168,7 +168,7 @@ .Ar pool .Nm .Cm split -.Op Fl gLnP +.Op Fl gLlnP .Oo Fl o Ar property Ns = Ns Ar value Oc Ns ... .Op Fl R Ar root .Ar pool newpool @@ -1276,7 +1276,7 @@ Lists destroyed pools only. .Nm .Cm import .Fl a -.Op Fl DfmN +.Op Fl DflmN .Op Fl F Op Fl n .Op Fl c Ar cachefile Ns | Ns Fl d Ar dir .Op Fl o Ar mntopts @@ -1326,6 +1326,16 @@ transactions. Not all damaged pools can be recovered by using this option. If successful, the data from the discarded transactions is irretrievably lost. This option is ignored if the pool is importable or already imported. +.It Fl l +Indicates that this command will request encryption keys for all encrypted +datasets it attempts to mount as it is bringing the pool online. +Note that if any datasets have a +.Sy keylocation +of +.Sy prompt +this command will block waiting for the keys to be entered. +Without this flag encrypted datasets will be left unavailable until the keys are +loaded. .It Fl m Allows a pool to import when there is a missing log device. Recent transactions can be lost because the log device will be discarded. @@ -1423,6 +1433,18 @@ transactions. Not all damaged pools can be recovered by using this option. If successful, the data from the discarded transactions is irretrievably lost. This option is ignored if the pool is importable or already imported. 
+.It Fl l +Indicates that the zpool command will request encryption keys for all +encrypted datasets it attempts to mount as it is bringing the pool +online. +This is equivalent to running +.Nm zfs Cm mount +on each encrypted dataset immediately after the pool is imported. +If any datasets have a +.Sy prompt +keylocation this command will block waiting for the key to be entered. +Otherwise, encrypted datasets will be left unavailable until the keys are +loaded. .It Fl m Allows a pool to import when there is a missing log device. Recent transactions can be lost because the log device will be discarded. @@ -1858,7 +1880,7 @@ values. .It Xo .Nm .Cm split -.Op Fl gLnP +.Op Fl gLlnP .Oo Fl o Ar property Ns = Ns Ar value Oc Ns ... .Op Fl R Ar root .Ar pool newpool @@ -1884,6 +1906,16 @@ Display real paths for vdevs resolving all symbolic links. This can be used to look up the current block device name regardless of the .Pa /dev/disk/ path used to open it. +.It Fl l +Indicates that this command will request encryption keys for all encrypted +datasets it attempts to mount as it is bringing the new pool online. +Note that if any datasets have a +.Sy keylocation +of +.Sy prompt +this command will block waiting for the keys to be entered. +Without this flag encrypted datasets will be left unavailable and unmounted +until the keys are loaded. .It Fl n Do dry run, do not actually perform the split. Print out the expected configuration of diff --git a/usr/src/man/man5/zpool-features.5 b/usr/src/man/man5/zpool-features.5 index ff34ce5d48..88efb3d543 100644 --- a/usr/src/man/man5/zpool-features.5 +++ b/usr/src/man/man5/zpool-features.5 @@ -626,6 +626,27 @@ Booting off of pools using \fBskein\fR is supported. .sp .ne 2 .na 
+ +This feature becomes \fBactive\fR when a v2 bookmark is created and will be +returned to the \fBenabled\fR state when all v2 bookmarks are destroyed. + +.RE + +.sp +.ne 2 +.na \fB\fBedonr\fR\fR .ad .RS 4n @@ -682,6 +703,26 @@ This feature becomes \fBactive\fR when a dedicated allocation class vdev (dedup or special) is created with zpool create or zpool add. With device removal, it can be returned to the \fBenabled\fR state if all the top-level vdevs from an allocation class are removed. +.RE + +.sp +.ne 2 +.na +\fB\fBencryption\fR\fR +.ad +.RS 4n +.TS +l l . +GUID com.datto:encryption +READ\-ONLY COMPATIBLE no +DEPENDENCIES extensible_dataset +.TE + +This feature enables the creation and management of natively encrypted datasets. + +This feature becomes \fBactive\fR when an encrypted dataset is created +and will be returned to the \fBenabled\fR state when all datasets that +use this feature are destroyed. .RE .sp diff --git a/usr/src/pkg/manifests/system-test-zfstest.mf b/usr/src/pkg/manifests/system-test-zfstest.mf index bf1e75980b..c0a85644aa 100644 --- a/usr/src/pkg/manifests/system-test-zfstest.mf +++ b/usr/src/pkg/manifests/system-test-zfstest.mf @@ -48,12 +48,14 @@ dir path=opt/zfs-tests/tests/functional/clean_mirror dir path=opt/zfs-tests/tests/functional/cli_root dir path=opt/zfs-tests/tests/functional/cli_root/zdb dir path=opt/zfs-tests/tests/functional/cli_root/zfs +dir path=opt/zfs-tests/tests/functional/cli_root/zfs_change-key dir path=opt/zfs-tests/tests/functional/cli_root/zfs_clone dir path=opt/zfs-tests/tests/functional/cli_root/zfs_copies dir path=opt/zfs-tests/tests/functional/cli_root/zfs_create dir path=opt/zfs-tests/tests/functional/cli_root/zfs_destroy dir path=opt/zfs-tests/tests/functional/cli_root/zfs_get dir path=opt/zfs-tests/tests/functional/cli_root/zfs_inherit +dir path=opt/zfs-tests/tests/functional/cli_root/zfs_load-key dir path=opt/zfs-tests/tests/functional/cli_root/zfs_mount dir 
path=opt/zfs-tests/tests/functional/cli_root/zfs_program dir path=opt/zfs-tests/tests/functional/cli_root/zfs_promote @@ -67,6 +69,7 @@ dir path=opt/zfs-tests/tests/functional/cli_root/zfs_send dir path=opt/zfs-tests/tests/functional/cli_root/zfs_set dir path=opt/zfs-tests/tests/functional/cli_root/zfs_share dir path=opt/zfs-tests/tests/functional/cli_root/zfs_snapshot +dir path=opt/zfs-tests/tests/functional/cli_root/zfs_unload-key dir path=opt/zfs-tests/tests/functional/cli_root/zfs_unmount dir path=opt/zfs-tests/tests/functional/cli_root/zfs_unshare dir path=opt/zfs-tests/tests/functional/cli_root/zfs_upgrade @@ -696,6 +699,31 @@ file path=opt/zfs-tests/tests/functional/cli_root/zfs/setup mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs/zfs_001_neg mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs/zfs_002_pos mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs/zfs_003_neg mode=0555 +file path=opt/zfs-tests/tests/functional/cli_root/zfs_change-key/cleanup \ + mode=0555 +file path=opt/zfs-tests/tests/functional/cli_root/zfs_change-key/setup \ + mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key \ + mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_child \ + mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_format \ + mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_inherit \ + mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_load \ + mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_location \ + mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_pbkdf2iters \ + mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_clone/cleanup mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_clone/setup 
mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_001_neg \ @@ -718,6 +746,9 @@ file path=opt/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_009_neg \ mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_010_pos \ mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_encrypted \ + mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_copies/cleanup mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_copies/setup mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_copies/zfs_copies.cfg \ @@ -790,6 +821,12 @@ file \ file \ path=opt/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_common.kshlib \ mode=0444 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_crypt_combos \ + mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_encrypted \ + mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_destroy/cleanup \ mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_destroy/setup mode=0555 @@ -886,6 +923,32 @@ file \ file \ path=opt/zfs-tests/tests/functional/cli_root/zfs_inherit/zfs_inherit_003_pos \ mode=0555 +file path=opt/zfs-tests/tests/functional/cli_root/zfs_load-key/cleanup \ + mode=0555 +file path=opt/zfs-tests/tests/functional/cli_root/zfs_load-key/setup mode=0555 +file path=opt/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key \ + mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key.cfg \ + mode=0444 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_all \ + mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib \ + mode=0444 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_file \ + mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_location \ + 
mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_noop \ + mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_recursive \ + mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_mount/cleanup mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_mount/setup mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount.cfg \ @@ -924,6 +987,9 @@ file path=opt/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_all_fail \ file \ path=opt/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_all_mountpoints \ mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_encrypted \ + mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_program/cleanup \ mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_program/setup mode=0555 @@ -958,6 +1024,9 @@ file \ file \ path=opt/zfs-tests/tests/functional/cli_root/zfs_promote/zfs_promote_008_pos \ mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_promote/zfs_promote_encryptionroot \ + mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_property/cleanup \ mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_property/setup mode=0555 @@ -1009,6 +1078,17 @@ file \ file \ path=opt/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_014_pos \ mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_from_encrypted \ + mode=0555 +file path=opt/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_raw \ + mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_raw_incremental \ + mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_to_encrypted \ + mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_remap/cleanup mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_remap/setup mode=0555 file 
path=opt/zfs-tests/tests/functional/cli_root/zfs_remap/zfs_remap_cliargs \ @@ -1064,6 +1144,12 @@ file \ file \ path=opt/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_014_neg \ mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_encrypted_child \ + mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_to_encrypted \ + mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_reservation/cleanup \ mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_reservation/setup \ @@ -1113,6 +1199,13 @@ file path=opt/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_006_pos \ mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_007_pos \ mode=0555 +file path=opt/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_encrypted \ + mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_encrypted_unloaded \ + mode=0555 +file path=opt/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_raw \ + mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_set/cache_001_pos \ mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_set/cache_002_neg \ @@ -1175,6 +1268,8 @@ file path=opt/zfs-tests/tests/functional/cli_root/zfs_set/zfs_set_003_neg \ file \ path=opt/zfs-tests/tests/functional/cli_root/zfs_set/zfs_set_common.kshlib \ mode=0444 +file path=opt/zfs-tests/tests/functional/cli_root/zfs_set/zfs_set_keylocation \ + mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_share/cleanup mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_share/setup mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_share/zfs_share.cfg \ @@ -1234,6 +1329,19 @@ file \ file \ path=opt/zfs-tests/tests/functional/cli_root/zfs_snapshot/zfs_snapshot_009_pos \ mode=0555 +file path=opt/zfs-tests/tests/functional/cli_root/zfs_unload-key/cleanup \ + mode=0555 +file 
path=opt/zfs-tests/tests/functional/cli_root/zfs_unload-key/setup \ + mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_unload-key/zfs_unload-key \ + mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_unload-key/zfs_unload-key_all \ + mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zfs_unload-key/zfs_unload-key_recursive \ + mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_unmount/cleanup \ mode=0555 file path=opt/zfs-tests/tests/functional/cli_root/zfs_unmount/setup mode=0555 @@ -1456,6 +1564,12 @@ file \ path=opt/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_024_pos \ mode=0555 file \ + path=opt/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_crypt_combos \ + mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_encrypted \ + mode=0555 +file \ path=opt/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_features_001_pos \ mode=0555 file \ @@ -1541,10 +1655,15 @@ file \ path=opt/zfs-tests/tests/functional/cli_root/zpool_history/zpool_history_002_pos \ mode=0555 file \ + path=opt/zfs-tests/tests/functional/cli_root/zpool_import/blockfiles/missing_ivset.dat.bz2 \ + mode=0444 +file \ path=opt/zfs-tests/tests/functional/cli_root/zpool_import/blockfiles/unclean_export.dat.bz2 \ mode=0444 file path=opt/zfs-tests/tests/functional/cli_root/zpool_import/cleanup \ mode=0555 +file path=opt/zfs-tests/tests/functional/cli_root/zpool_import/cryptv0.dat.bz2 \ + mode=0444 file \ path=opt/zfs-tests/tests/functional/cli_root/zpool_import/import_cachefile_device_added \ mode=0555 @@ -1628,6 +1747,18 @@ file \ path=opt/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_all_001_pos \ mode=0555 file \ + path=opt/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_encrypted \ + mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_encrypted_load \ + mode=0555 +file \ + 
path=opt/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_errata3 \ + mode=0555 +file \ + path=opt/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_errata4 \ + mode=0555 +file \ path=opt/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_features_001_pos \ mode=0555 file \ @@ -1800,6 +1931,9 @@ file \ path=opt/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_005_pos \ mode=0555 file \ + path=opt/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_encrypted_unloaded \ + mode=0555 +file \ path=opt/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_multiple_copies \ mode=0555 file \ @@ -2672,9 +2806,17 @@ file path=opt/zfs-tests/tests/functional/rsend/send-c_volume mode=0555 file path=opt/zfs-tests/tests/functional/rsend/send-c_zstreamdump mode=0555 file path=opt/zfs-tests/tests/functional/rsend/send-cpL_varied_recsize \ mode=0555 +file path=opt/zfs-tests/tests/functional/rsend/send_encrypted_files mode=0555 +file path=opt/zfs-tests/tests/functional/rsend/send_encrypted_hierarchy \ + mode=0555 +file path=opt/zfs-tests/tests/functional/rsend/send_encrypted_truncated_files \ + mode=0555 file path=opt/zfs-tests/tests/functional/rsend/send_freeobjects mode=0555 +file path=opt/zfs-tests/tests/functional/rsend/send_mixed_raw mode=0555 file path=opt/zfs-tests/tests/functional/rsend/send_realloc_dnode_size \ mode=0555 +file path=opt/zfs-tests/tests/functional/rsend/send_realloc_encrypted_files \ + mode=0555 file path=opt/zfs-tests/tests/functional/rsend/setup mode=0555 file path=opt/zfs-tests/tests/functional/scrub_mirror/cleanup mode=0555 file path=opt/zfs-tests/tests/functional/scrub_mirror/default.cfg mode=0444 diff --git a/usr/src/test/zfs-tests/runfiles/delphix.run b/usr/src/test/zfs-tests/runfiles/delphix.run index 738fe89309..fb5c8e7bbe 100644 --- a/usr/src/test/zfs-tests/runfiles/delphix.run +++ b/usr/src/test/zfs-tests/runfiles/delphix.run @@ -113,11 +113,16 @@ post = 
[/opt/zfs-tests/tests/functional/cli_root/zfs] tests = ['zfs_001_neg', 'zfs_002_pos', 'zfs_003_neg'] +[/opt/zfs-tests/tests/functional/cli_root/zfs_change-key] +tests = ['zfs_change-key', 'zfs_change-key_child', 'zfs_change-key_format', + 'zfs_change-key_inherit', 'zfs_change-key_load', 'zfs_change-key_location', + 'zfs_change-key_pbkdf2iters'] + [/opt/zfs-tests/tests/functional/cli_root/zfs_clone] tests = ['zfs_clone_001_neg', 'zfs_clone_002_pos', 'zfs_clone_003_pos', 'zfs_clone_004_pos', 'zfs_clone_005_pos', 'zfs_clone_006_pos', 'zfs_clone_007_pos', 'zfs_clone_008_neg', 'zfs_clone_009_neg', - 'zfs_clone_010_pos'] + 'zfs_clone_010_pos', 'zfs_clone_encrypted'] [/opt/zfs-tests/tests/functional/cli_root/zfs_copies] tests = ['zfs_copies_001_pos', 'zfs_copies_002_pos', 'zfs_copies_003_pos', @@ -128,7 +133,8 @@ tests = ['zfs_create_001_pos', 'zfs_create_002_pos', 'zfs_create_003_pos', 'zfs_create_004_pos', 'zfs_create_005_pos', 'zfs_create_006_pos', 'zfs_create_007_pos', 'zfs_create_008_neg', 'zfs_create_009_neg', 'zfs_create_010_neg', 'zfs_create_011_pos', 'zfs_create_012_pos', - 'zfs_create_013_pos'] + 'zfs_create_013_pos', 'zfs_create_encrypted', + 'zfs_create_crypt_combos'] [/opt/zfs-tests/tests/functional/cli_root/zfs_destroy] tests = ['zfs_destroy_001_pos', 'zfs_destroy_002_pos', 'zfs_destroy_003_pos', @@ -146,12 +152,17 @@ tests = ['zfs_get_001_pos', 'zfs_get_002_pos', 'zfs_get_003_pos', [/opt/zfs-tests/tests/functional/cli_root/zfs_inherit] tests = ['zfs_inherit_001_neg', 'zfs_inherit_002_neg', 'zfs_inherit_003_pos'] +[/opt/zfs-tests/tests/functional/cli_root/zfs_load-key] +tests = ['zfs_load-key', 'zfs_load-key_all', 'zfs_load-key_file', + 'zfs_load-key_location', 'zfs_load-key_noop', 'zfs_load-key_recursive'] + [/opt/zfs-tests/tests/functional/cli_root/zfs_mount] tests = ['zfs_mount_001_pos', 'zfs_mount_002_pos', 'zfs_mount_003_pos', 'zfs_mount_004_pos', 'zfs_mount_005_pos', 'zfs_mount_006_pos', 'zfs_mount_007_pos', 'zfs_mount_008_pos', 'zfs_mount_009_neg', 
'zfs_mount_010_neg', 'zfs_mount_011_neg', 'zfs_mount_012_neg', - 'zfs_mount_all_001_pos', 'zfs_mount_all_fail', 'zfs_mount_all_mountpoints'] + 'zfs_mount_all_001_pos', 'zfs_mount_all_fail', 'zfs_mount_all_mountpoints', + 'zfs_mount_encrypted'] [/opt/zfs-tests/tests/functional/cli_root/zfs_program] tests = ['zfs_program_json'] @@ -159,7 +170,7 @@ tests = ['zfs_program_json'] [/opt/zfs-tests/tests/functional/cli_root/zfs_promote] tests = ['zfs_promote_001_pos', 'zfs_promote_002_pos', 'zfs_promote_003_pos', 'zfs_promote_004_pos', 'zfs_promote_005_pos', 'zfs_promote_006_neg', - 'zfs_promote_007_neg', 'zfs_promote_008_pos'] + 'zfs_promote_007_neg', 'zfs_promote_008_pos', 'zfs_promote_encryptionroot'] [/opt/zfs-tests/tests/functional/cli_root/zfs_property] tests = ['zfs_written_property_001_pos'] @@ -169,7 +180,9 @@ tests = ['zfs_receive_001_pos', 'zfs_receive_002_pos', 'zfs_receive_003_pos', 'zfs_receive_005_neg', 'zfs_receive_006_pos', 'zfs_receive_007_neg', 'zfs_receive_008_pos', 'zfs_receive_009_neg', 'zfs_receive_010_pos', 'zfs_receive_011_pos', 'zfs_receive_012_pos', - 'zfs_receive_013_pos', 'zfs_receive_014_pos'] + 'zfs_receive_013_pos', 'zfs_receive_014_pos', + 'zfs_receive_from_encrypted', 'zfs_receive_raw', + 'zfs_receive_raw_incremental', 'zfs_receive_to_encrypted'] [/opt/zfs-tests/tests/functional/cli_root/zfs_remap] tests = ['zfs_remap_cliargs', 'zfs_remap_obsolete_counts'] @@ -179,7 +192,8 @@ tests = ['zfs_rename_001_pos', 'zfs_rename_002_pos', 'zfs_rename_003_pos', 'zfs_rename_004_neg', 'zfs_rename_005_neg', 'zfs_rename_006_pos', 'zfs_rename_007_pos', 'zfs_rename_008_pos', 'zfs_rename_009_neg', 'zfs_rename_010_neg', 'zfs_rename_011_pos', 'zfs_rename_012_neg', - 'zfs_rename_013_pos', 'zfs_rename_014_neg'] + 'zfs_rename_013_pos', 'zfs_rename_014_neg', 'zfs_rename_encrypted_child', + 'zfs_rename_to_encrypted'] [/opt/zfs-tests/tests/functional/cli_root/zfs_reservation] tests = ['zfs_reservation_001_pos', 'zfs_reservation_002_pos'] @@ -191,7 +205,7 @@ tests = 
['zfs_rollback_001_pos', 'zfs_rollback_002_pos', [/opt/zfs-tests/tests/functional/cli_root/zfs_send] tests = ['zfs_send_001_pos', 'zfs_send_002_pos', 'zfs_send_003_pos', 'zfs_send_004_neg', 'zfs_send_005_pos', 'zfs_send_006_pos', - 'zfs_send_007_pos'] + 'zfs_send_007_pos', 'zfs_send_encrypted', 'zfs_send_raw'] [/opt/zfs-tests/tests/functional/cli_root/zfs_set] tests = ['cache_001_pos', 'cache_002_neg', 'canmount_001_pos', @@ -202,7 +216,7 @@ tests = ['cache_001_pos', 'cache_002_neg', 'canmount_001_pos', 'ro_props_001_pos', 'share_mount_001_neg', 'snapdir_001_pos', 'user_property_001_pos', 'user_property_002_pos', 'user_property_003_neg', 'user_property_004_pos', 'version_001_neg', 'zfs_set_001_neg', - 'zfs_set_002_neg', 'zfs_set_003_neg'] + 'zfs_set_002_neg', 'zfs_set_003_neg', 'zfs_set_keylocation'] [/opt/zfs-tests/tests/functional/cli_root/zfs_share] tests = ['zfs_share_001_pos', 'zfs_share_002_pos', 'zfs_share_003_pos', @@ -216,6 +230,9 @@ tests = ['zfs_snapshot_001_neg', 'zfs_snapshot_002_neg', 'zfs_snapshot_006_pos', 'zfs_snapshot_007_neg', 'zfs_snapshot_008_neg', 'zfs_snapshot_009_pos'] +[/opt/zfs-tests/tests/functional/cli_root/zfs_unload-key] +tests = ['zfs_unload-key', 'zfs_unload-key_all', 'zfs_unload-key_recursive'] + [/opt/zfs-tests/tests/functional/cli_root/zfs_unmount] tests = ['zfs_unmount_001_pos', 'zfs_unmount_002_pos', 'zfs_unmount_003_pos', 'zfs_unmount_004_pos', 'zfs_unmount_005_pos', 'zfs_unmount_006_pos', @@ -258,6 +275,7 @@ tests = ['zpool_create_001_pos', 'zpool_create_002_pos', 'zpool_create_018_pos', 'zpool_create_019_pos', 'zpool_create_020_pos', 'zpool_create_021_pos', 'zpool_create_022_pos', 'zpool_create_023_neg', 'zpool_create_024_pos', + 'zpool_create_encrypted', 'zpool_create_crypt_combos', 'zpool_create_features_001_pos', 'zpool_create_features_002_pos', 'zpool_create_features_003_pos', 'zpool_create_features_004_neg', 'zpool_create_tempname'] @@ -306,7 +324,8 @@ tests = ['zpool_import_001_pos', 'zpool_import_002_pos', 
'import_rewind_device_replaced', 'import_cachefile_shared_device', 'import_paths_changed', - 'import_devices_missing'] + 'import_devices_missing', + 'zpool_import_encrypted', 'zpool_import_encrypted_load'] [/opt/zfs-tests/tests/functional/cli_root/zpool_labelclear] tests = ['zpool_labelclear_active', 'zpool_labelclear_exported'] @@ -346,7 +365,8 @@ tags = ['functional', 'cli_root', 'zpool_resilver'] [/opt/zfs-tests/tests/functional/cli_root/zpool_scrub] tests = ['zpool_scrub_001_neg', 'zpool_scrub_002_pos', 'zpool_scrub_003_pos', 'zpool_scrub_004_pos', 'zpool_scrub_005_pos', 'zpool_scrub_print_repairing', - 'zpool_scrub_offline_device', 'zpool_scrub_multiple_copies'] + 'zpool_scrub_offline_device', 'zpool_scrub_multiple_copies', + 'zpool_scrub_encrypted_unloaded'] [/opt/zfs-tests/tests/functional/cli_root/zpool_set] tests = ['zpool_set_001_pos', 'zpool_set_002_neg', 'zpool_set_003_neg'] @@ -408,7 +428,7 @@ tests = ['ctime_001_pos' ] tests = ['zfs_allow_001_pos', 'zfs_allow_002_pos', 'zfs_allow_004_pos', 'zfs_allow_005_pos', 'zfs_allow_006_pos', 'zfs_allow_007_pos', 'zfs_allow_008_pos', 'zfs_allow_009_neg', - 'zfs_allow_010_pos', 'zfs_allow_011_neg', 'zfs_allow_012_neg', + 'zfs_allow_011_neg', 'zfs_allow_012_neg', 'zfs_unallow_001_pos', 'zfs_unallow_002_pos', 'zfs_unallow_003_pos', 'zfs_unallow_004_pos', 'zfs_unallow_005_pos', 'zfs_unallow_006_pos', 'zfs_unallow_007_neg', 'zfs_unallow_008_neg'] @@ -583,7 +603,10 @@ tests = ['rsend_001_pos', 'rsend_002_pos', 'rsend_003_pos', 'rsend_004_pos', 'send-c_lz4_disabled', 'send-c_recv_lz4_disabled', 'send-c_mixed_compression', 'send-c_stream_size_estimate', 'send-cD', 'send-c_embedded_blocks', 'send-c_resume', 'send-cpL_varied_recsize', - 'send-c_recv_dedup'] + 'send-c_recv_dedup', 'send_encrypted_files', 'send_encrypted_hierarchy', + 'send_encrypted_truncated_files', + 'send_realloc_encrypted_files', + 'send_mixed_raw'] [/opt/zfs-tests/tests/functional/scrub_mirror] tests = ['scrub_mirror_001_pos', 'scrub_mirror_002_pos', 
diff --git a/usr/src/test/zfs-tests/runfiles/omnios.run b/usr/src/test/zfs-tests/runfiles/omnios.run index 926f65cb20..5d3571307e 100644 --- a/usr/src/test/zfs-tests/runfiles/omnios.run +++ b/usr/src/test/zfs-tests/runfiles/omnios.run @@ -114,11 +114,16 @@ post = [/opt/zfs-tests/tests/functional/cli_root/zfs] tests = ['zfs_001_neg', 'zfs_002_pos', 'zfs_003_neg'] +[/opt/zfs-tests/tests/functional/cli_root/zfs_change-key] +tests = ['zfs_change-key', 'zfs_change-key_child', 'zfs_change-key_format', + 'zfs_change-key_inherit', 'zfs_change-key_load', 'zfs_change-key_location', + 'zfs_change-key_pbkdf2iters'] + [/opt/zfs-tests/tests/functional/cli_root/zfs_clone] tests = ['zfs_clone_001_neg', 'zfs_clone_002_pos', 'zfs_clone_003_pos', 'zfs_clone_004_pos', 'zfs_clone_005_pos', 'zfs_clone_006_pos', 'zfs_clone_007_pos', 'zfs_clone_008_neg', 'zfs_clone_009_neg', - 'zfs_clone_010_pos'] + 'zfs_clone_010_pos', 'zfs_clone_encrypted'] [/opt/zfs-tests/tests/functional/cli_root/zfs_copies] tests = ['zfs_copies_001_pos', 'zfs_copies_002_pos', 'zfs_copies_003_pos', @@ -129,7 +134,7 @@ tests = ['zfs_create_001_pos', 'zfs_create_002_pos', 'zfs_create_003_pos', 'zfs_create_004_pos', 'zfs_create_005_pos', 'zfs_create_006_pos', 'zfs_create_007_pos', 'zfs_create_008_neg', 'zfs_create_009_neg', 'zfs_create_010_neg', 'zfs_create_011_pos', 'zfs_create_012_pos', - 'zfs_create_013_pos'] + 'zfs_create_013_pos', 'zfs_create_encrypted', 'zfs_create_crypt_combos'] [/opt/zfs-tests/tests/functional/cli_root/zfs_destroy] tests = ['zfs_destroy_001_pos', 'zfs_destroy_002_pos', 'zfs_destroy_003_pos', @@ -147,12 +152,17 @@ tests = ['zfs_get_001_pos', 'zfs_get_002_pos', 'zfs_get_003_pos', [/opt/zfs-tests/tests/functional/cli_root/zfs_inherit] tests = ['zfs_inherit_001_neg', 'zfs_inherit_002_neg', 'zfs_inherit_003_pos'] +[/opt/zfs-tests/tests/functional/cli_root/zfs_load-key] +tests = ['zfs_load-key', 'zfs_load-key_all', 'zfs_load-key_file', + 'zfs_load-key_location', 'zfs_load-key_noop', 
'zfs_load-key_recursive'] + [/opt/zfs-tests/tests/functional/cli_root/zfs_mount] tests = ['zfs_mount_001_pos', 'zfs_mount_002_pos', 'zfs_mount_003_pos', 'zfs_mount_004_pos', 'zfs_mount_005_pos', 'zfs_mount_006_pos', 'zfs_mount_007_pos', 'zfs_mount_008_pos', 'zfs_mount_009_neg', 'zfs_mount_010_neg', 'zfs_mount_011_neg', 'zfs_mount_012_neg', - 'zfs_mount_all_001_pos', 'zfs_mount_all_fail', 'zfs_mount_all_mountpoints'] + 'zfs_mount_all_001_pos', 'zfs_mount_all_fail', 'zfs_mount_all_mountpoints', + 'zfs_mount_encrypted'] [/opt/zfs-tests/tests/functional/cli_root/zfs_program] tests = ['zfs_program_json'] @@ -160,7 +170,7 @@ tests = ['zfs_program_json'] [/opt/zfs-tests/tests/functional/cli_root/zfs_promote] tests = ['zfs_promote_001_pos', 'zfs_promote_002_pos', 'zfs_promote_003_pos', 'zfs_promote_004_pos', 'zfs_promote_005_pos', 'zfs_promote_006_neg', - 'zfs_promote_007_neg', 'zfs_promote_008_pos'] + 'zfs_promote_007_neg', 'zfs_promote_008_pos', 'zfs_promote_encryptionroot'] [/opt/zfs-tests/tests/functional/cli_root/zfs_property] tests = ['zfs_written_property_001_pos'] @@ -170,7 +180,9 @@ tests = ['zfs_receive_001_pos', 'zfs_receive_002_pos', 'zfs_receive_003_pos', 'zfs_receive_005_neg', 'zfs_receive_006_pos', 'zfs_receive_007_neg', 'zfs_receive_008_pos', 'zfs_receive_009_neg', 'zfs_receive_010_pos', 'zfs_receive_011_pos', 'zfs_receive_012_pos', - 'zfs_receive_013_pos', 'zfs_receive_014_pos'] + 'zfs_receive_013_pos', 'zfs_receive_014_pos', + 'zfs_receive_from_encrypted', 'zfs_receive_raw', + 'zfs_receive_raw_incremental', 'zfs_receive_to_encrypted'] [/opt/zfs-tests/tests/functional/cli_root/zfs_remap] tests = ['zfs_remap_cliargs', 'zfs_remap_obsolete_counts'] @@ -180,7 +192,8 @@ tests = ['zfs_rename_001_pos', 'zfs_rename_002_pos', 'zfs_rename_003_pos', 'zfs_rename_004_neg', 'zfs_rename_005_neg', 'zfs_rename_006_pos', 'zfs_rename_007_pos', 'zfs_rename_008_pos', 'zfs_rename_009_neg', 'zfs_rename_010_neg', 'zfs_rename_011_pos', 'zfs_rename_012_neg', - 'zfs_rename_013_pos', 
'zfs_rename_014_neg'] + 'zfs_rename_013_pos', 'zfs_rename_014_neg', 'zfs_rename_encrypted_child', + 'zfs_rename_to_encrypted'] [/opt/zfs-tests/tests/functional/cli_root/zfs_reservation] tests = ['zfs_reservation_001_pos', 'zfs_reservation_002_pos'] @@ -192,7 +205,7 @@ tests = ['zfs_rollback_001_pos', 'zfs_rollback_002_pos', [/opt/zfs-tests/tests/functional/cli_root/zfs_send] tests = ['zfs_send_001_pos', 'zfs_send_002_pos', 'zfs_send_003_pos', 'zfs_send_004_neg', 'zfs_send_005_pos', 'zfs_send_006_pos', - 'zfs_send_007_pos'] + 'zfs_send_007_pos', 'zfs_send_encrypted', 'zfs_send_raw'] [/opt/zfs-tests/tests/functional/cli_root/zfs_set] tests = ['cache_001_pos', 'cache_002_neg', 'canmount_001_pos', @@ -203,7 +216,7 @@ tests = ['cache_001_pos', 'cache_002_neg', 'canmount_001_pos', 'ro_props_001_pos', 'share_mount_001_neg', 'snapdir_001_pos', 'user_property_001_pos', 'user_property_002_pos', 'user_property_003_neg', 'user_property_004_pos', 'version_001_neg', 'zfs_set_001_neg', - 'zfs_set_002_neg', 'zfs_set_003_neg'] + 'zfs_set_002_neg', 'zfs_set_003_neg', 'zfs_set_keylocation'] [/opt/zfs-tests/tests/functional/cli_root/zfs_share] tests = ['zfs_share_001_pos', 'zfs_share_002_pos', 'zfs_share_003_pos', @@ -217,6 +230,9 @@ tests = ['zfs_snapshot_001_neg', 'zfs_snapshot_002_neg', 'zfs_snapshot_006_pos', 'zfs_snapshot_007_neg', 'zfs_snapshot_008_neg', 'zfs_snapshot_009_pos'] +[/opt/zfs-tests/tests/functional/cli_root/zfs_unload-key] +tests = ['zfs_unload-key', 'zfs_unload-key_all', 'zfs_unload-key_recursive'] + [/opt/zfs-tests/tests/functional/cli_root/zfs_unmount] tests = ['zfs_unmount_001_pos', 'zfs_unmount_002_pos', 'zfs_unmount_003_pos', 'zfs_unmount_004_pos', 'zfs_unmount_005_pos', 'zfs_unmount_006_pos', @@ -259,6 +275,7 @@ tests = ['zpool_create_001_pos', 'zpool_create_002_pos', 'zpool_create_018_pos', 'zpool_create_019_pos', 'zpool_create_020_pos', 'zpool_create_021_pos', 'zpool_create_022_pos', 'zpool_create_023_neg', 'zpool_create_024_pos', + 
'zpool_create_encrypted', 'zpool_create_crypt_combos', 'zpool_create_features_001_pos', 'zpool_create_features_002_pos', 'zpool_create_features_003_pos', 'zpool_create_features_004_neg', 'zpool_create_tempname'] @@ -306,7 +323,8 @@ tests = ['zpool_import_001_pos', 'zpool_import_002_pos', 'import_rewind_config_changed', 'import_cachefile_shared_device', 'import_paths_changed', - 'import_devices_missing'] + 'import_devices_missing', + 'zpool_import_encrypted', 'zpool_import_encrypted_load'] [/opt/zfs-tests/tests/functional/cli_root/zpool_labelclear] tests = ['zpool_labelclear_active', 'zpool_labelclear_exported'] @@ -347,7 +365,8 @@ tags = ['functional', 'cli_root', 'zpool_resilver'] [/opt/zfs-tests/tests/functional/cli_root/zpool_scrub] tests = ['zpool_scrub_001_neg', 'zpool_scrub_002_pos', 'zpool_scrub_003_pos', 'zpool_scrub_004_pos', 'zpool_scrub_005_pos', 'zpool_scrub_print_repairing', - 'zpool_scrub_offline_device', 'zpool_scrub_multiple_copies'] + 'zpool_scrub_offline_device', 'zpool_scrub_multiple_copies', + 'zpool_scrub_encrypted_unloaded'] [/opt/zfs-tests/tests/functional/cli_root/zpool_set] tests = ['zpool_set_001_pos', 'zpool_set_002_neg', 'zpool_set_003_neg'] @@ -583,7 +602,10 @@ tests = ['rsend_001_pos', 'rsend_002_pos', 'rsend_003_pos', 'rsend_004_pos', 'send-c_lz4_disabled', 'send-c_recv_lz4_disabled', 'send-c_mixed_compression', 'send-c_stream_size_estimate', 'send-cD', 'send-c_embedded_blocks', 'send-c_resume', 'send-cpL_varied_recsize', - 'send-c_recv_dedup'] + 'send-c_recv_dedup', 'send_encrypted_files', 'send_encrypted_hierarchy', + 'send_encrypted_truncated_files', + 'send_realloc_encrypted_files', + 'send_mixed_raw'] [/opt/zfs-tests/tests/functional/scrub_mirror] tests = ['scrub_mirror_001_pos', 'scrub_mirror_002_pos', diff --git a/usr/src/test/zfs-tests/runfiles/openindiana.run b/usr/src/test/zfs-tests/runfiles/openindiana.run index f86d6d9a7b..451852e863 100644 --- a/usr/src/test/zfs-tests/runfiles/openindiana.run +++ 
b/usr/src/test/zfs-tests/runfiles/openindiana.run @@ -114,11 +114,16 @@ post = [/opt/zfs-tests/tests/functional/cli_root/zfs] tests = ['zfs_001_neg', 'zfs_002_pos', 'zfs_003_neg'] +[/opt/zfs-tests/tests/functional/cli_root/zfs_change-key] +tests = ['zfs_change-key', 'zfs_change-key_child', 'zfs_change-key_format', + 'zfs_change-key_inherit', 'zfs_change-key_load', 'zfs_change-key_location', + 'zfs_change-key_pbkdf2iters'] + [/opt/zfs-tests/tests/functional/cli_root/zfs_clone] tests = ['zfs_clone_001_neg', 'zfs_clone_002_pos', 'zfs_clone_003_pos', 'zfs_clone_004_pos', 'zfs_clone_005_pos', 'zfs_clone_006_pos', 'zfs_clone_007_pos', 'zfs_clone_008_neg', 'zfs_clone_009_neg', - 'zfs_clone_010_pos'] + 'zfs_clone_010_pos', 'zfs_clone_encrypted'] [/opt/zfs-tests/tests/functional/cli_root/zfs_copies] tests = ['zfs_copies_001_pos', 'zfs_copies_002_pos', 'zfs_copies_003_pos', @@ -129,7 +134,7 @@ tests = ['zfs_create_001_pos', 'zfs_create_002_pos', 'zfs_create_003_pos', 'zfs_create_004_pos', 'zfs_create_005_pos', 'zfs_create_006_pos', 'zfs_create_007_pos', 'zfs_create_008_neg', 'zfs_create_009_neg', 'zfs_create_010_neg', 'zfs_create_011_pos', 'zfs_create_012_pos', - 'zfs_create_013_pos'] + 'zfs_create_013_pos', 'zfs_create_encrypted', 'zfs_create_crypt_combos'] [/opt/zfs-tests/tests/functional/cli_root/zfs_destroy] tests = ['zfs_destroy_001_pos', 'zfs_destroy_002_pos', 'zfs_destroy_003_pos', @@ -147,12 +152,17 @@ tests = ['zfs_get_001_pos', 'zfs_get_002_pos', 'zfs_get_003_pos', [/opt/zfs-tests/tests/functional/cli_root/zfs_inherit] tests = ['zfs_inherit_001_neg', 'zfs_inherit_002_neg', 'zfs_inherit_003_pos'] +[/opt/zfs-tests/tests/functional/cli_root/zfs_load-key] +tests = ['zfs_load-key', 'zfs_load-key_all', 'zfs_load-key_file', + 'zfs_load-key_location', 'zfs_load-key_noop', 'zfs_load-key_recursive'] + [/opt/zfs-tests/tests/functional/cli_root/zfs_mount] tests = ['zfs_mount_001_pos', 'zfs_mount_002_pos', 'zfs_mount_003_pos', 'zfs_mount_004_pos', 'zfs_mount_005_pos', 
'zfs_mount_006_pos', 'zfs_mount_007_pos', 'zfs_mount_008_pos', 'zfs_mount_009_neg', 'zfs_mount_010_neg', 'zfs_mount_011_neg', 'zfs_mount_012_neg', - 'zfs_mount_all_001_pos', 'zfs_mount_all_fail', 'zfs_mount_all_mountpoints'] + 'zfs_mount_all_001_pos', 'zfs_mount_all_fail', 'zfs_mount_all_mountpoints', + 'zfs_mount_encrypted'] [/opt/zfs-tests/tests/functional/cli_root/zfs_program] tests = ['zfs_program_json'] @@ -160,7 +170,7 @@ tests = ['zfs_program_json'] [/opt/zfs-tests/tests/functional/cli_root/zfs_promote] tests = ['zfs_promote_001_pos', 'zfs_promote_002_pos', 'zfs_promote_003_pos', 'zfs_promote_004_pos', 'zfs_promote_005_pos', 'zfs_promote_006_neg', - 'zfs_promote_007_neg', 'zfs_promote_008_pos'] + 'zfs_promote_007_neg', 'zfs_promote_008_pos', 'zfs_promote_encryptionroot'] [/opt/zfs-tests/tests/functional/cli_root/zfs_property] tests = ['zfs_written_property_001_pos'] @@ -170,7 +180,9 @@ tests = ['zfs_receive_001_pos', 'zfs_receive_002_pos', 'zfs_receive_003_pos', 'zfs_receive_005_neg', 'zfs_receive_006_pos', 'zfs_receive_007_neg', 'zfs_receive_008_pos', 'zfs_receive_009_neg', 'zfs_receive_010_pos', 'zfs_receive_011_pos', 'zfs_receive_012_pos', - 'zfs_receive_013_pos', 'zfs_receive_014_pos'] + 'zfs_receive_013_pos', 'zfs_receive_014_pos', + 'zfs_receive_from_encrypted', 'zfs_receive_raw', + 'zfs_receive_raw_incremental', 'zfs_receive_to_encrypted'] [/opt/zfs-tests/tests/functional/cli_root/zfs_remap] tests = ['zfs_remap_cliargs', 'zfs_remap_obsolete_counts'] @@ -180,7 +192,8 @@ tests = ['zfs_rename_001_pos', 'zfs_rename_002_pos', 'zfs_rename_003_pos', 'zfs_rename_004_neg', 'zfs_rename_005_neg', 'zfs_rename_006_pos', 'zfs_rename_007_pos', 'zfs_rename_008_pos', 'zfs_rename_009_neg', 'zfs_rename_010_neg', 'zfs_rename_011_pos', 'zfs_rename_012_neg', - 'zfs_rename_013_pos', 'zfs_rename_014_neg'] + 'zfs_rename_013_pos', 'zfs_rename_014_neg', 'zfs_rename_encrypted_child', + 'zfs_rename_to_encrypted'] [/opt/zfs-tests/tests/functional/cli_root/zfs_reservation] tests = 
['zfs_reservation_001_pos', 'zfs_reservation_002_pos'] @@ -192,7 +205,7 @@ tests = ['zfs_rollback_001_pos', 'zfs_rollback_002_pos', [/opt/zfs-tests/tests/functional/cli_root/zfs_send] tests = ['zfs_send_001_pos', 'zfs_send_002_pos', 'zfs_send_003_pos', 'zfs_send_004_neg', 'zfs_send_005_pos', 'zfs_send_006_pos', - 'zfs_send_007_pos'] + 'zfs_send_007_pos', 'zfs_send_encrypted', 'zfs_send_raw'] [/opt/zfs-tests/tests/functional/cli_root/zfs_set] tests = ['cache_001_pos', 'cache_002_neg', 'canmount_001_pos', @@ -203,7 +216,7 @@ tests = ['cache_001_pos', 'cache_002_neg', 'canmount_001_pos', 'ro_props_001_pos', 'share_mount_001_neg', 'snapdir_001_pos', 'user_property_001_pos', 'user_property_002_pos', 'user_property_003_neg', 'user_property_004_pos', 'version_001_neg', 'zfs_set_001_neg', - 'zfs_set_002_neg', 'zfs_set_003_neg'] + 'zfs_set_002_neg', 'zfs_set_003_neg', 'zfs_set_keylocation'] [/opt/zfs-tests/tests/functional/cli_root/zfs_share] tests = ['zfs_share_001_pos', 'zfs_share_002_pos', 'zfs_share_003_pos', @@ -217,6 +230,9 @@ tests = ['zfs_snapshot_001_neg', 'zfs_snapshot_002_neg', 'zfs_snapshot_006_pos', 'zfs_snapshot_007_neg', 'zfs_snapshot_008_neg', 'zfs_snapshot_009_pos'] +[/opt/zfs-tests/tests/functional/cli_root/zfs_unload-key] +tests = ['zfs_unload-key', 'zfs_unload-key_all', 'zfs_unload-key_recursive'] + [/opt/zfs-tests/tests/functional/cli_root/zfs_unmount] tests = ['zfs_unmount_001_pos', 'zfs_unmount_002_pos', 'zfs_unmount_003_pos', 'zfs_unmount_004_pos', 'zfs_unmount_005_pos', 'zfs_unmount_006_pos', @@ -259,6 +275,7 @@ tests = ['zpool_create_001_pos', 'zpool_create_002_pos', 'zpool_create_018_pos', 'zpool_create_019_pos', 'zpool_create_020_pos', 'zpool_create_021_pos', 'zpool_create_022_pos', 'zpool_create_023_neg', 'zpool_create_024_pos', + 'zpool_create_encrypted', 'zpool_create_crypt_combos', 'zpool_create_features_001_pos', 'zpool_create_features_002_pos', 'zpool_create_features_003_pos', 'zpool_create_features_004_neg', 'zpool_create_tempname'] @@ 
-306,7 +323,8 @@ tests = ['zpool_import_001_pos', 'zpool_import_002_pos', 'import_rewind_config_changed', 'import_cachefile_shared_device', 'import_paths_changed', - 'import_devices_missing'] + 'import_devices_missing', + 'zpool_import_encrypted', 'zpool_import_encrypted_load'] [/opt/zfs-tests/tests/functional/cli_root/zpool_labelclear] tests = ['zpool_labelclear_active', 'zpool_labelclear_exported'] @@ -347,7 +365,8 @@ tags = ['functional', 'cli_root', 'zpool_resilver'] [/opt/zfs-tests/tests/functional/cli_root/zpool_scrub] tests = ['zpool_scrub_001_neg', 'zpool_scrub_002_pos', 'zpool_scrub_003_pos', 'zpool_scrub_004_pos', 'zpool_scrub_005_pos', 'zpool_scrub_print_repairing', - 'zpool_scrub_offline_device', 'zpool_scrub_multiple_copies'] + 'zpool_scrub_offline_device', 'zpool_scrub_multiple_copies', + 'zpool_scrub_encrypted_unloaded'] [/opt/zfs-tests/tests/functional/cli_root/zpool_set] tests = ['zpool_set_001_pos', 'zpool_set_002_neg', 'zpool_set_003_neg'] @@ -583,7 +602,10 @@ tests = ['rsend_001_pos', 'rsend_002_pos', 'rsend_003_pos', 'rsend_004_pos', 'send-c_lz4_disabled', 'send-c_recv_lz4_disabled', 'send-c_mixed_compression', 'send-c_stream_size_estimate', 'send-cD', 'send-c_embedded_blocks', 'send-c_resume', 'send-cpL_varied_recsize', - 'send-c_recv_dedup'] + 'send-c_recv_dedup', 'send_encrypted_files', 'send_encrypted_hierarchy', + 'send_encrypted_truncated_files', + 'send_realloc_encrypted_files', + 'send_mixed_raw'] [/opt/zfs-tests/tests/functional/scrub_mirror] tests = ['scrub_mirror_001_pos', 'scrub_mirror_002_pos', diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/Makefile b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/Makefile new file mode 100644 index 0000000000..5be730f3fa --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/Makefile @@ -0,0 +1,21 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License 
("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2012, 2016 by Delphix. All rights reserved. +# + +include $(SRC)/Makefile.master + +ROOTOPTPKG = $(ROOT)/opt/zfs-tests +TARGETDIR = $(ROOTOPTPKG)/tests/functional/cli_root/zfs_change-key + +include $(SRC)/test/zfs-tests/Makefile.com diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/cleanup.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/cleanup.ksh new file mode 100644 index 0000000000..79cd6e9f90 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/cleanup.ksh @@ -0,0 +1,30 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +. 
$STF_SUITE/include/libtest.shlib + +default_cleanup diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/setup.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/setup.ksh new file mode 100644 index 0000000000..6a9af3bc28 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/setup.ksh @@ -0,0 +1,32 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +. $STF_SUITE/include/libtest.shlib + +DISK=${DISKS%% *} + +default_setup $DISK diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key.ksh new file mode 100644 index 0000000000..781caae5b5 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key.ksh @@ -0,0 +1,62 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. 
+# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# 'zfs change-key' should change the key material. +# +# STRATEGY: +# 1. Create an encrypted dataset +# 2. Attempt to change the key +# 3. Unmount the dataset and unload its key +# 4. Attempt to load the old key +# 5. Verify the key is not loaded +# 6. Attempt to load the new key +# 7. Verify the key is loaded +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must zfs destroy -f $TESTPOOL/$TESTFS1 +} +log_onexit cleanup + +log_assert "'zfs change-key' should change the key material" + +log_must eval "echo $PASSPHRASE1 | zfs create -o encryption=on" \ + "-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS1" +log_must eval "echo $PASSPHRASE2 | zfs change-key $TESTPOOL/$TESTFS1" + +log_must zfs unmount $TESTPOOL/$TESTFS1 +log_must zfs unload-key $TESTPOOL/$TESTFS1 + +log_mustnot eval "echo $PASSPHRASE1 | zfs load-key $TESTPOOL/$TESTFS1" +log_must key_unavailable $TESTPOOL/$TESTFS1 + +log_must eval "echo $PASSPHRASE2 | zfs load-key $TESTPOOL/$TESTFS1" +log_must key_available $TESTPOOL/$TESTFS1 + +log_pass "'zfs change-key' changes the key material" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_child.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_child.ksh new file mode 100644 index 0000000000..dda7c1df43 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_child.ksh @@ -0,0 +1,86 @@ +#!/bin/ksh 
-p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# 'zfs change-key' should promote an encrypted child to an encryption root. +# +# STRATEGY: +# 1. Create an encrypted dataset +# 2. Create an encrypted child dataset +# 3. Attempt to change the key without any flags +# 4. Attempt to change the key specifying keylocation +# 5. Attempt to change the key specifying keyformat +# 6. Verify the new encryption root can unload and load its key +# 7. Recreate the child dataset +# 8. Attempt to change the key specifying both the keylocation and keyformat +# 9. 
Verify the new encryption root can unload and load its key +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must zfs destroy -r $TESTPOOL/$TESTFS1 +} + +log_onexit cleanup + +log_assert "'zfs change-key' should promote an encrypted child to an" \ + "encryption root" + +log_must eval "echo $PASSPHRASE1 | zfs create -o encryption=on" \ + "-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS1" +log_must zfs create $TESTPOOL/$TESTFS1/child + +log_mustnot eval "echo $PASSPHRASE2 | zfs change-key" \ + "$TESTPOOL/$TESTFS1/child" + +log_mustnot eval "echo $PASSPHRASE2 | zfs change-key -o keylocation=prompt" \ + "$TESTPOOL/$TESTFS1/child" + +log_must eval "echo $PASSPHRASE2 | zfs change-key -o keyformat=passphrase" \ + "$TESTPOOL/$TESTFS1/child" + +log_must zfs unmount $TESTPOOL/$TESTFS1/child +log_must zfs unload-key $TESTPOOL/$TESTFS1/child +log_must key_unavailable $TESTPOOL/$TESTFS1/child + +log_must eval "echo $PASSPHRASE2 | zfs load-key $TESTPOOL/$TESTFS1/child" +log_must key_available $TESTPOOL/$TESTFS1/child + +log_must zfs destroy $TESTPOOL/$TESTFS1/child +log_must zfs create $TESTPOOL/$TESTFS1/child + +log_must eval "echo $PASSPHRASE2 | zfs change-key -o keyformat=passphrase" \ + "-o keylocation=prompt $TESTPOOL/$TESTFS1/child" + +log_must zfs unmount $TESTPOOL/$TESTFS1/child +log_must zfs unload-key $TESTPOOL/$TESTFS1/child +log_must key_unavailable $TESTPOOL/$TESTFS1/child + +log_must eval "echo $PASSPHRASE2 | zfs load-key $TESTPOOL/$TESTFS1/child" +log_must key_available $TESTPOOL/$TESTFS1/child + +log_pass "'zfs change-key' promotes an encrypted child to an encryption root" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_format.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_format.ksh new file mode 100644 index 0000000000..1c68b8f4cc --- /dev/null +++ 
b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_format.ksh @@ -0,0 +1,72 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# Copyright (c) 2019 DilOS +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# 'zfs change-key -o' should change the key format. +# +# STRATEGY: +# 1. Create an encryption dataset with a passphrase key format +# 2. Unmount the dataset +# 3. Verify the key format is passphrase +# 4. Change the key format to hex +# 5. Verify the key format is hex +# 6. Attempt to reload the dataset's key +# 7. Change the key format to raw +# 8. Verify the key format is raw +# 9. 
Attempt to reload the dataset's key +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must zfs destroy -f $TESTPOOL/$TESTFS1 +} +log_onexit cleanup + +log_assert "'zfs change-key -o' should change the key format" + +log_must eval "echo $PASSPHRASE | zfs create -o encryption=on" \ + "-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS1" +log_must zfs unmount $TESTPOOL/$TESTFS1 + +log_must verify_keyformat $TESTPOOL/$TESTFS1 "passphrase" + +log_must eval "echo $HEXKEY | zfs change-key -o keyformat=hex" \ + "$TESTPOOL/$TESTFS1" +log_must verify_keyformat $TESTPOOL/$TESTFS1 "hex" + +log_must zfs unload-key $TESTPOOL/$TESTFS1 +log_must eval "echo $HEXKEY | zfs load-key $TESTPOOL/$TESTFS1" + +log_must eval "echo $RAWKEY | tr -d '\n' | zfs change-key -o keyformat=raw" \ + "$TESTPOOL/$TESTFS1" +log_must verify_keyformat $TESTPOOL/$TESTFS1 "raw" + +log_must zfs unload-key $TESTPOOL/$TESTFS1 +log_must eval "echo $RAWKEY | tr -d '\n' | zfs load-key $TESTPOOL/$TESTFS1" + +log_pass "'zfs change-key -o' changes the key format" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_inherit.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_inherit.ksh new file mode 100644 index 0000000000..94820c37ec --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_inherit.ksh @@ -0,0 +1,78 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. 
+# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# 'zfs change-key -i' should cause a dataset to inherit its parent key +# +# STRATEGY: +# 1. Create a parent encrypted dataset +# 2. Create a child dataset as an encryption root +# 3. Attempt to inherit the parent key +# 4. Verify the key is inherited +# 5. Unmount the parent and unload its key +# 6. Verify the key is unavailable for parent and child +# 7. Load the parent key +# 8. Verify the key is available for parent and child +# 9. Attempt to mount the datasets +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must zfs destroy -r $TESTPOOL/$TESTFS1 +} +log_onexit cleanup + +log_assert "'zfs change-key -i' should cause a dataset to inherit its" \ + "parent key" + +log_must eval "echo $PASSPHRASE | zfs create -o encryption=on" \ + "-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS1" +log_must eval "echo $PASSPHRASE1 | zfs create -o encryption=on" \ + "-o keyformat=passphrase -o keylocation=prompt" \ + "$TESTPOOL/$TESTFS1/child" + +log_must verify_encryption_root $TESTPOOL/$TESTFS1/child \ + "$TESTPOOL/$TESTFS1/child" + +log_must zfs change-key -i $TESTPOOL/$TESTFS1/child +log_must verify_encryption_root $TESTPOOL/$TESTFS1/child "$TESTPOOL/$TESTFS1" + +log_must zfs unmount $TESTPOOL/$TESTFS1 +log_must zfs unload-key $TESTPOOL/$TESTFS1 + +log_must key_unavailable $TESTPOOL/$TESTFS1 +log_must key_unavailable $TESTPOOL/$TESTFS1/child + +log_must eval "echo $PASSPHRASE | zfs load-key $TESTPOOL/$TESTFS1" + +log_must key_available $TESTPOOL/$TESTFS1 +log_must key_available $TESTPOOL/$TESTFS1/child + +log_must zfs mount $TESTPOOL/$TESTFS1 +log_must zfs mount $TESTPOOL/$TESTFS1/child + +log_pass "'zfs change-key -i' causes a dataset to inherit its parent key" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_load.ksh 
b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_load.ksh new file mode 100644 index 0000000000..4ed4aadfe0 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_load.ksh @@ -0,0 +1,58 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# 'zfs change-key -l' should load a dataset's key to change it. +# +# STRATEGY: +# 1. Create an encrypted dataset +# 2. Unmount the dataset and unload its key +# 3. Attempt to change the key +# 4. Verify the dataset key is loaded +# 5. 
Attempt to change the key +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must zfs destroy -f $TESTPOOL/$TESTFS1 +} +log_onexit cleanup + +log_assert "'zfs change-key -l' should load a dataset's key to change it" + +log_must eval "echo $PASSPHRASE > /$TESTPOOL/pkey" +log_must zfs create -o encryption=on -o keyformat=passphrase \ + -o keylocation=file:///$TESTPOOL/pkey $TESTPOOL/$TESTFS1 +log_must zfs unmount $TESTPOOL/$TESTFS1 +log_must zfs unload-key $TESTPOOL/$TESTFS1 + +log_must zfs change-key -l $TESTPOOL/$TESTFS1 +log_must key_available $TESTPOOL/$TESTFS1 + +log_must zfs change-key -l $TESTPOOL/$TESTFS1 + +log_pass "'zfs change-key -l' loads a dataset's key to change it" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_location.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_location.ksh new file mode 100644 index 0000000000..5cbe34b269 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_location.ksh @@ -0,0 +1,65 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# 'zfs change-key -o' should change the keylocation. +# +# STRATEGY: +# 1. Create an encryption dataset with a file key location +# 2. Change the key location to 'prompt' +# 3. Verify the key location +# 4. 
Unmount the dataset and unload its key +# 5. Attempt to load the dataset's key +# 6. Attempt to change the key location to 'none' +# 7. Attempt to change the key location to an invalid value +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must zfs destroy -f $TESTPOOL/$TESTFS1 +} +log_onexit cleanup + +log_assert "'zfs change-key -o' should change the keylocation" + +log_must eval "echo $PASSPHRASE > /$TESTPOOL/pkey" +log_must zfs create -o encryption=on -o keyformat=passphrase \ + -o keylocation=file:///$TESTPOOL/pkey $TESTPOOL/$TESTFS1 +log_must verify_keylocation $TESTPOOL/$TESTFS1 "file:///$TESTPOOL/pkey" + +log_must eval "echo $PASSPHRASE1 | zfs change-key -o keylocation=prompt" \ + "$TESTPOOL/$TESTFS1" +log_must verify_keylocation $TESTPOOL/$TESTFS1 "prompt" + +log_must zfs unmount $TESTPOOL/$TESTFS1 +log_must zfs unload-key $TESTPOOL/$TESTFS1 +log_must eval "echo $PASSPHRASE1 | zfs load-key $TESTPOOL/$TESTFS1" + +log_mustnot zfs change-key -o keylocation=none $TESTPOOL/$TESTFS1 +log_mustnot zfs change-key -o keylocation=foobar $TESTPOOL/$TESTFS1 + +log_pass "'zfs change-key -o' changes the keylocation" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_pbkdf2iters.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_pbkdf2iters.ksh new file mode 100644 index 0000000000..b1672248be --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_change-key/zfs_change-key_pbkdf2iters.ksh @@ -0,0 +1,75 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. 
A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# 'zfs change-key -o' should change the pbkdf2 iterations. +# +# STRATEGY: +# 1. Create an encryption dataset with 200k PBKDF2 iterations +# 2. Unmount the dataset +# 3. Change the PBKDF2 iterations to 150k +# 4. Verify the PBKDF2 iterations +# 5. Unload the dataset's key +# 6. Attempt to load the dataset's key +# + +verify_runnable "both" + +function verify_pbkdf2iters +{ + typeset ds=$1 + typeset iterations=$2 + typeset iters=$(get_prop pbkdf2iters $ds) + + if [[ "$iters" != "$iterations" ]]; then + log_fail "Expected $iterations iterations, got $iters" + fi + + return 0 +} + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must zfs destroy -f $TESTPOOL/$TESTFS1 +} +log_onexit cleanup + +log_assert "'zfs change-key -o' should change the pbkdf2 iterations" + +log_must eval "echo $PASSPHRASE > /$TESTPOOL/pkey" +log_must zfs create -o encryption=on -o keyformat=passphrase \ + -o keylocation=file:///$TESTPOOL/pkey -o pbkdf2iters=200000 \ + $TESTPOOL/$TESTFS1 + +log_must zfs unmount $TESTPOOL/$TESTFS1 +log_must verify_pbkdf2iters $TESTPOOL/$TESTFS1 "200000" + +log_must zfs change-key -o pbkdf2iters=150000 $TESTPOOL/$TESTFS1 +log_must verify_pbkdf2iters $TESTPOOL/$TESTFS1 "150000" + +log_must zfs unload-key $TESTPOOL/$TESTFS1 +log_must zfs load-key $TESTPOOL/$TESTFS1 + +log_pass "'zfs change-key -o' changes the pbkdf2 iterations" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_encrypted.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_encrypted.ksh new file mode 100644 index 0000000000..86f335bde2 --- /dev/null +++ 
b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_clone/zfs_clone_encrypted.ksh @@ -0,0 +1,83 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# 'zfs clone' should create encrypted clones of encrypted datasets +# +# STRATEGY: +# 1. Create an encrypted dataset +# 2. Create a snapshot of the dataset +# 3. Attempt to clone the snapshot as an unencrypted dataset +# 4. Attempt to clone the snapshot with a new key +# 5. Attempt to clone the snapshot as a child of an unencrypted dataset +# 6. Attempt to clone the snapshot as a child of an encrypted dataset +# 7. Verify the encryption root of the datasets +# 8. Unmount all datasets and unload their keys +# 9. Attempt to load the encryption root's key +# 10. Verify each dataset's key is loaded +# 11. 
Attempt to mount each dataset +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS2 && \ + log_must zfs destroy -f $TESTPOOL/$TESTFS2 + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must zfs destroy -r $TESTPOOL/$TESTFS1 +} +log_onexit cleanup + +log_assert "'zfs clone' should create encrypted clones of encrypted datasets" + +log_must eval "echo $PASSPHRASE | zfs create -o encryption=on" \ + "-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS1" +log_must zfs snapshot $TESTPOOL/$TESTFS1@now + +log_mustnot zfs clone -o encryption=off $TESTPOOL/$TESTFS1@now \ + $TESTPOOL/$TESTFS2 +log_mustnot eval "echo $PASSPHRASE1 | zfs clone -o keyformat=passphrase" \ + "$TESTPOOL/$TESTFS1@now $TESTPOOL/$TESTFS2" +log_must zfs clone $TESTPOOL/$TESTFS1@now $TESTPOOL/$TESTFS2 +log_must zfs clone $TESTPOOL/$TESTFS1@now $TESTPOOL/$TESTFS1/child + +log_must verify_encryption_root $TESTPOOL/$TESTFS2 $TESTPOOL/$TESTFS1 +log_must verify_encryption_root $TESTPOOL/$TESTFS1/child $TESTPOOL/$TESTFS1 + +log_must zfs unmount $TESTPOOL/$TESTFS1 +log_must zfs unmount $TESTPOOL/$TESTFS2 +log_must zfs unload-key -a + +log_must eval "echo $PASSPHRASE | zfs load-key $TESTPOOL/$TESTFS1" + +log_must key_available $TESTPOOL/$TESTFS1 +log_must key_available $TESTPOOL/$TESTFS1/child +log_must key_available $TESTPOOL/$TESTFS2 + +log_must zfs mount $TESTPOOL/$TESTFS1 +log_must zfs mount $TESTPOOL/$TESTFS1/child +log_must zfs mount $TESTPOOL/$TESTFS2 + +log_pass "'zfs clone' creates encrypted clones of encrypted datasets" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_crypt_combos.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_crypt_combos.ksh new file mode 100644 index 0000000000..91e4cbdada --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_crypt_combos.ksh @@ -0,0 +1,99 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied 
under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017, Datto, Inc. All rights reserved. +# Copyright (c) 2019, DilOS +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_create/zfs_create_common.kshlib +. $STF_SUITE/tests/functional/cli_root/zfs_create/properties.kshlib + +# +# DESCRIPTION: +# 'zfs create' should create an encrypted dataset with a valid encryption +# algorithm, key format, key location, and key. +# +# STRATEGY: +# 1. Create a filesystem for each combination of encryption type and key format +# 2. Verify that each filesystem has the correct properties set +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must zfs destroy -f $TESTPOOL/$TESTFS1 +} + +log_onexit cleanup + +set -A ENCRYPTION_ALGS \ + "encryption=on" \ + "encryption=aes-128-ccm" \ + "encryption=aes-192-ccm" \ + "encryption=aes-256-ccm" \ + "encryption=aes-128-gcm" \ + "encryption=aes-192-gcm" \ + "encryption=aes-256-gcm" + +set -A ENCRYPTION_PROPS \ + "encryption=aes-256-ccm" \ + "encryption=aes-128-ccm" \ + "encryption=aes-192-ccm" \ + "encryption=aes-256-ccm" \ + "encryption=aes-128-gcm" \ + "encryption=aes-192-gcm" \ + "encryption=aes-256-gcm" + +set -A KEYFORMATS "keyformat=raw" \ + "keyformat=hex" \ + "keyformat=passphrase" + +set -A USER_KEYS "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz" \ + "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb" \ + "abcdefgh" + +log_assert "'zfs create' should create encrypted datasets using all" \ + "combinations of supported properties" + +typeset -i i=0 +while (( i < ${#ENCRYPTION_ALGS[*]} )); do + typeset -i 
j=0 + while (( j < ${#KEYFORMATS[*]} )); do + log_must eval "echo ${USER_KEYS[j]} | tr -d '\n' | zfs create" \ + "-o ${ENCRYPTION_ALGS[i]} -o ${KEYFORMATS[j]}" \ + "$TESTPOOL/$TESTFS1" + + datasetexists $TESTPOOL/$TESTFS1 || \ + log_fail "Failed to create dataset using" \ + "${ENCRYPTION_ALGS[i]} and ${KEYFORMATS[j]}" + + propertycheck $TESTPOOL/$TESTFS1 ${ENCRYPTION_PROPS[i]} || \ + log_fail "failed to set ${ENCRYPTION_ALGS[i]}" + propertycheck $TESTPOOL/$TESTFS1 ${KEYFORMATS[j]} || \ + log_fail "failed to set ${KEYFORMATS[j]}" + + log_must zfs destroy -f $TESTPOOL/$TESTFS1 + (( j = j + 1 )) + done + (( i = i + 1 )) +done + +log_pass "'zfs create' creates encrypted datasets using all combinations of" \ + "supported properties" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_encrypted.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_encrypted.ksh new file mode 100644 index 0000000000..9d5ecab0df --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_create/zfs_create_encrypted.ksh @@ -0,0 +1,134 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017, Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_create/zfs_create_common.kshlib +. $STF_SUITE/tests/functional/cli_root/zfs_create/properties.kshlib +. 
$STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# ZFS should create datasets only if they have a valid combination of +# encryption properties set. +# +# penc = parent encrypted +# enc = encryption +# loc = keylocation provided +# fmt = keyformat provided +# +# penc enc fmt loc valid notes +# ------------------------------------------- +# no unspec 0 0 yes inherit no encryption (not tested here) +# no unspec 0 1 no no crypt specified +# no unspec 1 0 no no crypt specified +# no unspec 1 1 no no crypt specified +# no off 0 0 yes explicit no encryption +# no off 0 1 no keylocation given, but crypt off +# no off 1 0 no keyformat given, but crypt off +# no off 1 1 no keyformat given, but crypt off +# no on 0 0 no no keyformat specified for new key +# no on 0 1 no no keyformat specified for new key +# no on 1 0 yes new encryption root +# no on 1 1 yes new encryption root +# yes unspec 0 0 yes inherit encryption +# yes unspec 0 1 no no keyformat specified +# yes unspec 1 0 yes new encryption root, crypt inherited +# yes unspec 1 1 yes new encryption root, crypt inherited +# yes off 0 0 no unencrypted child of encrypted parent +# yes off 0 1 no unencrypted child of encrypted parent +# yes off 1 0 no unencrypted child of encrypted parent +# yes off 1 1 no unencrypted child of encrypted parent +# yes on 0 0 yes inherited encryption, local crypt +# yes on 0 1 no no keyformat specified for new key +# yes on 1 0 yes new encryption root +# yes on 1 1 yes new encryption root +# +# STRATEGY: +# 1. Attempt to create a dataset using all combinations of encryption +# properties +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must zfs destroy -r $TESTPOOL/$TESTFS1 + datasetexists $TESTPOOL/$TESTFS2 && \ + log_must zfs destroy -r $TESTPOOL/$TESTFS2 +} +log_onexit cleanup + +log_assert "ZFS should create datasets only if they have a valid" \ + "combination of encryption properties set." 
+ +# Unencrypted parent +log_must zfs create $TESTPOOL/$TESTFS1 +log_mustnot zfs create -o keyformat=passphrase $TESTPOOL/$TESTFS1/c1 +log_mustnot zfs create -o keylocation=prompt $TESTPOOL/$TESTFS1/c1 +log_mustnot zfs create -o keyformat=passphrase -o keylocation=prompt \ + $TESTPOOL/$TESTFS1/c1 + +log_must zfs create -o encryption=off $TESTPOOL/$TESTFS1/c1 +log_mustnot zfs create -o encryption=off -o keylocation=prompt \ + $TESTPOOL/$TESTFS1/c2 +log_mustnot zfs create -o encryption=off -o keyformat=passphrase \ + $TESTPOOL/$TESTFS1/c2 +log_mustnot zfs create -o encryption=off -o keyformat=passphrase \ + -o keylocation=prompt $TESTPOOL/$TESTFS1/c2 + +log_mustnot zfs create -o encryption=on $TESTPOOL/$TESTFS1/c2 +log_mustnot zfs create -o encryption=on -o keylocation=prompt \ + $TESTPOOL/$TESTFS1/c2 +log_must eval "echo $PASSPHRASE | zfs create -o encryption=on" \ + "-o keyformat=passphrase $TESTPOOL/$TESTFS1/c3" +log_must eval "echo $PASSPHRASE | zfs create -o encryption=on" \ + "-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS1/c4" + +# Encrypted parent +log_must eval "echo $PASSPHRASE | zfs create -o encryption=on" \ + "-o keyformat=passphrase $TESTPOOL/$TESTFS2" + +log_must zfs create $TESTPOOL/$TESTFS2/c1 +log_mustnot zfs create -o keylocation=prompt $TESTPOOL/$TESTFS2/c2 +log_must eval "echo $PASSPHRASE | zfs create -o keyformat=passphrase" \ + "$TESTPOOL/$TESTFS2/c3" +log_must eval "echo $PASSPHRASE | zfs create -o keyformat=passphrase" \ + "-o keylocation=prompt $TESTPOOL/$TESTFS2/c4" + +log_mustnot zfs create -o encryption=off $TESTPOOL/$TESTFS2/c5 +log_mustnot zfs create -o encryption=off -o keylocation=prompt \ + $TESTPOOL/$TESTFS2/c5 +log_mustnot zfs create -o encryption=off -o keyformat=passphrase \ + $TESTPOOL/$TESTFS2/c5 +log_mustnot zfs create -o encryption=off -o keyformat=passphrase \ + -o keylocation=prompt $TESTPOOL/$TESTFS2/c5 + +log_must eval "echo $PASSPHRASE | zfs create -o encryption=on" \ + "$TESTPOOL/$TESTFS2/c5" 
+log_mustnot zfs create -o encryption=on -o keylocation=prompt \ + $TESTPOOL/$TESTFS2/c6 +log_must eval "echo $PASSPHRASE | zfs create -o encryption=on" \ + "-o keyformat=passphrase $TESTPOOL/$TESTFS2/c6" +log_must eval "echo $PASSPHRASE | zfs create -o encryption=on" \ + "-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS2/c7" + +log_pass "ZFS creates datasets only if they have a valid combination of" \ + "encryption properties set." diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_003_pos.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_003_pos.ksh index 5ffe51ebef..d41dc9b5bc 100644 --- a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_003_pos.ksh +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_destroy/zfs_destroy_003_pos.ksh @@ -134,8 +134,8 @@ log_note "Verify that 'zfs destroy -r' fails to destroy dataset " \ for obj in $child_fs $child_fs1 $ctr $ctr1; do log_mustnot zfs destroy -r $obj datasetexists $obj || \ - log_fail "'zfs destroy -r' fails to keep clone " \ - "dependant outside the hirearchy." + log_fail "'zfs destroy -r' fails to keep dependent " \ + "clone outside the hierarchy." done diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/Makefile b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/Makefile new file mode 100644 index 0000000000..0751428a0d --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/Makefile @@ -0,0 +1,21 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. 
+# + +# +# Copyright (c) 2012, 2016 by Delphix. All rights reserved. +# + +include $(SRC)/Makefile.master + +ROOTOPTPKG = $(ROOT)/opt/zfs-tests +TARGETDIR = $(ROOTOPTPKG)/tests/functional/cli_root/zfs_load-key + +include $(SRC)/test/zfs-tests/Makefile.com diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/cleanup.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/cleanup.ksh new file mode 100644 index 0000000000..79cd6e9f90 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/cleanup.ksh @@ -0,0 +1,30 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +. 
$STF_SUITE/include/libtest.shlib + +default_cleanup diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/setup.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/setup.ksh new file mode 100644 index 0000000000..6a9af3bc28 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/setup.ksh @@ -0,0 +1,32 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +. $STF_SUITE/include/libtest.shlib + +DISK=${DISKS%% *} + +default_setup $DISK diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key.cfg b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key.cfg new file mode 100644 index 0000000000..90d9f63f1d --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key.cfg @@ -0,0 +1,26 @@ +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. 
+# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +export PASSPHRASE="password" +export PASSPHRASE1="password1" +export PASSPHRASE2="password2" +export HEXKEY="000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F" +export HEXKEY1="201F1E1D1C1B1A191817161514131211100F0E0D0C0B0A090807060504030201" +export RAWKEY="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" +export RAWKEY1="bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key.ksh new file mode 100644 index 0000000000..847a6aabd3 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key.ksh @@ -0,0 +1,85 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# 'zfs load-key' should only load a key for an unloaded encrypted dataset. +# +# STRATEGY: +# 1. Attempt to load the default dataset's key +# 2. Unmount the dataset +# 3. Attempt to load the default dataset's key +# 4. Create an encrypted dataset +# 5. Unmount the dataset and unload its key +# 6. Attempt to load the dataset's key +# 7. 
Verify the dataset's key is loaded +# 8. Attempt to load the dataset's key again +# 9. Create an encrypted pool +# 10. Unmount the pool and unload its key +# 11. Attempt to load the pool's key +# 12. Verify the pool's key is loaded +# 13. Attempt to load the pool's key again +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must zfs destroy $TESTPOOL/$TESTFS1 + poolexists $TESTPOOL1 && log_must destroy_pool $TESTPOOL1 +} +log_onexit cleanup + +log_assert "'zfs load-key' should only load the key for an" \ + "unloaded encrypted dataset" + +log_mustnot eval "echo $PASSPHRASE | zfs load-key $TESTPOOL/$TESTFS" + +log_must zfs unmount $TESTPOOL/$TESTFS +log_mustnot eval "echo $PASSPHRASE | zfs load-key $TESTPOOL/$TESTFS" + +log_must eval "echo $PASSPHRASE | zfs create -o encryption=on" \ + "-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS1" + +log_must zfs unmount $TESTPOOL/$TESTFS1 +log_must zfs unload-key $TESTPOOL/$TESTFS1 + +log_must eval "echo $PASSPHRASE | zfs load-key $TESTPOOL/$TESTFS1" +log_must key_available $TESTPOOL/$TESTFS1 + +log_mustnot eval "echo $PASSPHRASE | zfs load-key $TESTPOOL/$TESTFS1" + +typeset DISK2="$(echo $DISKS | awk '{ print $2 }')" +log_must eval "echo $PASSPHRASE | zpool create -O encryption=on" \ + "-O keyformat=passphrase -O keylocation=prompt $TESTPOOL1 $DISK2" + +log_must zfs unmount $TESTPOOL1 +log_must zfs unload-key $TESTPOOL1 + +log_must eval "echo $PASSPHRASE | zfs load-key $TESTPOOL1" +log_must key_available $TESTPOOL1 + +log_mustnot eval "echo $PASSPHRASE | zfs load-key $TESTPOOL1" + +log_pass "'zfs load-key' only loads the key for an unloaded encrypted dataset" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_all.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_all.ksh new file mode 100644 index 0000000000..5e331fd120 --- /dev/null +++ 
b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_all.ksh @@ -0,0 +1,77 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# 'zfs load-key -a' should load keys for all datasets. +# +# STRATEGY: +# 1. Create an encrypted filesystem, encrypted zvol, and an encrypted pool +# 2. Unmount all datasets and unload their keys +# 3. Attempt to load all dataset keys +# 4. Verify each dataset has its key loaded +# 5. 
Attempt to mount the pool and filesystem +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must zfs destroy $TESTPOOL/$TESTFS1 + datasetexists $TESTPOOL/zvol && log_must zfs destroy $TESTPOOL/zvol + poolexists $TESTPOOL1 && log_must destroy_pool $TESTPOOL1 +} +log_onexit cleanup + +log_assert "'zfs load-key -a' should load keys for all datasets" + +log_must eval "echo $PASSPHRASE1 > /$TESTPOOL/pkey" +log_must zfs create -o encryption=on -o keyformat=passphrase \ + -o keylocation=file:///$TESTPOOL/pkey $TESTPOOL/$TESTFS1 + +log_must zfs create -V 64M -o encryption=on -o keyformat=passphrase \ + -o keylocation=file:///$TESTPOOL/pkey $TESTPOOL/zvol + +typeset DISK2="$(echo $DISKS | awk '{ print $2}')" +log_must zpool create -O encryption=on -O keyformat=passphrase \ + -O keylocation=file:///$TESTPOOL/pkey $TESTPOOL1 $DISK2 + +log_must zfs unmount $TESTPOOL/$TESTFS1 +log_must zfs unload-key $TESTPOOL/$TESTFS1 + +log_must zfs unload-key $TESTPOOL/zvol + +log_must zfs unmount $TESTPOOL1 +log_must zfs unload-key $TESTPOOL1 + +log_must zfs load-key -a + +log_must key_available $TESTPOOL1 +log_must key_available $TESTPOOL/zvol +log_must key_available $TESTPOOL/$TESTFS1 + +log_must zfs mount $TESTPOOL1 +log_must zfs mount $TESTPOOL/$TESTFS1 + +log_pass "'zfs load-key -a' loads keys for all datasets" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib new file mode 100644 index 0000000000..627b68267e --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib @@ -0,0 +1,102 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. 
+# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key.cfg + +# Return 0 is a dataset key is available, 1 otherwise +# +# $1 - dataset +# +function key_available +{ + typeset ds=$1 + + datasetexists $ds || return 1 + + typeset val=$(get_prop keystatus $ds) + if [[ "$val" == "none" ]]; then + log_note "Dataset $ds is not encrypted" + elif [[ "$val" == "available" ]]; then + return 0 + fi + + return 1 +} + +function key_unavailable +{ + key_available $1 && return 1 + return 0 +} + +function verify_keyformat +{ + typeset ds=$1 + typeset format=$2 + typeset fmt=$(get_prop keyformat $ds) + + if [[ "$fmt" != "$format" ]]; then + log_fail "Expected keyformat $format, got $fmt" + fi + + return 0 +} + +function verify_keylocation +{ + typeset ds=$1 + typeset location=$2 + typeset keyloc=$(get_prop keylocation $ds) + + if [[ "$keyloc" != "$location" ]]; then + log_fail "Expected keylocation $location, got $keyloc" + fi + + return 0 +} + +function verify_encryption_root +{ + typeset ds=$1 + typeset val=$2 + typeset eroot=$(get_prop encryptionroot $ds) + + if [[ "$eroot" != "$val" ]]; then + log_note "Expected encryption root '$val', got '$eroot'" + return 1 + fi + + return 0 +} + +function verify_origin +{ + typeset ds=$1 + typeset val=$2 + typeset orig=$(get_prop origin $ds) + + if [[ "$orig" != "$val" ]]; then + log_note "Expected origin '$val', got '$orig'" + return 1 + fi + + return 0 +} diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_file.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_file.ksh new file mode 100644 index 0000000000..7cbda43ff2 --- /dev/null +++ 
b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_file.ksh @@ -0,0 +1,58 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# 'zfs load-key' should load a dataset's key from a file. +# +# STRATEGY: +# 1. Create an encrypted dataset with a key file +# 2. Unmount the dataset and unload the key +# 3. Attempt to load the dataset's key +# 4. Verify the key is loaded +# 5. 
Attempt to mount the dataset +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must zfs destroy $TESTPOOL/$TESTFS1 +} +log_onexit cleanup + +log_assert "'zfs load-key' should load a key from a file" + +log_must eval "echo $PASSPHRASE > /$TESTPOOL/pkey" +log_must zfs create -o encryption=on -o keyformat=passphrase \ + -o keylocation=file:///$TESTPOOL/pkey $TESTPOOL/$TESTFS1 + +log_must zfs unmount $TESTPOOL/$TESTFS1 +log_must zfs unload-key $TESTPOOL/$TESTFS1 + +log_must zfs load-key $TESTPOOL/$TESTFS1 +log_must key_available $TESTPOOL/$TESTFS1 +log_must zfs mount $TESTPOOL/$TESTFS1 + +log_pass "'zfs load-key' loads a key from a file" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_location.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_location.ksh new file mode 100644 index 0000000000..d0b1cdb20e --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_location.ksh @@ -0,0 +1,73 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# 'zfs load-key -L' should override keylocation with provided value. +# +# STRATEGY: +# 1. Create a key file +# 2. Copy the key file to another location +# 3. Create an encrypted dataset using the keyfile +# 4. Unmount the dataset and unload its key +# 5. 
Attempt to load the dataset specifying a keylocation of file +# 6. Verify the key is loaded +# 7. Verify the keylocation is the original key file +# 8. Unload the dataset's key +# 9. Attempt to load the dataset specifying a keylocation of prompt +# 10. Verify the key is loaded +# 11. Verify the keylocation is the original key file +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must zfs destroy $TESTPOOL/$TESTFS1 +} +log_onexit cleanup + +log_assert "'zfs load-key -L' should override keylocation with provided value" + +typeset key_location="/$TESTPOOL/pkey1" + +log_must eval "echo $PASSPHRASE > $key_location" +log_must cp $key_location /$TESTPOOL/pkey2 + +log_must zfs create -o encryption=on -o keyformat=passphrase \ + -o keylocation=file://$key_location $TESTPOOL/$TESTFS1 + +log_must zfs unmount $TESTPOOL/$TESTFS1 +log_must zfs unload-key $TESTPOOL/$TESTFS1 + +log_must zfs load-key -L file:///$TESTPOOL/pkey2 $TESTPOOL/$TESTFS1 +log_must key_available $TESTPOOL/$TESTFS1 +log_must verify_keylocation $TESTPOOL/$TESTFS1 "file://$key_location" + +log_must zfs unload-key $TESTPOOL/$TESTFS1 +log_must eval "echo $PASSPHRASE | zfs load-key -L prompt $TESTPOOL/$TESTFS1" +log_must key_available $TESTPOOL/$TESTFS1 +log_must verify_keylocation $TESTPOOL/$TESTFS1 "file://$key_location" + +log_pass "'zfs load-key -L' overrides keylocation with provided value" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_noop.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_noop.ksh new file mode 100644 index 0000000000..bfce786448 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_noop.ksh @@ -0,0 +1,54 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. 
+# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# 'zfs load-key -n' should load the key for an already loaded dataset. +# +# STRATEGY: +# 1. Create an encrypted dataset +# 2. Attempt to load the dataset's key +# 3. Verify the key is loaded +# 4. Attempt to load the dataset's key with an invalid key +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must zfs destroy $TESTPOOL/$TESTFS1 +} +log_onexit cleanup + +log_assert "'zfs load-key -n' should load the key for a loaded dataset" + +log_must eval "echo $PASSPHRASE | zfs create -o encryption=on" \ + "-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS1" + +log_must eval "echo $PASSPHRASE | zfs load-key -n $TESTPOOL/$TESTFS1" +log_must key_available $TESTPOOL/$TESTFS1 + +log_mustnot eval "echo $PASSPHRASE1 | zfs load-key -n $TESTPOOL/$TESTFS1" + +log_pass "'zfs load-key -n' loads the key for a loaded dataset" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_recursive.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_recursive.ksh new file mode 100644 index 0000000000..7385b69cf5 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_load-key/zfs_load-key_recursive.ksh @@ -0,0 +1,66 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. 
+# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# 'zfs load-key -r' should recursively load keys. +# +# STRATEGY: +# 1. Create an encrypted dataset +# 2. Create a child dataset as an encryption root +# 3. Unmount all datasets and unload their keys +# 4. Attempt to load all dataset keys +# 5. Verify each dataset has its key loaded +# 6. Attempt to mount each dataset +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must zfs destroy -r $TESTPOOL/$TESTFS1 +} +log_onexit cleanup + +log_assert "'zfs load-key -r' should recursively load keys" + +log_must eval "echo $PASSPHRASE1 > /$TESTPOOL/pkey" +log_must zfs create -o encryption=on -o keyformat=passphrase \ + -o keylocation=file:///$TESTPOOL/pkey $TESTPOOL/$TESTFS1 + +log_must zfs create -o keyformat=passphrase \ + -o keylocation=file:///$TESTPOOL/pkey $TESTPOOL/$TESTFS1/child + +log_must zfs unmount $TESTPOOL/$TESTFS1 +log_must zfs unload-key $TESTPOOL/$TESTFS1/child +log_must zfs unload-key $TESTPOOL/$TESTFS1 + +log_must zfs load-key -r $TESTPOOL +log_must key_available $TESTPOOL/$TESTFS1 +log_must key_available $TESTPOOL/$TESTFS1/child + +log_must zfs mount $TESTPOOL/$TESTFS1 +log_must zfs mount $TESTPOOL/$TESTFS1/child + +log_pass "'zfs load-key -r' recursively loads keys" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_encrypted.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_encrypted.ksh new file mode 100644 index 0000000000..9749a9b3aa --- /dev/null +++ 
b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_encrypted.ksh @@ -0,0 +1,69 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017, Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# 'zfs mount -l' should accept a valid key as it mounts the filesystem. +# +# STRATEGY: +# 1. Create an encrypted dataset +# 2. Unmount and unload the dataset's key +# 3. Verify the key is unloaded +# 4. Attempt to mount all datasets in the pool +# 5. Verify that no error code is produced +# 6. Verify that the encrypted dataset is not mounted +# 7. Attempt to load the key while mounting the dataset +# 8. Verify the key is loaded +# 9. 
Verify the dataset is mounted +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must zfs destroy -f $TESTPOOL/$TESTFS1 +} + +log_onexit cleanup + +log_assert "'zfs mount -l' should properly load a valid wrapping key" + +log_must eval "echo $PASSPHRASE | zfs create -o encryption=on" \ + "-o keyformat=passphrase $TESTPOOL/$TESTFS1" + +log_must zfs unmount $TESTPOOL/$TESTFS1 +log_must zfs unload-key $TESTPOOL/$TESTFS1 +log_must key_unavailable $TESTPOOL/$TESTFS1 + +log_must zfs mount -a +unmounted $TESTPOOL/$TESTFS1 || \ + log_fail "Filesystem $TESTPOOL/$TESTFS1 is mounted" + +log_must eval "echo $PASSPHRASE | zfs mount -l $TESTPOOL/$TESTFS1" +log_must key_available $TESTPOOL/$TESTFS1 + +mounted $TESTPOOL/$TESTFS1 || \ + log_fail "Filesystem $TESTPOOL/$TESTFS1 is unmounted" + +log_pass "'zfs mount -l' properly loads a valid wrapping key" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_promote/zfs_promote_encryptionroot.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_promote/zfs_promote_encryptionroot.ksh new file mode 100644 index 0000000000..336c7b2538 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_promote/zfs_promote_encryptionroot.ksh @@ -0,0 +1,80 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# ZFS must promote clones of an encryption root. 
+# +# STRATEGY: +# 1. Create an encrypted dataset +# 2. Clone the encryption root +# 3. Clone the clone +# 4. Verify the encryption root of all three datasets is the origin +# 5. Promote the clone of the clone +# 6. Verify the encryption root of all three datasets is still the origin +# 7. Promote the clone of the original encryption root +# 8. Verify the encryption root of all three datasets is the promoted dataset +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must zfs destroy -Rf $TESTPOOL/$TESTFS1 + datasetexists $TESTPOOL/clone1 && \ + log_must zfs destroy -Rf $TESTPOOL/clone1 + datasetexists $TESTPOOL/clone2 && \ + log_must zfs destroy -Rf $TESTPOOL/clone2 +} +log_onexit cleanup + +log_assert "ZFS must promote clones of an encryption root" + +passphrase="password" +snaproot="$TESTPOOL/$TESTFS1@snap1" +snapclone="$TESTPOOL/clone1@snap2" + +log_must eval "echo $passphrase | zfs create -o encryption=on" \ + "-o keyformat=passphrase $TESTPOOL/$TESTFS1" + +log_must zfs snap $snaproot +log_must zfs clone $snaproot $TESTPOOL/clone1 +log_must zfs snap $snapclone +log_must zfs clone $snapclone $TESTPOOL/clone2 + +log_must verify_encryption_root $TESTPOOL/$TESTFS1 $TESTPOOL/$TESTFS1 +log_must verify_encryption_root $TESTPOOL/clone1 $TESTPOOL/$TESTFS1 +log_must verify_encryption_root $TESTPOOL/clone2 $TESTPOOL/$TESTFS1 + +log_must zfs promote $TESTPOOL/clone2 +log_must verify_encryption_root $TESTPOOL/$TESTFS1 $TESTPOOL/$TESTFS1 +log_must verify_encryption_root $TESTPOOL/clone1 $TESTPOOL/$TESTFS1 +log_must verify_encryption_root $TESTPOOL/clone2 $TESTPOOL/$TESTFS1 + +log_must zfs promote $TESTPOOL/clone2 +log_must verify_encryption_root $TESTPOOL/$TESTFS1 $TESTPOOL/clone2 +log_must verify_encryption_root $TESTPOOL/clone1 $TESTPOOL/clone2 +log_must verify_encryption_root $TESTPOOL/clone2 $TESTPOOL/clone2 + +log_pass "ZFS promotes clones of an encryption root" diff --git 
a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_receive/setup.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_receive/setup.ksh index eb00235877..486513256e 100644 --- a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_receive/setup.ksh +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_receive/setup.ksh @@ -28,6 +28,7 @@ . $STF_SUITE/include/libtest.shlib DISK=${DISKS%% *} + if is_global_zone; then default_volume_setup $DISK else diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_from_encrypted.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_from_encrypted.ksh new file mode 100644 index 0000000000..5eee9eecf4 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_from_encrypted.ksh @@ -0,0 +1,83 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# ZFS should receive an unencrypted stream from an encrypted dataset +# +# STRATEGY: +# 1. Create an unencrypted dataset +# 2. Create an encrypted dataset +# 3. Create and checksum a file on the encrypted dataset +# 4. Snapshot the encrypted dataset +# 5. Attempt to receive the snapshot into an unencrypted child +# 6. Verify encryption is not enabled +# 7. Verify the cheksum of the file is the same as the original +# 8. Attempt to receive the snapshot into an encrypted child +# 9. 
Verify the cheksum of the file is the same as the original +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must zfs destroy -r $TESTPOOL/$TESTFS1 + + datasetexists $TESTPOOL/$TESTFS2 && \ + log_must zfs destroy -r $TESTPOOL/$TESTFS2 +} + +log_onexit cleanup + +log_assert "ZFS should receive an unencrypted stream from an encrypted dataset" + +typeset passphrase="password" +typeset snap="$TESTPOOL/$TESTFS2@snap" + +log_must zfs create $TESTPOOL/$TESTFS1 +log_must eval "echo $passphrase | zfs create -o encryption=on" \ + "-o keyformat=passphrase $TESTPOOL/$TESTFS2" + +log_must mkfile 1M /$TESTPOOL/$TESTFS2/$TESTFILE0 +typeset checksum=$(md5sum /$TESTPOOL/$TESTFS2/$TESTFILE0 | awk '{ print $1 }') + +log_must zfs snapshot $snap + +log_note "Verify ZFS can receive into an unencrypted child" +log_must eval "zfs send $snap | zfs receive $TESTPOOL/$TESTFS1/c1" + +crypt=$(get_prop encryption $TESTPOOL/$TESTFS1/c1) +[[ "$crypt" == "off" ]] || log_fail "Received unencrypted stream as encrypted" + +typeset cksum1=$(md5sum /$TESTPOOL/$TESTFS1/c1/$TESTFILE0 | awk '{ print $1 }') +[[ "$cksum1" == "$checksum" ]] || \ + log_fail "Checksums differ ($cksum1 != $checksum)" + +log_note "Verify ZFS can receive into an encrypted child" +log_must eval "zfs send $snap | zfs receive $TESTPOOL/$TESTFS2/c1" + +typeset cksum2=$(md5sum /$TESTPOOL/$TESTFS2/c1/$TESTFILE0 | awk '{ print $1 }') +[[ "$cksum2" == "$checksum" ]] || \ + log_fail "Checksums differ ($cksum2 != $checksum)" + +log_pass "ZFS can receive an unencrypted stream from an encrypted dataset" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_raw.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_raw.ksh new file mode 100644 index 0000000000..2042b37a98 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_raw.ksh @@ -0,0 +1,93 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This 
file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# ZFS should receive streams from raw sends. +# +# STRATEGY: +# 1. Create an encrypted dataset +# 2. Create a file and get its checksum +# 3. Snapshot the dataset +# 4. Attempt to receive a raw send stream as a child of an unencrypted dataset +# 5. Verify the key is unavailable +# 6. Attempt to load the key and mount the dataset +# 7. Verify the cheksum of the file is the same as the original +# 8. Attempt to receive a raw send stream as a child of an encrypted dataset +# 9. Verify the key is unavailable +# 10. Attempt to load the key and mount the dataset +# 11. 
Verify the cheksum of the file is the same as the original +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must zfs destroy -r $TESTPOOL/$TESTFS1 + + datasetexists $TESTPOOL/$TESTFS2 && \ + log_must zfs destroy -r $TESTPOOL/$TESTFS2 +} + +log_onexit cleanup + +log_assert "ZFS should receive streams from raw sends" + +typeset passphrase="password" +typeset snap="$TESTPOOL/$TESTFS1@snap" + +log_must eval "echo $passphrase | zfs create -o encryption=on" \ + "-o keyformat=passphrase $TESTPOOL/$TESTFS1" + +log_must mkfile 1M /$TESTPOOL/$TESTFS1/$TESTFILE0 +typeset checksum=$(md5sum /$TESTPOOL/$TESTFS1/$TESTFILE0 | \ + awk '{ print $1 }') + +log_must zfs snapshot $snap + +log_note "Verify ZFS can receive a raw send stream from an encrypted dataset" +log_must eval "zfs send -w $snap | zfs receive $TESTPOOL/$TESTFS2" + +keystatus=$(get_prop keystatus $TESTPOOL/$TESTFS2) +[[ "$keystatus" == "unavailable" ]] || \ + log_fail "Expected keystatus unavailable, got $keystatus" + +log_must eval "echo $passphrase | zfs mount -l $TESTPOOL/$TESTFS2" + +typeset cksum1=$(md5sum /$TESTPOOL/$TESTFS2/$TESTFILE0 | awk '{ print $1 }') +[[ "$cksum1" == "$checksum" ]] || \ + log_fail "Checksums differ ($cksum1 != $checksum)" + +log_must eval "zfs send -w $snap | zfs receive $TESTPOOL/$TESTFS1/c1" + +keystatus=$(get_prop keystatus $TESTPOOL/$TESTFS1/c1) +[[ "$keystatus" == "unavailable" ]] || \ + log_fail "Expected keystatus unavailable, got $keystatus" + +log_must eval "echo $passphrase | zfs mount -l $TESTPOOL/$TESTFS1/c1" +typeset cksum2=$(md5sum /$TESTPOOL/$TESTFS1/c1/$TESTFILE0 | \ + awk '{ print $1 }') +[[ "$cksum2" == "$checksum" ]] || \ + log_fail "Checksums differ ($cksum2 != $checksum)" + +log_pass "ZFS can receive streams from raw sends" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_raw_incremental.ksh 
b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_raw_incremental.ksh new file mode 100644 index 0000000000..48878327b8 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_raw_incremental.ksh @@ -0,0 +1,95 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# ZFS should receive streams from raw incremental sends. +# +# STRATEGY: +# 1. Create an encrypted dataset +# 2. Snapshot the dataset +# 3. Create a file and get its checksum +# 4. Snapshot the dataset +# 5. Attempt to receive a raw send stream of the first snapshot +# 6. Change the passphrase required to unlock the original filesystem +# 7. Attempt and intentionally fail to receive the second snapshot +# 8. Verify that the required passphrase hasn't changed on the receive side +# 9. Attempt a real raw incremental send stream of the second snapshot +# 10. Attempt load the key and mount the dataset +# 11. 
Verify the checksum of the file is the same as the original +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must zfs destroy -r $TESTPOOL/$TESTFS1 + + datasetexists $TESTPOOL/$TESTFS2 && \ + log_must zfs destroy -r $TESTPOOL/$TESTFS2 + + [[ -f $ibackup ]] && log_must rm -f $ibackup +} + +log_onexit cleanup + +log_assert "ZFS should receive streams from raw incremental sends" + +typeset ibackup="/var/tmp/ibackup.$$" +typeset ibackup_trunc="/var/tmp/ibackup_trunc.$$" +typeset passphrase="password" +typeset passphrase2="password2" +typeset snap1="$TESTPOOL/$TESTFS1@snap1" +typeset snap2="$TESTPOOL/$TESTFS1@snap2" + +log_must eval "echo $passphrase | zfs create -o encryption=on" \ + "-o keyformat=passphrase $TESTPOOL/$TESTFS1" + +log_must zfs snapshot $snap1 + +log_must mkfile 1M /$TESTPOOL/$TESTFS1/$TESTFILE0 +typeset checksum=$(md5sum /$TESTPOOL/$TESTFS1/$TESTFILE0 | awk '{ print $1 }') + +log_must zfs snapshot $snap2 + +log_must eval "zfs send -w $snap1 | zfs receive $TESTPOOL/$TESTFS2" +log_must eval "echo $passphrase2 | zfs change-key $TESTPOOL/$TESTFS1" +log_must eval "zfs send -w -i $snap1 $snap2 > $ibackup" + +typeset trunc_size=$(stat -c %s $ibackup) +trunc_size=$(expr $trunc_size - 64) +log_must cp $ibackup $ibackup_trunc +log_must truncate -s $trunc_size $ibackup_trunc +log_mustnot eval "zfs receive $TESTPOOL/$TESTFS2 < $ibackup_trunc" +log_mustnot eval "echo $passphrase2 | zfs load-key $TESTPOOL/$TESTFS2" +log_must eval "echo $passphrase | zfs load-key $TESTPOOL/$TESTFS2" +log_must zfs unload-key $TESTPOOL/$TESTFS2 + +log_must eval "zfs receive $TESTPOOL/$TESTFS2 < $ibackup" +log_must eval "echo $passphrase2 | zfs mount -l $TESTPOOL/$TESTFS2" + +typeset cksum1=$(md5sum /$TESTPOOL/$TESTFS2/$TESTFILE0 | awk '{ print $1 }') +[[ "$cksum1" == "$checksum" ]] || \ + log_fail "Checksums differ ($cksum1 != $checksum)" + +log_pass "ZFS can receive streams from raw incremental sends" diff --git 
a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_to_encrypted.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_to_encrypted.ksh new file mode 100644 index 0000000000..57896c6fd3 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_to_encrypted.ksh @@ -0,0 +1,75 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# ZFS should receive to an encrypted child dataset. +# +# STRATEGY: +# 1. Snapshot the default dataset +# 2. Create an encrypted dataset +# 3. Attempt to receive a stream to an encrypted child +# 4. Attempt to receive a stream with properties to an encrypted child +# 5. Attempt to receive a replication stream to an encrypted child +# 6. Unmount and unload the encrypted dataset keys +# 7. 
Attempt to receive a snapshot stream to an encrypted child +# + +verify_runnable "both" + +function cleanup +{ + snapexists $snap && log_must_busy zfs destroy -f $snap + + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must zfs destroy -r $TESTPOOL/$TESTFS1 +} + +log_onexit cleanup + +log_assert "ZFS should receive to an encrypted child dataset" + +typeset passphrase="password" +typeset snap="$TESTPOOL/$TESTFS@snap" +typeset testfile="testfile" + +log_must zfs snapshot $snap + +log_must eval "echo $passphrase | zfs create -o encryption=on" \ + "-o keyformat=passphrase $TESTPOOL/$TESTFS1" + +log_note "Verifying ZFS will receive to an encrypted child" +log_must eval "zfs send $snap | zfs receive $TESTPOOL/$TESTFS1/c1" + +log_note "Verifying 'send -p' will not receive to an encrypted child" +log_mustnot eval "zfs send -p $snap | zfs receive $TESTPOOL/$TESTFS1/c2" + +log_note "Verifying 'send -R' will not receive to an encrypted child" +log_mustnot eval "zfs send -R $snap | zfs receive $TESTPOOL/$TESTFS1/c3" + +log_note "Verifying ZFS will not receive to an encrypted child when the" \ + "parent key is unloaded" +log_must zfs unmount $TESTPOOL/$TESTFS1 +log_must zfs unload-key $TESTPOOL/$TESTFS1 +log_mustnot eval "zfs send $snap | zfs receive $TESTPOOL/$TESTFS1/c4" + +log_pass "ZFS can receive to an encrypted child dataset" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_encrypted_child.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_encrypted_child.ksh new file mode 100644 index 0000000000..fa57658f18 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_encrypted_child.ksh @@ -0,0 +1,78 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. 
+# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# 'zfs rename' should not move an encrypted child dataset outside of its +# encryption root. +# +# STRATEGY: +# 1. Create two encryption roots, and a child and grandchild of the first +# encryption root +# 2. Attempt to rename the grandchild under an unencrypted parent +# 3. Attempt to rename the grandchild under a different encrypted parent +# 4. Attempt to rename the grandchild under the current parent +# 5. Verify the encryption root of the dataset +# 6. Attempt to rename the grandchild to a child +# 7. Verify the encryption root of the dataset +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS2 && \ + log_must zfs destroy -r $TESTPOOL/$TESTFS2 + datasetexists $TESTPOOL/$TESTFS3 && \ + log_must zfs destroy -r $TESTPOOL/$TESTFS3 +} +log_onexit cleanup + +log_assert "'zfs rename' should not move an encrypted child outside of its" \ + "encryption root" + +log_must eval "echo $PASSPHRASE | zfs create -o encryption=on" \ + "-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS2" +log_must zfs create $TESTPOOL/$TESTFS2/child +log_must zfs create $TESTPOOL/$TESTFS2/child/grandchild +log_must eval "echo $PASSPHRASE1 | zfs create -o encryption=on" \ + "-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS3" + +log_mustnot zfs rename $TESTPOOL/$TESTFS2/child/grandchild \ + $TESTPOOL/grandchild + +log_mustnot zfs rename $TESTPOOL/$TESTFS2/child/grandchild \ + $TESTPOOL/$TESTFS3/grandchild + +log_must zfs rename $TESTPOOL/$TESTFS2/child/grandchild \ + $TESTPOOL/$TESTFS2/child/grandchild2 +log_must verify_encryption_root 
$TESTPOOL/$TESTFS2/child/grandchild2 \ + $TESTPOOL/$TESTFS2 + +log_must zfs rename $TESTPOOL/$TESTFS2/child/grandchild2 \ + $TESTPOOL/$TESTFS2/grandchild2 +log_must verify_encryption_root $TESTPOOL/$TESTFS2/grandchild2 \ + $TESTPOOL/$TESTFS2 + +log_pass "'zfs rename' does not move an encrypted child outside of its" \ + "encryption root" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_to_encrypted.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_to_encrypted.ksh new file mode 100644 index 0000000000..400592aaca --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename_to_encrypted.ksh @@ -0,0 +1,51 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# 'zfs rename' should not rename an unencrypted dataset to a child +# of an encrypted dataset +# +# STRATEGY: +# 1. Create an encrypted dataset +# 2. 
Attempt to rename the default dataset to a child of the encrypted dataset +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS2 && \ + log_must zfs destroy $TESTPOOL/$TESTFS2 +} +log_onexit cleanup + +log_assert "'zfs rename' should not rename an unencrypted dataset to a" \ + "child of an encrypted dataset" + +log_must eval "echo $PASSPHRASE | zfs create -o encryption=on" \ + "-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS2" +log_mustnot zfs rename $TESTPOOL/$TESTFS $TESTPOOL/$TESTFS2/$TESTFS + +log_pass "'zfs rename' does not rename an unencrypted dataset to a child" \ + "of an encrypted dataset" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_encrypted.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_encrypted.ksh new file mode 100644 index 0000000000..490e146ba6 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_encrypted.ksh @@ -0,0 +1,76 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017, Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# ZFS should perform unencrypted sends of encrypted datasets, unless the '-p' +# or '-R' options are specified. +# +# STRATEGY: +# 1. Create an encrypted dataset +# 6. Create a child encryption root +# 2. Snapshot the dataset +# 3. Attempt a send +# 4. Attempt a send with properties +# 5. Attempt a replication send +# 7. Unmount the parent and unload its key +# 8. 
Attempt a send of the parent dataset +# 9. Attempt a send of the child encryption root +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must zfs destroy -r $TESTPOOL/$TESTFS1 +} + +log_onexit cleanup + +log_assert "ZFS should perform unencrypted sends of encrypted datasets, " \ + "unless the '-p' or '-R' options are specified" + +typeset passphrase="password" +typeset passphrase1="password1" +typeset snap="$TESTPOOL/$TESTFS1@snap" + +log_must eval "echo $passphrase | zfs create -o encryption=on" \ + "-o keyformat=passphrase $TESTPOOL/$TESTFS1" + +log_must eval "echo $passphrase1 | zfs create -o encryption=on" \ + "-o keyformat=passphrase $TESTPOOL/$TESTFS1/child" + +log_must zfs snapshot -r $snap + +log_must eval "zfs send $snap > /dev/null" +log_mustnot eval "zfs send -p $snap > /dev/null" +log_mustnot eval "zfs send -R $snap > /dev/null" + +log_must zfs unmount $TESTPOOL/$TESTFS1 +log_must zfs unload-key $TESTPOOL/$TESTFS1 + +log_mustnot eval "zfs send $snap > /dev/null" +log_must eval "zfs send $TESTPOOL/$TESTFS1/child@snap > /dev/null" + +log_pass "ZFS performs unencrypted sends of encrypted datasets, unless the" \ + "'-p' or '-R' options are specified" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_encrypted_unloaded.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_encrypted_unloaded.ksh new file mode 100644 index 0000000000..112ee1143d --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_encrypted_unloaded.ksh @@ -0,0 +1,59 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. 
A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017, Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# ZFS should not perform unencrypted sends from encrypted datasets +# with unloaded keys. +# +# STRATEGY: +# 1. Create an encrypted dataset +# 2. Snapshot the dataset +# 3. Unload the dataset key +# 4. Verify sending the stream fails +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must zfs destroy -r $TESTPOOL/$TESTFS1 +} + +log_onexit cleanup + +log_assert "ZFS should not perform unencrypted sends from encrypted datasets" \ + "with unloaded keys." + +typeset passphrase="password" +typeset snap="$TESTPOOL/$TESTFS1@snap" + +log_must eval "echo $passphrase | zfs create -o encryption=on" \ + "-o keyformat=passphrase $TESTPOOL/$TESTFS1" +log_must zfs snapshot $snap +log_must zfs unmount $TESTPOOL/$TESTFS1 +log_must zfs unload-key $TESTPOOL/$TESTFS1 +log_mustnot eval "zfs send $snap > /dev/null" + +log_pass "ZFS does not perform unencrypted sends from encrypted datasets" \ + "with unloaded keys." diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_raw.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_raw.ksh new file mode 100644 index 0000000000..85cc7407e1 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_raw.ksh @@ -0,0 +1,79 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. 
+# +# CDDL HEADER END +# + +# +# Copyright (c) 2017, Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# ZFS should perform raw sends of datasets. +# +# STRATEGY: +# 1. Create an encrypted dataset +# 2. Snapshot the default dataset and the encrypted dataset +# 3. Attempt a raw send of both datasets +# 4. Attempt a raw send with properties of both datasets +# 5. Attempt a raw replication send of both datasets +# 6. Unmount and unload the encrypted dataset key +# 7. Attempt a raw send of the encrypted dataset +# + +verify_runnable "both" + +function cleanup +{ + snapexists $snap && \ + log_must zfs destroy $snap + + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must zfs destroy -r $TESTPOOL/$TESTFS1 +} + +log_onexit cleanup + +log_assert "ZFS should perform raw sends of datasets" + +typeset passphrase="password" +typeset snap="$TESTPOOL/$TESTFS@snap" +typeset snap1="$TESTPOOL/$TESTFS1@snap" + +log_must eval "echo $passphrase | zfs create -o encryption=on" \ + "-o keyformat=passphrase $TESTPOOL/$TESTFS1" + +log_must zfs snapshot $snap +log_must zfs snapshot $snap1 + +log_must eval "zfs send -w $snap > /dev/null" +log_must eval "zfs send -w $snap1 > /dev/null" + +log_note "Verify ZFS can perform raw sends with properties" +log_must eval "zfs send -wp $snap > /dev/null" +log_must eval "zfs send -wp $snap1 > /dev/null" + +log_note "Verify ZFS can perform raw replication sends" +log_must eval "zfs send -wR $snap > /dev/null" +log_must eval "zfs send -wR $snap1 > /dev/null" + +log_note "Verify ZFS can perform a raw send of an encrypted datasets with" \ + "its key unloaded" +log_must zfs unmount $TESTPOOL/$TESTFS1 +log_must zfs unload-key $TESTPOOL/$TESTFS1 +log_must eval "zfs send -w $snap1 > /dev/null" + +log_pass "ZFS performs raw sends of datasets" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_set/setup.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_set/setup.ksh index 8868747d22..b2b5c077bf 
100644 --- a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_set/setup.ksh +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_set/setup.ksh @@ -28,4 +28,5 @@ . $STF_SUITE/include/libtest.shlib DISK=${DISKS%% *} + default_container_volume_setup $DISK diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_set/zfs_set_keylocation.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_set/zfs_set_keylocation.ksh new file mode 100644 index 0000000000..313fa4e4d1 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_set/zfs_set_keylocation.ksh @@ -0,0 +1,93 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# Unencrypted datasets should only allow keylocation of 'none', encryption +# roots should only allow keylocation of 'prompt' and file URI, and encrypted +# child datasets should not be able to change their keylocation. +# +# STRATEGY: +# 1. Verify the key location of the default dataset is 'none' +# 2. Attempt to change the key location of the default dataset +# 3. Create an encrypted dataset using a key file +# 4. Attempt to change the key location of the encrypted dataset to 'none', +# an invalid location, its current location, and 'prompt' +# 5. Attempt to reload the encrypted dataset key using the new key location +# 6. Create a encrypted child dataset +# 7. 
Verify the key location of the child dataset is 'none' +# 8. Attempt to change the key location of the child dataset +# 9. Verify the key location of the child dataset has not changed +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must zfs destroy -r $TESTPOOL/$TESTFS1 +} +log_onexit cleanup + +log_assert "Key location can only be 'prompt' or a file path for encryption" \ + "roots, and 'none' for unencrypted volumes" + +log_must eval "echo $PASSPHRASE > /$TESTPOOL/pkey" + +log_must verify_keylocation $TESTPOOL/$TESTFS "none" +log_must zfs set keylocation=none $TESTPOOL/$TESTFS +log_mustnot zfs set keylocation=/$TESTPOOL/pkey $TESTPOOL/$TESTFS +log_mustnot zfs set keylocation=file:///$TESTPOOL/pkey $TESTPOOL/$TESTFS +log_must verify_keylocation $TESTPOOL/$TESTFS "none" + +log_must zfs create -o encryption=on -o keyformat=passphrase \ + -o keylocation=file:///$TESTPOOL/pkey $TESTPOOL/$TESTFS1 + +log_mustnot zfs set keylocation=none $TESTPOOL/$TESTFS1 +log_mustnot zfs set keylocation=/$TESTPOOL/pkey $TESTPOOL/$TESTFS1 + +log_must zfs set keylocation=file:///$TESTPOOL/pkey $TESTPOOL/$TESTFS1 +log_must verify_keylocation $TESTPOOL/$TESTFS1 "file:///$TESTPOOL/pkey" + +log_must zfs set keylocation=prompt $TESTPOOL/$TESTFS1 +log_must verify_keylocation $TESTPOOL/$TESTFS1 "prompt" + +log_must zfs unmount $TESTPOOL/$TESTFS1 +log_must zfs unload-key $TESTPOOL/$TESTFS1 + +log_must rm /$TESTPOOL/pkey +log_must eval "echo $PASSPHRASE | zfs load-key $TESTPOOL/$TESTFS1" +log_must zfs mount $TESTPOOL/$TESTFS1 + +log_must zfs create $TESTPOOL/$TESTFS1/child +log_must verify_keylocation $TESTPOOL/$TESTFS1/child "none" + +log_mustnot zfs set keylocation=none $TESTPOOL/$TESTFS1/child +log_mustnot zfs set keylocation=prompt $TESTPOOL/$TESTFS1/child +log_mustnot zfs set keylocation=file:///$TESTPOOL/pkey $TESTPOOL/$TESTFS1/child +log_mustnot zfs set keylocation=/$TESTPOOL/pkey $TESTPOOL/$TESTFS1/child + +log_must verify_keylocation 
$TESTPOOL/$TESTFS1/child "none" + +log_pass "Key location can only be 'prompt' or a file path for encryption" \ + "roots, and 'none' for unencrypted volumes" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_unload-key/Makefile b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_unload-key/Makefile new file mode 100644 index 0000000000..8fe2bf42ca --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_unload-key/Makefile @@ -0,0 +1,21 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2012, 2016 by Delphix. All rights reserved. +# + +include $(SRC)/Makefile.master + +ROOTOPTPKG = $(ROOT)/opt/zfs-tests +TARGETDIR = $(ROOTOPTPKG)/tests/functional/cli_root/zfs_unload-key + +include $(SRC)/test/zfs-tests/Makefile.com diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_unload-key/cleanup.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_unload-key/cleanup.ksh new file mode 100644 index 0000000000..79cd6e9f90 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_unload-key/cleanup.ksh @@ -0,0 +1,30 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. 
+# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +. $STF_SUITE/include/libtest.shlib + +default_cleanup diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_unload-key/setup.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_unload-key/setup.ksh new file mode 100644 index 0000000000..6a9af3bc28 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_unload-key/setup.ksh @@ -0,0 +1,32 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +. 
$STF_SUITE/include/libtest.shlib + +DISK=${DISKS%% *} + +default_setup $DISK diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_unload-key/zfs_unload-key.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_unload-key/zfs_unload-key.ksh new file mode 100644 index 0000000000..9e08ac69d4 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_unload-key/zfs_unload-key.ksh @@ -0,0 +1,69 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# 'zfs unload-key' should only unload the key of an unmounted dataset. +# +# STRATEGY: +# 1. Attempt to unload the default dataset's key +# 2. Unmount the dataset +# 3. Attempt to unload the default dataset's key +# 4. Create an encrypted dataset +# 5. Attempt to unload the dataset's key +# 6. Verify the key is loaded +# 7. Unmount the dataset +# 8. Attempt to unload the dataset's key +# 9. Verify the key is not loaded +# 10. 
Attempt to unload the dataset's key +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must zfs destroy $TESTPOOL/$TESTFS1 +} +log_onexit cleanup + +log_assert "'zfs unload-key' should unload the key for an unmounted" \ + "encrypted dataset" + +log_mustnot zfs unload-key $TESTPOOL/$TESTFS + +log_must zfs unmount $TESTPOOL/$TESTFS +log_mustnot zfs unload-key $TESTPOOL/$TESTFS + +log_must eval "echo $PASSPHRASE | zfs create -o encryption=on" \ + "-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS1" +log_mustnot zfs unload-key $TESTPOOL/$TESTFS1 +log_must key_available $TESTPOOL/$TESTFS1 + +log_must zfs unmount $TESTPOOL/$TESTFS1 +log_must zfs unload-key $TESTPOOL/$TESTFS1 +log_must key_unavailable $TESTPOOL/$TESTFS1 + +log_mustnot zfs unload-key $TESTPOOL/$TESTFS1 + +log_pass "'zfs unload-key' unloads the key for an unmounted encrypted dataset" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_unload-key/zfs_unload-key_all.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_unload-key/zfs_unload-key_all.ksh new file mode 100644 index 0000000000..ecb98d1894 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_unload-key/zfs_unload-key_all.ksh @@ -0,0 +1,76 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. 
$STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# 'zfs unload-key -a' should unload keys for all datasets. +# +# STRATEGY: +# 1. Create an encrypted filesystem, encrypted child dataset, an encrypted +# zvol, and an encrypted pool +# 2. Unmount all datasets +# 3. Attempt to unload all dataset keys +# 4. Verify each dataset has its key unloaded +# 5. Attempt to mount each dataset +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must zfs destroy -r $TESTPOOL/$TESTFS1 + datasetexists $TESTPOOL/zvol && log_must zfs destroy $TESTPOOL/zvol + poolexists $TESTPOOL1 && log_must destroy_pool $TESTPOOL1 +} +log_onexit cleanup + +log_assert "'zfs unload-key -a' should unload keys for all datasets" + +log_must eval "echo $PASSPHRASE1 > /$TESTPOOL/pkey" +log_must zfs create -o encryption=on -o keyformat=passphrase \ + -o keylocation=file:///$TESTPOOL/pkey $TESTPOOL/$TESTFS1 +log_must zfs create $TESTPOOL/$TESTFS1/child + +log_must zfs create -V 64M -o encryption=on -o keyformat=passphrase \ + -o keylocation=file:///$TESTPOOL/pkey $TESTPOOL/zvol + +typeset DISK2="$(echo $DISKS | awk '{ print $2}')" +log_must zpool create -O encryption=on -O keyformat=passphrase \ + -O keylocation=file:///$TESTPOOL/pkey $TESTPOOL1 $DISK2 + +log_must zfs unmount $TESTPOOL/$TESTFS1 +log_must zfs unmount $TESTPOOL1 + +log_must zfs unload-key -a + +log_must key_unavailable $TESTPOOL/$TESTFS1 +log_must key_unavailable $TESTPOOL/$TESTFS1/child +log_must key_unavailable $TESTPOOL/zvol +log_must key_unavailable $TESTPOOL1 + +log_mustnot zfs mount $TESTPOOL +log_mustnot zfs mount $TESTPOOL/zvol +log_mustnot zfs mount $TESTPOOL/$TESTFS1 + +log_pass "'zfs unload-key -a' unloads keys for all datasets" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_unload-key/zfs_unload-key_recursive.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_unload-key/zfs_unload-key_recursive.ksh new file mode 
100644 index 0000000000..9766b59058 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_unload-key/zfs_unload-key_recursive.ksh @@ -0,0 +1,72 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# 'zfs unload-key -r' should recursively unload keys. +# +# STRATEGY: +# 1. Create a parent encrypted dataset +# 2. Create a sibling encrypted dataset +# 3. Create a child dataset as an encryption root +# 4. Unmount all datasets +# 5. Attempt to unload all dataset keys under parent +# 6. Verify parent and child have their keys unloaded +# 7. Verify sibling has its key loaded +# 8. 
Attempt to mount all datasets +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must zfs destroy -r $TESTPOOL/$TESTFS1 +} +log_onexit cleanup + +log_assert "'zfs unload-key -r' should recursively unload keys" + +log_must eval "echo $PASSPHRASE > /$TESTPOOL/pkey" +log_must zfs create -o encryption=on -o keyformat=passphrase \ + -o keylocation=file:///$TESTPOOL/pkey $TESTPOOL/$TESTFS1 +log_must zfs create -o keyformat=passphrase \ + -o keylocation=file:///$TESTPOOL/pkey $TESTPOOL/$TESTFS1/child +log_must eval "echo $PASSPHRASE1 | zfs create -o encryption=on" \ + "-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS2" + +log_must zfs unmount $TESTPOOL/$TESTFS1 +log_must zfs unmount $TESTPOOL/$TESTFS2 + +log_must zfs unload-key -r $TESTPOOL/$TESTFS1 + +log_must key_unavailable $TESTPOOL/$TESTFS1 +log_must key_unavailable $TESTPOOL/$TESTFS1/child + +log_must key_available $TESTPOOL/$TESTFS2 + +log_mustnot zfs mount $TESTPOOL/$TESTFS1 +log_mustnot zfs mount $TESTPOOL/$TESTFS1/child +log_must zfs mount $TESTPOOL/$TESTFS2 + +log_pass "'zfs unload-key -r' recursively unloads keys" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_005_pos.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_005_pos.ksh index 2afbec37dc..2afbec37dc 100755..100644 --- a/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_005_pos.ksh +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_005_pos.ksh diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_crypt_combos.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_crypt_combos.ksh new file mode 100644 index 0000000000..d28d5953c5 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_crypt_combos.ksh @@ -0,0 +1,90 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file 
and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017, Datto, Inc. All rights reserved. +# Copyright (c) 2019, DilOS +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_create/zfs_create_common.kshlib + +# +# DESCRIPTION: +# 'zpool create' should create encrypted pools when using a valid encryption +# algorithm, key format, key location, and key. +# +# STRATEGY: +# 1. Create a pool for each combination of encryption type and key format +# 2. Verify that each filesystem has the correct properties set +# + +verify_runnable "global" + +function cleanup +{ + poolexists $TESTPOOL && destroy_pool $TESTPOOL +} +log_onexit cleanup + +set -A ENCRYPTION_ALGS "encryption=on" \ + "encryption=aes-128-ccm" \ + "encryption=aes-192-ccm" \ + "encryption=aes-256-ccm" \ + "encryption=aes-128-gcm" \ + "encryption=aes-192-gcm" \ + "encryption=aes-256-gcm" + +set -A ENCRYPTION_PROPS "encryption=aes-256-ccm" \ + "encryption=aes-128-ccm" \ + "encryption=aes-192-ccm" \ + "encryption=aes-256-ccm" \ + "encryption=aes-128-gcm" \ + "encryption=aes-192-gcm" \ + "encryption=aes-256-gcm" + +set -A KEYFORMATS "keyformat=raw" \ + "keyformat=hex" \ + "keyformat=passphrase" + +set -A USER_KEYS "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz" \ + "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb" \ + "abcdefgh" + +log_assert "'zpool create' should create encrypted pools when using a valid" \ + "encryption algorithm, key format, key location, and key." 
+ +typeset -i i=0 +while (( i < ${#ENCRYPTION_ALGS[*]} )); do + typeset -i j=0 + while (( j < ${#KEYFORMATS[*]} )); do + log_must eval "printf '%s' ${USER_KEYS[j]} | zpool create" \ + "-O ${ENCRYPTION_ALGS[i]} -O ${KEYFORMATS[j]}" \ + "$TESTPOOL $DISKS" + + propertycheck $TESTPOOL ${ENCRYPTION_PROPS[i]} || \ + log_fail "failed to set ${ENCRYPTION_ALGS[i]}" + propertycheck $TESTPOOL ${KEYFORMATS[j]} || \ + log_fail "failed to set ${KEYFORMATS[j]}" + + log_must zpool destroy $TESTPOOL + (( j = j + 1 )) + done + (( i = i + 1 )) +done + +log_pass "'zpool create' creates encrypted pools when using a valid" \ + "encryption algorithm, key format, key location, and key." diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_encrypted.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_encrypted.ksh new file mode 100644 index 0000000000..aa154d5c65 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_create/zpool_create_encrypted.ksh @@ -0,0 +1,95 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017, Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# 'zpool create' should create an encrypted dataset only if it has a valid +# combination of encryption properties set. 
+# +# enc = encryption +# loc = keylocation provided +# fmt = keyformat provided +# +# U = unspecified +# N = off +# Y = on +# +# enc fmt loc valid notes +# ------------------------------------------- +# U 0 1 no no crypt specified +# U 1 0 no no crypt specified +# U 1 1 no no crypt specified +# N 0 0 yes explicit no encryption +# N 0 1 no keylocation given, but crypt off +# N 1 0 no keyformat given, but crypt off +# N 1 1 no keyformat given, but crypt off +# Y 0 0 no no keyformat specified for new key +# Y 0 1 no no keyformat specified for new key +# Y 1 0 yes new encryption root +# Y 1 1 yes new encryption root +# +# STRATEGY: +# 1. Attempt to create a dataset using all combinations of encryption +# properties +# + +verify_runnable "global" + +function cleanup +{ + poolexists $TESTPOOL && destroy_pool $TESTPOOL +} +log_onexit cleanup + +log_assert "'zpool create' should create an encrypted dataset only if it" \ + "has a valid combination of encryption properties set." + +log_mustnot zpool create -O keylocation=prompt $TESTPOOL $DISKS +log_mustnot zpool create -O keyformat=passphrase $TESTPOOL $DISKS +log_mustnot zpool create -O keyformat=passphrase -O keylocation=prompt \ + $TESTPOOL $DISKS + +log_must zpool create -O encryption=off $TESTPOOL $DISKS +log_must zpool destroy $TESTPOOL + +log_mustnot zpool create -O encryption=off -O keylocation=prompt \ + $TESTPOOL $DISKS +log_mustnot zpool create -O encryption=off -O keyformat=passphrase \ + $TESTPOOL $DISKS +log_mustnot zpool create -O encryption=off -O keyformat=passphrase \ + -O keylocation=prompt $TESTPOOL $DISKS + +log_mustnot zpool create -O encryption=on $TESTPOOL $DISKS +log_mustnot zpool create -O encryption=on -O keylocation=prompt \ + $TESTPOOL $DISKS + +log_must eval "echo $PASSPHRASE | zpool create -O encryption=on" \ + "-O keyformat=passphrase $TESTPOOL $DISKS" +log_must zpool destroy $TESTPOOL + +log_must eval "echo $PASSPHRASE | zpool create -O encryption=on" \ + "-O keyformat=passphrase -O 
keylocation=prompt $TESTPOOL $DISKS" +log_must zpool destroy $TESTPOOL + +log_pass "'zpool create' creates an encrypted dataset only if it has a" \ + "valid combination of encryption properties set." diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg b/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg index fc4ce85bdb..998fe0e2b3 100644 --- a/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg @@ -79,4 +79,6 @@ typeset -a properties=( "feature@spacemap_v2" "feature@allocation_classes" "feature@resilver_defer" + "feature@encryption" + "feature@bookmark_v2" ) diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_import/blockfiles/missing_ivset.dat.bz2 b/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_import/blockfiles/missing_ivset.dat.bz2 Binary files differnew file mode 100644 index 0000000000..2b91d9003b --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_import/blockfiles/missing_ivset.dat.bz2 diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_import/cryptv0.dat.bz2 b/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_import/cryptv0.dat.bz2 Binary files differnew file mode 100644 index 0000000000..1c625c2c44 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_import/cryptv0.dat.bz2 diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_encrypted.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_encrypted.ksh new file mode 100644 index 0000000000..4e9013afeb --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_encrypted.ksh @@ -0,0 +1,59 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. 
+# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zpool_import/zpool_import.cfg +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# 'zpool import' should import a pool with an encrypted dataset without +# mounting it. +# +# STRATEGY: +# 1. Create an encrypted pool +# 2. Export the pool +# 3. Attempt to import the pool +# 4. Verify the pool exists and the key is not loaded +# + +verify_runnable "both" + +function cleanup +{ + destroy_pool $TESTPOOL1 + log_must rm $VDEV0 + log_must mkfile $FILE_SIZE $VDEV0 +} +log_onexit cleanup + +log_assert "'zpool import' should import a pool with an encrypted dataset" \ + "without mounting it" + +log_must eval "echo $PASSPHRASE | zpool create -O encryption=on" \ + "-O keyformat=passphrase -O keylocation=prompt $TESTPOOL1 $VDEV0" +log_must zpool export $TESTPOOL1 +log_must zpool import -d $DEVICE_DIR $TESTPOOL1 +log_must poolexists $TESTPOOL1 +log_must key_unavailable $TESTPOOL1 +log_must unmounted $TESTPOOL1 + +log_pass "'zpool import' imports a pool with an encrypted dataset without" \ + "mounting it" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_encrypted_load.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_encrypted_load.ksh new file mode 100644 index 0000000000..d060e8a798 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_encrypted_load.ksh @@ -0,0 +1,59 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common 
Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zpool_import/zpool_import.cfg +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# 'zpool import -l' should import a pool with an encrypted dataset and load +# its key. +# +# STRATEGY: +# 1. Create an encrypted pool +# 2. Export the pool +# 3. Attempt to import the pool with the key +# 4. Verify the pool exists and the key is loaded +# + +verify_runnable "both" + +function cleanup +{ + destroy_pool $TESTPOOL1 + log_must rm $VDEV0 + log_must mkfile $FILE_SIZE $VDEV0 +} +log_onexit cleanup + +log_assert "'zpool import -l' should import a pool with an encrypted dataset" \ + "and load its key" + +log_must eval "echo $PASSPHRASE | zpool create -O encryption=on" \ + "-O keyformat=passphrase -O keylocation=prompt $TESTPOOL1 $VDEV0" +log_must zpool export $TESTPOOL1 +log_must eval "echo $PASSPHRASE | zpool import -l -d $DEVICE_DIR $TESTPOOL1" +log_must poolexists $TESTPOOL1 +log_must key_available $TESTPOOL1 +log_must mounted $TESTPOOL1 + +log_pass "'zpool import -l' imports a pool with an encrypted dataset and" \ + "loads its key" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_errata3.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_errata3.ksh new file mode 100644 index 0000000000..e58af126b3 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_errata3.ksh @@ -0,0 +1,99 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This 
file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# 'zpool import' should import a pool with Errata #3 while preventing +# the user from performing read/write operations +# +# STRATEGY: +# 1. Import a pre-packaged pool with Errata #3 +# 2. Attempt to write to the affected datasets +# 3. Attempt to read from the affected datasets +# 4. Attempt to perform a raw send of the affected datasets +# 5. Perform a regular send of the datasets under a new encryption root +# 6. Verify the new datasets can be read from and written to +# 7. Destroy the old affected datasets +# 8.
Reimport the pool and verify that the errata is no longer present +# + +verify_runnable "global" + +POOL_NAME=cryptv0 +POOL_FILE=cryptv0.dat + +function uncompress_pool +{ + + log_note "Creating pool from $POOL_FILE" + log_must bzcat \ + $STF_SUITE/tests/functional/cli_root/zpool_import/$POOL_FILE.bz2 \ + > /$TESTPOOL/$POOL_FILE + return 0 +} + +function cleanup +{ + poolexists $POOL_NAME && log_must zpool destroy $POOL_NAME + [[ -e /$TESTPOOL/$POOL_FILE ]] && rm /$TESTPOOL/$POOL_FILE + return 0 +} +log_onexit cleanup + +log_assert "Verify that Errata 3 is properly handled" + +uncompress_pool +log_must zpool import -d /$TESTPOOL/ $POOL_NAME +log_must eval "zpool status $POOL_NAME | grep -q Errata" # also detects 'Errata #4' +log_must eval "echo 'password' | zfs load-key $POOL_NAME/testfs" +log_must eval "echo 'password' | zfs load-key $POOL_NAME/testvol" + +log_mustnot zfs mount $POOL_NAME/testfs +log_must zfs mount -o ro $POOL_NAME/testfs + +old_mntpnt=$(get_prop mountpoint $POOL_NAME/testfs) +log_must eval "ls $old_mntpnt | grep -q testfile" +log_mustnot dd if=/dev/zero of=/dev/zvol/rdsk/$POOL_NAME/testvol bs=512 count=1 +log_must dd if=/dev/zvol/rdsk/$POOL_NAME/testvol of=/dev/null bs=512 count=1 +log_must eval "echo 'password' | zfs create \ + -o encryption=on -o keyformat=passphrase -o keylocation=prompt \ + cryptv0/encroot" +log_mustnot eval "zfs send -w $POOL_NAME/testfs@snap1 | \ + zfs recv $POOL_NAME/encroot/testfs" +log_mustnot eval "zfs send -w $POOL_NAME/testvol@snap1 | \ + zfs recv $POOL_NAME/encroot/testvol" + +log_must eval "zfs send $POOL_NAME/testfs@snap1 | \ + zfs recv $POOL_NAME/encroot/testfs" +log_must eval "zfs send $POOL_NAME/testvol@snap1 | \ + zfs recv $POOL_NAME/encroot/testvol" +block_device_wait +log_must dd if=/dev/zero of=/dev/zvol/rdsk/$POOL_NAME/encroot/testvol bs=512 count=1 +new_mntpnt=$(get_prop mountpoint $POOL_NAME/encroot/testfs) +log_must eval "ls $new_mntpnt | grep -q testfile" +log_must zfs destroy -r $POOL_NAME/testfs 
+log_must zfs destroy -r $POOL_NAME/testvol + +log_must zpool export $POOL_NAME +log_must zpool import -d /$TESTPOOL/ $POOL_NAME +log_mustnot eval "zpool status $POOL_NAME | grep -q 'Errata #3'" +log_pass "Errata 3 is properly handled" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_errata4.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_errata4.ksh new file mode 100755 index 0000000000..d06a9cd754 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_import/zpool_import_errata4.ksh @@ -0,0 +1,143 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2019 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# 'zpool import' should import a pool with Errata #4. Users should be +# able to set the zfs_disable_ivset_guid_check to continue normal +# operation and the errata should disappear when no more effected +# datasets remain. +# +# STRATEGY: +# 1. Import a pre-packaged pool with Errata #4 and verify its state +# 2. Prepare pool to fix existing datasets +# 3. Use raw sends to fix datasets +# 4. Ensure fixed datasets match their initial counterparts +# 5. 
Destroy the initial datasets and verify the errata is gone +# + +verify_runnable "global" + +POOL_NAME=missing_ivset +POOL_FILE=missing_ivset.dat + +function uncompress_pool +{ + log_note "Creating pool from $POOL_FILE" + log_must bzcat \ + $STF_SUITE/tests/functional/cli_root/zpool_import/blockfiles/$POOL_FILE.bz2 \ + > /$TESTPOOL/$POOL_FILE + return 0 +} + +function cleanup +{ + log_must set_tunable32 zfs_disable_ivset_guid_check 0 + poolexists $POOL_NAME && log_must zpool destroy $POOL_NAME + [[ -e /$TESTPOOL/$POOL_FILE ]] && rm /$TESTPOOL/$POOL_FILE + return 0 +} +log_onexit cleanup + +log_assert "Verify that Errata 4 is properly handled" + +function has_ivset_guid # dataset +{ + ds="$1" + ivset_guid=$(get_prop ivsetguid $ds) + + if [ "$ivset_guid" == "-" ]; then + return 1 + else + return 0 + fi +} + +# 1. Import a pre-packaged pool with Errata #4 and verify its state +uncompress_pool +log_must zpool import -d /$TESTPOOL/ $POOL_NAME +log_must eval "zpool status $POOL_NAME | grep -q 'Errata #4'" +log_must eval "zpool status $POOL_NAME | grep -q ZFS-8000-ER" +bm2_value=$(zpool get -H -o value feature@bookmark_v2 $POOL_NAME) +if [ "$bm2_value" != "disabled" ]; then + log_fail "initial pool's bookmark_v2 feature is not disabled" +fi + +log_mustnot has_ivset_guid $POOL_NAME/testfs@snap1 +log_mustnot has_ivset_guid $POOL_NAME/testfs@snap2 +log_mustnot has_ivset_guid $POOL_NAME/testfs@snap3 +log_mustnot has_ivset_guid $POOL_NAME/testvol@snap1 +log_mustnot has_ivset_guid $POOL_NAME/testvol@snap2 +log_mustnot has_ivset_guid $POOL_NAME/testvol@snap3 + +# 2. Prepare pool to fix existing datasets +log_must zpool set feature@bookmark_v2=enabled $POOL_NAME +log_must set_tunable32 zfs_disable_ivset_guid_check 1 +log_must zfs create $POOL_NAME/fixed + +# 3. 
Use raw sends to fix datasets +log_must eval "zfs send -w $POOL_NAME/testfs@snap1 | \ + zfs recv $POOL_NAME/fixed/testfs" +log_must eval "zfs send -w -i @snap1 $POOL_NAME/testfs@snap2 | \ + zfs recv $POOL_NAME/fixed/testfs" +log_must eval \ + "zfs send -w -i $POOL_NAME/testfs#snap2 $POOL_NAME/testfs@snap3 | \ + zfs recv $POOL_NAME/fixed/testfs" + +log_must eval "zfs send -w $POOL_NAME/testvol@snap1 | \ + zfs recv $POOL_NAME/fixed/testvol" +log_must eval "zfs send -w -i @snap1 $POOL_NAME/testvol@snap2 | \ + zfs recv $POOL_NAME/fixed/testvol" +log_must eval \ + "zfs send -w -i $POOL_NAME/testvol#snap2 $POOL_NAME/testvol@snap3 | \ + zfs recv $POOL_NAME/fixed/testvol" + +# 4. Ensure fixed datasets match their initial counterparts +log_must eval "echo 'password' | zfs load-key $POOL_NAME/testfs" +log_must eval "echo 'password' | zfs load-key $POOL_NAME/testvol" +log_must eval "echo 'password' | zfs load-key $POOL_NAME/fixed/testfs" +log_must eval "echo 'password' | zfs load-key $POOL_NAME/fixed/testvol" +log_must zfs mount $POOL_NAME/testfs +log_must zfs mount $POOL_NAME/fixed/testfs +block_device_wait + +old_mntpnt=$(get_prop mountpoint $POOL_NAME/testfs) +new_mntpnt=$(get_prop mountpoint $POOL_NAME/fixed/testfs) +log_must diff -r "$old_mntpnt" "$new_mntpnt" +log_must diff /dev/zvol/$POOL_NAME/testvol /dev/zvol/$POOL_NAME/fixed/testvol + +log_must has_ivset_guid $POOL_NAME/fixed/testfs@snap1 +log_must has_ivset_guid $POOL_NAME/fixed/testfs@snap2 +log_must has_ivset_guid $POOL_NAME/fixed/testfs@snap3 +log_must has_ivset_guid $POOL_NAME/fixed/testvol@snap1 +log_must has_ivset_guid $POOL_NAME/fixed/testvol@snap2 +log_must has_ivset_guid $POOL_NAME/fixed/testvol@snap3 + +# 5. 
Destroy the initial datasets and verify the errata is gone +log_must zfs destroy -r $POOL_NAME/testfs +log_must zfs destroy -r $POOL_NAME/testvol + +log_must zpool export $POOL_NAME +log_must zpool import -d /$TESTPOOL/ $POOL_NAME +log_mustnot eval "zpool status $POOL_NAME | grep -q 'Errata #4'" +log_mustnot eval "zpool status $POOL_NAME | grep -q ZFS-8000-ER" +log_pass "Errata 4 is properly handled" diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_encrypted_unloaded.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_encrypted_unloaded.ksh new file mode 100644 index 0000000000..483a683bd5 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_encrypted_unloaded.ksh @@ -0,0 +1,71 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# Scrubs must work on an encrypted dataset with an unloaded key. +# +# STRATEGY: +# 1. Create an encrypted dataset +# 2. Generate data on the dataset +# 3. Unmount the encrypted dataset and unload its key +# 4. Start a scrub +# 5. Wait for the scrub to complete +# 6. Verify the scrub had no errors +# 7. 
Load the dataset key and mount it
+#
+
+verify_runnable "global"
+
+function cleanup
+{
+	datasetexists $TESTPOOL/$TESTFS2 && \
+		log_must zfs destroy $TESTPOOL/$TESTFS2
+}
+log_onexit cleanup
+
+log_assert "Scrubs must work on an encrypted dataset with an unloaded key"
+
+log_must eval "echo 'password' | zfs create -o encryption=on" \
+	"-o keyformat=passphrase $TESTPOOL/$TESTFS2"
+
+typeset mntpnt=$(get_prop mountpoint $TESTPOOL/$TESTFS2)
+log_must mkfile 10m $mntpnt/file1
+
+for i in {2..10}; do
+	log_must mkfile 512b $mntpnt/file$i
+done
+
+log_must zfs unmount $TESTPOOL/$TESTFS2
+log_must zfs unload-key $TESTPOOL/$TESTFS2
+
+log_must zpool scrub $TESTPOOL
+
+while ! is_pool_scrubbed $TESTPOOL; do
+	sleep 1
+done
+
+log_must check_pool_status $TESTPOOL "scan" "with 0 errors"
+
+log_must eval "echo 'password' | zfs mount -l $TESTPOOL/$TESTFS2"
+
+log_pass "Scrubs work on an encrypted dataset with an unloaded key" diff --git a/usr/src/test/zfs-tests/tests/functional/rsend/rsend.kshlib b/usr/src/test/zfs-tests/tests/functional/rsend/rsend.kshlib index 5d2ba60f18..6b22cb709e 100644 --- a/usr/src/test/zfs-tests/tests/functional/rsend/rsend.kshlib +++ b/usr/src/test/zfs-tests/tests/functional/rsend/rsend.kshlib @@ -428,6 +428,108 @@ function rm_files } # +# Simulate a random set of operations which could be reasonably expected +# to occur on an average filesystem. +# +# $1 Number of files to modify +# $2 Maximum file size +# $3 File system to modify the file on +# $4 Enabled xattrs (optional) +# +function churn_files +{ +	nfiles=$1 +	maxsize=$2 +	fs=$3 +	xattrs=${4:-1} + +	# +	# Remove roughly half of the files in order to make it more +	# likely that a dnode will be reallocated. +	# +	for ((i=0; i<$nfiles; i=i+1)); do +		file_name="/$fs/file-$i" + +		if [[ -e $file_name ]]; then +			if [ $((RANDOM % 2)) -eq 0 ]; then +				rm $file_name || \ +				    log_fail "Failed to remove $file_name" +			fi +		fi +	done + +	# +	# Remount the filesystem to simulate normal usage. 
This resets + # the last allocated object id allowing for new objects to be + # reallocated in the locations of previously freed objects. + # + log_must zfs unmount $fs + log_must zfs mount $fs + + for i in {0..$nfiles}; do + file_name="/$fs/file-$i" + file_size=$((($RANDOM * $RANDOM % ($maxsize - 1)) + 1)) + + # + # When the file exists modify it in one of five ways to + # simulate normal usage: + # - (20%) Remove and set and extended attribute on the file + # - (20%) Overwrite the existing file + # - (20%) Truncate the existing file to a random length + # - (20%) Truncate the existing file to zero length + # - (20%) Remove the file + # + # Otherwise create the missing file. 20% of the created + # files will be small and use embedded block pointers, the + # remainder with have random sizes up to the maximum size. + # Three extended attributes are attached to all of the files. + # + if [[ -e $file_name ]]; then + value=$((RANDOM % 5)) + if [ $value -eq 0 -a $xattrs -ne 0 ]; then + attrname="testattr$((RANDOM % 3))" + attr -qr $attrname $file_name || \ + log_fail "Failed to remove $attrname" + attr -qs $attrname -V TestValue $file_name || \ + log_fail "Failed to set $attrname" + elif [ $value -eq 1 ]; then + dd if=/dev/urandom of=$file_name \ + bs=$file_size count=1 >/dev/null 2>&1 || \ + log_fail "Failed to overwrite $file_name" + elif [ $value -eq 2 ]; then + truncate -s $file_size $file_name || \ + log_fail "Failed to truncate $file_name" + elif [ $value -eq 3 ]; then + truncate -s 0 $file_name || \ + log_fail "Failed to truncate $file_name" + else + rm $file_name || \ + log_fail "Failed to remove $file_name" + fi + else + if [ $((RANDOM % 5)) -eq 0 ]; then + file_size=$((($RANDOM % 64) + 1)) + fi + + dd if=/dev/urandom of=$file_name \ + bs=$file_size count=1 >/dev/null 2>&1 || \ + log_fail "Failed to create $file_name" + + if [ $xattrs -ne 0 ]; then + for j in {0..2}; do + attrname="testattr$j" + attr -qs $attrname -V TestValue \ + $file_name || log_fail \ + 
"Failed to set $attrname" + done + fi + fi + done + + return 0 +} + +# # Mess up file contents # # $1 The file path @@ -606,7 +708,7 @@ function parse_dump if ($1 == "OBJECT") print $1" "$4 if ($1 == "FREEOBJECTS") print $1" "$4" "$7 if ($1 == "FREE") print $1" "$7" "$10 - if ($1 == "WRITE") print $1" "$15" "$18" "$21" "$24" "$27}' + if ($1 == "WRITE") print $1" "$15" "$21" "$24" "$27" "$30}' } # diff --git a/usr/src/test/zfs-tests/tests/functional/rsend/send_encrypted_files.ksh b/usr/src/test/zfs-tests/tests/functional/rsend/send_encrypted_files.ksh new file mode 100644 index 0000000000..c85dd40965 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/rsend/send_encrypted_files.ksh @@ -0,0 +1,130 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2018 by Datto Inc. All rights reserved. +# + +. $STF_SUITE/tests/functional/rsend/rsend.kshlib + +# +# DESCRIPTION: +# Verify that a raw zfs send and receive can deal with several different +# types of file layouts. +# +# STRATEGY: +# 1. Create a new encrypted filesystem +# 2. Add an empty file to the filesystem +# 3. Add a small 512 byte file to the filesystem +# 4. Add a larger 32M file to the filesystem +# 5. Add a large sparse file to the filesystem +# 6. Add 1000 empty files to the filesystem +# 7. Add a file with a large xattr value +# 8. Use xattrtest to create files with random xattrs (with and without xattrs=on) +# 9. Take a snapshot of the filesystem +# 10. Remove the 1000 empty files to the filesystem +# 11. Take another snapshot of the filesystem +# 12. 
Send and receive both snapshots +# 13. Mount the filesystem and check the contents +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS2 && \ + log_must zfs destroy -r $TESTPOOL/$TESTFS2 + datasetexists $TESTPOOL/recv && \ + log_must zfs destroy -r $TESTPOOL/recv + [[ -f $keyfile ]] && log_must rm $keyfile + [[ -f $sendfile ]] && log_must rm $sendfile +} +log_onexit cleanup + +function recursive_cksum +{ + find $1 -type f -exec sha256sum {} \; | \ + sort -k 2 | awk '{ print $1 }' | sha256sum +} + +log_assert "Verify 'zfs send -w' works with many different file layouts" + +typeset keyfile=/$TESTPOOL/pkey +typeset sendfile=/$TESTPOOL/sendfile +typeset sendfile2=/$TESTPOOL/sendfile2 + +# Create an encrypted dataset +log_must eval "echo 'password' > $keyfile" +log_must zfs create -o encryption=on -o keyformat=passphrase \ + -o keylocation=file://$keyfile $TESTPOOL/$TESTFS2 + +# Create files with varied layouts on disk +log_must touch /$TESTPOOL/$TESTFS2/empty +log_must mkfile 512 /$TESTPOOL/$TESTFS2/small +log_must mkfile 32M /$TESTPOOL/$TESTFS2/full +log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS2/sparse \ + bs=512 count=1 seek=1048576 >/dev/null 2>&1 + +log_must mkdir -p /$TESTPOOL/$TESTFS2/dir +for i in {1..1000}; do + log_must mkfile 512 /$TESTPOOL/$TESTFS2/dir/file-$i +done + +log_must mkdir -p /$TESTPOOL/$TESTFS2/xattrondir +log_must zfs set xattr=on $TESTPOOL/$TESTFS2 + +# XXX - the lines below (through the end of the file) that are commented out +# are differences from ZoL due to currently unsupported extended attribute code +# on illumos. 
+# log_must xattrtest -f 10 -x 3 -s 32768 -r -k -p /$TESTPOOL/$TESTFS2/xattrondir +# log_must mkdir -p /$TESTPOOL/$TESTFS2/xattrsadir +# log_must zfs set xattr=sa $TESTPOOL/$TESTFS2 +# log_must xattrtest -f 10 -x 3 -s 32768 -r -k -p /$TESTPOOL/$TESTFS2/xattrsadir + +# ZoL issue #7432 +# log_must zfs set compression=on xattr=sa $TESTPOOL/$TESTFS2 +# log_must touch /$TESTPOOL/$TESTFS2/attrs +# log_must eval "python -c 'print \"a\" * 4096' | \ +# attr -s bigval /$TESTPOOL/$TESTFS2/attrs" +# log_must zfs set compression=off xattr=on $TESTPOOL/$TESTFS2 + +log_must zfs snapshot $TESTPOOL/$TESTFS2@snap1 + +# Remove the empty files created in the first snapshot +for i in {1..1000}; do + log_must rm /$TESTPOOL/$TESTFS2/dir/file-$i +done +sync + +log_must zfs snapshot $TESTPOOL/$TESTFS2@snap2 +expected_cksum=$(recursive_cksum /$TESTPOOL/$TESTFS2) + +log_must eval "zfs send -wp $TESTPOOL/$TESTFS2@snap1 > $sendfile" +log_must eval "zfs send -wp -i @snap1 $TESTPOOL/$TESTFS2@snap2 > $sendfile2" + +log_must eval "zfs recv -F $TESTPOOL/recv < $sendfile" +log_must eval "zfs recv -F $TESTPOOL/recv < $sendfile2" +log_must zfs load-key $TESTPOOL/recv + +log_must zfs mount -a +actual_cksum=$(recursive_cksum /$TESTPOOL/recv) +[[ "$expected_cksum" != "$actual_cksum" ]] && \ + log_fail "Recursive checksums differ ($expected_cksum != $actual_cksum)" + +# log_must xattrtest -f 10 -o3 -y -p /$TESTPOOL/recv/xattrondir +# log_must xattrtest -f 10 -o3 -y -p /$TESTPOOL/recv/xattrsadir + +log_pass "Verified 'zfs send -w' works with many different file layouts" diff --git a/usr/src/test/zfs-tests/tests/functional/rsend/send_encrypted_hierarchy.ksh b/usr/src/test/zfs-tests/tests/functional/rsend/send_encrypted_hierarchy.ksh new file mode 100644 index 0000000000..5e19a6b6c0 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/rsend/send_encrypted_hierarchy.ksh @@ -0,0 +1,96 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common 
Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 by Datto Inc. All rights reserved. +# + +. $STF_SUITE/tests/functional/rsend/rsend.kshlib +. $STF_SUITE/tests/functional/cli_root/zfs_load-key/zfs_load-key_common.kshlib + +# +# DESCRIPTION: +# Raw recursive sends preserve filesystem structure. +# +# STRATEGY: +# 1. Create an encrypted filesystem with a clone and a child +# 2. Snapshot and send the filesystem tree +# 3. Verify that the filesystem structure was correctly received +# 4. Change the child to an encryption root and promote the clone +# 5. Snapshot and send the filesystem tree again +# 6. Verify that the new structure is received correctly +# + +verify_runnable "both" + +function cleanup +{ + log_must cleanup_pool $POOL + log_must cleanup_pool $POOL2 + log_must setup_test_model $POOL +} + +log_assert "Raw recursive sends preserve filesystem structure." 
+log_onexit cleanup
+
+# Create the filesystem hierarchy
+log_must cleanup_pool $POOL
+log_must eval "echo $PASSPHRASE | zfs create -o encryption=on" \
+	"-o keyformat=passphrase $POOL/$FS"
+log_must zfs snapshot $POOL/$FS@snap
+log_must zfs clone $POOL/$FS@snap $POOL/clone
+log_must zfs create $POOL/$FS/child
+
+# Back up the tree and verify the structure
+log_must zfs snapshot -r $POOL@before
+log_must eval "zfs send -wR $POOL@before > $BACKDIR/fs-before-R"
+log_must eval "zfs receive -d -F $POOL2 < $BACKDIR/fs-before-R"
+dstds=$(get_dst_ds $POOL/$FS $POOL2)
+log_must cmp_ds_subs $POOL/$FS $dstds
+
+log_must verify_encryption_root $POOL/$FS $POOL/$FS
+log_must verify_keylocation $POOL/$FS "prompt"
+log_must verify_origin $POOL/$FS "-"
+
+log_must verify_encryption_root $POOL/clone $POOL/$FS
+log_must verify_keylocation $POOL/clone "none"
+log_must verify_origin $POOL/clone "$POOL/$FS@snap"
+
+log_must verify_encryption_root $POOL/$FS/child $POOL/$FS
+log_must verify_keylocation $POOL/$FS/child "none"
+
+# Alter the hierarchy and re-send
+log_must eval "echo $PASSPHRASE1 | zfs change-key -o keyformat=passphrase" \
+	"$POOL/$FS/child"
+log_must zfs promote $POOL/clone
+log_must zfs snapshot -r $POOL@after
+log_must eval "zfs send -wR -i $POOL@before $POOL@after >" \
+	"$BACKDIR/fs-after-R"
+log_must eval "zfs receive -d -F $POOL2 < $BACKDIR/fs-after-R"
+log_must cmp_ds_subs $POOL/$FS $dstds
+
+log_must verify_encryption_root $POOL/$FS $POOL/clone
+log_must verify_keylocation $POOL/$FS "none"
+log_must verify_origin $POOL/$FS "$POOL/clone@snap"
+
+log_must verify_encryption_root $POOL/clone $POOL/clone
+log_must verify_keylocation $POOL/clone "prompt"
+log_must verify_origin $POOL/clone "-"
+
+log_must verify_encryption_root $POOL/$FS/child $POOL/$FS/child
+log_must verify_keylocation $POOL/$FS/child "prompt"
+
+log_pass "Raw recursive sends preserve filesystem structure." 
diff --git a/usr/src/test/zfs-tests/tests/functional/rsend/send_encrypted_truncated_files.ksh b/usr/src/test/zfs-tests/tests/functional/rsend/send_encrypted_truncated_files.ksh new file mode 100644 index 0000000000..d701bcecb9 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/rsend/send_encrypted_truncated_files.ksh @@ -0,0 +1,118 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2018 by Datto Inc. All rights reserved. +# + +. $STF_SUITE/tests/functional/rsend/rsend.kshlib + +# +# DESCRIPTION: +# +# +# STRATEGY: +# 1. Create a new encrypted filesystem +# 2. Add a 4 files that are to be truncated later +# 3. Take a snapshot of the filesystem +# 4. Truncate one of the files from 32M to 128k +# 5. Truncate one of the files from 512k to 384k +# 6. Truncate one of the files from 512k to 0 to 384k via reallocation +# 7. Truncate one of the files from 1k to 0 to 512b via reallocation +# 8. Take another snapshot of the filesystem +# 9. Send and receive both snapshots +# 10. 
Mount the filesystem and check the contents +# + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS2 && \ + log_must zfs destroy -r $TESTPOOL/$TESTFS2 + datasetexists $TESTPOOL/recv && \ + log_must zfs destroy -r $TESTPOOL/recv + [[ -f $keyfile ]] && log_must rm $keyfile + [[ -f $sendfile ]] && log_must rm $sendfile +} +log_onexit cleanup + +function recursive_cksum +{ + find $1 -type f -exec sha256sum {} \; | \ + sort -k 2 | awk '{ print $1 }' | sha256sum +} + +log_assert "Verify 'zfs send -w' works with many different file layouts" + +typeset keyfile=/$TESTPOOL/pkey +typeset sendfile=/$TESTPOOL/sendfile +typeset sendfile2=/$TESTPOOL/sendfile2 + +# Create an encrypted dataset +log_must eval "echo 'password' > $keyfile" +log_must zfs create -o encryption=on -o keyformat=passphrase \ + -o keylocation=file://$keyfile $TESTPOOL/$TESTFS2 + +# Explicitly set the recordsize since the truncation sizes below depend on +# this value being 128k. This is currently same as the default recordsize. +log_must zfs set recordsize=128k $TESTPOOL/$TESTFS2 + +# Create files with varied layouts on disk +log_must mkfile 32M /$TESTPOOL/$TESTFS2/truncated +log_must mkfile 524288 /$TESTPOOL/$TESTFS2/truncated2 +log_must mkfile 524288 /$TESTPOOL/$TESTFS2/truncated3 +log_must mkfile 1024 /$TESTPOOL/$TESTFS2/truncated4 + +log_must zfs snapshot $TESTPOOL/$TESTFS2@snap1 + +# +# Truncate files created in the first snapshot. The first tests +# truncating a large file to a single block. The second tests +# truncating one block off the end of a file without changing +# the required nlevels to hold it. The third tests handling +# of a maxblkid that is dropped and then raised again. The +# fourth tests an object that is truncated from a single block +# to a smaller single block. 
+# +log_must truncate -s 131072 /$TESTPOOL/$TESTFS2/truncated +log_must truncate -s 393216 /$TESTPOOL/$TESTFS2/truncated2 +log_must rm -f /$TESTPOOL/$TESTFS2/truncated3 +log_must rm -f /$TESTPOOL/$TESTFS2/truncated4 +log_must zpool sync $TESTPOOL +log_must zfs umount $TESTPOOL/$TESTFS2 +log_must zfs mount $TESTPOOL/$TESTFS2 +log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS2/truncated3 \ + bs=128k count=3 iflag=fullblock +log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS2/truncated4 \ + bs=512 count=1 iflag=fullblock + +log_must zfs snapshot $TESTPOOL/$TESTFS2@snap2 +expected_cksum=$(recursive_cksum /$TESTPOOL/$TESTFS2) + +log_must eval "zfs send -wp $TESTPOOL/$TESTFS2@snap1 > $sendfile" +log_must eval "zfs send -wp -i @snap1 $TESTPOOL/$TESTFS2@snap2 > $sendfile2" + +log_must eval "zfs recv -F $TESTPOOL/recv < $sendfile" +log_must eval "zfs recv -F $TESTPOOL/recv < $sendfile2" +log_must zfs load-key $TESTPOOL/recv + +log_must zfs mount -a +actual_cksum=$(recursive_cksum /$TESTPOOL/recv) +[[ "$expected_cksum" != "$actual_cksum" ]] && \ + log_fail "Recursive checksums differ ($expected_cksum != $actual_cksum)" + +log_pass "Verified 'zfs send -w' works with many different file layouts" diff --git a/usr/src/test/zfs-tests/tests/functional/rsend/send_freeobjects.ksh b/usr/src/test/zfs-tests/tests/functional/rsend/send_freeobjects.ksh index 6533352a9a..6533352a9a 100755..100644 --- a/usr/src/test/zfs-tests/tests/functional/rsend/send_freeobjects.ksh +++ b/usr/src/test/zfs-tests/tests/functional/rsend/send_freeobjects.ksh diff --git a/usr/src/test/zfs-tests/tests/functional/rsend/send_mixed_raw.ksh b/usr/src/test/zfs-tests/tests/functional/rsend/send_mixed_raw.ksh new file mode 100755 index 0000000000..eea535af11 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/rsend/send_mixed_raw.ksh @@ -0,0 +1,118 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License 
("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# +# +# Copyright (c) 2019 Datto, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# Verify that 'zfs receive' produces an error when mixing +# raw and non-raw sends in a way that would break IV set +# consistency. +# +# STRATEGY: +# 1. Create an initial dataset with 3 snapshots. +# 2. Perform a raw send of the first snapshot to 2 other datasets. +# 3. Perform a non-raw send of the second snapshot to one of +# the other datasets. Perform a raw send from this dataset to +# the last one. +# 4. Attempt to raw send the final snapshot of the first dataset +# to the other 2 datasets, which should fail. +# 5. Repeat steps 1-4, but using bookmarks for incremental sends. 
+# +# +# A B C notes +# ------------------------------------------------------------------------------ +# snap1 ---raw---> snap1 --raw--> snap1 # all snaps initialized via raw send +# snap2 -non-raw-> snap2 --raw--> snap2 # A sends non-raw to B, B sends raw to C +# snap3 ------------raw---------> snap3 # attempt send to C (should fail) +# + + +verify_runnable "both" + +function cleanup +{ + datasetexists $TESTPOOL/$TESTFS3 && \ + log_must zfs destroy -r $TESTPOOL/$TESTFS3 + datasetexists $TESTPOOL/$TESTFS2 && \ + log_must zfs destroy -r $TESTPOOL/$TESTFS2 + datasetexists $TESTPOOL/$TESTFS1 && \ + log_must zfs destroy -r $TESTPOOL/$TESTFS1 +} +log_onexit cleanup + +log_assert "Mixing raw and non-raw receives should fail" + +typeset passphrase="password" + +log_must eval "echo $passphrase | zfs create -o encryption=on" \ + "-o keyformat=passphrase $TESTPOOL/$TESTFS1" + +log_must zfs snapshot $TESTPOOL/$TESTFS1@1 +log_must touch /$TESTPOOL/$TESTFS1/a +log_must zfs snapshot $TESTPOOL/$TESTFS1@2 +log_must touch /$TESTPOOL/$TESTFS1/b +log_must zfs snapshot $TESTPOOL/$TESTFS1@3 + +# Testing with snapshots +log_must eval "zfs send -w $TESTPOOL/$TESTFS1@1 |" \ + "zfs receive $TESTPOOL/$TESTFS2" +log_must eval "echo $passphrase | zfs load-key $TESTPOOL/$TESTFS2" +log_must eval "zfs send -w $TESTPOOL/$TESTFS2@1 |" \ + "zfs receive $TESTPOOL/$TESTFS3" +log_must eval "echo $passphrase | zfs load-key $TESTPOOL/$TESTFS3" + +log_must eval "zfs send -i $TESTPOOL/$TESTFS1@1 $TESTPOOL/$TESTFS1@2 |" \ + "zfs receive $TESTPOOL/$TESTFS2" +log_must eval "zfs send -w -i $TESTPOOL/$TESTFS2@1 $TESTPOOL/$TESTFS2@2 |" \ + "zfs receive $TESTPOOL/$TESTFS3" + +log_mustnot eval "zfs send -w -i $TESTPOOL/$TESTFS1@2 $TESTPOOL/$TESTFS1@3 |" \ + "zfs receive $TESTPOOL/$TESTFS2" +log_mustnot eval "zfs send -w -i $TESTPOOL/$TESTFS2@2 $TESTPOOL/$TESTFS2@3 |" \ + "zfs receive $TESTPOOL/$TESTFS3" + +log_must zfs destroy -r $TESTPOOL/$TESTFS3 +log_must zfs destroy -r $TESTPOOL/$TESTFS2 + +# Testing with 
bookmarks +log_must zfs bookmark $TESTPOOL/$TESTFS1@1 $TESTPOOL/$TESTFS1#b1 +log_must zfs bookmark $TESTPOOL/$TESTFS1@2 $TESTPOOL/$TESTFS1#b2 + +log_must eval "zfs send -w $TESTPOOL/$TESTFS1@1 |" \ + "zfs receive $TESTPOOL/$TESTFS2" +log_must eval "echo $passphrase | zfs load-key $TESTPOOL/$TESTFS2" + +log_must zfs bookmark $TESTPOOL/$TESTFS2@1 $TESTPOOL/$TESTFS2#b1 + +log_must eval "zfs send -w $TESTPOOL/$TESTFS2@1 |" \ + "zfs receive $TESTPOOL/$TESTFS3" +log_must eval "echo $passphrase | zfs load-key $TESTPOOL/$TESTFS3" + +log_must eval "zfs send -i $TESTPOOL/$TESTFS1#b1 $TESTPOOL/$TESTFS1@2 |" \ + "zfs receive $TESTPOOL/$TESTFS2" +log_must eval "zfs send -w -i $TESTPOOL/$TESTFS2#b1 $TESTPOOL/$TESTFS2@2 |" \ + "zfs receive $TESTPOOL/$TESTFS3" + +log_mustnot eval "zfs send -w -i $TESTPOOL/$TESTFS1#b2" \ + "$TESTPOOL/$TESTFS1@3 | zfs receive $TESTPOOL/$TESTFS2" +log_mustnot eval "zfs send -w -i $TESTPOOL/$TESTFS2#b2" \ + "$TESTPOOL/$TESTFS2@3 | zfs receive $TESTPOOL/$TESTFS3" + +log_pass "Mixing raw and non-raw receives fail as expected" diff --git a/usr/src/test/zfs-tests/tests/functional/rsend/send_realloc_dnode_size.ksh b/usr/src/test/zfs-tests/tests/functional/rsend/send_realloc_dnode_size.ksh index 12a72fa092..12a72fa092 100755..100644 --- a/usr/src/test/zfs-tests/tests/functional/rsend/send_realloc_dnode_size.ksh +++ b/usr/src/test/zfs-tests/tests/functional/rsend/send_realloc_dnode_size.ksh diff --git a/usr/src/test/zfs-tests/tests/functional/rsend/send_realloc_encrypted_files.ksh b/usr/src/test/zfs-tests/tests/functional/rsend/send_realloc_encrypted_files.ksh new file mode 100644 index 0000000000..0649beaa35 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/rsend/send_realloc_encrypted_files.ksh @@ -0,0 +1,112 @@ +#!/bin/ksh + +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. 
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2019 by Lawrence Livermore National Security, LLC.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/rsend/rsend.kshlib
+
+#
+# Description:
+# Verify encrypted raw incremental receives handle dnode reallocation.
+
+# Strategy:
+# 1. Create a pool containing an encrypted filesystem.
+# 2. Use 'zfs send -wp' to perform a raw send of the initial filesystem.
+# 3. Repeat the following steps N times to verify raw incremental receives.
+#   a) Randomly change several key dataset properties.
+#   b) Modify the contents of the filesystem such that dnode reallocation
+#      is likely during the 'zfs receive', and receive_object() exercises
+#      as much of its functionality as possible.
+#   c) Create a new snapshot and generate a raw incremental stream.
+#   d) Receive the raw incremental stream and verify the received contents.
+#   e) Destroy the incremental stream and old snapshot.
+#
+
+verify_runnable "both"
+
+log_assert "Verify encrypted raw incremental receive handles reallocation"
+
+function cleanup
+{
+	rm -f $BACKDIR/fs@*
+	rm -f $keyfile
+	destroy_dataset $POOL/fs "-rR"
+	destroy_dataset $POOL/newfs "-rR"
+}
+
+log_onexit cleanup
+
+typeset keyfile=/$TESTPOOL/pkey
+
+# Create an encrypted dataset
+log_must eval "echo 'password' > $keyfile"
+log_must zfs create -o encryption=on -o keyformat=passphrase \
+    -o keylocation=file://$keyfile $POOL/fs
+
+last_snap=1
+log_must zfs snapshot $POOL/fs@snap${last_snap}
+log_must eval "zfs send -wp $POOL/fs@snap${last_snap} \
+    >$BACKDIR/fs@snap${last_snap}"
+log_must eval "zfs recv $POOL/newfs < $BACKDIR/fs@snap${last_snap}"
+
+# Set atime=off to prevent the recursive_cksum from modifying newfs. 
+log_must zfs set atime=off $POOL/newfs
+
+for i in {1..5}; do
+	# Randomly modify several dataset properties in order to generate
+	# more interesting incremental send streams.
+	rand_set_prop $POOL/fs checksum "off" "fletcher4" "sha256"
+	rand_set_prop $POOL/fs compression "off" "lzjb" "gzip" "lz4"
+	rand_set_prop $POOL/fs recordsize "32K" "128K"
+	rand_set_prop $POOL/fs dnodesize "legacy" "auto" "4k"
+	rand_set_prop $POOL/fs xattr "on" "sa"
+
+	# Churn the filesystem in such a way that we're likely to be both
+	# allocating and reallocating objects in the incremental stream.
+	#
+	# Disable xattrs until the following spill block issue is resolved:
+	# https://github.com/openzfs/openzfs/pull/705
+	#
+	log_must churn_files 1000 524288 $POOL/fs 0
+	expected_cksum=$(recursive_cksum /$POOL/fs)
+
+	# Create a snapshot and use it to send an incremental stream.
+	this_snap=$((last_snap + 1))
+	log_must zfs snapshot $POOL/fs@snap${this_snap}
+	log_must eval "zfs send -wp -i $POOL/fs@snap${last_snap} \
+	    $POOL/fs@snap${this_snap} > $BACKDIR/fs@snap${this_snap}"
+
+	# Receive the incremental stream and verify the received contents.
+	log_must eval "zfs recv -Fu $POOL/newfs < $BACKDIR/fs@snap${this_snap}"
+
+	log_must zfs load-key $POOL/newfs
+	log_must zfs mount $POOL/newfs
+	actual_cksum=$(recursive_cksum /$POOL/newfs)
+	log_must zfs umount $POOL/newfs
+	log_must zfs unload-key $POOL/newfs
+
+	if [[ "$expected_cksum" != "$actual_cksum" ]]; then
+		log_fail "Checksums differ ($expected_cksum != $actual_cksum)"
+	fi
+
+	# Destroy the incremental stream and old snapshot. 
+ rm -f $BACKDIR/fs@snap${last_snap} + log_must zfs destroy $POOL/fs@snap${last_snap} + log_must zfs destroy $POOL/newfs@snap${last_snap} + last_snap=$this_snap +done + +log_pass "Verify encrypted raw incremental receive handles reallocation" diff --git a/usr/src/uts/common/Makefile.files b/usr/src/uts/common/Makefile.files index 52b9f08c74..5c7ed57fe3 100644 --- a/usr/src/uts/common/Makefile.files +++ b/usr/src/uts/common/Makefile.files @@ -1342,6 +1342,7 @@ ZFS_COMMON_OBJS += \ dnode_sync.o \ dsl_bookmark.o \ dsl_dir.o \ + dsl_crypt.o \ dsl_dataset.o \ dsl_deadlist.o \ dsl_destroy.o \ @@ -1354,6 +1355,7 @@ ZFS_COMMON_OBJS += \ dsl_scan.o \ zfeature.o \ gzip.o \ + hkdf.o \ lz4.o \ lzjb.o \ metaslab.o \ @@ -1409,6 +1411,7 @@ ZFS_COMMON_OBJS += \ zio.o \ zio_checksum.o \ zio_compress.o \ + zio_crypt.o \ zio_inject.o \ zle.o \ zrlock.o \ diff --git a/usr/src/uts/common/crypto/core/kcf_prov_lib.c b/usr/src/uts/common/crypto/core/kcf_prov_lib.c index 65322bfb3c..8982e8425c 100644 --- a/usr/src/uts/common/crypto/core/kcf_prov_lib.c +++ b/usr/src/uts/common/crypto/core/kcf_prov_lib.c @@ -246,7 +246,7 @@ crypto_update_uio(void *ctx, crypto_data_t *input, crypto_data_t *output, offset >= uiop->uio_iov[vec_idx].iov_len; offset -= uiop->uio_iov[vec_idx++].iov_len) ; - if (vec_idx == uiop->uio_iovcnt) { + if (vec_idx == uiop->uio_iovcnt && length > 0) { /* * The caller specified an offset that is larger than the * total size of the buffers it provided. diff --git a/usr/src/uts/common/fs/zfs/abd.c b/usr/src/uts/common/fs/zfs/abd.c index 0ab3513718..5417514e41 100644 --- a/usr/src/uts/common/fs/zfs/abd.c +++ b/usr/src/uts/common/fs/zfs/abd.c @@ -427,8 +427,9 @@ abd_alloc_for_io(size_t size, boolean_t is_metadata) * buffer data with sabd. Use abd_put() to free. sabd must not be freed while * any derived ABDs exist. 
 */
-abd_t *
-abd_get_offset(abd_t *sabd, size_t off)
+/* ARGSUSED */
+static inline abd_t *
+abd_get_offset_impl(abd_t *sabd, size_t off, size_t size)
 {
 	abd_t *abd;
@@ -480,6 +481,25 @@ abd_get_offset(abd_t *sabd, size_t off)
 	return (abd);
 }
 
+abd_t *
+abd_get_offset(abd_t *sabd, size_t off)
+{
+	size_t size = sabd->abd_size > off ? sabd->abd_size - off : 0;
+
+	VERIFY3U(size, >, 0);
+
+	return (abd_get_offset_impl(sabd, off, size));
+}
+
+abd_t *
+abd_get_offset_size(abd_t *sabd, size_t off, size_t size)
+{
+	ASSERT3U(off + size, <=, sabd->abd_size);
+
+	return (abd_get_offset_impl(sabd, off, size));
+}
+
+
 /*
 * Allocate a linear ABD structure for buf. You must free this with abd_put()
 * since the resulting ABD doesn't own its own buffer.
diff --git a/usr/src/uts/common/fs/zfs/arc.c b/usr/src/uts/common/fs/zfs/arc.c
index 3a07d72d93..90f5314d81 100644
--- a/usr/src/uts/common/fs/zfs/arc.c
+++ b/usr/src/uts/common/fs/zfs/arc.c
@@ -250,6 +250,21 @@
 * ARC is disabled, then the L2ARC's block must be transformed to look
 * like the physical block in the main data pool before comparing the
 * checksum and determining its validity.
+ *
+ * The L1ARC has a slightly different system for storing encrypted data.
+ * Raw (encrypted + possibly compressed) data has a few subtle differences from
+ * data that is just compressed. The biggest difference is that it is not
+ * possible to decrypt encrypted data (or vice versa) if the keys aren't loaded.
+ * The other difference is that encryption cannot be treated as a suggestion.
+ * If a caller would prefer compressed data, but they actually wind up with
+ * uncompressed data the worst thing that could happen is there might be a
+ * performance hit. If the caller requests encrypted data, however, we must be
+ * sure they actually get it or else secret information could be leaked. Raw
+ * data is stored in hdr->b_crypt_hdr.b_rabd. 
An encrypted header, therefore, + * may have both an encrypted version and a decrypted version of its data at + * once. When a caller needs a raw arc_buf_t, it is allocated and the data is + * copied out of this header. To avoid complications with b_pabd, raw buffers + * cannot be shared. */ #include <sys/spa.h> @@ -266,6 +281,8 @@ #include <sys/zio_checksum.h> #include <sys/multilist.h> #include <sys/abd.h> +#include <sys/zil.h> +#include <sys/fm/fs/zfs.h> #ifdef _KERNEL #include <sys/vmsystm.h> #include <vm/anon.h> @@ -481,7 +498,7 @@ typedef struct arc_stats { kstat_named_t arcstat_evict_skip; /* * Number of times arc_evict_state() was unable to evict enough - * buffers to reach it's target amount. + * buffers to reach its target amount. */ kstat_named_t arcstat_evict_not_enough; kstat_named_t arcstat_evict_l2_cached; @@ -883,7 +900,10 @@ struct arc_callback { void *acb_private; arc_read_done_func_t *acb_done; arc_buf_t *acb_buf; + boolean_t acb_encrypted; boolean_t acb_compressed; + boolean_t acb_noauth; + zbookmark_phys_t acb_zb; zio_t *acb_zio_dummy; zio_t *acb_zio_head; arc_callback_t *acb_next; @@ -963,6 +983,36 @@ typedef struct l1arc_buf_hdr { abd_t *b_pabd; } l1arc_buf_hdr_t; +/* + * Encrypted blocks will need to be stored encrypted on the L2ARC + * disk as they appear in the main pool. In order for this to work we + * need to pass around the encryption parameters so they can be used + * to write data to the L2ARC. This struct is only defined in the + * arc_buf_hdr_t if the L1 header is defined and has the ARC_FLAG_ENCRYPTED + * flag set. 
+ */
+typedef struct arc_buf_hdr_crypt {
+	abd_t		*b_rabd;	/* raw encrypted data */
+	dmu_object_type_t	b_ot;	/* object type */
+	uint32_t	b_ebufcnt;	/* number of encrypted buffers */
+
+	/* dsobj for looking up encryption key for l2arc encryption */
+	uint64_t	b_dsobj;	/* for looking up key */
+
+	/* encryption parameters */
+	uint8_t	b_salt[ZIO_DATA_SALT_LEN];
+	uint8_t	b_iv[ZIO_DATA_IV_LEN];
+
+	/*
+	 * Technically this could be removed since we will always be able to
+	 * get the mac from the bp when we need it. However, it is inconvenient
+	 * for callers of arc code to have to pass a bp in all the time. This
+	 * also allows us to assert that L2ARC data is properly encrypted to
+	 * match the data in the main storage pool.
+	 */
+	uint8_t	b_mac[ZIO_DATA_MAC_LEN];
+} arc_buf_hdr_crypt_t;
+
 typedef struct l2arc_dev l2arc_dev_t;
 
 typedef struct l2arc_buf_hdr {
@@ -1013,6 +1063,11 @@ struct arc_buf_hdr {
 		l2arc_buf_hdr_t b_l2hdr;
 		/* L1ARC fields. Undefined when in l2arc_only state */
 		l1arc_buf_hdr_t b_l1hdr;
+		/*
+		 * Encryption parameters. Defined only when ARC_FLAG_ENCRYPTED
+		 * is set and the L1 header exists. 
+ */ + arc_buf_hdr_crypt_t b_crypt_hdr; }; #define GHOST_STATE(state) \ @@ -1035,6 +1090,8 @@ struct arc_buf_hdr { #define HDR_L2_WRITING(hdr) ((hdr)->b_flags & ARC_FLAG_L2_WRITING) #define HDR_L2_EVICTED(hdr) ((hdr)->b_flags & ARC_FLAG_L2_EVICTED) #define HDR_L2_WRITE_HEAD(hdr) ((hdr)->b_flags & ARC_FLAG_L2_WRITE_HEAD) +#define HDR_PROTECTED(hdr) ((hdr)->b_flags & ARC_FLAG_PROTECTED) +#define HDR_NOAUTH(hdr) ((hdr)->b_flags & ARC_FLAG_NOAUTH) #define HDR_SHARED_DATA(hdr) ((hdr)->b_flags & ARC_FLAG_SHARED_DATA) #define HDR_ISTYPE_METADATA(hdr) \ @@ -1043,6 +1100,13 @@ struct arc_buf_hdr { #define HDR_HAS_L1HDR(hdr) ((hdr)->b_flags & ARC_FLAG_HAS_L1HDR) #define HDR_HAS_L2HDR(hdr) ((hdr)->b_flags & ARC_FLAG_HAS_L2HDR) +#define HDR_HAS_RABD(hdr) \ + (HDR_HAS_L1HDR(hdr) && HDR_PROTECTED(hdr) && \ + (hdr)->b_crypt_hdr.b_rabd != NULL) +#define HDR_ENCRYPTED(hdr) \ + (HDR_PROTECTED(hdr) && DMU_OT_IS_ENCRYPTED((hdr)->b_crypt_hdr.b_ot)) +#define HDR_AUTHENTICATED(hdr) \ + (HDR_PROTECTED(hdr) && !DMU_OT_IS_ENCRYPTED((hdr)->b_crypt_hdr.b_ot)) /* For storing compression mode in b_flags */ #define HDR_COMPRESS_OFFSET (highbit64(ARC_FLAG_COMPRESS_0) - 1) @@ -1055,12 +1119,14 @@ struct arc_buf_hdr { #define ARC_BUF_LAST(buf) ((buf)->b_next == NULL) #define ARC_BUF_SHARED(buf) ((buf)->b_flags & ARC_BUF_FLAG_SHARED) #define ARC_BUF_COMPRESSED(buf) ((buf)->b_flags & ARC_BUF_FLAG_COMPRESSED) +#define ARC_BUF_ENCRYPTED(buf) ((buf)->b_flags & ARC_BUF_FLAG_ENCRYPTED) /* * Other sizes */ -#define HDR_FULL_SIZE ((int64_t)sizeof (arc_buf_hdr_t)) +#define HDR_FULL_CRYPT_SIZE ((int64_t)sizeof (arc_buf_hdr_t)) +#define HDR_FULL_SIZE ((int64_t)offsetof(arc_buf_hdr_t, b_crypt_hdr)) #define HDR_L2ONLY_SIZE ((int64_t)offsetof(arc_buf_hdr_t, b_l1hdr)) /* @@ -1174,13 +1240,21 @@ static kcondvar_t l2arc_feed_thr_cv; static uint8_t l2arc_thread_exit; static abd_t *arc_get_data_abd(arc_buf_hdr_t *, uint64_t, void *); +typedef enum arc_fill_flags { + ARC_FILL_LOCKED = 1 << 0, /* hdr lock is held */ + 
ARC_FILL_COMPRESSED = 1 << 1, /* fill with compressed data */ + ARC_FILL_ENCRYPTED = 1 << 2, /* fill with encrypted data */ + ARC_FILL_NOAUTH = 1 << 3, /* don't attempt to authenticate */ + ARC_FILL_IN_PLACE = 1 << 4 /* fill in place (special case) */ +} arc_fill_flags_t; + static void *arc_get_data_buf(arc_buf_hdr_t *, uint64_t, void *); static void arc_get_data_impl(arc_buf_hdr_t *, uint64_t, void *); static void arc_free_data_abd(arc_buf_hdr_t *, abd_t *, uint64_t, void *); static void arc_free_data_buf(arc_buf_hdr_t *, void *, uint64_t, void *); static void arc_free_data_impl(arc_buf_hdr_t *hdr, uint64_t size, void *tag); -static void arc_hdr_free_pabd(arc_buf_hdr_t *); -static void arc_hdr_alloc_pabd(arc_buf_hdr_t *); +static void arc_hdr_free_pabd(arc_buf_hdr_t *, boolean_t); +static void arc_hdr_alloc_pabd(arc_buf_hdr_t *, boolean_t); static void arc_access(arc_buf_hdr_t *, kmutex_t *); static boolean_t arc_is_overflowing(); static void arc_buf_watch(arc_buf_t *); @@ -1323,7 +1397,9 @@ buf_hash_remove(arc_buf_hdr_t *hdr) /* * Global data structures and functions for the buf kmem cache. 
*/ + static kmem_cache_t *hdr_full_cache; +static kmem_cache_t *hdr_full_crypt_cache; static kmem_cache_t *hdr_l2only_cache; static kmem_cache_t *buf_cache; @@ -1337,6 +1413,7 @@ buf_fini(void) for (i = 0; i < BUF_LOCKS; i++) mutex_destroy(&buf_hash_table.ht_locks[i].ht_lock); kmem_cache_destroy(hdr_full_cache); + kmem_cache_destroy(hdr_full_crypt_cache); kmem_cache_destroy(hdr_l2only_cache); kmem_cache_destroy(buf_cache); } @@ -1352,6 +1429,7 @@ hdr_full_cons(void *vbuf, void *unused, int kmflag) arc_buf_hdr_t *hdr = vbuf; bzero(hdr, HDR_FULL_SIZE); + hdr->b_l1hdr.b_byteswap = DMU_BSWAP_NUMFUNCS; cv_init(&hdr->b_l1hdr.b_cv, NULL, CV_DEFAULT, NULL); zfs_refcount_create(&hdr->b_l1hdr.b_refcnt); mutex_init(&hdr->b_l1hdr.b_freeze_lock, NULL, MUTEX_DEFAULT, NULL); @@ -1363,6 +1441,19 @@ hdr_full_cons(void *vbuf, void *unused, int kmflag) /* ARGSUSED */ static int +hdr_full_crypt_cons(void *vbuf, void *unused, int kmflag) +{ + arc_buf_hdr_t *hdr = vbuf; + + (void) hdr_full_cons(vbuf, unused, kmflag); + bzero(&hdr->b_crypt_hdr, sizeof (hdr->b_crypt_hdr)); + arc_space_consume(sizeof (hdr->b_crypt_hdr), ARC_SPACE_HDRS); + + return (0); +} + +/* ARGSUSED */ +static int hdr_l2only_cons(void *vbuf, void *unused, int kmflag) { arc_buf_hdr_t *hdr = vbuf; @@ -1406,6 +1497,16 @@ hdr_full_dest(void *vbuf, void *unused) /* ARGSUSED */ static void +hdr_full_crypt_dest(void *vbuf, void *unused) +{ + arc_buf_hdr_t *hdr = vbuf; + + hdr_full_dest(hdr, unused); + arc_space_return(sizeof (hdr->b_crypt_hdr), ARC_SPACE_HDRS); +} + +/* ARGSUSED */ +static void hdr_l2only_dest(void *vbuf, void *unused) { arc_buf_hdr_t *hdr = vbuf; @@ -1467,6 +1568,9 @@ retry: hdr_full_cache = kmem_cache_create("arc_buf_hdr_t_full", HDR_FULL_SIZE, 0, hdr_full_cons, hdr_full_dest, hdr_recl, NULL, NULL, 0); + hdr_full_crypt_cache = kmem_cache_create("arc_buf_hdr_t_full_crypt", + HDR_FULL_CRYPT_SIZE, 0, hdr_full_crypt_cons, hdr_full_crypt_dest, + hdr_recl, NULL, NULL, 0); hdr_l2only_cache = 
kmem_cache_create("arc_buf_hdr_t_l2only", HDR_L2ONLY_SIZE, 0, hdr_l2only_cons, hdr_l2only_dest, hdr_recl, NULL, NULL, 0); @@ -1501,6 +1605,47 @@ arc_buf_lsize(arc_buf_t *buf) return (HDR_GET_LSIZE(buf->b_hdr)); } +/* + * This function will return B_TRUE if the buffer is encrypted in memory. + * This buffer can be decrypted by calling arc_untransform(). + */ +boolean_t +arc_is_encrypted(arc_buf_t *buf) +{ + return (ARC_BUF_ENCRYPTED(buf) != 0); +} + +/* + * Returns B_TRUE if the buffer represents data that has not had its MAC + * verified yet. + */ +boolean_t +arc_is_unauthenticated(arc_buf_t *buf) +{ + return (HDR_NOAUTH(buf->b_hdr) != 0); +} + +void +arc_get_raw_params(arc_buf_t *buf, boolean_t *byteorder, uint8_t *salt, + uint8_t *iv, uint8_t *mac) +{ + arc_buf_hdr_t *hdr = buf->b_hdr; + + ASSERT(HDR_PROTECTED(hdr)); + + bcopy(hdr->b_crypt_hdr.b_salt, salt, ZIO_DATA_SALT_LEN); + bcopy(hdr->b_crypt_hdr.b_iv, iv, ZIO_DATA_IV_LEN); + bcopy(hdr->b_crypt_hdr.b_mac, mac, ZIO_DATA_MAC_LEN); + *byteorder = (hdr->b_l1hdr.b_byteswap == DMU_BSWAP_NUMFUNCS) ? + /* CONSTCOND */ + ZFS_HOST_BYTEORDER : !ZFS_HOST_BYTEORDER; +} + +/* + * Indicates how this buffer is compressed in memory. If it is not compressed + * the value will be ZIO_COMPRESS_OFF. It can be made normally readable with + * arc_untransform() as long as it is also unencrypted. + */ enum zio_compress arc_get_compression(arc_buf_t *buf) { @@ -1510,6 +1655,18 @@ arc_get_compression(arc_buf_t *buf) #define ARC_MINTIME (hz>>4) /* 62 ms */ +/* + * Return the compression algorithm used to store this data in the ARC. If ARC + * compression is enabled or this is an encrypted block, this will be the same + * as what's used to store it on-disk. Otherwise, this will be ZIO_COMPRESS_OFF. + */ +static inline enum zio_compress +arc_hdr_get_compress(arc_buf_hdr_t *hdr) +{ + return (HDR_COMPRESSION_ENABLED(hdr) ? 
+ HDR_GET_COMPRESS(hdr) : ZIO_COMPRESS_OFF); +} + static inline boolean_t arc_buf_is_shared(arc_buf_t *buf) { @@ -1537,6 +1694,7 @@ static inline void arc_cksum_free(arc_buf_hdr_t *hdr) { ASSERT(HDR_HAS_L1HDR(hdr)); + mutex_enter(&hdr->b_l1hdr.b_freeze_lock); if (hdr->b_l1hdr.b_freeze_cksum != NULL) { kmem_free(hdr->b_l1hdr.b_freeze_cksum, sizeof (zio_cksum_t)); @@ -1547,6 +1705,7 @@ arc_cksum_free(arc_buf_hdr_t *hdr) /* * Return true iff at least one of the bufs on hdr is not compressed. + * Encrypted buffers count as compressed. */ static boolean_t arc_hdr_has_uncompressed_buf(arc_buf_hdr_t *hdr) @@ -1593,6 +1752,11 @@ arc_cksum_verify(arc_buf_t *buf) mutex_exit(&hdr->b_l1hdr.b_freeze_lock); } +/* + * This function makes the assumption that data stored in the L2ARC + * will be transformed exactly as it is in the main pool. Because of + * this we can verify the checksum against the reading process's bp. + */ static boolean_t arc_cksum_is_equal(arc_buf_hdr_t *hdr, zio_t *zio) { @@ -1689,6 +1853,7 @@ arc_cksum_compute(arc_buf_t *buf) return; } + ASSERT(!ARC_BUF_ENCRYPTED(buf)); ASSERT(!ARC_BUF_COMPRESSED(buf)); hdr->b_l1hdr.b_freeze_cksum = kmem_alloc(sizeof (zio_cksum_t), KM_SLEEP); @@ -1881,15 +2046,14 @@ arc_hdr_set_compress(arc_buf_hdr_t *hdr, enum zio_compress cmp) */ if (!zfs_compressed_arc_enabled || HDR_GET_PSIZE(hdr) == 0) { arc_hdr_clear_flags(hdr, ARC_FLAG_COMPRESSED_ARC); - HDR_SET_COMPRESS(hdr, ZIO_COMPRESS_OFF); ASSERT(!HDR_COMPRESSION_ENABLED(hdr)); - ASSERT3U(HDR_GET_COMPRESS(hdr), ==, ZIO_COMPRESS_OFF); } else { arc_hdr_set_flags(hdr, ARC_FLAG_COMPRESSED_ARC); - HDR_SET_COMPRESS(hdr, cmp); - ASSERT3U(HDR_GET_COMPRESS(hdr), ==, cmp); ASSERT(HDR_COMPRESSION_ENABLED(hdr)); } + + HDR_SET_COMPRESS(hdr, cmp); + ASSERT3U(HDR_GET_COMPRESS(hdr), ==, cmp); } /* @@ -1921,15 +2085,250 @@ arc_buf_try_copy_decompressed_data(arc_buf_t *buf) } /* + * Note: With encryption support, the following assertion is no longer + * necessarily valid. 
If we receive two back to back raw snapshots + * (send -w), the second receive can use a hdr with a cksum already + * calculated. This happens via: + * dmu_recv_stream() -> receive_read_record() -> arc_loan_raw_buf() + * The rsend/send_mixed_raw test case exercises this code path. + * * There were no decompressed bufs, so there should not be a * checksum on the hdr either. + * EQUIV(!copied, hdr->b_l1hdr.b_freeze_cksum == NULL); */ - EQUIV(!copied, hdr->b_l1hdr.b_freeze_cksum == NULL); return (copied); } /* + * Return the size of the block, b_pabd, that is stored in the arc_buf_hdr_t. + */ +static uint64_t +arc_hdr_size(arc_buf_hdr_t *hdr) +{ + uint64_t size; + + if (arc_hdr_get_compress(hdr) != ZIO_COMPRESS_OFF && + HDR_GET_PSIZE(hdr) > 0) { + size = HDR_GET_PSIZE(hdr); + } else { + ASSERT3U(HDR_GET_LSIZE(hdr), !=, 0); + size = HDR_GET_LSIZE(hdr); + } + return (size); +} + +static int +arc_hdr_authenticate(arc_buf_hdr_t *hdr, spa_t *spa, uint64_t dsobj) +{ + int ret; + uint64_t csize; + uint64_t lsize = HDR_GET_LSIZE(hdr); + uint64_t psize = HDR_GET_PSIZE(hdr); + void *tmpbuf = NULL; + abd_t *abd = hdr->b_l1hdr.b_pabd; + + ASSERT(HDR_LOCK(hdr) == NULL || MUTEX_HELD(HDR_LOCK(hdr))); + ASSERT(HDR_AUTHENTICATED(hdr)); + ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL); + + /* + * The MAC is calculated on the compressed data that is stored on disk. + * However, if compressed arc is disabled we will only have the + * decompressed data available to us now. Compress it into a temporary + * abd so we can verify the MAC. The performance overhead of this will + * be relatively low, since most objects in an encrypted objset will + * be encrypted (instead of authenticated) anyway. 
+ */ + if (HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF && + !HDR_COMPRESSION_ENABLED(hdr)) { + tmpbuf = zio_buf_alloc(lsize); + abd = abd_get_from_buf(tmpbuf, lsize); + abd_take_ownership_of_buf(abd, B_TRUE); + + csize = zio_compress_data(HDR_GET_COMPRESS(hdr), + hdr->b_l1hdr.b_pabd, tmpbuf, lsize); + ASSERT3U(csize, <=, psize); + abd_zero_off(abd, csize, psize - csize); + } + + /* + * Authentication is best effort. We authenticate whenever the key is + * available. If we succeed we clear ARC_FLAG_NOAUTH. + */ + if (hdr->b_crypt_hdr.b_ot == DMU_OT_OBJSET) { + ASSERT3U(HDR_GET_COMPRESS(hdr), ==, ZIO_COMPRESS_OFF); + ASSERT3U(lsize, ==, psize); + ret = spa_do_crypt_objset_mac_abd(B_FALSE, spa, dsobj, abd, + psize, hdr->b_l1hdr.b_byteswap != DMU_BSWAP_NUMFUNCS); + } else { + ret = spa_do_crypt_mac_abd(B_FALSE, spa, dsobj, abd, psize, + hdr->b_crypt_hdr.b_mac); + } + + if (ret == 0) + arc_hdr_clear_flags(hdr, ARC_FLAG_NOAUTH); + else if (ret != ENOENT) + goto error; + + if (tmpbuf != NULL) + abd_free(abd); + + return (0); + +error: + if (tmpbuf != NULL) + abd_free(abd); + + return (ret); +} + +/* + * This function will take a header that only has raw encrypted data in + * b_crypt_hdr.b_rabd and decrypt it into a new buffer which is stored in + * b_l1hdr.b_pabd. If designated in the header flags, this function will + * also decompress the data. 
+ */ +static int +arc_hdr_decrypt(arc_buf_hdr_t *hdr, spa_t *spa, const zbookmark_phys_t *zb) +{ + int ret; + abd_t *cabd = NULL; + void *tmp = NULL; + boolean_t no_crypt = B_FALSE; + boolean_t bswap = (hdr->b_l1hdr.b_byteswap != DMU_BSWAP_NUMFUNCS); + + ASSERT(HDR_LOCK(hdr) == NULL || MUTEX_HELD(HDR_LOCK(hdr))); + ASSERT(HDR_ENCRYPTED(hdr)); + + arc_hdr_alloc_pabd(hdr, B_FALSE); + + ret = spa_do_crypt_abd(B_FALSE, spa, zb, hdr->b_crypt_hdr.b_ot, + B_FALSE, bswap, hdr->b_crypt_hdr.b_salt, hdr->b_crypt_hdr.b_iv, + hdr->b_crypt_hdr.b_mac, HDR_GET_PSIZE(hdr), hdr->b_l1hdr.b_pabd, + hdr->b_crypt_hdr.b_rabd, &no_crypt); + if (ret != 0) + goto error; + + if (no_crypt) { + abd_copy(hdr->b_l1hdr.b_pabd, hdr->b_crypt_hdr.b_rabd, + HDR_GET_PSIZE(hdr)); + } + + /* + * If this header has disabled arc compression but the b_pabd is + * compressed after decrypting it, we need to decompress the newly + * decrypted data. + */ + if (HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF && + !HDR_COMPRESSION_ENABLED(hdr)) { + /* + * We want to make sure that we are correctly honoring the + * zfs_abd_scatter_enabled setting, so we allocate an abd here + * and then loan a buffer from it, rather than allocating a + * linear buffer and wrapping it in an abd later. 
+ */ + cabd = arc_get_data_abd(hdr, arc_hdr_size(hdr), hdr); + tmp = abd_borrow_buf(cabd, arc_hdr_size(hdr)); + + ret = zio_decompress_data(HDR_GET_COMPRESS(hdr), + hdr->b_l1hdr.b_pabd, tmp, HDR_GET_PSIZE(hdr), + HDR_GET_LSIZE(hdr)); + if (ret != 0) { + abd_return_buf(cabd, tmp, arc_hdr_size(hdr)); + goto error; + } + + abd_return_buf_copy(cabd, tmp, arc_hdr_size(hdr)); + arc_free_data_abd(hdr, hdr->b_l1hdr.b_pabd, + arc_hdr_size(hdr), hdr); + hdr->b_l1hdr.b_pabd = cabd; + } + + return (0); + +error: + arc_hdr_free_pabd(hdr, B_FALSE); + if (cabd != NULL) + arc_free_data_buf(hdr, cabd, arc_hdr_size(hdr), hdr); + + return (ret); +} + +/* + * This function is called during arc_buf_fill() to prepare the header's + * abd plaintext pointer for use. This involves authenticated protected + * data and decrypting encrypted data into the plaintext abd. + */ +static int +arc_fill_hdr_crypt(arc_buf_hdr_t *hdr, kmutex_t *hash_lock, spa_t *spa, + const zbookmark_phys_t *zb, boolean_t noauth) +{ + int ret; + + ASSERT(HDR_PROTECTED(hdr)); + + if (hash_lock != NULL) + mutex_enter(hash_lock); + + if (HDR_NOAUTH(hdr) && !noauth) { + /* + * The caller requested authenticated data but our data has + * not been authenticated yet. Verify the MAC now if we can. + */ + ret = arc_hdr_authenticate(hdr, spa, zb->zb_objset); + if (ret != 0) + goto error; + } else if (HDR_HAS_RABD(hdr) && hdr->b_l1hdr.b_pabd == NULL) { + /* + * If we only have the encrypted version of the data, but the + * unencrypted version was requested we take this opportunity + * to store the decrypted version in the header for future use. + */ + ret = arc_hdr_decrypt(hdr, spa, zb); + if (ret != 0) + goto error; + } + + ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL); + + if (hash_lock != NULL) + mutex_exit(hash_lock); + + return (0); + +error: + if (hash_lock != NULL) + mutex_exit(hash_lock); + + return (ret); +} + +/* + * This function is used by the dbuf code to decrypt bonus buffers in place. 
+ * The dbuf code itself doesn't have any locking for decrypting a shared dnode + * block, so we use the hash lock here to protect against concurrent calls to + * arc_buf_fill(). + */ +/* ARGSUSED */ +static void +arc_buf_untransform_in_place(arc_buf_t *buf, kmutex_t *hash_lock) +{ + arc_buf_hdr_t *hdr = buf->b_hdr; + + ASSERT(HDR_ENCRYPTED(hdr)); + ASSERT3U(hdr->b_crypt_hdr.b_ot, ==, DMU_OT_DNODE); + ASSERT(HDR_LOCK(hdr) == NULL || MUTEX_HELD(HDR_LOCK(hdr))); + ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL); + + zio_crypt_copy_dnode_bonus(hdr->b_l1hdr.b_pabd, buf->b_data, + arc_buf_size(buf)); + buf->b_flags &= ~ARC_BUF_FLAG_ENCRYPTED; + buf->b_flags &= ~ARC_BUF_FLAG_COMPRESSED; + hdr->b_crypt_hdr.b_ebufcnt -= 1; +} + +/* * Given a buf that has a data buffer attached to it, this function will * efficiently fill the buf with data of the specified compression setting from * the hdr and update the hdr's b_freeze_cksum if necessary. If the buf and hdr @@ -1943,15 +2342,90 @@ arc_buf_try_copy_decompressed_data(arc_buf_t *buf) * the correct-sized data buffer. */ static int -arc_buf_fill(arc_buf_t *buf, boolean_t compressed) +arc_buf_fill(arc_buf_t *buf, spa_t *spa, const zbookmark_phys_t *zb, + arc_fill_flags_t flags) { + int error = 0; arc_buf_hdr_t *hdr = buf->b_hdr; - boolean_t hdr_compressed = (HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF); + boolean_t hdr_compressed = + (arc_hdr_get_compress(hdr) != ZIO_COMPRESS_OFF); + boolean_t compressed = (flags & ARC_FILL_COMPRESSED) != 0; + boolean_t encrypted = (flags & ARC_FILL_ENCRYPTED) != 0; dmu_object_byteswap_t bswap = hdr->b_l1hdr.b_byteswap; + kmutex_t *hash_lock = (flags & ARC_FILL_LOCKED) ? 
NULL : HDR_LOCK(hdr);
 
 	ASSERT3P(buf->b_data, !=, NULL);
-	IMPLY(compressed, hdr_compressed);
+	IMPLY(compressed, hdr_compressed || ARC_BUF_ENCRYPTED(buf));
 	IMPLY(compressed, ARC_BUF_COMPRESSED(buf));
+	IMPLY(encrypted, HDR_ENCRYPTED(hdr));
+	IMPLY(encrypted, ARC_BUF_ENCRYPTED(buf));
+	IMPLY(encrypted, ARC_BUF_COMPRESSED(buf));
+	IMPLY(encrypted, !ARC_BUF_SHARED(buf));
+
+	/*
+	 * If the caller wanted encrypted data we just need to copy it from
+	 * b_rabd and potentially byteswap it. We won't be able to do any
+	 * further transforms on it.
+	 */
+	if (encrypted) {
+		ASSERT(HDR_HAS_RABD(hdr));
+		abd_copy_to_buf(buf->b_data, hdr->b_crypt_hdr.b_rabd,
+		    HDR_GET_PSIZE(hdr));
+		goto byteswap;
+	}
+
+	/*
+	 * Adjust encrypted and authenticated headers to accommodate
+	 * the request if needed. Dnode blocks (ARC_FILL_IN_PLACE) are
+	 * allowed to fail decryption due to keys not being loaded
+	 * without being marked as an IO error.
+	 */
+	if (HDR_PROTECTED(hdr)) {
+		error = arc_fill_hdr_crypt(hdr, hash_lock, spa,
+		    zb, !!(flags & ARC_FILL_NOAUTH));
+		if (error == EACCES && (flags & ARC_FILL_IN_PLACE) != 0) {
+			return (error);
+		} else if (error != 0) {
+			if (hash_lock != NULL)
+				mutex_enter(hash_lock);
+			arc_hdr_set_flags(hdr, ARC_FLAG_IO_ERROR);
+			if (hash_lock != NULL)
+				mutex_exit(hash_lock);
+			return (error);
+		}
+	}
+
+	/*
+	 * There is a special case here for dnode blocks which are
+	 * decrypting their bonus buffers. These blocks may request to
+	 * be decrypted in-place. This is necessary because there may
+	 * be many dnodes pointing into this buffer and there is
+	 * currently no method to synchronize replacing the backing
+	 * b_data buffer and updating all of the pointers. Here we use
+	 * the hash lock to ensure there are no races. If the need
+	 * arises for other types to be decrypted in-place, they must
+	 * add handling here as well. 
+ */ + if ((flags & ARC_FILL_IN_PLACE) != 0) { + ASSERT(!hdr_compressed); + ASSERT(!compressed); + ASSERT(!encrypted); + + if (HDR_ENCRYPTED(hdr) && ARC_BUF_ENCRYPTED(buf)) { + ASSERT3U(hdr->b_crypt_hdr.b_ot, ==, DMU_OT_DNODE); + + if (hash_lock != NULL) + mutex_enter(hash_lock); + arc_buf_untransform_in_place(buf, hash_lock); + if (hash_lock != NULL) + mutex_exit(hash_lock); + + /* Compute the hdr's checksum if necessary */ + arc_cksum_compute(buf); + } + + return (0); + } if (hdr_compressed == compressed) { if (!arc_buf_is_shared(buf)) { @@ -1970,7 +2444,7 @@ arc_buf_fill(arc_buf_t *buf, boolean_t compressed) if (arc_buf_is_shared(buf)) { ASSERT(ARC_BUF_COMPRESSED(buf)); - /* We need to give the buf it's own b_data */ + /* We need to give the buf its own b_data */ buf->b_flags &= ~ARC_BUF_FLAG_SHARED; buf->b_data = arc_get_data_buf(hdr, HDR_GET_LSIZE(hdr), buf); @@ -2006,7 +2480,7 @@ arc_buf_fill(arc_buf_t *buf, boolean_t compressed) ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, !=, NULL); return (0); } else { - int error = zio_decompress_data(HDR_GET_COMPRESS(hdr), + error = zio_decompress_data(HDR_GET_COMPRESS(hdr), hdr->b_l1hdr.b_pabd, buf->b_data, HDR_GET_PSIZE(hdr), HDR_GET_LSIZE(hdr)); @@ -2017,13 +2491,19 @@ arc_buf_fill(arc_buf_t *buf, boolean_t compressed) if (error != 0) { zfs_dbgmsg( "hdr %p, compress %d, psize %d, lsize %d", - hdr, HDR_GET_COMPRESS(hdr), + hdr, arc_hdr_get_compress(hdr), HDR_GET_PSIZE(hdr), HDR_GET_LSIZE(hdr)); + if (hash_lock != NULL) + mutex_enter(hash_lock); + arc_hdr_set_flags(hdr, ARC_FLAG_IO_ERROR); + if (hash_lock != NULL) + mutex_exit(hash_lock); return (SET_ERROR(EIO)); } } } +byteswap: /* Byteswap the buf's data if necessary */ if (bswap != DMU_BSWAP_NUMFUNCS) { ASSERT(!HDR_SHARED_DATA(hdr)); @@ -2037,28 +2517,35 @@ arc_buf_fill(arc_buf_t *buf, boolean_t compressed) return (0); } -int -arc_decompress(arc_buf_t *buf) -{ - return (arc_buf_fill(buf, B_FALSE)); -} - /* - * Return the size of the block, b_pabd, that is stored in the 
arc_buf_hdr_t. + * If this function is being called to decrypt an encrypted buffer or verify an + * authenticated one, the key must be loaded and a mapping must be made + * available in the keystore via spa_keystore_create_mapping() or one of its + * callers. */ -static uint64_t -arc_hdr_size(arc_buf_hdr_t *hdr) +int +arc_untransform(arc_buf_t *buf, spa_t *spa, const zbookmark_phys_t *zb, + boolean_t in_place) { - uint64_t size; + int ret; + arc_fill_flags_t flags = 0; - if (HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF && - HDR_GET_PSIZE(hdr) > 0) { - size = HDR_GET_PSIZE(hdr); - } else { - ASSERT3U(HDR_GET_LSIZE(hdr), !=, 0); - size = HDR_GET_LSIZE(hdr); + if (in_place) + flags |= ARC_FILL_IN_PLACE; + + ret = arc_buf_fill(buf, spa, zb, flags); + if (ret == ECKSUM) { + /* + * Convert authentication and decryption errors to EIO + * (and generate an ereport) before leaving the ARC. + */ + ret = SET_ERROR(EIO); + spa_log_error(spa, zb); + zfs_ereport_post(FM_EREPORT_ZFS_AUTHENTICATION, + spa, NULL, zb, NULL, 0, 0); } - return (size); + + return (ret); } /* @@ -2077,6 +2564,7 @@ arc_evictable_space_increment(arc_buf_hdr_t *hdr, arc_state_t *state) ASSERT0(hdr->b_l1hdr.b_bufcnt); ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL); ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); + ASSERT(!HDR_HAS_RABD(hdr)); (void) zfs_refcount_add_many(&state->arcs_esize[type], HDR_GET_LSIZE(hdr), hdr); return; @@ -2087,6 +2575,10 @@ arc_evictable_space_increment(arc_buf_hdr_t *hdr, arc_state_t *state) (void) zfs_refcount_add_many(&state->arcs_esize[type], arc_hdr_size(hdr), hdr); } + if (HDR_HAS_RABD(hdr)) { + (void) zfs_refcount_add_many(&state->arcs_esize[type], + HDR_GET_PSIZE(hdr), hdr); + } for (arc_buf_t *buf = hdr->b_l1hdr.b_buf; buf != NULL; buf = buf->b_next) { if (arc_buf_is_shared(buf)) @@ -2112,6 +2604,7 @@ arc_evictable_space_decrement(arc_buf_hdr_t *hdr, arc_state_t *state) ASSERT0(hdr->b_l1hdr.b_bufcnt); ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL); ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); + 
ASSERT(!HDR_HAS_RABD(hdr)); (void) zfs_refcount_remove_many(&state->arcs_esize[type], HDR_GET_LSIZE(hdr), hdr); return; @@ -2122,6 +2615,10 @@ arc_evictable_space_decrement(arc_buf_hdr_t *hdr, arc_state_t *state) (void) zfs_refcount_remove_many(&state->arcs_esize[type], arc_hdr_size(hdr), hdr); } + if (HDR_HAS_RABD(hdr)) { + (void) zfs_refcount_remove_many(&state->arcs_esize[type], + HDR_GET_PSIZE(hdr), hdr); + } for (arc_buf_t *buf = hdr->b_l1hdr.b_buf; buf != NULL; buf = buf->b_next) { if (arc_buf_is_shared(buf)) @@ -2215,7 +2712,9 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr, old_state = hdr->b_l1hdr.b_state; refcnt = zfs_refcount_count(&hdr->b_l1hdr.b_refcnt); bufcnt = hdr->b_l1hdr.b_bufcnt; - update_old = (bufcnt > 0 || hdr->b_l1hdr.b_pabd != NULL); + + update_old = (bufcnt > 0 || hdr->b_l1hdr.b_pabd != NULL || + HDR_HAS_RABD(hdr)); } else { old_state = arc_l2c_only; refcnt = 0; @@ -2286,6 +2785,7 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr, (void) zfs_refcount_add_many(&new_state->arcs_size, HDR_GET_LSIZE(hdr), hdr); ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); + ASSERT(!HDR_HAS_RABD(hdr)); } else { uint32_t buffers = 0; @@ -2319,8 +2819,12 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr, (void) zfs_refcount_add_many( &new_state->arcs_size, arc_hdr_size(hdr), hdr); - } else { - ASSERT(GHOST_STATE(old_state)); + } + + if (HDR_HAS_RABD(hdr)) { + (void) zfs_refcount_add_many( + &new_state->arcs_size, + HDR_GET_PSIZE(hdr), hdr); } } } @@ -2330,6 +2834,7 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr, if (GHOST_STATE(old_state)) { ASSERT0(bufcnt); ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); + ASSERT(!HDR_HAS_RABD(hdr)); /* * When moving a header off of a ghost state, @@ -2369,9 +2874,20 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr, buf); } ASSERT3U(bufcnt, ==, buffers); - ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL); - (void) zfs_refcount_remove_many( - &old_state->arcs_size, 
arc_hdr_size(hdr), hdr); + ASSERT(hdr->b_l1hdr.b_pabd != NULL || + HDR_HAS_RABD(hdr)); + + if (hdr->b_l1hdr.b_pabd != NULL) { + (void) zfs_refcount_remove_many( + &old_state->arcs_size, arc_hdr_size(hdr), + hdr); + } + + if (HDR_HAS_RABD(hdr)) { + (void) zfs_refcount_remove_many( + &old_state->arcs_size, HDR_GET_PSIZE(hdr), + hdr); + } } } @@ -2463,12 +2979,13 @@ arc_can_share(arc_buf_hdr_t *hdr, arc_buf_t *buf) { /* * The criteria for sharing a hdr's data are: - * 1. the hdr's compression matches the buf's compression - * 2. the hdr doesn't need to be byteswapped - * 3. the hdr isn't already being shared - * 4. the buf is either compressed or it is the last buf in the hdr list + * 1. the buffer is not encrypted + * 2. the hdr's compression matches the buf's compression + * 3. the hdr doesn't need to be byteswapped + * 4. the hdr isn't already being shared + * 5. the buf is either compressed or it is the last buf in the hdr list * - * Criterion #4 maintains the invariant that shared uncompressed + * Criterion #5 maintains the invariant that shared uncompressed * bufs must be the final buf in the hdr's b_buf list. Reading this, you * might ask, "if a compressed buf is allocated first, won't that be the * last thing in the list?", but in that case it's impossible to create @@ -2483,9 +3000,11 @@ arc_can_share(arc_buf_hdr_t *hdr, arc_buf_t *buf) * sharing if the new buf isn't the first to be added. 
*/ ASSERT3P(buf->b_hdr, ==, hdr); - boolean_t hdr_compressed = HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF; + boolean_t hdr_compressed = arc_hdr_get_compress(hdr) != + ZIO_COMPRESS_OFF; boolean_t buf_compressed = ARC_BUF_COMPRESSED(buf) != 0; - return (buf_compressed == hdr_compressed && + return (!ARC_BUF_ENCRYPTED(buf) && + buf_compressed == hdr_compressed && hdr->b_l1hdr.b_byteswap == DMU_BSWAP_NUMFUNCS && !HDR_SHARED_DATA(hdr) && (ARC_BUF_LAST(buf) || ARC_BUF_COMPRESSED(buf))); @@ -2497,10 +3016,12 @@ arc_can_share(arc_buf_hdr_t *hdr, arc_buf_t *buf) * copy was made successfully, or an error code otherwise. */ static int -arc_buf_alloc_impl(arc_buf_hdr_t *hdr, void *tag, boolean_t compressed, +arc_buf_alloc_impl(arc_buf_hdr_t *hdr, spa_t *spa, const zbookmark_phys_t *zb, + void *tag, boolean_t encrypted, boolean_t compressed, boolean_t noauth, boolean_t fill, arc_buf_t **ret) { arc_buf_t *buf; + arc_fill_flags_t flags = ARC_FILL_LOCKED; ASSERT(HDR_HAS_L1HDR(hdr)); ASSERT3U(HDR_GET_LSIZE(hdr), >, 0); @@ -2508,6 +3029,7 @@ arc_buf_alloc_impl(arc_buf_hdr_t *hdr, void *tag, boolean_t compressed, hdr->b_type == ARC_BUFC_METADATA); ASSERT3P(ret, !=, NULL); ASSERT3P(*ret, ==, NULL); + IMPLY(encrypted, compressed); buf = *ret = kmem_cache_alloc(buf_cache, KM_PUSHPAGE); buf->b_hdr = hdr; @@ -2525,16 +3047,28 @@ arc_buf_alloc_impl(arc_buf_hdr_t *hdr, void *tag, boolean_t compressed, /* * Only honor requests for compressed bufs if the hdr is actually - * compressed. + * compressed. This must be overriden if the buffer is encrypted since + * encrypted buffers cannot be decompressed. 
*/ - if (compressed && HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF) + if (encrypted) { buf->b_flags |= ARC_BUF_FLAG_COMPRESSED; + buf->b_flags |= ARC_BUF_FLAG_ENCRYPTED; + flags |= ARC_FILL_COMPRESSED | ARC_FILL_ENCRYPTED; + } else if (compressed && + arc_hdr_get_compress(hdr) != ZIO_COMPRESS_OFF) { + buf->b_flags |= ARC_BUF_FLAG_COMPRESSED; + flags |= ARC_FILL_COMPRESSED; + } + + if (noauth) { + ASSERT0(encrypted); + flags |= ARC_FILL_NOAUTH; + } /* * If the hdr's data can be shared then we share the data buffer and * set the appropriate bit in the hdr's b_flags to indicate the hdr is - * sharing it's b_pabd with the arc_buf_t. Otherwise, we allocate a new - * buffer to store the buf's data. + * allocate a new buffer to store the buf's data. * * There are two additional restrictions here because we're sharing * hdr -> buf instead of the usual buf -> hdr. First, the hdr can't be @@ -2545,7 +3079,7 @@ arc_buf_alloc_impl(arc_buf_hdr_t *hdr, void *tag, boolean_t compressed, * need to be ABD-aware. */ boolean_t can_share = arc_can_share(hdr, buf) && !HDR_L2_WRITING(hdr) && - abd_is_linear(hdr->b_l1hdr.b_pabd); + hdr->b_l1hdr.b_pabd != NULL && abd_is_linear(hdr->b_l1hdr.b_pabd); /* Set up b_data and sharing */ if (can_share) { @@ -2561,13 +3095,16 @@ arc_buf_alloc_impl(arc_buf_hdr_t *hdr, void *tag, boolean_t compressed, hdr->b_l1hdr.b_buf = buf; hdr->b_l1hdr.b_bufcnt += 1; + if (encrypted) + hdr->b_crypt_hdr.b_ebufcnt += 1; /* * If the user wants the data from the hdr, we need to either copy or * decompress the data. 
*/ if (fill) { - return (arc_buf_fill(buf, ARC_BUF_COMPRESSED(buf) != 0)); + ASSERT3P(zb, !=, NULL); + return (arc_buf_fill(buf, spa, zb, flags)); } return (0); @@ -2613,6 +3150,19 @@ arc_loan_compressed_buf(spa_t *spa, uint64_t psize, uint64_t lsize, return (buf); } +arc_buf_t * +arc_loan_raw_buf(spa_t *spa, uint64_t dsobj, boolean_t byteorder, + const uint8_t *salt, const uint8_t *iv, const uint8_t *mac, + dmu_object_type_t ot, uint64_t psize, uint64_t lsize, + enum zio_compress compression_type) +{ + arc_buf_t *buf = arc_alloc_raw_buf(spa, arc_onloan_tag, dsobj, + byteorder, salt, iv, mac, ot, psize, lsize, compression_type); + + atomic_add_64(&arc_loaned_bytes, psize); + return (buf); +} + /* * Return a loaned arc buffer to the arc. @@ -2658,11 +3208,11 @@ l2arc_free_abd_on_write(abd_t *abd, size_t size, arc_buf_contents_t type) } static void -arc_hdr_free_on_write(arc_buf_hdr_t *hdr) +arc_hdr_free_on_write(arc_buf_hdr_t *hdr, boolean_t free_rdata) { arc_state_t *state = hdr->b_l1hdr.b_state; arc_buf_contents_t type = arc_buf_type(hdr); - uint64_t size = arc_hdr_size(hdr); + uint64_t size = (free_rdata) ? 
HDR_GET_PSIZE(hdr) : arc_hdr_size(hdr); /* protected by hash lock, if in the hash table */ if (multilist_link_active(&hdr->b_l1hdr.b_arc_node)) { @@ -2680,7 +3230,11 @@ arc_hdr_free_on_write(arc_buf_hdr_t *hdr) arc_space_return(size, ARC_SPACE_DATA); } - l2arc_free_abd_on_write(hdr->b_l1hdr.b_pabd, size, type); + if (free_rdata) { + l2arc_free_abd_on_write(hdr->b_crypt_hdr.b_rabd, size, type); + } else { + l2arc_free_abd_on_write(hdr->b_l1hdr.b_pabd, size, type); + } } /* @@ -2691,10 +3245,12 @@ arc_hdr_free_on_write(arc_buf_hdr_t *hdr) static void arc_share_buf(arc_buf_hdr_t *hdr, arc_buf_t *buf) { + /* LINTED */ arc_state_t *state = hdr->b_l1hdr.b_state; ASSERT(arc_can_share(hdr, buf)); ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); + ASSERT(!ARC_BUF_ENCRYPTED(buf)); ASSERT(MUTEX_HELD(HDR_LOCK(hdr)) || HDR_EMPTY(hdr)); /* @@ -2702,7 +3258,8 @@ arc_share_buf(arc_buf_hdr_t *hdr, arc_buf_t *buf) * refcount ownership to the hdr since it always owns * the refcount whenever an arc_buf_t is shared. */ - zfs_refcount_transfer_ownership(&state->arcs_size, buf, hdr); + zfs_refcount_transfer_ownership_many(&hdr->b_l1hdr.b_state->arcs_size, + arc_hdr_size(hdr), buf, hdr); hdr->b_l1hdr.b_pabd = abd_get_from_buf(buf->b_data, arc_buf_size(buf)); abd_take_ownership_of_buf(hdr->b_l1hdr.b_pabd, HDR_ISTYPE_METADATA(hdr)); @@ -2722,6 +3279,7 @@ arc_share_buf(arc_buf_hdr_t *hdr, arc_buf_t *buf) static void arc_unshare_buf(arc_buf_hdr_t *hdr, arc_buf_t *buf) { + /* LINTED */ arc_state_t *state = hdr->b_l1hdr.b_state; ASSERT(arc_buf_is_shared(buf)); @@ -2732,7 +3290,8 @@ arc_unshare_buf(arc_buf_hdr_t *hdr, arc_buf_t *buf) * We are no longer sharing this buffer so we need * to transfer its ownership to the rightful owner. 
*/ - zfs_refcount_transfer_ownership(&state->arcs_size, hdr, buf); + zfs_refcount_transfer_ownership_many(&hdr->b_l1hdr.b_state->arcs_size, + arc_hdr_size(hdr), hdr, buf); arc_hdr_clear_flags(hdr, ARC_FLAG_SHARED_DATA); abd_release_ownership_of_buf(hdr->b_l1hdr.b_pabd); abd_put(hdr->b_l1hdr.b_pabd); @@ -2756,12 +3315,12 @@ arc_unshare_buf(arc_buf_hdr_t *hdr, arc_buf_t *buf) static arc_buf_t * arc_buf_remove(arc_buf_hdr_t *hdr, arc_buf_t *buf) { - ASSERT(HDR_HAS_L1HDR(hdr)); - ASSERT(MUTEX_HELD(HDR_LOCK(hdr)) || HDR_EMPTY(hdr)); - arc_buf_t **bufp = &hdr->b_l1hdr.b_buf; arc_buf_t *lastbuf = NULL; + ASSERT(HDR_HAS_L1HDR(hdr)); + ASSERT(MUTEX_HELD(HDR_LOCK(hdr)) || HDR_EMPTY(hdr)); + /* * Remove the buf from the hdr list and locate the last * remaining buffer on the list. @@ -2824,6 +3383,21 @@ arc_buf_destroy_impl(arc_buf_t *buf) ASSERT(hdr->b_l1hdr.b_bufcnt > 0); hdr->b_l1hdr.b_bufcnt -= 1; + + if (ARC_BUF_ENCRYPTED(buf)) { + hdr->b_crypt_hdr.b_ebufcnt -= 1; + + /* + * If we have no more encrypted buffers and we've + * already gotten a copy of the decrypted data we can + * free b_rabd to save some space. + */ + if (hdr->b_crypt_hdr.b_ebufcnt == 0 && + HDR_HAS_RABD(hdr) && hdr->b_l1hdr.b_pabd != NULL && + !HDR_IO_IN_PROGRESS(hdr)) { + arc_hdr_free_pabd(hdr, B_TRUE); + } + } } arc_buf_t *lastbuf = arc_buf_remove(hdr, buf); @@ -2838,16 +3412,17 @@ arc_buf_destroy_impl(arc_buf_t *buf) * There is an equivalent case for compressed bufs, but since * they aren't guaranteed to be the last buf in the list and * that is an exceedingly rare case, we just allow that space be - * wasted temporarily. + * wasted temporarily. We must also be careful not to share + * encrypted buffers, since they cannot be shared. 
*/ - if (lastbuf != NULL) { + if (lastbuf != NULL && !ARC_BUF_ENCRYPTED(lastbuf)) { /* Only one buf can be shared at once */ VERIFY(!arc_buf_is_shared(lastbuf)); /* hdr is uncompressed so can't have compressed buf */ VERIFY(!ARC_BUF_COMPRESSED(lastbuf)); ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL); - arc_hdr_free_pabd(hdr); + arc_hdr_free_pabd(hdr, B_FALSE); /* * We must setup a new shared block between the @@ -2868,7 +3443,7 @@ arc_buf_destroy_impl(arc_buf_t *buf) */ ASSERT3P(lastbuf, !=, NULL); ASSERT(arc_buf_is_shared(lastbuf) || - HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF); + arc_hdr_get_compress(hdr) != ZIO_COMPRESS_OFF); } /* @@ -2885,26 +3460,40 @@ arc_buf_destroy_impl(arc_buf_t *buf) } static void -arc_hdr_alloc_pabd(arc_buf_hdr_t *hdr) +arc_hdr_alloc_pabd(arc_buf_hdr_t *hdr, boolean_t alloc_rdata) { + uint64_t size; + ASSERT3U(HDR_GET_LSIZE(hdr), >, 0); ASSERT(HDR_HAS_L1HDR(hdr)); - ASSERT(!HDR_SHARED_DATA(hdr)); + ASSERT(!HDR_SHARED_DATA(hdr) || alloc_rdata); + IMPLY(alloc_rdata, HDR_PROTECTED(hdr)); - ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); - hdr->b_l1hdr.b_pabd = arc_get_data_abd(hdr, arc_hdr_size(hdr), hdr); - hdr->b_l1hdr.b_byteswap = DMU_BSWAP_NUMFUNCS; - ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL); + if (alloc_rdata) { + size = HDR_GET_PSIZE(hdr); + ASSERT3P(hdr->b_crypt_hdr.b_rabd, ==, NULL); + hdr->b_crypt_hdr.b_rabd = arc_get_data_abd(hdr, size, hdr); + ASSERT3P(hdr->b_crypt_hdr.b_rabd, !=, NULL); + } else { + size = arc_hdr_size(hdr); + ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); + hdr->b_l1hdr.b_pabd = arc_get_data_abd(hdr, size, hdr); + ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL); + } - ARCSTAT_INCR(arcstat_compressed_size, arc_hdr_size(hdr)); + ARCSTAT_INCR(arcstat_compressed_size, size); ARCSTAT_INCR(arcstat_uncompressed_size, HDR_GET_LSIZE(hdr)); } static void -arc_hdr_free_pabd(arc_buf_hdr_t *hdr) +arc_hdr_free_pabd(arc_buf_hdr_t *hdr, boolean_t free_rdata) { + uint64_t size = (free_rdata) ? 
HDR_GET_PSIZE(hdr) : arc_hdr_size(hdr); + ASSERT(HDR_HAS_L1HDR(hdr)); - ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL); + ASSERT(hdr->b_l1hdr.b_pabd != NULL || HDR_HAS_RABD(hdr)); + IMPLY(free_rdata, HDR_HAS_RABD(hdr)); + /* * If the hdr is currently being written to the l2arc then @@ -2913,28 +3502,41 @@ arc_hdr_free_pabd(arc_buf_hdr_t *hdr) * writing it to the l2arc device. */ if (HDR_L2_WRITING(hdr)) { - arc_hdr_free_on_write(hdr); + arc_hdr_free_on_write(hdr, free_rdata); ARCSTAT_BUMP(arcstat_l2_free_on_write); + } else if (free_rdata) { + arc_free_data_abd(hdr, hdr->b_crypt_hdr.b_rabd, size, hdr); } else { arc_free_data_abd(hdr, hdr->b_l1hdr.b_pabd, - arc_hdr_size(hdr), hdr); + size, hdr); } - hdr->b_l1hdr.b_pabd = NULL; - hdr->b_l1hdr.b_byteswap = DMU_BSWAP_NUMFUNCS; - ARCSTAT_INCR(arcstat_compressed_size, -arc_hdr_size(hdr)); + if (free_rdata) { + hdr->b_crypt_hdr.b_rabd = NULL; + } else { + hdr->b_l1hdr.b_pabd = NULL; + } + + if (hdr->b_l1hdr.b_pabd == NULL && !HDR_HAS_RABD(hdr)) + hdr->b_l1hdr.b_byteswap = DMU_BSWAP_NUMFUNCS; + + ARCSTAT_INCR(arcstat_compressed_size, -size); ARCSTAT_INCR(arcstat_uncompressed_size, -HDR_GET_LSIZE(hdr)); } static arc_buf_hdr_t * arc_hdr_alloc(uint64_t spa, int32_t psize, int32_t lsize, - enum zio_compress compression_type, arc_buf_contents_t type) + boolean_t protected, enum zio_compress compression_type, + arc_buf_contents_t type, boolean_t alloc_rdata) { arc_buf_hdr_t *hdr; VERIFY(type == ARC_BUFC_DATA || type == ARC_BUFC_METADATA); - - hdr = kmem_cache_alloc(hdr_full_cache, KM_PUSHPAGE); + if (protected) { + hdr = kmem_cache_alloc(hdr_full_crypt_cache, KM_PUSHPAGE); + } else { + hdr = kmem_cache_alloc(hdr_full_cache, KM_PUSHPAGE); + } ASSERT(HDR_EMPTY(hdr)); ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, ==, NULL); ASSERT3P(hdr->b_l1hdr.b_thawed, ==, NULL); @@ -2945,6 +3547,8 @@ arc_hdr_alloc(uint64_t spa, int32_t psize, int32_t lsize, hdr->b_flags = 0; arc_hdr_set_flags(hdr, arc_bufc_to_flags(type) | ARC_FLAG_HAS_L1HDR); 
arc_hdr_set_compress(hdr, compression_type); + if (protected) + arc_hdr_set_flags(hdr, ARC_FLAG_PROTECTED); hdr->b_l1hdr.b_state = arc_anon; hdr->b_l1hdr.b_arc_access = 0; @@ -2956,7 +3560,7 @@ arc_hdr_alloc(uint64_t spa, int32_t psize, int32_t lsize, * the compressed or uncompressed data depending on the block * it references and compressed arc enablement. */ - arc_hdr_alloc_pabd(hdr); + arc_hdr_alloc_pabd(hdr, alloc_rdata); ASSERT(zfs_refcount_is_zero(&hdr->b_l1hdr.b_refcnt)); return (hdr); @@ -2980,6 +3584,16 @@ arc_hdr_realloc(arc_buf_hdr_t *hdr, kmem_cache_t *old, kmem_cache_t *new) ASSERT((old == hdr_full_cache && new == hdr_l2only_cache) || (old == hdr_l2only_cache && new == hdr_full_cache)); + /* + * if the caller wanted a new full header and the header is to be + * encrypted we will actually allocate the header from the full crypt + * cache instead. The same applies to freeing from the old cache. + */ + if (HDR_PROTECTED(hdr) && new == hdr_full_cache) + new = hdr_full_crypt_cache; + if (HDR_PROTECTED(hdr) && old == hdr_full_cache) + old = hdr_full_crypt_cache; + nhdr = kmem_cache_alloc(new, KM_PUSHPAGE); ASSERT(MUTEX_HELD(HDR_LOCK(hdr))); @@ -2987,7 +3601,7 @@ arc_hdr_realloc(arc_buf_hdr_t *hdr, kmem_cache_t *old, kmem_cache_t *new) bcopy(hdr, nhdr, HDR_L2ONLY_SIZE); - if (new == hdr_full_cache) { + if (new == hdr_full_cache || new == hdr_full_crypt_cache) { arc_hdr_set_flags(nhdr, ARC_FLAG_HAS_L1HDR); /* * arc_access and arc_change_state need to be aware that a @@ -2998,6 +3612,7 @@ arc_hdr_realloc(arc_buf_hdr_t *hdr, kmem_cache_t *old, kmem_cache_t *new) /* Verify previous threads set to NULL before freeing */ ASSERT3P(nhdr->b_l1hdr.b_pabd, ==, NULL); + ASSERT(!HDR_HAS_RABD(hdr)); } else { ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL); ASSERT0(hdr->b_l1hdr.b_bufcnt); @@ -3020,6 +3635,7 @@ arc_hdr_realloc(arc_buf_hdr_t *hdr, kmem_cache_t *old, kmem_cache_t *new) */ VERIFY(!HDR_L2_WRITING(hdr)); VERIFY3P(hdr->b_l1hdr.b_pabd, ==, NULL); + 
ASSERT(!HDR_HAS_RABD(hdr)); #ifdef ZFS_DEBUG if (hdr->b_l1hdr.b_thawed != NULL) { @@ -3071,6 +3687,156 @@ arc_hdr_realloc(arc_buf_hdr_t *hdr, kmem_cache_t *old, kmem_cache_t *new) } /* + * This function allows an L1 header to be reallocated as a crypt + * header and vice versa. If we are going to a crypt header, the + * new fields will be zeroed out. + */ +static arc_buf_hdr_t * +arc_hdr_realloc_crypt(arc_buf_hdr_t *hdr, boolean_t need_crypt) +{ + arc_buf_hdr_t *nhdr; + arc_buf_t *buf; + kmem_cache_t *ncache, *ocache; + + ASSERT(HDR_HAS_L1HDR(hdr)); + ASSERT3U(!!HDR_PROTECTED(hdr), !=, need_crypt); + ASSERT3P(hdr->b_l1hdr.b_state, ==, arc_anon); + ASSERT(!multilist_link_active(&hdr->b_l1hdr.b_arc_node)); + ASSERT(!list_link_active(&hdr->b_l2hdr.b_l2node)); + ASSERT3P(hdr->b_hash_next, ==, NULL); + + if (need_crypt) { + ncache = hdr_full_crypt_cache; + ocache = hdr_full_cache; + } else { + ncache = hdr_full_cache; + ocache = hdr_full_crypt_cache; + } + + nhdr = kmem_cache_alloc(ncache, KM_PUSHPAGE); + + /* + * Copy all members that aren't locks or condvars to the new header. + * No lists are pointing to us (as we asserted above), so we don't + * need to worry about the list nodes. 
+ */ + nhdr->b_dva = hdr->b_dva; + nhdr->b_birth = hdr->b_birth; + nhdr->b_type = hdr->b_type; + nhdr->b_flags = hdr->b_flags; + nhdr->b_psize = hdr->b_psize; + nhdr->b_lsize = hdr->b_lsize; + nhdr->b_spa = hdr->b_spa; + nhdr->b_l2hdr.b_dev = hdr->b_l2hdr.b_dev; + nhdr->b_l2hdr.b_daddr = hdr->b_l2hdr.b_daddr; + nhdr->b_l1hdr.b_freeze_cksum = hdr->b_l1hdr.b_freeze_cksum; + nhdr->b_l1hdr.b_bufcnt = hdr->b_l1hdr.b_bufcnt; + nhdr->b_l1hdr.b_byteswap = hdr->b_l1hdr.b_byteswap; + nhdr->b_l1hdr.b_state = hdr->b_l1hdr.b_state; + nhdr->b_l1hdr.b_arc_access = hdr->b_l1hdr.b_arc_access; + nhdr->b_l1hdr.b_acb = hdr->b_l1hdr.b_acb; + nhdr->b_l1hdr.b_pabd = hdr->b_l1hdr.b_pabd; +#ifdef ZFS_DEBUG + if (hdr->b_l1hdr.b_thawed != NULL) { + nhdr->b_l1hdr.b_thawed = hdr->b_l1hdr.b_thawed; + hdr->b_l1hdr.b_thawed = NULL; + } +#endif + + /* + * This refcount_add() exists only to ensure that the individual + * arc buffers always point to a header that is referenced, avoiding + * a small race condition that could trigger ASSERTs. 
+ */ + (void) zfs_refcount_add(&nhdr->b_l1hdr.b_refcnt, FTAG); + nhdr->b_l1hdr.b_buf = hdr->b_l1hdr.b_buf; + for (buf = nhdr->b_l1hdr.b_buf; buf != NULL; buf = buf->b_next) { + mutex_enter(&buf->b_evict_lock); + buf->b_hdr = nhdr; + mutex_exit(&buf->b_evict_lock); + } + zfs_refcount_transfer(&nhdr->b_l1hdr.b_refcnt, &hdr->b_l1hdr.b_refcnt); + (void) zfs_refcount_remove(&nhdr->b_l1hdr.b_refcnt, FTAG); + ASSERT0(zfs_refcount_count(&hdr->b_l1hdr.b_refcnt)); + + if (need_crypt) { + arc_hdr_set_flags(nhdr, ARC_FLAG_PROTECTED); + } else { + arc_hdr_clear_flags(nhdr, ARC_FLAG_PROTECTED); + } + + /* unset all members of the original hdr */ + bzero(&hdr->b_dva, sizeof (dva_t)); + hdr->b_birth = 0; + hdr->b_type = ARC_BUFC_INVALID; + hdr->b_flags = 0; + hdr->b_psize = 0; + hdr->b_lsize = 0; + hdr->b_spa = 0; + hdr->b_l2hdr.b_dev = NULL; + hdr->b_l2hdr.b_daddr = 0; + hdr->b_l1hdr.b_freeze_cksum = NULL; + hdr->b_l1hdr.b_buf = NULL; + hdr->b_l1hdr.b_bufcnt = 0; + hdr->b_l1hdr.b_byteswap = 0; + hdr->b_l1hdr.b_state = NULL; + hdr->b_l1hdr.b_arc_access = 0; + hdr->b_l1hdr.b_acb = NULL; + hdr->b_l1hdr.b_pabd = NULL; + + if (ocache == hdr_full_crypt_cache) { + ASSERT(!HDR_HAS_RABD(hdr)); + hdr->b_crypt_hdr.b_ot = DMU_OT_NONE; + hdr->b_crypt_hdr.b_ebufcnt = 0; + hdr->b_crypt_hdr.b_dsobj = 0; + bzero(hdr->b_crypt_hdr.b_salt, ZIO_DATA_SALT_LEN); + bzero(hdr->b_crypt_hdr.b_iv, ZIO_DATA_IV_LEN); + bzero(hdr->b_crypt_hdr.b_mac, ZIO_DATA_MAC_LEN); + } + + buf_discard_identity(hdr); + kmem_cache_free(ocache, hdr); + + return (nhdr); +} + +/* + * This function is used by the send / receive code to convert a newly + * allocated arc_buf_t to one that is suitable for a raw encrypted write. It + * is also used to allow the root objset block to be uupdated without altering + * its embedded MACs. Both block types will always be uncompressed so we do not + * have to worry about compression type or psize. 
+ */ +void +arc_convert_to_raw(arc_buf_t *buf, uint64_t dsobj, boolean_t byteorder, + dmu_object_type_t ot, const uint8_t *salt, const uint8_t *iv, + const uint8_t *mac) +{ + arc_buf_hdr_t *hdr = buf->b_hdr; + + ASSERT(ot == DMU_OT_DNODE || ot == DMU_OT_OBJSET); + ASSERT(HDR_HAS_L1HDR(hdr)); + ASSERT3P(hdr->b_l1hdr.b_state, ==, arc_anon); + + buf->b_flags |= (ARC_BUF_FLAG_COMPRESSED | ARC_BUF_FLAG_ENCRYPTED); + if (!HDR_PROTECTED(hdr)) + hdr = arc_hdr_realloc_crypt(hdr, B_TRUE); + hdr->b_crypt_hdr.b_dsobj = dsobj; + hdr->b_crypt_hdr.b_ot = ot; + hdr->b_l1hdr.b_byteswap = (byteorder == ZFS_HOST_BYTEORDER) ? + DMU_BSWAP_NUMFUNCS : DMU_OT_BYTESWAP(ot); + if (!arc_hdr_has_uncompressed_buf(hdr)) + arc_cksum_free(hdr); + + if (salt != NULL) + bcopy(salt, hdr->b_crypt_hdr.b_salt, ZIO_DATA_SALT_LEN); + if (iv != NULL) + bcopy(iv, hdr->b_crypt_hdr.b_iv, ZIO_DATA_IV_LEN); + if (mac != NULL) + bcopy(mac, hdr->b_crypt_hdr.b_mac, ZIO_DATA_MAC_LEN); +} + +/* * Allocate a new arc_buf_hdr_t and arc_buf_t and return the buf to the caller. * The buf is returned thawed since we expect the consumer to modify it. 
*/ @@ -3078,11 +3844,12 @@ arc_buf_t * arc_alloc_buf(spa_t *spa, void *tag, arc_buf_contents_t type, int32_t size) { arc_buf_hdr_t *hdr = arc_hdr_alloc(spa_load_guid(spa), size, size, - ZIO_COMPRESS_OFF, type); + B_FALSE, ZIO_COMPRESS_OFF, type, B_FALSE); ASSERT(!MUTEX_HELD(HDR_LOCK(hdr))); arc_buf_t *buf = NULL; - VERIFY0(arc_buf_alloc_impl(hdr, tag, B_FALSE, B_FALSE, &buf)); + VERIFY0(arc_buf_alloc_impl(hdr, spa, NULL, tag, B_FALSE, B_FALSE, + B_FALSE, B_FALSE, &buf)); arc_buf_thaw(buf); return (buf); @@ -3098,33 +3865,76 @@ arc_alloc_compressed_buf(spa_t *spa, void *tag, uint64_t psize, uint64_t lsize, { ASSERT3U(lsize, >, 0); ASSERT3U(lsize, >=, psize); - ASSERT(compression_type > ZIO_COMPRESS_OFF); - ASSERT(compression_type < ZIO_COMPRESS_FUNCTIONS); + ASSERT3U(compression_type, >, ZIO_COMPRESS_OFF); + ASSERT3U(compression_type, <, ZIO_COMPRESS_FUNCTIONS); arc_buf_hdr_t *hdr = arc_hdr_alloc(spa_load_guid(spa), psize, lsize, - compression_type, ARC_BUFC_DATA); + B_FALSE, compression_type, ARC_BUFC_DATA, B_FALSE); ASSERT(!MUTEX_HELD(HDR_LOCK(hdr))); arc_buf_t *buf = NULL; - VERIFY0(arc_buf_alloc_impl(hdr, tag, B_TRUE, B_FALSE, &buf)); + VERIFY0(arc_buf_alloc_impl(hdr, spa, NULL, tag, B_FALSE, + B_TRUE, B_FALSE, B_FALSE, &buf)); arc_buf_thaw(buf); ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, ==, NULL); if (!arc_buf_is_shared(buf)) { /* * To ensure that the hdr has the correct data in it if we call - * arc_decompress() on this buf before it's been written to + * arc_untransform() on this buf before it's been written to * disk, it's easiest if we just set up sharing between the * buf and the hdr. 
*/ ASSERT(!abd_is_linear(hdr->b_l1hdr.b_pabd)); - arc_hdr_free_pabd(hdr); + arc_hdr_free_pabd(hdr, B_FALSE); arc_share_buf(hdr, buf); } return (buf); } +arc_buf_t * +arc_alloc_raw_buf(spa_t *spa, void *tag, uint64_t dsobj, boolean_t byteorder, + const uint8_t *salt, const uint8_t *iv, const uint8_t *mac, + dmu_object_type_t ot, uint64_t psize, uint64_t lsize, + enum zio_compress compression_type) +{ + arc_buf_hdr_t *hdr; + arc_buf_t *buf; + arc_buf_contents_t type = DMU_OT_IS_METADATA(ot) ? + ARC_BUFC_METADATA : ARC_BUFC_DATA; + + ASSERT3U(lsize, >, 0); + ASSERT3U(lsize, >=, psize); + ASSERT3U(compression_type, >=, ZIO_COMPRESS_OFF); + ASSERT3U(compression_type, <, ZIO_COMPRESS_FUNCTIONS); + + hdr = arc_hdr_alloc(spa_load_guid(spa), psize, lsize, B_TRUE, + compression_type, type, B_TRUE); + ASSERT(!MUTEX_HELD(HDR_LOCK(hdr))); + + hdr->b_crypt_hdr.b_dsobj = dsobj; + hdr->b_crypt_hdr.b_ot = ot; + hdr->b_l1hdr.b_byteswap = (byteorder == ZFS_HOST_BYTEORDER) ? + DMU_BSWAP_NUMFUNCS : DMU_OT_BYTESWAP(ot); + bcopy(salt, hdr->b_crypt_hdr.b_salt, ZIO_DATA_SALT_LEN); + bcopy(iv, hdr->b_crypt_hdr.b_iv, ZIO_DATA_IV_LEN); + bcopy(mac, hdr->b_crypt_hdr.b_mac, ZIO_DATA_MAC_LEN); + + /* + * This buffer will be considered encrypted even if the ot is not an + * encrypted type. It will become authenticated instead in + * arc_write_ready(). 
+ */ + buf = NULL; + VERIFY0(arc_buf_alloc_impl(hdr, spa, NULL, tag, B_TRUE, B_TRUE, + B_FALSE, B_FALSE, &buf)); + arc_buf_thaw(buf); + ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, ==, NULL); + + return (buf); +} + static void arc_hdr_l2hdr_destroy(arc_buf_hdr_t *hdr) { @@ -3200,15 +4010,23 @@ arc_hdr_destroy(arc_buf_hdr_t *hdr) #endif if (hdr->b_l1hdr.b_pabd != NULL) { - arc_hdr_free_pabd(hdr); + arc_hdr_free_pabd(hdr, B_FALSE); } + + if (HDR_HAS_RABD(hdr)) + arc_hdr_free_pabd(hdr, B_TRUE); } ASSERT3P(hdr->b_hash_next, ==, NULL); if (HDR_HAS_L1HDR(hdr)) { ASSERT(!multilist_link_active(&hdr->b_l1hdr.b_arc_node)); ASSERT3P(hdr->b_l1hdr.b_acb, ==, NULL); - kmem_cache_free(hdr_full_cache, hdr); + + if (!HDR_PROTECTED(hdr)) { + kmem_cache_free(hdr_full_cache, hdr); + } else { + kmem_cache_free(hdr_full_crypt_cache, hdr); + } } else { kmem_cache_free(hdr_l2only_cache, hdr); } @@ -3242,7 +4060,7 @@ arc_buf_destroy(arc_buf_t *buf, void* tag) /* * Evict the arc_buf_hdr that is provided as a parameter. The resultant - * state of the header is dependent on it's state prior to entering this + * state of the header is dependent on its state prior to entering this * function. The following transitions are possible: * * - arc_mru -> arc_mru_ghost @@ -3270,9 +4088,9 @@ arc_evict_hdr(arc_buf_hdr_t *hdr, kmutex_t *hash_lock) /* * l2arc_write_buffers() relies on a header's L1 portion - * (i.e. its b_pabd field) during it's write phase. + * (i.e. its b_pabd field) during its write phase. * Thus, we cannot push a header onto the arc_l2c_only - * state (removing it's L1 piece) until the header is + * state (removing its L1 piece) until the header is * done being written to the l2arc. 
*/ if (HDR_HAS_L2HDR(hdr) && HDR_L2_WRITING(hdr)) { @@ -3285,8 +4103,9 @@ arc_evict_hdr(arc_buf_hdr_t *hdr, kmutex_t *hash_lock) DTRACE_PROBE1(arc__delete, arc_buf_hdr_t *, hdr); - ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); if (HDR_HAS_L2HDR(hdr)) { + ASSERT(hdr->b_l1hdr.b_pabd == NULL); + ASSERT(!HDR_HAS_RABD(hdr)); /* * This buffer is cached on the 2nd Level ARC; * don't destroy the header. @@ -3352,7 +4171,11 @@ arc_evict_hdr(arc_buf_hdr_t *hdr, kmutex_t *hash_lock) * This ensures that the accounting is updated correctly * in arc_free_data_impl(). */ - arc_hdr_free_pabd(hdr); + if (hdr->b_l1hdr.b_pabd != NULL) + arc_hdr_free_pabd(hdr, B_FALSE); + + if (HDR_HAS_RABD(hdr)) + arc_hdr_free_pabd(hdr, B_TRUE); arc_change_state(evicted_state, hdr, hash_lock); ASSERT(HDR_IN_HASH_TABLE(hdr)); @@ -4323,7 +5146,7 @@ arc_reap_cb(void *arg, zthr_t *zthr) /* * Adapt arc info given the number of bytes we are trying to add and - * the state that we are comming from. This function is only called + * the state that we are coming from. This function is only called * when we are adding new content to the cache. */ static void @@ -4464,7 +5287,7 @@ arc_get_data_impl(arc_buf_hdr_t *hdr, uint64_t size, void *tag) * upper limit, we must be adding data faster than the evict * thread can evict. Thus, to ensure we don't compound the * problem by adding more data and forcing arc_size to grow even - * further past it's target size, we halt and wait for the + * further past its target size, we halt and wait for the * eviction thread to catch up. 
* * It's also possible that the reclaim thread is unable to evict @@ -4799,24 +5622,69 @@ arc_getbuf_func(zio_t *zio, const zbookmark_phys_t *zb, const blkptr_t *bp, } static void -arc_hdr_verify(arc_buf_hdr_t *hdr, blkptr_t *bp) +arc_hdr_verify(arc_buf_hdr_t *hdr, const blkptr_t *bp) { if (BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp)) { ASSERT3U(HDR_GET_PSIZE(hdr), ==, 0); - ASSERT3U(HDR_GET_COMPRESS(hdr), ==, ZIO_COMPRESS_OFF); + ASSERT3U(arc_hdr_get_compress(hdr), ==, ZIO_COMPRESS_OFF); } else { if (HDR_COMPRESSION_ENABLED(hdr)) { - ASSERT3U(HDR_GET_COMPRESS(hdr), ==, + ASSERT3U(arc_hdr_get_compress(hdr), ==, BP_GET_COMPRESS(bp)); } ASSERT3U(HDR_GET_LSIZE(hdr), ==, BP_GET_LSIZE(bp)); ASSERT3U(HDR_GET_PSIZE(hdr), ==, BP_GET_PSIZE(bp)); + ASSERT3U(!!HDR_PROTECTED(hdr), ==, BP_IS_PROTECTED(bp)); + } +} + +/* + * XXX this should be changed to return an error, and callers + * re-read from disk on failure (on nondebug bits). + */ +static void +arc_hdr_verify_checksum(spa_t *spa, arc_buf_hdr_t *hdr, const blkptr_t *bp) +{ + arc_hdr_verify(hdr, bp); + if (BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp)) + return; + int err = 0; + abd_t *abd = NULL; + if (BP_IS_ENCRYPTED(bp)) { + if (HDR_HAS_RABD(hdr)) { + abd = hdr->b_crypt_hdr.b_rabd; + } + } else if (HDR_COMPRESSION_ENABLED(hdr)) { + abd = hdr->b_l1hdr.b_pabd; + } + if (abd != NULL) { + /* + * The offset is only used for labels, which are not + * cached in the ARC, so it doesn't matter what we + * pass for the offset parameter. + */ + int psize = HDR_GET_PSIZE(hdr); + err = zio_checksum_error_impl(spa, bp, + BP_GET_CHECKSUM(bp), abd, psize, 0, NULL); + if (err != 0) { + /* + * Use abd_copy_to_buf() rather than + * abd_borrow_buf_copy() so that we are sure to + * include the buf in crash dumps. 
+ */ + void *buf = kmem_alloc(psize, KM_SLEEP); + abd_copy_to_buf(buf, abd, psize); + panic("checksum of cached data doesn't match BP " + "err=%u hdr=%p bp=%p abd=%p buf=%p", + err, (void *)hdr, (void *)bp, (void *)abd, buf); + } } } static void arc_read_done(zio_t *zio) { + blkptr_t *bp = zio->io_bp; arc_buf_hdr_t *hdr = zio->io_private; kmutex_t *hash_lock = NULL; arc_callback_t *callback_list; @@ -4847,6 +5715,26 @@ arc_read_done(zio_t *zio) ASSERT3P(hash_lock, !=, NULL); } + if (BP_IS_PROTECTED(bp)) { + hdr->b_crypt_hdr.b_ot = BP_GET_TYPE(bp); + hdr->b_crypt_hdr.b_dsobj = zio->io_bookmark.zb_objset; + zio_crypt_decode_params_bp(bp, hdr->b_crypt_hdr.b_salt, + hdr->b_crypt_hdr.b_iv); + + if (BP_GET_TYPE(bp) == DMU_OT_INTENT_LOG) { + void *tmpbuf; + + tmpbuf = abd_borrow_buf_copy(zio->io_abd, + sizeof (zil_chain_t)); + zio_crypt_decode_mac_zil(tmpbuf, + hdr->b_crypt_hdr.b_mac); + abd_return_buf(zio->io_abd, tmpbuf, + sizeof (zil_chain_t)); + } else { + zio_crypt_decode_mac_bp(bp, hdr->b_crypt_hdr.b_mac); + } + } + if (zio->io_error == 0) { /* byteswap if necessary */ if (BP_SHOULD_BYTESWAP(zio->io_bp)) { @@ -4895,8 +5783,32 @@ arc_read_done(zio_t *zio) if (zio->io_error != 0) continue; - int error = arc_buf_alloc_impl(hdr, acb->acb_private, - acb->acb_compressed, B_TRUE, &acb->acb_buf); + int error = arc_buf_alloc_impl(hdr, zio->io_spa, + &acb->acb_zb, acb->acb_private, acb->acb_encrypted, + acb->acb_compressed, acb->acb_noauth, B_TRUE, + &acb->acb_buf); + + /* + * Assert non-speculative zios didn't fail because an + * encryption key wasn't loaded + */ + ASSERT((zio->io_flags & ZIO_FLAG_SPECULATIVE) || + error != EACCES); + + /* + * If we failed to decrypt, report an error now (as the zio + * layer would have done if it had done the transforms). 
+ */ + if (error == ECKSUM) { + ASSERT(BP_IS_PROTECTED(bp)); + error = SET_ERROR(EIO); + if ((zio->io_flags & ZIO_FLAG_SPECULATIVE) == 0) { + spa_log_error(zio->io_spa, &acb->acb_zb); + zfs_ereport_post(FM_EREPORT_ZFS_AUTHENTICATION, + zio->io_spa, NULL, &acb->acb_zb, zio, 0, 0); + } + } + if (error != 0) { /* * Decompression failed. Set io_error @@ -4915,6 +5827,7 @@ arc_read_done(zio_t *zio) zio->io_error = error; } } + /* * If there are multiple callbacks, we must have the hash lock, * because the only way for multiple threads to find this hdr is @@ -4926,11 +5839,8 @@ arc_read_done(zio_t *zio) hdr->b_l1hdr.b_acb = NULL; arc_hdr_clear_flags(hdr, ARC_FLAG_IO_IN_PROGRESS); - if (callback_cnt == 0) { - ASSERT(HDR_PREFETCH(hdr)); - ASSERT0(hdr->b_l1hdr.b_bufcnt); - ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL); - } + if (callback_cnt == 0) + ASSERT(hdr->b_l1hdr.b_pabd != NULL || HDR_HAS_RABD(hdr)); ASSERT(zfs_refcount_is_zero(&hdr->b_l1hdr.b_refcnt) || callback_list != NULL); @@ -4968,6 +5878,7 @@ arc_read_done(zio_t *zio) /* execute each callback and free its structure */ while ((acb = callback_list) != NULL) { + if (acb->acb_done != NULL) { if (zio->io_error != 0 && acb->acb_buf != NULL) { /* @@ -5022,7 +5933,11 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_read_done_func_t *done, kmutex_t *hash_lock = NULL; zio_t *rzio; uint64_t guid = spa_load_guid(spa); - boolean_t compressed_read = (zio_flags & ZIO_FLAG_RAW) != 0; + boolean_t compressed_read = (zio_flags & ZIO_FLAG_RAW_COMPRESS) != 0; + boolean_t encrypted_read = BP_IS_ENCRYPTED(bp) && + (zio_flags & ZIO_FLAG_RAW_ENCRYPT) != 0; + boolean_t noauth_read = BP_IS_AUTHENTICATED(bp) && + (zio_flags & ZIO_FLAG_RAW_ENCRYPT) != 0; int rc = 0; ASSERT(!BP_IS_EMBEDDED(bp) || @@ -5037,7 +5952,15 @@ top: hdr = buf_hash_find(guid, bp, &hash_lock); } - if (hdr != NULL && HDR_HAS_L1HDR(hdr) && hdr->b_l1hdr.b_pabd != NULL) { + /* + * Determine if we have an L1 cache hit or a cache miss. 
For simplicity + * we maintain encrypted data seperately from compressed / uncompressed + * data. If the user is requesting raw encrypted data and we don't have + * that in the header we will read from disk to guarantee that we can + * get it even if the encryption keys aren't loaded. + */ + if (hdr != NULL && HDR_HAS_L1HDR(hdr) && (HDR_HAS_RABD(hdr) || + (hdr->b_l1hdr.b_pabd != NULL && !encrypted_read))) { arc_buf_t *buf = NULL; *arc_flags |= ARC_FLAG_CACHED; @@ -5077,6 +6000,9 @@ top: acb->acb_done = done; acb->acb_private = private; acb->acb_compressed = compressed_read; + acb->acb_encrypted = encrypted_read; + acb->acb_noauth = noauth_read; + acb->acb_zb = *zb; if (pio != NULL) acb->acb_zio_dummy = zio_null(pio, spa, NULL, NULL, NULL, zio_flags); @@ -5120,15 +6046,35 @@ top: ASSERT(!BP_IS_EMBEDDED(bp) || !BP_IS_HOLE(bp)); + arc_hdr_verify_checksum(spa, hdr, bp); + /* Get a buf with the desired data in it. */ - rc = arc_buf_alloc_impl(hdr, private, - compressed_read, B_TRUE, &buf); + rc = arc_buf_alloc_impl(hdr, spa, zb, private, + encrypted_read, compressed_read, noauth_read, + B_TRUE, &buf); + if (rc == ECKSUM) { + /* + * Convert authentication and decryption errors + * to EIO (and generate an ereport if needed) + * before leaving the ARC. 
+ */ + rc = SET_ERROR(EIO); + if ((zio_flags & ZIO_FLAG_SPECULATIVE) == 0) { + spa_log_error(spa, zb); + zfs_ereport_post( + FM_EREPORT_ZFS_AUTHENTICATION, + spa, NULL, zb, NULL, 0, 0); + } + } if (rc != 0) { - arc_buf_destroy(buf, private); + (void) remove_reference(hdr, hash_lock, + private); + arc_buf_destroy_impl(buf); buf = NULL; } + /* assert any errors weren't due to unloaded keys */ ASSERT((zio_flags & ZIO_FLAG_SPECULATIVE) || - rc == 0 || rc != ENOENT); + rc != EACCES); } else if (*arc_flags & ARC_FLAG_PREFETCH && zfs_refcount_count(&hdr->b_l1hdr.b_refcnt) == 0) { arc_hdr_set_flags(hdr, ARC_FLAG_PREFETCH); @@ -5155,13 +6101,15 @@ top: uint64_t addr = 0; boolean_t devw = B_FALSE; uint64_t size; + abd_t *hdr_abd; if (hdr == NULL) { /* this block is not in the cache */ arc_buf_hdr_t *exists = NULL; arc_buf_contents_t type = BP_GET_BUFC_TYPE(bp); hdr = arc_hdr_alloc(spa_load_guid(spa), psize, lsize, - BP_GET_COMPRESS(bp), type); + BP_IS_PROTECTED(bp), BP_GET_COMPRESS(bp), type, + encrypted_read); if (!BP_IS_EMBEDDED(bp)) { hdr->b_dva = *BP_IDENTITY(bp); @@ -5177,25 +6125,43 @@ top: } } else { /* - * This block is in the ghost cache. If it was L2-only - * (and thus didn't have an L1 hdr), we realloc the - * header to add an L1 hdr. + * This block is in the ghost cache or encrypted data + * was requested and we didn't have it. If it was + * L2-only (and thus didn't have an L1 hdr), + * we realloc the header to add an L1 hdr. 
*/ if (!HDR_HAS_L1HDR(hdr)) { hdr = arc_hdr_realloc(hdr, hdr_l2only_cache, hdr_full_cache); } - ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); - ASSERT(GHOST_STATE(hdr->b_l1hdr.b_state)); - ASSERT(!HDR_IO_IN_PROGRESS(hdr)); - ASSERT(zfs_refcount_is_zero(&hdr->b_l1hdr.b_refcnt)); - ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL); - ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, ==, NULL); + + if (GHOST_STATE(hdr->b_l1hdr.b_state)) { + ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); + ASSERT(!HDR_HAS_RABD(hdr)); + ASSERT(!HDR_IO_IN_PROGRESS(hdr)); + ASSERT0(zfs_refcount_count( + &hdr->b_l1hdr.b_refcnt)); + ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL); + ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, ==, NULL); + } else if (HDR_IO_IN_PROGRESS(hdr)) { + /* + * If this header already had an IO in progress + * and we are performing another IO to fetch + * encrypted data we must wait until the first + * IO completes so as not to confuse + * arc_read_done(). This should be very rare + * and so the performance impact shouldn't + * matter. + */ + cv_wait(&hdr->b_l1hdr.b_cv, hash_lock); + mutex_exit(hash_lock); + goto top; + } /* * This is a delicate dance that we play here. - * This hdr is in the ghost list so we access it - * to move it out of the ghost list before we + * This hdr might be in the ghost list so we access + * it to move it out of the ghost list before we * initiate the read. If it's a prefetch then * it won't have a callback so we'll remove the * reference that arc_buf_alloc_impl() created. We @@ -5203,28 +6169,44 @@ top: * avoid hitting an assert in remove_reference(). */ arc_access(hdr, hash_lock); - arc_hdr_alloc_pabd(hdr); + arc_hdr_alloc_pabd(hdr, encrypted_read); } - ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL); - size = arc_hdr_size(hdr); - /* - * If compression is enabled on the hdr, then will do - * RAW I/O and will store the compressed data in the hdr's - * data block. Otherwise, the hdr's data block will contain - * the uncompressed data. 
- */ - if (HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF) { + if (encrypted_read) { + ASSERT(HDR_HAS_RABD(hdr)); + size = HDR_GET_PSIZE(hdr); + hdr_abd = hdr->b_crypt_hdr.b_rabd; zio_flags |= ZIO_FLAG_RAW; + } else { + ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL); + size = arc_hdr_size(hdr); + hdr_abd = hdr->b_l1hdr.b_pabd; + + if (arc_hdr_get_compress(hdr) != ZIO_COMPRESS_OFF) { + zio_flags |= ZIO_FLAG_RAW_COMPRESS; + } + + /* + * For authenticated bp's, we do not ask the ZIO layer + * to authenticate them since this will cause the entire + * IO to fail if the key isn't loaded. Instead, we + * defer authentication until arc_buf_fill(), which will + * verify the data when the key is available. + */ + if (BP_IS_AUTHENTICATED(bp)) + zio_flags |= ZIO_FLAG_RAW_ENCRYPT; } - if (*arc_flags & ARC_FLAG_PREFETCH) + if (*arc_flags & ARC_FLAG_PREFETCH && + zfs_refcount_is_zero(&hdr->b_l1hdr.b_refcnt)) arc_hdr_set_flags(hdr, ARC_FLAG_PREFETCH); if (*arc_flags & ARC_FLAG_PRESCIENT_PREFETCH) arc_hdr_set_flags(hdr, ARC_FLAG_PRESCIENT_PREFETCH); if (*arc_flags & ARC_FLAG_L2CACHE) arc_hdr_set_flags(hdr, ARC_FLAG_L2CACHE); + if (BP_IS_AUTHENTICATED(bp)) + arc_hdr_set_flags(hdr, ARC_FLAG_NOAUTH); if (BP_GET_LEVEL(bp) > 0) arc_hdr_set_flags(hdr, ARC_FLAG_INDIRECT); if (*arc_flags & ARC_FLAG_PREDICTIVE_PREFETCH) @@ -5235,6 +6217,9 @@ top: acb->acb_done = done; acb->acb_private = private; acb->acb_compressed = compressed_read; + acb->acb_encrypted = encrypted_read; + acb->acb_noauth = noauth_read; + acb->acb_zb = *zb; ASSERT3P(hdr->b_l1hdr.b_acb, ==, NULL); hdr->b_l1hdr.b_acb = acb; @@ -5309,7 +6294,7 @@ top: HDR_ISTYPE_METADATA(hdr)); cb->l2rcb_abd = abd; } else { - abd = hdr->b_l1hdr.b_pabd; + abd = hdr_abd; } ASSERT(addr >= VDEV_LABEL_START_SIZE && @@ -5322,7 +6307,7 @@ top: * Issue a null zio if the underlying buffer * was squashed to zero size by compression. 
*/ - ASSERT3U(HDR_GET_COMPRESS(hdr), !=, + ASSERT3U(arc_hdr_get_compress(hdr), !=, ZIO_COMPRESS_EMPTY); rzio = zio_read_phys(pio, vd, addr, asize, abd, @@ -5339,7 +6324,8 @@ top: DTRACE_PROBE2(l2arc__read, vdev_t *, vd, zio_t *, rzio); - ARCSTAT_INCR(arcstat_l2_read_bytes, size); + ARCSTAT_INCR(arcstat_l2_read_bytes, + HDR_GET_PSIZE(hdr)); if (*arc_flags & ARC_FLAG_NOWAIT) { zio_nowait(rzio); @@ -5371,7 +6357,7 @@ top: } } - rzio = zio_read(pio, spa, bp, hdr->b_l1hdr.b_pabd, size, + rzio = zio_read(pio, spa, bp, hdr_abd, size, arc_read_done, hdr, priority, zio_flags, zb); acb->acb_zio_head = rzio; @@ -5384,7 +6370,7 @@ top: ASSERT(*arc_flags & ARC_FLAG_NOWAIT); zio_nowait(rzio); } - return (0); + return (rc); } /* @@ -5448,7 +6434,7 @@ arc_release(arc_buf_t *buf, void *tag) arc_buf_hdr_t *hdr = buf->b_hdr; /* - * It would be nice to assert that if it's DMU metadata (level > + * It would be nice to assert that if its DMU metadata (level > * 0 || it's the dnode file), then it must be syncing context. * But we don't know that information at this level. */ @@ -5464,7 +6450,13 @@ arc_release(arc_buf_t *buf, void *tag) */ if (hdr->b_l1hdr.b_state == arc_anon) { mutex_exit(&buf->b_evict_lock); - ASSERT(!HDR_IO_IN_PROGRESS(hdr)); + /* + * If we are called from dmu_convert_mdn_block_to_raw(), + * a write might be in progress. This is OK because + * the caller won't change the content of this buffer, + * only the flags (via arc_convert_to_raw()). 
+ */ + /* ASSERT(!HDR_IO_IN_PROGRESS(hdr)); */ ASSERT(!HDR_IN_HASH_TABLE(hdr)); ASSERT(!HDR_HAS_L2HDR(hdr)); ASSERT(HDR_EMPTY(hdr)); @@ -5525,7 +6517,8 @@ arc_release(arc_buf_t *buf, void *tag) uint64_t spa = hdr->b_spa; uint64_t psize = HDR_GET_PSIZE(hdr); uint64_t lsize = HDR_GET_LSIZE(hdr); - enum zio_compress compress = HDR_GET_COMPRESS(hdr); + boolean_t protected = HDR_PROTECTED(hdr); + enum zio_compress compress = arc_hdr_get_compress(hdr); arc_buf_contents_t type = arc_buf_type(hdr); VERIFY3U(hdr->b_type, ==, type); @@ -5550,6 +6543,7 @@ arc_release(arc_buf_t *buf, void *tag) * buffer, then we must stop sharing that block. */ if (arc_buf_is_shared(buf)) { + ASSERT3P(hdr->b_l1hdr.b_buf, !=, buf); VERIFY(!arc_buf_is_shared(lastbuf)); /* @@ -5567,7 +6561,7 @@ arc_release(arc_buf_t *buf, void *tag) if (arc_can_share(hdr, lastbuf)) { arc_share_buf(hdr, lastbuf); } else { - arc_hdr_alloc_pabd(hdr); + arc_hdr_alloc_pabd(hdr, B_FALSE); abd_copy_from_buf(hdr->b_l1hdr.b_pabd, buf->b_data, psize); } @@ -5582,10 +6576,10 @@ arc_release(arc_buf_t *buf, void *tag) * if we have a compressed, shared buffer. */ ASSERT(arc_buf_is_shared(lastbuf) || - HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF); + arc_hdr_get_compress(hdr) != ZIO_COMPRESS_OFF); ASSERT(!ARC_BUF_SHARED(buf)); } - ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL); + ASSERT(hdr->b_l1hdr.b_pabd != NULL || HDR_HAS_RABD(hdr)); ASSERT3P(state, !=, arc_l2c_only); (void) zfs_refcount_remove_many(&state->arcs_size, @@ -5599,16 +6593,24 @@ arc_release(arc_buf_t *buf, void *tag) } hdr->b_l1hdr.b_bufcnt -= 1; + if (ARC_BUF_ENCRYPTED(buf)) + hdr->b_crypt_hdr.b_ebufcnt -= 1; + arc_cksum_verify(buf); arc_buf_unwatch(buf); + /* if this is the last uncompressed buf free the checksum */ + if (!arc_hdr_has_uncompressed_buf(hdr)) + arc_cksum_free(hdr); + mutex_exit(hash_lock); /* * Allocate a new hdr. The new hdr will contain a b_pabd * buffer which will be freed in arc_write(). 
*/ - nhdr = arc_hdr_alloc(spa, psize, lsize, compress, type); + nhdr = arc_hdr_alloc(spa, psize, lsize, protected, + compress, type, HDR_HAS_RABD(hdr)); ASSERT3P(nhdr->b_l1hdr.b_buf, ==, NULL); ASSERT0(nhdr->b_l1hdr.b_bufcnt); ASSERT0(zfs_refcount_count(&nhdr->b_l1hdr.b_refcnt)); @@ -5617,6 +6619,8 @@ arc_release(arc_buf_t *buf, void *tag) nhdr->b_l1hdr.b_buf = buf; nhdr->b_l1hdr.b_bufcnt = 1; + if (ARC_BUF_ENCRYPTED(buf)) + nhdr->b_crypt_hdr.b_ebufcnt = 1; (void) zfs_refcount_add(&nhdr->b_l1hdr.b_refcnt, tag); buf->b_hdr = nhdr; @@ -5631,8 +6635,8 @@ arc_release(arc_buf_t *buf, void *tag) ASSERT(!HDR_IO_IN_PROGRESS(hdr)); arc_change_state(arc_anon, hdr, hash_lock); hdr->b_l1hdr.b_arc_access = 0; - mutex_exit(hash_lock); + mutex_exit(hash_lock); buf_discard_identity(hdr); arc_buf_thaw(buf); } @@ -5669,7 +6673,8 @@ arc_write_ready(zio_t *zio) arc_write_callback_t *callback = zio->io_private; arc_buf_t *buf = callback->awcb_buf; arc_buf_hdr_t *hdr = buf->b_hdr; - uint64_t psize = BP_IS_HOLE(zio->io_bp) ? 0 : BP_GET_PSIZE(zio->io_bp); + blkptr_t *bp = zio->io_bp; + uint64_t psize = BP_IS_HOLE(bp) ? 
0 : BP_GET_PSIZE(bp); ASSERT(HDR_HAS_L1HDR(hdr)); ASSERT(!zfs_refcount_is_zero(&buf->b_hdr->b_l1hdr.b_refcnt)); @@ -5687,11 +6692,15 @@ arc_write_ready(zio_t *zio) if (arc_buf_is_shared(buf)) { arc_unshare_buf(hdr, buf); } else { - arc_hdr_free_pabd(hdr); + arc_hdr_free_pabd(hdr, B_FALSE); } } + + if (HDR_HAS_RABD(hdr)) + arc_hdr_free_pabd(hdr, B_TRUE); } ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); + ASSERT(!HDR_HAS_RABD(hdr)); ASSERT(!HDR_SHARED_DATA(hdr)); ASSERT(!arc_buf_is_shared(buf)); @@ -5700,23 +6709,69 @@ arc_write_ready(zio_t *zio) if (HDR_IO_IN_PROGRESS(hdr)) ASSERT(zio->io_flags & ZIO_FLAG_REEXECUTED); - arc_cksum_compute(buf); arc_hdr_set_flags(hdr, ARC_FLAG_IO_IN_PROGRESS); + if (BP_IS_PROTECTED(bp) != !!HDR_PROTECTED(hdr)) + hdr = arc_hdr_realloc_crypt(hdr, BP_IS_PROTECTED(bp)); + + if (BP_IS_PROTECTED(bp)) { + /* ZIL blocks are written through zio_rewrite */ + ASSERT3U(BP_GET_TYPE(bp), !=, DMU_OT_INTENT_LOG); + ASSERT(HDR_PROTECTED(hdr)); + + if (BP_SHOULD_BYTESWAP(bp)) { + if (BP_GET_LEVEL(bp) > 0) { + hdr->b_l1hdr.b_byteswap = DMU_BSWAP_UINT64; + } else { + hdr->b_l1hdr.b_byteswap = + DMU_OT_BYTESWAP(BP_GET_TYPE(bp)); + } + } else { + hdr->b_l1hdr.b_byteswap = DMU_BSWAP_NUMFUNCS; + } + + hdr->b_crypt_hdr.b_ot = BP_GET_TYPE(bp); + hdr->b_crypt_hdr.b_dsobj = zio->io_bookmark.zb_objset; + zio_crypt_decode_params_bp(bp, hdr->b_crypt_hdr.b_salt, + hdr->b_crypt_hdr.b_iv); + zio_crypt_decode_mac_bp(bp, hdr->b_crypt_hdr.b_mac); + } + + /* + * If this block was written for raw encryption but the zio layer + * ended up only authenticating it, adjust the buffer flags now. 
+ */ + if (BP_IS_AUTHENTICATED(bp) && ARC_BUF_ENCRYPTED(buf)) { + arc_hdr_set_flags(hdr, ARC_FLAG_NOAUTH); + buf->b_flags &= ~ARC_BUF_FLAG_ENCRYPTED; + if (BP_GET_COMPRESS(bp) == ZIO_COMPRESS_OFF) + buf->b_flags &= ~ARC_BUF_FLAG_COMPRESSED; + } else if (BP_IS_HOLE(bp) && ARC_BUF_ENCRYPTED(buf)) { + buf->b_flags &= ~ARC_BUF_FLAG_ENCRYPTED; + buf->b_flags &= ~ARC_BUF_FLAG_COMPRESSED; + } + + /* this must be done after the buffer flags are adjusted */ + arc_cksum_compute(buf); + enum zio_compress compress; - if (BP_IS_HOLE(zio->io_bp) || BP_IS_EMBEDDED(zio->io_bp)) { + if (BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp)) { compress = ZIO_COMPRESS_OFF; } else { - ASSERT3U(HDR_GET_LSIZE(hdr), ==, BP_GET_LSIZE(zio->io_bp)); - compress = BP_GET_COMPRESS(zio->io_bp); + ASSERT3U(HDR_GET_LSIZE(hdr), ==, BP_GET_LSIZE(bp)); + compress = BP_GET_COMPRESS(bp); } HDR_SET_PSIZE(hdr, psize); arc_hdr_set_compress(hdr, compress); + if (zio->io_error != 0 || psize == 0) + goto out; /* - * Fill the hdr with data. If the hdr is compressed, the data we want - * is available from the zio, otherwise we can take it from the buf. + * Fill the hdr with data. If the buffer is encrypted we have no choice + * but to copy the data into b_rabd. If the hdr is compressed, the data + * we want is available from the zio, otherwise we can take it from + * the buf. * * We might be able to share the buf's data with the hdr here. However, * doing so would cause the ARC to be full of linear ABDs if we write a @@ -5726,23 +6781,29 @@ arc_write_ready(zio_t *zio) * written. Therefore, if they're allowed then we allocate one and copy * the data into it; otherwise, we share the data directly if we can. 
*/ - if (zfs_abd_scatter_enabled || !arc_can_share(hdr, buf)) { - arc_hdr_alloc_pabd(hdr); - + if (ARC_BUF_ENCRYPTED(buf)) { + ASSERT3U(psize, >, 0); + ASSERT(ARC_BUF_COMPRESSED(buf)); + arc_hdr_alloc_pabd(hdr, B_TRUE); + abd_copy(hdr->b_crypt_hdr.b_rabd, zio->io_abd, psize); + } else if (zfs_abd_scatter_enabled || !arc_can_share(hdr, buf)) { /* * Ideally, we would always copy the io_abd into b_pabd, but the * user may have disabled compressed ARC, thus we must check the * hdr's compression setting rather than the io_bp's. */ - if (HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF) { - ASSERT3U(BP_GET_COMPRESS(zio->io_bp), !=, - ZIO_COMPRESS_OFF); + if (BP_IS_ENCRYPTED(bp)) { ASSERT3U(psize, >, 0); - + arc_hdr_alloc_pabd(hdr, B_TRUE); + abd_copy(hdr->b_crypt_hdr.b_rabd, zio->io_abd, psize); + } else if (arc_hdr_get_compress(hdr) != ZIO_COMPRESS_OFF && + !ARC_BUF_COMPRESSED(buf)) { + ASSERT3U(psize, >, 0); + arc_hdr_alloc_pabd(hdr, B_FALSE); abd_copy(hdr->b_l1hdr.b_pabd, zio->io_abd, psize); } else { ASSERT3U(zio->io_orig_size, ==, arc_hdr_size(hdr)); - + arc_hdr_alloc_pabd(hdr, B_FALSE); abd_copy_from_buf(hdr->b_l1hdr.b_pabd, buf->b_data, arc_buf_size(buf)); } @@ -5750,11 +6811,11 @@ arc_write_ready(zio_t *zio) ASSERT3P(buf->b_data, ==, abd_to_buf(zio->io_orig_abd)); ASSERT3U(zio->io_orig_size, ==, arc_buf_size(buf)); ASSERT3U(hdr->b_l1hdr.b_bufcnt, ==, 1); - arc_share_buf(hdr, buf); } - arc_hdr_verify(hdr, zio->io_bp); +out: + arc_hdr_verify(hdr, bp); } static void @@ -5882,17 +6943,33 @@ arc_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, arc_buf_t *buf, ASSERT3U(hdr->b_l1hdr.b_bufcnt, >, 0); if (l2arc) arc_hdr_set_flags(hdr, ARC_FLAG_L2CACHE); - if (ARC_BUF_COMPRESSED(buf)) { - /* - * We're writing a pre-compressed buffer. Make the - * compression algorithm requested by the zio_prop_t match - * the pre-compressed buffer's compression algorithm. 
- */ - localprop.zp_compress = HDR_GET_COMPRESS(hdr); - ASSERT3U(HDR_GET_LSIZE(hdr), !=, arc_buf_size(buf)); + if (ARC_BUF_ENCRYPTED(buf)) { + ASSERT(ARC_BUF_COMPRESSED(buf)); + localprop.zp_encrypt = B_TRUE; + localprop.zp_compress = HDR_GET_COMPRESS(hdr); + /* CONSTCOND */ + localprop.zp_byteorder = + (hdr->b_l1hdr.b_byteswap == DMU_BSWAP_NUMFUNCS) ? + ZFS_HOST_BYTEORDER : !ZFS_HOST_BYTEORDER; + bcopy(hdr->b_crypt_hdr.b_salt, localprop.zp_salt, + ZIO_DATA_SALT_LEN); + bcopy(hdr->b_crypt_hdr.b_iv, localprop.zp_iv, + ZIO_DATA_IV_LEN); + bcopy(hdr->b_crypt_hdr.b_mac, localprop.zp_mac, + ZIO_DATA_MAC_LEN); + if (DMU_OT_IS_ENCRYPTED(localprop.zp_type)) { + localprop.zp_nopwrite = B_FALSE; + localprop.zp_copies = + MIN(localprop.zp_copies, SPA_DVAS_PER_BP - 1); + } zio_flags |= ZIO_FLAG_RAW; + } else if (ARC_BUF_COMPRESSED(buf)) { + ASSERT3U(HDR_GET_LSIZE(hdr), !=, arc_buf_size(buf)); + localprop.zp_compress = HDR_GET_COMPRESS(hdr); + zio_flags |= ZIO_FLAG_RAW_COMPRESS; } + callback = kmem_zalloc(sizeof (arc_write_callback_t), KM_SLEEP); callback->awcb_ready = ready; callback->awcb_children_ready = children_ready; @@ -5915,11 +6992,17 @@ arc_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, arc_buf_t *buf, if (arc_buf_is_shared(buf)) { arc_unshare_buf(hdr, buf); } else { - arc_hdr_free_pabd(hdr); + arc_hdr_free_pabd(hdr, B_FALSE); } VERIFY3P(buf->b_data, !=, NULL); - arc_hdr_set_compress(hdr, ZIO_COMPRESS_OFF); } + + if (HDR_HAS_RABD(hdr)) + arc_hdr_free_pabd(hdr, B_TRUE); + + if (!(zio_flags & ZIO_FLAG_RAW)) + arc_hdr_set_compress(hdr, ZIO_COMPRESS_OFF); + ASSERT(!arc_buf_is_shared(buf)); ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); @@ -6123,8 +7206,8 @@ arc_state_multilist_index_func(multilist_t *ml, void *obj) /* * The assumption here, is the hash value for a given - * arc_buf_hdr_t will remain constant throughout it's lifetime - * (i.e. it's b_spa, b_dva, and b_birth fields don't change). + * arc_buf_hdr_t will remain constant throughout its lifetime + * (i.e. 
its b_spa, b_dva, and b_birth fields don't change). * Thus, we don't need to store the header's sublist index * on insertion, as this index can be recalculated on removal. * @@ -6248,6 +7331,8 @@ arc_state_fini(void) multilist_destroy(arc_mru_ghost->arcs_list[ARC_BUFC_DATA]); multilist_destroy(arc_mfu->arcs_list[ARC_BUFC_DATA]); multilist_destroy(arc_mfu_ghost->arcs_list[ARC_BUFC_DATA]); + multilist_destroy(arc_l2c_only->arcs_list[ARC_BUFC_METADATA]); + multilist_destroy(arc_l2c_only->arcs_list[ARC_BUFC_DATA]); aggsum_fini(&arc_meta_used); aggsum_fini(&arc_size); @@ -6256,6 +7341,7 @@ arc_state_fini(void) aggsum_fini(&astat_hdr_size); aggsum_fini(&astat_other_size); aggsum_fini(&astat_l2_hdr_size); + } uint64_t @@ -6843,6 +7929,96 @@ top: kmem_free(cb, sizeof (l2arc_write_callback_t)); } +static int +l2arc_untransform(zio_t *zio, l2arc_read_callback_t *cb) +{ + int ret; + spa_t *spa = zio->io_spa; + arc_buf_hdr_t *hdr = cb->l2rcb_hdr; + blkptr_t *bp = zio->io_bp; + uint8_t salt[ZIO_DATA_SALT_LEN]; + uint8_t iv[ZIO_DATA_IV_LEN]; + uint8_t mac[ZIO_DATA_MAC_LEN]; + boolean_t no_crypt = B_FALSE; + + /* + * ZIL data is never be written to the L2ARC, so we don't need + * special handling for its unique MAC storage. + */ + ASSERT3U(BP_GET_TYPE(bp), !=, DMU_OT_INTENT_LOG); + ASSERT(MUTEX_HELD(HDR_LOCK(hdr))); + ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL); + + /* + * If the data was encrypted, decrypt it now. Note that + * we must check the bp here and not the hdr, since the + * hdr does not have its encryption parameters updated + * until arc_read_done(). 
+ */ + if (BP_IS_ENCRYPTED(bp)) { + abd_t *eabd = arc_get_data_abd(hdr, arc_hdr_size(hdr), hdr); + + zio_crypt_decode_params_bp(bp, salt, iv); + zio_crypt_decode_mac_bp(bp, mac); + + ret = spa_do_crypt_abd(B_FALSE, spa, &cb->l2rcb_zb, + BP_GET_TYPE(bp), BP_GET_DEDUP(bp), BP_SHOULD_BYTESWAP(bp), + salt, iv, mac, HDR_GET_PSIZE(hdr), eabd, + hdr->b_l1hdr.b_pabd, &no_crypt); + if (ret != 0) { + arc_free_data_abd(hdr, eabd, arc_hdr_size(hdr), hdr); + goto error; + } + + /* + * If we actually performed decryption, replace b_pabd + * with the decrypted data. Otherwise we can just throw + * our decryption buffer away. + */ + if (!no_crypt) { + arc_free_data_abd(hdr, hdr->b_l1hdr.b_pabd, + arc_hdr_size(hdr), hdr); + hdr->b_l1hdr.b_pabd = eabd; + zio->io_abd = eabd; + } else { + arc_free_data_abd(hdr, eabd, arc_hdr_size(hdr), hdr); + } + } + + /* + * If the L2ARC block was compressed, but ARC compression + * is disabled we decompress the data into a new buffer and + * replace the existing data. + */ + if (HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF && + !HDR_COMPRESSION_ENABLED(hdr)) { + abd_t *cabd = arc_get_data_abd(hdr, arc_hdr_size(hdr), hdr); + void *tmp = abd_borrow_buf(cabd, arc_hdr_size(hdr)); + + ret = zio_decompress_data(HDR_GET_COMPRESS(hdr), + hdr->b_l1hdr.b_pabd, tmp, HDR_GET_PSIZE(hdr), + HDR_GET_LSIZE(hdr)); + if (ret != 0) { + abd_return_buf_copy(cabd, tmp, arc_hdr_size(hdr)); + arc_free_data_abd(hdr, cabd, arc_hdr_size(hdr), hdr); + goto error; + } + + abd_return_buf_copy(cabd, tmp, arc_hdr_size(hdr)); + arc_free_data_abd(hdr, hdr->b_l1hdr.b_pabd, + arc_hdr_size(hdr), hdr); + hdr->b_l1hdr.b_pabd = cabd; + zio->io_abd = cabd; + zio->io_size = HDR_GET_LSIZE(hdr); + } + + return (0); + +error: + return (ret); +} + + /* * A read to a cache device completed. Validate buffer contents before * handing over to the regular ARC routines. 
@@ -6850,17 +8026,19 @@ top: static void l2arc_read_done(zio_t *zio) { - l2arc_read_callback_t *cb; + int tfm_error = 0; + l2arc_read_callback_t *cb = zio->io_private; arc_buf_hdr_t *hdr; kmutex_t *hash_lock; boolean_t valid_cksum; + boolean_t using_rdata = (BP_IS_ENCRYPTED(&cb->l2rcb_bp) && + (cb->l2rcb_flags & ZIO_FLAG_RAW_ENCRYPT)); ASSERT3P(zio->io_vd, !=, NULL); ASSERT(zio->io_flags & ZIO_FLAG_DONT_PROPAGATE); spa_config_exit(zio->io_spa, SCL_L2ARC, zio->io_vd); - cb = zio->io_private; ASSERT3P(cb, !=, NULL); hdr = cb->l2rcb_hdr; ASSERT3P(hdr, !=, NULL); @@ -6876,8 +8054,13 @@ l2arc_read_done(zio_t *zio) if (cb->l2rcb_abd != NULL) { ASSERT3U(arc_hdr_size(hdr), <, zio->io_size); if (zio->io_error == 0) { - abd_copy(hdr->b_l1hdr.b_pabd, cb->l2rcb_abd, - arc_hdr_size(hdr)); + if (using_rdata) { + abd_copy(hdr->b_crypt_hdr.b_rabd, + cb->l2rcb_abd, arc_hdr_size(hdr)); + } else { + abd_copy(hdr->b_l1hdr.b_pabd, + cb->l2rcb_abd, arc_hdr_size(hdr)); + } } /* @@ -6893,7 +8076,15 @@ l2arc_read_done(zio_t *zio) */ abd_free(cb->l2rcb_abd); zio->io_size = zio->io_orig_size = arc_hdr_size(hdr); - zio->io_abd = zio->io_orig_abd = hdr->b_l1hdr.b_pabd; + + if (using_rdata) { + ASSERT(HDR_HAS_RABD(hdr)); + zio->io_abd = zio->io_orig_abd = + hdr->b_crypt_hdr.b_rabd; + } else { + ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL); + zio->io_abd = zio->io_orig_abd = hdr->b_l1hdr.b_pabd; + } } ASSERT3P(zio->io_abd, !=, NULL); @@ -6901,12 +8092,23 @@ l2arc_read_done(zio_t *zio) /* * Check this survived the L2ARC journey. 
*/ - ASSERT3P(zio->io_abd, ==, hdr->b_l1hdr.b_pabd); + ASSERT(zio->io_abd == hdr->b_l1hdr.b_pabd || + (HDR_HAS_RABD(hdr) && zio->io_abd == hdr->b_crypt_hdr.b_rabd)); zio->io_bp_copy = cb->l2rcb_bp; /* XXX fix in L2ARC 2.0 */ zio->io_bp = &zio->io_bp_copy; /* XXX fix in L2ARC 2.0 */ valid_cksum = arc_cksum_is_equal(hdr, zio); - if (valid_cksum && zio->io_error == 0 && !HDR_L2_EVICTED(hdr)) { + + /* + * b_rabd will always match the data as it exists on disk if it is + * being used. Therefore if we are reading into b_rabd we do not + * attempt to untransform the data. + */ + if (valid_cksum && !using_rdata) + tfm_error = l2arc_untransform(zio, cb); + + if (valid_cksum && tfm_error == 0 && zio->io_error == 0 && + !HDR_L2_EVICTED(hdr)) { mutex_exit(hash_lock); zio->io_private = hdr; arc_read_done(zio); @@ -6921,7 +8123,7 @@ l2arc_read_done(zio_t *zio) } else { zio->io_error = SET_ERROR(EIO); } - if (!valid_cksum) + if (!valid_cksum || tfm_error != 0) ARCSTAT_BUMP(arcstat_l2_cksum_bad); /* @@ -6931,11 +8133,13 @@ l2arc_read_done(zio_t *zio) */ if (zio->io_waiter == NULL) { zio_t *pio = zio_unique_parent(zio); + void *abd = (using_rdata) ? + hdr->b_crypt_hdr.b_rabd : hdr->b_l1hdr.b_pabd; ASSERT(!pio || pio->io_child_type == ZIO_CHILD_LOGICAL); zio_nowait(zio_read(pio, zio->io_spa, zio->io_bp, - hdr->b_l1hdr.b_pabd, zio->io_size, arc_read_done, + abd, zio->io_size, arc_read_done, hdr, zio->io_priority, cb->l2rcb_flags, &cb->l2rcb_zb)); } @@ -7095,6 +8299,123 @@ top: } /* + * Handle any abd transforms that might be required for writing to the L2ARC. + * If successful, this function will always return an abd with the data + * transformed as it is on disk in a new abd of asize bytes. 
+ */ +static int +l2arc_apply_transforms(spa_t *spa, arc_buf_hdr_t *hdr, uint64_t asize, + abd_t **abd_out) +{ + int ret; + void *tmp = NULL; + abd_t *cabd = NULL, *eabd = NULL, *to_write = hdr->b_l1hdr.b_pabd; + enum zio_compress compress = HDR_GET_COMPRESS(hdr); + uint64_t psize = HDR_GET_PSIZE(hdr); + uint64_t size = arc_hdr_size(hdr); + boolean_t ismd = HDR_ISTYPE_METADATA(hdr); + boolean_t bswap = (hdr->b_l1hdr.b_byteswap != DMU_BSWAP_NUMFUNCS); + dsl_crypto_key_t *dck = NULL; + uint8_t mac[ZIO_DATA_MAC_LEN] = { 0 }; + boolean_t no_crypt = B_FALSE; + + ASSERT((HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF && + !HDR_COMPRESSION_ENABLED(hdr)) || + HDR_ENCRYPTED(hdr) || HDR_SHARED_DATA(hdr) || psize != asize); + ASSERT3U(psize, <=, asize); + + /* + * If this data simply needs its own buffer, we simply allocate it + * and copy the data. This may be done to eliminate a dependency on a + * shared buffer or to reallocate the buffer to match asize. + */ + if (HDR_HAS_RABD(hdr) && asize != psize) { + ASSERT3U(asize, >=, psize); + to_write = abd_alloc_for_io(asize, ismd); + abd_copy(to_write, hdr->b_crypt_hdr.b_rabd, psize); + if (psize != asize) + abd_zero_off(to_write, psize, asize - psize); + goto out; + } + + if ((compress == ZIO_COMPRESS_OFF || HDR_COMPRESSION_ENABLED(hdr)) && + !HDR_ENCRYPTED(hdr)) { + ASSERT3U(size, ==, psize); + to_write = abd_alloc_for_io(asize, ismd); + abd_copy(to_write, hdr->b_l1hdr.b_pabd, size); + if (size != asize) + abd_zero_off(to_write, size, asize - size); + goto out; + } + + if (compress != ZIO_COMPRESS_OFF && !HDR_COMPRESSION_ENABLED(hdr)) { + cabd = abd_alloc_for_io(asize, ismd); + tmp = abd_borrow_buf(cabd, asize); + + psize = zio_compress_data(compress, to_write, tmp, size); + ASSERT3U(psize, <=, HDR_GET_PSIZE(hdr)); + if (psize < asize) + bzero((char *)tmp + psize, asize - psize); + psize = HDR_GET_PSIZE(hdr); + abd_return_buf_copy(cabd, tmp, asize); + to_write = cabd; + } + + if (HDR_ENCRYPTED(hdr)) { + eabd = 
abd_alloc_for_io(asize, ismd); + + /* + * If the dataset was disowned before the buffer + * made it to this point, the key to re-encrypt + * it won't be available. In this case we simply + * won't write the buffer to the L2ARC. + */ + ret = spa_keystore_lookup_key(spa, hdr->b_crypt_hdr.b_dsobj, + FTAG, &dck); + if (ret != 0) + goto error; + + ret = zio_do_crypt_abd(B_TRUE, &dck->dck_key, + hdr->b_crypt_hdr.b_ot, bswap, hdr->b_crypt_hdr.b_salt, + hdr->b_crypt_hdr.b_iv, mac, psize, to_write, eabd, + &no_crypt); + if (ret != 0) + goto error; + + if (no_crypt) + abd_copy(eabd, to_write, psize); + + if (psize != asize) + abd_zero_off(eabd, psize, asize - psize); + + /* assert that the MAC we got here matches the one we saved */ + ASSERT0(bcmp(mac, hdr->b_crypt_hdr.b_mac, ZIO_DATA_MAC_LEN)); + spa_keystore_dsl_key_rele(spa, dck, FTAG); + + if (to_write == cabd) + abd_free(cabd); + + to_write = eabd; + } + +out: + ASSERT3P(to_write, !=, hdr->b_l1hdr.b_pabd); + *abd_out = to_write; + return (0); + +error: + if (dck != NULL) + spa_keystore_dsl_key_rele(spa, dck, FTAG); + if (cabd != NULL) + abd_free(cabd); + if (eabd != NULL) + abd_free(eabd); + + *abd_out = NULL; + return (ret); +} + +/* * Find and write ARC buffers to the L2ARC device. * * An ARC_FLAG_L2_WRITING flag is set so that the L2ARC buffers are not valid @@ -7130,6 +8451,8 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz) multilist_sublist_t *mls = l2arc_sublist_lock(try); uint64_t passed_sz = 0; + VERIFY3P(mls, !=, NULL); + /* * L2ARC fast warmup. 
* @@ -7147,6 +8470,7 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz) for (; hdr; hdr = hdr_prev) { kmutex_t *hash_lock; + abd_t *to_write = NULL; if (arc_warm == B_FALSE) hdr_prev = multilist_sublist_next(mls, hdr); @@ -7184,9 +8508,10 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz) ASSERT(HDR_HAS_L1HDR(hdr)); ASSERT3U(HDR_GET_PSIZE(hdr), >, 0); - ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL); ASSERT3U(arc_hdr_size(hdr), >, 0); - uint64_t psize = arc_hdr_size(hdr); + ASSERT(hdr->b_l1hdr.b_pabd != NULL || + HDR_HAS_RABD(hdr)); + uint64_t psize = HDR_GET_PSIZE(hdr); uint64_t asize = vdev_psize_to_asize(dev->l2ad_vdev, psize); @@ -7196,6 +8521,57 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz) break; } + /* + * We rely on the L1 portion of the header below, so + * it's invalid for this header to have been evicted out + * of the ghost cache, prior to being written out. The + * ARC_FLAG_L2_WRITING bit ensures this won't happen. + */ + arc_hdr_set_flags(hdr, ARC_FLAG_L2_WRITING); + ASSERT(HDR_HAS_L1HDR(hdr)); + + ASSERT3U(HDR_GET_PSIZE(hdr), >, 0); + ASSERT(hdr->b_l1hdr.b_pabd != NULL || + HDR_HAS_RABD(hdr)); + ASSERT3U(arc_hdr_size(hdr), >, 0); + + /* + * If this header has b_rabd, we can use this since it + * must always match the data exactly as it exists on + * disk. Otherwise, the L2ARC can normally use the + * hdr's data, but if we're sharing data between the + * hdr and one of its bufs, L2ARC needs its own copy of + * the data so that the ZIO below can't race with the + * buf consumer. To ensure that this copy will be + * available for the lifetime of the ZIO and be cleaned + * up afterwards, we add it to the l2arc_free_on_write + * queue. If we need to apply any transforms to the + * data (compression, encryption) we will also need the + * extra buffer. 
+ */ + if (HDR_HAS_RABD(hdr) && psize == asize) { + to_write = hdr->b_crypt_hdr.b_rabd; + } else if ((HDR_COMPRESSION_ENABLED(hdr) || + HDR_GET_COMPRESS(hdr) == ZIO_COMPRESS_OFF) && + !HDR_ENCRYPTED(hdr) && !HDR_SHARED_DATA(hdr) && + psize == asize) { + to_write = hdr->b_l1hdr.b_pabd; + } else { + int ret; + arc_buf_contents_t type = arc_buf_type(hdr); + + ret = l2arc_apply_transforms(spa, hdr, asize, + &to_write); + if (ret != 0) { + arc_hdr_clear_flags(hdr, + ARC_FLAG_L2_WRITING); + mutex_exit(hash_lock); + continue; + } + + l2arc_free_abd_on_write(to_write, asize, type); + } + if (pio == NULL) { /* * Insert a dummy header on the buflist so @@ -7223,37 +8599,9 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz) list_insert_head(&dev->l2ad_buflist, hdr); mutex_exit(&dev->l2ad_mtx); - (void) zfs_refcount_add_many(&dev->l2ad_alloc, psize, - hdr); + (void) zfs_refcount_add_many(&dev->l2ad_alloc, + arc_hdr_size(hdr), hdr); - /* - * Normally the L2ARC can use the hdr's data, but if - * we're sharing data between the hdr and one of its - * bufs, L2ARC needs its own copy of the data so that - * the ZIO below can't race with the buf consumer. - * Another case where we need to create a copy of the - * data is when the buffer size is not device-aligned - * and we need to pad the block to make it such. - * That also keeps the clock hand suitably aligned. - * - * To ensure that the copy will be available for the - * lifetime of the ZIO and be cleaned up afterwards, we - * add it to the l2arc_free_on_write queue. 
- */ - abd_t *to_write; - if (!HDR_SHARED_DATA(hdr) && psize == asize) { - to_write = hdr->b_l1hdr.b_pabd; - } else { - to_write = abd_alloc_for_io(asize, - HDR_ISTYPE_METADATA(hdr)); - abd_copy(to_write, hdr->b_l1hdr.b_pabd, psize); - if (asize != psize) { - abd_zero_off(to_write, psize, - asize - psize); - } - l2arc_free_abd_on_write(to_write, asize, - arc_buf_type(hdr)); - } wzio = zio_write_phys(pio, dev->l2ad_vdev, hdr->b_l2hdr.b_daddr, asize, to_write, ZIO_CHECKSUM_OFF, NULL, hdr, diff --git a/usr/src/uts/common/fs/zfs/bpobj.c b/usr/src/uts/common/fs/zfs/bpobj.c index bbdd765214..ec0d115cfc 100644 --- a/usr/src/uts/common/fs/zfs/bpobj.c +++ b/usr/src/uts/common/fs/zfs/bpobj.c @@ -266,7 +266,7 @@ bpobj_iterate_impl(bpobj_t *bpo, bpobj_itor_t func, void *arg, dmu_tx_t *tx, } if (free) { VERIFY3U(0, ==, dmu_free_range(bpo->bpo_os, bpo->bpo_object, - (i + 1) * sizeof (blkptr_t), -1ULL, tx)); + (i + 1) * sizeof (blkptr_t), DMU_OBJECT_END, tx)); } if (err || !bpo->bpo_havesubobj || bpo->bpo_phys->bpo_subobjs == 0) goto out; @@ -344,7 +344,7 @@ bpobj_iterate_impl(bpobj_t *bpo, bpobj_itor_t func, void *arg, dmu_tx_t *tx, if (free) { VERIFY3U(0, ==, dmu_free_range(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs, - (i + 1) * sizeof (uint64_t), -1ULL, tx)); + (i + 1) * sizeof (uint64_t), DMU_OBJECT_END, tx)); } out: diff --git a/usr/src/uts/common/fs/zfs/bptree.c b/usr/src/uts/common/fs/zfs/bptree.c index c74d07236c..1a432507f7 100644 --- a/usr/src/uts/common/fs/zfs/bptree.c +++ b/usr/src/uts/common/fs/zfs/bptree.c @@ -211,7 +211,8 @@ bptree_iterate(objset_t *os, uint64_t obj, boolean_t free, bptree_itor_t func, err = 0; for (i = ba.ba_phys->bt_begin; i < ba.ba_phys->bt_end; i++) { bptree_entry_phys_t bte; - int flags = TRAVERSE_PREFETCH_METADATA | TRAVERSE_POST; + int flags = TRAVERSE_PREFETCH_METADATA | TRAVERSE_POST + | TRAVERSE_NO_DECRYPT; err = dmu_read(os, obj, i * sizeof (bte), sizeof (bte), &bte, DMU_READ_NO_PREFETCH); diff --git a/usr/src/uts/common/fs/zfs/dbuf.c 
b/usr/src/uts/common/fs/zfs/dbuf.c index b9d6ca26fe..9c7205bd0d 100644 --- a/usr/src/uts/common/fs/zfs/dbuf.c +++ b/usr/src/uts/common/fs/zfs/dbuf.c @@ -157,7 +157,7 @@ uint64_t dbuf_metadata_cache_overflow; * cache size). Once the eviction thread is woken up and eviction is required, * it will continue evicting buffers until it's able to reduce the cache size * to the low water mark. If the cache size continues to grow and hits the high - * water mark, then callers adding elments to the cache will begin to evict + * water mark, then callers adding elements to the cache will begin to evict * directly from the cache until the cache is no longer above the high water * mark. */ @@ -310,7 +310,7 @@ dbuf_hash_remove(dmu_buf_impl_t *db) dmu_buf_impl_t *dbf, **dbp; /* - * We musn't hold db_mtx to maintain lock ordering: + * We mustn't hold db_mtx to maintain lock ordering: * DBUF_HASH_MUTEX > db_mtx. */ ASSERT(zfs_refcount_is_zero(&db->db_holds)); @@ -413,7 +413,7 @@ dbuf_evict_user(dmu_buf_impl_t *db) boolean_t dbuf_is_metadata(dmu_buf_impl_t *db) { - if (db->db_level > 0) { + if (db->db_level > 0 || db->db_blkid == DMU_SPILL_BLKID) { return (B_TRUE); } else { boolean_t is_metadata; @@ -941,6 +941,7 @@ dbuf_whichblock(dnode_t *dn, int64_t level, uint64_t offset) } } +/* ARGSUSED */ static void dbuf_read_done(zio_t *zio, const zbookmark_phys_t *zb, const blkptr_t *bp, arc_buf_t *buf, void *vdb) @@ -984,12 +985,71 @@ dbuf_read_done(zio_t *zio, const zbookmark_phys_t *zb, const blkptr_t *bp, dbuf_rele_and_unlock(db, NULL, B_FALSE); } -static void + +/* + * This function ensures that, when doing a decrypting read of a block, + * we make sure we have decrypted the dnode associated with it. We must do + * this so that we ensure we are fully authenticating the checksum-of-MACs + * tree from the root of the objset down to this block. Indirect blocks are + * always verified against their secure checksum-of-MACs assuming that the + * dnode containing them is correct. 
Now that we are doing a decrypting read, + * we can be sure that the key is loaded and verify that assumption. This is + * especially important considering that we always read encrypted dnode + * blocks as raw data (without verifying their MACs) to start, and + * decrypt / authenticate them when we need to read an encrypted bonus buffer. + */ +static int +dbuf_read_verify_dnode_crypt(dmu_buf_impl_t *db, uint32_t flags) +{ + int err = 0; + objset_t *os = db->db_objset; + arc_buf_t *dnode_abuf; + dnode_t *dn; + zbookmark_phys_t zb; + + ASSERT(MUTEX_HELD(&db->db_mtx)); + + if (!os->os_encrypted || os->os_raw_receive || + (flags & DB_RF_NO_DECRYPT) != 0) + return (0); + + DB_DNODE_ENTER(db); + dn = DB_DNODE(db); + dnode_abuf = (dn->dn_dbuf != NULL) ? dn->dn_dbuf->db_buf : NULL; + + if (dnode_abuf == NULL || !arc_is_encrypted(dnode_abuf)) { + DB_DNODE_EXIT(db); + return (0); + } + + SET_BOOKMARK(&zb, dmu_objset_id(os), + DMU_META_DNODE_OBJECT, 0, dn->dn_dbuf->db_blkid); + err = arc_untransform(dnode_abuf, os->os_spa, &zb, B_TRUE); + + /* + * An error code of EACCES tells us that the key is still not + * available. This is ok if we are only reading authenticated + * (and therefore non-encrypted) blocks. 
+ */ + if (err == EACCES && ((db->db_blkid != DMU_BONUS_BLKID && + !DMU_OT_IS_ENCRYPTED(dn->dn_type)) || + (db->db_blkid == DMU_BONUS_BLKID && + !DMU_OT_IS_ENCRYPTED(dn->dn_bonustype)))) + err = 0; + + + DB_DNODE_EXIT(db); + + return (err); +} + +static int dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags) { dnode_t *dn; zbookmark_phys_t zb; arc_flags_t aflags = ARC_FLAG_NOWAIT; + int err, zio_flags = 0; DB_DNODE_ENTER(db); dn = DB_DNODE(db); @@ -1008,6 +1068,14 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags) int bonuslen = MIN(dn->dn_bonuslen, dn->dn_phys->dn_bonuslen); int max_bonuslen = DN_SLOTS_TO_BONUSLEN(dn->dn_num_slots); + /* if the underlying dnode block is encrypted, decrypt it */ + err = dbuf_read_verify_dnode_crypt(db, flags); + if (err != 0) { + DB_DNODE_EXIT(db); + mutex_exit(&db->db_mtx); + return (err); + } + ASSERT3U(bonuslen, <=, db->db.db_size); db->db.db_data = zio_buf_alloc(max_bonuslen); arc_space_consume(max_bonuslen, ARC_SPACE_BONUS); @@ -1018,7 +1086,7 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags) DB_DNODE_EXIT(db); db->db_state = DB_CACHED; mutex_exit(&db->db_mtx); - return; + return (0); } /* @@ -1058,7 +1126,30 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags) DB_DNODE_EXIT(db); db->db_state = DB_CACHED; mutex_exit(&db->db_mtx); - return; + return (0); + } + + SET_BOOKMARK(&zb, dmu_objset_id(db->db_objset), + db->db.db_object, db->db_level, db->db_blkid); + + /* + * All bps of an encrypted os should have the encryption bit set. + * If this is not true it indicates tampering and we report an error. 
+ */ + if (db->db_objset->os_encrypted && !BP_USES_CRYPT(db->db_blkptr)) { + spa_log_error(db->db_objset->os_spa, &zb); + zfs_panic_recover("unencrypted block in encrypted " + "object set %llu", dmu_objset_id(db->db_objset)); + DB_DNODE_EXIT(db); + mutex_exit(&db->db_mtx); + return (SET_ERROR(EIO)); + } + + err = dbuf_read_verify_dnode_crypt(db, flags); + if (err != 0) { + DB_DNODE_EXIT(db); + mutex_exit(&db->db_mtx); + return (err); } DB_DNODE_EXIT(db); @@ -1069,16 +1160,19 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags) if (DBUF_IS_L2CACHEABLE(db)) aflags |= ARC_FLAG_L2CACHE; - SET_BOOKMARK(&zb, db->db_objset->os_dsl_dataset ? - db->db_objset->os_dsl_dataset->ds_object : DMU_META_OBJSET, - db->db.db_object, db->db_level, db->db_blkid); - dbuf_add_ref(db, NULL); - (void) arc_read(zio, db->db_objset->os_spa, db->db_blkptr, - dbuf_read_done, db, ZIO_PRIORITY_SYNC_READ, - (flags & DB_RF_CANFAIL) ? ZIO_FLAG_CANFAIL : ZIO_FLAG_MUSTSUCCEED, + zio_flags = (flags & DB_RF_CANFAIL) ? + ZIO_FLAG_CANFAIL : ZIO_FLAG_MUSTSUCCEED; + + if ((flags & DB_RF_NO_DECRYPT) && BP_IS_PROTECTED(db->db_blkptr)) + zio_flags |= ZIO_FLAG_RAW; + + err = arc_read(zio, db->db_objset->os_spa, db->db_blkptr, + dbuf_read_done, db, ZIO_PRIORITY_SYNC_READ, zio_flags, &aflags, &zb); + + return (err); } /* @@ -1116,7 +1210,7 @@ dbuf_fix_old_data(dmu_buf_impl_t *db, uint64_t txg) * or (if there a no active holders) * just null out the current db_data pointer. 
*/ - ASSERT(dr->dr_txg >= txg - 2); + ASSERT3U(dr->dr_txg, >=, txg - 2); if (db->db_blkid == DMU_BONUS_BLKID) { /* Note that the data bufs here are zio_bufs */ dnode_t *dn = DB_DNODE(db); @@ -1125,18 +1219,31 @@ dbuf_fix_old_data(dmu_buf_impl_t *db, uint64_t txg) arc_space_consume(bonuslen, ARC_SPACE_BONUS); bcopy(db->db.db_data, dr->dt.dl.dr_data, bonuslen); } else if (zfs_refcount_count(&db->db_holds) > db->db_dirtycnt) { + dnode_t *dn = DB_DNODE(db); int size = arc_buf_size(db->db_buf); arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db); spa_t *spa = db->db_objset->os_spa; enum zio_compress compress_type = arc_get_compression(db->db_buf); - if (compress_type == ZIO_COMPRESS_OFF) { - dr->dt.dl.dr_data = arc_alloc_buf(spa, db, type, size); - } else { + if (arc_is_encrypted(db->db_buf)) { + boolean_t byteorder; + uint8_t salt[ZIO_DATA_SALT_LEN]; + uint8_t iv[ZIO_DATA_IV_LEN]; + uint8_t mac[ZIO_DATA_MAC_LEN]; + + arc_get_raw_params(db->db_buf, &byteorder, salt, + iv, mac); + dr->dt.dl.dr_data = arc_alloc_raw_buf(spa, db, + dmu_objset_id(dn->dn_objset), byteorder, salt, iv, + mac, dn->dn_type, size, arc_buf_lsize(db->db_buf), + compress_type); + } else if (compress_type != ZIO_COMPRESS_OFF) { ASSERT3U(type, ==, ARC_BUFC_DATA); dr->dt.dl.dr_data = arc_alloc_compressed_buf(spa, db, size, arc_buf_lsize(db->db_buf), compress_type); + } else { + dr->dt.dl.dr_data = arc_alloc_buf(spa, db, type, size); } bcopy(db->db.db_data, dr->dt.dl.dr_data->b_data, size); } else { @@ -1172,20 +1279,36 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags) mutex_enter(&db->db_mtx); if (db->db_state == DB_CACHED) { + spa_t *spa = dn->dn_objset->os_spa; + /* - * If the arc buf is compressed, we need to decompress it to - * read the data. This could happen during the "zfs receive" of - * a stream which is compressed and deduplicated. + * Ensure that this block's dnode has been decrypted if + * the caller has requested decrypted data. 
*/ - if (db->db_buf != NULL && - arc_get_compression(db->db_buf) != ZIO_COMPRESS_OFF) { - dbuf_fix_old_data(db, - spa_syncing_txg(dmu_objset_spa(db->db_objset))); - err = arc_decompress(db->db_buf); + err = dbuf_read_verify_dnode_crypt(db, flags); + + /* + * If the arc buf is compressed or encrypted and the caller + * requested uncompressed data, we need to untransform it + * before returning. We also call arc_untransform() on any + * unauthenticated blocks, which will verify their MAC if + * the key is now available. + */ + if (err == 0 && db->db_buf != NULL && + (flags & DB_RF_NO_DECRYPT) == 0 && + (arc_is_encrypted(db->db_buf) || + arc_is_unauthenticated(db->db_buf) || + arc_get_compression(db->db_buf) != ZIO_COMPRESS_OFF)) { + zbookmark_phys_t zb; + + SET_BOOKMARK(&zb, dmu_objset_id(db->db_objset), + db->db.db_object, db->db_level, db->db_blkid); + dbuf_fix_old_data(db, spa_syncing_txg(spa)); + err = arc_untransform(db->db_buf, spa, &zb, B_FALSE); dbuf_set_data(db, db->db_buf); } mutex_exit(&db->db_mtx); - if (prefetch) + if (err == 0 && prefetch) dmu_zfetch(&dn->dn_zfetch, db->db_blkid, 1, B_TRUE); if ((flags & DB_RF_HAVESTRUCT) == 0) rw_exit(&dn->dn_struct_rwlock); @@ -1199,18 +1322,18 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags) zio = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL); need_wait = B_TRUE; } - dbuf_read_impl(db, zio, flags); + err = dbuf_read_impl(db, zio, flags); /* dbuf_read_impl has dropped db_mtx for us */ - if (prefetch) + if (!err && prefetch) dmu_zfetch(&dn->dn_zfetch, db->db_blkid, 1, B_TRUE); if ((flags & DB_RF_HAVESTRUCT) == 0) rw_exit(&dn->dn_struct_rwlock); DB_DNODE_EXIT(db); - if (need_wait) + if (!err && need_wait) err = zio_wait(zio); } else { /* @@ -1300,6 +1423,7 @@ dbuf_unoverride(dbuf_dirty_record_t *dr) dr->dt.dl.dr_override_state = DR_NOT_OVERRIDDEN; dr->dt.dl.dr_nopwrite = B_FALSE; + dr->dt.dl.dr_has_raw_params = B_FALSE; /* * Release the already-written buffer, so we leave it in @@ -1744,7 +1868,10 @@ 
dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx) ddt_prefetch(os->os_spa, db->db_blkptr); if (db->db_level == 0) { - dnode_new_blkid(dn, db->db_blkid, tx, drop_struct_lock); + ASSERT(!db->db_objset->os_raw_receive || + dn->dn_maxblkid >= db->db_blkid); + dnode_new_blkid(dn, db->db_blkid, tx, + drop_struct_lock, B_FALSE); ASSERT(dn->dn_maxblkid >= db->db_blkid); } @@ -1891,11 +2018,10 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx) return (B_FALSE); } -void -dmu_buf_will_dirty(dmu_buf_t *db_fake, dmu_tx_t *tx) +static void +dmu_buf_will_dirty_impl(dmu_buf_t *db_fake, int flags, dmu_tx_t *tx) { dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; - int rf = DB_RF_MUST_SUCCEED | DB_RF_NOPREFETCH; ASSERT(tx->tx_txg != 0); ASSERT(!zfs_refcount_is_zero(&db->db_holds)); @@ -1926,13 +2052,20 @@ dmu_buf_will_dirty(dmu_buf_t *db_fake, dmu_tx_t *tx) DB_DNODE_ENTER(db); if (RW_WRITE_HELD(&DB_DNODE(db)->dn_struct_rwlock)) - rf |= DB_RF_HAVESTRUCT; + flags |= DB_RF_HAVESTRUCT; DB_DNODE_EXIT(db); - (void) dbuf_read(db, NULL, rf); + (void) dbuf_read(db, NULL, flags); (void) dbuf_dirty(db, tx); } void +dmu_buf_will_dirty(dmu_buf_t *db_fake, dmu_tx_t *tx) +{ + dmu_buf_will_dirty_impl(db_fake, + DB_RF_MUST_SUCCEED | DB_RF_NOPREFETCH, tx); +} + +void dmu_buf_will_not_fill(dmu_buf_t *db_fake, dmu_tx_t *tx) { dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; @@ -1959,6 +2092,44 @@ dmu_buf_will_fill(dmu_buf_t *db_fake, dmu_tx_t *tx) (void) dbuf_dirty(db, tx); } +/* + * This function is effectively the same as dmu_buf_will_dirty(), but + * indicates the caller expects raw encrypted data in the db, and provides + * the crypt params (byteorder, salt, iv, mac) which should be stored in the + * blkptr_t when this dbuf is written. This is only used for blocks of + * dnodes during a raw receive. 
+ */ +void +dmu_buf_set_crypt_params(dmu_buf_t *db_fake, boolean_t byteorder, + const uint8_t *salt, const uint8_t *iv, const uint8_t *mac, dmu_tx_t *tx) +{ + dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; + dbuf_dirty_record_t *dr; + + /* + * dr_has_raw_params is only processed for blocks of dnodes + * (see dbuf_sync_dnode_leaf_crypt()). + */ + ASSERT3U(db->db.db_object, ==, DMU_META_DNODE_OBJECT); + ASSERT3U(db->db_level, ==, 0); + + dmu_buf_will_dirty_impl(db_fake, + DB_RF_MUST_SUCCEED | DB_RF_NOPREFETCH | DB_RF_NO_DECRYPT, tx); + + dr = db->db_last_dirty; + while (dr != NULL && dr->dr_txg > tx->tx_txg) + dr = dr->dr_next; + + ASSERT3P(dr, !=, NULL); + ASSERT3U(dr->dr_txg, ==, tx->tx_txg); + + dr->dt.dl.dr_has_raw_params = B_TRUE; + dr->dt.dl.dr_byteorder = byteorder; + bcopy(salt, dr->dt.dl.dr_salt, ZIO_DATA_SALT_LEN); + bcopy(iv, dr->dt.dl.dr_iv, ZIO_DATA_IV_LEN); + bcopy(mac, dr->dt.dl.dr_mac, ZIO_DATA_MAC_LEN); +} + #pragma weak dmu_buf_fill_done = dbuf_fill_done /* ARGSUSED */ void @@ -2045,6 +2216,13 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx) if (db->db_state == DB_CACHED && zfs_refcount_count(&db->db_holds) - 1 > db->db_dirtycnt) { + /* + * In practice, we will never have a case where we have an + * encrypted arc buffer while additional holds exist on the + * dbuf. We don't handle this here so we simply assert that + * fact instead. 
+ */ + ASSERT(!arc_is_encrypted(buf)); mutex_exit(&db->db_mtx); (void) dbuf_dirty(db, tx); bcopy(buf->b_data, db->db.db_data, db->db.db_size); @@ -2060,6 +2238,7 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx) ASSERT(db->db_buf != NULL); if (dr != NULL && dr->dr_txg == tx->tx_txg) { ASSERT(dr->dt.dl.dr_data == db->db_buf); + if (!arc_released(db->db_buf)) { ASSERT(dr->dt.dl.dr_override_state == DR_OVERRIDDEN); @@ -2383,15 +2562,20 @@ dbuf_issue_final_prefetch(dbuf_prefetch_arg_t *dpa, blkptr_t *bp) if (BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp)) return; + int zio_flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE; arc_flags_t aflags = dpa->dpa_aflags | ARC_FLAG_NOWAIT | ARC_FLAG_PREFETCH; + /* dnodes are always read as raw and then converted later */ + if (BP_GET_TYPE(bp) == DMU_OT_DNODE && BP_IS_PROTECTED(bp) && + dpa->dpa_curlevel == 0) + zio_flags |= ZIO_FLAG_RAW; + ASSERT3U(dpa->dpa_curlevel, ==, BP_GET_LEVEL(bp)); ASSERT3U(dpa->dpa_curlevel, ==, dpa->dpa_zb.zb_level); ASSERT(dpa->dpa_zio != NULL); (void) arc_read(dpa->dpa_zio, dpa->dpa_spa, bp, NULL, NULL, - dpa->dpa_prio, ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE, - &aflags, &dpa->dpa_zb); + dpa->dpa_prio, zio_flags, &aflags, &dpa->dpa_zb); } /* @@ -2399,6 +2583,7 @@ dbuf_issue_final_prefetch(dbuf_prefetch_arg_t *dpa, blkptr_t *bp) * will either read in the next indirect block down the tree or issue the actual * prefetch if the next block down is our target. 
*/ +/* ARGSUSED */ static void dbuf_prefetch_indirect_done(zio_t *zio, const zbookmark_phys_t *zb, const blkptr_t *iobp, arc_buf_t *abuf, void *private) @@ -2428,7 +2613,7 @@ dbuf_prefetch_indirect_done(zio_t *zio, const zbookmark_phys_t *zb, */ if (zio != NULL) { ASSERT3S(BP_GET_LEVEL(zio->io_bp), ==, dpa->dpa_curlevel); - if (zio->io_flags & ZIO_FLAG_RAW) { + if (zio->io_flags & ZIO_FLAG_RAW_COMPRESS) { ASSERT3U(BP_GET_PSIZE(zio->io_bp), ==, zio->io_size); } else { ASSERT3U(BP_GET_LSIZE(zio->io_bp), ==, zio->io_size); @@ -2485,7 +2670,8 @@ dbuf_prefetch_indirect_done(zio_t *zio, const zbookmark_phys_t *zb, * Issue prefetch reads for the given block on the given level. If the indirect * blocks above that block are not in memory, we will read them in * asynchronously. As a result, this call never blocks waiting for a read to - * complete. + * complete. Note that the prefetch might fail if the dataset is encrypted and + * the encryption key is unmapped before the IO completes. */ void dbuf_prefetch(dnode_t *dn, int64_t level, uint64_t blkid, zio_priority_t prio, @@ -2614,6 +2800,43 @@ dbuf_prefetch(dnode_t *dn, int64_t level, uint64_t blkid, zio_priority_t prio, } /* + * Helper function for __dbuf_hold_impl() to copy a buffer. Handles + * the case of encrypted, compressed and uncompressed buffers by + * allocating the new buffer, respectively, with arc_alloc_raw_buf(), + * arc_alloc_compressed_buf() or arc_alloc_buf().* + * + * NOTE: Declared noinline to avoid stack bloat in __dbuf_hold_impl(). 
+ */ +static void +dbuf_hold_copy(dnode_t *dn, dmu_buf_impl_t *db, dbuf_dirty_record_t *dr) +{ + arc_buf_t *data = dr->dt.dl.dr_data; + enum zio_compress compress_type = arc_get_compression(data); + + if (arc_is_encrypted(data)) { + boolean_t byteorder; + uint8_t salt[ZIO_DATA_SALT_LEN]; + uint8_t iv[ZIO_DATA_IV_LEN]; + uint8_t mac[ZIO_DATA_MAC_LEN]; + + arc_get_raw_params(data, &byteorder, salt, iv, mac); + dbuf_set_data(db, arc_alloc_raw_buf(dn->dn_objset->os_spa, db, + dmu_objset_id(dn->dn_objset), byteorder, salt, iv, mac, + dn->dn_type, arc_buf_size(data), arc_buf_lsize(data), + compress_type)); + } else if (compress_type != ZIO_COMPRESS_OFF) { + dbuf_set_data(db, arc_alloc_compressed_buf( + dn->dn_objset->os_spa, db, arc_buf_size(data), + arc_buf_lsize(data), compress_type)); + } else { + dbuf_set_data(db, arc_alloc_buf(dn->dn_objset->os_spa, db, + DBUF_GET_BUFC_TYPE(db), db->db.db_size)); + } + + bcopy(data->b_data, db->db.db_data, arc_buf_size(data)); +} + +/* * Returns with db_holds incremented, and db_mtx not held. * Note: dn_struct_rwlock must be held. 
*/ @@ -2677,16 +2900,8 @@ top: dn->dn_object != DMU_META_DNODE_OBJECT && db->db_state == DB_CACHED && db->db_data_pending) { dbuf_dirty_record_t *dr = db->db_data_pending; - - if (dr->dt.dl.dr_data == db->db_buf) { - arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db); - - dbuf_set_data(db, - arc_alloc_buf(dn->dn_objset->os_spa, db, type, - db->db.db_size)); - bcopy(dr->dt.dl.dr_data->b_data, db->db.db_data, - db->db.db_size); - } + if (dr->dt.dl.dr_data == db->db_buf) + dbuf_hold_copy(dn, db, dr); } if (multilist_link_active(&db->db_cache_link)) { @@ -2960,6 +3175,20 @@ dbuf_refcount(dmu_buf_impl_t *db) return (zfs_refcount_count(&db->db_holds)); } +uint64_t +dmu_buf_user_refcount(dmu_buf_t *db_fake) +{ + uint64_t holds; + dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; + + mutex_enter(&db->db_mtx); + ASSERT3U(zfs_refcount_count(&db->db_holds), >=, db->db_dirtycnt); + holds = zfs_refcount_count(&db->db_holds) - db->db_dirtycnt; + mutex_exit(&db->db_mtx); + + return (holds); +} + void * dmu_buf_replace_user(dmu_buf_t *db_fake, dmu_buf_user_t *old_user, dmu_buf_user_t *new_user) @@ -3088,6 +3317,50 @@ dbuf_check_blkptr(dnode_t *dn, dmu_buf_impl_t *db) } } +/* + * When syncing out blocks of dnodes, adjust the block to deal with + * encryption. Normally, we make sure the block is decrypted before writing + * it. If we have crypt params, then we are writing a raw (encrypted) block, + * from a raw receive. In this case, set the ARC buf's crypt params so + * that the BP will be filled with the correct byteorder, salt, iv, and mac. + * + * XXX we should handle decrypting the dnode block in dbuf_dirty(). 
+ */ +static void +dbuf_prepare_encrypted_dnode_leaf(dbuf_dirty_record_t *dr) +{ + int err; + dmu_buf_impl_t *db = dr->dr_dbuf; + + ASSERT(MUTEX_HELD(&db->db_mtx)); + ASSERT3U(db->db.db_object, ==, DMU_META_DNODE_OBJECT); + ASSERT3U(db->db_level, ==, 0); + + if (!db->db_objset->os_raw_receive && arc_is_encrypted(db->db_buf)) { + zbookmark_phys_t zb; + + /* + * Unfortunately, there is currently no mechanism for + * syncing context to handle decryption errors. An error + * here is only possible if an attacker maliciously + * changed a dnode block and updated the associated + * checksums going up the block tree. + */ + SET_BOOKMARK(&zb, dmu_objset_id(db->db_objset), + db->db.db_object, db->db_level, db->db_blkid); + err = arc_untransform(db->db_buf, db->db_objset->os_spa, + &zb, B_TRUE); + if (err) + panic("Invalid dnode block MAC"); + } else if (dr->dt.dl.dr_has_raw_params) { + (void) arc_release(dr->dt.dl.dr_data, db); + arc_convert_to_raw(dr->dt.dl.dr_data, + dmu_objset_id(db->db_objset), + dr->dt.dl.dr_byteorder, DMU_OT_DNODE, + dr->dt.dl.dr_salt, dr->dt.dl.dr_iv, dr->dt.dl.dr_mac); + } +} + static void dbuf_sync_indirect(dbuf_dirty_record_t *dr, dmu_tx_t *tx) { @@ -3230,6 +3503,13 @@ dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx) ASSERT(dr->dt.dl.dr_override_state != DR_NOT_OVERRIDDEN); } + /* + * If this is a dnode block, ensure it is appropriately encrypted + * or decrypted, depending on what we are writing to it this txg. + */ + if (os->os_encrypted && dn->dn_object == DMU_META_DNODE_OBJECT) + dbuf_prepare_encrypted_dnode_leaf(dr); + if (db->db_state != DB_NOFILL && dn->dn_object != DMU_META_DNODE_OBJECT && zfs_refcount_count(&db->db_holds) > 1 && @@ -3247,16 +3527,26 @@ dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx) * DNONE_DNODE blocks). 
*/ int psize = arc_buf_size(*datap); + int lsize = arc_buf_lsize(*datap); arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db); enum zio_compress compress_type = arc_get_compression(*datap); - if (compress_type == ZIO_COMPRESS_OFF) { - *datap = arc_alloc_buf(os->os_spa, db, type, psize); - } else { + if (arc_is_encrypted(*datap)) { + boolean_t byteorder; + uint8_t salt[ZIO_DATA_SALT_LEN]; + uint8_t iv[ZIO_DATA_IV_LEN]; + uint8_t mac[ZIO_DATA_MAC_LEN]; + + arc_get_raw_params(*datap, &byteorder, salt, iv, mac); + *datap = arc_alloc_raw_buf(os->os_spa, db, + dmu_objset_id(os), byteorder, salt, iv, mac, + dn->dn_type, psize, lsize, compress_type); + } else if (compress_type != ZIO_COMPRESS_OFF) { ASSERT3U(type, ==, ARC_BUFC_DATA); - int lsize = arc_buf_lsize(*datap); *datap = arc_alloc_compressed_buf(os->os_spa, db, psize, lsize, compress_type); + } else { + *datap = arc_alloc_buf(os->os_spa, db, type, psize); } bcopy(db->db.db_data, (*datap)->b_data, psize); } @@ -3357,8 +3647,10 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb) if (db->db_level == 0) { mutex_enter(&dn->dn_mtx); if (db->db_blkid > dn->dn_phys->dn_maxblkid && - db->db_blkid != DMU_SPILL_BLKID) + db->db_blkid != DMU_SPILL_BLKID) { + ASSERT0(db->db_objset->os_raw_receive); dn->dn_phys->dn_maxblkid = db->db_blkid; + } mutex_exit(&dn->dn_mtx); if (dn->dn_type == DMU_OT_DNODE) { @@ -3393,7 +3685,7 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb) DB_DNODE_EXIT(db); if (!BP_IS_EMBEDDED(bp)) - bp->blk_fill = fill; + BP_SET_FILL(bp, fill); mutex_exit(&db->db_mtx); @@ -3814,6 +4106,7 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx) wp_flag |= (db->db_state == DB_NOFILL) ? 
WP_NOFILL : 0; dmu_write_policy(os, dn, db->db_level, wp_flag, &zp); + DB_DNODE_EXIT(db); /* diff --git a/usr/src/uts/common/fs/zfs/ddt.c b/usr/src/uts/common/fs/zfs/ddt.c index 1d51329511..8bcf6af8ba 100644 --- a/usr/src/uts/common/fs/zfs/ddt.c +++ b/usr/src/uts/common/fs/zfs/ddt.c @@ -253,6 +253,10 @@ ddt_bp_fill(const ddt_phys_t *ddp, blkptr_t *bp, uint64_t txg) BP_SET_BIRTH(bp, txg, ddp->ddp_phys_birth); } +/* + * The bp created via this function may be used for repairs and scrub, but it + * will be missing the salt / IV required to do a full decrypting read. + */ void ddt_bp_create(enum zio_checksum checksum, const ddt_key_t *ddk, const ddt_phys_t *ddp, blkptr_t *bp) @@ -263,11 +267,12 @@ ddt_bp_create(enum zio_checksum checksum, ddt_bp_fill(ddp, bp, ddp->ddp_phys_birth); bp->blk_cksum = ddk->ddk_cksum; - bp->blk_fill = 1; BP_SET_LSIZE(bp, DDK_GET_LSIZE(ddk)); BP_SET_PSIZE(bp, DDK_GET_PSIZE(ddk)); BP_SET_COMPRESS(bp, DDK_GET_COMPRESS(ddk)); + BP_SET_CRYPT(bp, DDK_GET_CRYPT(ddk)); + BP_SET_FILL(bp, 1); BP_SET_CHECKSUM(bp, checksum); BP_SET_TYPE(bp, DMU_OT_DEDUP); BP_SET_LEVEL(bp, 0); @@ -281,9 +286,12 @@ ddt_key_fill(ddt_key_t *ddk, const blkptr_t *bp) ddk->ddk_cksum = bp->blk_cksum; ddk->ddk_prop = 0; + ASSERT(BP_IS_ENCRYPTED(bp) || !BP_USES_CRYPT(bp)); + DDK_SET_LSIZE(ddk, BP_GET_LSIZE(bp)); DDK_SET_PSIZE(ddk, BP_GET_PSIZE(bp)); DDK_SET_COMPRESS(ddk, BP_GET_COMPRESS(bp)); + DDK_SET_CRYPT(ddk, BP_USES_CRYPT(bp)); } void @@ -367,7 +375,7 @@ ddt_stat_generate(ddt_t *ddt, ddt_entry_t *dde, ddt_stat_t *dds) if (ddp->ddp_phys_birth == 0) continue; - for (int d = 0; d < SPA_DVAS_PER_BP; d++) + for (int d = 0; d < DDE_GET_NDVAS(dde); d++) dsize += dva_get_dsize_sync(spa, &ddp->ddp_dva[d]); dds->dds_blocks += 1; @@ -521,6 +529,7 @@ ddt_ditto_copies_needed(ddt_t *ddt, ddt_entry_t *dde, ddt_phys_t *ddp_willref) uint64_t ditto = spa->spa_dedup_ditto; int total_copies = 0; int desired_copies = 0; + int copies_needed = 0; for (int p = DDT_PHYS_SINGLE; p <= DDT_PHYS_TRIPLE; 
p++) { ddt_phys_t *ddp = &dde->dde_phys[p]; @@ -546,7 +555,13 @@ ddt_ditto_copies_needed(ddt_t *ddt, ddt_entry_t *dde, ddt_phys_t *ddp_willref) if (total_refcnt >= ditto * ditto) desired_copies++; - return (MAX(desired_copies, total_copies) - total_copies); + copies_needed = MAX(desired_copies, total_copies) - total_copies; + + /* encrypted blocks store their IV in DVA[2] */ + if (DDK_GET_CRYPT(&dde->dde_key)) + copies_needed = MIN(copies_needed, SPA_DVAS_PER_BP - 1); + + return (copies_needed); } int @@ -556,7 +571,7 @@ ddt_ditto_copies_present(ddt_entry_t *dde) dva_t *dva = ddp->ddp_dva; int copies = 0 - DVA_GET_GANG(dva); - for (int d = 0; d < SPA_DVAS_PER_BP; d++, dva++) + for (int d = 0; d < DDE_GET_NDVAS(dde); d++, dva++) if (DVA_IS_VALID(dva)) copies++; diff --git a/usr/src/uts/common/fs/zfs/dmu.c b/usr/src/uts/common/fs/zfs/dmu.c index 95ca9f76aa..02bdfdfa12 100644 --- a/usr/src/uts/common/fs/zfs/dmu.c +++ b/usr/src/uts/common/fs/zfs/dmu.c @@ -96,60 +96,60 @@ int zfs_object_remap_one_indirect_delay_ticks = 0; uint64_t dmu_prefetch_max = 8 * SPA_MAXBLOCKSIZE; const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES] = { - { DMU_BSWAP_UINT8, TRUE, FALSE, "unallocated" }, - { DMU_BSWAP_ZAP, TRUE, TRUE, "object directory" }, - { DMU_BSWAP_UINT64, TRUE, TRUE, "object array" }, - { DMU_BSWAP_UINT8, TRUE, FALSE, "packed nvlist" }, - { DMU_BSWAP_UINT64, TRUE, FALSE, "packed nvlist size" }, - { DMU_BSWAP_UINT64, TRUE, FALSE, "bpobj" }, - { DMU_BSWAP_UINT64, TRUE, FALSE, "bpobj header" }, - { DMU_BSWAP_UINT64, TRUE, FALSE, "SPA space map header" }, - { DMU_BSWAP_UINT64, TRUE, FALSE, "SPA space map" }, - { DMU_BSWAP_UINT64, TRUE, FALSE, "ZIL intent log" }, - { DMU_BSWAP_DNODE, TRUE, FALSE, "DMU dnode" }, - { DMU_BSWAP_OBJSET, TRUE, TRUE, "DMU objset" }, - { DMU_BSWAP_UINT64, TRUE, TRUE, "DSL directory" }, - { DMU_BSWAP_ZAP, TRUE, TRUE, "DSL directory child map" }, - { DMU_BSWAP_ZAP, TRUE, TRUE, "DSL dataset snap map" }, - { DMU_BSWAP_ZAP, TRUE, TRUE, "DSL props" }, - { 
DMU_BSWAP_UINT64, TRUE, TRUE, "DSL dataset" }, - { DMU_BSWAP_ZNODE, TRUE, FALSE, "ZFS znode" }, - { DMU_BSWAP_OLDACL, TRUE, FALSE, "ZFS V0 ACL" }, - { DMU_BSWAP_UINT8, FALSE, FALSE, "ZFS plain file" }, - { DMU_BSWAP_ZAP, TRUE, FALSE, "ZFS directory" }, - { DMU_BSWAP_ZAP, TRUE, FALSE, "ZFS master node" }, - { DMU_BSWAP_ZAP, TRUE, FALSE, "ZFS delete queue" }, - { DMU_BSWAP_UINT8, FALSE, FALSE, "zvol object" }, - { DMU_BSWAP_ZAP, TRUE, FALSE, "zvol prop" }, - { DMU_BSWAP_UINT8, FALSE, FALSE, "other uint8[]" }, - { DMU_BSWAP_UINT64, FALSE, FALSE, "other uint64[]" }, - { DMU_BSWAP_ZAP, TRUE, FALSE, "other ZAP" }, - { DMU_BSWAP_ZAP, TRUE, FALSE, "persistent error log" }, - { DMU_BSWAP_UINT8, TRUE, FALSE, "SPA history" }, - { DMU_BSWAP_UINT64, TRUE, FALSE, "SPA history offsets" }, - { DMU_BSWAP_ZAP, TRUE, TRUE, "Pool properties" }, - { DMU_BSWAP_ZAP, TRUE, TRUE, "DSL permissions" }, - { DMU_BSWAP_ACL, TRUE, FALSE, "ZFS ACL" }, - { DMU_BSWAP_UINT8, TRUE, FALSE, "ZFS SYSACL" }, - { DMU_BSWAP_UINT8, TRUE, FALSE, "FUID table" }, - { DMU_BSWAP_UINT64, TRUE, FALSE, "FUID table size" }, - { DMU_BSWAP_ZAP, TRUE, TRUE, "DSL dataset next clones" }, - { DMU_BSWAP_ZAP, TRUE, FALSE, "scan work queue" }, - { DMU_BSWAP_ZAP, TRUE, FALSE, "ZFS user/group used" }, - { DMU_BSWAP_ZAP, TRUE, FALSE, "ZFS user/group quota" }, - { DMU_BSWAP_ZAP, TRUE, TRUE, "snapshot refcount tags" }, - { DMU_BSWAP_ZAP, TRUE, FALSE, "DDT ZAP algorithm" }, - { DMU_BSWAP_ZAP, TRUE, FALSE, "DDT statistics" }, - { DMU_BSWAP_UINT8, TRUE, FALSE, "System attributes" }, - { DMU_BSWAP_ZAP, TRUE, FALSE, "SA master node" }, - { DMU_BSWAP_ZAP, TRUE, FALSE, "SA attr registration" }, - { DMU_BSWAP_ZAP, TRUE, FALSE, "SA attr layouts" }, - { DMU_BSWAP_ZAP, TRUE, FALSE, "scan translations" }, - { DMU_BSWAP_UINT8, FALSE, FALSE, "deduplicated block" }, - { DMU_BSWAP_ZAP, TRUE, TRUE, "DSL deadlist map" }, - { DMU_BSWAP_UINT64, TRUE, TRUE, "DSL deadlist map hdr" }, - { DMU_BSWAP_ZAP, TRUE, TRUE, "DSL dir clones" }, - { 
DMU_BSWAP_UINT64, TRUE, FALSE, "bpobj subobj" } + { DMU_BSWAP_UINT8, TRUE, FALSE, FALSE, "unallocated" }, + { DMU_BSWAP_ZAP, TRUE, TRUE, FALSE, "object directory" }, + { DMU_BSWAP_UINT64, TRUE, TRUE, FALSE, "object array" }, + { DMU_BSWAP_UINT8, TRUE, FALSE, FALSE, "packed nvlist" }, + { DMU_BSWAP_UINT64, TRUE, FALSE, FALSE, "packed nvlist size" }, + { DMU_BSWAP_UINT64, TRUE, FALSE, FALSE, "bpobj" }, + { DMU_BSWAP_UINT64, TRUE, FALSE, FALSE, "bpobj header" }, + { DMU_BSWAP_UINT64, TRUE, FALSE, FALSE, "SPA space map header" }, + { DMU_BSWAP_UINT64, TRUE, FALSE, FALSE, "SPA space map" }, + { DMU_BSWAP_UINT64, TRUE, FALSE, TRUE, "ZIL intent log" }, + { DMU_BSWAP_DNODE, TRUE, FALSE, TRUE, "DMU dnode" }, + { DMU_BSWAP_OBJSET, TRUE, TRUE, FALSE, "DMU objset" }, + { DMU_BSWAP_UINT64, TRUE, TRUE, FALSE, "DSL directory" }, + { DMU_BSWAP_ZAP, TRUE, TRUE, FALSE, "DSL directory child map" }, + { DMU_BSWAP_ZAP, TRUE, TRUE, FALSE, "DSL dataset snap map" }, + { DMU_BSWAP_ZAP, TRUE, TRUE, FALSE, "DSL props" }, + { DMU_BSWAP_UINT64, TRUE, TRUE, FALSE, "DSL dataset" }, + { DMU_BSWAP_ZNODE, TRUE, FALSE, FALSE, "ZFS znode" }, + { DMU_BSWAP_OLDACL, TRUE, FALSE, TRUE, "ZFS V0 ACL" }, + { DMU_BSWAP_UINT8, FALSE, FALSE, TRUE, "ZFS plain file" }, + { DMU_BSWAP_ZAP, TRUE, FALSE, TRUE, "ZFS directory" }, + { DMU_BSWAP_ZAP, TRUE, FALSE, FALSE, "ZFS master node" }, + { DMU_BSWAP_ZAP, TRUE, FALSE, TRUE, "ZFS delete queue" }, + { DMU_BSWAP_UINT8, FALSE, FALSE, TRUE, "zvol object" }, + { DMU_BSWAP_ZAP, TRUE, FALSE, FALSE, "zvol prop" }, + { DMU_BSWAP_UINT8, FALSE, FALSE, TRUE, "other uint8[]" }, + { DMU_BSWAP_UINT64, FALSE, FALSE, TRUE, "other uint64[]" }, + { DMU_BSWAP_ZAP, TRUE, FALSE, FALSE, "other ZAP" }, + { DMU_BSWAP_ZAP, TRUE, FALSE, FALSE, "persistent error log" }, + { DMU_BSWAP_UINT8, TRUE, FALSE, FALSE, "SPA history" }, + { DMU_BSWAP_UINT64, TRUE, FALSE, FALSE, "SPA history offsets" }, + { DMU_BSWAP_ZAP, TRUE, TRUE, FALSE, "Pool properties" }, + { DMU_BSWAP_ZAP, TRUE, TRUE, FALSE, "DSL 
permissions" }, + { DMU_BSWAP_ACL, TRUE, FALSE, TRUE, "ZFS ACL" }, + { DMU_BSWAP_UINT8, TRUE, FALSE, TRUE, "ZFS SYSACL" }, + { DMU_BSWAP_UINT8, TRUE, FALSE, TRUE, "FUID table" }, + { DMU_BSWAP_UINT64, TRUE, FALSE, FALSE, "FUID table size" }, + { DMU_BSWAP_ZAP, TRUE, TRUE, FALSE, "DSL dataset next clones" }, + { DMU_BSWAP_ZAP, TRUE, FALSE, FALSE, "scan work queue" }, + { DMU_BSWAP_ZAP, TRUE, FALSE, TRUE, "ZFS user/group used" }, + { DMU_BSWAP_ZAP, TRUE, FALSE, TRUE, "ZFS user/group quota" }, + { DMU_BSWAP_ZAP, TRUE, TRUE, FALSE, "snapshot refcount tags" }, + { DMU_BSWAP_ZAP, TRUE, FALSE, FALSE, "DDT ZAP algorithm" }, + { DMU_BSWAP_ZAP, TRUE, FALSE, FALSE, "DDT statistics" }, + { DMU_BSWAP_UINT8, TRUE, FALSE, TRUE, "System attributes" }, + { DMU_BSWAP_ZAP, TRUE, FALSE, TRUE, "SA master node" }, + { DMU_BSWAP_ZAP, TRUE, FALSE, TRUE, "SA attr registration" }, + { DMU_BSWAP_ZAP, TRUE, FALSE, TRUE, "SA attr layouts" }, + { DMU_BSWAP_ZAP, TRUE, FALSE, FALSE, "scan translations" }, + { DMU_BSWAP_UINT8, FALSE, FALSE, TRUE, "deduplicated block" }, + { DMU_BSWAP_ZAP, TRUE, TRUE, FALSE, "DSL deadlist map" }, + { DMU_BSWAP_UINT64, TRUE, TRUE, FALSE, "DSL deadlist map hdr" }, + { DMU_BSWAP_ZAP, TRUE, TRUE, FALSE, "DSL dir clones" }, + { DMU_BSWAP_UINT64, TRUE, FALSE, FALSE, "bpobj subobj" } }; const dmu_object_byteswap_info_t dmu_ot_byteswap[DMU_BSWAP_NUMFUNCS] = { @@ -221,6 +221,8 @@ dmu_buf_hold_by_dnode(dnode_t *dn, uint64_t offset, if (flags & DMU_READ_NO_PREFETCH) db_flags |= DB_RF_NOPREFETCH; + if (flags & DMU_READ_NO_DECRYPT) + db_flags |= DB_RF_NO_DECRYPT; err = dmu_buf_hold_noread_by_dnode(dn, offset, tag, dbp); if (err == 0) { @@ -244,6 +246,8 @@ dmu_buf_hold(objset_t *os, uint64_t object, uint64_t offset, if (flags & DMU_READ_NO_PREFETCH) db_flags |= DB_RF_NOPREFETCH; + if (flags & DMU_READ_NO_DECRYPT) + db_flags |= DB_RF_NO_DECRYPT; err = dmu_buf_hold_noread(os, object, offset, tag, dbp); if (err == 0) { @@ -341,14 +345,72 @@ dmu_rm_spill(objset_t *os, uint64_t 
object, dmu_tx_t *tx) } /* + * Lookup and hold the bonus buffer for the provided dnode. If the dnode + * has not yet been allocated a new bonus dbuf a will be allocated. + * Returns ENOENT, EIO, or 0. + */ +int dmu_bonus_hold_by_dnode(dnode_t *dn, void *tag, dmu_buf_t **dbp, + uint32_t flags) +{ + dmu_buf_impl_t *db; + int error; + uint32_t db_flags = DB_RF_MUST_SUCCEED; + + if (flags & DMU_READ_NO_PREFETCH) + db_flags |= DB_RF_NOPREFETCH; + if (flags & DMU_READ_NO_DECRYPT) + db_flags |= DB_RF_NO_DECRYPT; + + rw_enter(&dn->dn_struct_rwlock, RW_READER); + if (dn->dn_bonus == NULL) { + rw_exit(&dn->dn_struct_rwlock); + rw_enter(&dn->dn_struct_rwlock, RW_WRITER); + if (dn->dn_bonus == NULL) + dbuf_create_bonus(dn); + } + db = dn->dn_bonus; + + /* as long as the bonus buf is held, the dnode will be held */ + if (zfs_refcount_add(&db->db_holds, tag) == 1) { + VERIFY(dnode_add_ref(dn, db)); + atomic_inc_32(&dn->dn_dbufs_count); + } + + /* + * Wait to drop dn_struct_rwlock until after adding the bonus dbuf's + * hold and incrementing the dbuf count to ensure that dnode_move() sees + * a dnode hold for every dbuf. + */ + rw_exit(&dn->dn_struct_rwlock); + + error = dbuf_read(db, NULL, db_flags); + if (error) { + dnode_evict_bonus(dn); + dbuf_rele(db, tag); + *dbp = NULL; + return (error); + } + + *dbp = &db->db; + return (0); +} + +/* * returns ENOENT, EIO, or 0. 
*/ int -dmu_bonus_hold(objset_t *os, uint64_t object, void *tag, dmu_buf_t **dbp) +dmu_bonus_hold_impl(objset_t *os, uint64_t object, void *tag, uint32_t flags, + dmu_buf_t **dbp) { dnode_t *dn; dmu_buf_impl_t *db; int error; + uint32_t db_flags = DB_RF_MUST_SUCCEED; + + if (flags & DMU_READ_NO_PREFETCH) + db_flags |= DB_RF_NOPREFETCH; + if (flags & DMU_READ_NO_DECRYPT) + db_flags |= DB_RF_NO_DECRYPT; error = dnode_hold(os, object, FTAG, &dn); if (error) @@ -378,12 +440,24 @@ dmu_bonus_hold(objset_t *os, uint64_t object, void *tag, dmu_buf_t **dbp) dnode_rele(dn, FTAG); - VERIFY(0 == dbuf_read(db, NULL, DB_RF_MUST_SUCCEED | DB_RF_NOPREFETCH)); + error = dbuf_read(db, NULL, db_flags); + if (error) { + dnode_evict_bonus(dn); + dbuf_rele(db, tag); + *dbp = NULL; + return (error); + } *dbp = &db->db; return (0); } +int +dmu_bonus_hold(objset_t *os, uint64_t obj, void *tag, dmu_buf_t **dbp) +{ + return (dmu_bonus_hold_impl(os, obj, tag, DMU_READ_NO_PREFETCH, dbp)); +} + /* * returns ENOENT, EIO, or 0. * @@ -446,15 +520,20 @@ dmu_spill_hold_existing(dmu_buf_t *bonus, void *tag, dmu_buf_t **dbp) } int -dmu_spill_hold_by_bonus(dmu_buf_t *bonus, void *tag, dmu_buf_t **dbp) +dmu_spill_hold_by_bonus(dmu_buf_t *bonus, uint32_t flags, void *tag, + dmu_buf_t **dbp) { dmu_buf_impl_t *db = (dmu_buf_impl_t *)bonus; dnode_t *dn; int err; + uint32_t db_flags = DB_RF_CANFAIL; + + if (flags & DMU_READ_NO_DECRYPT) + db_flags |= DB_RF_NO_DECRYPT; DB_DNODE_ENTER(db); dn = DB_DNODE(db); - err = dmu_spill_hold_by_dnode(dn, DB_RF_CANFAIL, tag, dbp); + err = dmu_spill_hold_by_dnode(dn, db_flags, tag, dbp); DB_DNODE_EXIT(db); return (err); @@ -619,8 +698,8 @@ dmu_buf_rele_array(dmu_buf_t **dbp_fake, int numbufs, void *tag) * indirect blocks prefeteched will be those that point to the blocks containing * the data starting at offset, and continuing to offset + len. * - * Note that if the indirect blocks above the blocks being prefetched are not in - * cache, they will be asychronously read in. 
+ * Note that if the indirect blocks above the blocks being prefetched are not + * in cache, they will be asychronously read in. */ void dmu_prefetch(objset_t *os, uint64_t object, int64_t level, uint64_t offset, @@ -835,6 +914,7 @@ dmu_free_long_range_impl(objset_t *os, dnode_t *dn, uint64_t offset, uint64_t, long_free_dirty_all_txgs, uint64_t, chunk_len, uint64_t, dmu_tx_get_txg(tx)); dnode_free_range(dn, chunk_begin, chunk_len, tx); + dmu_tx_commit(tx); length -= chunk_len; @@ -883,7 +963,9 @@ dmu_free_long_object(objset_t *os, uint64_t object) dmu_tx_mark_netfree(tx); err = dmu_tx_assign(tx, TXG_WAIT); if (err == 0) { - err = dmu_object_free(os, object, tx); + if (err == 0) + err = dmu_object_free(os, object, tx); + dmu_tx_commit(tx); } else { dmu_tx_abort(tx); @@ -901,7 +983,7 @@ dmu_free_range(objset_t *os, uint64_t object, uint64_t offset, if (err) return (err); ASSERT(offset < UINT64_MAX); - ASSERT(size == -1ULL || size <= UINT64_MAX - offset); + ASSERT(size == DMU_OBJECT_END || size <= UINT64_MAX - offset); dnode_free_range(dn, offset, size, tx); dnode_rele(dn, FTAG); return (0); @@ -1622,22 +1704,71 @@ dmu_return_arcbuf(arc_buf_t *buf) arc_buf_destroy(buf, FTAG); } +void +dmu_copy_from_buf(objset_t *os, uint64_t object, uint64_t offset, + dmu_buf_t *handle, dmu_tx_t *tx) +{ + dmu_buf_t *dst_handle; + dmu_buf_impl_t *dstdb; + dmu_buf_impl_t *srcdb = (dmu_buf_impl_t *)handle; + arc_buf_t *abuf; + uint64_t datalen; + boolean_t byteorder; + uint8_t salt[ZIO_DATA_SALT_LEN]; + uint8_t iv[ZIO_DATA_IV_LEN]; + uint8_t mac[ZIO_DATA_MAC_LEN]; + + ASSERT3P(srcdb->db_buf, !=, NULL); + + /* hold the db that we want to write to */ + VERIFY0(dmu_buf_hold(os, object, offset, FTAG, &dst_handle, + DMU_READ_NO_DECRYPT)); + dstdb = (dmu_buf_impl_t *)dst_handle; + datalen = arc_buf_size(srcdb->db_buf); + + /* allocated an arc buffer that matches the type of srcdb->db_buf */ + if (arc_is_encrypted(srcdb->db_buf)) { + arc_get_raw_params(srcdb->db_buf, &byteorder, salt, iv, mac); 
+ abuf = arc_loan_raw_buf(os->os_spa, dmu_objset_id(os), + byteorder, salt, iv, mac, DB_DNODE(dstdb)->dn_type, + datalen, arc_buf_lsize(srcdb->db_buf), + arc_get_compression(srcdb->db_buf)); + } else { + /* we won't get a compressed db back from dmu_buf_hold() */ + ASSERT3U(arc_get_compression(srcdb->db_buf), + ==, ZIO_COMPRESS_OFF); + abuf = arc_loan_buf(os->os_spa, + DMU_OT_IS_METADATA(DB_DNODE(dstdb)->dn_type), datalen); + } + + ASSERT3U(datalen, ==, arc_buf_size(abuf)); + + /* copy the data to the new buffer and assign it to the dstdb */ + bcopy(srcdb->db_buf->b_data, abuf->b_data, datalen); + dbuf_assign_arcbuf(dstdb, abuf, tx); + dmu_buf_rele(dst_handle, FTAG); +} + /* * When possible directly assign passed loaned arc buffer to a dbuf. * If this is not possible copy the contents of passed arc buf via * dmu_write(). */ -void -dmu_assign_arcbuf_dnode(dnode_t *dn, uint64_t offset, arc_buf_t *buf, +int +dmu_assign_arcbuf_by_dnode(dnode_t *dn, uint64_t offset, arc_buf_t *buf, dmu_tx_t *tx) { dmu_buf_impl_t *db; + objset_t *os = dn->dn_objset; + uint64_t object = dn->dn_object; uint32_t blksz = (uint32_t)arc_buf_lsize(buf); uint64_t blkid; rw_enter(&dn->dn_struct_rwlock, RW_READER); blkid = dbuf_whichblock(dn, 0, offset); - VERIFY((db = dbuf_hold(dn, blkid, FTAG)) != NULL); + db = dbuf_hold(dn, blkid, FTAG); + if (db == NULL) + return (SET_ERROR(EIO)); rw_exit(&dn->dn_struct_rwlock); /* @@ -1648,32 +1779,33 @@ dmu_assign_arcbuf_dnode(dnode_t *dn, uint64_t offset, arc_buf_t *buf, dbuf_assign_arcbuf(db, buf, tx); dbuf_rele(db, FTAG); } else { - objset_t *os; - uint64_t object; - /* compressed bufs must always be assignable to their dbuf */ ASSERT3U(arc_get_compression(buf), ==, ZIO_COMPRESS_OFF); ASSERT(!(buf->b_flags & ARC_BUF_FLAG_COMPRESSED)); os = dn->dn_objset; object = dn->dn_object; - dbuf_rele(db, FTAG); dmu_write(os, object, offset, blksz, buf->b_data, tx); dmu_return_arcbuf(buf); XUIOSTAT_BUMP(xuiostat_wbuf_copied); } + + return (0); } -void 
-dmu_assign_arcbuf(dmu_buf_t *handle, uint64_t offset, arc_buf_t *buf, +int +dmu_assign_arcbuf_by_dbuf(dmu_buf_t *handle, uint64_t offset, arc_buf_t *buf, dmu_tx_t *tx) { + int err; dmu_buf_impl_t *dbuf = (dmu_buf_impl_t *)handle; DB_DNODE_ENTER(dbuf); - dmu_assign_arcbuf_dnode(DB_DNODE(dbuf), offset, buf, tx); + err = dmu_assign_arcbuf_by_dnode(DB_DNODE(dbuf), offset, buf, tx); DB_DNODE_EXIT(dbuf); + + return (err); } typedef struct { @@ -1700,7 +1832,7 @@ dmu_sync_ready(zio_t *zio, arc_buf_t *buf, void *varg) BP_SET_LSIZE(bp, db->db_size); } else if (!BP_IS_EMBEDDED(bp)) { ASSERT(BP_GET_LEVEL(bp) == 0); - bp->blk_fill = 1; + BP_SET_FILL(bp, 1); } } } @@ -2031,6 +2163,20 @@ dmu_sync(zio_t *pio, uint64_t txg, dmu_sync_cb_t *done, zgd_t *zgd) } int +dmu_object_set_nlevels(objset_t *os, uint64_t object, int nlevels, dmu_tx_t *tx) +{ + dnode_t *dn; + int err; + + err = dnode_hold(os, object, FTAG, &dn); + if (err) + return (err); + err = dnode_set_nlevels(dn, nlevels, tx); + dnode_rele(dn, FTAG); + return (err); +} + +int dmu_object_set_blocksize(objset_t *os, uint64_t object, uint64_t size, int ibs, dmu_tx_t *tx) { @@ -2045,6 +2191,23 @@ dmu_object_set_blocksize(objset_t *os, uint64_t object, uint64_t size, int ibs, return (err); } +int +dmu_object_set_maxblkid(objset_t *os, uint64_t object, uint64_t maxblkid, + dmu_tx_t *tx) +{ + dnode_t *dn; + int err; + + err = dnode_hold(os, object, FTAG, &dn); + if (err) + return (err); + rw_enter(&dn->dn_struct_rwlock, RW_WRITER); + dnode_new_blkid(dn, maxblkid, tx, B_FALSE, B_TRUE); + rw_exit(&dn->dn_struct_rwlock); + dnode_rele(dn, FTAG); + return (0); +} + void dmu_object_set_checksum(objset_t *os, uint64_t object, uint8_t checksum, dmu_tx_t *tx) @@ -2084,8 +2247,6 @@ dmu_object_set_compress(objset_t *os, uint64_t object, uint8_t compress, dnode_rele(dn, FTAG); } -int zfs_mdcomp_disable = 0; - /* * When the "redundant_metadata" property is set to "most", only indirect * blocks of this level and higher will have an additional 
ditto block. @@ -2104,6 +2265,7 @@ dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp) boolean_t dedup = B_FALSE; boolean_t nopwrite = B_FALSE; boolean_t dedup_verify = os->os_dedup_verify; + boolean_t encrypt = B_FALSE; int copies = os->os_copies; /* @@ -2114,16 +2276,12 @@ dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp) * 3. all other level 0 blocks */ if (ismd) { - if (zfs_mdcomp_disable) { - compress = ZIO_COMPRESS_EMPTY; - } else { - /* - * XXX -- we should design a compression algorithm - * that specializes in arrays of bps. - */ - compress = zio_compress_select(os->os_spa, - ZIO_COMPRESS_ON, ZIO_COMPRESS_ON); - } + /* + * XXX -- we should design a compression algorithm + * that specializes in arrays of bps. + */ + compress = zio_compress_select(os->os_spa, + ZIO_COMPRESS_ON, ZIO_COMPRESS_ON); /* * Metadata always gets checksummed. If the data @@ -2191,10 +2349,33 @@ dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp) compress != ZIO_COMPRESS_OFF && zfs_nopwrite_enabled); } - zp->zp_checksum = checksum; - zp->zp_compress = compress; - ASSERT3U(zp->zp_compress, !=, ZIO_COMPRESS_INHERIT); + /* + * All objects in an encrypted objset are protected from modification + * via a MAC. Encrypted objects store their IV and salt in the last DVA + * in the bp, so we cannot use all copies. Encrypted objects are also + * not subject to nopwrite since writing the same data will still + * result in a new ciphertext. Only encrypted blocks can be dedup'd + * to avoid ambiguity in the dedup code since the DDT does not store + * object types. 
+ */ + if (os->os_encrypted && (wp & WP_NOFILL) == 0) { + encrypt = B_TRUE; + if (DMU_OT_IS_ENCRYPTED(type)) { + copies = MIN(copies, SPA_DVAS_PER_BP - 1); + nopwrite = B_FALSE; + } else { + dedup = B_FALSE; + } + + if (level <= 0 && + (type == DMU_OT_DNODE || type == DMU_OT_OBJSET)) { + compress = ZIO_COMPRESS_EMPTY; + } + } + + zp->zp_compress = compress; + zp->zp_checksum = checksum; zp->zp_type = (wp & WP_SPILL) ? dn->dn_bonustype : type; zp->zp_level = level; zp->zp_copies = MIN(copies, spa_max_replication(os->os_spa)); @@ -2203,6 +2384,11 @@ dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp) zp->zp_nopwrite = nopwrite; zp->zp_zpl_smallblk = DMU_OT_IS_FILE(zp->zp_type) ? os->os_zpl_special_smallblock : 0; + zp->zp_encrypt = encrypt; + zp->zp_byteorder = ZFS_HOST_BYTEORDER; + bzero(zp->zp_salt, ZIO_DATA_SALT_LEN); + bzero(zp->zp_iv, ZIO_DATA_IV_LEN); + bzero(zp->zp_mac, ZIO_DATA_MAC_LEN); } int diff --git a/usr/src/uts/common/fs/zfs/dmu_diff.c b/usr/src/uts/common/fs/zfs/dmu_diff.c index 982b96132c..76c32b1264 100644 --- a/usr/src/uts/common/fs/zfs/dmu_diff.c +++ b/usr/src/uts/common/fs/zfs/dmu_diff.c @@ -131,11 +131,14 @@ diff_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *abuf; arc_flags_t aflags = ARC_FLAG_WAIT; int blksz = BP_GET_LSIZE(bp); + int zio_flags = ZIO_FLAG_CANFAIL; int i; + if (BP_IS_PROTECTED(bp)) + zio_flags |= ZIO_FLAG_RAW; + if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf, - ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, - &aflags, zb) != 0) + ZIO_PRIORITY_ASYNC_READ, zio_flags, &aflags, zb) != 0) return (SET_ERROR(EIO)); blk = abuf->b_data; @@ -206,8 +209,17 @@ dmu_diff(const char *tosnap_name, const char *fromsnap_name, da.da_ddr.ddr_first = da.da_ddr.ddr_last = 0; da.da_err = 0; + /* + * Since zfs diff only looks at dnodes which are stored in plaintext + * (other than bonus buffers), we don't technically need to decrypt + * the dataset to perform this operation. 
However, the command line + * utility will still fail if the keys are not loaded because the + * dataset isn't mounted and because it will fail when it attempts to + * call the ZFS_IOC_OBJ_TO_STATS ioctl. + */ error = traverse_dataset(tosnap, fromtxg, - TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA, diff_cb, &da); + TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA | TRAVERSE_NO_DECRYPT, + diff_cb, &da); if (error != 0) { da.da_err = error; diff --git a/usr/src/uts/common/fs/zfs/dmu_object.c b/usr/src/uts/common/fs/zfs/dmu_object.c index f835987e7d..1a91fefe88 100644 --- a/usr/src/uts/common/fs/zfs/dmu_object.c +++ b/usr/src/uts/common/fs/zfs/dmu_object.c @@ -24,6 +24,7 @@ * Copyright 2014 HybridCluster. All rights reserved. */ +#include <sys/dbuf.h> #include <sys/dmu.h> #include <sys/dmu_objset.h> #include <sys/dmu_tx.h> @@ -263,13 +264,13 @@ dmu_object_reclaim(objset_t *os, uint64_t object, dmu_object_type_t ot, int blocksize, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) { return (dmu_object_reclaim_dnsize(os, object, ot, blocksize, bonustype, - bonuslen, DNODE_MIN_SIZE, tx)); + bonuslen, DNODE_MIN_SIZE, B_FALSE, tx)); } int dmu_object_reclaim_dnsize(objset_t *os, uint64_t object, dmu_object_type_t ot, int blocksize, dmu_object_type_t bonustype, int bonuslen, int dnodesize, - dmu_tx_t *tx) + boolean_t keep_spill, dmu_tx_t *tx) { dnode_t *dn; int dn_slots = dnodesize >> DNODE_SHIFT; @@ -286,7 +287,30 @@ dmu_object_reclaim_dnsize(objset_t *os, uint64_t object, dmu_object_type_t ot, if (err) return (err); - dnode_reallocate(dn, ot, blocksize, bonustype, bonuslen, dn_slots, tx); + dnode_reallocate(dn, ot, blocksize, bonustype, bonuslen, dn_slots, + keep_spill, tx); + + dnode_rele(dn, FTAG); + return (err); +} + +int +dmu_object_rm_spill(objset_t *os, uint64_t object, dmu_tx_t *tx) +{ + dnode_t *dn; + int err; + + err = dnode_hold_impl(os, object, DNODE_MUST_BE_ALLOCATED, 0, + FTAG, &dn); + if (err) + return (err); + + rw_enter(&dn->dn_struct_rwlock, RW_WRITER); + if 
(dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR) { + dbuf_rm_spill(dn, tx); + dnode_rm_spill(dn, tx); + } + rw_exit(&dn->dn_struct_rwlock); dnode_rele(dn, FTAG); return (err); diff --git a/usr/src/uts/common/fs/zfs/dmu_objset.c b/usr/src/uts/common/fs/zfs/dmu_objset.c index 771b803973..4d0a5d2fd5 100644 --- a/usr/src/uts/common/fs/zfs/dmu_objset.c +++ b/usr/src/uts/common/fs/zfs/dmu_objset.c @@ -54,6 +54,7 @@ #include <sys/dsl_destroy.h> #include <sys/vdev.h> #include <sys/zfeature.h> +#include <sys/dmu_recv.h> #include "zfs_namecheck.h" /* @@ -418,16 +419,23 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, if (!BP_IS_HOLE(os->os_rootbp)) { arc_flags_t aflags = ARC_FLAG_WAIT; zbookmark_phys_t zb; + enum zio_flag zio_flags = ZIO_FLAG_CANFAIL; SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET, ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID); if (DMU_OS_IS_L2CACHEABLE(os)) aflags |= ARC_FLAG_L2CACHE; + if (ds != NULL && ds->ds_dir->dd_crypto_obj != 0) { + ASSERT3U(BP_GET_COMPRESS(bp), ==, ZIO_COMPRESS_OFF); + ASSERT(BP_IS_AUTHENTICATED(bp)); + zio_flags |= ZIO_FLAG_RAW; + } + dprintf_bp(os->os_rootbp, "reading %s", ""); err = arc_read(NULL, spa, os->os_rootbp, arc_getbuf_func, &os->os_phys_buf, - ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &aflags, &zb); + ZIO_PRIORITY_SYNC_READ, zio_flags, &aflags, &zb); if (err != 0) { kmem_free(os, sizeof (objset_t)); /* convert checksum errors into IO errors */ @@ -468,6 +476,8 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, if (ds != NULL) { boolean_t needlock = B_FALSE; + os->os_encrypted = (ds->ds_dir->dd_crypto_obj != 0); + /* * Note: it's valid to open the objset if the dataset is * long-held, in which case the pool_config lock will not @@ -477,6 +487,7 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, needlock = B_TRUE; dsl_pool_config_enter(dmu_objset_pool(os), FTAG); } + err = dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_PRIMARYCACHE), 
primary_cache_changed_cb, os); @@ -550,6 +561,7 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, /* It's the meta-objset. */ os->os_checksum = ZIO_CHECKSUM_FLETCHER_4; os->os_compress = ZIO_COMPRESS_ON; + os->os_encrypted = B_FALSE; os->os_copies = spa_max_replication(spa); os->os_dedup_checksum = ZIO_CHECKSUM_OFF; os->os_dedup_verify = B_FALSE; @@ -640,16 +652,18 @@ dmu_objset_from_ds(dsl_dataset_t *ds, objset_t **osp) * can be held at a time. */ int -dmu_objset_hold(const char *name, void *tag, objset_t **osp) +dmu_objset_hold_flags(const char *name, boolean_t decrypt, void *tag, + objset_t **osp) { dsl_pool_t *dp; dsl_dataset_t *ds; int err; + ds_hold_flags_t flags = (decrypt) ? DS_HOLD_FLAG_DECRYPT : 0; err = dsl_pool_hold(name, tag, &dp); if (err != 0) return (err); - err = dsl_dataset_hold(dp, name, tag, &ds); + err = dsl_dataset_hold_flags(dp, name, flags, tag, &ds); if (err != 0) { dsl_pool_rele(dp, tag); return (err); @@ -664,23 +678,46 @@ dmu_objset_hold(const char *name, void *tag, objset_t **osp) return (err); } +int +dmu_objset_hold(const char *name, void *tag, objset_t **osp) +{ + return (dmu_objset_hold_flags(name, B_FALSE, tag, osp)); +} + +/* ARGSUSED */ static int dmu_objset_own_impl(dsl_dataset_t *ds, dmu_objset_type_t type, - boolean_t readonly, void *tag, objset_t **osp) + boolean_t readonly, boolean_t decrypt, void *tag, objset_t **osp) { int err; err = dmu_objset_from_ds(ds, osp); if (err != 0) { - dsl_dataset_disown(ds, tag); + return (err); } else if (type != DMU_OST_ANY && type != (*osp)->os_phys->os_type) { - dsl_dataset_disown(ds, tag); return (SET_ERROR(EINVAL)); } else if (!readonly && dsl_dataset_is_snapshot(ds)) { - dsl_dataset_disown(ds, tag); + return (SET_ERROR(EROFS)); + } else if (!readonly && decrypt && + dsl_dir_incompatible_encryption_version(ds->ds_dir)) { return (SET_ERROR(EROFS)); } - return (err); + + /* if we are decrypting, we can now check MACs in os->os_phys_buf */ + if (decrypt && 
arc_is_unauthenticated((*osp)->os_phys_buf)) { + zbookmark_phys_t zb; + + SET_BOOKMARK(&zb, ds->ds_object, ZB_ROOT_OBJECT, + ZB_ROOT_LEVEL, ZB_ROOT_BLKID); + err = arc_untransform((*osp)->os_phys_buf, (*osp)->os_spa, + &zb, B_FALSE); + if (err != 0) + return (err); + + ASSERT0(arc_is_unauthenticated((*osp)->os_phys_buf)); + } + + return (0); } /* @@ -690,48 +727,70 @@ dmu_objset_own_impl(dsl_dataset_t *ds, dmu_objset_type_t type, */ int dmu_objset_own(const char *name, dmu_objset_type_t type, - boolean_t readonly, void *tag, objset_t **osp) + boolean_t readonly, boolean_t decrypt, void *tag, objset_t **osp) { dsl_pool_t *dp; dsl_dataset_t *ds; int err; + ds_hold_flags_t flags = (decrypt) ? DS_HOLD_FLAG_DECRYPT : 0; err = dsl_pool_hold(name, FTAG, &dp); if (err != 0) return (err); - err = dsl_dataset_own(dp, name, tag, &ds); + err = dsl_dataset_own(dp, name, flags, tag, &ds); if (err != 0) { dsl_pool_rele(dp, FTAG); return (err); } - err = dmu_objset_own_impl(ds, type, readonly, tag, osp); + err = dmu_objset_own_impl(ds, type, readonly, decrypt, tag, osp); + if (err != 0) { + dsl_dataset_disown(ds, flags, tag); + dsl_pool_rele(dp, FTAG); + return (err); + } + dsl_pool_rele(dp, FTAG); - return (err); + return (0); } int dmu_objset_own_obj(dsl_pool_t *dp, uint64_t obj, dmu_objset_type_t type, - boolean_t readonly, void *tag, objset_t **osp) + boolean_t readonly, boolean_t decrypt, void *tag, objset_t **osp) { dsl_dataset_t *ds; int err; + ds_hold_flags_t flags = (decrypt) ? 
DS_HOLD_FLAG_DECRYPT : 0; - err = dsl_dataset_own_obj(dp, obj, tag, &ds); + err = dsl_dataset_own_obj(dp, obj, flags, tag, &ds); if (err != 0) return (err); - return (dmu_objset_own_impl(ds, type, readonly, tag, osp)); + err = dmu_objset_own_impl(ds, type, readonly, decrypt, tag, osp); + if (err != 0) { + dsl_dataset_disown(ds, flags, tag); + return (err); + } + + return (0); } void -dmu_objset_rele(objset_t *os, void *tag) +dmu_objset_rele_flags(objset_t *os, boolean_t decrypt, void *tag) { + ds_hold_flags_t flags = (decrypt) ? DS_HOLD_FLAG_DECRYPT : 0; + dsl_pool_t *dp = dmu_objset_pool(os); - dsl_dataset_rele(os->os_dsl_dataset, tag); + dsl_dataset_rele_flags(os->os_dsl_dataset, flags, tag); dsl_pool_rele(dp, tag); } +void +dmu_objset_rele(objset_t *os, void *tag) +{ + dmu_objset_rele_flags(os, B_FALSE, tag); +} + /* * When we are called, os MUST refer to an objset associated with a dataset * that is owned by 'tag'; that is, is held and long held by 'tag' and ds_owner @@ -745,7 +804,7 @@ dmu_objset_rele(objset_t *os, void *tag) */ void dmu_objset_refresh_ownership(dsl_dataset_t *ds, dsl_dataset_t **newds, - void *tag) + boolean_t decrypt, void *tag) { dsl_pool_t *dp; char name[ZFS_MAX_DATASET_NAME_LEN]; @@ -757,15 +816,18 @@ dmu_objset_refresh_ownership(dsl_dataset_t *ds, dsl_dataset_t **newds, dsl_dataset_name(ds, name); dp = ds->ds_dir->dd_pool; dsl_pool_config_enter(dp, FTAG); - dsl_dataset_disown(ds, tag); - VERIFY0(dsl_dataset_own(dp, name, tag, newds)); + + dsl_dataset_disown(ds, 0, tag); + VERIFY0(dsl_dataset_own(dp, name, + (decrypt) ? DS_HOLD_FLAG_DECRYPT : 0, tag, newds)); dsl_pool_config_exit(dp, FTAG); } void -dmu_objset_disown(objset_t *os, void *tag) +dmu_objset_disown(objset_t *os, boolean_t decrypt, void *tag) { - dsl_dataset_disown(os->os_dsl_dataset, tag); + dsl_dataset_disown(os->os_dsl_dataset, + (decrypt) ? 
DS_HOLD_FLAG_DECRYPT : 0, tag); } void @@ -842,6 +904,8 @@ dmu_objset_evict(objset_t *os) } else { mutex_exit(&os->os_lock); } + + } void @@ -887,16 +951,21 @@ dmu_objset_snap_cmtime(objset_t *os) return (dsl_dir_snap_cmtime(os->os_dsl_dataset->ds_dir)); } -/* called from dsl for meta-objset */ +/* ARGSUSED */ objset_t * -dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, - dmu_objset_type_t type, dmu_tx_t *tx) +dmu_objset_create_impl_dnstats(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, + dmu_objset_type_t type, int levels, int blksz, int ibs, dmu_tx_t *tx) { objset_t *os; dnode_t *mdn; ASSERT(dmu_tx_is_syncing(tx)); + if (blksz == 0) + blksz = 1 << DNODE_BLOCK_SHIFT; + if (ibs == 0) + ibs = DN_MAX_INDBLKSHIFT; + if (ds != NULL) VERIFY0(dmu_objset_from_ds(ds, &os)); else @@ -919,22 +988,25 @@ dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, * to convergence, so minimizing its dn_nlevels matters. */ if (ds != NULL) { - int levels = 1; - - /* - * Determine the number of levels necessary for the meta-dnode - * to contain DN_MAX_OBJECT dnodes. Note that in order to - * ensure that we do not overflow 64 bits, there has to be - * a nlevels that gives us a number of blocks > DN_MAX_OBJECT - * but < 2^64. Therefore, - * (mdn->dn_indblkshift - SPA_BLKPTRSHIFT) (10) must be - * less than (64 - log2(DN_MAX_OBJECT)) (16). - */ - while ((uint64_t)mdn->dn_nblkptr << - (mdn->dn_datablkshift - DNODE_SHIFT + - (levels - 1) * (mdn->dn_indblkshift - SPA_BLKPTRSHIFT)) < - DN_MAX_OBJECT) - levels++; + if (levels == 0) { + levels = 1; + + /* + * Determine the number of levels necessary for the + * meta-dnode to contain DN_MAX_OBJECT dnodes. Note + * that in order to ensure that we do not overflow + * 64 bits, there has to be a nlevels that gives us a + * number of blocks > DN_MAX_OBJECT but < 2^64. + * Therefore, (mdn->dn_indblkshift - SPA_BLKPTRSHIFT) + * (10) must be less than (64 - log2(DN_MAX_OBJECT)) + * (16). 
+ */ + while ((uint64_t)mdn->dn_nblkptr << + (mdn->dn_datablkshift - DNODE_SHIFT + (levels - 1) * + (mdn->dn_indblkshift - SPA_BLKPTRSHIFT)) < + DN_MAX_OBJECT) + levels++; + } mdn->dn_next_nlevels[tx->tx_txg & TXG_MASK] = mdn->dn_nlevels = levels; @@ -944,7 +1016,13 @@ dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, ASSERT(type != DMU_OST_ANY); ASSERT(type < DMU_OST_NUMTYPES); os->os_phys->os_type = type; - if (dmu_objset_userused_enabled(os)) { + + /* + * Enable user accounting if it is enabled and this is not an + * encrypted receive. + */ + if (dmu_objset_userused_enabled(os) && + (!os->os_encrypted || !dmu_objset_is_receiving(os))) { os->os_phys->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE; os->os_flags = os->os_phys->os_flags; } @@ -954,6 +1032,14 @@ dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, return (os); } +/* called from dsl for meta-objset */ +objset_t * +dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, + dmu_objset_type_t type, dmu_tx_t *tx) +{ + return (dmu_objset_create_impl_dnstats(spa, ds, bp, type, 0, 0, 0, tx)); +} + typedef struct dmu_objset_create_arg { const char *doca_name; cred_t *doca_cred; @@ -962,6 +1048,7 @@ typedef struct dmu_objset_create_arg { void *doca_userarg; dmu_objset_type_t doca_type; uint64_t doca_flags; + dsl_crypto_params_t *doca_dcp; } dmu_objset_create_arg_t; /*ARGSUSED*/ @@ -990,8 +1077,16 @@ dmu_objset_create_check(void *arg, dmu_tx_t *tx) dsl_dir_rele(pdd, FTAG); return (SET_ERROR(EEXIST)); } + + error = dmu_objset_create_crypt_check(pdd, doca->doca_dcp, NULL); + if (error != 0) { + dsl_dir_rele(pdd, FTAG); + return (error); + } + error = dsl_fs_ss_limit_check(pdd, 1, ZFS_PROP_FILESYSTEM_LIMIT, NULL, doca->doca_cred); + dsl_dir_rele(pdd, FTAG); return (error); @@ -1002,23 +1097,25 @@ dmu_objset_create_sync(void *arg, dmu_tx_t *tx) { dmu_objset_create_arg_t *doca = arg; dsl_pool_t *dp = dmu_tx_pool(tx); + spa_t *spa = dp->dp_spa; dsl_dir_t *pdd; const char 
*tail; dsl_dataset_t *ds; uint64_t obj; blkptr_t *bp; objset_t *os; + zio_t *rzio; VERIFY0(dsl_dir_hold(dp, doca->doca_name, FTAG, &pdd, &tail)); obj = dsl_dataset_create_sync(pdd, tail, NULL, doca->doca_flags, - doca->doca_cred, tx); + doca->doca_cred, doca->doca_dcp, tx); - VERIFY0(dsl_dataset_hold_obj(pdd->dd_pool, obj, FTAG, &ds)); + VERIFY0(dsl_dataset_hold_obj_flags(pdd->dd_pool, obj, + DS_HOLD_FLAG_DECRYPT, FTAG, &ds)); rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); bp = dsl_dataset_get_blkptr(ds); - os = dmu_objset_create_impl(pdd->dd_pool->dp_spa, - ds, bp, doca->doca_type, tx); + os = dmu_objset_create_impl(spa, ds, bp, doca->doca_type, tx); rrw_exit(&ds->ds_bp_rwlock, FTAG); if (doca->doca_userfunc != NULL) { @@ -1026,16 +1123,68 @@ dmu_objset_create_sync(void *arg, dmu_tx_t *tx) doca->doca_cred, tx); } + /* + * The doca_userfunc() may write out some data that needs to be + * encrypted if the dataset is encrypted (specifically the root + * directory). This data must be written out before the encryption + * key mapping is removed by dsl_dataset_rele_flags(). Force the + * I/O to occur immediately by invoking the relevant sections of + * dsl_pool_sync(). 
+ */ + if (os->os_encrypted) { + dsl_dataset_t *tmpds = NULL; + boolean_t need_sync_done = B_FALSE; + + mutex_enter(&ds->ds_lock); + ds->ds_owner = FTAG; + mutex_exit(&ds->ds_lock); + + rzio = zio_root(spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); + tmpds = txg_list_remove_this(&dp->dp_dirty_datasets, ds, + tx->tx_txg); + if (tmpds != NULL) { + dsl_dataset_sync(ds, rzio, tx); + need_sync_done = B_TRUE; + } + VERIFY0(zio_wait(rzio)); + dmu_objset_do_userquota_updates(os, tx); + taskq_wait(dp->dp_sync_taskq); + if (txg_list_member(&dp->dp_dirty_datasets, ds, tx->tx_txg)) { + ASSERT3P(ds->ds_key_mapping, !=, NULL); + key_mapping_rele(spa, ds->ds_key_mapping, ds); + } + + rzio = zio_root(spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); + tmpds = txg_list_remove_this(&dp->dp_dirty_datasets, ds, + tx->tx_txg); + if (tmpds != NULL) { + dmu_buf_rele(ds->ds_dbuf, ds); + dsl_dataset_sync(ds, rzio, tx); + } + VERIFY0(zio_wait(rzio)); + + if (need_sync_done) { + ASSERT3P(ds->ds_key_mapping, !=, NULL); + key_mapping_rele(spa, ds->ds_key_mapping, ds); + dsl_dataset_sync_done(ds, tx); + } + + mutex_enter(&ds->ds_lock); + ds->ds_owner = NULL; + mutex_exit(&ds->ds_lock); + } + spa_history_log_internal_ds(ds, "create", tx, ""); - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(ds, DS_HOLD_FLAG_DECRYPT, FTAG); dsl_dir_rele(pdd, FTAG); } int dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags, - void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg) + dsl_crypto_params_t *dcp, dmu_objset_create_sync_func_t func, void *arg) { dmu_objset_create_arg_t doca; + dsl_crypto_params_t tmp_dcp = { 0 }; doca.doca_name = name; doca.doca_cred = CRED(); @@ -1044,9 +1193,19 @@ dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags, doca.doca_userarg = arg; doca.doca_type = type; + /* + * Some callers (mostly for testing) do not provide a dcp on their + * own but various code inside the sync task will require it to be + * allocated. 
Rather than adding NULL checks throughout this code + * or adding dummy dcp's to all of the callers we simply create a + * dummy one here and use that. This zero dcp will have the same + * effect as asking for inheritence of all encryption params. + */ + doca.doca_dcp = (dcp != NULL) ? dcp : &tmp_dcp; + return (dsl_sync_task(name, dmu_objset_create_check, dmu_objset_create_sync, &doca, - 5, ZFS_SPACE_CHECK_NORMAL)); + 6, ZFS_SPACE_CHECK_NORMAL)); } typedef struct dmu_objset_clone_arg { @@ -1086,18 +1245,29 @@ dmu_objset_clone_check(void *arg, dmu_tx_t *tx) dsl_dir_rele(pdd, FTAG); return (SET_ERROR(EDQUOT)); } - dsl_dir_rele(pdd, FTAG); error = dsl_dataset_hold(dp, doca->doca_origin, FTAG, &origin); - if (error != 0) + if (error != 0) { + dsl_dir_rele(pdd, FTAG); return (error); + } /* You can only clone snapshots, not the head datasets. */ if (!origin->ds_is_snapshot) { dsl_dataset_rele(origin, FTAG); + dsl_dir_rele(pdd, FTAG); return (SET_ERROR(EINVAL)); } + + error = dmu_objset_clone_crypt_check(pdd, origin->ds_dir); + if (error != 0) { + dsl_dataset_rele(origin, FTAG); + dsl_dir_rele(pdd, FTAG); + return (error); + } + dsl_dataset_rele(origin, FTAG); + dsl_dir_rele(pdd, FTAG); return (0); } @@ -1117,7 +1287,7 @@ dmu_objset_clone_sync(void *arg, dmu_tx_t *tx) VERIFY0(dsl_dataset_hold(dp, doca->doca_origin, FTAG, &origin)); obj = dsl_dataset_create_sync(pdd, tail, origin, 0, - doca->doca_cred, tx); + doca->doca_cred, NULL, tx); VERIFY0(dsl_dataset_hold_obj(pdd->dd_pool, obj, FTAG, &ds)); dsl_dataset_name(origin, namebuf); @@ -1139,7 +1309,7 @@ dmu_objset_clone(const char *clone, const char *origin) return (dsl_sync_task(clone, dmu_objset_clone_check, dmu_objset_clone_sync, &doca, - 5, ZFS_SPACE_CHECK_NORMAL)); + 6, ZFS_SPACE_CHECK_NORMAL)); } static int @@ -1299,10 +1469,10 @@ dmu_objset_write_ready(zio_t *zio, arc_buf_t *abuf, void *arg) blkptr_t *bp = zio->io_bp; objset_t *os = arg; dnode_phys_t *dnp = &os->os_phys->os_meta_dnode; + uint64_t fill = 0; 
ASSERT(!BP_IS_EMBEDDED(bp)); ASSERT3U(BP_GET_TYPE(bp), ==, DMU_OT_OBJSET); - ASSERT0(BP_GET_LEVEL(bp)); /* * Update rootbp fill count: it should be the number of objects @@ -1310,9 +1480,11 @@ dmu_objset_write_ready(zio_t *zio, arc_buf_t *abuf, void *arg) * objects that are stored in the objset_phys_t -- the meta * dnode and user/group accounting objects). */ - bp->blk_fill = 0; for (int i = 0; i < dnp->dn_nblkptr; i++) - bp->blk_fill += BP_GET_FILL(&dnp->dn_blkptr[i]); + fill += BP_GET_FILL(&dnp->dn_blkptr[i]); + + BP_SET_FILL(bp, fill); + if (os->os_dsl_dataset != NULL) rrw_enter(&os->os_dsl_dataset->ds_bp_rwlock, RW_WRITER, FTAG); *os->os_rootbp = *bp; @@ -1401,6 +1573,19 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx) dmu_write_policy(os, NULL, 0, 0, &zp); + /* + * If we are either claiming the ZIL or doing a raw receive, write + * out the os_phys_buf raw. Neither of these actions will effect the + * MAC at this point. + */ + if (os->os_raw_receive || + os->os_next_write_raw[tx->tx_txg & TXG_MASK]) { + ASSERT(os->os_encrypted); + arc_convert_to_raw(os->os_phys_buf, + os->os_dsl_dataset->ds_object, ZFS_HOST_BYTEORDER, + DMU_OT_OBJSET, NULL, NULL, NULL); + } + zio = arc_write(pio, os->os_spa, tx->tx_txg, blkptr_copy, os->os_phys_buf, DMU_OS_IS_L2CACHEABLE(os), &zp, dmu_objset_write_ready, NULL, NULL, dmu_objset_write_done, @@ -1424,7 +1609,8 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx) txgoff = tx->tx_txg & TXG_MASK; - if (dmu_objset_userused_enabled(os)) { + if (dmu_objset_userused_enabled(os) && + (!os->os_encrypted || !dmu_objset_is_receiving(os))) { /* * We must create the list here because it uses the * dn_dirty_link[] of this txg. 
But it may already @@ -1663,6 +1849,10 @@ dmu_objset_do_userquota_updates(objset_t *os, dmu_tx_t *tx) if (!dmu_objset_userused_enabled(os)) return; + /* if this is a raw receive just return and handle accounting later */ + if (os->os_encrypted && dmu_objset_is_receiving(os)) + return; + /* Allocate the user/groupused objects if necessary. */ if (DMU_USERUSED_DNODE(os)->dn_type == DMU_OT_NONE) { VERIFY0(zap_create_claim(os, @@ -1742,6 +1932,18 @@ dmu_objset_userquota_get_ids(dnode_t *dn, boolean_t before, dmu_tx_t *tx) if (!dmu_objset_userused_enabled(dn->dn_objset)) return; + /* + * Raw receives introduce a problem with user accounting. Raw + * receives cannot update the user accounting info because the + * user ids and the sizes are encrypted. To guarantee that we + * never end up with bad user accounting, we simply disable it + * during raw receives. We also disable this for normal receives + * so that an incremental raw receive may be done on top of an + * existing non-raw receive. + */ + if (os->os_encrypted && dmu_objset_is_receiving(os)) + return; + if (before && (flags & (DN_ID_CHKED_BONUS|DN_ID_OLD_EXIST| DN_ID_CHKED_SPILL))) return; @@ -2394,6 +2596,13 @@ dmu_objset_find(char *name, int func(const char *, void *), void *arg, return (error); } +boolean_t +dmu_objset_incompatible_encryption_version(objset_t *os) +{ + return (dsl_dir_incompatible_encryption_version( + os->os_dsl_dataset->ds_dir)); +} + void dmu_objset_set_user(objset_t *os, void *user_ptr) { diff --git a/usr/src/uts/common/fs/zfs/dmu_recv.c b/usr/src/uts/common/fs/zfs/dmu_recv.c index 542bb42f3f..b6f63e7e22 100644 --- a/usr/src/uts/common/fs/zfs/dmu_recv.c +++ b/usr/src/uts/common/fs/zfs/dmu_recv.c @@ -67,7 +67,7 @@ typedef struct dmu_recv_begin_arg { const char *drba_origin; dmu_recv_cookie_t *drba_cookie; cred_t *drba_cred; - uint64_t drba_snapobj; + dsl_crypto_params_t *drba_dcp; } dmu_recv_begin_arg_t; static int @@ -77,6 +77,11 @@ recv_begin_check_existing_impl(dmu_recv_begin_arg_t 
*drba, dsl_dataset_t *ds, uint64_t val; int error; dsl_pool_t *dp = ds->ds_dir->dd_pool; + struct drr_begin *drrb = drba->drba_cookie->drc_drrb; + uint64_t featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo); + boolean_t encrypted = ds->ds_dir->dd_crypto_obj != 0; + boolean_t raw = (featureflags & DMU_BACKUP_FEATURE_RAW) != 0; + boolean_t embed = (featureflags & DMU_BACKUP_FEATURE_EMBED_DATA) != 0; /* temporary clone name must not exist */ error = zap_lookup(dp->dp_meta_objset, @@ -110,6 +115,14 @@ recv_begin_check_existing_impl(dmu_recv_begin_arg_t *drba, dsl_dataset_t *ds, dsl_dataset_t *snap; uint64_t obj = dsl_dataset_phys(ds)->ds_prev_snap_obj; + /* Can't raw receive on top of an unencrypted dataset */ + if (!encrypted && raw) + return (SET_ERROR(EINVAL)); + + /* Encryption is incompatible with embedded data */ + if (encrypted && embed) + return (SET_ERROR(EINVAL)); + /* Find snapshot in this dir that matches fromguid. */ while (obj != 0) { error = dsl_dataset_hold_obj(dp, obj, FTAG, @@ -129,7 +142,7 @@ recv_begin_check_existing_impl(dmu_recv_begin_arg_t *drba, dsl_dataset_t *ds, return (SET_ERROR(ENODEV)); if (drba->drba_cookie->drc_force) { - drba->drba_snapobj = obj; + drba->drba_cookie->drc_fromsnapobj = obj; } else { /* * If we are not forcing, there must be no @@ -139,7 +152,8 @@ recv_begin_check_existing_impl(dmu_recv_begin_arg_t *drba, dsl_dataset_t *ds, dsl_dataset_rele(snap, FTAG); return (SET_ERROR(ETXTBSY)); } - drba->drba_snapobj = ds->ds_prev->ds_object; + drba->drba_cookie->drc_fromsnapobj = + ds->ds_prev->ds_object; } dsl_dataset_rele(snap, FTAG); @@ -147,9 +161,34 @@ recv_begin_check_existing_impl(dmu_recv_begin_arg_t *drba, dsl_dataset_t *ds, /* if full, then must be forced */ if (!drba->drba_cookie->drc_force) return (SET_ERROR(EEXIST)); - /* start from $ORIGIN@$ORIGIN, if supported */ - drba->drba_snapobj = dp->dp_origin_snap != NULL ? 
- dp->dp_origin_snap->ds_object : 0; + + /* + * We don't support using zfs recv -F to blow away + * encrypted filesystems. This would require the + * dsl dir to point to the old encryption key and + * the new one at the same time during the receive. + */ + if ((!encrypted && raw) || encrypted) + return (SET_ERROR(EINVAL)); + + /* + * Perform the same encryption checks we would if + * we were creating a new dataset from scratch. + */ + if (!raw) { + boolean_t will_encrypt; + + error = dmu_objset_create_crypt_check( + ds->ds_dir->dd_parent, drba->drba_dcp, + &will_encrypt); + if (error != 0) + return (error); + + if (will_encrypt && embed) + return (SET_ERROR(EINVAL)); + } + + drba->drba_cookie->drc_fromsnapobj = 0; } return (0); @@ -164,6 +203,7 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx) struct drr_begin *drrb = drba->drba_cookie->drc_drrb; uint64_t fromguid = drrb->drr_fromguid; int flags = drrb->drr_flags; + ds_hold_flags_t dsflags = 0; int error; uint64_t featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo); dsl_dataset_t *ds; @@ -214,18 +254,34 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx) !spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_LARGE_DNODE)) return (SET_ERROR(ENOTSUP)); - error = dsl_dataset_hold(dp, tofs, FTAG, &ds); + if ((featureflags & DMU_BACKUP_FEATURE_RAW)) { + /* raw receives require the encryption feature */ + if (!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_ENCRYPTION)) + return (SET_ERROR(ENOTSUP)); + + /* embedded data is incompatible with encryption and raw recv */ + if (featureflags & DMU_BACKUP_FEATURE_EMBED_DATA) + return (SET_ERROR(EINVAL)); + + /* raw receives require spill block allocation flag */ + if (!(flags & DRR_FLAG_SPILL_BLOCK)) + return (SET_ERROR(ZFS_ERR_SPILL_BLOCK_FLAG_MISSING)); + } else { + dsflags |= DS_HOLD_FLAG_DECRYPT; + } + + error = dsl_dataset_hold_flags(dp, tofs, dsflags, FTAG, &ds); if (error == 0) { /* target fs already exists; recv into temp clone */ /* Can't recv a clone into an existing fs 
*/ if (flags & DRR_FLAG_CLONE || drba->drba_origin) { - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(ds, dsflags, FTAG); return (SET_ERROR(EINVAL)); } error = recv_begin_check_existing_impl(drba, ds, fromguid); - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(ds, dsflags, FTAG); } else if (error == ENOENT) { /* target fs does not exist; must be a full backup or clone */ char buf[ZFS_MAX_DATASET_NAME_LEN]; @@ -250,10 +306,35 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx) /* Open the parent of tofs */ ASSERT3U(strlen(tofs), <, sizeof (buf)); (void) strlcpy(buf, tofs, strrchr(tofs, '/') - tofs + 1); - error = dsl_dataset_hold(dp, buf, FTAG, &ds); + error = dsl_dataset_hold_flags(dp, buf, dsflags, FTAG, &ds); if (error != 0) return (error); + if ((featureflags & DMU_BACKUP_FEATURE_RAW) == 0 && + drba->drba_origin == NULL) { + boolean_t will_encrypt; + + /* + * Check that we aren't breaking any encryption rules + * and that we have all the parameters we need to + * create an encrypted dataset if necessary. If we are + * making an encrypted dataset the stream can't have + * embedded data. + */ + error = dmu_objset_create_crypt_check(ds->ds_dir, + drba->drba_dcp, &will_encrypt); + if (error != 0) { + dsl_dataset_rele_flags(ds, dsflags, FTAG); + return (error); + } + + if (will_encrypt && + (featureflags & DMU_BACKUP_FEATURE_EMBED_DATA)) { + dsl_dataset_rele_flags(ds, dsflags, FTAG); + return (SET_ERROR(EINVAL)); + } + } + /* * Check filesystem and snapshot limits before receiving. 
We'll * recheck snapshot limits again at the end (we create the @@ -262,39 +343,46 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx) error = dsl_fs_ss_limit_check(ds->ds_dir, 1, ZFS_PROP_FILESYSTEM_LIMIT, NULL, drba->drba_cred); if (error != 0) { - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(ds, dsflags, FTAG); return (error); } error = dsl_fs_ss_limit_check(ds->ds_dir, 1, ZFS_PROP_SNAPSHOT_LIMIT, NULL, drba->drba_cred); if (error != 0) { - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(ds, dsflags, FTAG); return (error); } if (drba->drba_origin != NULL) { dsl_dataset_t *origin; - error = dsl_dataset_hold(dp, drba->drba_origin, - FTAG, &origin); + + error = dsl_dataset_hold_flags(dp, drba->drba_origin, + dsflags, FTAG, &origin); if (error != 0) { - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(ds, dsflags, FTAG); return (error); } if (!origin->ds_is_snapshot) { - dsl_dataset_rele(origin, FTAG); - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(origin, dsflags, FTAG); + dsl_dataset_rele_flags(ds, dsflags, FTAG); return (SET_ERROR(EINVAL)); } if (dsl_dataset_phys(origin)->ds_guid != fromguid && fromguid != 0) { - dsl_dataset_rele(origin, FTAG); - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(origin, dsflags, FTAG); + dsl_dataset_rele_flags(ds, dsflags, FTAG); return (SET_ERROR(ENODEV)); } - dsl_dataset_rele(origin, FTAG); + if (origin->ds_dir->dd_crypto_obj != 0 && + (featureflags & DMU_BACKUP_FEATURE_EMBED_DATA)) { + dsl_dataset_rele_flags(origin, dsflags, FTAG); + dsl_dataset_rele_flags(ds, dsflags, FTAG); + return (SET_ERROR(EINVAL)); + } + dsl_dataset_rele_flags(origin, dsflags, FTAG); } - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(ds, dsflags, FTAG); error = 0; } return (error); @@ -308,27 +396,51 @@ dmu_recv_begin_sync(void *arg, dmu_tx_t *tx) objset_t *mos = dp->dp_meta_objset; struct drr_begin *drrb = drba->drba_cookie->drc_drrb; const char *tofs = drba->drba_cookie->drc_tofs; + uint64_t featureflags = 
DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo); dsl_dataset_t *ds, *newds; + objset_t *os; uint64_t dsobj; + ds_hold_flags_t dsflags = 0; int error; uint64_t crflags = 0; + dsl_crypto_params_t dummy_dcp = { 0 }; + dsl_crypto_params_t *dcp = drba->drba_dcp; if (drrb->drr_flags & DRR_FLAG_CI_DATA) crflags |= DS_FLAG_CI_DATASET; + if ((featureflags & DMU_BACKUP_FEATURE_RAW) == 0) + dsflags |= DS_HOLD_FLAG_DECRYPT; + + /* + * Raw, non-incremental recvs always use a dummy dcp with + * the raw cmd set. Raw incremental recvs do not use a dcp + * since the encryption parameters are already set in stone. + */ + if (dcp == NULL && drba->drba_cookie->drc_fromsnapobj == 0 && + drba->drba_origin == NULL) { + ASSERT3P(dcp, ==, NULL); + dcp = &dummy_dcp; - error = dsl_dataset_hold(dp, tofs, FTAG, &ds); + if (featureflags & DMU_BACKUP_FEATURE_RAW) + dcp->cp_cmd = DCP_CMD_RAW_RECV; + } + + error = dsl_dataset_hold_flags(dp, tofs, dsflags, FTAG, &ds); if (error == 0) { /* create temporary clone */ dsl_dataset_t *snap = NULL; - if (drba->drba_snapobj != 0) { + + if (drba->drba_cookie->drc_fromsnapobj != 0) { VERIFY0(dsl_dataset_hold_obj(dp, - drba->drba_snapobj, FTAG, &snap)); + drba->drba_cookie->drc_fromsnapobj, FTAG, &snap)); + ASSERT3P(dcp, ==, NULL); } + dsobj = dsl_dataset_create_sync(ds->ds_dir, recv_clone_name, - snap, crflags, drba->drba_cred, tx); - if (drba->drba_snapobj != 0) + snap, crflags, drba->drba_cred, dcp, tx); + if (drba->drba_cookie->drc_fromsnapobj != 0) dsl_dataset_rele(snap, FTAG); - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(ds, dsflags, FTAG); } else { dsl_dir_t *dd; const char *tail; @@ -339,18 +451,20 @@ dmu_recv_begin_sync(void *arg, dmu_tx_t *tx) if (drba->drba_origin != NULL) { VERIFY0(dsl_dataset_hold(dp, drba->drba_origin, FTAG, &origin)); + ASSERT3P(dcp, ==, NULL); } /* Create new dataset. 
*/ - dsobj = dsl_dataset_create_sync(dd, - strrchr(tofs, '/') + 1, - origin, crflags, drba->drba_cred, tx); + dsobj = dsl_dataset_create_sync(dd, strrchr(tofs, '/') + 1, + origin, crflags, drba->drba_cred, dcp, tx); if (origin != NULL) dsl_dataset_rele(origin, FTAG); dsl_dir_rele(dd, FTAG); drba->drba_cookie->drc_newfs = B_TRUE; } - VERIFY0(dsl_dataset_own_obj(dp, dsobj, dmu_recv_tag, &newds)); + + VERIFY0(dsl_dataset_own_obj(dp, dsobj, dsflags, dmu_recv_tag, &newds)); + VERIFY0(dmu_objset_from_ds(newds, &os)); if (drba->drba_cookie->drc_resumable) { dsl_dataset_zapify(newds, tx); @@ -370,32 +484,46 @@ dmu_recv_begin_sync(void *arg, dmu_tx_t *tx) 8, 1, &zero, tx)); VERIFY0(zap_add(mos, dsobj, DS_FIELD_RESUME_BYTES, 8, 1, &zero, tx)); - if (DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) & - DMU_BACKUP_FEATURE_LARGE_BLOCKS) { + if (featureflags & DMU_BACKUP_FEATURE_LARGE_BLOCKS) { VERIFY0(zap_add(mos, dsobj, DS_FIELD_RESUME_LARGEBLOCK, 8, 1, &one, tx)); } - if (DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) & - DMU_BACKUP_FEATURE_EMBED_DATA) { + if (featureflags & DMU_BACKUP_FEATURE_EMBED_DATA) { VERIFY0(zap_add(mos, dsobj, DS_FIELD_RESUME_EMBEDOK, 8, 1, &one, tx)); } - if (DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) & - DMU_BACKUP_FEATURE_COMPRESSED) { + if (featureflags & DMU_BACKUP_FEATURE_COMPRESSED) { VERIFY0(zap_add(mos, dsobj, DS_FIELD_RESUME_COMPRESSOK, 8, 1, &one, tx)); } + if (featureflags & DMU_BACKUP_FEATURE_RAW) { + VERIFY0(zap_add(mos, dsobj, DS_FIELD_RESUME_RAWOK, + 8, 1, &one, tx)); + } + } + + /* + * Usually the os->os_encrypted value is tied to the presence of a + * DSL Crypto Key object in the dd. However, that will not be received + * until dmu_recv_stream(), so we set the value manually for now. 
+ */ + if (featureflags & DMU_BACKUP_FEATURE_RAW) { + os->os_encrypted = B_TRUE; + drba->drba_cookie->drc_raw = B_TRUE; } dmu_buf_will_dirty(newds->ds_dbuf, tx); dsl_dataset_phys(newds)->ds_flags |= DS_FLAG_INCONSISTENT; /* - * If we actually created a non-clone, we need to create the - * objset in our new dataset. + * If we actually created a non-clone, we need to create the objset + * in our new dataset. If this is a raw send we postpone this until + * dmu_recv_stream() so that we can allocate the metadnode with the + * properties from the DRR_BEGIN payload. */ rrw_enter(&newds->ds_bp_rwlock, RW_READER, FTAG); - if (BP_IS_HOLE(dsl_dataset_get_blkptr(newds))) { + if (BP_IS_HOLE(dsl_dataset_get_blkptr(newds)) && + (featureflags & DMU_BACKUP_FEATURE_RAW) == 0) { (void) dmu_objset_create_impl(dp->dp_spa, newds, dsl_dataset_get_blkptr(newds), drrb->drr_type, tx); } @@ -413,6 +541,7 @@ dmu_recv_resume_begin_check(void *arg, dmu_tx_t *tx) dsl_pool_t *dp = dmu_tx_pool(tx); struct drr_begin *drrb = drba->drba_cookie->drc_drrb; int error; + ds_hold_flags_t dsflags = 0; uint64_t featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo); dsl_dataset_t *ds; const char *tofs = drba->drba_cookie->drc_tofs; @@ -463,29 +592,37 @@ dmu_recv_resume_begin_check(void *arg, dmu_tx_t *tx) (void) snprintf(recvname, sizeof (recvname), "%s/%s", tofs, recv_clone_name); - if (dsl_dataset_hold(dp, recvname, FTAG, &ds) != 0) { + if (featureflags & DMU_BACKUP_FEATURE_RAW) { + /* raw receives require spill block allocation flag */ + if (!(drrb->drr_flags & DRR_FLAG_SPILL_BLOCK)) + return (SET_ERROR(ZFS_ERR_SPILL_BLOCK_FLAG_MISSING)); + } else { + dsflags |= DS_HOLD_FLAG_DECRYPT; + } + + if (dsl_dataset_hold_flags(dp, recvname, dsflags, FTAG, &ds) != 0) { /* %recv does not exist; continue in tofs */ - error = dsl_dataset_hold(dp, tofs, FTAG, &ds); + error = dsl_dataset_hold_flags(dp, tofs, dsflags, FTAG, &ds); if (error != 0) return (error); } /* check that ds is marked inconsistent */ if 
(!DS_IS_INCONSISTENT(ds)) { - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(ds, dsflags, FTAG); return (SET_ERROR(EINVAL)); } /* check that there is resuming data, and that the toguid matches */ if (!dsl_dataset_is_zapified(ds)) { - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(ds, dsflags, FTAG); return (SET_ERROR(EINVAL)); } uint64_t val; error = zap_lookup(dp->dp_meta_objset, ds->ds_object, DS_FIELD_RESUME_TOGUID, sizeof (val), 1, &val); if (error != 0 || drrb->drr_toguid != val) { - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(ds, dsflags, FTAG); return (SET_ERROR(EINVAL)); } @@ -495,13 +632,13 @@ dmu_recv_resume_begin_check(void *arg, dmu_tx_t *tx) * fails) because it will be marked inconsistent. */ if (dsl_dataset_has_owner(ds)) { - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(ds, dsflags, FTAG); return (SET_ERROR(EBUSY)); } /* There should not be any snapshots of this fs yet. */ if (ds->ds_prev != NULL && ds->ds_prev->ds_dir == ds->ds_dir) { - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(ds, dsflags, FTAG); return (SET_ERROR(EINVAL)); } @@ -515,11 +652,11 @@ dmu_recv_resume_begin_check(void *arg, dmu_tx_t *tx) (void) zap_lookup(dp->dp_meta_objset, ds->ds_object, DS_FIELD_RESUME_FROMGUID, sizeof (val), 1, &val); if (drrb->drr_fromguid != val) { - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(ds, dsflags, FTAG); return (SET_ERROR(EINVAL)); } - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(ds, dsflags, FTAG); return (0); } @@ -529,7 +666,11 @@ dmu_recv_resume_begin_sync(void *arg, dmu_tx_t *tx) dmu_recv_begin_arg_t *drba = arg; dsl_pool_t *dp = dmu_tx_pool(tx); const char *tofs = drba->drba_cookie->drc_tofs; + struct drr_begin *drrb = drba->drba_cookie->drc_drrb; + uint64_t featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo); dsl_dataset_t *ds; + objset_t *os; + ds_hold_flags_t dsflags = 0; uint64_t dsobj; /* 6 extra bytes for /%recv */ char recvname[ZFS_MAX_DATASET_NAME_LEN + 6]; @@ -537,9 +678,15 @@ 
dmu_recv_resume_begin_sync(void *arg, dmu_tx_t *tx) (void) snprintf(recvname, sizeof (recvname), "%s/%s", tofs, recv_clone_name); - if (dsl_dataset_hold(dp, recvname, FTAG, &ds) != 0) { + if (featureflags & DMU_BACKUP_FEATURE_RAW) { + drba->drba_cookie->drc_raw = B_TRUE; + } else { + dsflags |= DS_HOLD_FLAG_DECRYPT; + } + + if (dsl_dataset_hold_flags(dp, recvname, dsflags, FTAG, &ds) != 0) { /* %recv does not exist; continue in tofs */ - VERIFY0(dsl_dataset_hold(dp, tofs, FTAG, &ds)); + VERIFY0(dsl_dataset_hold_flags(dp, tofs, dsflags, FTAG, &ds)); drba->drba_cookie->drc_newfs = B_TRUE; } @@ -548,15 +695,17 @@ dmu_recv_resume_begin_sync(void *arg, dmu_tx_t *tx) dmu_buf_will_dirty(ds->ds_dbuf, tx); dsl_dataset_phys(ds)->ds_flags &= ~DS_FLAG_INCONSISTENT; dsobj = ds->ds_object; - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(ds, dsflags, FTAG); - VERIFY0(dsl_dataset_own_obj(dp, dsobj, dmu_recv_tag, &ds)); + VERIFY0(dsl_dataset_own_obj(dp, dsobj, dsflags, dmu_recv_tag, &ds)); + VERIFY0(dmu_objset_from_ds(ds, &os)); dmu_buf_will_dirty(ds->ds_dbuf, tx); dsl_dataset_phys(ds)->ds_flags |= DS_FLAG_INCONSISTENT; rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); - ASSERT(!BP_IS_HOLE(dsl_dataset_get_blkptr(ds))); + ASSERT(!BP_IS_HOLE(dsl_dataset_get_blkptr(ds)) || + drba->drba_cookie->drc_raw); rrw_exit(&ds->ds_bp_rwlock, FTAG); drba->drba_cookie->drc_ds = ds; @@ -596,6 +745,9 @@ dmu_recv_begin(char *tofs, char *tosnap, dmu_replay_record_t *drr_begin, return (SET_ERROR(EINVAL)); } + if (drc->drc_drrb->drr_flags & DRR_FLAG_SPILL_BLOCK) + drc->drc_spill = B_TRUE; + drba.drba_origin = origin; drba.drba_cookie = drc; drba.drba_cred = CRED(); @@ -619,7 +771,7 @@ struct receive_record_arg { * If the record is a write, pointer to the arc_buf_t containing the * payload. 
*/ - arc_buf_t *write_buf; + arc_buf_t *arc_buf; int payload_size; uint64_t bytes_read; /* bytes read from stream when record created */ boolean_t eos_marker; /* Marks the end of the stream */ @@ -643,10 +795,21 @@ struct receive_writer_arg { /* A map from guid to dataset to help handle dedup'd streams. */ avl_tree_t *guid_to_ds_map; boolean_t resumable; + boolean_t raw; /* DMU_BACKUP_FEATURE_RAW set */ + boolean_t spill; /* DRR_FLAG_SPILL_BLOCK set */ uint64_t last_object; uint64_t last_offset; uint64_t max_object; /* highest object ID referenced in stream */ uint64_t bytes_read; /* bytes read when current record created */ + + /* Encryption parameters for the last received DRR_OBJECT_RANGE */ + boolean_t or_crypt_params_present; + uint64_t or_firstobj; + uint64_t or_numslots; + uint8_t or_salt[ZIO_DATA_SALT_LEN]; + uint8_t or_iv[ZIO_DATA_IV_LEN]; + uint8_t or_mac[ZIO_DATA_MAC_LEN]; + boolean_t or_byteorder; }; struct objlist { @@ -679,12 +842,15 @@ struct receive_arg { zio_cksum_t prev_cksum; int err; boolean_t byteswap; + boolean_t raw; + uint64_t featureflags; /* Sorted list of objects not to issue prefetches for. */ struct objlist ignore_objlist; }; typedef struct guid_map_entry { uint64_t guid; + boolean_t raw; dsl_dataset_t *gme_ds; avl_node_t avlnode; } guid_map_entry_t; @@ -710,8 +876,14 @@ free_guid_map_onexit(void *arg) guid_map_entry_t *gmep; while ((gmep = avl_destroy_nodes(ca, &cookie)) != NULL) { - dsl_dataset_long_rele(gmep->gme_ds, gmep); - dsl_dataset_rele(gmep->gme_ds, gmep); + ds_hold_flags_t dsflags = DS_HOLD_FLAG_DECRYPT; + + if (gmep->raw) { + gmep->gme_ds->ds_objset->os_raw_receive = B_FALSE; + dsflags &= ~DS_HOLD_FLAG_DECRYPT; + } + + dsl_dataset_disown(gmep->gme_ds, dsflags, gmep); kmem_free(gmep, sizeof (guid_map_entry_t)); } avl_destroy(ca); @@ -727,7 +899,8 @@ receive_read(struct receive_arg *ra, int len, void *buf) * The code doesn't rely on this (lengths being multiples of 8). See * comment in dump_bytes. 
*/ - ASSERT0(len % 8); + ASSERT(len % 8 == 0 || + (ra->featureflags & DMU_BACKUP_FEATURE_RAW) != 0); while (done < len) { ssize_t resid; @@ -780,7 +953,9 @@ byteswap_record(dmu_replay_record_t *drr) DO32(drr_object.drr_bonustype); DO32(drr_object.drr_blksz); DO32(drr_object.drr_bonuslen); + DO32(drr_object.drr_raw_bonuslen); DO64(drr_object.drr_toguid); + DO64(drr_object.drr_maxblkid); break; case DRR_FREEOBJECTS: DO64(drr_freeobjects.drr_firstobj); @@ -827,6 +1002,13 @@ byteswap_record(dmu_replay_record_t *drr) DO64(drr_spill.drr_object); DO64(drr_spill.drr_length); DO64(drr_spill.drr_toguid); + DO64(drr_spill.drr_compressed_size); + DO32(drr_spill.drr_type); + break; + case DRR_OBJECT_RANGE: + DO64(drr_object_range.drr_firstobj); + DO64(drr_object_range.drr_numslots); + DO64(drr_object_range.drr_toguid); break; case DRR_END: DO64(drr_end.drr_toguid); @@ -891,6 +1073,8 @@ save_resume_state(struct receive_writer_arg *rwa, rwa->os->os_dsl_dataset->ds_resume_bytes[txgoff] = rwa->bytes_read; } +int receive_object_delay_frac = 0; + static int receive_object(struct receive_writer_arg *rwa, struct drr_object *drro, void *data) @@ -902,6 +1086,10 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro, uint8_t dn_slots = drro->drr_dn_slots != 0 ? drro->drr_dn_slots : DNODE_MIN_SLOTS; + if (receive_object_delay_frac != 0 && + spa_get_random(receive_object_delay_frac) == 0) + delay(1); + if (drro->drr_type == DMU_OT_NONE || !DMU_OT_IS_VALID(drro->drr_type) || !DMU_OT_IS_VALID(drro->drr_bonustype) || @@ -917,6 +1105,37 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro, return (SET_ERROR(EINVAL)); } + if (rwa->raw) { + /* + * We should have received a DRR_OBJECT_RANGE record + * containing this block and stored it in rwa. 
+ */ + if (drro->drr_object < rwa->or_firstobj || + drro->drr_object >= rwa->or_firstobj + rwa->or_numslots || + drro->drr_raw_bonuslen < drro->drr_bonuslen || + drro->drr_indblkshift > SPA_MAXBLOCKSHIFT || + drro->drr_nlevels > DN_MAX_LEVELS || + drro->drr_nblkptr > DN_MAX_NBLKPTR || + DN_SLOTS_TO_BONUSLEN(drro->drr_dn_slots) < + drro->drr_raw_bonuslen) + return (SET_ERROR(EINVAL)); + } else { + + /* + * The DRR_OBJECT_SPILL flag is valid when the DRR_BEGIN + * record indicates this by setting DRR_FLAG_SPILL_BLOCK. + */ + if (((drro->drr_flags & ~(DRR_OBJECT_SPILL))) || + (!rwa->spill && DRR_OBJECT_HAS_SPILL(drro->drr_flags))) { + return (SET_ERROR(EINVAL)); + } + + if (drro->drr_raw_bonuslen != 0 || drro->drr_nblkptr != 0 || + drro->drr_indblkshift != 0 || drro->drr_nlevels != 0) { + return (SET_ERROR(EINVAL)); + } + } + err = dmu_object_info(rwa->os, drro->drr_object, &doi); if (err != 0 && err != ENOENT && err != EEXIST) @@ -929,20 +1148,86 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro, * If we are losing blkptrs or changing the block size this must * be a new file instance. We must clear out the previous file * contents before we can change this type of metadata in the dnode. + * Raw receives will also check that the indirect structure of the + * dnode hasn't changed. */ if (err == 0) { - int nblkptr; + uint32_t indblksz = drro->drr_indblkshift ? + 1ULL << drro->drr_indblkshift : 0; + int nblkptr = deduce_nblkptr(drro->drr_bonustype, + drro->drr_bonuslen); + boolean_t did_free = B_FALSE; object = drro->drr_object; - nblkptr = deduce_nblkptr(drro->drr_bonustype, - drro->drr_bonuslen); + /* nblkptr should be bounded by the bonus size and type */ + if (rwa->raw && nblkptr != drro->drr_nblkptr) + return (SET_ERROR(EINVAL)); + /* + * Check for indicators that the object was freed and + * reallocated. 
For all sends, these indicators are: + * - A changed block size + * - A smaller nblkptr + * - A changed dnode size + * For raw sends we also check a few other fields to + * ensure we are preserving the objset structure exactly + * as it was on the receive side: + * - A changed indirect block size + * - A smaller nlevels + */ if (drro->drr_blksz != doi.doi_data_block_size || nblkptr < doi.doi_nblkptr || + dn_slots != doi.doi_dnodesize >> DNODE_SHIFT || + (rwa->raw && + (indblksz != doi.doi_metadata_block_size || + drro->drr_nlevels < doi.doi_indirection))) { + err = dmu_free_long_range(rwa->os, + drro->drr_object, 0, DMU_OBJECT_END); + if (err != 0) + return (SET_ERROR(EINVAL)); + else + did_free = B_TRUE; + } + + /* + * The dmu does not currently support decreasing nlevels + * or changing the number of dnode slots on an object. For + * non-raw sends, this does not matter and the new object + * can just use the previous one's nlevels. For raw sends, + * however, the structure of the received dnode (including + * nlevels and dnode slots) must match that of the send + * side. Therefore, instead of using dmu_object_reclaim(), + * we must free the object completely and call + * dmu_object_claim_dnsize() instead. + */ + if ((rwa->raw && drro->drr_nlevels < doi.doi_indirection) || dn_slots != doi.doi_dnodesize >> DNODE_SHIFT) { + err = dmu_free_long_object(rwa->os, drro->drr_object); + if (err != 0) + return (SET_ERROR(EINVAL)); + + txg_wait_synced(dmu_objset_pool(rwa->os), 0); + object = DMU_NEW_OBJECT; + } + + /* + * For raw receives, free everything beyond the new incoming + * maxblkid. Normally this would be done with a DRR_FREE + * record that would come after this DRR_OBJECT record is + * processed. However, for raw receives we manually set the + * maxblkid from the drr_maxblkid and so we must first free + * everything above that blkid to ensure the DMU is always + * consistent with itself. 
We will never free the first block + * of the object here because a maxblkid of 0 could indicate + * an object with a single block or one with no blocks. This + * free may be skipped when dmu_free_long_range() was called + * above since it covers the entire object's contents. + */ + if (rwa->raw && object != DMU_NEW_OBJECT && !did_free) { err = dmu_free_long_range(rwa->os, drro->drr_object, - 0, DMU_OBJECT_END); + (drro->drr_maxblkid + 1) * doi.doi_data_block_size, + DMU_OBJECT_END); if (err != 0) return (SET_ERROR(EINVAL)); } @@ -955,7 +1240,11 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro, * earlier in the stream. */ txg_wait_synced(dmu_objset_pool(rwa->os), 0); - object = drro->drr_object; + if (dmu_object_info(rwa->os, drro->drr_object, NULL) != ENOENT) + return (SET_ERROR(EINVAL)); + + /* object was freed and we are about to allocate a new one */ + object = DMU_NEW_OBJECT; } else { /* object is free and we are about to allocate a new one */ object = DMU_NEW_OBJECT; @@ -995,6 +1284,7 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro, tx = dmu_tx_create(rwa->os); dmu_tx_hold_bonus(tx, object); + dmu_tx_hold_write(tx, object, 0, 0); err = dmu_tx_assign(tx, TXG_WAIT); if (err != 0) { dmu_tx_abort(tx); @@ -1002,7 +1292,7 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro, } if (object == DMU_NEW_OBJECT) { - /* currently free, want to be allocated */ + /* Currently free, wants to be allocated */ err = dmu_object_claim_dnsize(rwa->os, drro->drr_object, drro->drr_type, drro->drr_blksz, drro->drr_bonustype, drro->drr_bonuslen, @@ -1010,39 +1300,116 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro, } else if (drro->drr_type != doi.doi_type || drro->drr_blksz != doi.doi_data_block_size || drro->drr_bonustype != doi.doi_bonus_type || - drro->drr_bonuslen != doi.doi_bonus_size || - drro->drr_dn_slots != (doi.doi_dnodesize >> DNODE_SHIFT)) { - /* currently allocated, but with 
different properties */ + drro->drr_bonuslen != doi.doi_bonus_size) { + /* Currently allocated, but with different properties */ err = dmu_object_reclaim_dnsize(rwa->os, drro->drr_object, drro->drr_type, drro->drr_blksz, drro->drr_bonustype, drro->drr_bonuslen, - drro->drr_dn_slots << DNODE_SHIFT, tx); + dn_slots << DNODE_SHIFT, rwa->spill ? + DRR_OBJECT_HAS_SPILL(drro->drr_flags) : B_FALSE, tx); + } else if (rwa->spill && !DRR_OBJECT_HAS_SPILL(drro->drr_flags)) { + /* + * Currently allocated, the existing version of this object + * may reference a spill block that is no longer allocated + * at the source and needs to be freed. + */ + err = dmu_object_rm_spill(rwa->os, drro->drr_object, tx); } + if (err != 0) { dmu_tx_commit(tx); return (SET_ERROR(EINVAL)); } + if (rwa->or_crypt_params_present) { + /* + * Set the crypt params for the buffer associated with this + * range of dnodes. This causes the blkptr_t to have the + * same crypt params (byteorder, salt, iv, mac) as on the + * sending side. + * + * Since we are committing this tx now, it is possible for + * the dnode block to end up on-disk with the incorrect MAC, + * if subsequent objects in this block are received in a + * different txg. However, since the dataset is marked as + * inconsistent, no code paths will do a non-raw read (or + * decrypt the block / verify the MAC). The receive code and + * scrub code can safely do raw reads and verify the + * checksum. They don't need to verify the MAC. 
+ */ + dmu_buf_t *db = NULL; + uint64_t offset = rwa->or_firstobj * DNODE_MIN_SIZE; + + err = dmu_buf_hold_by_dnode(DMU_META_DNODE(rwa->os), + offset, FTAG, &db, DMU_READ_PREFETCH | DMU_READ_NO_DECRYPT); + if (err != 0) { + dmu_tx_commit(tx); + return (SET_ERROR(EINVAL)); + } + + dmu_buf_set_crypt_params(db, rwa->or_byteorder, + rwa->or_salt, rwa->or_iv, rwa->or_mac, tx); + + dmu_buf_rele(db, FTAG); + + rwa->or_crypt_params_present = B_FALSE; + } + dmu_object_set_checksum(rwa->os, drro->drr_object, drro->drr_checksumtype, tx); dmu_object_set_compress(rwa->os, drro->drr_object, drro->drr_compress, tx); + /* handle more restrictive dnode structuring for raw recvs */ + if (rwa->raw) { + /* + * Set the indirect block size, block shift, nlevels. + * This will not fail because we ensured all of the + * blocks were freed earlier if this is a new object. + * For non-new objects block size and indirect block + * shift cannot change and nlevels can only increase. + */ + VERIFY0(dmu_object_set_blocksize(rwa->os, drro->drr_object, + drro->drr_blksz, drro->drr_indblkshift, tx)); + VERIFY0(dmu_object_set_nlevels(rwa->os, drro->drr_object, + drro->drr_nlevels, tx)); + + /* + * Set the maxblkid. This will always succeed because + * we freed all blocks beyond the new maxblkid above. 
+ */ + VERIFY0(dmu_object_set_maxblkid(rwa->os, drro->drr_object, + drro->drr_maxblkid, tx)); + } + if (data != NULL) { dmu_buf_t *db; + dnode_t *dn; + uint32_t flags = DMU_READ_NO_PREFETCH; + + if (rwa->raw) + flags |= DMU_READ_NO_DECRYPT; + + VERIFY0(dnode_hold(rwa->os, drro->drr_object, FTAG, &dn)); + VERIFY0(dmu_bonus_hold_by_dnode(dn, FTAG, &db, flags)); - VERIFY0(dmu_bonus_hold(rwa->os, drro->drr_object, FTAG, &db)); dmu_buf_will_dirty(db, tx); ASSERT3U(db->db_size, >=, drro->drr_bonuslen); - bcopy(data, db->db_data, drro->drr_bonuslen); - if (rwa->byteswap) { + bcopy(data, db->db_data, DRR_OBJECT_PAYLOAD_SIZE(drro)); + + /* + * Raw bonus buffers have their byteorder determined by the + * DRR_OBJECT_RANGE record. + */ + if (rwa->byteswap && !rwa->raw) { dmu_object_byteswap_t byteswap = DMU_OT_BYTESWAP(drro->drr_bonustype); dmu_ot_byteswap[byteswap].ob_func(db->db_data, - drro->drr_bonuslen); + DRR_OBJECT_PAYLOAD_SIZE(drro)); } dmu_buf_rele(db, FTAG); + dnode_rele(dn, FTAG); } dmu_tx_commit(tx); @@ -1063,15 +1430,17 @@ receive_freeobjects(struct receive_writer_arg *rwa, for (obj = drrfo->drr_firstobj == 0 ? 
1 : drrfo->drr_firstobj; obj < drrfo->drr_firstobj + drrfo->drr_numobjs && next_err == 0; next_err = dmu_object_next(rwa->os, &obj, FALSE, 0)) { + dmu_object_info_t doi; int err; - err = dmu_object_info(rwa->os, obj, NULL); + err = dmu_object_info(rwa->os, obj, &doi); if (err == ENOENT) continue; else if (err != 0) return (err); err = dmu_free_long_object(rwa->os, obj); + if (err != 0) return (err); @@ -1087,8 +1456,9 @@ static int receive_write(struct receive_writer_arg *rwa, struct drr_write *drrw, arc_buf_t *abuf) { - dmu_tx_t *tx; int err; + dmu_tx_t *tx; + dnode_t *dn; if (drrw->drr_offset + drrw->drr_logical_size < drrw->drr_offset || !DMU_OT_IS_VALID(drrw->drr_type)) @@ -1113,7 +1483,6 @@ receive_write(struct receive_writer_arg *rwa, struct drr_write *drrw, return (SET_ERROR(EINVAL)); tx = dmu_tx_create(rwa->os); - dmu_tx_hold_write(tx, drrw->drr_object, drrw->drr_offset, drrw->drr_logical_size); err = dmu_tx_assign(tx, TXG_WAIT); @@ -1121,18 +1490,23 @@ receive_write(struct receive_writer_arg *rwa, struct drr_write *drrw, dmu_tx_abort(tx); return (err); } - if (rwa->byteswap) { + + if (rwa->byteswap && !arc_is_encrypted(abuf) && + arc_get_compression(abuf) == ZIO_COMPRESS_OFF) { dmu_object_byteswap_t byteswap = DMU_OT_BYTESWAP(drrw->drr_type); dmu_ot_byteswap[byteswap].ob_func(abuf->b_data, DRR_WRITE_PAYLOAD_SIZE(drrw)); } - /* use the bonus buf to look up the dnode in dmu_assign_arcbuf */ - dmu_buf_t *bonus; - if (dmu_bonus_hold(rwa->os, drrw->drr_object, FTAG, &bonus) != 0) - return (SET_ERROR(EINVAL)); - dmu_assign_arcbuf(bonus, drrw->drr_offset, abuf, tx); + VERIFY0(dnode_hold(rwa->os, drrw->drr_object, FTAG, &dn)); + err = dmu_assign_arcbuf_by_dnode(dn, drrw->drr_offset, abuf, tx); + if (err != 0) { + dnode_rele(dn, FTAG); + dmu_tx_commit(tx); + return (err); + } + dnode_rele(dn, FTAG); /* * Note: If the receive fails, we want the resume stream to start @@ -1142,7 +1516,6 @@ receive_write(struct receive_writer_arg *rwa, struct drr_write *drrw, */ 
save_resume_state(rwa, drrw->drr_object, drrw->drr_offset, tx); dmu_tx_commit(tx); - dmu_buf_rele(bonus, FTAG); return (0); } @@ -1164,6 +1537,7 @@ receive_write_byref(struct receive_writer_arg *rwa, guid_map_entry_t *gmep; avl_index_t where; objset_t *ref_os = NULL; + int flags = DMU_READ_PREFETCH; dmu_buf_t *dbp; if (drrwbr->drr_offset + drrwbr->drr_length < drrwbr->drr_offset) @@ -1188,8 +1562,12 @@ receive_write_byref(struct receive_writer_arg *rwa, if (drrwbr->drr_object > rwa->max_object) rwa->max_object = drrwbr->drr_object; + if (rwa->raw) + flags |= DMU_READ_NO_DECRYPT; + + /* may return either a regular db or an encrypted one */ err = dmu_buf_hold(ref_os, drrwbr->drr_refobject, - drrwbr->drr_refoffset, FTAG, &dbp, DMU_READ_PREFETCH); + drrwbr->drr_refoffset, FTAG, &dbp, flags); if (err != 0) return (err); @@ -1202,8 +1580,14 @@ receive_write_byref(struct receive_writer_arg *rwa, dmu_tx_abort(tx); return (err); } - dmu_write(rwa->os, drrwbr->drr_object, - drrwbr->drr_offset, drrwbr->drr_length, dbp->db_data, tx); + + if (rwa->raw) { + dmu_copy_from_buf(rwa->os, drrwbr->drr_object, + drrwbr->drr_offset, dbp, tx); + } else { + dmu_write(rwa->os, drrwbr->drr_object, + drrwbr->drr_offset, drrwbr->drr_length, dbp->db_data, tx); + } dmu_buf_rele(dbp, FTAG); /* See comment in restore_write. 
*/ @@ -1229,6 +1613,8 @@ receive_write_embedded(struct receive_writer_arg *rwa, return (EINVAL); if (drrwe->drr_compression >= ZIO_COMPRESS_FUNCTIONS) return (EINVAL); + if (rwa->raw) + return (SET_ERROR(EINVAL)); if (drrwe->drr_object > rwa->max_object) rwa->max_object = drrwe->drr_object; @@ -1256,16 +1642,37 @@ receive_write_embedded(struct receive_writer_arg *rwa, static int receive_spill(struct receive_writer_arg *rwa, struct drr_spill *drrs, - void *data) + arc_buf_t *abuf) { dmu_tx_t *tx; dmu_buf_t *db, *db_spill; int err; + uint32_t flags = 0; if (drrs->drr_length < SPA_MINBLOCKSIZE || drrs->drr_length > spa_maxblocksize(dmu_objset_spa(rwa->os))) return (SET_ERROR(EINVAL)); + /* + * This is an unmodified spill block which was added to the stream + * to resolve an issue with incorrectly removing spill blocks. It + * should be ignored by current versions of the code which support + * the DRR_FLAG_SPILL_BLOCK flag. + */ + if (rwa->spill && DRR_SPILL_IS_UNMODIFIED(drrs->drr_flags)) { + dmu_return_arcbuf(abuf); + return (0); + } + + if (rwa->raw) { + if (!DMU_OT_IS_VALID(drrs->drr_type) || + drrs->drr_compressiontype >= ZIO_COMPRESS_FUNCTIONS || + drrs->drr_compressed_size == 0) + return (SET_ERROR(EINVAL)); + + flags |= DMU_READ_NO_DECRYPT; + } + if (dmu_object_info(rwa->os, drrs->drr_object, NULL) != 0) return (SET_ERROR(EINVAL)); @@ -1273,7 +1680,8 @@ receive_spill(struct receive_writer_arg *rwa, struct drr_spill *drrs, rwa->max_object = drrs->drr_object; VERIFY0(dmu_bonus_hold(rwa->os, drrs->drr_object, FTAG, &db)); - if ((err = dmu_spill_hold_by_bonus(db, FTAG, &db_spill)) != 0) { + if ((err = dmu_spill_hold_by_bonus(db, DMU_READ_NO_DECRYPT, FTAG, + &db_spill)) != 0) { dmu_buf_rele(db, FTAG); return (err); } @@ -1289,12 +1697,27 @@ receive_spill(struct receive_writer_arg *rwa, struct drr_spill *drrs, dmu_tx_abort(tx); return (err); } - dmu_buf_will_dirty(db_spill, tx); - if (db_spill->db_size < drrs->drr_length) + /* + * Spill blocks may both grow and 
shrink. When a change in size + * occurs any existing dbuf must be updated to match the logical + * size of the provided arc_buf_t. + */ + if (db_spill->db_size != drrs->drr_length) { + dmu_buf_will_fill(db_spill, tx); VERIFY(0 == dbuf_spill_set_blksz(db_spill, drrs->drr_length, tx)); - bcopy(data, db_spill->db_data, drrs->drr_length); + } + + if (rwa->byteswap && !arc_is_encrypted(abuf) && + arc_get_compression(abuf) == ZIO_COMPRESS_OFF) { + dmu_object_byteswap_t byteswap = + DMU_OT_BYTESWAP(drrs->drr_type); + dmu_ot_byteswap[byteswap].ob_func(abuf->b_data, + DRR_SPILL_PAYLOAD_SIZE(drrs)); + } + + dbuf_assign_arcbuf((dmu_buf_impl_t *)db_spill, abuf, tx); dmu_buf_rele(db, FTAG); dmu_buf_rele(db_spill, FTAG); @@ -1309,7 +1732,7 @@ receive_free(struct receive_writer_arg *rwa, struct drr_free *drrf) { int err; - if (drrf->drr_length != -1ULL && + if (drrf->drr_length != DMU_OBJECT_END && drrf->drr_offset + drrf->drr_length < drrf->drr_offset) return (SET_ERROR(EINVAL)); @@ -1325,18 +1748,81 @@ receive_free(struct receive_writer_arg *rwa, struct drr_free *drrf) return (err); } +static int +receive_object_range(struct receive_writer_arg *rwa, + struct drr_object_range *drror) +{ + /* + * By default, we assume this block is in our native format + * (ZFS_HOST_BYTEORDER). We then take into account whether + * the send stream is byteswapped (rwa->byteswap). Finally, + * we need to byteswap again if this particular block was + * in non-native format on the send side. + */ + boolean_t byteorder = ZFS_HOST_BYTEORDER ^ rwa->byteswap ^ + !!DRR_IS_RAW_BYTESWAPPED(drror->drr_flags); + + /* + * Since dnode block sizes are constant, we should not need to worry + * about making sure that the dnode block size is the same on the + * sending and receiving sides for the time being. For non-raw sends, + * this does not matter (and in fact we do not send a DRR_OBJECT_RANGE + * record at all). 
Raw sends require this record type because the + * encryption parameters are used to protect an entire block of bonus + * buffers. If the size of dnode blocks ever becomes variable, + * handling will need to be added to ensure that dnode block sizes + * match on the sending and receiving side. + */ + if (drror->drr_numslots != DNODES_PER_BLOCK || + P2PHASE(drror->drr_firstobj, DNODES_PER_BLOCK) != 0 || + !rwa->raw) + return (SET_ERROR(EINVAL)); + + if (drror->drr_firstobj > rwa->max_object) + rwa->max_object = drror->drr_firstobj; + + /* + * The DRR_OBJECT_RANGE handling must be deferred to receive_object() + * so that the block of dnodes is not written out when it's empty, + * and converted to a HOLE BP. + */ + rwa->or_crypt_params_present = B_TRUE; + rwa->or_firstobj = drror->drr_firstobj; + rwa->or_numslots = drror->drr_numslots; + bcopy(drror->drr_salt, rwa->or_salt, ZIO_DATA_SALT_LEN); + bcopy(drror->drr_iv, rwa->or_iv, ZIO_DATA_IV_LEN); + bcopy(drror->drr_mac, rwa->or_mac, ZIO_DATA_MAC_LEN); + rwa->or_byteorder = byteorder; + + return (0); +} + /* used to destroy the drc_ds on error */ static void dmu_recv_cleanup_ds(dmu_recv_cookie_t *drc) { - if (drc->drc_resumable) { - /* wait for our resume state to be written to disk */ - txg_wait_synced(drc->drc_ds->ds_dir->dd_pool, 0); - dsl_dataset_disown(drc->drc_ds, dmu_recv_tag); + dsl_dataset_t *ds = drc->drc_ds; + ds_hold_flags_t dsflags = (drc->drc_raw) ? 0 : DS_HOLD_FLAG_DECRYPT; + + /* + * Wait for the txg sync before cleaning up the receive. For + * resumable receives, this ensures that our resume state has + * been written out to disk. For raw receives, this ensures + * that the user accounting code will not attempt to do anything + * after we stopped receiving the dataset. 
+ */ + txg_wait_synced(ds->ds_dir->dd_pool, 0); + ds->ds_objset->os_raw_receive = B_FALSE; + + rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); + if (drc->drc_resumable && !BP_IS_HOLE(dsl_dataset_get_blkptr(ds))) { + rrw_exit(&ds->ds_bp_rwlock, FTAG); + dsl_dataset_disown(ds, dsflags, dmu_recv_tag); } else { char name[ZFS_MAX_DATASET_NAME_LEN]; - dsl_dataset_name(drc->drc_ds, name); - dsl_dataset_disown(drc->drc_ds, dmu_recv_tag); + rrw_exit(&ds->ds_bp_rwlock, FTAG); + dsl_dataset_name(ds, name); + dsl_dataset_disown(ds, dsflags, dmu_recv_tag); (void) dsl_destroy_head(name); } } @@ -1384,6 +1870,7 @@ receive_read_payload_and_next_header(struct receive_arg *ra, int len, void *buf) err = receive_read(ra, sizeof (ra->next_rrd->header), &ra->next_rrd->header); ra->next_rrd->bytes_read = ra->bytes_read; + if (err != 0) { kmem_free(ra->next_rrd, sizeof (*ra->next_rrd)); ra->next_rrd = NULL; @@ -1525,9 +2012,13 @@ receive_read_record(struct receive_arg *ra) case DRR_OBJECT: { struct drr_object *drro = &ra->rrd->header.drr_u.drr_object; - uint32_t size = P2ROUNDUP(drro->drr_bonuslen, 8); - void *buf = kmem_zalloc(size, KM_SLEEP); + uint32_t size = DRR_OBJECT_PAYLOAD_SIZE(drro); + void *buf = NULL; dmu_object_info_t doi; + + if (size != 0) + buf = kmem_zalloc(size, KM_SLEEP); + err = receive_read_payload_and_next_header(ra, size, buf); if (err != 0) { kmem_free(buf, size); @@ -1538,7 +2029,7 @@ receive_read_record(struct receive_arg *ra) * See receive_read_prefetch for an explanation why we're * storing this object in the ignore_obj_list. 
*/ - if (err == ENOENT || + if (err == ENOENT || err == EEXIST || (err == 0 && doi.doi_data_block_size != drro->drr_blksz)) { objlist_insert(&ra->ignore_objlist, drro->drr_object); err = 0; @@ -1555,7 +2046,18 @@ receive_read_record(struct receive_arg *ra) struct drr_write *drrw = &ra->rrd->header.drr_u.drr_write; arc_buf_t *abuf; boolean_t is_meta = DMU_OT_IS_METADATA(drrw->drr_type); - if (DRR_WRITE_COMPRESSED(drrw)) { + + if (ra->raw) { + boolean_t byteorder = ZFS_HOST_BYTEORDER ^ + !!DRR_IS_RAW_BYTESWAPPED(drrw->drr_flags) ^ + ra->byteswap; + + abuf = arc_loan_raw_buf(dmu_objset_spa(ra->os), + drrw->drr_object, byteorder, drrw->drr_salt, + drrw->drr_iv, drrw->drr_mac, drrw->drr_type, + drrw->drr_compressed_size, drrw->drr_logical_size, + drrw->drr_compressiontype); + } else if (DRR_WRITE_COMPRESSED(drrw)) { ASSERT3U(drrw->drr_compressed_size, >, 0); ASSERT3U(drrw->drr_logical_size, >=, drrw->drr_compressed_size); @@ -1575,7 +2077,7 @@ receive_read_record(struct receive_arg *ra) dmu_return_arcbuf(abuf); return (err); } - ra->rrd->write_buf = abuf; + ra->rrd->arc_buf = abuf; receive_read_prefetch(ra, drrw->drr_object, drrw->drr_offset, drrw->drr_logical_size); return (err); @@ -1625,11 +2127,38 @@ receive_read_record(struct receive_arg *ra) case DRR_SPILL: { struct drr_spill *drrs = &ra->rrd->header.drr_u.drr_spill; - void *buf = kmem_zalloc(drrs->drr_length, KM_SLEEP); - err = receive_read_payload_and_next_header(ra, drrs->drr_length, - buf); - if (err != 0) - kmem_free(buf, drrs->drr_length); + arc_buf_t *abuf; + int len = DRR_SPILL_PAYLOAD_SIZE(drrs); + + /* DRR_SPILL records are either raw or uncompressed */ + if (ra->raw) { + boolean_t byteorder = ZFS_HOST_BYTEORDER ^ + !!DRR_IS_RAW_BYTESWAPPED(drrs->drr_flags) ^ + ra->byteswap; + + abuf = arc_loan_raw_buf(dmu_objset_spa(ra->os), + dmu_objset_id(ra->os), byteorder, drrs->drr_salt, + drrs->drr_iv, drrs->drr_mac, drrs->drr_type, + drrs->drr_compressed_size, drrs->drr_length, + drrs->drr_compressiontype); + } 
else { + abuf = arc_loan_buf(dmu_objset_spa(ra->os), + DMU_OT_IS_METADATA(drrs->drr_type), + drrs->drr_length); + } + + err = receive_read_payload_and_next_header(ra, len, + abuf->b_data); + if (err != 0) { + dmu_return_arcbuf(abuf); + return (err); + } + ra->rrd->arc_buf = abuf; + return (err); + } + case DRR_OBJECT_RANGE: + { + err = receive_read_payload_and_next_header(ra, 0, NULL); return (err); } default: @@ -1668,11 +2197,11 @@ receive_process_record(struct receive_writer_arg *rwa, case DRR_WRITE: { struct drr_write *drrw = &rrd->header.drr_u.drr_write; - err = receive_write(rwa, drrw, rrd->write_buf); + err = receive_write(rwa, drrw, rrd->arc_buf); /* if receive_write() is successful, it consumes the arc_buf */ if (err != 0) - dmu_return_arcbuf(rrd->write_buf); - rrd->write_buf = NULL; + dmu_return_arcbuf(rrd->arc_buf); + rrd->arc_buf = NULL; rrd->payload = NULL; return (err); } @@ -1699,11 +2228,20 @@ receive_process_record(struct receive_writer_arg *rwa, case DRR_SPILL: { struct drr_spill *drrs = &rrd->header.drr_u.drr_spill; - err = receive_spill(rwa, drrs, rrd->payload); - kmem_free(rrd->payload, rrd->payload_size); + err = receive_spill(rwa, drrs, rrd->arc_buf); + /* if receive_spill() is successful, it consumes the arc_buf */ + if (err != 0) + dmu_return_arcbuf(rrd->arc_buf); + rrd->arc_buf = NULL; rrd->payload = NULL; return (err); } + case DRR_OBJECT_RANGE: + { + struct drr_object_range *drror = + &rrd->header.drr_u.drr_object_range; + return (receive_object_range(rwa, drror)); + } default: return (SET_ERROR(EINVAL)); } @@ -1727,9 +2265,9 @@ receive_writer_thread(void *arg) */ if (rwa->err == 0) { rwa->err = receive_process_record(rwa, rrd); - } else if (rrd->write_buf != NULL) { - dmu_return_arcbuf(rrd->write_buf); - rrd->write_buf = NULL; + } else if (rrd->arc_buf != NULL) { + dmu_return_arcbuf(rrd->arc_buf); + rrd->arc_buf = NULL; rrd->payload = NULL; } else if (rrd->payload != NULL) { kmem_free(rrd->payload, rrd->payload_size); @@ -1794,6 +2332,7 
@@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp, nvlist_t *begin_nvl = NULL; ra.byteswap = drc->drc_byteswap; + ra.raw = drc->drc_raw; ra.cksum = drc->drc_cksum; ra.vp = vp; ra.voff = *voffp; @@ -1819,17 +2358,21 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp, ASSERT(dsl_dataset_phys(drc->drc_ds)->ds_flags & DS_FLAG_INCONSISTENT); featureflags = DMU_GET_FEATUREFLAGS(drc->drc_drrb->drr_versioninfo); + ra.featureflags = featureflags; + + ASSERT0(ra.os->os_encrypted && + (featureflags & DMU_BACKUP_FEATURE_EMBED_DATA)); /* if this stream is dedup'ed, set up the avl tree for guid mapping */ if (featureflags & DMU_BACKUP_FEATURE_DEDUP) { minor_t minor; if (cleanup_fd == -1) { - ra.err = SET_ERROR(EBADF); + err = SET_ERROR(EBADF); goto out; } - ra.err = zfs_onexit_fd_hold(cleanup_fd, &minor); - if (ra.err != 0) { + err = zfs_onexit_fd_hold(cleanup_fd, &minor); + if (err != 0) { cleanup_fd = -1; goto out; } @@ -1843,12 +2386,12 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp, err = zfs_onexit_add_cb(minor, free_guid_map_onexit, rwa.guid_to_ds_map, action_handlep); - if (ra.err != 0) + if (err != 0) goto out; } else { err = zfs_onexit_cb_data(minor, *action_handlep, (void **)&rwa.guid_to_ds_map); - if (ra.err != 0) + if (err != 0) goto out; } @@ -1873,6 +2416,38 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp, goto out; } + /* handle DSL encryption key payload */ + if (featureflags & DMU_BACKUP_FEATURE_RAW) { + nvlist_t *keynvl = NULL; + + ASSERT(ra.os->os_encrypted); + ASSERT(drc->drc_raw); + + err = nvlist_lookup_nvlist(begin_nvl, "crypt_keydata", &keynvl); + if (err != 0) + goto out; + + /* + * If this is a new dataset we set the key immediately. + * Otherwise we don't want to change the key until we + * are sure the rest of the receive succeeded so we stash + * the keynvl away until then. 
+ */ + err = dsl_crypto_recv_raw(spa_name(ra.os->os_spa), + drc->drc_ds->ds_object, drc->drc_fromsnapobj, + drc->drc_drrb->drr_type, keynvl, drc->drc_newfs); + if (err != 0) + goto out; + + /* see comment in dmu_recv_end_sync() */ + drc->drc_ivset_guid = 0; + (void) nvlist_lookup_uint64(keynvl, "to_ivset_guid", + &drc->drc_ivset_guid); + + if (!drc->drc_newfs) + drc->drc_keynvl = fnvlist_dup(keynvl); + } + if (featureflags & DMU_BACKUP_FEATURE_RESUMING) { err = resume_check(&ra, begin_nvl); if (err != 0) @@ -1886,6 +2461,9 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp, rwa.os = ra.os; rwa.byteswap = drc->drc_byteswap; rwa.resumable = drc->drc_resumable; + rwa.raw = drc->drc_raw; + rwa.spill = drc->drc_spill; + rwa.os->os_raw_receive = drc->drc_raw; (void) thread_create(NULL, 0, receive_writer_thread, &rwa, 0, curproc, TS_RUN, minclsyspri); @@ -1926,10 +2504,10 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp, sizeof (struct receive_record_arg) + ra.rrd->payload_size); ra.rrd = NULL; } - if (ra.next_rrd == NULL) - ra.next_rrd = kmem_zalloc(sizeof (*ra.next_rrd), KM_SLEEP); - ra.next_rrd->eos_marker = B_TRUE; - bqueue_enqueue(&rwa.q, ra.next_rrd, 1); + ASSERT3P(ra.rrd, ==, NULL); + ra.rrd = kmem_zalloc(sizeof (*ra.rrd), KM_SLEEP); + ra.rrd->eos_marker = B_TRUE; + bqueue_enqueue(&rwa.q, ra.rrd, 1); mutex_enter(&rwa.mutex); while (!rwa.done) { @@ -1979,6 +2557,14 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp, err = rwa.err; out: + /* + * If we hit an error before we started the receive_writer_thread + * we need to clean up the next_rrd we create by processing the + * DRR_BEGIN record. + */ + if (ra.next_rrd != NULL) + kmem_free(ra.next_rrd, sizeof (*ra.next_rrd)); + nvlist_free(begin_nvl); if ((featureflags & DMU_BACKUP_FEATURE_DEDUP) && (cleanup_fd != -1)) zfs_onexit_fd_rele(cleanup_fd); @@ -1990,6 +2576,7 @@ out: * the inconsistent state. 
*/ dmu_recv_cleanup_ds(drc); + nvlist_free(drc->drc_keynvl); } *voffp = ra.voff; @@ -2045,6 +2632,15 @@ dmu_recv_end_check(void *arg, dmu_tx_t *tx) return (error); } } + if (drc->drc_keynvl != NULL) { + error = dsl_crypto_recv_raw_key_check(drc->drc_ds, + drc->drc_keynvl, tx); + if (error != 0) { + dsl_dataset_rele(origin_head, FTAG); + return (error); + } + } + error = dsl_dataset_clone_swap_check_impl(drc->drc_ds, origin_head, drc->drc_force, drc->drc_owner, tx); if (error != 0) { @@ -2070,9 +2666,11 @@ dmu_recv_end_sync(void *arg, dmu_tx_t *tx) { dmu_recv_cookie_t *drc = arg; dsl_pool_t *dp = dmu_tx_pool(tx); + boolean_t encrypted = drc->drc_ds->ds_dir->dd_crypto_obj != 0; spa_history_log_internal_ds(drc->drc_ds, "finish receiving", tx, "snap=%s", drc->drc_tosnap); + drc->drc_ds->ds_objset->os_raw_receive = B_FALSE; if (!drc->drc_newfs) { dsl_dataset_t *origin_head; @@ -2100,8 +2698,14 @@ dmu_recv_end_sync(void *arg, dmu_tx_t *tx) dsl_dataset_rele(snap, FTAG); } } - VERIFY3P(drc->drc_ds->ds_prev, ==, - origin_head->ds_prev); + if (drc->drc_keynvl != NULL) { + dsl_crypto_recv_raw_key_sync(drc->drc_ds, + drc->drc_keynvl, tx); + nvlist_free(drc->drc_keynvl); + drc->drc_keynvl = NULL; + } + + VERIFY3P(drc->drc_ds->ds_prev, ==, origin_head->ds_prev); dsl_dataset_clone_swap_sync_impl(drc->drc_ds, origin_head, tx); @@ -2162,21 +2766,50 @@ dmu_recv_end_sync(void *arg, dmu_tx_t *tx) drc->drc_newsnapobj = dsl_dataset_phys(drc->drc_ds)->ds_prev_snap_obj; } + + /* + * If this is a raw receive, the crypt_keydata nvlist will include + * a to_ivset_guid for us to set on the new snapshot. This value + * will override the value generated by the snapshot code. However, + * this value may not be present, because older implementations of + * the raw send code did not include this value, and we are still + * allowed to receive them if the zfs_disable_ivset_guid_check + * tunable is set, in which case we will leave the newly-generated + * value. 
+ */ + if (drc->drc_raw && drc->drc_ivset_guid != 0) { + dmu_object_zapify(dp->dp_meta_objset, drc->drc_newsnapobj, + DMU_OT_DSL_DATASET, tx); + VERIFY0(zap_update(dp->dp_meta_objset, drc->drc_newsnapobj, + DS_FIELD_IVSET_GUID, sizeof (uint64_t), 1, + &drc->drc_ivset_guid, tx)); + } + /* * Release the hold from dmu_recv_begin. This must be done before - * we return to open context, so that when we free the dataset's dnode, - * we can evict its bonus buffer. + * we return to open context, so that when we free the dataset's dnode + * we can evict its bonus buffer. Since the dataset may be destroyed + * at this point (and therefore won't have a valid pointer to the spa) + * we release the key mapping manually here while we do have a valid + * pointer, if it exists. */ - dsl_dataset_disown(drc->drc_ds, dmu_recv_tag); + if (!drc->drc_raw && encrypted) { + (void) spa_keystore_remove_mapping(dmu_tx_pool(tx)->dp_spa, + drc->drc_ds->ds_object, drc->drc_ds); + } + dsl_dataset_disown(drc->drc_ds, 0, dmu_recv_tag); drc->drc_ds = NULL; } static int -add_ds_to_guidmap(const char *name, avl_tree_t *guid_map, uint64_t snapobj) +add_ds_to_guidmap(const char *name, avl_tree_t *guid_map, uint64_t snapobj, + boolean_t raw) { dsl_pool_t *dp; dsl_dataset_t *snapds; guid_map_entry_t *gmep; + objset_t *os; + ds_hold_flags_t dsflags = (raw) ? 0 : DS_HOLD_FLAG_DECRYPT; int err; ASSERT(guid_map != NULL); @@ -2185,12 +2818,29 @@ add_ds_to_guidmap(const char *name, avl_tree_t *guid_map, uint64_t snapobj) if (err != 0) return (err); gmep = kmem_alloc(sizeof (*gmep), KM_SLEEP); - err = dsl_dataset_hold_obj(dp, snapobj, gmep, &snapds); + err = dsl_dataset_own_obj(dp, snapobj, dsflags, gmep, &snapds); if (err == 0) { + /* + * If this is a deduplicated raw send stream, we need + * to make sure that we can still read raw blocks from + * earlier datasets in the stream, so we set the + * os_raw_receive flag now. 
+ */ + if (raw) { + err = dmu_objset_from_ds(snapds, &os); + if (err != 0) { + dsl_dataset_disown(snapds, dsflags, FTAG); + dsl_pool_rele(dp, FTAG); + kmem_free(gmep, sizeof (*gmep)); + return (err); + } + os->os_raw_receive = B_TRUE; + } + + gmep->raw = raw; gmep->guid = dsl_dataset_phys(snapds)->ds_guid; gmep->gme_ds = snapds; avl_add(guid_map, gmep); - dsl_dataset_long_hold(snapds, gmep); } else { kmem_free(gmep, sizeof (*gmep)); } @@ -2241,10 +2891,10 @@ dmu_recv_end(dmu_recv_cookie_t *drc, void *owner) if (error != 0) { dmu_recv_cleanup_ds(drc); + nvlist_free(drc->drc_keynvl); } else if (drc->drc_guid_to_ds_map != NULL) { - (void) add_ds_to_guidmap(drc->drc_tofs, - drc->drc_guid_to_ds_map, - drc->drc_newsnapobj); + (void) add_ds_to_guidmap(drc->drc_tofs, drc->drc_guid_to_ds_map, + drc->drc_newsnapobj, drc->drc_raw); } return (error); } diff --git a/usr/src/uts/common/fs/zfs/dmu_send.c b/usr/src/uts/common/fs/zfs/dmu_send.c index 6d65086079..bfc0a6f585 100644 --- a/usr/src/uts/common/fs/zfs/dmu_send.c +++ b/usr/src/uts/common/fs/zfs/dmu_send.c @@ -61,6 +61,8 @@ int zfs_send_corrupt_data = B_FALSE; int zfs_send_queue_length = 16 * 1024 * 1024; /* Set this tunable to FALSE to disable setting of DRR_FLAG_FREERECORDS */ int zfs_send_set_freerecords_bit = B_TRUE; +/* Set this tunable to FALSE is disable sending unmodified spill blocks. */ +int zfs_send_unmodified_spill_blocks = B_TRUE; /* * Use this to override the recordsize calculation for fast zfs send estimates. @@ -90,6 +92,8 @@ struct send_block_record { bqueue_node_t ln; }; +static int do_dump(dmu_sendarg_t *dsa, struct send_block_record *data); + static int dump_bytes(dmu_sendarg_t *dsp, void *buf, int len) { @@ -97,18 +101,17 @@ dump_bytes(dmu_sendarg_t *dsp, void *buf, int len) ssize_t resid; /* have to get resid to get detailed errno */ /* - * The code does not rely on this (len being a multiple of 8). We keep + * The code does not rely on len being a multiple of 8. 
We keep * this assertion because of the corresponding assertion in * receive_read(). Keeping this assertion ensures that we do not * inadvertently break backwards compatibility (causing the assertion - * in receive_read() to trigger on old software). - * - * Removing the assertions could be rolled into a new feature that uses - * data that isn't 8-byte aligned; if the assertions were removed, a - * feature flag would have to be added. + * in receive_read() to trigger on old software). Newer feature flags + * (such as raw send) may break this assertion since they were + * introduced after the requirement was made obsolete. */ - ASSERT0(len % 8); + ASSERT(len % 8 == 0 || + (dsp->dsa_featureflags & DMU_BACKUP_FEATURE_RAW) != 0); dsp->dsa_err = vn_rdwr(UIO_WRITE, dsp->dsa_vp, (caddr_t)buf, len, @@ -189,9 +192,6 @@ dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset, (object == dsp->dsa_last_data_object && offset > dsp->dsa_last_data_offset)); - if (length != -1ULL && offset + length < offset) - length = -1ULL; - /* * If there is a pending op, but it's not PENDING_FREE, push it out, * since free block aggregation can only be done for blocks of the @@ -208,19 +208,22 @@ dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset, if (dsp->dsa_pending_op == PENDING_FREE) { /* - * There should never be a PENDING_FREE if length is -1 - * (because dump_dnode is the only place where this - * function is called with a -1, and only after flushing - * any pending record). + * There should never be a PENDING_FREE if length is + * DMU_OBJECT_END (because dump_dnode is the only place where + * this function is called with a DMU_OBJECT_END, and only after + * flushing any pending record). */ - ASSERT(length != -1ULL); + ASSERT(length != DMU_OBJECT_END); /* * Check to see whether this free block can be aggregated * with pending one. 
*/ if (drrf->drr_object == object && drrf->drr_offset + drrf->drr_length == offset) { - drrf->drr_length += length; + if (offset + length < offset) + drrf->drr_length = DMU_OBJECT_END; + else + drrf->drr_length += length; return (0); } else { /* not a continuation. Push out pending record */ @@ -234,9 +237,12 @@ dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset, dsp->dsa_drr->drr_type = DRR_FREE; drrf->drr_object = object; drrf->drr_offset = offset; - drrf->drr_length = length; + if (offset + length < offset) + drrf->drr_length = DMU_OBJECT_END; + else + drrf->drr_length = length; drrf->drr_toguid = dsp->dsa_toguid; - if (length == -1ULL) { + if (length == DMU_OBJECT_END) { if (dump_record(dsp, NULL, 0) != 0) return (SET_ERROR(EINTR)); } else { @@ -247,11 +253,11 @@ dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset, } static int -dump_write(dmu_sendarg_t *dsp, dmu_object_type_t type, - uint64_t object, uint64_t offset, int lsize, int psize, const blkptr_t *bp, - void *data) +dump_write(dmu_sendarg_t *dsp, dmu_object_type_t type, uint64_t object, + uint64_t offset, int lsize, int psize, const blkptr_t *bp, void *data) { uint64_t payload_size; + boolean_t raw = (dsp->dsa_featureflags & DMU_BACKUP_FEATURE_RAW); struct drr_write *drrw = &(dsp->dsa_drr->drr_u.drr_write); /* @@ -284,16 +290,36 @@ dump_write(dmu_sendarg_t *dsp, dmu_object_type_t type, drrw->drr_toguid = dsp->dsa_toguid; drrw->drr_logical_size = lsize; - /* only set the compression fields if the buf is compressed */ - if (lsize != psize) { - ASSERT(dsp->dsa_featureflags & DMU_BACKUP_FEATURE_COMPRESSED); + /* only set the compression fields if the buf is compressed or raw */ + if (raw || lsize != psize) { ASSERT(!BP_IS_EMBEDDED(bp)); - ASSERT(!BP_SHOULD_BYTESWAP(bp)); - ASSERT(!DMU_OT_IS_METADATA(BP_GET_TYPE(bp))); - ASSERT3U(BP_GET_COMPRESS(bp), !=, ZIO_COMPRESS_OFF); ASSERT3S(psize, >, 0); - ASSERT3S(lsize, >=, psize); + if (raw) { + ASSERT(BP_IS_PROTECTED(bp)); + + /* + * This 
is a raw protected block so we need to pass + * along everything the receiving side will need to + * interpret this block, including the byteswap, salt, + * IV, and MAC. + */ + if (BP_SHOULD_BYTESWAP(bp)) + drrw->drr_flags |= DRR_RAW_BYTESWAP; + zio_crypt_decode_params_bp(bp, drrw->drr_salt, + drrw->drr_iv); + zio_crypt_decode_mac_bp(bp, drrw->drr_mac); + } else { + /* this is a compressed block */ + ASSERT(dsp->dsa_featureflags & + DMU_BACKUP_FEATURE_COMPRESSED); + ASSERT(!BP_SHOULD_BYTESWAP(bp)); + ASSERT(!DMU_OT_IS_METADATA(BP_GET_TYPE(bp))); + ASSERT3U(BP_GET_COMPRESS(bp), !=, ZIO_COMPRESS_OFF); + ASSERT3S(lsize, >=, psize); + } + + /* set fields common to compressed and raw sends */ drrw->drr_compressiontype = BP_GET_COMPRESS(bp); drrw->drr_compressed_size = psize; payload_size = drrw->drr_compressed_size; @@ -301,22 +327,23 @@ dump_write(dmu_sendarg_t *dsp, dmu_object_type_t type, payload_size = drrw->drr_logical_size; } - if (bp == NULL || BP_IS_EMBEDDED(bp)) { + if (bp == NULL || BP_IS_EMBEDDED(bp) || (BP_IS_PROTECTED(bp) && !raw)) { /* - * There's no pre-computed checksum for partial-block - * writes or embedded BP's, so (like - * fletcher4-checkummed blocks) userland will have to - * compute a dedup-capable checksum itself. + * There's no pre-computed checksum for partial-block writes, + * embedded BP's, or encrypted BP's that are being sent as + * plaintext, so (like fletcher4-checkummed blocks) userland + * will have to compute a dedup-capable checksum itself. 
*/ drrw->drr_checksumtype = ZIO_CHECKSUM_OFF; } else { drrw->drr_checksumtype = BP_GET_CHECKSUM(bp); if (zio_checksum_table[drrw->drr_checksumtype].ci_flags & ZCHECKSUM_FLAG_DEDUP) - drrw->drr_checksumflags |= DRR_CHECKSUM_DEDUP; + drrw->drr_flags |= DRR_CHECKSUM_DEDUP; DDK_SET_LSIZE(&drrw->drr_key, BP_GET_LSIZE(bp)); DDK_SET_PSIZE(&drrw->drr_key, BP_GET_PSIZE(bp)); DDK_SET_COMPRESS(&drrw->drr_key, BP_GET_COMPRESS(bp)); + DDK_SET_CRYPT(&drrw->drr_key, BP_IS_PROTECTED(bp)); drrw->drr_key.ddk_cksum = bp->blk_cksum; } @@ -360,9 +387,11 @@ dump_write_embedded(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset, } static int -dump_spill(dmu_sendarg_t *dsp, uint64_t object, int blksz, void *data) +dump_spill(dmu_sendarg_t *dsp, const blkptr_t *bp, uint64_t object, void *data) { struct drr_spill *drrs = &(dsp->dsa_drr->drr_u.drr_spill); + uint64_t blksz = BP_GET_LSIZE(bp); + uint64_t payload_size = blksz; if (dsp->dsa_pending_op != PENDING_NONE) { if (dump_record(dsp, NULL, 0) != 0) @@ -377,7 +406,26 @@ dump_spill(dmu_sendarg_t *dsp, uint64_t object, int blksz, void *data) drrs->drr_length = blksz; drrs->drr_toguid = dsp->dsa_toguid; - if (dump_record(dsp, data, blksz) != 0) + /* See comment in dump_dnode() for full details */ + if (zfs_send_unmodified_spill_blocks && + (bp->blk_birth <= dsp->dsa_fromtxg)) { + drrs->drr_flags |= DRR_SPILL_UNMODIFIED; + } + + /* handle raw send fields */ + if (dsp->dsa_featureflags & DMU_BACKUP_FEATURE_RAW) { + ASSERT(BP_IS_PROTECTED(bp)); + + if (BP_SHOULD_BYTESWAP(bp)) + drrs->drr_flags |= DRR_RAW_BYTESWAP; + drrs->drr_compressiontype = BP_GET_COMPRESS(bp); + drrs->drr_compressed_size = BP_GET_PSIZE(bp); + zio_crypt_decode_params_bp(bp, drrs->drr_salt, drrs->drr_iv); + zio_crypt_decode_mac_bp(bp, drrs->drr_mac); + payload_size = drrs->drr_compressed_size; + } + + if (dump_record(dsp, data, payload_size) != 0) return (SET_ERROR(EINTR)); return (0); } @@ -429,9 +477,11 @@ dump_freeobjects(dmu_sendarg_t *dsp, uint64_t firstobj, uint64_t 
numobjs) } static int -dump_dnode(dmu_sendarg_t *dsp, uint64_t object, dnode_phys_t *dnp) +dump_dnode(dmu_sendarg_t *dsp, const blkptr_t *bp, uint64_t object, + dnode_phys_t *dnp) { struct drr_object *drro = &(dsp->dsa_drr->drr_u.drr_object); + int bonuslen; if (object < dsp->dsa_resume_object) { /* @@ -472,20 +522,111 @@ dump_dnode(dmu_sendarg_t *dsp, uint64_t object, dnode_phys_t *dnp) drro->drr_blksz > SPA_OLD_MAXBLOCKSIZE) drro->drr_blksz = SPA_OLD_MAXBLOCKSIZE; - if (dump_record(dsp, DN_BONUS(dnp), - P2ROUNDUP(dnp->dn_bonuslen, 8)) != 0) { - return (SET_ERROR(EINTR)); + bonuslen = P2ROUNDUP(dnp->dn_bonuslen, 8); + + if ((dsp->dsa_featureflags & DMU_BACKUP_FEATURE_RAW)) { + ASSERT(BP_IS_ENCRYPTED(bp)); + + if (BP_SHOULD_BYTESWAP(bp)) + drro->drr_flags |= DRR_RAW_BYTESWAP; + + /* needed for reconstructing dnp on recv side */ + drro->drr_maxblkid = dnp->dn_maxblkid; + drro->drr_indblkshift = dnp->dn_indblkshift; + drro->drr_nlevels = dnp->dn_nlevels; + drro->drr_nblkptr = dnp->dn_nblkptr; + + /* + * Since we encrypt the entire bonus area, the (raw) part + * beyond the bonuslen is actually nonzero, so we need + * to send it. + */ + if (bonuslen != 0) { + drro->drr_raw_bonuslen = DN_MAX_BONUS_LEN(dnp); + bonuslen = drro->drr_raw_bonuslen; + } } + /* + * DRR_OBJECT_SPILL is set for every dnode which references a + * spill block. This allows the receiving pool to definitively + * determine when a spill block should be kept or freed. + */ + if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) + drro->drr_flags |= DRR_OBJECT_SPILL; + + if (dump_record(dsp, DN_BONUS(dnp), bonuslen) != 0) + return (SET_ERROR(EINTR)); + /* Free anything past the end of the file. */ if (dump_free(dsp, object, (dnp->dn_maxblkid + 1) * - (dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT), -1ULL) != 0) + (dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT), DMU_OBJECT_END) != 0) return (SET_ERROR(EINTR)); + + /* + * Send DRR_SPILL records for unmodified spill blocks. 
This is useful + * because changing certain attributes of the object (e.g. blocksize) + * can cause old versions of ZFS to incorrectly remove a spill block. + * Including these records in the stream forces an up to date version + * to always be written ensuring they're never lost. Current versions + * of the code which understand the DRR_FLAG_SPILL_BLOCK feature can + * ignore these unmodified spill blocks. + */ + if (zfs_send_unmodified_spill_blocks && + (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) && + (DN_SPILL_BLKPTR(dnp)->blk_birth <= dsp->dsa_fromtxg)) { + struct send_block_record record; + + bzero(&record, sizeof (struct send_block_record)); + record.eos_marker = B_FALSE; + record.bp = *DN_SPILL_BLKPTR(dnp); + SET_BOOKMARK(&(record.zb), dmu_objset_id(dsp->dsa_os), + object, 0, DMU_SPILL_BLKID); + + if (do_dump(dsp, &record) != 0) + return (SET_ERROR(EINTR)); + } + if (dsp->dsa_err != 0) return (SET_ERROR(EINTR)); return (0); } +static int +dump_object_range(dmu_sendarg_t *dsp, const blkptr_t *bp, uint64_t firstobj, + uint64_t numslots) +{ + struct drr_object_range *drror = + &(dsp->dsa_drr->drr_u.drr_object_range); + + /* we only use this record type for raw sends */ + ASSERT(BP_IS_PROTECTED(bp)); + ASSERT(dsp->dsa_featureflags & DMU_BACKUP_FEATURE_RAW); + ASSERT3U(BP_GET_COMPRESS(bp), ==, ZIO_COMPRESS_OFF); + ASSERT3U(BP_GET_TYPE(bp), ==, DMU_OT_DNODE); + ASSERT0(BP_GET_LEVEL(bp)); + + if (dsp->dsa_pending_op != PENDING_NONE) { + if (dump_record(dsp, NULL, 0) != 0) + return (SET_ERROR(EINTR)); + dsp->dsa_pending_op = PENDING_NONE; + } + + bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t)); + dsp->dsa_drr->drr_type = DRR_OBJECT_RANGE; + drror->drr_firstobj = firstobj; + drror->drr_numslots = numslots; + drror->drr_toguid = dsp->dsa_toguid; + if (BP_SHOULD_BYTESWAP(bp)) + drror->drr_flags |= DRR_RAW_BYTESWAP; + zio_crypt_decode_params_bp(bp, drror->drr_salt, drror->drr_iv); + zio_crypt_decode_mac_bp(bp, drror->drr_mac); + + if (dump_record(dsp, NULL, 0) != 0) + 
return (SET_ERROR(EINTR)); + return (0); +} + static boolean_t backup_do_embed(dmu_sendarg_t *dsp, const blkptr_t *bp) { @@ -529,6 +670,7 @@ send_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, ASSERT(zb->zb_object == DMU_META_DNODE_OBJECT || zb->zb_object >= sta->resume.zb_object); + ASSERT3P(sta->ds, !=, NULL); if (sta->cancel) return (SET_ERROR(EINTR)); @@ -601,6 +743,18 @@ do_dump(dmu_sendarg_t *dsa, struct send_block_record *data) ASSERT(zb->zb_object == DMU_META_DNODE_OBJECT || zb->zb_object >= dsa->dsa_resume_object); + /* + * All bps of an encrypted os should have the encryption bit set. + * If this is not true it indicates tampering and we report an error. + */ + if (dsa->dsa_os->os_encrypted && + !BP_IS_HOLE(bp) && !BP_USES_CRYPT(bp)) { + spa_log_error(spa, zb); + zfs_panic_recover("unencrypted block in encrypted " + "object set %llu", ds->ds_object); + return (SET_ERROR(EIO)); + } + if (zb->zb_object != DMU_META_DNODE_OBJECT && DMU_OBJECT_IS_SPECIAL(zb->zb_object)) { return (0); @@ -612,40 +766,66 @@ do_dump(dmu_sendarg_t *dsa, struct send_block_record *data) } else if (BP_IS_HOLE(bp)) { uint64_t span = BP_SPAN(dblkszsec, indblkshift, zb->zb_level); uint64_t offset = zb->zb_blkid * span; - err = dump_free(dsa, zb->zb_object, offset, span); + /* Don't dump free records for offsets > DMU_OBJECT_END */ + if (zb->zb_blkid == 0 || span <= DMU_OBJECT_END / zb->zb_blkid) + err = dump_free(dsa, zb->zb_object, offset, span); } else if (zb->zb_level > 0 || type == DMU_OT_OBJSET) { return (0); } else if (type == DMU_OT_DNODE) { int epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT; arc_flags_t aflags = ARC_FLAG_WAIT; arc_buf_t *abuf; + enum zio_flag zioflags = ZIO_FLAG_CANFAIL; + + if (dsa->dsa_featureflags & DMU_BACKUP_FEATURE_RAW) { + ASSERT(BP_IS_ENCRYPTED(bp)); + ASSERT3U(BP_GET_COMPRESS(bp), ==, ZIO_COMPRESS_OFF); + zioflags |= ZIO_FLAG_RAW; + } ASSERT0(zb->zb_level); if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf, - ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, - 
&aflags, zb) != 0) + ZIO_PRIORITY_ASYNC_READ, zioflags, &aflags, zb) != 0) return (SET_ERROR(EIO)); dnode_phys_t *blk = abuf->b_data; uint64_t dnobj = zb->zb_blkid * epb; - for (int i = 0; i < epb; i += blk[i].dn_extra_slots + 1) { - err = dump_dnode(dsa, dnobj + i, blk + i); - if (err != 0) - break; + + /* + * Raw sends require sending encryption parameters for the + * block of dnodes. Regular sends do not need to send this + * info. + */ + if (dsa->dsa_featureflags & DMU_BACKUP_FEATURE_RAW) { + ASSERT(arc_is_encrypted(abuf)); + err = dump_object_range(dsa, bp, dnobj, epb); + } + + if (err == 0) { + for (int i = 0; i < epb; + i += blk[i].dn_extra_slots + 1) { + err = dump_dnode(dsa, bp, dnobj + i, blk + i); + if (err != 0) + break; + } } arc_buf_destroy(abuf, &abuf); } else if (type == DMU_OT_SA) { arc_flags_t aflags = ARC_FLAG_WAIT; arc_buf_t *abuf; - int blksz = BP_GET_LSIZE(bp); + enum zio_flag zioflags = ZIO_FLAG_CANFAIL; + + if (dsa->dsa_featureflags & DMU_BACKUP_FEATURE_RAW) { + ASSERT(BP_IS_PROTECTED(bp)); + zioflags |= ZIO_FLAG_RAW; + } if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf, - ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, - &aflags, zb) != 0) + ZIO_PRIORITY_ASYNC_READ, zioflags, &aflags, zb) != 0) return (SET_ERROR(EIO)); - err = dump_spill(dsa, zb->zb_object, blksz, abuf->b_data); + err = dump_spill(dsa, bp, zb->zb_object, abuf->b_data); arc_buf_destroy(abuf, &abuf); } else if (backup_do_embed(dsa, bp)) { /* it's an embedded level-0 block of a regular object */ @@ -667,6 +847,14 @@ do_dump(dmu_sendarg_t *dsa, struct send_block_record *data) */ boolean_t split_large_blocks = blksz > SPA_OLD_MAXBLOCKSIZE && !(dsa->dsa_featureflags & DMU_BACKUP_FEATURE_LARGE_BLOCKS); + + /* + * Raw sends require that we always get raw data as it exists + * on disk, so we assert that we are not splitting blocks here. 
+ */ + boolean_t request_raw = + (dsa->dsa_featureflags & DMU_BACKUP_FEATURE_RAW) != 0; + /* * We should only request compressed data from the ARC if all * the following are true: @@ -682,6 +870,8 @@ do_dump(dmu_sendarg_t *dsa, struct send_block_record *data) !split_large_blocks && !BP_SHOULD_BYTESWAP(bp) && !BP_IS_EMBEDDED(bp) && !DMU_OT_IS_METADATA(BP_GET_TYPE(bp)); + IMPLY(request_raw, !split_large_blocks); + IMPLY(request_raw, BP_IS_PROTECTED(bp)); ASSERT0(zb->zb_level); ASSERT(zb->zb_object > dsa->dsa_resume_object || (zb->zb_object == dsa->dsa_resume_object && @@ -695,8 +885,11 @@ do_dump(dmu_sendarg_t *dsa, struct send_block_record *data) ASSERT3U(blksz, ==, BP_GET_LSIZE(bp)); enum zio_flag zioflags = ZIO_FLAG_CANFAIL; - if (request_compressed) + if (request_raw) zioflags |= ZIO_FLAG_RAW; + else if (request_compressed) + zioflags |= ZIO_FLAG_RAW_COMPRESS; + if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf, ZIO_PRIORITY_ASYNC_READ, zioflags, &aflags, zb) != 0) { if (zfs_send_corrupt_data) { @@ -716,6 +909,7 @@ do_dump(dmu_sendarg_t *dsa, struct send_block_record *data) offset = zb->zb_blkid * blksz; if (split_large_blocks) { + ASSERT0(arc_is_encrypted(abuf)); ASSERT3U(arc_get_compression(abuf), ==, ZIO_COMPRESS_OFF); char *buf = abuf->b_data; @@ -758,7 +952,7 @@ static int dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds, zfs_bookmark_phys_t *ancestor_zb, boolean_t is_clone, boolean_t embedok, boolean_t large_block_ok, boolean_t compressok, - int outfd, uint64_t resumeobj, uint64_t resumeoff, + boolean_t rawok, int outfd, uint64_t resumeobj, uint64_t resumeoff, vnode_t *vp, offset_t *off) { objset_t *os; @@ -775,6 +969,28 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds, return (err); } + /* + * If this is a non-raw send of an encrypted ds, we can ensure that + * the objset_phys_t is authenticated. This is safe because this is + * either a snapshot or we have owned the dataset, ensuring that + * it can't be modified. 
+ */ + if (!rawok && os->os_encrypted && + arc_is_unauthenticated(os->os_phys_buf)) { + zbookmark_phys_t zb; + + SET_BOOKMARK(&zb, to_ds->ds_object, ZB_ROOT_OBJECT, + ZB_ROOT_LEVEL, ZB_ROOT_BLKID); + err = arc_untransform(os->os_phys_buf, os->os_spa, + &zb, B_FALSE); + if (err != 0) { + dsl_pool_rele(dp, tag); + return (err); + } + + ASSERT0(arc_is_unauthenticated(os->os_phys_buf)); + } + drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP); drr->drr_type = DRR_BEGIN; drr->drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC; @@ -795,22 +1011,29 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds, } #endif - if (large_block_ok && to_ds->ds_feature_inuse[SPA_FEATURE_LARGE_BLOCKS]) + /* raw sends imply large_block_ok */ + if ((large_block_ok || rawok) && + to_ds->ds_feature_inuse[SPA_FEATURE_LARGE_BLOCKS]) featureflags |= DMU_BACKUP_FEATURE_LARGE_BLOCKS; if (to_ds->ds_feature_inuse[SPA_FEATURE_LARGE_DNODE]) featureflags |= DMU_BACKUP_FEATURE_LARGE_DNODE; - if (embedok && + + /* encrypted datasets will not have embedded blocks */ + if ((embedok || rawok) && !os->os_encrypted && spa_feature_is_active(dp->dp_spa, SPA_FEATURE_EMBEDDED_DATA)) { featureflags |= DMU_BACKUP_FEATURE_EMBED_DATA; - if (spa_feature_is_active(dp->dp_spa, SPA_FEATURE_LZ4_COMPRESS)) - featureflags |= DMU_BACKUP_FEATURE_LZ4; } - if (compressok) { + + /* raw send implies compressok */ + if (compressok || rawok) featureflags |= DMU_BACKUP_FEATURE_COMPRESSED; - } + if (rawok && os->os_encrypted) + featureflags |= DMU_BACKUP_FEATURE_RAW; + if ((featureflags & - (DMU_BACKUP_FEATURE_EMBED_DATA | DMU_BACKUP_FEATURE_COMPRESSED)) != - 0 && spa_feature_is_active(dp->dp_spa, SPA_FEATURE_LZ4_COMPRESS)) { + (DMU_BACKUP_FEATURE_EMBED_DATA | DMU_BACKUP_FEATURE_COMPRESSED | + DMU_BACKUP_FEATURE_RAW)) != 0 && + spa_feature_is_active(dp->dp_spa, SPA_FEATURE_LZ4_COMPRESS)) { featureflags |= DMU_BACKUP_FEATURE_LZ4; } @@ -832,6 +1055,8 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds, if 
(zfs_send_set_freerecords_bit) drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_FREERECORDS; + drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_SPILL_BLOCK; + if (ancestor_zb != NULL) { drr->drr_u.drr_begin.drr_fromguid = ancestor_zb->zbm_guid; @@ -852,6 +1077,7 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds, dsp->dsa_os = os; dsp->dsa_off = off; dsp->dsa_toguid = dsl_dataset_phys(to_ds)->ds_guid; + dsp->dsa_fromtxg = fromtxg; dsp->dsa_pending_op = PENDING_NONE; dsp->dsa_featureflags = featureflags; dsp->dsa_resume_object = resumeobj; @@ -866,19 +1092,47 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds, void *payload = NULL; size_t payload_len = 0; - if (resumeobj != 0 || resumeoff != 0) { - dmu_object_info_t to_doi; - err = dmu_object_info(os, resumeobj, &to_doi); - if (err != 0) - goto out; - SET_BOOKMARK(&to_arg.resume, to_ds->ds_object, resumeobj, 0, - resumeoff / to_doi.doi_data_block_size); - + /* handle features that require a DRR_BEGIN payload */ + if (featureflags & + (DMU_BACKUP_FEATURE_RESUMING | DMU_BACKUP_FEATURE_RAW)) { + nvlist_t *keynvl = NULL; nvlist_t *nvl = fnvlist_alloc(); - fnvlist_add_uint64(nvl, "resume_object", resumeobj); - fnvlist_add_uint64(nvl, "resume_offset", resumeoff); + + if (featureflags & DMU_BACKUP_FEATURE_RESUMING) { + dmu_object_info_t to_doi; + err = dmu_object_info(os, resumeobj, &to_doi); + if (err != 0) { + fnvlist_free(nvl); + goto out; + } + + SET_BOOKMARK(&to_arg.resume, to_ds->ds_object, + resumeobj, 0, + resumeoff / to_doi.doi_data_block_size); + + fnvlist_add_uint64(nvl, "resume_object", resumeobj); + fnvlist_add_uint64(nvl, "resume_offset", resumeoff); + } + + if (featureflags & DMU_BACKUP_FEATURE_RAW) { + uint64_t ivset_guid = (ancestor_zb != NULL) ? 
+ ancestor_zb->zbm_ivset_guid : 0; + + ASSERT(os->os_encrypted); + + err = dsl_crypto_populate_key_nvlist(to_ds, + ivset_guid, &keynvl); + if (err != 0) { + fnvlist_free(nvl); + goto out; + } + + fnvlist_add_nvlist(nvl, "crypt_keydata", keynvl); + } + payload = fnvlist_pack(nvl, &payload_len); drr->drr_payloadlen = payload_len; + fnvlist_free(keynvl); fnvlist_free(nvl); } @@ -896,6 +1150,8 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds, to_arg.ds = to_ds; to_arg.fromtxg = fromtxg; to_arg.flags = TRAVERSE_PRE | TRAVERSE_PREFETCH; + if (rawok) + to_arg.flags |= TRAVERSE_NO_DECRYPT; (void) thread_create(NULL, 0, send_traverse_thread, &to_arg, 0, curproc, TS_RUN, minclsyspri); @@ -942,7 +1198,6 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds, if (dump_record(dsp, NULL, 0) != 0) err = dsp->dsa_err; - out: mutex_enter(&to_ds->ds_sendstream_lock); list_remove(&to_ds->ds_sendstreams, dsp); @@ -961,60 +1216,77 @@ out: int dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap, boolean_t embedok, boolean_t large_block_ok, boolean_t compressok, - int outfd, vnode_t *vp, offset_t *off) + boolean_t rawok, int outfd, vnode_t *vp, offset_t *off) { dsl_pool_t *dp; dsl_dataset_t *ds; dsl_dataset_t *fromds = NULL; + ds_hold_flags_t dsflags = (rawok) ? 
0 : DS_HOLD_FLAG_DECRYPT; int err; err = dsl_pool_hold(pool, FTAG, &dp); if (err != 0) return (err); - err = dsl_dataset_hold_obj(dp, tosnap, FTAG, &ds); + err = dsl_dataset_hold_obj_flags(dp, tosnap, dsflags, FTAG, &ds); if (err != 0) { dsl_pool_rele(dp, FTAG); return (err); } if (fromsnap != 0) { - zfs_bookmark_phys_t zb; + zfs_bookmark_phys_t zb = { 0 }; boolean_t is_clone; err = dsl_dataset_hold_obj(dp, fromsnap, FTAG, &fromds); if (err != 0) { - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(ds, dsflags, FTAG); dsl_pool_rele(dp, FTAG); return (err); } - if (!dsl_dataset_is_before(ds, fromds, 0)) + if (!dsl_dataset_is_before(ds, fromds, 0)) { err = SET_ERROR(EXDEV); + dsl_dataset_rele(fromds, FTAG); + dsl_dataset_rele_flags(ds, dsflags, FTAG); + dsl_pool_rele(dp, FTAG); + return (err); + } + zb.zbm_creation_time = dsl_dataset_phys(fromds)->ds_creation_time; zb.zbm_creation_txg = dsl_dataset_phys(fromds)->ds_creation_txg; zb.zbm_guid = dsl_dataset_phys(fromds)->ds_guid; + + if (dsl_dataset_is_zapified(fromds)) { + (void) zap_lookup(dp->dp_meta_objset, + fromds->ds_object, DS_FIELD_IVSET_GUID, 8, 1, + &zb.zbm_ivset_guid); + } + is_clone = (fromds->ds_dir != ds->ds_dir); dsl_dataset_rele(fromds, FTAG); err = dmu_send_impl(FTAG, dp, ds, &zb, is_clone, - embedok, large_block_ok, compressok, outfd, 0, 0, vp, off); + embedok, large_block_ok, compressok, rawok, outfd, + 0, 0, vp, off); } else { err = dmu_send_impl(FTAG, dp, ds, NULL, B_FALSE, - embedok, large_block_ok, compressok, outfd, 0, 0, vp, off); + embedok, large_block_ok, compressok, rawok, outfd, + 0, 0, vp, off); } - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(ds, dsflags, FTAG); return (err); } int dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok, - boolean_t large_block_ok, boolean_t compressok, int outfd, - uint64_t resumeobj, uint64_t resumeoff, - vnode_t *vp, offset_t *off) + boolean_t large_block_ok, boolean_t compressok, boolean_t rawok, + int outfd, uint64_t 
resumeobj, uint64_t resumeoff, vnode_t *vp, + offset_t *off) { dsl_pool_t *dp; dsl_dataset_t *ds; int err; + ds_hold_flags_t dsflags = (rawok) ? 0 : DS_HOLD_FLAG_DECRYPT; boolean_t owned = B_FALSE; if (fromsnap != NULL && strpbrk(fromsnap, "@#") == NULL) @@ -1029,10 +1301,10 @@ dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok, * We are sending a filesystem or volume. Ensure * that it doesn't change by owning the dataset. */ - err = dsl_dataset_own(dp, tosnap, FTAG, &ds); + err = dsl_dataset_own(dp, tosnap, dsflags, FTAG, &ds); owned = B_TRUE; } else { - err = dsl_dataset_hold(dp, tosnap, FTAG, &ds); + err = dsl_dataset_hold_flags(dp, tosnap, dsflags, FTAG, &ds); } if (err != 0) { dsl_pool_rele(dp, FTAG); @@ -1040,7 +1312,7 @@ dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok, } if (fromsnap != NULL) { - zfs_bookmark_phys_t zb; + zfs_bookmark_phys_t zb = { 0 }; boolean_t is_clone = B_FALSE; int fsnamelen = strchr(tosnap, '@') - tosnap; @@ -1066,28 +1338,40 @@ dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok, dsl_dataset_phys(fromds)->ds_creation_txg; zb.zbm_guid = dsl_dataset_phys(fromds)->ds_guid; is_clone = (ds->ds_dir != fromds->ds_dir); + + if (dsl_dataset_is_zapified(fromds)) { + (void) zap_lookup(dp->dp_meta_objset, + fromds->ds_object, + DS_FIELD_IVSET_GUID, 8, 1, + &zb.zbm_ivset_guid); + } dsl_dataset_rele(fromds, FTAG); } } else { err = dsl_bookmark_lookup(dp, fromsnap, ds, &zb); } if (err != 0) { - dsl_dataset_rele(ds, FTAG); + if (owned) + dsl_dataset_disown(ds, dsflags, FTAG); + else + dsl_dataset_rele_flags(ds, dsflags, FTAG); + dsl_pool_rele(dp, FTAG); return (err); } err = dmu_send_impl(FTAG, dp, ds, &zb, is_clone, - embedok, large_block_ok, compressok, + embedok, large_block_ok, compressok, rawok, outfd, resumeobj, resumeoff, vp, off); } else { err = dmu_send_impl(FTAG, dp, ds, NULL, B_FALSE, - embedok, large_block_ok, compressok, + embedok, large_block_ok, compressok, rawok, outfd, 
resumeobj, resumeoff, vp, off); } if (owned) - dsl_dataset_disown(ds, FTAG); + dsl_dataset_disown(ds, dsflags, FTAG); else - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(ds, dsflags, FTAG); + return (err); } @@ -1242,7 +1526,8 @@ dmu_send_estimate_from_txg(dsl_dataset_t *ds, uint64_t from_txg, * traverse the blocks of the snapshot with birth times after * from_txg, summing their uncompressed size */ - err = traverse_dataset(ds, from_txg, TRAVERSE_POST, + err = traverse_dataset(ds, from_txg, + TRAVERSE_POST | TRAVERSE_NO_DECRYPT, dmu_calculate_send_traversal, &size); if (err) return (err); diff --git a/usr/src/uts/common/fs/zfs/dmu_traverse.c b/usr/src/uts/common/fs/zfs/dmu_traverse.c index f57e510530..0547a09498 100644 --- a/usr/src/uts/common/fs/zfs/dmu_traverse.c +++ b/usr/src/uts/common/fs/zfs/dmu_traverse.c @@ -131,7 +131,7 @@ traverse_zil(traverse_data_t *td, zil_header_t *zh) zilog_t *zilog = zil_alloc(spa_get_dsl(td->td_spa)->dp_meta_objset, zh); (void) zil_parse(zilog, traverse_zil_block, traverse_zil_record, td, - claim_txg); + claim_txg, !(td->td_flags & TRAVERSE_NO_DECRYPT)); zil_free(zilog); } @@ -179,6 +179,7 @@ traverse_prefetch_metadata(traverse_data_t *td, const blkptr_t *bp, const zbookmark_phys_t *zb) { arc_flags_t flags = ARC_FLAG_NOWAIT | ARC_FLAG_PREFETCH; + int zio_flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE; if (!(td->td_flags & TRAVERSE_PREFETCH_METADATA)) return; @@ -194,8 +195,11 @@ traverse_prefetch_metadata(traverse_data_t *td, if (BP_GET_LEVEL(bp) == 0 && BP_GET_TYPE(bp) != DMU_OT_DNODE) return; + if ((td->td_flags & TRAVERSE_NO_DECRYPT) && BP_IS_PROTECTED(bp)) + zio_flags |= ZIO_FLAG_RAW; + (void) arc_read(NULL, td->td_spa, bp, NULL, NULL, - ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb); + ZIO_PRIORITY_ASYNC_READ, zio_flags, &flags, zb); } static boolean_t @@ -294,6 +298,8 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp, blkptr_t *cbp; int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT; + 
ASSERT(!BP_IS_PROTECTED(bp)); + err = arc_read(NULL, td->td_spa, bp, arc_getbuf_func, &buf, ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb); if (err != 0) @@ -318,11 +324,18 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp, } } else if (BP_GET_TYPE(bp) == DMU_OT_DNODE) { arc_flags_t flags = ARC_FLAG_WAIT; + uint32_t zio_flags = ZIO_FLAG_CANFAIL; int i; int epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT; + /* + * dnode blocks might have their bonus buffers encrypted, so + * we must be careful to honor TRAVERSE_NO_DECRYPT + */ + if ((td->td_flags & TRAVERSE_NO_DECRYPT) && BP_IS_PROTECTED(bp)) + zio_flags |= ZIO_FLAG_RAW; err = arc_read(NULL, td->td_spa, bp, arc_getbuf_func, &buf, - ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb); + ZIO_PRIORITY_ASYNC_READ, zio_flags, &flags, zb); if (err != 0) goto post; dnode_phys_t *child_dnp = buf->b_data; @@ -340,10 +353,14 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp, break; } } else if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) { + uint32_t zio_flags = ZIO_FLAG_CANFAIL; arc_flags_t flags = ARC_FLAG_WAIT; + if ((td->td_flags & TRAVERSE_NO_DECRYPT) && BP_IS_PROTECTED(bp)) + zio_flags |= ZIO_FLAG_RAW; + err = arc_read(NULL, td->td_spa, bp, arc_getbuf_func, &buf, - ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb); + ZIO_PRIORITY_ASYNC_READ, zio_flags, &flags, zb); if (err != 0) goto post; @@ -492,6 +509,7 @@ traverse_prefetcher(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg) { prefetch_data_t *pfd = arg; + int zio_flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE; arc_flags_t aflags = ARC_FLAG_NOWAIT | ARC_FLAG_PREFETCH | ARC_FLAG_PRESCIENT_PREFETCH; @@ -511,8 +529,11 @@ traverse_prefetcher(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, cv_broadcast(&pfd->pd_cv); mutex_exit(&pfd->pd_mtx); + if ((pfd->pd_flags & TRAVERSE_NO_DECRYPT) && BP_IS_PROTECTED(bp)) + zio_flags |= ZIO_FLAG_RAW; + (void) arc_read(NULL, spa, bp, NULL, NULL, 
ZIO_PRIORITY_ASYNC_READ, - ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE, &aflags, zb); + zio_flags, &aflags, zb); return (0); } @@ -581,15 +602,22 @@ traverse_impl(spa_t *spa, dsl_dataset_t *ds, uint64_t objset, blkptr_t *rootbp, mutex_init(&pd.pd_mtx, NULL, MUTEX_DEFAULT, NULL); cv_init(&pd.pd_cv, NULL, CV_DEFAULT, NULL); + SET_BOOKMARK(&czb, td.td_objset, + ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID); + /* See comment on ZIL traversal in dsl_scan_visitds. */ if (ds != NULL && !ds->ds_is_snapshot && !BP_IS_HOLE(rootbp)) { + enum zio_flag zio_flags = ZIO_FLAG_CANFAIL; arc_flags_t flags = ARC_FLAG_WAIT; objset_phys_t *osp; arc_buf_t *buf; - err = arc_read(NULL, td.td_spa, rootbp, - arc_getbuf_func, &buf, - ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, NULL); + if ((td.td_flags & TRAVERSE_NO_DECRYPT) && + BP_IS_PROTECTED(rootbp)) + zio_flags |= ZIO_FLAG_RAW; + + err = arc_read(NULL, td.td_spa, rootbp, arc_getbuf_func, + &buf, ZIO_PRIORITY_ASYNC_READ, zio_flags, &flags, &czb); if (err != 0) return (err); @@ -603,8 +631,6 @@ traverse_impl(spa_t *spa, dsl_dataset_t *ds, uint64_t objset, blkptr_t *rootbp, &td, TQ_NOQUEUE) == TASKQID_INVALID) pd.pd_exited = B_TRUE; - SET_BOOKMARK(&czb, td.td_objset, - ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID); err = traverse_visitbp(&td, NULL, rootbp, &czb); mutex_enter(&pd.pd_mtx); diff --git a/usr/src/uts/common/fs/zfs/dnode.c b/usr/src/uts/common/fs/zfs/dnode.c index f360eb997e..5a86650d28 100644 --- a/usr/src/uts/common/fs/zfs/dnode.c +++ b/usr/src/uts/common/fs/zfs/dnode.c @@ -136,6 +136,7 @@ dnode_cons(void *arg, void *unused, int kmflag) bzero(&dn->dn_rm_spillblk[0], sizeof (dn->dn_rm_spillblk)); bzero(&dn->dn_next_bonuslen[0], sizeof (dn->dn_next_bonuslen)); bzero(&dn->dn_next_blksz[0], sizeof (dn->dn_next_blksz)); + bzero(&dn->dn_next_maxblkid[0], sizeof (dn->dn_next_maxblkid)); for (i = 0; i < TXG_SIZE; i++) { multilist_link_init(&dn->dn_dirty_link[i]); @@ -196,6 +197,7 @@ dnode_dest(void *arg, void *unused) 
ASSERT0(dn->dn_rm_spillblk[i]); ASSERT0(dn->dn_next_bonuslen[i]); ASSERT0(dn->dn_next_blksz[i]); + ASSERT0(dn->dn_next_maxblkid[i]); } ASSERT0(dn->dn_allocated_txg); @@ -617,6 +619,7 @@ dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs, ASSERT0(dn->dn_next_bonustype[i]); ASSERT0(dn->dn_rm_spillblk[i]); ASSERT0(dn->dn_next_blksz[i]); + ASSERT0(dn->dn_next_maxblkid[i]); ASSERT(!multilist_link_active(&dn->dn_dirty_link[i])); ASSERT3P(list_head(&dn->dn_dirty_records[i]), ==, NULL); ASSERT3P(dn->dn_free_ranges[i], ==, NULL); @@ -659,7 +662,8 @@ dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs, void dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, - dmu_object_type_t bonustype, int bonuslen, int dn_slots, dmu_tx_t *tx) + dmu_object_type_t bonustype, int bonuslen, int dn_slots, + boolean_t keep_spill, dmu_tx_t *tx) { int nblkptr; @@ -708,7 +712,7 @@ dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, dn->dn_next_bonustype[tx->tx_txg&TXG_MASK] = bonustype; if (dn->dn_nblkptr != nblkptr) dn->dn_next_nblkptr[tx->tx_txg&TXG_MASK] = nblkptr; - if (dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR) { + if (dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR && !keep_spill) { dbuf_rm_spill(dn, tx); dnode_rm_spill(dn, tx); } @@ -785,6 +789,8 @@ dnode_move_impl(dnode_t *odn, dnode_t *ndn) sizeof (odn->dn_next_bonuslen)); bcopy(&odn->dn_next_blksz[0], &ndn->dn_next_blksz[0], sizeof (odn->dn_next_blksz)); + bcopy(&odn->dn_next_maxblkid[0], &ndn->dn_next_maxblkid[0], + sizeof (odn->dn_next_maxblkid)); for (i = 0; i < TXG_SIZE; i++) { list_move_tail(&ndn->dn_dirty_records[i], &odn->dn_dirty_records[i]); @@ -1321,7 +1327,11 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, int slots, DNODE_STAT_BUMP(dnode_hold_dbuf_hold); return (SET_ERROR(EIO)); } - err = dbuf_read(db, NULL, DB_RF_CANFAIL); + /* + * We do not need to decrypt to read the dnode so it doesn't matter + * if we get the encrypted or 
decrypted version. + */ + err = dbuf_read(db, NULL, DB_RF_CANFAIL | DB_RF_NO_DECRYPT); if (err) { DNODE_STAT_BUMP(dnode_hold_dbuf_read); dbuf_rele(db, FTAG); @@ -1749,11 +1759,74 @@ fail: return (SET_ERROR(ENOTSUP)); } +static void +dnode_set_nlevels_impl(dnode_t *dn, int new_nlevels, dmu_tx_t *tx) +{ + uint64_t txgoff = tx->tx_txg & TXG_MASK; + int old_nlevels = dn->dn_nlevels; + dmu_buf_impl_t *db; + list_t *list; + dbuf_dirty_record_t *new, *dr, *dr_next; + + ASSERT(RW_WRITE_HELD(&dn->dn_struct_rwlock)); + + dn->dn_nlevels = new_nlevels; + + ASSERT3U(new_nlevels, >, dn->dn_next_nlevels[txgoff]); + dn->dn_next_nlevels[txgoff] = new_nlevels; + + /* dirty the left indirects */ + db = dbuf_hold_level(dn, old_nlevels, 0, FTAG); + ASSERT(db != NULL); + new = dbuf_dirty(db, tx); + dbuf_rele(db, FTAG); + + /* transfer the dirty records to the new indirect */ + mutex_enter(&dn->dn_mtx); + mutex_enter(&new->dt.di.dr_mtx); + list = &dn->dn_dirty_records[txgoff]; + for (dr = list_head(list); dr; dr = dr_next) { + dr_next = list_next(&dn->dn_dirty_records[txgoff], dr); + if (dr->dr_dbuf->db_level != new_nlevels-1 && + dr->dr_dbuf->db_blkid != DMU_BONUS_BLKID && + dr->dr_dbuf->db_blkid != DMU_SPILL_BLKID) { + ASSERT(dr->dr_dbuf->db_level == old_nlevels-1); + list_remove(&dn->dn_dirty_records[txgoff], dr); + list_insert_tail(&new->dt.di.dr_children, dr); + dr->dr_parent = new; + } + } + mutex_exit(&new->dt.di.dr_mtx); + mutex_exit(&dn->dn_mtx); +} + +int +dnode_set_nlevels(dnode_t *dn, int nlevels, dmu_tx_t *tx) +{ + int ret = 0; + + rw_enter(&dn->dn_struct_rwlock, RW_WRITER); + + if (dn->dn_nlevels == nlevels) { + ret = 0; + goto out; + } else if (nlevels < dn->dn_nlevels) { + ret = SET_ERROR(EINVAL); + goto out; + } + + dnode_set_nlevels_impl(dn, nlevels, tx); + +out: + rw_exit(&dn->dn_struct_rwlock); + return (ret); +} + /* read-holding callers must not rely on the lock being continuously held */ void -dnode_new_blkid(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx, boolean_t 
have_read) +dnode_new_blkid(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx, boolean_t have_read, + boolean_t force) { - uint64_t txgoff = tx->tx_txg & TXG_MASK; int epbs, new_nlevels; uint64_t sz; @@ -1777,13 +1850,25 @@ dnode_new_blkid(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx, boolean_t have_read) } } - if (blkid <= dn->dn_maxblkid) + /* + * Raw sends (indicated by the force flag) require that we take the + * given blkid even if the value is lower than the current value. + */ + if (!force && blkid <= dn->dn_maxblkid) goto out; + /* + * We use the (otherwise unused) top bit of dn_next_maxblkid[txgoff] + * to indicate that this field is set. This allows us to set the + * maxblkid to 0 on an existing object in dnode_sync(). + */ dn->dn_maxblkid = blkid; + dn->dn_next_maxblkid[tx->tx_txg & TXG_MASK] = + blkid | DMU_NEXT_MAXBLKID_SET; /* * Compute the number of levels necessary to support the new maxblkid. + * Raw sends will ensure nlevels is set correctly for us. */ new_nlevels = 1; epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT; @@ -1791,40 +1876,11 @@ dnode_new_blkid(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx, boolean_t have_read) sz <= blkid && sz >= dn->dn_nblkptr; sz <<= epbs) new_nlevels++; - if (new_nlevels > dn->dn_nlevels) { - int old_nlevels = dn->dn_nlevels; - dmu_buf_impl_t *db; - list_t *list; - dbuf_dirty_record_t *new, *dr, *dr_next; - - dn->dn_nlevels = new_nlevels; - - ASSERT3U(new_nlevels, >, dn->dn_next_nlevels[txgoff]); - dn->dn_next_nlevels[txgoff] = new_nlevels; - - /* dirty the left indirects */ - db = dbuf_hold_level(dn, old_nlevels, 0, FTAG); - ASSERT(db != NULL); - new = dbuf_dirty(db, tx); - dbuf_rele(db, FTAG); - - /* transfer the dirty records to the new indirect */ - mutex_enter(&dn->dn_mtx); - mutex_enter(&new->dt.di.dr_mtx); - list = &dn->dn_dirty_records[txgoff]; - for (dr = list_head(list); dr; dr = dr_next) { - dr_next = list_next(&dn->dn_dirty_records[txgoff], dr); - if (dr->dr_dbuf->db_level != new_nlevels-1 && - dr->dr_dbuf->db_blkid != 
DMU_BONUS_BLKID && - dr->dr_dbuf->db_blkid != DMU_SPILL_BLKID) { - ASSERT(dr->dr_dbuf->db_level == old_nlevels-1); - list_remove(&dn->dn_dirty_records[txgoff], dr); - list_insert_tail(&new->dt.di.dr_children, dr); - dr->dr_parent = new; - } - } - mutex_exit(&new->dt.di.dr_mtx); - mutex_exit(&dn->dn_mtx); + if (!force) { + if (new_nlevels > dn->dn_nlevels) + dnode_set_nlevels_impl(dn, new_nlevels, tx); + } else { + ASSERT3U(dn->dn_nlevels, >=, new_nlevels); } out: @@ -2249,7 +2305,8 @@ dnode_next_offset_level(dnode_t *dn, int flags, uint64_t *offset, */ return (SET_ERROR(ESRCH)); } - error = dbuf_read(db, NULL, DB_RF_CANFAIL | DB_RF_HAVESTRUCT); + error = dbuf_read(db, NULL, + DB_RF_CANFAIL | DB_RF_HAVESTRUCT | DB_RF_NO_DECRYPT); if (error) { dbuf_rele(db, FTAG); return (error); diff --git a/usr/src/uts/common/fs/zfs/dnode_sync.c b/usr/src/uts/common/fs/zfs/dnode_sync.c index 9283356608..f5ee8a290d 100644 --- a/usr/src/uts/common/fs/zfs/dnode_sync.c +++ b/usr/src/uts/common/fs/zfs/dnode_sync.c @@ -31,6 +31,7 @@ #include <sys/dmu.h> #include <sys/dmu_tx.h> #include <sys/dmu_objset.h> +#include <sys/dmu_recv.h> #include <sys/dsl_dataset.h> #include <sys/spa.h> #include <sys/range_tree.h> @@ -383,7 +384,21 @@ dnode_sync_free_range_impl(dnode_t *dn, uint64_t blkid, uint64_t nblks, } } - if (trunc) { + /* + * Do not truncate the maxblkid if we are performing a raw + * receive. The raw receive sets the maxblkid manually and + * must not be overridden. Usually, the last DRR_FREE record + * will be at the maxblkid, because the source system sets + * the maxblkid when truncating. However, if the last block + * was freed by overwriting with zeros and being compressed + * away to a hole, the source system will generate a DRR_FREE + * record while leaving the maxblkid after the end of that + * record. 
In this case we need to leave the maxblkid as + * indicated in the DRR_OBJECT record, so that it matches the + * source system, ensuring that the cryptographic hashes will + * match. + */ + if (trunc && !dn->dn_objset->os_raw_receive) { dn->dn_phys->dn_maxblkid = blkid == 0 ? 0 : blkid - 1; uint64_t off = (dn->dn_phys->dn_maxblkid + 1) * @@ -545,6 +560,7 @@ dnode_sync_free(dnode_t *dn, dmu_tx_t *tx) dn->dn_next_nlevels[txgoff] = 0; dn->dn_next_indblkshift[txgoff] = 0; dn->dn_next_blksz[txgoff] = 0; + dn->dn_next_maxblkid[txgoff] = 0; /* ASSERT(blkptrs are zero); */ ASSERT(dn->dn_phys->dn_type != DMU_OT_NONE); @@ -570,7 +586,7 @@ dnode_sync_free(dnode_t *dn, dmu_tx_t *tx) dnode_rele(dn, (void *)(uintptr_t)tx->tx_txg); /* * Now that we've released our hold, the dnode may - * be evicted, so we musn't access it. + * be evicted, so we mustn't access it. */ } @@ -580,6 +596,7 @@ dnode_sync_free(dnode_t *dn, dmu_tx_t *tx) void dnode_sync(dnode_t *dn, dmu_tx_t *tx) { + objset_t *os = dn->dn_objset; dnode_phys_t *dnp = dn->dn_phys; int txgoff = tx->tx_txg & TXG_MASK; list_t *list = &dn->dn_dirty_records[txgoff]; @@ -594,8 +611,13 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx) ASSERT(dn->dn_dbuf == NULL || arc_released(dn->dn_dbuf->db_buf)); - if (dmu_objset_userused_enabled(dn->dn_objset) && - !DMU_OBJECT_IS_SPECIAL(dn->dn_object)) { + /* + * Do user accounting if it is enabled and this is not + * an encrypted receive. + */ + if (dmu_objset_userused_enabled(os) && + !DMU_OBJECT_IS_SPECIAL(dn->dn_object) && + (!os->os_encrypted || !dmu_objset_is_receiving(os))) { mutex_enter(&dn->dn_mtx); dn->dn_oldused = DN_USED_BYTES(dn->dn_phys); dn->dn_oldflags = dn->dn_phys->dn_flags; @@ -603,7 +625,7 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx) mutex_exit(&dn->dn_mtx); dmu_objset_userquota_get_ids(dn, B_FALSE, tx); } else { - /* Once we account for it, we should always account for it. 
*/ + /* Once we account for it, we should always account for it */ ASSERT(!(dn->dn_phys->dn_flags & DNODE_FLAG_USERUSED_ACCOUNTED)); } @@ -740,6 +762,19 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx) dn->dn_next_nlevels[txgoff] = 0; } + /* + * This must be done after dnode_sync_free_range() + * and dnode_increase_indirection(). See dnode_new_blkid() + * for an explanation of the high bit being set. + */ + if (dn->dn_next_maxblkid[txgoff]) { + mutex_enter(&dn->dn_mtx); + dnp->dn_maxblkid = + dn->dn_next_maxblkid[txgoff] & ~DMU_NEXT_MAXBLKID_SET; + dn->dn_next_maxblkid[txgoff] = 0; + mutex_exit(&dn->dn_mtx); + } + if (dn->dn_next_nblkptr[txgoff]) { /* this should only happen on a realloc */ ASSERT(dn->dn_allocated_txg == tx->tx_txg); diff --git a/usr/src/uts/common/fs/zfs/dsl_bookmark.c b/usr/src/uts/common/fs/zfs/dsl_bookmark.c index 0a58115341..a32198402f 100644 --- a/usr/src/uts/common/fs/zfs/dsl_bookmark.c +++ b/usr/src/uts/common/fs/zfs/dsl_bookmark.c @@ -70,6 +70,12 @@ dsl_dataset_bmark_lookup(dsl_dataset_t *ds, const char *shortname, if (dsl_dataset_phys(ds)->ds_flags & DS_FLAG_CI_DATASET) mt = MT_NORMALIZE; + /* + * Zero out the bookmark in case the one stored on disk + * is in an older, shorter format. 
+ */ + bzero(bmark_phys, sizeof (*bmark_phys)); + err = zap_lookup_norm(mos, bmark_zapobj, shortname, sizeof (uint64_t), sizeof (*bmark_phys) / sizeof (uint64_t), bmark_phys, mt, NULL, 0, NULL); @@ -188,8 +194,9 @@ dsl_bookmark_create_sync(void *arg, dmu_tx_t *tx) for (nvpair_t *pair = nvlist_next_nvpair(dbca->dbca_bmarks, NULL); pair != NULL; pair = nvlist_next_nvpair(dbca->dbca_bmarks, pair)) { dsl_dataset_t *snapds, *bmark_fs; - zfs_bookmark_phys_t bmark_phys; + zfs_bookmark_phys_t bmark_phys = { 0 }; char *shortname; + uint32_t bmark_len = BOOKMARK_PHYS_SIZE_V1; VERIFY0(dsl_dataset_hold(dp, fnvpair_value_string(pair), FTAG, &snapds)); @@ -214,10 +221,29 @@ dsl_bookmark_create_sync(void *arg, dmu_tx_t *tx) bmark_phys.zbm_creation_time = dsl_dataset_phys(snapds)->ds_creation_time; + /* + * If the dataset is encrypted create a larger bookmark to + * accommodate the IVset guid. The IVset guid was added + * after the encryption feature to prevent a problem with + * raw sends. If we encounter an encrypted dataset without + * an IVset guid we fall back to a normal bookmark. 
+ */ + if (snapds->ds_dir->dd_crypto_obj != 0 && + spa_feature_is_enabled(dp->dp_spa, + SPA_FEATURE_BOOKMARK_V2)) { + int err = zap_lookup(mos, snapds->ds_object, + DS_FIELD_IVSET_GUID, sizeof (uint64_t), 1, + &bmark_phys.zbm_ivset_guid); + if (err == 0) { + bmark_len = BOOKMARK_PHYS_SIZE_V2; + spa_feature_incr(dp->dp_spa, + SPA_FEATURE_BOOKMARK_V2, tx); + } + } + VERIFY0(zap_add(mos, bmark_fs->ds_bookmarks, shortname, sizeof (uint64_t), - sizeof (zfs_bookmark_phys_t) / sizeof (uint64_t), - &bmark_phys, tx)); + bmark_len / sizeof (uint64_t), &bmark_phys, tx)); spa_history_log_internal_ds(bmark_fs, "bookmark", tx, "name=%s creation_txg=%llu target_snap=%llu", @@ -267,7 +293,7 @@ dsl_get_bookmarks_impl(dsl_dataset_t *ds, nvlist_t *props, nvlist_t *outnvl) zap_cursor_retrieve(&zc, &attr) == 0; zap_cursor_advance(&zc)) { char *bmark_name = attr.za_name; - zfs_bookmark_phys_t bmark_phys; + zfs_bookmark_phys_t bmark_phys = { 0 }; err = dsl_dataset_bmark_lookup(ds, bmark_name, &bmark_phys); ASSERT3U(err, !=, ENOENT); @@ -290,6 +316,11 @@ dsl_get_bookmarks_impl(dsl_dataset_t *ds, nvlist_t *props, nvlist_t *outnvl) dsl_prop_nvlist_add_uint64(out_props, ZFS_PROP_CREATION, bmark_phys.zbm_creation_time); } + if (nvlist_exists(props, + zfs_prop_to_name(ZFS_PROP_IVSET_GUID))) { + dsl_prop_nvlist_add_uint64(out_props, + ZFS_PROP_IVSET_GUID, bmark_phys.zbm_ivset_guid); + } fnvlist_add_nvlist(outnvl, bmark_name, out_props); fnvlist_free(out_props); @@ -337,13 +368,26 @@ typedef struct dsl_bookmark_destroy_arg { static int dsl_dataset_bookmark_remove(dsl_dataset_t *ds, const char *name, dmu_tx_t *tx) { + int err; objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; uint64_t bmark_zapobj = ds->ds_bookmarks; matchtype_t mt = 0; + uint64_t int_size, num_ints; if (dsl_dataset_phys(ds)->ds_flags & DS_FLAG_CI_DATASET) mt = MT_NORMALIZE; + err = zap_length(mos, bmark_zapobj, name, &int_size, &num_ints); + if (err != 0) + return (err); + + ASSERT3U(int_size, ==, sizeof (uint64_t)); + + if 
(num_ints * int_size > BOOKMARK_PHYS_SIZE_V1) { + spa_feature_decr(dmu_objset_spa(mos), + SPA_FEATURE_BOOKMARK_V2, tx); + } + return (zap_remove_norm(mos, bmark_zapobj, name, mt, tx)); } diff --git a/usr/src/uts/common/fs/zfs/dsl_crypt.c b/usr/src/uts/common/fs/zfs/dsl_crypt.c new file mode 100644 index 0000000000..3937d3cb51 --- /dev/null +++ b/usr/src/uts/common/fs/zfs/dsl_crypt.c @@ -0,0 +1,2898 @@ +/* + * CDDL HEADER START + * + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2017, Datto, Inc. All rights reserved. + */ + +#include <sys/dsl_crypt.h> +#include <sys/dsl_pool.h> +#include <sys/zap.h> +#include <sys/zil.h> +#include <sys/dsl_dir.h> +#include <sys/dsl_prop.h> +#include <sys/spa_impl.h> +#include <sys/dmu_objset.h> +#include <sys/zvol.h> + +/* + * This file's primary purpose is for managing master encryption keys in + * memory and on disk. For more info on how these keys are used, see the + * block comment in zio_crypt.c. + * + * All master keys are stored encrypted on disk in the form of the DSL + * Crypto Key ZAP object. The binary key data in this object is always + * randomly generated and is encrypted with the user's wrapping key. This + * layer of indirection allows the user to change their key without + * needing to re-encrypt the entire dataset. The ZAP also holds on to the + * (non-encrypted) encryption algorithm identifier, IV, and MAC needed to + * safely decrypt the master key. For more info on the user's key see the + * block comment in libzfs_crypto.c + * + * In-memory encryption keys are managed through the spa_keystore. 
The + * keystore consists of 3 AVL trees, which are as follows: + * + * The Wrapping Key Tree: + * The wrapping key (wkey) tree stores the user's keys that are fed into the + * kernel through 'zfs load-key' and related commands. Datasets inherit their + * parent's wkey by default, so these structures are refcounted. The wrapping + * keys remain in memory until they are explicitly unloaded (with + * "zfs unload-key"). Unloading is only possible when no datasets are using + * them (refcount=0). + * + * The DSL Crypto Key Tree: + * The DSL Crypto Keys (DCK) are the in-memory representation of decrypted + * master keys. They are used by the functions in zio_crypt.c to perform + * encryption, decryption, and authentication. Snapshots and clones of a given + * dataset will share a DSL Crypto Key, so they are also refcounted. Once the + * refcount on a key hits zero, it is immediately zeroed out and freed. + * + * The Crypto Key Mapping Tree: + * The zio layer needs to lookup master keys by their dataset object id. Since + * the DSL Crypto Keys can belong to multiple datasets, we maintain a tree of + * dsl_key_mapping_t's which essentially just map the dataset object id to its + * appropriate DSL Crypto Key. The management for creating and destroying these + * mappings hooks into the code for owning and disowning datasets. Usually, + * there will only be one active dataset owner, but there are times + * (particularly during dataset creation and destruction) when this may not be + * true or the dataset may not be initialized enough to own. As a result, this + * object is also refcounted. + */ + +/* + * This tunable allows datasets to be raw received even if the stream does + * not include IVset guids or if the guids don't match. This is used as part + * of the resolution for ZPOOL_ERRATA_ZOL_8308_ENCRYPTION. 
+ */ +int zfs_disable_ivset_guid_check = 0; + +static void +dsl_wrapping_key_hold(dsl_wrapping_key_t *wkey, void *tag) +{ + (void) zfs_refcount_add(&wkey->wk_refcnt, tag); +} + +static void +dsl_wrapping_key_rele(dsl_wrapping_key_t *wkey, void *tag) +{ + (void) zfs_refcount_remove(&wkey->wk_refcnt, tag); +} + +static void +dsl_wrapping_key_free(dsl_wrapping_key_t *wkey) +{ + ASSERT0(zfs_refcount_count(&wkey->wk_refcnt)); + + if (wkey->wk_key.ck_data) { + bzero(wkey->wk_key.ck_data, + CRYPTO_BITS2BYTES(wkey->wk_key.ck_length)); + kmem_free(wkey->wk_key.ck_data, + CRYPTO_BITS2BYTES(wkey->wk_key.ck_length)); + } + + zfs_refcount_destroy(&wkey->wk_refcnt); + kmem_free(wkey, sizeof (dsl_wrapping_key_t)); +} + +static int +dsl_wrapping_key_create(uint8_t *wkeydata, zfs_keyformat_t keyformat, + uint64_t salt, uint64_t iters, dsl_wrapping_key_t **wkey_out) +{ + int ret; + dsl_wrapping_key_t *wkey; + + /* allocate the wrapping key */ + wkey = kmem_alloc(sizeof (dsl_wrapping_key_t), KM_SLEEP); + if (!wkey) + return (SET_ERROR(ENOMEM)); + + /* allocate and initialize the underlying crypto key */ + wkey->wk_key.ck_data = kmem_alloc(WRAPPING_KEY_LEN, KM_SLEEP); + if (!wkey->wk_key.ck_data) { + ret = SET_ERROR(ENOMEM); + goto error; + } + + wkey->wk_key.ck_format = CRYPTO_KEY_RAW; + wkey->wk_key.ck_length = CRYPTO_BYTES2BITS(WRAPPING_KEY_LEN); + bcopy(wkeydata, wkey->wk_key.ck_data, WRAPPING_KEY_LEN); + + /* initialize the rest of the struct */ + zfs_refcount_create(&wkey->wk_refcnt); + wkey->wk_keyformat = keyformat; + wkey->wk_salt = salt; + wkey->wk_iters = iters; + + *wkey_out = wkey; + return (0); + +error: + dsl_wrapping_key_free(wkey); + + *wkey_out = NULL; + return (ret); +} + +int +dsl_crypto_params_create_nvlist(dcp_cmd_t cmd, nvlist_t *props, + nvlist_t *crypto_args, dsl_crypto_params_t **dcp_out) +{ + int ret; + uint64_t crypt = ZIO_CRYPT_INHERIT; + uint64_t keyformat = ZFS_KEYFORMAT_NONE; + uint64_t salt = 0, iters = 0; + dsl_crypto_params_t *dcp = NULL; + 
dsl_wrapping_key_t *wkey = NULL;
+	uint8_t *wkeydata = NULL;
+	uint_t wkeydata_len = 0;
+	char *keylocation = NULL;
+
+	dcp = kmem_zalloc(sizeof (dsl_crypto_params_t), KM_SLEEP);
+	if (!dcp) {
+		ret = SET_ERROR(ENOMEM);
+		goto error;
+	}
+
+	/* get relevant properties from the nvlist */
+	dcp->cp_cmd = cmd;
+
+	/* get relevant arguments from the nvlists */
+	if (props != NULL) {
+		(void) nvlist_lookup_uint64(props,
+		    zfs_prop_to_name(ZFS_PROP_ENCRYPTION), &crypt);
+		(void) nvlist_lookup_uint64(props,
+		    zfs_prop_to_name(ZFS_PROP_KEYFORMAT), &keyformat);
+		(void) nvlist_lookup_string(props,
+		    zfs_prop_to_name(ZFS_PROP_KEYLOCATION), &keylocation);
+		(void) nvlist_lookup_uint64(props,
+		    zfs_prop_to_name(ZFS_PROP_PBKDF2_SALT), &salt);
+		(void) nvlist_lookup_uint64(props,
+		    zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS), &iters);
+		dcp->cp_crypt = crypt;
+	}
+
+	if (crypto_args != NULL) {
+		(void) nvlist_lookup_uint8_array(crypto_args, "wkeydata",
+		    &wkeydata, &wkeydata_len);
+	}
+
+	/* check for valid command */
+	if (dcp->cp_cmd >= DCP_CMD_MAX) {
+		ret = SET_ERROR(EINVAL);
+		goto error;
+	} else {
+		dcp->cp_cmd = cmd;
+	}
+
+	/* check for valid crypt */
+	if (dcp->cp_crypt >= ZIO_CRYPT_FUNCTIONS) {
+		ret = SET_ERROR(EINVAL);
+		goto error;
+	} else {
+		dcp->cp_crypt = crypt;
+	}
+
+	/* check for valid keyformat */
+	if (keyformat >= ZFS_KEYFORMAT_FORMATS) {
+		ret = SET_ERROR(EINVAL);
+		goto error;
+	}
+
+	/* check for a valid keylocation (of any kind) and copy it in */
+	if (keylocation != NULL) {
+		if (!zfs_prop_valid_keylocation(keylocation, B_FALSE)) {
+			ret = SET_ERROR(EINVAL);
+			goto error;
+		}
+
+		dcp->cp_keylocation = spa_strdup(keylocation);
+	}
+
+	/* check wrapping key length, if given */
+	if (wkeydata != NULL && wkeydata_len != WRAPPING_KEY_LEN) {
+		ret = SET_ERROR(EINVAL);
+		goto error;
+	}
+
+	/* if the user asked for the default crypt, determine that now */
+	if (dcp->cp_crypt == ZIO_CRYPT_ON)
+		dcp->cp_crypt = ZIO_CRYPT_ON_VALUE;
+
+	/* create the wrapping key 
from the raw data */ + if (wkeydata != NULL) { + /* create the wrapping key with the verified parameters */ + ret = dsl_wrapping_key_create(wkeydata, keyformat, salt, + iters, &wkey); + if (ret != 0) + goto error; + + dcp->cp_wkey = wkey; + } + + /* + * Remove the encryption properties from the nvlist since they are not + * maintained through the DSL. + */ + (void) nvlist_remove_all(props, zfs_prop_to_name(ZFS_PROP_ENCRYPTION)); + (void) nvlist_remove_all(props, zfs_prop_to_name(ZFS_PROP_KEYFORMAT)); + (void) nvlist_remove_all(props, zfs_prop_to_name(ZFS_PROP_PBKDF2_SALT)); + (void) nvlist_remove_all(props, + zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS)); + + *dcp_out = dcp; + + return (0); + +error: + if (wkey != NULL) + dsl_wrapping_key_free(wkey); + if (dcp != NULL) + kmem_free(dcp, sizeof (dsl_crypto_params_t)); + + *dcp_out = NULL; + return (ret); +} + +void +dsl_crypto_params_free(dsl_crypto_params_t *dcp, boolean_t unload) +{ + if (dcp == NULL) + return; + + if (dcp->cp_keylocation != NULL) + spa_strfree(dcp->cp_keylocation); + if (unload && dcp->cp_wkey != NULL) + dsl_wrapping_key_free(dcp->cp_wkey); + + kmem_free(dcp, sizeof (dsl_crypto_params_t)); +} + +static int +spa_crypto_key_compare(const void *a, const void *b) +{ + const dsl_crypto_key_t *dcka = a; + const dsl_crypto_key_t *dckb = b; + + if (dcka->dck_obj < dckb->dck_obj) + return (-1); + if (dcka->dck_obj > dckb->dck_obj) + return (1); + return (0); +} + +static int +spa_key_mapping_compare(const void *a, const void *b) +{ + const dsl_key_mapping_t *kma = a; + const dsl_key_mapping_t *kmb = b; + + if (kma->km_dsobj < kmb->km_dsobj) + return (-1); + if (kma->km_dsobj > kmb->km_dsobj) + return (1); + return (0); +} + +static int +spa_wkey_compare(const void *a, const void *b) +{ + const dsl_wrapping_key_t *wka = a; + const dsl_wrapping_key_t *wkb = b; + + if (wka->wk_ddobj < wkb->wk_ddobj) + return (-1); + if (wka->wk_ddobj > wkb->wk_ddobj) + return (1); + return (0); +} + +void 
+spa_keystore_init(spa_keystore_t *sk) +{ + rw_init(&sk->sk_dk_lock, NULL, RW_DEFAULT, NULL); + rw_init(&sk->sk_km_lock, NULL, RW_DEFAULT, NULL); + rw_init(&sk->sk_wkeys_lock, NULL, RW_DEFAULT, NULL); + avl_create(&sk->sk_dsl_keys, spa_crypto_key_compare, + sizeof (dsl_crypto_key_t), + offsetof(dsl_crypto_key_t, dck_avl_link)); + avl_create(&sk->sk_key_mappings, spa_key_mapping_compare, + sizeof (dsl_key_mapping_t), + offsetof(dsl_key_mapping_t, km_avl_link)); + avl_create(&sk->sk_wkeys, spa_wkey_compare, sizeof (dsl_wrapping_key_t), + offsetof(dsl_wrapping_key_t, wk_avl_link)); +} + +void +spa_keystore_fini(spa_keystore_t *sk) +{ + dsl_wrapping_key_t *wkey; + void *cookie = NULL; + + ASSERT(avl_is_empty(&sk->sk_dsl_keys)); + ASSERT(avl_is_empty(&sk->sk_key_mappings)); + + while ((wkey = avl_destroy_nodes(&sk->sk_wkeys, &cookie)) != NULL) + dsl_wrapping_key_free(wkey); + + avl_destroy(&sk->sk_wkeys); + avl_destroy(&sk->sk_key_mappings); + avl_destroy(&sk->sk_dsl_keys); + rw_destroy(&sk->sk_wkeys_lock); + rw_destroy(&sk->sk_km_lock); + rw_destroy(&sk->sk_dk_lock); +} + +static int +dsl_dir_get_encryption_root_ddobj(dsl_dir_t *dd, uint64_t *rddobj) +{ + if (dd->dd_crypto_obj == 0) + return (SET_ERROR(ENOENT)); + + return (zap_lookup(dd->dd_pool->dp_meta_objset, dd->dd_crypto_obj, + DSL_CRYPTO_KEY_ROOT_DDOBJ, 8, 1, rddobj)); +} + +int +dsl_dir_get_encryption_version(dsl_dir_t *dd, uint64_t *version) +{ + *version = 0; + + if (dd->dd_crypto_obj == 0) + return (SET_ERROR(ENOENT)); + + /* version 0 is implied by ENOENT */ + (void) zap_lookup(dd->dd_pool->dp_meta_objset, dd->dd_crypto_obj, + DSL_CRYPTO_KEY_VERSION, 8, 1, version); + + return (0); +} + +boolean_t +dsl_dir_incompatible_encryption_version(dsl_dir_t *dd) +{ + int ret; + uint64_t version = 0; + + ret = dsl_dir_get_encryption_version(dd, &version); + if (ret != 0) + return (B_FALSE); + + return (version != ZIO_CRYPT_KEY_CURRENT_VERSION); +} + +static int +spa_keystore_wkey_hold_ddobj_impl(spa_t *spa, uint64_t 
ddobj, + void *tag, dsl_wrapping_key_t **wkey_out) +{ + int ret; + dsl_wrapping_key_t search_wkey; + dsl_wrapping_key_t *found_wkey; + + ASSERT(RW_LOCK_HELD(&spa->spa_keystore.sk_wkeys_lock)); + + /* init the search wrapping key */ + search_wkey.wk_ddobj = ddobj; + + /* lookup the wrapping key */ + found_wkey = avl_find(&spa->spa_keystore.sk_wkeys, &search_wkey, NULL); + if (!found_wkey) { + ret = SET_ERROR(ENOENT); + goto error; + } + + /* increment the refcount */ + dsl_wrapping_key_hold(found_wkey, tag); + + *wkey_out = found_wkey; + return (0); + +error: + *wkey_out = NULL; + return (ret); +} + +static int +spa_keystore_wkey_hold_dd(spa_t *spa, dsl_dir_t *dd, void *tag, + dsl_wrapping_key_t **wkey_out) +{ + int ret; + dsl_wrapping_key_t *wkey; + uint64_t rddobj; + boolean_t locked = B_FALSE; + + if (!RW_WRITE_HELD(&spa->spa_keystore.sk_wkeys_lock)) { + rw_enter(&spa->spa_keystore.sk_wkeys_lock, RW_READER); + locked = B_TRUE; + } + + /* get the ddobj that the keylocation property was inherited from */ + ret = dsl_dir_get_encryption_root_ddobj(dd, &rddobj); + if (ret != 0) + goto error; + + /* lookup the wkey in the avl tree */ + ret = spa_keystore_wkey_hold_ddobj_impl(spa, rddobj, tag, &wkey); + if (ret != 0) + goto error; + + /* unlock the wkey tree if we locked it */ + if (locked) + rw_exit(&spa->spa_keystore.sk_wkeys_lock); + + *wkey_out = wkey; + return (0); + +error: + if (locked) + rw_exit(&spa->spa_keystore.sk_wkeys_lock); + + *wkey_out = NULL; + return (ret); +} + +int +dsl_crypto_can_set_keylocation(const char *dsname, const char *keylocation) +{ + int ret = 0; + dsl_dir_t *dd = NULL; + dsl_pool_t *dp = NULL; + uint64_t rddobj; + + /* hold the dsl dir */ + ret = dsl_pool_hold(dsname, FTAG, &dp); + if (ret != 0) + goto out; + + ret = dsl_dir_hold(dp, dsname, FTAG, &dd, NULL); + if (ret != 0) + goto out; + + /* if dd is not encrypted, the value may only be "none" */ + if (dd->dd_crypto_obj == 0) { + if (strcmp(keylocation, "none") != 0) { + ret = 
SET_ERROR(EACCES); + goto out; + } + + ret = 0; + goto out; + } + + /* check for a valid keylocation for encrypted datasets */ + if (!zfs_prop_valid_keylocation(keylocation, B_TRUE)) { + ret = SET_ERROR(EINVAL); + goto out; + } + + /* check that this is an encryption root */ + ret = dsl_dir_get_encryption_root_ddobj(dd, &rddobj); + if (ret != 0) + goto out; + + if (rddobj != dd->dd_object) { + ret = SET_ERROR(EACCES); + goto out; + } + + dsl_dir_rele(dd, FTAG); + dsl_pool_rele(dp, FTAG); + + return (0); + +out: + if (dd != NULL) + dsl_dir_rele(dd, FTAG); + if (dp != NULL) + dsl_pool_rele(dp, FTAG); + + return (ret); +} + +static void +dsl_crypto_key_free(dsl_crypto_key_t *dck) +{ + ASSERT(zfs_refcount_count(&dck->dck_holds) == 0); + + /* destroy the zio_crypt_key_t */ + zio_crypt_key_destroy(&dck->dck_key); + + /* free the refcount, wrapping key, and lock */ + zfs_refcount_destroy(&dck->dck_holds); + if (dck->dck_wkey) + dsl_wrapping_key_rele(dck->dck_wkey, dck); + + /* free the key */ + kmem_free(dck, sizeof (dsl_crypto_key_t)); +} + +static void +dsl_crypto_key_rele(dsl_crypto_key_t *dck, void *tag) +{ + if (zfs_refcount_remove(&dck->dck_holds, tag) == 0) + dsl_crypto_key_free(dck); +} + +static int +dsl_crypto_key_open(objset_t *mos, dsl_wrapping_key_t *wkey, + uint64_t dckobj, void *tag, dsl_crypto_key_t **dck_out) +{ + int ret; + uint64_t crypt = 0, guid = 0, version = 0; + uint8_t raw_keydata[MASTER_KEY_MAX_LEN]; + uint8_t raw_hmac_keydata[SHA512_HMAC_KEYLEN]; + uint8_t iv[WRAPPING_IV_LEN]; + uint8_t mac[WRAPPING_MAC_LEN]; + dsl_crypto_key_t *dck; + + /* allocate and initialize the key */ + dck = kmem_zalloc(sizeof (dsl_crypto_key_t), KM_SLEEP); + if (!dck) + return (SET_ERROR(ENOMEM)); + + /* fetch all of the values we need from the ZAP */ + ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_CRYPTO_SUITE, 8, 1, + &crypt); + if (ret != 0) + goto error; + + ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_GUID, 8, 1, &guid); + if (ret != 0) + goto error; + + ret = 
zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_MASTER_KEY, 1, + MASTER_KEY_MAX_LEN, raw_keydata); + if (ret != 0) + goto error; + + ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_HMAC_KEY, 1, + SHA512_HMAC_KEYLEN, raw_hmac_keydata); + if (ret != 0) + goto error; + + ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_IV, 1, WRAPPING_IV_LEN, + iv); + if (ret != 0) + goto error; + + ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_MAC, 1, WRAPPING_MAC_LEN, + mac); + if (ret != 0) + goto error; + + /* the initial on-disk format for encryption did not have a version */ + (void) zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_VERSION, 8, 1, &version); + + /* + * Unwrap the keys. If there is an error return EACCES to indicate + * an authentication failure. + */ + ret = zio_crypt_key_unwrap(&wkey->wk_key, crypt, version, guid, + raw_keydata, raw_hmac_keydata, iv, mac, &dck->dck_key); + if (ret != 0) { + ret = SET_ERROR(EACCES); + goto error; + } + + /* finish initializing the dsl_crypto_key_t */ + zfs_refcount_create(&dck->dck_holds); + dsl_wrapping_key_hold(wkey, dck); + dck->dck_wkey = wkey; + dck->dck_obj = dckobj; + (void) zfs_refcount_add(&dck->dck_holds, tag); + + *dck_out = dck; + return (0); + +error: + if (dck != NULL) { + bzero(dck, sizeof (dsl_crypto_key_t)); + kmem_free(dck, sizeof (dsl_crypto_key_t)); + } + + *dck_out = NULL; + return (ret); +} + +static int +spa_keystore_dsl_key_hold_impl(spa_t *spa, uint64_t dckobj, void *tag, + dsl_crypto_key_t **dck_out) +{ + int ret; + dsl_crypto_key_t search_dck; + dsl_crypto_key_t *found_dck; + + ASSERT(RW_LOCK_HELD(&spa->spa_keystore.sk_dk_lock)); + + /* init the search key */ + search_dck.dck_obj = dckobj; + + /* find the matching key in the keystore */ + found_dck = avl_find(&spa->spa_keystore.sk_dsl_keys, &search_dck, NULL); + if (!found_dck) { + ret = SET_ERROR(ENOENT); + goto error; + } + + /* increment the refcount */ + (void) zfs_refcount_add(&found_dck->dck_holds, tag); + + *dck_out = found_dck; + return (0); + +error: + *dck_out = NULL; + 
return (ret); +} + +static int +spa_keystore_dsl_key_hold_dd(spa_t *spa, dsl_dir_t *dd, void *tag, + dsl_crypto_key_t **dck_out) +{ + int ret; + avl_index_t where; + dsl_crypto_key_t *dck_io = NULL, *dck_ks = NULL; + dsl_wrapping_key_t *wkey = NULL; + uint64_t dckobj = dd->dd_crypto_obj; + + /* Lookup the key in the tree of currently loaded keys */ + rw_enter(&spa->spa_keystore.sk_dk_lock, RW_READER); + ret = spa_keystore_dsl_key_hold_impl(spa, dckobj, tag, &dck_ks); + rw_exit(&spa->spa_keystore.sk_dk_lock); + if (ret == 0) { + *dck_out = dck_ks; + return (0); + } + + /* Lookup the wrapping key from the keystore */ + ret = spa_keystore_wkey_hold_dd(spa, dd, FTAG, &wkey); + if (ret != 0) { + *dck_out = NULL; + return (SET_ERROR(EACCES)); + } + + /* Read the key from disk */ + ret = dsl_crypto_key_open(spa->spa_meta_objset, wkey, dckobj, + tag, &dck_io); + if (ret != 0) { + dsl_wrapping_key_rele(wkey, FTAG); + *dck_out = NULL; + return (ret); + } + + /* + * Add the key to the keystore. It may already exist if it was + * added while performing the read from disk. In this case discard + * it and return the key from the keystore. 
+ */ + rw_enter(&spa->spa_keystore.sk_dk_lock, RW_WRITER); + ret = spa_keystore_dsl_key_hold_impl(spa, dckobj, tag, &dck_ks); + if (ret != 0) { + (void) avl_find(&spa->spa_keystore.sk_dsl_keys, dck_io, &where); + avl_insert(&spa->spa_keystore.sk_dsl_keys, dck_io, where); + *dck_out = dck_io; + } else { + dsl_crypto_key_free(dck_io); + *dck_out = dck_ks; + } + + /* Release the wrapping key (the dsl key now has a reference to it) */ + dsl_wrapping_key_rele(wkey, FTAG); + rw_exit(&spa->spa_keystore.sk_dk_lock); + + return (0); +} + +void +spa_keystore_dsl_key_rele(spa_t *spa, dsl_crypto_key_t *dck, void *tag) +{ + rw_enter(&spa->spa_keystore.sk_dk_lock, RW_WRITER); + + if (zfs_refcount_remove(&dck->dck_holds, tag) == 0) { + avl_remove(&spa->spa_keystore.sk_dsl_keys, dck); + dsl_crypto_key_free(dck); + } + + rw_exit(&spa->spa_keystore.sk_dk_lock); +} + +int +spa_keystore_load_wkey_impl(spa_t *spa, dsl_wrapping_key_t *wkey) +{ + int ret; + avl_index_t where; + dsl_wrapping_key_t *found_wkey; + + rw_enter(&spa->spa_keystore.sk_wkeys_lock, RW_WRITER); + + /* insert the wrapping key into the keystore */ + found_wkey = avl_find(&spa->spa_keystore.sk_wkeys, wkey, &where); + if (found_wkey != NULL) { + ret = SET_ERROR(EEXIST); + goto error_unlock; + } + avl_insert(&spa->spa_keystore.sk_wkeys, wkey, where); + + rw_exit(&spa->spa_keystore.sk_wkeys_lock); + + return (0); + +error_unlock: + rw_exit(&spa->spa_keystore.sk_wkeys_lock); + return (ret); +} + +int +spa_keystore_load_wkey(const char *dsname, dsl_crypto_params_t *dcp, + boolean_t noop) +{ + int ret; + dsl_dir_t *dd = NULL; + dsl_crypto_key_t *dck = NULL; + dsl_wrapping_key_t *wkey = dcp->cp_wkey; + dsl_pool_t *dp = NULL; + uint64_t keyformat, salt, iters; + + /* + * We don't validate the wrapping key's keyformat, salt, or iters + * since they will never be needed after the DCK has been wrapped. 
+ */ + if (dcp->cp_wkey == NULL || + dcp->cp_cmd != DCP_CMD_NONE || + dcp->cp_crypt != ZIO_CRYPT_INHERIT || + dcp->cp_keylocation != NULL) + return (SET_ERROR(EINVAL)); + + ret = dsl_pool_hold(dsname, FTAG, &dp); + if (ret != 0) + goto error; + + if (!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_ENCRYPTION)) { + ret = (SET_ERROR(ENOTSUP)); + goto error; + } + + /* hold the dsl dir */ + ret = dsl_dir_hold(dp, dsname, FTAG, &dd, NULL); + if (ret != 0) + goto error; + + /* initialize the wkey's ddobj */ + wkey->wk_ddobj = dd->dd_object; + + /* verify that the wkey is correct by opening its dsl key */ + ret = dsl_crypto_key_open(dp->dp_meta_objset, wkey, + dd->dd_crypto_obj, FTAG, &dck); + if (ret != 0) + goto error; + + /* initialize the wkey encryption parameters from the DSL Crypto Key */ + ret = zap_lookup(dp->dp_meta_objset, dd->dd_crypto_obj, + zfs_prop_to_name(ZFS_PROP_KEYFORMAT), 8, 1, &keyformat); + if (ret != 0) + goto error; + + ret = zap_lookup(dp->dp_meta_objset, dd->dd_crypto_obj, + zfs_prop_to_name(ZFS_PROP_PBKDF2_SALT), 8, 1, &salt); + if (ret != 0) + goto error; + + ret = zap_lookup(dp->dp_meta_objset, dd->dd_crypto_obj, + zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS), 8, 1, &iters); + if (ret != 0) + goto error; + + ASSERT3U(keyformat, <, ZFS_KEYFORMAT_FORMATS); + ASSERT3U(keyformat, !=, ZFS_KEYFORMAT_NONE); + IMPLY(keyformat == ZFS_KEYFORMAT_PASSPHRASE, iters != 0); + IMPLY(keyformat == ZFS_KEYFORMAT_PASSPHRASE, salt != 0); + IMPLY(keyformat != ZFS_KEYFORMAT_PASSPHRASE, iters == 0); + IMPLY(keyformat != ZFS_KEYFORMAT_PASSPHRASE, salt == 0); + + wkey->wk_keyformat = keyformat; + wkey->wk_salt = salt; + wkey->wk_iters = iters; + + /* + * At this point we have verified the wkey and confirmed that it can + * be used to decrypt a DSL Crypto Key. We can simply cleanup and + * return if this is all the user wanted to do. 
+ */ + if (noop) + goto error; + + /* insert the wrapping key into the keystore */ + ret = spa_keystore_load_wkey_impl(dp->dp_spa, wkey); + if (ret != 0) + goto error; + + dsl_crypto_key_rele(dck, FTAG); + dsl_dir_rele(dd, FTAG); + dsl_pool_rele(dp, FTAG); + + return (0); + +error: + if (dck != NULL) + dsl_crypto_key_rele(dck, FTAG); + if (dd != NULL) + dsl_dir_rele(dd, FTAG); + if (dp != NULL) + dsl_pool_rele(dp, FTAG); + + return (ret); +} + +int +spa_keystore_unload_wkey_impl(spa_t *spa, uint64_t ddobj) +{ + int ret; + dsl_wrapping_key_t search_wkey; + dsl_wrapping_key_t *found_wkey; + + /* init the search wrapping key */ + search_wkey.wk_ddobj = ddobj; + + rw_enter(&spa->spa_keystore.sk_wkeys_lock, RW_WRITER); + + /* remove the wrapping key from the keystore */ + found_wkey = avl_find(&spa->spa_keystore.sk_wkeys, + &search_wkey, NULL); + if (!found_wkey) { + ret = SET_ERROR(EACCES); + goto error_unlock; + } else if (zfs_refcount_count(&found_wkey->wk_refcnt) != 0) { + ret = SET_ERROR(EBUSY); + goto error_unlock; + } + avl_remove(&spa->spa_keystore.sk_wkeys, found_wkey); + + rw_exit(&spa->spa_keystore.sk_wkeys_lock); + + /* free the wrapping key */ + dsl_wrapping_key_free(found_wkey); + + return (0); + +error_unlock: + rw_exit(&spa->spa_keystore.sk_wkeys_lock); + return (ret); +} + +int +spa_keystore_unload_wkey(const char *dsname) +{ + int ret = 0; + dsl_dir_t *dd = NULL; + dsl_pool_t *dp = NULL; + spa_t *spa = NULL; + + ret = spa_open(dsname, &spa, FTAG); + if (ret != 0) + return (ret); + + /* + * Wait for any outstanding txg IO to complete, releasing any + * remaining references on the wkey. 
+ */ + if (spa_mode(spa) != FREAD) + txg_wait_synced(spa->spa_dsl_pool, 0); + + spa_close(spa, FTAG); + + /* hold the dsl dir */ + ret = dsl_pool_hold(dsname, FTAG, &dp); + if (ret != 0) + goto error; + + if (!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_ENCRYPTION)) { + ret = (SET_ERROR(ENOTSUP)); + goto error; + } + + ret = dsl_dir_hold(dp, dsname, FTAG, &dd, NULL); + if (ret != 0) + goto error; + + /* unload the wkey */ + ret = spa_keystore_unload_wkey_impl(dp->dp_spa, dd->dd_object); + if (ret != 0) + goto error; + + dsl_dir_rele(dd, FTAG); + dsl_pool_rele(dp, FTAG); + + return (0); + +error: + if (dd != NULL) + dsl_dir_rele(dd, FTAG); + if (dp != NULL) + dsl_pool_rele(dp, FTAG); + + return (ret); +} + +void +key_mapping_add_ref(dsl_key_mapping_t *km, void *tag) +{ + ASSERT3U(zfs_refcount_count(&km->km_refcnt), >=, 1); + (void) zfs_refcount_add(&km->km_refcnt, tag); +} + +/* + * The locking here is a little tricky to ensure we don't cause unnecessary + * performance problems. We want to release a key mapping whenever someone + * decrements the refcount to 0, but freeing the mapping requires removing + * it from the spa_keystore, which requires holding sk_km_lock as a writer. + * Most of the time we don't want to hold this lock as a writer, since the + * same lock is held as a reader for each IO that needs to encrypt / decrypt + * data for any dataset and in practice we will only actually free the + * mapping after unmounting a dataset. + */ +void +key_mapping_rele(spa_t *spa, dsl_key_mapping_t *km, void *tag) +{ + ASSERT3U(zfs_refcount_count(&km->km_refcnt), >=, 1); + + if (zfs_refcount_remove(&km->km_refcnt, tag) != 0) + return; + + /* + * We think we are going to need to free the mapping. Add a + * reference to prevent most other releasers from thinking + * this might be their responsibility. This is inherently + * racy, so we will confirm that we are legitimately the + * last holder once we have the sk_km_lock as a writer. 
+ */ + (void) zfs_refcount_add(&km->km_refcnt, FTAG); + + rw_enter(&spa->spa_keystore.sk_km_lock, RW_WRITER); + if (zfs_refcount_remove(&km->km_refcnt, FTAG) != 0) { + rw_exit(&spa->spa_keystore.sk_km_lock); + return; + } + + avl_remove(&spa->spa_keystore.sk_key_mappings, km); + rw_exit(&spa->spa_keystore.sk_km_lock); + + spa_keystore_dsl_key_rele(spa, km->km_key, km); + kmem_free(km, sizeof (dsl_key_mapping_t)); +} + +int +spa_keystore_create_mapping(spa_t *spa, dsl_dataset_t *ds, void *tag, + dsl_key_mapping_t **km_out) +{ + int ret; + avl_index_t where; + dsl_key_mapping_t *km, *found_km; + boolean_t should_free = B_FALSE; + + /* Allocate and initialize the mapping */ + km = kmem_zalloc(sizeof (dsl_key_mapping_t), KM_SLEEP); + zfs_refcount_create(&km->km_refcnt); + + ret = spa_keystore_dsl_key_hold_dd(spa, ds->ds_dir, km, &km->km_key); + if (ret != 0) { + zfs_refcount_destroy(&km->km_refcnt); + kmem_free(km, sizeof (dsl_key_mapping_t)); + + if (km_out != NULL) + *km_out = NULL; + return (ret); + } + + km->km_dsobj = ds->ds_object; + + rw_enter(&spa->spa_keystore.sk_km_lock, RW_WRITER); + + /* + * If a mapping already exists, simply increment its refcount and + * cleanup the one we made. We want to allocate / free outside of + * the lock because this lock is also used by the zio layer to lookup + * key mappings. Otherwise, use the one we created. Normally, there will + * only be one active reference at a time (the objset owner), but there + * are times when there could be multiple async users. 
+ */ + found_km = avl_find(&spa->spa_keystore.sk_key_mappings, km, &where); + if (found_km != NULL) { + should_free = B_TRUE; + (void) zfs_refcount_add(&found_km->km_refcnt, tag); + if (km_out != NULL) + *km_out = found_km; + } else { + (void) zfs_refcount_add(&km->km_refcnt, tag); + avl_insert(&spa->spa_keystore.sk_key_mappings, km, where); + if (km_out != NULL) + *km_out = km; + } + + rw_exit(&spa->spa_keystore.sk_km_lock); + + if (should_free) { + spa_keystore_dsl_key_rele(spa, km->km_key, km); + zfs_refcount_destroy(&km->km_refcnt); + kmem_free(km, sizeof (dsl_key_mapping_t)); + } + + return (0); +} + +int +spa_keystore_remove_mapping(spa_t *spa, uint64_t dsobj, void *tag) +{ + int ret; + dsl_key_mapping_t search_km; + dsl_key_mapping_t *found_km; + + /* init the search key mapping */ + search_km.km_dsobj = dsobj; + + rw_enter(&spa->spa_keystore.sk_km_lock, RW_READER); + + /* find the matching mapping */ + found_km = avl_find(&spa->spa_keystore.sk_key_mappings, + &search_km, NULL); + if (found_km == NULL) { + ret = SET_ERROR(ENOENT); + goto error_unlock; + } + + rw_exit(&spa->spa_keystore.sk_km_lock); + + key_mapping_rele(spa, found_km, tag); + + return (0); + +error_unlock: + rw_exit(&spa->spa_keystore.sk_km_lock); + return (ret); +} + +/* + * This function is primarily used by the zio and arc layer to lookup + * DSL Crypto Keys for encryption. Callers must release the key with + * spa_keystore_dsl_key_rele(). The function may also be called with + * dck_out == NULL and tag == NULL to simply check that a key exists + * without getting a reference to it. 
+ */
+int
+spa_keystore_lookup_key(spa_t *spa, uint64_t dsobj, void *tag,
+    dsl_crypto_key_t **dck_out)
+{
+	int ret;
+	dsl_key_mapping_t search_km;
+	dsl_key_mapping_t *found_km;
+
+	ASSERT((tag != NULL && dck_out != NULL) ||
+	    (tag == NULL && dck_out == NULL));
+
+	/* init the search key mapping */
+	search_km.km_dsobj = dsobj;
+
+	rw_enter(&spa->spa_keystore.sk_km_lock, RW_READER);
+
+	/* remove the mapping from the tree */
+	found_km = avl_find(&spa->spa_keystore.sk_key_mappings, &search_km,
+	    NULL);
+	if (found_km == NULL) {
+		ret = SET_ERROR(ENOENT);
+		goto error_unlock;
+	}
+
+	if (found_km && tag)
+		(void) zfs_refcount_add(&found_km->km_key->dck_holds, tag);
+
+	rw_exit(&spa->spa_keystore.sk_km_lock);
+
+	if (dck_out != NULL)
+		*dck_out = found_km->km_key;
+	return (0);
+
+error_unlock:
+	rw_exit(&spa->spa_keystore.sk_km_lock);
+
+	if (dck_out != NULL)
+		*dck_out = NULL;
+	return (ret);
+}
+
+static int
+dmu_objset_check_wkey_loaded(dsl_dir_t *dd)
+{
+	int ret;
+	dsl_wrapping_key_t *wkey = NULL;
+
+	ret = spa_keystore_wkey_hold_dd(dd->dd_pool->dp_spa, dd, FTAG,
+	    &wkey);
+	if (ret != 0)
+		return (SET_ERROR(EACCES));
+
+	dsl_wrapping_key_rele(wkey, FTAG);
+
+	return (0);
+}
+
+static zfs_keystatus_t
+dsl_dataset_get_keystatus(dsl_dir_t *dd)
+{
+	/* check if this dd has a dsl key */
+	if (dd->dd_crypto_obj == 0)
+		return (ZFS_KEYSTATUS_NONE);
+
+	return (dmu_objset_check_wkey_loaded(dd) == 0 ? 
+ ZFS_KEYSTATUS_AVAILABLE : ZFS_KEYSTATUS_UNAVAILABLE); +} + +static int +dsl_dir_get_crypt(dsl_dir_t *dd, uint64_t *crypt) +{ + if (dd->dd_crypto_obj == 0) { + *crypt = ZIO_CRYPT_OFF; + return (0); + } + + return (zap_lookup(dd->dd_pool->dp_meta_objset, dd->dd_crypto_obj, + DSL_CRYPTO_KEY_CRYPTO_SUITE, 8, 1, crypt)); +} + +static void +dsl_crypto_key_sync_impl(objset_t *mos, uint64_t dckobj, uint64_t crypt, + uint64_t root_ddobj, uint64_t guid, uint8_t *iv, uint8_t *mac, + uint8_t *keydata, uint8_t *hmac_keydata, uint64_t keyformat, + uint64_t salt, uint64_t iters, dmu_tx_t *tx) +{ + VERIFY0(zap_update(mos, dckobj, DSL_CRYPTO_KEY_CRYPTO_SUITE, 8, 1, + &crypt, tx)); + VERIFY0(zap_update(mos, dckobj, DSL_CRYPTO_KEY_ROOT_DDOBJ, 8, 1, + &root_ddobj, tx)); + VERIFY0(zap_update(mos, dckobj, DSL_CRYPTO_KEY_GUID, 8, 1, + &guid, tx)); + VERIFY0(zap_update(mos, dckobj, DSL_CRYPTO_KEY_IV, 1, WRAPPING_IV_LEN, + iv, tx)); + VERIFY0(zap_update(mos, dckobj, DSL_CRYPTO_KEY_MAC, 1, WRAPPING_MAC_LEN, + mac, tx)); + VERIFY0(zap_update(mos, dckobj, DSL_CRYPTO_KEY_MASTER_KEY, 1, + MASTER_KEY_MAX_LEN, keydata, tx)); + VERIFY0(zap_update(mos, dckobj, DSL_CRYPTO_KEY_HMAC_KEY, 1, + SHA512_HMAC_KEYLEN, hmac_keydata, tx)); + VERIFY0(zap_update(mos, dckobj, zfs_prop_to_name(ZFS_PROP_KEYFORMAT), + 8, 1, &keyformat, tx)); + VERIFY0(zap_update(mos, dckobj, zfs_prop_to_name(ZFS_PROP_PBKDF2_SALT), + 8, 1, &salt, tx)); + VERIFY0(zap_update(mos, dckobj, zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS), + 8, 1, &iters, tx)); +} + +static void +dsl_crypto_key_sync(dsl_crypto_key_t *dck, dmu_tx_t *tx) +{ + zio_crypt_key_t *key = &dck->dck_key; + dsl_wrapping_key_t *wkey = dck->dck_wkey; + uint8_t keydata[MASTER_KEY_MAX_LEN]; + uint8_t hmac_keydata[SHA512_HMAC_KEYLEN]; + uint8_t iv[WRAPPING_IV_LEN]; + uint8_t mac[WRAPPING_MAC_LEN]; + + ASSERT(dmu_tx_is_syncing(tx)); + ASSERT3U(key->zk_crypt, <, ZIO_CRYPT_FUNCTIONS); + + /* encrypt and store the keys along with the IV and MAC */ + 
VERIFY0(zio_crypt_key_wrap(&dck->dck_wkey->wk_key, key, iv, mac, + keydata, hmac_keydata)); + + /* update the ZAP with the obtained values */ + dsl_crypto_key_sync_impl(tx->tx_pool->dp_meta_objset, dck->dck_obj, + key->zk_crypt, wkey->wk_ddobj, key->zk_guid, iv, mac, keydata, + hmac_keydata, wkey->wk_keyformat, wkey->wk_salt, wkey->wk_iters, + tx); +} + +typedef struct spa_keystore_change_key_args { + const char *skcka_dsname; + dsl_crypto_params_t *skcka_cp; +} spa_keystore_change_key_args_t; + +static int +spa_keystore_change_key_check(void *arg, dmu_tx_t *tx) +{ + int ret; + dsl_dir_t *dd = NULL; + dsl_pool_t *dp = dmu_tx_pool(tx); + spa_keystore_change_key_args_t *skcka = arg; + dsl_crypto_params_t *dcp = skcka->skcka_cp; + uint64_t rddobj; + + /* check for the encryption feature */ + if (!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_ENCRYPTION)) { + ret = SET_ERROR(ENOTSUP); + goto error; + } + + /* check for valid key change command */ + if (dcp->cp_cmd != DCP_CMD_NEW_KEY && + dcp->cp_cmd != DCP_CMD_INHERIT && + dcp->cp_cmd != DCP_CMD_FORCE_NEW_KEY && + dcp->cp_cmd != DCP_CMD_FORCE_INHERIT) { + ret = SET_ERROR(EINVAL); + goto error; + } + + /* hold the dd */ + ret = dsl_dir_hold(dp, skcka->skcka_dsname, FTAG, &dd, NULL); + if (ret != 0) + goto error; + + /* verify that the dataset is encrypted */ + if (dd->dd_crypto_obj == 0) { + ret = SET_ERROR(EINVAL); + goto error; + } + + /* clones must always use their origin's key */ + if (dsl_dir_is_clone(dd)) { + ret = SET_ERROR(EINVAL); + goto error; + } + + /* lookup the ddobj we are inheriting the keylocation from */ + ret = dsl_dir_get_encryption_root_ddobj(dd, &rddobj); + if (ret != 0) + goto error; + + /* Handle inheritance */ + if (dcp->cp_cmd == DCP_CMD_INHERIT || + dcp->cp_cmd == DCP_CMD_FORCE_INHERIT) { + /* no other encryption params should be given */ + if (dcp->cp_crypt != ZIO_CRYPT_INHERIT || + dcp->cp_keylocation != NULL || + dcp->cp_wkey != NULL) { + ret = SET_ERROR(EINVAL); + goto error; + } + + /* 
check that this is an encryption root */ + if (dd->dd_object != rddobj) { + ret = SET_ERROR(EINVAL); + goto error; + } + + /* check that the parent is encrypted */ + if (dd->dd_parent->dd_crypto_obj == 0) { + ret = SET_ERROR(EINVAL); + goto error; + } + + /* if we are rewrapping check that both keys are loaded */ + if (dcp->cp_cmd == DCP_CMD_INHERIT) { + ret = dmu_objset_check_wkey_loaded(dd); + if (ret != 0) + goto error; + + ret = dmu_objset_check_wkey_loaded(dd->dd_parent); + if (ret != 0) + goto error; + } + + dsl_dir_rele(dd, FTAG); + return (0); + } + + /* handle forcing an encryption root without rewrapping */ + if (dcp->cp_cmd == DCP_CMD_FORCE_NEW_KEY) { + /* no other encryption params should be given */ + if (dcp->cp_crypt != ZIO_CRYPT_INHERIT || + dcp->cp_keylocation != NULL || + dcp->cp_wkey != NULL) { + ret = SET_ERROR(EINVAL); + goto error; + } + + /* check that this is not an encryption root */ + if (dd->dd_object == rddobj) { + ret = SET_ERROR(EINVAL); + goto error; + } + + dsl_dir_rele(dd, FTAG); + return (0); + } + + /* crypt cannot be changed after creation */ + if (dcp->cp_crypt != ZIO_CRYPT_INHERIT) { + ret = SET_ERROR(EINVAL); + goto error; + } + + /* we are not inheriting our parent's wkey so we need one ourselves */ + if (dcp->cp_wkey == NULL) { + ret = SET_ERROR(EINVAL); + goto error; + } + + /* check for a valid keyformat for the new wrapping key */ + if (dcp->cp_wkey->wk_keyformat >= ZFS_KEYFORMAT_FORMATS || + dcp->cp_wkey->wk_keyformat == ZFS_KEYFORMAT_NONE) { + ret = SET_ERROR(EINVAL); + goto error; + } + + /* + * If this dataset is not currently an encryption root we need a new + * keylocation for this dataset's new wrapping key. Otherwise we can + * just keep the one we already had. 
+ */ + if (dd->dd_object != rddobj && dcp->cp_keylocation == NULL) { + ret = SET_ERROR(EINVAL); + goto error; + } + + /* check that the keylocation is valid if it is not NULL */ + if (dcp->cp_keylocation != NULL && + !zfs_prop_valid_keylocation(dcp->cp_keylocation, B_TRUE)) { + ret = SET_ERROR(EINVAL); + goto error; + } + + /* passphrases require pbkdf2 salt and iters */ + if (dcp->cp_wkey->wk_keyformat == ZFS_KEYFORMAT_PASSPHRASE) { + if (dcp->cp_wkey->wk_salt == 0 || + dcp->cp_wkey->wk_iters < MIN_PBKDF2_ITERATIONS) { + ret = SET_ERROR(EINVAL); + goto error; + } + } else { + if (dcp->cp_wkey->wk_salt != 0 || dcp->cp_wkey->wk_iters != 0) { + ret = SET_ERROR(EINVAL); + goto error; + } + } + + /* make sure the dd's wkey is loaded */ + ret = dmu_objset_check_wkey_loaded(dd); + if (ret != 0) + goto error; + + dsl_dir_rele(dd, FTAG); + + return (0); + +error: + if (dd != NULL) + dsl_dir_rele(dd, FTAG); + + return (ret); +} + + +static void +spa_keystore_change_key_sync_impl(uint64_t rddobj, uint64_t ddobj, + uint64_t new_rddobj, dsl_wrapping_key_t *wkey, dmu_tx_t *tx) +{ + zap_cursor_t *zc; + zap_attribute_t *za; + dsl_pool_t *dp = dmu_tx_pool(tx); + dsl_dir_t *dd = NULL; + dsl_crypto_key_t *dck = NULL; + uint64_t curr_rddobj; + + ASSERT(RW_WRITE_HELD(&dp->dp_spa->spa_keystore.sk_wkeys_lock)); + + /* hold the dd */ + VERIFY0(dsl_dir_hold_obj(dp, ddobj, NULL, FTAG, &dd)); + + /* ignore hidden dsl dirs */ + if (dd->dd_myname[0] == '$' || dd->dd_myname[0] == '%') { + dsl_dir_rele(dd, FTAG); + return; + } + + /* + * Stop recursing if this dsl dir didn't inherit from the root + * or if this dd is a clone. + */ + VERIFY0(dsl_dir_get_encryption_root_ddobj(dd, &curr_rddobj)); + if (curr_rddobj != rddobj || dsl_dir_is_clone(dd)) { + dsl_dir_rele(dd, FTAG); + return; + } + + /* + * If we don't have a wrapping key just update the dck to reflect the + * new encryption root. Otherwise rewrap the entire dck and re-sync it + * to disk. 
+ */ + if (wkey == NULL) { + VERIFY0(zap_update(dp->dp_meta_objset, dd->dd_crypto_obj, + DSL_CRYPTO_KEY_ROOT_DDOBJ, 8, 1, &new_rddobj, tx)); + } else { + VERIFY0(spa_keystore_dsl_key_hold_dd(dp->dp_spa, dd, + FTAG, &dck)); + dsl_wrapping_key_hold(wkey, dck); + dsl_wrapping_key_rele(dck->dck_wkey, dck); + dck->dck_wkey = wkey; + dsl_crypto_key_sync(dck, tx); + spa_keystore_dsl_key_rele(dp->dp_spa, dck, FTAG); + } + + zc = kmem_alloc(sizeof (zap_cursor_t), KM_SLEEP); + za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP); + + /* Recurse into all child dsl dirs. */ + for (zap_cursor_init(zc, dp->dp_meta_objset, + dsl_dir_phys(dd)->dd_child_dir_zapobj); + zap_cursor_retrieve(zc, za) == 0; + zap_cursor_advance(zc)) { + spa_keystore_change_key_sync_impl(rddobj, + za->za_first_integer, new_rddobj, wkey, tx); + } + zap_cursor_fini(zc); + + kmem_free(za, sizeof (zap_attribute_t)); + kmem_free(zc, sizeof (zap_cursor_t)); + + dsl_dir_rele(dd, FTAG); +} + +static void +spa_keystore_change_key_sync(void *arg, dmu_tx_t *tx) +{ + dsl_dataset_t *ds; + avl_index_t where; + dsl_pool_t *dp = dmu_tx_pool(tx); + spa_t *spa = dp->dp_spa; + spa_keystore_change_key_args_t *skcka = arg; + dsl_crypto_params_t *dcp = skcka->skcka_cp; + dsl_wrapping_key_t *wkey = NULL, *found_wkey; + dsl_wrapping_key_t wkey_search; + char *keylocation = dcp->cp_keylocation; + uint64_t rddobj, new_rddobj; + + /* create and initialize the wrapping key */ + VERIFY0(dsl_dataset_hold(dp, skcka->skcka_dsname, FTAG, &ds)); + ASSERT(!ds->ds_is_snapshot); + + if (dcp->cp_cmd == DCP_CMD_NEW_KEY || + dcp->cp_cmd == DCP_CMD_FORCE_NEW_KEY) { + /* + * We are changing to a new wkey. Set additional properties + * which can be sent along with this ioctl. Note that this + * command can set keylocation even if it can't normally be + * set via 'zfs set' due to a non-local keylocation. 
+ */ + if (dcp->cp_cmd == DCP_CMD_NEW_KEY) { + wkey = dcp->cp_wkey; + wkey->wk_ddobj = ds->ds_dir->dd_object; + } else { + keylocation = "prompt"; + } + + if (keylocation != NULL) { + dsl_prop_set_sync_impl(ds, + zfs_prop_to_name(ZFS_PROP_KEYLOCATION), + ZPROP_SRC_LOCAL, 1, strlen(keylocation) + 1, + keylocation, tx); + } + + VERIFY0(dsl_dir_get_encryption_root_ddobj(ds->ds_dir, &rddobj)); + new_rddobj = ds->ds_dir->dd_object; + } else { + /* + * We are inheriting the parent's wkey. Unset any local + * keylocation and grab a reference to the wkey. + */ + if (dcp->cp_cmd == DCP_CMD_INHERIT) { + VERIFY0(spa_keystore_wkey_hold_dd(spa, + ds->ds_dir->dd_parent, FTAG, &wkey)); + } + + dsl_prop_set_sync_impl(ds, + zfs_prop_to_name(ZFS_PROP_KEYLOCATION), ZPROP_SRC_NONE, + 0, 0, NULL, tx); + + rddobj = ds->ds_dir->dd_object; + VERIFY0(dsl_dir_get_encryption_root_ddobj(ds->ds_dir->dd_parent, + &new_rddobj)); + } + + if (wkey == NULL) { + ASSERT(dcp->cp_cmd == DCP_CMD_FORCE_INHERIT || + dcp->cp_cmd == DCP_CMD_FORCE_NEW_KEY); + } + + rw_enter(&spa->spa_keystore.sk_wkeys_lock, RW_WRITER); + + /* recurse through all children and rewrap their keys */ + spa_keystore_change_key_sync_impl(rddobj, ds->ds_dir->dd_object, + new_rddobj, wkey, tx); + + /* + * All references to the old wkey should be released now (if it + * existed). Replace the wrapping key. 
+ */ + wkey_search.wk_ddobj = ds->ds_dir->dd_object; + found_wkey = avl_find(&spa->spa_keystore.sk_wkeys, &wkey_search, NULL); + if (found_wkey != NULL) { + ASSERT0(zfs_refcount_count(&found_wkey->wk_refcnt)); + avl_remove(&spa->spa_keystore.sk_wkeys, found_wkey); + dsl_wrapping_key_free(found_wkey); + } + + if (dcp->cp_cmd == DCP_CMD_NEW_KEY) { + (void) avl_find(&spa->spa_keystore.sk_wkeys, wkey, &where); + avl_insert(&spa->spa_keystore.sk_wkeys, wkey, where); + } else if (wkey != NULL) { + dsl_wrapping_key_rele(wkey, FTAG); + } + + rw_exit(&spa->spa_keystore.sk_wkeys_lock); + + dsl_dataset_rele(ds, FTAG); +} + +int +spa_keystore_change_key(const char *dsname, dsl_crypto_params_t *dcp) +{ + spa_keystore_change_key_args_t skcka; + + /* initialize the args struct */ + skcka.skcka_dsname = dsname; + skcka.skcka_cp = dcp; + + /* + * Perform the actual work in syncing context. The blocks modified + * here could be calculated but it would require holding the pool + * lock and traversing all of the datasets that will have their keys + * changed. 
+ */ + return (dsl_sync_task(dsname, spa_keystore_change_key_check, + spa_keystore_change_key_sync, &skcka, 15, + ZFS_SPACE_CHECK_RESERVED)); +} + +int +dsl_dir_rename_crypt_check(dsl_dir_t *dd, dsl_dir_t *newparent) +{ + int ret; + uint64_t curr_rddobj, parent_rddobj; + + if (dd->dd_crypto_obj == 0) { + /* children of encrypted parents must be encrypted */ + if (newparent->dd_crypto_obj != 0) { + ret = SET_ERROR(EACCES); + goto error; + } + + return (0); + } + + ret = dsl_dir_get_encryption_root_ddobj(dd, &curr_rddobj); + if (ret != 0) + goto error; + + /* + * if this is not an encryption root, we must make sure we are not + * moving dd to a new encryption root + */ + if (dd->dd_object != curr_rddobj) { + ret = dsl_dir_get_encryption_root_ddobj(newparent, + &parent_rddobj); + if (ret != 0) + goto error; + + if (parent_rddobj != curr_rddobj) { + ret = SET_ERROR(EACCES); + goto error; + } + } + + return (0); + +error: + return (ret); +} + +/* + * Check to make sure that a promote from targetdd to origindd will not require + * any key rewraps. + */ +int +dsl_dataset_promote_crypt_check(dsl_dir_t *target, dsl_dir_t *origin) +{ + int ret; + uint64_t rddobj, op_rddobj, tp_rddobj; + + /* If the dataset is not encrypted we don't need to check anything */ + if (origin->dd_crypto_obj == 0) + return (0); + + /* + * If we are not changing the first origin snapshot in a chain + * the encryption root won't change either. + */ + if (dsl_dir_is_clone(origin)) + return (0); + + /* + * If the origin is the encryption root we will update + * the DSL Crypto Key to point to the target instead. + */ + ret = dsl_dir_get_encryption_root_ddobj(origin, &rddobj); + if (ret != 0) + return (ret); + + if (rddobj == origin->dd_object) + return (0); + + /* + * The origin is inheriting its encryption root from its parent. + * Check that the parent of the target has the same encryption root. 
+ */ + ret = dsl_dir_get_encryption_root_ddobj(origin->dd_parent, &op_rddobj); + if (ret != 0) + return (ret); + + ret = dsl_dir_get_encryption_root_ddobj(target->dd_parent, &tp_rddobj); + if (ret != 0) + return (ret); + + if (op_rddobj != tp_rddobj) + return (SET_ERROR(EACCES)); + + return (0); +} + +void +dsl_dataset_promote_crypt_sync(dsl_dir_t *target, dsl_dir_t *origin, + dmu_tx_t *tx) +{ + uint64_t rddobj; + dsl_pool_t *dp = target->dd_pool; + dsl_dataset_t *targetds; + dsl_dataset_t *originds; + char *keylocation; + + if (origin->dd_crypto_obj == 0) + return; + if (dsl_dir_is_clone(origin)) + return; + + VERIFY0(dsl_dir_get_encryption_root_ddobj(origin, &rddobj)); + + if (rddobj != origin->dd_object) + return; + + /* + * If the target is being promoted to the encryption root update the + * DSL Crypto Key and keylocation to reflect that. We also need to + * update the DSL Crypto Keys of all children inheriting their + * encryption root to point to the new target. Otherwise, the check + * function ensured that the encryption root will not change. 
+ */ + keylocation = kmem_alloc(ZAP_MAXVALUELEN, KM_SLEEP); + + VERIFY0(dsl_dataset_hold_obj(dp, + dsl_dir_phys(target)->dd_head_dataset_obj, FTAG, &targetds)); + VERIFY0(dsl_dataset_hold_obj(dp, + dsl_dir_phys(origin)->dd_head_dataset_obj, FTAG, &originds)); + + VERIFY0(dsl_prop_get_dd(origin, zfs_prop_to_name(ZFS_PROP_KEYLOCATION), + 1, ZAP_MAXVALUELEN, keylocation, NULL, B_FALSE)); + dsl_prop_set_sync_impl(targetds, zfs_prop_to_name(ZFS_PROP_KEYLOCATION), + ZPROP_SRC_LOCAL, 1, strlen(keylocation) + 1, keylocation, tx); + dsl_prop_set_sync_impl(originds, zfs_prop_to_name(ZFS_PROP_KEYLOCATION), + ZPROP_SRC_NONE, 0, 0, NULL, tx); + + rw_enter(&dp->dp_spa->spa_keystore.sk_wkeys_lock, RW_WRITER); + spa_keystore_change_key_sync_impl(rddobj, origin->dd_object, + target->dd_object, NULL, tx); + rw_exit(&dp->dp_spa->spa_keystore.sk_wkeys_lock); + + dsl_dataset_rele(targetds, FTAG); + dsl_dataset_rele(originds, FTAG); + kmem_free(keylocation, ZAP_MAXVALUELEN); +} + +int +dmu_objset_clone_crypt_check(dsl_dir_t *parentdd, dsl_dir_t *origindd) +{ + int ret; + uint64_t pcrypt, crypt; + + /* + * Check that we are not making an unencrypted child of an + * encrypted parent. 
+ */ + ret = dsl_dir_get_crypt(parentdd, &pcrypt); + if (ret != 0) + return (ret); + + ret = dsl_dir_get_crypt(origindd, &crypt); + if (ret != 0) + return (ret); + + ASSERT3U(pcrypt, !=, ZIO_CRYPT_INHERIT); + ASSERT3U(crypt, !=, ZIO_CRYPT_INHERIT); + + if (crypt == ZIO_CRYPT_OFF && pcrypt != ZIO_CRYPT_OFF) + return (SET_ERROR(EINVAL)); + + return (0); +} + + +int +dmu_objset_create_crypt_check(dsl_dir_t *parentdd, dsl_crypto_params_t *dcp, + boolean_t *will_encrypt) +{ + int ret; + uint64_t pcrypt, crypt; + dsl_crypto_params_t dummy_dcp = { 0 }; + + if (will_encrypt != NULL) + *will_encrypt = B_FALSE; + + if (dcp == NULL) + dcp = &dummy_dcp; + + if (dcp->cp_cmd != DCP_CMD_NONE) + return (SET_ERROR(EINVAL)); + + if (parentdd != NULL) { + ret = dsl_dir_get_crypt(parentdd, &pcrypt); + if (ret != 0) + return (ret); + } else { + pcrypt = ZIO_CRYPT_OFF; + } + + crypt = (dcp->cp_crypt == ZIO_CRYPT_INHERIT) ? pcrypt : dcp->cp_crypt; + + ASSERT3U(pcrypt, !=, ZIO_CRYPT_INHERIT); + ASSERT3U(crypt, !=, ZIO_CRYPT_INHERIT); + + /* + * We can't create an unencrypted child of an encrypted parent + * under any circumstances. + */ + if (crypt == ZIO_CRYPT_OFF && pcrypt != ZIO_CRYPT_OFF) + return (SET_ERROR(EINVAL)); + + /* check for valid dcp with no encryption (inherited or local) */ + if (crypt == ZIO_CRYPT_OFF) { + /* Must not specify encryption params */ + if (dcp->cp_wkey != NULL || + (dcp->cp_keylocation != NULL && + strcmp(dcp->cp_keylocation, "none") != 0)) + return (SET_ERROR(EINVAL)); + + return (0); + } + + if (will_encrypt != NULL) + *will_encrypt = B_TRUE; + + /* + * We will now definitely be encrypting. Check the feature flag. When + * creating the pool the caller will check this for us since we won't + * technically have the feature activated yet. 
+ */ + if (parentdd != NULL && + !spa_feature_is_enabled(parentdd->dd_pool->dp_spa, + SPA_FEATURE_ENCRYPTION)) { + return (SET_ERROR(EOPNOTSUPP)); + } + + /* check for errata #4 (encryption enabled, bookmark_v2 disabled) */ + if (parentdd != NULL && + !spa_feature_is_enabled(parentdd->dd_pool->dp_spa, + SPA_FEATURE_BOOKMARK_V2)) { + return (SET_ERROR(EOPNOTSUPP)); + } + + /* handle inheritance */ + if (dcp->cp_wkey == NULL) { + ASSERT3P(parentdd, !=, NULL); + + /* key must be fully unspecified */ + if (dcp->cp_keylocation != NULL) + return (SET_ERROR(EINVAL)); + + /* parent must have a key to inherit */ + if (pcrypt == ZIO_CRYPT_OFF) + return (SET_ERROR(EINVAL)); + + /* check for parent key */ + ret = dmu_objset_check_wkey_loaded(parentdd); + if (ret != 0) + return (ret); + + return (0); + } + + /* At this point we should have a fully specified key. Check location */ + if (dcp->cp_keylocation == NULL || + !zfs_prop_valid_keylocation(dcp->cp_keylocation, B_TRUE)) + return (SET_ERROR(EINVAL)); + + /* Must have fully specified keyformat */ + switch (dcp->cp_wkey->wk_keyformat) { + case ZFS_KEYFORMAT_HEX: + case ZFS_KEYFORMAT_RAW: + /* requires no pbkdf2 iters and salt */ + if (dcp->cp_wkey->wk_salt != 0 || + dcp->cp_wkey->wk_iters != 0) + return (SET_ERROR(EINVAL)); + break; + case ZFS_KEYFORMAT_PASSPHRASE: + /* requires pbkdf2 iters and salt */ + if (dcp->cp_wkey->wk_salt == 0 || + dcp->cp_wkey->wk_iters < MIN_PBKDF2_ITERATIONS) + return (SET_ERROR(EINVAL)); + break; + case ZFS_KEYFORMAT_NONE: + default: + /* keyformat must be specified and valid */ + return (SET_ERROR(EINVAL)); + } + + return (0); +} + +void +dsl_dataset_create_crypt_sync(uint64_t dsobj, dsl_dir_t *dd, + dsl_dataset_t *origin, dsl_crypto_params_t *dcp, dmu_tx_t *tx) +{ + dsl_pool_t *dp = dd->dd_pool; + uint64_t crypt; + dsl_wrapping_key_t *wkey; + + /* clones always use their origin's wrapping key */ + if (dsl_dir_is_clone(dd)) { + ASSERT3P(dcp, ==, NULL); + + /* + * If this is an encrypted clone we 
just need to clone the + * dck into dd. Zapify the dd so we can do that. + */ + if (origin->ds_dir->dd_crypto_obj != 0) { + dmu_buf_will_dirty(dd->dd_dbuf, tx); + dsl_dir_zapify(dd, tx); + + dd->dd_crypto_obj = + dsl_crypto_key_clone_sync(origin->ds_dir, tx); + VERIFY0(zap_add(dp->dp_meta_objset, dd->dd_object, + DD_FIELD_CRYPTO_KEY_OBJ, sizeof (uint64_t), 1, + &dd->dd_crypto_obj, tx)); + } + + return; + } + + /* + * A NULL dcp at this point indicates this is the origin dataset + * which does not have an objset to encrypt. Raw receives will handle + * encryption separately later. In both cases we can simply return. + */ + if (dcp == NULL || dcp->cp_cmd == DCP_CMD_RAW_RECV) + return; + + crypt = dcp->cp_crypt; + wkey = dcp->cp_wkey; + + /* figure out the effective crypt */ + if (crypt == ZIO_CRYPT_INHERIT && dd->dd_parent != NULL) + VERIFY0(dsl_dir_get_crypt(dd->dd_parent, &crypt)); + + /* if we aren't doing encryption just return */ + if (crypt == ZIO_CRYPT_OFF || crypt == ZIO_CRYPT_INHERIT) + return; + + /* zapify the dd so that we can add the crypto key obj to it */ + dmu_buf_will_dirty(dd->dd_dbuf, tx); + dsl_dir_zapify(dd, tx); + + /* use the new key if given or inherit from the parent */ + if (wkey == NULL) { + VERIFY0(spa_keystore_wkey_hold_dd(dp->dp_spa, + dd->dd_parent, FTAG, &wkey)); + } else { + wkey->wk_ddobj = dd->dd_object; + } + + ASSERT3P(wkey, !=, NULL); + + /* Create or clone the DSL crypto key and activate the feature */ + dd->dd_crypto_obj = dsl_crypto_key_create_sync(crypt, wkey, tx); + VERIFY0(zap_add(dp->dp_meta_objset, dd->dd_object, + DD_FIELD_CRYPTO_KEY_OBJ, sizeof (uint64_t), 1, &dd->dd_crypto_obj, + tx)); + dsl_dataset_activate_feature(dsobj, SPA_FEATURE_ENCRYPTION, tx); + + /* + * If we inherited the wrapping key we release our reference now. + * Otherwise, this is a new key and we need to load it into the + * keystore. 
+ */ + if (dcp->cp_wkey == NULL) { + dsl_wrapping_key_rele(wkey, FTAG); + } else { + VERIFY0(spa_keystore_load_wkey_impl(dp->dp_spa, wkey)); + } +} + +typedef struct dsl_crypto_recv_key_arg { + uint64_t dcrka_dsobj; + uint64_t dcrka_fromobj; + dmu_objset_type_t dcrka_ostype; + nvlist_t *dcrka_nvl; + boolean_t dcrka_do_key; +} dsl_crypto_recv_key_arg_t; + +static int +dsl_crypto_recv_raw_objset_check(dsl_dataset_t *ds, dsl_dataset_t *fromds, + dmu_objset_type_t ostype, nvlist_t *nvl, dmu_tx_t *tx) +{ + int ret; + objset_t *os; + dnode_t *mdn; + uint8_t *buf = NULL; + uint_t len; + uint64_t intval, nlevels, blksz, ibs; + uint64_t nblkptr, maxblkid; + + if (ostype != DMU_OST_ZFS && ostype != DMU_OST_ZVOL) + return (SET_ERROR(EINVAL)); + + /* raw receives also need info about the structure of the metadnode */ + ret = nvlist_lookup_uint64(nvl, "mdn_compress", &intval); + if (ret != 0 || intval >= ZIO_COMPRESS_LEGACY_FUNCTIONS) + return (SET_ERROR(EINVAL)); + + ret = nvlist_lookup_uint64(nvl, "mdn_checksum", &intval); + if (ret != 0 || intval >= ZIO_CHECKSUM_LEGACY_FUNCTIONS) + return (SET_ERROR(EINVAL)); + + ret = nvlist_lookup_uint64(nvl, "mdn_nlevels", &nlevels); + if (ret != 0 || nlevels > DN_MAX_LEVELS) + return (SET_ERROR(EINVAL)); + + ret = nvlist_lookup_uint64(nvl, "mdn_blksz", &blksz); + if (ret != 0 || blksz < SPA_MINBLOCKSIZE) + return (SET_ERROR(EINVAL)); + else if (blksz > spa_maxblocksize(tx->tx_pool->dp_spa)) + return (SET_ERROR(ENOTSUP)); + + ret = nvlist_lookup_uint64(nvl, "mdn_indblkshift", &ibs); + if (ret != 0 || ibs < DN_MIN_INDBLKSHIFT || ibs > DN_MAX_INDBLKSHIFT) + return (SET_ERROR(ENOTSUP)); + + ret = nvlist_lookup_uint64(nvl, "mdn_nblkptr", &nblkptr); + if (ret != 0 || nblkptr != DN_MAX_NBLKPTR) + return (SET_ERROR(ENOTSUP)); + + ret = nvlist_lookup_uint64(nvl, "mdn_maxblkid", &maxblkid); + if (ret != 0) + return (SET_ERROR(EINVAL)); + + ret = nvlist_lookup_uint8_array(nvl, "portable_mac", &buf, &len); + if (ret != 0 || len != 
ZIO_OBJSET_MAC_LEN) + return (SET_ERROR(EINVAL)); + + ret = dmu_objset_from_ds(ds, &os); + if (ret != 0) + return (ret); + + /* + * Useraccounting is not portable and must be done with the keys loaded. + * Therefore, whenever we do any kind of receive the useraccounting + * must not be present. + */ + ASSERT0(os->os_flags & OBJSET_FLAG_USERACCOUNTING_COMPLETE); + + mdn = DMU_META_DNODE(os); + + /* + * If we already created the objset, make sure its unchangeable + * properties match the ones received in the nvlist. + */ + rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); + if (!BP_IS_HOLE(dsl_dataset_get_blkptr(ds)) && + (mdn->dn_nlevels != nlevels || mdn->dn_datablksz != blksz || + mdn->dn_indblkshift != ibs || mdn->dn_nblkptr != nblkptr)) { + rrw_exit(&ds->ds_bp_rwlock, FTAG); + return (SET_ERROR(EINVAL)); + } + rrw_exit(&ds->ds_bp_rwlock, FTAG); + + /* + * Check that the ivset guid of the fromds matches the one from the + * send stream. Older versions of the encryption code did not have + * an ivset guid on the from dataset and did not send one in the + * stream. For these streams we provide the + * zfs_disable_ivset_guid_check tunable to allow these datasets to + * be received with a generated ivset guid. + */ + if (fromds != NULL && !zfs_disable_ivset_guid_check) { + uint64_t from_ivset_guid = 0; + intval = 0; + + (void) nvlist_lookup_uint64(nvl, "from_ivset_guid", &intval); + (void) zap_lookup(tx->tx_pool->dp_meta_objset, + fromds->ds_object, DS_FIELD_IVSET_GUID, + sizeof (from_ivset_guid), 1, &from_ivset_guid); + + if (intval == 0 || from_ivset_guid == 0) + return (SET_ERROR(ZFS_ERR_FROM_IVSET_GUID_MISSING)); + + if (intval != from_ivset_guid) + return (SET_ERROR(ZFS_ERR_FROM_IVSET_GUID_MISMATCH)); + } + + /* + * Check that the ivset guid of the fromds matches the one from the + * send stream. Older versions of the encryption code did not have + * an ivset guid on the from dataset and did not send one in the + * stream. 
For these streams we provide the + * zfs_disable_ivset_guid_check tunable to allow these datasets to + * be received with a generated ivset guid. + */ + if (fromds != NULL && !zfs_disable_ivset_guid_check) { + uint64_t from_ivset_guid = 0; + intval = 0; + + (void) nvlist_lookup_uint64(nvl, "from_ivset_guid", &intval); + (void) zap_lookup(tx->tx_pool->dp_meta_objset, + fromds->ds_object, DS_FIELD_IVSET_GUID, + sizeof (from_ivset_guid), 1, &from_ivset_guid); + + if (intval == 0 || from_ivset_guid == 0) + return (SET_ERROR(ZFS_ERR_FROM_IVSET_GUID_MISSING)); + + if (intval != from_ivset_guid) + return (SET_ERROR(ZFS_ERR_FROM_IVSET_GUID_MISMATCH)); + } + + return (0); +} + +static void +dsl_crypto_recv_raw_objset_sync(dsl_dataset_t *ds, dmu_objset_type_t ostype, + nvlist_t *nvl, dmu_tx_t *tx) +{ + dsl_pool_t *dp = tx->tx_pool; + objset_t *os; + dnode_t *mdn; + zio_t *zio; + uint8_t *portable_mac; + uint_t len; + uint64_t compress, checksum, nlevels, blksz, ibs, maxblkid; + boolean_t newds = B_FALSE; + + VERIFY0(dmu_objset_from_ds(ds, &os)); + mdn = DMU_META_DNODE(os); + + /* + * Fetch the values we need from the nvlist. "to_ivset_guid" must + * be set on the snapshot, which doesn't exist yet. The receive + * code will take care of this for us later. 
+ */ + compress = fnvlist_lookup_uint64(nvl, "mdn_compress"); + checksum = fnvlist_lookup_uint64(nvl, "mdn_checksum"); + nlevels = fnvlist_lookup_uint64(nvl, "mdn_nlevels"); + blksz = fnvlist_lookup_uint64(nvl, "mdn_blksz"); + ibs = fnvlist_lookup_uint64(nvl, "mdn_indblkshift"); + maxblkid = fnvlist_lookup_uint64(nvl, "mdn_maxblkid"); + VERIFY0(nvlist_lookup_uint8_array(nvl, "portable_mac", &portable_mac, + &len)); + + /* if we haven't created an objset for the ds yet, do that now */ + rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); + if (BP_IS_HOLE(dsl_dataset_get_blkptr(ds))) { + (void) dmu_objset_create_impl_dnstats(dp->dp_spa, ds, + dsl_dataset_get_blkptr(ds), ostype, nlevels, blksz, + ibs, tx); + newds = B_TRUE; + } + rrw_exit(&ds->ds_bp_rwlock, FTAG); + + /* + * Set the portable MAC. The local MAC will always be zero since the + * incoming data will all be portable and user accounting will be + * deferred until the next mount. Afterwards, flag the os to be + * written out raw next time. + */ + arc_release(os->os_phys_buf, &os->os_phys_buf); + bcopy(portable_mac, os->os_phys->os_portable_mac, ZIO_OBJSET_MAC_LEN); + bzero(os->os_phys->os_local_mac, ZIO_OBJSET_MAC_LEN); + os->os_next_write_raw[tx->tx_txg & TXG_MASK] = B_TRUE; + + /* set metadnode compression and checksum */ + mdn->dn_compress = compress; + mdn->dn_checksum = checksum; + + rw_enter(&mdn->dn_struct_rwlock, RW_WRITER); + dnode_new_blkid(mdn, maxblkid, tx, B_FALSE, B_TRUE); + rw_exit(&mdn->dn_struct_rwlock); + + /* + * We can't normally dirty the dataset in syncing context unless + * we are creating a new dataset. In this case, we perform a + * pseudo txg sync here instead. + */ + if (newds) { + dsl_dataset_dirty(ds, tx); + } else { + zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); + dsl_dataset_sync(ds, zio, tx); + VERIFY0(zio_wait(zio)); + + /* dsl_dataset_sync_done will drop this reference. 
*/ + dmu_buf_add_ref(ds->ds_dbuf, ds); + dsl_dataset_sync_done(ds, tx); + } +} + +int +dsl_crypto_recv_raw_key_check(dsl_dataset_t *ds, nvlist_t *nvl, dmu_tx_t *tx) +{ + int ret; + objset_t *mos = tx->tx_pool->dp_meta_objset; + uint8_t *buf = NULL; + uint_t len; + uint64_t intval, key_guid, version; + boolean_t is_passphrase = B_FALSE; + + ASSERT(dsl_dataset_phys(ds)->ds_flags & DS_FLAG_INCONSISTENT); + + /* + * Read and check all the encryption values from the nvlist. We need + * all of the fields of a DSL Crypto Key, as well as a fully specified + * wrapping key. + */ + ret = nvlist_lookup_uint64(nvl, DSL_CRYPTO_KEY_CRYPTO_SUITE, &intval); + if (ret != 0 || intval >= ZIO_CRYPT_FUNCTIONS || + intval <= ZIO_CRYPT_OFF) + return (SET_ERROR(EINVAL)); + + ret = nvlist_lookup_uint64(nvl, DSL_CRYPTO_KEY_GUID, &intval); + if (ret != 0) + return (SET_ERROR(EINVAL)); + + /* + * If this is an incremental receive make sure the given key guid + * matches the one we already have. + */ + if (ds->ds_dir->dd_crypto_obj != 0) { + ret = zap_lookup(mos, ds->ds_dir->dd_crypto_obj, + DSL_CRYPTO_KEY_GUID, 8, 1, &key_guid); + if (ret != 0) + return (ret); + if (intval != key_guid) + return (SET_ERROR(EACCES)); + } + + ret = nvlist_lookup_uint8_array(nvl, DSL_CRYPTO_KEY_MASTER_KEY, + &buf, &len); + if (ret != 0 || len != MASTER_KEY_MAX_LEN) + return (SET_ERROR(EINVAL)); + + ret = nvlist_lookup_uint8_array(nvl, DSL_CRYPTO_KEY_HMAC_KEY, + &buf, &len); + if (ret != 0 || len != SHA512_HMAC_KEYLEN) + return (SET_ERROR(EINVAL)); + + ret = nvlist_lookup_uint8_array(nvl, DSL_CRYPTO_KEY_IV, &buf, &len); + if (ret != 0 || len != WRAPPING_IV_LEN) + return (SET_ERROR(EINVAL)); + + ret = nvlist_lookup_uint8_array(nvl, DSL_CRYPTO_KEY_MAC, &buf, &len); + if (ret != 0 || len != WRAPPING_MAC_LEN) + return (SET_ERROR(EINVAL)); + + /* + * We don't support receiving old on-disk formats. 
The version 0 + * implementation protected several fields in an objset that were + * not always portable during a raw receive. As a result, we call + * the old version an on-disk errata #3. + */ + ret = nvlist_lookup_uint64(nvl, DSL_CRYPTO_KEY_VERSION, &version); + if (ret != 0 || version != ZIO_CRYPT_KEY_CURRENT_VERSION) + return (SET_ERROR(ENOTSUP)); + + ret = nvlist_lookup_uint64(nvl, zfs_prop_to_name(ZFS_PROP_KEYFORMAT), + &intval); + if (ret != 0 || intval >= ZFS_KEYFORMAT_FORMATS || + intval == ZFS_KEYFORMAT_NONE) + return (SET_ERROR(EINVAL)); + + is_passphrase = (intval == ZFS_KEYFORMAT_PASSPHRASE); + + /* + * for raw receives we allow any number of pbkdf2iters since there + * won't be a chance for the user to change it. + */ + ret = nvlist_lookup_uint64(nvl, zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS), + &intval); + if (ret != 0 || (is_passphrase == (intval == 0))) + return (SET_ERROR(EINVAL)); + + ret = nvlist_lookup_uint64(nvl, zfs_prop_to_name(ZFS_PROP_PBKDF2_SALT), + &intval); + if (ret != 0 || (is_passphrase == (intval == 0))) + return (SET_ERROR(EINVAL)); + + return (0); +} + +void +dsl_crypto_recv_raw_key_sync(dsl_dataset_t *ds, nvlist_t *nvl, dmu_tx_t *tx) +{ + dsl_pool_t *dp = tx->tx_pool; + objset_t *mos = dp->dp_meta_objset; + dsl_dir_t *dd = ds->ds_dir; + uint_t len; + uint64_t rddobj, one = 1; + uint8_t *keydata, *hmac_keydata, *iv, *mac; + uint64_t crypt, key_guid, keyformat, iters, salt; + uint64_t version = ZIO_CRYPT_KEY_CURRENT_VERSION; + char *keylocation = "prompt"; + + /* lookup the values we need to create the DSL Crypto Key */ + crypt = fnvlist_lookup_uint64(nvl, DSL_CRYPTO_KEY_CRYPTO_SUITE); + key_guid = fnvlist_lookup_uint64(nvl, DSL_CRYPTO_KEY_GUID); + keyformat = fnvlist_lookup_uint64(nvl, + zfs_prop_to_name(ZFS_PROP_KEYFORMAT)); + iters = fnvlist_lookup_uint64(nvl, + zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS)); + salt = fnvlist_lookup_uint64(nvl, + zfs_prop_to_name(ZFS_PROP_PBKDF2_SALT)); + VERIFY0(nvlist_lookup_uint8_array(nvl, 
DSL_CRYPTO_KEY_MASTER_KEY, + &keydata, &len)); + VERIFY0(nvlist_lookup_uint8_array(nvl, DSL_CRYPTO_KEY_HMAC_KEY, + &hmac_keydata, &len)); + VERIFY0(nvlist_lookup_uint8_array(nvl, DSL_CRYPTO_KEY_IV, &iv, &len)); + VERIFY0(nvlist_lookup_uint8_array(nvl, DSL_CRYPTO_KEY_MAC, &mac, &len)); + + /* if this is a new dataset setup the DSL Crypto Key. */ + if (dd->dd_crypto_obj == 0) { + /* zapify the dsl dir so we can add the key object to it */ + dmu_buf_will_dirty(dd->dd_dbuf, tx); + dsl_dir_zapify(dd, tx); + + /* create the DSL Crypto Key on disk and activate the feature */ + dd->dd_crypto_obj = zap_create(mos, + DMU_OTN_ZAP_METADATA, DMU_OT_NONE, 0, tx); + VERIFY0(zap_update(tx->tx_pool->dp_meta_objset, + dd->dd_crypto_obj, DSL_CRYPTO_KEY_REFCOUNT, + sizeof (uint64_t), 1, &one, tx)); + VERIFY0(zap_update(tx->tx_pool->dp_meta_objset, + dd->dd_crypto_obj, DSL_CRYPTO_KEY_VERSION, + sizeof (uint64_t), 1, &version, tx)); + + dsl_dataset_activate_feature(ds->ds_object, + SPA_FEATURE_ENCRYPTION, tx); + ds->ds_feature_inuse[SPA_FEATURE_ENCRYPTION] = B_TRUE; + + /* save the dd_crypto_obj on disk */ + VERIFY0(zap_add(mos, dd->dd_object, DD_FIELD_CRYPTO_KEY_OBJ, + sizeof (uint64_t), 1, &dd->dd_crypto_obj, tx)); + + /* + * Set the keylocation to prompt by default. If keylocation + * has been provided via the properties, this will be overridden + * later. 
+ */ + dsl_prop_set_sync_impl(ds, + zfs_prop_to_name(ZFS_PROP_KEYLOCATION), + ZPROP_SRC_LOCAL, 1, strlen(keylocation) + 1, + keylocation, tx); + + rddobj = dd->dd_object; + } else { + VERIFY0(dsl_dir_get_encryption_root_ddobj(dd, &rddobj)); + } + + /* sync the key data to the ZAP object on disk */ + dsl_crypto_key_sync_impl(mos, dd->dd_crypto_obj, crypt, + rddobj, key_guid, iv, mac, keydata, hmac_keydata, keyformat, salt, + iters, tx); +} + +int +dsl_crypto_recv_key_check(void *arg, dmu_tx_t *tx) +{ + int ret; + dsl_crypto_recv_key_arg_t *dcrka = arg; + dsl_dataset_t *ds = NULL, *fromds = NULL; + + ret = dsl_dataset_hold_obj(tx->tx_pool, dcrka->dcrka_dsobj, + FTAG, &ds); + if (ret != 0) + goto out; + + if (dcrka->dcrka_fromobj != 0) { + ret = dsl_dataset_hold_obj(tx->tx_pool, dcrka->dcrka_fromobj, + FTAG, &fromds); + if (ret != 0) + goto out; + } + + ret = dsl_crypto_recv_raw_objset_check(ds, fromds, + dcrka->dcrka_ostype, dcrka->dcrka_nvl, tx); + if (ret != 0) + goto out; + + /* + * We run this check even if we won't be doing this part of + * the receive now so that we don't make the user wait until + * the receive finishes to fail. + */ + ret = dsl_crypto_recv_raw_key_check(ds, dcrka->dcrka_nvl, tx); + if (ret != 0) + goto out; + +out: + if (ds != NULL) + dsl_dataset_rele(ds, FTAG); + if (fromds != NULL) + dsl_dataset_rele(fromds, FTAG); + return (ret); +} + +void +dsl_crypto_recv_key_sync(void *arg, dmu_tx_t *tx) +{ + dsl_crypto_recv_key_arg_t *dcrka = arg; + dsl_dataset_t *ds; + + VERIFY0(dsl_dataset_hold_obj(tx->tx_pool, dcrka->dcrka_dsobj, + FTAG, &ds)); + dsl_crypto_recv_raw_objset_sync(ds, dcrka->dcrka_ostype, + dcrka->dcrka_nvl, tx); + if (dcrka->dcrka_do_key) + dsl_crypto_recv_raw_key_sync(ds, dcrka->dcrka_nvl, tx); + dsl_dataset_rele(ds, FTAG); +} + +/* + * This function is used to sync an nvlist representing a DSL Crypto Key and + * the associated encryption parameters. The key will be written exactly as is + * without wrapping it. 
+ */ +int +dsl_crypto_recv_raw(const char *poolname, uint64_t dsobj, uint64_t fromobj, + dmu_objset_type_t ostype, nvlist_t *nvl, boolean_t do_key) +{ + dsl_crypto_recv_key_arg_t dcrka; + + dcrka.dcrka_dsobj = dsobj; + dcrka.dcrka_fromobj = fromobj; + dcrka.dcrka_ostype = ostype; + dcrka.dcrka_nvl = nvl; + dcrka.dcrka_do_key = do_key; + + return (dsl_sync_task(poolname, dsl_crypto_recv_key_check, + dsl_crypto_recv_key_sync, &dcrka, 1, ZFS_SPACE_CHECK_NORMAL)); +} + +int +dsl_crypto_populate_key_nvlist(dsl_dataset_t *ds, uint64_t from_ivset_guid, + nvlist_t **nvl_out) +{ + int ret; + objset_t *os; + dnode_t *mdn; + uint64_t rddobj; + nvlist_t *nvl = NULL; + uint64_t dckobj = ds->ds_dir->dd_crypto_obj; + dsl_dir_t *rdd = NULL; + dsl_pool_t *dp = ds->ds_dir->dd_pool; + objset_t *mos = dp->dp_meta_objset; + uint64_t crypt = 0, key_guid = 0, format = 0; + uint64_t iters = 0, salt = 0, version = 0; + uint64_t to_ivset_guid = 0; + uint8_t raw_keydata[MASTER_KEY_MAX_LEN]; + uint8_t raw_hmac_keydata[SHA512_HMAC_KEYLEN]; + uint8_t iv[WRAPPING_IV_LEN]; + uint8_t mac[WRAPPING_MAC_LEN]; + + ASSERT(dckobj != 0); + + VERIFY0(dmu_objset_from_ds(ds, &os)); + mdn = DMU_META_DNODE(os); + + ret = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP); + if (ret != 0) + goto error; + + /* lookup values from the DSL Crypto Key */ + ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_CRYPTO_SUITE, 8, 1, + &crypt); + if (ret != 0) + goto error; + + ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_GUID, 8, 1, &key_guid); + if (ret != 0) + goto error; + + ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_MASTER_KEY, 1, + MASTER_KEY_MAX_LEN, raw_keydata); + if (ret != 0) + goto error; + + ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_HMAC_KEY, 1, + SHA512_HMAC_KEYLEN, raw_hmac_keydata); + if (ret != 0) + goto error; + + ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_IV, 1, WRAPPING_IV_LEN, + iv); + if (ret != 0) + goto error; + + ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_MAC, 1, WRAPPING_MAC_LEN, + mac); + if (ret != 
0) + goto error; + + /* see zfs_disable_ivset_guid_check tunable for errata info */ + ret = zap_lookup(mos, ds->ds_object, DS_FIELD_IVSET_GUID, 8, 1, + &to_ivset_guid); + if (ret != 0) + ASSERT3U(dp->dp_spa->spa_errata, !=, 0); + + /* + * We don't support raw sends of legacy on-disk formats. See the + * comment in dsl_crypto_recv_key_check() for details. + */ + ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_VERSION, 8, 1, &version); + if (ret != 0 || version != ZIO_CRYPT_KEY_CURRENT_VERSION) { + dp->dp_spa->spa_errata = ZPOOL_ERRATA_ZOL_6845_ENCRYPTION; + ret = SET_ERROR(ENOTSUP); + goto error; + } + + /* + * Lookup wrapping key properties. An early version of the code did + * not correctly add these values to the wrapping key or the DSL + * Crypto Key on disk for non encryption roots, so to be safe we + * always take the slightly circuitous route of looking it up from + * the encryption root's key. + */ + ret = dsl_dir_get_encryption_root_ddobj(ds->ds_dir, &rddobj); + if (ret != 0) + goto error; + + dsl_pool_config_enter(dp, FTAG); + + ret = dsl_dir_hold_obj(dp, rddobj, NULL, FTAG, &rdd); + if (ret != 0) + goto error_unlock; + + ret = zap_lookup(dp->dp_meta_objset, rdd->dd_crypto_obj, + zfs_prop_to_name(ZFS_PROP_KEYFORMAT), 8, 1, &format); + if (ret != 0) + goto error_unlock; + + if (format == ZFS_KEYFORMAT_PASSPHRASE) { + ret = zap_lookup(dp->dp_meta_objset, rdd->dd_crypto_obj, + zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS), 8, 1, &iters); + if (ret != 0) + goto error_unlock; + + ret = zap_lookup(dp->dp_meta_objset, rdd->dd_crypto_obj, + zfs_prop_to_name(ZFS_PROP_PBKDF2_SALT), 8, 1, &salt); + if (ret != 0) + goto error_unlock; + } + + dsl_dir_rele(rdd, FTAG); + dsl_pool_config_exit(dp, FTAG); + + fnvlist_add_uint64(nvl, DSL_CRYPTO_KEY_CRYPTO_SUITE, crypt); + fnvlist_add_uint64(nvl, DSL_CRYPTO_KEY_GUID, key_guid); + fnvlist_add_uint64(nvl, DSL_CRYPTO_KEY_VERSION, version); + VERIFY0(nvlist_add_uint8_array(nvl, DSL_CRYPTO_KEY_MASTER_KEY, + raw_keydata, 
MASTER_KEY_MAX_LEN)); + VERIFY0(nvlist_add_uint8_array(nvl, DSL_CRYPTO_KEY_HMAC_KEY, + raw_hmac_keydata, SHA512_HMAC_KEYLEN)); + VERIFY0(nvlist_add_uint8_array(nvl, DSL_CRYPTO_KEY_IV, iv, + WRAPPING_IV_LEN)); + VERIFY0(nvlist_add_uint8_array(nvl, DSL_CRYPTO_KEY_MAC, mac, + WRAPPING_MAC_LEN)); + VERIFY0(nvlist_add_uint8_array(nvl, "portable_mac", + os->os_phys->os_portable_mac, ZIO_OBJSET_MAC_LEN)); + fnvlist_add_uint64(nvl, zfs_prop_to_name(ZFS_PROP_KEYFORMAT), format); + fnvlist_add_uint64(nvl, zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS), iters); + fnvlist_add_uint64(nvl, zfs_prop_to_name(ZFS_PROP_PBKDF2_SALT), salt); + fnvlist_add_uint64(nvl, "mdn_checksum", mdn->dn_checksum); + fnvlist_add_uint64(nvl, "mdn_compress", mdn->dn_compress); + fnvlist_add_uint64(nvl, "mdn_nlevels", mdn->dn_nlevels); + fnvlist_add_uint64(nvl, "mdn_blksz", mdn->dn_datablksz); + fnvlist_add_uint64(nvl, "mdn_indblkshift", mdn->dn_indblkshift); + fnvlist_add_uint64(nvl, "mdn_nblkptr", mdn->dn_nblkptr); + fnvlist_add_uint64(nvl, "mdn_maxblkid", mdn->dn_maxblkid); + fnvlist_add_uint64(nvl, "to_ivset_guid", to_ivset_guid); + fnvlist_add_uint64(nvl, "from_ivset_guid", from_ivset_guid); + + *nvl_out = nvl; + return (0); + +error_unlock: + dsl_pool_config_exit(dp, FTAG); +error: + if (rdd != NULL) + dsl_dir_rele(rdd, FTAG); + nvlist_free(nvl); + + *nvl_out = NULL; + return (ret); +} + +uint64_t +dsl_crypto_key_create_sync(uint64_t crypt, dsl_wrapping_key_t *wkey, + dmu_tx_t *tx) +{ + dsl_crypto_key_t dck; + uint64_t version = ZIO_CRYPT_KEY_CURRENT_VERSION; + uint64_t one = 1ULL; + + ASSERT(dmu_tx_is_syncing(tx)); + ASSERT3U(crypt, <, ZIO_CRYPT_FUNCTIONS); + ASSERT3U(crypt, >, ZIO_CRYPT_OFF); + + /* create the DSL Crypto Key ZAP object */ + dck.dck_obj = zap_create(tx->tx_pool->dp_meta_objset, + DMU_OTN_ZAP_METADATA, DMU_OT_NONE, 0, tx); + + /* fill in the key (on the stack) and sync it to disk */ + dck.dck_wkey = wkey; + VERIFY0(zio_crypt_key_init(crypt, &dck.dck_key)); + + dsl_crypto_key_sync(&dck, 
tx); + VERIFY0(zap_update(tx->tx_pool->dp_meta_objset, dck.dck_obj, + DSL_CRYPTO_KEY_REFCOUNT, sizeof (uint64_t), 1, &one, tx)); + VERIFY0(zap_update(tx->tx_pool->dp_meta_objset, dck.dck_obj, + DSL_CRYPTO_KEY_VERSION, sizeof (uint64_t), 1, &version, tx)); + + zio_crypt_key_destroy(&dck.dck_key); + bzero(&dck.dck_key, sizeof (zio_crypt_key_t)); + + return (dck.dck_obj); +} + +uint64_t +dsl_crypto_key_clone_sync(dsl_dir_t *origindd, dmu_tx_t *tx) +{ + objset_t *mos = tx->tx_pool->dp_meta_objset; + + ASSERT(dmu_tx_is_syncing(tx)); + + VERIFY0(zap_increment(mos, origindd->dd_crypto_obj, + DSL_CRYPTO_KEY_REFCOUNT, 1, tx)); + + return (origindd->dd_crypto_obj); +} + +void +dsl_crypto_key_destroy_sync(uint64_t dckobj, dmu_tx_t *tx) +{ + objset_t *mos = tx->tx_pool->dp_meta_objset; + uint64_t refcnt; + + /* Decrement the refcount, destroy if this is the last reference */ + VERIFY0(zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_REFCOUNT, + sizeof (uint64_t), 1, &refcnt)); + + if (refcnt != 1) { + VERIFY0(zap_increment(mos, dckobj, DSL_CRYPTO_KEY_REFCOUNT, + -1, tx)); + } else { + VERIFY0(zap_destroy(mos, dckobj, tx)); + } +} + +void +dsl_dataset_crypt_stats(dsl_dataset_t *ds, nvlist_t *nv) +{ + uint64_t intval; + dsl_dir_t *dd = ds->ds_dir; + dsl_dir_t *enc_root; + char buf[ZFS_MAX_DATASET_NAME_LEN]; + + if (dd->dd_crypto_obj == 0) + return; + + intval = dsl_dataset_get_keystatus(dd); + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_KEYSTATUS, intval); + + if (dsl_dir_get_crypt(dd, &intval) == 0) + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_ENCRYPTION, intval); + if (zap_lookup(dd->dd_pool->dp_meta_objset, dd->dd_crypto_obj, + DSL_CRYPTO_KEY_GUID, 8, 1, &intval) == 0) { + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_KEY_GUID, intval); + } + if (zap_lookup(dd->dd_pool->dp_meta_objset, dd->dd_crypto_obj, + zfs_prop_to_name(ZFS_PROP_KEYFORMAT), 8, 1, &intval) == 0) { + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_KEYFORMAT, intval); + } + if (zap_lookup(dd->dd_pool->dp_meta_objset, dd->dd_crypto_obj, + 
zfs_prop_to_name(ZFS_PROP_PBKDF2_SALT), 8, 1, &intval) == 0) { + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_PBKDF2_SALT, intval); + } + if (zap_lookup(dd->dd_pool->dp_meta_objset, dd->dd_crypto_obj, + zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS), 8, 1, &intval) == 0) { + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_PBKDF2_ITERS, intval); + } + if (zap_lookup(dd->dd_pool->dp_meta_objset, ds->ds_object, + DS_FIELD_IVSET_GUID, 8, 1, &intval) == 0) { + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_IVSET_GUID, intval); + } + + if (dsl_dir_get_encryption_root_ddobj(dd, &intval) == 0) { + VERIFY0(dsl_dir_hold_obj(dd->dd_pool, intval, NULL, FTAG, + &enc_root)); + dsl_dir_name(enc_root, buf); + dsl_dir_rele(enc_root, FTAG); + dsl_prop_nvlist_add_string(nv, ZFS_PROP_ENCRYPTION_ROOT, buf); + } +} + +int +spa_crypt_get_salt(spa_t *spa, uint64_t dsobj, uint8_t *salt) +{ + int ret; + dsl_crypto_key_t *dck = NULL; + + /* look up the key from the spa's keystore */ + ret = spa_keystore_lookup_key(spa, dsobj, FTAG, &dck); + if (ret != 0) + goto error; + + ret = zio_crypt_key_get_salt(&dck->dck_key, salt); + if (ret != 0) + goto error; + + spa_keystore_dsl_key_rele(spa, dck, FTAG); + return (0); + +error: + if (dck != NULL) + spa_keystore_dsl_key_rele(spa, dck, FTAG); + return (ret); +} + +/* + * Objset blocks are a special case for MAC generation. These blocks have 2 + * 256-bit MACs which are embedded within the block itself, rather than a + * single 128 bit MAC. As a result, this function handles encoding and decoding + * the MACs on its own, unlike other functions in this file. 
+ */ +int +spa_do_crypt_objset_mac_abd(boolean_t generate, spa_t *spa, uint64_t dsobj, + abd_t *abd, uint_t datalen, boolean_t byteswap) +{ + int ret; + dsl_crypto_key_t *dck = NULL; + void *buf = abd_borrow_buf_copy(abd, datalen); + objset_phys_t *osp = buf; + uint8_t portable_mac[ZIO_OBJSET_MAC_LEN]; + uint8_t local_mac[ZIO_OBJSET_MAC_LEN]; + + /* look up the key from the spa's keystore */ + ret = spa_keystore_lookup_key(spa, dsobj, FTAG, &dck); + if (ret != 0) + goto error; + + /* calculate both HMACs */ + ret = zio_crypt_do_objset_hmacs(&dck->dck_key, buf, datalen, + byteswap, portable_mac, local_mac); + if (ret != 0) + goto error; + + spa_keystore_dsl_key_rele(spa, dck, FTAG); + + /* if we are generating encode the HMACs in the objset_phys_t */ + if (generate) { + bcopy(portable_mac, osp->os_portable_mac, ZIO_OBJSET_MAC_LEN); + bcopy(local_mac, osp->os_local_mac, ZIO_OBJSET_MAC_LEN); + abd_return_buf_copy(abd, buf, datalen); + return (0); + } + + if (bcmp(portable_mac, osp->os_portable_mac, ZIO_OBJSET_MAC_LEN) != 0 || + bcmp(local_mac, osp->os_local_mac, ZIO_OBJSET_MAC_LEN) != 0) { + abd_return_buf(abd, buf, datalen); + return (SET_ERROR(ECKSUM)); + } + + abd_return_buf(abd, buf, datalen); + + return (0); + +error: + if (dck != NULL) + spa_keystore_dsl_key_rele(spa, dck, FTAG); + abd_return_buf(abd, buf, datalen); + return (ret); +} + +int +spa_do_crypt_mac_abd(boolean_t generate, spa_t *spa, uint64_t dsobj, abd_t *abd, + uint_t datalen, uint8_t *mac) +{ + int ret; + dsl_crypto_key_t *dck = NULL; + uint8_t *buf = abd_borrow_buf_copy(abd, datalen); + uint8_t digestbuf[ZIO_DATA_MAC_LEN]; + + /* look up the key from the spa's keystore */ + ret = spa_keystore_lookup_key(spa, dsobj, FTAG, &dck); + if (ret != 0) + goto error; + + /* perform the hmac */ + ret = zio_crypt_do_hmac(&dck->dck_key, buf, datalen, + digestbuf, ZIO_DATA_MAC_LEN); + if (ret != 0) + goto error; + + abd_return_buf(abd, buf, datalen); + spa_keystore_dsl_key_rele(spa, dck, FTAG); + + /* + * 
Truncate and fill in mac buffer if we were asked to generate a MAC. + * Otherwise verify that the MAC matched what we expected. + */ + if (generate) { + bcopy(digestbuf, mac, ZIO_DATA_MAC_LEN); + return (0); + } + + if (bcmp(digestbuf, mac, ZIO_DATA_MAC_LEN) != 0) + return (SET_ERROR(ECKSUM)); + + return (0); + +error: + if (dck != NULL) + spa_keystore_dsl_key_rele(spa, dck, FTAG); + abd_return_buf(abd, buf, datalen); + return (ret); +} + +/* + * This function serves as a multiplexer for encryption and decryption of + * all blocks (except the L2ARC). For encryption, it will populate the IV, + * salt, MAC, and cabd (the ciphertext). On decryption it will simply use + * these fields to populate pabd (the plaintext). + */ +/* ARGSUSED */ +int +spa_do_crypt_abd(boolean_t encrypt, spa_t *spa, const zbookmark_phys_t *zb, + dmu_object_type_t ot, boolean_t dedup, boolean_t bswap, uint8_t *salt, + uint8_t *iv, uint8_t *mac, uint_t datalen, abd_t *pabd, abd_t *cabd, + boolean_t *no_crypt) +{ + int ret; + dsl_crypto_key_t *dck = NULL; + uint8_t *plainbuf = NULL, *cipherbuf = NULL; + + ASSERT(spa_feature_is_active(spa, SPA_FEATURE_ENCRYPTION)); + + /* look up the key from the spa's keystore */ + ret = spa_keystore_lookup_key(spa, zb->zb_objset, FTAG, &dck); + if (ret != 0) { + ret = SET_ERROR(EACCES); + return (ret); + } + + if (encrypt) { + plainbuf = abd_borrow_buf_copy(pabd, datalen); + cipherbuf = abd_borrow_buf(cabd, datalen); + } else { + plainbuf = abd_borrow_buf(pabd, datalen); + cipherbuf = abd_borrow_buf_copy(cabd, datalen); + } + + /* + * Both encryption and decryption functions need a salt for key + * generation and an IV. When encrypting a non-dedup block, we + * generate the salt and IV randomly to be stored by the caller. Dedup + * blocks perform a (more expensive) HMAC of the plaintext to obtain + * the salt and the IV. ZIL blocks have their salt and IV generated + * at allocation time in zio_alloc_zil(). On decryption, we simply use + * the provided values. 
+ */ + if (encrypt && ot != DMU_OT_INTENT_LOG && !dedup) { + ret = zio_crypt_key_get_salt(&dck->dck_key, salt); + if (ret != 0) + goto error; + + ret = zio_crypt_generate_iv(iv); + if (ret != 0) + goto error; + } else if (encrypt && dedup) { + ret = zio_crypt_generate_iv_salt_dedup(&dck->dck_key, + plainbuf, datalen, iv, salt); + if (ret != 0) + goto error; + } + + /* call lower level function to perform encryption / decryption */ + ret = zio_do_crypt_data(encrypt, &dck->dck_key, ot, bswap, salt, iv, + mac, datalen, plainbuf, cipherbuf, no_crypt); + + /* + * Handle injected decryption faults. Unfortunately, we cannot inject + * faults for dnode blocks because we might trigger the panic in + * dbuf_prepare_encrypted_dnode_leaf(), which exists because syncing + * context is not prepared to handle malicious decryption failures. + */ + if (zio_injection_enabled && !encrypt && ot != DMU_OT_DNODE && ret == 0) + ret = zio_handle_decrypt_injection(spa, zb, ot, ECKSUM); + if (ret != 0) + goto error; + + if (encrypt) { + abd_return_buf(pabd, plainbuf, datalen); + abd_return_buf_copy(cabd, cipherbuf, datalen); + } else { + abd_return_buf_copy(pabd, plainbuf, datalen); + abd_return_buf(cabd, cipherbuf, datalen); + } + + spa_keystore_dsl_key_rele(spa, dck, FTAG); + + return (0); + +error: + if (encrypt) { + /* zero out any state we might have changed while encrypting */ + bzero(salt, ZIO_DATA_SALT_LEN); + bzero(iv, ZIO_DATA_IV_LEN); + bzero(mac, ZIO_DATA_MAC_LEN); + abd_return_buf(pabd, plainbuf, datalen); + abd_return_buf_copy(cabd, cipherbuf, datalen); + } else { + abd_return_buf_copy(pabd, plainbuf, datalen); + abd_return_buf(cabd, cipherbuf, datalen); + } + + spa_keystore_dsl_key_rele(spa, dck, FTAG); + + return (ret); +} diff --git a/usr/src/uts/common/fs/zfs/dsl_dataset.c b/usr/src/uts/common/fs/zfs/dsl_dataset.c index fd4c35e000..a6061078f7 100644 --- a/usr/src/uts/common/fs/zfs/dsl_dataset.c +++ b/usr/src/uts/common/fs/zfs/dsl_dataset.c @@ -540,6 +540,13 @@ 
dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag, ds->ds_reserved = ds->ds_quota = 0; } + if (err == 0 && ds->ds_dir->dd_crypto_obj != 0 && + ds->ds_is_snapshot && + zap_contains(mos, dsobj, DS_FIELD_IVSET_GUID) != 0) { + dp->dp_spa->spa_errata = + ZPOOL_ERRATA_ZOL_8308_ENCRYPTION; + } + dsl_deadlist_open(&ds->ds_deadlist, mos, dsl_dataset_phys(ds)->ds_deadlist_obj); uint64_t remap_deadlist_obj = @@ -591,17 +598,52 @@ dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag, } } } + ASSERT3P(ds->ds_dbuf, ==, dbuf); ASSERT3P(dsl_dataset_phys(ds), ==, dbuf->db_data); ASSERT(dsl_dataset_phys(ds)->ds_prev_snap_obj != 0 || spa_version(dp->dp_spa) < SPA_VERSION_ORIGIN || dp->dp_origin_snap == NULL || ds == dp->dp_origin_snap); *dsp = ds; + return (0); } int -dsl_dataset_hold(dsl_pool_t *dp, const char *name, +dsl_dataset_create_key_mapping(dsl_dataset_t *ds) +{ + dsl_dir_t *dd = ds->ds_dir; + + if (dd->dd_crypto_obj == 0) + return (0); + + return (spa_keystore_create_mapping(dd->dd_pool->dp_spa, + ds, ds, &ds->ds_key_mapping)); +} + +int +dsl_dataset_hold_obj_flags(dsl_pool_t *dp, uint64_t dsobj, + ds_hold_flags_t flags, void *tag, dsl_dataset_t **dsp) +{ + int err; + + err = dsl_dataset_hold_obj(dp, dsobj, tag, dsp); + if (err != 0) + return (err); + + ASSERT3P(*dsp, !=, NULL); + + if (flags & DS_HOLD_FLAG_DECRYPT) { + err = dsl_dataset_create_key_mapping(*dsp); + if (err != 0) + dsl_dataset_rele(*dsp, tag); + } + + return (err); +} + +int +dsl_dataset_hold_flags(dsl_pool_t *dp, const char *name, ds_hold_flags_t flags, void *tag, dsl_dataset_t **dsp) { dsl_dir_t *dd; @@ -617,7 +659,7 @@ dsl_dataset_hold(dsl_pool_t *dp, const char *name, ASSERT(dsl_pool_config_held(dp)); obj = dsl_dir_phys(dd)->dd_head_dataset_obj; if (obj != 0) - err = dsl_dataset_hold_obj(dp, obj, tag, &ds); + err = dsl_dataset_hold_obj_flags(dp, obj, flags, tag, &ds); else err = SET_ERROR(ENOENT); @@ -626,16 +668,18 @@ dsl_dataset_hold(dsl_pool_t *dp, const char *name, 
dsl_dataset_t *snap_ds; if (*snapname++ != '@') { - dsl_dataset_rele(ds, tag); + dsl_dataset_rele_flags(ds, flags, tag); dsl_dir_rele(dd, FTAG); return (SET_ERROR(ENOENT)); } dprintf("looking for snapshot '%s'\n", snapname); err = dsl_dataset_snap_lookup(ds, snapname, &obj); - if (err == 0) - err = dsl_dataset_hold_obj(dp, obj, tag, &snap_ds); - dsl_dataset_rele(ds, tag); + if (err == 0) { + err = dsl_dataset_hold_obj_flags(dp, obj, flags, tag, + &snap_ds); + } + dsl_dataset_rele_flags(ds, flags, tag); if (err == 0) { mutex_enter(&snap_ds->ds_lock); @@ -653,14 +697,21 @@ dsl_dataset_hold(dsl_pool_t *dp, const char *name, } int -dsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj, +dsl_dataset_hold(dsl_pool_t *dp, const char *name, void *tag, + dsl_dataset_t **dsp) +{ + return (dsl_dataset_hold_flags(dp, name, 0, tag, dsp)); +} + +int +dsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj, ds_hold_flags_t flags, void *tag, dsl_dataset_t **dsp) { - int err = dsl_dataset_hold_obj(dp, dsobj, tag, dsp); + int err = dsl_dataset_hold_obj_flags(dp, dsobj, flags, tag, dsp); if (err != 0) return (err); if (!dsl_dataset_tryown(*dsp, tag)) { - dsl_dataset_rele(*dsp, tag); + dsl_dataset_rele_flags(*dsp, flags, tag); *dsp = NULL; return (SET_ERROR(EBUSY)); } @@ -668,14 +719,14 @@ dsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj, } int -dsl_dataset_own(dsl_pool_t *dp, const char *name, +dsl_dataset_own(dsl_pool_t *dp, const char *name, ds_hold_flags_t flags, void *tag, dsl_dataset_t **dsp) { - int err = dsl_dataset_hold(dp, name, tag, dsp); + int err = dsl_dataset_hold_flags(dp, name, flags, tag, dsp); if (err != 0) return (err); if (!dsl_dataset_tryown(*dsp, tag)) { - dsl_dataset_rele(*dsp, tag); + dsl_dataset_rele_flags(*dsp, flags, tag); return (SET_ERROR(EBUSY)); } return (0); @@ -757,7 +808,28 @@ dsl_dataset_rele(dsl_dataset_t *ds, void *tag) } void -dsl_dataset_disown(dsl_dataset_t *ds, void *tag) +dsl_dataset_remove_key_mapping(dsl_dataset_t *ds) +{ + dsl_dir_t *dd = 
ds->ds_dir; + + if (dd == NULL || dd->dd_crypto_obj == 0) + return; + + (void) spa_keystore_remove_mapping(dd->dd_pool->dp_spa, + ds->ds_object, ds); +} + +void +dsl_dataset_rele_flags(dsl_dataset_t *ds, ds_hold_flags_t flags, void *tag) +{ + if (flags & DS_HOLD_FLAG_DECRYPT) + dsl_dataset_remove_key_mapping(ds); + + dsl_dataset_rele(ds, tag); +} + +void +dsl_dataset_disown(dsl_dataset_t *ds, ds_hold_flags_t flags, void *tag) { ASSERT3P(ds->ds_owner, ==, tag); ASSERT(ds->ds_dbuf != NULL); @@ -766,7 +838,7 @@ dsl_dataset_disown(dsl_dataset_t *ds, void *tag) ds->ds_owner = NULL; mutex_exit(&ds->ds_lock); dsl_dataset_long_rele(ds, tag); - dsl_dataset_rele(ds, tag); + dsl_dataset_rele_flags(ds, flags, tag); } boolean_t @@ -795,7 +867,7 @@ dsl_dataset_has_owner(dsl_dataset_t *ds) return (rv); } -static void +void dsl_dataset_activate_feature(uint64_t dsobj, spa_feature_t f, dmu_tx_t *tx) { spa_t *spa = dmu_tx_pool(tx)->dp_spa; @@ -825,7 +897,7 @@ dsl_dataset_deactivate_feature(uint64_t dsobj, spa_feature_t f, dmu_tx_t *tx) uint64_t dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin, - uint64_t flags, dmu_tx_t *tx) + dsl_crypto_params_t *dcp, uint64_t flags, dmu_tx_t *tx) { dsl_pool_t *dp = dd->dd_pool; dmu_buf_t *dbuf; @@ -924,6 +996,9 @@ dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin, } } + /* handle encryption */ + dsl_dataset_create_crypt_sync(dsobj, dd, origin, dcp, tx); + if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE) dsphys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; @@ -946,6 +1021,8 @@ dsl_dataset_zero_zil(dsl_dataset_t *ds, dmu_tx_t *tx) zio_t *zio; bzero(&os->os_zil_header, sizeof (os->os_zil_header)); + if (os->os_encrypted) + os->os_next_write_raw[tx->tx_txg & TXG_MASK] = B_TRUE; zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); dsl_dataset_sync(ds, zio, tx); @@ -959,7 +1036,8 @@ dsl_dataset_zero_zil(dsl_dataset_t *ds, dmu_tx_t *tx) uint64_t dsl_dataset_create_sync(dsl_dir_t *pdd, const char *lastname, - 
dsl_dataset_t *origin, uint64_t flags, cred_t *cr, dmu_tx_t *tx) + dsl_dataset_t *origin, uint64_t flags, cred_t *cr, + dsl_crypto_params_t *dcp, dmu_tx_t *tx) { dsl_pool_t *dp = pdd->dd_pool; uint64_t dsobj, ddobj; @@ -971,7 +1049,7 @@ dsl_dataset_create_sync(dsl_dir_t *pdd, const char *lastname, ddobj = dsl_dir_create_sync(dp, pdd, lastname, tx); VERIFY0(dsl_dir_hold_obj(dp, ddobj, lastname, FTAG, &dd)); - dsobj = dsl_dataset_create_sync_dd(dd, origin, + dsobj = dsl_dataset_create_sync_dd(dd, origin, dcp, flags & ~DS_CREATE_FLAG_NODIRTY, tx); dsl_deleg_set_create_perms(dd, tx, cr); @@ -1099,8 +1177,18 @@ dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx) dp = ds->ds_dir->dd_pool; if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg)) { + objset_t *os = ds->ds_objset; + /* up the hold count until we can be written out */ dmu_buf_add_ref(ds->ds_dbuf, ds); + + /* if this dataset is encrypted, grab a reference to the DCK */ + if (ds->ds_dir->dd_crypto_obj != 0 && + !os->os_raw_receive && + !os->os_next_write_raw[tx->tx_txg & TXG_MASK]) { + ASSERT3P(ds->ds_key_mapping, !=, NULL); + key_mapping_add_ref(ds->ds_key_mapping, ds); + } } } @@ -1471,6 +1559,30 @@ dsl_dataset_snapshot_sync_impl(dsl_dataset_t *ds, const char *snapname, sizeof (remap_deadlist_obj), 1, &remap_deadlist_obj, tx)); } + /* + * Create a ivset guid for this snapshot if the dataset is + * encrypted. This may be overridden by a raw receive. A + * previous implementation of this code did not have this + * field as part of the on-disk format for ZFS encryption + * (see errata #4). As part of the remediation for this + * issue, we ask the user to enable the bookmark_v2 feature + * which is now a dependency of the encryption feature. We + * use this as a heuristic to determine when the user has + * elected to correct any datasets created with the old code. 
+ * As a result, we only do this step if the bookmark_v2 + * feature is enabled, which limits the number of states a + * given pool / dataset can be in with regards to terms of + * correcting the issue. + */ + if (ds->ds_dir->dd_crypto_obj != 0 && + spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_BOOKMARK_V2)) { + uint64_t ivset_guid = unique_create(); + + dmu_object_zapify(mos, dsobj, DMU_OT_DSL_DATASET, tx); + VERIFY0(zap_add(mos, dsobj, DS_FIELD_IVSET_GUID, + sizeof (ivset_guid), 1, &ivset_guid, tx)); + } + ASSERT3U(dsl_dataset_phys(ds)->ds_prev_snap_txg, <, tx->tx_txg); dsl_dataset_phys(ds)->ds_prev_snap_obj = dsobj; dsl_dataset_phys(ds)->ds_prev_snap_txg = crtxg; @@ -1750,6 +1862,11 @@ dsl_dataset_sync_done(dsl_dataset_t *ds, dmu_tx_t *tx) os->os_synced_dnodes = NULL; } + if (os->os_encrypted) + os->os_next_write_raw[tx->tx_txg & TXG_MASK] = B_FALSE; + else + ASSERT0(os->os_next_write_raw[tx->tx_txg & TXG_MASK]); + ASSERT(!dmu_objset_is_dirty(os, dmu_tx_get_txg(tx))); dmu_buf_rele(ds->ds_dbuf, ds); @@ -1874,6 +1991,10 @@ get_receive_resume_stats_impl(dsl_dataset_t *ds) DS_FIELD_RESUME_COMPRESSOK) == 0) { fnvlist_add_boolean(token_nv, "compressok"); } + if (zap_contains(dp->dp_meta_objset, ds->ds_object, + DS_FIELD_RESUME_RAWOK) == 0) { + fnvlist_add_boolean(token_nv, "rawok"); + } packed = fnvlist_pack(token_nv, &packed_size); fnvlist_free(token_nv); compressed = kmem_alloc(packed_size, KM_SLEEP); @@ -2196,6 +2317,7 @@ dsl_get_mountpoint(dsl_dataset_t *ds, const char *dsname, char *value, void dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv) { + int err; dsl_pool_t *dp = ds->ds_dir->dd_pool; ASSERT(dsl_pool_config_held(dp)); @@ -2240,13 +2362,24 @@ dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv) dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USERREFS, dsl_get_userrefs(ds)); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_DEFER_DESTROY, - dsl_get_defer_destroy(ds)); + DS_IS_DEFER_DESTROY(ds) ? 
1 : 0); + dsl_dataset_crypt_stats(ds, nv); if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) { - uint64_t written; - if (dsl_get_written(ds, &written) == 0) { - dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_WRITTEN, - written); + uint64_t written, comp, uncomp; + dsl_pool_t *dp = ds->ds_dir->dd_pool; + dsl_dataset_t *prev; + + err = dsl_dataset_hold_obj(dp, + dsl_dataset_phys(ds)->ds_prev_snap_obj, FTAG, &prev); + if (err == 0) { + err = dsl_dataset_space_written(prev, ds, &written, + &comp, &uncomp); + dsl_dataset_rele(prev, FTAG); + if (err == 0) { + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_WRITTEN, + written); + } } } @@ -2685,7 +2818,7 @@ dsl_dataset_rollback_sync(void *arg, dmu_tx_t *tx) fnvlist_add_string(ddra->ddra_result, "target", namebuf); cloneobj = dsl_dataset_create_sync(ds->ds_dir, "%rollback", - ds->ds_prev, DS_CREATE_FLAG_NODIRTY, kcred, tx); + ds->ds_prev, DS_CREATE_FLAG_NODIRTY, kcred, NULL, tx); VERIFY0(dsl_dataset_hold_obj(dp, cloneobj, FTAG, &clone)); @@ -2767,6 +2900,23 @@ dsl_dataset_promote_check(void *arg, dmu_tx_t *tx) return (SET_ERROR(EXDEV)); } + snap = list_head(&ddpa->shared_snaps); + if (snap == NULL) { + err = SET_ERROR(ENOENT); + goto out; + } + origin_ds = snap->ds; + + /* + * Encrypted clones share a DSL Crypto Key with their origin's dsl dir. + * When doing a promote we must make sure the encryption root for + * both the target and the target's origin does not change to avoid + * needing to rewrap encryption keys + */ + err = dsl_dataset_promote_crypt_check(hds->ds_dir, origin_ds->ds_dir); + if (err != 0) + goto out; + /* * Compute and check the amount of space to transfer. Since this is * so expensive, don't do the preliminary check. 
@@ -2950,6 +3100,8 @@ dsl_dataset_promote_sync(void *arg, dmu_tx_t *tx) VERIFY0(dsl_dir_hold_obj(dp, origin_ds->ds_dir->dd_object, NULL, FTAG, &odd)); + dsl_dataset_promote_crypt_sync(hds->ds_dir, odd, tx); + /* change origin's next snap */ dmu_buf_will_dirty(origin_ds->ds_dbuf, tx); oldnext_obj = dsl_dataset_phys(origin_ds)->ds_next_snap_obj; diff --git a/usr/src/uts/common/fs/zfs/dsl_destroy.c b/usr/src/uts/common/fs/zfs/dsl_destroy.c index 59a946cca0..40ea657095 100644 --- a/usr/src/uts/common/fs/zfs/dsl_destroy.c +++ b/usr/src/uts/common/fs/zfs/dsl_destroy.c @@ -680,8 +680,8 @@ old_synchronous_dataset_destroy(dsl_dataset_t *ds, dmu_tx_t *tx) ka.ds = ds; ka.tx = tx; VERIFY0(traverse_dataset(ds, - dsl_dataset_phys(ds)->ds_prev_snap_txg, TRAVERSE_POST, - kill_blkptr, &ka)); + dsl_dataset_phys(ds)->ds_prev_snap_txg, TRAVERSE_POST | + TRAVERSE_NO_DECRYPT, kill_blkptr, &ka)); ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) || dsl_dataset_phys(ds)->ds_unique_bytes == 0); } @@ -784,6 +784,11 @@ dsl_dir_destroy_sync(uint64_t ddobj, dmu_tx_t *tx) for (t = 0; t < DD_USED_NUM; t++) ASSERT0(dsl_dir_phys(dd)->dd_used_breakdown[t]); + if (dd->dd_crypto_obj != 0) { + dsl_crypto_key_destroy_sync(dd->dd_crypto_obj, tx); + (void) spa_keystore_unload_wkey_impl(dp->dp_spa, dd->dd_object); + } + VERIFY0(zap_destroy(mos, dsl_dir_phys(dd)->dd_child_dir_zapobj, tx)); VERIFY0(zap_destroy(mos, dsl_dir_phys(dd)->dd_props_zapobj, tx)); if (dsl_dir_phys(dd)->dd_clones != 0) @@ -1033,7 +1038,8 @@ dsl_destroy_head(const char *name) * remove the objects from open context so that the txg sync * is not too long. 
*/ - error = dmu_objset_own(name, DMU_OST_ANY, B_FALSE, FTAG, &os); + error = dmu_objset_own(name, DMU_OST_ANY, B_FALSE, B_FALSE, + FTAG, &os); if (error == 0) { uint64_t prev_snap_txg = dsl_dataset_phys(dmu_objset_ds(os))-> @@ -1044,7 +1050,7 @@ dsl_destroy_head(const char *name) (void) dmu_free_long_object(os, obj); /* sync out all frees */ txg_wait_synced(dmu_objset_pool(os), 0); - dmu_objset_disown(os, FTAG); + dmu_objset_disown(os, B_FALSE, FTAG); } } diff --git a/usr/src/uts/common/fs/zfs/dsl_dir.c b/usr/src/uts/common/fs/zfs/dsl_dir.c index 298516f8a4..02cad5f98e 100644 --- a/usr/src/uts/common/fs/zfs/dsl_dir.c +++ b/usr/src/uts/common/fs/zfs/dsl_dir.c @@ -37,6 +37,7 @@ #include <sys/dsl_deleg.h> #include <sys/dmu_impl.h> #include <sys/spa.h> +#include <sys/spa_impl.h> #include <sys/metaslab.h> #include <sys/zap.h> #include <sys/zio.h> @@ -163,6 +164,7 @@ dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj, { dmu_buf_t *dbuf; dsl_dir_t *dd; + dmu_object_info_t doi; int err; ASSERT(dsl_pool_config_held(dp)); @@ -171,14 +173,11 @@ dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj, if (err != 0) return (err); dd = dmu_buf_get_user(dbuf); -#ifdef ZFS_DEBUG - { - dmu_object_info_t doi; - dmu_object_info_from_db(dbuf, &doi); - ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_DSL_DIR); - ASSERT3U(doi.doi_bonus_size, >=, sizeof (dsl_dir_phys_t)); - } -#endif + + dmu_object_info_from_db(dbuf, &doi); + ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_DSL_DIR); + ASSERT3U(doi.doi_bonus_size, >=, sizeof (dsl_dir_phys_t)); + if (dd == NULL) { dsl_dir_t *winner; @@ -186,6 +185,21 @@ dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj, dd->dd_object = ddobj; dd->dd_dbuf = dbuf; dd->dd_pool = dp; + + if (dsl_dir_is_zapified(dd) && + zap_contains(dp->dp_meta_objset, ddobj, + DD_FIELD_CRYPTO_KEY_OBJ) == 0) { + VERIFY0(zap_lookup(dp->dp_meta_objset, + ddobj, DD_FIELD_CRYPTO_KEY_OBJ, + sizeof (uint64_t), 1, &dd->dd_crypto_obj)); + + /* check for on-disk format errata */ + if 
(dsl_dir_incompatible_encryption_version(dd)) { + dp->dp_spa->spa_errata = + ZPOOL_ERRATA_ZOL_6845_ENCRYPTION; + } + } + mutex_init(&dd->dd_lock, NULL, MUTEX_DEFAULT, NULL); dsl_prop_init(dd); @@ -945,6 +959,7 @@ dsl_dir_create_sync(dsl_pool_t *dp, dsl_dir_t *pds, const char *name, DMU_OT_DSL_DIR_CHILD_MAP, DMU_OT_NONE, 0, tx); if (spa_version(dp->dp_spa) >= SPA_VERSION_USED_BREAKDOWN) ddphys->dd_flags |= DD_FLAG_USED_BREAKDOWN; + dmu_buf_rele(dbuf, FTAG); return (ddobj); @@ -1945,6 +1960,14 @@ dsl_dir_rename_check(void *arg, dmu_tx_t *tx) } } + /* check for encryption errors */ + error = dsl_dir_rename_crypt_check(dd, newparent); + if (error != 0) { + dsl_dir_rele(newparent, FTAG); + dsl_dir_rele(dd, FTAG); + return (SET_ERROR(EACCES)); + } + /* no rename into our descendant */ if (closest_common_ancestor(dd, newparent) == dd) { dsl_dir_rele(newparent, FTAG); diff --git a/usr/src/uts/common/fs/zfs/dsl_pool.c b/usr/src/uts/common/fs/zfs/dsl_pool.c index 54c88b1e3c..76bae90e68 100644 --- a/usr/src/uts/common/fs/zfs/dsl_pool.c +++ b/usr/src/uts/common/fs/zfs/dsl_pool.c @@ -438,7 +438,8 @@ dsl_pool_destroy_obsolete_bpobj(dsl_pool_t *dp, dmu_tx_t *tx) } dsl_pool_t * -dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg) +dsl_pool_create(spa_t *spa, nvlist_t *zplprops, dsl_crypto_params_t *dcp, + uint64_t txg) { int err; dsl_pool_t *dp = dsl_pool_open_impl(spa, txg); @@ -451,6 +452,7 @@ dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg) /* create and open the MOS (meta-objset) */ dp->dp_meta_objset = dmu_objset_create_impl(spa, NULL, &dp->dp_meta_rootbp, DMU_OST_META, tx); + spa->spa_meta_objset = dp->dp_meta_objset; /* create the pool directory */ err = zap_create_claim(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, @@ -488,11 +490,23 @@ dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg) if (spa_version(spa) >= SPA_VERSION_DSL_SCRUB) dsl_pool_create_origin(dp, tx); + /* + * Some features may be needed when creating the root dataset, so we 
+ * create the feature objects here. + */ + if (spa_version(spa) >= SPA_VERSION_FEATURES) + spa_feature_create_zap_objects(spa, tx); + + if (dcp != NULL && dcp->cp_crypt != ZIO_CRYPT_OFF && + dcp->cp_crypt != ZIO_CRYPT_INHERIT) + spa_feature_enable(spa, SPA_FEATURE_ENCRYPTION, tx); + /* create the root dataset */ - obj = dsl_dataset_create_sync_dd(dp->dp_root_dir, NULL, 0, tx); + obj = dsl_dataset_create_sync_dd(dp->dp_root_dir, NULL, dcp, 0, tx); /* create the root objset */ - VERIFY0(dsl_dataset_hold_obj(dp, obj, FTAG, &ds)); + VERIFY0(dsl_dataset_hold_obj_flags(dp, obj, + DS_HOLD_FLAG_DECRYPT, FTAG, &ds)); #ifdef _KERNEL { objset_t *os; @@ -503,7 +517,7 @@ dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg) zfs_create_fs(os, kcred, zplprops, tx); } #endif - dsl_dataset_rele(ds, FTAG); + dsl_dataset_rele_flags(ds, DS_HOLD_FLAG_DECRYPT, FTAG); dmu_tx_commit(tx); @@ -664,9 +678,22 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t txg) */ zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); while ((ds = txg_list_remove(&dp->dp_dirty_datasets, txg)) != NULL) { + objset_t *os = ds->ds_objset; + ASSERT(list_link_active(&ds->ds_synced_link)); dmu_buf_rele(ds->ds_dbuf, ds); dsl_dataset_sync(ds, zio, tx); + + /* + * Release any key mappings created by calls to + * dsl_dataset_dirty() from the userquota accounting + * code paths. 
+ */ + if (os->os_encrypted && !os->os_raw_receive && + !os->os_next_write_raw[txg & TXG_MASK]) { + ASSERT3P(ds->ds_key_mapping, !=, NULL); + key_mapping_rele(dp->dp_spa, ds->ds_key_mapping, ds); + } } VERIFY0(zio_wait(zio)); @@ -676,8 +703,17 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t txg) * * - move dead blocks from the pending deadlist to the on-disk deadlist * - release hold from dsl_dataset_dirty() + * - release key mapping hold from dsl_dataset_dirty() */ while ((ds = list_remove_head(&synced_datasets)) != NULL) { + objset_t *os = ds->ds_objset; + + if (os->os_encrypted && !os->os_raw_receive && + !os->os_next_write_raw[txg & TXG_MASK]) { + ASSERT3P(ds->ds_key_mapping, !=, NULL); + key_mapping_rele(dp->dp_spa, ds->ds_key_mapping, ds); + } + dsl_dataset_sync_done(ds, tx); } while ((dd = txg_list_remove(&dp->dp_dirty_dirs, txg)) != NULL) { @@ -1027,7 +1063,7 @@ dsl_pool_create_origin(dsl_pool_t *dp, dmu_tx_t *tx) /* create the origin dir, ds, & snap-ds */ dsobj = dsl_dataset_create_sync(dp->dp_root_dir, ORIGIN_DIR_NAME, - NULL, 0, kcred, tx); + NULL, 0, kcred, NULL, tx); VERIFY0(dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds)); dsl_dataset_snapshot_sync_impl(ds, ORIGIN_DIR_NAME, tx); VERIFY0(dsl_dataset_hold_obj(dp, dsl_dataset_phys(ds)->ds_prev_snap_obj, diff --git a/usr/src/uts/common/fs/zfs/dsl_prop.c b/usr/src/uts/common/fs/zfs/dsl_prop.c index ce0cd9b0fe..8197f0685a 100644 --- a/usr/src/uts/common/fs/zfs/dsl_prop.c +++ b/usr/src/uts/common/fs/zfs/dsl_prop.c @@ -926,7 +926,7 @@ typedef enum dsl_prop_getflags { DSL_PROP_GET_INHERITING = 0x1, /* searching parent of target ds */ DSL_PROP_GET_SNAPSHOT = 0x2, /* snapshot dataset */ DSL_PROP_GET_LOCAL = 0x4, /* local properties */ - DSL_PROP_GET_RECEIVED = 0x8 /* received properties */ + DSL_PROP_GET_RECEIVED = 0x8, /* received properties */ } dsl_prop_getflags_t; static int @@ -1093,6 +1093,7 @@ dsl_prop_get_all_ds(dsl_dataset_t *ds, nvlist_t **nvp, if (err) break; } + out: return (err); } diff --git 
a/usr/src/uts/common/fs/zfs/dsl_scan.c b/usr/src/uts/common/fs/zfs/dsl_scan.c index b5ef5a89e9..73634e33e2 100644 --- a/usr/src/uts/common/fs/zfs/dsl_scan.c +++ b/usr/src/uts/common/fs/zfs/dsl_scan.c @@ -487,6 +487,43 @@ dsl_scan_init(dsl_pool_t *dp, uint64_t txg) err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SCAN, sizeof (uint64_t), SCAN_PHYS_NUMINTS, &scn->scn_phys); + + /* + * Detect if the pool contains the signature of #2094. If it + * does properly update the scn->scn_phys structure and notify + * the administrator by setting an errata for the pool. + */ + if (err == EOVERFLOW) { + uint64_t zaptmp[SCAN_PHYS_NUMINTS + 1]; + VERIFY3S(SCAN_PHYS_NUMINTS, ==, 24); + VERIFY3S(offsetof(dsl_scan_phys_t, scn_flags), ==, + (23 * sizeof (uint64_t))); + + err = zap_lookup(dp->dp_meta_objset, + DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SCAN, + sizeof (uint64_t), SCAN_PHYS_NUMINTS + 1, &zaptmp); + if (err == 0) { + uint64_t overflow = zaptmp[SCAN_PHYS_NUMINTS]; + + if (overflow & ~DSF_VISIT_DS_AGAIN || + scn->scn_async_destroying) { + spa->spa_errata = + ZPOOL_ERRATA_ZOL_2094_ASYNC_DESTROY; + return (EOVERFLOW); + } + + bcopy(zaptmp, &scn->scn_phys, + SCAN_PHYS_NUMINTS * sizeof (uint64_t)); + scn->scn_phys.scn_flags = overflow; + + /* Required scrub already in progress. 
*/ + if (scn->scn_phys.scn_state == DSS_FINISHED || + scn->scn_phys.scn_state == DSS_CANCELED) + spa->spa_errata = + ZPOOL_ERRATA_ZOL_2094_SCRUB; + } + } + if (err == ENOENT) return (0); else if (err) @@ -1379,7 +1416,7 @@ dsl_scan_zil(dsl_pool_t *dp, zil_header_t *zh) zilog = zil_alloc(dp->dp_meta_objset, zh); (void) zil_parse(zilog, dsl_scan_zil_block, dsl_scan_zil_record, &zsa, - claim_txg); + claim_txg, B_FALSE); zil_free(zilog); } @@ -1637,6 +1674,13 @@ dsl_scan_prefetch_thread(void *arg) mutex_exit(&spa->spa_scrub_lock); + if (BP_IS_PROTECTED(&spic->spic_bp)) { + ASSERT(BP_GET_TYPE(&spic->spic_bp) == DMU_OT_DNODE || + BP_GET_TYPE(&spic->spic_bp) == DMU_OT_OBJSET); + ASSERT3U(BP_GET_LEVEL(&spic->spic_bp), ==, 0); + zio_flags |= ZIO_FLAG_RAW; + } + /* issue the prefetch asynchronously */ (void) arc_read(scn->scn_zio_root, scn->scn_dp->dp_spa, &spic->spic_bp, dsl_scan_prefetch_cb, spic->spic_spc, @@ -1744,6 +1788,11 @@ dsl_scan_recurse(dsl_scan_t *scn, dsl_dataset_t *ds, dmu_objset_type_t ostype, int epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT; arc_buf_t *buf; + if (BP_IS_PROTECTED(bp)) { + ASSERT3U(BP_GET_COMPRESS(bp), ==, ZIO_COMPRESS_OFF); + zio_flags |= ZIO_FLAG_RAW; + } + err = arc_read(NULL, dp->dp_spa, bp, arc_getbuf_func, &buf, ZIO_PRIORITY_SCRUB, zio_flags, &flags, zb); if (err) { diff --git a/usr/src/uts/common/fs/zfs/hkdf.c b/usr/src/uts/common/fs/zfs/hkdf.c new file mode 100644 index 0000000000..1d6cc898e4 --- /dev/null +++ b/usr/src/uts/common/fs/zfs/hkdf.c @@ -0,0 +1,173 @@ +/* + * CDDL HEADER START + * + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. 
+ * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2017, Datto, Inc. All rights reserved. + */ + +#include <sys/dmu.h> +#include <sys/hkdf.h> +#include <sys/crypto/api.h> +#include <sys/sha2.h> +#include <sys/hkdf.h> + +static int +hkdf_sha512_extract(uint8_t *salt, uint_t salt_len, uint8_t *key_material, + uint_t km_len, uint8_t *out_buf) +{ + int ret; + crypto_mechanism_t mech; + crypto_key_t key; + crypto_data_t input_cd, output_cd; + + /* initialize HMAC mechanism */ + mech.cm_type = crypto_mech2id(SUN_CKM_SHA512_HMAC); + mech.cm_param = NULL; + mech.cm_param_len = 0; + + /* initialize the salt as a crypto key */ + key.ck_format = CRYPTO_KEY_RAW; + key.ck_length = CRYPTO_BYTES2BITS(salt_len); + key.ck_data = salt; + + /* initialize crypto data for the input and output data */ + input_cd.cd_format = CRYPTO_DATA_RAW; + input_cd.cd_offset = 0; + input_cd.cd_length = km_len; + input_cd.cd_raw.iov_base = (char *)key_material; + input_cd.cd_raw.iov_len = input_cd.cd_length; + + output_cd.cd_format = CRYPTO_DATA_RAW; + output_cd.cd_offset = 0; + output_cd.cd_length = SHA512_DIGEST_LENGTH; + output_cd.cd_raw.iov_base = (char *)out_buf; + output_cd.cd_raw.iov_len = output_cd.cd_length; + + ret = crypto_mac(&mech, &input_cd, &key, NULL, &output_cd, NULL); + if (ret != CRYPTO_SUCCESS) + return (SET_ERROR(EIO)); + + return (0); +} + +static int +hkdf_sha512_expand(uint8_t *extract_key, uint8_t *info, uint_t info_len, + uint8_t *out_buf, uint_t out_len) +{ + int ret; + crypto_mechanism_t mech; + crypto_context_t ctx; + crypto_key_t key; + crypto_data_t T_cd, info_cd, c_cd; + uint_t i, T_len = 0, pos = 0; + uint8_t c; + uint_t N = (out_len + SHA512_DIGEST_LENGTH) / SHA512_DIGEST_LENGTH; + uint8_t T[SHA512_DIGEST_LENGTH]; + + if (N > 255) + return (SET_ERROR(EINVAL)); + + /* initialize HMAC mechanism */ + mech.cm_type = crypto_mech2id(SUN_CKM_SHA512_HMAC); + mech.cm_param = NULL; + mech.cm_param_len = 0; + + /* initialize the salt as a crypto key */ + key.ck_format = 
CRYPTO_KEY_RAW; + key.ck_length = CRYPTO_BYTES2BITS(SHA512_DIGEST_LENGTH); + key.ck_data = extract_key; + + /* initialize crypto data for the input and output data */ + T_cd.cd_format = CRYPTO_DATA_RAW; + T_cd.cd_offset = 0; + T_cd.cd_raw.iov_base = (char *)T; + + c_cd.cd_format = CRYPTO_DATA_RAW; + c_cd.cd_offset = 0; + c_cd.cd_length = 1; + c_cd.cd_raw.iov_base = (char *)&c; + c_cd.cd_raw.iov_len = c_cd.cd_length; + + info_cd.cd_format = CRYPTO_DATA_RAW; + info_cd.cd_offset = 0; + info_cd.cd_length = info_len; + info_cd.cd_raw.iov_base = (char *)info; + info_cd.cd_raw.iov_len = info_cd.cd_length; + + for (i = 1; i <= N; i++) { + c = i; + + T_cd.cd_length = T_len; + T_cd.cd_raw.iov_len = T_cd.cd_length; + + ret = crypto_mac_init(&mech, &key, NULL, &ctx, NULL); + if (ret != CRYPTO_SUCCESS) + return (SET_ERROR(EIO)); + + ret = crypto_mac_update(ctx, &T_cd, NULL); + if (ret != CRYPTO_SUCCESS) + return (SET_ERROR(EIO)); + + ret = crypto_mac_update(ctx, &info_cd, NULL); + if (ret != CRYPTO_SUCCESS) + return (SET_ERROR(EIO)); + + ret = crypto_mac_update(ctx, &c_cd, NULL); + if (ret != CRYPTO_SUCCESS) + return (SET_ERROR(EIO)); + + T_len = SHA512_DIGEST_LENGTH; + T_cd.cd_length = T_len; + T_cd.cd_raw.iov_len = T_cd.cd_length; + + ret = crypto_mac_final(ctx, &T_cd, NULL); + if (ret != CRYPTO_SUCCESS) + return (SET_ERROR(EIO)); + + bcopy(T, out_buf + pos, + (i != N) ? SHA512_DIGEST_LENGTH : (out_len - pos)); + pos += SHA512_DIGEST_LENGTH; + } + + return (0); +} + +/* + * HKDF is designed to be a relatively fast function for deriving keys from a + * master key + a salt. We use this function to generate new encryption keys + * so as to avoid hitting the cryptographic limits of the underlying + * encryption modes. Note that, for the sake of deriving encryption keys, the + * info parameter is called the "salt" everywhere else in the code. 
+ */ +int +hkdf_sha512(uint8_t *key_material, uint_t km_len, uint8_t *salt, + uint_t salt_len, uint8_t *info, uint_t info_len, uint8_t *output_key, + uint_t out_len) +{ + int ret; + uint8_t extract_key[SHA512_DIGEST_LENGTH]; + + ret = hkdf_sha512_extract(salt, salt_len, key_material, km_len, + extract_key); + if (ret != 0) + return (ret); + + ret = hkdf_sha512_expand(extract_key, info, info_len, output_key, + out_len); + if (ret != 0) + return (ret); + + return (0); +} diff --git a/usr/src/uts/common/fs/zfs/refcount.c b/usr/src/uts/common/fs/zfs/refcount.c index cac716e469..657a46717c 100644 --- a/usr/src/uts/common/fs/zfs/refcount.c +++ b/usr/src/uts/common/fs/zfs/refcount.c @@ -234,9 +234,10 @@ zfs_refcount_transfer(zfs_refcount_t *dst, zfs_refcount_t *src) list_destroy(&removed); } +/* ARGSUSED */ void -zfs_refcount_transfer_ownership(zfs_refcount_t *rc, void *current_holder, - void *new_holder) +zfs_refcount_transfer_ownership_many(zfs_refcount_t *rc, uint64_t number, + void *current_holder, void *new_holder) { reference_t *ref; boolean_t found = B_FALSE; @@ -249,7 +250,8 @@ zfs_refcount_transfer_ownership(zfs_refcount_t *rc, void *current_holder, for (ref = list_head(&rc->rc_list); ref; ref = list_next(&rc->rc_list, ref)) { - if (ref->ref_holder == current_holder) { + if (ref->ref_holder == current_holder && + ref->ref_number == number) { ref->ref_holder = new_holder; found = B_TRUE; break; @@ -259,6 +261,14 @@ zfs_refcount_transfer_ownership(zfs_refcount_t *rc, void *current_holder, mutex_exit(&rc->rc_mtx); } +void +zfs_refcount_transfer_ownership(zfs_refcount_t *rc, void *current_holder, + void *new_holder) +{ + zfs_refcount_transfer_ownership_many(rc, 1, current_holder, + new_holder); +} + /* * If tracking is enabled, return true if a reference exists that matches * the "holder" tag. 
If tracking is disabled, then return true if a reference diff --git a/usr/src/uts/common/fs/zfs/sa.c b/usr/src/uts/common/fs/zfs/sa.c index d3c0a3e8ef..5a4bc705aa 100644 --- a/usr/src/uts/common/fs/zfs/sa.c +++ b/usr/src/uts/common/fs/zfs/sa.c @@ -680,7 +680,7 @@ sa_build_layouts(sa_handle_t *hdl, sa_bulk_attr_t *attr_desc, int attr_count, boolean_t dummy; if (hdl->sa_spill == NULL) { - VERIFY(dmu_spill_hold_by_bonus(hdl->sa_bonus, NULL, + VERIFY(dmu_spill_hold_by_bonus(hdl->sa_bonus, 0, NULL, &hdl->sa_spill) == 0); } dmu_buf_will_dirty(hdl->sa_spill, tx); diff --git a/usr/src/uts/common/fs/zfs/spa.c b/usr/src/uts/common/fs/zfs/spa.c index 718b2868de..c72e462b4f 100644 --- a/usr/src/uts/common/fs/zfs/spa.c +++ b/usr/src/uts/common/fs/zfs/spa.c @@ -1201,6 +1201,8 @@ spa_activate(spa_t *spa, int mode) avl_create(&spa->spa_errlist_last, spa_error_entry_compare, sizeof (spa_error_entry_t), offsetof(spa_error_entry_t, se_avl)); + + spa_keystore_init(&spa->spa_keystore); } /* @@ -1252,10 +1254,11 @@ spa_deactivate(spa_t *spa) * still have errors left in the queues. Empty them just in case. */ spa_errlog_drain(spa); - avl_destroy(&spa->spa_errlist_scrub); avl_destroy(&spa->spa_errlist_last); + spa_keystore_fini(&spa->spa_keystore); + spa->spa_state = POOL_STATE_UNINITIALIZED; mutex_enter(&spa->spa_proc_lock); @@ -2089,8 +2092,8 @@ spa_load_verify(spa_t *spa) spa_load_verify_data, spa_load_verify_metadata); } error = traverse_pool(spa, spa->spa_verify_min_txg, - TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA, - spa_load_verify_cb, rio); + TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA | + TRAVERSE_NO_DECRYPT, spa_load_verify_cb, rio); } (void) zio_wait(rio); @@ -2290,7 +2293,7 @@ spa_load(spa_t *spa, spa_load_state_t state, spa_import_type_t type) spa->spa_loaded_ts.tv_nsec = 0; } if (error != EBADF) { - zfs_ereport_post(ereport, spa, NULL, NULL, 0, 0); + zfs_ereport_post(ereport, spa, NULL, NULL, NULL, 0, 0); } } spa->spa_load_state = error ? 
SPA_LOAD_ERROR : SPA_LOAD_NONE; @@ -3260,6 +3263,16 @@ spa_ld_check_features(spa_t *spa, boolean_t *missing_feat_writep) return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); } + /* + * Encryption was added before bookmark_v2, even though bookmark_v2 + * is now a dependency. If this pool has encryption enabled without + * bookmark_v2, trigger an errata message. + */ + if (spa_feature_is_enabled(spa, SPA_FEATURE_ENCRYPTION) && + !spa_feature_is_enabled(spa, SPA_FEATURE_BOOKMARK_V2)) { + spa->spa_errata = ZPOOL_ERRATA_ZOL_8308_ENCRYPTION; + } + return (0); } @@ -4829,11 +4842,27 @@ spa_l2cache_drop(spa_t *spa) } /* + * Verify encryption parameters for spa creation. If we are encrypting, we must + * have the encryption feature flag enabled. + */ +static int +spa_create_check_encryption_params(dsl_crypto_params_t *dcp, + boolean_t has_encryption) +{ + if (dcp->cp_crypt != ZIO_CRYPT_OFF && + dcp->cp_crypt != ZIO_CRYPT_INHERIT && + !has_encryption) + return (SET_ERROR(ENOTSUP)); + + return (dmu_objset_create_crypt_check(NULL, dcp, NULL)); +} + +/* * Pool Creation */ int spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, - nvlist_t *zplprops) + nvlist_t *zplprops, dsl_crypto_params_t *dcp) { spa_t *spa; char *altroot = NULL; @@ -4848,6 +4877,9 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, boolean_t has_features; char *poolname; nvlist_t *nvl; + boolean_t has_encryption; + spa_feature_t feat; + char *feat_name; if (props == NULL || nvlist_lookup_string(props, @@ -4888,10 +4920,27 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, spa->spa_import_flags |= ZFS_IMPORT_TEMP_NAME; has_features = B_FALSE; + has_encryption = B_FALSE; for (nvpair_t *elem = nvlist_next_nvpair(props, NULL); elem != NULL; elem = nvlist_next_nvpair(props, elem)) { - if (zpool_prop_feature(nvpair_name(elem))) + if (zpool_prop_feature(nvpair_name(elem))) { has_features = B_TRUE; + feat_name = strchr(nvpair_name(elem), '@') + 1; + 
VERIFY0(zfeature_lookup_name(feat_name, &feat)); + if (feat == SPA_FEATURE_ENCRYPTION) + has_encryption = B_TRUE; + } + } + + /* verify encryption params, if they were provided */ + if (dcp != NULL) { + error = spa_create_check_encryption_params(dcp, has_encryption); + if (error != 0) { + spa_deactivate(spa); + spa_remove(spa); + mutex_exit(&spa_namespace_lock); + return (error); + } } if (has_features || nvlist_lookup_uint64(props, @@ -4991,8 +5040,7 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, } spa->spa_is_initializing = B_TRUE; - spa->spa_dsl_pool = dp = dsl_pool_create(spa, zplprops, txg); - spa->spa_meta_objset = dp->dp_meta_objset; + spa->spa_dsl_pool = dp = dsl_pool_create(spa, zplprops, dcp, txg); spa->spa_is_initializing = B_FALSE; /* @@ -5017,9 +5065,6 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, cmn_err(CE_PANIC, "failed to add pool config"); } - if (spa_version(spa) >= SPA_VERSION_FEATURES) - spa_feature_create_zap_objects(spa, tx); - if (zap_add(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CREATION_VERSION, sizeof (uint64_t), 1, &version, tx) != 0) { diff --git a/usr/src/uts/common/fs/zfs/spa_config.c b/usr/src/uts/common/fs/zfs/spa_config.c index e01260f312..4719696ca4 100644 --- a/usr/src/uts/common/fs/zfs/spa_config.c +++ b/usr/src/uts/common/fs/zfs/spa_config.c @@ -281,7 +281,7 @@ spa_write_cachefile(spa_t *target, boolean_t removing, boolean_t postsysevent) */ if (target->spa_ccw_fail_time == 0) { zfs_ereport_post(FM_EREPORT_ZFS_CONFIG_CACHE_WRITE, - target, NULL, NULL, 0, 0); + target, NULL, NULL, NULL, 0, 0); } target->spa_ccw_fail_time = gethrtime(); spa_async_request(target, SPA_ASYNC_CONFIG_UPDATE); @@ -408,6 +408,7 @@ spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats) fnvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE, spa_state(spa)); fnvlist_add_uint64(config, ZPOOL_CONFIG_POOL_TXG, txg); fnvlist_add_uint64(config, ZPOOL_CONFIG_POOL_GUID, spa_guid(spa)); + 
fnvlist_add_uint64(config, ZPOOL_CONFIG_ERRATA, spa->spa_errata); if (spa->spa_comment != NULL) { fnvlist_add_string(config, ZPOOL_CONFIG_COMMENT, spa->spa_comment); diff --git a/usr/src/uts/common/fs/zfs/spa_errlog.c b/usr/src/uts/common/fs/zfs/spa_errlog.c index 8ce780537a..f717ebb8c0 100644 --- a/usr/src/uts/common/fs/zfs/spa_errlog.c +++ b/usr/src/uts/common/fs/zfs/spa_errlog.c @@ -90,9 +90,8 @@ name_to_bookmark(char *buf, zbookmark_phys_t *zb) * during spa_errlog_sync(). */ void -spa_log_error(spa_t *spa, zio_t *zio) +spa_log_error(spa_t *spa, const zbookmark_phys_t *zb) { - zbookmark_phys_t *zb = &zio->io_logical->io_bookmark; spa_error_entry_t search; spa_error_entry_t *new; avl_tree_t *tree; diff --git a/usr/src/uts/common/fs/zfs/spa_history.c b/usr/src/uts/common/fs/zfs/spa_history.c index 2ad0dcfc5c..897d3c6e9a 100644 --- a/usr/src/uts/common/fs/zfs/spa_history.c +++ b/usr/src/uts/common/fs/zfs/spa_history.c @@ -384,11 +384,16 @@ spa_history_log_nvl(spa_t *spa, nvlist_t *nvl) { int err = 0; dmu_tx_t *tx; - nvlist_t *nvarg; + nvlist_t *nvarg, *in_nvl = NULL; if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY || !spa_writeable(spa)) return (SET_ERROR(EINVAL)); + err = nvlist_lookup_nvlist(nvl, ZPOOL_HIST_INPUT_NVL, &in_nvl); + if (err == 0) { + (void) nvlist_remove_all(in_nvl, ZPOOL_HIDDEN_ARGS); + } + tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir); err = dmu_tx_assign(tx, TXG_WAIT); if (err) { diff --git a/usr/src/uts/common/fs/zfs/sys/abd.h b/usr/src/uts/common/fs/zfs/sys/abd.h index 952a0b68ba..621635933e 100644 --- a/usr/src/uts/common/fs/zfs/sys/abd.h +++ b/usr/src/uts/common/fs/zfs/sys/abd.h @@ -73,6 +73,7 @@ abd_t *abd_alloc_for_io(size_t, boolean_t); abd_t *abd_alloc_sametype(abd_t *, size_t); void abd_free(abd_t *); abd_t *abd_get_offset(abd_t *, size_t); +abd_t *abd_get_offset_size(abd_t *, size_t, size_t); abd_t *abd_get_from_buf(void *, size_t); void abd_put(abd_t *); diff --git a/usr/src/uts/common/fs/zfs/sys/arc.h 
b/usr/src/uts/common/fs/zfs/sys/arc.h index 1ce4740fcf..f636d3dcf2 100644 --- a/usr/src/uts/common/fs/zfs/sys/arc.h +++ b/usr/src/uts/common/fs/zfs/sys/arc.h @@ -58,8 +58,17 @@ _NOTE(CONSTCOND) } while (0) typedef struct arc_buf_hdr arc_buf_hdr_t; typedef struct arc_buf arc_buf_t; + +/* + * Because the ARC can store encrypted data, errors (not due to bugs) may arise + * while transforming data into its desired format - specifically, when + * decrypting, the key may not be present, or the HMAC may not be correct + * which signifies deliberate tampering with the on-disk state + * (assuming that the checksum was correct). The "error" parameter will be + * nonzero in this case, even if there is no associated zio. + */ typedef void arc_read_done_func_t(zio_t *zio, const zbookmark_phys_t *zb, - const blkptr_t *bp, arc_buf_t *buf, void *private); + const blkptr_t *bp, arc_buf_t *buf, void *priv); typedef void arc_write_done_func_t(zio_t *zio, arc_buf_t *buf, void *priv); /* generic arc_done_func_t's which you can use */ @@ -93,20 +102,29 @@ typedef enum arc_flags ARC_FLAG_L2_WRITING = 1 << 12, /* write in progress */ ARC_FLAG_L2_EVICTED = 1 << 13, /* evicted during I/O */ ARC_FLAG_L2_WRITE_HEAD = 1 << 14, /* head of write list */ + /* + * Encrypted or authenticated on disk (may be plaintext in memory). + * This header has b_crypt_hdr allocated. Does not include indirect + * blocks with checksums of MACs which will also have their X + * (encrypted) bit set in the bp. 
+ */ + ARC_FLAG_PROTECTED = 1 << 15, + /* data has not been authenticated yet */ + ARC_FLAG_NOAUTH = 1 << 16, /* indicates that the buffer contains metadata (otherwise, data) */ - ARC_FLAG_BUFC_METADATA = 1 << 15, + ARC_FLAG_BUFC_METADATA = 1 << 17, /* Flags specifying whether optional hdr struct fields are defined */ - ARC_FLAG_HAS_L1HDR = 1 << 16, - ARC_FLAG_HAS_L2HDR = 1 << 17, + ARC_FLAG_HAS_L1HDR = 1 << 18, + ARC_FLAG_HAS_L2HDR = 1 << 19, /* * Indicates the arc_buf_hdr_t's b_pdata matches the on-disk data. * This allows the l2arc to use the blkptr's checksum to verify * the data without having to store the checksum in the hdr. */ - ARC_FLAG_COMPRESSED_ARC = 1 << 18, - ARC_FLAG_SHARED_DATA = 1 << 19, + ARC_FLAG_COMPRESSED_ARC = 1 << 20, + ARC_FLAG_SHARED_DATA = 1 << 21, /* * The arc buffer's compression mode is stored in the top 7 bits of the @@ -125,7 +143,12 @@ typedef enum arc_flags typedef enum arc_buf_flags { ARC_BUF_FLAG_SHARED = 1 << 0, - ARC_BUF_FLAG_COMPRESSED = 1 << 1 + ARC_BUF_FLAG_COMPRESSED = 1 << 1, + /* + * indicates whether this arc_buf_t is encrypted, regardless of + * state on-disk + */ + ARC_BUF_FLAG_ENCRYPTED = 1 << 2 } arc_buf_flags_t; struct arc_buf { @@ -159,15 +182,31 @@ typedef enum arc_space_type { void arc_space_consume(uint64_t space, arc_space_type_t type); void arc_space_return(uint64_t space, arc_space_type_t type); boolean_t arc_is_metadata(arc_buf_t *buf); +boolean_t arc_is_encrypted(arc_buf_t *buf); +boolean_t arc_is_unauthenticated(arc_buf_t *buf); enum zio_compress arc_get_compression(arc_buf_t *buf); -int arc_decompress(arc_buf_t *buf); +void arc_get_raw_params(arc_buf_t *buf, boolean_t *byteorder, uint8_t *salt, + uint8_t *iv, uint8_t *mac); +int arc_untransform(arc_buf_t *buf, spa_t *spa, const zbookmark_phys_t *zb, + boolean_t in_place); +void arc_convert_to_raw(arc_buf_t *buf, uint64_t dsobj, boolean_t byteorder, + dmu_object_type_t ot, const uint8_t *salt, const uint8_t *iv, + const uint8_t *mac); arc_buf_t 
*arc_alloc_buf(spa_t *spa, void *tag, arc_buf_contents_t type, int32_t size); arc_buf_t *arc_alloc_compressed_buf(spa_t *spa, void *tag, uint64_t psize, uint64_t lsize, enum zio_compress compression_type); +arc_buf_t *arc_alloc_raw_buf(spa_t *spa, void *tag, uint64_t dsobj, + boolean_t byteorder, const uint8_t *salt, const uint8_t *iv, + const uint8_t *mac, dmu_object_type_t ot, uint64_t psize, uint64_t lsize, + enum zio_compress compression_type); arc_buf_t *arc_loan_buf(spa_t *spa, boolean_t is_metadata, int size); arc_buf_t *arc_loan_compressed_buf(spa_t *spa, uint64_t psize, uint64_t lsize, enum zio_compress compression_type); +arc_buf_t *arc_loan_raw_buf(spa_t *spa, uint64_t dsobj, boolean_t byteorder, + const uint8_t *salt, const uint8_t *iv, const uint8_t *mac, + dmu_object_type_t ot, uint64_t psize, uint64_t lsize, + enum zio_compress compression_type); void arc_return_buf(arc_buf_t *buf, void *tag); void arc_loan_inuse_buf(arc_buf_t *buf, void *tag); void arc_buf_destroy(arc_buf_t *buf, void *tag); diff --git a/usr/src/uts/common/fs/zfs/sys/dbuf.h b/usr/src/uts/common/fs/zfs/sys/dbuf.h index eedefa3615..271232c61c 100644 --- a/usr/src/uts/common/fs/zfs/sys/dbuf.h +++ b/usr/src/uts/common/fs/zfs/sys/dbuf.h @@ -54,6 +54,7 @@ extern "C" { #define DB_RF_NOPREFETCH (1 << 3) #define DB_RF_NEVERWAIT (1 << 4) #define DB_RF_CACHED (1 << 5) +#define DB_RF_NO_DECRYPT (1 << 6) /* * The simplified state transition diagram for dbufs looks like: @@ -153,6 +154,16 @@ typedef struct dbuf_dirty_record { override_states_t dr_override_state; uint8_t dr_copies; boolean_t dr_nopwrite; + boolean_t dr_has_raw_params; + + /* + * If dr_has_raw_params is set, the following crypt + * params will be set on the BP that's written. 
+ */ + boolean_t dr_byteorder; + uint8_t dr_salt[ZIO_DATA_SALT_LEN]; + uint8_t dr_iv[ZIO_DATA_IV_LEN]; + uint8_t dr_mac[ZIO_DATA_MAC_LEN]; } dl; } dt; } dbuf_dirty_record_t; diff --git a/usr/src/uts/common/fs/zfs/sys/ddt.h b/usr/src/uts/common/fs/zfs/sys/ddt.h index 15d2a9a7ad..9181e59fff 100644 --- a/usr/src/uts/common/fs/zfs/sys/ddt.h +++ b/usr/src/uts/common/fs/zfs/sys/ddt.h @@ -67,9 +67,10 @@ enum ddt_class { typedef struct ddt_key { zio_cksum_t ddk_cksum; /* 256-bit block checksum */ /* - * Encoded with logical & physical size, and compression, as follows: + * Encoded with logical & physical size, encryption, and compression, + * as follows: * +-------+-------+-------+-------+-------+-------+-------+-------+ - * | 0 | 0 | 0 | comp | PSIZE | LSIZE | + * | 0 | 0 | 0 |X| comp| PSIZE | LSIZE | * +-------+-------+-------+-------+-------+-------+-------+-------+ */ uint64_t ddk_prop; @@ -85,11 +86,17 @@ typedef struct ddt_key { #define DDK_SET_PSIZE(ddk, x) \ BF64_SET_SB((ddk)->ddk_prop, 16, 16, SPA_MINBLOCKSHIFT, 1, x) -#define DDK_GET_COMPRESS(ddk) BF64_GET((ddk)->ddk_prop, 32, 8) -#define DDK_SET_COMPRESS(ddk, x) BF64_SET((ddk)->ddk_prop, 32, 8, x) +#define DDK_GET_COMPRESS(ddk) BF64_GET((ddk)->ddk_prop, 32, 7) +#define DDK_SET_COMPRESS(ddk, x) BF64_SET((ddk)->ddk_prop, 32, 7, x) + +#define DDK_GET_CRYPT(ddk) BF64_GET((ddk)->ddk_prop, 39, 1) +#define DDK_SET_CRYPT(ddk, x) BF64_SET((ddk)->ddk_prop, 39, 1, x) #define DDT_KEY_WORDS (sizeof (ddt_key_t) / sizeof (uint64_t)) +#define DDE_GET_NDVAS(dde) (DDK_GET_CRYPT(&dde->dde_key) \ + ? 
SPA_DVAS_PER_BP - 1 : SPA_DVAS_PER_BP) + typedef struct ddt_phys { dva_t ddp_dva[SPA_DVAS_PER_BP]; uint64_t ddp_refcnt; diff --git a/usr/src/uts/common/fs/zfs/sys/dmu.h b/usr/src/uts/common/fs/zfs/sys/dmu.h index 28462ff4d5..ffce616cbc 100644 --- a/usr/src/uts/common/fs/zfs/sys/dmu.h +++ b/usr/src/uts/common/fs/zfs/sys/dmu.h @@ -77,6 +77,7 @@ struct arc_buf; struct zio_prop; struct sa_handle; struct locked_range; +struct dsl_crypto_params; typedef struct objset objset_t; typedef struct dmu_tx dmu_tx_t; @@ -106,7 +107,8 @@ typedef enum dmu_object_byteswap { #define DMU_OT_NEWTYPE 0x80 #define DMU_OT_METADATA 0x40 -#define DMU_OT_BYTESWAP_MASK 0x3f +#define DMU_OT_ENCRYPTED 0x20 +#define DMU_OT_BYTESWAP_MASK 0x1f /* * Defines a uint8_t object type. Object types specify if the data @@ -114,18 +116,28 @@ typedef enum dmu_object_byteswap { * (dmu_object_byteswap_t). All of the types created by this method * are cached in the dbuf metadata cache. */ -#define DMU_OT(byteswap, metadata) \ +#define DMU_OT(byteswap, metadata, encrypted) \ (DMU_OT_NEWTYPE | \ ((metadata) ? DMU_OT_METADATA : 0) | \ + ((encrypted) ? DMU_OT_ENCRYPTED : 0) | \ ((byteswap) & DMU_OT_BYTESWAP_MASK)) #define DMU_OT_IS_VALID(ot) (((ot) & DMU_OT_NEWTYPE) ? \ ((ot) & DMU_OT_BYTESWAP_MASK) < DMU_BSWAP_NUMFUNCS : \ (ot) < DMU_OT_NUMTYPES) +/* + * MDB doesn't have dmu_ot; it defines these macros itself. + */ +#ifndef ZFS_MDB +#define DMU_OT_IS_METADATA_IMPL(ot) (dmu_ot[ot].ot_metadata) +#define DMU_OT_IS_ENCRYPTED_IMPL(ot) (dmu_ot[ot].ot_encrypt) +#define DMU_OT_BYTESWAP_IMPL(ot) (dmu_ot[ot].ot_byteswap) +#endif + #define DMU_OT_IS_METADATA(ot) (((ot) & DMU_OT_NEWTYPE) ? \ ((ot) & DMU_OT_METADATA) : \ - dmu_ot[(ot)].ot_metadata) + DMU_OT_IS_METADATA_IMPL(ot)) #define DMU_OT_IS_DDT(ot) \ ((ot) == DMU_OT_DDT_ZAP) @@ -140,6 +152,10 @@ typedef enum dmu_object_byteswap { #define DMU_OT_IS_METADATA_CACHED(ot) (((ot) & DMU_OT_NEWTYPE) ? 
\ B_TRUE : dmu_ot[(ot)].ot_dbuf_metadata_cache) +#define DMU_OT_IS_ENCRYPTED(ot) (((ot) & DMU_OT_NEWTYPE) ? \ + ((ot) & DMU_OT_ENCRYPTED) : \ + DMU_OT_IS_ENCRYPTED_IMPL(ot)) + /* * These object types use bp_fill != 1 for their L0 bp's. Therefore they can't * have their data embedded (i.e. use a BP_IS_EMBEDDED() bp), because bp_fill @@ -150,7 +166,7 @@ typedef enum dmu_object_byteswap { #define DMU_OT_BYTESWAP(ot) (((ot) & DMU_OT_NEWTYPE) ? \ ((ot) & DMU_OT_BYTESWAP_MASK) : \ - dmu_ot[(ot)].ot_byteswap) + DMU_OT_BYTESWAP_IMPL(ot)) typedef enum dmu_object_type { DMU_OT_NONE, @@ -236,16 +252,27 @@ typedef enum dmu_object_type { /* * Names for valid types declared with DMU_OT(). */ - DMU_OTN_UINT8_DATA = DMU_OT(DMU_BSWAP_UINT8, B_FALSE), - DMU_OTN_UINT8_METADATA = DMU_OT(DMU_BSWAP_UINT8, B_TRUE), - DMU_OTN_UINT16_DATA = DMU_OT(DMU_BSWAP_UINT16, B_FALSE), - DMU_OTN_UINT16_METADATA = DMU_OT(DMU_BSWAP_UINT16, B_TRUE), - DMU_OTN_UINT32_DATA = DMU_OT(DMU_BSWAP_UINT32, B_FALSE), - DMU_OTN_UINT32_METADATA = DMU_OT(DMU_BSWAP_UINT32, B_TRUE), - DMU_OTN_UINT64_DATA = DMU_OT(DMU_BSWAP_UINT64, B_FALSE), - DMU_OTN_UINT64_METADATA = DMU_OT(DMU_BSWAP_UINT64, B_TRUE), - DMU_OTN_ZAP_DATA = DMU_OT(DMU_BSWAP_ZAP, B_FALSE), - DMU_OTN_ZAP_METADATA = DMU_OT(DMU_BSWAP_ZAP, B_TRUE), + DMU_OTN_UINT8_DATA = DMU_OT(DMU_BSWAP_UINT8, B_FALSE, B_FALSE), + DMU_OTN_UINT8_METADATA = DMU_OT(DMU_BSWAP_UINT8, B_TRUE, B_FALSE), + DMU_OTN_UINT16_DATA = DMU_OT(DMU_BSWAP_UINT16, B_FALSE, B_FALSE), + DMU_OTN_UINT16_METADATA = DMU_OT(DMU_BSWAP_UINT16, B_TRUE, B_FALSE), + DMU_OTN_UINT32_DATA = DMU_OT(DMU_BSWAP_UINT32, B_FALSE, B_FALSE), + DMU_OTN_UINT32_METADATA = DMU_OT(DMU_BSWAP_UINT32, B_TRUE, B_FALSE), + DMU_OTN_UINT64_DATA = DMU_OT(DMU_BSWAP_UINT64, B_FALSE, B_FALSE), + DMU_OTN_UINT64_METADATA = DMU_OT(DMU_BSWAP_UINT64, B_TRUE, B_FALSE), + DMU_OTN_ZAP_DATA = DMU_OT(DMU_BSWAP_ZAP, B_FALSE, B_FALSE), + DMU_OTN_ZAP_METADATA = DMU_OT(DMU_BSWAP_ZAP, B_TRUE, B_FALSE), + + DMU_OTN_UINT8_ENC_DATA = 
DMU_OT(DMU_BSWAP_UINT8, B_FALSE, B_TRUE), + DMU_OTN_UINT8_ENC_METADATA = DMU_OT(DMU_BSWAP_UINT8, B_TRUE, B_TRUE), + DMU_OTN_UINT16_ENC_DATA = DMU_OT(DMU_BSWAP_UINT16, B_FALSE, B_TRUE), + DMU_OTN_UINT16_ENC_METADATA = DMU_OT(DMU_BSWAP_UINT16, B_TRUE, B_TRUE), + DMU_OTN_UINT32_ENC_DATA = DMU_OT(DMU_BSWAP_UINT32, B_FALSE, B_TRUE), + DMU_OTN_UINT32_ENC_METADATA = DMU_OT(DMU_BSWAP_UINT32, B_TRUE, B_TRUE), + DMU_OTN_UINT64_ENC_DATA = DMU_OT(DMU_BSWAP_UINT64, B_FALSE, B_TRUE), + DMU_OTN_UINT64_ENC_METADATA = DMU_OT(DMU_BSWAP_UINT64, B_TRUE, B_TRUE), + DMU_OTN_ZAP_ENC_DATA = DMU_OT(DMU_BSWAP_ZAP, B_FALSE, B_TRUE), + DMU_OTN_ZAP_ENC_METADATA = DMU_OT(DMU_BSWAP_ZAP, B_TRUE, B_TRUE), } dmu_object_type_t; /* @@ -285,19 +312,24 @@ void zfs_znode_byteswap(void *buf, size_t size); */ #define DMU_BONUS_BLKID (-1ULL) #define DMU_SPILL_BLKID (-2ULL) + /* * Public routines to create, destroy, open, and close objsets. */ +typedef void dmu_objset_create_sync_func_t(objset_t *os, void *arg, + cred_t *cr, dmu_tx_t *tx); + int dmu_objset_hold(const char *name, void *tag, objset_t **osp); int dmu_objset_own(const char *name, dmu_objset_type_t type, - boolean_t readonly, void *tag, objset_t **osp); + boolean_t readonly, boolean_t key_required, void *tag, objset_t **osp); void dmu_objset_rele(objset_t *os, void *tag); -void dmu_objset_disown(objset_t *os, void *tag); +void dmu_objset_disown(objset_t *os, boolean_t key_required, void *tag); int dmu_objset_open_ds(struct dsl_dataset *ds, objset_t **osp); void dmu_objset_evict_dbufs(objset_t *os); int dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags, - void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg); + struct dsl_crypto_params *dcp, dmu_objset_create_sync_func_t func, + void *arg); int dmu_objset_clone(const char *name, const char *origin); int dsl_destroy_snapshots_nvl(struct nvlist *snaps, boolean_t defer, struct nvlist *errlist); @@ -378,11 +410,12 @@ int dmu_object_claim_dnsize(objset_t 
*os, uint64_t object, dmu_object_type_t ot, int dnodesize, dmu_tx_t *tx); int dmu_object_reclaim_dnsize(objset_t *os, uint64_t object, dmu_object_type_t ot, int blocksize, dmu_object_type_t bonustype, - int bonuslen, int dnodesize, dmu_tx_t *txp); + int bonuslen, int dnodesize, boolean_t keep_spill, dmu_tx_t *txp); int dmu_object_claim(objset_t *os, uint64_t object, dmu_object_type_t ot, int blocksize, dmu_object_type_t bonus_type, int bonus_len, dmu_tx_t *tx); int dmu_object_reclaim(objset_t *os, uint64_t object, dmu_object_type_t ot, int blocksize, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *txp); +int dmu_object_rm_spill(objset_t *os, uint64_t object, dmu_tx_t *tx); /* * Free an object from this objset. @@ -417,6 +450,13 @@ int dmu_object_next(objset_t *os, uint64_t *objectp, boolean_t hole, uint64_t txg); /* + * Set the number of levels on a dnode. nlevels must be greater than the + * current number of levels or an EINVAL will be returned. + */ +int dmu_object_set_nlevels(objset_t *os, uint64_t object, int nlevels, + dmu_tx_t *tx); + +/* * Set the data blocksize for an object. * * The object cannot have any blocks allcated beyond the first. If @@ -431,6 +471,14 @@ int dmu_object_set_blocksize(objset_t *os, uint64_t object, uint64_t size, int ibs, dmu_tx_t *tx); /* + * Manually set the maxblkid on a dnode. This will adjust nlevels accordingly + * to accommodate the change. When calling this function, the caller must + * ensure that the object's nlevels can sufficiently support the new maxblkid. + */ +int dmu_object_set_maxblkid(objset_t *os, uint64_t object, uint64_t maxblkid, + dmu_tx_t *tx); + +/* * Set the checksum property on a dnode. The new checksum algorithm will * apply to all newly written blocks; existing blocks will not be affected. 
*/ @@ -460,6 +508,11 @@ dmu_write_embedded(objset_t *os, uint64_t object, uint64_t offset, void dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, struct zio_prop *zp); +void dmu_write_policy_override_compress(struct zio_prop *zp, + enum zio_compress compress); +void dmu_write_policy_override_encrypt(struct zio_prop *zp, + dmu_object_type_t ot, boolean_t byteorder, enum zio_compress compress, + const uint8_t *salt, const uint8_t *iv, const uint8_t *mac); /* * The bonus data is accessed more or less like a regular buffer. * You must dmu_bonus_hold() to get the buffer, which will give you a @@ -472,7 +525,11 @@ void dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, * * Returns ENOENT, EIO, or 0. */ +int dmu_bonus_hold_impl(objset_t *os, uint64_t object, void *tag, + uint32_t flags, dmu_buf_t **dbp); int dmu_bonus_hold(objset_t *os, uint64_t object, void *tag, dmu_buf_t **); +int dmu_bonus_hold_by_dnode(dnode_t *dn, void *tag, dmu_buf_t **dbp, + uint32_t flags); int dmu_bonus_max(void); int dmu_set_bonus(dmu_buf_t *, int, dmu_tx_t *); int dmu_set_bonustype(dmu_buf_t *, dmu_object_type_t, dmu_tx_t *); @@ -483,7 +540,8 @@ int dmu_rm_spill(objset_t *, uint64_t, dmu_tx_t *); * Special spill buffer support used by "SA" framework */ -int dmu_spill_hold_by_bonus(dmu_buf_t *bonus, void *tag, dmu_buf_t **dbp); +int dmu_spill_hold_by_bonus(dmu_buf_t *bonus, uint32_t flags, void *tag, + dmu_buf_t **dbp); int dmu_spill_hold_by_dnode(dnode_t *dn, uint32_t flags, void *tag, dmu_buf_t **dbp); int dmu_spill_hold_existing(dmu_buf_t *bonus, void *tag, dmu_buf_t **dbp); @@ -525,6 +583,7 @@ boolean_t dmu_buf_try_add_ref(dmu_buf_t *, objset_t *os, uint64_t object, void dmu_buf_rele(dmu_buf_t *db, void *tag); uint64_t dmu_buf_refcount(dmu_buf_t *db); +uint64_t dmu_buf_user_refcount(dmu_buf_t *db); /* * dmu_buf_hold_array holds the DMU buffers which contain all bytes in a @@ -685,6 +744,8 @@ struct blkptr *dmu_buf_get_blkptr(dmu_buf_t *db); * (ie. 
you've called dmu_tx_hold_object(tx, db->db_object)). */ void dmu_buf_will_dirty(dmu_buf_t *db, dmu_tx_t *tx); +void dmu_buf_set_crypt_params(dmu_buf_t *db_fake, boolean_t byteorder, + const uint8_t *salt, const uint8_t *iv, const uint8_t *mac, dmu_tx_t *tx); /* * You must create a transaction, then hold the objects which you will @@ -755,9 +816,9 @@ void dmu_tx_callback_register(dmu_tx_t *tx, dmu_tx_callback_func_t *dcb_func, * -1, the range from offset to end-of-file is freed. */ int dmu_free_range(objset_t *os, uint64_t object, uint64_t offset, - uint64_t size, dmu_tx_t *tx); + uint64_t size, dmu_tx_t *tx); int dmu_free_long_range(objset_t *os, uint64_t object, uint64_t offset, - uint64_t size); + uint64_t size); int dmu_free_long_object(objset_t *os, uint64_t object); /* @@ -768,6 +829,7 @@ int dmu_free_long_object(objset_t *os, uint64_t object); */ #define DMU_READ_PREFETCH 0 /* prefetch */ #define DMU_READ_NO_PREFETCH 1 /* don't prefetch */ +#define DMU_READ_NO_DECRYPT 2 /* don't decrypt */ int dmu_read(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, void *buf, uint32_t flags); int dmu_read_by_dnode(dnode_t *dn, uint64_t offset, uint64_t size, void *buf, @@ -791,10 +853,15 @@ int dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, struct page *pp, dmu_tx_t *tx); struct arc_buf *dmu_request_arcbuf(dmu_buf_t *handle, int size); void dmu_return_arcbuf(struct arc_buf *buf); -void dmu_assign_arcbuf_dnode(dnode_t *handle, uint64_t offset, +int dmu_assign_arcbuf_by_dnode(dnode_t *handle, uint64_t offset, struct arc_buf *buf, dmu_tx_t *tx); -void dmu_assign_arcbuf(dmu_buf_t *handle, uint64_t offset, struct arc_buf *buf, - dmu_tx_t *tx); +int dmu_assign_arcbuf_by_dbuf(dmu_buf_t *handle, uint64_t offset, + struct arc_buf *buf, dmu_tx_t *tx); +void dmu_convert_to_raw(dmu_buf_t *handle, boolean_t byteorder, + const uint8_t *salt, const uint8_t *iv, const uint8_t *mac, dmu_tx_t *tx); +#define dmu_assign_arcbuf 
dmu_assign_arcbuf_by_dbuf +void dmu_copy_from_buf(objset_t *os, uint64_t object, uint64_t offset, + dmu_buf_t *handle, dmu_tx_t *tx); int dmu_xuio_init(struct xuio *uio, int niov); void dmu_xuio_fini(struct xuio *uio); int dmu_xuio_add(struct xuio *uio, struct arc_buf *abuf, offset_t off, @@ -838,6 +905,7 @@ typedef struct dmu_object_type_info { dmu_object_byteswap_t ot_byteswap; boolean_t ot_metadata; boolean_t ot_dbuf_metadata_cache; + boolean_t ot_encrypt; char *ot_name; } dmu_object_type_info_t; @@ -1008,8 +1076,6 @@ int dmu_diff(const char *tosnap_name, const char *fromsnap_name, #define ZFS_CRC64_POLY 0xC96C5795D7870F42ULL /* ECMA-182, reflected form */ extern uint64_t zfs_crc64_table[256]; -extern int zfs_mdcomp_disable; - #ifdef __cplusplus } #endif diff --git a/usr/src/uts/common/fs/zfs/sys/dmu_impl.h b/usr/src/uts/common/fs/zfs/sys/dmu_impl.h index e820fe57ec..ccb5d7ac51 100644 --- a/usr/src/uts/common/fs/zfs/sys/dmu_impl.h +++ b/usr/src/uts/common/fs/zfs/sys/dmu_impl.h @@ -163,6 +163,7 @@ extern "C" { * dn_free_txg * dn_assigned_txg * dn_dirty_txg + * dd_assigned_tx * dn_notxholds * dn_dirtyctx * dn_dirtyctx_firstset @@ -277,6 +278,7 @@ typedef struct dmu_sendarg { objset_t *dsa_os; zio_cksum_t dsa_zc; uint64_t dsa_toguid; + uint64_t dsa_fromtxg; int dsa_err; dmu_pendop_t dsa_pending_op; uint64_t dsa_featureflags; diff --git a/usr/src/uts/common/fs/zfs/sys/dmu_objset.h b/usr/src/uts/common/fs/zfs/sys/dmu_objset.h index cae1c7719a..41ae18a8b9 100644 --- a/usr/src/uts/common/fs/zfs/sys/dmu_objset.h +++ b/usr/src/uts/common/fs/zfs/sys/dmu_objset.h @@ -59,13 +59,19 @@ struct dmu_tx; #define OBJSET_FLAG_USERACCOUNTING_COMPLETE (1ULL<<0) +/* all flags are currently non-portable */ +#define OBJSET_CRYPT_PORTABLE_FLAGS_MASK (0) + typedef struct objset_phys { dnode_phys_t os_meta_dnode; zil_header_t os_zil_header; uint64_t os_type; uint64_t os_flags; + uint8_t os_portable_mac[ZIO_OBJSET_MAC_LEN]; + uint8_t os_local_mac[ZIO_OBJSET_MAC_LEN]; char 
os_pad[OBJSET_PHYS_SIZE - sizeof (dnode_phys_t)*3 - - sizeof (zil_header_t) - sizeof (uint64_t)*2]; + sizeof (zil_header_t) - sizeof (uint64_t)*2 - + 2*ZIO_OBJSET_MAC_LEN]; dnode_phys_t os_userused_dnode; dnode_phys_t os_groupused_dnode; } objset_phys_t; @@ -77,6 +83,8 @@ struct objset { spa_t *os_spa; arc_buf_t *os_phys_buf; objset_phys_t *os_phys; + boolean_t os_encrypted; + /* * The following "special" dnodes have no parent, are exempt * from dnode_move(), and are not recorded in os_dnodes, but they @@ -132,6 +140,10 @@ struct objset { uint64_t os_flags; uint64_t os_freed_dnodes; boolean_t os_rescan_dnodes; + boolean_t os_raw_receive; + + /* os_phys_buf should be written raw next txg */ + boolean_t os_next_write_raw[TXG_SIZE]; /* Protected by os_obj_lock */ kmutex_t os_obj_lock; @@ -171,14 +183,18 @@ struct objset { /* called from zpl */ int dmu_objset_hold(const char *name, void *tag, objset_t **osp); +int dmu_objset_hold_flags(const char *name, boolean_t decrypt, void *tag, + objset_t **osp); int dmu_objset_own(const char *name, dmu_objset_type_t type, - boolean_t readonly, void *tag, objset_t **osp); + boolean_t readonly, boolean_t decrypt, void *tag, objset_t **osp); int dmu_objset_own_obj(struct dsl_pool *dp, uint64_t obj, - dmu_objset_type_t type, boolean_t readonly, void *tag, objset_t **osp); + dmu_objset_type_t type, boolean_t readonly, boolean_t decrypt, + void *tag, objset_t **osp); void dmu_objset_refresh_ownership(struct dsl_dataset *ds, - struct dsl_dataset **newds, void *tag); + struct dsl_dataset **newds, boolean_t key_needed, void *tag); void dmu_objset_rele(objset_t *os, void *tag); -void dmu_objset_disown(objset_t *os, void *tag); +void dmu_objset_rele_flags(objset_t *os, boolean_t decrypt, void *tag); +void dmu_objset_disown(objset_t *os, boolean_t decrypt, void *tag); int dmu_objset_from_ds(struct dsl_dataset *ds, objset_t **osp); void dmu_objset_stats(objset_t *os, nvlist_t *nv); @@ -196,6 +212,9 @@ timestruc_t 
dmu_objset_snap_cmtime(objset_t *os); /* called from dsl */ void dmu_objset_sync(objset_t *os, zio_t *zio, dmu_tx_t *tx); boolean_t dmu_objset_is_dirty(objset_t *os, uint64_t txg); +objset_t *dmu_objset_create_impl_dnstats(spa_t *spa, struct dsl_dataset *ds, + blkptr_t *bp, dmu_objset_type_t type, int levels, int blksz, int ibs, + dmu_tx_t *tx); objset_t *dmu_objset_create_impl(spa_t *spa, struct dsl_dataset *ds, blkptr_t *bp, dmu_objset_type_t type, dmu_tx_t *tx); int dmu_objset_open_impl(spa_t *spa, struct dsl_dataset *ds, blkptr_t *bp, @@ -206,6 +225,7 @@ void dmu_objset_userquota_get_ids(dnode_t *dn, boolean_t before, dmu_tx_t *tx); boolean_t dmu_objset_userused_enabled(objset_t *os); int dmu_objset_userspace_upgrade(objset_t *os); boolean_t dmu_objset_userspace_present(objset_t *os); +boolean_t dmu_objset_incompatible_encryption_version(objset_t *os); int dmu_fsname(const char *snapname, char *buf); void dmu_objset_evict_done(objset_t *os); diff --git a/usr/src/uts/common/fs/zfs/sys/dmu_recv.h b/usr/src/uts/common/fs/zfs/sys/dmu_recv.h index 56b69e61b1..e2b595e77b 100644 --- a/usr/src/uts/common/fs/zfs/sys/dmu_recv.h +++ b/usr/src/uts/common/fs/zfs/sys/dmu_recv.h @@ -45,10 +45,15 @@ typedef struct dmu_recv_cookie { boolean_t drc_byteswap; boolean_t drc_force; boolean_t drc_resumable; + boolean_t drc_raw; boolean_t drc_clone; + boolean_t drc_spill; struct avl_tree *drc_guid_to_ds_map; + nvlist_t *drc_keynvl; zio_cksum_t drc_cksum; + uint64_t drc_fromsnapobj; uint64_t drc_newsnapobj; + uint64_t drc_ivset_guid; void *drc_owner; cred_t *drc_cred; } dmu_recv_cookie_t; diff --git a/usr/src/uts/common/fs/zfs/sys/dmu_send.h b/usr/src/uts/common/fs/zfs/sys/dmu_send.h index 65d8e99db6..382f86622d 100644 --- a/usr/src/uts/common/fs/zfs/sys/dmu_send.h +++ b/usr/src/uts/common/fs/zfs/sys/dmu_send.h @@ -40,15 +40,14 @@ struct avl_tree; struct dmu_replay_record; int dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok, - boolean_t large_block_ok, boolean_t 
compressok, int outfd, - uint64_t resumeobj, uint64_t resumeoff, - struct vnode *vp, offset_t *off); + boolean_t large_block_ok, boolean_t compressok, boolean_t rawok, int outfd, + uint64_t resumeobj, uint64_t resumeoff, struct vnode *vp, offset_t *off); int dmu_send_estimate(struct dsl_dataset *ds, struct dsl_dataset *fromds, boolean_t stream_compressed, uint64_t *sizep); int dmu_send_estimate_from_txg(struct dsl_dataset *ds, uint64_t fromtxg, boolean_t stream_compressed, uint64_t *sizep); int dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap, boolean_t embedok, boolean_t large_block_ok, boolean_t compressok, - int outfd, struct vnode *vp, offset_t *off); + boolean_t rawok, int outfd, struct vnode *vp, offset_t *off); #endif /* _DMU_SEND_H */ diff --git a/usr/src/uts/common/fs/zfs/sys/dmu_traverse.h b/usr/src/uts/common/fs/zfs/sys/dmu_traverse.h index c010edd440..8ceef5cf13 100644 --- a/usr/src/uts/common/fs/zfs/sys/dmu_traverse.h +++ b/usr/src/uts/common/fs/zfs/sys/dmu_traverse.h @@ -49,6 +49,15 @@ typedef int (blkptr_cb_t)(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, #define TRAVERSE_PREFETCH (TRAVERSE_PREFETCH_METADATA | TRAVERSE_PREFETCH_DATA) #define TRAVERSE_HARD (1<<4) +/* + * Encrypted dnode blocks have encrypted bonus buffers while the rest + * of the dnode is left unencrypted. Callers can specify the + * TRAVERSE_NO_DECRYPT flag to indicate to the traversal code that + * they wish to receive the raw encrypted dnodes instead of attempting + * to read the logical data. 
+ */ +#define TRAVERSE_NO_DECRYPT (1<<5) + /* Special traverse error return value to indicate skipping of children */ #define TRAVERSE_VISIT_NO_CHILDREN -1 diff --git a/usr/src/uts/common/fs/zfs/sys/dnode.h b/usr/src/uts/common/fs/zfs/sys/dnode.h index 3b7d619172..da72903113 100644 --- a/usr/src/uts/common/fs/zfs/sys/dnode.h +++ b/usr/src/uts/common/fs/zfs/sys/dnode.h @@ -74,9 +74,7 @@ extern "C" { /* * dnode id flags * - * Note: a file will never ever have its - * ids moved from bonus->spill - * and only in a crypto environment would it be on spill + * Note: a file will never ever have its ids moved from bonus->spill */ #define DN_ID_CHKED_BONUS 0x1 #define DN_ID_CHKED_SPILL 0x2 @@ -201,6 +199,8 @@ enum dnode_dirtycontext { * dataset and even within the same dnode block. */ +#define DNODE_CRYPT_PORTABLE_FLAGS_MASK (DNODE_FLAG_SPILL_BLKPTR) + typedef struct dnode_phys { uint8_t dn_type; /* dmu_object_type_t */ uint8_t dn_indblkshift; /* ln2(indirect block size) */ @@ -219,6 +219,13 @@ typedef struct dnode_phys { uint64_t dn_maxblkid; /* largest allocated block ID */ uint64_t dn_used; /* bytes (or sectors) of disk space */ + /* + * Both dn_pad2 and dn_pad3 are protected by the block's MAC. This + * allows us to protect any fields that might be added here in the + * future. In either case, developers will want to check + * zio_crypt_init_uios_dnode() to ensure the new field is being + * protected properly. 
+ */ uint64_t dn_pad3[4]; union { blkptr_t dn_blkptr[1+DN_OLD_MAX_BONUSLEN/sizeof (blkptr_t)]; @@ -235,8 +242,8 @@ typedef struct dnode_phys { }; } dnode_phys_t; -#define DN_SPILL_BLKPTR(dnp) (blkptr_t *)((char *)(dnp) + \ - (((dnp)->dn_extra_slots + 1) << DNODE_SHIFT) - (1 << SPA_BLKPTRSHIFT)) +#define DN_SPILL_BLKPTR(dnp) ((blkptr_t *)((char *)(dnp) + \ + (((dnp)->dn_extra_slots + 1) << DNODE_SHIFT) - (1 << SPA_BLKPTRSHIFT))) struct dnode { /* @@ -282,6 +289,7 @@ struct dnode { uint8_t dn_rm_spillblk[TXG_SIZE]; /* for removing spill blk */ uint16_t dn_next_bonuslen[TXG_SIZE]; uint32_t dn_next_blksz[TXG_SIZE]; /* next block size in bytes */ + uint64_t dn_next_maxblkid[TXG_SIZE]; /* next maxblkid in bytes */ /* protected by dn_dbufs_mtx; declared here to fill 32-bit hole */ uint32_t dn_dbufs_count; /* count of dn_dbufs */ @@ -339,6 +347,12 @@ struct dnode { }; /* + * We use this (otherwise unused) bit to indicate if the value of + * dn_next_maxblkid[txgoff] is valid to use in dnode_sync(). + */ +#define DMU_NEXT_MAXBLKID_SET (1ULL << 63) + +/* * Adds a level of indirection between the dbuf and the dnode to avoid * iterating descendent dbufs in dnode_move(). Handles are not allocated * individually, but as an array of child dnodes in dnode_hold_impl(). 
@@ -381,15 +395,18 @@ void dnode_sync(dnode_t *dn, dmu_tx_t *tx); void dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs, dmu_object_type_t bonustype, int bonuslen, int dn_slots, dmu_tx_t *tx); void dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, - dmu_object_type_t bonustype, int bonuslen, int dn_slots, dmu_tx_t *tx); + dmu_object_type_t bonustype, int bonuslen, int dn_slots, + boolean_t keep_spill, dmu_tx_t *tx); void dnode_free(dnode_t *dn, dmu_tx_t *tx); void dnode_byteswap(dnode_phys_t *dnp); void dnode_buf_byteswap(void *buf, size_t size); void dnode_verify(dnode_t *dn); +int dnode_set_nlevels(dnode_t *dn, int nlevels, dmu_tx_t *tx); int dnode_set_blksz(dnode_t *dn, uint64_t size, int ibs, dmu_tx_t *tx); void dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx); void dnode_diduse_space(dnode_t *dn, int64_t space); -void dnode_new_blkid(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx, boolean_t); +void dnode_new_blkid(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx, + boolean_t have_read, boolean_t force); uint64_t dnode_block_freed(dnode_t *dn, uint64_t blkid); void dnode_init(void); void dnode_fini(void); diff --git a/usr/src/uts/common/fs/zfs/sys/dsl_bookmark.h b/usr/src/uts/common/fs/zfs/sys/dsl_bookmark.h index e477bb231c..3cdad74414 100644 --- a/usr/src/uts/common/fs/zfs/sys/dsl_bookmark.h +++ b/usr/src/uts/common/fs/zfs/sys/dsl_bookmark.h @@ -36,8 +36,25 @@ typedef struct zfs_bookmark_phys { uint64_t zbm_guid; /* guid of bookmarked dataset */ uint64_t zbm_creation_txg; /* birth transaction group */ uint64_t zbm_creation_time; /* bookmark creation time */ + + /* the following fields are reserved for redacted send / recv */ + uint64_t zbm_redaction_obj; /* redaction list object */ + uint64_t zbm_flags; /* ZBM_FLAG_* */ + uint64_t zbm_referenced_bytes_refd; + uint64_t zbm_compressed_bytes_refd; + uint64_t zbm_uncompressed_bytes_refd; + uint64_t zbm_referenced_freed_before_next_snap; + uint64_t 
zbm_compressed_freed_before_next_snap; + uint64_t zbm_uncompressed_freed_before_next_snap; + + /* fields used for raw sends */ + uint64_t zbm_ivset_guid; } zfs_bookmark_phys_t; + +#define BOOKMARK_PHYS_SIZE_V1 (3 * sizeof (uint64_t)) +#define BOOKMARK_PHYS_SIZE_V2 (12 * sizeof (uint64_t)) + int dsl_bookmark_create(nvlist_t *, nvlist_t *); int dsl_get_bookmarks(const char *, nvlist_t *, nvlist_t *); int dsl_get_bookmarks_impl(dsl_dataset_t *, nvlist_t *, nvlist_t *); diff --git a/usr/src/uts/common/fs/zfs/sys/dsl_crypt.h b/usr/src/uts/common/fs/zfs/sys/dsl_crypt.h new file mode 100644 index 0000000000..360a69b329 --- /dev/null +++ b/usr/src/uts/common/fs/zfs/sys/dsl_crypt.h @@ -0,0 +1,226 @@ +/* + * CDDL HEADER START + * + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2017, Datto, Inc. All rights reserved. + */ + +#ifndef _SYS_DSL_CRYPT_H +#define _SYS_DSL_CRYPT_H + +#include <sys/dmu_tx.h> +#include <sys/dmu.h> +#include <sys/zio_crypt.h> +#include <sys/spa.h> +#include <sys/dsl_dataset.h> + +/* + * ZAP entry keys for DSL Crypto Keys stored on disk. In addition, + * ZFS_PROP_KEYFORMAT, ZFS_PROP_PBKDF2_SALT, and ZFS_PROP_PBKDF2_ITERS are + * also maintained here using their respective property names. 
+ */ +#define DSL_CRYPTO_KEY_CRYPTO_SUITE "DSL_CRYPTO_SUITE" +#define DSL_CRYPTO_KEY_GUID "DSL_CRYPTO_GUID" +#define DSL_CRYPTO_KEY_IV "DSL_CRYPTO_IV" +#define DSL_CRYPTO_KEY_MAC "DSL_CRYPTO_MAC" +#define DSL_CRYPTO_KEY_MASTER_KEY "DSL_CRYPTO_MASTER_KEY_1" +#define DSL_CRYPTO_KEY_HMAC_KEY "DSL_CRYPTO_HMAC_KEY_1" +#define DSL_CRYPTO_KEY_ROOT_DDOBJ "DSL_CRYPTO_ROOT_DDOBJ" +#define DSL_CRYPTO_KEY_REFCOUNT "DSL_CRYPTO_REFCOUNT" +#define DSL_CRYPTO_KEY_VERSION "DSL_CRYPTO_VERSION" + +/* + * In-memory representation of a wrapping key. One of these structs will exist + * for each encryption root with its key loaded. + */ +typedef struct dsl_wrapping_key { + /* link on spa_keystore_t:sk_wkeys */ + avl_node_t wk_avl_link; + + /* keyformat property enum */ + zfs_keyformat_t wk_keyformat; + + /* the pbkdf2 salt, if the keyformat is of type passphrase */ + uint64_t wk_salt; + + /* the pbkdf2 iterations, if the keyformat is of type passphrase */ + uint64_t wk_iters; + + /* actual wrapping key */ + crypto_key_t wk_key; + + /* refcount of holders of this key */ + zfs_refcount_t wk_refcnt; + + /* dsl directory object that owns this wrapping key */ + uint64_t wk_ddobj; +} dsl_wrapping_key_t; + +/* enum of commands indicating special actions that should be run */ +typedef enum dcp_cmd { + /* key creation commands */ + DCP_CMD_NONE = 0, /* no specific command */ + DCP_CMD_RAW_RECV, /* raw receive */ + + /* key changing commands */ + DCP_CMD_NEW_KEY, /* rewrap key as an encryption root */ + DCP_CMD_INHERIT, /* rewrap key with parent's wrapping key */ + DCP_CMD_FORCE_NEW_KEY, /* change to encryption root without rewrap */ + DCP_CMD_FORCE_INHERIT, /* inherit parent's key without rewrap */ + + DCP_CMD_MAX +} dcp_cmd_t; + +/* + * This struct is a simple wrapper around all the parameters that are usually + * required to setup encryption. It exists so that all of the params can be + * passed around the kernel together for convenience. 
+ */ +typedef struct dsl_crypto_params { + /* command indicating intended action */ + dcp_cmd_t cp_cmd; + + /* the encryption algorithm */ + enum zio_encrypt cp_crypt; + + /* keylocation property string */ + char *cp_keylocation; + + /* the wrapping key */ + dsl_wrapping_key_t *cp_wkey; +} dsl_crypto_params_t; + +/* + * In-memory representation of a DSL Crypto Key object. One of these structs + * (and corresponding on-disk ZAP object) will exist for each encrypted + * clone family that is mounted or otherwise reading protected data. + */ +typedef struct dsl_crypto_key { + /* link on spa_keystore_t:sk_dsl_keys */ + avl_node_t dck_avl_link; + + /* refcount of dsl_key_mapping_t's holding this key */ + zfs_refcount_t dck_holds; + + /* master key used to derive encryption keys */ + zio_crypt_key_t dck_key; + + /* wrapping key for syncing this structure to disk */ + dsl_wrapping_key_t *dck_wkey; + + /* on-disk object id */ + uint64_t dck_obj; +} dsl_crypto_key_t; + +/* + * In-memory mapping of a dataset object id to a DSL Crypto Key. This is used + * to look up the corresponding dsl_crypto_key_t from the zio layer for + * performing data encryption and decryption. 
+ */ +typedef struct dsl_key_mapping { + /* link on spa_keystore_t:sk_key_mappings */ + avl_node_t km_avl_link; + + /* refcount of how many users are depending on this mapping */ + zfs_refcount_t km_refcnt; + + /* dataset this crypto key belongs to (index) */ + uint64_t km_dsobj; + + /* crypto key (value) of this record */ + dsl_crypto_key_t *km_key; +} dsl_key_mapping_t; + +/* in memory structure for holding all wrapping and dsl keys */ +typedef struct spa_keystore { + /* lock for protecting sk_dsl_keys */ + krwlock_t sk_dk_lock; + + /* tree of all dsl_crypto_key_t's */ + avl_tree_t sk_dsl_keys; + + /* lock for protecting sk_key_mappings */ + krwlock_t sk_km_lock; + + /* tree of all dsl_key_mapping_t's, indexed by dsobj */ + avl_tree_t sk_key_mappings; + + /* lock for protecting the wrapping keys tree */ + krwlock_t sk_wkeys_lock; + + /* tree of all dsl_wrapping_key_t's, indexed by ddobj */ + avl_tree_t sk_wkeys; +} spa_keystore_t; + +int dsl_crypto_params_create_nvlist(dcp_cmd_t cmd, nvlist_t *props, + nvlist_t *crypto_args, dsl_crypto_params_t **dcp_out); +void dsl_crypto_params_free(dsl_crypto_params_t *dcp, boolean_t unload); +void dsl_dataset_crypt_stats(struct dsl_dataset *ds, nvlist_t *nv); +int dsl_crypto_can_set_keylocation(const char *dsname, const char *keylocation); +boolean_t dsl_dir_incompatible_encryption_version(dsl_dir_t *dd); + +void spa_keystore_init(spa_keystore_t *sk); +void spa_keystore_fini(spa_keystore_t *sk); + +void spa_keystore_dsl_key_rele(spa_t *spa, dsl_crypto_key_t *dck, void *tag); +int spa_keystore_load_wkey_impl(spa_t *spa, dsl_wrapping_key_t *wkey); +int spa_keystore_load_wkey(const char *dsname, dsl_crypto_params_t *dcp, + boolean_t noop); +int spa_keystore_unload_wkey_impl(spa_t *spa, uint64_t ddobj); +int spa_keystore_unload_wkey(const char *dsname); + +int spa_keystore_create_mapping(spa_t *spa, struct dsl_dataset *ds, void *tag, + dsl_key_mapping_t **km_out); +int spa_keystore_remove_mapping(spa_t *spa, uint64_t dsobj, void 
*tag); +void key_mapping_add_ref(dsl_key_mapping_t *km, void *tag); +void key_mapping_rele(spa_t *spa, dsl_key_mapping_t *km, void *tag); +int spa_keystore_lookup_key(spa_t *spa, uint64_t dsobj, void *tag, + dsl_crypto_key_t **dck_out); + +int dsl_crypto_populate_key_nvlist(struct dsl_dataset *ds, + uint64_t from_ivset_guid, nvlist_t **nvl_out); +int dsl_crypto_recv_raw_key_check(struct dsl_dataset *ds, + nvlist_t *nvl, dmu_tx_t *tx); +void dsl_crypto_recv_raw_key_sync(struct dsl_dataset *ds, + nvlist_t *nvl, dmu_tx_t *tx); +int dsl_crypto_recv_raw(const char *poolname, uint64_t dsobj, uint64_t fromobj, + dmu_objset_type_t ostype, nvlist_t *nvl, boolean_t do_key); + +int spa_keystore_change_key(const char *dsname, dsl_crypto_params_t *dcp); +int dsl_dir_rename_crypt_check(dsl_dir_t *dd, dsl_dir_t *newparent); +int dsl_dataset_promote_crypt_check(dsl_dir_t *target, dsl_dir_t *origin); +void dsl_dataset_promote_crypt_sync(dsl_dir_t *target, dsl_dir_t *origin, + dmu_tx_t *tx); +int dmu_objset_create_crypt_check(dsl_dir_t *parentdd, + dsl_crypto_params_t *dcp, boolean_t *will_encrypt); +void dsl_dataset_create_crypt_sync(uint64_t dsobj, dsl_dir_t *dd, + struct dsl_dataset *origin, dsl_crypto_params_t *dcp, dmu_tx_t *tx); +uint64_t dsl_crypto_key_create_sync(uint64_t crypt, dsl_wrapping_key_t *wkey, + dmu_tx_t *tx); +int dmu_objset_clone_crypt_check(dsl_dir_t *parentdd, dsl_dir_t *origindd); +uint64_t dsl_crypto_key_clone_sync(dsl_dir_t *origindd, dmu_tx_t *tx); +void dsl_crypto_key_destroy_sync(uint64_t dckobj, dmu_tx_t *tx); + +int spa_crypt_get_salt(spa_t *spa, uint64_t dsobj, uint8_t *salt); +int spa_do_crypt_mac_abd(boolean_t generate, spa_t *spa, uint64_t dsobj, + abd_t *abd, uint_t datalen, uint8_t *mac); +int spa_do_crypt_objset_mac_abd(boolean_t generate, spa_t *spa, uint64_t dsobj, + abd_t *abd, uint_t datalen, boolean_t byteswap); +int spa_do_crypt_abd(boolean_t encrypt, spa_t *spa, const zbookmark_phys_t *zb, + dmu_object_type_t ot, boolean_t dedup, 
boolean_t bswap, uint8_t *salt, + uint8_t *iv, uint8_t *mac, uint_t datalen, abd_t *pabd, abd_t *cabd, + boolean_t *no_crypt); + +#endif /* _SYS_DSL_CRYPT_H */ diff --git a/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h b/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h index 064ff617fd..189376eefc 100644 --- a/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h +++ b/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h @@ -40,6 +40,7 @@ #include <sys/dsl_deadlist.h> #include <sys/refcount.h> #include <sys/rrwlock.h> +#include <sys/dsl_crypt.h> #include <zfeature_common.h> #ifdef __cplusplus @@ -49,6 +50,8 @@ extern "C" { struct dsl_dataset; struct dsl_dir; struct dsl_pool; +struct dsl_crypto_params; +struct dsl_key_mapping; #define DS_FLAG_INCONSISTENT (1ULL<<0) #define DS_IS_INCONSISTENT(ds) \ @@ -106,6 +109,7 @@ struct dsl_pool; #define DS_FIELD_RESUME_LARGEBLOCK "com.delphix:resume_largeblockok" #define DS_FIELD_RESUME_EMBEDOK "com.delphix:resume_embedok" #define DS_FIELD_RESUME_COMPRESSOK "com.delphix:resume_compressok" +#define DS_FIELD_RESUME_RAWOK "com.datto:resume_rawok" /* * This field is set to the object number of the remap deadlist if one exists. @@ -113,6 +117,12 @@ struct dsl_pool; #define DS_FIELD_REMAP_DEADLIST "com.delphix:remap_deadlist" /* + * This field is set to the ivset guid for encrypted snapshots. This is used + * for validating raw receives. + */ +#define DS_FIELD_IVSET_GUID "com.datto:ivset_guid" + +/* * DS_FLAG_CI_DATASET is set if the dataset contains a file system whose * name lookups should be performed case-insensitively. 
*/ @@ -164,6 +174,7 @@ typedef struct dsl_dataset { uint64_t ds_object; uint64_t ds_fsid_guid; boolean_t ds_is_snapshot; + struct dsl_key_mapping *ds_key_mapping; /* only used in syncing context, only valid for non-snapshots: */ struct dsl_dataset *ds_prev; @@ -293,26 +304,40 @@ typedef struct dsl_dataset_snapshot_arg { #define DS_UNIQUE_IS_ACCURATE(ds) \ ((dsl_dataset_phys(ds)->ds_flags & DS_FLAG_UNIQUE_ACCURATE) != 0) +/* flags for holding the dataset */ +typedef enum ds_hold_flags { + DS_HOLD_FLAG_DECRYPT = 1 << 0 /* needs access to encrypted data */ +} ds_hold_flags_t; + int dsl_dataset_hold(struct dsl_pool *dp, const char *name, void *tag, dsl_dataset_t **dsp); +int dsl_dataset_hold_flags(struct dsl_pool *dp, const char *name, + ds_hold_flags_t flags, void *tag, dsl_dataset_t **dsp); boolean_t dsl_dataset_try_add_ref(struct dsl_pool *dp, dsl_dataset_t *ds, void *tag); +int dsl_dataset_create_key_mapping(dsl_dataset_t *ds); int dsl_dataset_hold_obj(struct dsl_pool *dp, uint64_t dsobj, void *tag, dsl_dataset_t **); +int dsl_dataset_hold_obj_flags(struct dsl_pool *dp, uint64_t dsobj, + ds_hold_flags_t flags, void *tag, dsl_dataset_t **); +void dsl_dataset_remove_key_mapping(dsl_dataset_t *ds); void dsl_dataset_rele(dsl_dataset_t *ds, void *tag); +void dsl_dataset_rele_flags(dsl_dataset_t *ds, ds_hold_flags_t flags, + void *tag); int dsl_dataset_own(struct dsl_pool *dp, const char *name, - void *tag, dsl_dataset_t **dsp); + ds_hold_flags_t flags, void *tag, dsl_dataset_t **dsp); int dsl_dataset_own_obj(struct dsl_pool *dp, uint64_t dsobj, - void *tag, dsl_dataset_t **dsp); -void dsl_dataset_disown(dsl_dataset_t *ds, void *tag); + ds_hold_flags_t flags, void *tag, dsl_dataset_t **dsp); +void dsl_dataset_disown(dsl_dataset_t *ds, ds_hold_flags_t flags, void *tag); void dsl_dataset_name(dsl_dataset_t *ds, char *name); boolean_t dsl_dataset_tryown(dsl_dataset_t *ds, void *tag); int dsl_dataset_namelen(dsl_dataset_t *ds); boolean_t dsl_dataset_has_owner(dsl_dataset_t 
*ds); uint64_t dsl_dataset_create_sync(dsl_dir_t *pds, const char *lastname, - dsl_dataset_t *origin, uint64_t flags, cred_t *, dmu_tx_t *); + dsl_dataset_t *origin, uint64_t flags, cred_t *, + struct dsl_crypto_params *, dmu_tx_t *); uint64_t dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin, - uint64_t flags, dmu_tx_t *tx); + struct dsl_crypto_params *dcp, uint64_t flags, dmu_tx_t *tx); void dsl_dataset_snapshot_sync(void *arg, dmu_tx_t *tx); int dsl_dataset_snapshot_check(void *arg, dmu_tx_t *tx); int dsl_dataset_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t *errors); @@ -434,6 +459,8 @@ void dsl_dataset_create_remap_deadlist(dsl_dataset_t *ds, dmu_tx_t *tx); boolean_t dsl_dataset_remap_deadlist_exists(dsl_dataset_t *ds); void dsl_dataset_destroy_remap_deadlist(dsl_dataset_t *ds, dmu_tx_t *tx); +void dsl_dataset_activate_feature(uint64_t dsobj, + spa_feature_t f, dmu_tx_t *tx); void dsl_dataset_deactivate_feature(uint64_t dsobj, spa_feature_t f, dmu_tx_t *tx); diff --git a/usr/src/uts/common/fs/zfs/sys/dsl_deleg.h b/usr/src/uts/common/fs/zfs/sys/dsl_deleg.h index 6fb6a121ad..dadbda324e 100644 --- a/usr/src/uts/common/fs/zfs/sys/dsl_deleg.h +++ b/usr/src/uts/common/fs/zfs/sys/dsl_deleg.h @@ -58,6 +58,8 @@ extern "C" { #define ZFS_DELEG_PERM_DIFF "diff" #define ZFS_DELEG_PERM_BOOKMARK "bookmark" #define ZFS_DELEG_PERM_REMAP "remap" +#define ZFS_DELEG_PERM_LOAD_KEY "load-key" +#define ZFS_DELEG_PERM_CHANGE_KEY "change-key" /* * Note: the names of properties that are marked delegatable are also diff --git a/usr/src/uts/common/fs/zfs/sys/dsl_dir.h b/usr/src/uts/common/fs/zfs/sys/dsl_dir.h index 21d953cb60..a9336f5321 100644 --- a/usr/src/uts/common/fs/zfs/sys/dsl_dir.h +++ b/usr/src/uts/common/fs/zfs/sys/dsl_dir.h @@ -33,6 +33,7 @@ #include <sys/dsl_synctask.h> #include <sys/refcount.h> #include <sys/zfs_context.h> +#include <sys/dsl_crypt.h> #ifdef __cplusplus extern "C" { @@ -48,6 +49,7 @@ struct dsl_dataset; #define DD_FIELD_FILESYSTEM_COUNT 
"com.joyent:filesystem_count" #define DD_FIELD_SNAPSHOT_COUNT "com.joyent:snapshot_count" #define DD_FIELD_LAST_REMAP_TXG "com.delphix:last_remap_txg" +#define DD_FIELD_CRYPTO_KEY_OBJ "com.datto:crypto_key_obj" typedef enum dd_used { DD_USED_HEAD, @@ -90,6 +92,7 @@ struct dsl_dir { /* These are immutable; no lock needed: */ uint64_t dd_object; + uint64_t dd_crypto_obj; dsl_pool_t *dd_pool; /* Stable until user eviction; no lock needed: */ diff --git a/usr/src/uts/common/fs/zfs/sys/dsl_pool.h b/usr/src/uts/common/fs/zfs/sys/dsl_pool.h index 66098900db..de13fa8bfa 100644 --- a/usr/src/uts/common/fs/zfs/sys/dsl_pool.h +++ b/usr/src/uts/common/fs/zfs/sys/dsl_pool.h @@ -51,6 +51,7 @@ struct dsl_dataset; struct dsl_pool; struct dmu_tx; struct dsl_scan; +struct dsl_crypto_params; extern uint64_t zfs_dirty_data_max; extern uint64_t zfs_dirty_data_max_max; @@ -144,7 +145,8 @@ typedef struct dsl_pool { int dsl_pool_init(spa_t *spa, uint64_t txg, dsl_pool_t **dpp); int dsl_pool_open(dsl_pool_t *dp); void dsl_pool_close(dsl_pool_t *dp); -dsl_pool_t *dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg); +dsl_pool_t *dsl_pool_create(spa_t *spa, nvlist_t *zplprops, + struct dsl_crypto_params *dcp, uint64_t txg); void dsl_pool_sync(dsl_pool_t *dp, uint64_t txg); void dsl_pool_sync_done(dsl_pool_t *dp, uint64_t txg); int dsl_pool_sync_context(dsl_pool_t *dp); diff --git a/usr/src/uts/common/fs/zfs/sys/hkdf.h b/usr/src/uts/common/fs/zfs/sys/hkdf.h new file mode 100644 index 0000000000..e0f7678c03 --- /dev/null +++ b/usr/src/uts/common/fs/zfs/sys/hkdf.h @@ -0,0 +1,29 @@ +/* + * CDDL HEADER START + * + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. 
A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2017, Datto, Inc. All rights reserved. + */ + +#ifndef _SYS_HKDF_H_ +#define _SYS_HKDF_H_ + +#include <sys/types.h> + +int hkdf_sha512(uint8_t *key_material, uint_t km_len, uint8_t *salt, + uint_t salt_len, uint8_t *info, uint_t info_len, uint8_t *output_key, + uint_t out_len); + +#endif /* _SYS_HKDF_H_ */ diff --git a/usr/src/uts/common/fs/zfs/sys/refcount.h b/usr/src/uts/common/fs/zfs/sys/refcount.h index 0059a245ee..ec36727065 100644 --- a/usr/src/uts/common/fs/zfs/sys/refcount.h +++ b/usr/src/uts/common/fs/zfs/sys/refcount.h @@ -76,6 +76,8 @@ int64_t zfs_refcount_add_many(zfs_refcount_t *, uint64_t, void *); int64_t zfs_refcount_remove_many(zfs_refcount_t *, uint64_t, void *); void zfs_refcount_transfer(zfs_refcount_t *, zfs_refcount_t *); void zfs_refcount_transfer_ownership(zfs_refcount_t *, void *, void *); +void zfs_refcount_transfer_ownership_many(zfs_refcount_t *, uint64_t, + void *, void *); boolean_t zfs_refcount_held(zfs_refcount_t *, void *); boolean_t zfs_refcount_not_held(zfs_refcount_t *, void *); @@ -107,6 +109,7 @@ typedef struct refcount { atomic_add_64(&(dst)->rc_count, __tmp); \ } #define zfs_refcount_transfer_ownership(rc, current_holder, new_holder) (void)0 +#define zfs_refcount_transfer_ownership_many(rc, nr, ch, nh) ((void)0) #define zfs_refcount_held(rc, holder) ((rc)->rc_count > 0) #define zfs_refcount_not_held(rc, holder) (B_TRUE) diff --git a/usr/src/uts/common/fs/zfs/sys/spa.h b/usr/src/uts/common/fs/zfs/sys/spa.h index 53b9e4ef5d..2d998d624d 100644 --- a/usr/src/uts/common/fs/zfs/sys/spa.h +++ b/usr/src/uts/common/fs/zfs/sys/spa.h @@ -61,6 +61,7 @@ typedef struct ddt ddt_t; typedef struct ddt_entry ddt_entry_t; struct dsl_pool; struct dsl_dataset; +struct dsl_crypto_params; /* * General-purpose 32-bit and 64-bit bitfield encodings. 
@@ -216,7 +217,7 @@ typedef struct zio_cksum_salt { * G gang block indicator * B byteorder (endianness) * D dedup - * X encryption (on version 30, which is not supported) + * X encryption * E blkptr_t contains embedded data (see below) * lvl level of indirection * type DMU object type @@ -230,6 +231,83 @@ typedef struct zio_cksum_salt { */ /* + * The blkptr_t's of encrypted blocks also need to store the encryption + * parameters so that the block can be decrypted. This layout is as follows: + * + * 64 56 48 40 32 24 16 8 0 + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * 0 | vdev1 | GRID | ASIZE | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * 1 |G| offset1 | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * 2 | vdev2 | GRID | ASIZE | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * 3 |G| offset2 | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * 4 | salt | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * 5 | IV1 | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * 6 |BDX|lvl| type | cksum |E| comp| PSIZE | LSIZE | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * 7 | padding | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * 8 | padding | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * 9 | physical birth txg | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * a | logical birth txg | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * b | IV2 | fill count | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * c | checksum[0] | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * d | checksum[1] | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * e | MAC[0] | + * 
+-------+-------+-------+-------+-------+-------+-------+-------+ + * f | MAC[1] | + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * + * Legend: + * + * salt Salt for generating encryption keys + * IV1 First 64 bits of encryption IV + * X Block requires encryption handling (set to 1) + * E blkptr_t contains embedded data (set to 0, see below) + * fill count number of non-zero blocks under this bp (truncated to 32 bits) + * IV2 Last 32 bits of encryption IV + * checksum[2] 128-bit checksum of the data this bp describes + * MAC[2] 128-bit message authentication code for this data + * + * The X bit being set indicates that this block is one of 3 types. If this is + * a level 0 block with an encrypted object type, the block is encrypted + * (see BP_IS_ENCRYPTED()). If this is a level 0 block with an unencrypted + * object type, this block is authenticated with an HMAC (see + * BP_IS_AUTHENTICATED()). Otherwise (if level > 0), this bp will use the MAC + * words to store a checksum-of-MACs from the level below (see + * BP_HAS_INDIRECT_MAC_CKSUM()). For convenience in the code, BP_IS_PROTECTED() + * refers to both encrypted and authenticated blocks and BP_USES_CRYPT() + * refers to any of these 3 kinds of blocks. + * + * The additional encryption parameters are the salt, IV, and MAC which are + * explained in greater detail in the block comment at the top of zio_crypt.c. + * The MAC occupies half of the checksum space since it serves a very similar + * purpose: to prevent data corruption on disk. The only functional difference + * is that the checksum is used to detect on-disk corruption whether or not the + * encryption key is loaded and the MAC provides additional protection against + * malicious disk tampering. We use the 3rd DVA to store the salt and first + * 64 bits of the IV. As a result encrypted blocks can only have 2 copies + * maximum instead of the normal 3. 
The last 32 bits of the IV are stored in + * the upper bits of what is usually the fill count. Note that only blocks at + * level 0 or -2 are ever encrypted, which allows us to guarantee that these + * 32 bits are not trampled over by other code (see zio_crypt.c for details). + * The salt and IV are not used for authenticated bps or bps with an indirect + * MAC checksum, so these blocks can utilize all 3 DVAs and the full 64 bits + * for the fill count. + */ + +/* * "Embedded" blkptr_t's don't actually point to a block, instead they * have a data payload embedded in the blkptr_t itself. See the comment * in blkptr.c for more details. @@ -284,7 +362,9 @@ typedef struct zio_cksum_salt { * BP's so the BP_SET_* macros can be used with them. etype, PSIZE, LSIZE must * be set with the BPE_SET_* macros. BP_SET_EMBEDDED() should be called before * other macros, as they assert that they are only used on BP's of the correct - * "embedded-ness". + * "embedded-ness". Encrypted blkptr_t's cannot be embedded because they use + * the payload space for encryption parameters (see the comment above on + * how encryption parameters are stored). 
*/ #define BPE_GET_ETYPE(bp) \ @@ -308,7 +388,7 @@ _NOTE(CONSTCOND) } while (0) BF64_GET_SB((bp)->blk_prop, 25, 7, 0, 1)) #define BPE_SET_PSIZE(bp, x) do { \ ASSERT(BP_IS_EMBEDDED(bp)); \ - BF64_SET_SB((bp)->blk_prop, 25, 7, 0, 1, x); \ + BF64_SET_SB((bp)->blk_prop, 25, 7, 0, 1, x); \ _NOTE(CONSTCOND) } while (0) typedef enum bp_embedded_type { @@ -410,6 +490,26 @@ _NOTE(CONSTCOND) } while (0) #define BP_GET_LEVEL(bp) BF64_GET((bp)->blk_prop, 56, 5) #define BP_SET_LEVEL(bp, x) BF64_SET((bp)->blk_prop, 56, 5, x) +/* encrypted, authenticated, and MAC cksum bps use the same bit */ +#define BP_USES_CRYPT(bp) BF64_GET((bp)->blk_prop, 61, 1) +#define BP_SET_CRYPT(bp, x) BF64_SET((bp)->blk_prop, 61, 1, x) + +#define BP_IS_ENCRYPTED(bp) \ + (BP_USES_CRYPT(bp) && \ + BP_GET_LEVEL(bp) == 0 && \ + DMU_OT_IS_ENCRYPTED(BP_GET_TYPE(bp))) + +#define BP_IS_AUTHENTICATED(bp) \ + (BP_USES_CRYPT(bp) && \ + BP_GET_LEVEL(bp) == 0 && \ + !DMU_OT_IS_ENCRYPTED(BP_GET_TYPE(bp))) + +#define BP_HAS_INDIRECT_MAC_CKSUM(bp) \ + (BP_USES_CRYPT(bp) && BP_GET_LEVEL(bp) > 0) + +#define BP_IS_PROTECTED(bp) \ + (BP_IS_ENCRYPTED(bp) || BP_IS_AUTHENTICATED(bp)) + #define BP_GET_DEDUP(bp) BF64_GET((bp)->blk_prop, 62, 1) #define BP_SET_DEDUP(bp, x) BF64_SET((bp)->blk_prop, 62, 1, x) @@ -427,7 +527,26 @@ _NOTE(CONSTCOND) } while (0) (bp)->blk_phys_birth = ((logical) == (physical) ? 0 : (physical)); \ } -#define BP_GET_FILL(bp) (BP_IS_EMBEDDED(bp) ? 1 : (bp)->blk_fill) +#define BP_GET_FILL(bp) \ + ((BP_IS_ENCRYPTED(bp)) ? BF64_GET((bp)->blk_fill, 0, 32) : \ + ((BP_IS_EMBEDDED(bp)) ? 
1 : (bp)->blk_fill)) + +#define BP_SET_FILL(bp, fill) \ +{ \ + if (BP_IS_ENCRYPTED(bp)) \ + BF64_SET((bp)->blk_fill, 0, 32, fill); \ + else \ + (bp)->blk_fill = fill; \ +} + +#define BP_GET_IV2(bp) \ + (ASSERT(BP_IS_ENCRYPTED(bp)), \ + BF64_GET((bp)->blk_fill, 32, 32)) +#define BP_SET_IV2(bp, iv2) \ +{ \ + ASSERT(BP_IS_ENCRYPTED(bp)); \ + BF64_SET((bp)->blk_fill, 32, 32, iv2); \ +} #define BP_IS_METADATA(bp) \ (BP_GET_LEVEL(bp) > 0 || DMU_OT_IS_METADATA(BP_GET_TYPE(bp))) @@ -436,7 +555,7 @@ _NOTE(CONSTCOND) } while (0) (BP_IS_EMBEDDED(bp) ? 0 : \ DVA_GET_ASIZE(&(bp)->blk_dva[0]) + \ DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \ - DVA_GET_ASIZE(&(bp)->blk_dva[2])) + (DVA_GET_ASIZE(&(bp)->blk_dva[2]) * !BP_IS_ENCRYPTED(bp))) #define BP_GET_UCSIZE(bp) \ (BP_IS_METADATA(bp) ? BP_GET_PSIZE(bp) : BP_GET_LSIZE(bp)) @@ -445,13 +564,13 @@ _NOTE(CONSTCOND) } while (0) (BP_IS_EMBEDDED(bp) ? 0 : \ !!DVA_GET_ASIZE(&(bp)->blk_dva[0]) + \ !!DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \ - !!DVA_GET_ASIZE(&(bp)->blk_dva[2])) + (!!DVA_GET_ASIZE(&(bp)->blk_dva[2]) * !BP_IS_ENCRYPTED(bp))) #define BP_COUNT_GANG(bp) \ (BP_IS_EMBEDDED(bp) ? 
0 : \ (DVA_GET_GANG(&(bp)->blk_dva[0]) + \ DVA_GET_GANG(&(bp)->blk_dva[1]) + \ - DVA_GET_GANG(&(bp)->blk_dva[2]))) + (DVA_GET_GANG(&(bp)->blk_dva[2]) * !BP_IS_ENCRYPTED(bp)))) #define DVA_EQUAL(dva1, dva2) \ ((dva1)->dva_word[1] == (dva2)->dva_word[1] && \ @@ -470,6 +589,10 @@ _NOTE(CONSTCOND) } while (0) ((zc1).zc_word[2] - (zc2).zc_word[2]) | \ ((zc1).zc_word[3] - (zc2).zc_word[3]))) +#define ZIO_CHECKSUM_MAC_EQUAL(zc1, zc2) \ + (0 == (((zc1).zc_word[0] - (zc2).zc_word[0]) | \ + ((zc1).zc_word[1] - (zc2).zc_word[1]))) + #define ZIO_CHECKSUM_IS_ZERO(zc) \ (0 == ((zc)->zc_word[0] | (zc)->zc_word[1] | \ (zc)->zc_word[2] | (zc)->zc_word[3])) @@ -530,7 +653,7 @@ _NOTE(CONSTCOND) } while (0) #define BP_SHOULD_BYTESWAP(bp) (BP_GET_BYTEORDER(bp) != ZFS_HOST_BYTEORDER) -#define BP_SPRINTF_LEN 320 +#define BP_SPRINTF_LEN 400 /* * This macro allows code sharing between zfs, libzpool, and mdb. @@ -543,7 +666,18 @@ _NOTE(CONSTCOND) } while (0) { "zero", "single", "double", "triple" }; \ int len = 0; \ int copies = 0; \ - \ + const char *crypt_type; \ + if (bp != NULL) { \ + if (BP_IS_ENCRYPTED(bp)) { \ + crypt_type = "encrypted"; \ + } else if (BP_IS_AUTHENTICATED(bp)) { \ + crypt_type = "authenticated"; \ + } else if (BP_HAS_INDIRECT_MAC_CKSUM(bp)) { \ + crypt_type = "indirect-MAC"; \ + } else { \ + crypt_type = "unencrypted"; \ + } \ + } \ if (bp == NULL) { \ len += func(buf + len, size - len, "<NULL>"); \ } else if (BP_IS_HOLE(bp)) { \ @@ -577,18 +711,27 @@ _NOTE(CONSTCOND) } while (0) (u_longlong_t)DVA_GET_ASIZE(dva), \ ws); \ } \ + if (BP_IS_ENCRYPTED(bp)) { \ + len += func(buf + len, size - len, \ + "salt=%llx iv=%llx:%llx%c", \ + (u_longlong_t)bp->blk_dva[2].dva_word[0], \ + (u_longlong_t)bp->blk_dva[2].dva_word[1], \ + (u_longlong_t)BP_GET_IV2(bp), \ + ws); \ + } \ if (BP_IS_GANG(bp) && \ DVA_GET_ASIZE(&bp->blk_dva[2]) <= \ DVA_GET_ASIZE(&bp->blk_dva[1]) / 2) \ copies--; \ len += func(buf + len, size - len, \ - "[L%llu %s] %s %s %s %s %s %s%c" \ + "[L%llu %s] %s %s %s 
%s %s %s %s%c" \ "size=%llxL/%llxP birth=%lluL/%lluP fill=%llu%c" \ "cksum=%llx:%llx:%llx:%llx", \ (u_longlong_t)BP_GET_LEVEL(bp), \ type, \ checksum, \ compress, \ + crypt_type, \ BP_GET_BYTEORDER(bp) == 0 ? "BE" : "LE", \ BP_IS_GANG(bp) ? "gang" : "contiguous", \ BP_GET_DEDUP(bp) ? "dedup" : "unique", \ @@ -622,8 +765,8 @@ extern int spa_open_rewind(const char *pool, spa_t **, void *tag, nvlist_t *policy, nvlist_t **config); extern int spa_get_stats(const char *pool, nvlist_t **config, char *altroot, size_t buflen); -extern int spa_create(const char *pool, nvlist_t *config, nvlist_t *props, - nvlist_t *zplprops); +extern int spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, + nvlist_t *zplprops, struct dsl_crypto_params *dcp); extern int spa_import_rootpool(char *devpath, char *devid); extern int spa_import(const char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags); @@ -890,9 +1033,10 @@ extern void spa_history_log_internal_dd(dsl_dir_t *dd, const char *operation, /* error handling */ struct zbookmark_phys; -extern void spa_log_error(spa_t *spa, zio_t *zio); +extern void spa_log_error(spa_t *spa, const struct zbookmark_phys *zb); extern void zfs_ereport_post(const char *class, spa_t *spa, vdev_t *vd, - zio_t *zio, uint64_t stateoroffset, uint64_t length); + const struct zbookmark_phys *zb, struct zio *zio, uint64_t stateoroffset, + uint64_t length); extern void zfs_post_remove(spa_t *spa, vdev_t *vd); extern void zfs_post_state_change(spa_t *spa, vdev_t *vd); extern void zfs_post_autoreplace(spa_t *spa, vdev_t *vd); diff --git a/usr/src/uts/common/fs/zfs/sys/spa_impl.h b/usr/src/uts/common/fs/zfs/sys/spa_impl.h index 19516a1a1b..d63013ce0d 100644 --- a/usr/src/uts/common/fs/zfs/sys/spa_impl.h +++ b/usr/src/uts/common/fs/zfs/sys/spa_impl.h @@ -44,6 +44,7 @@ #include <sys/refcount.h> #include <sys/bplist.h> #include <sys/bpobj.h> +#include <sys/dsl_crypt.h> #include <sys/zfeature.h> #include <sys/zthr.h> #include <zfeature_common.h> @@ 
-372,6 +373,8 @@ struct spa { uint64_t spa_deadman_synctime; /* deadman expiration timer */ uint64_t spa_all_vdev_zaps; /* ZAP of per-vd ZAP obj #s */ spa_avz_action_t spa_avz_action; /* destroy/rebuild AVZ? */ + spa_keystore_t spa_keystore; /* loaded crypto keys */ + uint64_t spa_errata; /* errata issues detected */ /* * spa_iokstat_lock protects spa_iokstat and diff --git a/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h b/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h index 70916c45b7..1457200dd8 100644 --- a/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h +++ b/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h @@ -94,7 +94,7 @@ typedef enum drr_headertype { /* flag #21 is reserved for a Delphix feature */ #define DMU_BACKUP_FEATURE_COMPRESSED (1 << 22) #define DMU_BACKUP_FEATURE_LARGE_DNODE (1 << 23) -/* flag #24 is reserved for the raw send feature */ +#define DMU_BACKUP_FEATURE_RAW (1 << 24) /* flag #25 is reserved for the ZSTD compression feature */ /* @@ -105,7 +105,8 @@ typedef enum drr_headertype { DMU_BACKUP_FEATURE_EMBED_DATA | DMU_BACKUP_FEATURE_LZ4 | \ DMU_BACKUP_FEATURE_RESUMING | \ DMU_BACKUP_FEATURE_LARGE_BLOCKS | DMU_BACKUP_FEATURE_LARGE_DNODE | \ - DMU_BACKUP_FEATURE_COMPRESSED) + DMU_BACKUP_FEATURE_COMPRESSED | \ + DMU_BACKUP_FEATURE_RAW) /* Are all features in the given flag word currently supported? */ #define DMU_STREAM_SUPPORTED(x) (!((x) & ~DMU_BACKUP_FEATURE_MASK)) @@ -149,20 +150,50 @@ typedef enum dmu_send_resume_token_version { * cannot necessarily be received as a clone correctly. */ #define DRR_FLAG_FREERECORDS (1<<2) +/* + * When DRR_FLAG_SPILL_BLOCK is set it indicates the DRR_OBJECT_SPILL + * and DRR_SPILL_UNMODIFIED flags are meaningful in the send stream. + * + * When DRR_FLAG_SPILL_BLOCK is set, DRR_OBJECT records will have + * DRR_OBJECT_SPILL set if and only if they should have a spill block + * (either an existing one, or a new one in the send stream). 
When clear + * the object does not have a spill block and any existing spill block + * should be freed. + * + * Similarly, when DRR_FLAG_SPILL_BLOCK is set, DRR_SPILL records will + * have DRR_SPILL_UNMODIFIED set if and only if they were included for + * backward compatibility purposes, and can be safely ignored by new versions + * of zfs receive. Previous versions of ZFS which do not understand the + * DRR_FLAG_SPILL_BLOCK will process this record and recreate any missing + * spill blocks. + */ +#define DRR_FLAG_SPILL_BLOCK (1<<3) /* - * flags in the drr_checksumflags field in the DRR_WRITE and - * DRR_WRITE_BYREF blocks + * flags in the drr_flags field in the DRR_WRITE, DRR_SPILL, DRR_OBJECT, + * DRR_WRITE_BYREF, and DRR_OBJECT_RANGE blocks */ -#define DRR_CHECKSUM_DEDUP (1<<0) +#define DRR_CHECKSUM_DEDUP (1<<0) /* not used for DRR_SPILL blocks */ +#define DRR_RAW_BYTESWAP (1<<1) +#define DRR_OBJECT_SPILL (1<<2) /* OBJECT record has a spill block */ +#define DRR_SPILL_UNMODIFIED (1<<2) /* SPILL record for unmodified block */ #define DRR_IS_DEDUP_CAPABLE(flags) ((flags) & DRR_CHECKSUM_DEDUP) +#define DRR_IS_RAW_BYTESWAPPED(flags) ((flags) & DRR_RAW_BYTESWAP) +#define DRR_OBJECT_HAS_SPILL(flags) ((flags) & DRR_OBJECT_SPILL) +#define DRR_SPILL_IS_UNMODIFIED(flags) ((flags) & DRR_SPILL_UNMODIFIED) /* deal with compressed drr_write replay records */ #define DRR_WRITE_COMPRESSED(drrw) ((drrw)->drr_compressiontype != 0) #define DRR_WRITE_PAYLOAD_SIZE(drrw) \ (DRR_WRITE_COMPRESSED(drrw) ? (drrw)->drr_compressed_size : \ (drrw)->drr_logical_size) +#define DRR_SPILL_PAYLOAD_SIZE(drrs) \ + ((drrs)->drr_compressed_size ? \ + (drrs)->drr_compressed_size : (drrs)->drr_length) +#define DRR_OBJECT_PAYLOAD_SIZE(drro) \ + ((drro)->drr_raw_bonuslen != 0 ? 
\ + (drro)->drr_raw_bonuslen : P2ROUNDUP((drro)->drr_bonuslen, 8)) /* * zfs ioctl command structure @@ -171,7 +202,8 @@ typedef struct dmu_replay_record { enum { DRR_BEGIN, DRR_OBJECT, DRR_FREEOBJECTS, DRR_WRITE, DRR_FREE, DRR_END, DRR_WRITE_BYREF, - DRR_SPILL, DRR_WRITE_EMBEDDED, DRR_NUMTYPES + DRR_SPILL, DRR_WRITE_EMBEDDED, DRR_OBJECT_RANGE, + DRR_NUMTYPES } drr_type; uint32_t drr_payloadlen; union { @@ -198,8 +230,15 @@ typedef struct dmu_replay_record { uint8_t drr_checksumtype; uint8_t drr_compress; uint8_t drr_dn_slots; - uint8_t drr_pad[5]; + uint8_t drr_flags; + uint32_t drr_raw_bonuslen; uint64_t drr_toguid; + /* only (possibly) nonzero for raw streams */ + uint8_t drr_indblkshift; + uint8_t drr_nlevels; + uint8_t drr_nblkptr; + uint8_t drr_pad[5]; + uint64_t drr_maxblkid; /* bonus content follows */ } drr_object; struct drr_freeobjects { @@ -215,13 +254,17 @@ typedef struct dmu_replay_record { uint64_t drr_logical_size; uint64_t drr_toguid; uint8_t drr_checksumtype; - uint8_t drr_checksumflags; + uint8_t drr_flags; uint8_t drr_compressiontype; uint8_t drr_pad2[5]; /* deduplication key */ ddt_key_t drr_key; /* only nonzero if drr_compressiontype is not 0 */ uint64_t drr_compressed_size; + /* only nonzero for raw streams */ + uint8_t drr_salt[ZIO_DATA_SALT_LEN]; + uint8_t drr_iv[ZIO_DATA_IV_LEN]; + uint8_t drr_mac[ZIO_DATA_MAC_LEN]; /* content follows */ } drr_write; struct drr_free { @@ -242,7 +285,7 @@ typedef struct dmu_replay_record { uint64_t drr_refoffset; /* properties of the data */ uint8_t drr_checksumtype; - uint8_t drr_checksumflags; + uint8_t drr_flags; uint8_t drr_pad2[6]; ddt_key_t drr_key; /* deduplication key */ } drr_write_byref; @@ -250,7 +293,15 @@ typedef struct dmu_replay_record { uint64_t drr_object; uint64_t drr_length; uint64_t drr_toguid; - uint64_t drr_pad[4]; /* needed for crypto */ + uint8_t drr_flags; + uint8_t drr_compressiontype; + uint8_t drr_pad[6]; + /* only nonzero for raw streams */ + uint64_t drr_compressed_size; + 
uint8_t drr_salt[ZIO_DATA_SALT_LEN]; + uint8_t drr_iv[ZIO_DATA_IV_LEN]; + uint8_t drr_mac[ZIO_DATA_MAC_LEN]; + dmu_object_type_t drr_type; /* spill data follows */ } drr_spill; struct drr_write_embedded { @@ -266,6 +317,16 @@ typedef struct dmu_replay_record { uint32_t drr_psize; /* compr. (real) size of payload */ /* (possibly compressed) content follows */ } drr_write_embedded; + struct drr_object_range { + uint64_t drr_firstobj; + uint64_t drr_numslots; + uint64_t drr_toguid; + uint8_t drr_salt[ZIO_DATA_SALT_LEN]; + uint8_t drr_iv[ZIO_DATA_IV_LEN]; + uint8_t drr_mac[ZIO_DATA_MAC_LEN]; + uint8_t drr_flags; + uint8_t drr_pad[3]; + } drr_object_range; /* * Nore: drr_checksum is overlaid with all record types @@ -335,6 +396,7 @@ typedef enum zinject_type { ZINJECT_IGNORED_WRITES, ZINJECT_PANIC, ZINJECT_DELAY_IO, + ZINJECT_DECRYPT_FAULT, } zinject_type_t; typedef struct zfs_share { diff --git a/usr/src/uts/common/fs/zfs/sys/zil.h b/usr/src/uts/common/fs/zfs/sys/zil.h index e6b18da95b..2e44ff2b14 100644 --- a/usr/src/uts/common/fs/zfs/sys/zil.h +++ b/usr/src/uts/common/fs/zfs/sys/zil.h @@ -33,6 +33,7 @@ #include <sys/spa.h> #include <sys/zio.h> #include <sys/dmu.h> +#include <sys/zio_crypt.h> #ifdef __cplusplus extern "C" { @@ -407,7 +408,8 @@ typedef int zil_get_data_t(void *arg, lr_write_t *lr, char *dbuf, struct lwb *lwb, zio_t *zio); extern int zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func, - zil_parse_lr_func_t *parse_lr_func, void *arg, uint64_t txg); + zil_parse_lr_func_t *parse_lr_func, void *arg, uint64_t txg, + boolean_t decrypt); extern void zil_init(void); extern void zil_fini(void); diff --git a/usr/src/uts/common/fs/zfs/sys/zio.h b/usr/src/uts/common/fs/zfs/sys/zio.h index 00d2ebbebb..ec4ec29d5a 100644 --- a/usr/src/uts/common/fs/zfs/sys/zio.h +++ b/usr/src/uts/common/fs/zfs/sys/zio.h @@ -104,6 +104,15 @@ enum zio_checksum { #define ZIO_DEDUPCHECKSUM ZIO_CHECKSUM_SHA256 #define ZIO_DEDUPDITTO_MIN 100 +#define ZIO_CRYPT_ON_VALUE 
ZIO_CRYPT_AES_256_CCM +#define ZIO_CRYPT_DEFAULT ZIO_CRYPT_OFF + +/* macros defining encryption lengths */ +#define ZIO_OBJSET_MAC_LEN 32 +#define ZIO_DATA_IV_LEN 12 +#define ZIO_DATA_SALT_LEN 8 +#define ZIO_DATA_MAC_LEN 16 + /* * The number of "legacy" compression functions which can be set on individual * objects. @@ -191,16 +200,18 @@ enum zio_flag { ZIO_FLAG_DONT_PROPAGATE = 1 << 20, ZIO_FLAG_IO_BYPASS = 1 << 21, ZIO_FLAG_IO_REWRITE = 1 << 22, - ZIO_FLAG_RAW = 1 << 23, - ZIO_FLAG_GANG_CHILD = 1 << 24, - ZIO_FLAG_DDT_CHILD = 1 << 25, - ZIO_FLAG_GODFATHER = 1 << 26, - ZIO_FLAG_NOPWRITE = 1 << 27, - ZIO_FLAG_REEXECUTED = 1 << 28, - ZIO_FLAG_DELEGATED = 1 << 29, + ZIO_FLAG_RAW_COMPRESS = 1 << 23, + ZIO_FLAG_RAW_ENCRYPT = 1 << 24, + ZIO_FLAG_GANG_CHILD = 1 << 25, + ZIO_FLAG_DDT_CHILD = 1 << 26, + ZIO_FLAG_GODFATHER = 1 << 27, + ZIO_FLAG_NOPWRITE = 1 << 28, + ZIO_FLAG_REEXECUTED = 1 << 29, + ZIO_FLAG_DELEGATED = 1 << 30, }; #define ZIO_FLAG_MUSTSUCCEED 0 +#define ZIO_FLAG_RAW (ZIO_FLAG_RAW_COMPRESS | ZIO_FLAG_RAW_ENCRYPT) #define ZIO_DDT_CHILD_FLAGS(zio) \ (((zio)->io_flags & ZIO_FLAG_DDT_INHERIT) | \ @@ -314,12 +325,17 @@ typedef struct zio_prop { boolean_t zp_dedup_verify; boolean_t zp_nopwrite; uint32_t zp_zpl_smallblk; + boolean_t zp_encrypt; + boolean_t zp_byteorder; + uint8_t zp_salt[ZIO_DATA_SALT_LEN]; + uint8_t zp_iv[ZIO_DATA_IV_LEN]; + uint8_t zp_mac[ZIO_DATA_MAC_LEN]; } zio_prop_t; typedef struct zio_cksum_report zio_cksum_report_t; typedef void zio_cksum_finish_f(zio_cksum_report_t *rep, - const void *good_data); + const abd_t *good_data); typedef void zio_cksum_free_f(void *cbdata, size_t size); struct zio_bad_cksum; /* defined in zio_checksum.h */ @@ -524,8 +540,9 @@ extern zio_t *zio_write_phys(zio_t *pio, vdev_t *vd, uint64_t offset, extern zio_t *zio_free_sync(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp, enum zio_flag flags); -extern int zio_alloc_zil(spa_t *spa, uint64_t objset, uint64_t txg, +extern int zio_alloc_zil(spa_t *spa, 
objset_t *os, uint64_t txg, blkptr_t *new_bp, blkptr_t *old_bp, uint64_t size, boolean_t *slog); +extern void zio_free_zil(spa_t *spa, uint64_t txg, blkptr_t *bp); extern void zio_flush(zio_t *zio, vdev_t *vd); extern void zio_shrink(zio_t *zio, uint64_t size); @@ -598,6 +615,8 @@ extern int zio_inject_list_next(int *id, char *name, size_t buflen, struct zinject_record *record); extern int zio_clear_fault(int id); extern void zio_handle_panic_injection(spa_t *spa, char *tag, uint64_t type); +extern int zio_handle_decrypt_injection(spa_t *spa, const zbookmark_phys_t *zb, + uint64_t type, int error); extern int zio_handle_fault_injection(zio_t *zio, int error); extern int zio_handle_device_injection(vdev_t *vd, zio_t *zio, int error); extern int zio_handle_label_injection(zio_t *zio, int error); @@ -607,18 +626,20 @@ extern hrtime_t zio_handle_io_delay(zio_t *zio); /* * Checksum ereport functions */ -extern void zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd, struct zio *zio, - uint64_t offset, uint64_t length, void *arg, struct zio_bad_cksum *info); +extern void zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd, + const zbookmark_phys_t *zb, struct zio *zio, uint64_t offset, + uint64_t length, void *arg, struct zio_bad_cksum *info); extern void zfs_ereport_finish_checksum(zio_cksum_report_t *report, - const void *good_data, const void *bad_data, boolean_t drop_if_identical); + const abd_t *good_data, const abd_t *bad_data, boolean_t drop_if_identical); extern void zfs_ereport_send_interim_checksum(zio_cksum_report_t *report); extern void zfs_ereport_free_checksum(zio_cksum_report_t *report); /* If we have the good data in hand, this function can be used */ extern void zfs_ereport_post_checksum(spa_t *spa, vdev_t *vd, - struct zio *zio, uint64_t offset, uint64_t length, - const void *good_data, const void *bad_data, struct zio_bad_cksum *info); + const zbookmark_phys_t *zb, struct zio *zio, uint64_t offset, + uint64_t length, const abd_t *good_data, const abd_t 
*bad_data, + struct zio_bad_cksum *info); /* Called from spa_sync(), but primarily an injection handler */ extern void spa_handle_ignored_writes(spa_t *spa); diff --git a/usr/src/uts/common/fs/zfs/sys/zio_checksum.h b/usr/src/uts/common/fs/zfs/sys/zio_checksum.h index 3eda057eae..6119163af8 100644 --- a/usr/src/uts/common/fs/zfs/sys/zio_checksum.h +++ b/usr/src/uts/common/fs/zfs/sys/zio_checksum.h @@ -54,7 +54,7 @@ typedef enum zio_checksum_flags { /* Uses salt value */ ZCHECKSUM_FLAG_SALTED = (1 << 4), /* Strong enough for nopwrite? */ - ZCHECKSUM_FLAG_NOPWRITE = (1 << 5) + ZCHECKSUM_FLAG_NOPWRITE = (1 << 5), } zio_checksum_flags_t; /* @@ -103,7 +103,7 @@ extern int zio_checksum_equal(spa_t *, blkptr_t *, enum zio_checksum, void *, uint64_t, uint64_t, zio_bad_cksum_t *); extern void zio_checksum_compute(zio_t *, enum zio_checksum, struct abd *, uint64_t); -extern int zio_checksum_error_impl(spa_t *, blkptr_t *, enum zio_checksum, +extern int zio_checksum_error_impl(spa_t *, const blkptr_t *, enum zio_checksum, struct abd *, uint64_t, uint64_t, zio_bad_cksum_t *); extern int zio_checksum_error(zio_t *zio, zio_bad_cksum_t *out); extern enum zio_checksum spa_dedup_checksum(spa_t *spa); diff --git a/usr/src/uts/common/fs/zfs/sys/zio_crypt.h b/usr/src/uts/common/fs/zfs/sys/zio_crypt.h new file mode 100644 index 0000000000..6163f97458 --- /dev/null +++ b/usr/src/uts/common/fs/zfs/sys/zio_crypt.h @@ -0,0 +1,152 @@ +/* + * CDDL HEADER START + * + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2017, Datto, Inc. All rights reserved. 
+ */ + +#ifndef _SYS_ZIO_CRYPT_H +#define _SYS_ZIO_CRYPT_H + +#include <sys/dmu.h> +#include <sys/refcount.h> +#include <sys/crypto/api.h> +#include <sys/nvpair.h> +#include <sys/avl.h> +#include <sys/zio.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* forward declarations */ +struct zbookmark_phys; + +#define WRAPPING_KEY_LEN 32 +#define WRAPPING_IV_LEN ZIO_DATA_IV_LEN +#define WRAPPING_MAC_LEN ZIO_DATA_MAC_LEN +#define MASTER_KEY_MAX_LEN 32 +#define SHA512_HMAC_KEYLEN 64 + +#define ZIO_CRYPT_KEY_CURRENT_VERSION 1ULL + +typedef enum zio_crypt_type { + ZC_TYPE_NONE = 0, + ZC_TYPE_CCM, + ZC_TYPE_GCM +} zio_crypt_type_t; + +/* table of supported crypto algorithms, modes and keylengths. */ +typedef struct zio_crypt_info { + /* mechanism name, needed by ICP */ + crypto_mech_name_t ci_mechname; + + /* cipher mode type (GCM, CCM) */ + zio_crypt_type_t ci_crypt_type; + + /* length of the encryption key */ + size_t ci_keylen; + + /* human-readable name of the encryption alforithm */ + char *ci_name; +} zio_crypt_info_t; + +extern zio_crypt_info_t zio_crypt_table[ZIO_CRYPT_FUNCTIONS]; + +/* in memory representation of an unwrapped key that is loaded into memory */ +typedef struct zio_crypt_key { + /* encryption algorithm */ + uint64_t zk_crypt; + + /* on-disk format version */ + uint64_t zk_version; + + /* GUID for uniquely identifying this key. Not encrypted on disk. 
*/ + uint64_t zk_guid; + + /* buffer for master key */ + uint8_t zk_master_keydata[MASTER_KEY_MAX_LEN]; + + /* buffer for hmac key */ + uint8_t zk_hmac_keydata[SHA512_HMAC_KEYLEN]; + + /* buffer for currrent encryption key derived from master key */ + uint8_t zk_current_keydata[MASTER_KEY_MAX_LEN]; + + /* current 64 bit salt for deriving an encryption key */ + uint8_t zk_salt[ZIO_DATA_SALT_LEN]; + + /* count of how many times the current salt has been used */ + uint64_t zk_salt_count; + + /* illumos crypto api current encryption key */ + crypto_key_t zk_current_key; + + /* template of current encryption key for illumos crypto api */ + crypto_ctx_template_t zk_current_tmpl; + + /* illumos crypto api current hmac key */ + crypto_key_t zk_hmac_key; + + /* template of hmac key for illumos crypto api */ + crypto_ctx_template_t zk_hmac_tmpl; + + /* lock for changing the salt and dependant values */ + krwlock_t zk_salt_lock; +} zio_crypt_key_t; + +void zio_crypt_key_destroy(zio_crypt_key_t *key); +int zio_crypt_key_init(uint64_t crypt, zio_crypt_key_t *key); +int zio_crypt_key_get_salt(zio_crypt_key_t *key, uint8_t *salt_out); + +int zio_crypt_key_wrap(crypto_key_t *cwkey, zio_crypt_key_t *key, uint8_t *iv, + uint8_t *mac, uint8_t *keydata_out, uint8_t *hmac_keydata_out); +int zio_crypt_key_unwrap(crypto_key_t *cwkey, uint64_t crypt, uint64_t version, + uint64_t guid, uint8_t *keydata, uint8_t *hmac_keydata, uint8_t *iv, + uint8_t *mac, zio_crypt_key_t *key); +int zio_crypt_generate_iv(uint8_t *ivbuf); +int zio_crypt_generate_iv_salt_dedup(zio_crypt_key_t *key, uint8_t *data, + uint_t datalen, uint8_t *ivbuf, uint8_t *salt); + +void zio_crypt_encode_params_bp(blkptr_t *bp, uint8_t *salt, uint8_t *iv); +void zio_crypt_decode_params_bp(const blkptr_t *bp, uint8_t *salt, uint8_t *iv); +void zio_crypt_encode_mac_bp(blkptr_t *bp, uint8_t *mac); +void zio_crypt_decode_mac_bp(const blkptr_t *bp, uint8_t *mac); +void zio_crypt_encode_mac_zil(void *data, uint8_t *mac); +void 
zio_crypt_decode_mac_zil(const void *data, uint8_t *mac); +void zio_crypt_copy_dnode_bonus(abd_t *src_abd, uint8_t *dst, uint_t datalen); + +int zio_crypt_do_indirect_mac_checksum(boolean_t generate, void *buf, + uint_t datalen, boolean_t byteswap, uint8_t *cksum); +int zio_crypt_do_indirect_mac_checksum_abd(boolean_t generate, abd_t *abd, + uint_t datalen, boolean_t byteswap, uint8_t *cksum); +int zio_crypt_do_hmac(zio_crypt_key_t *key, uint8_t *data, uint_t datalen, + uint8_t *digestbuf, uint_t digestlen); +int zio_crypt_do_objset_hmacs(zio_crypt_key_t *key, void *data, uint_t datalen, + boolean_t byteswap, uint8_t *portable_mac, uint8_t *local_mac); +int zio_do_crypt_data(boolean_t encrypt, zio_crypt_key_t *key, + dmu_object_type_t ot, boolean_t byteswap, uint8_t *salt, uint8_t *iv, + uint8_t *mac, uint_t datalen, uint8_t *plainbuf, uint8_t *cipherbuf, + boolean_t *no_crypt); +int zio_do_crypt_abd(boolean_t encrypt, zio_crypt_key_t *key, + dmu_object_type_t ot, boolean_t byteswap, uint8_t *salt, uint8_t *iv, + uint8_t *mac, uint_t datalen, abd_t *pabd, abd_t *cabd, + boolean_t *no_crypt); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_ZIO_CRYPT_H */ diff --git a/usr/src/uts/common/fs/zfs/sys/zio_impl.h b/usr/src/uts/common/fs/zfs/sys/zio_impl.h index a36749a308..703522b67d 100644 --- a/usr/src/uts/common/fs/zfs/sys/zio_impl.h +++ b/usr/src/uts/common/fs/zfs/sys/zio_impl.h @@ -99,6 +99,18 @@ extern "C" { * physical I/O. The nop write feature can handle writes in either * syncing or open context (i.e. zil writes) and as a result is mutually * exclusive with dedup. + * + * Encryption: + * Encryption and authentication is handled by the ZIO_STAGE_ENCRYPT stage. + * This stage determines how the encryption metadata is stored in the bp. + * Decryption and MAC verification is performed during zio_decrypt() as a + * transform callback. 
Encryption is mutually exclusive with nopwrite, because + * blocks with the same plaintext will be encrypted with different salts and + * IV's (if dedup is off), and therefore have different ciphertexts. For dedup + * blocks we deterministically generate the IV and salt by performing an HMAC + * of the plaintext, which is computationally expensive, but allows us to keep + * support for encrypted dedup. See the block comment in zio_crypt.c for + * details. */ /* @@ -113,32 +125,33 @@ enum zio_stage { ZIO_STAGE_ISSUE_ASYNC = 1 << 4, /* RWF-- */ ZIO_STAGE_WRITE_COMPRESS = 1 << 5, /* -W--- */ - ZIO_STAGE_CHECKSUM_GENERATE = 1 << 6, /* -W--- */ + ZIO_STAGE_ENCRYPT = 1 << 6, /* -W--- */ + ZIO_STAGE_CHECKSUM_GENERATE = 1 << 7, /* -W--- */ - ZIO_STAGE_NOP_WRITE = 1 << 7, /* -W--- */ + ZIO_STAGE_NOP_WRITE = 1 << 8, /* -W--- */ - ZIO_STAGE_DDT_READ_START = 1 << 8, /* R---- */ - ZIO_STAGE_DDT_READ_DONE = 1 << 9, /* R---- */ - ZIO_STAGE_DDT_WRITE = 1 << 10, /* -W--- */ - ZIO_STAGE_DDT_FREE = 1 << 11, /* --F-- */ + ZIO_STAGE_DDT_READ_START = 1 << 9, /* R---- */ + ZIO_STAGE_DDT_READ_DONE = 1 << 10, /* R---- */ + ZIO_STAGE_DDT_WRITE = 1 << 11, /* -W--- */ + ZIO_STAGE_DDT_FREE = 1 << 12, /* --F-- */ - ZIO_STAGE_GANG_ASSEMBLE = 1 << 12, /* RWFC- */ - ZIO_STAGE_GANG_ISSUE = 1 << 13, /* RWFC- */ + ZIO_STAGE_GANG_ASSEMBLE = 1 << 13, /* RWFC- */ + ZIO_STAGE_GANG_ISSUE = 1 << 14, /* RWFC- */ - ZIO_STAGE_DVA_THROTTLE = 1 << 14, /* -W--- */ - ZIO_STAGE_DVA_ALLOCATE = 1 << 15, /* -W--- */ - ZIO_STAGE_DVA_FREE = 1 << 16, /* --F-- */ - ZIO_STAGE_DVA_CLAIM = 1 << 17, /* ---C- */ + ZIO_STAGE_DVA_THROTTLE = 1 << 15, /* -W--- */ + ZIO_STAGE_DVA_ALLOCATE = 1 << 16, /* -W--- */ + ZIO_STAGE_DVA_FREE = 1 << 17, /* --F-- */ + ZIO_STAGE_DVA_CLAIM = 1 << 18, /* ---C- */ - ZIO_STAGE_READY = 1 << 18, /* RWFCI */ + ZIO_STAGE_READY = 1 << 19, /* RWFCI */ - ZIO_STAGE_VDEV_IO_START = 1 << 19, /* RW--I */ - ZIO_STAGE_VDEV_IO_DONE = 1 << 20, /* RW--I */ - ZIO_STAGE_VDEV_IO_ASSESS = 1 << 21, /* RW--I */ + 
ZIO_STAGE_VDEV_IO_START = 1 << 20, /* RW--I */ + ZIO_STAGE_VDEV_IO_DONE = 1 << 21, /* RW--I */ + ZIO_STAGE_VDEV_IO_ASSESS = 1 << 22, /* RW--I */ - ZIO_STAGE_CHECKSUM_VERIFY = 1 << 22, /* R---- */ + ZIO_STAGE_CHECKSUM_VERIFY = 1 << 23, /* R---- */ - ZIO_STAGE_DONE = 1 << 23 /* RWFCI */ + ZIO_STAGE_DONE = 1 << 24 /* RWFCI */ }; #define ZIO_INTERLOCK_STAGES \ @@ -190,12 +203,14 @@ enum zio_stage { #define ZIO_REWRITE_PIPELINE \ (ZIO_WRITE_COMMON_STAGES | \ ZIO_STAGE_WRITE_COMPRESS | \ + ZIO_STAGE_ENCRYPT | \ ZIO_STAGE_WRITE_BP_INIT) #define ZIO_WRITE_PIPELINE \ (ZIO_WRITE_COMMON_STAGES | \ ZIO_STAGE_WRITE_BP_INIT | \ ZIO_STAGE_WRITE_COMPRESS | \ + ZIO_STAGE_ENCRYPT | \ ZIO_STAGE_DVA_THROTTLE | \ ZIO_STAGE_DVA_ALLOCATE) @@ -210,6 +225,7 @@ enum zio_stage { ZIO_STAGE_WRITE_BP_INIT | \ ZIO_STAGE_ISSUE_ASYNC | \ ZIO_STAGE_WRITE_COMPRESS | \ + ZIO_STAGE_ENCRYPT | \ ZIO_STAGE_CHECKSUM_GENERATE | \ ZIO_STAGE_DDT_WRITE) diff --git a/usr/src/uts/common/fs/zfs/vdev.c b/usr/src/uts/common/fs/zfs/vdev.c index c9f1212168..73b7c8e2fc 100644 --- a/usr/src/uts/common/fs/zfs/vdev.c +++ b/usr/src/uts/common/fs/zfs/vdev.c @@ -1331,7 +1331,7 @@ vdev_probe_done(zio_t *zio) ASSERT(zio->io_error != 0); vdev_dbgmsg(vd, "failed probe"); zfs_ereport_post(FM_EREPORT_ZFS_PROBE_FAILURE, - spa, vd, NULL, 0, 0); + spa, vd, NULL, NULL, 0, 0); zio->io_error = SET_ERROR(ENXIO); } @@ -4189,7 +4189,8 @@ vdev_set_state(vdev_t *vd, boolean_t isopen, vdev_state_t state, vdev_aux_t aux) class = FM_EREPORT_ZFS_DEVICE_UNKNOWN; } - zfs_ereport_post(class, spa, vd, NULL, save_state, 0); + zfs_ereport_post(class, spa, vd, NULL, NULL, + save_state, 0); } /* Erase any notion of persistent removed state */ diff --git a/usr/src/uts/common/fs/zfs/vdev_indirect.c b/usr/src/uts/common/fs/zfs/vdev_indirect.c index 062c4073a8..9626589444 100644 --- a/usr/src/uts/common/fs/zfs/vdev_indirect.c +++ b/usr/src/uts/common/fs/zfs/vdev_indirect.c @@ -1381,7 +1381,7 @@ vdev_indirect_checksum_error(zio_t *zio, void *bad_buf = 
abd_borrow_buf_copy(ic->ic_data, is->is_size); abd_t *good_abd = is->is_good_child->ic_data; void *good_buf = abd_borrow_buf_copy(good_abd, is->is_size); - zfs_ereport_post_checksum(zio->io_spa, vd, zio, + zfs_ereport_post_checksum(zio->io_spa, vd, &zio->io_bookmark, zio, is->is_target_offset, is->is_size, good_buf, bad_buf, &zbc); abd_return_buf(ic->ic_data, bad_buf, is->is_size); abd_return_buf(good_abd, good_buf, is->is_size); @@ -1458,9 +1458,9 @@ vdev_indirect_all_checksum_errors(zio_t *zio) vd->vdev_stat.vs_checksum_errors++; mutex_exit(&vd->vdev_stat_lock); - zfs_ereport_post_checksum(zio->io_spa, vd, zio, - is->is_target_offset, is->is_size, - NULL, NULL, NULL); + zfs_ereport_post_checksum(zio->io_spa, vd, + &zio->io_bookmark, zio, is->is_target_offset, + is->is_size, NULL, NULL, NULL); } } } diff --git a/usr/src/uts/common/fs/zfs/vdev_raidz.c b/usr/src/uts/common/fs/zfs/vdev_raidz.c index 6502353542..0dcbb863e3 100644 --- a/usr/src/uts/common/fs/zfs/vdev_raidz.c +++ b/usr/src/uts/common/fs/zfs/vdev_raidz.c @@ -267,21 +267,17 @@ static void vdev_raidz_map_free(raidz_map_t *rm) { int c; - size_t size; for (c = 0; c < rm->rm_firstdatacol; c++) { abd_free(rm->rm_col[c].rc_abd); if (rm->rm_col[c].rc_gdata != NULL) - zio_buf_free(rm->rm_col[c].rc_gdata, - rm->rm_col[c].rc_size); + abd_free(rm->rm_col[c].rc_gdata); + } - size = 0; - for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) { + for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) abd_put(rm->rm_col[c].rc_abd); - size += rm->rm_col[c].rc_size; - } if (rm->rm_abd_copy != NULL) abd_free(rm->rm_abd_copy); @@ -314,14 +310,14 @@ vdev_raidz_cksum_free(void *arg, size_t ignored) } static void -vdev_raidz_cksum_finish(zio_cksum_report_t *zcr, const void *good_data) +vdev_raidz_cksum_finish(zio_cksum_report_t *zcr, const abd_t *good_data) { raidz_map_t *rm = zcr->zcr_cbdata; size_t c = zcr->zcr_cbinfo; - size_t x; + size_t x, offset; - const char *good = NULL; - char *bad; + const abd_t *good = NULL; + const abd_t 
*bad = rm->rm_col[c].rc_abd; if (good_data == NULL) { zfs_ereport_finish_checksum(zcr, NULL, NULL, B_FALSE); @@ -336,8 +332,6 @@ vdev_raidz_cksum_finish(zio_cksum_report_t *zcr, const void *good_data) */ if (rm->rm_col[0].rc_gdata == NULL) { abd_t *bad_parity[VDEV_RAIDZ_MAXPARITY]; - char *buf; - int offset; /* * Set up the rm_col[]s to generate the parity for @@ -346,20 +340,21 @@ vdev_raidz_cksum_finish(zio_cksum_report_t *zcr, const void *good_data) */ for (x = 0; x < rm->rm_firstdatacol; x++) { bad_parity[x] = rm->rm_col[x].rc_abd; - rm->rm_col[x].rc_gdata = - zio_buf_alloc(rm->rm_col[x].rc_size); rm->rm_col[x].rc_abd = - abd_get_from_buf(rm->rm_col[x].rc_gdata, + rm->rm_col[x].rc_gdata = + abd_alloc_sametype(rm->rm_col[x].rc_abd, rm->rm_col[x].rc_size); } /* fill in the data columns from good_data */ - buf = (char *)good_data; + offset = 0; for (; x < rm->rm_cols; x++) { abd_put(rm->rm_col[x].rc_abd); - rm->rm_col[x].rc_abd = abd_get_from_buf(buf, - rm->rm_col[x].rc_size); - buf += rm->rm_col[x].rc_size; + + rm->rm_col[x].rc_abd = + abd_get_offset_size((abd_t *)good_data, + offset, rm->rm_col[x].rc_size); + offset += rm->rm_col[x].rc_size; } /* @@ -368,34 +363,35 @@ vdev_raidz_cksum_finish(zio_cksum_report_t *zcr, const void *good_data) vdev_raidz_generate_parity(rm); /* restore everything back to its original state */ - for (x = 0; x < rm->rm_firstdatacol; x++) { - abd_put(rm->rm_col[x].rc_abd); + for (x = 0; x < rm->rm_firstdatacol; x++) rm->rm_col[x].rc_abd = bad_parity[x]; - } offset = 0; for (x = rm->rm_firstdatacol; x < rm->rm_cols; x++) { abd_put(rm->rm_col[x].rc_abd); - rm->rm_col[x].rc_abd = abd_get_offset( - rm->rm_abd_copy, offset); + rm->rm_col[x].rc_abd = abd_get_offset_size( + rm->rm_abd_copy, offset, + rm->rm_col[x].rc_size); offset += rm->rm_col[x].rc_size; } } ASSERT3P(rm->rm_col[c].rc_gdata, !=, NULL); - good = rm->rm_col[c].rc_gdata; + good = abd_get_offset_size(rm->rm_col[c].rc_gdata, 0, + rm->rm_col[c].rc_size); } else { /* adjust 
good_data to point at the start of our column */ - good = good_data; - + offset = 0; for (x = rm->rm_firstdatacol; x < c; x++) - good += rm->rm_col[x].rc_size; + offset += rm->rm_col[x].rc_size; + + good = abd_get_offset_size((abd_t *)good_data, offset, + rm->rm_col[c].rc_size); } - bad = abd_borrow_buf_copy(rm->rm_col[c].rc_abd, rm->rm_col[c].rc_size); /* we drop the ereport if it ends up that the data was good */ zfs_ereport_finish_checksum(zcr, good, bad, B_TRUE); - abd_return_buf(rm->rm_col[c].rc_abd, bad, rm->rm_col[c].rc_size); + abd_put((abd_t *)good); } /* @@ -438,14 +434,16 @@ vdev_raidz_cksum_report(zio_t *zio, zio_cksum_report_t *zcr, void *arg) for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) size += rm->rm_col[c].rc_size; - rm->rm_abd_copy = - abd_alloc_sametype(rm->rm_col[rm->rm_firstdatacol].rc_abd, size); + rm->rm_abd_copy = abd_alloc_for_io(size, B_FALSE); for (offset = 0, c = rm->rm_firstdatacol; c < rm->rm_cols; c++) { raidz_col_t *col = &rm->rm_col[c]; - abd_t *tmp = abd_get_offset(rm->rm_abd_copy, offset); + abd_t *tmp = abd_get_offset_size(rm->rm_abd_copy, offset, + col->rc_size); - abd_copy(tmp, col->rc_abd, col->rc_size); + ASSERT3S(tmp->abd_size, >=, col->rc_size); + ASSERT3S(col->rc_abd->abd_size, >=, col->rc_size); + abd_copy_off(tmp, col->rc_abd, 0, 0, col->rc_size); abd_put(col->rc_abd); col->rc_abd = tmp; @@ -562,13 +560,15 @@ vdev_raidz_map_alloc(abd_t *abd, uint64_t size, uint64_t offset, for (c = 0; c < rm->rm_firstdatacol; c++) rm->rm_col[c].rc_abd = - abd_alloc_linear(rm->rm_col[c].rc_size, B_TRUE); + abd_alloc_linear(rm->rm_col[c].rc_size, B_FALSE); - rm->rm_col[c].rc_abd = abd_get_offset(abd, 0); + rm->rm_col[c].rc_abd = abd_get_offset_size(abd, 0, + rm->rm_col[c].rc_size); off = rm->rm_col[c].rc_size; for (c = c + 1; c < acols; c++) { - rm->rm_col[c].rc_abd = abd_get_offset(abd, off); + rm->rm_col[c].rc_abd = abd_get_offset_size(abd, off, + rm->rm_col[c].rc_size); off += rm->rm_col[c].rc_size; } @@ -683,7 +683,8 @@ 
vdev_raidz_generate_parity_p(raidz_map_t *rm) p = abd_to_buf(rm->rm_col[VDEV_RAIDZ_P].rc_abd); if (c == rm->rm_firstdatacol) { - abd_copy_to_buf(p, src, rm->rm_col[c].rc_size); + ASSERT3U(src->abd_size, >=, rm->rm_col[c].rc_size); + abd_copy_to_buf_off(p, src, 0, rm->rm_col[c].rc_size); } else { struct pqr_struct pqr = { p, NULL, NULL }; (void) abd_iterate_func(src, 0, rm->rm_col[c].rc_size, @@ -711,20 +712,22 @@ vdev_raidz_generate_parity_pq(raidz_map_t *rm) ccnt = rm->rm_col[c].rc_size / sizeof (p[0]); if (c == rm->rm_firstdatacol) { - abd_copy_to_buf(p, src, rm->rm_col[c].rc_size); - (void) memcpy(q, p, rm->rm_col[c].rc_size); - } else { - struct pqr_struct pqr = { p, q, NULL }; - (void) abd_iterate_func(src, 0, rm->rm_col[c].rc_size, - vdev_raidz_pq_func, &pqr); - } + ASSERT(ccnt == pcnt || ccnt == 0); - if (c == rm->rm_firstdatacol) { + abd_copy_to_buf_off(p, src, 0, rm->rm_col[c].rc_size); + (void) memcpy(q, p, rm->rm_col[c].rc_size); for (i = ccnt; i < pcnt; i++) { p[i] = 0; q[i] = 0; } } else { + struct pqr_struct pqr = { p, q, NULL }; + + ASSERT(ccnt <= pcnt); + + (void) abd_iterate_func(src, 0, rm->rm_col[c].rc_size, + vdev_raidz_pq_func, &pqr); + /* * Treat short columns as though they are full of 0s. * Note that there's therefore nothing needed for P. 
@@ -758,22 +761,24 @@ vdev_raidz_generate_parity_pqr(raidz_map_t *rm) ccnt = rm->rm_col[c].rc_size / sizeof (p[0]); if (c == rm->rm_firstdatacol) { - abd_copy_to_buf(p, src, rm->rm_col[c].rc_size); + ASSERT3S(src->abd_size, >=, rm->rm_col[c].rc_size); + ASSERT(ccnt == pcnt || ccnt == 0); + abd_copy_to_buf_off(p, src, 0, rm->rm_col[c].rc_size); (void) memcpy(q, p, rm->rm_col[c].rc_size); (void) memcpy(r, p, rm->rm_col[c].rc_size); - } else { - struct pqr_struct pqr = { p, q, r }; - (void) abd_iterate_func(src, 0, rm->rm_col[c].rc_size, - vdev_raidz_pqr_func, &pqr); - } - if (c == rm->rm_firstdatacol) { for (i = ccnt; i < pcnt; i++) { p[i] = 0; q[i] = 0; r[i] = 0; } } else { + struct pqr_struct pqr = { p, q, r }; + + ASSERT(ccnt <= pcnt); + (void) abd_iterate_func(src, 0, rm->rm_col[c].rc_size, + vdev_raidz_pqr_func, &pqr); + /* * Treat short columns as though they are full of 0s. * Note that there's therefore nothing needed for P. @@ -941,7 +946,9 @@ vdev_raidz_reconstruct_p(raidz_map_t *rm, int *tgts, int ntgts) src = rm->rm_col[VDEV_RAIDZ_P].rc_abd; dst = rm->rm_col[x].rc_abd; - abd_copy(dst, src, rm->rm_col[x].rc_size); + ASSERT3S(dst->abd_size, >=, rm->rm_col[x].rc_size); + ASSERT3S(src->abd_size, >=, rm->rm_col[x].rc_size); + abd_copy_off(dst, src, 0, 0, rm->rm_col[x].rc_size); for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) { uint64_t size = MIN(rm->rm_col[x].rc_size, @@ -979,14 +986,19 @@ vdev_raidz_reconstruct_q(raidz_map_t *rm, int *tgts, int ntgts) dst = rm->rm_col[x].rc_abd; if (c == rm->rm_firstdatacol) { - abd_copy(dst, src, size); + if (dst != src) { + ASSERT3S(dst->abd_size, >=, size); + ASSERT3S(src->abd_size, >=, size); + abd_copy_off(dst, src, 0, 0, size); + } if (rm->rm_col[x].rc_size > size) abd_zero_off(dst, size, rm->rm_col[x].rc_size - size); } else { ASSERT3U(size, <=, rm->rm_col[x].rc_size); - (void) abd_iterate_func2(dst, src, 0, 0, size, - vdev_raidz_reconst_q_pre_func, NULL); + if (src != dst) + (void) abd_iterate_func2(dst, src, 0, 
0, size, + vdev_raidz_reconst_q_pre_func, NULL); (void) abd_iterate_func(dst, size, rm->rm_col[x].rc_size - size, vdev_raidz_reconst_q_pre_tail_func, NULL); @@ -1475,7 +1487,9 @@ vdev_raidz_reconstruct_general(raidz_map_t *rm, int *tgts, int ntgts) bufs[c] = col->rc_abd; col->rc_abd = abd_alloc_linear(col->rc_size, B_TRUE); - abd_copy(col->rc_abd, bufs[c], col->rc_size); + ASSERT3S(col->rc_abd->abd_size, >=, col->rc_size); + ASSERT3S(bufs[c]->abd_size, >=, col->rc_size); + abd_copy_off(col->rc_abd, bufs[c], 0, 0, col->rc_size); } } @@ -1571,7 +1585,9 @@ vdev_raidz_reconstruct_general(raidz_map_t *rm, int *tgts, int ntgts) for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) { raidz_col_t *col = &rm->rm_col[c]; - abd_copy(bufs[c], col->rc_abd, col->rc_size); + ASSERT3S(bufs[c]->abd_size, >=, col->rc_size); + ASSERT3S(col->rc_abd->abd_size, >=, col->rc_size); + abd_copy_off(bufs[c], col->rc_abd, 0, 0, col->rc_size); abd_free(col->rc_abd); col->rc_abd = bufs[c]; } @@ -2041,9 +2057,8 @@ vdev_raidz_io_start(zio_t *zio) * Report a checksum error for a child of a RAID-Z device. 
*/ static void -raidz_checksum_error(zio_t *zio, raidz_col_t *rc, void *bad_data) +raidz_checksum_error(zio_t *zio, raidz_col_t *rc, abd_t *bad_data) { - void *buf; vdev_t *vd = zio->io_vd->vdev_child[rc->rc_devidx]; if (!(zio->io_flags & ZIO_FLAG_SPECULATIVE)) { @@ -2057,11 +2072,9 @@ raidz_checksum_error(zio_t *zio, raidz_col_t *rc, void *bad_data) zbc.zbc_has_cksum = 0; zbc.zbc_injected = rm->rm_ecksuminjected; - buf = abd_borrow_buf_copy(rc->rc_abd, rc->rc_size); - zfs_ereport_post_checksum(zio->io_spa, vd, zio, - rc->rc_offset, rc->rc_size, buf, bad_data, - &zbc); - abd_return_buf(rc->rc_abd, buf, rc->rc_size); + zfs_ereport_post_checksum(zio->io_spa, vd, + &zio->io_bookmark, zio, rc->rc_offset, rc->rc_size, + rc->rc_abd, bad_data, &zbc); } } @@ -2091,7 +2104,7 @@ raidz_checksum_verify(zio_t *zio) static int raidz_parity_verify(zio_t *zio, raidz_map_t *rm) { - void *orig[VDEV_RAIDZ_MAXPARITY]; + abd_t *orig[VDEV_RAIDZ_MAXPARITY]; int c, ret = 0; raidz_col_t *rc; @@ -2106,8 +2119,8 @@ raidz_parity_verify(zio_t *zio, raidz_map_t *rm) rc = &rm->rm_col[c]; if (!rc->rc_tried || rc->rc_error != 0) continue; - orig[c] = zio_buf_alloc(rc->rc_size); - abd_copy_to_buf(orig[c], rc->rc_abd, rc->rc_size); + orig[c] = abd_alloc_sametype(rc->rc_abd, rc->rc_size); + abd_copy(orig[c], rc->rc_abd, rc->rc_size); } vdev_raidz_generate_parity(rm); @@ -2116,12 +2129,12 @@ raidz_parity_verify(zio_t *zio, raidz_map_t *rm) rc = &rm->rm_col[c]; if (!rc->rc_tried || rc->rc_error != 0) continue; - if (abd_cmp_buf(rc->rc_abd, orig[c], rc->rc_size) != 0) { + if (abd_cmp(orig[c], rc->rc_abd, rc->rc_abd->abd_size) != 0) { raidz_checksum_error(zio, rc, orig[c]); rc->rc_error = SET_ERROR(ECKSUM); ret++; } - zio_buf_free(orig[c], rc->rc_size); + abd_free(orig[c]); } return (ret); @@ -2156,7 +2169,7 @@ vdev_raidz_combrec(zio_t *zio, int total_errors, int data_errors) { raidz_map_t *rm = zio->io_vsd; raidz_col_t *rc; - void *orig[VDEV_RAIDZ_MAXPARITY]; + abd_t *orig[VDEV_RAIDZ_MAXPARITY]; int 
tstore[VDEV_RAIDZ_MAXPARITY + 2]; int *tgts = &tstore[1]; int current, next, i, c, n; @@ -2205,7 +2218,8 @@ vdev_raidz_combrec(zio_t *zio, int total_errors, int data_errors) ASSERT(orig[i] != NULL); } - orig[n - 1] = zio_buf_alloc(rm->rm_col[0].rc_size); + orig[n - 1] = abd_alloc_sametype(rm->rm_col[0].rc_abd, + rm->rm_col[0].rc_size); current = 0; next = tgts[current]; @@ -2224,7 +2238,9 @@ vdev_raidz_combrec(zio_t *zio, int total_errors, int data_errors) ASSERT3S(c, >=, 0); ASSERT3S(c, <, rm->rm_cols); rc = &rm->rm_col[c]; - abd_copy_to_buf(orig[i], rc->rc_abd, + ASSERT3S(orig[i]->abd_size, >=, rc->rc_size); + ASSERT3S(rc->rc_abd->abd_size, >=, rc->rc_size); + abd_copy_off(orig[i], rc->rc_abd, 0, 0, rc->rc_size); } @@ -2256,7 +2272,9 @@ vdev_raidz_combrec(zio_t *zio, int total_errors, int data_errors) for (i = 0; i < n; i++) { c = tgts[i]; rc = &rm->rm_col[c]; - abd_copy_from_buf(rc->rc_abd, orig[i], + ASSERT3S(rc->rc_abd->abd_size, >=, rc->rc_size); + ASSERT3S(orig[i]->abd_size, >=, rc->rc_size); + abd_copy_off(rc->rc_abd, orig[i], 0, 0, rc->rc_size); } @@ -2294,9 +2312,8 @@ vdev_raidz_combrec(zio_t *zio, int total_errors, int data_errors) } n--; done: - for (i = 0; i < n; i++) { - zio_buf_free(orig[i], rm->rm_col[0].rc_size); - } + for (i = 0; i < n; i++) + abd_free(orig[i]); return (ret); } @@ -2555,7 +2572,8 @@ vdev_raidz_io_done(zio_t *zio) zfs_ereport_start_checksum( zio->io_spa, vd->vdev_child[rc->rc_devidx], - zio, rc->rc_offset, rc->rc_size, + &zio->io_bookmark, zio, + rc->rc_offset, rc->rc_size, (void *)(uintptr_t)c, &zbc); } } diff --git a/usr/src/uts/common/fs/zfs/zap_micro.c b/usr/src/uts/common/fs/zfs/zap_micro.c index 48b0be6665..dad227306b 100644 --- a/usr/src/uts/common/fs/zfs/zap_micro.c +++ b/usr/src/uts/common/fs/zfs/zap_micro.c @@ -670,7 +670,7 @@ mzap_create_impl(objset_t *os, uint64_t obj, int normflags, zap_flags_t flags, dmu_buf_will_dirty(db, tx); mzap_phys_t *zp = db->db_data; zp->mz_block_type = ZBT_MICRO; - zp->mz_salt = 
((uintptr_t)db ^ (uintptr_t)tx ^ (obj << 1)) | 1ULL; + (void) random_get_pseudo_bytes((void *)&zp->mz_salt, sizeof (uint64_t)); zp->mz_normflags = normflags; if (flags != 0) { diff --git a/usr/src/uts/common/fs/zfs/zcp_get.c b/usr/src/uts/common/fs/zfs/zcp_get.c index 1478c288d8..80814aeae4 100644 --- a/usr/src/uts/common/fs/zfs/zcp_get.c +++ b/usr/src/uts/common/fs/zfs/zcp_get.c @@ -421,6 +421,15 @@ get_special_prop(lua_State *state, dsl_dataset_t *ds, const char *dsname, case ZFS_PROP_INCONSISTENT: numval = dsl_get_inconsistent(ds); break; + case ZFS_PROP_IVSET_GUID: + if (dsl_dataset_is_zapified(ds)) { + error = zap_lookup(ds->ds_dir->dd_pool->dp_meta_objset, + ds->ds_object, DS_FIELD_IVSET_GUID, + sizeof (numval), 1, &numval); + } else { + error = ENOENT; + } + break; case ZFS_PROP_RECEIVE_RESUME_TOKEN: { char *token = get_receive_resume_stats_impl(ds); VERIFY3U(strlcpy(strval, token, ZAP_MAXVALUELEN), <, diff --git a/usr/src/uts/common/fs/zfs/zfeature.c b/usr/src/uts/common/fs/zfs/zfeature.c index 35ce827979..da9077ee73 100644 --- a/usr/src/uts/common/fs/zfs/zfeature.c +++ b/usr/src/uts/common/fs/zfs/zfeature.c @@ -369,6 +369,19 @@ feature_enable_sync(spa_t *spa, zfeature_info_t *feature, dmu_tx_t *tx) spa->spa_feat_enabled_txg_obj, feature->fi_guid, sizeof (uint64_t), 1, &enabling_txg, tx)); } + + /* + * Errata #4 is mostly a problem with encrypted datasets, but it + * is also a problem where the old encryption feature did not + * depend on the bookmark_v2 feature. If the pool does not have + * any encrypted datasets we can resolve this issue simply by + * enabling this dependency. 
+ */ + if (spa->spa_errata == ZPOOL_ERRATA_ZOL_8308_ENCRYPTION && + spa_feature_is_enabled(spa, SPA_FEATURE_ENCRYPTION) && + !spa_feature_is_active(spa, SPA_FEATURE_ENCRYPTION) && + feature->fi_feature == SPA_FEATURE_BOOKMARK_V2) + spa->spa_errata = 0; } static void @@ -413,8 +426,8 @@ spa_feature_create_zap_objects(spa_t *spa, dmu_tx_t *tx) * We create feature flags ZAP objects in two instances: during pool * creation and during pool upgrade. */ - ASSERT(dsl_pool_sync_context(spa_get_dsl(spa)) || (!spa->spa_sync_on && - tx->tx_txg == TXG_INITIAL)); + ASSERT((!spa->spa_sync_on && tx->tx_txg == TXG_INITIAL) || + dsl_pool_sync_context(spa_get_dsl(spa))); spa->spa_feat_for_read_obj = zap_create_link(spa->spa_meta_objset, DMU_OTN_ZAP_METADATA, DMU_POOL_DIRECTORY_OBJECT, diff --git a/usr/src/uts/common/fs/zfs/zfs_fm.c b/usr/src/uts/common/fs/zfs/zfs_fm.c index 398a3d04aa..07a7a9f70b 100644 --- a/usr/src/uts/common/fs/zfs/zfs_fm.c +++ b/usr/src/uts/common/fs/zfs/zfs_fm.c @@ -104,8 +104,8 @@ #ifdef _KERNEL static void zfs_ereport_start(nvlist_t **ereport_out, nvlist_t **detector_out, - const char *subclass, spa_t *spa, vdev_t *vd, zio_t *zio, - uint64_t stateoroffset, uint64_t size) + const char *subclass, spa_t *spa, vdev_t *vd, const zbookmark_phys_t *zb, + zio_t *zio, uint64_t stateoroffset, uint64_t size) { nvlist_t *ereport, *detector; @@ -318,24 +318,6 @@ zfs_ereport_start(nvlist_t **ereport_out, nvlist_t **detector_out, FM_EREPORT_PAYLOAD_ZFS_ZIO_SIZE, DATA_TYPE_UINT64, zio->io_size, NULL); } - - /* - * Payload for I/Os with corresponding logical information. 
- */ - if (zio->io_logical != NULL) - fm_payload_set(ereport, - FM_EREPORT_PAYLOAD_ZFS_ZIO_OBJSET, - DATA_TYPE_UINT64, - zio->io_logical->io_bookmark.zb_objset, - FM_EREPORT_PAYLOAD_ZFS_ZIO_OBJECT, - DATA_TYPE_UINT64, - zio->io_logical->io_bookmark.zb_object, - FM_EREPORT_PAYLOAD_ZFS_ZIO_LEVEL, - DATA_TYPE_INT64, - zio->io_logical->io_bookmark.zb_level, - FM_EREPORT_PAYLOAD_ZFS_ZIO_BLKID, - DATA_TYPE_UINT64, - zio->io_logical->io_bookmark.zb_blkid, NULL); } else if (vd != NULL) { /* * If we have a vdev but no zio, this is a device fault, and the @@ -347,6 +329,20 @@ zfs_ereport_start(nvlist_t **ereport_out, nvlist_t **detector_out, DATA_TYPE_UINT64, stateoroffset, NULL); } + /* + * Payload for I/Os with corresponding logical information. + */ + if (zb != NULL && (zio == NULL || zio->io_logical != NULL)) + fm_payload_set(ereport, + FM_EREPORT_PAYLOAD_ZFS_ZIO_OBJSET, + DATA_TYPE_UINT64, zb->zb_objset, + FM_EREPORT_PAYLOAD_ZFS_ZIO_OBJECT, + DATA_TYPE_UINT64, zb->zb_object, + FM_EREPORT_PAYLOAD_ZFS_ZIO_LEVEL, + DATA_TYPE_INT64, zb->zb_level, + FM_EREPORT_PAYLOAD_ZFS_ZIO_BLKID, + DATA_TYPE_UINT64, zb->zb_blkid, NULL); + mutex_exit(&spa->spa_errlist_lock); *ereport_out = ereport; @@ -501,11 +497,11 @@ range_total_size(zfs_ecksum_info_t *eip) static zfs_ecksum_info_t * annotate_ecksum(nvlist_t *ereport, zio_bad_cksum_t *info, - const uint8_t *goodbuf, const uint8_t *badbuf, size_t size, + const abd_t *goodabd, const abd_t *badabd, size_t size, boolean_t drop_if_identical) { - const uint64_t *good = (const uint64_t *)goodbuf; - const uint64_t *bad = (const uint64_t *)badbuf; + const uint64_t *good; + const uint64_t *bad; uint64_t allset = 0; uint64_t allcleared = 0; @@ -549,7 +545,7 @@ annotate_ecksum(nvlist_t *ereport, zio_bad_cksum_t *info, } } - if (badbuf == NULL || goodbuf == NULL) + if (badabd == NULL || goodabd == NULL) return (eip); ASSERT3U(nui64s, <=, UINT32_MAX); @@ -557,6 +553,9 @@ annotate_ecksum(nvlist_t *ereport, zio_bad_cksum_t *info, ASSERT3U(size, <=, 
SPA_MAXBLOCKSIZE); ASSERT3U(size, <=, UINT32_MAX); + good = (const uint64_t *) abd_borrow_buf_copy((abd_t *)goodabd, size); + bad = (const uint64_t *) abd_borrow_buf_copy((abd_t *)badabd, size); + /* build up the range list by comparing the two buffers. */ for (idx = 0; idx < nui64s; idx++) { if (good[idx] == bad[idx]) { @@ -586,6 +585,8 @@ annotate_ecksum(nvlist_t *ereport, zio_bad_cksum_t *info, */ if (inline_size == 0 && drop_if_identical) { kmem_free(eip, sizeof (*eip)); + abd_return_buf((abd_t *)goodabd, (void *)good, size); + abd_return_buf((abd_t *)badabd, (void *)bad, size); return (NULL); } @@ -626,6 +627,10 @@ annotate_ecksum(nvlist_t *ereport, zio_bad_cksum_t *info, eip->zei_ranges[range].zr_start *= sizeof (uint64_t); eip->zei_ranges[range].zr_end *= sizeof (uint64_t); } + + abd_return_buf((abd_t *)goodabd, (void *)good, size); + abd_return_buf((abd_t *)badabd, (void *)bad, size); + eip->zei_allowed_mingap *= sizeof (uint64_t); inline_size *= sizeof (uint64_t); @@ -666,15 +671,16 @@ annotate_ecksum(nvlist_t *ereport, zio_bad_cksum_t *info, #endif void -zfs_ereport_post(const char *subclass, spa_t *spa, vdev_t *vd, zio_t *zio, - uint64_t stateoroffset, uint64_t size) +zfs_ereport_post(const char *subclass, spa_t *spa, vdev_t *vd, + const struct zbookmark_phys *zb, zio_t *zio, uint64_t stateoroffset, + uint64_t size) { #ifdef _KERNEL nvlist_t *ereport = NULL; nvlist_t *detector = NULL; - zfs_ereport_start(&ereport, &detector, - subclass, spa, vd, zio, stateoroffset, size); + zfs_ereport_start(&ereport, &detector, subclass, spa, vd, + zb, zio, stateoroffset, size); if (ereport == NULL) return; @@ -687,7 +693,7 @@ zfs_ereport_post(const char *subclass, spa_t *spa, vdev_t *vd, zio_t *zio, } void -zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd, +zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd, const zbookmark_phys_t *zb, struct zio *zio, uint64_t offset, uint64_t length, void *arg, zio_bad_cksum_t *info) { @@ -709,7 +715,7 @@ 
zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd, #ifdef _KERNEL zfs_ereport_start(&report->zcr_ereport, &report->zcr_detector, - FM_EREPORT_ZFS_CHECKSUM, spa, vd, zio, offset, length); + FM_EREPORT_ZFS_CHECKSUM, spa, vd, zb, zio, offset, length); if (report->zcr_ereport == NULL) { report->zcr_free(report->zcr_cbdata, report->zcr_cbinfo); @@ -729,8 +735,8 @@ zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd, } void -zfs_ereport_finish_checksum(zio_cksum_report_t *report, - const void *good_data, const void *bad_data, boolean_t drop_if_identical) +zfs_ereport_finish_checksum(zio_cksum_report_t *report, const abd_t *good_data, + const abd_t *bad_data, boolean_t drop_if_identical) { #ifdef _KERNEL zfs_ecksum_info_t *info = NULL; @@ -777,17 +783,17 @@ zfs_ereport_send_interim_checksum(zio_cksum_report_t *report) } void -zfs_ereport_post_checksum(spa_t *spa, vdev_t *vd, +zfs_ereport_post_checksum(spa_t *spa, vdev_t *vd, const zbookmark_phys_t *zb, struct zio *zio, uint64_t offset, uint64_t length, - const void *good_data, const void *bad_data, zio_bad_cksum_t *zbc) + const abd_t *good_data, const abd_t *bad_data, zio_bad_cksum_t *zbc) { #ifdef _KERNEL nvlist_t *ereport = NULL; nvlist_t *detector = NULL; zfs_ecksum_info_t *info; - zfs_ereport_start(&ereport, &detector, - FM_EREPORT_ZFS_CHECKSUM, spa, vd, zio, offset, length); + zfs_ereport_start(&ereport, &detector, FM_EREPORT_ZFS_CHECKSUM, + spa, vd, zb, zio, offset, length); if (ereport == NULL) return; diff --git a/usr/src/uts/common/fs/zfs/zfs_ioctl.c b/usr/src/uts/common/fs/zfs/zfs_ioctl.c index 712abee22f..71018c3836 100644 --- a/usr/src/uts/common/fs/zfs/zfs_ioctl.c +++ b/usr/src/uts/common/fs/zfs/zfs_ioctl.c @@ -32,7 +32,7 @@ * Copyright (c) 2014 Integros [integros.com] * Copyright 2016 Toomas Soome <tsoome@me.com> * Copyright 2017 RackTop Systems. - * Copyright (c) 2017 Datto Inc. + * Copyright (c) 2017, Datto, Inc. All rights reserved. 
*/ /* @@ -192,6 +192,7 @@ #include <sys/vdev_removal.h> #include <sys/vdev_impl.h> #include <sys/vdev_initialize.h> +#include <sys/dsl_crypt.h> #include "zfs_namecheck.h" #include "zfs_prop.h" @@ -593,12 +594,12 @@ zfs_set_slabel_policy(const char *name, char *strval, cred_t *cr) * Try to own the dataset; abort if there is any error, * (e.g., already mounted, in use, or other error). */ - error = dmu_objset_own(name, DMU_OST_ZFS, B_TRUE, + error = dmu_objset_own(name, DMU_OST_ZFS, B_TRUE, B_TRUE, setsl_tag, &os); if (error != 0) return (SET_ERROR(EPERM)); - dmu_objset_disown(os, setsl_tag); + dmu_objset_disown(os, B_TRUE, setsl_tag); if (new_default) { needed_priv = PRIV_FILE_DOWNGRADE_SL; @@ -1285,6 +1286,22 @@ zfs_secpolicy_release(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) return (0); } +/* ARGSUSED */ +static int +zfs_secpolicy_load_key(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) +{ + return (zfs_secpolicy_write_perms(zc->zc_name, + ZFS_DELEG_PERM_LOAD_KEY, cr)); +} + +/* ARGSUSED */ +static int +zfs_secpolicy_change_key(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) +{ + return (zfs_secpolicy_write_perms(zc->zc_name, + ZFS_DELEG_PERM_CHANGE_KEY, cr)); +} + /* * Policy for allowing temporary snapshots to be taken or released */ @@ -1481,7 +1498,7 @@ zfsvfs_rele(zfsvfs_t *zfsvfs, void *tag) if (zfsvfs->z_vfs) { VFS_RELE(zfsvfs->z_vfs); } else { - dmu_objset_disown(zfsvfs->z_os, zfsvfs); + dmu_objset_disown(zfsvfs->z_os, B_TRUE, zfsvfs); zfsvfs_free(zfsvfs); } } @@ -1494,6 +1511,7 @@ zfs_ioc_pool_create(zfs_cmd_t *zc) nvlist_t *rootprops = NULL; nvlist_t *zplprops = NULL; char *spa_name = zc->zc_name; + dsl_crypto_params_t *dcp = NULL; if (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size, zc->zc_iflags, &config)) @@ -1508,6 +1526,7 @@ zfs_ioc_pool_create(zfs_cmd_t *zc) if (props) { nvlist_t *nvl = NULL; + nvlist_t *hidden_args = NULL; uint64_t version = SPA_VERSION; char *tname; @@ -1527,6 +1546,18 @@ zfs_ioc_pool_create(zfs_cmd_t *zc) } (void) 
nvlist_remove_all(props, ZPOOL_ROOTFS_PROPS); } + + (void) nvlist_lookup_nvlist(props, ZPOOL_HIDDEN_ARGS, + &hidden_args); + error = dsl_crypto_params_create_nvlist(DCP_CMD_NONE, + rootprops, hidden_args, &dcp); + if (error != 0) { + nvlist_free(config); + nvlist_free(props); + return (error); + } + (void) nvlist_remove_all(props, ZPOOL_HIDDEN_ARGS); + VERIFY(nvlist_alloc(&zplprops, NV_UNIQUE_NAME, KM_SLEEP) == 0); error = zfs_fill_zplprops_root(version, rootprops, zplprops, NULL); @@ -1538,7 +1569,7 @@ zfs_ioc_pool_create(zfs_cmd_t *zc) spa_name = tname; } - error = spa_create(zc->zc_name, config, props, zplprops); + error = spa_create(zc->zc_name, config, props, zplprops, dcp); /* * Set the remaining root properties @@ -1552,6 +1583,7 @@ pool_props_bad: nvlist_free(zplprops); nvlist_free(config); nvlist_free(props); + dsl_crypto_params_free(dcp, !!error); return (error); } @@ -1830,15 +1862,16 @@ zfs_ioc_obj_to_path(zfs_cmd_t *zc) int error; /* XXX reading from objset not owned */ - if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os)) != 0) + if ((error = dmu_objset_hold_flags(zc->zc_name, B_TRUE, + FTAG, &os)) != 0) return (error); if (dmu_objset_type(os) != DMU_OST_ZFS) { - dmu_objset_rele(os, FTAG); + dmu_objset_rele_flags(os, B_TRUE, FTAG); return (SET_ERROR(EINVAL)); } error = zfs_obj_to_path(os, zc->zc_obj, zc->zc_value, sizeof (zc->zc_value)); - dmu_objset_rele(os, FTAG); + dmu_objset_rele_flags(os, B_TRUE, FTAG); return (error); } @@ -1859,15 +1892,16 @@ zfs_ioc_obj_to_stats(zfs_cmd_t *zc) int error; /* XXX reading from objset not owned */ - if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os)) != 0) + if ((error = dmu_objset_hold_flags(zc->zc_name, B_TRUE, + FTAG, &os)) != 0) return (error); if (dmu_objset_type(os) != DMU_OST_ZFS) { - dmu_objset_rele(os, FTAG); + dmu_objset_rele_flags(os, B_TRUE, FTAG); return (SET_ERROR(EINVAL)); } error = zfs_obj_to_stats(os, zc->zc_obj, &zc->zc_stat, zc->zc_value, sizeof (zc->zc_value)); - dmu_objset_rele(os, FTAG); 
+ dmu_objset_rele_flags(os, B_TRUE, FTAG); return (error); } @@ -2437,7 +2471,8 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source, { const char *propname = nvpair_name(pair); zfs_prop_t prop = zfs_name_to_prop(propname); - uint64_t intval; + uint64_t intval = 0; + char *strval = NULL; int err = -1; if (prop == ZPROP_INVAL) { @@ -2453,10 +2488,12 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source, &pair) == 0); } - if (zfs_prop_get_type(prop) == PROP_TYPE_STRING) - return (-1); - - VERIFY(0 == nvpair_value_uint64(pair, &intval)); + /* all special properties are numeric except for keylocation */ + if (zfs_prop_get_type(prop) == PROP_TYPE_STRING) { + strval = fnvpair_value_string(pair); + } else { + intval = fnvpair_value_uint64(pair); + } switch (prop) { case ZFS_PROP_QUOTA: @@ -2480,6 +2517,16 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source, if (err == 0) err = -1; break; + case ZFS_PROP_KEYLOCATION: + err = dsl_crypto_can_set_keylocation(dsname, strval); + + /* + * Set err to -1 to force the zfs_set_prop_nvlist code down the + * default path to set the value in the nvlist. 
+ */ + if (err == 0) + err = -1; + break; case ZFS_PROP_RESERVATION: err = dsl_dir_set_reservation(dsname, source, intval); break; @@ -3183,6 +3230,8 @@ zfs_fill_zplprops_root(uint64_t spa_vers, nvlist_t *createprops, * innvl: { * "type" -> dmu_objset_type_t (int32) * (optional) "props" -> { prop -> value } + * (optional) "hidden_args" -> { "wkeydata" -> value } + * raw uint8_t array of encryption wrapping key data (32 bytes) * } * * outnvl: propname -> error code (int32) @@ -3193,15 +3242,18 @@ zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl) int error = 0; zfs_creat_t zct = { 0 }; nvlist_t *nvprops = NULL; + nvlist_t *hidden_args = NULL; void (*cbfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx); int32_t type32; dmu_objset_type_t type; boolean_t is_insensitive = B_FALSE; + dsl_crypto_params_t *dcp = NULL; if (nvlist_lookup_int32(innvl, "type", &type32) != 0) return (SET_ERROR(EINVAL)); type = type32; (void) nvlist_lookup_nvlist(innvl, "props", &nvprops); + (void) nvlist_lookup_nvlist(innvl, ZPOOL_HIDDEN_ARGS, &hidden_args); switch (type) { case DMU_OST_ZFS: @@ -3267,9 +3319,18 @@ zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl) } } + error = dsl_crypto_params_create_nvlist(DCP_CMD_NONE, nvprops, + hidden_args, &dcp); + if (error != 0) { + nvlist_free(zct.zct_zplprops); + return (error); + } + error = dmu_objset_create(fsname, type, - is_insensitive ? DS_FLAG_CI_DATASET : 0, cbfunc, &zct); + is_insensitive ? DS_FLAG_CI_DATASET : 0, dcp, cbfunc, &zct); + nvlist_free(zct.zct_zplprops); + dsl_crypto_params_free(dcp, !!error); /* * It would be nice to do this atomically. 
@@ -3287,6 +3348,8 @@ zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl) * innvl: { * "origin" -> name of origin snapshot * (optional) "props" -> { prop -> value } + * (optional) "hidden_args" -> { "wkeydata" -> value } + * raw uint8_t array of encryption wrapping key data (32 bytes) * } * * outnvl: propname -> error code (int32) @@ -3308,9 +3371,8 @@ zfs_ioc_clone(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl) if (dataset_namecheck(origin_name, NULL, NULL) != 0) return (SET_ERROR(EINVAL)); + error = dmu_objset_clone(fsname, origin_name); - if (error != 0) - return (error); /* * It would be nice to do this atomically. @@ -4328,7 +4390,11 @@ extract_delay_props(nvlist_t *props) { nvlist_t *delayprops; nvpair_t *nvp, *tmp; - static const zfs_prop_t delayable[] = { ZFS_PROP_REFQUOTA, 0 }; + static const zfs_prop_t delayable[] = { + ZFS_PROP_REFQUOTA, + ZFS_PROP_KEYLOCATION, + 0 + }; int i; VERIFY(nvlist_alloc(&delayprops, NV_UNIQUE_NAME, KM_SLEEP) == 0); @@ -4517,7 +4583,7 @@ zfs_ioc_recv(zfs_cmd_t *zc) } } - if (delayprops != NULL) { + if (delayprops != NULL && props != NULL) { /* * Merge delayed props back in with initial props, in case * we're DEBUG and zfs_ioc_recv_inject_err is set (which means @@ -4627,6 +4693,7 @@ zfs_ioc_send(zfs_cmd_t *zc) boolean_t embedok = (zc->zc_flags & 0x1); boolean_t large_block_ok = (zc->zc_flags & 0x2); boolean_t compressok = (zc->zc_flags & 0x4); + boolean_t rawok = (zc->zc_flags & 0x8); if (zc->zc_obj != 0) { dsl_pool_t *dp; @@ -4658,7 +4725,8 @@ zfs_ioc_send(zfs_cmd_t *zc) if (error != 0) return (error); - error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &tosnap); + error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, + FTAG, &tosnap); if (error != 0) { dsl_pool_rele(dp, FTAG); return (error); @@ -4674,7 +4742,7 @@ zfs_ioc_send(zfs_cmd_t *zc) } } - error = dmu_send_estimate(tosnap, fromsnap, compressok, + error = dmu_send_estimate(tosnap, fromsnap, compressok || rawok, &zc->zc_objset_type); if 
(fromsnap != NULL) @@ -4688,7 +4756,7 @@ zfs_ioc_send(zfs_cmd_t *zc) off = fp->f_offset; error = dmu_send_obj(zc->zc_name, zc->zc_sendobj, - zc->zc_fromobj, embedok, large_block_ok, compressok, + zc->zc_fromobj, embedok, large_block_ok, compressok, rawok, zc->zc_cookie, fp->f_vnode, &off); if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0) @@ -5078,7 +5146,7 @@ zfs_ioc_userspace_upgrade(zfs_cmd_t *zc) error = zfs_suspend_fs(zfsvfs); if (error == 0) { dmu_objset_refresh_ownership(ds, &newds, - zfsvfs); + B_TRUE, zfsvfs); error = zfs_resume_fs(zfsvfs, newds); } } @@ -5087,12 +5155,12 @@ zfs_ioc_userspace_upgrade(zfs_cmd_t *zc) VFS_RELE(zfsvfs->z_vfs); } else { /* XXX kind of reading contents without owning */ - error = dmu_objset_hold(zc->zc_name, FTAG, &os); + error = dmu_objset_hold_flags(zc->zc_name, B_TRUE, FTAG, &os); if (error != 0) return (error); error = dmu_objset_userspace_upgrade(os); - dmu_objset_rele(os, FTAG); + dmu_objset_rele_flags(os, B_TRUE, FTAG); } return (error); @@ -5250,7 +5318,7 @@ zfs_ioc_next_obj(zfs_cmd_t *zc) objset_t *os = NULL; int error; - error = dmu_objset_hold(zc->zc_name, FTAG, &os); + error = dmu_objset_hold_flags(zc->zc_name, B_TRUE, FTAG, &os); if (error != 0) return (error); @@ -5668,6 +5736,8 @@ zfs_ioc_space_snaps(const char *lastsnap, nvlist_t *innvl, nvlist_t *outnvl) * presence indicates DRR_WRITE_EMBEDDED records are permitted * (optional) "compressok" -> (value ignored) * presence indicates compressed DRR_WRITE records are permitted + * (optional) "rawok" -> (value ignored) + * presence indicates raw encrypted records should be used. * (optional) "resume_object" and "resume_offset" -> (uint64) * if present, resume send stream from specified object and offset. 
* } @@ -5685,6 +5755,7 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl) boolean_t largeblockok; boolean_t embedok; boolean_t compressok; + boolean_t rawok; uint64_t resumeobj = 0; uint64_t resumeoff = 0; @@ -5697,6 +5768,7 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl) largeblockok = nvlist_exists(innvl, "largeblockok"); embedok = nvlist_exists(innvl, "embedok"); compressok = nvlist_exists(innvl, "compressok"); + rawok = nvlist_exists(innvl, "rawok"); (void) nvlist_lookup_uint64(innvl, "resume_object", &resumeobj); (void) nvlist_lookup_uint64(innvl, "resume_offset", &resumeoff); @@ -5707,7 +5779,7 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl) off = fp->f_offset; error = dmu_send(snapname, fromname, embedok, largeblockok, compressok, - fd, resumeobj, resumeoff, fp->f_vnode, &off); + rawok, fd, resumeobj, resumeoff, fp->f_vnode, &off); if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0) fp->f_offset = off; @@ -5742,6 +5814,7 @@ zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl) int error; char *fromname; boolean_t compressok; + boolean_t rawok; uint64_t space; error = dsl_pool_hold(snapname, FTAG, &dp); @@ -5755,6 +5828,7 @@ zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl) } compressok = nvlist_exists(innvl, "compressok"); + rawok = nvlist_exists(innvl, "rawok"); error = nvlist_lookup_string(innvl, "from", &fromname); if (error == 0) { @@ -5768,8 +5842,8 @@ zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl) error = dsl_dataset_hold(dp, fromname, FTAG, &fromsnap); if (error != 0) goto out; - error = dmu_send_estimate(tosnap, fromsnap, compressok, - &space); + error = dmu_send_estimate(tosnap, fromsnap, + compressok || rawok, &space); dsl_dataset_rele(fromsnap, FTAG); } else if (strchr(fromname, '#') != NULL) { /* @@ -5784,7 +5858,8 @@ zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, 
nvlist_t *outnvl) if (error != 0) goto out; error = dmu_send_estimate_from_txg(tosnap, - frombm.zbm_creation_txg, compressok, &space); + frombm.zbm_creation_txg, compressok || rawok, + &space); } else { /* * from is not properly formatted as a snapshot or @@ -5797,7 +5872,8 @@ zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl) /* * If estimating the size of a full send, use dmu_send_estimate. */ - error = dmu_send_estimate(tosnap, NULL, compressok, &space); + error = dmu_send_estimate(tosnap, NULL, compressok || rawok, + &space); } fnvlist_add_uint64(outnvl, "space", space); @@ -5846,6 +5922,124 @@ zfs_ioc_pool_sync(const char *pool, nvlist_t *innvl, nvlist_t *onvl) return (err); } +/* + * Load a user's wrapping key into the kernel. + * innvl: { + * "hidden_args" -> { "wkeydata" -> value } + * raw uint8_t array of encryption wrapping key data (32 bytes) + * (optional) "noop" -> (value ignored) + * presence indicated key should only be verified, not loaded + * } + */ +/* ARGSUSED */ +static int +zfs_ioc_load_key(const char *dsname, nvlist_t *innvl, nvlist_t *outnvl) +{ + int ret = 0; + dsl_crypto_params_t *dcp = NULL; + nvlist_t *hidden_args; + boolean_t noop = nvlist_exists(innvl, "noop"); + + if (strchr(dsname, '@') != NULL || strchr(dsname, '%') != NULL) { + ret = SET_ERROR(EINVAL); + goto error; + } + + ret = nvlist_lookup_nvlist(innvl, ZPOOL_HIDDEN_ARGS, &hidden_args); + if (ret != 0) { + ret = SET_ERROR(EINVAL); + goto error; + } + + ret = dsl_crypto_params_create_nvlist(DCP_CMD_NONE, NULL, + hidden_args, &dcp); + if (ret != 0) + goto error; + + ret = spa_keystore_load_wkey(dsname, dcp, noop); + if (ret != 0) + goto error; + + dsl_crypto_params_free(dcp, noop); + + return (0); + +error: + dsl_crypto_params_free(dcp, B_TRUE); + return (ret); +} + +/* + * Unload a user's wrapping key from the kernel. + * Both innvl and outnvl are unused. 
+ */ +/* ARGSUSED */ +static int +zfs_ioc_unload_key(const char *dsname, nvlist_t *innvl, nvlist_t *outnvl) +{ + int ret = 0; + + if (strchr(dsname, '@') != NULL || strchr(dsname, '%') != NULL) { + ret = (SET_ERROR(EINVAL)); + goto out; + } + + ret = spa_keystore_unload_wkey(dsname); + if (ret != 0) + goto out; + +out: + return (ret); +} + +/* + * Changes a user's wrapping key used to decrypt a dataset. The keyformat, + * keylocation, pbkdf2salt, and pbkdf2iters properties can also be specified + * here to change how the key is derived in userspace. + * + * innvl: { + * "hidden_args" (optional) -> { "wkeydata" -> value } + * raw uint8_t array of new encryption wrapping key data (32 bytes) + * "props" (optional) -> { prop -> value } + * } + * + * outnvl is unused + */ +/* ARGSUSED */ +static int +zfs_ioc_change_key(const char *dsname, nvlist_t *innvl, nvlist_t *outnvl) +{ + int ret; + uint64_t cmd = DCP_CMD_NONE; + dsl_crypto_params_t *dcp = NULL; + nvlist_t *args = NULL, *hidden_args = NULL; + + if (strchr(dsname, '@') != NULL || strchr(dsname, '%') != NULL) { + ret = (SET_ERROR(EINVAL)); + goto error; + } + + (void) nvlist_lookup_uint64(innvl, "crypt_cmd", &cmd); + (void) nvlist_lookup_nvlist(innvl, "props", &args); + (void) nvlist_lookup_nvlist(innvl, ZPOOL_HIDDEN_ARGS, &hidden_args); + + ret = dsl_crypto_params_create_nvlist(cmd, args, hidden_args, &dcp); + if (ret != 0) + goto error; + + ret = spa_keystore_change_key(dsname, dcp); + if (ret != 0) + goto error; + + dsl_crypto_params_free(dcp, B_FALSE); + + return (0); + +error: + dsl_crypto_params_free(dcp, B_TRUE); + return (ret); +} + static zfs_ioc_vec_t zfs_ioc_vec[ZFS_IOC_LAST - ZFS_IOC_FIRST]; static void @@ -6040,6 +6234,17 @@ zfs_ioctl_init(void) zfs_ioc_pool_sync, zfs_secpolicy_none, POOL_NAME, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE); + zfs_ioctl_register("load-key", ZFS_IOC_LOAD_KEY, + zfs_ioc_load_key, zfs_secpolicy_load_key, + DATASET_NAME, POOL_CHECK_SUSPENDED, B_TRUE, B_TRUE); 
+ zfs_ioctl_register("unload-key", ZFS_IOC_UNLOAD_KEY, + zfs_ioc_unload_key, zfs_secpolicy_load_key, + DATASET_NAME, POOL_CHECK_SUSPENDED, B_TRUE, B_TRUE); + zfs_ioctl_register("change-key", ZFS_IOC_CHANGE_KEY, + zfs_ioc_change_key, zfs_secpolicy_change_key, + DATASET_NAME, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, + B_TRUE, B_TRUE); + /* IOCTLS that use the legacy function signature */ zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze, diff --git a/usr/src/uts/common/fs/zfs/zfs_vfsops.c b/usr/src/uts/common/fs/zfs/zfs_vfsops.c index f7beea4cc9..dfd13539cd 100644 --- a/usr/src/uts/common/fs/zfs/zfs_vfsops.c +++ b/usr/src/uts/common/fs/zfs/zfs_vfsops.c @@ -974,8 +974,8 @@ zfsvfs_create(const char *osname, zfsvfs_t **zfvp) * We claim to always be readonly so we can open snapshots; * other ZPL code will prevent us from writing to snapshots. */ - - error = dmu_objset_own(osname, DMU_OST_ZFS, B_TRUE, zfsvfs, &os); + error = dmu_objset_own(osname, DMU_OST_ZFS, B_TRUE, B_TRUE, zfsvfs, + &os); if (error != 0) { kmem_free(zfsvfs, sizeof (zfsvfs_t)); return (error); @@ -983,7 +983,7 @@ zfsvfs_create(const char *osname, zfsvfs_t **zfvp) error = zfsvfs_create_impl(zfvp, zfsvfs, os); if (error != 0) { - dmu_objset_disown(os, zfsvfs); + dmu_objset_disown(os, B_TRUE, zfsvfs); } return (error); } @@ -1084,7 +1084,10 @@ zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting) zfsvfs->z_replay = B_FALSE; } } - zfsvfs->z_vfs->vfs_flag |= readonly; /* restore readonly bit */ + + /* restore readonly bit */ + if (readonly != 0) + zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY; } /* @@ -1235,7 +1238,7 @@ zfs_domount(vfs_t *vfsp, char *osname) zfsctl_create(zfsvfs); out: if (error) { - dmu_objset_disown(zfsvfs->z_os, zfsvfs); + dmu_objset_disown(zfsvfs->z_os, B_TRUE, zfsvfs); zfsvfs_free(zfsvfs); } else { atomic_inc_32(&zfs_active_fs_count); @@ -1903,7 +1906,7 @@ zfs_umount(vfs_t *vfsp, int fflag, cred_t *cr) /* * Finally release the objset */ - dmu_objset_disown(os, zfsvfs); + 
dmu_objset_disown(os, B_TRUE, zfsvfs); } /* diff --git a/usr/src/uts/common/fs/zfs/zfs_vnops.c b/usr/src/uts/common/fs/zfs/zfs_vnops.c index a68fc3dd34..c8cb5b3935 100644 --- a/usr/src/uts/common/fs/zfs/zfs_vnops.c +++ b/usr/src/uts/common/fs/zfs/zfs_vnops.c @@ -915,8 +915,8 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) xuio_stat_wbuf_copied(); } else { ASSERT(xuio || tx_bytes == max_blksz); - dmu_assign_arcbuf(sa_get_db(zp->z_sa_hdl), - woff, abuf, tx); + dmu_assign_arcbuf_by_dbuf( + sa_get_db(zp->z_sa_hdl), woff, abuf, tx); } ASSERT(tx_bytes <= uio->uio_resid); uioskip(uio, tx_bytes); diff --git a/usr/src/uts/common/fs/zfs/zil.c b/usr/src/uts/common/fs/zfs/zil.c index 7e88c51a0b..e56104f979 100644 --- a/usr/src/uts/common/fs/zfs/zil.c +++ b/usr/src/uts/common/fs/zfs/zil.c @@ -194,8 +194,8 @@ zil_init_log_chain(zilog_t *zilog, blkptr_t *bp) * Read a log block and make sure it's valid. */ static int -zil_read_log_block(zilog_t *zilog, const blkptr_t *bp, blkptr_t *nbp, void *dst, - char **end) +zil_read_log_block(zilog_t *zilog, boolean_t decrypt, const blkptr_t *bp, + blkptr_t *nbp, void *dst, char **end) { enum zio_flag zio_flags = ZIO_FLAG_CANFAIL; arc_flags_t aflags = ARC_FLAG_WAIT; @@ -209,11 +209,14 @@ zil_read_log_block(zilog_t *zilog, const blkptr_t *bp, blkptr_t *nbp, void *dst, if (!(zilog->zl_header->zh_flags & ZIL_CLAIM_LR_SEQ_VALID)) zio_flags |= ZIO_FLAG_SPECULATIVE; + if (!decrypt) + zio_flags |= ZIO_FLAG_RAW; + SET_BOOKMARK(&zb, bp->blk_cksum.zc_word[ZIL_ZC_OBJSET], ZB_ZIL_OBJECT, ZB_ZIL_LEVEL, bp->blk_cksum.zc_word[ZIL_ZC_SEQ]); - error = arc_read(NULL, zilog->zl_spa, bp, arc_getbuf_func, &abuf, - ZIO_PRIORITY_SYNC_READ, zio_flags, &aflags, &zb); + error = arc_read(NULL, zilog->zl_spa, bp, arc_getbuf_func, + &abuf, ZIO_PRIORITY_SYNC_READ, zio_flags, &aflags, &zb); if (error == 0) { zio_cksum_t cksum = bp->blk_cksum; @@ -288,6 +291,14 @@ zil_read_log_data(zilog_t *zilog, const lr_write_t *lr, void *wbuf) if 
(zilog->zl_header->zh_claim_txg == 0) zio_flags |= ZIO_FLAG_SPECULATIVE | ZIO_FLAG_SCRUB; + /* + * If we are not using the resulting data, we are just checking that + * it hasn't been corrupted so we don't need to waste CPU time + * decompressing and decrypting it. + */ + if (wbuf == NULL) + zio_flags |= ZIO_FLAG_RAW; + SET_BOOKMARK(&zb, dmu_objset_id(zilog->zl_os), lr->lr_foid, ZB_ZIL_LEVEL, lr->lr_offset / BP_GET_LSIZE(bp)); @@ -308,7 +319,8 @@ zil_read_log_data(zilog_t *zilog, const lr_write_t *lr, void *wbuf) */ int zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func, - zil_parse_lr_func_t *parse_lr_func, void *arg, uint64_t txg) + zil_parse_lr_func_t *parse_lr_func, void *arg, uint64_t txg, + boolean_t decrypt) { const zil_header_t *zh = zilog->zl_header; boolean_t claimed = !!zh->zh_claim_txg; @@ -347,7 +359,9 @@ zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func, if (blk_seq > claim_blk_seq) break; - if ((error = parse_blk_func(zilog, &blk, arg, txg)) != 0) + + error = parse_blk_func(zilog, &blk, arg, txg); + if (error != 0) break; ASSERT3U(max_blk_seq, <, blk_seq); max_blk_seq = blk_seq; @@ -356,7 +370,8 @@ zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func, if (max_lr_seq == claim_lr_seq && max_blk_seq == claim_blk_seq) break; - error = zil_read_log_block(zilog, &blk, &next_blk, lrbuf, &end); + error = zil_read_log_block(zilog, decrypt, &blk, &next_blk, + lrbuf, &end); if (error != 0) break; @@ -366,7 +381,9 @@ zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func, ASSERT3U(reclen, >=, sizeof (lr_t)); if (lr->lrc_seq > claim_lr_seq) goto done; - if ((error = parse_lr_func(zilog, lr, arg, txg)) != 0) + + error = parse_lr_func(zilog, lr, arg, txg); + if (error != 0) goto done; ASSERT3U(max_lr_seq, <, lr->lrc_seq); max_lr_seq = lr->lrc_seq; @@ -381,7 +398,8 @@ done: zilog->zl_parse_lr_count = lr_count; ASSERT(!claimed || !(zh->zh_flags & ZIL_CLAIM_LR_SEQ_VALID) || - (max_blk_seq == claim_blk_seq && max_lr_seq == 
claim_lr_seq)); + (max_blk_seq == claim_blk_seq && max_lr_seq == claim_lr_seq) || + (decrypt && error == EIO)); zil_bp_tree_fini(zilog); zio_buf_free(lrbuf, SPA_OLD_MAXBLOCKSIZE); @@ -451,9 +469,12 @@ zil_claim_log_record(zilog_t *zilog, lr_t *lrc, void *tx, uint64_t first_txg) * waited for all writes to be stable first), so it is semantically * correct to declare this the end of the log. */ - if (lr->lr_blkptr.blk_birth >= first_txg && - (error = zil_read_log_data(zilog, lr, NULL)) != 0) - return (error); + if (lr->lr_blkptr.blk_birth >= first_txg) { + error = zil_read_log_data(zilog, lr, NULL); + if (error != 0) + return (error); + } + return (zil_claim_log_block(zilog, &lr->lr_blkptr, tx, first_txg)); } @@ -646,9 +667,8 @@ zil_create(zilog_t *zilog) BP_ZERO(&blk); } - error = zio_alloc_zil(zilog->zl_spa, - zilog->zl_os->os_dsl_dataset->ds_object, txg, &blk, NULL, - ZIL_MIN_BLKSZ, &slog); + error = zio_alloc_zil(zilog->zl_spa, zilog->zl_os, txg, &blk, + NULL, ZIL_MIN_BLKSZ, &slog); if (error == 0) zil_init_log_chain(zilog, &blk); @@ -736,7 +756,7 @@ zil_destroy_sync(zilog_t *zilog, dmu_tx_t *tx) { ASSERT(list_is_empty(&zilog->zl_lwb_list)); (void) zil_parse(zilog, zil_free_log_block, - zil_free_log_record, tx, zilog->zl_header->zh_claim_txg); + zil_free_log_record, tx, zilog->zl_header->zh_claim_txg, B_FALSE); } int @@ -750,7 +770,7 @@ zil_claim(dsl_pool_t *dp, dsl_dataset_t *ds, void *txarg) int error; error = dmu_objset_own_obj(dp, ds->ds_object, - DMU_OST_ANY, B_FALSE, FTAG, &os); + DMU_OST_ANY, B_FALSE, B_FALSE, FTAG, &os); if (error != 0) { /* * EBUSY indicates that the objset is inconsistent, in which @@ -800,11 +820,13 @@ zil_claim(dsl_pool_t *dp, dsl_dataset_t *ds, void *txarg) zh->zh_claim_txg == 0)) { if (!BP_IS_HOLE(&zh->zh_log)) { (void) zil_parse(zilog, zil_clear_log_block, - zil_noop_log_record, tx, first_txg); + zil_noop_log_record, tx, first_txg, B_FALSE); } BP_ZERO(&zh->zh_log); + if (os->os_encrypted) + os->os_next_write_raw[tx->tx_txg & 
TXG_MASK] = B_TRUE; dsl_dataset_dirty(dmu_objset_ds(os), tx); - dmu_objset_disown(os, FTAG); + dmu_objset_disown(os, B_FALSE, FTAG); return (0); } @@ -824,18 +846,20 @@ zil_claim(dsl_pool_t *dp, dsl_dataset_t *ds, void *txarg) ASSERT3U(zh->zh_claim_txg, <=, first_txg); if (zh->zh_claim_txg == 0 && !BP_IS_HOLE(&zh->zh_log)) { (void) zil_parse(zilog, zil_claim_log_block, - zil_claim_log_record, tx, first_txg); + zil_claim_log_record, tx, first_txg, B_FALSE); zh->zh_claim_txg = first_txg; zh->zh_claim_blk_seq = zilog->zl_parse_blk_seq; zh->zh_claim_lr_seq = zilog->zl_parse_lr_seq; if (zilog->zl_parse_lr_count || zilog->zl_parse_blk_count > 1) zh->zh_flags |= ZIL_REPLAY_NEEDED; zh->zh_flags |= ZIL_CLAIM_LR_SEQ_VALID; + if (os->os_encrypted) + os->os_next_write_raw[tx->tx_txg & TXG_MASK] = B_TRUE; dsl_dataset_dirty(dmu_objset_ds(os), tx); } ASSERT3U(first_txg, ==, (spa_last_synced_txg(zilog->zl_spa) + 1)); - dmu_objset_disown(os, FTAG); + dmu_objset_disown(os, B_FALSE, FTAG); return (0); } @@ -907,7 +931,7 @@ zil_check_log_chain(dsl_pool_t *dp, dsl_dataset_t *ds, void *tx) */ error = zil_parse(zilog, zil_claim_log_block, zil_claim_log_record, tx, zilog->zl_header->zh_claim_txg ? -1ULL : - spa_min_claim_txg(os->os_spa)); + spa_min_claim_txg(os->os_spa), B_FALSE); return ((error == ECKSUM || error == ENOENT) ? 0 : error); } @@ -1435,8 +1459,9 @@ zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb) BP_ZERO(bp); /* pass the old blkptr in order to spread log blocks across devs */ - error = zio_alloc_zil(spa, zilog->zl_os->os_dsl_dataset->ds_object, - txg, bp, &lwb->lwb_blk, zil_blksz, &slog); + error = zio_alloc_zil(spa, zilog->zl_os, txg, bp, &lwb->lwb_blk, + zil_blksz, &slog); + if (error == 0) { ASSERT3U(bp->blk_birth, ==, txg); bp->blk_cksum = lwb->lwb_blk.blk_cksum; @@ -3188,6 +3213,21 @@ zil_suspend(const char *osname, void **cookiep) return (0); } + /* + * The ZIL has work to do. 
Ensure that the associated encryption + * key will remain mapped while we are committing the log by + * grabbing a reference to it. If the key isn't loaded we have no + * choice but to return an error until the wrapping key is loaded. + */ + if (os->os_encrypted && + dsl_dataset_create_key_mapping(dmu_objset_ds(os)) != 0) { + zilog->zl_suspend--; + mutex_exit(&zilog->zl_lock); + dsl_dataset_long_rele(dmu_objset_ds(os), suspend_tag); + dsl_dataset_rele(dmu_objset_ds(os), suspend_tag); + return (SET_ERROR(EBUSY)); + } + zilog->zl_suspending = B_TRUE; mutex_exit(&zilog->zl_lock); @@ -3202,9 +3242,10 @@ zil_suspend(const char *osname, void **cookiep) zil_commit_impl(zilog, 0); /* - * Now that we've ensured all lwb's are LWB_STATE_FLUSH_DONE, we - * use txg_wait_synced() to ensure the data from the zilog has - * migrated to the main pool before calling zil_destroy(). + * Now that we've ensured all lwb's are LWB_STATE_DONE, + * txg_wait_synced() will be called from within zil_destroy(), + * which will ensure the data from the zilog has migrated to the + * main pool before it returns. 
*/ txg_wait_synced(zilog->zl_dmu_pool, 0); @@ -3215,6 +3256,9 @@ zil_suspend(const char *osname, void **cookiep) cv_broadcast(&zilog->zl_cv_suspend); mutex_exit(&zilog->zl_lock); + if (os->os_encrypted) + dsl_dataset_remove_key_mapping(dmu_objset_ds(os)); + if (cookiep == NULL) zil_resume(os); else @@ -3381,7 +3425,7 @@ zil_replay(objset_t *os, void *arg, zil_replay_func_t *replay_func[TX_MAX_TYPE]) zilog->zl_replay_time = ddi_get_lbolt(); ASSERT(zilog->zl_replay_blks == 0); (void) zil_parse(zilog, zil_incr_blks, zil_replay_log_record, &zr, - zh->zh_claim_txg); + zh->zh_claim_txg, B_TRUE); kmem_free(zr.zr_lr, 2 * SPA_MAXBLOCKSIZE); zil_destroy(zilog, B_FALSE); diff --git a/usr/src/uts/common/fs/zfs/zio.c b/usr/src/uts/common/fs/zfs/zio.c index 76312e6a74..2e44d59daf 100644 --- a/usr/src/uts/common/fs/zfs/zio.c +++ b/usr/src/uts/common/fs/zfs/zio.c @@ -45,6 +45,7 @@ #include <sys/metaslab_impl.h> #include <sys/abd.h> #include <sys/cityhash.h> +#include <sys/dsl_crypt.h> /* * ========================================================================== @@ -270,6 +271,13 @@ zio_data_buf_free(void *buf, size_t size) kmem_cache_free(zio_data_buf_cache[c], buf); } +/* ARGSUSED */ +static void +zio_abd_free(void *abd, size_t size) +{ + abd_free((abd_t *)abd); +} + /* * ========================================================================== * Push and pop I/O transform buffers @@ -322,7 +330,7 @@ zio_pop_transforms(zio_t *zio) /* * ========================================================================== - * I/O transform callbacks for subblocks and decompression + * I/O transform callbacks for subblocks, decompression, and decryption * ========================================================================== */ static void @@ -348,6 +356,132 @@ zio_decompress(zio_t *zio, abd_t *data, uint64_t size) } } +static void +zio_decrypt(zio_t *zio, abd_t *data, uint64_t size) +{ + int ret; + void *tmp; + blkptr_t *bp = zio->io_bp; + spa_t *spa = zio->io_spa; + uint64_t dsobj = 
zio->io_bookmark.zb_objset; + uint64_t lsize = BP_GET_LSIZE(bp); + dmu_object_type_t ot = BP_GET_TYPE(bp); + uint8_t salt[ZIO_DATA_SALT_LEN]; + uint8_t iv[ZIO_DATA_IV_LEN]; + uint8_t mac[ZIO_DATA_MAC_LEN]; + boolean_t no_crypt = B_FALSE; + + ASSERT(BP_USES_CRYPT(bp)); + ASSERT3U(size, !=, 0); + + if (zio->io_error != 0) + return; + + /* + * Verify the cksum of MACs stored in an indirect bp. It will always + * be possible to verify this since it does not require an encryption + * key. + */ + if (BP_HAS_INDIRECT_MAC_CKSUM(bp)) { + zio_crypt_decode_mac_bp(bp, mac); + + if (BP_GET_COMPRESS(bp) != ZIO_COMPRESS_OFF) { + /* + * We haven't decompressed the data yet, but + * zio_crypt_do_indirect_mac_checksum() requires + * decompressed data to be able to parse out the MACs + * from the indirect block. We decompress it now and + * throw away the result after we are finished. + */ + tmp = zio_buf_alloc(lsize); + ret = zio_decompress_data(BP_GET_COMPRESS(bp), + zio->io_abd, tmp, zio->io_size, lsize); + if (ret != 0) { + ret = SET_ERROR(EIO); + goto error; + } + ret = zio_crypt_do_indirect_mac_checksum(B_FALSE, + tmp, lsize, BP_SHOULD_BYTESWAP(bp), mac); + zio_buf_free(tmp, lsize); + } else { + ret = zio_crypt_do_indirect_mac_checksum_abd(B_FALSE, + zio->io_abd, size, BP_SHOULD_BYTESWAP(bp), mac); + } + abd_copy(data, zio->io_abd, size); + + if (ret != 0) + goto error; + + return; + } + + /* + * If this is an authenticated block, just check the MAC. It would be + * nice to separate this out into its own flag, but for the moment + * enum zio_flag is out of bits. 
+ */ + if (BP_IS_AUTHENTICATED(bp)) { + if (ot == DMU_OT_OBJSET) { + ret = spa_do_crypt_objset_mac_abd(B_FALSE, spa, + dsobj, zio->io_abd, size, BP_SHOULD_BYTESWAP(bp)); + } else { + zio_crypt_decode_mac_bp(bp, mac); + ret = spa_do_crypt_mac_abd(B_FALSE, spa, dsobj, + zio->io_abd, size, mac); + } + abd_copy(data, zio->io_abd, size); + + if (zio_injection_enabled && ot != DMU_OT_DNODE && ret == 0) { + ret = zio_handle_decrypt_injection(spa, + &zio->io_bookmark, ot, ECKSUM); + } + if (ret != 0) + goto error; + + return; + } + + zio_crypt_decode_params_bp(bp, salt, iv); + + if (ot == DMU_OT_INTENT_LOG) { + tmp = abd_borrow_buf_copy(zio->io_abd, sizeof (zil_chain_t)); + zio_crypt_decode_mac_zil(tmp, mac); + abd_return_buf(zio->io_abd, tmp, sizeof (zil_chain_t)); + } else { + zio_crypt_decode_mac_bp(bp, mac); + } + + ret = spa_do_crypt_abd(B_FALSE, spa, &zio->io_bookmark, BP_GET_TYPE(bp), + BP_GET_DEDUP(bp), BP_SHOULD_BYTESWAP(bp), salt, iv, mac, size, data, + zio->io_abd, &no_crypt); + if (no_crypt) + abd_copy(data, zio->io_abd, size); + + if (ret != 0) + goto error; + + return; + +error: + /* assert that the key was found unless this was speculative */ + ASSERT(ret != EACCES || (zio->io_flags & ZIO_FLAG_SPECULATIVE)); + + /* + * If there was a decryption / authentication error return EIO as + * the io_error. If this was not a speculative zio, create an ereport. 
+ */ + if (ret == ECKSUM) { + zio->io_error = SET_ERROR(EIO); + if ((zio->io_flags & ZIO_FLAG_SPECULATIVE) == 0) { + spa_log_error(spa, &zio->io_bookmark); + zfs_ereport_post(FM_EREPORT_ZFS_AUTHENTICATION, + spa, NULL, &zio->io_bookmark, zio, 0, 0); + } + } else { + zio->io_error = ret; + } +} + /* * ========================================================================== * I/O parent/child relationships and pipeline interlocks @@ -565,7 +699,7 @@ zio_create(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp, ASSERT(!bp || !(flags & ZIO_FLAG_CONFIG_WRITER)); ASSERT(vd || stage == ZIO_STAGE_OPEN); - IMPLY(lsize != psize, (flags & ZIO_FLAG_RAW) != 0); + IMPLY(lsize != psize, (flags & ZIO_FLAG_RAW_COMPRESS) != 0); zio = kmem_cache_alloc(zio_cache, KM_SLEEP); bzero(zio, sizeof (zio_t)); @@ -836,9 +970,12 @@ zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, * Data can be NULL if we are going to call zio_write_override() to * provide the already-allocated BP. But we may need the data to * verify a dedup hit (if requested). In this case, don't try to - * dedup (just take the already-allocated BP verbatim). + * dedup (just take the already-allocated BP verbatim). Encrypted + * dedup blocks need data as well so we also disable dedup in this + * case. */ - if (data == NULL && zio->io_prop.zp_dedup_verify) { + if (data == NULL && + (zio->io_prop.zp_dedup_verify || zio->io_prop.zp_encrypt)) { zio->io_prop.zp_dedup = zio->io_prop.zp_dedup_verify = B_FALSE; } @@ -1189,23 +1326,30 @@ static int zio_read_bp_init(zio_t *zio) { blkptr_t *bp = zio->io_bp; + uint64_t psize = + BP_IS_EMBEDDED(bp) ? BPE_GET_PSIZE(bp) : BP_GET_PSIZE(bp); ASSERT3P(zio->io_bp, ==, &zio->io_bp_copy); if (BP_GET_COMPRESS(bp) != ZIO_COMPRESS_OFF && zio->io_child_type == ZIO_CHILD_LOGICAL && - !(zio->io_flags & ZIO_FLAG_RAW)) { - uint64_t psize = - BP_IS_EMBEDDED(bp) ? 
BPE_GET_PSIZE(bp) : BP_GET_PSIZE(bp); + !(zio->io_flags & ZIO_FLAG_RAW_COMPRESS)) { zio_push_transform(zio, abd_alloc_sametype(zio->io_abd, psize), psize, psize, zio_decompress); } - if (BP_IS_EMBEDDED(bp) && BPE_GET_ETYPE(bp) == BP_EMBEDDED_TYPE_DATA) { - zio->io_pipeline = ZIO_INTERLOCK_PIPELINE; + if (((BP_IS_PROTECTED(bp) && !(zio->io_flags & ZIO_FLAG_RAW_ENCRYPT)) || + BP_HAS_INDIRECT_MAC_CKSUM(bp)) && + zio->io_child_type == ZIO_CHILD_LOGICAL) { + zio_push_transform(zio, abd_alloc_sametype(zio->io_abd, psize), + psize, psize, zio_decrypt); + } + if (BP_IS_EMBEDDED(bp) && BPE_GET_ETYPE(bp) == BP_EMBEDDED_TYPE_DATA) { int psize = BPE_GET_PSIZE(bp); void *data = abd_borrow_buf(zio->io_abd, psize); + + zio->io_pipeline = ZIO_INTERLOCK_PIPELINE; decode_embedded_bp_compressed(bp, data); abd_return_buf_copy(zio->io_abd, data, psize); } else { @@ -1266,7 +1410,8 @@ zio_write_bp_init(zio_t *zio) ASSERT((zio_checksum_table[zp->zp_checksum].ci_flags & ZCHECKSUM_FLAG_DEDUP) || zp->zp_dedup_verify); - if (BP_GET_CHECKSUM(bp) == zp->zp_checksum) { + if (BP_GET_CHECKSUM(bp) == zp->zp_checksum && + !zp->zp_encrypt) { BP_SET_DEDUP(bp, 1); zio->io_pipeline |= ZIO_STAGE_DDT_WRITE; return (ZIO_PIPELINE_CONTINUE); @@ -1295,8 +1440,6 @@ zio_write_compress(zio_t *zio) uint64_t psize = zio->io_size; int pass = 1; - EQUIV(lsize != psize, (zio->io_flags & ZIO_FLAG_RAW) != 0); - /* * If our children haven't all reached the ready stage, * wait for them and then repeat this pipeline stage. @@ -1347,13 +1490,15 @@ zio_write_compress(zio_t *zio) } /* If it's a compressed write that is not raw, compress the buffer. 
*/ - if (compress != ZIO_COMPRESS_OFF && psize == lsize) { + if (compress != ZIO_COMPRESS_OFF && + !(zio->io_flags & ZIO_FLAG_RAW_COMPRESS)) { void *cbuf = zio_buf_alloc(lsize); psize = zio_compress_data(compress, zio->io_abd, cbuf, lsize); if (psize == 0 || psize == lsize) { compress = ZIO_COMPRESS_OFF; zio_buf_free(cbuf, lsize); - } else if (!zp->zp_dedup && psize <= BPE_PAYLOAD_SIZE && + } else if (!zp->zp_dedup && !zp->zp_encrypt && + psize <= BPE_PAYLOAD_SIZE && zp->zp_level == 0 && !DMU_OT_HAS_FILL(zp->zp_type) && spa_feature_is_enabled(spa, SPA_FEATURE_EMBEDDED_DATA)) { encode_embedded_bp_compressed(bp, @@ -1400,6 +1545,20 @@ zio_write_compress(zio_t *zio) zio->io_bp_override = NULL; *bp = zio->io_bp_orig; zio->io_pipeline = zio->io_orig_pipeline; + + } else if ((zio->io_flags & ZIO_FLAG_RAW_ENCRYPT) != 0 && + zp->zp_type == DMU_OT_DNODE) { + /* + * The DMU actually relies on the zio layer's compression + * to free metadnode blocks that have had all contained + * dnodes freed. As a result, even when doing a raw + * receive, we must check whether the block can be compressed + * to a hole. 
+ */ + psize = zio_compress_data(ZIO_COMPRESS_EMPTY, + zio->io_abd, NULL, lsize); + if (psize == 0) + compress = ZIO_COMPRESS_OFF; } else { ASSERT3U(psize, !=, 0); } @@ -1417,7 +1576,6 @@ zio_write_compress(zio_t *zio) pass >= zfs_sync_pass_rewrite) { VERIFY3U(psize, !=, 0); enum zio_stage gang_stages = zio->io_pipeline & ZIO_GANG_STAGES; - zio->io_pipeline = ZIO_REWRITE_PIPELINE | gang_stages; zio->io_flags |= ZIO_FLAG_IO_REWRITE; } else { @@ -1447,6 +1605,8 @@ zio_write_compress(zio_t *zio) if (zp->zp_dedup) { ASSERT(zio->io_child_type == ZIO_CHILD_LOGICAL); ASSERT(!(zio->io_flags & ZIO_FLAG_IO_REWRITE)); + ASSERT(!zp->zp_encrypt || + DMU_OT_IS_ENCRYPTED(zp->zp_type)); zio->io_pipeline = ZIO_DDT_WRITE_PIPELINE; } if (zp->zp_nopwrite) { @@ -1794,7 +1954,8 @@ zio_suspend(spa_t *spa, zio_t *zio, zio_suspend_reason_t reason) "failure and the failure mode property for this pool " "is set to panic.", spa_name(spa)); - zfs_ereport_post(FM_EREPORT_ZFS_IO_FAILURE, spa, NULL, NULL, 0, 0); + zfs_ereport_post(FM_EREPORT_ZFS_IO_FAILURE, spa, NULL, + NULL, NULL, 0, 0); mutex_enter(&spa->spa_suspend_lock); @@ -2231,6 +2392,13 @@ zio_write_gang_block(zio_t *pio) int error; boolean_t has_data = !(pio->io_flags & ZIO_FLAG_NODATA); + /* + * encrypted blocks need DVA[2] free so encrypted gang headers can't + * have a third copy. 
+ */ + if (gio->io_prop.zp_encrypt && gbh_copies >= SPA_DVAS_PER_BP) + gbh_copies = SPA_DVAS_PER_BP - 1; + int flags = METASLAB_HINTBP_FAVOR | METASLAB_GANG_HEADER; if (pio->io_flags & ZIO_FLAG_IO_ALLOCATING) { ASSERT(pio->io_priority == ZIO_PRIORITY_ASYNC_WRITE); @@ -2309,6 +2477,11 @@ zio_write_gang_block(zio_t *pio) zp.zp_dedup = B_FALSE; zp.zp_dedup_verify = B_FALSE; zp.zp_nopwrite = B_FALSE; + zp.zp_encrypt = gio->io_prop.zp_encrypt; + zp.zp_byteorder = gio->io_prop.zp_byteorder; + bzero(zp.zp_salt, ZIO_DATA_SALT_LEN); + bzero(zp.zp_iv, ZIO_DATA_IV_LEN); + bzero(zp.zp_mac, ZIO_DATA_MAC_LEN); zio_t *cio = zio_write(zio, spa, txg, &gbh->zg_blkptr[g], has_data ? abd_get_offset(pio->io_abd, pio->io_size - @@ -2383,6 +2556,7 @@ zio_nop_write(zio_t *zio) if (BP_IS_HOLE(bp_orig) || !(zio_checksum_table[BP_GET_CHECKSUM(bp)].ci_flags & ZCHECKSUM_FLAG_NOPWRITE) || + BP_IS_ENCRYPTED(bp) || BP_IS_ENCRYPTED(bp_orig) || BP_GET_CHECKSUM(bp) != BP_GET_CHECKSUM(bp_orig) || BP_GET_COMPRESS(bp) != BP_GET_COMPRESS(bp_orig) || BP_GET_DEDUP(bp) != BP_GET_DEDUP(bp_orig) || @@ -2521,7 +2695,7 @@ static boolean_t zio_ddt_collision(zio_t *zio, ddt_t *ddt, ddt_entry_t *dde) { spa_t *spa = zio->io_spa; - boolean_t do_raw = (zio->io_flags & ZIO_FLAG_RAW); + boolean_t do_raw = !!(zio->io_flags & ZIO_FLAG_RAW); /* We should never get a raw, override zio */ ASSERT(!(zio->io_bp_override && do_raw)); @@ -2531,11 +2705,21 @@ zio_ddt_collision(zio_t *zio, ddt_t *ddt, ddt_entry_t *dde) * because when zio->io_bp is an override bp, we will not have * pushed the I/O transforms. That's an important optimization * because otherwise we'd compress/encrypt all dmu_sync() data twice. + * However, we should never get a raw, override zio so in these + * cases we can compare the io_data directly. This is useful because + * it allows us to do dedup verification even if we don't have access + * to the original data (for instance, if the encryption keys aren't + * loaded). 
*/ + for (int p = DDT_PHYS_SINGLE; p <= DDT_PHYS_TRIPLE; p++) { zio_t *lio = dde->dde_lead_zio[p]; - if (lio != NULL) { + if (lio != NULL && do_raw) { + return (lio->io_size != zio->io_size || + abd_cmp(zio->io_abd, lio->io_abd, + zio->io_size) != 0); + } else if (lio != NULL) { return (lio->io_orig_size != zio->io_orig_size || abd_cmp(zio->io_orig_abd, lio->io_orig_abd, zio->io_orig_size) != 0); @@ -2545,7 +2729,36 @@ zio_ddt_collision(zio_t *zio, ddt_t *ddt, ddt_entry_t *dde) for (int p = DDT_PHYS_SINGLE; p <= DDT_PHYS_TRIPLE; p++) { ddt_phys_t *ddp = &dde->dde_phys[p]; - if (ddp->ddp_phys_birth != 0) { + if (ddp->ddp_phys_birth != 0 && do_raw) { + blkptr_t blk = *zio->io_bp; + uint64_t psize; + abd_t *tmpabd; + int error; + + ddt_bp_fill(ddp, &blk, ddp->ddp_phys_birth); + psize = BP_GET_PSIZE(&blk); + + if (psize != zio->io_size) + return (B_TRUE); + + ddt_exit(ddt); + + tmpabd = abd_alloc_for_io(psize, B_TRUE); + + error = zio_wait(zio_read(NULL, spa, &blk, tmpabd, + psize, NULL, NULL, ZIO_PRIORITY_SYNC_READ, + ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | + ZIO_FLAG_RAW, &zio->io_bookmark)); + + if (error == 0) { + if (abd_cmp(tmpabd, zio->io_abd, psize) != 0) + error = SET_ERROR(ENOENT); + } + + abd_free(tmpabd); + ddt_enter(ddt); + return (error != 0); + } else if (ddp->ddp_phys_birth != 0) { arc_buf_t *abuf = NULL; arc_flags_t aflags = ARC_FLAG_WAIT; int zio_flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE; @@ -2554,6 +2767,9 @@ zio_ddt_collision(zio_t *zio, ddt_t *ddt, ddt_entry_t *dde) ddt_bp_fill(ddp, &blk, ddp->ddp_phys_birth); + if (BP_GET_LSIZE(&blk) != zio->io_orig_size) + return (B_TRUE); + ddt_exit(ddt); /* @@ -2578,10 +2794,9 @@ zio_ddt_collision(zio_t *zio, ddt_t *ddt, ddt_entry_t *dde) zio_flags, &aflags, &zio->io_bookmark); if (error == 0) { - if (arc_buf_size(abuf) != zio->io_orig_size || - abd_cmp_buf(zio->io_orig_abd, abuf->b_data, + if (abd_cmp_buf(zio->io_orig_abd, abuf->b_data, zio->io_orig_size) != 0) - error = SET_ERROR(EEXIST); + error = 
SET_ERROR(ENOENT); arc_buf_destroy(abuf, &abuf); } @@ -3048,7 +3263,7 @@ zio_dva_unallocate(zio_t *zio, zio_gang_node_t *gn, blkptr_t *bp) * Try to allocate an intent log block. Return 0 on success, errno on failure. */ int -zio_alloc_zil(spa_t *spa, uint64_t objset, uint64_t txg, blkptr_t *new_bp, +zio_alloc_zil(spa_t *spa, objset_t *os, uint64_t txg, blkptr_t *new_bp, blkptr_t *old_bp, uint64_t size, boolean_t *slog) { int error = 1; @@ -3074,14 +3289,15 @@ zio_alloc_zil(spa_t *spa, uint64_t objset, uint64_t txg, blkptr_t *new_bp, */ error = metaslab_alloc(spa, spa_log_class(spa), size, new_bp, 1, txg, old_bp, METASLAB_HINTBP_AVOID, &io_alloc_list, NULL, - cityhash4(0, 0, 0, objset) % spa->spa_alloc_count); + cityhash4(0, 0, 0, + os->os_dsl_dataset->ds_object) % spa->spa_alloc_count); if (error == 0) { *slog = TRUE; } else { error = metaslab_alloc(spa, spa_normal_class(spa), size, new_bp, 1, txg, old_bp, METASLAB_HINTBP_AVOID, - &io_alloc_list, NULL, cityhash4(0, 0, 0, objset) % - spa->spa_alloc_count); + &io_alloc_list, NULL, cityhash4(0, 0, 0, + os->os_dsl_dataset->ds_object) % spa->spa_alloc_count); if (error == 0) *slog = FALSE; } @@ -3098,6 +3314,23 @@ zio_alloc_zil(spa_t *spa, uint64_t objset, uint64_t txg, blkptr_t *new_bp, BP_SET_LEVEL(new_bp, 0); BP_SET_DEDUP(new_bp, 0); BP_SET_BYTEORDER(new_bp, ZFS_HOST_BYTEORDER); + + /* + * encrypted blocks will require an IV and salt. We generate + * these now since we will not be rewriting the bp at + * rewrite time. 
+ */ + if (os->os_encrypted) { + uint8_t iv[ZIO_DATA_IV_LEN]; + uint8_t salt[ZIO_DATA_SALT_LEN]; + + BP_SET_CRYPT(new_bp, B_TRUE); + VERIFY0(spa_crypt_get_salt(spa, + dmu_objset_id(os), salt)); + VERIFY0(zio_crypt_generate_iv(iv)); + + zio_crypt_encode_params_bp(new_bp, salt, iv); + } } else { zfs_dbgmsg("%s: zil block allocation failure: " "size %llu, error %d", spa_name(spa), size, error); @@ -3332,7 +3565,7 @@ zio_change_priority(zio_t *pio, zio_priority_t priority) */ static void zio_vsd_default_cksum_finish(zio_cksum_report_t *zcr, - const void *good_buf) + const abd_t *good_buf) { /* no processing needed */ zfs_ereport_finish_checksum(zcr, good_buf, zcr->zcr_cbdata, B_FALSE); @@ -3342,14 +3575,14 @@ zio_vsd_default_cksum_finish(zio_cksum_report_t *zcr, void zio_vsd_default_cksum_report(zio_t *zio, zio_cksum_report_t *zcr, void *ignored) { - void *buf = zio_buf_alloc(zio->io_size); + void *abd = abd_alloc_sametype(zio->io_abd, zio->io_size); - abd_copy_to_buf(buf, zio->io_abd, zio->io_size); + abd_copy(abd, zio->io_abd, zio->io_size); zcr->zcr_cbinfo = zio->io_size; - zcr->zcr_cbdata = buf; + zcr->zcr_cbdata = abd; zcr->zcr_finish = zio_vsd_default_cksum_finish; - zcr->zcr_free = zio_buf_free; + zcr->zcr_free = zio_abd_free; } static int @@ -3460,6 +3693,164 @@ zio_vdev_io_bypass(zio_t *zio) /* * ========================================================================== + * Encrypt and store encryption parameters + * ========================================================================== + */ + + +/* + * This function is used for ZIO_STAGE_ENCRYPT. It is responsible for + * managing the storage of encryption parameters and passing them to the + * lower-level encryption functions. 
+ */ +static int +zio_encrypt(zio_t *zio) +{ + zio_prop_t *zp = &zio->io_prop; + spa_t *spa = zio->io_spa; + blkptr_t *bp = zio->io_bp; + uint64_t psize = BP_GET_PSIZE(bp); + uint64_t dsobj = zio->io_bookmark.zb_objset; + dmu_object_type_t ot = BP_GET_TYPE(bp); + void *enc_buf = NULL; + abd_t *eabd = NULL; + uint8_t salt[ZIO_DATA_SALT_LEN]; + uint8_t iv[ZIO_DATA_IV_LEN]; + uint8_t mac[ZIO_DATA_MAC_LEN]; + boolean_t no_crypt = B_FALSE; + + /* the root zio already encrypted the data */ + if (zio->io_child_type == ZIO_CHILD_GANG) + return (ZIO_PIPELINE_CONTINUE); + + /* only ZIL blocks are re-encrypted on rewrite */ + if (!IO_IS_ALLOCATING(zio) && ot != DMU_OT_INTENT_LOG) + return (ZIO_PIPELINE_CONTINUE); + + if (!(zp->zp_encrypt || BP_IS_ENCRYPTED(bp))) { + BP_SET_CRYPT(bp, B_FALSE); + return (ZIO_PIPELINE_CONTINUE); + } + + /* if we are doing raw encryption set the provided encryption params */ + if (zio->io_flags & ZIO_FLAG_RAW_ENCRYPT) { + ASSERT0(BP_GET_LEVEL(bp)); + BP_SET_CRYPT(bp, B_TRUE); + BP_SET_BYTEORDER(bp, zp->zp_byteorder); + if (ot != DMU_OT_OBJSET) + zio_crypt_encode_mac_bp(bp, zp->zp_mac); + + /* dnode blocks must be written out in the provided byteorder */ + if (zp->zp_byteorder != ZFS_HOST_BYTEORDER && + ot == DMU_OT_DNODE) { + void *bswap_buf = zio_buf_alloc(psize); + abd_t *babd = abd_get_from_buf(bswap_buf, psize); + + ASSERT3U(BP_GET_COMPRESS(bp), ==, ZIO_COMPRESS_OFF); + abd_copy_to_buf(bswap_buf, zio->io_abd, psize); + dmu_ot_byteswap[DMU_OT_BYTESWAP(ot)].ob_func(bswap_buf, + psize); + + abd_take_ownership_of_buf(babd, B_TRUE); + zio_push_transform(zio, babd, psize, psize, NULL); + } + + if (DMU_OT_IS_ENCRYPTED(ot)) + zio_crypt_encode_params_bp(bp, zp->zp_salt, zp->zp_iv); + return (ZIO_PIPELINE_CONTINUE); + } + + /* indirect blocks only maintain a cksum of the lower level MACs */ + if (BP_GET_LEVEL(bp) > 0) { + BP_SET_CRYPT(bp, B_TRUE); + VERIFY0(zio_crypt_do_indirect_mac_checksum_abd(B_TRUE, + zio->io_orig_abd, BP_GET_LSIZE(bp), 
BP_SHOULD_BYTESWAP(bp), + mac)); + zio_crypt_encode_mac_bp(bp, mac); + return (ZIO_PIPELINE_CONTINUE); + } + + /* + * Objset blocks are a special case since they have 2 256-bit MACs + * embedded within them. + */ + if (ot == DMU_OT_OBJSET) { + ASSERT0(DMU_OT_IS_ENCRYPTED(ot)); + ASSERT3U(BP_GET_COMPRESS(bp), ==, ZIO_COMPRESS_OFF); + BP_SET_CRYPT(bp, B_TRUE); + VERIFY0(spa_do_crypt_objset_mac_abd(B_TRUE, spa, dsobj, + zio->io_abd, psize, BP_SHOULD_BYTESWAP(bp))); + return (ZIO_PIPELINE_CONTINUE); + } + + /* unencrypted object types are only authenticated with a MAC */ + if (!DMU_OT_IS_ENCRYPTED(ot)) { + BP_SET_CRYPT(bp, B_TRUE); + VERIFY0(spa_do_crypt_mac_abd(B_TRUE, spa, dsobj, + zio->io_abd, psize, mac)); + zio_crypt_encode_mac_bp(bp, mac); + return (ZIO_PIPELINE_CONTINUE); + } + + /* + * Later passes of sync-to-convergence may decide to rewrite data + * in place to avoid more disk reallocations. This presents a problem + * for encryption because this consitutes rewriting the new data with + * the same encryption key and IV. However, this only applies to blocks + * in the MOS (particularly the spacemaps) and we do not encrypt the + * MOS. We assert that the zio is allocating or an intent log write + * to enforce this. + */ + ASSERT(IO_IS_ALLOCATING(zio) || ot == DMU_OT_INTENT_LOG); + ASSERT(BP_GET_LEVEL(bp) == 0 || ot == DMU_OT_INTENT_LOG); + ASSERT(spa_feature_is_active(spa, SPA_FEATURE_ENCRYPTION)); + ASSERT3U(psize, !=, 0); + + enc_buf = zio_buf_alloc(psize); + eabd = abd_get_from_buf(enc_buf, psize); + abd_take_ownership_of_buf(eabd, B_TRUE); + + /* + * For an explanation of what encryption parameters are stored + * where, see the block comment in zio_crypt.c. + */ + if (ot == DMU_OT_INTENT_LOG) { + zio_crypt_decode_params_bp(bp, salt, iv); + } else { + BP_SET_CRYPT(bp, B_TRUE); + } + + /* Perform the encryption. 
This should not fail */ + VERIFY0(spa_do_crypt_abd(B_TRUE, spa, &zio->io_bookmark, + BP_GET_TYPE(bp), BP_GET_DEDUP(bp), BP_SHOULD_BYTESWAP(bp), + salt, iv, mac, psize, zio->io_abd, eabd, &no_crypt)); + + /* encode encryption metadata into the bp */ + if (ot == DMU_OT_INTENT_LOG) { + /* + * ZIL blocks store the MAC in the embedded checksum, so the + * transform must always be applied. + */ + zio_crypt_encode_mac_zil(enc_buf, mac); + zio_push_transform(zio, eabd, psize, psize, NULL); + } else { + BP_SET_CRYPT(bp, B_TRUE); + zio_crypt_encode_params_bp(bp, salt, iv); + zio_crypt_encode_mac_bp(bp, mac); + + if (no_crypt) { + ASSERT3U(ot, ==, DMU_OT_DNODE); + abd_free(eabd); + } else { + zio_push_transform(zio, eabd, psize, psize, NULL); + } + } + + return (ZIO_PIPELINE_CONTINUE); +} + +/* + * ========================================================================== * Generate and verify checksums * ========================================================================== */ @@ -3519,8 +3910,8 @@ zio_checksum_verify(zio_t *zio) if (error == ECKSUM && !(zio->io_flags & ZIO_FLAG_SPECULATIVE)) { zfs_ereport_start_checksum(zio->io_spa, - zio->io_vd, zio, zio->io_offset, - zio->io_size, NULL, &info); + zio->io_vd, &zio->io_bookmark, zio, + zio->io_offset, zio->io_size, NULL, &info); } } @@ -3765,7 +4156,6 @@ zio_done(zio_t *zio) if (zio->io_type == ZIO_TYPE_WRITE && !BP_IS_HOLE(bp) && zio->io_bp_override == NULL && !(zio->io_flags & ZIO_FLAG_IO_REPAIR)) { - ASSERT(!BP_SHOULD_BYTESWAP(bp)); ASSERT3U(zio->io_prop.zp_copies, <=, BP_GET_NDVAS(bp)); ASSERT(BP_COUNT_GANG(bp) == 0 || (BP_COUNT_GANG(bp) == BP_GET_NDVAS(bp))); @@ -3790,26 +4180,19 @@ zio_done(zio_t *zio) zio_cksum_report_t *zcr = zio->io_cksum_report; uint64_t align = zcr->zcr_align; uint64_t asize = P2ROUNDUP(psize, align); - char *abuf = NULL; abd_t *adata = zio->io_abd; if (asize != psize) { - adata = abd_alloc_linear(asize, B_TRUE); + adata = abd_alloc(asize, B_TRUE); abd_copy(adata, zio->io_abd, psize); 
abd_zero_off(adata, psize, asize - psize); } - if (adata != NULL) - abuf = abd_borrow_buf_copy(adata, asize); - zio->io_cksum_report = zcr->zcr_next; zcr->zcr_next = NULL; - zcr->zcr_finish(zcr, abuf); + zcr->zcr_finish(zcr, adata); zfs_ereport_free_checksum(zcr); - if (adata != NULL) - abd_return_buf(adata, abuf, asize); - if (asize != psize) abd_free(adata); } @@ -3827,7 +4210,8 @@ zio_done(zio_t *zio) * device is currently unavailable. */ if (zio->io_error != ECKSUM && vd != NULL && !vdev_is_dead(vd)) - zfs_ereport_post(FM_EREPORT_ZFS_IO, spa, vd, zio, 0, 0); + zfs_ereport_post(FM_EREPORT_ZFS_IO, spa, vd, + &zio->io_bookmark, zio, 0, 0); if ((zio->io_error == EIO || !(zio->io_flags & (ZIO_FLAG_SPECULATIVE | ZIO_FLAG_DONT_PROPAGATE))) && @@ -3836,9 +4220,9 @@ zio_done(zio_t *zio) * For logical I/O requests, tell the SPA to log the * error and generate a logical data ereport. */ - spa_log_error(spa, zio); - zfs_ereport_post(FM_EREPORT_ZFS_DATA, spa, NULL, zio, - 0, 0); + spa_log_error(spa, &zio->io_bookmark); + zfs_ereport_post(FM_EREPORT_ZFS_DATA, spa, NULL, + &zio->io_bookmark, zio, 0, 0); } } @@ -4029,6 +4413,7 @@ static zio_pipe_stage_t *zio_pipeline[] = { zio_free_bp_init, zio_issue_async, zio_write_compress, + zio_encrypt, zio_checksum_generate, zio_nop_write, zio_ddt_read_start, diff --git a/usr/src/uts/common/fs/zfs/zio_checksum.c b/usr/src/uts/common/fs/zfs/zio_checksum.c index e1c98b0b99..d5aa9303b8 100644 --- a/usr/src/uts/common/fs/zfs/zio_checksum.c +++ b/usr/src/uts/common/fs/zfs/zio_checksum.c @@ -242,9 +242,9 @@ zio_checksum_dedup_select(spa_t *spa, enum zio_checksum child, * a tuple which is guaranteed to be unique for the life of the pool. 
*/ static void -zio_checksum_gang_verifier(zio_cksum_t *zcp, blkptr_t *bp) +zio_checksum_gang_verifier(zio_cksum_t *zcp, const blkptr_t *bp) { - dva_t *dva = BP_IDENTITY(bp); + const dva_t *dva = BP_IDENTITY(bp); uint64_t txg = BP_PHYSICAL_BIRTH(bp); ASSERT(BP_IS_GANG(bp)); @@ -287,6 +287,25 @@ zio_checksum_template_init(enum zio_checksum checksum, spa_t *spa) mutex_exit(&spa->spa_cksum_tmpls_lock); } +/* convenience function to update a checksum to accomodate an encryption MAC */ +static void +zio_checksum_handle_crypt(zio_cksum_t *cksum, zio_cksum_t *saved, boolean_t xor) +{ + /* + * Weak checksums do not have their entropy spread evenly + * across the bits of the checksum. Therefore, when truncating + * a weak checksum we XOR the first 2 words with the last 2 so + * that we don't "lose" any entropy unnecessarily. + */ + if (xor) { + cksum->zc_word[0] ^= cksum->zc_word[2]; + cksum->zc_word[1] ^= cksum->zc_word[3]; + } + + cksum->zc_word[2] = saved->zc_word[2]; + cksum->zc_word[3] = saved->zc_word[3]; +} + /* * Generate the checksum. 
*/ @@ -294,11 +313,13 @@ void zio_checksum_compute(zio_t *zio, enum zio_checksum checksum, abd_t *abd, uint64_t size) { + static const uint64_t zec_magic = ZEC_MAGIC; blkptr_t *bp = zio->io_bp; uint64_t offset = zio->io_offset; zio_checksum_info_t *ci = &zio_checksum_table[checksum]; - zio_cksum_t cksum; + zio_cksum_t cksum, saved; spa_t *spa = zio->io_spa; + boolean_t insecure = (ci->ci_flags & ZCHECKSUM_FLAG_DEDUP) == 0; ASSERT((uint_t)checksum < ZIO_CHECKSUM_FUNCTIONS); ASSERT(ci->ci_func[0] != NULL); @@ -306,40 +327,68 @@ zio_checksum_compute(zio_t *zio, enum zio_checksum checksum, zio_checksum_template_init(checksum, spa); if (ci->ci_flags & ZCHECKSUM_FLAG_EMBEDDED) { - zio_eck_t *eck; - void *data = abd_to_buf(abd); + zio_eck_t eck; + size_t eck_offset; + + bzero(&saved, sizeof (zio_cksum_t)); if (checksum == ZIO_CHECKSUM_ZILOG2) { - zil_chain_t *zilc = data; + zil_chain_t zilc; + abd_copy_to_buf(&zilc, abd, sizeof (zil_chain_t)); - size = P2ROUNDUP_TYPED(zilc->zc_nused, ZIL_MIN_BLKSZ, + size = P2ROUNDUP_TYPED(zilc.zc_nused, ZIL_MIN_BLKSZ, uint64_t); - eck = &zilc->zc_eck; + eck = zilc.zc_eck; + eck_offset = offsetof(zil_chain_t, zc_eck); } else { - eck = (zio_eck_t *)((char *)data + size) - 1; + eck_offset = size - sizeof (zio_eck_t); + abd_copy_to_buf_off(&eck, abd, eck_offset, + sizeof (zio_eck_t)); } - if (checksum == ZIO_CHECKSUM_GANG_HEADER) - zio_checksum_gang_verifier(&eck->zec_cksum, bp); - else if (checksum == ZIO_CHECKSUM_LABEL) - zio_checksum_label_verifier(&eck->zec_cksum, offset); - else - bp->blk_cksum = eck->zec_cksum; - eck->zec_magic = ZEC_MAGIC; + + if (checksum == ZIO_CHECKSUM_GANG_HEADER) { + zio_checksum_gang_verifier(&eck.zec_cksum, bp); + } else if (checksum == ZIO_CHECKSUM_LABEL) { + zio_checksum_label_verifier(&eck.zec_cksum, offset); + } else { + saved = eck.zec_cksum; + eck.zec_cksum = bp->blk_cksum; + } + + abd_copy_from_buf_off(abd, &zec_magic, + eck_offset + offsetof(zio_eck_t, zec_magic), + sizeof (zec_magic)); + 
abd_copy_from_buf_off(abd, &eck.zec_cksum, + eck_offset + offsetof(zio_eck_t, zec_cksum), + sizeof (zio_cksum_t)); + ci->ci_func[0](abd, size, spa->spa_cksum_tmpls[checksum], &cksum); - eck->zec_cksum = cksum; + if (bp != NULL && BP_USES_CRYPT(bp) && + BP_GET_TYPE(bp) != DMU_OT_OBJSET) + zio_checksum_handle_crypt(&cksum, &saved, insecure); + + abd_copy_from_buf_off(abd, &cksum, + eck_offset + offsetof(zio_eck_t, zec_cksum), + sizeof (zio_cksum_t)); } else { + saved = bp->blk_cksum; ci->ci_func[0](abd, size, spa->spa_cksum_tmpls[checksum], - &bp->blk_cksum); + &cksum); + if (BP_USES_CRYPT(bp) && BP_GET_TYPE(bp) != DMU_OT_OBJSET) + zio_checksum_handle_crypt(&cksum, &saved, insecure); + bp->blk_cksum = cksum; } } int -zio_checksum_error_impl(spa_t *spa, blkptr_t *bp, enum zio_checksum checksum, - abd_t *abd, uint64_t size, uint64_t offset, zio_bad_cksum_t *info) +zio_checksum_error_impl(spa_t *spa, const blkptr_t *bp, + enum zio_checksum checksum, abd_t *abd, uint64_t size, + uint64_t offset, zio_bad_cksum_t *info) { zio_checksum_info_t *ci = &zio_checksum_table[checksum]; zio_cksum_t actual_cksum, expected_cksum; + zio_eck_t eck; int byteswap; if (checksum >= ZIO_CHECKSUM_FUNCTIONS || ci->ci_func[0] == NULL) @@ -348,33 +397,37 @@ zio_checksum_error_impl(spa_t *spa, blkptr_t *bp, enum zio_checksum checksum, zio_checksum_template_init(checksum, spa); if (ci->ci_flags & ZCHECKSUM_FLAG_EMBEDDED) { - zio_eck_t *eck; zio_cksum_t verifier; - uint64_t data_size = size; - void *data = abd_borrow_buf_copy(abd, data_size); + size_t eck_offset; if (checksum == ZIO_CHECKSUM_ZILOG2) { - zil_chain_t *zilc = data; + zil_chain_t zilc; uint64_t nused; - eck = &zilc->zc_eck; - if (eck->zec_magic == ZEC_MAGIC) { - nused = zilc->zc_nused; - } else if (eck->zec_magic == BSWAP_64(ZEC_MAGIC)) { - nused = BSWAP_64(zilc->zc_nused); + abd_copy_to_buf(&zilc, abd, sizeof (zil_chain_t)); + + eck = zilc.zc_eck; + eck_offset = offsetof(zil_chain_t, zc_eck) + + offsetof(zio_eck_t, zec_cksum); + + if 
(eck.zec_magic == ZEC_MAGIC) { + nused = zilc.zc_nused; + } else if (eck.zec_magic == BSWAP_64(ZEC_MAGIC)) { + nused = BSWAP_64(zilc.zc_nused); } else { - abd_return_buf(abd, data, data_size); return (SET_ERROR(ECKSUM)); } - if (nused > data_size) { - abd_return_buf(abd, data, data_size); + if (nused > size) { return (SET_ERROR(ECKSUM)); } size = P2ROUNDUP_TYPED(nused, ZIL_MIN_BLKSZ, uint64_t); } else { - eck = (zio_eck_t *)((char *)data + data_size) - 1; + eck_offset = size - sizeof (zio_eck_t); + abd_copy_to_buf_off(&eck, abd, eck_offset, + sizeof (zio_eck_t)); + eck_offset += offsetof(zio_eck_t, zec_cksum); } if (checksum == ZIO_CHECKSUM_GANG_HEADER) @@ -384,20 +437,21 @@ zio_checksum_error_impl(spa_t *spa, blkptr_t *bp, enum zio_checksum checksum, else verifier = bp->blk_cksum; - byteswap = (eck->zec_magic == BSWAP_64(ZEC_MAGIC)); + byteswap = (eck.zec_magic == BSWAP_64(ZEC_MAGIC)); if (byteswap) byteswap_uint64_array(&verifier, sizeof (zio_cksum_t)); - size_t eck_offset = (size_t)(&eck->zec_cksum) - (size_t)data; - expected_cksum = eck->zec_cksum; - eck->zec_cksum = verifier; - abd_return_buf_copy(abd, data, data_size); + expected_cksum = eck.zec_cksum; + + abd_copy_from_buf_off(abd, &verifier, eck_offset, + sizeof (zio_cksum_t)); ci->ci_func[byteswap](abd, size, spa->spa_cksum_tmpls[checksum], &actual_cksum); - abd_copy_from_buf_off(abd, &expected_cksum, - eck_offset, sizeof (zio_cksum_t)); + + abd_copy_from_buf_off(abd, &expected_cksum, eck_offset, + sizeof (zio_cksum_t)); if (byteswap) { byteswap_uint64_array(&expected_cksum, @@ -410,6 +464,26 @@ zio_checksum_error_impl(spa_t *spa, blkptr_t *bp, enum zio_checksum checksum, spa->spa_cksum_tmpls[checksum], &actual_cksum); } + /* + * MAC checksums are a special case since half of this checksum will + * actually be the encryption MAC. This will be verified by the + * decryption process, so we just check the truncated checksum now. 
+ * Objset blocks use embedded MACs so we don't truncate the checksum + * for them. + */ + if (bp != NULL && BP_USES_CRYPT(bp) && + BP_GET_TYPE(bp) != DMU_OT_OBJSET) { + if (!(ci->ci_flags & ZCHECKSUM_FLAG_DEDUP)) { + actual_cksum.zc_word[0] ^= actual_cksum.zc_word[2]; + actual_cksum.zc_word[1] ^= actual_cksum.zc_word[3]; + } + + actual_cksum.zc_word[2] = 0; + actual_cksum.zc_word[3] = 0; + expected_cksum.zc_word[2] = 0; + expected_cksum.zc_word[3] = 0; + } + if (info != NULL) { info->zbc_expected = expected_cksum; info->zbc_actual = actual_cksum; @@ -418,7 +492,6 @@ zio_checksum_error_impl(spa_t *spa, blkptr_t *bp, enum zio_checksum checksum, info->zbc_injected = 0; info->zbc_has_cksum = 1; } - if (!ZIO_CHECKSUM_EQUAL(actual_cksum, expected_cksum)) return (SET_ERROR(ECKSUM)); diff --git a/usr/src/uts/common/fs/zfs/zio_compress.c b/usr/src/uts/common/fs/zfs/zio_compress.c index 9882806a7d..4ae2581e3b 100644 --- a/usr/src/uts/common/fs/zfs/zio_compress.c +++ b/usr/src/uts/common/fs/zfs/zio_compress.c @@ -144,20 +144,31 @@ zio_decompress_data_buf(enum zio_compress c, void *src, void *dst, return (ci->ci_decompress(src, dst, s_len, d_len, ci->ci_level)); } +void *zio_decompress_failed_buf; + int zio_decompress_data(enum zio_compress c, abd_t *src, void *dst, size_t s_len, size_t d_len) { void *tmp = abd_borrow_buf_copy(src, s_len); int ret = zio_decompress_data_buf(c, tmp, dst, s_len, d_len); - abd_return_buf(src, tmp, s_len); /* - * Decompression shouldn't fail, because we've already verifyied + * Decompression shouldn't fail, because we've already verified * the checksum. However, for extra protection (e.g. against bitflips * in non-ECC RAM), we handle this error (and test it). 
*/ - ASSERT0(ret); + if (ret != 0) { + zio_decompress_failed_buf = kmem_alloc(s_len, KM_SLEEP); + bcopy(tmp, zio_decompress_failed_buf, s_len); + panic("decompression failed " + "err=%u c=%u buf=%p s_len=%u d_len=%u", + ret, (int)c, zio_decompress_failed_buf, + (int)s_len, (int)d_len); + } + + abd_return_buf(src, tmp, s_len); + if (zio_decompress_fail_fraction != 0 && spa_get_random(zio_decompress_fail_fraction) == 0) ret = SET_ERROR(EINVAL); diff --git a/usr/src/uts/common/fs/zfs/zio_crypt.c b/usr/src/uts/common/fs/zfs/zio_crypt.c new file mode 100644 index 0000000000..1d6b8286e3 --- /dev/null +++ b/usr/src/uts/common/fs/zfs/zio_crypt.c @@ -0,0 +1,2009 @@ +/* + * CDDL HEADER START + * + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2017, Datto, Inc. All rights reserved. + */ + +#include <sys/zio_crypt.h> +#include <sys/dmu.h> +#include <sys/dmu_objset.h> +#include <sys/dnode.h> +#include <sys/fs/zfs.h> +#include <sys/zio.h> +#include <sys/zil.h> +#include <sys/sha2.h> +#include <sys/hkdf.h> + +/* + * This file is responsible for handling all of the details of generating + * encryption parameters and performing encryption and authentication. + * + * BLOCK ENCRYPTION PARAMETERS: + * Encryption /Authentication Algorithm Suite (crypt): + * The encryption algorithm, mode, and key length we are going to use. We + * currently support AES in either GCM or CCM modes with 128, 192, and 256 bit + * keys. All authentication is currently done with SHA512-HMAC. + * + * Plaintext: + * The unencrypted data that we want to encrypt. 
+ * + * Initialization Vector (IV): + * An initialization vector for the encryption algorithms. This is used to + * "tweak" the encryption algorithms so that two blocks of the same data are + * encrypted into different ciphertext outputs, thus obfuscating block patterns. + * The supported encryption modes (AES-GCM and AES-CCM) require that an IV is + * never reused with the same encryption key. This value is stored unencrypted + * and must simply be provided to the decryption function. We use a 96 bit IV + * (as recommended by NIST) for all block encryption. For non-dedup blocks we + * derive the IV randomly. The first 64 bits of the IV are stored in the second + * word of DVA[2] and the remaining 32 bits are stored in the upper 32 bits of + * blk_fill. This is safe because encrypted blocks can't use the upper 32 bits + * of blk_fill. We only encrypt level 0 blocks, which normally have a fill count + * of 1. The only exception is for DMU_OT_DNODE objects, where the fill count of + * level 0 blocks is the number of allocated dnodes in that block. The on-disk + * format supports at most 2^15 slots per L0 dnode block, because the maximum + * block size is 16MB (2^24). In either case, for level 0 blocks this number + * will still be smaller than UINT32_MAX so it is safe to store the IV in the + * top 32 bits of blk_fill, while leaving the bottom 32 bits of the fill count + * for the dnode code. + * + * Master key: + * This is the most important secret data of an encrypted dataset. It is used + * along with the salt to generate that actual encryption keys via HKDF. We + * do not use the master key to directly encrypt any data because there are + * theoretical limits on how much data can actually be safely encrypted with + * any encryption mode. The master key is stored encrypted on disk with the + * user's wrapping key. Its length is determined by the encryption algorithm. 
For details on how this is stored see the block comment in dsl_crypt.c
+ *
+ * Salt:
+ * Used as an input to the HKDF function, along with the master key. We use a
+ * 64 bit salt, stored unencrypted in the first word of DVA[2]. Any given salt
+ * can be used for encrypting many blocks, so we cache the current salt and the
+ * associated derived key in zio_crypt_t so we do not need to derive it again
+ * needlessly.
+ *
+ * Encryption Key:
+ * A secret binary key, generated from an HKDF function used to encrypt and
+ * decrypt data.
+ *
+ * Message Authentication Code (MAC)
+ * The MAC is an output of authenticated encryption modes such as AES-GCM and
+ * AES-CCM. Its purpose is to ensure that an attacker cannot modify encrypted
+ * data on disk and return garbage to the application. Effectively, it is a
+ * checksum that can not be reproduced by an attacker. We store the MAC in the
+ * second 128 bits of blk_cksum, leaving the first 128 bits for a truncated
+ * regular checksum of the ciphertext which can be used for scrubbing.
+ *
+ * OBJECT AUTHENTICATION:
+ * Some object types, such as DMU_OT_MASTER_NODE cannot be encrypted because
+ * they contain some info that always needs to be readable. To prevent this
+ * data from being altered, we authenticate this data using SHA512-HMAC. This
+ * will produce a MAC (similar to the one produced via encryption) which can
+ * be used to verify the object was not modified. HMACs do not require key
+ * rotation or IVs, so we can keep up to the full 3 copies of authenticated
+ * data.
+ *
+ * ZIL ENCRYPTION:
+ * ZIL blocks have their bp written to disk ahead of the associated data, so we
+ * cannot store the MAC there as we normally do. For these blocks the MAC is
+ * stored in the embedded checksum within the zil_chain_t header. The salt and
+ * IV are generated for the block on bp allocation instead of at encryption
+ * time. 
In addition, ZIL blocks have some pieces that must be left in plaintext
+ * for claiming even though all of the sensitive user data still needs to be
+ * encrypted. The function zio_crypt_init_uios_zil() handles parsing which
+ * pieces of the block need to be encrypted. All data that is not encrypted is
+ * authenticated using the AAD mechanisms that the supported encryption modes
+ * provide for. In order to preserve the semantics of the ZIL for encrypted
+ * datasets, the ZIL is not protected at the objset level as described below.
+ *
+ * DNODE ENCRYPTION:
+ * Similarly to ZIL blocks, the core part of each dnode_phys_t needs to be left
+ * in plaintext for scrubbing and claiming, but the bonus buffers might contain
+ * sensitive user data. The function zio_crypt_init_uios_dnode() handles parsing
+ * which pieces of the block need to be encrypted. For more details about
+ * dnode authentication and encryption, see zio_crypt_init_uios_dnode().
+ *
+ * OBJECT SET AUTHENTICATION:
+ * Up to this point, everything we have encrypted and authenticated has been
+ * at level 0 (or -2 for the ZIL). If we did not do any further work the
+ * on-disk format would be susceptible to attacks that deleted or rearranged
+ * the order of level 0 blocks. Ideally, the cleanest solution would be to
+ * maintain a tree of authentication MACs going up the bp tree. However, this
+ * presents a problem for raw sends. Send files do not send information about
+ * indirect blocks so there would be no convenient way to transfer the MACs and
+ * they cannot be recalculated on the receive side without the master key which
+ * would defeat one of the purposes of raw sends in the first place. Instead,
+ * for the indirect levels of the bp tree, we use a regular SHA512 of the MACs
+ * from the level below. We also include some portable fields from blk_prop such
+ * as the lsize and compression algorithm to prevent the data from being
+ * misinterpreted. 
+ *
+ * At the objset level, we maintain 2 separate 256 bit MACs in the
+ * objset_phys_t. The first one is "portable" and is the logical root of the
+ * MAC tree maintained in the metadnode's bps. The second is "local" and is
+ * used as the root MAC for the user accounting objects, which are also not
+ * transferred via "zfs send". The portable MAC is sent in the DRR_BEGIN payload
+ * of the send file. The useraccounting code ensures that the useraccounting
+ * info is not present upon a receive, so the local MAC can simply be cleared
+ * out at that time. For more info about objset_phys_t authentication, see
+ * zio_crypt_do_objset_hmacs().
+ *
+ * CONSIDERATIONS FOR DEDUP:
+ * In order for dedup to work, blocks that we want to dedup with one another
+ * need to use the same IV and encryption key, so that they will have the same
+ * ciphertext. Normally, one should never reuse an IV with the same encryption
+ * key or else AES-GCM and AES-CCM can both actually leak the plaintext of both
+ * blocks. In this case, however, since we are using the same plaindata as
+ * well all that we end up with is a duplicate of the original ciphertext we
+ * already had. As a result, an attacker with read access to the raw disk will
+ * be able to tell which blocks are the same but this information is given away
+ * by dedup anyway. In order to get the same IVs and encryption keys for
+ * equivalent blocks of data we use an HMAC of the plaindata. We use an HMAC
+ * here so that a reproducible checksum of the plaindata is never available to
+ * the attacker. The HMAC key is kept alongside the master key, encrypted on
+ * disk. The first 64 bits of the HMAC are used in place of the random salt, and
+ * the next 96 bits are used as the IV. As a result of this mechanism, dedup
+ * will only work within a clone family since encrypted dedup requires use of
+ * the same master and HMAC keys. 
+ */ + +/* + * After encrypting many blocks with the same key we may start to run up + * against the theoretical limits of how much data can securely be encrypted + * with a single key using the supported encryption modes. The most obvious + * limitation is that our risk of generating 2 equivalent 96 bit IVs increases + * the more IVs we generate (which both GCM and CCM modes strictly forbid). + * This risk actually grows surprisingly quickly over time according to the + * Birthday Problem. With a total IV space of 2^(96 bits), and assuming we have + * generated n IVs with a cryptographically secure RNG, the approximate + * probability p(n) of a collision is given as: + * + * p(n) ~= e^(-n*(n-1)/(2*(2^96))) + * + * [http://www.math.cornell.edu/~mec/2008-2009/TianyiZheng/Birthday.html] + * + * Assuming that we want to ensure that p(n) never goes over 1 / 1 trillion + * we must not write more than 398,065,730 blocks with the same encryption key. + * Therefore, we rotate our keys after 400,000,000 blocks have been written by + * generating a new random 64 bit salt for our HKDF encryption key generation + * function. + */ +#define ZFS_KEY_MAX_SALT_USES_DEFAULT 400000000 +#define ZFS_CURRENT_MAX_SALT_USES \ + (MIN(zfs_key_max_salt_uses, ZFS_KEY_MAX_SALT_USES_DEFAULT)) +unsigned long zfs_key_max_salt_uses = ZFS_KEY_MAX_SALT_USES_DEFAULT; + +/* + * Set to a nonzero value to cause zio_do_crypt_uio() to fail 1/this many + * calls, to test decryption error handling code paths. 
+ */ +uint64_t zio_decrypt_fail_fraction = 0; + +typedef struct blkptr_auth_buf { + uint64_t bab_prop; /* blk_prop - portable mask */ + uint8_t bab_mac[ZIO_DATA_MAC_LEN]; /* MAC from blk_cksum */ + uint64_t bab_pad; /* reserved for future use */ +} blkptr_auth_buf_t; + +zio_crypt_info_t zio_crypt_table[ZIO_CRYPT_FUNCTIONS] = { + {"", ZC_TYPE_NONE, 0, "inherit"}, + {"", ZC_TYPE_NONE, 0, "on"}, + {"", ZC_TYPE_NONE, 0, "off"}, + {SUN_CKM_AES_CCM, ZC_TYPE_CCM, 16, "aes-128-ccm"}, + {SUN_CKM_AES_CCM, ZC_TYPE_CCM, 24, "aes-192-ccm"}, + {SUN_CKM_AES_CCM, ZC_TYPE_CCM, 32, "aes-256-ccm"}, + {SUN_CKM_AES_GCM, ZC_TYPE_GCM, 16, "aes-128-gcm"}, + {SUN_CKM_AES_GCM, ZC_TYPE_GCM, 24, "aes-192-gcm"}, + {SUN_CKM_AES_GCM, ZC_TYPE_GCM, 32, "aes-256-gcm"} +}; + +void +zio_crypt_key_destroy(zio_crypt_key_t *key) +{ + rw_destroy(&key->zk_salt_lock); + + /* free crypto templates */ + crypto_destroy_ctx_template(key->zk_current_tmpl); + crypto_destroy_ctx_template(key->zk_hmac_tmpl); + + /* zero out sensitive data */ + bzero(key, sizeof (zio_crypt_key_t)); +} + +int +zio_crypt_key_init(uint64_t crypt, zio_crypt_key_t *key) +{ + int ret; + crypto_mechanism_t mech; + uint_t keydata_len; + + ASSERT(key != NULL); + ASSERT3U(crypt, <, ZIO_CRYPT_FUNCTIONS); + + keydata_len = zio_crypt_table[crypt].ci_keylen; + bzero(key, sizeof (zio_crypt_key_t)); + + /* fill keydata buffers and salt with random data */ + ret = random_get_bytes((uint8_t *)&key->zk_guid, sizeof (uint64_t)); + if (ret != 0) + goto error; + + ret = random_get_bytes(key->zk_master_keydata, keydata_len); + if (ret != 0) + goto error; + + ret = random_get_bytes(key->zk_hmac_keydata, SHA512_HMAC_KEYLEN); + if (ret != 0) + goto error; + + ret = random_get_bytes(key->zk_salt, ZIO_DATA_SALT_LEN); + if (ret != 0) + goto error; + + /* derive the current key from the master key */ + ret = hkdf_sha512(key->zk_master_keydata, keydata_len, NULL, 0, + key->zk_salt, ZIO_DATA_SALT_LEN, key->zk_current_keydata, + keydata_len); + if (ret != 0) + 
goto error; + + /* initialize keys for the ICP */ + key->zk_current_key.ck_format = CRYPTO_KEY_RAW; + key->zk_current_key.ck_data = key->zk_current_keydata; + key->zk_current_key.ck_length = CRYPTO_BYTES2BITS(keydata_len); + + key->zk_hmac_key.ck_format = CRYPTO_KEY_RAW; + key->zk_hmac_key.ck_data = &key->zk_hmac_key; + key->zk_hmac_key.ck_length = CRYPTO_BYTES2BITS(SHA512_HMAC_KEYLEN); + + /* + * Initialize the crypto templates. It's ok if this fails because + * this is just an optimization. + */ + mech.cm_type = crypto_mech2id(zio_crypt_table[crypt].ci_mechname); + ret = crypto_create_ctx_template(&mech, &key->zk_current_key, + &key->zk_current_tmpl, KM_SLEEP); + if (ret != CRYPTO_SUCCESS) + key->zk_current_tmpl = NULL; + + mech.cm_type = crypto_mech2id(SUN_CKM_SHA512_HMAC); + ret = crypto_create_ctx_template(&mech, &key->zk_hmac_key, + &key->zk_hmac_tmpl, KM_SLEEP); + if (ret != CRYPTO_SUCCESS) + key->zk_hmac_tmpl = NULL; + + key->zk_crypt = crypt; + key->zk_version = ZIO_CRYPT_KEY_CURRENT_VERSION; + key->zk_salt_count = 0; + rw_init(&key->zk_salt_lock, NULL, RW_DEFAULT, NULL); + + return (0); + +error: + zio_crypt_key_destroy(key); + return (ret); +} + +static int +zio_crypt_key_change_salt(zio_crypt_key_t *key) +{ + int ret = 0; + uint8_t salt[ZIO_DATA_SALT_LEN]; + crypto_mechanism_t mech; + uint_t keydata_len = zio_crypt_table[key->zk_crypt].ci_keylen; + + /* generate a new salt */ + ret = random_get_bytes(salt, ZIO_DATA_SALT_LEN); + if (ret != 0) + goto error; + + rw_enter(&key->zk_salt_lock, RW_WRITER); + + /* someone beat us to the salt rotation, just unlock and return */ + if (key->zk_salt_count < ZFS_CURRENT_MAX_SALT_USES) + goto out_unlock; + + /* derive the current key from the master key and the new salt */ + ret = hkdf_sha512(key->zk_master_keydata, keydata_len, NULL, 0, + salt, ZIO_DATA_SALT_LEN, key->zk_current_keydata, keydata_len); + if (ret != 0) + goto out_unlock; + + /* assign the salt and reset the usage count */ + bcopy(salt, key->zk_salt, 
ZIO_DATA_SALT_LEN); + key->zk_salt_count = 0; + + /* destroy the old context template and create the new one */ + crypto_destroy_ctx_template(key->zk_current_tmpl); + ret = crypto_create_ctx_template(&mech, &key->zk_current_key, + &key->zk_current_tmpl, KM_SLEEP); + if (ret != CRYPTO_SUCCESS) + key->zk_current_tmpl = NULL; + + rw_exit(&key->zk_salt_lock); + + return (0); + +out_unlock: + rw_exit(&key->zk_salt_lock); +error: + return (ret); +} + +/* See comment above zfs_key_max_salt_uses definition for details */ +int +zio_crypt_key_get_salt(zio_crypt_key_t *key, uint8_t *salt) +{ + int ret; + boolean_t salt_change; + + rw_enter(&key->zk_salt_lock, RW_READER); + + bcopy(key->zk_salt, salt, ZIO_DATA_SALT_LEN); + salt_change = (atomic_inc_64_nv(&key->zk_salt_count) >= + ZFS_CURRENT_MAX_SALT_USES); + + rw_exit(&key->zk_salt_lock); + + if (salt_change) { + ret = zio_crypt_key_change_salt(key); + if (ret != 0) + goto error; + } + + return (0); + +error: + return (ret); +} + +void *failed_decrypt_buf; +int failed_decrypt_size; + +/* + * This function handles all encryption and decryption in zfs. When + * encrypting it expects puio to reference the plaintext and cuio to + * reference the cphertext. cuio must have enough space for the + * ciphertext + room for a MAC. datalen should be the length of the + * plaintext / ciphertext alone. 
+ */ +/* ARGSUSED */ +static int +zio_do_crypt_uio(boolean_t encrypt, uint64_t crypt, crypto_key_t *key, + crypto_ctx_template_t tmpl, uint8_t *ivbuf, uint_t datalen, + uio_t *puio, uio_t *cuio, uint8_t *authbuf, uint_t auth_len) +{ + int ret; + crypto_data_t plaindata, cipherdata; + CK_AES_CCM_PARAMS ccmp; + CK_AES_GCM_PARAMS gcmp; + crypto_mechanism_t mech; + zio_crypt_info_t crypt_info; + uint_t plain_full_len, maclen; + + ASSERT3U(crypt, <, ZIO_CRYPT_FUNCTIONS); + ASSERT3U(key->ck_format, ==, CRYPTO_KEY_RAW); + + /* lookup the encryption info */ + crypt_info = zio_crypt_table[crypt]; + + /* the mac will always be the last iovec_t in the cipher uio */ + maclen = cuio->uio_iov[cuio->uio_iovcnt - 1].iov_len; + + ASSERT(maclen <= ZIO_DATA_MAC_LEN); + + /* setup encryption mechanism (same as crypt) */ + mech.cm_type = crypto_mech2id(crypt_info.ci_mechname); + + /* + * Strangely, the ICP requires that plain_full_len must include + * the MAC length when decrypting, even though the UIO does not + * need to have the extra space allocated. + */ + if (encrypt) { + plain_full_len = datalen; + } else { + plain_full_len = datalen + maclen; + } + + /* + * setup encryption params (currently only AES CCM and AES GCM + * are supported) + */ + if (crypt_info.ci_crypt_type == ZC_TYPE_CCM) { + ccmp.ulNonceSize = ZIO_DATA_IV_LEN; + ccmp.ulAuthDataSize = auth_len; + ccmp.authData = authbuf; + ccmp.ulMACSize = maclen; + ccmp.nonce = ivbuf; + ccmp.ulDataSize = plain_full_len; + + mech.cm_param = (char *)(&ccmp); + mech.cm_param_len = sizeof (CK_AES_CCM_PARAMS); + } else { + gcmp.ulIvLen = ZIO_DATA_IV_LEN; + gcmp.ulIvBits = CRYPTO_BYTES2BITS(ZIO_DATA_IV_LEN); + gcmp.ulAADLen = auth_len; + gcmp.pAAD = authbuf; + gcmp.ulTagBits = CRYPTO_BYTES2BITS(maclen); + gcmp.pIv = ivbuf; + + mech.cm_param = (char *)(&gcmp); + mech.cm_param_len = sizeof (CK_AES_GCM_PARAMS); + } + + /* populate the cipher and plain data structs. 
*/ + plaindata.cd_format = CRYPTO_DATA_UIO; + plaindata.cd_offset = 0; + plaindata.cd_uio = puio; + plaindata.cd_miscdata = NULL; + plaindata.cd_length = plain_full_len; + + cipherdata.cd_format = CRYPTO_DATA_UIO; + cipherdata.cd_offset = 0; + cipherdata.cd_uio = cuio; + cipherdata.cd_miscdata = NULL; + cipherdata.cd_length = datalen + maclen; + + /* perform the actual encryption */ + if (encrypt) { + ret = crypto_encrypt(&mech, &plaindata, key, tmpl, &cipherdata, + NULL); + if (ret != CRYPTO_SUCCESS) { + ret = SET_ERROR(EIO); + goto error; + } + } else { + if (zio_decrypt_fail_fraction != 0 && + spa_get_random(zio_decrypt_fail_fraction) == 0) { + ret = CRYPTO_INVALID_MAC; + } else { + ret = crypto_decrypt(&mech, &cipherdata, + key, tmpl, &plaindata, NULL); + } + if (ret != CRYPTO_SUCCESS) { + ASSERT3U(ret, ==, CRYPTO_INVALID_MAC); + ret = SET_ERROR(ECKSUM); + goto error; + } + } + + return (0); + +error: + return (ret); +} + +int +zio_crypt_key_wrap(crypto_key_t *cwkey, zio_crypt_key_t *key, uint8_t *iv, + uint8_t *mac, uint8_t *keydata_out, uint8_t *hmac_keydata_out) +{ + int ret; + uio_t puio, cuio; + uint64_t aad[3]; + iovec_t plain_iovecs[2], cipher_iovecs[3]; + uint64_t crypt = key->zk_crypt; + uint_t enc_len, keydata_len, aad_len; + + ASSERT3U(crypt, <, ZIO_CRYPT_FUNCTIONS); + ASSERT3U(cwkey->ck_format, ==, CRYPTO_KEY_RAW); + + keydata_len = zio_crypt_table[crypt].ci_keylen; + + /* generate iv for wrapping the master and hmac key */ + ret = random_get_pseudo_bytes(iv, WRAPPING_IV_LEN); + if (ret != 0) + goto error; + + /* initialize uio_ts */ + plain_iovecs[0].iov_base = (char *)key->zk_master_keydata; + plain_iovecs[0].iov_len = keydata_len; + plain_iovecs[1].iov_base = (char *)key->zk_hmac_keydata; + plain_iovecs[1].iov_len = SHA512_HMAC_KEYLEN; + + cipher_iovecs[0].iov_base = (char *)keydata_out; + cipher_iovecs[0].iov_len = keydata_len; + cipher_iovecs[1].iov_base = (char *)hmac_keydata_out; + cipher_iovecs[1].iov_len = SHA512_HMAC_KEYLEN; + 
cipher_iovecs[2].iov_base = (char *)mac; + cipher_iovecs[2].iov_len = WRAPPING_MAC_LEN; + + /* + * Although we don't support writing to the old format, we do + * support rewrapping the key so that the user can move and + * quarantine datasets on the old format. + */ + if (key->zk_version == 0) { + aad_len = sizeof (uint64_t); + aad[0] = LE_64(key->zk_guid); + } else { + ASSERT3U(key->zk_version, ==, ZIO_CRYPT_KEY_CURRENT_VERSION); + aad_len = sizeof (uint64_t) * 3; + aad[0] = LE_64(key->zk_guid); + aad[1] = LE_64(crypt); + aad[2] = LE_64(key->zk_version); + } + + enc_len = zio_crypt_table[crypt].ci_keylen + SHA512_HMAC_KEYLEN; + puio.uio_iov = plain_iovecs; + puio.uio_iovcnt = 2; + puio.uio_segflg = UIO_SYSSPACE; + cuio.uio_iov = cipher_iovecs; + cuio.uio_iovcnt = 3; + cuio.uio_segflg = UIO_SYSSPACE; + + /* encrypt the keys and store the resulting ciphertext and mac */ + ret = zio_do_crypt_uio(B_TRUE, crypt, cwkey, NULL, iv, enc_len, + &puio, &cuio, (uint8_t *)aad, aad_len); + if (ret != 0) + goto error; + + return (0); + +error: + return (ret); +} + +int +zio_crypt_key_unwrap(crypto_key_t *cwkey, uint64_t crypt, uint64_t version, + uint64_t guid, uint8_t *keydata, uint8_t *hmac_keydata, uint8_t *iv, + uint8_t *mac, zio_crypt_key_t *key) +{ + int ret; + crypto_mechanism_t mech; + uio_t puio, cuio; + uint64_t aad[3]; + iovec_t plain_iovecs[2], cipher_iovecs[3]; + uint_t enc_len, keydata_len, aad_len; + + ASSERT3U(crypt, <, ZIO_CRYPT_FUNCTIONS); + ASSERT3U(cwkey->ck_format, ==, CRYPTO_KEY_RAW); + + rw_init(&key->zk_salt_lock, NULL, RW_DEFAULT, NULL); + keydata_len = zio_crypt_table[crypt].ci_keylen; + + /* initialize uio_ts */ + plain_iovecs[0].iov_base = (char *)key->zk_master_keydata; + plain_iovecs[0].iov_len = keydata_len; + plain_iovecs[1].iov_base = (char *)key->zk_hmac_keydata; + plain_iovecs[1].iov_len = SHA512_HMAC_KEYLEN; + + cipher_iovecs[0].iov_base = (char *)keydata; + cipher_iovecs[0].iov_len = keydata_len; + cipher_iovecs[1].iov_base = (char 
*)hmac_keydata; + cipher_iovecs[1].iov_len = SHA512_HMAC_KEYLEN; + cipher_iovecs[2].iov_base = (char *)mac; + cipher_iovecs[2].iov_len = WRAPPING_MAC_LEN; + + if (version == 0) { + aad_len = sizeof (uint64_t); + aad[0] = LE_64(guid); + } else { + ASSERT3U(version, ==, ZIO_CRYPT_KEY_CURRENT_VERSION); + aad_len = sizeof (uint64_t) * 3; + aad[0] = LE_64(guid); + aad[1] = LE_64(crypt); + aad[2] = LE_64(version); + } + + enc_len = keydata_len + SHA512_HMAC_KEYLEN; + puio.uio_iov = plain_iovecs; + puio.uio_segflg = UIO_SYSSPACE; + puio.uio_iovcnt = 2; + cuio.uio_iov = cipher_iovecs; + cuio.uio_iovcnt = 3; + cuio.uio_segflg = UIO_SYSSPACE; + + /* decrypt the keys and store the result in the output buffers */ + ret = zio_do_crypt_uio(B_FALSE, crypt, cwkey, NULL, iv, enc_len, + &puio, &cuio, (uint8_t *)aad, aad_len); + if (ret != 0) + goto error; + + /* generate a fresh salt */ + ret = random_get_bytes(key->zk_salt, ZIO_DATA_SALT_LEN); + if (ret != 0) + goto error; + + /* derive the current key from the master key */ + ret = hkdf_sha512(key->zk_master_keydata, keydata_len, NULL, 0, + key->zk_salt, ZIO_DATA_SALT_LEN, key->zk_current_keydata, + keydata_len); + if (ret != 0) + goto error; + + /* initialize keys for ICP */ + key->zk_current_key.ck_format = CRYPTO_KEY_RAW; + key->zk_current_key.ck_data = key->zk_current_keydata; + key->zk_current_key.ck_length = CRYPTO_BYTES2BITS(keydata_len); + + key->zk_hmac_key.ck_format = CRYPTO_KEY_RAW; + key->zk_hmac_key.ck_data = key->zk_hmac_keydata; + key->zk_hmac_key.ck_length = CRYPTO_BYTES2BITS(SHA512_HMAC_KEYLEN); + + /* + * Initialize the crypto templates. It's ok if this fails because + * this is just an optimization. 
+ */ + mech.cm_type = crypto_mech2id(zio_crypt_table[crypt].ci_mechname); + ret = crypto_create_ctx_template(&mech, &key->zk_current_key, + &key->zk_current_tmpl, KM_SLEEP); + if (ret != CRYPTO_SUCCESS) + key->zk_current_tmpl = NULL; + + mech.cm_type = crypto_mech2id(SUN_CKM_SHA512_HMAC); + ret = crypto_create_ctx_template(&mech, &key->zk_hmac_key, + &key->zk_hmac_tmpl, KM_SLEEP); + if (ret != CRYPTO_SUCCESS) + key->zk_hmac_tmpl = NULL; + + key->zk_crypt = crypt; + key->zk_version = version; + key->zk_guid = guid; + key->zk_salt_count = 0; + + return (0); + +error: + zio_crypt_key_destroy(key); + return (ret); +} + +int +zio_crypt_generate_iv(uint8_t *ivbuf) +{ + int ret; + + /* randomly generate the IV */ + ret = random_get_pseudo_bytes(ivbuf, ZIO_DATA_IV_LEN); + if (ret != 0) + goto error; + + return (0); + +error: + bzero(ivbuf, ZIO_DATA_IV_LEN); + return (ret); +} + +int +zio_crypt_do_hmac(zio_crypt_key_t *key, uint8_t *data, uint_t datalen, + uint8_t *digestbuf, uint_t digestlen) +{ + int ret; + crypto_mechanism_t mech; + crypto_data_t in_data, digest_data; + uint8_t raw_digestbuf[SHA512_DIGEST_LENGTH]; + + ASSERT3U(digestlen, <=, SHA512_DIGEST_LENGTH); + + /* initialize sha512-hmac mechanism and crypto data */ + mech.cm_type = crypto_mech2id(SUN_CKM_SHA512_HMAC); + mech.cm_param = NULL; + mech.cm_param_len = 0; + + /* initialize the crypto data */ + in_data.cd_format = CRYPTO_DATA_RAW; + in_data.cd_offset = 0; + in_data.cd_length = datalen; + in_data.cd_raw.iov_base = (char *)data; + in_data.cd_raw.iov_len = in_data.cd_length; + + digest_data.cd_format = CRYPTO_DATA_RAW; + digest_data.cd_offset = 0; + digest_data.cd_length = SHA512_DIGEST_LENGTH; + digest_data.cd_raw.iov_base = (char *)raw_digestbuf; + digest_data.cd_raw.iov_len = digest_data.cd_length; + + /* generate the hmac */ + ret = crypto_mac(&mech, &in_data, &key->zk_hmac_key, key->zk_hmac_tmpl, + &digest_data, NULL); + if (ret != CRYPTO_SUCCESS) { + ret = SET_ERROR(EIO); + goto error; + } + + 
bcopy(raw_digestbuf, digestbuf, digestlen); + + return (0); + +error: + bzero(digestbuf, digestlen); + return (ret); +} + +int +zio_crypt_generate_iv_salt_dedup(zio_crypt_key_t *key, uint8_t *data, + uint_t datalen, uint8_t *ivbuf, uint8_t *salt) +{ + int ret; + uint8_t digestbuf[SHA512_DIGEST_LENGTH]; + + ret = zio_crypt_do_hmac(key, data, datalen, + digestbuf, SHA512_DIGEST_LENGTH); + if (ret != 0) + return (ret); + + bcopy(digestbuf, salt, ZIO_DATA_SALT_LEN); + bcopy(digestbuf + ZIO_DATA_SALT_LEN, ivbuf, ZIO_DATA_IV_LEN); + + return (0); +} + +/* + * The following functions are used to encode and decode encryption parameters + * into blkptr_t and zil_header_t. The ICP wants to use these parameters as + * byte strings, which normally means that these strings would not need to deal + * with byteswapping at all. However, both blkptr_t and zil_header_t may be + * byteswapped by lower layers and so we must "undo" that byteswap here upon + * decoding and encoding in a non-native byteorder. These functions require + * that the byteorder bit is correct before being called. 
+ */ +void +zio_crypt_encode_params_bp(blkptr_t *bp, uint8_t *salt, uint8_t *iv) +{ + uint64_t val64; + uint32_t val32; + + ASSERT(BP_IS_ENCRYPTED(bp)); + + if (!BP_SHOULD_BYTESWAP(bp)) { + bcopy(salt, &bp->blk_dva[2].dva_word[0], sizeof (uint64_t)); + bcopy(iv, &bp->blk_dva[2].dva_word[1], sizeof (uint64_t)); + bcopy(iv + sizeof (uint64_t), &val32, sizeof (uint32_t)); + BP_SET_IV2(bp, val32); + } else { + bcopy(salt, &val64, sizeof (uint64_t)); + bp->blk_dva[2].dva_word[0] = BSWAP_64(val64); + + bcopy(iv, &val64, sizeof (uint64_t)); + bp->blk_dva[2].dva_word[1] = BSWAP_64(val64); + + bcopy(iv + sizeof (uint64_t), &val32, sizeof (uint32_t)); + BP_SET_IV2(bp, BSWAP_32(val32)); + } +} + +void +zio_crypt_decode_params_bp(const blkptr_t *bp, uint8_t *salt, uint8_t *iv) +{ + uint64_t val64; + uint32_t val32; + + ASSERT(BP_IS_PROTECTED(bp)); + + /* for convenience, so callers don't need to check */ + if (BP_IS_AUTHENTICATED(bp)) { + bzero(salt, ZIO_DATA_SALT_LEN); + bzero(iv, ZIO_DATA_IV_LEN); + return; + } + + if (!BP_SHOULD_BYTESWAP(bp)) { + bcopy(&bp->blk_dva[2].dva_word[0], salt, sizeof (uint64_t)); + bcopy(&bp->blk_dva[2].dva_word[1], iv, sizeof (uint64_t)); + + val32 = (uint32_t)BP_GET_IV2(bp); + bcopy(&val32, iv + sizeof (uint64_t), sizeof (uint32_t)); + } else { + val64 = BSWAP_64(bp->blk_dva[2].dva_word[0]); + bcopy(&val64, salt, sizeof (uint64_t)); + + val64 = BSWAP_64(bp->blk_dva[2].dva_word[1]); + bcopy(&val64, iv, sizeof (uint64_t)); + + val32 = BSWAP_32((uint32_t)BP_GET_IV2(bp)); + bcopy(&val32, iv + sizeof (uint64_t), sizeof (uint32_t)); + } +} + +void +zio_crypt_encode_mac_bp(blkptr_t *bp, uint8_t *mac) +{ + uint64_t val64; + + ASSERT(BP_USES_CRYPT(bp)); + ASSERT3U(BP_GET_TYPE(bp), !=, DMU_OT_OBJSET); + + if (!BP_SHOULD_BYTESWAP(bp)) { + bcopy(mac, &bp->blk_cksum.zc_word[2], sizeof (uint64_t)); + bcopy(mac + sizeof (uint64_t), &bp->blk_cksum.zc_word[3], + sizeof (uint64_t)); + } else { + bcopy(mac, &val64, sizeof (uint64_t)); + bp->blk_cksum.zc_word[2] = 
BSWAP_64(val64); + + bcopy(mac + sizeof (uint64_t), &val64, sizeof (uint64_t)); + bp->blk_cksum.zc_word[3] = BSWAP_64(val64); + } +} + +void +zio_crypt_decode_mac_bp(const blkptr_t *bp, uint8_t *mac) +{ + uint64_t val64; + + ASSERT(BP_USES_CRYPT(bp) || BP_IS_HOLE(bp)); + + /* for convenience, so callers don't need to check */ + if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) { + bzero(mac, ZIO_DATA_MAC_LEN); + return; + } + + if (!BP_SHOULD_BYTESWAP(bp)) { + bcopy(&bp->blk_cksum.zc_word[2], mac, sizeof (uint64_t)); + bcopy(&bp->blk_cksum.zc_word[3], mac + sizeof (uint64_t), + sizeof (uint64_t)); + } else { + val64 = BSWAP_64(bp->blk_cksum.zc_word[2]); + bcopy(&val64, mac, sizeof (uint64_t)); + + val64 = BSWAP_64(bp->blk_cksum.zc_word[3]); + bcopy(&val64, mac + sizeof (uint64_t), sizeof (uint64_t)); + } +} + +void +zio_crypt_encode_mac_zil(void *data, uint8_t *mac) +{ + zil_chain_t *zilc = data; + + bcopy(mac, &zilc->zc_eck.zec_cksum.zc_word[2], sizeof (uint64_t)); + bcopy(mac + sizeof (uint64_t), &zilc->zc_eck.zec_cksum.zc_word[3], + sizeof (uint64_t)); +} + +void +zio_crypt_decode_mac_zil(const void *data, uint8_t *mac) +{ + /* + * The ZIL MAC is embedded in the block it protects, which will + * not have been byteswapped by the time this function has been called. + * As a result, we don't need to worry about byteswapping the MAC. + */ + const zil_chain_t *zilc = data; + + bcopy(&zilc->zc_eck.zec_cksum.zc_word[2], mac, sizeof (uint64_t)); + bcopy(&zilc->zc_eck.zec_cksum.zc_word[3], mac + sizeof (uint64_t), + sizeof (uint64_t)); +} + +/* + * This routine takes a block of dnodes (src_abd) and copies only the bonus + * buffers to the same offsets in the dst buffer. datalen should be the size + * of both the src_abd and the dst buffer (not just the length of the bonus + * buffers). 
+ */ +void +zio_crypt_copy_dnode_bonus(abd_t *src_abd, uint8_t *dst, uint_t datalen) +{ + uint_t i, max_dnp = datalen >> DNODE_SHIFT; + uint8_t *src; + dnode_phys_t *dnp, *sdnp, *ddnp; + + src = abd_borrow_buf_copy(src_abd, datalen); + + sdnp = (dnode_phys_t *)src; + ddnp = (dnode_phys_t *)dst; + + for (i = 0; i < max_dnp; i += sdnp[i].dn_extra_slots + 1) { + dnp = &sdnp[i]; + if (dnp->dn_type != DMU_OT_NONE && + DMU_OT_IS_ENCRYPTED(dnp->dn_bonustype) && + dnp->dn_bonuslen != 0) { + bcopy(DN_BONUS(dnp), DN_BONUS(&ddnp[i]), + DN_MAX_BONUS_LEN(dnp)); + } + } + + abd_return_buf(src_abd, src, datalen); +} + +/* + * This function decides what fields from blk_prop are included in + * the on-disk various MAC algorithms. + */ +static void +zio_crypt_bp_zero_nonportable_blkprop(blkptr_t *bp, uint64_t version) +{ + /* + * Version 0 did not properly zero out all non-portable fields + * as it should have done. We maintain this code so that we can + * do read-only imports of pools on this version. + */ + if (version == 0) { + BP_SET_DEDUP(bp, 0); + BP_SET_CHECKSUM(bp, 0); + BP_SET_PSIZE(bp, SPA_MINBLOCKSIZE); + return; + } + + ASSERT3U(version, ==, ZIO_CRYPT_KEY_CURRENT_VERSION); + + /* + * The hole_birth feature might set these fields even if this bp + * is a hole. We zero them out here to guarantee that raw sends + * will function with or without the feature. + */ + if (BP_IS_HOLE(bp)) { + bp->blk_prop = 0ULL; + return; + } + + /* + * At L0 we want to verify these fields to ensure that data blocks + * can not be reinterpretted. For instance, we do not want an attacker + * to trick us into returning raw lz4 compressed data to the user + * by modifying the compression bits. At higher levels, we cannot + * enforce this policy since raw sends do not convey any information + * about indirect blocks, so these values might be different on the + * receive side. 
Fortunately, this does not open any new attack + * vectors, since any alterations that can be made to a higher level + * bp must still verify the correct order of the layer below it. + */ + if (BP_GET_LEVEL(bp) != 0) { + BP_SET_BYTEORDER(bp, 0); + BP_SET_COMPRESS(bp, 0); + + /* + * psize cannot be set to zero or it will trigger + * asserts, but the value doesn't really matter as + * long as it is constant. + */ + BP_SET_PSIZE(bp, SPA_MINBLOCKSIZE); + } + + BP_SET_DEDUP(bp, 0); + BP_SET_CHECKSUM(bp, 0); +} + +static void +zio_crypt_bp_auth_init(uint64_t version, boolean_t should_bswap, blkptr_t *bp, + blkptr_auth_buf_t *bab, uint_t *bab_len) +{ + blkptr_t tmpbp = *bp; + + if (should_bswap) + byteswap_uint64_array(&tmpbp, sizeof (blkptr_t)); + + ASSERT(BP_USES_CRYPT(&tmpbp) || BP_IS_HOLE(&tmpbp)); + ASSERT0(BP_IS_EMBEDDED(&tmpbp)); + + zio_crypt_decode_mac_bp(&tmpbp, bab->bab_mac); + + /* + * We always MAC blk_prop in LE to ensure portability. This + * must be done after decoding the mac, since the endianness + * will get zero'd out here. 
+ */ + zio_crypt_bp_zero_nonportable_blkprop(&tmpbp, version); + bab->bab_prop = LE_64(tmpbp.blk_prop); + bab->bab_pad = 0ULL; + + /* version 0 did not include the padding */ + *bab_len = sizeof (blkptr_auth_buf_t); + if (version == 0) + *bab_len -= sizeof (uint64_t); +} + +static int +zio_crypt_bp_do_hmac_updates(crypto_context_t ctx, uint64_t version, + boolean_t should_bswap, blkptr_t *bp) +{ + int ret; + uint_t bab_len; + blkptr_auth_buf_t bab; + crypto_data_t cd; + + zio_crypt_bp_auth_init(version, should_bswap, bp, &bab, &bab_len); + cd.cd_format = CRYPTO_DATA_RAW; + cd.cd_offset = 0; + cd.cd_length = bab_len; + cd.cd_raw.iov_base = (char *)&bab; + cd.cd_raw.iov_len = cd.cd_length; + + ret = crypto_mac_update(ctx, &cd, NULL); + if (ret != CRYPTO_SUCCESS) { + ret = SET_ERROR(EIO); + goto error; + } + + return (0); + +error: + return (ret); +} + +static void +zio_crypt_bp_do_indrect_checksum_updates(SHA2_CTX *ctx, uint64_t version, + boolean_t should_bswap, blkptr_t *bp) +{ + uint_t bab_len; + blkptr_auth_buf_t bab; + + zio_crypt_bp_auth_init(version, should_bswap, bp, &bab, &bab_len); + SHA2Update(ctx, &bab, bab_len); +} + +static void +zio_crypt_bp_do_aad_updates(uint8_t **aadp, uint_t *aad_len, uint64_t version, + boolean_t should_bswap, blkptr_t *bp) +{ + uint_t bab_len; + blkptr_auth_buf_t bab; + + zio_crypt_bp_auth_init(version, should_bswap, bp, &bab, &bab_len); + bcopy(&bab, *aadp, bab_len); + *aadp += bab_len; + *aad_len += bab_len; +} + +static int +zio_crypt_do_dnode_hmac_updates(crypto_context_t ctx, uint64_t version, + boolean_t should_bswap, dnode_phys_t *dnp) +{ + int ret, i; + dnode_phys_t *adnp; + boolean_t le_bswap = (should_bswap == ZFS_HOST_BYTEORDER); + crypto_data_t cd; + uint8_t tmp_dncore[offsetof(dnode_phys_t, dn_blkptr)]; + + cd.cd_format = CRYPTO_DATA_RAW; + cd.cd_offset = 0; + + /* authenticate the core dnode (masking out non-portable bits) */ + bcopy(dnp, tmp_dncore, sizeof (tmp_dncore)); + adnp = (dnode_phys_t *)tmp_dncore; + if 
(le_bswap) { + adnp->dn_datablkszsec = BSWAP_16(adnp->dn_datablkszsec); + adnp->dn_bonuslen = BSWAP_16(adnp->dn_bonuslen); + adnp->dn_maxblkid = BSWAP_64(adnp->dn_maxblkid); + adnp->dn_used = BSWAP_64(adnp->dn_used); + } + adnp->dn_flags &= DNODE_CRYPT_PORTABLE_FLAGS_MASK; + adnp->dn_used = 0; + + cd.cd_length = sizeof (tmp_dncore); + cd.cd_raw.iov_base = (char *)adnp; + cd.cd_raw.iov_len = cd.cd_length; + + ret = crypto_mac_update(ctx, &cd, NULL); + if (ret != CRYPTO_SUCCESS) { + ret = SET_ERROR(EIO); + goto error; + } + + for (i = 0; i < dnp->dn_nblkptr; i++) { + ret = zio_crypt_bp_do_hmac_updates(ctx, version, + should_bswap, &dnp->dn_blkptr[i]); + if (ret != 0) + goto error; + } + + if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) { + ret = zio_crypt_bp_do_hmac_updates(ctx, version, + should_bswap, DN_SPILL_BLKPTR(dnp)); + if (ret != 0) + goto error; + } + + return (0); + +error: + return (ret); +} + +/* + * objset_phys_t blocks introduce a number of exceptions to the normal + * authentication process. objset_phys_t's contain 2 separate HMACs for + * protecting the integrity of their data. The portable_mac protects the + * metadnode. This MAC can be sent with a raw send and protects against + * reordering of data within the metadnode. The local_mac protects the user + * accounting objects which are not sent from one system to another. + * + * In addition, objset blocks are the only blocks that can be modified and + * written to disk without the key loaded under certain circumstances. During + * zil_claim() we need to be able to update the zil_header_t to complete + * claiming log blocks and during raw receives we need to write out the + * portable_mac from the send file. Both of these actions are possible + * because these fields are not protected by either MAC so neither one will + * need to modify the MACs without the key. 
However, when the modified blocks + * are written out they will be byteswapped into the host machine's native + * endianness which will modify fields protected by the MAC. As a result, MAC + * calculation for objset blocks works slightly differently from other block + * types. Where other block types MAC the data in whatever endianness is + * written to disk, objset blocks always MAC little endian version of their + * values. In the code, should_bswap is the value from BP_SHOULD_BYTESWAP() + * and le_bswap indicates whether a byteswap is needed to get this block + * into little endian format. + */ +/* ARGSUSED */ +int +zio_crypt_do_objset_hmacs(zio_crypt_key_t *key, void *data, uint_t datalen, + boolean_t should_bswap, uint8_t *portable_mac, uint8_t *local_mac) +{ + int ret; + crypto_mechanism_t mech; + crypto_context_t ctx; + crypto_data_t cd; + objset_phys_t *osp = data; + uint64_t intval; + boolean_t le_bswap = (should_bswap == ZFS_HOST_BYTEORDER); + uint8_t raw_portable_mac[SHA512_DIGEST_LENGTH]; + uint8_t raw_local_mac[SHA512_DIGEST_LENGTH]; + + /* initialize HMAC mechanism */ + mech.cm_type = crypto_mech2id(SUN_CKM_SHA512_HMAC); + mech.cm_param = NULL; + mech.cm_param_len = 0; + + cd.cd_format = CRYPTO_DATA_RAW; + cd.cd_offset = 0; + + /* calculate the portable MAC from the portable fields and metadnode */ + ret = crypto_mac_init(&mech, &key->zk_hmac_key, NULL, &ctx, NULL); + if (ret != CRYPTO_SUCCESS) { + ret = SET_ERROR(EIO); + goto error; + } + + /* add in the os_type */ + intval = (le_bswap) ? 
osp->os_type : BSWAP_64(osp->os_type); + cd.cd_length = sizeof (uint64_t); + cd.cd_raw.iov_base = (char *)&intval; + cd.cd_raw.iov_len = cd.cd_length; + + ret = crypto_mac_update(ctx, &cd, NULL); + if (ret != CRYPTO_SUCCESS) { + ret = SET_ERROR(EIO); + goto error; + } + + /* add in the portable os_flags */ + intval = osp->os_flags; + if (should_bswap) + intval = BSWAP_64(intval); + intval &= OBJSET_CRYPT_PORTABLE_FLAGS_MASK; + /* CONSTCOND */ + if (!ZFS_HOST_BYTEORDER) + intval = BSWAP_64(intval); + + cd.cd_length = sizeof (uint64_t); + cd.cd_raw.iov_base = (char *)&intval; + cd.cd_raw.iov_len = cd.cd_length; + + ret = crypto_mac_update(ctx, &cd, NULL); + if (ret != CRYPTO_SUCCESS) { + ret = SET_ERROR(EIO); + goto error; + } + + /* add in fields from the metadnode */ + ret = zio_crypt_do_dnode_hmac_updates(ctx, key->zk_version, + should_bswap, &osp->os_meta_dnode); + if (ret) + goto error; + + /* store the final digest in a temporary buffer and copy what we need */ + cd.cd_length = SHA512_DIGEST_LENGTH; + cd.cd_raw.iov_base = (char *)raw_portable_mac; + cd.cd_raw.iov_len = cd.cd_length; + + ret = crypto_mac_final(ctx, &cd, NULL); + if (ret != CRYPTO_SUCCESS) { + ret = SET_ERROR(EIO); + goto error; + } + + bcopy(raw_portable_mac, portable_mac, ZIO_OBJSET_MAC_LEN); + + /* + * The local MAC protects the user and group accounting. If these + * objects are not present, the local MAC is zeroed out. 
+ */ + if ((osp->os_userused_dnode.dn_type == DMU_OT_NONE && + osp->os_groupused_dnode.dn_type == DMU_OT_NONE) || + (datalen <= OBJSET_OLD_PHYS_SIZE)) { + bzero(local_mac, ZIO_OBJSET_MAC_LEN); + return (0); + } + + /* calculate the local MAC from the userused and groupused dnodes */ + ret = crypto_mac_init(&mech, &key->zk_hmac_key, NULL, &ctx, NULL); + if (ret != CRYPTO_SUCCESS) { + ret = SET_ERROR(EIO); + goto error; + } + + /* add in the non-portable os_flags */ + intval = osp->os_flags; + if (should_bswap) + intval = BSWAP_64(intval); + intval &= ~OBJSET_CRYPT_PORTABLE_FLAGS_MASK; + /* CONSTCOND */ + if (!ZFS_HOST_BYTEORDER) + intval = BSWAP_64(intval); + + cd.cd_length = sizeof (uint64_t); + cd.cd_raw.iov_base = (char *)&intval; + cd.cd_raw.iov_len = cd.cd_length; + + ret = crypto_mac_update(ctx, &cd, NULL); + if (ret != CRYPTO_SUCCESS) { + ret = SET_ERROR(EIO); + goto error; + } + + /* add in fields from the user accounting dnodes */ + ret = zio_crypt_do_dnode_hmac_updates(ctx, key->zk_version, + should_bswap, &osp->os_userused_dnode); + if (ret) + goto error; + + ret = zio_crypt_do_dnode_hmac_updates(ctx, key->zk_version, + should_bswap, &osp->os_groupused_dnode); + if (ret) + goto error; + + /* store the final digest in a temporary buffer and copy what we need */ + cd.cd_length = SHA512_DIGEST_LENGTH; + cd.cd_raw.iov_base = (char *)raw_local_mac; + cd.cd_raw.iov_len = cd.cd_length; + + ret = crypto_mac_final(ctx, &cd, NULL); + if (ret != CRYPTO_SUCCESS) { + ret = SET_ERROR(EIO); + goto error; + } + + bcopy(raw_local_mac, local_mac, ZIO_OBJSET_MAC_LEN); + + return (0); + +error: + bzero(portable_mac, ZIO_OBJSET_MAC_LEN); + bzero(local_mac, ZIO_OBJSET_MAC_LEN); + return (ret); +} + +static void +zio_crypt_destroy_uio(uio_t *uio) +{ + if (uio->uio_iov) + kmem_free(uio->uio_iov, uio->uio_iovcnt * sizeof (iovec_t)); +} + +/* + * This function parses an uncompressed indirect block and returns a checksum + * of all the portable fields from all of the contained bps. 
The portable + * fields are the MAC and all of the fields from blk_prop except for the dedup, + * checksum, and psize bits. For an explanation of the purpose of this, see + * the comment block on object set authentication. + */ +static int +zio_crypt_do_indirect_mac_checksum_impl(boolean_t generate, void *buf, + uint_t datalen, uint64_t version, boolean_t byteswap, uint8_t *cksum) +{ + blkptr_t *bp; + int i, epb = datalen >> SPA_BLKPTRSHIFT; + SHA2_CTX ctx; + uint8_t digestbuf[SHA512_DIGEST_LENGTH]; + + /* checksum all of the MACs from the layer below */ + SHA2Init(SHA512, &ctx); + for (i = 0, bp = buf; i < epb; i++, bp++) { + zio_crypt_bp_do_indrect_checksum_updates(&ctx, version, + byteswap, bp); + } + SHA2Final(digestbuf, &ctx); + + if (generate) { + bcopy(digestbuf, cksum, ZIO_DATA_MAC_LEN); + return (0); + } + + if (bcmp(digestbuf, cksum, ZIO_DATA_MAC_LEN) != 0) + return (SET_ERROR(ECKSUM)); + + return (0); +} + +int +zio_crypt_do_indirect_mac_checksum(boolean_t generate, void *buf, + uint_t datalen, boolean_t byteswap, uint8_t *cksum) +{ + int ret; + + /* + * Unfortunately, callers of this function will not always have + * easy access to the on-disk format version. This info is + * normally found in the DSL Crypto Key, but the checksum-of-MACs + * is expected to be verifiable even when the key isn't loaded. + * Here, instead of doing a ZAP lookup for the version for each + * zio, we simply try both existing formats. 
+ */ + ret = zio_crypt_do_indirect_mac_checksum_impl(generate, buf, + datalen, ZIO_CRYPT_KEY_CURRENT_VERSION, byteswap, cksum); + if (ret == ECKSUM) { + ASSERT(!generate); + ret = zio_crypt_do_indirect_mac_checksum_impl(generate, + buf, datalen, 0, byteswap, cksum); + } + + return (ret); +} + +int +zio_crypt_do_indirect_mac_checksum_abd(boolean_t generate, abd_t *abd, + uint_t datalen, boolean_t byteswap, uint8_t *cksum) +{ + int ret; + void *buf; + + buf = abd_borrow_buf_copy(abd, datalen); + ret = zio_crypt_do_indirect_mac_checksum(generate, buf, datalen, + byteswap, cksum); + abd_return_buf(abd, buf, datalen); + + return (ret); +} + +/* + * Special case handling routine for encrypting / decrypting ZIL blocks. + * We do not check for the older ZIL chain because the encryption feature + * was not available before the newer ZIL chain was introduced. The goal + * here is to encrypt everything except the blkptr_t of a lr_write_t and + * the zil_chain_t header. Everything that is not encrypted is authenticated. + */ + +/* ARGSUSED */ +static int +zio_crypt_init_uios_zil(boolean_t encrypt, uint8_t *plainbuf, + uint8_t *cipherbuf, uint_t datalen, boolean_t byteswap, uio_t *puio, + uio_t *cuio, uint_t *enc_len, uint8_t **authbuf, uint_t *auth_len, + boolean_t *no_crypt) +{ + int ret; + uint64_t txtype, lr_len; + uint_t nr_src, nr_dst, crypt_len; + uint_t aad_len = 0, nr_iovecs = 0, total_len = 0; + iovec_t *src_iovecs = NULL, *dst_iovecs = NULL; + uint8_t *src, *dst, *slrp, *dlrp, *blkend, *aadp; + zil_chain_t *zilc; + lr_t *lr; + uint8_t *aadbuf = zio_buf_alloc(datalen); + + /* cipherbuf always needs an extra iovec for the MAC */ + if (encrypt) { + src = plainbuf; + dst = cipherbuf; + nr_src = 0; + nr_dst = 1; + } else { + src = cipherbuf; + dst = plainbuf; + nr_src = 1; + nr_dst = 0; + } + + /* find the start and end record of the log block */ + zilc = (zil_chain_t *)src; + slrp = src + sizeof (zil_chain_t); + aadp = aadbuf; + blkend = src + ((byteswap) ? 
BSWAP_64(zilc->zc_nused) : zilc->zc_nused); + + /* calculate the number of encrypted iovecs we will need */ + for (; slrp < blkend; slrp += lr_len) { + lr = (lr_t *)slrp; + + if (!byteswap) { + txtype = lr->lrc_txtype; + lr_len = lr->lrc_reclen; + } else { + txtype = BSWAP_64(lr->lrc_txtype); + lr_len = BSWAP_64(lr->lrc_reclen); + } + + nr_iovecs++; + if (txtype == TX_WRITE && lr_len != sizeof (lr_write_t)) + nr_iovecs++; + } + + nr_src += nr_iovecs; + nr_dst += nr_iovecs; + + /* allocate the iovec arrays */ + if (nr_src != 0) { + src_iovecs = kmem_alloc(nr_src * sizeof (iovec_t), KM_SLEEP); + if (src_iovecs == NULL) { + ret = SET_ERROR(ENOMEM); + goto error; + } + } + + if (nr_dst != 0) { + dst_iovecs = kmem_alloc(nr_dst * sizeof (iovec_t), KM_SLEEP); + if (dst_iovecs == NULL) { + ret = SET_ERROR(ENOMEM); + goto error; + } + } + + /* + * Copy the plain zil header over and authenticate everything except + * the checksum that will store our MAC. If we are writing the data + * the embedded checksum will not have been calculated yet, so we don't + * authenticate that. 
+ */ + bcopy(src, dst, sizeof (zil_chain_t)); + bcopy(src, aadp, sizeof (zil_chain_t) - sizeof (zio_eck_t)); + aadp += sizeof (zil_chain_t) - sizeof (zio_eck_t); + aad_len += sizeof (zil_chain_t) - sizeof (zio_eck_t); + + /* loop over records again, filling in iovecs */ + nr_iovecs = 0; + slrp = src + sizeof (zil_chain_t); + dlrp = dst + sizeof (zil_chain_t); + + for (; slrp < blkend; slrp += lr_len, dlrp += lr_len) { + lr = (lr_t *)slrp; + + if (!byteswap) { + txtype = lr->lrc_txtype; + lr_len = lr->lrc_reclen; + } else { + txtype = BSWAP_64(lr->lrc_txtype); + lr_len = BSWAP_64(lr->lrc_reclen); + } + + /* copy the common lr_t */ + bcopy(slrp, dlrp, sizeof (lr_t)); + bcopy(slrp, aadp, sizeof (lr_t)); + aadp += sizeof (lr_t); + aad_len += sizeof (lr_t); + + ASSERT3P(src_iovecs, !=, NULL); + ASSERT3P(dst_iovecs, !=, NULL); + + /* + * If this is a TX_WRITE record we want to encrypt everything + * except the bp if it exists. If the bp does exist we want to + * authenticate it. + */ + if (txtype == TX_WRITE) { + crypt_len = sizeof (lr_write_t) - + sizeof (lr_t) - sizeof (blkptr_t); + src_iovecs[nr_iovecs].iov_base = (char *)slrp + + sizeof (lr_t); + src_iovecs[nr_iovecs].iov_len = crypt_len; + dst_iovecs[nr_iovecs].iov_base = (char *)dlrp + + sizeof (lr_t); + dst_iovecs[nr_iovecs].iov_len = crypt_len; + + /* copy the bp now since it will not be encrypted */ + bcopy(slrp + sizeof (lr_write_t) - sizeof (blkptr_t), + dlrp + sizeof (lr_write_t) - sizeof (blkptr_t), + sizeof (blkptr_t)); + bcopy(slrp + sizeof (lr_write_t) - sizeof (blkptr_t), + aadp, sizeof (blkptr_t)); + aadp += sizeof (blkptr_t); + aad_len += sizeof (blkptr_t); + nr_iovecs++; + total_len += crypt_len; + + if (lr_len != sizeof (lr_write_t)) { + crypt_len = lr_len - sizeof (lr_write_t); + src_iovecs[nr_iovecs].iov_base = (char *) + slrp + sizeof (lr_write_t); + src_iovecs[nr_iovecs].iov_len = crypt_len; + dst_iovecs[nr_iovecs].iov_base = (char *) + dlrp + sizeof (lr_write_t); + dst_iovecs[nr_iovecs].iov_len = 
crypt_len; + nr_iovecs++; + total_len += crypt_len; + } + } else { + crypt_len = lr_len - sizeof (lr_t); + src_iovecs[nr_iovecs].iov_base = (char *)slrp + + sizeof (lr_t); + src_iovecs[nr_iovecs].iov_len = crypt_len; + dst_iovecs[nr_iovecs].iov_base = (char *)dlrp + + sizeof (lr_t); + dst_iovecs[nr_iovecs].iov_len = crypt_len; + nr_iovecs++; + total_len += crypt_len; + } + } + + *no_crypt = (nr_iovecs == 0); + *enc_len = total_len; + *authbuf = aadbuf; + *auth_len = aad_len; + + if (encrypt) { + puio->uio_iov = src_iovecs; + puio->uio_iovcnt = nr_src; + cuio->uio_iov = dst_iovecs; + cuio->uio_iovcnt = nr_dst; + } else { + puio->uio_iov = dst_iovecs; + puio->uio_iovcnt = nr_dst; + cuio->uio_iov = src_iovecs; + cuio->uio_iovcnt = nr_src; + } + + return (0); + +error: + zio_buf_free(aadbuf, datalen); + if (src_iovecs != NULL) + kmem_free(src_iovecs, nr_src * sizeof (iovec_t)); + if (dst_iovecs != NULL) + kmem_free(dst_iovecs, nr_dst * sizeof (iovec_t)); + + *enc_len = 0; + *authbuf = NULL; + *auth_len = 0; + *no_crypt = B_FALSE; + puio->uio_iov = NULL; + puio->uio_iovcnt = 0; + cuio->uio_iov = NULL; + cuio->uio_iovcnt = 0; + return (ret); +} + +/* + * Special case handling routine for encrypting / decrypting dnode blocks. 
+ */ +static int +zio_crypt_init_uios_dnode(boolean_t encrypt, uint64_t version, + uint8_t *plainbuf, uint8_t *cipherbuf, uint_t datalen, boolean_t byteswap, + uio_t *puio, uio_t *cuio, uint_t *enc_len, uint8_t **authbuf, + uint_t *auth_len, boolean_t *no_crypt) +{ + int ret; + uint_t nr_src, nr_dst, crypt_len; + uint_t aad_len = 0, nr_iovecs = 0, total_len = 0; + uint_t i, j, max_dnp = datalen >> DNODE_SHIFT; + iovec_t *src_iovecs = NULL, *dst_iovecs = NULL; + uint8_t *src, *dst, *aadp; + dnode_phys_t *dnp, *adnp, *sdnp, *ddnp; + uint8_t *aadbuf = zio_buf_alloc(datalen); + + if (encrypt) { + src = plainbuf; + dst = cipherbuf; + nr_src = 0; + nr_dst = 1; + } else { + src = cipherbuf; + dst = plainbuf; + nr_src = 1; + nr_dst = 0; + } + + sdnp = (dnode_phys_t *)src; + ddnp = (dnode_phys_t *)dst; + aadp = aadbuf; + + /* + * Count the number of iovecs we will need to do the encryption by + * counting the number of bonus buffers that need to be encrypted. + */ + for (i = 0; i < max_dnp; i += sdnp[i].dn_extra_slots + 1) { + /* + * This block may still be byteswapped. However, all of the + * values we use are either uint8_t's (for which byteswapping + * is a noop) or a * != 0 check, which will work regardless + * of whether or not we byteswap. + */ + if (sdnp[i].dn_type != DMU_OT_NONE && + DMU_OT_IS_ENCRYPTED(sdnp[i].dn_bonustype) && + sdnp[i].dn_bonuslen != 0) { + nr_iovecs++; + } + } + + nr_src += nr_iovecs; + nr_dst += nr_iovecs; + + if (nr_src != 0) { + src_iovecs = kmem_alloc(nr_src * sizeof (iovec_t), KM_SLEEP); + if (src_iovecs == NULL) { + ret = SET_ERROR(ENOMEM); + goto error; + } + } + + if (nr_dst != 0) { + dst_iovecs = kmem_alloc(nr_dst * sizeof (iovec_t), KM_SLEEP); + if (dst_iovecs == NULL) { + ret = SET_ERROR(ENOMEM); + goto error; + } + } + + nr_iovecs = 0; + + /* + * Iterate through the dnodes again, this time filling in the uios + * we allocated earlier. We also concatenate any data we want to + * authenticate onto aadbuf. 
+ */ + for (i = 0; i < max_dnp; i += sdnp[i].dn_extra_slots + 1) { + dnp = &sdnp[i]; + /* copy over the core fields and blkptrs (kept as plaintext) */ + bcopy(dnp, &ddnp[i], (uint8_t *)DN_BONUS(dnp) - (uint8_t *)dnp); + if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) { + bcopy(DN_SPILL_BLKPTR(dnp), DN_SPILL_BLKPTR(&ddnp[i]), + sizeof (blkptr_t)); + } + + /* + * Handle authenticated data. We authenticate everything in + * the dnode that can be brought over when we do a raw send. + * This includes all of the core fields as well as the MACs + * stored in the bp checksums and all of the portable bits + * from blk_prop. We include the dnode padding here in case it + * ever gets used in the future. Some dn_flags and dn_used are + * not portable so we mask those values out of the + * authenticated data. + */ + crypt_len = offsetof(dnode_phys_t, dn_blkptr); + bcopy(dnp, aadp, crypt_len); + adnp = (dnode_phys_t *)aadp; + adnp->dn_flags &= DNODE_CRYPT_PORTABLE_FLAGS_MASK; + adnp->dn_used = 0; + aadp += crypt_len; + aad_len += crypt_len; + + for (j = 0; j < dnp->dn_nblkptr; j++) { + zio_crypt_bp_do_aad_updates(&aadp, &aad_len, + version, byteswap, &dnp->dn_blkptr[j]); + } + + if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) { + zio_crypt_bp_do_aad_updates(&aadp, &aad_len, + version, byteswap, DN_SPILL_BLKPTR(dnp)); + } + + /* + * If this bonus buffer needs to be encrypted, we prepare an + * iovec_t. The encryption / decryption functions will fill + * this in for us with the encrypted or decrypted data. + * Otherwise we add the bonus buffer to the authenticated + * data buffer and copy it over to the destination. The + * encrypted iovec extends to DN_MAX_BONUS_LEN(dnp) so that + * we can guarantee alignment with the AES block size + * (128 bits). 
+ */ + crypt_len = DN_MAX_BONUS_LEN(dnp); + if (dnp->dn_type != DMU_OT_NONE && + DMU_OT_IS_ENCRYPTED(dnp->dn_bonustype) && + dnp->dn_bonuslen != 0) { + ASSERT3U(nr_iovecs, <, nr_src); + ASSERT3U(nr_iovecs, <, nr_dst); + ASSERT3P(src_iovecs, !=, NULL); + ASSERT3P(dst_iovecs, !=, NULL); + src_iovecs[nr_iovecs].iov_base = DN_BONUS(dnp); + src_iovecs[nr_iovecs].iov_len = crypt_len; + dst_iovecs[nr_iovecs].iov_base = DN_BONUS(&ddnp[i]); + dst_iovecs[nr_iovecs].iov_len = crypt_len; + + nr_iovecs++; + total_len += crypt_len; + } else { + bcopy(DN_BONUS(dnp), DN_BONUS(&ddnp[i]), crypt_len); + bcopy(DN_BONUS(dnp), aadp, crypt_len); + aadp += crypt_len; + aad_len += crypt_len; + } + } + + *no_crypt = (nr_iovecs == 0); + *enc_len = total_len; + *authbuf = aadbuf; + *auth_len = aad_len; + + if (encrypt) { + puio->uio_iov = src_iovecs; + puio->uio_iovcnt = nr_src; + cuio->uio_iov = dst_iovecs; + cuio->uio_iovcnt = nr_dst; + } else { + puio->uio_iov = dst_iovecs; + puio->uio_iovcnt = nr_dst; + cuio->uio_iov = src_iovecs; + cuio->uio_iovcnt = nr_src; + } + + return (0); + +error: + zio_buf_free(aadbuf, datalen); + if (src_iovecs != NULL) + kmem_free(src_iovecs, nr_src * sizeof (iovec_t)); + if (dst_iovecs != NULL) + kmem_free(dst_iovecs, nr_dst * sizeof (iovec_t)); + + *enc_len = 0; + *authbuf = NULL; + *auth_len = 0; + *no_crypt = B_FALSE; + puio->uio_iov = NULL; + puio->uio_iovcnt = 0; + cuio->uio_iov = NULL; + cuio->uio_iovcnt = 0; + return (ret); +} + +/* ARGSUSED */ +static int +zio_crypt_init_uios_normal(boolean_t encrypt, uint8_t *plainbuf, + uint8_t *cipherbuf, uint_t datalen, uio_t *puio, uio_t *cuio, + uint_t *enc_len) +{ + int ret; + uint_t nr_plain = 1, nr_cipher = 2; + iovec_t *plain_iovecs = NULL, *cipher_iovecs = NULL; + + /* allocate the iovecs for the plain and cipher data */ + plain_iovecs = kmem_alloc(nr_plain * sizeof (iovec_t), + KM_SLEEP); + if (!plain_iovecs) { + ret = SET_ERROR(ENOMEM); + goto error; + } + + cipher_iovecs = kmem_alloc(nr_cipher * sizeof 
(iovec_t), + KM_SLEEP); + if (!cipher_iovecs) { + ret = SET_ERROR(ENOMEM); + goto error; + } + + plain_iovecs[0].iov_base = (void *)plainbuf; + plain_iovecs[0].iov_len = datalen; + cipher_iovecs[0].iov_base = (void *)cipherbuf; + cipher_iovecs[0].iov_len = datalen; + + *enc_len = datalen; + puio->uio_iov = plain_iovecs; + puio->uio_iovcnt = nr_plain; + cuio->uio_iov = cipher_iovecs; + cuio->uio_iovcnt = nr_cipher; + + return (0); + +error: + if (plain_iovecs != NULL) + kmem_free(plain_iovecs, nr_plain * sizeof (iovec_t)); + if (cipher_iovecs != NULL) + kmem_free(cipher_iovecs, nr_cipher * sizeof (iovec_t)); + + *enc_len = 0; + puio->uio_iov = NULL; + puio->uio_iovcnt = 0; + cuio->uio_iov = NULL; + cuio->uio_iovcnt = 0; + return (ret); +} + +/* + * This function builds up the plaintext (puio) and ciphertext (cuio) uios so + * that they can be used for encryption and decryption by zio_do_crypt_uio(). + * Most blocks will use zio_crypt_init_uios_normal(), with ZIL and dnode blocks + * requiring special handling to parse out pieces that are to be encrypted. The + * authbuf is used by these special cases to store additional authenticated + * data (AAD) for the encryption modes. 
+ */ +/* ARGSUSED */ +static int +zio_crypt_init_uios(boolean_t encrypt, uint64_t version, dmu_object_type_t ot, + uint8_t *plainbuf, uint8_t *cipherbuf, uint_t datalen, boolean_t byteswap, + uint8_t *mac, uio_t *puio, uio_t *cuio, uint_t *enc_len, uint8_t **authbuf, + uint_t *auth_len, boolean_t *no_crypt) +{ + int ret; + iovec_t *mac_iov; + + ASSERT(DMU_OT_IS_ENCRYPTED(ot) || ot == DMU_OT_NONE); + + /* route to handler */ + switch (ot) { + case DMU_OT_INTENT_LOG: + ret = zio_crypt_init_uios_zil(encrypt, plainbuf, cipherbuf, + datalen, byteswap, puio, cuio, enc_len, authbuf, auth_len, + no_crypt); + break; + case DMU_OT_DNODE: + ret = zio_crypt_init_uios_dnode(encrypt, version, plainbuf, + cipherbuf, datalen, byteswap, puio, cuio, enc_len, authbuf, + auth_len, no_crypt); + break; + default: + ret = zio_crypt_init_uios_normal(encrypt, plainbuf, cipherbuf, + datalen, puio, cuio, enc_len); + *authbuf = NULL; + *auth_len = 0; + *no_crypt = B_FALSE; + break; + } + + if (ret != 0) + goto error; + + /* populate the uios */ + puio->uio_segflg = UIO_SYSSPACE; + cuio->uio_segflg = UIO_SYSSPACE; + + mac_iov = ((iovec_t *)&cuio->uio_iov[cuio->uio_iovcnt - 1]); + mac_iov->iov_base = (void *)mac; + mac_iov->iov_len = ZIO_DATA_MAC_LEN; + + return (0); + +error: + return (ret); +} + +/* + * Primary encryption / decryption entrypoint for zio data. 
+ */ +int +zio_do_crypt_data(boolean_t encrypt, zio_crypt_key_t *key, + dmu_object_type_t ot, boolean_t byteswap, uint8_t *salt, uint8_t *iv, + uint8_t *mac, uint_t datalen, uint8_t *plainbuf, uint8_t *cipherbuf, + boolean_t *no_crypt) +{ + int ret; + boolean_t locked = B_FALSE; + uint64_t crypt = key->zk_crypt; + uint_t keydata_len = zio_crypt_table[crypt].ci_keylen; + uint_t enc_len, auth_len; + uio_t puio, cuio; + uint8_t enc_keydata[MASTER_KEY_MAX_LEN]; + crypto_key_t tmp_ckey, *ckey = NULL; + crypto_ctx_template_t tmpl; + uint8_t *authbuf = NULL; + + bzero(&puio, sizeof (uio_t)); + bzero(&cuio, sizeof (uio_t)); + + /* create uios for encryption */ + ret = zio_crypt_init_uios(encrypt, key->zk_version, ot, plainbuf, + cipherbuf, datalen, byteswap, mac, &puio, &cuio, &enc_len, + &authbuf, &auth_len, no_crypt); + if (ret != 0) + return (ret); + + /* + * If the needed key is the current one, just use it. Otherwise we + * need to generate a temporary one from the given salt + master key. + * If we are encrypting, we must return a copy of the current salt + * so that it can be stored in the blkptr_t. 
+ */ + rw_enter(&key->zk_salt_lock, RW_READER); + locked = B_TRUE; + + if (bcmp(salt, key->zk_salt, ZIO_DATA_SALT_LEN) == 0) { + ckey = &key->zk_current_key; + tmpl = key->zk_current_tmpl; + } else { + rw_exit(&key->zk_salt_lock); + locked = B_FALSE; + + ret = hkdf_sha512(key->zk_master_keydata, keydata_len, NULL, 0, + salt, ZIO_DATA_SALT_LEN, enc_keydata, keydata_len); + if (ret != 0) + goto error; + + tmp_ckey.ck_format = CRYPTO_KEY_RAW; + tmp_ckey.ck_data = enc_keydata; + tmp_ckey.ck_length = CRYPTO_BYTES2BITS(keydata_len); + + ckey = &tmp_ckey; + tmpl = NULL; + } + + /* perform the encryption / decryption */ + ret = zio_do_crypt_uio(encrypt, key->zk_crypt, ckey, tmpl, iv, enc_len, + &puio, &cuio, authbuf, auth_len); + if (ret != 0) + goto error; + + if (locked) { + rw_exit(&key->zk_salt_lock); + locked = B_FALSE; + } + + if (authbuf != NULL) + zio_buf_free(authbuf, datalen); + if (ckey == &tmp_ckey) + bzero(enc_keydata, keydata_len); + zio_crypt_destroy_uio(&puio); + zio_crypt_destroy_uio(&cuio); + + return (0); + +error: + if (!encrypt) { + if (failed_decrypt_buf != NULL) + kmem_free(failed_decrypt_buf, failed_decrypt_size); + failed_decrypt_buf = kmem_alloc(datalen, KM_SLEEP); + failed_decrypt_size = datalen; + bcopy(cipherbuf, failed_decrypt_buf, datalen); + } + if (locked) + rw_exit(&key->zk_salt_lock); + if (authbuf != NULL) + zio_buf_free(authbuf, datalen); + if (ckey == &tmp_ckey) + bzero(enc_keydata, keydata_len); + zio_crypt_destroy_uio(&puio); + zio_crypt_destroy_uio(&cuio); + + return (ret); +} + +/* + * Simple wrapper around zio_do_crypt_data() to work with abd's instead of + * linear buffers. 
+ */ +int +zio_do_crypt_abd(boolean_t encrypt, zio_crypt_key_t *key, dmu_object_type_t ot, + boolean_t byteswap, uint8_t *salt, uint8_t *iv, uint8_t *mac, + uint_t datalen, abd_t *pabd, abd_t *cabd, boolean_t *no_crypt) +{ + int ret; + void *ptmp, *ctmp; + + if (encrypt) { + ptmp = abd_borrow_buf_copy(pabd, datalen); + ctmp = abd_borrow_buf(cabd, datalen); + } else { + ptmp = abd_borrow_buf(pabd, datalen); + ctmp = abd_borrow_buf_copy(cabd, datalen); + } + + ret = zio_do_crypt_data(encrypt, key, ot, byteswap, salt, iv, mac, + datalen, ptmp, ctmp, no_crypt); + if (ret != 0) + goto error; + + if (encrypt) { + abd_return_buf(pabd, ptmp, datalen); + abd_return_buf_copy(cabd, ctmp, datalen); + } else { + abd_return_buf_copy(pabd, ptmp, datalen); + abd_return_buf(cabd, ctmp, datalen); + } + + return (0); + +error: + if (encrypt) { + abd_return_buf(pabd, ptmp, datalen); + abd_return_buf_copy(cabd, ctmp, datalen); + } else { + abd_return_buf_copy(pabd, ptmp, datalen); + abd_return_buf(cabd, ctmp, datalen); + } + + return (ret); +} diff --git a/usr/src/uts/common/fs/zfs/zio_inject.c b/usr/src/uts/common/fs/zfs/zio_inject.c index 71b859bc3d..f13fb18c16 100644 --- a/usr/src/uts/common/fs/zfs/zio_inject.c +++ b/usr/src/uts/common/fs/zfs/zio_inject.c @@ -194,6 +194,37 @@ zio_match_dva(zio_t *zio) /* + * Inject a decryption failure. Decryption failures can occur in + * both the ARC and the ZIO layers. 
+ */ +int +zio_handle_decrypt_injection(spa_t *spa, const zbookmark_phys_t *zb, + uint64_t type, int error) +{ + int ret = 0; + inject_handler_t *handler; + + rw_enter(&inject_lock, RW_READER); + + for (handler = list_head(&inject_handlers); handler != NULL; + handler = list_next(&inject_handlers, handler)) { + + if (spa != handler->zi_spa || + handler->zi_record.zi_cmd != ZINJECT_DECRYPT_FAULT) + continue; + + if (zio_match_handler((zbookmark_phys_t *)zb, type, ZI_NO_DVA, + &handler->zi_record, error)) { + ret = error; + break; + } + } + + rw_exit(&inject_lock); + return (ret); +} + +/* * Determine if the I/O in question should return failure. Returns the errno * to be returned to the caller. */ diff --git a/usr/src/uts/common/fs/zfs/zvol.c b/usr/src/uts/common/fs/zfs/zvol.c index c6d9378649..2ac660e9f7 100644 --- a/usr/src/uts/common/fs/zfs/zvol.c +++ b/usr/src/uts/common/fs/zfs/zvol.c @@ -396,6 +396,7 @@ zvol_replay_truncate(void *arg1, void *arg2, boolean_t byteswap) * Replay a TX_WRITE ZIL transaction that didn't get committed * after a system failure */ +/* ARGSUSED */ static int zvol_replay_write(void *arg1, void *arg2, boolean_t byteswap) { @@ -504,7 +505,7 @@ zvol_create_minor(const char *name) } /* lie and say we're read-only */ - error = dmu_objset_own(name, DMU_OST_ZVOL, B_TRUE, FTAG, &os); + error = dmu_objset_own(name, DMU_OST_ZVOL, B_TRUE, B_TRUE, FTAG, &os); if (error) { mutex_exit(&zfsdev_state_lock); @@ -512,13 +513,13 @@ zvol_create_minor(const char *name) } if ((minor = zfsdev_minor_alloc()) == 0) { - dmu_objset_disown(os, FTAG); + dmu_objset_disown(os, 1, FTAG); mutex_exit(&zfsdev_state_lock); return (SET_ERROR(ENXIO)); } if (ddi_soft_state_zalloc(zfsdev_state, minor) != DDI_SUCCESS) { - dmu_objset_disown(os, FTAG); + dmu_objset_disown(os, 1, FTAG); mutex_exit(&zfsdev_state_lock); return (SET_ERROR(EAGAIN)); } @@ -530,7 +531,7 @@ zvol_create_minor(const char *name) if (ddi_create_minor_node(zfs_dip, chrbuf, S_IFCHR, minor, DDI_PSEUDO, 0) == 
DDI_FAILURE) { ddi_soft_state_free(zfsdev_state, minor); - dmu_objset_disown(os, FTAG); + dmu_objset_disown(os, 1, FTAG); mutex_exit(&zfsdev_state_lock); return (SET_ERROR(EAGAIN)); } @@ -541,7 +542,7 @@ zvol_create_minor(const char *name) minor, DDI_PSEUDO, 0) == DDI_FAILURE) { ddi_remove_minor_node(zfs_dip, chrbuf); ddi_soft_state_free(zfsdev_state, minor); - dmu_objset_disown(os, FTAG); + dmu_objset_disown(os, 1, FTAG); mutex_exit(&zfsdev_state_lock); return (SET_ERROR(EAGAIN)); } @@ -569,7 +570,7 @@ zvol_create_minor(const char *name) else zil_replay(os, zv, zvol_replay_vector); } - dmu_objset_disown(os, FTAG); + dmu_objset_disown(os, 1, FTAG); zv->zv_objset = NULL; zvol_minors++; @@ -633,7 +634,7 @@ zvol_first_open(zvol_state_t *zv) uint64_t readonly; /* lie and say we're read-only */ - error = dmu_objset_own(zv->zv_name, DMU_OST_ZVOL, B_TRUE, + error = dmu_objset_own(zv->zv_name, DMU_OST_ZVOL, B_TRUE, B_TRUE, zvol_tag, &os); if (error) return (error); @@ -642,13 +643,13 @@ zvol_first_open(zvol_state_t *zv) error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize); if (error) { ASSERT(error == 0); - dmu_objset_disown(os, zvol_tag); + dmu_objset_disown(os, 1, zvol_tag); return (error); } error = dnode_hold(os, ZVOL_OBJ, zvol_tag, &zv->zv_dn); if (error) { - dmu_objset_disown(os, zvol_tag); + dmu_objset_disown(os, 1, zvol_tag); return (error); } @@ -682,7 +683,7 @@ zvol_last_close(zvol_state_t *zv) txg_wait_synced(dmu_objset_pool(zv->zv_objset), 0); dmu_objset_evict_dbufs(zv->zv_objset); - dmu_objset_disown(zv->zv_objset, zvol_tag); + dmu_objset_disown(zv->zv_objset, 1, zvol_tag); zv->zv_objset = NULL; } @@ -730,6 +731,7 @@ zvol_update_volsize(objset_t *os, uint64_t volsize) { dmu_tx_t *tx; int error; + uint64_t txg; ASSERT(MUTEX_HELD(&zfsdev_state_lock)); @@ -741,11 +743,14 @@ zvol_update_volsize(objset_t *os, uint64_t volsize) dmu_tx_abort(tx); return (error); } + txg = dmu_tx_get_txg(tx); error = zap_update(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize, tx); 
dmu_tx_commit(tx); + txg_wait_synced(dmu_objset_pool(os), txg); + if (error == 0) error = dmu_free_long_range(os, ZVOL_OBJ, volsize, DMU_OBJECT_END); @@ -850,7 +855,7 @@ zvol_set_volsize(const char *name, uint64_t volsize) zv = zvol_minor_lookup(name); if (zv == NULL || zv->zv_objset == NULL) { - if ((error = dmu_objset_own(name, DMU_OST_ZVOL, B_FALSE, + if ((error = dmu_objset_own(name, DMU_OST_ZVOL, B_FALSE, B_TRUE, FTAG, &os)) != 0) { mutex_exit(&zfsdev_state_lock); return (error); @@ -872,7 +877,7 @@ zvol_set_volsize(const char *name, uint64_t volsize) error = zvol_update_live_volsize(zv, volsize); out: if (owned) { - dmu_objset_disown(os, FTAG); + dmu_objset_disown(os, B_TRUE, FTAG); if (zv != NULL) zv->zv_objset = NULL; } @@ -901,7 +906,12 @@ zvol_open(dev_t *devp, int flag, int otyp, cred_t *cr) mutex_exit(&zfsdev_state_lock); return (err); } - if ((flag & FWRITE) && (zv->zv_flags & ZVOL_RDONLY)) { + /* + * Check for a bad on-disk format version now since we + * lied about owning the dataset readonly before. 
+ */ + if ((flag & FWRITE) && ((zv->zv_flags & ZVOL_RDONLY) || + dmu_objset_incompatible_encryption_version(zv->zv_objset))) { err = SET_ERROR(EROFS); goto out; } @@ -2099,6 +2109,9 @@ zvol_dumpify(zvol_state_t *zv) if (zv->zv_flags & ZVOL_RDONLY) return (SET_ERROR(EROFS)); + if (os->os_encrypted) + return (SET_ERROR(ENOTSUP)); + if (zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ, ZVOL_DUMPSIZE, 8, 1, &dumpsize) != 0 || dumpsize != zv->zv_volsize) { boolean_t resize = (dumpsize > 0); diff --git a/usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd_zvol.c b/usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd_zvol.c index 1f4dd29b18..b7c8874116 100644 --- a/usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd_zvol.c +++ b/usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd_zvol.c @@ -367,8 +367,8 @@ sbd_zvol_rele_write_bufs(sbd_lu_t *sl, stmf_data_buf_t *dbuf) abuf = abp[i]; size = arc_buf_size(abuf); - dmu_assign_arcbuf_dnode(sl->sl_zvol_dn_hdl, toffset, abuf, - tx); + (void) dmu_assign_arcbuf_by_dnode(sl->sl_zvol_dn_hdl, + toffset, abuf, tx); toffset += size; resid -= size; } diff --git a/usr/src/uts/common/os/printf.c b/usr/src/uts/common/os/printf.c index 1870adf4b4..42669f096d 100644 --- a/usr/src/uts/common/os/printf.c +++ b/usr/src/uts/common/os/printf.c @@ -65,7 +65,7 @@ uint32_t panicbuf_log = PANICBUFSIZE; uint32_t panicbuf_index = PANICBUFSIZE; -static int aask, aok; +int aask, aok; static int ce_to_sl[CE_IGNORE] = { SL_NOTE, SL_NOTE, SL_WARN, SL_FATAL }; static char ce_prefix[CE_IGNORE][10] = { "", "NOTICE: ", "WARNING: ", "" }; static char ce_suffix[CE_IGNORE][2] = { "", "\n", "\n", "" }; diff --git a/usr/src/uts/common/sys/fm/fs/zfs.h b/usr/src/uts/common/sys/fm/fs/zfs.h index 029af540b3..51a12f6817 100644 --- a/usr/src/uts/common/sys/fm/fs/zfs.h +++ b/usr/src/uts/common/sys/fm/fs/zfs.h @@ -33,6 +33,7 @@ extern "C" { #define ZFS_ERROR_CLASS "fs.zfs" #define FM_EREPORT_ZFS_CHECKSUM "checksum" +#define FM_EREPORT_ZFS_AUTHENTICATION "authentication" #define FM_EREPORT_ZFS_IO "io" #define 
FM_EREPORT_ZFS_DATA "data" #define FM_EREPORT_ZFS_POOL "zpool" diff --git a/usr/src/uts/common/sys/fs/zfs.h b/usr/src/uts/common/sys/fs/zfs.h index 9942a4c561..a5b311e4f1 100644 --- a/usr/src/uts/common/sys/fs/zfs.h +++ b/usr/src/uts/common/sys/fs/zfs.h @@ -165,6 +165,15 @@ typedef enum { ZFS_PROP_RECEIVE_RESUME_TOKEN, ZFS_PROP_REMAPTXG, /* not exposed to the user */ ZFS_PROP_SPECIAL_SMALL_BLOCKS, + ZFS_PROP_ENCRYPTION, + ZFS_PROP_KEYLOCATION, + ZFS_PROP_KEYFORMAT, + ZFS_PROP_PBKDF2_SALT, + ZFS_PROP_PBKDF2_ITERS, + ZFS_PROP_ENCRYPTION_ROOT, + ZFS_PROP_KEY_GUID, + ZFS_PROP_KEYSTATUS, + ZFS_PROP_IVSET_GUID, /* not exposed to the user */ ZFS_NUM_PROPS } zfs_prop_t; @@ -275,6 +284,8 @@ boolean_t zfs_prop_readonly(zfs_prop_t); boolean_t zfs_prop_visible(zfs_prop_t prop); boolean_t zfs_prop_inheritable(zfs_prop_t); boolean_t zfs_prop_setonce(zfs_prop_t); +boolean_t zfs_prop_encryption_key_param(zfs_prop_t); +boolean_t zfs_prop_valid_keylocation(const char *, boolean_t); const char *zfs_prop_to_name(zfs_prop_t); zfs_prop_t zfs_name_to_prop(const char *); boolean_t zfs_prop_user(const char *); @@ -385,6 +396,30 @@ typedef enum { ZFS_REDUNDANT_METADATA_MOST } zfs_redundant_metadata_type_t; +typedef enum zfs_keystatus { + ZFS_KEYSTATUS_NONE = 0, + ZFS_KEYSTATUS_UNAVAILABLE, + ZFS_KEYSTATUS_AVAILABLE +} zfs_keystatus_t; + +typedef enum zfs_keyformat { + ZFS_KEYFORMAT_NONE = 0, + ZFS_KEYFORMAT_RAW, + ZFS_KEYFORMAT_HEX, + ZFS_KEYFORMAT_PASSPHRASE, + ZFS_KEYFORMAT_FORMATS +} zfs_keyformat_t; + +typedef enum zfs_key_location { + ZFS_KEYLOCATION_NONE = 0, + ZFS_KEYLOCATION_PROMPT, + ZFS_KEYLOCATION_URI, + ZFS_KEYLOCATION_LOCATIONS +} zfs_keylocation_t; + +#define DEFAULT_PBKDF2_ITERATIONS 350000 +#define MIN_PBKDF2_ITERATIONS 100000 + /* * On-disk version number. 
*/ @@ -594,6 +629,7 @@ typedef struct zpool_load_policy { #define ZPOOL_CONFIG_CAN_RDONLY "can_rdonly" /* not stored on disk */ #define ZPOOL_CONFIG_FEATURES_FOR_READ "features_for_read" #define ZPOOL_CONFIG_FEATURE_STATS "feature_stats" /* not stored on disk */ +#define ZPOOL_CONFIG_ERRATA "errata" /* not stored on disk */ #define ZPOOL_CONFIG_VDEV_TOP_ZAP "com.delphix:vdev_zap_top" #define ZPOOL_CONFIG_VDEV_LEAF_ZAP "com.delphix:vdev_zap_leaf" #define ZPOOL_CONFIG_HAS_PER_VDEV_ZAPS "com.delphix:has_per_vdev_zaps" @@ -825,6 +861,20 @@ typedef struct pool_scan_stat { uint64_t pss_issued; /* total bytes checked by scanner */ } pool_scan_stat_t; +/* + * Errata described by http://zfsonlinux.org/msg/ZFS-8000-ER. The ordering + * of this enum must be maintained to ensure the errata identifiers map to + * the correct documentation. New errata may only be appended to the list + * and must contain corresponding documentation at the above link. + */ +typedef enum zpool_errata { + ZPOOL_ERRATA_NONE, + ZPOOL_ERRATA_ZOL_2094_SCRUB, + ZPOOL_ERRATA_ZOL_2094_ASYNC_DESTROY, + ZPOOL_ERRATA_ZOL_6845_ENCRYPTION, + ZPOOL_ERRATA_ZOL_8308_ENCRYPTION, +} zpool_errata_t; + typedef struct pool_removal_stat { uint64_t prs_state; /* dsl_scan_state_t */ uint64_t prs_removing_vdev; @@ -1025,6 +1075,9 @@ typedef enum zfs_ioc { ZFS_IOC_POOL_DISCARD_CHECKPOINT, ZFS_IOC_POOL_INITIALIZE, ZFS_IOC_POOL_SYNC, + ZFS_IOC_LOAD_KEY, + ZFS_IOC_UNLOAD_KEY, + ZFS_IOC_CHANGE_KEY, ZFS_IOC_LAST } zfs_ioc_t; @@ -1041,7 +1094,10 @@ typedef enum { ZFS_ERR_DISCARDING_CHECKPOINT, ZFS_ERR_NO_CHECKPOINT, ZFS_ERR_DEVRM_IN_PROGRESS, - ZFS_ERR_VDEV_TOO_BIG + ZFS_ERR_VDEV_TOO_BIG, + ZFS_ERR_FROM_IVSET_GUID_MISSING, + ZFS_ERR_FROM_IVSET_GUID_MISMATCH, + ZFS_ERR_SPILL_BLOCK_FLAG_MISSING, } zfs_errno_t; /* @@ -1057,6 +1113,20 @@ typedef enum { SPA_LOAD_CREATE /* creation in progress */ } spa_load_state_t; +/* supported encryption algorithms */ +enum zio_encrypt { + ZIO_CRYPT_INHERIT = 0, + ZIO_CRYPT_ON, + ZIO_CRYPT_OFF, + 
ZIO_CRYPT_AES_128_CCM, + ZIO_CRYPT_AES_192_CCM, + ZIO_CRYPT_AES_256_CCM, + ZIO_CRYPT_AES_128_GCM, + ZIO_CRYPT_AES_192_GCM, + ZIO_CRYPT_AES_256_GCM, + ZIO_CRYPT_FUNCTIONS +}; + /* * Bookmark name values. */ @@ -1094,6 +1164,12 @@ typedef enum { #define ZPOOL_INITIALIZE_VDEVS "initialize_vdevs" /* + * Special nvlist name that will not have its args recorded in the pool's + * history log. + */ +#define ZPOOL_HIDDEN_ARGS "hidden_args" + +/* * Flags for ZFS_IOC_VDEV_SET_STATE */ #define ZFS_ONLINE_CHECKREMOVE 0x1 @@ -1113,6 +1189,7 @@ typedef enum { #define ZFS_IMPORT_CHECKPOINT 0x10 #define ZFS_IMPORT_TEMP_NAME 0x20 #define ZFS_IMPORT_SKIP_MMP 0x40 +#define ZFS_IMPORT_LOAD_KEYS 0x80 /* * Channel program argument/return nvlist keys and defaults. diff --git a/usr/src/uts/common/sys/mount.h b/usr/src/uts/common/sys/mount.h index 07151d1d13..b8f67a8e25 100644 --- a/usr/src/uts/common/sys/mount.h +++ b/usr/src/uts/common/sys/mount.h @@ -59,6 +59,12 @@ extern "C" { #define MS_NOSPLICE 0x1000 /* Don't splice fs instance into name space */ #define MS_NOCHECK 0x2000 /* Clustering: suppress mount busy checks */ /* + * MS_CRYPT indicates that encryption keys should be loaded if they are not + * already available. This is not defined in glibc, but it is never seen by + * the kernel so it will not cause any problems. + */ +#define MS_CRYPT 0x4000 +/* * Mask to sift out flag bits allowable from mount(2). */ #define MS_MASK \ |