8727 Native data and metadata encryption for zfs

Portions contributed by: Jorgen Lundman <lundman@lundman.net> Portions contributed by: Jerry Jelinek <jerry.jelinek@joyent.com> Portions contributed by: Paul Zuchowski <pzuchowski@datto.com> Portions contributed by: Tim Chase <tim@chase2k.com> Portions contributed by: Matthew Ahrens <mahrens@delphix.com> Portions contributed by: ab-oe <arkadiusz.bubala@open-e.com> Portions contributed by: Brian Behlendorf <behlendorf1@llnl.gov> Portions contributed by: loli10K <ezomori.nozomu@gmail.com> Portions contributed by: Igor K <igor@dilos.org> Portions contributed by: Richard Laager <rlaager@wiktel.com> Reviewed by: Jason Cohen <jwittlincohen@gmail.com> Reviewed by: Allan Jude <allanjude@freebsd.org> Reviewed by: George Melikov <mail@gmelikov.ru> Reviewed by: Paul Dagnelie <pcd@delphix.com> Reviewed by: RageLtMan <rageltman@sempervictus> Reviewed by: Matthew Thode <prometheanfire@gentoo.org> Reviewed by: Giuseppe Di Natale <dinatale2@llnl.gov> Reviewed by: Kash Pande <kash@tripleback.net> Reviewed by: Alek Pinchuk <apinchuk@datto.com> Reviewed by: Dan Kimmel <dan.kimmel@delphix.com> Reviewed by: David Quigley <david.quigley@intel.com> Reviewed by: Jorgen Lundman <lundman@lundman.net> Reviewed by: Matthew Ahrens <mahrens@delphix.com> Reviewed by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed by: Toomas Soome <tsoome@me.com> Reviewed by: C Fraire <cfraire@me.com> Reviewed by: Jason King <jason.king@joyent.com> Reviewed by: Andy Stormont <astormont@racktopsystems.com> Approved by: Garrett D'Amore <garrett@damore.org>
author: Tom Caputi <tcaputi@datto.com> 2019-06-25 19:39:35 +0000
committer: Jerry Jelinek <jerry.jelinek@joyent.com> 2019-06-25 19:40:06 +0000
commit: eb633035c80613ec93d62f90482837adaaf21a0a (patch)
tree: 67f2e3e15231d06a3525ce3958bbce24aa3de7e8 /usr/src/uts/common/fs
parent: 07eb1aef88b873c5c1036d9cf69820c1ef6a32fb (diff)
download: illumos-joyent-eb633035c80613ec93d62f90482837adaaf21a0a.tar.gz
74 files changed, 11514 insertions, 1205 deletions
diff --git a/usr/src/uts/common/fs/zfs/abd.c b/usr/src/uts/common/fs/zfs/abd.c
index 0ab3513718..5417514e41 100644
--- a/usr/src/uts/common/fs/zfs/abd.c
+++ b/usr/src/uts/common/fs/zfs/abd.c
@@ -427,8 +427,9 @@ abd_alloc_for_io(size_t size, boolean_t is_metadata)
  * buffer data with sabd. Use abd_put() to free. sabd must not be freed while
  * any derived ABDs exist.
  */
-abd_t *
-abd_get_offset(abd_t *sabd, size_t off)
+/* ARGSUSED */
+static inline abd_t *
+abd_get_offset_impl(abd_t *sabd, size_t off, size_t size)
 {
 	abd_t *abd;
 
@@ -480,6 +481,25 @@ abd_get_offset(abd_t *sabd, size_t off)
 	return (abd);
 }
 
+abd_t *
+abd_get_offset(abd_t *sabd, size_t off)
+{
+	size_t size = sabd->abd_size > off ? sabd->abd_size - off : 0;
+
+	VERIFY3U(size, >, 0);
+
+	return (abd_get_offset_impl(sabd, off, size));
+}
+
+abd_t *
+abd_get_offset_size(abd_t *sabd, size_t off, size_t size)
+{
+	ASSERT3U(off + size, <=, sabd->abd_size);
+
+	return (abd_get_offset_impl(sabd, off, size));
+}
+
+
 /*
  * Allocate a linear ABD structure for buf. You must free this with abd_put()
  * since the resulting ABD doesn't own its own buffer.
diff --git a/usr/src/uts/common/fs/zfs/arc.c b/usr/src/uts/common/fs/zfs/arc.c
index 3a07d72d93..90f5314d81 100644
--- a/usr/src/uts/common/fs/zfs/arc.c
+++ b/usr/src/uts/common/fs/zfs/arc.c
@@ -250,6 +250,21 @@
  * ARC is disabled, then the L2ARC's block must be transformed to look
  * like the physical block in the main data pool before comparing the
  * checksum and determining its validity.
+ *
+ * The L1ARC has a slightly different system for storing encrypted data.
+ * Raw (encrypted + possibly compressed) data has a few subtle differences from
+ * data that is just compressed. The biggest difference is that it is not
+ * possible to decrypt encrypted data (or vice versa) if the keys aren't loaded.
+ * The other difference is that encryption cannot be treated as a suggestion.
+ * If a caller would prefer compressed data, but they actually wind up with
+ * uncompressed data the worst thing that could happen is there might be a
+ * performance hit. If the caller requests encrypted data, however, we must be
+ * sure they actually get it or else secret information could be leaked. Raw
+ * data is stored in hdr->b_crypt_hdr.b_rabd. An encrypted header, therefore,
+ * may have both an encrypted version and a decrypted version of its data at
+ * once. When a caller needs a raw arc_buf_t, it is allocated and the data is
+ * copied out of this header. To avoid complications with b_pabd, raw buffers
+ * cannot be shared.
  */
 
 #include <sys/spa.h>
@@ -266,6 +281,8 @@
 #include <sys/zio_checksum.h>
 #include <sys/multilist.h>
 #include <sys/abd.h>
+#include <sys/zil.h>
+#include <sys/fm/fs/zfs.h>
 #ifdef _KERNEL
 #include <sys/vmsystm.h>
 #include <vm/anon.h>
@@ -481,7 +498,7 @@ typedef struct arc_stats {
 	kstat_named_t arcstat_evict_skip;
 	/*
 	 * Number of times arc_evict_state() was unable to evict enough
-	 * buffers to reach it's target amount.
+	 * buffers to reach its target amount.
 	 */
 	kstat_named_t arcstat_evict_not_enough;
 	kstat_named_t arcstat_evict_l2_cached;
@@ -883,7 +900,10 @@ struct arc_callback {
 	void			*acb_private;
 	arc_read_done_func_t	*acb_done;
 	arc_buf_t		*acb_buf;
+	boolean_t		acb_encrypted;
 	boolean_t		acb_compressed;
+	boolean_t		acb_noauth;
+	zbookmark_phys_t	acb_zb;
 	zio_t			*acb_zio_dummy;
 	zio_t			*acb_zio_head;
 	arc_callback_t		*acb_next;
@@ -963,6 +983,36 @@ typedef struct l1arc_buf_hdr {
 	abd_t			*b_pabd;
 } l1arc_buf_hdr_t;
 
+/*
+ * Encrypted blocks will need to be stored encrypted on the L2ARC
+ * disk as they appear in the main pool. In order for this to work we
+ * need to pass around the encryption parameters so they can be used
+ * to write data to the L2ARC. This struct is only defined in the
+ * arc_buf_hdr_t if the L1 header is defined and has the ARC_FLAG_ENCRYPTED
+ * flag set.
+ */
+typedef struct arc_buf_hdr_crypt {
+	abd_t		*b_rabd;		/* raw encrypted data */
+	dmu_object_type_t	b_ot;		/* object type */
+	uint32_t		b_ebufcnt;	/* number of encrypted buffers */
+
+	/* dsobj for looking up encryption key for l2arc encryption */
+	uint64_t		b_dsobj;	/* for looking up key */
+
+	/* encryption parameters */
+	uint8_t		b_salt[ZIO_DATA_SALT_LEN];
+	uint8_t		b_iv[ZIO_DATA_IV_LEN];
+
+	/*
+	 * Technically this could be removed since we will always be able to
+	 * get the mac from the bp when we need it. However, it is inconvenient
+	 * for callers of arc code to have to pass a bp in all the time. This
+	 * also allows us to assert that L2ARC data is properly encrypted to
+	 * match the data in the main storage pool.
+	 */
+	uint8_t		b_mac[ZIO_DATA_MAC_LEN];
+} arc_buf_hdr_crypt_t;
+
 typedef struct l2arc_dev l2arc_dev_t;
 
 typedef struct l2arc_buf_hdr {
@@ -1013,6 +1063,11 @@ struct arc_buf_hdr {
 	l2arc_buf_hdr_t		b_l2hdr;
 	/* L1ARC fields. Undefined when in l2arc_only state */
 	l1arc_buf_hdr_t		b_l1hdr;
+	/*
+	 * Encryption parameters. Defined only when ARC_FLAG_ENCRYPTED
+	 * is set and the L1 header exists.
+	 */
+	arc_buf_hdr_crypt_t b_crypt_hdr;
 };
 
 #define	GHOST_STATE(state)	\
@@ -1035,6 +1090,8 @@ struct arc_buf_hdr {
 #define	HDR_L2_WRITING(hdr)	((hdr)->b_flags & ARC_FLAG_L2_WRITING)
 #define	HDR_L2_EVICTED(hdr)	((hdr)->b_flags & ARC_FLAG_L2_EVICTED)
 #define	HDR_L2_WRITE_HEAD(hdr)	((hdr)->b_flags & ARC_FLAG_L2_WRITE_HEAD)
+#define	HDR_PROTECTED(hdr)	((hdr)->b_flags & ARC_FLAG_PROTECTED)
+#define	HDR_NOAUTH(hdr)		((hdr)->b_flags & ARC_FLAG_NOAUTH)
 #define	HDR_SHARED_DATA(hdr)	((hdr)->b_flags & ARC_FLAG_SHARED_DATA)
 
 #define	HDR_ISTYPE_METADATA(hdr)	\
@@ -1043,6 +1100,13 @@ struct arc_buf_hdr {
 
 #define	HDR_HAS_L1HDR(hdr)	((hdr)->b_flags & ARC_FLAG_HAS_L1HDR)
 #define	HDR_HAS_L2HDR(hdr)	((hdr)->b_flags & ARC_FLAG_HAS_L2HDR)
+#define	HDR_HAS_RABD(hdr)	\
+	(HDR_HAS_L1HDR(hdr) && HDR_PROTECTED(hdr) &&	\
+	(hdr)->b_crypt_hdr.b_rabd != NULL)
+#define	HDR_ENCRYPTED(hdr)	\
+	(HDR_PROTECTED(hdr) && DMU_OT_IS_ENCRYPTED((hdr)->b_crypt_hdr.b_ot))
+#define	HDR_AUTHENTICATED(hdr)	\
+	(HDR_PROTECTED(hdr) && !DMU_OT_IS_ENCRYPTED((hdr)->b_crypt_hdr.b_ot))
 
 /* For storing compression mode in b_flags */
 #define	HDR_COMPRESS_OFFSET	(highbit64(ARC_FLAG_COMPRESS_0) - 1)
@@ -1055,12 +1119,14 @@ struct arc_buf_hdr {
 #define	ARC_BUF_LAST(buf)	((buf)->b_next == NULL)
 #define	ARC_BUF_SHARED(buf)	((buf)->b_flags & ARC_BUF_FLAG_SHARED)
 #define	ARC_BUF_COMPRESSED(buf)	((buf)->b_flags & ARC_BUF_FLAG_COMPRESSED)
+#define	ARC_BUF_ENCRYPTED(buf)	((buf)->b_flags & ARC_BUF_FLAG_ENCRYPTED)
 
 /*
  * Other sizes
  */
 
-#define	HDR_FULL_SIZE ((int64_t)sizeof (arc_buf_hdr_t))
+#define	HDR_FULL_CRYPT_SIZE ((int64_t)sizeof (arc_buf_hdr_t))
+#define	HDR_FULL_SIZE ((int64_t)offsetof(arc_buf_hdr_t, b_crypt_hdr))
 #define	HDR_L2ONLY_SIZE ((int64_t)offsetof(arc_buf_hdr_t, b_l1hdr))
 
 /*
@@ -1174,13 +1240,21 @@ static kcondvar_t l2arc_feed_thr_cv;
 static uint8_t l2arc_thread_exit;
 
 static abd_t *arc_get_data_abd(arc_buf_hdr_t *, uint64_t, void *);
+typedef enum arc_fill_flags {
+	ARC_FILL_LOCKED		= 1 << 0, /* hdr lock is held */
+	ARC_FILL_COMPRESSED	= 1 << 1, /* fill with compressed data */
+	ARC_FILL_ENCRYPTED	= 1 << 2, /* fill with encrypted data */
+	ARC_FILL_NOAUTH		= 1 << 3, /* don't attempt to authenticate */
+	ARC_FILL_IN_PLACE	= 1 << 4  /* fill in place (special case) */
+} arc_fill_flags_t;
+
 static void *arc_get_data_buf(arc_buf_hdr_t *, uint64_t, void *);
 static void arc_get_data_impl(arc_buf_hdr_t *, uint64_t, void *);
 static void arc_free_data_abd(arc_buf_hdr_t *, abd_t *, uint64_t, void *);
 static void arc_free_data_buf(arc_buf_hdr_t *, void *, uint64_t, void *);
 static void arc_free_data_impl(arc_buf_hdr_t *hdr, uint64_t size, void *tag);
-static void arc_hdr_free_pabd(arc_buf_hdr_t *);
-static void arc_hdr_alloc_pabd(arc_buf_hdr_t *);
+static void arc_hdr_free_pabd(arc_buf_hdr_t *, boolean_t);
+static void arc_hdr_alloc_pabd(arc_buf_hdr_t *, boolean_t);
 static void arc_access(arc_buf_hdr_t *, kmutex_t *);
 static boolean_t arc_is_overflowing();
 static void arc_buf_watch(arc_buf_t *);
@@ -1323,7 +1397,9 @@ buf_hash_remove(arc_buf_hdr_t *hdr)
 /*
  * Global data structures and functions for the buf kmem cache.
  */
+
 static kmem_cache_t *hdr_full_cache;
+static kmem_cache_t *hdr_full_crypt_cache;
 static kmem_cache_t *hdr_l2only_cache;
 static kmem_cache_t *buf_cache;
 
@@ -1337,6 +1413,7 @@ buf_fini(void)
 	for (i = 0; i < BUF_LOCKS; i++)
 		mutex_destroy(&buf_hash_table.ht_locks[i].ht_lock);
 	kmem_cache_destroy(hdr_full_cache);
+	kmem_cache_destroy(hdr_full_crypt_cache);
 	kmem_cache_destroy(hdr_l2only_cache);
 	kmem_cache_destroy(buf_cache);
 }
@@ -1352,6 +1429,7 @@ hdr_full_cons(void *vbuf, void *unused, int kmflag)
 	arc_buf_hdr_t *hdr = vbuf;
 
 	bzero(hdr, HDR_FULL_SIZE);
+	hdr->b_l1hdr.b_byteswap = DMU_BSWAP_NUMFUNCS;
 	cv_init(&hdr->b_l1hdr.b_cv, NULL, CV_DEFAULT, NULL);
 	zfs_refcount_create(&hdr->b_l1hdr.b_refcnt);
 	mutex_init(&hdr->b_l1hdr.b_freeze_lock, NULL, MUTEX_DEFAULT, NULL);
@@ -1363,6 +1441,19 @@ hdr_full_cons(void *vbuf, void *unused, int kmflag)
 
 /* ARGSUSED */
 static int
+hdr_full_crypt_cons(void *vbuf, void *unused, int kmflag)
+{
+	arc_buf_hdr_t *hdr = vbuf;
+
+	(void) hdr_full_cons(vbuf, unused, kmflag);
+	bzero(&hdr->b_crypt_hdr, sizeof (hdr->b_crypt_hdr));
+	arc_space_consume(sizeof (hdr->b_crypt_hdr), ARC_SPACE_HDRS);
+
+	return (0);
+}
+
+/* ARGSUSED */
+static int
 hdr_l2only_cons(void *vbuf, void *unused, int kmflag)
 {
 	arc_buf_hdr_t *hdr = vbuf;
@@ -1406,6 +1497,16 @@ hdr_full_dest(void *vbuf, void *unused)
 
 /* ARGSUSED */
 static void
+hdr_full_crypt_dest(void *vbuf, void *unused)
+{
+	arc_buf_hdr_t *hdr = vbuf;
+
+	hdr_full_dest(hdr, unused);
+	arc_space_return(sizeof (hdr->b_crypt_hdr), ARC_SPACE_HDRS);
+}
+
+/* ARGSUSED */
+static void
 hdr_l2only_dest(void *vbuf, void *unused)
 {
 	arc_buf_hdr_t *hdr = vbuf;
@@ -1467,6 +1568,9 @@ retry:
 
 	hdr_full_cache = kmem_cache_create("arc_buf_hdr_t_full", HDR_FULL_SIZE,
 	    0, hdr_full_cons, hdr_full_dest, hdr_recl, NULL, NULL, 0);
+	hdr_full_crypt_cache = kmem_cache_create("arc_buf_hdr_t_full_crypt",
+	    HDR_FULL_CRYPT_SIZE, 0, hdr_full_crypt_cons, hdr_full_crypt_dest,
+	    hdr_recl, NULL, NULL, 0);
 	hdr_l2only_cache = kmem_cache_create("arc_buf_hdr_t_l2only",
 	    HDR_L2ONLY_SIZE, 0, hdr_l2only_cons, hdr_l2only_dest, hdr_recl,
 	    NULL, NULL, 0);
@@ -1501,6 +1605,47 @@ arc_buf_lsize(arc_buf_t *buf)
 	return (HDR_GET_LSIZE(buf->b_hdr));
 }
 
+/*
+ * This function will return B_TRUE if the buffer is encrypted in memory.
+ * This buffer can be decrypted by calling arc_untransform().
+ */
+boolean_t
+arc_is_encrypted(arc_buf_t *buf)
+{
+	return (ARC_BUF_ENCRYPTED(buf) != 0);
+}
+
+/*
+ * Returns B_TRUE if the buffer represents data that has not had its MAC
+ * verified yet.
+ */
+boolean_t
+arc_is_unauthenticated(arc_buf_t *buf)
+{
+	return (HDR_NOAUTH(buf->b_hdr) != 0);
+}
+
+void
+arc_get_raw_params(arc_buf_t *buf, boolean_t *byteorder, uint8_t *salt,
+    uint8_t *iv, uint8_t *mac)
+{
+	arc_buf_hdr_t *hdr = buf->b_hdr;
+
+	ASSERT(HDR_PROTECTED(hdr));
+
+	bcopy(hdr->b_crypt_hdr.b_salt, salt, ZIO_DATA_SALT_LEN);
+	bcopy(hdr->b_crypt_hdr.b_iv, iv, ZIO_DATA_IV_LEN);
+	bcopy(hdr->b_crypt_hdr.b_mac, mac, ZIO_DATA_MAC_LEN);
+	*byteorder = (hdr->b_l1hdr.b_byteswap == DMU_BSWAP_NUMFUNCS) ?
+	    /* CONSTCOND */
+	    ZFS_HOST_BYTEORDER : !ZFS_HOST_BYTEORDER;
+}
+
+/*
+ * Indicates how this buffer is compressed in memory. If it is not compressed
+ * the value will be ZIO_COMPRESS_OFF. It can be made normally readable with
+ * arc_untransform() as long as it is also unencrypted.
+ */
 enum zio_compress
 arc_get_compression(arc_buf_t *buf)
 {
@@ -1510,6 +1655,18 @@ arc_get_compression(arc_buf_t *buf)
 
 #define	ARC_MINTIME	(hz>>4) /* 62 ms */
 
+/*
+ * Return the compression algorithm used to store this data in the ARC. If ARC
+ * compression is enabled or this is an encrypted block, this will be the same
+ * as what's used to store it on-disk. Otherwise, this will be ZIO_COMPRESS_OFF.
+ */
+static inline enum zio_compress
+arc_hdr_get_compress(arc_buf_hdr_t *hdr)
+{
+	return (HDR_COMPRESSION_ENABLED(hdr) ?
+	    HDR_GET_COMPRESS(hdr) : ZIO_COMPRESS_OFF);
+}
+
 static inline boolean_t
 arc_buf_is_shared(arc_buf_t *buf)
 {
@@ -1537,6 +1694,7 @@ static inline void
 arc_cksum_free(arc_buf_hdr_t *hdr)
 {
 	ASSERT(HDR_HAS_L1HDR(hdr));
+
 	mutex_enter(&hdr->b_l1hdr.b_freeze_lock);
 	if (hdr->b_l1hdr.b_freeze_cksum != NULL) {
 		kmem_free(hdr->b_l1hdr.b_freeze_cksum, sizeof (zio_cksum_t));
@@ -1547,6 +1705,7 @@ arc_cksum_free(arc_buf_hdr_t *hdr)
 
 /*
  * Return true iff at least one of the bufs on hdr is not compressed.
+ * Encrypted buffers count as compressed.
  */
 static boolean_t
 arc_hdr_has_uncompressed_buf(arc_buf_hdr_t *hdr)
@@ -1593,6 +1752,11 @@ arc_cksum_verify(arc_buf_t *buf)
 	mutex_exit(&hdr->b_l1hdr.b_freeze_lock);
 }
 
+/*
+ * This function makes the assumption that data stored in the L2ARC
+ * will be transformed exactly as it is in the main pool. Because of
+ * this we can verify the checksum against the reading process's bp.
+ */
 static boolean_t
 arc_cksum_is_equal(arc_buf_hdr_t *hdr, zio_t *zio)
 {
@@ -1689,6 +1853,7 @@ arc_cksum_compute(arc_buf_t *buf)
 		return;
 	}
 
+	ASSERT(!ARC_BUF_ENCRYPTED(buf));
 	ASSERT(!ARC_BUF_COMPRESSED(buf));
 	hdr->b_l1hdr.b_freeze_cksum = kmem_alloc(sizeof (zio_cksum_t),
 	    KM_SLEEP);
@@ -1881,15 +2046,14 @@ arc_hdr_set_compress(arc_buf_hdr_t *hdr, enum zio_compress cmp)
 	 */
 	if (!zfs_compressed_arc_enabled || HDR_GET_PSIZE(hdr) == 0) {
 		arc_hdr_clear_flags(hdr, ARC_FLAG_COMPRESSED_ARC);
-		HDR_SET_COMPRESS(hdr, ZIO_COMPRESS_OFF);
 		ASSERT(!HDR_COMPRESSION_ENABLED(hdr));
-		ASSERT3U(HDR_GET_COMPRESS(hdr), ==, ZIO_COMPRESS_OFF);
 	} else {
 		arc_hdr_set_flags(hdr, ARC_FLAG_COMPRESSED_ARC);
-		HDR_SET_COMPRESS(hdr, cmp);
-		ASSERT3U(HDR_GET_COMPRESS(hdr), ==, cmp);
 		ASSERT(HDR_COMPRESSION_ENABLED(hdr));
 	}
+
+	HDR_SET_COMPRESS(hdr, cmp);
+	ASSERT3U(HDR_GET_COMPRESS(hdr), ==, cmp);
 }
 
 /*
@@ -1921,15 +2085,250 @@ arc_buf_try_copy_decompressed_data(arc_buf_t *buf)
 	}
 
 	/*
+	 * Note: With encryption support, the following assertion is no longer
+	 * necessarily valid. If we receive two back to back raw snapshots
+	 * (send -w), the second receive can use a hdr with a cksum already
+	 * calculated. This happens via:
+	 *    dmu_recv_stream() -> receive_read_record() -> arc_loan_raw_buf()
+	 * The rsend/send_mixed_raw test case exercises this code path.
+	 *
 	 * There were no decompressed bufs, so there should not be a
 	 * checksum on the hdr either.
+	 * EQUIV(!copied, hdr->b_l1hdr.b_freeze_cksum == NULL);
 	 */
-	EQUIV(!copied, hdr->b_l1hdr.b_freeze_cksum == NULL);
 
 	return (copied);
 }
 
 /*
+ * Return the size of the block, b_pabd, that is stored in the arc_buf_hdr_t.
+ */
+static uint64_t
+arc_hdr_size(arc_buf_hdr_t *hdr)
+{
+	uint64_t size;
+
+	if (arc_hdr_get_compress(hdr) != ZIO_COMPRESS_OFF &&
+	    HDR_GET_PSIZE(hdr) > 0) {
+		size = HDR_GET_PSIZE(hdr);
+	} else {
+		ASSERT3U(HDR_GET_LSIZE(hdr), !=, 0);
+		size = HDR_GET_LSIZE(hdr);
+	}
+	return (size);
+}
+
+/*
+ * Check the MAC of an authenticated (protected, not encrypted) header against
+ * the contents of b_l1hdr.b_pabd. ARC_FLAG_NOAUTH is cleared when the MAC
+ * verifies. ENOENT from the crypto layer is reported as success (0) with the
+ * flag left untouched; any other failure is returned to the caller.
+ */
+static int
+arc_hdr_authenticate(arc_buf_hdr_t *hdr, spa_t *spa, uint64_t dsobj)
+{
+	int ret;
+	uint64_t lsize = HDR_GET_LSIZE(hdr);
+	uint64_t psize = HDR_GET_PSIZE(hdr);
+	void *cbuf = NULL;
+	abd_t *pabd = hdr->b_l1hdr.b_pabd;
+	abd_t *macabd = pabd;
+
+	ASSERT(HDR_LOCK(hdr) == NULL || MUTEX_HELD(HDR_LOCK(hdr)));
+	ASSERT(HDR_AUTHENTICATED(hdr));
+	ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL);
+
+	/*
+	 * The MAC covers the on-disk (compressed) form of the block. With
+	 * compressed arc disabled only the decompressed data is at hand, so
+	 * recompress it into a scratch abd, zero-padded out to psize, and
+	 * authenticate that instead. This is expected to be rare because most
+	 * objects in an encrypted objset are encrypted, not authenticated.
+	 */
+	if (HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF &&
+	    !HDR_COMPRESSION_ENABLED(hdr)) {
+		uint64_t csize;
+
+		cbuf = zio_buf_alloc(lsize);
+		macabd = abd_get_from_buf(cbuf, lsize);
+		abd_take_ownership_of_buf(macabd, B_TRUE);
+
+		csize = zio_compress_data(HDR_GET_COMPRESS(hdr), pabd, cbuf,
+		    lsize);
+		ASSERT3U(csize, <=, psize);
+		abd_zero_off(macabd, csize, psize - csize);
+	}
+
+	/* The objset block has its own MAC routine; others use b_mac. */
+	if (hdr->b_crypt_hdr.b_ot == DMU_OT_OBJSET) {
+		ASSERT3U(HDR_GET_COMPRESS(hdr), ==, ZIO_COMPRESS_OFF);
+		ASSERT3U(lsize, ==, psize);
+		ret = spa_do_crypt_objset_mac_abd(B_FALSE, spa, dsobj, macabd,
+		    psize, hdr->b_l1hdr.b_byteswap != DMU_BSWAP_NUMFUNCS);
+	} else {
+		ret = spa_do_crypt_mac_abd(B_FALSE, spa, dsobj, macabd, psize,
+		    hdr->b_crypt_hdr.b_mac);
+	}
+
+	/* Best effort: ENOENT is tolerated; the flag is simply not cleared. */
+	if (ret == 0)
+		arc_hdr_clear_flags(hdr, ARC_FLAG_NOAUTH);
+	else if (ret == ENOENT)
+		ret = 0;
+
+	/* macabd took ownership of cbuf above, so only the abd is freed. */
+	if (cbuf != NULL)
+		abd_free(macabd);
+
+	return (ret);
+}
+
+/*
+ * Decrypt the raw data held in b_crypt_hdr.b_rabd into a newly allocated
+ * b_l1hdr.b_pabd. If the header records a compression algorithm but arc
+ * compression is not enabled for it, the plaintext is decompressed as well.
+ * Returns 0 on success, otherwise the error from the step that failed.
+ */
+static int
+arc_hdr_decrypt(arc_buf_hdr_t *hdr, spa_t *spa, const zbookmark_phys_t *zb)
+{
+	int ret;
+	abd_t *cabd = NULL;
+	void *tmp = NULL;
+	boolean_t no_crypt = B_FALSE;
+	boolean_t bswap = (hdr->b_l1hdr.b_byteswap != DMU_BSWAP_NUMFUNCS);
+
+	ASSERT(HDR_LOCK(hdr) == NULL || MUTEX_HELD(HDR_LOCK(hdr)));
+	ASSERT(HDR_ENCRYPTED(hdr));
+
+	arc_hdr_alloc_pabd(hdr, B_FALSE);
+
+	ret = spa_do_crypt_abd(B_FALSE, spa, zb, hdr->b_crypt_hdr.b_ot,
+	    B_FALSE, bswap, hdr->b_crypt_hdr.b_salt, hdr->b_crypt_hdr.b_iv,
+	    hdr->b_crypt_hdr.b_mac, HDR_GET_PSIZE(hdr), hdr->b_l1hdr.b_pabd,
+	    hdr->b_crypt_hdr.b_rabd, &no_crypt);
+	if (ret != 0)
+		goto error;
+
+	if (no_crypt) {
+		abd_copy(hdr->b_l1hdr.b_pabd, hdr->b_crypt_hdr.b_rabd,
+		    HDR_GET_PSIZE(hdr));
+	}
+
+	/*
+	 * With arc compression disabled for this header, the freshly
+	 * decrypted b_pabd must also be decompressed.
+	 */
+	if (HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF &&
+	    !HDR_COMPRESSION_ENABLED(hdr)) {
+		/*
+		 * We want to make sure that we are correctly honoring the
+		 * zfs_abd_scatter_enabled setting, so we allocate an abd here
+		 * and then loan a buffer from it, rather than allocating a
+		 * linear buffer and wrapping it in an abd later.
+		 */
+		cabd = arc_get_data_abd(hdr, arc_hdr_size(hdr), hdr);
+		tmp = abd_borrow_buf(cabd, arc_hdr_size(hdr));
+
+		ret = zio_decompress_data(HDR_GET_COMPRESS(hdr),
+		    hdr->b_l1hdr.b_pabd, tmp, HDR_GET_PSIZE(hdr),
+		    HDR_GET_LSIZE(hdr));
+		if (ret != 0) {
+			abd_return_buf(cabd, tmp, arc_hdr_size(hdr));
+			goto error;
+		}
+
+		abd_return_buf_copy(cabd, tmp, arc_hdr_size(hdr));
+		arc_free_data_abd(hdr, hdr->b_l1hdr.b_pabd,
+		    arc_hdr_size(hdr), hdr);
+		hdr->b_l1hdr.b_pabd = cabd;
+	}
+
+	return (0);
+
+error:
+	arc_hdr_free_pabd(hdr, B_FALSE);
+	/* cabd is an abd from arc_get_data_abd(); free it as one. */
+	if (cabd != NULL)
+		arc_free_data_abd(hdr, cabd, arc_hdr_size(hdr), hdr);
+
+	return (ret);
+}
+
+/*
+ * Called from arc_buf_fill() to make b_l1hdr.b_pabd usable: authenticate data
+ * that is still ARC_FLAG_NOAUTH (unless noauth is set), otherwise decrypt
+ * b_rabd when no b_pabd exists. hash_lock, if non-NULL, is held throughout.
+ */
+static int
+arc_fill_hdr_crypt(arc_buf_hdr_t *hdr, kmutex_t *hash_lock, spa_t *spa,
+    const zbookmark_phys_t *zb, boolean_t noauth)
+{
+	int ret;
+
+	ASSERT(HDR_PROTECTED(hdr));
+
+	if (hash_lock != NULL)
+		mutex_enter(hash_lock);
+
+	if (HDR_NOAUTH(hdr) && !noauth) {
+		/*
+		 * The caller requested authenticated data but our data has
+		 * not been authenticated yet. Verify the MAC now if we can.
+		 */
+		ret = arc_hdr_authenticate(hdr, spa, zb->zb_objset);
+		if (ret != 0)
+			goto error;
+	} else if (HDR_HAS_RABD(hdr) && hdr->b_l1hdr.b_pabd == NULL) {
+		/*
+		 * If we only have the encrypted version of the data, but the
+		 * unencrypted version was requested we take this opportunity
+		 * to store the decrypted version in the header for future use.
+		 */
+		ret = arc_hdr_decrypt(hdr, spa, zb);
+		if (ret != 0)
+			goto error;
+	}
+
+	ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL);
+
+	if (hash_lock != NULL)
+		mutex_exit(hash_lock);
+
+	return (0);
+
+error:
+	if (hash_lock != NULL)
+		mutex_exit(hash_lock);
+
+	return (ret);
+}
+
+/*
+ * Used by the dbuf code to decrypt bonus buffers in place. The dbuf code has
+ * no locking of its own for decrypting a shared dnode block, so the hash lock
+ * guards against concurrent arc_buf_fill() calls. The lock is not taken here
+ * (hash_lock is unused); the caller must hold it, as asserted below.
+ */
+/* ARGSUSED */
+static void
+arc_buf_untransform_in_place(arc_buf_t *buf, kmutex_t *hash_lock)
+{
+	arc_buf_hdr_t *hdr = buf->b_hdr;
+
+	ASSERT(HDR_ENCRYPTED(hdr));
+	ASSERT3U(hdr->b_crypt_hdr.b_ot, ==, DMU_OT_DNODE);
+	ASSERT(HDR_LOCK(hdr) == NULL || MUTEX_HELD(HDR_LOCK(hdr)));
+	ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL);
+
+	zio_crypt_copy_dnode_bonus(hdr->b_l1hdr.b_pabd, buf->b_data,
+	    arc_buf_size(buf));
+	buf->b_flags &= ~ARC_BUF_FLAG_ENCRYPTED;
+	buf->b_flags &= ~ARC_BUF_FLAG_COMPRESSED;
+	hdr->b_crypt_hdr.b_ebufcnt -= 1;
+}
+
+/*
  * Given a buf that has a data buffer attached to it, this function will
  * efficiently fill the buf with data of the specified compression setting from
  * the hdr and update the hdr's b_freeze_cksum if necessary. If the buf and hdr
@@ -1943,15 +2342,90 @@ arc_buf_try_copy_decompressed_data(arc_buf_t *buf)
  * the correct-sized data buffer.
  */
 static int
-arc_buf_fill(arc_buf_t *buf, boolean_t compressed)
+arc_buf_fill(arc_buf_t *buf, spa_t *spa, const zbookmark_phys_t *zb,
+    arc_fill_flags_t flags)
 {
+	int error = 0;
 	arc_buf_hdr_t *hdr = buf->b_hdr;
-	boolean_t hdr_compressed = (HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF);
+	boolean_t hdr_compressed =
+	    (arc_hdr_get_compress(hdr) != ZIO_COMPRESS_OFF);
+	boolean_t compressed = (flags & ARC_FILL_COMPRESSED) != 0;
+	boolean_t encrypted = (flags & ARC_FILL_ENCRYPTED) != 0;
 	dmu_object_byteswap_t bswap = hdr->b_l1hdr.b_byteswap;
+	kmutex_t *hash_lock = (flags & ARC_FILL_LOCKED) ? NULL : HDR_LOCK(hdr);
 
 	ASSERT3P(buf->b_data, !=, NULL);
-	IMPLY(compressed, hdr_compressed);
+	IMPLY(compressed, hdr_compressed || ARC_BUF_ENCRYPTED(buf));
 	IMPLY(compressed, ARC_BUF_COMPRESSED(buf));
+	IMPLY(encrypted, HDR_ENCRYPTED(hdr));
+	IMPLY(encrypted, ARC_BUF_ENCRYPTED(buf));
+	IMPLY(encrypted, ARC_BUF_COMPRESSED(buf));
+	IMPLY(encrypted, !ARC_BUF_SHARED(buf));
+
+	/*
+	 * If the caller wanted encrypted data we just need to copy it from
+	 * b_rabd and potentially byteswap it. We won't be able to do any
+	 * further transforms on it.
+	 */
+	if (encrypted) {
+		ASSERT(HDR_HAS_RABD(hdr));
+		abd_copy_to_buf(buf->b_data, hdr->b_crypt_hdr.b_rabd,
+		    HDR_GET_PSIZE(hdr));
+		goto byteswap;
+	}
+
+	/*
+	 * Adjust encrypted and authenticated headers to accommodate
+	 * the request if needed. Dnode blocks (ARC_FILL_IN_PLACE) are
+	 * allowed to fail decryption due to keys not being loaded
+	 * without being marked as an IO error.
+	 */
+	if (HDR_PROTECTED(hdr)) {
+		error = arc_fill_hdr_crypt(hdr, hash_lock, spa,
+		    zb, !!(flags & ARC_FILL_NOAUTH));
+		if (error == EACCES && (flags & ARC_FILL_IN_PLACE) != 0) {
+			return (error);
+		} else if (error != 0) {
+			if (hash_lock != NULL)
+				mutex_enter(hash_lock);
+			arc_hdr_set_flags(hdr, ARC_FLAG_IO_ERROR);
+			if (hash_lock != NULL)
+				mutex_exit(hash_lock);
+			return (error);
+		}
+	}
+
+	/*
+	 * There is a special case here for dnode blocks which are
+	 * decrypting their bonus buffers. These blocks may request to
+	 * be decrypted in-place. This is necessary because there may
+	 * be many dnodes pointing into this buffer and there is
+	 * currently no method to synchronize replacing the backing
+	 * b_data buffer and updating all of the pointers. Here we use
+	 * the hash lock to ensure there are no races. If the need
+	 * arises for other types to be decrypted in-place, they must
+	 * add handling here as well.
+	 */
+	if ((flags & ARC_FILL_IN_PLACE) != 0) {
+		ASSERT(!hdr_compressed);
+		ASSERT(!compressed);
+		ASSERT(!encrypted);
+
+		if (HDR_ENCRYPTED(hdr) && ARC_BUF_ENCRYPTED(buf)) {
+			ASSERT3U(hdr->b_crypt_hdr.b_ot, ==, DMU_OT_DNODE);
+
+			if (hash_lock != NULL)
+				mutex_enter(hash_lock);
+			arc_buf_untransform_in_place(buf, hash_lock);
+			if (hash_lock != NULL)
+				mutex_exit(hash_lock);
+
+			/* Compute the hdr's checksum if necessary */
+			arc_cksum_compute(buf);
+		}
+
+		return (0);
+	}
 
 	if (hdr_compressed == compressed) {
 		if (!arc_buf_is_shared(buf)) {
@@ -1970,7 +2444,7 @@ arc_buf_fill(arc_buf_t *buf, boolean_t compressed)
 		if (arc_buf_is_shared(buf)) {
 			ASSERT(ARC_BUF_COMPRESSED(buf));
 
-			/* We need to give the buf it's own b_data */
+			/* We need to give the buf its own b_data */
 			buf->b_flags &= ~ARC_BUF_FLAG_SHARED;
 			buf->b_data =
 			    arc_get_data_buf(hdr, HDR_GET_LSIZE(hdr), buf);
@@ -2006,7 +2480,7 @@ arc_buf_fill(arc_buf_t *buf, boolean_t compressed)
 			ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, !=, NULL);
 			return (0);
 		} else {
-			int error = zio_decompress_data(HDR_GET_COMPRESS(hdr),
+			error = zio_decompress_data(HDR_GET_COMPRESS(hdr),
 			    hdr->b_l1hdr.b_pabd, buf->b_data,
 			    HDR_GET_PSIZE(hdr), HDR_GET_LSIZE(hdr));
 
@@ -2017,13 +2491,19 @@ arc_buf_fill(arc_buf_t *buf, boolean_t compressed)
 			if (error != 0) {
 				zfs_dbgmsg(
 				    "hdr %p, compress %d, psize %d, lsize %d",
-				    hdr, HDR_GET_COMPRESS(hdr),
+				    hdr, arc_hdr_get_compress(hdr),
 				    HDR_GET_PSIZE(hdr), HDR_GET_LSIZE(hdr));
+				if (hash_lock != NULL)
+					mutex_enter(hash_lock);
+				arc_hdr_set_flags(hdr, ARC_FLAG_IO_ERROR);
+				if (hash_lock != NULL)
+					mutex_exit(hash_lock);
 				return (SET_ERROR(EIO));
 			}
 		}
 	}
 
+byteswap:
 	/* Byteswap the buf's data if necessary */
 	if (bswap != DMU_BSWAP_NUMFUNCS) {
 		ASSERT(!HDR_SHARED_DATA(hdr));
@@ -2037,28 +2517,35 @@ arc_buf_fill(arc_buf_t *buf, boolean_t compressed)
 	return (0);
 }
 
-int
-arc_decompress(arc_buf_t *buf)
-{
-	return (arc_buf_fill(buf, B_FALSE));
-}
-
 /*
- * Return the size of the block, b_pabd, that is stored in the arc_buf_hdr_t.
+ * If this function is being called to decrypt an encrypted buffer or verify an
+ * authenticated one, the key must be loaded and a mapping must be made
+ * available in the keystore via spa_keystore_create_mapping() or one of its
+ * callers.
  */
-static uint64_t
-arc_hdr_size(arc_buf_hdr_t *hdr)
+int
+arc_untransform(arc_buf_t *buf, spa_t *spa, const zbookmark_phys_t *zb,
+    boolean_t in_place)
 {
-	uint64_t size;
+	int ret;
+	arc_fill_flags_t flags = 0;
 
-	if (HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF &&
-	    HDR_GET_PSIZE(hdr) > 0) {
-		size = HDR_GET_PSIZE(hdr);
-	} else {
-		ASSERT3U(HDR_GET_LSIZE(hdr), !=, 0);
-		size = HDR_GET_LSIZE(hdr);
+	if (in_place)
+		flags |= ARC_FILL_IN_PLACE;
+
+	ret = arc_buf_fill(buf, spa, zb, flags);
+	if (ret == ECKSUM) {
+		/*
+		 * Convert authentication and decryption errors to EIO
+		 * (and generate an ereport) before leaving the ARC.
+		 */
+		ret = SET_ERROR(EIO);
+		spa_log_error(spa, zb);
+		zfs_ereport_post(FM_EREPORT_ZFS_AUTHENTICATION,
+		    spa, NULL, zb, NULL, 0, 0);
 	}
-	return (size);
+
+	return (ret);
 }
 
 /*
@@ -2077,6 +2564,7 @@ arc_evictable_space_increment(arc_buf_hdr_t *hdr, arc_state_t *state)
 		ASSERT0(hdr->b_l1hdr.b_bufcnt);
 		ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL);
 		ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL);
+		ASSERT(!HDR_HAS_RABD(hdr));
 		(void) zfs_refcount_add_many(&state->arcs_esize[type],
 		    HDR_GET_LSIZE(hdr), hdr);
 		return;
@@ -2087,6 +2575,10 @@ arc_evictable_space_increment(arc_buf_hdr_t *hdr, arc_state_t *state)
 		(void) zfs_refcount_add_many(&state->arcs_esize[type],
 		    arc_hdr_size(hdr), hdr);
 	}
+	if (HDR_HAS_RABD(hdr)) {
+		(void) zfs_refcount_add_many(&state->arcs_esize[type],
+		    HDR_GET_PSIZE(hdr), hdr);
+	}
 	for (arc_buf_t *buf = hdr->b_l1hdr.b_buf; buf != NULL;
 	    buf = buf->b_next) {
 		if (arc_buf_is_shared(buf))
@@ -2112,6 +2604,7 @@ arc_evictable_space_decrement(arc_buf_hdr_t *hdr, arc_state_t *state)
 		ASSERT0(hdr->b_l1hdr.b_bufcnt);
 		ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL);
 		ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL);
+		ASSERT(!HDR_HAS_RABD(hdr));
 		(void) zfs_refcount_remove_many(&state->arcs_esize[type],
 		    HDR_GET_LSIZE(hdr), hdr);
 		return;
@@ -2122,6 +2615,10 @@ arc_evictable_space_decrement(arc_buf_hdr_t *hdr, arc_state_t *state)
 		(void) zfs_refcount_remove_many(&state->arcs_esize[type],
 		    arc_hdr_size(hdr), hdr);
 	}
+	if (HDR_HAS_RABD(hdr)) {
+		(void) zfs_refcount_remove_many(&state->arcs_esize[type],
+		    HDR_GET_PSIZE(hdr), hdr);
+	}
 	for (arc_buf_t *buf = hdr->b_l1hdr.b_buf; buf != NULL;
 	    buf = buf->b_next) {
 		if (arc_buf_is_shared(buf))
@@ -2215,7 +2712,9 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr,
 		old_state = hdr->b_l1hdr.b_state;
 		refcnt = zfs_refcount_count(&hdr->b_l1hdr.b_refcnt);
 		bufcnt = hdr->b_l1hdr.b_bufcnt;
-		update_old = (bufcnt > 0 || hdr->b_l1hdr.b_pabd != NULL);
+
+		update_old = (bufcnt > 0 || hdr->b_l1hdr.b_pabd != NULL ||
+		    HDR_HAS_RABD(hdr));
 	} else {
 		old_state = arc_l2c_only;
 		refcnt = 0;
@@ -2286,6 +2785,7 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr,
 			(void) zfs_refcount_add_many(&new_state->arcs_size,
 			    HDR_GET_LSIZE(hdr), hdr);
 			ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL);
+			ASSERT(!HDR_HAS_RABD(hdr));
 		} else {
 			uint32_t buffers = 0;
 
@@ -2319,8 +2819,12 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr,
 				(void) zfs_refcount_add_many(
 				    &new_state->arcs_size,
 				    arc_hdr_size(hdr), hdr);
-			} else {
-				ASSERT(GHOST_STATE(old_state));
+			}
+
+			if (HDR_HAS_RABD(hdr)) {
+				(void) zfs_refcount_add_many(
+				    &new_state->arcs_size,
+				    HDR_GET_PSIZE(hdr), hdr);
 			}
 		}
 	}
@@ -2330,6 +2834,7 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr,
 		if (GHOST_STATE(old_state)) {
 			ASSERT0(bufcnt);
 			ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL);
+			ASSERT(!HDR_HAS_RABD(hdr));
 
 			/*
 			 * When moving a header off of a ghost state,
@@ -2369,9 +2874,20 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr,
 				    buf);
 			}
 			ASSERT3U(bufcnt, ==, buffers);
-			ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL);
-			(void) zfs_refcount_remove_many(
-			    &old_state->arcs_size, arc_hdr_size(hdr), hdr);
+			ASSERT(hdr->b_l1hdr.b_pabd != NULL ||
+			    HDR_HAS_RABD(hdr));
+
+			if (hdr->b_l1hdr.b_pabd != NULL) {
+				(void) zfs_refcount_remove_many(
+				    &old_state->arcs_size, arc_hdr_size(hdr),
+				    hdr);
+			}
+
+			if (HDR_HAS_RABD(hdr)) {
+				(void) zfs_refcount_remove_many(
+				    &old_state->arcs_size, HDR_GET_PSIZE(hdr),
+				    hdr);
+			}
 		}
 	}
 
@@ -2463,12 +2979,13 @@ arc_can_share(arc_buf_hdr_t *hdr, arc_buf_t *buf)
 {
 	/*
 	 * The criteria for sharing a hdr's data are:
-	 * 1. the hdr's compression matches the buf's compression
-	 * 2. the hdr doesn't need to be byteswapped
-	 * 3. the hdr isn't already being shared
-	 * 4. the buf is either compressed or it is the last buf in the hdr list
+	 * 1. the buffer is not encrypted
+	 * 2. the hdr's compression matches the buf's compression
+	 * 3. the hdr doesn't need to be byteswapped
+	 * 4. the hdr isn't already being shared
+	 * 5. the buf is either compressed or it is the last buf in the hdr list
 	 *
-	 * Criterion #4 maintains the invariant that shared uncompressed
+	 * Criterion #5 maintains the invariant that shared uncompressed
 	 * bufs must be the final buf in the hdr's b_buf list. Reading this, you
 	 * might ask, "if a compressed buf is allocated first, won't that be the
 	 * last thing in the list?", but in that case it's impossible to create
@@ -2483,9 +3000,11 @@ arc_can_share(arc_buf_hdr_t *hdr, arc_buf_t *buf)
 	 * sharing if the new buf isn't the first to be added.
 	 */
 	ASSERT3P(buf->b_hdr, ==, hdr);
-	boolean_t hdr_compressed = HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF;
+	boolean_t hdr_compressed = arc_hdr_get_compress(hdr) !=
+	    ZIO_COMPRESS_OFF;
 	boolean_t buf_compressed = ARC_BUF_COMPRESSED(buf) != 0;
-	return (buf_compressed == hdr_compressed &&
+	return (!ARC_BUF_ENCRYPTED(buf) &&
+	    buf_compressed == hdr_compressed &&
 	    hdr->b_l1hdr.b_byteswap == DMU_BSWAP_NUMFUNCS &&
 	    !HDR_SHARED_DATA(hdr) &&
 	    (ARC_BUF_LAST(buf) || ARC_BUF_COMPRESSED(buf)));
@@ -2497,10 +3016,12 @@ arc_can_share(arc_buf_hdr_t *hdr, arc_buf_t *buf)
  * copy was made successfully, or an error code otherwise.
  */
 static int
-arc_buf_alloc_impl(arc_buf_hdr_t *hdr, void *tag, boolean_t compressed,
+arc_buf_alloc_impl(arc_buf_hdr_t *hdr, spa_t *spa, const zbookmark_phys_t *zb,
+    void *tag, boolean_t encrypted, boolean_t compressed, boolean_t noauth,
     boolean_t fill, arc_buf_t **ret)
 {
 	arc_buf_t *buf;
+	arc_fill_flags_t flags = ARC_FILL_LOCKED;
 
 	ASSERT(HDR_HAS_L1HDR(hdr));
 	ASSERT3U(HDR_GET_LSIZE(hdr), >, 0);
@@ -2508,6 +3029,7 @@ arc_buf_alloc_impl(arc_buf_hdr_t *hdr, void *tag, boolean_t compressed,
 	    hdr->b_type == ARC_BUFC_METADATA);
 	ASSERT3P(ret, !=, NULL);
 	ASSERT3P(*ret, ==, NULL);
+	IMPLY(encrypted, compressed);
 
 	buf = *ret = kmem_cache_alloc(buf_cache, KM_PUSHPAGE);
 	buf->b_hdr = hdr;
@@ -2525,16 +3047,28 @@ arc_buf_alloc_impl(arc_buf_hdr_t *hdr, void *tag, boolean_t compressed,
 
 	/*
 	 * Only honor requests for compressed bufs if the hdr is actually
-	 * compressed.
+	 * compressed. This must be overridden if the buffer is encrypted since
+	 * encrypted buffers cannot be decompressed.
 	 */
-	if (compressed && HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF)
+	if (encrypted) {
 		buf->b_flags |= ARC_BUF_FLAG_COMPRESSED;
+		buf->b_flags |= ARC_BUF_FLAG_ENCRYPTED;
+		flags |= ARC_FILL_COMPRESSED | ARC_FILL_ENCRYPTED;
+	} else if (compressed &&
+	    arc_hdr_get_compress(hdr) != ZIO_COMPRESS_OFF) {
+		buf->b_flags |= ARC_BUF_FLAG_COMPRESSED;
+		flags |= ARC_FILL_COMPRESSED;
+	}
+
+	if (noauth) {
+		ASSERT0(encrypted);
+		flags |= ARC_FILL_NOAUTH;
+	}
 
 	/*
 	 * If the hdr's data can be shared then we share the data buffer and
 	 * set the appropriate bit in the hdr's b_flags to indicate the hdr is
-	 * sharing it's b_pabd with the arc_buf_t. Otherwise, we allocate a new
-	 * buffer to store the buf's data.
+	 * sharing its b_pabd with the buf; otherwise we allocate a new buffer.
 	 *
 	 * There are two additional restrictions here because we're sharing
 	 * hdr -> buf instead of the usual buf -> hdr. First, the hdr can't be
@@ -2545,7 +3079,7 @@ arc_buf_alloc_impl(arc_buf_hdr_t *hdr, void *tag, boolean_t compressed,
 	 * need to be ABD-aware.
 	 */
 	boolean_t can_share = arc_can_share(hdr, buf) && !HDR_L2_WRITING(hdr) &&
-	    abd_is_linear(hdr->b_l1hdr.b_pabd);
+	    hdr->b_l1hdr.b_pabd != NULL && abd_is_linear(hdr->b_l1hdr.b_pabd);
 
 	/* Set up b_data and sharing */
 	if (can_share) {
@@ -2561,13 +3095,16 @@ arc_buf_alloc_impl(arc_buf_hdr_t *hdr, void *tag, boolean_t compressed,
 
 	hdr->b_l1hdr.b_buf = buf;
 	hdr->b_l1hdr.b_bufcnt += 1;
+	if (encrypted)
+		hdr->b_crypt_hdr.b_ebufcnt += 1;
 
 	/*
 	 * If the user wants the data from the hdr, we need to either copy or
 	 * decompress the data.
 	 */
 	if (fill) {
-		return (arc_buf_fill(buf, ARC_BUF_COMPRESSED(buf) != 0));
+		ASSERT3P(zb, !=, NULL);
+		return (arc_buf_fill(buf, spa, zb, flags));
 	}
 
 	return (0);
@@ -2613,6 +3150,19 @@ arc_loan_compressed_buf(spa_t *spa, uint64_t psize, uint64_t lsize,
 	return (buf);
 }
 
+/*
+ * Loan out a raw arc buffer. The buffer is allocated by arc_alloc_raw_buf()
+ * with arc_onloan_tag as its tag, and its physical size (psize) is added to
+ * arc_loaned_bytes.
+ */
+arc_buf_t *
+arc_loan_raw_buf(spa_t *spa, uint64_t dsobj, boolean_t byteorder,
+    const uint8_t *salt, const uint8_t *iv, const uint8_t *mac,
+    dmu_object_type_t ot, uint64_t psize, uint64_t lsize,
+    enum zio_compress compression_type)
+{
+	arc_buf_t *loaned;
+
+	loaned = arc_alloc_raw_buf(spa, arc_onloan_tag, dsobj, byteorder,
+	    salt, iv, mac, ot, psize, lsize, compression_type);
+	atomic_add_64(&arc_loaned_bytes, psize);
+
+	return (loaned);
+}
+
 
 /*
  * Return a loaned arc buffer to the arc.
@@ -2658,11 +3208,11 @@ l2arc_free_abd_on_write(abd_t *abd, size_t size, arc_buf_contents_t type)
 }
 
 static void
-arc_hdr_free_on_write(arc_buf_hdr_t *hdr)
+arc_hdr_free_on_write(arc_buf_hdr_t *hdr, boolean_t free_rdata)
 {
 	arc_state_t *state = hdr->b_l1hdr.b_state;
 	arc_buf_contents_t type = arc_buf_type(hdr);
-	uint64_t size = arc_hdr_size(hdr);
+	uint64_t size = (free_rdata) ? HDR_GET_PSIZE(hdr) : arc_hdr_size(hdr);
 
 	/* protected by hash lock, if in the hash table */
 	if (multilist_link_active(&hdr->b_l1hdr.b_arc_node)) {
@@ -2680,7 +3230,11 @@ arc_hdr_free_on_write(arc_buf_hdr_t *hdr)
 		arc_space_return(size, ARC_SPACE_DATA);
 	}
 
-	l2arc_free_abd_on_write(hdr->b_l1hdr.b_pabd, size, type);
+	if (free_rdata) {
+		l2arc_free_abd_on_write(hdr->b_crypt_hdr.b_rabd, size, type);
+	} else {
+		l2arc_free_abd_on_write(hdr->b_l1hdr.b_pabd, size, type);
+	}
 }
 
 /*
@@ -2691,10 +3245,12 @@ arc_hdr_free_on_write(arc_buf_hdr_t *hdr)
 static void
 arc_share_buf(arc_buf_hdr_t *hdr, arc_buf_t *buf)
 {
+	/* LINTED */
 	arc_state_t *state = hdr->b_l1hdr.b_state;
 
 	ASSERT(arc_can_share(hdr, buf));
 	ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL);
+	ASSERT(!ARC_BUF_ENCRYPTED(buf));
 	ASSERT(MUTEX_HELD(HDR_LOCK(hdr)) || HDR_EMPTY(hdr));
 
 	/*
@@ -2702,7 +3258,8 @@ arc_share_buf(arc_buf_hdr_t *hdr, arc_buf_t *buf)
 	 * refcount ownership to the hdr since it always owns
 	 * the refcount whenever an arc_buf_t is shared.
 	 */
-	zfs_refcount_transfer_ownership(&state->arcs_size, buf, hdr);
+	zfs_refcount_transfer_ownership_many(&hdr->b_l1hdr.b_state->arcs_size,
+	    arc_hdr_size(hdr), buf, hdr);
 	hdr->b_l1hdr.b_pabd = abd_get_from_buf(buf->b_data, arc_buf_size(buf));
 	abd_take_ownership_of_buf(hdr->b_l1hdr.b_pabd,
 	    HDR_ISTYPE_METADATA(hdr));
@@ -2722,6 +3279,7 @@ arc_share_buf(arc_buf_hdr_t *hdr, arc_buf_t *buf)
 static void
 arc_unshare_buf(arc_buf_hdr_t *hdr, arc_buf_t *buf)
 {
+	/* LINTED */
 	arc_state_t *state = hdr->b_l1hdr.b_state;
 
 	ASSERT(arc_buf_is_shared(buf));
@@ -2732,7 +3290,8 @@ arc_unshare_buf(arc_buf_hdr_t *hdr, arc_buf_t *buf)
 	 * We are no longer sharing this buffer so we need
 	 * to transfer its ownership to the rightful owner.
 	 */
-	zfs_refcount_transfer_ownership(&state->arcs_size, hdr, buf);
+	zfs_refcount_transfer_ownership_many(&hdr->b_l1hdr.b_state->arcs_size,
+	    arc_hdr_size(hdr), hdr, buf);
 	arc_hdr_clear_flags(hdr, ARC_FLAG_SHARED_DATA);
 	abd_release_ownership_of_buf(hdr->b_l1hdr.b_pabd);
 	abd_put(hdr->b_l1hdr.b_pabd);
@@ -2756,12 +3315,12 @@ arc_unshare_buf(arc_buf_hdr_t *hdr, arc_buf_t *buf)
 static arc_buf_t *
 arc_buf_remove(arc_buf_hdr_t *hdr, arc_buf_t *buf)
 {
-	ASSERT(HDR_HAS_L1HDR(hdr));
-	ASSERT(MUTEX_HELD(HDR_LOCK(hdr)) || HDR_EMPTY(hdr));
-
 	arc_buf_t **bufp = &hdr->b_l1hdr.b_buf;
 	arc_buf_t *lastbuf = NULL;
 
+	ASSERT(HDR_HAS_L1HDR(hdr));
+	ASSERT(MUTEX_HELD(HDR_LOCK(hdr)) || HDR_EMPTY(hdr));
+
 	/*
 	 * Remove the buf from the hdr list and locate the last
 	 * remaining buffer on the list.
@@ -2824,6 +3383,21 @@ arc_buf_destroy_impl(arc_buf_t *buf)
 
 		ASSERT(hdr->b_l1hdr.b_bufcnt > 0);
 		hdr->b_l1hdr.b_bufcnt -= 1;
+
+		if (ARC_BUF_ENCRYPTED(buf)) {
+			hdr->b_crypt_hdr.b_ebufcnt -= 1;
+
+			/*
+			 * If we have no more encrypted buffers and we've
+			 * already gotten a copy of the decrypted data we can
+			 * free b_rabd to save some space.
+			 */
+			if (hdr->b_crypt_hdr.b_ebufcnt == 0 &&
+			    HDR_HAS_RABD(hdr) && hdr->b_l1hdr.b_pabd != NULL &&
+			    !HDR_IO_IN_PROGRESS(hdr)) {
+				arc_hdr_free_pabd(hdr, B_TRUE);
+			}
+		}
 	}
 
 	arc_buf_t *lastbuf = arc_buf_remove(hdr, buf);
@@ -2838,16 +3412,17 @@ arc_buf_destroy_impl(arc_buf_t *buf)
 		 * There is an equivalent case for compressed bufs, but since
 		 * they aren't guaranteed to be the last buf in the list and
 		 * that is an exceedingly rare case, we just allow that space be
-		 * wasted temporarily.
+		 * wasted temporarily. We must also be careful not to share
+		 * encrypted buffers, since they cannot be shared.
 		 */
-		if (lastbuf != NULL) {
+		if (lastbuf != NULL && !ARC_BUF_ENCRYPTED(lastbuf)) {
 			/* Only one buf can be shared at once */
 			VERIFY(!arc_buf_is_shared(lastbuf));
 			/* hdr is uncompressed so can't have compressed buf */
 			VERIFY(!ARC_BUF_COMPRESSED(lastbuf));
 
 			ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL);
-			arc_hdr_free_pabd(hdr);
+			arc_hdr_free_pabd(hdr, B_FALSE);
 
 			/*
 			 * We must setup a new shared block between the
@@ -2868,7 +3443,7 @@ arc_buf_destroy_impl(arc_buf_t *buf)
 		 */
 		ASSERT3P(lastbuf, !=, NULL);
 		ASSERT(arc_buf_is_shared(lastbuf) ||
-		    HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF);
+		    arc_hdr_get_compress(hdr) != ZIO_COMPRESS_OFF);
 	}
 
 	/*
@@ -2885,26 +3460,40 @@ arc_buf_destroy_impl(arc_buf_t *buf)
 }
 
 static void
-arc_hdr_alloc_pabd(arc_buf_hdr_t *hdr)
+arc_hdr_alloc_pabd(arc_buf_hdr_t *hdr, boolean_t alloc_rdata)
 {
+	uint64_t size;
+
 	ASSERT3U(HDR_GET_LSIZE(hdr), >, 0);
 	ASSERT(HDR_HAS_L1HDR(hdr));
-	ASSERT(!HDR_SHARED_DATA(hdr));
+	ASSERT(!HDR_SHARED_DATA(hdr) || alloc_rdata);
+	IMPLY(alloc_rdata, HDR_PROTECTED(hdr));
 
-	ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL);
-	hdr->b_l1hdr.b_pabd = arc_get_data_abd(hdr, arc_hdr_size(hdr), hdr);
-	hdr->b_l1hdr.b_byteswap = DMU_BSWAP_NUMFUNCS;
-	ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL);
+	if (alloc_rdata) {
+		size = HDR_GET_PSIZE(hdr);
+		ASSERT3P(hdr->b_crypt_hdr.b_rabd, ==, NULL);
+		hdr->b_crypt_hdr.b_rabd = arc_get_data_abd(hdr, size, hdr);
+		ASSERT3P(hdr->b_crypt_hdr.b_rabd, !=, NULL);
+	} else {
+		size = arc_hdr_size(hdr);
+		ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL);
+		hdr->b_l1hdr.b_pabd = arc_get_data_abd(hdr, size, hdr);
+		ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL);
+	}
 
-	ARCSTAT_INCR(arcstat_compressed_size, arc_hdr_size(hdr));
+	ARCSTAT_INCR(arcstat_compressed_size, size);
 	ARCSTAT_INCR(arcstat_uncompressed_size, HDR_GET_LSIZE(hdr));
 }
 
 static void
-arc_hdr_free_pabd(arc_buf_hdr_t *hdr)
+arc_hdr_free_pabd(arc_buf_hdr_t *hdr, boolean_t free_rdata)
 {
+	uint64_t size = (free_rdata) ? HDR_GET_PSIZE(hdr) : arc_hdr_size(hdr);
+
 	ASSERT(HDR_HAS_L1HDR(hdr));
-	ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL);
+	ASSERT(hdr->b_l1hdr.b_pabd != NULL || HDR_HAS_RABD(hdr));
+	IMPLY(free_rdata, HDR_HAS_RABD(hdr));
+
 
 	/*
 	 * If the hdr is currently being written to the l2arc then
@@ -2913,28 +3502,41 @@ arc_hdr_free_pabd(arc_buf_hdr_t *hdr)
 	 * writing it to the l2arc device.
 	 */
 	if (HDR_L2_WRITING(hdr)) {
-		arc_hdr_free_on_write(hdr);
+		arc_hdr_free_on_write(hdr, free_rdata);
 		ARCSTAT_BUMP(arcstat_l2_free_on_write);
+	} else if (free_rdata) {
+		arc_free_data_abd(hdr, hdr->b_crypt_hdr.b_rabd, size, hdr);
 	} else {
 		arc_free_data_abd(hdr, hdr->b_l1hdr.b_pabd,
-		    arc_hdr_size(hdr), hdr);
+		    size, hdr);
 	}
-	hdr->b_l1hdr.b_pabd = NULL;
-	hdr->b_l1hdr.b_byteswap = DMU_BSWAP_NUMFUNCS;
 
-	ARCSTAT_INCR(arcstat_compressed_size, -arc_hdr_size(hdr));
+	if (free_rdata) {
+		hdr->b_crypt_hdr.b_rabd = NULL;
+	} else {
+		hdr->b_l1hdr.b_pabd = NULL;
+	}
+
+	if (hdr->b_l1hdr.b_pabd == NULL && !HDR_HAS_RABD(hdr))
+		hdr->b_l1hdr.b_byteswap = DMU_BSWAP_NUMFUNCS;
+
+	ARCSTAT_INCR(arcstat_compressed_size, -size);
 	ARCSTAT_INCR(arcstat_uncompressed_size, -HDR_GET_LSIZE(hdr));
 }
 
 static arc_buf_hdr_t *
 arc_hdr_alloc(uint64_t spa, int32_t psize, int32_t lsize,
-    enum zio_compress compression_type, arc_buf_contents_t type)
+    boolean_t protected, enum zio_compress compression_type,
+    arc_buf_contents_t type, boolean_t alloc_rdata)
 {
 	arc_buf_hdr_t *hdr;
 
 	VERIFY(type == ARC_BUFC_DATA || type == ARC_BUFC_METADATA);
-
-	hdr = kmem_cache_alloc(hdr_full_cache, KM_PUSHPAGE);
+	if (protected) {
+		hdr = kmem_cache_alloc(hdr_full_crypt_cache, KM_PUSHPAGE);
+	} else {
+		hdr = kmem_cache_alloc(hdr_full_cache, KM_PUSHPAGE);
+	}
 	ASSERT(HDR_EMPTY(hdr));
 	ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, ==, NULL);
 	ASSERT3P(hdr->b_l1hdr.b_thawed, ==, NULL);
@@ -2945,6 +3547,8 @@ arc_hdr_alloc(uint64_t spa, int32_t psize, int32_t lsize,
 	hdr->b_flags = 0;
 	arc_hdr_set_flags(hdr, arc_bufc_to_flags(type) | ARC_FLAG_HAS_L1HDR);
 	arc_hdr_set_compress(hdr, compression_type);
+	if (protected)
+		arc_hdr_set_flags(hdr, ARC_FLAG_PROTECTED);
 
 	hdr->b_l1hdr.b_state = arc_anon;
 	hdr->b_l1hdr.b_arc_access = 0;
@@ -2956,7 +3560,7 @@ arc_hdr_alloc(uint64_t spa, int32_t psize, int32_t lsize,
 	 * the compressed or uncompressed data depending on the block
 	 * it references and compressed arc enablement.
 	 */
-	arc_hdr_alloc_pabd(hdr);
+	arc_hdr_alloc_pabd(hdr, alloc_rdata);
 	ASSERT(zfs_refcount_is_zero(&hdr->b_l1hdr.b_refcnt));
 
 	return (hdr);
@@ -2980,6 +3584,16 @@ arc_hdr_realloc(arc_buf_hdr_t *hdr, kmem_cache_t *old, kmem_cache_t *new)
 	ASSERT((old == hdr_full_cache && new == hdr_l2only_cache) ||
 	    (old == hdr_l2only_cache && new == hdr_full_cache));
 
+	/*
+	 * if the caller wanted a new full header and the header is to be
+	 * encrypted we will actually allocate the header from the full crypt
+	 * cache instead. The same applies to freeing from the old cache.
+	 */
+	if (HDR_PROTECTED(hdr) && new == hdr_full_cache)
+		new = hdr_full_crypt_cache;
+	if (HDR_PROTECTED(hdr) && old == hdr_full_cache)
+		old = hdr_full_crypt_cache;
+
 	nhdr = kmem_cache_alloc(new, KM_PUSHPAGE);
 
 	ASSERT(MUTEX_HELD(HDR_LOCK(hdr)));
@@ -2987,7 +3601,7 @@ arc_hdr_realloc(arc_buf_hdr_t *hdr, kmem_cache_t *old, kmem_cache_t *new)
 
 	bcopy(hdr, nhdr, HDR_L2ONLY_SIZE);
 
-	if (new == hdr_full_cache) {
+	if (new == hdr_full_cache || new == hdr_full_crypt_cache) {
 		arc_hdr_set_flags(nhdr, ARC_FLAG_HAS_L1HDR);
 		/*
 		 * arc_access and arc_change_state need to be aware that a
@@ -2998,6 +3612,7 @@ arc_hdr_realloc(arc_buf_hdr_t *hdr, kmem_cache_t *old, kmem_cache_t *new)
 
 		/* Verify previous threads set to NULL before freeing */
 		ASSERT3P(nhdr->b_l1hdr.b_pabd, ==, NULL);
+		ASSERT(!HDR_HAS_RABD(hdr));
 	} else {
 		ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL);
 		ASSERT0(hdr->b_l1hdr.b_bufcnt);
@@ -3020,6 +3635,7 @@ arc_hdr_realloc(arc_buf_hdr_t *hdr, kmem_cache_t *old, kmem_cache_t *new)
 		 */
 		VERIFY(!HDR_L2_WRITING(hdr));
 		VERIFY3P(hdr->b_l1hdr.b_pabd, ==, NULL);
+		ASSERT(!HDR_HAS_RABD(hdr));
 
 #ifdef ZFS_DEBUG
 		if (hdr->b_l1hdr.b_thawed != NULL) {
@@ -3071,6 +3687,156 @@ arc_hdr_realloc(arc_buf_hdr_t *hdr, kmem_cache_t *old, kmem_cache_t *new)
 }
 
 /*
+ * This function allows an L1 header to be reallocated as a crypt
+ * header and vice versa. If we are going to a crypt header, the
+ * new fields will be zeroed out.
+ *
+ * The header must be in arc_anon and must not be linked on a state
+ * multilist, an l2arc list or a hash chain (all asserted below). Every
+ * arc_buf_t attached to the old header is repointed at the new one, the
+ * old header is wiped and returned to its cache, and the new header is
+ * returned; callers must not use the old pointer afterwards.
+ */
+static arc_buf_hdr_t *
+arc_hdr_realloc_crypt(arc_buf_hdr_t *hdr, boolean_t need_crypt)
+{
+	arc_buf_hdr_t *nhdr;
+	arc_buf_t *buf;
+	kmem_cache_t *ncache, *ocache;
+
+	ASSERT(HDR_HAS_L1HDR(hdr));
+	ASSERT3U(!!HDR_PROTECTED(hdr), !=, need_crypt);
+	ASSERT3P(hdr->b_l1hdr.b_state, ==, arc_anon);
+	ASSERT(!multilist_link_active(&hdr->b_l1hdr.b_arc_node));
+	ASSERT(!list_link_active(&hdr->b_l2hdr.b_l2node));
+	ASSERT3P(hdr->b_hash_next, ==, NULL);
+
+	/*
+	 * b_rabd is not carried over to the new header, so it must already
+	 * have been released. This has to be checked here, before the old
+	 * header's flags and fields are cleared below.
+	 */
+	ASSERT(!HDR_HAS_RABD(hdr));
+
+	if (need_crypt) {
+		ncache = hdr_full_crypt_cache;
+		ocache = hdr_full_cache;
+	} else {
+		ncache = hdr_full_cache;
+		ocache = hdr_full_crypt_cache;
+	}
+
+	nhdr = kmem_cache_alloc(ncache, KM_PUSHPAGE);
+
+	/*
+	 * Copy all members that aren't locks or condvars to the new header.
+	 * No lists are pointing to us (as we asserted above), so we don't
+	 * need to worry about the list nodes.
+	 */
+	nhdr->b_dva = hdr->b_dva;
+	nhdr->b_birth = hdr->b_birth;
+	nhdr->b_type = hdr->b_type;
+	nhdr->b_flags = hdr->b_flags;
+	nhdr->b_psize = hdr->b_psize;
+	nhdr->b_lsize = hdr->b_lsize;
+	nhdr->b_spa = hdr->b_spa;
+	nhdr->b_l2hdr.b_dev = hdr->b_l2hdr.b_dev;
+	nhdr->b_l2hdr.b_daddr = hdr->b_l2hdr.b_daddr;
+	nhdr->b_l1hdr.b_freeze_cksum = hdr->b_l1hdr.b_freeze_cksum;
+	nhdr->b_l1hdr.b_bufcnt = hdr->b_l1hdr.b_bufcnt;
+	nhdr->b_l1hdr.b_byteswap = hdr->b_l1hdr.b_byteswap;
+	nhdr->b_l1hdr.b_state = hdr->b_l1hdr.b_state;
+	nhdr->b_l1hdr.b_arc_access = hdr->b_l1hdr.b_arc_access;
+	nhdr->b_l1hdr.b_acb = hdr->b_l1hdr.b_acb;
+	nhdr->b_l1hdr.b_pabd = hdr->b_l1hdr.b_pabd;
+#ifdef ZFS_DEBUG
+	if (hdr->b_l1hdr.b_thawed != NULL) {
+		nhdr->b_l1hdr.b_thawed = hdr->b_l1hdr.b_thawed;
+		hdr->b_l1hdr.b_thawed = NULL;
+	}
+#endif
+
+	/*
+	 * This refcount_add() exists only to ensure that the individual
+	 * arc buffers always point to a header that is referenced, avoiding
+	 * a small race condition that could trigger ASSERTs.
+	 */
+	(void) zfs_refcount_add(&nhdr->b_l1hdr.b_refcnt, FTAG);
+	nhdr->b_l1hdr.b_buf = hdr->b_l1hdr.b_buf;
+	for (buf = nhdr->b_l1hdr.b_buf; buf != NULL; buf = buf->b_next) {
+		mutex_enter(&buf->b_evict_lock);
+		buf->b_hdr = nhdr;
+		mutex_exit(&buf->b_evict_lock);
+	}
+	zfs_refcount_transfer(&nhdr->b_l1hdr.b_refcnt, &hdr->b_l1hdr.b_refcnt);
+	(void) zfs_refcount_remove(&nhdr->b_l1hdr.b_refcnt, FTAG);
+	ASSERT0(zfs_refcount_count(&hdr->b_l1hdr.b_refcnt));
+
+	if (need_crypt) {
+		arc_hdr_set_flags(nhdr, ARC_FLAG_PROTECTED);
+	} else {
+		arc_hdr_clear_flags(nhdr, ARC_FLAG_PROTECTED);
+	}
+
+	/* unset all members of the original hdr */
+	bzero(&hdr->b_dva, sizeof (dva_t));
+	hdr->b_birth = 0;
+	hdr->b_type = ARC_BUFC_INVALID;
+	hdr->b_flags = 0;
+	hdr->b_psize = 0;
+	hdr->b_lsize = 0;
+	hdr->b_spa = 0;
+	hdr->b_l2hdr.b_dev = NULL;
+	hdr->b_l2hdr.b_daddr = 0;
+	hdr->b_l1hdr.b_freeze_cksum = NULL;
+	hdr->b_l1hdr.b_buf = NULL;
+	hdr->b_l1hdr.b_bufcnt = 0;
+	hdr->b_l1hdr.b_byteswap = 0;
+	hdr->b_l1hdr.b_state = NULL;
+	hdr->b_l1hdr.b_arc_access = 0;
+	hdr->b_l1hdr.b_acb = NULL;
+	hdr->b_l1hdr.b_pabd = NULL;
+
+	/* also wipe the crypt-only members when the old hdr had them */
+	if (ocache == hdr_full_crypt_cache) {
+		hdr->b_crypt_hdr.b_ot = DMU_OT_NONE;
+		hdr->b_crypt_hdr.b_ebufcnt = 0;
+		hdr->b_crypt_hdr.b_dsobj = 0;
+		bzero(hdr->b_crypt_hdr.b_salt, ZIO_DATA_SALT_LEN);
+		bzero(hdr->b_crypt_hdr.b_iv, ZIO_DATA_IV_LEN);
+		bzero(hdr->b_crypt_hdr.b_mac, ZIO_DATA_MAC_LEN);
+	}
+
+	buf_discard_identity(hdr);
+	kmem_cache_free(ocache, hdr);
+
+	return (nhdr);
+}
+
+/*
+ * Used by the send / receive code to turn a newly allocated arc_buf_t into
+ * one that is suitable for a raw encrypted write. It is also used to allow
+ * the root objset block to be updated without altering its embedded MACs.
+ * Both block types will always be uncompressed, so we do not have to worry
+ * about compression type or psize here.
+ *
+ * Any of salt, iv and mac may be NULL, in which case the corresponding
+ * value already stored in the hdr is left untouched.
+ */
+void
+arc_convert_to_raw(arc_buf_t *buf, uint64_t dsobj, boolean_t byteorder,
+    dmu_object_type_t ot, const uint8_t *salt, const uint8_t *iv,
+    const uint8_t *mac)
+{
+	arc_buf_hdr_t *hdr = buf->b_hdr;
+
+	ASSERT(ot == DMU_OT_DNODE || ot == DMU_OT_OBJSET);
+	ASSERT(HDR_HAS_L1HDR(hdr));
+	ASSERT3P(hdr->b_l1hdr.b_state, ==, arc_anon);
+
+	buf->b_flags |= ARC_BUF_FLAG_COMPRESSED;
+	buf->b_flags |= ARC_BUF_FLAG_ENCRYPTED;
+
+	/* switch to a crypt header so the b_crypt_hdr members can be set */
+	if (!HDR_PROTECTED(hdr))
+		hdr = arc_hdr_realloc_crypt(hdr, B_TRUE);
+
+	hdr->b_crypt_hdr.b_dsobj = dsobj;
+	hdr->b_crypt_hdr.b_ot = ot;
+
+	if (byteorder == ZFS_HOST_BYTEORDER)
+		hdr->b_l1hdr.b_byteswap = DMU_BSWAP_NUMFUNCS;
+	else
+		hdr->b_l1hdr.b_byteswap = DMU_OT_BYTESWAP(ot);
+
+	if (!arc_hdr_has_uncompressed_buf(hdr))
+		arc_cksum_free(hdr);
+
+	/* only overwrite the parameters the caller actually supplied */
+	if (salt != NULL)
+		bcopy(salt, hdr->b_crypt_hdr.b_salt, ZIO_DATA_SALT_LEN);
+	if (iv != NULL)
+		bcopy(iv, hdr->b_crypt_hdr.b_iv, ZIO_DATA_IV_LEN);
+	if (mac != NULL)
+		bcopy(mac, hdr->b_crypt_hdr.b_mac, ZIO_DATA_MAC_LEN);
+}
+
+/*
  * Allocate a new arc_buf_hdr_t and arc_buf_t and return the buf to the caller.
  * The buf is returned thawed since we expect the consumer to modify it.
  */
@@ -3078,11 +3844,12 @@ arc_buf_t *
 arc_alloc_buf(spa_t *spa, void *tag, arc_buf_contents_t type, int32_t size)
 {
 	arc_buf_hdr_t *hdr = arc_hdr_alloc(spa_load_guid(spa), size, size,
-	    ZIO_COMPRESS_OFF, type);
+	    B_FALSE, ZIO_COMPRESS_OFF, type, B_FALSE);
 	ASSERT(!MUTEX_HELD(HDR_LOCK(hdr)));
 
 	arc_buf_t *buf = NULL;
-	VERIFY0(arc_buf_alloc_impl(hdr, tag, B_FALSE, B_FALSE, &buf));
+	VERIFY0(arc_buf_alloc_impl(hdr, spa, NULL, tag, B_FALSE, B_FALSE,
+	    B_FALSE, B_FALSE, &buf));
 	arc_buf_thaw(buf);
 
 	return (buf);
@@ -3098,33 +3865,76 @@ arc_alloc_compressed_buf(spa_t *spa, void *tag, uint64_t psize, uint64_t lsize,
 {
 	ASSERT3U(lsize, >, 0);
 	ASSERT3U(lsize, >=, psize);
-	ASSERT(compression_type > ZIO_COMPRESS_OFF);
-	ASSERT(compression_type < ZIO_COMPRESS_FUNCTIONS);
+	ASSERT3U(compression_type, >, ZIO_COMPRESS_OFF);
+	ASSERT3U(compression_type, <, ZIO_COMPRESS_FUNCTIONS);
 
 	arc_buf_hdr_t *hdr = arc_hdr_alloc(spa_load_guid(spa), psize, lsize,
-	    compression_type, ARC_BUFC_DATA);
+	    B_FALSE, compression_type, ARC_BUFC_DATA, B_FALSE);
 	ASSERT(!MUTEX_HELD(HDR_LOCK(hdr)));
 
 	arc_buf_t *buf = NULL;
-	VERIFY0(arc_buf_alloc_impl(hdr, tag, B_TRUE, B_FALSE, &buf));
+	VERIFY0(arc_buf_alloc_impl(hdr, spa, NULL, tag, B_FALSE,
+	    B_TRUE, B_FALSE, B_FALSE, &buf));
 	arc_buf_thaw(buf);
 	ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, ==, NULL);
 
 	if (!arc_buf_is_shared(buf)) {
 		/*
 		 * To ensure that the hdr has the correct data in it if we call
-		 * arc_decompress() on this buf before it's been written to
+		 * arc_untransform() on this buf before it's been written to
 		 * disk, it's easiest if we just set up sharing between the
 		 * buf and the hdr.
 		 */
 		ASSERT(!abd_is_linear(hdr->b_l1hdr.b_pabd));
-		arc_hdr_free_pabd(hdr);
+		arc_hdr_free_pabd(hdr, B_FALSE);
 		arc_share_buf(hdr, buf);
 	}
 
 	return (buf);
 }
 
+/*
+ * Allocate a new protected arc_buf_hdr_t whose raw data (b_rabd) is
+ * allocated up front, record the supplied encryption parameters in it, and
+ * attach a new encrypted arc_buf_t which is returned to the caller. As with
+ * the other allocators the buf is returned thawed.
+ */
+arc_buf_t *
+arc_alloc_raw_buf(spa_t *spa, void *tag, uint64_t dsobj, boolean_t byteorder,
+    const uint8_t *salt, const uint8_t *iv, const uint8_t *mac,
+    dmu_object_type_t ot, uint64_t psize, uint64_t lsize,
+    enum zio_compress compression_type)
+{
+	arc_buf_t *buf = NULL;
+	arc_buf_hdr_t *hdr;
+	arc_buf_contents_t type;
+
+	ASSERT3U(lsize, >, 0);
+	ASSERT3U(lsize, >=, psize);
+	ASSERT3U(compression_type, >=, ZIO_COMPRESS_OFF);
+	ASSERT3U(compression_type, <, ZIO_COMPRESS_FUNCTIONS);
+
+	if (DMU_OT_IS_METADATA(ot))
+		type = ARC_BUFC_METADATA;
+	else
+		type = ARC_BUFC_DATA;
+
+	hdr = arc_hdr_alloc(spa_load_guid(spa), psize, lsize, B_TRUE,
+	    compression_type, type, B_TRUE);
+	ASSERT(!MUTEX_HELD(HDR_LOCK(hdr)));
+
+	hdr->b_crypt_hdr.b_dsobj = dsobj;
+	hdr->b_crypt_hdr.b_ot = ot;
+	if (byteorder == ZFS_HOST_BYTEORDER)
+		hdr->b_l1hdr.b_byteswap = DMU_BSWAP_NUMFUNCS;
+	else
+		hdr->b_l1hdr.b_byteswap = DMU_OT_BYTESWAP(ot);
+	bcopy(salt, hdr->b_crypt_hdr.b_salt, ZIO_DATA_SALT_LEN);
+	bcopy(iv, hdr->b_crypt_hdr.b_iv, ZIO_DATA_IV_LEN);
+	bcopy(mac, hdr->b_crypt_hdr.b_mac, ZIO_DATA_MAC_LEN);
+
+	/*
+	 * This buffer will be considered encrypted even if the ot is not an
+	 * encrypted type. It will become authenticated instead in
+	 * arc_write_ready().
+	 */
+	VERIFY0(arc_buf_alloc_impl(hdr, spa, NULL, tag, B_TRUE, B_TRUE,
+	    B_FALSE, B_FALSE, &buf));
+	arc_buf_thaw(buf);
+	ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, ==, NULL);
+
+	return (buf);
+}
+
 static void
 arc_hdr_l2hdr_destroy(arc_buf_hdr_t *hdr)
 {
@@ -3200,15 +4010,23 @@ arc_hdr_destroy(arc_buf_hdr_t *hdr)
 #endif
 
 		if (hdr->b_l1hdr.b_pabd != NULL) {
-			arc_hdr_free_pabd(hdr);
+			arc_hdr_free_pabd(hdr, B_FALSE);
 		}
+
+		if (HDR_HAS_RABD(hdr))
+			arc_hdr_free_pabd(hdr, B_TRUE);
 	}
 
 	ASSERT3P(hdr->b_hash_next, ==, NULL);
 	if (HDR_HAS_L1HDR(hdr)) {
 		ASSERT(!multilist_link_active(&hdr->b_l1hdr.b_arc_node));
 		ASSERT3P(hdr->b_l1hdr.b_acb, ==, NULL);
-		kmem_cache_free(hdr_full_cache, hdr);
+
+		if (!HDR_PROTECTED(hdr)) {
+			kmem_cache_free(hdr_full_cache, hdr);
+		} else {
+			kmem_cache_free(hdr_full_crypt_cache, hdr);
+		}
 	} else {
 		kmem_cache_free(hdr_l2only_cache, hdr);
 	}
@@ -3242,7 +4060,7 @@ arc_buf_destroy(arc_buf_t *buf, void* tag)
 
 /*
  * Evict the arc_buf_hdr that is provided as a parameter. The resultant
- * state of the header is dependent on it's state prior to entering this
+ * state of the header is dependent on its state prior to entering this
  * function. The following transitions are possible:
  *
  *    - arc_mru -> arc_mru_ghost
@@ -3270,9 +4088,9 @@ arc_evict_hdr(arc_buf_hdr_t *hdr, kmutex_t *hash_lock)
 
 		/*
 		 * l2arc_write_buffers() relies on a header's L1 portion
-		 * (i.e. its b_pabd field) during it's write phase.
+		 * (i.e. its b_pabd field) during its write phase.
 		 * Thus, we cannot push a header onto the arc_l2c_only
-		 * state (removing it's L1 piece) until the header is
+		 * state (removing its L1 piece) until the header is
 		 * done being written to the l2arc.
 		 */
 		if (HDR_HAS_L2HDR(hdr) && HDR_L2_WRITING(hdr)) {
@@ -3285,8 +4103,9 @@ arc_evict_hdr(arc_buf_hdr_t *hdr, kmutex_t *hash_lock)
 
 		DTRACE_PROBE1(arc__delete, arc_buf_hdr_t *, hdr);
 
-		ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL);
 		if (HDR_HAS_L2HDR(hdr)) {
+			ASSERT(hdr->b_l1hdr.b_pabd == NULL);
+			ASSERT(!HDR_HAS_RABD(hdr));
 			/*
 			 * This buffer is cached on the 2nd Level ARC;
 			 * don't destroy the header.
@@ -3352,7 +4171,11 @@ arc_evict_hdr(arc_buf_hdr_t *hdr, kmutex_t *hash_lock)
 		 * This ensures that the accounting is updated correctly
 		 * in arc_free_data_impl().
 		 */
-		arc_hdr_free_pabd(hdr);
+		if (hdr->b_l1hdr.b_pabd != NULL)
+			arc_hdr_free_pabd(hdr, B_FALSE);
+
+		if (HDR_HAS_RABD(hdr))
+			arc_hdr_free_pabd(hdr, B_TRUE);
 
 		arc_change_state(evicted_state, hdr, hash_lock);
 		ASSERT(HDR_IN_HASH_TABLE(hdr));
@@ -4323,7 +5146,7 @@ arc_reap_cb(void *arg, zthr_t *zthr)
 
 /*
  * Adapt arc info given the number of bytes we are trying to add and
- * the state that we are comming from.  This function is only called
+ * the state that we are coming from.  This function is only called
  * when we are adding new content to the cache.
  */
 static void
@@ -4464,7 +5287,7 @@ arc_get_data_impl(arc_buf_hdr_t *hdr, uint64_t size, void *tag)
 	 * upper limit, we must be adding data faster than the evict
 	 * thread can evict. Thus, to ensure we don't compound the
 	 * problem by adding more data and forcing arc_size to grow even
-	 * further past it's target size, we halt and wait for the
+	 * further past its target size, we halt and wait for the
 	 * eviction thread to catch up.
 	 *
 	 * It's also possible that the reclaim thread is unable to evict
@@ -4799,24 +5622,69 @@ arc_getbuf_func(zio_t *zio, const zbookmark_phys_t *zb, const blkptr_t *bp,
 }
 
 static void
-arc_hdr_verify(arc_buf_hdr_t *hdr, blkptr_t *bp)
+arc_hdr_verify(arc_buf_hdr_t *hdr, const blkptr_t *bp)
 {
 	if (BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp)) {
 		ASSERT3U(HDR_GET_PSIZE(hdr), ==, 0);
-		ASSERT3U(HDR_GET_COMPRESS(hdr), ==, ZIO_COMPRESS_OFF);
+		ASSERT3U(arc_hdr_get_compress(hdr), ==, ZIO_COMPRESS_OFF);
 	} else {
 		if (HDR_COMPRESSION_ENABLED(hdr)) {
-			ASSERT3U(HDR_GET_COMPRESS(hdr), ==,
+			ASSERT3U(arc_hdr_get_compress(hdr), ==,
 			    BP_GET_COMPRESS(bp));
 		}
 		ASSERT3U(HDR_GET_LSIZE(hdr), ==, BP_GET_LSIZE(bp));
 		ASSERT3U(HDR_GET_PSIZE(hdr), ==, BP_GET_PSIZE(bp));
+		ASSERT3U(!!HDR_PROTECTED(hdr), ==, BP_IS_PROTECTED(bp));
+	}
+}
+
+/*
+ * Run arc_hdr_verify() on the hdr and then check the data cached in it
+ * against the checksum stored in the bp, panicking on a mismatch. Holes and
+ * embedded bps are skipped. The checksum is computed over b_rabd for
+ * encrypted bps (when the hdr has one), or over b_pabd when
+ * HDR_COMPRESSION_ENABLED(); in every other case no checksum is computed.
+ *
+ * XXX this should be changed to return an error, and callers
+ * re-read from disk on failure (on nondebug bits).
+ */
+static void
+arc_hdr_verify_checksum(spa_t *spa, arc_buf_hdr_t *hdr, const blkptr_t *bp)
+{
+	arc_hdr_verify(hdr, bp);
+	if (BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp))
+		return;
+	int err = 0;
+	abd_t *abd = NULL;
+	if (BP_IS_ENCRYPTED(bp)) {
+		if (HDR_HAS_RABD(hdr)) {
+			abd = hdr->b_crypt_hdr.b_rabd;
+		}
+	} else if (HDR_COMPRESSION_ENABLED(hdr)) {
+		abd = hdr->b_l1hdr.b_pabd;
+	}
+	if (abd != NULL) {
+		/*
+		 * The offset is only used for labels, which are not
+		 * cached in the ARC, so it doesn't matter what we
+		 * pass for the offset parameter.
+		 */
+		uint64_t psize = HDR_GET_PSIZE(hdr);
+		err = zio_checksum_error_impl(spa, bp,
+		    BP_GET_CHECKSUM(bp), abd, psize, 0, NULL);
+		if (err != 0) {
+			/*
+			 * Use abd_copy_to_buf() rather than
+			 * abd_borrow_buf_copy() so that we are sure to
+			 * include the buf in crash dumps.
+			 */
+			void *buf = kmem_alloc(psize, KM_SLEEP);
+			abd_copy_to_buf(buf, abd, psize);
+			/* err is a signed int, so print it with %d */
+			panic("checksum of cached data doesn't match BP "
+			    "err=%d hdr=%p bp=%p abd=%p buf=%p",
+			    err, (void *)hdr, (void *)bp, (void *)abd, buf);
+		}
+	}
+}
 
 static void
 arc_read_done(zio_t *zio)
 {
+	blkptr_t	*bp = zio->io_bp;
 	arc_buf_hdr_t	*hdr = zio->io_private;
 	kmutex_t	*hash_lock = NULL;
 	arc_callback_t	*callback_list;
@@ -4847,6 +5715,26 @@ arc_read_done(zio_t *zio)
 		ASSERT3P(hash_lock, !=, NULL);
 	}
 
+	if (BP_IS_PROTECTED(bp)) {
+		hdr->b_crypt_hdr.b_ot = BP_GET_TYPE(bp);
+		hdr->b_crypt_hdr.b_dsobj = zio->io_bookmark.zb_objset;
+		zio_crypt_decode_params_bp(bp, hdr->b_crypt_hdr.b_salt,
+		    hdr->b_crypt_hdr.b_iv);
+
+		if (BP_GET_TYPE(bp) == DMU_OT_INTENT_LOG) {
+			void *tmpbuf;
+
+			tmpbuf = abd_borrow_buf_copy(zio->io_abd,
+			    sizeof (zil_chain_t));
+			zio_crypt_decode_mac_zil(tmpbuf,
+			    hdr->b_crypt_hdr.b_mac);
+			abd_return_buf(zio->io_abd, tmpbuf,
+			    sizeof (zil_chain_t));
+		} else {
+			zio_crypt_decode_mac_bp(bp, hdr->b_crypt_hdr.b_mac);
+		}
+	}
+
 	if (zio->io_error == 0) {
 		/* byteswap if necessary */
 		if (BP_SHOULD_BYTESWAP(zio->io_bp)) {
@@ -4895,8 +5783,32 @@ arc_read_done(zio_t *zio)
 		if (zio->io_error != 0)
 			continue;
 
-		int error = arc_buf_alloc_impl(hdr, acb->acb_private,
-		    acb->acb_compressed, B_TRUE, &acb->acb_buf);
+		int error = arc_buf_alloc_impl(hdr, zio->io_spa,
+		    &acb->acb_zb, acb->acb_private, acb->acb_encrypted,
+		    acb->acb_compressed, acb->acb_noauth, B_TRUE,
+		    &acb->acb_buf);
+
+		/*
+		 * Assert non-speculative zios didn't fail because an
+		 * encryption key wasn't loaded
+		 */
+		ASSERT((zio->io_flags & ZIO_FLAG_SPECULATIVE) ||
+		    error != EACCES);
+
+		/*
+		 * If we failed to decrypt, report an error now (as the zio
+		 * layer would have done if it had done the transforms).
+		 */
+		if (error == ECKSUM) {
+			ASSERT(BP_IS_PROTECTED(bp));
+			error = SET_ERROR(EIO);
+			if ((zio->io_flags & ZIO_FLAG_SPECULATIVE) == 0) {
+				spa_log_error(zio->io_spa, &acb->acb_zb);
+				zfs_ereport_post(FM_EREPORT_ZFS_AUTHENTICATION,
+				    zio->io_spa, NULL, &acb->acb_zb, zio, 0, 0);
+			}
+		}
+
 		if (error != 0) {
 			/*
 			 * Decompression failed.  Set io_error
@@ -4915,6 +5827,7 @@ arc_read_done(zio_t *zio)
 			zio->io_error = error;
 		}
 	}
+
 	/*
 	 * If there are multiple callbacks, we must have the hash lock,
 	 * because the only way for multiple threads to find this hdr is
@@ -4926,11 +5839,8 @@ arc_read_done(zio_t *zio)
 
 	hdr->b_l1hdr.b_acb = NULL;
 	arc_hdr_clear_flags(hdr, ARC_FLAG_IO_IN_PROGRESS);
-	if (callback_cnt == 0) {
-		ASSERT(HDR_PREFETCH(hdr));
-		ASSERT0(hdr->b_l1hdr.b_bufcnt);
-		ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL);
-	}
+	if (callback_cnt == 0)
+		ASSERT(hdr->b_l1hdr.b_pabd != NULL || HDR_HAS_RABD(hdr));
 
 	ASSERT(zfs_refcount_is_zero(&hdr->b_l1hdr.b_refcnt) ||
 	    callback_list != NULL);
@@ -4968,6 +5878,7 @@ arc_read_done(zio_t *zio)
 
 	/* execute each callback and free its structure */
 	while ((acb = callback_list) != NULL) {
+
 		if (acb->acb_done != NULL) {
 			if (zio->io_error != 0 && acb->acb_buf != NULL) {
 				/*
@@ -5022,7 +5933,11 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_read_done_func_t *done,
 	kmutex_t *hash_lock = NULL;
 	zio_t *rzio;
 	uint64_t guid = spa_load_guid(spa);
-	boolean_t compressed_read = (zio_flags & ZIO_FLAG_RAW) != 0;
+	boolean_t compressed_read = (zio_flags & ZIO_FLAG_RAW_COMPRESS) != 0;
+	boolean_t encrypted_read = BP_IS_ENCRYPTED(bp) &&
+	    (zio_flags & ZIO_FLAG_RAW_ENCRYPT) != 0;
+	boolean_t noauth_read = BP_IS_AUTHENTICATED(bp) &&
+	    (zio_flags & ZIO_FLAG_RAW_ENCRYPT) != 0;
 	int rc = 0;
 
 	ASSERT(!BP_IS_EMBEDDED(bp) ||
@@ -5037,7 +5952,15 @@ top:
 		hdr = buf_hash_find(guid, bp, &hash_lock);
 	}
 
-	if (hdr != NULL && HDR_HAS_L1HDR(hdr) && hdr->b_l1hdr.b_pabd != NULL) {
+	/*
+	 * Determine if we have an L1 cache hit or a cache miss. For simplicity
+	 * we maintain encrypted data separately from compressed / uncompressed
+	 * data. If the user is requesting raw encrypted data and we don't have
+	 * that in the header we will read from disk to guarantee that we can
+	 * get it even if the encryption keys aren't loaded.
+	 */
+	if (hdr != NULL && HDR_HAS_L1HDR(hdr) && (HDR_HAS_RABD(hdr) ||
+	    (hdr->b_l1hdr.b_pabd != NULL && !encrypted_read))) {
 		arc_buf_t *buf = NULL;
 		*arc_flags |= ARC_FLAG_CACHED;
 
@@ -5077,6 +6000,9 @@ top:
 				acb->acb_done = done;
 				acb->acb_private = private;
 				acb->acb_compressed = compressed_read;
+				acb->acb_encrypted = encrypted_read;
+				acb->acb_noauth = noauth_read;
+				acb->acb_zb = *zb;
 				if (pio != NULL)
 					acb->acb_zio_dummy = zio_null(pio,
 					    spa, NULL, NULL, NULL, zio_flags);
@@ -5120,15 +6046,35 @@ top:
 
 			ASSERT(!BP_IS_EMBEDDED(bp) || !BP_IS_HOLE(bp));
 
+			arc_hdr_verify_checksum(spa, hdr, bp);
+
 			/* Get a buf with the desired data in it. */
-			rc = arc_buf_alloc_impl(hdr, private,
-			    compressed_read, B_TRUE, &buf);
+			rc = arc_buf_alloc_impl(hdr, spa, zb, private,
+			    encrypted_read, compressed_read, noauth_read,
+			    B_TRUE, &buf);
+			if (rc == ECKSUM) {
+				/*
+				 * Convert authentication and decryption errors
+				 * to EIO (and generate an ereport if needed)
+				 * before leaving the ARC.
+				 */
+				rc = SET_ERROR(EIO);
+				if ((zio_flags & ZIO_FLAG_SPECULATIVE) == 0) {
+					spa_log_error(spa, zb);
+					zfs_ereport_post(
+					    FM_EREPORT_ZFS_AUTHENTICATION,
+					    spa, NULL, zb, NULL, 0, 0);
+				}
+			}
 			if (rc != 0) {
-				arc_buf_destroy(buf, private);
+				(void) remove_reference(hdr, hash_lock,
+				    private);
+				arc_buf_destroy_impl(buf);
 				buf = NULL;
 			}
+			/* assert any errors weren't due to unloaded keys */
 			ASSERT((zio_flags & ZIO_FLAG_SPECULATIVE) ||
-			    rc == 0 || rc != ENOENT);
+			    rc != EACCES);
 		} else if (*arc_flags & ARC_FLAG_PREFETCH &&
 		    zfs_refcount_count(&hdr->b_l1hdr.b_refcnt) == 0) {
 			arc_hdr_set_flags(hdr, ARC_FLAG_PREFETCH);
@@ -5155,13 +6101,15 @@ top:
 		uint64_t addr = 0;
 		boolean_t devw = B_FALSE;
 		uint64_t size;
+		abd_t *hdr_abd;
 
 		if (hdr == NULL) {
 			/* this block is not in the cache */
 			arc_buf_hdr_t *exists = NULL;
 			arc_buf_contents_t type = BP_GET_BUFC_TYPE(bp);
 			hdr = arc_hdr_alloc(spa_load_guid(spa), psize, lsize,
-			    BP_GET_COMPRESS(bp), type);
+			    BP_IS_PROTECTED(bp), BP_GET_COMPRESS(bp), type,
+			    encrypted_read);
 
 			if (!BP_IS_EMBEDDED(bp)) {
 				hdr->b_dva = *BP_IDENTITY(bp);
@@ -5177,25 +6125,43 @@ top:
 			}
 		} else {
 			/*
-			 * This block is in the ghost cache. If it was L2-only
-			 * (and thus didn't have an L1 hdr), we realloc the
-			 * header to add an L1 hdr.
+			 * This block is in the ghost cache or encrypted data
+			 * was requested and we didn't have it. If it was
+			 * L2-only (and thus didn't have an L1 hdr),
+			 * we realloc the header to add an L1 hdr.
 			 */
 			if (!HDR_HAS_L1HDR(hdr)) {
 				hdr = arc_hdr_realloc(hdr, hdr_l2only_cache,
 				    hdr_full_cache);
 			}
-			ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL);
-			ASSERT(GHOST_STATE(hdr->b_l1hdr.b_state));
-			ASSERT(!HDR_IO_IN_PROGRESS(hdr));
-			ASSERT(zfs_refcount_is_zero(&hdr->b_l1hdr.b_refcnt));
-			ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL);
-			ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, ==, NULL);
+
+			if (GHOST_STATE(hdr->b_l1hdr.b_state)) {
+				ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL);
+				ASSERT(!HDR_HAS_RABD(hdr));
+				ASSERT(!HDR_IO_IN_PROGRESS(hdr));
+				ASSERT0(zfs_refcount_count(
+				    &hdr->b_l1hdr.b_refcnt));
+				ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL);
+				ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, ==, NULL);
+			} else if (HDR_IO_IN_PROGRESS(hdr)) {
+				/*
+				 * If this header already had an IO in progress
+				 * and we are performing another IO to fetch
+				 * encrypted data we must wait until the first
+				 * IO completes so as not to confuse
+				 * arc_read_done(). This should be very rare
+				 * and so the performance impact shouldn't
+				 * matter.
+				 */
+				cv_wait(&hdr->b_l1hdr.b_cv, hash_lock);
+				mutex_exit(hash_lock);
+				goto top;
+			}
 
 			/*
 			 * This is a delicate dance that we play here.
-			 * This hdr is in the ghost list so we access it
-			 * to move it out of the ghost list before we
+			 * This hdr might be in the ghost list so we access
+			 * it to move it out of the ghost list before we
 			 * initiate the read. If it's a prefetch then
 			 * it won't have a callback so we'll remove the
 			 * reference that arc_buf_alloc_impl() created. We
@@ -5203,28 +6169,44 @@ top:
 			 * avoid hitting an assert in remove_reference().
 			 */
 			arc_access(hdr, hash_lock);
-			arc_hdr_alloc_pabd(hdr);
+			arc_hdr_alloc_pabd(hdr, encrypted_read);
 		}
-		ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL);
-		size = arc_hdr_size(hdr);
 
-		/*
-		 * If compression is enabled on the hdr, then will do
-		 * RAW I/O and will store the compressed data in the hdr's
-		 * data block. Otherwise, the hdr's data block will contain
-		 * the uncompressed data.
-		 */
-		if (HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF) {
+		if (encrypted_read) {
+			ASSERT(HDR_HAS_RABD(hdr));
+			size = HDR_GET_PSIZE(hdr);
+			hdr_abd = hdr->b_crypt_hdr.b_rabd;
 			zio_flags |= ZIO_FLAG_RAW;
+		} else {
+			ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL);
+			size = arc_hdr_size(hdr);
+			hdr_abd = hdr->b_l1hdr.b_pabd;
+
+			if (arc_hdr_get_compress(hdr) != ZIO_COMPRESS_OFF) {
+				zio_flags |= ZIO_FLAG_RAW_COMPRESS;
+			}
+
+			/*
+			 * For authenticated bp's, we do not ask the ZIO layer
+			 * to authenticate them since this will cause the entire
+			 * IO to fail if the key isn't loaded. Instead, we
+			 * defer authentication until arc_buf_fill(), which will
+			 * verify the data when the key is available.
+			 */
+			if (BP_IS_AUTHENTICATED(bp))
+				zio_flags |= ZIO_FLAG_RAW_ENCRYPT;
 		}
 
-		if (*arc_flags & ARC_FLAG_PREFETCH)
+		if (*arc_flags & ARC_FLAG_PREFETCH &&
+		    zfs_refcount_is_zero(&hdr->b_l1hdr.b_refcnt))
 			arc_hdr_set_flags(hdr, ARC_FLAG_PREFETCH);
 		if (*arc_flags & ARC_FLAG_PRESCIENT_PREFETCH)
 			arc_hdr_set_flags(hdr, ARC_FLAG_PRESCIENT_PREFETCH);
 
 		if (*arc_flags & ARC_FLAG_L2CACHE)
 			arc_hdr_set_flags(hdr, ARC_FLAG_L2CACHE);
+		if (BP_IS_AUTHENTICATED(bp))
+			arc_hdr_set_flags(hdr, ARC_FLAG_NOAUTH);
 		if (BP_GET_LEVEL(bp) > 0)
 			arc_hdr_set_flags(hdr, ARC_FLAG_INDIRECT);
 		if (*arc_flags & ARC_FLAG_PREDICTIVE_PREFETCH)
@@ -5235,6 +6217,9 @@ top:
 		acb->acb_done = done;
 		acb->acb_private = private;
 		acb->acb_compressed = compressed_read;
+		acb->acb_encrypted = encrypted_read;
+		acb->acb_noauth = noauth_read;
+		acb->acb_zb = *zb;
 
 		ASSERT3P(hdr->b_l1hdr.b_acb, ==, NULL);
 		hdr->b_l1hdr.b_acb = acb;
@@ -5309,7 +6294,7 @@ top:
 					    HDR_ISTYPE_METADATA(hdr));
 					cb->l2rcb_abd = abd;
 				} else {
-					abd = hdr->b_l1hdr.b_pabd;
+					abd = hdr_abd;
 				}
 
 				ASSERT(addr >= VDEV_LABEL_START_SIZE &&
@@ -5322,7 +6307,7 @@ top:
 				 * Issue a null zio if the underlying buffer
 				 * was squashed to zero size by compression.
 				 */
-				ASSERT3U(HDR_GET_COMPRESS(hdr), !=,
+				ASSERT3U(arc_hdr_get_compress(hdr), !=,
 				    ZIO_COMPRESS_EMPTY);
 				rzio = zio_read_phys(pio, vd, addr,
 				    asize, abd,
@@ -5339,7 +6324,8 @@ top:
 
 				DTRACE_PROBE2(l2arc__read, vdev_t *, vd,
 				    zio_t *, rzio);
-				ARCSTAT_INCR(arcstat_l2_read_bytes, size);
+				ARCSTAT_INCR(arcstat_l2_read_bytes,
+				    HDR_GET_PSIZE(hdr));
 
 				if (*arc_flags & ARC_FLAG_NOWAIT) {
 					zio_nowait(rzio);
@@ -5371,7 +6357,7 @@ top:
 			}
 		}
 
-		rzio = zio_read(pio, spa, bp, hdr->b_l1hdr.b_pabd, size,
+		rzio = zio_read(pio, spa, bp, hdr_abd, size,
 		    arc_read_done, hdr, priority, zio_flags, zb);
 		acb->acb_zio_head = rzio;
 
@@ -5384,7 +6370,7 @@ top:
 		ASSERT(*arc_flags & ARC_FLAG_NOWAIT);
 		zio_nowait(rzio);
 	}
-	return (0);
+	return (rc);
 }
 
 /*
@@ -5448,7 +6434,7 @@ arc_release(arc_buf_t *buf, void *tag)
 	arc_buf_hdr_t *hdr = buf->b_hdr;
 
 	/*
-	 * It would be nice to assert that if it's DMU metadata (level >
+	 * It would be nice to assert that if it's DMU metadata (level >
 	 * 0 || it's the dnode file), then it must be syncing context.
 	 * But we don't know that information at this level.
 	 */
@@ -5464,7 +6450,13 @@ arc_release(arc_buf_t *buf, void *tag)
 	 */
 	if (hdr->b_l1hdr.b_state == arc_anon) {
 		mutex_exit(&buf->b_evict_lock);
-		ASSERT(!HDR_IO_IN_PROGRESS(hdr));
+		/*
+		 * If we are called from dmu_convert_mdn_block_to_raw(),
+		 * a write might be in progress.  This is OK because
+		 * the caller won't change the content of this buffer,
+		 * only the flags (via arc_convert_to_raw()).
+		 */
+		/* ASSERT(!HDR_IO_IN_PROGRESS(hdr)); */
 		ASSERT(!HDR_IN_HASH_TABLE(hdr));
 		ASSERT(!HDR_HAS_L2HDR(hdr));
 		ASSERT(HDR_EMPTY(hdr));
@@ -5525,7 +6517,8 @@ arc_release(arc_buf_t *buf, void *tag)
 		uint64_t spa = hdr->b_spa;
 		uint64_t psize = HDR_GET_PSIZE(hdr);
 		uint64_t lsize = HDR_GET_LSIZE(hdr);
-		enum zio_compress compress = HDR_GET_COMPRESS(hdr);
+		boolean_t protected = HDR_PROTECTED(hdr);
+		enum zio_compress compress = arc_hdr_get_compress(hdr);
 		arc_buf_contents_t type = arc_buf_type(hdr);
 		VERIFY3U(hdr->b_type, ==, type);
 
@@ -5550,6 +6543,7 @@ arc_release(arc_buf_t *buf, void *tag)
 		 * buffer, then we must stop sharing that block.
 		 */
 		if (arc_buf_is_shared(buf)) {
+			ASSERT3P(hdr->b_l1hdr.b_buf, !=, buf);
 			VERIFY(!arc_buf_is_shared(lastbuf));
 
 			/*
@@ -5567,7 +6561,7 @@ arc_release(arc_buf_t *buf, void *tag)
 			if (arc_can_share(hdr, lastbuf)) {
 				arc_share_buf(hdr, lastbuf);
 			} else {
-				arc_hdr_alloc_pabd(hdr);
+				arc_hdr_alloc_pabd(hdr, B_FALSE);
 				abd_copy_from_buf(hdr->b_l1hdr.b_pabd,
 				    buf->b_data, psize);
 			}
@@ -5582,10 +6576,10 @@ arc_release(arc_buf_t *buf, void *tag)
 			 * if we have a compressed, shared buffer.
 			 */
 			ASSERT(arc_buf_is_shared(lastbuf) ||
-			    HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF);
+			    arc_hdr_get_compress(hdr) != ZIO_COMPRESS_OFF);
 			ASSERT(!ARC_BUF_SHARED(buf));
 		}
-		ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL);
+		ASSERT(hdr->b_l1hdr.b_pabd != NULL || HDR_HAS_RABD(hdr));
 		ASSERT3P(state, !=, arc_l2c_only);
 
 		(void) zfs_refcount_remove_many(&state->arcs_size,
@@ -5599,16 +6593,24 @@ arc_release(arc_buf_t *buf, void *tag)
 		}
 
 		hdr->b_l1hdr.b_bufcnt -= 1;
+		if (ARC_BUF_ENCRYPTED(buf))
+			hdr->b_crypt_hdr.b_ebufcnt -= 1;
+
 		arc_cksum_verify(buf);
 		arc_buf_unwatch(buf);
 
+		/* if this is the last uncompressed buf free the checksum */
+		if (!arc_hdr_has_uncompressed_buf(hdr))
+			arc_cksum_free(hdr);
+
 		mutex_exit(hash_lock);
 
 		/*
 		 * Allocate a new hdr. The new hdr will contain a b_pabd
 		 * buffer which will be freed in arc_write().
 		 */
-		nhdr = arc_hdr_alloc(spa, psize, lsize, compress, type);
+		nhdr = arc_hdr_alloc(spa, psize, lsize, protected,
+		    compress, type, HDR_HAS_RABD(hdr));
 		ASSERT3P(nhdr->b_l1hdr.b_buf, ==, NULL);
 		ASSERT0(nhdr->b_l1hdr.b_bufcnt);
 		ASSERT0(zfs_refcount_count(&nhdr->b_l1hdr.b_refcnt));
@@ -5617,6 +6619,8 @@ arc_release(arc_buf_t *buf, void *tag)
 
 		nhdr->b_l1hdr.b_buf = buf;
 		nhdr->b_l1hdr.b_bufcnt = 1;
+		if (ARC_BUF_ENCRYPTED(buf))
+			nhdr->b_crypt_hdr.b_ebufcnt = 1;
 		(void) zfs_refcount_add(&nhdr->b_l1hdr.b_refcnt, tag);
 		buf->b_hdr = nhdr;
 
@@ -5631,8 +6635,8 @@ arc_release(arc_buf_t *buf, void *tag)
 		ASSERT(!HDR_IO_IN_PROGRESS(hdr));
 		arc_change_state(arc_anon, hdr, hash_lock);
 		hdr->b_l1hdr.b_arc_access = 0;
-		mutex_exit(hash_lock);
 
+		mutex_exit(hash_lock);
 		buf_discard_identity(hdr);
 		arc_buf_thaw(buf);
 	}
@@ -5669,7 +6673,8 @@ arc_write_ready(zio_t *zio)
 	arc_write_callback_t *callback = zio->io_private;
 	arc_buf_t *buf = callback->awcb_buf;
 	arc_buf_hdr_t *hdr = buf->b_hdr;
-	uint64_t psize = BP_IS_HOLE(zio->io_bp) ? 0 : BP_GET_PSIZE(zio->io_bp);
+	blkptr_t *bp = zio->io_bp;
+	uint64_t psize = BP_IS_HOLE(bp) ? 0 : BP_GET_PSIZE(bp);
 
 	ASSERT(HDR_HAS_L1HDR(hdr));
 	ASSERT(!zfs_refcount_is_zero(&buf->b_hdr->b_l1hdr.b_refcnt));
@@ -5687,11 +6692,15 @@ arc_write_ready(zio_t *zio)
 			if (arc_buf_is_shared(buf)) {
 				arc_unshare_buf(hdr, buf);
 			} else {
-				arc_hdr_free_pabd(hdr);
+				arc_hdr_free_pabd(hdr, B_FALSE);
 			}
 		}
+
+		if (HDR_HAS_RABD(hdr))
+			arc_hdr_free_pabd(hdr, B_TRUE);
 	}
 	ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL);
+	ASSERT(!HDR_HAS_RABD(hdr));
 	ASSERT(!HDR_SHARED_DATA(hdr));
 	ASSERT(!arc_buf_is_shared(buf));
 
@@ -5700,23 +6709,69 @@ arc_write_ready(zio_t *zio)
 	if (HDR_IO_IN_PROGRESS(hdr))
 		ASSERT(zio->io_flags & ZIO_FLAG_REEXECUTED);
 
-	arc_cksum_compute(buf);
 	arc_hdr_set_flags(hdr, ARC_FLAG_IO_IN_PROGRESS);
 
+	if (BP_IS_PROTECTED(bp) != !!HDR_PROTECTED(hdr))
+		hdr = arc_hdr_realloc_crypt(hdr, BP_IS_PROTECTED(bp));
+
+	if (BP_IS_PROTECTED(bp)) {
+		/* ZIL blocks are written through zio_rewrite */
+		ASSERT3U(BP_GET_TYPE(bp), !=, DMU_OT_INTENT_LOG);
+		ASSERT(HDR_PROTECTED(hdr));
+
+		if (BP_SHOULD_BYTESWAP(bp)) {
+			if (BP_GET_LEVEL(bp) > 0) {
+				hdr->b_l1hdr.b_byteswap = DMU_BSWAP_UINT64;
+			} else {
+				hdr->b_l1hdr.b_byteswap =
+				    DMU_OT_BYTESWAP(BP_GET_TYPE(bp));
+			}
+		} else {
+			hdr->b_l1hdr.b_byteswap = DMU_BSWAP_NUMFUNCS;
+		}
+
+		hdr->b_crypt_hdr.b_ot = BP_GET_TYPE(bp);
+		hdr->b_crypt_hdr.b_dsobj = zio->io_bookmark.zb_objset;
+		zio_crypt_decode_params_bp(bp, hdr->b_crypt_hdr.b_salt,
+		    hdr->b_crypt_hdr.b_iv);
+		zio_crypt_decode_mac_bp(bp, hdr->b_crypt_hdr.b_mac);
+	}
+
+	/*
+	 * If this block was written for raw encryption but the zio layer
+	 * ended up only authenticating it, adjust the buffer flags now.
+	 */
+	if (BP_IS_AUTHENTICATED(bp) && ARC_BUF_ENCRYPTED(buf)) {
+		arc_hdr_set_flags(hdr, ARC_FLAG_NOAUTH);
+		buf->b_flags &= ~ARC_BUF_FLAG_ENCRYPTED;
+		if (BP_GET_COMPRESS(bp) == ZIO_COMPRESS_OFF)
+			buf->b_flags &= ~ARC_BUF_FLAG_COMPRESSED;
+	} else if (BP_IS_HOLE(bp) && ARC_BUF_ENCRYPTED(buf)) {
+		buf->b_flags &= ~ARC_BUF_FLAG_ENCRYPTED;
+		buf->b_flags &= ~ARC_BUF_FLAG_COMPRESSED;
+	}
+
+	/* this must be done after the buffer flags are adjusted */
+	arc_cksum_compute(buf);
+
 	enum zio_compress compress;
-	if (BP_IS_HOLE(zio->io_bp) || BP_IS_EMBEDDED(zio->io_bp)) {
+	if (BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp)) {
 		compress = ZIO_COMPRESS_OFF;
 	} else {
-		ASSERT3U(HDR_GET_LSIZE(hdr), ==, BP_GET_LSIZE(zio->io_bp));
-		compress = BP_GET_COMPRESS(zio->io_bp);
+		ASSERT3U(HDR_GET_LSIZE(hdr), ==, BP_GET_LSIZE(bp));
+		compress = BP_GET_COMPRESS(bp);
 	}
 	HDR_SET_PSIZE(hdr, psize);
 	arc_hdr_set_compress(hdr, compress);
 
+	if (zio->io_error != 0 || psize == 0)
+		goto out;
 
 	/*
-	 * Fill the hdr with data. If the hdr is compressed, the data we want
-	 * is available from the zio, otherwise we can take it from the buf.
+	 * Fill the hdr with data. If the buffer is encrypted we have no choice
+	 * but to copy the data into b_rabd. If the hdr is compressed, the data
+	 * we want is available from the zio, otherwise we can take it from
+	 * the buf.
 	 *
 	 * We might be able to share the buf's data with the hdr here. However,
 	 * doing so would cause the ARC to be full of linear ABDs if we write a
@@ -5726,23 +6781,29 @@ arc_write_ready(zio_t *zio)
 	 * written. Therefore, if they're allowed then we allocate one and copy
 	 * the data into it; otherwise, we share the data directly if we can.
 	 */
-	if (zfs_abd_scatter_enabled || !arc_can_share(hdr, buf)) {
-		arc_hdr_alloc_pabd(hdr);
-
+	if (ARC_BUF_ENCRYPTED(buf)) {
+		ASSERT3U(psize, >, 0);
+		ASSERT(ARC_BUF_COMPRESSED(buf));
+		arc_hdr_alloc_pabd(hdr, B_TRUE);
+		abd_copy(hdr->b_crypt_hdr.b_rabd, zio->io_abd, psize);
+	} else if (zfs_abd_scatter_enabled || !arc_can_share(hdr, buf)) {
 		/*
 		 * Ideally, we would always copy the io_abd into b_pabd, but the
 		 * user may have disabled compressed ARC, thus we must check the
 		 * hdr's compression setting rather than the io_bp's.
 		 */
-		if (HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF) {
-			ASSERT3U(BP_GET_COMPRESS(zio->io_bp), !=,
-			    ZIO_COMPRESS_OFF);
+		if (BP_IS_ENCRYPTED(bp)) {
 			ASSERT3U(psize, >, 0);
-
+			arc_hdr_alloc_pabd(hdr, B_TRUE);
+			abd_copy(hdr->b_crypt_hdr.b_rabd, zio->io_abd, psize);
+		} else if (arc_hdr_get_compress(hdr) != ZIO_COMPRESS_OFF &&
+		    !ARC_BUF_COMPRESSED(buf)) {
+			ASSERT3U(psize, >, 0);
+			arc_hdr_alloc_pabd(hdr, B_FALSE);
 			abd_copy(hdr->b_l1hdr.b_pabd, zio->io_abd, psize);
 		} else {
 			ASSERT3U(zio->io_orig_size, ==, arc_hdr_size(hdr));
-
+			arc_hdr_alloc_pabd(hdr, B_FALSE);
 			abd_copy_from_buf(hdr->b_l1hdr.b_pabd, buf->b_data,
 			    arc_buf_size(buf));
 		}
@@ -5750,11 +6811,11 @@ arc_write_ready(zio_t *zio)
 		ASSERT3P(buf->b_data, ==, abd_to_buf(zio->io_orig_abd));
 		ASSERT3U(zio->io_orig_size, ==, arc_buf_size(buf));
 		ASSERT3U(hdr->b_l1hdr.b_bufcnt, ==, 1);
-
 		arc_share_buf(hdr, buf);
 	}
 
-	arc_hdr_verify(hdr, zio->io_bp);
+out:
+	arc_hdr_verify(hdr, bp);
 }
 
 static void
@@ -5882,17 +6943,33 @@ arc_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, arc_buf_t *buf,
 	ASSERT3U(hdr->b_l1hdr.b_bufcnt, >, 0);
 	if (l2arc)
 		arc_hdr_set_flags(hdr, ARC_FLAG_L2CACHE);
-	if (ARC_BUF_COMPRESSED(buf)) {
-		/*
-		 * We're writing a pre-compressed buffer.  Make the
-		 * compression algorithm requested by the zio_prop_t match
-		 * the pre-compressed buffer's compression algorithm.
-		 */
-		localprop.zp_compress = HDR_GET_COMPRESS(hdr);
 
-		ASSERT3U(HDR_GET_LSIZE(hdr), !=, arc_buf_size(buf));
+	if (ARC_BUF_ENCRYPTED(buf)) {
+		ASSERT(ARC_BUF_COMPRESSED(buf));
+		localprop.zp_encrypt = B_TRUE;
+		localprop.zp_compress = HDR_GET_COMPRESS(hdr);
+		/* CONSTCOND */
+		localprop.zp_byteorder =
+		    (hdr->b_l1hdr.b_byteswap == DMU_BSWAP_NUMFUNCS) ?
+		    ZFS_HOST_BYTEORDER : !ZFS_HOST_BYTEORDER;
+		bcopy(hdr->b_crypt_hdr.b_salt, localprop.zp_salt,
+		    ZIO_DATA_SALT_LEN);
+		bcopy(hdr->b_crypt_hdr.b_iv, localprop.zp_iv,
+		    ZIO_DATA_IV_LEN);
+		bcopy(hdr->b_crypt_hdr.b_mac, localprop.zp_mac,
+		    ZIO_DATA_MAC_LEN);
+		if (DMU_OT_IS_ENCRYPTED(localprop.zp_type)) {
+			localprop.zp_nopwrite = B_FALSE;
+			localprop.zp_copies =
+			    MIN(localprop.zp_copies, SPA_DVAS_PER_BP - 1);
+		}
 		zio_flags |= ZIO_FLAG_RAW;
+	} else if (ARC_BUF_COMPRESSED(buf)) {
+		ASSERT3U(HDR_GET_LSIZE(hdr), !=, arc_buf_size(buf));
+		localprop.zp_compress = HDR_GET_COMPRESS(hdr);
+		zio_flags |= ZIO_FLAG_RAW_COMPRESS;
 	}
+
 	callback = kmem_zalloc(sizeof (arc_write_callback_t), KM_SLEEP);
 	callback->awcb_ready = ready;
 	callback->awcb_children_ready = children_ready;
@@ -5915,11 +6992,17 @@ arc_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, arc_buf_t *buf,
 		if (arc_buf_is_shared(buf)) {
 			arc_unshare_buf(hdr, buf);
 		} else {
-			arc_hdr_free_pabd(hdr);
+			arc_hdr_free_pabd(hdr, B_FALSE);
 		}
 		VERIFY3P(buf->b_data, !=, NULL);
-		arc_hdr_set_compress(hdr, ZIO_COMPRESS_OFF);
 	}
+
+	if (HDR_HAS_RABD(hdr))
+		arc_hdr_free_pabd(hdr, B_TRUE);
+
+	if (!(zio_flags & ZIO_FLAG_RAW))
+		arc_hdr_set_compress(hdr, ZIO_COMPRESS_OFF);
+
 	ASSERT(!arc_buf_is_shared(buf));
 	ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL);
 
@@ -6123,8 +7206,8 @@ arc_state_multilist_index_func(multilist_t *ml, void *obj)
 
 	/*
 	 * The assumption here, is the hash value for a given
-	 * arc_buf_hdr_t will remain constant throughout it's lifetime
-	 * (i.e. it's b_spa, b_dva, and b_birth fields don't change).
+	 * arc_buf_hdr_t will remain constant throughout its lifetime
+	 * (i.e. its b_spa, b_dva, and b_birth fields don't change).
 	 * Thus, we don't need to store the header's sublist index
 	 * on insertion, as this index can be recalculated on removal.
 	 *
@@ -6248,6 +7331,8 @@ arc_state_fini(void)
 	multilist_destroy(arc_mru_ghost->arcs_list[ARC_BUFC_DATA]);
 	multilist_destroy(arc_mfu->arcs_list[ARC_BUFC_DATA]);
 	multilist_destroy(arc_mfu_ghost->arcs_list[ARC_BUFC_DATA]);
+	multilist_destroy(arc_l2c_only->arcs_list[ARC_BUFC_METADATA]);
+	multilist_destroy(arc_l2c_only->arcs_list[ARC_BUFC_DATA]);
 
 	aggsum_fini(&arc_meta_used);
 	aggsum_fini(&arc_size);
@@ -6256,6 +7341,7 @@ arc_state_fini(void)
 	aggsum_fini(&astat_hdr_size);
 	aggsum_fini(&astat_other_size);
 	aggsum_fini(&astat_l2_hdr_size);
+
 }
 
 uint64_t
@@ -6843,6 +7929,96 @@ top:
 	kmem_free(cb, sizeof (l2arc_write_callback_t));
 }
 
+static int
+l2arc_untransform(zio_t *zio, l2arc_read_callback_t *cb)
+{
+	int ret;
+	spa_t *spa = zio->io_spa;
+	arc_buf_hdr_t *hdr = cb->l2rcb_hdr;
+	blkptr_t *bp = zio->io_bp;
+	uint8_t salt[ZIO_DATA_SALT_LEN];
+	uint8_t iv[ZIO_DATA_IV_LEN];
+	uint8_t mac[ZIO_DATA_MAC_LEN];
+	boolean_t no_crypt = B_FALSE;
+
+	/*
+	 * ZIL data is never written to the L2ARC, so we don't need
+	 * special handling for its unique MAC storage.
+	 */
+	ASSERT3U(BP_GET_TYPE(bp), !=, DMU_OT_INTENT_LOG);
+	ASSERT(MUTEX_HELD(HDR_LOCK(hdr)));
+	ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL);
+
+	/*
+	 * If the data was encrypted, decrypt it now. Note that
+	 * we must check the bp here and not the hdr, since the
+	 * hdr does not have its encryption parameters updated
+	 * until arc_read_done().
+	 */
+	if (BP_IS_ENCRYPTED(bp)) {
+		abd_t *eabd = arc_get_data_abd(hdr, arc_hdr_size(hdr), hdr);
+
+		zio_crypt_decode_params_bp(bp, salt, iv);
+		zio_crypt_decode_mac_bp(bp, mac);
+
+		ret = spa_do_crypt_abd(B_FALSE, spa, &cb->l2rcb_zb,
+		    BP_GET_TYPE(bp), BP_GET_DEDUP(bp), BP_SHOULD_BYTESWAP(bp),
+		    salt, iv, mac, HDR_GET_PSIZE(hdr), eabd,
+		    hdr->b_l1hdr.b_pabd, &no_crypt);
+		if (ret != 0) {
+			arc_free_data_abd(hdr, eabd, arc_hdr_size(hdr), hdr);
+			goto error;
+		}
+
+		/*
+		 * If we actually performed decryption, replace b_pabd (and
+		 * the zio's io_abd) with the decrypted data. Otherwise we
+		 * can just throw our decryption buffer away.
+		 */
+		if (!no_crypt) {
+			arc_free_data_abd(hdr, hdr->b_l1hdr.b_pabd,
+			    arc_hdr_size(hdr), hdr);
+			hdr->b_l1hdr.b_pabd = eabd;
+			zio->io_abd = eabd;
+		} else {
+			arc_free_data_abd(hdr, eabd, arc_hdr_size(hdr), hdr);
+		}
+	}
+
+	/*
+	 * If the L2ARC block was compressed but ARC compression is
+	 * disabled, decompress the data into a new buffer and make
+	 * that buffer the new b_pabd and the zio's io_abd.
+	 */
+	if (HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF &&
+	    !HDR_COMPRESSION_ENABLED(hdr)) {
+		abd_t *cabd = arc_get_data_abd(hdr, arc_hdr_size(hdr), hdr);
+		void *tmp = abd_borrow_buf(cabd, arc_hdr_size(hdr));
+
+		ret = zio_decompress_data(HDR_GET_COMPRESS(hdr),
+		    hdr->b_l1hdr.b_pabd, tmp, HDR_GET_PSIZE(hdr),
+		    HDR_GET_LSIZE(hdr));
+		if (ret != 0) {
+			abd_return_buf_copy(cabd, tmp, arc_hdr_size(hdr));
+			arc_free_data_abd(hdr, cabd, arc_hdr_size(hdr), hdr);
+			goto error;
+		}
+
+		abd_return_buf_copy(cabd, tmp, arc_hdr_size(hdr));
+		arc_free_data_abd(hdr, hdr->b_l1hdr.b_pabd,
+		    arc_hdr_size(hdr), hdr);
+		hdr->b_l1hdr.b_pabd = cabd;
+		zio->io_abd = cabd;
+		zio->io_size = HDR_GET_LSIZE(hdr);
+	}
+
+	return (0);
+
+error:
+	return (ret);
+}
+
+
 /*
  * A read to a cache device completed.  Validate buffer contents before
  * handing over to the regular ARC routines.
@@ -6850,17 +8026,19 @@ top:
 static void
 l2arc_read_done(zio_t *zio)
 {
-	l2arc_read_callback_t *cb;
+	int tfm_error = 0;
+	l2arc_read_callback_t *cb = zio->io_private;
 	arc_buf_hdr_t *hdr;
 	kmutex_t *hash_lock;
 	boolean_t valid_cksum;
+	boolean_t using_rdata = (BP_IS_ENCRYPTED(&cb->l2rcb_bp) &&
+	    (cb->l2rcb_flags & ZIO_FLAG_RAW_ENCRYPT));
 
 	ASSERT3P(zio->io_vd, !=, NULL);
 	ASSERT(zio->io_flags & ZIO_FLAG_DONT_PROPAGATE);
 
 	spa_config_exit(zio->io_spa, SCL_L2ARC, zio->io_vd);
 
-	cb = zio->io_private;
 	ASSERT3P(cb, !=, NULL);
 	hdr = cb->l2rcb_hdr;
 	ASSERT3P(hdr, !=, NULL);
@@ -6876,8 +8054,13 @@ l2arc_read_done(zio_t *zio)
 	if (cb->l2rcb_abd != NULL) {
 		ASSERT3U(arc_hdr_size(hdr), <, zio->io_size);
 		if (zio->io_error == 0) {
-			abd_copy(hdr->b_l1hdr.b_pabd, cb->l2rcb_abd,
-			    arc_hdr_size(hdr));
+			if (using_rdata) {
+				abd_copy(hdr->b_crypt_hdr.b_rabd,
+				    cb->l2rcb_abd, arc_hdr_size(hdr));
+			} else {
+				abd_copy(hdr->b_l1hdr.b_pabd,
+				    cb->l2rcb_abd, arc_hdr_size(hdr));
+			}
 		}
 
 		/*
@@ -6893,7 +8076,15 @@ l2arc_read_done(zio_t *zio)
 		 */
 		abd_free(cb->l2rcb_abd);
 		zio->io_size = zio->io_orig_size = arc_hdr_size(hdr);
-		zio->io_abd = zio->io_orig_abd = hdr->b_l1hdr.b_pabd;
+
+		if (using_rdata) {
+			ASSERT(HDR_HAS_RABD(hdr));
+			zio->io_abd = zio->io_orig_abd =
+			    hdr->b_crypt_hdr.b_rabd;
+		} else {
+			ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL);
+			zio->io_abd = zio->io_orig_abd = hdr->b_l1hdr.b_pabd;
+		}
 	}
 
 	ASSERT3P(zio->io_abd, !=, NULL);
@@ -6901,12 +8092,23 @@ l2arc_read_done(zio_t *zio)
 	/*
 	 * Check this survived the L2ARC journey.
 	 */
-	ASSERT3P(zio->io_abd, ==, hdr->b_l1hdr.b_pabd);
+	ASSERT(zio->io_abd == hdr->b_l1hdr.b_pabd ||
+	    (HDR_HAS_RABD(hdr) && zio->io_abd == hdr->b_crypt_hdr.b_rabd));
 	zio->io_bp_copy = cb->l2rcb_bp;	/* XXX fix in L2ARC 2.0	*/
 	zio->io_bp = &zio->io_bp_copy;	/* XXX fix in L2ARC 2.0	*/
 
 	valid_cksum = arc_cksum_is_equal(hdr, zio);
-	if (valid_cksum && zio->io_error == 0 && !HDR_L2_EVICTED(hdr)) {
+
+	/*
+	 * b_rabd will always match the data as it exists on disk if it is
+	 * being used. Therefore if we are reading into b_rabd we do not
+	 * attempt to untransform the data.
+	 */
+	if (valid_cksum && !using_rdata)
+		tfm_error = l2arc_untransform(zio, cb);
+
+	if (valid_cksum && tfm_error == 0 && zio->io_error == 0 &&
+	    !HDR_L2_EVICTED(hdr)) {
 		mutex_exit(hash_lock);
 		zio->io_private = hdr;
 		arc_read_done(zio);
@@ -6921,7 +8123,7 @@ l2arc_read_done(zio_t *zio)
 		} else {
 			zio->io_error = SET_ERROR(EIO);
 		}
-		if (!valid_cksum)
+		if (!valid_cksum || tfm_error != 0)
 			ARCSTAT_BUMP(arcstat_l2_cksum_bad);
 
 		/*
@@ -6931,11 +8133,13 @@ l2arc_read_done(zio_t *zio)
 		 */
 		if (zio->io_waiter == NULL) {
 			zio_t *pio = zio_unique_parent(zio);
+			void *abd = (using_rdata) ?
+			    hdr->b_crypt_hdr.b_rabd : hdr->b_l1hdr.b_pabd;
 
 			ASSERT(!pio || pio->io_child_type == ZIO_CHILD_LOGICAL);
 
 			zio_nowait(zio_read(pio, zio->io_spa, zio->io_bp,
-			    hdr->b_l1hdr.b_pabd, zio->io_size, arc_read_done,
+			    abd, zio->io_size, arc_read_done,
 			    hdr, zio->io_priority, cb->l2rcb_flags,
 			    &cb->l2rcb_zb));
 		}
@@ -7095,6 +8299,123 @@ top:
 }
 
 /*
+ * Handle any abd transforms that might be required for writing to the L2ARC.
+ * On success, returns 0 and sets *abd_out to a new abd of asize bytes holding
+ * the data as it is on disk; on failure, returns an error and NULLs *abd_out.
+ */
+static int
+l2arc_apply_transforms(spa_t *spa, arc_buf_hdr_t *hdr, uint64_t asize,
+    abd_t **abd_out)
+{
+	int ret;
+	void *tmp = NULL;
+	abd_t *cabd = NULL, *eabd = NULL, *to_write = hdr->b_l1hdr.b_pabd;
+	enum zio_compress compress = HDR_GET_COMPRESS(hdr);
+	uint64_t psize = HDR_GET_PSIZE(hdr);
+	uint64_t size = arc_hdr_size(hdr);
+	boolean_t ismd = HDR_ISTYPE_METADATA(hdr);
+	boolean_t bswap = (hdr->b_l1hdr.b_byteswap != DMU_BSWAP_NUMFUNCS);
+	dsl_crypto_key_t *dck = NULL;
+	uint8_t mac[ZIO_DATA_MAC_LEN] = { 0 };
+	boolean_t no_crypt = B_FALSE;
+
+	ASSERT((HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF &&
+	    !HDR_COMPRESSION_ENABLED(hdr)) ||
+	    HDR_ENCRYPTED(hdr) || HDR_SHARED_DATA(hdr) || psize != asize);
+	ASSERT3U(psize, <=, asize);
+
+	/*
+	 * If this data only needs its own buffer, we simply allocate one
+	 * and copy the data. This may be done to eliminate a dependency on a
+	 * shared buffer or to reallocate the buffer to match asize.
+	 */
+	if (HDR_HAS_RABD(hdr) && asize != psize) {
+		ASSERT3U(asize, >=, psize);
+		to_write = abd_alloc_for_io(asize, ismd);
+		abd_copy(to_write, hdr->b_crypt_hdr.b_rabd, psize);
+		if (psize != asize)
+			abd_zero_off(to_write, psize, asize - psize);
+		goto out;
+	}
+
+	if ((compress == ZIO_COMPRESS_OFF || HDR_COMPRESSION_ENABLED(hdr)) &&
+	    !HDR_ENCRYPTED(hdr)) {
+		ASSERT3U(size, ==, psize);
+		to_write = abd_alloc_for_io(asize, ismd);
+		abd_copy(to_write, hdr->b_l1hdr.b_pabd, size);
+		if (size != asize)
+			abd_zero_off(to_write, size, asize - size);
+		goto out;
+	}
+
+	if (compress != ZIO_COMPRESS_OFF && !HDR_COMPRESSION_ENABLED(hdr)) {
+		cabd = abd_alloc_for_io(asize, ismd);
+		tmp = abd_borrow_buf(cabd, asize);
+
+		psize = zio_compress_data(compress, to_write, tmp, size);
+		ASSERT3U(psize, <=, HDR_GET_PSIZE(hdr));
+		if (psize < asize)
+			bzero((char *)tmp + psize, asize - psize);
+		psize = HDR_GET_PSIZE(hdr);
+		abd_return_buf_copy(cabd, tmp, asize);
+		to_write = cabd;
+	}
+
+	if (HDR_ENCRYPTED(hdr)) {
+		eabd = abd_alloc_for_io(asize, ismd);
+
+		/*
+		 * If the dataset was disowned before the buffer
+		 * made it to this point, the key to re-encrypt
+		 * it won't be available. In this case we simply
+		 * won't write the buffer to the L2ARC.
+		 */
+		ret = spa_keystore_lookup_key(spa, hdr->b_crypt_hdr.b_dsobj,
+		    FTAG, &dck);
+		if (ret != 0)
+			goto error;
+
+		ret = zio_do_crypt_abd(B_TRUE, &dck->dck_key,
+		    hdr->b_crypt_hdr.b_ot, bswap, hdr->b_crypt_hdr.b_salt,
+		    hdr->b_crypt_hdr.b_iv, mac, psize, to_write, eabd,
+		    &no_crypt);
+		if (ret != 0)
+			goto error;
+
+		if (no_crypt)
+			abd_copy(eabd, to_write, psize);
+
+		if (psize != asize)
+			abd_zero_off(eabd, psize, asize - psize);
+
+		/* assert that the MAC we got here matches the one we saved */
+		ASSERT0(bcmp(mac, hdr->b_crypt_hdr.b_mac, ZIO_DATA_MAC_LEN));
+		spa_keystore_dsl_key_rele(spa, dck, FTAG);
+
+		if (to_write == cabd)
+			abd_free(cabd);
+
+		to_write = eabd;
+	}
+
+out:
+	ASSERT3P(to_write, !=, hdr->b_l1hdr.b_pabd);
+	*abd_out = to_write;
+	return (0);
+
+error:
+	if (dck != NULL)
+		spa_keystore_dsl_key_rele(spa, dck, FTAG);
+	if (cabd != NULL)
+		abd_free(cabd);
+	if (eabd != NULL)
+		abd_free(eabd);
+
+	*abd_out = NULL;
+	return (ret);
+}
+
+/*
  * Find and write ARC buffers to the L2ARC device.
  *
  * An ARC_FLAG_L2_WRITING flag is set so that the L2ARC buffers are not valid
@@ -7130,6 +8451,8 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
 		multilist_sublist_t *mls = l2arc_sublist_lock(try);
 		uint64_t passed_sz = 0;
 
+		VERIFY3P(mls, !=, NULL);
+
 		/*
 		 * L2ARC fast warmup.
 		 *
@@ -7147,6 +8470,7 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
 
 		for (; hdr; hdr = hdr_prev) {
 			kmutex_t *hash_lock;
+			abd_t *to_write = NULL;
 
 			if (arc_warm == B_FALSE)
 				hdr_prev = multilist_sublist_next(mls, hdr);
@@ -7184,9 +8508,10 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
 			ASSERT(HDR_HAS_L1HDR(hdr));
 
 			ASSERT3U(HDR_GET_PSIZE(hdr), >, 0);
-			ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL);
 			ASSERT3U(arc_hdr_size(hdr), >, 0);
-			uint64_t psize = arc_hdr_size(hdr);
+			ASSERT(hdr->b_l1hdr.b_pabd != NULL ||
+			    HDR_HAS_RABD(hdr));
+			uint64_t psize = HDR_GET_PSIZE(hdr);
 			uint64_t asize = vdev_psize_to_asize(dev->l2ad_vdev,
 			    psize);
 
@@ -7196,6 +8521,57 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
 				break;
 			}
 
+			/*
+			 * We rely on the L1 portion of the header below, so
+			 * it's invalid for this header to have been evicted out
+			 * of the ghost cache, prior to being written out. The
+			 * ARC_FLAG_L2_WRITING bit ensures this won't happen.
+			 */
+			arc_hdr_set_flags(hdr, ARC_FLAG_L2_WRITING);
+			ASSERT(HDR_HAS_L1HDR(hdr));
+
+			ASSERT3U(HDR_GET_PSIZE(hdr), >, 0);
+			ASSERT(hdr->b_l1hdr.b_pabd != NULL ||
+			    HDR_HAS_RABD(hdr));
+			ASSERT3U(arc_hdr_size(hdr), >, 0);
+
+			/*
+			 * If this header has b_rabd, we can use this since it
+			 * must always match the data exactly as it exists on
+			 * disk. Otherwise, the L2ARC can normally use the
+			 * hdr's data, but if we're sharing data between the
+			 * hdr and one of its bufs, L2ARC needs its own copy of
+			 * the data so that the ZIO below can't race with the
+			 * buf consumer. To ensure that this copy will be
+			 * available for the lifetime of the ZIO and be cleaned
+			 * up afterwards, we add it to the l2arc_free_on_write
+			 * queue. If we need to apply any transforms to the
+			 * data (compression, encryption) we will also need the
+			 * extra buffer.
+			 */
+			if (HDR_HAS_RABD(hdr) && psize == asize) {
+				to_write = hdr->b_crypt_hdr.b_rabd;
+			} else if ((HDR_COMPRESSION_ENABLED(hdr) ||
+			    HDR_GET_COMPRESS(hdr) == ZIO_COMPRESS_OFF) &&
+			    !HDR_ENCRYPTED(hdr) && !HDR_SHARED_DATA(hdr) &&
+			    psize == asize) {
+				to_write = hdr->b_l1hdr.b_pabd;
+			} else {
+				int ret;
+				arc_buf_contents_t type = arc_buf_type(hdr);
+
+				ret = l2arc_apply_transforms(spa, hdr, asize,
+				    &to_write);
+				if (ret != 0) {
+					arc_hdr_clear_flags(hdr,
+					    ARC_FLAG_L2_WRITING);
+					mutex_exit(hash_lock);
+					continue;
+				}
+
+				l2arc_free_abd_on_write(to_write, asize, type);
+			}
+
 			if (pio == NULL) {
 				/*
 				 * Insert a dummy header on the buflist so
@@ -7223,37 +8599,9 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
 			list_insert_head(&dev->l2ad_buflist, hdr);
 			mutex_exit(&dev->l2ad_mtx);
 
-			(void) zfs_refcount_add_many(&dev->l2ad_alloc, psize,
-			    hdr);
+			(void) zfs_refcount_add_many(&dev->l2ad_alloc,
+			    arc_hdr_size(hdr), hdr);
 
-			/*
-			 * Normally the L2ARC can use the hdr's data, but if
-			 * we're sharing data between the hdr and one of its
-			 * bufs, L2ARC needs its own copy of the data so that
-			 * the ZIO below can't race with the buf consumer.
-			 * Another case where we need to create a copy of the
-			 * data is when the buffer size is not device-aligned
-			 * and we need to pad the block to make it such.
-			 * That also keeps the clock hand suitably aligned.
-			 *
-			 * To ensure that the copy will be available for the
-			 * lifetime of the ZIO and be cleaned up afterwards, we
-			 * add it to the l2arc_free_on_write queue.
-			 */
-			abd_t *to_write;
-			if (!HDR_SHARED_DATA(hdr) && psize == asize) {
-				to_write = hdr->b_l1hdr.b_pabd;
-			} else {
-				to_write = abd_alloc_for_io(asize,
-				    HDR_ISTYPE_METADATA(hdr));
-				abd_copy(to_write, hdr->b_l1hdr.b_pabd, psize);
-				if (asize != psize) {
-					abd_zero_off(to_write, psize,
-					    asize - psize);
-				}
-				l2arc_free_abd_on_write(to_write, asize,
-				    arc_buf_type(hdr));
-			}
 			wzio = zio_write_phys(pio, dev->l2ad_vdev,
 			    hdr->b_l2hdr.b_daddr, asize, to_write,
 			    ZIO_CHECKSUM_OFF, NULL, hdr,
diff --git a/usr/src/uts/common/fs/zfs/bpobj.c b/usr/src/uts/common/fs/zfs/bpobj.c
index bbdd765214..ec0d115cfc 100644
--- a/usr/src/uts/common/fs/zfs/bpobj.c
+++ b/usr/src/uts/common/fs/zfs/bpobj.c
@@ -266,7 +266,7 @@ bpobj_iterate_impl(bpobj_t *bpo, bpobj_itor_t func, void *arg, dmu_tx_t *tx,
 	}
 	if (free) {
 		VERIFY3U(0, ==, dmu_free_range(bpo->bpo_os, bpo->bpo_object,
-		    (i + 1) * sizeof (blkptr_t), -1ULL, tx));
+		    (i + 1) * sizeof (blkptr_t), DMU_OBJECT_END, tx));
 	}
 	if (err || !bpo->bpo_havesubobj || bpo->bpo_phys->bpo_subobjs == 0)
 		goto out;
@@ -344,7 +344,7 @@ bpobj_iterate_impl(bpobj_t *bpo, bpobj_itor_t func, void *arg, dmu_tx_t *tx,
 	if (free) {
 		VERIFY3U(0, ==, dmu_free_range(bpo->bpo_os,
 		    bpo->bpo_phys->bpo_subobjs,
-		    (i + 1) * sizeof (uint64_t), -1ULL, tx));
+		    (i + 1) * sizeof (uint64_t), DMU_OBJECT_END, tx));
 	}
 
 out:
diff --git a/usr/src/uts/common/fs/zfs/bptree.c b/usr/src/uts/common/fs/zfs/bptree.c
index c74d07236c..1a432507f7 100644
--- a/usr/src/uts/common/fs/zfs/bptree.c
+++ b/usr/src/uts/common/fs/zfs/bptree.c
@@ -211,7 +211,8 @@ bptree_iterate(objset_t *os, uint64_t obj, boolean_t free, bptree_itor_t func,
 	err = 0;
 	for (i = ba.ba_phys->bt_begin; i < ba.ba_phys->bt_end; i++) {
 		bptree_entry_phys_t bte;
-		int flags = TRAVERSE_PREFETCH_METADATA | TRAVERSE_POST;
+		int flags = TRAVERSE_PREFETCH_METADATA | TRAVERSE_POST
+		    | TRAVERSE_NO_DECRYPT;
 
 		err = dmu_read(os, obj, i * sizeof (bte), sizeof (bte),
 		    &bte, DMU_READ_NO_PREFETCH);
diff --git a/usr/src/uts/common/fs/zfs/dbuf.c b/usr/src/uts/common/fs/zfs/dbuf.c
index b9d6ca26fe..9c7205bd0d 100644
--- a/usr/src/uts/common/fs/zfs/dbuf.c
+++ b/usr/src/uts/common/fs/zfs/dbuf.c
@@ -157,7 +157,7 @@ uint64_t dbuf_metadata_cache_overflow;
  * cache size). Once the eviction thread is woken up and eviction is required,
  * it will continue evicting buffers until it's able to reduce the cache size
  * to the low water mark. If the cache size continues to grow and hits the high
- * water mark, then callers adding elments to the cache will begin to evict
+ * water mark, then callers adding elements to the cache will begin to evict
  * directly from the cache until the cache is no longer above the high water
  * mark.
  */
@@ -310,7 +310,7 @@ dbuf_hash_remove(dmu_buf_impl_t *db)
 	dmu_buf_impl_t *dbf, **dbp;
 
 	/*
-	 * We musn't hold db_mtx to maintain lock ordering:
+	 * We mustn't hold db_mtx to maintain lock ordering:
 	 * DBUF_HASH_MUTEX > db_mtx.
 	 */
 	ASSERT(zfs_refcount_is_zero(&db->db_holds));
@@ -413,7 +413,7 @@ dbuf_evict_user(dmu_buf_impl_t *db)
 boolean_t
 dbuf_is_metadata(dmu_buf_impl_t *db)
 {
-	if (db->db_level > 0) {
+	if (db->db_level > 0 || db->db_blkid == DMU_SPILL_BLKID) {
 		return (B_TRUE);
 	} else {
 		boolean_t is_metadata;
@@ -941,6 +941,7 @@ dbuf_whichblock(dnode_t *dn, int64_t level, uint64_t offset)
 	}
 }
 
+/* ARGSUSED */
 static void
 dbuf_read_done(zio_t *zio, const zbookmark_phys_t *zb, const blkptr_t *bp,
     arc_buf_t *buf, void *vdb)
@@ -984,12 +985,71 @@ dbuf_read_done(zio_t *zio, const zbookmark_phys_t *zb, const blkptr_t *bp,
 	dbuf_rele_and_unlock(db, NULL, B_FALSE);
 }
 
-static void
+
+/*
+ * When doing a decrypting read of a block, make sure the dnode block
+ * holding this block's dnode has been decrypted too. That is what fully
+ * authenticates the checksum-of-MACs tree from the root of the objset down
+ * to this block: indirect blocks are verified against their secure
+ * checksum-of-MACs assuming the dnode containing them is correct; now that
+ * we are decrypting, the key is loaded and that assumption can be verified.
+ * Encrypted dnode blocks are first read raw (MACs unverified) and are only
+ * decrypted / authenticated when we need to read an encrypted bonus buffer.
+ * Caller holds db_mtx. Returns 0 when nothing needs doing or on success,
+ * else the arc_untransform() error (EACCES is ignored for unencrypted types).
+ */
+static int
+dbuf_read_verify_dnode_crypt(dmu_buf_impl_t *db, uint32_t flags)
+{
+	int err = 0;
+	objset_t *os = db->db_objset;
+	arc_buf_t *dnode_abuf;
+	dnode_t *dn;
+	zbookmark_phys_t zb;
+
+	ASSERT(MUTEX_HELD(&db->db_mtx));
+
+	if (!os->os_encrypted || os->os_raw_receive ||
+	    (flags & DB_RF_NO_DECRYPT) != 0)
+		return (0);
+
+	DB_DNODE_ENTER(db);
+	dn = DB_DNODE(db);
+	dnode_abuf = (dn->dn_dbuf != NULL) ? dn->dn_dbuf->db_buf : NULL;
+
+	if (dnode_abuf == NULL || !arc_is_encrypted(dnode_abuf)) {
+		DB_DNODE_EXIT(db);
+		return (0);
+	}
+
+	SET_BOOKMARK(&zb, dmu_objset_id(os),
+	    DMU_META_DNODE_OBJECT, 0, dn->dn_dbuf->db_blkid);
+	err = arc_untransform(dnode_abuf, os->os_spa, &zb, B_TRUE);
+
+	/*
+	 * An error code of EACCES tells us that the key is still not
+	 * available. This is ok if we are only reading authenticated
+	 * (and therefore non-encrypted) blocks.
+	 */
+	if (err == EACCES && ((db->db_blkid != DMU_BONUS_BLKID &&
+	    !DMU_OT_IS_ENCRYPTED(dn->dn_type)) ||
+	    (db->db_blkid == DMU_BONUS_BLKID &&
+	    !DMU_OT_IS_ENCRYPTED(dn->dn_bonustype))))
+		err = 0;
+
+
+	DB_DNODE_EXIT(db);
+
+	return (err);
+}
+
+static int
 dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
 {
 	dnode_t *dn;
 	zbookmark_phys_t zb;
 	arc_flags_t aflags = ARC_FLAG_NOWAIT;
+	int err, zio_flags = 0;
 
 	DB_DNODE_ENTER(db);
 	dn = DB_DNODE(db);
@@ -1008,6 +1068,14 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
 		int bonuslen = MIN(dn->dn_bonuslen, dn->dn_phys->dn_bonuslen);
 		int max_bonuslen = DN_SLOTS_TO_BONUSLEN(dn->dn_num_slots);
 
+		/* if the underlying dnode block is encrypted, decrypt it */
+		err = dbuf_read_verify_dnode_crypt(db, flags);
+		if (err != 0) {
+			DB_DNODE_EXIT(db);
+			mutex_exit(&db->db_mtx);
+			return (err);
+		}
+
 		ASSERT3U(bonuslen, <=, db->db.db_size);
 		db->db.db_data = zio_buf_alloc(max_bonuslen);
 		arc_space_consume(max_bonuslen, ARC_SPACE_BONUS);
@@ -1018,7 +1086,7 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
 		DB_DNODE_EXIT(db);
 		db->db_state = DB_CACHED;
 		mutex_exit(&db->db_mtx);
-		return;
+		return (0);
 	}
 
 	/*
@@ -1058,7 +1126,30 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
 		DB_DNODE_EXIT(db);
 		db->db_state = DB_CACHED;
 		mutex_exit(&db->db_mtx);
-		return;
+		return (0);
+	}
+
+	SET_BOOKMARK(&zb, dmu_objset_id(db->db_objset),
+	    db->db.db_object, db->db_level, db->db_blkid);
+
+	/*
+	 * All bps of an encrypted os should have the encryption bit set.
+	 * If this is not true it indicates tampering and we report an error.
+	 */
+	if (db->db_objset->os_encrypted && !BP_USES_CRYPT(db->db_blkptr)) {
+		spa_log_error(db->db_objset->os_spa, &zb);
+		zfs_panic_recover("unencrypted block in encrypted "
+		    "object set %llu", dmu_objset_id(db->db_objset));
+		DB_DNODE_EXIT(db);
+		mutex_exit(&db->db_mtx);
+		return (SET_ERROR(EIO));
+	}
+
+	err = dbuf_read_verify_dnode_crypt(db, flags);
+	if (err != 0) {
+		DB_DNODE_EXIT(db);
+		mutex_exit(&db->db_mtx);
+		return (err);
 	}
 
 	DB_DNODE_EXIT(db);
@@ -1069,16 +1160,19 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
 	if (DBUF_IS_L2CACHEABLE(db))
 		aflags |= ARC_FLAG_L2CACHE;
 
-	SET_BOOKMARK(&zb, db->db_objset->os_dsl_dataset ?
-	    db->db_objset->os_dsl_dataset->ds_object : DMU_META_OBJSET,
-	    db->db.db_object, db->db_level, db->db_blkid);
-
 	dbuf_add_ref(db, NULL);
 
-	(void) arc_read(zio, db->db_objset->os_spa, db->db_blkptr,
-	    dbuf_read_done, db, ZIO_PRIORITY_SYNC_READ,
-	    (flags & DB_RF_CANFAIL) ? ZIO_FLAG_CANFAIL : ZIO_FLAG_MUSTSUCCEED,
+	zio_flags = (flags & DB_RF_CANFAIL) ?
+	    ZIO_FLAG_CANFAIL : ZIO_FLAG_MUSTSUCCEED;
+
+	if ((flags & DB_RF_NO_DECRYPT) && BP_IS_PROTECTED(db->db_blkptr))
+		zio_flags |= ZIO_FLAG_RAW;
+
+	err = arc_read(zio, db->db_objset->os_spa, db->db_blkptr,
+	    dbuf_read_done, db, ZIO_PRIORITY_SYNC_READ, zio_flags,
 	    &aflags, &zb);
+
+	return (err);
 }
 
 /*
@@ -1116,7 +1210,7 @@ dbuf_fix_old_data(dmu_buf_impl_t *db, uint64_t txg)
 	 * or (if there a no active holders)
 	 *	just null out the current db_data pointer.
 	 */
-	ASSERT(dr->dr_txg >= txg - 2);
+	ASSERT3U(dr->dr_txg, >=, txg - 2);
 	if (db->db_blkid == DMU_BONUS_BLKID) {
 		/* Note that the data bufs here are zio_bufs */
 		dnode_t *dn = DB_DNODE(db);
@@ -1125,18 +1219,31 @@ dbuf_fix_old_data(dmu_buf_impl_t *db, uint64_t txg)
 		arc_space_consume(bonuslen, ARC_SPACE_BONUS);
 		bcopy(db->db.db_data, dr->dt.dl.dr_data, bonuslen);
 	} else if (zfs_refcount_count(&db->db_holds) > db->db_dirtycnt) {
+		dnode_t *dn = DB_DNODE(db);
 		int size = arc_buf_size(db->db_buf);
 		arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db);
 		spa_t *spa = db->db_objset->os_spa;
 		enum zio_compress compress_type =
 		    arc_get_compression(db->db_buf);
 
-		if (compress_type == ZIO_COMPRESS_OFF) {
-			dr->dt.dl.dr_data = arc_alloc_buf(spa, db, type, size);
-		} else {
+		if (arc_is_encrypted(db->db_buf)) {
+			boolean_t byteorder;
+			uint8_t salt[ZIO_DATA_SALT_LEN];
+			uint8_t iv[ZIO_DATA_IV_LEN];
+			uint8_t mac[ZIO_DATA_MAC_LEN];
+
+			arc_get_raw_params(db->db_buf, &byteorder, salt,
+			    iv, mac);
+			dr->dt.dl.dr_data = arc_alloc_raw_buf(spa, db,
+			    dmu_objset_id(dn->dn_objset), byteorder, salt, iv,
+			    mac, dn->dn_type, size, arc_buf_lsize(db->db_buf),
+			    compress_type);
+		} else if (compress_type != ZIO_COMPRESS_OFF) {
 			ASSERT3U(type, ==, ARC_BUFC_DATA);
 			dr->dt.dl.dr_data = arc_alloc_compressed_buf(spa, db,
 			    size, arc_buf_lsize(db->db_buf), compress_type);
+		} else {
+			dr->dt.dl.dr_data = arc_alloc_buf(spa, db, type, size);
 		}
 		bcopy(db->db.db_data, dr->dt.dl.dr_data->b_data, size);
 	} else {
@@ -1172,20 +1279,36 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
 
 	mutex_enter(&db->db_mtx);
 	if (db->db_state == DB_CACHED) {
+		spa_t *spa = dn->dn_objset->os_spa;
+
 		/*
-		 * If the arc buf is compressed, we need to decompress it to
-		 * read the data. This could happen during the "zfs receive" of
-		 * a stream which is compressed and deduplicated.
+		 * Ensure that this block's dnode has been decrypted if
+		 * the caller has requested decrypted data.
 		 */
-		if (db->db_buf != NULL &&
-		    arc_get_compression(db->db_buf) != ZIO_COMPRESS_OFF) {
-			dbuf_fix_old_data(db,
-			    spa_syncing_txg(dmu_objset_spa(db->db_objset)));
-			err = arc_decompress(db->db_buf);
+		err = dbuf_read_verify_dnode_crypt(db, flags);
+
+		/*
+		 * If the arc buf is compressed or encrypted and the caller
+		 * requested uncompressed data, we need to untransform it
+		 * before returning. We also call arc_untransform() on any
+		 * unauthenticated blocks, which will verify their MAC if
+		 * the key is now available.
+		 */
+		if (err == 0 && db->db_buf != NULL &&
+		    (flags & DB_RF_NO_DECRYPT) == 0 &&
+		    (arc_is_encrypted(db->db_buf) ||
+		    arc_is_unauthenticated(db->db_buf) ||
+		    arc_get_compression(db->db_buf) != ZIO_COMPRESS_OFF)) {
+			zbookmark_phys_t zb;
+
+			SET_BOOKMARK(&zb, dmu_objset_id(db->db_objset),
+			    db->db.db_object, db->db_level, db->db_blkid);
+			dbuf_fix_old_data(db, spa_syncing_txg(spa));
+			err = arc_untransform(db->db_buf, spa, &zb, B_FALSE);
 			dbuf_set_data(db, db->db_buf);
 		}
 		mutex_exit(&db->db_mtx);
-		if (prefetch)
+		if (err == 0 && prefetch)
 			dmu_zfetch(&dn->dn_zfetch, db->db_blkid, 1, B_TRUE);
 		if ((flags & DB_RF_HAVESTRUCT) == 0)
 			rw_exit(&dn->dn_struct_rwlock);
@@ -1199,18 +1322,18 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
 			zio = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL);
 			need_wait = B_TRUE;
 		}
-		dbuf_read_impl(db, zio, flags);
+		err = dbuf_read_impl(db, zio, flags);
 
 		/* dbuf_read_impl has dropped db_mtx for us */
 
-		if (prefetch)
+		if (!err && prefetch)
 			dmu_zfetch(&dn->dn_zfetch, db->db_blkid, 1, B_TRUE);
 
 		if ((flags & DB_RF_HAVESTRUCT) == 0)
 			rw_exit(&dn->dn_struct_rwlock);
 		DB_DNODE_EXIT(db);
 
-		if (need_wait)
+		if (!err && need_wait)
 			err = zio_wait(zio);
 	} else {
 		/*
@@ -1300,6 +1423,7 @@ dbuf_unoverride(dbuf_dirty_record_t *dr)
 
 	dr->dt.dl.dr_override_state = DR_NOT_OVERRIDDEN;
 	dr->dt.dl.dr_nopwrite = B_FALSE;
+	dr->dt.dl.dr_has_raw_params = B_FALSE;
 
 	/*
 	 * Release the already-written buffer, so we leave it in
@@ -1744,7 +1868,10 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
 	ddt_prefetch(os->os_spa, db->db_blkptr);
 
 	if (db->db_level == 0) {
-		dnode_new_blkid(dn, db->db_blkid, tx, drop_struct_lock);
+		ASSERT(!db->db_objset->os_raw_receive ||
+		    dn->dn_maxblkid >= db->db_blkid);
+		dnode_new_blkid(dn, db->db_blkid, tx,
+		    drop_struct_lock, B_FALSE);
 		ASSERT(dn->dn_maxblkid >= db->db_blkid);
 	}
 
@@ -1891,11 +2018,10 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
 	return (B_FALSE);
 }
 
-void
-dmu_buf_will_dirty(dmu_buf_t *db_fake, dmu_tx_t *tx)
+static void
+dmu_buf_will_dirty_impl(dmu_buf_t *db_fake, int flags, dmu_tx_t *tx)
 {
 	dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
-	int rf = DB_RF_MUST_SUCCEED | DB_RF_NOPREFETCH;
 
 	ASSERT(tx->tx_txg != 0);
 	ASSERT(!zfs_refcount_is_zero(&db->db_holds));
@@ -1926,13 +2052,20 @@ dmu_buf_will_dirty(dmu_buf_t *db_fake, dmu_tx_t *tx)
 
 	DB_DNODE_ENTER(db);
 	if (RW_WRITE_HELD(&DB_DNODE(db)->dn_struct_rwlock))
-		rf |= DB_RF_HAVESTRUCT;
+		flags |= DB_RF_HAVESTRUCT;
 	DB_DNODE_EXIT(db);
-	(void) dbuf_read(db, NULL, rf);
+	(void) dbuf_read(db, NULL, flags);
 	(void) dbuf_dirty(db, tx);
 }
 
 void
+dmu_buf_will_dirty(dmu_buf_t *db_fake, dmu_tx_t *tx)
+{
+	dmu_buf_will_dirty_impl(db_fake,
+	    DB_RF_MUST_SUCCEED | DB_RF_NOPREFETCH, tx);
+}
+
+void
 dmu_buf_will_not_fill(dmu_buf_t *db_fake, dmu_tx_t *tx)
 {
 	dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
@@ -1959,6 +2092,44 @@ dmu_buf_will_fill(dmu_buf_t *db_fake, dmu_tx_t *tx)
 	(void) dbuf_dirty(db, tx);
 }
 
+/*
+ * This function is effectively the same as dmu_buf_will_dirty(), but
+ * indicates the caller expects raw encrypted data in the db, and provides
+ * the crypt params (byteorder, salt, iv, mac) which should be stored in the
+ * blkptr_t when this dbuf is written.  This is only used for blocks of
+ * dnodes during a raw receive.
+ */
+void
+dmu_buf_set_crypt_params(dmu_buf_t *db_fake, boolean_t byteorder,
+    const uint8_t *salt, const uint8_t *iv, const uint8_t *mac, dmu_tx_t *tx)
+{
+	dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
+	dbuf_dirty_record_t *dr;
+
+	/*
+	 * dr_has_raw_params is only processed for blocks of dnodes
+	 * (see dbuf_prepare_encrypted_dnode_leaf()).
+	 */
+	ASSERT3U(db->db.db_object, ==, DMU_META_DNODE_OBJECT);
+	ASSERT3U(db->db_level, ==, 0);
+
+	dmu_buf_will_dirty_impl(db_fake,
+	    DB_RF_MUST_SUCCEED | DB_RF_NOPREFETCH | DB_RF_NO_DECRYPT, tx);
+	/* Walk the dirty list to the record for this tx's txg. */
+	dr = db->db_last_dirty;
+	while (dr != NULL && dr->dr_txg > tx->tx_txg)
+		dr = dr->dr_next;
+
+	ASSERT3P(dr, !=, NULL);
+	ASSERT3U(dr->dr_txg, ==, tx->tx_txg);
+
+	dr->dt.dl.dr_has_raw_params = B_TRUE;
+	dr->dt.dl.dr_byteorder = byteorder;
+	bcopy(salt, dr->dt.dl.dr_salt, ZIO_DATA_SALT_LEN);
+	bcopy(iv, dr->dt.dl.dr_iv, ZIO_DATA_IV_LEN);
+	bcopy(mac, dr->dt.dl.dr_mac, ZIO_DATA_MAC_LEN);
+}
+
 #pragma weak dmu_buf_fill_done = dbuf_fill_done
 /* ARGSUSED */
 void
@@ -2045,6 +2216,13 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx)
 
 	if (db->db_state == DB_CACHED &&
 	    zfs_refcount_count(&db->db_holds) - 1 > db->db_dirtycnt) {
+		/*
+		 * In practice, we will never have a case where we have an
+		 * encrypted arc buffer while additional holds exist on the
+		 * dbuf. We don't handle this here so we simply assert that
+		 * fact instead.
+		 */
+		ASSERT(!arc_is_encrypted(buf));
 		mutex_exit(&db->db_mtx);
 		(void) dbuf_dirty(db, tx);
 		bcopy(buf->b_data, db->db.db_data, db->db.db_size);
@@ -2060,6 +2238,7 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx)
 		ASSERT(db->db_buf != NULL);
 		if (dr != NULL && dr->dr_txg == tx->tx_txg) {
 			ASSERT(dr->dt.dl.dr_data == db->db_buf);
+
 			if (!arc_released(db->db_buf)) {
 				ASSERT(dr->dt.dl.dr_override_state ==
 				    DR_OVERRIDDEN);
@@ -2383,15 +2562,20 @@ dbuf_issue_final_prefetch(dbuf_prefetch_arg_t *dpa, blkptr_t *bp)
 	if (BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp))
 		return;
 
+	int zio_flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE;
 	arc_flags_t aflags =
 	    dpa->dpa_aflags | ARC_FLAG_NOWAIT | ARC_FLAG_PREFETCH;
 
+	/* dnodes are always read as raw and then converted later */
+	if (BP_GET_TYPE(bp) == DMU_OT_DNODE && BP_IS_PROTECTED(bp) &&
+	    dpa->dpa_curlevel == 0)
+		zio_flags |= ZIO_FLAG_RAW;
+
 	ASSERT3U(dpa->dpa_curlevel, ==, BP_GET_LEVEL(bp));
 	ASSERT3U(dpa->dpa_curlevel, ==, dpa->dpa_zb.zb_level);
 	ASSERT(dpa->dpa_zio != NULL);
 	(void) arc_read(dpa->dpa_zio, dpa->dpa_spa, bp, NULL, NULL,
-	    dpa->dpa_prio, ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE,
-	    &aflags, &dpa->dpa_zb);
+	    dpa->dpa_prio, zio_flags, &aflags, &dpa->dpa_zb);
 }
 
 /*
@@ -2399,6 +2583,7 @@ dbuf_issue_final_prefetch(dbuf_prefetch_arg_t *dpa, blkptr_t *bp)
  * will either read in the next indirect block down the tree or issue the actual
  * prefetch if the next block down is our target.
  */
+/* ARGSUSED */
 static void
 dbuf_prefetch_indirect_done(zio_t *zio, const zbookmark_phys_t *zb,
     const blkptr_t *iobp, arc_buf_t *abuf, void *private)
@@ -2428,7 +2613,7 @@ dbuf_prefetch_indirect_done(zio_t *zio, const zbookmark_phys_t *zb,
 	 */
 	if (zio != NULL) {
 		ASSERT3S(BP_GET_LEVEL(zio->io_bp), ==, dpa->dpa_curlevel);
-		if (zio->io_flags & ZIO_FLAG_RAW) {
+		if (zio->io_flags & ZIO_FLAG_RAW_COMPRESS) {
 			ASSERT3U(BP_GET_PSIZE(zio->io_bp), ==, zio->io_size);
 		} else {
 			ASSERT3U(BP_GET_LSIZE(zio->io_bp), ==, zio->io_size);
@@ -2485,7 +2670,8 @@ dbuf_prefetch_indirect_done(zio_t *zio, const zbookmark_phys_t *zb,
  * Issue prefetch reads for the given block on the given level.  If the indirect
  * blocks above that block are not in memory, we will read them in
  * asynchronously.  As a result, this call never blocks waiting for a read to
- * complete.
+ * complete. Note that the prefetch might fail if the dataset is encrypted and
+ * the encryption key is unmapped before the IO completes.
  */
 void
 dbuf_prefetch(dnode_t *dn, int64_t level, uint64_t blkid, zio_priority_t prio,
@@ -2614,6 +2800,43 @@ dbuf_prefetch(dnode_t *dn, int64_t level, uint64_t blkid, zio_priority_t prio,
 }
 
 /*
+ * Helper function for __dbuf_hold_impl() to copy a buffer. Handles
+ * the case of encrypted, compressed and uncompressed buffers by
+ * allocating the new buffer, respectively, with arc_alloc_raw_buf(),
+ * arc_alloc_compressed_buf() or arc_alloc_buf().
+ *
+ * NOTE(review): said to be noinline (stack bloat), but it is plain static.
+ */
+static void
+dbuf_hold_copy(dnode_t *dn,	dmu_buf_impl_t *db, dbuf_dirty_record_t *dr)
+{
+	arc_buf_t *data = dr->dt.dl.dr_data;
+	enum zio_compress compress_type = arc_get_compression(data);
+
+	if (arc_is_encrypted(data)) {
+		boolean_t byteorder;
+		uint8_t salt[ZIO_DATA_SALT_LEN];
+		uint8_t iv[ZIO_DATA_IV_LEN];
+		uint8_t mac[ZIO_DATA_MAC_LEN];
+
+		arc_get_raw_params(data, &byteorder, salt, iv, mac);
+		dbuf_set_data(db, arc_alloc_raw_buf(dn->dn_objset->os_spa, db,
+		    dmu_objset_id(dn->dn_objset), byteorder, salt, iv, mac,
+		    dn->dn_type, arc_buf_size(data), arc_buf_lsize(data),
+		    compress_type));
+	} else if (compress_type != ZIO_COMPRESS_OFF) {
+		dbuf_set_data(db, arc_alloc_compressed_buf(
+		    dn->dn_objset->os_spa, db, arc_buf_size(data),
+		    arc_buf_lsize(data), compress_type));
+	} else {
+		dbuf_set_data(db, arc_alloc_buf(dn->dn_objset->os_spa, db,
+		    DBUF_GET_BUFC_TYPE(db), db->db.db_size));
+	}
+
+	bcopy(data->b_data, db->db.db_data, arc_buf_size(data));
+}
+
+/*
  * Returns with db_holds incremented, and db_mtx not held.
  * Note: dn_struct_rwlock must be held.
  */
@@ -2677,16 +2900,8 @@ top:
 	    dn->dn_object != DMU_META_DNODE_OBJECT &&
 	    db->db_state == DB_CACHED && db->db_data_pending) {
 		dbuf_dirty_record_t *dr = db->db_data_pending;
-
-		if (dr->dt.dl.dr_data == db->db_buf) {
-			arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db);
-
-			dbuf_set_data(db,
-			    arc_alloc_buf(dn->dn_objset->os_spa, db, type,
-			    db->db.db_size));
-			bcopy(dr->dt.dl.dr_data->b_data, db->db.db_data,
-			    db->db.db_size);
-		}
+		if (dr->dt.dl.dr_data == db->db_buf)
+			dbuf_hold_copy(dn, db, dr);
 	}
 
 	if (multilist_link_active(&db->db_cache_link)) {
@@ -2960,6 +3175,20 @@ dbuf_refcount(dmu_buf_impl_t *db)
 	return (zfs_refcount_count(&db->db_holds));
 }
 
+uint64_t
+dmu_buf_user_refcount(dmu_buf_t *db_fake)
+{
+	uint64_t holds;
+	dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
+
+	mutex_enter(&db->db_mtx);
+	ASSERT3U(zfs_refcount_count(&db->db_holds), >=, db->db_dirtycnt);
+	holds = zfs_refcount_count(&db->db_holds) - db->db_dirtycnt;
+	mutex_exit(&db->db_mtx);
+	/* db_holds minus db_dirtycnt, both sampled under db_mtx above */
+	return (holds);
+}
+
 void *
 dmu_buf_replace_user(dmu_buf_t *db_fake, dmu_buf_user_t *old_user,
     dmu_buf_user_t *new_user)
@@ -3088,6 +3317,50 @@ dbuf_check_blkptr(dnode_t *dn, dmu_buf_impl_t *db)
 	}
 }
 
+/*
+ * When syncing out blocks of dnodes, adjust the block to deal with
+ * encryption.  Normally, we make sure the block is decrypted before writing
+ * it (panicking on failure).  If we have crypt params, we are writing a raw
+ * (encrypted) block from a raw receive, so set the ARC buf's crypt params to
+ * fill the BP with the correct byteorder, salt, iv, and mac.  Needs db_mtx.
+ *
+ * XXX we should handle decrypting the dnode block in dbuf_dirty().
+ */
+static void
+dbuf_prepare_encrypted_dnode_leaf(dbuf_dirty_record_t *dr)
+{
+	int err;
+	dmu_buf_impl_t *db = dr->dr_dbuf;
+
+	ASSERT(MUTEX_HELD(&db->db_mtx));
+	ASSERT3U(db->db.db_object, ==, DMU_META_DNODE_OBJECT);
+	ASSERT3U(db->db_level, ==, 0);
+
+	if (!db->db_objset->os_raw_receive && arc_is_encrypted(db->db_buf)) {
+		zbookmark_phys_t zb;
+
+		/*
+		 * Unfortunately, there is currently no mechanism for
+		 * syncing context to handle decryption errors. An error
+		 * here is only possible if an attacker maliciously
+		 * changed a dnode block and updated the associated
+		 * checksums going up the block tree.
+		 */
+		SET_BOOKMARK(&zb, dmu_objset_id(db->db_objset),
+		    db->db.db_object, db->db_level, db->db_blkid);
+		err = arc_untransform(db->db_buf, db->db_objset->os_spa,
+		    &zb, B_TRUE);
+		if (err)
+			panic("Invalid dnode block MAC");
+	} else if (dr->dt.dl.dr_has_raw_params) {
+		(void) arc_release(dr->dt.dl.dr_data, db);
+		arc_convert_to_raw(dr->dt.dl.dr_data,
+		    dmu_objset_id(db->db_objset),
+		    dr->dt.dl.dr_byteorder, DMU_OT_DNODE,
+		    dr->dt.dl.dr_salt, dr->dt.dl.dr_iv, dr->dt.dl.dr_mac);
+	}
+}
+
 static void
 dbuf_sync_indirect(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
 {
@@ -3230,6 +3503,13 @@ dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
 		ASSERT(dr->dt.dl.dr_override_state != DR_NOT_OVERRIDDEN);
 	}
 
+	/*
+	 * If this is a dnode block, ensure it is appropriately encrypted
+	 * or decrypted, depending on what we are writing to it this txg.
+	 */
+	if (os->os_encrypted && dn->dn_object == DMU_META_DNODE_OBJECT)
+		dbuf_prepare_encrypted_dnode_leaf(dr);
+
 	if (db->db_state != DB_NOFILL &&
 	    dn->dn_object != DMU_META_DNODE_OBJECT &&
 	    zfs_refcount_count(&db->db_holds) > 1 &&
@@ -3247,16 +3527,26 @@ dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
 		 * DNONE_DNODE blocks).
 		 */
 		int psize = arc_buf_size(*datap);
+		int lsize = arc_buf_lsize(*datap);
 		arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db);
 		enum zio_compress compress_type = arc_get_compression(*datap);
 
-		if (compress_type == ZIO_COMPRESS_OFF) {
-			*datap = arc_alloc_buf(os->os_spa, db, type, psize);
-		} else {
+		if (arc_is_encrypted(*datap)) {
+			boolean_t byteorder;
+			uint8_t salt[ZIO_DATA_SALT_LEN];
+			uint8_t iv[ZIO_DATA_IV_LEN];
+			uint8_t mac[ZIO_DATA_MAC_LEN];
+
+			arc_get_raw_params(*datap, &byteorder, salt, iv, mac);
+			*datap = arc_alloc_raw_buf(os->os_spa, db,
+			    dmu_objset_id(os), byteorder, salt, iv, mac,
+			    dn->dn_type, psize, lsize, compress_type);
+		} else if (compress_type != ZIO_COMPRESS_OFF) {
 			ASSERT3U(type, ==, ARC_BUFC_DATA);
-			int lsize = arc_buf_lsize(*datap);
 			*datap = arc_alloc_compressed_buf(os->os_spa, db,
 			    psize, lsize, compress_type);
+		} else {
+			*datap = arc_alloc_buf(os->os_spa, db, type, psize);
 		}
 		bcopy(db->db.db_data, (*datap)->b_data, psize);
 	}
@@ -3357,8 +3647,10 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
 	if (db->db_level == 0) {
 		mutex_enter(&dn->dn_mtx);
 		if (db->db_blkid > dn->dn_phys->dn_maxblkid &&
-		    db->db_blkid != DMU_SPILL_BLKID)
+		    db->db_blkid != DMU_SPILL_BLKID) {
+			ASSERT0(db->db_objset->os_raw_receive);
 			dn->dn_phys->dn_maxblkid = db->db_blkid;
+		}
 		mutex_exit(&dn->dn_mtx);
 
 		if (dn->dn_type == DMU_OT_DNODE) {
@@ -3393,7 +3685,7 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
 	DB_DNODE_EXIT(db);
 
 	if (!BP_IS_EMBEDDED(bp))
-		bp->blk_fill = fill;
+		BP_SET_FILL(bp, fill);
 
 	mutex_exit(&db->db_mtx);
 
@@ -3814,6 +4106,7 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx)
 	wp_flag |= (db->db_state == DB_NOFILL) ? WP_NOFILL : 0;
 
 	dmu_write_policy(os, dn, db->db_level, wp_flag, &zp);
+
 	DB_DNODE_EXIT(db);
 
 	/*
diff --git a/usr/src/uts/common/fs/zfs/ddt.c b/usr/src/uts/common/fs/zfs/ddt.c
index 1d51329511..8bcf6af8ba 100644
--- a/usr/src/uts/common/fs/zfs/ddt.c
+++ b/usr/src/uts/common/fs/zfs/ddt.c
@@ -253,6 +253,10 @@ ddt_bp_fill(const ddt_phys_t *ddp, blkptr_t *bp, uint64_t txg)
 	BP_SET_BIRTH(bp, txg, ddp->ddp_phys_birth);
 }
 
+/*
+ * The bp created via this function may be used for repairs and scrub, but it
+ * will be missing the salt / IV required to do a full decrypting read.
+ */
 void
 ddt_bp_create(enum zio_checksum checksum,
     const ddt_key_t *ddk, const ddt_phys_t *ddp, blkptr_t *bp)
@@ -263,11 +267,12 @@ ddt_bp_create(enum zio_checksum checksum,
 		ddt_bp_fill(ddp, bp, ddp->ddp_phys_birth);
 
 	bp->blk_cksum = ddk->ddk_cksum;
-	bp->blk_fill = 1;
 
 	BP_SET_LSIZE(bp, DDK_GET_LSIZE(ddk));
 	BP_SET_PSIZE(bp, DDK_GET_PSIZE(ddk));
 	BP_SET_COMPRESS(bp, DDK_GET_COMPRESS(ddk));
+	BP_SET_CRYPT(bp, DDK_GET_CRYPT(ddk));
+	BP_SET_FILL(bp, 1);
 	BP_SET_CHECKSUM(bp, checksum);
 	BP_SET_TYPE(bp, DMU_OT_DEDUP);
 	BP_SET_LEVEL(bp, 0);
@@ -281,9 +286,12 @@ ddt_key_fill(ddt_key_t *ddk, const blkptr_t *bp)
 	ddk->ddk_cksum = bp->blk_cksum;
 	ddk->ddk_prop = 0;
 
+	ASSERT(BP_IS_ENCRYPTED(bp) || !BP_USES_CRYPT(bp));
+
 	DDK_SET_LSIZE(ddk, BP_GET_LSIZE(bp));
 	DDK_SET_PSIZE(ddk, BP_GET_PSIZE(bp));
 	DDK_SET_COMPRESS(ddk, BP_GET_COMPRESS(bp));
+	DDK_SET_CRYPT(ddk, BP_USES_CRYPT(bp));
 }
 
 void
@@ -367,7 +375,7 @@ ddt_stat_generate(ddt_t *ddt, ddt_entry_t *dde, ddt_stat_t *dds)
 		if (ddp->ddp_phys_birth == 0)
 			continue;
 
-		for (int d = 0; d < SPA_DVAS_PER_BP; d++)
+		for (int d = 0; d < DDE_GET_NDVAS(dde); d++)
 			dsize += dva_get_dsize_sync(spa, &ddp->ddp_dva[d]);
 
 		dds->dds_blocks += 1;
@@ -521,6 +529,7 @@ ddt_ditto_copies_needed(ddt_t *ddt, ddt_entry_t *dde, ddt_phys_t *ddp_willref)
 	uint64_t ditto = spa->spa_dedup_ditto;
 	int total_copies = 0;
 	int desired_copies = 0;
+	int copies_needed = 0;
 
 	for (int p = DDT_PHYS_SINGLE; p <= DDT_PHYS_TRIPLE; p++) {
 		ddt_phys_t *ddp = &dde->dde_phys[p];
@@ -546,7 +555,13 @@ ddt_ditto_copies_needed(ddt_t *ddt, ddt_entry_t *dde, ddt_phys_t *ddp_willref)
 	if (total_refcnt >= ditto * ditto)
 		desired_copies++;
 
-	return (MAX(desired_copies, total_copies) - total_copies);
+	copies_needed = MAX(desired_copies, total_copies) - total_copies;
+
+	/* encrypted blocks store their IV in DVA[2] */
+	if (DDK_GET_CRYPT(&dde->dde_key))
+		copies_needed = MIN(copies_needed, SPA_DVAS_PER_BP - 1);
+
+	return (copies_needed);
 }
 
 int
@@ -556,7 +571,7 @@ ddt_ditto_copies_present(ddt_entry_t *dde)
 	dva_t *dva = ddp->ddp_dva;
 	int copies = 0 - DVA_GET_GANG(dva);
 
-	for (int d = 0; d < SPA_DVAS_PER_BP; d++, dva++)
+	for (int d = 0; d < DDE_GET_NDVAS(dde); d++, dva++)
 		if (DVA_IS_VALID(dva))
 			copies++;
 
diff --git a/usr/src/uts/common/fs/zfs/dmu.c b/usr/src/uts/common/fs/zfs/dmu.c
index 95ca9f76aa..02bdfdfa12 100644
--- a/usr/src/uts/common/fs/zfs/dmu.c
+++ b/usr/src/uts/common/fs/zfs/dmu.c
@@ -96,60 +96,60 @@ int zfs_object_remap_one_indirect_delay_ticks = 0;
 uint64_t dmu_prefetch_max = 8 * SPA_MAXBLOCKSIZE;
 
 const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES] = {
-	{ DMU_BSWAP_UINT8,  TRUE,  FALSE,  "unallocated"		},
-	{ DMU_BSWAP_ZAP,    TRUE,  TRUE,   "object directory"		},
-	{ DMU_BSWAP_UINT64, TRUE,  TRUE,   "object array"		},
-	{ DMU_BSWAP_UINT8,  TRUE,  FALSE,  "packed nvlist"		},
-	{ DMU_BSWAP_UINT64, TRUE,  FALSE,  "packed nvlist size"		},
-	{ DMU_BSWAP_UINT64, TRUE,  FALSE,  "bpobj"			},
-	{ DMU_BSWAP_UINT64, TRUE,  FALSE,  "bpobj header"		},
-	{ DMU_BSWAP_UINT64, TRUE,  FALSE,  "SPA space map header"	},
-	{ DMU_BSWAP_UINT64, TRUE,  FALSE,  "SPA space map"		},
-	{ DMU_BSWAP_UINT64, TRUE,  FALSE,  "ZIL intent log"		},
-	{ DMU_BSWAP_DNODE,  TRUE,  FALSE,  "DMU dnode"			},
-	{ DMU_BSWAP_OBJSET, TRUE,  TRUE,   "DMU objset"			},
-	{ DMU_BSWAP_UINT64, TRUE,  TRUE,   "DSL directory"		},
-	{ DMU_BSWAP_ZAP,    TRUE,  TRUE,   "DSL directory child map"	},
-	{ DMU_BSWAP_ZAP,    TRUE,  TRUE,   "DSL dataset snap map"	},
-	{ DMU_BSWAP_ZAP,    TRUE,  TRUE,   "DSL props"			},
-	{ DMU_BSWAP_UINT64, TRUE,  TRUE,   "DSL dataset"		},
-	{ DMU_BSWAP_ZNODE,  TRUE,  FALSE,  "ZFS znode"			},
-	{ DMU_BSWAP_OLDACL, TRUE,  FALSE,  "ZFS V0 ACL"			},
-	{ DMU_BSWAP_UINT8,  FALSE, FALSE,  "ZFS plain file"		},
-	{ DMU_BSWAP_ZAP,    TRUE,  FALSE,  "ZFS directory"		},
-	{ DMU_BSWAP_ZAP,    TRUE,  FALSE,  "ZFS master node"		},
-	{ DMU_BSWAP_ZAP,    TRUE,  FALSE,  "ZFS delete queue"		},
-	{ DMU_BSWAP_UINT8,  FALSE, FALSE,  "zvol object"		},
-	{ DMU_BSWAP_ZAP,    TRUE,  FALSE,  "zvol prop"			},
-	{ DMU_BSWAP_UINT8,  FALSE, FALSE,  "other uint8[]"		},
-	{ DMU_BSWAP_UINT64, FALSE, FALSE,  "other uint64[]"		},
-	{ DMU_BSWAP_ZAP,    TRUE,  FALSE,  "other ZAP"			},
-	{ DMU_BSWAP_ZAP,    TRUE,  FALSE,  "persistent error log"	},
-	{ DMU_BSWAP_UINT8,  TRUE,  FALSE,  "SPA history"		},
-	{ DMU_BSWAP_UINT64, TRUE,  FALSE,  "SPA history offsets"	},
-	{ DMU_BSWAP_ZAP,    TRUE,  TRUE,   "Pool properties"		},
-	{ DMU_BSWAP_ZAP,    TRUE,  TRUE,   "DSL permissions"		},
-	{ DMU_BSWAP_ACL,    TRUE,  FALSE,  "ZFS ACL"			},
-	{ DMU_BSWAP_UINT8,  TRUE,  FALSE,  "ZFS SYSACL"			},
-	{ DMU_BSWAP_UINT8,  TRUE,  FALSE,  "FUID table"			},
-	{ DMU_BSWAP_UINT64, TRUE,  FALSE,  "FUID table size"		},
-	{ DMU_BSWAP_ZAP,    TRUE,  TRUE,   "DSL dataset next clones"	},
-	{ DMU_BSWAP_ZAP,    TRUE,  FALSE,  "scan work queue"		},
-	{ DMU_BSWAP_ZAP,    TRUE,  FALSE,  "ZFS user/group used"	},
-	{ DMU_BSWAP_ZAP,    TRUE,  FALSE,  "ZFS user/group quota"	},
-	{ DMU_BSWAP_ZAP,    TRUE,  TRUE,   "snapshot refcount tags"	},
-	{ DMU_BSWAP_ZAP,    TRUE,  FALSE,  "DDT ZAP algorithm"		},
-	{ DMU_BSWAP_ZAP,    TRUE,  FALSE,  "DDT statistics"		},
-	{ DMU_BSWAP_UINT8,  TRUE,  FALSE,  "System attributes"		},
-	{ DMU_BSWAP_ZAP,    TRUE,  FALSE,  "SA master node"		},
-	{ DMU_BSWAP_ZAP,    TRUE,  FALSE,  "SA attr registration"	},
-	{ DMU_BSWAP_ZAP,    TRUE,  FALSE,  "SA attr layouts"		},
-	{ DMU_BSWAP_ZAP,    TRUE,  FALSE,  "scan translations"		},
-	{ DMU_BSWAP_UINT8,  FALSE, FALSE,  "deduplicated block"		},
-	{ DMU_BSWAP_ZAP,    TRUE,  TRUE,   "DSL deadlist map"		},
-	{ DMU_BSWAP_UINT64, TRUE,  TRUE,   "DSL deadlist map hdr"	},
-	{ DMU_BSWAP_ZAP,    TRUE,  TRUE,   "DSL dir clones"		},
-	{ DMU_BSWAP_UINT64, TRUE,  FALSE,  "bpobj subobj"		}
+	{ DMU_BSWAP_UINT8,  TRUE,  FALSE, FALSE, "unallocated"		},
+	{ DMU_BSWAP_ZAP,    TRUE,  TRUE,  FALSE, "object directory"	},
+	{ DMU_BSWAP_UINT64, TRUE,  TRUE,  FALSE, "object array"		},
+	{ DMU_BSWAP_UINT8,  TRUE,  FALSE, FALSE, "packed nvlist"	},
+	{ DMU_BSWAP_UINT64, TRUE,  FALSE, FALSE, "packed nvlist size"	},
+	{ DMU_BSWAP_UINT64, TRUE,  FALSE, FALSE, "bpobj"	},
+	{ DMU_BSWAP_UINT64, TRUE,  FALSE, FALSE, "bpobj header"		},
+	{ DMU_BSWAP_UINT64, TRUE,  FALSE, FALSE, "SPA space map header"	},
+	{ DMU_BSWAP_UINT64, TRUE,  FALSE, FALSE, "SPA space map"	},
+	{ DMU_BSWAP_UINT64, TRUE,  FALSE, TRUE,  "ZIL intent log"	},
+	{ DMU_BSWAP_DNODE,  TRUE,  FALSE, TRUE,  "DMU dnode"	},
+	{ DMU_BSWAP_OBJSET, TRUE,  TRUE,  FALSE, "DMU objset"	},
+	{ DMU_BSWAP_UINT64, TRUE,  TRUE,  FALSE, "DSL directory"	},
+	{ DMU_BSWAP_ZAP,    TRUE,  TRUE,  FALSE, "DSL directory child map" },
+	{ DMU_BSWAP_ZAP,    TRUE,  TRUE,  FALSE, "DSL dataset snap map"	},
+	{ DMU_BSWAP_ZAP,    TRUE,  TRUE,  FALSE, "DSL props"	},
+	{ DMU_BSWAP_UINT64, TRUE,  TRUE,  FALSE, "DSL dataset"	},
+	{ DMU_BSWAP_ZNODE,  TRUE,  FALSE, FALSE, "ZFS znode"	},
+	{ DMU_BSWAP_OLDACL, TRUE,  FALSE, TRUE,  "ZFS V0 ACL"	},
+	{ DMU_BSWAP_UINT8,  FALSE, FALSE, TRUE,  "ZFS plain file"	},
+	{ DMU_BSWAP_ZAP,    TRUE,  FALSE, TRUE,  "ZFS directory"	},
+	{ DMU_BSWAP_ZAP,    TRUE,  FALSE, FALSE, "ZFS master node"	},
+	{ DMU_BSWAP_ZAP,    TRUE,  FALSE, TRUE,  "ZFS delete queue"	},
+	{ DMU_BSWAP_UINT8,  FALSE, FALSE, TRUE,  "zvol object"	},
+	{ DMU_BSWAP_ZAP,    TRUE,  FALSE, FALSE, "zvol prop"	},
+	{ DMU_BSWAP_UINT8,  FALSE, FALSE, TRUE,  "other uint8[]"	},
+	{ DMU_BSWAP_UINT64, FALSE, FALSE, TRUE,  "other uint64[]"	},
+	{ DMU_BSWAP_ZAP,    TRUE,  FALSE, FALSE, "other ZAP"	},
+	{ DMU_BSWAP_ZAP,    TRUE,  FALSE, FALSE, "persistent error log"	},
+	{ DMU_BSWAP_UINT8,  TRUE,  FALSE, FALSE, "SPA history"	},
+	{ DMU_BSWAP_UINT64, TRUE,  FALSE, FALSE, "SPA history offsets"	},
+	{ DMU_BSWAP_ZAP,    TRUE,  TRUE,  FALSE, "Pool properties"	},
+	{ DMU_BSWAP_ZAP,    TRUE,  TRUE,  FALSE, "DSL permissions"	},
+	{ DMU_BSWAP_ACL,    TRUE,  FALSE, TRUE,  "ZFS ACL"	},
+	{ DMU_BSWAP_UINT8,  TRUE,  FALSE, TRUE,  "ZFS SYSACL"	},
+	{ DMU_BSWAP_UINT8,  TRUE,  FALSE, TRUE,  "FUID table"	},
+	{ DMU_BSWAP_UINT64, TRUE,  FALSE, FALSE, "FUID table size"	},
+	{ DMU_BSWAP_ZAP,    TRUE,  TRUE,  FALSE, "DSL dataset next clones" },
+	{ DMU_BSWAP_ZAP,    TRUE,  FALSE, FALSE, "scan work queue"	},
+	{ DMU_BSWAP_ZAP,    TRUE,  FALSE, TRUE,  "ZFS user/group used"	},
+	{ DMU_BSWAP_ZAP,    TRUE,  FALSE, TRUE,  "ZFS user/group quota"	},
+	{ DMU_BSWAP_ZAP,    TRUE,  TRUE,  FALSE, "snapshot refcount tags" },
+	{ DMU_BSWAP_ZAP,    TRUE,  FALSE, FALSE, "DDT ZAP algorithm"	},
+	{ DMU_BSWAP_ZAP,    TRUE,  FALSE, FALSE, "DDT statistics"	},
+	{ DMU_BSWAP_UINT8,  TRUE,  FALSE, TRUE,  "System attributes"	},
+	{ DMU_BSWAP_ZAP,    TRUE,  FALSE, TRUE,  "SA master node"	},
+	{ DMU_BSWAP_ZAP,    TRUE,  FALSE, TRUE,  "SA attr registration"	},
+	{ DMU_BSWAP_ZAP,    TRUE,  FALSE, TRUE,  "SA attr layouts"	},
+	{ DMU_BSWAP_ZAP,    TRUE,  FALSE, FALSE, "scan translations"	},
+	{ DMU_BSWAP_UINT8,  FALSE, FALSE, TRUE,  "deduplicated block"	},
+	{ DMU_BSWAP_ZAP,    TRUE,  TRUE,  FALSE, "DSL deadlist map" },
+	{ DMU_BSWAP_UINT64, TRUE,  TRUE,  FALSE, "DSL deadlist map hdr"	},
+	{ DMU_BSWAP_ZAP,    TRUE,  TRUE,  FALSE, "DSL dir clones"	},
+	{ DMU_BSWAP_UINT64, TRUE,  FALSE, FALSE, "bpobj subobj"		}
 };
 
 const dmu_object_byteswap_info_t dmu_ot_byteswap[DMU_BSWAP_NUMFUNCS] = {
@@ -221,6 +221,8 @@ dmu_buf_hold_by_dnode(dnode_t *dn, uint64_t offset,
 
 	if (flags & DMU_READ_NO_PREFETCH)
 		db_flags |= DB_RF_NOPREFETCH;
+	if (flags & DMU_READ_NO_DECRYPT)
+		db_flags |= DB_RF_NO_DECRYPT;
 
 	err = dmu_buf_hold_noread_by_dnode(dn, offset, tag, dbp);
 	if (err == 0) {
@@ -244,6 +246,8 @@ dmu_buf_hold(objset_t *os, uint64_t object, uint64_t offset,
 
 	if (flags & DMU_READ_NO_PREFETCH)
 		db_flags |= DB_RF_NOPREFETCH;
+	if (flags & DMU_READ_NO_DECRYPT)
+		db_flags |= DB_RF_NO_DECRYPT;
 
 	err = dmu_buf_hold_noread(os, object, offset, tag, dbp);
 	if (err == 0) {
@@ -341,14 +345,72 @@ dmu_rm_spill(objset_t *os, uint64_t object, dmu_tx_t *tx)
 }
 
 /*
+ * Look up and hold the bonus buffer for the provided dnode.  If the dnode
+ * does not yet have a bonus dbuf, a new one will be allocated.
+ * Returns ENOENT, EIO, or 0.
+ */
+int
+dmu_bonus_hold_by_dnode(dnode_t *dn, void *tag, dmu_buf_t **dbp, uint32_t flags)
+{
+	dmu_buf_impl_t *db;
+	int error;
+	uint32_t db_flags = DB_RF_MUST_SUCCEED;
+
+	if (flags & DMU_READ_NO_PREFETCH)
+		db_flags |= DB_RF_NOPREFETCH;
+	if (flags & DMU_READ_NO_DECRYPT)
+		db_flags |= DB_RF_NO_DECRYPT;
+
+	rw_enter(&dn->dn_struct_rwlock, RW_READER);
+	if (dn->dn_bonus == NULL) {
+		rw_exit(&dn->dn_struct_rwlock);
+		rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
+		if (dn->dn_bonus == NULL)
+			dbuf_create_bonus(dn);
+	}
+	db = dn->dn_bonus;
+
+	/* as long as the bonus buf is held, the dnode will be held */
+	if (zfs_refcount_add(&db->db_holds, tag) == 1) {
+		VERIFY(dnode_add_ref(dn, db));
+		atomic_inc_32(&dn->dn_dbufs_count);
+	}
+
+	/*
+	 * Wait to drop dn_struct_rwlock until after adding the bonus dbuf's
+	 * hold and incrementing the dbuf count to ensure that dnode_move() sees
+	 * a dnode hold for every dbuf.
+	 */
+	rw_exit(&dn->dn_struct_rwlock);
+
+	error = dbuf_read(db, NULL, db_flags);
+	if (error) {
+		dnode_evict_bonus(dn);
+		dbuf_rele(db, tag);
+		*dbp = NULL;
+		return (error);
+	}
+
+	*dbp = &db->db;
+	return (0);
+}
+
+/*
  * returns ENOENT, EIO, or 0.
  */
 int
-dmu_bonus_hold(objset_t *os, uint64_t object, void *tag, dmu_buf_t **dbp)
+dmu_bonus_hold_impl(objset_t *os, uint64_t object, void *tag, uint32_t flags,
+    dmu_buf_t **dbp)
 {
 	dnode_t *dn;
 	dmu_buf_impl_t *db;
 	int error;
+	uint32_t db_flags = DB_RF_MUST_SUCCEED;
+
+	if (flags & DMU_READ_NO_PREFETCH)
+		db_flags |= DB_RF_NOPREFETCH;
+	if (flags & DMU_READ_NO_DECRYPT)
+		db_flags |= DB_RF_NO_DECRYPT;
 
 	error = dnode_hold(os, object, FTAG, &dn);
 	if (error)
@@ -378,12 +440,24 @@ dmu_bonus_hold(objset_t *os, uint64_t object, void *tag, dmu_buf_t **dbp)
 
 	dnode_rele(dn, FTAG);
 
-	VERIFY(0 == dbuf_read(db, NULL, DB_RF_MUST_SUCCEED | DB_RF_NOPREFETCH));
+	error = dbuf_read(db, NULL, db_flags);
+	if (error) {
+		dnode_evict_bonus(dn);
+		dbuf_rele(db, tag);
+		*dbp = NULL;
+		return (error);
+	}
 
 	*dbp = &db->db;
 	return (0);
 }
 
+int
+dmu_bonus_hold(objset_t *os, uint64_t obj, void *tag, dmu_buf_t **dbp)
+{
+	return (dmu_bonus_hold_impl(os, obj, tag, DMU_READ_NO_PREFETCH, dbp));
+}
+
 /*
  * returns ENOENT, EIO, or 0.
  *
@@ -446,15 +520,20 @@ dmu_spill_hold_existing(dmu_buf_t *bonus, void *tag, dmu_buf_t **dbp)
 }
 
 int
-dmu_spill_hold_by_bonus(dmu_buf_t *bonus, void *tag, dmu_buf_t **dbp)
+dmu_spill_hold_by_bonus(dmu_buf_t *bonus, uint32_t flags, void *tag,
+    dmu_buf_t **dbp)
 {
 	dmu_buf_impl_t *db = (dmu_buf_impl_t *)bonus;
 	dnode_t *dn;
 	int err;
+	uint32_t db_flags = DB_RF_CANFAIL;
+
+	if (flags & DMU_READ_NO_DECRYPT)
+		db_flags |= DB_RF_NO_DECRYPT;
 
 	DB_DNODE_ENTER(db);
 	dn = DB_DNODE(db);
-	err = dmu_spill_hold_by_dnode(dn, DB_RF_CANFAIL, tag, dbp);
+	err = dmu_spill_hold_by_dnode(dn, db_flags, tag, dbp);
 	DB_DNODE_EXIT(db);
 
 	return (err);
@@ -619,8 +698,8 @@ dmu_buf_rele_array(dmu_buf_t **dbp_fake, int numbufs, void *tag)
  * indirect blocks prefeteched will be those that point to the blocks containing
  * the data starting at offset, and continuing to offset + len.
  *
- * Note that if the indirect blocks above the blocks being prefetched are not in
- * cache, they will be asychronously read in.
+ * Note that if the indirect blocks above the blocks being prefetched are not
+ * in cache, they will be asynchronously read in.
  */
 void
 dmu_prefetch(objset_t *os, uint64_t object, int64_t level, uint64_t offset,
@@ -835,6 +914,7 @@ dmu_free_long_range_impl(objset_t *os, dnode_t *dn, uint64_t offset,
 		    uint64_t, long_free_dirty_all_txgs, uint64_t, chunk_len,
 		    uint64_t, dmu_tx_get_txg(tx));
 		dnode_free_range(dn, chunk_begin, chunk_len, tx);
+
 		dmu_tx_commit(tx);
 
 		length -= chunk_len;
@@ -883,7 +963,9 @@ dmu_free_long_object(objset_t *os, uint64_t object)
 	dmu_tx_mark_netfree(tx);
 	err = dmu_tx_assign(tx, TXG_WAIT);
 	if (err == 0) {
-		err = dmu_object_free(os, object, tx);
+		/* dmu_tx_assign() succeeded, so free the object in this tx */
+		err = dmu_object_free(os, object, tx);
+
 		dmu_tx_commit(tx);
 	} else {
 		dmu_tx_abort(tx);
@@ -901,7 +983,7 @@ dmu_free_range(objset_t *os, uint64_t object, uint64_t offset,
 	if (err)
 		return (err);
 	ASSERT(offset < UINT64_MAX);
-	ASSERT(size == -1ULL || size <= UINT64_MAX - offset);
+	ASSERT(size == DMU_OBJECT_END || size <= UINT64_MAX - offset);
 	dnode_free_range(dn, offset, size, tx);
 	dnode_rele(dn, FTAG);
 	return (0);
@@ -1622,22 +1704,71 @@ dmu_return_arcbuf(arc_buf_t *buf)
 	arc_buf_destroy(buf, FTAG);
 }
 
+void
+dmu_copy_from_buf(objset_t *os, uint64_t object, uint64_t offset,
+    dmu_buf_t *handle, dmu_tx_t *tx)
+{
+	dmu_buf_t *dst_handle;
+	dmu_buf_impl_t *dstdb;
+	dmu_buf_impl_t *srcdb = (dmu_buf_impl_t *)handle;
+	arc_buf_t *abuf;
+	uint64_t datalen;
+	boolean_t byteorder;
+	uint8_t salt[ZIO_DATA_SALT_LEN];
+	uint8_t iv[ZIO_DATA_IV_LEN];
+	uint8_t mac[ZIO_DATA_MAC_LEN];
+
+	ASSERT3P(srcdb->db_buf, !=, NULL);
+
+	/* hold the db that we want to write to */
+	VERIFY0(dmu_buf_hold(os, object, offset, FTAG, &dst_handle,
+	    DMU_READ_NO_DECRYPT));
+	dstdb = (dmu_buf_impl_t *)dst_handle;
+	datalen = arc_buf_size(srcdb->db_buf);
+
+	/* allocate an arc buffer that matches the type of srcdb->db_buf */
+	if (arc_is_encrypted(srcdb->db_buf)) {
+		arc_get_raw_params(srcdb->db_buf, &byteorder, salt, iv, mac);
+		abuf = arc_loan_raw_buf(os->os_spa, dmu_objset_id(os),
+		    byteorder, salt, iv, mac, DB_DNODE(dstdb)->dn_type,
+		    datalen, arc_buf_lsize(srcdb->db_buf),
+		    arc_get_compression(srcdb->db_buf));
+	} else {
+		/* we won't get a compressed db back from dmu_buf_hold() */
+		ASSERT3U(arc_get_compression(srcdb->db_buf),
+		    ==, ZIO_COMPRESS_OFF);
+		abuf = arc_loan_buf(os->os_spa,
+		    DMU_OT_IS_METADATA(DB_DNODE(dstdb)->dn_type), datalen);
+	}
+
+	ASSERT3U(datalen, ==, arc_buf_size(abuf));
+
+	/* copy the data to the new buffer and assign it to the dstdb */
+	bcopy(srcdb->db_buf->b_data, abuf->b_data, datalen);
+	dbuf_assign_arcbuf(dstdb, abuf, tx);
+	dmu_buf_rele(dst_handle, FTAG);
+}
+
 /*
  * When possible directly assign passed loaned arc buffer to a dbuf.
  * If this is not possible copy the contents of passed arc buf via
  * dmu_write().
  */
-void
-dmu_assign_arcbuf_dnode(dnode_t *dn, uint64_t offset, arc_buf_t *buf,
+int
+dmu_assign_arcbuf_by_dnode(dnode_t *dn, uint64_t offset, arc_buf_t *buf,
     dmu_tx_t *tx)
 {
 	dmu_buf_impl_t *db;
+	objset_t *os = dn->dn_objset;
+	uint64_t object = dn->dn_object;
 	uint32_t blksz = (uint32_t)arc_buf_lsize(buf);
 	uint64_t blkid;
 
 	rw_enter(&dn->dn_struct_rwlock, RW_READER);
 	blkid = dbuf_whichblock(dn, 0, offset);
-	VERIFY((db = dbuf_hold(dn, blkid, FTAG)) != NULL);
+	db = dbuf_hold(dn, blkid, FTAG);
 	rw_exit(&dn->dn_struct_rwlock);
+	if (db == NULL)
+		return (SET_ERROR(EIO));
 
 	/*
@@ -1648,32 +1779,33 @@ dmu_assign_arcbuf_dnode(dnode_t *dn, uint64_t offset, arc_buf_t *buf,
 		dbuf_assign_arcbuf(db, buf, tx);
 		dbuf_rele(db, FTAG);
 	} else {
-		objset_t *os;
-		uint64_t object;
-
 		/* compressed bufs must always be assignable to their dbuf */
 		ASSERT3U(arc_get_compression(buf), ==, ZIO_COMPRESS_OFF);
 		ASSERT(!(buf->b_flags & ARC_BUF_FLAG_COMPRESSED));
 
 		os = dn->dn_objset;
 		object = dn->dn_object;
-
 		dbuf_rele(db, FTAG);
 		dmu_write(os, object, offset, blksz, buf->b_data, tx);
 		dmu_return_arcbuf(buf);
 		XUIOSTAT_BUMP(xuiostat_wbuf_copied);
 	}
+
+	return (0);
 }
 
-void
-dmu_assign_arcbuf(dmu_buf_t *handle, uint64_t offset, arc_buf_t *buf,
+int
+dmu_assign_arcbuf_by_dbuf(dmu_buf_t *handle, uint64_t offset, arc_buf_t *buf,
     dmu_tx_t *tx)
 {
+	int err;
 	dmu_buf_impl_t *dbuf = (dmu_buf_impl_t *)handle;
 
 	DB_DNODE_ENTER(dbuf);
-	dmu_assign_arcbuf_dnode(DB_DNODE(dbuf), offset, buf, tx);
+	err = dmu_assign_arcbuf_by_dnode(DB_DNODE(dbuf), offset, buf, tx);
 	DB_DNODE_EXIT(dbuf);
+
+	return (err);
 }
 
 typedef struct {
@@ -1700,7 +1832,7 @@ dmu_sync_ready(zio_t *zio, arc_buf_t *buf, void *varg)
 			BP_SET_LSIZE(bp, db->db_size);
 		} else if (!BP_IS_EMBEDDED(bp)) {
 			ASSERT(BP_GET_LEVEL(bp) == 0);
-			bp->blk_fill = 1;
+			BP_SET_FILL(bp, 1);
 		}
 	}
 }
@@ -2031,6 +2163,20 @@ dmu_sync(zio_t *pio, uint64_t txg, dmu_sync_cb_t *done, zgd_t *zgd)
 }
 
 int
+dmu_object_set_nlevels(objset_t *os, uint64_t object, int nlevels, dmu_tx_t *tx)
+{
+	dnode_t *dn;
+	int err;
+
+	err = dnode_hold(os, object, FTAG, &dn);
+	if (err)
+		return (err);
+	err = dnode_set_nlevels(dn, nlevels, tx);
+	dnode_rele(dn, FTAG);
+	return (err);
+}
+
+int
 dmu_object_set_blocksize(objset_t *os, uint64_t object, uint64_t size, int ibs,
     dmu_tx_t *tx)
 {
@@ -2045,6 +2191,23 @@ dmu_object_set_blocksize(objset_t *os, uint64_t object, uint64_t size, int ibs,
 	return (err);
 }
 
+int
+dmu_object_set_maxblkid(objset_t *os, uint64_t object, uint64_t maxblkid,
+    dmu_tx_t *tx)
+{
+	dnode_t *dn;
+	int err;
+
+	err = dnode_hold(os, object, FTAG, &dn);
+	if (err)
+		return (err);
+	rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
+	dnode_new_blkid(dn, maxblkid, tx, B_FALSE, B_TRUE);
+	rw_exit(&dn->dn_struct_rwlock);
+	dnode_rele(dn, FTAG);
+	return (0);
+}
+
 void
 dmu_object_set_checksum(objset_t *os, uint64_t object, uint8_t checksum,
     dmu_tx_t *tx)
@@ -2084,8 +2247,6 @@ dmu_object_set_compress(objset_t *os, uint64_t object, uint8_t compress,
 	dnode_rele(dn, FTAG);
 }
 
-int zfs_mdcomp_disable = 0;
-
 /*
  * When the "redundant_metadata" property is set to "most", only indirect
  * blocks of this level and higher will have an additional ditto block.
@@ -2104,6 +2265,7 @@ dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp)
 	boolean_t dedup = B_FALSE;
 	boolean_t nopwrite = B_FALSE;
 	boolean_t dedup_verify = os->os_dedup_verify;
+	boolean_t encrypt = B_FALSE;
 	int copies = os->os_copies;
 
 	/*
@@ -2114,16 +2276,12 @@ dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp)
 	 *	 3. all other level 0 blocks
 	 */
 	if (ismd) {
-		if (zfs_mdcomp_disable) {
-			compress = ZIO_COMPRESS_EMPTY;
-		} else {
-			/*
-			 * XXX -- we should design a compression algorithm
-			 * that specializes in arrays of bps.
-			 */
-			compress = zio_compress_select(os->os_spa,
-			    ZIO_COMPRESS_ON, ZIO_COMPRESS_ON);
-		}
+		/*
+		 * XXX -- we should design a compression algorithm
+		 * that specializes in arrays of bps.
+		 */
+		compress = zio_compress_select(os->os_spa,
+		    ZIO_COMPRESS_ON, ZIO_COMPRESS_ON);
 
 		/*
 		 * Metadata always gets checksummed.  If the data
@@ -2191,10 +2349,33 @@ dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp)
 		    compress != ZIO_COMPRESS_OFF && zfs_nopwrite_enabled);
 	}
 
-	zp->zp_checksum = checksum;
-	zp->zp_compress = compress;
-	ASSERT3U(zp->zp_compress, !=, ZIO_COMPRESS_INHERIT);
+	/*
+	 * All objects in an encrypted objset are protected from modification
+	 * via a MAC. Encrypted objects store their IV and salt in the last DVA
+	 * in the bp, so we cannot use all copies. Encrypted objects are also
+	 * not subject to nopwrite since writing the same data will still
+	 * result in a new ciphertext. Within an encrypted objset, only blocks
+	 * of encrypted object types can be dedup'd; this avoids ambiguity in
+	 * the dedup code, since the DDT does not store object types.
+	 */
+	if (os->os_encrypted && (wp & WP_NOFILL) == 0) {
+		encrypt = B_TRUE;
 
+		if (DMU_OT_IS_ENCRYPTED(type)) {
+			copies = MIN(copies, SPA_DVAS_PER_BP - 1);
+			nopwrite = B_FALSE;
+		} else {
+			dedup = B_FALSE;
+		}
+
+		if (level <= 0 &&
+		    (type == DMU_OT_DNODE || type == DMU_OT_OBJSET)) {
+			compress = ZIO_COMPRESS_EMPTY;
+		}
+	}
+
+	zp->zp_compress = compress;
+	zp->zp_checksum = checksum;
 	zp->zp_type = (wp & WP_SPILL) ? dn->dn_bonustype : type;
 	zp->zp_level = level;
 	zp->zp_copies = MIN(copies, spa_max_replication(os->os_spa));
@@ -2203,6 +2384,11 @@ dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp)
 	zp->zp_nopwrite = nopwrite;
 	zp->zp_zpl_smallblk = DMU_OT_IS_FILE(zp->zp_type) ?
 	    os->os_zpl_special_smallblock : 0;
+	zp->zp_encrypt = encrypt;
+	zp->zp_byteorder = ZFS_HOST_BYTEORDER;
+	bzero(zp->zp_salt, ZIO_DATA_SALT_LEN);
+	bzero(zp->zp_iv, ZIO_DATA_IV_LEN);
+	bzero(zp->zp_mac, ZIO_DATA_MAC_LEN);
 }
 
 int
diff --git a/usr/src/uts/common/fs/zfs/dmu_diff.c b/usr/src/uts/common/fs/zfs/dmu_diff.c
index 982b96132c..76c32b1264 100644
--- a/usr/src/uts/common/fs/zfs/dmu_diff.c
+++ b/usr/src/uts/common/fs/zfs/dmu_diff.c
@@ -131,11 +131,14 @@ diff_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
 		arc_buf_t *abuf;
 		arc_flags_t aflags = ARC_FLAG_WAIT;
 		int blksz = BP_GET_LSIZE(bp);
+		int zio_flags = ZIO_FLAG_CANFAIL;
 		int i;
 
+		if (BP_IS_PROTECTED(bp))
+			zio_flags |= ZIO_FLAG_RAW;
+
 		if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf,
-		    ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL,
-		    &aflags, zb) != 0)
+		    ZIO_PRIORITY_ASYNC_READ, zio_flags, &aflags, zb) != 0)
 			return (SET_ERROR(EIO));
 
 		blk = abuf->b_data;
@@ -206,8 +209,17 @@ dmu_diff(const char *tosnap_name, const char *fromsnap_name,
 	da.da_ddr.ddr_first = da.da_ddr.ddr_last = 0;
 	da.da_err = 0;
 
+	/*
+	 * Since zfs diff only looks at dnodes which are stored in plaintext
+	 * (other than bonus buffers), we don't technically need to decrypt
+	 * the dataset to perform this operation. However, the command line
+	 * utility will still fail if the keys are not loaded because the
+	 * dataset isn't mounted and because it will fail when it attempts to
+	 * call the ZFS_IOC_OBJ_TO_STATS ioctl.
+	 */
 	error = traverse_dataset(tosnap, fromtxg,
-	    TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA, diff_cb, &da);
+	    TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA | TRAVERSE_NO_DECRYPT,
+	    diff_cb, &da);
 
 	if (error != 0) {
 		da.da_err = error;
diff --git a/usr/src/uts/common/fs/zfs/dmu_object.c b/usr/src/uts/common/fs/zfs/dmu_object.c
index f835987e7d..1a91fefe88 100644
--- a/usr/src/uts/common/fs/zfs/dmu_object.c
+++ b/usr/src/uts/common/fs/zfs/dmu_object.c
@@ -24,6 +24,7 @@
  * Copyright 2014 HybridCluster. All rights reserved.
  */
 
+#include <sys/dbuf.h>
 #include <sys/dmu.h>
 #include <sys/dmu_objset.h>
 #include <sys/dmu_tx.h>
@@ -263,13 +264,13 @@ dmu_object_reclaim(objset_t *os, uint64_t object, dmu_object_type_t ot,
     int blocksize, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
 {
 	return (dmu_object_reclaim_dnsize(os, object, ot, blocksize, bonustype,
-	    bonuslen, DNODE_MIN_SIZE, tx));
+	    bonuslen, DNODE_MIN_SIZE, B_FALSE, tx));
 }
 
 int
 dmu_object_reclaim_dnsize(objset_t *os, uint64_t object, dmu_object_type_t ot,
     int blocksize, dmu_object_type_t bonustype, int bonuslen, int dnodesize,
-    dmu_tx_t *tx)
+    boolean_t keep_spill, dmu_tx_t *tx)
 {
 	dnode_t *dn;
 	int dn_slots = dnodesize >> DNODE_SHIFT;
@@ -286,7 +287,30 @@ dmu_object_reclaim_dnsize(objset_t *os, uint64_t object, dmu_object_type_t ot,
 	if (err)
 		return (err);
 
-	dnode_reallocate(dn, ot, blocksize, bonustype, bonuslen, dn_slots, tx);
+	dnode_reallocate(dn, ot, blocksize, bonustype, bonuslen, dn_slots,
+	    keep_spill, tx);
+
+	dnode_rele(dn, FTAG);
+	return (err);
+}
+
+int
+dmu_object_rm_spill(objset_t *os, uint64_t object, dmu_tx_t *tx)
+{
+	dnode_t *dn;
+	int err;
+
+	err = dnode_hold_impl(os, object, DNODE_MUST_BE_ALLOCATED, 0,
+	    FTAG, &dn);
+	if (err)
+		return (err);
+
+	rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
+	if (dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR) {
+		dbuf_rm_spill(dn, tx);
+		dnode_rm_spill(dn, tx);
+	}
+	rw_exit(&dn->dn_struct_rwlock);
 
 	dnode_rele(dn, FTAG);
 	return (err);
diff --git a/usr/src/uts/common/fs/zfs/dmu_objset.c b/usr/src/uts/common/fs/zfs/dmu_objset.c
index 771b803973..4d0a5d2fd5 100644
--- a/usr/src/uts/common/fs/zfs/dmu_objset.c
+++ b/usr/src/uts/common/fs/zfs/dmu_objset.c
@@ -54,6 +54,7 @@
 #include <sys/dsl_destroy.h>
 #include <sys/vdev.h>
 #include <sys/zfeature.h>
+#include <sys/dmu_recv.h>
 #include "zfs_namecheck.h"
 
 /*
@@ -418,16 +419,23 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
 	if (!BP_IS_HOLE(os->os_rootbp)) {
 		arc_flags_t aflags = ARC_FLAG_WAIT;
 		zbookmark_phys_t zb;
+		enum zio_flag zio_flags = ZIO_FLAG_CANFAIL;
 		SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET,
 		    ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
 
 		if (DMU_OS_IS_L2CACHEABLE(os))
 			aflags |= ARC_FLAG_L2CACHE;
 
+		if (ds != NULL && ds->ds_dir->dd_crypto_obj != 0) {
+			ASSERT3U(BP_GET_COMPRESS(bp), ==, ZIO_COMPRESS_OFF);
+			ASSERT(BP_IS_AUTHENTICATED(bp));
+			zio_flags |= ZIO_FLAG_RAW;
+		}
+
 		dprintf_bp(os->os_rootbp, "reading %s", "");
 		err = arc_read(NULL, spa, os->os_rootbp,
 		    arc_getbuf_func, &os->os_phys_buf,
-		    ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &aflags, &zb);
+		    ZIO_PRIORITY_SYNC_READ, zio_flags, &aflags, &zb);
 		if (err != 0) {
 			kmem_free(os, sizeof (objset_t));
 			/* convert checksum errors into IO errors */
@@ -468,6 +476,8 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
 	if (ds != NULL) {
 		boolean_t needlock = B_FALSE;
 
+		os->os_encrypted = (ds->ds_dir->dd_crypto_obj != 0);
+
 		/*
 		 * Note: it's valid to open the objset if the dataset is
 		 * long-held, in which case the pool_config lock will not
@@ -477,6 +487,7 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
 			needlock = B_TRUE;
 			dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
 		}
+
 		err = dsl_prop_register(ds,
 		    zfs_prop_to_name(ZFS_PROP_PRIMARYCACHE),
 		    primary_cache_changed_cb, os);
@@ -550,6 +561,7 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
 		/* It's the meta-objset. */
 		os->os_checksum = ZIO_CHECKSUM_FLETCHER_4;
 		os->os_compress = ZIO_COMPRESS_ON;
+		os->os_encrypted = B_FALSE;
 		os->os_copies = spa_max_replication(spa);
 		os->os_dedup_checksum = ZIO_CHECKSUM_OFF;
 		os->os_dedup_verify = B_FALSE;
@@ -640,16 +652,18 @@ dmu_objset_from_ds(dsl_dataset_t *ds, objset_t **osp)
  * can be held at a time.
  */
 int
-dmu_objset_hold(const char *name, void *tag, objset_t **osp)
+dmu_objset_hold_flags(const char *name, boolean_t decrypt, void *tag,
+    objset_t **osp)
 {
 	dsl_pool_t *dp;
 	dsl_dataset_t *ds;
 	int err;
+	ds_hold_flags_t flags = (decrypt) ? DS_HOLD_FLAG_DECRYPT : 0;
 
 	err = dsl_pool_hold(name, tag, &dp);
 	if (err != 0)
 		return (err);
-	err = dsl_dataset_hold(dp, name, tag, &ds);
+	err = dsl_dataset_hold_flags(dp, name, flags, tag, &ds);
 	if (err != 0) {
 		dsl_pool_rele(dp, tag);
 		return (err);
@@ -664,23 +678,46 @@ dmu_objset_hold(const char *name, void *tag, objset_t **osp)
 	return (err);
 }
 
+int
+dmu_objset_hold(const char *name, void *tag, objset_t **osp)
+{
+	return (dmu_objset_hold_flags(name, B_FALSE, tag, osp));
+}
+
+/* ARGSUSED */
 static int
 dmu_objset_own_impl(dsl_dataset_t *ds, dmu_objset_type_t type,
-    boolean_t readonly, void *tag, objset_t **osp)
+    boolean_t readonly, boolean_t decrypt, void *tag, objset_t **osp)
 {
 	int err;
 
 	err = dmu_objset_from_ds(ds, osp);
 	if (err != 0) {
-		dsl_dataset_disown(ds, tag);
+		return (err);
 	} else if (type != DMU_OST_ANY && type != (*osp)->os_phys->os_type) {
-		dsl_dataset_disown(ds, tag);
 		return (SET_ERROR(EINVAL));
 	} else if (!readonly && dsl_dataset_is_snapshot(ds)) {
-		dsl_dataset_disown(ds, tag);
+		return (SET_ERROR(EROFS));
+	} else if (!readonly && decrypt &&
+	    dsl_dir_incompatible_encryption_version(ds->ds_dir)) {
 		return (SET_ERROR(EROFS));
 	}
-	return (err);
+
+	/* if we are decrypting, we can now check MACs in os->os_phys_buf */
+	if (decrypt && arc_is_unauthenticated((*osp)->os_phys_buf)) {
+		zbookmark_phys_t zb;
+
+		SET_BOOKMARK(&zb, ds->ds_object, ZB_ROOT_OBJECT,
+		    ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
+		err = arc_untransform((*osp)->os_phys_buf, (*osp)->os_spa,
+		    &zb, B_FALSE);
+		if (err != 0)
+			return (err);
+
+		ASSERT0(arc_is_unauthenticated((*osp)->os_phys_buf));
+	}
+
+	return (0);
 }
 
 /*
@@ -690,48 +727,70 @@ dmu_objset_own_impl(dsl_dataset_t *ds, dmu_objset_type_t type,
  */
 int
 dmu_objset_own(const char *name, dmu_objset_type_t type,
-    boolean_t readonly, void *tag, objset_t **osp)
+    boolean_t readonly, boolean_t decrypt, void *tag, objset_t **osp)
 {
 	dsl_pool_t *dp;
 	dsl_dataset_t *ds;
 	int err;
+	ds_hold_flags_t flags = (decrypt) ? DS_HOLD_FLAG_DECRYPT : 0;
 
 	err = dsl_pool_hold(name, FTAG, &dp);
 	if (err != 0)
 		return (err);
-	err = dsl_dataset_own(dp, name, tag, &ds);
+	err = dsl_dataset_own(dp, name, flags, tag, &ds);
 	if (err != 0) {
 		dsl_pool_rele(dp, FTAG);
 		return (err);
 	}
-	err = dmu_objset_own_impl(ds, type, readonly, tag, osp);
+	err = dmu_objset_own_impl(ds, type, readonly, decrypt, tag, osp);
+	if (err != 0) {
+		dsl_dataset_disown(ds, flags, tag);
+		dsl_pool_rele(dp, FTAG);
+		return (err);
+	}
+
 	dsl_pool_rele(dp, FTAG);
 
-	return (err);
+	return (0);
 }
 
 int
 dmu_objset_own_obj(dsl_pool_t *dp, uint64_t obj, dmu_objset_type_t type,
-    boolean_t readonly, void *tag, objset_t **osp)
+    boolean_t readonly, boolean_t decrypt, void *tag, objset_t **osp)
 {
 	dsl_dataset_t *ds;
 	int err;
+	ds_hold_flags_t flags = (decrypt) ? DS_HOLD_FLAG_DECRYPT : 0;
 
-	err = dsl_dataset_own_obj(dp, obj, tag, &ds);
+	err = dsl_dataset_own_obj(dp, obj, flags, tag, &ds);
 	if (err != 0)
 		return (err);
 
-	return (dmu_objset_own_impl(ds, type, readonly, tag, osp));
+	err = dmu_objset_own_impl(ds, type, readonly, decrypt, tag, osp);
+	if (err != 0) {
+		dsl_dataset_disown(ds, flags, tag);
+		return (err);
+	}
+
+	return (0);
 }
 
 void
-dmu_objset_rele(objset_t *os, void *tag)
+dmu_objset_rele_flags(objset_t *os, boolean_t decrypt, void *tag)
 {
+	ds_hold_flags_t flags = (decrypt) ? DS_HOLD_FLAG_DECRYPT : 0;
+
 	dsl_pool_t *dp = dmu_objset_pool(os);
-	dsl_dataset_rele(os->os_dsl_dataset, tag);
+	dsl_dataset_rele_flags(os->os_dsl_dataset, flags, tag);
 	dsl_pool_rele(dp, tag);
 }
 
+void
+dmu_objset_rele(objset_t *os, void *tag)
+{
+	dmu_objset_rele_flags(os, B_FALSE, tag);
+}
+
 /*
  * When we are called, os MUST refer to an objset associated with a dataset
  * that is owned by 'tag'; that is, is held and long held by 'tag' and ds_owner
@@ -745,7 +804,7 @@ dmu_objset_rele(objset_t *os, void *tag)
  */
 void
 dmu_objset_refresh_ownership(dsl_dataset_t *ds, dsl_dataset_t **newds,
-    void *tag)
+    boolean_t decrypt, void *tag)
 {
 	dsl_pool_t *dp;
 	char name[ZFS_MAX_DATASET_NAME_LEN];
@@ -757,15 +816,18 @@ dmu_objset_refresh_ownership(dsl_dataset_t *ds, dsl_dataset_t **newds,
 	dsl_dataset_name(ds, name);
 	dp = ds->ds_dir->dd_pool;
 	dsl_pool_config_enter(dp, FTAG);
-	dsl_dataset_disown(ds, tag);
-	VERIFY0(dsl_dataset_own(dp, name, tag, newds));
+
+	dsl_dataset_disown(ds, 0, tag);
+	VERIFY0(dsl_dataset_own(dp, name,
+	    (decrypt) ? DS_HOLD_FLAG_DECRYPT : 0, tag, newds));
 	dsl_pool_config_exit(dp, FTAG);
 }
 
 void
-dmu_objset_disown(objset_t *os, void *tag)
+dmu_objset_disown(objset_t *os, boolean_t decrypt, void *tag)
 {
-	dsl_dataset_disown(os->os_dsl_dataset, tag);
+	dsl_dataset_disown(os->os_dsl_dataset,
+	    (decrypt) ? DS_HOLD_FLAG_DECRYPT : 0, tag);
 }
 
 void
@@ -842,6 +904,8 @@ dmu_objset_evict(objset_t *os)
 	} else {
 		mutex_exit(&os->os_lock);
 	}
+
+
 }
 
 void
@@ -887,16 +951,21 @@ dmu_objset_snap_cmtime(objset_t *os)
 	return (dsl_dir_snap_cmtime(os->os_dsl_dataset->ds_dir));
 }
 
-/* called from dsl for meta-objset */
+/* ARGSUSED */
 objset_t *
-dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
-    dmu_objset_type_t type, dmu_tx_t *tx)
+dmu_objset_create_impl_dnstats(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
+    dmu_objset_type_t type, int levels, int blksz, int ibs, dmu_tx_t *tx)
 {
 	objset_t *os;
 	dnode_t *mdn;
 
 	ASSERT(dmu_tx_is_syncing(tx));
 
+	if (blksz == 0)
+		blksz = 1 << DNODE_BLOCK_SHIFT;
+	if (ibs == 0)
+		ibs = DN_MAX_INDBLKSHIFT;
+
 	if (ds != NULL)
 		VERIFY0(dmu_objset_from_ds(ds, &os));
 	else
@@ -919,22 +988,25 @@ dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
 	 * to convergence, so minimizing its dn_nlevels matters.
 	 */
 	if (ds != NULL) {
-		int levels = 1;
-
-		/*
-		 * Determine the number of levels necessary for the meta-dnode
-		 * to contain DN_MAX_OBJECT dnodes.  Note that in order to
-		 * ensure that we do not overflow 64 bits, there has to be
-		 * a nlevels that gives us a number of blocks > DN_MAX_OBJECT
-		 * but < 2^64.  Therefore,
-		 * (mdn->dn_indblkshift - SPA_BLKPTRSHIFT) (10) must be
-		 * less than (64 - log2(DN_MAX_OBJECT)) (16).
-		 */
-		while ((uint64_t)mdn->dn_nblkptr <<
-		    (mdn->dn_datablkshift - DNODE_SHIFT +
-		    (levels - 1) * (mdn->dn_indblkshift - SPA_BLKPTRSHIFT)) <
-		    DN_MAX_OBJECT)
-			levels++;
+		if (levels == 0) {
+			levels = 1;
+
+			/*
+			 * Determine the number of levels necessary for the
+			 * meta-dnode to contain DN_MAX_OBJECT dnodes.  Note
+			 * that in order to ensure that we do not overflow
+			 * 64 bits, there has to be a nlevels that gives us a
+			 * number of blocks > DN_MAX_OBJECT but < 2^64.
+			 * Therefore, (mdn->dn_indblkshift - SPA_BLKPTRSHIFT)
+			 * (10) must be less than (64 - log2(DN_MAX_OBJECT))
+			 * (16).
+			 */
+			while ((uint64_t)mdn->dn_nblkptr <<
+			    (mdn->dn_datablkshift - DNODE_SHIFT + (levels - 1) *
+			    (mdn->dn_indblkshift - SPA_BLKPTRSHIFT)) <
+			    DN_MAX_OBJECT)
+				levels++;
+		}
 
 		mdn->dn_next_nlevels[tx->tx_txg & TXG_MASK] =
 		    mdn->dn_nlevels = levels;
@@ -944,7 +1016,13 @@ dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
 	ASSERT(type != DMU_OST_ANY);
 	ASSERT(type < DMU_OST_NUMTYPES);
 	os->os_phys->os_type = type;
-	if (dmu_objset_userused_enabled(os)) {
+
+	/*
+	 * Enable user accounting if it is enabled and this is not an
+	 * encrypted receive.
+	 */
+	if (dmu_objset_userused_enabled(os) &&
+	    (!os->os_encrypted || !dmu_objset_is_receiving(os))) {
 		os->os_phys->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE;
 		os->os_flags = os->os_phys->os_flags;
 	}
@@ -954,6 +1032,14 @@ dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
 	return (os);
 }
 
+/* called from dsl for meta-objset */
+objset_t *
+dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
+    dmu_objset_type_t type, dmu_tx_t *tx)
+{
+	return (dmu_objset_create_impl_dnstats(spa, ds, bp, type, 0, 0, 0, tx));
+}
+
 typedef struct dmu_objset_create_arg {
 	const char *doca_name;
 	cred_t *doca_cred;
@@ -962,6 +1048,7 @@ typedef struct dmu_objset_create_arg {
 	void *doca_userarg;
 	dmu_objset_type_t doca_type;
 	uint64_t doca_flags;
+	dsl_crypto_params_t *doca_dcp;
 } dmu_objset_create_arg_t;
 
 /*ARGSUSED*/
@@ -990,8 +1077,16 @@ dmu_objset_create_check(void *arg, dmu_tx_t *tx)
 		dsl_dir_rele(pdd, FTAG);
 		return (SET_ERROR(EEXIST));
 	}
+
+	error = dmu_objset_create_crypt_check(pdd, doca->doca_dcp, NULL);
+	if (error != 0) {
+		dsl_dir_rele(pdd, FTAG);
+		return (error);
+	}
+
 	error = dsl_fs_ss_limit_check(pdd, 1, ZFS_PROP_FILESYSTEM_LIMIT, NULL,
 	    doca->doca_cred);
+
 	dsl_dir_rele(pdd, FTAG);
 
 	return (error);
@@ -1002,23 +1097,25 @@ dmu_objset_create_sync(void *arg, dmu_tx_t *tx)
 {
 	dmu_objset_create_arg_t *doca = arg;
 	dsl_pool_t *dp = dmu_tx_pool(tx);
+	spa_t *spa = dp->dp_spa;
 	dsl_dir_t *pdd;
 	const char *tail;
 	dsl_dataset_t *ds;
 	uint64_t obj;
 	blkptr_t *bp;
 	objset_t *os;
+	zio_t *rzio;
 
 	VERIFY0(dsl_dir_hold(dp, doca->doca_name, FTAG, &pdd, &tail));
 
 	obj = dsl_dataset_create_sync(pdd, tail, NULL, doca->doca_flags,
-	    doca->doca_cred, tx);
+	    doca->doca_cred, doca->doca_dcp, tx);
 
-	VERIFY0(dsl_dataset_hold_obj(pdd->dd_pool, obj, FTAG, &ds));
+	VERIFY0(dsl_dataset_hold_obj_flags(pdd->dd_pool, obj,
+	    DS_HOLD_FLAG_DECRYPT, FTAG, &ds));
 	rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
 	bp = dsl_dataset_get_blkptr(ds);
-	os = dmu_objset_create_impl(pdd->dd_pool->dp_spa,
-	    ds, bp, doca->doca_type, tx);
+	os = dmu_objset_create_impl(spa, ds, bp, doca->doca_type, tx);
 	rrw_exit(&ds->ds_bp_rwlock, FTAG);
 
 	if (doca->doca_userfunc != NULL) {
@@ -1026,16 +1123,68 @@ dmu_objset_create_sync(void *arg, dmu_tx_t *tx)
 		    doca->doca_cred, tx);
 	}
 
+	/*
+	 * The doca_userfunc() may write out some data that needs to be
+	 * encrypted if the dataset is encrypted (specifically the root
+	 * directory).  This data must be written out before the encryption
+	 * key mapping is removed by dsl_dataset_rele_flags().  Force the
+	 * I/O to occur immediately by invoking the relevant sections of
+	 * dsl_pool_sync().
+	 */
+	if (os->os_encrypted) {
+		dsl_dataset_t *tmpds = NULL;
+		boolean_t need_sync_done = B_FALSE;
+
+		mutex_enter(&ds->ds_lock);
+		ds->ds_owner = FTAG;
+		mutex_exit(&ds->ds_lock);
+
+		rzio = zio_root(spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
+		tmpds = txg_list_remove_this(&dp->dp_dirty_datasets, ds,
+		    tx->tx_txg);
+		if (tmpds != NULL) {
+			dsl_dataset_sync(ds, rzio, tx);
+			need_sync_done = B_TRUE;
+		}
+		VERIFY0(zio_wait(rzio));
+		dmu_objset_do_userquota_updates(os, tx);
+		taskq_wait(dp->dp_sync_taskq);
+		if (txg_list_member(&dp->dp_dirty_datasets, ds, tx->tx_txg)) {
+			ASSERT3P(ds->ds_key_mapping, !=, NULL);
+			key_mapping_rele(spa, ds->ds_key_mapping, ds);
+		}
+
+		rzio = zio_root(spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
+		tmpds = txg_list_remove_this(&dp->dp_dirty_datasets, ds,
+		    tx->tx_txg);
+		if (tmpds != NULL) {
+			dmu_buf_rele(ds->ds_dbuf, ds);
+			dsl_dataset_sync(ds, rzio, tx);
+		}
+		VERIFY0(zio_wait(rzio));
+
+		if (need_sync_done) {
+			ASSERT3P(ds->ds_key_mapping, !=, NULL);
+			key_mapping_rele(spa, ds->ds_key_mapping, ds);
+			dsl_dataset_sync_done(ds, tx);
+		}
+
+		mutex_enter(&ds->ds_lock);
+		ds->ds_owner = NULL;
+		mutex_exit(&ds->ds_lock);
+	}
+
 	spa_history_log_internal_ds(ds, "create", tx, "");
-	dsl_dataset_rele(ds, FTAG);
+	dsl_dataset_rele_flags(ds, DS_HOLD_FLAG_DECRYPT, FTAG);
 	dsl_dir_rele(pdd, FTAG);
 }
 
 int
 dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags,
-    void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg)
+    dsl_crypto_params_t *dcp, dmu_objset_create_sync_func_t func, void *arg)
 {
 	dmu_objset_create_arg_t doca;
+	dsl_crypto_params_t tmp_dcp = { 0 };
 
 	doca.doca_name = name;
 	doca.doca_cred = CRED();
@@ -1044,9 +1193,19 @@ dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags,
 	doca.doca_userarg = arg;
 	doca.doca_type = type;
 
+	/*
+	 * Some callers (mostly for testing) do not provide a dcp on their
+	 * own but various code inside the sync task will require it to be
+	 * allocated. Rather than adding NULL checks throughout this code
+	 * or adding dummy dcp's to all of the callers we simply create a
+	 * dummy one here and use that. This zero dcp will have the same
+	 * effect as asking for inheritance of all encryption params.
+	 */
+	doca.doca_dcp = (dcp != NULL) ? dcp : &tmp_dcp;
+
 	return (dsl_sync_task(name,
 	    dmu_objset_create_check, dmu_objset_create_sync, &doca,
-	    5, ZFS_SPACE_CHECK_NORMAL));
+	    6, ZFS_SPACE_CHECK_NORMAL));
 }
 
 typedef struct dmu_objset_clone_arg {
@@ -1086,18 +1245,29 @@ dmu_objset_clone_check(void *arg, dmu_tx_t *tx)
 		dsl_dir_rele(pdd, FTAG);
 		return (SET_ERROR(EDQUOT));
 	}
-	dsl_dir_rele(pdd, FTAG);
 
 	error = dsl_dataset_hold(dp, doca->doca_origin, FTAG, &origin);
-	if (error != 0)
+	if (error != 0) {
+		dsl_dir_rele(pdd, FTAG);
 		return (error);
+	}
 
 	/* You can only clone snapshots, not the head datasets. */
 	if (!origin->ds_is_snapshot) {
 		dsl_dataset_rele(origin, FTAG);
+		dsl_dir_rele(pdd, FTAG);
 		return (SET_ERROR(EINVAL));
 	}
+
+	error = dmu_objset_clone_crypt_check(pdd, origin->ds_dir);
+	if (error != 0) {
+		dsl_dataset_rele(origin, FTAG);
+		dsl_dir_rele(pdd, FTAG);
+		return (error);
+	}
+
 	dsl_dataset_rele(origin, FTAG);
+	dsl_dir_rele(pdd, FTAG);
 
 	return (0);
 }
@@ -1117,7 +1287,7 @@ dmu_objset_clone_sync(void *arg, dmu_tx_t *tx)
 	VERIFY0(dsl_dataset_hold(dp, doca->doca_origin, FTAG, &origin));
 
 	obj = dsl_dataset_create_sync(pdd, tail, origin, 0,
-	    doca->doca_cred, tx);
+	    doca->doca_cred, NULL, tx);
 
 	VERIFY0(dsl_dataset_hold_obj(pdd->dd_pool, obj, FTAG, &ds));
 	dsl_dataset_name(origin, namebuf);
@@ -1139,7 +1309,7 @@ dmu_objset_clone(const char *clone, const char *origin)
 
 	return (dsl_sync_task(clone,
 	    dmu_objset_clone_check, dmu_objset_clone_sync, &doca,
-	    5, ZFS_SPACE_CHECK_NORMAL));
+	    6, ZFS_SPACE_CHECK_NORMAL));
 }
 
 static int
@@ -1299,10 +1469,10 @@ dmu_objset_write_ready(zio_t *zio, arc_buf_t *abuf, void *arg)
 	blkptr_t *bp = zio->io_bp;
 	objset_t *os = arg;
 	dnode_phys_t *dnp = &os->os_phys->os_meta_dnode;
+	uint64_t fill = 0;
 
 	ASSERT(!BP_IS_EMBEDDED(bp));
 	ASSERT3U(BP_GET_TYPE(bp), ==, DMU_OT_OBJSET);
-	ASSERT0(BP_GET_LEVEL(bp));
 
 	/*
 	 * Update rootbp fill count: it should be the number of objects
@@ -1310,9 +1480,11 @@ dmu_objset_write_ready(zio_t *zio, arc_buf_t *abuf, void *arg)
 	 * objects that are stored in the objset_phys_t -- the meta
 	 * dnode and user/group accounting objects).
 	 */
-	bp->blk_fill = 0;
 	for (int i = 0; i < dnp->dn_nblkptr; i++)
-		bp->blk_fill += BP_GET_FILL(&dnp->dn_blkptr[i]);
+		fill += BP_GET_FILL(&dnp->dn_blkptr[i]);
+
+	BP_SET_FILL(bp, fill);
+
 	if (os->os_dsl_dataset != NULL)
 		rrw_enter(&os->os_dsl_dataset->ds_bp_rwlock, RW_WRITER, FTAG);
 	*os->os_rootbp = *bp;
@@ -1401,6 +1573,19 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx)
 
 	dmu_write_policy(os, NULL, 0, 0, &zp);
 
+	/*
+	 * If we are either claiming the ZIL or doing a raw receive, write
+	 * out the os_phys_buf raw. Neither of these actions will affect the
+	 * MAC at this point.
+	 */
+	if (os->os_raw_receive ||
+	    os->os_next_write_raw[tx->tx_txg & TXG_MASK]) {
+		ASSERT(os->os_encrypted);
+		arc_convert_to_raw(os->os_phys_buf,
+		    os->os_dsl_dataset->ds_object, ZFS_HOST_BYTEORDER,
+		    DMU_OT_OBJSET, NULL, NULL, NULL);
+	}
+
 	zio = arc_write(pio, os->os_spa, tx->tx_txg,
 	    blkptr_copy, os->os_phys_buf, DMU_OS_IS_L2CACHEABLE(os),
 	    &zp, dmu_objset_write_ready, NULL, NULL, dmu_objset_write_done,
@@ -1424,7 +1609,8 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx)
 
 	txgoff = tx->tx_txg & TXG_MASK;
 
-	if (dmu_objset_userused_enabled(os)) {
+	if (dmu_objset_userused_enabled(os) &&
+	    (!os->os_encrypted || !dmu_objset_is_receiving(os))) {
 		/*
 		 * We must create the list here because it uses the
 		 * dn_dirty_link[] of this txg.  But it may already
@@ -1663,6 +1849,10 @@ dmu_objset_do_userquota_updates(objset_t *os, dmu_tx_t *tx)
 	if (!dmu_objset_userused_enabled(os))
 		return;
 
+	/* if this is a raw receive just return and handle accounting later */
+	if (os->os_encrypted && dmu_objset_is_receiving(os))
+		return;
+
 	/* Allocate the user/groupused objects if necessary. */
 	if (DMU_USERUSED_DNODE(os)->dn_type == DMU_OT_NONE) {
 		VERIFY0(zap_create_claim(os,
@@ -1742,6 +1932,18 @@ dmu_objset_userquota_get_ids(dnode_t *dn, boolean_t before, dmu_tx_t *tx)
 	if (!dmu_objset_userused_enabled(dn->dn_objset))
 		return;
 
+	/*
+	 * Raw receives introduce a problem with user accounting. Raw
+	 * receives cannot update the user accounting info because the
+	 * user ids and the sizes are encrypted. To guarantee that we
+	 * never end up with bad user accounting, we simply disable it
+	 * during raw receives. We also disable this for normal receives
+	 * so that an incremental raw receive may be done on top of an
+	 * existing non-raw receive.
+	 */
+	if (os->os_encrypted && dmu_objset_is_receiving(os))
+		return;
+
 	if (before && (flags & (DN_ID_CHKED_BONUS|DN_ID_OLD_EXIST|
 	    DN_ID_CHKED_SPILL)))
 		return;
@@ -2394,6 +2596,13 @@ dmu_objset_find(char *name, int func(const char *, void *), void *arg,
 	return (error);
 }
 
+boolean_t
+dmu_objset_incompatible_encryption_version(objset_t *os)
+{
+	return (dsl_dir_incompatible_encryption_version(
+	    os->os_dsl_dataset->ds_dir));
+}
+
 void
 dmu_objset_set_user(objset_t *os, void *user_ptr)
 {
diff --git a/usr/src/uts/common/fs/zfs/dmu_recv.c b/usr/src/uts/common/fs/zfs/dmu_recv.c
index 542bb42f3f..b6f63e7e22 100644
--- a/usr/src/uts/common/fs/zfs/dmu_recv.c
+++ b/usr/src/uts/common/fs/zfs/dmu_recv.c
@@ -67,7 +67,7 @@ typedef struct dmu_recv_begin_arg {
 	const char *drba_origin;
 	dmu_recv_cookie_t *drba_cookie;
 	cred_t *drba_cred;
-	uint64_t drba_snapobj;
+	dsl_crypto_params_t *drba_dcp;
 } dmu_recv_begin_arg_t;
 
 static int
@@ -77,6 +77,11 @@ recv_begin_check_existing_impl(dmu_recv_begin_arg_t *drba, dsl_dataset_t *ds,
 	uint64_t val;
 	int error;
 	dsl_pool_t *dp = ds->ds_dir->dd_pool;
+	struct drr_begin *drrb = drba->drba_cookie->drc_drrb;
+	uint64_t featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
+	boolean_t encrypted = ds->ds_dir->dd_crypto_obj != 0;
+	boolean_t raw = (featureflags & DMU_BACKUP_FEATURE_RAW) != 0;
+	boolean_t embed = (featureflags & DMU_BACKUP_FEATURE_EMBED_DATA) != 0;
 
 	/* temporary clone name must not exist */
 	error = zap_lookup(dp->dp_meta_objset,
@@ -110,6 +115,14 @@ recv_begin_check_existing_impl(dmu_recv_begin_arg_t *drba, dsl_dataset_t *ds,
 		dsl_dataset_t *snap;
 		uint64_t obj = dsl_dataset_phys(ds)->ds_prev_snap_obj;
 
+		/* Can't raw receive on top of an unencrypted dataset */
+		if (!encrypted && raw)
+			return (SET_ERROR(EINVAL));
+
+		/* Encryption is incompatible with embedded data */
+		if (encrypted && embed)
+			return (SET_ERROR(EINVAL));
+
 		/* Find snapshot in this dir that matches fromguid. */
 		while (obj != 0) {
 			error = dsl_dataset_hold_obj(dp, obj, FTAG,
@@ -129,7 +142,7 @@ recv_begin_check_existing_impl(dmu_recv_begin_arg_t *drba, dsl_dataset_t *ds,
 			return (SET_ERROR(ENODEV));
 
 		if (drba->drba_cookie->drc_force) {
-			drba->drba_snapobj = obj;
+			drba->drba_cookie->drc_fromsnapobj = obj;
 		} else {
 			/*
 			 * If we are not forcing, there must be no
@@ -139,7 +152,8 @@ recv_begin_check_existing_impl(dmu_recv_begin_arg_t *drba, dsl_dataset_t *ds,
 				dsl_dataset_rele(snap, FTAG);
 				return (SET_ERROR(ETXTBSY));
 			}
-			drba->drba_snapobj = ds->ds_prev->ds_object;
+			drba->drba_cookie->drc_fromsnapobj =
+			    ds->ds_prev->ds_object;
 		}
 
 		dsl_dataset_rele(snap, FTAG);
@@ -147,9 +161,34 @@ recv_begin_check_existing_impl(dmu_recv_begin_arg_t *drba, dsl_dataset_t *ds,
 		/* if full, then must be forced */
 		if (!drba->drba_cookie->drc_force)
 			return (SET_ERROR(EEXIST));
-		/* start from $ORIGIN@$ORIGIN, if supported */
-		drba->drba_snapobj = dp->dp_origin_snap != NULL ?
-		    dp->dp_origin_snap->ds_object : 0;
+
+		/*
+		 * We don't support using zfs recv -F to blow away
+		 * encrypted filesystems. This would require the
+		 * dsl dir to point to the old encryption key and
+		 * the new one at the same time during the receive.
+		 */
+		if ((!encrypted && raw) || encrypted)
+			return (SET_ERROR(EINVAL));
+
+		/*
+		 * Perform the same encryption checks we would if
+		 * we were creating a new dataset from scratch.
+		 */
+		if (!raw) {
+			boolean_t will_encrypt;
+
+			error = dmu_objset_create_crypt_check(
+			    ds->ds_dir->dd_parent, drba->drba_dcp,
+			    &will_encrypt);
+			if (error != 0)
+				return (error);
+
+			if (will_encrypt && embed)
+				return (SET_ERROR(EINVAL));
+		}
+
+		drba->drba_cookie->drc_fromsnapobj = 0;
 	}
 
 	return (0);
@@ -164,6 +203,7 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx)
 	struct drr_begin *drrb = drba->drba_cookie->drc_drrb;
 	uint64_t fromguid = drrb->drr_fromguid;
 	int flags = drrb->drr_flags;
+	ds_hold_flags_t dsflags = 0;
 	int error;
 	uint64_t featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
 	dsl_dataset_t *ds;
@@ -214,18 +254,34 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx)
 	    !spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_LARGE_DNODE))
 		return (SET_ERROR(ENOTSUP));
 
-	error = dsl_dataset_hold(dp, tofs, FTAG, &ds);
+	if ((featureflags & DMU_BACKUP_FEATURE_RAW)) {
+		/* raw receives require the encryption feature */
+		if (!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_ENCRYPTION))
+			return (SET_ERROR(ENOTSUP));
+
+		/* embedded data is incompatible with encryption and raw recv */
+		if (featureflags & DMU_BACKUP_FEATURE_EMBED_DATA)
+			return (SET_ERROR(EINVAL));
+
+		/* raw receives require spill block allocation flag */
+		if (!(flags & DRR_FLAG_SPILL_BLOCK))
+			return (SET_ERROR(ZFS_ERR_SPILL_BLOCK_FLAG_MISSING));
+	} else {
+		dsflags |= DS_HOLD_FLAG_DECRYPT;
+	}
+
+	error = dsl_dataset_hold_flags(dp, tofs, dsflags, FTAG, &ds);
 	if (error == 0) {
 		/* target fs already exists; recv into temp clone */
 
 		/* Can't recv a clone into an existing fs */
 		if (flags & DRR_FLAG_CLONE || drba->drba_origin) {
-			dsl_dataset_rele(ds, FTAG);
+			dsl_dataset_rele_flags(ds, dsflags, FTAG);
 			return (SET_ERROR(EINVAL));
 		}
 
 		error = recv_begin_check_existing_impl(drba, ds, fromguid);
-		dsl_dataset_rele(ds, FTAG);
+		dsl_dataset_rele_flags(ds, dsflags, FTAG);
 	} else if (error == ENOENT) {
 		/* target fs does not exist; must be a full backup or clone */
 		char buf[ZFS_MAX_DATASET_NAME_LEN];
@@ -250,10 +306,35 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx)
 		/* Open the parent of tofs */
 		ASSERT3U(strlen(tofs), <, sizeof (buf));
 		(void) strlcpy(buf, tofs, strrchr(tofs, '/') - tofs + 1);
-		error = dsl_dataset_hold(dp, buf, FTAG, &ds);
+		error = dsl_dataset_hold_flags(dp, buf, dsflags, FTAG, &ds);
 		if (error != 0)
 			return (error);
 
+		if ((featureflags & DMU_BACKUP_FEATURE_RAW) == 0 &&
+		    drba->drba_origin == NULL) {
+			boolean_t will_encrypt;
+
+			/*
+			 * Check that we aren't breaking any encryption rules
+			 * and that we have all the parameters we need to
+			 * create an encrypted dataset if necessary. If we are
+			 * making an encrypted dataset the stream can't have
+			 * embedded data.
+			 */
+			error = dmu_objset_create_crypt_check(ds->ds_dir,
+			    drba->drba_dcp, &will_encrypt);
+			if (error != 0) {
+				dsl_dataset_rele_flags(ds, dsflags, FTAG);
+				return (error);
+			}
+
+			if (will_encrypt &&
+			    (featureflags & DMU_BACKUP_FEATURE_EMBED_DATA)) {
+				dsl_dataset_rele_flags(ds, dsflags, FTAG);
+				return (SET_ERROR(EINVAL));
+			}
+		}
+
 		/*
 		 * Check filesystem and snapshot limits before receiving. We'll
 		 * recheck snapshot limits again at the end (we create the
@@ -262,39 +343,46 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx)
 		error = dsl_fs_ss_limit_check(ds->ds_dir, 1,
 		    ZFS_PROP_FILESYSTEM_LIMIT, NULL, drba->drba_cred);
 		if (error != 0) {
-			dsl_dataset_rele(ds, FTAG);
+			dsl_dataset_rele_flags(ds, dsflags, FTAG);
 			return (error);
 		}
 
 		error = dsl_fs_ss_limit_check(ds->ds_dir, 1,
 		    ZFS_PROP_SNAPSHOT_LIMIT, NULL, drba->drba_cred);
 		if (error != 0) {
-			dsl_dataset_rele(ds, FTAG);
+			dsl_dataset_rele_flags(ds, dsflags, FTAG);
 			return (error);
 		}
 
 		if (drba->drba_origin != NULL) {
 			dsl_dataset_t *origin;
-			error = dsl_dataset_hold(dp, drba->drba_origin,
-			    FTAG, &origin);
+
+			error = dsl_dataset_hold_flags(dp, drba->drba_origin,
+			    dsflags, FTAG, &origin);
 			if (error != 0) {
-				dsl_dataset_rele(ds, FTAG);
+				dsl_dataset_rele_flags(ds, dsflags, FTAG);
 				return (error);
 			}
 			if (!origin->ds_is_snapshot) {
-				dsl_dataset_rele(origin, FTAG);
-				dsl_dataset_rele(ds, FTAG);
+				dsl_dataset_rele_flags(origin, dsflags, FTAG);
+				dsl_dataset_rele_flags(ds, dsflags, FTAG);
 				return (SET_ERROR(EINVAL));
 			}
 			if (dsl_dataset_phys(origin)->ds_guid != fromguid &&
 			    fromguid != 0) {
-				dsl_dataset_rele(origin, FTAG);
-				dsl_dataset_rele(ds, FTAG);
+				dsl_dataset_rele_flags(origin, dsflags, FTAG);
+				dsl_dataset_rele_flags(ds, dsflags, FTAG);
 				return (SET_ERROR(ENODEV));
 			}
-			dsl_dataset_rele(origin, FTAG);
+			if (origin->ds_dir->dd_crypto_obj != 0 &&
+			    (featureflags & DMU_BACKUP_FEATURE_EMBED_DATA)) {
+				dsl_dataset_rele_flags(origin, dsflags, FTAG);
+				dsl_dataset_rele_flags(ds, dsflags, FTAG);
+				return (SET_ERROR(EINVAL));
+			}
+			dsl_dataset_rele_flags(origin, dsflags, FTAG);
 		}
-		dsl_dataset_rele(ds, FTAG);
+		dsl_dataset_rele_flags(ds, dsflags, FTAG);
 		error = 0;
 	}
 	return (error);
@@ -308,27 +396,51 @@ dmu_recv_begin_sync(void *arg, dmu_tx_t *tx)
 	objset_t *mos = dp->dp_meta_objset;
 	struct drr_begin *drrb = drba->drba_cookie->drc_drrb;
 	const char *tofs = drba->drba_cookie->drc_tofs;
+	uint64_t featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
 	dsl_dataset_t *ds, *newds;
+	objset_t *os;
 	uint64_t dsobj;
+	ds_hold_flags_t dsflags = 0;
 	int error;
 	uint64_t crflags = 0;
+	dsl_crypto_params_t dummy_dcp = { 0 };
+	dsl_crypto_params_t *dcp = drba->drba_dcp;
 
 	if (drrb->drr_flags & DRR_FLAG_CI_DATA)
 		crflags |= DS_FLAG_CI_DATASET;
+	if ((featureflags & DMU_BACKUP_FEATURE_RAW) == 0)
+		dsflags |= DS_HOLD_FLAG_DECRYPT;
+
+	/*
+	 * Raw, non-incremental recvs always use a dummy dcp with
+	 * the raw cmd set. Raw incremental recvs do not use a dcp
+	 * since the encryption parameters are already set in stone.
+	 */
+	if (dcp == NULL && drba->drba_cookie->drc_fromsnapobj == 0 &&
+	    drba->drba_origin == NULL) {
+		ASSERT3P(dcp, ==, NULL);
+		dcp = &dummy_dcp;
 
-	error = dsl_dataset_hold(dp, tofs, FTAG, &ds);
+		if (featureflags & DMU_BACKUP_FEATURE_RAW)
+			dcp->cp_cmd = DCP_CMD_RAW_RECV;
+	}
+
+	error = dsl_dataset_hold_flags(dp, tofs, dsflags, FTAG, &ds);
 	if (error == 0) {
 		/* create temporary clone */
 		dsl_dataset_t *snap = NULL;
-		if (drba->drba_snapobj != 0) {
+
+		if (drba->drba_cookie->drc_fromsnapobj != 0) {
 			VERIFY0(dsl_dataset_hold_obj(dp,
-			    drba->drba_snapobj, FTAG, &snap));
+			    drba->drba_cookie->drc_fromsnapobj, FTAG, &snap));
+			ASSERT3P(dcp, ==, NULL);
 		}
+
 		dsobj = dsl_dataset_create_sync(ds->ds_dir, recv_clone_name,
-		    snap, crflags, drba->drba_cred, tx);
-		if (drba->drba_snapobj != 0)
+		    snap, crflags, drba->drba_cred, dcp, tx);
+		if (drba->drba_cookie->drc_fromsnapobj != 0)
 			dsl_dataset_rele(snap, FTAG);
-		dsl_dataset_rele(ds, FTAG);
+		dsl_dataset_rele_flags(ds, dsflags, FTAG);
 	} else {
 		dsl_dir_t *dd;
 		const char *tail;
@@ -339,18 +451,20 @@ dmu_recv_begin_sync(void *arg, dmu_tx_t *tx)
 		if (drba->drba_origin != NULL) {
 			VERIFY0(dsl_dataset_hold(dp, drba->drba_origin,
 			    FTAG, &origin));
+			ASSERT3P(dcp, ==, NULL);
 		}
 
 		/* Create new dataset. */
-		dsobj = dsl_dataset_create_sync(dd,
-		    strrchr(tofs, '/') + 1,
-		    origin, crflags, drba->drba_cred, tx);
+		dsobj = dsl_dataset_create_sync(dd, strrchr(tofs, '/') + 1,
+		    origin, crflags, drba->drba_cred, dcp, tx);
 		if (origin != NULL)
 			dsl_dataset_rele(origin, FTAG);
 		dsl_dir_rele(dd, FTAG);
 		drba->drba_cookie->drc_newfs = B_TRUE;
 	}
-	VERIFY0(dsl_dataset_own_obj(dp, dsobj, dmu_recv_tag, &newds));
+
+	VERIFY0(dsl_dataset_own_obj(dp, dsobj, dsflags, dmu_recv_tag, &newds));
+	VERIFY0(dmu_objset_from_ds(newds, &os));
 
 	if (drba->drba_cookie->drc_resumable) {
 		dsl_dataset_zapify(newds, tx);
@@ -370,32 +484,46 @@ dmu_recv_begin_sync(void *arg, dmu_tx_t *tx)
 		    8, 1, &zero, tx));
 		VERIFY0(zap_add(mos, dsobj, DS_FIELD_RESUME_BYTES,
 		    8, 1, &zero, tx));
-		if (DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) &
-		    DMU_BACKUP_FEATURE_LARGE_BLOCKS) {
+		if (featureflags & DMU_BACKUP_FEATURE_LARGE_BLOCKS) {
 			VERIFY0(zap_add(mos, dsobj, DS_FIELD_RESUME_LARGEBLOCK,
 			    8, 1, &one, tx));
 		}
-		if (DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) &
-		    DMU_BACKUP_FEATURE_EMBED_DATA) {
+		if (featureflags & DMU_BACKUP_FEATURE_EMBED_DATA) {
 			VERIFY0(zap_add(mos, dsobj, DS_FIELD_RESUME_EMBEDOK,
 			    8, 1, &one, tx));
 		}
-		if (DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) &
-		    DMU_BACKUP_FEATURE_COMPRESSED) {
+		if (featureflags & DMU_BACKUP_FEATURE_COMPRESSED) {
 			VERIFY0(zap_add(mos, dsobj, DS_FIELD_RESUME_COMPRESSOK,
 			    8, 1, &one, tx));
 		}
+		if (featureflags & DMU_BACKUP_FEATURE_RAW) {
+			VERIFY0(zap_add(mos, dsobj, DS_FIELD_RESUME_RAWOK,
+			    8, 1, &one, tx));
+		}
+	}
+
+	/*
+	 * Usually the os->os_encrypted value is tied to the presence of a
+	 * DSL Crypto Key object in the dd. However, that will not be received
+	 * until dmu_recv_stream(), so we set the value manually for now.
+	 */
+	if (featureflags & DMU_BACKUP_FEATURE_RAW) {
+		os->os_encrypted = B_TRUE;
+		drba->drba_cookie->drc_raw = B_TRUE;
 	}
 
 	dmu_buf_will_dirty(newds->ds_dbuf, tx);
 	dsl_dataset_phys(newds)->ds_flags |= DS_FLAG_INCONSISTENT;
 
 	/*
-	 * If we actually created a non-clone, we need to create the
-	 * objset in our new dataset.
+	 * If we actually created a non-clone, we need to create the objset
+	 * in our new dataset. If this is a raw send we postpone this until
+	 * dmu_recv_stream() so that we can allocate the metadnode with the
+	 * properties from the DRR_BEGIN payload.
 	 */
 	rrw_enter(&newds->ds_bp_rwlock, RW_READER, FTAG);
-	if (BP_IS_HOLE(dsl_dataset_get_blkptr(newds))) {
+	if (BP_IS_HOLE(dsl_dataset_get_blkptr(newds)) &&
+	    (featureflags & DMU_BACKUP_FEATURE_RAW) == 0) {
 		(void) dmu_objset_create_impl(dp->dp_spa,
 		    newds, dsl_dataset_get_blkptr(newds), drrb->drr_type, tx);
 	}
@@ -413,6 +541,7 @@ dmu_recv_resume_begin_check(void *arg, dmu_tx_t *tx)
 	dsl_pool_t *dp = dmu_tx_pool(tx);
 	struct drr_begin *drrb = drba->drba_cookie->drc_drrb;
 	int error;
+	ds_hold_flags_t dsflags = 0;
 	uint64_t featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
 	dsl_dataset_t *ds;
 	const char *tofs = drba->drba_cookie->drc_tofs;
@@ -463,29 +592,37 @@ dmu_recv_resume_begin_check(void *arg, dmu_tx_t *tx)
 	(void) snprintf(recvname, sizeof (recvname), "%s/%s",
 	    tofs, recv_clone_name);
 
-	if (dsl_dataset_hold(dp, recvname, FTAG, &ds) != 0) {
+	if (featureflags & DMU_BACKUP_FEATURE_RAW) {
+		/* raw receives require spill block allocation flag */
+		if (!(drrb->drr_flags & DRR_FLAG_SPILL_BLOCK))
+			return (SET_ERROR(ZFS_ERR_SPILL_BLOCK_FLAG_MISSING));
+	} else {
+		dsflags |= DS_HOLD_FLAG_DECRYPT;
+	}
+
+	if (dsl_dataset_hold_flags(dp, recvname, dsflags, FTAG, &ds) != 0) {
 		/* %recv does not exist; continue in tofs */
-		error = dsl_dataset_hold(dp, tofs, FTAG, &ds);
+		error = dsl_dataset_hold_flags(dp, tofs, dsflags, FTAG, &ds);
 		if (error != 0)
 			return (error);
 	}
 
 	/* check that ds is marked inconsistent */
 	if (!DS_IS_INCONSISTENT(ds)) {
-		dsl_dataset_rele(ds, FTAG);
+		dsl_dataset_rele_flags(ds, dsflags, FTAG);
 		return (SET_ERROR(EINVAL));
 	}
 
 	/* check that there is resuming data, and that the toguid matches */
 	if (!dsl_dataset_is_zapified(ds)) {
-		dsl_dataset_rele(ds, FTAG);
+		dsl_dataset_rele_flags(ds, dsflags, FTAG);
 		return (SET_ERROR(EINVAL));
 	}
 	uint64_t val;
 	error = zap_lookup(dp->dp_meta_objset, ds->ds_object,
 	    DS_FIELD_RESUME_TOGUID, sizeof (val), 1, &val);
 	if (error != 0 || drrb->drr_toguid != val) {
-		dsl_dataset_rele(ds, FTAG);
+		dsl_dataset_rele_flags(ds, dsflags, FTAG);
 		return (SET_ERROR(EINVAL));
 	}
 
@@ -495,13 +632,13 @@ dmu_recv_resume_begin_check(void *arg, dmu_tx_t *tx)
 	 * fails) because it will be marked inconsistent.
 	 */
 	if (dsl_dataset_has_owner(ds)) {
-		dsl_dataset_rele(ds, FTAG);
+		dsl_dataset_rele_flags(ds, dsflags, FTAG);
 		return (SET_ERROR(EBUSY));
 	}
 
 	/* There should not be any snapshots of this fs yet. */
 	if (ds->ds_prev != NULL && ds->ds_prev->ds_dir == ds->ds_dir) {
-		dsl_dataset_rele(ds, FTAG);
+		dsl_dataset_rele_flags(ds, dsflags, FTAG);
 		return (SET_ERROR(EINVAL));
 	}
 
@@ -515,11 +652,11 @@ dmu_recv_resume_begin_check(void *arg, dmu_tx_t *tx)
 	(void) zap_lookup(dp->dp_meta_objset, ds->ds_object,
 	    DS_FIELD_RESUME_FROMGUID, sizeof (val), 1, &val);
 	if (drrb->drr_fromguid != val) {
-		dsl_dataset_rele(ds, FTAG);
+		dsl_dataset_rele_flags(ds, dsflags, FTAG);
 		return (SET_ERROR(EINVAL));
 	}
 
-	dsl_dataset_rele(ds, FTAG);
+	dsl_dataset_rele_flags(ds, dsflags, FTAG);
 	return (0);
 }
 
@@ -529,7 +666,11 @@ dmu_recv_resume_begin_sync(void *arg, dmu_tx_t *tx)
 	dmu_recv_begin_arg_t *drba = arg;
 	dsl_pool_t *dp = dmu_tx_pool(tx);
 	const char *tofs = drba->drba_cookie->drc_tofs;
+	struct drr_begin *drrb = drba->drba_cookie->drc_drrb;
+	uint64_t featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
 	dsl_dataset_t *ds;
+	objset_t *os;
+	ds_hold_flags_t dsflags = 0;
 	uint64_t dsobj;
 	/* 6 extra bytes for /%recv */
 	char recvname[ZFS_MAX_DATASET_NAME_LEN + 6];
@@ -537,9 +678,15 @@ dmu_recv_resume_begin_sync(void *arg, dmu_tx_t *tx)
 	(void) snprintf(recvname, sizeof (recvname), "%s/%s",
 	    tofs, recv_clone_name);
 
-	if (dsl_dataset_hold(dp, recvname, FTAG, &ds) != 0) {
+	if (featureflags & DMU_BACKUP_FEATURE_RAW) {
+		drba->drba_cookie->drc_raw = B_TRUE;
+	} else {
+		dsflags |= DS_HOLD_FLAG_DECRYPT;
+	}
+
+	if (dsl_dataset_hold_flags(dp, recvname, dsflags, FTAG, &ds) != 0) {
 		/* %recv does not exist; continue in tofs */
-		VERIFY0(dsl_dataset_hold(dp, tofs, FTAG, &ds));
+		VERIFY0(dsl_dataset_hold_flags(dp, tofs, dsflags, FTAG, &ds));
 		drba->drba_cookie->drc_newfs = B_TRUE;
 	}
 
@@ -548,15 +695,17 @@ dmu_recv_resume_begin_sync(void *arg, dmu_tx_t *tx)
 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
 	dsl_dataset_phys(ds)->ds_flags &= ~DS_FLAG_INCONSISTENT;
 	dsobj = ds->ds_object;
-	dsl_dataset_rele(ds, FTAG);
+	dsl_dataset_rele_flags(ds, dsflags, FTAG);
 
-	VERIFY0(dsl_dataset_own_obj(dp, dsobj, dmu_recv_tag, &ds));
+	VERIFY0(dsl_dataset_own_obj(dp, dsobj, dsflags, dmu_recv_tag, &ds));
+	VERIFY0(dmu_objset_from_ds(ds, &os));
 
 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
 	dsl_dataset_phys(ds)->ds_flags |= DS_FLAG_INCONSISTENT;
 
 	rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
-	ASSERT(!BP_IS_HOLE(dsl_dataset_get_blkptr(ds)));
+	ASSERT(!BP_IS_HOLE(dsl_dataset_get_blkptr(ds)) ||
+	    drba->drba_cookie->drc_raw);
 	rrw_exit(&ds->ds_bp_rwlock, FTAG);
 
 	drba->drba_cookie->drc_ds = ds;
@@ -596,6 +745,9 @@ dmu_recv_begin(char *tofs, char *tosnap, dmu_replay_record_t *drr_begin,
 		return (SET_ERROR(EINVAL));
 	}
 
+	if (drc->drc_drrb->drr_flags & DRR_FLAG_SPILL_BLOCK)
+		drc->drc_spill = B_TRUE;
+
 	drba.drba_origin = origin;
 	drba.drba_cookie = drc;
 	drba.drba_cred = CRED();
@@ -619,7 +771,7 @@ struct receive_record_arg {
 	 * If the record is a write, pointer to the arc_buf_t containing the
 	 * payload.
 	 */
-	arc_buf_t *write_buf;
+	arc_buf_t *arc_buf;
 	int payload_size;
 	uint64_t bytes_read; /* bytes read from stream when record created */
 	boolean_t eos_marker; /* Marks the end of the stream */
@@ -643,10 +795,21 @@ struct receive_writer_arg {
 	/* A map from guid to dataset to help handle dedup'd streams. */
 	avl_tree_t *guid_to_ds_map;
 	boolean_t resumable;
+	boolean_t raw;		/* DMU_BACKUP_FEATURE_RAW set */
+	boolean_t spill;	/* DRR_FLAG_SPILL_BLOCK set */
 	uint64_t last_object;
 	uint64_t last_offset;
 	uint64_t max_object; /* highest object ID referenced in stream */
 	uint64_t bytes_read; /* bytes read when current record created */
+
+	/* Encryption parameters for the last received DRR_OBJECT_RANGE */
+	boolean_t or_crypt_params_present;
+	uint64_t or_firstobj;
+	uint64_t or_numslots;
+	uint8_t or_salt[ZIO_DATA_SALT_LEN];
+	uint8_t or_iv[ZIO_DATA_IV_LEN];
+	uint8_t or_mac[ZIO_DATA_MAC_LEN];
+	boolean_t or_byteorder;
 };
 
 struct objlist {
@@ -679,12 +842,15 @@ struct receive_arg {
 	zio_cksum_t prev_cksum;
 	int err;
 	boolean_t byteswap;
+	boolean_t raw;
+	uint64_t featureflags;
 	/* Sorted list of objects not to issue prefetches for. */
 	struct objlist ignore_objlist;
 };
 
 typedef struct guid_map_entry {
 	uint64_t	guid;
+	boolean_t	raw;	/* disown without DS_HOLD_FLAG_DECRYPT */
 	dsl_dataset_t	*gme_ds;
 	avl_node_t	avlnode;
 } guid_map_entry_t;
@@ -710,8 +876,14 @@ free_guid_map_onexit(void *arg)
 	guid_map_entry_t *gmep;
 
 	while ((gmep = avl_destroy_nodes(ca, &cookie)) != NULL) {
-		dsl_dataset_long_rele(gmep->gme_ds, gmep);
-		dsl_dataset_rele(gmep->gme_ds, gmep);
+		ds_hold_flags_t dsflags = DS_HOLD_FLAG_DECRYPT;
+
+		if (gmep->raw) {
+			gmep->gme_ds->ds_objset->os_raw_receive = B_FALSE;
+			dsflags &= ~DS_HOLD_FLAG_DECRYPT;
+		}
+
+		dsl_dataset_disown(gmep->gme_ds, dsflags, gmep);
 		kmem_free(gmep, sizeof (guid_map_entry_t));
 	}
 	avl_destroy(ca);
@@ -727,7 +899,8 @@ receive_read(struct receive_arg *ra, int len, void *buf)
 	 * The code doesn't rely on this (lengths being multiples of 8).  See
 	 * comment in dump_bytes.
 	 */
-	ASSERT0(len % 8);
+	ASSERT(len % 8 == 0 ||
+	    (ra->featureflags & DMU_BACKUP_FEATURE_RAW) != 0);
 
 	while (done < len) {
 		ssize_t resid;
@@ -780,7 +953,9 @@ byteswap_record(dmu_replay_record_t *drr)
 		DO32(drr_object.drr_bonustype);
 		DO32(drr_object.drr_blksz);
 		DO32(drr_object.drr_bonuslen);
+		DO32(drr_object.drr_raw_bonuslen);
 		DO64(drr_object.drr_toguid);
+		DO64(drr_object.drr_maxblkid);
 		break;
 	case DRR_FREEOBJECTS:
 		DO64(drr_freeobjects.drr_firstobj);
@@ -827,6 +1002,13 @@ byteswap_record(dmu_replay_record_t *drr)
 		DO64(drr_spill.drr_object);
 		DO64(drr_spill.drr_length);
 		DO64(drr_spill.drr_toguid);
+		DO64(drr_spill.drr_compressed_size);
+		DO32(drr_spill.drr_type);
+		break;
+	case DRR_OBJECT_RANGE:
+		DO64(drr_object_range.drr_firstobj);
+		DO64(drr_object_range.drr_numslots);
+		DO64(drr_object_range.drr_toguid);
 		break;
 	case DRR_END:
 		DO64(drr_end.drr_toguid);
@@ -891,6 +1073,8 @@ save_resume_state(struct receive_writer_arg *rwa,
 	rwa->os->os_dsl_dataset->ds_resume_bytes[txgoff] = rwa->bytes_read;
 }
 
+int receive_object_delay_frac = 0;	/* 0 = no delay in receive_object() */
+
 static int
 receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
     void *data)
@@ -902,6 +1086,10 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
 	uint8_t dn_slots = drro->drr_dn_slots != 0 ?
 	    drro->drr_dn_slots : DNODE_MIN_SLOTS;
 
+	if (receive_object_delay_frac != 0 &&
+	    spa_get_random(receive_object_delay_frac) == 0)
+		delay(1);
+
 	if (drro->drr_type == DMU_OT_NONE ||
 	    !DMU_OT_IS_VALID(drro->drr_type) ||
 	    !DMU_OT_IS_VALID(drro->drr_bonustype) ||
@@ -917,6 +1105,37 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
 		return (SET_ERROR(EINVAL));
 	}
 
+	if (rwa->raw) {
+		/*
+		 * We should have received a DRR_OBJECT_RANGE record
+		 * containing this block and stored it in rwa.
+		 */
+		if (drro->drr_object < rwa->or_firstobj ||
+		    drro->drr_object >= rwa->or_firstobj + rwa->or_numslots ||
+		    drro->drr_raw_bonuslen < drro->drr_bonuslen ||
+		    drro->drr_indblkshift > SPA_MAXBLOCKSHIFT ||
+		    drro->drr_nlevels > DN_MAX_LEVELS ||
+		    drro->drr_nblkptr > DN_MAX_NBLKPTR ||
+		    DN_SLOTS_TO_BONUSLEN(drro->drr_dn_slots) <
+		    drro->drr_raw_bonuslen)
+			return (SET_ERROR(EINVAL));
+	} else {
+
+		/*
+		 * The DRR_OBJECT_SPILL flag is valid when the DRR_BEGIN
+		 * record indicates this by setting DRR_FLAG_SPILL_BLOCK.
+		 */
+		if (((drro->drr_flags & ~(DRR_OBJECT_SPILL))) ||
+		    (!rwa->spill && DRR_OBJECT_HAS_SPILL(drro->drr_flags))) {
+			return (SET_ERROR(EINVAL));
+		}
+
+		if (drro->drr_raw_bonuslen != 0 || drro->drr_nblkptr != 0 ||
+		    drro->drr_indblkshift != 0 || drro->drr_nlevels != 0) {
+			return (SET_ERROR(EINVAL));
+		}
+	}
+
 	err = dmu_object_info(rwa->os, drro->drr_object, &doi);
 
 	if (err != 0 && err != ENOENT && err != EEXIST)
@@ -929,20 +1148,86 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
 	 * If we are losing blkptrs or changing the block size this must
 	 * be a new file instance.  We must clear out the previous file
 	 * contents before we can change this type of metadata in the dnode.
+	 * Raw receives will also check that the indirect structure of the
+	 * dnode hasn't changed.
 	 */
 	if (err == 0) {
-		int nblkptr;
+		uint32_t indblksz = drro->drr_indblkshift ?
+		    1ULL << drro->drr_indblkshift : 0;
+		int nblkptr = deduce_nblkptr(drro->drr_bonustype,
+		    drro->drr_bonuslen);
+		boolean_t did_free = B_FALSE;
 
 		object = drro->drr_object;
 
-		nblkptr = deduce_nblkptr(drro->drr_bonustype,
-		    drro->drr_bonuslen);
+		/* nblkptr should be bounded by the bonus size and type */
+		if (rwa->raw && nblkptr != drro->drr_nblkptr)
+			return (SET_ERROR(EINVAL));
 
+		/*
+		 * Check for indicators that the object was freed and
+		 * reallocated. For all sends, these indicators are:
+		 *	- A changed block size
+		 *	- A smaller nblkptr
+		 *	- A changed dnode size
+		 * For raw sends we also check a few other fields to
+		 * ensure we are preserving the objset structure exactly
+		 * as it was on the send side:
+		 *	- A changed indirect block size
+		 *	- A smaller nlevels
+		 */
 		if (drro->drr_blksz != doi.doi_data_block_size ||
 		    nblkptr < doi.doi_nblkptr ||
+		    dn_slots != doi.doi_dnodesize >> DNODE_SHIFT ||
+		    (rwa->raw &&
+		    (indblksz != doi.doi_metadata_block_size ||
+		    drro->drr_nlevels < doi.doi_indirection))) {
+			err = dmu_free_long_range(rwa->os,
+			    drro->drr_object, 0, DMU_OBJECT_END);
+			if (err != 0)
+				return (SET_ERROR(EINVAL));
+			else
+				did_free = B_TRUE;
+		}
+
+		/*
+		 * The dmu does not currently support decreasing nlevels
+		 * or changing the number of dnode slots on an object. For
+		 * non-raw sends, this does not matter and the new object
+		 * can just use the previous one's nlevels. For raw sends,
+		 * however, the structure of the received dnode (including
+		 * nlevels and dnode slots) must match that of the send
+		 * side. Therefore, instead of using dmu_object_reclaim(),
+		 * we must free the object completely and call
+		 * dmu_object_claim_dnsize() instead.
+		 */
+		if ((rwa->raw && drro->drr_nlevels < doi.doi_indirection) ||
 		    dn_slots != doi.doi_dnodesize >> DNODE_SHIFT) {
+			err = dmu_free_long_object(rwa->os, drro->drr_object);
+			if (err != 0)
+				return (SET_ERROR(EINVAL));
+
+			txg_wait_synced(dmu_objset_pool(rwa->os), 0);
+			object = DMU_NEW_OBJECT;
+		}
+
+		/*
+		 * For raw receives, free everything beyond the new incoming
+		 * maxblkid. Normally this would be done with a DRR_FREE
+		 * record that would come after this DRR_OBJECT record is
+		 * processed. However, for raw receives we manually set the
+		 * maxblkid from the drr_maxblkid and so we must first free
+		 * everything above that blkid to ensure the DMU is always
+		 * consistent with itself. We will never free the first block
+		 * of the object here because a maxblkid of 0 could indicate
+		 * an object with a single block or one with no blocks. This
+		 * free may be skipped when dmu_free_long_range() was called
+		 * above since it covers the entire object's contents.
+		 */
+		if (rwa->raw && object != DMU_NEW_OBJECT && !did_free) {
 			err = dmu_free_long_range(rwa->os, drro->drr_object,
-			    0, DMU_OBJECT_END);
+			    (drro->drr_maxblkid + 1) * doi.doi_data_block_size,
+			    DMU_OBJECT_END);
 			if (err != 0)
 				return (SET_ERROR(EINVAL));
 		}
@@ -955,7 +1240,11 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
 		 * earlier in the stream.
 		 */
 		txg_wait_synced(dmu_objset_pool(rwa->os), 0);
-		object = drro->drr_object;
+		if (dmu_object_info(rwa->os, drro->drr_object, NULL) != ENOENT)
+			return (SET_ERROR(EINVAL));
+
+		/* object was freed and we are about to allocate a new one */
+		object = DMU_NEW_OBJECT;
 	} else {
 		/* object is free and we are about to allocate a new one */
 		object = DMU_NEW_OBJECT;
@@ -995,6 +1284,7 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
 
 	tx = dmu_tx_create(rwa->os);
 	dmu_tx_hold_bonus(tx, object);
+	dmu_tx_hold_write(tx, object, 0, 0);
 	err = dmu_tx_assign(tx, TXG_WAIT);
 	if (err != 0) {
 		dmu_tx_abort(tx);
@@ -1002,7 +1292,7 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
 	}
 
 	if (object == DMU_NEW_OBJECT) {
-		/* currently free, want to be allocated */
+		/* Currently free, wants to be allocated */
 		err = dmu_object_claim_dnsize(rwa->os, drro->drr_object,
 		    drro->drr_type, drro->drr_blksz,
 		    drro->drr_bonustype, drro->drr_bonuslen,
@@ -1010,39 +1300,116 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
 	} else if (drro->drr_type != doi.doi_type ||
 	    drro->drr_blksz != doi.doi_data_block_size ||
 	    drro->drr_bonustype != doi.doi_bonus_type ||
-	    drro->drr_bonuslen != doi.doi_bonus_size ||
-	    drro->drr_dn_slots != (doi.doi_dnodesize >> DNODE_SHIFT)) {
-		/* currently allocated, but with different properties */
+	    drro->drr_bonuslen != doi.doi_bonus_size) {
+		/* Currently allocated, but with different properties */
 		err = dmu_object_reclaim_dnsize(rwa->os, drro->drr_object,
 		    drro->drr_type, drro->drr_blksz,
 		    drro->drr_bonustype, drro->drr_bonuslen,
-		    drro->drr_dn_slots << DNODE_SHIFT, tx);
+		    dn_slots << DNODE_SHIFT, rwa->spill ?
+		    DRR_OBJECT_HAS_SPILL(drro->drr_flags) : B_FALSE, tx);
+	} else if (rwa->spill && !DRR_OBJECT_HAS_SPILL(drro->drr_flags)) {
+		/*
+		 * Currently allocated, the existing version of this object
+		 * may reference a spill block that is no longer allocated
+		 * at the source and needs to be freed.
+		 */
+		err = dmu_object_rm_spill(rwa->os, drro->drr_object, tx);
 	}
+
 	if (err != 0) {
 		dmu_tx_commit(tx);
 		return (SET_ERROR(EINVAL));
 	}
 
+	if (rwa->or_crypt_params_present) {
+		/*
+		 * Set the crypt params for the buffer associated with this
+		 * range of dnodes.  This causes the blkptr_t to have the
+		 * same crypt params (byteorder, salt, iv, mac) as on the
+		 * sending side.
+		 *
+		 * Since we are committing this tx now, it is possible for
+		 * the dnode block to end up on-disk with the incorrect MAC,
+		 * if subsequent objects in this block are received in a
+		 * different txg.  However, since the dataset is marked as
+		 * inconsistent, no code paths will do a non-raw read (or
+		 * decrypt the block / verify the MAC). The receive code and
+		 * scrub code can safely do raw reads and verify the
+		 * checksum.  They don't need to verify the MAC.
+		 */
+		dmu_buf_t *db = NULL;
+		uint64_t offset = rwa->or_firstobj * DNODE_MIN_SIZE;
+
+		err = dmu_buf_hold_by_dnode(DMU_META_DNODE(rwa->os),
+		    offset, FTAG, &db, DMU_READ_PREFETCH | DMU_READ_NO_DECRYPT);
+		if (err != 0) {
+			dmu_tx_commit(tx);
+			return (SET_ERROR(EINVAL));
+		}
+
+		dmu_buf_set_crypt_params(db, rwa->or_byteorder,
+		    rwa->or_salt, rwa->or_iv, rwa->or_mac, tx);
+
+		dmu_buf_rele(db, FTAG);
+
+		rwa->or_crypt_params_present = B_FALSE;
+	}
+
 	dmu_object_set_checksum(rwa->os, drro->drr_object,
 	    drro->drr_checksumtype, tx);
 	dmu_object_set_compress(rwa->os, drro->drr_object,
 	    drro->drr_compress, tx);
 
+	/* handle more restrictive dnode structuring for raw recvs */
+	if (rwa->raw) {
+		/*
+		 * Set the indirect block size, block shift, nlevels.
+		 * This will not fail because we ensured all of the
+		 * blocks were freed earlier if this is a new object.
+		 * For non-new objects block size and indirect block
+		 * shift cannot change and nlevels can only increase.
+		 */
+		VERIFY0(dmu_object_set_blocksize(rwa->os, drro->drr_object,
+		    drro->drr_blksz, drro->drr_indblkshift, tx));
+		VERIFY0(dmu_object_set_nlevels(rwa->os, drro->drr_object,
+		    drro->drr_nlevels, tx));
+
+		/*
+		 * Set the maxblkid. This will always succeed because
+		 * we freed all blocks beyond the new maxblkid above.
+		 */
+		VERIFY0(dmu_object_set_maxblkid(rwa->os, drro->drr_object,
+		    drro->drr_maxblkid, tx));
+	}
+
 	if (data != NULL) {
 		dmu_buf_t *db;
+		dnode_t *dn;
+		uint32_t flags = DMU_READ_NO_PREFETCH;
+
+		if (rwa->raw)
+			flags |= DMU_READ_NO_DECRYPT;
+
+		VERIFY0(dnode_hold(rwa->os, drro->drr_object, FTAG, &dn));
+		VERIFY0(dmu_bonus_hold_by_dnode(dn, FTAG, &db, flags));
 
-		VERIFY0(dmu_bonus_hold(rwa->os, drro->drr_object, FTAG, &db));
 		dmu_buf_will_dirty(db, tx);
 
 		ASSERT3U(db->db_size, >=, drro->drr_bonuslen);
-		bcopy(data, db->db_data, drro->drr_bonuslen);
-		if (rwa->byteswap) {
+		bcopy(data, db->db_data, DRR_OBJECT_PAYLOAD_SIZE(drro));
+
+		/*
+		 * Raw bonus buffers have their byteorder determined by the
+		 * DRR_OBJECT_RANGE record.
+		 */
+		if (rwa->byteswap && !rwa->raw) {
 			dmu_object_byteswap_t byteswap =
 			    DMU_OT_BYTESWAP(drro->drr_bonustype);
 			dmu_ot_byteswap[byteswap].ob_func(db->db_data,
-			    drro->drr_bonuslen);
+			    DRR_OBJECT_PAYLOAD_SIZE(drro));
 		}
 		dmu_buf_rele(db, FTAG);
+		dnode_rele(dn, FTAG);
 	}
 	dmu_tx_commit(tx);
 
@@ -1063,15 +1430,17 @@ receive_freeobjects(struct receive_writer_arg *rwa,
 	for (obj = drrfo->drr_firstobj == 0 ? 1 : drrfo->drr_firstobj;
 	    obj < drrfo->drr_firstobj + drrfo->drr_numobjs && next_err == 0;
 	    next_err = dmu_object_next(rwa->os, &obj, FALSE, 0)) {
+		dmu_object_info_t doi;
 		int err;
 
-		err = dmu_object_info(rwa->os, obj, NULL);
+		err = dmu_object_info(rwa->os, obj, &doi);
 		if (err == ENOENT)
 			continue;
 		else if (err != 0)
 			return (err);
 
 		err = dmu_free_long_object(rwa->os, obj);
+
 		if (err != 0)
 			return (err);
 
@@ -1087,8 +1456,9 @@ static int
 receive_write(struct receive_writer_arg *rwa, struct drr_write *drrw,
     arc_buf_t *abuf)
 {
-	dmu_tx_t *tx;
 	int err;
+	dmu_tx_t *tx;
+	dnode_t *dn;
 
 	if (drrw->drr_offset + drrw->drr_logical_size < drrw->drr_offset ||
 	    !DMU_OT_IS_VALID(drrw->drr_type))
@@ -1113,7 +1483,6 @@ receive_write(struct receive_writer_arg *rwa, struct drr_write *drrw,
 		return (SET_ERROR(EINVAL));
 
 	tx = dmu_tx_create(rwa->os);
-
 	dmu_tx_hold_write(tx, drrw->drr_object,
 	    drrw->drr_offset, drrw->drr_logical_size);
 	err = dmu_tx_assign(tx, TXG_WAIT);
@@ -1121,18 +1490,23 @@ receive_write(struct receive_writer_arg *rwa, struct drr_write *drrw,
 		dmu_tx_abort(tx);
 		return (err);
 	}
-	if (rwa->byteswap) {
+
+	if (rwa->byteswap && !arc_is_encrypted(abuf) &&
+	    arc_get_compression(abuf) == ZIO_COMPRESS_OFF) {
 		dmu_object_byteswap_t byteswap =
 		    DMU_OT_BYTESWAP(drrw->drr_type);
 		dmu_ot_byteswap[byteswap].ob_func(abuf->b_data,
 		    DRR_WRITE_PAYLOAD_SIZE(drrw));
 	}
 
-	/* use the bonus buf to look up the dnode in dmu_assign_arcbuf */
-	dmu_buf_t *bonus;
-	if (dmu_bonus_hold(rwa->os, drrw->drr_object, FTAG, &bonus) != 0)
-		return (SET_ERROR(EINVAL));
-	dmu_assign_arcbuf(bonus, drrw->drr_offset, abuf, tx);
+	VERIFY0(dnode_hold(rwa->os, drrw->drr_object, FTAG, &dn));
+	err = dmu_assign_arcbuf_by_dnode(dn, drrw->drr_offset, abuf, tx);
+	if (err != 0) {
+		dnode_rele(dn, FTAG);
+		dmu_tx_commit(tx);
+		return (err);
+	}
+	dnode_rele(dn, FTAG);
 
 	/*
 	 * Note: If the receive fails, we want the resume stream to start
@@ -1142,7 +1516,6 @@ receive_write(struct receive_writer_arg *rwa, struct drr_write *drrw,
 	 */
 	save_resume_state(rwa, drrw->drr_object, drrw->drr_offset, tx);
 	dmu_tx_commit(tx);
-	dmu_buf_rele(bonus, FTAG);
 
 	return (0);
 }
@@ -1164,6 +1537,7 @@ receive_write_byref(struct receive_writer_arg *rwa,
 	guid_map_entry_t *gmep;
 	avl_index_t where;
 	objset_t *ref_os = NULL;
+	int flags = DMU_READ_PREFETCH;
 	dmu_buf_t *dbp;
 
 	if (drrwbr->drr_offset + drrwbr->drr_length < drrwbr->drr_offset)
@@ -1188,8 +1562,12 @@ receive_write_byref(struct receive_writer_arg *rwa,
 	if (drrwbr->drr_object > rwa->max_object)
 		rwa->max_object = drrwbr->drr_object;
 
+	if (rwa->raw)
+		flags |= DMU_READ_NO_DECRYPT;
+
+	/* may return either a regular db or an encrypted one */
 	err = dmu_buf_hold(ref_os, drrwbr->drr_refobject,
-	    drrwbr->drr_refoffset, FTAG, &dbp, DMU_READ_PREFETCH);
+	    drrwbr->drr_refoffset, FTAG, &dbp, flags);
 	if (err != 0)
 		return (err);
 
@@ -1202,8 +1580,14 @@ receive_write_byref(struct receive_writer_arg *rwa,
 		dmu_tx_abort(tx);
 		return (err);
 	}
-	dmu_write(rwa->os, drrwbr->drr_object,
-	    drrwbr->drr_offset, drrwbr->drr_length, dbp->db_data, tx);
+
+	if (rwa->raw) {
+		dmu_copy_from_buf(rwa->os, drrwbr->drr_object,
+		    drrwbr->drr_offset, dbp, tx);
+	} else {
+		dmu_write(rwa->os, drrwbr->drr_object,
+		    drrwbr->drr_offset, drrwbr->drr_length, dbp->db_data, tx);
+	}
 	dmu_buf_rele(dbp, FTAG);
 
 	/* See comment in restore_write. */
@@ -1229,6 +1613,8 @@ receive_write_embedded(struct receive_writer_arg *rwa,
 		return (EINVAL);
 	if (drrwe->drr_compression >= ZIO_COMPRESS_FUNCTIONS)
 		return (EINVAL);
+	if (rwa->raw)
+		return (SET_ERROR(EINVAL));
 
 	if (drrwe->drr_object > rwa->max_object)
 		rwa->max_object = drrwe->drr_object;
@@ -1256,16 +1642,37 @@ receive_write_embedded(struct receive_writer_arg *rwa,
 
 static int
 receive_spill(struct receive_writer_arg *rwa, struct drr_spill *drrs,
-    void *data)
+    arc_buf_t *abuf)
 {
 	dmu_tx_t *tx;
 	dmu_buf_t *db, *db_spill;
 	int err;
+	uint32_t flags = 0;
 
 	if (drrs->drr_length < SPA_MINBLOCKSIZE ||
 	    drrs->drr_length > spa_maxblocksize(dmu_objset_spa(rwa->os)))
 		return (SET_ERROR(EINVAL));
 
+	/*
+	 * This is an unmodified spill block which was added to the stream
+	 * to resolve an issue with incorrectly removing spill blocks.  It
+	 * should be ignored by current versions of the code which support
+	 * the DRR_FLAG_SPILL_BLOCK flag.
+	 */
+	if (rwa->spill && DRR_SPILL_IS_UNMODIFIED(drrs->drr_flags)) {
+		dmu_return_arcbuf(abuf);
+		return (0);
+	}
+
+	if (rwa->raw) {
+		if (!DMU_OT_IS_VALID(drrs->drr_type) ||
+		    drrs->drr_compressiontype >= ZIO_COMPRESS_FUNCTIONS ||
+		    drrs->drr_compressed_size == 0)
+			return (SET_ERROR(EINVAL));
+
+		flags |= DMU_READ_NO_DECRYPT;
+	}
+
 	if (dmu_object_info(rwa->os, drrs->drr_object, NULL) != 0)
 		return (SET_ERROR(EINVAL));
 
@@ -1273,7 +1680,8 @@ receive_spill(struct receive_writer_arg *rwa, struct drr_spill *drrs,
 		rwa->max_object = drrs->drr_object;
 
 	VERIFY0(dmu_bonus_hold(rwa->os, drrs->drr_object, FTAG, &db));
-	if ((err = dmu_spill_hold_by_bonus(db, FTAG, &db_spill)) != 0) {
+	if ((err = dmu_spill_hold_by_bonus(db, DMU_READ_NO_DECRYPT, FTAG,
+	    &db_spill)) != 0) {
 		dmu_buf_rele(db, FTAG);
 		return (err);
 	}
@@ -1289,12 +1697,27 @@ receive_spill(struct receive_writer_arg *rwa, struct drr_spill *drrs,
 		dmu_tx_abort(tx);
 		return (err);
 	}
-	dmu_buf_will_dirty(db_spill, tx);
 
-	if (db_spill->db_size < drrs->drr_length)
+	/*
+	 * Spill blocks may both grow and shrink.  When a change in size
+	 * occurs any existing dbuf must be updated to match the logical
+	 * size of the provided arc_buf_t.
+	 */
+	if (db_spill->db_size != drrs->drr_length) {
+		dmu_buf_will_fill(db_spill, tx);
 		VERIFY(0 == dbuf_spill_set_blksz(db_spill,
 		    drrs->drr_length, tx));
-	bcopy(data, db_spill->db_data, drrs->drr_length);
+	}
+
+	if (rwa->byteswap && !arc_is_encrypted(abuf) &&
+	    arc_get_compression(abuf) == ZIO_COMPRESS_OFF) {
+		dmu_object_byteswap_t byteswap =
+		    DMU_OT_BYTESWAP(drrs->drr_type);
+		dmu_ot_byteswap[byteswap].ob_func(abuf->b_data,
+		    DRR_SPILL_PAYLOAD_SIZE(drrs));
+	}
+
+	dbuf_assign_arcbuf((dmu_buf_impl_t *)db_spill, abuf, tx);
 
 	dmu_buf_rele(db, FTAG);
 	dmu_buf_rele(db_spill, FTAG);
@@ -1309,7 +1732,7 @@ receive_free(struct receive_writer_arg *rwa, struct drr_free *drrf)
 {
 	int err;
 
-	if (drrf->drr_length != -1ULL &&
+	if (drrf->drr_length != DMU_OBJECT_END &&
 	    drrf->drr_offset + drrf->drr_length < drrf->drr_offset)
 		return (SET_ERROR(EINVAL));
 
@@ -1325,18 +1748,81 @@ receive_free(struct receive_writer_arg *rwa, struct drr_free *drrf)
 	return (err);
 }
 
+static int
+receive_object_range(struct receive_writer_arg *rwa,
+    struct drr_object_range *drror)
+{
+	/*
+	 * Validate a DRR_OBJECT_RANGE record and stash its crypt params in
+	 * rwa for receive_object(). The block's byteorder starts as native
+	 * (ZFS_HOST_BYTEORDER), is flipped if the send stream is byteswapped
+	 * (rwa->byteswap), and flipped again if this block was in
+	 * non-native format on the send side.
+	 */
+	boolean_t byteorder = ZFS_HOST_BYTEORDER ^ rwa->byteswap ^
+	    !!DRR_IS_RAW_BYTESWAPPED(drror->drr_flags);
+
+	/*
+	 * The record must describe exactly one DNODES_PER_BLOCK-aligned
+	 * block of dnodes and is only valid in a raw receive. Dnode block
+	 * sizes are constant, so for now we need not check that they match
+	 * on the sending and receiving sides. Non-raw sends do not send a
+	 * DRR_OBJECT_RANGE record at all; raw sends require it because the
+	 * encryption parameters are used to protect an entire block of bonus
+	 * buffers. If the size of dnode blocks ever becomes variable,
+	 * handling will need to be added to ensure that dnode block sizes
+	 * match on the sending and receiving side.
+	 */
+	if (drror->drr_numslots != DNODES_PER_BLOCK ||
+	    P2PHASE(drror->drr_firstobj, DNODES_PER_BLOCK) != 0 ||
+	    !rwa->raw)
+		return (SET_ERROR(EINVAL));
+
+	if (drror->drr_firstobj > rwa->max_object)
+		rwa->max_object = drror->drr_firstobj;
+
+	/*
+	 * Only record the params here; applying them is deferred to
+	 * receive_object() so that an empty block of dnodes is not written
+	 * out and converted to a HOLE BP.
+	 */
+	rwa->or_crypt_params_present = B_TRUE;
+	rwa->or_firstobj = drror->drr_firstobj;
+	rwa->or_numslots = drror->drr_numslots;
+	bcopy(drror->drr_salt, rwa->or_salt, ZIO_DATA_SALT_LEN);
+	bcopy(drror->drr_iv, rwa->or_iv, ZIO_DATA_IV_LEN);
+	bcopy(drror->drr_mac, rwa->or_mac, ZIO_DATA_MAC_LEN);
+	rwa->or_byteorder = byteorder;
+
+	return (0);
+}
+
 /* used to destroy the drc_ds on error */
 static void
 dmu_recv_cleanup_ds(dmu_recv_cookie_t *drc)
 {
-	if (drc->drc_resumable) {
-		/* wait for our resume state to be written to disk */
-		txg_wait_synced(drc->drc_ds->ds_dir->dd_pool, 0);
-		dsl_dataset_disown(drc->drc_ds, dmu_recv_tag);
+	dsl_dataset_t *ds = drc->drc_ds;
+	ds_hold_flags_t dsflags = (drc->drc_raw) ? 0 : DS_HOLD_FLAG_DECRYPT;
+
+	/*
+	 * Wait for the txg sync before cleaning up: this gets resume state
+	 * on disk (resumable receives) and stops the user accounting code
+	 * from acting after we stop receiving (raw receives). Then keep the
+	 * dataset only if it is resumable and its blkptr is not a hole;
+	 * otherwise disown and destroy it.
+	 */
+	txg_wait_synced(ds->ds_dir->dd_pool, 0);
+	ds->ds_objset->os_raw_receive = B_FALSE;
+
+	rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
+	if (drc->drc_resumable && !BP_IS_HOLE(dsl_dataset_get_blkptr(ds))) {
+		rrw_exit(&ds->ds_bp_rwlock, FTAG);
+		dsl_dataset_disown(ds, dsflags, dmu_recv_tag);
 	} else {
 		char name[ZFS_MAX_DATASET_NAME_LEN];
-		dsl_dataset_name(drc->drc_ds, name);
-		dsl_dataset_disown(drc->drc_ds, dmu_recv_tag);
+		rrw_exit(&ds->ds_bp_rwlock, FTAG);
+		dsl_dataset_name(ds, name);
+		dsl_dataset_disown(ds, dsflags, dmu_recv_tag);
 		(void) dsl_destroy_head(name);
 	}
 }
@@ -1384,6 +1870,7 @@ receive_read_payload_and_next_header(struct receive_arg *ra, int len, void *buf)
 	err = receive_read(ra, sizeof (ra->next_rrd->header),
 	    &ra->next_rrd->header);
 	ra->next_rrd->bytes_read = ra->bytes_read;
+
 	if (err != 0) {
 		kmem_free(ra->next_rrd, sizeof (*ra->next_rrd));
 		ra->next_rrd = NULL;
@@ -1525,9 +2012,13 @@ receive_read_record(struct receive_arg *ra)
 	case DRR_OBJECT:
 	{
 		struct drr_object *drro = &ra->rrd->header.drr_u.drr_object;
-		uint32_t size = P2ROUNDUP(drro->drr_bonuslen, 8);
-		void *buf = kmem_zalloc(size, KM_SLEEP);
+		uint32_t size = DRR_OBJECT_PAYLOAD_SIZE(drro);
+		void *buf = NULL;
 		dmu_object_info_t doi;
+
+		if (size != 0)
+			buf = kmem_zalloc(size, KM_SLEEP);
+
 		err = receive_read_payload_and_next_header(ra, size, buf);
 		if (err != 0) {
 			kmem_free(buf, size);
@@ -1538,7 +2029,7 @@ receive_read_record(struct receive_arg *ra)
 		 * See receive_read_prefetch for an explanation why we're
 		 * storing this object in the ignore_obj_list.
 		 */
-		if (err == ENOENT ||
+		if (err == ENOENT || err == EEXIST ||
 		    (err == 0 && doi.doi_data_block_size != drro->drr_blksz)) {
 			objlist_insert(&ra->ignore_objlist, drro->drr_object);
 			err = 0;
@@ -1555,7 +2046,18 @@ receive_read_record(struct receive_arg *ra)
 		struct drr_write *drrw = &ra->rrd->header.drr_u.drr_write;
 		arc_buf_t *abuf;
 		boolean_t is_meta = DMU_OT_IS_METADATA(drrw->drr_type);
-		if (DRR_WRITE_COMPRESSED(drrw)) {
+
+		if (ra->raw) {
+			boolean_t byteorder = ZFS_HOST_BYTEORDER ^
+			    !!DRR_IS_RAW_BYTESWAPPED(drrw->drr_flags) ^
+			    ra->byteswap;
+
+			abuf = arc_loan_raw_buf(dmu_objset_spa(ra->os),
+			    drrw->drr_object, byteorder, drrw->drr_salt,
+			    drrw->drr_iv, drrw->drr_mac, drrw->drr_type,
+			    drrw->drr_compressed_size, drrw->drr_logical_size,
+			    drrw->drr_compressiontype);
+		} else if (DRR_WRITE_COMPRESSED(drrw)) {
 			ASSERT3U(drrw->drr_compressed_size, >, 0);
 			ASSERT3U(drrw->drr_logical_size, >=,
 			    drrw->drr_compressed_size);
@@ -1575,7 +2077,7 @@ receive_read_record(struct receive_arg *ra)
 			dmu_return_arcbuf(abuf);
 			return (err);
 		}
-		ra->rrd->write_buf = abuf;
+		ra->rrd->arc_buf = abuf;
 		receive_read_prefetch(ra, drrw->drr_object, drrw->drr_offset,
 		    drrw->drr_logical_size);
 		return (err);
@@ -1625,11 +2127,38 @@ receive_read_record(struct receive_arg *ra)
 	case DRR_SPILL:
 	{
 		struct drr_spill *drrs = &ra->rrd->header.drr_u.drr_spill;
-		void *buf = kmem_zalloc(drrs->drr_length, KM_SLEEP);
-		err = receive_read_payload_and_next_header(ra, drrs->drr_length,
-		    buf);
-		if (err != 0)
-			kmem_free(buf, drrs->drr_length);
+		arc_buf_t *abuf;
+		int len = DRR_SPILL_PAYLOAD_SIZE(drrs);
+
+		/* DRR_SPILL records are either raw or uncompressed */
+		if (ra->raw) {
+			boolean_t byteorder = ZFS_HOST_BYTEORDER ^
+			    !!DRR_IS_RAW_BYTESWAPPED(drrs->drr_flags) ^
+			    ra->byteswap;
+
+			abuf = arc_loan_raw_buf(dmu_objset_spa(ra->os),
+			    dmu_objset_id(ra->os), byteorder, drrs->drr_salt,
+			    drrs->drr_iv, drrs->drr_mac, drrs->drr_type,
+			    drrs->drr_compressed_size, drrs->drr_length,
+			    drrs->drr_compressiontype);
+		} else {
+			abuf = arc_loan_buf(dmu_objset_spa(ra->os),
+			    DMU_OT_IS_METADATA(drrs->drr_type),
+			    drrs->drr_length);
+		}
+
+		err = receive_read_payload_and_next_header(ra, len,
+		    abuf->b_data);
+		if (err != 0) {
+			dmu_return_arcbuf(abuf);
+			return (err);
+		}
+		ra->rrd->arc_buf = abuf;
+		return (err);
+	}
+	case DRR_OBJECT_RANGE:
+	{
+		err = receive_read_payload_and_next_header(ra, 0, NULL);
 		return (err);
 	}
 	default:
@@ -1668,11 +2197,11 @@ receive_process_record(struct receive_writer_arg *rwa,
 	case DRR_WRITE:
 	{
 		struct drr_write *drrw = &rrd->header.drr_u.drr_write;
-		err = receive_write(rwa, drrw, rrd->write_buf);
+		err = receive_write(rwa, drrw, rrd->arc_buf);
 		/* if receive_write() is successful, it consumes the arc_buf */
 		if (err != 0)
-			dmu_return_arcbuf(rrd->write_buf);
-		rrd->write_buf = NULL;
+			dmu_return_arcbuf(rrd->arc_buf);
+		rrd->arc_buf = NULL;
 		rrd->payload = NULL;
 		return (err);
 	}
@@ -1699,11 +2228,20 @@ receive_process_record(struct receive_writer_arg *rwa,
 	case DRR_SPILL:
 	{
 		struct drr_spill *drrs = &rrd->header.drr_u.drr_spill;
-		err = receive_spill(rwa, drrs, rrd->payload);
-		kmem_free(rrd->payload, rrd->payload_size);
+		err = receive_spill(rwa, drrs, rrd->arc_buf);
+		/* if receive_spill() is successful, it consumes the arc_buf */
+		if (err != 0)
+			dmu_return_arcbuf(rrd->arc_buf);
+		rrd->arc_buf = NULL;
 		rrd->payload = NULL;
 		return (err);
 	}
+	case DRR_OBJECT_RANGE:
+	{
+		struct drr_object_range *drror =
+		    &rrd->header.drr_u.drr_object_range;
+		return (receive_object_range(rwa, drror));
+	}
 	default:
 		return (SET_ERROR(EINVAL));
 	}
@@ -1727,9 +2265,9 @@ receive_writer_thread(void *arg)
 		 */
 		if (rwa->err == 0) {
 			rwa->err = receive_process_record(rwa, rrd);
-		} else if (rrd->write_buf != NULL) {
-			dmu_return_arcbuf(rrd->write_buf);
-			rrd->write_buf = NULL;
+		} else if (rrd->arc_buf != NULL) {
+			dmu_return_arcbuf(rrd->arc_buf);
+			rrd->arc_buf = NULL;
 			rrd->payload = NULL;
 		} else if (rrd->payload != NULL) {
 			kmem_free(rrd->payload, rrd->payload_size);
@@ -1794,6 +2332,7 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp,
 	nvlist_t *begin_nvl = NULL;
 
 	ra.byteswap = drc->drc_byteswap;
+	ra.raw = drc->drc_raw;
 	ra.cksum = drc->drc_cksum;
 	ra.vp = vp;
 	ra.voff = *voffp;
@@ -1819,17 +2358,21 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp,
 	ASSERT(dsl_dataset_phys(drc->drc_ds)->ds_flags & DS_FLAG_INCONSISTENT);
 
 	featureflags = DMU_GET_FEATUREFLAGS(drc->drc_drrb->drr_versioninfo);
+	ra.featureflags = featureflags;
+
+	ASSERT0(ra.os->os_encrypted &&
+	    (featureflags & DMU_BACKUP_FEATURE_EMBED_DATA));
 
 	/* if this stream is dedup'ed, set up the avl tree for guid mapping */
 	if (featureflags & DMU_BACKUP_FEATURE_DEDUP) {
 		minor_t minor;
 
 		if (cleanup_fd == -1) {
-			ra.err = SET_ERROR(EBADF);
+			err = SET_ERROR(EBADF);
 			goto out;
 		}
-		ra.err = zfs_onexit_fd_hold(cleanup_fd, &minor);
-		if (ra.err != 0) {
+		err = zfs_onexit_fd_hold(cleanup_fd, &minor);
+		if (err != 0) {
 			cleanup_fd = -1;
 			goto out;
 		}
@@ -1843,12 +2386,12 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp,
 			err = zfs_onexit_add_cb(minor,
 			    free_guid_map_onexit, rwa.guid_to_ds_map,
 			    action_handlep);
-			if (ra.err != 0)
+			if (err != 0)
 				goto out;
 		} else {
 			err = zfs_onexit_cb_data(minor, *action_handlep,
 			    (void **)&rwa.guid_to_ds_map);
-			if (ra.err != 0)
+			if (err != 0)
 				goto out;
 		}
 
@@ -1873,6 +2416,38 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp,
 			goto out;
 	}
 
+	/* handle DSL encryption key payload */
+	if (featureflags & DMU_BACKUP_FEATURE_RAW) {
+		nvlist_t *keynvl = NULL;
+
+		ASSERT(ra.os->os_encrypted);
+		ASSERT(drc->drc_raw);
+
+		err = nvlist_lookup_nvlist(begin_nvl, "crypt_keydata", &keynvl);
+		if (err != 0)
+			goto out;
+
+		/*
+		 * If this is a new dataset we set the key immediately.
+		 * Otherwise we don't want to change the key until we
+		 * are sure the rest of the receive succeeded so we stash
+		 * the keynvl away until then.
+		 */
+		err = dsl_crypto_recv_raw(spa_name(ra.os->os_spa),
+		    drc->drc_ds->ds_object, drc->drc_fromsnapobj,
+		    drc->drc_drrb->drr_type, keynvl, drc->drc_newfs);
+		if (err != 0)
+			goto out;
+
+		/* see comment in dmu_recv_end_sync() */
+		drc->drc_ivset_guid = 0;
+		(void) nvlist_lookup_uint64(keynvl, "to_ivset_guid",
+		    &drc->drc_ivset_guid);
+
+		if (!drc->drc_newfs)
+			drc->drc_keynvl = fnvlist_dup(keynvl);
+	}
+
 	if (featureflags & DMU_BACKUP_FEATURE_RESUMING) {
 		err = resume_check(&ra, begin_nvl);
 		if (err != 0)
@@ -1886,6 +2461,9 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp,
 	rwa.os = ra.os;
 	rwa.byteswap = drc->drc_byteswap;
 	rwa.resumable = drc->drc_resumable;
+	rwa.raw = drc->drc_raw;
+	rwa.spill = drc->drc_spill;
+	rwa.os->os_raw_receive = drc->drc_raw;
 
 	(void) thread_create(NULL, 0, receive_writer_thread, &rwa, 0, curproc,
 	    TS_RUN, minclsyspri);
@@ -1926,10 +2504,10 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp,
 		    sizeof (struct receive_record_arg) + ra.rrd->payload_size);
 		ra.rrd = NULL;
 	}
-	if (ra.next_rrd == NULL)
-		ra.next_rrd = kmem_zalloc(sizeof (*ra.next_rrd), KM_SLEEP);
-	ra.next_rrd->eos_marker = B_TRUE;
-	bqueue_enqueue(&rwa.q, ra.next_rrd, 1);
+	ASSERT3P(ra.rrd, ==, NULL);
+	ra.rrd = kmem_zalloc(sizeof (*ra.rrd), KM_SLEEP);
+	ra.rrd->eos_marker = B_TRUE;
+	bqueue_enqueue(&rwa.q, ra.rrd, 1);
 
 	mutex_enter(&rwa.mutex);
 	while (!rwa.done) {
@@ -1979,6 +2557,14 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp,
 		err = rwa.err;
 
 out:
+	/*
+	 * If we hit an error before we started the receive_writer_thread
+	 * we need to clean up the next_rrd we create by processing the
+	 * DRR_BEGIN record.
+	 */
+	if (ra.next_rrd != NULL)
+		kmem_free(ra.next_rrd, sizeof (*ra.next_rrd));
+
 	nvlist_free(begin_nvl);
 	if ((featureflags & DMU_BACKUP_FEATURE_DEDUP) && (cleanup_fd != -1))
 		zfs_onexit_fd_rele(cleanup_fd);
@@ -1990,6 +2576,7 @@ out:
 		 * the inconsistent state.
 		 */
 		dmu_recv_cleanup_ds(drc);
+		nvlist_free(drc->drc_keynvl);
 	}
 
 	*voffp = ra.voff;
@@ -2045,6 +2632,15 @@ dmu_recv_end_check(void *arg, dmu_tx_t *tx)
 				return (error);
 			}
 		}
+		if (drc->drc_keynvl != NULL) {
+			error = dsl_crypto_recv_raw_key_check(drc->drc_ds,
+			    drc->drc_keynvl, tx);
+			if (error != 0) {
+				dsl_dataset_rele(origin_head, FTAG);
+				return (error);
+			}
+		}
+
 		error = dsl_dataset_clone_swap_check_impl(drc->drc_ds,
 		    origin_head, drc->drc_force, drc->drc_owner, tx);
 		if (error != 0) {
@@ -2070,9 +2666,11 @@ dmu_recv_end_sync(void *arg, dmu_tx_t *tx)
 {
 	dmu_recv_cookie_t *drc = arg;
 	dsl_pool_t *dp = dmu_tx_pool(tx);
+	boolean_t encrypted = drc->drc_ds->ds_dir->dd_crypto_obj != 0;
 
 	spa_history_log_internal_ds(drc->drc_ds, "finish receiving",
 	    tx, "snap=%s", drc->drc_tosnap);
+	drc->drc_ds->ds_objset->os_raw_receive = B_FALSE;
 
 	if (!drc->drc_newfs) {
 		dsl_dataset_t *origin_head;
@@ -2100,8 +2698,14 @@ dmu_recv_end_sync(void *arg, dmu_tx_t *tx)
 				dsl_dataset_rele(snap, FTAG);
 			}
 		}
-		VERIFY3P(drc->drc_ds->ds_prev, ==,
-		    origin_head->ds_prev);
+		if (drc->drc_keynvl != NULL) {
+			dsl_crypto_recv_raw_key_sync(drc->drc_ds,
+			    drc->drc_keynvl, tx);
+			nvlist_free(drc->drc_keynvl);
+			drc->drc_keynvl = NULL;
+		}
+
+		VERIFY3P(drc->drc_ds->ds_prev, ==, origin_head->ds_prev);
 
 		dsl_dataset_clone_swap_sync_impl(drc->drc_ds,
 		    origin_head, tx);
@@ -2162,21 +2766,50 @@ dmu_recv_end_sync(void *arg, dmu_tx_t *tx)
 		drc->drc_newsnapobj =
 		    dsl_dataset_phys(drc->drc_ds)->ds_prev_snap_obj;
 	}
+
+	/*
+	 * If this is a raw receive, the crypt_keydata nvlist will include
+	 * a to_ivset_guid for us to set on the new snapshot. This value
+	 * will override the value generated by the snapshot code. However,
+	 * this value may not be present, because older implementations of
+	 * the raw send code did not include this value, and we are still
+	 * allowed to receive them if the zfs_disable_ivset_guid_check
+	 * tunable is set, in which case we will leave the newly-generated
+	 * value.
+	 */
+	if (drc->drc_raw && drc->drc_ivset_guid != 0) {
+		dmu_object_zapify(dp->dp_meta_objset, drc->drc_newsnapobj,
+		    DMU_OT_DSL_DATASET, tx);
+		VERIFY0(zap_update(dp->dp_meta_objset, drc->drc_newsnapobj,
+		    DS_FIELD_IVSET_GUID, sizeof (uint64_t), 1,
+		    &drc->drc_ivset_guid, tx));
+	}
+
 	/*
 	 * Release the hold from dmu_recv_begin.  This must be done before
-	 * we return to open context, so that when we free the dataset's dnode,
-	 * we can evict its bonus buffer.
+	 * we return to open context, so that when we free the dataset's dnode
+	 * we can evict its bonus buffer. Since the dataset may be destroyed
+	 * at this point (and therefore won't have a valid pointer to the spa)
+	 * we release the key mapping manually here while we do have a valid
+	 * pointer, if it exists.
 	 */
-	dsl_dataset_disown(drc->drc_ds, dmu_recv_tag);
+	if (!drc->drc_raw && encrypted) {
+		(void) spa_keystore_remove_mapping(dmu_tx_pool(tx)->dp_spa,
+		    drc->drc_ds->ds_object, drc->drc_ds);
+	}
+	dsl_dataset_disown(drc->drc_ds, 0, dmu_recv_tag);
 	drc->drc_ds = NULL;
 }
 
 static int
-add_ds_to_guidmap(const char *name, avl_tree_t *guid_map, uint64_t snapobj)
+add_ds_to_guidmap(const char *name, avl_tree_t *guid_map, uint64_t snapobj,
+    boolean_t raw)
 {
 	dsl_pool_t *dp;
 	dsl_dataset_t *snapds;
 	guid_map_entry_t *gmep;
+	objset_t *os;
+	ds_hold_flags_t dsflags = (raw) ? 0 : DS_HOLD_FLAG_DECRYPT;
 	int err;
 
 	ASSERT(guid_map != NULL);
@@ -2185,12 +2818,29 @@ add_ds_to_guidmap(const char *name, avl_tree_t *guid_map, uint64_t snapobj)
 	if (err != 0)
 		return (err);
 	gmep = kmem_alloc(sizeof (*gmep), KM_SLEEP);
-	err = dsl_dataset_hold_obj(dp, snapobj, gmep, &snapds);
+	err = dsl_dataset_own_obj(dp, snapobj, dsflags, gmep, &snapds);
 	if (err == 0) {
+		/*
+		 * If this is a deduplicated raw send stream, we need to make
+		 * sure that we can still read raw blocks from earlier datasets
+		 * in the stream, so we set the os_raw_receive flag now.  If
+		 * the objset lookup fails, disown using the owning tag (gmep).
+		 */
+		if (raw) {
+			err = dmu_objset_from_ds(snapds, &os);
+			if (err != 0) {
+				dsl_dataset_disown(snapds, dsflags, gmep);
+				dsl_pool_rele(dp, FTAG);
+				kmem_free(gmep, sizeof (*gmep));
+				return (err);
+			}
+			os->os_raw_receive = B_TRUE;
+		}
+
+		gmep->raw = raw;
 		gmep->guid = dsl_dataset_phys(snapds)->ds_guid;
 		gmep->gme_ds = snapds;
 		avl_add(guid_map, gmep);
-		dsl_dataset_long_hold(snapds, gmep);
 	} else {
 		kmem_free(gmep, sizeof (*gmep));
 	}
@@ -2241,10 +2891,10 @@ dmu_recv_end(dmu_recv_cookie_t *drc, void *owner)
 
 	if (error != 0) {
 		dmu_recv_cleanup_ds(drc);
+		nvlist_free(drc->drc_keynvl);
 	} else if (drc->drc_guid_to_ds_map != NULL) {
-		(void) add_ds_to_guidmap(drc->drc_tofs,
-		    drc->drc_guid_to_ds_map,
-		    drc->drc_newsnapobj);
+		(void) add_ds_to_guidmap(drc->drc_tofs, drc->drc_guid_to_ds_map,
+		    drc->drc_newsnapobj, drc->drc_raw);
 	}
 	return (error);
 }
diff --git a/usr/src/uts/common/fs/zfs/dmu_send.c b/usr/src/uts/common/fs/zfs/dmu_send.c
index 6d65086079..bfc0a6f585 100644
--- a/usr/src/uts/common/fs/zfs/dmu_send.c
+++ b/usr/src/uts/common/fs/zfs/dmu_send.c
@@ -61,6 +61,8 @@ int zfs_send_corrupt_data = B_FALSE;
 int zfs_send_queue_length = 16 * 1024 * 1024;
 /* Set this tunable to FALSE to disable setting of DRR_FLAG_FREERECORDS */
 int zfs_send_set_freerecords_bit = B_TRUE;
+/* Set this tunable to FALSE to disable sending unmodified spill blocks. */
+int zfs_send_unmodified_spill_blocks = B_TRUE;
 
 /*
  * Use this to override the recordsize calculation for fast zfs send estimates.
@@ -90,6 +92,8 @@ struct send_block_record {
 	bqueue_node_t		ln;
 };
 
+static int do_dump(dmu_sendarg_t *dsa, struct send_block_record *data);
+
 static int
 dump_bytes(dmu_sendarg_t *dsp, void *buf, int len)
 {
@@ -97,18 +101,17 @@ dump_bytes(dmu_sendarg_t *dsp, void *buf, int len)
 	ssize_t resid; /* have to get resid to get detailed errno */
 
 	/*
-	 * The code does not rely on this (len being a multiple of 8).  We keep
+	 * The code does not rely on len being a multiple of 8.  We keep
 	 * this assertion because of the corresponding assertion in
 	 * receive_read().  Keeping this assertion ensures that we do not
 	 * inadvertently break backwards compatibility (causing the assertion
-	 * in receive_read() to trigger on old software).
-	 *
-	 * Removing the assertions could be rolled into a new feature that uses
-	 * data that isn't 8-byte aligned; if the assertions were removed, a
-	 * feature flag would have to be added.
+	 * in receive_read() to trigger on old software). Newer feature flags
+	 * (such as raw send) may break this assertion since they were
+	 * introduced after the requirement was made obsolete.
 	 */
 
-	ASSERT0(len % 8);
+	ASSERT(len % 8 == 0 ||
+	    (dsp->dsa_featureflags & DMU_BACKUP_FEATURE_RAW) != 0);
 
 	dsp->dsa_err = vn_rdwr(UIO_WRITE, dsp->dsa_vp,
 	    (caddr_t)buf, len,
@@ -189,9 +192,6 @@ dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset,
 	    (object == dsp->dsa_last_data_object &&
 	    offset > dsp->dsa_last_data_offset));
 
-	if (length != -1ULL && offset + length < offset)
-		length = -1ULL;
-
 	/*
 	 * If there is a pending op, but it's not PENDING_FREE, push it out,
 	 * since free block aggregation can only be done for blocks of the
@@ -208,19 +208,22 @@ dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset,
 
 	if (dsp->dsa_pending_op == PENDING_FREE) {
 		/*
-		 * There should never be a PENDING_FREE if length is -1
-		 * (because dump_dnode is the only place where this
-		 * function is called with a -1, and only after flushing
-		 * any pending record).
+		 * There should never be a PENDING_FREE if length is
+		 * DMU_OBJECT_END (because dump_dnode is the only place where
+		 * this function is called with a DMU_OBJECT_END, and only after
+		 * flushing any pending record).
 		 */
-		ASSERT(length != -1ULL);
+		ASSERT(length != DMU_OBJECT_END);
 		/*
 		 * Check to see whether this free block can be aggregated
 		 * with pending one.
 		 */
 		if (drrf->drr_object == object && drrf->drr_offset +
 		    drrf->drr_length == offset) {
-			drrf->drr_length += length;
+			if (offset + length < offset)
+				drrf->drr_length = DMU_OBJECT_END;
+			else
+				drrf->drr_length += length;
 			return (0);
 		} else {
 			/* not a continuation.  Push out pending record */
@@ -234,9 +237,12 @@ dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset,
 	dsp->dsa_drr->drr_type = DRR_FREE;
 	drrf->drr_object = object;
 	drrf->drr_offset = offset;
-	drrf->drr_length = length;
+	if (offset + length < offset)
+		drrf->drr_length = DMU_OBJECT_END;
+	else
+		drrf->drr_length = length;
 	drrf->drr_toguid = dsp->dsa_toguid;
-	if (length == -1ULL) {
+	if (length == DMU_OBJECT_END) {
 		if (dump_record(dsp, NULL, 0) != 0)
 			return (SET_ERROR(EINTR));
 	} else {
@@ -247,11 +253,11 @@ dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset,
 }
 
 static int
-dump_write(dmu_sendarg_t *dsp, dmu_object_type_t type,
-    uint64_t object, uint64_t offset, int lsize, int psize, const blkptr_t *bp,
-    void *data)
+dump_write(dmu_sendarg_t *dsp, dmu_object_type_t type, uint64_t object,
+    uint64_t offset, int lsize, int psize, const blkptr_t *bp, void *data)
 {
 	uint64_t payload_size;
+	boolean_t raw = (dsp->dsa_featureflags & DMU_BACKUP_FEATURE_RAW);
 	struct drr_write *drrw = &(dsp->dsa_drr->drr_u.drr_write);
 
 	/*
@@ -284,16 +290,36 @@ dump_write(dmu_sendarg_t *dsp, dmu_object_type_t type,
 	drrw->drr_toguid = dsp->dsa_toguid;
 	drrw->drr_logical_size = lsize;
 
-	/* only set the compression fields if the buf is compressed */
-	if (lsize != psize) {
-		ASSERT(dsp->dsa_featureflags & DMU_BACKUP_FEATURE_COMPRESSED);
+	/* only set the compression fields if the buf is compressed or raw */
+	if (raw || lsize != psize) {
 		ASSERT(!BP_IS_EMBEDDED(bp));
-		ASSERT(!BP_SHOULD_BYTESWAP(bp));
-		ASSERT(!DMU_OT_IS_METADATA(BP_GET_TYPE(bp)));
-		ASSERT3U(BP_GET_COMPRESS(bp), !=, ZIO_COMPRESS_OFF);
 		ASSERT3S(psize, >, 0);
-		ASSERT3S(lsize, >=, psize);
 
+		if (raw) {
+			ASSERT(BP_IS_PROTECTED(bp));
+
+			/*
+			 * This is a raw protected block so we need to pass
+			 * along everything the receiving side will need to
+			 * interpret this block, including the byteswap, salt,
+			 * IV, and MAC.
+			 */
+			if (BP_SHOULD_BYTESWAP(bp))
+				drrw->drr_flags |= DRR_RAW_BYTESWAP;
+			zio_crypt_decode_params_bp(bp, drrw->drr_salt,
+			    drrw->drr_iv);
+			zio_crypt_decode_mac_bp(bp, drrw->drr_mac);
+		} else {
+			/* this is a compressed block */
+			ASSERT(dsp->dsa_featureflags &
+			    DMU_BACKUP_FEATURE_COMPRESSED);
+			ASSERT(!BP_SHOULD_BYTESWAP(bp));
+			ASSERT(!DMU_OT_IS_METADATA(BP_GET_TYPE(bp)));
+			ASSERT3U(BP_GET_COMPRESS(bp), !=, ZIO_COMPRESS_OFF);
+			ASSERT3S(lsize, >=, psize);
+		}
+
+		/* set fields common to compressed and raw sends */
 		drrw->drr_compressiontype = BP_GET_COMPRESS(bp);
 		drrw->drr_compressed_size = psize;
 		payload_size = drrw->drr_compressed_size;
@@ -301,22 +327,23 @@ dump_write(dmu_sendarg_t *dsp, dmu_object_type_t type,
 		payload_size = drrw->drr_logical_size;
 	}
 
-	if (bp == NULL || BP_IS_EMBEDDED(bp)) {
+	if (bp == NULL || BP_IS_EMBEDDED(bp) || (BP_IS_PROTECTED(bp) && !raw)) {
 		/*
-		 * There's no pre-computed checksum for partial-block
-		 * writes or embedded BP's, so (like
-		 * fletcher4-checkummed blocks) userland will have to
-		 * compute a dedup-capable checksum itself.
+		 * There's no pre-computed checksum for partial-block writes,
+		 * embedded BP's, or encrypted BP's that are being sent as
+		 * plaintext, so (like fletcher4-checksummed blocks) userland
+		 * will have to compute a dedup-capable checksum itself.
 		 */
 		drrw->drr_checksumtype = ZIO_CHECKSUM_OFF;
 	} else {
 		drrw->drr_checksumtype = BP_GET_CHECKSUM(bp);
 		if (zio_checksum_table[drrw->drr_checksumtype].ci_flags &
 		    ZCHECKSUM_FLAG_DEDUP)
-			drrw->drr_checksumflags |= DRR_CHECKSUM_DEDUP;
+			drrw->drr_flags |= DRR_CHECKSUM_DEDUP;
 		DDK_SET_LSIZE(&drrw->drr_key, BP_GET_LSIZE(bp));
 		DDK_SET_PSIZE(&drrw->drr_key, BP_GET_PSIZE(bp));
 		DDK_SET_COMPRESS(&drrw->drr_key, BP_GET_COMPRESS(bp));
+		DDK_SET_CRYPT(&drrw->drr_key, BP_IS_PROTECTED(bp));
 		drrw->drr_key.ddk_cksum = bp->blk_cksum;
 	}
 
@@ -360,9 +387,11 @@ dump_write_embedded(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset,
 }
 
 static int
-dump_spill(dmu_sendarg_t *dsp, uint64_t object, int blksz, void *data)
+dump_spill(dmu_sendarg_t *dsp, const blkptr_t *bp, uint64_t object, void *data)
 {
 	struct drr_spill *drrs = &(dsp->dsa_drr->drr_u.drr_spill);
+	uint64_t blksz = BP_GET_LSIZE(bp);
+	uint64_t payload_size = blksz;
 
 	if (dsp->dsa_pending_op != PENDING_NONE) {
 		if (dump_record(dsp, NULL, 0) != 0)
@@ -377,7 +406,26 @@ dump_spill(dmu_sendarg_t *dsp, uint64_t object, int blksz, void *data)
 	drrs->drr_length = blksz;
 	drrs->drr_toguid = dsp->dsa_toguid;
 
-	if (dump_record(dsp, data, blksz) != 0)
+	/* See comment in dump_dnode() for full details */
+	if (zfs_send_unmodified_spill_blocks &&
+	    (bp->blk_birth <= dsp->dsa_fromtxg)) {
+		drrs->drr_flags |= DRR_SPILL_UNMODIFIED;
+	}
+
+	/* handle raw send fields */
+	if (dsp->dsa_featureflags & DMU_BACKUP_FEATURE_RAW) {
+		ASSERT(BP_IS_PROTECTED(bp));
+
+		if (BP_SHOULD_BYTESWAP(bp))
+			drrs->drr_flags |= DRR_RAW_BYTESWAP;
+		drrs->drr_compressiontype = BP_GET_COMPRESS(bp);
+		drrs->drr_compressed_size = BP_GET_PSIZE(bp);
+		zio_crypt_decode_params_bp(bp, drrs->drr_salt, drrs->drr_iv);
+		zio_crypt_decode_mac_bp(bp, drrs->drr_mac);
+		payload_size = drrs->drr_compressed_size;
+	}
+
+	if (dump_record(dsp, data, payload_size) != 0)
 		return (SET_ERROR(EINTR));
 	return (0);
 }
@@ -429,9 +477,11 @@ dump_freeobjects(dmu_sendarg_t *dsp, uint64_t firstobj, uint64_t numobjs)
 }
 
 static int
-dump_dnode(dmu_sendarg_t *dsp, uint64_t object, dnode_phys_t *dnp)
+dump_dnode(dmu_sendarg_t *dsp, const blkptr_t *bp, uint64_t object,
+    dnode_phys_t *dnp)
 {
 	struct drr_object *drro = &(dsp->dsa_drr->drr_u.drr_object);
+	int bonuslen;
 
 	if (object < dsp->dsa_resume_object) {
 		/*
@@ -472,20 +522,111 @@ dump_dnode(dmu_sendarg_t *dsp, uint64_t object, dnode_phys_t *dnp)
 	    drro->drr_blksz > SPA_OLD_MAXBLOCKSIZE)
 		drro->drr_blksz = SPA_OLD_MAXBLOCKSIZE;
 
-	if (dump_record(dsp, DN_BONUS(dnp),
-	    P2ROUNDUP(dnp->dn_bonuslen, 8)) != 0) {
-		return (SET_ERROR(EINTR));
+	bonuslen = P2ROUNDUP(dnp->dn_bonuslen, 8);
+
+	if ((dsp->dsa_featureflags & DMU_BACKUP_FEATURE_RAW)) {
+		ASSERT(BP_IS_ENCRYPTED(bp));
+
+		if (BP_SHOULD_BYTESWAP(bp))
+			drro->drr_flags |= DRR_RAW_BYTESWAP;
+
+		/* needed for reconstructing dnp on recv side */
+		drro->drr_maxblkid = dnp->dn_maxblkid;
+		drro->drr_indblkshift = dnp->dn_indblkshift;
+		drro->drr_nlevels = dnp->dn_nlevels;
+		drro->drr_nblkptr = dnp->dn_nblkptr;
+
+		/*
+		 * Since we encrypt the entire bonus area, the (raw) part
+		 * beyond the bonuslen is actually nonzero, so we need
+		 * to send it.
+		 */
+		if (bonuslen != 0) {
+			drro->drr_raw_bonuslen = DN_MAX_BONUS_LEN(dnp);
+			bonuslen = drro->drr_raw_bonuslen;
+		}
 	}
 
+	/*
+	 * DRR_OBJECT_SPILL is set for every dnode which references a
+	 * spill block.  This allows the receiving pool to definitively
+	 * determine when a spill block should be kept or freed.
+	 */
+	if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR)
+		drro->drr_flags |= DRR_OBJECT_SPILL;
+
+	if (dump_record(dsp, DN_BONUS(dnp), bonuslen) != 0)
+		return (SET_ERROR(EINTR));
+
 	/* Free anything past the end of the file. */
 	if (dump_free(dsp, object, (dnp->dn_maxblkid + 1) *
-	    (dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT), -1ULL) != 0)
+	    (dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT), DMU_OBJECT_END) != 0)
 		return (SET_ERROR(EINTR));
+
+	/*
+	 * Send DRR_SPILL records for unmodified spill blocks.  This is useful
+	 * because changing certain attributes of the object (e.g. blocksize)
+	 * can cause old versions of ZFS to incorrectly remove a spill block.
+	 * Including these records in the stream forces an up to date version
+	 * to always be written ensuring they're never lost.  Current versions
+	 * of the code which understand the DRR_FLAG_SPILL_BLOCK feature can
+	 * ignore these unmodified spill blocks.
+	 */
+	if (zfs_send_unmodified_spill_blocks &&
+	    (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) &&
+	    (DN_SPILL_BLKPTR(dnp)->blk_birth <= dsp->dsa_fromtxg)) {
+		struct send_block_record record;
+
+		bzero(&record, sizeof (struct send_block_record));
+		record.eos_marker = B_FALSE;
+		record.bp = *DN_SPILL_BLKPTR(dnp);
+		SET_BOOKMARK(&(record.zb), dmu_objset_id(dsp->dsa_os),
+		    object, 0, DMU_SPILL_BLKID);
+
+		if (do_dump(dsp, &record) != 0)
+			return (SET_ERROR(EINTR));
+	}
+
 	if (dsp->dsa_err != 0)
 		return (SET_ERROR(EINTR));
 	return (0);
 }
 
+/* Emit the DRR_OBJECT_RANGE record (raw sends only) for a block of dnodes. */
+static int
+dump_object_range(dmu_sendarg_t *dsp, const blkptr_t *bp, uint64_t firstobj,
+    uint64_t numslots)
+{
+	struct drr_object_range *rec = &(dsp->dsa_drr->drr_u.drr_object_range);
+
+	/* this record type is only ever generated for raw dnode blocks */
+	ASSERT(BP_IS_PROTECTED(bp));
+	ASSERT(dsp->dsa_featureflags & DMU_BACKUP_FEATURE_RAW);
+	ASSERT3U(BP_GET_COMPRESS(bp), ==, ZIO_COMPRESS_OFF);
+	ASSERT3U(BP_GET_TYPE(bp), ==, DMU_OT_DNODE);
+	ASSERT0(BP_GET_LEVEL(bp));
+
+	/* push out any pending record before dsa_drr is overwritten */
+	if (dsp->dsa_pending_op != PENDING_NONE) {
+		if (dump_record(dsp, NULL, 0) != 0)
+			return (SET_ERROR(EINTR));
+		dsp->dsa_pending_op = PENDING_NONE;
+	}
+
+	bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t));
+	dsp->dsa_drr->drr_type = DRR_OBJECT_RANGE;
+	rec->drr_toguid = dsp->dsa_toguid;
+	rec->drr_firstobj = firstobj;
+	rec->drr_numslots = numslots;
+
+	/* copy the byteswap flag, salt, IV and MAC out of the block pointer */
+	zio_crypt_decode_params_bp(bp, rec->drr_salt, rec->drr_iv);
+	zio_crypt_decode_mac_bp(bp, rec->drr_mac);
+	if (BP_SHOULD_BYTESWAP(bp))
+		rec->drr_flags |= DRR_RAW_BYTESWAP;
+
+	return (dump_record(dsp, NULL, 0) != 0 ? SET_ERROR(EINTR) : 0);
+
 static boolean_t
 backup_do_embed(dmu_sendarg_t *dsp, const blkptr_t *bp)
 {
@@ -529,6 +670,7 @@ send_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
 
 	ASSERT(zb->zb_object == DMU_META_DNODE_OBJECT ||
 	    zb->zb_object >= sta->resume.zb_object);
+	ASSERT3P(sta->ds, !=, NULL);
 
 	if (sta->cancel)
 		return (SET_ERROR(EINTR));
@@ -601,6 +743,18 @@ do_dump(dmu_sendarg_t *dsa, struct send_block_record *data)
 	ASSERT(zb->zb_object == DMU_META_DNODE_OBJECT ||
 	    zb->zb_object >= dsa->dsa_resume_object);
 
+	/*
+	 * All bps of an encrypted os should have the encryption bit set.
+	 * If this is not true it indicates tampering and we report an error.
+	 */
+	if (dsa->dsa_os->os_encrypted &&
+	    !BP_IS_HOLE(bp) && !BP_USES_CRYPT(bp)) {
+		spa_log_error(spa, zb);
+		zfs_panic_recover("unencrypted block in encrypted "
+		    "object set %llu", ds->ds_object);
+		return (SET_ERROR(EIO));
+	}
+
 	if (zb->zb_object != DMU_META_DNODE_OBJECT &&
 	    DMU_OBJECT_IS_SPECIAL(zb->zb_object)) {
 		return (0);
@@ -612,40 +766,66 @@ do_dump(dmu_sendarg_t *dsa, struct send_block_record *data)
 	} else if (BP_IS_HOLE(bp)) {
 		uint64_t span = BP_SPAN(dblkszsec, indblkshift, zb->zb_level);
 		uint64_t offset = zb->zb_blkid * span;
-		err = dump_free(dsa, zb->zb_object, offset, span);
+		/* Don't dump free records for offsets > DMU_OBJECT_END */
+		if (zb->zb_blkid == 0 || span <= DMU_OBJECT_END / zb->zb_blkid)
+			err = dump_free(dsa, zb->zb_object, offset, span);
 	} else if (zb->zb_level > 0 || type == DMU_OT_OBJSET) {
 		return (0);
 	} else if (type == DMU_OT_DNODE) {
 		int epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT;
 		arc_flags_t aflags = ARC_FLAG_WAIT;
 		arc_buf_t *abuf;
+		enum zio_flag zioflags = ZIO_FLAG_CANFAIL;
+
+		if (dsa->dsa_featureflags & DMU_BACKUP_FEATURE_RAW) {
+			ASSERT(BP_IS_ENCRYPTED(bp));
+			ASSERT3U(BP_GET_COMPRESS(bp), ==, ZIO_COMPRESS_OFF);
+			zioflags |= ZIO_FLAG_RAW;
+		}
 
 		ASSERT0(zb->zb_level);
 
 		if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf,
-		    ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL,
-		    &aflags, zb) != 0)
+		    ZIO_PRIORITY_ASYNC_READ, zioflags, &aflags, zb) != 0)
 			return (SET_ERROR(EIO));
 
 		dnode_phys_t *blk = abuf->b_data;
 		uint64_t dnobj = zb->zb_blkid * epb;
-		for (int i = 0; i < epb; i += blk[i].dn_extra_slots + 1) {
-			err = dump_dnode(dsa, dnobj + i, blk + i);
-			if (err != 0)
-				break;
+
+		/*
+		 * Raw sends require sending encryption parameters for the
+		 * block of dnodes. Regular sends do not need to send this
+		 * info.
+		 */
+		if (dsa->dsa_featureflags & DMU_BACKUP_FEATURE_RAW) {
+			ASSERT(arc_is_encrypted(abuf));
+			err = dump_object_range(dsa, bp, dnobj, epb);
+		}
+
+		if (err == 0) {
+			for (int i = 0; i < epb;
+			    i += blk[i].dn_extra_slots + 1) {
+				err = dump_dnode(dsa, bp, dnobj + i, blk + i);
+				if (err != 0)
+					break;
+			}
 		}
 		arc_buf_destroy(abuf, &abuf);
 	} else if (type == DMU_OT_SA) {
 		arc_flags_t aflags = ARC_FLAG_WAIT;
 		arc_buf_t *abuf;
-		int blksz = BP_GET_LSIZE(bp);
+		enum zio_flag zioflags = ZIO_FLAG_CANFAIL;
+
+		if (dsa->dsa_featureflags & DMU_BACKUP_FEATURE_RAW) {
+			ASSERT(BP_IS_PROTECTED(bp));
+			zioflags |= ZIO_FLAG_RAW;
+		}
 
 		if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf,
-		    ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL,
-		    &aflags, zb) != 0)
+		    ZIO_PRIORITY_ASYNC_READ, zioflags, &aflags, zb) != 0)
 			return (SET_ERROR(EIO));
 
-		err = dump_spill(dsa, zb->zb_object, blksz, abuf->b_data);
+		err = dump_spill(dsa, bp, zb->zb_object, abuf->b_data);
 		arc_buf_destroy(abuf, &abuf);
 	} else if (backup_do_embed(dsa, bp)) {
 		/* it's an embedded level-0 block of a regular object */
@@ -667,6 +847,14 @@ do_dump(dmu_sendarg_t *dsa, struct send_block_record *data)
 		 */
 		boolean_t split_large_blocks = blksz > SPA_OLD_MAXBLOCKSIZE &&
 		    !(dsa->dsa_featureflags & DMU_BACKUP_FEATURE_LARGE_BLOCKS);
+
+		/*
+		 * Raw sends require that we always get raw data as it exists
+		 * on disk, so we assert that we are not splitting blocks here.
+		 */
+		boolean_t request_raw =
+		    (dsa->dsa_featureflags & DMU_BACKUP_FEATURE_RAW) != 0;
+
 		/*
 		 * We should only request compressed data from the ARC if all
 		 * the following are true:
@@ -682,6 +870,8 @@ do_dump(dmu_sendarg_t *dsa, struct send_block_record *data)
 		    !split_large_blocks && !BP_SHOULD_BYTESWAP(bp) &&
 		    !BP_IS_EMBEDDED(bp) && !DMU_OT_IS_METADATA(BP_GET_TYPE(bp));
 
+		IMPLY(request_raw, !split_large_blocks);
+		IMPLY(request_raw, BP_IS_PROTECTED(bp));
 		ASSERT0(zb->zb_level);
 		ASSERT(zb->zb_object > dsa->dsa_resume_object ||
 		    (zb->zb_object == dsa->dsa_resume_object &&
@@ -695,8 +885,11 @@ do_dump(dmu_sendarg_t *dsa, struct send_block_record *data)
 		ASSERT3U(blksz, ==, BP_GET_LSIZE(bp));
 
 		enum zio_flag zioflags = ZIO_FLAG_CANFAIL;
-		if (request_compressed)
+		if (request_raw)
 			zioflags |= ZIO_FLAG_RAW;
+		else if (request_compressed)
+			zioflags |= ZIO_FLAG_RAW_COMPRESS;
+
 		if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf,
 		    ZIO_PRIORITY_ASYNC_READ, zioflags, &aflags, zb) != 0) {
 			if (zfs_send_corrupt_data) {
@@ -716,6 +909,7 @@ do_dump(dmu_sendarg_t *dsa, struct send_block_record *data)
 		offset = zb->zb_blkid * blksz;
 
 		if (split_large_blocks) {
+			ASSERT0(arc_is_encrypted(abuf));
 			ASSERT3U(arc_get_compression(abuf), ==,
 			    ZIO_COMPRESS_OFF);
 			char *buf = abuf->b_data;
@@ -758,7 +952,7 @@ static int
 dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds,
     zfs_bookmark_phys_t *ancestor_zb, boolean_t is_clone,
     boolean_t embedok, boolean_t large_block_ok, boolean_t compressok,
-    int outfd, uint64_t resumeobj, uint64_t resumeoff,
+    boolean_t rawok, int outfd, uint64_t resumeobj, uint64_t resumeoff,
     vnode_t *vp, offset_t *off)
 {
 	objset_t *os;
@@ -775,6 +969,28 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds,
 		return (err);
 	}
 
+	/*
+	 * If this is a non-raw send of an encrypted ds, we can ensure that
+	 * the objset_phys_t is authenticated. This is safe because this is
+	 * either a snapshot or we have owned the dataset, ensuring that
+	 * it can't be modified.
+	 */
+	if (!rawok && os->os_encrypted &&
+	    arc_is_unauthenticated(os->os_phys_buf)) {
+		zbookmark_phys_t zb;
+
+		SET_BOOKMARK(&zb, to_ds->ds_object, ZB_ROOT_OBJECT,
+		    ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
+		err = arc_untransform(os->os_phys_buf, os->os_spa,
+		    &zb, B_FALSE);
+		if (err != 0) {
+			dsl_pool_rele(dp, tag);
+			return (err);
+		}
+
+		ASSERT0(arc_is_unauthenticated(os->os_phys_buf));
+	}
+
 	drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP);
 	drr->drr_type = DRR_BEGIN;
 	drr->drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC;
@@ -795,22 +1011,29 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds,
 	}
 #endif
 
-	if (large_block_ok && to_ds->ds_feature_inuse[SPA_FEATURE_LARGE_BLOCKS])
+	/* raw sends imply large_block_ok */
+	if ((large_block_ok || rawok) &&
+	    to_ds->ds_feature_inuse[SPA_FEATURE_LARGE_BLOCKS])
 		featureflags |= DMU_BACKUP_FEATURE_LARGE_BLOCKS;
 	if (to_ds->ds_feature_inuse[SPA_FEATURE_LARGE_DNODE])
 		featureflags |= DMU_BACKUP_FEATURE_LARGE_DNODE;
-	if (embedok &&
+
+	/* encrypted datasets will not have embedded blocks */
+	if ((embedok || rawok) && !os->os_encrypted &&
 	    spa_feature_is_active(dp->dp_spa, SPA_FEATURE_EMBEDDED_DATA)) {
 		featureflags |= DMU_BACKUP_FEATURE_EMBED_DATA;
-		if (spa_feature_is_active(dp->dp_spa, SPA_FEATURE_LZ4_COMPRESS))
-			featureflags |= DMU_BACKUP_FEATURE_LZ4;
 	}
-	if (compressok) {
+
+	/* raw send implies compressok */
+	if (compressok || rawok)
 		featureflags |= DMU_BACKUP_FEATURE_COMPRESSED;
-	}
+	if (rawok && os->os_encrypted)
+		featureflags |= DMU_BACKUP_FEATURE_RAW;
+
 	if ((featureflags &
-	    (DMU_BACKUP_FEATURE_EMBED_DATA | DMU_BACKUP_FEATURE_COMPRESSED)) !=
-	    0 && spa_feature_is_active(dp->dp_spa, SPA_FEATURE_LZ4_COMPRESS)) {
+	    (DMU_BACKUP_FEATURE_EMBED_DATA | DMU_BACKUP_FEATURE_COMPRESSED |
+	    DMU_BACKUP_FEATURE_RAW)) != 0 &&
+	    spa_feature_is_active(dp->dp_spa, SPA_FEATURE_LZ4_COMPRESS)) {
 		featureflags |= DMU_BACKUP_FEATURE_LZ4;
 	}
 
@@ -832,6 +1055,8 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds,
 	if (zfs_send_set_freerecords_bit)
 		drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_FREERECORDS;
 
+	drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_SPILL_BLOCK;
+
 	if (ancestor_zb != NULL) {
 		drr->drr_u.drr_begin.drr_fromguid =
 		    ancestor_zb->zbm_guid;
@@ -852,6 +1077,7 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds,
 	dsp->dsa_os = os;
 	dsp->dsa_off = off;
 	dsp->dsa_toguid = dsl_dataset_phys(to_ds)->ds_guid;
+	dsp->dsa_fromtxg = fromtxg;
 	dsp->dsa_pending_op = PENDING_NONE;
 	dsp->dsa_featureflags = featureflags;
 	dsp->dsa_resume_object = resumeobj;
@@ -866,19 +1092,47 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds,
 
 	void *payload = NULL;
 	size_t payload_len = 0;
-	if (resumeobj != 0 || resumeoff != 0) {
-		dmu_object_info_t to_doi;
-		err = dmu_object_info(os, resumeobj, &to_doi);
-		if (err != 0)
-			goto out;
-		SET_BOOKMARK(&to_arg.resume, to_ds->ds_object, resumeobj, 0,
-		    resumeoff / to_doi.doi_data_block_size);
-
+	/* handle features that require a DRR_BEGIN payload */
+	if (featureflags &
+	    (DMU_BACKUP_FEATURE_RESUMING | DMU_BACKUP_FEATURE_RAW)) {
+		nvlist_t *keynvl = NULL;
 		nvlist_t *nvl = fnvlist_alloc();
-		fnvlist_add_uint64(nvl, "resume_object", resumeobj);
-		fnvlist_add_uint64(nvl, "resume_offset", resumeoff);
+
+		if (featureflags & DMU_BACKUP_FEATURE_RESUMING) {
+			dmu_object_info_t to_doi;
+			err = dmu_object_info(os, resumeobj, &to_doi);
+			if (err != 0) {
+				fnvlist_free(nvl);
+				goto out;
+			}
+
+			SET_BOOKMARK(&to_arg.resume, to_ds->ds_object,
+			    resumeobj, 0,
+			    resumeoff / to_doi.doi_data_block_size);
+
+			fnvlist_add_uint64(nvl, "resume_object", resumeobj);
+			fnvlist_add_uint64(nvl, "resume_offset", resumeoff);
+		}
+
+		if (featureflags & DMU_BACKUP_FEATURE_RAW) {
+			uint64_t ivset_guid = (ancestor_zb != NULL) ?
+			    ancestor_zb->zbm_ivset_guid : 0;
+
+			ASSERT(os->os_encrypted);
+
+			err = dsl_crypto_populate_key_nvlist(to_ds,
+			    ivset_guid, &keynvl);
+			if (err != 0) {
+				fnvlist_free(nvl);
+				goto out;
+			}
+
+			fnvlist_add_nvlist(nvl, "crypt_keydata", keynvl);
+		}
+
 		payload = fnvlist_pack(nvl, &payload_len);
 		drr->drr_payloadlen = payload_len;
+		fnvlist_free(keynvl);
 		fnvlist_free(nvl);
 	}
 
@@ -896,6 +1150,8 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds,
 	to_arg.ds = to_ds;
 	to_arg.fromtxg = fromtxg;
 	to_arg.flags = TRAVERSE_PRE | TRAVERSE_PREFETCH;
+	if (rawok)
+		to_arg.flags |= TRAVERSE_NO_DECRYPT;
 	(void) thread_create(NULL, 0, send_traverse_thread, &to_arg, 0, curproc,
 	    TS_RUN, minclsyspri);
 
@@ -942,7 +1198,6 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds,
 
 	if (dump_record(dsp, NULL, 0) != 0)
 		err = dsp->dsa_err;
-
 out:
 	mutex_enter(&to_ds->ds_sendstream_lock);
 	list_remove(&to_ds->ds_sendstreams, dsp);
@@ -961,60 +1216,77 @@ out:
 int
 dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap,
     boolean_t embedok, boolean_t large_block_ok, boolean_t compressok,
-    int outfd, vnode_t *vp, offset_t *off)
+    boolean_t rawok, int outfd, vnode_t *vp, offset_t *off)
 {
 	dsl_pool_t *dp;
 	dsl_dataset_t *ds;
 	dsl_dataset_t *fromds = NULL;
+	ds_hold_flags_t dsflags = (rawok) ? 0 : DS_HOLD_FLAG_DECRYPT;
 	int err;
 
 	err = dsl_pool_hold(pool, FTAG, &dp);
 	if (err != 0)
 		return (err);
 
-	err = dsl_dataset_hold_obj(dp, tosnap, FTAG, &ds);
+	err = dsl_dataset_hold_obj_flags(dp, tosnap, dsflags, FTAG, &ds);
 	if (err != 0) {
 		dsl_pool_rele(dp, FTAG);
 		return (err);
 	}
 
 	if (fromsnap != 0) {
-		zfs_bookmark_phys_t zb;
+		zfs_bookmark_phys_t zb = { 0 };
 		boolean_t is_clone;
 
 		err = dsl_dataset_hold_obj(dp, fromsnap, FTAG, &fromds);
 		if (err != 0) {
-			dsl_dataset_rele(ds, FTAG);
+			dsl_dataset_rele_flags(ds, dsflags, FTAG);
 			dsl_pool_rele(dp, FTAG);
 			return (err);
 		}
-		if (!dsl_dataset_is_before(ds, fromds, 0))
+		if (!dsl_dataset_is_before(ds, fromds, 0)) {
 			err = SET_ERROR(EXDEV);
+			dsl_dataset_rele(fromds, FTAG);
+			dsl_dataset_rele_flags(ds, dsflags, FTAG);
+			dsl_pool_rele(dp, FTAG);
+			return (err);
+		}
+
 		zb.zbm_creation_time =
 		    dsl_dataset_phys(fromds)->ds_creation_time;
 		zb.zbm_creation_txg = dsl_dataset_phys(fromds)->ds_creation_txg;
 		zb.zbm_guid = dsl_dataset_phys(fromds)->ds_guid;
+
+		if (dsl_dataset_is_zapified(fromds)) {
+			(void) zap_lookup(dp->dp_meta_objset,
+			    fromds->ds_object, DS_FIELD_IVSET_GUID, 8, 1,
+			    &zb.zbm_ivset_guid);
+		}
+
 		is_clone = (fromds->ds_dir != ds->ds_dir);
 		dsl_dataset_rele(fromds, FTAG);
 		err = dmu_send_impl(FTAG, dp, ds, &zb, is_clone,
-		    embedok, large_block_ok, compressok, outfd, 0, 0, vp, off);
+		    embedok, large_block_ok, compressok, rawok, outfd,
+		    0, 0, vp, off);
 	} else {
 		err = dmu_send_impl(FTAG, dp, ds, NULL, B_FALSE,
-		    embedok, large_block_ok, compressok, outfd, 0, 0, vp, off);
+		    embedok, large_block_ok, compressok, rawok, outfd,
+		    0, 0, vp, off);
 	}
-	dsl_dataset_rele(ds, FTAG);
+	dsl_dataset_rele_flags(ds, dsflags, FTAG);
 	return (err);
 }
 
 int
 dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok,
-    boolean_t large_block_ok, boolean_t compressok, int outfd,
-    uint64_t resumeobj, uint64_t resumeoff,
-    vnode_t *vp, offset_t *off)
+    boolean_t large_block_ok, boolean_t compressok, boolean_t rawok,
+    int outfd, uint64_t resumeobj, uint64_t resumeoff, vnode_t *vp,
+    offset_t *off)
 {
 	dsl_pool_t *dp;
 	dsl_dataset_t *ds;
 	int err;
+	ds_hold_flags_t dsflags = (rawok) ? 0 : DS_HOLD_FLAG_DECRYPT;
 	boolean_t owned = B_FALSE;
 
 	if (fromsnap != NULL && strpbrk(fromsnap, "@#") == NULL)
@@ -1029,10 +1301,10 @@ dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok,
 		 * We are sending a filesystem or volume.  Ensure
 		 * that it doesn't change by owning the dataset.
 		 */
-		err = dsl_dataset_own(dp, tosnap, FTAG, &ds);
+		err = dsl_dataset_own(dp, tosnap, dsflags, FTAG, &ds);
 		owned = B_TRUE;
 	} else {
-		err = dsl_dataset_hold(dp, tosnap, FTAG, &ds);
+		err = dsl_dataset_hold_flags(dp, tosnap, dsflags, FTAG, &ds);
 	}
 	if (err != 0) {
 		dsl_pool_rele(dp, FTAG);
@@ -1040,7 +1312,7 @@ dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok,
 	}
 
 	if (fromsnap != NULL) {
-		zfs_bookmark_phys_t zb;
+		zfs_bookmark_phys_t zb = { 0 };
 		boolean_t is_clone = B_FALSE;
 		int fsnamelen = strchr(tosnap, '@') - tosnap;
 
@@ -1066,28 +1338,40 @@ dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok,
 				    dsl_dataset_phys(fromds)->ds_creation_txg;
 				zb.zbm_guid = dsl_dataset_phys(fromds)->ds_guid;
 				is_clone = (ds->ds_dir != fromds->ds_dir);
+
+				if (dsl_dataset_is_zapified(fromds)) {
+					(void) zap_lookup(dp->dp_meta_objset,
+					    fromds->ds_object,
+					    DS_FIELD_IVSET_GUID, 8, 1,
+					    &zb.zbm_ivset_guid);
+				}
 				dsl_dataset_rele(fromds, FTAG);
 			}
 		} else {
 			err = dsl_bookmark_lookup(dp, fromsnap, ds, &zb);
 		}
 		if (err != 0) {
-			dsl_dataset_rele(ds, FTAG);
+			if (owned)
+				dsl_dataset_disown(ds, dsflags, FTAG);
+			else
+				dsl_dataset_rele_flags(ds, dsflags, FTAG);
+
 			dsl_pool_rele(dp, FTAG);
 			return (err);
 		}
 		err = dmu_send_impl(FTAG, dp, ds, &zb, is_clone,
-		    embedok, large_block_ok, compressok,
+		    embedok, large_block_ok, compressok, rawok,
 		    outfd, resumeobj, resumeoff, vp, off);
 	} else {
 		err = dmu_send_impl(FTAG, dp, ds, NULL, B_FALSE,
-		    embedok, large_block_ok, compressok,
+		    embedok, large_block_ok, compressok, rawok,
 		    outfd, resumeobj, resumeoff, vp, off);
 	}
 	if (owned)
-		dsl_dataset_disown(ds, FTAG);
+		dsl_dataset_disown(ds, dsflags, FTAG);
 	else
-		dsl_dataset_rele(ds, FTAG);
+		dsl_dataset_rele_flags(ds, dsflags, FTAG);
+
 	return (err);
 }
 
@@ -1242,7 +1526,8 @@ dmu_send_estimate_from_txg(dsl_dataset_t *ds, uint64_t from_txg,
 	 * traverse the blocks of the snapshot with birth times after
 	 * from_txg, summing their uncompressed size
 	 */
-	err = traverse_dataset(ds, from_txg, TRAVERSE_POST,
+	err = traverse_dataset(ds, from_txg,
+	    TRAVERSE_POST | TRAVERSE_NO_DECRYPT,
 	    dmu_calculate_send_traversal, &size);
 	if (err)
 		return (err);
diff --git a/usr/src/uts/common/fs/zfs/dmu_traverse.c b/usr/src/uts/common/fs/zfs/dmu_traverse.c
index f57e510530..0547a09498 100644
--- a/usr/src/uts/common/fs/zfs/dmu_traverse.c
+++ b/usr/src/uts/common/fs/zfs/dmu_traverse.c
@@ -131,7 +131,7 @@ traverse_zil(traverse_data_t *td, zil_header_t *zh)
 
 	zilog_t *zilog = zil_alloc(spa_get_dsl(td->td_spa)->dp_meta_objset, zh);
 	(void) zil_parse(zilog, traverse_zil_block, traverse_zil_record, td,
-	    claim_txg);
+	    claim_txg, !(td->td_flags & TRAVERSE_NO_DECRYPT));
 	zil_free(zilog);
 }
 
@@ -179,6 +179,7 @@ traverse_prefetch_metadata(traverse_data_t *td,
     const blkptr_t *bp, const zbookmark_phys_t *zb)
 {
 	arc_flags_t flags = ARC_FLAG_NOWAIT | ARC_FLAG_PREFETCH;
+	int zio_flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE;
 
 	if (!(td->td_flags & TRAVERSE_PREFETCH_METADATA))
 		return;
@@ -194,8 +195,11 @@ traverse_prefetch_metadata(traverse_data_t *td,
 	if (BP_GET_LEVEL(bp) == 0 && BP_GET_TYPE(bp) != DMU_OT_DNODE)
 		return;
 
+	if ((td->td_flags & TRAVERSE_NO_DECRYPT) && BP_IS_PROTECTED(bp))
+		zio_flags |= ZIO_FLAG_RAW;
+
 	(void) arc_read(NULL, td->td_spa, bp, NULL, NULL,
-	    ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
+	    ZIO_PRIORITY_ASYNC_READ, zio_flags, &flags, zb);
 }
 
 static boolean_t
@@ -294,6 +298,8 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
 		blkptr_t *cbp;
 		int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT;
 
+		ASSERT(!BP_IS_PROTECTED(bp));
+
 		err = arc_read(NULL, td->td_spa, bp, arc_getbuf_func, &buf,
 		    ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
 		if (err != 0)
@@ -318,11 +324,18 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
 		}
 	} else if (BP_GET_TYPE(bp) == DMU_OT_DNODE) {
 		arc_flags_t flags = ARC_FLAG_WAIT;
+		uint32_t zio_flags = ZIO_FLAG_CANFAIL;
 		int i;
 		int epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT;
 
+		/*
+		 * dnode blocks might have their bonus buffers encrypted, so
+		 * we must be careful to honor TRAVERSE_NO_DECRYPT
+		 */
+		if ((td->td_flags & TRAVERSE_NO_DECRYPT) && BP_IS_PROTECTED(bp))
+			zio_flags |= ZIO_FLAG_RAW;
 		err = arc_read(NULL, td->td_spa, bp, arc_getbuf_func, &buf,
-		    ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
+		    ZIO_PRIORITY_ASYNC_READ, zio_flags, &flags, zb);
 		if (err != 0)
 			goto post;
 		dnode_phys_t *child_dnp = buf->b_data;
@@ -340,10 +353,14 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
 				break;
 		}
 	} else if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) {
+		uint32_t zio_flags = ZIO_FLAG_CANFAIL;
 		arc_flags_t flags = ARC_FLAG_WAIT;
 
+		if ((td->td_flags & TRAVERSE_NO_DECRYPT) && BP_IS_PROTECTED(bp))
+			zio_flags |= ZIO_FLAG_RAW;
+
 		err = arc_read(NULL, td->td_spa, bp, arc_getbuf_func, &buf,
-		    ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
+		    ZIO_PRIORITY_ASYNC_READ, zio_flags, &flags, zb);
 		if (err != 0)
 			goto post;
 
@@ -492,6 +509,7 @@ traverse_prefetcher(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
     const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg)
 {
 	prefetch_data_t *pfd = arg;
+	int zio_flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE;
 	arc_flags_t aflags = ARC_FLAG_NOWAIT | ARC_FLAG_PREFETCH |
 	    ARC_FLAG_PRESCIENT_PREFETCH;
 
@@ -511,8 +529,11 @@ traverse_prefetcher(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
 	cv_broadcast(&pfd->pd_cv);
 	mutex_exit(&pfd->pd_mtx);
 
+	if ((pfd->pd_flags & TRAVERSE_NO_DECRYPT) && BP_IS_PROTECTED(bp))
+		zio_flags |= ZIO_FLAG_RAW;
+
 	(void) arc_read(NULL, spa, bp, NULL, NULL, ZIO_PRIORITY_ASYNC_READ,
-	    ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE, &aflags, zb);
+	    zio_flags, &aflags, zb);
 
 	return (0);
 }
@@ -581,15 +602,22 @@ traverse_impl(spa_t *spa, dsl_dataset_t *ds, uint64_t objset, blkptr_t *rootbp,
 	mutex_init(&pd.pd_mtx, NULL, MUTEX_DEFAULT, NULL);
 	cv_init(&pd.pd_cv, NULL, CV_DEFAULT, NULL);
 
+	SET_BOOKMARK(&czb, td.td_objset,
+	    ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
+
 	/* See comment on ZIL traversal in dsl_scan_visitds. */
 	if (ds != NULL && !ds->ds_is_snapshot && !BP_IS_HOLE(rootbp)) {
+		enum zio_flag zio_flags = ZIO_FLAG_CANFAIL;
 		arc_flags_t flags = ARC_FLAG_WAIT;
 		objset_phys_t *osp;
 		arc_buf_t *buf;
 
-		err = arc_read(NULL, td.td_spa, rootbp,
-		    arc_getbuf_func, &buf,
-		    ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, NULL);
+		if ((td.td_flags & TRAVERSE_NO_DECRYPT) &&
+		    BP_IS_PROTECTED(rootbp))
+			zio_flags |= ZIO_FLAG_RAW;
+
+		err = arc_read(NULL, td.td_spa, rootbp, arc_getbuf_func,
+		    &buf, ZIO_PRIORITY_ASYNC_READ, zio_flags, &flags, &czb);
 		if (err != 0)
 			return (err);
 
@@ -603,8 +631,6 @@ traverse_impl(spa_t *spa, dsl_dataset_t *ds, uint64_t objset, blkptr_t *rootbp,
 	    &td, TQ_NOQUEUE) == TASKQID_INVALID)
 		pd.pd_exited = B_TRUE;
 
-	SET_BOOKMARK(&czb, td.td_objset,
-	    ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
 	err = traverse_visitbp(&td, NULL, rootbp, &czb);
 
 	mutex_enter(&pd.pd_mtx);
diff --git a/usr/src/uts/common/fs/zfs/dnode.c b/usr/src/uts/common/fs/zfs/dnode.c
index f360eb997e..5a86650d28 100644
--- a/usr/src/uts/common/fs/zfs/dnode.c
+++ b/usr/src/uts/common/fs/zfs/dnode.c
@@ -136,6 +136,7 @@ dnode_cons(void *arg, void *unused, int kmflag)
 	bzero(&dn->dn_rm_spillblk[0], sizeof (dn->dn_rm_spillblk));
 	bzero(&dn->dn_next_bonuslen[0], sizeof (dn->dn_next_bonuslen));
 	bzero(&dn->dn_next_blksz[0], sizeof (dn->dn_next_blksz));
+	bzero(&dn->dn_next_maxblkid[0], sizeof (dn->dn_next_maxblkid));
 
 	for (i = 0; i < TXG_SIZE; i++) {
 		multilist_link_init(&dn->dn_dirty_link[i]);
@@ -196,6 +197,7 @@ dnode_dest(void *arg, void *unused)
 		ASSERT0(dn->dn_rm_spillblk[i]);
 		ASSERT0(dn->dn_next_bonuslen[i]);
 		ASSERT0(dn->dn_next_blksz[i]);
+		ASSERT0(dn->dn_next_maxblkid[i]);
 	}
 
 	ASSERT0(dn->dn_allocated_txg);
@@ -617,6 +619,7 @@ dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
 		ASSERT0(dn->dn_next_bonustype[i]);
 		ASSERT0(dn->dn_rm_spillblk[i]);
 		ASSERT0(dn->dn_next_blksz[i]);
+		ASSERT0(dn->dn_next_maxblkid[i]);
 		ASSERT(!multilist_link_active(&dn->dn_dirty_link[i]));
 		ASSERT3P(list_head(&dn->dn_dirty_records[i]), ==, NULL);
 		ASSERT3P(dn->dn_free_ranges[i], ==, NULL);
@@ -659,7 +662,8 @@ dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
 
 void
 dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize,
-    dmu_object_type_t bonustype, int bonuslen, int dn_slots, dmu_tx_t *tx)
+    dmu_object_type_t bonustype, int bonuslen, int dn_slots,
+    boolean_t keep_spill, dmu_tx_t *tx)
 {
 	int nblkptr;
 
@@ -708,7 +712,7 @@ dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize,
 		dn->dn_next_bonustype[tx->tx_txg&TXG_MASK] = bonustype;
 	if (dn->dn_nblkptr != nblkptr)
 		dn->dn_next_nblkptr[tx->tx_txg&TXG_MASK] = nblkptr;
-	if (dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR) {
+	if (dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR && !keep_spill) {
 		dbuf_rm_spill(dn, tx);
 		dnode_rm_spill(dn, tx);
 	}
@@ -785,6 +789,8 @@ dnode_move_impl(dnode_t *odn, dnode_t *ndn)
 	    sizeof (odn->dn_next_bonuslen));
 	bcopy(&odn->dn_next_blksz[0], &ndn->dn_next_blksz[0],
 	    sizeof (odn->dn_next_blksz));
+	bcopy(&odn->dn_next_maxblkid[0], &ndn->dn_next_maxblkid[0],
+	    sizeof (odn->dn_next_maxblkid));
 	for (i = 0; i < TXG_SIZE; i++) {
 		list_move_tail(&ndn->dn_dirty_records[i],
 		    &odn->dn_dirty_records[i]);
@@ -1321,7 +1327,11 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, int slots,
 		DNODE_STAT_BUMP(dnode_hold_dbuf_hold);
 		return (SET_ERROR(EIO));
 	}
-	err = dbuf_read(db, NULL, DB_RF_CANFAIL);
+	/*
+	 * We do not need to decrypt to read the dnode so it doesn't matter
+	 * if we get the encrypted or decrypted version.
+	 */
+	err = dbuf_read(db, NULL, DB_RF_CANFAIL | DB_RF_NO_DECRYPT);
 	if (err) {
 		DNODE_STAT_BUMP(dnode_hold_dbuf_read);
 		dbuf_rele(db, FTAG);
@@ -1749,11 +1759,74 @@ fail:
 	return (SET_ERROR(ENOTSUP));
 }
 
+static void
+dnode_set_nlevels_impl(dnode_t *dn, int new_nlevels, dmu_tx_t *tx)
+{
+	uint64_t txgoff = tx->tx_txg & TXG_MASK;
+	int old_nlevels = dn->dn_nlevels;
+	dmu_buf_impl_t *db;
+	list_t *list;
+	dbuf_dirty_record_t *new, *dr, *dr_next;
+
+	ASSERT(RW_WRITE_HELD(&dn->dn_struct_rwlock));
+
+	dn->dn_nlevels = new_nlevels;
+
+	ASSERT3U(new_nlevels, >, dn->dn_next_nlevels[txgoff]);
+	dn->dn_next_nlevels[txgoff] = new_nlevels;
+
+	/* dirty the left indirects */
+	db = dbuf_hold_level(dn, old_nlevels, 0, FTAG);
+	ASSERT(db != NULL);
+	new = dbuf_dirty(db, tx);
+	dbuf_rele(db, FTAG);
+
+	/* transfer the dirty records to the new indirect */
+	mutex_enter(&dn->dn_mtx);
+	mutex_enter(&new->dt.di.dr_mtx);
+	list = &dn->dn_dirty_records[txgoff];
+	for (dr = list_head(list); dr; dr = dr_next) {
+		dr_next = list_next(&dn->dn_dirty_records[txgoff], dr);
+		if (dr->dr_dbuf->db_level != new_nlevels-1 &&
+		    dr->dr_dbuf->db_blkid != DMU_BONUS_BLKID &&
+		    dr->dr_dbuf->db_blkid != DMU_SPILL_BLKID) {
+			ASSERT(dr->dr_dbuf->db_level == old_nlevels-1);
+			list_remove(&dn->dn_dirty_records[txgoff], dr);
+			list_insert_tail(&new->dt.di.dr_children, dr);
+			dr->dr_parent = new;
+		}
+	}
+	mutex_exit(&new->dt.di.dr_mtx);
+	mutex_exit(&dn->dn_mtx);
+}
+
+int
+dnode_set_nlevels(dnode_t *dn, int nlevels, dmu_tx_t *tx)
+{
+	int ret = 0;
+
+	rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
+
+	if (dn->dn_nlevels == nlevels) {
+		ret = 0;
+		goto out;
+	} else if (nlevels < dn->dn_nlevels) {
+		ret = SET_ERROR(EINVAL);
+		goto out;
+	}
+
+	dnode_set_nlevels_impl(dn, nlevels, tx);
+
+out:
+	rw_exit(&dn->dn_struct_rwlock);
+	return (ret);
+}
+
 /* read-holding callers must not rely on the lock being continuously held */
 void
-dnode_new_blkid(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx, boolean_t have_read)
+dnode_new_blkid(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx, boolean_t have_read,
+    boolean_t force)
 {
-	uint64_t txgoff = tx->tx_txg & TXG_MASK;
 	int epbs, new_nlevels;
 	uint64_t sz;
 
@@ -1777,13 +1850,25 @@ dnode_new_blkid(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx, boolean_t have_read)
 		}
 	}
 
-	if (blkid <= dn->dn_maxblkid)
+	/*
+	 * Raw sends (indicated by the force flag) require that we take the
+	 * given blkid even if the value is lower than the current value.
+	 */
+	if (!force && blkid <= dn->dn_maxblkid)
 		goto out;
 
+	/*
+	 * We use the (otherwise unused) top bit of dn_next_maxblkid[txgoff]
+	 * to indicate that this field is set. This allows us to set the
+	 * maxblkid to 0 on an existing object in dnode_sync().
+	 */
 	dn->dn_maxblkid = blkid;
+	dn->dn_next_maxblkid[tx->tx_txg & TXG_MASK] =
+	    blkid | DMU_NEXT_MAXBLKID_SET;
 
 	/*
 	 * Compute the number of levels necessary to support the new maxblkid.
+	 * Raw sends will ensure nlevels is set correctly for us.
 	 */
 	new_nlevels = 1;
 	epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
@@ -1791,40 +1876,11 @@ dnode_new_blkid(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx, boolean_t have_read)
 	    sz <= blkid && sz >= dn->dn_nblkptr; sz <<= epbs)
 		new_nlevels++;
 
-	if (new_nlevels > dn->dn_nlevels) {
-		int old_nlevels = dn->dn_nlevels;
-		dmu_buf_impl_t *db;
-		list_t *list;
-		dbuf_dirty_record_t *new, *dr, *dr_next;
-
-		dn->dn_nlevels = new_nlevels;
-
-		ASSERT3U(new_nlevels, >, dn->dn_next_nlevels[txgoff]);
-		dn->dn_next_nlevels[txgoff] = new_nlevels;
-
-		/* dirty the left indirects */
-		db = dbuf_hold_level(dn, old_nlevels, 0, FTAG);
-		ASSERT(db != NULL);
-		new = dbuf_dirty(db, tx);
-		dbuf_rele(db, FTAG);
-
-		/* transfer the dirty records to the new indirect */
-		mutex_enter(&dn->dn_mtx);
-		mutex_enter(&new->dt.di.dr_mtx);
-		list = &dn->dn_dirty_records[txgoff];
-		for (dr = list_head(list); dr; dr = dr_next) {
-			dr_next = list_next(&dn->dn_dirty_records[txgoff], dr);
-			if (dr->dr_dbuf->db_level != new_nlevels-1 &&
-			    dr->dr_dbuf->db_blkid != DMU_BONUS_BLKID &&
-			    dr->dr_dbuf->db_blkid != DMU_SPILL_BLKID) {
-				ASSERT(dr->dr_dbuf->db_level == old_nlevels-1);
-				list_remove(&dn->dn_dirty_records[txgoff], dr);
-				list_insert_tail(&new->dt.di.dr_children, dr);
-				dr->dr_parent = new;
-			}
-		}
-		mutex_exit(&new->dt.di.dr_mtx);
-		mutex_exit(&dn->dn_mtx);
+	if (!force) {
+		if (new_nlevels > dn->dn_nlevels)
+			dnode_set_nlevels_impl(dn, new_nlevels, tx);
+	} else {
+		ASSERT3U(dn->dn_nlevels, >=, new_nlevels);
 	}
 
 out:
@@ -2249,7 +2305,8 @@ dnode_next_offset_level(dnode_t *dn, int flags, uint64_t *offset,
 			 */
 			return (SET_ERROR(ESRCH));
 		}
-		error = dbuf_read(db, NULL, DB_RF_CANFAIL | DB_RF_HAVESTRUCT);
+		error = dbuf_read(db, NULL,
+		    DB_RF_CANFAIL | DB_RF_HAVESTRUCT | DB_RF_NO_DECRYPT);
 		if (error) {
 			dbuf_rele(db, FTAG);
 			return (error);
diff --git a/usr/src/uts/common/fs/zfs/dnode_sync.c b/usr/src/uts/common/fs/zfs/dnode_sync.c
index 9283356608..f5ee8a290d 100644
--- a/usr/src/uts/common/fs/zfs/dnode_sync.c
+++ b/usr/src/uts/common/fs/zfs/dnode_sync.c
@@ -31,6 +31,7 @@
 #include <sys/dmu.h>
 #include <sys/dmu_tx.h>
 #include <sys/dmu_objset.h>
+#include <sys/dmu_recv.h>
 #include <sys/dsl_dataset.h>
 #include <sys/spa.h>
 #include <sys/range_tree.h>
@@ -383,7 +384,21 @@ dnode_sync_free_range_impl(dnode_t *dn, uint64_t blkid, uint64_t nblks,
 		}
 	}
 
-	if (trunc) {
+	/*
+	 * Do not truncate the maxblkid if we are performing a raw
+	 * receive. The raw receive sets the maxblkid manually and
+	 * must not be overridden. Usually, the last DRR_FREE record
+	 * will be at the maxblkid, because the source system sets
+	 * the maxblkid when truncating. However, if the last block
+	 * was freed by overwriting with zeros and being compressed
+	 * away to a hole, the source system will generate a DRR_FREE
+	 * record while leaving the maxblkid after the end of that
+	 * record. In this case we need to leave the maxblkid as
+	 * indicated in the DRR_OBJECT record, so that it matches the
+	 * source system, ensuring that the cryptographic hashes will
+	 * match.
+	 */
+	if (trunc && !dn->dn_objset->os_raw_receive) {
 		dn->dn_phys->dn_maxblkid = blkid == 0 ? 0 : blkid - 1;
 
 		uint64_t off = (dn->dn_phys->dn_maxblkid + 1) *
@@ -545,6 +560,7 @@ dnode_sync_free(dnode_t *dn, dmu_tx_t *tx)
 	dn->dn_next_nlevels[txgoff] = 0;
 	dn->dn_next_indblkshift[txgoff] = 0;
 	dn->dn_next_blksz[txgoff] = 0;
+	dn->dn_next_maxblkid[txgoff] = 0;
 
 	/* ASSERT(blkptrs are zero); */
 	ASSERT(dn->dn_phys->dn_type != DMU_OT_NONE);
@@ -570,7 +586,7 @@ dnode_sync_free(dnode_t *dn, dmu_tx_t *tx)
 	dnode_rele(dn, (void *)(uintptr_t)tx->tx_txg);
 	/*
 	 * Now that we've released our hold, the dnode may
-	 * be evicted, so we musn't access it.
+	 * be evicted, so we mustn't access it.
 	 */
 }
 
@@ -580,6 +596,7 @@ dnode_sync_free(dnode_t *dn, dmu_tx_t *tx)
 void
 dnode_sync(dnode_t *dn, dmu_tx_t *tx)
 {
+	objset_t *os = dn->dn_objset;
 	dnode_phys_t *dnp = dn->dn_phys;
 	int txgoff = tx->tx_txg & TXG_MASK;
 	list_t *list = &dn->dn_dirty_records[txgoff];
@@ -594,8 +611,13 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx)
 
 	ASSERT(dn->dn_dbuf == NULL || arc_released(dn->dn_dbuf->db_buf));
 
-	if (dmu_objset_userused_enabled(dn->dn_objset) &&
-	    !DMU_OBJECT_IS_SPECIAL(dn->dn_object)) {
+	/*
+	 * Do user accounting if it is enabled and this is not
+	 * an encrypted receive.
+	 */
+	if (dmu_objset_userused_enabled(os) &&
+	    !DMU_OBJECT_IS_SPECIAL(dn->dn_object) &&
+	    (!os->os_encrypted || !dmu_objset_is_receiving(os))) {
 		mutex_enter(&dn->dn_mtx);
 		dn->dn_oldused = DN_USED_BYTES(dn->dn_phys);
 		dn->dn_oldflags = dn->dn_phys->dn_flags;
@@ -603,7 +625,7 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx)
 		mutex_exit(&dn->dn_mtx);
 		dmu_objset_userquota_get_ids(dn, B_FALSE, tx);
 	} else {
-		/* Once we account for it, we should always account for it. */
+		/* Once we account for it, we should always account for it */
 		ASSERT(!(dn->dn_phys->dn_flags &
 		    DNODE_FLAG_USERUSED_ACCOUNTED));
 	}
@@ -740,6 +762,19 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx)
 		dn->dn_next_nlevels[txgoff] = 0;
 	}
 
+	/*
+	 * This must be done after dnode_sync_free_range()
+	 * and dnode_increase_indirection(). See dnode_new_blkid()
+	 * for an explanation of the high bit being set.
+	 */
+	if (dn->dn_next_maxblkid[txgoff]) {
+		mutex_enter(&dn->dn_mtx);
+		dnp->dn_maxblkid =
+		    dn->dn_next_maxblkid[txgoff] & ~DMU_NEXT_MAXBLKID_SET;
+		dn->dn_next_maxblkid[txgoff] = 0;
+		mutex_exit(&dn->dn_mtx);
+	}
+
 	if (dn->dn_next_nblkptr[txgoff]) {
 		/* this should only happen on a realloc */
 		ASSERT(dn->dn_allocated_txg == tx->tx_txg);
diff --git a/usr/src/uts/common/fs/zfs/dsl_bookmark.c b/usr/src/uts/common/fs/zfs/dsl_bookmark.c
index 0a58115341..a32198402f 100644
--- a/usr/src/uts/common/fs/zfs/dsl_bookmark.c
+++ b/usr/src/uts/common/fs/zfs/dsl_bookmark.c
@@ -70,6 +70,12 @@ dsl_dataset_bmark_lookup(dsl_dataset_t *ds, const char *shortname,
 	if (dsl_dataset_phys(ds)->ds_flags & DS_FLAG_CI_DATASET)
 		mt = MT_NORMALIZE;
 
+	/*
+	 * Zero out the bookmark in case the one stored on disk
+	 * is in an older, shorter format.
+	 */
+	bzero(bmark_phys, sizeof (*bmark_phys));
+
 	err = zap_lookup_norm(mos, bmark_zapobj, shortname, sizeof (uint64_t),
 	    sizeof (*bmark_phys) / sizeof (uint64_t), bmark_phys, mt,
 	    NULL, 0, NULL);
@@ -188,8 +194,9 @@ dsl_bookmark_create_sync(void *arg, dmu_tx_t *tx)
 	for (nvpair_t *pair = nvlist_next_nvpair(dbca->dbca_bmarks, NULL);
 	    pair != NULL; pair = nvlist_next_nvpair(dbca->dbca_bmarks, pair)) {
 		dsl_dataset_t *snapds, *bmark_fs;
-		zfs_bookmark_phys_t bmark_phys;
+		zfs_bookmark_phys_t bmark_phys = { 0 };
 		char *shortname;
+		uint32_t bmark_len = BOOKMARK_PHYS_SIZE_V1;
 
 		VERIFY0(dsl_dataset_hold(dp, fnvpair_value_string(pair),
 		    FTAG, &snapds));
@@ -214,10 +221,29 @@ dsl_bookmark_create_sync(void *arg, dmu_tx_t *tx)
 		bmark_phys.zbm_creation_time =
 		    dsl_dataset_phys(snapds)->ds_creation_time;
 
+		/*
+		 * If the dataset is encrypted create a larger bookmark to
+		 * accommodate the IVset guid. The IVset guid was added
+		 * after the encryption feature to prevent a problem with
+		 * raw sends. If we encounter an encrypted dataset without
+		 * an IVset guid we fall back to a normal bookmark.
+		 */
+		if (snapds->ds_dir->dd_crypto_obj != 0 &&
+		    spa_feature_is_enabled(dp->dp_spa,
+		    SPA_FEATURE_BOOKMARK_V2)) {
+			int err = zap_lookup(mos, snapds->ds_object,
+			    DS_FIELD_IVSET_GUID, sizeof (uint64_t), 1,
+			    &bmark_phys.zbm_ivset_guid);
+			if (err == 0) {
+				bmark_len = BOOKMARK_PHYS_SIZE_V2;
+				spa_feature_incr(dp->dp_spa,
+				    SPA_FEATURE_BOOKMARK_V2, tx);
+			}
+		}
+
 		VERIFY0(zap_add(mos, bmark_fs->ds_bookmarks,
 		    shortname, sizeof (uint64_t),
-		    sizeof (zfs_bookmark_phys_t) / sizeof (uint64_t),
-		    &bmark_phys, tx));
+		    bmark_len / sizeof (uint64_t), &bmark_phys, tx));
 
 		spa_history_log_internal_ds(bmark_fs, "bookmark", tx,
 		    "name=%s creation_txg=%llu target_snap=%llu",
@@ -267,7 +293,7 @@ dsl_get_bookmarks_impl(dsl_dataset_t *ds, nvlist_t *props, nvlist_t *outnvl)
 	    zap_cursor_retrieve(&zc, &attr) == 0;
 	    zap_cursor_advance(&zc)) {
 		char *bmark_name = attr.za_name;
-		zfs_bookmark_phys_t bmark_phys;
+		zfs_bookmark_phys_t bmark_phys = { 0 };
 
 		err = dsl_dataset_bmark_lookup(ds, bmark_name, &bmark_phys);
 		ASSERT3U(err, !=, ENOENT);
@@ -290,6 +316,11 @@ dsl_get_bookmarks_impl(dsl_dataset_t *ds, nvlist_t *props, nvlist_t *outnvl)
 			dsl_prop_nvlist_add_uint64(out_props,
 			    ZFS_PROP_CREATION, bmark_phys.zbm_creation_time);
 		}
+		if (nvlist_exists(props,
+		    zfs_prop_to_name(ZFS_PROP_IVSET_GUID))) {
+			dsl_prop_nvlist_add_uint64(out_props,
+			    ZFS_PROP_IVSET_GUID, bmark_phys.zbm_ivset_guid);
+		}
 
 		fnvlist_add_nvlist(outnvl, bmark_name, out_props);
 		fnvlist_free(out_props);
@@ -337,13 +368,26 @@ typedef struct dsl_bookmark_destroy_arg {
 static int
 dsl_dataset_bookmark_remove(dsl_dataset_t *ds, const char *name, dmu_tx_t *tx)
 {
+	int err;
 	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
 	uint64_t bmark_zapobj = ds->ds_bookmarks;
 	matchtype_t mt = 0;
+	uint64_t int_size, num_ints;
 
 	if (dsl_dataset_phys(ds)->ds_flags & DS_FLAG_CI_DATASET)
 		mt = MT_NORMALIZE;
 
+	err = zap_length(mos, bmark_zapobj, name, &int_size, &num_ints);
+	if (err != 0)
+		return (err);
+
+	ASSERT3U(int_size, ==, sizeof (uint64_t));
+
+	if (num_ints * int_size > BOOKMARK_PHYS_SIZE_V1) {
+		spa_feature_decr(dmu_objset_spa(mos),
+		    SPA_FEATURE_BOOKMARK_V2, tx);
+	}
+
 	return (zap_remove_norm(mos, bmark_zapobj, name, mt, tx));
 }
 
diff --git a/usr/src/uts/common/fs/zfs/dsl_crypt.c b/usr/src/uts/common/fs/zfs/dsl_crypt.c
new file mode 100644
index 0000000000..3937d3cb51
--- /dev/null
+++ b/usr/src/uts/common/fs/zfs/dsl_crypt.c
@@ -0,0 +1,2898 @@
+/*
+ * CDDL HEADER START
+ *
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2017, Datto, Inc. All rights reserved.
+ */
+
+#include <sys/dsl_crypt.h>
+#include <sys/dsl_pool.h>
+#include <sys/zap.h>
+#include <sys/zil.h>
+#include <sys/dsl_dir.h>
+#include <sys/dsl_prop.h>
+#include <sys/spa_impl.h>
+#include <sys/dmu_objset.h>
+#include <sys/zvol.h>
+
+/*
+ * This file's primary purpose is for managing master encryption keys in
+ * memory and on disk. For more info on how these keys are used, see the
+ * block comment in zio_crypt.c.
+ *
+ * All master keys are stored encrypted on disk in the form of the DSL
+ * Crypto Key ZAP object. The binary key data in this object is always
+ * randomly generated and is encrypted with the user's wrapping key. This
+ * layer of indirection allows the user to change their key without
+ * needing to re-encrypt the entire dataset. The ZAP also holds on to the
+ * (non-encrypted) encryption algorithm identifier, IV, and MAC needed to
+ * safely decrypt the master key. For more info on the user's key see the
+ * block comment in libzfs_crypto.c
+ *
+ * In-memory encryption keys are managed through the spa_keystore. The
+ * keystore consists of 3 AVL trees, which are as follows:
+ *
+ * The Wrapping Key Tree:
+ * The wrapping key (wkey) tree stores the user's keys that are fed into the
+ * kernel through 'zfs load-key' and related commands. Datasets inherit their
+ * parent's wkey by default, so these structures are refcounted. The wrapping
+ * keys remain in memory until they are explicitly unloaded (with
+ * "zfs unload-key"). Unloading is only possible when no datasets are using
+ * them (refcount=0).
+ *
+ * The DSL Crypto Key Tree:
+ * The DSL Crypto Keys (DCK) are the in-memory representation of decrypted
+ * master keys. They are used by the functions in zio_crypt.c to perform
+ * encryption, decryption, and authentication. Snapshots and clones of a given
+ * dataset will share a DSL Crypto Key, so they are also refcounted. Once the
+ * refcount on a key hits zero, it is immediately zeroed out and freed.
+ *
+ * The Crypto Key Mapping Tree:
+ * The zio layer needs to look up master keys by their dataset object id. Since
+ * the DSL Crypto Keys can belong to multiple datasets, we maintain a tree of
+ * dsl_key_mapping_t's which essentially just map the dataset object id to its
+ * appropriate DSL Crypto Key. The management for creating and destroying these
+ * mappings hooks into the code for owning and disowning datasets. Usually,
+ * there will only be one active dataset owner, but there are times
+ * (particularly during dataset creation and destruction) when this may not be
+ * true or the dataset may not be initialized enough to own. As a result, this
+ * object is also refcounted.
+ */
+
+/*
+ * This tunable allows datasets to be raw received even if the stream does
+ * not include IVset guids or if the guids don't match. This is used as part
+ * of the resolution for ZPOOL_ERRATA_ZOL_8308_ENCRYPTION.
+ */
+int zfs_disable_ivset_guid_check = 0;
+
+static void
+dsl_wrapping_key_hold(dsl_wrapping_key_t *wkey, void *tag)
+{
+	(void) zfs_refcount_add(&wkey->wk_refcnt, tag);
+}
+
+static void
+dsl_wrapping_key_rele(dsl_wrapping_key_t *wkey, void *tag)
+{
+	(void) zfs_refcount_remove(&wkey->wk_refcnt, tag);
+}
+
+static void
+dsl_wrapping_key_free(dsl_wrapping_key_t *wkey)
+{
+	ASSERT0(zfs_refcount_count(&wkey->wk_refcnt));
+
+	if (wkey->wk_key.ck_data) {
+		bzero(wkey->wk_key.ck_data,
+		    CRYPTO_BITS2BYTES(wkey->wk_key.ck_length));
+		kmem_free(wkey->wk_key.ck_data,
+		    CRYPTO_BITS2BYTES(wkey->wk_key.ck_length));
+	}
+
+	zfs_refcount_destroy(&wkey->wk_refcnt);
+	kmem_free(wkey, sizeof (dsl_wrapping_key_t));
+}
+
+static int
+dsl_wrapping_key_create(uint8_t *wkeydata, zfs_keyformat_t keyformat,
+    uint64_t salt, uint64_t iters, dsl_wrapping_key_t **wkey_out)
+{
+	int ret;
+	dsl_wrapping_key_t *wkey;
+
+	/* allocate the wrapping key */
+	wkey = kmem_alloc(sizeof (dsl_wrapping_key_t), KM_SLEEP);
+	if (!wkey)
+		return (SET_ERROR(ENOMEM));
+
+	/* allocate and initialize the underlying crypto key */
+	wkey->wk_key.ck_data = kmem_alloc(WRAPPING_KEY_LEN, KM_SLEEP);
+	if (!wkey->wk_key.ck_data) {
+		ret = SET_ERROR(ENOMEM);
+		goto error;
+	}
+
+	wkey->wk_key.ck_format = CRYPTO_KEY_RAW;
+	wkey->wk_key.ck_length = CRYPTO_BYTES2BITS(WRAPPING_KEY_LEN);
+	bcopy(wkeydata, wkey->wk_key.ck_data, WRAPPING_KEY_LEN);
+
+	/* initialize the rest of the struct */
+	zfs_refcount_create(&wkey->wk_refcnt);
+	wkey->wk_keyformat = keyformat;
+	wkey->wk_salt = salt;
+	wkey->wk_iters = iters;
+
+	*wkey_out = wkey;
+	return (0);
+
+error:
+	dsl_wrapping_key_free(wkey);
+
+	*wkey_out = NULL;
+	return (ret);
+}
+
+int
+dsl_crypto_params_create_nvlist(dcp_cmd_t cmd, nvlist_t *props,
+    nvlist_t *crypto_args, dsl_crypto_params_t **dcp_out)
+{
+	int ret;
+	uint64_t crypt = ZIO_CRYPT_INHERIT;
+	uint64_t keyformat = ZFS_KEYFORMAT_NONE;
+	uint64_t salt = 0, iters = 0;
+	dsl_crypto_params_t *dcp = NULL;
+	dsl_wrapping_key_t *wkey = NULL;
+	uint8_t *wkeydata = NULL;
+	uint_t wkeydata_len = 0;
+	char *keylocation = NULL;
+
+	dcp = kmem_zalloc(sizeof (dsl_crypto_params_t), KM_SLEEP);
+	if (!dcp) {
+		ret = SET_ERROR(ENOMEM);
+		goto error;
+	}
+
+	/* record the requested command; it is range-checked below */
+	dcp->cp_cmd = cmd;
+
+	/* get relevant arguments from the nvlists */
+	if (props != NULL) {
+		(void) nvlist_lookup_uint64(props,
+		    zfs_prop_to_name(ZFS_PROP_ENCRYPTION), &crypt);
+		(void) nvlist_lookup_uint64(props,
+		    zfs_prop_to_name(ZFS_PROP_KEYFORMAT), &keyformat);
+		(void) nvlist_lookup_string(props,
+		    zfs_prop_to_name(ZFS_PROP_KEYLOCATION), &keylocation);
+		(void) nvlist_lookup_uint64(props,
+		    zfs_prop_to_name(ZFS_PROP_PBKDF2_SALT), &salt);
+		(void) nvlist_lookup_uint64(props,
+		    zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS), &iters);
+		dcp->cp_crypt = crypt;
+	}
+
+	if (crypto_args != NULL) {
+		(void) nvlist_lookup_uint8_array(crypto_args, "wkeydata",
+		    &wkeydata, &wkeydata_len);
+	}
+
+	/* check for valid command */
+	if (dcp->cp_cmd >= DCP_CMD_MAX) {
+		ret = SET_ERROR(EINVAL);
+		goto error;
+	} else {
+		dcp->cp_cmd = cmd;
+	}
+
+	/* check for valid crypt */
+	if (dcp->cp_crypt >= ZIO_CRYPT_FUNCTIONS) {
+		ret = SET_ERROR(EINVAL);
+		goto error;
+	} else {
+		dcp->cp_crypt = crypt;
+	}
+
+	/* check for valid keyformat */
+	if (keyformat >= ZFS_KEYFORMAT_FORMATS) {
+		ret = SET_ERROR(EINVAL);
+		goto error;
+	}
+
+	/* check for a valid keylocation (of any kind) and copy it in */
+	if (keylocation != NULL) {
+		if (!zfs_prop_valid_keylocation(keylocation, B_FALSE)) {
+			ret = SET_ERROR(EINVAL);
+			goto error;
+		}
+
+		dcp->cp_keylocation = spa_strdup(keylocation);
+	}
+
+	/* check wrapping key length, if given */
+	if (wkeydata != NULL && wkeydata_len != WRAPPING_KEY_LEN) {
+		ret = SET_ERROR(EINVAL);
+		goto error;
+	}
+
+	/* if the user asked for the default crypt, determine that now */
+	if (dcp->cp_crypt == ZIO_CRYPT_ON)
+		dcp->cp_crypt = ZIO_CRYPT_ON_VALUE;
+
+	/* create the wrapping key from the raw data */
+	if (wkeydata != NULL) {
+		/* create the wrapping key with the verified parameters */
+		ret = dsl_wrapping_key_create(wkeydata, keyformat, salt,
+		    iters, &wkey);
+		if (ret != 0)
+			goto error;
+
+		dcp->cp_wkey = wkey;
+	}
+
+	/*
+	 * Remove the encryption properties from the nvlist since they are not
+	 * maintained through the DSL.
+	 */
+	(void) nvlist_remove_all(props, zfs_prop_to_name(ZFS_PROP_ENCRYPTION));
+	(void) nvlist_remove_all(props, zfs_prop_to_name(ZFS_PROP_KEYFORMAT));
+	(void) nvlist_remove_all(props, zfs_prop_to_name(ZFS_PROP_PBKDF2_SALT));
+	(void) nvlist_remove_all(props,
+	    zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS));
+
+	*dcp_out = dcp;
+
+	return (0);
+
+error:
+	if (wkey != NULL)
+		dsl_wrapping_key_free(wkey);
+	if (dcp != NULL)
+		kmem_free(dcp, sizeof (dsl_crypto_params_t));
+
+	*dcp_out = NULL;
+	return (ret);
+}
+
+/*
+ * Release a dsl_crypto_params_t together with the keylocation string it
+ * carries. The wrapping key is freed as well only when 'unload' is set.
+ * Passing a NULL 'dcp' is a no-op.
+ */
+void
+dsl_crypto_params_free(dsl_crypto_params_t *dcp, boolean_t unload)
+{
+	if (dcp != NULL) {
+		char *location = dcp->cp_keylocation;
+		dsl_wrapping_key_t *wkey = dcp->cp_wkey;
+
+		if (location != NULL)
+			spa_strfree(location);
+		if (wkey != NULL && unload)
+			dsl_wrapping_key_free(wkey);
+
+		kmem_free(dcp, sizeof (dsl_crypto_params_t));
+	}
+}
+
+/* AVL comparator for sk_dsl_keys: orders DSL Crypto Keys by dck_obj. */
+static int
+spa_crypto_key_compare(const void *a, const void *b)
+{
+	const dsl_crypto_key_t *lhs = a;
+	const dsl_crypto_key_t *rhs = b;
+
+	if (lhs->dck_obj == rhs->dck_obj)
+		return (0);
+
+	return (lhs->dck_obj < rhs->dck_obj ? -1 : 1);
+}
+
+/* AVL comparator for sk_key_mappings: orders mappings by km_dsobj. */
+static int
+spa_key_mapping_compare(const void *a, const void *b)
+{
+	const dsl_key_mapping_t *lhs = a;
+	const dsl_key_mapping_t *rhs = b;
+
+	if (lhs->km_dsobj == rhs->km_dsobj)
+		return (0);
+
+	return (lhs->km_dsobj < rhs->km_dsobj ? -1 : 1);
+}
+
+/* AVL comparator for sk_wkeys: orders wrapping keys by wk_ddobj. */
+static int
+spa_wkey_compare(const void *a, const void *b)
+{
+	const dsl_wrapping_key_t *lhs = a;
+	const dsl_wrapping_key_t *rhs = b;
+
+	if (lhs->wk_ddobj == rhs->wk_ddobj)
+		return (0);
+
+	return (lhs->wk_ddobj < rhs->wk_ddobj ? -1 : 1);
+}
+
+/*
+ * Set up an empty keystore: three AVL trees (DSL Crypto Keys, key mappings
+ * and wrapping keys), each paired with its own reader/writer lock.
+ */
+void
+spa_keystore_init(spa_keystore_t *sk)
+{
+	/* DSL Crypto Keys, indexed by dck_obj */
+	rw_init(&sk->sk_dk_lock, NULL, RW_DEFAULT, NULL);
+	avl_create(&sk->sk_dsl_keys, spa_crypto_key_compare,
+	    sizeof (dsl_crypto_key_t),
+	    offsetof(dsl_crypto_key_t, dck_avl_link));
+
+	/* dataset -> key mappings, indexed by km_dsobj */
+	rw_init(&sk->sk_km_lock, NULL, RW_DEFAULT, NULL);
+	avl_create(&sk->sk_key_mappings, spa_key_mapping_compare,
+	    sizeof (dsl_key_mapping_t),
+	    offsetof(dsl_key_mapping_t, km_avl_link));
+
+	/* wrapping keys, indexed by wk_ddobj */
+	rw_init(&sk->sk_wkeys_lock, NULL, RW_DEFAULT, NULL);
+	avl_create(&sk->sk_wkeys, spa_wkey_compare,
+	    sizeof (dsl_wrapping_key_t),
+	    offsetof(dsl_wrapping_key_t, wk_avl_link));
+}
+
+/*
+ * Tear down a keystore. The DSL key and key mapping trees are asserted to
+ * be empty already; any wrapping keys still loaded are freed here before
+ * the trees and locks are destroyed.
+ */
+void
+spa_keystore_fini(spa_keystore_t *sk)
+{
+	void *cookie = NULL;
+
+	ASSERT(avl_is_empty(&sk->sk_dsl_keys));
+	ASSERT(avl_is_empty(&sk->sk_key_mappings));
+
+	/* drain and free every wrapping key that is still loaded */
+	for (;;) {
+		dsl_wrapping_key_t *wkey =
+		    avl_destroy_nodes(&sk->sk_wkeys, &cookie);
+
+		if (wkey == NULL)
+			break;
+		dsl_wrapping_key_free(wkey);
+	}
+
+	avl_destroy(&sk->sk_wkeys);
+	avl_destroy(&sk->sk_key_mappings);
+	avl_destroy(&sk->sk_dsl_keys);
+
+	rw_destroy(&sk->sk_wkeys_lock);
+	rw_destroy(&sk->sk_km_lock);
+	rw_destroy(&sk->sk_dk_lock);
+}
+
+/*
+ * Fetch the encryption root's dsl dir object number for 'dd' from its
+ * crypto key ZAP object. Returns ENOENT when 'dd' has no crypto object,
+ * otherwise the result of the ZAP lookup.
+ */
+static int
+dsl_dir_get_encryption_root_ddobj(dsl_dir_t *dd, uint64_t *rddobj)
+{
+	objset_t *mos;
+
+	if (dd->dd_crypto_obj == 0)
+		return (SET_ERROR(ENOENT));
+
+	mos = dd->dd_pool->dp_meta_objset;
+
+	return (zap_lookup(mos, dd->dd_crypto_obj, DSL_CRYPTO_KEY_ROOT_DDOBJ,
+	    8, 1, rddobj));
+}
+
+/*
+ * Report the on-disk encryption key version of 'dd' through '*version'.
+ * Returns ENOENT (with '*version' set to 0) when 'dd' has no crypto object.
+ * A missing version entry is not an error: '*version' simply stays 0.
+ */
+int
+dsl_dir_get_encryption_version(dsl_dir_t *dd, uint64_t *version)
+{
+	objset_t *mos;
+
+	*version = 0;
+
+	if (dd->dd_crypto_obj == 0)
+		return (SET_ERROR(ENOENT));
+
+	mos = dd->dd_pool->dp_meta_objset;
+
+	/* a failed lookup deliberately leaves the implied version 0 */
+	(void) zap_lookup(mos, dd->dd_crypto_obj, DSL_CRYPTO_KEY_VERSION,
+	    8, 1, version);
+
+	return (0);
+}
+
+/*
+ * Returns true when 'dd' has a crypto object whose key version differs
+ * from ZIO_CRYPT_KEY_CURRENT_VERSION. A dir whose version cannot be
+ * obtained is reported as not incompatible.
+ */
+boolean_t
+dsl_dir_incompatible_encryption_version(dsl_dir_t *dd)
+{
+	uint64_t version = 0;
+
+	if (dsl_dir_get_encryption_version(dd, &version) != 0)
+		return (B_FALSE);
+
+	return (version != ZIO_CRYPT_KEY_CURRENT_VERSION);
+}
+
+/*
+ * Find the wrapping key registered under 'ddobj' in the keystore and take
+ * a hold on it for 'tag'. The caller must already hold sk_wkeys_lock.
+ * Returns ENOENT (and a NULL '*wkey_out') when no such key is loaded.
+ */
+static int
+spa_keystore_wkey_hold_ddobj_impl(spa_t *spa, uint64_t ddobj,
+    void *tag, dsl_wrapping_key_t **wkey_out)
+{
+	dsl_wrapping_key_t key_template;
+	dsl_wrapping_key_t *hit;
+
+	ASSERT(RW_LOCK_HELD(&spa->spa_keystore.sk_wkeys_lock));
+
+	/* only wk_ddobj is consulted by the tree's comparator */
+	key_template.wk_ddobj = ddobj;
+
+	hit = avl_find(&spa->spa_keystore.sk_wkeys, &key_template, NULL);
+	if (hit == NULL) {
+		*wkey_out = NULL;
+		return (SET_ERROR(ENOENT));
+	}
+
+	dsl_wrapping_key_hold(hit, tag);
+	*wkey_out = hit;
+
+	return (0);
+}
+
+/*
+ * Take a hold (for 'tag') on the wrapping key that protects 'dd', i.e. the
+ * key registered under dd's encryption root. sk_wkeys_lock is entered as
+ * reader here unless the calling thread already holds it as writer.
+ * On failure '*wkey_out' is set to NULL and the error is returned.
+ */
+static int
+spa_keystore_wkey_hold_dd(spa_t *spa, dsl_dir_t *dd, void *tag,
+    dsl_wrapping_key_t **wkey_out)
+{
+	int ret;
+	uint64_t rddobj;
+	dsl_wrapping_key_t *wkey = NULL;
+	boolean_t entered = B_FALSE;
+
+	if (!RW_WRITE_HELD(&spa->spa_keystore.sk_wkeys_lock)) {
+		rw_enter(&spa->spa_keystore.sk_wkeys_lock, RW_READER);
+		entered = B_TRUE;
+	}
+
+	/* resolve the encryption root, then look its key up in the tree */
+	ret = dsl_dir_get_encryption_root_ddobj(dd, &rddobj);
+	if (ret == 0) {
+		ret = spa_keystore_wkey_hold_ddobj_impl(spa, rddobj, tag,
+		    &wkey);
+	}
+
+	/* only drop the lock if this call was the one that took it */
+	if (entered)
+		rw_exit(&spa->spa_keystore.sk_wkeys_lock);
+
+	*wkey_out = (ret == 0) ? wkey : NULL;
+
+	return (ret);
+}
+
+/*
+ * Decide whether 'keylocation' may be set on the dataset named 'dsname'.
+ *
+ * Returns 0 when allowed. An unencrypted dataset accepts only "none"
+ * (EACCES otherwise). An encrypted dataset needs a valid keylocation
+ * (EINVAL otherwise) and must be its own encryption root (EACCES
+ * otherwise). Errors from the pool / dir holds and from the encryption
+ * root lookup are passed through.
+ */
+int
+dsl_crypto_can_set_keylocation(const char *dsname, const char *keylocation)
+{
+	int ret;
+	uint64_t rddobj;
+	dsl_pool_t *dp = NULL;
+	dsl_dir_t *dd = NULL;
+
+	ret = dsl_pool_hold(dsname, FTAG, &dp);
+	if (ret != 0)
+		goto out;
+
+	ret = dsl_dir_hold(dp, dsname, FTAG, &dd, NULL);
+	if (ret != 0)
+		goto out;
+
+	if (dd->dd_crypto_obj == 0) {
+		/* not encrypted: "none" is the only acceptable value */
+		if (strcmp(keylocation, "none") != 0)
+			ret = SET_ERROR(EACCES);
+		goto out;
+	}
+
+	/* encrypted datasets need a real, valid keylocation */
+	if (!zfs_prop_valid_keylocation(keylocation, B_TRUE)) {
+		ret = SET_ERROR(EINVAL);
+		goto out;
+	}
+
+	/* and the property may only be set on the encryption root itself */
+	ret = dsl_dir_get_encryption_root_ddobj(dd, &rddobj);
+	if (ret == 0 && rddobj != dd->dd_object)
+		ret = SET_ERROR(EACCES);
+
+out:
+	if (dd != NULL)
+		dsl_dir_rele(dd, FTAG);
+	if (dp != NULL)
+		dsl_pool_rele(dp, FTAG);
+
+	return (ret);
+}
+
+/*
+ * Destroy a DSL Crypto Key that has no holds left: tear down its
+ * zio_crypt_key_t and refcount, drop the hold it has on its wrapping key
+ * (if any), and free the structure.
+ */
+static void
+dsl_crypto_key_free(dsl_crypto_key_t *dck)
+{
+	dsl_wrapping_key_t *wkey = dck->dck_wkey;
+
+	ASSERT(zfs_refcount_count(&dck->dck_holds) == 0);
+
+	zio_crypt_key_destroy(&dck->dck_key);
+	zfs_refcount_destroy(&dck->dck_holds);
+
+	/* the dck itself is the tag used for its wrapping key hold */
+	if (wkey != NULL)
+		dsl_wrapping_key_rele(wkey, dck);
+
+	kmem_free(dck, sizeof (dsl_crypto_key_t));
+}
+
+/*
+ * Drop the hold 'tag' has on 'dck' and free the key once no holds remain.
+ * This does not touch the keystore's AVL tree.
+ */
+static void
+dsl_crypto_key_rele(dsl_crypto_key_t *dck, void *tag)
+{
+	if (zfs_refcount_remove(&dck->dck_holds, tag) != 0)
+		return;
+
+	dsl_crypto_key_free(dck);
+}
+
+/*
+ * Read the wrapped DSL Crypto Key stored in ZAP object 'dckobj' of 'mos',
+ * unwrap it with 'wkey' and hand it back through '*dck_out' with a single
+ * hold tagged 'tag'. The new key also takes its own hold on 'wkey'.
+ *
+ * Returns 0 on success. A failed ZAP lookup returns that lookup's error
+ * and a failed unwrap returns EACCES; either way '*dck_out' is set to NULL.
+ */
+static int
+dsl_crypto_key_open(objset_t *mos, dsl_wrapping_key_t *wkey,
+    uint64_t dckobj, void *tag, dsl_crypto_key_t **dck_out)
+{
+	int ret;
+	uint64_t crypt = 0, guid = 0, version = 0;
+	uint8_t raw_keydata[MASTER_KEY_MAX_LEN];
+	uint8_t raw_hmac_keydata[SHA512_HMAC_KEYLEN];
+	uint8_t iv[WRAPPING_IV_LEN];
+	uint8_t mac[WRAPPING_MAC_LEN];
+	dsl_crypto_key_t *dck;
+
+	/*
+	 * Allocate and zero the key. A KM_SLEEP allocation waits for memory
+	 * instead of returning NULL, so there is no failure case to handle.
+	 */
+	dck = kmem_zalloc(sizeof (dsl_crypto_key_t), KM_SLEEP);
+
+	/* fetch all of the values we need from the ZAP */
+	ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_CRYPTO_SUITE, 8, 1,
+	    &crypt);
+	if (ret != 0)
+		goto error;
+
+	ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_GUID, 8, 1, &guid);
+	if (ret != 0)
+		goto error;
+
+	ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_MASTER_KEY, 1,
+	    MASTER_KEY_MAX_LEN, raw_keydata);
+	if (ret != 0)
+		goto error;
+
+	ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_HMAC_KEY, 1,
+	    SHA512_HMAC_KEYLEN, raw_hmac_keydata);
+	if (ret != 0)
+		goto error;
+
+	ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_IV, 1, WRAPPING_IV_LEN,
+	    iv);
+	if (ret != 0)
+		goto error;
+
+	ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_MAC, 1, WRAPPING_MAC_LEN,
+	    mac);
+	if (ret != 0)
+		goto error;
+
+	/*
+	 * The initial on-disk format for encryption did not have a version,
+	 * so a failed lookup is ignored and 'version' stays 0.
+	 */
+	(void) zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_VERSION, 8, 1, &version);
+
+	/*
+	 * Unwrap the keys. If there is an error return EACCES to indicate
+	 * an authentication failure.
+	 */
+	ret = zio_crypt_key_unwrap(&wkey->wk_key, crypt, version, guid,
+	    raw_keydata, raw_hmac_keydata, iv, mac, &dck->dck_key);
+	if (ret != 0) {
+		ret = SET_ERROR(EACCES);
+		goto error;
+	}
+
+	/* finish initializing the dsl_crypto_key_t */
+	zfs_refcount_create(&dck->dck_holds);
+	dsl_wrapping_key_hold(wkey, dck);
+	dck->dck_wkey = wkey;
+	dck->dck_obj = dckobj;
+	(void) zfs_refcount_add(&dck->dck_holds, tag);
+
+	*dck_out = dck;
+	return (0);
+
+error:
+	/* scrub the structure before giving the memory back */
+	bzero(dck, sizeof (dsl_crypto_key_t));
+	kmem_free(dck, sizeof (dsl_crypto_key_t));
+
+	*dck_out = NULL;
+	return (ret);
+}
+
+/*
+ * Find the DSL Crypto Key for ZAP object 'dckobj' among the keys already
+ * loaded in the keystore and add a hold on it for 'tag'. The caller must
+ * hold sk_dk_lock. Returns ENOENT (and a NULL '*dck_out') when the key is
+ * not in the tree.
+ */
+static int
+spa_keystore_dsl_key_hold_impl(spa_t *spa, uint64_t dckobj, void *tag,
+    dsl_crypto_key_t **dck_out)
+{
+	dsl_crypto_key_t key_template;
+	dsl_crypto_key_t *hit;
+
+	ASSERT(RW_LOCK_HELD(&spa->spa_keystore.sk_dk_lock));
+
+	/* only dck_obj is consulted by the tree's comparator */
+	key_template.dck_obj = dckobj;
+
+	hit = avl_find(&spa->spa_keystore.sk_dsl_keys, &key_template, NULL);
+	if (hit == NULL) {
+		*dck_out = NULL;
+		return (SET_ERROR(ENOENT));
+	}
+
+	(void) zfs_refcount_add(&hit->dck_holds, tag);
+	*dck_out = hit;
+
+	return (0);
+}
+
+/*
+ * Return, through '*dck_out', the DSL Crypto Key of 'dd' with a hold for
+ * 'tag'. The key is taken from the keystore if it is already loaded;
+ * otherwise it is read from disk, unwrapped with dd's wrapping key and
+ * inserted into the keystore.
+ *
+ * Returns 0 on success, EACCES when the wrapping key is not loaded, or the
+ * error from dsl_crypto_key_open(). On failure '*dck_out' is NULL.
+ */
+static int
+spa_keystore_dsl_key_hold_dd(spa_t *spa, dsl_dir_t *dd, void *tag,
+    dsl_crypto_key_t **dck_out)
+{
+	int ret;
+	avl_index_t where;
+	dsl_crypto_key_t *dck_io = NULL, *dck_ks = NULL;
+	dsl_wrapping_key_t *wkey = NULL;
+	uint64_t dckobj = dd->dd_crypto_obj;
+
+	/* Lookup the key in the tree of currently loaded keys */
+	rw_enter(&spa->spa_keystore.sk_dk_lock, RW_READER);
+	ret = spa_keystore_dsl_key_hold_impl(spa, dckobj, tag, &dck_ks);
+	rw_exit(&spa->spa_keystore.sk_dk_lock);
+	if (ret == 0) {
+		*dck_out = dck_ks;
+		return (0);
+	}
+
+	/* Lookup the wrapping key from the keystore */
+	ret = spa_keystore_wkey_hold_dd(spa, dd, FTAG, &wkey);
+	if (ret != 0) {
+		*dck_out = NULL;
+		return (SET_ERROR(EACCES));
+	}
+
+	/* Read the key from disk */
+	ret = dsl_crypto_key_open(spa->spa_meta_objset, wkey, dckobj,
+	    tag, &dck_io);
+	if (ret != 0) {
+		dsl_wrapping_key_rele(wkey, FTAG);
+		*dck_out = NULL;
+		return (ret);
+	}
+
+	/*
+	 * Add the key to the keystore.  It may already exist if it was
+	 * added while performing the read from disk.  In this case discard
+	 * it and return the key from the keystore.
+	 */
+	rw_enter(&spa->spa_keystore.sk_dk_lock, RW_WRITER);
+	ret = spa_keystore_dsl_key_hold_impl(spa, dckobj, tag, &dck_ks);
+	if (ret != 0) {
+		(void) avl_find(&spa->spa_keystore.sk_dsl_keys, dck_io, &where);
+		avl_insert(&spa->spa_keystore.sk_dsl_keys, dck_io, where);
+		*dck_out = dck_io;
+	} else {
+		/*
+		 * dck_io still carries the 'tag' hold that
+		 * dsl_crypto_key_open() gave it, so it has to be released
+		 * rather than freed directly: dsl_crypto_key_free() requires
+		 * a hold count of zero. It was never inserted into the tree,
+		 * so the plain (non-keystore) release is the right one.
+		 */
+		dsl_crypto_key_rele(dck_io, tag);
+		*dck_out = dck_ks;
+	}
+
+	/* Release the wrapping key (the dsl key now has a reference to it) */
+	dsl_wrapping_key_rele(wkey, FTAG);
+	rw_exit(&spa->spa_keystore.sk_dk_lock);
+
+	return (0);
+}
+
+/*
+ * Drop the hold 'tag' has on a DSL Crypto Key that lives in the keystore.
+ * When that was the last hold the key is removed from the keystore's tree
+ * and freed, all under sk_dk_lock held as writer.
+ */
+void
+spa_keystore_dsl_key_rele(spa_t *spa, dsl_crypto_key_t *dck, void *tag)
+{
+	boolean_t last_hold;
+
+	rw_enter(&spa->spa_keystore.sk_dk_lock, RW_WRITER);
+
+	last_hold = (zfs_refcount_remove(&dck->dck_holds, tag) == 0);
+	if (last_hold) {
+		avl_remove(&spa->spa_keystore.sk_dsl_keys, dck);
+		dsl_crypto_key_free(dck);
+	}
+
+	rw_exit(&spa->spa_keystore.sk_dk_lock);
+}
+
+/*
+ * Insert 'wkey' into the keystore's wrapping key tree. Returns EEXIST,
+ * leaving the tree untouched, when a key with the same wk_ddobj is
+ * already loaded.
+ */
+int
+spa_keystore_load_wkey_impl(spa_t *spa, dsl_wrapping_key_t *wkey)
+{
+	int ret = 0;
+	avl_index_t where;
+
+	rw_enter(&spa->spa_keystore.sk_wkeys_lock, RW_WRITER);
+
+	if (avl_find(&spa->spa_keystore.sk_wkeys, wkey, &where) != NULL)
+		ret = SET_ERROR(EEXIST);
+	else
+		avl_insert(&spa->spa_keystore.sk_wkeys, wkey, where);
+
+	rw_exit(&spa->spa_keystore.sk_wkeys_lock);
+
+	return (ret);
+}
+
+/*
+ * Verify the wrapping key in 'dcp' against the dataset named 'dsname' and,
+ * unless 'noop' is set, load it into the pool's keystore.
+ *
+ * 'dcp' must carry a wrapping key and nothing else (no command, inherited
+ * crypt, no keylocation), otherwise EINVAL is returned. The dataset must
+ * be an encryption root (EINVAL otherwise): wrapping keys are looked up by
+ * encryption root ddobj, so a key registered under any other dir could
+ * never be found again. The key is verified by using it to open the dir's
+ * DSL Crypto Key; dsl_crypto_key_open() reports EACCES when the key does
+ * not unwrap it.
+ *
+ * Returns 0 on success (including a successful 'noop' check), ENOTSUP when
+ * the encryption feature is not enabled, EEXIST (from
+ * spa_keystore_load_wkey_impl()) when a key is already loaded for this
+ * dir, or an error from one of the holds / ZAP lookups.
+ */
+int
+spa_keystore_load_wkey(const char *dsname, dsl_crypto_params_t *dcp,
+    boolean_t noop)
+{
+	int ret;
+	dsl_dir_t *dd = NULL;
+	dsl_crypto_key_t *dck = NULL;
+	dsl_wrapping_key_t *wkey = dcp->cp_wkey;
+	dsl_pool_t *dp = NULL;
+	uint64_t rddobj, keyformat, salt, iters;
+
+	/*
+	 * We don't validate the wrapping key's keyformat, salt, or iters
+	 * since they will never be needed after the DCK has been wrapped.
+	 */
+	if (dcp->cp_wkey == NULL ||
+	    dcp->cp_cmd != DCP_CMD_NONE ||
+	    dcp->cp_crypt != ZIO_CRYPT_INHERIT ||
+	    dcp->cp_keylocation != NULL)
+		return (SET_ERROR(EINVAL));
+
+	ret = dsl_pool_hold(dsname, FTAG, &dp);
+	if (ret != 0)
+		goto out;
+
+	if (!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_ENCRYPTION)) {
+		ret = (SET_ERROR(ENOTSUP));
+		goto out;
+	}
+
+	/* hold the dsl dir */
+	ret = dsl_dir_hold(dp, dsname, FTAG, &dd, NULL);
+	if (ret != 0)
+		goto out;
+
+	/* confirm that dd is the encryption root */
+	ret = dsl_dir_get_encryption_root_ddobj(dd, &rddobj);
+	if (ret != 0 || rddobj != dd->dd_object) {
+		ret = SET_ERROR(EINVAL);
+		goto out;
+	}
+
+	/* initialize the wkey's ddobj */
+	wkey->wk_ddobj = dd->dd_object;
+
+	/* verify that the wkey is correct by opening its dsl key */
+	ret = dsl_crypto_key_open(dp->dp_meta_objset, wkey,
+	    dd->dd_crypto_obj, FTAG, &dck);
+	if (ret != 0)
+		goto out;
+
+	/* initialize the wkey encryption parameters from the DSL Crypto Key */
+	ret = zap_lookup(dp->dp_meta_objset, dd->dd_crypto_obj,
+	    zfs_prop_to_name(ZFS_PROP_KEYFORMAT), 8, 1, &keyformat);
+	if (ret != 0)
+		goto out;
+
+	ret = zap_lookup(dp->dp_meta_objset, dd->dd_crypto_obj,
+	    zfs_prop_to_name(ZFS_PROP_PBKDF2_SALT), 8, 1, &salt);
+	if (ret != 0)
+		goto out;
+
+	ret = zap_lookup(dp->dp_meta_objset, dd->dd_crypto_obj,
+	    zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS), 8, 1, &iters);
+	if (ret != 0)
+		goto out;
+
+	ASSERT3U(keyformat, <, ZFS_KEYFORMAT_FORMATS);
+	ASSERT3U(keyformat, !=, ZFS_KEYFORMAT_NONE);
+	IMPLY(keyformat == ZFS_KEYFORMAT_PASSPHRASE, iters != 0);
+	IMPLY(keyformat == ZFS_KEYFORMAT_PASSPHRASE, salt != 0);
+	IMPLY(keyformat != ZFS_KEYFORMAT_PASSPHRASE, iters == 0);
+	IMPLY(keyformat != ZFS_KEYFORMAT_PASSPHRASE, salt == 0);
+
+	wkey->wk_keyformat = keyformat;
+	wkey->wk_salt = salt;
+	wkey->wk_iters = iters;
+
+	/*
+	 * At this point we have verified the wkey and confirmed that it can
+	 * be used to decrypt a DSL Crypto Key. We can simply cleanup and
+	 * return if this is all the user wanted to do ('ret' is 0 here).
+	 */
+	if (noop)
+		goto out;
+
+	/* insert the wrapping key into the keystore */
+	ret = spa_keystore_load_wkey_impl(dp->dp_spa, wkey);
+
+out:
+	/* shared by the success and failure paths; 'ret' tells them apart */
+	if (dck != NULL)
+		dsl_crypto_key_rele(dck, FTAG);
+	if (dd != NULL)
+		dsl_dir_rele(dd, FTAG);
+	if (dp != NULL)
+		dsl_pool_rele(dp, FTAG);
+
+	return (ret);
+}
+
+/*
+ * Remove the wrapping key registered under 'ddobj' from the keystore and
+ * free it. Returns EACCES when no such key is loaded and EBUSY when the
+ * key still has references; the tree is left untouched in both cases.
+ */
+int
+spa_keystore_unload_wkey_impl(spa_t *spa, uint64_t ddobj)
+{
+	int ret = 0;
+	dsl_wrapping_key_t key_template;
+	dsl_wrapping_key_t *victim;
+
+	/* only wk_ddobj is consulted by the tree's comparator */
+	key_template.wk_ddobj = ddobj;
+
+	rw_enter(&spa->spa_keystore.sk_wkeys_lock, RW_WRITER);
+
+	victim = avl_find(&spa->spa_keystore.sk_wkeys, &key_template, NULL);
+	if (victim == NULL)
+		ret = SET_ERROR(EACCES);
+	else if (zfs_refcount_count(&victim->wk_refcnt) != 0)
+		ret = SET_ERROR(EBUSY);
+	else
+		avl_remove(&spa->spa_keystore.sk_wkeys, victim);
+
+	rw_exit(&spa->spa_keystore.sk_wkeys_lock);
+
+	/* the key is freed outside of the lock, once it is unlinked */
+	if (ret == 0)
+		dsl_wrapping_key_free(victim);
+
+	return (ret);
+}
+
+/*
+ * Unload the wrapping key of the dataset named 'dsname' from its pool's
+ * keystore. Returns ENOTSUP when the encryption feature is not enabled,
+ * the error from spa_keystore_unload_wkey_impl() when the key cannot be
+ * removed, or an error from opening / holding the pool and dsl dir.
+ */
+int
+spa_keystore_unload_wkey(const char *dsname)
+{
+	int ret;
+	spa_t *spa = NULL;
+	dsl_pool_t *dp = NULL;
+	dsl_dir_t *dd = NULL;
+
+	ret = spa_open(dsname, &spa, FTAG);
+	if (ret != 0)
+		return (ret);
+
+	/*
+	 * Let outstanding txg IO finish first so that it drops whatever
+	 * references it still has on the wkey. Skipped when the pool was
+	 * opened with mode FREAD.
+	 */
+	if (spa_mode(spa) != FREAD)
+		txg_wait_synced(spa->spa_dsl_pool, 0);
+
+	spa_close(spa, FTAG);
+
+	ret = dsl_pool_hold(dsname, FTAG, &dp);
+	if (ret != 0)
+		goto out;
+
+	if (!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_ENCRYPTION)) {
+		ret = (SET_ERROR(ENOTSUP));
+		goto out;
+	}
+
+	ret = dsl_dir_hold(dp, dsname, FTAG, &dd, NULL);
+	if (ret != 0)
+		goto out;
+
+	/* the key is registered under this dir's object number */
+	ret = spa_keystore_unload_wkey_impl(dp->dp_spa, dd->dd_object);
+
+out:
+	if (dd != NULL)
+		dsl_dir_rele(dd, FTAG);
+	if (dp != NULL)
+		dsl_pool_rele(dp, FTAG);
+
+	return (ret);
+}
+
+/*
+ * Add a reference for 'tag' to a key mapping that is already referenced
+ * by at least one other holder (asserted).
+ */
+void
+key_mapping_add_ref(dsl_key_mapping_t *km, void *tag)
+{
+	/* a mapping with no references may already be on its way out */
+	ASSERT3U(zfs_refcount_count(&km->km_refcnt), >=, 1);
+
+	(void) zfs_refcount_add(&km->km_refcnt, tag);
+}
+
+/*
+ * Drop the reference 'tag' has on key mapping 'km' and destroy the mapping
+ * once the last reference is gone.
+ *
+ * The locking here is a little tricky to ensure we don't cause unnecessary
+ * performance problems. We want to release a key mapping whenever someone
+ * decrements the refcount to 0, but freeing the mapping requires removing
+ * it from the spa_keystore, which requires holding sk_km_lock as a writer.
+ * Most of the time we don't want to hold this lock as a writer, since the
+ * same lock is held as a reader for each IO that needs to encrypt / decrypt
+ * data for any dataset and in practice we will only actually free the
+ * mapping after unmounting a dataset.
+ */
+void
+key_mapping_rele(spa_t *spa, dsl_key_mapping_t *km, void *tag)
+{
+	ASSERT3U(zfs_refcount_count(&km->km_refcnt), >=, 1);
+
+	/* common case: other references remain, no lock needed */
+	if (zfs_refcount_remove(&km->km_refcnt, tag) != 0)
+		return;
+
+	/*
+	 * We think we are going to need to free the mapping. Add a
+	 * reference to prevent most other releasers from thinking
+	 * this might be their responsibility. This is inherently
+	 * racy, so we will confirm that we are legitimately the
+	 * last holder once we have the sk_km_lock as a writer.
+	 */
+	(void) zfs_refcount_add(&km->km_refcnt, FTAG);
+
+	rw_enter(&spa->spa_keystore.sk_km_lock, RW_WRITER);
+	if (zfs_refcount_remove(&km->km_refcnt, FTAG) != 0) {
+		/* someone took a new reference in the meantime; keep it */
+		rw_exit(&spa->spa_keystore.sk_km_lock);
+		return;
+	}
+
+	avl_remove(&spa->spa_keystore.sk_key_mappings, km);
+	rw_exit(&spa->spa_keystore.sk_km_lock);
+
+	/* the mapping itself is the tag for its hold on the DSL key */
+	spa_keystore_dsl_key_rele(spa, km->km_key, km);
+
+	/*
+	 * Tear down the refcount set up by zfs_refcount_create() before the
+	 * memory goes away, as spa_keystore_create_mapping() does on both of
+	 * its own free paths.
+	 */
+	zfs_refcount_destroy(&km->km_refcnt);
+	kmem_free(km, sizeof (dsl_key_mapping_t));
+}
+
+/*
+ * Ensure a key mapping exists for dataset 'ds' and add a reference to it
+ * for 'tag'. If 'km_out' is not NULL it receives the mapping (or NULL on
+ * failure). Fails with the error from spa_keystore_dsl_key_hold_dd() when
+ * the dataset's DSL Crypto Key cannot be held.
+ */
+int
+spa_keystore_create_mapping(spa_t *spa, dsl_dataset_t *ds, void *tag,
+    dsl_key_mapping_t **km_out)
+{
+	int ret;
+	avl_index_t where;
+	dsl_key_mapping_t *km, *existing, *result;
+
+	/*
+	 * Build a candidate mapping up front. Allocation and the key hold
+	 * are done before taking sk_km_lock because that lock is also used
+	 * by the zio layer to look up key mappings.
+	 */
+	km = kmem_zalloc(sizeof (dsl_key_mapping_t), KM_SLEEP);
+	zfs_refcount_create(&km->km_refcnt);
+
+	ret = spa_keystore_dsl_key_hold_dd(spa, ds->ds_dir, km, &km->km_key);
+	if (ret != 0) {
+		zfs_refcount_destroy(&km->km_refcnt);
+		kmem_free(km, sizeof (dsl_key_mapping_t));
+
+		if (km_out != NULL)
+			*km_out = NULL;
+		return (ret);
+	}
+
+	km->km_dsobj = ds->ds_object;
+
+	rw_enter(&spa->spa_keystore.sk_km_lock, RW_WRITER);
+
+	/*
+	 * Normally there is only one active reference at a time (the objset
+	 * owner), but there can be multiple async users. If somebody else
+	 * already installed a mapping for this dataset, reference theirs;
+	 * otherwise install the candidate.
+	 */
+	existing = avl_find(&spa->spa_keystore.sk_key_mappings, km, &where);
+	if (existing == NULL) {
+		(void) zfs_refcount_add(&km->km_refcnt, tag);
+		avl_insert(&spa->spa_keystore.sk_key_mappings, km, where);
+		result = km;
+	} else {
+		(void) zfs_refcount_add(&existing->km_refcnt, tag);
+		result = existing;
+	}
+
+	if (km_out != NULL)
+		*km_out = result;
+
+	rw_exit(&spa->spa_keystore.sk_km_lock);
+
+	/* the candidate lost the race: undo it, again outside of the lock */
+	if (existing != NULL) {
+		spa_keystore_dsl_key_rele(spa, km->km_key, km);
+		zfs_refcount_destroy(&km->km_refcnt);
+		kmem_free(km, sizeof (dsl_key_mapping_t));
+	}
+
+	return (0);
+}
+
+/*
+ * Drop the reference 'tag' has on the key mapping of dataset object
+ * 'dsobj'. Returns ENOENT when no mapping exists for that dataset.
+ */
+int
+spa_keystore_remove_mapping(spa_t *spa, uint64_t dsobj, void *tag)
+{
+	dsl_key_mapping_t km_template;
+	dsl_key_mapping_t *km;
+
+	/* only km_dsobj is consulted by the tree's comparator */
+	km_template.km_dsobj = dsobj;
+
+	rw_enter(&spa->spa_keystore.sk_km_lock, RW_READER);
+	km = avl_find(&spa->spa_keystore.sk_key_mappings, &km_template, NULL);
+	rw_exit(&spa->spa_keystore.sk_km_lock);
+
+	if (km == NULL)
+		return (SET_ERROR(ENOENT));
+
+	/* key_mapping_rele() takes the lock itself if it has to free */
+	key_mapping_rele(spa, km, tag);
+
+	return (0);
+}
+
+/*
+ * This function is primarily used by the zio and arc layer to lookup
+ * DSL Crypto Keys for encryption. Callers must release the key with
+ * spa_keystore_dsl_key_rele(). The function may also be called with
+ * dck_out == NULL and tag == NULL to simply check that a key exists
+ * without getting a reference to it.
+ *
+ * Returns 0 when a key mapping exists for dataset object 'dsobj' and
+ * ENOENT (with '*dck_out' set to NULL, if given) when it does not.
+ */
+int
+spa_keystore_lookup_key(spa_t *spa, uint64_t dsobj, void *tag,
+    dsl_crypto_key_t **dck_out)
+{
+	int ret;
+	dsl_key_mapping_t search_km;
+	dsl_key_mapping_t *found_km;
+	dsl_crypto_key_t *dck;
+
+	ASSERT((tag != NULL && dck_out != NULL) ||
+	    (tag == NULL && dck_out == NULL));
+
+	/* init the search key mapping */
+	search_km.km_dsobj = dsobj;
+
+	rw_enter(&spa->spa_keystore.sk_km_lock, RW_READER);
+
+	/* find the mapping for this dataset */
+	found_km = avl_find(&spa->spa_keystore.sk_key_mappings, &search_km,
+	    NULL);
+	if (found_km == NULL) {
+		ret = SET_ERROR(ENOENT);
+		goto error_unlock;
+	}
+
+	/*
+	 * Pick up the key pointer while sk_km_lock still keeps the mapping
+	 * in the tree. key_mapping_rele() frees a mapping only after taking
+	 * this lock as writer, so found_km must not be dereferenced once
+	 * the lock has been dropped.
+	 */
+	dck = found_km->km_key;
+	if (tag != NULL)
+		(void) zfs_refcount_add(&dck->dck_holds, tag);
+
+	rw_exit(&spa->spa_keystore.sk_km_lock);
+
+	if (dck_out != NULL)
+		*dck_out = dck;
+	return (0);
+
+error_unlock:
+	rw_exit(&spa->spa_keystore.sk_km_lock);
+
+	if (dck_out != NULL)
+		*dck_out = NULL;
+	return (ret);
+}
+
+/*
+ * Returns 0 when the wrapping key protecting 'dd' is currently loaded in
+ * the keystore and EACCES when it cannot be held for any reason.
+ */
+static int
+dmu_objset_check_wkey_loaded(dsl_dir_t *dd)
+{
+	dsl_wrapping_key_t *wkey = NULL;
+	spa_t *spa = dd->dd_pool->dp_spa;
+
+	if (spa_keystore_wkey_hold_dd(spa, dd, FTAG, &wkey) != 0)
+		return (SET_ERROR(EACCES));
+
+	/* the hold was only needed to prove the key is there */
+	dsl_wrapping_key_rele(wkey, FTAG);
+
+	return (0);
+}
+
+/*
+ * Report the key status of 'dd': NONE when it has no DSL Crypto Key
+ * object, otherwise AVAILABLE or UNAVAILABLE depending on whether its
+ * wrapping key is loaded.
+ */
+static zfs_keystatus_t
+dsl_dataset_get_keystatus(dsl_dir_t *dd)
+{
+	/* a dd without a dsl key is not encrypted at all */
+	if (dd->dd_crypto_obj == 0)
+		return (ZFS_KEYSTATUS_NONE);
+
+	if (dmu_objset_check_wkey_loaded(dd) != 0)
+		return (ZFS_KEYSTATUS_UNAVAILABLE);
+
+	return (ZFS_KEYSTATUS_AVAILABLE);
+}
+
+/*
+ * Store the crypto suite of 'dd' in '*crypt'. A dir without a crypto
+ * object yields ZIO_CRYPT_OFF; otherwise the value is read from the
+ * dir's crypto key ZAP object and the lookup's result is returned.
+ */
+static int
+dsl_dir_get_crypt(dsl_dir_t *dd, uint64_t *crypt)
+{
+	if (dd->dd_crypto_obj != 0) {
+		objset_t *mos = dd->dd_pool->dp_meta_objset;
+
+		return (zap_lookup(mos, dd->dd_crypto_obj,
+		    DSL_CRYPTO_KEY_CRYPTO_SUITE, 8, 1, crypt));
+	}
+
+	*crypt = ZIO_CRYPT_OFF;
+	return (0);
+}
+
+/*
+ * Write every field of a wrapped DSL Crypto Key into ZAP object 'dckobj'
+ * of 'mos' as part of 'tx'. Each update is wrapped in VERIFY0(), so a
+ * failing update is treated as fatal rather than returned.
+ */
+static void
+dsl_crypto_key_sync_impl(objset_t *mos, uint64_t dckobj, uint64_t crypt,
+    uint64_t root_ddobj, uint64_t guid, uint8_t *iv, uint8_t *mac,
+    uint8_t *keydata, uint8_t *hmac_keydata, uint64_t keyformat,
+    uint64_t salt, uint64_t iters, dmu_tx_t *tx)
+{
+	const char *keyformat_name = zfs_prop_to_name(ZFS_PROP_KEYFORMAT);
+	const char *salt_name = zfs_prop_to_name(ZFS_PROP_PBKDF2_SALT);
+	const char *iters_name = zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS);
+
+	/* identity of the key: suite, encryption root and guid */
+	VERIFY0(zap_update(mos, dckobj, DSL_CRYPTO_KEY_CRYPTO_SUITE, 8, 1,
+	    &crypt, tx));
+	VERIFY0(zap_update(mos, dckobj, DSL_CRYPTO_KEY_ROOT_DDOBJ, 8, 1,
+	    &root_ddobj, tx));
+	VERIFY0(zap_update(mos, dckobj, DSL_CRYPTO_KEY_GUID, 8, 1,
+	    &guid, tx));
+
+	/* wrapped key material with the IV and MAC of the wrapping */
+	VERIFY0(zap_update(mos, dckobj, DSL_CRYPTO_KEY_IV, 1, WRAPPING_IV_LEN,
+	    iv, tx));
+	VERIFY0(zap_update(mos, dckobj, DSL_CRYPTO_KEY_MAC, 1, WRAPPING_MAC_LEN,
+	    mac, tx));
+	VERIFY0(zap_update(mos, dckobj, DSL_CRYPTO_KEY_MASTER_KEY, 1,
+	    MASTER_KEY_MAX_LEN, keydata, tx));
+	VERIFY0(zap_update(mos, dckobj, DSL_CRYPTO_KEY_HMAC_KEY, 1,
+	    SHA512_HMAC_KEYLEN, hmac_keydata, tx));
+
+	/* parameters of the wrapping key, stored under their property names */
+	VERIFY0(zap_update(mos, dckobj, keyformat_name, 8, 1, &keyformat, tx));
+	VERIFY0(zap_update(mos, dckobj, salt_name, 8, 1, &salt, tx));
+	VERIFY0(zap_update(mos, dckobj, iters_name, 8, 1, &iters, tx));
+}
+
+/*
+ * Wrap the in-memory key of 'dck' with its current wrapping key and write
+ * the result, together with the wrapping key's parameters, to the key's
+ * ZAP object. Must be called with a syncing 'tx' (asserted).
+ */
+static void
+dsl_crypto_key_sync(dsl_crypto_key_t *dck, dmu_tx_t *tx)
+{
+	dsl_wrapping_key_t *wkey = dck->dck_wkey;
+	zio_crypt_key_t *key = &dck->dck_key;
+	objset_t *mos;
+	uint8_t iv[WRAPPING_IV_LEN];
+	uint8_t mac[WRAPPING_MAC_LEN];
+	uint8_t keydata[MASTER_KEY_MAX_LEN];
+	uint8_t hmac_keydata[SHA512_HMAC_KEYLEN];
+
+	ASSERT(dmu_tx_is_syncing(tx));
+	ASSERT3U(key->zk_crypt, <, ZIO_CRYPT_FUNCTIONS);
+
+	/* wrapping produces the encrypted keys plus a fresh IV and MAC */
+	VERIFY0(zio_crypt_key_wrap(&wkey->wk_key, key, iv, mac,
+	    keydata, hmac_keydata));
+
+	/* persist them; the wkey's ddobj is recorded as the encryption root */
+	mos = tx->tx_pool->dp_meta_objset;
+	dsl_crypto_key_sync_impl(mos, dck->dck_obj, key->zk_crypt,
+	    wkey->wk_ddobj, key->zk_guid, iv, mac, keydata, hmac_keydata,
+	    wkey->wk_keyformat, wkey->wk_salt, wkey->wk_iters, tx);
+}
+
+/* Arguments shared by the change-key check and sync functions. */
+typedef struct spa_keystore_change_key_args {
+	const char *skcka_dsname;	/* name of the dataset to operate on */
+	dsl_crypto_params_t *skcka_cp;	/* requested command and parameters */
+} spa_keystore_change_key_args_t;
+
+/*
+ * Validate a change-key request ('arg' is a
+ * spa_keystore_change_key_args_t) without modifying anything.
+ *
+ * Returns 0 when the request is acceptable. Returns ENOTSUP when the
+ * encryption feature is not enabled, EINVAL for any malformed or
+ * inapplicable request, the error from dmu_objset_check_wkey_loaded()
+ * (EACCES) when a wrapping key that is needed is not loaded, or an error
+ * from holding the dsl dir / looking up its encryption root.
+ *
+ * Per command:
+ *  - INHERIT / FORCE_INHERIT: no other parameters may be given, the
+ *    dataset must be an encryption root and its parent must be encrypted.
+ *    INHERIT additionally needs both the dataset's and the parent's
+ *    wrapping keys loaded.
+ *  - FORCE_NEW_KEY: no other parameters may be given and the dataset must
+ *    not already be an encryption root.
+ *  - NEW_KEY: a wrapping key with a valid keyformat and matching pbkdf2
+ *    parameters is required, crypt may not change, a keylocation is
+ *    required unless the dataset already is an encryption root, and the
+ *    dataset's current wrapping key must be loaded.
+ */
+static int
+spa_keystore_change_key_check(void *arg, dmu_tx_t *tx)
+{
+	int ret;
+	dsl_dir_t *dd = NULL;
+	dsl_pool_t *dp = dmu_tx_pool(tx);
+	spa_keystore_change_key_args_t *skcka = arg;
+	dsl_crypto_params_t *dcp = skcka->skcka_cp;
+	uint64_t rddobj;
+
+	/* check for the encryption feature */
+	if (!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_ENCRYPTION)) {
+		ret = SET_ERROR(ENOTSUP);
+		goto error;
+	}
+
+	/* check for valid key change command */
+	if (dcp->cp_cmd != DCP_CMD_NEW_KEY &&
+	    dcp->cp_cmd != DCP_CMD_INHERIT &&
+	    dcp->cp_cmd != DCP_CMD_FORCE_NEW_KEY &&
+	    dcp->cp_cmd != DCP_CMD_FORCE_INHERIT) {
+		ret = SET_ERROR(EINVAL);
+		goto error;
+	}
+
+	/* hold the dd */
+	ret = dsl_dir_hold(dp, skcka->skcka_dsname, FTAG, &dd, NULL);
+	if (ret != 0)
+		goto error;
+
+	/* verify that the dataset is encrypted */
+	if (dd->dd_crypto_obj == 0) {
+		ret = SET_ERROR(EINVAL);
+		goto error;
+	}
+
+	/* clones must always use their origin's key */
+	if (dsl_dir_is_clone(dd)) {
+		ret = SET_ERROR(EINVAL);
+		goto error;
+	}
+
+	/* lookup the ddobj we are inheriting the keylocation from */
+	ret = dsl_dir_get_encryption_root_ddobj(dd, &rddobj);
+	if (ret != 0)
+		goto error;
+
+	/* Handle inheritance */
+	if (dcp->cp_cmd == DCP_CMD_INHERIT ||
+	    dcp->cp_cmd == DCP_CMD_FORCE_INHERIT) {
+		/* no other encryption params should be given */
+		if (dcp->cp_crypt != ZIO_CRYPT_INHERIT ||
+		    dcp->cp_keylocation != NULL ||
+		    dcp->cp_wkey != NULL) {
+			ret = SET_ERROR(EINVAL);
+			goto error;
+		}
+
+		/* check that this is an encryption root */
+		if (dd->dd_object != rddobj) {
+			ret = SET_ERROR(EINVAL);
+			goto error;
+		}
+
+		/*
+		 * Check that there is a parent and that it is encrypted.
+		 * A dir without a parent has nothing to inherit from.
+		 * NOTE(review): dd_parent is presumed to be NULL for the
+		 * pool's root dsl dir - confirm against dsl_dir_hold_obj().
+		 */
+		if (dd->dd_parent == NULL ||
+		    dd->dd_parent->dd_crypto_obj == 0) {
+			ret = SET_ERROR(EINVAL);
+			goto error;
+		}
+
+		/* if we are rewrapping check that both keys are loaded */
+		if (dcp->cp_cmd == DCP_CMD_INHERIT) {
+			ret = dmu_objset_check_wkey_loaded(dd);
+			if (ret != 0)
+				goto error;
+
+			ret = dmu_objset_check_wkey_loaded(dd->dd_parent);
+			if (ret != 0)
+				goto error;
+		}
+
+		dsl_dir_rele(dd, FTAG);
+		return (0);
+	}
+
+	/* handle forcing an encryption root without rewrapping */
+	if (dcp->cp_cmd == DCP_CMD_FORCE_NEW_KEY) {
+		/* no other encryption params should be given */
+		if (dcp->cp_crypt != ZIO_CRYPT_INHERIT ||
+		    dcp->cp_keylocation != NULL ||
+		    dcp->cp_wkey != NULL) {
+			ret = SET_ERROR(EINVAL);
+			goto error;
+		}
+
+		/* check that this is not an encryption root */
+		if (dd->dd_object == rddobj) {
+			ret = SET_ERROR(EINVAL);
+			goto error;
+		}
+
+		dsl_dir_rele(dd, FTAG);
+		return (0);
+	}
+
+	/* everything below applies to DCP_CMD_NEW_KEY only */
+
+	/* crypt cannot be changed after creation */
+	if (dcp->cp_crypt != ZIO_CRYPT_INHERIT) {
+		ret = SET_ERROR(EINVAL);
+		goto error;
+	}
+
+	/* we are not inheriting our parent's wkey so we need one ourselves */
+	if (dcp->cp_wkey == NULL) {
+		ret = SET_ERROR(EINVAL);
+		goto error;
+	}
+
+	/* check for a valid keyformat for the new wrapping key */
+	if (dcp->cp_wkey->wk_keyformat >= ZFS_KEYFORMAT_FORMATS ||
+	    dcp->cp_wkey->wk_keyformat == ZFS_KEYFORMAT_NONE) {
+		ret = SET_ERROR(EINVAL);
+		goto error;
+	}
+
+	/*
+	 * If this dataset is not currently an encryption root we need a new
+	 * keylocation for this dataset's new wrapping key. Otherwise we can
+	 * just keep the one we already had.
+	 */
+	if (dd->dd_object != rddobj && dcp->cp_keylocation == NULL) {
+		ret = SET_ERROR(EINVAL);
+		goto error;
+	}
+
+	/* check that the keylocation is valid if it is not NULL */
+	if (dcp->cp_keylocation != NULL &&
+	    !zfs_prop_valid_keylocation(dcp->cp_keylocation, B_TRUE)) {
+		ret = SET_ERROR(EINVAL);
+		goto error;
+	}
+
+	/* passphrases require pbkdf2 salt and iters */
+	if (dcp->cp_wkey->wk_keyformat == ZFS_KEYFORMAT_PASSPHRASE) {
+		if (dcp->cp_wkey->wk_salt == 0 ||
+		    dcp->cp_wkey->wk_iters < MIN_PBKDF2_ITERATIONS) {
+			ret = SET_ERROR(EINVAL);
+			goto error;
+		}
+	} else {
+		/* ... and every other keyformat must not carry them */
+		if (dcp->cp_wkey->wk_salt != 0 || dcp->cp_wkey->wk_iters != 0) {
+			ret = SET_ERROR(EINVAL);
+			goto error;
+		}
+	}
+
+	/* make sure the dd's wkey is loaded */
+	ret = dmu_objset_check_wkey_loaded(dd);
+	if (ret != 0)
+		goto error;
+
+	dsl_dir_rele(dd, FTAG);
+
+	return (0);
+
+error:
+	if (dd != NULL)
+		dsl_dir_rele(dd, FTAG);
+
+	return (ret);
+}
+
+
+/*
+ * Recursive worker for spa_keystore_change_key_sync(). Processes the dsl
+ * dir 'ddobj' and then each of its child dirs. Recursion stops at any dir
+ * that is hidden (name starting with '$' or '%'), is a clone, or whose
+ * encryption root is not 'rddobj'.
+ *
+ * When 'wkey' is NULL only the on-disk encryption root entry of the dir's
+ * crypto key object is rewritten to 'new_rddobj'. Otherwise the dir's DSL
+ * Crypto Key is switched over to 'wkey' and re-synced to disk.
+ *
+ * The caller must hold sk_wkeys_lock as writer (asserted). Failures of the
+ * holds and lookups are wrapped in VERIFY0().
+ */
+static void
+spa_keystore_change_key_sync_impl(uint64_t rddobj, uint64_t ddobj,
+    uint64_t new_rddobj, dsl_wrapping_key_t *wkey, dmu_tx_t *tx)
+{
+	zap_cursor_t *zc;
+	zap_attribute_t *za;
+	dsl_pool_t *dp = dmu_tx_pool(tx);
+	dsl_dir_t *dd = NULL;
+	dsl_crypto_key_t *dck = NULL;
+	uint64_t curr_rddobj;
+
+	ASSERT(RW_WRITE_HELD(&dp->dp_spa->spa_keystore.sk_wkeys_lock));
+
+	/* hold the dd */
+	VERIFY0(dsl_dir_hold_obj(dp, ddobj, NULL, FTAG, &dd));
+
+	/* ignore hidden dsl dirs */
+	if (dd->dd_myname[0] == '$' || dd->dd_myname[0] == '%') {
+		dsl_dir_rele(dd, FTAG);
+		return;
+	}
+
+	/*
+	 * Stop recursing if this dsl dir didn't inherit from the root
+	 * or if this dd is a clone.
+	 */
+	VERIFY0(dsl_dir_get_encryption_root_ddobj(dd, &curr_rddobj));
+	if (curr_rddobj != rddobj || dsl_dir_is_clone(dd)) {
+		dsl_dir_rele(dd, FTAG);
+		return;
+	}
+
+	/*
+	 * If we don't have a wrapping key just update the dck to reflect the
+	 * new encryption root. Otherwise rewrap the entire dck and re-sync it
+	 * to disk.
+	 */
+	if (wkey == NULL) {
+		VERIFY0(zap_update(dp->dp_meta_objset, dd->dd_crypto_obj,
+		    DSL_CRYPTO_KEY_ROOT_DDOBJ, 8, 1, &new_rddobj, tx));
+	} else {
+		VERIFY0(spa_keystore_dsl_key_hold_dd(dp->dp_spa, dd,
+		    FTAG, &dck));
+		/* take the hold on the new wkey before dropping the old one */
+		dsl_wrapping_key_hold(wkey, dck);
+		dsl_wrapping_key_rele(dck->dck_wkey, dck);
+		dck->dck_wkey = wkey;
+		dsl_crypto_key_sync(dck, tx);
+		spa_keystore_dsl_key_rele(dp->dp_spa, dck, FTAG);
+	}
+
+	/* cursor and attribute buffers are heap allocated, not on the stack */
+	zc = kmem_alloc(sizeof (zap_cursor_t), KM_SLEEP);
+	za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP);
+
+	/* Recurse into all child dsl dirs. */
+	for (zap_cursor_init(zc, dp->dp_meta_objset,
+	    dsl_dir_phys(dd)->dd_child_dir_zapobj);
+	    zap_cursor_retrieve(zc, za) == 0;
+	    zap_cursor_advance(zc)) {
+		spa_keystore_change_key_sync_impl(rddobj,
+		    za->za_first_integer, new_rddobj, wkey, tx);
+	}
+	zap_cursor_fini(zc);
+
+	kmem_free(za, sizeof (zap_attribute_t));
+	kmem_free(zc, sizeof (zap_cursor_t));
+
+	dsl_dir_rele(dd, FTAG);
+}
+
+/*
+ * Apply a change-key request ('arg' is a spa_keystore_change_key_args_t).
+ *
+ * For NEW_KEY / FORCE_NEW_KEY the dataset becomes (or stays) its own
+ * encryption root and its keylocation property is set when one is
+ * available; FORCE_NEW_KEY always uses "prompt". For INHERIT /
+ * FORCE_INHERIT the local keylocation is cleared and the parent's
+ * encryption root becomes the new root. The non-FORCE commands also supply
+ * a wrapping key (the new one, or the parent's) with which every affected
+ * DSL Crypto Key is rewrapped.
+ *
+ * Afterwards any wrapping key loaded under this dataset's dir is removed
+ * from the keystore and freed, and for NEW_KEY the new wrapping key is
+ * inserted in its place.
+ */
+static void
+spa_keystore_change_key_sync(void *arg, dmu_tx_t *tx)
+{
+	dsl_dataset_t *ds;
+	avl_index_t where;
+	dsl_pool_t *dp = dmu_tx_pool(tx);
+	spa_t *spa = dp->dp_spa;
+	spa_keystore_change_key_args_t *skcka = arg;
+	dsl_crypto_params_t *dcp = skcka->skcka_cp;
+	dsl_wrapping_key_t *wkey = NULL, *found_wkey;
+	dsl_wrapping_key_t wkey_search;
+	char *keylocation = dcp->cp_keylocation;
+	uint64_t rddobj, new_rddobj;
+
+	/* create and initialize the wrapping key */
+	VERIFY0(dsl_dataset_hold(dp, skcka->skcka_dsname, FTAG, &ds));
+	ASSERT(!ds->ds_is_snapshot);
+
+	if (dcp->cp_cmd == DCP_CMD_NEW_KEY ||
+	    dcp->cp_cmd == DCP_CMD_FORCE_NEW_KEY) {
+		/*
+		 * We are changing to a new wkey. Set additional properties
+		 * which can be sent along with this ioctl. Note that this
+		 * command can set keylocation even if it can't normally be
+		 * set via 'zfs set' due to a non-local keylocation.
+		 */
+		if (dcp->cp_cmd == DCP_CMD_NEW_KEY) {
+			wkey = dcp->cp_wkey;
+			wkey->wk_ddobj = ds->ds_dir->dd_object;
+		} else {
+			keylocation = "prompt";
+		}
+
+		if (keylocation != NULL) {
+			dsl_prop_set_sync_impl(ds,
+			    zfs_prop_to_name(ZFS_PROP_KEYLOCATION),
+			    ZPROP_SRC_LOCAL, 1, strlen(keylocation) + 1,
+			    keylocation, tx);
+		}
+
+		/* old root is whatever is on disk; new root is this dir */
+		VERIFY0(dsl_dir_get_encryption_root_ddobj(ds->ds_dir, &rddobj));
+		new_rddobj = ds->ds_dir->dd_object;
+	} else {
+		/*
+		 * We are inheriting the parent's wkey. Unset any local
+		 * keylocation and grab a reference to the wkey.
+		 */
+		if (dcp->cp_cmd == DCP_CMD_INHERIT) {
+			VERIFY0(spa_keystore_wkey_hold_dd(spa,
+			    ds->ds_dir->dd_parent, FTAG, &wkey));
+		}
+
+		dsl_prop_set_sync_impl(ds,
+		    zfs_prop_to_name(ZFS_PROP_KEYLOCATION), ZPROP_SRC_NONE,
+		    0, 0, NULL, tx);
+
+		/* old root is this dir; new root is the parent's root */
+		rddobj = ds->ds_dir->dd_object;
+		VERIFY0(dsl_dir_get_encryption_root_ddobj(ds->ds_dir->dd_parent,
+		    &new_rddobj));
+	}
+
+	/* only the FORCE variants run without a wrapping key */
+	if (wkey == NULL) {
+		ASSERT(dcp->cp_cmd == DCP_CMD_FORCE_INHERIT ||
+		    dcp->cp_cmd == DCP_CMD_FORCE_NEW_KEY);
+	}
+
+	rw_enter(&spa->spa_keystore.sk_wkeys_lock, RW_WRITER);
+
+	/* recurse through all children and rewrap their keys */
+	spa_keystore_change_key_sync_impl(rddobj, ds->ds_dir->dd_object,
+	    new_rddobj, wkey, tx);
+
+	/*
+	 * All references to the old wkey should be released now (if it
+	 * existed). Replace the wrapping key.
+	 */
+	wkey_search.wk_ddobj = ds->ds_dir->dd_object;
+	found_wkey = avl_find(&spa->spa_keystore.sk_wkeys, &wkey_search, NULL);
+	if (found_wkey != NULL) {
+		ASSERT0(zfs_refcount_count(&found_wkey->wk_refcnt));
+		avl_remove(&spa->spa_keystore.sk_wkeys, found_wkey);
+		dsl_wrapping_key_free(found_wkey);
+	}
+
+	if (dcp->cp_cmd == DCP_CMD_NEW_KEY) {
+		/* the keystore takes over the new wrapping key */
+		(void) avl_find(&spa->spa_keystore.sk_wkeys, wkey, &where);
+		avl_insert(&spa->spa_keystore.sk_wkeys, wkey, where);
+	} else if (wkey != NULL) {
+		/* drop the hold taken on the parent's wkey above */
+		dsl_wrapping_key_rele(wkey, FTAG);
+	}
+
+	rw_exit(&spa->spa_keystore.sk_wkeys_lock);
+
+	dsl_dataset_rele(ds, FTAG);
+}
+
+/*
+ * Change the wrapping key configuration of the dataset 'dsname' as described
+ * by 'dcp'. The request is validated by spa_keystore_change_key_check() and
+ * carried out by spa_keystore_change_key_sync() as a single sync task, whose
+ * result is returned to the caller.
+ */
+int
+spa_keystore_change_key(const char *dsname, dsl_crypto_params_t *dcp)
+{
+	spa_keystore_change_key_args_t args;
+	int err;
+
+	/* bundle the parameters for the check and sync callbacks */
+	args.skcka_cp = dcp;
+	args.skcka_dsname = dsname;
+
+	/*
+	 * All of the real work happens in syncing context. The number of
+	 * modified blocks handed to the sync task is a fixed guess: working
+	 * out the real figure would mean holding the pool lock and walking
+	 * every dataset whose key is about to change.
+	 */
+	err = dsl_sync_task(dsname, spa_keystore_change_key_check,
+	    spa_keystore_change_key_sync, &args, 15,
+	    ZFS_SPACE_CHECK_RESERVED);
+
+	return (err);
+}
+
+/*
+ * Decide whether dd may be renamed underneath newparent as far as
+ * encryption is concerned. Returns 0 if the rename is acceptable, EACCES if
+ * it would place an unencrypted dir below an encrypted one or would move a
+ * non-root encrypted dir to a different encryption root, or the error from
+ * looking up an encryption root.
+ */
+int
+dsl_dir_rename_crypt_check(dsl_dir_t *dd, dsl_dir_t *newparent)
+{
+	int err;
+	uint64_t own_root, dest_root;
+
+	if (dd->dd_crypto_obj == 0) {
+		/* children of encrypted parents must be encrypted */
+		if (newparent->dd_crypto_obj != 0)
+			return (SET_ERROR(EACCES));
+
+		return (0);
+	}
+
+	err = dsl_dir_get_encryption_root_ddobj(dd, &own_root);
+	if (err != 0)
+		return (err);
+
+	/* an encryption root needs no further checking here */
+	if (dd->dd_object == own_root)
+		return (0);
+
+	/*
+	 * dd inherits its key, so the destination has to resolve to the
+	 * very same encryption root that dd uses today.
+	 */
+	err = dsl_dir_get_encryption_root_ddobj(newparent, &dest_root);
+	if (err != 0)
+		return (err);
+
+	if (dest_root != own_root)
+		return (SET_ERROR(EACCES));
+
+	return (0);
+}
+
+/*
+ * Verify that promoting the clone in 'target' over its 'origin' will not
+ * require any keys to be rewrapped. Returns 0 when the promote is
+ * acceptable, EACCES when the parents of origin and target resolve to
+ * different encryption roots, or the error from an encryption root lookup.
+ */
+int
+dsl_dataset_promote_crypt_check(dsl_dir_t *target, dsl_dir_t *origin)
+{
+	int err;
+	uint64_t origin_root, origin_parent_root, target_parent_root;
+
+	/*
+	 * Nothing to check for an unencrypted origin. Likewise, if the
+	 * origin is itself a clone we are not changing the first origin
+	 * snapshot in the chain, so the encryption root cannot change.
+	 */
+	if (origin->dd_crypto_obj == 0 || dsl_dir_is_clone(origin))
+		return (0);
+
+	err = dsl_dir_get_encryption_root_ddobj(origin, &origin_root);
+	if (err != 0)
+		return (err);
+
+	/*
+	 * When the origin is the encryption root the sync function will
+	 * repoint the DSL Crypto Key at the target instead.
+	 */
+	if (origin_root == origin->dd_object)
+		return (0);
+
+	/*
+	 * Otherwise the origin inherits its encryption root from its
+	 * parent, and the target's parent must resolve to the same root.
+	 */
+	err = dsl_dir_get_encryption_root_ddobj(origin->dd_parent,
+	    &origin_parent_root);
+	if (err == 0) {
+		err = dsl_dir_get_encryption_root_ddobj(target->dd_parent,
+		    &target_parent_root);
+	}
+	if (err != 0)
+		return (err);
+
+	if (origin_parent_root != target_parent_root)
+		return (SET_ERROR(EACCES));
+
+	return (0);
+}
+
+/*
+ * Syncing-context counterpart of dsl_dataset_promote_crypt_check(). Nothing
+ * is done unless origin is encrypted, is not itself a clone, and is its own
+ * encryption root. In that case the origin's keylocation is set as a local
+ * property on the target's head dataset and unset on the origin's head
+ * dataset, and spa_keystore_change_key_sync_impl() is run starting at origin
+ * with target as the new encryption root and no wrapping key.
+ */
+void
+dsl_dataset_promote_crypt_sync(dsl_dir_t *target, dsl_dir_t *origin,
+    dmu_tx_t *tx)
+{
+	uint64_t rddobj;
+	dsl_pool_t *dp = target->dd_pool;
+	dsl_dataset_t *targetds;
+	dsl_dataset_t *originds;
+	char *keylocation;
+
+	/* same early exits as the check function */
+	if (origin->dd_crypto_obj == 0)
+		return;
+	if (dsl_dir_is_clone(origin))
+		return;
+
+	VERIFY0(dsl_dir_get_encryption_root_ddobj(origin, &rddobj));
+
+	if (rddobj != origin->dd_object)
+		return;
+
+	/*
+	 * If the target is being promoted to the encryption root update the
+	 * DSL Crypto Key and keylocation to reflect that. We also need to
+	 * update the DSL Crypto Keys of all children inheriting their
+	 * encryption root to point to the new target. Otherwise, the check
+	 * function ensured that the encryption root will not change.
+	 */
+	keylocation = kmem_alloc(ZAP_MAXVALUELEN, KM_SLEEP);
+
+	VERIFY0(dsl_dataset_hold_obj(dp,
+	    dsl_dir_phys(target)->dd_head_dataset_obj, FTAG, &targetds));
+	VERIFY0(dsl_dataset_hold_obj(dp,
+	    dsl_dir_phys(origin)->dd_head_dataset_obj, FTAG, &originds));
+
+	/* move the keylocation property from the origin to the target */
+	VERIFY0(dsl_prop_get_dd(origin, zfs_prop_to_name(ZFS_PROP_KEYLOCATION),
+	    1, ZAP_MAXVALUELEN, keylocation, NULL, B_FALSE));
+	dsl_prop_set_sync_impl(targetds, zfs_prop_to_name(ZFS_PROP_KEYLOCATION),
+	    ZPROP_SRC_LOCAL, 1, strlen(keylocation) + 1, keylocation, tx);
+	dsl_prop_set_sync_impl(originds, zfs_prop_to_name(ZFS_PROP_KEYLOCATION),
+	    ZPROP_SRC_NONE, 0, 0, NULL, tx);
+
+	/* the wrapping key itself does not change, hence the NULL wkey */
+	rw_enter(&dp->dp_spa->spa_keystore.sk_wkeys_lock, RW_WRITER);
+	spa_keystore_change_key_sync_impl(rddobj, origin->dd_object,
+	    target->dd_object, NULL, tx);
+	rw_exit(&dp->dp_spa->spa_keystore.sk_wkeys_lock);
+
+	dsl_dataset_rele(targetds, FTAG);
+	dsl_dataset_rele(originds, FTAG);
+	kmem_free(keylocation, ZAP_MAXVALUELEN);
+}
+
+/*
+ * Check that cloning a dataset from origindd into a new child of parentdd
+ * would not create an unencrypted child of an encrypted parent. Returns 0
+ * if the clone is acceptable, EINVAL if it is not, or the error from
+ * dsl_dir_get_crypt().
+ */
+int
+dmu_objset_clone_crypt_check(dsl_dir_t *parentdd, dsl_dir_t *origindd)
+{
+	int err;
+	uint64_t parent_crypt, origin_crypt;
+
+	/* resolve the effective encryption suite of both dirs */
+	err = dsl_dir_get_crypt(parentdd, &parent_crypt);
+	if (err == 0)
+		err = dsl_dir_get_crypt(origindd, &origin_crypt);
+	if (err != 0)
+		return (err);
+
+	ASSERT3U(parent_crypt, !=, ZIO_CRYPT_INHERIT);
+	ASSERT3U(origin_crypt, !=, ZIO_CRYPT_INHERIT);
+
+	if (parent_crypt != ZIO_CRYPT_OFF && origin_crypt == ZIO_CRYPT_OFF)
+		return (SET_ERROR(EINVAL));
+
+	return (0);
+}
+
+
+/*
+ * Validate the crypto parameters for a dataset that is about to be created
+ * below parentdd (NULL when there is no parent dir). A NULL dcp is treated
+ * like an all-zero parameter block. If will_encrypt is not NULL it is set
+ * to B_TRUE once the new dataset is known to end up encrypted and to
+ * B_FALSE otherwise.
+ *
+ * Returns 0 if the parameters are acceptable, EINVAL if they are
+ * inconsistent, EOPNOTSUPP if a required pool feature is not enabled, or
+ * the error from looking up the parent's crypt or wrapping key.
+ */
+int
+dmu_objset_create_crypt_check(dsl_dir_t *parentdd, dsl_crypto_params_t *dcp,
+    boolean_t *will_encrypt)
+{
+	int err;
+	uint64_t parent_crypt, eff_crypt;
+	dsl_crypto_params_t empty_dcp = { 0 };
+
+	if (will_encrypt != NULL)
+		*will_encrypt = B_FALSE;
+
+	if (dcp == NULL)
+		dcp = &empty_dcp;
+
+	if (dcp->cp_cmd != DCP_CMD_NONE)
+		return (SET_ERROR(EINVAL));
+
+	/* without a parent there is nothing to inherit encryption from */
+	parent_crypt = ZIO_CRYPT_OFF;
+	if (parentdd != NULL) {
+		err = dsl_dir_get_crypt(parentdd, &parent_crypt);
+		if (err != 0)
+			return (err);
+	}
+
+	if (dcp->cp_crypt == ZIO_CRYPT_INHERIT)
+		eff_crypt = parent_crypt;
+	else
+		eff_crypt = dcp->cp_crypt;
+
+	ASSERT3U(parent_crypt, !=, ZIO_CRYPT_INHERIT);
+	ASSERT3U(eff_crypt, !=, ZIO_CRYPT_INHERIT);
+
+	if (eff_crypt == ZIO_CRYPT_OFF) {
+		/*
+		 * We can't create an unencrypted child of an encrypted
+		 * parent under any circumstances.
+		 */
+		if (parent_crypt != ZIO_CRYPT_OFF)
+			return (SET_ERROR(EINVAL));
+
+		/* an unencrypted dataset must not carry encryption params */
+		if (dcp->cp_wkey != NULL)
+			return (SET_ERROR(EINVAL));
+		if (dcp->cp_keylocation != NULL &&
+		    strcmp(dcp->cp_keylocation, "none") != 0)
+			return (SET_ERROR(EINVAL));
+
+		return (0);
+	}
+
+	if (will_encrypt != NULL)
+		*will_encrypt = B_TRUE;
+
+	/*
+	 * From here on we are definitely encrypting. With a parent we can
+	 * check the feature flags ourselves; at pool creation the caller
+	 * does it for us since the feature is not technically active yet.
+	 */
+	if (parentdd != NULL) {
+		spa_t *spa = parentdd->dd_pool->dp_spa;
+
+		if (!spa_feature_is_enabled(spa, SPA_FEATURE_ENCRYPTION))
+			return (SET_ERROR(EOPNOTSUPP));
+
+		/* errata #4 (encryption enabled, bookmark_v2 disabled) */
+		if (!spa_feature_is_enabled(spa, SPA_FEATURE_BOOKMARK_V2))
+			return (SET_ERROR(EOPNOTSUPP));
+	}
+
+	/* no wrapping key given: the key is inherited from the parent */
+	if (dcp->cp_wkey == NULL) {
+		ASSERT3P(parentdd, !=, NULL);
+
+		/* key must be fully unspecified */
+		if (dcp->cp_keylocation != NULL)
+			return (SET_ERROR(EINVAL));
+
+		/* parent must have a key to inherit */
+		if (parent_crypt == ZIO_CRYPT_OFF)
+			return (SET_ERROR(EINVAL));
+
+		/* and that key has to be loaded */
+		return (dmu_objset_check_wkey_loaded(parentdd));
+	}
+
+	/* a wrapping key was supplied, so it needs a valid keylocation */
+	if (dcp->cp_keylocation == NULL ||
+	    !zfs_prop_valid_keylocation(dcp->cp_keylocation, B_TRUE))
+		return (SET_ERROR(EINVAL));
+
+	/* the keyformat must be specified and agree with salt and iters */
+	if (dcp->cp_wkey->wk_keyformat == ZFS_KEYFORMAT_HEX ||
+	    dcp->cp_wkey->wk_keyformat == ZFS_KEYFORMAT_RAW) {
+		/* requires no pbkdf2 iters and salt */
+		if (dcp->cp_wkey->wk_salt != 0 ||
+		    dcp->cp_wkey->wk_iters != 0)
+			return (SET_ERROR(EINVAL));
+	} else if (dcp->cp_wkey->wk_keyformat == ZFS_KEYFORMAT_PASSPHRASE) {
+		/* requires pbkdf2 iters and salt */
+		if (dcp->cp_wkey->wk_salt == 0 ||
+		    dcp->cp_wkey->wk_iters < MIN_PBKDF2_ITERATIONS)
+			return (SET_ERROR(EINVAL));
+	} else {
+		/* ZFS_KEYFORMAT_NONE or an unknown format */
+		return (SET_ERROR(EINVAL));
+	}
+
+	return (0);
+}
+
+/*
+ * Set up the encryption state of a dsl dir that is being created, in
+ * syncing context.
+ *
+ *   dsobj  - dataset object on which SPA_FEATURE_ENCRYPTION is activated
+ *            when a new DSL Crypto Key is created
+ *   dd     - the dsl dir being set up
+ *   origin - origin dataset; only dereferenced when dd is a clone
+ *   dcp    - creation-time crypto parameters; must be NULL for clones
+ *
+ * Clones reuse their origin's DSL Crypto Key object through
+ * dsl_crypto_key_clone_sync(). Other datasets that end up encrypted get a
+ * new key object created with either the wrapping key supplied in dcp or
+ * the one held from the parent dir.
+ */
+void
+dsl_dataset_create_crypt_sync(uint64_t dsobj, dsl_dir_t *dd,
+    dsl_dataset_t *origin, dsl_crypto_params_t *dcp, dmu_tx_t *tx)
+{
+	dsl_pool_t *dp = dd->dd_pool;
+	uint64_t crypt;
+	dsl_wrapping_key_t *wkey;
+
+	/* clones always use their origin's wrapping key */
+	if (dsl_dir_is_clone(dd)) {
+		ASSERT3P(dcp, ==, NULL);
+
+		/*
+		 * If this is an encrypted clone we just need to clone the
+		 * dck into dd. Zapify the dd so we can do that.
+		 */
+		if (origin->ds_dir->dd_crypto_obj != 0) {
+			dmu_buf_will_dirty(dd->dd_dbuf, tx);
+			dsl_dir_zapify(dd, tx);
+
+			dd->dd_crypto_obj =
+			    dsl_crypto_key_clone_sync(origin->ds_dir, tx);
+			VERIFY0(zap_add(dp->dp_meta_objset, dd->dd_object,
+			    DD_FIELD_CRYPTO_KEY_OBJ, sizeof (uint64_t), 1,
+			    &dd->dd_crypto_obj, tx));
+		}
+
+		return;
+	}
+
+	/*
+	 * A NULL dcp at this point indicates this is the origin dataset
+	 * which does not have an objset to encrypt. Raw receives will handle
+	 * encryption separately later. In both cases we can simply return.
+	 */
+	if (dcp == NULL || dcp->cp_cmd == DCP_CMD_RAW_RECV)
+		return;
+
+	crypt = dcp->cp_crypt;
+	wkey = dcp->cp_wkey;
+
+	/* figure out the effective crypt */
+	if (crypt == ZIO_CRYPT_INHERIT && dd->dd_parent != NULL)
+		VERIFY0(dsl_dir_get_crypt(dd->dd_parent, &crypt));
+
+	/* if we aren't doing encryption just return */
+	if (crypt == ZIO_CRYPT_OFF || crypt == ZIO_CRYPT_INHERIT)
+		return;
+
+	/* zapify the dd so that we can add the crypto key obj to it */
+	dmu_buf_will_dirty(dd->dd_dbuf, tx);
+	dsl_dir_zapify(dd, tx);
+
+	/* use the new key if given or inherit from the parent */
+	if (wkey == NULL) {
+		VERIFY0(spa_keystore_wkey_hold_dd(dp->dp_spa,
+		    dd->dd_parent, FTAG, &wkey));
+	} else {
+		/* a caller-supplied key now belongs to this dsl dir */
+		wkey->wk_ddobj = dd->dd_object;
+	}
+
+	ASSERT3P(wkey, !=, NULL);
+
+	/* Create or clone the DSL crypto key and activate the feature */
+	dd->dd_crypto_obj = dsl_crypto_key_create_sync(crypt, wkey, tx);
+	VERIFY0(zap_add(dp->dp_meta_objset, dd->dd_object,
+	    DD_FIELD_CRYPTO_KEY_OBJ, sizeof (uint64_t), 1, &dd->dd_crypto_obj,
+	    tx));
+	dsl_dataset_activate_feature(dsobj, SPA_FEATURE_ENCRYPTION, tx);
+
+	/*
+	 * If we inherited the wrapping key we release our reference now.
+	 * Otherwise, this is a new key and we need to load it into the
+	 * keystore.
+	 */
+	if (dcp->cp_wkey == NULL) {
+		dsl_wrapping_key_rele(wkey, FTAG);
+	} else {
+		VERIFY0(spa_keystore_load_wkey_impl(dp->dp_spa, wkey));
+	}
+}
+
+/*
+ * Arguments handed from dsl_crypto_recv_raw() to its sync task callbacks,
+ * dsl_crypto_recv_key_check() and dsl_crypto_recv_key_sync().
+ */
+typedef struct dsl_crypto_recv_key_arg {
+	uint64_t dcrka_dsobj;		/* object of the dataset received into */
+	uint64_t dcrka_fromobj;		/* "from" dataset object, 0 if none */
+	dmu_objset_type_t dcrka_ostype;	/* objset type of the stream */
+	nvlist_t *dcrka_nvl;		/* key and metadnode values */
+	boolean_t dcrka_do_key;		/* also sync the DSL Crypto Key */
+} dsl_crypto_recv_key_arg_t;
+
+/*
+ * Validate the objset-related values of a raw receive before anything is
+ * synced.
+ *
+ *   ds     - dataset being received into
+ *   fromds - dataset the stream is incremental from, or NULL
+ *   ostype - objset type of the stream; must be DMU_OST_ZFS or DMU_OST_ZVOL
+ *   nvl    - nvlist carrying the "mdn_*" metadnode values, "portable_mac"
+ *            and, for incremental streams, "from_ivset_guid"
+ *   tx     - the sync task's transaction
+ *
+ * Returns 0 if everything checks out, EINVAL or ENOTSUP for missing or
+ * unacceptable values, ZFS_ERR_FROM_IVSET_GUID_MISSING or
+ * ZFS_ERR_FROM_IVSET_GUID_MISMATCH if the ivset guid of fromds cannot be
+ * verified, or the error from dmu_objset_from_ds().
+ */
+static int
+dsl_crypto_recv_raw_objset_check(dsl_dataset_t *ds, dsl_dataset_t *fromds,
+    dmu_objset_type_t ostype, nvlist_t *nvl, dmu_tx_t *tx)
+{
+	int ret;
+	objset_t *os;
+	dnode_t *mdn;
+	uint8_t *buf = NULL;
+	uint_t len;
+	uint64_t intval, nlevels, blksz, ibs;
+	uint64_t nblkptr, maxblkid;
+
+	if (ostype != DMU_OST_ZFS && ostype != DMU_OST_ZVOL)
+		return (SET_ERROR(EINVAL));
+
+	/* raw receives also need info about the structure of the metadnode */
+	ret = nvlist_lookup_uint64(nvl, "mdn_compress", &intval);
+	if (ret != 0 || intval >= ZIO_COMPRESS_LEGACY_FUNCTIONS)
+		return (SET_ERROR(EINVAL));
+
+	ret = nvlist_lookup_uint64(nvl, "mdn_checksum", &intval);
+	if (ret != 0 || intval >= ZIO_CHECKSUM_LEGACY_FUNCTIONS)
+		return (SET_ERROR(EINVAL));
+
+	ret = nvlist_lookup_uint64(nvl, "mdn_nlevels", &nlevels);
+	if (ret != 0 || nlevels > DN_MAX_LEVELS)
+		return (SET_ERROR(EINVAL));
+
+	ret = nvlist_lookup_uint64(nvl, "mdn_blksz", &blksz);
+	if (ret != 0 || blksz < SPA_MINBLOCKSIZE)
+		return (SET_ERROR(EINVAL));
+	else if (blksz > spa_maxblocksize(tx->tx_pool->dp_spa))
+		return (SET_ERROR(ENOTSUP));
+
+	ret = nvlist_lookup_uint64(nvl, "mdn_indblkshift", &ibs);
+	if (ret != 0 || ibs < DN_MIN_INDBLKSHIFT || ibs > DN_MAX_INDBLKSHIFT)
+		return (SET_ERROR(ENOTSUP));
+
+	ret = nvlist_lookup_uint64(nvl, "mdn_nblkptr", &nblkptr);
+	if (ret != 0 || nblkptr != DN_MAX_NBLKPTR)
+		return (SET_ERROR(ENOTSUP));
+
+	/* maxblkid only has to be present; its value is not range checked */
+	ret = nvlist_lookup_uint64(nvl, "mdn_maxblkid", &maxblkid);
+	if (ret != 0)
+		return (SET_ERROR(EINVAL));
+
+	ret = nvlist_lookup_uint8_array(nvl, "portable_mac", &buf, &len);
+	if (ret != 0 || len != ZIO_OBJSET_MAC_LEN)
+		return (SET_ERROR(EINVAL));
+
+	ret = dmu_objset_from_ds(ds, &os);
+	if (ret != 0)
+		return (ret);
+
+	/*
+	 * Useraccounting is not portable and must be done with the keys loaded.
+	 * Therefore, whenever we do any kind of receive the useraccounting
+	 * must not be present.
+	 */
+	ASSERT0(os->os_flags & OBJSET_FLAG_USERACCOUNTING_COMPLETE);
+
+	mdn = DMU_META_DNODE(os);
+
+	/*
+	 * If we already created the objset, make sure its unchangeable
+	 * properties match the ones received in the nvlist.
+	 */
+	rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
+	if (!BP_IS_HOLE(dsl_dataset_get_blkptr(ds)) &&
+	    (mdn->dn_nlevels != nlevels || mdn->dn_datablksz != blksz ||
+	    mdn->dn_indblkshift != ibs || mdn->dn_nblkptr != nblkptr)) {
+		rrw_exit(&ds->ds_bp_rwlock, FTAG);
+		return (SET_ERROR(EINVAL));
+	}
+	rrw_exit(&ds->ds_bp_rwlock, FTAG);
+
+	/*
+	 * Check that the ivset guid of the fromds matches the one from the
+	 * send stream. Older versions of the encryption code did not have
+	 * an ivset guid on the from dataset and did not send one in the
+	 * stream. For these streams we provide the
+	 * zfs_disable_ivset_guid_check tunable to allow these datasets to
+	 * be received with a generated ivset guid.
+	 *
+	 * Both lookups are deliberately unchecked: a failed lookup leaves
+	 * the zero-initialized value in place, which is then reported as a
+	 * missing guid.
+	 */
+	if (fromds != NULL && !zfs_disable_ivset_guid_check) {
+		uint64_t from_ivset_guid = 0;
+		intval = 0;
+
+		(void) nvlist_lookup_uint64(nvl, "from_ivset_guid", &intval);
+		(void) zap_lookup(tx->tx_pool->dp_meta_objset,
+		    fromds->ds_object, DS_FIELD_IVSET_GUID,
+		    sizeof (from_ivset_guid), 1, &from_ivset_guid);
+
+		if (intval == 0 || from_ivset_guid == 0)
+			return (SET_ERROR(ZFS_ERR_FROM_IVSET_GUID_MISSING));
+
+		if (intval != from_ivset_guid)
+			return (SET_ERROR(ZFS_ERR_FROM_IVSET_GUID_MISMATCH));
+	}
+
+	return (0);
+}
+
+/*
+ * Syncing-context counterpart of dsl_crypto_recv_raw_objset_check(). If the
+ * block pointer of ds is still a hole the objset is created first, using
+ * the metadnode geometry from nvl. The portable MAC from nvl is then copied
+ * into the objset phys, the local MAC is zeroed, the metadnode's
+ * compression and checksum are set, "mdn_maxblkid" is passed to
+ * dnode_new_blkid(), and the objset is flagged to be written raw for this
+ * txg.
+ */
+static void
+dsl_crypto_recv_raw_objset_sync(dsl_dataset_t *ds, dmu_objset_type_t ostype,
+    nvlist_t *nvl, dmu_tx_t *tx)
+{
+	dsl_pool_t *dp = tx->tx_pool;
+	objset_t *os;
+	dnode_t *mdn;
+	zio_t *zio;
+	uint8_t *portable_mac;
+	uint_t len;
+	uint64_t compress, checksum, nlevels, blksz, ibs, maxblkid;
+	boolean_t newds = B_FALSE;
+
+	VERIFY0(dmu_objset_from_ds(ds, &os));
+	mdn = DMU_META_DNODE(os);
+
+	/*
+	 * Fetch the values we need from the nvlist. "to_ivset_guid" must
+	 * be set on the snapshot, which doesn't exist yet. The receive
+	 * code will take care of this for us later.
+	 */
+	compress = fnvlist_lookup_uint64(nvl, "mdn_compress");
+	checksum = fnvlist_lookup_uint64(nvl, "mdn_checksum");
+	nlevels = fnvlist_lookup_uint64(nvl, "mdn_nlevels");
+	blksz = fnvlist_lookup_uint64(nvl, "mdn_blksz");
+	ibs = fnvlist_lookup_uint64(nvl, "mdn_indblkshift");
+	maxblkid = fnvlist_lookup_uint64(nvl, "mdn_maxblkid");
+	VERIFY0(nvlist_lookup_uint8_array(nvl, "portable_mac", &portable_mac,
+	    &len));
+
+	/* if we haven't created an objset for the ds yet, do that now */
+	rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
+	if (BP_IS_HOLE(dsl_dataset_get_blkptr(ds))) {
+		(void) dmu_objset_create_impl_dnstats(dp->dp_spa, ds,
+		    dsl_dataset_get_blkptr(ds), ostype, nlevels, blksz,
+		    ibs, tx);
+		newds = B_TRUE;
+	}
+	rrw_exit(&ds->ds_bp_rwlock, FTAG);
+
+	/*
+	 * Set the portable MAC. The local MAC will always be zero since the
+	 * incoming data will all be portable and user accounting will be
+	 * deferred until the next mount. Afterwards, flag the os to be
+	 * written out raw next time.
+	 */
+	arc_release(os->os_phys_buf, &os->os_phys_buf);
+	bcopy(portable_mac, os->os_phys->os_portable_mac, ZIO_OBJSET_MAC_LEN);
+	bzero(os->os_phys->os_local_mac, ZIO_OBJSET_MAC_LEN);
+	os->os_next_write_raw[tx->tx_txg & TXG_MASK] = B_TRUE;
+
+	/* set metadnode compression and checksum */
+	mdn->dn_compress = compress;
+	mdn->dn_checksum = checksum;
+
+	/* dnode_new_blkid() is called with dn_struct_rwlock held as writer */
+	rw_enter(&mdn->dn_struct_rwlock, RW_WRITER);
+	dnode_new_blkid(mdn, maxblkid, tx, B_FALSE, B_TRUE);
+	rw_exit(&mdn->dn_struct_rwlock);
+
+	/*
+	 * We can't normally dirty the dataset in syncing context unless
+	 * we are creating a new dataset. In this case, we perform a
+	 * pseudo txg sync here instead.
+	 */
+	if (newds) {
+		dsl_dataset_dirty(ds, tx);
+	} else {
+		zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
+		dsl_dataset_sync(ds, zio, tx);
+		VERIFY0(zio_wait(zio));
+
+		/* dsl_dataset_sync_done will drop this reference. */
+		dmu_buf_add_ref(ds->ds_dbuf, ds);
+		dsl_dataset_sync_done(ds, tx);
+	}
+}
+
+/*
+ * Validate the DSL Crypto Key and wrapping key values carried in nvl for a
+ * raw receive into ds, which must be flagged inconsistent.
+ *
+ * Returns 0 if every value is present and acceptable, EINVAL if one is
+ * missing or malformed, EACCES if ds already has a DSL Crypto Key whose
+ * guid differs from the one in nvl, ENOTSUP if the key version is not the
+ * current one, or the error from looking up the existing key guid.
+ */
+int
+dsl_crypto_recv_raw_key_check(dsl_dataset_t *ds, nvlist_t *nvl, dmu_tx_t *tx)
+{
+	int err;
+	objset_t *mos = tx->tx_pool->dp_meta_objset;
+	uint64_t dckobj = ds->ds_dir->dd_crypto_obj;
+	uint8_t *bytes = NULL;
+	uint_t nbytes;
+	uint64_t val, stream_guid, disk_guid, version;
+	boolean_t passphrase;
+
+	ASSERT(dsl_dataset_phys(ds)->ds_flags & DS_FLAG_INCONSISTENT);
+
+	/*
+	 * Every field of a DSL Crypto Key and of a fully specified wrapping
+	 * key has to be present in the nvlist. Start with the crypto suite,
+	 * which must name an actual encryption algorithm.
+	 */
+	err = nvlist_lookup_uint64(nvl, DSL_CRYPTO_KEY_CRYPTO_SUITE, &val);
+	if (err != 0)
+		return (SET_ERROR(EINVAL));
+	if (val <= ZIO_CRYPT_OFF || val >= ZIO_CRYPT_FUNCTIONS)
+		return (SET_ERROR(EINVAL));
+
+	err = nvlist_lookup_uint64(nvl, DSL_CRYPTO_KEY_GUID, &stream_guid);
+	if (err != 0)
+		return (SET_ERROR(EINVAL));
+
+	/*
+	 * An existing key object means this is an incremental receive, so
+	 * the guid in the stream has to match the key we already have.
+	 */
+	if (dckobj != 0) {
+		err = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_GUID, 8, 1,
+		    &disk_guid);
+		if (err != 0)
+			return (err);
+		if (stream_guid != disk_guid)
+			return (SET_ERROR(EACCES));
+	}
+
+	/* the four raw byte arrays must each have exactly the right size */
+	err = nvlist_lookup_uint8_array(nvl, DSL_CRYPTO_KEY_MASTER_KEY,
+	    &bytes, &nbytes);
+	if (err != 0)
+		return (SET_ERROR(EINVAL));
+	if (nbytes != MASTER_KEY_MAX_LEN)
+		return (SET_ERROR(EINVAL));
+
+	err = nvlist_lookup_uint8_array(nvl, DSL_CRYPTO_KEY_HMAC_KEY,
+	    &bytes, &nbytes);
+	if (err != 0)
+		return (SET_ERROR(EINVAL));
+	if (nbytes != SHA512_HMAC_KEYLEN)
+		return (SET_ERROR(EINVAL));
+
+	err = nvlist_lookup_uint8_array(nvl, DSL_CRYPTO_KEY_IV,
+	    &bytes, &nbytes);
+	if (err != 0)
+		return (SET_ERROR(EINVAL));
+	if (nbytes != WRAPPING_IV_LEN)
+		return (SET_ERROR(EINVAL));
+
+	err = nvlist_lookup_uint8_array(nvl, DSL_CRYPTO_KEY_MAC,
+	    &bytes, &nbytes);
+	if (err != 0)
+		return (SET_ERROR(EINVAL));
+	if (nbytes != WRAPPING_MAC_LEN)
+		return (SET_ERROR(EINVAL));
+
+	/*
+	 * Old on-disk formats cannot be received. The version 0
+	 * implementation protected several objset fields that were not
+	 * always portable during a raw receive, which is why the old
+	 * version is treated as on-disk errata #3.
+	 */
+	err = nvlist_lookup_uint64(nvl, DSL_CRYPTO_KEY_VERSION, &version);
+	if (err != 0)
+		return (SET_ERROR(ENOTSUP));
+	if (version != ZIO_CRYPT_KEY_CURRENT_VERSION)
+		return (SET_ERROR(ENOTSUP));
+
+	err = nvlist_lookup_uint64(nvl, zfs_prop_to_name(ZFS_PROP_KEYFORMAT),
+	    &val);
+	if (err != 0)
+		return (SET_ERROR(EINVAL));
+	if (val == ZFS_KEYFORMAT_NONE || val >= ZFS_KEYFORMAT_FORMATS)
+		return (SET_ERROR(EINVAL));
+
+	passphrase = (val == ZFS_KEYFORMAT_PASSPHRASE);
+
+	/*
+	 * Passphrase keys need non-zero pbkdf2 iters and salt while every
+	 * other format needs both to be zero. Any non-zero iteration count
+	 * is accepted for raw receives since the user won't get a chance
+	 * to change it.
+	 */
+	err = nvlist_lookup_uint64(nvl, zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS),
+	    &val);
+	if (err != 0)
+		return (SET_ERROR(EINVAL));
+	if (passphrase ? (val == 0) : (val != 0))
+		return (SET_ERROR(EINVAL));
+
+	err = nvlist_lookup_uint64(nvl, zfs_prop_to_name(ZFS_PROP_PBKDF2_SALT),
+	    &val);
+	if (err != 0)
+		return (SET_ERROR(EINVAL));
+	if (passphrase ? (val == 0) : (val != 0))
+		return (SET_ERROR(EINVAL));
+
+	return (0);
+}
+
+/*
+ * Syncing-context counterpart of dsl_crypto_recv_raw_key_check(). Writes
+ * the DSL Crypto Key values found in nvl to the key object of the dataset's
+ * dsl dir. If the dir has no key object yet one is created first (with a
+ * refcount of 1 and the current key version), the encryption feature is
+ * activated on ds, and keylocation is set to "prompt".
+ */
+void
+dsl_crypto_recv_raw_key_sync(dsl_dataset_t *ds, nvlist_t *nvl, dmu_tx_t *tx)
+{
+	dsl_pool_t *dp = tx->tx_pool;
+	objset_t *mos = dp->dp_meta_objset;
+	dsl_dir_t *dd = ds->ds_dir;
+	uint_t len;
+	uint64_t rddobj, one = 1;
+	uint8_t *keydata, *hmac_keydata, *iv, *mac;
+	uint64_t crypt, key_guid, keyformat, iters, salt;
+	uint64_t version = ZIO_CRYPT_KEY_CURRENT_VERSION;
+	char *keylocation = "prompt";
+
+	/* lookup the values we need to create the DSL Crypto Key */
+	crypt = fnvlist_lookup_uint64(nvl, DSL_CRYPTO_KEY_CRYPTO_SUITE);
+	key_guid = fnvlist_lookup_uint64(nvl, DSL_CRYPTO_KEY_GUID);
+	keyformat = fnvlist_lookup_uint64(nvl,
+	    zfs_prop_to_name(ZFS_PROP_KEYFORMAT));
+	iters = fnvlist_lookup_uint64(nvl,
+	    zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS));
+	salt = fnvlist_lookup_uint64(nvl,
+	    zfs_prop_to_name(ZFS_PROP_PBKDF2_SALT));
+	/* the array lengths were validated by the check function */
+	VERIFY0(nvlist_lookup_uint8_array(nvl, DSL_CRYPTO_KEY_MASTER_KEY,
+	    &keydata, &len));
+	VERIFY0(nvlist_lookup_uint8_array(nvl, DSL_CRYPTO_KEY_HMAC_KEY,
+	    &hmac_keydata, &len));
+	VERIFY0(nvlist_lookup_uint8_array(nvl, DSL_CRYPTO_KEY_IV, &iv, &len));
+	VERIFY0(nvlist_lookup_uint8_array(nvl, DSL_CRYPTO_KEY_MAC, &mac, &len));
+
+	/* if this is a new dataset setup the DSL Crypto Key. */
+	if (dd->dd_crypto_obj == 0) {
+		/* zapify the dsl dir so we can add the key object to it */
+		dmu_buf_will_dirty(dd->dd_dbuf, tx);
+		dsl_dir_zapify(dd, tx);
+
+		/* create the DSL Crypto Key on disk and activate the feature */
+		dd->dd_crypto_obj = zap_create(mos,
+		    DMU_OTN_ZAP_METADATA, DMU_OT_NONE, 0, tx);
+		VERIFY0(zap_update(tx->tx_pool->dp_meta_objset,
+		    dd->dd_crypto_obj, DSL_CRYPTO_KEY_REFCOUNT,
+		    sizeof (uint64_t), 1, &one, tx));
+		VERIFY0(zap_update(tx->tx_pool->dp_meta_objset,
+		    dd->dd_crypto_obj, DSL_CRYPTO_KEY_VERSION,
+		    sizeof (uint64_t), 1, &version, tx));
+
+		dsl_dataset_activate_feature(ds->ds_object,
+		    SPA_FEATURE_ENCRYPTION, tx);
+		ds->ds_feature_inuse[SPA_FEATURE_ENCRYPTION] = B_TRUE;
+
+		/* save the dd_crypto_obj on disk */
+		VERIFY0(zap_add(mos, dd->dd_object, DD_FIELD_CRYPTO_KEY_OBJ,
+		    sizeof (uint64_t), 1, &dd->dd_crypto_obj, tx));
+
+		/*
+		 * Set the keylocation to prompt by default. If keylocation
+		 * has been provided via the properties, this will be overridden
+		 * later.
+		 */
+		dsl_prop_set_sync_impl(ds,
+		    zfs_prop_to_name(ZFS_PROP_KEYLOCATION),
+		    ZPROP_SRC_LOCAL, 1, strlen(keylocation) + 1,
+		    keylocation, tx);
+
+		/* a newly received dataset is its own encryption root */
+		rddobj = dd->dd_object;
+	} else {
+		VERIFY0(dsl_dir_get_encryption_root_ddobj(dd, &rddobj));
+	}
+
+	/* sync the key data to the ZAP object on disk */
+	dsl_crypto_key_sync_impl(mos, dd->dd_crypto_obj, crypt,
+	    rddobj, key_guid, iv, mac, keydata, hmac_keydata, keyformat, salt,
+	    iters, tx);
+}
+
+/*
+ * Check half of the dsl_crypto_recv_raw() sync task. Holds the dataset
+ * (and the "from" dataset, if one was given) named in the
+ * dsl_crypto_recv_key_arg_t passed as 'arg', runs the objset and key checks
+ * against the nvlist, and releases the holds again. Returns 0 or the first
+ * error encountered.
+ */
+int
+dsl_crypto_recv_key_check(void *arg, dmu_tx_t *tx)
+{
+	dsl_crypto_recv_key_arg_t *args = arg;
+	dsl_pool_t *dp = tx->tx_pool;
+	dsl_dataset_t *ds = NULL;
+	dsl_dataset_t *fromds = NULL;
+	int err;
+
+	err = dsl_dataset_hold_obj(dp, args->dcrka_dsobj, FTAG, &ds);
+
+	/* the "from" dataset is optional */
+	if (err == 0 && args->dcrka_fromobj != 0) {
+		err = dsl_dataset_hold_obj(dp, args->dcrka_fromobj,
+		    FTAG, &fromds);
+	}
+
+	if (err == 0) {
+		err = dsl_crypto_recv_raw_objset_check(ds, fromds,
+		    args->dcrka_ostype, args->dcrka_nvl, tx);
+	}
+
+	/*
+	 * The key check runs even when the key will not be synced as part
+	 * of this task, so that the user does not have to wait until the
+	 * receive finishes to see it fail.
+	 */
+	if (err == 0)
+		err = dsl_crypto_recv_raw_key_check(ds, args->dcrka_nvl, tx);
+
+	if (ds != NULL)
+		dsl_dataset_rele(ds, FTAG);
+	if (fromds != NULL)
+		dsl_dataset_rele(fromds, FTAG);
+
+	return (err);
+}
+
+/*
+ * Sync half of the dsl_crypto_recv_raw() sync task. 'arg' is the
+ * dsl_crypto_recv_key_arg_t describing the receive.
+ */
+void
+dsl_crypto_recv_key_sync(void *arg, dmu_tx_t *tx)
+{
+	dsl_crypto_recv_key_arg_t *args = arg;
+	nvlist_t *nvl = args->dcrka_nvl;
+	dsl_dataset_t *ds;
+
+	VERIFY0(dsl_dataset_hold_obj(tx->tx_pool, args->dcrka_dsobj,
+	    FTAG, &ds));
+
+	/* the objset is always synced, the key only when requested */
+	dsl_crypto_recv_raw_objset_sync(ds, args->dcrka_ostype, nvl, tx);
+	if (args->dcrka_do_key)
+		dsl_crypto_recv_raw_key_sync(ds, nvl, tx);
+
+	dsl_dataset_rele(ds, FTAG);
+}
+
+/*
+ * Sync the raw-receive state described by nvl (a DSL Crypto Key together
+ * with its encryption parameters) to the dataset dsobj in pool poolname.
+ * The key is written exactly as it appears in the nvlist, without wrapping
+ * it. fromobj is the "from" dataset object, or 0 if there is none; do_key
+ * selects whether the key itself is synced in addition to the objset
+ * values. Returns the result of the sync task.
+ */
+int
+dsl_crypto_recv_raw(const char *poolname, uint64_t dsobj, uint64_t fromobj,
+    dmu_objset_type_t ostype, nvlist_t *nvl, boolean_t do_key)
+{
+	dsl_crypto_recv_key_arg_t args = {
+		.dcrka_dsobj = dsobj,
+		.dcrka_fromobj = fromobj,
+		.dcrka_ostype = ostype,
+		.dcrka_nvl = nvl,
+		.dcrka_do_key = do_key
+	};
+	int err;
+
+	err = dsl_sync_task(poolname, dsl_crypto_recv_key_check,
+	    dsl_crypto_recv_key_sync, &args, 1, ZFS_SPACE_CHECK_NORMAL);
+
+	return (err);
+}
+
+/*
+ * Build an nvlist describing the DSL Crypto Key of ds, the wrapping key
+ * properties of its encryption root, the portable MAC of its objset and the
+ * layout of its metadnode. from_ivset_guid is stored as given under
+ * "from_ivset_guid". The entry names are the ones looked up by
+ * dsl_crypto_recv_raw_objset_check() and dsl_crypto_recv_raw_key_check().
+ *
+ * On success 0 is returned and *nvl_out points to the newly allocated
+ * nvlist (presumably the caller is responsible for freeing it). On failure
+ * an error is returned and *nvl_out is set to NULL.
+ */
+int
+dsl_crypto_populate_key_nvlist(dsl_dataset_t *ds, uint64_t from_ivset_guid,
+    nvlist_t **nvl_out)
+{
+	int ret;
+	objset_t *os;
+	dnode_t *mdn;
+	uint64_t rddobj;
+	nvlist_t *nvl = NULL;
+	uint64_t dckobj = ds->ds_dir->dd_crypto_obj;
+	dsl_dir_t *rdd = NULL;
+	dsl_pool_t *dp = ds->ds_dir->dd_pool;
+	objset_t *mos = dp->dp_meta_objset;
+	uint64_t crypt = 0, key_guid = 0, format = 0;
+	uint64_t iters = 0, salt = 0, version = 0;
+	uint64_t to_ivset_guid = 0;
+	uint8_t raw_keydata[MASTER_KEY_MAX_LEN];
+	uint8_t raw_hmac_keydata[SHA512_HMAC_KEYLEN];
+	uint8_t iv[WRAPPING_IV_LEN];
+	uint8_t mac[WRAPPING_MAC_LEN];
+
+	/* only datasets that have a DSL Crypto Key may be passed in */
+	ASSERT(dckobj != 0);
+
+	VERIFY0(dmu_objset_from_ds(ds, &os));
+	mdn = DMU_META_DNODE(os);
+
+	ret = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP);
+	if (ret != 0)
+		goto error;
+
+	/* lookup values from the DSL Crypto Key */
+	ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_CRYPTO_SUITE, 8, 1,
+	    &crypt);
+	if (ret != 0)
+		goto error;
+
+	ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_GUID, 8, 1, &key_guid);
+	if (ret != 0)
+		goto error;
+
+	ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_MASTER_KEY, 1,
+	    MASTER_KEY_MAX_LEN, raw_keydata);
+	if (ret != 0)
+		goto error;
+
+	ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_HMAC_KEY, 1,
+	    SHA512_HMAC_KEYLEN, raw_hmac_keydata);
+	if (ret != 0)
+		goto error;
+
+	ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_IV, 1, WRAPPING_IV_LEN,
+	    iv);
+	if (ret != 0)
+		goto error;
+
+	ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_MAC, 1, WRAPPING_MAC_LEN,
+	    mac);
+	if (ret != 0)
+		goto error;
+
+	/*
+	 * see zfs_disable_ivset_guid_check tunable for errata info
+	 *
+	 * A failed lookup is not treated as an error here; ret is
+	 * overwritten by the version lookup below.
+	 */
+	ret = zap_lookup(mos, ds->ds_object, DS_FIELD_IVSET_GUID, 8, 1,
+	    &to_ivset_guid);
+	if (ret != 0)
+		ASSERT3U(dp->dp_spa->spa_errata, !=, 0);
+
+	/*
+	 * We don't support raw sends of legacy on-disk formats. See the
+	 * comment in dsl_crypto_recv_raw_key_check() for details.
+	 */
+	ret = zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_VERSION, 8, 1, &version);
+	if (ret != 0 || version != ZIO_CRYPT_KEY_CURRENT_VERSION) {
+		dp->dp_spa->spa_errata = ZPOOL_ERRATA_ZOL_6845_ENCRYPTION;
+		ret = SET_ERROR(ENOTSUP);
+		goto error;
+	}
+
+	/*
+	 * Lookup wrapping key properties. An early version of the code did
+	 * not correctly add these values to the wrapping key or the DSL
+	 * Crypto Key on disk for non encryption roots, so to be safe we
+	 * always take the slightly circuitous route of looking it up from
+	 * the encryption root's key.
+	 */
+	ret = dsl_dir_get_encryption_root_ddobj(ds->ds_dir, &rddobj);
+	if (ret != 0)
+		goto error;
+
+	/* from here on failures must leave through error_unlock */
+	dsl_pool_config_enter(dp, FTAG);
+
+	ret = dsl_dir_hold_obj(dp, rddobj, NULL, FTAG, &rdd);
+	if (ret != 0)
+		goto error_unlock;
+
+	ret = zap_lookup(dp->dp_meta_objset, rdd->dd_crypto_obj,
+	    zfs_prop_to_name(ZFS_PROP_KEYFORMAT), 8, 1, &format);
+	if (ret != 0)
+		goto error_unlock;
+
+	/* iters and salt stay 0 for every format other than passphrase */
+	if (format == ZFS_KEYFORMAT_PASSPHRASE) {
+		ret = zap_lookup(dp->dp_meta_objset, rdd->dd_crypto_obj,
+		    zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS), 8, 1, &iters);
+		if (ret != 0)
+			goto error_unlock;
+
+		ret = zap_lookup(dp->dp_meta_objset, rdd->dd_crypto_obj,
+		    zfs_prop_to_name(ZFS_PROP_PBKDF2_SALT), 8, 1, &salt);
+		if (ret != 0)
+			goto error_unlock;
+	}
+
+	dsl_dir_rele(rdd, FTAG);
+	dsl_pool_config_exit(dp, FTAG);
+
+	/* everything was read successfully; assemble the nvlist */
+	fnvlist_add_uint64(nvl, DSL_CRYPTO_KEY_CRYPTO_SUITE, crypt);
+	fnvlist_add_uint64(nvl, DSL_CRYPTO_KEY_GUID, key_guid);
+	fnvlist_add_uint64(nvl, DSL_CRYPTO_KEY_VERSION, version);
+	VERIFY0(nvlist_add_uint8_array(nvl, DSL_CRYPTO_KEY_MASTER_KEY,
+	    raw_keydata, MASTER_KEY_MAX_LEN));
+	VERIFY0(nvlist_add_uint8_array(nvl, DSL_CRYPTO_KEY_HMAC_KEY,
+	    raw_hmac_keydata, SHA512_HMAC_KEYLEN));
+	VERIFY0(nvlist_add_uint8_array(nvl, DSL_CRYPTO_KEY_IV, iv,
+	    WRAPPING_IV_LEN));
+	VERIFY0(nvlist_add_uint8_array(nvl, DSL_CRYPTO_KEY_MAC, mac,
+	    WRAPPING_MAC_LEN));
+	VERIFY0(nvlist_add_uint8_array(nvl, "portable_mac",
+	    os->os_phys->os_portable_mac, ZIO_OBJSET_MAC_LEN));
+	fnvlist_add_uint64(nvl, zfs_prop_to_name(ZFS_PROP_KEYFORMAT), format);
+	fnvlist_add_uint64(nvl, zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS), iters);
+	fnvlist_add_uint64(nvl, zfs_prop_to_name(ZFS_PROP_PBKDF2_SALT), salt);
+	fnvlist_add_uint64(nvl, "mdn_checksum", mdn->dn_checksum);
+	fnvlist_add_uint64(nvl, "mdn_compress", mdn->dn_compress);
+	fnvlist_add_uint64(nvl, "mdn_nlevels", mdn->dn_nlevels);
+	fnvlist_add_uint64(nvl, "mdn_blksz", mdn->dn_datablksz);
+	fnvlist_add_uint64(nvl, "mdn_indblkshift", mdn->dn_indblkshift);
+	fnvlist_add_uint64(nvl, "mdn_nblkptr", mdn->dn_nblkptr);
+	fnvlist_add_uint64(nvl, "mdn_maxblkid", mdn->dn_maxblkid);
+	fnvlist_add_uint64(nvl, "to_ivset_guid", to_ivset_guid);
+	fnvlist_add_uint64(nvl, "from_ivset_guid", from_ivset_guid);
+
+	*nvl_out = nvl;
+	return (0);
+
+error_unlock:
+	dsl_pool_config_exit(dp, FTAG);
+error:
+	/* rdd is only non-NULL if the hold above succeeded */
+	if (rdd != NULL)
+		dsl_dir_rele(rdd, FTAG);
+	nvlist_free(nvl);
+
+	*nvl_out = NULL;
+	return (ret);
+}
+
+/*
+ * Create a DSL Crypto Key ZAP object in the MOS: sync a new key for suite
+ * "crypt" (with wkey attached as its wrapping key) into it, then record a
+ * refcount of 1 and the current key version. Returns the new object.
+ */
+uint64_t
+dsl_crypto_key_create_sync(uint64_t crypt, dsl_wrapping_key_t *wkey,
+    dmu_tx_t *tx)
+{
+	objset_t *mos = tx->tx_pool->dp_meta_objset;
+	uint64_t refcnt = 1ULL, vers = ZIO_CRYPT_KEY_CURRENT_VERSION;
+	dsl_crypto_key_t dck;
+
+	ASSERT(dmu_tx_is_syncing(tx));
+	ASSERT3U(crypt, >, ZIO_CRYPT_OFF);
+	ASSERT3U(crypt, <, ZIO_CRYPT_FUNCTIONS);
+
+	dck.dck_wkey = wkey;
+	dck.dck_obj = zap_create(mos, DMU_OTN_ZAP_METADATA, DMU_OT_NONE, 0, tx);
+	VERIFY0(zio_crypt_key_init(crypt, &dck.dck_key));
+	dsl_crypto_key_sync(&dck, tx);
+	VERIFY0(zap_update(mos, dck.dck_obj, DSL_CRYPTO_KEY_REFCOUNT,
+	    sizeof (uint64_t), 1, &refcnt, tx));
+	VERIFY0(zap_update(mos, dck.dck_obj, DSL_CRYPTO_KEY_VERSION,
+	    sizeof (uint64_t), 1, &vers, tx));
+
+	/* scrub the on-stack key material before returning */
+	zio_crypt_key_destroy(&dck.dck_key);
+	bzero(&dck.dck_key, sizeof (zio_crypt_key_t));
+	return (dck.dck_obj);
+}
+
+/* Share origindd's DSL Crypto Key with a clone: bump refcount, return obj. */
+uint64_t
+dsl_crypto_key_clone_sync(dsl_dir_t *origindd, dmu_tx_t *tx)
+{
+	uint64_t dckobj = origindd->dd_crypto_obj;
+
+	ASSERT(dmu_tx_is_syncing(tx));
+	VERIFY0(zap_increment(tx->tx_pool->dp_meta_objset, dckobj,
+	    DSL_CRYPTO_KEY_REFCOUNT, 1, tx));
+
+	return (dckobj);
+}
+
+/* Drop a reference on DSL Crypto Key dckobj; the last one destroys it. */
+void
+dsl_crypto_key_destroy_sync(uint64_t dckobj, dmu_tx_t *tx)
+{
+	objset_t *mos = tx->tx_pool->dp_meta_objset;
+	uint64_t holds;
+
+	VERIFY0(zap_lookup(mos, dckobj, DSL_CRYPTO_KEY_REFCOUNT,
+	    sizeof (uint64_t), 1, &holds));
+
+	if (holds == 1) {
+		VERIFY0(zap_destroy(mos, dckobj, tx));
+	} else {
+		VERIFY0(zap_increment(mos, dckobj, DSL_CRYPTO_KEY_REFCOUNT,
+		    -1, tx));
+	}
+}
+
+/* Add the encryption properties of ds to nv; no-op if ds is unencrypted. */
+void
+dsl_dataset_crypt_stats(dsl_dataset_t *ds, nvlist_t *nv)
+{
+	uint64_t intval;
+	dsl_dir_t *dd = ds->ds_dir;
+	dsl_dir_t *enc_root;
+	char buf[ZFS_MAX_DATASET_NAME_LEN];
+
+	if (dd->dd_crypto_obj == 0)
+		return;
+
+	intval = dsl_dataset_get_keystatus(dd);
+	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_KEYSTATUS, intval);
+	if (dsl_dir_get_crypt(dd, &intval) == 0)
+		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_ENCRYPTION, intval);
+	if (zap_lookup(dd->dd_pool->dp_meta_objset, dd->dd_crypto_obj,
+	    DSL_CRYPTO_KEY_GUID, 8, 1, &intval) == 0) {
+		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_KEY_GUID, intval);
+	}
+	if (zap_lookup(dd->dd_pool->dp_meta_objset, dd->dd_crypto_obj,
+	    zfs_prop_to_name(ZFS_PROP_KEYFORMAT), 8, 1, &intval) == 0) {
+		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_KEYFORMAT, intval);
+	}
+	if (zap_lookup(dd->dd_pool->dp_meta_objset, dd->dd_crypto_obj,
+	    zfs_prop_to_name(ZFS_PROP_PBKDF2_SALT), 8, 1, &intval) == 0) {
+		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_PBKDF2_SALT, intval);
+	}
+	if (zap_lookup(dd->dd_pool->dp_meta_objset, dd->dd_crypto_obj,
+	    zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS), 8, 1, &intval) == 0) {
+		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_PBKDF2_ITERS, intval);
+	}
+	if (zap_lookup(dd->dd_pool->dp_meta_objset, ds->ds_object,
+	    DS_FIELD_IVSET_GUID, 8, 1, &intval) == 0) {
+		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_IVSET_GUID, intval);
+	}
+
+	/* as with the lookups above, a failed hold just omits the property */
+	if (dsl_dir_get_encryption_root_ddobj(dd, &intval) == 0 &&
+	    dsl_dir_hold_obj(dd->dd_pool, intval, NULL, FTAG, &enc_root) == 0) {
+		dsl_dir_name(enc_root, buf);
+		dsl_dir_rele(enc_root, FTAG);
+		dsl_prop_nvlist_add_string(nv, ZFS_PROP_ENCRYPTION_ROOT, buf);
+	}
+}
+
+/*
+ * Obtain a salt from the key that the spa's keystore maps to dsobj and
+ * store it in "salt". The key is held only for the duration of the call.
+ * Returns 0 on success, otherwise the error reported by the keystore
+ * lookup or by zio_crypt_key_get_salt().
+ */
+int
+spa_crypt_get_salt(spa_t *spa, uint64_t dsobj, uint8_t *salt)
+{
+	dsl_crypto_key_t *dck = NULL;
+	int err;
+
+	/* the salt comes from the dataset's key, so find that first */
+	err = spa_keystore_lookup_key(spa, dsobj, FTAG, &dck);
+	if (err == 0)
+		err = zio_crypt_key_get_salt(&dck->dck_key, salt);
+
+	/* single exit: drop the key hold whether or not the salt was read */
+	if (dck != NULL)
+		spa_keystore_dsl_key_rele(spa, dck, FTAG);
+
+	return (err);
+}
+
+/*
+ * Objset blocks are a special case for MAC generation. Rather than a single
+ * 128 bit MAC they carry two 256-bit MACs (portable and local) embedded in
+ * the objset_phys_t itself, so this function encodes and decodes the MACs on
+ * its own, unlike other functions in this file.
+ *
+ * With "generate" set, the computed MACs are stored into the block held by
+ * abd. Otherwise they are compared with the stored MACs and ECKSUM is
+ * returned on a mismatch. Key lookup and HMAC errors are returned as-is.
+ */
+int
+spa_do_crypt_objset_mac_abd(boolean_t generate, spa_t *spa, uint64_t dsobj,
+    abd_t *abd, uint_t datalen, boolean_t byteswap)
+{
+	uint8_t portable_mac[ZIO_OBJSET_MAC_LEN];
+	uint8_t local_mac[ZIO_OBJSET_MAC_LEN];
+	dsl_crypto_key_t *dck = NULL;
+	objset_phys_t *osp;
+	int ret, mismatch;
+
+	osp = abd_borrow_buf_copy(abd, datalen);
+
+	/* calculate both HMACs with the key the keystore maps to dsobj */
+	ret = spa_keystore_lookup_key(spa, dsobj, FTAG, &dck);
+	if (ret == 0) {
+		ret = zio_crypt_do_objset_hmacs(&dck->dck_key, osp, datalen,
+		    byteswap, portable_mac, local_mac);
+	}
+
+	/* the key is not needed past this point, success or not */
+	if (dck != NULL)
+		spa_keystore_dsl_key_rele(spa, dck, FTAG);
+
+	if (ret != 0) {
+		abd_return_buf(abd, osp, datalen);
+		return (ret);
+	}
+
+	/* generating: encode the HMACs in the block and copy it back */
+	if (generate) {
+		bcopy(portable_mac, osp->os_portable_mac, ZIO_OBJSET_MAC_LEN);
+		bcopy(local_mac, osp->os_local_mac, ZIO_OBJSET_MAC_LEN);
+		abd_return_buf_copy(abd, osp, datalen);
+		return (0);
+	}
+
+	/* verifying: the borrowed copy is unmodified, so just hand it back */
+	mismatch =
+	    bcmp(portable_mac, osp->os_portable_mac, ZIO_OBJSET_MAC_LEN) != 0 ||
+	    bcmp(local_mac, osp->os_local_mac, ZIO_OBJSET_MAC_LEN) != 0;
+	abd_return_buf(abd, osp, datalen);
+
+	return (mismatch ? SET_ERROR(ECKSUM) : 0);
+}
+
+/*
+ * Generate or verify the MAC of datalen bytes of abd using the key that the
+ * spa's keystore maps to dsobj. With "generate" set, the HMAC digest is
+ * written to mac; otherwise it is compared with mac and ECKSUM is returned
+ * on a mismatch. Key lookup and HMAC errors are returned as-is.
+ */
+int
+spa_do_crypt_mac_abd(boolean_t generate, spa_t *spa, uint64_t dsobj, abd_t *abd,
+    uint_t datalen, uint8_t *mac)
+{
+	uint8_t digestbuf[ZIO_DATA_MAC_LEN];
+	dsl_crypto_key_t *dck = NULL;
+	uint8_t *buf;
+	int ret;
+
+	buf = abd_borrow_buf_copy(abd, datalen);
+
+	/* HMAC the data with the key the keystore maps to dsobj */
+	ret = spa_keystore_lookup_key(spa, dsobj, FTAG, &dck);
+	if (ret == 0) {
+		ret = zio_crypt_do_hmac(&dck->dck_key, buf, datalen,
+		    digestbuf, ZIO_DATA_MAC_LEN);
+	}
+
+	/* neither the borrowed buffer nor the key is needed any longer */
+	abd_return_buf(abd, buf, datalen);
+	if (dck != NULL)
+		spa_keystore_dsl_key_rele(spa, dck, FTAG);
+
+	if (ret != 0)
+		return (ret);
+
+	/* hand the digest to the caller, or check it against theirs */
+	if (generate) {
+		bcopy(digestbuf, mac, ZIO_DATA_MAC_LEN);
+		return (0);
+	}
+
+	if (bcmp(digestbuf, mac, ZIO_DATA_MAC_LEN) != 0)
+		return (SET_ERROR(ECKSUM));
+
+	return (0);
+}
+
+/*
+ * This function serves as a multiplexer for encryption and decryption of
+ * all blocks (except the L2ARC). For encryption, it will populate the IV,
+ * salt, MAC, and cabd (the ciphertext). On decryption it will simply use
+ * these fields to populate pabd (the plaintext).
+ *
+ * The key is looked up by zb->zb_objset; ot, bswap and no_crypt are handed
+ * through to zio_do_crypt_data().
+ * Returns 0 on success, EACCES if the keystore lookup for zb->zb_objset
+ * fails, or the error of the failing crypto step. If encryption fails,
+ * salt, iv and mac are zeroed before returning.
+ */
+/* ARGSUSED */
+int
+spa_do_crypt_abd(boolean_t encrypt, spa_t *spa, const zbookmark_phys_t *zb,
+    dmu_object_type_t ot, boolean_t dedup, boolean_t bswap, uint8_t *salt,
+    uint8_t *iv, uint8_t *mac, uint_t datalen, abd_t *pabd, abd_t *cabd,
+    boolean_t *no_crypt)
+{
+	uint8_t *ptext = NULL, *ctext = NULL;
+	dsl_crypto_key_t *dck = NULL;
+	int ret;
+
+	ASSERT(spa_feature_is_active(spa, SPA_FEATURE_ENCRYPTION));
+
+	/* any failure to find the key in the spa's keystore becomes EACCES */
+	if (spa_keystore_lookup_key(spa, zb->zb_objset, FTAG, &dck) != 0)
+		return (SET_ERROR(EACCES));
+
+	/*
+	 * Borrow buffers for both sides. The side being read is borrowed
+	 * with the _copy variant and returned plainly; the side being
+	 * produced is borrowed plainly and returned with the _copy variant.
+	 */
+	if (encrypt) {
+		ptext = abd_borrow_buf_copy(pabd, datalen);
+		ctext = abd_borrow_buf(cabd, datalen);
+	} else {
+		ptext = abd_borrow_buf(pabd, datalen);
+		ctext = abd_borrow_buf_copy(cabd, datalen);
+	}
+
+	/*
+	 * Both encryption and decryption functions need a salt for key
+	 * generation and an IV. When encrypting a non-dedup block, we
+	 * generate the salt and IV randomly to be stored by the caller. Dedup
+	 * blocks perform a (more expensive) HMAC of the plaintext to obtain
+	 * the salt and the IV. ZIL blocks have their salt and IV generated
+	 * at allocation time in zio_alloc_zil(). On decryption, we simply use
+	 * the provided values.
+	 */
+	if (encrypt && dedup) {
+		ret = zio_crypt_generate_iv_salt_dedup(&dck->dck_key,
+		    ptext, datalen, iv, salt);
+		if (ret != 0)
+			goto out;
+	} else if (encrypt && ot != DMU_OT_INTENT_LOG) {
+		ret = zio_crypt_key_get_salt(&dck->dck_key, salt);
+		if (ret != 0)
+			goto out;
+
+		ret = zio_crypt_generate_iv(iv);
+		if (ret != 0)
+			goto out;
+	}
+
+	/* call lower level function to perform encryption / decryption */
+	ret = zio_do_crypt_data(encrypt, &dck->dck_key, ot, bswap, salt, iv,
+	    mac, datalen, ptext, ctext, no_crypt);
+
+	/*
+	 * Handle injected decryption faults. Unfortunately, we cannot inject
+	 * faults for dnode blocks because we might trigger the panic in
+	 * dbuf_prepare_encrypted_dnode_leaf(), which exists because syncing
+	 * context is not prepared to handle malicious decryption failures.
+	 */
+	if (ret == 0 && zio_injection_enabled && !encrypt && ot != DMU_OT_DNODE)
+		ret = zio_handle_decrypt_injection(spa, zb, ot, ECKSUM);
+
+out:
+	/*
+	 * Success and failure hand the buffers back the same way; ret is 0
+	 * here unless one of the steps above failed.
+	 */
+	if (encrypt) {
+		if (ret != 0) {
+			/* zero out any state we might have changed */
+			bzero(salt, ZIO_DATA_SALT_LEN);
+			bzero(iv, ZIO_DATA_IV_LEN);
+			bzero(mac, ZIO_DATA_MAC_LEN);
+		}
+		abd_return_buf(pabd, ptext, datalen);
+		abd_return_buf_copy(cabd, ctext, datalen);
+	} else {
+		abd_return_buf_copy(pabd, ptext, datalen);
+		abd_return_buf(cabd, ctext, datalen);
+	}
+
+	spa_keystore_dsl_key_rele(spa, dck, FTAG);
+
+	return (ret);
+}
diff --git a/usr/src/uts/common/fs/zfs/dsl_dataset.c b/usr/src/uts/common/fs/zfs/dsl_dataset.c
index fd4c35e000..a6061078f7 100644
--- a/usr/src/uts/common/fs/zfs/dsl_dataset.c
+++ b/usr/src/uts/common/fs/zfs/dsl_dataset.c
@@ -540,6 +540,13 @@ dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag,
 			ds->ds_reserved = ds->ds_quota = 0;
 		}
 
+		if (err == 0 && ds->ds_dir->dd_crypto_obj != 0 &&
+		    ds->ds_is_snapshot &&
+		    zap_contains(mos, dsobj, DS_FIELD_IVSET_GUID) != 0) {
+			dp->dp_spa->spa_errata =
+			    ZPOOL_ERRATA_ZOL_8308_ENCRYPTION;
+		}
+
 		dsl_deadlist_open(&ds->ds_deadlist,
 		    mos, dsl_dataset_phys(ds)->ds_deadlist_obj);
 		uint64_t remap_deadlist_obj =
@@ -591,17 +598,52 @@ dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag,
 			}
 		}
 	}
+
 	ASSERT3P(ds->ds_dbuf, ==, dbuf);
 	ASSERT3P(dsl_dataset_phys(ds), ==, dbuf->db_data);
 	ASSERT(dsl_dataset_phys(ds)->ds_prev_snap_obj != 0 ||
 	    spa_version(dp->dp_spa) < SPA_VERSION_ORIGIN ||
 	    dp->dp_origin_snap == NULL || ds == dp->dp_origin_snap);
 	*dsp = ds;
+
 	return (0);
 }
 
 int
-dsl_dataset_hold(dsl_pool_t *dp, const char *name,
+dsl_dataset_create_key_mapping(dsl_dataset_t *ds)
+{
+	dsl_dir_t *dd = ds->ds_dir;
+
+	if (dd->dd_crypto_obj == 0)
+		return (0);
+
+	return (spa_keystore_create_mapping(dd->dd_pool->dp_spa,
+	    ds, ds, &ds->ds_key_mapping));
+}
+
+/*
+ * Hold dataset dsobj and, if DS_HOLD_FLAG_DECRYPT is set, also create its
+ * key mapping. If the mapping cannot be created the hold is dropped again.
+ */
+int
+dsl_dataset_hold_obj_flags(dsl_pool_t *dp, uint64_t dsobj,
+    ds_hold_flags_t flags, void *tag, dsl_dataset_t **dsp)
+{
+	int err = dsl_dataset_hold_obj(dp, dsobj, tag, dsp);
+
+	if (err != 0)
+		return (err);
+	ASSERT3P(*dsp, !=, NULL);
+	if ((flags & DS_HOLD_FLAG_DECRYPT) == 0)
+		return (0);
+	err = dsl_dataset_create_key_mapping(*dsp);
+	if (err != 0)
+		dsl_dataset_rele(*dsp, tag);
+	return (err);
+}
+
+int
+dsl_dataset_hold_flags(dsl_pool_t *dp, const char *name, ds_hold_flags_t flags,
     void *tag, dsl_dataset_t **dsp)
 {
 	dsl_dir_t *dd;
@@ -617,7 +659,7 @@ dsl_dataset_hold(dsl_pool_t *dp, const char *name,
 	ASSERT(dsl_pool_config_held(dp));
 	obj = dsl_dir_phys(dd)->dd_head_dataset_obj;
 	if (obj != 0)
-		err = dsl_dataset_hold_obj(dp, obj, tag, &ds);
+		err = dsl_dataset_hold_obj_flags(dp, obj, flags, tag, &ds);
 	else
 		err = SET_ERROR(ENOENT);
 
@@ -626,16 +668,18 @@ dsl_dataset_hold(dsl_pool_t *dp, const char *name,
 		dsl_dataset_t *snap_ds;
 
 		if (*snapname++ != '@') {
-			dsl_dataset_rele(ds, tag);
+			dsl_dataset_rele_flags(ds, flags, tag);
 			dsl_dir_rele(dd, FTAG);
 			return (SET_ERROR(ENOENT));
 		}
 
 		dprintf("looking for snapshot '%s'\n", snapname);
 		err = dsl_dataset_snap_lookup(ds, snapname, &obj);
-		if (err == 0)
-			err = dsl_dataset_hold_obj(dp, obj, tag, &snap_ds);
-		dsl_dataset_rele(ds, tag);
+		if (err == 0) {
+			err = dsl_dataset_hold_obj_flags(dp, obj, flags, tag,
+			    &snap_ds);
+		}
+		dsl_dataset_rele_flags(ds, flags, tag);
 
 		if (err == 0) {
 			mutex_enter(&snap_ds->ds_lock);
@@ -653,14 +697,21 @@ dsl_dataset_hold(dsl_pool_t *dp, const char *name,
 }
 
 int
-dsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj,
+dsl_dataset_hold(dsl_pool_t *dp, const char *name, void *tag,
+    dsl_dataset_t **dsp)
+{
+	return (dsl_dataset_hold_flags(dp, name, 0, tag, dsp));
+}
+
+int
+dsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj, ds_hold_flags_t flags,
     void *tag, dsl_dataset_t **dsp)
 {
-	int err = dsl_dataset_hold_obj(dp, dsobj, tag, dsp);
+	int err = dsl_dataset_hold_obj_flags(dp, dsobj, flags, tag, dsp);
 	if (err != 0)
 		return (err);
 	if (!dsl_dataset_tryown(*dsp, tag)) {
-		dsl_dataset_rele(*dsp, tag);
+		dsl_dataset_rele_flags(*dsp, flags, tag);
 		*dsp = NULL;
 		return (SET_ERROR(EBUSY));
 	}
@@ -668,14 +719,14 @@ dsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj,
 }
 
 int
-dsl_dataset_own(dsl_pool_t *dp, const char *name,
+dsl_dataset_own(dsl_pool_t *dp, const char *name, ds_hold_flags_t flags,
     void *tag, dsl_dataset_t **dsp)
 {
-	int err = dsl_dataset_hold(dp, name, tag, dsp);
+	int err = dsl_dataset_hold_flags(dp, name, flags, tag, dsp);
 	if (err != 0)
 		return (err);
 	if (!dsl_dataset_tryown(*dsp, tag)) {
-		dsl_dataset_rele(*dsp, tag);
+		dsl_dataset_rele_flags(*dsp, flags, tag);
 		return (SET_ERROR(EBUSY));
 	}
 	return (0);
@@ -757,7 +808,28 @@ dsl_dataset_rele(dsl_dataset_t *ds, void *tag)
 }
 
 void
-dsl_dataset_disown(dsl_dataset_t *ds, void *tag)
+dsl_dataset_remove_key_mapping(dsl_dataset_t *ds)
+{
+	dsl_dir_t *dd = ds->ds_dir;
+
+	if (dd == NULL || dd->dd_crypto_obj == 0)
+		return;
+
+	(void) spa_keystore_remove_mapping(dd->dd_pool->dp_spa,
+	    ds->ds_object, ds);
+}
+
+/* Release ds; with DS_HOLD_FLAG_DECRYPT, remove its key mapping first. */
+void
+dsl_dataset_rele_flags(dsl_dataset_t *ds, ds_hold_flags_t flags, void *tag)
+{
+	if ((flags & DS_HOLD_FLAG_DECRYPT) != 0)
+		dsl_dataset_remove_key_mapping(ds);
+	dsl_dataset_rele(ds, tag);
+}
+
+void
+dsl_dataset_disown(dsl_dataset_t *ds, ds_hold_flags_t flags, void *tag)
 {
 	ASSERT3P(ds->ds_owner, ==, tag);
 	ASSERT(ds->ds_dbuf != NULL);
@@ -766,7 +838,7 @@ dsl_dataset_disown(dsl_dataset_t *ds, void *tag)
 	ds->ds_owner = NULL;
 	mutex_exit(&ds->ds_lock);
 	dsl_dataset_long_rele(ds, tag);
-	dsl_dataset_rele(ds, tag);
+	dsl_dataset_rele_flags(ds, flags, tag);
 }
 
 boolean_t
@@ -795,7 +867,7 @@ dsl_dataset_has_owner(dsl_dataset_t *ds)
 	return (rv);
 }
 
-static void
+void
 dsl_dataset_activate_feature(uint64_t dsobj, spa_feature_t f, dmu_tx_t *tx)
 {
 	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
@@ -825,7 +897,7 @@ dsl_dataset_deactivate_feature(uint64_t dsobj, spa_feature_t f, dmu_tx_t *tx)
 
 uint64_t
 dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
-    uint64_t flags, dmu_tx_t *tx)
+    dsl_crypto_params_t *dcp, uint64_t flags, dmu_tx_t *tx)
 {
 	dsl_pool_t *dp = dd->dd_pool;
 	dmu_buf_t *dbuf;
@@ -924,6 +996,9 @@ dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
 		}
 	}
 
+	/* handle encryption */
+	dsl_dataset_create_crypt_sync(dsobj, dd, origin, dcp, tx);
+
 	if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE)
 		dsphys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
 
@@ -946,6 +1021,8 @@ dsl_dataset_zero_zil(dsl_dataset_t *ds, dmu_tx_t *tx)
 		zio_t *zio;
 
 		bzero(&os->os_zil_header, sizeof (os->os_zil_header));
+		if (os->os_encrypted)
+			os->os_next_write_raw[tx->tx_txg & TXG_MASK] = B_TRUE;
 
 		zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
 		dsl_dataset_sync(ds, zio, tx);
@@ -959,7 +1036,8 @@ dsl_dataset_zero_zil(dsl_dataset_t *ds, dmu_tx_t *tx)
 
 uint64_t
 dsl_dataset_create_sync(dsl_dir_t *pdd, const char *lastname,
-    dsl_dataset_t *origin, uint64_t flags, cred_t *cr, dmu_tx_t *tx)
+    dsl_dataset_t *origin, uint64_t flags, cred_t *cr,
+    dsl_crypto_params_t *dcp, dmu_tx_t *tx)
 {
 	dsl_pool_t *dp = pdd->dd_pool;
 	uint64_t dsobj, ddobj;
@@ -971,7 +1049,7 @@ dsl_dataset_create_sync(dsl_dir_t *pdd, const char *lastname,
 	ddobj = dsl_dir_create_sync(dp, pdd, lastname, tx);
 	VERIFY0(dsl_dir_hold_obj(dp, ddobj, lastname, FTAG, &dd));
 
-	dsobj = dsl_dataset_create_sync_dd(dd, origin,
+	dsobj = dsl_dataset_create_sync_dd(dd, origin, dcp,
 	    flags & ~DS_CREATE_FLAG_NODIRTY, tx);
 
 	dsl_deleg_set_create_perms(dd, tx, cr);
@@ -1099,8 +1177,18 @@ dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx)
 
 	dp = ds->ds_dir->dd_pool;
 	if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg)) {
+		objset_t *os = ds->ds_objset;
+
 		/* up the hold count until we can be written out */
 		dmu_buf_add_ref(ds->ds_dbuf, ds);
+
+		/* if this dataset is encrypted, grab a reference to the DCK */
+		if (ds->ds_dir->dd_crypto_obj != 0 &&
+		    !os->os_raw_receive &&
+		    !os->os_next_write_raw[tx->tx_txg & TXG_MASK]) {
+			ASSERT3P(ds->ds_key_mapping, !=, NULL);
+			key_mapping_add_ref(ds->ds_key_mapping, ds);
+		}
 	}
 }
 
@@ -1471,6 +1559,30 @@ dsl_dataset_snapshot_sync_impl(dsl_dataset_t *ds, const char *snapname,
 		    sizeof (remap_deadlist_obj), 1, &remap_deadlist_obj, tx));
 	}
 
+	/*
+	 * Create an ivset guid for this snapshot if the dataset is
+	 * encrypted. This may be overridden by a raw receive. A
+	 * previous implementation of this code did not have this
+	 * field as part of the on-disk format for ZFS encryption
+	 * (see errata #4). As part of the remediation for this
+	 * issue, we ask the user to enable the bookmark_v2 feature
+	 * which is now a dependency of the encryption feature. We
+	 * use this as a heuristic to determine when the user has
+	 * elected to correct any datasets created with the old code.
+	 * As a result, we only do this step if the bookmark_v2
+	 * feature is enabled, which limits the number of states a
+	 * given pool / dataset can be in with regard to
+	 * correcting the issue.
+	 */
+	if (ds->ds_dir->dd_crypto_obj != 0 &&
+	    spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_BOOKMARK_V2)) {
+		uint64_t ivset_guid = unique_create();
+
+		dmu_object_zapify(mos, dsobj, DMU_OT_DSL_DATASET, tx);
+		VERIFY0(zap_add(mos, dsobj, DS_FIELD_IVSET_GUID,
+		    sizeof (ivset_guid), 1, &ivset_guid, tx));
+	}
+
 	ASSERT3U(dsl_dataset_phys(ds)->ds_prev_snap_txg, <, tx->tx_txg);
 	dsl_dataset_phys(ds)->ds_prev_snap_obj = dsobj;
 	dsl_dataset_phys(ds)->ds_prev_snap_txg = crtxg;
@@ -1750,6 +1862,11 @@ dsl_dataset_sync_done(dsl_dataset_t *ds, dmu_tx_t *tx)
 		os->os_synced_dnodes = NULL;
 	}
 
+	if (os->os_encrypted)
+		os->os_next_write_raw[tx->tx_txg & TXG_MASK] = B_FALSE;
+	else
+		ASSERT0(os->os_next_write_raw[tx->tx_txg & TXG_MASK]);
+
 	ASSERT(!dmu_objset_is_dirty(os, dmu_tx_get_txg(tx)));
 
 	dmu_buf_rele(ds->ds_dbuf, ds);
@@ -1874,6 +1991,10 @@ get_receive_resume_stats_impl(dsl_dataset_t *ds)
 		    DS_FIELD_RESUME_COMPRESSOK) == 0) {
 			fnvlist_add_boolean(token_nv, "compressok");
 		}
+		if (zap_contains(dp->dp_meta_objset, ds->ds_object,
+		    DS_FIELD_RESUME_RAWOK) == 0) {
+			fnvlist_add_boolean(token_nv, "rawok");
+		}
 		packed = fnvlist_pack(token_nv, &packed_size);
 		fnvlist_free(token_nv);
 		compressed = kmem_alloc(packed_size, KM_SLEEP);
@@ -2196,6 +2317,7 @@ dsl_get_mountpoint(dsl_dataset_t *ds, const char *dsname, char *value,
 void
 dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv)
 {
+	int err;
 	dsl_pool_t *dp = ds->ds_dir->dd_pool;
 
 	ASSERT(dsl_pool_config_held(dp));
@@ -2240,13 +2362,24 @@ dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv)
 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USERREFS,
 	    dsl_get_userrefs(ds));
 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_DEFER_DESTROY,
-	    dsl_get_defer_destroy(ds));
+	    DS_IS_DEFER_DESTROY(ds) ? 1 : 0);
+	dsl_dataset_crypt_stats(ds, nv);
 
 	if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) {
-		uint64_t written;
-		if (dsl_get_written(ds, &written) == 0) {
-			dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_WRITTEN,
-			    written);
+		uint64_t written, comp, uncomp;
+		dsl_pool_t *dp = ds->ds_dir->dd_pool;
+		dsl_dataset_t *prev;
+
+		err = dsl_dataset_hold_obj(dp,
+		    dsl_dataset_phys(ds)->ds_prev_snap_obj, FTAG, &prev);
+		if (err == 0) {
+			err = dsl_dataset_space_written(prev, ds, &written,
+			    &comp, &uncomp);
+			dsl_dataset_rele(prev, FTAG);
+			if (err == 0) {
+				dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_WRITTEN,
+				    written);
+			}
 		}
 	}
 
@@ -2685,7 +2818,7 @@ dsl_dataset_rollback_sync(void *arg, dmu_tx_t *tx)
 	fnvlist_add_string(ddra->ddra_result, "target", namebuf);
 
 	cloneobj = dsl_dataset_create_sync(ds->ds_dir, "%rollback",
-	    ds->ds_prev, DS_CREATE_FLAG_NODIRTY, kcred, tx);
+	    ds->ds_prev, DS_CREATE_FLAG_NODIRTY, kcred, NULL, tx);
 
 	VERIFY0(dsl_dataset_hold_obj(dp, cloneobj, FTAG, &clone));
 
@@ -2767,6 +2900,23 @@ dsl_dataset_promote_check(void *arg, dmu_tx_t *tx)
 		return (SET_ERROR(EXDEV));
 	}
 
+	snap = list_head(&ddpa->shared_snaps);
+	if (snap == NULL) {
+		err = SET_ERROR(ENOENT);
+		goto out;
+	}
+	origin_ds = snap->ds;
+
+	/*
+	 * Encrypted clones share a DSL Crypto Key with their origin's dsl dir.
+	 * When doing a promote we must make sure the encryption root for
+	 * both the target and the target's origin does not change to avoid
+	 * needing to rewrap encryption keys
+	 */
+	err = dsl_dataset_promote_crypt_check(hds->ds_dir, origin_ds->ds_dir);
+	if (err != 0)
+		goto out;
+
 	/*
 	 * Compute and check the amount of space to transfer.  Since this is
 	 * so expensive, don't do the preliminary check.
@@ -2950,6 +3100,8 @@ dsl_dataset_promote_sync(void *arg, dmu_tx_t *tx)
 	VERIFY0(dsl_dir_hold_obj(dp, origin_ds->ds_dir->dd_object,
 	    NULL, FTAG, &odd));
 
+	dsl_dataset_promote_crypt_sync(hds->ds_dir, odd, tx);
+
 	/* change origin's next snap */
 	dmu_buf_will_dirty(origin_ds->ds_dbuf, tx);
 	oldnext_obj = dsl_dataset_phys(origin_ds)->ds_next_snap_obj;
diff --git a/usr/src/uts/common/fs/zfs/dsl_destroy.c b/usr/src/uts/common/fs/zfs/dsl_destroy.c
index 59a946cca0..40ea657095 100644
--- a/usr/src/uts/common/fs/zfs/dsl_destroy.c
+++ b/usr/src/uts/common/fs/zfs/dsl_destroy.c
@@ -680,8 +680,8 @@ old_synchronous_dataset_destroy(dsl_dataset_t *ds, dmu_tx_t *tx)
 	ka.ds = ds;
 	ka.tx = tx;
 	VERIFY0(traverse_dataset(ds,
-	    dsl_dataset_phys(ds)->ds_prev_snap_txg, TRAVERSE_POST,
-	    kill_blkptr, &ka));
+	    dsl_dataset_phys(ds)->ds_prev_snap_txg, TRAVERSE_POST |
+	    TRAVERSE_NO_DECRYPT, kill_blkptr, &ka));
 	ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) ||
 	    dsl_dataset_phys(ds)->ds_unique_bytes == 0);
 }
@@ -784,6 +784,11 @@ dsl_dir_destroy_sync(uint64_t ddobj, dmu_tx_t *tx)
 	for (t = 0; t < DD_USED_NUM; t++)
 		ASSERT0(dsl_dir_phys(dd)->dd_used_breakdown[t]);
 
+	if (dd->dd_crypto_obj != 0) {
+		dsl_crypto_key_destroy_sync(dd->dd_crypto_obj, tx);
+		(void) spa_keystore_unload_wkey_impl(dp->dp_spa, dd->dd_object);
+	}
+
 	VERIFY0(zap_destroy(mos, dsl_dir_phys(dd)->dd_child_dir_zapobj, tx));
 	VERIFY0(zap_destroy(mos, dsl_dir_phys(dd)->dd_props_zapobj, tx));
 	if (dsl_dir_phys(dd)->dd_clones != 0)
@@ -1033,7 +1038,8 @@ dsl_destroy_head(const char *name)
 		 * remove the objects from open context so that the txg sync
 		 * is not too long.
 		 */
-		error = dmu_objset_own(name, DMU_OST_ANY, B_FALSE, FTAG, &os);
+		error = dmu_objset_own(name, DMU_OST_ANY, B_FALSE, B_FALSE,
+		    FTAG, &os);
 		if (error == 0) {
 			uint64_t prev_snap_txg =
 			    dsl_dataset_phys(dmu_objset_ds(os))->
@@ -1044,7 +1050,7 @@ dsl_destroy_head(const char *name)
 				(void) dmu_free_long_object(os, obj);
 			/* sync out all frees */
 			txg_wait_synced(dmu_objset_pool(os), 0);
-			dmu_objset_disown(os, FTAG);
+			dmu_objset_disown(os, B_FALSE, FTAG);
 		}
 	}
 
diff --git a/usr/src/uts/common/fs/zfs/dsl_dir.c b/usr/src/uts/common/fs/zfs/dsl_dir.c
index 298516f8a4..02cad5f98e 100644
--- a/usr/src/uts/common/fs/zfs/dsl_dir.c
+++ b/usr/src/uts/common/fs/zfs/dsl_dir.c
@@ -37,6 +37,7 @@
 #include <sys/dsl_deleg.h>
 #include <sys/dmu_impl.h>
 #include <sys/spa.h>
+#include <sys/spa_impl.h>
 #include <sys/metaslab.h>
 #include <sys/zap.h>
 #include <sys/zio.h>
@@ -163,6 +164,7 @@ dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj,
 {
 	dmu_buf_t *dbuf;
 	dsl_dir_t *dd;
+	dmu_object_info_t doi;
 	int err;
 
 	ASSERT(dsl_pool_config_held(dp));
@@ -171,14 +173,11 @@ dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj,
 	if (err != 0)
 		return (err);
 	dd = dmu_buf_get_user(dbuf);
-#ifdef ZFS_DEBUG
-	{
-		dmu_object_info_t doi;
-		dmu_object_info_from_db(dbuf, &doi);
-		ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_DSL_DIR);
-		ASSERT3U(doi.doi_bonus_size, >=, sizeof (dsl_dir_phys_t));
-	}
-#endif
+
+	dmu_object_info_from_db(dbuf, &doi);
+	ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_DSL_DIR);
+	ASSERT3U(doi.doi_bonus_size, >=, sizeof (dsl_dir_phys_t));
+
 	if (dd == NULL) {
 		dsl_dir_t *winner;
 
@@ -186,6 +185,21 @@ dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj,
 		dd->dd_object = ddobj;
 		dd->dd_dbuf = dbuf;
 		dd->dd_pool = dp;
+
+		if (dsl_dir_is_zapified(dd) &&
+		    zap_contains(dp->dp_meta_objset, ddobj,
+		    DD_FIELD_CRYPTO_KEY_OBJ) == 0) {
+			VERIFY0(zap_lookup(dp->dp_meta_objset,
+			    ddobj, DD_FIELD_CRYPTO_KEY_OBJ,
+			    sizeof (uint64_t), 1, &dd->dd_crypto_obj));
+
+			/* check for on-disk format errata */
+			if (dsl_dir_incompatible_encryption_version(dd)) {
+				dp->dp_spa->spa_errata =
+				    ZPOOL_ERRATA_ZOL_6845_ENCRYPTION;
+			}
+		}
+
 		mutex_init(&dd->dd_lock, NULL, MUTEX_DEFAULT, NULL);
 		dsl_prop_init(dd);
 
@@ -945,6 +959,7 @@ dsl_dir_create_sync(dsl_pool_t *dp, dsl_dir_t *pds, const char *name,
 	    DMU_OT_DSL_DIR_CHILD_MAP, DMU_OT_NONE, 0, tx);
 	if (spa_version(dp->dp_spa) >= SPA_VERSION_USED_BREAKDOWN)
 		ddphys->dd_flags |= DD_FLAG_USED_BREAKDOWN;
+
 	dmu_buf_rele(dbuf, FTAG);
 
 	return (ddobj);
@@ -1945,6 +1960,14 @@ dsl_dir_rename_check(void *arg, dmu_tx_t *tx)
 			}
 		}
 
+		/* check for encryption errors */
+		error = dsl_dir_rename_crypt_check(dd, newparent);
+		if (error != 0) {
+			dsl_dir_rele(newparent, FTAG);
+			dsl_dir_rele(dd, FTAG);
+			return (SET_ERROR(EACCES));
+		}
+
 		/* no rename into our descendant */
 		if (closest_common_ancestor(dd, newparent) == dd) {
 			dsl_dir_rele(newparent, FTAG);
diff --git a/usr/src/uts/common/fs/zfs/dsl_pool.c b/usr/src/uts/common/fs/zfs/dsl_pool.c
index 54c88b1e3c..76bae90e68 100644
--- a/usr/src/uts/common/fs/zfs/dsl_pool.c
+++ b/usr/src/uts/common/fs/zfs/dsl_pool.c
@@ -438,7 +438,8 @@ dsl_pool_destroy_obsolete_bpobj(dsl_pool_t *dp, dmu_tx_t *tx)
 }
 
 dsl_pool_t *
-dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg)
+dsl_pool_create(spa_t *spa, nvlist_t *zplprops, dsl_crypto_params_t *dcp,
+    uint64_t txg)
 {
 	int err;
 	dsl_pool_t *dp = dsl_pool_open_impl(spa, txg);
@@ -451,6 +452,7 @@ dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg)
 	/* create and open the MOS (meta-objset) */
 	dp->dp_meta_objset = dmu_objset_create_impl(spa,
 	    NULL, &dp->dp_meta_rootbp, DMU_OST_META, tx);
+	spa->spa_meta_objset = dp->dp_meta_objset;
 
 	/* create the pool directory */
 	err = zap_create_claim(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
@@ -488,11 +490,23 @@ dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg)
 	if (spa_version(spa) >= SPA_VERSION_DSL_SCRUB)
 		dsl_pool_create_origin(dp, tx);
 
+	/*
+	 * Some features may be needed when creating the root dataset, so we
+	 * create the feature objects here.
+	 */
+	if (spa_version(spa) >= SPA_VERSION_FEATURES)
+		spa_feature_create_zap_objects(spa, tx);
+
+	if (dcp != NULL && dcp->cp_crypt != ZIO_CRYPT_OFF &&
+	    dcp->cp_crypt != ZIO_CRYPT_INHERIT)
+		spa_feature_enable(spa, SPA_FEATURE_ENCRYPTION, tx);
+
 	/* create the root dataset */
-	obj = dsl_dataset_create_sync_dd(dp->dp_root_dir, NULL, 0, tx);
+	obj = dsl_dataset_create_sync_dd(dp->dp_root_dir, NULL, dcp, 0, tx);
 
 	/* create the root objset */
-	VERIFY0(dsl_dataset_hold_obj(dp, obj, FTAG, &ds));
+	VERIFY0(dsl_dataset_hold_obj_flags(dp, obj,
+	    DS_HOLD_FLAG_DECRYPT, FTAG, &ds));
 #ifdef _KERNEL
 	{
 		objset_t *os;
@@ -503,7 +517,7 @@ dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg)
 		zfs_create_fs(os, kcred, zplprops, tx);
 	}
 #endif
-	dsl_dataset_rele(ds, FTAG);
+	dsl_dataset_rele_flags(ds, DS_HOLD_FLAG_DECRYPT, FTAG);
 
 	dmu_tx_commit(tx);
 
@@ -664,9 +678,22 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t txg)
 	 */
 	zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
 	while ((ds = txg_list_remove(&dp->dp_dirty_datasets, txg)) != NULL) {
+		objset_t *os = ds->ds_objset;
+
 		ASSERT(list_link_active(&ds->ds_synced_link));
 		dmu_buf_rele(ds->ds_dbuf, ds);
 		dsl_dataset_sync(ds, zio, tx);
+
+		/*
+		 * Release any key mappings created by calls to
+		 * dsl_dataset_dirty() from the userquota accounting
+		 * code paths.
+		 */
+		if (os->os_encrypted && !os->os_raw_receive &&
+		    !os->os_next_write_raw[txg & TXG_MASK]) {
+			ASSERT3P(ds->ds_key_mapping, !=, NULL);
+			key_mapping_rele(dp->dp_spa, ds->ds_key_mapping, ds);
+		}
 	}
 	VERIFY0(zio_wait(zio));
 
@@ -676,8 +703,17 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t txg)
 	 *
 	 *  - move dead blocks from the pending deadlist to the on-disk deadlist
 	 *  - release hold from dsl_dataset_dirty()
+	 *  - release key mapping hold from dsl_dataset_dirty()
 	 */
 	while ((ds = list_remove_head(&synced_datasets)) != NULL) {
+		objset_t *os = ds->ds_objset;
+
+		if (os->os_encrypted && !os->os_raw_receive &&
+		    !os->os_next_write_raw[txg & TXG_MASK]) {
+			ASSERT3P(ds->ds_key_mapping, !=, NULL);
+			key_mapping_rele(dp->dp_spa, ds->ds_key_mapping, ds);
+		}
+
 		dsl_dataset_sync_done(ds, tx);
 	}
 	while ((dd = txg_list_remove(&dp->dp_dirty_dirs, txg)) != NULL) {
@@ -1027,7 +1063,7 @@ dsl_pool_create_origin(dsl_pool_t *dp, dmu_tx_t *tx)
 
 	/* create the origin dir, ds, & snap-ds */
 	dsobj = dsl_dataset_create_sync(dp->dp_root_dir, ORIGIN_DIR_NAME,
-	    NULL, 0, kcred, tx);
+	    NULL, 0, kcred, NULL, tx);
 	VERIFY0(dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds));
 	dsl_dataset_snapshot_sync_impl(ds, ORIGIN_DIR_NAME, tx);
 	VERIFY0(dsl_dataset_hold_obj(dp, dsl_dataset_phys(ds)->ds_prev_snap_obj,
diff --git a/usr/src/uts/common/fs/zfs/dsl_prop.c b/usr/src/uts/common/fs/zfs/dsl_prop.c
index ce0cd9b0fe..8197f0685a 100644
--- a/usr/src/uts/common/fs/zfs/dsl_prop.c
+++ b/usr/src/uts/common/fs/zfs/dsl_prop.c
@@ -926,7 +926,7 @@ typedef enum dsl_prop_getflags {
 	DSL_PROP_GET_INHERITING = 0x1,	/* searching parent of target ds */
 	DSL_PROP_GET_SNAPSHOT = 0x2,	/* snapshot dataset */
 	DSL_PROP_GET_LOCAL = 0x4,	/* local properties */
-	DSL_PROP_GET_RECEIVED = 0x8	/* received properties */
+	DSL_PROP_GET_RECEIVED = 0x8,	/* received properties */
 } dsl_prop_getflags_t;
 
 static int
@@ -1093,6 +1093,7 @@ dsl_prop_get_all_ds(dsl_dataset_t *ds, nvlist_t **nvp,
 		if (err)
 			break;
 	}
+
 out:
 	return (err);
 }
diff --git a/usr/src/uts/common/fs/zfs/dsl_scan.c b/usr/src/uts/common/fs/zfs/dsl_scan.c
index b5ef5a89e9..73634e33e2 100644
--- a/usr/src/uts/common/fs/zfs/dsl_scan.c
+++ b/usr/src/uts/common/fs/zfs/dsl_scan.c
@@ -487,6 +487,43 @@ dsl_scan_init(dsl_pool_t *dp, uint64_t txg)
 		err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
 		    DMU_POOL_SCAN, sizeof (uint64_t), SCAN_PHYS_NUMINTS,
 		    &scn->scn_phys);
+
+		/*
+		 * Detect if the pool contains the signature of #2094.  If it
+		 * does, properly update the scn->scn_phys structure and notify
+		 * the administrator by setting an errata for the pool.
+		 */
+		if (err == EOVERFLOW) {
+			uint64_t zaptmp[SCAN_PHYS_NUMINTS + 1];
+			VERIFY3S(SCAN_PHYS_NUMINTS, ==, 24);
+			VERIFY3S(offsetof(dsl_scan_phys_t, scn_flags), ==,
+			    (23 * sizeof (uint64_t)));
+
+			err = zap_lookup(dp->dp_meta_objset,
+			    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SCAN,
+			    sizeof (uint64_t), SCAN_PHYS_NUMINTS + 1, &zaptmp);
+			if (err == 0) {
+				uint64_t overflow = zaptmp[SCAN_PHYS_NUMINTS];
+
+				if (overflow & ~DSF_VISIT_DS_AGAIN ||
+				    scn->scn_async_destroying) {
+					spa->spa_errata =
+					    ZPOOL_ERRATA_ZOL_2094_ASYNC_DESTROY;
+					return (EOVERFLOW);
+				}
+
+				bcopy(zaptmp, &scn->scn_phys,
+				    SCAN_PHYS_NUMINTS * sizeof (uint64_t));
+				scn->scn_phys.scn_flags = overflow;
+
+				/* Required scrub already in progress. */
+				if (scn->scn_phys.scn_state == DSS_FINISHED ||
+				    scn->scn_phys.scn_state == DSS_CANCELED)
+					spa->spa_errata =
+					    ZPOOL_ERRATA_ZOL_2094_SCRUB;
+			}
+		}
+
 		if (err == ENOENT)
 			return (0);
 		else if (err)
@@ -1379,7 +1416,7 @@ dsl_scan_zil(dsl_pool_t *dp, zil_header_t *zh)
 	zilog = zil_alloc(dp->dp_meta_objset, zh);
 
 	(void) zil_parse(zilog, dsl_scan_zil_block, dsl_scan_zil_record, &zsa,
-	    claim_txg);
+	    claim_txg, B_FALSE);
 
 	zil_free(zilog);
 }
@@ -1637,6 +1674,13 @@ dsl_scan_prefetch_thread(void *arg)
 
 		mutex_exit(&spa->spa_scrub_lock);
 
+		if (BP_IS_PROTECTED(&spic->spic_bp)) {
+			ASSERT(BP_GET_TYPE(&spic->spic_bp) == DMU_OT_DNODE ||
+			    BP_GET_TYPE(&spic->spic_bp) == DMU_OT_OBJSET);
+			ASSERT3U(BP_GET_LEVEL(&spic->spic_bp), ==, 0);
+			zio_flags |= ZIO_FLAG_RAW;
+		}
+
 		/* issue the prefetch asynchronously */
 		(void) arc_read(scn->scn_zio_root, scn->scn_dp->dp_spa,
 		    &spic->spic_bp, dsl_scan_prefetch_cb, spic->spic_spc,
@@ -1744,6 +1788,11 @@ dsl_scan_recurse(dsl_scan_t *scn, dsl_dataset_t *ds, dmu_objset_type_t ostype,
 		int epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT;
 		arc_buf_t *buf;
 
+		if (BP_IS_PROTECTED(bp)) {
+			ASSERT3U(BP_GET_COMPRESS(bp), ==, ZIO_COMPRESS_OFF);
+			zio_flags |= ZIO_FLAG_RAW;
+		}
+
 		err = arc_read(NULL, dp->dp_spa, bp, arc_getbuf_func, &buf,
 		    ZIO_PRIORITY_SCRUB, zio_flags, &flags, zb);
 		if (err) {
diff --git a/usr/src/uts/common/fs/zfs/hkdf.c b/usr/src/uts/common/fs/zfs/hkdf.c
new file mode 100644
index 0000000000..1d6cc898e4
--- /dev/null
+++ b/usr/src/uts/common/fs/zfs/hkdf.c
@@ -0,0 +1,173 @@
+/*
+ * CDDL HEADER START
+ *
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2017, Datto, Inc. All rights reserved.
+ */
+
+#include <sys/dmu.h>
+#include <sys/hkdf.h>
+#include <sys/crypto/api.h>
+#include <sys/sha2.h>
+#include <sys/hkdf.h>
+
+/*
+ * HKDF extract step: HMAC-SHA512 of key_material keyed by salt. The digest
+ * (SHA512_DIGEST_LENGTH bytes) is written to out_buf. Returns 0 or EIO.
+ */
+static int
+hkdf_sha512_extract(uint8_t *salt, uint_t salt_len, uint8_t *key_material,
+    uint_t km_len, uint8_t *out_buf)
+{
+	crypto_mechanism_t mech;
+	crypto_key_t key;
+	crypto_data_t msg, out;
+
+	/* input is the key material; output is one digest in out_buf */
+	msg.cd_format = CRYPTO_DATA_RAW;
+	msg.cd_offset = 0;
+	msg.cd_length = km_len;
+	msg.cd_raw.iov_base = (char *)key_material;
+	msg.cd_raw.iov_len = msg.cd_length;
+
+	out.cd_format = CRYPTO_DATA_RAW;
+	out.cd_offset = 0;
+	out.cd_length = SHA512_DIGEST_LENGTH;
+	out.cd_raw.iov_base = (char *)out_buf;
+	out.cd_raw.iov_len = out.cd_length;
+
+	/* HMAC-SHA512 without mechanism parameters, keyed with the raw salt */
+	mech.cm_type = crypto_mech2id(SUN_CKM_SHA512_HMAC);
+	mech.cm_param = NULL;
+	mech.cm_param_len = 0;
+	key.ck_format = CRYPTO_KEY_RAW;
+	key.ck_length = CRYPTO_BYTES2BITS(salt_len);
+	key.ck_data = salt;
+
+	if (crypto_mac(&mech, &msg, &key, NULL, &out, NULL) != CRYPTO_SUCCESS)
+		return (SET_ERROR(EIO));
+
+	return (0);
+}
+
+/*
+ * HKDF expand step. Block i (1..N) is HMAC-SHA512 keyed with extract_key over
+ * (previous block | info | i as one byte); the first pass has no previous
+ * block. Blocks are concatenated into out_buf, truncated to out_len bytes.
+ * Returns 0, EINVAL if more than 255 blocks are needed, or EIO on MAC failure.
+ */
+static int
+hkdf_sha512_expand(uint8_t *extract_key, uint8_t *info, uint_t info_len,
+    uint8_t *out_buf, uint_t out_len)
+{
+	int ret;
+	crypto_mechanism_t mech;
+	crypto_context_t ctx;
+	crypto_key_t key;
+	crypto_data_t T_cd, info_cd, c_cd;
+	uint_t i, T_len = 0, pos = 0;
+	uint8_t c;
+	uint8_t T[SHA512_DIGEST_LENGTH];
+	/* blocks needed, rounded up; written so it cannot overflow uint_t */
+	uint_t N = out_len / SHA512_DIGEST_LENGTH +
+	    ((out_len % SHA512_DIGEST_LENGTH) != 0);
+
+	if (N > 255)
+		return (SET_ERROR(EINVAL));
+
+	/* initialize HMAC mechanism */
+	mech.cm_type = crypto_mech2id(SUN_CKM_SHA512_HMAC);
+	mech.cm_param = NULL;
+	mech.cm_param_len = 0;
+
+	/* initialize the extracted key as the HMAC key */
+	key.ck_format = CRYPTO_KEY_RAW;
+	key.ck_length = CRYPTO_BYTES2BITS(SHA512_DIGEST_LENGTH);
+	key.ck_data = extract_key;
+
+	/* initialize crypto data for the input and output data */
+	T_cd.cd_format = CRYPTO_DATA_RAW;
+	T_cd.cd_offset = 0;
+	T_cd.cd_raw.iov_base = (char *)T;
+
+	c_cd.cd_format = CRYPTO_DATA_RAW;
+	c_cd.cd_offset = 0;
+	c_cd.cd_length = 1;
+	c_cd.cd_raw.iov_base = (char *)&c;
+	c_cd.cd_raw.iov_len = c_cd.cd_length;
+
+	info_cd.cd_format = CRYPTO_DATA_RAW;
+	info_cd.cd_offset = 0;
+	info_cd.cd_length = info_len;
+	info_cd.cd_raw.iov_base = (char *)info;
+	info_cd.cd_raw.iov_len = info_cd.cd_length;
+
+	for (i = 1; i <= N; i++) {
+		c = i;
+
+		T_cd.cd_length = T_len;
+		T_cd.cd_raw.iov_len = T_cd.cd_length;
+
+		ret = crypto_mac_init(&mech, &key, NULL, &ctx, NULL);
+		if (ret != CRYPTO_SUCCESS)
+			return (SET_ERROR(EIO));
+
+		if (crypto_mac_update(ctx, &T_cd, NULL) != CRYPTO_SUCCESS ||
+		    crypto_mac_update(ctx, &info_cd, NULL) != CRYPTO_SUCCESS ||
+		    crypto_mac_update(ctx, &c_cd, NULL) != CRYPTO_SUCCESS)
+			return (SET_ERROR(EIO));
+
+		T_len = SHA512_DIGEST_LENGTH;
+		T_cd.cd_length = T_len;
+		T_cd.cd_raw.iov_len = T_cd.cd_length;
+
+		if (crypto_mac_final(ctx, &T_cd, NULL) != CRYPTO_SUCCESS)
+			return (SET_ERROR(EIO));
+
+		bcopy(T, out_buf + pos,
+		    (i != N) ? SHA512_DIGEST_LENGTH : (out_len - pos));
+		pos += SHA512_DIGEST_LENGTH;
+	}
+
+	return (0);
+}
+
+/*
+ * HKDF is designed to be a relatively fast function for deriving keys from a
+ * master key + a salt. We use it to generate new encryption keys so as to
+ * avoid hitting the cryptographic limits of the underlying encryption modes.
+ * Note that, for the sake of deriving encryption keys, the info parameter is
+ * called the "salt" everywhere else in the code.
+ *
+ * Runs the extract step over (salt, key_material) and feeds the result, with
+ * info, to the expand step, which fills output_key with out_len bytes.
+ * Returns 0 on success or the first nonzero error from either step.
+ */
+int
+hkdf_sha512(uint8_t *key_material, uint_t km_len, uint8_t *salt,
+    uint_t salt_len, uint8_t *info, uint_t info_len, uint8_t *output_key,
+    uint_t out_len)
+{
+	uint8_t prk[SHA512_DIGEST_LENGTH];	/* extract-step output */
+	int err;
+
+	err = hkdf_sha512_extract(salt, salt_len, key_material, km_len, prk);
+	if (err == 0) {
+		err = hkdf_sha512_expand(prk, info, info_len, output_key,
+		    out_len);
+	}
+
+	return (err);
+}
diff --git a/usr/src/uts/common/fs/zfs/refcount.c b/usr/src/uts/common/fs/zfs/refcount.c
index cac716e469..657a46717c 100644
--- a/usr/src/uts/common/fs/zfs/refcount.c
+++ b/usr/src/uts/common/fs/zfs/refcount.c
@@ -234,9 +234,10 @@ zfs_refcount_transfer(zfs_refcount_t *dst, zfs_refcount_t *src)
 	list_destroy(&removed);
 }
 
+/* ARGSUSED */
 void
-zfs_refcount_transfer_ownership(zfs_refcount_t *rc, void *current_holder,
-    void *new_holder)
+zfs_refcount_transfer_ownership_many(zfs_refcount_t *rc, uint64_t number,
+    void *current_holder, void *new_holder)
 {
 	reference_t *ref;
 	boolean_t found = B_FALSE;
@@ -249,7 +250,8 @@ zfs_refcount_transfer_ownership(zfs_refcount_t *rc, void *current_holder,
 
 	for (ref = list_head(&rc->rc_list); ref;
 	    ref = list_next(&rc->rc_list, ref)) {
-		if (ref->ref_holder == current_holder) {
+		if (ref->ref_holder == current_holder &&
+		    ref->ref_number == number) {
 			ref->ref_holder = new_holder;
 			found = B_TRUE;
 			break;
@@ -259,6 +261,14 @@ zfs_refcount_transfer_ownership(zfs_refcount_t *rc, void *current_holder,
 	mutex_exit(&rc->rc_mtx);
 }
 
+/* Convenience wrapper: transfer ownership of a reference with count 1. */
+void
+zfs_refcount_transfer_ownership(zfs_refcount_t *rc, void *current_holder,
+    void *new_holder)
+{
+	zfs_refcount_transfer_ownership_many(rc, 1, current_holder, new_holder);
+}
+
 /*
  * If tracking is enabled, return true if a reference exists that matches
  * the "holder" tag. If tracking is disabled, then return true if a reference
diff --git a/usr/src/uts/common/fs/zfs/sa.c b/usr/src/uts/common/fs/zfs/sa.c
index d3c0a3e8ef..5a4bc705aa 100644
--- a/usr/src/uts/common/fs/zfs/sa.c
+++ b/usr/src/uts/common/fs/zfs/sa.c
@@ -680,7 +680,7 @@ sa_build_layouts(sa_handle_t *hdl, sa_bulk_attr_t *attr_desc, int attr_count,
 		boolean_t dummy;
 
 		if (hdl->sa_spill == NULL) {
-			VERIFY(dmu_spill_hold_by_bonus(hdl->sa_bonus, NULL,
+			VERIFY(dmu_spill_hold_by_bonus(hdl->sa_bonus, 0, NULL,
 			    &hdl->sa_spill) == 0);
 		}
 		dmu_buf_will_dirty(hdl->sa_spill, tx);
diff --git a/usr/src/uts/common/fs/zfs/spa.c b/usr/src/uts/common/fs/zfs/spa.c
index 718b2868de..c72e462b4f 100644
--- a/usr/src/uts/common/fs/zfs/spa.c
+++ b/usr/src/uts/common/fs/zfs/spa.c
@@ -1201,6 +1201,8 @@ spa_activate(spa_t *spa, int mode)
 	avl_create(&spa->spa_errlist_last,
 	    spa_error_entry_compare, sizeof (spa_error_entry_t),
 	    offsetof(spa_error_entry_t, se_avl));
+
+	spa_keystore_init(&spa->spa_keystore);
 }
 
 /*
@@ -1252,10 +1254,11 @@ spa_deactivate(spa_t *spa)
 	 * still have errors left in the queues.  Empty them just in case.
 	 */
 	spa_errlog_drain(spa);
-
 	avl_destroy(&spa->spa_errlist_scrub);
 	avl_destroy(&spa->spa_errlist_last);
 
+	spa_keystore_fini(&spa->spa_keystore);
+
 	spa->spa_state = POOL_STATE_UNINITIALIZED;
 
 	mutex_enter(&spa->spa_proc_lock);
@@ -2089,8 +2092,8 @@ spa_load_verify(spa_t *spa)
 			    spa_load_verify_data, spa_load_verify_metadata);
 		}
 		error = traverse_pool(spa, spa->spa_verify_min_txg,
-		    TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA,
-		    spa_load_verify_cb, rio);
+		    TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA |
+		    TRAVERSE_NO_DECRYPT, spa_load_verify_cb, rio);
 	}
 
 	(void) zio_wait(rio);
@@ -2290,7 +2293,7 @@ spa_load(spa_t *spa, spa_load_state_t state, spa_import_type_t type)
 			spa->spa_loaded_ts.tv_nsec = 0;
 		}
 		if (error != EBADF) {
-			zfs_ereport_post(ereport, spa, NULL, NULL, 0, 0);
+			zfs_ereport_post(ereport, spa, NULL, NULL, NULL, 0, 0);
 		}
 	}
 	spa->spa_load_state = error ? SPA_LOAD_ERROR : SPA_LOAD_NONE;
@@ -3260,6 +3263,16 @@ spa_ld_check_features(spa_t *spa, boolean_t *missing_feat_writep)
 			return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
 	}
 
+	/*
+	 * Encryption was added before bookmark_v2, even though bookmark_v2
+	 * is now a dependency. If this pool has encryption enabled without
+	 * bookmark_v2, trigger an errata message.
+	 */
+	if (spa_feature_is_enabled(spa, SPA_FEATURE_ENCRYPTION) &&
+	    !spa_feature_is_enabled(spa, SPA_FEATURE_BOOKMARK_V2)) {
+		spa->spa_errata = ZPOOL_ERRATA_ZOL_8308_ENCRYPTION;
+	}
+
 	return (0);
 }
 
@@ -4829,11 +4842,27 @@ spa_l2cache_drop(spa_t *spa)
 }
 
 /*
+ * Verify encryption parameters for spa creation. If we are encrypting, we must
+ * have the encryption feature flag enabled.
+ */
+static int
+spa_create_check_encryption_params(dsl_crypto_params_t *dcp,
+    boolean_t has_encryption)
+{
+	/* anything other than OFF/INHERIT needs the encryption feature */
+	if (dcp->cp_crypt == ZIO_CRYPT_OFF ||
+	    dcp->cp_crypt == ZIO_CRYPT_INHERIT || has_encryption)
+		return (dmu_objset_create_crypt_check(NULL, dcp, NULL));
+
+	return (SET_ERROR(ENOTSUP));
+}
+
+/*
  * Pool Creation
  */
 int
 spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
-    nvlist_t *zplprops)
+    nvlist_t *zplprops, dsl_crypto_params_t *dcp)
 {
 	spa_t *spa;
 	char *altroot = NULL;
@@ -4848,6 +4877,9 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
 	boolean_t has_features;
 	char *poolname;
 	nvlist_t *nvl;
+	boolean_t has_encryption;
+	spa_feature_t feat;
+	char *feat_name;
 
 	if (props == NULL ||
 	    nvlist_lookup_string(props,
@@ -4888,10 +4920,27 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
 		spa->spa_import_flags |= ZFS_IMPORT_TEMP_NAME;
 
 	has_features = B_FALSE;
+	has_encryption = B_FALSE;
 	for (nvpair_t *elem = nvlist_next_nvpair(props, NULL);
 	    elem != NULL; elem = nvlist_next_nvpair(props, elem)) {
-		if (zpool_prop_feature(nvpair_name(elem)))
+		if (zpool_prop_feature(nvpair_name(elem))) {
 			has_features = B_TRUE;
+			feat_name = strchr(nvpair_name(elem), '@') + 1;
+			VERIFY0(zfeature_lookup_name(feat_name, &feat));
+			if (feat == SPA_FEATURE_ENCRYPTION)
+				has_encryption = B_TRUE;
+		}
+	}
+
+	/* verify encryption params, if they were provided */
+	if (dcp != NULL) {
+		error = spa_create_check_encryption_params(dcp, has_encryption);
+		if (error != 0) {
+			spa_deactivate(spa);
+			spa_remove(spa);
+			mutex_exit(&spa_namespace_lock);
+			return (error);
+		}
 	}
 
 	if (has_features || nvlist_lookup_uint64(props,
@@ -4991,8 +5040,7 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
 	}
 
 	spa->spa_is_initializing = B_TRUE;
-	spa->spa_dsl_pool = dp = dsl_pool_create(spa, zplprops, txg);
-	spa->spa_meta_objset = dp->dp_meta_objset;
+	spa->spa_dsl_pool = dp = dsl_pool_create(spa, zplprops, dcp, txg);
 	spa->spa_is_initializing = B_FALSE;
 
 	/*
@@ -5017,9 +5065,6 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
 		cmn_err(CE_PANIC, "failed to add pool config");
 	}
 
-	if (spa_version(spa) >= SPA_VERSION_FEATURES)
-		spa_feature_create_zap_objects(spa, tx);
-
 	if (zap_add(spa->spa_meta_objset,
 	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CREATION_VERSION,
 	    sizeof (uint64_t), 1, &version, tx) != 0) {
diff --git a/usr/src/uts/common/fs/zfs/spa_config.c b/usr/src/uts/common/fs/zfs/spa_config.c
index e01260f312..4719696ca4 100644
--- a/usr/src/uts/common/fs/zfs/spa_config.c
+++ b/usr/src/uts/common/fs/zfs/spa_config.c
@@ -281,7 +281,7 @@ spa_write_cachefile(spa_t *target, boolean_t removing, boolean_t postsysevent)
 		 */
 		if (target->spa_ccw_fail_time == 0) {
 			zfs_ereport_post(FM_EREPORT_ZFS_CONFIG_CACHE_WRITE,
-			    target, NULL, NULL, 0, 0);
+			    target, NULL, NULL, NULL, 0, 0);
 		}
 		target->spa_ccw_fail_time = gethrtime();
 		spa_async_request(target, SPA_ASYNC_CONFIG_UPDATE);
@@ -408,6 +408,7 @@ spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats)
 	fnvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE, spa_state(spa));
 	fnvlist_add_uint64(config, ZPOOL_CONFIG_POOL_TXG, txg);
 	fnvlist_add_uint64(config, ZPOOL_CONFIG_POOL_GUID, spa_guid(spa));
+	fnvlist_add_uint64(config, ZPOOL_CONFIG_ERRATA, spa->spa_errata);
 	if (spa->spa_comment != NULL) {
 		fnvlist_add_string(config, ZPOOL_CONFIG_COMMENT,
 		    spa->spa_comment);
diff --git a/usr/src/uts/common/fs/zfs/spa_errlog.c b/usr/src/uts/common/fs/zfs/spa_errlog.c
index 8ce780537a..f717ebb8c0 100644
--- a/usr/src/uts/common/fs/zfs/spa_errlog.c
+++ b/usr/src/uts/common/fs/zfs/spa_errlog.c
@@ -90,9 +90,8 @@ name_to_bookmark(char *buf, zbookmark_phys_t *zb)
  * during spa_errlog_sync().
  */
 void
-spa_log_error(spa_t *spa, zio_t *zio)
+spa_log_error(spa_t *spa, const zbookmark_phys_t *zb)
 {
-	zbookmark_phys_t *zb = &zio->io_logical->io_bookmark;
 	spa_error_entry_t search;
 	spa_error_entry_t *new;
 	avl_tree_t *tree;
diff --git a/usr/src/uts/common/fs/zfs/spa_history.c b/usr/src/uts/common/fs/zfs/spa_history.c
index 2ad0dcfc5c..897d3c6e9a 100644
--- a/usr/src/uts/common/fs/zfs/spa_history.c
+++ b/usr/src/uts/common/fs/zfs/spa_history.c
@@ -384,11 +384,16 @@ spa_history_log_nvl(spa_t *spa, nvlist_t *nvl)
 {
 	int err = 0;
 	dmu_tx_t *tx;
-	nvlist_t *nvarg;
+	nvlist_t *nvarg, *in_nvl = NULL;
 
 	if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY || !spa_writeable(spa))
 		return (SET_ERROR(EINVAL));
 
+	err = nvlist_lookup_nvlist(nvl, ZPOOL_HIST_INPUT_NVL, &in_nvl);
+	if (err == 0) {
+		(void) nvlist_remove_all(in_nvl, ZPOOL_HIDDEN_ARGS);
+	}
+
 	tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir);
 	err = dmu_tx_assign(tx, TXG_WAIT);
 	if (err) {
diff --git a/usr/src/uts/common/fs/zfs/sys/abd.h b/usr/src/uts/common/fs/zfs/sys/abd.h
index 952a0b68ba..621635933e 100644
--- a/usr/src/uts/common/fs/zfs/sys/abd.h
+++ b/usr/src/uts/common/fs/zfs/sys/abd.h
@@ -73,6 +73,7 @@ abd_t *abd_alloc_for_io(size_t, boolean_t);
 abd_t *abd_alloc_sametype(abd_t *, size_t);
 void abd_free(abd_t *);
 abd_t *abd_get_offset(abd_t *, size_t);
+abd_t *abd_get_offset_size(abd_t *, size_t, size_t);
 abd_t *abd_get_from_buf(void *, size_t);
 void abd_put(abd_t *);
 
diff --git a/usr/src/uts/common/fs/zfs/sys/arc.h b/usr/src/uts/common/fs/zfs/sys/arc.h
index 1ce4740fcf..f636d3dcf2 100644
--- a/usr/src/uts/common/fs/zfs/sys/arc.h
+++ b/usr/src/uts/common/fs/zfs/sys/arc.h
@@ -58,8 +58,17 @@ _NOTE(CONSTCOND) } while (0)
 
 typedef struct arc_buf_hdr arc_buf_hdr_t;
 typedef struct arc_buf arc_buf_t;
+
+/*
+ * Because the ARC can store encrypted data, errors (not due to bugs) may arise
+ * while transforming data into its desired format - specifically, when
+ * decrypting, the key may not be present, or the HMAC may not be correct
+ * which signifies deliberate tampering with the on-disk state
+ * (assuming that the checksum was correct). The "error" parameter will be
+ * nonzero in this case, even if there is no associated zio.
+ */
 typedef void arc_read_done_func_t(zio_t *zio, const zbookmark_phys_t *zb,
-    const blkptr_t *bp, arc_buf_t *buf, void *private);
+    const blkptr_t *bp, arc_buf_t *buf, void *priv);
 typedef void arc_write_done_func_t(zio_t *zio, arc_buf_t *buf, void *priv);
 
 /* generic arc_done_func_t's which you can use */
@@ -93,20 +102,29 @@ typedef enum arc_flags
 	ARC_FLAG_L2_WRITING		= 1 << 12,	/* write in progress */
 	ARC_FLAG_L2_EVICTED		= 1 << 13,	/* evicted during I/O */
 	ARC_FLAG_L2_WRITE_HEAD		= 1 << 14,	/* head of write list */
+	/*
+	 * Encrypted or authenticated on disk (may be plaintext in memory).
+	 * This header has b_crypt_hdr allocated. Does not include indirect
+	 * blocks with checksums of MACs which will also have their X
+	 * (encrypted) bit set in the bp.
+	 */
+	ARC_FLAG_PROTECTED		= 1 << 15,
+	/* data has not been authenticated yet */
+	ARC_FLAG_NOAUTH			= 1 << 16,
 	/* indicates that the buffer contains metadata (otherwise, data) */
-	ARC_FLAG_BUFC_METADATA		= 1 << 15,
+	ARC_FLAG_BUFC_METADATA		= 1 << 17,
 
 	/* Flags specifying whether optional hdr struct fields are defined */
-	ARC_FLAG_HAS_L1HDR		= 1 << 16,
-	ARC_FLAG_HAS_L2HDR		= 1 << 17,
+	ARC_FLAG_HAS_L1HDR		= 1 << 18,
+	ARC_FLAG_HAS_L2HDR		= 1 << 19,
 
 	/*
 	 * Indicates the arc_buf_hdr_t's b_pdata matches the on-disk data.
 	 * This allows the l2arc to use the blkptr's checksum to verify
 	 * the data without having to store the checksum in the hdr.
 	 */
-	ARC_FLAG_COMPRESSED_ARC		= 1 << 18,
-	ARC_FLAG_SHARED_DATA		= 1 << 19,
+	ARC_FLAG_COMPRESSED_ARC		= 1 << 20,
+	ARC_FLAG_SHARED_DATA		= 1 << 21,
 
 	/*
 	 * The arc buffer's compression mode is stored in the top 7 bits of the
@@ -125,7 +143,12 @@ typedef enum arc_flags
 
 typedef enum arc_buf_flags {
 	ARC_BUF_FLAG_SHARED		= 1 << 0,
-	ARC_BUF_FLAG_COMPRESSED		= 1 << 1
+	ARC_BUF_FLAG_COMPRESSED		= 1 << 1,
+	/*
+	 * indicates whether this arc_buf_t is encrypted, regardless of
+	 * state on-disk
+	 */
+	ARC_BUF_FLAG_ENCRYPTED		= 1 << 2
 } arc_buf_flags_t;
 
 struct arc_buf {
@@ -159,15 +182,31 @@ typedef enum arc_space_type {
 void arc_space_consume(uint64_t space, arc_space_type_t type);
 void arc_space_return(uint64_t space, arc_space_type_t type);
 boolean_t arc_is_metadata(arc_buf_t *buf);
+boolean_t arc_is_encrypted(arc_buf_t *buf);
+boolean_t arc_is_unauthenticated(arc_buf_t *buf);
 enum zio_compress arc_get_compression(arc_buf_t *buf);
-int arc_decompress(arc_buf_t *buf);
+void arc_get_raw_params(arc_buf_t *buf, boolean_t *byteorder, uint8_t *salt,
+    uint8_t *iv, uint8_t *mac);
+int arc_untransform(arc_buf_t *buf, spa_t *spa, const zbookmark_phys_t *zb,
+    boolean_t in_place);
+void arc_convert_to_raw(arc_buf_t *buf, uint64_t dsobj, boolean_t byteorder,
+    dmu_object_type_t ot, const uint8_t *salt, const uint8_t *iv,
+    const uint8_t *mac);
 arc_buf_t *arc_alloc_buf(spa_t *spa, void *tag, arc_buf_contents_t type,
     int32_t size);
 arc_buf_t *arc_alloc_compressed_buf(spa_t *spa, void *tag,
     uint64_t psize, uint64_t lsize, enum zio_compress compression_type);
+arc_buf_t *arc_alloc_raw_buf(spa_t *spa, void *tag, uint64_t dsobj,
+    boolean_t byteorder, const uint8_t *salt, const uint8_t *iv,
+    const uint8_t *mac, dmu_object_type_t ot, uint64_t psize, uint64_t lsize,
+    enum zio_compress compression_type);
 arc_buf_t *arc_loan_buf(spa_t *spa, boolean_t is_metadata, int size);
 arc_buf_t *arc_loan_compressed_buf(spa_t *spa, uint64_t psize, uint64_t lsize,
     enum zio_compress compression_type);
+arc_buf_t *arc_loan_raw_buf(spa_t *spa, uint64_t dsobj, boolean_t byteorder,
+    const uint8_t *salt, const uint8_t *iv, const uint8_t *mac,
+    dmu_object_type_t ot, uint64_t psize, uint64_t lsize,
+    enum zio_compress compression_type);
 void arc_return_buf(arc_buf_t *buf, void *tag);
 void arc_loan_inuse_buf(arc_buf_t *buf, void *tag);
 void arc_buf_destroy(arc_buf_t *buf, void *tag);
diff --git a/usr/src/uts/common/fs/zfs/sys/dbuf.h b/usr/src/uts/common/fs/zfs/sys/dbuf.h
index eedefa3615..271232c61c 100644
--- a/usr/src/uts/common/fs/zfs/sys/dbuf.h
+++ b/usr/src/uts/common/fs/zfs/sys/dbuf.h
@@ -54,6 +54,7 @@ extern "C" {
 #define	DB_RF_NOPREFETCH	(1 << 3)
 #define	DB_RF_NEVERWAIT		(1 << 4)
 #define	DB_RF_CACHED		(1 << 5)
+#define	DB_RF_NO_DECRYPT	(1 << 6)
 
 /*
  * The simplified state transition diagram for dbufs looks like:
@@ -153,6 +154,16 @@ typedef struct dbuf_dirty_record {
 			override_states_t dr_override_state;
 			uint8_t dr_copies;
 			boolean_t dr_nopwrite;
+			boolean_t dr_has_raw_params;
+
+			/*
+			 * If dr_has_raw_params is set, the following crypt
+			 * params will be set on the BP that's written.
+			 */
+			boolean_t dr_byteorder;
+			uint8_t	dr_salt[ZIO_DATA_SALT_LEN];
+			uint8_t	dr_iv[ZIO_DATA_IV_LEN];
+			uint8_t	dr_mac[ZIO_DATA_MAC_LEN];
 		} dl;
 	} dt;
 } dbuf_dirty_record_t;
diff --git a/usr/src/uts/common/fs/zfs/sys/ddt.h b/usr/src/uts/common/fs/zfs/sys/ddt.h
index 15d2a9a7ad..9181e59fff 100644
--- a/usr/src/uts/common/fs/zfs/sys/ddt.h
+++ b/usr/src/uts/common/fs/zfs/sys/ddt.h
@@ -67,9 +67,10 @@ enum ddt_class {
 typedef struct ddt_key {
 	zio_cksum_t	ddk_cksum;	/* 256-bit block checksum */
 	/*
-	 * Encoded with logical & physical size, and compression, as follows:
+	 * Encoded with logical & physical size, encryption, and compression,
+	 * as follows:
 	 *   +-------+-------+-------+-------+-------+-------+-------+-------+
-	 *   |   0   |   0   |   0   | comp  |     PSIZE     |     LSIZE     |
+	 *   |   0   |   0   |   0   |X| comp|     PSIZE     |     LSIZE     |
 	 *   +-------+-------+-------+-------+-------+-------+-------+-------+
 	 */
 	uint64_t	ddk_prop;
@@ -85,11 +86,17 @@ typedef struct ddt_key {
 #define	DDK_SET_PSIZE(ddk, x)	\
 	BF64_SET_SB((ddk)->ddk_prop, 16, 16, SPA_MINBLOCKSHIFT, 1, x)
 
-#define	DDK_GET_COMPRESS(ddk)		BF64_GET((ddk)->ddk_prop, 32, 8)
-#define	DDK_SET_COMPRESS(ddk, x)	BF64_SET((ddk)->ddk_prop, 32, 8, x)
+#define	DDK_GET_COMPRESS(ddk)		BF64_GET((ddk)->ddk_prop, 32, 7)
+#define	DDK_SET_COMPRESS(ddk, x)	BF64_SET((ddk)->ddk_prop, 32, 7, x)
+
+#define	DDK_GET_CRYPT(ddk)		BF64_GET((ddk)->ddk_prop, 39, 1)
+#define	DDK_SET_CRYPT(ddk, x)	BF64_SET((ddk)->ddk_prop, 39, 1, x)
 
 #define	DDT_KEY_WORDS	(sizeof (ddt_key_t) / sizeof (uint64_t))
 
+#define	DDE_GET_NDVAS(dde)	/* one DVA fewer when the crypt bit is set */ \
+	(DDK_GET_CRYPT(&(dde)->dde_key) ? SPA_DVAS_PER_BP - 1 : SPA_DVAS_PER_BP)
+
 typedef struct ddt_phys {
 	dva_t		ddp_dva[SPA_DVAS_PER_BP];
 	uint64_t	ddp_refcnt;
diff --git a/usr/src/uts/common/fs/zfs/sys/dmu.h b/usr/src/uts/common/fs/zfs/sys/dmu.h
index 28462ff4d5..ffce616cbc 100644
--- a/usr/src/uts/common/fs/zfs/sys/dmu.h
+++ b/usr/src/uts/common/fs/zfs/sys/dmu.h
@@ -77,6 +77,7 @@ struct arc_buf;
 struct zio_prop;
 struct sa_handle;
 struct locked_range;
+struct dsl_crypto_params;
 
 typedef struct objset objset_t;
 typedef struct dmu_tx dmu_tx_t;
@@ -106,7 +107,8 @@ typedef enum dmu_object_byteswap {
 
 #define	DMU_OT_NEWTYPE 0x80
 #define	DMU_OT_METADATA 0x40
-#define	DMU_OT_BYTESWAP_MASK 0x3f
+#define	DMU_OT_ENCRYPTED 0x20
+#define	DMU_OT_BYTESWAP_MASK 0x1f
 
 /*
  * Defines a uint8_t object type. Object types specify if the data
@@ -114,18 +116,28 @@ typedef enum dmu_object_byteswap {
  * (dmu_object_byteswap_t). All of the types created by this method
  * are cached in the dbuf metadata cache.
  */
-#define	DMU_OT(byteswap, metadata) \
+#define	DMU_OT(byteswap, metadata, encrypted) \
 	(DMU_OT_NEWTYPE | \
 	((metadata) ? DMU_OT_METADATA : 0) | \
+	((encrypted) ? DMU_OT_ENCRYPTED : 0) | \
 	((byteswap) & DMU_OT_BYTESWAP_MASK))
 
 #define	DMU_OT_IS_VALID(ot) (((ot) & DMU_OT_NEWTYPE) ? \
 	((ot) & DMU_OT_BYTESWAP_MASK) < DMU_BSWAP_NUMFUNCS : \
 	(ot) < DMU_OT_NUMTYPES)
 
+/*
+ * MDB doesn't have dmu_ot; it defines these macros itself.
+ */
+#ifndef ZFS_MDB
+#define	DMU_OT_IS_METADATA_IMPL(ot) (dmu_ot[ot].ot_metadata)
+#define	DMU_OT_IS_ENCRYPTED_IMPL(ot) (dmu_ot[ot].ot_encrypt)
+#define	DMU_OT_BYTESWAP_IMPL(ot) (dmu_ot[ot].ot_byteswap)
+#endif
+
 #define	DMU_OT_IS_METADATA(ot) (((ot) & DMU_OT_NEWTYPE) ? \
 	((ot) & DMU_OT_METADATA) : \
-	dmu_ot[(ot)].ot_metadata)
+	DMU_OT_IS_METADATA_IMPL(ot))
 
 #define	DMU_OT_IS_DDT(ot) \
 	((ot) == DMU_OT_DDT_ZAP)
@@ -140,6 +152,10 @@ typedef enum dmu_object_byteswap {
 #define	DMU_OT_IS_METADATA_CACHED(ot) (((ot) & DMU_OT_NEWTYPE) ? \
 	B_TRUE : dmu_ot[(ot)].ot_dbuf_metadata_cache)
 
+#define	DMU_OT_IS_ENCRYPTED(ot) (((ot) & DMU_OT_NEWTYPE) ? \
+	((ot) & DMU_OT_ENCRYPTED) : \
+	DMU_OT_IS_ENCRYPTED_IMPL(ot))
+
 /*
  * These object types use bp_fill != 1 for their L0 bp's. Therefore they can't
  * have their data embedded (i.e. use a BP_IS_EMBEDDED() bp), because bp_fill
@@ -150,7 +166,7 @@ typedef enum dmu_object_byteswap {
 
 #define	DMU_OT_BYTESWAP(ot) (((ot) & DMU_OT_NEWTYPE) ? \
 	((ot) & DMU_OT_BYTESWAP_MASK) : \
-	dmu_ot[(ot)].ot_byteswap)
+	DMU_OT_BYTESWAP_IMPL(ot))
 
 typedef enum dmu_object_type {
 	DMU_OT_NONE,
@@ -236,16 +252,27 @@ typedef enum dmu_object_type {
 	/*
 	 * Names for valid types declared with DMU_OT().
 	 */
-	DMU_OTN_UINT8_DATA = DMU_OT(DMU_BSWAP_UINT8, B_FALSE),
-	DMU_OTN_UINT8_METADATA = DMU_OT(DMU_BSWAP_UINT8, B_TRUE),
-	DMU_OTN_UINT16_DATA = DMU_OT(DMU_BSWAP_UINT16, B_FALSE),
-	DMU_OTN_UINT16_METADATA = DMU_OT(DMU_BSWAP_UINT16, B_TRUE),
-	DMU_OTN_UINT32_DATA = DMU_OT(DMU_BSWAP_UINT32, B_FALSE),
-	DMU_OTN_UINT32_METADATA = DMU_OT(DMU_BSWAP_UINT32, B_TRUE),
-	DMU_OTN_UINT64_DATA = DMU_OT(DMU_BSWAP_UINT64, B_FALSE),
-	DMU_OTN_UINT64_METADATA = DMU_OT(DMU_BSWAP_UINT64, B_TRUE),
-	DMU_OTN_ZAP_DATA = DMU_OT(DMU_BSWAP_ZAP, B_FALSE),
-	DMU_OTN_ZAP_METADATA = DMU_OT(DMU_BSWAP_ZAP, B_TRUE),
+	DMU_OTN_UINT8_DATA = DMU_OT(DMU_BSWAP_UINT8, B_FALSE, B_FALSE),
+	DMU_OTN_UINT8_METADATA = DMU_OT(DMU_BSWAP_UINT8, B_TRUE, B_FALSE),
+	DMU_OTN_UINT16_DATA = DMU_OT(DMU_BSWAP_UINT16, B_FALSE, B_FALSE),
+	DMU_OTN_UINT16_METADATA = DMU_OT(DMU_BSWAP_UINT16, B_TRUE, B_FALSE),
+	DMU_OTN_UINT32_DATA = DMU_OT(DMU_BSWAP_UINT32, B_FALSE, B_FALSE),
+	DMU_OTN_UINT32_METADATA = DMU_OT(DMU_BSWAP_UINT32, B_TRUE, B_FALSE),
+	DMU_OTN_UINT64_DATA = DMU_OT(DMU_BSWAP_UINT64, B_FALSE, B_FALSE),
+	DMU_OTN_UINT64_METADATA = DMU_OT(DMU_BSWAP_UINT64, B_TRUE, B_FALSE),
+	DMU_OTN_ZAP_DATA = DMU_OT(DMU_BSWAP_ZAP, B_FALSE, B_FALSE),
+	DMU_OTN_ZAP_METADATA = DMU_OT(DMU_BSWAP_ZAP, B_TRUE, B_FALSE),
+
+	DMU_OTN_UINT8_ENC_DATA = DMU_OT(DMU_BSWAP_UINT8, B_FALSE, B_TRUE),
+	DMU_OTN_UINT8_ENC_METADATA = DMU_OT(DMU_BSWAP_UINT8, B_TRUE, B_TRUE),
+	DMU_OTN_UINT16_ENC_DATA = DMU_OT(DMU_BSWAP_UINT16, B_FALSE, B_TRUE),
+	DMU_OTN_UINT16_ENC_METADATA = DMU_OT(DMU_BSWAP_UINT16, B_TRUE, B_TRUE),
+	DMU_OTN_UINT32_ENC_DATA = DMU_OT(DMU_BSWAP_UINT32, B_FALSE, B_TRUE),
+	DMU_OTN_UINT32_ENC_METADATA = DMU_OT(DMU_BSWAP_UINT32, B_TRUE, B_TRUE),
+	DMU_OTN_UINT64_ENC_DATA = DMU_OT(DMU_BSWAP_UINT64, B_FALSE, B_TRUE),
+	DMU_OTN_UINT64_ENC_METADATA = DMU_OT(DMU_BSWAP_UINT64, B_TRUE, B_TRUE),
+	DMU_OTN_ZAP_ENC_DATA = DMU_OT(DMU_BSWAP_ZAP, B_FALSE, B_TRUE),
+	DMU_OTN_ZAP_ENC_METADATA = DMU_OT(DMU_BSWAP_ZAP, B_TRUE, B_TRUE),
 } dmu_object_type_t;
 
 /*
@@ -285,19 +312,24 @@ void zfs_znode_byteswap(void *buf, size_t size);
  */
 #define	DMU_BONUS_BLKID		(-1ULL)
 #define	DMU_SPILL_BLKID		(-2ULL)
+
 /*
  * Public routines to create, destroy, open, and close objsets.
  */
+typedef void dmu_objset_create_sync_func_t(objset_t *os, void *arg,
+    cred_t *cr, dmu_tx_t *tx);
+
 int dmu_objset_hold(const char *name, void *tag, objset_t **osp);
 int dmu_objset_own(const char *name, dmu_objset_type_t type,
-    boolean_t readonly, void *tag, objset_t **osp);
+    boolean_t readonly, boolean_t key_required, void *tag, objset_t **osp);
 void dmu_objset_rele(objset_t *os, void *tag);
-void dmu_objset_disown(objset_t *os, void *tag);
+void dmu_objset_disown(objset_t *os, boolean_t key_required, void *tag);
 int dmu_objset_open_ds(struct dsl_dataset *ds, objset_t **osp);
 
 void dmu_objset_evict_dbufs(objset_t *os);
 int dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags,
-    void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg);
+    struct dsl_crypto_params *dcp, dmu_objset_create_sync_func_t func,
+    void *arg);
 int dmu_objset_clone(const char *name, const char *origin);
 int dsl_destroy_snapshots_nvl(struct nvlist *snaps, boolean_t defer,
     struct nvlist *errlist);
@@ -378,11 +410,12 @@ int dmu_object_claim_dnsize(objset_t *os, uint64_t object, dmu_object_type_t ot,
     int dnodesize, dmu_tx_t *tx);
 int dmu_object_reclaim_dnsize(objset_t *os, uint64_t object,
     dmu_object_type_t ot, int blocksize, dmu_object_type_t bonustype,
-    int bonuslen, int dnodesize, dmu_tx_t *txp);
+    int bonuslen, int dnodesize, boolean_t keep_spill, dmu_tx_t *txp);
 int dmu_object_claim(objset_t *os, uint64_t object, dmu_object_type_t ot,
     int blocksize, dmu_object_type_t bonus_type, int bonus_len, dmu_tx_t *tx);
 int dmu_object_reclaim(objset_t *os, uint64_t object, dmu_object_type_t ot,
     int blocksize, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *txp);
+int dmu_object_rm_spill(objset_t *os, uint64_t object, dmu_tx_t *tx);
 
 /*
  * Free an object from this objset.
@@ -417,6 +450,13 @@ int dmu_object_next(objset_t *os, uint64_t *objectp,
     boolean_t hole, uint64_t txg);
 
 /*
+ * Set the number of levels on a dnode. nlevels must be greater than the
+ * current number of levels or an EINVAL will be returned.
+ */
+int dmu_object_set_nlevels(objset_t *os, uint64_t object, int nlevels,
+    dmu_tx_t *tx);
+
+/*
  * Set the data blocksize for an object.
  *
  * The object cannot have any blocks allcated beyond the first.  If
@@ -431,6 +471,14 @@ int dmu_object_set_blocksize(objset_t *os, uint64_t object, uint64_t size,
     int ibs, dmu_tx_t *tx);
 
 /*
+ * Manually set the maxblkid on a dnode. This will adjust nlevels accordingly
+ * to accommodate the change. When calling this function, the caller must
+ * ensure that the object's nlevels can sufficiently support the new maxblkid.
+ */
+int dmu_object_set_maxblkid(objset_t *os, uint64_t object, uint64_t maxblkid,
+    dmu_tx_t *tx);
+
+/*
  * Set the checksum property on a dnode.  The new checksum algorithm will
  * apply to all newly written blocks; existing blocks will not be affected.
  */
@@ -460,6 +508,11 @@ dmu_write_embedded(objset_t *os, uint64_t object, uint64_t offset,
 
 void dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp,
     struct zio_prop *zp);
+void dmu_write_policy_override_compress(struct zio_prop *zp,
+    enum zio_compress compress);
+void dmu_write_policy_override_encrypt(struct zio_prop *zp,
+    dmu_object_type_t ot, boolean_t byteorder, enum zio_compress compress,
+    const uint8_t *salt, const uint8_t *iv, const uint8_t *mac);
 /*
  * The bonus data is accessed more or less like a regular buffer.
  * You must dmu_bonus_hold() to get the buffer, which will give you a
@@ -472,7 +525,11 @@ void dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp,
  *
  * Returns ENOENT, EIO, or 0.
  */
+int dmu_bonus_hold_impl(objset_t *os, uint64_t object, void *tag,
+    uint32_t flags, dmu_buf_t **dbp);
 int dmu_bonus_hold(objset_t *os, uint64_t object, void *tag, dmu_buf_t **);
+int dmu_bonus_hold_by_dnode(dnode_t *dn, void *tag, dmu_buf_t **dbp,
+    uint32_t flags);
 int dmu_bonus_max(void);
 int dmu_set_bonus(dmu_buf_t *, int, dmu_tx_t *);
 int dmu_set_bonustype(dmu_buf_t *, dmu_object_type_t, dmu_tx_t *);
@@ -483,7 +540,8 @@ int dmu_rm_spill(objset_t *, uint64_t, dmu_tx_t *);
  * Special spill buffer support used by "SA" framework
  */
 
-int dmu_spill_hold_by_bonus(dmu_buf_t *bonus, void *tag, dmu_buf_t **dbp);
+int dmu_spill_hold_by_bonus(dmu_buf_t *bonus, uint32_t flags, void *tag,
+    dmu_buf_t **dbp);
 int dmu_spill_hold_by_dnode(dnode_t *dn, uint32_t flags,
     void *tag, dmu_buf_t **dbp);
 int dmu_spill_hold_existing(dmu_buf_t *bonus, void *tag, dmu_buf_t **dbp);
@@ -525,6 +583,7 @@ boolean_t dmu_buf_try_add_ref(dmu_buf_t *, objset_t *os, uint64_t object,
 
 void dmu_buf_rele(dmu_buf_t *db, void *tag);
 uint64_t dmu_buf_refcount(dmu_buf_t *db);
+uint64_t dmu_buf_user_refcount(dmu_buf_t *db);
 
 /*
  * dmu_buf_hold_array holds the DMU buffers which contain all bytes in a
@@ -685,6 +744,8 @@ struct blkptr *dmu_buf_get_blkptr(dmu_buf_t *db);
  * (ie. you've called dmu_tx_hold_object(tx, db->db_object)).
  */
 void dmu_buf_will_dirty(dmu_buf_t *db, dmu_tx_t *tx);
+void dmu_buf_set_crypt_params(dmu_buf_t *db_fake, boolean_t byteorder,
+    const uint8_t *salt, const uint8_t *iv, const uint8_t *mac, dmu_tx_t *tx);
 
 /*
  * You must create a transaction, then hold the objects which you will
@@ -755,9 +816,9 @@ void dmu_tx_callback_register(dmu_tx_t *tx, dmu_tx_callback_func_t *dcb_func,
  * -1, the range from offset to end-of-file is freed.
  */
 int dmu_free_range(objset_t *os, uint64_t object, uint64_t offset,
-	uint64_t size, dmu_tx_t *tx);
+    uint64_t size, dmu_tx_t *tx);
 int dmu_free_long_range(objset_t *os, uint64_t object, uint64_t offset,
-	uint64_t size);
+    uint64_t size);
 int dmu_free_long_object(objset_t *os, uint64_t object);
 
 /*
@@ -768,6 +829,7 @@ int dmu_free_long_object(objset_t *os, uint64_t object);
  */
 #define	DMU_READ_PREFETCH	0 /* prefetch */
 #define	DMU_READ_NO_PREFETCH	1 /* don't prefetch */
+#define	DMU_READ_NO_DECRYPT	2 /* don't decrypt */
 int dmu_read(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
 	void *buf, uint32_t flags);
 int dmu_read_by_dnode(dnode_t *dn, uint64_t offset, uint64_t size, void *buf,
@@ -791,10 +853,15 @@ int dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset,
     uint64_t size, struct page *pp, dmu_tx_t *tx);
 struct arc_buf *dmu_request_arcbuf(dmu_buf_t *handle, int size);
 void dmu_return_arcbuf(struct arc_buf *buf);
-void dmu_assign_arcbuf_dnode(dnode_t *handle, uint64_t offset,
+int dmu_assign_arcbuf_by_dnode(dnode_t *handle, uint64_t offset,
     struct arc_buf *buf, dmu_tx_t *tx);
-void dmu_assign_arcbuf(dmu_buf_t *handle, uint64_t offset, struct arc_buf *buf,
-    dmu_tx_t *tx);
+int dmu_assign_arcbuf_by_dbuf(dmu_buf_t *handle, uint64_t offset,
+    struct arc_buf *buf, dmu_tx_t *tx);
+void dmu_convert_to_raw(dmu_buf_t *handle, boolean_t byteorder,
+    const uint8_t *salt, const uint8_t *iv, const uint8_t *mac, dmu_tx_t *tx);
+#define	dmu_assign_arcbuf	dmu_assign_arcbuf_by_dbuf
+void dmu_copy_from_buf(objset_t *os, uint64_t object, uint64_t offset,
+    dmu_buf_t *handle, dmu_tx_t *tx);
 int dmu_xuio_init(struct xuio *uio, int niov);
 void dmu_xuio_fini(struct xuio *uio);
 int dmu_xuio_add(struct xuio *uio, struct arc_buf *abuf, offset_t off,
@@ -838,6 +905,7 @@ typedef struct dmu_object_type_info {
 	dmu_object_byteswap_t	ot_byteswap;
 	boolean_t		ot_metadata;
 	boolean_t		ot_dbuf_metadata_cache;
+	boolean_t		ot_encrypt;
 	char			*ot_name;
 } dmu_object_type_info_t;
 
@@ -1008,8 +1076,6 @@ int dmu_diff(const char *tosnap_name, const char *fromsnap_name,
 #define	ZFS_CRC64_POLY	0xC96C5795D7870F42ULL	/* ECMA-182, reflected form */
 extern uint64_t zfs_crc64_table[256];
 
-extern int zfs_mdcomp_disable;
-
 #ifdef	__cplusplus
 }
 #endif
diff --git a/usr/src/uts/common/fs/zfs/sys/dmu_impl.h b/usr/src/uts/common/fs/zfs/sys/dmu_impl.h
index e820fe57ec..ccb5d7ac51 100644
--- a/usr/src/uts/common/fs/zfs/sys/dmu_impl.h
+++ b/usr/src/uts/common/fs/zfs/sys/dmu_impl.h
@@ -163,6 +163,7 @@ extern "C" {
  * 	dn_free_txg
  * 	dn_assigned_txg
  * 	dn_dirty_txg
+ * 	dd_assigned_tx
  * 	dn_notxholds
  * 	dn_dirtyctx
  * 	dn_dirtyctx_firstset
@@ -277,6 +278,7 @@ typedef struct dmu_sendarg {
 	objset_t *dsa_os;
 	zio_cksum_t dsa_zc;
 	uint64_t dsa_toguid;
+	uint64_t dsa_fromtxg;
 	int dsa_err;
 	dmu_pendop_t dsa_pending_op;
 	uint64_t dsa_featureflags;
diff --git a/usr/src/uts/common/fs/zfs/sys/dmu_objset.h b/usr/src/uts/common/fs/zfs/sys/dmu_objset.h
index cae1c7719a..41ae18a8b9 100644
--- a/usr/src/uts/common/fs/zfs/sys/dmu_objset.h
+++ b/usr/src/uts/common/fs/zfs/sys/dmu_objset.h
@@ -59,13 +59,19 @@ struct dmu_tx;
 
 #define	OBJSET_FLAG_USERACCOUNTING_COMPLETE	(1ULL<<0)
 
+/* all flags are currently non-portable */
+#define	OBJSET_CRYPT_PORTABLE_FLAGS_MASK	(0)
+
 typedef struct objset_phys {
 	dnode_phys_t os_meta_dnode;
 	zil_header_t os_zil_header;
 	uint64_t os_type;
 	uint64_t os_flags;
+	uint8_t os_portable_mac[ZIO_OBJSET_MAC_LEN];
+	uint8_t os_local_mac[ZIO_OBJSET_MAC_LEN];
 	char os_pad[OBJSET_PHYS_SIZE - sizeof (dnode_phys_t)*3 -
-	    sizeof (zil_header_t) - sizeof (uint64_t)*2];
+	    sizeof (zil_header_t) - sizeof (uint64_t)*2 -
+	    2*ZIO_OBJSET_MAC_LEN];
 	dnode_phys_t os_userused_dnode;
 	dnode_phys_t os_groupused_dnode;
 } objset_phys_t;
@@ -77,6 +83,8 @@ struct objset {
 	spa_t *os_spa;
 	arc_buf_t *os_phys_buf;
 	objset_phys_t *os_phys;
+	boolean_t os_encrypted;
+
 	/*
 	 * The following "special" dnodes have no parent, are exempt
 	 * from dnode_move(), and are not recorded in os_dnodes, but they
@@ -132,6 +140,10 @@ struct objset {
 	uint64_t os_flags;
 	uint64_t os_freed_dnodes;
 	boolean_t os_rescan_dnodes;
+	boolean_t os_raw_receive;
+
+	/* os_phys_buf should be written raw next txg */
+	boolean_t os_next_write_raw[TXG_SIZE];
 
 	/* Protected by os_obj_lock */
 	kmutex_t os_obj_lock;
@@ -171,14 +183,18 @@ struct objset {
 
 /* called from zpl */
 int dmu_objset_hold(const char *name, void *tag, objset_t **osp);
+int dmu_objset_hold_flags(const char *name, boolean_t decrypt, void *tag,
+    objset_t **osp);
 int dmu_objset_own(const char *name, dmu_objset_type_t type,
-    boolean_t readonly, void *tag, objset_t **osp);
+    boolean_t readonly, boolean_t decrypt, void *tag, objset_t **osp);
 int dmu_objset_own_obj(struct dsl_pool *dp, uint64_t obj,
-    dmu_objset_type_t type, boolean_t readonly, void *tag, objset_t **osp);
+    dmu_objset_type_t type, boolean_t readonly, boolean_t decrypt,
+    void *tag, objset_t **osp);
 void dmu_objset_refresh_ownership(struct dsl_dataset *ds,
-    struct dsl_dataset **newds, void *tag);
+    struct dsl_dataset **newds, boolean_t key_needed, void *tag);
 void dmu_objset_rele(objset_t *os, void *tag);
-void dmu_objset_disown(objset_t *os, void *tag);
+void dmu_objset_rele_flags(objset_t *os, boolean_t decrypt, void *tag);
+void dmu_objset_disown(objset_t *os, boolean_t decrypt, void *tag);
 int dmu_objset_from_ds(struct dsl_dataset *ds, objset_t **osp);
 
 void dmu_objset_stats(objset_t *os, nvlist_t *nv);
@@ -196,6 +212,9 @@ timestruc_t dmu_objset_snap_cmtime(objset_t *os);
 /* called from dsl */
 void dmu_objset_sync(objset_t *os, zio_t *zio, dmu_tx_t *tx);
 boolean_t dmu_objset_is_dirty(objset_t *os, uint64_t txg);
+objset_t *dmu_objset_create_impl_dnstats(spa_t *spa, struct dsl_dataset *ds,
+    blkptr_t *bp, dmu_objset_type_t type, int levels, int blksz, int ibs,
+    dmu_tx_t *tx);
 objset_t *dmu_objset_create_impl(spa_t *spa, struct dsl_dataset *ds,
     blkptr_t *bp, dmu_objset_type_t type, dmu_tx_t *tx);
 int dmu_objset_open_impl(spa_t *spa, struct dsl_dataset *ds, blkptr_t *bp,
@@ -206,6 +225,7 @@ void dmu_objset_userquota_get_ids(dnode_t *dn, boolean_t before, dmu_tx_t *tx);
 boolean_t dmu_objset_userused_enabled(objset_t *os);
 int dmu_objset_userspace_upgrade(objset_t *os);
 boolean_t dmu_objset_userspace_present(objset_t *os);
+boolean_t dmu_objset_incompatible_encryption_version(objset_t *os);
 int dmu_fsname(const char *snapname, char *buf);
 
 void dmu_objset_evict_done(objset_t *os);
diff --git a/usr/src/uts/common/fs/zfs/sys/dmu_recv.h b/usr/src/uts/common/fs/zfs/sys/dmu_recv.h
index 56b69e61b1..e2b595e77b 100644
--- a/usr/src/uts/common/fs/zfs/sys/dmu_recv.h
+++ b/usr/src/uts/common/fs/zfs/sys/dmu_recv.h
@@ -45,10 +45,15 @@ typedef struct dmu_recv_cookie {
 	boolean_t drc_byteswap;
 	boolean_t drc_force;
 	boolean_t drc_resumable;
+	boolean_t drc_raw;
 	boolean_t drc_clone;
+	boolean_t drc_spill;
 	struct avl_tree *drc_guid_to_ds_map;
+	nvlist_t *drc_keynvl;
 	zio_cksum_t drc_cksum;
+	uint64_t drc_fromsnapobj;
 	uint64_t drc_newsnapobj;
+	uint64_t drc_ivset_guid;
 	void *drc_owner;
 	cred_t *drc_cred;
 } dmu_recv_cookie_t;
diff --git a/usr/src/uts/common/fs/zfs/sys/dmu_send.h b/usr/src/uts/common/fs/zfs/sys/dmu_send.h
index 65d8e99db6..382f86622d 100644
--- a/usr/src/uts/common/fs/zfs/sys/dmu_send.h
+++ b/usr/src/uts/common/fs/zfs/sys/dmu_send.h
@@ -40,15 +40,14 @@ struct avl_tree;
 struct dmu_replay_record;
 
 int dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok,
-    boolean_t large_block_ok, boolean_t compressok, int outfd,
-    uint64_t resumeobj, uint64_t resumeoff,
-    struct vnode *vp, offset_t *off);
+    boolean_t large_block_ok, boolean_t compressok, boolean_t rawok, int outfd,
+    uint64_t resumeobj, uint64_t resumeoff, struct vnode *vp, offset_t *off);
 int dmu_send_estimate(struct dsl_dataset *ds, struct dsl_dataset *fromds,
     boolean_t stream_compressed, uint64_t *sizep);
 int dmu_send_estimate_from_txg(struct dsl_dataset *ds, uint64_t fromtxg,
     boolean_t stream_compressed, uint64_t *sizep);
 int dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap,
     boolean_t embedok, boolean_t large_block_ok, boolean_t compressok,
-    int outfd, struct vnode *vp, offset_t *off);
+    boolean_t rawok, int outfd, struct vnode *vp, offset_t *off);
 
 #endif /* _DMU_SEND_H */
diff --git a/usr/src/uts/common/fs/zfs/sys/dmu_traverse.h b/usr/src/uts/common/fs/zfs/sys/dmu_traverse.h
index c010edd440..8ceef5cf13 100644
--- a/usr/src/uts/common/fs/zfs/sys/dmu_traverse.h
+++ b/usr/src/uts/common/fs/zfs/sys/dmu_traverse.h
@@ -49,6 +49,15 @@ typedef int (blkptr_cb_t)(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
 #define	TRAVERSE_PREFETCH (TRAVERSE_PREFETCH_METADATA | TRAVERSE_PREFETCH_DATA)
 #define	TRAVERSE_HARD			(1<<4)
 
+/*
+ * Encrypted dnode blocks have encrypted bonus buffers while the rest
+ * of the dnode is left unencrypted. Callers can specify the
+ * TRAVERSE_NO_DECRYPT flag to indicate to the traversal code that
+ * they wish to receive the raw encrypted dnodes instead of attempting
+ * to read the logical data.
+ */
+#define	TRAVERSE_NO_DECRYPT		(1<<5)
+
 /* Special traverse error return value to indicate skipping of children */
 #define	TRAVERSE_VISIT_NO_CHILDREN	-1
 
diff --git a/usr/src/uts/common/fs/zfs/sys/dnode.h b/usr/src/uts/common/fs/zfs/sys/dnode.h
index 3b7d619172..da72903113 100644
--- a/usr/src/uts/common/fs/zfs/sys/dnode.h
+++ b/usr/src/uts/common/fs/zfs/sys/dnode.h
@@ -74,9 +74,7 @@ extern "C" {
 /*
  * dnode id flags
  *
- * Note: a file will never ever have its
- * ids moved from bonus->spill
- * and only in a crypto environment would it be on spill
+ * Note: a file will never ever have its ids moved from bonus->spill
  */
 #define	DN_ID_CHKED_BONUS	0x1
 #define	DN_ID_CHKED_SPILL	0x2
@@ -201,6 +199,8 @@ enum dnode_dirtycontext {
  * dataset and even within the same dnode block.
  */
 
+#define	DNODE_CRYPT_PORTABLE_FLAGS_MASK		(DNODE_FLAG_SPILL_BLKPTR)
+
 typedef struct dnode_phys {
 	uint8_t dn_type;		/* dmu_object_type_t */
 	uint8_t dn_indblkshift;		/* ln2(indirect block size) */
@@ -219,6 +219,13 @@ typedef struct dnode_phys {
 	uint64_t dn_maxblkid;		/* largest allocated block ID */
 	uint64_t dn_used;		/* bytes (or sectors) of disk space */
 
+	/*
+	 * Both dn_pad2 and dn_pad3 are protected by the block's MAC. This
+	 * allows us to protect any fields that might be added here in the
+	 * future. In either case, developers will want to check
+	 * zio_crypt_init_uios_dnode() to ensure the new field is being
+	 * protected properly.
+	 */
 	uint64_t dn_pad3[4];
 	union {
 		blkptr_t dn_blkptr[1+DN_OLD_MAX_BONUSLEN/sizeof (blkptr_t)];
@@ -235,8 +242,8 @@ typedef struct dnode_phys {
 	};
 } dnode_phys_t;
 
-#define	DN_SPILL_BLKPTR(dnp)	(blkptr_t *)((char *)(dnp) + \
-	(((dnp)->dn_extra_slots + 1) << DNODE_SHIFT) - (1 << SPA_BLKPTRSHIFT))
+#define	DN_SPILL_BLKPTR(dnp)	((blkptr_t *)((char *)(dnp) +		\
+	(((dnp)->dn_extra_slots + 1) << DNODE_SHIFT) - (1 << SPA_BLKPTRSHIFT)))
 
 struct dnode {
 	/*
@@ -282,6 +289,7 @@ struct dnode {
 	uint8_t dn_rm_spillblk[TXG_SIZE];	/* for removing spill blk */
 	uint16_t dn_next_bonuslen[TXG_SIZE];
 	uint32_t dn_next_blksz[TXG_SIZE];	/* next block size in bytes */
+	uint64_t dn_next_maxblkid[TXG_SIZE];	/* next max block ID */
 
 	/* protected by dn_dbufs_mtx; declared here to fill 32-bit hole */
 	uint32_t dn_dbufs_count;	/* count of dn_dbufs */
@@ -339,6 +347,12 @@ struct dnode {
 };
 
 /*
+ * We use this (otherwise unused) bit to indicate if the value of
+ * dn_next_maxblkid[txgoff] is valid to use in dnode_sync().
+ */
+#define	DMU_NEXT_MAXBLKID_SET		(1ULL << 63)
+
+/*
  * Adds a level of indirection between the dbuf and the dnode to avoid
  * iterating descendent dbufs in dnode_move(). Handles are not allocated
  * individually, but as an array of child dnodes in dnode_hold_impl().
@@ -381,15 +395,18 @@ void dnode_sync(dnode_t *dn, dmu_tx_t *tx);
 void dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
     dmu_object_type_t bonustype, int bonuslen, int dn_slots, dmu_tx_t *tx);
 void dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize,
-    dmu_object_type_t bonustype, int bonuslen, int dn_slots, dmu_tx_t *tx);
+    dmu_object_type_t bonustype, int bonuslen, int dn_slots,
+    boolean_t keep_spill, dmu_tx_t *tx);
 void dnode_free(dnode_t *dn, dmu_tx_t *tx);
 void dnode_byteswap(dnode_phys_t *dnp);
 void dnode_buf_byteswap(void *buf, size_t size);
 void dnode_verify(dnode_t *dn);
+int dnode_set_nlevels(dnode_t *dn, int nlevels, dmu_tx_t *tx);
 int dnode_set_blksz(dnode_t *dn, uint64_t size, int ibs, dmu_tx_t *tx);
 void dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx);
 void dnode_diduse_space(dnode_t *dn, int64_t space);
-void dnode_new_blkid(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx, boolean_t);
+void dnode_new_blkid(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx,
+    boolean_t have_read, boolean_t force);
 uint64_t dnode_block_freed(dnode_t *dn, uint64_t blkid);
 void dnode_init(void);
 void dnode_fini(void);
diff --git a/usr/src/uts/common/fs/zfs/sys/dsl_bookmark.h b/usr/src/uts/common/fs/zfs/sys/dsl_bookmark.h
index e477bb231c..3cdad74414 100644
--- a/usr/src/uts/common/fs/zfs/sys/dsl_bookmark.h
+++ b/usr/src/uts/common/fs/zfs/sys/dsl_bookmark.h
@@ -36,8 +36,25 @@ typedef struct zfs_bookmark_phys {
 	uint64_t zbm_guid;		/* guid of bookmarked dataset */
 	uint64_t zbm_creation_txg;	/* birth transaction group */
 	uint64_t zbm_creation_time;	/* bookmark creation time */
+
+	/* the following fields are reserved for redacted send / recv */
+	uint64_t zbm_redaction_obj;	/* redaction list object */
+	uint64_t zbm_flags;		/* ZBM_FLAG_* */
+	uint64_t zbm_referenced_bytes_refd;
+	uint64_t zbm_compressed_bytes_refd;
+	uint64_t zbm_uncompressed_bytes_refd;
+	uint64_t zbm_referenced_freed_before_next_snap;
+	uint64_t zbm_compressed_freed_before_next_snap;
+	uint64_t zbm_uncompressed_freed_before_next_snap;
+
+	/* fields used for raw sends */
+	uint64_t zbm_ivset_guid;
 } zfs_bookmark_phys_t;
 
+
+#define	BOOKMARK_PHYS_SIZE_V1	(3 * sizeof (uint64_t))
+#define	BOOKMARK_PHYS_SIZE_V2	(12 * sizeof (uint64_t))
+
 int dsl_bookmark_create(nvlist_t *, nvlist_t *);
 int dsl_get_bookmarks(const char *, nvlist_t *, nvlist_t *);
 int dsl_get_bookmarks_impl(dsl_dataset_t *, nvlist_t *, nvlist_t *);
diff --git a/usr/src/uts/common/fs/zfs/sys/dsl_crypt.h b/usr/src/uts/common/fs/zfs/sys/dsl_crypt.h
new file mode 100644
index 0000000000..360a69b329
--- /dev/null
+++ b/usr/src/uts/common/fs/zfs/sys/dsl_crypt.h
@@ -0,0 +1,226 @@
+/*
+ * CDDL HEADER START
+ *
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2017, Datto, Inc. All rights reserved.
+ */
+
+#ifndef	_SYS_DSL_CRYPT_H
+#define	_SYS_DSL_CRYPT_H
+
+#include <sys/dmu_tx.h>
+#include <sys/dmu.h>
+#include <sys/zio_crypt.h>
+#include <sys/spa.h>
+#include <sys/dsl_dataset.h>
+
+/*
+ * ZAP entry keys for DSL Crypto Keys stored on disk. In addition,
+ * ZFS_PROP_KEYFORMAT, ZFS_PROP_PBKDF2_SALT, and ZFS_PROP_PBKDF2_ITERS are
+ * also maintained here using their respective property names.
+ */
+#define	DSL_CRYPTO_KEY_CRYPTO_SUITE	"DSL_CRYPTO_SUITE"
+#define	DSL_CRYPTO_KEY_GUID		"DSL_CRYPTO_GUID"
+#define	DSL_CRYPTO_KEY_IV		"DSL_CRYPTO_IV"
+#define	DSL_CRYPTO_KEY_MAC		"DSL_CRYPTO_MAC"
+#define	DSL_CRYPTO_KEY_MASTER_KEY	"DSL_CRYPTO_MASTER_KEY_1"
+#define	DSL_CRYPTO_KEY_HMAC_KEY		"DSL_CRYPTO_HMAC_KEY_1"
+#define	DSL_CRYPTO_KEY_ROOT_DDOBJ	"DSL_CRYPTO_ROOT_DDOBJ"
+#define	DSL_CRYPTO_KEY_REFCOUNT		"DSL_CRYPTO_REFCOUNT"
+#define	DSL_CRYPTO_KEY_VERSION		"DSL_CRYPTO_VERSION"
+
+/*
+ * In-memory representation of a wrapping key. One of these structs will exist
+ * for each encryption root with its key loaded.
+ */
+typedef struct dsl_wrapping_key {
+	/* link on spa_keystore_t:sk_wkeys */
+	avl_node_t wk_avl_link;
+
+	/* keyformat property enum */
+	zfs_keyformat_t wk_keyformat;
+
+	/* the pbkdf2 salt, if the keyformat is of type passphrase */
+	uint64_t wk_salt;
+
+	/* the pbkdf2 iterations, if the keyformat is of type passphrase */
+	uint64_t wk_iters;
+
+	/* actual wrapping key */
+	crypto_key_t wk_key;
+
+	/* refcount of holders of this key */
+	zfs_refcount_t wk_refcnt;
+
+	/* dsl directory object that owns this wrapping key */
+	uint64_t wk_ddobj;
+} dsl_wrapping_key_t;
+
+/* enum of commands indicating special actions that should be run */
+typedef enum dcp_cmd {
+	/* key creation commands */
+	DCP_CMD_NONE = 0,	/* no specific command */
+	DCP_CMD_RAW_RECV,	/* raw receive */
+
+	/* key changing commands */
+	DCP_CMD_NEW_KEY,	/* rewrap key as an encryption root */
+	DCP_CMD_INHERIT,	/* rewrap key with parent's wrapping key */
+	DCP_CMD_FORCE_NEW_KEY,	/* change to encryption root without rewrap */
+	DCP_CMD_FORCE_INHERIT,	/* inherit parent's key without rewrap */
+
+	DCP_CMD_MAX
+} dcp_cmd_t;
+
+/*
+ * This struct is a simple wrapper around all the parameters that are usually
+ * required to setup encryption. It exists so that all of the params can be
+ * passed around the kernel together for convenience.
+ */
+typedef struct dsl_crypto_params {
+	/* command indicating intended action */
+	dcp_cmd_t cp_cmd;
+
+	/* the encryption algorithm */
+	enum zio_encrypt cp_crypt;
+
+	/* keylocation property string */
+	char *cp_keylocation;
+
+	/* the wrapping key */
+	dsl_wrapping_key_t *cp_wkey;
+} dsl_crypto_params_t;
+
+/*
+ * In-memory representation of a DSL Crypto Key object. One of these structs
+ * (and corresponding on-disk ZAP object) will exist for each encrypted
+ * clone family that is mounted or otherwise reading protected data.
+ */
+typedef struct dsl_crypto_key {
+	/* link on spa_keystore_t:sk_dsl_keys */
+	avl_node_t dck_avl_link;
+
+	/* refcount of dsl_key_mapping_t's holding this key */
+	zfs_refcount_t dck_holds;
+
+	/* master key used to derive encryption keys */
+	zio_crypt_key_t dck_key;
+
+	/* wrapping key for syncing this structure to disk */
+	dsl_wrapping_key_t *dck_wkey;
+
+	/* on-disk object id */
+	uint64_t dck_obj;
+} dsl_crypto_key_t;
+
+/*
+ * In-memory mapping of a dataset object id to a DSL Crypto Key. This is used
+ * to look up the corresponding dsl_crypto_key_t from the zio layer for
+ * performing data encryption and decryption.
+ */
+typedef struct dsl_key_mapping {
+	/* link on spa_keystore_t:sk_key_mappings */
+	avl_node_t km_avl_link;
+
+	/* refcount of how many users are depending on this mapping */
+	zfs_refcount_t km_refcnt;
+
+	/* dataset this crypto key belongs to (index) */
+	uint64_t km_dsobj;
+
+	/* crypto key (value) of this record */
+	dsl_crypto_key_t *km_key;
+} dsl_key_mapping_t;
+
+/* in-memory structure for holding all wrapping and dsl keys */
+typedef struct spa_keystore {
+	/* lock for protecting sk_dsl_keys */
+	krwlock_t sk_dk_lock;
+
+	/* tree of all dsl_crypto_key_t's */
+	avl_tree_t sk_dsl_keys;
+
+	/* lock for protecting sk_key_mappings */
+	krwlock_t sk_km_lock;
+
+	/* tree of all dsl_key_mapping_t's, indexed by dsobj */
+	avl_tree_t sk_key_mappings;
+
+	/* lock for protecting the wrapping keys tree */
+	krwlock_t sk_wkeys_lock;
+
+	/* tree of all dsl_wrapping_key_t's, indexed by ddobj */
+	avl_tree_t sk_wkeys;
+} spa_keystore_t;
+
+int dsl_crypto_params_create_nvlist(dcp_cmd_t cmd, nvlist_t *props,
+    nvlist_t *crypto_args, dsl_crypto_params_t **dcp_out);
+void dsl_crypto_params_free(dsl_crypto_params_t *dcp, boolean_t unload);
+void dsl_dataset_crypt_stats(struct dsl_dataset *ds, nvlist_t *nv);
+int dsl_crypto_can_set_keylocation(const char *dsname, const char *keylocation);
+boolean_t dsl_dir_incompatible_encryption_version(dsl_dir_t *dd);
+
+void spa_keystore_init(spa_keystore_t *sk);
+void spa_keystore_fini(spa_keystore_t *sk);
+
+void spa_keystore_dsl_key_rele(spa_t *spa, dsl_crypto_key_t *dck, void *tag);
+int spa_keystore_load_wkey_impl(spa_t *spa, dsl_wrapping_key_t *wkey);
+int spa_keystore_load_wkey(const char *dsname, dsl_crypto_params_t *dcp,
+    boolean_t noop);
+int spa_keystore_unload_wkey_impl(spa_t *spa, uint64_t ddobj);
+int spa_keystore_unload_wkey(const char *dsname);
+
+int spa_keystore_create_mapping(spa_t *spa, struct dsl_dataset *ds, void *tag,
+    dsl_key_mapping_t **km_out);
+int spa_keystore_remove_mapping(spa_t *spa, uint64_t dsobj, void *tag);
+void key_mapping_add_ref(dsl_key_mapping_t *km, void *tag);
+void key_mapping_rele(spa_t *spa, dsl_key_mapping_t *km, void *tag);
+int spa_keystore_lookup_key(spa_t *spa, uint64_t dsobj, void *tag,
+    dsl_crypto_key_t **dck_out);
+
+int dsl_crypto_populate_key_nvlist(struct dsl_dataset *ds,
+    uint64_t from_ivset_guid, nvlist_t **nvl_out);
+int dsl_crypto_recv_raw_key_check(struct dsl_dataset *ds,
+    nvlist_t *nvl, dmu_tx_t *tx);
+void dsl_crypto_recv_raw_key_sync(struct dsl_dataset *ds,
+    nvlist_t *nvl, dmu_tx_t *tx);
+int dsl_crypto_recv_raw(const char *poolname, uint64_t dsobj, uint64_t fromobj,
+    dmu_objset_type_t ostype, nvlist_t *nvl, boolean_t do_key);
+
+int spa_keystore_change_key(const char *dsname, dsl_crypto_params_t *dcp);
+int dsl_dir_rename_crypt_check(dsl_dir_t *dd, dsl_dir_t *newparent);
+int dsl_dataset_promote_crypt_check(dsl_dir_t *target, dsl_dir_t *origin);
+void dsl_dataset_promote_crypt_sync(dsl_dir_t *target, dsl_dir_t *origin,
+    dmu_tx_t *tx);
+int dmu_objset_create_crypt_check(dsl_dir_t *parentdd,
+    dsl_crypto_params_t *dcp, boolean_t *will_encrypt);
+void dsl_dataset_create_crypt_sync(uint64_t dsobj, dsl_dir_t *dd,
+    struct dsl_dataset *origin, dsl_crypto_params_t *dcp, dmu_tx_t *tx);
+uint64_t dsl_crypto_key_create_sync(uint64_t crypt, dsl_wrapping_key_t *wkey,
+    dmu_tx_t *tx);
+int dmu_objset_clone_crypt_check(dsl_dir_t *parentdd, dsl_dir_t *origindd);
+uint64_t dsl_crypto_key_clone_sync(dsl_dir_t *origindd, dmu_tx_t *tx);
+void dsl_crypto_key_destroy_sync(uint64_t dckobj, dmu_tx_t *tx);
+
+int spa_crypt_get_salt(spa_t *spa, uint64_t dsobj, uint8_t *salt);
+int spa_do_crypt_mac_abd(boolean_t generate, spa_t *spa, uint64_t dsobj,
+    abd_t *abd, uint_t datalen, uint8_t *mac);
+int spa_do_crypt_objset_mac_abd(boolean_t generate, spa_t *spa, uint64_t dsobj,
+    abd_t *abd, uint_t datalen, boolean_t byteswap);
+int spa_do_crypt_abd(boolean_t encrypt, spa_t *spa, const zbookmark_phys_t *zb,
+    dmu_object_type_t ot, boolean_t dedup, boolean_t bswap, uint8_t *salt,
+    uint8_t *iv, uint8_t *mac, uint_t datalen, abd_t *pabd, abd_t *cabd,
+    boolean_t *no_crypt);
+
+#endif	/* _SYS_DSL_CRYPT_H */
diff --git a/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h b/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h
index 064ff617fd..189376eefc 100644
--- a/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h
+++ b/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h
@@ -40,6 +40,7 @@
 #include <sys/dsl_deadlist.h>
 #include <sys/refcount.h>
 #include <sys/rrwlock.h>
+#include <sys/dsl_crypt.h>
 #include <zfeature_common.h>
 
 #ifdef	__cplusplus
@@ -49,6 +50,8 @@ extern "C" {
 struct dsl_dataset;
 struct dsl_dir;
 struct dsl_pool;
+struct dsl_crypto_params;
+struct dsl_key_mapping;
 
 #define	DS_FLAG_INCONSISTENT	(1ULL<<0)
 #define	DS_IS_INCONSISTENT(ds)	\
@@ -106,6 +109,7 @@ struct dsl_pool;
 #define	DS_FIELD_RESUME_LARGEBLOCK "com.delphix:resume_largeblockok"
 #define	DS_FIELD_RESUME_EMBEDOK "com.delphix:resume_embedok"
 #define	DS_FIELD_RESUME_COMPRESSOK "com.delphix:resume_compressok"
+#define	DS_FIELD_RESUME_RAWOK "com.datto:resume_rawok"
 
 /*
  * This field is set to the object number of the remap deadlist if one exists.
@@ -113,6 +117,12 @@ struct dsl_pool;
 #define	DS_FIELD_REMAP_DEADLIST	"com.delphix:remap_deadlist"
 
 /*
+ * This field is set to the ivset guid for encrypted snapshots. This is used
+ * for validating raw receives.
+ */
+#define	DS_FIELD_IVSET_GUID	"com.datto:ivset_guid"
+
+/*
  * DS_FLAG_CI_DATASET is set if the dataset contains a file system whose
  * name lookups should be performed case-insensitively.
  */
@@ -164,6 +174,7 @@ typedef struct dsl_dataset {
 	uint64_t ds_object;
 	uint64_t ds_fsid_guid;
 	boolean_t ds_is_snapshot;
+	struct dsl_key_mapping *ds_key_mapping;
 
 	/* only used in syncing context, only valid for non-snapshots: */
 	struct dsl_dataset *ds_prev;
@@ -293,26 +304,40 @@ typedef struct dsl_dataset_snapshot_arg {
 #define	DS_UNIQUE_IS_ACCURATE(ds)	\
 	((dsl_dataset_phys(ds)->ds_flags & DS_FLAG_UNIQUE_ACCURATE) != 0)
 
+/* flags for holding the dataset */
+typedef enum ds_hold_flags {
+	DS_HOLD_FLAG_DECRYPT    = 1 << 0 /* needs access to encrypted data */
+} ds_hold_flags_t;
+
 int dsl_dataset_hold(struct dsl_pool *dp, const char *name, void *tag,
     dsl_dataset_t **dsp);
+int dsl_dataset_hold_flags(struct dsl_pool *dp, const char *name,
+    ds_hold_flags_t flags, void *tag, dsl_dataset_t **dsp);
 boolean_t dsl_dataset_try_add_ref(struct dsl_pool *dp, dsl_dataset_t *ds,
     void *tag);
+int dsl_dataset_create_key_mapping(dsl_dataset_t *ds);
 int dsl_dataset_hold_obj(struct dsl_pool *dp, uint64_t dsobj, void *tag,
     dsl_dataset_t **);
+int dsl_dataset_hold_obj_flags(struct dsl_pool *dp, uint64_t dsobj,
+	ds_hold_flags_t flags, void *tag, dsl_dataset_t **);
+void dsl_dataset_remove_key_mapping(dsl_dataset_t *ds);
 void dsl_dataset_rele(dsl_dataset_t *ds, void *tag);
+void dsl_dataset_rele_flags(dsl_dataset_t *ds, ds_hold_flags_t flags,
+    void *tag);
 int dsl_dataset_own(struct dsl_pool *dp, const char *name,
-    void *tag, dsl_dataset_t **dsp);
+    ds_hold_flags_t flags, void *tag, dsl_dataset_t **dsp);
 int dsl_dataset_own_obj(struct dsl_pool *dp, uint64_t dsobj,
-    void *tag, dsl_dataset_t **dsp);
-void dsl_dataset_disown(dsl_dataset_t *ds, void *tag);
+    ds_hold_flags_t flags, void *tag, dsl_dataset_t **dsp);
+void dsl_dataset_disown(dsl_dataset_t *ds, ds_hold_flags_t flags, void *tag);
 void dsl_dataset_name(dsl_dataset_t *ds, char *name);
 boolean_t dsl_dataset_tryown(dsl_dataset_t *ds, void *tag);
 int dsl_dataset_namelen(dsl_dataset_t *ds);
 boolean_t dsl_dataset_has_owner(dsl_dataset_t *ds);
 uint64_t dsl_dataset_create_sync(dsl_dir_t *pds, const char *lastname,
-    dsl_dataset_t *origin, uint64_t flags, cred_t *, dmu_tx_t *);
+    dsl_dataset_t *origin, uint64_t flags, cred_t *,
+    struct dsl_crypto_params *, dmu_tx_t *);
 uint64_t dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
-    uint64_t flags, dmu_tx_t *tx);
+    struct dsl_crypto_params *dcp, uint64_t flags, dmu_tx_t *tx);
 void dsl_dataset_snapshot_sync(void *arg, dmu_tx_t *tx);
 int dsl_dataset_snapshot_check(void *arg, dmu_tx_t *tx);
 int dsl_dataset_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t *errors);
@@ -434,6 +459,8 @@ void dsl_dataset_create_remap_deadlist(dsl_dataset_t *ds, dmu_tx_t *tx);
 boolean_t dsl_dataset_remap_deadlist_exists(dsl_dataset_t *ds);
 void dsl_dataset_destroy_remap_deadlist(dsl_dataset_t *ds, dmu_tx_t *tx);
 
+void dsl_dataset_activate_feature(uint64_t dsobj,
+    spa_feature_t f, dmu_tx_t *tx);
 void dsl_dataset_deactivate_feature(uint64_t dsobj,
     spa_feature_t f, dmu_tx_t *tx);
 
diff --git a/usr/src/uts/common/fs/zfs/sys/dsl_deleg.h b/usr/src/uts/common/fs/zfs/sys/dsl_deleg.h
index 6fb6a121ad..dadbda324e 100644
--- a/usr/src/uts/common/fs/zfs/sys/dsl_deleg.h
+++ b/usr/src/uts/common/fs/zfs/sys/dsl_deleg.h
@@ -58,6 +58,8 @@ extern "C" {
 #define	ZFS_DELEG_PERM_DIFF		"diff"
 #define	ZFS_DELEG_PERM_BOOKMARK		"bookmark"
 #define	ZFS_DELEG_PERM_REMAP		"remap"
+#define	ZFS_DELEG_PERM_LOAD_KEY		"load-key"
+#define	ZFS_DELEG_PERM_CHANGE_KEY	"change-key"
 
 /*
  * Note: the names of properties that are marked delegatable are also
diff --git a/usr/src/uts/common/fs/zfs/sys/dsl_dir.h b/usr/src/uts/common/fs/zfs/sys/dsl_dir.h
index 21d953cb60..a9336f5321 100644
--- a/usr/src/uts/common/fs/zfs/sys/dsl_dir.h
+++ b/usr/src/uts/common/fs/zfs/sys/dsl_dir.h
@@ -33,6 +33,7 @@
 #include <sys/dsl_synctask.h>
 #include <sys/refcount.h>
 #include <sys/zfs_context.h>
+#include <sys/dsl_crypt.h>
 
 #ifdef	__cplusplus
 extern "C" {
@@ -48,6 +49,7 @@ struct dsl_dataset;
 #define	DD_FIELD_FILESYSTEM_COUNT	"com.joyent:filesystem_count"
 #define	DD_FIELD_SNAPSHOT_COUNT		"com.joyent:snapshot_count"
 #define	DD_FIELD_LAST_REMAP_TXG		"com.delphix:last_remap_txg"
+#define	DD_FIELD_CRYPTO_KEY_OBJ		"com.datto:crypto_key_obj"
 
 typedef enum dd_used {
 	DD_USED_HEAD,
@@ -90,6 +92,7 @@ struct dsl_dir {
 
 	/* These are immutable; no lock needed: */
 	uint64_t dd_object;
+	uint64_t dd_crypto_obj;
 	dsl_pool_t *dd_pool;
 
 	/* Stable until user eviction; no lock needed: */
diff --git a/usr/src/uts/common/fs/zfs/sys/dsl_pool.h b/usr/src/uts/common/fs/zfs/sys/dsl_pool.h
index 66098900db..de13fa8bfa 100644
--- a/usr/src/uts/common/fs/zfs/sys/dsl_pool.h
+++ b/usr/src/uts/common/fs/zfs/sys/dsl_pool.h
@@ -51,6 +51,7 @@ struct dsl_dataset;
 struct dsl_pool;
 struct dmu_tx;
 struct dsl_scan;
+struct dsl_crypto_params;
 
 extern uint64_t zfs_dirty_data_max;
 extern uint64_t zfs_dirty_data_max_max;
@@ -144,7 +145,8 @@ typedef struct dsl_pool {
 int dsl_pool_init(spa_t *spa, uint64_t txg, dsl_pool_t **dpp);
 int dsl_pool_open(dsl_pool_t *dp);
 void dsl_pool_close(dsl_pool_t *dp);
-dsl_pool_t *dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg);
+dsl_pool_t *dsl_pool_create(spa_t *spa, nvlist_t *zplprops,
+    struct dsl_crypto_params *dcp, uint64_t txg);
 void dsl_pool_sync(dsl_pool_t *dp, uint64_t txg);
 void dsl_pool_sync_done(dsl_pool_t *dp, uint64_t txg);
 int dsl_pool_sync_context(dsl_pool_t *dp);
diff --git a/usr/src/uts/common/fs/zfs/sys/hkdf.h b/usr/src/uts/common/fs/zfs/sys/hkdf.h
new file mode 100644
index 0000000000..e0f7678c03
--- /dev/null
+++ b/usr/src/uts/common/fs/zfs/sys/hkdf.h
@@ -0,0 +1,29 @@
+/*
+ * CDDL HEADER START
+ *
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2017, Datto, Inc. All rights reserved.
+ */
+
+#ifndef	_SYS_HKDF_H_
+#define	_SYS_HKDF_H_
+
+#include <sys/types.h>
+
+int hkdf_sha512(uint8_t *key_material, uint_t km_len, uint8_t *salt,
+    uint_t salt_len, uint8_t *info, uint_t info_len, uint8_t *output_key,
+    uint_t out_len);
+
+#endif	/* _SYS_HKDF_H_ */
diff --git a/usr/src/uts/common/fs/zfs/sys/refcount.h b/usr/src/uts/common/fs/zfs/sys/refcount.h
index 0059a245ee..ec36727065 100644
--- a/usr/src/uts/common/fs/zfs/sys/refcount.h
+++ b/usr/src/uts/common/fs/zfs/sys/refcount.h
@@ -76,6 +76,8 @@ int64_t zfs_refcount_add_many(zfs_refcount_t *, uint64_t, void *);
 int64_t zfs_refcount_remove_many(zfs_refcount_t *, uint64_t, void *);
 void zfs_refcount_transfer(zfs_refcount_t *, zfs_refcount_t *);
 void zfs_refcount_transfer_ownership(zfs_refcount_t *, void *, void *);
+void zfs_refcount_transfer_ownership_many(zfs_refcount_t *, uint64_t,
+    void *, void *);
 boolean_t zfs_refcount_held(zfs_refcount_t *, void *);
 boolean_t zfs_refcount_not_held(zfs_refcount_t *, void *);
 
@@ -107,6 +109,7 @@ typedef struct refcount {
 	atomic_add_64(&(dst)->rc_count, __tmp); \
 }
 #define	zfs_refcount_transfer_ownership(rc, current_holder, new_holder)	(void)0
+#define	zfs_refcount_transfer_ownership_many(rc, nr, ch, nh)	((void)0)
 #define	zfs_refcount_held(rc, holder)		((rc)->rc_count > 0)
 #define	zfs_refcount_not_held(rc, holder)		(B_TRUE)
 
diff --git a/usr/src/uts/common/fs/zfs/sys/spa.h b/usr/src/uts/common/fs/zfs/sys/spa.h
index 53b9e4ef5d..2d998d624d 100644
--- a/usr/src/uts/common/fs/zfs/sys/spa.h
+++ b/usr/src/uts/common/fs/zfs/sys/spa.h
@@ -61,6 +61,7 @@ typedef struct ddt ddt_t;
 typedef struct ddt_entry ddt_entry_t;
 struct dsl_pool;
 struct dsl_dataset;
+struct dsl_crypto_params;
 
 /*
  * General-purpose 32-bit and 64-bit bitfield encodings.
@@ -216,7 +217,7 @@ typedef struct zio_cksum_salt {
  * G		gang block indicator
  * B		byteorder (endianness)
  * D		dedup
- * X		encryption (on version 30, which is not supported)
+ * X		encryption
  * E		blkptr_t contains embedded data (see below)
  * lvl		level of indirection
  * type		DMU object type
@@ -230,6 +231,83 @@ typedef struct zio_cksum_salt {
  */
 
 /*
+ * The blkptr_t's of encrypted blocks also need to store the encryption
+ * parameters so that the block can be decrypted. This layout is as follows:
+ *
+ *	64	56	48	40	32	24	16	8	0
+ *	+-------+-------+-------+-------+-------+-------+-------+-------+
+ * 0	|		vdev1		| GRID  |	  ASIZE		|
+ *	+-------+-------+-------+-------+-------+-------+-------+-------+
+ * 1	|G|			 offset1				|
+ *	+-------+-------+-------+-------+-------+-------+-------+-------+
+ * 2	|		vdev2		| GRID  |	  ASIZE		|
+ *	+-------+-------+-------+-------+-------+-------+-------+-------+
+ * 3	|G|			 offset2				|
+ *	+-------+-------+-------+-------+-------+-------+-------+-------+
+ * 4	|			salt					|
+ *	+-------+-------+-------+-------+-------+-------+-------+-------+
+ * 5	|			IV1					|
+ *	+-------+-------+-------+-------+-------+-------+-------+-------+
+ * 6	|BDX|lvl| type	| cksum |E| comp|    PSIZE	|     LSIZE	|
+ *	+-------+-------+-------+-------+-------+-------+-------+-------+
+ * 7	|			padding					|
+ *	+-------+-------+-------+-------+-------+-------+-------+-------+
+ * 8	|			padding					|
+ *	+-------+-------+-------+-------+-------+-------+-------+-------+
+ * 9	|			physical birth txg			|
+ *	+-------+-------+-------+-------+-------+-------+-------+-------+
+ * a	|			logical birth txg			|
+ *	+-------+-------+-------+-------+-------+-------+-------+-------+
+ * b	|		IV2		|	    fill count		|
+ *	+-------+-------+-------+-------+-------+-------+-------+-------+
+ * c	|			checksum[0]				|
+ *	+-------+-------+-------+-------+-------+-------+-------+-------+
+ * d	|			checksum[1]				|
+ *	+-------+-------+-------+-------+-------+-------+-------+-------+
+ * e	|			MAC[0]					|
+ *	+-------+-------+-------+-------+-------+-------+-------+-------+
+ * f	|			MAC[1]					|
+ *	+-------+-------+-------+-------+-------+-------+-------+-------+
+ *
+ * Legend:
+ *
+ * salt		Salt for generating encryption keys
+ * IV1		First 64 bits of encryption IV
+ * X		Block requires encryption handling (set to 1)
+ * E		blkptr_t contains embedded data (set to 0, see below)
+ * fill count	number of non-zero blocks under this bp (truncated to 32 bits)
+ * IV2		Last 32 bits of encryption IV
+ * checksum[2]	128-bit checksum of the data this bp describes
+ * MAC[2]	128-bit message authentication code for this data
+ *
+ * The X bit being set indicates that this block is one of 3 types. If this is
+ * a level 0 block with an encrypted object type, the block is encrypted
+ * (see BP_IS_ENCRYPTED()). If this is a level 0 block with an unencrypted
+ * object type, this block is authenticated with an HMAC (see
+ * BP_IS_AUTHENTICATED()). Otherwise (if level > 0), this bp will use the MAC
+ * words to store a checksum-of-MACs from the level below (see
+ * BP_HAS_INDIRECT_MAC_CKSUM()). For convenience in the code, BP_IS_PROTECTED()
+ * refers to both encrypted and authenticated blocks and BP_USES_CRYPT()
+ * refers to any of these 3 kinds of blocks.
+ *
+ * The additional encryption parameters are the salt, IV, and MAC which are
+ * explained in greater detail in the block comment at the top of zio_crypt.c.
+ * The MAC occupies half of the checksum space since it serves a very similar
+ * purpose: to prevent data corruption on disk. The only functional difference
+ * is that the checksum is used to detect on-disk corruption whether or not the
+ * encryption key is loaded and the MAC provides additional protection against
+ * malicious disk tampering. We use the 3rd DVA to store the salt and first
+ * 64 bits of the IV. As a result encrypted blocks can only have 2 copies
+ * maximum instead of the normal 3. The last 32 bits of the IV are stored in
+ * the upper bits of what is usually the fill count. Note that only blocks at
+ * level 0 or -2 are ever encrypted, which allows us to guarantee that these
+ * 32 bits are not trampled over by other code (see zio_crypt.c for details).
+ * The salt and IV are not used for authenticated bps or bps with an indirect
+ * MAC checksum, so these blocks can utilize all 3 DVAs and the full 64 bits
+ * for the fill count.
+ */
+
+/*
  * "Embedded" blkptr_t's don't actually point to a block, instead they
  * have a data payload embedded in the blkptr_t itself.  See the comment
  * in blkptr.c for more details.
@@ -284,7 +362,9 @@ typedef struct zio_cksum_salt {
  * BP's so the BP_SET_* macros can be used with them.  etype, PSIZE, LSIZE must
  * be set with the BPE_SET_* macros.  BP_SET_EMBEDDED() should be called before
  * other macros, as they assert that they are only used on BP's of the correct
- * "embedded-ness".
+ * "embedded-ness". Encrypted blkptr_t's cannot be embedded because they use
+ * the payload space for encryption parameters (see the comment above on
+ * how encryption parameters are stored).
  */
 
 #define	BPE_GET_ETYPE(bp)	\
@@ -308,7 +388,7 @@ _NOTE(CONSTCOND) } while (0)
 	BF64_GET_SB((bp)->blk_prop, 25, 7, 0, 1))
 #define	BPE_SET_PSIZE(bp, x)	do { \
 	ASSERT(BP_IS_EMBEDDED(bp)); \
-	BF64_SET_SB((bp)->blk_prop, 25, 7, 0, 1, x); \
+	BF64_SET_SB((bp)->blk_prop, 25, 7, 0, 1, x);	\
 _NOTE(CONSTCOND) } while (0)
 
 typedef enum bp_embedded_type {
@@ -410,6 +490,26 @@ _NOTE(CONSTCOND) } while (0)
 #define	BP_GET_LEVEL(bp)		BF64_GET((bp)->blk_prop, 56, 5)
 #define	BP_SET_LEVEL(bp, x)		BF64_SET((bp)->blk_prop, 56, 5, x)
 
+/* encrypted, authenticated, and MAC cksum bps use the same bit */
+#define	BP_USES_CRYPT(bp)		BF64_GET((bp)->blk_prop, 61, 1)
+#define	BP_SET_CRYPT(bp, x)		BF64_SET((bp)->blk_prop, 61, 1, x)
+
+#define	BP_IS_ENCRYPTED(bp)			\
+	(BP_USES_CRYPT(bp) &&			\
+	BP_GET_LEVEL(bp) == 0 &&		\
+	DMU_OT_IS_ENCRYPTED(BP_GET_TYPE(bp)))
+
+#define	BP_IS_AUTHENTICATED(bp)			\
+	(BP_USES_CRYPT(bp) &&			\
+	BP_GET_LEVEL(bp) == 0 &&		\
+	!DMU_OT_IS_ENCRYPTED(BP_GET_TYPE(bp)))
+
+#define	BP_HAS_INDIRECT_MAC_CKSUM(bp)		\
+	(BP_USES_CRYPT(bp) && BP_GET_LEVEL(bp) > 0)
+
+#define	BP_IS_PROTECTED(bp)			\
+	(BP_IS_ENCRYPTED(bp) || BP_IS_AUTHENTICATED(bp))
+
 #define	BP_GET_DEDUP(bp)		BF64_GET((bp)->blk_prop, 62, 1)
 #define	BP_SET_DEDUP(bp, x)		BF64_SET((bp)->blk_prop, 62, 1, x)
 
@@ -427,7 +527,26 @@ _NOTE(CONSTCOND) } while (0)
 	(bp)->blk_phys_birth = ((logical) == (physical) ? 0 : (physical)); \
 }
 
-#define	BP_GET_FILL(bp) (BP_IS_EMBEDDED(bp) ? 1 : (bp)->blk_fill)
+#define	BP_GET_FILL(bp)				\
+	((BP_IS_ENCRYPTED(bp)) ? BF64_GET((bp)->blk_fill, 0, 32) : \
+	((BP_IS_EMBEDDED(bp)) ? 1 : (bp)->blk_fill))
+
+#define	BP_SET_FILL(bp, fill)			\
+{						\
+	if (BP_IS_ENCRYPTED(bp))			\
+		BF64_SET((bp)->blk_fill, 0, 32, fill); \
+	else					\
+		(bp)->blk_fill = fill;		\
+}
+
+#define	BP_GET_IV2(bp)				\
+	(ASSERT(BP_IS_ENCRYPTED(bp)),		\
+	BF64_GET((bp)->blk_fill, 32, 32))
+#define	BP_SET_IV2(bp, iv2)			\
+{						\
+	ASSERT(BP_IS_ENCRYPTED(bp));		\
+	BF64_SET((bp)->blk_fill, 32, 32, iv2);	\
+}
 
 #define	BP_IS_METADATA(bp)	\
 	(BP_GET_LEVEL(bp) > 0 || DMU_OT_IS_METADATA(BP_GET_TYPE(bp)))
@@ -436,7 +555,7 @@ _NOTE(CONSTCOND) } while (0)
 	(BP_IS_EMBEDDED(bp) ? 0 : \
 	DVA_GET_ASIZE(&(bp)->blk_dva[0]) + \
 	DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \
-	DVA_GET_ASIZE(&(bp)->blk_dva[2]))
+	(DVA_GET_ASIZE(&(bp)->blk_dva[2]) * !BP_IS_ENCRYPTED(bp)))
 
 #define	BP_GET_UCSIZE(bp)	\
 	(BP_IS_METADATA(bp) ? BP_GET_PSIZE(bp) : BP_GET_LSIZE(bp))
@@ -445,13 +564,13 @@ _NOTE(CONSTCOND) } while (0)
 	(BP_IS_EMBEDDED(bp) ? 0 : \
 	!!DVA_GET_ASIZE(&(bp)->blk_dva[0]) + \
 	!!DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \
-	!!DVA_GET_ASIZE(&(bp)->blk_dva[2]))
+	(!!DVA_GET_ASIZE(&(bp)->blk_dva[2]) * !BP_IS_ENCRYPTED(bp)))
 
 #define	BP_COUNT_GANG(bp)	\
 	(BP_IS_EMBEDDED(bp) ? 0 : \
 	(DVA_GET_GANG(&(bp)->blk_dva[0]) + \
 	DVA_GET_GANG(&(bp)->blk_dva[1]) + \
-	DVA_GET_GANG(&(bp)->blk_dva[2])))
+	(DVA_GET_GANG(&(bp)->blk_dva[2]) * !BP_IS_ENCRYPTED(bp))))
 
 #define	DVA_EQUAL(dva1, dva2)	\
 	((dva1)->dva_word[1] == (dva2)->dva_word[1] && \
@@ -470,6 +589,10 @@ _NOTE(CONSTCOND) } while (0)
 	((zc1).zc_word[2] - (zc2).zc_word[2]) | \
 	((zc1).zc_word[3] - (zc2).zc_word[3])))
 
+#define	ZIO_CHECKSUM_MAC_EQUAL(zc1, zc2) \
+	(0 == (((zc1).zc_word[0] - (zc2).zc_word[0]) | \
+	((zc1).zc_word[1] - (zc2).zc_word[1])))
+
 #define	ZIO_CHECKSUM_IS_ZERO(zc) \
 	(0 == ((zc)->zc_word[0] | (zc)->zc_word[1] | \
 	(zc)->zc_word[2] | (zc)->zc_word[3]))
@@ -530,7 +653,7 @@ _NOTE(CONSTCOND) } while (0)
 
 #define	BP_SHOULD_BYTESWAP(bp)	(BP_GET_BYTEORDER(bp) != ZFS_HOST_BYTEORDER)
 
-#define	BP_SPRINTF_LEN	320
+#define	BP_SPRINTF_LEN	400
 
 /*
  * This macro allows code sharing between zfs, libzpool, and mdb.
@@ -543,7 +666,18 @@ _NOTE(CONSTCOND) } while (0)
 	    { "zero", "single", "double", "triple" };			\
 	int len = 0;							\
 	int copies = 0;							\
-									\
+	const char *crypt_type;						\
+	if (bp != NULL) {						\
+		if (BP_IS_ENCRYPTED(bp)) {				\
+			crypt_type = "encrypted";			\
+		} else if (BP_IS_AUTHENTICATED(bp)) {			\
+			crypt_type = "authenticated";			\
+		} else if (BP_HAS_INDIRECT_MAC_CKSUM(bp)) {		\
+			crypt_type = "indirect-MAC";			\
+		} else {						\
+			crypt_type = "unencrypted";			\
+		}							\
+	}								\
 	if (bp == NULL) {						\
 		len += func(buf + len, size - len, "<NULL>");		\
 	} else if (BP_IS_HOLE(bp)) {					\
@@ -577,18 +711,27 @@ _NOTE(CONSTCOND) } while (0)
 			    (u_longlong_t)DVA_GET_ASIZE(dva),		\
 			    ws);					\
 		}							\
+		if (BP_IS_ENCRYPTED(bp)) {				\
+			len += func(buf + len, size - len,		\
+			    "salt=%llx iv=%llx:%llx%c",			\
+			    (u_longlong_t)bp->blk_dva[2].dva_word[0],	\
+			    (u_longlong_t)bp->blk_dva[2].dva_word[1],	\
+			    (u_longlong_t)BP_GET_IV2(bp),		\
+			    ws);					\
+		}							\
 		if (BP_IS_GANG(bp) &&					\
 		    DVA_GET_ASIZE(&bp->blk_dva[2]) <=			\
 		    DVA_GET_ASIZE(&bp->blk_dva[1]) / 2)			\
 			copies--;					\
 		len += func(buf + len, size - len,			\
-		    "[L%llu %s] %s %s %s %s %s %s%c"			\
+		    "[L%llu %s] %s %s %s %s %s %s %s%c"			\
 		    "size=%llxL/%llxP birth=%lluL/%lluP fill=%llu%c"	\
 		    "cksum=%llx:%llx:%llx:%llx",			\
 		    (u_longlong_t)BP_GET_LEVEL(bp),			\
 		    type,						\
 		    checksum,						\
 		    compress,						\
+		    crypt_type,						\
 		    BP_GET_BYTEORDER(bp) == 0 ? "BE" : "LE",		\
 		    BP_IS_GANG(bp) ? "gang" : "contiguous",		\
 		    BP_GET_DEDUP(bp) ? "dedup" : "unique",		\
@@ -622,8 +765,8 @@ extern int spa_open_rewind(const char *pool, spa_t **, void *tag,
     nvlist_t *policy, nvlist_t **config);
 extern int spa_get_stats(const char *pool, nvlist_t **config, char *altroot,
     size_t buflen);
-extern int spa_create(const char *pool, nvlist_t *config, nvlist_t *props,
-    nvlist_t *zplprops);
+extern int spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
+    nvlist_t *zplprops, struct dsl_crypto_params *dcp);
 extern int spa_import_rootpool(char *devpath, char *devid);
 extern int spa_import(const char *pool, nvlist_t *config, nvlist_t *props,
     uint64_t flags);
@@ -890,9 +1033,10 @@ extern void spa_history_log_internal_dd(dsl_dir_t *dd, const char *operation,
 
 /* error handling */
 struct zbookmark_phys;
-extern void spa_log_error(spa_t *spa, zio_t *zio);
+extern void spa_log_error(spa_t *spa, const struct zbookmark_phys *zb);
 extern void zfs_ereport_post(const char *class, spa_t *spa, vdev_t *vd,
-    zio_t *zio, uint64_t stateoroffset, uint64_t length);
+    const struct zbookmark_phys *zb, struct zio *zio, uint64_t stateoroffset,
+    uint64_t length);
 extern void zfs_post_remove(spa_t *spa, vdev_t *vd);
 extern void zfs_post_state_change(spa_t *spa, vdev_t *vd);
 extern void zfs_post_autoreplace(spa_t *spa, vdev_t *vd);
diff --git a/usr/src/uts/common/fs/zfs/sys/spa_impl.h b/usr/src/uts/common/fs/zfs/sys/spa_impl.h
index 19516a1a1b..d63013ce0d 100644
--- a/usr/src/uts/common/fs/zfs/sys/spa_impl.h
+++ b/usr/src/uts/common/fs/zfs/sys/spa_impl.h
@@ -44,6 +44,7 @@
 #include <sys/refcount.h>
 #include <sys/bplist.h>
 #include <sys/bpobj.h>
+#include <sys/dsl_crypt.h>
 #include <sys/zfeature.h>
 #include <sys/zthr.h>
 #include <zfeature_common.h>
@@ -372,6 +373,8 @@ struct spa {
 	uint64_t	spa_deadman_synctime;	/* deadman expiration timer */
 	uint64_t	spa_all_vdev_zaps;	/* ZAP of per-vd ZAP obj #s */
 	spa_avz_action_t	spa_avz_action;	/* destroy/rebuild AVZ? */
+	spa_keystore_t	spa_keystore;	/* loaded crypto keys */
+	uint64_t	spa_errata;	/* errata issues detected */
 
 	/*
 	 * spa_iokstat_lock protects spa_iokstat and
diff --git a/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h b/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h
index 70916c45b7..1457200dd8 100644
--- a/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h
+++ b/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h
@@ -94,7 +94,7 @@ typedef enum drr_headertype {
 /* flag #21 is reserved for a Delphix feature */
 #define	DMU_BACKUP_FEATURE_COMPRESSED		(1 << 22)
 #define	DMU_BACKUP_FEATURE_LARGE_DNODE		(1 << 23)
-/* flag #24 is reserved for the raw send feature */
+#define	DMU_BACKUP_FEATURE_RAW			(1 << 24)
 /* flag #25 is reserved for the ZSTD compression feature */
 
 /*
@@ -105,7 +105,8 @@ typedef enum drr_headertype {
     DMU_BACKUP_FEATURE_EMBED_DATA | DMU_BACKUP_FEATURE_LZ4 | \
     DMU_BACKUP_FEATURE_RESUMING | \
     DMU_BACKUP_FEATURE_LARGE_BLOCKS | DMU_BACKUP_FEATURE_LARGE_DNODE | \
-    DMU_BACKUP_FEATURE_COMPRESSED)
+    DMU_BACKUP_FEATURE_COMPRESSED | \
+    DMU_BACKUP_FEATURE_RAW)
 
 /* Are all features in the given flag word currently supported? */
 #define	DMU_STREAM_SUPPORTED(x)	(!((x) & ~DMU_BACKUP_FEATURE_MASK))
@@ -149,20 +150,50 @@ typedef enum dmu_send_resume_token_version {
  * cannot necessarily be received as a clone correctly.
  */
 #define	DRR_FLAG_FREERECORDS	(1<<2)
+/*
+ * When DRR_FLAG_SPILL_BLOCK is set it indicates the DRR_OBJECT_SPILL
+ * and DRR_SPILL_UNMODIFIED flags are meaningful in the send stream.
+ *
+ * When DRR_FLAG_SPILL_BLOCK is set, DRR_OBJECT records will have
+ * DRR_OBJECT_SPILL set if and only if they should have a spill block
+ * (either an existing one, or a new one in the send stream).  When clear
+ * the object does not have a spill block and any existing spill block
+ * should be freed.
+ *
+ * Similarly, when DRR_FLAG_SPILL_BLOCK is set, DRR_SPILL records will
+ * have DRR_SPILL_UNMODIFIED set if and only if they were included for
+ * backward compatibility purposes, and can be safely ignored by new versions
+ * of zfs receive.  Previous versions of ZFS which do not understand the
+ * DRR_FLAG_SPILL_BLOCK will process this record and recreate any missing
+ * spill blocks.
+ */
+#define	DRR_FLAG_SPILL_BLOCK	(1<<3)
 
 /*
- * flags in the drr_checksumflags field in the DRR_WRITE and
- * DRR_WRITE_BYREF blocks
+ * flags in the drr_flags field in the DRR_WRITE, DRR_SPILL, DRR_OBJECT,
+ * DRR_WRITE_BYREF, and DRR_OBJECT_RANGE blocks
  */
-#define	DRR_CHECKSUM_DEDUP	(1<<0)
+#define	DRR_CHECKSUM_DEDUP	(1<<0) /* not used for DRR_SPILL blocks */
+#define	DRR_RAW_BYTESWAP	(1<<1)
+#define	DRR_OBJECT_SPILL	(1<<2) /* OBJECT record has a spill block */
+#define	DRR_SPILL_UNMODIFIED	(1<<2) /* SPILL record for unmodified block */
 
 #define	DRR_IS_DEDUP_CAPABLE(flags)	((flags) & DRR_CHECKSUM_DEDUP)
+#define	DRR_IS_RAW_BYTESWAPPED(flags)	((flags) & DRR_RAW_BYTESWAP)
+#define	DRR_OBJECT_HAS_SPILL(flags)	((flags) & DRR_OBJECT_SPILL)
+#define	DRR_SPILL_IS_UNMODIFIED(flags)	((flags) & DRR_SPILL_UNMODIFIED)
 
 /* deal with compressed drr_write replay records */
 #define	DRR_WRITE_COMPRESSED(drrw)	((drrw)->drr_compressiontype != 0)
 #define	DRR_WRITE_PAYLOAD_SIZE(drrw) \
 	(DRR_WRITE_COMPRESSED(drrw) ? (drrw)->drr_compressed_size : \
 	(drrw)->drr_logical_size)
+#define	DRR_SPILL_PAYLOAD_SIZE(drrs) \
+	((drrs)->drr_compressed_size ? \
+	(drrs)->drr_compressed_size : (drrs)->drr_length)
+#define	DRR_OBJECT_PAYLOAD_SIZE(drro) \
+	((drro)->drr_raw_bonuslen != 0 ? \
+	(drro)->drr_raw_bonuslen : P2ROUNDUP((drro)->drr_bonuslen, 8))
 
 /*
  * zfs ioctl command structure
@@ -171,7 +202,8 @@ typedef struct dmu_replay_record {
 	enum {
 		DRR_BEGIN, DRR_OBJECT, DRR_FREEOBJECTS,
 		DRR_WRITE, DRR_FREE, DRR_END, DRR_WRITE_BYREF,
-		DRR_SPILL, DRR_WRITE_EMBEDDED, DRR_NUMTYPES
+		DRR_SPILL, DRR_WRITE_EMBEDDED, DRR_OBJECT_RANGE,
+		DRR_NUMTYPES
 	} drr_type;
 	uint32_t drr_payloadlen;
 	union {
@@ -198,8 +230,15 @@ typedef struct dmu_replay_record {
 			uint8_t drr_checksumtype;
 			uint8_t drr_compress;
 			uint8_t drr_dn_slots;
-			uint8_t drr_pad[5];
+			uint8_t drr_flags;
+			uint32_t drr_raw_bonuslen;
 			uint64_t drr_toguid;
+			/* only (possibly) nonzero for raw streams */
+			uint8_t drr_indblkshift;
+			uint8_t drr_nlevels;
+			uint8_t drr_nblkptr;
+			uint8_t drr_pad[5];
+			uint64_t drr_maxblkid;
 			/* bonus content follows */
 		} drr_object;
 		struct drr_freeobjects {
@@ -215,13 +254,17 @@ typedef struct dmu_replay_record {
 			uint64_t drr_logical_size;
 			uint64_t drr_toguid;
 			uint8_t drr_checksumtype;
-			uint8_t drr_checksumflags;
+			uint8_t drr_flags;
 			uint8_t drr_compressiontype;
 			uint8_t drr_pad2[5];
 			/* deduplication key */
 			ddt_key_t drr_key;
 			/* only nonzero if drr_compressiontype is not 0 */
 			uint64_t drr_compressed_size;
+			/* only nonzero for raw streams */
+			uint8_t drr_salt[ZIO_DATA_SALT_LEN];
+			uint8_t drr_iv[ZIO_DATA_IV_LEN];
+			uint8_t drr_mac[ZIO_DATA_MAC_LEN];
 			/* content follows */
 		} drr_write;
 		struct drr_free {
@@ -242,7 +285,7 @@ typedef struct dmu_replay_record {
 			uint64_t drr_refoffset;
 			/* properties of the data */
 			uint8_t drr_checksumtype;
-			uint8_t drr_checksumflags;
+			uint8_t drr_flags;
 			uint8_t drr_pad2[6];
 			ddt_key_t drr_key; /* deduplication key */
 		} drr_write_byref;
@@ -250,7 +293,15 @@ typedef struct dmu_replay_record {
 			uint64_t drr_object;
 			uint64_t drr_length;
 			uint64_t drr_toguid;
-			uint64_t drr_pad[4]; /* needed for crypto */
+			uint8_t drr_flags;
+			uint8_t drr_compressiontype;
+			uint8_t drr_pad[6];
+			/* only nonzero for raw streams */
+			uint64_t drr_compressed_size;
+			uint8_t drr_salt[ZIO_DATA_SALT_LEN];
+			uint8_t drr_iv[ZIO_DATA_IV_LEN];
+			uint8_t drr_mac[ZIO_DATA_MAC_LEN];
+			dmu_object_type_t drr_type;
 			/* spill data follows */
 		} drr_spill;
 		struct drr_write_embedded {
@@ -266,6 +317,16 @@ typedef struct dmu_replay_record {
 			uint32_t drr_psize; /* compr. (real) size of payload */
 			/* (possibly compressed) content follows */
 		} drr_write_embedded;
+		struct drr_object_range {
+			uint64_t drr_firstobj;
+			uint64_t drr_numslots;
+			uint64_t drr_toguid;
+			uint8_t drr_salt[ZIO_DATA_SALT_LEN];
+			uint8_t drr_iv[ZIO_DATA_IV_LEN];
+			uint8_t drr_mac[ZIO_DATA_MAC_LEN];
+			uint8_t drr_flags;
+			uint8_t drr_pad[3];
+		} drr_object_range;
 
 		/*
 		 * Nore: drr_checksum is overlaid with all record types
@@ -335,6 +396,7 @@ typedef enum zinject_type {
 	ZINJECT_IGNORED_WRITES,
 	ZINJECT_PANIC,
 	ZINJECT_DELAY_IO,
+	ZINJECT_DECRYPT_FAULT,
 } zinject_type_t;
 
 typedef struct zfs_share {
diff --git a/usr/src/uts/common/fs/zfs/sys/zil.h b/usr/src/uts/common/fs/zfs/sys/zil.h
index e6b18da95b..2e44ff2b14 100644
--- a/usr/src/uts/common/fs/zfs/sys/zil.h
+++ b/usr/src/uts/common/fs/zfs/sys/zil.h
@@ -33,6 +33,7 @@
 #include <sys/spa.h>
 #include <sys/zio.h>
 #include <sys/dmu.h>
+#include <sys/zio_crypt.h>
 
 #ifdef	__cplusplus
 extern "C" {
@@ -407,7 +408,8 @@ typedef int zil_get_data_t(void *arg, lr_write_t *lr, char *dbuf,
     struct lwb *lwb, zio_t *zio);
 
 extern int zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func,
-    zil_parse_lr_func_t *parse_lr_func, void *arg, uint64_t txg);
+    zil_parse_lr_func_t *parse_lr_func, void *arg, uint64_t txg,
+    boolean_t decrypt);
 
 extern void	zil_init(void);
 extern void	zil_fini(void);
diff --git a/usr/src/uts/common/fs/zfs/sys/zio.h b/usr/src/uts/common/fs/zfs/sys/zio.h
index 00d2ebbebb..ec4ec29d5a 100644
--- a/usr/src/uts/common/fs/zfs/sys/zio.h
+++ b/usr/src/uts/common/fs/zfs/sys/zio.h
@@ -104,6 +104,15 @@ enum zio_checksum {
 #define	ZIO_DEDUPCHECKSUM	ZIO_CHECKSUM_SHA256
 #define	ZIO_DEDUPDITTO_MIN	100
 
+#define	ZIO_CRYPT_ON_VALUE	ZIO_CRYPT_AES_256_CCM
+#define	ZIO_CRYPT_DEFAULT	ZIO_CRYPT_OFF
+
+/* macros defining encryption lengths */
+#define	ZIO_OBJSET_MAC_LEN		32
+#define	ZIO_DATA_IV_LEN			12
+#define	ZIO_DATA_SALT_LEN		8
+#define	ZIO_DATA_MAC_LEN		16
+
 /*
  * The number of "legacy" compression functions which can be set on individual
  * objects.
@@ -191,16 +200,18 @@ enum zio_flag {
 	ZIO_FLAG_DONT_PROPAGATE	= 1 << 20,
 	ZIO_FLAG_IO_BYPASS	= 1 << 21,
 	ZIO_FLAG_IO_REWRITE	= 1 << 22,
-	ZIO_FLAG_RAW		= 1 << 23,
-	ZIO_FLAG_GANG_CHILD	= 1 << 24,
-	ZIO_FLAG_DDT_CHILD	= 1 << 25,
-	ZIO_FLAG_GODFATHER	= 1 << 26,
-	ZIO_FLAG_NOPWRITE	= 1 << 27,
-	ZIO_FLAG_REEXECUTED	= 1 << 28,
-	ZIO_FLAG_DELEGATED	= 1 << 29,
+	ZIO_FLAG_RAW_COMPRESS	= 1 << 23,
+	ZIO_FLAG_RAW_ENCRYPT	= 1 << 24,
+	ZIO_FLAG_GANG_CHILD	= 1 << 25,
+	ZIO_FLAG_DDT_CHILD	= 1 << 26,
+	ZIO_FLAG_GODFATHER	= 1 << 27,
+	ZIO_FLAG_NOPWRITE	= 1 << 28,
+	ZIO_FLAG_REEXECUTED	= 1 << 29,
+	ZIO_FLAG_DELEGATED	= 1 << 30,
 };
 
 #define	ZIO_FLAG_MUSTSUCCEED		0
+#define	ZIO_FLAG_RAW	(ZIO_FLAG_RAW_COMPRESS | ZIO_FLAG_RAW_ENCRYPT)
 
 #define	ZIO_DDT_CHILD_FLAGS(zio)				\
 	(((zio)->io_flags & ZIO_FLAG_DDT_INHERIT) |		\
@@ -314,12 +325,17 @@ typedef struct zio_prop {
 	boolean_t		zp_dedup_verify;
 	boolean_t		zp_nopwrite;
 	uint32_t		zp_zpl_smallblk;
+	boolean_t		zp_encrypt;
+	boolean_t		zp_byteorder;
+	uint8_t			zp_salt[ZIO_DATA_SALT_LEN];
+	uint8_t			zp_iv[ZIO_DATA_IV_LEN];
+	uint8_t			zp_mac[ZIO_DATA_MAC_LEN];
 } zio_prop_t;
 
 typedef struct zio_cksum_report zio_cksum_report_t;
 
 typedef void zio_cksum_finish_f(zio_cksum_report_t *rep,
-    const void *good_data);
+    const abd_t *good_data);
 typedef void zio_cksum_free_f(void *cbdata, size_t size);
 
 struct zio_bad_cksum;				/* defined in zio_checksum.h */
@@ -524,8 +540,9 @@ extern zio_t *zio_write_phys(zio_t *pio, vdev_t *vd, uint64_t offset,
 extern zio_t *zio_free_sync(zio_t *pio, spa_t *spa, uint64_t txg,
     const blkptr_t *bp, enum zio_flag flags);
 
-extern int zio_alloc_zil(spa_t *spa, uint64_t objset, uint64_t txg,
+extern int zio_alloc_zil(spa_t *spa, objset_t *os, uint64_t txg,
     blkptr_t *new_bp, blkptr_t *old_bp, uint64_t size, boolean_t *slog);
+extern void zio_free_zil(spa_t *spa, uint64_t txg, blkptr_t *bp);
 extern void zio_flush(zio_t *zio, vdev_t *vd);
 extern void zio_shrink(zio_t *zio, uint64_t size);
 
@@ -598,6 +615,8 @@ extern int zio_inject_list_next(int *id, char *name, size_t buflen,
     struct zinject_record *record);
 extern int zio_clear_fault(int id);
 extern void zio_handle_panic_injection(spa_t *spa, char *tag, uint64_t type);
+extern int zio_handle_decrypt_injection(spa_t *spa, const zbookmark_phys_t *zb,
+    uint64_t type, int error);
 extern int zio_handle_fault_injection(zio_t *zio, int error);
 extern int zio_handle_device_injection(vdev_t *vd, zio_t *zio, int error);
 extern int zio_handle_label_injection(zio_t *zio, int error);
@@ -607,18 +626,20 @@ extern hrtime_t zio_handle_io_delay(zio_t *zio);
 /*
  * Checksum ereport functions
  */
-extern void zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd, struct zio *zio,
-    uint64_t offset, uint64_t length, void *arg, struct zio_bad_cksum *info);
+extern void zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd,
+    const zbookmark_phys_t *zb, struct zio *zio, uint64_t offset,
+    uint64_t length, void *arg, struct zio_bad_cksum *info);
 extern void zfs_ereport_finish_checksum(zio_cksum_report_t *report,
-    const void *good_data, const void *bad_data, boolean_t drop_if_identical);
+    const abd_t *good_data, const abd_t *bad_data, boolean_t drop_if_identical);
 
 extern void zfs_ereport_send_interim_checksum(zio_cksum_report_t *report);
 extern void zfs_ereport_free_checksum(zio_cksum_report_t *report);
 
 /* If we have the good data in hand, this function can be used */
 extern void zfs_ereport_post_checksum(spa_t *spa, vdev_t *vd,
-    struct zio *zio, uint64_t offset, uint64_t length,
-    const void *good_data, const void *bad_data, struct zio_bad_cksum *info);
+    const zbookmark_phys_t *zb, struct zio *zio, uint64_t offset,
+    uint64_t length, const abd_t *good_data, const abd_t *bad_data,
+    struct zio_bad_cksum *info);
 
 /* Called from spa_sync(), but primarily an injection handler */
 extern void spa_handle_ignored_writes(spa_t *spa);
diff --git a/usr/src/uts/common/fs/zfs/sys/zio_checksum.h b/usr/src/uts/common/fs/zfs/sys/zio_checksum.h
index 3eda057eae..6119163af8 100644
--- a/usr/src/uts/common/fs/zfs/sys/zio_checksum.h
+++ b/usr/src/uts/common/fs/zfs/sys/zio_checksum.h
@@ -54,7 +54,7 @@ typedef enum zio_checksum_flags {
 	/* Uses salt value */
 	ZCHECKSUM_FLAG_SALTED = (1 << 4),
 	/* Strong enough for nopwrite? */
-	ZCHECKSUM_FLAG_NOPWRITE = (1 << 5)
+	ZCHECKSUM_FLAG_NOPWRITE = (1 << 5),
 } zio_checksum_flags_t;
 
 /*
@@ -103,7 +103,7 @@ extern int zio_checksum_equal(spa_t *, blkptr_t *, enum zio_checksum,
     void *, uint64_t, uint64_t, zio_bad_cksum_t *);
 extern void zio_checksum_compute(zio_t *, enum zio_checksum,
     struct abd *, uint64_t);
-extern int zio_checksum_error_impl(spa_t *, blkptr_t *, enum zio_checksum,
+extern int zio_checksum_error_impl(spa_t *, const blkptr_t *, enum zio_checksum,
     struct abd *, uint64_t, uint64_t, zio_bad_cksum_t *);
 extern int zio_checksum_error(zio_t *zio, zio_bad_cksum_t *out);
 extern enum zio_checksum spa_dedup_checksum(spa_t *spa);
diff --git a/usr/src/uts/common/fs/zfs/sys/zio_crypt.h b/usr/src/uts/common/fs/zfs/sys/zio_crypt.h
new file mode 100644
index 0000000000..6163f97458
--- /dev/null
+++ b/usr/src/uts/common/fs/zfs/sys/zio_crypt.h
@@ -0,0 +1,152 @@
+/*
+ * CDDL HEADER START
+ *
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2017, Datto, Inc. All rights reserved.
+ */
+
+#ifndef	_SYS_ZIO_CRYPT_H
+#define	_SYS_ZIO_CRYPT_H
+
+#include <sys/dmu.h>
+#include <sys/refcount.h>
+#include <sys/crypto/api.h>
+#include <sys/nvpair.h>
+#include <sys/avl.h>
+#include <sys/zio.h>
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+/* forward declarations */
+struct zbookmark_phys;
+
+#define	WRAPPING_KEY_LEN	32
+#define	WRAPPING_IV_LEN		ZIO_DATA_IV_LEN
+#define	WRAPPING_MAC_LEN	ZIO_DATA_MAC_LEN
+#define	MASTER_KEY_MAX_LEN	32
+#define	SHA512_HMAC_KEYLEN	64
+
+#define	ZIO_CRYPT_KEY_CURRENT_VERSION	1ULL
+
+typedef enum zio_crypt_type {
+	ZC_TYPE_NONE = 0,
+	ZC_TYPE_CCM,
+	ZC_TYPE_GCM
+} zio_crypt_type_t;
+
+/* table of supported crypto algorithms, modes and keylengths. */
+typedef struct zio_crypt_info {
+	/* mechanism name, needed by ICP */
+	crypto_mech_name_t ci_mechname;
+
+	/* cipher mode type (GCM, CCM) */
+	zio_crypt_type_t ci_crypt_type;
+
+	/* length of the encryption key */
+	size_t ci_keylen;
+
+	/* human-readable name of the encryption algorithm */
+	char *ci_name;
+} zio_crypt_info_t;
+
+extern zio_crypt_info_t zio_crypt_table[ZIO_CRYPT_FUNCTIONS];
+
+/* in memory representation of an unwrapped key that is loaded into memory */
+typedef struct zio_crypt_key {
+	/* encryption algorithm */
+	uint64_t zk_crypt;
+
+	/* on-disk format version */
+	uint64_t zk_version;
+
+	/* GUID for uniquely identifying this key. Not encrypted on disk. */
+	uint64_t zk_guid;
+
+	/* buffer for master key */
+	uint8_t zk_master_keydata[MASTER_KEY_MAX_LEN];
+
+	/* buffer for hmac key */
+	uint8_t zk_hmac_keydata[SHA512_HMAC_KEYLEN];
+
+	/* buffer for current encryption key derived from master key */
+	uint8_t zk_current_keydata[MASTER_KEY_MAX_LEN];
+
+	/* current 64 bit salt for deriving an encryption key */
+	uint8_t zk_salt[ZIO_DATA_SALT_LEN];
+
+	/* count of how many times the current salt has been used */
+	uint64_t zk_salt_count;
+
+	/* illumos crypto api current encryption key */
+	crypto_key_t zk_current_key;
+
+	/* template of current encryption key for illumos crypto api */
+	crypto_ctx_template_t zk_current_tmpl;
+
+	/* illumos crypto api current hmac key */
+	crypto_key_t zk_hmac_key;
+
+	/* template of hmac key for illumos crypto api */
+	crypto_ctx_template_t zk_hmac_tmpl;
+
+	/* lock for changing the salt and dependent values */
+	krwlock_t zk_salt_lock;
+} zio_crypt_key_t;
+
+void zio_crypt_key_destroy(zio_crypt_key_t *key);
+int zio_crypt_key_init(uint64_t crypt, zio_crypt_key_t *key);
+int zio_crypt_key_get_salt(zio_crypt_key_t *key, uint8_t *salt_out);
+
+int zio_crypt_key_wrap(crypto_key_t *cwkey, zio_crypt_key_t *key, uint8_t *iv,
+    uint8_t *mac, uint8_t *keydata_out, uint8_t *hmac_keydata_out);
+int zio_crypt_key_unwrap(crypto_key_t *cwkey, uint64_t crypt, uint64_t version,
+    uint64_t guid, uint8_t *keydata, uint8_t *hmac_keydata, uint8_t *iv,
+    uint8_t *mac, zio_crypt_key_t *key);
+int zio_crypt_generate_iv(uint8_t *ivbuf);
+int zio_crypt_generate_iv_salt_dedup(zio_crypt_key_t *key, uint8_t *data,
+    uint_t datalen, uint8_t *ivbuf, uint8_t *salt);
+
+void zio_crypt_encode_params_bp(blkptr_t *bp, uint8_t *salt, uint8_t *iv);
+void zio_crypt_decode_params_bp(const blkptr_t *bp, uint8_t *salt, uint8_t *iv);
+void zio_crypt_encode_mac_bp(blkptr_t *bp, uint8_t *mac);
+void zio_crypt_decode_mac_bp(const blkptr_t *bp, uint8_t *mac);
+void zio_crypt_encode_mac_zil(void *data, uint8_t *mac);
+void zio_crypt_decode_mac_zil(const void *data, uint8_t *mac);
+void zio_crypt_copy_dnode_bonus(abd_t *src_abd, uint8_t *dst, uint_t datalen);
+
+int zio_crypt_do_indirect_mac_checksum(boolean_t generate, void *buf,
+    uint_t datalen, boolean_t byteswap, uint8_t *cksum);
+int zio_crypt_do_indirect_mac_checksum_abd(boolean_t generate, abd_t *abd,
+    uint_t datalen, boolean_t byteswap, uint8_t *cksum);
+int zio_crypt_do_hmac(zio_crypt_key_t *key, uint8_t *data, uint_t datalen,
+    uint8_t *digestbuf, uint_t digestlen);
+int zio_crypt_do_objset_hmacs(zio_crypt_key_t *key, void *data, uint_t datalen,
+    boolean_t byteswap, uint8_t *portable_mac, uint8_t *local_mac);
+int zio_do_crypt_data(boolean_t encrypt, zio_crypt_key_t *key,
+    dmu_object_type_t ot, boolean_t byteswap, uint8_t *salt, uint8_t *iv,
+    uint8_t *mac, uint_t datalen, uint8_t *plainbuf, uint8_t *cipherbuf,
+    boolean_t *no_crypt);
+int zio_do_crypt_abd(boolean_t encrypt, zio_crypt_key_t *key,
+    dmu_object_type_t ot, boolean_t byteswap, uint8_t *salt, uint8_t *iv,
+    uint8_t *mac, uint_t datalen, abd_t *pabd, abd_t *cabd,
+    boolean_t *no_crypt);
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif /* _SYS_ZIO_CRYPT_H */
diff --git a/usr/src/uts/common/fs/zfs/sys/zio_impl.h b/usr/src/uts/common/fs/zfs/sys/zio_impl.h
index a36749a308..703522b67d 100644
--- a/usr/src/uts/common/fs/zfs/sys/zio_impl.h
+++ b/usr/src/uts/common/fs/zfs/sys/zio_impl.h
@@ -99,6 +99,18 @@ extern "C" {
  * physical I/O.  The nop write feature can handle writes in either
  * syncing or open context (i.e. zil writes) and as a result is mutually
  * exclusive with dedup.
+ *
+ * Encryption:
+ * Encryption and authentication are handled by the ZIO_STAGE_ENCRYPT stage.
+ * This stage determines how the encryption metadata is stored in the bp.
+ * Decryption and MAC verification are performed during zio_decrypt() as a
+ * transform callback. Encryption is mutually exclusive with nopwrite, because
+ * blocks with the same plaintext will be encrypted with different salts and
+ * IVs (if dedup is off), and therefore have different ciphertexts. For dedup
+ * blocks we deterministically generate the IV and salt by performing an HMAC
+ * of the plaintext, which is computationally expensive, but allows us to keep
+ * support for encrypted dedup. See the block comment in zio_crypt.c for
+ * details.
  */
 
 /*
@@ -113,32 +125,33 @@ enum zio_stage {
 	ZIO_STAGE_ISSUE_ASYNC		= 1 << 4,	/* RWF-- */
 	ZIO_STAGE_WRITE_COMPRESS	= 1 << 5,	/* -W--- */
 
-	ZIO_STAGE_CHECKSUM_GENERATE	= 1 << 6,	/* -W--- */
+	ZIO_STAGE_ENCRYPT		= 1 << 6,	/* -W--- */
+	ZIO_STAGE_CHECKSUM_GENERATE	= 1 << 7,	/* -W--- */
 
-	ZIO_STAGE_NOP_WRITE		= 1 << 7,	/* -W--- */
+	ZIO_STAGE_NOP_WRITE		= 1 << 8,	/* -W--- */
 
-	ZIO_STAGE_DDT_READ_START	= 1 << 8,	/* R---- */
-	ZIO_STAGE_DDT_READ_DONE		= 1 << 9,	/* R---- */
-	ZIO_STAGE_DDT_WRITE		= 1 << 10,	/* -W--- */
-	ZIO_STAGE_DDT_FREE		= 1 << 11,	/* --F-- */
+	ZIO_STAGE_DDT_READ_START	= 1 << 9,	/* R---- */
+	ZIO_STAGE_DDT_READ_DONE		= 1 << 10,	/* R---- */
+	ZIO_STAGE_DDT_WRITE		= 1 << 11,	/* -W--- */
+	ZIO_STAGE_DDT_FREE		= 1 << 12,	/* --F-- */
 
-	ZIO_STAGE_GANG_ASSEMBLE		= 1 << 12,	/* RWFC- */
-	ZIO_STAGE_GANG_ISSUE		= 1 << 13,	/* RWFC- */
+	ZIO_STAGE_GANG_ASSEMBLE		= 1 << 13,	/* RWFC- */
+	ZIO_STAGE_GANG_ISSUE		= 1 << 14,	/* RWFC- */
 
-	ZIO_STAGE_DVA_THROTTLE		= 1 << 14,	/* -W--- */
-	ZIO_STAGE_DVA_ALLOCATE		= 1 << 15,	/* -W--- */
-	ZIO_STAGE_DVA_FREE		= 1 << 16,	/* --F-- */
-	ZIO_STAGE_DVA_CLAIM		= 1 << 17,	/* ---C- */
+	ZIO_STAGE_DVA_THROTTLE		= 1 << 15,	/* -W--- */
+	ZIO_STAGE_DVA_ALLOCATE		= 1 << 16,	/* -W--- */
+	ZIO_STAGE_DVA_FREE		= 1 << 17,	/* --F-- */
+	ZIO_STAGE_DVA_CLAIM		= 1 << 18,	/* ---C- */
 
-	ZIO_STAGE_READY			= 1 << 18,	/* RWFCI */
+	ZIO_STAGE_READY			= 1 << 19,	/* RWFCI */
 
-	ZIO_STAGE_VDEV_IO_START		= 1 << 19,	/* RW--I */
-	ZIO_STAGE_VDEV_IO_DONE		= 1 << 20,	/* RW--I */
-	ZIO_STAGE_VDEV_IO_ASSESS	= 1 << 21,	/* RW--I */
+	ZIO_STAGE_VDEV_IO_START		= 1 << 20,	/* RW--I */
+	ZIO_STAGE_VDEV_IO_DONE		= 1 << 21,	/* RW--I */
+	ZIO_STAGE_VDEV_IO_ASSESS	= 1 << 22,	/* RW--I */
 
-	ZIO_STAGE_CHECKSUM_VERIFY	= 1 << 22,	/* R---- */
+	ZIO_STAGE_CHECKSUM_VERIFY	= 1 << 23,	/* R---- */
 
-	ZIO_STAGE_DONE			= 1 << 23	/* RWFCI */
+	ZIO_STAGE_DONE			= 1 << 24	/* RWFCI */
 };
 
 #define	ZIO_INTERLOCK_STAGES			\
@@ -190,12 +203,14 @@ enum zio_stage {
 #define	ZIO_REWRITE_PIPELINE			\
 	(ZIO_WRITE_COMMON_STAGES |		\
 	ZIO_STAGE_WRITE_COMPRESS |		\
+	ZIO_STAGE_ENCRYPT |			\
 	ZIO_STAGE_WRITE_BP_INIT)
 
 #define	ZIO_WRITE_PIPELINE			\
 	(ZIO_WRITE_COMMON_STAGES |		\
 	ZIO_STAGE_WRITE_BP_INIT |		\
 	ZIO_STAGE_WRITE_COMPRESS |		\
+	ZIO_STAGE_ENCRYPT |			\
 	ZIO_STAGE_DVA_THROTTLE |		\
 	ZIO_STAGE_DVA_ALLOCATE)
 
@@ -210,6 +225,7 @@ enum zio_stage {
 	ZIO_STAGE_WRITE_BP_INIT |		\
 	ZIO_STAGE_ISSUE_ASYNC |			\
 	ZIO_STAGE_WRITE_COMPRESS |		\
+	ZIO_STAGE_ENCRYPT |			\
 	ZIO_STAGE_CHECKSUM_GENERATE |		\
 	ZIO_STAGE_DDT_WRITE)
 
diff --git a/usr/src/uts/common/fs/zfs/vdev.c b/usr/src/uts/common/fs/zfs/vdev.c
index c9f1212168..73b7c8e2fc 100644
--- a/usr/src/uts/common/fs/zfs/vdev.c
+++ b/usr/src/uts/common/fs/zfs/vdev.c
@@ -1331,7 +1331,7 @@ vdev_probe_done(zio_t *zio)
 			ASSERT(zio->io_error != 0);
 			vdev_dbgmsg(vd, "failed probe");
 			zfs_ereport_post(FM_EREPORT_ZFS_PROBE_FAILURE,
-			    spa, vd, NULL, 0, 0);
+			    spa, vd, NULL, NULL, 0, 0);
 			zio->io_error = SET_ERROR(ENXIO);
 		}
 
@@ -4189,7 +4189,8 @@ vdev_set_state(vdev_t *vd, boolean_t isopen, vdev_state_t state, vdev_aux_t aux)
 				class = FM_EREPORT_ZFS_DEVICE_UNKNOWN;
 			}
 
-			zfs_ereport_post(class, spa, vd, NULL, save_state, 0);
+			zfs_ereport_post(class, spa, vd, NULL, NULL,
+			    save_state, 0);
 		}
 
 		/* Erase any notion of persistent removed state */
diff --git a/usr/src/uts/common/fs/zfs/vdev_indirect.c b/usr/src/uts/common/fs/zfs/vdev_indirect.c
index 062c4073a8..9626589444 100644
--- a/usr/src/uts/common/fs/zfs/vdev_indirect.c
+++ b/usr/src/uts/common/fs/zfs/vdev_indirect.c
@@ -1381,7 +1381,7 @@ vdev_indirect_checksum_error(zio_t *zio,
 	void *bad_buf = abd_borrow_buf_copy(ic->ic_data, is->is_size);
 	abd_t *good_abd = is->is_good_child->ic_data;
 	void *good_buf = abd_borrow_buf_copy(good_abd, is->is_size);
-	zfs_ereport_post_checksum(zio->io_spa, vd, zio,
+	zfs_ereport_post_checksum(zio->io_spa, vd, &zio->io_bookmark, zio,
 	    is->is_target_offset, is->is_size, good_buf, bad_buf, &zbc);
 	abd_return_buf(ic->ic_data, bad_buf, is->is_size);
 	abd_return_buf(good_abd, good_buf, is->is_size);
@@ -1458,9 +1458,9 @@ vdev_indirect_all_checksum_errors(zio_t *zio)
 			vd->vdev_stat.vs_checksum_errors++;
 			mutex_exit(&vd->vdev_stat_lock);
 
-			zfs_ereport_post_checksum(zio->io_spa, vd, zio,
-			    is->is_target_offset, is->is_size,
-			    NULL, NULL, NULL);
+			zfs_ereport_post_checksum(zio->io_spa, vd,
+			    &zio->io_bookmark, zio, is->is_target_offset,
+			    is->is_size, NULL, NULL, NULL);
 		}
 	}
 }
diff --git a/usr/src/uts/common/fs/zfs/vdev_raidz.c b/usr/src/uts/common/fs/zfs/vdev_raidz.c
index 6502353542..0dcbb863e3 100644
--- a/usr/src/uts/common/fs/zfs/vdev_raidz.c
+++ b/usr/src/uts/common/fs/zfs/vdev_raidz.c
@@ -267,21 +267,17 @@ static void
 vdev_raidz_map_free(raidz_map_t *rm)
 {
 	int c;
-	size_t size;
 
 	for (c = 0; c < rm->rm_firstdatacol; c++) {
 		abd_free(rm->rm_col[c].rc_abd);
 
 		if (rm->rm_col[c].rc_gdata != NULL)
-			zio_buf_free(rm->rm_col[c].rc_gdata,
-			    rm->rm_col[c].rc_size);
+			abd_free(rm->rm_col[c].rc_gdata);
+
 	}
 
-	size = 0;
-	for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) {
+	for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++)
 		abd_put(rm->rm_col[c].rc_abd);
-		size += rm->rm_col[c].rc_size;
-	}
 
 	if (rm->rm_abd_copy != NULL)
 		abd_free(rm->rm_abd_copy);
@@ -314,14 +310,14 @@ vdev_raidz_cksum_free(void *arg, size_t ignored)
 }
 
 static void
-vdev_raidz_cksum_finish(zio_cksum_report_t *zcr, const void *good_data)
+vdev_raidz_cksum_finish(zio_cksum_report_t *zcr, const abd_t *good_data)
 {
 	raidz_map_t *rm = zcr->zcr_cbdata;
 	size_t c = zcr->zcr_cbinfo;
-	size_t x;
+	size_t x, offset;
 
-	const char *good = NULL;
-	char *bad;
+	const abd_t *good = NULL;
+	const abd_t *bad = rm->rm_col[c].rc_abd;
 
 	if (good_data == NULL) {
 		zfs_ereport_finish_checksum(zcr, NULL, NULL, B_FALSE);
@@ -336,8 +332,6 @@ vdev_raidz_cksum_finish(zio_cksum_report_t *zcr, const void *good_data)
 		 */
 		if (rm->rm_col[0].rc_gdata == NULL) {
 			abd_t *bad_parity[VDEV_RAIDZ_MAXPARITY];
-			char *buf;
-			int offset;
 
 			/*
 			 * Set up the rm_col[]s to generate the parity for
@@ -346,20 +340,21 @@ vdev_raidz_cksum_finish(zio_cksum_report_t *zcr, const void *good_data)
 			 */
 			for (x = 0; x < rm->rm_firstdatacol; x++) {
 				bad_parity[x] = rm->rm_col[x].rc_abd;
-				rm->rm_col[x].rc_gdata =
-				    zio_buf_alloc(rm->rm_col[x].rc_size);
 				rm->rm_col[x].rc_abd =
-				    abd_get_from_buf(rm->rm_col[x].rc_gdata,
+				    rm->rm_col[x].rc_gdata =
+				    abd_alloc_sametype(rm->rm_col[x].rc_abd,
 				    rm->rm_col[x].rc_size);
 			}
 
 			/* fill in the data columns from good_data */
-			buf = (char *)good_data;
+			offset = 0;
 			for (; x < rm->rm_cols; x++) {
 				abd_put(rm->rm_col[x].rc_abd);
-				rm->rm_col[x].rc_abd = abd_get_from_buf(buf,
-				    rm->rm_col[x].rc_size);
-				buf += rm->rm_col[x].rc_size;
+
+				rm->rm_col[x].rc_abd =
+				    abd_get_offset_size((abd_t *)good_data,
+				    offset, rm->rm_col[x].rc_size);
+				offset += rm->rm_col[x].rc_size;
 			}
 
 			/*
@@ -368,34 +363,35 @@ vdev_raidz_cksum_finish(zio_cksum_report_t *zcr, const void *good_data)
 			vdev_raidz_generate_parity(rm);
 
 			/* restore everything back to its original state */
-			for (x = 0; x < rm->rm_firstdatacol; x++) {
-				abd_put(rm->rm_col[x].rc_abd);
+			for (x = 0; x < rm->rm_firstdatacol; x++)
 				rm->rm_col[x].rc_abd = bad_parity[x];
-			}
 
 			offset = 0;
 			for (x = rm->rm_firstdatacol; x < rm->rm_cols; x++) {
 				abd_put(rm->rm_col[x].rc_abd);
-				rm->rm_col[x].rc_abd = abd_get_offset(
-				    rm->rm_abd_copy, offset);
+				rm->rm_col[x].rc_abd = abd_get_offset_size(
+				    rm->rm_abd_copy, offset,
+				    rm->rm_col[x].rc_size);
 				offset += rm->rm_col[x].rc_size;
 			}
 		}
 
 		ASSERT3P(rm->rm_col[c].rc_gdata, !=, NULL);
-		good = rm->rm_col[c].rc_gdata;
+		good = abd_get_offset_size(rm->rm_col[c].rc_gdata, 0,
+		    rm->rm_col[c].rc_size);
 	} else {
 		/* adjust good_data to point at the start of our column */
-		good = good_data;
-
+		offset = 0;
 		for (x = rm->rm_firstdatacol; x < c; x++)
-			good += rm->rm_col[x].rc_size;
+			offset += rm->rm_col[x].rc_size;
+
+		good = abd_get_offset_size((abd_t *)good_data, offset,
+		    rm->rm_col[c].rc_size);
 	}
 
-	bad = abd_borrow_buf_copy(rm->rm_col[c].rc_abd, rm->rm_col[c].rc_size);
 	/* we drop the ereport if it ends up that the data was good */
 	zfs_ereport_finish_checksum(zcr, good, bad, B_TRUE);
-	abd_return_buf(rm->rm_col[c].rc_abd, bad, rm->rm_col[c].rc_size);
+	abd_put((abd_t *)good);
 }
 
 /*
@@ -438,14 +434,16 @@ vdev_raidz_cksum_report(zio_t *zio, zio_cksum_report_t *zcr, void *arg)
 	for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++)
 		size += rm->rm_col[c].rc_size;
 
-	rm->rm_abd_copy =
-	    abd_alloc_sametype(rm->rm_col[rm->rm_firstdatacol].rc_abd, size);
+	rm->rm_abd_copy = abd_alloc_for_io(size, B_FALSE);
 
 	for (offset = 0, c = rm->rm_firstdatacol; c < rm->rm_cols; c++) {
 		raidz_col_t *col = &rm->rm_col[c];
-		abd_t *tmp = abd_get_offset(rm->rm_abd_copy, offset);
+		abd_t *tmp = abd_get_offset_size(rm->rm_abd_copy, offset,
+		    col->rc_size);
 
-		abd_copy(tmp, col->rc_abd, col->rc_size);
+		ASSERT3S(tmp->abd_size, >=, col->rc_size);
+		ASSERT3S(col->rc_abd->abd_size, >=, col->rc_size);
+		abd_copy_off(tmp, col->rc_abd, 0, 0, col->rc_size);
 		abd_put(col->rc_abd);
 		col->rc_abd = tmp;
 
@@ -562,13 +560,15 @@ vdev_raidz_map_alloc(abd_t *abd, uint64_t size, uint64_t offset,
 
 	for (c = 0; c < rm->rm_firstdatacol; c++)
 		rm->rm_col[c].rc_abd =
-		    abd_alloc_linear(rm->rm_col[c].rc_size, B_TRUE);
+		    abd_alloc_linear(rm->rm_col[c].rc_size, B_FALSE);
 
-	rm->rm_col[c].rc_abd = abd_get_offset(abd, 0);
+	rm->rm_col[c].rc_abd = abd_get_offset_size(abd, 0,
+	    rm->rm_col[c].rc_size);
 	off = rm->rm_col[c].rc_size;
 
 	for (c = c + 1; c < acols; c++) {
-		rm->rm_col[c].rc_abd = abd_get_offset(abd, off);
+		rm->rm_col[c].rc_abd = abd_get_offset_size(abd, off,
+		    rm->rm_col[c].rc_size);
 		off += rm->rm_col[c].rc_size;
 	}
 
@@ -683,7 +683,8 @@ vdev_raidz_generate_parity_p(raidz_map_t *rm)
 		p = abd_to_buf(rm->rm_col[VDEV_RAIDZ_P].rc_abd);
 
 		if (c == rm->rm_firstdatacol) {
-			abd_copy_to_buf(p, src, rm->rm_col[c].rc_size);
+			ASSERT3U(src->abd_size, >=, rm->rm_col[c].rc_size);
+			abd_copy_to_buf_off(p, src, 0, rm->rm_col[c].rc_size);
 		} else {
 			struct pqr_struct pqr = { p, NULL, NULL };
 			(void) abd_iterate_func(src, 0, rm->rm_col[c].rc_size,
@@ -711,20 +712,22 @@ vdev_raidz_generate_parity_pq(raidz_map_t *rm)
 		ccnt = rm->rm_col[c].rc_size / sizeof (p[0]);
 
 		if (c == rm->rm_firstdatacol) {
-			abd_copy_to_buf(p, src, rm->rm_col[c].rc_size);
-			(void) memcpy(q, p, rm->rm_col[c].rc_size);
-		} else {
-			struct pqr_struct pqr = { p, q, NULL };
-			(void) abd_iterate_func(src, 0, rm->rm_col[c].rc_size,
-			    vdev_raidz_pq_func, &pqr);
-		}
+			ASSERT(ccnt == pcnt || ccnt == 0);
 
-		if (c == rm->rm_firstdatacol) {
+			abd_copy_to_buf_off(p, src, 0, rm->rm_col[c].rc_size);
+			(void) memcpy(q, p, rm->rm_col[c].rc_size);
 			for (i = ccnt; i < pcnt; i++) {
 				p[i] = 0;
 				q[i] = 0;
 			}
 		} else {
+			struct pqr_struct pqr = { p, q, NULL };
+
+			ASSERT(ccnt <= pcnt);
+
+			(void) abd_iterate_func(src, 0, rm->rm_col[c].rc_size,
+			    vdev_raidz_pq_func, &pqr);
+
 			/*
 			 * Treat short columns as though they are full of 0s.
 			 * Note that there's therefore nothing needed for P.
@@ -758,22 +761,24 @@ vdev_raidz_generate_parity_pqr(raidz_map_t *rm)
 		ccnt = rm->rm_col[c].rc_size / sizeof (p[0]);
 
 		if (c == rm->rm_firstdatacol) {
-			abd_copy_to_buf(p, src, rm->rm_col[c].rc_size);
+			ASSERT3S(src->abd_size, >=, rm->rm_col[c].rc_size);
+			ASSERT(ccnt == pcnt || ccnt == 0);
+			abd_copy_to_buf_off(p, src, 0, rm->rm_col[c].rc_size);
 			(void) memcpy(q, p, rm->rm_col[c].rc_size);
 			(void) memcpy(r, p, rm->rm_col[c].rc_size);
-		} else {
-			struct pqr_struct pqr = { p, q, r };
-			(void) abd_iterate_func(src, 0, rm->rm_col[c].rc_size,
-			    vdev_raidz_pqr_func, &pqr);
-		}
 
-		if (c == rm->rm_firstdatacol) {
 			for (i = ccnt; i < pcnt; i++) {
 				p[i] = 0;
 				q[i] = 0;
 				r[i] = 0;
 			}
 		} else {
+			struct pqr_struct pqr = { p, q, r };
+
+			ASSERT(ccnt <= pcnt);
+			(void) abd_iterate_func(src, 0, rm->rm_col[c].rc_size,
+			    vdev_raidz_pqr_func, &pqr);
+
 			/*
 			 * Treat short columns as though they are full of 0s.
 			 * Note that there's therefore nothing needed for P.
@@ -941,7 +946,9 @@ vdev_raidz_reconstruct_p(raidz_map_t *rm, int *tgts, int ntgts)
 	src = rm->rm_col[VDEV_RAIDZ_P].rc_abd;
 	dst = rm->rm_col[x].rc_abd;
 
-	abd_copy(dst, src, rm->rm_col[x].rc_size);
+	ASSERT3S(dst->abd_size, >=, rm->rm_col[x].rc_size);
+	ASSERT3S(src->abd_size, >=, rm->rm_col[x].rc_size);
+	abd_copy_off(dst, src, 0, 0, rm->rm_col[x].rc_size);
 
 	for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) {
 		uint64_t size = MIN(rm->rm_col[x].rc_size,
@@ -979,14 +986,19 @@ vdev_raidz_reconstruct_q(raidz_map_t *rm, int *tgts, int ntgts)
 		dst = rm->rm_col[x].rc_abd;
 
 		if (c == rm->rm_firstdatacol) {
-			abd_copy(dst, src, size);
+			if (dst != src) {
+				ASSERT3S(dst->abd_size, >=, size);
+				ASSERT3S(src->abd_size, >=, size);
+				abd_copy_off(dst, src, 0, 0, size);
+			}
 			if (rm->rm_col[x].rc_size > size)
 				abd_zero_off(dst, size,
 				    rm->rm_col[x].rc_size - size);
 		} else {
 			ASSERT3U(size, <=, rm->rm_col[x].rc_size);
-			(void) abd_iterate_func2(dst, src, 0, 0, size,
-			    vdev_raidz_reconst_q_pre_func, NULL);
+			if (src != dst)
+				(void) abd_iterate_func2(dst, src, 0, 0, size,
+				    vdev_raidz_reconst_q_pre_func, NULL);
 			(void) abd_iterate_func(dst,
 			    size, rm->rm_col[x].rc_size - size,
 			    vdev_raidz_reconst_q_pre_tail_func, NULL);
@@ -1475,7 +1487,9 @@ vdev_raidz_reconstruct_general(raidz_map_t *rm, int *tgts, int ntgts)
 
 			bufs[c] = col->rc_abd;
 			col->rc_abd = abd_alloc_linear(col->rc_size, B_TRUE);
-			abd_copy(col->rc_abd, bufs[c], col->rc_size);
+			ASSERT3S(col->rc_abd->abd_size, >=, col->rc_size);
+			ASSERT3S(bufs[c]->abd_size, >=, col->rc_size);
+			abd_copy_off(col->rc_abd, bufs[c], 0, 0, col->rc_size);
 		}
 	}
 
@@ -1571,7 +1585,9 @@ vdev_raidz_reconstruct_general(raidz_map_t *rm, int *tgts, int ntgts)
 		for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) {
 			raidz_col_t *col = &rm->rm_col[c];
 
-			abd_copy(bufs[c], col->rc_abd, col->rc_size);
+			ASSERT3S(bufs[c]->abd_size, >=, col->rc_size);
+			ASSERT3S(col->rc_abd->abd_size, >=, col->rc_size);
+			abd_copy_off(bufs[c], col->rc_abd, 0, 0, col->rc_size);
 			abd_free(col->rc_abd);
 			col->rc_abd = bufs[c];
 		}
@@ -2041,9 +2057,8 @@ vdev_raidz_io_start(zio_t *zio)
  * Report a checksum error for a child of a RAID-Z device.
  */
 static void
-raidz_checksum_error(zio_t *zio, raidz_col_t *rc, void *bad_data)
+raidz_checksum_error(zio_t *zio, raidz_col_t *rc, abd_t *bad_data)
 {
-	void *buf;
 	vdev_t *vd = zio->io_vd->vdev_child[rc->rc_devidx];
 
 	if (!(zio->io_flags & ZIO_FLAG_SPECULATIVE)) {
@@ -2057,11 +2072,9 @@ raidz_checksum_error(zio_t *zio, raidz_col_t *rc, void *bad_data)
 		zbc.zbc_has_cksum = 0;
 		zbc.zbc_injected = rm->rm_ecksuminjected;
 
-		buf = abd_borrow_buf_copy(rc->rc_abd, rc->rc_size);
-		zfs_ereport_post_checksum(zio->io_spa, vd, zio,
-		    rc->rc_offset, rc->rc_size, buf, bad_data,
-		    &zbc);
-		abd_return_buf(rc->rc_abd, buf, rc->rc_size);
+		zfs_ereport_post_checksum(zio->io_spa, vd,
+		    &zio->io_bookmark, zio, rc->rc_offset, rc->rc_size,
+		    rc->rc_abd, bad_data, &zbc);
 	}
 }
 
@@ -2091,7 +2104,7 @@ raidz_checksum_verify(zio_t *zio)
 static int
 raidz_parity_verify(zio_t *zio, raidz_map_t *rm)
 {
-	void *orig[VDEV_RAIDZ_MAXPARITY];
+	abd_t *orig[VDEV_RAIDZ_MAXPARITY];
 	int c, ret = 0;
 	raidz_col_t *rc;
 
@@ -2106,8 +2119,8 @@ raidz_parity_verify(zio_t *zio, raidz_map_t *rm)
 		rc = &rm->rm_col[c];
 		if (!rc->rc_tried || rc->rc_error != 0)
 			continue;
-		orig[c] = zio_buf_alloc(rc->rc_size);
-		abd_copy_to_buf(orig[c], rc->rc_abd, rc->rc_size);
+		orig[c] = abd_alloc_sametype(rc->rc_abd, rc->rc_size);
+		abd_copy(orig[c], rc->rc_abd, rc->rc_size);
 	}
 
 	vdev_raidz_generate_parity(rm);
@@ -2116,12 +2129,12 @@ raidz_parity_verify(zio_t *zio, raidz_map_t *rm)
 		rc = &rm->rm_col[c];
 		if (!rc->rc_tried || rc->rc_error != 0)
 			continue;
-		if (abd_cmp_buf(rc->rc_abd, orig[c], rc->rc_size) != 0) {
+		if (abd_cmp(orig[c], rc->rc_abd, rc->rc_abd->abd_size) != 0) {
 			raidz_checksum_error(zio, rc, orig[c]);
 			rc->rc_error = SET_ERROR(ECKSUM);
 			ret++;
 		}
-		zio_buf_free(orig[c], rc->rc_size);
+		abd_free(orig[c]);
 	}
 
 	return (ret);
@@ -2156,7 +2169,7 @@ vdev_raidz_combrec(zio_t *zio, int total_errors, int data_errors)
 {
 	raidz_map_t *rm = zio->io_vsd;
 	raidz_col_t *rc;
-	void *orig[VDEV_RAIDZ_MAXPARITY];
+	abd_t *orig[VDEV_RAIDZ_MAXPARITY];
 	int tstore[VDEV_RAIDZ_MAXPARITY + 2];
 	int *tgts = &tstore[1];
 	int current, next, i, c, n;
@@ -2205,7 +2218,8 @@ vdev_raidz_combrec(zio_t *zio, int total_errors, int data_errors)
 			ASSERT(orig[i] != NULL);
 		}
 
-		orig[n - 1] = zio_buf_alloc(rm->rm_col[0].rc_size);
+		orig[n - 1] = abd_alloc_sametype(rm->rm_col[0].rc_abd,
+		    rm->rm_col[0].rc_size);
 
 		current = 0;
 		next = tgts[current];
@@ -2224,7 +2238,9 @@ vdev_raidz_combrec(zio_t *zio, int total_errors, int data_errors)
 				ASSERT3S(c, >=, 0);
 				ASSERT3S(c, <, rm->rm_cols);
 				rc = &rm->rm_col[c];
-				abd_copy_to_buf(orig[i], rc->rc_abd,
+				ASSERT3S(orig[i]->abd_size, >=, rc->rc_size);
+				ASSERT3S(rc->rc_abd->abd_size, >=, rc->rc_size);
+				abd_copy_off(orig[i], rc->rc_abd, 0, 0,
 				    rc->rc_size);
 			}
 
@@ -2256,7 +2272,9 @@ vdev_raidz_combrec(zio_t *zio, int total_errors, int data_errors)
 			for (i = 0; i < n; i++) {
 				c = tgts[i];
 				rc = &rm->rm_col[c];
-				abd_copy_from_buf(rc->rc_abd, orig[i],
+				ASSERT3S(rc->rc_abd->abd_size, >=, rc->rc_size);
+				ASSERT3S(orig[i]->abd_size, >=, rc->rc_size);
+				abd_copy_off(rc->rc_abd, orig[i], 0, 0,
 				    rc->rc_size);
 			}
 
@@ -2294,9 +2312,8 @@ vdev_raidz_combrec(zio_t *zio, int total_errors, int data_errors)
 	}
 	n--;
 done:
-	for (i = 0; i < n; i++) {
-		zio_buf_free(orig[i], rm->rm_col[0].rc_size);
-	}
+	for (i = 0; i < n; i++)
+		abd_free(orig[i]);
 
 	return (ret);
 }
@@ -2555,7 +2572,8 @@ vdev_raidz_io_done(zio_t *zio)
 					zfs_ereport_start_checksum(
 					    zio->io_spa,
 					    vd->vdev_child[rc->rc_devidx],
-					    zio, rc->rc_offset, rc->rc_size,
+					    &zio->io_bookmark, zio,
+					    rc->rc_offset, rc->rc_size,
 					    (void *)(uintptr_t)c, &zbc);
 				}
 			}
diff --git a/usr/src/uts/common/fs/zfs/zap_micro.c b/usr/src/uts/common/fs/zfs/zap_micro.c
index 48b0be6665..dad227306b 100644
--- a/usr/src/uts/common/fs/zfs/zap_micro.c
+++ b/usr/src/uts/common/fs/zfs/zap_micro.c
@@ -670,7 +670,7 @@ mzap_create_impl(objset_t *os, uint64_t obj, int normflags, zap_flags_t flags,
 	dmu_buf_will_dirty(db, tx);
 	mzap_phys_t *zp = db->db_data;
 	zp->mz_block_type = ZBT_MICRO;
-	zp->mz_salt = ((uintptr_t)db ^ (uintptr_t)tx ^ (obj << 1)) | 1ULL;
+	(void) random_get_pseudo_bytes((void *)&zp->mz_salt, sizeof (uint64_t));
 	zp->mz_normflags = normflags;
 
 	if (flags != 0) {
diff --git a/usr/src/uts/common/fs/zfs/zcp_get.c b/usr/src/uts/common/fs/zfs/zcp_get.c
index 1478c288d8..80814aeae4 100644
--- a/usr/src/uts/common/fs/zfs/zcp_get.c
+++ b/usr/src/uts/common/fs/zfs/zcp_get.c
@@ -421,6 +421,15 @@ get_special_prop(lua_State *state, dsl_dataset_t *ds, const char *dsname,
 	case ZFS_PROP_INCONSISTENT:
 		numval = dsl_get_inconsistent(ds);
 		break;
+	case ZFS_PROP_IVSET_GUID:
+		if (dsl_dataset_is_zapified(ds)) {
+			error = zap_lookup(ds->ds_dir->dd_pool->dp_meta_objset,
+			    ds->ds_object, DS_FIELD_IVSET_GUID,
+			    sizeof (numval), 1, &numval);
+		} else {
+			error = ENOENT;
+		}
+		break;
 	case ZFS_PROP_RECEIVE_RESUME_TOKEN: {
 		char *token = get_receive_resume_stats_impl(ds);
 		VERIFY3U(strlcpy(strval, token, ZAP_MAXVALUELEN), <,
diff --git a/usr/src/uts/common/fs/zfs/zfeature.c b/usr/src/uts/common/fs/zfs/zfeature.c
index 35ce827979..da9077ee73 100644
--- a/usr/src/uts/common/fs/zfs/zfeature.c
+++ b/usr/src/uts/common/fs/zfs/zfeature.c
@@ -369,6 +369,19 @@ feature_enable_sync(spa_t *spa, zfeature_info_t *feature, dmu_tx_t *tx)
 		    spa->spa_feat_enabled_txg_obj, feature->fi_guid,
 		    sizeof (uint64_t), 1, &enabling_txg, tx));
 	}
+
+	/*
+	 * Errata #4 is mostly a problem with encrypted datasets, but it
+	 * is also a problem where the old encryption feature did not
+	 * depend on the bookmark_v2 feature. If the pool does not have
+	 * any encrypted datasets we can resolve this issue simply by
+	 * enabling this dependency.
+	 */
+	if (spa->spa_errata == ZPOOL_ERRATA_ZOL_8308_ENCRYPTION &&
+	    spa_feature_is_enabled(spa, SPA_FEATURE_ENCRYPTION) &&
+	    !spa_feature_is_active(spa, SPA_FEATURE_ENCRYPTION) &&
+	    feature->fi_feature == SPA_FEATURE_BOOKMARK_V2)
+		spa->spa_errata = 0;
 }
 
 static void
@@ -413,8 +426,8 @@ spa_feature_create_zap_objects(spa_t *spa, dmu_tx_t *tx)
 	 * We create feature flags ZAP objects in two instances: during pool
 	 * creation and during pool upgrade.
 	 */
-	ASSERT(dsl_pool_sync_context(spa_get_dsl(spa)) || (!spa->spa_sync_on &&
-	    tx->tx_txg == TXG_INITIAL));
+	ASSERT((!spa->spa_sync_on && tx->tx_txg == TXG_INITIAL) ||
+	    dsl_pool_sync_context(spa_get_dsl(spa)));
 
 	spa->spa_feat_for_read_obj = zap_create_link(spa->spa_meta_objset,
 	    DMU_OTN_ZAP_METADATA, DMU_POOL_DIRECTORY_OBJECT,
diff --git a/usr/src/uts/common/fs/zfs/zfs_fm.c b/usr/src/uts/common/fs/zfs/zfs_fm.c
index 398a3d04aa..07a7a9f70b 100644
--- a/usr/src/uts/common/fs/zfs/zfs_fm.c
+++ b/usr/src/uts/common/fs/zfs/zfs_fm.c
@@ -104,8 +104,8 @@
 #ifdef _KERNEL
 static void
 zfs_ereport_start(nvlist_t **ereport_out, nvlist_t **detector_out,
-    const char *subclass, spa_t *spa, vdev_t *vd, zio_t *zio,
-    uint64_t stateoroffset, uint64_t size)
+    const char *subclass, spa_t *spa, vdev_t *vd, const zbookmark_phys_t *zb,
+    zio_t *zio, uint64_t stateoroffset, uint64_t size)
 {
 	nvlist_t *ereport, *detector;
 
@@ -318,24 +318,6 @@ zfs_ereport_start(nvlist_t **ereport_out, nvlist_t **detector_out,
 				    FM_EREPORT_PAYLOAD_ZFS_ZIO_SIZE,
 				    DATA_TYPE_UINT64, zio->io_size, NULL);
 		}
-
-		/*
-		 * Payload for I/Os with corresponding logical information.
-		 */
-		if (zio->io_logical != NULL)
-			fm_payload_set(ereport,
-			    FM_EREPORT_PAYLOAD_ZFS_ZIO_OBJSET,
-			    DATA_TYPE_UINT64,
-			    zio->io_logical->io_bookmark.zb_objset,
-			    FM_EREPORT_PAYLOAD_ZFS_ZIO_OBJECT,
-			    DATA_TYPE_UINT64,
-			    zio->io_logical->io_bookmark.zb_object,
-			    FM_EREPORT_PAYLOAD_ZFS_ZIO_LEVEL,
-			    DATA_TYPE_INT64,
-			    zio->io_logical->io_bookmark.zb_level,
-			    FM_EREPORT_PAYLOAD_ZFS_ZIO_BLKID,
-			    DATA_TYPE_UINT64,
-			    zio->io_logical->io_bookmark.zb_blkid, NULL);
 	} else if (vd != NULL) {
 		/*
 		 * If we have a vdev but no zio, this is a device fault, and the
@@ -347,6 +329,20 @@ zfs_ereport_start(nvlist_t **ereport_out, nvlist_t **detector_out,
 		    DATA_TYPE_UINT64, stateoroffset, NULL);
 	}
 
+	/*
+	 * Payload for I/Os with corresponding logical information.
+	 */
+	if (zb != NULL && (zio == NULL || zio->io_logical != NULL))
+		fm_payload_set(ereport,
+		    FM_EREPORT_PAYLOAD_ZFS_ZIO_OBJSET,
+		    DATA_TYPE_UINT64, zb->zb_objset,
+		    FM_EREPORT_PAYLOAD_ZFS_ZIO_OBJECT,
+		    DATA_TYPE_UINT64, zb->zb_object,
+		    FM_EREPORT_PAYLOAD_ZFS_ZIO_LEVEL,
+		    DATA_TYPE_INT64, zb->zb_level,
+		    FM_EREPORT_PAYLOAD_ZFS_ZIO_BLKID,
+		    DATA_TYPE_UINT64, zb->zb_blkid, NULL);
+
 	mutex_exit(&spa->spa_errlist_lock);
 
 	*ereport_out = ereport;
@@ -501,11 +497,11 @@ range_total_size(zfs_ecksum_info_t *eip)
 
 static zfs_ecksum_info_t *
 annotate_ecksum(nvlist_t *ereport, zio_bad_cksum_t *info,
-    const uint8_t *goodbuf, const uint8_t *badbuf, size_t size,
+    const abd_t *goodabd, const abd_t *badabd, size_t size,
     boolean_t drop_if_identical)
 {
-	const uint64_t *good = (const uint64_t *)goodbuf;
-	const uint64_t *bad = (const uint64_t *)badbuf;
+	const uint64_t *good;
+	const uint64_t *bad;
 
 	uint64_t allset = 0;
 	uint64_t allcleared = 0;
@@ -549,7 +545,7 @@ annotate_ecksum(nvlist_t *ereport, zio_bad_cksum_t *info,
 		}
 	}
 
-	if (badbuf == NULL || goodbuf == NULL)
+	if (badabd == NULL || goodabd == NULL)
 		return (eip);
 
 	ASSERT3U(nui64s, <=, UINT32_MAX);
@@ -557,6 +553,9 @@ annotate_ecksum(nvlist_t *ereport, zio_bad_cksum_t *info,
 	ASSERT3U(size, <=, SPA_MAXBLOCKSIZE);
 	ASSERT3U(size, <=, UINT32_MAX);
 
+	good = (const uint64_t *) abd_borrow_buf_copy((abd_t *)goodabd, size);
+	bad = (const uint64_t *) abd_borrow_buf_copy((abd_t *)badabd, size);
+
 	/* build up the range list by comparing the two buffers. */
 	for (idx = 0; idx < nui64s; idx++) {
 		if (good[idx] == bad[idx]) {
@@ -586,6 +585,8 @@ annotate_ecksum(nvlist_t *ereport, zio_bad_cksum_t *info,
 	 */
 	if (inline_size == 0 && drop_if_identical) {
 		kmem_free(eip, sizeof (*eip));
+		abd_return_buf((abd_t *)goodabd, (void *)good, size);
+		abd_return_buf((abd_t *)badabd, (void *)bad, size);
 		return (NULL);
 	}
 
@@ -626,6 +627,10 @@ annotate_ecksum(nvlist_t *ereport, zio_bad_cksum_t *info,
 		eip->zei_ranges[range].zr_start	*= sizeof (uint64_t);
 		eip->zei_ranges[range].zr_end	*= sizeof (uint64_t);
 	}
+
+	abd_return_buf((abd_t *)goodabd, (void *)good, size);
+	abd_return_buf((abd_t *)badabd, (void *)bad, size);
+
 	eip->zei_allowed_mingap	*= sizeof (uint64_t);
 	inline_size		*= sizeof (uint64_t);
 
@@ -666,15 +671,16 @@ annotate_ecksum(nvlist_t *ereport, zio_bad_cksum_t *info,
 #endif
 
 void
-zfs_ereport_post(const char *subclass, spa_t *spa, vdev_t *vd, zio_t *zio,
-    uint64_t stateoroffset, uint64_t size)
+zfs_ereport_post(const char *subclass, spa_t *spa, vdev_t *vd,
+    const struct zbookmark_phys *zb, zio_t *zio, uint64_t stateoroffset,
+    uint64_t size)
 {
 #ifdef _KERNEL
 	nvlist_t *ereport = NULL;
 	nvlist_t *detector = NULL;
 
-	zfs_ereport_start(&ereport, &detector,
-	    subclass, spa, vd, zio, stateoroffset, size);
+	zfs_ereport_start(&ereport, &detector, subclass, spa, vd,
+	    zb, zio, stateoroffset, size);
 
 	if (ereport == NULL)
 		return;
@@ -687,7 +693,7 @@ zfs_ereport_post(const char *subclass, spa_t *spa, vdev_t *vd, zio_t *zio,
 }
 
 void
-zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd,
+zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd, const zbookmark_phys_t *zb,
     struct zio *zio, uint64_t offset, uint64_t length, void *arg,
     zio_bad_cksum_t *info)
 {
@@ -709,7 +715,7 @@ zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd,
 
 #ifdef _KERNEL
 	zfs_ereport_start(&report->zcr_ereport, &report->zcr_detector,
-	    FM_EREPORT_ZFS_CHECKSUM, spa, vd, zio, offset, length);
+	    FM_EREPORT_ZFS_CHECKSUM, spa, vd, zb, zio, offset, length);
 
 	if (report->zcr_ereport == NULL) {
 		report->zcr_free(report->zcr_cbdata, report->zcr_cbinfo);
@@ -729,8 +735,8 @@ zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd,
 }
 
 void
-zfs_ereport_finish_checksum(zio_cksum_report_t *report,
-    const void *good_data, const void *bad_data, boolean_t drop_if_identical)
+zfs_ereport_finish_checksum(zio_cksum_report_t *report, const abd_t *good_data,
+    const abd_t *bad_data, boolean_t drop_if_identical)
 {
 #ifdef _KERNEL
 	zfs_ecksum_info_t *info = NULL;
@@ -777,17 +783,17 @@ zfs_ereport_send_interim_checksum(zio_cksum_report_t *report)
 }
 
 void
-zfs_ereport_post_checksum(spa_t *spa, vdev_t *vd,
+zfs_ereport_post_checksum(spa_t *spa, vdev_t *vd, const zbookmark_phys_t *zb,
     struct zio *zio, uint64_t offset, uint64_t length,
-    const void *good_data, const void *bad_data, zio_bad_cksum_t *zbc)
+    const abd_t *good_data, const abd_t *bad_data, zio_bad_cksum_t *zbc)
 {
 #ifdef _KERNEL
 	nvlist_t *ereport = NULL;
 	nvlist_t *detector = NULL;
 	zfs_ecksum_info_t *info;
 
-	zfs_ereport_start(&ereport, &detector,
-	    FM_EREPORT_ZFS_CHECKSUM, spa, vd, zio, offset, length);
+	zfs_ereport_start(&ereport, &detector, FM_EREPORT_ZFS_CHECKSUM,
+	    spa, vd, zb, zio, offset, length);
 
 	if (ereport == NULL)
 		return;
diff --git a/usr/src/uts/common/fs/zfs/zfs_ioctl.c b/usr/src/uts/common/fs/zfs/zfs_ioctl.c
index 712abee22f..71018c3836 100644
--- a/usr/src/uts/common/fs/zfs/zfs_ioctl.c
+++ b/usr/src/uts/common/fs/zfs/zfs_ioctl.c
@@ -32,7 +32,7 @@
  * Copyright (c) 2014 Integros [integros.com]
  * Copyright 2016 Toomas Soome <tsoome@me.com>
  * Copyright 2017 RackTop Systems.
- * Copyright (c) 2017 Datto Inc.
+ * Copyright (c) 2017, Datto, Inc. All rights reserved.
  */
 
 /*
@@ -192,6 +192,7 @@
 #include <sys/vdev_removal.h>
 #include <sys/vdev_impl.h>
 #include <sys/vdev_initialize.h>
+#include <sys/dsl_crypt.h>
 
 #include "zfs_namecheck.h"
 #include "zfs_prop.h"
@@ -593,12 +594,12 @@ zfs_set_slabel_policy(const char *name, char *strval, cred_t *cr)
 		 * Try to own the dataset; abort if there is any error,
 		 * (e.g., already mounted, in use, or other error).
 		 */
-		error = dmu_objset_own(name, DMU_OST_ZFS, B_TRUE,
+		error = dmu_objset_own(name, DMU_OST_ZFS, B_TRUE, B_TRUE,
 		    setsl_tag, &os);
 		if (error != 0)
 			return (SET_ERROR(EPERM));
 
-		dmu_objset_disown(os, setsl_tag);
+		dmu_objset_disown(os, B_TRUE, setsl_tag);
 
 		if (new_default) {
 			needed_priv = PRIV_FILE_DOWNGRADE_SL;
@@ -1285,6 +1286,22 @@ zfs_secpolicy_release(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 	return (0);
 }
 
+/* ARGSUSED */
+static int
+zfs_secpolicy_load_key(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
+{
+	return (zfs_secpolicy_write_perms(zc->zc_name,
+	    ZFS_DELEG_PERM_LOAD_KEY, cr));
+}
+
+/* ARGSUSED */
+static int
+zfs_secpolicy_change_key(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
+{
+	return (zfs_secpolicy_write_perms(zc->zc_name,
+	    ZFS_DELEG_PERM_CHANGE_KEY, cr));
+}
+
 /*
  * Policy for allowing temporary snapshots to be taken or released
  */
@@ -1481,7 +1498,7 @@ zfsvfs_rele(zfsvfs_t *zfsvfs, void *tag)
 	if (zfsvfs->z_vfs) {
 		VFS_RELE(zfsvfs->z_vfs);
 	} else {
-		dmu_objset_disown(zfsvfs->z_os, zfsvfs);
+		dmu_objset_disown(zfsvfs->z_os, B_TRUE, zfsvfs);
 		zfsvfs_free(zfsvfs);
 	}
 }
@@ -1494,6 +1511,7 @@ zfs_ioc_pool_create(zfs_cmd_t *zc)
 	nvlist_t *rootprops = NULL;
 	nvlist_t *zplprops = NULL;
 	char *spa_name = zc->zc_name;
+	dsl_crypto_params_t *dcp = NULL;
 
 	if (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
 	    zc->zc_iflags, &config))
@@ -1508,6 +1526,7 @@ zfs_ioc_pool_create(zfs_cmd_t *zc)
 
 	if (props) {
 		nvlist_t *nvl = NULL;
+		nvlist_t *hidden_args = NULL;
 		uint64_t version = SPA_VERSION;
 		char *tname;
 
@@ -1527,6 +1546,18 @@ zfs_ioc_pool_create(zfs_cmd_t *zc)
 			}
 			(void) nvlist_remove_all(props, ZPOOL_ROOTFS_PROPS);
 		}
+
+		(void) nvlist_lookup_nvlist(props, ZPOOL_HIDDEN_ARGS,
+		    &hidden_args);
+		error = dsl_crypto_params_create_nvlist(DCP_CMD_NONE,
+		    rootprops, hidden_args, &dcp);
+		if (error != 0) {
+			nvlist_free(config);
+			nvlist_free(props);
+			return (error);
+		}
+		(void) nvlist_remove_all(props, ZPOOL_HIDDEN_ARGS);
+
 		VERIFY(nvlist_alloc(&zplprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
 		error = zfs_fill_zplprops_root(version, rootprops,
 		    zplprops, NULL);
@@ -1538,7 +1569,7 @@ zfs_ioc_pool_create(zfs_cmd_t *zc)
 			spa_name = tname;
 	}
 
-	error = spa_create(zc->zc_name, config, props, zplprops);
+	error = spa_create(zc->zc_name, config, props, zplprops, dcp);
 
 	/*
 	 * Set the remaining root properties
@@ -1552,6 +1583,7 @@ pool_props_bad:
 	nvlist_free(zplprops);
 	nvlist_free(config);
 	nvlist_free(props);
+	dsl_crypto_params_free(dcp, !!error);
 
 	return (error);
 }
@@ -1830,15 +1862,16 @@ zfs_ioc_obj_to_path(zfs_cmd_t *zc)
 	int error;
 
 	/* XXX reading from objset not owned */
-	if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os)) != 0)
+	if ((error = dmu_objset_hold_flags(zc->zc_name, B_TRUE,
+	    FTAG, &os)) != 0)
 		return (error);
 	if (dmu_objset_type(os) != DMU_OST_ZFS) {
-		dmu_objset_rele(os, FTAG);
+		dmu_objset_rele_flags(os, B_TRUE, FTAG);
 		return (SET_ERROR(EINVAL));
 	}
 	error = zfs_obj_to_path(os, zc->zc_obj, zc->zc_value,
 	    sizeof (zc->zc_value));
-	dmu_objset_rele(os, FTAG);
+	dmu_objset_rele_flags(os, B_TRUE, FTAG);
 
 	return (error);
 }
@@ -1859,15 +1892,16 @@ zfs_ioc_obj_to_stats(zfs_cmd_t *zc)
 	int error;
 
 	/* XXX reading from objset not owned */
-	if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os)) != 0)
+	if ((error = dmu_objset_hold_flags(zc->zc_name, B_TRUE,
+	    FTAG, &os)) != 0)
 		return (error);
 	if (dmu_objset_type(os) != DMU_OST_ZFS) {
-		dmu_objset_rele(os, FTAG);
+		dmu_objset_rele_flags(os, B_TRUE, FTAG);
 		return (SET_ERROR(EINVAL));
 	}
 	error = zfs_obj_to_stats(os, zc->zc_obj, &zc->zc_stat, zc->zc_value,
 	    sizeof (zc->zc_value));
-	dmu_objset_rele(os, FTAG);
+	dmu_objset_rele_flags(os, B_TRUE, FTAG);
 
 	return (error);
 }
@@ -2437,7 +2471,8 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source,
 {
 	const char *propname = nvpair_name(pair);
 	zfs_prop_t prop = zfs_name_to_prop(propname);
-	uint64_t intval;
+	uint64_t intval = 0;
+	char *strval = NULL;
 	int err = -1;
 
 	if (prop == ZPROP_INVAL) {
@@ -2453,10 +2488,12 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source,
 		    &pair) == 0);
 	}
 
-	if (zfs_prop_get_type(prop) == PROP_TYPE_STRING)
-		return (-1);
-
-	VERIFY(0 == nvpair_value_uint64(pair, &intval));
+	/* all special properties are numeric except for keylocation */
+	if (zfs_prop_get_type(prop) == PROP_TYPE_STRING) {
+		strval = fnvpair_value_string(pair);
+	} else {
+		intval = fnvpair_value_uint64(pair);
+	}
 
 	switch (prop) {
 	case ZFS_PROP_QUOTA:
@@ -2480,6 +2517,16 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source,
 		if (err == 0)
 			err = -1;
 		break;
+	case ZFS_PROP_KEYLOCATION:
+		err = dsl_crypto_can_set_keylocation(dsname, strval);
+
+		/*
+		 * Set err to -1 to force the zfs_set_prop_nvlist code down the
+		 * default path to set the value in the nvlist.
+		 */
+		if (err == 0)
+			err = -1;
+		break;
 	case ZFS_PROP_RESERVATION:
 		err = dsl_dir_set_reservation(dsname, source, intval);
 		break;
@@ -3183,6 +3230,8 @@ zfs_fill_zplprops_root(uint64_t spa_vers, nvlist_t *createprops,
  * innvl: {
  *     "type" -> dmu_objset_type_t (int32)
  *     (optional) "props" -> { prop -> value }
+ *     (optional) "hidden_args" -> { "wkeydata" -> value }
+ *         raw uint8_t array of encryption wrapping key data (32 bytes)
  * }
  *
  * outnvl: propname -> error code (int32)
@@ -3193,15 +3242,18 @@ zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
 	int error = 0;
 	zfs_creat_t zct = { 0 };
 	nvlist_t *nvprops = NULL;
+	nvlist_t *hidden_args = NULL;
 	void (*cbfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx);
 	int32_t type32;
 	dmu_objset_type_t type;
 	boolean_t is_insensitive = B_FALSE;
+	dsl_crypto_params_t *dcp = NULL;
 
 	if (nvlist_lookup_int32(innvl, "type", &type32) != 0)
 		return (SET_ERROR(EINVAL));
 	type = type32;
 	(void) nvlist_lookup_nvlist(innvl, "props", &nvprops);
+	(void) nvlist_lookup_nvlist(innvl, ZPOOL_HIDDEN_ARGS, &hidden_args);
 
 	switch (type) {
 	case DMU_OST_ZFS:
@@ -3267,9 +3319,18 @@ zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
 		}
 	}
 
+	error = dsl_crypto_params_create_nvlist(DCP_CMD_NONE, nvprops,
+	    hidden_args, &dcp);
+	if (error != 0) {
+		nvlist_free(zct.zct_zplprops);
+		return (error);
+	}
+
 	error = dmu_objset_create(fsname, type,
-	    is_insensitive ? DS_FLAG_CI_DATASET : 0, cbfunc, &zct);
+	    is_insensitive ? DS_FLAG_CI_DATASET : 0, dcp, cbfunc, &zct);
+
 	nvlist_free(zct.zct_zplprops);
+	dsl_crypto_params_free(dcp, !!error);
 
 	/*
 	 * It would be nice to do this atomically.
@@ -3287,6 +3348,8 @@ zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
  * innvl: {
  *     "origin" -> name of origin snapshot
  *     (optional) "props" -> { prop -> value }
+ *     (optional) "hidden_args" -> { "wkeydata" -> value }
+ *         raw uint8_t array of encryption wrapping key data (32 bytes)
  * }
  *
  * outnvl: propname -> error code (int32)
@@ -3308,9 +3371,8 @@ zfs_ioc_clone(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
 
 	if (dataset_namecheck(origin_name, NULL, NULL) != 0)
 		return (SET_ERROR(EINVAL));
+
 	error = dmu_objset_clone(fsname, origin_name);
-	if (error != 0)
-		return (error);
 
 	/*
 	 * It would be nice to do this atomically.
@@ -4328,7 +4390,11 @@ extract_delay_props(nvlist_t *props)
 {
 	nvlist_t *delayprops;
 	nvpair_t *nvp, *tmp;
-	static const zfs_prop_t delayable[] = { ZFS_PROP_REFQUOTA, 0 };
+	static const zfs_prop_t delayable[] = {
+		ZFS_PROP_REFQUOTA,
+		ZFS_PROP_KEYLOCATION,
+		0
+	};
 	int i;
 
 	VERIFY(nvlist_alloc(&delayprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
@@ -4517,7 +4583,7 @@ zfs_ioc_recv(zfs_cmd_t *zc)
 		}
 	}
 
-	if (delayprops != NULL) {
+	if (delayprops != NULL && props != NULL) {
 		/*
 		 * Merge delayed props back in with initial props, in case
 		 * we're DEBUG and zfs_ioc_recv_inject_err is set (which means
@@ -4627,6 +4693,7 @@ zfs_ioc_send(zfs_cmd_t *zc)
 	boolean_t embedok = (zc->zc_flags & 0x1);
 	boolean_t large_block_ok = (zc->zc_flags & 0x2);
 	boolean_t compressok = (zc->zc_flags & 0x4);
+	boolean_t rawok = (zc->zc_flags & 0x8);
 
 	if (zc->zc_obj != 0) {
 		dsl_pool_t *dp;
@@ -4658,7 +4725,8 @@ zfs_ioc_send(zfs_cmd_t *zc)
 		if (error != 0)
 			return (error);
 
-		error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &tosnap);
+		error = dsl_dataset_hold_obj(dp, zc->zc_sendobj,
+		    FTAG, &tosnap);
 		if (error != 0) {
 			dsl_pool_rele(dp, FTAG);
 			return (error);
@@ -4674,7 +4742,7 @@ zfs_ioc_send(zfs_cmd_t *zc)
 			}
 		}
 
-		error = dmu_send_estimate(tosnap, fromsnap, compressok,
+		error = dmu_send_estimate(tosnap, fromsnap, compressok || rawok,
 		    &zc->zc_objset_type);
 
 		if (fromsnap != NULL)
@@ -4688,7 +4756,7 @@ zfs_ioc_send(zfs_cmd_t *zc)
 
 		off = fp->f_offset;
 		error = dmu_send_obj(zc->zc_name, zc->zc_sendobj,
-		    zc->zc_fromobj, embedok, large_block_ok, compressok,
+		    zc->zc_fromobj, embedok, large_block_ok, compressok, rawok,
 		    zc->zc_cookie, fp->f_vnode, &off);
 
 		if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
@@ -5078,7 +5146,7 @@ zfs_ioc_userspace_upgrade(zfs_cmd_t *zc)
 			error = zfs_suspend_fs(zfsvfs);
 			if (error == 0) {
 				dmu_objset_refresh_ownership(ds, &newds,
-				    zfsvfs);
+				    B_TRUE, zfsvfs);
 				error = zfs_resume_fs(zfsvfs, newds);
 			}
 		}
@@ -5087,12 +5155,12 @@ zfs_ioc_userspace_upgrade(zfs_cmd_t *zc)
 		VFS_RELE(zfsvfs->z_vfs);
 	} else {
 		/* XXX kind of reading contents without owning */
-		error = dmu_objset_hold(zc->zc_name, FTAG, &os);
+		error = dmu_objset_hold_flags(zc->zc_name, B_TRUE, FTAG, &os);
 		if (error != 0)
 			return (error);
 
 		error = dmu_objset_userspace_upgrade(os);
-		dmu_objset_rele(os, FTAG);
+		dmu_objset_rele_flags(os, B_TRUE, FTAG);
 	}
 
 	return (error);
@@ -5250,7 +5318,7 @@ zfs_ioc_next_obj(zfs_cmd_t *zc)
 	objset_t *os = NULL;
 	int error;
 
-	error = dmu_objset_hold(zc->zc_name, FTAG, &os);
+	error = dmu_objset_hold_flags(zc->zc_name, B_TRUE, FTAG, &os);
 	if (error != 0)
 		return (error);
 
@@ -5668,6 +5736,8 @@ zfs_ioc_space_snaps(const char *lastsnap, nvlist_t *innvl, nvlist_t *outnvl)
  *         presence indicates DRR_WRITE_EMBEDDED records are permitted
  *     (optional) "compressok" -> (value ignored)
  *         presence indicates compressed DRR_WRITE records are permitted
+ *     (optional) "rawok" -> (value ignored)
+ *         presence indicates raw encrypted records should be used.
  *     (optional) "resume_object" and "resume_offset" -> (uint64)
  *         if present, resume send stream from specified object and offset.
  * }
@@ -5685,6 +5755,7 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
 	boolean_t largeblockok;
 	boolean_t embedok;
 	boolean_t compressok;
+	boolean_t rawok;
 	uint64_t resumeobj = 0;
 	uint64_t resumeoff = 0;
 
@@ -5697,6 +5768,7 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
 	largeblockok = nvlist_exists(innvl, "largeblockok");
 	embedok = nvlist_exists(innvl, "embedok");
 	compressok = nvlist_exists(innvl, "compressok");
+	rawok = nvlist_exists(innvl, "rawok");
 
 	(void) nvlist_lookup_uint64(innvl, "resume_object", &resumeobj);
 	(void) nvlist_lookup_uint64(innvl, "resume_offset", &resumeoff);
@@ -5707,7 +5779,7 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
 
 	off = fp->f_offset;
 	error = dmu_send(snapname, fromname, embedok, largeblockok, compressok,
-	    fd, resumeobj, resumeoff, fp->f_vnode, &off);
+	    rawok, fd, resumeobj, resumeoff, fp->f_vnode, &off);
 
 	if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
 		fp->f_offset = off;
@@ -5742,6 +5814,7 @@ zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
 	int error;
 	char *fromname;
 	boolean_t compressok;
+	boolean_t rawok;
 	uint64_t space;
 
 	error = dsl_pool_hold(snapname, FTAG, &dp);
@@ -5755,6 +5828,7 @@ zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
 	}
 
 	compressok = nvlist_exists(innvl, "compressok");
+	rawok = nvlist_exists(innvl, "rawok");
 
 	error = nvlist_lookup_string(innvl, "from", &fromname);
 	if (error == 0) {
@@ -5768,8 +5842,8 @@ zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
 			error = dsl_dataset_hold(dp, fromname, FTAG, &fromsnap);
 			if (error != 0)
 				goto out;
-			error = dmu_send_estimate(tosnap, fromsnap, compressok,
-			    &space);
+			error = dmu_send_estimate(tosnap, fromsnap,
+			    compressok || rawok, &space);
 			dsl_dataset_rele(fromsnap, FTAG);
 		} else if (strchr(fromname, '#') != NULL) {
 			/*
@@ -5784,7 +5858,8 @@ zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
 			if (error != 0)
 				goto out;
 			error = dmu_send_estimate_from_txg(tosnap,
-			    frombm.zbm_creation_txg, compressok, &space);
+			    frombm.zbm_creation_txg, compressok || rawok,
+			    &space);
 		} else {
 			/*
 			 * from is not properly formatted as a snapshot or
@@ -5797,7 +5872,8 @@ zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
 		/*
 		 * If estimating the size of a full send, use dmu_send_estimate.
 		 */
-		error = dmu_send_estimate(tosnap, NULL, compressok, &space);
+		error = dmu_send_estimate(tosnap, NULL, compressok || rawok,
+		    &space);
 	}
 
 	fnvlist_add_uint64(outnvl, "space", space);
@@ -5846,6 +5922,124 @@ zfs_ioc_pool_sync(const char *pool, nvlist_t *innvl, nvlist_t *onvl)
 	return (err);
 }
 
+/*
+ * Load a user's wrapping key into the kernel.
+ * innvl: {
+ *     "hidden_args" -> { "wkeydata" -> value }
+ *         raw uint8_t array of encryption wrapping key data (32 bytes)
+ *     (optional) "noop" -> (value ignored)
+ *         presence indicates key should only be verified, not loaded
+ * }
+ */
+/* ARGSUSED */
+static int
+zfs_ioc_load_key(const char *dsname, nvlist_t *innvl, nvlist_t *outnvl)
+{
+	int ret = 0;
+	dsl_crypto_params_t *dcp = NULL;
+	nvlist_t *hidden_args;
+	boolean_t noop = nvlist_exists(innvl, "noop");
+
+	if (strchr(dsname, '@') != NULL || strchr(dsname, '%') != NULL) {
+		ret = SET_ERROR(EINVAL);
+		goto error;
+	}
+
+	ret = nvlist_lookup_nvlist(innvl, ZPOOL_HIDDEN_ARGS, &hidden_args);
+	if (ret != 0) {
+		ret = SET_ERROR(EINVAL);
+		goto error;
+	}
+
+	ret = dsl_crypto_params_create_nvlist(DCP_CMD_NONE, NULL,
+	    hidden_args, &dcp);
+	if (ret != 0)
+		goto error;
+
+	ret = spa_keystore_load_wkey(dsname, dcp, noop);
+	if (ret != 0)
+		goto error;
+
+	dsl_crypto_params_free(dcp, noop);
+
+	return (0);
+
+error:
+	dsl_crypto_params_free(dcp, B_TRUE);
+	return (ret);
+}
+
+/*
+ * Unload a user's wrapping key from the kernel.
+ * Both innvl and outnvl are unused.
+ */
+/* ARGSUSED */
+static int
+zfs_ioc_unload_key(const char *dsname, nvlist_t *innvl, nvlist_t *outnvl)
+{
+	int ret = 0;
+
+	if (strchr(dsname, '@') != NULL || strchr(dsname, '%') != NULL) {
+		ret = (SET_ERROR(EINVAL));
+		goto out;
+	}
+
+	ret = spa_keystore_unload_wkey(dsname);
+	if (ret != 0)
+		goto out;
+
+out:
+	return (ret);
+}
+
+/*
+ * Changes a user's wrapping key used to decrypt a dataset. The keyformat,
+ * keylocation, pbkdf2salt, and pbkdf2iters properties can also be specified
+ * here to change how the key is derived in userspace.
+ *
+ * innvl: {
+ *    "hidden_args" (optional) -> { "wkeydata" -> value }
+ *         raw uint8_t array of new encryption wrapping key data (32 bytes)
+ *    "props" (optional) -> { prop -> value }
+ * }
+ *
+ * outnvl is unused
+ */
+/* ARGSUSED */
+static int
+zfs_ioc_change_key(const char *dsname, nvlist_t *innvl, nvlist_t *outnvl)
+{
+	int ret;
+	uint64_t cmd = DCP_CMD_NONE;
+	dsl_crypto_params_t *dcp = NULL;
+	nvlist_t *args = NULL, *hidden_args = NULL;
+
+	if (strchr(dsname, '@') != NULL || strchr(dsname, '%') != NULL) {
+		ret = (SET_ERROR(EINVAL));
+		goto error;
+	}
+
+	(void) nvlist_lookup_uint64(innvl, "crypt_cmd", &cmd);
+	(void) nvlist_lookup_nvlist(innvl, "props", &args);
+	(void) nvlist_lookup_nvlist(innvl, ZPOOL_HIDDEN_ARGS, &hidden_args);
+
+	ret = dsl_crypto_params_create_nvlist(cmd, args, hidden_args, &dcp);
+	if (ret != 0)
+		goto error;
+
+	ret = spa_keystore_change_key(dsname, dcp);
+	if (ret != 0)
+		goto error;
+
+	dsl_crypto_params_free(dcp, B_FALSE);
+
+	return (0);
+
+error:
+	dsl_crypto_params_free(dcp, B_TRUE);
+	return (ret);
+}
+
 static zfs_ioc_vec_t zfs_ioc_vec[ZFS_IOC_LAST - ZFS_IOC_FIRST];
 
 static void
@@ -6040,6 +6234,17 @@ zfs_ioctl_init(void)
 	    zfs_ioc_pool_sync, zfs_secpolicy_none, POOL_NAME,
 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE);
 
+	zfs_ioctl_register("load-key", ZFS_IOC_LOAD_KEY,
+	    zfs_ioc_load_key, zfs_secpolicy_load_key,
+	    DATASET_NAME, POOL_CHECK_SUSPENDED, B_TRUE, B_TRUE);
+	zfs_ioctl_register("unload-key", ZFS_IOC_UNLOAD_KEY,
+	    zfs_ioc_unload_key, zfs_secpolicy_load_key,
+	    DATASET_NAME, POOL_CHECK_SUSPENDED, B_TRUE, B_TRUE);
+	zfs_ioctl_register("change-key", ZFS_IOC_CHANGE_KEY,
+	    zfs_ioc_change_key, zfs_secpolicy_change_key,
+	    DATASET_NAME, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY,
+	    B_TRUE, B_TRUE);
+
 	/* IOCTLS that use the legacy function signature */
 
 	zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze,
diff --git a/usr/src/uts/common/fs/zfs/zfs_vfsops.c b/usr/src/uts/common/fs/zfs/zfs_vfsops.c
index f7beea4cc9..dfd13539cd 100644
--- a/usr/src/uts/common/fs/zfs/zfs_vfsops.c
+++ b/usr/src/uts/common/fs/zfs/zfs_vfsops.c
@@ -974,8 +974,8 @@ zfsvfs_create(const char *osname, zfsvfs_t **zfvp)
 	 * We claim to always be readonly so we can open snapshots;
 	 * other ZPL code will prevent us from writing to snapshots.
 	 */
-
-	error = dmu_objset_own(osname, DMU_OST_ZFS, B_TRUE, zfsvfs, &os);
+	error = dmu_objset_own(osname, DMU_OST_ZFS, B_TRUE, B_TRUE, zfsvfs,
+	    &os);
 	if (error != 0) {
 		kmem_free(zfsvfs, sizeof (zfsvfs_t));
 		return (error);
@@ -983,7 +983,7 @@ zfsvfs_create(const char *osname, zfsvfs_t **zfvp)
 
 	error = zfsvfs_create_impl(zfvp, zfsvfs, os);
 	if (error != 0) {
-		dmu_objset_disown(os, zfsvfs);
+		dmu_objset_disown(os, B_TRUE, zfsvfs);
 	}
 	return (error);
 }
@@ -1084,7 +1084,10 @@ zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting)
 				zfsvfs->z_replay = B_FALSE;
 			}
 		}
-		zfsvfs->z_vfs->vfs_flag |= readonly; /* restore readonly bit */
+
+		/* restore readonly bit */
+		if (readonly != 0)
+			zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY;
 	}
 
 	/*
@@ -1235,7 +1238,7 @@ zfs_domount(vfs_t *vfsp, char *osname)
 		zfsctl_create(zfsvfs);
 out:
 	if (error) {
-		dmu_objset_disown(zfsvfs->z_os, zfsvfs);
+		dmu_objset_disown(zfsvfs->z_os, B_TRUE, zfsvfs);
 		zfsvfs_free(zfsvfs);
 	} else {
 		atomic_inc_32(&zfs_active_fs_count);
@@ -1903,7 +1906,7 @@ zfs_umount(vfs_t *vfsp, int fflag, cred_t *cr)
 		/*
 		 * Finally release the objset
 		 */
-		dmu_objset_disown(os, zfsvfs);
+		dmu_objset_disown(os, B_TRUE, zfsvfs);
 	}
 
 	/*
diff --git a/usr/src/uts/common/fs/zfs/zfs_vnops.c b/usr/src/uts/common/fs/zfs/zfs_vnops.c
index a68fc3dd34..c8cb5b3935 100644
--- a/usr/src/uts/common/fs/zfs/zfs_vnops.c
+++ b/usr/src/uts/common/fs/zfs/zfs_vnops.c
@@ -915,8 +915,8 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
 				xuio_stat_wbuf_copied();
 			} else {
 				ASSERT(xuio || tx_bytes == max_blksz);
-				dmu_assign_arcbuf(sa_get_db(zp->z_sa_hdl),
-				    woff, abuf, tx);
+				dmu_assign_arcbuf_by_dbuf(
+				    sa_get_db(zp->z_sa_hdl), woff, abuf, tx);
 			}
 			ASSERT(tx_bytes <= uio->uio_resid);
 			uioskip(uio, tx_bytes);
diff --git a/usr/src/uts/common/fs/zfs/zil.c b/usr/src/uts/common/fs/zfs/zil.c
index 7e88c51a0b..e56104f979 100644
--- a/usr/src/uts/common/fs/zfs/zil.c
+++ b/usr/src/uts/common/fs/zfs/zil.c
@@ -194,8 +194,8 @@ zil_init_log_chain(zilog_t *zilog, blkptr_t *bp)
  * Read a log block and make sure it's valid.
  */
 static int
-zil_read_log_block(zilog_t *zilog, const blkptr_t *bp, blkptr_t *nbp, void *dst,
-    char **end)
+zil_read_log_block(zilog_t *zilog, boolean_t decrypt, const blkptr_t *bp,
+    blkptr_t *nbp, void *dst, char **end)
 {
 	enum zio_flag zio_flags = ZIO_FLAG_CANFAIL;
 	arc_flags_t aflags = ARC_FLAG_WAIT;
@@ -209,11 +209,14 @@ zil_read_log_block(zilog_t *zilog, const blkptr_t *bp, blkptr_t *nbp, void *dst,
 	if (!(zilog->zl_header->zh_flags & ZIL_CLAIM_LR_SEQ_VALID))
 		zio_flags |= ZIO_FLAG_SPECULATIVE;
 
+	if (!decrypt)
+		zio_flags |= ZIO_FLAG_RAW;
+
 	SET_BOOKMARK(&zb, bp->blk_cksum.zc_word[ZIL_ZC_OBJSET],
 	    ZB_ZIL_OBJECT, ZB_ZIL_LEVEL, bp->blk_cksum.zc_word[ZIL_ZC_SEQ]);
 
-	error = arc_read(NULL, zilog->zl_spa, bp, arc_getbuf_func, &abuf,
-	    ZIO_PRIORITY_SYNC_READ, zio_flags, &aflags, &zb);
+	error = arc_read(NULL, zilog->zl_spa, bp, arc_getbuf_func,
+	    &abuf, ZIO_PRIORITY_SYNC_READ, zio_flags, &aflags, &zb);
 
 	if (error == 0) {
 		zio_cksum_t cksum = bp->blk_cksum;
@@ -288,6 +291,14 @@ zil_read_log_data(zilog_t *zilog, const lr_write_t *lr, void *wbuf)
 	if (zilog->zl_header->zh_claim_txg == 0)
 		zio_flags |= ZIO_FLAG_SPECULATIVE | ZIO_FLAG_SCRUB;
 
+	/*
+	 * If we are not using the resulting data, we are just checking that
+	 * it hasn't been corrupted so we don't need to waste CPU time
+	 * decompressing and decrypting it.
+	 */
+	if (wbuf == NULL)
+		zio_flags |= ZIO_FLAG_RAW;
+
 	SET_BOOKMARK(&zb, dmu_objset_id(zilog->zl_os), lr->lr_foid,
 	    ZB_ZIL_LEVEL, lr->lr_offset / BP_GET_LSIZE(bp));
 
@@ -308,7 +319,8 @@ zil_read_log_data(zilog_t *zilog, const lr_write_t *lr, void *wbuf)
  */
 int
 zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func,
-    zil_parse_lr_func_t *parse_lr_func, void *arg, uint64_t txg)
+    zil_parse_lr_func_t *parse_lr_func, void *arg, uint64_t txg,
+    boolean_t decrypt)
 {
 	const zil_header_t *zh = zilog->zl_header;
 	boolean_t claimed = !!zh->zh_claim_txg;
@@ -347,7 +359,9 @@ zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func,
 
 		if (blk_seq > claim_blk_seq)
 			break;
-		if ((error = parse_blk_func(zilog, &blk, arg, txg)) != 0)
+
+		error = parse_blk_func(zilog, &blk, arg, txg);
+		if (error != 0)
 			break;
 		ASSERT3U(max_blk_seq, <, blk_seq);
 		max_blk_seq = blk_seq;
@@ -356,7 +370,8 @@ zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func,
 		if (max_lr_seq == claim_lr_seq && max_blk_seq == claim_blk_seq)
 			break;
 
-		error = zil_read_log_block(zilog, &blk, &next_blk, lrbuf, &end);
+		error = zil_read_log_block(zilog, decrypt, &blk, &next_blk,
+		    lrbuf, &end);
 		if (error != 0)
 			break;
 
@@ -366,7 +381,9 @@ zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func,
 			ASSERT3U(reclen, >=, sizeof (lr_t));
 			if (lr->lrc_seq > claim_lr_seq)
 				goto done;
-			if ((error = parse_lr_func(zilog, lr, arg, txg)) != 0)
+
+			error = parse_lr_func(zilog, lr, arg, txg);
+			if (error != 0)
 				goto done;
 			ASSERT3U(max_lr_seq, <, lr->lrc_seq);
 			max_lr_seq = lr->lrc_seq;
@@ -381,7 +398,8 @@ done:
 	zilog->zl_parse_lr_count = lr_count;
 
 	ASSERT(!claimed || !(zh->zh_flags & ZIL_CLAIM_LR_SEQ_VALID) ||
-	    (max_blk_seq == claim_blk_seq && max_lr_seq == claim_lr_seq));
+	    (max_blk_seq == claim_blk_seq && max_lr_seq == claim_lr_seq) ||
+	    (decrypt && error == EIO));
 
 	zil_bp_tree_fini(zilog);
 	zio_buf_free(lrbuf, SPA_OLD_MAXBLOCKSIZE);
@@ -451,9 +469,12 @@ zil_claim_log_record(zilog_t *zilog, lr_t *lrc, void *tx, uint64_t first_txg)
 	 * waited for all writes to be stable first), so it is semantically
 	 * correct to declare this the end of the log.
 	 */
-	if (lr->lr_blkptr.blk_birth >= first_txg &&
-	    (error = zil_read_log_data(zilog, lr, NULL)) != 0)
-		return (error);
+	if (lr->lr_blkptr.blk_birth >= first_txg) {
+		error = zil_read_log_data(zilog, lr, NULL);
+		if (error != 0)
+			return (error);
+	}
+
 	return (zil_claim_log_block(zilog, &lr->lr_blkptr, tx, first_txg));
 }
 
@@ -646,9 +667,8 @@ zil_create(zilog_t *zilog)
 			BP_ZERO(&blk);
 		}
 
-		error = zio_alloc_zil(zilog->zl_spa,
-		    zilog->zl_os->os_dsl_dataset->ds_object, txg, &blk, NULL,
-		    ZIL_MIN_BLKSZ, &slog);
+		error = zio_alloc_zil(zilog->zl_spa, zilog->zl_os, txg, &blk,
+		    NULL, ZIL_MIN_BLKSZ, &slog);
 
 		if (error == 0)
 			zil_init_log_chain(zilog, &blk);
@@ -736,7 +756,7 @@ zil_destroy_sync(zilog_t *zilog, dmu_tx_t *tx)
 {
 	ASSERT(list_is_empty(&zilog->zl_lwb_list));
 	(void) zil_parse(zilog, zil_free_log_block,
-	    zil_free_log_record, tx, zilog->zl_header->zh_claim_txg);
+	    zil_free_log_record, tx, zilog->zl_header->zh_claim_txg, B_FALSE);
 }
 
 int
@@ -750,7 +770,7 @@ zil_claim(dsl_pool_t *dp, dsl_dataset_t *ds, void *txarg)
 	int error;
 
 	error = dmu_objset_own_obj(dp, ds->ds_object,
-	    DMU_OST_ANY, B_FALSE, FTAG, &os);
+	    DMU_OST_ANY, B_FALSE, B_FALSE, FTAG, &os);
 	if (error != 0) {
 		/*
 		 * EBUSY indicates that the objset is inconsistent, in which
@@ -800,11 +820,13 @@ zil_claim(dsl_pool_t *dp, dsl_dataset_t *ds, void *txarg)
 	    zh->zh_claim_txg == 0)) {
 		if (!BP_IS_HOLE(&zh->zh_log)) {
 			(void) zil_parse(zilog, zil_clear_log_block,
-			    zil_noop_log_record, tx, first_txg);
+			    zil_noop_log_record, tx, first_txg, B_FALSE);
 		}
 		BP_ZERO(&zh->zh_log);
+		if (os->os_encrypted)
+			os->os_next_write_raw[tx->tx_txg & TXG_MASK] = B_TRUE;
 		dsl_dataset_dirty(dmu_objset_ds(os), tx);
-		dmu_objset_disown(os, FTAG);
+		dmu_objset_disown(os, B_FALSE, FTAG);
 		return (0);
 	}
 
@@ -824,18 +846,20 @@ zil_claim(dsl_pool_t *dp, dsl_dataset_t *ds, void *txarg)
 	ASSERT3U(zh->zh_claim_txg, <=, first_txg);
 	if (zh->zh_claim_txg == 0 && !BP_IS_HOLE(&zh->zh_log)) {
 		(void) zil_parse(zilog, zil_claim_log_block,
-		    zil_claim_log_record, tx, first_txg);
+		    zil_claim_log_record, tx, first_txg, B_FALSE);
 		zh->zh_claim_txg = first_txg;
 		zh->zh_claim_blk_seq = zilog->zl_parse_blk_seq;
 		zh->zh_claim_lr_seq = zilog->zl_parse_lr_seq;
 		if (zilog->zl_parse_lr_count || zilog->zl_parse_blk_count > 1)
 			zh->zh_flags |= ZIL_REPLAY_NEEDED;
 		zh->zh_flags |= ZIL_CLAIM_LR_SEQ_VALID;
+		if (os->os_encrypted)
+			os->os_next_write_raw[tx->tx_txg & TXG_MASK] = B_TRUE;
 		dsl_dataset_dirty(dmu_objset_ds(os), tx);
 	}
 
 	ASSERT3U(first_txg, ==, (spa_last_synced_txg(zilog->zl_spa) + 1));
-	dmu_objset_disown(os, FTAG);
+	dmu_objset_disown(os, B_FALSE, FTAG);
 	return (0);
 }
 
@@ -907,7 +931,7 @@ zil_check_log_chain(dsl_pool_t *dp, dsl_dataset_t *ds, void *tx)
 	 */
 	error = zil_parse(zilog, zil_claim_log_block, zil_claim_log_record, tx,
 	    zilog->zl_header->zh_claim_txg ? -1ULL :
-	    spa_min_claim_txg(os->os_spa));
+	    spa_min_claim_txg(os->os_spa), B_FALSE);
 
 	return ((error == ECKSUM || error == ENOENT) ? 0 : error);
 }
@@ -1435,8 +1459,9 @@ zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb)
 	BP_ZERO(bp);
 
 	/* pass the old blkptr in order to spread log blocks across devs */
-	error = zio_alloc_zil(spa, zilog->zl_os->os_dsl_dataset->ds_object,
-	    txg, bp, &lwb->lwb_blk, zil_blksz, &slog);
+	error = zio_alloc_zil(spa, zilog->zl_os, txg, bp, &lwb->lwb_blk,
+	    zil_blksz, &slog);
+
 	if (error == 0) {
 		ASSERT3U(bp->blk_birth, ==, txg);
 		bp->blk_cksum = lwb->lwb_blk.blk_cksum;
@@ -3188,6 +3213,21 @@ zil_suspend(const char *osname, void **cookiep)
 		return (0);
 	}
 
+	/*
+	 * The ZIL has work to do. Ensure that the associated encryption
+	 * key will remain mapped while we are committing the log by
+	 * grabbing a reference to it. If the key isn't loaded we have no
+	 * choice but to return an error until the wrapping key is loaded.
+	 */
+	if (os->os_encrypted &&
+	    dsl_dataset_create_key_mapping(dmu_objset_ds(os)) != 0) {
+		zilog->zl_suspend--;
+		mutex_exit(&zilog->zl_lock);
+		dsl_dataset_long_rele(dmu_objset_ds(os), suspend_tag);
+		dsl_dataset_rele(dmu_objset_ds(os), suspend_tag);
+		return (SET_ERROR(EBUSY));
+	}
+
 	zilog->zl_suspending = B_TRUE;
 	mutex_exit(&zilog->zl_lock);
 
@@ -3202,9 +3242,10 @@ zil_suspend(const char *osname, void **cookiep)
 	zil_commit_impl(zilog, 0);
 
 	/*
-	 * Now that we've ensured all lwb's are LWB_STATE_FLUSH_DONE, we
-	 * use txg_wait_synced() to ensure the data from the zilog has
-	 * migrated to the main pool before calling zil_destroy().
+	 * Now that we've ensured all lwb's are LWB_STATE_FLUSH_DONE,
+	 * we use txg_wait_synced() to ensure the data from the zilog
+	 * has migrated to the main pool before calling
+	 * zil_destroy().
 	 */
 	txg_wait_synced(zilog->zl_dmu_pool, 0);
 
@@ -3215,6 +3256,9 @@ zil_suspend(const char *osname, void **cookiep)
 	cv_broadcast(&zilog->zl_cv_suspend);
 	mutex_exit(&zilog->zl_lock);
 
+	if (os->os_encrypted)
+		dsl_dataset_remove_key_mapping(dmu_objset_ds(os));
+
 	if (cookiep == NULL)
 		zil_resume(os);
 	else
@@ -3381,7 +3425,7 @@ zil_replay(objset_t *os, void *arg, zil_replay_func_t *replay_func[TX_MAX_TYPE])
 	zilog->zl_replay_time = ddi_get_lbolt();
 	ASSERT(zilog->zl_replay_blks == 0);
 	(void) zil_parse(zilog, zil_incr_blks, zil_replay_log_record, &zr,
-	    zh->zh_claim_txg);
+	    zh->zh_claim_txg, B_TRUE);
 	kmem_free(zr.zr_lr, 2 * SPA_MAXBLOCKSIZE);
 
 	zil_destroy(zilog, B_FALSE);
diff --git a/usr/src/uts/common/fs/zfs/zio.c b/usr/src/uts/common/fs/zfs/zio.c
index 76312e6a74..2e44d59daf 100644
--- a/usr/src/uts/common/fs/zfs/zio.c
+++ b/usr/src/uts/common/fs/zfs/zio.c
@@ -45,6 +45,7 @@
 #include <sys/metaslab_impl.h>
 #include <sys/abd.h>
 #include <sys/cityhash.h>
+#include <sys/dsl_crypt.h>
 
 /*
  * ==========================================================================
@@ -270,6 +271,13 @@ zio_data_buf_free(void *buf, size_t size)
 	kmem_cache_free(zio_data_buf_cache[c], buf);
 }
 
+/* ARGSUSED */
+static void
+zio_abd_free(void *abd, size_t size)
+{
+	abd_free((abd_t *)abd);
+}
+
 /*
  * ==========================================================================
  * Push and pop I/O transform buffers
@@ -322,7 +330,7 @@ zio_pop_transforms(zio_t *zio)
 
 /*
  * ==========================================================================
- * I/O transform callbacks for subblocks and decompression
+ * I/O transform callbacks for subblocks, decompression, and decryption
  * ==========================================================================
  */
 static void
@@ -348,6 +356,140 @@ zio_decompress(zio_t *zio, abd_t *data, uint64_t size)
 	}
 }
 
+/*
+ * Read transform that authenticates and/or decrypts the block described by
+ * zio->io_bp, leaving the result in 'data'. Does nothing if the zio has
+ * already failed. On failure zio->io_error is set: ECKSUM results are
+ * reported as EIO (with an ereport unless the zio is speculative), any
+ * other error is passed through unchanged.
+ */
+static void
+zio_decrypt(zio_t *zio, abd_t *data, uint64_t size)
+{
+	int ret;
+	void *tmp;
+	blkptr_t *bp = zio->io_bp;
+	spa_t *spa = zio->io_spa;
+	uint64_t dsobj = zio->io_bookmark.zb_objset;
+	uint64_t lsize = BP_GET_LSIZE(bp);
+	dmu_object_type_t ot = BP_GET_TYPE(bp);
+	uint8_t salt[ZIO_DATA_SALT_LEN];
+	uint8_t iv[ZIO_DATA_IV_LEN];
+	uint8_t mac[ZIO_DATA_MAC_LEN];
+	boolean_t no_crypt = B_FALSE;
+
+	ASSERT(BP_USES_CRYPT(bp));
+	ASSERT3U(size, !=, 0);
+
+	if (zio->io_error != 0)
+		return;
+
+	/*
+	 * Verify the cksum of MACs stored in an indirect bp. It will always
+	 * be possible to verify this since it does not require an encryption
+	 * key.
+	 */
+	if (BP_HAS_INDIRECT_MAC_CKSUM(bp)) {
+		zio_crypt_decode_mac_bp(bp, mac);
+
+		if (BP_GET_COMPRESS(bp) != ZIO_COMPRESS_OFF) {
+			/*
+			 * We haven't decompressed the data yet, but
+			 * zio_crypt_do_indirect_mac_checksum() requires
+			 * decompressed data to be able to parse out the MACs
+			 * from the indirect block. We decompress it now and
+			 * throw away the result after we are finished.
+			 */
+			tmp = zio_buf_alloc(lsize);
+			ret = zio_decompress_data(BP_GET_COMPRESS(bp),
+			    zio->io_abd, tmp, zio->io_size, lsize);
+			if (ret != 0) {
+				zio_buf_free(tmp, lsize);
+				ret = SET_ERROR(EIO);
+				goto error;
+			}
+			ret = zio_crypt_do_indirect_mac_checksum(B_FALSE,
+			    tmp, lsize, BP_SHOULD_BYTESWAP(bp), mac);
+			zio_buf_free(tmp, lsize);
+		} else {
+			ret = zio_crypt_do_indirect_mac_checksum_abd(B_FALSE,
+			    zio->io_abd, size, BP_SHOULD_BYTESWAP(bp), mac);
+		}
+		abd_copy(data, zio->io_abd, size);
+
+		if (ret != 0)
+			goto error;
+
+		return;
+	}
+
+	/*
+	 * If this is an authenticated block, just check the MAC. It would be
+	 * nice to separate this out into its own flag, but for the moment
+	 * enum zio_flag is out of bits.
+	 */
+	if (BP_IS_AUTHENTICATED(bp)) {
+		if (ot == DMU_OT_OBJSET) {
+			ret = spa_do_crypt_objset_mac_abd(B_FALSE, spa,
+			    dsobj, zio->io_abd, size, BP_SHOULD_BYTESWAP(bp));
+		} else {
+			zio_crypt_decode_mac_bp(bp, mac);
+			ret = spa_do_crypt_mac_abd(B_FALSE, spa, dsobj,
+			    zio->io_abd, size, mac);
+		}
+		abd_copy(data, zio->io_abd, size);
+
+		if (zio_injection_enabled && ot != DMU_OT_DNODE && ret == 0) {
+			ret = zio_handle_decrypt_injection(spa,
+			    &zio->io_bookmark, ot, ECKSUM);
+		}
+		if (ret != 0)
+			goto error;
+
+		return;
+	}
+
+	zio_crypt_decode_params_bp(bp, salt, iv);
+
+	if (ot == DMU_OT_INTENT_LOG) {
+		tmp = abd_borrow_buf_copy(zio->io_abd, sizeof (zil_chain_t));
+		zio_crypt_decode_mac_zil(tmp, mac);
+		abd_return_buf(zio->io_abd, tmp, sizeof (zil_chain_t));
+	} else {
+		zio_crypt_decode_mac_bp(bp, mac);
+	}
+
+	ret = spa_do_crypt_abd(B_FALSE, spa, &zio->io_bookmark, BP_GET_TYPE(bp),
+	    BP_GET_DEDUP(bp), BP_SHOULD_BYTESWAP(bp), salt, iv, mac, size, data,
+	    zio->io_abd, &no_crypt);
+	if (no_crypt)
+		abd_copy(data, zio->io_abd, size);
+
+	if (ret != 0)
+		goto error;
+
+	return;
+
+error:
+	/* assert that the key was found unless this was speculative */
+	ASSERT(ret != EACCES || (zio->io_flags & ZIO_FLAG_SPECULATIVE));
+
+	/*
+	 * If there was a decryption / authentication error return EIO as
+	 * the io_error. If this was not a speculative zio, create an ereport.
+	 */
+	if (ret == ECKSUM) {
+		zio->io_error = SET_ERROR(EIO);
+		if ((zio->io_flags & ZIO_FLAG_SPECULATIVE) == 0) {
+			spa_log_error(spa, &zio->io_bookmark);
+			zfs_ereport_post(FM_EREPORT_ZFS_AUTHENTICATION,
+			    spa, NULL, &zio->io_bookmark, zio, 0, 0);
+		}
+	} else {
+		zio->io_error = ret;
+	}
+}
+
 /*
  * ==========================================================================
  * I/O parent/child relationships and pipeline interlocks
@@ -565,7 +699,7 @@ zio_create(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
 	ASSERT(!bp || !(flags & ZIO_FLAG_CONFIG_WRITER));
 	ASSERT(vd || stage == ZIO_STAGE_OPEN);
 
-	IMPLY(lsize != psize, (flags & ZIO_FLAG_RAW) != 0);
+	IMPLY(lsize != psize, (flags & ZIO_FLAG_RAW_COMPRESS) != 0);
 
 	zio = kmem_cache_alloc(zio_cache, KM_SLEEP);
 	bzero(zio, sizeof (zio_t));
@@ -836,9 +970,12 @@ zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
 	 * Data can be NULL if we are going to call zio_write_override() to
 	 * provide the already-allocated BP.  But we may need the data to
 	 * verify a dedup hit (if requested).  In this case, don't try to
-	 * dedup (just take the already-allocated BP verbatim).
+	 * dedup (just take the already-allocated BP verbatim). Encrypted
+	 * dedup blocks need data as well so we also disable dedup in this
+	 * case.
 	 */
-	if (data == NULL && zio->io_prop.zp_dedup_verify) {
+	if (data == NULL &&
+	    (zio->io_prop.zp_dedup_verify || zio->io_prop.zp_encrypt)) {
 		zio->io_prop.zp_dedup = zio->io_prop.zp_dedup_verify = B_FALSE;
 	}
 
@@ -1189,23 +1326,30 @@ static int
 zio_read_bp_init(zio_t *zio)
 {
 	blkptr_t *bp = zio->io_bp;
+	uint64_t psize =
+	    BP_IS_EMBEDDED(bp) ? BPE_GET_PSIZE(bp) : BP_GET_PSIZE(bp);
 
 	ASSERT3P(zio->io_bp, ==, &zio->io_bp_copy);
 
 	if (BP_GET_COMPRESS(bp) != ZIO_COMPRESS_OFF &&
 	    zio->io_child_type == ZIO_CHILD_LOGICAL &&
-	    !(zio->io_flags & ZIO_FLAG_RAW)) {
-		uint64_t psize =
-		    BP_IS_EMBEDDED(bp) ? BPE_GET_PSIZE(bp) : BP_GET_PSIZE(bp);
+	    !(zio->io_flags & ZIO_FLAG_RAW_COMPRESS)) {
 		zio_push_transform(zio, abd_alloc_sametype(zio->io_abd, psize),
 		    psize, psize, zio_decompress);
 	}
 
-	if (BP_IS_EMBEDDED(bp) && BPE_GET_ETYPE(bp) == BP_EMBEDDED_TYPE_DATA) {
-		zio->io_pipeline = ZIO_INTERLOCK_PIPELINE;
+	if (((BP_IS_PROTECTED(bp) && !(zio->io_flags & ZIO_FLAG_RAW_ENCRYPT)) ||
+	    BP_HAS_INDIRECT_MAC_CKSUM(bp)) &&
+	    zio->io_child_type == ZIO_CHILD_LOGICAL) {
+		zio_push_transform(zio, abd_alloc_sametype(zio->io_abd, psize),
+		    psize, psize, zio_decrypt);
+	}
 
+	if (BP_IS_EMBEDDED(bp) && BPE_GET_ETYPE(bp) == BP_EMBEDDED_TYPE_DATA) {
 		int psize = BPE_GET_PSIZE(bp);
 		void *data = abd_borrow_buf(zio->io_abd, psize);
+
+		zio->io_pipeline = ZIO_INTERLOCK_PIPELINE;
 		decode_embedded_bp_compressed(bp, data);
 		abd_return_buf_copy(zio->io_abd, data, psize);
 	} else {
@@ -1266,7 +1410,8 @@ zio_write_bp_init(zio_t *zio)
 		ASSERT((zio_checksum_table[zp->zp_checksum].ci_flags &
 		    ZCHECKSUM_FLAG_DEDUP) || zp->zp_dedup_verify);
 
-		if (BP_GET_CHECKSUM(bp) == zp->zp_checksum) {
+		if (BP_GET_CHECKSUM(bp) == zp->zp_checksum &&
+		    !zp->zp_encrypt) {
 			BP_SET_DEDUP(bp, 1);
 			zio->io_pipeline |= ZIO_STAGE_DDT_WRITE;
 			return (ZIO_PIPELINE_CONTINUE);
@@ -1295,8 +1440,6 @@ zio_write_compress(zio_t *zio)
 	uint64_t psize = zio->io_size;
 	int pass = 1;
 
-	EQUIV(lsize != psize, (zio->io_flags & ZIO_FLAG_RAW) != 0);
-
 	/*
 	 * If our children haven't all reached the ready stage,
 	 * wait for them and then repeat this pipeline stage.
@@ -1347,13 +1490,15 @@ zio_write_compress(zio_t *zio)
 	}
 
 	/* If it's a compressed write that is not raw, compress the buffer. */
-	if (compress != ZIO_COMPRESS_OFF && psize == lsize) {
+	if (compress != ZIO_COMPRESS_OFF &&
+	    !(zio->io_flags & ZIO_FLAG_RAW_COMPRESS)) {
 		void *cbuf = zio_buf_alloc(lsize);
 		psize = zio_compress_data(compress, zio->io_abd, cbuf, lsize);
 		if (psize == 0 || psize == lsize) {
 			compress = ZIO_COMPRESS_OFF;
 			zio_buf_free(cbuf, lsize);
-		} else if (!zp->zp_dedup && psize <= BPE_PAYLOAD_SIZE &&
+		} else if (!zp->zp_dedup && !zp->zp_encrypt &&
+		    psize <= BPE_PAYLOAD_SIZE &&
 		    zp->zp_level == 0 && !DMU_OT_HAS_FILL(zp->zp_type) &&
 		    spa_feature_is_enabled(spa, SPA_FEATURE_EMBEDDED_DATA)) {
 			encode_embedded_bp_compressed(bp,
@@ -1400,6 +1545,20 @@ zio_write_compress(zio_t *zio)
 		zio->io_bp_override = NULL;
 		*bp = zio->io_bp_orig;
 		zio->io_pipeline = zio->io_orig_pipeline;
+
+	} else if ((zio->io_flags & ZIO_FLAG_RAW_ENCRYPT) != 0 &&
+	    zp->zp_type == DMU_OT_DNODE) {
+		/*
+		 * The DMU actually relies on the zio layer's compression
+		 * to free metadnode blocks that have had all contained
+		 * dnodes freed. As a result, even when doing a raw
+		 * receive, we must check whether the block can be compressed
+		 * to a hole.
+		 */
+		psize = zio_compress_data(ZIO_COMPRESS_EMPTY,
+		    zio->io_abd, NULL, lsize);
+		if (psize == 0)
+			compress = ZIO_COMPRESS_OFF;
 	} else {
 		ASSERT3U(psize, !=, 0);
 	}
@@ -1417,7 +1576,6 @@ zio_write_compress(zio_t *zio)
 	    pass >= zfs_sync_pass_rewrite) {
 		VERIFY3U(psize, !=, 0);
 		enum zio_stage gang_stages = zio->io_pipeline & ZIO_GANG_STAGES;
-
 		zio->io_pipeline = ZIO_REWRITE_PIPELINE | gang_stages;
 		zio->io_flags |= ZIO_FLAG_IO_REWRITE;
 	} else {
@@ -1447,6 +1605,8 @@ zio_write_compress(zio_t *zio)
 		if (zp->zp_dedup) {
 			ASSERT(zio->io_child_type == ZIO_CHILD_LOGICAL);
 			ASSERT(!(zio->io_flags & ZIO_FLAG_IO_REWRITE));
+			ASSERT(!zp->zp_encrypt ||
+			    DMU_OT_IS_ENCRYPTED(zp->zp_type));
 			zio->io_pipeline = ZIO_DDT_WRITE_PIPELINE;
 		}
 		if (zp->zp_nopwrite) {
@@ -1794,7 +1954,8 @@ zio_suspend(spa_t *spa, zio_t *zio, zio_suspend_reason_t reason)
 		    "failure and the failure mode property for this pool "
 		    "is set to panic.", spa_name(spa));
 
-	zfs_ereport_post(FM_EREPORT_ZFS_IO_FAILURE, spa, NULL, NULL, 0, 0);
+	zfs_ereport_post(FM_EREPORT_ZFS_IO_FAILURE, spa, NULL,
+	    NULL, NULL, 0, 0);
 
 	mutex_enter(&spa->spa_suspend_lock);
 
@@ -2231,6 +2392,13 @@ zio_write_gang_block(zio_t *pio)
 	int error;
 	boolean_t has_data = !(pio->io_flags & ZIO_FLAG_NODATA);
 
+	/*
+	 * Encrypted blocks need DVA[2] free, so encrypted gang headers can't
+	 * have a third copy.
+	 */
+	if (gio->io_prop.zp_encrypt && gbh_copies >= SPA_DVAS_PER_BP)
+		gbh_copies = SPA_DVAS_PER_BP - 1;
+
 	int flags = METASLAB_HINTBP_FAVOR | METASLAB_GANG_HEADER;
 	if (pio->io_flags & ZIO_FLAG_IO_ALLOCATING) {
 		ASSERT(pio->io_priority == ZIO_PRIORITY_ASYNC_WRITE);
@@ -2309,6 +2477,11 @@ zio_write_gang_block(zio_t *pio)
 		zp.zp_dedup = B_FALSE;
 		zp.zp_dedup_verify = B_FALSE;
 		zp.zp_nopwrite = B_FALSE;
+		zp.zp_encrypt = gio->io_prop.zp_encrypt;
+		zp.zp_byteorder = gio->io_prop.zp_byteorder;
+		bzero(zp.zp_salt, ZIO_DATA_SALT_LEN);
+		bzero(zp.zp_iv, ZIO_DATA_IV_LEN);
+		bzero(zp.zp_mac, ZIO_DATA_MAC_LEN);
 
 		zio_t *cio = zio_write(zio, spa, txg, &gbh->zg_blkptr[g],
 		    has_data ? abd_get_offset(pio->io_abd, pio->io_size -
@@ -2383,6 +2556,7 @@ zio_nop_write(zio_t *zio)
 	if (BP_IS_HOLE(bp_orig) ||
 	    !(zio_checksum_table[BP_GET_CHECKSUM(bp)].ci_flags &
 	    ZCHECKSUM_FLAG_NOPWRITE) ||
+	    BP_IS_ENCRYPTED(bp) || BP_IS_ENCRYPTED(bp_orig) ||
 	    BP_GET_CHECKSUM(bp) != BP_GET_CHECKSUM(bp_orig) ||
 	    BP_GET_COMPRESS(bp) != BP_GET_COMPRESS(bp_orig) ||
 	    BP_GET_DEDUP(bp) != BP_GET_DEDUP(bp_orig) ||
@@ -2521,7 +2695,7 @@ static boolean_t
 zio_ddt_collision(zio_t *zio, ddt_t *ddt, ddt_entry_t *dde)
 {
 	spa_t *spa = zio->io_spa;
-	boolean_t do_raw = (zio->io_flags & ZIO_FLAG_RAW);
+	boolean_t do_raw = !!(zio->io_flags & ZIO_FLAG_RAW);
 
 	/* We should never get a raw, override zio */
 	ASSERT(!(zio->io_bp_override && do_raw));
@@ -2531,11 +2705,21 @@ zio_ddt_collision(zio_t *zio, ddt_t *ddt, ddt_entry_t *dde)
 	 * because when zio->io_bp is an override bp, we will not have
 	 * pushed the I/O transforms.  That's an important optimization
 	 * because otherwise we'd compress/encrypt all dmu_sync() data twice.
+	 * However, we should never get a raw, override zio, so for raw
+	 * zios we can compare the io_abd directly. This is useful because
+	 * it allows us to do dedup verification even if we don't have access
+	 * to the original data (for instance, if the encryption keys aren't
+	 * loaded).
 	 */
+
 	for (int p = DDT_PHYS_SINGLE; p <= DDT_PHYS_TRIPLE; p++) {
 		zio_t *lio = dde->dde_lead_zio[p];
 
-		if (lio != NULL) {
+		if (lio != NULL && do_raw) {
+			return (lio->io_size != zio->io_size ||
+			    abd_cmp(zio->io_abd, lio->io_abd,
+			    zio->io_size) != 0);
+		} else if (lio != NULL) {
 			return (lio->io_orig_size != zio->io_orig_size ||
 			    abd_cmp(zio->io_orig_abd, lio->io_orig_abd,
 			    zio->io_orig_size) != 0);
@@ -2545,7 +2729,36 @@ zio_ddt_collision(zio_t *zio, ddt_t *ddt, ddt_entry_t *dde)
 	for (int p = DDT_PHYS_SINGLE; p <= DDT_PHYS_TRIPLE; p++) {
 		ddt_phys_t *ddp = &dde->dde_phys[p];
 
-		if (ddp->ddp_phys_birth != 0) {
+		if (ddp->ddp_phys_birth != 0 && do_raw) {
+			blkptr_t blk = *zio->io_bp;
+			uint64_t psize;
+			abd_t *tmpabd;
+			int error;
+
+			ddt_bp_fill(ddp, &blk, ddp->ddp_phys_birth);
+			psize = BP_GET_PSIZE(&blk);
+
+			if (psize != zio->io_size)
+				return (B_TRUE);
+
+			ddt_exit(ddt);
+
+			tmpabd = abd_alloc_for_io(psize, B_TRUE);
+
+			error = zio_wait(zio_read(NULL, spa, &blk, tmpabd,
+			    psize, NULL, NULL, ZIO_PRIORITY_SYNC_READ,
+			    ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE |
+			    ZIO_FLAG_RAW, &zio->io_bookmark));
+
+			if (error == 0) {
+				if (abd_cmp(tmpabd, zio->io_abd, psize) != 0)
+					error = SET_ERROR(ENOENT);
+			}
+
+			abd_free(tmpabd);
+			ddt_enter(ddt);
+			return (error != 0);
+		} else if (ddp->ddp_phys_birth != 0) {
 			arc_buf_t *abuf = NULL;
 			arc_flags_t aflags = ARC_FLAG_WAIT;
 			int zio_flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE;
@@ -2554,6 +2767,9 @@ zio_ddt_collision(zio_t *zio, ddt_t *ddt, ddt_entry_t *dde)
 
 			ddt_bp_fill(ddp, &blk, ddp->ddp_phys_birth);
 
+			if (BP_GET_LSIZE(&blk) != zio->io_orig_size)
+				return (B_TRUE);
+
 			ddt_exit(ddt);
 
 			/*
@@ -2578,10 +2794,9 @@ zio_ddt_collision(zio_t *zio, ddt_t *ddt, ddt_entry_t *dde)
 			    zio_flags, &aflags, &zio->io_bookmark);
 
 			if (error == 0) {
-				if (arc_buf_size(abuf) != zio->io_orig_size ||
-				    abd_cmp_buf(zio->io_orig_abd, abuf->b_data,
+				if (abd_cmp_buf(zio->io_orig_abd, abuf->b_data,
 				    zio->io_orig_size) != 0)
-					error = SET_ERROR(EEXIST);
+					error = SET_ERROR(ENOENT);
 				arc_buf_destroy(abuf, &abuf);
 			}
 
@@ -3048,7 +3263,7 @@ zio_dva_unallocate(zio_t *zio, zio_gang_node_t *gn, blkptr_t *bp)
  * Try to allocate an intent log block.  Return 0 on success, errno on failure.
  */
 int
-zio_alloc_zil(spa_t *spa, uint64_t objset, uint64_t txg, blkptr_t *new_bp,
+zio_alloc_zil(spa_t *spa, objset_t *os, uint64_t txg, blkptr_t *new_bp,
     blkptr_t *old_bp, uint64_t size, boolean_t *slog)
 {
 	int error = 1;
@@ -3074,14 +3289,15 @@ zio_alloc_zil(spa_t *spa, uint64_t objset, uint64_t txg, blkptr_t *new_bp,
 	 */
 	error = metaslab_alloc(spa, spa_log_class(spa), size, new_bp, 1,
 	    txg, old_bp, METASLAB_HINTBP_AVOID, &io_alloc_list, NULL,
-	    cityhash4(0, 0, 0, objset) % spa->spa_alloc_count);
+	    cityhash4(0, 0, 0,
+	    os->os_dsl_dataset->ds_object) % spa->spa_alloc_count);
 	if (error == 0) {
 		*slog = TRUE;
 	} else {
 		error = metaslab_alloc(spa, spa_normal_class(spa), size,
 		    new_bp, 1, txg, old_bp, METASLAB_HINTBP_AVOID,
-		    &io_alloc_list, NULL, cityhash4(0, 0, 0, objset) %
-		    spa->spa_alloc_count);
+		    &io_alloc_list, NULL, cityhash4(0, 0, 0,
+		    os->os_dsl_dataset->ds_object) % spa->spa_alloc_count);
 		if (error == 0)
 			*slog = FALSE;
 	}
@@ -3098,6 +3314,23 @@ zio_alloc_zil(spa_t *spa, uint64_t objset, uint64_t txg, blkptr_t *new_bp,
 		BP_SET_LEVEL(new_bp, 0);
 		BP_SET_DEDUP(new_bp, 0);
 		BP_SET_BYTEORDER(new_bp, ZFS_HOST_BYTEORDER);
+
+		/*
+		 * Encrypted blocks will require an IV and salt. We generate
+		 * these now since we will not be rewriting the bp at
+		 * rewrite time.
+		 */
+		if (os->os_encrypted) {
+			uint8_t iv[ZIO_DATA_IV_LEN];
+			uint8_t salt[ZIO_DATA_SALT_LEN];
+
+			BP_SET_CRYPT(new_bp, B_TRUE);
+			VERIFY0(spa_crypt_get_salt(spa,
+			    dmu_objset_id(os), salt));
+			VERIFY0(zio_crypt_generate_iv(iv));
+
+			zio_crypt_encode_params_bp(new_bp, salt, iv);
+		}
 	} else {
 		zfs_dbgmsg("%s: zil block allocation failure: "
 		    "size %llu, error %d", spa_name(spa), size, error);
@@ -3332,7 +3565,7 @@ zio_change_priority(zio_t *pio, zio_priority_t priority)
  */
 static void
 zio_vsd_default_cksum_finish(zio_cksum_report_t *zcr,
-    const void *good_buf)
+    const abd_t *good_buf)
 {
 	/* no processing needed */
 	zfs_ereport_finish_checksum(zcr, good_buf, zcr->zcr_cbdata, B_FALSE);
@@ -3342,14 +3575,14 @@ zio_vsd_default_cksum_finish(zio_cksum_report_t *zcr,
 void
 zio_vsd_default_cksum_report(zio_t *zio, zio_cksum_report_t *zcr, void *ignored)
 {
-	void *buf = zio_buf_alloc(zio->io_size);
+	void *abd = abd_alloc_sametype(zio->io_abd, zio->io_size);
 
-	abd_copy_to_buf(buf, zio->io_abd, zio->io_size);
+	abd_copy(abd, zio->io_abd, zio->io_size);
 
 	zcr->zcr_cbinfo = zio->io_size;
-	zcr->zcr_cbdata = buf;
+	zcr->zcr_cbdata = abd;
 	zcr->zcr_finish = zio_vsd_default_cksum_finish;
-	zcr->zcr_free = zio_buf_free;
+	zcr->zcr_free = zio_abd_free;
 }
 
 static int
@@ -3460,6 +3693,164 @@ zio_vdev_io_bypass(zio_t *zio)
 
 /*
  * ==========================================================================
+ * Encrypt and store encryption parameters
+ * ==========================================================================
+ */
+
+
+/*
+ * This function is used for ZIO_STAGE_ENCRYPT. It is responsible for
+ * managing the storage of encryption parameters and passing them to the
+ * lower-level encryption functions.
+ */
+static int
+zio_encrypt(zio_t *zio)
+{
+	zio_prop_t *zp = &zio->io_prop;
+	spa_t *spa = zio->io_spa;
+	blkptr_t *bp = zio->io_bp;
+	uint64_t psize = BP_GET_PSIZE(bp);
+	uint64_t dsobj = zio->io_bookmark.zb_objset;
+	dmu_object_type_t ot = BP_GET_TYPE(bp);
+	void *enc_buf = NULL;
+	abd_t *eabd = NULL;
+	uint8_t salt[ZIO_DATA_SALT_LEN];
+	uint8_t iv[ZIO_DATA_IV_LEN];
+	uint8_t mac[ZIO_DATA_MAC_LEN];
+	boolean_t no_crypt = B_FALSE;
+
+	/* the root zio already encrypted the data */
+	if (zio->io_child_type == ZIO_CHILD_GANG)
+		return (ZIO_PIPELINE_CONTINUE);
+
+	/* only ZIL blocks are re-encrypted on rewrite */
+	if (!IO_IS_ALLOCATING(zio) && ot != DMU_OT_INTENT_LOG)
+		return (ZIO_PIPELINE_CONTINUE);
+
+	if (!(zp->zp_encrypt || BP_IS_ENCRYPTED(bp))) {
+		BP_SET_CRYPT(bp, B_FALSE);
+		return (ZIO_PIPELINE_CONTINUE);
+	}
+
+	/* if we are doing raw encryption set the provided encryption params */
+	if (zio->io_flags & ZIO_FLAG_RAW_ENCRYPT) {
+		ASSERT0(BP_GET_LEVEL(bp));
+		BP_SET_CRYPT(bp, B_TRUE);
+		BP_SET_BYTEORDER(bp, zp->zp_byteorder);
+		if (ot != DMU_OT_OBJSET)
+			zio_crypt_encode_mac_bp(bp, zp->zp_mac);
+
+		/* dnode blocks must be written out in the provided byteorder */
+		if (zp->zp_byteorder != ZFS_HOST_BYTEORDER &&
+		    ot == DMU_OT_DNODE) {
+			void *bswap_buf = zio_buf_alloc(psize);
+			abd_t *babd = abd_get_from_buf(bswap_buf, psize);
+
+			ASSERT3U(BP_GET_COMPRESS(bp), ==, ZIO_COMPRESS_OFF);
+			abd_copy_to_buf(bswap_buf, zio->io_abd, psize);
+			dmu_ot_byteswap[DMU_OT_BYTESWAP(ot)].ob_func(bswap_buf,
+			    psize);
+
+			abd_take_ownership_of_buf(babd, B_TRUE);
+			zio_push_transform(zio, babd, psize, psize, NULL);
+		}
+
+		if (DMU_OT_IS_ENCRYPTED(ot))
+			zio_crypt_encode_params_bp(bp, zp->zp_salt, zp->zp_iv);
+		return (ZIO_PIPELINE_CONTINUE);
+	}
+
+	/* indirect blocks only maintain a cksum of the lower level MACs */
+	if (BP_GET_LEVEL(bp) > 0) {
+		BP_SET_CRYPT(bp, B_TRUE);
+		VERIFY0(zio_crypt_do_indirect_mac_checksum_abd(B_TRUE,
+		    zio->io_orig_abd, BP_GET_LSIZE(bp), BP_SHOULD_BYTESWAP(bp),
+		    mac));
+		zio_crypt_encode_mac_bp(bp, mac);
+		return (ZIO_PIPELINE_CONTINUE);
+	}
+
+	/*
+	 * Objset blocks are a special case since they have 2 256-bit MACs
+	 * embedded within them.
+	 */
+	if (ot == DMU_OT_OBJSET) {
+		ASSERT0(DMU_OT_IS_ENCRYPTED(ot));
+		ASSERT3U(BP_GET_COMPRESS(bp), ==, ZIO_COMPRESS_OFF);
+		BP_SET_CRYPT(bp, B_TRUE);
+		VERIFY0(spa_do_crypt_objset_mac_abd(B_TRUE, spa, dsobj,
+		    zio->io_abd, psize, BP_SHOULD_BYTESWAP(bp)));
+		return (ZIO_PIPELINE_CONTINUE);
+	}
+
+	/* unencrypted object types are only authenticated with a MAC */
+	if (!DMU_OT_IS_ENCRYPTED(ot)) {
+		BP_SET_CRYPT(bp, B_TRUE);
+		VERIFY0(spa_do_crypt_mac_abd(B_TRUE, spa, dsobj,
+		    zio->io_abd, psize, mac));
+		zio_crypt_encode_mac_bp(bp, mac);
+		return (ZIO_PIPELINE_CONTINUE);
+	}
+
+	/*
+	 * Later passes of sync-to-convergence may decide to rewrite data
+	 * in place to avoid more disk reallocations. This presents a problem
+	 * for encryption because this constitutes rewriting the new data with
+	 * the same encryption key and IV. However, this only applies to blocks
+	 * in the MOS (particularly the spacemaps) and we do not encrypt the
+	 * MOS. We assert that the zio is allocating or an intent log write
+	 * to enforce this.
+	 */
+	ASSERT(IO_IS_ALLOCATING(zio) || ot == DMU_OT_INTENT_LOG);
+	ASSERT(BP_GET_LEVEL(bp) == 0 || ot == DMU_OT_INTENT_LOG);
+	ASSERT(spa_feature_is_active(spa, SPA_FEATURE_ENCRYPTION));
+	ASSERT3U(psize, !=, 0);
+
+	enc_buf = zio_buf_alloc(psize);
+	eabd = abd_get_from_buf(enc_buf, psize);
+	abd_take_ownership_of_buf(eabd, B_TRUE);
+
+	/*
+	 * For an explanation of what encryption parameters are stored
+	 * where, see the block comment in zio_crypt.c.
+	 */
+	if (ot == DMU_OT_INTENT_LOG) {
+		zio_crypt_decode_params_bp(bp, salt, iv);
+	} else {
+		BP_SET_CRYPT(bp, B_TRUE);
+	}
+
+	/* Perform the encryption. This should not fail */
+	VERIFY0(spa_do_crypt_abd(B_TRUE, spa, &zio->io_bookmark,
+	    BP_GET_TYPE(bp), BP_GET_DEDUP(bp), BP_SHOULD_BYTESWAP(bp),
+	    salt, iv, mac, psize, zio->io_abd, eabd, &no_crypt));
+
+	/* encode encryption metadata into the bp */
+	if (ot == DMU_OT_INTENT_LOG) {
+		/*
+		 * ZIL blocks store the MAC in the embedded checksum, so the
+		 * transform must always be applied.
+		 */
+		zio_crypt_encode_mac_zil(enc_buf, mac);
+		zio_push_transform(zio, eabd, psize, psize, NULL);
+	} else {
+		BP_SET_CRYPT(bp, B_TRUE);
+		zio_crypt_encode_params_bp(bp, salt, iv);
+		zio_crypt_encode_mac_bp(bp, mac);
+
+		if (no_crypt) {
+			ASSERT3U(ot, ==, DMU_OT_DNODE);
+			abd_free(eabd);
+		} else {
+			zio_push_transform(zio, eabd, psize, psize, NULL);
+		}
+	}
+
+	return (ZIO_PIPELINE_CONTINUE);
+}
+
+/*
+ * ==========================================================================
  * Generate and verify checksums
  * ==========================================================================
  */
@@ -3519,8 +3910,8 @@ zio_checksum_verify(zio_t *zio)
 		if (error == ECKSUM &&
 		    !(zio->io_flags & ZIO_FLAG_SPECULATIVE)) {
 			zfs_ereport_start_checksum(zio->io_spa,
-			    zio->io_vd, zio, zio->io_offset,
-			    zio->io_size, NULL, &info);
+			    zio->io_vd, &zio->io_bookmark, zio,
+			    zio->io_offset, zio->io_size, NULL, &info);
 		}
 	}
 
@@ -3765,7 +4156,6 @@ zio_done(zio_t *zio)
 		if (zio->io_type == ZIO_TYPE_WRITE && !BP_IS_HOLE(bp) &&
 		    zio->io_bp_override == NULL &&
 		    !(zio->io_flags & ZIO_FLAG_IO_REPAIR)) {
-			ASSERT(!BP_SHOULD_BYTESWAP(bp));
 			ASSERT3U(zio->io_prop.zp_copies, <=, BP_GET_NDVAS(bp));
 			ASSERT(BP_COUNT_GANG(bp) == 0 ||
 			    (BP_COUNT_GANG(bp) == BP_GET_NDVAS(bp)));
@@ -3790,26 +4180,19 @@ zio_done(zio_t *zio)
 			zio_cksum_report_t *zcr = zio->io_cksum_report;
 			uint64_t align = zcr->zcr_align;
 			uint64_t asize = P2ROUNDUP(psize, align);
-			char *abuf = NULL;
 			abd_t *adata = zio->io_abd;
 
 			if (asize != psize) {
-				adata = abd_alloc_linear(asize, B_TRUE);
+				adata = abd_alloc(asize, B_TRUE);
 				abd_copy(adata, zio->io_abd, psize);
 				abd_zero_off(adata, psize, asize - psize);
 			}
 
-			if (adata != NULL)
-				abuf = abd_borrow_buf_copy(adata, asize);
-
 			zio->io_cksum_report = zcr->zcr_next;
 			zcr->zcr_next = NULL;
-			zcr->zcr_finish(zcr, abuf);
+			zcr->zcr_finish(zcr, adata);
 			zfs_ereport_free_checksum(zcr);
 
-			if (adata != NULL)
-				abd_return_buf(adata, abuf, asize);
-
 			if (asize != psize)
 				abd_free(adata);
 		}
@@ -3827,7 +4210,8 @@ zio_done(zio_t *zio)
 		 * device is currently unavailable.
 		 */
 		if (zio->io_error != ECKSUM && vd != NULL && !vdev_is_dead(vd))
-			zfs_ereport_post(FM_EREPORT_ZFS_IO, spa, vd, zio, 0, 0);
+			zfs_ereport_post(FM_EREPORT_ZFS_IO, spa, vd,
+			    &zio->io_bookmark, zio, 0, 0);
 
 		if ((zio->io_error == EIO || !(zio->io_flags &
 		    (ZIO_FLAG_SPECULATIVE | ZIO_FLAG_DONT_PROPAGATE))) &&
@@ -3836,9 +4220,9 @@ zio_done(zio_t *zio)
 			 * For logical I/O requests, tell the SPA to log the
 			 * error and generate a logical data ereport.
 			 */
-			spa_log_error(spa, zio);
-			zfs_ereport_post(FM_EREPORT_ZFS_DATA, spa, NULL, zio,
-			    0, 0);
+			spa_log_error(spa, &zio->io_bookmark);
+			zfs_ereport_post(FM_EREPORT_ZFS_DATA, spa, NULL,
+			    &zio->io_bookmark, zio, 0, 0);
 		}
 	}
 
@@ -4029,6 +4413,7 @@ static zio_pipe_stage_t *zio_pipeline[] = {
 	zio_free_bp_init,
 	zio_issue_async,
 	zio_write_compress,
+	zio_encrypt,
 	zio_checksum_generate,
 	zio_nop_write,
 	zio_ddt_read_start,
diff --git a/usr/src/uts/common/fs/zfs/zio_checksum.c b/usr/src/uts/common/fs/zfs/zio_checksum.c
index e1c98b0b99..d5aa9303b8 100644
--- a/usr/src/uts/common/fs/zfs/zio_checksum.c
+++ b/usr/src/uts/common/fs/zfs/zio_checksum.c
@@ -242,9 +242,9 @@ zio_checksum_dedup_select(spa_t *spa, enum zio_checksum child,
  * a tuple which is guaranteed to be unique for the life of the pool.
  */
 static void
-zio_checksum_gang_verifier(zio_cksum_t *zcp, blkptr_t *bp)
+zio_checksum_gang_verifier(zio_cksum_t *zcp, const blkptr_t *bp)
 {
-	dva_t *dva = BP_IDENTITY(bp);
+	const dva_t *dva = BP_IDENTITY(bp);
 	uint64_t txg = BP_PHYSICAL_BIRTH(bp);
 
 	ASSERT(BP_IS_GANG(bp));
@@ -287,6 +287,25 @@ zio_checksum_template_init(enum zio_checksum checksum, spa_t *spa)
 	mutex_exit(&spa->spa_cksum_tmpls_lock);
 }
 
+/* convenience function to update a checksum to accommodate an encryption MAC */
+static void
+zio_checksum_handle_crypt(zio_cksum_t *cksum, zio_cksum_t *saved, boolean_t xor)
+{
+	/*
+	 * Weak checksums do not have their entropy spread evenly
+	 * across the bits of the checksum. Therefore, when truncating
+	 * a weak checksum we XOR the first 2 words with the last 2 so
+	 * that we don't "lose" any entropy unnecessarily.
+	 */
+	if (xor) {
+		cksum->zc_word[0] ^= cksum->zc_word[2];
+		cksum->zc_word[1] ^= cksum->zc_word[3];
+	}
+
+	cksum->zc_word[2] = saved->zc_word[2];
+	cksum->zc_word[3] = saved->zc_word[3];
+}
+
 /*
  * Generate the checksum.
  */
@@ -294,11 +313,13 @@ void
 zio_checksum_compute(zio_t *zio, enum zio_checksum checksum,
     abd_t *abd, uint64_t size)
 {
+	static const uint64_t zec_magic = ZEC_MAGIC;
 	blkptr_t *bp = zio->io_bp;
 	uint64_t offset = zio->io_offset;
 	zio_checksum_info_t *ci = &zio_checksum_table[checksum];
-	zio_cksum_t cksum;
+	zio_cksum_t cksum, saved;
 	spa_t *spa = zio->io_spa;
+	boolean_t insecure = (ci->ci_flags & ZCHECKSUM_FLAG_DEDUP) == 0;
 
 	ASSERT((uint_t)checksum < ZIO_CHECKSUM_FUNCTIONS);
 	ASSERT(ci->ci_func[0] != NULL);
@@ -306,40 +327,68 @@ zio_checksum_compute(zio_t *zio, enum zio_checksum checksum,
 	zio_checksum_template_init(checksum, spa);
 
 	if (ci->ci_flags & ZCHECKSUM_FLAG_EMBEDDED) {
-		zio_eck_t *eck;
-		void *data = abd_to_buf(abd);
+		zio_eck_t eck;
+		size_t eck_offset;
+
+		bzero(&saved, sizeof (zio_cksum_t));
 
 		if (checksum == ZIO_CHECKSUM_ZILOG2) {
-			zil_chain_t *zilc = data;
+			zil_chain_t zilc;
+			abd_copy_to_buf(&zilc, abd, sizeof (zil_chain_t));
 
-			size = P2ROUNDUP_TYPED(zilc->zc_nused, ZIL_MIN_BLKSZ,
+			size = P2ROUNDUP_TYPED(zilc.zc_nused, ZIL_MIN_BLKSZ,
 			    uint64_t);
-			eck = &zilc->zc_eck;
+			eck = zilc.zc_eck;
+			eck_offset = offsetof(zil_chain_t, zc_eck);
 		} else {
-			eck = (zio_eck_t *)((char *)data + size) - 1;
+			eck_offset = size - sizeof (zio_eck_t);
+			abd_copy_to_buf_off(&eck, abd, eck_offset,
+			    sizeof (zio_eck_t));
 		}
-		if (checksum == ZIO_CHECKSUM_GANG_HEADER)
-			zio_checksum_gang_verifier(&eck->zec_cksum, bp);
-		else if (checksum == ZIO_CHECKSUM_LABEL)
-			zio_checksum_label_verifier(&eck->zec_cksum, offset);
-		else
-			bp->blk_cksum = eck->zec_cksum;
-		eck->zec_magic = ZEC_MAGIC;
+
+		if (checksum == ZIO_CHECKSUM_GANG_HEADER) {
+			zio_checksum_gang_verifier(&eck.zec_cksum, bp);
+		} else if (checksum == ZIO_CHECKSUM_LABEL) {
+			zio_checksum_label_verifier(&eck.zec_cksum, offset);
+		} else {
+			saved = eck.zec_cksum;
+			eck.zec_cksum = bp->blk_cksum;
+		}
+
+		abd_copy_from_buf_off(abd, &zec_magic,
+		    eck_offset + offsetof(zio_eck_t, zec_magic),
+		    sizeof (zec_magic));
+		abd_copy_from_buf_off(abd, &eck.zec_cksum,
+		    eck_offset + offsetof(zio_eck_t, zec_cksum),
+		    sizeof (zio_cksum_t));
+
 		ci->ci_func[0](abd, size, spa->spa_cksum_tmpls[checksum],
 		    &cksum);
-		eck->zec_cksum = cksum;
+		if (bp != NULL && BP_USES_CRYPT(bp) &&
+		    BP_GET_TYPE(bp) != DMU_OT_OBJSET)
+			zio_checksum_handle_crypt(&cksum, &saved, insecure);
+
+		abd_copy_from_buf_off(abd, &cksum,
+		    eck_offset + offsetof(zio_eck_t, zec_cksum),
+		    sizeof (zio_cksum_t));
 	} else {
+		saved = bp->blk_cksum;
 		ci->ci_func[0](abd, size, spa->spa_cksum_tmpls[checksum],
-		    &bp->blk_cksum);
+		    &cksum);
+		if (BP_USES_CRYPT(bp) && BP_GET_TYPE(bp) != DMU_OT_OBJSET)
+			zio_checksum_handle_crypt(&cksum, &saved, insecure);
+		bp->blk_cksum = cksum;
 	}
 }
 
 int
-zio_checksum_error_impl(spa_t *spa, blkptr_t *bp, enum zio_checksum checksum,
-    abd_t *abd, uint64_t size, uint64_t offset, zio_bad_cksum_t *info)
+zio_checksum_error_impl(spa_t *spa, const blkptr_t *bp,
+    enum zio_checksum checksum, abd_t *abd, uint64_t size,
+    uint64_t offset, zio_bad_cksum_t *info)
 {
 	zio_checksum_info_t *ci = &zio_checksum_table[checksum];
 	zio_cksum_t actual_cksum, expected_cksum;
+	zio_eck_t eck;
 	int byteswap;
 
 	if (checksum >= ZIO_CHECKSUM_FUNCTIONS || ci->ci_func[0] == NULL)
@@ -348,33 +397,37 @@ zio_checksum_error_impl(spa_t *spa, blkptr_t *bp, enum zio_checksum checksum,
 	zio_checksum_template_init(checksum, spa);
 
 	if (ci->ci_flags & ZCHECKSUM_FLAG_EMBEDDED) {
-		zio_eck_t *eck;
 		zio_cksum_t verifier;
-		uint64_t data_size = size;
-		void *data = abd_borrow_buf_copy(abd, data_size);
+		size_t eck_offset;
 
 		if (checksum == ZIO_CHECKSUM_ZILOG2) {
-			zil_chain_t *zilc = data;
+			zil_chain_t zilc;
 			uint64_t nused;
 
-			eck = &zilc->zc_eck;
-			if (eck->zec_magic == ZEC_MAGIC) {
-				nused = zilc->zc_nused;
-			} else if (eck->zec_magic == BSWAP_64(ZEC_MAGIC)) {
-				nused = BSWAP_64(zilc->zc_nused);
+			abd_copy_to_buf(&zilc, abd, sizeof (zil_chain_t));
+
+			eck = zilc.zc_eck;
+			eck_offset = offsetof(zil_chain_t, zc_eck) +
+			    offsetof(zio_eck_t, zec_cksum);
+
+			if (eck.zec_magic == ZEC_MAGIC) {
+				nused = zilc.zc_nused;
+			} else if (eck.zec_magic == BSWAP_64(ZEC_MAGIC)) {
+				nused = BSWAP_64(zilc.zc_nused);
 			} else {
-				abd_return_buf(abd, data, data_size);
 				return (SET_ERROR(ECKSUM));
 			}
 
-			if (nused > data_size) {
-				abd_return_buf(abd, data, data_size);
+			if (nused > size) {
 				return (SET_ERROR(ECKSUM));
 			}
 
 			size = P2ROUNDUP_TYPED(nused, ZIL_MIN_BLKSZ, uint64_t);
 		} else {
-			eck = (zio_eck_t *)((char *)data + data_size) - 1;
+			eck_offset = size - sizeof (zio_eck_t);
+			abd_copy_to_buf_off(&eck, abd, eck_offset,
+			    sizeof (zio_eck_t));
+			eck_offset += offsetof(zio_eck_t, zec_cksum);
 		}
 
 		if (checksum == ZIO_CHECKSUM_GANG_HEADER)
@@ -384,20 +437,21 @@ zio_checksum_error_impl(spa_t *spa, blkptr_t *bp, enum zio_checksum checksum,
 		else
 			verifier = bp->blk_cksum;
 
-		byteswap = (eck->zec_magic == BSWAP_64(ZEC_MAGIC));
+		byteswap = (eck.zec_magic == BSWAP_64(ZEC_MAGIC));
 
 		if (byteswap)
 			byteswap_uint64_array(&verifier, sizeof (zio_cksum_t));
 
-		size_t eck_offset = (size_t)(&eck->zec_cksum) - (size_t)data;
-		expected_cksum = eck->zec_cksum;
-		eck->zec_cksum = verifier;
-		abd_return_buf_copy(abd, data, data_size);
+		expected_cksum = eck.zec_cksum;
+
+		abd_copy_from_buf_off(abd, &verifier, eck_offset,
+		    sizeof (zio_cksum_t));
 
 		ci->ci_func[byteswap](abd, size,
 		    spa->spa_cksum_tmpls[checksum], &actual_cksum);
-		abd_copy_from_buf_off(abd, &expected_cksum,
-		    eck_offset, sizeof (zio_cksum_t));
+
+		abd_copy_from_buf_off(abd, &expected_cksum, eck_offset,
+		    sizeof (zio_cksum_t));
 
 		if (byteswap) {
 			byteswap_uint64_array(&expected_cksum,
@@ -410,6 +464,26 @@ zio_checksum_error_impl(spa_t *spa, blkptr_t *bp, enum zio_checksum checksum,
 		    spa->spa_cksum_tmpls[checksum], &actual_cksum);
 	}
 
+	/*
+	 * MAC checksums are a special case since half of this checksum will
+	 * actually be the encryption MAC. This will be verified by the
+	 * decryption process, so we just check the truncated checksum now.
+	 * Objset blocks use embedded MACs so we don't truncate the checksum
+	 * for them.
+	 */
+	if (bp != NULL && BP_USES_CRYPT(bp) &&
+	    BP_GET_TYPE(bp) != DMU_OT_OBJSET) {
+		if (!(ci->ci_flags & ZCHECKSUM_FLAG_DEDUP)) {
+			actual_cksum.zc_word[0] ^= actual_cksum.zc_word[2];
+			actual_cksum.zc_word[1] ^= actual_cksum.zc_word[3];
+		}
+
+		actual_cksum.zc_word[2] = 0;
+		actual_cksum.zc_word[3] = 0;
+		expected_cksum.zc_word[2] = 0;
+		expected_cksum.zc_word[3] = 0;
+	}
+
 	if (info != NULL) {
 		info->zbc_expected = expected_cksum;
 		info->zbc_actual = actual_cksum;
@@ -418,7 +492,6 @@ zio_checksum_error_impl(spa_t *spa, blkptr_t *bp, enum zio_checksum checksum,
 		info->zbc_injected = 0;
 		info->zbc_has_cksum = 1;
 	}
-
 	if (!ZIO_CHECKSUM_EQUAL(actual_cksum, expected_cksum))
 		return (SET_ERROR(ECKSUM));
 
diff --git a/usr/src/uts/common/fs/zfs/zio_compress.c b/usr/src/uts/common/fs/zfs/zio_compress.c
index 9882806a7d..4ae2581e3b 100644
--- a/usr/src/uts/common/fs/zfs/zio_compress.c
+++ b/usr/src/uts/common/fs/zfs/zio_compress.c
@@ -144,20 +144,31 @@ zio_decompress_data_buf(enum zio_compress c, void *src, void *dst,
 	return (ci->ci_decompress(src, dst, s_len, d_len, ci->ci_level));
 }
 
+void *zio_decompress_failed_buf;
+
 int
 zio_decompress_data(enum zio_compress c, abd_t *src, void *dst,
     size_t s_len, size_t d_len)
 {
 	void *tmp = abd_borrow_buf_copy(src, s_len);
 	int ret = zio_decompress_data_buf(c, tmp, dst, s_len, d_len);
-	abd_return_buf(src, tmp, s_len);
 
 	/*
-	 * Decompression shouldn't fail, because we've already verifyied
+	 * Decompression shouldn't fail, because we've already verified
 	 * the checksum.  However, for extra protection (e.g. against bitflips
 	 * in non-ECC RAM), we handle this error (and test it).
 	 */
-	ASSERT0(ret);
+	if (ret != 0) {
+		zio_decompress_failed_buf = kmem_alloc(s_len, KM_SLEEP);
+		bcopy(tmp, zio_decompress_failed_buf, s_len);
+		panic("decompression failed "
+		    "err=%u c=%u buf=%p s_len=%u d_len=%u",
+		    ret, (int)c, zio_decompress_failed_buf,
+		    (int)s_len, (int)d_len);
+	}
+
+	abd_return_buf(src, tmp, s_len);
+
 	if (zio_decompress_fail_fraction != 0 &&
 	    spa_get_random(zio_decompress_fail_fraction) == 0)
 		ret = SET_ERROR(EINVAL);
diff --git a/usr/src/uts/common/fs/zfs/zio_crypt.c b/usr/src/uts/common/fs/zfs/zio_crypt.c
new file mode 100644
index 0000000000..1d6b8286e3
--- /dev/null
+++ b/usr/src/uts/common/fs/zfs/zio_crypt.c
@@ -0,0 +1,2009 @@
+/*
+ * CDDL HEADER START
+ *
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2017, Datto, Inc. All rights reserved.
+ */
+
+#include <sys/zio_crypt.h>
+#include <sys/dmu.h>
+#include <sys/dmu_objset.h>
+#include <sys/dnode.h>
+#include <sys/fs/zfs.h>
+#include <sys/zio.h>
+#include <sys/zil.h>
+#include <sys/sha2.h>
+#include <sys/hkdf.h>
+
+/*
+ * This file is responsible for handling all of the details of generating
+ * encryption parameters and performing encryption and authentication.
+ *
+ * BLOCK ENCRYPTION PARAMETERS:
+ * Encryption /Authentication Algorithm Suite (crypt):
+ * The encryption algorithm, mode, and key length we are going to use. We
+ * currently support AES in either GCM or CCM modes with 128, 192, and 256 bit
+ * keys. All authentication is currently done with SHA512-HMAC.
+ *
+ * Plaintext:
+ * The unencrypted data that we want to encrypt.
+ *
+ * Initialization Vector (IV):
+ * An initialization vector for the encryption algorithms. This is used to
+ * "tweak" the encryption algorithms so that two blocks of the same data are
+ * encrypted into different ciphertext outputs, thus obfuscating block patterns.
+ * The supported encryption modes (AES-GCM and AES-CCM) require that an IV is
+ * never reused with the same encryption key. This value is stored unencrypted
+ * and must simply be provided to the decryption function. We use a 96 bit IV
+ * (as recommended by NIST) for all block encryption. For non-dedup blocks we
+ * derive the IV randomly. The first 64 bits of the IV are stored in the second
+ * word of DVA[2] and the remaining 32 bits are stored in the upper 32 bits of
+ * blk_fill. This is safe because encrypted blocks can't use the upper 32 bits
+ * of blk_fill. We only encrypt level 0 blocks, which normally have a fill count
+ * of 1. The only exception is for DMU_OT_DNODE objects, where the fill count of
+ * level 0 blocks is the number of allocated dnodes in that block. The on-disk
+ * format supports at most 2^15 slots per L0 dnode block, because the maximum
+ * block size is 16MB (2^24). In either case, for level 0 blocks this number
+ * will still be smaller than UINT32_MAX so it is safe to store the IV in the
+ * top 32 bits of blk_fill, while leaving the bottom 32 bits of the fill count
+ * for the dnode code.
+ *
+ * Master key:
+ * This is the most important secret data of an encrypted dataset. It is used
+ * along with the salt to generate the actual encryption keys via HKDF. We
+ * do not use the master key to directly encrypt any data because there are
+ * theoretical limits on how much data can actually be safely encrypted with
+ * any encryption mode. The master key is stored encrypted on disk with the
+ * user's wrapping key. Its length is determined by the encryption algorithm.
+ * For details on how this is stored see the block comment in dsl_crypt.c
+ *
+ * Salt:
+ * Used as an input to the HKDF function, along with the master key. We use a
+ * 64 bit salt, stored unencrypted in the first word of DVA[2]. Any given salt
+ * can be used for encrypting many blocks, so we cache the current salt and the
+ * associated derived key in zio_crypt_t so we do not need to derive it again
+ * needlessly.
+ *
+ * Encryption Key:
+ * A secret binary key, generated from an HKDF function used to encrypt and
+ * decrypt data.
+ *
+ * Message Authentication Code (MAC):
+ * The MAC is an output of authenticated encryption modes such as AES-GCM and
+ * AES-CCM. Its purpose is to ensure that an attacker cannot modify encrypted
+ * data on disk and return garbage to the application. Effectively, it is a
+ * checksum that can not be reproduced by an attacker. We store the MAC in the
+ * second 128 bits of blk_cksum, leaving the first 128 bits for a truncated
+ * regular checksum of the ciphertext which can be used for scrubbing.
+ *
+ * OBJECT AUTHENTICATION:
+ * Some object types, such as DMU_OT_MASTER_NODE cannot be encrypted because
+ * they contain some info that always needs to be readable. To prevent this
+ * data from being altered, we authenticate this data using SHA512-HMAC. This
+ * will produce a MAC (similar to the one produced via encryption) which can
+ * be used to verify the object was not modified. HMACs do not require key
+ * rotation or IVs, so we can keep up to the full 3 copies of authenticated
+ * data.
+ *
+ * ZIL ENCRYPTION:
+ * ZIL blocks have their bp written to disk ahead of the associated data, so we
+ * cannot store the MAC there as we normally do. For these blocks the MAC is
+ * stored in the embedded checksum within the zil_chain_t header. The salt and
+ * IV are generated for the block on bp allocation instead of at encryption
+ * time. In addition, ZIL blocks have some pieces that must be left in plaintext
+ * for claiming even though all of the sensitive user data still needs to be
+ * encrypted. The function zio_crypt_init_uios_zil() handles parsing which
+ * pieces of the block need to be encrypted. All data that is not encrypted is
+ * authenticated using the AAD mechanisms that the supported encryption modes
+ * provide for. In order to preserve the semantics of the ZIL for encrypted
+ * datasets, the ZIL is not protected at the objset level as described below.
+ *
+ * DNODE ENCRYPTION:
+ * Similarly to ZIL blocks, the core part of each dnode_phys_t needs to be left
+ * in plaintext for scrubbing and claiming, but the bonus buffers might contain
+ * sensitive user data. The function zio_crypt_init_uios_dnode() handles parsing
+ * which pieces of the block need to be encrypted. For more details about
+ * dnode authentication and encryption, see zio_crypt_init_uios_dnode().
+ *
+ * OBJECT SET AUTHENTICATION:
+ * Up to this point, everything we have encrypted and authenticated has been
+ * at level 0 (or -2 for the ZIL). If we did not do any further work the
+ * on-disk format would be susceptible to attacks that deleted or rearranged
+ * the order of level 0 blocks. Ideally, the cleanest solution would be to
+ * maintain a tree of authentication MACs going up the bp tree. However, this
+ * presents a problem for raw sends. Send files do not send information about
+ * indirect blocks so there would be no convenient way to transfer the MACs and
+ * they cannot be recalculated on the receive side without the master key which
+ * would defeat one of the purposes of raw sends in the first place. Instead,
+ * for the indirect levels of the bp tree, we use a regular SHA512 of the MACs
+ * from the level below. We also include some portable fields from blk_prop such
+ * as the lsize and compression algorithm to prevent the data from being
+ * misinterpreted.
+ *
+ * At the objset level, we maintain 2 separate 256 bit MACs in the
+ * objset_phys_t. The first one is "portable" and is the logical root of the
+ * MAC tree maintained in the metadnode's bps. The second is "local" and is
+ * used as the root MAC for the user accounting objects, which are also not
+ * transferred via "zfs send". The portable MAC is sent in the DRR_BEGIN payload
+ * of the send file. The useraccounting code ensures that the useraccounting
+ * info is not present upon a receive, so the local MAC can simply be cleared
+ * out at that time. For more info about objset_phys_t authentication, see
+ * zio_crypt_do_objset_hmacs().
+ *
+ * CONSIDERATIONS FOR DEDUP:
+ * In order for dedup to work, blocks that we want to dedup with one another
+ * need to use the same IV and encryption key, so that they will have the same
+ * ciphertext. Normally, one should never reuse an IV with the same encryption
+ * key or else AES-GCM and AES-CCM can both actually leak the plaintext of both
+ * blocks. In this case, however, since we are using the same plaindata as
+ * well all that we end up with is a duplicate of the original ciphertext we
+ * already had. As a result, an attacker with read access to the raw disk will
+ * be able to tell which blocks are the same but this information is given away
+ * by dedup anyway. In order to get the same IVs and encryption keys for
+ * equivalent blocks of data we use an HMAC of the plaindata. We use an HMAC
+ * here so that a reproducible checksum of the plaindata is never available to
+ * the attacker. The HMAC key is kept alongside the master key, encrypted on
+ * disk. The first 64 bits of the HMAC are used in place of the random salt, and
+ * the next 96 bits are used as the IV. As a result of this mechanism, dedup
+ * will only work within a clone family since encrypted dedup requires use of
+ * the same master and HMAC keys.
+ */
+
+/*
+ * After encrypting many blocks with the same key we may start to run up
+ * against the theoretical limits of how much data can securely be encrypted
+ * with a single key using the supported encryption modes. The most obvious
+ * limitation is that our risk of generating 2 equivalent 96 bit IVs increases
+ * the more IVs we generate (which both GCM and CCM modes strictly forbid).
+ * This risk actually grows surprisingly quickly over time according to the
+ * Birthday Problem. With a total IV space of 2^(96 bits), and assuming we have
+ * generated n IVs with a cryptographically secure RNG, the approximate
+ * probability p(n) of a collision is given as:
+ *
+ * p(n) ~= 1 - e^(-n*(n-1)/(2*(2^96)))
+ *
+ * [http://www.math.cornell.edu/~mec/2008-2009/TianyiZheng/Birthday.html]
+ *
+ * Assuming that we want to ensure that p(n) never goes over 1 / 1 trillion
+ * we must not write more than 398,065,730 blocks with the same encryption key.
+ * Therefore, we rotate our keys after 400,000,000 blocks have been written by
+ * generating a new random 64 bit salt for our HKDF encryption key generation
+ * function.
+ */
+#define	ZFS_KEY_MAX_SALT_USES_DEFAULT	400000000
+#define	ZFS_CURRENT_MAX_SALT_USES	\
+	(MIN(zfs_key_max_salt_uses, ZFS_KEY_MAX_SALT_USES_DEFAULT))
+unsigned long zfs_key_max_salt_uses = ZFS_KEY_MAX_SALT_USES_DEFAULT;
+
+/*
+ * Set to a nonzero value to cause zio_do_crypt_uio() to fail 1/this many
+ * calls, to test decryption error handling code paths.
+ */
+uint64_t zio_decrypt_fail_fraction = 0;
+
+typedef struct blkptr_auth_buf {
+	uint64_t bab_prop;			/* blk_prop - portable mask */
+	uint8_t bab_mac[ZIO_DATA_MAC_LEN];	/* MAC from blk_cksum */
+	uint64_t bab_pad;			/* reserved for future use */
+} blkptr_auth_buf_t;
+
+zio_crypt_info_t zio_crypt_table[ZIO_CRYPT_FUNCTIONS] = {
+	{"",			ZC_TYPE_NONE,	0,	"inherit"},
+	{"",			ZC_TYPE_NONE,	0,	"on"},
+	{"",			ZC_TYPE_NONE,	0,	"off"},
+	{SUN_CKM_AES_CCM,	ZC_TYPE_CCM,	16,	"aes-128-ccm"},
+	{SUN_CKM_AES_CCM,	ZC_TYPE_CCM,	24,	"aes-192-ccm"},
+	{SUN_CKM_AES_CCM,	ZC_TYPE_CCM,	32,	"aes-256-ccm"},
+	{SUN_CKM_AES_GCM,	ZC_TYPE_GCM,	16,	"aes-128-gcm"},
+	{SUN_CKM_AES_GCM,	ZC_TYPE_GCM,	24,	"aes-192-gcm"},
+	{SUN_CKM_AES_GCM,	ZC_TYPE_GCM,	32,	"aes-256-gcm"}
+};
+
+void
+zio_crypt_key_destroy(zio_crypt_key_t *key)
+{
+	rw_destroy(&key->zk_salt_lock);
+
+	/* free crypto templates */
+	crypto_destroy_ctx_template(key->zk_current_tmpl);
+	crypto_destroy_ctx_template(key->zk_hmac_tmpl);
+
+	/* zero out sensitive data */
+	bzero(key, sizeof (zio_crypt_key_t));
+}
+
+int
+zio_crypt_key_init(uint64_t crypt, zio_crypt_key_t *key)
+{
+	int ret;
+	crypto_mechanism_t mech;
+	uint_t keydata_len;
+
+	ASSERT(key != NULL);
+	ASSERT3U(crypt, <, ZIO_CRYPT_FUNCTIONS);
+
+	keydata_len = zio_crypt_table[crypt].ci_keylen;
+	bzero(key, sizeof (zio_crypt_key_t));
+
+	/* fill keydata buffers and salt with random data */
+	ret = random_get_bytes((uint8_t *)&key->zk_guid, sizeof (uint64_t));
+	if (ret != 0)
+		goto error;
+
+	ret = random_get_bytes(key->zk_master_keydata, keydata_len);
+	if (ret != 0)
+		goto error;
+
+	ret = random_get_bytes(key->zk_hmac_keydata, SHA512_HMAC_KEYLEN);
+	if (ret != 0)
+		goto error;
+
+	ret = random_get_bytes(key->zk_salt, ZIO_DATA_SALT_LEN);
+	if (ret != 0)
+		goto error;
+
+	/* derive the current key from the master key */
+	ret = hkdf_sha512(key->zk_master_keydata, keydata_len, NULL, 0,
+	    key->zk_salt, ZIO_DATA_SALT_LEN, key->zk_current_keydata,
+	    keydata_len);
+	if (ret != 0)
+		goto error;
+
+	/* initialize keys for the ICP; ck_data must point at the raw bytes */
+	key->zk_current_key.ck_format = CRYPTO_KEY_RAW;
+	key->zk_current_key.ck_data = key->zk_current_keydata;
+	key->zk_current_key.ck_length = CRYPTO_BYTES2BITS(keydata_len);
+
+	key->zk_hmac_key.ck_format = CRYPTO_KEY_RAW;
+	key->zk_hmac_key.ck_data = key->zk_hmac_keydata;
+	key->zk_hmac_key.ck_length = CRYPTO_BYTES2BITS(SHA512_HMAC_KEYLEN);
+
+	/*
+	 * Initialize the crypto templates. It's ok if this fails because
+	 * this is just an optimization.
+	 */
+	mech.cm_type = crypto_mech2id(zio_crypt_table[crypt].ci_mechname);
+	ret = crypto_create_ctx_template(&mech, &key->zk_current_key,
+	    &key->zk_current_tmpl, KM_SLEEP);
+	if (ret != CRYPTO_SUCCESS)
+		key->zk_current_tmpl = NULL;
+
+	mech.cm_type = crypto_mech2id(SUN_CKM_SHA512_HMAC);
+	ret = crypto_create_ctx_template(&mech, &key->zk_hmac_key,
+	    &key->zk_hmac_tmpl, KM_SLEEP);
+	if (ret != CRYPTO_SUCCESS)
+		key->zk_hmac_tmpl = NULL;
+
+	key->zk_crypt = crypt;
+	key->zk_version = ZIO_CRYPT_KEY_CURRENT_VERSION;
+	key->zk_salt_count = 0;
+	rw_init(&key->zk_salt_lock, NULL, RW_DEFAULT, NULL);
+
+	return (0);
+
+error:
+	zio_crypt_key_destroy(key);
+	return (ret);
+}
+
+/*
+ * Rotate to a fresh random salt and rederive the current key from it.
+ */
+static int
+zio_crypt_key_change_salt(zio_crypt_key_t *key)
+{
+	int ret = 0;
+	uint8_t salt[ZIO_DATA_SALT_LEN];
+	crypto_mechanism_t mech = { 0 };
+	uint_t keydata_len = zio_crypt_table[key->zk_crypt].ci_keylen;
+
+	/* generate a new salt */
+	ret = random_get_bytes(salt, ZIO_DATA_SALT_LEN);
+	if (ret != 0)
+		goto error;
+
+	rw_enter(&key->zk_salt_lock, RW_WRITER);
+
+	/* someone beat us to the salt rotation, just unlock and return */
+	if (key->zk_salt_count < ZFS_CURRENT_MAX_SALT_USES)
+		goto out_unlock;
+
+	/* derive the current key from the master key and the new salt */
+	ret = hkdf_sha512(key->zk_master_keydata, keydata_len, NULL, 0,
+	    salt, ZIO_DATA_SALT_LEN, key->zk_current_keydata, keydata_len);
+	if (ret != 0)
+		goto out_unlock;
+
+	/* assign the salt and reset the usage count */
+	bcopy(salt, key->zk_salt, ZIO_DATA_SALT_LEN);
+	key->zk_salt_count = 0;
+
+	/* rebuild the template (an optimization; failure leaves ret at 0) */
+	crypto_destroy_ctx_template(key->zk_current_tmpl);
+	mech.cm_type =
+	    crypto_mech2id(zio_crypt_table[key->zk_crypt].ci_mechname);
+	if (crypto_create_ctx_template(&mech, &key->zk_current_key,
+	    &key->zk_current_tmpl, KM_SLEEP) != CRYPTO_SUCCESS)
+		key->zk_current_tmpl = NULL;
+
+out_unlock:
+	rw_exit(&key->zk_salt_lock);
+error:
+	return (ret);
+}
+
+/* See comment above zfs_key_max_salt_uses definition for details */
+int
+zio_crypt_key_get_salt(zio_crypt_key_t *key, uint8_t *salt)
+{
+	int ret;
+	boolean_t salt_change;
+
+	rw_enter(&key->zk_salt_lock, RW_READER);
+
+	bcopy(key->zk_salt, salt, ZIO_DATA_SALT_LEN);
+	salt_change = (atomic_inc_64_nv(&key->zk_salt_count) >=
+	    ZFS_CURRENT_MAX_SALT_USES);
+
+	rw_exit(&key->zk_salt_lock);
+
+	if (salt_change) {
+		ret = zio_crypt_key_change_salt(key);
+		if (ret != 0)
+			goto error;
+	}
+
+	return (0);
+
+error:
+	return (ret);
+}
+
+void *failed_decrypt_buf;
+int failed_decrypt_size;
+
+/*
+ * This function handles all encryption and decryption in zfs. When
+ * encrypting it expects puio to reference the plaintext and cuio to
+ * reference the ciphertext. cuio must have enough space for the
+ * ciphertext + room for a MAC. datalen should be the length of the
+ * plaintext / ciphertext alone.
+ */
+/* ARGSUSED */
+static int
+zio_do_crypt_uio(boolean_t encrypt, uint64_t crypt, crypto_key_t *key,
+    crypto_ctx_template_t tmpl, uint8_t *ivbuf, uint_t datalen,
+    uio_t *puio, uio_t *cuio, uint8_t *authbuf, uint_t auth_len)
+{
+	int ret;
+	crypto_data_t plaindata, cipherdata;
+	CK_AES_CCM_PARAMS ccmp;
+	CK_AES_GCM_PARAMS gcmp;
+	crypto_mechanism_t mech;
+	zio_crypt_info_t crypt_info;
+	uint_t plain_full_len, maclen;
+
+	ASSERT3U(crypt, <, ZIO_CRYPT_FUNCTIONS);
+	ASSERT3U(key->ck_format, ==, CRYPTO_KEY_RAW);
+
+	/* lookup the encryption info */
+	crypt_info = zio_crypt_table[crypt];
+
+	/* the mac will always be the last iovec_t in the cipher uio */
+	maclen = cuio->uio_iov[cuio->uio_iovcnt - 1].iov_len;
+
+	ASSERT(maclen <= ZIO_DATA_MAC_LEN);
+
+	/* setup encryption mechanism (same as crypt) */
+	mech.cm_type = crypto_mech2id(crypt_info.ci_mechname);
+
+	/*
+	 * Strangely, the ICP requires that plain_full_len must include
+	 * the MAC length when decrypting, even though the UIO does not
+	 * need to have the extra space allocated.
+	 */
+	if (encrypt) {
+		plain_full_len = datalen;
+	} else {
+		plain_full_len = datalen + maclen;
+	}
+
+	/*
+	 * setup encryption params (currently only AES CCM and AES GCM
+	 * are supported)
+	 */
+	if (crypt_info.ci_crypt_type == ZC_TYPE_CCM) {
+		ccmp.ulNonceSize = ZIO_DATA_IV_LEN;
+		ccmp.ulAuthDataSize = auth_len;
+		ccmp.authData = authbuf;
+		ccmp.ulMACSize = maclen;
+		ccmp.nonce = ivbuf;
+		ccmp.ulDataSize = plain_full_len;
+
+		mech.cm_param = (char *)(&ccmp);
+		mech.cm_param_len = sizeof (CK_AES_CCM_PARAMS);
+	} else {
+		gcmp.ulIvLen = ZIO_DATA_IV_LEN;
+		gcmp.ulIvBits = CRYPTO_BYTES2BITS(ZIO_DATA_IV_LEN);
+		gcmp.ulAADLen = auth_len;
+		gcmp.pAAD = authbuf;
+		gcmp.ulTagBits = CRYPTO_BYTES2BITS(maclen);
+		gcmp.pIv = ivbuf;
+
+		mech.cm_param = (char *)(&gcmp);
+		mech.cm_param_len = sizeof (CK_AES_GCM_PARAMS);
+	}
+
+	/* populate the cipher and plain data structs. */
+	plaindata.cd_format = CRYPTO_DATA_UIO;
+	plaindata.cd_offset = 0;
+	plaindata.cd_uio = puio;
+	plaindata.cd_miscdata = NULL;
+	plaindata.cd_length = plain_full_len;
+
+	cipherdata.cd_format = CRYPTO_DATA_UIO;
+	cipherdata.cd_offset = 0;
+	cipherdata.cd_uio = cuio;
+	cipherdata.cd_miscdata = NULL;
+	cipherdata.cd_length = datalen + maclen;
+
+	/* perform the actual encryption */
+	if (encrypt) {
+		ret = crypto_encrypt(&mech, &plaindata, key, tmpl, &cipherdata,
+		    NULL);
+		if (ret != CRYPTO_SUCCESS) {
+			ret = SET_ERROR(EIO);
+			goto error;
+		}
+	} else {
+		if (zio_decrypt_fail_fraction != 0 &&
+		    spa_get_random(zio_decrypt_fail_fraction) == 0) {
+			ret = CRYPTO_INVALID_MAC;
+		} else {
+			ret = crypto_decrypt(&mech, &cipherdata,
+			    key, tmpl, &plaindata, NULL);
+		}
+		if (ret != CRYPTO_SUCCESS) {
+			ASSERT3U(ret, ==, CRYPTO_INVALID_MAC);
+			ret = SET_ERROR(ECKSUM);
+			goto error;
+		}
+	}
+
+	return (0);
+
+error:
+	return (ret);
+}
+
+int
+zio_crypt_key_wrap(crypto_key_t *cwkey, zio_crypt_key_t *key, uint8_t *iv,
+    uint8_t *mac, uint8_t *keydata_out, uint8_t *hmac_keydata_out)
+{
+	int ret;
+	uio_t puio, cuio;
+	uint64_t aad[3];
+	iovec_t plain_iovecs[2], cipher_iovecs[3];
+	uint64_t crypt = key->zk_crypt;
+	uint_t enc_len, keydata_len, aad_len;
+
+	ASSERT3U(crypt, <, ZIO_CRYPT_FUNCTIONS);
+	ASSERT3U(cwkey->ck_format, ==, CRYPTO_KEY_RAW);
+
+	keydata_len = zio_crypt_table[crypt].ci_keylen;
+
+	/* generate iv for wrapping the master and hmac key */
+	ret = random_get_pseudo_bytes(iv, WRAPPING_IV_LEN);
+	if (ret != 0)
+		goto error;
+
+	/* initialize uio_ts */
+	plain_iovecs[0].iov_base = (char *)key->zk_master_keydata;
+	plain_iovecs[0].iov_len = keydata_len;
+	plain_iovecs[1].iov_base = (char *)key->zk_hmac_keydata;
+	plain_iovecs[1].iov_len = SHA512_HMAC_KEYLEN;
+
+	cipher_iovecs[0].iov_base = (char *)keydata_out;
+	cipher_iovecs[0].iov_len = keydata_len;
+	cipher_iovecs[1].iov_base = (char *)hmac_keydata_out;
+	cipher_iovecs[1].iov_len = SHA512_HMAC_KEYLEN;
+	cipher_iovecs[2].iov_base = (char *)mac;
+	cipher_iovecs[2].iov_len = WRAPPING_MAC_LEN;
+
+	/*
+	 * Although we don't support writing to the old format, we do
+	 * support rewrapping the key so that the user can move and
+	 * quarantine datasets on the old format.
+	 */
+	if (key->zk_version == 0) {
+		aad_len = sizeof (uint64_t);
+		aad[0] = LE_64(key->zk_guid);
+	} else {
+		ASSERT3U(key->zk_version, ==, ZIO_CRYPT_KEY_CURRENT_VERSION);
+		aad_len = sizeof (uint64_t) * 3;
+		aad[0] = LE_64(key->zk_guid);
+		aad[1] = LE_64(crypt);
+		aad[2] = LE_64(key->zk_version);
+	}
+
+	enc_len = zio_crypt_table[crypt].ci_keylen + SHA512_HMAC_KEYLEN;
+	puio.uio_iov = plain_iovecs;
+	puio.uio_iovcnt = 2;
+	puio.uio_segflg = UIO_SYSSPACE;
+	cuio.uio_iov = cipher_iovecs;
+	cuio.uio_iovcnt = 3;
+	cuio.uio_segflg = UIO_SYSSPACE;
+
+	/* encrypt the keys and store the resulting ciphertext and mac */
+	ret = zio_do_crypt_uio(B_TRUE, crypt, cwkey, NULL, iv, enc_len,
+	    &puio, &cuio, (uint8_t *)aad, aad_len);
+	if (ret != 0)
+		goto error;
+
+	return (0);
+
+error:
+	return (ret);
+}
+
+int
+zio_crypt_key_unwrap(crypto_key_t *cwkey, uint64_t crypt, uint64_t version,
+    uint64_t guid, uint8_t *keydata, uint8_t *hmac_keydata, uint8_t *iv,
+    uint8_t *mac, zio_crypt_key_t *key)
+{
+	int ret;
+	crypto_mechanism_t mech;
+	uio_t puio, cuio;
+	uint64_t aad[3];
+	iovec_t plain_iovecs[2], cipher_iovecs[3];
+	uint_t enc_len, keydata_len, aad_len;
+
+	ASSERT3U(crypt, <, ZIO_CRYPT_FUNCTIONS);
+	ASSERT3U(cwkey->ck_format, ==, CRYPTO_KEY_RAW);
+
+	rw_init(&key->zk_salt_lock, NULL, RW_DEFAULT, NULL);
+	keydata_len = zio_crypt_table[crypt].ci_keylen;
+
+	/* initialize uio_ts */
+	plain_iovecs[0].iov_base = (char *)key->zk_master_keydata;
+	plain_iovecs[0].iov_len = keydata_len;
+	plain_iovecs[1].iov_base = (char *)key->zk_hmac_keydata;
+	plain_iovecs[1].iov_len = SHA512_HMAC_KEYLEN;
+
+	cipher_iovecs[0].iov_base = (char *)keydata;
+	cipher_iovecs[0].iov_len = keydata_len;
+	cipher_iovecs[1].iov_base = (char *)hmac_keydata;
+	cipher_iovecs[1].iov_len = SHA512_HMAC_KEYLEN;
+	cipher_iovecs[2].iov_base = (char *)mac;
+	cipher_iovecs[2].iov_len = WRAPPING_MAC_LEN;
+
+	if (version == 0) {
+		aad_len = sizeof (uint64_t);
+		aad[0] = LE_64(guid);
+	} else {
+		ASSERT3U(version, ==, ZIO_CRYPT_KEY_CURRENT_VERSION);
+		aad_len = sizeof (uint64_t) * 3;
+		aad[0] = LE_64(guid);
+		aad[1] = LE_64(crypt);
+		aad[2] = LE_64(version);
+	}
+
+	enc_len = keydata_len + SHA512_HMAC_KEYLEN;
+	puio.uio_iov = plain_iovecs;
+	puio.uio_segflg = UIO_SYSSPACE;
+	puio.uio_iovcnt = 2;
+	cuio.uio_iov = cipher_iovecs;
+	cuio.uio_iovcnt = 3;
+	cuio.uio_segflg = UIO_SYSSPACE;
+
+	/* decrypt the keys and store the result in the output buffers */
+	ret = zio_do_crypt_uio(B_FALSE, crypt, cwkey, NULL, iv, enc_len,
+	    &puio, &cuio, (uint8_t *)aad, aad_len);
+	if (ret != 0)
+		goto error;
+
+	/* generate a fresh salt */
+	ret = random_get_bytes(key->zk_salt, ZIO_DATA_SALT_LEN);
+	if (ret != 0)
+		goto error;
+
+	/* derive the current key from the master key */
+	ret = hkdf_sha512(key->zk_master_keydata, keydata_len, NULL, 0,
+	    key->zk_salt, ZIO_DATA_SALT_LEN, key->zk_current_keydata,
+	    keydata_len);
+	if (ret != 0)
+		goto error;
+
+	/* initialize keys for ICP */
+	key->zk_current_key.ck_format = CRYPTO_KEY_RAW;
+	key->zk_current_key.ck_data = key->zk_current_keydata;
+	key->zk_current_key.ck_length = CRYPTO_BYTES2BITS(keydata_len);
+
+	key->zk_hmac_key.ck_format = CRYPTO_KEY_RAW;
+	key->zk_hmac_key.ck_data = key->zk_hmac_keydata;
+	key->zk_hmac_key.ck_length = CRYPTO_BYTES2BITS(SHA512_HMAC_KEYLEN);
+
+	/*
+	 * Initialize the crypto templates. It's ok if this fails because
+	 * this is just an optimization.
+	 */
+	mech.cm_type = crypto_mech2id(zio_crypt_table[crypt].ci_mechname);
+	ret = crypto_create_ctx_template(&mech, &key->zk_current_key,
+	    &key->zk_current_tmpl, KM_SLEEP);
+	if (ret != CRYPTO_SUCCESS)
+		key->zk_current_tmpl = NULL;
+
+	mech.cm_type = crypto_mech2id(SUN_CKM_SHA512_HMAC);
+	ret = crypto_create_ctx_template(&mech, &key->zk_hmac_key,
+	    &key->zk_hmac_tmpl, KM_SLEEP);
+	if (ret != CRYPTO_SUCCESS)
+		key->zk_hmac_tmpl = NULL;
+
+	key->zk_crypt = crypt;
+	key->zk_version = version;
+	key->zk_guid = guid;
+	key->zk_salt_count = 0;
+
+	return (0);
+
+error:
+	zio_crypt_key_destroy(key);
+	return (ret);
+}
+
+int
+zio_crypt_generate_iv(uint8_t *ivbuf)
+{
+	int ret;
+
+	/* randomly generate the IV */
+	ret = random_get_pseudo_bytes(ivbuf, ZIO_DATA_IV_LEN);
+	if (ret != 0)
+		goto error;
+
+	return (0);
+
+error:
+	bzero(ivbuf, ZIO_DATA_IV_LEN);
+	return (ret);
+}
+
+int
+zio_crypt_do_hmac(zio_crypt_key_t *key, uint8_t *data, uint_t datalen,
+    uint8_t *digestbuf, uint_t digestlen)
+{
+	int ret;
+	crypto_mechanism_t mech;
+	crypto_data_t in_data, digest_data;
+	uint8_t raw_digestbuf[SHA512_DIGEST_LENGTH];
+
+	ASSERT3U(digestlen, <=, SHA512_DIGEST_LENGTH);
+
+	/* initialize sha512-hmac mechanism and crypto data */
+	mech.cm_type = crypto_mech2id(SUN_CKM_SHA512_HMAC);
+	mech.cm_param = NULL;
+	mech.cm_param_len = 0;
+
+	/* initialize the crypto data */
+	in_data.cd_format = CRYPTO_DATA_RAW;
+	in_data.cd_offset = 0;
+	in_data.cd_length = datalen;
+	in_data.cd_raw.iov_base = (char *)data;
+	in_data.cd_raw.iov_len = in_data.cd_length;
+
+	digest_data.cd_format = CRYPTO_DATA_RAW;
+	digest_data.cd_offset = 0;
+	digest_data.cd_length = SHA512_DIGEST_LENGTH;
+	digest_data.cd_raw.iov_base = (char *)raw_digestbuf;
+	digest_data.cd_raw.iov_len = digest_data.cd_length;
+
+	/* generate the hmac */
+	ret = crypto_mac(&mech, &in_data, &key->zk_hmac_key, key->zk_hmac_tmpl,
+	    &digest_data, NULL);
+	if (ret != CRYPTO_SUCCESS) {
+		ret = SET_ERROR(EIO);
+		goto error;
+	}
+
+	bcopy(raw_digestbuf, digestbuf, digestlen);
+
+	return (0);
+
+error:
+	bzero(digestbuf, digestlen);
+	return (ret);
+}
+
+int
+zio_crypt_generate_iv_salt_dedup(zio_crypt_key_t *key, uint8_t *data,
+    uint_t datalen, uint8_t *ivbuf, uint8_t *salt)
+{
+	int ret;
+	uint8_t digestbuf[SHA512_DIGEST_LENGTH];
+
+	ret = zio_crypt_do_hmac(key, data, datalen,
+	    digestbuf, SHA512_DIGEST_LENGTH);
+	if (ret != 0)
+		return (ret);
+
+	bcopy(digestbuf, salt, ZIO_DATA_SALT_LEN);
+	bcopy(digestbuf + ZIO_DATA_SALT_LEN, ivbuf, ZIO_DATA_IV_LEN);
+
+	return (0);
+}
+
+/*
+ * The following functions are used to encode and decode encryption parameters
+ * into blkptr_t and zil_header_t. The ICP wants to use these parameters as
+ * byte strings, which normally means that these strings would not need to deal
+ * with byteswapping at all. However, both blkptr_t and zil_header_t may be
+ * byteswapped by lower layers and so we must "undo" that byteswap here upon
+ * decoding and encoding in a non-native byteorder. These functions require
+ * that the byteorder bit is correct before being called.
+ */
+void
+zio_crypt_encode_params_bp(blkptr_t *bp, uint8_t *salt, uint8_t *iv)
+{
+	uint64_t val64;
+	uint32_t val32;
+
+	ASSERT(BP_IS_ENCRYPTED(bp));
+
+	if (!BP_SHOULD_BYTESWAP(bp)) {
+		bcopy(salt, &bp->blk_dva[2].dva_word[0], sizeof (uint64_t));
+		bcopy(iv, &bp->blk_dva[2].dva_word[1], sizeof (uint64_t));
+		bcopy(iv + sizeof (uint64_t), &val32, sizeof (uint32_t));
+		BP_SET_IV2(bp, val32);
+	} else {
+		bcopy(salt, &val64, sizeof (uint64_t));
+		bp->blk_dva[2].dva_word[0] = BSWAP_64(val64);
+
+		bcopy(iv, &val64, sizeof (uint64_t));
+		bp->blk_dva[2].dva_word[1] = BSWAP_64(val64);
+
+		bcopy(iv + sizeof (uint64_t), &val32, sizeof (uint32_t));
+		BP_SET_IV2(bp, BSWAP_32(val32));
+	}
+}
+
+void
+zio_crypt_decode_params_bp(const blkptr_t *bp, uint8_t *salt, uint8_t *iv)
+{
+	uint64_t val64;
+	uint32_t val32;
+
+	ASSERT(BP_IS_PROTECTED(bp));
+
+	/* for convenience, so callers don't need to check */
+	if (BP_IS_AUTHENTICATED(bp)) {
+		bzero(salt, ZIO_DATA_SALT_LEN);
+		bzero(iv, ZIO_DATA_IV_LEN);
+		return;
+	}
+
+	if (!BP_SHOULD_BYTESWAP(bp)) {
+		bcopy(&bp->blk_dva[2].dva_word[0], salt, sizeof (uint64_t));
+		bcopy(&bp->blk_dva[2].dva_word[1], iv, sizeof (uint64_t));
+
+		val32 = (uint32_t)BP_GET_IV2(bp);
+		bcopy(&val32, iv + sizeof (uint64_t), sizeof (uint32_t));
+	} else {
+		val64 = BSWAP_64(bp->blk_dva[2].dva_word[0]);
+		bcopy(&val64, salt, sizeof (uint64_t));
+
+		val64 = BSWAP_64(bp->blk_dva[2].dva_word[1]);
+		bcopy(&val64, iv, sizeof (uint64_t));
+
+		val32 = BSWAP_32((uint32_t)BP_GET_IV2(bp));
+		bcopy(&val32, iv + sizeof (uint64_t), sizeof (uint32_t));
+	}
+}
+
+void
+zio_crypt_encode_mac_bp(blkptr_t *bp, uint8_t *mac)
+{
+	uint64_t val64;
+
+	ASSERT(BP_USES_CRYPT(bp));
+	ASSERT3U(BP_GET_TYPE(bp), !=, DMU_OT_OBJSET);
+
+	if (!BP_SHOULD_BYTESWAP(bp)) {
+		bcopy(mac, &bp->blk_cksum.zc_word[2], sizeof (uint64_t));
+		bcopy(mac + sizeof (uint64_t), &bp->blk_cksum.zc_word[3],
+		    sizeof (uint64_t));
+	} else {
+		bcopy(mac, &val64, sizeof (uint64_t));
+		bp->blk_cksum.zc_word[2] = BSWAP_64(val64);
+
+		bcopy(mac + sizeof (uint64_t), &val64, sizeof (uint64_t));
+		bp->blk_cksum.zc_word[3] = BSWAP_64(val64);
+	}
+}
+
+void
+zio_crypt_decode_mac_bp(const blkptr_t *bp, uint8_t *mac)
+{
+	uint64_t val64;
+
+	ASSERT(BP_USES_CRYPT(bp) || BP_IS_HOLE(bp));
+
+	/* for convenience, so callers don't need to check */
+	if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) {
+		bzero(mac, ZIO_DATA_MAC_LEN);
+		return;
+	}
+
+	if (!BP_SHOULD_BYTESWAP(bp)) {
+		bcopy(&bp->blk_cksum.zc_word[2], mac, sizeof (uint64_t));
+		bcopy(&bp->blk_cksum.zc_word[3], mac + sizeof (uint64_t),
+		    sizeof (uint64_t));
+	} else {
+		val64 = BSWAP_64(bp->blk_cksum.zc_word[2]);
+		bcopy(&val64, mac, sizeof (uint64_t));
+
+		val64 = BSWAP_64(bp->blk_cksum.zc_word[3]);
+		bcopy(&val64, mac + sizeof (uint64_t), sizeof (uint64_t));
+	}
+}
+
+void
+zio_crypt_encode_mac_zil(void *data, uint8_t *mac)
+{
+	zil_chain_t *zilc = data;
+
+	bcopy(mac, &zilc->zc_eck.zec_cksum.zc_word[2], sizeof (uint64_t));
+	bcopy(mac + sizeof (uint64_t), &zilc->zc_eck.zec_cksum.zc_word[3],
+	    sizeof (uint64_t));
+}
+
+void
+zio_crypt_decode_mac_zil(const void *data, uint8_t *mac)
+{
+	/*
+	 * The ZIL MAC is embedded in the block it protects, which will
+	 * not have been byteswapped by the time this function has been called.
+	 * As a result, we don't need to worry about byteswapping the MAC.
+	 */
+	const zil_chain_t *zilc = data;
+
+	bcopy(&zilc->zc_eck.zec_cksum.zc_word[2], mac, sizeof (uint64_t));
+	bcopy(&zilc->zc_eck.zec_cksum.zc_word[3], mac + sizeof (uint64_t),
+	    sizeof (uint64_t));
+}
+
+/*
+ * This routine takes a block of dnodes (src_abd) and copies only the bonus
+ * buffers to the same offsets in the dst buffer. datalen should be the size
+ * of both the src_abd and the dst buffer (not just the length of the bonus
+ * buffers).
+ */
+void
+zio_crypt_copy_dnode_bonus(abd_t *src_abd, uint8_t *dst, uint_t datalen)
+{
+	uint_t i, max_dnp = datalen >> DNODE_SHIFT;
+	uint8_t *src;
+	dnode_phys_t *dnp, *sdnp, *ddnp;
+
+	src = abd_borrow_buf_copy(src_abd, datalen);
+
+	sdnp = (dnode_phys_t *)src;
+	ddnp = (dnode_phys_t *)dst;
+
+	for (i = 0; i < max_dnp; i += sdnp[i].dn_extra_slots + 1) {
+		dnp = &sdnp[i];
+		if (dnp->dn_type != DMU_OT_NONE &&
+		    DMU_OT_IS_ENCRYPTED(dnp->dn_bonustype) &&
+		    dnp->dn_bonuslen != 0) {
+			bcopy(DN_BONUS(dnp), DN_BONUS(&ddnp[i]),
+			    DN_MAX_BONUS_LEN(dnp));
+		}
+	}
+
+	abd_return_buf(src_abd, src, datalen);
+}
+
+/*
+ * This function decides what fields from blk_prop are included in
+ * the various on-disk MAC algorithms.
+ */
+static void
+zio_crypt_bp_zero_nonportable_blkprop(blkptr_t *bp, uint64_t version)
+{
+	/*
+	 * Version 0 did not properly zero out all non-portable fields
+	 * as it should have done. We maintain this code so that we can
+	 * do read-only imports of pools on this version.
+	 */
+	if (version == 0) {
+		BP_SET_DEDUP(bp, 0);
+		BP_SET_CHECKSUM(bp, 0);
+		BP_SET_PSIZE(bp, SPA_MINBLOCKSIZE);
+		return;
+	}
+
+	ASSERT3U(version, ==, ZIO_CRYPT_KEY_CURRENT_VERSION);
+
+	/*
+	 * The hole_birth feature might set these fields even if this bp
+	 * is a hole. We zero them out here to guarantee that raw sends
+	 * will function with or without the feature.
+	 */
+	if (BP_IS_HOLE(bp)) {
+		bp->blk_prop = 0ULL;
+		return;
+	}
+
+	/*
+	 * At L0 we want to verify these fields to ensure that data blocks
+	 * can not be reinterpretted. For instance, we do not want an attacker
+	 * to trick us into returning raw lz4 compressed data to the user
+	 * by modifying the compression bits. At higher levels, we cannot
+	 * enforce this policy since raw sends do not convey any information
+	 * about indirect blocks, so these values might be different on the
+	 * receive side. Fortunately, this does not open any new attack
+	 * vectors, since any alterations that can be made to a higher level
+	 * bp must still verify the correct order of the layer below it.
+	 */
+	if (BP_GET_LEVEL(bp) != 0) {
+		BP_SET_BYTEORDER(bp, 0);
+		BP_SET_COMPRESS(bp, 0);
+
+		/*
+		 * psize cannot be set to zero or it will trigger
+		 * asserts, but the value doesn't really matter as
+		 * long as it is constant.
+		 */
+		BP_SET_PSIZE(bp, SPA_MINBLOCKSIZE);
+	}
+
+	BP_SET_DEDUP(bp, 0);
+	BP_SET_CHECKSUM(bp, 0);
+}
+
+/*
+ * Fill in bab with the parts of bp that get authenticated (its MAC and the
+ * portable bits of blk_prop) and report through bab_len how many bytes of
+ * bab are to be used for the given on-disk format version. bp itself is not
+ * modified; all work is done on a local copy.
+ */
+static void
+zio_crypt_bp_auth_init(uint64_t version, boolean_t should_bswap, blkptr_t *bp,
+    blkptr_auth_buf_t *bab, uint_t *bab_len)
+{
+	blkptr_t native_bp;
+
+	native_bp = *bp;
+	if (should_bswap)
+		byteswap_uint64_array(&native_bp, sizeof (native_bp));
+
+	ASSERT(BP_USES_CRYPT(&native_bp) || BP_IS_HOLE(&native_bp));
+	ASSERT0(BP_IS_EMBEDDED(&native_bp));
+
+	/*
+	 * The MAC has to be decoded before the non-portable fields are
+	 * cleared, because clearing them also wipes the endianness bit.
+	 */
+	zio_crypt_decode_mac_bp(&native_bp, bab->bab_mac);
+	zio_crypt_bp_zero_nonportable_blkprop(&native_bp, version);
+
+	/* blk_prop is always MAC'd in little endian for portability */
+	bab->bab_prop = LE_64(native_bp.blk_prop);
+	bab->bab_pad = 0ULL;
+
+	/* version 0 did not include the trailing padding word */
+	*bab_len = (version == 0) ?
+	    sizeof (blkptr_auth_buf_t) - sizeof (uint64_t) :
+	    sizeof (blkptr_auth_buf_t);
+}
+
+/*
+ * Feed the authenticated portion of bp into the running HMAC context ctx.
+ * Returns 0 on success or EIO if the MAC update fails.
+ */
+static int
+zio_crypt_bp_do_hmac_updates(crypto_context_t ctx, uint64_t version,
+    boolean_t should_bswap, blkptr_t *bp)
+{
+	uint_t authlen;
+	blkptr_auth_buf_t authbuf;
+	crypto_data_t chunk;
+
+	zio_crypt_bp_auth_init(version, should_bswap, bp, &authbuf, &authlen);
+
+	chunk.cd_format = CRYPTO_DATA_RAW;
+	chunk.cd_offset = 0;
+	chunk.cd_length = authlen;
+	chunk.cd_raw.iov_base = (char *)&authbuf;
+	chunk.cd_raw.iov_len = chunk.cd_length;
+
+	if (crypto_mac_update(ctx, &chunk, NULL) != CRYPTO_SUCCESS)
+		return (SET_ERROR(EIO));
+
+	return (0);
+}
+
+/*
+ * Fold the authenticated portion of bp into the running SHA2 context ctx.
+ */
+static void
+zio_crypt_bp_do_indrect_checksum_updates(SHA2_CTX *ctx, uint64_t version,
+    boolean_t should_bswap, blkptr_t *bp)
+{
+	blkptr_auth_buf_t authbuf;
+	uint_t authlen;
+
+	zio_crypt_bp_auth_init(version, should_bswap, bp, &authbuf, &authlen);
+	SHA2Update(ctx, &authbuf, authlen);
+}
+
+/*
+ * Append the authenticated portion of bp to the AAD buffer at *aadp, then
+ * advance *aadp and grow *aad_len by the number of bytes appended.
+ */
+static void
+zio_crypt_bp_do_aad_updates(uint8_t **aadp, uint_t *aad_len, uint64_t version,
+    boolean_t should_bswap, blkptr_t *bp)
+{
+	blkptr_auth_buf_t authbuf;
+	uint_t nbytes;
+	uint8_t *cursor = *aadp;
+
+	zio_crypt_bp_auth_init(version, should_bswap, bp, &authbuf, &nbytes);
+	bcopy(&authbuf, cursor, nbytes);
+
+	*aadp = cursor + nbytes;
+	*aad_len += nbytes;
+}
+
+/*
+ * Feed one dnode into the running HMAC context ctx: first the core fields
+ * that precede dn_blkptr (with non-portable bits masked out), then each of
+ * its block pointers, then the spill block pointer if the dnode has one.
+ * Returns 0 on success, EIO if a MAC update on the core fields fails, or
+ * whatever zio_crypt_bp_do_hmac_updates() returned for a block pointer.
+ */
+static int
+zio_crypt_do_dnode_hmac_updates(crypto_context_t ctx, uint64_t version,
+    boolean_t should_bswap, dnode_phys_t *dnp)
+{
+	int err, idx;
+	uint8_t core[offsetof(dnode_phys_t, dn_blkptr)];
+	dnode_phys_t *core_dnp = (dnode_phys_t *)core;
+	boolean_t le_bswap = (should_bswap == ZFS_HOST_BYTEORDER);
+	crypto_data_t chunk;
+
+	/* work on a private copy of the core so dnp is left untouched */
+	bcopy(dnp, core, sizeof (core));
+	if (le_bswap) {
+		core_dnp->dn_datablkszsec = BSWAP_16(core_dnp->dn_datablkszsec);
+		core_dnp->dn_bonuslen = BSWAP_16(core_dnp->dn_bonuslen);
+		core_dnp->dn_maxblkid = BSWAP_64(core_dnp->dn_maxblkid);
+		core_dnp->dn_used = BSWAP_64(core_dnp->dn_used);
+	}
+
+	/* dn_used and some dn_flags are not portable; keep them out */
+	core_dnp->dn_used = 0;
+	core_dnp->dn_flags &= DNODE_CRYPT_PORTABLE_FLAGS_MASK;
+
+	chunk.cd_format = CRYPTO_DATA_RAW;
+	chunk.cd_offset = 0;
+	chunk.cd_length = sizeof (core);
+	chunk.cd_raw.iov_base = (char *)core_dnp;
+	chunk.cd_raw.iov_len = chunk.cd_length;
+
+	if (crypto_mac_update(ctx, &chunk, NULL) != CRYPTO_SUCCESS)
+		return (SET_ERROR(EIO));
+
+	for (idx = 0; idx < dnp->dn_nblkptr; idx++) {
+		err = zio_crypt_bp_do_hmac_updates(ctx, version,
+		    should_bswap, &dnp->dn_blkptr[idx]);
+		if (err != 0)
+			return (err);
+	}
+
+	if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) {
+		err = zio_crypt_bp_do_hmac_updates(ctx, version,
+		    should_bswap, DN_SPILL_BLKPTR(dnp));
+		if (err != 0)
+			return (err);
+	}
+
+	return (0);
+}
+
+/*
+ * objset_phys_t blocks introduce a number of exceptions to the normal
+ * authentication process. objset_phys_t's contain 2 separate HMACs for
+ * protecting the integrity of their data. The portable_mac protects the
+ * metadnode. This MAC can be sent with a raw send and protects against
+ * reordering of data within the metadnode. The local_mac protects the user
+ * accounting objects which are not sent from one system to another.
+ *
+ * In addition, objset blocks are the only blocks that can be modified and
+ * written to disk without the key loaded under certain circumstances. During
+ * zil_claim() we need to be able to update the zil_header_t to complete
+ * claiming log blocks and during raw receives we need to write out the
+ * portable_mac from the send file. Both of these actions are possible
+ * because these fields are not protected by either MAC so neither one will
+ * need to modify the MACs without the key. However, when the modified blocks
+ * are written out they will be byteswapped into the host machine's native
+ * endianness which will modify fields protected by the MAC. As a result, MAC
+ * calculation for objset blocks works slightly differently from other block
+ * types. Where other block types MAC the data in whatever endianness is
+ * written to disk, objset blocks always MAC little endian version of their
+ * values. In the code, should_bswap is the value from BP_SHOULD_BYTESWAP()
+ * and le_bswap indicates whether a byteswap is needed to get this block
+ * into little endian format.
+ *
+ * Arguments:
+ *   key           supplies the HMAC key (zk_hmac_key) and the on-disk
+ *                 format version (zk_version)
+ *   data          the objset_phys_t to authenticate
+ *   datalen       size of data; if it is <= OBJSET_OLD_PHYS_SIZE the local
+ *                 MAC is zeroed instead of computed
+ *   should_bswap  see above
+ *   portable_mac  output, ZIO_OBJSET_MAC_LEN bytes
+ *   local_mac     output, ZIO_OBJSET_MAC_LEN bytes
+ *
+ * Returns 0 on success. On failure both output MACs are zeroed and the
+ * return value is EIO (a crypto call failed) or the error returned by
+ * zio_crypt_do_dnode_hmac_updates().
+ */
+/* ARGSUSED */
+int
+zio_crypt_do_objset_hmacs(zio_crypt_key_t *key, void *data, uint_t datalen,
+    boolean_t should_bswap, uint8_t *portable_mac, uint8_t *local_mac)
+{
+	int ret;
+	crypto_mechanism_t mech;
+	crypto_context_t ctx;
+	crypto_data_t cd;
+	objset_phys_t *osp = data;
+	uint64_t intval;
+	boolean_t le_bswap = (should_bswap == ZFS_HOST_BYTEORDER);
+	/* full-length digests; only ZIO_OBJSET_MAC_LEN bytes are kept */
+	uint8_t raw_portable_mac[SHA512_DIGEST_LENGTH];
+	uint8_t raw_local_mac[SHA512_DIGEST_LENGTH];
+
+	/* initialize HMAC mechanism */
+	mech.cm_type = crypto_mech2id(SUN_CKM_SHA512_HMAC);
+	mech.cm_param = NULL;
+	mech.cm_param_len = 0;
+
+	cd.cd_format = CRYPTO_DATA_RAW;
+	cd.cd_offset = 0;
+
+	/* calculate the portable MAC from the portable fields and metadnode */
+	ret = crypto_mac_init(&mech, &key->zk_hmac_key, NULL, &ctx, NULL);
+	if (ret != CRYPTO_SUCCESS) {
+		ret = SET_ERROR(EIO);
+		goto error;
+	}
+
+	/*
+	 * add in the os_type
+	 *
+	 * NOTE(review): the un-swapped value is used when le_bswap is set
+	 * and the swapped value otherwise, which reads as the inverse of how
+	 * os_flags is handled below. The result is part of the MAC, so
+	 * confirm on-disk compatibility before touching this.
+	 */
+	intval = (le_bswap) ? osp->os_type : BSWAP_64(osp->os_type);
+	cd.cd_length = sizeof (uint64_t);
+	cd.cd_raw.iov_base = (char *)&intval;
+	cd.cd_raw.iov_len = cd.cd_length;
+
+	/*
+	 * NOTE(review): on the failure paths below ctx is not explicitly
+	 * released here; confirm whether the crypto framework does so.
+	 */
+	ret = crypto_mac_update(ctx, &cd, NULL);
+	if (ret != CRYPTO_SUCCESS) {
+		ret = SET_ERROR(EIO);
+		goto error;
+	}
+
+	/*
+	 * add in the portable os_flags: convert to host order, keep only the
+	 * portable bits, then convert to little endian for the MAC
+	 */
+	intval = osp->os_flags;
+	if (should_bswap)
+		intval = BSWAP_64(intval);
+	intval &= OBJSET_CRYPT_PORTABLE_FLAGS_MASK;
+	/* CONSTCOND */
+	if (!ZFS_HOST_BYTEORDER)
+		intval = BSWAP_64(intval);
+
+	cd.cd_length = sizeof (uint64_t);
+	cd.cd_raw.iov_base = (char *)&intval;
+	cd.cd_raw.iov_len = cd.cd_length;
+
+	ret = crypto_mac_update(ctx, &cd, NULL);
+	if (ret != CRYPTO_SUCCESS) {
+		ret = SET_ERROR(EIO);
+		goto error;
+	}
+
+	/* add in fields from the metadnode */
+	ret = zio_crypt_do_dnode_hmac_updates(ctx, key->zk_version,
+	    should_bswap, &osp->os_meta_dnode);
+	if (ret)
+		goto error;
+
+	/* store the final digest in a temporary buffer and copy what we need */
+	cd.cd_length = SHA512_DIGEST_LENGTH;
+	cd.cd_raw.iov_base = (char *)raw_portable_mac;
+	cd.cd_raw.iov_len = cd.cd_length;
+
+	ret = crypto_mac_final(ctx, &cd, NULL);
+	if (ret != CRYPTO_SUCCESS) {
+		ret = SET_ERROR(EIO);
+		goto error;
+	}
+
+	bcopy(raw_portable_mac, portable_mac, ZIO_OBJSET_MAC_LEN);
+
+	/*
+	 * The local MAC protects the user and group accounting. If these
+	 * objects are not present, the local MAC is zeroed out. The same is
+	 * done when datalen is no larger than OBJSET_OLD_PHYS_SIZE.
+	 */
+	if ((osp->os_userused_dnode.dn_type == DMU_OT_NONE &&
+	    osp->os_groupused_dnode.dn_type == DMU_OT_NONE) ||
+	    (datalen <= OBJSET_OLD_PHYS_SIZE)) {
+		bzero(local_mac, ZIO_OBJSET_MAC_LEN);
+		return (0);
+	}
+
+	/* calculate the local MAC from the userused and groupused dnodes */
+	ret = crypto_mac_init(&mech, &key->zk_hmac_key, NULL, &ctx, NULL);
+	if (ret != CRYPTO_SUCCESS) {
+		ret = SET_ERROR(EIO);
+		goto error;
+	}
+
+	/*
+	 * add in the non-portable os_flags (the complement of the bits that
+	 * went into the portable MAC), again in little endian
+	 */
+	intval = osp->os_flags;
+	if (should_bswap)
+		intval = BSWAP_64(intval);
+	intval &= ~OBJSET_CRYPT_PORTABLE_FLAGS_MASK;
+	/* CONSTCOND */
+	if (!ZFS_HOST_BYTEORDER)
+		intval = BSWAP_64(intval);
+
+	cd.cd_length = sizeof (uint64_t);
+	cd.cd_raw.iov_base = (char *)&intval;
+	cd.cd_raw.iov_len = cd.cd_length;
+
+	ret = crypto_mac_update(ctx, &cd, NULL);
+	if (ret != CRYPTO_SUCCESS) {
+		ret = SET_ERROR(EIO);
+		goto error;
+	}
+
+	/* add in fields from the user accounting dnodes */
+	ret = zio_crypt_do_dnode_hmac_updates(ctx, key->zk_version,
+	    should_bswap, &osp->os_userused_dnode);
+	if (ret)
+		goto error;
+
+	ret = zio_crypt_do_dnode_hmac_updates(ctx, key->zk_version,
+	    should_bswap, &osp->os_groupused_dnode);
+	if (ret)
+		goto error;
+
+	/* store the final digest in a temporary buffer and copy what we need */
+	cd.cd_length = SHA512_DIGEST_LENGTH;
+	cd.cd_raw.iov_base = (char *)raw_local_mac;
+	cd.cd_raw.iov_len = cd.cd_length;
+
+	ret = crypto_mac_final(ctx, &cd, NULL);
+	if (ret != CRYPTO_SUCCESS) {
+		ret = SET_ERROR(EIO);
+		goto error;
+	}
+
+	bcopy(raw_local_mac, local_mac, ZIO_OBJSET_MAC_LEN);
+
+	return (0);
+
+error:
+	/* never hand back a partially computed MAC */
+	bzero(portable_mac, ZIO_OBJSET_MAC_LEN);
+	bzero(local_mac, ZIO_OBJSET_MAC_LEN);
+	return (ret);
+}
+
+/*
+ * Free the iovec array attached to uio, if there is one. The uio structure
+ * itself is not freed.
+ */
+static void
+zio_crypt_destroy_uio(uio_t *uio)
+{
+	if (uio->uio_iov == NULL)
+		return;
+
+	kmem_free(uio->uio_iov, uio->uio_iovcnt * sizeof (iovec_t));
+}
+
+/*
+ * Parse an uncompressed indirect block and compute a checksum over the
+ * portable fields of every bp it contains. The portable fields are the MAC
+ * and everything in blk_prop except the dedup, checksum, and psize bits.
+ * For the reasoning behind this, see the comment block on object set
+ * authentication.
+ *
+ * When generate is set, the first ZIO_DATA_MAC_LEN bytes of the digest are
+ * written to cksum and 0 is returned. Otherwise they are compared against
+ * cksum, and ECKSUM is returned on a mismatch.
+ */
+static int
+zio_crypt_do_indirect_mac_checksum_impl(boolean_t generate, void *buf,
+    uint_t datalen, uint64_t version, boolean_t byteswap, uint8_t *cksum)
+{
+	blkptr_t *bps = buf;
+	int idx, nbps = datalen >> SPA_BLKPTRSHIFT;
+	SHA2_CTX sha;
+	uint8_t digest[SHA512_DIGEST_LENGTH];
+
+	/* checksum all of the MACs from the layer below */
+	SHA2Init(SHA512, &sha);
+	for (idx = 0; idx < nbps; idx++) {
+		zio_crypt_bp_do_indrect_checksum_updates(&sha, version,
+		    byteswap, &bps[idx]);
+	}
+	SHA2Final(digest, &sha);
+
+	if (!generate) {
+		if (bcmp(digest, cksum, ZIO_DATA_MAC_LEN) != 0)
+			return (SET_ERROR(ECKSUM));
+		return (0);
+	}
+
+	bcopy(digest, cksum, ZIO_DATA_MAC_LEN);
+	return (0);
+}
+
+/*
+ * Generate or verify the checksum-of-MACs for an indirect block held in a
+ * linear buffer. Returns 0 on success or ECKSUM if verification fails under
+ * every known on-disk format version.
+ */
+int
+zio_crypt_do_indirect_mac_checksum(boolean_t generate, void *buf,
+    uint_t datalen, boolean_t byteswap, uint8_t *cksum)
+{
+	int ret;
+
+	/*
+	 * Callers do not always have easy access to the on-disk format
+	 * version: it normally lives in the DSL Crypto Key, yet the
+	 * checksum-of-MACs must be verifiable without the key loaded.
+	 * Rather than do a ZAP lookup for every zio, try the current
+	 * format first and fall back to version 0 on a mismatch.
+	 */
+	ret = zio_crypt_do_indirect_mac_checksum_impl(generate, buf,
+	    datalen, ZIO_CRYPT_KEY_CURRENT_VERSION, byteswap, cksum);
+	if (ret != ECKSUM)
+		return (ret);
+
+	/* only verification can report a mismatch */
+	ASSERT(!generate);
+	return (zio_crypt_do_indirect_mac_checksum_impl(generate,
+	    buf, datalen, 0, byteswap, cksum));
+}
+
+/*
+ * Same as zio_crypt_do_indirect_mac_checksum(), but for a block held in an
+ * abd: the contents are borrowed as a linear copy for the duration of the
+ * call.
+ */
+int
+zio_crypt_do_indirect_mac_checksum_abd(boolean_t generate, abd_t *abd,
+    uint_t datalen, boolean_t byteswap, uint8_t *cksum)
+{
+	void *linear = abd_borrow_buf_copy(abd, datalen);
+	int err = zio_crypt_do_indirect_mac_checksum(generate, linear,
+	    datalen, byteswap, cksum);
+
+	abd_return_buf(abd, linear, datalen);
+	return (err);
+}
+
+/*
+ * Special case handling routine for encrypting / decrypting ZIL blocks.
+ * We do not check for the older ZIL chain because the encryption feature
+ * was not available before the newer ZIL chain was introduced. The goal
+ * here is to encrypt everything except the blkptr_t of a lr_write_t and
+ * the zil_chain_t header. Everything that is not encrypted is authenticated.
+ *
+ * The block is walked twice: once to count the iovecs needed and once to
+ * fill them in. While filling, every piece that stays in plaintext is copied
+ * from the source buffer to the destination buffer and appended to a
+ * datalen-sized AAD buffer.
+ *
+ * On success returns 0 with:
+ *   puio / cuio   iovec arrays describing the plaintext / ciphertext
+ *                 regions; the ciphertext array has one extra, unfilled
+ *                 trailing entry reserved for the MAC
+ *   *enc_len      total number of bytes covered by the encrypted iovecs
+ *   *authbuf      the AAD buffer (allocated here with zio_buf_alloc())
+ *   *auth_len     number of valid bytes in *authbuf
+ *   *no_crypt     B_TRUE if the block contained no records to encrypt
+ * On failure everything allocated here is freed and the outputs are reset.
+ */
+
+/* ARGSUSED */
+static int
+zio_crypt_init_uios_zil(boolean_t encrypt, uint8_t *plainbuf,
+    uint8_t *cipherbuf, uint_t datalen, boolean_t byteswap, uio_t *puio,
+    uio_t *cuio, uint_t *enc_len, uint8_t **authbuf, uint_t *auth_len,
+    boolean_t *no_crypt)
+{
+	int ret;
+	uint64_t txtype, lr_len;
+	uint_t nr_src, nr_dst, crypt_len;
+	uint_t aad_len = 0, nr_iovecs = 0, total_len = 0;
+	iovec_t *src_iovecs = NULL, *dst_iovecs = NULL;
+	uint8_t *src, *dst, *slrp, *dlrp, *blkend, *aadp;
+	zil_chain_t *zilc;
+	lr_t *lr;
+	uint8_t *aadbuf = zio_buf_alloc(datalen);
+
+	/* cipherbuf always needs an extra iovec for the MAC */
+	if (encrypt) {
+		src = plainbuf;
+		dst = cipherbuf;
+		nr_src = 0;
+		nr_dst = 1;
+	} else {
+		src = cipherbuf;
+		dst = plainbuf;
+		nr_src = 1;
+		nr_dst = 0;
+	}
+
+	/* find the start and end record of the log block */
+	zilc = (zil_chain_t *)src;
+	slrp = src + sizeof (zil_chain_t);
+	aadp = aadbuf;
+	blkend = src + ((byteswap) ? BSWAP_64(zilc->zc_nused) : zilc->zc_nused);
+
+	/*
+	 * calculate the number of encrypted iovecs we will need
+	 *
+	 * NOTE(review): lr_len is read from the block itself and is not
+	 * range-checked here; a value of zero would never advance slrp.
+	 * Confirm the block contents are validated before this is reached.
+	 */
+	for (; slrp < blkend; slrp += lr_len) {
+		lr = (lr_t *)slrp;
+
+		if (!byteswap) {
+			txtype = lr->lrc_txtype;
+			lr_len = lr->lrc_reclen;
+		} else {
+			txtype = BSWAP_64(lr->lrc_txtype);
+			lr_len = BSWAP_64(lr->lrc_reclen);
+		}
+
+		/*
+		 * one iovec per record, plus a second one for a TX_WRITE
+		 * that carries data beyond the lr_write_t itself
+		 */
+		nr_iovecs++;
+		if (txtype == TX_WRITE && lr_len != sizeof (lr_write_t))
+			nr_iovecs++;
+	}
+
+	nr_src += nr_iovecs;
+	nr_dst += nr_iovecs;
+
+	/*
+	 * allocate the iovec arrays
+	 *
+	 * NOTE(review): these are KM_SLEEP allocations, which are documented
+	 * not to return NULL, so the ENOMEM branches look unreachable.
+	 */
+	if (nr_src != 0) {
+		src_iovecs = kmem_alloc(nr_src * sizeof (iovec_t), KM_SLEEP);
+		if (src_iovecs == NULL) {
+			ret = SET_ERROR(ENOMEM);
+			goto error;
+		}
+	}
+
+	if (nr_dst != 0) {
+		dst_iovecs = kmem_alloc(nr_dst * sizeof (iovec_t), KM_SLEEP);
+		if (dst_iovecs == NULL) {
+			ret = SET_ERROR(ENOMEM);
+			goto error;
+		}
+	}
+
+	/*
+	 * Copy the plain zil header over and authenticate everything except
+	 * the checksum that will store our MAC. If we are writing the data
+	 * the embedded checksum will not have been calculated yet, so we don't
+	 * authenticate that.
+	 */
+	bcopy(src, dst, sizeof (zil_chain_t));
+	bcopy(src, aadp, sizeof (zil_chain_t) - sizeof (zio_eck_t));
+	aadp += sizeof (zil_chain_t) - sizeof (zio_eck_t);
+	aad_len += sizeof (zil_chain_t) - sizeof (zio_eck_t);
+
+	/* loop over records again, filling in iovecs */
+	nr_iovecs = 0;
+	slrp = src + sizeof (zil_chain_t);
+	dlrp = dst + sizeof (zil_chain_t);
+
+	for (; slrp < blkend; slrp += lr_len, dlrp += lr_len) {
+		lr = (lr_t *)slrp;
+
+		if (!byteswap) {
+			txtype = lr->lrc_txtype;
+			lr_len = lr->lrc_reclen;
+		} else {
+			txtype = BSWAP_64(lr->lrc_txtype);
+			lr_len = BSWAP_64(lr->lrc_reclen);
+		}
+
+		/* copy the common lr_t */
+		bcopy(slrp, dlrp, sizeof (lr_t));
+		bcopy(slrp, aadp, sizeof (lr_t));
+		aadp += sizeof (lr_t);
+		aad_len += sizeof (lr_t);
+
+		/* at least one record exists, so both arrays were allocated */
+		ASSERT3P(src_iovecs, !=, NULL);
+		ASSERT3P(dst_iovecs, !=, NULL);
+
+		/*
+		 * If this is a TX_WRITE record we want to encrypt everything
+		 * except the bp if exists. If the bp does exist we want to
+		 * authenticate it.
+		 */
+		if (txtype == TX_WRITE) {
+			/* fields between the common lr_t and the trailing bp */
+			crypt_len = sizeof (lr_write_t) -
+			    sizeof (lr_t) - sizeof (blkptr_t);
+			src_iovecs[nr_iovecs].iov_base = (char *)slrp +
+			    sizeof (lr_t);
+			src_iovecs[nr_iovecs].iov_len = crypt_len;
+			dst_iovecs[nr_iovecs].iov_base = (char *)dlrp +
+			    sizeof (lr_t);
+			dst_iovecs[nr_iovecs].iov_len = crypt_len;
+
+			/* copy the bp now since it will not be encrypted */
+			bcopy(slrp + sizeof (lr_write_t) - sizeof (blkptr_t),
+			    dlrp + sizeof (lr_write_t) - sizeof (blkptr_t),
+			    sizeof (blkptr_t));
+			bcopy(slrp + sizeof (lr_write_t) - sizeof (blkptr_t),
+			    aadp, sizeof (blkptr_t));
+			aadp += sizeof (blkptr_t);
+			aad_len += sizeof (blkptr_t);
+			nr_iovecs++;
+			total_len += crypt_len;
+
+			/* anything following the lr_write_t is encrypted too */
+			if (lr_len != sizeof (lr_write_t)) {
+				crypt_len = lr_len - sizeof (lr_write_t);
+				src_iovecs[nr_iovecs].iov_base = (char *)
+				    slrp + sizeof (lr_write_t);
+				src_iovecs[nr_iovecs].iov_len = crypt_len;
+				dst_iovecs[nr_iovecs].iov_base = (char *)
+				    dlrp + sizeof (lr_write_t);
+				dst_iovecs[nr_iovecs].iov_len = crypt_len;
+				nr_iovecs++;
+				total_len += crypt_len;
+			}
+		} else {
+			/* every other record: encrypt all but the lr_t */
+			crypt_len = lr_len - sizeof (lr_t);
+			src_iovecs[nr_iovecs].iov_base = (char *)slrp +
+			    sizeof (lr_t);
+			src_iovecs[nr_iovecs].iov_len = crypt_len;
+			dst_iovecs[nr_iovecs].iov_base = (char *)dlrp +
+			    sizeof (lr_t);
+			dst_iovecs[nr_iovecs].iov_len = crypt_len;
+			nr_iovecs++;
+			total_len += crypt_len;
+		}
+	}
+
+	*no_crypt = (nr_iovecs == 0);
+	*enc_len = total_len;
+	*authbuf = aadbuf;
+	*auth_len = aad_len;
+
+	/* src is the plaintext side when encrypting, the ciphertext otherwise */
+	if (encrypt) {
+		puio->uio_iov = src_iovecs;
+		puio->uio_iovcnt = nr_src;
+		cuio->uio_iov = dst_iovecs;
+		cuio->uio_iovcnt = nr_dst;
+	} else {
+		puio->uio_iov = dst_iovecs;
+		puio->uio_iovcnt = nr_dst;
+		cuio->uio_iov = src_iovecs;
+		cuio->uio_iovcnt = nr_src;
+	}
+
+	return (0);
+
+error:
+	zio_buf_free(aadbuf, datalen);
+	if (src_iovecs != NULL)
+		kmem_free(src_iovecs, nr_src * sizeof (iovec_t));
+	if (dst_iovecs != NULL)
+		kmem_free(dst_iovecs, nr_dst * sizeof (iovec_t));
+
+	*enc_len = 0;
+	*authbuf = NULL;
+	*auth_len = 0;
+	*no_crypt = B_FALSE;
+	puio->uio_iov = NULL;
+	puio->uio_iovcnt = 0;
+	cuio->uio_iov = NULL;
+	cuio->uio_iovcnt = 0;
+	return (ret);
+}
+
+/*
+ * Special case handling routine for encrypting / decrypting dnode blocks.
+ *
+ * Only bonus buffers whose bonus type is encrypted are encrypted. The core
+ * dnode fields and block pointers are copied to the destination as
+ * plaintext, and their portable parts are appended to a datalen-sized AAD
+ * buffer. Bonus buffers that are not encrypted are copied and authenticated
+ * in full.
+ *
+ * On success returns 0 with:
+ *   puio / cuio   iovec arrays describing the plaintext / ciphertext bonus
+ *                 regions; the ciphertext array has one extra, unfilled
+ *                 trailing entry reserved for the MAC
+ *   *enc_len      total number of bytes covered by the encrypted iovecs
+ *   *authbuf      the AAD buffer (allocated here with zio_buf_alloc())
+ *   *auth_len     number of valid bytes in *authbuf
+ *   *no_crypt     B_TRUE if no bonus buffer needed encryption
+ * On failure everything allocated here is freed and the outputs are reset.
+ */
+static int
+zio_crypt_init_uios_dnode(boolean_t encrypt, uint64_t version,
+    uint8_t *plainbuf, uint8_t *cipherbuf, uint_t datalen, boolean_t byteswap,
+    uio_t *puio, uio_t *cuio, uint_t *enc_len, uint8_t **authbuf,
+    uint_t *auth_len, boolean_t *no_crypt)
+{
+	int ret;
+	uint_t nr_src, nr_dst, crypt_len;
+	uint_t aad_len = 0, nr_iovecs = 0, total_len = 0;
+	uint_t i, j, max_dnp = datalen >> DNODE_SHIFT;
+	iovec_t *src_iovecs = NULL, *dst_iovecs = NULL;
+	uint8_t *src, *dst, *aadp;
+	dnode_phys_t *dnp, *adnp, *sdnp, *ddnp;
+	uint8_t *aadbuf = zio_buf_alloc(datalen);
+
+	/* the ciphertext side gets one extra iovec, used for the MAC */
+	if (encrypt) {
+		src = plainbuf;
+		dst = cipherbuf;
+		nr_src = 0;
+		nr_dst = 1;
+	} else {
+		src = cipherbuf;
+		dst = plainbuf;
+		nr_src = 1;
+		nr_dst = 0;
+	}
+
+	sdnp = (dnode_phys_t *)src;
+	ddnp = (dnode_phys_t *)dst;
+	aadp = aadbuf;
+
+	/*
+	 * Count the number of iovecs we will need to do the encryption by
+	 * counting the number of bonus buffers that need to be encrypted.
+	 */
+	for (i = 0; i < max_dnp; i += sdnp[i].dn_extra_slots + 1) {
+		/*
+		 * This block may still be byteswapped. However, all of the
+		 * values we use are either uint8_t's (for which byteswapping
+		 * is a noop) or a * != 0 check, which will work regardless
+		 * of whether or not we byteswap.
+		 */
+		if (sdnp[i].dn_type != DMU_OT_NONE &&
+		    DMU_OT_IS_ENCRYPTED(sdnp[i].dn_bonustype) &&
+		    sdnp[i].dn_bonuslen != 0) {
+			nr_iovecs++;
+		}
+	}
+
+	nr_src += nr_iovecs;
+	nr_dst += nr_iovecs;
+
+	/*
+	 * NOTE(review): these are KM_SLEEP allocations, which are documented
+	 * not to return NULL, so the ENOMEM branches look unreachable.
+	 */
+	if (nr_src != 0) {
+		src_iovecs = kmem_alloc(nr_src * sizeof (iovec_t), KM_SLEEP);
+		if (src_iovecs == NULL) {
+			ret = SET_ERROR(ENOMEM);
+			goto error;
+		}
+	}
+
+	if (nr_dst != 0) {
+		dst_iovecs = kmem_alloc(nr_dst * sizeof (iovec_t), KM_SLEEP);
+		if (dst_iovecs == NULL) {
+			ret = SET_ERROR(ENOMEM);
+			goto error;
+		}
+	}
+
+	nr_iovecs = 0;
+
+	/*
+	 * Iterate through the dnodes again, this time filling in the uios
+	 * we allocated earlier. We also concatenate any data we want to
+	 * authenticate onto aadbuf.
+	 */
+	for (i = 0; i < max_dnp; i += sdnp[i].dn_extra_slots + 1) {
+		dnp = &sdnp[i];
+		/* copy over the core fields and blkptrs (kept as plaintext) */
+		bcopy(dnp, &ddnp[i], (uint8_t *)DN_BONUS(dnp) - (uint8_t *)dnp);
+		if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) {
+			bcopy(DN_SPILL_BLKPTR(dnp), DN_SPILL_BLKPTR(&ddnp[i]),
+			    sizeof (blkptr_t));
+		}
+
+		/*
+		 * Handle authenticated data. We authenticate everything in
+		 * the dnode that can be brought over when we do a raw send.
+		 * This includes all of the core fields as well as the MACs
+		 * stored in the bp checksums and all of the portable bits
+		 * from blk_prop. We include the dnode padding here in case it
+		 * ever gets used in the future. Some dn_flags and dn_used are
+		 * not portable so we mask those values out of the
+		 * authenticated data.
+		 */
+		crypt_len = offsetof(dnode_phys_t, dn_blkptr);
+		bcopy(dnp, aadp, crypt_len);
+		/* the masking is applied to the AAD copy, not to the source */
+		adnp = (dnode_phys_t *)aadp;
+		adnp->dn_flags &= DNODE_CRYPT_PORTABLE_FLAGS_MASK;
+		adnp->dn_used = 0;
+		aadp += crypt_len;
+		aad_len += crypt_len;
+
+		for (j = 0; j < dnp->dn_nblkptr; j++) {
+			zio_crypt_bp_do_aad_updates(&aadp, &aad_len,
+			    version, byteswap, &dnp->dn_blkptr[j]);
+		}
+
+		if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) {
+			zio_crypt_bp_do_aad_updates(&aadp, &aad_len,
+			    version, byteswap, DN_SPILL_BLKPTR(dnp));
+		}
+
+		/*
+		 * If this bonus buffer needs to be encrypted, we prepare an
+		 * iovec_t. The encryption / decryption functions will fill
+		 * this in for us with the encrypted or decrypted data.
+		 * Otherwise we add the bonus buffer to the authenticated
+		 * data buffer and copy it over to the destination. The
+		 * encrypted iovec extends to DN_MAX_BONUS_LEN(dnp) so that
+		 * we can guarantee alignment with the AES block size
+		 * (128 bits).
+		 */
+		crypt_len = DN_MAX_BONUS_LEN(dnp);
+		if (dnp->dn_type != DMU_OT_NONE &&
+		    DMU_OT_IS_ENCRYPTED(dnp->dn_bonustype) &&
+		    dnp->dn_bonuslen != 0) {
+			/* must agree with the count made in the first pass */
+			ASSERT3U(nr_iovecs, <, nr_src);
+			ASSERT3U(nr_iovecs, <, nr_dst);
+			ASSERT3P(src_iovecs, !=, NULL);
+			ASSERT3P(dst_iovecs, !=, NULL);
+			src_iovecs[nr_iovecs].iov_base = DN_BONUS(dnp);
+			src_iovecs[nr_iovecs].iov_len = crypt_len;
+			dst_iovecs[nr_iovecs].iov_base = DN_BONUS(&ddnp[i]);
+			dst_iovecs[nr_iovecs].iov_len = crypt_len;
+
+			nr_iovecs++;
+			total_len += crypt_len;
+		} else {
+			bcopy(DN_BONUS(dnp), DN_BONUS(&ddnp[i]), crypt_len);
+			bcopy(DN_BONUS(dnp), aadp, crypt_len);
+			aadp += crypt_len;
+			aad_len += crypt_len;
+		}
+	}
+
+	*no_crypt = (nr_iovecs == 0);
+	*enc_len = total_len;
+	*authbuf = aadbuf;
+	*auth_len = aad_len;
+
+	/* src is the plaintext side when encrypting, the ciphertext otherwise */
+	if (encrypt) {
+		puio->uio_iov = src_iovecs;
+		puio->uio_iovcnt = nr_src;
+		cuio->uio_iov = dst_iovecs;
+		cuio->uio_iovcnt = nr_dst;
+	} else {
+		puio->uio_iov = dst_iovecs;
+		puio->uio_iovcnt = nr_dst;
+		cuio->uio_iov = src_iovecs;
+		cuio->uio_iovcnt = nr_src;
+	}
+
+	return (0);
+
+error:
+	zio_buf_free(aadbuf, datalen);
+	if (src_iovecs != NULL)
+		kmem_free(src_iovecs, nr_src * sizeof (iovec_t));
+	if (dst_iovecs != NULL)
+		kmem_free(dst_iovecs, nr_dst * sizeof (iovec_t));
+
+	*enc_len = 0;
+	*authbuf = NULL;
+	*auth_len = 0;
+	*no_crypt = B_FALSE;
+	puio->uio_iov = NULL;
+	puio->uio_iovcnt = 0;
+	cuio->uio_iov = NULL;
+	cuio->uio_iovcnt = 0;
+	return (ret);
+}
+
+/*
+ * Build the uios for a block that is encrypted as a whole: one plaintext
+ * iovec and one ciphertext iovec, each covering all datalen bytes, plus a
+ * second ciphertext iovec reserved for the MAC. The MAC entry is cleared
+ * here; the caller is expected to point it at the MAC buffer.
+ *
+ * *enc_len is set to datalen. Always returns 0: the iovec arrays are
+ * allocated with KM_SLEEP, which waits for memory rather than failing, so
+ * there is no error path.
+ */
+/* ARGSUSED */
+static int
+zio_crypt_init_uios_normal(boolean_t encrypt, uint8_t *plainbuf,
+    uint8_t *cipherbuf, uint_t datalen, uio_t *puio, uio_t *cuio,
+    uint_t *enc_len)
+{
+	uint_t nr_plain = 1, nr_cipher = 2;
+	iovec_t *plain_iovecs, *cipher_iovecs;
+
+	/* allocate the iovecs for the plain and cipher data */
+	plain_iovecs = kmem_alloc(nr_plain * sizeof (iovec_t), KM_SLEEP);
+	cipher_iovecs = kmem_alloc(nr_cipher * sizeof (iovec_t), KM_SLEEP);
+
+	plain_iovecs[0].iov_base = (void *)plainbuf;
+	plain_iovecs[0].iov_len = datalen;
+	cipher_iovecs[0].iov_base = (void *)cipherbuf;
+	cipher_iovecs[0].iov_len = datalen;
+
+	/* MAC slot: keep it well-defined until the caller fills it in */
+	cipher_iovecs[1].iov_base = NULL;
+	cipher_iovecs[1].iov_len = 0;
+
+	*enc_len = datalen;
+	puio->uio_iov = plain_iovecs;
+	puio->uio_iovcnt = nr_plain;
+	cuio->uio_iov = cipher_iovecs;
+	cuio->uio_iovcnt = nr_cipher;
+
+	return (0);
+}
+
+/*
+ * Build the plaintext (puio) and ciphertext (cuio) uios consumed by
+ * zio_do_crypt_uio(). ZIL and dnode blocks are only partially encrypted and
+ * go through their own routines, which also produce an authbuf holding the
+ * additional authenticated data (AAD) for the encryption modes; every other
+ * block type is handled by zio_crypt_init_uios_normal() and has no AAD.
+ * In all cases the last ciphertext iovec is pointed at mac.
+ */
+/* ARGSUSED */
+static int
+zio_crypt_init_uios(boolean_t encrypt, uint64_t version, dmu_object_type_t ot,
+    uint8_t *plainbuf, uint8_t *cipherbuf, uint_t datalen, boolean_t byteswap,
+    uint8_t *mac, uio_t *puio, uio_t *cuio, uint_t *enc_len, uint8_t **authbuf,
+    uint_t *auth_len, boolean_t *no_crypt)
+{
+	int err;
+	iovec_t *mac_slot;
+
+	ASSERT(DMU_OT_IS_ENCRYPTED(ot) || ot == DMU_OT_NONE);
+
+	/* pick the routine that knows how to split up this block type */
+	if (ot == DMU_OT_INTENT_LOG) {
+		err = zio_crypt_init_uios_zil(encrypt, plainbuf, cipherbuf,
+		    datalen, byteswap, puio, cuio, enc_len, authbuf, auth_len,
+		    no_crypt);
+	} else if (ot == DMU_OT_DNODE) {
+		err = zio_crypt_init_uios_dnode(encrypt, version, plainbuf,
+		    cipherbuf, datalen, byteswap, puio, cuio, enc_len, authbuf,
+		    auth_len, no_crypt);
+	} else {
+		err = zio_crypt_init_uios_normal(encrypt, plainbuf, cipherbuf,
+		    datalen, puio, cuio, enc_len);
+		*authbuf = NULL;
+		*auth_len = 0;
+		*no_crypt = B_FALSE;
+	}
+
+	if (err != 0)
+		return (err);
+
+	/* populate the uios */
+	puio->uio_segflg = UIO_SYSSPACE;
+	cuio->uio_segflg = UIO_SYSSPACE;
+
+	/* the trailing ciphertext iovec was reserved for the MAC */
+	mac_slot = ((iovec_t *)&cuio->uio_iov[cuio->uio_iovcnt - 1]);
+	mac_slot->iov_base = (void *)mac;
+	mac_slot->iov_len = ZIO_DATA_MAC_LEN;
+
+	return (0);
+}
+
+/*
+ * Primary encryption / decryption entrypoint for zio data.
+ *
+ * encrypt selects the direction. key supplies the cipher suite, the master
+ * key and the currently cached salt / key / template. ot and byteswap
+ * determine how the block is split into encrypted and authenticated parts.
+ * salt, iv and mac are the per-block crypto parameters; plainbuf and
+ * cipherbuf are both datalen bytes long. *no_crypt is set by the uio setup
+ * code (see zio_crypt_init_uios()).
+ *
+ * Returns 0 on success or the error from uio setup, key derivation or the
+ * crypto operation. On a failed decryption the ciphertext is additionally
+ * stashed in failed_decrypt_buf.
+ */
+int
+zio_do_crypt_data(boolean_t encrypt, zio_crypt_key_t *key,
+    dmu_object_type_t ot, boolean_t byteswap, uint8_t *salt, uint8_t *iv,
+    uint8_t *mac, uint_t datalen, uint8_t *plainbuf, uint8_t *cipherbuf,
+    boolean_t *no_crypt)
+{
+	int ret;
+	boolean_t locked = B_FALSE;
+	uint64_t crypt = key->zk_crypt;
+	uint_t keydata_len = zio_crypt_table[crypt].ci_keylen;
+	uint_t enc_len, auth_len;
+	uio_t puio, cuio;
+	uint8_t enc_keydata[MASTER_KEY_MAX_LEN];
+	crypto_key_t tmp_ckey, *ckey = NULL;
+	crypto_ctx_template_t tmpl;
+	uint8_t *authbuf = NULL;
+
+	bzero(&puio, sizeof (uio_t));
+	bzero(&cuio, sizeof (uio_t));
+
+	/* create uios for encryption */
+	ret = zio_crypt_init_uios(encrypt, key->zk_version, ot, plainbuf,
+	    cipherbuf, datalen, byteswap, mac, &puio, &cuio, &enc_len,
+	    &authbuf, &auth_len, no_crypt);
+	if (ret != 0)
+		return (ret);
+
+	/*
+	 * If the needed key is the current one, just use it. Otherwise we
+	 * need to generate a temporary one from the given salt + master key.
+	 * The salt lock is held across the crypto operation only when the
+	 * cached key is used; a derived key lives on our stack.
+	 */
+	rw_enter(&key->zk_salt_lock, RW_READER);
+	locked = B_TRUE;
+
+	if (bcmp(salt, key->zk_salt, ZIO_DATA_SALT_LEN) == 0) {
+		ckey = &key->zk_current_key;
+		tmpl = key->zk_current_tmpl;
+	} else {
+		rw_exit(&key->zk_salt_lock);
+		locked = B_FALSE;
+
+		ret = hkdf_sha512(key->zk_master_keydata, keydata_len, NULL, 0,
+		    salt, ZIO_DATA_SALT_LEN, enc_keydata, keydata_len);
+		if (ret != 0)
+			goto out;
+
+		tmp_ckey.ck_format = CRYPTO_KEY_RAW;
+		tmp_ckey.ck_data = enc_keydata;
+		tmp_ckey.ck_length = CRYPTO_BYTES2BITS(keydata_len);
+
+		ckey = &tmp_ckey;
+		tmpl = NULL;
+	}
+
+	/* perform the encryption / decryption */
+	ret = zio_do_crypt_uio(encrypt, key->zk_crypt, ckey, tmpl, iv, enc_len,
+	    &puio, &cuio, authbuf, auth_len);
+
+out:
+	/*
+	 * Keep a copy of the ciphertext of the most recent failed
+	 * decryption.
+	 *
+	 * NOTE(review): failed_decrypt_buf / failed_decrypt_size are
+	 * replaced here without any locking visible in this function;
+	 * confirm that concurrent decryption failures cannot race.
+	 */
+	if (ret != 0 && !encrypt) {
+		if (failed_decrypt_buf != NULL)
+			kmem_free(failed_decrypt_buf, failed_decrypt_size);
+		failed_decrypt_buf = kmem_alloc(datalen, KM_SLEEP);
+		failed_decrypt_size = datalen;
+		bcopy(cipherbuf, failed_decrypt_buf, datalen);
+	}
+
+	if (locked)
+		rw_exit(&key->zk_salt_lock);
+	if (authbuf != NULL)
+		zio_buf_free(authbuf, datalen);
+
+	/*
+	 * Scrub the derived key unconditionally. If hkdf_sha512() failed,
+	 * ckey was never pointed at tmp_ckey, yet enc_keydata may already
+	 * hold partial key material that must not be left on the stack.
+	 */
+	bzero(enc_keydata, sizeof (enc_keydata));
+
+	zio_crypt_destroy_uio(&puio);
+	zio_crypt_destroy_uio(&cuio);
+
+	return (ret);
+}
+
+/*
+ * Thin wrapper around zio_do_crypt_data() for callers that hold abd's
+ * rather than linear buffers. The input side is borrowed as a copy of the
+ * abd contents; the output side is borrowed without copying and is copied
+ * back into its abd when returned.
+ */
+int
+zio_do_crypt_abd(boolean_t encrypt, zio_crypt_key_t *key, dmu_object_type_t ot,
+    boolean_t byteswap, uint8_t *salt, uint8_t *iv, uint8_t *mac,
+    uint_t datalen, abd_t *pabd, abd_t *cabd, boolean_t *no_crypt)
+{
+	int err;
+	void *plain, *cipher;
+
+	if (encrypt) {
+		plain = abd_borrow_buf_copy(pabd, datalen);
+		cipher = abd_borrow_buf(cabd, datalen);
+	} else {
+		plain = abd_borrow_buf(pabd, datalen);
+		cipher = abd_borrow_buf_copy(cabd, datalen);
+	}
+
+	err = zio_do_crypt_data(encrypt, key, ot, byteswap, salt, iv, mac,
+	    datalen, plain, cipher, no_crypt);
+
+	/* the buffers go back the same way whether or not the call failed */
+	if (encrypt) {
+		abd_return_buf(pabd, plain, datalen);
+		abd_return_buf_copy(cabd, cipher, datalen);
+	} else {
+		abd_return_buf_copy(pabd, plain, datalen);
+		abd_return_buf(cabd, cipher, datalen);
+	}
+
+	return (err);
+}
diff --git a/usr/src/uts/common/fs/zfs/zio_inject.c b/usr/src/uts/common/fs/zfs/zio_inject.c
index 71b859bc3d..f13fb18c16 100644
--- a/usr/src/uts/common/fs/zfs/zio_inject.c
+++ b/usr/src/uts/common/fs/zfs/zio_inject.c
@@ -194,6 +194,37 @@ zio_match_dva(zio_t *zio)
 
 
 /*
+ * Inject a decryption failure. Decryption failures can occur in
+ * both the ARC and the ZIO layers.
+ */
+int
+zio_handle_decrypt_injection(spa_t *spa, const zbookmark_phys_t *zb,
+    uint64_t type, int error)
+{
+	int injected = 0;
+	inject_handler_t *h;
+
+	rw_enter(&inject_lock, RW_READER);
+
+	/*
+	 * Stop at the first decrypt-fault handler registered for this pool
+	 * whose record matches the bookmark, type and error.
+	 */
+	for (h = list_head(&inject_handlers); h != NULL;
+	    h = list_next(&inject_handlers, h)) {
+		if (h->zi_spa == spa &&
+		    h->zi_record.zi_cmd == ZINJECT_DECRYPT_FAULT &&
+		    zio_match_handler((zbookmark_phys_t *)zb, type, ZI_NO_DVA,
+		    &h->zi_record, error)) {
+			injected = error;
+			break;
+		}
+	}
+
+	rw_exit(&inject_lock);
+	return (injected);
+}
+
+/*
  * Determine if the I/O in question should return failure.  Returns the errno
  * to be returned to the caller.
  */
diff --git a/usr/src/uts/common/fs/zfs/zvol.c b/usr/src/uts/common/fs/zfs/zvol.c
index c6d9378649..2ac660e9f7 100644
--- a/usr/src/uts/common/fs/zfs/zvol.c
+++ b/usr/src/uts/common/fs/zfs/zvol.c
@@ -396,6 +396,7 @@ zvol_replay_truncate(void *arg1, void *arg2, boolean_t byteswap)
  * Replay a TX_WRITE ZIL transaction that didn't get committed
  * after a system failure
  */
+/* ARGSUSED */
 static int
 zvol_replay_write(void *arg1, void *arg2, boolean_t byteswap)
 {
@@ -504,7 +505,7 @@ zvol_create_minor(const char *name)
 	}
 
 	/* lie and say we're read-only */
-	error = dmu_objset_own(name, DMU_OST_ZVOL, B_TRUE, FTAG, &os);
+	error = dmu_objset_own(name, DMU_OST_ZVOL, B_TRUE, B_TRUE, FTAG, &os);
 
 	if (error) {
 		mutex_exit(&zfsdev_state_lock);
@@ -512,13 +513,13 @@ zvol_create_minor(const char *name)
 	}
 
 	if ((minor = zfsdev_minor_alloc()) == 0) {
-		dmu_objset_disown(os, FTAG);
+		dmu_objset_disown(os, 1, FTAG);
 		mutex_exit(&zfsdev_state_lock);
 		return (SET_ERROR(ENXIO));
 	}
 
 	if (ddi_soft_state_zalloc(zfsdev_state, minor) != DDI_SUCCESS) {
-		dmu_objset_disown(os, FTAG);
+		dmu_objset_disown(os, 1, FTAG);
 		mutex_exit(&zfsdev_state_lock);
 		return (SET_ERROR(EAGAIN));
 	}
@@ -530,7 +531,7 @@ zvol_create_minor(const char *name)
 	if (ddi_create_minor_node(zfs_dip, chrbuf, S_IFCHR,
 	    minor, DDI_PSEUDO, 0) == DDI_FAILURE) {
 		ddi_soft_state_free(zfsdev_state, minor);
-		dmu_objset_disown(os, FTAG);
+		dmu_objset_disown(os, 1, FTAG);
 		mutex_exit(&zfsdev_state_lock);
 		return (SET_ERROR(EAGAIN));
 	}
@@ -541,7 +542,7 @@ zvol_create_minor(const char *name)
 	    minor, DDI_PSEUDO, 0) == DDI_FAILURE) {
 		ddi_remove_minor_node(zfs_dip, chrbuf);
 		ddi_soft_state_free(zfsdev_state, minor);
-		dmu_objset_disown(os, FTAG);
+		dmu_objset_disown(os, 1, FTAG);
 		mutex_exit(&zfsdev_state_lock);
 		return (SET_ERROR(EAGAIN));
 	}
@@ -569,7 +570,7 @@ zvol_create_minor(const char *name)
 		else
 			zil_replay(os, zv, zvol_replay_vector);
 	}
-	dmu_objset_disown(os, FTAG);
+	dmu_objset_disown(os, 1, FTAG);
 	zv->zv_objset = NULL;
 
 	zvol_minors++;
@@ -633,7 +634,7 @@ zvol_first_open(zvol_state_t *zv)
 	uint64_t readonly;
 
 	/* lie and say we're read-only */
-	error = dmu_objset_own(zv->zv_name, DMU_OST_ZVOL, B_TRUE,
+	error = dmu_objset_own(zv->zv_name, DMU_OST_ZVOL, B_TRUE, B_TRUE,
 	    zvol_tag, &os);
 	if (error)
 		return (error);
@@ -642,13 +643,13 @@ zvol_first_open(zvol_state_t *zv)
 	error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize);
 	if (error) {
 		ASSERT(error == 0);
-		dmu_objset_disown(os, zvol_tag);
+		dmu_objset_disown(os, 1, zvol_tag);
 		return (error);
 	}
 
 	error = dnode_hold(os, ZVOL_OBJ, zvol_tag, &zv->zv_dn);
 	if (error) {
-		dmu_objset_disown(os, zvol_tag);
+		dmu_objset_disown(os, 1, zvol_tag);
 		return (error);
 	}
 
@@ -682,7 +683,7 @@ zvol_last_close(zvol_state_t *zv)
 		txg_wait_synced(dmu_objset_pool(zv->zv_objset), 0);
 	dmu_objset_evict_dbufs(zv->zv_objset);
 
-	dmu_objset_disown(zv->zv_objset, zvol_tag);
+	dmu_objset_disown(zv->zv_objset, 1, zvol_tag);
 	zv->zv_objset = NULL;
 }
 
@@ -730,6 +731,7 @@ zvol_update_volsize(objset_t *os, uint64_t volsize)
 {
 	dmu_tx_t *tx;
 	int error;
+	uint64_t txg;
 
 	ASSERT(MUTEX_HELD(&zfsdev_state_lock));
 
@@ -741,11 +743,14 @@ zvol_update_volsize(objset_t *os, uint64_t volsize)
 		dmu_tx_abort(tx);
 		return (error);
 	}
+	txg = dmu_tx_get_txg(tx);
 
 	error = zap_update(os, ZVOL_ZAP_OBJ, "size", 8, 1,
 	    &volsize, tx);
 	dmu_tx_commit(tx);
 
+	txg_wait_synced(dmu_objset_pool(os), txg);
+
 	if (error == 0)
 		error = dmu_free_long_range(os,
 		    ZVOL_OBJ, volsize, DMU_OBJECT_END);
@@ -850,7 +855,7 @@ zvol_set_volsize(const char *name, uint64_t volsize)
 	zv = zvol_minor_lookup(name);
 
 	if (zv == NULL || zv->zv_objset == NULL) {
-		if ((error = dmu_objset_own(name, DMU_OST_ZVOL, B_FALSE,
+		if ((error = dmu_objset_own(name, DMU_OST_ZVOL, B_FALSE, B_TRUE,
 		    FTAG, &os)) != 0) {
 			mutex_exit(&zfsdev_state_lock);
 			return (error);
@@ -872,7 +877,7 @@ zvol_set_volsize(const char *name, uint64_t volsize)
 		error = zvol_update_live_volsize(zv, volsize);
 out:
 	if (owned) {
-		dmu_objset_disown(os, FTAG);
+		dmu_objset_disown(os, B_TRUE, FTAG);
 		if (zv != NULL)
 			zv->zv_objset = NULL;
 	}
@@ -901,7 +906,12 @@ zvol_open(dev_t *devp, int flag, int otyp, cred_t *cr)
 		mutex_exit(&zfsdev_state_lock);
 		return (err);
 	}
-	if ((flag & FWRITE) && (zv->zv_flags & ZVOL_RDONLY)) {
+	/*
+	 * Check for a bad on-disk format version now since we
+	 * lied about owning the dataset readonly before.
+	 */
+	if ((flag & FWRITE) && ((zv->zv_flags & ZVOL_RDONLY) ||
+	    dmu_objset_incompatible_encryption_version(zv->zv_objset))) {
 		err = SET_ERROR(EROFS);
 		goto out;
 	}
@@ -2099,6 +2109,9 @@ zvol_dumpify(zvol_state_t *zv)
 	if (zv->zv_flags & ZVOL_RDONLY)
 		return (SET_ERROR(EROFS));
 
+	if (os->os_encrypted)
+		return (SET_ERROR(ENOTSUP));
+
 	if (zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ, ZVOL_DUMPSIZE,
 	    8, 1, &dumpsize) != 0 || dumpsize != zv->zv_volsize) {
 		boolean_t resize = (dumpsize > 0);
author	Tom Caputi <tcaputi@datto.com>	2019-06-25 19:39:35 +0000
committer	Jerry Jelinek <jerry.jelinek@joyent.com>	2019-06-25 19:40:06 +0000
commit	eb633035c80613ec93d62f90482837adaaf21a0a (patch)
tree	67f2e3e15231d06a3525ce3958bbce24aa3de7e8 /usr/src/uts/common/fs
parent	07eb1aef88b873c5c1036d9cf69820c1ef6a32fb (diff)
download	illumos-joyent-eb633035c80613ec93d62f90482837adaaf21a0a.tar.gz