PSARC/2007/574 zfs send -R

6358519 'zfs restore' can't restore full backup into topmost filesystem
6421958 want recursive zfs send ('zfs send -r')
6465969 zfs receive error message could be a little more friendly
6482331 assertion failed: ra.err == 0 (0x10 == 0x0)
6577548 nvlist_next_nvpair() can not iterate recursively
6579048 zfs send -i "" fs@snap can succeed
6580447 "zfs list -t filesystem" slowly iterates over all snapshots
6581508 zfs issues confusing error message when doing an incremental send
6585612 'zfs recv -d' cannot receive the top-level filesystem backups
6589317 create-time permissions not granted on filesystems created by "zfs recv"
6596160 zfs create -p -b 1092 <filesystem> should fail.
6619393 help message for ::dbufs is slightly wrong
6620906 zfs_rename() gives incorrect error message
6621295 dsl_deleg_set_sync() should be broken up
author: ahrens <none@none> 2007-10-29 17:12:17 -0700
committer: ahrens <none@none> 2007-10-29 17:12:17 -0700
commit: 3cb34c601f3ef3016f638574f5982e80c3735c71 (patch)
tree: bbaa202cdc73b80f8c5169f479ba79234553d4ba
parent: 7451ee9355b4d9cafcf1bb6055bb01fc7bdaa1a1 (diff)
download: illumos-joyent-3cb34c601f3ef3016f638574f5982e80c3735c71.tar.gz
30 files changed, 3521 insertions, 1327 deletions
diff --git a/usr/src/cmd/mdb/common/modules/zfs/zfs.c b/usr/src/cmd/mdb/common/modules/zfs/zfs.c
index 0e094c2472..134f6d37b2 100644
--- a/usr/src/cmd/mdb/common/modules/zfs/zfs.c
+++ b/usr/src/cmd/mdb/common/modules/zfs/zfs.c
@@ -1892,7 +1892,8 @@ static const mdb_dcmd_t dcmds[] = {
 	{ "dbuf", ":", "print dmu_buf_impl_t", dbuf },
 	{ "dbuf_stats", ":", "dbuf stats", dbuf_stats },
 	{ "dbufs",
-	"\t[-O objset_t*] [-n objset_name | \"mos\"] [-o object | \"mdn\"] \n"
+	"\t[-O objset_impl_t*] [-n objset_name | \"mos\"] "
+	"[-o object | \"mdn\"] \n"
 	"\t[-l level] [-b blkid | \"bonus\"]",
 	"find dmu_buf_impl_t's that match specified criteria", dbufs },
 	{ "abuf_find", "dva_word[0] dva_word[1]",
diff --git a/usr/src/cmd/truss/codes.c b/usr/src/cmd/truss/codes.c
index 1ebc2f7fe0..b3c3d23352 100644
--- a/usr/src/cmd/truss/codes.c
+++ b/usr/src/cmd/truss/codes.c
@@ -897,9 +897,9 @@ const struct ioc {
 		"zfs_cmd_t" },
 	{ (uint_t)ZFS_IOC_RENAME,		"ZFS_IOC_RENAME",
 		"zfs_cmd_t" },
-	{ (uint_t)ZFS_IOC_RECVBACKUP,		"ZFS_IOC_RECVBACKUP",
+	{ (uint_t)ZFS_IOC_RECV,			"ZFS_IOC_RECV",
 		"zfs_cmd_t" },
-	{ (uint_t)ZFS_IOC_SENDBACKUP,		"ZFS_IOC_SENDBACKUP",
+	{ (uint_t)ZFS_IOC_SEND,			"ZFS_IOC_SEND",
 		"zfs_cmd_t" },
 	{ (uint_t)ZFS_IOC_INJECT_FAULT,		"ZFS_IOC_INJECT_FAULT",
 		"zfs_cmd_t" },
diff --git a/usr/src/cmd/zdb/zdb.c b/usr/src/cmd/zdb/zdb.c
index 055c283c1f..426e275080 100644
--- a/usr/src/cmd/zdb/zdb.c
+++ b/usr/src/cmd/zdb/zdb.c
@@ -734,8 +734,8 @@ dump_dsl_dir(objset_t *os, uint64_t object, void *data, size_t size)
 	    (u_longlong_t)dd->dd_head_dataset_obj);
 	(void) printf("\t\tparent_dir_obj = %llu\n",
 	    (u_longlong_t)dd->dd_parent_obj);
-	(void) printf("\t\tclone_parent_obj = %llu\n",
-	    (u_longlong_t)dd->dd_clone_parent_obj);
+	(void) printf("\t\torigin_obj = %llu\n",
+	    (u_longlong_t)dd->dd_origin_obj);
 	(void) printf("\t\tchild_dir_zapobj = %llu\n",
 	    (u_longlong_t)dd->dd_child_dir_zapobj);
 	(void) printf("\t\tused_bytes = %s\n", used);
diff --git a/usr/src/cmd/zfs/zfs_iter.c b/usr/src/cmd/zfs/zfs_iter.c
index abf0d72c66..c888bbafde 100644
--- a/usr/src/cmd/zfs/zfs_iter.c
+++ b/usr/src/cmd/zfs/zfs_iter.c
@@ -68,7 +68,7 @@ uu_avl_pool_t *avl_pool;
  * Called for each dataset.  If the object the object is of an appropriate type,
  * add it to the avl tree and recurse over any children as necessary.
  */
-int
+static int
 zfs_callback(zfs_handle_t *zhp, void *data)
 {
 	callback_data_t *cb = data;
@@ -100,10 +100,13 @@ zfs_callback(zfs_handle_t *zhp, void *data)
 	/*
 	 * Recurse if necessary.
 	 */
-	if (cb->cb_recurse && (zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM ||
-	    (zfs_get_type(zhp) == ZFS_TYPE_VOLUME && (cb->cb_types &
-	    ZFS_TYPE_SNAPSHOT))))
-		(void) zfs_iter_children(zhp, zfs_callback, data);
+	if (cb->cb_recurse) {
+		if (zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM)
+			(void) zfs_iter_filesystems(zhp, zfs_callback, data);
+		if (zfs_get_type(zhp) != ZFS_TYPE_SNAPSHOT &&
+		    (cb->cb_types & ZFS_TYPE_SNAPSHOT))
+			(void) zfs_iter_snapshots(zhp, zfs_callback, data);
+	}
 
 	if (!dontclose)
 		zfs_close(zhp);
diff --git a/usr/src/cmd/zfs/zfs_main.c b/usr/src/cmd/zfs/zfs_main.c
index e7ea903595..8a8fa3b0da 100644
--- a/usr/src/cmd/zfs/zfs_main.c
+++ b/usr/src/cmd/zfs/zfs_main.c
@@ -216,7 +216,7 @@ get_usage(zfs_help_t idx)
 	case HELP_ROLLBACK:
 		return (gettext("\trollback [-rRf] <snapshot>\n"));
 	case HELP_SEND:
-		return (gettext("\tsend [-i snapshot] <snapshot>\n"));
+		return (gettext("\tsend [-R] [-[iI] snapshot] <snapshot>\n"));
 	case HELP_SET:
 		return (gettext("\tset <property=value> "
 		    "<filesystem|volume> ...\n"));
@@ -490,6 +490,7 @@ zfs_do_create(int argc, char **argv)
 	uint64_t volsize;
 	int c;
 	boolean_t noreserve = B_FALSE;
+	boolean_t bflag = B_FALSE;
 	boolean_t parents = B_FALSE;
 	int ret = 1;
 	nvlist_t *props = NULL;
@@ -529,6 +530,7 @@ zfs_do_create(int argc, char **argv)
 			parents = B_TRUE;
 			break;
 		case 'b':
+			bflag = B_TRUE;
 			if (zfs_nicestrtonum(g_zfs, optarg, &intval) != 0) {
 				(void) fprintf(stderr, gettext("bad volume "
 				    "block size '%s': %s\n"), optarg,
@@ -580,9 +582,9 @@ zfs_do_create(int argc, char **argv)
 		}
 	}
 
-	if (noreserve && type != ZFS_TYPE_VOLUME) {
-		(void) fprintf(stderr, gettext("'-s' can only be used when "
-		    "creating a volume\n"));
+	if ((bflag || noreserve) && type != ZFS_TYPE_VOLUME) {
+		(void) fprintf(stderr, gettext("'-s' and '-b' can only be "
+		    "used when creating a volume\n"));
 		goto badusage;
 	}
 
@@ -1316,7 +1318,7 @@ upgrade_list_callback(zfs_handle_t *zhp, void *data)
 
 	/* list if it's old/new */
 	if ((!cb->cb_newer && version < ZPL_VERSION) ||
-	    (cb->cb_newer && version > SPA_VERSION)) {
+	    (cb->cb_newer && version > ZPL_VERSION)) {
 		char *str;
 		if (cb->cb_newer) {
 			str = gettext("The following filesystems are "
@@ -2196,7 +2198,8 @@ zfs_do_snapshot(int argc, char **argv)
 }
 
 /*
- * zfs send [-i <@snap>] <fs@snap>
+ * zfs send [-v] -R [-i|-I <@snap>] <fs@snap>
+ * zfs send [-v] [-i|-I <@snap>] <fs@snap>
  *
  * Send a backup stream to stdout.
  */
@@ -2204,18 +2207,35 @@ static int
 zfs_do_send(int argc, char **argv)
 {
 	char *fromname = NULL;
+	char *toname = NULL;
 	char *cp;
 	zfs_handle_t *zhp;
+	boolean_t doall = B_FALSE;
+	boolean_t replicate = B_FALSE;
+	boolean_t fromorigin = B_FALSE;
+	boolean_t verbose = B_FALSE;
 	int c, err;
 
 	/* check options */
-	while ((c = getopt(argc, argv, ":i:")) != -1) {
+	while ((c = getopt(argc, argv, ":i:I:Rv")) != -1) {
 		switch (c) {
 		case 'i':
 			if (fromname)
 				usage(B_FALSE);
 			fromname = optarg;
 			break;
+		case 'I':
+			if (fromname)
+				usage(B_FALSE);
+			fromname = optarg;
+			doall = B_TRUE;
+			break;
+		case 'R':
+			replicate = B_TRUE;
+			break;
+		case 'v':
+			verbose = B_TRUE;
+			break;
 		case ':':
 			(void) fprintf(stderr, gettext("missing argument for "
 			    "'%c' option\n"), optopt);
@@ -2248,37 +2268,62 @@ zfs_do_send(int argc, char **argv)
 		return (1);
 	}
 
-	if ((zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_SNAPSHOT)) == NULL)
+	cp = strchr(argv[0], '@');
+	if (cp == NULL) {
+		(void) fprintf(stderr,
+		    gettext("argument must be a snapshot\n"));
+		usage(B_FALSE);
+	}
+	*cp = '\0';
+	toname = cp + 1;
+	zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
+	if (zhp == NULL)
 		return (1);
 
 	/*
 	 * If they specified the full path to the snapshot, chop off
-	 * everything except the short name of the snapshot.
+	 * everything except the short name of the snapshot, but special
+	 * case if they specify the origin.
 	 */
 	if (fromname && (cp = strchr(fromname, '@')) != NULL) {
-		if (cp != fromname &&
-		    strncmp(argv[0], fromname, cp - fromname + 1)) {
-			(void) fprintf(stderr,
-			    gettext("incremental source must be "
-			    "in same filesystem\n"));
-			usage(B_FALSE);
-		}
-		fromname = cp + 1;
-		if (strchr(fromname, '@') || strchr(fromname, '/')) {
-			(void) fprintf(stderr,
-			    gettext("invalid incremental source\n"));
-			usage(B_FALSE);
+		char origin[ZFS_MAXNAMELEN];
+		zprop_source_t src;
+
+		(void) zfs_prop_get(zhp, ZFS_PROP_ORIGIN,
+		    origin, sizeof (origin), &src, NULL, 0, B_FALSE);
+
+		if (strcmp(origin, fromname) == 0) {
+			fromname = NULL;
+			fromorigin = B_TRUE;
+		} else {
+			*cp = '\0';
+			if (cp != fromname && strcmp(argv[0], fromname)) {
+				(void) fprintf(stderr,
+				    gettext("incremental source must be "
+				    "in same filesystem\n"));
+				usage(B_FALSE);
+			}
+			fromname = cp + 1;
+			if (strchr(fromname, '@') || strchr(fromname, '/')) {
+				(void) fprintf(stderr,
+				    gettext("invalid incremental source\n"));
+				usage(B_FALSE);
+			}
 		}
 	}
 
-	err = zfs_send(zhp, fromname, STDOUT_FILENO);
+	if (replicate && fromname == NULL)
+		doall = B_TRUE;
+
+	err = zfs_send(zhp, fromname, toname, replicate, doall, fromorigin,
+	    verbose, STDOUT_FILENO);
 	zfs_close(zhp);
 
 	return (err != 0);
 }
 
 /*
- * zfs receive <fs@snap>
+ * zfs receive [-dnvF] <fs@snap>
  *
  * Restore a backup stream from stdin.
  */
@@ -2286,25 +2331,23 @@ static int
 zfs_do_receive(int argc, char **argv)
 {
 	int c, err;
-	boolean_t isprefix = B_FALSE;
-	boolean_t dryrun = B_FALSE;
-	boolean_t verbose = B_FALSE;
-	boolean_t force = B_FALSE;
+	recvflags_t flags;
 
+	bzero(&flags, sizeof (recvflags_t));
 	/* check options */
 	while ((c = getopt(argc, argv, ":dnvF")) != -1) {
 		switch (c) {
 		case 'd':
-			isprefix = B_TRUE;
+			flags.isprefix = B_TRUE;
 			break;
 		case 'n':
-			dryrun = B_TRUE;
+			flags.dryrun = B_TRUE;
 			break;
 		case 'v':
-			verbose = B_TRUE;
+			flags.verbose = B_TRUE;
 			break;
 		case 'F':
-			force = B_TRUE;
+			flags.force = B_TRUE;
 			break;
 		case ':':
 			(void) fprintf(stderr, gettext("missing argument for "
@@ -2339,8 +2382,7 @@ zfs_do_receive(int argc, char **argv)
 		return (1);
 	}
 
-	err = zfs_receive(g_zfs, argv[0], isprefix, verbose, dryrun, force,
-	    STDIN_FILENO);
+	err = zfs_receive(g_zfs, argv[0], flags, STDIN_FILENO, NULL);
 
 	return (err != 0);
 }
@@ -2939,9 +2981,8 @@ share_mount_one(zfs_handle_t *zhp, int op, int flags, char *protocol,
 
 			(void) fprintf(stderr, gettext("cannot %s '%s': "
 			    "legacy mountpoint\n"), cmdname, zfs_get_name(zhp));
-			(void) fprintf(stderr, gettext("use %s to "
-			    "%s this filesystem\n"), op == OP_SHARE ?
-			    "share(1M)" : "mount(1M)", cmdname);
+			(void) fprintf(stderr, gettext("use %s(1M) to "
+			    "%s this filesystem\n"), cmdname, cmdname);
 			return (1);
 		}
 
@@ -3093,8 +3134,10 @@ report_mount_progress(int current, int total)
 	if (current == 1) {
 		(void) printf(gettext("Mounting ZFS filesystems: "));
 		len = 0;
-	} else if (current != total && last_progress_time + MOUNT_TIME >= now)
-		return;		/* too soon to report again */
+	} else if (current != total && last_progress_time + MOUNT_TIME >= now) {
+		/* too soon to report again */
+		return;
+	}
 
 	last_progress_time = now;
 
diff --git a/usr/src/common/nvpair/nvpair.c b/usr/src/common/nvpair/nvpair.c
index 5f66864d6f..3d1f3972af 100644
--- a/usr/src/common/nvpair/nvpair.c
+++ b/usr/src/common/nvpair/nvpair.c
@@ -1126,13 +1126,15 @@ nvlist_next_nvpair(nvlist_t *nvl, nvpair_t *nvp)
 	curr = NVPAIR2I_NVP(nvp);
 
 	/*
-	 * Ensure that nvp is an valid pointer.
+	 * Ensure that nvp is a valid nvpair on this nvlist.
+	 * NB: nvp_curr is used only as a hint so that we don't always
+	 * have to walk the list to determine if nvp is still on the list.
 	 */
 	if (nvp == NULL)
 		curr = priv->nvp_list;
-	else if (priv->nvp_curr == curr)
+	else if (priv->nvp_curr == curr || nvlist_contains_nvp(nvl, nvp))
 		curr = curr->nvi_next;
-	else if (nvlist_contains_nvp(nvl, nvp) == 0)
+	else
 		curr = NULL;
 
 	priv->nvp_curr = curr;
diff --git a/usr/src/lib/libzfs/Makefile.com b/usr/src/lib/libzfs/Makefile.com
index 94accfce61..908a6e981d 100644
--- a/usr/src/lib/libzfs/Makefile.com
+++ b/usr/src/lib/libzfs/Makefile.com
@@ -31,7 +31,7 @@ VERS= .1
 OBJS_SHARED= zfs_namecheck.o zprop_common.o zfs_prop.o zpool_prop.o zfs_deleg.o
 OBJS_COMMON= libzfs_dataset.o libzfs_util.o libzfs_graph.o libzfs_mount.o \
 	libzfs_pool.o libzfs_changelist.o libzfs_config.o libzfs_import.o \
-	libzfs_status.o
+	libzfs_status.o libzfs_sendrecv.o
 OBJECTS= $(OBJS_COMMON) $(OBJS_SHARED)
 
 include ../../Makefile.lib
diff --git a/usr/src/lib/libzfs/common/libzfs.h b/usr/src/lib/libzfs/common/libzfs.h
index dd46a680e7..b70f89a6c7 100644
--- a/usr/src/lib/libzfs/common/libzfs.h
+++ b/usr/src/lib/libzfs/common/libzfs.h
@@ -417,11 +417,22 @@ extern int zfs_clone(zfs_handle_t *, const char *, nvlist_t *);
 extern int zfs_snapshot(libzfs_handle_t *, const char *, boolean_t);
 extern int zfs_rollback(zfs_handle_t *, zfs_handle_t *, int);
 extern int zfs_rename(zfs_handle_t *, const char *, boolean_t);
-extern int zfs_send(zfs_handle_t *, const char *, int);
-extern int zfs_receive(libzfs_handle_t *, const char *, int, int, int,
-    boolean_t, int);
+extern int zfs_send(zfs_handle_t *, const char *, const char *,
+    boolean_t, boolean_t, boolean_t, boolean_t, int);
 extern int zfs_promote(zfs_handle_t *);
 
+typedef struct recvflags {
+	boolean_t verbose : 1;
+	boolean_t isprefix : 1;
+	boolean_t dryrun : 1;
+	boolean_t force : 1;
+	boolean_t canmountoff : 1;
+	boolean_t byteswap : 1;
+} recvflags_t;
+
+extern int zfs_receive(libzfs_handle_t *, const char *, recvflags_t,
+    int, avl_tree_t *);
+
 /*
  * Miscellaneous functions.
  */
diff --git a/usr/src/lib/libzfs/common/libzfs_changelist.c b/usr/src/lib/libzfs/common/libzfs_changelist.c
index 2b53f7d983..6d690e8f78 100644
--- a/usr/src/lib/libzfs/common/libzfs_changelist.c
+++ b/usr/src/lib/libzfs/common/libzfs_changelist.c
@@ -350,14 +350,14 @@ changelist_haszonedchild(prop_changelist_t *clp)
  * Remove a node from a gathered list.
  */
 void
-changelist_remove(zfs_handle_t *zhp, prop_changelist_t *clp)
+changelist_remove(prop_changelist_t *clp, const char *name)
 {
 	prop_changenode_t *cn;
 
 	for (cn = uu_list_first(clp->cl_list); cn != NULL;
 	    cn = uu_list_next(clp->cl_list, cn)) {
 
-		if (strcmp(cn->cn_handle->zfs_name, zhp->zfs_name) == 0) {
+		if (strcmp(cn->cn_handle->zfs_name, name) == 0) {
 			uu_list_remove(clp->cl_list, cn);
 			zfs_close(cn->cn_handle);
 			free(cn);
diff --git a/usr/src/lib/libzfs/common/libzfs_dataset.c b/usr/src/lib/libzfs/common/libzfs_dataset.c
index db912e7f20..1fc002c39c 100644
--- a/usr/src/lib/libzfs/common/libzfs_dataset.c
+++ b/usr/src/lib/libzfs/common/libzfs_dataset.c
@@ -36,6 +36,7 @@
 #include <stdlib.h>
 #include <strings.h>
 #include <unistd.h>
+#include <stddef.h>
 #include <zone.h>
 #include <fcntl.h>
 #include <sys/mntent.h>
@@ -49,7 +50,6 @@
 #include <ucred.h>
 
 #include <sys/spa.h>
-#include <sys/zio.h>
 #include <sys/zap.h>
 #include <sys/zfs_i18n.h>
 #include <libzfs.h>
@@ -59,7 +59,6 @@
 #include "libzfs_impl.h"
 #include "zfs_deleg.h"
 
-static int create_parents(libzfs_handle_t *, char *, int);
 static int zvol_create_link_common(libzfs_handle_t *, const char *, int);
 
 /*
@@ -373,9 +372,6 @@ top:
 			zc.zc_objset_type = DMU_OST_ZFS;
 		}
 
-		/* If we can successfully roll it back, reget the stats */
-		if (ioctl(hdl->libzfs_fd, ZFS_IOC_ROLLBACK, &zc) == 0)
-			goto top;
 		/*
 		 * If we can successfully destroy it, pretend that it
 		 * never existed.
@@ -386,6 +382,9 @@ top:
 			errno = ENOENT;
 			return (NULL);
 		}
+		/* If we can successfully roll it back, reget the stats */
+		if (ioctl(hdl->libzfs_fd, ZFS_IOC_ROLLBACK, &zc) == 0)
+			goto top;
 	}
 
 	/*
@@ -467,7 +466,6 @@ zfs_close(zfs_handle_t *zhp)
 	free(zhp);
 }
 
-
 /*
  * Given an nvlist of properties to set, validates that they are correct, and
  * parses any numeric properties (index, boolean, etc) if they are specified as
@@ -1181,29 +1179,26 @@ static void
 zfs_destroy_perm_tree(avl_tree_t *tree)
 {
 	zfs_perm_node_t *permnode;
-	void *cookie;
+	void *cookie = NULL;
 
-	cookie = NULL;
-	while ((permnode = avl_destroy_nodes(tree,  &cookie)) != NULL) {
-		avl_remove(tree, permnode);
+	while ((permnode = avl_destroy_nodes(tree,  &cookie)) != NULL)
 		free(permnode);
-	}
+	avl_destroy(tree);
 }
 
 static void
 zfs_destroy_tree(avl_tree_t *tree)
 {
 	zfs_allow_node_t *allownode;
-	void *cookie;
+	void *cookie = NULL;
 
-	cookie = NULL;
 	while ((allownode = avl_destroy_nodes(tree, &cookie)) != NULL) {
 		zfs_destroy_perm_tree(&allownode->z_localdescend);
 		zfs_destroy_perm_tree(&allownode->z_local);
 		zfs_destroy_perm_tree(&allownode->z_descend);
-		avl_remove(tree, allownode);
 		free(allownode);
 	}
+	avl_destroy(tree);
 }
 
 void
@@ -2229,10 +2224,9 @@ uint64_t
 zfs_prop_get_int(zfs_handle_t *zhp, zfs_prop_t prop)
 {
 	char *source;
-	zprop_source_t sourcetype = ZPROP_SRC_NONE;
 	uint64_t val;
 
-	(void) get_numeric_property(zhp, prop, &sourcetype, &source, &val);
+	(void) get_numeric_property(zhp, prop, NULL, &source, &val);
 
 	return (val);
 }
@@ -2294,6 +2288,9 @@ zfs_iter_filesystems(zfs_handle_t *zhp, zfs_iter_f func, void *data)
 	zfs_handle_t *nzhp;
 	int ret;
 
+	if (zhp->zfs_type != ZFS_TYPE_FILESYSTEM)
+		return (0);
+
 	for ((void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
 	    ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_DATASET_LIST_NEXT, &zc) == 0;
 	    (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name))) {
@@ -2337,6 +2334,9 @@ zfs_iter_snapshots(zfs_handle_t *zhp, zfs_iter_f func, void *data)
 	zfs_handle_t *nzhp;
 	int ret;
 
+	if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT)
+		return (0);
+
 	for ((void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
 	    ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_SNAPSHOT_LIST_NEXT,
 	    &zc) == 0;
@@ -2501,6 +2501,86 @@ zfs_dataset_exists(libzfs_handle_t *hdl, const char *path, zfs_type_t types)
 }
 
 /*
+ * Given a path to 'target', create all the ancestors between
+ * the prefixlen portion of the path, and the target itself.
+ * Fail if the initial prefixlen-ancestor does not already exist.
+ */
+int
+create_parents(libzfs_handle_t *hdl, char *target, int prefixlen)
+{
+	zfs_handle_t *h;
+	char *cp;
+	const char *opname;
+
+	/* make sure prefix exists */
+	cp = target + prefixlen;
+	if (*cp != '/') {
+		assert(strchr(cp, '/') == NULL);
+		h = zfs_open(hdl, target, ZFS_TYPE_FILESYSTEM);
+	} else {
+		*cp = '\0';
+		h = zfs_open(hdl, target, ZFS_TYPE_FILESYSTEM);
+		*cp = '/';
+	}
+	if (h == NULL)
+		return (-1);
+	zfs_close(h);
+
+	/*
+	 * Attempt to create, mount, and share any ancestor filesystems,
+	 * up to the prefixlen-long one.
+	 */
+	for (cp = target + prefixlen + 1;
+	    cp = strchr(cp, '/'); *cp = '/', cp++) {
+		char *logstr;
+
+		*cp = '\0';
+
+		h = make_dataset_handle(hdl, target);
+		if (h) {
+			/* it already exists, nothing to do here */
+			zfs_close(h);
+			continue;
+		}
+
+		logstr = hdl->libzfs_log_str;
+		hdl->libzfs_log_str = NULL;
+		if (zfs_create(hdl, target, ZFS_TYPE_FILESYSTEM,
+		    NULL) != 0) {
+			hdl->libzfs_log_str = logstr;
+			opname = dgettext(TEXT_DOMAIN, "create");
+			goto ancestorerr;
+		}
+
+		hdl->libzfs_log_str = logstr;
+		h = zfs_open(hdl, target, ZFS_TYPE_FILESYSTEM);
+		if (h == NULL) {
+			opname = dgettext(TEXT_DOMAIN, "open");
+			goto ancestorerr;
+		}
+
+		if (zfs_mount(h, NULL, 0) != 0) {
+			opname = dgettext(TEXT_DOMAIN, "mount");
+			goto ancestorerr;
+		}
+
+		if (zfs_share(h) != 0) {
+			opname = dgettext(TEXT_DOMAIN, "share");
+			goto ancestorerr;
+		}
+
+		zfs_close(h);
+	}
+
+	return (0);
+
+ancestorerr:
+	zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+	    "failed to %s ancestor '%s'"), opname, target);
+	return (-1);
+}
+
+/*
  * Creates non-existing ancestors of the given path.
  */
 int
@@ -2985,7 +3065,7 @@ zfs_promote(zfs_handle_t *zhp)
 		return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
 	}
 
-	(void) strlcpy(parent, zhp->zfs_dmustats.dds_clone_of, sizeof (parent));
+	(void) strlcpy(parent, zhp->zfs_dmustats.dds_origin, sizeof (parent));
 	if (parent[0] == '\0') {
 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 		    "not a cloned filesystem"));
@@ -2995,7 +3075,7 @@ zfs_promote(zfs_handle_t *zhp)
 	*cp = '\0';
 
 	/* Walk the snapshots we will be moving */
-	pzhp = zfs_open(hdl, zhp->zfs_dmustats.dds_clone_of, ZFS_TYPE_SNAPSHOT);
+	pzhp = zfs_open(hdl, zhp->zfs_dmustats.dds_origin, ZFS_TYPE_SNAPSHOT);
 	if (pzhp == NULL)
 		return (-1);
 	pd.cb_pivot_txg = zfs_prop_get_int(pzhp, ZFS_PROP_CREATETXG);
@@ -3014,7 +3094,7 @@ zfs_promote(zfs_handle_t *zhp)
 	}
 
 	/* issue the ioctl */
-	(void) strlcpy(zc.zc_value, zhp->zfs_dmustats.dds_clone_of,
+	(void) strlcpy(zc.zc_value, zhp->zfs_dmustats.dds_origin,
 	    sizeof (zc.zc_value));
 	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
 	ret = zfs_ioctl(hdl, ZFS_IOC_PROMOTE, &zc);
@@ -3161,386 +3241,6 @@ zfs_snapshot(libzfs_handle_t *hdl, const char *path, boolean_t recursive)
 }
 
 /*
- * Dumps a backup of the given snapshot (incremental from fromsnap if it's not
- * NULL) to the file descriptor specified by outfd.
- */
-int
-zfs_send(zfs_handle_t *zhp, const char *fromsnap, int outfd)
-{
-	zfs_cmd_t zc = { 0 };
-	char errbuf[1024];
-	libzfs_handle_t *hdl = zhp->zfs_hdl;
-
-	assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
-
-	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
-	if (fromsnap)
-		(void) strlcpy(zc.zc_value, fromsnap, sizeof (zc.zc_name));
-	zc.zc_cookie = outfd;
-
-	if (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_SENDBACKUP, &zc) != 0) {
-		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
-		    "cannot send '%s'"), zhp->zfs_name);
-
-		switch (errno) {
-
-		case EXDEV:
-			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-			    "not an earlier snapshot from the same fs"));
-			return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
-
-		case EDQUOT:
-		case EFBIG:
-		case EIO:
-		case ENOLINK:
-		case ENOSPC:
-		case ENOSTR:
-		case ENXIO:
-		case EPIPE:
-		case ERANGE:
-		case EFAULT:
-		case EROFS:
-			zfs_error_aux(hdl, strerror(errno));
-			return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
-
-		default:
-			return (zfs_standard_error(hdl, errno, errbuf));
-		}
-	}
-
-	return (0);
-}
-
-/*
- * Create ancestors of 'target', but not target itself, and not
- * ancestors whose names are shorter than prefixlen.  Die if
- * prefixlen-ancestor does not exist.
- */
-static int
-create_parents(libzfs_handle_t *hdl, char *target, int prefixlen)
-{
-	zfs_handle_t *h;
-	char *cp;
-
-	/* make sure prefix exists */
-	cp = strchr(target + prefixlen, '/');
-	if (cp == NULL) {
-		h = zfs_open(hdl, target, ZFS_TYPE_FILESYSTEM);
-	} else {
-		*cp = '\0';
-		h = zfs_open(hdl, target, ZFS_TYPE_FILESYSTEM);
-		*cp = '/';
-	}
-	if (h == NULL)
-		return (-1);
-	zfs_close(h);
-
-	/*
-	 * Attempt to create, mount, and share any ancestor filesystems,
-	 * up to the prefixlen-long one.
-	 */
-	for (cp = target + prefixlen + 1;
-	    cp = strchr(cp, '/'); *cp = '/', cp++) {
-		const char *opname;
-		char *logstr;
-
-		*cp = '\0';
-
-		h = make_dataset_handle(hdl, target);
-		if (h) {
-			/* it already exists, nothing to do here */
-			zfs_close(h);
-			continue;
-		}
-
-		opname = dgettext(TEXT_DOMAIN, "create");
-		logstr = hdl->libzfs_log_str;
-		hdl->libzfs_log_str = NULL;
-		if (zfs_create(hdl, target, ZFS_TYPE_FILESYSTEM,
-		    NULL) != 0) {
-			hdl->libzfs_log_str = logstr;
-			goto ancestorerr;
-		}
-
-		hdl->libzfs_log_str = logstr;
-		opname = dgettext(TEXT_DOMAIN, "open");
-		h = zfs_open(hdl, target, ZFS_TYPE_FILESYSTEM);
-		if (h == NULL)
-			goto ancestorerr;
-
-		opname = dgettext(TEXT_DOMAIN, "mount");
-		if (zfs_mount(h, NULL, 0) != 0)
-			goto ancestorerr;
-
-		opname = dgettext(TEXT_DOMAIN, "share");
-		if (zfs_share(h) != 0)
-			goto ancestorerr;
-
-		zfs_close(h);
-
-		continue;
-ancestorerr:
-		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-		    "failed to %s ancestor '%s'"), opname, target);
-		return (-1);
-	}
-
-	return (0);
-}
-
-/*
- * Restores a backup of tosnap from the file descriptor specified by infd.
- */
-int
-zfs_receive(libzfs_handle_t *hdl, const char *tosnap, int isprefix,
-    int verbose, int dryrun, boolean_t force, int infd)
-{
-	zfs_cmd_t zc = { 0 };
-	time_t begin_time;
-	int ioctl_err, err, bytes, size, choplen;
-	char *cp;
-	dmu_replay_record_t drr;
-	struct drr_begin *drrb = &zc.zc_begin_record;
-	char errbuf[1024];
-	char chopprefix[ZFS_MAXNAMELEN];
-
-	begin_time = time(NULL);
-
-	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
-	    "cannot receive"));
-
-	/* read in the BEGIN record */
-	cp = (char *)&drr;
-	bytes = 0;
-	do {
-		size = read(infd, cp, sizeof (drr) - bytes);
-		cp += size;
-		bytes += size;
-	} while (size > 0);
-
-	if (size < 0 || bytes != sizeof (drr)) {
-		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
-		    "stream (failed to read first record)"));
-		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
-	}
-
-	zc.zc_begin_record = drr.drr_u.drr_begin;
-
-	if (drrb->drr_magic != DMU_BACKUP_MAGIC &&
-	    drrb->drr_magic != BSWAP_64(DMU_BACKUP_MAGIC)) {
-		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
-		    "stream (bad magic number)"));
-		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
-	}
-
-	if (drrb->drr_version != DMU_BACKUP_VERSION &&
-	    drrb->drr_version != BSWAP_64(DMU_BACKUP_VERSION)) {
-		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "only version "
-		    "0x%llx is supported (stream is version 0x%llx)"),
-		    DMU_BACKUP_VERSION, drrb->drr_version);
-		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
-	}
-
-	if (strchr(drr.drr_u.drr_begin.drr_toname, '@') == NULL) {
-		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
-		    "stream (bad snapshot name)"));
-		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
-	}
-	/*
-	 * Determine how much of the snapshot name stored in the stream
-	 * we are going to tack on to the name they specified on the
-	 * command line, and how much we are going to chop off.
-	 *
-	 * If they specified a snapshot, chop the entire name stored in
-	 * the stream.
-	 */
-	(void) strcpy(chopprefix, drr.drr_u.drr_begin.drr_toname);
-	if (isprefix) {
-		/*
-		 * They specified a fs with -d, we want to tack on
-		 * everything but the pool name stored in the stream
-		 */
-		if (strchr(tosnap, '@')) {
-			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
-			    "argument - snapshot not allowed with -d"));
-			return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
-		}
-		cp = strchr(chopprefix, '/');
-		if (cp == NULL)
-			cp = strchr(chopprefix, '@');
-		*cp = '\0';
-	} else if (strchr(tosnap, '@') == NULL) {
-		/*
-		 * If they specified a filesystem without -d, we want to
-		 * tack on everything after the fs specified in the
-		 * first name from the stream.
-		 */
-		cp = strchr(chopprefix, '@');
-		*cp = '\0';
-	}
-	choplen = strlen(chopprefix);
-
-	/*
-	 * Determine name of destination snapshot, store in zc_value.
-	 */
-	(void) strcpy(zc.zc_value, tosnap);
-	(void) strncat(zc.zc_value, drr.drr_u.drr_begin.drr_toname+choplen,
-	    sizeof (zc.zc_value));
-	if (!zfs_validate_name(hdl, zc.zc_value, ZFS_TYPE_SNAPSHOT, B_TRUE))
-		return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
-
-	(void) strcpy(zc.zc_name, zc.zc_value);
-	if (drrb->drr_fromguid) {
-		/* incremental backup stream */
-		zfs_handle_t *h;
-
-		/* do the recvbackup ioctl to the containing fs */
-		*strchr(zc.zc_name, '@') = '\0';
-
-		/* make sure destination fs exists */
-		h = zfs_open(hdl, zc.zc_name,
-		    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
-		if (h == NULL)
-			return (-1);
-		if (!dryrun && h->zfs_type == ZFS_TYPE_VOLUME) {
-			if (zvol_remove_link(hdl, h->zfs_name) != 0) {
-				zfs_close(h);
-				return (-1);
-			}
-		}
-		zfs_close(h);
-	} else {
-		/* full backup stream */
-
-		/* Make sure destination fs does not exist */
-		*strchr(zc.zc_name, '@') = '\0';
-		if (zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) {
-			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-			    "destination '%s' exists"), zc.zc_name);
-			return (zfs_error(hdl, EZFS_EXISTS, errbuf));
-		}
-
-		if (strchr(zc.zc_name, '/') == NULL) {
-			/*
-			 * they're trying to do a recv into a
-			 * nonexistant topmost filesystem.
-			 */
-			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-			    "destination does not exist"), zc.zc_name);
-			return (zfs_error(hdl, EZFS_EXISTS, errbuf));
-		}
-
-		/* Do the recvbackup ioctl to the fs's parent. */
-		*strrchr(zc.zc_name, '/') = '\0';
-
-		if (isprefix && (err = create_parents(hdl,
-		    zc.zc_value, strlen(tosnap))) != 0) {
-			return (zfs_error(hdl, EZFS_BADRESTORE, errbuf));
-		}
-
-	}
-
-	zc.zc_cookie = infd;
-	zc.zc_guid = force;
-	if (verbose) {
-		(void) printf("%s %s stream of %s into %s\n",
-		    dryrun ? "would receive" : "receiving",
-		    drrb->drr_fromguid ? "incremental" : "full",
-		    drr.drr_u.drr_begin.drr_toname,
-		    zc.zc_value);
-		(void) fflush(stdout);
-	}
-	if (dryrun)
-		return (0);
-	err = ioctl_err = zfs_ioctl(hdl, ZFS_IOC_RECVBACKUP, &zc);
-	if (ioctl_err != 0) {
-		switch (errno) {
-		case ENODEV:
-			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-			    "most recent snapshot does not match incremental "
-			    "source"));
-			(void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
-			break;
-		case ETXTBSY:
-			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-			    "destination has been modified since most recent "
-			    "snapshot"));
-			(void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
-			break;
-		case EEXIST:
-			if (drrb->drr_fromguid == 0) {
-				/* it's the containing fs that exists */
-				cp = strchr(zc.zc_value, '@');
-				*cp = '\0';
-			}
-			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-			    "destination already exists"));
-			(void) zfs_error_fmt(hdl, EZFS_EXISTS,
-			    dgettext(TEXT_DOMAIN, "cannot restore to %s"),
-			    zc.zc_value);
-			break;
-		case EINVAL:
-			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
-			break;
-		case ECKSUM:
-			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
-			    "invalid stream (checksum mismatch)"));
-			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
-			break;
-		default:
-			(void) zfs_standard_error(hdl, errno, errbuf);
-		}
-	}
-
-	/*
-	 * Mount or recreate the /dev links for the target filesystem
-	 * (if created, or if we tore them down to do an incremental
-	 * restore), and the /dev links for the new snapshot (if
-	 * created). Also mount any children of the target filesystem
-	 * if we did an incremental receive.
-	 */
-	cp = strchr(zc.zc_value, '@');
-	if (cp && (ioctl_err == 0 || drrb->drr_fromguid)) {
-		zfs_handle_t *h;
-
-		*cp = '\0';
-		h = zfs_open(hdl, zc.zc_value,
-		    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
-		*cp = '@';
-		if (h) {
-			if (h->zfs_type == ZFS_TYPE_VOLUME) {
-				err = zvol_create_link(hdl, h->zfs_name);
-				if (err == 0 && ioctl_err == 0)
-					err = zvol_create_link(hdl,
-					    zc.zc_value);
-			} else if (!drrb->drr_fromguid) {
-				err = zfs_mount(h, NULL, 0);
-			}
-		zfs_close(h);
-		}
-	}
-
-	if (err || ioctl_err)
-		return (-1);
-
-	if (verbose) {
-		char buf1[64];
-		char buf2[64];
-		uint64_t bytes = zc.zc_cookie;
-		time_t delta = time(NULL) - begin_time;
-		if (delta == 0)
-			delta = 1;
-		zfs_nicenum(bytes, buf1, sizeof (buf1));
-		zfs_nicenum(bytes/delta, buf2, sizeof (buf1));
-
-		(void) printf("received %sB stream in %lu seconds (%sB/sec)\n",
-		    buf1, delta, buf2);
-	}
-
-	return (0);
-}
-
-/*
  * Destroy any more recent snapshots.  We invoke this callback on any dependents
  * of the snapshot first.  If the 'cb_dependent' member is non-zero, then this
  * is a dependent and we should just destroy it without checking the transaction
@@ -3577,14 +3277,14 @@ rollback_destroy(zfs_handle_t *zhp, void *data)
 			if (zfs_destroy(zhp) != 0)
 				cbp->cb_error = 1;
 			else
-				changelist_remove(zhp, cbp->cb_clp);
+				changelist_remove(cbp->cb_clp, zhp->zfs_name);
 			zhp->zfs_hdl->libzfs_log_str = logstr;
 		}
 	} else {
 		if (zfs_destroy(zhp) != 0)
 			cbp->cb_error = 1;
 		else
-			changelist_remove(zhp, cbp->cb_clp);
+			changelist_remove(cbp->cb_clp, zhp->zfs_name);
 	}
 
 	zfs_close(zhp);
@@ -3889,7 +3589,7 @@ zfs_rename(zfs_handle_t *zhp, const char *target, boolean_t recursive)
 		 * be in zc.zc_name
 		 */
 		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
-		    "cannot rename to '%s'"), zc.zc_name);
+		    "cannot rename '%s'"), zc.zc_name);
 
 		if (recursive && errno == EEXIST) {
 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
diff --git a/usr/src/lib/libzfs/common/libzfs_graph.c b/usr/src/lib/libzfs/common/libzfs_graph.c
index c283016df7..c6383a3654 100644
--- a/usr/src/lib/libzfs/common/libzfs_graph.c
+++ b/usr/src/lib/libzfs/common/libzfs_graph.c
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -396,8 +396,8 @@ iterate_children(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *dataset)
 	 */
 	(void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
 	if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) == 0 &&
-	    zc.zc_objset_stats.dds_clone_of[0] != '\0') {
-		if (zfs_graph_add(hdl, zgp, zc.zc_objset_stats.dds_clone_of,
+	    zc.zc_objset_stats.dds_origin[0] != '\0') {
+		if (zfs_graph_add(hdl, zgp, zc.zc_objset_stats.dds_origin,
 		    zc.zc_name, zc.zc_objset_stats.dds_creation_txg) != 0)
 			return (-1);
 	}
diff --git a/usr/src/lib/libzfs/common/libzfs_impl.h b/usr/src/lib/libzfs/common/libzfs_impl.h
index cfc03791dd..631a2260ae 100644
--- a/usr/src/lib/libzfs/common/libzfs_impl.h
+++ b/usr/src/lib/libzfs/common/libzfs_impl.h
@@ -141,13 +141,14 @@ void zcmd_free_nvlists(zfs_cmd_t *);
 int changelist_prefix(prop_changelist_t *);
 int changelist_postfix(prop_changelist_t *);
 void changelist_rename(prop_changelist_t *, const char *, const char *);
-void changelist_remove(zfs_handle_t *, prop_changelist_t *);
+void changelist_remove(prop_changelist_t *, const char *);
 void changelist_free(prop_changelist_t *);
 prop_changelist_t *changelist_gather(zfs_handle_t *, zfs_prop_t, int);
 int changelist_unshare(prop_changelist_t *, zfs_share_proto_t *);
 int changelist_haszonedchild(prop_changelist_t *);
 
 void remove_mountpoint(zfs_handle_t *);
+int create_parents(libzfs_handle_t *, char *, int);
 
 zfs_handle_t *make_dataset_handle(libzfs_handle_t *, const char *);
 
diff --git a/usr/src/lib/libzfs/common/libzfs_mount.c b/usr/src/lib/libzfs/common/libzfs_mount.c
index 6810f7efdc..9206021782 100644
--- a/usr/src/lib/libzfs/common/libzfs_mount.c
+++ b/usr/src/lib/libzfs/common/libzfs_mount.c
@@ -543,7 +543,6 @@ static int (*_sa_parse_legacy_options)(sa_group_t, char *, char *);
  * values to be used later. This is triggered by the runtime loader.
  * Make sure the correct ISA version is loaded.
  */
-
 #pragma init(_zfs_init_libshare)
 static void
 _zfs_init_libshare(void)
@@ -596,7 +595,6 @@ _zfs_init_libshare(void)
  * service value is which part(s) of the API to initialize and is a
  * direct map to the libshare sa_init(service) interface.
  */
-
 int
 zfs_init_libshare(libzfs_handle_t *zhandle, int service)
 {
@@ -620,11 +618,9 @@ zfs_init_libshare(libzfs_handle_t *zhandle, int service)
  * Uninitialize the libshare API if it hasn't already been
  * uninitialized. It is OK to call multiple times.
  */
-
 void
 zfs_uninit_libshare(libzfs_handle_t *zhandle)
 {
-
 	if (zhandle != NULL && zhandle->libzfs_sharehdl != NULL) {
 		if (_sa_fini != NULL)
 			_sa_fini(zhandle->libzfs_sharehdl);
@@ -638,18 +634,14 @@ zfs_uninit_libshare(libzfs_handle_t *zhandle)
  * Call the legacy parse interface to get the protocol specific
  * options using the NULL arg to indicate that this is a "parse" only.
  */
-
 int
 zfs_parse_options(char *options, zfs_share_proto_t proto)
 {
-	int ret;
-
-	if (_sa_parse_legacy_options != NULL)
-		ret = _sa_parse_legacy_options(NULL, options,
-		    proto_table[proto].p_name);
-	else
-		ret = SA_CONFIG_ERR;
-	return (ret);
+	if (_sa_parse_legacy_options != NULL) {
+		return (_sa_parse_legacy_options(NULL, options,
+		    proto_table[proto].p_name));
+	}
+	return (SA_CONFIG_ERR);
 }
 
 /*
@@ -658,7 +650,6 @@ zfs_parse_options(char *options, zfs_share_proto_t proto)
  * wrapper around sa_find_share to find a share path in the
  * configuration.
  */
-
 static sa_share_t
 zfs_sa_find_share(sa_handle_t handle, char *path)
 {
@@ -673,7 +664,6 @@ zfs_sa_find_share(sa_handle_t handle, char *path)
  * Wrapper for sa_enable_share which enables a share for a specified
  * protocol.
  */
-
 static int
 zfs_sa_enable_share(sa_share_t share, char *proto)
 {
@@ -688,7 +678,6 @@ zfs_sa_enable_share(sa_share_t share, char *proto)
  * Wrapper for sa_enable_share which disables a share for a specified
  * protocol.
  */
-
 static int
 zfs_sa_disable_share(sa_share_t share, char *proto)
 {
@@ -702,7 +691,6 @@ zfs_sa_disable_share(sa_share_t share, char *proto)
  * protocol specific properties (sharenfs, sharesmb).  We rely
  * on "libshare" to the dirty work for us.
  */
-
 static int
 zfs_share_proto(zfs_handle_t *zhp, zfs_share_proto_t *proto)
 {
diff --git a/usr/src/lib/libzfs/common/libzfs_sendrecv.c b/usr/src/lib/libzfs/common/libzfs_sendrecv.c
new file mode 100644
index 0000000000..15f8a22b6e
--- /dev/null
+++ b/usr/src/lib/libzfs/common/libzfs_sendrecv.c
@@ -0,0 +1,1943 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <assert.h>
+#include <ctype.h>
+#include <errno.h>
+#include <libdevinfo.h>
+#include <libintl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <unistd.h>
+#include <stddef.h>
+#include <fcntl.h>
+#include <sys/mount.h>
+#include <sys/mntent.h>
+#include <sys/mnttab.h>
+#include <sys/avl.h>
+#include <stddef.h>
+
+#include <libzfs.h>
+
+#include "zfs_namecheck.h"
+#include "zfs_prop.h"
+#include "libzfs_impl.h"
+
+#include <fletcher.c> /* XXX */
+
+/*
+ * Routines for dealing with the AVL tree of fs-nvlists
+ */
+typedef struct fsavl_node {
+	avl_node_t fn_node;
+	nvlist_t *fn_nvfs;
+	char *fn_snapname;
+	uint64_t fn_guid;
+} fsavl_node_t;
+
+static int
+fsavl_compare(const void *arg1, const void *arg2)
+{
+	const fsavl_node_t *fn1 = arg1;
+	const fsavl_node_t *fn2 = arg2;
+
+	if (fn1->fn_guid > fn2->fn_guid)
+		return (+1);
+	else if (fn1->fn_guid < fn2->fn_guid)
+		return (-1);
+	else
+		return (0);
+}
+
+/*
+ * Given the GUID of a snapshot, find its containing filesystem and
+ * (optionally) name.
+ */
+static nvlist_t *
+fsavl_find(avl_tree_t *avl, uint64_t snapguid, char **snapname)
+{
+	fsavl_node_t fn_find;
+	fsavl_node_t *fn;
+
+	fn_find.fn_guid = snapguid;
+
+	fn = avl_find(avl, &fn_find, NULL);
+	if (fn) {
+		if (snapname)
+			*snapname = fn->fn_snapname;
+		return (fn->fn_nvfs);
+	}
+	return (NULL);
+}
+
+static avl_tree_t *
+fsavl_create(nvlist_t *fss)
+{
+	avl_tree_t *fsavl;
+	nvpair_t *fselem = NULL;
+
+	fsavl = malloc(sizeof (avl_tree_t));
+	avl_create(fsavl, fsavl_compare, sizeof (fsavl_node_t),
+	    offsetof(fsavl_node_t, fn_node));
+
+	while ((fselem = nvlist_next_nvpair(fss, fselem)) != NULL) {
+		nvlist_t *nvfs, *snaps;
+		nvpair_t *snapelem = NULL;
+
+		VERIFY(0 == nvpair_value_nvlist(fselem, &nvfs));
+		VERIFY(0 == nvlist_lookup_nvlist(nvfs, "snaps", &snaps));
+
+		while ((snapelem =
+		    nvlist_next_nvpair(snaps, snapelem)) != NULL) {
+			fsavl_node_t *fn;
+			uint64_t guid;
+
+			VERIFY(0 == nvpair_value_uint64(snapelem, &guid));
+			fn = malloc(sizeof (fsavl_node_t));
+			fn->fn_nvfs = nvfs;
+			fn->fn_snapname = nvpair_name(snapelem);
+			fn->fn_guid = guid;
+
+			/*
+			 * Note: if there are multiple snaps with the
+			 * same GUID, we ignore all but one.
+			 */
+			if (avl_find(fsavl, fn, NULL) == NULL)
+				avl_add(fsavl, fn);
+			else
+				free(fn);
+		}
+	}
+
+	return (fsavl);
+}
+
+static void
+fsavl_destroy(avl_tree_t *avl)
+{
+	fsavl_node_t *fn;
+	void *cookie = NULL;
+
+	if (avl == NULL)
+		return;
+
+	while ((fn = avl_destroy_nodes(avl, &cookie)) != NULL)
+		free(fn);
+	avl_destroy(avl);
+	free(avl);	/* the tree header itself came from fsavl_create() */
+}
+
+/*
+ * Routines for dealing with the giant nvlist of fs-nvlists, etc.
+ */
+typedef struct send_data {
+	uint64_t parent_fromsnap_guid;
+	nvlist_t *parent_snaps;
+	nvlist_t *fss;
+	const char *fromsnap;
+	const char *tosnap;
+
+	/*
+	 * The header nvlist is of the following format:
+	 * {
+	 *   "tosnap" -> string
+	 *   "fromsnap" -> string (if incremental)
+	 *   "fss" -> {
+	 *	id -> {
+	 *
+	 *	 "name" -> string (full name; for debugging)
+	 *	 "parentfromsnap" -> number (guid of fromsnap in parent)
+	 *
+	 *	 "props" -> { name -> value (only if set here) }
+	 *	 "snaps" -> { name (lastname) -> number (guid) }
+	 *
+	 *	 "origin" -> number (guid) (if clone)
+	 *	 "sent" -> boolean (not on-disk)
+	 *	}
+	 *   }
+	 * }
+	 *
+	 */
+} send_data_t;
+
+static int
+send_iterate_snap(zfs_handle_t *zhp, void *arg)
+{
+	send_data_t *sd = arg;
+	uint64_t guid = zhp->zfs_dmustats.dds_guid;
+	char *snapname;
+
+	snapname = strrchr(zhp->zfs_name, '@')+1;
+
+	VERIFY(0 == nvlist_add_uint64(sd->parent_snaps, snapname, guid));
+	/*
+	 * NB: if there is no fromsnap here (it's a newly created fs in
+	 * an incremental replication), we will substitute the tosnap.
+	 */
+	if ((sd->fromsnap && strcmp(snapname, sd->fromsnap) == 0) ||
+	    (sd->parent_fromsnap_guid == 0 && sd->tosnap &&
+	    strcmp(snapname, sd->tosnap) == 0)) {
+		sd->parent_fromsnap_guid = guid;
+	}
+
+	zfs_close(zhp);
+	return (0);
+}
+
+static void
+send_iterate_prop(zfs_handle_t *zhp, nvlist_t *nv)
+{
+	nvpair_t *elem = NULL;
+
+	while ((elem = nvlist_next_nvpair(zhp->zfs_props, elem)) != NULL) {
+		char *propname = nvpair_name(elem);
+		zfs_prop_t prop = zfs_name_to_prop(propname);
+		nvlist_t *propnv;
+
+		if (!zfs_prop_user(propname) && zfs_prop_readonly(prop))
+			continue;
+
+		verify(nvpair_value_nvlist(elem, &propnv) == 0);
+		if (prop == ZFS_PROP_QUOTA || prop == ZFS_PROP_RESERVATION) {
+			/* these guys are modifiable, but have no source */
+			uint64_t value;
+			verify(nvlist_lookup_uint64(propnv,
+			    ZPROP_VALUE, &value) == 0);
+		} else {
+			char *source;
+			if (nvlist_lookup_string(propnv,
+			    ZPROP_SOURCE, &source) != 0)
+				continue;
+			if (strcmp(source, zhp->zfs_name) != 0)
+				continue;
+		}
+
+		if (zfs_prop_user(propname) ||
+		    zfs_prop_get_type(prop) == PROP_TYPE_STRING) {
+			char *value;
+			verify(nvlist_lookup_string(propnv,
+			    ZPROP_VALUE, &value) == 0);
+			VERIFY(0 == nvlist_add_string(nv, propname, value));
+		} else {
+			uint64_t value;
+			verify(nvlist_lookup_uint64(propnv,
+			    ZPROP_VALUE, &value) == 0);
+			VERIFY(0 == nvlist_add_uint64(nv, propname, value));
+		}
+	}
+}
+
+static int
+send_iterate_fs(zfs_handle_t *zhp, void *arg)
+{
+	send_data_t *sd = arg;
+	nvlist_t *nvfs, *nv;
+	int rv;
+	uint64_t parent_fromsnap_guid_save = sd->parent_fromsnap_guid;
+	uint64_t guid = zhp->zfs_dmustats.dds_guid;
+	char guidstring[64];
+
+	VERIFY(0 == nvlist_alloc(&nvfs, NV_UNIQUE_NAME, 0));
+	VERIFY(0 == nvlist_add_string(nvfs, "name", zhp->zfs_name));
+	VERIFY(0 == nvlist_add_uint64(nvfs, "parentfromsnap",
+	    sd->parent_fromsnap_guid));
+
+	if (zhp->zfs_dmustats.dds_origin[0]) {
+		zfs_handle_t *origin = zfs_open(zhp->zfs_hdl,
+		    zhp->zfs_dmustats.dds_origin, ZFS_TYPE_SNAPSHOT);
+		if (origin == NULL)
+			return (-1);
+		VERIFY(0 == nvlist_add_uint64(nvfs, "origin",
+		    origin->zfs_dmustats.dds_guid));
+	}
+
+	/* iterate over props */
+	VERIFY(0 == nvlist_alloc(&nv, NV_UNIQUE_NAME, 0));
+	send_iterate_prop(zhp, nv);
+	VERIFY(0 == nvlist_add_nvlist(nvfs, "props", nv));
+	nvlist_free(nv);
+
+	/* iterate over snaps, and set sd->parent_fromsnap_guid */
+	sd->parent_fromsnap_guid = 0;
+	VERIFY(0 == nvlist_alloc(&sd->parent_snaps, NV_UNIQUE_NAME, 0));
+	(void) zfs_iter_snapshots(zhp, send_iterate_snap, sd);
+	VERIFY(0 == nvlist_add_nvlist(nvfs, "snaps", sd->parent_snaps));
+	nvlist_free(sd->parent_snaps);
+
+	/* add this fs to nvlist */
+	(void) snprintf(guidstring, sizeof (guidstring),
+	    "0x%llx", (longlong_t)guid);
+	VERIFY(0 == nvlist_add_nvlist(sd->fss, guidstring, nvfs));
+	nvlist_free(nvfs);
+
+	/* iterate over children */
+	rv = zfs_iter_filesystems(zhp, send_iterate_fs, sd);
+
+	sd->parent_fromsnap_guid = parent_fromsnap_guid_save;
+
+	zfs_close(zhp);
+	return (rv);
+}
+
+static int
+gather_nvlist(libzfs_handle_t *hdl, const char *fsname, const char *fromsnap,
+    const char *tosnap, nvlist_t **nvlp, avl_tree_t **avlp)
+{
+	zfs_handle_t *zhp;
+	send_data_t sd = { 0 };
+	int error;
+
+	zhp = zfs_open(hdl, fsname, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
+	if (zhp == NULL)
+		return (EZFS_BADTYPE);
+
+	VERIFY(0 == nvlist_alloc(&sd.fss, NV_UNIQUE_NAME, 0));
+	sd.fromsnap = fromsnap;
+	sd.tosnap = tosnap;
+	error = send_iterate_fs(zhp, &sd);
+
+	*nvlp = sd.fss;
+	if (avlp)
+		*avlp = fsavl_create(sd.fss);
+	return (error);
+}
+
+/*
+ * Routines for dealing with the sorted snapshot functionality
+ */
+typedef struct zfs_node {
+	zfs_handle_t	*zn_handle;
+	avl_node_t	zn_avlnode;
+} zfs_node_t;
+
+static int
+zfs_sort_snaps(zfs_handle_t *zhp, void *data)
+{
+	avl_tree_t *avl = data;
+	zfs_node_t *node = zfs_alloc(zhp->zfs_hdl, sizeof (zfs_node_t));
+
+	node->zn_handle = zhp;
+	avl_add(avl, node);
+	return (0);
+}
+
+/* ARGSUSED */
+static int
+zfs_snapshot_compare(const void *larg, const void *rarg)
+{
+	zfs_handle_t *l = ((zfs_node_t *)larg)->zn_handle;
+	zfs_handle_t *r = ((zfs_node_t *)rarg)->zn_handle;
+	uint64_t lcreate, rcreate;
+
+	/*
+	 * Sort them according to creation time.  We use the hidden
+	 * CREATETXG property to get an absolute ordering of snapshots.
+	 */
+	lcreate = zfs_prop_get_int(l, ZFS_PROP_CREATETXG);
+	rcreate = zfs_prop_get_int(r, ZFS_PROP_CREATETXG);
+
+	if (lcreate < rcreate)
+		return (-1);
+	else if (lcreate > rcreate)
+		return (+1);
+	else
+		return (0);
+}
+
+static int
+zfs_iter_snapshots_sorted(zfs_handle_t *zhp, zfs_iter_f callback, void *data)
+{
+	int ret = 0;
+	zfs_node_t *node;
+	avl_tree_t avl;
+	void *cookie = NULL;
+
+	avl_create(&avl, zfs_snapshot_compare,
+	    sizeof (zfs_node_t), offsetof(zfs_node_t, zn_avlnode));
+
+	ret = zfs_iter_snapshots(zhp, zfs_sort_snaps, &avl);
+
+	for (node = avl_first(&avl); node != NULL; node = AVL_NEXT(&avl, node))
+		ret |= callback(node->zn_handle, data);
+
+	while ((node = avl_destroy_nodes(&avl, &cookie)) != NULL)
+		free(node);
+
+	avl_destroy(&avl);
+
+	return (ret);
+}
+
+/*
+ * Routines specific to "zfs send"
+ */
+typedef struct send_dump_data {
+	/* these are all just the short snapname (the part after the @) */
+	const char *fromsnap;
+	const char *tosnap;
+	char lastsnap[ZFS_MAXNAMELEN];
+	boolean_t seenfrom, seento, replicate, doall, fromorigin;
+	boolean_t verbose;
+	int outfd;
+	boolean_t err;
+	nvlist_t *fss;
+	avl_tree_t *fsavl;
+} send_dump_data_t;
+
+/*
+ * Dumps a backup of the given snapshot (incremental from fromsnap if it's not
+ * NULL) to the file descriptor specified by outfd.
+ */
+static int
+dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, boolean_t fromorigin,
+    int outfd)
+{
+	zfs_cmd_t zc = { 0 };
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
+
+	assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
+	assert(fromsnap == NULL || fromsnap[0] == '\0' || !fromorigin);
+
+	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
+	if (fromsnap)
+		(void) strlcpy(zc.zc_value, fromsnap, sizeof (zc.zc_value));
+	zc.zc_cookie = outfd;
+	zc.zc_obj = fromorigin;
+
+	if (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_SEND, &zc) != 0) {
+		char errbuf[1024];
+		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+		    "warning: cannot send '%s'"), zhp->zfs_name);
+
+		switch (errno) {
+
+		case EXDEV:
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "not an earlier snapshot from the same fs"));
+			return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
+
+		case ENOENT:
+			if (zfs_dataset_exists(hdl, zc.zc_name,
+			    ZFS_TYPE_SNAPSHOT)) {
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "incremental source (@%s) does not exist"),
+				    zc.zc_value);
+			}
+			return (zfs_error(hdl, EZFS_NOENT, errbuf));
+
+		case EDQUOT:
+		case EFBIG:
+		case EIO:
+		case ENOLINK:
+		case ENOSPC:
+		case ENOSTR:
+		case ENXIO:
+		case EPIPE:
+		case ERANGE:
+		case EFAULT:
+		case EROFS:
+			zfs_error_aux(hdl, strerror(errno));
+			return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
+
+		default:
+			return (zfs_standard_error(hdl, errno, errbuf));
+		}
+	}
+
+	return (0);
+}
+
+static int
+dump_snapshot(zfs_handle_t *zhp, void *arg)
+{
+	send_dump_data_t *sdd = arg;
+	const char *thissnap;
+	int err;
+
+	thissnap = strchr(zhp->zfs_name, '@') + 1;
+
+	if (sdd->fromsnap && !sdd->seenfrom &&
+	    strcmp(sdd->fromsnap, thissnap) == 0) {
+		sdd->seenfrom = B_TRUE;
+		(void) strcpy(sdd->lastsnap, thissnap);
+		zfs_close(zhp);
+		return (0);
+	}
+
+	if (sdd->seento || !sdd->seenfrom) {
+		zfs_close(zhp);
+		return (0);
+	}
+
+	/* send it */
+	if (sdd->verbose) {
+		(void) fprintf(stderr, "sending from @%s to %s\n",
+		    sdd->lastsnap, zhp->zfs_name);
+	}
+
+	err = dump_ioctl(zhp, sdd->lastsnap,
+	    sdd->lastsnap[0] == '\0' && (sdd->fromorigin || sdd->replicate),
+	    sdd->outfd);
+
+	if (!sdd->seento && strcmp(sdd->tosnap, thissnap) == 0)
+		sdd->seento = B_TRUE;
+
+	(void) strcpy(sdd->lastsnap, thissnap);
+	zfs_close(zhp);
+	return (err);
+}
+
+static int
+dump_filesystem(zfs_handle_t *zhp, void *arg)
+{
+	int rv = 0;
+	send_dump_data_t *sdd = arg;
+	boolean_t missingfrom = B_FALSE;
+	zfs_cmd_t zc = { 0 };
+
+	(void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s",
+	    zhp->zfs_name, sdd->tosnap);
+	if (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0) {
+		(void) fprintf(stderr, "WARNING: "
+		    "could not send %s@%s: does not exist\n",
+		    zhp->zfs_name, sdd->tosnap);
+		sdd->err = B_TRUE;
+		return (0);
+	}
+
+	if (sdd->replicate && sdd->fromsnap) {
+		/*
+		 * If this fs does not have fromsnap, and we're doing
+		 * recursive, we need to send a full stream from the
+		 * beginning (or an incremental from the origin if this
+		 * is a clone).  If we're doing non-recursive, then let
+		 * them get the error.
+		 */
+		(void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s",
+		    zhp->zfs_name, sdd->fromsnap);
+		if (ioctl(zhp->zfs_hdl->libzfs_fd,
+		    ZFS_IOC_OBJSET_STATS, &zc) != 0) {
+			missingfrom = B_TRUE;
+		}
+	}
+
+	if (sdd->doall) {
+		sdd->seenfrom = sdd->seento = sdd->lastsnap[0] = 0;
+		if (sdd->fromsnap == NULL || missingfrom)
+			sdd->seenfrom = B_TRUE;
+
+		rv = zfs_iter_snapshots_sorted(zhp, dump_snapshot, arg);
+		if (!sdd->seenfrom) {
+			(void) fprintf(stderr,
+			    "WARNING: could not send %s@%s:\n"
+			    "incremental source (%s@%s) does not exist\n",
+			    zhp->zfs_name, sdd->tosnap,
+			    zhp->zfs_name, sdd->fromsnap);
+			sdd->err = B_TRUE;
+		} else if (!sdd->seento) {
+			(void) fprintf(stderr,
+			    "WARNING: could not send %s@%s:\n"
+			    "incremental source (%s@%s) "
+			    "is not earlier than it\n",
+			    zhp->zfs_name, sdd->tosnap,
+			    zhp->zfs_name, sdd->fromsnap);
+			sdd->err = B_TRUE;
+		}
+	} else {
+		zfs_handle_t *snapzhp;
+		char snapname[ZFS_MAXNAMELEN];
+
+		(void) snprintf(snapname, sizeof (snapname), "%s@%s",
+		    zfs_get_name(zhp), sdd->tosnap);
+		snapzhp = zfs_open(zhp->zfs_hdl, snapname, ZFS_TYPE_SNAPSHOT);
+		rv = dump_ioctl(snapzhp,
+		    missingfrom ? NULL : sdd->fromsnap,
+		    sdd->fromorigin || missingfrom,
+		    sdd->outfd);
+		sdd->seento = B_TRUE;
+		zfs_close(snapzhp);
+	}
+
+	return (rv);
+}
+
+static int
+dump_filesystems(zfs_handle_t *rzhp, void *arg)
+{
+	send_dump_data_t *sdd = arg;
+	nvpair_t *fspair;
+	boolean_t needagain, progress;
+
+	if (!sdd->replicate)
+		return (dump_filesystem(rzhp, sdd));
+
+again:
+	needagain = progress = B_FALSE;
+	for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair;
+	    fspair = nvlist_next_nvpair(sdd->fss, fspair)) {
+		nvlist_t *fslist;
+		char *fsname;
+		zfs_handle_t *zhp;
+		int err;
+		uint64_t origin_guid = 0;
+		nvlist_t *origin_nv;
+
+		VERIFY(nvpair_value_nvlist(fspair, &fslist) == 0);
+		if (nvlist_lookup_boolean(fslist, "sent") == 0)
+			continue;
+
+		VERIFY(nvlist_lookup_string(fslist, "name", &fsname) == 0);
+		(void) nvlist_lookup_uint64(fslist, "origin", &origin_guid);
+
+		origin_nv = fsavl_find(sdd->fsavl, origin_guid, NULL);
+		if (origin_nv &&
+		    nvlist_lookup_boolean(origin_nv, "sent") == ENOENT) {
+			/* origin has not been sent yet; skip this clone. */
+			needagain = B_TRUE;
+			continue;
+		}
+
+		zhp = zfs_open(rzhp->zfs_hdl, fsname, ZFS_TYPE_DATASET);
+		/* dump_filesystem() dereferences zhp, so it must be valid */
+		if (zhp == NULL)
+			return (-1);
+		err = dump_filesystem(zhp, sdd);
+		VERIFY(nvlist_add_boolean(fslist, "sent") == 0);
+		progress = B_TRUE;
+		zfs_close(zhp);
+		if (err)
+			return (err);
+	}
+	if (needagain) {
+		assert(progress);
+		goto again;
+	}
+	return (0);
+}
+
+/*
+ * Dumps a backup of tosnap, incremental from fromsnap if it isn't NULL.
+ * If 'doall', dump all intermediate snaps.
+ * If 'replicate', dump special header and do recursively.
+ */
+int
+zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
+    boolean_t replicate, boolean_t doall, boolean_t fromorigin,
+    boolean_t verbose, int outfd)
+{
+	char errbuf[1024];
+	send_dump_data_t sdd = { 0 };
+	int err;
+	nvlist_t *fss = NULL;
+	avl_tree_t *fsavl = NULL;
+
+	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+	    "cannot send '%s'"), zhp->zfs_name);
+
+	if (fromsnap && fromsnap[0] == '\0') {
+		zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
+		    "zero-length incremental source"));
+		return (zfs_error(zhp->zfs_hdl, EZFS_NOENT, errbuf));
+	}
+
+	if (replicate || doall) {
+		dmu_replay_record_t drr = { 0 };
+		char *packbuf = NULL;
+		size_t buflen = 0;
+		zio_cksum_t zc = { 0 };
+
+		assert(fromsnap || doall);
+
+		if (replicate) {
+			nvlist_t *hdrnv;
+
+			VERIFY(0 == nvlist_alloc(&hdrnv, NV_UNIQUE_NAME, 0));
+			if (fromsnap) {
+				VERIFY(0 == nvlist_add_string(hdrnv,
+				    "fromsnap", fromsnap));
+			}
+			VERIFY(0 == nvlist_add_string(hdrnv, "tosnap", tosnap));
+
+			err = gather_nvlist(zhp->zfs_hdl, zhp->zfs_name,
+			    fromsnap, tosnap, &fss, &fsavl);
+			if (err) {
+				nvlist_free(hdrnv);
+				return (err);
+			}
+			VERIFY(0 == nvlist_add_nvlist(hdrnv, "fss", fss));
+			err = nvlist_pack(hdrnv, &packbuf, &buflen,
+			    NV_ENCODE_XDR, 0);
+			nvlist_free(hdrnv);
+			if (err) {
+				fsavl_destroy(fsavl);
+				nvlist_free(fss);
+				return (zfs_standard_error(zhp->zfs_hdl,
+				    err, errbuf));
+			}
+		}
+
+		/* write first begin record */
+		drr.drr_type = DRR_BEGIN;
+		drr.drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC;
+		drr.drr_u.drr_begin.drr_version = DMU_BACKUP_HEADER_VERSION;
+		(void) snprintf(drr.drr_u.drr_begin.drr_toname,
+		    sizeof (drr.drr_u.drr_begin.drr_toname),
+		    "%s@%s", zhp->zfs_name, tosnap);
+		drr.drr_payloadlen = buflen;
+		fletcher_4_incremental_native(&drr, sizeof (drr), &zc);
+		err = write(outfd, &drr, sizeof (drr));
+
+		/* write header nvlist */
+		if (err != -1) {
+			fletcher_4_incremental_native(packbuf, buflen, &zc);
+			err = write(outfd, packbuf, buflen);
+		}
+		free(packbuf);
+		if (err == -1) {
+			fsavl_destroy(fsavl);
+			nvlist_free(fss);
+			return (zfs_standard_error(zhp->zfs_hdl,
+			    errno, errbuf));
+		}
+
+		/* write end record */
+		bzero(&drr, sizeof (drr));
+		drr.drr_type = DRR_END;
+		drr.drr_u.drr_end.drr_checksum = zc;
+		err = write(outfd, &drr, sizeof (drr));
+		if (err == -1) {
+			fsavl_destroy(fsavl);
+			nvlist_free(fss);
+			return (zfs_standard_error(zhp->zfs_hdl,
+			    errno, errbuf));
+		}
+	}
+
+	/* dump each stream */
+	sdd.fromsnap = fromsnap;
+	sdd.tosnap = tosnap;
+	sdd.outfd = outfd;
+	sdd.replicate = replicate;
+	sdd.doall = doall;
+	sdd.fromorigin = fromorigin;
+	sdd.fss = fss;
+	sdd.fsavl = fsavl;
+	sdd.verbose = verbose;
+	err = dump_filesystems(zhp, &sdd);
+	fsavl_destroy(fsavl);
+	nvlist_free(fss);
+
+	if (replicate || doall) {
+		/*
+		 * Write the final end record.  NB: we want to do this even
+		 * if there was some error, because the send may not have
+		 * failed completely.
+		 */
+		dmu_replay_record_t drr = { 0 };
+		drr.drr_type = DRR_END;
+		if (write(outfd, &drr, sizeof (drr)) == -1) {
+			return (zfs_standard_error(zhp->zfs_hdl,
+			    errno, errbuf));
+		}
+	}
+
+	return (err || sdd.err);
+}
+
+/*
+ * Routines specific to "zfs recv"
+ */
+
+static int
+recv_read(libzfs_handle_t *hdl, int fd, void *buf, int ilen,
+    boolean_t byteswap, zio_cksum_t *zc)
+{
+	char *cp = buf;
+	int rv;
+	int len = ilen;
+
+	do {
+		rv = read(fd, cp, len);
+		cp += rv;
+		len -= rv;
+	} while (rv > 0);
+
+	if (rv < 0 || len != 0) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "failed to read from stream"));
+		return (zfs_error(hdl, EZFS_BADSTREAM, dgettext(TEXT_DOMAIN,
+		    "cannot receive")));
+	}
+
+	if (zc) {
+		if (byteswap)
+			fletcher_4_incremental_byteswap(buf, ilen, zc);
+		else
+			fletcher_4_incremental_native(buf, ilen, zc);
+	}
+	return (0);
+}
+
+static int
+recv_read_nvlist(libzfs_handle_t *hdl, int fd, int len, nvlist_t **nvp,
+    boolean_t byteswap, zio_cksum_t *zc)
+{
+	char *buf;
+	int err;
+
+	buf = zfs_alloc(hdl, len);
+	if (buf == NULL)
+		return (ENOMEM);
+
+	err = recv_read(hdl, fd, buf, len, byteswap, zc);
+	if (err != 0) {
+		free(buf);
+		return (err);
+	}
+
+	err = nvlist_unpack(buf, len, nvp, 0);
+	free(buf);
+	if (err != 0) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
+		    "stream (malformed nvlist)"));
+		return (EINVAL);
+	}
+	return (0);
+}
+
+static int
+recv_rename(libzfs_handle_t *hdl, const char *name, const char *tryname,
+    int baselen, char *newname, recvflags_t flags)
+{
+	static int seq;
+	zfs_cmd_t zc = { 0 };
+	int err;
+	prop_changelist_t *clp = NULL;
+
+	if (strchr(name, '@') == NULL) {
+		zfs_handle_t *zhp = zfs_open(hdl, name,
+		    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
+		if (zhp == NULL)
+			return (-1);
+		clp = changelist_gather(zhp, ZFS_PROP_NAME,
+		    flags.force ? MS_FORCE : 0);
+		zfs_close(zhp);
+		if (clp == NULL)
+			return (-1);
+		err = changelist_prefix(clp);
+		if (err)
+			return (err);
+	}
+
+	if (tryname) {
+		(void) strcpy(newname, tryname);
+
+		zc.zc_objset_type = DMU_OST_ZFS;
+		(void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
+		(void) strlcpy(zc.zc_value, tryname, sizeof (zc.zc_value));
+
+		if (flags.verbose) {
+			(void) printf("attempting rename %s to %s\n",
+			    zc.zc_name, zc.zc_value);
+		}
+		err = ioctl(hdl->libzfs_fd, ZFS_IOC_RENAME, &zc);
+		if (err == 0 && clp)
+			changelist_rename(clp, name, tryname);
+	} else {
+		err = ENOENT;
+	}
+
+	if (err != 0 && strncmp(name+baselen, "recv-", 5) != 0) {
+		seq++;
+
+		(void) strncpy(newname, name, baselen);
+		(void) snprintf(newname+baselen, ZFS_MAXNAMELEN-baselen,
+		    "recv-%u-%u", getpid(), seq);
+		(void) strlcpy(zc.zc_value, newname, sizeof (zc.zc_value));
+
+		if (flags.verbose) {
+			(void) printf("failed - trying rename %s to %s\n",
+			    zc.zc_name, zc.zc_value);
+		}
+		err = ioctl(hdl->libzfs_fd, ZFS_IOC_RENAME, &zc);
+		if (err == 0 && clp)
+			changelist_rename(clp, name, newname);
+		if (err && flags.verbose) {
+			(void) printf("failed (%u) - "
+			    "will try again on next pass\n", errno);
+		}
+		err = EAGAIN;
+	} else if (flags.verbose) {
+		if (err == 0)
+			(void) printf("success\n");
+		else
+			(void) printf("failed (%u)\n", errno);
+	}
+
+	if (clp) {
+		(void) changelist_postfix(clp);
+		changelist_free(clp);
+	}
+
+
+	return (err);
+}
+
+static int
+recv_destroy(libzfs_handle_t *hdl, const char *name, int baselen,
+    char *newname, recvflags_t flags)
+{
+	zfs_cmd_t zc = { 0 };
+	int err;
+	zfs_handle_t *zhp = NULL;
+
+	zc.zc_objset_type = DMU_OST_ZFS;
+	(void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
+
+	/* unmount it */
+	if (strchr(name, '@') == NULL) {
+		zhp = zfs_open(hdl, name,
+		    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
+		if (zhp == NULL)
+			return (-1);
+		err = zfs_unmount(zhp, NULL, flags.force ? MS_FORCE : 0);
+		if (err) {
+			zfs_close(zhp);
+			return (err);
+		}
+	}
+
+	if (flags.verbose)
+		(void) printf("attempting destroy %s\n", zc.zc_name);
+	err = ioctl(hdl->libzfs_fd, ZFS_IOC_DESTROY, &zc);
+
+	if (err != 0 && zhp != NULL)	/* zhp stays NULL when name has '@' */
+		(void) zfs_mount(zhp, NULL, 0);
+	if (err != 0)
+		err = recv_rename(hdl, name, NULL, baselen, newname, flags);
+	if (zhp)
+		zfs_close(zhp);
+
+	if (flags.verbose) {
+		if (err == 0)
+			(void) printf("success\n");
+		else
+			(void) printf("failed (%u)\n", errno);
+	}
+
+	return (err);
+}
+
+typedef struct guid_to_name_data {
+	uint64_t guid;
+	char *name;
+} guid_to_name_data_t;
+
+static int
+guid_to_name_cb(zfs_handle_t *zhp, void *arg)
+{
+	guid_to_name_data_t *gtnd = arg;
+	int err = EEXIST;	/* EEXIST signals a match to guid_to_name() */
+
+	if (zhp->zfs_dmustats.dds_guid == gtnd->guid)
+		(void) strcpy(gtnd->name, zhp->zfs_name);
+	else
+		err = zfs_iter_children(zhp, guid_to_name_cb, gtnd);
+	/* close the handle on the match path too; it used to be leaked */
+	zfs_close(zhp);
+	return (err);
+}
+
+static int
+guid_to_name(libzfs_handle_t *hdl, const char *parent, uint64_t guid,
+    char *name)
+{
+	/* exhaustively search all local snapshots */
+	guid_to_name_data_t gtnd;
+	int err = 0;
+	zfs_handle_t *zhp;
+	char *cp;
+
+	gtnd.guid = guid;
+	gtnd.name = name;
+
+	if (strchr(parent, '@') == NULL) {
+		zhp = make_dataset_handle(hdl, parent);
+		if (zhp != NULL) {
+			err = zfs_iter_children(zhp, guid_to_name_cb, &gtnd);
+			zfs_close(zhp);
+			if (err == EEXIST)
+				return (0);
+		}
+	}
+
+	cp = strchr(parent, '/');
+	if (cp)
+		*cp = '\0';
+	zhp = make_dataset_handle(hdl, parent);
+	if (cp)
+		*cp = '/';
+
+	if (zhp) {
+		err = zfs_iter_children(zhp, guid_to_name_cb, &gtnd);
+		zfs_close(zhp);
+	}
+
+	return (err == EEXIST ? 0 : ENOENT);
+
+}
+
+/*
+ * Return true if dataset guid1 is created before guid2.
+ *
+ * A guid of 0 stands for "no snapshot": nothing is created before guid2 == 0,
+ * and guid1 == 0 is created before any nonzero guid2.  Otherwise both guids
+ * are looked up in 'avl', the corresponding snapshots are opened and
+ * zfs_snapshot_compare() decides the order.
+ *
+ * If either snapshot cannot be opened the answer is B_FALSE.
+ */
+static boolean_t
+created_before(libzfs_handle_t *hdl, avl_tree_t *avl,
+    uint64_t guid1, uint64_t guid2)
+{
+	nvlist_t *nvfs;
+	char *fsname, *snapname;
+	char buf[ZFS_MAXNAMELEN];
+	boolean_t rv;
+	zfs_node_t zn1, zn2;
+
+	if (guid2 == 0)
+		return (B_FALSE);
+	if (guid1 == 0)
+		return (B_TRUE);
+
+	nvfs = fsavl_find(avl, guid1, &snapname);
+	VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname));
+	(void) snprintf(buf, sizeof (buf), "%s@%s", fsname, snapname);
+	zn1.zn_handle = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT);
+	if (zn1.zn_handle == NULL)
+		return (B_FALSE);
+
+	nvfs = fsavl_find(avl, guid2, &snapname);
+	VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname));
+	(void) snprintf(buf, sizeof (buf), "%s@%s", fsname, snapname);
+	zn2.zn_handle = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT);
+	if (zn2.zn_handle == NULL) {
+		/* don't leak the handle that did open */
+		zfs_close(zn1.zn_handle);
+		return (B_FALSE);
+	}
+
+	rv = (zfs_snapshot_compare(&zn1, &zn2) == -1);
+
+	zfs_close(zn1.zn_handle);
+	zfs_close(zn2.zn_handle);
+
+	return (rv);
+}
+
+/*
+ * Bring the local datasets under 'tofs' in line with what the replication
+ * stream header (stream_nv / stream_avl) describes:
+ *
+ *  - promote a local clone whose origin differs from the stream's origin,
+ *  - destroy local snapshots and filesystems that are absent from the
+ *    stream (only when flags.force is set),
+ *  - rename local snapshots and filesystems whose names differ from the
+ *    stream's.
+ *
+ * The local state is re-gathered and the pass repeated for as long as a
+ * pass both left work undone and made some progress.
+ *
+ * Returns 0 right away for a dry run, the gather_nvlist() error if the
+ * local state cannot be gathered, and otherwise nonzero if the last pass
+ * still left work undone.
+ */
+static int
+recv_incremental_replication(libzfs_handle_t *hdl, const char *tofs,
+    recvflags_t flags, nvlist_t *stream_nv, avl_tree_t *stream_avl)
+{
+	nvlist_t *local_nv;
+	avl_tree_t *local_avl;
+	nvpair_t *fselem, *nextfselem;
+	char *tosnap, *fromsnap;
+	char newname[ZFS_MAXNAMELEN];
+	int error;
+	boolean_t needagain, progress;
+
+	VERIFY(0 == nvlist_lookup_string(stream_nv, "fromsnap", &fromsnap));
+	VERIFY(0 == nvlist_lookup_string(stream_nv, "tosnap", &tosnap));
+
+	if (flags.dryrun)
+		return (0);
+
+again:
+	needagain = progress = B_FALSE;
+
+	if ((error = gather_nvlist(hdl, tofs, fromsnap, NULL,
+	    &local_nv, &local_avl)) != 0)
+		return (error);
+
+	/*
+	 * Process deletes and renames
+	 */
+	for (fselem = nvlist_next_nvpair(local_nv, NULL);
+	    fselem; fselem = nextfselem) {
+		nvlist_t *nvfs, *snaps;
+		nvlist_t *stream_nvfs = NULL;
+		nvpair_t *snapelem, *nextsnapelem;
+		uint64_t fromguid = 0;
+		uint64_t originguid = 0;
+		uint64_t stream_originguid = 0;
+		uint64_t parent_fromsnap_guid, stream_parent_fromsnap_guid;
+		char *fsname, *stream_fsname;
+		char *fs_tail, *stream_tail;
+
+		/* fetch the successor first; this element may get destroyed */
+		nextfselem = nvlist_next_nvpair(local_nv, fselem);
+
+		VERIFY(0 == nvpair_value_nvlist(fselem, &nvfs));
+		VERIFY(0 == nvlist_lookup_nvlist(nvfs, "snaps", &snaps));
+		VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname));
+		VERIFY(0 == nvlist_lookup_uint64(nvfs, "parentfromsnap",
+		    &parent_fromsnap_guid));
+		(void) nvlist_lookup_uint64(nvfs, "origin", &originguid);
+
+		/*
+		 * First find the stream's fs, so we can check for
+		 * a different origin (due to "zfs promote")
+		 */
+		for (snapelem = nvlist_next_nvpair(snaps, NULL);
+		    snapelem; snapelem = nvlist_next_nvpair(snaps, snapelem)) {
+			uint64_t thisguid;
+
+			VERIFY(0 == nvpair_value_uint64(snapelem, &thisguid));
+			stream_nvfs = fsavl_find(stream_avl, thisguid, NULL);
+
+			if (stream_nvfs != NULL)
+				break;
+		}
+
+		/* check for promote */
+		(void) nvlist_lookup_uint64(stream_nvfs, "origin",
+		    &stream_originguid);
+		if (stream_nvfs && originguid != stream_originguid) {
+			if (created_before(hdl, local_avl, stream_originguid,
+			    originguid)) {
+				/* promote it! */
+				zfs_cmd_t zc = { 0 };
+				nvlist_t *origin_nvfs;
+				char *origin_fsname;
+
+				if (flags.verbose)
+					(void) printf("promoting %s\n", fsname);
+
+				origin_nvfs = fsavl_find(local_avl, originguid,
+				    NULL);
+				VERIFY(0 == nvlist_lookup_string(origin_nvfs,
+				    "name", &origin_fsname));
+				(void) strlcpy(zc.zc_value, origin_fsname,
+				    sizeof (zc.zc_value));
+				(void) strlcpy(zc.zc_name, fsname,
+				    sizeof (zc.zc_name));
+				error = zfs_ioctl(hdl, ZFS_IOC_PROMOTE, &zc);
+				if (error == 0)
+					progress = B_TRUE;
+			}
+			/*
+			 * We had/have the wrong origin, therefore our
+			 * list of snapshots is wrong.  Need to handle
+			 * them on the next pass.
+			 */
+			needagain = B_TRUE;
+			continue;
+		}
+
+		for (snapelem = nvlist_next_nvpair(snaps, NULL);
+		    snapelem; snapelem = nextsnapelem) {
+			uint64_t thisguid;
+			char *stream_snapname;
+			nvlist_t *found;
+
+			nextsnapelem = nvlist_next_nvpair(snaps, snapelem);
+
+			VERIFY(0 == nvpair_value_uint64(snapelem, &thisguid));
+			found = fsavl_find(stream_avl, thisguid,
+			    &stream_snapname);
+
+			/* check for delete */
+			if (found == NULL) {
+				char name[ZFS_MAXNAMELEN];
+
+				if (!flags.force)
+					continue;
+
+				(void) snprintf(name, sizeof (name), "%s@%s",
+				    fsname, nvpair_name(snapelem));
+
+				error = recv_destroy(hdl, name,
+				    strlen(fsname)+1, newname, flags);
+				if (error)
+					needagain = B_TRUE;
+				else
+					progress = B_TRUE;
+				continue;
+			}
+
+			stream_nvfs = found;
+
+			/* check for different snapname */
+			if (strcmp(nvpair_name(snapelem),
+			    stream_snapname) != 0) {
+				char name[ZFS_MAXNAMELEN];
+				char tryname[ZFS_MAXNAMELEN];
+
+				(void) snprintf(name, sizeof (name), "%s@%s",
+				    fsname, nvpair_name(snapelem));
+				(void) snprintf(tryname, sizeof (tryname),
+				    "%s@%s", fsname, stream_snapname);
+
+				error = recv_rename(hdl, name, tryname,
+				    strlen(fsname)+1, newname, flags);
+				if (error)
+					needagain = B_TRUE;
+				else
+					progress = B_TRUE;
+			}
+
+			if (strcmp(stream_snapname, fromsnap) == 0)
+				fromguid = thisguid;
+		}
+
+		/* check for delete */
+		if (stream_nvfs == NULL) {
+			if (!flags.force)
+				continue;
+
+			error = recv_destroy(hdl, fsname, strlen(tofs)+1,
+			    newname, flags);
+			if (error)
+				needagain = B_TRUE;
+			else
+				progress = B_TRUE;
+			continue;
+		}
+
+		/*
+		 * Without the fromsnap there is nothing further to do for
+		 * this fs.  Whether we skip it must not depend on
+		 * verbosity; only the message does.
+		 */
+		if (fromguid == 0) {
+			if (flags.verbose) {
+				(void) printf("local fs %s does not have "
+				    "fromsnap (%s in stream); must have been "
+				    "deleted locally; ignoring\n",
+				    fsname, fromsnap);
+			}
+			continue;
+		}
+
+		VERIFY(0 == nvlist_lookup_string(stream_nvfs,
+		    "name", &stream_fsname));
+		VERIFY(0 == nvlist_lookup_uint64(stream_nvfs,
+		    "parentfromsnap", &stream_parent_fromsnap_guid));
+
+		/*
+		 * Final path components ("/leaf"); either is NULL when the
+		 * name contains no '/', in which case the names are not
+		 * compared.
+		 */
+		fs_tail = strrchr(fsname, '/');
+		stream_tail = strrchr(stream_fsname, '/');
+
+		/* check for rename */
+		if ((stream_parent_fromsnap_guid != 0 &&
+		    stream_parent_fromsnap_guid != parent_fromsnap_guid) ||
+		    (fs_tail != NULL && stream_tail != NULL &&
+		    strcmp(fs_tail, stream_tail) != 0)) {
+			nvlist_t *parent;
+			char tryname[ZFS_MAXNAMELEN];
+
+			parent = fsavl_find(local_avl,
+			    stream_parent_fromsnap_guid, NULL);
+			/*
+			 * NB: parent might not be found if we used the
+			 * tosnap for stream_parent_fromsnap_guid,
+			 * because the parent is a newly-created fs;
+			 * we'll be able to rename it after we recv the
+			 * new fs.
+			 *
+			 * The same fallback is taken if the stream name
+			 * has no final component to append.
+			 */
+			if (parent != NULL && stream_tail != NULL) {
+				char *pname;
+
+				VERIFY(0 == nvlist_lookup_string(parent, "name",
+				    &pname));
+				(void) snprintf(tryname, sizeof (tryname),
+				    "%s%s", pname, stream_tail);
+			} else {
+				tryname[0] = '\0';
+				if (flags.verbose) {
+					(void) printf("local fs %s new parent "
+					    "not found\n", fsname);
+				}
+			}
+
+			error = recv_rename(hdl, fsname, tryname,
+			    strlen(tofs)+1, newname, flags);
+			if (error)
+				needagain = B_TRUE;
+			else
+				progress = B_TRUE;
+		}
+	}
+
+	fsavl_destroy(local_avl);
+	nvlist_free(local_nv);
+
+	if (needagain && progress) {
+		/* do another pass to fix up temporary names */
+		if (flags.verbose)
+			(void) printf("another pass:\n");
+		goto again;
+	}
+
+	return (needagain);
+}
+
+/*
+ * Receive a stream package: a header record (optionally carrying an nvlist
+ * payload) followed by an END record, followed by the contained streams.
+ *
+ * 'drr' is the already-read BEGIN record of the package and 'zc' the
+ * checksum accumulated so far; the checksum is verified against the END
+ * record.  When the header nvlist names a "fromsnap", the local hierarchy
+ * is reconciled with recv_incremental_replication() both before and after
+ * the contained streams are received.
+ *
+ * Returns 0 on success, -1 if any step reported an error, or the error of
+ * the step that aborted processing.
+ */
+static int
+zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname,
+    recvflags_t flags, dmu_replay_record_t *drr, zio_cksum_t *zc)
+{
+	nvlist_t *stream_nv = NULL;
+	avl_tree_t *stream_avl = NULL;
+	char *fromsnap = NULL;
+	char tofs[ZFS_MAXNAMELEN];
+	char errbuf[1024];
+	dmu_replay_record_t drre;
+	int error;
+	boolean_t anyerr = B_FALSE;
+
+	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+	    "cannot receive"));
+
+	if (strchr(destname, '@')) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "can not specify snapshot name for multi-snapshot stream"));
+		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
+	}
+
+	assert(drr->drr_type == DRR_BEGIN);
+	assert(drr->drr_u.drr_begin.drr_magic == DMU_BACKUP_MAGIC);
+	assert(drr->drr_u.drr_begin.drr_version == DMU_BACKUP_HEADER_VERSION);
+
+	/*
+	 * Read in the nvlist from the stream.
+	 */
+	if (drr->drr_payloadlen != 0) {
+		if (!flags.isprefix) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "must use -d to receive replication "
+			    "(send -R) stream"));
+			return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
+		}
+
+		error = recv_read_nvlist(hdl, fd, drr->drr_payloadlen,
+		    &stream_nv, flags.byteswap, zc);
+		if (error) {
+			error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
+			goto out;
+		}
+	}
+
+	/*
+	 * Read in the end record and verify checksum.
+	 */
+	if (0 != (error = recv_read(hdl, fd, &drre, sizeof (drre),
+	    flags.byteswap, NULL)))
+		goto out;
+	if (flags.byteswap) {
+		drre.drr_type = BSWAP_32(drre.drr_type);
+		drre.drr_u.drr_end.drr_checksum.zc_word[0] =
+		    BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[0]);
+		drre.drr_u.drr_end.drr_checksum.zc_word[1] =
+		    BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[1]);
+		drre.drr_u.drr_end.drr_checksum.zc_word[2] =
+		    BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[2]);
+		drre.drr_u.drr_end.drr_checksum.zc_word[3] =
+		    BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[3]);
+	}
+	if (drre.drr_type != DRR_END) {
+		error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
+		goto out;
+	}
+	if (!ZIO_CHECKSUM_EQUAL(drre.drr_u.drr_end.drr_checksum, *zc)) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "incorrect header checksum"));
+		error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
+		goto out;
+	}
+
+	/* fromsnap stays NULL if there is no payload or no such entry */
+	(void) nvlist_lookup_string(stream_nv, "fromsnap", &fromsnap);
+
+	if (drr->drr_payloadlen != 0) {
+		nvlist_t *stream_fss;
+
+		VERIFY(0 == nvlist_lookup_nvlist(stream_nv, "fss",
+		    &stream_fss));
+		stream_avl = fsavl_create(stream_fss);
+
+		if (fromsnap != NULL) {
+			(void) strlcpy(tofs, destname, ZFS_MAXNAMELEN);
+			if (flags.isprefix) {
+				int i = strcspn(drr->drr_u.drr_begin.drr_toname,
+				    "/@");
+				char *atp;
+
+				/* zfs_receive_one() will create_parents() */
+				(void) strlcat(tofs,
+				    &drr->drr_u.drr_begin.drr_toname[i],
+				    ZFS_MAXNAMELEN);
+				/*
+				 * The '@' is missing if the combined name
+				 * did not fit and was truncated.
+				 */
+				atp = strchr(tofs, '@');
+				if (atp == NULL) {
+					zfs_error_aux(hdl,
+					    dgettext(TEXT_DOMAIN,
+					    "destination name is too long"));
+					error = zfs_error(hdl,
+					    EZFS_INVALIDNAME, errbuf);
+					goto out;
+				}
+				*atp = '\0';
+			}
+			anyerr |= recv_incremental_replication(hdl, tofs,
+			    flags, stream_nv, stream_avl);
+		}
+	}
+
+	/* Finally, receive each contained stream */
+	do {
+		/*
+		 * we should figure out if it has a recoverable
+		 * error, in which case do a recv_skip() and drive on.
+		 * Note, if we fail due to already having this guid,
+		 * zfs_receive_one() will take care of it (ie,
+		 * recv_skip() and return 0).
+		 */
+		error = zfs_receive(hdl, destname, flags, fd, stream_avl);
+		if (error == ENODATA) {
+			/* end of the package, not a failure */
+			error = 0;
+			break;
+		}
+		anyerr |= error;
+	} while (error == 0);
+
+	if (drr->drr_payloadlen != 0 && fromsnap != NULL) {
+		/*
+		 * Now that we have the fs's they sent us, try the
+		 * renames again.
+		 */
+		anyerr |= recv_incremental_replication(hdl, tofs, flags,
+		    stream_nv, stream_avl);
+	}
+
+out:
+	fsavl_destroy(stream_avl);
+	if (stream_nv)
+		nvlist_free(stream_nv);
+	if (anyerr)
+		error = -1;
+	return (error);
+}
+
+/*
+ * Read and discard records from 'fd' up to and including the next DRR_END
+ * record.  Record headers are byteswapped as needed when 'byteswap' is set;
+ * payloads are thrown away unexamined.
+ *
+ * Returns 0 once the DRR_END record has been consumed.  Returns -1 if the
+ * scratch buffer cannot be allocated, a read fails, a record announces a
+ * payload larger than the scratch buffer, or an unknown record type is
+ * encountered.
+ */
+static int
+recv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap)
+{
+	const size_t buflen = 1<<20;
+	dmu_replay_record_t *drr;
+	uint64_t len;
+	void *buf;
+	int rv = -1;
+
+	buf = malloc(buflen);
+	if (buf == NULL)
+		return (-1);
+
+	/* XXX would be great to use lseek if possible... */
+	drr = buf;
+
+	while (recv_read(hdl, fd, drr, sizeof (dmu_replay_record_t),
+	    byteswap, NULL) == 0) {
+		if (byteswap)
+			drr->drr_type = BSWAP_32(drr->drr_type);
+
+		switch (drr->drr_type) {
+		case DRR_BEGIN:
+			/* NB: not to be used on v2 stream packages */
+			assert(drr->drr_payloadlen == 0);
+			break;
+
+		case DRR_END:
+			rv = 0;
+			goto out;
+
+		case DRR_OBJECT:
+			if (byteswap) {
+				drr->drr_u.drr_object.drr_bonuslen =
+				    BSWAP_32(drr->drr_u.drr_object.
+				    drr_bonuslen);
+			}
+			/*
+			 * The payload is read over the record header, so
+			 * take the length first and make sure it fits.
+			 */
+			len = P2ROUNDUP(drr->drr_u.drr_object.drr_bonuslen, 8);
+			if (len > buflen ||
+			    recv_read(hdl, fd, buf, len, B_FALSE, NULL) != 0)
+				goto out;
+			break;
+
+		case DRR_WRITE:
+			/*
+			 * NOTE(review): the length is swapped as a 32-bit
+			 * quantity here; confirm this matches the declared
+			 * width of drr_length.
+			 */
+			if (byteswap) {
+				drr->drr_u.drr_write.drr_length =
+				    BSWAP_32(drr->drr_u.drr_write.drr_length);
+			}
+			len = drr->drr_u.drr_write.drr_length;
+			if (len > buflen ||
+			    recv_read(hdl, fd, buf, len, B_FALSE, NULL) != 0)
+				goto out;
+			break;
+
+		case DRR_FREEOBJECTS:
+		case DRR_FREE:
+			/* these records carry no payload */
+			break;
+
+		default:
+			assert(!"invalid record type");
+			/* with asserts compiled out, give up cleanly */
+			goto out;
+		}
+	}
+
+out:
+	free(buf);
+	return (rv);
+}
+
+/*
+ * Restores a backup of tosnap from the file descriptor specified by infd.
+ *
+ * 'drr' is the BEGIN record of the stream in host byte order and
+ * 'drr_noswap' the same record as it was read; the latter is what is
+ * handed to the kernel.  'stream_avl', when non-NULL, is consulted for the
+ * properties to set on the received dataset and for the guid of the
+ * parent's snapshot.
+ *
+ * The destination snapshot name is built from 'tosnap' plus the tail of
+ * the name recorded in the stream.  If the resulting filesystem (or, for
+ * a new filesystem, its parent) does not exist, it is searched for by guid.
+ *
+ * In dry-run mode the stream is consumed with recv_skip() and its result
+ * returned.  Otherwise returns 0 on success and nonzero on failure.
+ *
+ * NOTE(review): the buffer that zcmd_write_src_nvlist() attaches to 'zc'
+ * does not appear to be released on any path through this function;
+ * confirm whether a matching free is required.
+ */
+static int
+zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
+    recvflags_t flags, dmu_replay_record_t *drr,
+    dmu_replay_record_t *drr_noswap, avl_tree_t *stream_avl)
+{
+	zfs_cmd_t zc = { 0 };
+	time_t begin_time;
+	int ioctl_err, ioctl_errno, err, choplen;
+	char *cp;
+	struct drr_begin *drrb = &drr->drr_u.drr_begin;
+	char errbuf[1024];
+	char chopprefix[ZFS_MAXNAMELEN];
+	boolean_t newfs = B_FALSE;
+	boolean_t stream_wantsnewfs;
+	uint64_t parent_snapguid = 0;
+	prop_changelist_t *clp = NULL;
+
+	begin_time = time(NULL);
+
+	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+	    "cannot receive"));
+
+	if (stream_avl != NULL) {
+		nvlist_t *fs = fsavl_find(stream_avl, drrb->drr_toguid, NULL);
+		nvlist_t *props;
+		int write_err;
+
+		(void) nvlist_lookup_uint64(fs, "parentfromsnap",
+		    &parent_snapguid);
+		err = nvlist_lookup_nvlist(fs, "props", &props);
+		if (err)
+			VERIFY(0 == nvlist_alloc(&props, NV_UNIQUE_NAME, 0));
+		if (flags.canmountoff) {
+			VERIFY(0 == nvlist_add_uint64(props,
+			    zfs_prop_to_name(ZFS_PROP_CANMOUNT), 0));
+		}
+		write_err = zcmd_write_src_nvlist(hdl, &zc, props);
+		/* only a list we allocated ourselves is ours to free */
+		if (err)
+			nvlist_free(props);
+		if (write_err != 0)
+			return (-1);
+	}
+
+	/*
+	 * Determine how much of the snapshot name stored in the stream
+	 * we are going to tack on to the name they specified on the
+	 * command line, and how much we are going to chop off.
+	 *
+	 * If they specified a snapshot, chop the entire name stored in
+	 * the stream.
+	 */
+	(void) strcpy(chopprefix, drrb->drr_toname);
+	if (flags.isprefix) {
+		/*
+		 * They specified a fs with -d, we want to tack on
+		 * everything but the pool name stored in the stream
+		 */
+		if (strchr(tosnap, '@')) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
+			    "argument - snapshot not allowed with -d"));
+			return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
+		}
+		cp = strchr(chopprefix, '/');
+		if (cp == NULL)
+			cp = strchr(chopprefix, '@');
+		*cp = '\0';
+	} else if (strchr(tosnap, '@') == NULL) {
+		/*
+		 * If they specified a filesystem without -d, we want to
+		 * tack on everything after the fs specified in the
+		 * first name from the stream.
+		 */
+		cp = strchr(chopprefix, '@');
+		*cp = '\0';
+	}
+	choplen = strlen(chopprefix);
+
+	/*
+	 * Determine name of destination snapshot, store in zc_value.
+	 * Both pieces are bounded by the size of zc_value; a name that
+	 * does not fit is rejected rather than truncated.
+	 */
+	if (strlcpy(zc.zc_value, tosnap, sizeof (zc.zc_value)) >=
+	    sizeof (zc.zc_value) ||
+	    strlcat(zc.zc_value, drrb->drr_toname+choplen,
+	    sizeof (zc.zc_value)) >= sizeof (zc.zc_value))
+		return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
+	if (!zfs_name_valid(zc.zc_value, ZFS_TYPE_SNAPSHOT))
+		return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
+
+	/*
+	 * Determine the name of the origin snapshot, store in zc_string.
+	 */
+	if (drrb->drr_flags & DRR_FLAG_CLONE) {
+		if (guid_to_name(hdl, tosnap,
+		    drrb->drr_fromguid, zc.zc_string) != 0) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "local origin for clone %s does not exist"),
+			    zc.zc_value);
+			return (zfs_error(hdl, EZFS_NOENT, errbuf));
+		}
+		if (flags.verbose)
+			(void) printf("found clone origin %s\n", zc.zc_string);
+	}
+
+	/* a full stream or a clone creates a new fs; drr_fromguid is a guid */
+	stream_wantsnewfs = (drrb->drr_fromguid == 0 ||
+	    (drrb->drr_flags & DRR_FLAG_CLONE));
+
+	if (stream_wantsnewfs) {
+		/*
+		 * if the parent fs does not exist, look for it based on
+		 * the parent snap GUID
+		 */
+		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+		    "cannot receive new filesystem stream"));
+
+		(void) strcpy(zc.zc_name, zc.zc_value);
+		cp = strrchr(zc.zc_name, '/');
+		if (cp)
+			*cp = '\0';
+		if (cp &&
+		    !zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) {
+			char suffix[ZFS_MAXNAMELEN];
+			(void) strcpy(suffix, strrchr(zc.zc_value, '/'));
+			if (guid_to_name(hdl, tosnap, parent_snapguid,
+			    zc.zc_value) == 0) {
+				*strchr(zc.zc_value, '@') = '\0';
+				(void) strcat(zc.zc_value, suffix);
+			}
+		}
+	} else {
+		/*
+		 * if the fs does not exist, look for it based on the
+		 * fromsnap GUID
+		 */
+		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+		    "cannot receive incremental stream"));
+
+		(void) strcpy(zc.zc_name, zc.zc_value);
+		*strchr(zc.zc_name, '@') = '\0';
+
+		if (!zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) {
+			char snap[ZFS_MAXNAMELEN];
+			(void) strcpy(snap, strchr(zc.zc_value, '@'));
+			if (guid_to_name(hdl, tosnap, drrb->drr_fromguid,
+			    zc.zc_value) == 0) {
+				*strchr(zc.zc_value, '@') = '\0';
+				(void) strcat(zc.zc_value, snap);
+			}
+		}
+	}
+
+	/* zc_name is the fs part of the (possibly relocated) zc_value */
+	(void) strcpy(zc.zc_name, zc.zc_value);
+	*strchr(zc.zc_name, '@') = '\0';
+
+	if (zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) {
+		zfs_handle_t *zhp;
+		/*
+		 * Destination fs exists.  Therefore this should either
+		 * be an incremental, or the stream specifies a new fs
+		 * (full stream or clone) and they want us to blow it
+		 * away (and have therefore specified -F and removed any
+		 * snapshots).
+		 */
+
+		if (stream_wantsnewfs) {
+			if (!flags.force) {
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "destination '%s' exists\n"
+				    "must specify -F to overwrite it"),
+				    zc.zc_name);
+				return (zfs_error(hdl, EZFS_EXISTS, errbuf));
+			}
+			if (ioctl(hdl->libzfs_fd, ZFS_IOC_SNAPSHOT_LIST_NEXT,
+			    &zc) == 0) {
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "destination has snapshots (eg. %s)\n"
+				    "must destroy them to overwrite it"),
+				    zc.zc_name);
+				return (zfs_error(hdl, EZFS_EXISTS, errbuf));
+			}
+		}
+
+		zhp = zfs_open(hdl, zc.zc_name,
+		    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
+		if (zhp == NULL)
+			return (-1);
+		if (stream_wantsnewfs &&
+		    zhp->zfs_dmustats.dds_origin[0]) {
+			zfs_close(zhp);
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "destination '%s' is a clone\n"
+			    "must destroy it to overwrite it"),
+			    zc.zc_name);
+			return (zfs_error(hdl, EZFS_EXISTS, errbuf));
+		}
+
+		if (!flags.dryrun && zhp->zfs_type == ZFS_TYPE_FILESYSTEM &&
+		    stream_wantsnewfs) {
+			/* We can't do online recv in this case */
+			clp = changelist_gather(zhp, ZFS_PROP_NAME, 0);
+			if (clp == NULL) {
+				zfs_close(zhp);
+				return (-1);
+			}
+			if (changelist_prefix(clp) != 0) {
+				changelist_free(clp);
+				zfs_close(zhp);
+				return (-1);
+			}
+		}
+		if (!flags.dryrun && zhp->zfs_type == ZFS_TYPE_VOLUME) {
+			if (zvol_remove_link(hdl, zhp->zfs_name) != 0) {
+				zfs_close(zhp);
+				return (-1);
+			}
+		}
+		zfs_close(zhp);
+	} else {
+		/*
+		 * Destination FS does not exist.  Therefore we better
+		 * be creating a new filesystem (either from a full
+		 * backup, or a clone)
+		 */
+
+		if (!stream_wantsnewfs) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "destination '%s' does not exist"), zc.zc_name);
+			return (zfs_error(hdl, EZFS_NOENT, errbuf));
+		}
+
+		/*
+		 * Do the recvbackup ioctl to the fs's parent.  A name
+		 * without '/' has no parent to receive into.
+		 */
+		cp = strrchr(zc.zc_name, '/');
+		if (cp == NULL) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "destination '%s' does not exist"), zc.zc_name);
+			return (zfs_error(hdl, EZFS_NOENT, errbuf));
+		}
+		*cp = '\0';
+
+		if (flags.isprefix && !flags.dryrun) {
+			err = create_parents(hdl, zc.zc_value, strlen(tosnap));
+			if (err != 0) {
+				return (zfs_error(hdl,
+				    EZFS_BADRESTORE, errbuf));
+			}
+		}
+
+		newfs = B_TRUE;
+	}
+
+	zc.zc_begin_record = drr_noswap->drr_u.drr_begin;
+	zc.zc_cookie = infd;
+	zc.zc_guid = flags.force;
+	if (flags.verbose) {
+		(void) printf("%s %s stream of %s into %s\n",
+		    flags.dryrun ? "would receive" : "receiving",
+		    drrb->drr_fromguid ? "incremental" : "full",
+		    drrb->drr_toname, zc.zc_value);
+		(void) fflush(stdout);
+	}
+
+	if (flags.dryrun)
+		return (recv_skip(hdl, infd, flags.byteswap));
+
+	err = ioctl_err = zfs_ioctl(hdl, ZFS_IOC_RECV, &zc);
+	/* save errno before any other call can overwrite it */
+	ioctl_errno = errno;
+	if (err && (ioctl_errno == ENOENT || ioctl_errno == ENODEV)) {
+		/*
+		 * It may be that this snapshot already exists,
+		 * in which case we want to consume & ignore it
+		 * rather than failing.
+		 */
+		avl_tree_t *local_avl;
+		nvlist_t *local_nv, *fs;
+		char *atp = strchr(zc.zc_value, '@');
+
+		/*
+		 * XXX Do this faster by just iterating over snaps in
+		 * this fs.  Also if zc_value does not exist, we will
+		 * get a strange "does not exist" error message.
+		 */
+		*atp = '\0';
+		if (gather_nvlist(hdl, zc.zc_value, NULL, NULL,
+		    &local_nv, &local_avl) == 0) {
+			*atp = '@';
+			/* 'fs' is only tested against NULL after the free */
+			fs = fsavl_find(local_avl, drrb->drr_toguid, NULL);
+			fsavl_destroy(local_avl);
+			nvlist_free(local_nv);
+
+			if (fs != NULL) {
+				if (flags.verbose) {
+					(void) printf("snap %s already exists; "
+					    "ignoring\n", zc.zc_value);
+				}
+				ioctl_err = recv_skip(hdl, infd,
+				    flags.byteswap);
+			}
+		}
+		*atp = '@';
+	}
+
+	if (ioctl_err != 0) {
+		switch (ioctl_errno) {
+		case ENODEV:
+			cp = strchr(zc.zc_value, '@');
+			*cp = '\0';
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "most recent snapshot of %s does not\n"
+			    "match incremental source"), zc.zc_value);
+			(void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
+			*cp = '@';
+			break;
+		case ETXTBSY:
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "destination %s has been modified\n"
+			    "since most recent snapshot"), zc.zc_name);
+			(void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
+			break;
+		case EEXIST:
+			cp = strchr(zc.zc_value, '@');
+			if (newfs) {
+				/* it's the containing fs that exists */
+				*cp = '\0';
+			}
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "destination already exists"));
+			(void) zfs_error_fmt(hdl, EZFS_EXISTS,
+			    dgettext(TEXT_DOMAIN, "cannot restore to %s"),
+			    zc.zc_value);
+			*cp = '@';
+			break;
+		case EINVAL:
+			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
+			break;
+		case ECKSUM:
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "invalid stream (checksum mismatch)"));
+			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
+			break;
+		default:
+			(void) zfs_standard_error(hdl, ioctl_errno, errbuf);
+		}
+	}
+
+	/*
+	 * Mount or recreate the /dev links for the target filesystem
+	 * (if created, or if we tore them down to do an incremental
+	 * restore), and the /dev links for the new snapshot (if
+	 * created). Also mount any children of the target filesystem
+	 * if we did an incremental receive.
+	 */
+	cp = strchr(zc.zc_value, '@');
+	if (cp && (ioctl_err == 0 || !newfs)) {
+		zfs_handle_t *h;
+
+		*cp = '\0';
+		h = zfs_open(hdl, zc.zc_value,
+		    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
+		*cp = '@';
+		if (h) {
+			if (h->zfs_type == ZFS_TYPE_VOLUME) {
+				err = zvol_create_link(hdl, h->zfs_name);
+				if (err == 0 && ioctl_err == 0)
+					err = zvol_create_link(hdl,
+					    zc.zc_value);
+			} else if (newfs) {
+				err = zfs_mount(h, NULL, 0);
+			}
+			zfs_close(h);
+		}
+	}
+
+	if (clp) {
+		err |= changelist_postfix(clp);
+		changelist_free(clp);
+	}
+
+	if (err || ioctl_err)
+		return (-1);
+
+	if (flags.verbose) {
+		char buf1[64];
+		char buf2[64];
+		uint64_t bytes = zc.zc_cookie;
+		time_t delta = time(NULL) - begin_time;
+		/* avoid dividing by zero for sub-second receives */
+		if (delta == 0)
+			delta = 1;
+		zfs_nicenum(bytes, buf1, sizeof (buf1));
+		zfs_nicenum(bytes/delta, buf2, sizeof (buf2));
+
+		(void) printf("received %sB stream in %lu seconds (%sB/sec)\n",
+		    buf1, delta, buf2);
+	}
+
+	return (0);
+}
+
+/*
+ * Restores a backup of tosnap from the file descriptor specified by infd.
+ *
+ * Reads the BEGIN record, detects a stream written in the opposite byte
+ * order (in which case the record and its checksum are redone in host
+ * order and flags.byteswap is set), validates it, and hands off to
+ * zfs_receive_one() for a single stream or zfs_receive_package() for a
+ * stream package.
+ *
+ * Returns ENODATA if an END record is found where a BEGIN record was
+ * expected (the end of a package); otherwise the result of the read, of
+ * zfs_error(), or of the function dispatched to.
+ */
+int
+zfs_receive(libzfs_handle_t *hdl, const char *tosnap, recvflags_t flags,
+    int infd, avl_tree_t *stream_avl)
+{
+	int err;
+	dmu_replay_record_t drr, drr_noswap;
+	struct drr_begin *drrb = &drr.drr_u.drr_begin;
+	char errbuf[1024];
+	zio_cksum_t zcksum = { 0 };
+
+	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+	    "cannot receive"));
+
+	if (flags.isprefix &&
+	    !zfs_dataset_exists(hdl, tosnap, ZFS_TYPE_DATASET)) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "specified fs "
+		    "(%s) does not exist"), tosnap);
+		return (zfs_error(hdl, EZFS_NOENT, errbuf));
+	}
+
+	/* read in the BEGIN record */
+	if (0 != (err = recv_read(hdl, infd, &drr, sizeof (drr), B_FALSE,
+	    &zcksum)))
+		return (err);
+
+	if (drr.drr_type == DRR_END || drr.drr_type == BSWAP_32(DRR_END)) {
+		/* It's the double end record at the end of a package */
+		return (ENODATA);
+	}
+
+	/* the kernel needs the non-byteswapped begin record */
+	drr_noswap = drr;
+
+	if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) {
+		/*
+		 * We computed the checksum in the wrong byteorder in
+		 * recv_read() above; do it again correctly.
+		 */
+		bzero(&zcksum, sizeof (zio_cksum_t));
+		fletcher_4_incremental_byteswap(&drr, sizeof (drr), &zcksum);
+		flags.byteswap = B_TRUE;
+
+		drr.drr_type = BSWAP_32(drr.drr_type);
+		drr.drr_payloadlen = BSWAP_32(drr.drr_payloadlen);
+		drrb->drr_magic = BSWAP_64(drrb->drr_magic);
+		drrb->drr_version = BSWAP_64(drrb->drr_version);
+		drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time);
+		drrb->drr_type = BSWAP_32(drrb->drr_type);
+		drrb->drr_flags = BSWAP_32(drrb->drr_flags);
+		drrb->drr_toguid = BSWAP_64(drrb->drr_toguid);
+		drrb->drr_fromguid = BSWAP_64(drrb->drr_fromguid);
+	}
+
+	if (drrb->drr_magic != DMU_BACKUP_MAGIC || drr.drr_type != DRR_BEGIN) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
+		    "stream (bad magic number)"));
+		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
+	}
+
+	/* both receive paths rely on the stream name containing an '@' */
+	if (strchr(drrb->drr_toname, '@') == NULL) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
+		    "stream (bad snapshot name)"));
+		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
+	}
+
+	if (drrb->drr_version == DMU_BACKUP_STREAM_VERSION) {
+		return (zfs_receive_one(hdl, infd, tosnap, flags,
+		    &drr, &drr_noswap, stream_avl));
+	} else if (drrb->drr_version == DMU_BACKUP_HEADER_VERSION) {
+		return (zfs_receive_package(hdl, infd, tosnap, flags,
+		    &drr, &zcksum));
+	} else {
+		/* %llu needs an unsigned long long, whatever uint64_t is */
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "stream is unsupported version %llu"),
+		    (unsigned long long)drrb->drr_version);
+		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
+	}
+}
diff --git a/usr/src/uts/common/fs/zfs/bplist.c b/usr/src/uts/common/fs/zfs/bplist.c
index ecab27dd1c..e24c88045b 100644
--- a/usr/src/uts/common/fs/zfs/bplist.c
+++ b/usr/src/uts/common/fs/zfs/bplist.c
@@ -278,9 +278,7 @@ bplist_vacate(bplist_t *bpl, dmu_tx_t *tx)
 int
 bplist_space(bplist_t *bpl, uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
 {
-	uint64_t itor = 0, comp = 0, uncomp = 0;
 	int err;
-	blkptr_t bp;
 
 	mutex_enter(&bpl->bpl_lock);
 
@@ -298,6 +296,9 @@ bplist_space(bplist_t *bpl, uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
 	mutex_exit(&bpl->bpl_lock);
 
 	if (!bpl->bpl_havecomp) {
+		uint64_t itor = 0, comp = 0, uncomp = 0;
+		blkptr_t bp;
+
 		while ((err = bplist_iterate(bpl, &itor, &bp)) == 0) {
 			comp += BP_GET_PSIZE(&bp);
 			uncomp += BP_GET_UCSIZE(&bp);
diff --git a/usr/src/uts/common/fs/zfs/dmu_objset.c b/usr/src/uts/common/fs/zfs/dmu_objset.c
index 0f687ff66d..c249d5e20e 100644
--- a/usr/src/uts/common/fs/zfs/dmu_objset.c
+++ b/usr/src/uts/common/fs/zfs/dmu_objset.c
@@ -261,6 +261,45 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
 	return (0);
 }
 
+/*
+ * Attach the objset_impl_t of dataset 'ds' to the caller-supplied 'os',
+ * opening it with dmu_objset_open_impl() if the dataset has no user
+ * pointer yet.  The lookup and the open are done under ds_opening_lock.
+ *
+ * On success os->os is set and os->os_mode is DS_MODE_NONE.  Returns the
+ * dmu_objset_open_impl() error if the open fails, or EINVAL if 'type' is
+ * not DMU_OST_ANY and differs from the objset's on-disk type; in the
+ * EINVAL case 'os' has already been filled in.
+ */
+static int
+dmu_objset_open_ds_os(dsl_dataset_t *ds, objset_t *os, dmu_objset_type_t type)
+{
+	objset_impl_t *osi;
+	int err;
+
+	mutex_enter(&ds->ds_opening_lock);
+	osi = dsl_dataset_get_user_ptr(ds);
+	if (osi == NULL) {
+		err = dmu_objset_open_impl(dsl_dataset_get_spa(ds),
+		    ds, &ds->ds_phys->ds_bp, &osi);
+		if (err) {
+			/* don't return with the opening lock still held */
+			mutex_exit(&ds->ds_opening_lock);
+			return (err);
+		}
+	}
+	mutex_exit(&ds->ds_opening_lock);
+
+	os->os = osi;
+	os->os_mode = DS_MODE_NONE;
+
+	if (type != DMU_OST_ANY && type != os->os->os_phys->os_type)
+		return (EINVAL);
+	return (0);
+}
+
+/*
+ * Allocate an objset_t for dataset 'ds' and fill it in with
+ * dmu_objset_open_ds_os().  On success the new objset is returned through
+ * 'osp' and 0 is returned; on failure the allocation is released, '*osp'
+ * is left untouched and the error is returned.
+ */
+int
+dmu_objset_open_ds(dsl_dataset_t *ds, dmu_objset_type_t type, objset_t **osp)
+{
+	objset_t *newos = kmem_alloc(sizeof (objset_t), KM_SLEEP);
+	int error = dmu_objset_open_ds_os(ds, newos, type);
+
+	if (error != 0) {
+		kmem_free(newos, sizeof (objset_t));
+		return (error);
+	}
+
+	*osp = newos;
+	return (0);
+}
+
 /* called from zpl */
 int
 dmu_objset_open(const char *name, dmu_objset_type_t type, int mode,
@@ -268,9 +307,10 @@ dmu_objset_open(const char *name, dmu_objset_type_t type, int mode,
 {
 	objset_t *os;
 	dsl_dataset_t *ds;
-	objset_impl_t *osi;
 	int err;
 
+	ASSERT(mode != DS_MODE_NONE);
+
 	os = kmem_alloc(sizeof (objset_t), KM_SLEEP);
 	err = dsl_dataset_open(name, mode, os, &ds);
 	if (err) {
@@ -278,34 +318,22 @@ dmu_objset_open(const char *name, dmu_objset_type_t type, int mode,
 		return (err);
 	}
 
-	mutex_enter(&ds->ds_opening_lock);
-	osi = dsl_dataset_get_user_ptr(ds);
-	if (osi == NULL) {
-		err = dmu_objset_open_impl(dsl_dataset_get_spa(ds),
-		    ds, &ds->ds_phys->ds_bp, &osi);
-		if (err) {
-			dsl_dataset_close(ds, mode, os);
-			kmem_free(os, sizeof (objset_t));
-			return (err);
-		}
-	}
-	mutex_exit(&ds->ds_opening_lock);
-
-	os->os = osi;
+	err = dmu_objset_open_ds_os(ds, os, type);
 	os->os_mode = mode;
-
-	if (type != DMU_OST_ANY && type != os->os->os_phys->os_type) {
-		dmu_objset_close(os);
-		return (EINVAL);
+	if (err) {
+		kmem_free(os, sizeof (objset_t));
+		dsl_dataset_close(ds, mode, os);
+	} else {
+		*osp = os;
 	}
-	*osp = os;
-	return (0);
+	return (err);
 }
 
 void
 dmu_objset_close(objset_t *os)
 {
-	dsl_dataset_close(os->os->os_dsl_dataset, os->os_mode, os);
+	if (os->os_mode != DS_MODE_NONE)
+		dsl_dataset_close(os->os->os_dsl_dataset, os->os_mode, os);
 	kmem_free(os, sizeof (objset_t));
 }
 
@@ -499,7 +527,7 @@ dmu_objset_create_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
 	ASSERT(dmu_tx_is_syncing(tx));
 
 	dsobj = dsl_dataset_create_sync(dd, oa->lastname,
-	    oa->clone_parent, tx);
+	    oa->clone_parent, cr, tx);
 
 	VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, dsobj, NULL,
 	    DS_MODE_STANDARD | DS_MODE_READONLY, FTAG, &ds));
@@ -515,11 +543,6 @@ dmu_objset_create_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
 			oa->userfunc(&osi->os, oa->userarg, cr, tx);
 	}
 
-	/*
-	 * Create create time permission if any?
-	 */
-	dsl_deleg_set_create_perms(ds->ds_dir, tx, cr);
-
 	spa_history_internal_log(LOG_DS_CREATE, dd->dd_pool->dp_spa,
 	    tx, cr, "dataset = %llu", dsobj);
 
@@ -580,13 +603,21 @@ dmu_objset_destroy(const char *name)
 	 * It would be nicer to do this in dsl_dataset_destroy_sync(),
 	 * but the replay log objset is modified in open context.
 	 */
-	error = dmu_objset_open(name, DMU_OST_ANY, DS_MODE_EXCLUSIVE, &os);
+	error = dmu_objset_open(name, DMU_OST_ANY,
+	    DS_MODE_EXCLUSIVE|DS_MODE_READONLY, &os);
 	if (error == 0) {
+		dsl_dataset_t *ds = os->os->os_dsl_dataset;
 		zil_destroy(dmu_objset_zil(os), B_FALSE);
-		dmu_objset_close(os);
+
+		/*
+		 * dsl_dataset_destroy() closes the ds.
+		 * os is just used as the tag after it's freed.
+		 */
+		kmem_free(os, sizeof (objset_t));
+		error = dsl_dataset_destroy(ds, os);
 	}
 
-	return (dsl_dataset_destroy(name));
+	return (error);
 }
 
 int
@@ -594,16 +625,23 @@ dmu_objset_rollback(const char *name)
 {
 	int err;
 	objset_t *os;
+	dsl_dataset_t *ds;
 
 	err = dmu_objset_open(name, DMU_OST_ANY,
 	    DS_MODE_EXCLUSIVE | DS_MODE_INCONSISTENT, &os);
 	if (err)
 		return (err);
 
-	/* XXX uncache everything? */
-	err = dsl_dataset_rollback(os->os->os_dsl_dataset);
+	ds = os->os->os_dsl_dataset;
+	err = dsl_dataset_rollback(ds, os->os->os_phys->os_type);
 
-	dmu_objset_close(os);
+	/*
+	 * NB: we close the objset manually because the rollback
+	 * actually implicitly called dmu_objset_evict(), thus freeing
+	 * the objset_impl_t.
+	 */
+	dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, os);
+	kmem_free(os, sizeof (objset_t));
 	return (err);
 }
 
@@ -612,6 +650,12 @@ struct snaparg {
 	char *snapname;
 	char failed[MAXPATHLEN];
 	boolean_t checkperms;
+	list_t objsets;
+};
+
+struct osnode {
+	list_node_t node;
+	objset_t *os;
 };
 
 static int
@@ -653,8 +697,13 @@ dmu_objset_snapshot_one(char *name, void *arg)
 	 */
 	err = zil_suspend(dmu_objset_zil(os));
 	if (err == 0) {
+		struct osnode *osn;
 		dsl_sync_task_create(sn->dstg, dsl_dataset_snapshot_check,
-		    dsl_dataset_snapshot_sync, os, sn->snapname, 3);
+		    dsl_dataset_snapshot_sync, os->os->os_dsl_dataset,
+		    sn->snapname, 3);
+		osn = kmem_alloc(sizeof (struct osnode), KM_SLEEP);
+		osn->os = os;
+		list_insert_tail(&sn->objsets, osn);
 	} else {
 		dmu_objset_close(os);
 	}
@@ -666,6 +715,7 @@ int
 dmu_objset_snapshot(char *fsname, char *snapname, boolean_t recursive)
 {
 	dsl_sync_task_t *dst;
+	struct osnode *osn;
 	struct snaparg sn = { 0 };
 	spa_t *spa;
 	int err;
@@ -678,6 +728,8 @@ dmu_objset_snapshot(char *fsname, char *snapname, boolean_t recursive)
 
 	sn.dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
 	sn.snapname = snapname;
+	list_create(&sn.objsets, sizeof (struct osnode),
+	    offsetof(struct osnode, node));
 
 	if (recursive) {
 		sn.checkperms = B_TRUE;
@@ -695,12 +747,18 @@ dmu_objset_snapshot(char *fsname, char *snapname, boolean_t recursive)
 
 	for (dst = list_head(&sn.dstg->dstg_tasks); dst;
 	    dst = list_next(&sn.dstg->dstg_tasks, dst)) {
-		objset_t *os = dst->dst_arg1;
+		dsl_dataset_t *ds = dst->dst_arg1;
 		if (dst->dst_err)
-			dmu_objset_name(os, sn.failed);
-		zil_resume(dmu_objset_zil(os));
-		dmu_objset_close(os);
+			dsl_dataset_name(ds, sn.failed);
+	}
+
+	while (osn = list_head(&sn.objsets)) {
+		list_remove(&sn.objsets, osn);
+		zil_resume(dmu_objset_zil(osn->os));
+		dmu_objset_close(osn->os);
+		kmem_free(osn, sizeof (struct osnode));
 	}
+	list_destroy(&sn.objsets);
 out:
 	if (err)
 		(void) strcpy(fsname, sn.failed);
diff --git a/usr/src/uts/common/fs/zfs/dmu_send.c b/usr/src/uts/common/fs/zfs/dmu_send.c
index 812abd0265..19009b4415 100644
--- a/usr/src/uts/common/fs/zfs/dmu_send.c
+++ b/usr/src/uts/common/fs/zfs/dmu_send.c
@@ -41,9 +41,12 @@
 #include <sys/zap.h>
 #include <sys/zio_checksum.h>
 
+static char *dmu_recv_tag = "dmu_recv_tag";
+
 struct backuparg {
 	dmu_replay_record_t *drr;
 	vnode_t *vp;
+	offset_t *off;
 	objset_t *os;
 	zio_cksum_t zc;
 	int err;
@@ -59,6 +62,7 @@ dump_bytes(struct backuparg *ba, void *buf, int len)
 	ba->err = vn_rdwr(UIO_WRITE, ba->vp,
 	    (caddr_t)buf, len,
 	    0, UIO_SYSSPACE, FAPPEND, RLIM64_INFINITY, CRED(), &resid);
+	*ba->off += len;
 	return (ba->err);
 }
 
@@ -217,13 +221,15 @@ backup_cb(traverse_blk_cache_t *bc, spa_t *spa, void *arg)
 }
 
 int
-dmu_sendbackup(objset_t *tosnap, objset_t *fromsnap, vnode_t *vp)
+dmu_sendbackup(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin,
+    vnode_t *vp, offset_t *off)
 {
 	dsl_dataset_t *ds = tosnap->os->os_dsl_dataset;
 	dsl_dataset_t *fromds = fromsnap ? fromsnap->os->os_dsl_dataset : NULL;
 	dmu_replay_record_t *drr;
 	struct backuparg ba;
 	int err;
+	uint64_t fromtxg = 0;
 
 	/* tosnap must be a snapshot */
 	if (ds->ds_phys->ds_next_snap_obj == 0)
@@ -231,25 +237,51 @@ dmu_sendbackup(objset_t *tosnap, objset_t *fromsnap, vnode_t *vp)
 
 	/* fromsnap must be an earlier snapshot from the same fs as tosnap */
 	if (fromds && (ds->ds_dir != fromds->ds_dir ||
-	    fromds->ds_phys->ds_creation_txg >=
-	    ds->ds_phys->ds_creation_txg))
+	    fromds->ds_phys->ds_creation_txg >= ds->ds_phys->ds_creation_txg))
 		return (EXDEV);
 
+	if (fromorigin) {
+		if (fromsnap)
+			return (EINVAL);
+
+		if (ds->ds_dir->dd_phys->dd_origin_obj != NULL) {
+			dsl_pool_t *dp = ds->ds_dir->dd_pool;
+			rw_enter(&dp->dp_config_rwlock, RW_READER);
+			err = dsl_dataset_open_obj(dp,
+			    ds->ds_dir->dd_phys->dd_origin_obj, NULL,
+			    DS_MODE_NONE, FTAG, &fromds);
+			rw_exit(&dp->dp_config_rwlock);
+			if (err)
+				return (err);
+		} else {
+			fromorigin = B_FALSE;
+		}
+	}
+
+
 	drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP);
 	drr->drr_type = DRR_BEGIN;
 	drr->drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC;
-	drr->drr_u.drr_begin.drr_version = DMU_BACKUP_VERSION;
+	drr->drr_u.drr_begin.drr_version = DMU_BACKUP_STREAM_VERSION;
 	drr->drr_u.drr_begin.drr_creation_time =
 	    ds->ds_phys->ds_creation_time;
 	drr->drr_u.drr_begin.drr_type = tosnap->os->os_phys->os_type;
+	if (fromorigin)
+		drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CLONE;
 	drr->drr_u.drr_begin.drr_toguid = ds->ds_phys->ds_guid;
 	if (fromds)
 		drr->drr_u.drr_begin.drr_fromguid = fromds->ds_phys->ds_guid;
 	dsl_dataset_name(ds, drr->drr_u.drr_begin.drr_toname);
 
+	if (fromds)
+		fromtxg = fromds->ds_phys->ds_creation_txg;
+	if (fromorigin)
+		dsl_dataset_close(fromds, DS_MODE_NONE, FTAG);
+
 	ba.drr = drr;
 	ba.vp = vp;
 	ba.os = tosnap;
+	ba.off = off;
 	ZIO_SET_CHECKSUM(&ba.zc, 0, 0, 0, 0);
 
 	if (dump_bytes(&ba, drr, sizeof (dmu_replay_record_t))) {
@@ -257,8 +289,7 @@ dmu_sendbackup(objset_t *tosnap, objset_t *fromsnap, vnode_t *vp)
 		return (ba.err);
 	}
 
-	err = traverse_dsl_dataset(ds,
-	    fromds ? fromds->ds_phys->ds_creation_txg : 0,
+	err = traverse_dsl_dataset(ds, fromtxg,
 	    ADVANCE_PRE | ADVANCE_HOLES | ADVANCE_DATA | ADVANCE_NOLOCK,
 	    backup_cb, &ba);
 
@@ -283,271 +314,410 @@ dmu_sendbackup(objset_t *tosnap, objset_t *fromsnap, vnode_t *vp)
 	return (0);
 }
 
-struct restorearg {
-	int err;
-	int byteswap;
-	vnode_t *vp;
-	char *buf;
-	uint64_t voff;
-	int buflen; /* number of valid bytes in buf */
-	int bufoff; /* next offset to read */
-	int bufsize; /* amount of memory allocated for buf */
-	zio_cksum_t zc;
+struct recvbeginsyncarg {
+	const char *tofs;
+	const char *tosnap;
+	dsl_dataset_t *origin;
+	uint64_t fromguid;
+	dmu_objset_type_t type;
+	void *tag;
+	boolean_t force;
+	char clonelastname[MAXNAMELEN];
+	dsl_dataset_t *ds; /* the ds to recv into; returned from the syncfunc */
 };
 
-static int
-replay_incremental_check(dsl_dataset_t *ds, struct drr_begin *drrb)
-{
-	const char *snapname;
-	int err;
-	uint64_t val;
-
-	/* must already be a snapshot of this fs */
-	if (ds->ds_phys->ds_prev_snap_obj == 0)
-		return (ENODEV);
-
-	/* most recent snapshot must match fromguid */
-	if (ds->ds_prev->ds_phys->ds_guid != drrb->drr_fromguid)
-		return (ENODEV);
-
-	/* new snapshot name must not exist */
-	snapname = strrchr(drrb->drr_toname, '@');
-	if (snapname == NULL)
-		return (EEXIST);
-
-	snapname++;
-	err = zap_lookup(ds->ds_dir->dd_pool->dp_meta_objset,
-	    ds->ds_phys->ds_snapnames_zapobj, snapname, 8, 1, &val);
-	if (err == 0)
-	return (EEXIST);
-	if (err != ENOENT)
-	return (err);
-
-	return (0);
-}
-
-/* ARGSUSED */
-static int
-replay_offline_incremental_check(void *arg1, void *arg2, dmu_tx_t *tx)
+static dsl_dataset_t *
+recv_full_sync_impl(dsl_pool_t *dp, uint64_t dsobj, dmu_objset_type_t type,
+    cred_t *cr, dmu_tx_t *tx)
 {
-	dsl_dataset_t *ds = arg1;
-	struct drr_begin *drrb = arg2;
+	dsl_dataset_t *ds;
 
-	/* must not have any changes since most recent snapshot */
-	if (dsl_dataset_modified_since_lastsnap(ds))
-		return (ETXTBSY);
+	VERIFY(0 == dsl_dataset_open_obj(dp, dsobj, NULL,
+	    DS_MODE_EXCLUSIVE, dmu_recv_tag, &ds));
 
-	return (replay_incremental_check(ds, drrb));
-}
+	if (type != DMU_OST_NONE) {
+		(void) dmu_objset_create_impl(dp->dp_spa,
+		    ds, &ds->ds_phys->ds_bp, type, tx);
+	}
 
-/* ARGSUSED */
-static void
-replay_offline_incremental_sync(void *arg1, void *arg2, cred_t *cr,
-    dmu_tx_t *tx)
-{
-	dsl_dataset_t *ds = arg1;
 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
 	ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT;
 
-	spa_history_internal_log(LOG_DS_REPLAY_INC_SYNC,
+	spa_history_internal_log(LOG_DS_REPLAY_FULL_SYNC,
 	    ds->ds_dir->dd_pool->dp_spa, tx, cr, "dataset = %lld",
 	    ds->ds_phys->ds_dir_obj);
+
+	return (ds);
 }
 
 /* ARGSUSED */
 static int
-replay_full_check(void *arg1, void *arg2, dmu_tx_t *tx)
+recv_full_check(void *arg1, void *arg2, dmu_tx_t *tx)
 {
 	dsl_dir_t *dd = arg1;
-	struct drr_begin *drrb = arg2;
+	struct recvbeginsyncarg *rbsa = arg2;
 	objset_t *mos = dd->dd_pool->dp_meta_objset;
-	char *cp;
 	uint64_t val;
 	int err;
 
-	cp = strchr(drrb->drr_toname, '@');
-	*cp = '\0';
 	err = zap_lookup(mos, dd->dd_phys->dd_child_dir_zapobj,
-	    strrchr(drrb->drr_toname, '/') + 1,
-	    sizeof (uint64_t), 1, &val);
-	*cp = '@';
+	    strrchr(rbsa->tofs, '/') + 1, sizeof (uint64_t), 1, &val);
 
 	if (err != ENOENT)
 		return (err ? err : EEXIST);
 
+	if (rbsa->origin) {
+		/* make sure it's a snap in the same pool */
+		if (rbsa->origin->ds_dir->dd_pool != dd->dd_pool)
+			return (EXDEV);
+		if (rbsa->origin->ds_phys->ds_num_children == 0)
+			return (EINVAL);
+		if (rbsa->origin->ds_phys->ds_guid != rbsa->fromguid)
+			return (ENODEV);
+	}
+
 	return (0);
 }
 
 static void
-replay_full_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
+recv_full_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
 {
 	dsl_dir_t *dd = arg1;
-	struct drr_begin *drrb = arg2;
-	char *cp;
-	dsl_dataset_t *ds;
+	struct recvbeginsyncarg *rbsa = arg2;
 	uint64_t dsobj;
 
-	cp = strchr(drrb->drr_toname, '@');
-	*cp = '\0';
-	dsobj = dsl_dataset_create_sync(dd, strrchr(drrb->drr_toname, '/') + 1,
-	    NULL, tx);
+	dsobj = dsl_dataset_create_sync(dd, strrchr(rbsa->tofs, '/') + 1,
+	    rbsa->origin, cr, tx);
 
-	VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, dsobj, NULL,
-	    DS_MODE_EXCLUSIVE, FTAG, &ds));
+	rbsa->ds = recv_full_sync_impl(dd->dd_pool, dsobj,
+	    rbsa->origin ? DMU_OST_NONE : rbsa->type, cr, tx);
+}
 
-	(void) dmu_objset_create_impl(dsl_dataset_get_spa(ds),
-	    ds, &ds->ds_phys->ds_bp, drrb->drr_type, tx);
+static int
+recv_full_existing_check(void *arg1, void *arg2, dmu_tx_t *tx)
+{
+	dsl_dataset_t *ds = arg1;
+	struct recvbeginsyncarg *rbsa = arg2;
+	int err;
 
-	dmu_buf_will_dirty(ds->ds_dbuf, tx);
-	ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT;
+	/* must be a head ds */
+	if (ds->ds_phys->ds_next_snap_obj != 0)
+		return (EINVAL);
 
-	spa_history_internal_log(LOG_DS_REPLAY_FULL_SYNC,
-	    ds->ds_dir->dd_pool->dp_spa, tx, cr, "dataset = %lld",
-	    ds->ds_phys->ds_dir_obj);
+	/* must not be a clone ds */
+	if (ds->ds_prev != NULL)
+		return (EINVAL);
+
+	err = dsl_dataset_destroy_check(ds, rbsa->tag, tx);
+	if (err)
+		return (err);
 
-	*cp = '@';
+	if (rbsa->origin) {
+		/* make sure it's a snap in the same pool */
+		if (rbsa->origin->ds_dir->dd_pool != ds->ds_dir->dd_pool)
+			return (EXDEV);
+		if (rbsa->origin->ds_phys->ds_num_children == 0)
+			return (EINVAL);
+		if (rbsa->origin->ds_phys->ds_guid != rbsa->fromguid)
+			return (ENODEV);
+	}
 
-	dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
+	return (0);
 }
 
-struct onlineincarg {
-	dsl_dir_t *dd;
-	dsl_dataset_t *ohds;
-	boolean_t force;
-	const char *cosname;
-};
+static void
+recv_full_existing_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
+{
+	dsl_dataset_t *ds = arg1;
+	struct recvbeginsyncarg *rbsa = arg2;
+	dsl_dir_t *dd = ds->ds_dir;
+	uint64_t dsobj;
+
+	/*
+	 * NB: caller must provide an extra hold on the dsl_dir_t, so it
+	 * won't go away when dsl_dataset_destroy_sync() closes the
+	 * dataset.
+	 */
+	dsl_dataset_destroy_sync(ds, rbsa->tag, cr, tx);
+
+	dsobj = dsl_dataset_create_sync_impl(dd, rbsa->origin, tx);
+
+	rbsa->ds = recv_full_sync_impl(dd->dd_pool, dsobj,
+	    rbsa->origin ? DMU_OST_NONE : rbsa->type, cr, tx);
+}
 
 /* ARGSUSED */
 static int
-replay_online_incremental_check(void *arg1, void *arg2, dmu_tx_t *tx)
+recv_incremental_check(void *arg1, void *arg2, dmu_tx_t *tx)
 {
-	struct onlineincarg *oia = arg1;
+	dsl_dataset_t *ds = arg1;
+	struct recvbeginsyncarg *rbsa = arg2;
+	int err;
+	uint64_t val;
 
-	if (dsl_dataset_modified_since_lastsnap(oia->ohds) && !oia->force)
+	/* must not have any changes since most recent snapshot */
+	if (!rbsa->force && dsl_dataset_modified_since_lastsnap(ds))
 		return (ETXTBSY);
 
-	return (replay_incremental_check(oia->ohds, arg2));
+	/* must already be a snapshot of this fs */
+	if (ds->ds_phys->ds_prev_snap_obj == 0)
+		return (ENODEV);
+
+	/* most recent snapshot must match fromguid */
+	if (ds->ds_prev->ds_phys->ds_guid != rbsa->fromguid)
+		return (ENODEV);
+
+	/* new snapshot name must not exist */
+	err = zap_lookup(ds->ds_dir->dd_pool->dp_meta_objset,
+	    ds->ds_phys->ds_snapnames_zapobj, rbsa->tosnap, 8, 1, &val);
+	if (err == 0)
+		return (EEXIST);
+	if (err != ENOENT)
+		return (err);
+	return (0);
 }
 
 /* ARGSUSED */
 static void
-replay_online_incremental_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
+recv_online_incremental_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
 {
-	struct onlineincarg *oia = arg1;
-	dsl_dataset_t *ohds = oia->ohds;
-	dsl_dir_t *dd = oia->dd;
-	dsl_dataset_t *ods, *ds;
+	dsl_dataset_t *ohds = arg1;
+	struct recvbeginsyncarg *rbsa = arg2;
+	dsl_pool_t *dp = ohds->ds_dir->dd_pool;
+	dsl_dataset_t *ods, *cds;
 	uint64_t dsobj;
 
-	VERIFY(0 == dsl_dataset_open_obj(ohds->ds_dir->dd_pool,
-	    ohds->ds_phys->ds_prev_snap_obj, NULL,
-	    DS_MODE_STANDARD, FTAG, &ods));
-
-	dsobj = dsl_dataset_create_sync(dd, strrchr(oia->cosname, '/') + 1,
-	    ods, tx);
+	/* create the temporary clone */
+	VERIFY(0 == dsl_dataset_open_obj(dp, ohds->ds_phys->ds_prev_snap_obj,
+	    NULL, DS_MODE_STANDARD, FTAG, &ods));
+	dsobj = dsl_dataset_create_sync(ohds->ds_dir,
+	    rbsa->clonelastname, ods, cr, tx);
+	dsl_dataset_close(ods, DS_MODE_STANDARD, FTAG);
 
 	/* open the temporary clone */
-	VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, dsobj, NULL,
-	    DS_MODE_EXCLUSIVE, FTAG, &ds));
+	VERIFY(0 == dsl_dataset_open_obj(dp, dsobj, NULL,
+	    DS_MODE_EXCLUSIVE, dmu_recv_tag, &cds));
+
+	dmu_buf_will_dirty(cds->ds_dbuf, tx);
+	cds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT;
 
+	rbsa->ds = cds;
+
+	spa_history_internal_log(LOG_DS_REPLAY_INC_SYNC,
+	    dp->dp_spa, tx, cr, "dataset = %lld",
+	    cds->ds_phys->ds_dir_obj);
+}
+
+/* ARGSUSED */
+static void
+recv_offline_incremental_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
+{
+	dsl_dataset_t *ds = arg1;
 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
 	ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT;
 
 	spa_history_internal_log(LOG_DS_REPLAY_INC_SYNC,
 	    ds->ds_dir->dd_pool->dp_spa, tx, cr, "dataset = %lld",
 	    ds->ds_phys->ds_dir_obj);
-
-	dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
-	dsl_dataset_close(ods, DS_MODE_STANDARD, FTAG);
 }
 
-static int
-replay_end_check(void *arg1, void *arg2, dmu_tx_t *tx)
+/*
+ * NB: callers *MUST* call dmu_recv_stream() if dmu_recv_begin()
+ * succeeds; otherwise we will leak the holds on the datasets.
+ */
+int
+dmu_recv_begin(char *tofs, char *tosnap, struct drr_begin *drrb,
+    boolean_t force, objset_t *origin, boolean_t online, dmu_recv_cookie_t *drc)
 {
-	objset_t *os = arg1;
-	struct drr_begin *drrb = arg2;
-	char *snapname;
+	int err = 0;
+	boolean_t byteswap;
+	struct recvbeginsyncarg rbsa;
+	uint64_t version;
+	int flags;
+	dsl_dataset_t *ds;
 
-	/* XXX verify that drr_toname is in dd */
+	if (drrb->drr_magic == DMU_BACKUP_MAGIC)
+		byteswap = FALSE;
+	else if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC))
+		byteswap = TRUE;
+	else
+		return (EINVAL);
+
+	rbsa.tofs = tofs;
+	rbsa.tosnap = tosnap;
+	rbsa.origin = origin ? origin->os->os_dsl_dataset : NULL;
+	rbsa.fromguid = drrb->drr_fromguid;
+	rbsa.type = drrb->drr_type;
+	rbsa.tag = FTAG;
+	version = drrb->drr_version;
+	flags = drrb->drr_flags;
+
+	if (byteswap) {
+		rbsa.type = BSWAP_32(rbsa.type);
+		rbsa.fromguid = BSWAP_64(rbsa.fromguid);
+		version = BSWAP_64(version);
+		flags = BSWAP_32(flags);
+	}
 
-	snapname = strchr(drrb->drr_toname, '@');
-	if (snapname == NULL)
+	if (version != DMU_BACKUP_STREAM_VERSION ||
+	    rbsa.type >= DMU_OST_NUMTYPES ||
+	    ((flags & DRR_FLAG_CLONE) && origin == NULL))
 		return (EINVAL);
-	snapname++;
 
-	return (dsl_dataset_snapshot_check(os, snapname, tx));
-}
+	bzero(drc, sizeof (dmu_recv_cookie_t));
+	drc->drc_drrb = drrb;
+	drc->drc_tosnap = tosnap;
+	drc->drc_force = force;
 
-static void
-replay_end_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
-{
-	objset_t *os = arg1;
-	struct drr_begin *drrb = arg2;
-	char *snapname;
-	dsl_dataset_t *ds, *hds;
+	/*
+	 * Process the begin in syncing context.
+	 */
+	if (rbsa.fromguid && !(flags & DRR_FLAG_CLONE) && !online) {
+		/* offline incremental receive */
+		err = dsl_dataset_open(tofs,
+		    DS_MODE_EXCLUSIVE, dmu_recv_tag, &ds);
+		if (err)
+			return (err);
 
-	snapname = strchr(drrb->drr_toname, '@') + 1;
+		/*
+		 * Only do the rollback if the most recent snapshot
+		 * matches the incremental source
+		 */
+		if (force) {
+			if (ds->ds_prev == NULL ||
+			    ds->ds_prev->ds_phys->ds_guid !=
+			    rbsa.fromguid) {
+				dsl_dataset_close(ds, DS_MODE_EXCLUSIVE,
+				    dmu_recv_tag);
+				return (ENODEV);
+			}
+			(void) dsl_dataset_rollback(ds, DMU_OST_NONE);
+		}
+		rbsa.force = B_FALSE;
+		err = dsl_sync_task_do(ds->ds_dir->dd_pool,
+		    recv_incremental_check,
+		    recv_offline_incremental_sync,
+		    ds, &rbsa, 1);
+		if (err) {
+			dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, dmu_recv_tag);
+			return (err);
+		}
+		drc->drc_logical_ds = drc->drc_real_ds = ds;
+	} else if (rbsa.fromguid && !(flags & DRR_FLAG_CLONE)) {
+		/* online incremental receive */
 
-	dsl_dataset_snapshot_sync(os, snapname, cr, tx);
+		/* tmp clone name is: tofs/%tosnap */
+		(void) snprintf(rbsa.clonelastname, sizeof (rbsa.clonelastname),
+		    "%%%s", tosnap);
 
-	/* set snapshot's creation time and guid */
-	hds = os->os->os_dsl_dataset;
-	VERIFY(0 == dsl_dataset_open_obj(hds->ds_dir->dd_pool,
-	    hds->ds_phys->ds_prev_snap_obj, NULL,
-	    DS_MODE_PRIMARY | DS_MODE_READONLY | DS_MODE_INCONSISTENT,
-	    FTAG, &ds));
+		/* open the dataset we are logically receiving into */
+		err = dsl_dataset_open(tofs,
+		    DS_MODE_STANDARD, dmu_recv_tag, &ds);
+		if (err)
+			return (err);
 
-	dmu_buf_will_dirty(ds->ds_dbuf, tx);
-	ds->ds_phys->ds_creation_time = drrb->drr_creation_time;
-	ds->ds_phys->ds_guid = drrb->drr_toguid;
-	ds->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT;
+		rbsa.force = force;
+		err = dsl_sync_task_do(ds->ds_dir->dd_pool,
+		    recv_incremental_check,
+		    recv_online_incremental_sync, ds, &rbsa, 5);
+		if (err) {
+			dsl_dataset_close(ds, DS_MODE_STANDARD, dmu_recv_tag);
+			return (err);
+		}
+		drc->drc_logical_ds = ds;
+		drc->drc_real_ds = rbsa.ds;
+	} else {
+		/* create new fs -- full backup or clone */
+		dsl_dir_t *dd = NULL;
+		const char *tail;
 
-	/* log the end of the receive */
-	spa_history_internal_log(LOG_DS_RECEIVE, ds->ds_dir->dd_pool->dp_spa,
-	    tx, cr, "dataset = %llu", ds->ds_phys->ds_dir_obj);
+		err = dsl_dir_open(tofs, FTAG, &dd, &tail);
+		if (err)
+			return (err);
+		if (tail == NULL) {
+			if (!force) {
+				dsl_dir_close(dd, FTAG);
+				return (EEXIST);
+			}
 
-	dsl_dataset_close(ds, DS_MODE_PRIMARY, FTAG);
+			rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER);
+			err = dsl_dataset_open_obj(dd->dd_pool,
+			    dd->dd_phys->dd_head_dataset_obj, NULL,
+			    DS_MODE_EXCLUSIVE | DS_MODE_INCONSISTENT,
+			    FTAG, &ds);
+			rw_exit(&dd->dd_pool->dp_config_rwlock);
+			if (err) {
+				dsl_dir_close(dd, FTAG);
+				return (err);
+			}
 
-	dmu_buf_will_dirty(hds->ds_dbuf, tx);
-	hds->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT;
+			err = dsl_sync_task_do(dd->dd_pool,
+			    recv_full_existing_check,
+			    recv_full_existing_sync, ds, &rbsa, 5);
+			/* if successful, sync task closes the ds for us */
+			if (err)
+				dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
+		} else {
+			/* NB: on failure we must still close dd (below). */
+			err = dsl_sync_task_do(dd->dd_pool, recv_full_check,
+			    recv_full_sync, dd, &rbsa, 5);
+			/* err is checked after dsl_dir_close() */
+		}
+		dsl_dir_close(dd, FTAG);
+		if (err)
+			return (err);
+		drc->drc_logical_ds = drc->drc_real_ds = rbsa.ds;
+		drc->drc_newfs = B_TRUE;
+	}
+
+	/* downgrade our hold on the ds from EXCLUSIVE to PRIMARY */
+	dsl_dataset_downgrade(drc->drc_real_ds,
+	    DS_MODE_EXCLUSIVE, DS_MODE_PRIMARY);
+
+	return (0);
 }
 
+struct restorearg {
+	int err;
+	int byteswap;
+	vnode_t *vp;
+	char *buf;
+	uint64_t voff;
+	int bufsize; /* amount of memory allocated for buf */
+	zio_cksum_t cksum;
+};
+
 static void *
 restore_read(struct restorearg *ra, int len)
 {
 	void *rv;
+	int done = 0;
 
 	/* some things will require 8-byte alignment, so everything must */
 	ASSERT3U(len % 8, ==, 0);
 
-	while (ra->buflen - ra->bufoff < len) {
+	while (done < len) {
 		ssize_t resid;
-		int leftover = ra->buflen - ra->bufoff;
 
-		(void) memmove(ra->buf, ra->buf + ra->bufoff, leftover);
 		ra->err = vn_rdwr(UIO_READ, ra->vp,
-		    (caddr_t)ra->buf + leftover, ra->bufsize - leftover,
+		    (caddr_t)ra->buf + done, len - done,
 		    ra->voff, UIO_SYSSPACE, FAPPEND,
 		    RLIM64_INFINITY, CRED(), &resid);
 
-		ra->voff += ra->bufsize - leftover - resid;
-		ra->buflen = ra->bufsize - resid;
-		ra->bufoff = 0;
-		if (resid == ra->bufsize - leftover)
+		if (resid == len - done)
 			ra->err = EINVAL;
+		ra->voff += len - done - resid;
+		done = len - resid;
 		if (ra->err)
 			return (NULL);
-		/* Could compute checksum here? */
 	}
 
-	ASSERT3U(ra->bufoff % 8, ==, 0);
-	ASSERT3U(ra->buflen - ra->bufoff, >=, len);
-	rv = ra->buf + ra->bufoff;
-	ra->bufoff += len;
+	ASSERT3U(done, ==, len);
+	rv = ra->buf;
 	if (ra->byteswap)
-		fletcher_4_incremental_byteswap(rv, len, &ra->zc);
+		fletcher_4_incremental_byteswap(rv, len, &ra->cksum);
 	else
-		fletcher_4_incremental_native(rv, len, &ra->zc);
+		fletcher_4_incremental_native(rv, len, &ra->cksum);
 	return (rv);
 }
 
@@ -557,12 +727,14 @@ backup_byteswap(dmu_replay_record_t *drr)
 #define	DO64(X) (drr->drr_u.X = BSWAP_64(drr->drr_u.X))
 #define	DO32(X) (drr->drr_u.X = BSWAP_32(drr->drr_u.X))
 	drr->drr_type = BSWAP_32(drr->drr_type);
+	drr->drr_payloadlen = BSWAP_32(drr->drr_payloadlen);
 	switch (drr->drr_type) {
 	case DRR_BEGIN:
 		DO64(drr_begin.drr_magic);
 		DO64(drr_begin.drr_version);
 		DO64(drr_begin.drr_creation_time);
 		DO32(drr_begin.drr_type);
+		DO32(drr_begin.drr_flags);
 		DO64(drr_begin.drr_toguid);
 		DO64(drr_begin.drr_fromguid);
 		break;
@@ -786,52 +958,67 @@ restore_free(struct restorearg *ra, objset_t *os,
 	return (err);
 }
 
+static void
+recv_abort_cleanup(dmu_recv_cookie_t *drc)
+{
+	if (drc->drc_newfs || drc->drc_real_ds != drc->drc_logical_ds) {
+		/*
+		 * online incremental or new fs: destroy the fs (which
+		 * may be a clone) that we created
+		 */
+		(void) dsl_dataset_destroy(drc->drc_real_ds, dmu_recv_tag);
+		if (drc->drc_real_ds != drc->drc_logical_ds) {
+			dsl_dataset_close(drc->drc_logical_ds,
+			    DS_MODE_STANDARD, dmu_recv_tag);
+		}
+	} else {
+		/*
+		 * offline incremental: rollback to most recent snapshot.
+		 */
+		int lmode = DS_MODE_PRIMARY;
+		if (dsl_dataset_tryupgrade(drc->drc_real_ds,
+		    DS_MODE_PRIMARY, DS_MODE_EXCLUSIVE)) {
+			lmode = DS_MODE_EXCLUSIVE;
+			(void) dsl_dataset_rollback(drc->drc_real_ds,
+			    DMU_OST_NONE);
+		}
+		dsl_dataset_close(drc->drc_real_ds, lmode, dmu_recv_tag);
+	}
+}
+
+/*
+ * NB: callers *must* call dmu_recv_end() if this succeeds.
+ */
 int
-dmu_recvbackup(char *tosnap, struct drr_begin *drrb, uint64_t *sizep,
-    boolean_t force, boolean_t online, vnode_t *vp, uint64_t voffset,
-    char *cosname)
+dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp)
 {
-	struct restorearg ra;
+	struct restorearg ra = { 0 };
 	dmu_replay_record_t *drr;
-	char *cp;
-	objset_t *os = NULL;
-	zio_cksum_t pzc;
-	char *clonebuf = NULL;
-	size_t len;
-
-	bzero(&ra, sizeof (ra));
-	ra.vp = vp;
-	ra.voff = voffset;
-	ra.bufsize = 1<<20;
-	ra.buf = kmem_alloc(ra.bufsize, KM_SLEEP);
+	objset_t *os;
+	zio_cksum_t pcksum;
 
-	if (drrb->drr_magic == DMU_BACKUP_MAGIC) {
-		ra.byteswap = FALSE;
-	} else if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) {
+	if (drc->drc_drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC))
 		ra.byteswap = TRUE;
-	} else {
-		ra.err = EINVAL;
-		goto out;
-	}
 
-	/*
-	 * NB: this assumes that struct drr_begin will be the largest in
-	 * dmu_replay_record_t's drr_u, and thus we don't need to pad it
-	 * with zeros to make it the same length as we wrote out.
-	 */
-	((dmu_replay_record_t *)ra.buf)->drr_type = DRR_BEGIN;
-	((dmu_replay_record_t *)ra.buf)->drr_pad = 0;
-	((dmu_replay_record_t *)ra.buf)->drr_u.drr_begin = *drrb;
-	if (ra.byteswap) {
-		fletcher_4_incremental_byteswap(ra.buf,
-		    sizeof (dmu_replay_record_t), &ra.zc);
-	} else {
-		fletcher_4_incremental_native(ra.buf,
-		    sizeof (dmu_replay_record_t), &ra.zc);
+	{
+		/* compute checksum of drr_begin record */
+		dmu_replay_record_t *drr;
+		drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP);
+
+		drr->drr_type = DRR_BEGIN;
+		drr->drr_u.drr_begin = *drc->drc_drrb;
+		if (ra.byteswap) {
+			fletcher_4_incremental_byteswap(drr,
+			    sizeof (dmu_replay_record_t), &ra.cksum);
+		} else {
+			fletcher_4_incremental_native(drr,
+			    sizeof (dmu_replay_record_t), &ra.cksum);
+		}
+		kmem_free(drr, sizeof (dmu_replay_record_t));
 	}
-	(void) strcpy(drrb->drr_toname, tosnap); /* for the sync funcs */
 
 	if (ra.byteswap) {
+		struct drr_begin *drrb = drc->drc_drrb;
 		drrb->drr_magic = BSWAP_64(drrb->drr_magic);
 		drrb->drr_version = BSWAP_64(drrb->drr_version);
 		drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time);
@@ -840,133 +1027,26 @@ dmu_recvbackup(char *tosnap, struct drr_begin *drrb, uint64_t *sizep,
 		drrb->drr_fromguid = BSWAP_64(drrb->drr_fromguid);
 	}
 
-	ASSERT3U(drrb->drr_magic, ==, DMU_BACKUP_MAGIC);
-
-	if (drrb->drr_version != DMU_BACKUP_VERSION ||
-	    drrb->drr_type >= DMU_OST_NUMTYPES ||
-	    strchr(drrb->drr_toname, '@') == NULL) {
-		ra.err = EINVAL;
-		goto out;
-	}
-
-	/*
-	 * Process the begin in syncing context.
-	 */
-	if (drrb->drr_fromguid && !online) {
-		/* offline incremental receive */
-
-		dsl_dataset_t *ds = NULL;
-
-		cp = strchr(tosnap, '@');
-		*cp = '\0';
-		ra.err = dsl_dataset_open(tosnap, DS_MODE_EXCLUSIVE, FTAG, &ds);
-		*cp = '@';
-		if (ra.err)
-			goto out;
-
-		/*
-		 * Only do the rollback if the most recent snapshot
-		 * matches the incremental source
-		 */
-		if (force) {
-			if (ds->ds_prev == NULL ||
-			    ds->ds_prev->ds_phys->ds_guid !=
-			    drrb->drr_fromguid) {
-				dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
-				kmem_free(ra.buf, ra.bufsize);
-				return (ENODEV);
-			}
-			(void) dsl_dataset_rollback(ds);
-		}
-		ra.err = dsl_sync_task_do(ds->ds_dir->dd_pool,
-		    replay_offline_incremental_check,
-		    replay_offline_incremental_sync, ds, drrb, 1);
-		dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
-	} else if (drrb->drr_fromguid && online) {
-		/* online incremental receive */
-
-		const char *tail;
-		struct onlineincarg oia = { 0 };
-
-		/*
-		 * Get the dsl_dir for the parent of the
-		 * temporary clone.
-		 */
-		cp = strchr(tosnap, '@');
-		*cp = '\0';
-
-		/* tmp clone is: tonsap + '/' + '%' + "snapX" */
-		len = strlen(tosnap) + 2 + strlen(cp + 1) + 1;
-		clonebuf = kmem_alloc(len, KM_SLEEP);
-		(void) snprintf(clonebuf, len, "%s%c%c%s%c",
-		    tosnap, '/', '%', cp + 1, '\0');
-		ra.err = dsl_dir_open(tosnap, FTAG, &oia.dd, &tail);
-		*cp = '@';
-		if (ra.err)
-			goto out;
-
-		/* open the dataset we are logically receiving into */
-		*cp = '\0';
-		ra.err = dsl_dataset_open(tosnap, DS_MODE_STANDARD,
-		    FTAG, &oia.ohds);
-		*cp = '@';
-		if (ra.err) {
-			dsl_dir_close(oia.dd, FTAG);
-			goto out;
-		}
-
-		oia.force = force;
-		oia.cosname = clonebuf;
-		ra.err = dsl_sync_task_do(oia.dd->dd_pool,
-		    replay_online_incremental_check,
-		    replay_online_incremental_sync, &oia, drrb, 5);
-		dsl_dataset_close(oia.ohds, DS_MODE_STANDARD, FTAG);
-		dsl_dir_close(oia.dd, FTAG);
-	} else {
-		/* full backup */
-
-		dsl_dir_t *dd = NULL;
-		const char *tail;
-
-		/* can't restore full backup into topmost fs, for now */
-		if (strrchr(drrb->drr_toname, '/') == NULL) {
-			ra.err = EINVAL;
-			goto out;
-		}
-
-		cp = strchr(tosnap, '@');
-		*cp = '\0';
-		ra.err = dsl_dir_open(tosnap, FTAG, &dd, &tail);
-		*cp = '@';
-		if (ra.err)
-			goto out;
-		if (tail == NULL) {
-			ra.err = EEXIST;
-			goto out;
-		}
+	ra.vp = vp;
+	ra.voff = *voffp;
+	ra.bufsize = 1<<20;
+	ra.buf = kmem_alloc(ra.bufsize, KM_SLEEP);
 
-		ra.err = dsl_sync_task_do(dd->dd_pool, replay_full_check,
-		    replay_full_sync, dd, drrb, 5);
-		dsl_dir_close(dd, FTAG);
-	}
-	if (ra.err)
-		goto out;
+	/* these were verified in dmu_recv_begin */
+	ASSERT(drc->drc_drrb->drr_version == DMU_BACKUP_STREAM_VERSION);
+	ASSERT(drc->drc_drrb->drr_type < DMU_OST_NUMTYPES);
 
 	/*
 	 * Open the objset we are modifying.
 	 */
+	VERIFY(dmu_objset_open_ds(drc->drc_real_ds, DMU_OST_ANY, &os) == 0);
 
-	cp = strchr(tosnap, '@');
-	*cp = '\0';
-	ra.err = dmu_objset_open(clonebuf == NULL ? tosnap : clonebuf,
-	    DMU_OST_ANY, DS_MODE_PRIMARY | DS_MODE_INCONSISTENT, &os);
-	*cp = '@';
-	ASSERT3U(ra.err, ==, 0);
+	ASSERT(drc->drc_real_ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT);
 
 	/*
 	 * Read records and process them.
 	 */
-	pzc = ra.zc;
+	pcksum = ra.cksum;
 	while (ra.err == 0 &&
 	    NULL != (drr = restore_read(&ra, sizeof (*drr)))) {
 		if (issig(JUSTLOOKING) && issig(FORREAL)) {
@@ -1017,99 +1097,130 @@ dmu_recvbackup(char *tosnap, struct drr_begin *drrb, uint64_t *sizep,
 			 * everything before the DRR_END record.
 			 */
 			if (drre.drr_checksum.zc_word[0] != 0 &&
-			    !ZIO_CHECKSUM_EQUAL(drre.drr_checksum, pzc)) {
+			    !ZIO_CHECKSUM_EQUAL(drre.drr_checksum, pcksum)) {
 				ra.err = ECKSUM;
 				goto out;
 			}
-
-			if (clonebuf == NULL) {
-				ra.err = dsl_sync_task_do(dmu_objset_ds(os)->
-				    ds_dir->dd_pool, replay_end_check,
-				    replay_end_sync, os, drrb, 3);
-			}
 			goto out;
 		}
 		default:
 			ra.err = EINVAL;
 			goto out;
 		}
-		pzc = ra.zc;
+		pcksum = ra.cksum;
 	}
 
 out:
-	if (os) {
-		if (drrb->drr_fromguid && online && !ra.err)
-			dmu_objset_name(os, cosname);
-		dmu_objset_close(os);
-	}
+	dmu_objset_close(os);
 
-	/*
-	 * Make sure we don't rollback/destroy unless we actually
-	 * processed the begin properly.  'os' will only be set if this
-	 * is the case.
-	 */
-	if (ra.err && os && tosnap && strchr(tosnap, '@')) {
+	if (ra.err != 0) {
 		/*
 		 * rollback or destroy what we created, so we don't
 		 * leave it in the restoring state.
 		 */
-		dsl_dataset_t *ds;
-		int err;
-
-		cp = strchr(tosnap, '@');
-		*cp = '\0';
-		err = dsl_dataset_open(clonebuf == NULL ? tosnap : clonebuf,
-		    DS_MODE_EXCLUSIVE | DS_MODE_INCONSISTENT,
-		    FTAG, &ds);
-		if (err == 0) {
-			txg_wait_synced(ds->ds_dir->dd_pool, 0);
-			if (drrb->drr_fromguid) {
-				if (clonebuf != NULL) {
-					/*
-					 * online incremental: destroy
-					 * the temporarily created clone.
-					 */
-					dsl_dataset_close(ds, DS_MODE_EXCLUSIVE,
-					    FTAG);
-					(void) dmu_objset_destroy(clonebuf);
-				} else {
-					/*
-					 * offline incremental: rollback to
-					 * most recent snapshot.
-					 */
-					(void) dsl_dataset_rollback(ds);
-					dsl_dataset_close(ds, DS_MODE_EXCLUSIVE,
-					    FTAG);
-				}
-			} else {
-				/* full: destroy whole fs */
-				dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
-				(void) dsl_dataset_destroy(tosnap);
-			}
-		}
-		*cp = '@';
+		txg_wait_synced(drc->drc_real_ds->ds_dir->dd_pool, 0);
+		recv_abort_cleanup(drc);
 	}
 
-	if (clonebuf != NULL)
-		kmem_free(clonebuf, len);
 	kmem_free(ra.buf, ra.bufsize);
-	if (sizep)
-		*sizep = ra.voff;
+	*voffp = ra.voff;
 	return (ra.err);
 }
 
+struct recvendsyncarg {
+	char *tosnap;
+	uint64_t creation_time;
+	uint64_t toguid;
+};
+
+static int
+recv_end_check(void *arg1, void *arg2, dmu_tx_t *tx)
+{
+	dsl_dataset_t *ds = arg1;
+	struct recvendsyncarg *resa = arg2;
+
+	return (dsl_dataset_snapshot_check(ds, resa->tosnap, tx));
+}
+
+static void
+recv_end_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
+{
+	dsl_dataset_t *ds = arg1;
+	struct recvendsyncarg *resa = arg2;
+
+	dsl_dataset_snapshot_sync(ds, resa->tosnap, cr, tx);
+
+	/* set snapshot's creation time and guid */
+	dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
+	ds->ds_prev->ds_phys->ds_creation_time = resa->creation_time;
+	ds->ds_prev->ds_phys->ds_guid = resa->toguid;
+	ds->ds_prev->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT;
+
+	dmu_buf_will_dirty(ds->ds_dbuf, tx);
+	ds->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT;
+}
+
 int
-dmu_replay_end_snapshot(char *name, struct drr_begin *drrb)
+dmu_recv_end(dmu_recv_cookie_t *drc)
 {
-	objset_t *os;
-	int err;
+	int err = 0;
+	int lmode;
 
-	err = dmu_objset_open(name, DMU_OST_ZFS, DS_MODE_STANDARD, &os);
-	if (err)
-		return (err);
+	/*
+	 * XXX hack; seems the ds is still dirty and
+	 * dsl_pool_zil_clean() expects it to have a ds_user_ptr (and
+	 * zil), but clone_swap() can close it.
+	 */
+	txg_wait_synced(drc->drc_real_ds->ds_dir->dd_pool, 0);
 
-	err = dsl_sync_task_do(dmu_objset_ds(os)->ds_dir->dd_pool,
-	    replay_end_check, replay_end_sync, os, drrb, 3);
-	dmu_objset_close(os);
+	if (dsl_dataset_tryupgrade(drc->drc_real_ds,
+	    DS_MODE_PRIMARY, DS_MODE_EXCLUSIVE)) {
+		lmode = DS_MODE_EXCLUSIVE;
+	} else {
+		recv_abort_cleanup(drc);
+		return (EBUSY);
+	}
+
+	if (drc->drc_logical_ds != drc->drc_real_ds) {
+		if (err == 0 && dsl_dataset_tryupgrade(drc->drc_logical_ds,
+		    DS_MODE_STANDARD, DS_MODE_EXCLUSIVE)) {
+			lmode = DS_MODE_EXCLUSIVE;
+			err = dsl_dataset_clone_swap(drc->drc_real_ds,
+			    drc->drc_logical_ds, drc->drc_force);
+		} else {
+			lmode = DS_MODE_STANDARD;
+			err = EBUSY;
+		}
+	}
+
+	if (err == 0) {
+		struct recvendsyncarg resa;
+
+		resa.creation_time = drc->drc_drrb->drr_creation_time;
+		resa.toguid = drc->drc_drrb->drr_toguid;
+		resa.tosnap = drc->drc_tosnap;
+
+		err = dsl_sync_task_do(drc->drc_real_ds->ds_dir->dd_pool,
+		    recv_end_check, recv_end_sync,
+		    drc->drc_logical_ds, &resa, 3);
+		if (err) {
+			if (drc->drc_newfs) {
+				ASSERT(drc->drc_logical_ds == drc->drc_real_ds);
+				(void) dsl_dataset_destroy(drc->drc_real_ds,
+				    dmu_recv_tag);
+				return (err);
+			} else {
+				(void) dsl_dataset_rollback(drc->drc_logical_ds,
+				    DMU_OST_NONE);
+			}
+		}
+	}
+
+	if (drc->drc_logical_ds != drc->drc_real_ds) {
+		/* dsl_dataset_destroy() will close the ds */
+		(void) dsl_dataset_destroy(drc->drc_real_ds, dmu_recv_tag);
+	}
+	/* close the hold from dmu_recv_begin */
+	dsl_dataset_close(drc->drc_logical_ds, lmode, dmu_recv_tag);
 	return (err);
 }
diff --git a/usr/src/uts/common/fs/zfs/dsl_dataset.c b/usr/src/uts/common/fs/zfs/dsl_dataset.c
index 1cba47175a..8c62cd9cef 100644
--- a/usr/src/uts/common/fs/zfs/dsl_dataset.c
+++ b/usr/src/uts/common/fs/zfs/dsl_dataset.c
@@ -45,8 +45,6 @@ static dsl_checkfunc_t dsl_dataset_destroy_begin_check;
 static dsl_syncfunc_t dsl_dataset_destroy_begin_sync;
 static dsl_checkfunc_t dsl_dataset_rollback_check;
 static dsl_syncfunc_t dsl_dataset_rollback_sync;
-static dsl_checkfunc_t dsl_dataset_destroy_check;
-static dsl_syncfunc_t dsl_dataset_destroy_sync;
 
 #define	DS_REF_MAX	(1ULL << 62)
 
@@ -533,6 +531,39 @@ dsl_dataset_close(dsl_dataset_t *ds, int mode, void *tag)
 }
 
 void
+dsl_dataset_downgrade(dsl_dataset_t *ds, int oldmode, int newmode)
+{
+	uint64_t oldweight = ds_refcnt_weight[DS_MODE_LEVEL(oldmode)];
+	uint64_t newweight = ds_refcnt_weight[DS_MODE_LEVEL(newmode)];
+	mutex_enter(&ds->ds_lock);
+	ASSERT3U(ds->ds_open_refcount, >=, oldweight);
+	ASSERT3U(oldweight, >=, newweight);
+	ds->ds_open_refcount -= oldweight;
+	ds->ds_open_refcount += newweight;
+	mutex_exit(&ds->ds_lock);
+}
+
+boolean_t
+dsl_dataset_tryupgrade(dsl_dataset_t *ds, int oldmode, int newmode)
+{
+	boolean_t rv;
+	uint64_t oldweight = ds_refcnt_weight[DS_MODE_LEVEL(oldmode)];
+	uint64_t newweight = ds_refcnt_weight[DS_MODE_LEVEL(newmode)];
+	mutex_enter(&ds->ds_lock);
+	ASSERT3U(ds->ds_open_refcount, >=, oldweight);
+	ASSERT3U(newweight, >=, oldweight);
+	if (ds->ds_open_refcount - oldweight + newweight > DS_REF_MAX) {
+		rv = B_FALSE;
+	} else {
+		ds->ds_open_refcount -= oldweight;
+		ds->ds_open_refcount += newweight;
+		rv = B_TRUE;
+	}
+	mutex_exit(&ds->ds_lock);
+	return (rv);
+}
+
+void
 dsl_dataset_create_root(dsl_pool_t *dp, uint64_t *ddobjp, dmu_tx_t *tx)
 {
 	objset_t *mos = dp->dp_meta_objset;
@@ -574,24 +605,18 @@ dsl_dataset_create_root(dsl_pool_t *dp, uint64_t *ddobjp, dmu_tx_t *tx)
 }
 
 uint64_t
-dsl_dataset_create_sync(dsl_dir_t *pdd,
-    const char *lastname, dsl_dataset_t *clone_parent, dmu_tx_t *tx)
+dsl_dataset_create_sync_impl(dsl_dir_t *dd, dsl_dataset_t *origin, dmu_tx_t *tx)
 {
-	dsl_pool_t *dp = pdd->dd_pool;
+	dsl_pool_t *dp = dd->dd_pool;
 	dmu_buf_t *dbuf;
 	dsl_dataset_phys_t *dsphys;
-	uint64_t dsobj, ddobj;
+	uint64_t dsobj;
 	objset_t *mos = dp->dp_meta_objset;
-	dsl_dir_t *dd;
 
-	ASSERT(clone_parent == NULL || clone_parent->ds_dir->dd_pool == dp);
-	ASSERT(clone_parent == NULL ||
-	    clone_parent->ds_phys->ds_num_children > 0);
-	ASSERT(lastname[0] != '@');
+	ASSERT(origin == NULL || origin->ds_dir->dd_pool == dp);
+	ASSERT(origin == NULL || origin->ds_phys->ds_num_children > 0);
 	ASSERT(dmu_tx_is_syncing(tx));
-
-	ddobj = dsl_dir_create_sync(pdd, lastname, tx);
-	VERIFY(0 == dsl_dir_open_obj(dp, ddobj, lastname, FTAG, &dd));
+	ASSERT(dd->dd_phys->dd_head_dataset_obj == 0);
 
 	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
 	    DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
@@ -608,28 +633,49 @@ dsl_dataset_create_sync(dsl_dir_t *pdd,
 	dsphys->ds_creation_txg = tx->tx_txg;
 	dsphys->ds_deadlist_obj =
 	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
-	if (clone_parent) {
-		dsphys->ds_prev_snap_obj = clone_parent->ds_object;
+	if (origin) {
+		dsphys->ds_prev_snap_obj = origin->ds_object;
 		dsphys->ds_prev_snap_txg =
-		    clone_parent->ds_phys->ds_creation_txg;
+		    origin->ds_phys->ds_creation_txg;
 		dsphys->ds_used_bytes =
-		    clone_parent->ds_phys->ds_used_bytes;
+		    origin->ds_phys->ds_used_bytes;
 		dsphys->ds_compressed_bytes =
-		    clone_parent->ds_phys->ds_compressed_bytes;
+		    origin->ds_phys->ds_compressed_bytes;
 		dsphys->ds_uncompressed_bytes =
-		    clone_parent->ds_phys->ds_uncompressed_bytes;
-		dsphys->ds_bp = clone_parent->ds_phys->ds_bp;
+		    origin->ds_phys->ds_uncompressed_bytes;
+		dsphys->ds_bp = origin->ds_phys->ds_bp;
 
-		dmu_buf_will_dirty(clone_parent->ds_dbuf, tx);
-		clone_parent->ds_phys->ds_num_children++;
+		dmu_buf_will_dirty(origin->ds_dbuf, tx);
+		origin->ds_phys->ds_num_children++;
 
 		dmu_buf_will_dirty(dd->dd_dbuf, tx);
-		dd->dd_phys->dd_clone_parent_obj = clone_parent->ds_object;
+		dd->dd_phys->dd_origin_obj = origin->ds_object;
 	}
 	dmu_buf_rele(dbuf, FTAG);
 
 	dmu_buf_will_dirty(dd->dd_dbuf, tx);
 	dd->dd_phys->dd_head_dataset_obj = dsobj;
+
+	return (dsobj);
+}
+
+uint64_t
+dsl_dataset_create_sync(dsl_dir_t *pdd,
+    const char *lastname, dsl_dataset_t *origin, cred_t *cr, dmu_tx_t *tx)
+{
+	dsl_pool_t *dp = pdd->dd_pool;
+	uint64_t dsobj, ddobj;
+	dsl_dir_t *dd;
+
+	ASSERT(lastname[0] != '@');
+
+	ddobj = dsl_dir_create_sync(pdd, lastname, tx);
+	VERIFY(0 == dsl_dir_open_obj(dp, ddobj, lastname, FTAG, &dd));
+
+	dsobj = dsl_dataset_create_sync_impl(dd, origin, tx);
+
+	dsl_deleg_set_create_perms(dd, tx, cr);
+
 	dsl_dir_close(dd, FTAG);
 
 	return (dsobj);
@@ -713,36 +759,36 @@ dsl_snapshots_destroy(char *fsname, char *snapname)
 	return (err);
 }
 
+/*
+ * ds must be opened EXCLUSIVE or PRIMARY.  On return (whether
+ * successful or not), ds will be closed and the caller can no longer
+ * dereference it.
+ */
 int
-dsl_dataset_destroy(const char *name)
+dsl_dataset_destroy(dsl_dataset_t *ds, void *tag)
 {
 	int err;
 	dsl_sync_task_group_t *dstg;
 	objset_t *os;
-	dsl_dataset_t *ds;
 	dsl_dir_t *dd;
 	uint64_t obj;
 
-	if (strchr(name, '@')) {
+	if (ds->ds_open_refcount != DS_REF_MAX) {
+		if (dsl_dataset_tryupgrade(ds, DS_MODE_PRIMARY,
+		    DS_MODE_EXCLUSIVE) == 0) {
+			dsl_dataset_close(ds, DS_MODE_PRIMARY, tag);
+			return (EBUSY);
+		}
+	}
+
+	if (dsl_dataset_is_snapshot(ds)) {
 		/* Destroying a snapshot is simpler */
-		err = dsl_dataset_open(name,
-		    DS_MODE_EXCLUSIVE | DS_MODE_READONLY | DS_MODE_INCONSISTENT,
-		    FTAG, &ds);
-		if (err)
-			return (err);
 		err = dsl_sync_task_do(ds->ds_dir->dd_pool,
 		    dsl_dataset_destroy_check, dsl_dataset_destroy_sync,
-		    ds, FTAG, 0);
-		if (err)
-			dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
-		return (err);
+		    ds, tag, 0);
+		goto out;
 	}
 
-	err = dmu_objset_open(name, DMU_OST_ANY,
-	    DS_MODE_EXCLUSIVE | DS_MODE_INCONSISTENT, &os);
-	if (err)
-		return (err);
-	ds = os->os->os_dsl_dataset;
 	dd = ds->ds_dir;
 
 	/*
@@ -751,10 +797,12 @@ dsl_dataset_destroy(const char *name)
 	 */
 	err = dsl_sync_task_do(dd->dd_pool, dsl_dataset_destroy_begin_check,
 	    dsl_dataset_destroy_begin_sync, ds, NULL, 0);
-	if (err) {
-		dmu_objset_close(os);
-		return (err);
-	}
+	if (err)
+		goto out;
+
+	err = dmu_objset_open_ds(ds, DMU_OST_ANY, &os);
+	if (err)
+		goto out;
 
 	/*
 	 * remove the objects in open context, so that we won't
@@ -783,45 +831,47 @@ dsl_dataset_destroy(const char *name)
 
 	dmu_objset_close(os);
 	if (err != ESRCH)
-		return (err);
-
-	err = dsl_dataset_open(name,
-	    DS_MODE_EXCLUSIVE | DS_MODE_READONLY | DS_MODE_INCONSISTENT,
-	    FTAG, &ds);
-	if (err)
-		return (err);
+		goto out;
 
-	err = dsl_dir_open(name, FTAG, &dd, NULL);
-	if (err) {
-		dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
-		return (err);
+	if (ds->ds_user_ptr) {
+		ds->ds_user_evict_func(ds, ds->ds_user_ptr);
+		ds->ds_user_ptr = NULL;
 	}
 
+	rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER);
+	err = dsl_dir_open_obj(dd->dd_pool, dd->dd_object, NULL, FTAG, &dd);
+	rw_exit(&dd->dd_pool->dp_config_rwlock);
+
+	if (err)
+		goto out;
+
 	/*
 	 * Blow away the dsl_dir + head dataset.
 	 */
 	dstg = dsl_sync_task_group_create(ds->ds_dir->dd_pool);
 	dsl_sync_task_create(dstg, dsl_dataset_destroy_check,
-	    dsl_dataset_destroy_sync, ds, FTAG, 0);
+	    dsl_dataset_destroy_sync, ds, tag, 0);
 	dsl_sync_task_create(dstg, dsl_dir_destroy_check,
 	    dsl_dir_destroy_sync, dd, FTAG, 0);
 	err = dsl_sync_task_group_wait(dstg);
 	dsl_sync_task_group_destroy(dstg);
 	/* if it is successful, *destroy_sync will close the ds+dd */
-	if (err) {
-		dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
+	if (err)
 		dsl_dir_close(dd, FTAG);
-	}
+out:
+	if (err)
+		dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, tag);
 	return (err);
 }
 
 int
-dsl_dataset_rollback(dsl_dataset_t *ds)
+dsl_dataset_rollback(dsl_dataset_t *ds, dmu_objset_type_t ost)
 {
 	ASSERT3U(ds->ds_open_refcount, ==, DS_REF_MAX);
+
 	return (dsl_sync_task_do(ds->ds_dir->dd_pool,
 	    dsl_dataset_rollback_check, dsl_dataset_rollback_sync,
-	    ds, NULL, 0));
+	    ds, &ost, 0));
 }
 
 void *
@@ -927,14 +977,12 @@ static int
 dsl_dataset_rollback_check(void *arg1, void *arg2, dmu_tx_t *tx)
 {
 	dsl_dataset_t *ds = arg1;
+	dmu_objset_type_t *ost = arg2;
 
 	/*
-	 * There must be a previous snapshot.  I suppose we could roll
-	 * it back to being empty (and re-initialize the upper (ZPL)
-	 * layer).  But for now there's no way to do this via the user
-	 * interface.
+	 * We can only roll back to emptiness if it is a ZPL objset.
 	 */
-	if (ds->ds_phys->ds_prev_snap_txg == 0)
+	if (*ost != DMU_OST_ZFS && ds->ds_phys->ds_prev_snap_txg == 0)
 		return (EINVAL);
 
 	/*
@@ -958,17 +1006,29 @@ static void
 dsl_dataset_rollback_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
 {
 	dsl_dataset_t *ds = arg1;
+	dmu_objset_type_t *ost = arg2;
 	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
 
 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
 
 	/*
 	 * Before the roll back destroy the zil.
-	 * Note, ds_user_ptr can be null if we are doing a "zfs receive -F"
 	 */
 	if (ds->ds_user_ptr != NULL) {
 		zil_rollback_destroy(
 		    ((objset_impl_t *)ds->ds_user_ptr)->os_zil, tx);
+
+		/*
+		 * We need to make sure that the objset_impl_t is reopened after
+		 * we do the rollback, otherwise it will have the wrong
+		 * objset_phys_t.  Normally this would happen when this
+		 * DS_MODE_EXCLUSIVE dataset-open is closed, thus causing the
+		 * dataset to be immediately evicted.  But when doing "zfs recv
+		 * -F", we reopen the objset before that, so that there is no
+		 * window where the dataset is closed and inconsistent.
+		 */
+		ds->ds_user_evict_func(ds, ds->ds_user_ptr);
+		ds->ds_user_ptr = NULL;
 	}
 
 	/* Zero out the deadlist. */
@@ -1000,20 +1060,34 @@ dsl_dataset_rollback_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
 		    -used, -compressed, -uncompressed, tx);
 	}
 
-	/* Change our contents to that of the prev snapshot */
-	ASSERT3U(ds->ds_prev->ds_object, ==, ds->ds_phys->ds_prev_snap_obj);
-	ds->ds_phys->ds_bp = ds->ds_prev->ds_phys->ds_bp;
-	ds->ds_phys->ds_used_bytes = ds->ds_prev->ds_phys->ds_used_bytes;
-	ds->ds_phys->ds_compressed_bytes =
-	    ds->ds_prev->ds_phys->ds_compressed_bytes;
-	ds->ds_phys->ds_uncompressed_bytes =
-	    ds->ds_prev->ds_phys->ds_uncompressed_bytes;
-	ds->ds_phys->ds_flags = ds->ds_prev->ds_phys->ds_flags;
-	ds->ds_phys->ds_unique_bytes = 0;
+	if (ds->ds_prev) {
+		/* Change our contents to that of the prev snapshot */
+		ASSERT3U(ds->ds_prev->ds_object, ==,
+		    ds->ds_phys->ds_prev_snap_obj);
+		ds->ds_phys->ds_bp = ds->ds_prev->ds_phys->ds_bp;
+		ds->ds_phys->ds_used_bytes =
+		    ds->ds_prev->ds_phys->ds_used_bytes;
+		ds->ds_phys->ds_compressed_bytes =
+		    ds->ds_prev->ds_phys->ds_compressed_bytes;
+		ds->ds_phys->ds_uncompressed_bytes =
+		    ds->ds_prev->ds_phys->ds_uncompressed_bytes;
+		ds->ds_phys->ds_flags = ds->ds_prev->ds_phys->ds_flags;
+		ds->ds_phys->ds_unique_bytes = 0;
 
-	if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) {
-		dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
-		ds->ds_prev->ds_phys->ds_unique_bytes = 0;
+		if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) {
+			dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
+			ds->ds_prev->ds_phys->ds_unique_bytes = 0;
+		}
+	} else {
+		/* Zero out our contents, recreate objset */
+		bzero(&ds->ds_phys->ds_bp, sizeof (blkptr_t));
+		ds->ds_phys->ds_used_bytes = 0;
+		ds->ds_phys->ds_compressed_bytes = 0;
+		ds->ds_phys->ds_uncompressed_bytes = 0;
+		ds->ds_phys->ds_flags = 0;
+		ds->ds_phys->ds_unique_bytes = 0;
+		(void) dmu_objset_create_impl(ds->ds_dir->dd_pool->dp_spa, ds,
+		    &ds->ds_phys->ds_bp, *ost, tx);
 	}
 
 	spa_history_internal_log(LOG_DS_ROLLBACK, ds->ds_dir->dd_pool->dp_spa,
@@ -1025,6 +1099,9 @@ static int
 dsl_dataset_destroy_begin_check(void *arg1, void *arg2, dmu_tx_t *tx)
 {
 	dsl_dataset_t *ds = arg1;
+	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
+	uint64_t count;
+	int err;
 
 	/*
 	 * Can't delete a head dataset if there are snapshots of it.
@@ -1035,6 +1112,17 @@ dsl_dataset_destroy_begin_check(void *arg1, void *arg2, dmu_tx_t *tx)
 	    ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object)
 		return (EINVAL);
 
+	/*
+	 * This is really a dsl_dir thing, but check it here so that
+	 * we'll be less likely to leave this dataset inconsistent &
+	 * nearly destroyed.
+	 */
+	err = zap_count(mos, ds->ds_dir->dd_phys->dd_child_dir_zapobj, &count);
+	if (err)
+		return (err);
+	if (count != 0)
+		return (EEXIST);
+
 	return (0);
 }
 
@@ -1054,7 +1142,7 @@ dsl_dataset_destroy_begin_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
 }
 
 /* ARGSUSED */
-static int
+int
 dsl_dataset_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx)
 {
 	dsl_dataset_t *ds = arg1;
@@ -1083,7 +1171,7 @@ dsl_dataset_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx)
 	return (0);
 }
 
-static void
+void
 dsl_dataset_destroy_sync(void *arg1, void *tag, cred_t *cr, dmu_tx_t *tx)
 {
 	dsl_dataset_t *ds = arg1;
@@ -1337,8 +1425,7 @@ dsl_dataset_destroy_sync(void *arg1, void *tag, cred_t *cr, dmu_tx_t *tx)
 int
 dsl_dataset_snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx)
 {
-	objset_t *os = arg1;
-	dsl_dataset_t *ds = os->os->os_dsl_dataset;
+	dsl_dataset_t *ds = arg1;
 	const char *snapname = arg2;
 	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
 	int err;
@@ -1375,8 +1462,7 @@ dsl_dataset_snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx)
 void
 dsl_dataset_snapshot_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
 {
-	objset_t *os = arg1;
-	dsl_dataset_t *ds = os->os->os_dsl_dataset;
+	dsl_dataset_t *ds = arg1;
 	const char *snapname = arg2;
 	dsl_pool_t *dp = ds->ds_dir->dd_pool;
 	dmu_buf_t *dbuf;
@@ -1499,20 +1585,21 @@ dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat)
 {
 	stat->dds_creation_txg = ds->ds_phys->ds_creation_txg;
 	stat->dds_inconsistent = ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT;
+	stat->dds_guid = ds->ds_phys->ds_guid;
 	if (ds->ds_phys->ds_next_snap_obj) {
 		stat->dds_is_snapshot = B_TRUE;
 		stat->dds_num_clones = ds->ds_phys->ds_num_children - 1;
 	}
 
 	/* clone origin is really a dsl_dir thing... */
-	if (ds->ds_dir->dd_phys->dd_clone_parent_obj) {
+	if (ds->ds_dir->dd_phys->dd_origin_obj) {
 		dsl_dataset_t *ods;
 
 		rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER);
 		VERIFY(0 == dsl_dataset_open_obj(ds->ds_dir->dd_pool,
-		    ds->ds_dir->dd_phys->dd_clone_parent_obj,
+		    ds->ds_dir->dd_phys->dd_origin_obj,
 		    NULL, DS_MODE_NONE, FTAG, &ods));
-		dsl_dataset_name(ods, stat->dds_clone_of);
+		dsl_dataset_name(ods, stat->dds_origin);
 		dsl_dataset_close(ods, DS_MODE_NONE, FTAG);
 		rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock);
 	}
@@ -1808,9 +1895,9 @@ dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx)
 	struct promotearg *pa = arg2;
 	dsl_dir_t *dd = hds->ds_dir;
 	dsl_pool_t *dp = hds->ds_dir->dd_pool;
-	dsl_dir_t *pdd = NULL;
+	dsl_dir_t *odd = NULL;
 	dsl_dataset_t *ds = NULL;
-	dsl_dataset_t *pivot_ds = NULL;
+	dsl_dataset_t *origin_ds = NULL;
 	dsl_dataset_t *newnext_ds = NULL;
 	int err;
 	char *name = NULL;
@@ -1820,23 +1907,22 @@ dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx)
 	bzero(pa, sizeof (*pa));
 
 	/* Check that it is a clone */
-	if (dd->dd_phys->dd_clone_parent_obj == 0)
+	if (dd->dd_phys->dd_origin_obj == 0)
 		return (EINVAL);
 
 	/* Since this is so expensive, don't do the preliminary check */
 	if (!dmu_tx_is_syncing(tx))
 		return (0);
 
-	if (err = dsl_dataset_open_obj(dp,
-	    dd->dd_phys->dd_clone_parent_obj,
-	    NULL, DS_MODE_EXCLUSIVE, FTAG, &pivot_ds))
+	if (err = dsl_dataset_open_obj(dp, dd->dd_phys->dd_origin_obj,
+	    NULL, DS_MODE_EXCLUSIVE, FTAG, &origin_ds))
 		goto out;
-	pdd = pivot_ds->ds_dir;
+	odd = origin_ds->ds_dir;
 
 	{
 		dsl_dataset_t *phds;
 		if (err = dsl_dataset_open_obj(dd->dd_pool,
-		    pdd->dd_phys->dd_head_dataset_obj,
+		    odd->dd_phys->dd_head_dataset_obj,
 		    NULL, DS_MODE_NONE, FTAG, &phds))
 			goto out;
 		pa->snapnames_obj = phds->ds_phys->ds_snapnames_zapobj;
@@ -1848,10 +1934,10 @@ dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx)
 		goto out;
 	}
 
-	/* find pivot point's new next ds */
+	/* find origin's new next ds */
 	VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, hds->ds_object,
 	    NULL, DS_MODE_NONE, FTAG, &newnext_ds));
-	while (newnext_ds->ds_phys->ds_prev_snap_obj != pivot_ds->ds_object) {
+	while (newnext_ds->ds_phys->ds_prev_snap_obj != origin_ds->ds_object) {
 		dsl_dataset_t *prev;
 
 		if (err = dsl_dataset_open_obj(dd->dd_pool,
@@ -1863,10 +1949,10 @@ dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx)
 	}
 	pa->newnext_obj = newnext_ds->ds_object;
 
-	/* compute pivot point's new unique space */
+	/* compute origin's new unique space */
 	while ((err = bplist_iterate(&newnext_ds->ds_deadlist,
 	    &itor, &bp)) == 0) {
-		if (bp.blk_birth > pivot_ds->ds_phys->ds_prev_snap_txg)
+		if (bp.blk_birth > origin_ds->ds_phys->ds_prev_snap_txg)
 			pa->unique += bp_get_dasize(dd->dd_pool->dp_spa, &bp);
 	}
 	if (err != ENOENT)
@@ -1874,7 +1960,7 @@ dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx)
 
 	/* Walk the snapshots that we are moving */
 	name = kmem_alloc(MAXPATHLEN, KM_SLEEP);
-	ds = pivot_ds;
+	ds = origin_ds;
 	/* CONSTCOND */
 	while (TRUE) {
 		uint64_t val, dlused, dlcomp, dluncomp;
@@ -1922,19 +2008,19 @@ dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx)
 			dsl_dataset_close(prev, DS_MODE_EXCLUSIVE, FTAG);
 			break;
 		}
-		if (ds != pivot_ds)
+		if (ds != origin_ds)
 			dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
 		ds = prev;
 	}
 
 	/* Check that there is enough space here */
-	err = dsl_dir_transfer_possible(pdd, dd, pa->used);
+	err = dsl_dir_transfer_possible(odd, dd, pa->used);
 
 out:
-	if (ds && ds != pivot_ds)
+	if (ds && ds != origin_ds)
 		dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
-	if (pivot_ds)
-		dsl_dataset_close(pivot_ds, DS_MODE_EXCLUSIVE, FTAG);
+	if (origin_ds)
+		dsl_dataset_close(origin_ds, DS_MODE_EXCLUSIVE, FTAG);
 	if (newnext_ds)
 		dsl_dataset_close(newnext_ds, DS_MODE_NONE, FTAG);
 	if (name)
@@ -1949,26 +2035,25 @@ dsl_dataset_promote_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
 	struct promotearg *pa = arg2;
 	dsl_dir_t *dd = hds->ds_dir;
 	dsl_pool_t *dp = hds->ds_dir->dd_pool;
-	dsl_dir_t *pdd = NULL;
-	dsl_dataset_t *ds, *pivot_ds;
+	dsl_dir_t *odd = NULL;
+	dsl_dataset_t *ds, *origin_ds;
 	char *name;
 
-	ASSERT(dd->dd_phys->dd_clone_parent_obj != 0);
+	ASSERT(dd->dd_phys->dd_origin_obj != 0);
 	ASSERT(0 == (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE));
 
-	VERIFY(0 == dsl_dataset_open_obj(dp,
-	    dd->dd_phys->dd_clone_parent_obj,
-	    NULL, DS_MODE_EXCLUSIVE, FTAG, &pivot_ds));
+	VERIFY(0 == dsl_dataset_open_obj(dp, dd->dd_phys->dd_origin_obj,
+	    NULL, DS_MODE_EXCLUSIVE, FTAG, &origin_ds));
 	/*
-	 * We need to explicitly open pdd, since pivot_ds's pdd will be
+	 * We need to explicitly open odd, since origin_ds's dd will be
 	 * changing.
 	 */
-	VERIFY(0 == dsl_dir_open_obj(dp, pivot_ds->ds_dir->dd_object,
-	    NULL, FTAG, &pdd));
+	VERIFY(0 == dsl_dir_open_obj(dp, origin_ds->ds_dir->dd_object,
+	    NULL, FTAG, &odd));
 
 	/* move snapshots to this dir */
 	name = kmem_alloc(MAXPATHLEN, KM_SLEEP);
-	ds = pivot_ds;
+	ds = origin_ds;
 	/* CONSTCOND */
 	while (TRUE) {
 		dsl_dataset_t *prev;
@@ -1983,9 +2068,9 @@ dsl_dataset_promote_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
 
 		/* change containing dsl_dir */
 		dmu_buf_will_dirty(ds->ds_dbuf, tx);
-		ASSERT3U(ds->ds_phys->ds_dir_obj, ==, pdd->dd_object);
+		ASSERT3U(ds->ds_phys->ds_dir_obj, ==, odd->dd_object);
 		ds->ds_phys->ds_dir_obj = dd->dd_object;
-		ASSERT3P(ds->ds_dir, ==, pdd);
+		ASSERT3P(ds->ds_dir, ==, odd);
 		dsl_dir_close(ds->ds_dir, ds);
 		VERIFY(0 == dsl_dir_open_obj(dp, dd->dd_object,
 		    NULL, ds, &ds->ds_dir));
@@ -2003,35 +2088,35 @@ dsl_dataset_promote_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
 			dsl_dataset_close(prev, DS_MODE_EXCLUSIVE, FTAG);
 			break;
 		}
-		if (ds != pivot_ds)
+		if (ds != origin_ds)
 			dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
 		ds = prev;
 	}
-	if (ds != pivot_ds)
+	if (ds != origin_ds)
 		dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
 
-	/* change pivot point's next snap */
-	dmu_buf_will_dirty(pivot_ds->ds_dbuf, tx);
-	pivot_ds->ds_phys->ds_next_snap_obj = pa->newnext_obj;
+	/* change origin's next snap */
+	dmu_buf_will_dirty(origin_ds->ds_dbuf, tx);
+	origin_ds->ds_phys->ds_next_snap_obj = pa->newnext_obj;
 
-	/* change clone_parent-age */
+	/* change origin */
 	dmu_buf_will_dirty(dd->dd_dbuf, tx);
-	ASSERT3U(dd->dd_phys->dd_clone_parent_obj, ==, pivot_ds->ds_object);
-	dd->dd_phys->dd_clone_parent_obj = pdd->dd_phys->dd_clone_parent_obj;
-	dmu_buf_will_dirty(pdd->dd_dbuf, tx);
-	pdd->dd_phys->dd_clone_parent_obj = pivot_ds->ds_object;
+	ASSERT3U(dd->dd_phys->dd_origin_obj, ==, origin_ds->ds_object);
+	dd->dd_phys->dd_origin_obj = odd->dd_phys->dd_origin_obj;
+	dmu_buf_will_dirty(odd->dd_dbuf, tx);
+	odd->dd_phys->dd_origin_obj = origin_ds->ds_object;
 
 	/* change space accounting */
-	dsl_dir_diduse_space(pdd, -pa->used, -pa->comp, -pa->uncomp, tx);
+	dsl_dir_diduse_space(odd, -pa->used, -pa->comp, -pa->uncomp, tx);
 	dsl_dir_diduse_space(dd, pa->used, pa->comp, pa->uncomp, tx);
-	pivot_ds->ds_phys->ds_unique_bytes = pa->unique;
+	origin_ds->ds_phys->ds_unique_bytes = pa->unique;
 
 	/* log history record */
 	spa_history_internal_log(LOG_DS_PROMOTE, dd->dd_pool->dp_spa, tx,
 	    cr, "dataset = %llu", ds->ds_object);
 
-	dsl_dir_close(pdd, FTAG);
-	dsl_dataset_close(pivot_ds, DS_MODE_EXCLUSIVE, FTAG);
+	dsl_dir_close(odd, FTAG);
+	dsl_dataset_close(origin_ds, DS_MODE_EXCLUSIVE, FTAG);
 	kmem_free(name, MAXPATHLEN);
 }
 
@@ -2066,122 +2151,85 @@ dsl_dataset_promote(const char *name)
 	return (err);
 }
 
-#define	SWITCH64(x, y) \
-	{ \
-		uint64_t __tmp = (x); \
-		(x) = (y); \
-		(y) = __tmp; \
-	}
+struct cloneswaparg {
+	dsl_dataset_t *cds; /* clone dataset */
+	dsl_dataset_t *ohds; /* origin's head dataset */
+	boolean_t force;
+};
 
 /* ARGSUSED */
 static int
 dsl_dataset_clone_swap_check(void *arg1, void *arg2, dmu_tx_t *tx)
 {
-	dsl_dataset_t *cds = arg1;	/* clone to become new head */
-	boolean_t *forcep = arg2;
-	dsl_dir_t *cdd = cds->ds_dir;
-	dsl_pool_t *dp = cds->ds_dir->dd_pool;
-	dsl_dataset_t *ods;	/* the snapshot cds is cloned off of */
-	dsl_dataset_t *ohds = NULL;
-	dsl_dir_t *odd;
-	int err;
+	struct cloneswaparg *csa = arg1;
 
-	/* check that it is a clone */
-	if (cdd->dd_phys->dd_clone_parent_obj == 0)
+	/* they should both be heads */
+	if (dsl_dataset_is_snapshot(csa->cds) ||
+	    dsl_dataset_is_snapshot(csa->ohds))
 		return (EINVAL);
 
-	/* check that cds is not a snapshot */
-	if (dsl_dataset_is_snapshot(cds))
+	/* the branch point should be just before them */
+	if (csa->cds->ds_prev != csa->ohds->ds_prev)
 		return (EINVAL);
 
-	/* open the origin */
-	if (err = dsl_dataset_open_obj(dp, cdd->dd_phys->dd_clone_parent_obj,
-	    NULL, DS_MODE_STANDARD | DS_MODE_READONLY, FTAG, &ods))
-		return (err);
-	odd = ods->ds_dir;
-
-	/* make sure the clone is descendant of origin */
-	if (cdd->dd_parent != odd) {
-		err = EINVAL;
-		goto out;
-	}
+	/* cds should be the clone */
+	if (csa->cds->ds_prev->ds_phys->ds_next_snap_obj !=
+	    csa->ohds->ds_object)
+		return (EINVAL);
 
-	/* check that there are no snapshots after the origin */
-	if (cds->ds_phys->ds_prev_snap_obj != ods->ds_object ||
-	    ods->ds_phys->ds_next_snap_obj !=
-	    odd->dd_phys->dd_head_dataset_obj) {
-		err = EINVAL;
-		goto out;
-	}
+	/* the clone should be a child of the origin */
+	if (csa->cds->ds_dir->dd_parent != csa->ohds->ds_dir)
+		return (EINVAL);
 
-	/*
-	 * Verify origin head dataset hasn't been modified or
-	 * 'force' has been passed down.
-	 */
-	if (!(*forcep) &&
-	    (err = dsl_dataset_open_obj(cdd->dd_pool,
-	    odd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_EXCLUSIVE,
-	    FTAG, &ohds)) == 0) {
-		if (dsl_dataset_modified_since_lastsnap(ohds))
-			err = ETXTBSY;
-		dsl_dataset_close(ohds, DS_MODE_EXCLUSIVE, FTAG);
-	}
-out:
-	dsl_dataset_close(ods, DS_MODE_STANDARD, FTAG);
-	return (err);
+	/* ohds shouldn't be modified unless 'force' */
+	if (!csa->force && dsl_dataset_modified_since_lastsnap(csa->ohds))
+		return (ETXTBSY);
+	return (0);
 }
 
 /* ARGSUSED */
 static void
 dsl_dataset_clone_swap_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
 {
-	dsl_dataset_t *cds = arg1;	/* clone to become new head */
-	dsl_dir_t *cdd = cds->ds_dir;
-	dsl_pool_t *dp = cds->ds_dir->dd_pool;
-	dsl_dataset_t *ods, *ohds;
-	dsl_dir_t *odd;
+	struct cloneswaparg *csa = arg1;
+	dsl_pool_t *dp = csa->cds->ds_dir->dd_pool;
 	uint64_t itor = 0;
 	blkptr_t bp;
 	uint64_t unique = 0;
 	int err;
 
-	ASSERT(cdd->dd_phys->dd_clone_parent_obj != 0);
-	ASSERT(dsl_dataset_is_snapshot(cds) == 0);
-
-	/* open the origin */
-	VERIFY(0 == dsl_dataset_open_obj(dp, cdd->dd_phys->dd_clone_parent_obj,
-	    NULL, DS_MODE_STANDARD | DS_MODE_READONLY, FTAG, &ods));
-	odd = ods->ds_dir;
-	ASSERT(cds->ds_phys->ds_prev_snap_obj == ods->ds_object);
-	ASSERT(ods->ds_phys->ds_next_snap_obj ==
-	    odd->dd_phys->dd_head_dataset_obj);
+	dmu_buf_will_dirty(csa->cds->ds_dbuf, tx);
+	dmu_buf_will_dirty(csa->ohds->ds_dbuf, tx);
+	dmu_buf_will_dirty(csa->cds->ds_prev->ds_dbuf, tx);
 
-	/* open the origin head */
-	VERIFY(0 == dsl_dataset_open_obj(cdd->dd_pool,
-	    odd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_EXCLUSIVE,
-	    FTAG, &ohds));
-	ASSERT(odd == ohds->ds_dir);
+	if (csa->cds->ds_user_ptr != NULL) {
+		csa->cds->ds_user_evict_func(csa->cds, csa->cds->ds_user_ptr);
+		csa->cds->ds_user_ptr = NULL;
+	}
 
-	dmu_buf_will_dirty(cds->ds_dbuf, tx);
-	dmu_buf_will_dirty(ohds->ds_dbuf, tx);
-	dmu_buf_will_dirty(ods->ds_dbuf, tx);
+	if (csa->ohds->ds_user_ptr != NULL) {
+		csa->ohds->ds_user_evict_func(csa->ohds,
+		    csa->ohds->ds_user_ptr);
+		csa->ohds->ds_user_ptr = NULL;
+	}
 
 	/* compute unique space */
-	while ((err = bplist_iterate(&cds->ds_deadlist, &itor, &bp)) == 0) {
-		if (bp.blk_birth > ods->ds_phys->ds_prev_snap_txg)
-			unique += bp_get_dasize(cdd->dd_pool->dp_spa, &bp);
+	while ((err = bplist_iterate(&csa->cds->ds_deadlist,
+	    &itor, &bp)) == 0) {
+		if (bp.blk_birth > csa->cds->ds_prev->ds_phys->ds_prev_snap_txg)
+			unique += bp_get_dasize(dp->dp_spa, &bp);
 	}
 	VERIFY(err == ENOENT);
 
 	/* reset origin's unique bytes */
-	ods->ds_phys->ds_unique_bytes = unique;
+	csa->cds->ds_prev->ds_phys->ds_unique_bytes = unique;
 
 	/* swap blkptrs */
 	{
 		blkptr_t tmp;
-		tmp = ohds->ds_phys->ds_bp;
-		ohds->ds_phys->ds_bp = cds->ds_phys->ds_bp;
-		cds->ds_phys->ds_bp = tmp;
+		tmp = csa->ohds->ds_phys->ds_bp;
+		csa->ohds->ds_phys->ds_bp = csa->cds->ds_phys->ds_bp;
+		csa->cds->ds_phys->ds_bp = tmp;
 	}
 
 	/* set dd_*_bytes */
@@ -2190,60 +2238,68 @@ dsl_dataset_clone_swap_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
 		uint64_t cdl_used, cdl_comp, cdl_uncomp;
 		uint64_t odl_used, odl_comp, odl_uncomp;
 
-		VERIFY(0 == bplist_space(&cds->ds_deadlist, &cdl_used,
+		VERIFY(0 == bplist_space(&csa->cds->ds_deadlist, &cdl_used,
 		    &cdl_comp, &cdl_uncomp));
-		VERIFY(0 == bplist_space(&ohds->ds_deadlist, &odl_used,
+		VERIFY(0 == bplist_space(&csa->ohds->ds_deadlist, &odl_used,
 		    &odl_comp, &odl_uncomp));
-		dused = cds->ds_phys->ds_used_bytes + cdl_used -
-		    (ohds->ds_phys->ds_used_bytes + odl_used);
-		dcomp = cds->ds_phys->ds_compressed_bytes + cdl_comp -
-		    (ohds->ds_phys->ds_compressed_bytes + odl_comp);
-		duncomp = cds->ds_phys->ds_uncompressed_bytes + cdl_uncomp -
-		    (ohds->ds_phys->ds_uncompressed_bytes + odl_uncomp);
-
-		dsl_dir_diduse_space(odd, dused, dcomp, duncomp, tx);
-		dsl_dir_diduse_space(cdd, -dused, -dcomp, -duncomp, tx);
+		dused = csa->cds->ds_phys->ds_used_bytes + cdl_used -
+		    (csa->ohds->ds_phys->ds_used_bytes + odl_used);
+		dcomp = csa->cds->ds_phys->ds_compressed_bytes + cdl_comp -
+		    (csa->ohds->ds_phys->ds_compressed_bytes + odl_comp);
+		duncomp = csa->cds->ds_phys->ds_uncompressed_bytes +
+		    cdl_uncomp -
+		    (csa->ohds->ds_phys->ds_uncompressed_bytes + odl_uncomp);
+
+		dsl_dir_diduse_space(csa->ohds->ds_dir,
+		    dused, dcomp, duncomp, tx);
+		dsl_dir_diduse_space(csa->cds->ds_dir,
+		    -dused, -dcomp, -duncomp, tx);
+	}
+
+#define	SWITCH64(x, y) \
+	{ \
+		uint64_t __tmp = (x); \
+		(x) = (y); \
+		(y) = __tmp; \
 	}
 
 	/* swap ds_*_bytes */
-	SWITCH64(ohds->ds_phys->ds_used_bytes, cds->ds_phys->ds_used_bytes);
-	SWITCH64(ohds->ds_phys->ds_compressed_bytes,
-	    cds->ds_phys->ds_compressed_bytes);
-	SWITCH64(ohds->ds_phys->ds_uncompressed_bytes,
-	    cds->ds_phys->ds_uncompressed_bytes);
+	SWITCH64(csa->ohds->ds_phys->ds_used_bytes,
+	    csa->cds->ds_phys->ds_used_bytes);
+	SWITCH64(csa->ohds->ds_phys->ds_compressed_bytes,
+	    csa->cds->ds_phys->ds_compressed_bytes);
+	SWITCH64(csa->ohds->ds_phys->ds_uncompressed_bytes,
+	    csa->cds->ds_phys->ds_uncompressed_bytes);
 
 	/* swap deadlists */
-	bplist_close(&cds->ds_deadlist);
-	bplist_close(&ohds->ds_deadlist);
-	SWITCH64(ohds->ds_phys->ds_deadlist_obj, cds->ds_phys->ds_deadlist_obj);
-	VERIFY(0 == bplist_open(&cds->ds_deadlist, dp->dp_meta_objset,
-	    cds->ds_phys->ds_deadlist_obj));
-	VERIFY(0 == bplist_open(&ohds->ds_deadlist, dp->dp_meta_objset,
-	    ohds->ds_phys->ds_deadlist_obj));
-
-	dsl_dataset_close(ohds, DS_MODE_EXCLUSIVE, FTAG);
-	dsl_dataset_close(ods, DS_MODE_STANDARD, FTAG);
+	bplist_close(&csa->cds->ds_deadlist);
+	bplist_close(&csa->ohds->ds_deadlist);
+	SWITCH64(csa->ohds->ds_phys->ds_deadlist_obj,
+	    csa->cds->ds_phys->ds_deadlist_obj);
+	VERIFY(0 == bplist_open(&csa->cds->ds_deadlist, dp->dp_meta_objset,
+	    csa->cds->ds_phys->ds_deadlist_obj));
+	VERIFY(0 == bplist_open(&csa->ohds->ds_deadlist, dp->dp_meta_objset,
+	    csa->ohds->ds_phys->ds_deadlist_obj));
 }
 
 /*
  * Swap the clone "cosname" with its origin head file system.
  */
 int
-dsl_dataset_clone_swap(const char *cosname, boolean_t force)
+dsl_dataset_clone_swap(dsl_dataset_t *clone, dsl_dataset_t *origin_head,
+    boolean_t force)
 {
-	dsl_dataset_t *ds;
-	int err;
+	struct cloneswaparg csa;
 
-	err = dsl_dataset_open(cosname,
-	    DS_MODE_EXCLUSIVE | DS_MODE_INCONSISTENT, FTAG, &ds);
-	if (err)
-		return (err);
+	ASSERT(clone->ds_open_refcount == DS_REF_MAX);
+	ASSERT(origin_head->ds_open_refcount == DS_REF_MAX);
 
-	err = dsl_sync_task_do(ds->ds_dir->dd_pool,
+	csa.cds = clone;
+	csa.ohds = origin_head;
+	csa.force = force;
+	return (dsl_sync_task_do(clone->ds_dir->dd_pool,
 	    dsl_dataset_clone_swap_check,
-	    dsl_dataset_clone_swap_sync, ds, &force, 9);
-	dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
-	return (err);
+	    dsl_dataset_clone_swap_sync, &csa, NULL, 9));
 }
 
 /*
diff --git a/usr/src/uts/common/fs/zfs/dsl_deleg.c b/usr/src/uts/common/fs/zfs/dsl_deleg.c
index 3a9ffa430d..e5d32bd5fc 100644
--- a/usr/src/uts/common/fs/zfs/dsl_deleg.c
+++ b/usr/src/uts/common/fs/zfs/dsl_deleg.c
@@ -151,36 +151,69 @@ dsl_deleg_can_unallow(char *ddname, nvlist_t *nvp, cred_t *cr)
 	return (0);
 }
 
-typedef struct {
-	nvlist_t *p_nvp;
-	boolean_t p_unset;
-} perm_args_t;
-
 static void
 dsl_deleg_set_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
 {
 	dsl_dir_t *dd = arg1;
-	perm_args_t *pa = arg2;
+	nvlist_t *nvp = arg2;
 	objset_t *mos = dd->dd_pool->dp_meta_objset;
 	nvpair_t *whopair = NULL;
 	uint64_t zapobj = dd->dd_phys->dd_deleg_zapobj;
 
 	if (zapobj == 0) {
-		if (pa->p_unset)
-			return;
 		dmu_buf_will_dirty(dd->dd_dbuf, tx);
 		zapobj = dd->dd_phys->dd_deleg_zapobj = zap_create(mos,
 		    DMU_OT_DSL_PERMS, DMU_OT_NONE, 0, tx);
 	}
 
-	while (whopair = nvlist_next_nvpair(pa->p_nvp, whopair)) {
+	while (whopair = nvlist_next_nvpair(nvp, whopair)) {
+		const char *whokey = nvpair_name(whopair);
+		nvlist_t *perms;
+		nvpair_t *permpair = NULL;
+		uint64_t jumpobj;
+
+		VERIFY(nvpair_value_nvlist(whopair, &perms) == 0);
+
+		if (zap_lookup(mos, zapobj, whokey, 8, 1, &jumpobj) != 0) {
+			jumpobj = zap_create(mos, DMU_OT_DSL_PERMS,
+			    DMU_OT_NONE, 0, tx);
+			VERIFY(zap_update(mos, zapobj,
+			    whokey, 8, 1, &jumpobj, tx) == 0);
+		}
+
+		while (permpair = nvlist_next_nvpair(perms, permpair)) {
+			const char *perm = nvpair_name(permpair);
+			uint64_t n = 0;
+
+			VERIFY(zap_update(mos, jumpobj,
+			    perm, 8, 1, &n, tx) == 0);
+			spa_history_internal_log(LOG_DS_PERM_UPDATE,
+			    dd->dd_pool->dp_spa, tx, cr,
+			    "%s %s dataset = %llu", whokey, perm,
+			    dd->dd_phys->dd_head_dataset_obj);
+		}
+	}
+}
+
+static void
+dsl_deleg_unset_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
+{
+	dsl_dir_t *dd = arg1;
+	nvlist_t *nvp = arg2;
+	objset_t *mos = dd->dd_pool->dp_meta_objset;
+	nvpair_t *whopair = NULL;
+	uint64_t zapobj = dd->dd_phys->dd_deleg_zapobj;
+
+	if (zapobj == 0)
+		return;
+
+	while (whopair = nvlist_next_nvpair(nvp, whopair)) {
 		const char *whokey = nvpair_name(whopair);
 		nvlist_t *perms;
 		nvpair_t *permpair = NULL;
 		uint64_t jumpobj;
 
 		if (nvpair_value_nvlist(whopair, &perms) != 0) {
-			ASSERT(pa->p_unset);
 			if (zap_lookup(mos, zapobj, whokey, 8,
 			    1, &jumpobj) == 0) {
 				(void) zap_remove(mos, zapobj, whokey, tx);
@@ -193,37 +226,21 @@ dsl_deleg_set_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
 			continue;
 		}
 
-		if (zap_lookup(mos, zapobj, whokey, 8, 1, &jumpobj) != 0) {
-			/*
-			 * If object doesn't exist and we are removing
-			 * it, then just continue to next item in nvlist
-			 */
-			if (pa->p_unset)
-				continue;
-			jumpobj = zap_create(mos, DMU_OT_DSL_PERMS,
-			    DMU_OT_NONE, 0, tx);
-			VERIFY(zap_update(mos, zapobj,
-			    whokey, 8, 1, &jumpobj, tx) == 0);
-		}
+		if (zap_lookup(mos, zapobj, whokey, 8, 1, &jumpobj) != 0)
+			continue;
 
 		while (permpair = nvlist_next_nvpair(perms, permpair)) {
 			const char *perm = nvpair_name(permpair);
 			uint64_t n = 0;
 
-			if (pa->p_unset) {
-				(void) zap_remove(mos, jumpobj, perm, tx);
-				if (zap_count(mos, jumpobj, &n) == 0 && !n) {
-					(void) zap_remove(mos, zapobj,
-					    whokey, tx);
-					VERIFY(0 == zap_destroy(mos,
-					    jumpobj, tx));
-				}
-			} else {
-				VERIFY(zap_update(mos, jumpobj,
-				    perm, 8, 1, &n, tx) == 0);
+			(void) zap_remove(mos, jumpobj, perm, tx);
+			if (zap_count(mos, jumpobj, &n) == 0 && n == 0) {
+				(void) zap_remove(mos, zapobj,
+				    whokey, tx);
+				VERIFY(0 == zap_destroy(mos,
+				    jumpobj, tx));
 			}
-			spa_history_internal_log((pa->p_unset == B_FALSE) ?
-			    LOG_DS_PERM_UPDATE : LOG_DS_PERM_REMOVE,
+			spa_history_internal_log(LOG_DS_PERM_REMOVE,
 			    dd->dd_pool->dp_spa, tx, cr,
 			    "%s %s dataset = %llu", whokey, perm,
 			    dd->dd_phys->dd_head_dataset_obj);
@@ -236,7 +253,6 @@ dsl_deleg_set(const char *ddname, nvlist_t *nvp, boolean_t unset)
 {
 	dsl_dir_t *dd;
 	int error;
-	perm_args_t pa;
 	nvpair_t *whopair = NULL;
 	int blocks_modified = 0;
 
@@ -253,11 +269,9 @@ dsl_deleg_set(const char *ddname, nvlist_t *nvp, boolean_t unset)
 	while (whopair = nvlist_next_nvpair(nvp, whopair))
 		blocks_modified++;
 
-	pa.p_nvp = nvp;
-	pa.p_unset = unset;
-
-	error = dsl_sync_task_do(dd->dd_pool, NULL, dsl_deleg_set_sync,
-	    dd, &pa, blocks_modified);
+	error = dsl_sync_task_do(dd->dd_pool, NULL,
+	    unset ? dsl_deleg_unset_sync : dsl_deleg_set_sync,
+	    dd, nvp, blocks_modified);
 	dsl_dir_close(dd, FTAG);
 
 	return (error);
diff --git a/usr/src/uts/common/fs/zfs/dsl_dir.c b/usr/src/uts/common/fs/zfs/dsl_dir.c
index 90c6ca4e15..d5e168e3b0 100644
--- a/usr/src/uts/common/fs/zfs/dsl_dir.c
+++ b/usr/src/uts/common/fs/zfs/dsl_dir.c
@@ -533,13 +533,13 @@ dsl_dir_stats(dsl_dir_t *dd, nvlist_t *nv)
 	    dd->dd_phys->dd_compressed_bytes));
 	mutex_exit(&dd->dd_lock);
 
-	if (dd->dd_phys->dd_clone_parent_obj) {
+	if (dd->dd_phys->dd_origin_obj) {
 		dsl_dataset_t *ds;
 		char buf[MAXNAMELEN];
 
 		rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER);
 		VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool,
-		    dd->dd_phys->dd_clone_parent_obj,
+		    dd->dd_phys->dd_origin_obj,
 		    NULL, DS_MODE_NONE, FTAG, &ds));
 		dsl_dataset_name(ds, buf);
 		dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
diff --git a/usr/src/uts/common/fs/zfs/dsl_pool.c b/usr/src/uts/common/fs/zfs/dsl_pool.c
index 7046254db8..6c615fa94c 100644
--- a/usr/src/uts/common/fs/zfs/dsl_pool.c
+++ b/usr/src/uts/common/fs/zfs/dsl_pool.c
@@ -70,7 +70,7 @@ dsl_pool_open_impl(spa_t *spa, uint64_t txg)
 	    offsetof(dsl_dir_t, dd_dirty_link));
 	txg_list_create(&dp->dp_sync_tasks,
 	    offsetof(dsl_sync_task_group_t, dstg_node));
-	list_create(&dp->dp_synced_objsets, sizeof (dsl_dataset_t),
+	list_create(&dp->dp_synced_datasets, sizeof (dsl_dataset_t),
 	    offsetof(dsl_dataset_t, ds_synced_link));
 
 	return (dp);
@@ -129,7 +129,7 @@ dsl_pool_close(dsl_pool_t *dp)
 
 	txg_list_destroy(&dp->dp_dirty_datasets);
 	txg_list_destroy(&dp->dp_dirty_dirs);
-	list_destroy(&dp->dp_synced_objsets);
+	list_destroy(&dp->dp_synced_datasets);
 
 	arc_flush();
 	txg_fini(dp);
@@ -181,7 +181,7 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t txg)
 	zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
 	while (ds = txg_list_remove(&dp->dp_dirty_datasets, txg)) {
 		if (!list_link_active(&ds->ds_synced_link))
-			list_insert_tail(&dp->dp_synced_objsets, ds);
+			list_insert_tail(&dp->dp_synced_datasets, ds);
 		else
 			dmu_buf_rele(ds->ds_dbuf, ds);
 		dsl_dataset_sync(ds, zio, tx);
@@ -212,8 +212,8 @@ dsl_pool_zil_clean(dsl_pool_t *dp)
 {
 	dsl_dataset_t *ds;
 
-	while (ds = list_head(&dp->dp_synced_objsets)) {
-		list_remove(&dp->dp_synced_objsets, ds);
+	while (ds = list_head(&dp->dp_synced_datasets)) {
+		list_remove(&dp->dp_synced_datasets, ds);
 		ASSERT(ds->ds_user_ptr != NULL);
 		zil_clean(((objset_impl_t *)ds->ds_user_ptr)->os_zil);
 		dmu_buf_rele(ds->ds_dbuf, ds);
diff --git a/usr/src/uts/common/fs/zfs/sys/dmu.h b/usr/src/uts/common/fs/zfs/sys/dmu.h
index 3300e901a1..ee50db4b0e 100644
--- a/usr/src/uts/common/fs/zfs/sys/dmu.h
+++ b/usr/src/uts/common/fs/zfs/sys/dmu.h
@@ -161,6 +161,8 @@ void zfs_znode_byteswap(void *buf, size_t size);
  */
 int dmu_objset_open(const char *name, dmu_objset_type_t type, int mode,
     objset_t **osp);
+int dmu_objset_open_ds(struct dsl_dataset *ds, dmu_objset_type_t type,
+    objset_t **osp);
 void dmu_objset_close(objset_t *os);
 int dmu_objset_evict_dbufs(objset_t *os);
 int dmu_objset_create(const char *name, dmu_objset_type_t type,
@@ -486,10 +488,11 @@ void dmu_object_size_from_db(dmu_buf_t *db, uint32_t *blksize,
 typedef struct dmu_objset_stats {
 	uint64_t dds_num_clones; /* number of clones of this */
 	uint64_t dds_creation_txg;
+	uint64_t dds_guid;
 	dmu_objset_type_t dds_type;
 	uint8_t dds_is_snapshot;
 	uint8_t dds_inconsistent;
-	char dds_clone_of[MAXNAMELEN];
+	char dds_origin[MAXNAMELEN];
 } dmu_objset_stats_t;
 
 /*
@@ -578,11 +581,29 @@ typedef void (*dmu_traverse_cb_t)(objset_t *os, void *arg, struct blkptr *bp,
 void dmu_traverse_objset(objset_t *os, uint64_t txg_start,
     dmu_traverse_cb_t cb, void *arg);
 
-int dmu_sendbackup(objset_t *tosnap, objset_t *fromsnap, struct vnode *vp);
-int dmu_recvbackup(char *tosnap, struct drr_begin *drrb, uint64_t *sizep,
-    boolean_t force, boolean_t online, struct vnode *vp, uint64_t voffset,
-    char *cosname);
-int dmu_replay_end_snapshot(char *name, struct drr_begin *drrb);
+int dmu_sendbackup(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin,
+    struct vnode *vp, offset_t *off);
+
+typedef struct dmu_recv_cookie {
+	/*
+	 * This structure is opaque!
+	 *
+	 * If logical and real are different, we are recving the stream
+	 * into the "real" temporary clone, and then switching it with
+	 * the "logical" target.
+	 */
+	struct dsl_dataset *drc_logical_ds;
+	struct dsl_dataset *drc_real_ds;
+	struct drr_begin *drc_drrb;
+	char *drc_tosnap;
+	boolean_t drc_newfs;
+	boolean_t drc_force;
+} dmu_recv_cookie_t;
+
+int dmu_recv_begin(char *tofs, char *tosnap, struct drr_begin *,
+    boolean_t force, objset_t *origin, boolean_t online, dmu_recv_cookie_t *);
+int dmu_recv_stream(dmu_recv_cookie_t *drc, struct vnode *vp, offset_t *voffp);
+int dmu_recv_end(dmu_recv_cookie_t *drc);
 
 /* CRC64 table */
 #define	ZFS_CRC64_POLY	0xC96C5795D7870F42ULL	/* ECMA-182, reflected form */
diff --git a/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h b/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h
index d02eba1ce7..53cee115e3 100644
--- a/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h
+++ b/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h
@@ -129,16 +129,23 @@ int dsl_dataset_open_obj(struct dsl_pool *dp, uint64_t dsobj,
     const char *tail, int mode, void *tag, dsl_dataset_t **);
 void dsl_dataset_name(dsl_dataset_t *ds, char *name);
 void dsl_dataset_close(dsl_dataset_t *ds, int mode, void *tag);
+void dsl_dataset_downgrade(dsl_dataset_t *ds, int oldmode, int newmode);
+boolean_t dsl_dataset_tryupgrade(dsl_dataset_t *ds, int oldmode, int newmode);
+uint64_t dsl_dataset_create_sync_impl(dsl_dir_t *dd, dsl_dataset_t *origin,
+    dmu_tx_t *tx);
 uint64_t dsl_dataset_create_sync(dsl_dir_t *pds,
-    const char *lastname, dsl_dataset_t *clone_parent, dmu_tx_t *tx);
-int dsl_dataset_destroy(const char *name);
+    const char *lastname, dsl_dataset_t *origin, cred_t *, dmu_tx_t *);
+int dsl_dataset_destroy(dsl_dataset_t *ds, void *tag);
 int dsl_snapshots_destroy(char *fsname, char *snapname);
+dsl_checkfunc_t dsl_dataset_destroy_check;
+dsl_syncfunc_t dsl_dataset_destroy_sync;
 dsl_checkfunc_t dsl_dataset_snapshot_check;
 dsl_syncfunc_t dsl_dataset_snapshot_sync;
-int dsl_dataset_rollback(dsl_dataset_t *ds);
+int dsl_dataset_rollback(dsl_dataset_t *ds, dmu_objset_type_t ost);
 int dsl_dataset_rename(char *name, const char *newname, boolean_t recursive);
 int dsl_dataset_promote(const char *name);
-int dsl_dataset_clone_swap(const char *name, boolean_t force);
+int dsl_dataset_clone_swap(dsl_dataset_t *clone, dsl_dataset_t *origin_head,
+    boolean_t force);
 
 void *dsl_dataset_set_user_ptr(dsl_dataset_t *ds,
     void *p, dsl_dataset_evict_func_t func);
diff --git a/usr/src/uts/common/fs/zfs/sys/dsl_dir.h b/usr/src/uts/common/fs/zfs/sys/dsl_dir.h
index bcab488f3b..d5db4c1d8d 100644
--- a/usr/src/uts/common/fs/zfs/sys/dsl_dir.h
+++ b/usr/src/uts/common/fs/zfs/sys/dsl_dir.h
@@ -44,7 +44,7 @@ typedef struct dsl_dir_phys {
 	uint64_t dd_creation_time; /* not actually used */
 	uint64_t dd_head_dataset_obj;
 	uint64_t dd_parent_obj;
-	uint64_t dd_clone_parent_obj;
+	uint64_t dd_origin_obj;
 	uint64_t dd_child_dir_zapobj;
 	/*
 	 * how much space our children are accounting for; for leaf
diff --git a/usr/src/uts/common/fs/zfs/sys/dsl_pool.h b/usr/src/uts/common/fs/zfs/sys/dsl_pool.h
index f7ec67a0e0..44adeea7c9 100644
--- a/usr/src/uts/common/fs/zfs/sys/dsl_pool.h
+++ b/usr/src/uts/common/fs/zfs/sys/dsl_pool.h
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -50,7 +50,7 @@ typedef struct dsl_pool {
 
 	/* No lock needed - sync context only */
 	blkptr_t dp_meta_rootbp;
-	list_t dp_synced_objsets;
+	list_t dp_synced_datasets;
 
 	/* Has its own locking */
 	tx_state_t dp_tx;
diff --git a/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h b/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h
index 93c8d76bc0..26f696f21b 100644
--- a/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h
+++ b/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h
@@ -47,9 +47,12 @@ extern "C" {
 #define	ZFS_SNAPDIR_HIDDEN		0
 #define	ZFS_SNAPDIR_VISIBLE		1
 
-#define	DMU_BACKUP_VERSION (1ULL)
+#define	DMU_BACKUP_STREAM_VERSION (1ULL)
+#define	DMU_BACKUP_HEADER_VERSION (2ULL)
 #define	DMU_BACKUP_MAGIC 0x2F5bacbacULL
 
+#define	DRR_FLAG_CLONE (1<<0)
+
 /*
  * zfs ioctl command structure
  */
@@ -58,14 +61,14 @@ typedef struct dmu_replay_record {
 		DRR_BEGIN, DRR_OBJECT, DRR_FREEOBJECTS,
 		DRR_WRITE, DRR_FREE, DRR_END,
 	} drr_type;
-	uint32_t drr_pad;
+	uint32_t drr_payloadlen;
 	union {
 		struct drr_begin {
 			uint64_t drr_magic;
 			uint64_t drr_version;
 			uint64_t drr_creation_time;
 			dmu_objset_type_t drr_type;
-			uint32_t drr_pad;
+			uint32_t drr_flags;
 			uint64_t drr_toguid;
 			uint64_t drr_fromguid;
 			char drr_toname[MAXNAMELEN];
@@ -131,6 +134,7 @@ typedef struct zfs_share {
 typedef struct zfs_cmd {
 	char		zc_name[MAXPATHLEN];
 	char		zc_value[MAXPATHLEN * 2];
+	char		zc_string[MAXNAMELEN];
 	uint64_t	zc_guid;
 	uint64_t	zc_nvlist_conf;		/* really (char *) */
 	uint64_t	zc_nvlist_conf_size;
diff --git a/usr/src/uts/common/fs/zfs/sys/zfs_znode.h b/usr/src/uts/common/fs/zfs/sys/zfs_znode.h
index b9e0c95290..fcd8574876 100644
--- a/usr/src/uts/common/fs/zfs/sys/zfs_znode.h
+++ b/usr/src/uts/common/fs/zfs/sys/zfs_znode.h
@@ -236,7 +236,7 @@ typedef struct znode {
 /*
  * ZFS_ENTER() is called on entry to each ZFS vnode and vfs operation.
  * ZFS_EXIT() must be called before exitting the vop.
- * ZFS_ENTER_VERIFY_ZP() does ZFS_ENTER plus verifies the znode is valid.
+ * ZFS_VERIFY_ZP() verifies the znode is valid.
  */
 #define	ZFS_ENTER(zfsvfs) \
 	{ \
@@ -249,14 +249,11 @@ typedef struct znode {
 
 #define	ZFS_EXIT(zfsvfs) rrw_exit(&(zfsvfs)->z_teardown_lock, FTAG)
 
-#define	ZFS_ENTER_VERIFY_ZP(zfsvfs, zp) \
-	{ \
-		ZFS_ENTER((zfsvfs)); \
-		if (!(zp)->z_dbuf_held) { \
-			ZFS_EXIT(zfsvfs); \
-			return (EIO); \
-		} \
-	}
+#define	ZFS_VERIFY_ZP(zp) \
+	if (!(zp)->z_dbuf_held) { \
+		ZFS_EXIT((zp)->z_zfsvfs); \
+		return (EIO); \
+	} \
 
 /*
  * Macros for dealing with dmu_buf_hold
diff --git a/usr/src/uts/common/fs/zfs/zfs_ioctl.c b/usr/src/uts/common/fs/zfs/zfs_ioctl.c
index 72e2524646..674e73406a 100644
--- a/usr/src/uts/common/fs/zfs/zfs_ioctl.c
+++ b/usr/src/uts/common/fs/zfs/zfs_ioctl.c
@@ -345,7 +345,7 @@ zfs_secpolicy_share(zfs_cmd_t *zc, cred_t *cr)
 	if (!INGLOBALZONE(curproc))
 		return (EPERM);
 
-	if (secpolicy_nfs(CRED()) == 0) {
+	if (secpolicy_nfs(cr) == 0) {
 		return (0);
 	} else {
 		vnode_t *vp;
@@ -477,7 +477,7 @@ zfs_secpolicy_promote(zfs_cmd_t *zc, cred_t *cr)
 
 		rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER);
 		error = dsl_dataset_open_obj(dd->dd_pool,
-		    dd->dd_phys->dd_clone_parent_obj, NULL,
+		    dd->dd_phys->dd_origin_obj, NULL,
 		    DS_MODE_NONE, FTAG, &pclone);
 		rw_exit(&dd->dd_pool->dp_config_rwlock);
 		if (error) {
@@ -1083,6 +1083,17 @@ zfs_ioc_vdev_setpath(zfs_cmd_t *zc)
 	return (error);
 }
 
+/*
+ * inputs:
+ * zc_name		name of filesystem
+ * zc_nvlist_dst_size	size of buffer for property nvlist
+ *
+ * outputs:
+ * zc_objset_stats	stats
+ * zc_nvlist_dst	property nvlist
+ * zc_nvlist_dst_size	size of property nvlist
+ * zc_value		alternate root
+ */
 static int
 zfs_ioc_objset_stats(zfs_cmd_t *zc)
 {
@@ -1133,6 +1144,19 @@ retry:
 	return (error);
 }
 
+/*
+ * inputs:
+ * zc_name		name of filesystem
+ * zc_cookie		zap cursor
+ * zc_nvlist_dst_size	size of buffer for property nvlist
+ *
+ * outputs:
+ * zc_name		name of next filesystem
+ * zc_objset_stats	stats
+ * zc_nvlist_dst	property nvlist
+ * zc_nvlist_dst_size	size of property nvlist
+ * zc_value		alternate root
+ */
 static int
 zfs_ioc_objset_version(zfs_cmd_t *zc)
 {
@@ -1226,6 +1250,19 @@ retry:
 	return (error);
 }
 
+/*
+ * inputs:
+ * zc_name		name of filesystem
+ * zc_cookie		zap cursor
+ * zc_nvlist_dst_size	size of buffer for property nvlist
+ *
+ * outputs:
+ * zc_name		name of next snapshot
+ * zc_objset_stats	stats
+ * zc_nvlist_dst	property nvlist
+ * zc_nvlist_dst_size	size of property nvlist
+ * zc_value		alternate root
+ */
 static int
 zfs_ioc_snapshot_list_next(zfs_cmd_t *zc)
 {
@@ -1270,6 +1307,10 @@ retry:
 	if (error == 0)
 		error = zfs_ioc_objset_stats(zc); /* fill in the stats */
 
+	/* if we failed, undo the @ that we tacked on to zc_name */
+	if (error != 0)
+		*strchr(zc->zc_name, '@') = '\0';
+
 	dmu_objset_close(os);
 	return (error);
 }
@@ -1435,6 +1476,14 @@ zfs_set_prop_nvlist(const char *name, nvlist_t *nvl)
 	return (0);
 }
 
+/*
+ * inputs:
+ * zc_name		name of filesystem
+ * zc_value		name of property to set
+ * zc_nvlist_src{_size}	nvlist of properties to apply
+ *
+ * outputs:		none
+ */
 static int
 zfs_ioc_set_prop(zfs_cmd_t *zc)
 {
@@ -1451,6 +1500,13 @@ zfs_ioc_set_prop(zfs_cmd_t *zc)
 	return (error);
 }
 
+/*
+ * inputs:
+ * zc_name		name of filesystem
+ * zc_value		name of property to inherit
+ *
+ * outputs:		none
+ */
 static int
 zfs_ioc_inherit_prop(zfs_cmd_t *zc)
 {
@@ -1553,6 +1609,14 @@ zfs_ioc_iscsi_perm_check(zfs_cmd_t *zc)
 	return (error);
 }
 
+/*
+ * inputs:
+ * zc_name		name of filesystem
+ * zc_nvlist_src{_size}	nvlist of delegated permissions
+ * zc_perm_action	allow/unallow flag
+ *
+ * outputs:		none
+ */
 static int
 zfs_ioc_set_fsacl(zfs_cmd_t *zc)
 {
@@ -1595,6 +1659,13 @@ zfs_ioc_set_fsacl(zfs_cmd_t *zc)
 	return (error);
 }
 
+/*
+ * inputs:
+ * zc_name		name of filesystem
+ *
+ * outputs:
+ * zc_nvlist_src{_size}	nvlist of delegated permissions
+ */
 static int
 zfs_ioc_get_fsacl(zfs_cmd_t *zc)
 {
@@ -1609,12 +1680,24 @@ zfs_ioc_get_fsacl(zfs_cmd_t *zc)
 	return (error);
 }
 
+/*
+ * inputs:
+ * zc_name		name of volume
+ *
+ * outputs:		none
+ */
 static int
 zfs_ioc_create_minor(zfs_cmd_t *zc)
 {
 	return (zvol_create_minor(zc->zc_name, ddi_driver_major(zfs_dip)));
 }
 
+/*
+ * inputs:
+ * zc_name		name of volume
+ *
+ * outputs:		none
+ */
 static int
 zfs_ioc_remove_minor(zfs_cmd_t *zc)
 {
@@ -1809,6 +1892,15 @@ zfs_normalization_get(const char *dataset, nvlist_t *proplist, int *norm,
 	return (0);
 }
 
+/*
+ * inputs:
+ * zc_objset_type	type of objset to create (fs vs zvol)
+ * zc_name		name of new objset
+ * zc_value		name of snapshot to clone from (may be empty)
+ * zc_nvlist_src{_size}	nvlist of properties to apply
+ *
+ * outputs:		none
+ */
 static int
 zfs_ioc_create(zfs_cmd_t *zc)
 {
@@ -1973,6 +2065,14 @@ zfs_ioc_create(zfs_cmd_t *zc)
 	return (error);
 }
 
+/*
+ * inputs:
+ * zc_name	name of filesystem
+ * zc_value	short name of snapshot
+ * zc_cookie	recursive flag
+ *
+ * outputs:	none
+ */
 static int
 zfs_ioc_snapshot(zfs_cmd_t *zc)
 {
@@ -2022,6 +2122,13 @@ zfs_unmount_snap(char *name, void *arg)
 	return (0);
 }
 
+/*
+ * inputs:
+ * zc_name	name of filesystem
+ * zc_value	short name of snapshot
+ *
+ * outputs:	none
+ */
 static int
 zfs_ioc_destroy_snaps(zfs_cmd_t *zc)
 {
@@ -2036,6 +2143,13 @@ zfs_ioc_destroy_snaps(zfs_cmd_t *zc)
 	return (dmu_snapshots_destroy(zc->zc_name, zc->zc_value));
 }
 
+/*
+ * inputs:
+ * zc_name		name of dataset to destroy
+ * zc_objset_type	type of objset
+ *
+ * outputs:		none
+ */
 static int
 zfs_ioc_destroy(zfs_cmd_t *zc)
 {
@@ -2048,12 +2162,26 @@ zfs_ioc_destroy(zfs_cmd_t *zc)
 	return (dmu_objset_destroy(zc->zc_name));
 }
 
+/*
+ * inputs:
+ * zc_name	name of snapshot to roll back to
+ *
+ * outputs:	none
+ */
 static int
 zfs_ioc_rollback(zfs_cmd_t *zc)
 {
 	return (dmu_objset_rollback(zc->zc_name));
 }
 
+/*
+ * inputs:
+ * zc_name	old name of dataset
+ * zc_value	new name of dataset
+ * zc_cookie	recursive flag (only valid for snapshots)
+ *
+ * outputs:	none
+ */
 static int
 zfs_ioc_rename(zfs_cmd_t *zc)
 {
@@ -2079,38 +2207,64 @@ zfs_ioc_rename(zfs_cmd_t *zc)
 	return (dmu_objset_rename(zc->zc_name, zc->zc_value, recursive));
 }
 
+/*
+ * inputs:
+ * zc_name		name of containing filesystem
+ * zc_nvlist_src{_size}	nvlist of properties to apply
+ * zc_value		name of snapshot to create
+ * zc_string		name of clone origin (if DRR_FLAG_CLONE)
+ * zc_cookie		file descriptor to recv from
+ * zc_begin_record	the BEGIN record of the stream (not byteswapped)
+ * zc_guid		force flag
+ *
+ * outputs:
+ * zc_cookie		number of bytes read
+ */
 static int
-zfs_ioc_recvbackup(zfs_cmd_t *zc)
+zfs_ioc_recv(zfs_cmd_t *zc)
 {
 	file_t *fp;
-	offset_t new_off;
 	objset_t *os;
+	dmu_recv_cookie_t drc;
 	zfsvfs_t *zfsvfs = NULL;
-	char *cp;
-	char cosname[MAXNAMELEN];
 	boolean_t force = (boolean_t)zc->zc_guid;
 	int error, fd;
+	offset_t off;
+	nvlist_t *props = NULL;
+	objset_t *origin = NULL;
+	char *tosnap;
+	char tofs[ZFS_MAXNAMELEN];
 
 	if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
 	    strchr(zc->zc_value, '@') == NULL ||
 	    strchr(zc->zc_value, '%'))
 		return (EINVAL);
 
+	(void) strcpy(tofs, zc->zc_value);
+	tosnap = strchr(tofs, '@');
+	*tosnap = '\0';
+	tosnap++;
+
+	if (zc->zc_nvlist_src != NULL &&
+	    (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
+	    &props)) != 0)
+		return (error);
+
 	fd = zc->zc_cookie;
 	fp = getf(fd);
-	if (fp == NULL)
+	if (fp == NULL) {
+		nvlist_free(props);
 		return (EBADF);
+	}
 
 	/*
 	 * Get the zfsvfs for the receiving objset. There
 	 * won't be one if we're operating on a zvol, if the
 	 * objset doesn't exist yet, or is not mounted.
 	 */
-	cp = strchr(zc->zc_value, '@');
-	*cp = '\0';
-	error = dmu_objset_open(zc->zc_value, DMU_OST_ANY,
+
+	error = dmu_objset_open(tofs, DMU_OST_ANY,
 	    DS_MODE_STANDARD | DS_MODE_READONLY, &os);
-	*cp = '@';
 	if (!error) {
 		if (dmu_objset_type(os) == DMU_OST_ZFS) {
 			mutex_enter(&os->os->os_user_ptr_lock);
@@ -2122,60 +2276,111 @@ zfs_ioc_recvbackup(zfs_cmd_t *zc)
 		dmu_objset_close(os);
 	}
 
-	error = dmu_recvbackup(zc->zc_value, &zc->zc_begin_record,
-	    &zc->zc_cookie, force, zfsvfs != NULL, fp->f_vnode,
-	    fp->f_offset, cosname);
+	if (zc->zc_string[0]) {
+		error = dmu_objset_open(zc->zc_string, DMU_OST_ANY,
+		    DS_MODE_STANDARD | DS_MODE_READONLY, &origin);
+		if (error) {
+			if (zfsvfs != NULL)
+				VFS_RELE(zfsvfs->z_vfs);
+			nvlist_free(props);
+			releasef(fd);
+			return (error);
+		}
+	}
+
+	error = dmu_recv_begin(tofs, tosnap, &zc->zc_begin_record,
+	    force, origin, zfsvfs != NULL, &drc);
+	if (origin)
+		dmu_objset_close(origin);
+	if (error) {
+		if (zfsvfs != NULL)
+			VFS_RELE(zfsvfs->z_vfs);
+		nvlist_free(props);
+		releasef(fd);
+		return (error);
+	}
 
 	/*
-	 * For incremental snapshots where we created a
-	 * temporary clone, we now swap zfsvfs::z_os with
-	 * the newly created and received "cosname".
+	 * If properties are supplied, they are to completely replace
+	 * the existing ones, so first "inherit" any existing properties.
 	 */
-	if (!error && zfsvfs != NULL) {
-		char osname[MAXNAMELEN];
-		int mode;
-
-		error = zfs_suspend_fs(zfsvfs, osname, &mode);
-		if (!error) {
-			int swap_err;
-			int snap_err = 0;
-
-			swap_err = dsl_dataset_clone_swap(cosname, force);
-			if (!swap_err) {
-				char *cp = strrchr(zc->zc_value, '@');
-
-				*cp = '\0';
-				snap_err = dmu_replay_end_snapshot(zc->zc_value,
-				    &zc->zc_begin_record);
-				*cp = '@';
+	if (props) {
+		objset_t *os;
+		nvlist_t *nv = NULL;
+
+		error = dmu_objset_open(tofs, DMU_OST_ANY,
+		    DS_MODE_STANDARD | DS_MODE_READONLY | DS_MODE_INCONSISTENT,
+		    &os);
+		if (error == 0) {
+			error = dsl_prop_get_all(os, &nv);
+			dmu_objset_close(os);
+		}
+		if (error == 0) {
+			nvpair_t *elem;
+			zfs_cmd_t zc2 = { 0 };
+
+			(void) strcpy(zc2.zc_name, tofs);
+			for (elem = nvlist_next_nvpair(nv, NULL); elem;
+			    elem = nvlist_next_nvpair(nv, elem)) {
+				(void) strcpy(zc2.zc_value, nvpair_name(elem));
+				if (zfs_secpolicy_inherit(&zc2, CRED()) == 0)
+					(void) zfs_ioc_inherit_prop(&zc2);
 			}
-			error = zfs_resume_fs(zfsvfs, osname, mode);
-			if (!error)
-				error = swap_err;
-			if (!error)
-				error = snap_err;
 		}
+		if (nv)
+			nvlist_free(nv);
+	}
+
+	/*
+	 * Set properties.  Note, we ignore errors.  Would be better to
+	 * do best-effort in zfs_set_prop_nvlist, too.
+	 */
+	(void) zfs_set_prop_nvlist(tofs, props);
+	nvlist_free(props);
+
+	off = fp->f_offset;
+	error = dmu_recv_stream(&drc, fp->f_vnode, &off);
 
-		/* destroy the clone we created */
-		(void) dmu_objset_destroy(cosname);
+	if (error == 0) {
+		if (zfsvfs != NULL) {
+			char osname[MAXNAMELEN];
+			int mode;
+
+			(void) zfs_suspend_fs(zfsvfs, osname, &mode);
+			error = dmu_recv_end(&drc);
+			error |= zfs_resume_fs(zfsvfs, osname, mode);
+		} else {
+			error = dmu_recv_end(&drc);
+		}
 	}
 	if (zfsvfs != NULL)
 		VFS_RELE(zfsvfs->z_vfs);
-	new_off = fp->f_offset + zc->zc_cookie;
-	if (VOP_SEEK(fp->f_vnode, fp->f_offset, &new_off, NULL) == 0)
-		fp->f_offset = new_off;
+
+	zc->zc_cookie = off - fp->f_offset;
+	if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
+		fp->f_offset = off;
 
 	releasef(fd);
 	return (error);
 }
 
+/*
+ * inputs:
+ * zc_name	name of snapshot to send
+ * zc_value	short name of incremental fromsnap (may be empty)
+ * zc_cookie	file descriptor to send stream to
+ * zc_obj	fromorigin flag (mutually exclusive with zc_value)
+ *
+ * outputs: none
+ */
 static int
-zfs_ioc_sendbackup(zfs_cmd_t *zc)
+zfs_ioc_send(zfs_cmd_t *zc)
 {
 	objset_t *fromsnap = NULL;
 	objset_t *tosnap;
 	file_t *fp;
 	int error;
+	offset_t off;
 
 	error = dmu_objset_open(zc->zc_name, DMU_OST_ANY,
 	    DS_MODE_STANDARD | DS_MODE_READONLY, &tosnap);
@@ -2207,8 +2412,11 @@ zfs_ioc_sendbackup(zfs_cmd_t *zc)
 		return (EBADF);
 	}
 
-	error = dmu_sendbackup(tosnap, fromsnap, fp->f_vnode);
+	off = fp->f_offset;
+	error = dmu_sendbackup(tosnap, fromsnap, zc->zc_obj, fp->f_vnode, &off);
 
+	if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
+		fp->f_offset = off;
 	releasef(zc->zc_cookie);
 	if (fromsnap)
 		dmu_objset_close(fromsnap);
@@ -2313,6 +2521,13 @@ zfs_ioc_clear(zfs_cmd_t *zc)
 	return (0);
 }
 
+/*
+ * inputs:
+ * zc_name	name of filesystem
+ * zc_value	name of origin snapshot
+ *
+ * outputs:	none
+ */
 static int
 zfs_ioc_promote(zfs_cmd_t *zc)
 {
@@ -2500,8 +2715,8 @@ static zfs_ioc_vec_t zfs_ioc_vec[] = {
 	{ zfs_ioc_destroy, zfs_secpolicy_destroy, DATASET_NAME, B_TRUE },
 	{ zfs_ioc_rollback, zfs_secpolicy_rollback, DATASET_NAME, B_TRUE },
 	{ zfs_ioc_rename, zfs_secpolicy_rename,	DATASET_NAME, B_TRUE },
-	{ zfs_ioc_recvbackup, zfs_secpolicy_receive, DATASET_NAME, B_TRUE },
-	{ zfs_ioc_sendbackup, zfs_secpolicy_send, DATASET_NAME, B_TRUE },
+	{ zfs_ioc_recv, zfs_secpolicy_receive, DATASET_NAME, B_TRUE },
+	{ zfs_ioc_send, zfs_secpolicy_send, DATASET_NAME, B_TRUE },
 	{ zfs_ioc_inject_fault,	zfs_secpolicy_inject, NO_NAME, B_FALSE },
 	{ zfs_ioc_clear_fault, zfs_secpolicy_inject, NO_NAME, B_FALSE },
 	{ zfs_ioc_inject_list_next, zfs_secpolicy_inject, NO_NAME, B_FALSE },
diff --git a/usr/src/uts/common/fs/zfs/zfs_vnops.c b/usr/src/uts/common/fs/zfs/zfs_vnops.c
index 573f746e72..9e94ef0560 100644
--- a/usr/src/uts/common/fs/zfs/zfs_vnops.c
+++ b/usr/src/uts/common/fs/zfs/zfs_vnops.c
@@ -88,9 +88,10 @@
  * to freed memory.  The example below illustrates the following Big Rules:
  *
  *  (1) A check must be made in each zfs thread for a mounted file system.
- *	This is done avoiding races using ZFS_ENTER(zfsvfs) or
- *      ZFS_ENTER_VERIFY(zfsvfs, zp).  A ZFS_EXIT(zfsvfs) is needed before
- *      all returns.
+ *	This is done avoiding races using ZFS_ENTER(zfsvfs).
+ *      A ZFS_EXIT(zfsvfs) is needed before all returns.  Any znodes
+ *      must be checked with ZFS_VERIFY_ZP(zp).  Both ZFS_ENTER and
+ *      ZFS_VERIFY_ZP can return EIO from the calling function.
  *
  *  (2)	VN_RELE() should always be the last thing except for zil_commit()
  *	(if necessary) and ZFS_EXIT(). This is for 3 reasons:
@@ -163,6 +164,7 @@
  *	ZFS_EXIT(zfsvfs);		// finished in zfs
  *	return (error);			// done, report error
  */
+
 /* ARGSUSED */
 static int
 zfs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct)
@@ -286,7 +288,8 @@ zfs_ioctl(vnode_t *vp, int com, intptr_t data, int flag, cred_t *cred,
 
 		zp = VTOZ(vp);
 		zfsvfs = zp->z_zfsvfs;
-		ZFS_ENTER_VERIFY_ZP(zfsvfs, zp);
+		ZFS_ENTER(zfsvfs);
+		ZFS_VERIFY_ZP(zp);
 
 		/* offset parameter is in/out */
 		error = zfs_holey(vp, com, &off);
@@ -432,7 +435,8 @@ zfs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
 	int		error;
 	rl_t		*rl;
 
-	ZFS_ENTER_VERIFY_ZP(zfsvfs, zp);
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(zp);
 	os = zfsvfs->z_os;
 
 	/*
@@ -625,7 +629,8 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
 	if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T)
 		limit = MAXOFFSET_T;
 
-	ZFS_ENTER_VERIFY_ZP(zfsvfs, zp);
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(zp);
 	zilog = zfsvfs->z_log;
 
 	/*
@@ -951,7 +956,8 @@ zfs_access(vnode_t *vp, int mode, int flag, cred_t *cr,
 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
 	int error;
 
-	ZFS_ENTER_VERIFY_ZP(zfsvfs, zp);
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(zp);
 
 	if (flag & V_ACE_MASK)
 		error = zfs_zaccess(zp, mode, flag, B_FALSE, cr);
@@ -994,7 +1000,8 @@ zfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct pathname *pnp,
 	zfsvfs_t *zfsvfs = zdp->z_zfsvfs;
 	int	error;
 
-	ZFS_ENTER_VERIFY_ZP(zfsvfs, zdp);
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(zdp);
 
 	*vpp = NULL;
 
@@ -1128,7 +1135,8 @@ zfs_create(vnode_t *dvp, char *name, vattr_t *vap, vcexcl_t excl,
 	    IS_EPHEMERAL(crgetuid(cr)) || IS_EPHEMERAL(crgetgid(cr))))
 		return (EINVAL);
 
-	ZFS_ENTER_VERIFY_ZP(zfsvfs, dzp);
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(dzp);
 	os = zfsvfs->z_os;
 	zilog = zfsvfs->z_log;
 
@@ -1371,7 +1379,8 @@ zfs_remove(vnode_t *dvp, char *name, cred_t *cr, caller_context_t *ct,
 	int		error;
 	int		zflg = ZEXISTS;
 
-	ZFS_ENTER_VERIFY_ZP(zfsvfs, dzp);
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(dzp);
 	zilog = zfsvfs->z_log;
 
 	if (flags & FIGNORECASE) {
@@ -1575,7 +1584,8 @@ zfs_mkdir(vnode_t *dvp, char *dirname, vattr_t *vap, vnode_t **vpp, cred_t *cr,
 	    IS_EPHEMERAL(crgetgid(cr))))
 		return (EINVAL);
 
-	ZFS_ENTER_VERIFY_ZP(zfsvfs, dzp);
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(dzp);
 	zilog = zfsvfs->z_log;
 
 	if (dzp->z_phys->zp_flags & ZFS_XATTR) {
@@ -1721,7 +1731,8 @@ zfs_rmdir(vnode_t *dvp, char *name, vnode_t *cwd, cred_t *cr,
 	int		error;
 	int		zflg = ZEXISTS;
 
-	ZFS_ENTER_VERIFY_ZP(zfsvfs, dzp);
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(dzp);
 	zilog = zfsvfs->z_log;
 
 	if (flags & FIGNORECASE)
@@ -1858,7 +1869,8 @@ zfs_readdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp,
 	int		error;
 	uint8_t		prefetch;
 
-	ZFS_ENTER_VERIFY_ZP(zfsvfs, zp);
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(zp);
 
 	/*
 	 * If we are not given an eof variable,
@@ -2080,7 +2092,8 @@ zfs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct)
 
 	(void) tsd_set(zfs_fsyncer_key, (void *)zfs_fsync_sync_cnt);
 
-	ZFS_ENTER_VERIFY_ZP(zfsvfs, zp);
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(zp);
 	zil_commit(zfsvfs->z_log, zp->z_last_itx, zp->z_id);
 	ZFS_EXIT(zfsvfs);
 	return (0);
@@ -2116,7 +2129,8 @@ zfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
 	xoptattr_t *xoap = NULL;
 	boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
 
-	ZFS_ENTER_VERIFY_ZP(zfsvfs, zp);
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(zp);
 	pzp = zp->z_phys;
 
 	mutex_enter(&zp->z_lock);
@@ -2324,7 +2338,8 @@ zfs_setattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
 	if (mask & AT_NOSET)
 		return (EINVAL);
 
-	ZFS_ENTER_VERIFY_ZP(zfsvfs, zp);
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(zp);
 
 	pzp = zp->z_phys;
 	zilog = zfsvfs->z_log;
@@ -2811,7 +2826,8 @@ zfs_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm, cred_t *cr,
 	int		error = 0;
 	int		zflg = 0;
 
-	ZFS_ENTER_VERIFY_ZP(zfsvfs, sdzp);
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(sdzp);
 	zilog = zfsvfs->z_log;
 
 	/*
@@ -2826,10 +2842,7 @@ zfs_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm, cred_t *cr,
 	}
 
 	tdzp = VTOZ(tdvp);
-	if (!tdzp->z_dbuf_held) {
-		ZFS_EXIT(zfsvfs);
-		return (EIO);
-	}
+	ZFS_VERIFY_ZP(tdzp);
 	if (zfsvfs->z_case & ZFS_UTF8_ONLY && u8_validate(tnm,
 	    strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
 		ZFS_EXIT(zfsvfs);
@@ -3108,7 +3121,8 @@ zfs_symlink(vnode_t *dvp, char *name, vattr_t *vap, char *link, cred_t *cr,
 
 	ASSERT(vap->va_type == VLNK);
 
-	ZFS_ENTER_VERIFY_ZP(zfsvfs, dzp);
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(dzp);
 	zilog = zfsvfs->z_log;
 
 	if (zfsvfs->z_case & ZFS_UTF8_ONLY && u8_validate(name, strlen(name),
@@ -3248,7 +3262,8 @@ zfs_readlink(vnode_t *vp, uio_t *uio, cred_t *cr, caller_context_t *ct)
 	size_t		bufsz;
 	int		error;
 
-	ZFS_ENTER_VERIFY_ZP(zfsvfs, zp);
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(zp);
 
 	bufsz = (size_t)zp->z_phys->zp_size;
 	if (bufsz + sizeof (znode_phys_t) <= zp->z_dbuf->db_size) {
@@ -3305,7 +3320,8 @@ zfs_link(vnode_t *tdvp, vnode_t *svp, char *name, cred_t *cr,
 
 	ASSERT(tdvp->v_type == VDIR);
 
-	ZFS_ENTER_VERIFY_ZP(zfsvfs, dzp);
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(dzp);
 	zilog = zfsvfs->z_log;
 
 	if (VOP_REALVP(svp, &realvp, ct) == 0)
@@ -3315,6 +3331,8 @@ zfs_link(vnode_t *tdvp, vnode_t *svp, char *name, cred_t *cr,
 		ZFS_EXIT(zfsvfs);
 		return (EXDEV);
 	}
+	szp = VTOZ(svp);
+	ZFS_VERIFY_ZP(szp);
 
 	if (zfsvfs->z_case & ZFS_UTF8_ONLY && u8_validate(name,
 	    strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
@@ -3324,11 +3342,6 @@ zfs_link(vnode_t *tdvp, vnode_t *svp, char *name, cred_t *cr,
 	if (flags & FIGNORECASE)
 		zf |= ZCILOOK;
 
-	szp = VTOZ(svp);
-	if (!szp->z_dbuf_held) {
-		ZFS_EXIT(zfsvfs);
-		return (EIO);
-	}
 top:
 	/*
 	 * We do not support links between attributes and non-attributes
@@ -3571,9 +3584,8 @@ zfs_putpage(vnode_t *vp, offset_t off, size_t len, int flags, cred_t *cr,
 	uint64_t	filesz;
 	int		error = 0;
 
-	ZFS_ENTER_VERIFY_ZP(zfsvfs, zp);
-
-	ASSERT(zp->z_dbuf_held && zp->z_phys);
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(zp);
 
 	if (len == 0) {
 		/*
@@ -3712,7 +3724,8 @@ zfs_frlock(vnode_t *vp, int cmd, flock64_t *bfp, int flag, offset_t offset,
 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
 	int error;
 
-	ZFS_ENTER_VERIFY_ZP(zfsvfs, zp);
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(zp);
 
 	/*
 	 * We are following the UFS semantics with respect to mapcnt
@@ -3865,13 +3878,12 @@ zfs_getpage(vnode_t *vp, offset_t off, size_t len, uint_t *protp,
 	int		need_unlock = 0, err = 0;
 	offset_t	orig_off;
 
-	ZFS_ENTER_VERIFY_ZP(zfsvfs, zp);
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(zp);
 
 	if (protp)
 		*protp = PROT_ALL;
 
-	ASSERT(zp->z_dbuf_held && zp->z_phys);
-
 	/* no faultahead (for now) */
 	if (pl == NULL) {
 		ZFS_EXIT(zfsvfs);
@@ -4004,7 +4016,8 @@ zfs_map(vnode_t *vp, offset_t off, struct as *as, caddr_t *addrp,
 	    ZFS_APPENDONLY)))
 		return (EPERM);
 
-	ZFS_ENTER_VERIFY_ZP(zfsvfs, zp);
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(zp);
 
 	if (vp->v_flag & VNOMAP) {
 		ZFS_EXIT(zfsvfs);
@@ -4143,7 +4156,8 @@ zfs_space(vnode_t *vp, int cmd, flock64_t *bfp, int flag,
 	uint64_t	off, len;
 	int		error;
 
-	ZFS_ENTER_VERIFY_ZP(zfsvfs, zp);
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(zp);
 
 top:
 	if (cmd != F_FREESP) {
@@ -4184,7 +4198,8 @@ zfs_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct)
 	zfid_short_t	*zfid;
 	int		size, i;
 
-	ZFS_ENTER_VERIFY_ZP(zfsvfs, zp);
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(zp);
 	gen = (uint32_t)zp->z_gen;
 
 	size = (zfsvfs->z_parent != zfsvfs) ? LONG_FID_LEN : SHORT_FID_LEN;
@@ -4246,7 +4261,8 @@ zfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
 	case _PC_XATTR_EXISTS:
 		zp = VTOZ(vp);
 		zfsvfs = zp->z_zfsvfs;
-		ZFS_ENTER_VERIFY_ZP(zfsvfs, zp);
+		ZFS_ENTER(zfsvfs);
+		ZFS_VERIFY_ZP(zp);
 		*valp = 0;
 		error = zfs_dirent_lock(&dl, zp, "", &xzp,
 		    ZXATTR | ZEXISTS | ZSHARED, NULL, NULL);
@@ -4295,7 +4311,8 @@ zfs_getsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr,
 	int error;
 	boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
 
-	ZFS_ENTER_VERIFY_ZP(zfsvfs, zp);
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(zp);
 	error = zfs_getacl(zp, vsecp, skipaclchk, cr);
 	ZFS_EXIT(zfsvfs);
 
@@ -4312,7 +4329,8 @@ zfs_setsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr,
 	int error;
 	boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
 
-	ZFS_ENTER_VERIFY_ZP(zfsvfs, zp);
+	ZFS_ENTER(zfsvfs);
+	ZFS_VERIFY_ZP(zp);
 	error = zfs_setacl(zp, vsecp, skipaclchk, cr);
 	ZFS_EXIT(zfsvfs);
 	return (error);
diff --git a/usr/src/uts/common/sys/fs/zfs.h b/usr/src/uts/common/sys/fs/zfs.h
index 608376ad7e..107224b5cd 100644
--- a/usr/src/uts/common/sys/fs/zfs.h
+++ b/usr/src/uts/common/sys/fs/zfs.h
@@ -496,8 +496,8 @@ typedef enum zfs_ioc {
 	ZFS_IOC_DESTROY,
 	ZFS_IOC_ROLLBACK,
 	ZFS_IOC_RENAME,
-	ZFS_IOC_RECVBACKUP,
-	ZFS_IOC_SENDBACKUP,
+	ZFS_IOC_RECV,
+	ZFS_IOC_SEND,
 	ZFS_IOC_INJECT_FAULT,
 	ZFS_IOC_CLEAR_FAULT,
 	ZFS_IOC_INJECT_LIST_NEXT,
author	ahrens <none@none>	2007-10-29 17:12:17 -0700
committer	ahrens <none@none>	2007-10-29 17:12:17 -0700
commit	3cb34c601f3ef3016f638574f5982e80c3735c71 (patch)
tree	bbaa202cdc73b80f8c5169f479ba79234553d4ba
parent	7451ee9355b4d9cafcf1bb6055bb01fc7bdaa1a1 (diff)
download	illumos-joyent-3cb34c601f3ef3016f638574f5982e80c3735c71.tar.gz