PSARC 2006/223 ZFS Hot Spares

PSARC 2006/303 ZFS Clone Promotion
6276916 support for "clone swap"
6288488 du reports misleading size on RAID-Z
6393490 libzfs should be a real library
6397148 fbufs debug code should be removed from buf_hash_insert()
6405966 Hot Spare support in ZFS
6409302 passing a non-root vdev via zpool_create() panics system
6415739 assertion failed: !(zio->io_flags & 0x00040)
6416759 ::dbufs does not find bonus buffers anymore
6417978 double parity RAID-Z a.k.a. RAID6
6424554 full block re-writes need not read data in
6425111 detaching an offline device can result in import confusion
author: eschrock <none@none> 2006-05-30 15:47:16 -0700
committer: eschrock <none@none> 2006-05-30 15:47:16 -0700
commit: 99653d4ee642c6528e88224f12409a5f23060994 (patch)
tree: 5cbcc540b8ed86b6a008f1084f9ca031368d926f
parent: 354a1801a85aa6b61ff4d5e290ab708ba57e56a3 (diff)
download: illumos-joyent-99653d4ee642c6528e88224f12409a5f23060994.tar.gz
81 files changed, 6423 insertions, 3226 deletions
diff --git a/usr/src/cmd/fm/modules/common/Makefile b/usr/src/cmd/fm/modules/common/Makefile
index 868a66df08..ef5ebba6f0 100644
--- a/usr/src/cmd/fm/modules/common/Makefile
+++ b/usr/src/cmd/fm/modules/common/Makefile
@@ -27,6 +27,6 @@
 #
 
 SUBDIRS = cpumem-retire eversholt io-retire ip-transport snmp-trapgen	\
-	syslog-msgs zfs-diagnosis
+	syslog-msgs zfs-diagnosis zfs-retire
 
 include ../../Makefile.subdirs
diff --git a/usr/src/cmd/fm/modules/common/zfs-retire/Makefile b/usr/src/cmd/fm/modules/common/zfs-retire/Makefile
new file mode 100644
index 0000000000..9d80ae77ee
--- /dev/null
+++ b/usr/src/cmd/fm/modules/common/zfs-retire/Makefile
@@ -0,0 +1,33 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+#ident	"%Z%%M%	%I%	%E% SMI"
+
+MODULE = zfs-retire
+CLASS = common
+SRCS = zfs_retire.c
+
+include ../../Makefile.plugin
+
+LDLIBS += -lzfs
diff --git a/usr/src/cmd/fm/modules/common/zfs-retire/zfs-retire.conf b/usr/src/cmd/fm/modules/common/zfs-retire/zfs-retire.conf
new file mode 100644
index 0000000000..f506384bff
--- /dev/null
+++ b/usr/src/cmd/fm/modules/common/zfs-retire/zfs-retire.conf
@@ -0,0 +1,29 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+#ident	"%Z%%M%	%I%	%E% SMI"
+#
+# fmd configuration file for the zfs retire agent.
+#
+subscribe fault.fs.zfs.device
diff --git a/usr/src/cmd/fm/modules/common/zfs-retire/zfs_retire.c b/usr/src/cmd/fm/modules/common/zfs-retire/zfs_retire.c
new file mode 100644
index 0000000000..962b37bb82
--- /dev/null
+++ b/usr/src/cmd/fm/modules/common/zfs-retire/zfs_retire.c
@@ -0,0 +1,231 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * The ZFS retire agent is responsible for managing hot spares across all pools.
+ * When we see a device fault, we try to open the associated pool and look for
+ * any hot spares.  We iterate over any available hot spares and attempt a
+ * 'zpool replace' for each one.
+ */
+
+#include <fm/fmd_api.h>
+#include <sys/fs/zfs.h>
+#include <sys/fm/protocol.h>
+#include <sys/fm/fs/zfs.h>
+#include <libzfs.h>
+
+/*
+ * Find a pool with a matching GUID.
+ */
+typedef struct find_cbdata {
+	uint64_t	cb_guid;
+	zpool_handle_t	*cb_zhp;
+} find_cbdata_t;
+
+static int
+find_pool(zpool_handle_t *zhp, void *data)
+{
+	find_cbdata_t *cbp = data;
+
+	if (cbp->cb_guid == zpool_get_guid(zhp)) {
+		cbp->cb_zhp = zhp;
+		return (1);
+	}
+
+	zpool_close(zhp);
+	return (0);
+}
+
+/*
+ * Find a vdev within a tree with a matching GUID.
+ */
+static nvlist_t *
+find_vdev(nvlist_t *nv, uint64_t search)
+{
+	uint64_t guid;
+	nvlist_t **child;
+	uint_t c, children;
+	nvlist_t *ret;
+
+	if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) == 0 &&
+	    guid == search)
+		return (nv);
+
+	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
+	    &child, &children) != 0)
+		return (NULL);
+
+	for (c = 0; c < children; c++) {
+		if ((ret = find_vdev(child[c], search)) != NULL)
+			return (ret);
+	}
+
+	return (NULL);
+}
+
+/*ARGSUSED*/
+static void
+zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
+    const char *class)
+{
+	uint64_t pool_guid, vdev_guid;
+	char *dev_name;
+	zpool_handle_t *zhp;
+	nvlist_t *resource, *config, *nvroot;
+	nvlist_t *vdev;
+	nvlist_t **spares, **faults;
+	uint_t s, nspares, f, nfaults;
+	nvlist_t *replacement;
+	find_cbdata_t cb;
+	libzfs_handle_t *zhdl = fmd_hdl_getspecific(hdl);
+
+	/*
+	 * Get information from the fault.
+	 */
+	if (nvlist_lookup_nvlist_array(nvl, FM_SUSPECT_FAULT_LIST,
+	    &faults, &nfaults) != 0)
+		return;
+
+	for (f = 0; f < nfaults; f++) {
+		if (nvlist_lookup_nvlist(faults[f], FM_FAULT_RESOURCE,
+		    &resource) != 0 ||
+		    nvlist_lookup_uint64(resource, FM_FMRI_ZFS_POOL,
+		    &pool_guid) != 0 ||
+		    nvlist_lookup_uint64(resource, FM_FMRI_ZFS_VDEV,
+		    &vdev_guid) != 0)
+			continue;
+
+		/*
+		 * From the pool guid and vdev guid, get the pool name and
+		 * device name.
+		 */
+		cb.cb_guid = pool_guid;
+		if (zpool_iter(zhdl, find_pool, &cb) != 1)
+			continue;
+
+		zhp = cb.cb_zhp;
+		config = zpool_get_config(zhp, NULL);
+		if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
+		    &nvroot) != 0) {
+			zpool_close(zhp);
+			continue;
+		}
+
+		if ((vdev = find_vdev(nvroot, vdev_guid)) == NULL) {
+			zpool_close(zhp);
+			continue;
+		}
+
+		/*
+		 * Find out if there are any hot spares available in the pool.
+		 */
+		if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
+		    &spares, &nspares) != 0) {
+			zpool_close(zhp);
+			continue;
+		}
+
+		if (nvlist_alloc(&replacement, NV_UNIQUE_NAME, 0) != 0) {
+			zpool_close(zhp);
+			continue;
+		}
+
+		if (nvlist_add_string(replacement, ZPOOL_CONFIG_TYPE,
+		    VDEV_TYPE_ROOT) != 0) {
+			nvlist_free(replacement);
+			zpool_close(zhp);
+			continue;
+		}
+
+		dev_name = zpool_vdev_name(zhdl, zhp, vdev);
+
+		/*
+		 * Try to replace each spare, ending when we successfully
+		 * replace it.
+		 */
+		for (s = 0; s < nspares; s++) {
+			char *spare_name;
+
+			if (nvlist_lookup_string(spares[s], ZPOOL_CONFIG_PATH,
+			    &spare_name) != 0)
+				continue;
+
+			if (nvlist_add_nvlist_array(replacement,
+			    ZPOOL_CONFIG_CHILDREN, &spares[s], 1) != 0)
+				continue;
+
+			if (zpool_vdev_attach(zhp, dev_name, spare_name,
+			    replacement, B_TRUE) == 0)
+				break;
+		}
+
+		free(dev_name);
+		nvlist_free(replacement);
+		zpool_close(zhp);
+	}
+}
+
+static const fmd_hdl_ops_t fmd_ops = {
+	zfs_retire_recv,	/* fmdo_recv */
+	NULL,			/* fmdo_timeout */
+	NULL,			/* fmdo_close */
+	NULL,			/* fmdo_stats */
+	NULL,			/* fmdo_gc */
+};
+
+static const fmd_prop_t fmd_props[] = {
+	{ NULL, 0, NULL }
+};
+
+static const fmd_hdl_info_t fmd_info = {
+	"ZFS Retire Agent", "1.0", &fmd_ops, fmd_props
+};
+
+void
+_fmd_init(fmd_hdl_t *hdl)
+{
+	libzfs_handle_t *zhdl;
+
+	if ((zhdl = libzfs_init()) == NULL)
+		return;
+
+	if (fmd_hdl_register(hdl, FMD_API_VERSION, &fmd_info) != 0) {
+		libzfs_fini(zhdl);
+		return;
+	}
+
+	fmd_hdl_setspecific(hdl, zhdl);
+}
+
+void
+_fmd_fini(fmd_hdl_t *hdl)
+{
+	libzfs_handle_t *zhdl = fmd_hdl_getspecific(hdl);
+
+	if (zhdl != NULL)
+		libzfs_fini(zhdl);
+}
diff --git a/usr/src/cmd/fm/schemes/zfs/scheme.c b/usr/src/cmd/fm/schemes/zfs/scheme.c
index 7f2532a637..e28f7b231c 100644
--- a/usr/src/cmd/fm/schemes/zfs/scheme.c
+++ b/usr/src/cmd/fm/schemes/zfs/scheme.c
@@ -34,6 +34,8 @@ typedef struct cbdata {
 	zpool_handle_t	*cb_pool;
 } cbdata_t;
 
+libzfs_handle_t *g_zfs;
+
 static int
 find_pool(zpool_handle_t *zhp, void *data)
 {
@@ -66,7 +68,7 @@ fmd_fmri_nvl2str(nvlist_t *nvl, char *buf, size_t buflen)
 	cb.cb_guid = pool_guid;
 	cb.cb_pool = NULL;
 
-	if (zpool_iter(find_pool, &cb) == 1) {
+	if (zpool_iter(g_zfs, find_pool, &cb) == 1) {
 		name = zpool_get_name(cb.cb_pool);
 	} else {
 		(void) snprintf(guidbuf, sizeof (guidbuf), "%llx", pool_guid);
@@ -135,7 +137,7 @@ fmd_fmri_present(nvlist_t *nvl)
 	cb.cb_guid = pool_guid;
 	cb.cb_pool = NULL;
 
-	if (zpool_iter(find_pool, &cb) != 1)
+	if (zpool_iter(g_zfs, find_pool, &cb) != 1)
 		return (0);
 
 	if (nvlist_lookup_uint64(nvl, FM_FMRI_ZFS_VDEV, &vdev_guid) != 0) {
@@ -163,7 +165,7 @@ fmd_fmri_unusable(nvlist_t *nvl)
 	cb.cb_guid = pool_guid;
 	cb.cb_pool = NULL;
 
-	if (zpool_iter(find_pool, &cb) != 1)
+	if (zpool_iter(g_zfs, find_pool, &cb) != 1)
 		return (1);
 
 	if (nvlist_lookup_uint64(nvl, FM_FMRI_ZFS_VDEV, &vdev_guid) != 0) {
@@ -189,3 +191,21 @@ fmd_fmri_unusable(nvlist_t *nvl)
 
 	return (ret);
 }
+
+int
+fmd_fmri_init(void)
+{
+	g_zfs = libzfs_init();
+
+	if (g_zfs == NULL)
+		return (-1);
+	else
+		return (0);
+}
+
+void
+fmd_fmri_fini(void)
+{
+	if (g_zfs)
+		libzfs_fini(g_zfs);
+}
diff --git a/usr/src/cmd/fs.d/df.c b/usr/src/cmd/fs.d/df.c
index 0a38f44b1a..3ee66576a5 100644
--- a/usr/src/cmd/fs.d/df.c
+++ b/usr/src/cmd/fs.d/df.c
@@ -237,55 +237,43 @@ static void do_df(int, char **)	__NORETURN;
 static void parse_options(int, char **);
 static char *basename(char *);
 
-
-/* ARGSUSED */
-static void
-dummy_error_handler(const char *fmt, va_list ap)
-{
-	/* Do nothing */
-}
-
-static zfs_handle_t *(*_zfs_open)(const char *, int);
+static libzfs_handle_t *(*_libzfs_init)(boolean_t);
+static zfs_handle_t *(*_zfs_open)(libzfs_handle_t *, const char *, int);
 static void (*_zfs_close)(zfs_handle_t *);
 static uint64_t (*_zfs_prop_get_int)(zfs_handle_t *, zfs_prop_t);
-static void (*_zfs_set_error_handler)(void (*)(const char *, va_list));
+static libzfs_handle_t *g_zfs;
 
 /*
  * Dynamically check for libzfs, in case the user hasn't installed the SUNWzfs
  * packages.  A basic utility such as df shouldn't depend on optional
  * filesystems.
  */
-static int
+static boolean_t
 load_libzfs(void)
 {
 	void *hdl;
 
-	if (_zfs_open != NULL)
-		return (1);
+	if (_libzfs_init != NULL)
+		return (g_zfs != NULL);
 
 	if ((hdl = dlopen("libzfs.so", RTLD_LAZY)) != NULL) {
-		_zfs_set_error_handler = (void (*)())
-		    dlsym(hdl, "zfs_set_error_handler");
+		_libzfs_init = (libzfs_handle_t *(*)(boolean_t))dlsym(hdl,
+		    "libzfs_init");
 		_zfs_open = (zfs_handle_t *(*)())dlsym(hdl, "zfs_open");
 		_zfs_close = (void (*)())dlsym(hdl, "zfs_close");
 		_zfs_prop_get_int = (uint64_t (*)())
 		    dlsym(hdl, "zfs_prop_get_int");
 
-		if (_zfs_set_error_handler != NULL) {
+		if (_libzfs_init != NULL) {
 			assert(_zfs_open != NULL);
 			assert(_zfs_close != NULL);
 			assert(_zfs_prop_get_int != NULL);
 
-			/*
-			 * Disable ZFS error reporting, so we don't get messages
-			 * like "can't open ..." under race conditions.
-			 */
-			_zfs_set_error_handler(dummy_error_handler);
-			return (1);
+			g_zfs = _libzfs_init(B_FALSE);
 		}
 	}
 
-	return (0);
+	return (g_zfs != NULL);
 }
 
 int
@@ -1257,7 +1245,7 @@ adjust_total_blocks(struct df_request *dfrp, fsblkcnt64_t *total,
 	do {
 		*slash = '\0';
 
-		if ((zhp = _zfs_open(dataset, ZFS_TYPE_ANY)) == NULL) {
+		if ((zhp = _zfs_open(g_zfs, dataset, ZFS_TYPE_ANY)) == NULL) {
 			free(dataset);
 			return;
 		}
@@ -1274,7 +1262,7 @@ adjust_total_blocks(struct df_request *dfrp, fsblkcnt64_t *total,
 	} while ((slash = strrchr(dataset, '/')) != NULL);
 
 
-	if ((zhp = _zfs_open(dataset, ZFS_TYPE_ANY)) == NULL) {
+	if ((zhp = _zfs_open(g_zfs, dataset, ZFS_TYPE_ANY)) == NULL) {
 		free(dataset);
 		return;
 	}
diff --git a/usr/src/cmd/fs.d/zfs/fstyp/fstyp.c b/usr/src/cmd/fs.d/zfs/fstyp/fstyp.c
index 26376e36a6..6a8585d872 100644
--- a/usr/src/cmd/fs.d/zfs/fstyp/fstyp.c
+++ b/usr/src/cmd/fs.d/zfs/fstyp/fstyp.c
@@ -142,7 +142,8 @@ main(int argc, char **argv)
 		return (1);
 	}
 
-	if ((config = zpool_read_label(fd)) == NULL)
+	if (zpool_read_label(fd, &config) != 0 ||
+	    config == NULL)
 		return (1);
 
 	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
diff --git a/usr/src/cmd/mdb/common/modules/zfs/zfs.c b/usr/src/cmd/mdb/common/modules/zfs/zfs.c
index 5b218aee5f..73b1cbef62 100644
--- a/usr/src/cmd/mdb/common/modules/zfs/zfs.c
+++ b/usr/src/cmd/mdb/common/modules/zfs/zfs.c
@@ -208,73 +208,6 @@ freelist_walk_fini(mdb_walk_state_t *wsp)
 {
 }
 
-typedef struct dbuf_walk_data {
-	dbuf_hash_table_t ht;
-	int64_t bucket;
-	uintptr_t dbp;
-	dmu_buf_impl_t db;
-} dbuf_walk_data_t;
-
-static int
-dbuf_walk_init(mdb_walk_state_t *wsp)
-{
-	dbuf_walk_data_t *dwd;
-
-	if (wsp->walk_addr != NULL) {
-		mdb_warn("must supply starting address\n");
-		return (WALK_ERR);
-	}
-
-	dwd = mdb_alloc(sizeof (dbuf_walk_data_t), UM_SLEEP);
-
-	if (mdb_readvar(&dwd->ht, "dbuf_hash_table") == -1) {
-		mdb_warn("failed to read 'dbuf_hash_table'");
-		mdb_free(dwd, sizeof (dbuf_walk_data_t));
-		return (WALK_ERR);
-	}
-	dwd->bucket = -1;
-	dwd->dbp = 0;
-	wsp->walk_data = dwd;
-	return (WALK_NEXT);
-}
-
-static int
-dbuf_walk_step(mdb_walk_state_t *wsp)
-{
-	int status;
-	dbuf_walk_data_t *dwd = wsp->walk_data;
-
-	while (dwd->dbp == 0) {
-		dwd->bucket++;
-		if (dwd->bucket == dwd->ht.hash_table_mask+1)
-			return (WALK_DONE);
-
-		if (mdb_vread(&dwd->dbp, sizeof (void *),
-		    (uintptr_t)(dwd->ht.hash_table+dwd->bucket)) == -1) {
-			mdb_warn("failed to read hash bucket %u at %p",
-			    dwd->bucket, dwd->ht.hash_table+dwd->bucket);
-			return (WALK_DONE);
-		}
-	}
-
-	wsp->walk_addr = dwd->dbp;
-	if (mdb_vread(&dwd->db, sizeof (dmu_buf_impl_t),
-	    wsp->walk_addr) == -1) {
-		mdb_warn("failed to read dbuf at %p", wsp->walk_addr);
-		return (WALK_DONE);
-	}
-	status = wsp->walk_callback(wsp->walk_addr, &dwd->db, wsp->walk_cbdata);
-
-	dwd->dbp = (uintptr_t)dwd->db.db_hash_next;
-	return (status);
-}
-
-static void
-dbuf_walk_fini(mdb_walk_state_t *wsp)
-{
-	dbuf_walk_data_t *dwd = wsp->walk_data;
-	mdb_free(dwd, sizeof (dbuf_walk_data_t));
-}
 
 static int
 dataset_name(uintptr_t addr, char *buf)
@@ -693,7 +626,7 @@ dbufs(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 		return (DCMD_ERR);
 	}
 
-	if (mdb_pwalk("dbufs", dbufs_cb, &data, 0) != 0) {
+	if (mdb_pwalk("dmu_buf_impl_t", dbufs_cb, &data, 0) != 0) {
 		mdb_warn("can't walk dbufs");
 		return (DCMD_ERR);
 	}
@@ -1580,8 +1513,6 @@ static const mdb_walker_t walkers[] = {
 	{ LIST_WALK_NAME, LIST_WALK_DESC,
 		list_walk_init, list_walk_step, list_walk_fini },
 #endif
-	{ "dbufs", "walk cached ZFS dbufs",
-		dbuf_walk_init, dbuf_walk_step, dbuf_walk_fini },
 	{ "zms_freelist", "walk ZFS metaslab freelist",
 		freelist_walk_init, freelist_walk_step, freelist_walk_fini },
 	{ "txg_list", "given any txg_list_t *, walk all entries in all txgs",
diff --git a/usr/src/cmd/truss/codes.c b/usr/src/cmd/truss/codes.c
index 4e808b8e48..37e79f6322 100644
--- a/usr/src/cmd/truss/codes.c
+++ b/usr/src/cmd/truss/codes.c
@@ -937,6 +937,8 @@ const struct ioc {
 		"zfs_cmd_t" },
 	{ (uint_t)ZFS_IOC_BOOKMARK_NAME,	"ZFS_IOC_BOOKMARK_NAME",
 		"zfs_cmd_t" },
+	{ (uint_t)ZFS_IOC_PROMOTE,		"ZFS_IOC_PROMOTE",
+		"zfs_cmd_t" },
 
 	/* kssl ioctls */
 	{ (uint_t)KSSL_ADD_ENTRY,		"KSSL_ADD_ENTRY",
diff --git a/usr/src/cmd/zdb/zdb.c b/usr/src/cmd/zdb/zdb.c
index f283148ef8..0af9a59690 100644
--- a/usr/src/cmd/zdb/zdb.c
+++ b/usr/src/cmd/zdb/zdb.c
@@ -744,8 +744,8 @@ dump_dsl_dataset(objset_t *os, uint64_t object, void *data, size_t size)
 	    (u_longlong_t)ds->ds_fsid_guid);
 	(void) printf("\t\tguid = %llu\n",
 	    (u_longlong_t)ds->ds_guid);
-	(void) printf("\t\tinconsistent = %llu\n",
-	    (u_longlong_t)ds->ds_inconsistent);
+	(void) printf("\t\tflags = %llx\n",
+	    (u_longlong_t)ds->ds_flags);
 	(void) printf("\t\tbp = %s\n", blkbuf);
 }
 
@@ -755,7 +755,9 @@ dump_bplist(objset_t *mos, uint64_t object, char *name)
 	bplist_t bpl = { 0 };
 	blkptr_t blk, *bp = &blk;
 	uint64_t itor = 0;
-	char numbuf[6];
+	char bytes[6];
+	char comp[6];
+	char uncomp[6];
 
 	if (dump_opt['d'] < 3)
 		return;
@@ -766,10 +768,17 @@ dump_bplist(objset_t *mos, uint64_t object, char *name)
 		return;
 	}
 
-	nicenum(bpl.bpl_phys->bpl_bytes, numbuf);
-
-	(void) printf("\n    %s: %llu entries, %s\n",
-	    name, (u_longlong_t)bpl.bpl_phys->bpl_entries, numbuf);
+	nicenum(bpl.bpl_phys->bpl_bytes, bytes);
+	if (bpl.bpl_dbuf->db_size == sizeof (bplist_phys_t)) {
+		nicenum(bpl.bpl_phys->bpl_comp, comp);
+		nicenum(bpl.bpl_phys->bpl_uncomp, uncomp);
+		(void) printf("\n    %s: %llu entries, %s (%s/%s comp)\n",
+		    name, (u_longlong_t)bpl.bpl_phys->bpl_entries,
+		    bytes, comp, uncomp);
+	} else {
+		(void) printf("\n    %s: %llu entries, %s\n",
+		    name, (u_longlong_t)bpl.bpl_phys->bpl_entries, bytes);
+	}
 
 	if (dump_opt['d'] < 5) {
 		bplist_close(&bpl);
diff --git a/usr/src/cmd/zfs/zfs_iter.c b/usr/src/cmd/zfs/zfs_iter.c
index bc8e5ea59c..9f8f37b765 100644
--- a/usr/src/cmd/zfs/zfs_iter.c
+++ b/usr/src/cmd/zfs/zfs_iter.c
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -160,7 +159,7 @@ zfs_compare(const void *larg, const void *rarg, void *unused)
 }
 
 int
-zfs_for_each(int argc, char **argv, int recurse, zfs_type_t types,
+zfs_for_each(int argc, char **argv, boolean_t recurse, zfs_type_t types,
     zfs_iter_f callback, void *data)
 {
 	callback_data_t cb;
@@ -190,7 +189,7 @@ zfs_for_each(int argc, char **argv, int recurse, zfs_type_t types,
 		 * If given no arguments, iterate over all datasets.
 		 */
 		cb.cb_recurse = 1;
-		ret = zfs_iter_root(zfs_callback, &cb);
+		ret = zfs_iter_root(g_zfs, zfs_callback, &cb);
 	} else {
 		int i;
 		zfs_handle_t *zhp;
@@ -209,8 +208,8 @@ zfs_for_each(int argc, char **argv, int recurse, zfs_type_t types,
 		}
 
 		for (i = 0; i < argc; i++) {
-			if ((zhp = zfs_open(argv[i], argtype)) != NULL)
-				ret = zfs_callback(zhp, &cb);
+			if ((zhp = zfs_open(g_zfs, argv[i], argtype)) != NULL)
+				ret |= zfs_callback(zhp, &cb);
 			else
 				ret = 1;
 		}
diff --git a/usr/src/cmd/zfs/zfs_iter.h b/usr/src/cmd/zfs/zfs_iter.h
index 03428b827b..c69049b28f 100644
--- a/usr/src/cmd/zfs/zfs_iter.h
+++ b/usr/src/cmd/zfs/zfs_iter.h
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -33,7 +32,7 @@
 extern "C" {
 #endif
 
-int zfs_for_each(int, char **, int, zfs_type_t, zfs_iter_f, void *);
+int zfs_for_each(int, char **, boolean_t, zfs_type_t, zfs_iter_f, void *);
 
 #ifdef	__cplusplus
 }
diff --git a/usr/src/cmd/zfs/zfs_main.c b/usr/src/cmd/zfs/zfs_main.c
index 5b04a76f47..0fba9046bd 100644
--- a/usr/src/cmd/zfs/zfs_main.c
+++ b/usr/src/cmd/zfs/zfs_main.c
@@ -47,6 +47,9 @@
 #include <libzfs.h>
 
 #include "zfs_iter.h"
+#include "zfs_util.h"
+
+libzfs_handle_t *g_zfs;
 
 static FILE *mnttab_file;
 
@@ -66,6 +69,7 @@ static int zfs_do_share(int argc, char **argv);
 static int zfs_do_unshare(int argc, char **argv);
 static int zfs_do_send(int argc, char **argv);
 static int zfs_do_receive(int argc, char **argv);
+static int zfs_do_promote(int argc, char **argv);
 
 /*
  * These libumem hooks provide a reasonable set of defaults for the allocator's
@@ -91,6 +95,7 @@ typedef enum {
 	HELP_INHERIT,
 	HELP_LIST,
 	HELP_MOUNT,
+	HELP_PROMOTE,
 	HELP_RECEIVE,
 	HELP_RENAME,
 	HELP_ROLLBACK,
@@ -124,6 +129,7 @@ static zfs_command_t command_table[] = {
 	{ "snapshot",	zfs_do_snapshot,	HELP_SNAPSHOT		},
 	{ "rollback",	zfs_do_rollback,	HELP_ROLLBACK		},
 	{ "clone",	zfs_do_clone,		HELP_CLONE		},
+	{ "promote",	zfs_do_promote,		HELP_PROMOTE		},
 	{ "rename",	zfs_do_rename,		HELP_RENAME		},
 	{ NULL },
 	{ "list",	zfs_do_list,		HELP_LIST		},
@@ -176,6 +182,8 @@ get_usage(zfs_help_t idx)
 		return (gettext("\tmount\n"
 		    "\tmount [-o opts] [-O] -a\n"
 		    "\tmount [-o opts] [-O] <filesystem>\n"));
+	case HELP_PROMOTE:
+		return (gettext("\tpromote <clone filesystem>\n"));
 	case HELP_RECEIVE:
 		return (gettext("\treceive [-vn] <filesystem|volume|snapshot>\n"
 		    "\treceive [-vn] -d <filesystem>\n"));
@@ -228,10 +236,10 @@ safe_malloc(size_t size)
  * a complete usage message.
  */
 static void
-usage(int requested)
+usage(boolean_t requested)
 {
 	int i;
-	int show_properties = FALSE;
+	boolean_t show_properties = B_FALSE;
 	FILE *fp = requested ? stdout : stderr;
 
 	if (current_command == NULL) {
@@ -260,7 +268,7 @@ usage(int requested)
 	    strcmp(current_command->name, "get") == 0 ||
 	    strcmp(current_command->name, "inherit") == 0 ||
 	    strcmp(current_command->name, "list") == 0)
-		show_properties = TRUE;
+		show_properties = B_TRUE;
 
 	if (show_properties) {
 
@@ -313,27 +321,27 @@ zfs_do_clone(int argc, char **argv)
 	if (argc > 1 && argv[1][0] == '-') {
 		(void) fprintf(stderr, gettext("invalid option '%c'\n"),
 		    argv[1][1]);
-		usage(FALSE);
+		usage(B_FALSE);
 	}
 
 	/* check number of arguments */
 	if (argc < 2) {
 		(void) fprintf(stderr, gettext("missing source dataset "
 		    "argument\n"));
-		usage(FALSE);
+		usage(B_FALSE);
 	}
 	if (argc < 3) {
 		(void) fprintf(stderr, gettext("missing target dataset "
 		    "argument\n"));
-		usage(FALSE);
+		usage(B_FALSE);
 	}
 	if (argc > 3) {
 		(void) fprintf(stderr, gettext("too many arguments\n"));
-		usage(FALSE);
+		usage(B_FALSE);
 	}
 
 	/* open the source dataset */
-	if ((zhp = zfs_open(argv[1], ZFS_TYPE_SNAPSHOT)) == NULL)
+	if ((zhp = zfs_open(g_zfs, argv[1], ZFS_TYPE_SNAPSHOT)) == NULL)
 		return (1);
 
 	/* pass to libzfs */
@@ -341,7 +349,7 @@ zfs_do_clone(int argc, char **argv)
 
 	/* create the mountpoint if necessary */
 	if (ret == 0) {
-		zfs_handle_t *clone = zfs_open(argv[2], ZFS_TYPE_ANY);
+		zfs_handle_t *clone = zfs_open(g_zfs, argv[2], ZFS_TYPE_ANY);
 		if (clone != NULL) {
 			if ((ret = zfs_mount(clone, NULL, 0)) == 0)
 				ret = zfs_share(clone);
@@ -374,7 +382,7 @@ zfs_do_create(int argc, char **argv)
 	char *size = NULL;
 	char *blocksize = NULL;
 	int c;
-	int noreserve = FALSE;
+	boolean_t noreserve = B_FALSE;
 	int ret;
 
 	/* check options */
@@ -388,24 +396,24 @@ zfs_do_create(int argc, char **argv)
 			blocksize = optarg;
 			break;
 		case 's':
-			noreserve = TRUE;
+			noreserve = B_TRUE;
 			break;
 		case ':':
 			(void) fprintf(stderr, gettext("missing size "
 			    "argument\n"));
-			usage(FALSE);
+			usage(B_FALSE);
 			break;
 		case '?':
 			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
 			    optopt);
-			usage(FALSE);
+			usage(B_FALSE);
 		}
 	}
 
 	if (noreserve && type != ZFS_TYPE_VOLUME) {
 		(void) fprintf(stderr, gettext("'-s' can only be used when "
 		    "creating a volume\n"));
-		usage(FALSE);
+		usage(B_FALSE);
 	}
 
 	argc -= optind;
@@ -415,18 +423,18 @@ zfs_do_create(int argc, char **argv)
 	if (argc == 0) {
 		(void) fprintf(stderr, gettext("missing %s argument\n"),
 		    zfs_type_to_name(type));
-		usage(FALSE);
+		usage(B_FALSE);
 	}
 	if (argc > 1) {
 		(void) fprintf(stderr, gettext("too many arguments\n"));
-		usage(FALSE);
+		usage(B_FALSE);
 	}
 
 	/* pass to libzfs */
-	if (zfs_create(argv[0], type, size, blocksize) != 0)
+	if (zfs_create(g_zfs, argv[0], type, size, blocksize) != 0)
 		return (1);
 
-	if ((zhp = zfs_open(argv[0], ZFS_TYPE_ANY)) == NULL)
+	if ((zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_ANY)) == NULL)
 		return (1);
 
 	/*
@@ -476,7 +484,7 @@ zfs_do_create(int argc, char **argv)
  * either be a child, or a clone of a child.
  */
 typedef struct destroy_cbdata {
-	int		cb_first;
+	boolean_t	cb_first;
 	int		cb_force;
 	int		cb_recurse;
 	int		cb_error;
@@ -511,7 +519,7 @@ destroy_check_dependent(zfs_handle_t *zhp, void *data)
 			    zfs_type_to_name(zfs_get_type(cbp->cb_target)));
 			(void) fprintf(stderr, gettext("use '-r' to destroy "
 			    "the following datasets:\n"));
-			cbp->cb_first = 0;
+			cbp->cb_first = B_FALSE;
 			cbp->cb_error = 1;
 		}
 
@@ -532,7 +540,7 @@ destroy_check_dependent(zfs_handle_t *zhp, void *data)
 			    zfs_type_to_name(zfs_get_type(cbp->cb_target)));
 			(void) fprintf(stderr, gettext("use '-R' to destroy "
 			    "the following datasets:\n"));
-			cbp->cb_first = 0;
+			cbp->cb_first = B_FALSE;
 			cbp->cb_error = 1;
 		}
 
@@ -597,7 +605,7 @@ zfs_do_destroy(int argc, char **argv)
 		default:
 			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
 			    optopt);
-			usage(FALSE);
+			usage(B_FALSE);
 		}
 	}
 
@@ -607,15 +615,15 @@ zfs_do_destroy(int argc, char **argv)
 	/* check number of arguments */
 	if (argc == 0) {
 		(void) fprintf(stderr, gettext("missing path argument\n"));
-		usage(FALSE);
+		usage(B_FALSE);
 	}
 	if (argc > 1) {
 		(void) fprintf(stderr, gettext("too many arguments\n"));
-		usage(FALSE);
+		usage(B_FALSE);
 	}
 
 	/* Open the given dataset */
-	if ((zhp = zfs_open(argv[0], ZFS_TYPE_ANY)) == NULL)
+	if ((zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_ANY)) == NULL)
 		return (1);
 
 	cb.cb_target = zhp;
@@ -641,7 +649,7 @@ zfs_do_destroy(int argc, char **argv)
 	/*
 	 * Check for any dependents and/or clones.
 	 */
-	cb.cb_first = 1;
+	cb.cb_first = B_TRUE;
 	if (!cb.cb_doclones)
 		(void) zfs_iter_dependents(zhp, destroy_check_dependent, &cb);
 
@@ -678,13 +686,13 @@ zfs_do_destroy(int argc, char **argv)
  *  columns to display as well as which property types to allow.
  */
 typedef struct get_cbdata {
-	int cb_scripted;
 	int cb_sources;
-	int cb_literal;
 	int cb_columns[4];
-	zfs_prop_t cb_prop[ZFS_NPROP_ALL];
 	int cb_nprop;
-	int cb_isall;
+	boolean_t cb_scripted;
+	boolean_t cb_literal;
+	boolean_t cb_isall;
+	zfs_prop_t cb_prop[ZFS_NPROP_ALL];
 } get_cbdata_t;
 
 #define	GET_COL_NAME		1
@@ -804,7 +812,7 @@ static int
 zfs_do_get(int argc, char **argv)
 {
 	get_cbdata_t cb = { 0 };
-	int recurse = 0;
+	boolean_t recurse = B_FALSE;
 	int c;
 	char *value, *fields, *badopt;
 	int i;
@@ -823,18 +831,18 @@ zfs_do_get(int argc, char **argv)
 	while ((c = getopt(argc, argv, ":o:s:rHp")) != -1) {
 		switch (c) {
 		case 'p':
-			cb.cb_literal = TRUE;
+			cb.cb_literal = B_TRUE;
 			break;
 		case 'r':
-			recurse = TRUE;
+			recurse = B_TRUE;
 			break;
 		case 'H':
-			cb.cb_scripted = TRUE;
+			cb.cb_scripted = B_TRUE;
 			break;
 		case ':':
 			(void) fprintf(stderr, gettext("missing argument for "
 			    "'%c' option\n"), optopt);
-			usage(FALSE);
+			usage(B_FALSE);
 			break;
 		case 'o':
 			/*
@@ -852,7 +860,7 @@ zfs_do_get(int argc, char **argv)
 					(void) fprintf(stderr, gettext("too "
 					    "many fields given to -o "
 					    "option\n"));
-					usage(FALSE);
+					usage(B_FALSE);
 				}
 
 				switch (getsubopt(&optarg, col_subopts,
@@ -873,7 +881,7 @@ zfs_do_get(int argc, char **argv)
 					(void) fprintf(stderr,
 					    gettext("invalid column name "
 					    "'%s'\n"), value);
-					    usage(FALSE);
+					    usage(B_FALSE);
 				}
 			}
 			break;
@@ -906,7 +914,7 @@ zfs_do_get(int argc, char **argv)
 					(void) fprintf(stderr,
 					    gettext("invalid source "
 					    "'%s'\n"), value);
-					    usage(FALSE);
+					    usage(B_FALSE);
 				}
 			}
 			break;
@@ -914,7 +922,7 @@ zfs_do_get(int argc, char **argv)
 		case '?':
 			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
 			    optopt);
-			usage(FALSE);
+			usage(B_FALSE);
 		}
 	}
 
@@ -924,7 +932,7 @@ zfs_do_get(int argc, char **argv)
 	if (argc < 1) {
 		(void) fprintf(stderr, gettext("missing property "
 		    "argument\n"));
-		usage(FALSE);
+		usage(B_FALSE);
 	}
 
 	fields = argv[0];
@@ -935,7 +943,7 @@ zfs_do_get(int argc, char **argv)
 	 * given dataset.
 	 */
 	if (strcmp(fields, "all") == 0)
-		cb.cb_isall = TRUE;
+		cb.cb_isall = B_TRUE;
 
 	if ((ret = zfs_get_proplist(fields, cb.cb_prop, ZFS_NPROP_ALL,
 	    &cb.cb_nprop, &badopt)) != 0) {
@@ -945,7 +953,7 @@ zfs_do_get(int argc, char **argv)
 		else
 			(void) fprintf(stderr, gettext("too many properties "
 			    "specified\n"));
-		usage(FALSE);
+		usage(B_FALSE);
 	}
 
 	argc--;
@@ -954,7 +962,7 @@ zfs_do_get(int argc, char **argv)
 	/* check for at least one dataset name */
 	if (argc < 1) {
 		(void) fprintf(stderr, gettext("missing dataset argument\n"));
-		usage(FALSE);
+		usage(B_FALSE);
 	}
 
 	/*
@@ -1008,7 +1016,7 @@ inherit_callback(zfs_handle_t *zhp, void *data)
 static int
 zfs_do_inherit(int argc, char **argv)
 {
-	int recurse = 0;
+	boolean_t recurse = B_FALSE;
 	int c;
 	zfs_prop_t prop;
 	char *propname;
@@ -1017,13 +1025,13 @@ zfs_do_inherit(int argc, char **argv)
 	while ((c = getopt(argc, argv, "r")) != -1) {
 		switch (c) {
 		case 'r':
-			recurse = TRUE;
+			recurse = B_TRUE;
 			break;
 		case '?':
 		default:
 			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
 			    optopt);
-			usage(FALSE);
+			usage(B_FALSE);
 		}
 	}
 
@@ -1033,11 +1041,11 @@ zfs_do_inherit(int argc, char **argv)
 	/* check number of arguments */
 	if (argc < 1) {
 		(void) fprintf(stderr, gettext("missing property argument\n"));
-		usage(FALSE);
+		usage(B_FALSE);
 	}
 	if (argc < 2) {
 		(void) fprintf(stderr, gettext("missing dataset argument\n"));
-		usage(FALSE);
+		usage(B_FALSE);
 	}
 
 	propname = argv[0];
@@ -1050,7 +1058,7 @@ zfs_do_inherit(int argc, char **argv)
 	if ((prop = zfs_name_to_prop(propname)) == ZFS_PROP_INVAL) {
 		(void) fprintf(stderr, gettext("invalid property '%s'\n"),
 		    propname);
-		usage(FALSE);
+		usage(B_FALSE);
 	}
 	if (zfs_prop_readonly(prop)) {
 		(void) fprintf(stderr, gettext("%s property is read-only\n"),
@@ -1083,8 +1091,8 @@ zfs_do_inherit(int argc, char **argv)
  * '-r' is specified.
  */
 typedef struct list_cbdata {
-	int		cb_first;
-	int		cb_scripted;
+	boolean_t	cb_first;
+	boolean_t	cb_scripted;
 	zfs_prop_t	cb_fields[ZFS_NPROP_ALL];
 	int		cb_fieldcount;
 } list_cbdata_t;
@@ -1129,7 +1137,7 @@ print_dataset(zfs_handle_t *zhp, zfs_prop_t *fields, size_t count, int scripted)
 		}
 
 		if (zfs_prop_get(zhp, fields[i], property,
-		    sizeof (property), NULL, NULL, 0, FALSE) != 0)
+		    sizeof (property), NULL, NULL, 0, B_FALSE) != 0)
 			(void) strlcpy(property, "-", sizeof (property));
 
 		/*
@@ -1159,7 +1167,7 @@ list_callback(zfs_handle_t *zhp, void *data)
 	if (cbp->cb_first) {
 		if (!cbp->cb_scripted)
 			print_header(cbp->cb_fields, cbp->cb_fieldcount);
-		cbp->cb_first = FALSE;
+		cbp->cb_first = B_FALSE;
 	}
 
 	print_dataset(zhp, cbp->cb_fields, cbp->cb_fieldcount,
@@ -1172,8 +1180,8 @@ static int
 zfs_do_list(int argc, char **argv)
 {
 	int c;
-	int recurse = 0;
-	int scripted = FALSE;
+	boolean_t recurse = B_FALSE;
+	boolean_t scripted = B_FALSE;
 	static char default_fields[] =
 	    "name,used,available,referenced,mountpoint";
 	int types = ZFS_TYPE_ANY;
@@ -1193,10 +1201,10 @@ zfs_do_list(int argc, char **argv)
 			fields = optarg;
 			break;
 		case 'r':
-			recurse = TRUE;
+			recurse = B_TRUE;
 			break;
 		case 'H':
-			scripted = TRUE;
+			scripted = B_TRUE;
 			break;
 		case 't':
 			types = 0;
@@ -1216,19 +1224,19 @@ zfs_do_list(int argc, char **argv)
 					(void) fprintf(stderr,
 					    gettext("invalid type '%s'\n"),
 					    value);
-					usage(FALSE);
+					usage(B_FALSE);
 				}
 			}
 			break;
 		case ':':
 			(void) fprintf(stderr, gettext("missing argument for "
 			    "'%c' option\n"), optopt);
-			usage(FALSE);
+			usage(B_FALSE);
 			break;
 		case '?':
 			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
 			    optopt);
-			usage(FALSE);
+			usage(B_FALSE);
 		}
 	}
 
@@ -1258,16 +1266,16 @@ zfs_do_list(int argc, char **argv)
 		else
 			(void) fprintf(stderr, gettext("too many properties "
 			    "specified\n"));
-		usage(FALSE);
+		usage(B_FALSE);
 	}
 
 	cb.cb_fieldcount += alloffset;
 	cb.cb_scripted = scripted;
-	cb.cb_first = TRUE;
+	cb.cb_first = B_TRUE;
 
 	ret = zfs_for_each(argc, argv, recurse, types, list_callback, &cb);
 
-	if (ret == 0 && cb.cb_first == TRUE)
+	if (ret == 0 && cb.cb_first)
 		(void) printf(gettext("no datasets available\n"));
 
 	return (ret);
@@ -1283,39 +1291,76 @@ static int
 zfs_do_rename(int argc, char **argv)
 {
 	zfs_handle_t *zhp;
-	int ret = 1;
+	int ret;
 
 	/* check options */
 	if (argc > 1 && argv[1][0] == '-') {
 		(void) fprintf(stderr, gettext("invalid option '%c'\n"),
 		    argv[1][1]);
-		usage(FALSE);
+		usage(B_FALSE);
 	}
 
 	/* check number of arguments */
 	if (argc < 2) {
 		(void) fprintf(stderr, gettext("missing source dataset "
 		    "argument\n"));
-		usage(FALSE);
+		usage(B_FALSE);
 	}
 	if (argc < 3) {
 		(void) fprintf(stderr, gettext("missing target dataset "
 		    "argument\n"));
-		usage(FALSE);
+		usage(B_FALSE);
 	}
 	if (argc > 3) {
 		(void) fprintf(stderr, gettext("too many arguments\n"));
-		usage(FALSE);
+		usage(B_FALSE);
 	}
 
-	if ((zhp = zfs_open(argv[1], ZFS_TYPE_ANY)) == NULL)
+	if ((zhp = zfs_open(g_zfs, argv[1], ZFS_TYPE_ANY)) == NULL)
 		return (1);
 
-	if (zfs_rename(zhp, argv[2]) != 0)
-		goto error;
+	ret = (zfs_rename(zhp, argv[2]) != 0);
+
+	zfs_close(zhp);
+	return (ret);
+}
+
+/*
+ * zfs promote <fs>
+ *
+ * Promotes the given clone fs to be the parent
+ */
+/* ARGSUSED */
+static int
+zfs_do_promote(int argc, char **argv)
+{
+	zfs_handle_t *zhp;
+	int ret;
+
+	/* check options */
+	if (argc > 1 && argv[1][0] == '-') {
+		(void) fprintf(stderr, gettext("invalid option '%c'\n"),
+		    argv[1][1]);
+		usage(B_FALSE);
+	}
+
+	/* check number of arguments */
+	if (argc < 2) {
+		(void) fprintf(stderr, gettext("missing clone filesystem "
+		    "argument\n"));
+		usage(B_FALSE);
+	}
+	if (argc > 2) {
+		(void) fprintf(stderr, gettext("too many arguments\n"));
+		usage(B_FALSE);
+	}
+
+	zhp = zfs_open(g_zfs, argv[1], ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
+	if (zhp == NULL)
+		return (1);
+
+	ret = (zfs_promote(zhp) != 0);
 
-	ret = 0;
-error:
 	zfs_close(zhp);
 	return (ret);
 }
@@ -1333,12 +1378,12 @@ error:
  */
 typedef struct rollback_cbdata {
 	uint64_t	cb_create;
-	int		cb_first;
+	boolean_t	cb_first;
 	int		cb_doclones;
 	char		*cb_target;
 	int		cb_error;
-	int		cb_recurse;
-	int		cb_dependent;
+	boolean_t	cb_recurse;
+	boolean_t	cb_dependent;
 } rollback_cbdata_t;
 
 /*
@@ -1352,8 +1397,10 @@ rollback_check(zfs_handle_t *zhp, void *data)
 {
 	rollback_cbdata_t *cbp = data;
 
-	if (cbp->cb_doclones)
+	if (cbp->cb_doclones) {
+		zfs_close(zhp);
 		return (0);
+	}
 
 	if (!cbp->cb_dependent) {
 		if (strcmp(zfs_get_name(zhp), cbp->cb_target) != 0 &&
@@ -1374,10 +1421,10 @@ rollback_check(zfs_handle_t *zhp, void *data)
 			}
 
 			if (cbp->cb_recurse) {
-				cbp->cb_dependent = TRUE;
+				cbp->cb_dependent = B_TRUE;
 				(void) zfs_iter_dependents(zhp, rollback_check,
 				    cbp);
-				cbp->cb_dependent = FALSE;
+				cbp->cb_dependent = B_FALSE;
 			} else {
 				(void) fprintf(stderr, "%s\n",
 				    zfs_get_name(zhp));
@@ -1429,7 +1476,7 @@ zfs_do_rollback(int argc, char **argv)
 		case '?':
 			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
 			    optopt);
-			usage(FALSE);
+			usage(B_FALSE);
 		}
 	}
 
@@ -1439,22 +1486,22 @@ zfs_do_rollback(int argc, char **argv)
 	/* check number of arguments */
 	if (argc < 1) {
 		(void) fprintf(stderr, gettext("missing dataset argument\n"));
-		usage(FALSE);
+		usage(B_FALSE);
 	}
 	if (argc > 1) {
 		(void) fprintf(stderr, gettext("too many arguments\n"));
-		usage(FALSE);
+		usage(B_FALSE);
 	}
 
 	/* open the snapshot */
-	if ((snap = zfs_open(argv[0], ZFS_TYPE_SNAPSHOT)) == NULL)
+	if ((snap = zfs_open(g_zfs, argv[0], ZFS_TYPE_SNAPSHOT)) == NULL)
 		return (1);
 
 	/* open the parent dataset */
 	(void) strlcpy(parentname, argv[0], sizeof (parentname));
 	verify((delim = strrchr(parentname, '@')) != NULL);
 	*delim = '\0';
-	if ((zhp = zfs_open(parentname, ZFS_TYPE_ANY)) == NULL) {
+	if ((zhp = zfs_open(g_zfs, parentname, ZFS_TYPE_ANY)) == NULL) {
 		zfs_close(snap);
 		return (1);
 	}
@@ -1465,7 +1512,7 @@ zfs_do_rollback(int argc, char **argv)
 	 */
 	cb.cb_target = argv[0];
 	cb.cb_create = zfs_prop_get_int(snap, ZFS_PROP_CREATETXG);
-	cb.cb_first = 1;
+	cb.cb_first = B_TRUE;
 	cb.cb_error = 0;
 	(void) zfs_iter_children(zhp, rollback_check, &cb);
 
@@ -1606,18 +1653,18 @@ zfs_do_set(int argc, char **argv)
 	if (argc > 1 && argv[1][0] == '-') {
 		(void) fprintf(stderr, gettext("invalid option '%c'\n"),
 		    argv[1][1]);
-		usage(FALSE);
+		usage(B_FALSE);
 	}
 
 	/* check number of arguments */
 	if (argc < 2) {
 		(void) fprintf(stderr, gettext("missing property=value "
 		    "argument\n"));
-		usage(FALSE);
+		usage(B_FALSE);
 	}
 	if (argc < 3) {
 		(void) fprintf(stderr, gettext("missing dataset name\n"));
-		usage(FALSE);
+		usage(B_FALSE);
 	}
 
 	/* validate property=value argument */
@@ -1625,7 +1672,7 @@ zfs_do_set(int argc, char **argv)
 	if ((cb.cb_value = strchr(cb.cb_propname, '=')) == NULL) {
 		(void) fprintf(stderr, gettext("missing value in "
 		    "property=value argument\n"));
-		usage(FALSE);
+		usage(B_FALSE);
 	}
 
 	*cb.cb_value = '\0';
@@ -1634,12 +1681,12 @@ zfs_do_set(int argc, char **argv)
 	if (*cb.cb_propname == '\0') {
 		(void) fprintf(stderr,
 		    gettext("missing property in property=value argument\n"));
-		usage(FALSE);
+		usage(B_FALSE);
 	}
 	if (*cb.cb_value == '\0') {
 		(void) fprintf(stderr,
 		    gettext("missing value in property=value argument\n"));
-		usage(FALSE);
+		usage(B_FALSE);
 	}
 
 	/* get the property type */
@@ -1647,7 +1694,7 @@ zfs_do_set(int argc, char **argv)
 	    ZFS_PROP_INVAL) {
 		(void) fprintf(stderr,
 		    gettext("invalid property '%s'\n"), cb.cb_propname);
-		usage(FALSE);
+		usage(B_FALSE);
 	}
 
 	/*
@@ -1655,10 +1702,10 @@ zfs_do_set(int argc, char **argv)
 	 * once now so we don't generate multiple errors each time we try to
 	 * apply it to a dataset.
 	 */
-	if (zfs_prop_validate(cb.cb_prop, cb.cb_value, NULL) != 0)
+	if (zfs_prop_validate(g_zfs, cb.cb_prop, cb.cb_value, NULL) != 0)
 		return (1);
 
-	return (zfs_for_each(argc - 2, argv + 2, FALSE,
+	return (zfs_for_each(argc - 2, argv + 2, B_FALSE,
 	    ZFS_TYPE_ANY, set_callback, &cb));
 }
 
@@ -1675,20 +1722,20 @@ zfs_do_snapshot(int argc, char **argv)
 	if (argc > 1 && argv[1][0] == '-') {
 		(void) fprintf(stderr, gettext("invalid option '%c'\n"),
 		    argv[1][1]);
-		usage(FALSE);
+		usage(B_FALSE);
 	}
 
 	/* check number of arguments */
 	if (argc < 2) {
 		(void) fprintf(stderr, gettext("missing snapshot argument\n"));
-		usage(FALSE);
+		usage(B_FALSE);
 	}
 	if (argc > 2) {
 		(void) fprintf(stderr, gettext("too many arguments\n"));
-		usage(FALSE);
+		usage(B_FALSE);
 	}
 
-	return (zfs_snapshot(argv[1]) != 0);
+	return (zfs_snapshot(g_zfs, argv[1]) != 0);
 }
 
 /*
@@ -1712,12 +1759,12 @@ zfs_do_send(int argc, char **argv)
 		case ':':
 			(void) fprintf(stderr, gettext("missing argument for "
 			    "'%c' option\n"), optopt);
-			usage(FALSE);
+			usage(B_FALSE);
 			break;
 		case '?':
 			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
 			    optopt);
-			usage(FALSE);
+			usage(B_FALSE);
 		}
 	}
 
@@ -1727,11 +1774,11 @@ zfs_do_send(int argc, char **argv)
 	/* check number of arguments */
 	if (argc < 1) {
 		(void) fprintf(stderr, gettext("missing snapshot argument\n"));
-		usage(FALSE);
+		usage(B_FALSE);
 	}
 	if (argc > 1) {
 		(void) fprintf(stderr, gettext("too many arguments\n"));
-		usage(FALSE);
+		usage(B_FALSE);
 	}
 
 	if (isatty(STDOUT_FILENO)) {
@@ -1743,10 +1790,11 @@ zfs_do_send(int argc, char **argv)
 	}
 
 	if (fromname) {
-		if ((zhp_from = zfs_open(fromname, ZFS_TYPE_SNAPSHOT)) == NULL)
+		if ((zhp_from = zfs_open(g_zfs, fromname,
+		    ZFS_TYPE_SNAPSHOT)) == NULL)
 			return (1);
 	}
-	if ((zhp_to = zfs_open(argv[0], ZFS_TYPE_SNAPSHOT)) == NULL)
+	if ((zhp_to = zfs_open(g_zfs, argv[0], ZFS_TYPE_SNAPSHOT)) == NULL)
 		return (1);
 
 	err = zfs_send(zhp_to, zhp_from);
@@ -1767,31 +1815,31 @@ static int
 zfs_do_receive(int argc, char **argv)
 {
 	int c, err;
-	int isprefix = FALSE;
-	int dryrun = FALSE;
-	int verbose = FALSE;
+	boolean_t isprefix = B_FALSE;
+	boolean_t dryrun = B_FALSE;
+	boolean_t verbose = B_FALSE;
 
 	/* check options */
 	while ((c = getopt(argc, argv, ":dnv")) != -1) {
 		switch (c) {
 		case 'd':
-			isprefix = TRUE;
+			isprefix = B_TRUE;
 			break;
 		case 'n':
-			dryrun = TRUE;
+			dryrun = B_TRUE;
 			break;
 		case 'v':
-			verbose = TRUE;
+			verbose = B_TRUE;
 			break;
 		case ':':
 			(void) fprintf(stderr, gettext("missing argument for "
 			    "'%c' option\n"), optopt);
-			usage(FALSE);
+			usage(B_FALSE);
 			break;
 		case '?':
 			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
 			    optopt);
-			usage(FALSE);
+			usage(B_FALSE);
 		}
 	}
 
@@ -1801,11 +1849,11 @@ zfs_do_receive(int argc, char **argv)
 	/* check number of arguments */
 	if (argc < 1) {
 		(void) fprintf(stderr, gettext("missing snapshot argument\n"));
-		usage(FALSE);
+		usage(B_FALSE);
 	}
 	if (argc > 1) {
 		(void) fprintf(stderr, gettext("too many arguments\n"));
-		usage(FALSE);
+		usage(B_FALSE);
 	}
 
 	if (isatty(STDIN_FILENO)) {
@@ -1816,7 +1864,7 @@ zfs_do_receive(int argc, char **argv)
 		return (1);
 	}
 
-	err = zfs_receive(argv[0], isprefix, verbose, dryrun);
+	err = zfs_receive(g_zfs, argv[0], isprefix, verbose, dryrun);
 	return (err != 0);
 }
 
@@ -1868,7 +1916,7 @@ get_all_filesystems(zfs_handle_t ***fslist, size_t *count)
 {
 	get_all_cbdata_t cb = { 0 };
 
-	(void) zfs_iter_root(get_one_filesystem, &cb);
+	(void) zfs_iter_root(g_zfs, get_one_filesystem, &cb);
 
 	*fslist = cb.cb_handles;
 	*count = cb.cb_used;
@@ -1883,9 +1931,9 @@ mountpoint_compare(const void *a, const void *b)
 	char mountb[MAXPATHLEN];
 
 	verify(zfs_prop_get(*za, ZFS_PROP_MOUNTPOINT, mounta,
-	    sizeof (mounta), NULL, NULL, 0, FALSE) == 0);
+	    sizeof (mounta), NULL, NULL, 0, B_FALSE) == 0);
 	verify(zfs_prop_get(*zb, ZFS_PROP_MOUNTPOINT, mountb,
-	    sizeof (mountb), NULL, NULL, 0, FALSE) == 0);
+	    sizeof (mountb), NULL, NULL, 0, B_FALSE) == 0);
 
 	return (strcmp(mounta, mountb));
 }
@@ -1953,9 +2001,9 @@ share_mount_callback(zfs_handle_t *zhp, void *data)
 	 * with a legacy mountpoint, or those with legacy share options.
 	 */
 	verify(zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, mountpoint,
-	    sizeof (mountpoint), NULL, NULL, 0, FALSE) == 0);
+	    sizeof (mountpoint), NULL, NULL, 0, B_FALSE) == 0);
 	verify(zfs_prop_get(zhp, ZFS_PROP_SHARENFS, shareopts,
-	    sizeof (shareopts), NULL, NULL, 0, FALSE) == 0);
+	    sizeof (shareopts), NULL, NULL, 0, B_FALSE) == 0);
 
 	if (cbp->cb_type == OP_SHARE) {
 		if (strcmp(shareopts, "off") == 0) {
@@ -2080,12 +2128,12 @@ share_or_mount(int type, int argc, char **argv)
 		case ':':
 			(void) fprintf(stderr, gettext("missing argument for "
 			    "'%c' option\n"), optopt);
-			usage(FALSE);
+			usage(B_FALSE);
 			break;
 		case '?':
 			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
 			    optopt);
-			usage(FALSE);
+			usage(B_FALSE);
 		}
 	}
 
@@ -2099,7 +2147,7 @@ share_or_mount(int type, int argc, char **argv)
 
 		if (argc != 0) {
 			(void) fprintf(stderr, gettext("too many arguments\n"));
-			usage(FALSE);
+			usage(B_FALSE);
 		}
 
 		get_all_filesystems(&fslist, &count);
@@ -2124,7 +2172,7 @@ share_or_mount(int type, int argc, char **argv)
 		if (type == OP_SHARE) {
 			(void) fprintf(stderr, gettext("missing filesystem "
 			    "argument\n"));
-			usage(FALSE);
+			usage(B_FALSE);
 		}
 
 		/*
@@ -2149,13 +2197,14 @@ share_or_mount(int type, int argc, char **argv)
 		if (argc > 1) {
 			(void) fprintf(stderr,
 			    gettext("too many arguments\n"));
-			usage(FALSE);
+			usage(B_FALSE);
 		}
 
-		if ((zhp = zfs_open(argv[0], ZFS_TYPE_FILESYSTEM)) == NULL)
+		if ((zhp = zfs_open(g_zfs, argv[0],
+		    ZFS_TYPE_FILESYSTEM)) == NULL)
 			ret = 1;
 		else {
-			cb.cb_explicit = TRUE;
+			cb.cb_explicit = B_TRUE;
 			ret = share_mount_callback(zhp, &cb);
 			zfs_close(zhp);
 		}
@@ -2210,7 +2259,7 @@ unshare_unmount_compare(const void *larg, const void *rarg, void *unused)
  * and unmount it appropriately.
  */
 static int
-unshare_unmount_path(int type, char *path, int flags, int is_manual)
+unshare_unmount_path(int type, char *path, int flags, boolean_t is_manual)
 {
 	zfs_handle_t *zhp;
 	int ret;
@@ -2252,12 +2301,13 @@ unshare_unmount_path(int type, char *path, int flags, int is_manual)
 		return (1);
 	}
 
-	if ((zhp = zfs_open(entry.mnt_special, ZFS_TYPE_FILESYSTEM)) == NULL)
+	if ((zhp = zfs_open(g_zfs, entry.mnt_special,
+	    ZFS_TYPE_FILESYSTEM)) == NULL)
 		return (1);
 
 	verify(zfs_prop_get(zhp, type == OP_SHARE ?
 		ZFS_PROP_SHARENFS : ZFS_PROP_MOUNTPOINT, property,
-		sizeof (property), NULL, NULL, 0, FALSE) == 0);
+		sizeof (property), NULL, NULL, 0, B_FALSE) == 0);
 
 	if (type == OP_SHARE) {
 		if (strcmp(property, "off") == 0) {
@@ -2318,7 +2368,7 @@ unshare_unmount(int type, int argc, char **argv)
 		case '?':
 			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
 			    optopt);
-			usage(FALSE);
+			usage(B_FALSE);
 		}
 	}
 
@@ -2329,7 +2379,7 @@ unshare_unmount(int type, int argc, char **argv)
 	if (do_all) {
 		if (argc != 0) {
 			(void) fprintf(stderr, gettext("too many arguments\n"));
-			usage(FALSE);
+			usage(B_FALSE);
 		}
 	} else if (argc != 1) {
 		if (argc == 0)
@@ -2338,7 +2388,7 @@ unshare_unmount(int type, int argc, char **argv)
 		else
 			(void) fprintf(stderr,
 			    gettext("too many arguments\n"));
-		usage(FALSE);
+		usage(B_FALSE);
 	}
 
 	if (do_all) {
@@ -2390,7 +2440,7 @@ unshare_unmount(int type, int argc, char **argv)
 			if (strchr(entry.mnt_special, '@') != NULL)
 				continue;
 
-			if ((zhp = zfs_open(entry.mnt_special,
+			if ((zhp = zfs_open(g_zfs, entry.mnt_special,
 			    ZFS_TYPE_FILESYSTEM)) == NULL) {
 				ret = 1;
 				continue;
@@ -2399,7 +2449,7 @@ unshare_unmount(int type, int argc, char **argv)
 			verify(zfs_prop_get(zhp, type == OP_SHARE ?
 			    ZFS_PROP_SHARENFS : ZFS_PROP_MOUNTPOINT,
 			    property, sizeof (property), NULL, NULL,
-			    0, FALSE) == 0);
+			    0, B_FALSE) == 0);
 
 			/* Ignore legacy mounts and shares */
 			if ((type == OP_SHARE &&
@@ -2476,14 +2526,15 @@ unshare_unmount(int type, int argc, char **argv)
 		 */
 		if (argv[0][0] == '/')
 			return (unshare_unmount_path(type, argv[0],
-				flags, FALSE));
+				flags, B_FALSE));
 
-		if ((zhp = zfs_open(argv[0], ZFS_TYPE_FILESYSTEM)) == NULL)
+		if ((zhp = zfs_open(g_zfs, argv[0],
+		    ZFS_TYPE_FILESYSTEM)) == NULL)
 			return (1);
 
 		verify(zfs_prop_get(zhp, type == OP_SHARE ?
 		    ZFS_PROP_SHARENFS : ZFS_PROP_MOUNTPOINT, property,
-		    sizeof (property), NULL, NULL, 0, FALSE) == 0);
+		    sizeof (property), NULL, NULL, 0, B_FALSE) == 0);
 
 		switch (type) {
 		case OP_SHARE:
@@ -2581,7 +2632,7 @@ manual_mount(int argc, char **argv)
 		case ':':
 			(void) fprintf(stderr, gettext("missing argument for "
 			    "'%c' option\n"), optopt);
-			usage(FALSE);
+			usage(B_FALSE);
 			break;
 		case '?':
 			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
@@ -2613,11 +2664,11 @@ manual_mount(int argc, char **argv)
 	path = argv[1];
 
 	/* try to open the dataset */
-	if ((zhp = zfs_open(dataset, ZFS_TYPE_FILESYSTEM)) == NULL)
+	if ((zhp = zfs_open(g_zfs, dataset, ZFS_TYPE_FILESYSTEM)) == NULL)
 		return (1);
 
 	(void) zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, mountpoint,
-	    sizeof (mountpoint), NULL, NULL, 0, FALSE);
+	    sizeof (mountpoint), NULL, NULL, 0, B_FALSE);
 
 	/* check for legacy mountpoint and complain appropriately */
 	ret = 0;
@@ -2683,7 +2734,7 @@ manual_unmount(int argc, char **argv)
 		return (2);
 	}
 
-	return (unshare_unmount_path(OP_MOUNT, argv[0], flags, TRUE));
+	return (unshare_unmount_path(OP_MOUNT, argv[0], flags, B_TRUE));
 }
 
 static int
@@ -2702,9 +2753,9 @@ volcheck(zpool_handle_t *zhp, void *data)
  * links, depending on the value of 'isinit'.
  */
 static int
-do_volcheck(int isinit)
+do_volcheck(boolean_t isinit)
 {
-	return (zpool_iter(volcheck, (void *)isinit) ? 1 : 0);
+	return (zpool_iter(g_zfs, volcheck, (void *)isinit) ? 1 : 0);
 }
 
 int
@@ -2720,6 +2771,14 @@ main(int argc, char **argv)
 
 	opterr = 0;
 
+	if ((g_zfs = libzfs_init()) == NULL) {
+		(void) fprintf(stderr, gettext("internal error: failed to "
+		    "initialize ZFS library\n"));
+		return (1);
+	}
+
+	libzfs_print_on_error(g_zfs, B_TRUE);
+
 	if ((mnttab_file = fopen(MNTTAB, "r")) == NULL) {
 		(void) fprintf(stderr, gettext("internal error: unable to "
 		    "open %s\n"), MNTTAB);
@@ -2741,7 +2800,7 @@ main(int argc, char **argv)
 		 */
 		if (argc < 2) {
 			(void) fprintf(stderr, gettext("missing command\n"));
-			usage(FALSE);
+			usage(B_FALSE);
 		}
 
 		cmdname = argv[1];
@@ -2762,16 +2821,16 @@ main(int argc, char **argv)
 		 * Special case '-?'
 		 */
 		if (strcmp(cmdname, "-?") == 0)
-			usage(TRUE);
+			usage(B_TRUE);
 
 		/*
 		 * 'volinit' and 'volfini' do not appear in the usage message,
 		 * so we have to special case them here.
 		 */
 		if (strcmp(cmdname, "volinit") == 0)
-			return (do_volcheck(TRUE));
+			return (do_volcheck(B_TRUE));
 		else if (strcmp(cmdname, "volfini") == 0)
-			return (do_volcheck(FALSE));
+			return (do_volcheck(B_FALSE));
 
 		/*
 		 * Run the appropriate command.
@@ -2790,12 +2849,14 @@ main(int argc, char **argv)
 		if (i == NCOMMAND) {
 			(void) fprintf(stderr, gettext("unrecognized "
 			    "command '%s'\n"), cmdname);
-			usage(FALSE);
+			usage(B_FALSE);
 		}
 	}
 
 	(void) fclose(mnttab_file);
 
+	libzfs_fini(g_zfs);
+
 	/*
 	 * The 'ZFS_ABORT' environment variable causes us to dump core on exit
 	 * for the purposes of running ::findleaks.
diff --git a/usr/src/cmd/zfs/zfs_util.h b/usr/src/cmd/zfs/zfs_util.h
index 5b2fcfa9f3..c7f2f16186 100644
--- a/usr/src/cmd/zfs/zfs_util.h
+++ b/usr/src/cmd/zfs/zfs_util.h
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -29,11 +28,14 @@
 
 #pragma ident	"%Z%%M%	%I%	%E% SMI"
 
+#include <libzfs.h>
+
 #ifdef	__cplusplus
 extern "C" {
 #endif
 
 void * safe_malloc(size_t size);
+libzfs_handle_t *g_zfs;
 
 #ifdef	__cplusplus
 }
diff --git a/usr/src/cmd/zinject/Makefile.com b/usr/src/cmd/zinject/Makefile.com
index 14651a366c..c1ac4ac922 100644
--- a/usr/src/cmd/zinject/Makefile.com
+++ b/usr/src/cmd/zinject/Makefile.com
@@ -34,7 +34,7 @@ include ../../Makefile.cmd
 INCS +=	-I../../../lib/libzpool/common
 INCS +=	-I../../../uts/common/fs/zfs
 
-LDLIBS += -lzpool -lzfs
+LDLIBS += -lzpool -lzfs -lnvpair
 
 C99MODE=	-xc99=%all
 C99LMODE=	-Xc99=%all
diff --git a/usr/src/cmd/zinject/translate.c b/usr/src/cmd/zinject/translate.c
index 882b230930..b4f6693aa1 100644
--- a/usr/src/cmd/zinject/translate.c
+++ b/usr/src/cmd/zinject/translate.c
@@ -436,22 +436,28 @@ translate_device(const char *pool, const char *device, zinject_record_t *record)
 {
 	char *end;
 	zpool_handle_t *zhp;
+	nvlist_t *tgt;
+	boolean_t isspare;
 
 	/*
 	 * Given a device name or GUID, create an appropriate injection record
 	 * with zi_guid set.
 	 */
-	if ((zhp = zpool_open(pool)) == NULL)
+	if ((zhp = zpool_open(g_zfs, pool)) == NULL)
 		return (-1);
 
 	record->zi_guid = strtoull(device, &end, 16);
-	if (record->zi_guid == 0 || *end != '\0')
-		record->zi_guid = zpool_vdev_to_guid(zhp, device);
+	if (record->zi_guid == 0 || *end != '\0') {
+		tgt = zpool_find_vdev(zhp, device, &isspare);
 
-	if (record->zi_guid == 0) {
-		(void) fprintf(stderr, "cannot find device '%s' in pool '%s'\n",
-		    device, pool);
-		return (-1);
+		if (tgt == NULL) {
+			(void) fprintf(stderr, "cannot find device '%s' in "
+			    "pool '%s'\n", device, pool);
+			return (-1);
+		}
+
+		verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID,
+		    &record->zi_guid) == 0);
 	}
 
 	return (0);
diff --git a/usr/src/cmd/zinject/zinject.c b/usr/src/cmd/zinject/zinject.c
index b584fb0de5..02fc6a16ef 100644
--- a/usr/src/cmd/zinject/zinject.c
+++ b/usr/src/cmd/zinject/zinject.c
@@ -151,6 +151,7 @@
 
 #include "zinject.h"
 
+libzfs_handle_t *g_zfs;
 int zfs_fd;
 
 #define	ECKSUM	EBADE
@@ -479,6 +480,14 @@ main(int argc, char **argv)
 	int ret;
 	int flags = 0;
 
+	if ((g_zfs = libzfs_init()) == NULL) {
+		(void) fprintf(stderr, "internal error: failed to "
+		    "initialize ZFS library\n");
+		return (1);
+	}
+
+	libzfs_print_on_error(g_zfs, B_TRUE);
+
 	if ((zfs_fd = open(ZFS_DEV, O_RDWR)) < 0) {
 		(void) fprintf(stderr, "failed to open ZFS device\n");
 		return (1);
@@ -721,7 +730,7 @@ main(int argc, char **argv)
 	 * time we access the pool.
 	 */
 	if (dataset[0] != '\0' && domount) {
-		if ((zhp = zfs_open(dataset, ZFS_TYPE_ANY)) == NULL)
+		if ((zhp = zfs_open(g_zfs, dataset, ZFS_TYPE_ANY)) == NULL)
 			return (1);
 
 		if (zfs_unmount(zhp, NULL, 0) != 0)
@@ -735,5 +744,7 @@ main(int argc, char **argv)
 	if (dataset[0] != '\0' && domount)
 		ret = (zfs_mount(zhp, NULL, 0) != 0);
 
+	libzfs_fini(g_zfs);
+
 	return (ret);
 }
diff --git a/usr/src/cmd/zinject/zinject.h b/usr/src/cmd/zinject/zinject.h
index bdbc2454c4..8086c4bc80 100644
--- a/usr/src/cmd/zinject/zinject.h
+++ b/usr/src/cmd/zinject/zinject.h
@@ -57,6 +57,8 @@ int translate_device(const char *pool, const char *device,
     zinject_record_t *record);
 void usage(void);
 
+extern libzfs_handle_t *g_zfs;
+
 #ifdef	__cplusplus
 }
 #endif
diff --git a/usr/src/cmd/zoneadm/zfs.c b/usr/src/cmd/zoneadm/zfs.c
index 98fa5a44b5..eb9822781a 100644
--- a/usr/src/cmd/zoneadm/zfs.c
+++ b/usr/src/cmd/zoneadm/zfs.c
@@ -47,7 +47,7 @@
 
 #include "zoneadm.h"
 
-static const char *current_dataset;
+libzfs_handle_t *g_zfs;
 
 typedef struct zfs_mount_data {
 	char		*match_name;
@@ -61,41 +61,6 @@ typedef struct zfs_snapshot_data {
 } zfs_snapshot_data_t;
 
 /*
- * ZFS error handler to do nothing - do not print the libzfs error messages.
- */
-/* ARGSUSED */
-static void
-noop_err_handler(const char *fmt, va_list ap)
-{
-}
-
-/*
- * Custom error handler for errors incurred as part of verifying datasets.  We
- * want to trim off the leading 'cannot open ...' to create a better error
- * message.  The only other way this can fail is if we fail to set the 'zoned'
- * property.  In this case we just pass the error on verbatim.
- */
-static void
-err_handler(const char *fmt, va_list ap)
-{
-	char buf[1024];
-
-	(void) vsnprintf(buf, sizeof (buf), fmt, ap);
-
-	if (strncmp(gettext("cannot open "), buf,
-	    strlen(gettext("cannot open "))) == 0)
-		/*
-		 * TRANSLATION_NOTE
-		 * zfs and dataset are literals that should not be translated.
-		 */
-		(void) fprintf(stderr, gettext("could not verify zfs "
-		    "dataset %s%s\n"), current_dataset, strchr(buf, ':'));
-	else
-		(void) fprintf(stderr, gettext("could not verify zfs dataset "
-		    "%s: %s\n"), current_dataset, buf);
-}
-
-/*
  * A ZFS file system iterator call-back function which is used to validate
  * datasets imported into the zone.
  */
@@ -141,7 +106,7 @@ match_mountpoint(zfs_handle_t *zhp, void *data)
 
 	cbp = (zfs_mount_data_t *)data;
 	if (zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, mp, sizeof (mp), NULL, NULL,
-	    0, FALSE) == 0 && strcmp(mp, cbp->match_name) == 0) {
+	    0, B_FALSE) == 0 && strcmp(mp, cbp->match_name) == 0) {
 		cbp->match_handle = zhp;
 		return (1);
 	}
@@ -161,7 +126,7 @@ mount2zhandle(char *mountpoint)
 
 	cb.match_name = mountpoint;
 	cb.match_handle = NULL;
-	(void) zfs_iter_root(match_mountpoint, &cb);
+	(void) zfs_iter_root(g_zfs, match_mountpoint, &cb);
 	return (cb.match_handle);
 }
 
@@ -331,7 +296,7 @@ take_snapshot(char *source_zone, zfs_handle_t *zhp, char *snapshot_name,
 
 	if (pre_snapshot(source_zone) != Z_OK)
 		return (Z_ERR);
-	res = zfs_snapshot(snapshot_name);
+	res = zfs_snapshot(g_zfs, snapshot_name);
 	if (post_snapshot(source_zone) != Z_OK)
 		return (Z_ERR);
 
@@ -443,7 +408,7 @@ clone_snap(char *snapshot_name, char *zonepath)
 	zfs_handle_t	*zhp;
 	zfs_handle_t	*clone;
 
-	if ((zhp = zfs_open(snapshot_name, ZFS_TYPE_SNAPSHOT)) == NULL)
+	if ((zhp = zfs_open(g_zfs, snapshot_name, ZFS_TYPE_SNAPSHOT)) == NULL)
 		return (Z_NO_ENTRY);
 
 	(void) printf(gettext("Cloning snapshot %s\n"), snapshot_name);
@@ -454,7 +419,7 @@ clone_snap(char *snapshot_name, char *zonepath)
 		return (Z_ERR);
 
 	/* create the mountpoint if necessary */
-	if ((clone = zfs_open(zonepath, ZFS_TYPE_ANY)) == NULL)
+	if ((clone = zfs_open(g_zfs, zonepath, ZFS_TYPE_ANY)) == NULL)
 		return (Z_ERR);
 
 	/*
@@ -574,14 +539,14 @@ snap2path(char *snap_name, char *path, int len)
 
 	/* Get the file system name from the snap_name. */
 	*p = '\0';
-	zhp = zfs_open(snap_name, ZFS_TYPE_ANY);
+	zhp = zfs_open(g_zfs, snap_name, ZFS_TYPE_ANY);
 	*p = '@';
 	if (zhp == NULL)
 		return (Z_ERR);
 
 	/* Get the file system mount point. */
 	if (zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, mp, sizeof (mp), NULL, NULL,
-	    0, FALSE) != 0) {
+	    0, B_FALSE) != 0) {
 		zfs_close(zhp);
 		return (Z_ERR);
 	}
@@ -739,15 +704,16 @@ create_zfs_zonepath(char *zonepath)
 	if (path2name(zonepath, zfs_name, sizeof (zfs_name)) != Z_OK)
 		return;
 
-	zfs_set_error_handler(noop_err_handler);
-
-	if (zfs_create(zfs_name, ZFS_TYPE_FILESYSTEM, NULL, NULL) != 0 ||
-	    (zhp = zfs_open(zfs_name, ZFS_TYPE_ANY)) == NULL) {
-		zfs_set_error_handler(NULL);
+	if (zfs_create(g_zfs, zfs_name, ZFS_TYPE_FILESYSTEM, NULL, NULL) != 0 ||
+	    (zhp = zfs_open(g_zfs, zfs_name, ZFS_TYPE_ANY)) == NULL) {
+		(void) fprintf(stderr, gettext("cannot create ZFS dataset %s: "
+		    "%s\n"), zfs_name, libzfs_error_description(g_zfs));
 		return;
 	}
 
 	if (zfs_mount(zhp, NULL, 0) != 0) {
+		(void) fprintf(stderr, gettext("cannot mount ZFS dataset %s: "
+		    "%s\n"), zfs_name, libzfs_error_description(g_zfs));
 		(void) zfs_destroy(zhp);
 	} else if (zfs_prop_set(zhp, ZFS_PROP_SHARENFS, "off") != 0) {
 		(void) fprintf(stderr, gettext("file system %s successfully "
@@ -765,7 +731,6 @@ create_zfs_zonepath(char *zonepath)
 		}
 	}
 
-	zfs_set_error_handler(NULL);
 	zfs_close(zhp);
 }
 
@@ -782,12 +747,8 @@ destroy_zfs(char *zonepath)
 	boolean_t	is_clone = B_FALSE;
 	char		origin[ZFS_MAXPROPLEN];
 
-	zfs_set_error_handler(noop_err_handler);
-
-	if ((zhp = mount2zhandle(zonepath)) == NULL) {
-		zfs_set_error_handler(NULL);
+	if ((zhp = mount2zhandle(zonepath)) == NULL)
 		return (Z_ERR);
-	}
 
 	/*
 	 * We can't destroy the file system if it has dependents.
@@ -795,7 +756,6 @@ destroy_zfs(char *zonepath)
 	if (zfs_iter_dependents(zhp, has_dependent, NULL) != 0 ||
 	    zfs_unmount(zhp, NULL, 0) != 0) {
 		zfs_close(zhp);
-		zfs_set_error_handler(NULL);
 		return (Z_ERR);
 	}
 
@@ -804,10 +764,9 @@ destroy_zfs(char *zonepath)
 	 * to destroy that as well.
 	 */
 	if (zfs_prop_get(zhp, ZFS_PROP_ORIGIN, origin, sizeof (origin), NULL,
-	    NULL, 0, FALSE) == 0)
+	    NULL, 0, B_FALSE) == 0)
 		is_clone = B_TRUE;
 
-	zfs_set_error_handler(NULL);
 	if (zfs_destroy(zhp) != 0) {
 		/*
 		 * If the destroy fails for some reason, try to remount
@@ -818,7 +777,6 @@ destroy_zfs(char *zonepath)
 		zfs_close(zhp);
 		return (Z_ERR);
 	}
-	zfs_set_error_handler(noop_err_handler);
 
 	(void) printf(gettext("The ZFS file system for this zone has been "
 	    "destroyed.\n"));
@@ -829,17 +787,16 @@ destroy_zfs(char *zonepath)
 		/*
 		 * Try to clean up the snapshot that the clone was taken from.
 		 */
-		if ((ohp = zfs_open(origin, ZFS_TYPE_SNAPSHOT)) != NULL) {
+		if ((ohp = zfs_open(g_zfs, origin,
+		    ZFS_TYPE_SNAPSHOT)) != NULL) {
 			if (zfs_iter_dependents(ohp, has_dependent, NULL)
-			    == 0 && zfs_unmount(ohp, NULL, 0) == 0) {
+			    == 0 && zfs_unmount(ohp, NULL, 0) == 0)
 				(void) zfs_destroy(ohp);
-			}
 			zfs_close(ohp);
 		}
 	}
 
 	zfs_close(zhp);
-	zfs_set_error_handler(NULL);
 	return (Z_OK);
 }
 
@@ -889,12 +846,8 @@ move_zfs(char *zonepath, char *new_zonepath)
 	int		ret = Z_ERR;
 	zfs_handle_t	*zhp;
 
-	zfs_set_error_handler(noop_err_handler);
-
-	if ((zhp = mount2zhandle(zonepath)) == NULL) {
-		zfs_set_error_handler(NULL);
+	if ((zhp = mount2zhandle(zonepath)) == NULL)
 		return (Z_ERR);
-	}
 
 	if (zfs_prop_set(zhp, ZFS_PROP_MOUNTPOINT, new_zonepath) == 0) {
 		/*
@@ -906,7 +859,6 @@ move_zfs(char *zonepath, char *new_zonepath)
 	}
 
 	zfs_close(zhp);
-	zfs_set_error_handler(NULL);
 
 	return (ret);
 }
@@ -940,14 +892,13 @@ verify_datasets(zone_dochandle_t handle)
 		return (Z_ERR);
 	}
 
-	zfs_set_error_handler(err_handler);
-
 	while (zonecfg_getdsent(handle, &dstab) == Z_OK) {
 
-		current_dataset = dstab.zone_dataset_name;
-
-		if ((zhp = zfs_open(dstab.zone_dataset_name,
+		if ((zhp = zfs_open(g_zfs, dstab.zone_dataset_name,
 		    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME)) == NULL) {
+			(void) fprintf(stderr, gettext("could not verify zfs "
+			    "dataset %s: %s\n"), dstab.zone_dataset_name,
+			    libzfs_error_description(g_zfs));
 			return_code = Z_ERR;
 			continue;
 		}
@@ -978,7 +929,6 @@ verify_datasets(zone_dochandle_t handle)
 		zfs_close(zhp);
 	}
 	(void) zonecfg_enddsent(handle);
-	zfs_set_error_handler(NULL);
 
 	return (return_code);
 }
@@ -993,13 +943,11 @@ verify_fs_zfs(struct zone_fstab *fstab)
 	zfs_handle_t *zhp;
 	char propbuf[ZFS_MAXPROPLEN];
 
-	zfs_set_error_handler(noop_err_handler);
-
-	if ((zhp = zfs_open(fstab->zone_fs_special, ZFS_TYPE_ANY)) == NULL) {
+	if ((zhp = zfs_open(g_zfs, fstab->zone_fs_special,
+	    ZFS_TYPE_ANY)) == NULL) {
 		(void) fprintf(stderr, gettext("could not verify fs %s: "
 		    "could not access zfs dataset '%s'\n"),
 		    fstab->zone_fs_dir, fstab->zone_fs_special);
-		zfs_set_error_handler(NULL);
 		return (Z_ERR);
 	}
 
@@ -1008,7 +956,6 @@ verify_fs_zfs(struct zone_fstab *fstab)
 		    "'%s' is not a file system\n"),
 		    fstab->zone_fs_dir, fstab->zone_fs_special);
 		zfs_close(zhp);
-		zfs_set_error_handler(NULL);
 		return (Z_ERR);
 	}
 
@@ -1018,11 +965,21 @@ verify_fs_zfs(struct zone_fstab *fstab)
 		    "zfs '%s' mountpoint is not \"legacy\"\n"),
 		    fstab->zone_fs_dir, fstab->zone_fs_special);
 		zfs_close(zhp);
-		zfs_set_error_handler(NULL);
 		return (Z_ERR);
 	}
 
 	zfs_close(zhp);
-	zfs_set_error_handler(NULL);
+	return (Z_OK);
+}
+
+int
+init_zfs(void)
+{
+	if ((g_zfs = libzfs_init()) == NULL) {
+		(void) fprintf(stderr, gettext("failed to initialize ZFS "
+		    "library\n"));
+		return (Z_ERR);
+	}
+
 	return (Z_OK);
 }
diff --git a/usr/src/cmd/zoneadm/zoneadm.c b/usr/src/cmd/zoneadm/zoneadm.c
index e25895736c..50c3b1ecd7 100644
--- a/usr/src/cmd/zoneadm/zoneadm.c
+++ b/usr/src/cmd/zoneadm/zoneadm.c
@@ -4433,6 +4433,9 @@ main(int argc, char **argv)
 		exit(Z_ERR);
 	}
 
+	if (init_zfs() != Z_OK)
+		exit(Z_ERR);
+
 	while ((arg = getopt(argc, argv, "?z:R:")) != EOF) {
 		switch (arg) {
 		case '?':
diff --git a/usr/src/cmd/zoneadm/zoneadm.h b/usr/src/cmd/zoneadm/zoneadm.h
index 161d7cee18..d6aa67798d 100644
--- a/usr/src/cmd/zoneadm/zoneadm.h
+++ b/usr/src/cmd/zoneadm/zoneadm.h
@@ -81,6 +81,7 @@ extern boolean_t is_zonepath_zfs(char *zonepath);
 extern int move_zfs(char *zonepath, char *new_zonepath);
 extern int verify_datasets(zone_dochandle_t handle);
 extern int verify_fs_zfs(struct zone_fstab *fstab);
+extern int init_zfs(void);
 
 /*
  * sw_cmp.c
diff --git a/usr/src/cmd/zoneadmd/vplat.c b/usr/src/cmd/zoneadmd/vplat.c
index d629c7a9c4..84b06afb17 100644
--- a/usr/src/cmd/zoneadmd/vplat.c
+++ b/usr/src/cmd/zoneadmd/vplat.c
@@ -2631,21 +2631,13 @@ out:
 	return (error);
 }
 
-/* ARGSUSED */
-static void
-zfs_error_handler(const char *fmt, va_list ap)
-{
-	/*
-	 * Do nothing - we interpret the failures from each libzfs call below.
-	 */
-}
-
 static int
 validate_datasets(zlog_t *zlogp)
 {
 	zone_dochandle_t handle;
 	struct zone_dstab dstab;
 	zfs_handle_t *zhp;
+	libzfs_handle_t *hdl;
 
 	if ((handle = zonecfg_init_handle()) == NULL) {
 		zerror(zlogp, B_TRUE, "getting zone configuration handle");
@@ -2663,15 +2655,20 @@ validate_datasets(zlog_t *zlogp)
 		return (-1);
 	}
 
-	zfs_set_error_handler(zfs_error_handler);
+	if ((hdl = libzfs_init()) == NULL) {
+		zerror(zlogp, B_FALSE, "opening ZFS library");
+		zonecfg_fini_handle(handle);
+		return (-1);
+	}
 
 	while (zonecfg_getdsent(handle, &dstab) == Z_OK) {
 
-		if ((zhp = zfs_open(dstab.zone_dataset_name,
+		if ((zhp = zfs_open(hdl, dstab.zone_dataset_name,
 		    ZFS_TYPE_FILESYSTEM)) == NULL) {
 			zerror(zlogp, B_FALSE, "cannot open ZFS dataset '%s'",
 			    dstab.zone_dataset_name);
 			zonecfg_fini_handle(handle);
+			libzfs_fini(hdl);
 			return (-1);
 		}
 
@@ -2686,6 +2683,7 @@ validate_datasets(zlog_t *zlogp)
 			    dstab.zone_dataset_name);
 			zonecfg_fini_handle(handle);
 			zfs_close(zhp);
+			libzfs_fini(hdl);
 			return (-1);
 		}
 
@@ -2694,6 +2692,7 @@ validate_datasets(zlog_t *zlogp)
 	(void) zonecfg_enddsent(handle);
 
 	zonecfg_fini_handle(handle);
+	libzfs_fini(hdl);
 
 	return (0);
 }
diff --git a/usr/src/cmd/zpool/zpool_dataset.c b/usr/src/cmd/zpool/zpool_dataset.c
index d6cdde87bd..0b4c6a15fe 100644
--- a/usr/src/cmd/zpool/zpool_dataset.c
+++ b/usr/src/cmd/zpool/zpool_dataset.c
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -59,6 +58,8 @@ do_unmount(zfs_handle_t *zfsp, void *data)
 	if (zfs_unmount(zfsp, NULL, cbp->cb_force ? MS_FORCE : 0) != 0)
 		cbp->cb_failed = 1;
 
+	zfs_close(zfsp);
+
 	return (0);
 }
 
@@ -78,7 +79,8 @@ unmount_datasets(zpool_handle_t *zhp, int force)
 	if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL)
 		return (0);
 
-	if ((zfsp = zfs_open(zpool_get_name(zhp), ZFS_TYPE_FILESYSTEM)) == NULL)
+	if ((zfsp = zfs_open(g_zfs, zpool_get_name(zhp),
+	    ZFS_TYPE_FILESYSTEM)) == NULL)
 		return (-1);
 
 	cb.cb_force = force;
@@ -89,12 +91,8 @@ unmount_datasets(zpool_handle_t *zhp, int force)
 		return (-1);
 	}
 
-	if (do_unmount(zfsp, &cb) != 0 || cb.cb_failed != 0) {
-		zfs_close(zfsp);
+	if (do_unmount(zfsp, &cb) != 0 || cb.cb_failed != 0)
 		return (-1);
-	}
-
-	zfs_close(zfsp);
 
 	return (0);
 }
@@ -108,8 +106,10 @@ do_mount_share(zfs_handle_t *zfsp, void *data)
 	cbdata_t *cbp = data;
 	int ret;
 
-	if (zfs_get_type(zfsp) != ZFS_TYPE_FILESYSTEM)
+	if (zfs_get_type(zfsp) != ZFS_TYPE_FILESYSTEM) {
+		zfs_close(zfsp);
 		return (0);
+	}
 
 	if (zfs_mount(zfsp, cbp->cb_mntopts, 0) != 0)
 		cbp->cb_failed = 1;
@@ -118,6 +118,7 @@ do_mount_share(zfs_handle_t *zfsp, void *data)
 
 	ret = zfs_iter_children(zfsp, do_mount_share, data);
 
+	zfs_close(zfsp);
 	return (ret);
 }
 
@@ -142,15 +143,12 @@ mount_share_datasets(zpool_handle_t *zhp, const char *options)
 	if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL)
 		return (0);
 
-	if ((zfsp = zfs_open(zpool_get_name(zhp), ZFS_TYPE_FILESYSTEM)) == NULL)
+	if ((zfsp = zfs_open(g_zfs, zpool_get_name(zhp),
+	    ZFS_TYPE_FILESYSTEM)) == NULL)
 		return (-1);
 
-	if (do_mount_share(zfsp, &cb) != 0 || cb.cb_failed != 0) {
-		zfs_close(zfsp);
+	if (do_mount_share(zfsp, &cb) != 0 || cb.cb_failed != 0)
 		return (-1);
-	}
-
-	zfs_close(zfsp);
 
 	return (0);
 }
diff --git a/usr/src/cmd/zpool/zpool_iter.c b/usr/src/cmd/zpool/zpool_iter.c
index f99396da81..4a0a9ef162 100644
--- a/usr/src/cmd/zpool/zpool_iter.c
+++ b/usr/src/cmd/zpool/zpool_iter.c
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -50,7 +49,7 @@ typedef struct zpool_node {
 } zpool_node_t;
 
 struct zpool_list {
-	int		zl_findall;
+	boolean_t	zl_findall;
 	uu_avl_t	*zl_avl;
 	uu_avl_pool_t	*zl_pool;
 };
@@ -114,18 +113,18 @@ pool_list_get(int argc, char **argv, int *err)
 		no_memory();
 
 	if (argc == 0) {
-		(void) zpool_iter(add_pool, zlp);
-		zlp->zl_findall = TRUE;
+		(void) zpool_iter(g_zfs, add_pool, zlp);
+		zlp->zl_findall = B_TRUE;
 	} else {
 		int i;
 
 		for (i = 0; i < argc; i++) {
 			zpool_handle_t *zhp;
 
-			if ((zhp = zpool_open_canfail(argv[i])) != NULL)
+			if ((zhp = zpool_open_canfail(g_zfs, argv[i])) != NULL)
 				(void) add_pool(zhp, zlp);
 			else
-				*err = TRUE;
+				*err = B_TRUE;
 		}
 	}
 
@@ -141,7 +140,7 @@ void
 pool_list_update(zpool_list_t *zlp)
 {
 	if (zlp->zl_findall)
-		(void) zpool_iter(add_pool, zlp);
+		(void) zpool_iter(g_zfs, add_pool, zlp);
 }
 
 /*
@@ -223,7 +222,7 @@ pool_list_count(zpool_list_t *zlp)
  * using the pool_list_* interfaces.
  */
 int
-for_each_pool(int argc, char **argv, int unavail, zpool_iter_f func,
+for_each_pool(int argc, char **argv, boolean_t unavail, zpool_iter_f func,
     void *data)
 {
 	zpool_list_t *list;
diff --git a/usr/src/cmd/zpool/zpool_main.c b/usr/src/cmd/zpool/zpool_main.c
index e2297b24aa..c963776a9f 100644
--- a/usr/src/cmd/zpool/zpool_main.c
+++ b/usr/src/cmd/zpool/zpool_main.c
@@ -18,6 +18,7 @@
  *
  * CDDL HEADER END
  */
+
 /*
  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
@@ -51,6 +52,7 @@ static int zpool_do_create(int, char **);
 static int zpool_do_destroy(int, char **);
 
 static int zpool_do_add(int, char **);
+static int zpool_do_remove(int, char **);
 
 static int zpool_do_list(int, char **);
 static int zpool_do_iostat(int, char **);
@@ -76,7 +78,7 @@ static int zpool_do_upgrade(int, char **);
  * debugging facilities.
  */
 const char *
-_umem_debug_init()
+_umem_debug_init(void)
 {
 	return ("default,verbose"); /* $UMEM_DEBUG setting */
 }
@@ -101,6 +103,7 @@ typedef enum {
 	HELP_OFFLINE,
 	HELP_ONLINE,
 	HELP_REPLACE,
+	HELP_REMOVE,
 	HELP_SCRUB,
 	HELP_STATUS,
 	HELP_UPGRADE
@@ -127,6 +130,7 @@ static zpool_command_t command_table[] = {
 	{ "destroy",	zpool_do_destroy,	HELP_DESTROY		},
 	{ NULL },
 	{ "add",	zpool_do_add,		HELP_ADD		},
+	{ "remove",	zpool_do_remove,	HELP_REMOVE		},
 	{ NULL },
 	{ "list",	zpool_do_list,		HELP_LIST		},
 	{ "iostat",	zpool_do_iostat,	HELP_IOSTAT		},
@@ -188,6 +192,8 @@ get_usage(zpool_help_t idx) {
 	case HELP_REPLACE:
 		return (gettext("\treplace [-f] <pool> <device> "
 		    "[new_device]\n"));
+	case HELP_REMOVE:
+		return (gettext("\tremove <pool> <device>\n"));
 	case HELP_SCRUB:
 		return (gettext("\tscrub [-s] <pool> ...\n"));
 	case HELP_STATUS:
@@ -253,7 +259,7 @@ static char *column_subopts[] = {
  * a complete usage message.
  */
 void
-usage(int requested)
+usage(boolean_t requested)
 {
 	int i;
 	FILE *fp = requested ? stdout : stderr;
@@ -324,7 +330,7 @@ print_vdev_tree(zpool_handle_t *zhp, const char *name, nvlist_t *nv, int indent)
 		return;
 
 	for (c = 0; c < children; c++) {
-		vname = zpool_vdev_name(zhp, child[c]);
+		vname = zpool_vdev_name(g_zfs, zhp, child[c]);
 		print_vdev_tree(zhp, vname, child[c], indent + 2);
 		free(vname);
 	}
@@ -344,8 +350,8 @@ print_vdev_tree(zpool_handle_t *zhp, const char *name, nvlist_t *nv, int indent)
 int
 zpool_do_add(int argc, char **argv)
 {
-	int force = FALSE;
-	int dryrun = FALSE;
+	boolean_t force = B_FALSE;
+	boolean_t dryrun = B_FALSE;
 	int c;
 	nvlist_t *nvroot;
 	char *poolname;
@@ -357,15 +363,15 @@ zpool_do_add(int argc, char **argv)
 	while ((c = getopt(argc, argv, "fn")) != -1) {
 		switch (c) {
 		case 'f':
-			force = TRUE;
+			force = B_TRUE;
 			break;
 		case 'n':
-			dryrun = TRUE;
+			dryrun = B_TRUE;
 			break;
 		case '?':
 			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
 			    optopt);
-			usage(FALSE);
+			usage(B_FALSE);
 		}
 	}
 
@@ -375,11 +381,11 @@ zpool_do_add(int argc, char **argv)
 	/* get pool name and check number of arguments */
 	if (argc < 1) {
 		(void) fprintf(stderr, gettext("missing pool name argument\n"));
-		usage(FALSE);
+		usage(B_FALSE);
 	}
 	if (argc < 2) {
 		(void) fprintf(stderr, gettext("missing vdev specification\n"));
-		usage(FALSE);
+		usage(B_FALSE);
 	}
 
 	poolname = argv[0];
@@ -387,7 +393,7 @@ zpool_do_add(int argc, char **argv)
 	argc--;
 	argv++;
 
-	if ((zhp = zpool_open(poolname)) == NULL)
+	if ((zhp = zpool_open(g_zfs, poolname)) == NULL)
 		return (1);
 
 	if ((config = zpool_get_config(zhp, NULL)) == NULL) {
@@ -398,7 +404,7 @@ zpool_do_add(int argc, char **argv)
 	}
 
 	/* pass off to get_vdev_spec for processing */
-	nvroot = make_root_vdev(config, force, !force, argc, argv);
+	nvroot = make_root_vdev(config, force, !force, B_FALSE, argc, argv);
 	if (nvroot == NULL) {
 		zpool_close(zhp);
 		return (1);
@@ -421,6 +427,46 @@ zpool_do_add(int argc, char **argv)
 		ret = (zpool_add(zhp, nvroot) != 0);
 	}
 
+	nvlist_free(nvroot);
+	zpool_close(zhp);
+
+	return (ret);
+}
+
+/*
+ * zpool remove <pool> <vdev>
+ *
+ * Removes the given vdev from the pool.  Currently, this only supports removing
+ * spares from the pool.  Eventually, we'll want to support removing leaf vdevs
+ * (as an alias for 'detach') as well as toplevel vdevs.
+ */
+int
+zpool_do_remove(int argc, char **argv)
+{
+	char *poolname;
+	int ret;
+	zpool_handle_t *zhp;
+
+	argc--;
+	argv++;
+
+	/* get pool name and check number of arguments */
+	if (argc < 1) {
+		(void) fprintf(stderr, gettext("missing pool name argument\n"));
+		usage(B_FALSE);
+	}
+	if (argc < 2) {
+		(void) fprintf(stderr, gettext("missing device\n"));
+		usage(B_FALSE);
+	}
+
+	poolname = argv[0];
+
+	if ((zhp = zpool_open(g_zfs, poolname)) == NULL)
+		return (1);
+
+	ret = (zpool_vdev_remove(zhp, argv[1]) != 0);
+	zpool_close(zhp);
 	return (ret);
 }
 
@@ -442,23 +488,25 @@ zpool_do_add(int argc, char **argv)
 int
 zpool_do_create(int argc, char **argv)
 {
-	int force = FALSE;
-	int dryrun = FALSE;
+	boolean_t force = B_FALSE;
+	boolean_t dryrun = B_FALSE;
 	int c;
 	nvlist_t *nvroot;
 	char *poolname;
 	int ret;
 	char *altroot = NULL;
 	char *mountpoint = NULL;
+	nvlist_t **child;
+	uint_t children;
 
 	/* check options */
 	while ((c = getopt(argc, argv, ":fnR:m:")) != -1) {
 		switch (c) {
 		case 'f':
-			force = TRUE;
+			force = B_TRUE;
 			break;
 		case 'n':
-			dryrun = TRUE;
+			dryrun = B_TRUE;
 			break;
 		case 'R':
 			altroot = optarg;
@@ -469,12 +517,12 @@ zpool_do_create(int argc, char **argv)
 		case ':':
 			(void) fprintf(stderr, gettext("missing argument for "
 			    "'%c' option\n"), optopt);
-			usage(FALSE);
+			usage(B_FALSE);
 			break;
 		case '?':
 			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
 			    optopt);
-			usage(FALSE);
+			usage(B_FALSE);
 		}
 	}
 
@@ -484,11 +532,11 @@ zpool_do_create(int argc, char **argv)
 	/* get pool name and check number of arguments */
 	if (argc < 1) {
 		(void) fprintf(stderr, gettext("missing pool name argument\n"));
-		usage(FALSE);
+		usage(B_FALSE);
 	}
 	if (argc < 2) {
 		(void) fprintf(stderr, gettext("missing vdev specification\n"));
-		usage(FALSE);
+		usage(B_FALSE);
 	}
 
 	poolname = argv[0];
@@ -506,13 +554,26 @@ zpool_do_create(int argc, char **argv)
 	}
 
 	/* pass off to get_vdev_spec for bulk processing */
-	nvroot = make_root_vdev(NULL, force, !force, argc - 1, argv + 1);
+	nvroot = make_root_vdev(NULL, force, !force, B_FALSE, argc - 1,
+	    argv + 1);
 	if (nvroot == NULL)
 		return (1);
 
+	/* make_root_vdev() allows 0 toplevel children if there are spares */
+	verify(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
+	    &child, &children) == 0);
+	if (children == 0) {
+		(void) fprintf(stderr, gettext("invalid vdev "
+		    "specification: at least one toplevel vdev must be "
+		    "specified\n"));
+		nvlist_free(nvroot);
+		return (1);
+	}
+
 	if (altroot != NULL && altroot[0] != '/') {
 		(void) fprintf(stderr, gettext("invalid alternate root '%s': "
-		    "must be an absolute path\n"));
+		    "must be an absolute path\n"), altroot);
+		nvlist_free(nvroot);
 		return (1);
 	}
 
@@ -530,6 +591,7 @@ zpool_do_create(int argc, char **argv)
 			(void) fprintf(stderr, gettext("invalid mountpoint "
 			    "'%s': must be an absolute path, 'legacy', or "
 			    "'none'\n"), mountpoint);
+			nvlist_free(nvroot);
 			return (1);
 		}
 
@@ -560,6 +622,7 @@ zpool_do_create(int argc, char **argv)
 				    "'%s' exists and is not empty\n"), buf);
 			(void) fprintf(stderr, gettext("use '-m' "
 			    "option to provide a different default\n"));
+			nvlist_free(nvroot);
 			return (1);
 		}
 	}
@@ -570,8 +633,6 @@ zpool_do_create(int argc, char **argv)
 		 * For a dry run invocation, print out a basic message and run
 		 * through all the vdevs in the list and print out in an
 		 * appropriate hierarchy.
-		 *
-		 * XXZFS find out of we can create the pool?
 		 */
 		(void) printf(gettext("would create '%s' with the "
 		    "following layout:\n\n"), poolname);
@@ -584,8 +645,8 @@ zpool_do_create(int argc, char **argv)
 		/*
 		 * Hand off to libzfs.
 		 */
-		if (zpool_create(poolname, nvroot, altroot) == 0) {
-			zfs_handle_t *pool = zfs_open(poolname,
+		if (zpool_create(g_zfs, poolname, nvroot, altroot) == 0) {
+			zfs_handle_t *pool = zfs_open(g_zfs, poolname,
 			    ZFS_TYPE_FILESYSTEM);
 			if (pool != NULL) {
 				if (mountpoint != NULL)
@@ -596,8 +657,10 @@ zpool_do_create(int argc, char **argv)
 					ret = zfs_share(pool);
 				zfs_close(pool);
 			}
+		} else if (libzfs_errno(g_zfs) == EZFS_INVALIDNAME) {
+			(void) fprintf(stderr, gettext("pool name may have "
+			    "been omitted\n"));
 		}
-
 	}
 
 	nvlist_free(nvroot);
@@ -615,7 +678,7 @@ zpool_do_create(int argc, char **argv)
 int
 zpool_do_destroy(int argc, char **argv)
 {
-	int force = FALSE;
+	boolean_t force = B_FALSE;
 	int c;
 	char *pool;
 	zpool_handle_t *zhp;
@@ -625,12 +688,12 @@ zpool_do_destroy(int argc, char **argv)
 	while ((c = getopt(argc, argv, "f")) != -1) {
 		switch (c) {
 		case 'f':
-			force = TRUE;
+			force = B_TRUE;
 			break;
 		case '?':
 			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
 			    optopt);
-			usage(FALSE);
+			usage(B_FALSE);
 		}
 	}
 
@@ -640,16 +703,16 @@ zpool_do_destroy(int argc, char **argv)
 	/* check arguments */
 	if (argc < 1) {
 		(void) fprintf(stderr, gettext("missing pool argument\n"));
-		usage(FALSE);
+		usage(B_FALSE);
 	}
 	if (argc > 1) {
 		(void) fprintf(stderr, gettext("too many arguments\n"));
-		usage(FALSE);
+		usage(B_FALSE);
 	}
 
 	pool = argv[0];
 
-	if ((zhp = zpool_open_canfail(pool)) == NULL) {
+	if ((zhp = zpool_open_canfail(g_zfs, pool)) == NULL) {
 		/*
 		 * As a special case, check for use of '/' in the name, and
 		 * direct the user to use 'zfs destroy' instead.
@@ -685,7 +748,7 @@ zpool_do_destroy(int argc, char **argv)
 int
 zpool_do_export(int argc, char **argv)
 {
-	int force = FALSE;
+	boolean_t force = B_FALSE;
 	int c;
 	zpool_handle_t *zhp;
 	int ret;
@@ -695,12 +758,12 @@ zpool_do_export(int argc, char **argv)
 	while ((c = getopt(argc, argv, "f")) != -1) {
 		switch (c) {
 		case 'f':
-			force = TRUE;
+			force = B_TRUE;
 			break;
 		case '?':
 			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
 			    optopt);
-			usage(FALSE);
+			usage(B_FALSE);
 		}
 	}
 
@@ -710,12 +773,12 @@ zpool_do_export(int argc, char **argv)
 	/* check arguments */
 	if (argc < 1) {
 		(void) fprintf(stderr, gettext("missing pool argument\n"));
-		usage(FALSE);
+		usage(B_FALSE);
 	}
 
 	ret = 0;
 	for (i = 0; i < argc; i++) {
-		if ((zhp = zpool_open_canfail(argv[i])) == NULL) {
+		if ((zhp = zpool_open_canfail(g_zfs, argv[i])) == NULL) {
 			ret = 1;
 			continue;
 		}
@@ -742,7 +805,7 @@ zpool_do_export(int argc, char **argv)
 static int
 max_width(zpool_handle_t *zhp, nvlist_t *nv, int depth, int max)
 {
-	char *name = zpool_vdev_name(zhp, nv);
+	char *name = zpool_vdev_name(g_zfs, zhp, nv);
 	nvlist_t **child;
 	uint_t c, children;
 	int ret;
@@ -752,13 +815,22 @@ max_width(zpool_handle_t *zhp, nvlist_t *nv, int depth, int max)
 
 	free(name);
 
+	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
+	    &child, &children) == 0) {
+		for (c = 0; c < children; c++)
+			if ((ret = max_width(zhp, child[c], depth + 2,
+			    max)) > max)
+				max = ret;
+	}
+
 	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
-	    &child, &children) != 0)
-		return (max);
+	    &child, &children) == 0) {
+		for (c = 0; c < children; c++)
+			if ((ret = max_width(zhp, child[c], depth + 2,
+			    max)) > max)
+				max = ret;
+	}
 
-	for (c = 0; c < children; c++)
-		if ((ret = max_width(zhp, child[c], depth + 2, max)) > max)
-			max = ret;
 
 	return (max);
 }
@@ -819,11 +891,22 @@ print_import_config(const char *name, nvlist_t *nv, int namewidth, int depth)
 		return;
 
 	for (c = 0; c < children; c++) {
-		vname = zpool_vdev_name(NULL, child[c]);
+		vname = zpool_vdev_name(g_zfs, NULL, child[c]);
 		print_import_config(vname, child[c],
 		    namewidth, depth + 2);
 		free(vname);
 	}
+
+	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
+	    &child, &children) != 0)
+		return;
+
+	(void) printf(gettext("\tspares\n"));
+	for (c = 0; c < children; c++) {
+		vname = zpool_vdev_name(g_zfs, NULL, child[c]);
+		(void) printf("\t  %s\n", vname);
+		free(vname);
+	}
 }
 
 /*
@@ -1009,13 +1092,13 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts,
 		return (1);
 	}
 
-	if (zpool_import(config, newname, altroot) != 0)
+	if (zpool_import(g_zfs, config, newname, altroot) != 0)
 		return (1);
 
 	if (newname != NULL)
 		name = (char *)newname;
 
-	verify((zhp = zpool_open(name)) != NULL);
+	verify((zhp = zpool_open(g_zfs, name)) != NULL);
 
 	if (mount_share_datasets(zhp, mntopts) != 0) {
 		zpool_close(zhp);
@@ -1056,24 +1139,24 @@ zpool_do_import(int argc, char **argv)
 	int c;
 	int err;
 	nvlist_t *pools;
-	int do_all = FALSE;
-	int do_destroyed = FALSE;
+	boolean_t do_all = B_FALSE;
+	boolean_t do_destroyed = B_FALSE;
 	char *altroot = NULL;
 	char *mntopts = NULL;
-	int do_force = FALSE;
+	boolean_t do_force = B_FALSE;
 	nvpair_t *elem;
 	nvlist_t *config;
 	uint64_t searchguid;
 	char *searchname;
 	nvlist_t *found_config;
-	int first;
+	boolean_t first;
 	uint64_t pool_state;
 
 	/* check options */
 	while ((c = getopt(argc, argv, ":Dfd:R:ao:")) != -1) {
 		switch (c) {
 		case 'a':
-			do_all = TRUE;
+			do_all = B_TRUE;
 			break;
 		case 'd':
 			if (searchdirs == NULL) {
@@ -1089,10 +1172,10 @@ zpool_do_import(int argc, char **argv)
 			searchdirs[nsearch++] = optarg;
 			break;
 		case 'D':
-			do_destroyed = TRUE;
+			do_destroyed = B_TRUE;
 			break;
 		case 'f':
-			do_force = TRUE;
+			do_force = B_TRUE;
 			break;
 		case 'o':
 			mntopts = optarg;
@@ -1103,12 +1186,12 @@ zpool_do_import(int argc, char **argv)
 		case ':':
 			(void) fprintf(stderr, gettext("missing argument for "
 			    "'%c' option\n"), optopt);
-			usage(FALSE);
+			usage(B_FALSE);
 			break;
 		case '?':
 			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
 			    optopt);
-			usage(FALSE);
+			usage(B_FALSE);
 		}
 	}
 
@@ -1125,12 +1208,12 @@ zpool_do_import(int argc, char **argv)
 	if (do_all) {
 		if (argc != 0) {
 			(void) fprintf(stderr, gettext("too many arguments\n"));
-			usage(FALSE);
+			usage(B_FALSE);
 		}
 	} else {
 		if (argc > 2) {
 			(void) fprintf(stderr, gettext("too many arguments\n"));
-			usage(FALSE);
+			usage(B_FALSE);
 		}
 
 		/*
@@ -1141,12 +1224,15 @@ zpool_do_import(int argc, char **argv)
 		if (argc == 0 && !priv_ineffect(PRIV_SYS_CONFIG)) {
 			(void) fprintf(stderr, gettext("cannot "
 			    "discover pools: permission denied\n"));
+			free(searchdirs);
 			return (1);
 		}
 	}
 
-	if ((pools = zpool_find_import(nsearch, searchdirs)) == NULL)
+	if ((pools = zpool_find_import(g_zfs, nsearch, searchdirs)) == NULL) {
+		free(searchdirs);
 		return (1);
+	}
 
 	/*
 	 * We now have a list of all available pools in the given directories.
@@ -1176,7 +1262,7 @@ zpool_do_import(int argc, char **argv)
 
 	err = 0;
 	elem = NULL;
-	first = TRUE;
+	first = B_TRUE;
 	while ((elem = nvlist_next_nvpair(pools, elem)) != NULL) {
 
 		verify(nvpair_value_nvlist(elem, &config) == 0);
@@ -1190,7 +1276,7 @@ zpool_do_import(int argc, char **argv)
 
 		if (argc == 0) {
 			if (first)
-				first = FALSE;
+				first = B_FALSE;
 			else
 				(void) printf("\n");
 
@@ -1215,7 +1301,7 @@ zpool_do_import(int argc, char **argv)
 					    "one matching pool\n"), searchname);
 					(void) fprintf(stderr, gettext(
 					    "import by numeric ID instead\n"));
-					err = TRUE;
+					err = B_TRUE;
 				}
 				found_config = config;
 			}
@@ -1241,7 +1327,7 @@ zpool_do_import(int argc, char **argv)
 		if (found_config == NULL) {
 			(void) fprintf(stderr, gettext("cannot import '%s': "
 			    "no such pool available\n"), argv[0]);
-			err = TRUE;
+			err = B_TRUE;
 		} else {
 			err |= do_import(found_config, argc == 1 ? NULL :
 			    argv[1], mntopts, altroot, do_force);
@@ -1257,6 +1343,7 @@ zpool_do_import(int argc, char **argv)
 		    gettext("no pools available to import\n"));
 
 	nvlist_free(pools);
+	free(searchdirs);
 
 	return (err ? 1 : 0);
 }
@@ -1374,7 +1461,7 @@ print_vdev_stats(zpool_handle_t *zhp, const char *name, nvlist_t *oldnv,
 		return;
 
 	for (c = 0; c < children; c++) {
-		vname = zpool_vdev_name(zhp, newchild[c]);
+		vname = zpool_vdev_name(g_zfs, zhp, newchild[c]);
 		print_vdev_stats(zhp, vname, oldnv ? oldchild[c] : NULL,
 		    newchild[c], cb, depth + 2);
 		free(vname);
@@ -1476,19 +1563,19 @@ zpool_do_iostat(int argc, char **argv)
 	int npools;
 	unsigned long interval = 0, count = 0;
 	zpool_list_t *list;
-	int verbose = FALSE;
+	boolean_t verbose = B_FALSE;
 	iostat_cbdata_t cb;
 
 	/* check options */
 	while ((c = getopt(argc, argv, "v")) != -1) {
 		switch (c) {
 		case 'v':
-			verbose = TRUE;
+			verbose = B_TRUE;
 			break;
 		case '?':
 			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
 			    optopt);
-			usage(FALSE);
+			usage(B_FALSE);
 		}
 	}
 
@@ -1508,7 +1595,7 @@ zpool_do_iostat(int argc, char **argv)
 			if (interval == 0) {
 				(void) fprintf(stderr, gettext("interval "
 				    "cannot be zero\n"));
-				usage(FALSE);
+				usage(B_FALSE);
 			}
 
 			/*
@@ -1540,7 +1627,7 @@ zpool_do_iostat(int argc, char **argv)
 			if (interval == 0) {
 				(void) fprintf(stderr, gettext("interval "
 				    "cannot be zero\n"));
-				usage(FALSE);
+				usage(B_FALSE);
 			}
 
 			/*
@@ -1559,10 +1646,13 @@ zpool_do_iostat(int argc, char **argv)
 	if ((list = pool_list_get(argc, argv, &ret)) == NULL)
 		return (1);
 
-	if (pool_list_count(list) == 0 && argc != 0)
+	if (pool_list_count(list) == 0 && argc != 0) {
+		pool_list_free(list);
 		return (1);
+	}
 
 	if (pool_list_count(list) == 0 && interval == 0) {
+		pool_list_free(list);
 		(void) fprintf(stderr, gettext("no pools available\n"));
 		return (1);
 	}
@@ -1586,14 +1676,14 @@ zpool_do_iostat(int argc, char **argv)
 		 * before calculating the maximum name width, so that any
 		 * configuration changes are properly accounted for.
 		 */
-		(void) pool_list_iter(list, FALSE, refresh_iostat, &cb);
+		(void) pool_list_iter(list, B_FALSE, refresh_iostat, &cb);
 
 		/*
 		 * Iterate over all pools to determine the maximum width
 		 * for the pool / device name column across all pools.
 		 */
 		cb.cb_namewidth = 0;
-		(void) pool_list_iter(list, FALSE, get_namewidth, &cb);
+		(void) pool_list_iter(list, B_FALSE, get_namewidth, &cb);
 
 		/*
 		 * If it's the first time, or verbose mode, print the header.
@@ -1601,7 +1691,7 @@ zpool_do_iostat(int argc, char **argv)
 		if (++cb.cb_iteration == 1 || verbose)
 			print_iostat_header(&cb);
 
-		(void) pool_list_iter(list, FALSE, print_iostat, &cb);
+		(void) pool_list_iter(list, B_FALSE, print_iostat, &cb);
 
 		/*
 		 * If there's more than one pool, and we're not in verbose mode
@@ -1628,10 +1718,10 @@ zpool_do_iostat(int argc, char **argv)
 }
 
 typedef struct list_cbdata {
-	int	cb_scripted;
-	int	cb_first;
-	int	cb_fields[MAX_FIELDS];
-	int	cb_fieldcount;
+	boolean_t	cb_scripted;
+	boolean_t	cb_first;
+	int		cb_fields[MAX_FIELDS];
+	int		cb_fieldcount;
 } list_cbdata_t;
 
 /*
@@ -1675,7 +1765,7 @@ list_callback(zpool_handle_t *zhp, void *data)
 	if (cbp->cb_first) {
 		if (!cbp->cb_scripted)
 			print_header(cbp->cb_fields, cbp->cb_fieldcount);
-		cbp->cb_first = FALSE;
+		cbp->cb_first = B_FALSE;
 	}
 
 	if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) {
@@ -1803,7 +1893,7 @@ zpool_do_list(int argc, char **argv)
 	while ((c = getopt(argc, argv, ":Ho:")) != -1) {
 		switch (c) {
 		case 'H':
-			cb.cb_scripted = TRUE;
+			cb.cb_scripted = B_TRUE;
 			break;
 		case 'o':
 			fields = optarg;
@@ -1811,12 +1901,12 @@ zpool_do_list(int argc, char **argv)
 		case ':':
 			(void) fprintf(stderr, gettext("missing argument for "
 			    "'%c' option\n"), optopt);
-			usage(FALSE);
+			usage(B_FALSE);
 			break;
 		case '?':
 			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
 			    optopt);
-			usage(FALSE);
+			usage(B_FALSE);
 		}
 	}
 
@@ -1827,23 +1917,23 @@ zpool_do_list(int argc, char **argv)
 		if (cb.cb_fieldcount == MAX_FIELDS) {
 			(void) fprintf(stderr, gettext("too many "
 			    "properties given to -o option\n"));
-			usage(FALSE);
+			usage(B_FALSE);
 		}
 
 		if ((cb.cb_fields[cb.cb_fieldcount] = getsubopt(&fields,
 		    column_subopts, &value)) == -1) {
 			(void) fprintf(stderr, gettext("invalid property "
 			    "'%s'\n"), value);
-			usage(FALSE);
+			usage(B_FALSE);
 		}
 
 		cb.cb_fieldcount++;
 	}
 
 
-	cb.cb_first = TRUE;
+	cb.cb_first = B_TRUE;
 
-	ret = for_each_pool(argc, argv, TRUE, list_callback, &cb);
+	ret = for_each_pool(argc, argv, B_TRUE, list_callback, &cb);
 
 	if (argc == 0 && cb.cb_first) {
 		(void) printf(gettext("no pools available\n"));
@@ -1883,23 +1973,24 @@ zpool_get_vdev_by_name(nvlist_t *nv, char *name)
 static int
 zpool_do_attach_or_replace(int argc, char **argv, int replacing)
 {
-	int force = FALSE;
+	boolean_t force = B_FALSE;
 	int c;
 	nvlist_t *nvroot;
 	char *poolname, *old_disk, *new_disk;
 	zpool_handle_t *zhp;
 	nvlist_t *config;
+	int ret;
 
 	/* check options */
 	while ((c = getopt(argc, argv, "f")) != -1) {
 		switch (c) {
 		case 'f':
-			force = TRUE;
+			force = B_TRUE;
 			break;
 		case '?':
 			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
 			    optopt);
-			usage(FALSE);
+			usage(B_FALSE);
 		}
 	}
 
@@ -1909,7 +2000,7 @@ zpool_do_attach_or_replace(int argc, char **argv, int replacing)
 	/* get pool name and check number of arguments */
 	if (argc < 1) {
 		(void) fprintf(stderr, gettext("missing pool name argument\n"));
-		usage(FALSE);
+		usage(B_FALSE);
 	}
 
 	poolname = argv[0];
@@ -1917,7 +2008,7 @@ zpool_do_attach_or_replace(int argc, char **argv, int replacing)
 	if (argc < 2) {
 		(void) fprintf(stderr,
 		    gettext("missing <device> specification\n"));
-		usage(FALSE);
+		usage(B_FALSE);
 	}
 
 	old_disk = argv[1];
@@ -1926,7 +2017,7 @@ zpool_do_attach_or_replace(int argc, char **argv, int replacing)
 		if (!replacing) {
 			(void) fprintf(stderr,
 			    gettext("missing <new_device> specification\n"));
-			usage(FALSE);
+			usage(B_FALSE);
 		}
 		new_disk = old_disk;
 		argc -= 1;
@@ -1939,10 +2030,10 @@ zpool_do_attach_or_replace(int argc, char **argv, int replacing)
 
 	if (argc > 1) {
 		(void) fprintf(stderr, gettext("too many arguments\n"));
-		usage(FALSE);
+		usage(B_FALSE);
 	}
 
-	if ((zhp = zpool_open(poolname)) == NULL)
+	if ((zhp = zpool_open(g_zfs, poolname)) == NULL)
 		return (1);
 
 	if ((config = zpool_get_config(zhp, NULL)) == NULL) {
@@ -1952,13 +2043,18 @@ zpool_do_attach_or_replace(int argc, char **argv, int replacing)
 		return (1);
 	}
 
-	nvroot = make_root_vdev(config, force, B_FALSE, argc, argv);
+	nvroot = make_root_vdev(config, force, B_FALSE, replacing, argc, argv);
 	if (nvroot == NULL) {
 		zpool_close(zhp);
 		return (1);
 	}
 
-	return (zpool_vdev_attach(zhp, old_disk, new_disk, nvroot, replacing));
+	ret = zpool_vdev_attach(zhp, old_disk, new_disk, nvroot, replacing);
+
+	nvlist_free(nvroot);
+	zpool_close(zhp);
+
+	return (ret);
 }
 
 /*
@@ -2008,6 +2104,7 @@ zpool_do_detach(int argc, char **argv)
 	int c;
 	char *poolname, *path;
 	zpool_handle_t *zhp;
+	int ret;
 
 	/* check options */
 	while ((c = getopt(argc, argv, "f")) != -1) {
@@ -2016,7 +2113,7 @@ zpool_do_detach(int argc, char **argv)
 		case '?':
 			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
 			    optopt);
-			usage(FALSE);
+			usage(B_FALSE);
 		}
 	}
 
@@ -2026,22 +2123,26 @@ zpool_do_detach(int argc, char **argv)
 	/* get pool name and check number of arguments */
 	if (argc < 1) {
 		(void) fprintf(stderr, gettext("missing pool name argument\n"));
-		usage(FALSE);
+		usage(B_FALSE);
 	}
 
 	if (argc < 2) {
 		(void) fprintf(stderr,
 		    gettext("missing <device> specification\n"));
-		usage(FALSE);
+		usage(B_FALSE);
 	}
 
 	poolname = argv[0];
 	path = argv[1];
 
-	if ((zhp = zpool_open(poolname)) == NULL)
+	if ((zhp = zpool_open(g_zfs, poolname)) == NULL)
 		return (1);
 
-	return (zpool_vdev_detach(zhp, path));
+	ret = zpool_vdev_detach(zhp, path);
+
+	zpool_close(zhp);
+
+	return (ret);
 }
 
 /*
@@ -2063,7 +2164,7 @@ zpool_do_online(int argc, char **argv)
 		case '?':
 			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
 			    optopt);
-			usage(FALSE);
+			usage(B_FALSE);
 		}
 	}
 
@@ -2073,16 +2174,16 @@ zpool_do_online(int argc, char **argv)
 	/* get pool name and check number of arguments */
 	if (argc < 1) {
 		(void) fprintf(stderr, gettext("missing pool name\n"));
-		usage(FALSE);
+		usage(B_FALSE);
 	}
 	if (argc < 2) {
 		(void) fprintf(stderr, gettext("missing device name\n"));
-		usage(FALSE);
+		usage(B_FALSE);
 	}
 
 	poolname = argv[0];
 
-	if ((zhp = zpool_open(poolname)) == NULL)
+	if ((zhp = zpool_open(g_zfs, poolname)) == NULL)
 		return (1);
 
 	for (i = 1; i < argc; i++)
@@ -2092,6 +2193,8 @@ zpool_do_online(int argc, char **argv)
 		else
 			ret = 1;
 
+	zpool_close(zhp);
+
 	return (ret);
 }
 
@@ -2112,19 +2215,20 @@ zpool_do_offline(int argc, char **argv)
 	int c, i;
 	char *poolname;
 	zpool_handle_t *zhp;
-	int ret = 0, istmp = FALSE;
+	int ret = 0;
+	boolean_t istmp = B_FALSE;
 
 	/* check options */
 	while ((c = getopt(argc, argv, "ft")) != -1) {
 		switch (c) {
 		case 't':
-			istmp = TRUE;
+			istmp = B_TRUE;
 			break;
 		case 'f':
 		case '?':
 			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
 			    optopt);
-			usage(FALSE);
+			usage(B_FALSE);
 		}
 	}
 
@@ -2134,16 +2238,16 @@ zpool_do_offline(int argc, char **argv)
 	/* get pool name and check number of arguments */
 	if (argc < 1) {
 		(void) fprintf(stderr, gettext("missing pool name\n"));
-		usage(FALSE);
+		usage(B_FALSE);
 	}
 	if (argc < 2) {
 		(void) fprintf(stderr, gettext("missing device name\n"));
-		usage(FALSE);
+		usage(B_FALSE);
 	}
 
 	poolname = argv[0];
 
-	if ((zhp = zpool_open(poolname)) == NULL)
+	if ((zhp = zpool_open(g_zfs, poolname)) == NULL)
 		return (1);
 
 	for (i = 1; i < argc; i++)
@@ -2153,6 +2257,8 @@ zpool_do_offline(int argc, char **argv)
 		else
 			ret = 1;
 
+	zpool_close(zhp);
+
 	return (ret);
 }
 
@@ -2170,18 +2276,18 @@ zpool_do_clear(int argc, char **argv)
 
 	if (argc < 2) {
 		(void) fprintf(stderr, gettext("missing pool name\n"));
-		usage(FALSE);
+		usage(B_FALSE);
 	}
 
 	if (argc > 3) {
 		(void) fprintf(stderr, gettext("too many arguments\n"));
-		usage(FALSE);
+		usage(B_FALSE);
 	}
 
 	pool = argv[1];
 	device = argc == 3 ? argv[2] : NULL;
 
-	if ((zhp = zpool_open(pool)) == NULL)
+	if ((zhp = zpool_open(g_zfs, pool)) == NULL)
 		return (1);
 
 	if (zpool_clear(zhp, device) != 0)
@@ -2235,7 +2341,7 @@ zpool_do_scrub(int argc, char **argv)
 		case '?':
 			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
 			    optopt);
-			usage(FALSE);
+			usage(B_FALSE);
 		}
 	}
 
@@ -2244,17 +2350,17 @@ zpool_do_scrub(int argc, char **argv)
 
 	if (argc < 1) {
 		(void) fprintf(stderr, gettext("missing pool name argument\n"));
-		usage(FALSE);
+		usage(B_FALSE);
 	}
 
-	return (for_each_pool(argc, argv, TRUE, scrub_callback, &cb));
+	return (for_each_pool(argc, argv, B_TRUE, scrub_callback, &cb));
 }
 
 typedef struct status_cbdata {
-	int	cb_verbose;
-	int	cb_explain;
-	int	cb_count;
-	int	cb_first;
+	int		cb_count;	/* 0 => "no pools available" */
+	boolean_t	cb_verbose;	/* set by the -v option */
+	boolean_t	cb_explain;	/* set by the -x option */
+	boolean_t	cb_first;	/* B_TRUE until a pool is printed */
 } status_cbdata_t;
 
 /*
@@ -2311,12 +2417,57 @@ print_scrub_status(nvlist_t *nvroot)
 	    (u_longlong_t)(minutes_left / 60), (uint_t)(minutes_left % 60));
 }
 
+typedef struct spare_cbdata {
+	uint64_t	cb_guid;	/* vdev guid to search for */
+	zpool_handle_t	*cb_zhp;	/* pool in which it was found */
+} spare_cbdata_t;
+
+/* Return B_TRUE if 'nv' or any vdev beneath it has the guid 'search'. */
+static boolean_t
+find_vdev(nvlist_t *nv, uint64_t search)
+{
+	nvlist_t **kids;
+	uint_t i, nkids;
+	uint64_t guid;
+
+	if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) == 0 &&
+	    guid == search)
+		return (B_TRUE);
+
+	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
+	    &kids, &nkids) != 0)
+		return (B_FALSE);	/* no children to descend into */
+	for (i = 0; i < nkids; i++)
+		if (find_vdev(kids[i], search))
+			return (B_TRUE);
+	return (B_FALSE);
+}
+
+/* zpool_iter() callback: look for the vdev guid cb_guid in pool 'zhp'. */
+static int
+find_spare(zpool_handle_t *zhp, void *data)
+{
+	spare_cbdata_t *cb = data;
+	nvlist_t *root;
+
+	verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
+	    ZPOOL_CONFIG_VDEV_TREE, &root) == 0);
+
+	if (!find_vdev(root, cb->cb_guid)) {
+		zpool_close(zhp);	/* not here; release this handle */
+		return (0);
+	}
+
+	cb->cb_zhp = zhp;	/* left open; the caller closes it */
+	return (1);
+}
+
 /*
  * Print out configuration state as requested by status_callback.
  */
 void
 print_status_config(zpool_handle_t *zhp, const char *name, nvlist_t *nv,
-    int namewidth, int depth)
+    int namewidth, int depth, boolean_t isspare)
 {
 	nvlist_t **child;
 	uint_t c, children;
@@ -2324,6 +2475,8 @@ print_status_config(zpool_handle_t *zhp, const char *name, nvlist_t *nv,
 	char rbuf[6], wbuf[6], cbuf[6], repaired[7];
 	char *vname;
 	uint64_t notpresent;
+	spare_cbdata_t cb;
+	const char *state;
 
 	verify(nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_STATS,
 	    (uint64_t **)&vs, &c) == 0);
@@ -2332,13 +2485,27 @@ print_status_config(zpool_handle_t *zhp, const char *name, nvlist_t *nv,
 	    &child, &children) != 0)
 		children = 0;
 
+	state = state_to_name(vs);
+	if (isspare) {
+		/*
+		 * For hot spares, we use the terms 'INUSE' and 'AVAILABLE' for
+		 * online drives.
+		 */
+		if (vs->vs_aux == VDEV_AUX_SPARED)
+			state = "INUSE";
+		else if (vs->vs_state == VDEV_STATE_HEALTHY)
+			state = "AVAIL";
+	}
+
 	(void) printf("\t%*s%-*s  %-8s", depth, "", namewidth - depth,
-	    name, state_to_name(vs));
+	    name, state);
 
-	zfs_nicenum(vs->vs_read_errors, rbuf, sizeof (rbuf));
-	zfs_nicenum(vs->vs_write_errors, wbuf, sizeof (wbuf));
-	zfs_nicenum(vs->vs_checksum_errors, cbuf, sizeof (cbuf));
-	(void) printf(" %5s %5s %5s", rbuf, wbuf, cbuf);
+	if (!isspare) {
+		zfs_nicenum(vs->vs_read_errors, rbuf, sizeof (rbuf));
+		zfs_nicenum(vs->vs_write_errors, wbuf, sizeof (wbuf));
+		zfs_nicenum(vs->vs_checksum_errors, cbuf, sizeof (cbuf));
+		(void) printf(" %5s %5s %5s", rbuf, wbuf, cbuf);
+	}
 
 	if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT,
 	    &notpresent) == 0) {
@@ -2365,6 +2532,24 @@ print_status_config(zpool_handle_t *zhp, const char *name, nvlist_t *nv,
 			(void) printf(gettext("newer version"));
 			break;
 
+		case VDEV_AUX_SPARED:
+			verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
+			    &cb.cb_guid) == 0);
+			if (zpool_iter(g_zfs, find_spare, &cb) == 1) {
+				if (strcmp(zpool_get_name(cb.cb_zhp),
+				    zpool_get_name(zhp)) == 0)
+					(void) printf(gettext("currently in "
+					    "use"));
+				else
+					(void) printf(gettext("in use by "
+					    "pool '%s'"),
+					    zpool_get_name(cb.cb_zhp));
+				zpool_close(cb.cb_zhp);
+			} else {
+				(void) printf(gettext("currently in use"));
+			}
+			break;
+
 		default:
 			(void) printf(gettext("corrupted data"));
 			break;
@@ -2382,9 +2567,9 @@ print_status_config(zpool_handle_t *zhp, const char *name, nvlist_t *nv,
 	(void) printf("\n");
 
 	for (c = 0; c < children; c++) {
-		vname = zpool_vdev_name(zhp, child[c]);
+		vname = zpool_vdev_name(g_zfs, zhp, child[c]);
 		print_status_config(zhp, vname, child[c],
-		    namewidth, depth + 2);
+		    namewidth, depth + 2, isspare);
 		free(vname);
 	}
 }
@@ -2443,6 +2628,26 @@ print_error_log(zpool_handle_t *zhp)
 	}
 }
 
+/* Print a "spares" heading followed by the status of each hot spare. */
+static void
+print_spares(zpool_handle_t *zhp, nvlist_t **spares, uint_t nspares,
+    int namewidth)
+{
+	uint_t s;
+
+	/* The heading is only printed when there is at least one spare. */
+	if (nspares != 0)
+		(void) printf(gettext("\tspares\n"));
+
+	for (s = 0; s < nspares; s++) {
+		char *vname = zpool_vdev_name(g_zfs, zhp, spares[s]);
+
+		print_status_config(zhp, vname, spares[s], namewidth, 2,
+		    B_TRUE);
+		free(vname);
+	}
+}
+
 /*
  * Display a summary of pool status.  Displays a summary such as:
  *
@@ -2480,7 +2685,7 @@ status_callback(zpool_handle_t *zhp, void *data)
 		return (0);
 
 	if (cbp->cb_first)
-		cbp->cb_first = FALSE;
+		cbp->cb_first = B_FALSE;
 	else
 		(void) printf("\n");
 
@@ -2603,6 +2808,8 @@ status_callback(zpool_handle_t *zhp, void *data)
 		int namewidth;
 		uint64_t nerr;
 		size_t realerr;
+		nvlist_t **spares;
+		uint_t nspares;
 
 		verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
 		    &nvroot) == 0);
@@ -2618,7 +2825,11 @@ status_callback(zpool_handle_t *zhp, void *data)
 		(void) printf(gettext("\t%-*s  %-8s %5s %5s %5s\n"), namewidth,
 		    "NAME", "STATE", "READ", "WRITE", "CKSUM");
 		print_status_config(zhp, zpool_get_name(zhp), nvroot,
-		    namewidth, 0);
+		    namewidth, 0, B_FALSE);
+
+		if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
+		    &spares, &nspares) == 0)
+			print_spares(zhp, spares, nspares, namewidth);
 
 		if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_ERRCOUNT,
 		    &nerr) == 0) {
@@ -2632,6 +2843,7 @@ status_callback(zpool_handle_t *zhp, void *data)
 				nerr = realerr;
 
 			(void) printf("\n");
+
 			if (nerr == 0)
 				(void) printf(gettext("errors: No known data "
 				    "errors\n"));
@@ -2668,24 +2880,24 @@ zpool_do_status(int argc, char **argv)
 	while ((c = getopt(argc, argv, "vx")) != -1) {
 		switch (c) {
 		case 'v':
-			cb.cb_verbose = TRUE;
+			cb.cb_verbose = B_TRUE;
 			break;
 		case 'x':
-			cb.cb_explain = TRUE;
+			cb.cb_explain = B_TRUE;
 			break;
 		case '?':
 			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
 			    optopt);
-			usage(FALSE);
+			usage(B_FALSE);
 		}
 	}
 
 	argc -= optind;
 	argv += optind;
 
-	cb.cb_first = TRUE;
+	cb.cb_first = B_TRUE;
 
-	ret = for_each_pool(argc, argv, TRUE, status_callback, &cb);
+	ret = for_each_pool(argc, argv, B_TRUE, status_callback, &cb);
 
 	if (argc == 0 && cb.cb_count == 0)
 		(void) printf(gettext("no pools available\n"));
@@ -2731,13 +2943,13 @@ upgrade_cb(zpool_handle_t *zhp, void *arg)
 				    "versions.\n\n"));
 				(void) printf(gettext("VER  POOL\n"));
 				(void) printf(gettext("---  ------------\n"));
-				cbp->cb_first = FALSE;
+				cbp->cb_first = B_FALSE;
 			}
 
 			(void) printf("%2llu   %s\n", version,
 			    zpool_get_name(zhp));
 		} else {
-			cbp->cb_first = FALSE;
+			cbp->cb_first = B_FALSE;
 			ret = zpool_upgrade(zhp);
 			if (ret == 0)
 				(void) printf(gettext("Successfully upgraded "
@@ -2752,7 +2964,7 @@ upgrade_cb(zpool_handle_t *zhp, void *arg)
 			    "cannot be accessed on the current system.\n\n"));
 			(void) printf(gettext("VER  POOL\n"));
 			(void) printf(gettext("---  ------------\n"));
-			cbp->cb_first = FALSE;
+			cbp->cb_first = B_FALSE;
 		}
 
 		(void) printf("%2llu   %s\n", version,
@@ -2811,7 +3023,7 @@ zpool_do_upgrade(int argc, char **argv)
 	while ((c = getopt(argc, argv, "av")) != -1) {
 		switch (c) {
 		case 'a':
-			cb.cb_all = TRUE;
+			cb.cb_all = B_TRUE;
 			break;
 		case 'v':
 			showversions = B_TRUE;
@@ -2819,7 +3031,7 @@ zpool_do_upgrade(int argc, char **argv)
 		case '?':
 			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
 			    optopt);
-			usage(FALSE);
+			usage(B_FALSE);
 		}
 	}
 
@@ -2830,28 +3042,30 @@ zpool_do_upgrade(int argc, char **argv)
 		if (cb.cb_all || argc != 0) {
 			(void) fprintf(stderr, gettext("-v option is "
 			    "incompatible with other arguments\n"));
-			usage(FALSE);
+			usage(B_FALSE);
 		}
 	} else if (cb.cb_all) {
 		if (argc != 0) {
 			(void) fprintf(stderr, gettext("-a option is "
 			    "incompatible with other arguments\n"));
-			usage(FALSE);
+			usage(B_FALSE);
 		}
 	}
 
 	(void) printf(gettext("This system is currently running ZFS version "
 	    "%llu.\n\n"), ZFS_VERSION);
-	cb.cb_first = TRUE;
+	cb.cb_first = B_TRUE;
 	if (showversions) {
 		(void) printf(gettext("The following versions are "
 		    "suppored:\n\n"));
 		(void) printf(gettext("VER  DESCRIPTION\n"));
 		(void) printf("---  -----------------------------------------"
 		    "---------------\n");
-		(void) printf(gettext(" 1   Initial ZFS version.\n"));
+		(void) printf(gettext(" 1   Initial ZFS version\n"));
 		(void) printf(gettext(" 2   Ditto blocks "
 		    "(replicated metadata)\n"));
+		(void) printf(gettext(" 3   Hot spares and double parity "
+		    "RAID-Z\n"));
 		(void) printf(gettext("\nFor more information on a particular "
 		    "version, including supported releases, see:\n\n"));
 		(void) printf("http://www.opensolaris.org/os/community/zfs/"
@@ -2860,7 +3074,7 @@ zpool_do_upgrade(int argc, char **argv)
 	} else if (argc == 0) {
 		int notfound;
 
-		ret = zpool_iter(upgrade_cb, &cb);
+		ret = zpool_iter(g_zfs, upgrade_cb, &cb);
 		notfound = cb.cb_first;
 
 		if (!cb.cb_all && ret == 0) {
@@ -2868,7 +3082,7 @@ zpool_do_upgrade(int argc, char **argv)
 				(void) printf("\n");
 			cb.cb_first = B_TRUE;
 			cb.cb_newer = B_TRUE;
-			ret = zpool_iter(upgrade_cb, &cb);
+			ret = zpool_iter(g_zfs, upgrade_cb, &cb);
 			if (!cb.cb_first) {
 				notfound = B_FALSE;
 				(void) printf("\n");
@@ -2885,7 +3099,7 @@ zpool_do_upgrade(int argc, char **argv)
 				    "their associated\nfeatures.\n"));
 		}
 	} else {
-		ret = for_each_pool(argc, argv, FALSE, upgrade_one, NULL);
+		ret = for_each_pool(argc, argv, B_FALSE, upgrade_one, NULL);
 	}
 
 	return (ret);
@@ -2901,6 +3115,14 @@ main(int argc, char **argv)
 	(void) setlocale(LC_ALL, "");
 	(void) textdomain(TEXT_DOMAIN);
 
+	if ((g_zfs = libzfs_init()) == NULL) {
+		(void) fprintf(stderr, gettext("internal error: failed to "
+		    "initialize ZFS library"));
+		return (1);
+	}
+
+	libzfs_print_on_error(g_zfs, B_TRUE);
+
 	opterr = 0;
 
 	/*
@@ -2908,7 +3130,7 @@ main(int argc, char **argv)
 	 */
 	if (argc < 2) {
 		(void) fprintf(stderr, gettext("missing command\n"));
-		usage(FALSE);
+		usage(B_FALSE);
 	}
 
 	cmdname = argv[1];
@@ -2917,7 +3139,7 @@ main(int argc, char **argv)
 	 * Special case '-?'
 	 */
 	if (strcmp(cmdname, "-?") == 0)
-		usage(TRUE);
+		usage(B_TRUE);
 
 	/*
 	 * Run the appropriate command.
@@ -2946,9 +3168,11 @@ main(int argc, char **argv)
 	if (i == NCOMMAND) {
 		(void) fprintf(stderr, gettext("unrecognized "
 		    "command '%s'\n"), cmdname);
-		usage(FALSE);
+		usage(B_FALSE);
 	}
 
+	libzfs_fini(g_zfs);
+
 	/*
 	 * The 'ZFS_ABORT' environment variable causes us to dump core on exit
 	 * for the purposes of running ::findleaks.
diff --git a/usr/src/cmd/zpool/zpool_util.h b/usr/src/cmd/zpool/zpool_util.h
index b2243e8f08..3cb91756de 100644
--- a/usr/src/cmd/zpool/zpool_util.h
+++ b/usr/src/cmd/zpool/zpool_util.h
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -47,12 +46,12 @@ void no_memory(void);
  * Virtual device functions
  */
 nvlist_t *make_root_vdev(nvlist_t *poolconfig, int force, int check_rep,
-    int argc, char **argv);
+    boolean_t isreplace, int argc, char **argv);
 
 /*
  * Pool list functions
  */
-int for_each_pool(int, char **, int unavail, zpool_iter_f, void *);
+int for_each_pool(int, char **, boolean_t unavail, zpool_iter_f, void *);
 
 typedef struct zpool_list zpool_list_t;
 
@@ -69,6 +68,8 @@ void pool_list_remove(zpool_list_t *, zpool_handle_t *);
 int unmount_datasets(zpool_handle_t *, int);
 int mount_share_datasets(zpool_handle_t *, const char *);
 
+libzfs_handle_t *g_zfs;	/* libzfs handle; main() sets it via libzfs_init() */
+
 #ifdef	__cplusplus
 }
 #endif
diff --git a/usr/src/cmd/zpool/zpool_vdev.c b/usr/src/cmd/zpool/zpool_vdev.c
index 6fba820d10..fa106dffb9 100644
--- a/usr/src/cmd/zpool/zpool_vdev.c
+++ b/usr/src/cmd/zpool/zpool_vdev.c
@@ -18,6 +18,7 @@
  *
  * CDDL HEADER END
  */
+
 /*
  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
@@ -34,14 +35,19 @@
  * 		file=(path=...)
  *
  * 	Group vdevs
- * 		raidz=(...)
+ * 		raidz[1|2]=(...)
  * 		mirror=(...)
  *
+ * 	Hot spares
+ *
  * While the underlying implementation supports it, group vdevs cannot contain
  * other group vdevs.  All userland verification of devices is contained within
  * this file.  If successful, the nvlist returned can be passed directly to the
  * kernel; we've done as much verification as possible in userland.
  *
+ * Hot spares are a special case, and passed down as an array of disk vdevs, at
+ * the same level as the root of the vdev tree.
+ *
  * The only function exported by this file is 'get_vdev_spec'.  The function
  * performs several passes:
  *
@@ -84,10 +90,11 @@
  * vdev_error() function keeps track of whether we have seen an error yet, and
  * prints out a header if its the first error we've seen.
  */
-int error_seen;
-int is_force;
+boolean_t error_seen;
+boolean_t is_force;
 
-void
+/*PRINTFLIKE1*/
+static void
 vdev_error(const char *fmt, ...)
 {
 	va_list ap;
@@ -100,7 +107,7 @@ vdev_error(const char *fmt, ...)
 		else
 			(void) fprintf(stderr, gettext("the following errors "
 			    "must be manually repaired:\n"));
-		error_seen = TRUE;
+		error_seen = B_TRUE;
 	}
 
 	va_start(ap, fmt);
@@ -112,10 +119,10 @@ static void
 libdiskmgt_error(int error)
 {
 	/*
-	 * ENXIO is a valid error message if the device doesn't live in
+	 * ENXIO/ENODEV is a valid error message if the device doesn't live in
 	 * /dev/dsk.  Don't bother printing an error message in this case.
 	 */
-	if (error == ENXIO)
+	if (error == ENXIO || error == ENODEV)
 		return;
 
 	(void) fprintf(stderr, gettext("warning: device in use checking "
@@ -126,7 +133,7 @@ libdiskmgt_error(int error)
  * Validate a device, passing the bulk of the work off to libdiskmgt.
  */
 int
-check_slice(const char *path, int force, int wholedisk)
+check_slice(const char *path, int force, boolean_t wholedisk, boolean_t isspare)
 {
 	char *msg;
 	int error = 0;
@@ -137,12 +144,18 @@ check_slice(const char *path, int force, int wholedisk)
 		if (error != 0) {
 			libdiskmgt_error(error);
 			return (0);
-		} else {
+		} else if (!isspare ||
+		    strstr(msg, gettext("hot spare")) == NULL) {
+			/*
+			 * The above check is a rather severe hack.  It would
+			 * probably make more sense to have DM_WHO_ZPOOL_SPARE
+			 * instead.
+			 */
 			vdev_error("%s", msg);
 			free(msg);
+			ret = -1;
 		}
 
-		ret = -1;
 	}
 
 	/*
@@ -172,7 +185,7 @@ check_slice(const char *path, int force, int wholedisk)
  */
 /* ARGSUSED */
 int
-check_disk(const char *name, dm_descriptor_t disk, int force)
+check_disk(const char *name, dm_descriptor_t disk, int force, int isspare)
 {
 	dm_descriptor_t *drive, *media, *slice;
 	int err = 0;
@@ -227,8 +240,12 @@ check_disk(const char *name, dm_descriptor_t disk, int force)
 	 * overlapping slices because we are using the whole disk.
 	 */
 	for (i = 0; slice[i] != NULL; i++) {
-		if (check_slice(dm_get_name(slice[i], &err), force, TRUE) != 0)
+		char *name = dm_get_name(slice[i], &err);
+
+		if (check_slice(name, force, B_TRUE, isspare) != 0)
 			ret = -1;
+
+		dm_free_name(name);
 	}
 
 	dm_free_descriptors(slice);
@@ -239,7 +256,7 @@ check_disk(const char *name, dm_descriptor_t disk, int force)
  * Validate a device.
  */
 int
-check_device(const char *path, int force)
+check_device(const char *path, boolean_t force, boolean_t isspare)
 {
 	dm_descriptor_t desc;
 	int err;
@@ -252,12 +269,12 @@ check_device(const char *path, int force)
 	assert(dev != NULL);
 	dev++;
 	if ((desc = dm_get_descriptor_by_name(DM_ALIAS, dev, &err)) != NULL) {
-		err = check_disk(path, desc, force);
+		err = check_disk(path, desc, force, isspare);
 		dm_free_descriptor(desc);
 		return (err);
 	}
 
-	return (check_slice(path, force, FALSE));
+	return (check_slice(path, force, B_FALSE, isspare));
 }
 
 /*
@@ -265,17 +282,18 @@ check_device(const char *path, int force)
  * not in use by another pool.
  */
 int
-check_file(const char *file, int force)
+check_file(const char *file, boolean_t force, boolean_t isspare)
 {
 	char  *name;
 	int fd;
 	int ret = 0;
 	pool_state_t state;
+	boolean_t inuse;
 
 	if ((fd = open(file, O_RDONLY)) < 0)
 		return (0);
 
-	if (zpool_in_use(fd, &state, &name)) {
+	if (zpool_in_use(g_zfs, fd, &state, &name, &inuse) == 0 && inuse) {
 		const char *desc;
 
 		switch (state) {
@@ -296,9 +314,24 @@ check_file(const char *file, int force)
 			break;
 		}
 
-		if (state == POOL_STATE_ACTIVE || !force) {
-			vdev_error(gettext("%s is part of %s pool '%s'\n"),
-			    file, desc, name);
+		/*
+		 * Allow hot spares to be shared between pools.
+		 */
+		if (state == POOL_STATE_SPARE && isspare)
+			return (0);
+
+		if (state == POOL_STATE_ACTIVE ||
+		    state == POOL_STATE_SPARE || !force) {
+			switch (state) {
+			case POOL_STATE_SPARE:
+				vdev_error(gettext("%s is reserved as a hot "
+				    "spare for pool %s\n"), file, name);
+				break;
+			default:
+				vdev_error(gettext("%s is part of %s pool "
+				    "'%s'\n"), file, desc, name);
+				break;
+			}
 			ret = -1;
 		}
 
@@ -309,16 +342,16 @@ check_file(const char *file, int force)
 	return (ret);
 }
 
-static int
+static boolean_t
 is_whole_disk(const char *arg, struct stat64 *statbuf)
 {
 	char path[MAXPATHLEN];
 
 	(void) snprintf(path, sizeof (path), "%s%s", arg, BACKUP_SLICE);
 	if (stat64(path, statbuf) == 0)
-		return (TRUE);
+		return (B_TRUE);
 
-	return (FALSE);
+	return (B_FALSE);
 }
 
 /*
@@ -337,7 +370,7 @@ make_leaf_vdev(const char *arg)
 	struct stat64 statbuf;
 	nvlist_t *vdev = NULL;
 	char *type = NULL;
-	int wholedisk = FALSE;
+	boolean_t wholedisk = B_FALSE;
 
 	/*
 	 * Determine what type of vdev this is, and put the full path into
@@ -350,7 +383,7 @@ make_leaf_vdev(const char *arg)
 		 * examining the file descriptor afterwards.
 		 */
 		if (is_whole_disk(arg, &statbuf)) {
-			wholedisk = TRUE;
+			wholedisk = B_TRUE;
 		} else if (stat64(arg, &statbuf) != 0) {
 			(void) fprintf(stderr,
 			    gettext("cannot open '%s': %s\n"),
@@ -369,7 +402,7 @@ make_leaf_vdev(const char *arg)
 		(void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT,
 		    arg);
 		if (is_whole_disk(path, &statbuf)) {
-			wholedisk = TRUE;
+			wholedisk = B_TRUE;
 		} else if (stat64(path, &statbuf) != 0) {
 			/*
 			 * If we got ENOENT, then the user gave us
@@ -472,8 +505,9 @@ make_leaf_vdev(const char *arg)
  * 	spec have consistent replication levels.
  */
 typedef struct replication_level {
-	char	*type;
-	int	level;
+	char *zprl_type;	/* vdev type string */
+	uint64_t zprl_children;	/* 1 for a leaf, else child count */
+	uint64_t zprl_parity;	/* raidz NPARITY, 0 otherwise */
 } replication_level_t;
 
 /*
@@ -482,7 +516,7 @@ typedef struct replication_level {
  * an error message will be displayed for each self-inconsistent vdev.
  */
 replication_level_t *
-get_replication(nvlist_t *nvroot, int fatal)
+get_replication(nvlist_t *nvroot, boolean_t fatal)
 {
 	nvlist_t **top;
 	uint_t t, toplevels;
@@ -491,14 +525,14 @@ get_replication(nvlist_t *nvroot, int fatal)
 	nvlist_t *nv;
 	char *type;
 	replication_level_t lastrep, rep, *ret;
-	int dontreport;
+	boolean_t dontreport;
 
 	ret = safe_malloc(sizeof (replication_level_t));
 
 	verify(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
 	    &top, &toplevels) == 0);
 
-	lastrep.type = NULL;
+	lastrep.zprl_type = NULL;
 	for (t = 0; t < toplevels; t++) {
 		nv = top[t];
 
@@ -509,8 +543,9 @@ get_replication(nvlist_t *nvroot, int fatal)
 			/*
 			 * This is a 'file' or 'disk' vdev.
 			 */
-			rep.type = type;
-			rep.level = 1;
+			rep.zprl_type = type;
+			rep.zprl_children = 1;
+			rep.zprl_parity = 0;
 		} else {
 			uint64_t vdev_size;
 
@@ -523,8 +558,17 @@ get_replication(nvlist_t *nvroot, int fatal)
 			 * We also check that the size of each vdev (if it can
 			 * be determined) is the same.
 			 */
-			rep.type = type;
-			rep.level = 0;
+			rep.zprl_type = type;
+			rep.zprl_children = 0;
+
+			if (strcmp(type, VDEV_TYPE_RAIDZ) == 0) {
+				verify(nvlist_lookup_uint64(nv,
+				    ZPOOL_CONFIG_NPARITY,
+				    &rep.zprl_parity) == 0);
+				assert(rep.zprl_parity != 0);
+			} else {
+				rep.zprl_parity = 0;
+			}
 
 			/*
 			 * The 'dontreport' variable indicatest that we've
@@ -542,7 +586,7 @@ get_replication(nvlist_t *nvroot, int fatal)
 				char *childtype;
 				int fd, err;
 
-				rep.level++;
+				rep.zprl_children++;
 
 				verify(nvlist_lookup_string(cnv,
 				    ZPOOL_CONFIG_TYPE, &childtype) == 0);
@@ -563,10 +607,10 @@ get_replication(nvlist_t *nvroot, int fatal)
 						    "mismatched replication "
 						    "level: %s contains both "
 						    "files and devices\n"),
-						    rep.type);
+						    rep.zprl_type);
 					else
 						return (NULL);
-					dontreport = TRUE;
+					dontreport = B_TRUE;
 				}
 
 				/*
@@ -611,10 +655,10 @@ get_replication(nvlist_t *nvroot, int fatal)
 						vdev_error(gettext(
 						    "%s contains devices of "
 						    "different sizes\n"),
-						    rep.type);
+						    rep.zprl_type);
 					else
 						return (NULL);
-					dontreport = TRUE;
+					dontreport = B_TRUE;
 				}
 
 				type = childtype;
@@ -627,30 +671,45 @@ get_replication(nvlist_t *nvroot, int fatal)
 		 * vdev in 'rep'.  Compare it to 'lastrep' to see if its
 		 * different.
 		 */
-		if (lastrep.type != NULL) {
-			if (strcmp(lastrep.type, rep.type) != 0) {
+		if (lastrep.zprl_type != NULL) {
+			if (strcmp(lastrep.zprl_type, rep.zprl_type) != 0) {
 				if (ret != NULL)
 					free(ret);
 				ret = NULL;
 				if (fatal)
 					vdev_error(gettext(
-					    "mismatched replication "
-					    "level: both %s and %s vdevs are "
+					    "mismatched replication level: "
+					    "both %s and %s vdevs are "
 					    "present\n"),
-					    lastrep.type, rep.type);
+					    lastrep.zprl_type, rep.zprl_type);
 				else
 					return (NULL);
-			} else if (lastrep.level != rep.level) {
+			} else if (lastrep.zprl_parity != rep.zprl_parity) {
 				if (ret)
 					free(ret);
 				ret = NULL;
 				if (fatal)
 					vdev_error(gettext(
-					    "mismatched replication "
-					    "level: %d-way %s and %d-way %s "
+					    "mismatched replication level: "
+					    "both %llu and %llu device parity "
+					    "%s vdevs are present\n"),
+					    lastrep.zprl_parity,
+					    rep.zprl_parity,
+					    rep.zprl_type);
+				else
+					return (NULL);
+			} else if (lastrep.zprl_children != rep.zprl_children) {
+				if (ret)
+					free(ret);
+				ret = NULL;
+				if (fatal)
+					vdev_error(gettext(
+					    "mismatched replication level: "
+					    "both %llu-way and %llu-way %s "
 					    "vdevs are present\n"),
-					    lastrep.level, lastrep.type,
-					    rep.level, rep.type);
+					    lastrep.zprl_children,
+					    rep.zprl_children,
+					    rep.zprl_type);
 				else
 					return (NULL);
 			}
@@ -658,10 +717,8 @@ get_replication(nvlist_t *nvroot, int fatal)
 		lastrep = rep;
 	}
 
-	if (ret != NULL) {
-		ret->type = rep.type;
-		ret->level = rep.level;
-	}
+	if (ret != NULL)
+		*ret = rep;
 
 	return (ret);
 }
@@ -687,7 +744,7 @@ check_replication(nvlist_t *config, nvlist_t *newroot)
 
 		verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
 		    &nvroot) == 0);
-		if ((current = get_replication(nvroot, FALSE)) == NULL)
+		if ((current = get_replication(nvroot, B_FALSE)) == NULL)
 			return (0);
 	}
 
@@ -695,7 +752,7 @@ check_replication(nvlist_t *config, nvlist_t *newroot)
 	 * Get the replication level of the new vdev spec, reporting any
 	 * inconsistencies found.
 	 */
-	if ((new = get_replication(newroot, TRUE)) == NULL) {
+	if ((new = get_replication(newroot, B_TRUE)) == NULL) {
 		free(current);
 		return (-1);
 	}
@@ -706,13 +763,24 @@ check_replication(nvlist_t *config, nvlist_t *newroot)
 	 */
 	ret = 0;
 	if (current != NULL) {
-		if (strcmp(current->type, new->type) != 0 ||
-		    current->level != new->level) {
+		if (strcmp(current->zprl_type, new->zprl_type) != 0) {
 			vdev_error(gettext(
-			    "mismatched replication level: pool uses %d-way %s "
-			    "and new vdev uses %d-way %s\n"),
-			    current->level, current->type, new->level,
-			    new->type);
+			    "mismatched replication level: pool uses %s "
+			    "and new vdev is %s\n"),
+			    current->zprl_type, new->zprl_type);
+			ret = -1;
+		} else if (current->zprl_parity != new->zprl_parity) {
+			vdev_error(gettext(
+			    "mismatched replication level: pool uses %llu "
+			    "device parity and new vdev uses %llu\n"),
+			    current->zprl_parity, new->zprl_parity);
+			ret = -1;
+		} else if (current->zprl_children != new->zprl_children) {
+			vdev_error(gettext(
+			    "mismatched replication level: pool uses %llu-way "
+			    "%s and new vdev uses %llu-way %s\n"),
+			    current->zprl_children, current->zprl_type,
+			    new->zprl_children, new->zprl_type);
 			ret = -1;
 		}
 	}
@@ -795,10 +863,12 @@ label_disk(char *name)
 		(void) fprintf(stderr, gettext("use fdisk(1M) to partition "
 		    "the disk, and provide a specific slice\n"));
 		(void) close(fd);
+		efi_free(vtoc);
 		return (-1);
 	}
 
 	(void) close(fd);
+	efi_free(vtoc);
 	return (0);
 }
 
@@ -892,20 +962,75 @@ make_disks(nvlist_t *nv)
 		if ((ret = make_disks(child[c])) != 0)
 			return (ret);
 
+	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
+	    &child, &children) == 0)
+		for (c = 0; c < children; c++)
+			if ((ret = make_disks(child[c])) != 0)
+				return (ret);
+
 	return (0);
 }
 
 /*
+ * Determine if the given path is a hot spare within the given configuration.
+ */
+static boolean_t
+is_spare(nvlist_t *config, const char *path)
+{
+	int fd, ok;
+	pool_state_t state;
+	char *name = NULL;
+	nvlist_t *label;
+	uint64_t guid, spareguid;
+	nvlist_t *nvroot, **spares;
+	uint_t i, nspares;
+	boolean_t inuse;
+
+	if ((fd = open(path, O_RDONLY)) < 0)
+		return (B_FALSE);
+
+	/*
+	 * zpool_in_use() may allocate the pool name; free it on every path.
+	 */
+	ok = zpool_in_use(g_zfs, fd, &state, &name, &inuse) == 0 && inuse &&
+	    state == POOL_STATE_SPARE && zpool_read_label(fd, &label) == 0;
+	free(name);
+	(void) close(fd);
+	if (!ok)
+		return (B_FALSE);
+
+	verify(nvlist_lookup_uint64(label, ZPOOL_CONFIG_GUID, &guid) == 0);
+	nvlist_free(label);	/* only the guid is needed */
+
+	verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
+	    &nvroot) == 0);
+	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
+	    &spares, &nspares) == 0) {
+		for (i = 0; i < nspares; i++) {
+			verify(nvlist_lookup_uint64(spares[i],
+			    ZPOOL_CONFIG_GUID, &spareguid) == 0);
+			if (spareguid == guid)
+				return (B_TRUE);
+		}
+	}
+
+	return (B_FALSE);
+}
+
+/*
  * Go through and find any devices that are in use.  We rely on libdiskmgt for
  * the majority of this task.
  */
 int
-check_in_use(nvlist_t *nv, int force)
+check_in_use(nvlist_t *config, nvlist_t *nv, int force, int isreplacing,
+    int isspare)
 {
 	nvlist_t **child;
 	uint_t c, children;
 	char *type, *path;
 	int ret;
+	char buf[MAXPATHLEN];
+	uint64_t wholedisk;
 
 	verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0);
 
@@ -914,22 +1039,76 @@ check_in_use(nvlist_t *nv, int force)
 
 		verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0);
 
+		/*
+		 * As a generic check, we look to see if this is a replace of a
+		 * hot spare within the same pool.  If so, we allow it
+		 * regardless of what libdiskmgt or zpool_in_use() says.
+		 */
+		if (isreplacing) {
+			if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
+			    &wholedisk) == 0 && wholedisk)
+				(void) snprintf(buf, sizeof (buf), "%ss0",
+				    path);
+			else
+				(void) strlcpy(buf, path, sizeof (buf));
+			if (is_spare(config, buf))
+				return (0);
+		}
+
 		if (strcmp(type, VDEV_TYPE_DISK) == 0)
-			ret = check_device(path, force);
+			ret = check_device(path, force, isspare);
 
 		if (strcmp(type, VDEV_TYPE_FILE) == 0)
-			ret = check_file(path, force);
+			ret = check_file(path, force, isspare);
 
 		return (ret);
 	}
 
 	for (c = 0; c < children; c++)
-		if ((ret = check_in_use(child[c], force)) != 0)
+		if ((ret = check_in_use(config, child[c], force,
+		    isreplacing, B_FALSE)) != 0)
 			return (ret);
 
+	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
+	    &child, &children) == 0)
+		for (c = 0; c < children; c++)
+			if ((ret = check_in_use(config, child[c], force,
+			    isreplacing, B_TRUE)) != 0)
+				return (ret);
+
 	return (0);
 }
 
+/*
+ * Map a vdev grouping keyword to its vdev type, or return NULL if 'type' is
+ * not one.  When 'mindev' is non-NULL it receives the minimum device count.
+ */
+const char *
+is_grouping(const char *type, int *mindev)
+{
+	const char *vdevtype;
+	int required;
+
+	if (strcmp(type, "raidz") == 0 || strcmp(type, "raidz1") == 0) {
+		vdevtype = VDEV_TYPE_RAIDZ;
+		required = 2;
+	} else if (strcmp(type, "raidz2") == 0) {
+		vdevtype = VDEV_TYPE_RAIDZ;
+		required = 3;
+	} else if (strcmp(type, "mirror") == 0) {
+		vdevtype = VDEV_TYPE_MIRROR;
+		required = 2;
+	} else if (strcmp(type, "spare") == 0) {
+		vdevtype = VDEV_TYPE_SPARE;
+		required = 1;
+	} else {
+		return (NULL);
+	}
+	if (mindev != NULL)
+		*mindev = required;
+	return (vdevtype);
+}
+
 /*
  * Construct a syntactically valid vdev specification,
  * and ensure that all devices and files exist and can be opened.
@@ -939,11 +1118,14 @@ check_in_use(nvlist_t *nv, int force)
 nvlist_t *
 construct_spec(int argc, char **argv)
 {
-	nvlist_t *nvroot, *nv, **top;
-	int t, toplevels;
+	nvlist_t *nvroot, *nv, **top, **spares;
+	int t, toplevels, mindev, nspares;
+	const char *type;
 
 	top = NULL;
 	toplevels = 0;
+	spares = NULL;
+	nspares = 0;
 
 	while (argc > 0) {
 		nv = NULL;
@@ -952,17 +1134,20 @@ construct_spec(int argc, char **argv)
 		 * If it's a mirror or raidz, the subsequent arguments are
 		 * its leaves -- until we encounter the next mirror or raidz.
 		 */
-		if (strcmp(argv[0], VDEV_TYPE_MIRROR) == 0 ||
-		    strcmp(argv[0], VDEV_TYPE_RAIDZ) == 0) {
-
-			char *type = argv[0];
+		if ((type = is_grouping(argv[0], &mindev)) != NULL) {
 			nvlist_t **child = NULL;
-			int children = 0;
-			int c;
+			int c, children = 0;
+
+			if (strcmp(type, VDEV_TYPE_SPARE) == 0 &&
+			    spares != NULL) {
+				(void) fprintf(stderr, gettext("invalid vdev "
+				    "specification: 'spare' can be "
+				    "specified only once\n"));
+				return (NULL);
+			}
 
 			for (c = 1; c < argc; c++) {
-				if (strcmp(argv[c], VDEV_TYPE_MIRROR) == 0 ||
-				    strcmp(argv[c], VDEV_TYPE_RAIDZ) == 0)
+				if (is_grouping(argv[c], NULL) != NULL)
 					break;
 				children++;
 				child = realloc(child,
@@ -974,29 +1159,38 @@ construct_spec(int argc, char **argv)
 				child[children - 1] = nv;
 			}
 
-			argc -= c;
-			argv += c;
-
-			/*
-			 * Mirrors and RAID-Z devices require at least
-			 * two components.
-			 */
-			if (children < 2) {
-				(void) fprintf(stderr,
-				    gettext("invalid vdev specification: "
-				    "%s requires at least 2 devices\n"), type);
+			if (children < mindev) {
+				(void) fprintf(stderr, gettext("invalid vdev "
+				    "specification: %s requires at least %d "
+				    "devices\n"), argv[0], mindev);
 				return (NULL);
 			}
 
-			verify(nvlist_alloc(&nv, NV_UNIQUE_NAME, 0) == 0);
-			verify(nvlist_add_string(nv, ZPOOL_CONFIG_TYPE,
-			    type) == 0);
-			verify(nvlist_add_nvlist_array(nv,
-			    ZPOOL_CONFIG_CHILDREN, child, children) == 0);
+			argc -= c;
+			argv += c;
+
+			if (strcmp(type, VDEV_TYPE_SPARE) == 0) {
+				spares = child;
+				nspares = children;
+				continue;
+			} else {
+				verify(nvlist_alloc(&nv, NV_UNIQUE_NAME,
+				    0) == 0);
+				verify(nvlist_add_string(nv, ZPOOL_CONFIG_TYPE,
+				    type) == 0);
+				if (strcmp(type, VDEV_TYPE_RAIDZ) == 0) {
+					verify(nvlist_add_uint64(nv,
+					    ZPOOL_CONFIG_NPARITY,
+					    mindev - 1) == 0);
+				}
+				verify(nvlist_add_nvlist_array(nv,
+				    ZPOOL_CONFIG_CHILDREN, child,
+				    children) == 0);
 
-			for (c = 0; c < children; c++)
-				nvlist_free(child[c]);
-			free(child);
+				for (c = 0; c < children; c++)
+					nvlist_free(child[c]);
+				free(child);
+			}
 		} else {
 			/*
 			 * We have a device.  Pass off to make_leaf_vdev() to
@@ -1015,6 +1209,13 @@ construct_spec(int argc, char **argv)
 		top[toplevels - 1] = nv;
 	}
 
+	if (toplevels == 0 && nspares == 0) {
+		(void) fprintf(stderr, gettext("invalid vdev "
+		    "specification: at least one toplevel vdev must be "
+		    "specified\n"));
+		return (NULL);
+	}
+
 	/*
 	 * Finally, create nvroot and add all top-level vdevs to it.
 	 */
@@ -1023,9 +1224,16 @@ construct_spec(int argc, char **argv)
 	    VDEV_TYPE_ROOT) == 0);
 	verify(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
 	    top, toplevels) == 0);
+	if (nspares != 0)
+		verify(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
+		    spares, nspares) == 0);
 
 	for (t = 0; t < toplevels; t++)
 		nvlist_free(top[t]);
+	for (t = 0; t < nspares; t++)
+		nvlist_free(spares[t]);
+	if (spares)
+		free(spares);
 	free(top);
 
 	return (nvroot);
@@ -1043,7 +1251,7 @@ construct_spec(int argc, char **argv)
  */
 nvlist_t *
 make_root_vdev(nvlist_t *poolconfig, int force, int check_rep,
-    int argc, char **argv)
+    boolean_t isreplacing, int argc, char **argv)
 {
 	nvlist_t *newroot;
 
@@ -1063,7 +1271,8 @@ make_root_vdev(nvlist_t *poolconfig, int force, int check_rep,
 	 * uses (such as a dedicated dump device) that even '-f' cannot
 	 * override.
 	 */
-	if (check_in_use(newroot, force) != 0) {
+	if (check_in_use(poolconfig, newroot, force, isreplacing,
+	    B_FALSE) != 0) {
 		nvlist_free(newroot);
 		return (NULL);
 	}
diff --git a/usr/src/cmd/ztest/ztest.c b/usr/src/cmd/ztest/ztest.c
index c74f227bed..e8065c74f5 100644
--- a/usr/src/cmd/ztest/ztest.c
+++ b/usr/src/cmd/ztest/ztest.c
@@ -114,6 +114,7 @@ static uint64_t zopt_vdevtime;
 static int zopt_ashift = SPA_MINBLOCKSHIFT;
 static int zopt_mirrors = 2;
 static int zopt_raidz = 4;
+static int zopt_raidz_parity = 1;
 static size_t zopt_vdev_size = SPA_MINDEVSIZE;
 static int zopt_datasets = 7;
 static int zopt_threads = 23;
@@ -346,6 +347,7 @@ usage(void)
 	    "\t[-a alignment_shift (default: %d) (use 0 for random)]\n"
 	    "\t[-m mirror_copies (default: %d)]\n"
 	    "\t[-r raidz_disks (default: %d)]\n"
+	    "\t[-R raidz_parity (default: %d)]\n"
 	    "\t[-d datasets (default: %d)]\n"
 	    "\t[-t threads (default: %d)]\n"
 	    "\t[-g gang_block_threshold (default: %s)]\n"
@@ -364,6 +366,7 @@ usage(void)
 	    zopt_ashift,			/* -a */
 	    zopt_mirrors,			/* -m */
 	    zopt_raidz,				/* -r */
+	    zopt_raidz_parity,			/* -R */
 	    zopt_datasets,			/* -d */
 	    zopt_threads,			/* -t */
 	    nice_gang_bang,			/* -g */
@@ -407,7 +410,7 @@ process_options(int argc, char **argv)
 	zio_gang_bang = 32 << 10;
 
 	while ((opt = getopt(argc, argv,
-	    "v:s:a:m:r:d:t:g:i:k:p:f:VET:P:")) != EOF) {
+	    "v:s:a:m:r:R:d:t:g:i:k:p:f:VET:P:")) != EOF) {
 		value = 0;
 		switch (opt) {
 		    case 'v':
@@ -415,6 +418,7 @@ process_options(int argc, char **argv)
 		    case 'a':
 		    case 'm':
 		    case 'r':
+		    case 'R':
 		    case 'd':
 		    case 't':
 		    case 'g':
@@ -440,6 +444,9 @@ process_options(int argc, char **argv)
 		    case 'r':
 			zopt_raidz = MAX(1, value);
 			break;
+		    case 'R':
+			zopt_raidz_parity = MIN(MAX(value, 1), 2);
+			break;
 		    case 'd':
 			zopt_datasets = MAX(1, value);
 			break;
@@ -480,8 +487,10 @@ process_options(int argc, char **argv)
 		}
 	}
 
+	zopt_raidz_parity = MIN(zopt_raidz_parity, zopt_raidz - 1);
+
 	zopt_vdevtime = (zopt_vdevs > 0 ? zopt_time / zopt_vdevs : UINT64_MAX);
-	zopt_maxfaults = MAX(zopt_mirrors, 1) * (zopt_raidz >= 2 ? 2 : 1) - 1;
+	zopt_maxfaults = MAX(zopt_mirrors, 1) * (zopt_raidz_parity + 1) - 1;
 }
 
 static uint64_t
@@ -542,6 +551,8 @@ make_vdev_raidz(size_t size, int r)
 	VERIFY(nvlist_alloc(&raidz, NV_UNIQUE_NAME, 0) == 0);
 	VERIFY(nvlist_add_string(raidz, ZPOOL_CONFIG_TYPE,
 	    VDEV_TYPE_RAIDZ) == 0);
+	VERIFY(nvlist_add_uint64(raidz, ZPOOL_CONFIG_NPARITY,
+	    zopt_raidz_parity) == 0);
 	VERIFY(nvlist_add_nvlist_array(raidz, ZPOOL_CONFIG_CHILDREN,
 	    child, r) == 0);
 
@@ -671,7 +682,7 @@ ztest_replay_create(ztest_replay_t *zr, lr_create_t *lr, boolean_t byteswap)
 
 	error = dmu_object_claim(os, lr->lr_doid, lr->lr_mode, 0,
 	    DMU_OT_NONE, 0, tx);
-	ASSERT(error == 0);
+	ASSERT3U(error, ==, 0);
 	dmu_tx_commit(tx);
 
 	if (zopt_verbose >= 5) {
diff --git a/usr/src/lib/libdiskmgt/common/entry.c b/usr/src/lib/libdiskmgt/common/entry.c
index 860801b41d..61bc9d60d4 100644
--- a/usr/src/lib/libdiskmgt/common/entry.c
+++ b/usr/src/lib/libdiskmgt/common/entry.c
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -966,6 +965,10 @@ dm_get_usage_string(char *what, char *how, char **usage_string)
 		*usage_string = dgettext(TEXT_DOMAIN,
 		    "%s is part of active ZFS pool %s. Please see zpool(1M)."
 		    "\n");
+	} else if (strcmp(what, DM_USE_SPARE_ZPOOL) == 0) {
+		*usage_string = dgettext(TEXT_DOMAIN,
+		    "%s is reserved as a hot spare for ZFS pool %s.  Please "
+		    "see zpool(1M).\n");
 	}
 }
 void
diff --git a/usr/src/lib/libdiskmgt/common/inuse_zpool.c b/usr/src/lib/libdiskmgt/common/inuse_zpool.c
index 1637ace92d..a7cf203a2f 100644
--- a/usr/src/lib/libdiskmgt/common/inuse_zpool.c
+++ b/usr/src/lib/libdiskmgt/common/inuse_zpool.c
@@ -46,17 +46,21 @@
 #include <ctype.h>
 #include <sys/fs/zfs.h>
 
+#include <libzfs.h>
 #include "libdiskmgt.h"
 #include "disks_private.h"
 
 /*
  * Pointers to libzfs.so functions that we dynamically resolve.
  */
-static	int	(*zfsdl_zpool_in_use)(int fd, pool_state_t *state, char **name);
+static int (*zfsdl_zpool_in_use)(libzfs_handle_t *hdl, int fd,
+    pool_state_t *state, char **name, boolean_t *);
+static libzfs_handle_t *(*zfsdl_libzfs_init)(boolean_t);
 
 static mutex_t			init_lock = DEFAULTMUTEX;
 static rwlock_t			zpool_lock = DEFAULTRWLOCK;
-static	int			initialized = 0;
+static boolean_t		initialized;
+static libzfs_handle_t		*zfs_hdl;
 
 static void	*init_zpool();
 
@@ -67,6 +71,7 @@ inuse_zpool_common(char *slice, nvlist_t *attrs, int *errp, char *type)
 	char		*name;
 	int		fd;
 	pool_state_t	state;
+	boolean_t	used;
 
 	*errp = 0;
 	if (slice == NULL) {
@@ -83,15 +88,21 @@ inuse_zpool_common(char *slice, nvlist_t *attrs, int *errp, char *type)
 			(void) mutex_unlock(&init_lock);
 			return (found);
 		}
-		initialized = 1;
+		initialized = B_TRUE;
 	}
 	(void) mutex_unlock(&init_lock);
 	(void) rw_rdlock(&zpool_lock);
 	if ((fd = open(slice, O_RDONLY)) > 0) {
-		if (zfsdl_zpool_in_use(fd, &state, &name)) {
+		name = NULL;
+		if (zfsdl_zpool_in_use(zfs_hdl, fd, &state,
+		    &name, &used) == 0 && used) {
 			if (strcmp(type, DM_USE_ACTIVE_ZPOOL) == 0) {
-				if (state == POOL_STATE_ACTIVE)
+				if (state == POOL_STATE_ACTIVE) {
 					found = 1;
+				} else if (state == POOL_STATE_SPARE) {
+					found = 1;
+					type = DM_USE_SPARE_ZPOOL;
+				}
 			} else {
 				found = 1;
 			}
@@ -100,9 +111,11 @@ inuse_zpool_common(char *slice, nvlist_t *attrs, int *errp, char *type)
 				libdiskmgt_add_str(attrs, DM_USED_BY,
 				    type, errp);
 				libdiskmgt_add_str(attrs, DM_USED_NAME,
-					name, errp);
+				    name, errp);
 			}
 		}
+		if (name)
+			free(name);
 		(void) close(fd);
 	}
 	(void) rw_unlock(&zpool_lock);
@@ -133,15 +146,24 @@ init_zpool()
 	if ((lh = dlopen("libzfs.so", RTLD_NOW)) == NULL) {
 		return (lh);
 	}
+
 	/*
 	 * Instantiate the functions needed to get zpool configuration
 	 * data
 	 */
-	if ((zfsdl_zpool_in_use = (int (*)(int, pool_state_t *, char **))
+	if ((zfsdl_libzfs_init = (libzfs_handle_t *(*)(boolean_t))
+	    dlsym(lh, "libzfs_init")) == NULL ||
+	    (zfsdl_zpool_in_use = (int (*)(libzfs_handle_t *, int,
+	    pool_state_t *, char **, boolean_t *))
 	    dlsym(lh, "zpool_in_use")) == NULL) {
 		(void) dlclose(lh);
 		return (NULL);
 	}
 
+	if ((zfs_hdl = (*zfsdl_libzfs_init)(B_FALSE)) == NULL) {
+		(void) dlclose(lh);
+		return (NULL);
+	}
+
 	return (lh);
 }
diff --git a/usr/src/lib/libdiskmgt/common/libdiskmgt.h b/usr/src/lib/libdiskmgt/common/libdiskmgt.h
index aa6df0967e..7d6fef46d4 100644
--- a/usr/src/lib/libdiskmgt/common/libdiskmgt.h
+++ b/usr/src/lib/libdiskmgt/common/libdiskmgt.h
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -215,6 +214,7 @@ typedef enum {
 #define	DM_USE_VFSTAB		"vfstab"
 #define	DM_USE_EXPORTED_ZPOOL	"exported_zpool"
 #define	DM_USE_ACTIVE_ZPOOL	"active_zpool"
+#define	DM_USE_SPARE_ZPOOL	"spare_zpool"
 
 /* event */
 #define	DM_EV_NAME		"name"
diff --git a/usr/src/lib/libzfs/common/libzfs.h b/usr/src/lib/libzfs/common/libzfs.h
index 0044ccd7c9..bf4b2874ad 100644
--- a/usr/src/lib/libzfs/common/libzfs.h
+++ b/usr/src/lib/libzfs/common/libzfs.h
@@ -47,16 +47,78 @@ extern "C" {
 #define	ZFS_MAXPROPLEN		MAXPATHLEN
 
 /*
+ * libzfs errors
+ */
+enum {
+	EZFS_NOMEM = 2000,	/* out of memory */
+	EZFS_BADPROP,		/* invalid property value */
+	EZFS_PROPREADONLY,	/* cannot set readonly property */
+	EZFS_PROPTYPE,		/* property does not apply to dataset type */
+	EZFS_PROPNONINHERIT,	/* property is not inheritable */
+	EZFS_PROPSPACE,		/* bad quota or reservation */
+	EZFS_BADTYPE,		/* dataset is not of appropriate type */
+	EZFS_BUSY,		/* pool or dataset is busy */
+	EZFS_EXISTS,		/* pool or dataset already exists */
+	EZFS_NOENT,		/* no such pool or dataset */
+	EZFS_BADSTREAM,		/* bad backup stream */
+	EZFS_DSREADONLY,	/* dataset is readonly */
+	EZFS_VOLTOOBIG,		/* volume is too large for 32-bit system */
+	EZFS_VOLHASDATA,	/* volume already contains data */
+	EZFS_INVALIDNAME,	/* invalid dataset name */
+	EZFS_BADRESTORE,	/* unable to restore to destination */
+	EZFS_BADBACKUP,		/* backup failed */
+	EZFS_BADTARGET,		/* bad attach/detach/replace target */
+	EZFS_NODEVICE,		/* no such device in pool */
+	EZFS_BADDEV,		/* invalid device to add */
+	EZFS_NOREPLICAS,	/* no valid replicas */
+	EZFS_RESILVERING,	/* currently resilvering */
+	EZFS_BADVERSION,	/* unsupported version */
+	EZFS_POOLUNAVAIL,	/* pool is currently unavailable */
+	EZFS_DEVOVERFLOW,	/* too many devices in one vdev */
+	EZFS_BADPATH,		/* must be an absolute path */
+	EZFS_CROSSTARGET,	/* rename or clone across pool or dataset */
+	EZFS_ZONED,		/* used improperly in local zone */
+	EZFS_MOUNTFAILED,	/* failed to mount dataset */
+	EZFS_UMOUNTFAILED,	/* failed to unmount dataset */
+	EZFS_UNSHAREFAILED,	/* unshare(1M) failed */
+	EZFS_SHAREFAILED,	/* share(1M) failed */
+	EZFS_DEVLINKS,		/* failed to create zvol links */
+	EZFS_PERM,		/* permission denied */
+	EZFS_NOSPC,		/* out of space */
+	EZFS_IO,		/* I/O error */
+	EZFS_INTR,		/* signal received */
+	EZFS_ISSPARE,		/* device is a hot spare */
+	EZFS_INVALCONFIG,	/* invalid vdev configuration */
+	EZFS_UNKNOWN		/* unknown error */
+};
+
+/*
  * Basic handle types
  */
 typedef struct zfs_handle zfs_handle_t;
 typedef struct zpool_handle zpool_handle_t;
+typedef struct libzfs_handle libzfs_handle_t;
+
+/*
+ * Library initialization
+ */
+extern libzfs_handle_t *libzfs_init(void);
+extern void libzfs_fini(libzfs_handle_t *);
+
+extern libzfs_handle_t *zpool_get_handle(zpool_handle_t *);
+extern libzfs_handle_t *zfs_get_handle(zfs_handle_t *);
+
+extern void libzfs_print_on_error(libzfs_handle_t *, boolean_t);
+
+extern int libzfs_errno(libzfs_handle_t *);
+extern const char *libzfs_error_action(libzfs_handle_t *);
+extern const char *libzfs_error_description(libzfs_handle_t *);
 
 /*
  * Basic handle functions
  */
-extern zpool_handle_t *zpool_open(const char *);
-extern zpool_handle_t *zpool_open_canfail(const char *);
+extern zpool_handle_t *zpool_open(libzfs_handle_t *, const char *);
+extern zpool_handle_t *zpool_open_canfail(libzfs_handle_t *, const char *);
 extern void zpool_close(zpool_handle_t *);
 extern const char *zpool_get_name(zpool_handle_t *);
 extern uint64_t zpool_get_guid(zpool_handle_t *);
@@ -64,17 +126,19 @@ extern uint64_t zpool_get_space_used(zpool_handle_t *);
 extern uint64_t zpool_get_space_total(zpool_handle_t *);
 extern int zpool_get_root(zpool_handle_t *, char *, size_t);
 extern int zpool_get_state(zpool_handle_t *);
+extern uint64_t zpool_get_version(zpool_handle_t *);
 
 /*
  * Iterate over all active pools in the system.
  */
 typedef int (*zpool_iter_f)(zpool_handle_t *, void *);
-extern int zpool_iter(zpool_iter_f, void *);
+extern int zpool_iter(libzfs_handle_t *, zpool_iter_f, void *);
 
 /*
  * Functions to create and destroy pools
  */
-extern int zpool_create(const char *, nvlist_t *, const char *);
+extern int zpool_create(libzfs_handle_t *, const char *, nvlist_t *,
+    const char *);
 extern int zpool_destroy(zpool_handle_t *);
 extern int zpool_add(zpool_handle_t *, nvlist_t *);
 
@@ -88,8 +152,9 @@ extern int zpool_vdev_offline(zpool_handle_t *, const char *, int);
 extern int zpool_vdev_attach(zpool_handle_t *, const char *, const char *,
     nvlist_t *, int);
 extern int zpool_vdev_detach(zpool_handle_t *, const char *);
+extern int zpool_vdev_remove(zpool_handle_t *, const char *);
 extern int zpool_clear(zpool_handle_t *, const char *);
-extern uint64_t zpool_vdev_to_guid(zpool_handle_t *, const char *);
+extern nvlist_t *zpool_find_vdev(zpool_handle_t *, const char *, boolean_t *);
 
 /*
  * Pool health statistics.
@@ -143,24 +208,25 @@ extern int zpool_get_errlog(zpool_handle_t *, nvlist_t ***, size_t *);
  * Import and export functions
  */
 extern int zpool_export(zpool_handle_t *);
-extern int zpool_import(nvlist_t *, const char *, const char *);
+extern int zpool_import(libzfs_handle_t *, nvlist_t *, const char *,
+    const char *);
 
 /*
  * Search for pools to import
  */
-extern nvlist_t *zpool_find_import(int, char **);
+extern nvlist_t *zpool_find_import(libzfs_handle_t *, int, char **);
 
 /*
  * Miscellaneous pool functions
  */
-extern char *zpool_vdev_name(zpool_handle_t *, nvlist_t *);
+extern char *zpool_vdev_name(libzfs_handle_t *, zpool_handle_t *, nvlist_t *);
 extern int zpool_upgrade(zpool_handle_t *);
 
 /*
  * Basic handle manipulations.  These functions do not create or destroy the
  * underlying datasets, only the references to them.
  */
-extern zfs_handle_t *zfs_open(const char *, int);
+extern zfs_handle_t *zfs_open(libzfs_handle_t *, const char *, int);
 extern void zfs_close(zfs_handle_t *);
 extern zfs_type_t zfs_get_type(const zfs_handle_t *);
 extern const char *zfs_get_name(const zfs_handle_t *);
@@ -182,11 +248,11 @@ typedef enum {
 const char *zfs_prop_to_name(zfs_prop_t);
 int zfs_prop_set(zfs_handle_t *, zfs_prop_t, const char *);
 int zfs_prop_get(zfs_handle_t *, zfs_prop_t, char *, size_t, zfs_source_t *,
-    char *, size_t, int);
+    char *, size_t, boolean_t);
 int zfs_prop_get_numeric(zfs_handle_t *, zfs_prop_t, uint64_t *, zfs_source_t *,
     char *, size_t);
 uint64_t zfs_prop_get_int(zfs_handle_t *, zfs_prop_t);
-int zfs_prop_validate(zfs_prop_t, const char *, uint64_t *);
+int zfs_prop_validate(libzfs_handle_t *, zfs_prop_t, const char *, uint64_t *);
 int zfs_prop_inheritable(zfs_prop_t);
 int zfs_prop_inherit(zfs_handle_t *, zfs_prop_t);
 const char *zfs_prop_values(zfs_prop_t);
@@ -206,7 +272,7 @@ int zfs_get_proplist(char *fields, zfs_prop_t *proplist, int max, int *count,
  * Iterator functions.
  */
 typedef int (*zfs_iter_f)(zfs_handle_t *, void *);
-extern int zfs_iter_root(zfs_iter_f, void *);
+extern int zfs_iter_root(libzfs_handle_t *, zfs_iter_f, void *);
 extern int zfs_iter_children(zfs_handle_t *, zfs_iter_f, void *);
 extern int zfs_iter_dependents(zfs_handle_t *, zfs_iter_f, void *);
 extern int zfs_iter_filesystems(zfs_handle_t *, zfs_iter_f, void *);
@@ -215,14 +281,16 @@ extern int zfs_iter_snapshots(zfs_handle_t *, zfs_iter_f, void *);
 /*
  * Functions to create and destroy datasets.
  */
-extern int zfs_create(const char *, zfs_type_t, const char *, const char *);
+extern int zfs_create(libzfs_handle_t *, const char *, zfs_type_t,
+    const char *, const char *);
 extern int zfs_destroy(zfs_handle_t *);
 extern int zfs_clone(zfs_handle_t *, const char *);
-extern int zfs_snapshot(const char *);
+extern int zfs_snapshot(libzfs_handle_t *, const char *);
 extern int zfs_rollback(zfs_handle_t *, zfs_handle_t *, int);
 extern int zfs_rename(zfs_handle_t *, const char *);
 extern int zfs_send(zfs_handle_t *, zfs_handle_t *);
-extern int zfs_receive(const char *, int, int, int);
+extern int zfs_receive(libzfs_handle_t *, const char *, int, int, int);
+extern int zfs_promote(zfs_handle_t *);
 
 /*
  * Miscellaneous functions.
@@ -234,7 +302,7 @@ extern int zfs_name_valid(const char *, zfs_type_t);
 /*
  * Mount support functions.
  */
-extern int zfs_is_mounted(zfs_handle_t *, char **);
+extern boolean_t zfs_is_mounted(zfs_handle_t *, char **);
 extern int zfs_mount(zfs_handle_t *, const char *, int);
 extern int zfs_unmount(zfs_handle_t *, const char *, int);
 extern int zfs_unmountall(zfs_handle_t *, int);
@@ -242,17 +310,12 @@ extern int zfs_unmountall(zfs_handle_t *, int);
 /*
  * Share support functions.
  */
-extern int zfs_is_shared(zfs_handle_t *, char **);
+extern boolean_t zfs_is_shared(zfs_handle_t *, char **);
 extern int zfs_share(zfs_handle_t *);
 extern int zfs_unshare(zfs_handle_t *, const char *);
 extern int zfs_unshareall(zfs_handle_t *);
 
 /*
- * For clients that need to capture error output.
- */
-extern void zfs_set_error_handler(void (*)(const char *, va_list));
-
-/*
  * When dealing with nvlists, verify() is extremely useful
  */
 #ifdef NDEBUG
@@ -276,12 +339,13 @@ extern int zfs_remove_link(zfs_handle_t *);
 /*
  * Given a device or file, determine if it is part of a pool.
  */
-extern int zpool_in_use(int fd, pool_state_t *state, char **name);
+extern int zpool_in_use(libzfs_handle_t *, int, pool_state_t *, char **,
+    boolean_t *);
 
 /*
  * ftyp special.  Read the label from a given device.
  */
-extern nvlist_t *zpool_read_label(int fd);
+extern int zpool_read_label(int, nvlist_t **);
 
 /*
  * Create and remove zvol /dev links
@@ -289,21 +353,6 @@ extern nvlist_t *zpool_read_label(int fd);
 extern int zpool_create_zvol_links(zpool_handle_t *);
 extern int zpool_remove_zvol_links(zpool_handle_t *);
 
-/*
- * zoneadmd hack
- */
-extern void zfs_init(void);
-
-/*
- * Useful defines
- */
-#ifndef TRUE
-#define	TRUE	1
-#endif
-#ifndef FALSE
-#define	FALSE	0
-#endif
-
 #ifdef	__cplusplus
 }
 #endif
diff --git a/usr/src/lib/libzfs/common/libzfs_changelist.c b/usr/src/lib/libzfs/common/libzfs_changelist.c
index 57fcc1497c..04270dfe51 100644
--- a/usr/src/lib/libzfs/common/libzfs_changelist.c
+++ b/usr/src/lib/libzfs/common/libzfs_changelist.c
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -73,11 +72,11 @@ struct prop_changelist {
 	zfs_prop_t		cl_realprop;
 	uu_list_pool_t		*cl_pool;
 	uu_list_t		*cl_list;
-	int			cl_waslegacy;
-	int			cl_allchildren;
-	int			cl_alldependents;
+	boolean_t		cl_waslegacy;
+	boolean_t		cl_allchildren;
+	boolean_t		cl_alldependents;
 	int			cl_flags;
-	int			cl_haszonedchild;
+	boolean_t		cl_haszonedchild;
 };
 
 /*
@@ -109,7 +108,8 @@ changelist_prefix(prop_changelist_t *clp)
 		 */
 		if (cn->cn_handle->zfs_volblocksize &&
 		    clp->cl_realprop == ZFS_PROP_NAME) {
-			if (zvol_remove_link(cn->cn_handle->zfs_name) != 0)
+			if (zvol_remove_link(cn->cn_handle->zfs_hdl,
+			    cn->cn_handle->zfs_name) != 0)
 				ret = -1;
 		} else if (zfs_unmount(cn->cn_handle, NULL, clp->cl_flags) != 0)
 			ret = -1;
@@ -167,7 +167,8 @@ changelist_postfix(prop_changelist_t *clp)
 		 */
 		if (cn->cn_handle->zfs_volblocksize &&
 		    clp->cl_realprop == ZFS_PROP_NAME) {
-			if (zvol_create_link(cn->cn_handle->zfs_name) != 0)
+			if (zvol_create_link(cn->cn_handle->zfs_hdl,
+			    cn->cn_handle->zfs_name) != 0)
 				ret = -1;
 			continue;
 		}
@@ -186,7 +187,7 @@ changelist_postfix(prop_changelist_t *clp)
 			char shareopts[ZFS_MAXPROPLEN];
 			if (zfs_prop_get(cn->cn_handle, ZFS_PROP_SHARENFS,
 			    shareopts, sizeof (shareopts), NULL, NULL, 0,
-			    FALSE) == 0 && strcmp(shareopts, "off") == 0)
+			    B_FALSE) == 0 && strcmp(shareopts, "off") == 0)
 				ret = zfs_unshare(cn->cn_handle, NULL);
 			else
 				ret = zfs_share(cn->cn_handle);
@@ -199,22 +200,22 @@ changelist_postfix(prop_changelist_t *clp)
 /*
  * Is this "dataset" a child of "parent"?
  */
-static int
+static boolean_t
 isa_child_of(char *dataset, const char *parent)
 {
 	int len;
 
 	/* snapshot does not have a child */
 	if (strchr(parent, '@'))
-		return (FALSE);
+		return (B_FALSE);
 
 	len = strlen(parent);
 
 	if (strncmp(dataset, parent, len) == 0 &&
 	    (dataset[len] == '/' || dataset[len] == '\0'))
-		return (TRUE);
+		return (B_TRUE);
 	else
-		return (FALSE);
+		return (B_FALSE);
 
 }
 
@@ -326,6 +327,9 @@ changelist_free(prop_changelist_t *clp)
 		free(cn);
 	}
 
+	uu_list_walk_end(walk);
+
+	uu_list_destroy(clp->cl_list);
 	uu_list_pool_destroy(clp->cl_pool);
 
 	free(clp);
@@ -353,12 +357,18 @@ change_one(zfs_handle_t *zhp, void *data)
 	if (!(zhp->zfs_volblocksize && clp->cl_realprop == ZFS_PROP_NAME) &&
 	    zfs_prop_get(zhp, clp->cl_prop, property,
 	    sizeof (property), &sourcetype, where, sizeof (where),
-	    FALSE) != 0)
+	    B_FALSE) != 0) {
+		zfs_close(zhp);
 		return (0);
+	}
 
 	if (clp->cl_alldependents || clp->cl_allchildren ||
 	    sourcetype == ZFS_SRC_DEFAULT || sourcetype == ZFS_SRC_INHERITED) {
-		cn = zfs_malloc(sizeof (prop_changenode_t));
+		if ((cn = zfs_alloc(zfs_get_handle(zhp),
+		    sizeof (prop_changenode_t))) == NULL) {
+			zfs_close(zhp);
+			return (-1);
+		}
 
 		cn->cn_handle = zhp;
 		cn->cn_mounted = zfs_is_mounted(zhp, NULL);
@@ -367,7 +377,7 @@ change_one(zfs_handle_t *zhp, void *data)
 
 		/* indicate if any child is exported to a local zone */
 		if ((getzoneid() == GLOBAL_ZONEID) && cn->cn_zoned)
-			clp->cl_haszonedchild = TRUE;
+			clp->cl_haszonedchild = B_TRUE;
 
 		uu_list_node_init(cn, &cn->cn_listnode, clp->cl_pool);
 
@@ -399,11 +409,14 @@ change_one(zfs_handle_t *zhp, void *data)
 prop_changelist_t *
 changelist_gather(zfs_handle_t *zhp, zfs_prop_t prop, int flags)
 {
-	prop_changelist_t *clp = zfs_malloc(sizeof (prop_changelist_t));
+	prop_changelist_t *clp;
 	prop_changenode_t *cn;
 	zfs_handle_t *temp;
 	char property[ZFS_MAXPROPLEN];
 
+	if ((clp = zfs_alloc(zhp->zfs_hdl, sizeof (prop_changelist_t))) == NULL)
+		return (NULL);
+
 	clp->cl_pool = uu_list_pool_create("changelist_pool",
 	    sizeof (prop_changenode_t),
 	    offsetof(prop_changenode_t, cn_listnode),
@@ -423,10 +436,10 @@ changelist_gather(zfs_handle_t *zhp, zfs_prop_t prop, int flags)
 	 */
 	if (prop == ZFS_PROP_NAME) {
 		clp->cl_prop = ZFS_PROP_MOUNTPOINT;
-		clp->cl_alldependents = TRUE;
+		clp->cl_alldependents = B_TRUE;
 	} else if (prop == ZFS_PROP_ZONED) {
 		clp->cl_prop = ZFS_PROP_MOUNTPOINT;
-		clp->cl_allchildren = TRUE;
+		clp->cl_allchildren = B_TRUE;
 	} else {
 		clp->cl_prop = prop;
 	}
@@ -450,8 +463,9 @@ changelist_gather(zfs_handle_t *zhp, zfs_prop_t prop, int flags)
 	 * We have to re-open ourselves because we auto-close all the handles
 	 * and can't tell the difference.
 	 */
-	if ((temp = zfs_open(zfs_get_name(zhp), ZFS_TYPE_ANY)) == NULL) {
-		free(clp);
+	if ((temp = zfs_open(zhp->zfs_hdl, zfs_get_name(zhp),
+	    ZFS_TYPE_ANY)) == NULL) {
+		changelist_free(clp);
 		return (NULL);
 	}
 
@@ -459,7 +473,13 @@ changelist_gather(zfs_handle_t *zhp, zfs_prop_t prop, int flags)
 	 * Always add ourself to the list.  We add ourselves to the end so that
 	 * we're the last to be unmounted.
 	 */
-	cn = zfs_malloc(sizeof (prop_changenode_t));
+	if ((cn = zfs_alloc(zhp->zfs_hdl,
+	    sizeof (prop_changenode_t))) == NULL) {
+		zfs_close(temp);
+		changelist_free(clp);
+		return (NULL);
+	}
+
 	cn->cn_handle = temp;
 	cn->cn_mounted = zfs_is_mounted(temp, NULL);
 	cn->cn_shared = zfs_is_shared(temp, NULL);
@@ -474,10 +494,10 @@ changelist_gather(zfs_handle_t *zhp, zfs_prop_t prop, int flags)
 	 * as the behavior of changelist_postfix() will be different.
 	 */
 	if (zfs_prop_get(zhp, prop, property, sizeof (property),
-	    NULL, NULL, 0, FALSE) == 0 &&
+	    NULL, NULL, 0, B_FALSE) == 0 &&
 	    (strcmp(property, "legacy") == 0 || strcmp(property, "none") == 0 ||
 	    strcmp(property, "off") == 0))
-		clp->cl_waslegacy = TRUE;
+		clp->cl_waslegacy = B_TRUE;
 
 	return (clp);
 }
diff --git a/usr/src/lib/libzfs/common/libzfs_config.c b/usr/src/lib/libzfs/common/libzfs_config.c
index 71801d5cba..be691f0ced 100644
--- a/usr/src/lib/libzfs/common/libzfs_config.c
+++ b/usr/src/lib/libzfs/common/libzfs_config.c
@@ -45,9 +45,6 @@
 
 #include "libzfs_impl.h"
 
-static uu_avl_t *namespace_avl;
-static uint64_t namespace_generation;
-
 typedef struct config_node {
 	char		*cn_name;
 	nvlist_t	*cn_config;
@@ -73,11 +70,41 @@ config_node_compare(const void *a, const void *b, void *unused)
 		return (0);
 }
 
+/*
+ * Free all cached pool configs and destroy the namespace AVL tree and pool.
+ */
+void
+namespace_clear(libzfs_handle_t *hdl)
+{
+	uu_avl_t *avl = hdl->libzfs_ns_avl;
+	uu_avl_walk_t *iter;
+	config_node_t *node;
+
+	if (avl != NULL) {
+		iter = uu_avl_walk_start(avl, UU_WALK_ROBUST);
+		if (iter == NULL)
+			return;
+		while ((node = uu_avl_walk_next(iter)) != NULL) {
+			uu_avl_remove(avl, node);
+			nvlist_free(node->cn_config);
+			free(node->cn_name);
+			free(node);
+		}
+		uu_avl_walk_end(iter);
+		uu_avl_destroy(avl);
+		hdl->libzfs_ns_avl = NULL;
+	}
+	if (hdl->libzfs_ns_avlpool != NULL) {
+		uu_avl_pool_destroy(hdl->libzfs_ns_avlpool);
+		hdl->libzfs_ns_avlpool = NULL;
+	}
+}
+
 /*
  * Loads the pool namespace, or re-loads it if the cache has changed.
  */
-static void
-namespace_reload()
+static int
+namespace_reload(libzfs_handle_t *hdl)
 {
 	nvlist_t *config;
 	config_node_t *cn;
@@ -85,23 +112,21 @@ namespace_reload()
 	zfs_cmd_t zc = { 0 };
 	uu_avl_walk_t *walk;
 
-	if (namespace_generation == 0) {
+	if (hdl->libzfs_ns_gen == 0) {
 		/*
 		 * This is the first time we've accessed the configuration
 		 * cache.  Initialize the AVL tree and then fall through to the
 		 * common code.
 		 */
-		uu_avl_pool_t *pool;
-
-		if ((pool = uu_avl_pool_create("config_pool",
+		if ((hdl->libzfs_ns_avlpool = uu_avl_pool_create("config_pool",
 		    sizeof (config_node_t),
 		    offsetof(config_node_t, cn_avl),
 		    config_node_compare, UU_DEFAULT)) == NULL)
-			no_memory();
+			return (no_memory(hdl));
 
-		if ((namespace_avl = uu_avl_create(pool, NULL,
-		    UU_DEFAULT)) == NULL)
-			no_memory();
+		if ((hdl->libzfs_ns_avl = uu_avl_create(hdl->libzfs_ns_avlpool,
+		    NULL, UU_DEFAULT)) == NULL)
+			return (no_memory(hdl));
 	}
 
 	/*
@@ -114,68 +139,92 @@ namespace_reload()
 	 *			been modified to tell us how much to allocate.
 	 */
 	zc.zc_config_dst_size = 1024;
-	zc.zc_config_dst = (uint64_t)(uintptr_t)
-	    zfs_malloc(zc.zc_config_dst_size);
+	if ((zc.zc_config_dst = (uint64_t)(uintptr_t)
+	    zfs_alloc(hdl, zc.zc_config_dst_size)) == NULL)
+		return (-1);
 	for (;;) {
-		zc.zc_cookie = namespace_generation;
-		if (zfs_ioctl(ZFS_IOC_POOL_CONFIGS, &zc) != 0) {
+		zc.zc_cookie = hdl->libzfs_ns_gen;
+		if (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_CONFIGS, &zc) != 0) {
 			switch (errno) {
 			case EEXIST:
 				/*
 				 * The namespace hasn't changed.
 				 */
 				free((void *)(uintptr_t)zc.zc_config_dst);
-				return;
+				return (0);
 
 			case ENOMEM:
 				free((void *)(uintptr_t)zc.zc_config_dst);
-				zc.zc_config_dst = (uint64_t)(uintptr_t)
-				    zfs_malloc(zc.zc_config_dst_size);
+				if ((zc.zc_config_dst = (uint64_t)(uintptr_t)
+				    zfs_alloc(hdl, zc.zc_config_dst_size))
+				    == NULL)
+					return (-1);
 				break;
 
 			default:
-				zfs_baderror(errno);
+				return (zfs_standard_error(hdl, errno,
+				    dgettext(TEXT_DOMAIN, "failed to read "
+				    "pool configuration")));
 			}
 		} else {
-			namespace_generation = zc.zc_cookie;
+			hdl->libzfs_ns_gen = zc.zc_cookie;
 			break;
 		}
 	}
 
-	verify(nvlist_unpack((void *)(uintptr_t)zc.zc_config_dst,
-	    zc.zc_config_dst_size, &config, 0) == 0);
+	if (nvlist_unpack((void *)(uintptr_t)zc.zc_config_dst,
+	    zc.zc_config_dst_size, &config, 0) != 0)
+		return (no_memory(hdl));
 
 	free((void *)(uintptr_t)zc.zc_config_dst);
 
 	/*
 	 * Clear out any existing configuration information.
 	 */
-	if ((walk = uu_avl_walk_start(namespace_avl, UU_WALK_ROBUST)) == NULL)
-		no_memory();
+	if ((walk = uu_avl_walk_start(hdl->libzfs_ns_avl,
+	    UU_WALK_ROBUST)) == NULL) {
+		nvlist_free(config);
+		return (no_memory(hdl));
+	}
 
 	while ((cn = uu_avl_walk_next(walk)) != NULL) {
-		uu_avl_remove(namespace_avl, cn);
+		uu_avl_remove(hdl->libzfs_ns_avl, cn);
 		nvlist_free(cn->cn_config);
 		free(cn->cn_name);
 		free(cn);
 	}
 
+	uu_avl_walk_end(walk);
+
 	elem = NULL;
 	while ((elem = nvlist_next_nvpair(config, elem)) != NULL) {
 		nvlist_t *child;
 		uu_avl_index_t where;
 
-		cn = zfs_malloc(sizeof (config_node_t));
-		cn->cn_name = zfs_strdup(nvpair_name(elem));
+		if ((cn = zfs_alloc(hdl, sizeof (config_node_t))) == NULL) {
+			nvlist_free(config);
+			return (-1);
+		}
+
+		if ((cn->cn_name = zfs_strdup(hdl,
+		    nvpair_name(elem))) == NULL) {
+			free(cn);
+			return (-1);
+		}
 
 		verify(nvpair_value_nvlist(elem, &child) == 0);
-		verify(nvlist_dup(child, &cn->cn_config, 0) == 0);
-		verify(uu_avl_find(namespace_avl, cn, NULL, &where) == NULL);
+		if (nvlist_dup(child, &cn->cn_config, 0) != 0) {
+			nvlist_free(config);
+			return (no_memory(hdl));
+		}
+		verify(uu_avl_find(hdl->libzfs_ns_avl, cn, NULL, &where)
+		    == NULL);
 
-		uu_avl_insert(namespace_avl, cn, where);
+		uu_avl_insert(hdl->libzfs_ns_avl, cn, where);
 	}
 
 	nvlist_free(config);
+	return (0);
 }
 
 /*
@@ -209,35 +258,43 @@ zpool_refresh_stats(zpool_handle_t *zhp)
 		zhp->zpool_config_size = 1 << 16;
 
 	zc.zc_config_dst_size = zhp->zpool_config_size;
-	zc.zc_config_dst = (uint64_t)(uintptr_t)
-	    zfs_malloc(zc.zc_config_dst_size);
+	if ((zc.zc_config_dst = (uint64_t)(uintptr_t)
+	    zfs_alloc(zhp->zpool_hdl, zc.zc_config_dst_size)) == NULL)
+		return (-1);
 
 	for (;;) {
-		if (zfs_ioctl(ZFS_IOC_POOL_STATS, &zc) == 0) {
+		if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_POOL_STATS,
+		    &zc) == 0) {
 			/*
 			 * The real error is returned in the zc_cookie field.
 			 */
-			error = zc.zc_cookie;
+			error = errno = zc.zc_cookie;
 			break;
 		}
 
 		if (errno == ENOMEM) {
 			free((void *)(uintptr_t)zc.zc_config_dst);
-			zc.zc_config_dst = (uint64_t)(uintptr_t)
-			    zfs_malloc(zc.zc_config_dst_size);
+			if ((zc.zc_config_dst = (uint64_t)(uintptr_t)
+			    zfs_alloc(zhp->zpool_hdl,
+			    zc.zc_config_dst_size)) == NULL)
+				return (-1);
 		} else {
 			free((void *)(uintptr_t)zc.zc_config_dst);
-			return (errno);
+			return (-1);
 		}
 	}
 
-	verify(nvlist_unpack((void *)(uintptr_t)zc.zc_config_dst,
-	    zc.zc_config_dst_size, &config, 0) == 0);
+	if (nvlist_unpack((void *)(uintptr_t)zc.zc_config_dst,
+	    zc.zc_config_dst_size, &config, 0) != 0) {
+		free((void *)(uintptr_t)zc.zc_config_dst);
+		return (no_memory(zhp->zpool_hdl));
+	}
 
 	zhp->zpool_config_size = zc.zc_config_dst_size;
 	free((void *)(uintptr_t)zc.zc_config_dst);
 
-	set_pool_health(config);
+	if (set_pool_health(config) != 0)
+		return (no_memory(zhp->zpool_hdl));
 
 	if (zhp->zpool_config != NULL) {
 		uint64_t oldtxg, newtxg;
@@ -260,25 +317,26 @@ zpool_refresh_stats(zpool_handle_t *zhp)
 
 	zhp->zpool_config = config;
 
-	return (error);
+	return (error ? -1 : 0);
 }
 
 /*
  * Iterate over all pools in the system.
  */
 int
-zpool_iter(zpool_iter_f func, void *data)
+zpool_iter(libzfs_handle_t *hdl, zpool_iter_f func, void *data)
 {
 	config_node_t *cn;
 	zpool_handle_t *zhp;
 	int ret;
 
-	namespace_reload();
+	if (namespace_reload(hdl) != 0)
+		return (-1);
 
-	for (cn = uu_avl_first(namespace_avl); cn != NULL;
-	    cn = uu_avl_next(namespace_avl, cn)) {
+	for (cn = uu_avl_first(hdl->libzfs_ns_avl); cn != NULL;
+	    cn = uu_avl_next(hdl->libzfs_ns_avl, cn)) {
 
-		if ((zhp = zpool_open_silent(cn->cn_name)) == NULL)
+		if ((zhp = zpool_open_silent(hdl, cn->cn_name)) == NULL)
 			continue;
 
 		if ((ret = func(zhp, data)) != 0)
@@ -293,18 +351,19 @@ zpool_iter(zpool_iter_f func, void *data)
  * handle passed each time must be explicitly closed by the callback.
  */
 int
-zfs_iter_root(zfs_iter_f func, void *data)
+zfs_iter_root(libzfs_handle_t *hdl, zfs_iter_f func, void *data)
 {
 	config_node_t *cn;
 	zfs_handle_t *zhp;
 	int ret;
 
-	namespace_reload();
+	if (namespace_reload(hdl) != 0)
+		return (-1);
 
-	for (cn = uu_avl_first(namespace_avl); cn != NULL;
-	    cn = uu_avl_next(namespace_avl, cn)) {
+	for (cn = uu_avl_first(hdl->libzfs_ns_avl); cn != NULL;
+	    cn = uu_avl_next(hdl->libzfs_ns_avl, cn)) {
 
-		if ((zhp = make_dataset_handle(cn->cn_name)) == NULL)
+		if ((zhp = make_dataset_handle(hdl, cn->cn_name)) == NULL)
 			continue;
 
 		if ((ret = func(zhp, data)) != 0)
diff --git a/usr/src/lib/libzfs/common/libzfs_dataset.c b/usr/src/lib/libzfs/common/libzfs_dataset.c
index f23136c8aa..14ba6112ed 100644
--- a/usr/src/lib/libzfs/common/libzfs_dataset.c
+++ b/usr/src/lib/libzfs/common/libzfs_dataset.c
@@ -36,6 +36,7 @@
 #include <strings.h>
 #include <unistd.h>
 #include <zone.h>
+#include <fcntl.h>
 #include <sys/mntent.h>
 #include <sys/mnttab.h>
 #include <sys/mount.h>
@@ -64,7 +65,6 @@ zfs_type_to_name(zfs_type_t type)
 		return (dgettext(TEXT_DOMAIN, "volume"));
 	}
 
-	zfs_baderror(type);
 	return (NULL);
 }
 
@@ -118,43 +118,43 @@ path_to_str(const char *path, int types)
  * 'buf' detailing exactly why the name was not valid.
  */
 static int
-zfs_validate_name(const char *path, int type, char *buf, size_t buflen)
+zfs_validate_name(libzfs_handle_t *hdl, const char *path, int type)
 {
 	namecheck_err_t why;
 	char what;
 
 	if (dataset_namecheck(path, &why, &what) != 0) {
-		if (buf != NULL) {
+		if (hdl != NULL) {
 			switch (why) {
 			case NAME_ERR_TOOLONG:
-				(void) strlcpy(buf, dgettext(TEXT_DOMAIN,
-				    "name is too long"), buflen);
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "name is too long"));
 				break;
 
 			case NAME_ERR_LEADING_SLASH:
-				(void) strlcpy(buf, dgettext(TEXT_DOMAIN,
-				    "leading slash"), buflen);
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "leading slash in name"));
 				break;
 
 			case NAME_ERR_EMPTY_COMPONENT:
-				(void) strlcpy(buf, dgettext(TEXT_DOMAIN,
-				    "empty component"), buflen);
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "empty component in name"));
 				break;
 
 			case NAME_ERR_TRAILING_SLASH:
-				(void) strlcpy(buf, dgettext(TEXT_DOMAIN,
-				    "trailing slash"), buflen);
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "trailing slash in name"));
 				break;
 
 			case NAME_ERR_INVALCHAR:
-				(void) snprintf(buf, buflen,
+				zfs_error_aux(hdl,
 				    dgettext(TEXT_DOMAIN, "invalid character "
-				    "'%c'"), what);
+				    "'%c' in name"), what);
 				break;
 
 			case NAME_ERR_MULTIPLE_AT:
-				(void) strlcpy(buf, dgettext(TEXT_DOMAIN,
-				    "multiple '@' delimiters"), buflen);
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "multiple '@' delimiters in name"));
 				break;
 			}
 		}
@@ -163,20 +163,19 @@ zfs_validate_name(const char *path, int type, char *buf, size_t buflen)
 	}
 
 	if (!(type & ZFS_TYPE_SNAPSHOT) && strchr(path, '@') != NULL) {
-		if (buf != NULL)
-			(void) strlcpy(buf,
-			    dgettext(TEXT_DOMAIN,
-			    "snapshot delimiter '@'"), buflen);
+		if (hdl != NULL)
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "snapshot delimiter '@' in filesystem name"));
 		return (0);
 	}
 
-	return (1);
+	return (-1);
 }
 
 int
 zfs_name_valid(const char *name, zfs_type_t type)
 {
-	return (zfs_validate_name(name, type, NULL, NULL));
+	return (zfs_validate_name(NULL, name, type));
 }
 
 /*
@@ -189,13 +188,16 @@ get_stats(zfs_handle_t *zhp)
 
 	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
 
-	zc.zc_config_src = (uint64_t)(uintptr_t)zfs_malloc(1024);
+	if ((zc.zc_config_src = (uint64_t)(uintptr_t)malloc(1024)) == NULL)
+		return (-1);
 	zc.zc_config_src_size = 1024;
 
-	while (zfs_ioctl(ZFS_IOC_OBJSET_STATS, &zc) != 0) {
+	while (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0) {
 		if (errno == ENOMEM) {
-			zc.zc_config_src = (uint64_t)(uintptr_t)
-			    zfs_malloc(zc.zc_config_src_size);
+			free((void *)(uintptr_t)zc.zc_config_src);
+			if ((zc.zc_config_src = (uint64_t)(uintptr_t)
+			    malloc(zc.zc_config_src_size)) == NULL)
+				return (-1);
 		} else {
 			free((void *)(uintptr_t)zc.zc_config_src);
 			return (-1);
@@ -207,12 +209,22 @@ get_stats(zfs_handle_t *zhp)
 
 	(void) strcpy(zhp->zfs_root, zc.zc_root);
 
-	verify(nvlist_unpack((void *)(uintptr_t)zc.zc_config_src,
-	    zc.zc_config_src_size, &zhp->zfs_props, 0) == 0);
+	if (zhp->zfs_props) {
+		nvlist_free(zhp->zfs_props);
+		zhp->zfs_props = NULL;
+	}
+
+	if (nvlist_unpack((void *)(uintptr_t)zc.zc_config_src,
+	    zc.zc_config_src_size, &zhp->zfs_props, 0) != 0) {
+		free((void *)(uintptr_t)zc.zc_config_src);
+		return (-1);
+	}
 
 	zhp->zfs_volsize = zc.zc_volsize;
 	zhp->zfs_volblocksize = zc.zc_volblocksize;
 
+	free((void *)(uintptr_t)zc.zc_config_src);
+
 	return (0);
 }
 
@@ -230,9 +242,14 @@ zfs_refresh_properties(zfs_handle_t *zhp)
  * zfs_iter_* to create child handles on the fly.
  */
 zfs_handle_t *
-make_dataset_handle(const char *path)
+make_dataset_handle(libzfs_handle_t *hdl, const char *path)
 {
-	zfs_handle_t *zhp = zfs_malloc(sizeof (zfs_handle_t));
+	zfs_handle_t *zhp = calloc(sizeof (zfs_handle_t), 1);
+
+	if (zhp == NULL)
+		return (NULL);
+
+	zhp->zfs_hdl = hdl;
 
 top:
 	(void) strlcpy(zhp->zfs_name, path, sizeof (zhp->zfs_name));
@@ -263,20 +280,20 @@ top:
 		(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
 
 		if (zhp->zfs_type == ZFS_TYPE_VOLUME) {
-			(void) zvol_remove_link(zhp->zfs_name);
+			(void) zvol_remove_link(hdl, zhp->zfs_name);
 			zc.zc_objset_type = DMU_OST_ZVOL;
 		} else {
 			zc.zc_objset_type = DMU_OST_ZFS;
 		}
 
 		/* If we can successfully roll it back, reget the stats */
-		if (zfs_ioctl(ZFS_IOC_ROLLBACK, &zc) == 0)
+		if (ioctl(hdl->libzfs_fd, ZFS_IOC_ROLLBACK, &zc) == 0)
 			goto top;
 		/*
 		 * If we can sucessfully destroy it, pretend that it
 		 * never existed.
 		 */
-		if (zfs_ioctl(ZFS_IOC_DESTROY, &zc) == 0) {
+		if (ioctl(hdl->libzfs_fd, ZFS_IOC_DESTROY, &zc) == 0) {
 			free(zhp);
 			errno = ENOENT;
 			return (NULL);
@@ -294,8 +311,7 @@ top:
 	else if (zhp->zfs_dmustats.dds_type == DMU_OST_ZFS)
 		zhp->zfs_type = ZFS_TYPE_FILESYSTEM;
 	else
-		/* we should never see any other dataset types */
-		zfs_baderror(zhp->zfs_dmustats.dds_type);
+		abort();	/* we should never see any other types */
 
 	return (zhp);
 }
@@ -306,18 +322,21 @@ top:
  * appropriate error message and return NULL if it can't be opened.
  */
 zfs_handle_t *
-zfs_open(const char *path, int types)
+zfs_open(libzfs_handle_t *hdl, const char *path, int types)
 {
 	zfs_handle_t *zhp;
+	char errbuf[1024];
+
+	(void) snprintf(errbuf, sizeof (errbuf),
+	    dgettext(TEXT_DOMAIN, "cannot open '%s'"), path);
 
 	/*
-	 * Validate the name before we even try to open it.  We don't care about
-	 * the verbose invalid messages here; just report a generic error.
+	 * Validate the name before we even try to open it.
 	 */
-	if (!zfs_validate_name(path, types, NULL, 0)) {
-		zfs_error(dgettext(TEXT_DOMAIN,
-		    "cannot open '%s': invalid %s name"), path,
-		    path_to_str(path, types));
+	if (!zfs_validate_name(hdl, path, ZFS_TYPE_ANY)) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "invalid dataset name"));
+		(void) zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
 		return (NULL);
 	}
 
@@ -325,48 +344,13 @@ zfs_open(const char *path, int types)
 	 * Try to get stats for the dataset, which will tell us if it exists.
 	 */
 	errno = 0;
-	if ((zhp = make_dataset_handle(path)) == NULL) {
-		switch (errno) {
-		case ENOENT:
-			/*
-			 * The dataset doesn't exist.
-			 */
-			zfs_error(dgettext(TEXT_DOMAIN,
-			    "cannot open '%s': no such %s"), path,
-			    path_to_str(path, types));
-			break;
-
-		case EBUSY:
-			/*
-			 * We were able to open the dataset but couldn't
-			 * get the stats.
-			 */
-			zfs_error(dgettext(TEXT_DOMAIN,
-			    "cannot open '%s': %s is busy"), path,
-			    path_to_str(path, types));
-			break;
-
-		case ENXIO:
-		case EIO:
-			/*
-			 * I/O error from the underlying pool.
-			 */
-			zfs_error(dgettext(TEXT_DOMAIN,
-			    "cannot open '%s': I/O error"), path,
-			    path_to_str(path, types));
-			break;
-
-		default:
-			zfs_baderror(errno);
-
-		}
+	if ((zhp = make_dataset_handle(hdl, path)) == NULL) {
+		(void) zfs_standard_error(hdl, errno, errbuf, path);
 		return (NULL);
 	}
 
 	if (!(types & zhp->zfs_type)) {
-		zfs_error(dgettext(TEXT_DOMAIN, "cannot open '%s': operation "
-		    "not supported for %ss"), path,
-		    zfs_type_to_name(zhp->zfs_type));
+		(void) zfs_error(hdl, EZFS_BADTYPE, errbuf);
 		free(zhp);
 		return (NULL);
 	}
@@ -382,6 +366,8 @@ zfs_close(zfs_handle_t *zhp)
 {
 	if (zhp->zfs_mntopts)
 		free(zhp->zfs_mntopts);
+	if (zhp->zfs_props)
+		nvlist_free(zhp->zfs_props);
 	free(zhp);
 }
 
@@ -443,7 +429,7 @@ struct {
  * resulting value must be shifted.
  */
 static int
-str2shift(const char *buf, char *reason, size_t len)
+str2shift(libzfs_handle_t *hdl, const char *buf)
 {
 	const char *ends = "BKMGTPEZ";
 	int i;
@@ -455,8 +441,8 @@ str2shift(const char *buf, char *reason, size_t len)
 			break;
 	}
 	if (i == strlen(ends)) {
-		(void) snprintf(reason, len, dgettext(TEXT_DOMAIN, "invalid "
-		    "numeric suffix '%s'"), buf);
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "invalid numeric suffix '%s'"), buf);
 		return (-1);
 	}
 
@@ -465,12 +451,11 @@ str2shift(const char *buf, char *reason, size_t len)
 	 * allow 'BB' - that's just weird.
 	 */
 	if (buf[1] == '\0' || (toupper(buf[1]) == 'B' && buf[2] == '\0' &&
-	    toupper(buf[0]) != 'B')) {
+	    toupper(buf[0]) != 'B'))
 		return (10*i);
-	}
 
-	(void) snprintf(reason, len, dgettext(TEXT_DOMAIN, "invalid numeric "
-	    "suffix '%s'"), buf);
+	zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+	    "invalid numeric suffix '%s'"), buf);
 	return (-1);
 }
 
@@ -480,7 +465,7 @@ str2shift(const char *buf, char *reason, size_t len)
  * message for the caller to use.
  */
 static int
-nicestrtonum(const char *value, uint64_t *num, char *buf, size_t buflen)
+nicestrtonum(libzfs_handle_t *hdl, const char *value, uint64_t *num)
 {
 	char *end;
 	int shift;
@@ -489,8 +474,9 @@ nicestrtonum(const char *value, uint64_t *num, char *buf, size_t buflen)
 
 	/* Check to see if this looks like a number.  */
 	if ((value[0] < '0' || value[0] > '9') && value[0] != '.') {
-		(void) strlcpy(buf, dgettext(TEXT_DOMAIN,
-		    "must be a numeric value"), buflen);
+		if (hdl)
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "bad numeric value '%s'"), value);
 		return (-1);
 	}
 
@@ -503,8 +489,9 @@ nicestrtonum(const char *value, uint64_t *num, char *buf, size_t buflen)
 	 * in a 64-bit value.
 	 */
 	if (errno == ERANGE) {
-		(void) strlcpy(buf, dgettext(TEXT_DOMAIN,
-		    "value is too large"), buflen);
+		if (hdl)
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "numeric value is too large"));
 		return (-1);
 	}
 
@@ -515,26 +502,28 @@ nicestrtonum(const char *value, uint64_t *num, char *buf, size_t buflen)
 	if (*end == '.') {
 		double fval = strtod(value, &end);
 
-		if ((shift = str2shift(end, buf, buflen)) == -1)
+		if ((shift = str2shift(hdl, end)) == -1)
 			return (-1);
 
 		fval *= pow(2, shift);
 
 		if (fval > UINT64_MAX) {
-			(void) strlcpy(buf, dgettext(TEXT_DOMAIN,
-			    "value is too large"), buflen);
+			if (hdl)
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "numeric value is too large"));
 			return (-1);
 		}
 
 		*num = (uint64_t)fval;
 	} else {
-		if ((shift = str2shift(end, buf, buflen)) == -1)
+		if ((shift = str2shift(hdl, end)) == -1)
 			return (-1);
 
 		/* Check for overflow */
 		if (shift >= 64 || (*num << shift) >> shift != *num) {
-			(void) strlcpy(buf, dgettext(TEXT_DOMAIN,
-			    "value is too large"), buflen);
+			if (hdl)
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "numeric value is too large"));
 			return (-1);
 		}
 
@@ -547,9 +536,7 @@ nicestrtonum(const char *value, uint64_t *num, char *buf, size_t buflen)
 int
 zfs_nicestrtonum(const char *str, uint64_t *val)
 {
-	char buf[1];
-
-	return (nicestrtonum(str, val, buf, sizeof (buf)));
+	return (nicestrtonum(NULL, str, val));
 }
 
 /*
@@ -557,28 +544,28 @@ zfs_nicestrtonum(const char *str, uint64_t *val)
  * by zfs_prop_set() and some libzfs consumers.
  */
 int
-zfs_prop_validate(zfs_prop_t prop, const char *value, uint64_t *intval)
+zfs_prop_validate(libzfs_handle_t *hdl, zfs_prop_t prop, const char *value,
+    uint64_t *intval)
 {
 	const char *propname = zfs_prop_to_name(prop);
 	uint64_t number;
-	char reason[64];
+	char errbuf[1024];
 	int i;
 
 	/*
 	 * Check to see if this a read-only property.
 	 */
-	if (zfs_prop_readonly(prop)) {
-		zfs_error(dgettext(TEXT_DOMAIN,
-		    "cannot set %s property: read-only property"), propname);
-		return (-1);
-	}
+	if (zfs_prop_readonly(prop))
+		return (zfs_error(hdl, EZFS_PROPREADONLY,
+		    dgettext(TEXT_DOMAIN, "cannot set %s property"), propname));
+
+	(void) snprintf(errbuf, sizeof (errbuf),
+	    dgettext(TEXT_DOMAIN, "bad %s value '%s'"), propname, value);
 
 	/* See if the property value is too long */
 	if (strlen(value) >= ZFS_MAXPROPLEN) {
-		zfs_error(dgettext(TEXT_DOMAIN,
-		    "bad %s value '%s': value is too long"), propname,
-		    value);
-		return (-1);
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "value is too long"));
+		return (zfs_error(hdl, EZFS_BADPROP, errbuf));
 	}
 
 	/* Perform basic checking based on property type */
@@ -589,10 +576,9 @@ zfs_prop_validate(zfs_prop_t prop, const char *value, uint64_t *intval)
 		} else if (strcmp(value, "off") == 0) {
 			number = 0;
 		} else {
-			zfs_error(dgettext(TEXT_DOMAIN,
-			    "bad %s value '%s': must be 'on' or 'off'"),
-			    propname, value);
-			return (-1);
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "must be 'on' or 'off'"));
+			return (zfs_error(hdl, EZFS_BADPROP, errbuf));
 		}
 		break;
 
@@ -603,21 +589,15 @@ zfs_prop_validate(zfs_prop_t prop, const char *value, uint64_t *intval)
 			break;
 		}
 
-		if (nicestrtonum(value, &number, reason,
-		    sizeof (reason)) != 0) {
-			zfs_error(dgettext(TEXT_DOMAIN,
-			    "bad %s value '%s': %s"), propname, value,
-			    reason);
-			return (-1);
-		}
+		if (nicestrtonum(hdl, value, &number) != 0)
+			return (zfs_error(hdl, EZFS_BADPROP, errbuf));
 
 		/* don't allow 0 for quota, use 'none' instead */
 		if (prop == ZFS_PROP_QUOTA && number == 0 &&
 		    strcmp(value, "none") != 0) {
-			zfs_error(dgettext(TEXT_DOMAIN,
-			    "bad %s value '%s': use '%s=none' to disable"),
-			    propname, value, propname);
-			return (-1);
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "use 'quota=none' to disable"));
+			return (zfs_error(hdl, EZFS_BADPROP, errbuf));
 		}
 
 		/* must be power of two within SPA_{MIN,MAX}BLOCKSIZE */
@@ -625,13 +605,11 @@ zfs_prop_validate(zfs_prop_t prop, const char *value, uint64_t *intval)
 		    prop == ZFS_PROP_VOLBLOCKSIZE) {
 			if (number < SPA_MINBLOCKSIZE ||
 			    number > SPA_MAXBLOCKSIZE || !ISP2(number)) {
-				zfs_error(dgettext(TEXT_DOMAIN,
-				    "bad %s value '%s': "
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 				    "must be power of 2 from %u to %uk"),
-				    propname, value,
 				    (uint_t)SPA_MINBLOCKSIZE,
 				    (uint_t)SPA_MAXBLOCKSIZE >> 10);
-				return (-1);
+				return (zfs_error(hdl, EZFS_BADPROP, errbuf));
 			}
 		}
 
@@ -652,11 +630,10 @@ zfs_prop_validate(zfs_prop_t prop, const char *value, uint64_t *intval)
 				break;
 
 			if (value[0] != '/') {
-				zfs_error(dgettext(TEXT_DOMAIN,
-				    "bad %s value '%s': must be an absolute "
-				    "path, 'none', or 'legacy'"),
-				    propname, value);
-				return (-1);
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "must be an absolute path, 'none', or "
+				    "'legacy'"));
+				return (zfs_error(hdl, EZFS_BADPROP, errbuf));
 			}
 			break;
 
@@ -670,11 +647,10 @@ zfs_prop_validate(zfs_prop_t prop, const char *value, uint64_t *intval)
 			}
 
 			if (checksum_table[i].name == NULL) {
-				zfs_error(dgettext(TEXT_DOMAIN,
-				    "bad %s value '%s': must be 'on', 'off', "
-				    "'fletcher2', 'fletcher4', or 'sha256'"),
-				    propname, value);
-				return (-1);
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "must be 'on', 'off', 'fletcher2', "
+				    "'fletcher4', or 'sha256'"));
+				return (zfs_error(hdl, EZFS_BADPROP, errbuf));
 			}
 			break;
 
@@ -688,11 +664,9 @@ zfs_prop_validate(zfs_prop_t prop, const char *value, uint64_t *intval)
 			}
 
 			if (compress_table[i].name == NULL) {
-				zfs_error(dgettext(TEXT_DOMAIN,
-				    "bad %s value '%s': must be 'on', 'off', "
-				    "or 'lzjb'"),
-				    propname, value);
-				return (-1);
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "must be 'on', 'off', or 'lzjb'"));
+				return (zfs_error(hdl, EZFS_BADPROP, errbuf));
 			}
 			break;
 
@@ -705,11 +679,9 @@ zfs_prop_validate(zfs_prop_t prop, const char *value, uint64_t *intval)
 			}
 
 			if (snapdir_table[i].name == NULL) {
-				zfs_error(dgettext(TEXT_DOMAIN,
-				    "bad %s value '%s': must be 'hidden' "
-				    "or 'visible'"),
-				    propname, value);
-				return (-1);
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "must be 'hidden' or 'visible'"));
+				return (zfs_error(hdl, EZFS_BADPROP, errbuf));
 			}
 			break;
 
@@ -723,11 +695,10 @@ zfs_prop_validate(zfs_prop_t prop, const char *value, uint64_t *intval)
 			}
 
 			if (acl_mode_table[i].name == NULL) {
-				zfs_error(dgettext(TEXT_DOMAIN,
-				    "bad %s value '%s': must be 'discard', "
-				    "'groupmask' or 'passthrough'"),
-				    propname, value);
-				return (-1);
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "must be 'discard', 'groupmask', or "
+				    "'passthrough'"));
+				return (zfs_error(hdl, EZFS_BADPROP, errbuf));
 			}
 			break;
 
@@ -741,11 +712,10 @@ zfs_prop_validate(zfs_prop_t prop, const char *value, uint64_t *intval)
 			}
 
 			if (acl_inherit_table[i].name == NULL) {
-				zfs_error(dgettext(TEXT_DOMAIN,
-				    "bad %s value '%s': must be 'discard', "
-				    "'noallow', 'secure' or 'passthrough'"),
-				    propname, value);
-				return (-1);
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "must be 'discard', 'noallow', 'secure', "
+				    "or 'passthrough'"));
+				return (zfs_error(hdl, EZFS_BADPROP, errbuf));
 			}
 			break;
 
@@ -775,19 +745,22 @@ zfs_prop_set(zfs_handle_t *zhp, zfs_prop_t prop, const char *propval)
 	zfs_cmd_t zc = { 0 };
 	int ret;
 	prop_changelist_t *cl;
+	char errbuf[1024];
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
 
-	if (zfs_prop_validate(prop, propval, &number) != 0)
+	if (zfs_prop_validate(zhp->zfs_hdl, prop, propval, &number) != 0)
 		return (-1);
 
+
+	(void) snprintf(errbuf, sizeof (errbuf),
+	    dgettext(TEXT_DOMAIN, "cannot set %s for '%s'"), propname,
+	    zhp->zfs_name);
+
 	/*
 	 * Check to see if the value applies to this type
 	 */
-	if (!zfs_prop_valid_for_type(prop, zhp->zfs_type)) {
-		zfs_error(dgettext(TEXT_DOMAIN,
-		    "cannot set %s for '%s': property does not apply to %ss"),
-		    propname, zhp->zfs_name, zfs_type_to_name(zhp->zfs_type));
-		return (-1);
-	}
+	if (!zfs_prop_valid_for_type(prop, zhp->zfs_type))
+		return (zfs_error(hdl, EZFS_PROPTYPE, errbuf));
 
 	/*
 	 * For the mountpoint and sharenfs properties, check if it can be set
@@ -804,29 +777,24 @@ zfs_prop_set(zfs_handle_t *zhp, zfs_prop_t prop, const char *propval)
 	if (prop == ZFS_PROP_MOUNTPOINT || prop == ZFS_PROP_SHARENFS) {
 		if (zfs_prop_get_int(zhp, ZFS_PROP_ZONED)) {
 			if (getzoneid() == GLOBAL_ZONEID) {
-				zfs_error(dgettext(TEXT_DOMAIN,
-				    "cannot set %s for '%s': "
-				    "dataset is used in a non-global zone"),
-				    propname, zhp->zfs_name);
-				return (-1);
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "dataset is used in a non-global zone"));
+				return (zfs_error(hdl, EZFS_ZONED, errbuf));
 			} else if (prop == ZFS_PROP_SHARENFS) {
-				zfs_error(dgettext(TEXT_DOMAIN,
-				    "cannot set %s for '%s': filesystems "
-				    "cannot be shared in a non-global zone"),
-				    propname, zhp->zfs_name);
-				return (-1);
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "filesystems cannot be shared in a "
+				    "non-global zone"));
+				return (zfs_error(hdl, EZFS_ZONED, errbuf));
 			}
 		} else if (getzoneid() != GLOBAL_ZONEID) {
 			/*
 			 * If zoned property is 'off', this must be in
 			 * a globle zone. If not, something is wrong.
 			 */
-			zfs_error(dgettext(TEXT_DOMAIN,
-			    "cannot set %s for '%s': dataset is "
-			    "used in a non-global zone, but 'zoned' "
-			    "property is not set"),
-			    propname, zhp->zfs_name);
-			return (-1);
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "dataset is used in a non-global zone, but "
+			    "'zoned' property is not set"));
+			return (zfs_error(hdl, EZFS_ZONED, errbuf));
 		}
 	}
 
@@ -834,11 +802,10 @@ zfs_prop_set(zfs_handle_t *zhp, zfs_prop_t prop, const char *propval)
 		return (-1);
 
 	if (prop == ZFS_PROP_MOUNTPOINT && changelist_haszonedchild(cl)) {
-		zfs_error(dgettext(TEXT_DOMAIN, "cannot set %s for '%s', "
-			"child dataset with inherited mountpoint is used "
-			"in a non-global zone"),
-			propname, zhp->zfs_name);
-		ret = -1;
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "child dataset with inherited mountpoint is used "
+		    "in a non-global zone"));
+		ret = zfs_error(hdl, EZFS_ZONED, errbuf);
 		goto error;
 	}
 
@@ -853,11 +820,12 @@ zfs_prop_set(zfs_handle_t *zhp, zfs_prop_t prop, const char *propval)
 	switch (prop) {
 	case ZFS_PROP_QUOTA:
 		zc.zc_cookie = number;
-		ret = zfs_ioctl(ZFS_IOC_SET_QUOTA, &zc);
+		ret = ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_SET_QUOTA, &zc);
 		break;
 	case ZFS_PROP_RESERVATION:
 		zc.zc_cookie = number;
-		ret = zfs_ioctl(ZFS_IOC_SET_RESERVATION, &zc);
+		ret = ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_SET_RESERVATION,
+		    &zc);
 		break;
 	case ZFS_PROP_MOUNTPOINT:
 	case ZFS_PROP_SHARENFS:
@@ -870,15 +838,16 @@ zfs_prop_set(zfs_handle_t *zhp, zfs_prop_t prop, const char *propval)
 		    sizeof (zc.zc_prop_value));
 		zc.zc_intsz = 1;
 		zc.zc_numints = strlen(propval) + 1;
-		ret = zfs_ioctl(ZFS_IOC_SET_PROP, &zc);
+		ret = ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_SET_PROP, &zc);
 		break;
 	case ZFS_PROP_VOLSIZE:
 		zc.zc_volsize = number;
-		ret = zfs_ioctl(ZFS_IOC_SET_VOLSIZE, &zc);
+		ret = ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_SET_VOLSIZE, &zc);
 		break;
 	case ZFS_PROP_VOLBLOCKSIZE:
 		zc.zc_volblocksize = number;
-		ret = zfs_ioctl(ZFS_IOC_SET_VOLBLOCKSIZE, &zc);
+		ret = ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_SET_VOLBLOCKSIZE,
+		    &zc);
 		break;
 	default:
 		(void) strlcpy(zc.zc_prop_name, propname,
@@ -887,25 +856,13 @@ zfs_prop_set(zfs_handle_t *zhp, zfs_prop_t prop, const char *propval)
 		*(uint64_t *)zc.zc_prop_value = number;
 		zc.zc_intsz = 8;
 		zc.zc_numints = 1;
-		ret = zfs_ioctl(ZFS_IOC_SET_PROP, &zc);
+		ret = ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_SET_PROP, &zc);
 		break;
 	}
 
 	if (ret != 0) {
 		switch (errno) {
 
-		case EPERM:
-			zfs_error(dgettext(TEXT_DOMAIN,
-			    "cannot set %s for '%s': permission "
-			    "denied"), propname, zhp->zfs_name);
-			break;
-
-		case ENOENT:
-			zfs_error(dgettext(TEXT_DOMAIN,
-			    "cannot open '%s': no such %s"), zhp->zfs_name,
-			    zfs_type_to_name(zhp->zfs_type));
-			break;
-
 		case ENOSPC:
 			/*
 			 * For quotas and reservations, ENOSPC indicates
@@ -914,41 +871,33 @@ zfs_prop_set(zfs_handle_t *zhp, zfs_prop_t prop, const char *propval)
 			 */
 			switch (prop) {
 			case ZFS_PROP_QUOTA:
-				zfs_error(dgettext(TEXT_DOMAIN, "cannot set %s "
-				    "for '%s': size is less than current "
-				    "used or reserved space"), propname,
-				    zhp->zfs_name);
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "size is less than current used or "
+				    "reserved space"));
+				(void) zfs_error(hdl, EZFS_PROPSPACE, errbuf);
 				break;
 
 			case ZFS_PROP_RESERVATION:
-				zfs_error(dgettext(TEXT_DOMAIN, "cannot set %s "
-				    "for '%s': size is greater than available "
-				    "space"), propname, zhp->zfs_name);
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "size is greater than available space"));
+				(void) zfs_error(hdl, EZFS_PROPSPACE, errbuf);
 				break;
 
 			default:
-				zfs_error(dgettext(TEXT_DOMAIN,
-				    "cannot set %s for '%s': out of space"),
-				    propname, zhp->zfs_name);
+				(void) zfs_standard_error(hdl, errno, errbuf);
 				break;
 			}
 			break;
 
 		case EBUSY:
-			if (prop == ZFS_PROP_VOLBLOCKSIZE) {
-				zfs_error(dgettext(TEXT_DOMAIN,
-				    "cannot set %s for '%s': "
-				    "volume already contains data"),
-				    propname, zhp->zfs_name);
-			} else {
-				zfs_baderror(errno);
-			}
+			if (prop == ZFS_PROP_VOLBLOCKSIZE)
+				(void) zfs_error(hdl, EZFS_VOLHASDATA, errbuf);
+			else
+				return (zfs_standard_error(hdl, EBUSY, errbuf));
 			break;
 
 		case EROFS:
-			zfs_error(dgettext(TEXT_DOMAIN, "cannot set %s for "
-			    "'%s': read only %s"), propname, zhp->zfs_name,
-			    zfs_type_to_name(zhp->zfs_type));
+			(void) zfs_error(hdl, EZFS_DSREADONLY, errbuf);
 			break;
 
 		case EOVERFLOW:
@@ -957,16 +906,13 @@ zfs_prop_set(zfs_handle_t *zhp, zfs_prop_t prop, const char *propval)
 			 */
 #ifdef _ILP32
 			if (prop == ZFS_PROP_VOLSIZE) {
-				zfs_error(dgettext(TEXT_DOMAIN,
-				    "cannot set %s for '%s': "
-				    "max volume size is 1TB on 32-bit systems"),
-				    propname, zhp->zfs_name);
+				(void) zfs_error(hdl, EZFS_VOLTOOBIG, errbuf);
 				break;
 			}
 #endif
-			zfs_baderror(errno);
+			/* FALLTHROUGH */
 		default:
-			zfs_baderror(errno);
+			(void) zfs_standard_error(hdl, errno, errbuf);
 		}
 	} else {
 		/*
@@ -994,44 +940,35 @@ zfs_prop_inherit(zfs_handle_t *zhp, zfs_prop_t prop)
 	zfs_cmd_t zc = { 0 };
 	int ret;
 	prop_changelist_t *cl;
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
+	char errbuf[1024];
+
+	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+	    "cannot inherit %s for '%s'"), propname, zhp->zfs_name);
 
 	/*
 	 * Verify that this property is inheritable.
 	 */
-	if (zfs_prop_readonly(prop)) {
-		zfs_error(dgettext(TEXT_DOMAIN,
-		    "cannot inherit %s for '%s': property is read-only"),
-		    propname, zhp->zfs_name);
-		return (-1);
-	}
+	if (zfs_prop_readonly(prop))
+		return (zfs_error(hdl, EZFS_PROPREADONLY, errbuf));
 
-	if (!zfs_prop_inheritable(prop)) {
-		zfs_error(dgettext(TEXT_DOMAIN,
-		    "cannot inherit %s for '%s': property is not inheritable"),
-		    propname, zhp->zfs_name);
-		return (-1);
-	}
+	if (!zfs_prop_inheritable(prop))
+		return (zfs_error(hdl, EZFS_PROPNONINHERIT, errbuf));
 
 	/*
 	 * Check to see if the value applies to this type
 	 */
-	if (!zfs_prop_valid_for_type(prop, zhp->zfs_type)) {
-		zfs_error(dgettext(TEXT_DOMAIN,
-		    "cannot inherit %s for '%s': property does "
-		    "not apply to %ss"), propname, zhp->zfs_name,
-		    zfs_type_to_name(zhp->zfs_type));
-		return (-1);
-	}
+	if (!zfs_prop_valid_for_type(prop, zhp->zfs_type))
+		return (zfs_error(hdl, EZFS_PROPTYPE, errbuf));
 
 	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
 	(void) strlcpy(zc.zc_prop_name, propname, sizeof (zc.zc_prop_name));
 
 	if (prop == ZFS_PROP_MOUNTPOINT && getzoneid() == GLOBAL_ZONEID &&
 	    zfs_prop_get_int(zhp, ZFS_PROP_ZONED)) {
-		zfs_error(dgettext(TEXT_DOMAIN, "cannot inherit %s for '%s', "
-		    "dataset is used in a non-global zone"), propname,
-		    zhp->zfs_name);
-		return (-1);
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "dataset is used in a non-global zone"));
+		return (zfs_error(hdl, EZFS_ZONED, errbuf));
 	}
 
 	/*
@@ -1041,11 +978,10 @@ zfs_prop_inherit(zfs_handle_t *zhp, zfs_prop_t prop)
 		return (-1);
 
 	if (prop == ZFS_PROP_MOUNTPOINT && changelist_haszonedchild(cl)) {
-		zfs_error(dgettext(TEXT_DOMAIN, "cannot inherit %s for '%s', "
-			"child dataset with inherited mountpoint is "
-			"used in a non-global zone"),
-			propname, zhp->zfs_name);
-		ret = -1;
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "child dataset with inherited mountpoint is used "
+		    "in a non-global zone"));
+		ret = zfs_error(hdl, EZFS_ZONED, errbuf);
 		goto error;
 	}
 
@@ -1054,27 +990,9 @@ zfs_prop_inherit(zfs_handle_t *zhp, zfs_prop_t prop)
 
 	zc.zc_numints = 0;
 
-	if ((ret = zfs_ioctl(ZFS_IOC_SET_PROP, &zc)) != 0) {
-		switch (errno) {
-		case EPERM:
-			zfs_error(dgettext(TEXT_DOMAIN,
-			    "cannot inherit %s for '%s': permission "
-			    "denied"), propname, zhp->zfs_name);
-			break;
-		case ENOENT:
-			zfs_error(dgettext(TEXT_DOMAIN,
-			    "cannot open '%s': no such %s"), zhp->zfs_name,
-			    zfs_type_to_name(zhp->zfs_type));
-			break;
-		case ENOSPC:
-			zfs_error(dgettext(TEXT_DOMAIN,
-			    "cannot inherit %s for '%s': "
-			    "out of space"), propname, zhp->zfs_name);
-			break;
-		default:
-			zfs_baderror(errno);
-		}
-
+	if ((ret = ioctl(zhp->zfs_hdl->libzfs_fd,
+	    ZFS_IOC_SET_PROP, &zc)) != 0) {
+		return (zfs_standard_error(hdl, errno, errbuf));
 	} else {
 
 		if ((ret = changelist_postfix(cl)) != 0)
@@ -1151,11 +1069,10 @@ getprop_string(zfs_handle_t *zhp, zfs_prop_t prop, char **source)
  * If they differ from the on-disk values, report the current values and mark
  * the source "temporary".
  */
-static uint64_t
+static int
 get_numeric_property(zfs_handle_t *zhp, zfs_prop_t prop, zfs_source_t *src,
-    char **source)
+    char **source, uint64_t *val)
 {
-	uint64_t val;
 	struct mnttab mnt;
 
 	*source = NULL;
@@ -1167,86 +1084,90 @@ get_numeric_property(zfs_handle_t *zhp, zfs_prop_t prop, zfs_source_t *src,
 
 	switch (prop) {
 	case ZFS_PROP_ATIME:
-		val = getprop_uint64(zhp, prop, source);
+		*val = getprop_uint64(zhp, prop, source);
 
-		if (hasmntopt(&mnt, MNTOPT_ATIME) && !val) {
-			val = TRUE;
+		if (hasmntopt(&mnt, MNTOPT_ATIME) && !*val) {
+			*val = B_TRUE;
 			if (src)
 				*src = ZFS_SRC_TEMPORARY;
-		} else if (hasmntopt(&mnt, MNTOPT_NOATIME) && val) {
-			val = FALSE;
+		} else if (hasmntopt(&mnt, MNTOPT_NOATIME) && *val) {
+			*val = B_FALSE;
 			if (src)
 				*src = ZFS_SRC_TEMPORARY;
 		}
-		return (val);
+		break;
 
 	case ZFS_PROP_AVAILABLE:
-		return (zhp->zfs_dmustats.dds_available);
+		*val = zhp->zfs_dmustats.dds_available;
+		break;
 
 	case ZFS_PROP_DEVICES:
-		val = getprop_uint64(zhp, prop, source);
+		*val = getprop_uint64(zhp, prop, source);
 
-		if (hasmntopt(&mnt, MNTOPT_DEVICES) && !val) {
-			val = TRUE;
+		if (hasmntopt(&mnt, MNTOPT_DEVICES) && !*val) {
+			*val = B_TRUE;
 			if (src)
 				*src = ZFS_SRC_TEMPORARY;
-		} else if (hasmntopt(&mnt, MNTOPT_NODEVICES) && val) {
-			val = FALSE;
+		} else if (hasmntopt(&mnt, MNTOPT_NODEVICES) && *val) {
+			*val = B_FALSE;
 			if (src)
 				*src = ZFS_SRC_TEMPORARY;
 		}
-		return (val);
+		break;
 
 	case ZFS_PROP_EXEC:
-		val = getprop_uint64(zhp, prop, source);
+		*val = getprop_uint64(zhp, prop, source);
 
-		if (hasmntopt(&mnt, MNTOPT_EXEC) && !val) {
-			val = TRUE;
+		if (hasmntopt(&mnt, MNTOPT_EXEC) && !*val) {
+			*val = B_TRUE;
 			if (src)
 				*src = ZFS_SRC_TEMPORARY;
-		} else if (hasmntopt(&mnt, MNTOPT_NOEXEC) && val) {
-			val = FALSE;
+		} else if (hasmntopt(&mnt, MNTOPT_NOEXEC) && *val) {
+			*val = B_FALSE;
 			if (src)
 				*src = ZFS_SRC_TEMPORARY;
 		}
-		return (val);
+		break;
 
 	case ZFS_PROP_RECORDSIZE:
 	case ZFS_PROP_COMPRESSION:
 	case ZFS_PROP_ZONED:
-		val = getprop_uint64(zhp, prop, source);
-		return (val);
+		*val = getprop_uint64(zhp, prop, source);
+		break;
 
 	case ZFS_PROP_READONLY:
-		val = getprop_uint64(zhp, prop, source);
+		*val = getprop_uint64(zhp, prop, source);
 
-		if (hasmntopt(&mnt, MNTOPT_RO) && !val) {
-			val = TRUE;
+		if (hasmntopt(&mnt, MNTOPT_RO) && !*val) {
+			*val = B_TRUE;
 			if (src)
 				*src = ZFS_SRC_TEMPORARY;
-		} else if (hasmntopt(&mnt, MNTOPT_RW) && val) {
-			val = FALSE;
+		} else if (hasmntopt(&mnt, MNTOPT_RW) && *val) {
+			*val = B_FALSE;
 			if (src)
 				*src = ZFS_SRC_TEMPORARY;
 		}
-		return (val);
+		break;
 
 	case ZFS_PROP_CREATION:
-		return (zhp->zfs_dmustats.dds_creation_time);
+		*val = zhp->zfs_dmustats.dds_creation_time;
+		break;
 
 	case ZFS_PROP_QUOTA:
 		if (zhp->zfs_dmustats.dds_quota == 0)
 			*source = "";	/* default */
 		else
 			*source = zhp->zfs_name;
-		return (zhp->zfs_dmustats.dds_quota);
+		*val = zhp->zfs_dmustats.dds_quota;
+		break;
 
 	case ZFS_PROP_RESERVATION:
 		if (zhp->zfs_dmustats.dds_reserved == 0)
 			*source = "";	/* default */
 		else
 			*source = zhp->zfs_name;
-		return (zhp->zfs_dmustats.dds_reserved);
+		*val = zhp->zfs_dmustats.dds_reserved;
+		break;
 
 	case ZFS_PROP_COMPRESSRATIO:
 		/*
@@ -1255,43 +1176,50 @@ get_numeric_property(zfs_handle_t *zhp, zfs_prop_t prop, zfs_source_t *src,
 		 * 100, so '2.5x' would be returned as 250.
 		 */
 		if (zhp->zfs_dmustats.dds_compressed_bytes == 0)
-			return (100ULL);
+			*val = 100ULL;
 		else
-			return (zhp->zfs_dmustats.dds_uncompressed_bytes * 100 /
+			*val =
+			    (zhp->zfs_dmustats.dds_uncompressed_bytes * 100 /
 			    zhp->zfs_dmustats.dds_compressed_bytes);
+		break;
 
 	case ZFS_PROP_REFERENCED:
 		/*
 		 * 'referenced' refers to the amount of physical space
 		 * referenced (possibly shared) by this object.
 		 */
-		return (zhp->zfs_dmustats.dds_space_refd);
+		*val = zhp->zfs_dmustats.dds_space_refd;
+		break;
 
 	case ZFS_PROP_SETUID:
-		val = getprop_uint64(zhp, prop, source);
+		*val = getprop_uint64(zhp, prop, source);
 
-		if (hasmntopt(&mnt, MNTOPT_SETUID) && !val) {
-			val = TRUE;
+		if (hasmntopt(&mnt, MNTOPT_SETUID) && !*val) {
+			*val = B_TRUE;
 			if (src)
 				*src = ZFS_SRC_TEMPORARY;
-		} else if (hasmntopt(&mnt, MNTOPT_NOSETUID) && val) {
-			val = FALSE;
+		} else if (hasmntopt(&mnt, MNTOPT_NOSETUID) && *val) {
+			*val = B_FALSE;
 			if (src)
 				*src = ZFS_SRC_TEMPORARY;
 		}
-		return (val);
+		break;
 
 	case ZFS_PROP_VOLSIZE:
-		return (zhp->zfs_volsize);
+		*val = zhp->zfs_volsize;
+		break;
 
 	case ZFS_PROP_VOLBLOCKSIZE:
-		return (zhp->zfs_volblocksize);
+		*val = zhp->zfs_volblocksize;
+		break;
 
 	case ZFS_PROP_USED:
-		return (zhp->zfs_dmustats.dds_space_used);
+		*val = zhp->zfs_dmustats.dds_space_used;
+		break;
 
 	case ZFS_PROP_CREATETXG:
-		return (zhp->zfs_dmustats.dds_creation_txg);
+		*val = zhp->zfs_dmustats.dds_creation_txg;
+		break;
 
 	case ZFS_PROP_MOUNTED:
 		/*
@@ -1306,16 +1234,22 @@ get_numeric_property(zfs_handle_t *zhp, zfs_prop_t prop, zfs_source_t *src,
 
 			search.mnt_special = (char *)zhp->zfs_name;
 			search.mnt_fstype = MNTTYPE_ZFS;
-			rewind(zfs_mnttab());
+			rewind(zhp->zfs_hdl->libzfs_mnttab);
 
-			if (getmntany(zfs_mnttab(), &entry, &search) == 0)
-				zhp->zfs_mntopts =
-				    zfs_strdup(entry.mnt_mntopts);
+			if (getmntany(zhp->zfs_hdl->libzfs_mnttab, &entry,
+			    &search) == 0 && (zhp->zfs_mntopts =
+			    zfs_strdup(zhp->zfs_hdl,
+			    entry.mnt_mntopts)) == NULL)
+				return (-1);
 		}
-		return (zhp->zfs_mntopts != NULL);
+		*val = (zhp->zfs_mntopts != NULL);
+		break;
 
 	default:
-		zfs_baderror(EINVAL);
+		zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
+		    "cannot get non-numeric property"));
+		return (zfs_error(zhp->zfs_hdl, EZFS_BADPROP,
+		    dgettext(TEXT_DOMAIN, "internal error")));
 	}
 
 	return (0);
@@ -1355,7 +1289,7 @@ get_source(zfs_handle_t *zhp, zfs_source_t *srctype, char *source,
  */
 int
 zfs_prop_get(zfs_handle_t *zhp, zfs_prop_t prop, char *propbuf, size_t proplen,
-    zfs_source_t *src, char *statbuf, size_t statlen, int literal)
+    zfs_source_t *src, char *statbuf, size_t statlen, boolean_t literal)
 {
 	char *source = NULL;
 	uint64_t val;
@@ -1383,8 +1317,9 @@ zfs_prop_get(zfs_handle_t *zhp, zfs_prop_t prop, char *propbuf, size_t proplen,
 		 * Basic boolean values are built on top of
 		 * get_numeric_property().
 		 */
-		nicebool(get_numeric_property(zhp, prop, src, &source),
-		    propbuf, proplen);
+		if (get_numeric_property(zhp, prop, src, &source, &val) != 0)
+			return (-1);
+		nicebool(val, propbuf, proplen);
 
 		break;
 
@@ -1399,7 +1334,8 @@ zfs_prop_get(zfs_handle_t *zhp, zfs_prop_t prop, char *propbuf, size_t proplen,
 		 * Basic numeric values are built on top of
 		 * get_numeric_property().
 		 */
-		val = get_numeric_property(zhp, prop, src, &source);
+		if (get_numeric_property(zhp, prop, src, &source, &val) != 0)
+			return (-1);
 		if (literal)
 			(void) snprintf(propbuf, proplen, "%llu", val);
 		else
@@ -1533,7 +1469,8 @@ zfs_prop_get(zfs_handle_t *zhp, zfs_prop_t prop, char *propbuf, size_t proplen,
 
 	case ZFS_PROP_QUOTA:
 	case ZFS_PROP_RESERVATION:
-		val = get_numeric_property(zhp, prop, src, &source);
+		if (get_numeric_property(zhp, prop, src, &source, &val) != 0)
+			return (-1);
 
 		/*
 		 * If quota or reservation is 0, we translate this into 'none'
@@ -1555,7 +1492,8 @@ zfs_prop_get(zfs_handle_t *zhp, zfs_prop_t prop, char *propbuf, size_t proplen,
 		break;
 
 	case ZFS_PROP_COMPRESSRATIO:
-		val = get_numeric_property(zhp, prop, src, &source);
+		if (get_numeric_property(zhp, prop, src, &source, &val) != 0)
+			return (-1);
 		(void) snprintf(propbuf, proplen, "%lld.%02lldx", val / 100,
 		    val % 100);
 		break;
@@ -1572,7 +1510,7 @@ zfs_prop_get(zfs_handle_t *zhp, zfs_prop_t prop, char *propbuf, size_t proplen,
 			str = "snapshot";
 			break;
 		default:
-			zfs_baderror(zhp->zfs_type);
+			abort();
 		}
 		(void) snprintf(propbuf, proplen, "%s", str);
 		break;
@@ -1584,7 +1522,10 @@ zfs_prop_get(zfs_handle_t *zhp, zfs_prop_t prop, char *propbuf, size_t proplen,
 		 * it's a boolean value, the typical values of "on" and "off"
 		 * don't make sense, so we translate to "yes" and "no".
 		 */
-		if (get_numeric_property(zhp, ZFS_PROP_MOUNTED, src, &source))
+		if (get_numeric_property(zhp, ZFS_PROP_MOUNTED,
+		    src, &source, &val) != 0)
+			return (-1);
+		if (val)
 			(void) strlcpy(propbuf, "yes", proplen);
 		else
 			(void) strlcpy(propbuf, "no", proplen);
@@ -1600,7 +1541,7 @@ zfs_prop_get(zfs_handle_t *zhp, zfs_prop_t prop, char *propbuf, size_t proplen,
 		break;
 
 	default:
-		zfs_baderror(EINVAL);
+		abort();
 	}
 
 	get_source(zhp, src, source, statbuf, statlen);
@@ -1618,8 +1559,11 @@ zfs_prop_get_int(zfs_handle_t *zhp, zfs_prop_t prop)
 {
 	char *source;
 	zfs_source_t sourcetype = ZFS_SRC_NONE;
+	uint64_t val;
+
+	(void) get_numeric_property(zhp, prop, &sourcetype, &source, &val);
 
-	return (get_numeric_property(zhp, prop, &sourcetype, &source));
+	return (val);
 }
 
 /*
@@ -1635,12 +1579,15 @@ zfs_prop_get_numeric(zfs_handle_t *zhp, zfs_prop_t prop, uint64_t *value,
 	 * Check to see if this property applies to our object
 	 */
 	if (!zfs_prop_valid_for_type(prop, zhp->zfs_type))
-		return (-1);
+		return (zfs_error(zhp->zfs_hdl, EZFS_PROPTYPE,
+		    dgettext(TEXT_DOMAIN, "cannot get property '%s'"),
+		    zfs_prop_to_name(prop)));
 
 	if (src)
 		*src = ZFS_SRC_NONE;
 
-	*value = get_numeric_property(zhp, prop, src, &source);
+	if (get_numeric_property(zhp, prop, src, &source, value) != 0)
+		return (-1);
 
 	get_source(zhp, src, source, statbuf, statlen);
 
@@ -1676,7 +1623,7 @@ zfs_iter_filesystems(zfs_handle_t *zhp, zfs_iter_f func, void *data)
 	int ret;
 
 	for ((void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
-	    zfs_ioctl(ZFS_IOC_DATASET_LIST_NEXT, &zc) == 0;
+	    ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_DATASET_LIST_NEXT, &zc) == 0;
 	    (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name))) {
 		/*
 		 * Ignore private dataset names.
@@ -1688,7 +1635,8 @@ zfs_iter_filesystems(zfs_handle_t *zhp, zfs_iter_f func, void *data)
 		 * Silently ignore errors, as the only plausible explanation is
 		 * that the pool has since been removed.
 		 */
-		if ((nzhp = make_dataset_handle(zc.zc_name)) == NULL)
+		if ((nzhp = make_dataset_handle(zhp->zfs_hdl,
+		    zc.zc_name)) == NULL)
 			continue;
 
 		if ((ret = func(nzhp, data)) != 0)
@@ -1701,7 +1649,8 @@ zfs_iter_filesystems(zfs_handle_t *zhp, zfs_iter_f func, void *data)
 	 * obtained the handle.
 	 */
 	if (errno != ESRCH && errno != ENOENT)
-		zfs_baderror(errno);
+		return (zfs_standard_error(zhp->zfs_hdl, errno,
+		    dgettext(TEXT_DOMAIN, "cannot iterate filesystems")));
 
 	return (0);
 }
@@ -1717,10 +1666,12 @@ zfs_iter_snapshots(zfs_handle_t *zhp, zfs_iter_f func, void *data)
 	int ret;
 
 	for ((void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
-	    zfs_ioctl(ZFS_IOC_SNAPSHOT_LIST_NEXT, &zc) == 0;
+	    ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_SNAPSHOT_LIST_NEXT,
+	    &zc) == 0;
 	    (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name))) {
 
-		if ((nzhp = make_dataset_handle(zc.zc_name)) == NULL)
+		if ((nzhp = make_dataset_handle(zhp->zfs_hdl,
+		    zc.zc_name)) == NULL)
 			continue;
 
 		if ((ret = func(nzhp, data)) != 0)
@@ -1733,7 +1684,8 @@ zfs_iter_snapshots(zfs_handle_t *zhp, zfs_iter_f func, void *data)
 	 * obtained the handle.  Silently ignore this case, and return success.
 	 */
 	if (errno != ESRCH && errno != ENOENT)
-		zfs_baderror(errno);
+		return (zfs_standard_error(zhp->zfs_hdl, errno,
+		    dgettext(TEXT_DOMAIN, "cannot iterate filesystems")));
 
 	return (0);
 }
@@ -1774,21 +1726,22 @@ parent_name(const char *path, char *buf, size_t buflen)
  * Checks to make sure that the given path has a parent, and that it exists.
  */
 static int
-check_parents(const char *path, zfs_type_t type)
+check_parents(libzfs_handle_t *hdl, const char *path)
 {
 	zfs_cmd_t zc = { 0 };
 	char parent[ZFS_MAXNAMELEN];
 	char *slash;
 	zfs_handle_t *zhp;
+	char errbuf[1024];
+
+	(void) snprintf(errbuf, sizeof (errbuf), "cannot create '%s'",
+	    path);
 
 	/* get parent, and check to see if this is just a pool */
 	if (parent_name(path, parent, sizeof (parent)) != 0) {
-		zfs_error(dgettext(TEXT_DOMAIN,
-		    "cannot create '%s': missing dataset name"),
-		    path, zfs_type_to_name(type));
-		zfs_error(dgettext(TEXT_DOMAIN,
-		    "use 'zpool create' to create a storage pool"));
-		return (-1);
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "missing dataset name"));
+		return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
 	}
 
 	/* check to see if the pool exists */
@@ -1796,40 +1749,39 @@ check_parents(const char *path, zfs_type_t type)
 		slash = parent + strlen(parent);
 	(void) strncpy(zc.zc_name, parent, slash - parent);
 	zc.zc_name[slash - parent] = '\0';
-	if (zfs_ioctl(ZFS_IOC_OBJSET_STATS, &zc) != 0 &&
+	if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0 &&
 	    errno == ENOENT) {
-		zfs_error(dgettext(TEXT_DOMAIN,
-		    "cannot create '%s': no such pool '%s'"), path, zc.zc_name);
-		return (-1);
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "no such pool '%s'"), zc.zc_name);
+		return (zfs_error(hdl, EZFS_NOENT, errbuf));
 	}
 
 	/* check to see if the parent dataset exists */
-	if ((zhp = make_dataset_handle(parent)) == NULL) {
+	if ((zhp = make_dataset_handle(hdl, parent)) == NULL) {
 		switch (errno) {
 		case ENOENT:
-			zfs_error(dgettext(TEXT_DOMAIN,
-			    "cannot create '%s': parent does not exist"), path);
-			return (-1);
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "parent does not exist"));
+			return (zfs_error(hdl, EZFS_NOENT, errbuf));
 
 		default:
-			zfs_baderror(errno);
+			return (zfs_standard_error(hdl, errno, errbuf));
 		}
 	}
 
 	/* we are in a non-global zone, but parent is in the global zone */
 	if (getzoneid() != GLOBAL_ZONEID &&
 	    !zfs_prop_get_int(zhp, ZFS_PROP_ZONED)) {
-		zfs_error(dgettext(TEXT_DOMAIN,
-		    "cannot create '%s': permission denied"), path);
+		(void) zfs_standard_error(hdl, EPERM, errbuf);
 		zfs_close(zhp);
 		return (-1);
 	}
 
 	/* make sure parent is a filesystem */
 	if (zfs_get_type(zhp) != ZFS_TYPE_FILESYSTEM) {
-		zfs_error(dgettext(TEXT_DOMAIN,
-		    "cannot create '%s': parent is not a filesystem"),
-		    path);
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "parent is not a filesystem"));
+		(void) zfs_error(hdl, EZFS_BADTYPE, errbuf);
 		zfs_close(zhp);
 		return (-1);
 	}
@@ -1843,44 +1795,35 @@ check_parents(const char *path, zfs_type_t type)
  * only for volumes, and indicate the size and blocksize of the volume.
  */
 int
-zfs_create(const char *path, zfs_type_t type,
+zfs_create(libzfs_handle_t *hdl, const char *path, zfs_type_t type,
 	const char *sizestr, const char *blocksizestr)
 {
-	char reason[64];
 	zfs_cmd_t zc = { 0 };
 	int ret;
 	uint64_t size = 0;
 	uint64_t blocksize = zfs_prop_default_numeric(ZFS_PROP_VOLBLOCKSIZE);
+	char errbuf[1024];
 
 	/* convert sizestr into integer size */
-	if (sizestr != NULL && nicestrtonum(sizestr, &size,
-	    reason, sizeof (reason)) != 0) {
-		zfs_error(dgettext(TEXT_DOMAIN,
-		    "bad volume size '%s': %s"), sizestr, reason);
-		return (-1);
-	}
+	if (sizestr != NULL && nicestrtonum(hdl, sizestr, &size) != 0)
+		return (zfs_error(hdl, EZFS_BADPROP, dgettext(TEXT_DOMAIN,
+		    "bad volume size '%s'"), sizestr));
 
 	/* convert blocksizestr into integer blocksize */
-	if (blocksizestr != NULL && nicestrtonum(blocksizestr, &blocksize,
-	    reason, sizeof (reason)) != 0) {
-		zfs_error(dgettext(TEXT_DOMAIN,
-		    "bad volume blocksize '%s': %s"), blocksizestr, reason);
-		return (-1);
-	}
+	if (blocksizestr != NULL && nicestrtonum(hdl, blocksizestr,
+	    &blocksize) != 0)
+		return (zfs_error(hdl, EZFS_BADPROP, dgettext(TEXT_DOMAIN,
+		    "bad volume blocksize '%s'"), blocksizestr));
+
+	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+	    "cannot create '%s'"), path);
 
 	/* validate the path, taking care to note the extended error message */
-	if (!zfs_validate_name(path, type, reason, sizeof (reason))) {
-		zfs_error(dgettext(TEXT_DOMAIN,
-		    "cannot create '%s': %s in %s name"), path, reason,
-		    zfs_type_to_name(type));
-		if (strstr(reason, "snapshot") != NULL)
-			zfs_error(dgettext(TEXT_DOMAIN,
-			    "use 'zfs snapshot' to create a snapshot"));
-		return (-1);
-	}
+	if (!zfs_validate_name(hdl, path, type))
+		return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
 
 	/* validate parents exist */
-	if (check_parents(path, type) != 0)
+	if (check_parents(hdl, path) != 0)
 		return (-1);
 
 	/*
@@ -1891,10 +1834,10 @@ zfs_create(const char *path, zfs_type_t type,
 	 * first try to see if the dataset exists.
 	 */
 	(void) strlcpy(zc.zc_name, path, sizeof (zc.zc_name));
-	if (zfs_ioctl(ZFS_IOC_OBJSET_STATS, &zc) == 0) {
-		zfs_error(dgettext(TEXT_DOMAIN,
-		    "cannot create '%s': dataset exists"), path);
-		return (-1);
+	if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) == 0) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "dataset already exists"));
+		return (zfs_error(hdl, EZFS_EXISTS, errbuf));
 	}
 
 	if (type == ZFS_TYPE_VOLUME)
@@ -1911,30 +1854,30 @@ zfs_create(const char *path, zfs_type_t type,
 		 * zero.
 		 */
 		if (size == 0) {
-			zfs_error(dgettext(TEXT_DOMAIN,
-			    "bad volume size '%s': cannot be zero"), sizestr);
-			return (-1);
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "cannot be zero"));
+			return (zfs_error(hdl, EZFS_BADPROP,
+			    dgettext(TEXT_DOMAIN, "bad volume size '%s'"),
+			    sizestr));
 		}
 
 		if (blocksize < SPA_MINBLOCKSIZE ||
 		    blocksize > SPA_MAXBLOCKSIZE || !ISP2(blocksize)) {
-			zfs_error(dgettext(TEXT_DOMAIN,
-			    "bad volume block size '%s': "
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 			    "must be power of 2 from %u to %uk"),
-			    blocksizestr,
 			    (uint_t)SPA_MINBLOCKSIZE,
 			    (uint_t)SPA_MAXBLOCKSIZE >> 10);
-			return (-1);
+			return (zfs_error(hdl, EZFS_BADPROP,
+			    dgettext(TEXT_DOMAIN,
+			    "bad volume block size '%s'"), blocksizestr));
 		}
 
 		if (size % blocksize != 0) {
-			char buf[64];
-			zfs_nicenum(blocksize, buf, sizeof (buf));
-			zfs_error(dgettext(TEXT_DOMAIN,
-			    "bad volume size '%s': "
-			    "must be multiple of volume block size (%s)"),
-			    sizestr, buf);
-			return (-1);
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "must be a multiple of volume block size"));
+			return (zfs_error(hdl, EZFS_BADPROP,
+			    dgettext(TEXT_DOMAIN, "bad volume size '%s'"),
+			    sizestr));
 		}
 
 		zc.zc_volsize = size;
@@ -1942,10 +1885,10 @@ zfs_create(const char *path, zfs_type_t type,
 	}
 
 	/* create the dataset */
-	ret = zfs_ioctl(ZFS_IOC_CREATE, &zc);
+	ret = ioctl(hdl->libzfs_fd, ZFS_IOC_CREATE, &zc);
 
 	if (ret == 0 && type == ZFS_TYPE_VOLUME)
-		ret = zvol_create_link(path);
+		ret = zvol_create_link(hdl, path);
 
 	/* check for failure */
 	if (ret != 0) {
@@ -1954,81 +1897,38 @@ zfs_create(const char *path, zfs_type_t type,
 
 		switch (errno) {
 		case ENOENT:
-			/*
-			 * The parent dataset has been deleted since our
-			 * previous check.
-			 */
-			zfs_error(dgettext(TEXT_DOMAIN,
-			    "cannot create '%s': no such parent '%s'"),
-			    path, parent);
-			break;
-
-		case EPERM:
-			/*
-			 * The user doesn't have permission to create a new
-			 * dataset here.
-			 */
-			zfs_error(dgettext(TEXT_DOMAIN,
-			    "cannot create '%s': permission denied"), path);
-			break;
-
-		case EDQUOT:
-		case ENOSPC:
-			/*
-			 * The parent dataset does not have enough free space
-			 * to create a new dataset.
-			 */
-			zfs_error(dgettext(TEXT_DOMAIN,
-			    "cannot create '%s': not enough space in '%s'"),
-			    path, parent);
-			break;
-
-		case EEXIST:
-			/*
-			 * The target dataset already exists.  We should have
-			 * caught this above, but there may be some unexplained
-			 * race condition.
-			 */
-			zfs_error(dgettext(TEXT_DOMAIN,
-			    "cannot create '%s': dataset exists"), path);
-			break;
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "no such parent '%s'"), parent);
+			return (zfs_error(hdl, EZFS_NOENT, errbuf));
 
 		case EINVAL:
-			/*
-			 * The target dataset does not support children.
-			 */
-			zfs_error(dgettext(TEXT_DOMAIN,
-			    "cannot create '%s': children unsupported in '%s'"),
-			    path, parent);
-			break;
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "parent '%s' is not a filesystem"), parent);
+			return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
 
 		case EDOM:
-			zfs_error(dgettext(TEXT_DOMAIN, "bad %s value '%s': "
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 			    "must be power of 2 from %u to %uk"),
-			    zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE),
-			    blocksizestr ? blocksizestr : "<unknown>",
 			    (uint_t)SPA_MINBLOCKSIZE,
 			    (uint_t)SPA_MAXBLOCKSIZE >> 10);
-			break;
+
+			return (zfs_error(hdl, EZFS_BADPROP,
+			    dgettext(TEXT_DOMAIN, "bad block size '%s'"),
+			    blocksizestr ? blocksizestr : "<unknown>"));
+
 #ifdef _ILP32
 		case EOVERFLOW:
 			/*
 			 * This platform can't address a volume this big.
 			 */
-			if (type == ZFS_TYPE_VOLUME) {
-				zfs_error(dgettext(TEXT_DOMAIN,
-				    "cannot create '%s': "
-				    "max volume size is 1TB on 32-bit systems"),
-				    path);
-				break;
-			}
+			if (type == ZFS_TYPE_VOLUME)
+				return (zfs_error(hdl, EZFS_VOLTOOBIG,
+				    errbuf));
 #endif
-
+			/* FALLTHROUGH */
 		default:
-			zfs_baderror(errno);
+			return (zfs_standard_error(hdl, errno, errbuf));
 		}
-
-		return (-1);
 	}
 
 	return (0);
@@ -2043,6 +1943,7 @@ zfs_destroy(zfs_handle_t *zhp)
 {
 	zfs_cmd_t zc = { 0 };
 	int ret;
+	char errbuf[1024];
 
 	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
 
@@ -2051,7 +1952,7 @@ zfs_destroy(zfs_handle_t *zhp)
 	 * so that we do the right thing for snapshots of volumes.
 	 */
 	if (zhp->zfs_volblocksize != 0) {
-		if (zvol_remove_link(zhp->zfs_name) != 0)
+		if (zvol_remove_link(zhp->zfs_hdl, zhp->zfs_name) != 0)
 			return (-1);
 
 		zc.zc_objset_type = DMU_OST_ZVOL;
@@ -2059,63 +1960,15 @@ zfs_destroy(zfs_handle_t *zhp)
 		zc.zc_objset_type = DMU_OST_ZFS;
 	}
 
-	ret = zfs_ioctl(ZFS_IOC_DESTROY, &zc);
+	ret = ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_DESTROY, &zc);
 
-	if (ret != 0) {
-		switch (errno) {
-
-		case EPERM:
-			/*
-			 * We don't have permission to destroy this dataset.
-			 */
-			zfs_error(dgettext(TEXT_DOMAIN,
-			    "cannot destroy '%s': permission denied"),
-			    zhp->zfs_name);
-			break;
+	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+	    "cannot destroy '%s'"), zhp->zfs_name);
 
-		case EIO:
-			/*
-			 * I/O error.
-			 */
-			zfs_error(dgettext(TEXT_DOMAIN,
-			    "cannot destroy '%s': I/O error"),
-			    zhp->zfs_name);
-			break;
-
-		case ENOENT:
-			/*
-			 * We've hit a race condition where the dataset has been
-			 * destroyed since we opened it.
-			 */
-			zfs_error(dgettext(TEXT_DOMAIN,
-			    "cannot destroy '%s': no such %s"),
-			    zhp->zfs_name, zfs_type_to_name(zhp->zfs_type));
-			break;
-
-		case EBUSY:
-			/*
-			 * Even if we destroy all children, there is a chance we
-			 * can hit this case if:
-			 *
-			 * 	- A child dataset has since been created
-			 * 	- A filesystem is mounted
-			 *
-			 * This error message is awful, but hopefully we've
-			 * already caught the common cases (and aborted more
-			 * appropriately) before calling this function.  There's
-			 * nothing else we can do at this point.
-			 */
-			zfs_error(dgettext(TEXT_DOMAIN,
-			    "cannot destroy '%s': %s is busy"),
-			    zhp->zfs_name, zfs_type_to_name(zhp->zfs_type));
-			break;
-
-		default:
-			zfs_baderror(errno);
-		}
-
-		return (-1);
-	}
+	if (ret != 0)
+		return (zfs_standard_error(zhp->zfs_hdl, errno,
+		    dgettext(TEXT_DOMAIN, "cannot destroy '%s'"),
+		    zhp->zfs_name));
 
 	remove_mountpoint(zhp);
 
@@ -2128,24 +1981,23 @@ zfs_destroy(zfs_handle_t *zhp)
 int
 zfs_clone(zfs_handle_t *zhp, const char *target)
 {
-	char reason[64];
 	zfs_cmd_t zc = { 0 };
 	char parent[ZFS_MAXNAMELEN];
 	int ret;
+	char errbuf[1024];
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
 
 	assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
 
+	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+	    "cannot create '%s'"), target);
+
 	/* validate the target name */
-	if (!zfs_validate_name(target, ZFS_TYPE_FILESYSTEM, reason,
-	    sizeof (reason))) {
-		zfs_error(dgettext(TEXT_DOMAIN,
-		    "cannot create '%s': %s in filesystem name"), target,
-		    reason, zfs_type_to_name(ZFS_TYPE_FILESYSTEM));
-		return (-1);
-	}
+	if (!zfs_validate_name(hdl, target, ZFS_TYPE_FILESYSTEM))
+		return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
 
 	/* validate parents exist */
-	if (check_parents(target, zhp->zfs_type) != 0)
+	if (check_parents(zhp->zfs_hdl, target) != 0)
 		return (-1);
 
 	(void) parent_name(target, parent, sizeof (parent));
@@ -2158,18 +2010,10 @@ zfs_clone(zfs_handle_t *zhp, const char *target)
 
 	(void) strlcpy(zc.zc_name, target, sizeof (zc.zc_name));
 	(void) strlcpy(zc.zc_filename, zhp->zfs_name, sizeof (zc.zc_filename));
-	ret = zfs_ioctl(ZFS_IOC_CREATE, &zc);
+	ret = ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_CREATE, &zc);
 
 	if (ret != 0) {
 		switch (errno) {
-		case EPERM:
-			/*
-			 * The user doesn't have permission to create the clone.
-			 */
-			zfs_error(dgettext(TEXT_DOMAIN,
-			    "cannot create '%s': permission denied"),
-			    target);
-			break;
 
 		case ENOENT:
 			/*
@@ -2181,42 +2025,147 @@ zfs_clone(zfs_handle_t *zhp, const char *target)
 			 * that doesn't exist anymore, or whether the target
 			 * dataset doesn't exist.
 			 */
-			zfs_error(dgettext(TEXT_DOMAIN,
-			    "cannot create '%s': no such parent '%s'"),
-			    target, parent);
-			break;
+			zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
+			    "no such parent '%s'"), parent);
+			return (zfs_error(zhp->zfs_hdl, EZFS_NOENT, errbuf));
 
-		case EDQUOT:
-		case ENOSPC:
-			/*
-			 * There is not enough space in the target dataset
-			 */
-			zfs_error(dgettext(TEXT_DOMAIN,
-			    "cannot create '%s': not enough space in '%s'"),
-			    target, parent);
-			break;
+		case EXDEV:
+			zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
+			    "source and target pools differ"));
+			return (zfs_error(zhp->zfs_hdl, EZFS_CROSSTARGET,
+			    errbuf));
 
-		case EEXIST:
-			/*
-			 * The target already exists.
-			 */
-			zfs_error(dgettext(TEXT_DOMAIN,
-			    "cannot create '%s': dataset exists"), target);
-			break;
+		default:
+			return (zfs_standard_error(zhp->zfs_hdl, errno,
+			    errbuf));
+		}
+	} else if (zhp->zfs_volblocksize != 0) {
+		ret = zvol_create_link(zhp->zfs_hdl, target);
+	}
 
-		case EXDEV:
+	return (ret);
+}
+
+typedef struct promote_data {
+	char cb_mountpoint[MAXPATHLEN];	/* mountpoint of the origin's fs */
+	const char *cb_target;		/* name of the clone being promoted */
+	const char *cb_errbuf;		/* message prefix for zfs_error() */
+	uint64_t cb_pivot_txg;		/* createtxg of the origin snapshot */
+} promote_data_t;
+
+/*
+ * Snapshot iterator for zfs_promote(): unmount and check for name clashes.
+ */
+static int
+promote_snap_cb(zfs_handle_t *zhp, void *data)
+{
+	promote_data_t *pd = data;
+	zfs_handle_t *szhp;
+	int err;
+	char snapname[MAXPATHLEN];
+	char *cp;
+
+	/* We don't care about snapshots after the pivot point */
+	if (zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG) > pd->cb_pivot_txg)
+		return (0);
+
+	/*
+	 * Unmount it.  We must open it first to provoke a mount, because
+	 * umount2() on an unmounted snapshot will mount it!
+	 */
+	cp = strchr(zhp->zfs_name, '@');
+	(void) snprintf(snapname, sizeof (snapname), "%s/.zfs/snapshot/%s",
+	    pd->cb_mountpoint, cp + 1);
+	err = open(snapname, O_RDONLY);
+	if (err != -1)
+		(void) close(err);
+	(void) umount2(snapname, MS_FORCE);
+
+	/* Check for conflicting names (bounded copy; cp includes the '@') */
+	(void) snprintf(snapname, sizeof (snapname), "%s%s", pd->cb_target, cp);
+	szhp = make_dataset_handle(zhp->zfs_hdl, snapname);
+	if (szhp != NULL) {
+		zfs_close(szhp);
+		zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
+		    "snapshot name '%s' from origin \n"
+		    "conflicts with '%s' from target"),
+		    zhp->zfs_name, snapname);
+		return (zfs_error(zhp->zfs_hdl, EZFS_EXISTS, pd->cb_errbuf));
+	}
+	return (0);
+}
+
+/*
+ * Promotes the given clone fs to be the clone parent (0 on success).
+ */
+int
+zfs_promote(zfs_handle_t *zhp)
+{
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
+	zfs_cmd_t zc = { 0 };
+	char parent[MAXPATHLEN];
+	char *cp;
+	int ret;
+	zfs_handle_t *pzhp;
+	promote_data_t pd;
+	char errbuf[1024];
+
+	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+	    "cannot promote '%s'"), zhp->zfs_name);
+
+	if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "snapshots can not be promoted"));
+		return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
+	}
+
+	(void) strlcpy(parent, zhp->zfs_dmustats.dds_clone_of, sizeof (parent));
+	if (parent[0] == '\0') {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "not a cloned filesystem"));
+		return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
+	}
+	if ((cp = strchr(parent, '@')) != NULL)	/* strip "@snap" from origin */
+		*cp = '\0';
+
+	/* Walk the snapshots we will be moving */
+	pzhp = zfs_open(hdl, zhp->zfs_dmustats.dds_clone_of, ZFS_TYPE_SNAPSHOT);
+	if (pzhp == NULL)
+		return (-1);
+	pd.cb_pivot_txg = zfs_prop_get_int(pzhp, ZFS_PROP_CREATETXG);
+	zfs_close(pzhp);
+	pd.cb_target = zhp->zfs_name;
+	pd.cb_errbuf = errbuf;
+	if ((pzhp = zfs_open(hdl, parent, ZFS_TYPE_ANY)) == NULL)
+		return (-1);
+	(void) zfs_prop_get(pzhp, ZFS_PROP_MOUNTPOINT, pd.cb_mountpoint,
+	    sizeof (pd.cb_mountpoint), NULL, NULL, 0, FALSE);
+	ret = zfs_iter_snapshots(pzhp, promote_snap_cb, &pd);
+	zfs_close(pzhp);	/* parent handle is not needed past this point */
+	if (ret != 0)
+		return (-1);
+
+	/* issue the ioctl */
+	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
+	ret = ioctl(hdl->libzfs_fd, ZFS_IOC_PROMOTE, &zc);
+
+	if (ret != 0) {
+		switch (errno) {
+
+		case EEXIST:
 			/*
-			 * The source and target pools differ.
+			 * There is a conflicting snapshot name.  We
+			 * should have caught this above, but they could
+			 * have renamed something in the meantime.
 			 */
-			zfs_error(dgettext(TEXT_DOMAIN, "cannot create '%s': "
-			    "source and target pools differ"), target);
-			break;
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "conflicting snapshot name from parent '%s'"),
+			    parent);
+			return (zfs_error(hdl, EZFS_EXISTS, errbuf));
 
 		default:
-			zfs_baderror(errno);
+			return (zfs_standard_error(hdl, errno, errbuf));
 		}
-	} else if (zhp->zfs_volblocksize != 0) {
-		ret = zvol_create_link(target);
 	}
 
 	return (ret);
@@ -2226,40 +2175,36 @@ zfs_clone(zfs_handle_t *zhp, const char *target)
  * Takes a snapshot of the given dataset
  */
 int
-zfs_snapshot(const char *path)
+zfs_snapshot(libzfs_handle_t *hdl, const char *path)
 {
-	char reason[64];
 	const char *delim;
 	char *parent;
 	zfs_handle_t *zhp;
 	zfs_cmd_t zc = { 0 };
 	int ret;
+	char errbuf[1024];
 
-	/* validate the snapshot name */
-	if (!zfs_validate_name(path, ZFS_TYPE_SNAPSHOT, reason,
-	    sizeof (reason))) {
-		zfs_error(dgettext(TEXT_DOMAIN,
-		    "cannot snapshot '%s': %s in snapshot name"), path,
-		    reason);
-		return (-1);
-	}
+	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+	    "cannot snapshot '%s'"), path);
+
+	/* validate the target name */
+	if (!zfs_validate_name(hdl, path, ZFS_TYPE_SNAPSHOT))
+		return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
 
 	/* make sure we have a snapshot */
 	if ((delim = strchr(path, '@')) == NULL) {
-		zfs_error(dgettext(TEXT_DOMAIN,
-		    "cannot snapshot '%s': missing '@' delim in snapshot "
-		    "name"), path);
-		zfs_error(dgettext(TEXT_DOMAIN,
-		    "use 'zfs create' to create a filesystem"));
-		return (-1);
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "missing '@' delimeter in snapshot name"));
+		return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
 	}
 
 	/* make sure the parent exists and is of the appropriate type */
-	parent = zfs_malloc(delim - path + 1);
+	if ((parent = zfs_alloc(hdl, delim - path + 1)) == NULL)
+		return (-1);
 	(void) strncpy(parent, path, delim - path);
 	parent[delim - path] = '\0';
 
-	if ((zhp = zfs_open(parent, ZFS_TYPE_FILESYSTEM |
+	if ((zhp = zfs_open(hdl, parent, ZFS_TYPE_FILESYSTEM |
 	    ZFS_TYPE_VOLUME)) == NULL) {
 		free(parent);
 		return (-1);
@@ -2272,56 +2217,17 @@ zfs_snapshot(const char *path)
 	else
 		zc.zc_objset_type = DMU_OST_ZFS;
 
-	ret = zfs_ioctl(ZFS_IOC_CREATE, &zc);
+	ret = ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_CREATE, &zc);
 
 	if (ret == 0 && zhp->zfs_type == ZFS_TYPE_VOLUME) {
-		ret = zvol_create_link(path);
+		ret = zvol_create_link(zhp->zfs_hdl, path);
 		if (ret != 0)
-			(void) zfs_ioctl(ZFS_IOC_DESTROY, &zc);
+			(void) ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_DESTROY,
+			    &zc);
 	}
 
-	if (ret != 0) {
-		switch (errno) {
-		case EPERM:
-			/*
-			 * User doesn't have permission to create a snapshot
-			 */
-			zfs_error(dgettext(TEXT_DOMAIN, "cannot create '%s': "
-			    "permission denied"), path);
-			break;
-
-		case EDQUOT:
-		case ENOSPC:
-			/*
-			 * Out of space in parent.
-			 */
-			zfs_error(dgettext(TEXT_DOMAIN, "cannot create '%s': "
-			    "not enough space in '%s'"), path, parent);
-			break;
-
-		case EEXIST:
-			/*
-			 * Snapshot already exists.
-			 */
-			zfs_error(dgettext(TEXT_DOMAIN, "cannot create '%s': "
-			    "snapshot exists"), path);
-			break;
-
-		case ENOENT:
-			/*
-			 * Shouldn't happen because we verified the parent
-			 * above.  But there may be a race condition where it
-			 * has since been removed.
-			 */
-			zfs_error(dgettext(TEXT_DOMAIN, "cannot open '%s': "
-			    "no such %s"), parent,
-			    zfs_type_to_name(zhp->zfs_type));
-			break;
-
-		default:
-			zfs_baderror(errno);
-		}
-	}
+	if (ret != 0)
+		(void) zfs_standard_error(hdl, errno, errbuf);
 
 	free(parent);
 	zfs_close(zhp);
@@ -2337,6 +2243,11 @@ zfs_send(zfs_handle_t *zhp_to, zfs_handle_t *zhp_from)
 {
 	zfs_cmd_t zc = { 0 };
 	int ret;
+	char errbuf[1024];
+	libzfs_handle_t *hdl = zhp_to->zfs_hdl;
+
+	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+	    "cannot send '%s'"), zhp_to->zfs_name);
 
 	/* do the ioctl() */
 	(void) strlcpy(zc.zc_name, zhp_to->zfs_name, sizeof (zc.zc_name));
@@ -2348,34 +2259,14 @@ zfs_send(zfs_handle_t *zhp_to, zfs_handle_t *zhp_from)
 	}
 	zc.zc_cookie = STDOUT_FILENO;
 
-	ret = zfs_ioctl(ZFS_IOC_SENDBACKUP, &zc);
+	ret = ioctl(zhp_to->zfs_hdl->libzfs_fd, ZFS_IOC_SENDBACKUP, &zc);
 	if (ret != 0) {
 		switch (errno) {
-		case EPERM:
-			/*
-			 * User doesn't have permission to do a send
-			 */
-			zfs_error(dgettext(TEXT_DOMAIN, "cannot send '%s': "
-			    "permission denied"), zhp_to->zfs_name);
-			break;
 
 		case EXDEV:
-			zfs_error(dgettext(TEXT_DOMAIN,
-			    "cannot send incremental from %s:\n"
-			    "it is not an earlier snapshot from the "
-			    "same fs as %s"),
-			    zhp_from->zfs_name, zhp_to->zfs_name);
-			break;
-
-		case ENOENT:
-			/*
-			 * Shouldn't happen because we verified the parent
-			 * above.  But there may be a race condition where it
-			 * has since been removed.
-			 */
-			zfs_error(dgettext(TEXT_DOMAIN, "cannot open: "
-			    "no such snapshot"));
-			break;
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "not an ealier snapshot from the same fs"));
+			return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
 
 		case EDQUOT:
 		case EFBIG:
@@ -2388,18 +2279,11 @@ zfs_send(zfs_handle_t *zhp_to, zfs_handle_t *zhp_from)
 		case ERANGE:
 		case EFAULT:
 		case EROFS:
-			zfs_error(dgettext(TEXT_DOMAIN,
-			    "cannot write stream: %s"),
-			    strerror(errno));
-			break;
-
-		case EINTR:
-			zfs_error(dgettext(TEXT_DOMAIN,
-			    "send failed: signal received"));
-			break;
+			zfs_error_aux(hdl, strerror(errno));
+			return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
 
 		default:
-			zfs_baderror(errno);
+			return (zfs_standard_error(hdl, errno, errbuf));
 		}
 	}
 
@@ -2410,7 +2294,8 @@ zfs_send(zfs_handle_t *zhp_to, zfs_handle_t *zhp_from)
  * Restores a backup of tosnap from stdin.
  */
 int
-zfs_receive(const char *tosnap, int isprefix, int verbose, int dryrun)
+zfs_receive(libzfs_handle_t *hdl, const char *tosnap, int isprefix,
+    int verbose, int dryrun)
 {
 	zfs_cmd_t zc = { 0 };
 	time_t begin_time;
@@ -2418,9 +2303,13 @@ zfs_receive(const char *tosnap, int isprefix, int verbose, int dryrun)
 	char *cp;
 	dmu_replay_record_t drr;
 	struct drr_begin *drrb = &zc.zc_begin_record;
+	char errbuf[1024];
 
 	begin_time = time(NULL);
 
+	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+	    "cannot receive"));
+
 	/* trim off snapname, if any */
 	(void) strcpy(zc.zc_name, tosnap);
 	cp = strchr(zc.zc_name, '@');
@@ -2437,31 +2326,26 @@ zfs_receive(const char *tosnap, int isprefix, int verbose, int dryrun)
 	} while (size > 0);
 
 	if (size < 0 || bytes != sizeof (drr)) {
-		zfs_error(dgettext(TEXT_DOMAIN,
-		    "cannot receive: invalid stream "
-		    "(couldn't read first record)"));
-		return (-1);
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
+		    "stream (failed to read first record)"));
+		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
 	}
 
 	zc.zc_begin_record = drr.drr_u.drr_begin;
 
 	if (drrb->drr_magic != DMU_BACKUP_MAGIC &&
 	    drrb->drr_magic != BSWAP_64(DMU_BACKUP_MAGIC)) {
-		zfs_error(dgettext(TEXT_DOMAIN,
-		    "cannot receive: invalid stream "
-		    "(invalid magic number)"));
-		return (-1);
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
+		    "stream (bad magic number)"));
+		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
 	}
 
 	if (drrb->drr_version != DMU_BACKUP_VERSION &&
 	    drrb->drr_version != BSWAP_64(DMU_BACKUP_VERSION)) {
-		if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC))
-			drrb->drr_version = BSWAP_64(drrb->drr_version);
-		zfs_error(dgettext(TEXT_DOMAIN,
-		    "cannot receive: only stream version 0x%llx is supported, "
-		    "stream is version %llx."),
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "only version "
+		    "0x%llx is supported (stream is version 0x%llx)"),
 		    DMU_BACKUP_VERSION, drrb->drr_version);
-		return (-1);
+		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
 	}
 
 	/*
@@ -2470,10 +2354,9 @@ zfs_receive(const char *tosnap, int isprefix, int verbose, int dryrun)
 	(void) strcpy(zc.zc_filename, tosnap);
 	if (isprefix) {
 		if (strchr(tosnap, '@') != NULL) {
-			zfs_error(dgettext(TEXT_DOMAIN,
-			    "cannot receive: "
-			    "argument to -d must be a filesystem"));
-			return (-1);
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "destination must be a filesystem"));
+			return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
 		}
 
 		cp = strchr(drr.drr_u.drr_begin.drr_toname, '/');
@@ -2490,11 +2373,8 @@ zfs_receive(const char *tosnap, int isprefix, int verbose, int dryrun)
 		 * snapname from the backup.
 		 */
 		cp = strchr(drr.drr_u.drr_begin.drr_toname, '@');
-		if (cp == NULL || strlen(tosnap) + strlen(cp) >= MAXNAMELEN) {
-			zfs_error(dgettext(TEXT_DOMAIN,
-			    "cannot receive: invalid snapshot name"));
-			return (-1);
-		}
+		if (cp == NULL || strlen(tosnap) + strlen(cp) >= MAXNAMELEN)
+			return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
 		(void) strcat(zc.zc_filename, cp);
 	}
 
@@ -2508,20 +2388,16 @@ zfs_receive(const char *tosnap, int isprefix, int verbose, int dryrun)
 		*cp = '\0';
 
 		/* make sure destination fs exists */
-		h = zfs_open(zc.zc_name, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
-		if (h == NULL) {
-			zfs_error(dgettext(TEXT_DOMAIN,
-			    "cannot receive incrememtal stream: destination\n"
-			    "filesystem %s does not exist"),
-			    zc.zc_name);
+		h = zfs_open(hdl, zc.zc_name,
+		    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
+		if (h == NULL)
 			return (-1);
-		}
 		if (!dryrun) {
 			/* unmount destination fs or remove device link. */
 			if (h->zfs_type == ZFS_TYPE_FILESYSTEM) {
 				(void) zfs_unmount(h, NULL, 0);
 			} else {
-				(void) zvol_remove_link(h->zfs_name);
+				(void) zvol_remove_link(hdl, h->zfs_name);
 			}
 		}
 		zfs_close(h);
@@ -2535,24 +2411,18 @@ zfs_receive(const char *tosnap, int isprefix, int verbose, int dryrun)
 			cp = strchr(zc.zc_name, '@');
 			if (cp)
 				*cp = '\0';
-			zfs_error(dgettext(TEXT_DOMAIN,
-			    "cannot receive: destination fs %s already exists"),
-			    zc.zc_name);
-			return (-1);
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "destination '%s' already exists"), zc.zc_name);
+			return (zfs_error(hdl, EZFS_EXISTS, errbuf));
 		}
 
 		if (isprefix) {
 			zfs_handle_t *h;
 
 			/* make sure prefix exists */
-			h = zfs_open(tosnap, ZFS_TYPE_FILESYSTEM);
-			if (h == NULL) {
-				zfs_error(dgettext(TEXT_DOMAIN,
-				    "cannot receive: "
-				    "%s is an invalid destination"),
-				    tosnap);
+			h = zfs_open(hdl, tosnap, ZFS_TYPE_FILESYSTEM);
+			if (h == NULL)
 				return (-1);
-			}
 			zfs_close(h);
 
 			/* create any necessary ancestors up to prefix */
@@ -2569,24 +2439,25 @@ zfs_receive(const char *tosnap, int isprefix, int verbose, int dryrun)
 				const char *opname;
 				*cp = '\0';
 
-				opname = "create";
-				if (zfs_create(zc.zc_name, ZFS_TYPE_FILESYSTEM,
-				    NULL, NULL) != 0) {
+				opname = dgettext(TEXT_DOMAIN, "create");
+				if (zfs_create(hdl, zc.zc_name,
+				    ZFS_TYPE_FILESYSTEM, NULL, NULL) != 0) {
 					if (errno == EEXIST)
 						continue;
 					goto ancestorerr;
 				}
 
-				opname = "open";
-				h = zfs_open(zc.zc_name, ZFS_TYPE_FILESYSTEM);
+				opname = dgettext(TEXT_DOMAIN, "open");
+				h = zfs_open(hdl, zc.zc_name,
+				    ZFS_TYPE_FILESYSTEM);
 				if (h == NULL)
 					goto ancestorerr;
 
-				opname = "mount";
+				opname = dgettext(TEXT_DOMAIN, "mount");
 				if (zfs_mount(h, NULL, 0) != 0)
 					goto ancestorerr;
 
-				opname = "share";
+				opname = dgettext(TEXT_DOMAIN, "share");
 				if (zfs_share(h) != 0)
 					goto ancestorerr;
 
@@ -2594,22 +2465,21 @@ zfs_receive(const char *tosnap, int isprefix, int verbose, int dryrun)
 
 				continue;
 ancestorerr:
-				zfs_error(dgettext(TEXT_DOMAIN,
-				    "cannot receive: couldn't %s ancestor %s"),
-				    opname, zc.zc_name);
-				return (-1);
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "failed to %s ancestor '%s'"), opname,
+				    zc.zc_name);
+				return (zfs_error(hdl, EZFS_BADRESTORE,
+				    errbuf));
 			}
 		}
 
 		/* Make sure destination fs does not exist */
 		cp = strchr(zc.zc_name, '@');
 		*cp = '\0';
-		if (zfs_ioctl(ZFS_IOC_OBJSET_STATS, &zc) == 0) {
-			zfs_error(dgettext(TEXT_DOMAIN,
-			    "cannot receive full stream: "
-			    "destination filesystem %s already exists"),
-			    zc.zc_name);
-			return (-1);
+		if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) == 0) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "destination '%s' exists"), zc.zc_name);
+			return (zfs_error(hdl, EZFS_EXISTS, errbuf));
 		}
 
 		/* Do the recvbackup ioctl to the fs's parent. */
@@ -2630,21 +2500,20 @@ ancestorerr:
 	}
 	if (dryrun)
 		return (0);
-	err = ioctl_err = zfs_ioctl(ZFS_IOC_RECVBACKUP, &zc);
+	err = ioctl_err = ioctl(hdl->libzfs_fd, ZFS_IOC_RECVBACKUP, &zc);
 	if (ioctl_err != 0) {
 		switch (errno) {
 		case ENODEV:
-			zfs_error(dgettext(TEXT_DOMAIN,
-			    "cannot receive: "
-			    "most recent snapshot does not "
-			    "match incremental source"));
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "most recent snapshot does not match incremental "
+			    "source"));
+			(void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
 			break;
 		case ETXTBSY:
-			zfs_error(dgettext(TEXT_DOMAIN,
-			    "cannot receive: "
-			    "destination has been modified since "
-			    "most recent snapshot --\n"
-			    "use 'zfs rollback' to discard changes"));
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "destination has been modified since most recent "
+			    "snapshot"));
+			(void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
 			break;
 		case EEXIST:
 			if (drrb->drr_fromguid == 0) {
@@ -2652,45 +2521,21 @@ ancestorerr:
 				cp = strchr(zc.zc_filename, '@');
 				*cp = '\0';
 			}
-			zfs_error(dgettext(TEXT_DOMAIN,
-			    "cannot receive to %s: destination already exists"),
-			    zc.zc_filename);
-			break;
-		case ENOENT:
-			zfs_error(dgettext(TEXT_DOMAIN,
-			    "cannot receive: destination does not exist"));
-			break;
-		case EBUSY:
-			zfs_error(dgettext(TEXT_DOMAIN,
-			    "cannot receive: destination is in use"));
-			break;
-		case ENOSPC:
-			zfs_error(dgettext(TEXT_DOMAIN,
-			    "cannot receive: out of space"));
-			break;
-		case EDQUOT:
-			zfs_error(dgettext(TEXT_DOMAIN,
-			    "cannot receive: quota exceeded"));
-			break;
-		case EINTR:
-			zfs_error(dgettext(TEXT_DOMAIN,
-			    "receive failed: signal received"));
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "destination already exists"));
+			(void) zfs_error(hdl, EZFS_EXISTS, dgettext(TEXT_DOMAIN,
+			    "cannot restore to %s"), zc.zc_filename);
 			break;
 		case EINVAL:
-			zfs_error(dgettext(TEXT_DOMAIN,
-			    "cannot receive: invalid stream"));
+			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
 			break;
 		case ECKSUM:
-			zfs_error(dgettext(TEXT_DOMAIN,
-			    "cannot receive: invalid stream "
-			    "(checksum mismatch)"));
-			break;
-		case EPERM:
-			zfs_error(dgettext(TEXT_DOMAIN,
-			    "cannot receive: permission denied"));
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "invalid stream (checksum mismatch)"));
+			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
 			break;
 		default:
-			zfs_baderror(errno);
+			(void) zfs_standard_error(hdl, errno, errbuf);
 		}
 	}
 
@@ -2705,16 +2550,17 @@ ancestorerr:
 		zfs_handle_t *h;
 
 		*cp = '\0';
-		h = zfs_open(zc.zc_filename,
+		h = zfs_open(hdl, zc.zc_filename,
 		    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
 		*cp = '@';
 		if (h) {
 			if (h->zfs_type == ZFS_TYPE_FILESYSTEM) {
 				err = zfs_mount(h, NULL, 0);
 			} else {
-				err = zvol_create_link(h->zfs_name);
+				err = zvol_create_link(hdl, h->zfs_name);
 				if (err == 0 && ioctl_err == 0)
-					err = zvol_create_link(zc.zc_filename);
+					err = zvol_create_link(hdl,
+					    zc.zc_filename);
 			}
 			zfs_close(h);
 		}
@@ -2750,7 +2596,7 @@ typedef struct rollback_data {
 	uint64_t	cb_create;		/* creation time reference */
 	prop_changelist_t *cb_clp;		/* changelist pointer */
 	int		cb_error;
-	int		cb_dependent;
+	boolean_t	cb_dependent;
 } rollback_data_t;
 
 static int
@@ -2764,9 +2610,9 @@ rollback_destroy(zfs_handle_t *zhp, void *data)
 		    zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG) >
 		    cbp->cb_create) {
 
-			cbp->cb_dependent = TRUE;
+			cbp->cb_dependent = B_TRUE;
 			(void) zfs_iter_dependents(zhp, rollback_destroy, cbp);
-			cbp->cb_dependent = FALSE;
+			cbp->cb_dependent = B_FALSE;
 
 			if (zfs_destroy(zhp) != 0)
 				cbp->cb_error = 1;
@@ -2797,7 +2643,7 @@ do_rollback(zfs_handle_t *zhp)
 	    zhp->zfs_type == ZFS_TYPE_VOLUME);
 
 	if (zhp->zfs_type == ZFS_TYPE_VOLUME &&
-	    zvol_remove_link(zhp->zfs_name) != 0)
+	    zvol_remove_link(zhp->zfs_hdl, zhp->zfs_name) != 0)
 		return (-1);
 
 	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
@@ -2814,58 +2660,13 @@ do_rollback(zfs_handle_t *zhp)
 	 * condition where the user has taken a snapshot since we verified that
 	 * this was the most recent.
 	 */
-	if ((ret = zfs_ioctl(ZFS_IOC_ROLLBACK, &zc)) != 0) {
-		switch (errno) {
-		case EPERM:
-			/*
-			 * The user doesn't have permission to rollback the
-			 * given dataset.
-			 */
-			zfs_error(dgettext(TEXT_DOMAIN, "cannot rollback '%s': "
-			    "permission denied"), zhp->zfs_name);
-			break;
-
-		case EDQUOT:
-		case ENOSPC:
-			/*
-			 * The parent dataset doesn't have enough space to
-			 * rollback to the last snapshot.
-			 */
-			{
-				char parent[ZFS_MAXNAMELEN];
-				(void) parent_name(zhp->zfs_name, parent,
-				    sizeof (parent));
-				zfs_error(dgettext(TEXT_DOMAIN, "cannot "
-				    "rollback '%s': out of space"), parent);
-			}
-			break;
-
-		case ENOENT:
-			/*
-			 * The dataset doesn't exist.  This shouldn't happen
-			 * except in race conditions.
-			 */
-			zfs_error(dgettext(TEXT_DOMAIN, "cannot rollback '%s': "
-			    "no such %s"), zhp->zfs_name,
-			    zfs_type_to_name(zhp->zfs_type));
-			break;
-
-		case EBUSY:
-			/*
-			 * The filesystem is busy.  This should have been caught
-			 * by the caller before getting here, but there may be
-			 * an unexpected problem.
-			 */
-			zfs_error(dgettext(TEXT_DOMAIN, "cannot rollback '%s': "
-			    "%s is busy"), zhp->zfs_name,
-			    zfs_type_to_name(zhp->zfs_type));
-			break;
-
-		default:
-			zfs_baderror(errno);
-		}
+	if ((ret = ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_ROLLBACK,
+	    &zc)) != 0) {
+		(void) zfs_standard_error(zhp->zfs_hdl, errno,
+		    dgettext(TEXT_DOMAIN, "cannot rollback '%s'"),
+		    zhp->zfs_name);
 	} else if (zhp->zfs_type == ZFS_TYPE_VOLUME) {
-		ret = zvol_create_link(zhp->zfs_name);
+		ret = zvol_create_link(zhp->zfs_hdl, zhp->zfs_name);
 	}
 
 	return (ret);
@@ -2946,9 +2747,10 @@ zfs_iter_dependents(zfs_handle_t *zhp, zfs_iter_f func, void *data)
 	zfs_handle_t *child;
 	int ret = 0;
 
-	dependents = get_dependents(zhp->zfs_name, &count);
+	dependents = get_dependents(zhp->zfs_hdl, zhp->zfs_name, &count);
 	for (i = 0; i < count; i++) {
-		if ((child = make_dataset_handle(dependents[i])) == NULL)
+		if ((child = make_dataset_handle(zhp->zfs_hdl,
+		    dependents[i])) == NULL)
 			continue;
 
 		if ((ret = func(child, data)) != 0)
@@ -2970,10 +2772,11 @@ zfs_rename(zfs_handle_t *zhp, const char *target)
 {
 	int ret;
 	zfs_cmd_t zc = { 0 };
-	char reason[64];
 	char *delim;
 	prop_changelist_t *cl;
 	char parent[ZFS_MAXNAMELEN];
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
+	char errbuf[1024];
 
 	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
 	(void) strlcpy(zc.zc_prop_value, target, sizeof (zc.zc_prop_value));
@@ -2982,22 +2785,21 @@ zfs_rename(zfs_handle_t *zhp, const char *target)
 	if (strcmp(zhp->zfs_name, target) == 0)
 		return (0);
 
+	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
+	    "cannot rename to '%s'"), target);
+
 	/*
 	 * Make sure the target name is valid
 	 */
-	if (!zfs_validate_name(target, zhp->zfs_type, reason,
-	    sizeof (reason))) {
-		zfs_error(dgettext(TEXT_DOMAIN,
-		    "cannot create '%s': %s in %s name"), target, reason,
-		    zfs_type_to_name(zhp->zfs_type));
-		return (-1);
-	}
+	if (!zfs_validate_name(hdl, target, zhp->zfs_type))
+		return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
 
 	if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT) {
+
 		if ((delim = strchr(target, '@')) == NULL) {
-			zfs_error(dgettext(TEXT_DOMAIN,
-			    "cannot rename to '%s': not a snapshot"), target);
-			return (-1);
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "not a snapshot"));
+			return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
 		}
 
 		/*
@@ -3005,17 +2807,16 @@ zfs_rename(zfs_handle_t *zhp, const char *target)
 		 */
 		if (strncmp(zhp->zfs_name, target, delim - target) != 0 ||
 		    zhp->zfs_name[delim - target] != '@') {
-			zfs_error(dgettext(TEXT_DOMAIN,
-			    "cannot rename to '%s': snapshots must be part "
-			    "of same dataset"), target);
-			return (-1);
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "snapshots must be part of same dataset"));
+			return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
 		}
 
 		(void) strncpy(parent, target, delim - target);
 		parent[delim - target] = '\0';
 	} else {
 		/* validate parents */
-		if (check_parents(target, zhp->zfs_type) != 0)
+		if (check_parents(hdl, target) != 0)
 			return (-1);
 
 		(void) parent_name(target, parent, sizeof (parent));
@@ -3024,28 +2825,30 @@ zfs_rename(zfs_handle_t *zhp, const char *target)
 		verify((delim = strchr(target, '/')) != NULL);
 		if (strncmp(zhp->zfs_name, target, delim - target) != 0 ||
 		    zhp->zfs_name[delim - target] != '/') {
-			zfs_error(dgettext(TEXT_DOMAIN,
-			    "cannot rename to '%s': "
-			    "datasets must be within same pool"), target);
-			return (-1);
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "datasets must be within same pool"));
+			return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
 		}
 	}
 
+	(void) snprintf(errbuf, sizeof (errbuf),
+	    dgettext(TEXT_DOMAIN, "cannot rename '%s'"), zhp->zfs_name);
+
 	if (getzoneid() == GLOBAL_ZONEID &&
 	    zfs_prop_get_int(zhp, ZFS_PROP_ZONED)) {
-		zfs_error(dgettext(TEXT_DOMAIN, "cannot rename %s, "
-		    "dataset is used in a non-global zone"), zhp->zfs_name);
-		return (-1);
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "dataset is used in a non-global zone"));
+		return (zfs_error(hdl, EZFS_ZONED, errbuf));
 	}
 
 	if ((cl = changelist_gather(zhp, ZFS_PROP_NAME, 0)) == NULL)
-		return (1);
+		return (-1);
 
 	if (changelist_haszonedchild(cl)) {
-		zfs_error(dgettext(TEXT_DOMAIN,
-		    "cannot rename '%s': child dataset with inherited "
-		    "mountpoint is used in a non-global zone"), zhp->zfs_name);
-		ret = -1;
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "child dataset with inherited mountpoint is used "
+		    "in a non-global zone"));
+		ret = zfs_error(hdl, EZFS_ZONED, errbuf);
 		goto error;
 	}
 
@@ -3057,59 +2860,8 @@ zfs_rename(zfs_handle_t *zhp, const char *target)
 	else
 		zc.zc_objset_type = DMU_OST_ZFS;
 
-	if ((ret = zfs_ioctl(ZFS_IOC_RENAME, &zc)) != 0) {
-		switch (errno) {
-		case EPERM:
-			/*
-			 * The user doesn't have permission to rename the
-			 * given dataset.
-			 */
-			zfs_error(dgettext(TEXT_DOMAIN, "cannot rename '%s': "
-			    "permission denied"), zhp->zfs_name);
-			break;
-
-		case EDQUOT:
-		case ENOSPC:
-			/*
-			 * Not enough space in the parent dataset.
-			 */
-			zfs_error(dgettext(TEXT_DOMAIN, "cannot "
-			    "rename '%s': not enough space in '%s'"),
-			    zhp->zfs_name, parent);
-			break;
-
-		case ENOENT:
-			/*
-			 * The destination doesn't exist.
-			 */
-			zfs_error(dgettext(TEXT_DOMAIN, "cannot rename '%s' "
-			    "to '%s': destination doesn't exist"),
-			    zhp->zfs_name, target);
-			break;
-
-		case EEXIST:
-			/*
-			 * The destination already exists.
-			 */
-			zfs_error(dgettext(TEXT_DOMAIN, "cannot rename '%s' "
-			    "to '%s': destination already exists"),
-			    zhp->zfs_name, target);
-			break;
-
-		case EBUSY:
-			/*
-			 * The filesystem is busy.  This should have been caught
-			 * by the caller before getting here, but there may be
-			 * an unexpected problem.
-			 */
-			zfs_error(dgettext(TEXT_DOMAIN, "cannot rename '%s': "
-			    "%s is busy"), zhp->zfs_name,
-			    zfs_type_to_name(zhp->zfs_type));
-			break;
-
-		default:
-			zfs_baderror(errno);
-		}
+	if ((ret = ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_RENAME, &zc)) != 0) {
+		(void) zfs_standard_error(zhp->zfs_hdl, errno, errbuf);
 
 		/*
 		 * On failure, we still want to remount any filesystems that
@@ -3132,24 +2884,18 @@ error:
  * poke devfsadm to create the /dev link, and then wait for the link to appear.
  */
 int
-zvol_create_link(const char *dataset)
+zvol_create_link(libzfs_handle_t *hdl, const char *dataset)
 {
 	zfs_cmd_t zc = { 0 };
-	di_devlink_handle_t hdl;
+	di_devlink_handle_t dhdl;
 
 	(void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
 
 	/*
 	 * Issue the appropriate ioctl.
 	 */
-	if (zfs_ioctl(ZFS_IOC_CREATE_MINOR, &zc) != 0) {
+	if (ioctl(hdl->libzfs_fd, ZFS_IOC_CREATE_MINOR, &zc) != 0) {
 		switch (errno) {
-		case EPERM:
-			zfs_error(dgettext(TEXT_DOMAIN, "cannot create "
-			    "device links for '%s': permission denied"),
-			    dataset);
-			break;
-
 		case EEXIST:
 			/*
 			 * Silently ignore the case where the link already
@@ -3159,22 +2905,24 @@ zvol_create_link(const char *dataset)
 			return (0);
 
 		default:
-			zfs_baderror(errno);
+			return (zfs_standard_error(hdl, errno,
+			    dgettext(TEXT_DOMAIN, "cannot create device links "
+			    "for '%s'"), dataset));
 		}
-
-		return (-1);
 	}
 
 	/*
 	 * Call devfsadm and wait for the links to magically appear.
 	 */
-	if ((hdl = di_devlink_init(ZFS_DRIVER, DI_MAKE_LINK)) == NULL) {
-		zfs_error(dgettext(TEXT_DOMAIN,
-		    "cannot create device links for '%s'"), dataset);
-		(void) zfs_ioctl(ZFS_IOC_REMOVE_MINOR, &zc);
+	if ((dhdl = di_devlink_init(ZFS_DRIVER, DI_MAKE_LINK)) == NULL) {
+		zfs_error_aux(hdl, strerror(errno));
+		(void) zfs_error(hdl, EZFS_DEVLINKS,
+		    dgettext(TEXT_DOMAIN, "cannot create device links "
+		    "for '%s'"), dataset);
+		(void) ioctl(hdl->libzfs_fd, ZFS_IOC_REMOVE_MINOR, &zc);
 		return (-1);
 	} else {
-		(void) di_devlink_fini(&hdl);
+		(void) di_devlink_fini(&dhdl);
 	}
 
 	return (0);
@@ -3184,26 +2932,14 @@ zvol_create_link(const char *dataset)
  * Remove a minor node for the given zvol and the associated /dev links.
  */
 int
-zvol_remove_link(const char *dataset)
+zvol_remove_link(libzfs_handle_t *hdl, const char *dataset)
 {
 	zfs_cmd_t zc = { 0 };
 
 	(void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
 
-	if (zfs_ioctl(ZFS_IOC_REMOVE_MINOR, &zc) != 0) {
+	if (ioctl(hdl->libzfs_fd, ZFS_IOC_REMOVE_MINOR, &zc) != 0) {
 		switch (errno) {
-		case EPERM:
-			zfs_error(dgettext(TEXT_DOMAIN, "cannot remove "
-			    "device links for '%s': permission denied"),
-			    dataset);
-			break;
-
-		case EBUSY:
-			zfs_error(dgettext(TEXT_DOMAIN, "cannot remove "
-			    "device links for '%s': volume is in use"),
-			    dataset);
-			break;
-
 		case ENXIO:
 			/*
 			 * Silently ignore the case where the link no longer
@@ -3213,10 +2949,10 @@ zvol_remove_link(const char *dataset)
 			return (0);
 
 		default:
-			zfs_baderror(errno);
+			return (zfs_standard_error(hdl, errno,
+			    dgettext(TEXT_DOMAIN, "cannot remove device "
+			    "links for '%s'"), dataset));
 		}
-
-		return (-1);
 	}
 
 	return (0);
diff --git a/usr/src/lib/libzfs/common/libzfs_graph.c b/usr/src/lib/libzfs/common/libzfs_graph.c
index 4c7bb547ee..e86a6c9377 100644
--- a/usr/src/lib/libzfs/common/libzfs_graph.c
+++ b/usr/src/lib/libzfs/common/libzfs_graph.c
@@ -121,9 +121,12 @@ typedef struct zfs_graph {
  * Allocate a new edge pointing to the target vertex.
  */
 static zfs_edge_t *
-zfs_edge_create(zfs_vertex_t *dest)
+zfs_edge_create(libzfs_handle_t *hdl, zfs_vertex_t *dest)
 {
-	zfs_edge_t *zep = zfs_malloc(sizeof (zfs_edge_t));
+	zfs_edge_t *zep = zfs_alloc(hdl, sizeof (zfs_edge_t));
+
+	if (zep == NULL)
+		return (NULL);
 
 	zep->ze_dest = dest;
 
@@ -143,15 +146,23 @@ zfs_edge_destroy(zfs_edge_t *zep)
  * Allocate a new vertex with the given name.
  */
 static zfs_vertex_t *
-zfs_vertex_create(const char *dataset)
+zfs_vertex_create(libzfs_handle_t *hdl, const char *dataset)
 {
-	zfs_vertex_t *zvp = zfs_malloc(sizeof (zfs_vertex_t));
+	zfs_vertex_t *zvp = zfs_alloc(hdl, sizeof (zfs_vertex_t));
+
+	if (zvp == NULL)
+		return (NULL);
 
 	assert(strlen(dataset) < ZFS_MAXNAMELEN);
 
 	(void) strlcpy(zvp->zv_dataset, dataset, sizeof (zvp->zv_dataset));
 
-	zvp->zv_edges = zfs_malloc(MIN_EDGECOUNT * sizeof (void *));
+	if ((zvp->zv_edges = zfs_alloc(hdl,
+	    MIN_EDGECOUNT * sizeof (void *))) == NULL) {
+		free(zvp);
+		return (NULL);
+	}
+
 	zvp->zv_edgealloc = MIN_EDGECOUNT;
 
 	return (zvp);
@@ -175,15 +186,22 @@ zfs_vertex_destroy(zfs_vertex_t *zvp)
 /*
  * Given a vertex, add an edge to the destination vertex.
  */
-static void
-zfs_vertex_add_edge(zfs_vertex_t *zvp, zfs_vertex_t *dest)
+static int
+zfs_vertex_add_edge(libzfs_handle_t *hdl, zfs_vertex_t *zvp,
+    zfs_vertex_t *dest)
 {
-	zfs_edge_t *zep = zfs_edge_create(dest);
+	zfs_edge_t *zep = zfs_edge_create(hdl, dest);
+
+	if (zep == NULL)
+		return (-1);
 
 	if (zvp->zv_edgecount == zvp->zv_edgealloc) {
-		zfs_edge_t **newedges = zfs_malloc(zvp->zv_edgealloc * 2 *
+		zfs_edge_t **newedges = zfs_alloc(hdl, zvp->zv_edgealloc * 2 *
 		    sizeof (void *));
 
+		if (newedges == NULL)
+			return (-1);
+
 		bcopy(zvp->zv_edges, newedges,
 		    zvp->zv_edgealloc * sizeof (void *));
 
@@ -193,6 +211,8 @@ zfs_vertex_add_edge(zfs_vertex_t *zvp, zfs_vertex_t *dest)
 	}
 
 	zvp->zv_edges[zvp->zv_edgecount++] = zep;
+
+	return (0);
 }
 
 static int
@@ -227,12 +247,19 @@ zfs_vertex_sort_edges(zfs_vertex_t *zvp)
  * datasets in the pool.
  */
 static zfs_graph_t *
-zfs_graph_create(size_t size)
+zfs_graph_create(libzfs_handle_t *hdl, size_t size)
 {
-	zfs_graph_t *zgp = zfs_malloc(sizeof (zfs_graph_t));
+	zfs_graph_t *zgp = zfs_alloc(hdl, sizeof (zfs_graph_t));
+
+	if (zgp == NULL)
+		return (NULL);
 
 	zgp->zg_size = size;
-	zgp->zg_hash = zfs_malloc(size * sizeof (zfs_vertex_t *));
+	if ((zgp->zg_hash = zfs_alloc(hdl,
+	    size * sizeof (zfs_vertex_t *))) == NULL) {
+		free(zgp);
+		return (NULL);
+	}
 
 	return (zgp);
 }
@@ -280,7 +307,8 @@ zfs_graph_hash(zfs_graph_t *zgp, const char *str)
  * Given a dataset name, finds the associated vertex, creating it if necessary.
  */
 static zfs_vertex_t *
-zfs_graph_lookup(zfs_graph_t *zgp, const char *dataset, uint64_t txg)
+zfs_graph_lookup(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *dataset,
+    uint64_t txg)
 {
 	size_t idx = zfs_graph_hash(zgp, dataset);
 	zfs_vertex_t *zvp;
@@ -293,7 +321,9 @@ zfs_graph_lookup(zfs_graph_t *zgp, const char *dataset, uint64_t txg)
 		}
 	}
 
-	zvp = zfs_vertex_create(dataset);
+	if ((zvp = zfs_vertex_create(hdl, dataset)) == NULL)
+		return (NULL);
+
 	zvp->zv_next = zgp->zg_hash[idx];
 	zvp->zv_txg = txg;
 	zgp->zg_hash[idx] = zvp;
@@ -308,43 +338,52 @@ zfs_graph_lookup(zfs_graph_t *zgp, const char *dataset, uint64_t txg)
  * created it as a destination of another edge.  If 'dest' is NULL, then this
  * is an individual vertex (i.e. the starting vertex), so don't add an edge.
  */
-static void
-zfs_graph_add(zfs_graph_t *zgp, const char *source, const char *dest,
-    uint64_t txg)
+static int
+zfs_graph_add(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *source,
+    const char *dest, uint64_t txg)
 {
 	zfs_vertex_t *svp, *dvp;
 
-	svp = zfs_graph_lookup(zgp, source, 0);
+	if ((svp = zfs_graph_lookup(hdl, zgp, source, 0)) == NULL)
+		return (-1);
 	svp->zv_visited = 1;
 	if (dest != NULL) {
-		dvp = zfs_graph_lookup(zgp, dest, txg);
-		zfs_vertex_add_edge(svp, dvp);
+		dvp = zfs_graph_lookup(hdl, zgp, dest, txg);
+		if (dvp == NULL)
+			return (-1);
+		if (zfs_vertex_add_edge(hdl, svp, dvp) != 0)
+			return (-1);
 	}
+
+	return (0);
 }
 
 /*
  * Iterate over all children of the given dataset, adding any vertices as
- * necessary.  Returns 0 if no cloned snapshots were seen, 1 otherwise.  This is
+ * necessary.  Returns 0 if no cloned snapshots were seen, -1 if there was an
+ * error, or 1 otherwise.  This is
  * a simple recursive algorithm - the ZFS namespace typically is very flat.  We
  * manually invoke the necessary ioctl() calls to avoid the overhead and
  * additional semantics of zfs_open().
  */
 static int
-iterate_children(zfs_graph_t *zgp, const char *dataset)
+iterate_children(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *dataset)
 {
 	zfs_cmd_t zc = { 0 };
-	int ret = 0;
+	int ret = 0, err;
 	zfs_vertex_t *zvp;
 
 	/*
 	 * Look up the source vertex, and avoid it if we've seen it before.
 	 */
-	zvp = zfs_graph_lookup(zgp, dataset, 0);
+	zvp = zfs_graph_lookup(hdl, zgp, dataset, 0);
+	if (zvp == NULL)
+		return (-1);
 	if (zvp->zv_visited)
 		return (0);
 
 	for ((void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
-	    zfs_ioctl(ZFS_IOC_DATASET_LIST_NEXT, &zc) == 0;
+	    ioctl(hdl->libzfs_fd, ZFS_IOC_DATASET_LIST_NEXT, &zc) == 0;
 	    (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name))) {
 
 		/*
@@ -358,32 +397,38 @@ iterate_children(zfs_graph_t *zgp, const char *dataset)
 		 * dataset and clone statistics.  If this fails, the dataset has
 		 * since been removed, and we're pretty much screwed anyway.
 		 */
-		if (zfs_ioctl(ZFS_IOC_OBJSET_STATS, &zc) != 0)
+		if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0)
 			continue;
 
 		/*
 		 * Add an edge between the parent and the child.
 		 */
-		zfs_graph_add(zgp, dataset, zc.zc_name,
-		    zc.zc_objset_stats.dds_creation_txg);
+		if (zfs_graph_add(hdl, zgp, dataset, zc.zc_name,
+		    zc.zc_objset_stats.dds_creation_txg) != 0)
+			return (-1);
 
 		/*
 		 * If this dataset has a clone parent, add an appropriate edge.
 		 */
-		if (zc.zc_objset_stats.dds_clone_of[0] != '\0')
-			zfs_graph_add(zgp, zc.zc_objset_stats.dds_clone_of,
-			    zc.zc_name, zc.zc_objset_stats.dds_creation_txg);
+		if (zc.zc_objset_stats.dds_clone_of[0] != '\0' &&
+		    zfs_graph_add(hdl, zgp, zc.zc_objset_stats.dds_clone_of,
+		    zc.zc_name, zc.zc_objset_stats.dds_creation_txg) != 0)
+			return (-1);
 
 		/*
 		 * Iterate over all children
 		 */
-		ret |= iterate_children(zgp, zc.zc_name);
+		err = iterate_children(hdl, zgp, zc.zc_name);
+		if (err == -1)
+			return (-1);
+		else if (err == 1)
+			ret = 1;
 
 		/*
 		 * Indicate if we found a dataset with a non-zero clone count.
 		 */
 		if (zc.zc_objset_stats.dds_num_clones != 0)
-			ret |= 1;
+			ret = 1;
 	}
 
 	/*
@@ -392,7 +437,7 @@ iterate_children(zfs_graph_t *zgp, const char *dataset)
 	bzero(&zc, sizeof (zc));
 
 	for ((void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
-	    zfs_ioctl(ZFS_IOC_SNAPSHOT_LIST_NEXT, &zc) == 0;
+	    ioctl(hdl->libzfs_fd, ZFS_IOC_SNAPSHOT_LIST_NEXT, &zc) == 0;
 	    (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name))) {
 
 		/*
@@ -400,20 +445,21 @@ iterate_children(zfs_graph_t *zgp, const char *dataset)
 		 * dataset and clone statistics.  If this fails, the dataset has
 		 * since been removed, and we're pretty much screwed anyway.
 		 */
-		if (zfs_ioctl(ZFS_IOC_OBJSET_STATS, &zc) != 0)
+		if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0)
 			continue;
 
 		/*
 		 * Add an edge between the parent and the child.
 		 */
-		zfs_graph_add(zgp, dataset, zc.zc_name,
-		    zc.zc_objset_stats.dds_creation_txg);
+		if (zfs_graph_add(hdl, zgp, dataset, zc.zc_name,
+		    zc.zc_objset_stats.dds_creation_txg) != 0)
+			return (-1);
 
 		/*
 		 * Indicate if we found a dataset with a non-zero clone count.
 		 */
 		if (zc.zc_objset_stats.dds_num_clones != 0)
-			ret |= 1;
+			ret = 1;
 	}
 
 	zvp->zv_visited = 1;
@@ -428,20 +474,24 @@ iterate_children(zfs_graph_t *zgp, const char *dataset)
  * over all datasets.
  */
 static zfs_graph_t *
-construct_graph(const char *dataset)
+construct_graph(libzfs_handle_t *hdl, const char *dataset)
 {
-	zfs_graph_t *zgp = zfs_graph_create(ZFS_GRAPH_SIZE);
+	zfs_graph_t *zgp = zfs_graph_create(hdl, ZFS_GRAPH_SIZE);
 	zfs_cmd_t zc = { 0 };
+	int ret = 0;
+
+	if (zgp == NULL)
+		return (zgp);
 
 	/*
 	 * We need to explicitly check whether this dataset has clones or not,
 	 * since iterate_children() only checks the children.
 	 */
 	(void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
-	(void) zfs_ioctl(ZFS_IOC_OBJSET_STATS, &zc);
+	(void) ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc);
 
 	if (zc.zc_objset_stats.dds_num_clones != 0 ||
-	    iterate_children(zgp, dataset) != 0) {
+	    (ret = iterate_children(hdl, zgp, dataset)) != 0) {
 		/*
 		 * Determine pool name and try again.
 		 */
@@ -449,17 +499,29 @@ construct_graph(const char *dataset)
 
 		if ((slash = strchr(dataset, '/')) != NULL ||
 		    (slash = strchr(dataset, '@')) != NULL) {
-			pool = zfs_malloc(slash - dataset + 1);
+			pool = zfs_alloc(hdl, slash - dataset + 1);
+			if (pool == NULL) {
+				zfs_graph_destroy(zgp);
+				return (NULL);
+			}
 			(void) strncpy(pool, dataset, slash - dataset);
 			pool[slash - dataset] = '\0';
 
-			(void) iterate_children(zgp, pool);
-			zfs_graph_add(zgp, pool, NULL, 0);
+			if (iterate_children(hdl, zgp, pool) == -1 ||
+			    zfs_graph_add(hdl, zgp, pool, NULL, 0) != 0) {
+				free(pool);
+				zfs_graph_destroy(zgp);
+				return (NULL);
+			}
 
 			free(pool);
 		}
 	}
-	zfs_graph_add(zgp, dataset, NULL, 0);
+
+	if (ret == -1 || zfs_graph_add(hdl, zgp, dataset, NULL, 0) != 0) {
+		zfs_graph_destroy(zgp);
+		return (NULL);
+	}
 
 	return (zgp);
 }
@@ -469,27 +531,33 @@ construct_graph(const char *dataset)
  * really just a depth first search, so that the deepest nodes appear first.
  * hijack the 'zv_visited' marker to avoid visiting the same vertex twice.
  */
-static void
-topo_sort(char **result, size_t *idx, zfs_vertex_t *zgv)
+static int
+topo_sort(libzfs_handle_t *hdl, char **result, size_t *idx, zfs_vertex_t *zgv)
 {
 	int i;
 
 	/* avoid doing a search if we don't have to */
 	if (zgv->zv_visited == 2)
-		return;
+		return (0);
 
 	zfs_vertex_sort_edges(zgv);
-	for (i = 0; i < zgv->zv_edgecount; i++)
-		topo_sort(result, idx, zgv->zv_edges[i]->ze_dest);
+	for (i = 0; i < zgv->zv_edgecount; i++) {
+		if (topo_sort(hdl, result, idx, zgv->zv_edges[i]->ze_dest) != 0)
+			return (-1);
+	}
 
 	/* we may have visited this in the course of the above */
 	if (zgv->zv_visited == 2)
-		return;
+		return (0);
+
+	if ((result[*idx] = zfs_alloc(hdl,
+	    strlen(zgv->zv_dataset) + 1)) == NULL)
+		return (-1);
 
-	result[*idx] = zfs_malloc(strlen(zgv->zv_dataset) + 1);
 	(void) strcpy(result[*idx], zgv->zv_dataset);
 	*idx += 1;
 	zgv->zv_visited = 2;
+	return (0);
 }
 
 /*
@@ -498,19 +566,33 @@ topo_sort(char **result, size_t *idx, zfs_vertex_t *zgv)
  * sort, and then return the array of strings to the caller.
  */
 char **
-get_dependents(const char *dataset, size_t *count)
+get_dependents(libzfs_handle_t *hdl, const char *dataset, size_t *count)
 {
 	char **result;
 	zfs_graph_t *zgp;
 	zfs_vertex_t *zvp;
 
-	zgp = construct_graph(dataset);
-	result = zfs_malloc(zgp->zg_nvertex * sizeof (char *));
+	if ((zgp = construct_graph(hdl, dataset)) == NULL)
+		return (NULL);
 
-	zvp = zfs_graph_lookup(zgp, dataset, 0);
+	if ((result = zfs_alloc(hdl,
+	    zgp->zg_nvertex * sizeof (char *))) == NULL) {
+		zfs_graph_destroy(zgp);
+		return (NULL);
+	}
+
+	if ((zvp = zfs_graph_lookup(hdl, zgp, dataset, 0)) == NULL) {
+		free(result);
+		zfs_graph_destroy(zgp);
+		return (NULL);
+	}
 
 	*count = 0;
-	topo_sort(result, count, zvp);
+	if (topo_sort(hdl, result, count, zvp) != 0) {
+		free(result);
+		zfs_graph_destroy(zgp);
+		return (NULL);
+	}
 
 	/*
 	 * Get rid of the last entry, which is our starting vertex and not
diff --git a/usr/src/lib/libzfs/common/libzfs_impl.h b/usr/src/lib/libzfs/common/libzfs_impl.h
index 76bca21242..2c5e890767 100644
--- a/usr/src/lib/libzfs/common/libzfs_impl.h
+++ b/usr/src/lib/libzfs/common/libzfs_impl.h
@@ -34,13 +34,29 @@
 #include <sys/zfs_acl.h>
 #include <sys/nvpair.h>
 
+#include <libuutil.h>
 #include <libzfs.h>
 
 #ifdef	__cplusplus
 extern "C" {
 #endif
 
+struct libzfs_handle {
+	int libzfs_error;
+	int libzfs_fd;
+	FILE *libzfs_mnttab;
+	FILE *libzfs_sharetab;
+	uu_avl_pool_t *libzfs_ns_avlpool;
+	uu_avl_t *libzfs_ns_avl;
+	uint64_t libzfs_ns_gen;
+	int libzfs_desc_active;
+	char libzfs_action[1024];
+	char libzfs_desc[1024];
+	int libzfs_printerr;
+};
+
 struct zfs_handle {
+	libzfs_handle_t *zfs_hdl;
 	char zfs_name[ZFS_MAXNAMELEN];
 	zfs_type_t zfs_type;
 	dmu_objset_stats_t zfs_dmustats;
@@ -52,6 +68,7 @@ struct zfs_handle {
 };
 
 struct zpool_handle {
+	libzfs_handle_t *zpool_hdl;
 	char zpool_name[ZPOOL_MAXNAMELEN];
 	int zpool_state;
 	size_t zpool_config_size;
@@ -61,18 +78,16 @@ struct zpool_handle {
 	size_t zpool_error_count;
 };
 
-void zfs_error(const char *, ...);
-void zfs_fatal(const char *, ...);
-void *zfs_malloc(size_t);
-char *zfs_strdup(const char *);
-void no_memory(void);
+int zfs_error(libzfs_handle_t *, int, const char *, ...);
+void zfs_error_aux(libzfs_handle_t *, const char *, ...);
+void *zfs_alloc(libzfs_handle_t *, size_t);
+char *zfs_strdup(libzfs_handle_t *, const char *);
+int no_memory(libzfs_handle_t *);
 
-#define	zfs_baderror(err)						\
-	(zfs_fatal(dgettext(TEXT_DOMAIN,				\
-	"internal error: unexpected error %d at line %d of %s"),	\
-	(err), (__LINE__), (__FILE__)))
+int zfs_standard_error(libzfs_handle_t *, int, const char *, ...);
+int zpool_standard_error(libzfs_handle_t *, int, const char *, ...);
 
-char **get_dependents(const char *, size_t *);
+char **get_dependents(libzfs_handle_t *, const char *, size_t *);
 
 typedef struct prop_changelist prop_changelist_t;
 
@@ -87,17 +102,15 @@ int changelist_haszonedchild(prop_changelist_t *);
 
 void remove_mountpoint(zfs_handle_t *);
 
-zfs_handle_t *make_dataset_handle(const char *);
-void set_pool_health(nvlist_t *);
+zfs_handle_t *make_dataset_handle(libzfs_handle_t *, const char *);
+int set_pool_health(nvlist_t *);
 
-zpool_handle_t *zpool_open_silent(const char *);
+zpool_handle_t *zpool_open_silent(libzfs_handle_t *, const char *);
 
-int zvol_create_link(const char *);
-int zvol_remove_link(const char *);
+int zvol_create_link(libzfs_handle_t *, const char *);
+int zvol_remove_link(libzfs_handle_t *, const char *);
 
-int zfs_ioctl(int, zfs_cmd_t *);
-FILE *zfs_mnttab(void);
-FILE *zfs_sharetab(void);
+void namespace_clear(libzfs_handle_t *);
 
 #ifdef	__cplusplus
 }
diff --git a/usr/src/lib/libzfs/common/libzfs_import.c b/usr/src/lib/libzfs/common/libzfs_import.c
index 98519c3aae..ef34419146 100644
--- a/usr/src/lib/libzfs/common/libzfs_import.c
+++ b/usr/src/lib/libzfs/common/libzfs_import.c
@@ -78,7 +78,7 @@ typedef struct pool_entry {
 } pool_entry_t;
 
 typedef struct name_entry {
-	const char		*ne_name;
+	char			*ne_name;
 	uint64_t		ne_guid;
 	struct name_entry	*ne_next;
 } name_entry_t;
@@ -117,7 +117,7 @@ get_devid(const char *path)
  * Go through and fix up any path and/or devid information for the given vdev
  * configuration.
  */
-static void
+static int
 fix_paths(nvlist_t *nv, name_entry_t *names)
 {
 	nvlist_t **child;
@@ -130,8 +130,9 @@ fix_paths(nvlist_t *nv, name_entry_t *names)
 	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
 	    &child, &children) == 0) {
 		for (c = 0; c < children; c++)
-			fix_paths(child[c], names);
-		return;
+			if (fix_paths(child[c], names) != 0)
+				return (-1);
+		return (0);
 	}
 
 	/*
@@ -182,31 +183,56 @@ fix_paths(nvlist_t *nv, name_entry_t *names)
 	}
 
 	if (best == NULL)
-		return;
+		return (0);
 
-	verify(nvlist_add_string(nv, ZPOOL_CONFIG_PATH, best->ne_name) == 0);
+	if (nvlist_add_string(nv, ZPOOL_CONFIG_PATH, best->ne_name) != 0)
+		return (-1);
 
 	if ((devid = get_devid(best->ne_name)) == NULL) {
 		(void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID);
 	} else {
-		verify(nvlist_add_string(nv, ZPOOL_CONFIG_DEVID, devid) == 0);
+		if (nvlist_add_string(nv, ZPOOL_CONFIG_DEVID, devid) != 0)
+			return (-1);
 		devid_str_free(devid);
 	}
+
+	return (0);
 }
 
 /*
  * Add the given configuration to the list of known devices.
  */
-static void
-add_config(pool_list_t *pl, const char *path, nvlist_t *config)
+static int
+add_config(libzfs_handle_t *hdl, pool_list_t *pl, const char *path,
+    nvlist_t *config)
 {
-	uint64_t pool_guid, vdev_guid, top_guid, txg;
+	uint64_t pool_guid, vdev_guid, top_guid, txg, state;
 	pool_entry_t *pe;
 	vdev_entry_t *ve;
 	config_entry_t *ce;
 	name_entry_t *ne;
 
 	/*
+	 * If this is a hot spare not currently in use, add it to the list of
+	 * names to translate, but don't do anything else.
+	 */
+	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
+	    &state) == 0 && state == POOL_STATE_SPARE &&
+	    nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid) == 0) {
+		if ((ne = zfs_alloc(hdl, sizeof (name_entry_t))) == NULL)
+		    return (-1);
+
+		if ((ne->ne_name = zfs_strdup(hdl, path)) == NULL) {
+			free(ne);
+			return (-1);
+		}
+		ne->ne_guid = vdev_guid;
+		ne->ne_next = pl->names;
+		pl->names = ne;
+		return (0);
+	}
+
+	/*
 	 * If we have a valid config but cannot read any of these fields, then
 	 * it means we have a half-initialized label.  In vdev_label_init()
 	 * we write a label with txg == 0 so that we can identify the device
@@ -223,7 +249,7 @@ add_config(pool_list_t *pl, const char *path, nvlist_t *config)
 	    nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG,
 	    &txg) != 0 || txg == 0) {
 		nvlist_free(config);
-		return;
+		return (0);
 	}
 
 	/*
@@ -236,7 +262,10 @@ add_config(pool_list_t *pl, const char *path, nvlist_t *config)
 	}
 
 	if (pe == NULL) {
-		pe = zfs_malloc(sizeof (pool_entry_t));
+		if ((pe = zfs_alloc(hdl, sizeof (pool_entry_t))) == NULL) {
+			nvlist_free(config);
+			return (-1);
+		}
 		pe->pe_guid = pool_guid;
 		pe->pe_next = pl->pools;
 		pl->pools = pe;
@@ -252,7 +281,10 @@ add_config(pool_list_t *pl, const char *path, nvlist_t *config)
 	}
 
 	if (ve == NULL) {
-		ve = zfs_malloc(sizeof (vdev_entry_t));
+		if ((ve = zfs_alloc(hdl, sizeof (vdev_entry_t))) == NULL) {
+			nvlist_free(config);
+			return (-1);
+		}
 		ve->ve_guid = top_guid;
 		ve->ve_next = pe->pe_vdevs;
 		pe->pe_vdevs = ve;
@@ -269,7 +301,10 @@ add_config(pool_list_t *pl, const char *path, nvlist_t *config)
 	}
 
 	if (ce == NULL) {
-		ce = zfs_malloc(sizeof (config_entry_t));
+		if ((ce = zfs_alloc(hdl, sizeof (config_entry_t))) == NULL) {
+			nvlist_free(config);
+			return (-1);
+		}
 		ce->ce_txg = txg;
 		ce->ce_config = config;
 		ce->ce_next = ve->ve_configs;
@@ -284,24 +319,31 @@ add_config(pool_list_t *pl, const char *path, nvlist_t *config)
 	 * mappings so that we can fix up the configuration as necessary before
 	 * doing the import.
 	 */
-	ne = zfs_malloc(sizeof (name_entry_t));
+	if ((ne = zfs_alloc(hdl, sizeof (name_entry_t))) == NULL)
+		return (-1);
+
+	if ((ne->ne_name = zfs_strdup(hdl, path)) == NULL) {
+		free(ne);
+		return (-1);
+	}
 
-	ne->ne_name = zfs_strdup(path);
 	ne->ne_guid = vdev_guid;
 	ne->ne_next = pl->names;
 	pl->names = ne;
+
+	return (0);
 }
 
 /*
  * Returns true if the named pool matches the given GUID.
  */
-boolean_t
-pool_active(const char *name, uint64_t guid)
+static boolean_t
+pool_active(libzfs_handle_t *hdl, const char *name, uint64_t guid)
 {
 	zpool_handle_t *zhp;
 	uint64_t theguid;
 
-	if ((zhp = zpool_open_silent(name)) == NULL)
+	if ((zhp = zpool_open_silent(hdl, name)) == NULL)
 		return (B_FALSE);
 
 	verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_POOL_GUID,
@@ -320,41 +362,42 @@ pool_active(const char *name, uint64_t guid)
  * return to the user.
  */
 static nvlist_t *
-get_configs(pool_list_t *pl)
+get_configs(libzfs_handle_t *hdl, pool_list_t *pl)
 {
-	pool_entry_t *pe, *penext;
-	vdev_entry_t *ve, *venext;
-	config_entry_t *ce, *cenext;
-	nvlist_t *ret, *config, *tmp, *nvtop, *nvroot;
-	int config_seen;
+	pool_entry_t *pe;
+	vdev_entry_t *ve;
+	config_entry_t *ce;
+	nvlist_t *ret = NULL, *config = NULL, *tmp, *nvtop, *nvroot;
+	nvlist_t **spares;
+	uint_t i, nspares;
+	boolean_t config_seen;
 	uint64_t best_txg;
 	char *name;
 	zfs_cmd_t zc = { 0 };
-	uint64_t guid;
+	uint64_t version, guid;
 	char *packed;
 	size_t len;
 	int err;
+	uint_t children = 0;
+	nvlist_t **child = NULL;
+	uint_t c;
 
-	verify(nvlist_alloc(&ret, 0, 0) == 0);
+	if (nvlist_alloc(&ret, 0, 0) != 0)
+		goto nomem;
 
-	for (pe = pl->pools; pe != NULL; pe = penext) {
-		uint_t c;
-		uint_t children = 0;
+	for (pe = pl->pools; pe != NULL; pe = pe->pe_next) {
 		uint64_t id;
-		nvlist_t **child = NULL;
 
-		penext = pe->pe_next;
-
-		verify(nvlist_alloc(&config, NV_UNIQUE_NAME, 0) == 0);
-		config_seen = FALSE;
+		if (nvlist_alloc(&config, NV_UNIQUE_NAME, 0) != 0)
+			goto nomem;
+		config_seen = B_FALSE;
 
 		/*
 		 * Iterate over all toplevel vdevs.  Grab the pool configuration
 		 * from the first one we find, and then go through the rest and
 		 * add them as necessary to the 'vdevs' member of the config.
 		 */
-		for (ve = pe->pe_vdevs; ve != NULL; ve = venext) {
-			venext = ve->ve_next;
+		for (ve = pe->pe_vdevs; ve != NULL; ve = ve->ve_next) {
 
 			/*
 			 * Determine the best configuration for this vdev by
@@ -365,8 +408,10 @@ get_configs(pool_list_t *pl)
 			for (ce = ve->ve_configs; ce != NULL;
 			    ce = ce->ce_next) {
 
-				if (ce->ce_txg > best_txg)
+				if (ce->ce_txg > best_txg) {
 					tmp = ce->ce_config;
+					best_txg = ce->ce_txg;
+				}
 			}
 
 			if (!config_seen) {
@@ -374,6 +419,7 @@ get_configs(pool_list_t *pl)
 				 * Copy the relevant pieces of data to the pool
 				 * configuration:
 				 *
+				 *	version
 				 * 	pool guid
 				 * 	name
 				 * 	pool state
@@ -381,19 +427,27 @@ get_configs(pool_list_t *pl)
 				uint64_t state;
 
 				verify(nvlist_lookup_uint64(tmp,
+				    ZPOOL_CONFIG_VERSION, &version) == 0);
+				if (nvlist_add_uint64(config,
+				    ZPOOL_CONFIG_VERSION, version) != 0)
+					goto nomem;
+				verify(nvlist_lookup_uint64(tmp,
 				    ZPOOL_CONFIG_POOL_GUID, &guid) == 0);
-				verify(nvlist_add_uint64(config,
-				    ZPOOL_CONFIG_POOL_GUID, guid) == 0);
+				if (nvlist_add_uint64(config,
+				    ZPOOL_CONFIG_POOL_GUID, guid) != 0)
+					goto nomem;
 				verify(nvlist_lookup_string(tmp,
 				    ZPOOL_CONFIG_POOL_NAME, &name) == 0);
-				verify(nvlist_add_string(config,
-				    ZPOOL_CONFIG_POOL_NAME, name) == 0);
+				if (nvlist_add_string(config,
+				    ZPOOL_CONFIG_POOL_NAME, name) != 0)
+					goto nomem;
 				verify(nvlist_lookup_uint64(tmp,
 				    ZPOOL_CONFIG_POOL_STATE, &state) == 0);
-				verify(nvlist_add_uint64(config,
-				    ZPOOL_CONFIG_POOL_STATE, state) == 0);
+				if (nvlist_add_uint64(config,
+				    ZPOOL_CONFIG_POOL_STATE, state) != 0)
+					goto nomem;
 
-				config_seen = TRUE;
+				config_seen = B_TRUE;
 			}
 
 			/*
@@ -406,8 +460,10 @@ get_configs(pool_list_t *pl)
 			if (id >= children) {
 				nvlist_t **newchild;
 
-				newchild = zfs_malloc((id + 1) *
+				newchild = zfs_alloc(hdl, (id + 1) *
 				    sizeof (nvlist_t *));
+				if (newchild == NULL)
+					goto nomem;
 
 				for (c = 0; c < children; c++)
 					newchild[c] = child[c];
@@ -416,23 +472,9 @@ get_configs(pool_list_t *pl)
 				child = newchild;
 				children = id + 1;
 			}
-			verify(nvlist_dup(nvtop, &child[id], 0) == 0);
+			if (nvlist_dup(nvtop, &child[id], 0) != 0)
+				goto nomem;
 
-			/*
-			 * Go through and free all config information.
-			 */
-			for (ce = ve->ve_configs; ce != NULL; ce = cenext) {
-				cenext = ce->ce_next;
-
-				nvlist_free(ce->ce_config);
-				free(ce);
-			}
-
-			/*
-			 * Free this vdev entry, since it has now been merged
-			 * into the main config.
-			 */
-			free(ve);
 		}
 
 		verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
@@ -448,51 +490,63 @@ get_configs(pool_list_t *pl)
 		for (c = 0; c < children; c++)
 			if (child[c] == NULL) {
 				nvlist_t *missing;
-				verify(nvlist_alloc(&missing, NV_UNIQUE_NAME,
-				    0) == 0);
-				verify(nvlist_add_string(missing,
-				    ZPOOL_CONFIG_TYPE, VDEV_TYPE_MISSING) == 0);
-				verify(nvlist_add_uint64(missing,
-				    ZPOOL_CONFIG_ID, c) == 0);
-				verify(nvlist_add_uint64(missing,
-				    ZPOOL_CONFIG_GUID, 0ULL) == 0);
+				if (nvlist_alloc(&missing, NV_UNIQUE_NAME,
+				    0) != 0)
+					goto nomem;
+				if (nvlist_add_string(missing,
+				    ZPOOL_CONFIG_TYPE,
+				    VDEV_TYPE_MISSING) != 0 ||
+				    nvlist_add_uint64(missing,
+				    ZPOOL_CONFIG_ID, c) != 0 ||
+				    nvlist_add_uint64(missing,
+				    ZPOOL_CONFIG_GUID, 0ULL) != 0) {
+					nvlist_free(missing);
+					goto nomem;
+				}
 				child[c] = missing;
 			}
 
 		/*
 		 * Put all of this pool's top-level vdevs into a root vdev.
 		 */
-		verify(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) == 0);
-		verify(nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE,
-		    VDEV_TYPE_ROOT) == 0);
-		verify(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_ID, 0ULL) == 0);
-		verify(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_GUID, guid) == 0);
-		verify(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
-		    child, children) == 0);
+		if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0)
+			goto nomem;
+		if (nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE,
+		    VDEV_TYPE_ROOT) != 0 ||
+		    nvlist_add_uint64(nvroot, ZPOOL_CONFIG_ID, 0ULL) != 0 ||
+		    nvlist_add_uint64(nvroot, ZPOOL_CONFIG_GUID, guid) != 0 ||
+		    nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
+		    child, children) != 0) {
+			nvlist_free(nvroot);
+			goto nomem;
+		}
 
 		for (c = 0; c < children; c++)
 			nvlist_free(child[c]);
 		free(child);
+		children = 0;
+		child = NULL;
 
 		/*
 		 * Go through and fix up any paths and/or devids based on our
 		 * known list of vdev GUID -> path mappings.
 		 */
-		fix_paths(nvroot, pl->names);
+		if (fix_paths(nvroot, pl->names) != 0) {
+			nvlist_free(nvroot);
+			goto nomem;
+		}
 
 		/*
 		 * Add the root vdev to this pool's configuration.
 		 */
-		verify(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
-		    nvroot) == 0);
+		if (nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
+		    nvroot) != 0) {
+			nvlist_free(nvroot);
+			goto nomem;
+		}
 		nvlist_free(nvroot);
 
 		/*
-		 * Free this pool entry.
-		 */
-		free(pe);
-
-		/*
 		 * Determine if this pool is currently active, in which case we
 		 * can't actually import it.
 		 */
@@ -501,8 +555,9 @@ get_configs(pool_list_t *pl)
 		verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
 		    &guid) == 0);
 
-		if (pool_active(name, guid)) {
+		if (pool_active(hdl, name, guid)) {
 			nvlist_free(config);
+			config = NULL;
 			continue;
 		}
 
@@ -510,13 +565,14 @@ get_configs(pool_list_t *pl)
 		 * Try to do the import in order to get vdev state.
 		 */
 		if ((err = nvlist_size(config, &len, NV_ENCODE_NATIVE)) != 0)
-			zfs_baderror(err);
+			goto nomem;
 
-		packed = zfs_malloc(len);
+		if ((packed = zfs_alloc(hdl, len)) == NULL)
+			goto nomem;
 
 		if ((err = nvlist_pack(config, &packed, &len,
 		    NV_ENCODE_NATIVE, 0)) != 0)
-			zfs_baderror(err);
+			goto nomem;
 
 		nvlist_free(config);
 		config = NULL;
@@ -525,37 +581,76 @@ get_configs(pool_list_t *pl)
 		zc.zc_config_src = (uint64_t)(uintptr_t)packed;
 
 		zc.zc_config_dst_size = 2 * len;
-		zc.zc_config_dst = (uint64_t)(uintptr_t)
-		    zfs_malloc(zc.zc_config_dst_size);
+		if ((zc.zc_config_dst = (uint64_t)(uintptr_t)
+		    zfs_alloc(hdl, zc.zc_config_dst_size)) == NULL)
+			goto nomem;
 
-		while ((err = zfs_ioctl(ZFS_IOC_POOL_TRYIMPORT,
+		while ((err = ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_TRYIMPORT,
 		    &zc)) != 0 && errno == ENOMEM) {
 			free((void *)(uintptr_t)zc.zc_config_dst);
-			zc.zc_config_dst = (uint64_t)(uintptr_t)
-			    zfs_malloc(zc.zc_config_dst_size);
+			if ((zc.zc_config_dst = (uint64_t)(uintptr_t)
+			    zfs_alloc(hdl, zc.zc_config_dst_size)) == NULL)
+				goto nomem;
 		}
 
 		free(packed);
 
-		if (err)
-			zfs_baderror(errno);
+		if (err) {
+			(void) zpool_standard_error(hdl, errno,
+			    dgettext(TEXT_DOMAIN, "cannot discover pools"));
+			free((void *)(uintptr_t)zc.zc_config_dst);
+			goto error;
+		}
 
-		verify(nvlist_unpack((void *)(uintptr_t)zc.zc_config_dst,
-		    zc.zc_config_dst_size, &config, 0) == 0);
+		if (nvlist_unpack((void *)(uintptr_t)zc.zc_config_dst,
+		    zc.zc_config_dst_size, &config, 0) != 0) {
+			free((void *)(uintptr_t)zc.zc_config_dst);
+			goto nomem;
+		}
+		free((void *)(uintptr_t)zc.zc_config_dst);
 
-		set_pool_health(config);
+		/*
+		 * Go through and update the paths for spares, now that we have
+		 * them.
+		 */
+		verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
+		    &nvroot) == 0);
+		if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
+		    &spares, &nspares) == 0) {
+			for (i = 0; i < nspares; i++) {
+				if (fix_paths(spares[i], pl->names) != 0)
+					goto nomem;
+			}
+		}
+
+		if (set_pool_health(config) != 0)
+			goto nomem;
 
 		/*
 		 * Add this pool to the list of configs.
 		 */
-		verify(nvlist_add_nvlist(ret, name, config) == 0);
+		if (nvlist_add_nvlist(ret, name, config) != 0)
+			goto nomem;
 
 		nvlist_free(config);
-
-		free((void *)(uintptr_t)zc.zc_config_dst);
+		config = NULL;
 	}
 
 	return (ret);
+
+nomem:
+	(void) no_memory(hdl);
+error:
+	if (config)
+		nvlist_free(config);
+	if (ret)
+		nvlist_free(ret);
+	for (c = 0; c < children; c++)
+		nvlist_free(child[c]);
+	if (child)
+		free(child);
+
+	return (NULL);
 }
 
 /*
@@ -572,19 +667,21 @@ label_offset(size_t size, int l)
  * Given a file descriptor, read the label information and return an nvlist
  * describing the configuration, if there is one.
  */
-nvlist_t *
-zpool_read_label(int fd)
+int
+zpool_read_label(int fd, nvlist_t **config)
 {
 	struct stat64 statbuf;
 	int l;
 	vdev_label_t *label;
-	nvlist_t *config;
 	uint64_t state, txg;
 
+	*config = NULL;
+
 	if (fstat64(fd, &statbuf) == -1)
-		return (NULL);
+		return (0);
 
-	label = zfs_malloc(sizeof (vdev_label_t));
+	if ((label = malloc(sizeof (vdev_label_t))) == NULL)
+		return (-1);
 
 	for (l = 0; l < VDEV_LABELS; l++) {
 		if (pread(fd, label, sizeof (vdev_label_t),
@@ -592,27 +689,29 @@ zpool_read_label(int fd)
 			continue;
 
 		if (nvlist_unpack(label->vl_vdev_phys.vp_nvlist,
-		    sizeof (label->vl_vdev_phys.vp_nvlist), &config, 0) != 0)
+		    sizeof (label->vl_vdev_phys.vp_nvlist), config, 0) != 0)
 			continue;
 
-		if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
-		    &state) != 0 || state > POOL_STATE_DESTROYED) {
-			nvlist_free(config);
+		if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE,
+		    &state) != 0 || state > POOL_STATE_SPARE) {
+			nvlist_free(*config);
 			continue;
 		}
 
-		if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG,
-		    &txg) != 0 || txg == 0) {
-			nvlist_free(config);
+		if (state != POOL_STATE_SPARE &&
+		    (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG,
+		    &txg) != 0 || txg == 0)) {
+			nvlist_free(*config);
 			continue;
 		}
 
 		free(label);
-		return (config);
+		return (0);
 	}
 
 	free(label);
-	return (NULL);
+	*config = NULL;
+	return (0);
 }
 
 /*
@@ -621,17 +720,22 @@ zpool_read_label(int fd)
  * given (argc is 0), then the default directory (/dev/dsk) is searched.
  */
 nvlist_t *
-zpool_find_import(int argc, char **argv)
+zpool_find_import(libzfs_handle_t *hdl, int argc, char **argv)
 {
 	int i;
 	DIR *dirp;
 	struct dirent64 *dp;
 	char path[MAXPATHLEN];
 	struct stat64 statbuf;
-	nvlist_t *ret, *config;
+	nvlist_t *ret = NULL, *config;
 	static char *default_dir = "/dev/dsk";
 	int fd;
 	pool_list_t pools = { 0 };
+	pool_entry_t *pe, *penext;
+	vdev_entry_t *ve, *venext;
+	config_entry_t *ce, *cenext;
+	name_entry_t *ne, *nenext;
+
 
 	if (argc == 0) {
 		argc = 1;
@@ -645,17 +749,18 @@ zpool_find_import(int argc, char **argv)
 	 */
 	for (i = 0; i < argc; i++) {
 		if (argv[i][0] != '/') {
-			zfs_error(dgettext(TEXT_DOMAIN,
-			    "cannot open '%s': must be an absolute path"),
+			(void) zfs_error(hdl, EZFS_BADPATH,
+			    dgettext(TEXT_DOMAIN, "cannot open '%s'"),
 			    argv[i]);
-			return (NULL);
+			goto error;
 		}
 
 		if ((dirp = opendir(argv[i])) == NULL) {
-			zfs_error(dgettext(TEXT_DOMAIN,
-			    "cannot open '%s': %s"), argv[i],
-			    strerror(errno));
-			return (NULL);
+			zfs_error_aux(hdl, strerror(errno));
+			(void) zfs_error(hdl, EZFS_BADPATH,
+			    dgettext(TEXT_DOMAIN, "cannot open '%s'"),
+			    argv[i]);
+			goto error;
 		}
 
 		/*
@@ -678,21 +783,49 @@ zpool_find_import(int argc, char **argv)
 			if ((fd = open64(path, O_RDONLY)) < 0)
 				continue;
 
-			config = zpool_read_label(fd);
+			if ((zpool_read_label(fd, &config)) != 0) {
+				(void) no_memory(hdl);
+				goto error;
+			}
 
 			(void) close(fd);
 
 			if (config != NULL)
-				add_config(&pools, path, config);
+				if (add_config(hdl, &pools, path, config) != 0)
+					goto error;
 		}
 	}
 
-	ret = get_configs(&pools);
+	ret = get_configs(hdl, &pools);
+
+error:
+	for (pe = pools.pools; pe != NULL; pe = penext) {
+		penext = pe->pe_next;
+		for (ve = pe->pe_vdevs; ve != NULL; ve = venext) {
+			venext = ve->ve_next;
+			for (ce = ve->ve_configs; ce != NULL; ce = cenext) {
+				cenext = ce->ce_next;
+				if (ce->ce_config)
+					nvlist_free(ce->ce_config);
+				free(ce);
+			}
+			free(ve);
+		}
+		free(pe);
+	}
+
+	for (ne = pools.names; ne != NULL; ne = nenext) {
+		nenext = ne->ne_next;
+		if (ne->ne_name)
+			free(ne->ne_name);
+		free(ne);
+	}
+
 
 	return (ret);
 }
 
-int
+boolean_t
 find_guid(nvlist_t *nv, uint64_t guid)
 {
 	uint64_t tmp;
@@ -701,49 +834,94 @@ find_guid(nvlist_t *nv, uint64_t guid)
 
 	verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &tmp) == 0);
 	if (tmp == guid)
-		return (TRUE);
+		return (B_TRUE);
 
 	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
 	    &child, &children) == 0) {
 		for (c = 0; c < children; c++)
 			if (find_guid(child[c], guid))
-				return (TRUE);
+				return (B_TRUE);
+	}
+
+	return (B_FALSE);
+}
+
+typedef struct spare_cbdata {
+	uint64_t	cb_guid;
+	zpool_handle_t	*cb_zhp;
+} spare_cbdata_t;
+
+static int
+find_spare(zpool_handle_t *zhp, void *data)
+{
+	spare_cbdata_t *cbp = data;
+	nvlist_t **spares;
+	uint_t i, nspares;
+	uint64_t guid;
+	nvlist_t *nvroot;
+
+	verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
+	    &nvroot) == 0);
+
+	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
+	    &spares, &nspares) == 0) {
+		for (i = 0; i < nspares; i++) {
+			verify(nvlist_lookup_uint64(spares[i],
+			    ZPOOL_CONFIG_GUID, &guid) == 0);
+			if (guid == cbp->cb_guid) {
+				cbp->cb_zhp = zhp;
+				return (1);
+			}
+		}
 	}
 
-	return (FALSE);
+	zpool_close(zhp);
+	return (0);
 }
 
 /*
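- * Determines if the pool is in use.  If so, it returns TRUE and the state of
- * the pool as well as the name of the pool.  Both strings are allocated and
- * must be freed by the caller.
+ * Determines if the pool is in use.  Returns 0 on success and -1 on failure.
+ * On success '*inuse' tells whether it is in use; if so, '*state' and
+ * '*namestr' are filled in, and '*namestr' must be freed by the caller.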
  */
 int
-zpool_in_use(int fd, pool_state_t *state, char **namestr)
+zpool_in_use(libzfs_handle_t *hdl, int fd, pool_state_t *state, char **namestr,
+    boolean_t *inuse)
 {
 	nvlist_t *config;
 	char *name;
-	int ret;
+	boolean_t ret;
 	uint64_t guid, vdev_guid;
 	zpool_handle_t *zhp;
 	nvlist_t *pool_config;
 	uint64_t stateval;
+	spare_cbdata_t cb = { 0 };
+
+	*inuse = B_FALSE;
 
-	if ((config = zpool_read_label(fd)) == NULL)
-		return (FALSE);
+	if (zpool_read_label(fd, &config) != 0) {
+		(void) no_memory(hdl);
+		return (-1);
+	}
+
+	if (config == NULL)
+		return (0);
 
-	verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
-	    &name) == 0);
 	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
 	    &stateval) == 0);
-	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
-	    &guid) == 0);
 	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID,
 	    &vdev_guid) == 0);
 
+	if (stateval != POOL_STATE_SPARE) {
+		verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
+		    &name) == 0);
+		verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
+		    &guid) == 0);
+	}
+
 	switch (stateval) {
 	case POOL_STATE_EXPORTED:
-		ret = TRUE;
+		ret = B_TRUE;
 		break;
 
 	case POOL_STATE_ACTIVE:
@@ -754,14 +932,14 @@ zpool_in_use(int fd, pool_state_t *state, char **namestr)
 		 * active pool that was disconnected without being explicitly
 		 * exported.
 		 */
-		if (pool_active(name, guid)) {
+		if (pool_active(hdl, name, guid)) {
 			/*
 			 * Because the device may have been removed while
 			 * offlined, we only report it as active if the vdev is
 			 * still present in the config.  Otherwise, pretend like
 			 * it's not in use.
 			 */
-			if ((zhp = zpool_open_canfail(name)) != NULL &&
+			if ((zhp = zpool_open_canfail(hdl, name)) != NULL &&
 			    (pool_config = zpool_get_config(zhp, NULL))
 			    != NULL) {
 				nvlist_t *nvroot;
@@ -770,24 +948,57 @@ zpool_in_use(int fd, pool_state_t *state, char **namestr)
 				    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
 				ret = find_guid(nvroot, vdev_guid);
 			} else {
-				ret = FALSE;
+				ret = B_FALSE;
 			}
+
+			if (zhp != NULL)
+				zpool_close(zhp);
 		} else {
 			stateval = POOL_STATE_POTENTIALLY_ACTIVE;
+			ret = B_TRUE;
+		}
+		break;
+
+	case POOL_STATE_SPARE:
+		/*
+		 * For a hot spare, it can be either definitively in use, or
+		 * potentially active.  To determine if it's in use, we iterate
+		 * over all pools in the system and search for one with a spare
+		 * with a matching guid.
+		 *
+		 * Due to the shared nature of spares, we don't actually report
+		 * the potentially active case as in use.  This means the user
+		 * can freely create pools on the hot spares of exported pools,
+		 * but to do otherwise makes the resulting code complicated, and
+		 * we end up having to deal with this case anyway.
+		 */
+		cb.cb_zhp = NULL;
+		cb.cb_guid = vdev_guid;
+		if (zpool_iter(hdl, find_spare, &cb) == 1) {
+			name = (char *)zpool_get_name(cb.cb_zhp);
 			ret = TRUE;
+		} else {
+			ret = FALSE;
 		}
 		break;
 
 	default:
-		ret = FALSE;
+		ret = B_FALSE;
 	}
 
 
 	if (ret) {
-		*namestr = zfs_strdup(name);
+		*namestr = zfs_strdup(hdl, name);	/* checked below */
 		*state = (pool_state_t)stateval;
 	}
 
+	/* Release the spare's pool handle only after its name is copied. */
+	if (cb.cb_zhp)
+		zpool_close(cb.cb_zhp);
 	nvlist_free(config);
-	return (ret);
+	if (ret && *namestr == NULL)
+		return (-1);
+
+	*inuse = ret;
+	return (0);
 }
diff --git a/usr/src/lib/libzfs/common/libzfs_mount.c b/usr/src/lib/libzfs/common/libzfs_mount.c
index ae4a9937a8..894bcc0d03 100644
--- a/usr/src/lib/libzfs/common/libzfs_mount.c
+++ b/usr/src/lib/libzfs/common/libzfs_mount.c
@@ -63,44 +63,44 @@
 #include "libzfs_impl.h"
 
 /*
- * Search the sharetab for the given mountpoint, returning TRUE if it is found.
+ * Search the sharetab for the given mountpoint, returning true if it is found.
  */
-static int
-is_shared(const char *mountpoint)
+static boolean_t
+is_shared(libzfs_handle_t *hdl, const char *mountpoint)
 {
 	char buf[MAXPATHLEN], *tab;
 
-	if (zfs_sharetab() == NULL)
+	if (hdl->libzfs_sharetab == NULL)
 		return (0);
 
-	(void) fseek(zfs_sharetab(), 0, SEEK_SET);
+	(void) fseek(hdl->libzfs_sharetab, 0, SEEK_SET);
 
-	while (fgets(buf, sizeof (buf), zfs_sharetab()) != NULL) {
+	while (fgets(buf, sizeof (buf), hdl->libzfs_sharetab) != NULL) {
 
 		/* the mountpoint is the first entry on each line */
 		if ((tab = strchr(buf, '\t')) != NULL) {
 			*tab = '\0';
 			if (strcmp(buf, mountpoint) == 0)
-				return (1);
+				return (B_TRUE);
 		}
 	}
 
-	return (0);
+	return (B_FALSE);
 }
 
 /*
- * Returns TRUE if the specified directory is empty.  If we can't open the
- * directory at all, return TRUE so that the mount can fail with a more
+ * Returns true if the specified directory is empty.  If we can't open the
+ * directory at all, return true so that the mount can fail with a more
  * informative error message.
  */
-static int
+static boolean_t
 dir_is_empty(const char *dirname)
 {
 	DIR *dirp;
 	struct dirent64 *dp;
 
 	if ((dirp = opendir(dirname)) == NULL)
-		return (TRUE);
+		return (B_TRUE);
 
 	while ((dp = readdir64(dirp)) != NULL) {
 
@@ -109,11 +109,11 @@ dir_is_empty(const char *dirname)
 			continue;
 
 		(void) closedir(dirp);
-		return (FALSE);
+		return (B_FALSE);
 	}
 
 	(void) closedir(dirp);
-	return (TRUE);
+	return (B_TRUE);
 }
 
 /*
@@ -121,7 +121,7 @@ dir_is_empty(const char *dirname)
  * in 'where' with the current mountpoint, and return 1.  Otherwise, we return
  * 0.
  */
-int
+boolean_t
 zfs_is_mounted(zfs_handle_t *zhp, char **where)
 {
 	struct mnttab search = { 0 }, entry;
@@ -134,14 +134,14 @@ zfs_is_mounted(zfs_handle_t *zhp, char **where)
 	search.mnt_special = (char *)zfs_get_name(zhp);
 	search.mnt_fstype = MNTTYPE_ZFS;
 
-	rewind(zfs_mnttab());
-	if (getmntany(zfs_mnttab(), &entry, &search) != 0)
-		return (FALSE);
+	rewind(zhp->zfs_hdl->libzfs_mnttab);
+	if (getmntany(zhp->zfs_hdl->libzfs_mnttab, &entry, &search) != 0)
+		return (B_FALSE);
 
 	if (where != NULL)
-		*where = zfs_strdup(entry.mnt_mountp);
+		*where = zfs_strdup(zhp->zfs_hdl, entry.mnt_mountp);
 
-	return (TRUE);
+	return (B_TRUE);
 }
 
 /*
@@ -153,6 +153,7 @@ zfs_mount(zfs_handle_t *zhp, const char *options, int flags)
 	struct stat buf;
 	char mountpoint[ZFS_MAXPROPLEN];
 	char mntopts[MNT_LINE_MAX];
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
 
 	if (options == NULL)
 		mntopts[0] = '\0';
@@ -161,7 +162,7 @@ zfs_mount(zfs_handle_t *zhp, const char *options, int flags)
 
 	/* ignore non-filesystems */
 	if (zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, mountpoint,
-	    sizeof (mountpoint), NULL, NULL, 0, FALSE) != 0)
+	    sizeof (mountpoint), NULL, NULL, 0, B_FALSE) != 0)
 		return (0);
 
 	/* return success if there is no mountpoint set */
@@ -173,25 +174,18 @@ zfs_mount(zfs_handle_t *zhp, const char *options, int flags)
 	 * If the 'zoned' property is set, and we're in the global zone, simply
 	 * return success.
 	 */
-	if (zfs_prop_get_int(zhp, ZFS_PROP_ZONED)) {
-		char zonename[ZONENAME_MAX];
-		if (getzonenamebyid(getzoneid(), zonename,
-		    sizeof (zonename)) < 0) {
-			zfs_error(dgettext(TEXT_DOMAIN, "internal error: "
-			    "cannot determine current zone"));
-			return (1);
-		}
-
-		if (strcmp(zonename, "global") == 0)
-			return (0);
-	}
+	if (zfs_prop_get_int(zhp, ZFS_PROP_ZONED) &&
+	    getzoneid() == GLOBAL_ZONEID)
+		return (0);
 
 	/* Create the directory if it doesn't already exist */
 	if (lstat(mountpoint, &buf) != 0) {
 		if (mkdirp(mountpoint, 0755) != 0) {
-			zfs_error(dgettext(TEXT_DOMAIN, "cannot mount '%s': "
-			    "unable to create mountpoint"), mountpoint);
-			return (1);
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "failed to create mountpoint"));
+			return (zfs_error(hdl, EZFS_MOUNTFAILED,
+			    dgettext(TEXT_DOMAIN, "cannot mount '%s'"),
+			    mountpoint));
 		}
 	}
 
@@ -204,11 +198,10 @@ zfs_mount(zfs_handle_t *zhp, const char *options, int flags)
 	if ((flags & MS_OVERLAY) == 0 &&
 	    strstr(mntopts, MNTOPT_REMOUNT) == NULL &&
 	    !dir_is_empty(mountpoint)) {
-		zfs_error(dgettext(TEXT_DOMAIN, "cannot mount '%s': "
-		    "directory is not empty"), mountpoint);
-		zfs_error(dgettext(TEXT_DOMAIN, "use legacy mountpoint to "
-		    "allow this behavior, or use the -O flag"));
-		return (1);
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "directory is not empty"));
+		return (zfs_error(hdl, EZFS_MOUNTFAILED,
+		    dgettext(TEXT_DOMAIN, "cannot mount '%s'"), mountpoint));
 	}
 
 	/* perform the mount */
@@ -219,24 +212,15 @@ zfs_mount(zfs_handle_t *zhp, const char *options, int flags)
 		 * from mount(), and they're well-understood.  We pick a few
 		 * common ones to improve upon.
 		 */
-		switch (errno) {
-		case EBUSY:
-			zfs_error(dgettext(TEXT_DOMAIN, "cannot mount '%s': "
-			    "mountpoint or dataset is busy"), zhp->zfs_name);
-			break;
-		case EPERM:
-		case EACCES:
-			zfs_error(dgettext(TEXT_DOMAIN, "cannot mount '%s': "
-			    "permission denied"), zhp->zfs_name,
-			    mountpoint);
-			break;
-		default:
-			zfs_error(dgettext(TEXT_DOMAIN,
-			    "cannot mount '%s': %s"),
-			    mountpoint, strerror(errno));
-			break;
-		}
-		return (1);
+		if (errno == EBUSY)
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "mountpoint or dataset is busy"));
+		else
+			zfs_error_aux(hdl, strerror(errno));
+
+		return (zfs_error(hdl, EZFS_MOUNTFAILED,
+		    dgettext(TEXT_DOMAIN, "cannot mount '%s'"),
+		    zhp->zfs_name));
 	}
 
 	return (0);
@@ -253,9 +237,9 @@ zfs_unmount(zfs_handle_t *zhp, const char *mountpoint, int flags)
 	/* check to see if need to unmount the filesystem */
 	search.mnt_special = (char *)zfs_get_name(zhp);
 	search.mnt_fstype = MNTTYPE_ZFS;
-	rewind(zfs_mnttab());
+	rewind(zhp->zfs_hdl->libzfs_mnttab);
 	if (mountpoint != NULL || ((zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) &&
-	    getmntany(zfs_mnttab(), &entry, &search) == 0)) {
+	    getmntany(zhp->zfs_hdl->libzfs_mnttab, &entry, &search) == 0)) {
 
 		if (mountpoint == NULL)
 			mountpoint = entry.mnt_mountp;
@@ -277,10 +261,10 @@ zfs_unmount(zfs_handle_t *zhp, const char *mountpoint, int flags)
 		 * semantics from the kernel.
 		 */
 		if (umount2(mountpoint, flags) != 0) {
-			zfs_error(dgettext(TEXT_DOMAIN,
-			    "cannot unmount '%s': %s"),
-			    mountpoint, strerror(errno));
-			return (-1);
+			zfs_error_aux(zhp->zfs_hdl, strerror(errno));
+			return (zfs_error(zhp->zfs_hdl, EZFS_UMOUNTFAILED,
+			    dgettext(TEXT_DOMAIN, "cannot unmount '%s'"),
+			    mountpoint));
 		}
 
 		/*
@@ -315,23 +299,23 @@ zfs_unmountall(zfs_handle_t *zhp, int flags)
 /*
  * Check to see if the filesystem is currently shared.
  */
-int
+boolean_t
 zfs_is_shared(zfs_handle_t *zhp, char **where)
 {
 	char *mountpoint;
 
 	if (!zfs_is_mounted(zhp, &mountpoint))
-		return (FALSE);
+		return (B_FALSE);
 
-	if (is_shared(mountpoint)) {
+	if (is_shared(zhp->zfs_hdl, mountpoint)) {
 		if (where != NULL)
 			*where = mountpoint;
 		else
 			free(mountpoint);
-		return (TRUE);
+		return (B_TRUE);
 	} else {
 		free(mountpoint);
-		return (FALSE);
+		return (B_FALSE);
 	}
 }
 
@@ -346,6 +330,7 @@ zfs_share(zfs_handle_t *zhp)
 	char shareopts[ZFS_MAXPROPLEN];
 	char buf[MAXPATHLEN];
 	FILE *fp;
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
 
 	/* ignore non-filesystems */
 	if (zfs_get_type(zhp) != ZFS_TYPE_FILESYSTEM)
@@ -353,14 +338,14 @@ zfs_share(zfs_handle_t *zhp)
 
 	/* return success if there is no mountpoint set */
 	if (zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT,
-	    mountpoint, sizeof (mountpoint), NULL, NULL, 0, FALSE) != 0 ||
+	    mountpoint, sizeof (mountpoint), NULL, NULL, 0, B_FALSE) != 0 ||
 	    strcmp(mountpoint, ZFS_MOUNTPOINT_NONE) == 0 ||
 	    strcmp(mountpoint, ZFS_MOUNTPOINT_LEGACY) == 0)
 		return (0);
 
 	/* return success if there are no share options */
 	if (zfs_prop_get(zhp, ZFS_PROP_SHARENFS, shareopts, sizeof (shareopts),
-	    NULL, NULL, 0, FALSE) != 0 ||
+	    NULL, NULL, 0, B_FALSE) != 0 ||
 	    strcmp(shareopts, "off") == 0)
 		return (0);
 
@@ -386,11 +371,10 @@ zfs_share(zfs_handle_t *zhp)
 		    "-F nfs -o \"%s\" \"%s\" 2>&1", shareopts,
 		    mountpoint);
 
-	if ((fp = popen(buf, "r")) == NULL) {
-		zfs_error(dgettext(TEXT_DOMAIN, "cannot share '%s': "
-		    "share(1M) failed"), zfs_get_name(zhp));
-		return (-1);
-	}
+	if ((fp = popen(buf, "r")) == NULL)
+		return (zfs_error(hdl, EZFS_SHAREFAILED,
+		    dgettext(TEXT_DOMAIN, "cannot share '%s'"),
+		    zfs_get_name(zhp)));
 
 	/*
 	 * share(1M) should only produce output if there is some kind
@@ -403,14 +387,11 @@ zfs_share(zfs_handle_t *zhp)
 		while (buf[strlen(buf) - 1] == '\n')
 			buf[strlen(buf) - 1] = '\0';
 
-		if (colon == NULL)
-			zfs_error(dgettext(TEXT_DOMAIN, "cannot share "
-			    "'%s': share(1M) failed"),
-			    zfs_get_name(zhp));
-		else
-			zfs_error(dgettext(TEXT_DOMAIN, "cannot share "
-			    "'%s': %s"), zfs_get_name(zhp),
-			    colon + 2);
+		if (colon != NULL)
+			zfs_error_aux(hdl, colon + 2);
+
+		(void) zfs_error(hdl, EZFS_SHAREFAILED, dgettext(TEXT_DOMAIN,
+		    "cannot share '%s'"), zfs_get_name(zhp));
 
 		verify(pclose(fp) != 0);
 		return (-1);
@@ -429,30 +410,29 @@ zfs_unshare(zfs_handle_t *zhp, const char *mountpoint)
 {
 	char buf[MAXPATHLEN];
 	struct mnttab search = { 0 }, entry;
+	libzfs_handle_t *hdl = zhp->zfs_hdl;
 
 	/* check to see if need to unmount the filesystem */
 	search.mnt_special = (char *)zfs_get_name(zhp);
 	search.mnt_fstype = MNTTYPE_ZFS;
-	rewind(zfs_mnttab());
+	rewind(zhp->zfs_hdl->libzfs_mnttab);
 	if (mountpoint != NULL || ((zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) &&
-	    getmntany(zfs_mnttab(), &entry, &search) == 0)) {
+	    getmntany(zhp->zfs_hdl->libzfs_mnttab, &entry, &search) == 0)) {
 
 		if (mountpoint == NULL)
 			mountpoint = entry.mnt_mountp;
 
-		if (is_shared(mountpoint)) {
+		if (is_shared(zhp->zfs_hdl, mountpoint)) {
 			FILE *fp;
 
 			(void) snprintf(buf, sizeof (buf),
 			    "/usr/sbin/unshare  \"%s\" 2>&1",
 			    mountpoint);
 
-			if ((fp = popen(buf, "r")) == NULL) {
-				zfs_error(dgettext(TEXT_DOMAIN, "cannot "
-				    "unshare '%s': unshare(1M) failed"),
-				    zfs_get_name(zhp));
-				return (-1);
-			}
+			if ((fp = popen(buf, "r")) == NULL)
+				return (zfs_error(hdl, EZFS_UNSHAREFAILED,
+				    dgettext(TEXT_DOMAIN,
+				    "cannot unshare '%s'"), zfs_get_name(zhp)));
 
 			/*
 			 * unshare(1M) should only produce output if there is
@@ -465,17 +445,14 @@ zfs_unshare(zfs_handle_t *zhp, const char *mountpoint)
 				while (buf[strlen(buf) - 1] == '\n')
 					buf[strlen(buf) - 1] = '\0';
 
-				if (colon == NULL)
-					zfs_error(dgettext(TEXT_DOMAIN,
-					    "cannot unshare '%s': unshare(1M) "
-					    "failed"), zfs_get_name(zhp));
-				else
-					zfs_error(dgettext(TEXT_DOMAIN,
-					    "cannot unshare '%s': %s"),
-					    zfs_get_name(zhp), colon + 2);
+				if (colon != NULL)
+					zfs_error_aux(hdl, colon + 2);
 
 				verify(pclose(fp) != 0);
-				return (-1);
+
+				return (zfs_error(hdl, EZFS_UNSHAREFAILED,
+				    dgettext(TEXT_DOMAIN,
+				    "cannot unshare '%s'"), zfs_get_name(zhp)));
 			}
 
 			verify(pclose(fp) == 0);
@@ -521,24 +498,20 @@ remove_mountpoint(zfs_handle_t *zhp)
 	char mountpoint[ZFS_MAXPROPLEN];
 	char source[ZFS_MAXNAMELEN];
 	zfs_source_t sourcetype;
-	char zonename[ZONENAME_MAX];
+	int zoneid = getzoneid();
 
 	/* ignore non-filesystems */
 	if (zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, mountpoint,
 	    sizeof (mountpoint), &sourcetype, source, sizeof (source),
-	    FALSE) != 0)
+	    B_FALSE) != 0)
 		return;
 
-	if (getzonenamebyid(getzoneid(), zonename, sizeof (zonename)) < 0)
-		zfs_fatal(dgettext(TEXT_DOMAIN, "internal error: "
-		    "cannot determine current zone"));
-
 	if (strcmp(mountpoint, ZFS_MOUNTPOINT_NONE) != 0 &&
 	    strcmp(mountpoint, ZFS_MOUNTPOINT_LEGACY) != 0 &&
 	    (sourcetype == ZFS_SRC_DEFAULT ||
 	    sourcetype == ZFS_SRC_INHERITED) &&
 	    (!zfs_prop_get_int(zhp, ZFS_PROP_ZONED) ||
-	    strcmp(zonename, "global") != 0)) {
+	    zoneid != GLOBAL_ZONEID)) {
 
 		/*
 		 * Try to remove the directory, silently ignoring any errors.
diff --git a/usr/src/lib/libzfs/common/libzfs_pool.c b/usr/src/lib/libzfs/common/libzfs_pool.c
index 1fe6fa2d27..37c82015b9 100644
--- a/usr/src/lib/libzfs/common/libzfs_pool.c
+++ b/usr/src/lib/libzfs/common/libzfs_pool.c
@@ -18,6 +18,7 @@
  *
  * CDDL HEADER END
  */
+
 /*
  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
@@ -45,8 +46,8 @@
  * Validate the given pool name, optionally putting an extended error message in
  * 'buf'.
  */
-static int
-zpool_name_valid(const char *pool, boolean_t isopen, char *buf, size_t buflen)
+static boolean_t
+zpool_name_valid(libzfs_handle_t *hdl, boolean_t isopen, const char *pool)
 {
 	namecheck_err_t why;
 	char what;
@@ -64,53 +65,52 @@ zpool_name_valid(const char *pool, boolean_t isopen, char *buf, size_t buflen)
 	    (strncmp(pool, "mirror", 6) == 0 ||
 	    strncmp(pool, "raidz", 5) == 0 ||
 	    strncmp(pool, "spare", 5) == 0)) {
-		ret = -1;
-		why = NAME_ERR_RESERVED;
+		zfs_error_aux(hdl,
+		    dgettext(TEXT_DOMAIN, "name is reserved"));
+		return (B_FALSE);
 	}
 
 
 	if (ret != 0) {
-		if (buf != NULL) {
+		if (hdl != NULL) {
 			switch (why) {
 			case NAME_ERR_TOOLONG:
-				(void) snprintf(buf, buflen,
+				zfs_error_aux(hdl,
 				    dgettext(TEXT_DOMAIN, "name is too long"));
 				break;
 
 			case NAME_ERR_INVALCHAR:
-				(void) snprintf(buf, buflen,
+				zfs_error_aux(hdl,
 				    dgettext(TEXT_DOMAIN, "invalid character "
 				    "'%c' in pool name"), what);
 				break;
 
 			case NAME_ERR_NOLETTER:
-				(void) strlcpy(buf, dgettext(TEXT_DOMAIN,
-				    "name must begin with a letter"), buflen);
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "name must begin with a letter"));
 				break;
 
 			case NAME_ERR_RESERVED:
-				(void) strlcpy(buf, dgettext(TEXT_DOMAIN,
-				    "name is reserved\n"
-				    "pool name may have been omitted"), buflen);
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "name is reserved"));
 				break;
 
 			case NAME_ERR_DISKLIKE:
-				(void) strlcpy(buf, dgettext(TEXT_DOMAIN,
-				    "pool name is reserved\n"
-				    "pool name may have been omitted"), buflen);
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "pool name is reserved"));
 				break;
 			}
 		}
-		return (FALSE);
+		return (B_FALSE);
 	}
 
-	return (TRUE);
+	return (B_TRUE);
 }
 
 /*
  * Set the pool-wide health based on the vdev state of the root vdev.
  */
-void
+int
 set_pool_health(nvlist_t *config)
 {
 	nvlist_t *nvroot;
@@ -140,11 +140,10 @@ set_pool_health(nvlist_t *config)
 		break;
 
 	default:
-		zfs_baderror(vs->vs_state);
+		abort();
 	}
 
-	verify(nvlist_add_string(config, ZPOOL_CONFIG_POOL_HEALTH,
-	    health) == 0);
+	return (nvlist_add_string(config, ZPOOL_CONFIG_POOL_HEALTH, health));
 }
 
 /*
@@ -152,28 +151,33 @@ set_pool_health(nvlist_t *config)
  * state.
  */
 zpool_handle_t *
-zpool_open_canfail(const char *pool)
+zpool_open_canfail(libzfs_handle_t *hdl, const char *pool)
 {
 	zpool_handle_t *zhp;
-	int error;
 
 	/*
 	 * Make sure the pool name is valid.
 	 */
-	if (!zpool_name_valid(pool, B_TRUE, NULL, 0)) {
-		zfs_error(dgettext(TEXT_DOMAIN, "cannot open '%s': invalid "
-		    "pool name"), pool);
+	if (!zpool_name_valid(hdl, B_TRUE, pool)) {
+		(void) zfs_error(hdl, EZFS_INVALIDNAME,
+		    dgettext(TEXT_DOMAIN, "cannot open '%s'"),
+		    pool);
 		return (NULL);
 	}
 
-	zhp = zfs_malloc(sizeof (zpool_handle_t));
+	if ((zhp = zfs_alloc(hdl, sizeof (zpool_handle_t))) == NULL)
+		return (NULL);
 
+	zhp->zpool_hdl = hdl;
 	(void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name));
 
-	if ((error = zpool_refresh_stats(zhp)) != 0) {
-		if (error == ENOENT || error == EINVAL) {
-			zfs_error(dgettext(TEXT_DOMAIN, "cannot open '%s': no "
-			    "such pool"), pool);
+	if (zpool_refresh_stats(zhp) != 0) {
+		if (errno == ENOENT || errno == EINVAL) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "no such pool"));
+			(void) zfs_error(hdl, EZFS_NOENT,
+			    dgettext(TEXT_DOMAIN, "cannot open '%s'"),
+			    pool);
 			free(zhp);
 			return (NULL);
 		} else {
@@ -191,17 +195,18 @@ zpool_open_canfail(const char *pool)
  * the configuration cache may be out of date).
  */
 zpool_handle_t *
-zpool_open_silent(const char *pool)
+zpool_open_silent(libzfs_handle_t *hdl, const char *pool)
 {
 	zpool_handle_t *zhp;
-	int error;
 
-	zhp = zfs_malloc(sizeof (zpool_handle_t));
+	if ((zhp = calloc(sizeof (zpool_handle_t), 1)) == NULL)
+		return (NULL);
 
+	zhp->zpool_hdl = hdl;
 	(void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name));
 
-	if ((error = zpool_refresh_stats(zhp)) != 0) {
-		if (error == ENOENT || error == EINVAL) {
+	if (zpool_refresh_stats(zhp) != 0) {
+		if (errno == ENOENT || errno == EINVAL) {
 			free(zhp);
 			return (NULL);
 		} else {
@@ -219,18 +224,16 @@ zpool_open_silent(const char *pool)
  * state.
  */
 zpool_handle_t *
-zpool_open(const char *pool)
+zpool_open(libzfs_handle_t *hdl, const char *pool)
 {
 	zpool_handle_t *zhp;
 
-	if ((zhp = zpool_open_canfail(pool)) == NULL)
+	if ((zhp = zpool_open_canfail(hdl, pool)) == NULL)
 		return (NULL);
 
 	if (zhp->zpool_state == POOL_STATE_UNAVAIL) {
-		zfs_error(dgettext(TEXT_DOMAIN, "cannot open '%s': pool is "
-		    "currently unavailable"), zhp->zpool_name);
-		zfs_error(dgettext(TEXT_DOMAIN, "run 'zpool status %s' for "
-		    "detailed information"), zhp->zpool_name);
+		(void) zfs_error(hdl, EZFS_POOLUNAVAIL,
+		    dgettext(TEXT_DOMAIN, "cannot open '%s'"), zhp->zpool_name);
 		zpool_close(zhp);
 		return (NULL);
 	}
@@ -251,7 +254,7 @@ zpool_close(zpool_handle_t *zhp)
 	if (zhp->zpool_error_log) {
 		int i;
 		for (i = 0; i < zhp->zpool_error_count; i++)
-			free(zhp->zpool_error_log[i]);
+			nvlist_free(zhp->zpool_error_log[i]);
 		free(zhp->zpool_error_log);
 	}
 	free(zhp);
@@ -280,6 +283,20 @@ zpool_get_guid(zpool_handle_t *zhp)
 }
 
 /*
+ * Return the version of the pool.
+ */
+uint64_t
+zpool_get_version(zpool_handle_t *zhp)
+{
+	uint64_t version;
+
+	verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_VERSION,
+	    &version) == 0);
+
+	return (version);
+}
+
+/*
  * Return the amount of space currently consumed by the pool.
  */
 uint64_t
@@ -324,7 +341,7 @@ zpool_get_root(zpool_handle_t *zhp, char *buf, size_t buflen)
 	zfs_cmd_t zc = { 0 };
 
 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
-	if (zfs_ioctl(ZFS_IOC_OBJSET_STATS, &zc) != 0 ||
+	if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0 ||
 	    zc.zc_root[0] == '\0')
 		return (-1);
 
@@ -348,34 +365,35 @@ zpool_get_state(zpool_handle_t *zhp)
  * don't have to worry about error semantics.
  */
 int
-zpool_create(const char *pool, nvlist_t *nvroot, const char *altroot)
+zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
+    const char *altroot)
 {
 	zfs_cmd_t zc = { 0 };
 	char *packed;
 	size_t len;
-	int err;
-	char reason[64];
+	char msg[1024];
 
-	if (!zpool_name_valid(pool, B_FALSE, reason, sizeof (reason))) {
-		zfs_error(dgettext(TEXT_DOMAIN, "cannot create '%s': %s"),
-		    pool, reason);
-		return (-1);
-	}
+	(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
+	    "cannot create '%s'"), pool);
 
-	if (altroot != NULL && altroot[0] != '/') {
-		zfs_error(dgettext(TEXT_DOMAIN, "cannot create '%s': alternate "
-		    "root '%s' must be a complete path"), pool, altroot);
-		return (-1);
-	}
+	if (!zpool_name_valid(hdl, B_FALSE, pool))
+		return (zfs_error(hdl, EZFS_INVALIDNAME, msg));
 
-	if ((err = nvlist_size(nvroot, &len, NV_ENCODE_NATIVE)) != 0)
-		zfs_baderror(err);
+	if (altroot != NULL && altroot[0] != '/')
+		return (zfs_error(hdl, EZFS_BADPATH,
+		    dgettext(TEXT_DOMAIN, "bad alternate root '%s'"), altroot));
 
-	packed = zfs_malloc(len);
+	if (nvlist_size(nvroot, &len, NV_ENCODE_NATIVE) != 0)
+		return (no_memory(hdl));
 
-	if ((err = nvlist_pack(nvroot, &packed, &len,
-	    NV_ENCODE_NATIVE, 0)) != 0)
-		zfs_baderror(err);
+	if ((packed = zfs_alloc(hdl, len)) == NULL)
+		return (-1);
+
+	if (nvlist_pack(nvroot, &packed, &len,
+	    NV_ENCODE_NATIVE, 0) != 0) {
+		free(packed);
+		return (no_memory(hdl));
+	}
 
 	(void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name));
 	zc.zc_config_src = (uint64_t)(uintptr_t)packed;
@@ -384,18 +402,10 @@ zpool_create(const char *pool, nvlist_t *nvroot, const char *altroot)
 	if (altroot != NULL)
 		(void) strlcpy(zc.zc_root, altroot, sizeof (zc.zc_root));
 
-	if (zfs_ioctl(ZFS_IOC_POOL_CREATE, &zc) != 0) {
-		switch (errno) {
-		case EEXIST:
-			zfs_error(dgettext(TEXT_DOMAIN, "cannot create '%s': "
-			    "pool exists"), pool);
-			break;
-
-		case EPERM:
-			zfs_error(dgettext(TEXT_DOMAIN, "cannot create '%s': "
-			    "permission denied"), pool);
-			break;
+	if (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_CREATE, &zc) != 0) {
+		free(packed);
 
+		switch (errno) {
 		case EBUSY:
 			/*
 			 * This can happen if the user has specified the same
@@ -403,14 +413,13 @@ zpool_create(const char *pool, nvlist_t *nvroot, const char *altroot)
 			 * until we try to add it and see we already have a
 			 * label.
 			 */
-			zfs_error(dgettext(TEXT_DOMAIN, "cannot create '%s': "
-			    "one or more vdevs refer to the same device"),
-			    pool);
-			break;
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "one or more vdevs refer to the same device"));
+			return (zfs_error(hdl, EZFS_BADDEV, msg));
 
 		case EOVERFLOW:
 			/*
-			 * This occurrs when one of the devices is below
+			 * This occurs when one of the devices is below
 			 * SPA_MINDEVSIZE.  Unfortunately, we can't detect which
 			 * device was the problem device since there's no
 			 * reliable way to determine device size from userland.
@@ -420,53 +429,20 @@ zpool_create(const char *pool, nvlist_t *nvroot, const char *altroot)
 
 				zfs_nicenum(SPA_MINDEVSIZE, buf, sizeof (buf));
 
-				zfs_error(dgettext(TEXT_DOMAIN, "cannot "
-				    "create '%s': one or more devices is less "
-				    "than the minimum size (%s)"), pool,
-				    buf);
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "one or more devices is less than the "
+				    "minimum size (%s)"), buf);
 			}
-			break;
-
-		case ENAMETOOLONG:
-			/*
-			 * One of the vdevs has exceeded VDEV_SPEC_MAX length in
-			 * its plaintext representation.
-			 */
-			zfs_error(dgettext(TEXT_DOMAIN, "cannot create '%s': "
-			    "too many devices in a single vdev"), pool);
-			break;
-
-		case EIO:
-			zfs_error(dgettext(TEXT_DOMAIN, "cannot create '%s': "
-			    "I/O error on one or more devices"), pool);
-			break;
-
-		case ENXIO:
-			/*
-			 * This is unlikely to happen since we've verified that
-			 * all the devices can be opened from userland, but it's
-			 * still possible in some circumstances.
-			 */
-			zfs_error(dgettext(TEXT_DOMAIN, "cannot create '%s': "
-			    "one or more devices is unavailable"), pool);
-			break;
+			return (zfs_error(hdl, EZFS_BADDEV, msg));
 
 		case ENOSPC:
-			/*
-			 * This can occur if we were incapable of writing to a
-			 * file vdev because the underlying filesystem is out of
-			 * space.  This is very similar to EOVERFLOW, but we'll
-			 * produce a slightly different message.
-			 */
-			zfs_error(dgettext(TEXT_DOMAIN, "cannot create '%s': "
-			    "one or more devices is out of space"), pool);
-			break;
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "one or more devices is out of space"));
+			return (zfs_error(hdl, EZFS_BADDEV, msg));
 
 		default:
-			zfs_baderror(errno);
+			return (zpool_standard_error(hdl, errno, msg));
 		}
-
-		return (-1);
 	}
 
 	free(packed);
@@ -478,7 +454,7 @@ zpool_create(const char *pool, nvlist_t *nvroot, const char *altroot)
 	if (altroot != NULL) {
 		zfs_handle_t *zhp;
 
-		verify((zhp = zfs_open(pool, ZFS_TYPE_ANY)) != NULL);
+		verify((zhp = zfs_open(hdl, pool, ZFS_TYPE_ANY)) != NULL);
 		verify(zfs_prop_set(zhp, ZFS_PROP_MOUNTPOINT, "/") == 0);
 
 		zfs_close(zhp);
@@ -496,9 +472,12 @@ zpool_destroy(zpool_handle_t *zhp)
 {
 	zfs_cmd_t zc = { 0 };
 	zfs_handle_t *zfp = NULL;
+	libzfs_handle_t *hdl = zhp->zpool_hdl;
+	char msg[1024];
 
 	if (zhp->zpool_state == POOL_STATE_ACTIVE &&
-	    (zfp = zfs_open(zhp->zpool_name, ZFS_TYPE_FILESYSTEM)) == NULL)
+	    (zfp = zfs_open(zhp->zpool_hdl, zhp->zpool_name,
+	    ZFS_TYPE_FILESYSTEM)) == NULL)
 		return (-1);
 
 	if (zpool_remove_zvol_links(zhp) != NULL)
@@ -506,35 +485,16 @@ zpool_destroy(zpool_handle_t *zhp)
 
 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
 
-	if (zfs_ioctl(ZFS_IOC_POOL_DESTROY, &zc) != 0) {
-		switch (errno) {
-		case EPERM:
-			zfs_error(dgettext(TEXT_DOMAIN,
-			    "cannot destroy '%s': permission denied"),
-			    zhp->zpool_name);
-			break;
-
-		case EBUSY:
-			zfs_error(dgettext(TEXT_DOMAIN,
-			    "cannot destroy '%s': pool busy"),
-			    zhp->zpool_name);
-			break;
-
-		case ENOENT:
-			zfs_error(dgettext(TEXT_DOMAIN,
-			    "cannot destroy '%s': no such pool"),
-			    zhp->zpool_name);
-			break;
-
-		case EROFS:
-			zfs_error(dgettext(TEXT_DOMAIN,
-			    "cannot destroy '%s': one or more devices is "
-			    "read only, or '/' is mounted read only"),
-			    zhp->zpool_name);
-			break;
+	if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_POOL_DESTROY, &zc) != 0) {
+		(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
+		    "cannot destroy '%s'"), zhp->zpool_name);
 
-		default:
-			zfs_baderror(errno);
+		if (errno == EROFS) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "one or more devices is read only"));
+			(void) zfs_error(hdl, EZFS_BADDEV, msg);
+		} else {
+			(void) zpool_standard_error(hdl, errno, msg);
 		}
 
 		if (zfp)
@@ -560,10 +520,27 @@ zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot)
 	char *packed;
 	size_t len;
 	zfs_cmd_t zc;
+	int ret;
+	libzfs_handle_t *hdl = zhp->zpool_hdl;
+	char msg[1024];
+	nvlist_t **spares;
+	uint_t nspares;
+
+	(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
+	    "cannot add to '%s'"), zhp->zpool_name);
+
+	if (zpool_get_version(zhp) < ZFS_VERSION_SPARES &&
+	    nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
+	    &spares, &nspares) == 0) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be "
+		    "upgraded to add hot spares"));
+		return (zfs_error(hdl, EZFS_BADVERSION, msg));
+	}
 
 	verify(nvlist_size(nvroot, &len, NV_ENCODE_NATIVE) == 0);
 
-	packed = zfs_malloc(len);
+	if ((packed = zfs_alloc(zhp->zpool_hdl, len)) == NULL)
+		return (-1);
 
 	verify(nvlist_pack(nvroot, &packed, &len, NV_ENCODE_NATIVE, 0) == 0);
 
@@ -571,13 +548,8 @@ zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot)
 	zc.zc_config_src = (uint64_t)(uintptr_t)packed;
 	zc.zc_config_src_size = len;
 
-	if (zfs_ioctl(ZFS_IOC_VDEV_ADD, &zc) != 0) {
+	if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_ADD, &zc) != 0) {
 		switch (errno) {
-		case EPERM:
-			zfs_error(dgettext(TEXT_DOMAIN, "cannot add to '%s': "
-			    "permission denied"), zhp->zpool_name);
-			break;
-
 		case EBUSY:
 			/*
 			 * This can happen if the user has specified the same
@@ -585,30 +557,9 @@ zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot)
 			 * until we try to add it and see we already have a
 			 * label.
 			 */
-			zfs_error(dgettext(TEXT_DOMAIN, "cannot add to '%s': "
-			    "one or more vdevs refer to the same device"),
-			    zhp->zpool_name);
-			break;
-
-		case ENAMETOOLONG:
-			/*
-			 * One of the vdevs has exceeded VDEV_SPEC_MAX length in
-			 * its plaintext representation.
-			 */
-			zfs_error(dgettext(TEXT_DOMAIN, "cannot add to '%s': "
-			    "too many devices in a single vdev"),
-			    zhp->zpool_name);
-			break;
-
-		case ENXIO:
-			/*
-			 * This is unlikely to happen since we've verified that
-			 * all the devices can be opened from userland, but it's
-			 * still possible in some circumstances.
-			 */
-			zfs_error(dgettext(TEXT_DOMAIN, "cannot add to '%s': "
-			    "one or more devices is unavailable"),
-			    zhp->zpool_name);
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "one or more vdevs refer to the same device"));
+			(void) zfs_error(hdl, EZFS_BADDEV, msg);
 			break;
 
 		case EOVERFLOW:
@@ -623,23 +574,31 @@ zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot)
 
 				zfs_nicenum(SPA_MINDEVSIZE, buf, sizeof (buf));
 
-				zfs_error(dgettext(TEXT_DOMAIN, "cannot "
-				    "add to '%s': one or more devices is less "
-				    "than the minimum size (%s)"),
-				    zhp->zpool_name, buf);
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "device is less than the minimum "
+				    "size (%s)"), buf);
 			}
+			(void) zfs_error(hdl, EZFS_BADDEV, msg);
+			break;
+
+		case ENOTSUP:
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "pool must be upgraded to add raidz2 vdevs"));
+			(void) zfs_error(hdl, EZFS_BADVERSION, msg);
 			break;
 
 		default:
-			zfs_baderror(errno);
+			(void) zpool_standard_error(hdl, errno, msg);
 		}
 
-		return (-1);
+		ret = -1;
+	} else {
+		ret = 0;
 	}
 
 	free(packed);
 
-	return (0);
+	return (ret);
 }
 
 /*
@@ -656,32 +615,10 @@ zpool_export(zpool_handle_t *zhp)
 
 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
 
-	if (zfs_ioctl(ZFS_IOC_POOL_EXPORT, &zc) != 0) {
-		switch (errno) {
-		case EPERM:
-			zfs_error(dgettext(TEXT_DOMAIN,
-			    "cannot export '%s': permission denied"),
-			    zhp->zpool_name);
-			break;
-
-		case EBUSY:
-			zfs_error(dgettext(TEXT_DOMAIN,
-			    "cannot export '%s': pool is in use"),
-			    zhp->zpool_name);
-			break;
-
-		case ENOENT:
-			zfs_error(dgettext(TEXT_DOMAIN,
-			    "cannot export '%s': no such pool"),
-			    zhp->zpool_name);
-			break;
-
-		default:
-			zfs_baderror(errno);
-		}
-
-		return (-1);
-	}
+	if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_POOL_EXPORT, &zc) != 0)
+		return (zpool_standard_error(zhp->zpool_hdl, errno,
+		    dgettext(TEXT_DOMAIN, "cannot export '%s'"),
+		    zhp->zpool_name));
 
 	return (0);
 }
@@ -693,7 +630,8 @@ zpool_export(zpool_handle_t *zhp)
  * an alternate root, respectively.
  */
 int
-zpool_import(nvlist_t *config, const char *newname, const char *altroot)
+zpool_import(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
+    const char *altroot)
 {
 	zfs_cmd_t zc;
 	char *packed;
@@ -706,22 +644,19 @@ zpool_import(nvlist_t *config, const char *newname, const char *altroot)
 	    &origname) == 0);
 
 	if (newname != NULL) {
-		if (!zpool_name_valid(newname, B_FALSE, NULL, 0)) {
-			zfs_error(dgettext(TEXT_DOMAIN, "cannot import '%s': "
-			    "invalid pool name"), newname);
-			return (-1);
-		}
+		if (!zpool_name_valid(hdl, B_FALSE, newname))
+			return (zfs_error(hdl, EZFS_INVALIDNAME,
+			    dgettext(TEXT_DOMAIN, "cannot import '%s'"),
+			    newname));
 		thename = (char *)newname;
 	} else {
 		thename = origname;
 	}
 
-	if (altroot != NULL && altroot[0] != '/') {
-		zfs_error(dgettext(TEXT_DOMAIN, "cannot import '%s': alternate "
-		    "root '%s' must be a complete path"), thename,
-		    altroot);
-		return (-1);
-	}
+	if (altroot != NULL && altroot[0] != '/')
+		return (zfs_error(hdl, EZFS_BADPATH,
+		    dgettext(TEXT_DOMAIN, "bad alternate root '%s'"),
+		    altroot));
 
 	(void) strlcpy(zc.zc_name, thename, sizeof (zc.zc_name));
 
@@ -735,7 +670,8 @@ zpool_import(nvlist_t *config, const char *newname, const char *altroot)
 
 	verify(nvlist_size(config, &len, NV_ENCODE_NATIVE) == 0);
 
-	packed = zfs_malloc(len);
+	if ((packed = zfs_alloc(hdl, len)) == NULL)
+		return (-1);
 
 	verify(nvlist_pack(config, &packed, &len, NV_ENCODE_NATIVE, 0) == 0);
 
@@ -743,7 +679,7 @@ zpool_import(nvlist_t *config, const char *newname, const char *altroot)
 	zc.zc_config_src_size = len;
 
 	ret = 0;
-	if (zfs_ioctl(ZFS_IOC_POOL_IMPORT, &zc) != 0) {
+	if (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_IMPORT, &zc) != 0) {
 		char desc[1024];
 		if (newname == NULL)
 			(void) snprintf(desc, sizeof (desc),
@@ -755,42 +691,15 @@ zpool_import(nvlist_t *config, const char *newname, const char *altroot)
 			    origname, thename);
 
 		switch (errno) {
-		case EEXIST:
-			/*
-			 * A pool with that name already exists.
-			 */
-			zfs_error(dgettext(TEXT_DOMAIN, "%s: pool exists"),
-			    desc);
-			break;
-
-		case EPERM:
-			/*
-			 * The user doesn't have permission to create pools.
-			 */
-			zfs_error(dgettext(TEXT_DOMAIN, "%s: permission "
-			    "denied"), desc);
-			break;
-
-		case ENXIO:
-		case EDOM:
-			/*
-			 * Device is unavailable, or vdev sum didn't match.
-			 */
-			zfs_error(dgettext(TEXT_DOMAIN, "%s: one or more "
-			    "devices is unavailable"),
-			    desc);
-			break;
-
 		case ENOTSUP:
 			/*
 			 * Unsupported version.
 			 */
-			zfs_error(dgettext(TEXT_DOMAIN,
-			    "%s: unsupported version"), desc);
+			(void) zfs_error(hdl, EZFS_BADVERSION, desc);
 			break;
 
 		default:
-			zfs_baderror(errno);
+			(void) zpool_standard_error(hdl, errno, desc);
 		}
 
 		ret = -1;
@@ -799,7 +708,7 @@ zpool_import(nvlist_t *config, const char *newname, const char *altroot)
 		/*
 		 * This should never fail, but play it safe anyway.
 		 */
-		if ((zhp = zpool_open_silent(thename)) != NULL) {
+		if ((zhp = zpool_open_silent(hdl, thename)) != NULL) {
 			ret = zpool_create_zvol_links(zhp);
 			zpool_close(zhp);
 		}
@@ -817,48 +726,35 @@ zpool_scrub(zpool_handle_t *zhp, pool_scrub_type_t type)
 {
 	zfs_cmd_t zc = { 0 };
 	char msg[1024];
+	libzfs_handle_t *hdl = zhp->zpool_hdl;
 
 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
 	zc.zc_cookie = type;
 
-	if (zfs_ioctl(ZFS_IOC_POOL_SCRUB, &zc) == 0)
+	if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_POOL_SCRUB, &zc) == 0)
 		return (0);
 
 	(void) snprintf(msg, sizeof (msg),
 	    dgettext(TEXT_DOMAIN, "cannot scrub %s"), zc.zc_name);
 
-	switch (errno) {
-	    case EPERM:
-		/*
-		 * No permission to scrub this pool.
-		 */
-		zfs_error(dgettext(TEXT_DOMAIN, "%s: permission denied"), msg);
-		break;
-
-	    case EBUSY:
-		/*
-		 * Resilver in progress.
-		 */
-		zfs_error(dgettext(TEXT_DOMAIN, "%s: currently resilvering"),
-		    msg);
-		break;
-
-	    default:
-		zfs_baderror(errno);
-	}
-	return (-1);
+	if (errno == EBUSY)
+		return (zfs_error(hdl, EZFS_RESILVERING, msg));
+	else
+		return (zpool_standard_error(hdl, errno, msg));
 }
 
-static uint64_t
-vdev_to_guid(nvlist_t *nv, const char *search, uint64_t guid)
+static nvlist_t *
+vdev_to_nvlist_iter(nvlist_t *nv, const char *search, uint64_t guid,
+    boolean_t *isspare)
 {
 	uint_t c, children;
 	nvlist_t **child;
-	uint64_t ret, present;
+	uint64_t theguid, present;
 	char *path;
 	uint64_t wholedisk = 0;
+	nvlist_t *ret;
 
-	verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &ret) == 0);
+	verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &theguid) == 0);
 
 	if (search == NULL &&
 	    nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT, &present) == 0) {
@@ -866,8 +762,8 @@ vdev_to_guid(nvlist_t *nv, const char *search, uint64_t guid)
 		 * If the device has never been present since import, the only
 		 * reliable way to match the vdev is by GUID.
 		 */
-		if (ret == guid)
-			return (ret);
+		if (theguid == guid)
+			return (nv);
 	} else if (search != NULL &&
 	    nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0) {
 		(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
@@ -879,28 +775,37 @@ vdev_to_guid(nvlist_t *nv, const char *search, uint64_t guid)
 			 */
 			if (strlen(search) == strlen(path) - 2 &&
 			    strncmp(search, path, strlen(search)) == 0)
-				return (ret);
+				return (nv);
 		} else if (strcmp(search, path) == 0) {
-			return (ret);
+			return (nv);
 		}
 	}
 
 	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
 	    &child, &children) != 0)
-		return (0);
+		return (NULL);
 
 	for (c = 0; c < children; c++)
-		if ((ret = vdev_to_guid(child[c], search, guid)) != 0)
+		if ((ret = vdev_to_nvlist_iter(child[c], search, guid,
+		    isspare)) != NULL)
 			return (ret);
 
-	return (0);
+	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
+	    &child, &children) == 0) {
+		for (c = 0; c < children; c++) {
+			if ((ret = vdev_to_nvlist_iter(child[c], search, guid,
+			    isspare)) != NULL) {
+				*isspare = B_TRUE;
+				return (ret);
+			}
+		}
+	}
+
+	return (NULL);
 }
 
-/*
- * Given a string describing a vdev, returns the matching GUID, or 0 if none.
- */
-uint64_t
-zpool_vdev_to_guid(zpool_handle_t *zhp, const char *path)
+nvlist_t *
+zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *isspare)
 {
 	char buf[MAXPATHLEN];
 	const char *search;
@@ -921,7 +826,8 @@ zpool_vdev_to_guid(zpool_handle_t *zhp, const char *path)
 	verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
 	    &nvroot) == 0);
 
-	return (vdev_to_guid(nvroot, search, guid));
+	*isspare = B_FALSE;
+	return (vdev_to_nvlist_iter(nvroot, search, guid, isspare));
 }
 
 /*
@@ -932,39 +838,26 @@ zpool_vdev_online(zpool_handle_t *zhp, const char *path)
 {
 	zfs_cmd_t zc = { 0 };
 	char msg[1024];
+	nvlist_t *tgt;
+	boolean_t isspare;
+	libzfs_handle_t *hdl = zhp->zpool_hdl;
 
 	(void) snprintf(msg, sizeof (msg),
 	    dgettext(TEXT_DOMAIN, "cannot online %s"), path);
 
 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
-	if ((zc.zc_guid = zpool_vdev_to_guid(zhp, path)) == 0) {
-		zfs_error(dgettext(TEXT_DOMAIN, "%s: no such device in pool"),
-		    msg);
-		return (-1);
-	}
+	if ((tgt = zpool_find_vdev(zhp, path, &isspare)) == NULL)
+		return (zfs_error(hdl, EZFS_NODEVICE, msg));
 
-	if (zfs_ioctl(ZFS_IOC_VDEV_ONLINE, &zc) == 0)
-		return (0);
+	if (isspare)
+		return (zfs_error(hdl, EZFS_ISSPARE, msg));
 
-	switch (errno) {
-	    case ENODEV:
-		/*
-		 * Device doesn't exist
-		 */
-		zfs_error(dgettext(TEXT_DOMAIN, "%s: device not in pool"), msg);
-		break;
+	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
 
-	    case EPERM:
-		/*
-		 * No permission to bring this vdev online.
-		 */
-		zfs_error(dgettext(TEXT_DOMAIN, "%s: permission denied"), msg);
-		break;
+	if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_ONLINE, &zc) == 0)
+		return (0);
 
-	    default:
-		zfs_baderror(errno);
-	}
-	return (-1);
+	return (zpool_standard_error(hdl, errno, msg));
 }
 
 /*
@@ -975,48 +868,66 @@ zpool_vdev_offline(zpool_handle_t *zhp, const char *path, int istmp)
 {
 	zfs_cmd_t zc = { 0 };
 	char msg[1024];
+	nvlist_t *tgt;
+	boolean_t isspare;
+	libzfs_handle_t *hdl = zhp->zpool_hdl;
 
 	(void) snprintf(msg, sizeof (msg),
 	    dgettext(TEXT_DOMAIN, "cannot offline %s"), path);
 
 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
-	if ((zc.zc_guid = zpool_vdev_to_guid(zhp, path)) == 0) {
-		zfs_error(dgettext(TEXT_DOMAIN, "%s: no such device in pool"),
-		    msg);
-		return (-1);
-	}
+	if ((tgt = zpool_find_vdev(zhp, path, &isspare)) == NULL)
+		return (zfs_error(hdl, EZFS_NODEVICE, msg));
+
+	if (isspare)
+		return (zfs_error(hdl, EZFS_ISSPARE, msg));
+
+	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
 
 	zc.zc_cookie = istmp;
 
-	if (zfs_ioctl(ZFS_IOC_VDEV_OFFLINE, &zc) == 0)
+	if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_OFFLINE, &zc) == 0)
 		return (0);
 
 	switch (errno) {
-	    case ENODEV:
-		/*
-		 * Device doesn't exist
-		 */
-		zfs_error(dgettext(TEXT_DOMAIN, "%s: device not in pool"), msg);
-		break;
-
-	    case EPERM:
-		/*
-		 * No permission to take this vdev offline.
-		 */
-		zfs_error(dgettext(TEXT_DOMAIN, "%s: permission denied"), msg);
-		break;
+	case EBUSY:
 
-	    case EBUSY:
 		/*
 		 * There are no other replicas of this device.
 		 */
-		zfs_error(dgettext(TEXT_DOMAIN, "%s: no valid replicas"), msg);
-		break;
+		return (zfs_error(hdl, EZFS_NOREPLICAS, msg));
 
-	    default:
-		zfs_baderror(errno);
+	default:
+		return (zpool_standard_error(hdl, errno, msg));
 	}
-	return (-1);
+}
+
+/*
+ * Returns TRUE if the given nvlist is a vdev that was originally swapped in as
+ * a hot spare.
+ */
+static boolean_t
+is_replacing_spare(nvlist_t *search, nvlist_t *tgt, int which)
+{
+	nvlist_t **child;
+	uint_t c, children;
+	char *type;
+
+	if (nvlist_lookup_nvlist_array(search, ZPOOL_CONFIG_CHILDREN, &child,
+	    &children) == 0) {
+		verify(nvlist_lookup_string(search, ZPOOL_CONFIG_TYPE,
+		    &type) == 0);
+
+		if (strcmp(type, VDEV_TYPE_SPARE) == 0 &&
+		    children == 2 && child[which] == tgt)
+			return (B_TRUE);
+
+		for (c = 0; c < children; c++)
+			if (is_replacing_spare(child[c], tgt, which))
+				return (B_TRUE);
+	}
+
+	return (B_FALSE);
 }
 
 /*
@@ -1032,6 +943,14 @@ zpool_vdev_attach(zpool_handle_t *zhp,
 	char *packed;
 	int ret;
 	size_t len;
+	nvlist_t *tgt;
+	boolean_t isspare;
+	uint64_t val;
+	char *path;
+	nvlist_t **child;
+	uint_t children;
+	nvlist_t *config_root;
+	libzfs_handle_t *hdl = zhp->zpool_hdl;
 
 	if (replacing)
 		(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
@@ -1041,23 +960,63 @@ zpool_vdev_attach(zpool_handle_t *zhp,
 		    "cannot attach %s to %s"), new_disk, old_disk);
 
 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
-	if ((zc.zc_guid = zpool_vdev_to_guid(zhp, old_disk)) == 0) {
-		zfs_error(dgettext(TEXT_DOMAIN, "%s: no such device in pool"),
-		    msg);
-		return (-1);
-	}
+	if ((tgt = zpool_find_vdev(zhp, old_disk, &isspare)) == 0)
+		return (zfs_error(hdl, EZFS_NODEVICE, msg));
+
+	if (isspare)
+		return (zfs_error(hdl, EZFS_ISSPARE, msg));
+
+	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
 	zc.zc_cookie = replacing;
 
+	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
+	    &child, &children) != 0 || children != 1) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "new device must be a single disk"));
+		return (zfs_error(hdl, EZFS_INVALCONFIG, msg));
+	}
+
+	verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
+	    ZPOOL_CONFIG_VDEV_TREE, &config_root) == 0);
+
+	/*
+	 * If the target is a hot spare that has been swapped in, we can only
+	 * replace it with another hot spare.
+	 */
+	if (replacing &&
+	    nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_IS_SPARE, &val) == 0 &&
+	    nvlist_lookup_string(child[0], ZPOOL_CONFIG_PATH, &path) == 0 &&
+	    (zpool_find_vdev(zhp, path, &isspare) == NULL || !isspare) &&
+	    is_replacing_spare(config_root, tgt, 1)) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "can only be replaced by another hot spare"));
+		return (zfs_error(hdl, EZFS_BADTARGET, msg));
+	}
+
+	/*
+	 * If we are attempting to replace a spare, it cannot be applied to an
+	 * already spared device.
+	 */
+	if (replacing &&
+	    nvlist_lookup_string(child[0], ZPOOL_CONFIG_PATH, &path) == 0 &&
+	    zpool_find_vdev(zhp, path, &isspare) != NULL && isspare &&
+	    is_replacing_spare(config_root, tgt, 0)) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "device has already been replaced with a spare"));
+		return (zfs_error(hdl, EZFS_BADTARGET, msg));
+	}
+
 	verify(nvlist_size(nvroot, &len, NV_ENCODE_NATIVE) == 0);
 
-	packed = zfs_malloc(len);
+	if ((packed = zfs_alloc(zhp->zpool_hdl, len)) == NULL)
+		return (-1);
 
 	verify(nvlist_pack(nvroot, &packed, &len, NV_ENCODE_NATIVE, 0) == 0);
 
 	zc.zc_config_src = (uint64_t)(uintptr_t)packed;
 	zc.zc_config_src_size = len;
 
-	ret = zfs_ioctl(ZFS_IOC_VDEV_ATTACH, &zc);
+	ret = ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_ATTACH, &zc);
 
 	free(packed);
 
@@ -1065,87 +1024,65 @@ zpool_vdev_attach(zpool_handle_t *zhp,
 		return (0);
 
 	switch (errno) {
-	case EPERM:
-		/*
-		 * No permission to mess with the config.
-		 */
-		zfs_error(dgettext(TEXT_DOMAIN, "%s: permission denied"), msg);
-		break;
-
-	case ENODEV:
-		/*
-		 * Device doesn't exist.
-		 */
-		zfs_error(dgettext(TEXT_DOMAIN, "%s: %s not in pool"),
-		    msg, old_disk);
-		break;
-
 	case ENOTSUP:
 		/*
 		 * Can't attach to or replace this type of vdev.
 		 */
 		if (replacing)
-			zfs_error(dgettext(TEXT_DOMAIN,
-			    "%s: cannot replace a replacing device"), msg);
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "cannot replace a replacing device"));
 		else
-			zfs_error(dgettext(TEXT_DOMAIN,
-			    "%s: attach is only applicable to mirrors"), msg);
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "can only attach to mirrors and top-level "
+			    "disks"));
+		(void) zfs_error(hdl, EZFS_BADTARGET, msg);
 		break;
 
 	case EINVAL:
 		/*
 		 * The new device must be a single disk.
 		 */
-		zfs_error(dgettext(TEXT_DOMAIN,
-		    "%s: <new_device> must be a single disk"), msg);
-		break;
-
-	case ENXIO:
-		/*
-		 * This is unlikely to happen since we've verified that
-		 * all the devices can be opened from userland, but it's
-		 * still possible in some circumstances.
-		 */
-		zfs_error(dgettext(TEXT_DOMAIN, "%s: %s is unavailable"),
-		    msg, new_disk);
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "new device must be a single disk"));
+		(void) zfs_error(hdl, EZFS_INVALCONFIG, msg);
 		break;
 
 	case EBUSY:
-		/*
-		 * The new device is is use.
-		 */
-		zfs_error(dgettext(TEXT_DOMAIN, "%s: %s busy"), msg, new_disk);
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "%s is busy"),
+		    new_disk);
+		(void) zfs_error(hdl, EZFS_BADDEV, msg);
 		break;
 
 	case EOVERFLOW:
 		/*
 		 * The new device is too small.
 		 */
-		zfs_error(dgettext(TEXT_DOMAIN, "%s: %s is too small"),
-		    msg, new_disk);
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "device is too small"));
+		(void) zfs_error(hdl, EZFS_BADDEV, msg);
 		break;
 
 	case EDOM:
 		/*
 		 * The new device has a different alignment requirement.
 		 */
-		zfs_error(dgettext(TEXT_DOMAIN,
-		    "%s: devices have different sector alignment"), msg);
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "devices have different sector alignment"));
+		(void) zfs_error(hdl, EZFS_BADDEV, msg);
 		break;
 
 	case ENAMETOOLONG:
 		/*
 		 * The resulting top-level vdev spec won't fit in the label.
 		 */
-		zfs_error(dgettext(TEXT_DOMAIN,
-		    "%s: too many devices in a single vdev"), msg);
+		(void) zfs_error(hdl, EZFS_DEVOVERFLOW, msg);
 		break;
 
 	default:
-		zfs_baderror(errno);
+		(void) zpool_standard_error(hdl, errno, msg);
 	}
 
-	return (1);
+	return (-1);
 }
 
 /*
@@ -1156,55 +1093,81 @@ zpool_vdev_detach(zpool_handle_t *zhp, const char *path)
 {
 	zfs_cmd_t zc = { 0 };
 	char msg[1024];
+	nvlist_t *tgt;
+	boolean_t isspare;
+	libzfs_handle_t *hdl = zhp->zpool_hdl;
 
 	(void) snprintf(msg, sizeof (msg),
 	    dgettext(TEXT_DOMAIN, "cannot detach %s"), path);
 
 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
-	if ((zc.zc_guid = zpool_vdev_to_guid(zhp, path)) == 0) {
-		zfs_error(dgettext(TEXT_DOMAIN, "%s: no such device in pool"),
-		    msg);
-		return (-1);
-	}
+	if ((tgt = zpool_find_vdev(zhp, path, &isspare)) == 0)
+		return (zfs_error(hdl, EZFS_NODEVICE, msg));
 
-	if (zfs_ioctl(ZFS_IOC_VDEV_DETACH, &zc) == 0)
+	if (isspare)
+		return (zfs_error(hdl, EZFS_ISSPARE, msg));
+
+	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
+
+	if (ioctl(hdl->libzfs_fd, ZFS_IOC_VDEV_DETACH, &zc) == 0)
 		return (0);
 
 	switch (errno) {
-	case EPERM:
-		/*
-		 * No permission to mess with the config.
-		 */
-		zfs_error(dgettext(TEXT_DOMAIN, "%s: permission denied"), msg);
-		break;
-
-	case ENODEV:
-		/*
-		 * Device doesn't exist.
-		 */
-		zfs_error(dgettext(TEXT_DOMAIN, "%s: device not in pool"), msg);
-		break;
 
 	case ENOTSUP:
 		/*
 		 * Can't detach from this type of vdev.
 		 */
-		zfs_error(dgettext(TEXT_DOMAIN,
-		    "%s: only applicable to mirror and replacing vdevs"), msg);
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "only "
+		    "applicable to mirror and replacing vdevs"));
+		(void) zfs_error(zhp->zpool_hdl, EZFS_BADTARGET, msg);
 		break;
 
 	case EBUSY:
 		/*
 		 * There are no other replicas of this device.
 		 */
-		zfs_error(dgettext(TEXT_DOMAIN, "%s: no valid replicas"), msg);
+		(void) zfs_error(hdl, EZFS_NOREPLICAS, msg);
 		break;
 
 	default:
-		zfs_baderror(errno);
+		(void) zpool_standard_error(hdl, errno, msg);
 	}
 
-	return (1);
+	return (-1);
+}
+
+/*
+ * Remove the given device.  Currently, this is supported only for hot spares.
+ */
+int
+zpool_vdev_remove(zpool_handle_t *zhp, const char *path)
+{
+	zfs_cmd_t zc = { 0 };
+	char msg[1024];
+	nvlist_t *tgt;
+	boolean_t isspare;
+	libzfs_handle_t *hdl = zhp->zpool_hdl;
+
+	(void) snprintf(msg, sizeof (msg),
+	    dgettext(TEXT_DOMAIN, "cannot remove %s"), path);
+
+	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
+	if ((tgt = zpool_find_vdev(zhp, path, &isspare)) == 0)
+		return (zfs_error(hdl, EZFS_NODEVICE, msg));
+
+	if (!isspare) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "only hot spares can be removed"));
+		return (zfs_error(hdl, EZFS_NODEVICE, msg));
+	}
+
+	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
+
+	if (ioctl(hdl->libzfs_fd, ZFS_IOC_VDEV_REMOVE, &zc) == 0)
+		return (0);
+
+	return (zpool_standard_error(hdl, errno, msg));
 }
 
 /*
@@ -1215,6 +1178,9 @@ zpool_clear(zpool_handle_t *zhp, const char *path)
 {
 	zfs_cmd_t zc = { 0 };
 	char msg[1024];
+	nvlist_t *tgt;
+	boolean_t isspare;
+	libzfs_handle_t *hdl = zhp->zpool_hdl;
 
 	if (path)
 		(void) snprintf(msg, sizeof (msg),
@@ -1226,35 +1192,21 @@ zpool_clear(zpool_handle_t *zhp, const char *path)
 		    zhp->zpool_name);
 
 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
-	if (path && (zc.zc_guid = zpool_vdev_to_guid(zhp, path)) == 0) {
-		zfs_error(dgettext(TEXT_DOMAIN, "%s: no such device in pool"),
-		    msg);
-		return (-1);
-	}
+	if (path) {
+		if ((tgt = zpool_find_vdev(zhp, path, &isspare)) == 0)
+			return (zfs_error(hdl, EZFS_NODEVICE, msg));
 
-	if (zfs_ioctl(ZFS_IOC_CLEAR, &zc) == 0)
-		return (0);
+		if (isspare)
+			return (zfs_error(hdl, EZFS_ISSPARE, msg));
 
-	switch (errno) {
-	case EPERM:
-		/*
-		 * No permission to mess with the config.
-		 */
-		zfs_error(dgettext(TEXT_DOMAIN, "%s: permission denied"), msg);
-		break;
-
-	case ENODEV:
-		/*
-		 * Device doesn't exist.
-		 */
-		zfs_error(dgettext(TEXT_DOMAIN, "%s: device not in pool"), msg);
-		break;
-
-	default:
-		zfs_baderror(errno);
+		verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID,
+		    &zc.zc_guid) == 0);
 	}
 
-	return (1);
+	if (ioctl(hdl->libzfs_fd, ZFS_IOC_CLEAR, &zc) == 0)
+		return (0);
+
+	return (zpool_standard_error(hdl, errno, msg));
 }
 
 static int
@@ -1269,9 +1221,9 @@ do_zvol(zfs_handle_t *zhp, void *data)
 	 */
 	if (zhp->zfs_volblocksize != 0) {
 		if (linktype)
-			ret = zvol_create_link(zhp->zfs_name);
+			ret = zvol_create_link(zhp->zfs_hdl, zhp->zfs_name);
 		else
-			ret = zvol_remove_link(zhp->zfs_name);
+			ret = zvol_remove_link(zhp->zfs_hdl, zhp->zfs_name);
 	}
 
 	ret = zfs_iter_children(zhp, do_zvol, data);
@@ -1292,10 +1244,11 @@ zpool_create_zvol_links(zpool_handle_t *zhp)
 	/*
 	 * If the pool is unavailable, just return success.
 	 */
-	if ((zfp = make_dataset_handle(zhp->zpool_name)) == NULL)
+	if ((zfp = make_dataset_handle(zhp->zpool_hdl,
+	    zhp->zpool_name)) == NULL)
 		return (0);
 
-	ret = zfs_iter_children(zfp, do_zvol, (void *)TRUE);
+	ret = zfs_iter_children(zfp, do_zvol, (void *)B_TRUE);
 
 	zfs_close(zfp);
 	return (ret);
@@ -1313,10 +1266,11 @@ zpool_remove_zvol_links(zpool_handle_t *zhp)
 	/*
 	 * If the pool is unavailable, just return success.
 	 */
-	if ((zfp = make_dataset_handle(zhp->zpool_name)) == NULL)
+	if ((zfp = make_dataset_handle(zhp->zpool_hdl,
+	    zhp->zpool_name)) == NULL)
 		return (0);
 
-	ret = zfs_iter_children(zfp, do_zvol, (void *)FALSE);
+	ret = zfs_iter_children(zfp, do_zvol, (void *)B_FALSE);
 
 	zfs_close(zfp);
 	return (ret);
@@ -1345,7 +1299,9 @@ devid_to_path(char *devid_str)
 	if (ret != 0)
 		return (NULL);
 
-	path = zfs_strdup(list[0].devname);
+	if ((path = strdup(list[0].devname)) == NULL)
+		return (NULL);
+
 	devid_free_nmlist(list);
 
 	return (path);
@@ -1393,7 +1349,7 @@ set_path(zpool_handle_t *zhp, nvlist_t *nv, const char *path)
 	verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
 	    &zc.zc_guid) == 0);
 
-	(void) zfs_ioctl(ZFS_IOC_VDEV_SETPATH, &zc);
+	(void) ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_SETPATH, &zc);
 }
 
 /*
@@ -1412,7 +1368,7 @@ set_path(zpool_handle_t *zhp, nvlist_t *nv, const char *path)
  * of these checks.
  */
 char *
-zpool_vdev_name(zpool_handle_t *zhp, nvlist_t *nv)
+zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv)
 {
 	char *path, *devid;
 	uint64_t value;
@@ -1442,17 +1398,17 @@ zpool_vdev_name(zpool_handle_t *zhp, nvlist_t *nv)
 					 * Update the path appropriately.
 					 */
 					set_path(zhp, nv, newpath);
-					verify(nvlist_add_string(nv,
-					    ZPOOL_CONFIG_PATH, newpath) == 0);
+					if (nvlist_add_string(nv,
+					    ZPOOL_CONFIG_PATH, newpath) == 0)
+						verify(nvlist_lookup_string(nv,
+						    ZPOOL_CONFIG_PATH,
+						    &path) == 0);
 					free(newpath);
-					verify(nvlist_lookup_string(nv,
-					    ZPOOL_CONFIG_PATH, &path) == 0);
 				}
-
-				if (newdevid)
-					devid_str_free(newdevid);
 			}
 
+			if (newdevid)
+				devid_str_free(newdevid);
 		}
 
 		if (strncmp(path, "/dev/dsk/", 9) == 0)
@@ -1460,15 +1416,28 @@ zpool_vdev_name(zpool_handle_t *zhp, nvlist_t *nv)
 
 		if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
 		    &value) == 0 && value) {
-			char *tmp = zfs_strdup(path);
+			char *tmp = zfs_strdup(hdl, path);
+			if (tmp == NULL)
+				return (NULL);
 			tmp[strlen(path) - 2] = '\0';
 			return (tmp);
 		}
 	} else {
 		verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &path) == 0);
+
+		/*
+		 * If it's a raidz device, we need to stick in the parity level.
+		 */
+		if (strcmp(path, VDEV_TYPE_RAIDZ) == 0) {
+			verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NPARITY,
+			    &value) == 0);
+			(void) snprintf(buf, sizeof (buf), "%s%llu", path,
+			    value);
+			path = buf;
+		}
 	}
 
-	return (zfs_strdup(path));
+	return (zfs_strdup(hdl, path));
 }
 
 static int
@@ -1502,15 +1471,20 @@ zpool_get_errlog(zpool_handle_t *zhp, nvlist_t ***list, size_t *nelem)
 	 */
 	verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_ERRCOUNT,
 	    &count) == 0);
-	zc.zc_config_dst = (uintptr_t)zfs_malloc(count * sizeof (zbookmark_t));
+	if ((zc.zc_config_dst = (uintptr_t)zfs_alloc(zhp->zpool_hdl,
+	    count * sizeof (zbookmark_t))) == NULL)
+		return (-1);
 	zc.zc_config_dst_size = count;
 	(void) strcpy(zc.zc_name, zhp->zpool_name);
 	for (;;) {
-		if (zfs_ioctl(ZFS_IOC_ERROR_LOG, &zc) != 0) {
+		if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_ERROR_LOG,
+		    &zc) != 0) {
+			free((void *)(uintptr_t)zc.zc_config_dst);
 			if (errno == ENOMEM) {
-				free((void *)(uintptr_t)zc.zc_config_dst);
-				zc.zc_config_dst = (uintptr_t)
-				    zfs_malloc(zc.zc_config_dst_size);
+				if ((zc.zc_config_dst = (uintptr_t)
+				    zfs_alloc(zhp->zpool_hdl,
+				    zc.zc_config_dst_size)) == NULL)
+					return (-1);
 			} else {
 				return (-1);
 			}
@@ -1549,6 +1523,7 @@ zpool_get_errlog(zpool_handle_t *zhp, nvlist_t ***list, size_t *nelem)
 	 */
 	if (list == NULL) {
 		*nelem = j;
+		free((void *)(uintptr_t)zc.zc_config_dst);
 		return (0);
 	}
 
@@ -1557,7 +1532,11 @@ zpool_get_errlog(zpool_handle_t *zhp, nvlist_t ***list, size_t *nelem)
 	/*
 	 * Allocate an array of nvlists to hold the results
 	 */
-	zhp->zpool_error_log = zfs_malloc(j * sizeof (nvlist_t *));
+	if ((zhp->zpool_error_log = zfs_alloc(zhp->zpool_hdl,
+	    j * sizeof (nvlist_t *))) == NULL) {
+		free((void *)(uintptr_t)zc.zc_config_dst);
+		return (-1);
+	}
 
 	/*
 	 * Fill in the results with names from the kernel.
@@ -1571,31 +1550,37 @@ zpool_get_errlog(zpool_handle_t *zhp, nvlist_t ***list, size_t *nelem)
 		    sizeof (zbookmark_t)) == 0)
 			continue;
 
-		verify(nvlist_alloc(&nv, NV_UNIQUE_NAME,
-		    0) == 0);
+		if (nvlist_alloc(&nv, NV_UNIQUE_NAME,
+		    0) != 0)
+			goto nomem;
 		zhp->zpool_error_log[j] = nv;
 
 		zc.zc_bookmark = zb[i];
-		if (zfs_ioctl(ZFS_IOC_BOOKMARK_NAME, &zc) == 0) {
-			verify(nvlist_add_string(nv, ZPOOL_ERR_DATASET,
-			    zc.zc_prop_name) == 0);
-			verify(nvlist_add_string(nv, ZPOOL_ERR_OBJECT,
-			    zc.zc_prop_value) == 0);
-			verify(nvlist_add_string(nv, ZPOOL_ERR_RANGE,
-			    zc.zc_filename) == 0);
+		if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_BOOKMARK_NAME,
+		    &zc) == 0) {
+			if (nvlist_add_string(nv, ZPOOL_ERR_DATASET,
+			    zc.zc_prop_name) != 0 ||
+			    nvlist_add_string(nv, ZPOOL_ERR_OBJECT,
+			    zc.zc_prop_value) != 0 ||
+			    nvlist_add_string(nv, ZPOOL_ERR_RANGE,
+			    zc.zc_filename) != 0)
+				goto nomem;
 		} else {
 			(void) snprintf(buf, sizeof (buf), "%llx",
 			    zb[i].zb_objset);
-			verify(nvlist_add_string(nv,
-			    ZPOOL_ERR_DATASET, buf) == 0);
+			if (nvlist_add_string(nv,
+			    ZPOOL_ERR_DATASET, buf) != 0)
+				goto nomem;
 			(void) snprintf(buf, sizeof (buf), "%llx",
 			    zb[i].zb_object);
-			verify(nvlist_add_string(nv, ZPOOL_ERR_OBJECT,
-			    buf) == 0);
+			if (nvlist_add_string(nv, ZPOOL_ERR_OBJECT,
+			    buf) != 0)
+				goto nomem;
 			(void) snprintf(buf, sizeof (buf), "lvl=%u blkid=%llu",
 			    (int)zb[i].zb_level, (long long)zb[i].zb_blkid);
-			verify(nvlist_add_string(nv, ZPOOL_ERR_RANGE,
-			    buf) == 0);
+			if (nvlist_add_string(nv, ZPOOL_ERR_RANGE,
+			    buf) != 0)
+				goto nomem;
 		}
 
 		j++;
@@ -1607,6 +1592,16 @@ zpool_get_errlog(zpool_handle_t *zhp, nvlist_t ***list, size_t *nelem)
 	free((void *)(uintptr_t)zc.zc_config_dst);
 
 	return (0);
+
+nomem:
+	free((void *)(uintptr_t)zc.zc_config_dst);
+	/* Entries come from nvlist_alloc(), so release via nvlist_free(). */
+	for (i = 0; i < zhp->zpool_error_count; i++)
+		if (zhp->zpool_error_log[i])
+			nvlist_free(zhp->zpool_error_log[i]);
+	free(zhp->zpool_error_log);
+	zhp->zpool_error_log = NULL;
+	return (no_memory(zhp->zpool_hdl));
 }
 
 /*
@@ -1616,20 +1611,13 @@ int
 zpool_upgrade(zpool_handle_t *zhp)
 {
 	zfs_cmd_t zc = { 0 };
+	libzfs_handle_t *hdl = zhp->zpool_hdl;
 
 	(void) strcpy(zc.zc_name, zhp->zpool_name);
-	if (zfs_ioctl(ZFS_IOC_POOL_UPGRADE, &zc) != 0) {
-		switch (errno) {
-		case EPERM:
-			zfs_error(dgettext(TEXT_DOMAIN, "cannot upgrade '%s': "
-			    "permission denied"), zhp->zpool_name);
-			break;
-		default:
-			zfs_baderror(errno);
-		}
-
-		return (-1);
-	}
+	if (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_UPGRADE, &zc) != 0)
+		return (zpool_standard_error(hdl, errno,
+		    dgettext(TEXT_DOMAIN, "cannot upgrade '%s'"),
+		    zhp->zpool_name));
 
 	return (0);
 }
diff --git a/usr/src/lib/libzfs/common/libzfs_status.c b/usr/src/lib/libzfs/common/libzfs_status.c
index 258b2e2f7d..2a4164964d 100644
--- a/usr/src/lib/libzfs/common/libzfs_status.c
+++ b/usr/src/lib/libzfs/common/libzfs_status.c
@@ -116,7 +116,7 @@ vdev_offlined(uint64_t state, uint64_t aux, uint64_t errs)
 /*
  * Detect if any leaf devices that have seen errors or could not be opened.
  */
-static int
+static boolean_t
 find_vdev_problem(nvlist_t *vdev, int (*func)(uint64_t, uint64_t, uint64_t))
 {
 	nvlist_t **child;
@@ -132,13 +132,13 @@ find_vdev_problem(nvlist_t *vdev, int (*func)(uint64_t, uint64_t, uint64_t))
 	 */
 	verify(nvlist_lookup_string(vdev, ZPOOL_CONFIG_TYPE, &type) == 0);
 	if (strcmp(type, VDEV_TYPE_REPLACING) == 0)
-		return (FALSE);
+		return (B_FALSE);
 
 	if (nvlist_lookup_nvlist_array(vdev, ZPOOL_CONFIG_CHILDREN, &child,
 	    &children) == 0) {
 		for (c = 0; c < children; c++)
 			if (find_vdev_problem(child[c], func))
-				return (TRUE);
+				return (B_TRUE);
 	} else {
 		verify(nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_STATS,
 		    (uint64_t **)&vs, &c) == 0);
@@ -147,10 +147,10 @@ find_vdev_problem(nvlist_t *vdev, int (*func)(uint64_t, uint64_t, uint64_t))
 		    vs->vs_read_errors +
 		    vs->vs_write_errors +
 		    vs->vs_checksum_errors))
-			return (TRUE);
+			return (B_TRUE);
 	}
 
-	return (FALSE);
+	return (B_FALSE);
 }
 
 /*
@@ -171,7 +171,7 @@ find_vdev_problem(nvlist_t *vdev, int (*func)(uint64_t, uint64_t, uint64_t))
  * only picks the most damaging of all the current errors to report.
  */
 static zpool_status_t
-check_status(nvlist_t *config, int isimport)
+check_status(nvlist_t *config, boolean_t isimport)
 {
 	nvlist_t *nvroot;
 	vdev_stat_t *vs;
@@ -265,7 +265,7 @@ check_status(nvlist_t *config, int isimport)
 zpool_status_t
 zpool_get_status(zpool_handle_t *zhp, char **msgid)
 {
-	zpool_status_t ret = check_status(zhp->zpool_config, FALSE);
+	zpool_status_t ret = check_status(zhp->zpool_config, B_FALSE);
 
 	if (ret >= NMSGID)
 		*msgid = NULL;
@@ -278,7 +278,7 @@ zpool_get_status(zpool_handle_t *zhp, char **msgid)
 zpool_status_t
 zpool_import_status(nvlist_t *config, char **msgid)
 {
-	zpool_status_t ret = check_status(config, TRUE);
+	zpool_status_t ret = check_status(config, B_TRUE);
 
 	if (ret >= NMSGID)
 		*msgid = NULL;
diff --git a/usr/src/lib/libzfs/common/libzfs_util.c b/usr/src/lib/libzfs/common/libzfs_util.c
index c7f7528491..29e99dc5b1 100644
--- a/usr/src/lib/libzfs/common/libzfs_util.c
+++ b/usr/src/lib/libzfs/common/libzfs_util.c
@@ -43,90 +43,320 @@
 
 #include "libzfs_impl.h"
 
-static int zfs_fd = -1;
-static FILE *mnttab_file;
-static FILE *sharetab_file;
-static int sharetab_opened;
+int
+libzfs_errno(libzfs_handle_t *hdl)
+{
+	return (hdl->libzfs_error);
+}
 
-void (*error_func)(const char *, va_list);
+const char *
+libzfs_error_action(libzfs_handle_t *hdl)
+{
+	return (hdl->libzfs_action);
+}
 
-/*
- * All error handling is kept within libzfs where we have the most information
- * immediately available.  While this may not be suitable for a general purpose
- * library, it greatly simplifies our commands.  This command name is used to
- * prefix all error messages appropriately.
- */
+const char *
+libzfs_error_description(libzfs_handle_t *hdl)
+{
+	if (hdl->libzfs_desc[0] != '\0')
+		return (hdl->libzfs_desc);
+
+	switch (hdl->libzfs_error) {
+	case EZFS_NOMEM:
+		return (dgettext(TEXT_DOMAIN, "out of memory"));
+	case EZFS_BADPROP:
+		return (dgettext(TEXT_DOMAIN, "invalid property value"));
+	case EZFS_PROPREADONLY:
+		return (dgettext(TEXT_DOMAIN, "read only property"));
+	case EZFS_PROPTYPE:
+		return (dgettext(TEXT_DOMAIN, "property doesn't apply to "
+		    "datasets of this type"));
+	case EZFS_PROPNONINHERIT:
+		return (dgettext(TEXT_DOMAIN, "property cannot be inherited"));
+	case EZFS_PROPSPACE:
+		return (dgettext(TEXT_DOMAIN, "invalid quota or reservation"));
+	case EZFS_BADTYPE:
+		return (dgettext(TEXT_DOMAIN, "operation not applicable to "
+		    "datasets of this type"));
+	case EZFS_BUSY:
+		return (dgettext(TEXT_DOMAIN, "pool or dataset is busy"));
+	case EZFS_EXISTS:
+		return (dgettext(TEXT_DOMAIN, "pool or dataset exists"));
+	case EZFS_NOENT:
+		return (dgettext(TEXT_DOMAIN, "no such pool or dataset"));
+	case EZFS_BADSTREAM:
+		return (dgettext(TEXT_DOMAIN, "invalid backup stream"));
+	case EZFS_DSREADONLY:
+		return (dgettext(TEXT_DOMAIN, "dataset is read only"));
+	case EZFS_VOLTOOBIG:
+		return (dgettext(TEXT_DOMAIN, "volume size exceeds limit for "
+		    "this system"));
+	case EZFS_VOLHASDATA:
+		return (dgettext(TEXT_DOMAIN, "volume has data"));
+	case EZFS_INVALIDNAME:
+		return (dgettext(TEXT_DOMAIN, "invalid name"));
+	case EZFS_BADRESTORE:
+		return (dgettext(TEXT_DOMAIN, "unable to restore to "
+		    "destination"));
+	case EZFS_BADBACKUP:
+		return (dgettext(TEXT_DOMAIN, "backup failed"));
+	case EZFS_BADTARGET:
+		return (dgettext(TEXT_DOMAIN, "invalid target vdev"));
+	case EZFS_NODEVICE:
+		return (dgettext(TEXT_DOMAIN, "no such device in pool"));
+	case EZFS_BADDEV:
+		return (dgettext(TEXT_DOMAIN, "invalid device"));
+	case EZFS_NOREPLICAS:
+		return (dgettext(TEXT_DOMAIN, "no valid replicas"));
+	case EZFS_RESILVERING:
+		return (dgettext(TEXT_DOMAIN, "currently resilvering"));
+	case EZFS_BADVERSION:
+		return (dgettext(TEXT_DOMAIN, "unsupported version"));
+	case EZFS_POOLUNAVAIL:
+		return (dgettext(TEXT_DOMAIN, "pool is unavailable"));
+	case EZFS_DEVOVERFLOW:
+		return (dgettext(TEXT_DOMAIN, "too many devices in one vdev"));
+	case EZFS_BADPATH:
+		return (dgettext(TEXT_DOMAIN, "must be an absolute path"));
+	case EZFS_CROSSTARGET:
+		return (dgettext(TEXT_DOMAIN, "operation crosses datasets or "
+		    "pools"));
+	case EZFS_ZONED:
+		return (dgettext(TEXT_DOMAIN, "dataset in use by local zone"));
+	case EZFS_MOUNTFAILED:
+		return (dgettext(TEXT_DOMAIN, "mount failed"));
+	case EZFS_UMOUNTFAILED:
+		return (dgettext(TEXT_DOMAIN, "umount failed"));
+	case EZFS_UNSHAREFAILED:
+		return (dgettext(TEXT_DOMAIN, "unshare(1M) failed"));
+	case EZFS_SHAREFAILED:
+		return (dgettext(TEXT_DOMAIN, "share(1M) failed"));
+	case EZFS_DEVLINKS:
+		return (dgettext(TEXT_DOMAIN, "failed to create /dev links"));
+	case EZFS_PERM:
+		return (dgettext(TEXT_DOMAIN, "permission denied"));
+	case EZFS_NOSPC:
+		return (dgettext(TEXT_DOMAIN, "out of space"));
+	case EZFS_IO:
+		return (dgettext(TEXT_DOMAIN, "I/O error"));
+	case EZFS_INTR:
+		return (dgettext(TEXT_DOMAIN, "signal received"));
+	case EZFS_ISSPARE:
+		return (dgettext(TEXT_DOMAIN, "device is reserved as a hot "
+		    "spare"));
+	case EZFS_INVALCONFIG:
+		return (dgettext(TEXT_DOMAIN, "invalid vdev configuration"));
+	case EZFS_UNKNOWN:
+		return (dgettext(TEXT_DOMAIN, "unknown error"));
+	default:
+		abort();
+	}
+
+	/* NOTREACHED */
+}
+
+/*PRINTFLIKE2*/
 void
-zfs_error(const char *fmt, ...)
+zfs_error_aux(libzfs_handle_t *hdl, const char *fmt, ...)
 {
 	va_list ap;
 
 	va_start(ap, fmt);
 
-	if (error_func != NULL) {
-		error_func(fmt, ap);
-	} else {
-		(void) vfprintf(stderr, fmt, ap);
-		(void) fprintf(stderr, "\n");
+	(void) vsnprintf(hdl->libzfs_desc, sizeof (hdl->libzfs_desc),
+	    fmt, ap);
+	hdl->libzfs_desc_active = 1;
+
+	va_end(ap);
+}
+
+static void
+zfs_verror(libzfs_handle_t *hdl, int error, const char *fmt, va_list ap)
+{
+	(void) vsnprintf(hdl->libzfs_action, sizeof (hdl->libzfs_action),
+	    fmt, ap);
+	hdl->libzfs_error = error;
+
+	if (hdl->libzfs_desc_active)
+		hdl->libzfs_desc_active = 0;
+	else
+		hdl->libzfs_desc[0] = '\0';
+
+	if (hdl->libzfs_printerr) {
+		if (error == EZFS_UNKNOWN) {
+			(void) fprintf(stderr, dgettext(TEXT_DOMAIN, "internal "
+			    "error: %s\n"), libzfs_error_description(hdl));
+			abort();
+		}
+
+		(void) fprintf(stderr, "%s: %s\n", hdl->libzfs_action,
+		    libzfs_error_description(hdl));
+		if (error == EZFS_NOMEM)
+			exit(1);
 	}
+}
+
+/*PRINTFLIKE3*/
+int
+zfs_error(libzfs_handle_t *hdl, int error, const char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+
+	zfs_verror(hdl, error, fmt, ap);
 
 	va_end(ap);
+
+	return (-1);
 }
 
-/*
- * An internal error is something that we cannot recover from, and should never
- * happen (such as running out of memory).  It should only be used in
- * exceptional circumstances.
- */
-void
-zfs_fatal(const char *fmt, ...)
+static int
+zfs_common_error(libzfs_handle_t *hdl, int error, const char *fmt,
+    va_list ap)
+{
+	switch (error) {
+	case EPERM:
+	case EACCES:
+		zfs_verror(hdl, EZFS_PERM, fmt, ap);
+		return (-1);
+
+	case EIO:
+		zfs_verror(hdl, EZFS_IO, fmt, ap);
+		return (-1);
+
+	case EINTR:
+		zfs_verror(hdl, EZFS_INTR, fmt, ap);
+		return (-1);
+	}
+
+	return (0);
+}
+
+/*PRINTFLIKE3*/
+int
+zfs_standard_error(libzfs_handle_t *hdl, int error, const char *fmt, ...)
 {
 	va_list ap;
 
 	va_start(ap, fmt);
 
-	if (error_func != NULL) {
-		error_func(fmt, ap);
-	} else {
-		(void) vfprintf(stderr, fmt, ap);
-		(void) fprintf(stderr, "\n");
+	if (zfs_common_error(hdl, error, fmt, ap) != 0) {
+		va_end(ap);
+		return (-1);
 	}
 
-	va_end(ap);
 
-	exit(1);
+	switch (error) {
+	case ENXIO:
+		zfs_verror(hdl, EZFS_IO, fmt, ap);
+		break;
+
+	case ENOENT:
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "dataset does not exist"));
+		zfs_verror(hdl, EZFS_NOENT, fmt, ap);
+		break;
+
+	case ENOSPC:
+	case EDQUOT:
+		zfs_verror(hdl, EZFS_NOSPC, fmt, ap);
+		break;
+
+	case EEXIST:
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "dataset already exists"));
+		zfs_verror(hdl, EZFS_EXISTS, fmt, ap);
+		break;
+
+	case EBUSY:
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "dataset is busy"));
+		zfs_verror(hdl, EZFS_BUSY, fmt, ap);
+		break;
+
+	default:
+		zfs_error_aux(hdl, "%s", strerror(error));
+		zfs_verror(hdl, EZFS_UNKNOWN, fmt, ap);
+		break;
+	}
+	/* Every switch arm breaks to here so that va_end() always runs. */
+	va_end(ap);
+	return (-1);
 }
 
-/*
- * Consumers (such as the JNI interface) that need to capture error output can
- * override the default error handler using this function.
- */
-void
-zfs_set_error_handler(void (*func)(const char *, va_list))
+/*PRINTFLIKE3*/
+int
+zpool_standard_error(libzfs_handle_t *hdl, int error, const char *fmt, ...)
 {
-	error_func = func;
+	va_list ap;
+
+	va_start(ap, fmt);
+
+	if (zfs_common_error(hdl, error, fmt, ap) != 0) {
+		va_end(ap);
+		return (-1);
+	}
+
+	switch (error) {
+	case ENODEV:
+		zfs_verror(hdl, EZFS_NODEVICE, fmt, ap);
+		break;
+
+	case ENOENT:
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "no such pool"));
+		zfs_verror(hdl, EZFS_NOENT, fmt, ap);
+		break;
+
+	case EEXIST:
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "pool already exists"));
+		zfs_verror(hdl, EZFS_EXISTS, fmt, ap);
+		break;
+	/* A busy pool is reported with its own code, EZFS_BUSY. */
+	case EBUSY:
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool is busy"));
+		zfs_verror(hdl, EZFS_BUSY, fmt, ap);
+		break;
+
+	case ENXIO:
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "one or more devices is currently unavailable"));
+		zfs_verror(hdl, EZFS_BADDEV, fmt, ap);
+		break;
+
+	case ENAMETOOLONG:
+		zfs_verror(hdl, EZFS_DEVOVERFLOW, fmt, ap);
+		break;
+
+	default:
+		zfs_error_aux(hdl, "%s", strerror(error));
+		zfs_verror(hdl, EZFS_UNKNOWN, fmt, ap);
+	}
+
+	va_end(ap);
+	return (-1);
 }
 
 /*
  * Display an out of memory error message and abort the current program.
  */
-void
-no_memory(void)
+int
+no_memory(libzfs_handle_t *hdl)
 {
-	assert(errno == ENOMEM);
-	zfs_fatal(dgettext(TEXT_DOMAIN, "internal error: out of memory\n"));
+	return (zfs_error(hdl, EZFS_NOMEM, "internal error"));
 }
 
 /*
  * A safe form of malloc() which will die if the allocation fails.
  */
 void *
-zfs_malloc(size_t size)
+zfs_alloc(libzfs_handle_t *hdl, size_t size)
 {
 	void *data;
 
 	if ((data = calloc(1, size)) == NULL)
-		no_memory();
+		(void) no_memory(hdl);
 
 	return (data);
 }
@@ -135,69 +365,17 @@ zfs_malloc(size_t size)
  * A safe form of strdup() which will die if the allocation fails.
  */
 char *
-zfs_strdup(const char *str)
+zfs_strdup(libzfs_handle_t *hdl, const char *str)
 {
 	char *ret;
 
 	if ((ret = strdup(str)) == NULL)
-		no_memory();
+		(void) no_memory(hdl);
 
 	return (ret);
 }
 
 /*
- * Utility functions around common used files - /dev/zfs, /etc/mnttab, and
- * /etc/dfs/sharetab.
- */
-int
-zfs_ioctl(int cmd, zfs_cmd_t *zc)
-{
-	if (zfs_fd == -1 &&
-	    (zfs_fd = open(ZFS_DEV, O_RDWR)) < 0)
-		zfs_fatal(dgettext(TEXT_DOMAIN, "internal error: unable to "
-		    "open ZFS device\n"), MNTTAB);
-
-	return (ioctl(zfs_fd, cmd, zc));
-}
-
-FILE *
-zfs_mnttab(void)
-{
-	if (mnttab_file == NULL &&
-	    (mnttab_file = fopen(MNTTAB, "r")) == NULL)
-		zfs_fatal(dgettext(TEXT_DOMAIN, "internal error: unable to "
-		    "open %s\n"), MNTTAB);
-
-	return (mnttab_file);
-}
-
-FILE *
-zfs_sharetab(void)
-{
-	if (sharetab_opened)
-		return (sharetab_file);
-
-	sharetab_opened = TRUE;
-	return (sharetab_file = fopen("/etc/dfs/sharetab", "r"));
-}
-
-/*
- * Cleanup function for library.  Close any file descriptors that were
- * opened as part of the above functions.
- */
-#pragma fini(zfs_fini)
-void
-zfs_fini(void)
-{
-	if (zfs_fd != -1)
-		(void) close(zfs_fd);
-	if (sharetab_file)
-		(void) fclose(sharetab_file);
-	if (mnttab_file)
-		(void) fclose(mnttab_file);
-}
-
-/*
  * Convert a number to an appropriately human-readable output.
  */
 void
@@ -241,3 +419,58 @@ zfs_nicenum(uint64_t num, char *buf, size_t buflen)
 		}
 	}
 }
+
+void
+libzfs_print_on_error(libzfs_handle_t *hdl, boolean_t printerr)
+{
+	hdl->libzfs_printerr = printerr;
+}
+
+libzfs_handle_t *
+libzfs_init(void)
+{
+	libzfs_handle_t *hdl;
+
+	if ((hdl = calloc(sizeof (libzfs_handle_t), 1)) == NULL)
+		return (NULL);
+
+	/* open(2) reports failure as -1; 0 is a valid descriptor. */
+	if ((hdl->libzfs_fd = open(ZFS_DEV, O_RDWR)) < 0) {
+		free(hdl);
+		return (NULL);
+	}
+
+	if ((hdl->libzfs_mnttab = fopen(MNTTAB, "r")) == NULL) {
+		(void) close(hdl->libzfs_fd);
+		free(hdl);
+		return (NULL);
+	}
+
+	/* A missing sharetab is tolerated; the stream is then left NULL. */
+	hdl->libzfs_sharetab = fopen("/etc/dfs/sharetab", "r");
+	return (hdl);
+}
+
+void
+libzfs_fini(libzfs_handle_t *hdl)
+{
+	(void) close(hdl->libzfs_fd);
+	if (hdl->libzfs_mnttab)
+		(void) fclose(hdl->libzfs_mnttab);
+	if (hdl->libzfs_sharetab)
+		(void) fclose(hdl->libzfs_sharetab);
+	namespace_clear(hdl);
+	free(hdl);
+}
+
+libzfs_handle_t *
+zpool_get_handle(zpool_handle_t *zhp)
+{
+	return (zhp->zpool_hdl);
+}
+
+libzfs_handle_t *
+zfs_get_handle(zfs_handle_t *zhp)
+{
+	return (zhp->zfs_hdl);
+}
diff --git a/usr/src/lib/libzfs/spec/libzfs.spec b/usr/src/lib/libzfs/spec/libzfs.spec
index 1789122711..6120603e18 100644
--- a/usr/src/lib/libzfs/spec/libzfs.spec
+++ b/usr/src/lib/libzfs/spec/libzfs.spec
@@ -24,6 +24,30 @@
 #
 #ident	"%Z%%M%	%I%	%E% SMI"
 
+function libzfs_fini
+version SUNWprivate_1.1
+end
+
+function libzfs_init
+version SUNWprivate_1.1
+end
+
+function libzfs_errno
+version SUNWprivate_1.1
+end
+
+function libzfs_error_action
+version SUNWprivate_1.1
+end
+
+function libzfs_error_description
+version SUNWprivate_1.1
+end
+
+function libzfs_print_on_error
+version SUNWprivate_1.1
+end
+
 function zfs_clone
 version SUNWprivate_1.1
 end
@@ -40,6 +64,10 @@ function zfs_destroy
 version SUNWprivate_1.1
 end
 
+function zfs_get_handle
+version SUNWprivate_1.1
+end
+
 function zfs_get_name
 version SUNWprivate_1.1
 end
@@ -104,6 +132,10 @@ function zfs_open
 version SUNWprivate_1.1
 end
 
+function zfs_promote
+version SUNWprivate_1.1
+end
+
 function zfs_prop_column_name
 version SUNWprivate_1.1
 end
@@ -188,10 +220,6 @@ function zfs_send
 version SUNWprivate_1.1
 end
 
-function zfs_set_error_handler
-version SUNWprivate_1.1
-end
-
 function zfs_share
 version SUNWprivate_1.1
 end
@@ -248,6 +276,10 @@ function zpool_export
 version SUNWprivate_1.1
 end
 
+function zpool_find_vdev
+version  SUNWprivate_1.1
+end
+
 function zpool_find_import
 version SUNWprivate_1.1
 end
@@ -264,6 +296,10 @@ function zpool_get_guid
 version SUNWprivate_1.1
 end
 
+function zpool_get_handle
+version SUNWprivate_1.1
+end
+
 function zpool_get_name
 version SUNWprivate_1.1
 end
@@ -288,6 +324,10 @@ function zpool_get_status
 version SUNWprivate_1.1
 end
 
+function zpool_get_version
+version SUNWprivate_1.1
+end
+
 function zpool_import
 version SUNWprivate_1.1
 end
@@ -352,6 +392,7 @@ function zpool_vdev_name
 version  SUNWprivate_1.1
 end
 
-function zpool_vdev_to_guid
-version  SUNWprivate_1.1
+function zpool_vdev_remove
+version SUNWprivate_1.1
 end
+
diff --git a/usr/src/lib/libzfs_jni/common/libzfs_jni_dataset.c b/usr/src/lib/libzfs_jni/common/libzfs_jni_dataset.c
index 64270f2cd7..2daeca32e2 100644
--- a/usr/src/lib/libzfs_jni/common/libzfs_jni_dataset.c
+++ b/usr/src/lib/libzfs_jni/common/libzfs_jni_dataset.c
@@ -574,7 +574,7 @@ is_fs_snapshot(zfs_handle_t *zhp)
 	zjni_get_dataset_from_snapshot(
 	    zfs_get_name(zhp), parent, sizeof (parent));
 
-	parent_zhp = zfs_open(parent, ZFS_TYPE_ANY);
+	parent_zhp = zfs_open(g_zfs, parent, ZFS_TYPE_ANY);
 	if (parent_zhp == NULL) {
 		return (-1);
 	}
@@ -606,7 +606,8 @@ zjni_create_add_Pool(zpool_handle_t *zphp, void *data)
 	zjni_Collection_t *list = ((zjni_ArrayCallbackData_t *)data)->list;
 
 	/* Get root fs for this pool -- may be NULL if pool is faulted */
-	zfs_handle_t *zhp = zfs_open(zpool_get_name(zphp), ZFS_TYPE_FILESYSTEM);
+	zfs_handle_t *zhp = zfs_open(g_zfs, zpool_get_name(zphp),
+	    ZFS_TYPE_FILESYSTEM);
 
 	jobject bean = create_PoolBean(env, zphp, zhp);
 
@@ -682,7 +683,7 @@ zjni_get_Datasets_below(JNIEnv *env, jstring parentUTF,
 		zjni_new_DatasetSet(env, list);
 
 		/* Retrieve parent dataset */
-		zhp = zfs_open(name, parent_typemask);
+		zhp = zfs_open(g_zfs, name, parent_typemask);
 
 		if (zhp != NULL) {
 			zjni_DatasetArrayCallbackData_t data = {0};
@@ -703,7 +704,7 @@ zjni_get_Datasets_below(JNIEnv *env, jstring parentUTF,
 		/* Parent is not a dataset -- see if it's a faulted pool */
 		if ((parent_typemask & ZFS_TYPE_FILESYSTEM) &&
 		    is_pool_name(name)) {
-			zpool_handle_t *zphp = zpool_open_canfail(name);
+			zpool_handle_t *zphp = zpool_open_canfail(g_zfs, name);
 
 			if (zphp != NULL) {
 				/* A faulted pool has no datasets */
@@ -750,7 +751,7 @@ zjni_get_Datasets_dependents(JNIEnv *env, jobjectArray paths)
 			const char *path =
 			    (*env)->GetStringUTFChars(env, pathUTF, NULL);
 
-			zfs_handle_t *zhp = zfs_open(path, ZFS_TYPE_ANY);
+			zfs_handle_t *zhp = zfs_open(g_zfs, path, ZFS_TYPE_ANY);
 			if (zhp != NULL) {
 				/* Add all dependents of this Dataset to list */
 				(void) zfs_iter_dependents(zhp,
@@ -762,7 +763,8 @@ zjni_get_Datasets_dependents(JNIEnv *env, jobjectArray paths)
 
 			/* Path is not a dataset - see if it's a faulted pool */
 			if (is_pool_name(path)) {
-				zpool_handle_t *zphp = zpool_open_canfail(path);
+				zpool_handle_t *zphp = zpool_open_canfail(g_zfs,
+				    path);
 
 				if (zphp != NULL) {
 					/*
@@ -795,10 +797,10 @@ zjni_get_Dataset(JNIEnv *env, jstring nameUTF, zfs_type_t typemask)
 {
 	jobject device = NULL;
 	const char *name = (*env)->GetStringUTFChars(env, nameUTF, NULL);
-	zfs_handle_t *zhp = zfs_open(name, typemask);
+	zfs_handle_t *zhp = zfs_open(g_zfs, name, typemask);
 
 	if ((typemask & ZFS_TYPE_FILESYSTEM) && is_pool_name(name)) {
-		zpool_handle_t *zphp = zpool_open_canfail(name);
+		zpool_handle_t *zphp = zpool_open_canfail(g_zfs, name);
 
 		if (zphp != NULL) {
 			device = create_PoolBean(env, zphp, zhp);
diff --git a/usr/src/lib/libzfs_jni/common/libzfs_jni_main.c b/usr/src/lib/libzfs_jni/common/libzfs_jni_main.c
index 34500684d3..a699ecd7ce 100644
--- a/usr/src/lib/libzfs_jni/common/libzfs_jni_main.c
+++ b/usr/src/lib/libzfs_jni/common/libzfs_jni_main.c
@@ -35,6 +35,8 @@
 #include "libzfs_jni_diskmgt.h"
 #include "libzfs_jni_disk.h"
 
+libzfs_handle_t *g_zfs;
+
 /*
  * Function prototypes
  */
@@ -46,14 +48,14 @@ static void init();
  * Static functions
  */
 
-char libzfs_err[1024];
+char libdskmgt_err[1024];
 static void
 handle_error(const char *fmt, va_list ap)
 {
 	/* Save the error message in case it's needed */
-	(void) vsnprintf(libzfs_err, sizeof (libzfs_err), fmt, ap);
+	(void) vsnprintf(libdskmgt_err, sizeof (libdskmgt_err), fmt, ap);
 #ifdef	DEBUG
-	(void) fprintf(stderr, "caught error: %s\n", libzfs_err);
+	(void) fprintf(stderr, "caught error: %s\n", libdskmgt_err);
 #endif
 }
 
@@ -64,10 +66,8 @@ handle_error(const char *fmt, va_list ap)
 static void
 init()
 {
-	libzfs_err[0] = '\0';
-
-	/* libzfs error handler */
-	zfs_set_error_handler(handle_error);
+	if ((g_zfs = libzfs_init()) == NULL)
+		abort();
 
 	/* diskmgt.o error handler */
 	dmgt_set_error_handler(handle_error);
@@ -151,7 +151,7 @@ Java_com_sun_zfs_common_model_SystemDataModel_getPools(JNIEnv *env, jobject obj)
 	data.env = env;
 	data.list = (zjni_Collection_t *)list;
 
-	result = zpool_iter(zjni_create_add_Pool, &data);
+	result = zpool_iter(g_zfs, zjni_create_add_Pool, &data);
 	if (result && (*env)->ExceptionOccurred(env) != NULL) {
 		/* Must not call any more Java methods to preserve exception */
 		return (NULL);
@@ -334,7 +334,7 @@ Java_com_sun_zfs_common_model_SystemDataModel_getVirtualDevice(JNIEnv *env,
 	if (poolUTF != NULL) {
 		const char *pool = (*env)->GetStringUTFChars(env, poolUTF,
 		    NULL);
-		zpool_handle_t *zhp = zpool_open_canfail(pool);
+		zpool_handle_t *zhp = zpool_open_canfail(g_zfs, pool);
 		(*env)->ReleaseStringUTFChars(env, poolUTF, pool);
 
 		if (zhp != NULL) {
@@ -371,7 +371,7 @@ Java_com_sun_zfs_common_model_SystemDataModel_getVirtualDevices__Ljava_lang_Stri
 	if (poolUTF != NULL) {
 		const char *pool = (*env)->GetStringUTFChars(env, poolUTF,
 		    NULL);
-		zpool_handle_t *zhp = zpool_open_canfail(pool);
+		zpool_handle_t *zhp = zpool_open_canfail(g_zfs, pool);
 		(*env)->ReleaseStringUTFChars(env, poolUTF, pool);
 
 		/* Is the pool valid? */
@@ -408,7 +408,7 @@ Java_com_sun_zfs_common_model_SystemDataModel_getVirtualDevices__Ljava_lang_Stri
 	if (poolUTF != NULL) {
 		const char *pool = (*env)->GetStringUTFChars(env,
 		    poolUTF, NULL);
-		zpool_handle_t *zhp = zpool_open_canfail(pool);
+		zpool_handle_t *zhp = zpool_open_canfail(g_zfs, pool);
 		(*env)->ReleaseStringUTFChars(env, poolUTF, pool);
 
 		/* Is the pool valid? */
@@ -446,7 +446,7 @@ Java_com_sun_zfs_common_model_SystemDataModel_getAvailableDisks(JNIEnv *env,
 	error = dmgt_avail_disk_iter(zjni_create_add_DiskDevice, &data);
 
 	if (error) {
-		zjni_throw_exception(env, "%s", libzfs_err);
+		zjni_throw_exception(env, "%s", libdskmgt_err);
 	} else {
 		array = zjni_Collection_to_array(
 		    env, (zjni_Collection_t *)list,
diff --git a/usr/src/lib/libzfs_jni/common/libzfs_jni_pool.c b/usr/src/lib/libzfs_jni/common/libzfs_jni_pool.c
index d9d09804ec..0e228460dc 100644
--- a/usr/src/lib/libzfs_jni/common/libzfs_jni_pool.c
+++ b/usr/src/lib/libzfs_jni/common/libzfs_jni_pool.c
@@ -26,6 +26,7 @@
 
 #pragma ident	"%Z%%M%	%I%	%E% SMI"
 
+#include "libzfs_jni_util.h"
 #include "libzfs_jni_pool.h"
 #include <strings.h>
 
@@ -1110,7 +1111,7 @@ zjni_pool_status_to_obj(JNIEnv *env, zpool_status_t status)
 int
 zjni_ipool_iter(int argc, char **argv, zjni_ipool_iter_f func, void *data)
 {
-	nvlist_t *pools = zpool_find_import(argc, argv);
+	nvlist_t *pools = zpool_find_import(g_zfs, argc, argv);
 
 	if (pools != NULL) {
 		nvpair_t *elem = NULL;
diff --git a/usr/src/lib/libzfs_jni/common/libzfs_jni_util.h b/usr/src/lib/libzfs_jni/common/libzfs_jni_util.h
index 1b878a4977..b6989239ac 100644
--- a/usr/src/lib/libzfs_jni/common/libzfs_jni_util.h
+++ b/usr/src/lib/libzfs_jni/common/libzfs_jni_util.h
@@ -32,6 +32,7 @@
 #include <jni.h>
 #include <regex.h>
 #include <libnvpair.h>
+#include <libzfs.h>
 
 #ifdef __cplusplus
 extern "C" {
@@ -105,6 +106,8 @@ int zjni_count_elements(void **);
 nvpair_t *zjni_nvlist_walk_nvpair(
 	nvlist_t *, const char *, data_type_t, nvpair_t *);
 
+extern libzfs_handle_t *g_zfs;
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/usr/src/lib/libzpool/common/util.c b/usr/src/lib/libzpool/common/util.c
index 094c8b6c6f..df49adbc7a 100644
--- a/usr/src/lib/libzpool/common/util.c
+++ b/usr/src/lib/libzpool/common/util.c
@@ -111,11 +111,17 @@ show_vdev_stats(const char *desc, nvlist_t *nv, int indent)
 
 	for (c = 0; c < children; c++) {
 		nvlist_t *cnv = child[c];
-		char *cname;
+		char *cname, *tname;
+		uint64_t np;
 		if (nvlist_lookup_string(cnv, ZPOOL_CONFIG_PATH, &cname) &&
 		    nvlist_lookup_string(cnv, ZPOOL_CONFIG_TYPE, &cname))
 			cname = "<unknown>";
-		show_vdev_stats(cname, cnv, indent + 2);
+		tname = calloc(1, strlen(cname) + 2);
+		(void) strcpy(tname, cname);
+		if (nvlist_lookup_uint64(cnv, ZPOOL_CONFIG_NPARITY, &np) == 0)
+			tname[strlen(tname)] = '0' + np;
+		show_vdev_stats(tname, cnv, indent + 2);
+		free(tname);
 	}
 }
 
diff --git a/usr/src/pkgdefs/SUNWfmd/prototype_com b/usr/src/pkgdefs/SUNWfmd/prototype_com
index 01b0e4cea5..c99e4aca59 100644
--- a/usr/src/pkgdefs/SUNWfmd/prototype_com
+++ b/usr/src/pkgdefs/SUNWfmd/prototype_com
@@ -74,6 +74,8 @@ f none usr/lib/fm/fmd/plugins/syslog-msgs.conf 644 root bin
 f none usr/lib/fm/fmd/plugins/syslog-msgs.so 555 root bin
 f none usr/lib/fm/fmd/plugins/zfs-diagnosis.conf 644 root bin
 f none usr/lib/fm/fmd/plugins/zfs-diagnosis.so 555 root bin
+f none usr/lib/fm/fmd/plugins/zfs-retire.conf 644 root bin
+f none usr/lib/fm/fmd/plugins/zfs-retire.so 555 root bin
 d none usr/lib/fm/fmd/schemes 755 root bin
 f none usr/lib/fm/fmd/schemes/cpu.so 555 root bin
 f none usr/lib/fm/fmd/schemes/dev.so 555 root bin
diff --git a/usr/src/uts/common/fs/zfs/arc.c b/usr/src/uts/common/fs/zfs/arc.c
index ba07ea12d4..f979159f8a 100644
--- a/usr/src/uts/common/fs/zfs/arc.c
+++ b/usr/src/uts/common/fs/zfs/arc.c
@@ -356,8 +356,6 @@ buf_hash_find(spa_t *spa, dva_t *dva, uint64_t birth, kmutex_t **lockp)
  * will be returned and the new element will not be inserted.
  * Otherwise returns NULL.
  */
-static arc_buf_hdr_t *fbufs[4]; /* XXX to find 6341326 */
-static kthread_t *fbufs_lastthread;
 static arc_buf_hdr_t *
 buf_hash_insert(arc_buf_hdr_t *buf, kmutex_t **lockp)
 {
@@ -367,13 +365,10 @@ buf_hash_insert(arc_buf_hdr_t *buf, kmutex_t **lockp)
 	uint32_t max, i;
 
 	ASSERT(!HDR_IN_HASH_TABLE(buf));
-	fbufs_lastthread = curthread;
 	*lockp = hash_lock;
 	mutex_enter(hash_lock);
 	for (fbuf = buf_hash_table.ht_table[idx], i = 0; fbuf != NULL;
 	    fbuf = fbuf->b_hash_next, i++) {
-		if (i < sizeof (fbufs) / sizeof (fbufs[0]))
-			fbufs[i] = fbuf;
 		if (BUF_EQUAL(buf->b_spa, &buf->b_dva, buf->b_birth, fbuf))
 			return (fbuf);
 	}
diff --git a/usr/src/uts/common/fs/zfs/bplist.c b/usr/src/uts/common/fs/zfs/bplist.c
index db0d3534d6..4442b1f28a 100644
--- a/usr/src/uts/common/fs/zfs/bplist.c
+++ b/usr/src/uts/common/fs/zfs/bplist.c
@@ -45,12 +45,13 @@ bplist_hold(bplist_t *bpl)
 uint64_t
 bplist_create(objset_t *mos, int blocksize, dmu_tx_t *tx)
 {
-	uint64_t obj;
+	int size;
 
-	obj = dmu_object_alloc(mos, DMU_OT_BPLIST, blocksize,
-	    DMU_OT_BPLIST_HDR, sizeof (bplist_phys_t), tx);
+	size = spa_version(dmu_objset_spa(mos)) < ZFS_VERSION_BPLIST_ACCOUNT ?
+	    BPLIST_SIZE_V0 : sizeof (bplist_phys_t);
 
-	return (obj);
+	return (dmu_object_alloc(mos, DMU_OT_BPLIST, blocksize,
+	    DMU_OT_BPLIST_HDR, size, tx));
 }
 
 void
@@ -76,11 +77,14 @@ bplist_open(bplist_t *bpl, objset_t *mos, uint64_t object)
 	ASSERT(bpl->bpl_cached_dbuf == NULL);
 	ASSERT(bpl->bpl_queue == NULL);
 	ASSERT(object != 0);
+	ASSERT3U(doi.doi_type, ==, DMU_OT_BPLIST);
+	ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_BPLIST_HDR);
 
 	bpl->bpl_mos = mos;
 	bpl->bpl_object = object;
 	bpl->bpl_blockshift = highbit(doi.doi_data_block_size - 1);
 	bpl->bpl_bpshift = bpl->bpl_blockshift - SPA_BLKPTRSHIFT;
+	bpl->bpl_havecomp = (doi.doi_bonus_size == sizeof (bplist_phys_t));
 
 	mutex_exit(&bpl->bpl_lock);
 	return (0);
@@ -210,7 +214,12 @@ bplist_enqueue(bplist_t *bpl, blkptr_t *bp, dmu_tx_t *tx)
 
 	dmu_buf_will_dirty(bpl->bpl_dbuf, tx);
 	bpl->bpl_phys->bpl_entries++;
-	bpl->bpl_phys->bpl_bytes += BP_GET_ASIZE(bp);
+	bpl->bpl_phys->bpl_bytes +=
+	    bp_get_dasize(dmu_objset_spa(bpl->bpl_mos), bp);
+	if (bpl->bpl_havecomp) {
+		bpl->bpl_phys->bpl_comp += BP_GET_PSIZE(bp);
+		bpl->bpl_phys->bpl_uncomp += BP_GET_UCSIZE(bp);
+	}
 	mutex_exit(&bpl->bpl_lock);
 
 	return (0);
@@ -259,5 +268,45 @@ bplist_vacate(bplist_t *bpl, dmu_tx_t *tx)
 	    bpl->bpl_object, 0, -1ULL, tx));
 	bpl->bpl_phys->bpl_entries = 0;
 	bpl->bpl_phys->bpl_bytes = 0;
+	if (bpl->bpl_havecomp) {
+		bpl->bpl_phys->bpl_comp = 0;
+		bpl->bpl_phys->bpl_uncomp = 0;
+	}
+	mutex_exit(&bpl->bpl_lock);
+}
+
+int
+bplist_space(bplist_t *bpl, uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
+{
+	uint64_t itor = 0, comp = 0, uncomp = 0;
+	int err;
+	blkptr_t bp;
+
+	mutex_enter(&bpl->bpl_lock);
+
+	err = bplist_hold(bpl);
+	if (err) {
+		mutex_exit(&bpl->bpl_lock);
+		return (err);
+	}
+
+	*usedp = bpl->bpl_phys->bpl_bytes;
+	if (bpl->bpl_havecomp) {
+		*compp = bpl->bpl_phys->bpl_comp;
+		*uncompp = bpl->bpl_phys->bpl_uncomp;
+	}
 	mutex_exit(&bpl->bpl_lock);
+
+	if (!bpl->bpl_havecomp) {
+		while ((err = bplist_iterate(bpl, &itor, &bp)) == 0) {
+			comp += BP_GET_PSIZE(&bp);
+			uncomp += BP_GET_UCSIZE(&bp);
+		}
+		if (err == ENOENT)
+			err = 0;
+		*compp = comp;
+		*uncompp = uncomp;
+	}
+
+	return (err);
 }
diff --git a/usr/src/uts/common/fs/zfs/dbuf.c b/usr/src/uts/common/fs/zfs/dbuf.c
index 2135427b7a..e78f49c4f9 100644
--- a/usr/src/uts/common/fs/zfs/dbuf.c
+++ b/usr/src/uts/common/fs/zfs/dbuf.c
@@ -1029,7 +1029,7 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
 			 * it's OK if we get an odd answer.
 			 */
 			dnode_willuse_space(dn,
-			    -BP_GET_ASIZE(db->db_blkptr), tx);
+			    -bp_get_dasize(os->os_spa, db->db_blkptr), tx);
 		}
 		dnode_willuse_space(dn, db->db.db_size, tx);
 	}
@@ -1951,8 +1951,8 @@ dbuf_sync(dmu_buf_impl_t *db, zio_t *zio, dmu_tx_t *tx)
 		arc_buf_t **old =
 		    (arc_buf_t **)&db->db_d.db_data_old[txg&TXG_MASK];
 		blkptr_t **bpp = &db->db_d.db_overridden_by[txg&TXG_MASK];
-		int old_size = BP_GET_ASIZE(db->db_blkptr);
-		int new_size = BP_GET_ASIZE(*bpp);
+		int old_size = bp_get_dasize(os->os_spa, db->db_blkptr);
+		int new_size = bp_get_dasize(os->os_spa, *bpp);
 
 		ASSERT(db->db_blkid != DB_BONUS_BLKID);
 
@@ -2078,8 +2078,8 @@ dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb)
 
 	dprintf_dbuf_bp(db, &zio->io_bp_orig, "bp_orig: %s", "");
 
-	old_size = BP_GET_ASIZE(&zio->io_bp_orig);
-	new_size = BP_GET_ASIZE(zio->io_bp);
+	old_size = bp_get_dasize(os->os_spa, &zio->io_bp_orig);
+	new_size = bp_get_dasize(os->os_spa, zio->io_bp);
 
 	dnode_diduse_space(dn, new_size-old_size);
 
diff --git a/usr/src/uts/common/fs/zfs/dmu.c b/usr/src/uts/common/fs/zfs/dmu.c
index 52c8413c9a..77886f5e24 100644
--- a/usr/src/uts/common/fs/zfs/dmu.c
+++ b/usr/src/uts/common/fs/zfs/dmu.c
@@ -789,7 +789,7 @@ replay_incremental_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx)
 	/* The point of no (unsuccessful) return. */
 
 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
-	ds->ds_phys->ds_inconsistent = TRUE;
+	ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT;
 
 	dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
 	return (0);
@@ -841,7 +841,7 @@ replay_full_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx)
 	    ds, drrb->drr_type, tx);
 
 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
-	ds->ds_phys->ds_inconsistent = TRUE;
+	ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT;
 
 	dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
 	return (0);
@@ -875,7 +875,7 @@ replay_end_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx)
 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
 	ds->ds_phys->ds_creation_time = drrb->drr_creation_time;
 	ds->ds_phys->ds_guid = drrb->drr_toguid;
-	ds->ds_phys->ds_inconsistent = FALSE;
+	ds->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT;
 
 	dsl_dataset_close(ds, DS_MODE_PRIMARY, FTAG);
 
@@ -883,7 +883,7 @@ replay_end_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx)
 	    dd->dd_phys->dd_head_dataset_obj,
 	    NULL, DS_MODE_STANDARD | DS_MODE_INCONSISTENT, FTAG, &ds));
 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
-	ds->ds_phys->ds_inconsistent = FALSE;
+	ds->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT;
 	dsl_dataset_close(ds, DS_MODE_STANDARD, FTAG);
 
 	return (0);
@@ -1686,7 +1686,8 @@ dmu_object_info_from_dnode(dnode_t *dn, dmu_object_info_t *doi)
 	doi->doi_indirection = dn->dn_nlevels;
 	doi->doi_checksum = dn->dn_checksum;
 	doi->doi_compress = dn->dn_compress;
-	doi->doi_physical_blks = dn->dn_phys->dn_secphys;
+	doi->doi_physical_blks = (DN_USED_BYTES(dn->dn_phys) +
+	    SPA_MINBLOCKSIZE/2) >> SPA_MINBLOCKSHIFT;
 	doi->doi_max_block_offset = dn->dn_phys->dn_maxblkid;
 	doi->doi_type = dn->dn_type;
 	doi->doi_bonus_size = dn->dn_bonuslen;
@@ -1735,7 +1736,9 @@ dmu_object_size_from_db(dmu_buf_t *db, uint32_t *blksize, u_longlong_t *nblk512)
 	dnode_t *dn = ((dmu_buf_impl_t *)db)->db_dnode;
 
 	*blksize = dn->dn_datablksz;
-	*nblk512 = dn->dn_phys->dn_secphys + 1;	/* add 1 for dnode space */
+	/* add 1 for dnode space */
+	*nblk512 = ((DN_USED_BYTES(dn->dn_phys) + SPA_MINBLOCKSIZE/2) >>
+	    SPA_MINBLOCKSHIFT) + 1;
 }
 
 /*
diff --git a/usr/src/uts/common/fs/zfs/dmu_objset.c b/usr/src/uts/common/fs/zfs/dmu_objset.c
index 248612e3cc..3d5f1f7b5c 100644
--- a/usr/src/uts/common/fs/zfs/dmu_objset.c
+++ b/usr/src/uts/common/fs/zfs/dmu_objset.c
@@ -164,9 +164,10 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
 	/*
 	 * Note: the changed_cb will be called once before the register
 	 * func returns, thus changing the checksum/compression from the
-	 * default (fletcher2/off).
+	 * default (fletcher2/off).  Snapshots don't need to know, and
+	 * registering would complicate clone promotion.
 	 */
-	if (ds) {
+	if (ds && ds->ds_phys->ds_num_children == 0) {
 		err = dsl_prop_register(ds, "checksum",
 		    checksum_changed_cb, osi);
 		if (err == 0)
@@ -177,7 +178,7 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
 			kmem_free(osi, sizeof (objset_impl_t));
 			return (err);
 		}
-	} else {
+	} else if (ds == NULL) {
 		/* It's the meta-objset. */
 		osi->os_checksum = ZIO_CHECKSUM_FLETCHER_4;
 		osi->os_compress = ZIO_COMPRESS_LZJB;
@@ -329,21 +330,18 @@ dmu_objset_evict(dsl_dataset_t *ds, void *arg)
 {
 	objset_impl_t *osi = arg;
 	objset_t os;
-	int err, i;
+	int i;
 
 	for (i = 0; i < TXG_SIZE; i++) {
 		ASSERT(list_head(&osi->os_dirty_dnodes[i]) == NULL);
 		ASSERT(list_head(&osi->os_free_dnodes[i]) == NULL);
 	}
 
-	if (ds) {
-		err = dsl_prop_unregister(ds, "checksum",
-		    checksum_changed_cb, osi);
-		ASSERT(err == 0);
-
-		err = dsl_prop_unregister(ds, "compression",
-		    compression_changed_cb, osi);
-		ASSERT(err == 0);
+	if (ds && ds->ds_phys->ds_num_children == 0) {
+		VERIFY(0 == dsl_prop_unregister(ds, "checksum",
+		    checksum_changed_cb, osi));
+		VERIFY(0 == dsl_prop_unregister(ds, "compression",
+		    compression_changed_cb, osi));
 	}
 
 	/*
diff --git a/usr/src/uts/common/fs/zfs/dmu_tx.c b/usr/src/uts/common/fs/zfs/dmu_tx.c
index 91ee5c5062..1b4a0c2bd0 100644
--- a/usr/src/uts/common/fs/zfs/dmu_tx.c
+++ b/usr/src/uts/common/fs/zfs/dmu_tx.c
@@ -181,8 +181,9 @@ dmu_tx_count_write(dmu_tx_t *tx, dnode_t *dn, uint64_t off, uint64_t len)
 
 	/*
 	 * For i/o error checking, read the first and last level-0
-	 * blocks, and all the level-1 blocks.  We needn't do this on
-	 * the meta-dnode, because we've already read it in.
+	 * blocks (if they are not aligned), and all the level-1 blocks.
+	 * We needn't do this on the meta-dnode, because we've already
+	 * read it in.
 	 */
 
 	if (dn && dn->dn_object != DMU_META_DNODE_OBJECT) {
@@ -199,16 +200,20 @@ dmu_tx_count_write(dmu_tx_t *tx, dnode_t *dn, uint64_t off, uint64_t len)
 			    NULL, NULL, ZIO_FLAG_CANFAIL);
 
 			/* first level-0 block */
-			start = off/dn->dn_datablksz;
-			err = dmu_tx_check_ioerr(zio, dn, 0, start);
-			if (err) {
-				tx->tx_err = err;
-				return;
+			start = off >> dn->dn_datablkshift;
+			if (P2PHASE(off, dn->dn_datablksz) ||
+			    len < dn->dn_datablksz) {
+				err = dmu_tx_check_ioerr(zio, dn, 0, start);
+				if (err) {
+					tx->tx_err = err;
+					return;
+				}
 			}
 
 			/* last level-0 block */
-			end = (off+len)/dn->dn_datablksz;
-			if (end != start) {
+			end = (off+len-1) >> dn->dn_datablkshift;
+			if (end != start &&
+			    P2PHASE(off+len, dn->dn_datablksz)) {
 				err = dmu_tx_check_ioerr(zio, dn, 0, end);
 				if (err) {
 					tx->tx_err = err;
@@ -330,6 +335,7 @@ dmu_tx_count_free(dmu_tx_t *tx, dnode_t *dn, uint64_t off, uint64_t len)
 	uint64_t blkid, nblks;
 	uint64_t space = 0;
 	dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset;
+	spa_t *spa = tx->tx_pool->dp_spa;
 	int dirty;
 
 	/*
@@ -388,7 +394,7 @@ dmu_tx_count_free(dmu_tx_t *tx, dnode_t *dn, uint64_t off, uint64_t len)
 			bp += blkid + i;
 			if (dsl_dataset_block_freeable(ds, bp->blk_birth)) {
 				dprintf_bp(bp, "can free old%s", "");
-				space += BP_GET_ASIZE(bp);
+				space += bp_get_dasize(spa, bp);
 			}
 		}
 		nblks = 0;
@@ -423,7 +429,7 @@ dmu_tx_count_free(dmu_tx_t *tx, dnode_t *dn, uint64_t off, uint64_t len)
 				    bp[i].blk_birth)) {
 					dprintf_bp(&bp[i],
 					    "can free old%s", "");
-					space += BP_GET_ASIZE(&bp[i]);
+					space += bp_get_dasize(spa, &bp[i]);
 				}
 			}
 			dbuf_rele(dbuf, FTAG);
diff --git a/usr/src/uts/common/fs/zfs/dnode.c b/usr/src/uts/common/fs/zfs/dnode.c
index 6de40f5081..43f1d4f135 100644
--- a/usr/src/uts/common/fs/zfs/dnode.c
+++ b/usr/src/uts/common/fs/zfs/dnode.c
@@ -177,17 +177,10 @@ dnode_byteswap(dnode_phys_t *dnp)
 		return;
 	}
 
-	dnp->dn_type = BSWAP_8(dnp->dn_type);
-	dnp->dn_indblkshift = BSWAP_8(dnp->dn_indblkshift);
-	dnp->dn_nlevels = BSWAP_8(dnp->dn_nlevels);
-	dnp->dn_nblkptr = BSWAP_8(dnp->dn_nblkptr);
-	dnp->dn_bonustype = BSWAP_8(dnp->dn_bonustype);
-	dnp->dn_checksum = BSWAP_8(dnp->dn_checksum);
-	dnp->dn_compress = BSWAP_8(dnp->dn_compress);
 	dnp->dn_datablkszsec = BSWAP_16(dnp->dn_datablkszsec);
 	dnp->dn_bonuslen = BSWAP_16(dnp->dn_bonuslen);
 	dnp->dn_maxblkid = BSWAP_64(dnp->dn_maxblkid);
-	dnp->dn_secphys = BSWAP_64(dnp->dn_secphys);
+	dnp->dn_used = BSWAP_64(dnp->dn_used);
 
 	/*
 	 * dn_nblkptr is only one byte, so it's OK to read it in either
@@ -1110,27 +1103,29 @@ dnode_block_freed(dnode_t *dn, uint64_t blkid)
 
 /* call from syncing context when we actually write/free space for this dnode */
 void
-dnode_diduse_space(dnode_t *dn, int64_t space)
+dnode_diduse_space(dnode_t *dn, int64_t delta)
 {
-	uint64_t sectors;
-
-	dprintf_dnode(dn, "dn=%p dnp=%p secphys=%llu space=%lld\n",
+	uint64_t space;
+	dprintf_dnode(dn, "dn=%p dnp=%p used=%llu delta=%lld\n",
 	    dn, dn->dn_phys,
-	    (u_longlong_t)dn->dn_phys->dn_secphys,
-	    (longlong_t)space);
-
-	ASSERT(P2PHASE(space, 1<<DEV_BSHIFT) == 0);
+	    (u_longlong_t)dn->dn_phys->dn_used,
+	    (longlong_t)delta);
 
 	mutex_enter(&dn->dn_mtx);
-	if (space > 0) {
-		sectors = space >> DEV_BSHIFT;
-		ASSERT3U(dn->dn_phys->dn_secphys + sectors, >=,
-		    dn->dn_phys->dn_secphys);
-		dn->dn_phys->dn_secphys += sectors;
+	space = DN_USED_BYTES(dn->dn_phys);
+	if (delta > 0) {
+		ASSERT3U(space + delta, >=, space); /* no overflow */
+	} else {
+		ASSERT3U(space, >=, -delta); /* no underflow */
+	}
+	space += delta;
+	if (spa_version(dn->dn_objset->os_spa) < ZFS_VERSION_DNODE_BYTES) {
+		ASSERT((dn->dn_phys->dn_flags & DNODE_FLAG_USED_BYTES) == 0);
+		ASSERT3U(P2PHASE(space, 1<<DEV_BSHIFT), ==, 0);
+		dn->dn_phys->dn_used = space >> DEV_BSHIFT;
 	} else {
-		sectors = -space >> DEV_BSHIFT;
-		ASSERT3U(dn->dn_phys->dn_secphys, >=, sectors);
-		dn->dn_phys->dn_secphys -= sectors;
+		dn->dn_phys->dn_used = space;
+		dn->dn_phys->dn_flags |= DNODE_FLAG_USED_BYTES;
 	}
 	mutex_exit(&dn->dn_mtx);
 }
diff --git a/usr/src/uts/common/fs/zfs/dnode_sync.c b/usr/src/uts/common/fs/zfs/dnode_sync.c
index 80ac38c86a..5bb538980e 100644
--- a/usr/src/uts/common/fs/zfs/dnode_sync.c
+++ b/usr/src/uts/common/fs/zfs/dnode_sync.c
@@ -119,8 +119,8 @@ free_blocks(dnode_t *dn, blkptr_t *bp, int num, dmu_tx_t *tx)
 		if (BP_IS_HOLE(bp))
 			continue;
 
-		bytesfreed += BP_GET_ASIZE(bp);
-		ASSERT3U(bytesfreed >> DEV_BSHIFT, <=, dn->dn_phys->dn_secphys);
+		bytesfreed += bp_get_dasize(os->os_spa, bp);
+		ASSERT3U(bytesfreed, <=, DN_USED_BYTES(dn->dn_phys));
 		dsl_dataset_block_kill(os->os_dsl_dataset, bp, tx);
 	}
 	dnode_diduse_space(dn, -bytesfreed);
@@ -457,7 +457,7 @@ dnode_sync_free(dnode_t *dn, dmu_tx_t *tx)
 
 	/* free up all the blocks in the file. */
 	dnode_sync_free_range(dn, 0, dn->dn_phys->dn_maxblkid+1, tx);
-	ASSERT3U(dn->dn_phys->dn_secphys, ==, 0);
+	ASSERT3U(DN_USED_BYTES(dn->dn_phys), ==, 0);
 
 	/* ASSERT(blkptrs are zero); */
 	ASSERT(dn->dn_phys->dn_type != DMU_OT_NONE);
diff --git a/usr/src/uts/common/fs/zfs/dsl_dataset.c b/usr/src/uts/common/fs/zfs/dsl_dataset.c
index 5b1de1b4b8..a199aec8de 100644
--- a/usr/src/uts/common/fs/zfs/dsl_dataset.c
+++ b/usr/src/uts/common/fs/zfs/dsl_dataset.c
@@ -28,6 +28,7 @@
 #include <sys/dmu_objset.h>
 #include <sys/dsl_dataset.h>
 #include <sys/dsl_dir.h>
+#include <sys/dsl_prop.h>
 #include <sys/dmu_traverse.h>
 #include <sys/dmu_tx.h>
 #include <sys/arc.h>
@@ -43,10 +44,6 @@ static int dsl_dataset_destroy_begin_sync(dsl_dir_t *dd,
 
 #define	DSL_DEADLIST_BLOCKSIZE	SPA_MAXBLOCKSIZE
 
-#define	BP_GET_UCSIZE(bp) \
-	((BP_GET_LEVEL(bp) > 0 || dmu_ot[BP_GET_TYPE(bp)].ot_metadata) ? \
-	BP_GET_PSIZE(bp) : BP_GET_LSIZE(bp));
-
 /*
  * We use weighted reference counts to express the various forms of exclusion
  * between different open modes.  A STANDARD open is 1 point, an EXCLUSIVE open
@@ -68,7 +65,7 @@ static uint64_t ds_refcnt_weight[DS_MODE_LEVELS] = {
 void
 dsl_dataset_block_born(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
 {
-	int used = BP_GET_ASIZE(bp);
+	int used = bp_get_dasize(tx->tx_pool->dp_spa, bp);
 	int compressed = BP_GET_PSIZE(bp);
 	int uncompressed = BP_GET_UCSIZE(bp);
 
@@ -105,7 +102,7 @@ dsl_dataset_block_born(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
 void
 dsl_dataset_block_kill(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
 {
-	int used = BP_GET_ASIZE(bp);
+	int used = bp_get_dasize(tx->tx_pool->dp_spa, bp);
 	int compressed = BP_GET_PSIZE(bp);
 	int uncompressed = BP_GET_UCSIZE(bp);
 
@@ -155,8 +152,7 @@ dsl_dataset_block_kill(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
 			    ds->ds_phys->ds_prev_snap_obj);
 			ASSERT(ds->ds_prev->ds_phys->ds_num_children > 0);
 			if (ds->ds_prev->ds_phys->ds_next_snap_obj ==
-			    ds->ds_object &&
-			    bp->blk_birth >
+			    ds->ds_object && bp->blk_birth >
 			    ds->ds_prev->ds_phys->ds_prev_snap_txg) {
 				dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
 				mutex_enter(&ds->ds_prev->ds_lock);
@@ -373,7 +369,8 @@ dsl_dataset_open_obj(dsl_pool_t *dp, uint64_t dsobj, const char *snapname,
 
 	mutex_enter(&ds->ds_lock);
 	if ((DS_MODE_LEVEL(mode) == DS_MODE_PRIMARY &&
-	    ds->ds_phys->ds_inconsistent && !DS_MODE_IS_INCONSISTENT(mode)) ||
+	    (ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT) &&
+	    !DS_MODE_IS_INCONSISTENT(mode)) ||
 	    (ds->ds_open_refcount + weight > DOS_REF_MAX)) {
 		mutex_exit(&ds->ds_lock);
 		dsl_dataset_close(ds, DS_MODE_NONE, tag);
@@ -842,7 +839,7 @@ kill_blkptr(traverse_blk_cache_t *bc, spa_t *spa, void *arg)
 	 * Since this callback is not called concurrently, no lock is
 	 * needed on the accounting values.
 	 */
-	*ka->usedp += BP_GET_ASIZE(bp);
+	*ka->usedp += bp_get_dasize(spa, bp);
 	*ka->compressedp += BP_GET_PSIZE(bp);
 	*ka->uncompressedp += BP_GET_UCSIZE(bp);
 	/* XXX check for EIO? */
@@ -939,7 +936,7 @@ dsl_dataset_rollback_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx)
 	    ds->ds_prev->ds_phys->ds_compressed_bytes;
 	ds->ds_phys->ds_uncompressed_bytes =
 	    ds->ds_prev->ds_phys->ds_uncompressed_bytes;
-	ds->ds_phys->ds_inconsistent = ds->ds_prev->ds_phys->ds_inconsistent;
+	ds->ds_phys->ds_flags = ds->ds_prev->ds_phys->ds_flags;
 	ds->ds_phys->ds_unique_bytes = 0;
 
 	dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
@@ -969,7 +966,7 @@ dsl_dataset_destroy_begin_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx)
 
 	/* Mark it as inconsistent on-disk, in case we crash */
 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
-	ds->ds_phys->ds_inconsistent = TRUE;
+	ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT;
 
 	return (0);
 }
@@ -1120,10 +1117,10 @@ dsl_dataset_destroy_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx)
 				    bp.blk_birth >
 				    ds_prev->ds_phys->ds_prev_snap_txg) {
 					ds_prev->ds_phys->ds_unique_bytes +=
-					    BP_GET_ASIZE(&bp);
+					    bp_get_dasize(dp->dp_spa, &bp);
 				}
 			} else {
-				used += BP_GET_ASIZE(&bp);
+				used += bp_get_dasize(dp->dp_spa, &bp);
 				compressed += BP_GET_PSIZE(&bp);
 				uncompressed += BP_GET_UCSIZE(&bp);
 				/* XXX check return value? */
@@ -1169,7 +1166,7 @@ dsl_dataset_destroy_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx)
 				    bp.blk_birth <=
 				    ds->ds_phys->ds_creation_txg) {
 					ds_next->ds_phys->ds_unique_bytes +=
-					    BP_GET_ASIZE(&bp);
+					    bp_get_dasize(dp->dp_spa, &bp);
 				}
 			}
 
@@ -1347,7 +1344,7 @@ dsl_dataset_snapshot_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx)
 	dsphys->ds_used_bytes = ds->ds_phys->ds_used_bytes;
 	dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes;
 	dsphys->ds_uncompressed_bytes = ds->ds_phys->ds_uncompressed_bytes;
-	dsphys->ds_inconsistent = ds->ds_phys->ds_inconsistent;
+	dsphys->ds_flags = ds->ds_phys->ds_flags;
 	dsphys->ds_bp = ds->ds_phys->ds_bp;
 	dmu_buf_rele(dbuf, FTAG);
 
@@ -1424,7 +1421,7 @@ dsl_dataset_stats(dsl_dataset_t *ds, dmu_objset_stats_t *dds)
 		dds->dds_num_clones = ds->ds_phys->ds_num_children - 1;
 	}
 
-	dds->dds_inconsistent = ds->ds_phys->ds_inconsistent;
+	dds->dds_inconsistent = ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT;
 	dds->dds_last_txg = ds->ds_phys->ds_bp.blk_birth;
 
 	dds->dds_objects_used = ds->ds_phys->ds_bp.blk_fill;
@@ -1581,3 +1578,236 @@ dsl_dataset_rename(const char *osname, const char *newname)
 	dsl_dir_close(dd, FTAG);
 	return (err);
 }
+
+/* ARGSUSED */
+static int
+dsl_dataset_promote_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx)
+{
+	dsl_dir_t *pdd = NULL;	/* dir that holds the pivot snapshot */
+	dsl_dataset_t *ds = NULL;	/* cursor for snapshot walks */
+	dsl_dataset_t *hds = NULL;	/* head dataset of dd */
+	dsl_dataset_t *phds = NULL;	/* head dataset of pdd */
+	dsl_dataset_t *pivot_ds = NULL;	/* dd's clone parent snapshot */
+	dsl_dataset_t *newnext_ds = NULL;	/* becomes pivot's next snap */
+	int err;
+	char *name = NULL;
+	uint64_t used = 0, comp = 0, uncomp = 0, unique = 0, itor = 0;
+	blkptr_t bp;
+
+	/* Check that it is a clone */
+	if (dd->dd_phys->dd_clone_parent_obj == 0)
+		return (EINVAL);
+
+	/* Open our clone parent (pivot), its dir's head, and our head */
+	if (err = dsl_dataset_open_obj(dd->dd_pool,
+	    dd->dd_phys->dd_clone_parent_obj,
+	    NULL, DS_MODE_EXCLUSIVE, FTAG, &pivot_ds))
+		goto out;
+	pdd = pivot_ds->ds_dir;
+	if (err = dsl_dataset_open_obj(dd->dd_pool,
+	    pdd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG, &phds))
+		goto out;
+	if (err = dsl_dataset_open_obj(dd->dd_pool,
+	    dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG, &hds))
+		goto out;
+
+	if (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE) {
+		err = EXDEV;
+		goto out;
+	}
+
+	/* find pivot point's new next ds: walk back from our head */
+	VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, hds->ds_object,
+	    NULL, DS_MODE_NONE, FTAG, &newnext_ds));
+	while (newnext_ds->ds_phys->ds_prev_snap_obj != pivot_ds->ds_object) {
+		dsl_dataset_t *prev;
+
+		if (err = dsl_dataset_open_obj(dd->dd_pool,
+		    newnext_ds->ds_phys->ds_prev_snap_obj, NULL, DS_MODE_NONE,
+		    FTAG, &prev))
+			goto out;
+		dsl_dataset_close(newnext_ds, DS_MODE_NONE, FTAG);
+		newnext_ds = prev;
+	}
+
+	/* compute pivot's new unique space from newnext's deadlist */
+	while ((err = bplist_iterate(&newnext_ds->ds_deadlist,
+	    &itor, &bp)) == 0) {
+		if (bp.blk_birth > pivot_ds->ds_phys->ds_prev_snap_txg)
+			unique += bp_get_dasize(dd->dd_pool->dp_spa, &bp);
+	}
+	if (err != ENOENT)
+		goto out;
+
+	/* need the config lock to ensure that the snapshots are not open */
+	rw_enter(&dd->dd_pool->dp_config_rwlock, RW_WRITER);
+
+	/* Walk the snapshots that we are moving */
+	name = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+	ds = pivot_ds;
+	/* CONSTCOND */
+	while (TRUE) {
+		uint64_t val, dlused, dlcomp, dluncomp;
+		dsl_dataset_t *prev;
+
+		/* Check that the snapshot name does not conflict */
+		dsl_dataset_name(ds, name);
+		err = zap_lookup(dd->dd_pool->dp_meta_objset,
+		    hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname,
+		    8, 1, &val);
+		if (err != ENOENT) {
+			if (err == 0)
+				err = EEXIST;
+			goto out;
+		}
+
+		/*
+		 * compute space to transfer.  Each snapshot gave birth to:
+		 * (my used) - (prev's used) + (deadlist's used)
+		 */
+		used += ds->ds_phys->ds_used_bytes;
+		comp += ds->ds_phys->ds_compressed_bytes;
+		uncomp += ds->ds_phys->ds_uncompressed_bytes;
+
+		/* If we reach the first snapshot, we're done. */
+		if (ds->ds_phys->ds_prev_snap_obj == 0)
+			break;
+
+		if (err = bplist_space(&ds->ds_deadlist,
+		    &dlused, &dlcomp, &dluncomp))
+			goto out;
+		if (err = dsl_dataset_open_obj(dd->dd_pool,
+		    ds->ds_phys->ds_prev_snap_obj, NULL, DS_MODE_EXCLUSIVE,
+		    FTAG, &prev))
+			goto out;
+		used += dlused - prev->ds_phys->ds_used_bytes;
+		comp += dlcomp - prev->ds_phys->ds_compressed_bytes;
+		uncomp += dluncomp - prev->ds_phys->ds_uncompressed_bytes;
+
+		/*
+		 * We could be a clone of a clone.  If we reach our
+		 * parent's branch point, we're done.
+		 */
+		if (prev->ds_phys->ds_next_snap_obj != ds->ds_object) {
+			dsl_dataset_close(prev, DS_MODE_EXCLUSIVE, FTAG);
+			break;
+		}
+		if (ds != pivot_ds)
+			dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
+		ds = prev;
+	}
+	if (ds != pivot_ds)
+		dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
+	ds = NULL;
+
+	/* Check that there is enough space here */
+	if (err = dsl_dir_transfer_possible(pdd, dd, used))
+		goto out;
+
+	/* The point of no (unsuccessful) return */
+
+	/* move snapshots to this dir */
+	ds = pivot_ds;
+	/* CONSTCOND */
+	while (TRUE) {
+		dsl_dataset_t *prev;
+
+		/* move snap name entry */
+		dsl_dataset_name(ds, name);
+		VERIFY(0 == zap_remove(dd->dd_pool->dp_meta_objset,
+		    phds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname, tx));
+		VERIFY(0 == zap_add(dd->dd_pool->dp_meta_objset,
+		    hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname,
+		    8, 1, &ds->ds_object, tx));
+
+		/* change containing dsl_dir */
+		dmu_buf_will_dirty(ds->ds_dbuf, tx);
+		ASSERT3U(ds->ds_phys->ds_dir_obj, ==, pdd->dd_object);
+		ds->ds_phys->ds_dir_obj = dd->dd_object;
+		ASSERT3P(ds->ds_dir, ==, pdd);
+		dsl_dir_close(ds->ds_dir, ds);
+		VERIFY(0 == dsl_dir_open_obj(dd->dd_pool, dd->dd_object,
+		    NULL, ds, &ds->ds_dir));
+
+		ASSERT3U(dsl_prop_numcb(ds), ==, 0);
+
+		if (ds->ds_phys->ds_prev_snap_obj == 0)
+			break;
+
+		VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool,
+		    ds->ds_phys->ds_prev_snap_obj, NULL, DS_MODE_EXCLUSIVE,
+		    FTAG, &prev));
+
+		if (prev->ds_phys->ds_next_snap_obj != ds->ds_object) {
+			dsl_dataset_close(prev, DS_MODE_EXCLUSIVE, FTAG);
+			break;
+		}
+		if (ds != pivot_ds)
+			dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
+		ds = prev;
+	}
+
+	/* change pivot point's next snap */
+	dmu_buf_will_dirty(pivot_ds->ds_dbuf, tx);
+	pivot_ds->ds_phys->ds_next_snap_obj = newnext_ds->ds_object;
+
+	/* swap clone parentage: dd takes pdd's origin, pdd hangs off pivot */
+	dmu_buf_will_dirty(dd->dd_dbuf, tx);
+	ASSERT3U(dd->dd_phys->dd_clone_parent_obj, ==, pivot_ds->ds_object);
+	dd->dd_phys->dd_clone_parent_obj = pdd->dd_phys->dd_clone_parent_obj;
+	dmu_buf_will_dirty(pdd->dd_dbuf, tx);
+	pdd->dd_phys->dd_clone_parent_obj = pivot_ds->ds_object;
+
+	/* change space accounting */
+	dsl_dir_diduse_space(pdd, -used, -comp, -uncomp, tx);
+	dsl_dir_diduse_space(dd, used, comp, uncomp, tx);
+	pivot_ds->ds_phys->ds_unique_bytes = unique;
+
+	err = 0;
+
+out:	/* common exit: drop lock, close whatever was opened */
+	if (RW_WRITE_HELD(&dd->dd_pool->dp_config_rwlock))
+		rw_exit(&dd->dd_pool->dp_config_rwlock);
+	if (hds)
+		dsl_dataset_close(hds, DS_MODE_NONE, FTAG);
+	if (phds)
+		dsl_dataset_close(phds, DS_MODE_NONE, FTAG);
+	if (ds && ds != pivot_ds)
+		dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
+	if (pivot_ds)
+		dsl_dataset_close(pivot_ds, DS_MODE_EXCLUSIVE, FTAG);
+	if (newnext_ds)
+		dsl_dataset_close(newnext_ds, DS_MODE_NONE, FTAG);
+	if (name)
+		kmem_free(name, MAXPATHLEN);
+	return (err);
+}
+
+int
+dsl_dataset_promote(const char *name)
+{
+	dsl_dataset_t *ds;
+	int err;
+	dmu_object_info_t doi;	/* info on the snapnames zapobj */
+
+	err = dsl_dataset_open(name, DS_MODE_NONE, FTAG, &ds);
+	if (err)
+		return (err);
+
+	err = dmu_object_info(ds->ds_dir->dd_pool->dp_meta_objset,
+	    ds->ds_phys->ds_snapnames_zapobj, &doi);
+	if (err) {
+		dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
+		return (err);
+	}
+
+	/*
+	 * On top of the (1<<20) base, add in 128x the snapnames zapobj
+	 * size, since we will be moving a bunch of snapnames to the
+	 * promoted ds, and dirtying their bonus buffers.
+	 */
+	err = dsl_dir_sync_task(ds->ds_dir, dsl_dataset_promote_sync, NULL,
+	    (1<<20) + (doi.doi_physical_blks << (SPA_MINBLOCKSHIFT + 7)));
+	dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
+	return (err);
+}
diff --git a/usr/src/uts/common/fs/zfs/dsl_dir.c b/usr/src/uts/common/fs/zfs/dsl_dir.c
index 8ffa145477..d7095cb0d3 100644
--- a/usr/src/uts/common/fs/zfs/dsl_dir.c
+++ b/usr/src/uts/common/fs/zfs/dsl_dir.c
@@ -739,7 +739,7 @@ dsl_dir_space_available(dsl_dir_t *dd,
 		used += delta;
 
 	if (dd->dd_parent == NULL) {
-		uint64_t poolsize = dsl_pool_adjustedsize(dd->dd_pool, B_FALSE);
+		uint64_t poolsize = dsl_pool_adjustedsize(dd->dd_pool, FALSE);
 		quota = MIN(quota, poolsize);
 	}
 
@@ -754,23 +754,19 @@ dsl_dir_space_available(dsl_dir_t *dd,
 	if (used > quota) {
 		/* over quota */
 		myspace = 0;
-#ifdef ZFS_DEBUG
-		{
-			/*
-			 * While it's OK to be a little over quota, if
-			 * we think we are using more space than there
-			 * is in the pool (which is already 6% more than
-			 * dsl_pool_adjustedsize()), something is very
-			 * wrong.
-			 */
-			uint64_t space = spa_get_space(dd->dd_pool->dp_spa);
-			ASSERT3U(used, <=, space);
-		}
-#endif
+
+		/*
+		 * While it's OK to be a little over quota, if
+		 * we think we are using more space than there
+		 * is in the pool (which is already 1.6% more than
+		 * dsl_pool_adjustedsize()), something is very
+		 * wrong.
+		 */
+		ASSERT3U(used, <=, spa_get_space(dd->dd_pool->dp_spa));
 	} else {
 		/*
-		 * the lesser of parent's space and the space
-		 * left in our quota
+		 * the lesser of the space provided by our parent and
+		 * the space left in our quota
 		 */
 		myspace = MIN(parentspace, quota - used);
 	}
@@ -1170,27 +1166,22 @@ dsl_dir_rename_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx)
 	}
 
 	if (newpds != dd->dd_parent) {
-		dsl_dir_t *ancestor;
-		int64_t adelta;
-		uint64_t myspace, avail;
-
-		ancestor = closest_common_ancestor(dd, newpds);
+		/* is there enough space? */
+		uint64_t myspace =
+		    MAX(dd->dd_used_bytes, dd->dd_phys->dd_reserved);
 
 		/* no rename into our descendent */
-		if (ancestor == dd) {
+		if (closest_common_ancestor(dd, newpds) == dd) {
 			dsl_dir_close(newpds, FTAG);
 			rw_exit(&dp->dp_config_rwlock);
 			return (EINVAL);
 		}
 
-		myspace = MAX(dd->dd_used_bytes, dd->dd_phys->dd_reserved);
-		adelta = would_change(dd->dd_parent, -myspace, ancestor);
-		avail = dsl_dir_space_available(newpds,
-		    ancestor, adelta, FALSE);
-		if (avail < myspace) {
+		if (err = dsl_dir_transfer_possible(dd->dd_parent, newpds,
+		    myspace)) {
 			dsl_dir_close(newpds, FTAG);
 			rw_exit(&dp->dp_config_rwlock);
-			return (ENOSPC);
+			return (err);
 		}
 
 		/* The point of no (unsuccessful) return */
@@ -1227,3 +1218,19 @@ dsl_dir_rename_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx)
 	rw_exit(&dp->dp_config_rwlock);
 	return (0);
 }
+
+int
+dsl_dir_transfer_possible(dsl_dir_t *sdd, dsl_dir_t *tdd, uint64_t space)
+{
+	dsl_dir_t *ancestor;	/* closest common ancestor of sdd and tdd */
+	int64_t adelta;		/* would_change() result for sdd losing space */
+	uint64_t avail;		/* space available to tdd given adelta */
+
+	ancestor = closest_common_ancestor(sdd, tdd);
+	adelta = would_change(sdd, -space, ancestor);	/* space leaving sdd */
+	avail = dsl_dir_space_available(tdd, ancestor, adelta, FALSE);
+	if (avail < space)	/* tdd cannot take that much */
+		return (ENOSPC);
+
+	return (0);
+}
diff --git a/usr/src/uts/common/fs/zfs/dsl_pool.c b/usr/src/uts/common/fs/zfs/dsl_pool.c
index 77a1adb3b1..d12e1acfeb 100644
--- a/usr/src/uts/common/fs/zfs/dsl_pool.c
+++ b/usr/src/uts/common/fs/zfs/dsl_pool.c
@@ -241,7 +241,7 @@ dsl_pool_adjustedsize(dsl_pool_t *dp, boolean_t netfree)
 	 * cut the reservation in half to allow forward progress
 	 * (e.g. make it possible to rm(1) files from a full pool).
 	 */
-	space = spa_get_space(dp->dp_spa);
+	space = spa_get_dspace(dp->dp_spa);
 	resv = MAX(space >> 6, SPA_MINDEVSIZE >> 1);
 	if (netfree)
 		resv >>= 1;
diff --git a/usr/src/uts/common/fs/zfs/dsl_prop.c b/usr/src/uts/common/fs/zfs/dsl_prop.c
index fc33b1c591..0bb55f8b95 100644
--- a/usr/src/uts/common/fs/zfs/dsl_prop.c
+++ b/usr/src/uts/common/fs/zfs/dsl_prop.c
@@ -62,33 +62,28 @@ dodefault(const char *propname, int intsz, int numint, void *buf)
 }
 
 static int
-dsl_prop_get_impl(dsl_pool_t *dp, uint64_t ddobj, const char *propname,
+dsl_prop_get_impl(dsl_dir_t *dd, const char *propname,
     int intsz, int numint, void *buf, char *setpoint)
 {
-	int err = 0;
-	objset_t *mos = dp->dp_meta_objset;
+	int err = ENOENT;
 
 	if (setpoint)
 		setpoint[0] = '\0';
 
-	ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock));
-
-	while (ddobj != 0) {
-		dsl_dir_t *dd;
-		err = dsl_dir_open_obj(dp, ddobj, NULL, FTAG, &dd);
-		if (err)
-			break;
+	/*
+	 * Note: dd may be NULL, therefore we shouldn't dereference it
+	 * outside this loop.
+	 */
+	for (; dd != NULL; dd = dd->dd_parent) {
+		objset_t *mos = dd->dd_pool->dp_meta_objset;
+		ASSERT(RW_LOCK_HELD(&dd->dd_pool->dp_config_rwlock));
 		err = zap_lookup(mos, dd->dd_phys->dd_props_zapobj,
 		    propname, intsz, numint, buf);
 		if (err != ENOENT) {
 			if (setpoint)
 				dsl_dir_name(dd, setpoint);
-			dsl_dir_close(dd, FTAG);
 			break;
 		}
-		ASSERT3U(err, ==, ENOENT);
-		ddobj = dd->dd_phys->dd_parent_obj;
-		dsl_dir_close(dd, FTAG);
 	}
 	if (err == ENOENT)
 		err = dodefault(propname, intsz, numint, buf);
@@ -107,27 +102,21 @@ int
 dsl_prop_register(dsl_dataset_t *ds, const char *propname,
     dsl_prop_changed_cb_t *callback, void *cbarg)
 {
-	dsl_dir_t *dd;
+	dsl_dir_t *dd = ds->ds_dir;
 	uint64_t value;
 	dsl_prop_cb_record_t *cbr;
 	int err;
 
-	dd = ds->ds_dir;
-
 	rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER);
 
-	err = dsl_prop_get_impl(dd->dd_pool, dd->dd_object, propname,
-	    8, 1, &value, NULL);
-	if (err == ENOENT) {
-		err = 0;
-		value = DSL_PROP_VALUE_UNDEFINED;
-	}
+	err = dsl_prop_get_impl(dd, propname, 8, 1, &value, NULL);
 	if (err != 0) {
 		rw_exit(&dd->dd_pool->dp_config_rwlock);
 		return (err);
 	}
 
 	cbr = kmem_alloc(sizeof (dsl_prop_cb_record_t), KM_SLEEP);
+	cbr->cbr_ds = ds;
 	cbr->cbr_propname = kmem_alloc(strlen(propname)+1, KM_SLEEP);
 	(void) strcpy((char *)cbr->cbr_propname, propname);
 	cbr->cbr_func = callback;
@@ -152,8 +141,7 @@ dsl_prop_get_ds(dsl_dir_t *dd, const char *propname,
 	int err;
 
 	rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER);
-	err = dsl_prop_get_impl(dd->dd_pool, dd->dd_object,
-	    propname, intsz, numints, buf, setpoint);
+	err = dsl_prop_get_impl(dd, propname, intsz, numints, buf, setpoint);
 	rw_exit(&dd->dd_pool->dp_config_rwlock);
 
 	return (err);
@@ -222,17 +210,16 @@ int
 dsl_prop_unregister(dsl_dataset_t *ds, const char *propname,
     dsl_prop_changed_cb_t *callback, void *cbarg)
 {
-	dsl_dir_t *dd;
+	dsl_dir_t *dd = ds->ds_dir;
 	dsl_prop_cb_record_t *cbr;
 
-	dd = ds->ds_dir;
-
 	mutex_enter(&dd->dd_lock);
 	for (cbr = list_head(&dd->dd_prop_cbs);
 	    cbr; cbr = list_next(&dd->dd_prop_cbs, cbr)) {
-		if (strcmp(cbr->cbr_propname, propname) == 0 &&
+		if (cbr->cbr_ds == ds &&
 		    cbr->cbr_func == callback &&
-		    cbr->cbr_arg == cbarg)
+		    cbr->cbr_arg == cbarg &&
+		    strcmp(cbr->cbr_propname, propname) == 0)
 			break;
 	}
 
@@ -251,6 +238,27 @@ dsl_prop_unregister(dsl_dataset_t *ds, const char *propname,
 	return (0);
 }
 
+/*
+ * Return the number of callbacks that are registered for this dataset.
+ */
+int
+dsl_prop_numcb(dsl_dataset_t *ds)
+{
+	dsl_dir_t *dd = ds->ds_dir;	/* callbacks are kept per dsl_dir */
+	dsl_prop_cb_record_t *cbr;
+	int num = 0;
+
+	mutex_enter(&dd->dd_lock);	/* list is walked under dd_lock */
+	for (cbr = list_head(&dd->dd_prop_cbs);
+	    cbr; cbr = list_next(&dd->dd_prop_cbs, cbr)) {
+		if (cbr->cbr_ds == ds)	/* record belongs to this dataset */
+			num++;
+	}
+	mutex_exit(&dd->dd_lock);
+
+	return (num);
+}
+
 static void
 dsl_prop_changed_notify(dsl_pool_t *dp, uint64_t ddobj,
     const char *propname, uint64_t value, int first)
@@ -330,9 +338,8 @@ dsl_prop_set_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx)
 		if (err == ENOENT) /* that's fine. */
 			err = 0;
 		if (err == 0 && isint) {
-			err = dsl_prop_get_impl(dd->dd_pool,
-			    dd->dd_phys->dd_parent_obj, psa->name,
-			    8, 1, &intval, NULL);
+			err = dsl_prop_get_impl(dd->dd_parent,
+			    psa->name, 8, 1, &intval, NULL);
 		}
 	} else {
 		err = zap_update(mos, zapobj, psa->name,
@@ -380,7 +387,7 @@ int
 dsl_prop_get_all(objset_t *os, nvlist_t **nvp)
 {
 	dsl_dataset_t *ds = os->os->os_dsl_dataset;
-	dsl_dir_t *dd, *parent;
+	dsl_dir_t *dd = ds->ds_dir;
 	int err = 0;
 	dsl_pool_t *dp;
 	objset_t *mos;
@@ -395,15 +402,13 @@ dsl_prop_get_all(objset_t *os, nvlist_t **nvp)
 		return (0);
 	}
 
-	dd = ds->ds_dir;
-
 	VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0);
 
 	dp = dd->dd_pool;
 	mos = dp->dp_meta_objset;
 
 	rw_enter(&dp->dp_config_rwlock, RW_READER);
-	while (dd != NULL) {
+	for (; dd != NULL; dd = dd->dd_parent) {
 		dsl_dir_name(dd, setpoint);
 
 		for (zap_cursor_init(&zc, mos, dd->dd_phys->dd_props_zapobj);
@@ -418,7 +423,6 @@ dsl_prop_get_all(objset_t *os, nvlist_t **nvp)
 				/*
 				 * String property
 				 */
-
 				tmp = kmem_alloc(za.za_num_integers, KM_SLEEP);
 				err = zap_lookup(mos,
 				    dd->dd_phys->dd_props_zapobj,
@@ -448,27 +452,9 @@ dsl_prop_get_all(objset_t *os, nvlist_t **nvp)
 		}
 		zap_cursor_fini(&zc);
 
-		if (err != ENOENT) {
-			if (dd != ds->ds_dir)
-				dsl_dir_close(dd, FTAG);
+		if (err != ENOENT)
 			break;
-		} else {
-			err = 0;
-		}
-
-		/*
-		 * Continue to parent.
-		 */
-		if (dd->dd_phys->dd_parent_obj == 0)
-			parent = NULL;
-		else
-			err = dsl_dir_open_obj(dp,
-			    dd->dd_phys->dd_parent_obj, NULL, FTAG, &parent);
-		if (dd != ds->ds_dir)
-			dsl_dir_close(dd, FTAG);
-		if (err)
-			break;
-		dd = parent;
+		err = 0;
 	}
 	rw_exit(&dp->dp_config_rwlock);
 
diff --git a/usr/src/uts/common/fs/zfs/spa.c b/usr/src/uts/common/fs/zfs/spa.c
index 2fe82c2e80..fca42558ef 100644
--- a/usr/src/uts/common/fs/zfs/spa.c
+++ b/usr/src/uts/common/fs/zfs/spa.c
@@ -18,6 +18,7 @@
  *
  * CDDL HEADER END
  */
+
 /*
  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
@@ -185,33 +186,40 @@ spa_deactivate(spa_t *spa)
  * in the CLOSED state.  This will prep the pool before open/creation/import.
  * All vdev validation is done by the vdev_alloc() routine.
  */
-static vdev_t *
-spa_config_parse(spa_t *spa, nvlist_t *nv, vdev_t *parent, uint_t id, int atype)
+static int
+spa_config_parse(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent,
+    uint_t id, int atype)
 {
 	nvlist_t **child;
 	uint_t c, children;
-	vdev_t *vd;
+	int error;
 
-	if ((vd = vdev_alloc(spa, nv, parent, id, atype)) == NULL)
-		return (NULL);
+	if ((error = vdev_alloc(spa, vdp, nv, parent, id, atype)) != 0)
+		return (error);
 
-	if (vd->vdev_ops->vdev_op_leaf)
-		return (vd);
+	if ((*vdp)->vdev_ops->vdev_op_leaf)
+		return (0);
 
 	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
 	    &child, &children) != 0) {
-		vdev_free(vd);
-		return (NULL);
+		vdev_free(*vdp);
+		*vdp = NULL;
+		return (EINVAL);
 	}
 
 	for (c = 0; c < children; c++) {
-		if (spa_config_parse(spa, child[c], vd, c, atype) == NULL) {
-			vdev_free(vd);
-			return (NULL);
+		vdev_t *vd;
+		if ((error = spa_config_parse(spa, &vd, child[c], *vdp, c,
+		    atype)) != 0) {
+			vdev_free(*vdp);
+			*vdp = NULL;
+			return (error);
 		}
 	}
 
-	return (vd);
+	ASSERT(*vdp != NULL);
+
+	return (0);
 }
 
 /*
@@ -220,6 +228,8 @@ spa_config_parse(spa_t *spa, nvlist_t *nv, vdev_t *parent, uint_t id, int atype)
 static void
 spa_unload(spa_t *spa)
 {
+	int i;
+
 	/*
 	 * Stop async tasks.
 	 */
@@ -254,10 +264,117 @@ spa_unload(spa_t *spa)
 		vdev_free(spa->spa_root_vdev);
 	ASSERT(spa->spa_root_vdev == NULL);
 
+	for (i = 0; i < spa->spa_nspares; i++)
+		vdev_free(spa->spa_spares[i]);
+	if (spa->spa_spares) {
+		kmem_free(spa->spa_spares, spa->spa_nspares * sizeof (void *));
+		spa->spa_spares = NULL;
+	}
+	if (spa->spa_sparelist) {
+		nvlist_free(spa->spa_sparelist);
+		spa->spa_sparelist = NULL;
+	}
+
 	spa->spa_async_suspended = 0;
 }
 
 /*
+ * Load (or re-load) the current list of vdevs describing the active spares for
+ * this pool.  When this is called, we have some form of basic information in
+ * 'spa_sparelist'.  We parse this into vdevs, try to open them, and then
+ * re-generate a more complete list including status information.
+ */
+static void
+spa_load_spares(spa_t *spa)
+{
+	nvlist_t **spares;
+	uint_t nspares;
+	int i;
+
+	/*
+	 * First, close and free any existing spare vdevs.
+	 */
+	for (i = 0; i < spa->spa_nspares; i++) {
+		vdev_close(spa->spa_spares[i]);
+		vdev_free(spa->spa_spares[i]);
+	}
+	if (spa->spa_spares)
+		kmem_free(spa->spa_spares, spa->spa_nspares * sizeof (void *));
+
+	if (spa->spa_sparelist == NULL)
+		nspares = 0;
+	else
+		VERIFY(nvlist_lookup_nvlist_array(spa->spa_sparelist,
+		    ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0);
+
+	spa->spa_nspares = (int)nspares;
+	spa->spa_spares = NULL;
+
+	if (nspares == 0)
+		return;
+
+	/*
+	 * Construct the array of vdevs, opening them to get status in the
+	 * process.
+	 */
+	spa->spa_spares = kmem_alloc(nspares * sizeof (void *), KM_SLEEP);
+	for (i = 0; i < spa->spa_nspares; i++) {
+		vdev_t *vd;
+
+		VERIFY(spa_config_parse(spa, &vd, spares[i], NULL, 0,
+		    VDEV_ALLOC_SPARE) == 0);
+		ASSERT(vd != NULL);
+
+		spa->spa_spares[i] = vd;
+
+		if (vdev_open(vd) != 0)
+			continue;
+
+		vd->vdev_top = vd;
+		(void) vdev_validate_spare(vd);
+	}
+
+	/*
+	 * Recompute the stashed list of spares, with status information
+	 * this time.
+	 */
+	VERIFY(nvlist_remove(spa->spa_sparelist, ZPOOL_CONFIG_SPARES,
+	    DATA_TYPE_NVLIST_ARRAY) == 0);
+
+	spares = kmem_alloc(spa->spa_nspares * sizeof (void *), KM_SLEEP);
+	for (i = 0; i < spa->spa_nspares; i++)
+		spares[i] = vdev_config_generate(spa, spa->spa_spares[i],
+		    B_TRUE, B_TRUE);
+	VERIFY(nvlist_add_nvlist_array(spa->spa_sparelist, ZPOOL_CONFIG_SPARES,
+	    spares, spa->spa_nspares) == 0);
+	for (i = 0; i < spa->spa_nspares; i++)
+		nvlist_free(spares[i]);
+	kmem_free(spares, spa->spa_nspares * sizeof (void *));
+}
+
+static int
+load_nvlist(spa_t *spa, uint64_t obj, nvlist_t **value)
+{
+	dmu_buf_t *db;
+	char *packed = NULL;
+	size_t nvsize = 0;
+	int error;
+	*value = NULL;
+
+	VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db));
+	nvsize = *(uint64_t *)db->db_data;
+	dmu_buf_rele(db, FTAG);
+
+	packed = kmem_alloc(nvsize, KM_SLEEP);
+	error = dmu_read(spa->spa_meta_objset, obj, 0, nvsize, packed);
+	if (error == 0)
+		error = nvlist_unpack(packed, nvsize, value, 0);
+	kmem_free(packed, nvsize);
+
+	return (error);
+}
+
+/*
  * Load an existing storage pool, using the pool's builtin spa_config as a
  * source of configuration information.
  */
@@ -270,6 +387,7 @@ spa_load(spa_t *spa, nvlist_t *config, spa_load_state_t state, int mosconfig)
 	uberblock_t *ub = &spa->spa_uberblock;
 	uint64_t config_cache_txg = spa->spa_config_txg;
 	uint64_t pool_guid;
+	uint64_t version;
 	zio_t *zio;
 
 	spa->spa_load_state = state;
@@ -280,6 +398,13 @@ spa_load(spa_t *spa, nvlist_t *config, spa_load_state_t state, int mosconfig)
 		goto out;
 	}
 
+	/*
+	 * Versioning wasn't explicitly added to the label until later, so if
+	 * it's not present treat it as the initial version.
+	 */
+	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &version) != 0)
+		version = ZFS_VERSION_INITIAL;
+
 	(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG,
 	    &spa->spa_config_txg);
 
@@ -290,16 +415,17 @@ spa_load(spa_t *spa, nvlist_t *config, spa_load_state_t state, int mosconfig)
 	}
 
 	/*
-	 * Parse the configuration into a vdev tree.
+	 * Parse the configuration into a vdev tree.  We explicitly set the
+	 * value that will be returned by spa_version() since parsing the
+	 * configuration requires knowing the version number.
 	 */
 	spa_config_enter(spa, RW_WRITER, FTAG);
-	rvd = spa_config_parse(spa, nvroot, NULL, 0, VDEV_ALLOC_LOAD);
+	spa->spa_ubsync.ub_version = version;
+	error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_LOAD);
 	spa_config_exit(spa, FTAG);
 
-	if (rvd == NULL) {
-		error = EINVAL;
+	if (error != 0)
 		goto out;
-	}
 
 	ASSERT(spa->spa_root_vdev == rvd);
 	ASSERT(spa_guid(spa) == pool_guid);
@@ -396,24 +522,9 @@ spa_load(spa_t *spa, nvlist_t *config, spa_load_state_t state, int mosconfig)
 	}
 
 	if (!mosconfig) {
-		dmu_buf_t *db;
-		char *packed = NULL;
-		size_t nvsize = 0;
-		nvlist_t *newconfig = NULL;
-
-		VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset,
-		    spa->spa_config_object, FTAG, &db));
-		nvsize = *(uint64_t *)db->db_data;
-		dmu_buf_rele(db, FTAG);
-
-		packed = kmem_alloc(nvsize, KM_SLEEP);
-		error = dmu_read(spa->spa_meta_objset,
-		    spa->spa_config_object, 0, nvsize, packed);
-		if (error == 0)
-			error = nvlist_unpack(packed, nvsize, &newconfig, 0);
-		kmem_free(packed, nvsize);
+		nvlist_t *newconfig;
 
-		if (error) {
+		if (load_nvlist(spa, spa->spa_config_object, &newconfig) != 0) {
 			vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
 			    VDEV_AUX_CORRUPT_DATA);
 			error = EIO;
@@ -421,7 +532,6 @@ spa_load(spa_t *spa, nvlist_t *config, spa_load_state_t state, int mosconfig)
 		}
 
 		spa_config_set(spa, newconfig);
-
 		spa_unload(spa);
 		spa_deactivate(spa);
 		spa_activate(spa);
@@ -439,6 +549,21 @@ spa_load(spa_t *spa, nvlist_t *config, spa_load_state_t state, int mosconfig)
 	}
 
 	/*
+	 * Load the bit that tells us to use the new accounting function
+	 * (raid-z deflation).  If we have an older pool, this will not
+	 * be present.
+	 */
+	error = zap_lookup(spa->spa_meta_objset,
+	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE,
+	    sizeof (uint64_t), 1, &spa->spa_deflate);
+	if (error != 0 && error != ENOENT) {
+		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
+		    VDEV_AUX_CORRUPT_DATA);
+		error = EIO;
+		goto out;
+	}
+
+	/*
 	 * Load the persistent error log.  If we have an older pool, this will
 	 * not be present.
 	 */
@@ -463,6 +588,32 @@ spa_load(spa_t *spa, nvlist_t *config, spa_load_state_t state, int mosconfig)
 	}
 
 	/*
+	 * Load any hot spares for this pool.
+	 */
+	error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
+	    DMU_POOL_SPARES, sizeof (uint64_t), 1, &spa->spa_spares_object);
+	if (error != 0 && error != ENOENT) {
+		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
+		    VDEV_AUX_CORRUPT_DATA);
+		error = EIO;
+		goto out;
+	}
+	if (error == 0) {
+		ASSERT(spa_version(spa) >= ZFS_VERSION_SPARES);
+		if (load_nvlist(spa, spa->spa_spares_object,
+		    &spa->spa_sparelist) != 0) {
+			vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
+			    VDEV_AUX_CORRUPT_DATA);
+			error = EIO;
+			goto out;
+		}
+
+		spa_config_enter(spa, RW_WRITER, FTAG);
+		spa_load_spares(spa);
+		spa_config_exit(spa, FTAG);
+	}
+
+	/*
 	 * Load the vdev state for all toplevel vdevs.
 	 */
 	vdev_load(rvd);
@@ -527,7 +678,7 @@ spa_load(spa_t *spa, nvlist_t *config, spa_load_state_t state, int mosconfig)
 
 	error = 0;
 out:
-	if (error)
+	if (error && error != EBADF)
 		zfs_ereport_post(FM_EREPORT_ZFS_POOL, spa, NULL, NULL, 0, 0);
 	spa->spa_load_state = SPA_LOAD_NONE;
 	spa->spa_ena = 0;
@@ -587,6 +738,7 @@ spa_open_common(const char *pool, spa_t **spapp, void *tag, nvlist_t **config)
 			 * this is the case, the config cache is out of sync and
 			 * we should remove the pool from the namespace.
 			 */
+			zfs_post_ok(spa, NULL);
 			spa_unload(spa);
 			spa_deactivate(spa);
 			spa_remove(spa);
@@ -678,6 +830,48 @@ spa_inject_delref(spa_t *spa)
 	mutex_exit(&spa_namespace_lock);
 }
 
+static void
+spa_add_spares(spa_t *spa, nvlist_t *config)
+{
+	nvlist_t **spares;
+	uint_t i, nspares;
+	nvlist_t *nvroot;
+	uint64_t guid;
+	vdev_stat_t *vs;
+	uint_t vsc;
+
+	if (spa->spa_nspares == 0)
+		return;
+
+	VERIFY(nvlist_lookup_nvlist(config,
+	    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
+	VERIFY(nvlist_lookup_nvlist_array(spa->spa_sparelist,
+	    ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0);
+	if (nspares != 0) {
+		VERIFY(nvlist_add_nvlist_array(nvroot,
+		    ZPOOL_CONFIG_SPARES, spares, nspares) == 0);
+		VERIFY(nvlist_lookup_nvlist_array(nvroot,
+		    ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0);
+
+		/*
+		 * Go through and find any spares which have since been
+		 * repurposed as an active spare.  If this is the case, update
+		 * their status appropriately.
+		 */
+		for (i = 0; i < nspares; i++) {
+			VERIFY(nvlist_lookup_uint64(spares[i],
+			    ZPOOL_CONFIG_GUID, &guid) == 0);
+			if (spa_spare_inuse(guid)) {
+				VERIFY(nvlist_lookup_uint64_array(
+				    spares[i], ZPOOL_CONFIG_STATS,
+				    (uint64_t **)&vs, &vsc) == 0);
+				vs->vs_state = VDEV_STATE_CANT_OPEN;
+				vs->vs_aux = VDEV_AUX_SPARED;
+			}
+		}
+	}
+}
+
 int
 spa_get_stats(const char *name, nvlist_t **config, char *altroot, size_t buflen)
 {
@@ -687,10 +881,13 @@ spa_get_stats(const char *name, nvlist_t **config, char *altroot, size_t buflen)
 	*config = NULL;
 	error = spa_open_common(name, &spa, FTAG, config);
 
-	if (spa && *config != NULL)
+	if (spa && *config != NULL) {
 		VERIFY(nvlist_add_uint64(*config, ZPOOL_CONFIG_ERRCOUNT,
 		    spa_get_errlog_size(spa)) == 0);
 
+		spa_add_spares(spa, *config);
+	}
+
 	/*
 	 * We want to get the alternate root even for faulted pools, so we cheat
 	 * and call spa_lookup() directly.
@@ -717,6 +914,65 @@ spa_get_stats(const char *name, nvlist_t **config, char *altroot, size_t buflen)
 }
 
 /*
+ * Validate that the 'spares' array is well formed.  We must have an array of
+ * nvlists, each of which describes a valid leaf vdev.
+ */
+static int
+spa_validate_spares(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode)
+{
+	nvlist_t **spares;
+	uint_t i, nspares;
+	vdev_t *vd;
+	int error;
+
+	/*
+	 * It's acceptable to have no spares specified.
+	 */
+	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
+	    &spares, &nspares) != 0)
+		return (0);
+
+	if (nspares == 0)
+		return (EINVAL);
+
+	/*
+	 * Make sure the pool is formatted with a version that supports hot
+	 * spares.
+	 */
+	if (spa_version(spa) < ZFS_VERSION_SPARES)
+		return (ENOTSUP);
+
+	for (i = 0; i < nspares; i++) {
+		if ((error = spa_config_parse(spa, &vd, spares[i], NULL, 0,
+		    mode)) != 0)
+			return (error);
+
+		if (!vd->vdev_ops->vdev_op_leaf) {
+			vdev_free(vd);
+			return (EINVAL);
+		}
+
+		if ((error = vdev_open(vd)) != 0) {
+			vdev_free(vd);
+			return (error);
+		}
+
+		vd->vdev_top = vd;
+		if ((error = vdev_label_spare(vd, crtxg)) != 0) {
+			vdev_free(vd);
+			return (error);
+		}
+
+		VERIFY(nvlist_add_uint64(spares[i], ZPOOL_CONFIG_GUID,
+		    vd->vdev_guid) == 0);
+
+		vdev_free(vd);
+	}
+
+	return (0);
+}
+
+/*
  * Pool Creation
  */
 int
@@ -726,8 +982,10 @@ spa_create(const char *pool, nvlist_t *nvroot, const char *altroot)
 	vdev_t *rvd;
 	dsl_pool_t *dp;
 	dmu_tx_t *tx;
-	int c, error;
+	int c, error = 0;
 	uint64_t txg = TXG_INITIAL;
+	nvlist_t **spares;
+	uint_t nspares;
 
 	/*
 	 * If this pool already exists, return failure.
@@ -753,23 +1011,26 @@ spa_create(const char *pool, nvlist_t *nvroot, const char *altroot)
 	 */
 	spa_config_enter(spa, RW_WRITER, FTAG);
 
-	rvd = spa_config_parse(spa, nvroot, NULL, 0, VDEV_ALLOC_ADD);
+	error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_ADD);
 
-	ASSERT(spa->spa_root_vdev == rvd);
+	ASSERT(error != 0 || rvd != NULL);
+	ASSERT(error != 0 || spa->spa_root_vdev == rvd);
 
-	if (rvd == NULL) {
+	if (error == 0 && rvd->vdev_children == 0)
 		error = EINVAL;
-	} else {
-		if ((error = vdev_create(rvd, txg)) == 0) {
-			for (c = 0; c < rvd->vdev_children; c++)
-				vdev_init(rvd->vdev_child[c], txg);
-			vdev_config_dirty(rvd);
-		}
+
+	if (error == 0 &&
+	    (error = vdev_create(rvd, txg, B_FALSE)) == 0 &&
+	    (error = spa_validate_spares(spa, nvroot, txg,
+	    VDEV_ALLOC_ADD)) == 0) {
+		for (c = 0; c < rvd->vdev_children; c++)
+			vdev_init(rvd->vdev_child[c], txg);
+		vdev_config_dirty(rvd);
 	}
 
 	spa_config_exit(spa, FTAG);
 
-	if (error) {
+	if (error != 0) {
 		spa_unload(spa);
 		spa_deactivate(spa);
 		spa_remove(spa);
@@ -777,6 +1038,21 @@ spa_create(const char *pool, nvlist_t *nvroot, const char *altroot)
 		return (error);
 	}
 
+	/*
+	 * Get the list of spares, if specified.
+	 */
+	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
+	    &spares, &nspares) == 0) {
+		VERIFY(nvlist_alloc(&spa->spa_sparelist, NV_UNIQUE_NAME,
+		    KM_SLEEP) == 0);
+		VERIFY(nvlist_add_nvlist_array(spa->spa_sparelist,
+		    ZPOOL_CONFIG_SPARES, spares, nspares) == 0);
+		spa_config_enter(spa, RW_WRITER, FTAG);
+		spa_load_spares(spa);
+		spa_config_exit(spa, FTAG);
+		spa->spa_sync_spares = B_TRUE;
+	}
+
 	spa->spa_dsl_pool = dp = dsl_pool_create(spa, txg);
 	spa->spa_meta_objset = dp->dp_meta_objset;
 
@@ -795,6 +1071,14 @@ spa_create(const char *pool, nvlist_t *nvroot, const char *altroot)
 		cmn_err(CE_PANIC, "failed to add pool config");
 	}
 
+	/* Newly created pools are always deflated. */
+	spa->spa_deflate = TRUE;
+	if (zap_add(spa->spa_meta_objset,
+	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE,
+	    sizeof (uint64_t), 1, &spa->spa_deflate, tx) != 0) {
+		cmn_err(CE_PANIC, "failed to add deflate");
+	}
+
 	/*
 	 * Create the deferred-free bplist object.  Turn off compression
 	 * because sync-to-convergence takes longer if the blocksize
@@ -838,6 +1122,9 @@ spa_import(const char *pool, nvlist_t *config, const char *altroot)
 {
 	spa_t *spa;
 	int error;
+	nvlist_t *nvroot;
+	nvlist_t **spares;
+	uint_t nspares;
 
 	if (!(spa_mode & FWRITE))
 		return (EROFS);
@@ -864,7 +1151,25 @@ spa_import(const char *pool, nvlist_t *config, const char *altroot)
 	 */
 	error = spa_load(spa, config, SPA_LOAD_IMPORT, B_TRUE);
 
-	if (error) {
+	spa_config_enter(spa, RW_WRITER, FTAG);
+	/*
+	 * Toss any existing sparelist, as it doesn't have any validity anymore,
+	 * and conflicts with spa_has_spare().
+	 */
+	if (spa->spa_sparelist) {
+		nvlist_free(spa->spa_sparelist);
+		spa->spa_sparelist = NULL;
+		spa_load_spares(spa);
+	}
+
+	VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
+	    &nvroot) == 0);
+	if (error == 0)
+		error = spa_validate_spares(spa, nvroot, -1ULL,
+		    VDEV_ALLOC_SPARE);
+	spa_config_exit(spa, FTAG);
+
+	if (error != 0) {
 		spa_unload(spa);
 		spa_deactivate(spa);
 		spa_remove(spa);
@@ -873,6 +1178,26 @@ spa_import(const char *pool, nvlist_t *config, const char *altroot)
 	}
 
 	/*
+	 * Override any spares as specified by the user, as these may have
+	 * correct device names/devids, etc.
+	 */
+	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
+	    &spares, &nspares) == 0) {
+		if (spa->spa_sparelist)
+			VERIFY(nvlist_remove(spa->spa_sparelist,
+			    ZPOOL_CONFIG_SPARES, DATA_TYPE_NVLIST_ARRAY) == 0);
+		else
+			VERIFY(nvlist_alloc(&spa->spa_sparelist,
+			    NV_UNIQUE_NAME, KM_SLEEP) == 0);
+		VERIFY(nvlist_add_nvlist_array(spa->spa_sparelist,
+		    ZPOOL_CONFIG_SPARES, spares, nspares) == 0);
+		spa_config_enter(spa, RW_WRITER, FTAG);
+		spa_load_spares(spa);
+		spa_config_exit(spa, FTAG);
+		spa->spa_sync_spares = B_TRUE;
+	}
+
+	/*
 	 * Update the config cache to include the newly-imported pool.
 	 */
 	spa_config_update(spa, SPA_CONFIG_UPDATE_POOL);
@@ -933,6 +1258,11 @@ spa_tryimport(nvlist_t *tryconfig)
 		    poolname) == 0);
 		VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE,
 		    state) == 0);
+
+		/*
+		 * Add the list of hot spares.
+		 */
+		spa_add_spares(spa, config);
 	}
 
 	spa_unload(spa);
@@ -1083,26 +1413,80 @@ spa_vdev_add(spa_t *spa, nvlist_t *nvroot)
 	int c, error;
 	vdev_t *rvd = spa->spa_root_vdev;
 	vdev_t *vd, *tvd;
+	nvlist_t **spares;
+	uint_t i, nspares;
 
 	txg = spa_vdev_enter(spa);
 
-	vd = spa_config_parse(spa, nvroot, NULL, 0, VDEV_ALLOC_ADD);
+	if ((error = spa_config_parse(spa, &vd, nvroot, NULL, 0,
+	    VDEV_ALLOC_ADD)) != 0)
+		return (spa_vdev_exit(spa, NULL, txg, error));
 
-	if (vd == NULL)
+	if ((error = spa_validate_spares(spa, nvroot, txg,
+	    VDEV_ALLOC_ADD)) != 0)
+		return (spa_vdev_exit(spa, vd, txg, error));
+
+	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
+	    &spares, &nspares) != 0)
+		nspares = 0;
+
+	if (vd->vdev_children == 0 && nspares == 0)
 		return (spa_vdev_exit(spa, vd, txg, EINVAL));
 
-	if ((error = vdev_create(vd, txg)) != 0)
-		return (spa_vdev_exit(spa, vd, txg, error));
+	if (vd->vdev_children != 0) {
+		if ((error = vdev_create(vd, txg, B_FALSE)) != 0)
+			return (spa_vdev_exit(spa, vd, txg, error));
 
-	/*
-	 * Transfer each new top-level vdev from vd to rvd.
-	 */
-	for (c = 0; c < vd->vdev_children; c++) {
-		tvd = vd->vdev_child[c];
-		vdev_remove_child(vd, tvd);
-		tvd->vdev_id = rvd->vdev_children;
-		vdev_add_child(rvd, tvd);
-		vdev_config_dirty(tvd);
+		/*
+		 * Transfer each new top-level vdev from vd to rvd.
+		 */
+		for (c = 0; c < vd->vdev_children; c++) {
+			tvd = vd->vdev_child[c];
+			vdev_remove_child(vd, tvd);
+			tvd->vdev_id = rvd->vdev_children;
+			vdev_add_child(rvd, tvd);
+			vdev_config_dirty(tvd);
+		}
+	}
+
+	if (nspares != 0) {
+		if (spa->spa_sparelist != NULL) {
+			nvlist_t **oldspares;
+			uint_t oldnspares;
+			nvlist_t **newspares;
+
+			VERIFY(nvlist_lookup_nvlist_array(spa->spa_sparelist,
+			    ZPOOL_CONFIG_SPARES, &oldspares, &oldnspares) == 0);
+
+			newspares = kmem_alloc(sizeof (void *) *
+			    (nspares + oldnspares), KM_SLEEP);
+			for (i = 0; i < oldnspares; i++)
+				VERIFY(nvlist_dup(oldspares[i],
+				    &newspares[i], KM_SLEEP) == 0);
+			for (i = 0; i < nspares; i++)
+				VERIFY(nvlist_dup(spares[i],
+				    &newspares[i + oldnspares],
+				    KM_SLEEP) == 0);
+
+			VERIFY(nvlist_remove(spa->spa_sparelist,
+			    ZPOOL_CONFIG_SPARES, DATA_TYPE_NVLIST_ARRAY) == 0);
+
+			VERIFY(nvlist_add_nvlist_array(spa->spa_sparelist,
+			    ZPOOL_CONFIG_SPARES, newspares,
+			    nspares + oldnspares) == 0);
+			for (i = 0; i < oldnspares + nspares; i++)
+				nvlist_free(newspares[i]);
+			kmem_free(newspares, (oldnspares + nspares) *
+			    sizeof (void *));
+		} else {
+			VERIFY(nvlist_alloc(&spa->spa_sparelist,
+			    NV_UNIQUE_NAME, KM_SLEEP) == 0);
+			VERIFY(nvlist_add_nvlist_array(spa->spa_sparelist,
+			    ZPOOL_CONFIG_SPARES, spares, nspares) == 0);
+		}
+
+		spa_load_spares(spa);
+		spa->spa_sync_spares = B_TRUE;
 	}
 
 	/*
@@ -1147,7 +1531,7 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing)
 	int error;
 	vdev_t *rvd = spa->spa_root_vdev;
 	vdev_t *oldvd, *newvd, *newrootvd, *pvd, *tvd;
-	vdev_ops_t *pvops = replacing ? &vdev_replacing_ops : &vdev_mirror_ops;
+	vdev_ops_t *pvops;
 
 	txg = spa_vdev_enter(spa);
 
@@ -1161,18 +1545,8 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing)
 
 	pvd = oldvd->vdev_parent;
 
-	/*
-	 * The parent must be a mirror or the root, unless we're replacing;
-	 * in that case, the parent can be anything but another replacing vdev.
-	 */
-	if (pvd->vdev_ops != &vdev_mirror_ops &&
-	    pvd->vdev_ops != &vdev_root_ops &&
-	    (!replacing || pvd->vdev_ops == &vdev_replacing_ops))
-		return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));
-
-	newrootvd = spa_config_parse(spa, nvroot, NULL, 0, VDEV_ALLOC_ADD);
-
-	if (newrootvd == NULL || newrootvd->vdev_children != 1)
+	if ((error = spa_config_parse(spa, &newrootvd, nvroot, NULL, 0,
+	    VDEV_ALLOC_ADD)) != 0 || newrootvd->vdev_children != 1)
 		return (spa_vdev_exit(spa, newrootvd, txg, EINVAL));
 
 	newvd = newrootvd->vdev_child[0];
@@ -1180,9 +1554,43 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing)
 	if (!newvd->vdev_ops->vdev_op_leaf)
 		return (spa_vdev_exit(spa, newrootvd, txg, EINVAL));
 
-	if ((error = vdev_create(newrootvd, txg)) != 0)
+	if ((error = vdev_create(newrootvd, txg, replacing)) != 0)
 		return (spa_vdev_exit(spa, newrootvd, txg, error));
 
+	if (!replacing) {
+		/*
+		 * For attach, the only allowable parent is a mirror or the root
+		 * vdev.
+		 */
+		if (pvd->vdev_ops != &vdev_mirror_ops &&
+		    pvd->vdev_ops != &vdev_root_ops)
+			return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));
+
+		pvops = &vdev_mirror_ops;
+	} else {
+		/*
+		 * Active hot spares can only be replaced by inactive hot
+		 * spares.
+		 */
+		if (pvd->vdev_ops == &vdev_spare_ops &&
+		    pvd->vdev_child[1] == oldvd &&
+		    !spa_has_spare(spa, newvd->vdev_guid))
+			return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));
+
+		/*
+		 * If the new device is a hot spare, and the parent isn't
+		 * already a spare, then we want to create a new spare vdev.
+		 * Otherwise, we want to create a replacing vdev.
+		 */
+		if (pvd->vdev_ops == &vdev_replacing_ops)
+			return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));
+		else if (pvd->vdev_ops != &vdev_spare_ops &&
+		    newvd->vdev_isspare)
+			pvops = &vdev_spare_ops;
+		else
+			pvops = &vdev_replacing_ops;
+	}
+
 	/*
 	 * Compare the new device size with the replaceable/attachable
 	 * device size.
@@ -1214,8 +1622,8 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing)
 	}
 
 	/*
-	 * If the parent is not a mirror, or if we're replacing,
-	 * insert the new mirror/replacing vdev above oldvd.
+	 * If the parent is not a mirror, or if we're replacing, insert the new
+	 * mirror/replacing/spare vdev above oldvd.
 	 */
 	if (pvd->vdev_ops != pvops)
 		pvd = vdev_add_parent(oldvd, pvops);
@@ -1283,6 +1691,8 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, int replace_done)
 	int c, t, error;
 	vdev_t *rvd = spa->spa_root_vdev;
 	vdev_t *vd, *pvd, *cvd, *tvd;
+	boolean_t unspare = B_FALSE;
+	uint64_t unspare_guid;
 
 	txg = spa_vdev_enter(spa);
 
@@ -1298,17 +1708,27 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, int replace_done)
 
 	/*
 	 * If replace_done is specified, only remove this device if it's
-	 * the first child of a replacing vdev.
-	 */
-	if (replace_done &&
-	    (vd->vdev_id != 0 || pvd->vdev_ops != &vdev_replacing_ops))
-		return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));
+	 * the first child of a replacing vdev.  For the 'spare' vdev, either
+	 * disk can be removed.
+	 */
+	if (replace_done) {
+		if (pvd->vdev_ops == &vdev_replacing_ops) {
+			if (vd->vdev_id != 0)
+				return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));
+		} else if (pvd->vdev_ops != &vdev_spare_ops) {
+			return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));
+		}
+	}
+
+	ASSERT(pvd->vdev_ops != &vdev_spare_ops ||
+	    spa_version(spa) >= ZFS_VERSION_SPARES);
 
 	/*
-	 * Only mirror and replacing vdevs support detach.
+	 * Only mirror, replacing, and spare vdevs support detach.
 	 */
 	if (pvd->vdev_ops != &vdev_replacing_ops &&
-	    pvd->vdev_ops != &vdev_mirror_ops)
+	    pvd->vdev_ops != &vdev_mirror_ops &&
+	    pvd->vdev_ops != &vdev_spare_ops)
 		return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));
 
 	/*
@@ -1339,10 +1759,25 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, int replace_done)
 		if (!dirty)
 			break;
 	}
-	if (c == pvd->vdev_children)
+
+	/*
+	 * If we are a replacing or spare vdev, then we can always detach the
+	 * latter child, as that is how one cancels the operation.
+	 */
+	if ((pvd->vdev_ops == &vdev_mirror_ops || vd->vdev_id != 1) &&
+	    c == pvd->vdev_children)
 		return (spa_vdev_exit(spa, NULL, txg, EBUSY));
 
 	/*
+	 * If we are detaching the original disk from a spare, then it implies
+	 * that the spare should become a real disk, and be removed from the
+	 * active spare list for the pool.
+	 */
+	if (pvd->vdev_ops == &vdev_spare_ops &&
+	    vd->vdev_id == 0)
+		unspare = B_TRUE;
+
+	/*
 	 * Erase the disk labels so the disk can be used for other things.
 	 * This must be done after all other error cases are handled,
 	 * but before we disembowel vd (so we can still do I/O to it).
@@ -1350,7 +1785,7 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, int replace_done)
 	 * it may be that the unwritability of the disk is the reason
 	 * it's being detached!
 	 */
-	error = vdev_label_init(vd, 0);
+	error = vdev_label_init(vd, 0, B_FALSE);
 	if (error)
 		dprintf("unable to erase labels on %s\n", vdev_description(vd));
 
@@ -1366,6 +1801,19 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, int replace_done)
 	cvd = pvd->vdev_child[0];
 
 	/*
+	 * If we need to remove the remaining child from the list of hot spares,
+	 * do it now, marking the vdev as no longer a spare in the process.  We
+	 * must do this before vdev_remove_parent(), because that can change the
+	 * GUID if it creates a new toplevel GUID.
+	 */
+	if (unspare) {
+		ASSERT(cvd->vdev_isspare);
+		spa_spare_remove(cvd->vdev_guid);
+		cvd->vdev_isspare = B_FALSE;
+		unspare_guid = cvd->vdev_guid;
+	}
+
+	/*
 	 * If the parent mirror/replacing vdev only has one child,
 	 * the parent is no longer needed.  Remove it from the tree.
 	 */
@@ -1408,7 +1856,104 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, int replace_done)
 
 	dprintf("detached %s in txg %llu\n", vd->vdev_path, txg);
 
-	return (spa_vdev_exit(spa, vd, txg, 0));
+	error = spa_vdev_exit(spa, vd, txg, 0);
+
+	/*
+	 * If we are supposed to remove the given vdev from the list of spares,
+	 * iterate over all pools in the system and remove it if it's present.
+	 */
+	if (unspare) {
+		spa = NULL;
+		mutex_enter(&spa_namespace_lock);
+		while ((spa = spa_next(spa)) != NULL) {
+			if (spa->spa_state != POOL_STATE_ACTIVE)
+				continue;
+
+			(void) spa_vdev_remove(spa, unspare_guid, B_TRUE);
+		}
+		mutex_exit(&spa_namespace_lock);
+	}
+
+	return (error);
+}
+
+/*
+ * Remove a device from the pool.  Currently, this supports removing only hot
+ * spares.
+ */
+int
+spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare)
+{
+	vdev_t *vd;
+	nvlist_t **spares, *nv, **newspares;
+	uint_t i, j, nspares;
+	int ret = 0;
+
+	spa_config_enter(spa, RW_WRITER, FTAG);
+
+	vd = spa_lookup_by_guid(spa, guid);
+
+	nv = NULL;
+	if (spa->spa_spares != NULL &&
+	    nvlist_lookup_nvlist_array(spa->spa_sparelist, ZPOOL_CONFIG_SPARES,
+	    &spares, &nspares) == 0) {
+		for (i = 0; i < nspares; i++) {
+			uint64_t theguid;
+
+			VERIFY(nvlist_lookup_uint64(spares[i],
+			    ZPOOL_CONFIG_GUID, &theguid) == 0);
+			if (theguid == guid) {
+				nv = spares[i];
+				break;
+			}
+		}
+	}
+
+	/*
+	 * We only support removing a hot spare, and only if it's not currently
+	 * in use in this pool.
+	 */
+	if (nv == NULL && vd == NULL) {
+		ret = ENOENT;
+		goto out;
+	}
+
+	if (nv == NULL && vd != NULL) {
+		ret = ENOTSUP;
+		goto out;
+	}
+
+	if (!unspare && nv != NULL && vd != NULL) {
+		ret = EBUSY;
+		goto out;
+	}
+
+	if (nspares == 1) {
+		newspares = NULL;
+	} else {
+		newspares = kmem_alloc((nspares - 1) * sizeof (void *),
+		    KM_SLEEP);
+		for (i = 0, j = 0; i < nspares; i++) {
+			if (spares[i] != nv)
+				VERIFY(nvlist_dup(spares[i],
+				    &newspares[j++], KM_SLEEP) == 0);
+		}
+	}
+
+	VERIFY(nvlist_remove(spa->spa_sparelist, ZPOOL_CONFIG_SPARES,
+	    DATA_TYPE_NVLIST_ARRAY) == 0);
+	VERIFY(nvlist_add_nvlist_array(spa->spa_sparelist, ZPOOL_CONFIG_SPARES,
+	    newspares, nspares - 1) == 0);
+	for (i = 0; i < nspares - 1; i++)
+		nvlist_free(newspares[i]);
+	kmem_free(newspares, (nspares - 1) * sizeof (void *));
+	spa_load_spares(spa);
+	spa->spa_sync_spares = B_TRUE;
+
+out:
+	spa_config_exit(spa, FTAG);
+
+	return (ret);
 }
 
 /*
@@ -1446,15 +1991,31 @@ static void
 spa_vdev_replace_done(spa_t *spa)
 {
 	vdev_t *vd;
+	vdev_t *pvd;
 	uint64_t guid;
+	uint64_t pguid = 0;
 
 	spa_config_enter(spa, RW_READER, FTAG);
 
 	while ((vd = spa_vdev_replace_done_hunt(spa->spa_root_vdev)) != NULL) {
 		guid = vd->vdev_guid;
+		/*
+		 * If we have just finished replacing a hot spared device, then
+		 * we need to detach the spare vdev's second child (the hot
+		 * spare that was standing in for it) as well.
+		 */
+		pvd = vd->vdev_parent;
+		if (pvd->vdev_parent->vdev_ops == &vdev_spare_ops &&
+		    pvd->vdev_id == 0) {
+			ASSERT(pvd->vdev_ops == &vdev_replacing_ops);
+			ASSERT(pvd->vdev_parent->vdev_children == 2);
+			pguid = pvd->vdev_parent->vdev_child[1]->vdev_guid;
+		}
 		spa_config_exit(spa, FTAG);
 		if (spa_vdev_detach(spa, guid, B_TRUE) != 0)
 			return;
+		if (pguid != 0 && spa_vdev_detach(spa, pguid, B_TRUE) != 0)
+			return;
 		spa_config_enter(spa, RW_READER, FTAG);
 	}
 
@@ -1475,8 +2036,36 @@ spa_vdev_setpath(spa_t *spa, uint64_t guid, const char *newpath)
 
 	txg = spa_vdev_enter(spa);
 
-	if ((vd = vdev_lookup_by_guid(rvd, guid)) == NULL)
-		return (spa_vdev_exit(spa, NULL, txg, ENOENT));
+	if ((vd = vdev_lookup_by_guid(rvd, guid)) == NULL) {
+		/*
+		 * Determine if this is a reference to a hot spare.  In that
+		 * case, update the path as stored in the spare list.
+		 */
+		nvlist_t **spares;
+		uint_t i, nspares;
+		if (spa->spa_sparelist != NULL) {
+			VERIFY(nvlist_lookup_nvlist_array(spa->spa_sparelist,
+			    ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0);
+			for (i = 0; i < nspares; i++) {
+				uint64_t theguid;
+				VERIFY(nvlist_lookup_uint64(spares[i],
+				    ZPOOL_CONFIG_GUID, &theguid) == 0);
+				if (theguid == guid)
+					break;
+			}
+
+			if (i == nspares)
+				return (spa_vdev_exit(spa, NULL, txg, ENOENT));
+
+			VERIFY(nvlist_add_string(spares[i],
+			    ZPOOL_CONFIG_PATH, newpath) == 0);
+			spa_load_spares(spa);
+			spa->spa_sync_spares = B_TRUE;
+			return (spa_vdev_exit(spa, NULL, txg, 0));
+		} else {
+			return (spa_vdev_exit(spa, NULL, txg, ENOENT));
+		}
+	}
 
 	if (!vd->vdev_ops->vdev_op_leaf)
 		return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));
@@ -2049,41 +2638,92 @@ spa_sync_deferred_frees(spa_t *spa, uint64_t txg)
 }
 
 static void
-spa_sync_config_object(spa_t *spa, dmu_tx_t *tx)
+spa_sync_nvlist(spa_t *spa, uint64_t obj, nvlist_t *nv, dmu_tx_t *tx)
 {
-	nvlist_t *config;
 	char *packed = NULL;
 	size_t nvsize = 0;
 	dmu_buf_t *db;
 
-	if (list_is_empty(&spa->spa_dirty_list))
-		return;
-
-	config = spa_config_generate(spa, NULL, dmu_tx_get_txg(tx), B_FALSE);
-
-	if (spa->spa_config_syncing)
-		nvlist_free(spa->spa_config_syncing);
-	spa->spa_config_syncing = config;
-
-	VERIFY(nvlist_size(config, &nvsize, NV_ENCODE_XDR) == 0);
+	VERIFY(nvlist_size(nv, &nvsize, NV_ENCODE_XDR) == 0);
 
 	packed = kmem_alloc(nvsize, KM_SLEEP);
 
-	VERIFY(nvlist_pack(config, &packed, &nvsize, NV_ENCODE_XDR,
+	VERIFY(nvlist_pack(nv, &packed, &nvsize, NV_ENCODE_XDR,
 	    KM_SLEEP) == 0);
 
-	dmu_write(spa->spa_meta_objset, spa->spa_config_object, 0, nvsize,
-	    packed, tx);
+	dmu_write(spa->spa_meta_objset, obj, 0, nvsize, packed, tx);
 
 	kmem_free(packed, nvsize);
 
-	VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset,
-	    spa->spa_config_object, FTAG, &db));
+	VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db));
 	dmu_buf_will_dirty(db, tx);
 	*(uint64_t *)db->db_data = nvsize;
 	dmu_buf_rele(db, FTAG);
 }
 
+static void
+spa_sync_spares(spa_t *spa, dmu_tx_t *tx)
+{
+	nvlist_t *nvroot;
+	nvlist_t **spares;
+	int i;
+
+	if (!spa->spa_sync_spares)
+		return;
+
+	/*
+	 * Update the MOS nvlist describing the list of available spares.
+	 * spa_validate_spares() will have already made sure this nvlist is
+	 * valid and the vdevs are labelled appropriately.
+	 */
+	if (spa->spa_spares_object == 0) {
+		spa->spa_spares_object = dmu_object_alloc(spa->spa_meta_objset,
+		    DMU_OT_PACKED_NVLIST, 1 << 14,
+		    DMU_OT_PACKED_NVLIST_SIZE, sizeof (uint64_t), tx);
+		VERIFY(zap_update(spa->spa_meta_objset,
+		    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SPARES,
+		    sizeof (uint64_t), 1, &spa->spa_spares_object, tx) == 0);
+	}
+
+	VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0);
+	if (spa->spa_nspares == 0) {
+		VERIFY(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
+		    NULL, 0) == 0);
+	} else {
+		spares = kmem_alloc(spa->spa_nspares * sizeof (void *),
+		    KM_SLEEP);
+		for (i = 0; i < spa->spa_nspares; i++)
+			spares[i] = vdev_config_generate(spa,
+			    spa->spa_spares[i], B_FALSE, B_TRUE);
+		VERIFY(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
+		    spares, spa->spa_nspares) == 0);
+		for (i = 0; i < spa->spa_nspares; i++)
+			nvlist_free(spares[i]);
+		kmem_free(spares, spa->spa_nspares * sizeof (void *));
+	}
+
+	spa_sync_nvlist(spa, spa->spa_spares_object, nvroot, tx);
+	nvlist_free(nvroot);	/* contents were packed into the MOS object */
+	spa->spa_sync_spares = B_FALSE;
+}
+
+static void
+spa_sync_config_object(spa_t *spa, dmu_tx_t *tx)
+{
+	nvlist_t *config;
+
+	if (list_is_empty(&spa->spa_dirty_list))
+		return;
+
+	config = spa_config_generate(spa, NULL, dmu_tx_get_txg(tx), B_FALSE);
+
+	if (spa->spa_config_syncing)
+		nvlist_free(spa->spa_config_syncing);
+	spa->spa_config_syncing = config;
+
+	spa_sync_nvlist(spa, spa->spa_config_object, config, tx);
+}
+
 /*
  * Sync the specified transaction group.  New blocks may be dirtied as
  * part of the process, so we iterate until it converges.
@@ -2109,6 +2749,29 @@ spa_sync(spa_t *spa, uint64_t txg)
 
 	VERIFY(0 == bplist_open(bpl, mos, spa->spa_sync_bplist_obj));
 
+	tx = dmu_tx_create_assigned(dp, txg);
+
+	/*
+	 * If we are upgrading to ZFS_VERSION_RAIDZ_DEFLATE this txg,
+	 * set spa_deflate if we have no raid-z vdevs.
+	 */
+	if (spa->spa_ubsync.ub_version < ZFS_VERSION_RAIDZ_DEFLATE &&
+	    spa->spa_uberblock.ub_version >= ZFS_VERSION_RAIDZ_DEFLATE) {
+		int i;
+
+		for (i = 0; i < rvd->vdev_children; i++) {
+			vd = rvd->vdev_child[i];
+			if (vd->vdev_deflate_ratio != SPA_MINBLOCKSIZE)
+				break;
+		}
+		if (i == rvd->vdev_children) {
+			spa->spa_deflate = TRUE;
+			VERIFY(0 == zap_add(spa->spa_meta_objset,
+			    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE,
+			    sizeof (uint64_t), 1, &spa->spa_deflate, tx));
+		}
+	}
+
 	/*
 	 * If anything has changed in this txg, push the deferred frees
 	 * from the previous txg.  If not, leave them alone so that we
@@ -2124,12 +2787,9 @@ spa_sync(spa_t *spa, uint64_t txg)
 	do {
 		spa->spa_sync_pass++;
 
-		tx = dmu_tx_create_assigned(dp, txg);
 		spa_sync_config_object(spa, tx);
-		dmu_tx_commit(tx);
-
+		spa_sync_spares(spa, tx);
 		spa_errlog_sync(spa, txg);
-
 		dsl_pool_sync(dp, txg);
 
 		dirty_vdevs = 0;
@@ -2138,10 +2798,7 @@ spa_sync(spa_t *spa, uint64_t txg)
 			dirty_vdevs++;
 		}
 
-		tx = dmu_tx_create_assigned(dp, txg);
 		bplist_sync(bpl, tx);
-		dmu_tx_commit(tx);
-
 	} while (dirty_vdevs);
 
 	bplist_close(bpl);
@@ -2175,6 +2832,8 @@ spa_sync(spa_t *spa, uint64_t txg)
 			VERIFY(vdev_config_sync(rvd, txg) == 0);
 	}
 
+	dmu_tx_commit(tx);
+
 	/*
 	 * Clear the dirty config list.
 	 */
@@ -2219,7 +2878,7 @@ spa_sync(spa_t *spa, uint64_t txg)
 
 	/*
 	 * It had better be the case that we didn't dirty anything
-	 * since spa_sync_labels().
+	 * since vdev_config_sync().
 	 */
 	ASSERT(txg_list_empty(&dp->dp_dirty_datasets, txg));
 	ASSERT(txg_list_empty(&dp->dp_dirty_dirs, txg));
@@ -2319,4 +2978,18 @@ spa_upgrade(spa_t *spa)
 	vdev_config_dirty(spa->spa_root_vdev);
 
 	spa_config_exit(spa, FTAG);
+
+	txg_wait_synced(spa_get_dsl(spa), 0);
+}
+
+boolean_t
+spa_has_spare(spa_t *spa, uint64_t guid)
+{
+	int i;
+
+	for (i = 0; i < spa->spa_nspares; i++)
+		if (spa->spa_spares[i]->vdev_guid == guid)
+			return (B_TRUE);
+
+	return (B_FALSE);
 }
diff --git a/usr/src/uts/common/fs/zfs/spa_config.c b/usr/src/uts/common/fs/zfs/spa_config.c
index 906f2e5470..03ba60b0e3 100644
--- a/usr/src/uts/common/fs/zfs/spa_config.c
+++ b/usr/src/uts/common/fs/zfs/spa_config.c
@@ -18,6 +18,7 @@
  *
  * CDDL HEADER END
  */
+
 /*
  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
@@ -279,7 +280,7 @@ spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats)
 	VERIFY(nvlist_alloc(&config, NV_UNIQUE_NAME, KM_SLEEP) == 0);
 
 	VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_VERSION,
-	    spa->spa_uberblock.ub_version) == 0);
+	    spa_version(spa)) == 0);
 	VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME,
 	    spa_name(spa)) == 0);
 	VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE,
@@ -294,10 +295,13 @@ spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats)
 		    vd->vdev_top->vdev_guid) == 0);
 		VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_GUID,
 		    vd->vdev_guid) == 0);
+		if (vd->vdev_isspare)
+			VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_IS_SPARE,
+			    1ULL) == 0);
 		vd = vd->vdev_top;		/* label contains top config */
 	}
 
-	nvroot = vdev_config_generate(vd, getstats);
+	nvroot = vdev_config_generate(spa, vd, getstats, B_FALSE);
 	VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0);
 	nvlist_free(nvroot);
 
diff --git a/usr/src/uts/common/fs/zfs/spa_misc.c b/usr/src/uts/common/fs/zfs/spa_misc.c
index 11267729d9..3d2ec9f0b7 100644
--- a/usr/src/uts/common/fs/zfs/spa_misc.c
+++ b/usr/src/uts/common/fs/zfs/spa_misc.c
@@ -175,6 +175,9 @@ static kcondvar_t spa_namespace_cv;
 static int spa_active_count;
 static int spa_max_replication_override = SPA_DVAS_PER_BP;
 
+static avl_tree_t spa_spare_avl;
+static kmutex_t spa_spare_lock;
+
 kmem_cache_t *spa_buffer_pool;
 int spa_mode;
 
@@ -338,6 +341,99 @@ spa_refcount_zero(spa_t *spa)
 
 /*
  * ==========================================================================
+ * SPA spare tracking
+ * ==========================================================================
+ */
+
+/*
+ * We track spare information on a global basis.  This allows us to do two
+ * things: determine when a spare is no longer referenced by any active pool,
+ * and (quickly) determine if a spare is currently in use in another pool on the
+ * system.
+ */
+typedef struct spa_spare {
+	uint64_t	spare_guid;
+	avl_node_t	spare_avl;
+	int		spare_count;
+} spa_spare_t;
+
+static int
+spa_spare_compare(const void *a, const void *b)
+{
+	const spa_spare_t *sa = a;
+	const spa_spare_t *sb = b;
+
+	if (sa->spare_guid < sb->spare_guid)
+		return (-1);
+	else if (sa->spare_guid > sb->spare_guid)
+		return (1);
+	else
+		return (0);
+}
+
+void
+spa_spare_add(uint64_t guid)
+{
+	avl_index_t where;
+	spa_spare_t search;
+	spa_spare_t *spare;
+
+	mutex_enter(&spa_spare_lock);
+
+	search.spare_guid = guid;
+	if ((spare = avl_find(&spa_spare_avl, &search, &where)) != NULL) {
+		spare->spare_count++;
+	} else {
+		spare = kmem_alloc(sizeof (spa_spare_t), KM_SLEEP);
+		spare->spare_guid = guid;
+		spare->spare_count = 1;
+		avl_insert(&spa_spare_avl, spare, where);
+	}
+
+	mutex_exit(&spa_spare_lock);
+}
+
+void
+spa_spare_remove(uint64_t guid)
+{
+	spa_spare_t search;
+	spa_spare_t *spare;
+	avl_index_t where;
+
+	mutex_enter(&spa_spare_lock);
+
+	search.spare_guid = guid;
+	spare = avl_find(&spa_spare_avl, &search, &where);
+
+	ASSERT(spare != NULL);
+
+	if (--spare->spare_count == 0) {
+		avl_remove(&spa_spare_avl, spare);
+		kmem_free(spare, sizeof (spa_spare_t));
+	}
+
+	mutex_exit(&spa_spare_lock);
+}
+
+boolean_t
+spa_spare_inuse(uint64_t guid)
+{
+	spa_spare_t search;
+	avl_index_t where;
+	boolean_t ret;
+
+	mutex_enter(&spa_spare_lock);
+
+	search.spare_guid = guid;
+	ret = (avl_find(&spa_spare_avl, &search, &where) != NULL);
+
+	mutex_exit(&spa_spare_lock);
+
+	return (ret);
+}
+
+/*
+ * ==========================================================================
  * SPA config locking
  * ==========================================================================
  */
@@ -779,7 +875,7 @@ spa_metaslab_class_select(spa_t *spa)
 }
 
 /*
- * Return pool-wide allocated space.
+ * Return how much space is allocated in the pool (i.e. sum of all asize)
  */
 uint64_t
 spa_get_alloc(spa_t *spa)
@@ -788,7 +884,7 @@ spa_get_alloc(spa_t *spa)
 }
 
 /*
- * Return pool-wide allocated space.
+ * Return how much (raid-z inflated) space there is in the pool.
  */
 uint64_t
 spa_get_space(spa_t *spa)
@@ -796,6 +892,18 @@ spa_get_space(spa_t *spa)
 	return (spa->spa_root_vdev->vdev_stat.vs_space);
 }
 
+/*
+ * Return the amount of raid-z-deflated space in the pool.
+ */
+uint64_t
+spa_get_dspace(spa_t *spa)
+{
+	if (spa->spa_deflate)
+		return (spa->spa_root_vdev->vdev_stat.vs_dspace);
+	else
+		return (spa->spa_root_vdev->vdev_stat.vs_space);
+}
+
 /* ARGSUSED */
 uint64_t
 spa_get_asize(spa_t *spa, uint64_t lsize)
@@ -828,6 +936,23 @@ spa_max_replication(spa_t *spa)
 	return (MIN(SPA_DVAS_PER_BP, spa_max_replication_override));
 }
 
+uint64_t
+bp_get_dasize(spa_t *spa, const blkptr_t *bp)
+{
+	uint64_t sz = 0;	/* 64-bit so the sum matches the return type */
+	int i;
+
+	if (!spa->spa_deflate)
+		return (BP_GET_ASIZE(bp));
+	for (i = 0; i < SPA_DVAS_PER_BP; i++) {
+		vdev_t *vd =
+		    vdev_lookup_top(spa, DVA_GET_VDEV(&bp->blk_dva[i]));
+		sz += (DVA_GET_ASIZE(&bp->blk_dva[i]) >> SPA_MINBLOCKSHIFT) *
+		    vd->vdev_deflate_ratio;
+	}
+	return (sz);
+}
+
 /*
  * ==========================================================================
  * Initialization and Termination
@@ -864,6 +989,9 @@ spa_init(int mode)
 	avl_create(&spa_namespace_avl, spa_name_compare, sizeof (spa_t),
 	    offsetof(spa_t, spa_avl));
 
+	avl_create(&spa_spare_avl, spa_spare_compare, sizeof (spa_spare_t),
+	    offsetof(spa_spare_t, spare_avl));
+
 	spa_mode = mode;
 
 	refcount_init();
@@ -885,6 +1013,7 @@ spa_fini(void)
 	refcount_fini();
 
 	avl_destroy(&spa_namespace_avl);
+	avl_destroy(&spa_spare_avl);
 
 	cv_destroy(&spa_namespace_cv);
 	mutex_destroy(&spa_namespace_lock);
diff --git a/usr/src/uts/common/fs/zfs/sys/bplist.h b/usr/src/uts/common/fs/zfs/sys/bplist.h
index c716fe7aa6..b4c83765c8 100644
--- a/usr/src/uts/common/fs/zfs/sys/bplist.h
+++ b/usr/src/uts/common/fs/zfs/sys/bplist.h
@@ -45,8 +45,12 @@ typedef struct bplist_phys {
 	 */
 	uint64_t	bpl_entries;
 	uint64_t	bpl_bytes;
+	uint64_t	bpl_comp;
+	uint64_t	bpl_uncomp;
 } bplist_phys_t;
 
+#define	BPLIST_SIZE_V0	(2 * sizeof (uint64_t))
+
 typedef struct bplist_q {
 	blkptr_t	bpq_blk;
 	void		*bpq_next;
@@ -56,8 +60,9 @@ typedef struct bplist {
 	kmutex_t	bpl_lock;
 	objset_t	*bpl_mos;
 	uint64_t	bpl_object;
-	int		bpl_blockshift;
-	int		bpl_bpshift;
+	uint8_t		bpl_blockshift;
+	uint8_t		bpl_bpshift;
+	uint8_t		bpl_havecomp;
 	bplist_q_t	*bpl_queue;
 	bplist_phys_t	*bpl_phys;
 	dmu_buf_t	*bpl_dbuf;
@@ -74,6 +79,8 @@ extern int bplist_enqueue(bplist_t *bpl, blkptr_t *bp, dmu_tx_t *tx);
 extern void bplist_enqueue_deferred(bplist_t *bpl, blkptr_t *bp);
 extern void bplist_sync(bplist_t *bpl, dmu_tx_t *tx);
 extern void bplist_vacate(bplist_t *bpl, dmu_tx_t *tx);
+extern int bplist_space(bplist_t *bpl,
+    uint64_t *usedp, uint64_t *compp, uint64_t *uncompp);
 
 #ifdef	__cplusplus
 }
diff --git a/usr/src/uts/common/fs/zfs/sys/dmu.h b/usr/src/uts/common/fs/zfs/sys/dmu.h
index 78dd9632e6..88b59a1618 100644
--- a/usr/src/uts/common/fs/zfs/sys/dmu.h
+++ b/usr/src/uts/common/fs/zfs/sys/dmu.h
@@ -177,12 +177,17 @@ typedef void dmu_buf_evict_func_t(struct dmu_buf *db, void *user_ptr);
  */
 typedef void dmu_byteswap_func_t(void *buf, size_t size);
 
+/*
+ * The names of zap entries in the DIRECTORY_OBJECT of the MOS.
+ */
 #define	DMU_POOL_DIRECTORY_OBJECT	1
 #define	DMU_POOL_CONFIG			"config"
 #define	DMU_POOL_ROOT_DATASET		"root_dataset"
 #define	DMU_POOL_SYNC_BPLIST		"sync_bplist"
 #define	DMU_POOL_ERRLOG_SCRUB		"errlog_scrub"
 #define	DMU_POOL_ERRLOG_LAST		"errlog_last"
+#define	DMU_POOL_SPARES			"spares"
+#define	DMU_POOL_DEFLATE		"deflate"
 
 /*
  * Allocate an object from this objset.  The range of object numbers
diff --git a/usr/src/uts/common/fs/zfs/sys/dnode.h b/usr/src/uts/common/fs/zfs/sys/dnode.h
index d2c9d4f3bf..48b06a6749 100644
--- a/usr/src/uts/common/fs/zfs/sys/dnode.h
+++ b/usr/src/uts/common/fs/zfs/sys/dnode.h
@@ -75,6 +75,9 @@ extern "C" {
 #define	DN_BONUS(dnp)	((void*)((dnp)->dn_bonus + \
 	(((dnp)->dn_nblkptr - 1) * sizeof (blkptr_t))))
 
+#define	DN_USED_BYTES(dnp) (((dnp)->dn_flags & DNODE_FLAG_USED_BYTES) ? \
+	(dnp)->dn_used : (dnp)->dn_used << SPA_MINBLOCKSHIFT)
+
 #define	EPB(blkshift, typeshift)	(1 << (blkshift - typeshift))
 
 struct dmu_buf_impl;
@@ -87,6 +90,9 @@ enum dnode_dirtycontext {
 	DN_DIRTY_SYNC
 };
 
+/* Is dn_used in bytes?  If not, it's in multiples of SPA_MINBLOCKSIZE */
+#define	DNODE_FLAG_USED_BYTES	(1<<0)
+
 typedef struct dnode_phys {
 	uint8_t dn_type;		/* dmu_object_type_t */
 	uint8_t dn_indblkshift;		/* ln2(indirect block size) */
@@ -95,14 +101,14 @@ typedef struct dnode_phys {
 	uint8_t dn_bonustype;		/* type of data in bonus buffer */
 	uint8_t	dn_checksum;		/* ZIO_CHECKSUM type */
 	uint8_t	dn_compress;		/* ZIO_COMPRESS type */
-	uint8_t dn_pad1[1];
+	uint8_t dn_flags;		/* DNODE_FLAG_* */
 	uint16_t dn_datablkszsec;	/* data block size in 512b sectors */
 	uint16_t dn_bonuslen;		/* length of dn_bonus */
 	uint8_t dn_pad2[4];
 
 	/* accounting is protected by dn_dirty_mtx */
 	uint64_t dn_maxblkid;		/* largest allocated block ID */
-	uint64_t dn_secphys;		/* 512b sectors of disk space used */
+	uint64_t dn_used;		/* bytes (or sectors) of disk space */
 
 	uint64_t dn_pad3[4];
 
diff --git a/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h b/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h
index 2a4ce242dc..912445b160 100644
--- a/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h
+++ b/usr/src/uts/common/fs/zfs/sys/dsl_dataset.h
@@ -44,6 +44,15 @@ struct dsl_pool;
 
 typedef void dsl_dataset_evict_func_t(struct dsl_dataset *, void *);
 
+#define	DS_FLAG_INCONSISTENT	(1ULL<<0)
+/*
+ * NB: nopromote can not yet be set, but we want support for it in this
+ * on-disk version, so that we don't need to upgrade for it later.  It
+ * will be needed when we implement 'zfs split' (where the split off
+ * clone should not be promoted).
+ */
+#define	DS_FLAG_NOPROMOTE	(1ULL<<1)
+
 typedef struct dsl_dataset_phys {
 	uint64_t ds_dir_obj;
 	uint64_t ds_prev_snap_obj;
@@ -65,9 +74,9 @@ typedef struct dsl_dataset_phys {
 	 */
 	uint64_t ds_fsid_guid;
 	uint64_t ds_guid;
-	uint64_t ds_inconsistent; /* boolean */
+	uint64_t ds_flags;
 	blkptr_t ds_bp;
-	uint64_t ds_pad[8]; /* pad out to 256 bytes for good measure */
+	uint64_t ds_pad[8]; /* pad out to 320 bytes for good measure */
 } dsl_dataset_phys_t;
 
 typedef struct dsl_dataset {
@@ -119,6 +128,7 @@ int dsl_dataset_destroy_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx);
 int dsl_dataset_rollback(const char *name);
 int dsl_dataset_rollback_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx);
 int dsl_dataset_rename(const char *name, const char *newname);
+int dsl_dataset_promote(const char *name);
 
 void *dsl_dataset_set_user_ptr(dsl_dataset_t *ds,
     void *p, dsl_dataset_evict_func_t func);
diff --git a/usr/src/uts/common/fs/zfs/sys/dsl_dir.h b/usr/src/uts/common/fs/zfs/sys/dsl_dir.h
index 5c23fdc497..123d6d128f 100644
--- a/usr/src/uts/common/fs/zfs/sys/dsl_dir.h
+++ b/usr/src/uts/common/fs/zfs/sys/dsl_dir.h
@@ -121,6 +121,7 @@ int dsl_dir_sync_task(dsl_dir_t *dd,
 int dsl_dir_set_quota(const char *ddname, uint64_t quota);
 int dsl_dir_set_reservation(const char *ddname, uint64_t reservation);
 int dsl_dir_rename_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx);
+int dsl_dir_transfer_possible(dsl_dir_t *sdd, dsl_dir_t *tdd, uint64_t space);
 
 #ifdef ZFS_DEBUG
 #define	dprintf_dd(dd, fmt, ...) do { \
diff --git a/usr/src/uts/common/fs/zfs/sys/dsl_prop.h b/usr/src/uts/common/fs/zfs/sys/dsl_prop.h
index bf03cfa799..95094641c5 100644
--- a/usr/src/uts/common/fs/zfs/sys/dsl_prop.h
+++ b/usr/src/uts/common/fs/zfs/sys/dsl_prop.h
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -42,10 +41,9 @@ struct dsl_dataset;
 /* The callback func may not call into the DMU or DSL! */
 typedef void (dsl_prop_changed_cb_t)(void *arg, uint64_t newval);
 
-#define	DSL_PROP_VALUE_UNDEFINED (-1ULL)
-
 typedef struct dsl_prop_cb_record {
 	list_node_t cbr_node; /* link on dd_prop_cbs */
+	struct dsl_dataset *cbr_ds;
 	const char *cbr_propname;
 	dsl_prop_changed_cb_t *cbr_func;
 	void *cbr_arg;
@@ -55,6 +53,7 @@ int dsl_prop_register(struct dsl_dataset *ds, const char *propname,
     dsl_prop_changed_cb_t *callback, void *cbarg);
 int dsl_prop_unregister(struct dsl_dataset *ds, const char *propname,
     dsl_prop_changed_cb_t *callback, void *cbarg);
+int dsl_prop_numcb(struct dsl_dataset *ds);
 
 int dsl_prop_get(const char *ddname, const char *propname,
     int intsz, int numints, void *buf, char *setpoint);
diff --git a/usr/src/uts/common/fs/zfs/sys/spa.h b/usr/src/uts/common/fs/zfs/sys/spa.h
index a51cfd524f..829c025af2 100644
--- a/usr/src/uts/common/fs/zfs/sys/spa.h
+++ b/usr/src/uts/common/fs/zfs/sys/spa.h
@@ -232,7 +232,11 @@ typedef struct blkptr {
 
 #define	BP_GET_ASIZE(bp)	\
 	(DVA_GET_ASIZE(&(bp)->blk_dva[0]) + DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \
-	DVA_GET_ASIZE(&(bp)->blk_dva[2]))
+		DVA_GET_ASIZE(&(bp)->blk_dva[2]))
+
+#define	BP_GET_UCSIZE(bp) \
+	((BP_GET_LEVEL(bp) > 0 || dmu_ot[BP_GET_TYPE(bp)].ot_metadata) ? \
+	BP_GET_PSIZE(bp) : BP_GET_LSIZE(bp))
 
 #define	BP_GET_NDVAS(bp)	\
 	(!!DVA_GET_ASIZE(&(bp)->blk_dva[0]) + \
@@ -326,8 +330,14 @@ extern int spa_vdev_add(spa_t *spa, nvlist_t *nvroot);
 extern int spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot,
     int replacing);
 extern int spa_vdev_detach(spa_t *spa, uint64_t guid, int replace_done);
+extern int spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare);
 extern int spa_vdev_setpath(spa_t *spa, uint64_t guid, const char *newpath);
 
+/* spare state (which is global across all pools) */
+extern void spa_spare_add(uint64_t guid);
+extern void spa_spare_remove(uint64_t guid);
+extern boolean_t spa_spare_inuse(uint64_t guid);
+
 /* scrubbing */
 extern int spa_scrub(spa_t *spa, pool_scrub_type_t type, boolean_t force);
 extern void spa_scrub_suspend(spa_t *spa);
@@ -390,12 +400,14 @@ extern char *spa_name(spa_t *spa);
 extern uint64_t spa_guid(spa_t *spa);
 extern uint64_t spa_last_synced_txg(spa_t *spa);
 extern uint64_t spa_first_txg(spa_t *spa);
+extern uint64_t spa_version(spa_t *spa);
 extern int spa_state(spa_t *spa);
 extern uint64_t spa_freeze_txg(spa_t *spa);
 struct metaslab_class;
 extern struct metaslab_class *spa_metaslab_class_select(spa_t *spa);
 extern uint64_t spa_get_alloc(spa_t *spa);
 extern uint64_t spa_get_space(spa_t *spa);
+extern uint64_t spa_get_dspace(spa_t *spa);
 extern uint64_t spa_get_asize(spa_t *spa, uint64_t lsize);
 extern uint64_t spa_version(spa_t *spa);
 extern int spa_max_replication(spa_t *spa);
@@ -412,6 +424,8 @@ extern void spa_freeze(spa_t *spa);
 extern void spa_upgrade(spa_t *spa);
 extern void spa_evict_all(void);
 extern vdev_t *spa_lookup_by_guid(spa_t *spa, uint64_t guid);
+extern boolean_t spa_has_spare(spa_t *, uint64_t guid);
+extern uint64_t bp_get_dasize(spa_t *spa, const blkptr_t *bp);
 
 /* error handling */
 struct zbookmark;
diff --git a/usr/src/uts/common/fs/zfs/sys/spa_impl.h b/usr/src/uts/common/fs/zfs/sys/spa_impl.h
index e4df4c9eab..9a2fea9c21 100644
--- a/usr/src/uts/common/fs/zfs/sys/spa_impl.h
+++ b/usr/src/uts/common/fs/zfs/sys/spa_impl.h
@@ -65,7 +65,6 @@ struct spa {
 	nvlist_t	*spa_config;		/* last synced config */
 	nvlist_t	*spa_config_syncing;	/* currently syncing config */
 	uint64_t	spa_config_txg;		/* txg of last config change */
-	spa_config_lock_t spa_config_lock;	/* configuration changes */
 	kmutex_t	spa_config_cache_lock;	/* for spa_config RW_READER */
 	int		spa_sync_pass;		/* iterate-to-convergence */
 	int		spa_state;		/* pool state */
@@ -84,6 +83,11 @@ struct spa {
 	txg_list_t	spa_vdev_txg_list;	/* per-txg dirty vdev list */
 	vdev_t		*spa_root_vdev;		/* top-level vdev container */
 	list_t		spa_dirty_list;		/* vdevs with dirty labels */
+	uint64_t	spa_spares_object;	/* MOS object for spare list */
+	nvlist_t	*spa_sparelist;		/* cached spare config */
+	vdev_t		**spa_spares;		/* available hot spares */
+	int		spa_nspares;		/* number of hot spares */
+	boolean_t	spa_sync_spares;	/* sync the spares list */
 	uint64_t	spa_config_object;	/* MOS object for pool config */
 	uint64_t	spa_syncing_txg;	/* txg currently syncing */
 	uint64_t	spa_sync_bplist_obj;	/* object for deferred frees */
@@ -122,11 +126,13 @@ struct spa {
 	kmutex_t	spa_errlist_lock;	/* error list/ereport lock */
 	avl_tree_t	spa_errlist_last;	/* last error list */
 	avl_tree_t	spa_errlist_scrub;	/* scrub error list */
+	uint64_t	spa_deflate;		/* should we deflate? */
 	/*
 	 * spa_refcnt must be the last element because it changes size based on
 	 * compilation options.  In order for the MDB module to function
 	 * correctly, the other fields must remain in the same location.
 	 */
+	spa_config_lock_t spa_config_lock;	/* configuration changes */
 	refcount_t	spa_refcount;		/* number of opens */
 };
 
diff --git a/usr/src/uts/common/fs/zfs/sys/vdev.h b/usr/src/uts/common/fs/zfs/sys/vdev.h
index 5a2e6750a0..760aeae560 100644
--- a/usr/src/uts/common/fs/zfs/sys/vdev.h
+++ b/usr/src/uts/common/fs/zfs/sys/vdev.h
@@ -60,9 +60,10 @@ typedef struct vdev_knob {
 extern int vdev_open(vdev_t *);
 extern int vdev_validate(vdev_t *);
 extern void vdev_close(vdev_t *);
-extern int vdev_create(vdev_t *, uint64_t txg);
+extern int vdev_create(vdev_t *, uint64_t txg, boolean_t isreplace);
 extern void vdev_init(vdev_t *, uint64_t txg);
 extern void vdev_reopen(vdev_t *);
+extern int vdev_validate_spare(vdev_t *);
 
 extern vdev_t *vdev_lookup_top(spa_t *spa, uint64_t vdev);
 extern vdev_t *vdev_lookup_by_guid(vdev_t *vd, uint64_t guid);
@@ -85,8 +86,8 @@ extern void vdev_propagate_state(vdev_t *vd);
 extern void vdev_set_state(vdev_t *vd, boolean_t isopen, vdev_state_t state,
     vdev_aux_t aux);
 
-extern void vdev_space_update(vdev_t *vd, uint64_t space_delta,
-    uint64_t alloc_delta);
+extern void vdev_space_update(vdev_t *vd, int64_t space_delta,
+    int64_t alloc_delta);
 
 extern uint64_t vdev_psize_to_asize(vdev_t *vd, uint64_t psize);
 
@@ -116,7 +117,8 @@ extern void vdev_config_dirty(vdev_t *vd);
 extern void vdev_config_clean(vdev_t *vd);
 extern int vdev_config_sync(vdev_t *vd, uint64_t txg);
 
-extern nvlist_t *vdev_config_generate(vdev_t *vd, int getstats);
+extern nvlist_t *vdev_config_generate(spa_t *spa, vdev_t *vd,
+    boolean_t getstats, boolean_t isspare);
 
 /*
  * Label routines
@@ -125,7 +127,8 @@ struct uberblock;
 extern uint64_t vdev_label_offset(uint64_t psize, int l, uint64_t offset);
 extern nvlist_t *vdev_label_read_config(vdev_t *vd);
 extern void vdev_uberblock_load(zio_t *zio, vdev_t *vd, struct uberblock *ub);
-int vdev_label_init(vdev_t *vd, uint64_t create_txg);
+int vdev_label_init(vdev_t *vd, uint64_t create_txg, boolean_t isreplacing);
+int vdev_label_spare(vdev_t *vd, uint64_t create_txg);
 
 #ifdef	__cplusplus
 }
diff --git a/usr/src/uts/common/fs/zfs/sys/vdev_impl.h b/usr/src/uts/common/fs/zfs/sys/vdev_impl.h
index 1b18df8cda..75e642a495 100644
--- a/usr/src/uts/common/fs/zfs/sys/vdev_impl.h
+++ b/usr/src/uts/common/fs/zfs/sys/vdev_impl.h
@@ -153,6 +153,7 @@ struct vdev {
 	txg_node_t	vdev_txg_node;	/* per-txg dirty vdev linkage	*/
 	uint8_t		vdev_reopen_wanted; /* async reopen wanted?	*/
 	list_node_t	vdev_dirty_node; /* config dirty list		*/
+	uint64_t	vdev_deflate_ratio; /* deflation ratio (x512)	*/
 
 	/*
 	 * Leaf vdev state.
@@ -162,6 +163,7 @@ struct vdev {
 	txg_node_t	vdev_dtl_node;	/* per-txg dirty DTL linkage	*/
 	uint64_t	vdev_wholedisk;	/* true if this is a whole disk */
 	uint64_t	vdev_offline;	/* device taken offline?	*/
+	uint64_t	vdev_nparity;	/* number of parity devices for raidz */
 	char		*vdev_path;	/* vdev path (if any)		*/
 	char		*vdev_devid;	/* vdev devid (if any)		*/
 	uint64_t	vdev_fault_arg; /* fault injection paramater	*/
@@ -170,6 +172,7 @@ struct vdev {
 	uint8_t		vdev_cache_active; /* vdev_cache and vdev_queue	*/
 	uint8_t		vdev_tmpoffline; /* device taken offline temporarily? */
 	uint8_t		vdev_detached;	/* device detached?		*/
+	uint64_t	vdev_isspare;	/* was a hot spare */
 	vdev_queue_t	vdev_queue;	/* I/O deadline schedule queue	*/
 	vdev_cache_t	vdev_cache;	/* physical block cache		*/
 	uint64_t	vdev_not_present; /* not present during import	*/
@@ -245,12 +248,13 @@ typedef struct vdev_label {
 
 #define	VDEV_ALLOC_LOAD		0
 #define	VDEV_ALLOC_ADD		1
+#define	VDEV_ALLOC_SPARE	2
 
 /*
  * Allocate or free a vdev
  */
-extern vdev_t *vdev_alloc(spa_t *spa, nvlist_t *config, vdev_t *parent,
-    uint_t id, int alloctype);
+extern int vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *config,
+    vdev_t *parent, uint_t id, int alloctype);
 extern void vdev_free(vdev_t *vd);
 
 /*
@@ -280,6 +284,7 @@ extern vdev_ops_t vdev_raidz_ops;
 extern vdev_ops_t vdev_disk_ops;
 extern vdev_ops_t vdev_file_ops;
 extern vdev_ops_t vdev_missing_ops;
+extern vdev_ops_t vdev_spare_ops;
 
 /*
  * Common size functions
diff --git a/usr/src/uts/common/fs/zfs/vdev.c b/usr/src/uts/common/fs/zfs/vdev.c
index 726852cb4d..0bbd073fd7 100644
--- a/usr/src/uts/common/fs/zfs/vdev.c
+++ b/usr/src/uts/common/fs/zfs/vdev.c
@@ -18,6 +18,7 @@
  *
  * CDDL HEADER END
  */
+
 /*
  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
@@ -49,6 +50,7 @@ static vdev_ops_t *vdev_ops_table[] = {
 	&vdev_raidz_ops,
 	&vdev_mirror_ops,
 	&vdev_replacing_ops,
+	&vdev_spare_ops,
 	&vdev_disk_ops,
 	&vdev_file_ops,
 	&vdev_missing_ops,
@@ -324,6 +326,9 @@ vdev_free_common(vdev_t *vd)
 	if (vd->vdev_devid)
 		spa_strfree(vd->vdev_devid);
 
+	if (vd->vdev_isspare)
+		spa_spare_remove(vd->vdev_guid);
+
 	txg_list_destroy(&vd->vdev_ms_list);
 	txg_list_destroy(&vd->vdev_dtl_list);
 	mutex_enter(&vd->vdev_dtl_lock);
@@ -345,8 +350,9 @@ vdev_free_common(vdev_t *vd)
  * creating a new vdev or loading an existing one - the behavior is slightly
  * different for each case.
  */
-vdev_t *
-vdev_alloc(spa_t *spa, nvlist_t *nv, vdev_t *parent, uint_t id, int alloctype)
+int
+vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id,
+    int alloctype)
 {
 	vdev_ops_t *ops;
 	char *type;
@@ -356,10 +362,10 @@ vdev_alloc(spa_t *spa, nvlist_t *nv, vdev_t *parent, uint_t id, int alloctype)
 	ASSERT(spa_config_held(spa, RW_WRITER));
 
 	if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) != 0)
-		return (NULL);
+		return (EINVAL);
 
 	if ((ops = vdev_getops(type)) == NULL)
-		return (NULL);
+		return (EINVAL);
 
 	/*
 	 * If this is a load, get the vdev guid from the nvlist.
@@ -370,12 +376,21 @@ vdev_alloc(spa_t *spa, nvlist_t *nv, vdev_t *parent, uint_t id, int alloctype)
 
 		if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ID, &label_id) ||
 		    label_id != id)
-			return (NULL);
+			return (EINVAL);
 
 		if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) != 0)
-			return (NULL);
+			return (EINVAL);
+	} else if (alloctype == VDEV_ALLOC_SPARE) {
+		if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) != 0)
+			return (EINVAL);
 	}
 
+	/*
+	 * The first allocated vdev must be of type 'root'.
+	 */
+	if (ops != &vdev_root_ops && spa->spa_root_vdev == NULL)
+		return (EINVAL);
+
 	vd = vdev_alloc_common(spa, id, guid, ops);
 
 	if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &vd->vdev_path) == 0)
@@ -384,6 +399,41 @@ vdev_alloc(spa_t *spa, nvlist_t *nv, vdev_t *parent, uint_t id, int alloctype)
 		vd->vdev_devid = spa_strdup(vd->vdev_devid);
 
 	/*
+	 * Set the nparity property for RAID-Z vdevs.
+	 */
+	if (ops == &vdev_raidz_ops) {
+		if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NPARITY,
+		    &vd->vdev_nparity) == 0) {
+			/*
+			 * Currently, we can only support 2 parity devices.
+			 */
+			if (vd->vdev_nparity > 2)
+				return (EINVAL);
+			/*
+			 * Older versions can only support 1 parity device.
+			 */
+			if (vd->vdev_nparity == 2 &&
+			    spa_version(spa) < ZFS_VERSION_RAID6)
+				return (ENOTSUP);
+
+		} else {
+			/*
+			 * We require the parity to be specified for SPAs that
+			 * support multiple parity levels.
+			 */
+			if (spa_version(spa) >= ZFS_VERSION_RAID6)
+				return (EINVAL);
+
+			/*
+			 * Otherwise, we default to 1 parity device for RAID-Z.
+			 */
+			vd->vdev_nparity = 1;
+		}
+	} else {
+		vd->vdev_nparity = 0;
+	}
+
+	/*
 	 * Set the whole_disk property.  If it's not specified, leave the value
 	 * as -1.
 	 */
@@ -404,6 +454,15 @@ vdev_alloc(spa_t *spa, nvlist_t *nv, vdev_t *parent, uint_t id, int alloctype)
 	(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ASHIFT, &vd->vdev_ashift);
 
 	/*
+	 * Look for the 'is_spare' flag.  If it is set, then we are a
+	 * repurposed hot spare.
+	 */
+	(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_SPARE,
+	    &vd->vdev_isspare);
+	if (vd->vdev_isspare)
+		spa_spare_add(vd->vdev_guid);
+
+	/*
 	 * If we're a top-level vdev, try to load the allocation parameters.
 	 */
 	if (parent && !parent->vdev_parent && alloctype == VDEV_ALLOC_LOAD) {
@@ -430,7 +489,9 @@ vdev_alloc(spa_t *spa, nvlist_t *nv, vdev_t *parent, uint_t id, int alloctype)
 	 */
 	vdev_add_child(parent, vd);
 
-	return (vd);
+	*vdp = vd;
+
+	return (0);
 }
 
 void
@@ -462,6 +523,7 @@ vdev_free(vdev_t *vd)
 		vdev_metaslab_fini(vd);
 
 	ASSERT3U(vd->vdev_stat.vs_space, ==, 0);
+	ASSERT3U(vd->vdev_stat.vs_dspace, ==, 0);
 	ASSERT3U(vd->vdev_stat.vs_alloc, ==, 0);
 
 	/*
@@ -506,9 +568,11 @@ vdev_top_transfer(vdev_t *svd, vdev_t *tvd)
 
 	tvd->vdev_stat.vs_alloc = svd->vdev_stat.vs_alloc;
 	tvd->vdev_stat.vs_space = svd->vdev_stat.vs_space;
+	tvd->vdev_stat.vs_dspace = svd->vdev_stat.vs_dspace;
 
 	svd->vdev_stat.vs_alloc = 0;
 	svd->vdev_stat.vs_space = 0;
+	svd->vdev_stat.vs_dspace = 0;
 
 	for (t = 0; t < TXG_SIZE; t++) {
 		while ((msp = txg_list_remove(&svd->vdev_ms_list, t)) != NULL)
@@ -526,6 +590,9 @@ vdev_top_transfer(vdev_t *svd, vdev_t *tvd)
 
 	tvd->vdev_reopen_wanted = svd->vdev_reopen_wanted;
 	svd->vdev_reopen_wanted = 0;
+
+	tvd->vdev_deflate_ratio = svd->vdev_deflate_ratio;
+	svd->vdev_deflate_ratio = 0;
 }
 
 static void
@@ -585,13 +652,28 @@ vdev_remove_parent(vdev_t *cvd)
 
 	ASSERT(mvd->vdev_children == 1);
 	ASSERT(mvd->vdev_ops == &vdev_mirror_ops ||
-	    mvd->vdev_ops == &vdev_replacing_ops);
+	    mvd->vdev_ops == &vdev_replacing_ops ||
+	    mvd->vdev_ops == &vdev_spare_ops);
 	cvd->vdev_ashift = mvd->vdev_ashift;
 
 	vdev_remove_child(mvd, cvd);
 	vdev_remove_child(pvd, mvd);
 	cvd->vdev_id = mvd->vdev_id;
 	vdev_add_child(pvd, cvd);
+	/*
+	 * If we created a new toplevel vdev, then we need to change the child's
+	 * vdev GUID to match the old toplevel vdev.  Otherwise, we could have
+	 * detached an offline device, and when we go to import the pool we'll
+	 * think we have two toplevel vdevs, instead of a different version of
+	 * the same toplevel vdev.
+	 */
+	if (cvd->vdev_top == cvd) {
+		pvd->vdev_guid_sum -= cvd->vdev_guid;
+		cvd->vdev_guid_sum -= cvd->vdev_guid;
+		cvd->vdev_guid = mvd->vdev_guid;
+		cvd->vdev_guid_sum += mvd->vdev_guid;
+		pvd->vdev_guid_sum += cvd->vdev_guid;
+	}
 	vdev_top_update(cvd->vdev_top, cvd->vdev_top);
 
 	if (cvd == cvd->vdev_top)
@@ -801,6 +883,18 @@ vdev_open(vdev_t *vd)
 	}
 
 	/*
+	 * If this is a top-level vdev, compute the raidz-deflation
+	 * ratio.  Note, we hard-code in 128k (1<<17) because it is the
+	 * current "typical" blocksize.  Even if SPA_MAXBLOCKSIZE
+	 * changes, this algorithm must never change, or we will
+	 * inconsistently account for existing bp's.
+	 */
+	if (vd->vdev_top == vd) {
+		vd->vdev_deflate_ratio = (1<<17) /
+		    (vdev_psize_to_asize(vd, 1<<17) >> SPA_MINBLOCKSHIFT);
+	}
+
+	/*
 	 * This allows the ZFS DE to close cases appropriately.  If a device
 	 * goes away and later returns, we want to close the associated case.
 	 * But it's not enough to simply post this only when a device goes from
@@ -933,7 +1027,7 @@ vdev_reopen(vdev_t *vd)
 }
 
 int
-vdev_create(vdev_t *vd, uint64_t txg)
+vdev_create(vdev_t *vd, uint64_t txg, boolean_t isreplacing)
 {
 	int error;
 
@@ -952,7 +1046,7 @@ vdev_create(vdev_t *vd, uint64_t txg)
 	/*
 	 * Recursively initialize all labels.
 	 */
-	if ((error = vdev_label_init(vd, txg)) != 0) {
+	if ((error = vdev_label_init(vd, txg, isreplacing)) != 0) {
 		vdev_close(vd);
 		return (error);
 	}
@@ -1202,6 +1296,45 @@ vdev_load(vdev_t *vd)
 		    VDEV_AUX_CORRUPT_DATA);
 }
 
+/*
+ * This special case of vdev_spare() is used for hot spares.  Its sole purpose
+ * is to set the vdev state for the associated vdev.  To do this, we make sure
+ * that we can open the underlying device, then try to read the label, and make
+ * sure that the label is sane and that it hasn't been repurposed to another
+ * pool.
+ */
+int
+vdev_validate_spare(vdev_t *vd)
+{
+	nvlist_t *label;
+	uint64_t guid, version;
+	uint64_t state;
+
+	if ((label = vdev_label_read_config(vd)) == NULL) {
+		vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN,
+		    VDEV_AUX_CORRUPT_DATA);
+		return (-1);
+	}
+
+	if (nvlist_lookup_uint64(label, ZPOOL_CONFIG_VERSION, &version) != 0 ||
+	    version > ZFS_VERSION ||
+	    nvlist_lookup_uint64(label, ZPOOL_CONFIG_GUID, &guid) != 0 ||
+	    guid != vd->vdev_guid ||
+	    nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_STATE, &state) != 0) {
+		vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN,
+		    VDEV_AUX_CORRUPT_DATA);
+		nvlist_free(label);
+		return (-1);
+	}
+
+	/*
+	 * We don't actually check the pool state here.  If it's in fact in
+	 * use by another pool, we update this fact on the fly when requested.
+	 */
+	nvlist_free(label);
+	return (0);
+}
+
 void
 vdev_sync_done(vdev_t *vd, uint64_t txg)
 {
@@ -1560,14 +1693,31 @@ vdev_scrub_stat_update(vdev_t *vd, pool_scrub_type_t type, boolean_t complete)
  * Update the in-core space usage stats for this vdev and the root vdev.
  */
 void
-vdev_space_update(vdev_t *vd, uint64_t space_delta, uint64_t alloc_delta)
+vdev_space_update(vdev_t *vd, int64_t space_delta, int64_t alloc_delta)
 {
+	int64_t dspace_delta = space_delta;
+
 	ASSERT(vd == vd->vdev_top);
 
 	do {
+		if (vd->vdev_ms_count) {
+			/*
+			 * If this is a top-level vdev, apply the inverse of
+			 * its psize-to-asize (i.e. RAID-Z) space-expansion
+			 * factor.  We must calculate this here and not at the
+			 * root vdev because the root vdev's psize-to-asize is
+			 * simply the max of its children's, thus not accurate
+			 * enough for us.
+			 */
+			ASSERT((dspace_delta & (SPA_MINBLOCKSIZE-1)) == 0);
+			dspace_delta = (dspace_delta >> SPA_MINBLOCKSHIFT) *
+			    vd->vdev_deflate_ratio;
+		}
+
 		mutex_enter(&vd->vdev_stat_lock);
 		vd->vdev_stat.vs_space += space_delta;
 		vd->vdev_stat.vs_alloc += alloc_delta;
+		vd->vdev_stat.vs_dspace += dspace_delta;
 		mutex_exit(&vd->vdev_stat_lock);
 	} while ((vd = vd->vdev_parent) != NULL);
 }
diff --git a/usr/src/uts/common/fs/zfs/vdev_label.c b/usr/src/uts/common/fs/zfs/vdev_label.c
index 4627745067..335b3e5a36 100644
--- a/usr/src/uts/common/fs/zfs/vdev_label.c
+++ b/usr/src/uts/common/fs/zfs/vdev_label.c
@@ -187,7 +187,8 @@ vdev_label_write(zio_t *zio, vdev_t *vd, int l, void *buf, uint64_t offset,
  * Generate the nvlist representing this vdev's config.
  */
 nvlist_t *
-vdev_config_generate(vdev_t *vd, int getstats)
+vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats,
+    boolean_t isspare)
 {
 	nvlist_t *nv = NULL;
 
@@ -195,7 +196,9 @@ vdev_config_generate(vdev_t *vd, int getstats)
 
 	VERIFY(nvlist_add_string(nv, ZPOOL_CONFIG_TYPE,
 	    vd->vdev_ops->vdev_op_type) == 0);
-	VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_ID, vd->vdev_id) == 0);
+	if (!isspare)
+		VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_ID, vd->vdev_id)
+		    == 0);
 	VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_GUID, vd->vdev_guid) == 0);
 
 	if (vd->vdev_path != NULL)
@@ -206,6 +209,27 @@ vdev_config_generate(vdev_t *vd, int getstats)
 		VERIFY(nvlist_add_string(nv, ZPOOL_CONFIG_DEVID,
 		    vd->vdev_devid) == 0);
 
+	if (vd->vdev_nparity != 0) {
+		ASSERT(strcmp(vd->vdev_ops->vdev_op_type,
+		    VDEV_TYPE_RAIDZ) == 0);
+
+		/*
+		 * Make sure someone hasn't managed to sneak a fancy new vdev
+		 * into a crufty old storage pool.
+		 */
+		ASSERT(vd->vdev_nparity == 1 ||
+		    (vd->vdev_nparity == 2 &&
+		    spa_version(spa) >= ZFS_VERSION_RAID6));
+
+		/*
+		 * Note that we'll add the nparity tag even on storage pools
+		 * that only support a single parity device -- older software
+		 * will just ignore it.
+		 */
+		VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_NPARITY,
+		    vd->vdev_nparity) == 0);
+	}
+
 	if (vd->vdev_wholedisk != -1ULL)
 		VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
 		    vd->vdev_wholedisk) == 0);
@@ -213,7 +237,10 @@ vdev_config_generate(vdev_t *vd, int getstats)
 	if (vd->vdev_not_present)
 		VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT, 1) == 0);
 
-	if (vd == vd->vdev_top) {
+	if (vd->vdev_isspare)
+		VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_IS_SPARE, 1) == 0);
+
+	if (!isspare && vd == vd->vdev_top) {
 		VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_METASLAB_ARRAY,
 		    vd->vdev_ms_array) == 0);
 		VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_METASLAB_SHIFT,
@@ -243,8 +270,8 @@ vdev_config_generate(vdev_t *vd, int getstats)
 		    KM_SLEEP);
 
 		for (c = 0; c < vd->vdev_children; c++)
-			child[c] = vdev_config_generate(vd->vdev_child[c],
-			    getstats);
+			child[c] = vdev_config_generate(spa, vd->vdev_child[c],
+			    getstats, isspare);
 
 		VERIFY(nvlist_add_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
 		    child, vd->vdev_children) == 0);
@@ -307,8 +334,9 @@ vdev_label_read_config(vdev_t *vd)
 	return (config);
 }
 
-int
-vdev_label_init(vdev_t *vd, uint64_t crtxg)
+static int
+vdev_label_common(vdev_t *vd, uint64_t crtxg, boolean_t isspare,
+    boolean_t isreplacing)
 {
 	spa_t *spa = vd->vdev_spa;
 	nvlist_t *label;
@@ -324,7 +352,8 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg)
 	ASSERT(spa_config_held(spa, RW_WRITER));
 
 	for (c = 0; c < vd->vdev_children; c++)
-		if ((error = vdev_label_init(vd->vdev_child[c], crtxg)) != 0)
+		if ((error = vdev_label_common(vd->vdev_child[c],
+		    crtxg, isspare, isreplacing)) != 0)
 			return (error);
 
 	if (!vd->vdev_ops->vdev_op_leaf)
@@ -346,7 +375,7 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg)
 	 */
 	if (crtxg != 0 &&
 	    (label = vdev_label_read_config(vd)) != NULL) {
-		uint64_t state, pool_guid, device_guid, txg;
+		uint64_t state, pool_guid, device_guid, txg, spare;
 		uint64_t mycrtxg = 0;
 
 		(void) nvlist_lookup_uint64(label, ZPOOL_CONFIG_CREATE_TXG,
@@ -361,11 +390,61 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg)
 		    spa_guid_exists(pool_guid, device_guid) &&
 		    nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_TXG,
 		    &txg) == 0 && (txg != 0 || mycrtxg == crtxg)) {
-			dprintf("vdev %s in use, pool_state %d\n",
-			    vdev_description(vd), state);
+			if (isspare && pool_guid != spa_guid(spa) &&
+			    nvlist_lookup_uint64(label,
+			    ZPOOL_CONFIG_IS_SPARE, &spare) == 0 &&
+			    !spa_has_spare(spa, device_guid)) {
+				/*
+				 * If this is a request to add a spare that
+				 * is actively in use in another pool, simply
+				 * return success, after updating the guid.
+				 */
+				vdev_t *pvd = vd->vdev_parent;
+
+				for (; pvd != NULL; pvd = pvd->vdev_parent) {
+					pvd->vdev_guid_sum -= vd->vdev_guid;
+					pvd->vdev_guid_sum += device_guid;
+				}
+
+				vd->vdev_guid = vd->vdev_guid_sum = device_guid;
+				nvlist_free(label);
+				return (0);
+			}
 			nvlist_free(label);
 			return (EBUSY);
 		}
+
+		/*
+		 * If this device is reserved as a hot spare for this pool,
+		 * adopt its GUID, and mark it as such.  This way we preserve
+		 * the fact that it is a hot spare even as it is added and
+		 * removed from the pool.
+		 */
+		if (nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_STATE,
+		    &state) == 0 && state == POOL_STATE_SPARE &&
+		    nvlist_lookup_uint64(label, ZPOOL_CONFIG_GUID,
+		    &device_guid) == 0) {
+			vdev_t *pvd = vd->vdev_parent;
+
+			if ((isspare || !isreplacing) &&
+			    spa_has_spare(spa, device_guid)) {
+				nvlist_free(label);
+				return (EBUSY);
+			}
+
+			for (; pvd != NULL; pvd = pvd->vdev_parent) {
+				pvd->vdev_guid_sum -= vd->vdev_guid;
+				pvd->vdev_guid_sum += device_guid;
+			}
+
+			vd->vdev_guid = vd->vdev_guid_sum = device_guid;
+
+			if (!isspare) {
+				vd->vdev_isspare = B_TRUE;
+				spa_spare_add(vd->vdev_guid);
+			}
+		}
+
 		nvlist_free(label);
 	}
 
@@ -380,14 +459,35 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg)
 	 * We mark it as being from txg 0 to indicate that it's not
 	 * really part of an active pool just yet.  The labels will
 	 * be written again with a meaningful txg by spa_sync().
+	 *
+	 * For hot spares, we generate a special label that identifies as a
+	 * mutually shared hot spare.  If this is being added as a hot spare,
+	 * always write out the spare label.  If this was a hot spare, then
+	 * always label it as such.  If we are adding the vdev, it will remain
+	 * labelled in this state until it's really added to the config.  If we
+	 * are removing the vdev or destroying the pool, then it goes back to
+	 * its original hot spare state.
 	 */
-	label = spa_config_generate(spa, vd, 0ULL, B_FALSE);
-
-	/*
-	 * Add our creation time.  This allows us to detect multiple vdev
-	 * uses as described above, and automatically expires if we fail.
-	 */
-	VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_CREATE_TXG, crtxg) == 0);
+	if (isspare || vd->vdev_isspare) {
+		VERIFY(nvlist_alloc(&label, NV_UNIQUE_NAME, KM_SLEEP) == 0);
+
+		VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_VERSION,
+		    spa_version(spa)) == 0);
+		VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_POOL_STATE,
+		    POOL_STATE_SPARE) == 0);
+		VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_GUID,
+		    vd->vdev_guid) == 0);
+	} else {
+		label = spa_config_generate(spa, vd, 0ULL, B_FALSE);
+
+		/*
+		 * Add our creation time.  This allows us to detect multiple
+		 * vdev uses as described above, and automatically expires if we
+		 * fail.
+		 */
+		VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_CREATE_TXG,
+		    crtxg) == 0);
+	}
 
 	buf = vp->vp_nvlist;
 	buflen = sizeof (vp->vp_nvlist);
@@ -449,6 +549,22 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg)
 	return (error);
 }
 
+int
+vdev_label_init(vdev_t *vd, uint64_t crtxg, boolean_t isreplacing)
+{
+	return (vdev_label_common(vd, crtxg, B_FALSE, isreplacing));
+}
+
+/*
+ * Label a disk as a hot spare.  A hot spare label is a special label with only
+ * the following members: version, pool_state, and guid.
+ */
+int
+vdev_label_spare(vdev_t *vd, uint64_t crtxg)
+{
+	return (vdev_label_common(vd, crtxg, B_TRUE, B_FALSE));
+}
+
 /*
  * ==========================================================================
  * uberblock load/sync
diff --git a/usr/src/uts/common/fs/zfs/vdev_mirror.c b/usr/src/uts/common/fs/zfs/vdev_mirror.c
index eb3f0a862d..14a6ce7e6e 100644
--- a/usr/src/uts/common/fs/zfs/vdev_mirror.c
+++ b/usr/src/uts/common/fs/zfs/vdev_mirror.c
@@ -85,6 +85,7 @@ vdev_mirror_map_alloc(zio_t *zio)
 
 		for (c = 0; c < mm->mm_children; c++) {
 			mc = &mm->mm_child[c];
+
 			mc->mc_vd = vdev_lookup_top(spa, DVA_GET_VDEV(&dva[c]));
 			mc->mc_offset = DVA_GET_OFFSET(&dva[c]);
 		}
@@ -93,7 +94,8 @@ vdev_mirror_map_alloc(zio_t *zio)
 
 		mm = kmem_zalloc(offsetof(mirror_map_t, mm_child[c]), KM_SLEEP);
 		mm->mm_children = c;
-		mm->mm_replacing = (vd->vdev_ops == &vdev_replacing_ops);
+		mm->mm_replacing = (vd->vdev_ops == &vdev_replacing_ops ||
+		    vd->vdev_ops == &vdev_spare_ops);
 		mm->mm_preferred = mm->mm_replacing ? 0 : spa_get_random(c);
 		mm->mm_root = B_FALSE;
 
@@ -477,3 +479,14 @@ vdev_ops_t vdev_replacing_ops = {
 	VDEV_TYPE_REPLACING,	/* name of this vdev type */
 	B_FALSE			/* not a leaf vdev */
 };
+
+vdev_ops_t vdev_spare_ops = {
+	vdev_mirror_open,
+	vdev_mirror_close,
+	vdev_default_asize,
+	vdev_mirror_io_start,
+	vdev_mirror_io_done,
+	vdev_mirror_state_change,
+	VDEV_TYPE_SPARE,	/* name of this vdev type */
+	B_FALSE			/* not a leaf vdev */
+};
diff --git a/usr/src/uts/common/fs/zfs/vdev_raidz.c b/usr/src/uts/common/fs/zfs/vdev_raidz.c
index 33225de39b..3afeab0aef 100644
--- a/usr/src/uts/common/fs/zfs/vdev_raidz.c
+++ b/usr/src/uts/common/fs/zfs/vdev_raidz.c
@@ -18,6 +18,7 @@
  *
  * CDDL HEADER END
  */
+
 /*
  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
@@ -35,49 +36,178 @@
 
 /*
  * Virtual device vector for RAID-Z.
+ *
+ * This vdev supports both single and double parity. For single parity, we
+ * use a simple XOR of all the data columns. For double parity, we use both
+ * the simple XOR as well as a technique described in "The mathematics of
+ * RAID-6" by H. Peter Anvin. This technique defines a Galois field, GF(2^8),
+ * over the integers expressible in a single byte. Briefly, the operations on
+ * the field are defined as follows:
+ *
+ *   o addition (+) is represented by a bitwise XOR
+ *   o subtraction (-) is therefore identical to addition: A + B = A - B
+ *   o multiplication of A by 2 is defined by the following bitwise expression:
+ *	(A * 2)_7 = A_6
+ *	(A * 2)_6 = A_5
+ *	(A * 2)_5 = A_4
+ *	(A * 2)_4 = A_3 + A_7
+ *	(A * 2)_3 = A_2 + A_7
+ *	(A * 2)_2 = A_1 + A_7
+ *	(A * 2)_1 = A_0
+ *	(A * 2)_0 = A_7
+ *
+ * In C, multiplying by 2 is therefore ((a << 1) ^ ((a & 0x80) ? 0x1d : 0)).
+ *
+ * Observe that any number in the field (except for 0) can be expressed as a
+ * power of 2 -- a generator for the field. We store a table of the powers of
+ * 2 and logs base 2 for quick look ups, and exploit the fact that A * B can
+ * be rewritten as 2^(log_2(A) + log_2(B)) (where '+' is normal addition rather
+ * than field addition). The inverse of a field element A (A^-1) is A^254.
+ *
+ * The two parity columns, P and Q, over several data columns, D_0, ... D_n-1,
+ * can be expressed by field operations:
+ *
+ *	P = D_0 + D_1 + ... + D_n-2 + D_n-1
+ *	Q = 2^n-1 * D_0 + 2^n-2 * D_1 + ... + 2^1 * D_n-2 + 2^0 * D_n-1
+ *	  = ((...((D_0) * 2 + D_1) * 2 + ...) * 2 + D_n-2) * 2 + D_n-1
+ *
+ * See the reconstruction code below for how P and Q can be used individually
+ * or in concert to recover missing data columns.
  */
 
-/*
- * We currently allow up to two-way replication (i.e. single-fault
- * reconstruction) models in RAID-Z vdevs.  The blocks in such vdevs
- * must all be multiples of two times the leaf vdev blocksize.
- */
-#define	VDEV_RAIDZ_ALIGN	2ULL
-
 typedef struct raidz_col {
-	uint64_t	rc_col;
-	uint64_t	rc_offset;
-	uint64_t	rc_size;
-	void		*rc_data;
-	int		rc_error;
-	short		rc_tried;
-	short		rc_skipped;
+	uint64_t rc_devidx;		/* child device index for I/O */
+	uint64_t rc_offset;		/* device offset */
+	uint64_t rc_size;		/* I/O size */
+	void *rc_data;			/* I/O data */
+	int rc_error;			/* I/O error for this device */
+	uint8_t rc_tried;		/* Did we attempt this I/O column? */
+	uint8_t rc_skipped;		/* Did we skip this I/O column? */
 } raidz_col_t;
 
 typedef struct raidz_map {
-	uint64_t	rm_cols;
-	uint64_t	rm_bigcols;
-	uint64_t	rm_asize;
-	int		rm_missing_child;
-	int		rm_firstdatacol;
-	raidz_col_t	rm_col[1];
+	uint64_t rm_cols;		/* Column count */
+	uint64_t rm_bigcols;		/* Number of oversized columns */
+	uint64_t rm_asize;		/* Actual total I/O size */
+	uint64_t rm_missingdata;	/* Count of missing data devices */
+	uint64_t rm_missingparity;	/* Count of missing parity devices */
+	uint64_t rm_firstdatacol;	/* First data column/parity count */
+	raidz_col_t rm_col[1];		/* Flexible array of I/O columns */
 } raidz_map_t;
 
+#define	VDEV_RAIDZ_P		0
+#define	VDEV_RAIDZ_Q		1
+
+#define	VDEV_RAIDZ_MAXPARITY	2
+
+#define	VDEV_RAIDZ_MUL_2(a)	(((a) << 1) ^ (((a) & 0x80) ? 0x1d : 0))
+
+/*
+ * These two tables represent powers and logs of 2 in the Galois field defined
+ * above. These values were computed by repeatedly multiplying by 2 as above.
+ */
+static const uint8_t vdev_raidz_pow2[256] = {
+	0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
+	0x1d, 0x3a, 0x74, 0xe8, 0xcd, 0x87, 0x13, 0x26,
+	0x4c, 0x98, 0x2d, 0x5a, 0xb4, 0x75, 0xea, 0xc9,
+	0x8f, 0x03, 0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0,
+	0x9d, 0x27, 0x4e, 0x9c, 0x25, 0x4a, 0x94, 0x35,
+	0x6a, 0xd4, 0xb5, 0x77, 0xee, 0xc1, 0x9f, 0x23,
+	0x46, 0x8c, 0x05, 0x0a, 0x14, 0x28, 0x50, 0xa0,
+	0x5d, 0xba, 0x69, 0xd2, 0xb9, 0x6f, 0xde, 0xa1,
+	0x5f, 0xbe, 0x61, 0xc2, 0x99, 0x2f, 0x5e, 0xbc,
+	0x65, 0xca, 0x89, 0x0f, 0x1e, 0x3c, 0x78, 0xf0,
+	0xfd, 0xe7, 0xd3, 0xbb, 0x6b, 0xd6, 0xb1, 0x7f,
+	0xfe, 0xe1, 0xdf, 0xa3, 0x5b, 0xb6, 0x71, 0xe2,
+	0xd9, 0xaf, 0x43, 0x86, 0x11, 0x22, 0x44, 0x88,
+	0x0d, 0x1a, 0x34, 0x68, 0xd0, 0xbd, 0x67, 0xce,
+	0x81, 0x1f, 0x3e, 0x7c, 0xf8, 0xed, 0xc7, 0x93,
+	0x3b, 0x76, 0xec, 0xc5, 0x97, 0x33, 0x66, 0xcc,
+	0x85, 0x17, 0x2e, 0x5c, 0xb8, 0x6d, 0xda, 0xa9,
+	0x4f, 0x9e, 0x21, 0x42, 0x84, 0x15, 0x2a, 0x54,
+	0xa8, 0x4d, 0x9a, 0x29, 0x52, 0xa4, 0x55, 0xaa,
+	0x49, 0x92, 0x39, 0x72, 0xe4, 0xd5, 0xb7, 0x73,
+	0xe6, 0xd1, 0xbf, 0x63, 0xc6, 0x91, 0x3f, 0x7e,
+	0xfc, 0xe5, 0xd7, 0xb3, 0x7b, 0xf6, 0xf1, 0xff,
+	0xe3, 0xdb, 0xab, 0x4b, 0x96, 0x31, 0x62, 0xc4,
+	0x95, 0x37, 0x6e, 0xdc, 0xa5, 0x57, 0xae, 0x41,
+	0x82, 0x19, 0x32, 0x64, 0xc8, 0x8d, 0x07, 0x0e,
+	0x1c, 0x38, 0x70, 0xe0, 0xdd, 0xa7, 0x53, 0xa6,
+	0x51, 0xa2, 0x59, 0xb2, 0x79, 0xf2, 0xf9, 0xef,
+	0xc3, 0x9b, 0x2b, 0x56, 0xac, 0x45, 0x8a, 0x09,
+	0x12, 0x24, 0x48, 0x90, 0x3d, 0x7a, 0xf4, 0xf5,
+	0xf7, 0xf3, 0xfb, 0xeb, 0xcb, 0x8b, 0x0b, 0x16,
+	0x2c, 0x58, 0xb0, 0x7d, 0xfa, 0xe9, 0xcf, 0x83,
+	0x1b, 0x36, 0x6c, 0xd8, 0xad, 0x47, 0x8e, 0x01
+};
+static const uint8_t vdev_raidz_log2[256] = {
+	0x00, 0x00, 0x01, 0x19, 0x02, 0x32, 0x1a, 0xc6,
+	0x03, 0xdf, 0x33, 0xee, 0x1b, 0x68, 0xc7, 0x4b,
+	0x04, 0x64, 0xe0, 0x0e, 0x34, 0x8d, 0xef, 0x81,
+	0x1c, 0xc1, 0x69, 0xf8, 0xc8, 0x08, 0x4c, 0x71,
+	0x05, 0x8a, 0x65, 0x2f, 0xe1, 0x24, 0x0f, 0x21,
+	0x35, 0x93, 0x8e, 0xda, 0xf0, 0x12, 0x82, 0x45,
+	0x1d, 0xb5, 0xc2, 0x7d, 0x6a, 0x27, 0xf9, 0xb9,
+	0xc9, 0x9a, 0x09, 0x78, 0x4d, 0xe4, 0x72, 0xa6,
+	0x06, 0xbf, 0x8b, 0x62, 0x66, 0xdd, 0x30, 0xfd,
+	0xe2, 0x98, 0x25, 0xb3, 0x10, 0x91, 0x22, 0x88,
+	0x36, 0xd0, 0x94, 0xce, 0x8f, 0x96, 0xdb, 0xbd,
+	0xf1, 0xd2, 0x13, 0x5c, 0x83, 0x38, 0x46, 0x40,
+	0x1e, 0x42, 0xb6, 0xa3, 0xc3, 0x48, 0x7e, 0x6e,
+	0x6b, 0x3a, 0x28, 0x54, 0xfa, 0x85, 0xba, 0x3d,
+	0xca, 0x5e, 0x9b, 0x9f, 0x0a, 0x15, 0x79, 0x2b,
+	0x4e, 0xd4, 0xe5, 0xac, 0x73, 0xf3, 0xa7, 0x57,
+	0x07, 0x70, 0xc0, 0xf7, 0x8c, 0x80, 0x63, 0x0d,
+	0x67, 0x4a, 0xde, 0xed, 0x31, 0xc5, 0xfe, 0x18,
+	0xe3, 0xa5, 0x99, 0x77, 0x26, 0xb8, 0xb4, 0x7c,
+	0x11, 0x44, 0x92, 0xd9, 0x23, 0x20, 0x89, 0x2e,
+	0x37, 0x3f, 0xd1, 0x5b, 0x95, 0xbc, 0xcf, 0xcd,
+	0x90, 0x87, 0x97, 0xb2, 0xdc, 0xfc, 0xbe, 0x61,
+	0xf2, 0x56, 0xd3, 0xab, 0x14, 0x2a, 0x5d, 0x9e,
+	0x84, 0x3c, 0x39, 0x53, 0x47, 0x6d, 0x41, 0xa2,
+	0x1f, 0x2d, 0x43, 0xd8, 0xb7, 0x7b, 0xa4, 0x76,
+	0xc4, 0x17, 0x49, 0xec, 0x7f, 0x0c, 0x6f, 0xf6,
+	0x6c, 0xa1, 0x3b, 0x52, 0x29, 0x9d, 0x55, 0xaa,
+	0xfb, 0x60, 0x86, 0xb1, 0xbb, 0xcc, 0x3e, 0x5a,
+	0xcb, 0x59, 0x5f, 0xb0, 0x9c, 0xa9, 0xa0, 0x51,
+	0x0b, 0xf5, 0x16, 0xeb, 0x7a, 0x75, 0x2c, 0xd7,
+	0x4f, 0xae, 0xd5, 0xe9, 0xe6, 0xe7, 0xad, 0xe8,
+	0x74, 0xd6, 0xf4, 0xea, 0xa8, 0x50, 0x58, 0xaf,
+};
+
+/*
+ * Multiply a given number by 2 raised to the given power.
+ */
+static uint8_t
+vdev_raidz_exp2(uint_t a, int exp)
+{
+	if (a == 0)
+		return (0);
+
+	ASSERT(exp >= 0);
+	ASSERT(vdev_raidz_log2[a] > 0 || a == 1);
+
+	exp += vdev_raidz_log2[a];
+	if (exp > 255)
+		exp -= 255;
+
+	return (vdev_raidz_pow2[exp]);
+}
+
 static raidz_map_t *
-vdev_raidz_map_alloc(zio_t *zio, uint64_t unit_shift, uint64_t dcols)
+vdev_raidz_map_alloc(zio_t *zio, uint64_t unit_shift, uint64_t dcols,
+    uint64_t nparity)
 {
 	raidz_map_t *rm;
 	uint64_t b = zio->io_offset >> unit_shift;
 	uint64_t s = zio->io_size >> unit_shift;
 	uint64_t f = b % dcols;
 	uint64_t o = (b / dcols) << unit_shift;
-	uint64_t q, r, c, bc, col, acols, coff;
-	int firstdatacol;
+	uint64_t q, r, c, bc, col, acols, coff, devidx;
 
-	q = s / (dcols - 1);
-	r = s - q * (dcols - 1);
-	bc = r + !!r;
-	firstdatacol = 1;
+	q = s / (dcols - nparity);
+	r = s - q * (dcols - nparity);
+	bc = (r == 0 ? 0 : r + nparity);
 
 	acols = (q == 0 ? bc : dcols);
 
@@ -86,8 +216,9 @@ vdev_raidz_map_alloc(zio_t *zio, uint64_t unit_shift, uint64_t dcols)
 	rm->rm_cols = acols;
 	rm->rm_bigcols = bc;
 	rm->rm_asize = 0;
-	rm->rm_missing_child = -1;
-	rm->rm_firstdatacol = firstdatacol;
+	rm->rm_missingdata = 0;
+	rm->rm_missingparity = 0;
+	rm->rm_firstdatacol = nparity;
 
 	for (c = 0; c < acols; c++) {
 		col = f + c;
@@ -96,7 +227,7 @@ vdev_raidz_map_alloc(zio_t *zio, uint64_t unit_shift, uint64_t dcols)
 			col -= dcols;
 			coff += 1ULL << unit_shift;
 		}
-		rm->rm_col[c].rc_col = col;
+		rm->rm_col[c].rc_devidx = col;
 		rm->rm_col[c].rc_offset = coff;
 		rm->rm_col[c].rc_size = (q + (c < bc)) << unit_shift;
 		rm->rm_col[c].rc_data = NULL;
@@ -106,7 +237,7 @@ vdev_raidz_map_alloc(zio_t *zio, uint64_t unit_shift, uint64_t dcols)
 		rm->rm_asize += rm->rm_col[c].rc_size;
 	}
 
-	rm->rm_asize = P2ROUNDUP(rm->rm_asize, VDEV_RAIDZ_ALIGN << unit_shift);
+	rm->rm_asize = roundup(rm->rm_asize, (nparity + 1) << unit_shift);
 
 	for (c = 0; c < rm->rm_firstdatacol; c++)
 		rm->rm_col[c].rc_data = zio_buf_alloc(rm->rm_col[c].rc_size);
@@ -118,18 +249,29 @@ vdev_raidz_map_alloc(zio_t *zio, uint64_t unit_shift, uint64_t dcols)
 		    rm->rm_col[c - 1].rc_size;
 
 	/*
-	 * To prevent hot parity disks, switch the parity and data
-	 * columns every 1MB.
+	 * If all data stored spans all columns, there's a danger that parity
+	 * will always be on the same device and, since parity isn't read
+	 * during normal operation, that that device's I/O bandwidth won't be
+	 * used effectively. We therefore switch the parity every 1MB.
+	 *
+	 * ... at least that was, ostensibly, the theory. As a practical
+	 * matter unless we juggle the parity between all devices evenly, we
+	 * won't see any benefit. Further, occasional writes that aren't a
+	 * multiple of the LCM of the number of children and the minimum
+	 * stripe width are sufficient to avoid pessimal behavior.
+	 * Unfortunately, this decision created an implicit on-disk format
+	 * requirement that we need to support for all eternity (but only for
+	 * RAID-Z with one parity device).
 	 */
 	ASSERT(rm->rm_cols >= 2);
 	ASSERT(rm->rm_col[0].rc_size == rm->rm_col[1].rc_size);
 
-	if (zio->io_offset & (1ULL << 20)) {
-		col = rm->rm_col[0].rc_col;
+	if (rm->rm_firstdatacol == 1 && (zio->io_offset & (1ULL << 20))) {
+		devidx = rm->rm_col[0].rc_devidx;
 		o = rm->rm_col[0].rc_offset;
-		rm->rm_col[0].rc_col = rm->rm_col[1].rc_col;
+		rm->rm_col[0].rc_devidx = rm->rm_col[1].rc_devidx;
 		rm->rm_col[0].rc_offset = rm->rm_col[1].rc_offset;
-		rm->rm_col[1].rc_col = col;
+		rm->rm_col[1].rc_devidx = devidx;
 		rm->rm_col[1].rc_offset = o;
 	}
 
@@ -151,47 +293,284 @@ vdev_raidz_map_free(zio_t *zio)
 }
 
 static void
-vdev_raidz_reconstruct(raidz_map_t *rm, int x)
+vdev_raidz_generate_parity_p(raidz_map_t *rm)
 {
-	uint64_t *dst, *src, count, xsize, csize;
-	int i, c;
+	uint64_t *p, *src, pcount, ccount, i;
+	int c;
+
+	pcount = rm->rm_col[VDEV_RAIDZ_P].rc_size / sizeof (src[0]);
+
+	for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) {
+		src = rm->rm_col[c].rc_data;
+		p = rm->rm_col[VDEV_RAIDZ_P].rc_data;
+		ccount = rm->rm_col[c].rc_size / sizeof (src[0]);
+
+		if (c == rm->rm_firstdatacol) {
+			ASSERT(ccount == pcount);
+			for (i = 0; i < ccount; i++, p++, src++) {
+				*p = *src;
+			}
+		} else {
+			ASSERT(ccount <= pcount);
+			for (i = 0; i < ccount; i++, p++, src++) {
+				*p ^= *src;
+			}
+		}
+	}
+}
+
+static void
+vdev_raidz_generate_parity_pq(raidz_map_t *rm)
+{
+	uint64_t *q, *p, *src, pcount, ccount, mask, i;
+	int c;
+
+	pcount = rm->rm_col[VDEV_RAIDZ_P].rc_size / sizeof (src[0]);
+	ASSERT(rm->rm_col[VDEV_RAIDZ_P].rc_size ==
+	    rm->rm_col[VDEV_RAIDZ_Q].rc_size);
+
+	for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) {
+		src = rm->rm_col[c].rc_data;
+		p = rm->rm_col[VDEV_RAIDZ_P].rc_data;
+		q = rm->rm_col[VDEV_RAIDZ_Q].rc_data;
+		ccount = rm->rm_col[c].rc_size / sizeof (src[0]);
+
+		if (c == rm->rm_firstdatacol) {
+			ASSERT(ccount == pcount || ccount == 0);
+			for (i = 0; i < ccount; i++, p++, q++, src++) {
+				*q = *src;
+				*p = *src;
+			}
+			for (; i < pcount; i++, p++, q++, src++) {
+				*q = 0;
+				*p = 0;
+			}
+		} else {
+			ASSERT(ccount <= pcount);
+
+			/*
+			 * Rather than multiplying each byte individually (as
+			 * described above), we are able to handle 8 at once
+			 * by generating a mask based on the high bit in each
+			 * byte and using that to conditionally XOR in 0x1d.
+			 */
+			for (i = 0; i < ccount; i++, p++, q++, src++) {
+				mask = *q & 0x8080808080808080ULL;
+				mask = (mask << 1) - (mask >> 7);
+				*q = ((*q << 1) & 0xfefefefefefefefeULL) ^
+				    (mask & 0x1d1d1d1d1d1d1d1dULL);
+				*q ^= *src;
+				*p ^= *src;
+			}
+
+			/*
+			 * Treat short columns as though they are full of 0s.
+			 */
+			for (; i < pcount; i++, q++) {
+				mask = *q & 0x8080808080808080ULL;
+				mask = (mask << 1) - (mask >> 7);
+				*q = ((*q << 1) & 0xfefefefefefefefeULL) ^
+				    (mask & 0x1d1d1d1d1d1d1d1dULL);
+			}
+		}
+	}
+}
+
+static void
+vdev_raidz_reconstruct_p(raidz_map_t *rm, int x)
+{
+	uint64_t *dst, *src, xcount, ccount, count, i;
+	int c;
+
+	xcount = rm->rm_col[x].rc_size / sizeof (src[0]);
+	ASSERT(xcount <= rm->rm_col[VDEV_RAIDZ_P].rc_size / sizeof (src[0]));
+	ASSERT(xcount > 0);
+
+	src = rm->rm_col[VDEV_RAIDZ_P].rc_data;
+	dst = rm->rm_col[x].rc_data;
+	for (i = 0; i < xcount; i++, dst++, src++) {
+		*dst = *src;
+	}
+
+	for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) {
+		src = rm->rm_col[c].rc_data;
+		dst = rm->rm_col[x].rc_data;
 
-	for (c = 0; c < rm->rm_cols; c++) {
 		if (c == x)
 			continue;
+
+		ccount = rm->rm_col[c].rc_size / sizeof (src[0]);
+		count = MIN(ccount, xcount);
+
+		for (i = 0; i < count; i++, dst++, src++) {
+			*dst ^= *src;
+		}
+	}
+}
+
+static void
+vdev_raidz_reconstruct_q(raidz_map_t *rm, int x)
+{
+	uint64_t *dst, *src, xcount, ccount, count, mask, i;
+	uint8_t *b;
+	int c, j, exp;
+
+	xcount = rm->rm_col[x].rc_size / sizeof (src[0]);
+	ASSERT(xcount <= rm->rm_col[VDEV_RAIDZ_Q].rc_size / sizeof (src[0]));
+
+	for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) {
 		src = rm->rm_col[c].rc_data;
 		dst = rm->rm_col[x].rc_data;
-		csize = rm->rm_col[c].rc_size;
-		xsize = rm->rm_col[x].rc_size;
-		count = MIN(csize, xsize) / sizeof (uint64_t);
-		if (c == !x) {
+
+		if (c == x)
+			ccount = 0;
+		else
+			ccount = rm->rm_col[c].rc_size / sizeof (src[0]);
+
+		count = MIN(ccount, xcount);
+
+		if (c == rm->rm_firstdatacol) {
+			for (i = 0; i < count; i++, dst++, src++) {
+				*dst = *src;
+			}
+			for (; i < xcount; i++, dst++) {
+				*dst = 0;
+			}
+
+		} else {
 			/*
-			 * The initial copy happens at either c == 0 or c == 1.
-			 * Both of these columns are 'big' columns, so we'll
-			 * definitely initialize all of column x.
+			 * For an explanation of this, see the comment in
+			 * vdev_raidz_generate_parity_pq() above.
 			 */
-			ASSERT3U(xsize, <=, csize);
-			for (i = 0; i < count; i++)
-				*dst++ = *src++;
-		} else {
-			for (i = 0; i < count; i++)
-				*dst++ ^= *src++;
+			for (i = 0; i < count; i++, dst++, src++) {
+				mask = *dst & 0x8080808080808080ULL;
+				mask = (mask << 1) - (mask >> 7);
+				*dst = ((*dst << 1) & 0xfefefefefefefefeULL) ^
+				    (mask & 0x1d1d1d1d1d1d1d1dULL);
+				*dst ^= *src;
+			}
+
+			for (; i < xcount; i++, dst++) {
+				mask = *dst & 0x8080808080808080ULL;
+				mask = (mask << 1) - (mask >> 7);
+				*dst = ((*dst << 1) & 0xfefefefefefefefeULL) ^
+				    (mask & 0x1d1d1d1d1d1d1d1dULL);
+			}
+		}
+	}
+
+	src = rm->rm_col[VDEV_RAIDZ_Q].rc_data;
+	dst = rm->rm_col[x].rc_data;
+	exp = 255 - (rm->rm_cols - 1 - x);
+
+	for (i = 0; i < xcount; i++, dst++, src++) {
+		*dst ^= *src;
+		for (j = 0, b = (uint8_t *)dst; j < 8; j++, b++) {
+			*b = vdev_raidz_exp2(*b, exp);
 		}
 	}
 }
 
+static void
+vdev_raidz_reconstruct_pq(raidz_map_t *rm, int x, int y)
+{
+	uint8_t *p, *q, *pxy, *qxy, *xd, *yd, tmp, a, b, aexp, bexp;
+	void *pdata, *qdata;
+	uint64_t xsize, ysize, i;
+
+	ASSERT(x < y);
+	ASSERT(x >= rm->rm_firstdatacol);
+	ASSERT(y < rm->rm_cols);
+
+	ASSERT(rm->rm_col[x].rc_size >= rm->rm_col[y].rc_size);
+
+	/*
+	 * Move the parity data aside -- we're going to compute parity as
+	 * though columns x and y were full of zeros -- Pxy and Qxy. We want to
+	 * reuse the parity generation mechanism without trashing the actual
+	 * parity so we make those columns appear to be full of zeros by
+	 * setting their lengths to zero.
+	 */
+	pdata = rm->rm_col[VDEV_RAIDZ_P].rc_data;
+	qdata = rm->rm_col[VDEV_RAIDZ_Q].rc_data;
+	xsize = rm->rm_col[x].rc_size;
+	ysize = rm->rm_col[y].rc_size;
+
+	rm->rm_col[VDEV_RAIDZ_P].rc_data =
+	    zio_buf_alloc(rm->rm_col[VDEV_RAIDZ_P].rc_size);
+	rm->rm_col[VDEV_RAIDZ_Q].rc_data =
+	    zio_buf_alloc(rm->rm_col[VDEV_RAIDZ_Q].rc_size);
+	rm->rm_col[x].rc_size = 0;
+	rm->rm_col[y].rc_size = 0;
+
+	vdev_raidz_generate_parity_pq(rm);
+
+	rm->rm_col[x].rc_size = xsize;
+	rm->rm_col[y].rc_size = ysize;
+
+	p = pdata;
+	q = qdata;
+	pxy = rm->rm_col[VDEV_RAIDZ_P].rc_data;
+	qxy = rm->rm_col[VDEV_RAIDZ_Q].rc_data;
+	xd = rm->rm_col[x].rc_data;
+	yd = rm->rm_col[y].rc_data;
+
+	/*
+	 * We now have:
+	 *	Pxy = P + D_x + D_y
+	 *	Qxy = Q + 2^(ndevs - 1 - x) * D_x + 2^(ndevs - 1 - y) * D_y
+	 *
+	 * We can then solve for D_x:
+	 *	D_x = A * (P + Pxy) + B * (Q + Qxy)
+	 * where
+	 *	A = 2^(x - y) * (2^(x - y) + 1)^-1
+	 *	B = 2^-(ndevs - 1 - x) * (2^(x - y) + 1)^-1
+	 *
+	 * With D_x in hand, we can easily solve for D_y:
+	 *	D_y = P + Pxy + D_x
+	 */
+
+	a = vdev_raidz_pow2[255 + x - y];
+	b = vdev_raidz_pow2[255 - (rm->rm_cols - 1 - x)];
+	tmp = 255 - vdev_raidz_log2[a ^ 1];
+
+	aexp = vdev_raidz_log2[vdev_raidz_exp2(a, tmp)];
+	bexp = vdev_raidz_log2[vdev_raidz_exp2(b, tmp)];
+
+	for (i = 0; i < xsize; i++, p++, q++, pxy++, qxy++, xd++, yd++) {
+		*xd = vdev_raidz_exp2(*p ^ *pxy, aexp) ^
+		    vdev_raidz_exp2(*q ^ *qxy, bexp);
+
+		if (i < ysize)
+			*yd = *p ^ *pxy ^ *xd;
+	}
+
+	zio_buf_free(rm->rm_col[VDEV_RAIDZ_P].rc_data,
+	    rm->rm_col[VDEV_RAIDZ_P].rc_size);
+	zio_buf_free(rm->rm_col[VDEV_RAIDZ_Q].rc_data,
+	    rm->rm_col[VDEV_RAIDZ_Q].rc_size);
+
+	/*
+	 * Restore the saved parity data.
+	 */
+	rm->rm_col[VDEV_RAIDZ_P].rc_data = pdata;
+	rm->rm_col[VDEV_RAIDZ_Q].rc_data = qdata;
+}
+
+
 static int
 vdev_raidz_open(vdev_t *vd, uint64_t *asize, uint64_t *ashift)
 {
 	vdev_t *cvd;
+	uint64_t nparity = vd->vdev_nparity;
 	int c, error;
 	int lasterror = 0;
 	int numerrors = 0;
 
-	/*
-	 * XXX -- minimum children should be raid-type-specific
-	 */
-	if (vd->vdev_children < 2) {
+	ASSERT(nparity > 0);
+
+	if (nparity > VDEV_RAIDZ_MAXPARITY ||
+	    vd->vdev_children < nparity + 1) {
 		vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
 		return (EINVAL);
 	}
@@ -211,7 +590,7 @@ vdev_raidz_open(vdev_t *vd, uint64_t *asize, uint64_t *ashift)
 
 	*asize *= vd->vdev_children;
 
-	if (numerrors > 1) {
+	if (numerrors > nparity) {
 		vd->vdev_stat.vs_aux = VDEV_AUX_NO_REPLICAS;
 		return (lasterror);
 	}
@@ -234,10 +613,11 @@ vdev_raidz_asize(vdev_t *vd, uint64_t psize)
 	uint64_t asize;
 	uint64_t ashift = vd->vdev_top->vdev_ashift;
 	uint64_t cols = vd->vdev_children;
+	uint64_t nparity = vd->vdev_nparity;
 
 	asize = ((psize - 1) >> ashift) + 1;
-	asize += (asize + cols - 2) / (cols - 1);
-	asize = P2ROUNDUP(asize, VDEV_RAIDZ_ALIGN) << ashift;
+	asize += nparity * ((asize + cols - nparity - 1) / (cols - nparity));
+	asize = roundup(asize, nparity + 1) << ashift;
 
 	return (asize);
 }
@@ -270,20 +650,23 @@ vdev_raidz_io_start(zio_t *zio)
 	raidz_col_t *rc;
 	int c;
 
-	rm = vdev_raidz_map_alloc(zio, tvd->vdev_ashift, vd->vdev_children);
+	rm = vdev_raidz_map_alloc(zio, tvd->vdev_ashift, vd->vdev_children,
+	    vd->vdev_nparity);
 
 	ASSERT3U(rm->rm_asize, ==, vdev_psize_to_asize(vd, zio->io_size));
 
 	if (zio->io_type == ZIO_TYPE_WRITE) {
-
 		/*
-		 * Generate RAID parity in virtual column 0.
+		 * Generate RAID parity in the first virtual columns.
 		 */
-		vdev_raidz_reconstruct(rm, 0);
+		if (rm->rm_firstdatacol == 1)
+			vdev_raidz_generate_parity_p(rm);
+		else
+			vdev_raidz_generate_parity_pq(rm);
 
 		for (c = 0; c < rm->rm_cols; c++) {
 			rc = &rm->rm_col[c];
-			cvd = vd->vdev_child[rc->rc_col];
+			cvd = vd->vdev_child[rc->rc_devidx];
 			zio_nowait(zio_vdev_child_io(zio, NULL, cvd,
 			    rc->rc_offset, rc->rc_data, rc->rc_size,
 			    zio->io_type, zio->io_priority, ZIO_FLAG_CANFAIL,
@@ -295,23 +678,34 @@ vdev_raidz_io_start(zio_t *zio)
 
 	ASSERT(zio->io_type == ZIO_TYPE_READ);
 
+	/*
+	 * Iterate over the columns in reverse order so that we hit the parity
+	 * last -- any errors along the way will force us to read the parity
+	 * data.
+	 */
 	for (c = rm->rm_cols - 1; c >= 0; c--) {
 		rc = &rm->rm_col[c];
-		cvd = vd->vdev_child[rc->rc_col];
+		cvd = vd->vdev_child[rc->rc_devidx];
 		if (vdev_is_dead(cvd)) {
-			rm->rm_missing_child = c;
+			if (c >= rm->rm_firstdatacol)
+				rm->rm_missingdata++;
+			else
+				rm->rm_missingparity++;
 			rc->rc_error = ENXIO;
 			rc->rc_tried = 1;	/* don't even try */
 			rc->rc_skipped = 1;
 			continue;
 		}
 		if (vdev_dtl_contains(&cvd->vdev_dtl_map, bp->blk_birth, 1)) {
-			rm->rm_missing_child = c;
+			if (c >= rm->rm_firstdatacol)
+				rm->rm_missingdata++;
+			else
+				rm->rm_missingparity++;
 			rc->rc_error = ESTALE;
 			rc->rc_skipped = 1;
 			continue;
 		}
-		if (c >= rm->rm_firstdatacol || rm->rm_missing_child != -1 ||
+		if (c >= rm->rm_firstdatacol || rm->rm_missingdata > 0 ||
 		    (zio->io_flags & ZIO_FLAG_SCRUB)) {
 			zio_nowait(zio_vdev_child_io(zio, NULL, cvd,
 			    rc->rc_offset, rc->rc_data, rc->rc_size,
@@ -329,7 +723,7 @@ vdev_raidz_io_start(zio_t *zio)
 static void
 raidz_checksum_error(zio_t *zio, raidz_col_t *rc)
 {
-	vdev_t *vd = zio->io_vd->vdev_child[rc->rc_col];
+	vdev_t *vd = zio->io_vd->vdev_child[rc->rc_devidx];
 	dprintf_bp(zio->io_bp, "imputed checksum error on %s: ",
 	    vdev_description(vd));
 
@@ -344,6 +738,50 @@ raidz_checksum_error(zio_t *zio, raidz_col_t *rc)
 		    zio->io_spa, vd, zio, rc->rc_offset, rc->rc_size);
 }
 
+/*
+ * Regenerate the parity from the data columns. For every parity column we
+ * tried and read without error, compare the regenerated parity against what
+ * was read; on a mismatch, fire off a checksum error and mark the column
+ * ECKSUM. Returns the number of such mismatches.
+ */
+static int
+raidz_parity_verify(zio_t *zio, raidz_map_t *rm)
+{
+	void *orig[VDEV_RAIDZ_MAXPARITY];
+	int c, ret = 0;
+	raidz_col_t *rc;
+
+	for (c = 0; c < rm->rm_firstdatacol; c++) {
+		rc = &rm->rm_col[c];
+		if (!rc->rc_tried || rc->rc_error != 0)
+			continue;
+		orig[c] = zio_buf_alloc(rc->rc_size);
+		bcopy(rc->rc_data, orig[c], rc->rc_size);
+	}
+
+	if (rm->rm_firstdatacol == 1)
+		vdev_raidz_generate_parity_p(rm);
+	else
+		vdev_raidz_generate_parity_pq(rm);
+
+	for (c = 0; c < rm->rm_firstdatacol; c++) {
+		rc = &rm->rm_col[c];
+		if (!rc->rc_tried || rc->rc_error != 0)
+			continue;
+		if (bcmp(orig[c], rc->rc_data, rc->rc_size) != 0) {
+			raidz_checksum_error(zio, rc);
+			rc->rc_error = ECKSUM;
+			ret++;
+		}
+		zio_buf_free(orig[c], rc->rc_size);
+	}
+
+	return (ret);
+}
+
+static uint64_t raidz_corrected_p;
+static uint64_t raidz_corrected_q;
+static uint64_t raidz_corrected_pq;
 
 static void
 vdev_raidz_io_done(zio_t *zio)
@@ -351,15 +789,20 @@ vdev_raidz_io_done(zio_t *zio)
 	vdev_t *vd = zio->io_vd;
 	vdev_t *cvd;
 	raidz_map_t *rm = zio->io_vsd;
-	raidz_col_t *rc;
+	raidz_col_t *rc, *rc1;
 	int unexpected_errors = 0;
-	int c;
+	int parity_errors = 0;
+	int data_errors = 0;
+	int n, c, c1;
 
 	ASSERT(zio->io_bp != NULL);  /* XXX need to add code to enforce this */
 
 	zio->io_error = 0;
 	zio->io_numerrors = 0;
 
+	ASSERT(rm->rm_missingparity <= rm->rm_firstdatacol);
+	ASSERT(rm->rm_missingdata <= rm->rm_cols - rm->rm_firstdatacol);
+
 	for (c = 0; c < rm->rm_cols; c++) {
 		rc = &rm->rm_col[c];
 
@@ -370,8 +813,15 @@ vdev_raidz_io_done(zio_t *zio)
 		if (rc->rc_error) {
 			if (zio->io_error != EIO)
 				zio->io_error = rc->rc_error;
+
+			if (c < rm->rm_firstdatacol)
+				parity_errors++;
+			else
+				data_errors++;
+
 			if (!rc->rc_skipped)
 				unexpected_errors++;
+
 			zio->io_numerrors++;
 		}
 	}
@@ -392,149 +842,288 @@ vdev_raidz_io_done(zio_t *zio)
 	}
 
 	ASSERT(zio->io_type == ZIO_TYPE_READ);
+	/*
+	 * There are three potential phases for a read:
+	 *	1. produce valid data from the columns read
+	 *	2. read all disks and try again
+	 *	3. perform combinatorial reconstruction
+	 *
+	 * Each phase is progressively both more expensive and less likely to
+	 * occur. If we encounter more errors than we can repair or all phases
+	 * fail, we have no choice but to return an error.
+	 */
 
 	/*
-	 * If there were no I/O errors, and the data checksums correctly,
-	 * the read is complete.
+	 * If the number of errors we saw was correctable -- less than or equal
+	 * to the number of parity disks -- attempt to produce data that has a
+	 * valid checksum. Naturally, zero errors falls into this case.
 	 */
-	/* XXPOLICY */
-	if (zio->io_numerrors == 0 && zio_checksum_error(zio) == 0) {
-		ASSERT(unexpected_errors == 0);
-		ASSERT(zio->io_error == 0);
+	if (zio->io_numerrors <= rm->rm_firstdatacol) {
+		switch (data_errors) {
+		case 0:
+			if (zio_checksum_error(zio) == 0) {
+				zio->io_error = 0;
+				n = raidz_parity_verify(zio, rm);
+				unexpected_errors += n;
+				ASSERT(parity_errors + n <=
+				    rm->rm_firstdatacol);
+				goto done;
+			}
+			break;
 
-		/*
-		 * We know the data's good.  If we read the parity,
-		 * verify that it's good as well.  If not, fix it.
-		 */
-		for (c = 0; c < rm->rm_firstdatacol; c++) {
-			void *orig;
-			rc = &rm->rm_col[c];
-			if (!rc->rc_tried)
-				continue;
-			orig = zio_buf_alloc(rc->rc_size);
-			bcopy(rc->rc_data, orig, rc->rc_size);
-			vdev_raidz_reconstruct(rm, c);
-			if (bcmp(orig, rc->rc_data, rc->rc_size) != 0) {
-				raidz_checksum_error(zio, rc);
-				rc->rc_error = ECKSUM;
-				unexpected_errors++;
+		case 1:
+			ASSERT(parity_errors < rm->rm_firstdatacol);
+
+			/*
+			 * Find the column that reported the error.
+			 */
+			for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) {
+				rc = &rm->rm_col[c];
+				if (rc->rc_error != 0)
+					break;
+			}
+			ASSERT(c != rm->rm_cols);
+			ASSERT(!rc->rc_skipped || rc->rc_error == ENXIO ||
+			    rc->rc_error == ESTALE);
+
+			if (rm->rm_col[VDEV_RAIDZ_P].rc_error == 0) {
+				vdev_raidz_reconstruct_p(rm, c);
+			} else {
+				ASSERT(rm->rm_firstdatacol > 1);
+				vdev_raidz_reconstruct_q(rm, c);
 			}
-			zio_buf_free(orig, rc->rc_size);
-		}
-		goto done;
-	}
 
-	/*
-	 * If there was exactly one I/O error, it's the one we expected,
-	 * and the reconstructed data checksums, the read is complete.
-	 * This happens when one child is offline and vdev_fault_assess()
-	 * knows it, or when one child has stale data and the DTL knows it.
-	 */
-	if (zio->io_numerrors == 1 && (c = rm->rm_missing_child) != -1) {
-		rc = &rm->rm_col[c];
-		ASSERT(unexpected_errors == 0);
-		ASSERT(rc->rc_error == ENXIO || rc->rc_error == ESTALE);
-		vdev_raidz_reconstruct(rm, c);
-		if (zio_checksum_error(zio) == 0) {
-			zio->io_error = 0;
-			goto done;
+			if (zio_checksum_error(zio) == 0) {
+				zio->io_error = 0;
+				if (rm->rm_col[VDEV_RAIDZ_P].rc_error == 0)
+					atomic_inc_64(&raidz_corrected_p);
+				else
+					atomic_inc_64(&raidz_corrected_q);
+
+				/*
+				 * If there's more than one parity disk,
+				 * confirm that the parity disk not used above
+				 * has the correct data.
+				 */
+				if (rm->rm_firstdatacol > 1) {
+					n = raidz_parity_verify(zio, rm);
+					unexpected_errors += n;
+					ASSERT(parity_errors + n <=
+					    rm->rm_firstdatacol);
+				}
+
+				goto done;
+			}
+			break;
+
+		case 2:
+			/*
+			 * Find the two columns that reported errors.
+			 */
+			for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) {
+				rc = &rm->rm_col[c];
+				if (rc->rc_error != 0)
+					break;
+			}
+			ASSERT(c != rm->rm_cols);
+			ASSERT(!rc->rc_skipped || rc->rc_error == ENXIO ||
+			    rc->rc_error == ESTALE);
+
+			for (c1 = c++; c < rm->rm_cols; c++) {
+				rc = &rm->rm_col[c];
+				if (rc->rc_error != 0)
+					break;
+			}
+			ASSERT(c != rm->rm_cols);
+			ASSERT(!rc->rc_skipped || rc->rc_error == ENXIO ||
+			    rc->rc_error == ESTALE);
+
+			vdev_raidz_reconstruct_pq(rm, c1, c);
+
+			if (zio_checksum_error(zio) == 0) {
+				zio->io_error = 0;
+				atomic_inc_64(&raidz_corrected_pq);
+
+				goto done;
+			}
+			break;
+
+		default:
+			ASSERT(rm->rm_firstdatacol <= 2);
+			ASSERT(0);
 		}
 	}
 
 	/*
-	 * This isn't a typical error -- either we got a read error or
-	 * more than one child claimed a problem.  Read every block we
-	 * haven't already so we can try combinatorial reconstruction.
+	 * This isn't a typical situation -- either we got a read error or
+	 * a child silently returned bad data. Read every block so we can
+	 * try again with as much data and parity as we can track down. If
+	 * we've already been through once before, all children will be marked
+	 * as tried so we'll proceed to combinatorial reconstruction.
 	 */
 	unexpected_errors = 1;
-	rm->rm_missing_child = -1;
+	rm->rm_missingdata = 0;
+	rm->rm_missingparity = 0;
 
-	for (c = 0; c < rm->rm_cols; c++)
-		if (!rm->rm_col[c].rc_tried)
-			break;
+	for (c = 0; c < rm->rm_cols; c++) {
+		if (rm->rm_col[c].rc_tried)
+			continue;
 
-	if (c != rm->rm_cols) {
 		zio->io_error = 0;
 		zio_vdev_io_redone(zio);
-		for (c = 0; c < rm->rm_cols; c++) {
+		do {
 			rc = &rm->rm_col[c];
 			if (rc->rc_tried)
 				continue;
 			zio_nowait(zio_vdev_child_io(zio, NULL,
-			    vd->vdev_child[rc->rc_col],
+			    vd->vdev_child[rc->rc_devidx],
 			    rc->rc_offset, rc->rc_data, rc->rc_size,
 			    zio->io_type, zio->io_priority, ZIO_FLAG_CANFAIL,
 			    vdev_raidz_child_done, rc));
-		}
+		} while (++c < rm->rm_cols);
+		dprintf("rereading\n");
 		zio_wait_children_done(zio);
 		return;
 	}
 
 	/*
-	 * If there were more errors than parity disks, give up.
+	 * At this point we've attempted to reconstruct the data given the
+	 * errors we detected, and we've attempted to read all columns. There
+	 * must, therefore, be one or more additional problems -- silent errors
+	 * resulting in invalid data rather than explicit I/O errors resulting
+	 * in absent data. Before we attempt combinatorial reconstruction make
+	 * sure we have a chance of coming up with the right answer.
 	 */
-	if (zio->io_numerrors > rm->rm_firstdatacol) {
+	if (zio->io_numerrors >= rm->rm_firstdatacol) {
 		ASSERT(zio->io_error != 0);
 		goto done;
 	}
 
-	/*
-	 * The number of I/O errors is correctable.  Correct them here.
-	 */
-	ASSERT(zio->io_numerrors <= rm->rm_firstdatacol);
-	for (c = 0; c < rm->rm_cols; c++) {
-		rc = &rm->rm_col[c];
-		ASSERT(rc->rc_tried);
-		if (rc->rc_error) {
-			vdev_raidz_reconstruct(rm, c);
-			if (zio_checksum_error(zio) == 0)
+	if (rm->rm_col[VDEV_RAIDZ_P].rc_error == 0) {
+		/*
+		 * Attempt to reconstruct the data from parity P.
+		 */
+		for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) {
+			void *orig;
+			rc = &rm->rm_col[c];
+
+			orig = zio_buf_alloc(rc->rc_size);
+			bcopy(rc->rc_data, orig, rc->rc_size);
+			vdev_raidz_reconstruct_p(rm, c);
+
+			if (zio_checksum_error(zio) == 0) {
+				zio_buf_free(orig, rc->rc_size);
 				zio->io_error = 0;
-			else
-				zio->io_error = rc->rc_error;
-			goto done;
+				atomic_inc_64(&raidz_corrected_p);
+
+				/*
+				 * If this child didn't know that it returned
+				 * bad data, inform it.
+				 */
+				if (rc->rc_tried && rc->rc_error == 0)
+					raidz_checksum_error(zio, rc);
+				rc->rc_error = ECKSUM;
+				goto done;
+			}
+
+			bcopy(orig, rc->rc_data, rc->rc_size);
+			zio_buf_free(orig, rc->rc_size);
 		}
 	}
 
-	/*
-	 * There were no I/O errors, but the data doesn't checksum.
-	 * Try all permutations to see if we can find one that does.
-	 */
-	ASSERT(zio->io_numerrors == 0);
-	for (c = 0; c < rm->rm_cols; c++) {
-		void *orig;
-		rc = &rm->rm_col[c];
+	if (rm->rm_firstdatacol > 1 && rm->rm_col[VDEV_RAIDZ_Q].rc_error == 0) {
+		/*
+		 * Attempt to reconstruct the data from parity Q.
+		 */
+		for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) {
+			void *orig;
+			rc = &rm->rm_col[c];
+
+			orig = zio_buf_alloc(rc->rc_size);
+			bcopy(rc->rc_data, orig, rc->rc_size);
+			vdev_raidz_reconstruct_q(rm, c);
 
-		orig = zio_buf_alloc(rc->rc_size);
-		bcopy(rc->rc_data, orig, rc->rc_size);
-		vdev_raidz_reconstruct(rm, c);
+			if (zio_checksum_error(zio) == 0) {
+				zio_buf_free(orig, rc->rc_size);
+				zio->io_error = 0;
+				atomic_inc_64(&raidz_corrected_q);
+
+				/*
+				 * If this child didn't know that it returned
+				 * bad data, inform it.
+				 */
+				if (rc->rc_tried && rc->rc_error == 0)
+					raidz_checksum_error(zio, rc);
+				rc->rc_error = ECKSUM;
+				goto done;
+			}
 
-		if (zio_checksum_error(zio) == 0) {
+			bcopy(orig, rc->rc_data, rc->rc_size);
 			zio_buf_free(orig, rc->rc_size);
-			zio->io_error = 0;
-			/*
-			 * If this child didn't know that it returned bad data,
-			 * inform it.
-			 */
-			if (rc->rc_tried && rc->rc_error == 0)
-				raidz_checksum_error(zio, rc);
-			rc->rc_error = ECKSUM;
-			goto done;
 		}
+	}
 
-		bcopy(orig, rc->rc_data, rc->rc_size);
-		zio_buf_free(orig, rc->rc_size);
+	if (rm->rm_firstdatacol > 1 &&
+	    rm->rm_col[VDEV_RAIDZ_P].rc_error == 0 &&
+	    rm->rm_col[VDEV_RAIDZ_Q].rc_error == 0) {
+		/*
+		 * Attempt to reconstruct the data from both P and Q.
+		 */
+		for (c = rm->rm_firstdatacol; c < rm->rm_cols - 1; c++) {
+			void *orig, *orig1;
+			rc = &rm->rm_col[c];
+
+			orig = zio_buf_alloc(rc->rc_size);
+			bcopy(rc->rc_data, orig, rc->rc_size);
+
+			for (c1 = c + 1; c1 < rm->rm_cols; c1++) {
+				rc1 = &rm->rm_col[c1];
+
+				orig1 = zio_buf_alloc(rc1->rc_size);
+				bcopy(rc1->rc_data, orig1, rc1->rc_size);
+
+				vdev_raidz_reconstruct_pq(rm, c, c1);
+
+				if (zio_checksum_error(zio) == 0) {
+					zio_buf_free(orig, rc->rc_size);
+					zio_buf_free(orig1, rc1->rc_size);
+					zio->io_error = 0;
+					atomic_inc_64(&raidz_corrected_pq);
+
+					/*
+					 * If these children didn't know they
+					 * returned bad data, inform them.
+					 */
+					if (rc->rc_tried && rc->rc_error == 0)
+						raidz_checksum_error(zio, rc);
+					if (rc1->rc_tried && rc1->rc_error == 0)
+						raidz_checksum_error(zio, rc1);
+
+					rc->rc_error = ECKSUM;
+					rc1->rc_error = ECKSUM;
+
+					goto done;
+				}
+
+				bcopy(orig1, rc1->rc_data, rc1->rc_size);
+				zio_buf_free(orig1, rc1->rc_size);
+			}
+
+			bcopy(orig, rc->rc_data, rc->rc_size);
+			zio_buf_free(orig, rc->rc_size);
+		}
 	}
 
 	/*
-	 * All combinations failed to checksum.  Generate checksum ereports for
-	 * every one.
+	 * All combinations failed to checksum. Generate checksum ereports for
+	 * all children.
 	 */
 	zio->io_error = ECKSUM;
 	if (!(zio->io_flags & ZIO_FLAG_SPECULATIVE)) {
 		for (c = 0; c < rm->rm_cols; c++) {
 			rc = &rm->rm_col[c];
 			zfs_ereport_post(FM_EREPORT_ZFS_CHECKSUM,
-			    zio->io_spa, vd->vdev_child[rc->rc_col], zio,
+			    zio->io_spa, vd->vdev_child[rc->rc_devidx], zio,
 			    rc->rc_offset, rc->rc_size);
 		}
 	}
@@ -558,7 +1147,7 @@ done:
 
 		for (c = 0; c < rm->rm_cols; c++) {
 			rc = &rm->rm_col[c];
-			cvd = vd->vdev_child[rc->rc_col];
+			cvd = vd->vdev_child[rc->rc_devidx];
 
 			if (rc->rc_error == 0)
 				continue;
@@ -571,8 +1160,8 @@ done:
 			zio_nowait(zio_vdev_child_io(rio, NULL, cvd,
 			    rc->rc_offset, rc->rc_data, rc->rc_size,
 			    ZIO_TYPE_WRITE, zio->io_priority,
-			    ZIO_FLAG_IO_REPAIR | ZIO_FLAG_CANFAIL |
-			    ZIO_FLAG_DONT_PROPAGATE, NULL, NULL));
+			    ZIO_FLAG_IO_REPAIR | ZIO_FLAG_DONT_PROPAGATE |
+			    ZIO_FLAG_CANFAIL, NULL, NULL));
 		}
 
 		zio_nowait(rio);
@@ -587,7 +1176,7 @@ done:
 static void
 vdev_raidz_state_change(vdev_t *vd, int faulted, int degraded)
 {
-	if (faulted > 1)
+	if (faulted > vd->vdev_nparity)
 		vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN,
 		    VDEV_AUX_NO_REPLICAS);
 	else if (degraded + faulted != 0)
diff --git a/usr/src/uts/common/fs/zfs/zfs_ioctl.c b/usr/src/uts/common/fs/zfs/zfs_ioctl.c
index 0cff445cf3..137a402538 100644
--- a/usr/src/uts/common/fs/zfs/zfs_ioctl.c
+++ b/usr/src/uts/common/fs/zfs/zfs_ioctl.c
@@ -564,11 +564,18 @@ zfs_ioc_vdev_add(zfs_cmd_t *zc)
 	return (error);
 }
 
-/* ARGSUSED */
 static int
 zfs_ioc_vdev_remove(zfs_cmd_t *zc)
 {
-	return (ENOTSUP);
+	spa_t *spa;
+	int error;
+
+	error = spa_open(zc->zc_name, &spa, FTAG);
+	if (error != 0)
+		return (error);
+	error = spa_vdev_remove(spa, zc->zc_guid, B_FALSE);
+	spa_close(spa, FTAG);
+	return (error);
 }
 
 static int
@@ -1176,6 +1183,12 @@ zfs_ioc_bookmark_name(zfs_cmd_t *zc)
 	return (error);
 }
 
+static int
+zfs_ioc_promote(zfs_cmd_t *zc)
+{
+	return (dsl_dataset_promote(zc->zc_name));
+}
+
 static zfs_ioc_vec_t zfs_ioc_vec[] = {
 	{ zfs_ioc_pool_create,		zfs_secpolicy_config,	pool_name },
 	{ zfs_ioc_pool_destroy,		zfs_secpolicy_config,	pool_name },
@@ -1215,7 +1228,8 @@ static zfs_ioc_vec_t zfs_ioc_vec[] = {
 	{ zfs_ioc_inject_list_next,	zfs_secpolicy_inject,	no_name },
 	{ zfs_ioc_error_log,		zfs_secpolicy_inject,	pool_name },
 	{ zfs_ioc_clear,		zfs_secpolicy_config,	pool_name },
-	{ zfs_ioc_bookmark_name,	zfs_secpolicy_inject,	pool_name }
+	{ zfs_ioc_bookmark_name,	zfs_secpolicy_inject,	pool_name },
+	{ zfs_ioc_promote,		zfs_secpolicy_write,	dataset_name }
 };
 
 static int
diff --git a/usr/src/uts/common/fs/zfs/zio.c b/usr/src/uts/common/fs/zfs/zio.c
index bf7c9791fe..640ed4e960 100644
--- a/usr/src/uts/common/fs/zfs/zio.c
+++ b/usr/src/uts/common/fs/zfs/zio.c
@@ -1392,7 +1392,6 @@ zio_vdev_io_assess(zio_t *zio)
 	/* XXPOLICY */
 	if (zio_should_retry(zio)) {
 		ASSERT(tvd == vd);
-		ASSERT(!(zio->io_flags & ZIO_FLAG_DONT_PROPAGATE));
 
 		zio->io_retries++;
 		zio->io_error = 0;
diff --git a/usr/src/uts/common/sys/fs/zfs.h b/usr/src/uts/common/sys/fs/zfs.h
index 5aaca0662b..07ada9c30e 100644
--- a/usr/src/uts/common/sys/fs/zfs.h
+++ b/usr/src/uts/common/sys/fs/zfs.h
@@ -106,12 +106,14 @@ int zfs_prop_readonly(zfs_prop_t);
 const char *zfs_prop_default_string(zfs_prop_t);
 uint64_t zfs_prop_default_numeric(zfs_prop_t);
 
+
 /*
- * On-disk format version.
+ * On-disk version number.
  */
 #define	ZFS_VERSION_1			1ULL
 #define	ZFS_VERSION_2			2ULL
-#define	ZFS_VERSION			ZFS_VERSION_2
+#define	ZFS_VERSION_3			3ULL
+#define	ZFS_VERSION			ZFS_VERSION_3
 
 /*
  * Symbolic names for the changes that caused a ZFS_VERSION switch.
@@ -126,6 +128,11 @@ uint64_t zfs_prop_default_numeric(zfs_prop_t);
  */
 #define	ZFS_VERSION_INITIAL		ZFS_VERSION_1
 #define	ZFS_VERSION_DITTO_BLOCKS	ZFS_VERSION_2
+#define	ZFS_VERSION_SPARES		ZFS_VERSION_3
+#define	ZFS_VERSION_RAID6		ZFS_VERSION_3
+#define	ZFS_VERSION_BPLIST_ACCOUNT	ZFS_VERSION_3
+#define	ZFS_VERSION_RAIDZ_DEFLATE	ZFS_VERSION_3
+#define	ZFS_VERSION_DNODE_BYTES		ZFS_VERSION_3
 
 /*
  * The following are configuration names used in the nvlist describing a pool's
@@ -156,6 +163,9 @@ uint64_t zfs_prop_default_numeric(zfs_prop_t);
 #define	ZPOOL_CONFIG_OFFLINE		"offline"
 #define	ZPOOL_CONFIG_ERRCOUNT		"error_count"
 #define	ZPOOL_CONFIG_NOT_PRESENT	"not_present"
+#define	ZPOOL_CONFIG_SPARES		"spares"
+#define	ZPOOL_CONFIG_IS_SPARE		"is_spare"
+#define	ZPOOL_CONFIG_NPARITY		"nparity"
 
 #define	VDEV_TYPE_ROOT			"root"
 #define	VDEV_TYPE_MIRROR		"mirror"
@@ -164,6 +174,7 @@ uint64_t zfs_prop_default_numeric(zfs_prop_t);
 #define	VDEV_TYPE_DISK			"disk"
 #define	VDEV_TYPE_FILE			"file"
 #define	VDEV_TYPE_MISSING		"missing"
+#define	VDEV_TYPE_SPARE			"spare"
 
 /*
  * This is needed in userland to report the minimum necessary device size.
@@ -206,18 +217,20 @@ typedef enum vdev_aux {
 	VDEV_AUX_TOO_SMALL,	/* vdev size is too small		*/
 	VDEV_AUX_BAD_LABEL,	/* the label is OK but invalid		*/
 	VDEV_AUX_VERSION_NEWER,	/* on-disk version is too new		*/
-	VDEV_AUX_VERSION_OLDER	/* on-disk version is too old		*/
+	VDEV_AUX_VERSION_OLDER,	/* on-disk version is too old		*/
+	VDEV_AUX_SPARED		/* hot spare used in another pool	*/
 } vdev_aux_t;
 
 /*
  * pool state.  The following states are written to disk as part of the normal
- * SPA lifecycle: ACTIVE, EXPORTED, DESTROYED.  The remaining states are
+ * SPA lifecycle: ACTIVE, EXPORTED, DESTROYED, SPARE.  The remaining states are
  * software abstractions used at various levels to communicate pool state.
  */
 typedef enum pool_state {
 	POOL_STATE_ACTIVE = 0,		/* In active use		*/
 	POOL_STATE_EXPORTED,		/* Explicitly exported		*/
 	POOL_STATE_DESTROYED,		/* Explicitly destroyed		*/
+	POOL_STATE_SPARE,		/* Reserved for hot spare use	*/
 	POOL_STATE_UNINITIALIZED,	/* Internal spa_t state		*/
 	POOL_STATE_UNAVAIL,		/* Internal libzfs state	*/
 	POOL_STATE_POTENTIALLY_ACTIVE	/* Internal libzfs state	*/
@@ -256,6 +269,7 @@ typedef struct vdev_stat {
 	uint64_t	vs_aux;			/* see vdev_aux_t	*/
 	uint64_t	vs_alloc;		/* space allocated	*/
 	uint64_t	vs_space;		/* total capacity	*/
+	uint64_t	vs_dspace;		/* deflated capacity	*/
 	uint64_t	vs_rsize;		/* replaceable dev size */
 	uint64_t	vs_ops[ZIO_TYPES];	/* operation count	*/
 	uint64_t	vs_bytes[ZIO_TYPES];	/* bytes read/written	*/
@@ -335,7 +349,8 @@ typedef enum zfs_ioc {
 	ZFS_IOC_INJECT_LIST_NEXT,
 	ZFS_IOC_ERROR_LOG,
 	ZFS_IOC_CLEAR,
-	ZFS_IOC_BOOKMARK_NAME
+	ZFS_IOC_BOOKMARK_NAME,
+	ZFS_IOC_PROMOTE
 } zfs_ioc_t;
 
 /*
author	eschrock <none@none>	2006-05-30 15:47:16 -0700
committer	eschrock <none@none>	2006-05-30 15:47:16 -0700
commit	99653d4ee642c6528e88224f12409a5f23060994 (patch)
tree	5cbcc540b8ed86b6a008f1084f9ca031368d926f
parent	354a1801a85aa6b61ff4d5e290ab708ba57e56a3 (diff)
download	illumos-joyent-99653d4ee642c6528e88224f12409a5f23060994.tar.gz