author		Jerry Jelinek <jerry.jelinek@joyent.com>	2018-05-16 11:14:46 +0000
committer	Jerry Jelinek <jerry.jelinek@joyent.com>	2018-05-16 11:14:46 +0000
commit		c884631e9c751a35384a284fea0975fe5174262d (patch)
tree		4d47f3e8853bf814a777e423c1eeb5ae997ec2dd
parent		c596bb2c28271ba1ba0b6af4ef4a3244b32bbfe1 (diff)
parent		0b2e8253986c5c761129b58cfdac46d204903de1 (diff)
[illumos-gate merge]
commit 0b2e8253986c5c761129b58cfdac46d204903de1
	9512 zfs remap poolname@snapname coredumps
commit 591e0e133f9980083db5d64ac33a30bcc3382ff7
	8115 parallel zfs mount
commit b4bf0cf0458759c67920a031021a9d96cd683cfe
	9426 metaslab size can exceed offset addressable by spacemap
commit b1da084b97cda9a2d087205b95c45a54ad654453
	9309 mdb: this statement may fall through

Conflicts:
	usr/src/lib/Makefile
-rw-r--r--	usr/src/cmd/mdb/common/mdb/mdb_io.c	|   1
-rw-r--r--	usr/src/cmd/mdb/common/mdb/mdb_main.c	|   2
-rw-r--r--	usr/src/cmd/mdb/common/modules/idm/idm.c	|  56
-rw-r--r--	usr/src/cmd/mdb/intel/kmdb/kmdb_dpi_isadep.c	|   1
-rw-r--r--	usr/src/cmd/zfs/zfs_main.c	| 140
-rw-r--r--	usr/src/lib/Makefile	|   8
-rw-r--r--	usr/src/lib/libzfs/Makefile.com	|   5
-rw-r--r--	usr/src/lib/libzfs/common/libzfs.h	|   5
-rw-r--r--	usr/src/lib/libzfs/common/libzfs_dataset.c	|  47
-rw-r--r--	usr/src/lib/libzfs/common/libzfs_impl.h	|  10
-rw-r--r--	usr/src/lib/libzfs/common/libzfs_mount.c	| 409
-rw-r--r--	usr/src/lib/libzfs/common/libzfs_taskq.c	| 297
-rw-r--r--	usr/src/lib/libzfs/common/libzfs_taskq.h	|  63
-rw-r--r--	usr/src/lib/libzfs/common/mapfile-vers	|   4
-rw-r--r--	usr/src/pkg/manifests/system-test-zfstest.mf	|  13
-rw-r--r--	usr/src/test/zfs-tests/runfiles/delphix.run	|   5
-rw-r--r--	usr/src/test/zfs-tests/runfiles/omnios.run	|   3
-rw-r--r--	usr/src/test/zfs-tests/runfiles/openindiana.run	|   3
-rw-r--r--	usr/src/test/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount.kshlib	|   8
-rw-r--r--	usr/src/test/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_all_fail.ksh	|  96
-rw-r--r--	usr/src/test/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_all_mountpoints.ksh	| 162
-rw-r--r--	usr/src/test/zfs-tests/tests/functional/cli_root/zfs_remap/Makefile	|  21
-rw-r--r--	usr/src/test/zfs-tests/tests/functional/cli_root/zfs_remap/cleanup.ksh	|  19
-rw-r--r--	usr/src/test/zfs-tests/tests/functional/cli_root/zfs_remap/setup.ksh	|  17
-rw-r--r--	usr/src/test/zfs-tests/tests/functional/cli_root/zfs_remap/zfs_remap_cliargs.ksh	|  78
-rw-r--r--	usr/src/test/zfs-tests/tests/functional/cli_root/zfs_remap/zfs_remap_obsolete_counts.ksh	|  76
-rw-r--r--	usr/src/uts/common/fs/zfs/sys/dsl_pool.h	|   2
-rw-r--r--	usr/src/uts/common/fs/zfs/vdev.c	|  75
28 files changed, 1431 insertions, 195 deletions
diff --git a/usr/src/cmd/mdb/common/mdb/mdb_io.c b/usr/src/cmd/mdb/common/mdb/mdb_io.c
index 12608a89d3..b8c04bcd06 100644
--- a/usr/src/cmd/mdb/common/mdb/mdb_io.c
+++ b/usr/src/cmd/mdb/common/mdb/mdb_io.c
@@ -908,6 +908,7 @@ iob_bytes2str(varglist_t *ap, intsize_t size)
case SZ_SHORT:
n = (ushort_t)VA_ARG(ap, uint_t);
+ break;
default:
n = (uint_t)VA_ARG(ap, uint_t);
diff --git a/usr/src/cmd/mdb/common/mdb/mdb_main.c b/usr/src/cmd/mdb/common/mdb/mdb_main.c
index a30ee45b7e..ab8ffb80cd 100644
--- a/usr/src/cmd/mdb/common/mdb/mdb_main.c
+++ b/usr/src/cmd/mdb/common/mdb/mdb_main.c
@@ -111,7 +111,7 @@ ucontext_t _mdb_abort_ctx; /* context fatal signal interrupted */
int _mdb_abort_rcount; /* number of times resume requested */
int _mdb_self_fd = -1; /* fd for self as for valid_frame */
-static void
+__NORETURN static void
terminate(int status)
{
(void) mdb_signal_blockall();
diff --git a/usr/src/cmd/mdb/common/modules/idm/idm.c b/usr/src/cmd/mdb/common/modules/idm/idm.c
index c465a9b8fb..4e4ad832c5 100644
--- a/usr/src/cmd/mdb/common/modules/idm/idm.c
+++ b/usr/src/cmd/mdb/common/modules/idm/idm.c
@@ -683,7 +683,8 @@ iscsi_cmd(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
static int
-iscsi_ini_hba_impl(uintptr_t addr, iscsi_dcmd_ctrl_t *idc) {
+iscsi_ini_hba_impl(uintptr_t addr, iscsi_dcmd_ctrl_t *idc)
+{
iscsi_hba_t ih;
if (mdb_vread(&ih, sizeof (ih), addr) != sizeof (ih)) {
@@ -1003,8 +1004,8 @@ iscsi_svc_walk_cb(uintptr_t addr, const void *list_walker_data,
/*ARGSUSED*/
static int
-iscsi_ini_hba_walk_cb(uintptr_t addr, const void *vhba,
- void *idc_void) {
+iscsi_ini_hba_walk_cb(uintptr_t addr, const void *vhba, void *idc_void)
+{
iscsi_dcmd_ctrl_t *idc = idc_void;
int rc;
@@ -2392,7 +2393,8 @@ iscsi_print_ini_lun(uintptr_t addr, const iscsi_lun_t *lun,
static int
iscsi_print_ini_cmd(uintptr_t addr, const iscsi_cmd_t *cmd,
- iscsi_dcmd_ctrl_t *idc) {
+ iscsi_dcmd_ctrl_t *idc)
+{
uintptr_t states_addr;
@@ -2666,13 +2668,15 @@ iscsi_sm_audit_impl(uintptr_t addr)
iscsi_iscsi_login_state(sar->sar_new_state);
break;
default:
+ state_name = new_state_name = "N/A";
break;
}
mdb_printf("%s|%s (%d)\n\t%9s %s (%d)\n",
ts_string, state_name, sar->sar_state,
"New State", new_state_name, sar->sar_new_state);
+
+ break;
default:
- state_name = new_state_name = "N/A";
break;
}
@@ -3222,9 +3226,8 @@ iscsi_isns(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
}
static int
-iscsi_ini_sess_walk_init(mdb_walk_state_t *wsp) {
-
-
+iscsi_ini_sess_walk_init(mdb_walk_state_t *wsp)
+{
if (wsp->walk_addr == NULL) {
mdb_warn("<iscsi_sess_t addr>::walk iscsi_ini_sess");
return (WALK_ERR);
@@ -3240,7 +3243,8 @@ iscsi_ini_sess_walk_init(mdb_walk_state_t *wsp) {
}
static int
-iscsi_ini_sess_step(mdb_walk_state_t *wsp) {
+iscsi_ini_sess_step(mdb_walk_state_t *wsp)
+{
int status;
if (wsp->walk_addr == NULL) {
@@ -3263,8 +3267,8 @@ iscsi_ini_sess_step(mdb_walk_state_t *wsp) {
}
static int
-iscsi_ini_conn_walk_init(mdb_walk_state_t *wsp) {
-
+iscsi_ini_conn_walk_init(mdb_walk_state_t *wsp)
+{
if (wsp->walk_addr == NULL) {
mdb_warn("<iscsi_conn_t addr>::walk iscsi_ini_conn");
return (WALK_DONE);
@@ -3280,7 +3284,8 @@ iscsi_ini_conn_walk_init(mdb_walk_state_t *wsp) {
}
static int
-iscsi_ini_conn_step(mdb_walk_state_t *wsp) {
+iscsi_ini_conn_step(mdb_walk_state_t *wsp)
+{
int status;
if (wsp->walk_addr == NULL) {
@@ -3304,8 +3309,8 @@ iscsi_ini_conn_step(mdb_walk_state_t *wsp) {
}
static int
-iscsi_ini_lun_walk_init(mdb_walk_state_t *wsp) {
-
+iscsi_ini_lun_walk_init(mdb_walk_state_t *wsp)
+{
if (wsp->walk_addr == NULL) {
mdb_warn("<iscsi_lun_t addr>::walk iscsi_ini_lun");
return (WALK_DONE);
@@ -3320,7 +3325,8 @@ iscsi_ini_lun_walk_init(mdb_walk_state_t *wsp) {
}
static int
-iscsi_ini_lun_step(mdb_walk_state_t *wsp) {
+iscsi_ini_lun_step(mdb_walk_state_t *wsp)
+{
int status;
if (wsp->walk_addr == NULL) {
@@ -3343,8 +3349,8 @@ iscsi_ini_lun_step(mdb_walk_state_t *wsp) {
}
static int
-iscsi_ini_cmd_walk_init(mdb_walk_state_t *wsp) {
-
+iscsi_ini_cmd_walk_init(mdb_walk_state_t *wsp)
+{
if (wsp->walk_addr == NULL) {
mdb_warn("<iscsi_cmd_t addr>::walk iscsi_ini_cmd");
return (WALK_DONE);
@@ -3359,7 +3365,8 @@ iscsi_ini_cmd_walk_init(mdb_walk_state_t *wsp) {
}
static int
-iscsi_ini_cmd_step(mdb_walk_state_t *wsp) {
+iscsi_ini_cmd_step(mdb_walk_state_t *wsp)
+{
int status;
if (wsp->walk_addr == NULL) {
@@ -3382,9 +3389,8 @@ iscsi_ini_cmd_step(mdb_walk_state_t *wsp) {
}
static int
-iscsi_ini_cmd_walk_cb(uintptr_t addr, const void *vcmd,
- void *vidc) {
-
+iscsi_ini_cmd_walk_cb(uintptr_t addr, const void *vcmd, void *vidc)
+{
const iscsi_cmd_t *cmd = vcmd;
iscsi_dcmd_ctrl_t *idc = vidc;
int rc;
@@ -3400,7 +3406,8 @@ iscsi_ini_cmd_walk_cb(uintptr_t addr, const void *vcmd,
}
static int
-iscsi_ini_hba_walk_init(mdb_walk_state_t *wsp) {
+iscsi_ini_hba_walk_init(mdb_walk_state_t *wsp)
+{
uintptr_t state_addr, array_addr;
int array_size;
struct i_ddi_soft_state *ss;
@@ -3408,7 +3415,7 @@ iscsi_ini_hba_walk_init(mdb_walk_state_t *wsp) {
hwi = (idm_hba_walk_info_t *)mdb_zalloc(
- sizeof (idm_hba_walk_info_t), UM_SLEEP|UM_GC);
+ sizeof (idm_hba_walk_info_t), UM_SLEEP|UM_GC);
if (!hwi) {
mdb_warn("unable to allocate storage for iscsi_ini_hba walk");
@@ -3461,7 +3468,8 @@ iscsi_ini_hba_walk_init(mdb_walk_state_t *wsp) {
}
static int
-iscsi_ini_hba_step(mdb_walk_state_t *wsp) {
+iscsi_ini_hba_step(mdb_walk_state_t *wsp)
+{
int status;
idm_hba_walk_info_t *hwi = (idm_hba_walk_info_t *)wsp->walk_data;
diff --git a/usr/src/cmd/mdb/intel/kmdb/kmdb_dpi_isadep.c b/usr/src/cmd/mdb/intel/kmdb/kmdb_dpi_isadep.c
index 100cbe4be1..56630dd8a6 100644
--- a/usr/src/cmd/mdb/intel/kmdb/kmdb_dpi_isadep.c
+++ b/usr/src/cmd/mdb/intel/kmdb/kmdb_dpi_isadep.c
@@ -51,6 +51,7 @@ kmdb_dpi_handle_fault(kreg_t trapno, kreg_t pc, kreg_t sp, int cpuid)
switch (trapno) {
case T_GPFLT:
errno = EACCES;
+ break;
default:
errno = EMDB_NOMAP;
}
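
The mdb and kmdb hunks above come from 9309 ("mdb: this statement may fall through"): switch cases that assigned a value and then fell through into the next case had the assignment silently overwritten, and the issue title matches GCC's -Wimplicit-fallthrough diagnostic. A minimal standalone sketch of the bug class and the fix, using hypothetical trap codes rather than the real mdb constants:

#include <errno.h>

#define	FAULT_PROTECTION	1	/* hypothetical trap codes, for illustration only */
#define	FAULT_OTHER		2

/*
 * Without the break, the FAULT_PROTECTION case falls through into the
 * default case and the EACCES assignment is silently overwritten --
 * exactly the pattern the hunks above correct.
 */
int
classify_fault(int trapno)
{
	int err;

	switch (trapno) {
	case FAULT_PROTECTION:
		err = EACCES;
		break;		/* the fix: end the case instead of falling through */
	default:
		err = ENXIO;	/* stand-in for EMDB_NOMAP */
		break;
	}
	return (err);
}
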
diff --git a/usr/src/cmd/zfs/zfs_main.c b/usr/src/cmd/zfs/zfs_main.c
index 7dac2f2237..d9f253fbf8 100644
--- a/usr/src/cmd/zfs/zfs_main.c
+++ b/usr/src/cmd/zfs/zfs_main.c
@@ -60,6 +60,7 @@
#include <sys/fs/zfs.h>
#include <sys/types.h>
#include <time.h>
+#include <synch.h>
#include <libzfs.h>
#include <libzfs_core.h>
@@ -5839,7 +5840,12 @@ zfs_do_holds(int argc, char **argv)
#define CHECK_SPINNER 30
#define SPINNER_TIME 3 /* seconds */
-#define MOUNT_TIME 5 /* seconds */
+#define MOUNT_TIME 1 /* seconds */
+
+typedef struct get_all_state {
+ boolean_t ga_verbose;
+ get_all_cb_t *ga_cbp;
+} get_all_state_t;
static int
get_one_dataset(zfs_handle_t *zhp, void *data)
@@ -5848,10 +5854,10 @@ get_one_dataset(zfs_handle_t *zhp, void *data)
static int spinval = 0;
static int spincheck = 0;
static time_t last_spin_time = (time_t)0;
- get_all_cb_t *cbp = data;
+ get_all_state_t *state = data;
zfs_type_t type = zfs_get_type(zhp);
- if (cbp->cb_verbose) {
+ if (state->ga_verbose) {
if (--spincheck < 0) {
time_t now = time(NULL);
if (last_spin_time + SPINNER_TIME < now) {
@@ -5877,25 +5883,23 @@ get_one_dataset(zfs_handle_t *zhp, void *data)
zfs_close(zhp);
return (0);
}
- libzfs_add_handle(cbp, zhp);
- assert(cbp->cb_used <= cbp->cb_alloc);
+ libzfs_add_handle(state->ga_cbp, zhp);
+ assert(state->ga_cbp->cb_used <= state->ga_cbp->cb_alloc);
return (0);
}
static void
-get_all_datasets(zfs_handle_t ***dslist, size_t *count, boolean_t verbose)
+get_all_datasets(get_all_cb_t *cbp, boolean_t verbose)
{
- get_all_cb_t cb = { 0 };
- cb.cb_verbose = verbose;
- cb.cb_getone = get_one_dataset;
+ get_all_state_t state = {
+ .ga_verbose = verbose,
+ .ga_cbp = cbp
+ };
if (verbose)
set_progress_header(gettext("Reading ZFS config"));
- (void) zfs_iter_root(g_zfs, get_one_dataset, &cb);
-
- *dslist = cb.cb_handles;
- *count = cb.cb_used;
+ (void) zfs_iter_root(g_zfs, get_one_dataset, &state);
if (verbose)
finish_progress(gettext("done."));
@@ -5906,8 +5910,19 @@ get_all_datasets(zfs_handle_t ***dslist, size_t *count, boolean_t verbose)
* similar, we have a common function with an extra parameter to determine which
* mode we are using.
*/
-#define OP_SHARE 0x1
-#define OP_MOUNT 0x2
+typedef enum { OP_SHARE, OP_MOUNT } share_mount_op_t;
+
+typedef struct share_mount_state {
+ share_mount_op_t sm_op;
+ boolean_t sm_verbose;
+ int sm_flags;
+ char *sm_options;
+ char *sm_proto; /* only valid for OP_SHARE */
+ mutex_t sm_lock; /* protects the remaining fields */
+ uint_t sm_total; /* number of filesystems to process */
+ uint_t sm_done; /* number of filesystems processed */
+ int sm_status; /* -1 if any of the share/mount operations failed */
+} share_mount_state_t;
/*
* Share or mount a dataset.
@@ -6149,6 +6164,29 @@ report_mount_progress(int current, int total)
update_progress(info);
}
+/*
+ * zfs_foreach_mountpoint() callback that mounts or shares one filesystem and
+ * updates the progress meter.
+ */
+static int
+share_mount_one_cb(zfs_handle_t *zhp, void *arg)
+{
+ share_mount_state_t *sms = arg;
+ int ret;
+
+ ret = share_mount_one(zhp, sms->sm_op, sms->sm_flags, sms->sm_proto,
+ B_FALSE, sms->sm_options);
+
+ mutex_enter(&sms->sm_lock);
+ if (ret != 0)
+ sms->sm_status = ret;
+ sms->sm_done++;
+ if (sms->sm_verbose)
+ report_mount_progress(sms->sm_done, sms->sm_total);
+ mutex_exit(&sms->sm_lock);
+ return (ret);
+}
+
static void
append_options(char *mntopts, char *newopts)
{
@@ -6221,8 +6259,6 @@ share_mount(int op, int argc, char **argv)
/* check number of arguments */
if (do_all) {
- zfs_handle_t **dslist = NULL;
- size_t i, count = 0;
char *protocol = NULL;
if (op == OP_SHARE && argc > 0) {
@@ -6243,33 +6279,44 @@ share_mount(int op, int argc, char **argv)
}
start_progress_timer();
- get_all_datasets(&dslist, &count, verbose);
+ get_all_cb_t cb = { 0 };
+ get_all_datasets(&cb, verbose);
- if (count == 0)
+ if (cb.cb_used == 0)
return (0);
- qsort(dslist, count, sizeof (void *), libzfs_dataset_cmp);
- sa_init_selective_arg_t sharearg;
- sharearg.zhandle_arr = dslist;
- sharearg.zhandle_len = count;
- if ((ret = zfs_init_libshare_arg(zfs_get_handle(dslist[0]),
- SA_INIT_SHARE_API_SELECTIVE, &sharearg)) != SA_OK) {
- (void) fprintf(stderr,
- gettext("Could not initialize libshare, %d"), ret);
- return (ret);
+ if (op == OP_SHARE) {
+ sa_init_selective_arg_t sharearg;
+ sharearg.zhandle_arr = cb.cb_handles;
+ sharearg.zhandle_len = cb.cb_used;
+ if ((ret = zfs_init_libshare_arg(g_zfs,
+ SA_INIT_SHARE_API_SELECTIVE, &sharearg)) != SA_OK) {
+ (void) fprintf(stderr, gettext(
+ "Could not initialize libshare, %d"), ret);
+ return (ret);
+ }
}
- for (i = 0; i < count; i++) {
- if (verbose)
- report_mount_progress(i, count);
-
- if (share_mount_one(dslist[i], op, flags, protocol,
- B_FALSE, options) != 0)
- ret = 1;
- zfs_close(dslist[i]);
- }
+ share_mount_state_t share_mount_state = { 0 };
+ share_mount_state.sm_op = op;
+ share_mount_state.sm_verbose = verbose;
+ share_mount_state.sm_flags = flags;
+ share_mount_state.sm_options = options;
+ share_mount_state.sm_proto = protocol;
+ share_mount_state.sm_total = cb.cb_used;
+ (void) mutex_init(&share_mount_state.sm_lock,
+ LOCK_NORMAL | LOCK_ERRORCHECK, NULL);
+ /*
+ * libshare isn't mt-safe, so only do the operation in parallel
+ * if we're mounting.
+ */
+ zfs_foreach_mountpoint(g_zfs, cb.cb_handles, cb.cb_used,
+ share_mount_one_cb, &share_mount_state, op == OP_MOUNT);
+ ret = share_mount_state.sm_status;
- free(dslist);
+ for (int i = 0; i < cb.cb_used; i++)
+ zfs_close(cb.cb_handles[i]);
+ free(cb.cb_handles);
} else if (argc == 0) {
struct mnttab entry;
@@ -6984,11 +7031,28 @@ zfs_do_diff(int argc, char **argv)
return (err != 0);
}
+/*
+ * zfs remap <filesystem | volume>
+ *
+ * Remap the indirect blocks in the given filesystem or volume.
+ */
static int
zfs_do_remap(int argc, char **argv)
{
const char *fsname;
int err = 0;
+ int c;
+
+ /* check options */
+ while ((c = getopt(argc, argv, "")) != -1) {
+ switch (c) {
+ case '?':
+ (void) fprintf(stderr,
+ gettext("invalid option '%c'\n"), optopt);
+ usage(B_FALSE);
+ }
+ }
+
if (argc != 2) {
(void) fprintf(stderr, gettext("wrong number of arguments\n"));
usage(B_FALSE);
diff --git a/usr/src/lib/Makefile b/usr/src/lib/Makefile
index ee1855d850..3bdaeda439 100644
--- a/usr/src/lib/Makefile
+++ b/usr/src/lib/Makefile
@@ -680,8 +680,8 @@ libsmbfs: libkrb5 libsec libidmap pkcs11
libsmbios: libdevinfo
libsrpt: libstmf
libstmf: libscf
-libstmfproxy: libstmf libpthread
-libsum: libast
+libstmfproxy: libstmf libpthread
+libsum: libast
libsun_ima: libdevinfo libsysevent
libsysevent: libsecdb
libtecla: libcurses
@@ -697,7 +697,7 @@ libvrrpadm: libdladm libscf
libvscan: libscf libsecdb
libzdoor: libc libzonecfg libcontract
libzfs: libdevid libgen libuutil libadm libavl libefi libidmap \
- libumem libtsol libzfs_core libcmdutils
+ libumem libtsol libzfs_core
libzfs_jni: libdiskmgt libzfs
libzonecfg: libuuid libsysevent libsec libbrand libpool libscf libproc \
libuutil libbsm libsecdb
@@ -713,7 +713,7 @@ passwdutil: libsldap
pkcs11: libcryptoutil libgen libuuid
policykit: dbusdeps
print: libldap5 libmd5 libsendfile
-pylibbe: libbe libzfs
+pylibbe: libbe libzfs
pysolaris: libsec libidmap
pyzfs: libzfs
raidcfg_plugins: libraidcfg librcm libcfgadm libpicl libpicltree
diff --git a/usr/src/lib/libzfs/Makefile.com b/usr/src/lib/libzfs/Makefile.com
index c4a8af38b8..581adf9120 100644
--- a/usr/src/lib/libzfs/Makefile.com
+++ b/usr/src/lib/libzfs/Makefile.com
@@ -21,7 +21,7 @@
#
# Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
# Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>
-# Copyright (c) 2011, 2016 by Delphix. All rights reserved.
+# Copyright (c) 2011, 2017 by Delphix. All rights reserved.
#
LIBRARY= libzfs.a
@@ -49,7 +49,8 @@ OBJS_COMMON= \
libzfs_pool.o \
libzfs_sendrecv.o \
libzfs_status.o \
- libzfs_util.o
+ libzfs_util.o \
+ libzfs_taskq.o
OBJECTS= $(OBJS_COMMON) $(OBJS_SHARED)
diff --git a/usr/src/lib/libzfs/common/libzfs.h b/usr/src/lib/libzfs/common/libzfs.h
index 8fc19ba61e..92594c59a0 100644
--- a/usr/src/lib/libzfs/common/libzfs.h
+++ b/usr/src/lib/libzfs/common/libzfs.h
@@ -578,12 +578,11 @@ typedef struct get_all_cb {
zfs_handle_t **cb_handles;
size_t cb_alloc;
size_t cb_used;
- boolean_t cb_verbose;
- int (*cb_getone)(zfs_handle_t *, void *);
} get_all_cb_t;
+void zfs_foreach_mountpoint(libzfs_handle_t *, zfs_handle_t **, size_t,
+ zfs_iter_f, void *, boolean_t);
void libzfs_add_handle(get_all_cb_t *, zfs_handle_t *);
-int libzfs_dataset_cmp(const void *, const void *);
/*
* Functions to create and destroy datasets.
diff --git a/usr/src/lib/libzfs/common/libzfs_dataset.c b/usr/src/lib/libzfs/common/libzfs_dataset.c
index 79df1aa994..54018af2c6 100644
--- a/usr/src/lib/libzfs/common/libzfs_dataset.c
+++ b/usr/src/lib/libzfs/common/libzfs_dataset.c
@@ -54,6 +54,7 @@
#include <idmap.h>
#include <aclutils.h>
#include <directory.h>
+#include <time.h>
#include <sys/dnode.h>
#include <sys/spa.h>
@@ -789,6 +790,8 @@ libzfs_mnttab_cache_compare(const void *arg1, const void *arg2)
void
libzfs_mnttab_init(libzfs_handle_t *hdl)
{
+ (void) mutex_init(&hdl->libzfs_mnttab_cache_lock,
+ LOCK_NORMAL | LOCK_ERRORCHECK, NULL);
assert(avl_numnodes(&hdl->libzfs_mnttab_cache) == 0);
avl_create(&hdl->libzfs_mnttab_cache, libzfs_mnttab_cache_compare,
sizeof (mnttab_node_t), offsetof(mnttab_node_t, mtn_node));
@@ -829,6 +832,7 @@ libzfs_mnttab_fini(libzfs_handle_t *hdl)
free(mtn);
}
avl_destroy(&hdl->libzfs_mnttab_cache);
+ (void) mutex_destroy(&hdl->libzfs_mnttab_cache_lock);
}
void
@@ -843,6 +847,7 @@ libzfs_mnttab_find(libzfs_handle_t *hdl, const char *fsname,
{
mnttab_node_t find;
mnttab_node_t *mtn;
+ int ret = ENOENT;
if (!hdl->libzfs_mnttab_enable) {
struct mnttab srch = { 0 };
@@ -858,6 +863,7 @@ libzfs_mnttab_find(libzfs_handle_t *hdl, const char *fsname,
return (ENOENT);
}
+ mutex_enter(&hdl->libzfs_mnttab_cache_lock);
if (avl_numnodes(&hdl->libzfs_mnttab_cache) == 0)
libzfs_mnttab_update(hdl);
@@ -865,9 +871,10 @@ libzfs_mnttab_find(libzfs_handle_t *hdl, const char *fsname,
mtn = avl_find(&hdl->libzfs_mnttab_cache, &find, NULL);
if (mtn) {
*entry = mtn->mtn_mt;
- return (0);
+ ret = 0;
}
- return (ENOENT);
+ mutex_exit(&hdl->libzfs_mnttab_cache_lock);
+ return (ret);
}
void
@@ -876,14 +883,16 @@ libzfs_mnttab_add(libzfs_handle_t *hdl, const char *special,
{
mnttab_node_t *mtn;
- if (avl_numnodes(&hdl->libzfs_mnttab_cache) == 0)
- return;
- mtn = zfs_alloc(hdl, sizeof (mnttab_node_t));
- mtn->mtn_mt.mnt_special = zfs_strdup(hdl, special);
- mtn->mtn_mt.mnt_mountp = zfs_strdup(hdl, mountp);
- mtn->mtn_mt.mnt_fstype = zfs_strdup(hdl, MNTTYPE_ZFS);
- mtn->mtn_mt.mnt_mntopts = zfs_strdup(hdl, mntopts);
- avl_add(&hdl->libzfs_mnttab_cache, mtn);
+ mutex_enter(&hdl->libzfs_mnttab_cache_lock);
+ if (avl_numnodes(&hdl->libzfs_mnttab_cache) != 0) {
+ mtn = zfs_alloc(hdl, sizeof (mnttab_node_t));
+ mtn->mtn_mt.mnt_special = zfs_strdup(hdl, special);
+ mtn->mtn_mt.mnt_mountp = zfs_strdup(hdl, mountp);
+ mtn->mtn_mt.mnt_fstype = zfs_strdup(hdl, MNTTYPE_ZFS);
+ mtn->mtn_mt.mnt_mntopts = zfs_strdup(hdl, mntopts);
+ avl_add(&hdl->libzfs_mnttab_cache, mtn);
+ }
+ mutex_exit(&hdl->libzfs_mnttab_cache_lock);
}
void
@@ -892,6 +901,7 @@ libzfs_mnttab_remove(libzfs_handle_t *hdl, const char *fsname)
mnttab_node_t find;
mnttab_node_t *ret;
+ mutex_enter(&hdl->libzfs_mnttab_cache_lock);
find.mtn_mt.mnt_special = (char *)fsname;
if ((ret = avl_find(&hdl->libzfs_mnttab_cache, (void *)&find, NULL))
!= NULL) {
@@ -902,6 +912,7 @@ libzfs_mnttab_remove(libzfs_handle_t *hdl, const char *fsname)
free(ret->mtn_mt.mnt_mntopts);
free(ret);
}
+ mutex_exit(&hdl->libzfs_mnttab_cache_lock);
}
int
@@ -3886,12 +3897,24 @@ zfs_remap_indirects(libzfs_handle_t *hdl, const char *fs)
char errbuf[1024];
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
- "cannot remap filesystem '%s' "), fs);
+ "cannot remap dataset '%s'"), fs);
err = lzc_remap(fs);
if (err != 0) {
- (void) zfs_standard_error(hdl, err, errbuf);
+ switch (err) {
+ case ENOTSUP:
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "pool must be upgraded"));
+ (void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
+ break;
+ case EINVAL:
+ (void) zfs_error(hdl, EZFS_BADTYPE, errbuf);
+ break;
+ default:
+ (void) zfs_standard_error(hdl, err, errbuf);
+ break;
+ }
}
return (err);
diff --git a/usr/src/lib/libzfs/common/libzfs_impl.h b/usr/src/lib/libzfs/common/libzfs_impl.h
index 9e5641ec46..4c0c89e989 100644
--- a/usr/src/lib/libzfs/common/libzfs_impl.h
+++ b/usr/src/lib/libzfs/common/libzfs_impl.h
@@ -23,7 +23,7 @@
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011 Pawel Jakub Dawidek. All rights reserved.
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
- * Copyright (c) 2011, 2016 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2017 by Delphix. All rights reserved.
*/
#ifndef _LIBZFS_IMPL_H
@@ -34,6 +34,7 @@
#include <sys/nvpair.h>
#include <sys/dmu.h>
#include <sys/zfs_ioctl.h>
+#include <synch.h>
#include <libuutil.h>
#include <libzfs.h>
@@ -74,6 +75,13 @@ struct libzfs_handle {
int libzfs_storeerr; /* stuff error messages into buffer */
void *libzfs_sharehdl; /* libshare handle */
boolean_t libzfs_mnttab_enable;
+ /*
+ * We need a lock to handle the case where parallel mount
+ * threads are populating the mnttab cache simultaneously. The
+ * lock only protects the integrity of the avl tree, and does
+ * not protect the contents of the mnttab entries themselves.
+ */
+ mutex_t libzfs_mnttab_cache_lock;
avl_tree_t libzfs_mnttab_cache;
int libzfs_pool_iter;
topo_hdl_t *libzfs_topo_hdl;
diff --git a/usr/src/lib/libzfs/common/libzfs_mount.c b/usr/src/lib/libzfs/common/libzfs_mount.c
index 9fd37825a3..cf15735f3f 100644
--- a/usr/src/lib/libzfs/common/libzfs_mount.c
+++ b/usr/src/lib/libzfs/common/libzfs_mount.c
@@ -22,7 +22,7 @@
/*
* Copyright 2015 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2014, 2016 by Delphix. All rights reserved.
+ * Copyright (c) 2014, 2017 by Delphix. All rights reserved.
* Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>
* Copyright 2017 Joyent, Inc.
* Copyright 2017 RackTop Systems.
@@ -34,25 +34,25 @@
* they are used by mount and unmount and when changing a filesystem's
* mountpoint.
*
- * zfs_is_mounted()
- * zfs_mount()
- * zfs_unmount()
- * zfs_unmountall()
+ * zfs_is_mounted()
+ * zfs_mount()
+ * zfs_unmount()
+ * zfs_unmountall()
*
* This file also contains the functions used to manage sharing filesystems via
* NFS and iSCSI:
*
- * zfs_is_shared()
- * zfs_share()
- * zfs_unshare()
+ * zfs_is_shared()
+ * zfs_share()
+ * zfs_unshare()
*
- * zfs_is_shared_nfs()
- * zfs_is_shared_smb()
- * zfs_share_proto()
- * zfs_shareall();
- * zfs_unshare_nfs()
- * zfs_unshare_smb()
- * zfs_unshareall_nfs()
+ * zfs_is_shared_nfs()
+ * zfs_is_shared_smb()
+ * zfs_share_proto()
+ * zfs_shareall();
+ * zfs_unshare_nfs()
+ * zfs_unshare_smb()
+ * zfs_unshareall_nfs()
* zfs_unshareall_smb()
* zfs_unshareall()
* zfs_unshareall_bypath()
@@ -60,8 +60,8 @@
* The following functions are available for pool consumers, and will
* mount/unmount and share/unshare all datasets within pool:
*
- * zpool_enable_datasets()
- * zpool_disable_datasets()
+ * zpool_enable_datasets()
+ * zpool_disable_datasets()
*/
#include <dirent.h>
@@ -83,11 +83,15 @@
#include <libzfs.h>
#include "libzfs_impl.h"
+#include "libzfs_taskq.h"
#include <libshare.h>
#include <sys/systeminfo.h>
#define MAXISALEN 257 /* based on sysinfo(2) man page */
+static int mount_tq_nthr = 512; /* taskq threads for multi-threaded mounting */
+
+static void zfs_mount_task(void *);
static int zfs_share_proto(zfs_handle_t *, zfs_share_proto_t *);
zfs_share_type_t zfs_is_shared_proto(zfs_handle_t *, char **,
zfs_share_proto_t);
@@ -1077,25 +1081,32 @@ remove_mountpoint(zfs_handle_t *zhp)
}
}
+/*
+ * Add the given zfs handle to the cb_handles array, dynamically reallocating
+ * the array if it is out of space.
+ */
void
libzfs_add_handle(get_all_cb_t *cbp, zfs_handle_t *zhp)
{
if (cbp->cb_alloc == cbp->cb_used) {
size_t newsz;
- void *ptr;
+ zfs_handle_t **newhandles;
- newsz = cbp->cb_alloc ? cbp->cb_alloc * 2 : 64;
- ptr = zfs_realloc(zhp->zfs_hdl,
- cbp->cb_handles, cbp->cb_alloc * sizeof (void *),
- newsz * sizeof (void *));
- cbp->cb_handles = ptr;
+ newsz = cbp->cb_alloc != 0 ? cbp->cb_alloc * 2 : 64;
+ newhandles = zfs_realloc(zhp->zfs_hdl,
+ cbp->cb_handles, cbp->cb_alloc * sizeof (zfs_handle_t *),
+ newsz * sizeof (zfs_handle_t *));
+ cbp->cb_handles = newhandles;
cbp->cb_alloc = newsz;
}
cbp->cb_handles[cbp->cb_used++] = zhp;
}
+/*
+ * Recursive helper function used during file system enumeration
+ */
static int
-mount_cb(zfs_handle_t *zhp, void *data)
+zfs_iter_cb(zfs_handle_t *zhp, void *data)
{
get_all_cb_t *cbp = data;
@@ -1121,104 +1132,350 @@ mount_cb(zfs_handle_t *zhp, void *data)
}
libzfs_add_handle(cbp, zhp);
- if (zfs_iter_filesystems(zhp, mount_cb, cbp) != 0) {
+ if (zfs_iter_filesystems(zhp, zfs_iter_cb, cbp) != 0) {
zfs_close(zhp);
return (-1);
}
return (0);
}
+/*
+ * Sort comparator that compares two mountpoint paths. We sort these paths so
+ * that subdirectories immediately follow their parents. This means that we
+ * effectively treat the '/' character as the lowest value non-nul char. An
+ * example sorted list using this comparator would look like:
+ *
+ * /foo
+ * /foo/bar
+ * /foo/bar/baz
+ * /foo/baz
+ * /foo.bar
+ *
+ * The mounting code depends on this ordering to deterministically iterate
+ * over filesystems in order to spawn parallel mount tasks.
+ */
int
-libzfs_dataset_cmp(const void *a, const void *b)
+mountpoint_cmp(const void *arga, const void *argb)
{
- zfs_handle_t **za = (zfs_handle_t **)a;
- zfs_handle_t **zb = (zfs_handle_t **)b;
+ zfs_handle_t *const *zap = arga;
+ zfs_handle_t *za = *zap;
+ zfs_handle_t *const *zbp = argb;
+ zfs_handle_t *zb = *zbp;
char mounta[MAXPATHLEN];
char mountb[MAXPATHLEN];
+ const char *a = mounta;
+ const char *b = mountb;
boolean_t gota, gotb;
- if ((gota = (zfs_get_type(*za) == ZFS_TYPE_FILESYSTEM)) != 0)
- verify(zfs_prop_get(*za, ZFS_PROP_MOUNTPOINT, mounta,
+ gota = (zfs_get_type(za) == ZFS_TYPE_FILESYSTEM);
+ if (gota) {
+ verify(zfs_prop_get(za, ZFS_PROP_MOUNTPOINT, mounta,
sizeof (mounta), NULL, NULL, 0, B_FALSE) == 0);
- if ((gotb = (zfs_get_type(*zb) == ZFS_TYPE_FILESYSTEM)) != 0)
- verify(zfs_prop_get(*zb, ZFS_PROP_MOUNTPOINT, mountb,
+ }
+ gotb = (zfs_get_type(zb) == ZFS_TYPE_FILESYSTEM);
+ if (gotb) {
+ verify(zfs_prop_get(zb, ZFS_PROP_MOUNTPOINT, mountb,
sizeof (mountb), NULL, NULL, 0, B_FALSE) == 0);
+ }
- if (gota && gotb)
- return (strcmp(mounta, mountb));
+ if (gota && gotb) {
+ while (*a != '\0' && (*a == *b)) {
+ a++;
+ b++;
+ }
+ if (*a == *b)
+ return (0);
+ if (*a == '\0')
+ return (-1);
+ if (*b == '\0')
+ return (1);
+ if (*a == '/')
+ return (-1);
+ if (*b == '/')
+ return (1);
+ return (*a < *b ? -1 : *a > *b);
+ }
if (gota)
return (-1);
if (gotb)
return (1);
- return (strcmp(zfs_get_name(a), zfs_get_name(b)));
+ /*
+ * If neither filesystem has a mountpoint, revert to sorting by
+ * dataset name.
+ */
+ return (strcmp(zfs_get_name(za), zfs_get_name(zb)));
+}
+
+/*
+ * Return true if path2 is a child of path1.
+ */
+static boolean_t
+libzfs_path_contains(const char *path1, const char *path2)
+{
+ return (strstr(path2, path1) == path2 && path2[strlen(path1)] == '/');
+}
+
+/*
+ * Given a mountpoint specified by idx in the handles array, find the first
+ * non-descendant of that mountpoint and return its index. Descendant paths
+ * start with the parent's path. This function relies on the ordering
+ * enforced by mountpoint_cmp().
+ */
+static int
+non_descendant_idx(zfs_handle_t **handles, size_t num_handles, int idx)
+{
+ char parent[ZFS_MAXPROPLEN];
+ char child[ZFS_MAXPROPLEN];
+ int i;
+
+ verify(zfs_prop_get(handles[idx], ZFS_PROP_MOUNTPOINT, parent,
+ sizeof (parent), NULL, NULL, 0, B_FALSE) == 0);
+
+ for (i = idx + 1; i < num_handles; i++) {
+ verify(zfs_prop_get(handles[i], ZFS_PROP_MOUNTPOINT, child,
+ sizeof (child), NULL, NULL, 0, B_FALSE) == 0);
+ if (!libzfs_path_contains(parent, child))
+ break;
+ }
+ return (i);
+}
+
+typedef struct mnt_param {
+ libzfs_handle_t *mnt_hdl;
+ zfs_taskq_t *mnt_tq;
+ zfs_handle_t **mnt_zhps; /* filesystems to mount */
+ size_t mnt_num_handles;
+ int mnt_idx; /* Index of selected entry to mount */
+ zfs_iter_f mnt_func;
+ void *mnt_data;
+} mnt_param_t;
+
+/*
+ * Allocate and populate the parameter struct for mount function, and
+ * schedule mounting of the entry selected by idx.
+ */
+static void
+zfs_dispatch_mount(libzfs_handle_t *hdl, zfs_handle_t **handles,
+ size_t num_handles, int idx, zfs_iter_f func, void *data, zfs_taskq_t *tq)
+{
+ mnt_param_t *mnt_param = zfs_alloc(hdl, sizeof (mnt_param_t));
+
+ mnt_param->mnt_hdl = hdl;
+ mnt_param->mnt_tq = tq;
+ mnt_param->mnt_zhps = handles;
+ mnt_param->mnt_num_handles = num_handles;
+ mnt_param->mnt_idx = idx;
+ mnt_param->mnt_func = func;
+ mnt_param->mnt_data = data;
+
+ (void) zfs_taskq_dispatch(tq, zfs_mount_task, (void*)mnt_param,
+ ZFS_TQ_SLEEP);
+}
+
+/*
+ * This is the structure used to keep state of mounting or sharing operations
+ * during a call to zpool_enable_datasets().
+ */
+typedef struct mount_state {
+ /*
+ * ms_mntstatus is set to -1 if any mount fails. While multiple threads
+ * could update this variable concurrently, no synchronization is
+ * needed as it's only ever set to -1.
+ */
+ int ms_mntstatus;
+ int ms_mntflags;
+ const char *ms_mntopts;
+} mount_state_t;
+
+static int
+zfs_mount_one(zfs_handle_t *zhp, void *arg)
+{
+ mount_state_t *ms = arg;
+ int ret = 0;
+
+ if (zfs_mount(zhp, ms->ms_mntopts, ms->ms_mntflags) != 0)
+ ret = ms->ms_mntstatus = -1;
+ return (ret);
+}
+
+static int
+zfs_share_one(zfs_handle_t *zhp, void *arg)
+{
+ mount_state_t *ms = arg;
+ int ret = 0;
+
+ if (zfs_share(zhp) != 0)
+ ret = ms->ms_mntstatus = -1;
+ return (ret);
+}
+
+/*
+ * Task queue function to mount one file system. On completion, it finds and
+ * schedules its children to be mounted. This depends on the sorting done in
+ * zfs_foreach_mountpoint(). Note that the degenerate case (chain of entries
+ * each descending from the previous) will have no parallelism since we always
+ * have to wait for the parent to finish mounting before we can schedule
+ * its children.
+ */
+static void
+zfs_mount_task(void *arg)
+{
+ mnt_param_t *mp = arg;
+ int idx = mp->mnt_idx;
+ zfs_handle_t **handles = mp->mnt_zhps;
+ size_t num_handles = mp->mnt_num_handles;
+ char mountpoint[ZFS_MAXPROPLEN];
+
+ verify(zfs_prop_get(handles[idx], ZFS_PROP_MOUNTPOINT, mountpoint,
+ sizeof (mountpoint), NULL, NULL, 0, B_FALSE) == 0);
+
+ if (mp->mnt_func(handles[idx], mp->mnt_data) != 0)
+ return;
+
+ /*
+ * We dispatch tasks to mount filesystems with mountpoints underneath
+ * this one. We do this by dispatching the next filesystem with a
+ * descendant mountpoint of the one we just mounted, then skip all of
+ * its descendants, dispatch the next descendant mountpoint, and so on.
+ * The non_descendant_idx() function skips over filesystems that are
+ * descendants of the filesystem we just dispatched.
+ */
+ for (int i = idx + 1; i < num_handles;
+ i = non_descendant_idx(handles, num_handles, i)) {
+ char child[ZFS_MAXPROPLEN];
+ verify(zfs_prop_get(handles[i], ZFS_PROP_MOUNTPOINT,
+ child, sizeof (child), NULL, NULL, 0, B_FALSE) == 0);
+
+ if (!libzfs_path_contains(mountpoint, child))
+ break; /* not a descendant, return */
+ zfs_dispatch_mount(mp->mnt_hdl, handles, num_handles, i,
+ mp->mnt_func, mp->mnt_data, mp->mnt_tq);
+ }
+ free(mp);
+}
+
+/*
+ * Issue the func callback for each ZFS handle contained in the handles
+ * array. This function is used to mount all datasets, and so this function
+ * guarantees that filesystems for parent mountpoints are called before their
+ * children. As such, before issuing any callbacks, we first sort the array
+ * of handles by mountpoint.
+ *
+ * Callbacks are issued in one of two ways:
+ *
+ * 1. Sequentially: If the parallel argument is B_FALSE or the ZFS_SERIAL_MOUNT
+ * environment variable is set, then we issue callbacks sequentially.
+ *
+ * 2. In parallel: If the parallel argument is B_TRUE and the ZFS_SERIAL_MOUNT
+ * environment variable is not set, then we use a taskq to dispatch threads
+ * to mount filesystems in parallel. This function dispatches tasks to mount
+ * the filesystems at the top-level mountpoints, and these tasks in turn
+ * are responsible for recursively mounting filesystems in their children
+ * mountpoints.
+ */
+void
+zfs_foreach_mountpoint(libzfs_handle_t *hdl, zfs_handle_t **handles,
+ size_t num_handles, zfs_iter_f func, void *data, boolean_t parallel)
+{
+ /*
+ * The ZFS_SERIAL_MOUNT environment variable is an undocumented
+ * variable that can be used as a convenience to do a/b comparison
+ * of serial vs. parallel mounting.
+ */
+ boolean_t serial_mount = !parallel ||
+ (getenv("ZFS_SERIAL_MOUNT") != NULL);
+
+ /*
+ * Sort the datasets by mountpoint. See mountpoint_cmp for details
+ * of how these are sorted.
+ */
+ qsort(handles, num_handles, sizeof (zfs_handle_t *), mountpoint_cmp);
+
+ if (serial_mount) {
+ for (int i = 0; i < num_handles; i++) {
+ func(handles[i], data);
+ }
+ return;
+ }
+
+ /*
+ * Issue the callback function for each dataset using a parallel
+ * algorithm that uses a taskq to manage threads.
+ */
+ zfs_taskq_t *tq = zfs_taskq_create("mount_taskq", mount_tq_nthr, 0,
+ mount_tq_nthr, mount_tq_nthr, ZFS_TASKQ_PREPOPULATE);
+
+ /*
+ * There may be multiple "top level" mountpoints outside of the pool's
+ * root mountpoint, e.g.: /foo /bar. Dispatch a mount task for each of
+ * these.
+ */
+ for (int i = 0; i < num_handles;
+ i = non_descendant_idx(handles, num_handles, i)) {
+ zfs_dispatch_mount(hdl, handles, num_handles, i, func, data,
+ tq);
+ }
+
+ zfs_taskq_wait(tq); /* wait for all scheduled mounts to complete */
+ zfs_taskq_destroy(tq);
}
/*
* Mount and share all datasets within the given pool. This assumes that no
- * datasets within the pool are currently mounted. Because users can create
- * complicated nested hierarchies of mountpoints, we first gather all the
- * datasets and mountpoints within the pool, and sort them by mountpoint. Once
- * we have the list of all filesystems, we iterate over them in order and mount
- * and/or share each one.
+ * datasets within the pool are currently mounted.
*/
#pragma weak zpool_mount_datasets = zpool_enable_datasets
int
zpool_enable_datasets(zpool_handle_t *zhp, const char *mntopts, int flags)
{
get_all_cb_t cb = { 0 };
- libzfs_handle_t *hdl = zhp->zpool_hdl;
+ mount_state_t ms = { 0 };
zfs_handle_t *zfsp;
- int i, ret = -1;
- int *good;
+ sa_init_selective_arg_t sharearg;
+ int ret = 0;
- /*
- * Gather all non-snap datasets within the pool.
- */
- if ((zfsp = zfs_open(hdl, zhp->zpool_name, ZFS_TYPE_DATASET)) == NULL)
+ if ((zfsp = zfs_open(zhp->zpool_hdl, zhp->zpool_name,
+ ZFS_TYPE_DATASET)) == NULL)
goto out;
- libzfs_add_handle(&cb, zfsp);
- if (zfs_iter_filesystems(zfsp, mount_cb, &cb) != 0)
- goto out;
- /*
- * Sort the datasets by mountpoint.
- */
- qsort(cb.cb_handles, cb.cb_used, sizeof (void *),
- libzfs_dataset_cmp);
/*
- * And mount all the datasets, keeping track of which ones
- * succeeded or failed.
+ * Gather all non-snapshot datasets within the pool. Start by adding
+ * the root filesystem for this pool to the list, and then iterate
+ * over all child filesystems.
*/
- if ((good = zfs_alloc(zhp->zpool_hdl,
- cb.cb_used * sizeof (int))) == NULL)
+ libzfs_add_handle(&cb, zfsp);
+ if (zfs_iter_filesystems(zfsp, zfs_iter_cb, &cb) != 0)
goto out;
- ret = 0;
- for (i = 0; i < cb.cb_used; i++) {
- if (zfs_mount(cb.cb_handles[i], mntopts, flags) != 0)
- ret = -1;
- else
- good[i] = 1;
- }
+ ms.ms_mntopts = mntopts;
+ ms.ms_mntflags = flags;
+ zfs_foreach_mountpoint(zhp->zpool_hdl, cb.cb_handles, cb.cb_used,
+ zfs_mount_one, &ms, B_TRUE);
+ if (ms.ms_mntstatus != 0)
+ ret = ms.ms_mntstatus;
/*
- * Then share all the ones that need to be shared. This needs
- * to be a separate pass in order to avoid excessive reloading
- * of the configuration. Good should never be NULL since
- * zfs_alloc is supposed to exit if memory isn't available.
+ * Share all filesystems that need to be shared. This needs to be
+ * a separate pass because libshare is not mt-safe, and so we need
+ * to share serially.
*/
- for (i = 0; i < cb.cb_used; i++) {
- if (good[i] && zfs_share(cb.cb_handles[i]) != 0)
- ret = -1;
- }
+ sharearg.zhandle_arr = cb.cb_handles;
+ sharearg.zhandle_len = cb.cb_used;
+ if ((ret = zfs_init_libshare_arg(zhp->zpool_hdl,
+ SA_INIT_SHARE_API_SELECTIVE, &sharearg)) != 0)
+ goto out;
- free(good);
+ ms.ms_mntstatus = 0;
+ zfs_foreach_mountpoint(zhp->zpool_hdl, cb.cb_handles, cb.cb_used,
+ zfs_share_one, &ms, B_FALSE);
+ if (ms.ms_mntstatus != 0)
+ ret = ms.ms_mntstatus;
out:
- for (i = 0; i < cb.cb_used; i++)
+ for (int i = 0; i < cb.cb_used; i++)
zfs_close(cb.cb_handles[i]);
free(cb.cb_handles);
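
For orientation, this is roughly how a libzfs consumer drives the new parallel mount path introduced above: collect handles into a get_all_cb_t and hand them to zfs_foreach_mountpoint(), which sorts them by mountpoint and fans the callback out over the taskq. A minimal sketch modeled on zpool_enable_datasets(); the flat gather_cb() (the real zfs_iter_cb() also filters on canmount and mountpoint) and the lack of failure tracking are simplifications, not part of the actual change:

#include <stdlib.h>
#include <libzfs.h>

/* Gather zhp and every filesystem below it into the get_all_cb_t in arg. */
static int
gather_cb(zfs_handle_t *zhp, void *arg)
{
	get_all_cb_t *cbp = arg;

	libzfs_add_handle(cbp, zhp);
	return (zfs_iter_filesystems(zhp, gather_cb, cbp));
}

/* Invoked once per filesystem, potentially from several taskq threads. */
static int
mount_one(zfs_handle_t *zhp, void *arg)
{
	return (zfs_mount(zhp, arg, 0));	/* arg carries the mount options */
}

static int
mount_pool_parallel(libzfs_handle_t *hdl, const char *poolname)
{
	get_all_cb_t cb = { 0 };
	zfs_handle_t *root;

	if ((root = zfs_open(hdl, poolname, ZFS_TYPE_FILESYSTEM)) == NULL)
		return (-1);
	if (gather_cb(root, &cb) != 0)
		return (-1);

	/*
	 * Sorts by mountpoint so parents mount before their children, then
	 * dispatches mount_one across the taskq (B_TRUE selects parallel mode).
	 */
	zfs_foreach_mountpoint(hdl, cb.cb_handles, cb.cb_used,
	    mount_one, NULL, B_TRUE);

	for (size_t i = 0; i < cb.cb_used; i++)
		zfs_close(cb.cb_handles[i]);
	free(cb.cb_handles);
	return (0);
}

Because zfs_foreach_mountpoint() returns void, the real callers (zpool_enable_datasets() above and the share_mount path in zfs_main.c) record per-callback failures in a shared state struct -- mount_state_t here, share_mount_state_t in the command -- to compute the overall exit status.
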
diff --git a/usr/src/lib/libzfs/common/libzfs_taskq.c b/usr/src/lib/libzfs/common/libzfs_taskq.c
new file mode 100644
index 0000000000..28bf649710
--- /dev/null
+++ b/usr/src/lib/libzfs/common/libzfs_taskq.c
@@ -0,0 +1,297 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2012 Garrett D'Amore <garrett@damore.org>. All rights reserved.
+ * Copyright (c) 2014, 2018 by Delphix. All rights reserved.
+ */
+
+#include <thread.h>
+#include <synch.h>
+#include <unistd.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/debug.h>
+#include <sys/sysmacros.h>
+
+#include "libzfs_taskq.h"
+
+#define ZFS_TASKQ_ACTIVE 0x00010000
+#define ZFS_TASKQ_NAMELEN 31
+
+typedef struct zfs_taskq_ent {
+ struct zfs_taskq_ent *ztqent_next;
+ struct zfs_taskq_ent *ztqent_prev;
+ ztask_func_t *ztqent_func;
+ void *ztqent_arg;
+ uintptr_t ztqent_flags;
+} zfs_taskq_ent_t;
+
+struct zfs_taskq {
+ char ztq_name[ZFS_TASKQ_NAMELEN + 1];
+ mutex_t ztq_lock;
+ rwlock_t ztq_threadlock;
+ cond_t ztq_dispatch_cv;
+ cond_t ztq_wait_cv;
+ thread_t *ztq_threadlist;
+ int ztq_flags;
+ int ztq_active;
+ int ztq_nthreads;
+ int ztq_nalloc;
+ int ztq_minalloc;
+ int ztq_maxalloc;
+ cond_t ztq_maxalloc_cv;
+ int ztq_maxalloc_wait;
+ zfs_taskq_ent_t *ztq_freelist;
+ zfs_taskq_ent_t ztq_task;
+};
+
+static zfs_taskq_ent_t *
+ztask_alloc(zfs_taskq_t *ztq, int ztqflags)
+{
+ zfs_taskq_ent_t *t;
+ timestruc_t ts;
+ int err;
+
+again: if ((t = ztq->ztq_freelist) != NULL &&
+ ztq->ztq_nalloc >= ztq->ztq_minalloc) {
+ ztq->ztq_freelist = t->ztqent_next;
+ } else {
+ if (ztq->ztq_nalloc >= ztq->ztq_maxalloc) {
+ if (!(ztqflags & UMEM_NOFAIL))
+ return (NULL);
+
+ /*
+ * We don't want to exceed ztq_maxalloc, but we can't
+ * wait for other tasks to complete (and thus free up
+ * task structures) without risking deadlock with
+ * the caller. So, we just delay for one second
+ * to throttle the allocation rate. If we have tasks
+ * complete before one second timeout expires then
+ * zfs_taskq_ent_free will signal us and we will
+ * immediately retry the allocation.
+ */
+ ztq->ztq_maxalloc_wait++;
+
+ ts.tv_sec = 1;
+ ts.tv_nsec = 0;
+ err = cond_reltimedwait(&ztq->ztq_maxalloc_cv,
+ &ztq->ztq_lock, &ts);
+
+ ztq->ztq_maxalloc_wait--;
+ if (err == 0)
+ goto again; /* signaled */
+ }
+ mutex_exit(&ztq->ztq_lock);
+
+ t = umem_alloc(sizeof (zfs_taskq_ent_t), ztqflags);
+
+ mutex_enter(&ztq->ztq_lock);
+ if (t != NULL)
+ ztq->ztq_nalloc++;
+ }
+ return (t);
+}
+
+static void
+ztask_free(zfs_taskq_t *ztq, zfs_taskq_ent_t *t)
+{
+ if (ztq->ztq_nalloc <= ztq->ztq_minalloc) {
+ t->ztqent_next = ztq->ztq_freelist;
+ ztq->ztq_freelist = t;
+ } else {
+ ztq->ztq_nalloc--;
+ mutex_exit(&ztq->ztq_lock);
+ umem_free(t, sizeof (zfs_taskq_ent_t));
+ mutex_enter(&ztq->ztq_lock);
+ }
+
+ if (ztq->ztq_maxalloc_wait)
+ VERIFY0(cond_signal(&ztq->ztq_maxalloc_cv));
+}
+
+zfs_taskqid_t
+zfs_taskq_dispatch(zfs_taskq_t *ztq, ztask_func_t func, void *arg,
+ uint_t ztqflags)
+{
+ zfs_taskq_ent_t *t;
+
+ mutex_enter(&ztq->ztq_lock);
+ ASSERT(ztq->ztq_flags & ZFS_TASKQ_ACTIVE);
+ if ((t = ztask_alloc(ztq, ztqflags)) == NULL) {
+ mutex_exit(&ztq->ztq_lock);
+ return (0);
+ }
+ if (ztqflags & ZFS_TQ_FRONT) {
+ t->ztqent_next = ztq->ztq_task.ztqent_next;
+ t->ztqent_prev = &ztq->ztq_task;
+ } else {
+ t->ztqent_next = &ztq->ztq_task;
+ t->ztqent_prev = ztq->ztq_task.ztqent_prev;
+ }
+ t->ztqent_next->ztqent_prev = t;
+ t->ztqent_prev->ztqent_next = t;
+ t->ztqent_func = func;
+ t->ztqent_arg = arg;
+ t->ztqent_flags = 0;
+ VERIFY0(cond_signal(&ztq->ztq_dispatch_cv));
+ mutex_exit(&ztq->ztq_lock);
+ return (1);
+}
+
+void
+zfs_taskq_wait(zfs_taskq_t *ztq)
+{
+ mutex_enter(&ztq->ztq_lock);
+ while (ztq->ztq_task.ztqent_next != &ztq->ztq_task ||
+ ztq->ztq_active != 0) {
+ int ret = cond_wait(&ztq->ztq_wait_cv, &ztq->ztq_lock);
+ VERIFY(ret == 0 || ret == EINTR);
+ }
+ mutex_exit(&ztq->ztq_lock);
+}
+
+static void *
+zfs_taskq_thread(void *arg)
+{
+ zfs_taskq_t *ztq = arg;
+ zfs_taskq_ent_t *t;
+ boolean_t prealloc;
+
+ mutex_enter(&ztq->ztq_lock);
+ while (ztq->ztq_flags & ZFS_TASKQ_ACTIVE) {
+ if ((t = ztq->ztq_task.ztqent_next) == &ztq->ztq_task) {
+ int ret;
+ if (--ztq->ztq_active == 0)
+ VERIFY0(cond_broadcast(&ztq->ztq_wait_cv));
+ ret = cond_wait(&ztq->ztq_dispatch_cv, &ztq->ztq_lock);
+ VERIFY(ret == 0 || ret == EINTR);
+ ztq->ztq_active++;
+ continue;
+ }
+ t->ztqent_prev->ztqent_next = t->ztqent_next;
+ t->ztqent_next->ztqent_prev = t->ztqent_prev;
+ t->ztqent_next = NULL;
+ t->ztqent_prev = NULL;
+ prealloc = t->ztqent_flags & ZFS_TQENT_FLAG_PREALLOC;
+ mutex_exit(&ztq->ztq_lock);
+
+ VERIFY0(rw_rdlock(&ztq->ztq_threadlock));
+ t->ztqent_func(t->ztqent_arg);
+ VERIFY0(rw_unlock(&ztq->ztq_threadlock));
+
+ mutex_enter(&ztq->ztq_lock);
+ if (!prealloc)
+ ztask_free(ztq, t);
+ }
+ ztq->ztq_nthreads--;
+ VERIFY0(cond_broadcast(&ztq->ztq_wait_cv));
+ mutex_exit(&ztq->ztq_lock);
+ return (NULL);
+}
+
+/*ARGSUSED*/
+zfs_taskq_t *
+zfs_taskq_create(const char *name, int nthreads, pri_t pri, int minalloc,
+ int maxalloc, uint_t flags)
+{
+ zfs_taskq_t *ztq = umem_zalloc(sizeof (zfs_taskq_t), UMEM_NOFAIL);
+ int t;
+
+ ASSERT3S(nthreads, >=, 1);
+
+ VERIFY0(rwlock_init(&ztq->ztq_threadlock, USYNC_THREAD, NULL));
+ VERIFY0(cond_init(&ztq->ztq_dispatch_cv, USYNC_THREAD, NULL));
+ VERIFY0(cond_init(&ztq->ztq_wait_cv, USYNC_THREAD, NULL));
+ VERIFY0(cond_init(&ztq->ztq_maxalloc_cv, USYNC_THREAD, NULL));
+ VERIFY0(mutex_init(
+ &ztq->ztq_lock, LOCK_NORMAL | LOCK_ERRORCHECK, NULL));
+
+ (void) strncpy(ztq->ztq_name, name, ZFS_TASKQ_NAMELEN + 1);
+
+ ztq->ztq_flags = flags | ZFS_TASKQ_ACTIVE;
+ ztq->ztq_active = nthreads;
+ ztq->ztq_nthreads = nthreads;
+ ztq->ztq_minalloc = minalloc;
+ ztq->ztq_maxalloc = maxalloc;
+ ztq->ztq_task.ztqent_next = &ztq->ztq_task;
+ ztq->ztq_task.ztqent_prev = &ztq->ztq_task;
+ ztq->ztq_threadlist =
+ umem_alloc(nthreads * sizeof (thread_t), UMEM_NOFAIL);
+
+ if (flags & ZFS_TASKQ_PREPOPULATE) {
+ mutex_enter(&ztq->ztq_lock);
+ while (minalloc-- > 0)
+ ztask_free(ztq, ztask_alloc(ztq, UMEM_NOFAIL));
+ mutex_exit(&ztq->ztq_lock);
+ }
+
+ for (t = 0; t < nthreads; t++) {
+ (void) thr_create(0, 0, zfs_taskq_thread,
+ ztq, THR_BOUND, &ztq->ztq_threadlist[t]);
+ }
+
+ return (ztq);
+}
+
+void
+zfs_taskq_destroy(zfs_taskq_t *ztq)
+{
+ int t;
+ int nthreads = ztq->ztq_nthreads;
+
+ zfs_taskq_wait(ztq);
+
+ mutex_enter(&ztq->ztq_lock);
+
+ ztq->ztq_flags &= ~ZFS_TASKQ_ACTIVE;
+ VERIFY0(cond_broadcast(&ztq->ztq_dispatch_cv));
+
+ while (ztq->ztq_nthreads != 0) {
+ int ret = cond_wait(&ztq->ztq_wait_cv, &ztq->ztq_lock);
+ VERIFY(ret == 0 || ret == EINTR);
+ }
+
+ ztq->ztq_minalloc = 0;
+ while (ztq->ztq_nalloc != 0) {
+ ASSERT(ztq->ztq_freelist != NULL);
+ ztask_free(ztq, ztask_alloc(ztq, UMEM_NOFAIL));
+ }
+
+ mutex_exit(&ztq->ztq_lock);
+
+ for (t = 0; t < nthreads; t++)
+ (void) thr_join(ztq->ztq_threadlist[t], NULL, NULL);
+
+ umem_free(ztq->ztq_threadlist, nthreads * sizeof (thread_t));
+
+ VERIFY0(rwlock_destroy(&ztq->ztq_threadlock));
+ VERIFY0(cond_destroy(&ztq->ztq_dispatch_cv));
+ VERIFY0(cond_destroy(&ztq->ztq_wait_cv));
+ VERIFY0(cond_destroy(&ztq->ztq_maxalloc_cv));
+ VERIFY0(mutex_destroy(&ztq->ztq_lock));
+
+ umem_free(ztq, sizeof (zfs_taskq_t));
+}
diff --git a/usr/src/lib/libzfs/common/libzfs_taskq.h b/usr/src/lib/libzfs/common/libzfs_taskq.h
new file mode 100644
index 0000000000..7ac045738c
--- /dev/null
+++ b/usr/src/lib/libzfs/common/libzfs_taskq.h
@@ -0,0 +1,63 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _ZFS_TASKQ_H
+#define _ZFS_TASKQ_H
+
+#include <stdint.h>
+#include <umem.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct zfs_taskq zfs_taskq_t;
+typedef uintptr_t zfs_taskqid_t;
+typedef void (ztask_func_t)(void *);
+
+#define ZFS_TQENT_FLAG_PREALLOC 0x1 /* taskq_dispatch_ent used */
+
+#define ZFS_TASKQ_PREPOPULATE 0x0001
+
+#define ZFS_TQ_SLEEP UMEM_NOFAIL /* Can block for memory */
+#define ZFS_TQ_NOSLEEP UMEM_DEFAULT /* cannot block for memory; may fail */
+#define ZFS_TQ_FRONT 0x08 /* Queue in front */
+
+extern zfs_taskq_t *zfs_taskq_create(const char *, int, pri_t, int,
+ int, uint_t);
+extern void zfs_taskq_destroy(zfs_taskq_t *);
+
+extern zfs_taskqid_t zfs_taskq_dispatch(zfs_taskq_t *, ztask_func_t,
+ void *, uint_t);
+
+extern void zfs_taskq_wait(zfs_taskq_t *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ZFS_TASKQ_H */
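
The header above is the whole surface of the new private taskq: create a pool of bound threads, dispatch work items, wait for the queue to drain, destroy it. A minimal usage sketch under assumed values (an 8-thread queue and a toy work function; the only real consumer is zfs_foreach_mountpoint(), which creates a 512-thread queue with ZFS_TASKQ_PREPOPULATE in the same way):

#include <stdio.h>
#include "libzfs_taskq.h"

static void
do_work(void *arg)
{
	(void) printf("task %d\n", (int)(uintptr_t)arg);
}

static void
run_batch(void)
{
	int nthr = 8;		/* assumed thread count; the mount path uses 512 */

	/* nthreads, pri, minalloc, maxalloc mirror the kernel taskq_create(). */
	zfs_taskq_t *tq = zfs_taskq_create("example_tq", nthr, 0,
	    nthr, nthr, ZFS_TASKQ_PREPOPULATE);

	for (int i = 0; i < 32; i++) {
		(void) zfs_taskq_dispatch(tq, do_work,
		    (void *)(uintptr_t)i, ZFS_TQ_SLEEP);
	}

	zfs_taskq_wait(tq);	/* returns once every dispatched task has run */
	zfs_taskq_destroy(tq);
}

ZFS_TQ_SLEEP maps to UMEM_NOFAIL, so dispatch blocks (throttled by ztq_maxalloc) rather than failing when task structures run short; a ZFS_TQ_NOSLEEP caller must instead check for a zero taskqid return.
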
diff --git a/usr/src/lib/libzfs/common/mapfile-vers b/usr/src/lib/libzfs/common/mapfile-vers
index 5b38fc3eae..17a60e830d 100644
--- a/usr/src/lib/libzfs/common/mapfile-vers
+++ b/usr/src/lib/libzfs/common/mapfile-vers
@@ -21,8 +21,8 @@
#
# Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
-# Copyright (c) 2011, 2015 by Delphix. All rights reserved.
# Copyright (c) 2012, Joyent, Inc. All rights reserved.
+# Copyright (c) 2011, 2017 by Delphix. All rights reserved.
# Copyright 2016 Nexenta Systems, Inc.
#
@@ -51,7 +51,6 @@ SYMBOL_VERSION SUNWprivate_1.1 {
fletcher_4_incremental_native;
fletcher_4_incremental_byteswap;
libzfs_add_handle;
- libzfs_dataset_cmp;
libzfs_errno;
libzfs_error_action;
libzfs_error_description;
@@ -79,6 +78,7 @@ SYMBOL_VERSION SUNWprivate_1.1 {
zfs_destroy_snaps;
zfs_destroy_snaps_nvl;
zfs_expand_proplist;
+ zfs_foreach_mountpoint;
zfs_get_handle;
zfs_get_holds;
zfs_get_hole_count;
diff --git a/usr/src/pkg/manifests/system-test-zfstest.mf b/usr/src/pkg/manifests/system-test-zfstest.mf
index 47ed5a1f33..c10c421246 100644
--- a/usr/src/pkg/manifests/system-test-zfstest.mf
+++ b/usr/src/pkg/manifests/system-test-zfstest.mf
@@ -56,6 +56,7 @@ dir path=opt/zfs-tests/tests/functional/cli_root/zfs_mount
dir path=opt/zfs-tests/tests/functional/cli_root/zfs_promote
dir path=opt/zfs-tests/tests/functional/cli_root/zfs_property
dir path=opt/zfs-tests/tests/functional/cli_root/zfs_receive
+dir path=opt/zfs-tests/tests/functional/cli_root/zfs_remap
dir path=opt/zfs-tests/tests/functional/cli_root/zfs_rename
dir path=opt/zfs-tests/tests/functional/cli_root/zfs_reservation
dir path=opt/zfs-tests/tests/functional/cli_root/zfs_rollback
@@ -873,6 +874,11 @@ file path=opt/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_012_neg \
file \
path=opt/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_all_001_pos \
mode=0555
+file path=opt/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_all_fail \
+ mode=0555
+file \
+ path=opt/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_all_mountpoints \
+ mode=0555
file path=opt/zfs-tests/tests/functional/cli_root/zfs_promote/cleanup \
mode=0555
file path=opt/zfs-tests/tests/functional/cli_root/zfs_promote/setup mode=0555
@@ -953,6 +959,13 @@ file \
file \
path=opt/zfs-tests/tests/functional/cli_root/zfs_receive/zfs_receive_014_pos \
mode=0555
+file path=opt/zfs-tests/tests/functional/cli_root/zfs_remap/cleanup mode=0555
+file path=opt/zfs-tests/tests/functional/cli_root/zfs_remap/setup mode=0555
+file path=opt/zfs-tests/tests/functional/cli_root/zfs_remap/zfs_remap_cliargs \
+ mode=0555
+file \
+ path=opt/zfs-tests/tests/functional/cli_root/zfs_remap/zfs_remap_obsolete_counts \
+ mode=0555
file path=opt/zfs-tests/tests/functional/cli_root/zfs_rename/cleanup mode=0555
file path=opt/zfs-tests/tests/functional/cli_root/zfs_rename/setup mode=0555
file path=opt/zfs-tests/tests/functional/cli_root/zfs_rename/zfs_rename.cfg \
diff --git a/usr/src/test/zfs-tests/runfiles/delphix.run b/usr/src/test/zfs-tests/runfiles/delphix.run
index e37f606fe0..b5974f8476 100644
--- a/usr/src/test/zfs-tests/runfiles/delphix.run
+++ b/usr/src/test/zfs-tests/runfiles/delphix.run
@@ -145,7 +145,7 @@ tests = ['zfs_mount_001_pos', 'zfs_mount_002_pos', 'zfs_mount_003_pos',
'zfs_mount_004_pos', 'zfs_mount_005_pos', 'zfs_mount_006_pos',
'zfs_mount_007_pos', 'zfs_mount_008_pos', 'zfs_mount_009_neg',
'zfs_mount_010_neg', 'zfs_mount_011_neg', 'zfs_mount_012_neg',
- 'zfs_mount_all_001_pos']
+ 'zfs_mount_all_001_pos', 'zfs_mount_all_fail', 'zfs_mount_all_mountpoints']
[/opt/zfs-tests/tests/functional/cli_root/zfs_promote]
tests = ['zfs_promote_001_pos', 'zfs_promote_002_pos', 'zfs_promote_003_pos',
@@ -162,6 +162,9 @@ tests = ['zfs_receive_001_pos', 'zfs_receive_002_pos', 'zfs_receive_003_pos',
'zfs_receive_010_pos', 'zfs_receive_011_pos', 'zfs_receive_012_pos',
'zfs_receive_013_pos', 'zfs_receive_014_pos']
+[/opt/zfs-tests/tests/functional/cli_root/zfs_remap]
+tests = ['zfs_remap_cliargs', 'zfs_remap_obsolete_counts']
+
[/opt/zfs-tests/tests/functional/cli_root/zfs_rename]
tests = ['zfs_rename_001_pos', 'zfs_rename_002_pos', 'zfs_rename_003_pos',
'zfs_rename_004_neg', 'zfs_rename_005_neg', 'zfs_rename_006_pos',
diff --git a/usr/src/test/zfs-tests/runfiles/omnios.run b/usr/src/test/zfs-tests/runfiles/omnios.run
index ebf446f61a..57a828c86f 100644
--- a/usr/src/test/zfs-tests/runfiles/omnios.run
+++ b/usr/src/test/zfs-tests/runfiles/omnios.run
@@ -162,6 +162,9 @@ tests = ['zfs_rename_001_pos', 'zfs_rename_002_pos', 'zfs_rename_003_pos',
'zfs_rename_010_neg', 'zfs_rename_011_pos', 'zfs_rename_012_neg',
'zfs_rename_013_pos']
+[/opt/zfs-tests/tests/functional/cli_root/zfs_remap]
+tests = ['zfs_remap_cliargs', 'zfs_remap_obsolete_counts']
+
[/opt/zfs-tests/tests/functional/cli_root/zfs_reservation]
tests = ['zfs_reservation_001_pos', 'zfs_reservation_002_pos']
diff --git a/usr/src/test/zfs-tests/runfiles/openindiana.run b/usr/src/test/zfs-tests/runfiles/openindiana.run
index 2d8af0bf69..4cefe8f228 100644
--- a/usr/src/test/zfs-tests/runfiles/openindiana.run
+++ b/usr/src/test/zfs-tests/runfiles/openindiana.run
@@ -162,6 +162,9 @@ tests = ['zfs_rename_001_pos', 'zfs_rename_002_pos', 'zfs_rename_003_pos',
'zfs_rename_010_neg', 'zfs_rename_011_pos', 'zfs_rename_012_neg',
'zfs_rename_013_pos']
+[/opt/zfs-tests/tests/functional/cli_root/zfs_remap]
+tests = ['zfs_remap_cliargs', 'zfs_remap_obsolete_counts']
+
[/opt/zfs-tests/tests/functional/cli_root/zfs_reservation]
tests = ['zfs_reservation_001_pos', 'zfs_reservation_002_pos']
diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount.kshlib b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount.kshlib
index 41cd9698cc..0e57115e0d 100644
--- a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount.kshlib
+++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount.kshlib
@@ -25,7 +25,7 @@
#
#
-# Copyright (c) 2016 by Delphix. All rights reserved.
+# Copyright (c) 2017 by Delphix. All rights reserved.
#
. $STF_SUITE/include/libtest.shlib
@@ -84,13 +84,11 @@ function setup_filesystem #disklist #pool #fs #mntpoint #type #vdev
fi
case "$type" in
- 'ctr') log_must zfs create $pool/$fs
- log_must zfs set mountpoint=$mntpoint $pool/$fs
+ 'ctr') log_must zfs create -o mountpoint=$mntpoint $pool/$fs
;;
'vol') log_must zfs create -V $VOLSIZE $pool/$fs
;;
- *) log_must zfs create $pool/$fs
- log_must zfs set mountpoint=$mntpoint $pool/$fs
+ *) log_must zfs create -o mountpoint=$mntpoint $pool/$fs
;;
esac
diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_all_fail.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_all_fail.ksh
new file mode 100644
index 0000000000..d7fcd20afa
--- /dev/null
+++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_all_fail.ksh
@@ -0,0 +1,96 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2017 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zfs_mount/zfs_mount.kshlib
+
+# DESCRIPTION:
+# Verify that if 'zfs mount -a' fails to mount one filesystem,
+# the command fails with a non-zero error code, but all other
+# filesystems are mounted.
+#
+# STRATEGY:
+# 1. Create zfs filesystems
+# 2. Unmount a leaf filesystem
+# 3. Create a file in the above filesystem's mountpoint
+# 4. Verify that 'zfs mount -a' fails to mount the above
+# 5. Verify that all other filesystems were mounted
+#
+
+verify_runnable "both"
+
+typeset -a filesystems
+typeset path=${TEST_BASE_DIR%%/}/testroot$$/$TESTPOOL
+typeset fscount=10
+
+function setup_all
+{
+ # Create $fscount filesystems at the top level of $path
+ for ((i=0; i<$fscount; i++)); do
+ setup_filesystem "$DISKS" "$TESTPOOL" $i "$path/$i" ctr
+ done
+
+ zfs list -r $TESTPOOL
+
+ return 0
+}
+
+function cleanup_all
+{
+ export __ZFS_POOL_RESTRICT="$TESTPOOL"
+ log_must zfs $unmountall
+ unset __ZFS_POOL_RESTRICT
+
+ [[ -d ${TEST_BASE_DIR%%/}/testroot$$ ]] && \
+ rm -rf ${TEST_BASE_DIR%%/}/testroot$$
+}
+
+log_onexit cleanup_all
+
+log_must setup_all
+
+#
+# Unmount all of the above so that we can create the stray file
+# in one of the mountpoint directories.
+#
+export __ZFS_POOL_RESTRICT="$TESTPOOL"
+log_must zfs $unmountall
+unset __ZFS_POOL_RESTRICT
+
+# All of our filesystems should be unmounted at this point
+for ((i=0; i<$fscount; i++)); do
+ log_mustnot mounted "$TESTPOOL/$i"
+done
+
+# Create a stray file in one filesystem's mountpoint
+touch $path/0/strayfile
+
+# Verify that zfs mount -a fails
+export __ZFS_POOL_RESTRICT="$TESTPOOL"
+log_mustnot zfs $mountall
+unset __ZFS_POOL_RESTRICT
+
+# All filesystems except for "0" should be mounted
+log_mustnot mounted "$TESTPOOL/0"
+for ((i=1; i<$fscount; i++)); do
+ log_must mounted "$TESTPOOL/$i"
+done
+
+log_pass "'zfs $mountall' failed as expected."
diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_all_mountpoints.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_all_mountpoints.ksh
new file mode 100644
index 0000000000..3e6a24bbcd
--- /dev/null
+++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_all_mountpoints.ksh
@@ -0,0 +1,162 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2017 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zfs_mount/zfs_mount.kshlib
+
+# DESCRIPTION:
+# Verify that 'zfs mount -a' succeeds given a set of filesystems
+# whose mountpoints have a parent/child relationship which is
+# counter to the filesystem parent/child relationship.
+#
+# STRATEGY:
+# 1. Create zfs filesystems within the given pool.
+# 2. Unmount all the filesystems.
+# 3. Verify that the 'zfs mount -a' command succeeds,
+# and all available ZFS filesystems are mounted.
+# 4. Verify that 'zfs mount' is identical to 'df -F zfs'
+#
+
+verify_runnable "both"
+
+typeset -a filesystems
+
+function setup_all
+{
+ typeset path=${TEST_BASE_DIR%%/}/testroot$$/$TESTPOOL
+ typeset fscount=10
+
+ #
+ # Generate an array of filesystem names that represent a deep
+	# hierarchy, as follows:
+ #
+ # 0
+ # 0/1
+ # 0/1/2
+ # 0/1/2/3
+ # 0/1/2/3/4
+ # ...
+ #
+ fs=0
+ for ((i=0; i<$fscount; i++)); do
+ if [[ $i -gt 0 ]]; then
+ fs=$fs/$i
+ fi
+ filesystems+=($fs)
+ done
+
+ # Create all of the above filesystems
+ for ((i=0; i<$fscount; i++)); do
+ fs=${filesystems[$i]}
+ setup_filesystem "$DISKS" "$TESTPOOL" "$fs" "$path/$i" ctr
+ done
+
+ zfs list -r $TESTPOOL
+
+ #
+	# Unmount all of the above so that we can set up our convoluted
+ # mount paths.
+ #
+ export __ZFS_POOL_RESTRICT="$TESTPOOL"
+ log_must zfs $unmountall
+ unset __ZFS_POOL_RESTRICT
+
+ #
+ # Configure the mount paths so that each mountpoint is contained
+ # in a child filesystem. We should end up with something like the
+ # following structure (modulo the number of filesystems):
+ #
+ # NAME MOUNTPOINT
+ # testpool /testpool
+ # testpool/0 /testroot25416/testpool/0/1/2/3/4/5/6
+ # testpool/0/1 /testroot25416/testpool/0/1/2/3/4/5
+ # testpool/0/1/2 /testroot25416/testpool/0/1/2/3/4
+ # testpool/0/1/2/3 /testroot25416/testpool/0/1/2/3
+ # testpool/0/1/2/3/4 /testroot25416/testpool/0/1/2
+ # testpool/0/1/2/3/4/5 /testroot25416/testpool/0/1
+ # testpool/0/1/2/3/4/5/6 /testroot25416/testpool/0
+ #
+ for ((i=0; i<$fscount; i++)); do
+ fs=$TESTPOOL/${filesystems[$(($fscount - $i - 1))]}
+ mnt=$path/${filesystems[$i]}
+ zfs set mountpoint=$mnt $fs
+ done
+
+ zfs list -r $TESTPOOL
+
+ return 0
+}
+
+function cleanup_all
+{
+ export __ZFS_POOL_RESTRICT="$TESTPOOL"
+ log_must zfs $unmountall
+ unset __ZFS_POOL_RESTRICT
+
+ for fs in ${filesystems[@]}; do
+ cleanup_filesystem "$TESTPOOL" "$fs"
+ done
+ [[ -d ${TEST_BASE_DIR%%/}/testroot$$ ]] && \
+ rm -rf ${TEST_BASE_DIR%%/}/testroot$$
+}
+
+#
+# This function takes a single true/false argument. If true it will verify that
+# all file systems are mounted. If false it will verify that they are not
+# mounted.
+#
+function verify_all
+{
+ if $1; then
+ logfunc=log_must
+ else
+ logfunc=log_mustnot
+ fi
+
+ for fs in ${filesystems[@]}; do
+ $logfunc mounted "$TESTPOOL/$fs"
+ done
+
+ return 0
+}
+
+log_onexit cleanup_all
+
+log_must setup_all
+
+export __ZFS_POOL_RESTRICT="$TESTPOOL"
+log_must zfs $unmountall
+unset __ZFS_POOL_RESTRICT
+
+verify_all false
+
+export __ZFS_POOL_RESTRICT="$TESTPOOL"
+log_must zfs $mountall
+unset __ZFS_POOL_RESTRICT
+
+verify_all true
+
+log_note "Verify that 'zfs $mountcmd' will display " \
+ "all ZFS filesystems currently mounted."
+
+verify_mount_display
+
+log_pass "'zfs $mountall' succeeds as root, " \
+ "and all available ZFS filesystems are mounted."
diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_remap/Makefile b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_remap/Makefile
new file mode 100644
index 0000000000..658776d0cd
--- /dev/null
+++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_remap/Makefile
@@ -0,0 +1,21 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2012, 2016 by Delphix. All rights reserved.
+#
+
+include $(SRC)/Makefile.master
+
+ROOTOPTPKG = $(ROOT)/opt/zfs-tests
+TARGETDIR = $(ROOTOPTPKG)/tests/functional/cli_root/zfs_remap
+
+include $(SRC)/test/zfs-tests/Makefile.com
diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_remap/cleanup.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_remap/cleanup.ksh
new file mode 100644
index 0000000000..e78deacd5b
--- /dev/null
+++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_remap/cleanup.ksh
@@ -0,0 +1,19 @@
+#!/bin/ksh -p
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2018, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+default_cleanup
diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_remap/setup.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_remap/setup.ksh
new file mode 100644
index 0000000000..4497dbd746
--- /dev/null
+++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_remap/setup.ksh
@@ -0,0 +1,17 @@
+#!/bin/ksh -p
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2018, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_remap/zfs_remap_cliargs.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_remap/zfs_remap_cliargs.ksh
new file mode 100644
index 0000000000..4e0d2bc442
--- /dev/null
+++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_remap/zfs_remap_cliargs.ksh
@@ -0,0 +1,78 @@
+#!/bin/ksh -p
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2018, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/removal/removal.kshlib
+
+#
+# DESCRIPTION:
+# 'zfs remap' should only work with supported parameters.
+#
+# STRATEGY:
+# 1. Prepare a pool where a top-level VDEV has been removed
+# 2. Verify every supported parameter to 'zfs remap' is accepted
+# 3. Verify other unsupported parameters raise an error
+#
+
+verify_runnable "both"
+
+function cleanup
+{
+ destroy_pool $TESTPOOL
+ rm -f $DISK1 $DISK2
+}
+
+log_assert "'zfs remap' should only work with supported parameters"
+log_onexit cleanup
+
+f="$TESTPOOL/fs"
+v="$TESTPOOL/vol"
+s="$TESTPOOL/fs@snap"
+b="$TESTPOOL/fs#bmark"
+c="$TESTPOOL/clone"
+
+typeset goodparams=("$f" "$v" "$c")
+typeset badparams=("-H" "-p" "-?" "$s" "$b" "$f $f" "$f $v" "$f $s")
+
+DISK1="/var/tmp/zfs_remap-1"
+DISK2="/var/tmp/zfs_remap-2"
+
+# 1. Prepare a pool where a top-level VDEV has been removed
+log_must truncate -s $(($MINVDEVSIZE * 2)) $DISK1
+log_must zpool create $TESTPOOL $DISK1
+log_must zfs create $f
+log_must zfs create -V 1M -s $v
+log_must zfs snap $s
+log_must zfs bookmark $s $b
+log_must zfs clone $s $c
+log_must truncate -s $(($MINVDEVSIZE * 2)) $DISK2
+log_must zpool add $TESTPOOL $DISK2
+log_must zpool remove $TESTPOOL $DISK1
+log_must wait_for_removal $TESTPOOL
+
+# 2. Verify every supported parameter to 'zfs remap' is accepted
+for param in "${goodparams[@]}"
+do
+ log_must zfs remap $param
+done
+
+# 3. Verify other unsupported parameters raise an error
+for param in "${badparams[@]}"
+do
+ log_mustnot zfs remap $param
+done
+
+log_pass "'zfs remap' only works with supported parameters"
diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_remap/zfs_remap_obsolete_counts.ksh b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_remap/zfs_remap_obsolete_counts.ksh
new file mode 100644
index 0000000000..d8b52b091e
--- /dev/null
+++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zfs_remap/zfs_remap_obsolete_counts.ksh
@@ -0,0 +1,76 @@
+#!/bin/ksh -p
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2018, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/removal/removal.kshlib
+
+#
+# DESCRIPTION:
+# 'zfs remap' depends on 'feature@obsolete_counts' being active
+#
+# STRATEGY:
+# 1. Prepare a pool where a top-level VDEV has been removed and with
+# feature@obsolete_counts disabled
+# 2. Verify that 'zfs remap' commands cannot be executed
+# 3. Verify the same commands complete successfully when
+# feature@obsolete_counts is enabled
+#
+
+verify_runnable "both"
+
+function cleanup
+{
+ destroy_pool $TESTPOOL
+ rm -f $DISK1 $DISK2
+}
+
+log_assert "'zfs remap' depends on feature@obsolete_counts being active"
+log_onexit cleanup
+
+f="$TESTPOOL/fs"
+v="$TESTPOOL/vol"
+s="$TESTPOOL/fs@snap"
+c="$TESTPOOL/clone"
+
+DISK1="/var/tmp/zfs_remap-1"
+DISK2="/var/tmp/zfs_remap-2"
+
+# 1. Prepare a pool where a top-level VDEV has been removed with
+# feature@obsolete_counts disabled
+log_must truncate -s $(($MINVDEVSIZE * 2)) $DISK1
+log_must zpool create -d -o feature@device_removal=enabled $TESTPOOL $DISK1
+log_must zfs create $f
+log_must zfs create -V 1M -s $v
+log_must zfs snap $s
+log_must zfs clone $s $c
+log_must truncate -s $(($MINVDEVSIZE * 2)) $DISK2
+log_must zpool add $TESTPOOL $DISK2
+log_must zpool remove $TESTPOOL $DISK1
+log_must wait_for_removal $TESTPOOL
+
+# 2. Verify that 'zfs remap' commands cannot be executed
+log_mustnot zfs remap $f
+log_mustnot zfs remap $v
+log_mustnot zfs remap $c
+
+# 3. Verify the same commands complete successfully when
+# feature@obsolete_counts is enabled
+log_must zpool set feature@obsolete_counts=enabled $TESTPOOL
+log_must zfs remap $f
+log_must zfs remap $v
+log_must zfs remap $c
+
+log_pass "'zfs remap' correctly depends on feature@obsolete_counts being active"
diff --git a/usr/src/uts/common/fs/zfs/sys/dsl_pool.h b/usr/src/uts/common/fs/zfs/sys/dsl_pool.h
index 0f855d4f3d..2df0f21f98 100644
--- a/usr/src/uts/common/fs/zfs/sys/dsl_pool.h
+++ b/usr/src/uts/common/fs/zfs/sys/dsl_pool.h
@@ -89,7 +89,7 @@ typedef struct dsl_pool {
struct dsl_dir *dp_leak_dir;
struct dsl_dataset *dp_origin_snap;
uint64_t dp_root_dir_obj;
- struct taskq *dp_vnrele_taskq;
+ taskq_t *dp_vnrele_taskq;
/* No lock needed - sync context only */
blkptr_t dp_meta_rootbp;
diff --git a/usr/src/uts/common/fs/zfs/vdev.c b/usr/src/uts/common/fs/zfs/vdev.c
index 6fee8109e0..db2d12db96 100644
--- a/usr/src/uts/common/fs/zfs/vdev.c
+++ b/usr/src/uts/common/fs/zfs/vdev.c
@@ -72,15 +72,21 @@ static vdev_ops_t *vdev_ops_table[] = {
/* maximum scrub/resilver I/O queue per leaf vdev */
int zfs_scrub_limit = 10;
-/* maximum number of metaslabs per top-level vdev */
+/* target number of metaslabs per top-level vdev */
int vdev_max_ms_count = 200;
-/* minimum amount of metaslabs per top-level vdev */
+/* minimum number of metaslabs per top-level vdev */
int vdev_min_ms_count = 16;
-/* see comment in vdev_metaslab_set_size() */
+/* practical upper limit of total metaslabs per top-level vdev */
+int vdev_ms_count_limit = 1ULL << 17;
+
+/* lower limit for metaslab size (512M) */
int vdev_default_ms_shift = 29;
+/* upper limit for metaslab size (256G) */
+int vdev_max_ms_shift = 38;
+
boolean_t vdev_validate_skip = B_FALSE;
/*
@@ -2028,34 +2034,53 @@ void
vdev_metaslab_set_size(vdev_t *vd)
{
uint64_t asize = vd->vdev_asize;
- uint64_t ms_shift = 0;
+ uint64_t ms_count = asize >> vdev_default_ms_shift;
+ uint64_t ms_shift;
/*
- * For vdevs that are bigger than 8G the metaslab size varies in
- * a way that the number of metaslabs increases in powers of two,
- * linearly in terms of vdev_asize, starting from 16 metaslabs.
- * So for vdev_asize of 8G we get 16 metaslabs, for 16G, we get 32,
- * and so on, until we hit the maximum metaslab count limit
- * [vdev_max_ms_count] from which point the metaslab count stays
- * the same.
+ * There are two dimensions to the metaslab sizing calculation:
+ * the size of the metaslab and the count of metaslabs per vdev.
+ * In general, we aim for vdev_max_ms_count (200) metaslabs. The
+	 * ranges of the dimensions are as follows:
+ *
+ * 2^29 <= ms_size <= 2^38
+ * 16 <= ms_count <= 131,072
+ *
+	 * On the lower end of vdev sizes, we aim for metaslab sizes of
+ * at least 512MB (2^29) to minimize fragmentation effects when
+ * testing with smaller devices. However, the count constraint
+ * of at least 16 metaslabs will override this minimum size goal.
+ *
+ * On the upper end of vdev sizes, we aim for a maximum metaslab
+ * size of 256GB. However, we will cap the total count to 2^17
+ * metaslabs to keep our memory footprint in check.
+ *
+	 * The net effect of applying the above constraints is summarized below.
+ *
+ * vdev size metaslab count
+ * -------------|-----------------
+ * < 8GB ~16
+ * 8GB - 100GB one per 512MB
+ * 100GB - 50TB ~200
+ * 50TB - 32PB one per 256GB
+ * > 32PB ~131,072
+ * -------------------------------
*/
- ms_shift = vdev_default_ms_shift;
- if ((asize >> ms_shift) < vdev_min_ms_count) {
- /*
- * For devices that are less than 8G we want to have
- * exactly 16 metaslabs. We don't want less as integer
- * division rounds down, so less metaslabs mean more
- * wasted space. We don't want more as these vdevs are
- * small and in the likely event that we are running
- * out of space, the SPA will have a hard time finding
- * space due to fragmentation.
- */
+ if (ms_count < vdev_min_ms_count)
ms_shift = highbit64(asize / vdev_min_ms_count);
- ms_shift = MAX(ms_shift, SPA_MAXBLOCKSHIFT);
-
- } else if ((asize >> ms_shift) > vdev_max_ms_count) {
+ else if (ms_count > vdev_max_ms_count)
ms_shift = highbit64(asize / vdev_max_ms_count);
+ else
+ ms_shift = vdev_default_ms_shift;
+
+ if (ms_shift < SPA_MAXBLOCKSHIFT) {
+ ms_shift = SPA_MAXBLOCKSHIFT;
+ } else if (ms_shift > vdev_max_ms_shift) {
+ ms_shift = vdev_max_ms_shift;
+ /* cap the total count to constrain memory footprint */
+ if ((asize >> ms_shift) > vdev_ms_count_limit)
+ ms_shift = highbit64(asize / vdev_ms_count_limit);
}
vd->vdev_ms_shift = ms_shift;
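
For reference, the shift selection added above can be spot-checked outside the kernel. The following stand-alone sketch is an editor's illustration, not part of this merge: it copies the tunable values introduced in the hunk, re-implements highbit64() with a simple loop, and assumes SPA_MAXBLOCKSHIFT is 24 (as in sys/spa.h). It prints the resulting metaslab shift and count for a few vdev sizes so the table in the new comment can be verified.

/*
 * Stand-alone sketch (not part of the patch): mirror the new
 * vdev_metaslab_set_size() selection logic for a few vdev sizes.
 * Constants are copied from the change; SPA_MAXBLOCKSHIFT is assumed.
 */
#include <stdio.h>
#include <stdint.h>

#define	SPA_MAXBLOCKSHIFT	24	/* assumed, per sys/spa.h */

static int vdev_max_ms_count = 200;		/* target metaslabs per vdev */
static int vdev_min_ms_count = 16;		/* minimum metaslabs per vdev */
static int vdev_ms_count_limit = 1ULL << 17;	/* practical count limit */
static int vdev_default_ms_shift = 29;		/* 512M metaslab size floor */
static int vdev_max_ms_shift = 38;		/* 256G metaslab size ceiling */

/* position of the highest set bit, counting from 1 (0 for i == 0) */
static int
highbit64(uint64_t i)
{
	int h = 0;

	while (i != 0) {
		i >>= 1;
		h++;
	}
	return (h);
}

/* same shift selection as the new vdev_metaslab_set_size() */
static uint64_t
ms_shift_for(uint64_t asize)
{
	uint64_t ms_count = asize >> vdev_default_ms_shift;
	uint64_t ms_shift;

	if (ms_count < vdev_min_ms_count)
		ms_shift = highbit64(asize / vdev_min_ms_count);
	else if (ms_count > vdev_max_ms_count)
		ms_shift = highbit64(asize / vdev_max_ms_count);
	else
		ms_shift = vdev_default_ms_shift;

	if (ms_shift < SPA_MAXBLOCKSHIFT) {
		ms_shift = SPA_MAXBLOCKSHIFT;
	} else if (ms_shift > vdev_max_ms_shift) {
		ms_shift = vdev_max_ms_shift;
		/* cap the total count to constrain memory footprint */
		if ((asize >> ms_shift) > vdev_ms_count_limit)
			ms_shift = highbit64(asize / vdev_ms_count_limit);
	}
	return (ms_shift);
}

int
main(void)
{
	/* 64GB, 2TB and 100TB vdevs */
	uint64_t sizes[] = { 64ULL << 30, 2ULL << 40, 100ULL << 40 };

	for (int i = 0; i < 3; i++) {
		uint64_t ms_shift = ms_shift_for(sizes[i]);

		printf("asize %llu: ms_shift %llu, ms_count %llu\n",
		    (unsigned long long)sizes[i],
		    (unsigned long long)ms_shift,
		    (unsigned long long)(sizes[i] >> ms_shift));
	}
	return (0);
}

Run as written, this reports 128 metaslabs of 512MB for a 64GB vdev, 128 of 16GB for 2TB, and 400 of 256GB for 100TB, which falls within the approximate bands in the comment's table (the highbit64() rounding is why the middle band lands between 100 and 200 rather than at exactly 200).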