summaryrefslogtreecommitdiff
path: root/usr/src
diff options
context:
space:
mode:
authorMark J Musante <Mark.Musante@Sun.COM>2010-01-04 17:24:41 -0500
committerMark J Musante <Mark.Musante@Sun.COM>2010-01-04 17:24:41 -0500
commit1195e687f1c03c8d57417b5999578922e20a3554 (patch)
tree0ee99e0c83bed51074cd4abbf8fcdf58b0bd37e5 /usr/src
parentb88604e3d674a4c82f27e5c41d05b8774a7547e4 (diff)
downloadillumos-gate-1195e687f1c03c8d57417b5999578922e20a3554.tar.gz
PSARC/2009/511 zpool split
5097228 provide 'zpool split' to create new pool by breaking all mirrors 6880831 memory leak in zpool add 6891438 zfs_ioc_userspace_upgrade could reference uninitialised error variable 6891441 zvol_create_minor sets local variable zv but never references it 6891442 spa_import() sets local variable spa but never references it 6895446 vdevs left open after removing slogs or offlining device/file
Diffstat (limited to 'usr/src')
-rw-r--r--usr/src/cmd/mdb/common/modules/zfs/zfs.c8
-rw-r--r--usr/src/cmd/zinject/zinject.c8
-rw-r--r--usr/src/cmd/zpool/zpool_main.c157
-rw-r--r--usr/src/cmd/zpool/zpool_util.h4
-rw-r--r--usr/src/cmd/zpool/zpool_vdev.c48
-rw-r--r--usr/src/cmd/ztest/ztest.c174
-rw-r--r--usr/src/lib/libzfs/common/libzfs.h11
-rw-r--r--usr/src/lib/libzfs/common/libzfs_pool.c265
-rw-r--r--usr/src/lib/libzfs/common/libzfs_util.c5
-rw-r--r--usr/src/lib/libzfs/common/mapfile-vers3
-rw-r--r--usr/src/uts/common/fs/zfs/spa.c868
-rw-r--r--usr/src/uts/common/fs/zfs/spa_config.c21
-rw-r--r--usr/src/uts/common/fs/zfs/spa_misc.c22
-rw-r--r--usr/src/uts/common/fs/zfs/sys/spa.h11
-rw-r--r--usr/src/uts/common/fs/zfs/sys/spa_impl.h4
-rw-r--r--usr/src/uts/common/fs/zfs/sys/vdev.h7
-rw-r--r--usr/src/uts/common/fs/zfs/sys/vdev_impl.h5
-rw-r--r--usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h4
-rw-r--r--usr/src/uts/common/fs/zfs/sys/zio.h4
-rw-r--r--usr/src/uts/common/fs/zfs/vdev.c89
-rw-r--r--usr/src/uts/common/fs/zfs/vdev_label.c18
-rw-r--r--usr/src/uts/common/fs/zfs/zfs_ioctl.c45
-rw-r--r--usr/src/uts/common/fs/zfs/zio_inject.c7
-rw-r--r--usr/src/uts/common/fs/zfs/zvol.c4
-rw-r--r--usr/src/uts/common/sys/fs/zfs.h13
25 files changed, 1479 insertions, 326 deletions
diff --git a/usr/src/cmd/mdb/common/modules/zfs/zfs.c b/usr/src/cmd/mdb/common/modules/zfs/zfs.c
index df1fb44332..55c2b7133f 100644
--- a/usr/src/cmd/mdb/common/modules/zfs/zfs.c
+++ b/usr/src/cmd/mdb/common/modules/zfs/zfs.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -1040,6 +1040,12 @@ do_print_vdev(uintptr_t addr, int flags, int depth, int stats,
case VDEV_AUX_BAD_LOG:
aux = "BAD_LOG";
break;
+ case VDEV_AUX_EXTERNAL:
+ aux = "EXTERNAL";
+ break;
+ case VDEV_AUX_SPLIT_POOL:
+ aux = "SPLIT_POOL";
+ break;
default:
aux = "UNKNOWN";
break;
diff --git a/usr/src/cmd/zinject/zinject.c b/usr/src/cmd/zinject/zinject.c
index b892c418a8..caa1b75a6a 100644
--- a/usr/src/cmd/zinject/zinject.c
+++ b/usr/src/cmd/zinject/zinject.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -855,14 +855,16 @@ main(int argc, char **argv)
return (2);
}
- if (argc != 1) {
+ if (argc < 1 || argc > 2) {
(void) fprintf(stderr, "panic (-p) injection requires "
- "a single pool name\n");
+ "a single pool name and an optional id\n");
usage();
return (2);
}
(void) strcpy(pool, argv[0]);
+ if (argv[1] != NULL)
+ record.zi_type = atoi(argv[1]);
dataset[0] = '\0';
} else if (record.zi_duration != 0) {
if (nowrites == 0) {
diff --git a/usr/src/cmd/zpool/zpool_main.c b/usr/src/cmd/zpool/zpool_main.c
index e7a9dfb3f4..6ded428fe3 100644
--- a/usr/src/cmd/zpool/zpool_main.c
+++ b/usr/src/cmd/zpool/zpool_main.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -69,6 +69,7 @@ static int zpool_do_clear(int, char **);
static int zpool_do_attach(int, char **);
static int zpool_do_detach(int, char **);
static int zpool_do_replace(int, char **);
+static int zpool_do_split(int, char **);
static int zpool_do_scrub(int, char **);
@@ -121,7 +122,8 @@ typedef enum {
HELP_STATUS,
HELP_UPGRADE,
HELP_GET,
- HELP_SET
+ HELP_SET,
+ HELP_SPLIT
} zpool_help_t;
@@ -158,6 +160,7 @@ static zpool_command_t command_table[] = {
{ "attach", zpool_do_attach, HELP_ATTACH },
{ "detach", zpool_do_detach, HELP_DETACH },
{ "replace", zpool_do_replace, HELP_REPLACE },
+ { "split", zpool_do_split, HELP_SPLIT },
{ NULL },
{ "scrub", zpool_do_scrub, HELP_SCRUB },
{ NULL },
@@ -235,6 +238,10 @@ get_usage(zpool_help_t idx) {
"<pool> ...\n"));
case HELP_SET:
return (gettext("\tset <property=value> <pool> \n"));
+ case HELP_SPLIT:
+ return (gettext("\tsplit [-n] [-R altroot] [-o mntopts]\n"
+ "\t [-o property=value] <pool> <newpool> "
+ "[<device> ...]\n"));
}
abort();
@@ -1132,6 +1139,10 @@ print_status_config(zpool_handle_t *zhp, const char *name, nvlist_t *nv,
(void) printf(gettext("external device fault"));
break;
+ case VDEV_AUX_SPLIT_POOL:
+ (void) printf(gettext("split into new pool"));
+ break;
+
default:
(void) printf(gettext("corrupted data"));
break;
@@ -1622,7 +1633,7 @@ zpool_do_import(int argc, char **argv)
char *cachefile = NULL;
/* check options */
- while ((c = getopt(argc, argv, ":aCc:d:DEfFno:p:rR:VX")) != -1) {
+ while ((c = getopt(argc, argv, ":aCc:d:DEfFno:rR:VX")) != -1) {
switch (c) {
case 'a':
do_all = B_TRUE;
@@ -2697,6 +2708,146 @@ zpool_do_detach(int argc, char **argv)
}
/*
+ * zpool split [-n] [-o prop=val] ...
+ * [-o mntopt] ...
+ * [-R altroot] <pool> <newpool> [<device> ...]
+ *
+ * -n Do not split the pool, but display the resulting layout if
+ * it were to be split.
+ * -o Set property=value, or set mount options.
+ * -R Mount the split-off pool under an alternate root.
+ *
+ * Splits the named pool and gives it the new pool name. Devices to be split
+ * off may be listed, provided that no more than one device is specified
+ * per top-level vdev mirror. The newly split pool is left in an exported
+ * state unless -R is specified.
+ *
+ * Restrictions: the top level of the pool must only be made up of
+ * mirrors; all devices in the pool must be healthy; no device may be
+ * undergoing a resilvering operation.
+ */
+int
+zpool_do_split(int argc, char **argv)
+{
+	char *srcpool, *newpool, *propval;
+	char *mntopts = NULL;
+	splitflags_t flags;
+	int c, ret = 0;
+	zpool_handle_t *zhp;
+	nvlist_t *config, *props = NULL;
+
+	flags.dryrun = B_FALSE;
+	flags.import = B_FALSE;
+
+	/* check options */
+	while ((c = getopt(argc, argv, ":R:no:")) != -1) {
+		switch (c) {
+		case 'R':
+			/* an altroot implies the new pool is to be imported */
+			flags.import = B_TRUE;
+			if (add_prop_list(
+			    zpool_prop_to_name(ZPOOL_PROP_ALTROOT), optarg,
+			    &props, B_TRUE) != 0) {
+				if (props)
+					nvlist_free(props);
+				usage(B_FALSE);
+			}
+			break;
+		case 'n':
+			flags.dryrun = B_TRUE;
+			break;
+		case 'o':
+			/*
+			 * "-o name=value" sets a pool property; a bare
+			 * "-o string" is taken as mount options.
+			 */
+			if ((propval = strchr(optarg, '=')) != NULL) {
+				*propval = '\0';
+				propval++;
+				if (add_prop_list(optarg, propval,
+				    &props, B_TRUE) != 0) {
+					if (props)
+						nvlist_free(props);
+					usage(B_FALSE);
+				}
+			} else {
+				mntopts = optarg;
+			}
+			break;
+		case ':':
+			(void) fprintf(stderr, gettext("missing argument for "
+			    "'%c' option\n"), optopt);
+			usage(B_FALSE);
+			break;
+		case '?':
+			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
+			    optopt);
+			usage(B_FALSE);
+			break;
+		}
+	}
+
+	if (!flags.import && mntopts != NULL) {
+		(void) fprintf(stderr, gettext("setting mntopts is only "
+		    "valid when importing the pool\n"));
+		usage(B_FALSE);
+	}
+
+	argc -= optind;
+	argv += optind;
+
+	if (argc < 1) {
+		(void) fprintf(stderr, gettext("Missing pool name\n"));
+		usage(B_FALSE);
+	}
+	if (argc < 2) {
+		(void) fprintf(stderr, gettext("Missing new pool name\n"));
+		usage(B_FALSE);
+	}
+
+	srcpool = argv[0];
+	newpool = argv[1];
+
+	/* whatever remains is the optional list of devices to split off */
+	argc -= 2;
+	argv += 2;
+
+	if ((zhp = zpool_open(g_zfs, srcpool)) == NULL) {
+		/* the property list is ours to release on every exit path */
+		if (props != NULL)
+			nvlist_free(props);
+		return (1);
+	}
+
+	config = split_mirror_vdev(zhp, newpool, props, flags, argc, argv);
+
+	/* nothing below consults the property list, so release it now */
+	if (props != NULL)
+		nvlist_free(props);
+
+	if (config == NULL) {
+		ret = 1;
+	} else {
+		if (flags.dryrun) {
+			(void) printf(gettext("would create '%s' with the "
+			    "following layout:\n\n"), newpool);
+			print_vdev_tree(NULL, newpool, config, 0, B_FALSE);
+		}
+		nvlist_free(config);
+	}
+
+	zpool_close(zhp);
+
+	if (ret != 0 || flags.dryrun || !flags.import)
+		return (ret);
+
+	/*
+	 * The split was successful.  Now we need to open the new
+	 * pool and import it.
+	 */
+	if ((zhp = zpool_open_canfail(g_zfs, newpool)) == NULL)
+		return (1);
+	if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL &&
+	    zpool_enable_datasets(zhp, mntopts, 0) != 0) {
+		ret = 1;
+		(void) fprintf(stderr, gettext("Split was successful, but "
+		    "the datasets could not all be mounted\n"));
+		(void) fprintf(stderr, gettext("Try doing '%s' with a "
+		    "different altroot\n"), "zpool import");
+	}
+	zpool_close(zhp);
+
+	return (ret);
+}
+
+
+
+/*
* zpool online <pool> <device> ...
*/
int
diff --git a/usr/src/cmd/zpool/zpool_util.h b/usr/src/cmd/zpool/zpool_util.h
index c86b2e7405..a18b8b705f 100644
--- a/usr/src/cmd/zpool/zpool_util.h
+++ b/usr/src/cmd/zpool/zpool_util.h
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -46,6 +46,8 @@ uint_t num_logs(nvlist_t *nv);
nvlist_t *make_root_vdev(zpool_handle_t *zhp, int force, int check_rep,
boolean_t isreplace, boolean_t dryrun, int argc, char **argv);
+nvlist_t *split_mirror_vdev(zpool_handle_t *zhp, char *newname,
+ nvlist_t *props, splitflags_t flags, int argc, char **argv);
/*
* Pool list functions
diff --git a/usr/src/cmd/zpool/zpool_vdev.c b/usr/src/cmd/zpool/zpool_vdev.c
index 621519197d..3c725d232c 100644
--- a/usr/src/cmd/zpool/zpool_vdev.c
+++ b/usr/src/cmd/zpool/zpool_vdev.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -1360,6 +1360,52 @@ construct_spec(int argc, char **argv)
return (nvroot);
}
+nvlist_t *
+split_mirror_vdev(zpool_handle_t *zhp, char *newname, nvlist_t *props,
+    splitflags_t flags, int argc, char **argv)
+{
+	nvlist_t *newroot = NULL;
+
+	/*
+	 * The device list is optional.  When one is supplied, turn it into
+	 * a vdev spec and vet it before handing it to libzfs; otherwise
+	 * newroot stays NULL and zpool_vdev_split() fills it in.
+	 */
+	if (argc > 0) {
+		nvlist_t **kids;
+		uint_t nkids, i;
+
+		newroot = construct_spec(argc, argv);
+		if (newroot == NULL) {
+			(void) fprintf(stderr, gettext("Unable to build a "
+			    "pool from the specified devices\n"));
+			return (NULL);
+		}
+
+		/* a dry run must not touch the disks */
+		if (!flags.dryrun) {
+			if (make_disks(zhp, newroot) != 0) {
+				nvlist_free(newroot);
+				return (NULL);
+			}
+		}
+
+		/* avoid any tricks in the spec */
+		verify(nvlist_lookup_nvlist_array(newroot,
+		    ZPOOL_CONFIG_CHILDREN, &kids, &nkids) == 0);
+		for (i = 0; i != nkids; i++) {
+			char *path;
+			const char *type;
+			int min, max;
+
+			verify(nvlist_lookup_string(kids[i],
+			    ZPOOL_CONFIG_PATH, &path) == 0);
+			type = is_grouping(path, &min, &max);
+			if (type == NULL)
+				continue;
+
+			/* grouping keywords are not acceptable as devices */
+			(void) fprintf(stderr, gettext("Cannot use "
+			    "'%s' as a device for splitting\n"), type);
+			nvlist_free(newroot);
+			return (NULL);
+		}
+	}
+
+	if (zpool_vdev_split(zhp, newname, &newroot, props, flags) == 0)
+		return (newroot);
+
+	/* the split failed: drop whatever spec is still held */
+	if (newroot != NULL)
+		nvlist_free(newroot);
+	return (NULL);
+}
/*
* Get and validate the contents of the given vdev specification. This ensures
diff --git a/usr/src/cmd/ztest/ztest.c b/usr/src/cmd/ztest/ztest.c
index 63b858b4fb..ed36d90279 100644
--- a/usr/src/cmd/ztest/ztest.c
+++ b/usr/src/cmd/ztest/ztest.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -125,9 +125,9 @@ static int zopt_verbose = 0;
static int zopt_init = 1;
static char *zopt_dir = "/tmp";
static uint64_t zopt_time = 300; /* 5 minutes */
-static int zopt_maxfaults;
#define BT_MAGIC 0x123456789abcdefULL
+#define MAXFAULTS() (MAX(zs->zs_mirrors, 1) * (zopt_raidz_parity + 1) - 1)
enum ztest_io_type {
ZTEST_IO_WRITE_TAG,
@@ -251,6 +251,7 @@ ztest_func_t ztest_vdev_attach_detach;
ztest_func_t ztest_vdev_LUN_growth;
ztest_func_t ztest_vdev_add_remove;
ztest_func_t ztest_vdev_aux_add_remove;
+ztest_func_t ztest_split_pool;
uint64_t zopt_always = 0ULL * NANOSEC; /* all the time */
uint64_t zopt_incessant = 1ULL * NANOSEC / 10; /* every 1/10 second */
@@ -265,6 +266,7 @@ ztest_info_t ztest_info[] = {
{ ztest_dmu_commit_callbacks, 1, &zopt_always },
{ ztest_zap, 30, &zopt_always },
{ ztest_zap_parallel, 100, &zopt_always },
+ { ztest_split_pool, 1, &zopt_always },
{ ztest_zil_commit, 1, &zopt_incessant },
{ ztest_dmu_read_write_zcopy, 1, &zopt_often },
{ ztest_dmu_objset_create_destroy, 1, &zopt_often },
@@ -318,6 +320,8 @@ typedef struct ztest_shared {
mutex_t zs_vdev_lock;
rwlock_t zs_name_lock;
ztest_info_t zs_info[ZTEST_FUNCS];
+ uint64_t zs_splits;
+ uint64_t zs_mirrors;
ztest_ds_t zs_zd[];
} ztest_shared_t;
@@ -592,7 +596,6 @@ process_options(int argc, char **argv)
zopt_vdevtime = (zopt_vdevs > 0 ? zopt_time * NANOSEC / zopt_vdevs :
UINT64_MAX >> 2);
- zopt_maxfaults = MAX(zopt_mirrors, 1) * (zopt_raidz_parity + 1) - 1;
}
static void
@@ -2134,12 +2137,13 @@ ztest_vdev_add_remove(ztest_ds_t *zd, uint64_t id)
{
ztest_shared_t *zs = ztest_shared;
spa_t *spa = zs->zs_spa;
- uint64_t leaves = MAX(zopt_mirrors, 1) * zopt_raidz;
+ uint64_t leaves;
uint64_t guid;
nvlist_t *nvroot;
int error;
VERIFY(mutex_lock(&zs->zs_vdev_lock) == 0);
+ leaves = MAX(zs->zs_mirrors + zs->zs_splits, 1) * zopt_raidz;
spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
@@ -2177,7 +2181,7 @@ ztest_vdev_add_remove(ztest_ds_t *zd, uint64_t id)
* Make 1/4 of the devices be log devices.
*/
nvroot = make_vdev_root(NULL, NULL, zopt_vdev_size, 0,
- ztest_random(4) == 0, zopt_raidz, zopt_mirrors, 1);
+ ztest_random(4) == 0, zopt_raidz, zs->zs_mirrors, 1);
error = spa_vdev_add(spa, nvroot);
nvlist_free(nvroot);
@@ -2274,6 +2278,99 @@ ztest_vdev_aux_add_remove(ztest_ds_t *zd, uint64_t id)
}
/*
+ * split a pool if it has mirror tlvdevs
+ */
+/* ARGSUSED */
+void
+ztest_split_pool(ztest_ds_t *zd, uint64_t id)
+{
+	ztest_shared_t *zs = ztest_shared;
+	spa_t *spa = zs->zs_spa;
+	vdev_t *rvd = spa->spa_root_vdev;
+	nvlist_t *tree, **child, *config, *split, **schild;
+	uint_t c, children, schildren = 0, lastlogid = 0;
+	int error = 0;
+
+	/* zs_vdev_lock is held from here until the function returns */
+	VERIFY(mutex_lock(&zs->zs_vdev_lock) == 0);
+
+	/* ensure we have a useable config; mirrors of raidz aren't supported */
+	if (zs->zs_mirrors < 3 || zopt_raidz > 1) {
+		VERIFY(mutex_unlock(&zs->zs_vdev_lock) == 0);
+		return;
+	}
+
+	/* clean up the old pool, if any */
+	(void) spa_destroy("splitp");
+
+	spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
+
+	/* generate a config from the existing config */
+	VERIFY(nvlist_lookup_nvlist(spa->spa_config, ZPOOL_CONFIG_VDEV_TREE,
+	    &tree) == 0);
+	VERIFY(nvlist_lookup_nvlist_array(tree, ZPOOL_CONFIG_CHILDREN, &child,
+	    &children) == 0);
+
+	/* one slot per top-level vdev; fail loudly rather than write to NULL */
+	schild = malloc(rvd->vdev_children * sizeof (nvlist_t *));
+	VERIFY(schild != NULL);
+
+	for (c = 0; c < children; c++) {
+		vdev_t *tvd = rvd->vdev_child[c];
+		nvlist_t **mchild;
+		uint_t mchildren;
+
+		/* logs and holes are represented by a hole placeholder */
+		if (tvd->vdev_islog || tvd->vdev_ops == &vdev_hole_ops) {
+			VERIFY(nvlist_alloc(&schild[schildren], NV_UNIQUE_NAME,
+			    0) == 0);
+			VERIFY(nvlist_add_string(schild[schildren],
+			    ZPOOL_CONFIG_TYPE, VDEV_TYPE_HOLE) == 0);
+			VERIFY(nvlist_add_uint64(schild[schildren],
+			    ZPOOL_CONFIG_IS_HOLE, 1) == 0);
+			/* remember where the current run of placeholders began */
+			if (lastlogid == 0)
+				lastlogid = schildren;
+			++schildren;
+			continue;
+		}
+		lastlogid = 0;
+
+		/* split off the first child of this top-level vdev */
+		VERIFY(nvlist_lookup_nvlist_array(child[c],
+		    ZPOOL_CONFIG_CHILDREN, &mchild, &mchildren) == 0);
+		VERIFY(nvlist_dup(mchild[0], &schild[schildren++], 0) == 0);
+	}
+
+	/*
+	 * OK, create a config that can be used to split.  A trailing run of
+	 * placeholders is left out by passing lastlogid as the child count.
+	 */
+	VERIFY(nvlist_alloc(&split, NV_UNIQUE_NAME, 0) == 0);
+	VERIFY(nvlist_add_string(split, ZPOOL_CONFIG_TYPE,
+	    VDEV_TYPE_ROOT) == 0);
+	VERIFY(nvlist_add_nvlist_array(split, ZPOOL_CONFIG_CHILDREN, schild,
+	    lastlogid != 0 ? lastlogid : schildren) == 0);
+
+	VERIFY(nvlist_alloc(&config, NV_UNIQUE_NAME, 0) == 0);
+	VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, split) == 0);
+
+	/* release the scratch copies now that they have been added to config */
+	for (c = 0; c < schildren; c++)
+		nvlist_free(schild[c]);
+	free(schild);
+	nvlist_free(split);
+
+	spa_config_exit(spa, SCL_VDEV, FTAG);
+
+	(void) rw_wrlock(&zs->zs_name_lock);
+	error = spa_vdev_split_mirror(spa, "splitp", config, NULL, B_FALSE);
+	(void) rw_unlock(&zs->zs_name_lock);
+
+	nvlist_free(config);
+
+	if (error == 0) {
+		(void) printf("successful split - results:\n");
+		mutex_enter(&spa_namespace_lock);
+		show_pool_stats(spa);
+		show_pool_stats(spa_lookup("splitp"));
+		mutex_exit(&spa_namespace_lock);
+		/* record one more split and one fewer mirror */
+		++zs->zs_splits;
+		--zs->zs_mirrors;
+	}
+	VERIFY(mutex_unlock(&zs->zs_vdev_lock) == 0);
+}
+
+/*
* Verify that we can attach and detach devices.
*/
/* ARGSUSED */
@@ -2286,7 +2383,7 @@ ztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id)
vdev_t *rvd = spa->spa_root_vdev;
vdev_t *oldvd, *newvd, *pvd;
nvlist_t *root;
- uint64_t leaves = MAX(zopt_mirrors, 1) * zopt_raidz;
+ uint64_t leaves;
uint64_t leaf, top;
uint64_t ashift = ztest_get_ashift();
uint64_t oldguid, pguid;
@@ -2299,6 +2396,7 @@ ztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id)
int error, expected_error;
VERIFY(mutex_lock(&zs->zs_vdev_lock) == 0);
+ leaves = MAX(zs->zs_mirrors, 1) * zopt_raidz;
spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
@@ -2315,15 +2413,15 @@ ztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id)
/*
* Pick a random leaf within it.
*/
- leaf = ztest_random(leaves);
+ leaf = ztest_random(leaves) + zs->zs_splits;
/*
* Locate this vdev.
*/
oldvd = rvd->vdev_child[top];
- if (zopt_mirrors >= 1) {
+ if (zs->zs_mirrors >= 1) {
ASSERT(oldvd->vdev_ops == &vdev_mirror_ops);
- ASSERT(oldvd->vdev_children >= zopt_mirrors);
+ ASSERT(oldvd->vdev_children >= zs->zs_mirrors);
oldvd = oldvd->vdev_child[leaf / zopt_raidz];
}
if (zopt_raidz > 1) {
@@ -4268,7 +4366,7 @@ ztest_fault_inject(ztest_ds_t *zd, uint64_t id)
spa_t *spa = zs->zs_spa;
int fd;
uint64_t offset;
- uint64_t leaves = MAX(zopt_mirrors, 1) * zopt_raidz;
+ uint64_t leaves;
uint64_t bad = 0x1990c0ffeedecade;
uint64_t top, leaf;
char path0[MAXPATHLEN];
@@ -4276,11 +4374,18 @@ ztest_fault_inject(ztest_ds_t *zd, uint64_t id)
size_t fsize;
int bshift = SPA_MAXBLOCKSHIFT + 2; /* don't scrog all labels */
int iters = 1000;
- int maxfaults = zopt_maxfaults;
+ int maxfaults;
+ int mirror_save;
vdev_t *vd0 = NULL;
uint64_t guid0 = 0;
boolean_t islog = B_FALSE;
+ VERIFY(mutex_lock(&zs->zs_vdev_lock) == 0);
+ maxfaults = MAXFAULTS();
+ leaves = MAX(zs->zs_mirrors, 1) * zopt_raidz;
+ mirror_save = zs->zs_mirrors;
+ VERIFY(mutex_unlock(&zs->zs_vdev_lock) == 0);
+
ASSERT(leaves >= 1);
/*
@@ -4293,7 +4398,7 @@ ztest_fault_inject(ztest_ds_t *zd, uint64_t id)
* Inject errors on a normal data device or slog device.
*/
top = ztest_random_vdev_top(spa, B_TRUE);
- leaf = ztest_random(leaves);
+ leaf = ztest_random(leaves) + zs->zs_splits;
/*
* Generate paths to the first leaf in this top-level vdev,
@@ -4302,7 +4407,7 @@ ztest_fault_inject(ztest_ds_t *zd, uint64_t id)
* and we'll write random garbage to the randomly chosen leaf.
*/
(void) snprintf(path0, sizeof (path0), ztest_dev_template,
- zopt_dir, zopt_pool, top * leaves + 0);
+ zopt_dir, zopt_pool, top * leaves + zs->zs_splits);
(void) snprintf(pathrand, sizeof (pathrand), ztest_dev_template,
zopt_dir, zopt_pool, top * leaves + leaf);
@@ -4405,13 +4510,22 @@ ztest_fault_inject(ztest_ds_t *zd, uint64_t id)
if (offset >= fsize)
continue;
- if (zopt_verbose >= 7)
- (void) printf("injecting bad word into %s,"
- " offset 0x%llx\n", pathrand, (u_longlong_t)offset);
+ VERIFY(mutex_lock(&zs->zs_vdev_lock) == 0);
+ if (mirror_save != zs->zs_mirrors) {
+ VERIFY(mutex_unlock(&zs->zs_vdev_lock) == 0);
+ (void) close(fd);
+ return;
+ }
if (pwrite(fd, &bad, sizeof (bad), offset) != sizeof (bad))
fatal(1, "can't inject bad word at 0x%llx in %s",
offset, pathrand);
+
+ VERIFY(mutex_unlock(&zs->zs_vdev_lock) == 0);
+
+ if (zopt_verbose >= 7)
+ (void) printf("injected bad word into %s,"
+ " offset 0x%llx\n", pathrand, (u_longlong_t)offset);
}
(void) close(fd);
@@ -4992,7 +5106,7 @@ ztest_run(ztest_shared_t *zs)
* in which case ztest_fault_inject() temporarily takes away
* the only valid replica.
*/
- if (zopt_maxfaults == 0)
+ if (MAXFAULTS() == 0)
spa->spa_failmode = ZIO_FAILURE_MODE_WAIT;
else
spa->spa_failmode = ZIO_FAILURE_MODE_PANIC;
@@ -5202,6 +5316,23 @@ print_time(hrtime_t t, char *timebuf)
(void) sprintf(timebuf, "%llus", s);
}
+static nvlist_t *
+make_random_props()
+{
+	nvlist_t *props = NULL;
+
+	/*
+	 * Build and print a property list only when ztest_random(2) is
+	 * nonzero; otherwise return NULL.
+	 */
+	if (ztest_random(2) != 0) {
+		VERIFY(nvlist_alloc(&props, NV_UNIQUE_NAME, 0) == 0);
+		VERIFY(nvlist_add_uint64(props, "autoreplace", 1) == 0);
+
+		(void) printf("props:\n");
+		dump_nvlist(props, 4);
+	}
+
+	return (props);
+}
+
/*
* Create a storage pool with the given name and initial vdev size.
* Then test spa_freeze() functionality.
@@ -5210,7 +5341,7 @@ static void
ztest_init(ztest_shared_t *zs)
{
spa_t *spa;
- nvlist_t *nvroot;
+ nvlist_t *nvroot, *props;
VERIFY(_mutex_init(&zs->zs_vdev_lock, USYNC_THREAD, NULL) == 0);
VERIFY(rwlock_init(&zs->zs_name_lock, USYNC_THREAD, NULL) == 0);
@@ -5222,9 +5353,12 @@ ztest_init(ztest_shared_t *zs)
*/
(void) spa_destroy(zs->zs_pool);
ztest_shared->zs_vdev_next_leaf = 0;
+ zs->zs_splits = 0;
+ zs->zs_mirrors = zopt_mirrors;
nvroot = make_vdev_root(NULL, NULL, zopt_vdev_size, 0,
- 0, zopt_raidz, zopt_mirrors, 1);
- VERIFY3U(0, ==, spa_create(zs->zs_pool, nvroot, NULL, NULL, NULL));
+ 0, zopt_raidz, zs->zs_mirrors, 1);
+ props = make_random_props();
+ VERIFY3U(0, ==, spa_create(zs->zs_pool, nvroot, props, NULL, NULL));
nvlist_free(nvroot);
VERIFY3U(0, ==, spa_open(zs->zs_pool, &spa, FTAG));
diff --git a/usr/src/lib/libzfs/common/libzfs.h b/usr/src/lib/libzfs/common/libzfs.h
index 03870019d5..0a4c734dbe 100644
--- a/usr/src/lib/libzfs/common/libzfs.h
+++ b/usr/src/lib/libzfs/common/libzfs.h
@@ -120,6 +120,7 @@ enum {
EZFS_TAGTOOLONG, /* snapshot hold/rele: tag too long */
EZFS_PIPEFAILED, /* pipe create failed */
EZFS_THREADCREATEFAILED, /* thread create failed */
+ EZFS_POSTSPLIT_ONLINE, /* onlining a disk after splitting it */
EZFS_UNKNOWN
};
@@ -214,6 +215,14 @@ extern int zpool_create(libzfs_handle_t *, const char *, nvlist_t *,
extern int zpool_destroy(zpool_handle_t *);
extern int zpool_add(zpool_handle_t *, nvlist_t *);
+typedef struct splitflags {
+/*
+ * do not split, but return the config that would be split off
+ * (zpool_vdev_split() returns before issuing the split ioctl)
+ */
+int dryrun : 1;
+
+/*
+ * after splitting, import the pool; when clear, zpool_vdev_split()
+ * requests ZPOOL_EXPORT_AFTER_SPLIT instead.
+ * NOTE(review): both fields are signed one-bit bitfields, so test them
+ * for truth rather than comparing against B_TRUE -- confirm with callers.
+ */
+int import : 1;
+} splitflags_t;
+
/*
* Functions to manipulate pool and vdev state
*/
@@ -227,6 +236,8 @@ extern int zpool_vdev_attach(zpool_handle_t *, const char *,
const char *, nvlist_t *, int);
extern int zpool_vdev_detach(zpool_handle_t *, const char *);
extern int zpool_vdev_remove(zpool_handle_t *, const char *);
+extern int zpool_vdev_split(zpool_handle_t *, char *, nvlist_t **, nvlist_t *,
+ splitflags_t);
extern int zpool_vdev_fault(zpool_handle_t *, uint64_t, vdev_aux_t);
extern int zpool_vdev_degrade(zpool_handle_t *, uint64_t, vdev_aux_t);
diff --git a/usr/src/lib/libzfs/common/libzfs_pool.c b/usr/src/lib/libzfs/common/libzfs_pool.c
index 0b28814a50..3c0f46815b 100644
--- a/usr/src/lib/libzfs/common/libzfs_pool.c
+++ b/usr/src/lib/libzfs/common/libzfs_pool.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -84,6 +84,7 @@ const char *hist_event_table[LOG_END] = {
"pool scrub done",
"user hold",
"user release",
+ "pool split",
};
static int read_efi_label(nvlist_t *config, diskaddr_t *sb);
@@ -231,6 +232,8 @@ zpool_state_to_name(vdev_state_t state, vdev_aux_t aux)
case VDEV_STATE_CANT_OPEN:
if (aux == VDEV_AUX_CORRUPT_DATA || aux == VDEV_AUX_BAD_LOG)
return (gettext("FAULTED"));
+ else if (aux == VDEV_AUX_SPLIT_POOL)
+ return (gettext("SPLIT"));
else
return (gettext("UNAVAIL"));
case VDEV_STATE_FAULTED:
@@ -2074,8 +2077,15 @@ zpool_vdev_online(zpool_handle_t *zhp, const char *path, int flags,
zc.zc_cookie = VDEV_STATE_ONLINE;
zc.zc_obj = flags;
- if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_VDEV_SET_STATE, &zc) != 0)
+ if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_VDEV_SET_STATE, &zc) != 0) {
+ if (errno == EINVAL) {
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "was split "
+ "from this pool into a new one. Use '%s' "
+ "instead"), "zpool detach");
+ return (zfs_error(hdl, EZFS_POSTSPLIT_ONLINE, msg));
+ }
return (zpool_standard_error(hdl, errno, msg));
+ }
*newstate = zc.zc_cookie;
return (0);
@@ -2468,6 +2478,257 @@ zpool_vdev_detach(zpool_handle_t *zhp, const char *path)
}
/*
+ * Find a mirror vdev in the source nvlist.
+ *
+ * The mchild array contains a list of disks in one of the top-level mirrors
+ * of the source pool. The schild array contains a list of disks that the
+ * user specified on the command line. We loop over the mchild array to
+ * see if any entry in the schild array matches.
+ *
+ * If a disk in the mchild array is found in the schild array, we return
+ * the index of that entry. Otherwise we return -1.
+ */
+static int
+find_vdev_entry(zpool_handle_t *zhp, nvlist_t **mchild, uint_t mchildren,
+    nvlist_t **schild, uint_t schildren)
+{
+	int match = -1;
+	uint_t m, s;
+
+	/* stop at the first mirror child whose name matches a requested disk */
+	for (m = 0; match < 0 && m < mchildren; m++) {
+		char *mname = zpool_vdev_name(zhp->zpool_hdl, zhp,
+		    mchild[m], B_FALSE);
+
+		for (s = 0; s < schildren; s++) {
+			char *sname = zpool_vdev_name(zhp->zpool_hdl, zhp,
+			    schild[s], B_FALSE);
+			int differ = strcmp(mname, sname);
+
+			/* the name is only needed for the comparison */
+			free(sname);
+			if (differ == 0) {
+				match = m;
+				break;
+			}
+		}
+
+		free(mname);
+	}
+
+	return (match);
+}
+
+/*
+ * Split a mirror pool. If newroot points to null, then a new nvlist
+ * is generated and it is the responsibility of the caller to free it.
+ */
+int
+zpool_vdev_split(zpool_handle_t *zhp, char *newname, nvlist_t **newroot,
+    nvlist_t *props, splitflags_t flags)
+{
+	zfs_cmd_t zc = { 0 };
+	char msg[1024];
+	nvlist_t *tree, *config, **child, **newchild, *newconfig = NULL;
+	nvlist_t **varray = NULL, *zc_props = NULL;
+	uint_t c, children, newchildren, lastlog = 0, vcount = 0, found = 0;
+	libzfs_handle_t *hdl = zhp->zpool_hdl;
+	uint64_t vers;
+	/*
+	 * Exit protocol for the "out" label: a nonzero retval is returned
+	 * as is; otherwise memory_err, which stays set until a success
+	 * point is reached, turns the exit into no_memory().  freelist
+	 * records that *newroot was allocated here and must not be handed
+	 * back on failure.
+	 */
+	boolean_t freelist = B_FALSE, memory_err = B_TRUE;
+	int retval = 0;
+
+	(void) snprintf(msg, sizeof (msg),
+	    dgettext(TEXT_DOMAIN, "Unable to split %s"), zhp->zpool_name);
+
+	if (!zpool_name_valid(hdl, B_FALSE, newname))
+		return (zfs_error(hdl, EZFS_INVALIDNAME, msg));
+
+	if ((config = zpool_get_config(zhp, NULL)) == NULL) {
+		(void) fprintf(stderr, gettext("Internal error: unable to "
+		    "retrieve pool configuration\n"));
+		return (-1);
+	}
+
+	verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &tree)
+	    == 0);
+	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &vers) == 0);
+
+	if (props) {
+		if ((zc_props = zpool_valid_proplist(hdl, zhp->zpool_name,
+		    props, vers, B_TRUE, msg)) == NULL)
+			return (-1);
+	}
+
+	if (nvlist_lookup_nvlist_array(tree, ZPOOL_CONFIG_CHILDREN, &child,
+	    &children) != 0) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "Source pool is missing vdev tree"));
+		if (zc_props)
+			nvlist_free(zc_props);
+		return (-1);
+	}
+
+	/* one slot per top-level vdev of the source pool */
+	varray = zfs_alloc(hdl, children * sizeof (nvlist_t *));
+	if (varray == NULL)
+		goto out;
+
+	if (*newroot == NULL ||
+	    nvlist_lookup_nvlist_array(*newroot, ZPOOL_CONFIG_CHILDREN,
+	    &newchild, &newchildren) != 0)
+		newchildren = 0;
+
+	for (c = 0; c < children; c++) {
+		uint64_t is_log = B_FALSE, is_hole = B_FALSE;
+		char *type;
+		nvlist_t **mchild, *vdev;
+		uint_t mchildren;
+		int entry;
+
+		/*
+		 * Unlike cache & spares, slogs are stored in the
+		 * ZPOOL_CONFIG_CHILDREN array.  We filter them out here.
+		 */
+		(void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG,
+		    &is_log);
+		(void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_HOLE,
+		    &is_hole);
+		if (is_log || is_hole) {
+			/*
+			 * Create a hole vdev and put it in the config.
+			 */
+			if (nvlist_alloc(&vdev, NV_UNIQUE_NAME, 0) != 0)
+				goto out;
+			if (nvlist_add_string(vdev, ZPOOL_CONFIG_TYPE,
+			    VDEV_TYPE_HOLE) != 0 ||
+			    nvlist_add_uint64(vdev, ZPOOL_CONFIG_IS_HOLE,
+			    1) != 0) {
+				/* not in varray yet, so "out" won't free it */
+				nvlist_free(vdev);
+				goto out;
+			}
+			/* remember where the current run of holes began */
+			if (lastlog == 0)
+				lastlog = vcount;
+			varray[vcount++] = vdev;
+			continue;
+		}
+		lastlog = 0;
+		verify(nvlist_lookup_string(child[c], ZPOOL_CONFIG_TYPE, &type)
+		    == 0);
+		if (strcmp(type, VDEV_TYPE_MIRROR) != 0) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "Source pool must be composed only of mirrors\n"));
+			retval = zfs_error(hdl, EZFS_INVALCONFIG, msg);
+			goto out;
+		}
+
+		verify(nvlist_lookup_nvlist_array(child[c],
+		    ZPOOL_CONFIG_CHILDREN, &mchild, &mchildren) == 0);
+
+		/* find or add an entry for this top-level vdev */
+		if (newchildren > 0 &&
+		    (entry = find_vdev_entry(zhp, mchild, mchildren,
+		    newchild, newchildren)) >= 0) {
+			/* We found a disk that the user specified. */
+			vdev = mchild[entry];
+			++found;
+		} else {
+			/* User didn't specify a disk for this vdev. */
+			vdev = mchild[mchildren - 1];
+		}
+
+		/* only count the slot once it really holds a copy */
+		if (nvlist_dup(vdev, &varray[vcount], 0) != 0)
+			goto out;
+		vcount++;
+	}
+
+	/* did we find every disk the user specified? */
+	if (found != newchildren) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "Device list must "
+		    "include at most one disk from each mirror"));
+		retval = zfs_error(hdl, EZFS_INVALCONFIG, msg);
+		goto out;
+	}
+
+	/* Prepare the nvlist for populating. */
+	if (*newroot == NULL) {
+		if (nvlist_alloc(newroot, NV_UNIQUE_NAME, 0) != 0)
+			goto out;
+		freelist = B_TRUE;
+		if (nvlist_add_string(*newroot, ZPOOL_CONFIG_TYPE,
+		    VDEV_TYPE_ROOT) != 0)
+			goto out;
+	} else {
+		verify(nvlist_remove_all(*newroot, ZPOOL_CONFIG_CHILDREN) == 0);
+	}
+
+	/* Add all the children we found, minus any trailing run of holes */
+	if (nvlist_add_nvlist_array(*newroot, ZPOOL_CONFIG_CHILDREN, varray,
+	    lastlog == 0 ? vcount : lastlog) != 0)
+		goto out;
+
+	/*
+	 * If we're just doing a dry run, exit now with success.
+	 */
+	if (flags.dryrun) {
+		memory_err = B_FALSE;
+		freelist = B_FALSE;
+		goto out;
+	}
+
+	/* now build up the config list & call the ioctl */
+	if (nvlist_alloc(&newconfig, NV_UNIQUE_NAME, 0) != 0)
+		goto out;
+
+	if (nvlist_add_nvlist(newconfig,
+	    ZPOOL_CONFIG_VDEV_TREE, *newroot) != 0 ||
+	    nvlist_add_string(newconfig,
+	    ZPOOL_CONFIG_POOL_NAME, newname) != 0 ||
+	    nvlist_add_uint64(newconfig, ZPOOL_CONFIG_VERSION, vers) != 0)
+		goto out;
+
+	/*
+	 * The new pool is automatically part of the namespace unless we
+	 * explicitly export it.
+	 */
+	if (!flags.import)
+		zc.zc_cookie = ZPOOL_EXPORT_AFTER_SPLIT;
+	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
+	(void) strlcpy(zc.zc_string, newname, sizeof (zc.zc_string));
+	if (zcmd_write_conf_nvlist(hdl, &zc, newconfig) != 0)
+		goto out;
+	if (zc_props != NULL && zcmd_write_src_nvlist(hdl, &zc, zc_props) != 0)
+		goto out;
+
+	if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SPLIT, &zc) != 0) {
+		retval = zpool_standard_error(hdl, errno, msg);
+		goto out;
+	}
+
+	freelist = B_FALSE;
+	memory_err = B_FALSE;
+
+out:
+	if (varray != NULL) {
+		uint_t v;
+
+		for (v = 0; v < vcount; v++)
+			nvlist_free(varray[v]);
+		free(varray);
+	}
+	zcmd_free_nvlists(&zc);
+	if (zc_props)
+		nvlist_free(zc_props);
+	if (newconfig)
+		nvlist_free(newconfig);
+	if (freelist) {
+		nvlist_free(*newroot);
+		*newroot = NULL;
+	}
+
+	if (retval != 0)
+		return (retval);
+
+	if (memory_err)
+		return (no_memory(hdl));
+
+	return (0);
+}
+
+/*
* Remove the given device. Currently, this is supported only for hot spares
* and level 2 cache devices.
*/
diff --git a/usr/src/lib/libzfs/common/libzfs_util.c b/usr/src/lib/libzfs/common/libzfs_util.c
index f7beab37dd..a400dc9c1e 100644
--- a/usr/src/lib/libzfs/common/libzfs_util.c
+++ b/usr/src/lib/libzfs/common/libzfs_util.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -220,6 +220,9 @@ libzfs_error_description(libzfs_handle_t *hdl)
return (dgettext(TEXT_DOMAIN, "pipe create failed"));
case EZFS_THREADCREATEFAILED:
return (dgettext(TEXT_DOMAIN, "thread create failed"));
+ case EZFS_POSTSPLIT_ONLINE:
+ return (dgettext(TEXT_DOMAIN, "disk was split from this pool "
+ "into a new one"));
case EZFS_UNKNOWN:
return (dgettext(TEXT_DOMAIN, "unknown error"));
default:
diff --git a/usr/src/lib/libzfs/common/mapfile-vers b/usr/src/lib/libzfs/common/mapfile-vers
index d3006b09c7..88fb30fb44 100644
--- a/usr/src/lib/libzfs/common/mapfile-vers
+++ b/usr/src/lib/libzfs/common/mapfile-vers
@@ -19,7 +19,7 @@
# CDDL HEADER END
#
#
-# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2010 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
@@ -216,6 +216,7 @@ SUNWprivate_1.1 {
zpool_vdev_offline;
zpool_vdev_online;
zpool_vdev_remove;
+ zpool_vdev_split;
zprop_free_list;
zprop_get_list;
zprop_iter;
diff --git a/usr/src/uts/common/fs/zfs/spa.c b/usr/src/uts/common/fs/zfs/spa.c
index 4c73d6aa1e..d1e770eb08 100644
--- a/usr/src/uts/common/fs/zfs/spa.c
+++ b/usr/src/uts/common/fs/zfs/spa.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -113,6 +113,9 @@ const zio_taskq_info_t zio_taskqs[ZIO_TYPES][ZIO_TASKQ_TYPES] = {
static void spa_sync_props(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx);
static boolean_t spa_has_active_shared_spare(spa_t *spa);
+static int spa_load_impl(spa_t *spa, uint64_t, nvlist_t *config,
+ spa_load_state_t state, spa_import_type_t type, boolean_t mosconfig,
+ char **ereport);
uint_t zio_taskq_batch_pct = 100; /* 1 thread per cpu in pset */
id_t zio_taskq_psrset_bind = PS_NONE;
@@ -1313,7 +1316,7 @@ spa_check_logs(spa_t *spa)
case SPA_LOG_UNKNOWN:
if (dmu_objset_find(spa->spa_name, zil_check_log_chain, NULL,
DS_FIND_CHILDREN)) {
- spa->spa_log_state = SPA_LOG_MISSING;
+ spa_set_log_state(spa, SPA_LOG_MISSING);
return (1);
}
break;
@@ -1321,6 +1324,64 @@ spa_check_logs(spa_t *spa)
return (0);
}
+/*
+ * Passivate the metaslab group of every child of the root vdev that is
+ * marked as a log device.  The caller must hold SCL_ALLOC as writer.
+ *
+ * Returns B_TRUE if at least one log vdev was passivated, and B_FALSE if
+ * the pool has no slogs or none of the root's children is a log.
+ */
+static boolean_t
+spa_passivate_log(spa_t *spa)
+{
+	vdev_t *root = spa->spa_root_vdev;
+	boolean_t passivated = B_FALSE;
+	int idx;
+
+	ASSERT(spa_config_held(spa, SCL_ALLOC, RW_WRITER));
+
+	/* Nothing to do for a pool without separate log devices. */
+	if (!spa_has_slogs(spa))
+		return (passivated);
+
+	for (idx = 0; idx < root->vdev_children; idx++) {
+		vdev_t *top = root->vdev_child[idx];
+
+		if (!top->vdev_islog)
+			continue;
+
+		metaslab_group_passivate(top->vdev_mg);
+		passivated = B_TRUE;
+	}
+
+	return (passivated);
+}
+
+/*
+ * Counterpart of spa_passivate_log(): activate the metaslab group of every
+ * child of the root vdev that is marked as a log device.  The caller must
+ * hold SCL_ALLOC as writer.
+ */
+static void
+spa_activate_log(spa_t *spa)
+{
+	vdev_t *root = spa->spa_root_vdev;
+	int idx;
+
+	ASSERT(spa_config_held(spa, SCL_ALLOC, RW_WRITER));
+
+	for (idx = 0; idx < root->vdev_children; idx++) {
+		vdev_t *top = root->vdev_child[idx];
+
+		if (!top->vdev_islog)
+			continue;
+
+		metaslab_group_activate(top->vdev_mg);
+	}
+}
+
+/*
+ * Run zil_vdev_offline() over every dataset in the pool (children
+ * included).  Returns 0 on success, otherwise the error reported by
+ * dmu_objset_find().
+ */
+int
+spa_offline_log(spa_t *spa)
+{
+	int err;
+
+	err = dmu_objset_find(spa_name(spa), zil_vdev_offline, NULL,
+	    DS_FIND_CHILDREN);
+	if (err != 0)
+		return (err);
+
+	/*
+	 * We successfully offlined the log device, sync out the
+	 * current txg so that the "stubby" block can be removed
+	 * by zil_sync().
+	 */
+	txg_wait_synced(spa->spa_dsl_pool, 0);
+
+	return (0);
+}
+
static void
spa_aux_check_removed(spa_aux_vdev_t *sav)
{
@@ -1424,23 +1485,178 @@ spa_load_verify(spa_t *spa)
}
/*
+ * Look up a single uint64_t property value in the pool props object.
+ * The lookup result is deliberately ignored: if the lookup fails, the
+ * function reports nothing to the caller.
+ */
+static void
+spa_prop_find(spa_t *spa, zpool_prop_t prop, uint64_t *val)
+{
+	const char *propname = zpool_prop_to_name(prop);
+
+	(void) zap_lookup(spa->spa_meta_objset, spa->spa_pool_props_object,
+	    propname, sizeof (uint64_t), 1, val);
+}
+
+/*
+ * Look up a single uint64_t entry, by name, in the pool directory object
+ * of the MOS.  Returns the zap_lookup() result unchanged so that callers
+ * can tell a missing entry from any other failure.
+ */
+static int
+spa_dir_prop(spa_t *spa, const char *name, uint64_t *val)
+{
+	int err;
+
+	err = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
+	    name, sizeof (uint64_t), 1, val);
+
+	return (err);
+}
+
+/*
+ * Error-path helper for pool load: put 'vdev' into VDEV_STATE_CANT_OPEN
+ * with the given aux reason, then hand back 'err' so the caller can write
+ * "return (spa_vdev_err(...))" in a single statement.
+ */
+static int
+spa_vdev_err(vdev_t *vdev, vdev_aux_t aux, int err)
+{
+	vdev_set_state(vdev, B_TRUE, VDEV_STATE_CANT_OPEN, aux);
+
+	return (err);
+}
+
+/*
+ * Fix up config after a partly-completed split.  This is done with the
+ * ZPOOL_CONFIG_SPLIT nvlist.  Both the splitting pool and the split-off
+ * pool have that entry in their config, but only the splitting one contains
+ * a list of all the guids of the vdevs that are being split off.
+ *
+ * This function decides what to do with that list: either rejoin all the
+ * disks to the pool, or complete the splitting process.  To attempt the
+ * rejoin, each listed disk that can be found is marked online again and
+ * the root vdev is reopened.  If every disk that was found then reports
+ * VDEV_AUX_SPLIT_POOL (i.e. it was successfully split off), vdev_split()
+ * is called on each of them and the split is completed.  The split is also
+ * completed, without attempting a reopen, if any listed guid can no longer
+ * be found in this pool.
+ *
+ * Otherwise the config is left alone, with the disks rejoined to the
+ * original pool.
+ */
+static void
+spa_try_repair(spa_t *spa, nvlist_t *config)
+{
+	nvlist_t *split;
+	uint64_t *guids;
+	uint_t nguids, idx, nsplit;
+	vdev_t **vdevs;
+	size_t vdevs_size;
+	boolean_t try_rejoin = B_TRUE;
+	boolean_t finish_split = B_TRUE;
+
+	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_SPLIT, &split) != 0)
+		return;
+
+	/* check that the config is complete */
+	if (nvlist_lookup_uint64_array(split, ZPOOL_CONFIG_SPLIT_LIST,
+	    &guids, &nguids) != 0)
+		return;
+
+	vdevs_size = nguids * sizeof (vdev_t *);
+	vdevs = kmem_zalloc(vdevs_size, KM_SLEEP);
+
+	/* attempt to online all the vdevs & validate */
+	for (idx = 0; idx < nguids; idx++) {
+		if (guids[idx] == 0)	/* vdev is hole */
+			continue;
+
+		vdevs[idx] = spa_lookup_by_guid(spa, guids[idx], B_FALSE);
+		if (vdevs[idx] != NULL) {
+			/* attempt to re-online it */
+			vdevs[idx]->vdev_offline = B_FALSE;
+		} else {
+			/*
+			 * Don't bother attempting to reopen the disks;
+			 * just do the split.
+			 */
+			try_rejoin = B_FALSE;
+		}
+	}
+
+	if (try_rejoin) {
+		vdev_reopen(spa->spa_root_vdev);
+
+		/*
+		 * Count leading entries that are either holes or report
+		 * having been split off; stop at the first disk that
+		 * reports anything else.
+		 */
+		nsplit = 0;
+		while (nsplit < nguids &&
+		    (vdevs[nsplit] == NULL ||
+		    vdevs[nsplit]->vdev_stat.vs_aux == VDEV_AUX_SPLIT_POOL))
+			nsplit++;
+
+		finish_split = (nsplit == nguids);
+	}
+
+	/*
+	 * If every disk has been moved to the new pool, or if we never
+	 * even attempted to look at them, then we split them off for
+	 * good.
+	 */
+	if (finish_split) {
+		for (idx = 0; idx < nguids; idx++) {
+			if (vdevs[idx] != NULL)
+				vdev_split(vdevs[idx]);
+		}
+		vdev_reopen(spa->spa_root_vdev);
+	}
+
+	kmem_free(vdevs, vdevs_size);
+}
+
+/*
+ * Front end for spa_load_impl().  Pulls the pool guid, version and config
+ * txg out of the pool's cached config, rejects an import of a guid that
+ * already exists, records any in-progress split information, and then
+ * performs the load proper.  Whatever the outcome of that load, the
+ * minimum reference count, the load state and the ENA are reset here, and
+ * an ereport is posted for any error other than EBADF.
+ *
+ * Returns EINVAL (with no other bookkeeping done) if the config carries
+ * no pool guid, EEXIST for a duplicate guid on import/tryimport, or the
+ * result of spa_load_impl().
+ */
+static int
+spa_load(spa_t *spa, spa_load_state_t state, spa_import_type_t type,
+    boolean_t mosconfig)
+{
+	nvlist_t *cfg = spa->spa_config;
+	nvlist_t *split;
+	char *ereport = FM_EREPORT_ZFS_POOL;
+	uint64_t guid;
+	boolean_t importing;
+	int err;
+
+	if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID, &guid) != 0)
+		return (EINVAL);
+
+	/*
+	 * Versioning wasn't explicitly added to the label until later, so if
+	 * it's not present treat it as the initial version.
+	 */
+	if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_VERSION,
+	    &spa->spa_ubsync.ub_version) != 0)
+		spa->spa_ubsync.ub_version = SPA_VERSION_INITIAL;
+
+	(void) nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_TXG,
+	    &spa->spa_config_txg);
+
+	importing = (state == SPA_LOAD_IMPORT || state == SPA_LOAD_TRYIMPORT);
+
+	if (importing && spa_guid_exists(guid, 0)) {
+		err = EEXIST;
+	} else {
+		spa->spa_load_guid = guid;
+
+		/* Keep a private copy of any pending split description. */
+		if (nvlist_lookup_nvlist(cfg, ZPOOL_CONFIG_SPLIT,
+		    &split) == 0) {
+			VERIFY(nvlist_dup(split, &spa->spa_config_splitting,
+			    KM_SLEEP) == 0);
+		}
+
+		err = spa_load_impl(spa, guid, cfg, state, type, mosconfig,
+		    &ereport);
+	}
+
+	spa->spa_minref = refcount_count(&spa->spa_refcount);
+
+	/* EBADF is the one failure that is not reported via FMA. */
+	if (err != 0 && err != EBADF)
+		zfs_ereport_post(ereport, spa, NULL, NULL, 0, 0);
+
+	spa->spa_load_state = (err != 0) ? SPA_LOAD_ERROR : SPA_LOAD_NONE;
+	spa->spa_ena = 0;
+
+	return (err);
+}
+
+/*
* Load an existing storage pool, using the pool's builtin spa_config as a
* source of configuration information.
*/
static int
-spa_load(spa_t *spa, spa_load_state_t state, int mosconfig)
+spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config,
+ spa_load_state_t state, spa_import_type_t type, boolean_t mosconfig,
+ char **ereport)
{
int error = 0;
nvlist_t *nvconfig, *nvroot = NULL;
vdev_t *rvd;
uberblock_t *ub = &spa->spa_uberblock;
uint64_t config_cache_txg = spa->spa_config_txg;
- uint64_t pool_guid;
- uint64_t version;
- uint64_t autoreplace = 0;
int orig_mode = spa->spa_mode;
- char *ereport = FM_EREPORT_ZFS_POOL;
- nvlist_t *config = spa->spa_config;
+ int parse;
/*
* If this is an untrusted config, access the pool in read-only mode.
@@ -1453,29 +1669,11 @@ spa_load(spa_t *spa, spa_load_state_t state, int mosconfig)
spa->spa_load_state = state;
- if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) ||
- nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pool_guid)) {
- error = EINVAL;
- goto out;
- }
-
- /*
- * Versioning wasn't explicitly added to the label until later, so if
- * it's not present treat it as the initial version.
- */
- if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &version) != 0)
- version = SPA_VERSION_INITIAL;
-
- (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG,
- &spa->spa_config_txg);
-
- if ((state == SPA_LOAD_IMPORT || state == SPA_LOAD_TRYIMPORT) &&
- spa_guid_exists(pool_guid, 0)) {
- error = EEXIST;
- goto out;
- }
+ if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot))
+ return (EINVAL);
- spa->spa_load_guid = pool_guid;
+ parse = (type == SPA_IMPORT_EXISTING ?
+ VDEV_ALLOC_LOAD : VDEV_ALLOC_SPLIT);
/*
* Create "The Godfather" zio to hold all async IOs
@@ -1489,15 +1687,17 @@ spa_load(spa_t *spa, spa_load_state_t state, int mosconfig)
* configuration requires knowing the version number.
*/
spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
- spa->spa_ubsync.ub_version = version;
- error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_LOAD);
+ error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, parse);
spa_config_exit(spa, SCL_ALL, FTAG);
if (error != 0)
- goto out;
+ return (error);
ASSERT(spa->spa_root_vdev == rvd);
- ASSERT(spa_guid(spa) == pool_guid);
+
+ if (type != SPA_IMPORT_ASSEMBLE) {
+ ASSERT(spa_guid(spa) == pool_guid);
+ }
/*
* Try to open all vdevs, loading each label in the process.
@@ -1506,26 +1706,31 @@ spa_load(spa_t *spa, spa_load_state_t state, int mosconfig)
error = vdev_open(rvd);
spa_config_exit(spa, SCL_ALL, FTAG);
if (error != 0)
- goto out;
+ return (error);
/*
* We need to validate the vdev labels against the configuration that
* we have in hand, which is dependent on the setting of mosconfig. If
* mosconfig is true then we're validating the vdev labels based on
- * that config. Otherwise, we're validating against the cached config
+ * that config. Otherwise, we're validating against the cached config
* (zpool.cache) that was read when we loaded the zfs module, and then
* later we will recursively call spa_load() and validate against
* the vdev config.
+ *
+ * If we're assembling a new pool that's been split off from an
+ * existing pool, the labels haven't yet been updated so we skip
+ * validation for now.
*/
- spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
- error = vdev_validate(rvd);
- spa_config_exit(spa, SCL_ALL, FTAG);
- if (error != 0)
- goto out;
+ if (type != SPA_IMPORT_ASSEMBLE) {
+ spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
+ error = vdev_validate(rvd);
+ spa_config_exit(spa, SCL_ALL, FTAG);
- if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) {
- error = ENXIO;
- goto out;
+ if (error != 0)
+ return (error);
+
+ if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN)
+ return (ENXIO);
}
/*
@@ -1536,32 +1741,29 @@ spa_load(spa_t *spa, spa_load_state_t state, int mosconfig)
/*
* If we weren't able to find a single valid uberblock, return failure.
*/
- if (ub->ub_txg == 0) {
- vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
- VDEV_AUX_CORRUPT_DATA);
- error = ENXIO;
- goto out;
- }
+ if (ub->ub_txg == 0)
+ return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, ENXIO));
/*
* If the pool is newer than the code, we can't open it.
*/
- if (ub->ub_version > SPA_VERSION) {
- vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
- VDEV_AUX_VERSION_NEWER);
- error = ENOTSUP;
- goto out;
- }
+ if (ub->ub_version > SPA_VERSION)
+ return (spa_vdev_err(rvd, VDEV_AUX_VERSION_NEWER, ENOTSUP));
/*
* If the vdev guid sum doesn't match the uberblock, we have an
* incomplete configuration.
*/
- if (rvd->vdev_guid_sum != ub->ub_guid_sum && mosconfig) {
- vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
- VDEV_AUX_BAD_GUID_SUM);
- error = ENXIO;
- goto out;
+ if (mosconfig && type != SPA_IMPORT_ASSEMBLE &&
+ rvd->vdev_guid_sum != ub->ub_guid_sum)
+ return (spa_vdev_err(rvd, VDEV_AUX_BAD_GUID_SUM, ENXIO));
+
+ if (type != SPA_IMPORT_ASSEMBLE && spa->spa_config_splitting) {
+ spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
+ spa_try_repair(spa, config);
+ spa_config_exit(spa, SCL_ALL, FTAG);
+ nvlist_free(spa->spa_config_splitting);
+ spa->spa_config_splitting = NULL;
}
/*
@@ -1576,29 +1778,15 @@ spa_load(spa_t *spa, spa_load_state_t state, int mosconfig)
spa->spa_claim_max_txg = spa->spa_first_txg;
error = dsl_pool_open(spa, spa->spa_first_txg, &spa->spa_dsl_pool);
- if (error) {
- vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
- VDEV_AUX_CORRUPT_DATA);
- error = EIO;
- goto out;
- }
+ if (error)
+ return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
spa->spa_meta_objset = spa->spa_dsl_pool->dp_meta_objset;
- if (zap_lookup(spa->spa_meta_objset,
- DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CONFIG,
- sizeof (uint64_t), 1, &spa->spa_config_object) != 0) {
- vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
- VDEV_AUX_CORRUPT_DATA);
- error = EIO;
- goto out;
- }
+ if (spa_dir_prop(spa, DMU_POOL_CONFIG, &spa->spa_config_object) != 0)
+ return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
- if (load_nvlist(spa, spa->spa_config_object, &nvconfig) != 0) {
- vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
- VDEV_AUX_CORRUPT_DATA);
- error = EIO;
- goto out;
- }
+ if (load_nvlist(spa, spa->spa_config_object, &nvconfig) != 0)
+ return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
if (!mosconfig) {
uint64_t hostid;
@@ -1628,8 +1816,7 @@ spa_load(spa_t *spa, spa_load_state_t state, int mosconfig)
"See: http://www.sun.com/msg/ZFS-8000-EY",
spa_name(spa), hostname,
(unsigned long)hostid);
- error = EBADF;
- goto out;
+ return (EBADF);
}
}
@@ -1638,163 +1825,106 @@ spa_load(spa_t *spa, spa_load_state_t state, int mosconfig)
spa_deactivate(spa);
spa_activate(spa, orig_mode);
- return (spa_load(spa, state, B_TRUE));
+ return (spa_load(spa, state, SPA_IMPORT_EXISTING, B_TRUE));
}
- if (zap_lookup(spa->spa_meta_objset,
- DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SYNC_BPLIST,
- sizeof (uint64_t), 1, &spa->spa_deferred_bplist_obj) != 0) {
- vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
- VDEV_AUX_CORRUPT_DATA);
- error = EIO;
- goto out;
- }
+ if (spa_dir_prop(spa, DMU_POOL_SYNC_BPLIST,
+ &spa->spa_deferred_bplist_obj) != 0)
+ return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
/*
* Load the bit that tells us to use the new accounting function
* (raid-z deflation). If we have an older pool, this will not
* be present.
*/
- error = zap_lookup(spa->spa_meta_objset,
- DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE,
- sizeof (uint64_t), 1, &spa->spa_deflate);
- if (error != 0 && error != ENOENT) {
- vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
- VDEV_AUX_CORRUPT_DATA);
- error = EIO;
- goto out;
- }
+ error = spa_dir_prop(spa, DMU_POOL_DEFLATE, &spa->spa_deflate);
+ if (error != 0 && error != ENOENT)
+ return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
/*
* Load the persistent error log. If we have an older pool, this will
* not be present.
*/
- error = zap_lookup(spa->spa_meta_objset,
- DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ERRLOG_LAST,
- sizeof (uint64_t), 1, &spa->spa_errlog_last);
- if (error != 0 && error != ENOENT) {
- vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
- VDEV_AUX_CORRUPT_DATA);
- error = EIO;
- goto out;
- }
+ error = spa_dir_prop(spa, DMU_POOL_ERRLOG_LAST, &spa->spa_errlog_last);
+ if (error != 0 && error != ENOENT)
+ return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
- error = zap_lookup(spa->spa_meta_objset,
- DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ERRLOG_SCRUB,
- sizeof (uint64_t), 1, &spa->spa_errlog_scrub);
- if (error != 0 && error != ENOENT) {
- vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
- VDEV_AUX_CORRUPT_DATA);
- error = EIO;
- goto out;
- }
+ error = spa_dir_prop(spa, DMU_POOL_ERRLOG_SCRUB,
+ &spa->spa_errlog_scrub);
+ if (error != 0 && error != ENOENT)
+ return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
/*
* Load the history object. If we have an older pool, this
* will not be present.
*/
- error = zap_lookup(spa->spa_meta_objset,
- DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_HISTORY,
- sizeof (uint64_t), 1, &spa->spa_history);
- if (error != 0 && error != ENOENT) {
- vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
- VDEV_AUX_CORRUPT_DATA);
- error = EIO;
- goto out;
- }
+ error = spa_dir_prop(spa, DMU_POOL_HISTORY, &spa->spa_history);
+ if (error != 0 && error != ENOENT)
+ return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
+
+ /*
+ * If we're assembling the pool from the split-off vdevs of
+ * an existing pool, we don't want to attach the spares & cache
+ * devices.
+ */
/*
* Load any hot spares for this pool.
*/
- error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
- DMU_POOL_SPARES, sizeof (uint64_t), 1, &spa->spa_spares.sav_object);
- if (error != 0 && error != ENOENT) {
- vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
- VDEV_AUX_CORRUPT_DATA);
- error = EIO;
- goto out;
- }
- if (error == 0) {
+ error = spa_dir_prop(spa, DMU_POOL_SPARES, &spa->spa_spares.sav_object);
+ if (error != 0 && error != ENOENT)
+ return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
+ if (error == 0 && type != SPA_IMPORT_ASSEMBLE) {
ASSERT(spa_version(spa) >= SPA_VERSION_SPARES);
if (load_nvlist(spa, spa->spa_spares.sav_object,
- &spa->spa_spares.sav_config) != 0) {
- vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
- VDEV_AUX_CORRUPT_DATA);
- error = EIO;
- goto out;
- }
+ &spa->spa_spares.sav_config) != 0)
+ return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
spa_load_spares(spa);
spa_config_exit(spa, SCL_ALL, FTAG);
+ } else if (error == 0) {
+ spa->spa_spares.sav_sync = B_TRUE;
}
/*
* Load any level 2 ARC devices for this pool.
*/
- error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
- DMU_POOL_L2CACHE, sizeof (uint64_t), 1,
+ error = spa_dir_prop(spa, DMU_POOL_L2CACHE,
&spa->spa_l2cache.sav_object);
- if (error != 0 && error != ENOENT) {
- vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
- VDEV_AUX_CORRUPT_DATA);
- error = EIO;
- goto out;
- }
- if (error == 0) {
+ if (error != 0 && error != ENOENT)
+ return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
+ if (error == 0 && type != SPA_IMPORT_ASSEMBLE) {
ASSERT(spa_version(spa) >= SPA_VERSION_L2CACHE);
if (load_nvlist(spa, spa->spa_l2cache.sav_object,
- &spa->spa_l2cache.sav_config) != 0) {
- vdev_set_state(rvd, B_TRUE,
- VDEV_STATE_CANT_OPEN,
- VDEV_AUX_CORRUPT_DATA);
- error = EIO;
- goto out;
- }
+ &spa->spa_l2cache.sav_config) != 0)
+ return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
spa_load_l2cache(spa);
spa_config_exit(spa, SCL_ALL, FTAG);
+ } else if (error == 0) {
+ spa->spa_l2cache.sav_sync = B_TRUE;
}
spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION);
- error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
- DMU_POOL_PROPS, sizeof (uint64_t), 1, &spa->spa_pool_props_object);
-
- if (error && error != ENOENT) {
- vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
- VDEV_AUX_CORRUPT_DATA);
- error = EIO;
- goto out;
- }
+ error = spa_dir_prop(spa, DMU_POOL_PROPS, &spa->spa_pool_props_object);
+ if (error && error != ENOENT)
+ return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
if (error == 0) {
- (void) zap_lookup(spa->spa_meta_objset,
- spa->spa_pool_props_object,
- zpool_prop_to_name(ZPOOL_PROP_BOOTFS),
- sizeof (uint64_t), 1, &spa->spa_bootfs);
- (void) zap_lookup(spa->spa_meta_objset,
- spa->spa_pool_props_object,
- zpool_prop_to_name(ZPOOL_PROP_AUTOREPLACE),
- sizeof (uint64_t), 1, &autoreplace);
+ uint64_t autoreplace;
+
+ spa_prop_find(spa, ZPOOL_PROP_BOOTFS, &spa->spa_bootfs);
+ spa_prop_find(spa, ZPOOL_PROP_AUTOREPLACE, &autoreplace);
+ spa_prop_find(spa, ZPOOL_PROP_DELEGATION, &spa->spa_delegation);
+ spa_prop_find(spa, ZPOOL_PROP_FAILUREMODE, &spa->spa_failmode);
+ spa_prop_find(spa, ZPOOL_PROP_AUTOEXPAND, &spa->spa_autoexpand);
+ spa_prop_find(spa, ZPOOL_PROP_DEDUPDITTO,
+ &spa->spa_dedup_ditto);
+
spa->spa_autoreplace = (autoreplace != 0);
- (void) zap_lookup(spa->spa_meta_objset,
- spa->spa_pool_props_object,
- zpool_prop_to_name(ZPOOL_PROP_DELEGATION),
- sizeof (uint64_t), 1, &spa->spa_delegation);
- (void) zap_lookup(spa->spa_meta_objset,
- spa->spa_pool_props_object,
- zpool_prop_to_name(ZPOOL_PROP_FAILUREMODE),
- sizeof (uint64_t), 1, &spa->spa_failmode);
- (void) zap_lookup(spa->spa_meta_objset,
- spa->spa_pool_props_object,
- zpool_prop_to_name(ZPOOL_PROP_AUTOEXPAND),
- sizeof (uint64_t), 1, &spa->spa_autoexpand);
- (void) zap_lookup(spa->spa_meta_objset,
- spa->spa_pool_props_object,
- zpool_prop_to_name(ZPOOL_PROP_DEDUPDITTO),
- sizeof (uint64_t), 1, &spa->spa_dedup_ditto);
}
/*
@@ -1833,47 +1963,39 @@ spa_load(spa_t *spa, spa_load_state_t state, int mosconfig)
* Check the state of the root vdev. If it can't be opened, it
* indicates one or more toplevel vdevs are faulted.
*/
- if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) {
- error = ENXIO;
- goto out;
- }
+ if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN)
+ return (ENXIO);
/*
* Load the DDTs (dedup tables).
*/
error = ddt_load(spa);
- if (error != 0) {
- vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
- VDEV_AUX_CORRUPT_DATA);
- error = EIO;
- goto out;
- }
+ if (error != 0)
+ return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
spa_update_dspace(spa);
if (state != SPA_LOAD_TRYIMPORT) {
error = spa_load_verify(spa);
- if (error) {
- vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
- VDEV_AUX_CORRUPT_DATA);
- goto out;
- }
+ if (error)
+ return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA,
+ error));
}
/*
- * Load the intent log state and check log integrity.
+ * Load the intent log state and check log integrity. If we're
+ * assembling a pool from a split, the log is not transferred over.
*/
- VERIFY(nvlist_lookup_nvlist(nvconfig, ZPOOL_CONFIG_VDEV_TREE,
- &nvroot) == 0);
- spa_load_log_state(spa, nvroot);
- nvlist_free(nvconfig);
-
- if (spa_check_logs(spa)) {
- vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
- VDEV_AUX_BAD_LOG);
- error = ENXIO;
- ereport = FM_EREPORT_ZFS_LOG_REPLAY;
- goto out;
+ if (type != SPA_IMPORT_ASSEMBLE) {
+ VERIFY(nvlist_lookup_nvlist(nvconfig, ZPOOL_CONFIG_VDEV_TREE,
+ &nvroot) == 0);
+ spa_load_log_state(spa, nvroot);
+ nvlist_free(nvconfig);
+
+ if (spa_check_logs(spa)) {
+ *ereport = FM_EREPORT_ZFS_LOG_REPLAY;
+ return (spa_vdev_err(rvd, VDEV_AUX_BAD_LOG, ENXIO));
+ }
}
if (spa_writeable(spa) && (state == SPA_LOAD_RECOVER ||
@@ -1900,7 +2022,7 @@ spa_load(spa_t *spa, spa_load_state_t state, int mosconfig)
spa->spa_claiming = B_FALSE;
- spa->spa_log_state = SPA_LOG_GOOD;
+ spa_set_log_state(spa, SPA_LOG_GOOD);
spa->spa_sync_on = B_TRUE;
txg_sync_start(spa->spa_dsl_pool);
@@ -1954,17 +2076,7 @@ spa_load(spa_t *spa, spa_load_state_t state, int mosconfig)
dsl_pool_clean_tmp_userrefs(spa->spa_dsl_pool);
}
- error = 0;
-out:
-
- spa->spa_minref = refcount_count(&spa->spa_refcount);
- if (error && error != EBADF)
- zfs_ereport_post(ereport, spa, NULL, NULL, 0, 0);
-
- spa->spa_load_state = error ? SPA_LOAD_ERROR : SPA_LOAD_NONE;
- spa->spa_ena = 0;
-
- return (error);
+ return (0);
}
static int
@@ -1978,7 +2090,7 @@ spa_load_retry(spa_t *spa, spa_load_state_t state, int mosconfig)
spa_activate(spa, spa_mode_global);
spa_async_suspend(spa);
- return (spa_load(spa, state, mosconfig));
+ return (spa_load(spa, state, SPA_IMPORT_EXISTING, mosconfig));
}
static int
@@ -1992,12 +2104,13 @@ spa_load_best(spa_t *spa, spa_load_state_t state, int mosconfig,
if (spa->spa_load_txg && state == SPA_LOAD_RECOVER) {
spa->spa_load_max_txg = spa->spa_load_txg;
- spa->spa_log_state = SPA_LOG_CLEAR;
+ spa_set_log_state(spa, SPA_LOG_CLEAR);
} else {
spa->spa_load_max_txg = max_request;
}
- load_error = rewind_error = spa_load(spa, state, mosconfig);
+ load_error = rewind_error = spa_load(spa, state, SPA_IMPORT_EXISTING,
+ mosconfig);
if (load_error == 0)
return (0);
@@ -2015,7 +2128,7 @@ spa_load_best(spa_t *spa, spa_load_state_t state, int mosconfig,
/* Price of rolling back is discarding txgs, including log */
if (state == SPA_LOAD_RECOVER)
- spa->spa_log_state = SPA_LOG_CLEAR;
+ spa_set_log_state(spa, SPA_LOG_CLEAR);
spa->spa_load_max_txg = spa->spa_uberblock.ub_txg;
safe_rollback_txg = spa->spa_uberblock.ub_txg - TXG_DEFER_SIZE;
@@ -3016,7 +3129,7 @@ spa_import(const char *pool, nvlist_t *config, nvlist_t *props)
* If a pool with this name exists, return failure.
*/
mutex_enter(&spa_namespace_lock);
- if ((spa = spa_lookup(pool)) != NULL) {
+ if (spa_lookup(pool) != NULL) {
mutex_exit(&spa_namespace_lock);
return (EEXIST);
}
@@ -3183,7 +3296,7 @@ spa_tryimport(nvlist_t *tryconfig)
* Pass TRUE for mosconfig because the user-supplied config
* is actually the one to trust when doing an import.
*/
- error = spa_load(spa, SPA_LOAD_TRYIMPORT, B_TRUE);
+ error = spa_load(spa, SPA_LOAD_TRYIMPORT, SPA_IMPORT_EXISTING, B_TRUE);
/*
* If 'tryconfig' was at least parsable, return the current config.
@@ -3700,6 +3813,7 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done)
boolean_t unspare = B_FALSE;
uint64_t unspare_guid;
size_t len;
+ char *vdpath;
txg = spa_vdev_enter(spa);
@@ -3866,6 +3980,7 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done)
* But first make sure we're not on any *other* txg's DTL list, to
* prevent vd from being accessed after it's freed.
*/
+ vdpath = spa_strdup(vd->vdev_path);
for (int t = 0; t < TXG_SIZE; t++)
(void) txg_list_remove_this(&tvd->vdev_dtl_list, vd, t);
vd->vdev_detached = B_TRUE;
@@ -3875,6 +3990,10 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done)
error = spa_vdev_exit(spa, vd, txg, 0);
+ spa_history_internal_log(LOG_POOL_VDEV_DETACH, spa, NULL, CRED(),
+ "vdev=%s", vdpath);
+ spa_strfree(vdpath);
+
/*
* If this was the removal of the original device in a hot spare vdev,
* then we want to go through and remove the device from the hot spare
@@ -3901,6 +4020,281 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done)
return (error);
}
+/*
+ * Split a set of devices from their mirrors, and create a new pool from them.
+ *
+ *	spa	the pool being split; must be writeable.
+ *	newname	name of the pool to create; must not already be in use.
+ *	config	config for the new pool.  It must contain a
+ *		ZPOOL_CONFIG_VDEV_TREE whose ZPOOL_CONFIG_CHILDREN array has
+ *		one entry (a disk or a hole) per non-log, non-hole child of
+ *		the root vdev, and no spares or l2cache entries.  The pool
+ *		name, state, version, txg and a freshly generated guid are
+ *		added to it here.
+ *	props	optional properties for the new pool; the altroot property,
+ *		if present, is used when the new spa is added.
+ *	exp	if set, the new pool is exported once the split completes.
+ *
+ * Returns 0 on success.  Failures detected here are EROFS (pool is not
+ * writeable), EEXIST (newname in use), EINVAL (malformed config or a vdev
+ * that cannot be split), ENODEV (guid not found in the pool) and EBUSY
+ * (the vdev's DTL is still required); errors from spa_offline_log(),
+ * spa_load(), spa_prop_set(), dmu_tx_assign() and spa_export_common() are
+ * passed through.
+ *
+ * If creating the new pool fails after the chosen disks have been taken
+ * offline, the new pool is torn down and the disks are brought back online
+ * in the original pool.
+ */
+int
+spa_vdev_split_mirror(spa_t *spa, char *newname, nvlist_t *config,
+    nvlist_t *props, boolean_t exp)
+{
+	int error = 0;
+	uint64_t txg, *glist;
+	spa_t *newspa;
+	uint_t c, children, lastlog;
+	nvlist_t **child, *nvl, *tmp;
+	dmu_tx_t *tx;
+	char *altroot = NULL;
+	vdev_t *rvd, **vml = NULL;	/* vdev modify list */
+	boolean_t activate_slog;
+
+	if (!spa_writeable(spa))
+		return (EROFS);
+
+	txg = spa_vdev_enter(spa);
+
+	/* clear the log and flush everything up to now */
+	activate_slog = spa_passivate_log(spa);
+	(void) spa_vdev_config_exit(spa, NULL, txg, 0, FTAG);
+	error = spa_offline_log(spa);
+	txg = spa_vdev_config_enter(spa);
+
+	if (activate_slog)
+		spa_activate_log(spa);
+
+	if (error != 0)
+		return (spa_vdev_exit(spa, NULL, txg, error));
+
+	/* check new spa name before going any further */
+	if (spa_lookup(newname) != NULL)
+		return (spa_vdev_exit(spa, NULL, txg, EEXIST));
+
+	/*
+	 * scan through all the children to ensure they're all mirrors
+	 */
+	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvl) != 0 ||
+	    nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_CHILDREN, &child,
+	    &children) != 0)
+		return (spa_vdev_exit(spa, NULL, txg, EINVAL));
+
+	/* first, check to ensure we've got the right child count */
+	rvd = spa->spa_root_vdev;
+	lastlog = 0;
+	for (c = 0; c < rvd->vdev_children; c++) {
+		vdev_t *vd = rvd->vdev_child[c];
+
+		/* don't count the holes & logs as children */
+		if (vd->vdev_islog || vd->vdev_ishole) {
+			if (lastlog == 0)
+				lastlog = c;
+			continue;
+		}
+
+		lastlog = 0;
+	}
+	if (children != (lastlog != 0 ? lastlog : rvd->vdev_children))
+		return (spa_vdev_exit(spa, NULL, txg, EINVAL));
+
+	/* next, ensure no spare or cache devices are part of the split */
+	if (nvlist_lookup_nvlist(nvl, ZPOOL_CONFIG_SPARES, &tmp) == 0 ||
+	    nvlist_lookup_nvlist(nvl, ZPOOL_CONFIG_L2CACHE, &tmp) == 0)
+		return (spa_vdev_exit(spa, NULL, txg, EINVAL));
+
+	vml = kmem_zalloc(children * sizeof (vdev_t *), KM_SLEEP);
+	glist = kmem_zalloc(children * sizeof (uint64_t), KM_SLEEP);
+
+	/* then, loop over each vdev and validate it */
+	for (c = 0; c < children; c++) {
+		uint64_t is_hole = 0;
+
+		(void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_HOLE,
+		    &is_hole);
+
+		/*
+		 * A hole in the new config is only acceptable where the
+		 * original pool has a hole or a log in the same slot.
+		 */
+		if (is_hole != 0) {
+			if (spa->spa_root_vdev->vdev_child[c]->vdev_ishole ||
+			    spa->spa_root_vdev->vdev_child[c]->vdev_islog) {
+				continue;
+			} else {
+				error = EINVAL;
+				break;
+			}
+		}
+
+		/* which disk is going to be split? */
+		if (nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_GUID,
+		    &glist[c]) != 0) {
+			error = EINVAL;
+			break;
+		}
+
+		/* look it up in the spa */
+		vml[c] = spa_lookup_by_guid(spa, glist[c], B_FALSE);
+		if (vml[c] == NULL) {
+			error = ENODEV;
+			break;
+		}
+
+		/* make sure there's nothing stopping the split */
+		if (vml[c]->vdev_parent->vdev_ops != &vdev_mirror_ops ||
+		    vml[c]->vdev_islog ||
+		    vml[c]->vdev_ishole ||
+		    vml[c]->vdev_isspare ||
+		    vml[c]->vdev_isl2cache ||
+		    !vdev_writeable(vml[c]) ||
+		    vml[c]->vdev_state != VDEV_STATE_HEALTHY ||
+		    c != spa->spa_root_vdev->vdev_child[c]->vdev_id) {
+			error = EINVAL;
+			break;
+		}
+
+		if (vdev_dtl_required(vml[c])) {
+			error = EBUSY;
+			break;
+		}
+
+		/* we need certain info from the top level */
+		VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_METASLAB_ARRAY,
+		    vml[c]->vdev_top->vdev_ms_array) == 0);
+		VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_METASLAB_SHIFT,
+		    vml[c]->vdev_top->vdev_ms_shift) == 0);
+		VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_ASIZE,
+		    vml[c]->vdev_top->vdev_asize) == 0);
+		VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_ASHIFT,
+		    vml[c]->vdev_top->vdev_ashift) == 0);
+	}
+
+	if (error != 0) {
+		kmem_free(vml, children * sizeof (vdev_t *));
+		kmem_free(glist, children * sizeof (uint64_t));
+		return (spa_vdev_exit(spa, NULL, txg, error));
+	}
+
+	/* stop writers from using the disks */
+	for (c = 0; c < children; c++) {
+		if (vml[c] != NULL)
+			vml[c]->vdev_offline = B_TRUE;
+	}
+	vdev_reopen(spa->spa_root_vdev);
+
+	/*
+	 * Temporarily record the splitting vdevs in the spa config.  This
+	 * will disappear once the config is regenerated.
+	 */
+	VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0);
+	VERIFY(nvlist_add_uint64_array(nvl, ZPOOL_CONFIG_SPLIT_LIST,
+	    glist, children) == 0);
+	kmem_free(glist, children * sizeof (uint64_t));
+
+	VERIFY(nvlist_add_nvlist(spa->spa_config, ZPOOL_CONFIG_SPLIT,
+	    nvl) == 0);
+	spa->spa_config_splitting = nvl;
+	vdev_config_dirty(spa->spa_root_vdev);
+
+	/* configure and create the new pool */
+	VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME, newname) == 0);
+	VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE,
+	    exp ? POOL_STATE_EXPORTED : POOL_STATE_ACTIVE) == 0);
+	VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_VERSION,
+	    spa_version(spa)) == 0);
+	VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_TXG,
+	    spa->spa_config_txg) == 0);
+	VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_GUID,
+	    spa_generate_guid(NULL)) == 0);
+	(void) nvlist_lookup_string(props,
+	    zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot);
+
+	newspa = spa_add(newname, config, altroot);
+	mutex_enter(&newspa->spa_vdev_top_lock);
+	newspa->spa_config_txg = spa->spa_config_txg;
+	spa_set_log_state(newspa, SPA_LOG_CLEAR);
+
+	/* release the spa config lock, retaining the namespace lock */
+	spa_vdev_config_exit(spa, NULL, txg, 0, FTAG);
+
+	if (zio_injection_enabled)
+		zio_handle_panic_injection(spa, FTAG, 1);
+
+	spa_activate(newspa, spa_mode_global);
+	spa_async_suspend(newspa);
+
+	/* create the new pool from the disks of the original pool */
+	error = spa_load(newspa, SPA_LOAD_IMPORT, SPA_IMPORT_ASSEMBLE, B_TRUE);
+	if (error)
+		goto out;
+
+	/* if that worked, generate a real config for the new pool */
+	if (newspa->spa_root_vdev != NULL) {
+		VERIFY(nvlist_alloc(&newspa->spa_config_splitting,
+		    NV_UNIQUE_NAME, KM_SLEEP) == 0);
+		VERIFY(nvlist_add_uint64(newspa->spa_config_splitting,
+		    ZPOOL_CONFIG_SPLIT_GUID, spa_guid(spa)) == 0);
+		spa_config_set(newspa, spa_config_generate(newspa, NULL, -1ULL,
+		    B_TRUE));
+	}
+
+	/* set the props */
+	if (props != NULL) {
+		spa_configfile_set(newspa, props, B_FALSE);
+		error = spa_prop_set(newspa, props);
+		if (error)
+			goto out;
+	}
+
+	/* flush everything */
+	txg = spa_vdev_config_enter(newspa);
+	vdev_config_dirty(newspa->spa_root_vdev);
+	spa_config_sync(newspa, B_FALSE, B_TRUE);
+	(void) spa_vdev_config_exit(newspa, NULL, txg, 0, FTAG);
+	mutex_exit(&newspa->spa_vdev_top_lock);
+
+	if (zio_injection_enabled)
+		zio_handle_panic_injection(spa, FTAG, 2);
+
+	spa_async_resume(newspa);
+
+	/* finally, update the original pool's config */
+	txg = spa_vdev_config_enter(spa);
+	tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir);
+	error = dmu_tx_assign(tx, TXG_WAIT);
+	if (error != 0)
+		dmu_tx_abort(tx);
+	for (c = 0; c < children; c++) {
+		if (vml[c] != NULL) {
+			vdev_split(vml[c]);
+			/* only log history if we actually hold a tx */
+			if (error == 0)
+				spa_history_internal_log(LOG_POOL_VDEV_DETACH,
+				    spa, tx, CRED(), "vdev=%s",
+				    vml[c]->vdev_path);
+			vdev_free(vml[c]);
+		}
+	}
+	vdev_config_dirty(spa->spa_root_vdev);
+	spa->spa_config_splitting = NULL;
+	nvlist_free(nvl);
+	if (error == 0)
+		dmu_tx_commit(tx);
+	(void) spa_vdev_exit(spa, NULL, txg, 0);
+
+	if (zio_injection_enabled)
+		zio_handle_panic_injection(spa, FTAG, 3);
+
+	/* split is complete; log a history record */
+	spa_history_internal_log(LOG_POOL_SPLIT, newspa, NULL, CRED(),
+	    "split new pool %s from pool %s", newname, spa_name(spa));
+
+	kmem_free(vml, children * sizeof (vdev_t *));
+
+	/* if we're not going to mount the filesystems in userland, export */
+	if (exp)
+		error = spa_export_common(newname, POOL_STATE_EXPORTED, NULL,
+		    B_FALSE, B_FALSE);
+
+	return (error);
+
+out:
+	/* tear down the half-built new pool */
+	mutex_exit(&newspa->spa_vdev_top_lock);
+	spa_unload(newspa);
+	spa_deactivate(newspa);
+	spa_remove(newspa);
+
+	txg = spa_vdev_config_enter(spa);
+
+	/*
+	 * The disks chosen for the split were taken offline above so that
+	 * writers would stop using them.  The split has failed, so bring
+	 * them back online; otherwise the original pool's mirrors would be
+	 * left degraded.
+	 */
+	for (c = 0; c < children; c++) {
+		if (vml[c] != NULL)
+			vml[c]->vdev_offline = B_FALSE;
+	}
+	vdev_reopen(spa->spa_root_vdev);
+
+	nvlist_free(spa->spa_config_splitting);
+	spa->spa_config_splitting = NULL;
+	(void) spa_vdev_exit(spa, NULL, txg, 0);
+
+	kmem_free(vml, children * sizeof (vdev_t *));
+	return (error);
+}
+
static nvlist_t *
spa_nvlist_lookup_by_guid(nvlist_t **nvpp, int count, uint64_t target_guid)
{
diff --git a/usr/src/uts/common/fs/zfs/spa_config.c b/usr/src/uts/common/fs/zfs/spa_config.c
index 176543c173..68a40bec89 100644
--- a/usr/src/uts/common/fs/zfs/spa_config.c
+++ b/usr/src/uts/common/fs/zfs/spa_config.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -335,6 +335,7 @@ spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats)
vdev_t *rvd = spa->spa_root_vdev;
unsigned long hostid = 0;
boolean_t locked = B_FALSE;
+ uint64_t split_guid;
if (vd == NULL) {
vd = rvd;
@@ -391,6 +392,14 @@ spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats)
VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_IS_LOG,
1ULL) == 0);
vd = vd->vdev_top; /* label contains top config */
+ } else {
+ /*
+ * Only add the (potentially large) split information
+ * in the mos config, and not in the vdev labels
+ */
+ if (spa->spa_config_splitting != NULL)
+ VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_SPLIT,
+ spa->spa_config_splitting) == 0);
}
/*
@@ -400,6 +409,16 @@ spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats)
*/
vdev_top_config_generate(spa, config);
+ /*
+ * If we're splitting, record the original pool's guid.
+ */
+ if (spa->spa_config_splitting != NULL &&
+ nvlist_lookup_uint64(spa->spa_config_splitting,
+ ZPOOL_CONFIG_SPLIT_GUID, &split_guid) == 0) {
+ VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_SPLIT_GUID,
+ split_guid) == 0);
+ }
+
nvroot = vdev_config_generate(spa, vd, getstats, B_FALSE, B_FALSE);
VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0);
nvlist_free(nvroot);
diff --git a/usr/src/uts/common/fs/zfs/spa_misc.c b/usr/src/uts/common/fs/zfs/spa_misc.c
index cff272abda..5a48dc6093 100644
--- a/usr/src/uts/common/fs/zfs/spa_misc.c
+++ b/usr/src/uts/common/fs/zfs/spa_misc.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -498,6 +498,8 @@ spa_remove(spa_t *spa)
ASSERT(MUTEX_HELD(&spa_namespace_lock));
ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED);
+ nvlist_free(spa->spa_config_splitting);
+
avl_remove(&spa_namespace_avl, spa);
cv_broadcast(&spa_namespace_cv);
@@ -909,7 +911,7 @@ spa_vdev_config_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error, char *tag)
* transactionally.
*/
if (zio_injection_enabled)
- zio_handle_panic_injection(spa, tag);
+ zio_handle_panic_injection(spa, tag, 0);
/*
* Note: this txg_wait_synced() is important because it ensures
@@ -1117,6 +1119,22 @@ spa_get_random(uint64_t range)
return (r % range);
}
+/*
+ * Pick a random nonzero guid: unique in 'spa', or a new pool guid if NULL.
+ */
+uint64_t
+spa_generate_guid(spa_t *spa)
+{
+	uint64_t candidate;
+
+	do {
+		candidate = spa_get_random(-1ULL);
+	} while (candidate == 0 || (spa != NULL ?
+	    spa_guid_exists(spa_guid(spa), candidate) :
+	    spa_guid_exists(candidate, 0)));
+	return (candidate);
+}
+
void
sprintf_blkptr(char *buf, const blkptr_t *bp)
{
diff --git a/usr/src/uts/common/fs/zfs/sys/spa.h b/usr/src/uts/common/fs/zfs/sys/spa.h
index c0d5ded3a2..e338b0a0fc 100644
--- a/usr/src/uts/common/fs/zfs/sys/spa.h
+++ b/usr/src/uts/common/fs/zfs/sys/spa.h
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -401,6 +401,11 @@ typedef struct blkptr {
(((BP_GET_LEVEL(bp) > 0) || (dmu_ot[BP_GET_TYPE(bp)].ot_metadata)) ? \
ARC_BUFC_METADATA : ARC_BUFC_DATA);
+typedef enum spa_import_type {
+ SPA_IMPORT_EXISTING,
+ SPA_IMPORT_ASSEMBLE
+} spa_import_type_t;
+
/* state manipulation functions */
extern int spa_open(const char *pool, spa_t **, void *tag);
extern int spa_open_rewind(const char *pool, spa_t **, void *tag,
@@ -440,6 +445,8 @@ extern int spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid,
extern int spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare);
extern int spa_vdev_setpath(spa_t *spa, uint64_t guid, const char *newpath);
extern int spa_vdev_setfru(spa_t *spa, uint64_t guid, const char *newfru);
+extern int spa_vdev_split_mirror(spa_t *spa, char *newname, nvlist_t *config,
+ nvlist_t *props, boolean_t exp);
/* spare state (which is global across all pools) */
extern void spa_spare_add(vdev_t *vd);
@@ -537,6 +544,7 @@ typedef enum spa_log_state {
extern spa_log_state_t spa_get_log_state(spa_t *spa);
extern void spa_set_log_state(spa_t *spa, spa_log_state_t state);
+extern int spa_offline_log(spa_t *spa);
/* Log claim callback */
extern void spa_claim_notify(zio_t *zio);
@@ -579,6 +587,7 @@ extern boolean_t spa_guid_exists(uint64_t pool_guid, uint64_t device_guid);
extern char *spa_strdup(const char *);
extern void spa_strfree(char *);
extern uint64_t spa_get_random(uint64_t range);
+extern uint64_t spa_generate_guid(spa_t *spa);
extern void sprintf_blkptr(char *buf, const blkptr_t *bp);
extern void spa_freeze(spa_t *spa);
extern void spa_upgrade(spa_t *spa, uint64_t version);
diff --git a/usr/src/uts/common/fs/zfs/sys/spa_impl.h b/usr/src/uts/common/fs/zfs/sys/spa_impl.h
index a5d21508a3..9df395bf55 100644
--- a/usr/src/uts/common/fs/zfs/sys/spa_impl.h
+++ b/usr/src/uts/common/fs/zfs/sys/spa_impl.h
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -113,6 +113,7 @@ struct spa {
avl_node_t spa_avl; /* node in spa_namespace_avl */
nvlist_t *spa_config; /* last synced config */
nvlist_t *spa_config_syncing; /* currently syncing config */
+ nvlist_t *spa_config_splitting; /* config for splitting */
uint64_t spa_config_txg; /* txg of last config change */
int spa_sync_pass; /* iterate-to-convergence */
pool_state_t spa_state; /* pool state */
@@ -212,6 +213,7 @@ struct spa {
uint64_t spa_did; /* if procp != p0, did of t1 */
boolean_t spa_autoreplace; /* autoreplace set in open */
int spa_vdev_locks; /* locks grabbed */
+
/*
* spa_refcnt & spa_config_lock must be the last elements
* because refcount_t changes size based on compilation options.
diff --git a/usr/src/uts/common/fs/zfs/sys/vdev.h b/usr/src/uts/common/fs/zfs/sys/vdev.h
index 2f9dbef0e0..3bf5ba8042 100644
--- a/usr/src/uts/common/fs/zfs/sys/vdev.h
+++ b/usr/src/uts/common/fs/zfs/sys/vdev.h
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -74,6 +74,8 @@ extern int vdev_metaslab_init(vdev_t *vd, uint64_t txg);
extern void vdev_metaslab_fini(vdev_t *vd);
extern void vdev_metaslab_set_size(vdev_t *);
extern void vdev_expand(vdev_t *vd, uint64_t txg);
+extern void vdev_split(vdev_t *vd);
+
extern void vdev_get_stats(vdev_t *vd, vdev_stat_t *vs);
extern void vdev_clear_stats(vdev_t *vd);
@@ -139,7 +141,8 @@ typedef enum {
VDEV_LABEL_REPLACE, /* replace an existing device */
VDEV_LABEL_SPARE, /* add a new hot spare */
VDEV_LABEL_REMOVE, /* remove an existing device */
- VDEV_LABEL_L2CACHE /* add an L2ARC cache device */
+ VDEV_LABEL_L2CACHE, /* add an L2ARC cache device */
+ VDEV_LABEL_SPLIT /* generating new label for split-off dev */
} vdev_labeltype_t;
extern int vdev_label_init(vdev_t *vd, uint64_t txg, vdev_labeltype_t reason);
diff --git a/usr/src/uts/common/fs/zfs/sys/vdev_impl.h b/usr/src/uts/common/fs/zfs/sys/vdev_impl.h
index 20c828b9e9..f46c08c6fb 100644
--- a/usr/src/uts/common/fs/zfs/sys/vdev_impl.h
+++ b/usr/src/uts/common/fs/zfs/sys/vdev_impl.h
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -112,6 +112,7 @@ struct vdev {
uint64_t vdev_id; /* child number in vdev parent */
uint64_t vdev_guid; /* unique ID for this vdev */
uint64_t vdev_guid_sum; /* self guid + all child guids */
+ uint64_t vdev_orig_guid; /* orig. guid prior to remove */
uint64_t vdev_asize; /* allocatable device capacity */
uint64_t vdev_min_asize; /* min acceptable asize */
uint64_t vdev_ashift; /* block alignment shift */
@@ -174,6 +175,7 @@ struct vdev {
boolean_t vdev_nowritecache; /* true if flushwritecache failed */
boolean_t vdev_checkremove; /* temporary online test */
boolean_t vdev_forcefault; /* force online fault */
+ boolean_t vdev_splitting; /* split or repair in progress */
uint8_t vdev_tmpoffline; /* device taken offline temporarily? */
uint8_t vdev_detached; /* device detached? */
uint8_t vdev_cant_read; /* vdev is failing all reads */
@@ -251,6 +253,7 @@ typedef struct vdev_label {
#define VDEV_ALLOC_SPARE 2
#define VDEV_ALLOC_L2CACHE 3
#define VDEV_ALLOC_ROOTPOOL 4
+#define VDEV_ALLOC_SPLIT 5
/*
* Allocate or free a vdev
diff --git a/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h b/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h
index 103e5d3050..90eecb812f 100644
--- a/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h
+++ b/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -269,6 +269,8 @@ typedef struct zfs_useracct {
#define ZVOL_MAX_MINOR (1 << 16)
#define ZFS_MIN_MINOR (ZVOL_MAX_MINOR + 1)
+#define ZPOOL_EXPORT_AFTER_SPLIT 0x1
+
#ifdef _KERNEL
typedef struct zfs_creat {
diff --git a/usr/src/uts/common/fs/zfs/sys/zio.h b/usr/src/uts/common/fs/zfs/sys/zio.h
index fb749f8f18..d201278b11 100644
--- a/usr/src/uts/common/fs/zfs/sys/zio.h
+++ b/usr/src/uts/common/fs/zfs/sys/zio.h
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -522,7 +522,7 @@ extern int zio_inject_fault(char *name, int flags, int *id,
extern int zio_inject_list_next(int *id, char *name, size_t buflen,
struct zinject_record *record);
extern int zio_clear_fault(int id);
-extern void zio_handle_panic_injection(spa_t *spa, char *tag);
+extern void zio_handle_panic_injection(spa_t *spa, char *tag, uint64_t type);
extern int zio_handle_fault_injection(zio_t *zio, int error);
extern int zio_handle_device_injection(vdev_t *vd, zio_t *zio, int error);
extern int zio_handle_label_injection(zio_t *zio, int error);
diff --git a/usr/src/uts/common/fs/zfs/vdev.c b/usr/src/uts/common/fs/zfs/vdev.c
index 07aebf5347..48082c8bf9 100644
--- a/usr/src/uts/common/fs/zfs/vdev.c
+++ b/usr/src/uts/common/fs/zfs/vdev.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -300,15 +300,12 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops)
* The root vdev's guid will also be the pool guid,
* which must be unique among all pools.
*/
- while (guid == 0 || spa_guid_exists(guid, 0))
- guid = spa_get_random(-1ULL);
+ guid = spa_generate_guid(NULL);
} else {
/*
* Any other vdev's guid must be unique within the pool.
*/
- while (guid == 0 ||
- spa_guid_exists(spa_guid(spa), guid))
- guid = spa_get_random(-1ULL);
+ guid = spa_generate_guid(spa);
}
ASSERT(!spa_guid_exists(spa_guid(spa), guid));
}
@@ -482,7 +479,8 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id,
/*
* If we're a top-level vdev, try to load the allocation parameters.
*/
- if (parent && !parent->vdev_parent && alloctype == VDEV_ALLOC_LOAD) {
+ if (parent && !parent->vdev_parent &&
+ (alloctype == VDEV_ALLOC_LOAD || alloctype == VDEV_ALLOC_SPLIT)) {
(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_METASLAB_ARRAY,
&vd->vdev_ms_array);
(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_METASLAB_SHIFT,
@@ -494,6 +492,7 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id,
if (parent && !parent->vdev_parent) {
ASSERT(alloctype == VDEV_ALLOC_LOAD ||
alloctype == VDEV_ALLOC_ADD ||
+ alloctype == VDEV_ALLOC_SPLIT ||
alloctype == VDEV_ALLOC_ROOTPOOL);
vd->vdev_mg = metaslab_group_create(islog ?
spa_log_class(spa) : spa_normal_class(spa), vd);
@@ -778,6 +777,7 @@ vdev_remove_parent(vdev_t *cvd)
*/
if (mvd->vdev_top == mvd) {
uint64_t guid_delta = mvd->vdev_guid - cvd->vdev_guid;
+ cvd->vdev_orig_guid = cvd->vdev_guid;
cvd->vdev_guid += guid_delta;
cvd->vdev_guid_sum += guid_delta;
}
@@ -1282,7 +1282,7 @@ vdev_validate(vdev_t *vd)
{
spa_t *spa = vd->vdev_spa;
nvlist_t *label;
- uint64_t guid, top_guid;
+ uint64_t guid = 0, top_guid;
uint64_t state;
for (int c = 0; c < vd->vdev_children; c++)
@@ -1295,6 +1295,8 @@ vdev_validate(vdev_t *vd)
* overwrite the previous state.
*/
if (vd->vdev_ops->vdev_op_leaf && vdev_readable(vd)) {
+ uint64_t aux_guid = 0;
+ nvlist_t *nvl;
if ((label = vdev_label_read_config(vd)) == NULL) {
vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN,
@@ -1302,6 +1304,18 @@ vdev_validate(vdev_t *vd)
return (0);
}
+ /*
+ * Determine if this vdev has been split off into another
+ * pool. If so, then refuse to open it.
+ */
+ if (nvlist_lookup_uint64(label, ZPOOL_CONFIG_SPLIT_GUID,
+ &aux_guid) == 0 && aux_guid == spa_guid(spa)) {
+ vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN,
+ VDEV_AUX_SPLIT_POOL);
+ nvlist_free(label);
+ return (0);
+ }
+
if (nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_GUID,
&guid) != 0 || guid != spa_guid(spa)) {
vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN,
@@ -1310,6 +1324,11 @@ vdev_validate(vdev_t *vd)
return (0);
}
+ if (nvlist_lookup_nvlist(label, ZPOOL_CONFIG_VDEV_TREE, &nvl)
+ != 0 || nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_ORIG_GUID,
+ &aux_guid) != 0)
+ aux_guid = 0;
+
/*
* If this vdev just became a top-level vdev because its
* sibling was detached, it will have adopted the parent's
@@ -1317,12 +1336,16 @@ vdev_validate(vdev_t *vd)
* Fortunately, either version of the label will have the
* same top guid, so if we're a top-level vdev, we can
* safely compare to that instead.
+ *
+ * If we split this vdev off instead, then we also check the
+ * original pool's guid. We don't want to consider the vdev
+ * corrupt if it is partway through a split operation.
*/
if (nvlist_lookup_uint64(label, ZPOOL_CONFIG_GUID,
&guid) != 0 ||
nvlist_lookup_uint64(label, ZPOOL_CONFIG_TOP_GUID,
&top_guid) != 0 ||
- (vd->vdev_guid != guid &&
+ ((vd->vdev_guid != guid && vd->vdev_guid != aux_guid) &&
(vd->vdev_guid != top_guid || vd != vd->vdev_top))) {
vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN,
VDEV_AUX_CORRUPT_DATA);
@@ -1372,8 +1395,12 @@ vdev_close(vdev_t *vd)
ASSERT(spa_config_held(spa, SCL_STATE_ALL, RW_WRITER) == SCL_STATE_ALL);
+ /*
+ * If our parent is reopening, then we are as well, unless we are
+ * going offline.
+ */
if (pvd != NULL && pvd->vdev_reopening)
- vd->vdev_reopening = pvd->vdev_reopening;
+ vd->vdev_reopening = (pvd->vdev_reopening && !vd->vdev_offline);
vd->vdev_ops->vdev_op_close(vd);
@@ -1406,7 +1433,8 @@ vdev_reopen(vdev_t *vd)
ASSERT(spa_config_held(spa, SCL_STATE_ALL, RW_WRITER) == SCL_STATE_ALL);
- vd->vdev_reopening = B_TRUE;
+ /* set the reopening flag unless we're taking the vdev offline */
+ vd->vdev_reopening = !vd->vdev_offline;
vdev_close(vd);
(void) vdev_open(vd);
@@ -2133,24 +2161,6 @@ vdev_online(spa_t *spa, uint64_t guid, uint64_t flags, vdev_state_t *newstate)
return (spa_vdev_state_exit(spa, vd, 0));
}
-int
-vdev_offline_log(spa_t *spa)
-{
- int error = 0;
-
- if ((error = dmu_objset_find(spa_name(spa), zil_vdev_offline,
- NULL, DS_FIND_CHILDREN)) == 0) {
-
- /*
- * We successfully offlined the log device, sync out the
- * current txg so that the "stubby" block can be removed
- * by zil_sync().
- */
- txg_wait_synced(spa->spa_dsl_pool, 0);
- }
- return (error);
-}
-
static int
vdev_offline_locked(spa_t *spa, uint64_t guid, uint64_t flags)
{
@@ -2198,7 +2208,7 @@ top:
metaslab_group_passivate(mg);
(void) spa_vdev_state_exit(spa, vd, 0);
- error = vdev_offline_log(spa);
+ error = spa_offline_log(spa);
spa_vdev_state_enter(spa, SCL_ALLOC);
@@ -3035,3 +3045,22 @@ vdev_expand(vdev_t *vd, uint64_t txg)
vdev_config_dirty(vd);
}
}
+
+/*
+ * Split vd off its parent; a lone surviving sibling has its parent removed.
+ */
+void
+vdev_split(vdev_t *vd)
+{
+	vdev_t *parent = vd->vdev_parent;
+	vdev_t *remaining;
+
+	vdev_remove_child(parent, vd);
+	vdev_compact_children(parent);
+	remaining = parent->vdev_child[0];
+	if (parent->vdev_children == 1) {
+		vdev_remove_parent(remaining);
+		remaining->vdev_splitting = B_TRUE; /* config adds orig_guid */
+	}
+	vdev_propagate_state(remaining);
+}
diff --git a/usr/src/uts/common/fs/zfs/vdev_label.c b/usr/src/uts/common/fs/zfs/vdev_label.c
index e8f8e43e02..d11b3df7c6 100644
--- a/usr/src/uts/common/fs/zfs/vdev_label.c
+++ b/usr/src/uts/common/fs/zfs/vdev_label.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -354,6 +354,11 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats,
if (aux != NULL)
VERIFY(nvlist_add_string(nv, ZPOOL_CONFIG_AUX_STATE,
aux) == 0);
+
+ if (vd->vdev_splitting && vd->vdev_orig_guid != 0LL) {
+ VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_ORIG_GUID,
+ vd->vdev_orig_guid) == 0);
+ }
}
return (nv);
@@ -590,7 +595,7 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason)
/*
* Determine if the vdev is in use.
*/
- if (reason != VDEV_LABEL_REMOVE &&
+ if (reason != VDEV_LABEL_REMOVE && reason != VDEV_LABEL_SPLIT &&
vdev_inuse(vd, crtxg, reason, &spare_guid, &l2cache_guid))
return (EBUSY);
@@ -616,7 +621,8 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason)
*/
if (reason == VDEV_LABEL_SPARE)
return (0);
- ASSERT(reason == VDEV_LABEL_REPLACE);
+ ASSERT(reason == VDEV_LABEL_REPLACE ||
+ reason == VDEV_LABEL_SPLIT);
}
if (reason != VDEV_LABEL_REMOVE && reason != VDEV_LABEL_SPARE &&
@@ -681,7 +687,11 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason)
VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_GUID,
vd->vdev_guid) == 0);
} else {
- label = spa_config_generate(spa, vd, 0ULL, B_FALSE);
+ uint64_t txg = 0ULL;
+
+ if (reason == VDEV_LABEL_SPLIT)
+ txg = spa->spa_uberblock.ub_txg;
+ label = spa_config_generate(spa, vd, txg, B_FALSE);
/*
* Add our creation time. This allows us to detect multiple
diff --git a/usr/src/uts/common/fs/zfs/zfs_ioctl.c b/usr/src/uts/common/fs/zfs/zfs_ioctl.c
index 5500b27e2d..14e479463d 100644
--- a/usr/src/uts/common/fs/zfs/zfs_ioctl.c
+++ b/usr/src/uts/common/fs/zfs/zfs_ioctl.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -1397,6 +1397,7 @@ zfs_ioc_vdev_add(zfs_cmd_t *zc)
* l2cache and spare devices are ok to be added to a rootpool.
*/
if (spa_bootfs(spa) != 0 && nl2cache == 0 && nspares == 0) {
+ nvlist_free(config);
spa_close(spa, FTAG);
return (EDOM);
}
@@ -1502,6 +1503,41 @@ zfs_ioc_vdev_detach(zfs_cmd_t *zc)
}
+/*
+ * Handle ZFS_IOC_VDEV_SPLIT: split the open pool zc_name into a new pool.
+ *
+ * inputs:
+ * zc_name		pool to split (opened with spa_open())
+ * zc_string		name for the new pool
+ * zc_nvlist_conf	config nvlist handed to spa_vdev_split_mirror()
+ * zc_nvlist_src	optional props nvlist; skipped when its size is 0
+ * zc_cookie		ZPOOL_EXPORT_AFTER_SPLIT requests export of new pool
+ *
+ * Returns 0 or the first error from spa_open(), get_nvlist() or
+ * spa_vdev_split_mirror().  Both nvlists are freed before returning.
+ */
static int
+zfs_ioc_vdev_split(zfs_cmd_t *zc)
+{
+	spa_t *spa;
+	nvlist_t *config, *props = NULL;
+	int error;
+	/* collapse the flag bit to a strict B_TRUE/B_FALSE */
+	boolean_t exp = !!(zc->zc_cookie & ZPOOL_EXPORT_AFTER_SPLIT);
+
+	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
+		return (error);
+
+	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
+	    zc->zc_iflags, &config)) != 0) {
+		spa_close(spa, FTAG);
+		return (error);
+	}
+
+	/* props stays NULL when the caller supplied no property list */
+	if (zc->zc_nvlist_src_size != 0 && (error =
+	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
+	    zc->zc_iflags, &props)) != 0) {
+		spa_close(spa, FTAG);
+		nvlist_free(config);
+		return (error);
+	}
+
+	error = spa_vdev_split_mirror(spa, zc->zc_string, config, props, exp);
+
+	spa_close(spa, FTAG);
+
+	nvlist_free(config);
+	nvlist_free(props);
+
+	return (error);
+}
+
+static int
zfs_ioc_vdev_setpath(zfs_cmd_t *zc)
{
spa_t *spa;
@@ -3839,7 +3875,7 @@ static int
zfs_ioc_userspace_upgrade(zfs_cmd_t *zc)
{
objset_t *os;
- int error;
+ int error = 0;
zfsvfs_t *zfsvfs;
if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) {
@@ -4131,6 +4167,7 @@ zfs_ioc_smb_acl(zfs_cmd_t *zc)
VN_RELE(vp);
VN_RELE(ZTOV(sharedir));
ZFS_EXIT(zfsvfs);
+ nvlist_free(nvlist);
return (error);
}
error = VOP_RENAME(ZTOV(sharedir), src, ZTOV(sharedir), target,
@@ -4324,7 +4361,9 @@ static zfs_ioc_vec_t zfs_ioc_vec[] = {
{ zfs_ioc_get_holds, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
B_TRUE },
{ zfs_ioc_objset_recvd_props, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
- B_FALSE }
+ B_FALSE },
+ { zfs_ioc_vdev_split, zfs_secpolicy_config, POOL_NAME, B_TRUE,
+ B_TRUE }
};
int
diff --git a/usr/src/uts/common/fs/zfs/zio_inject.c b/usr/src/uts/common/fs/zfs/zio_inject.c
index e8f8f7b723..fa040ea4b3 100644
--- a/usr/src/uts/common/fs/zfs/zio_inject.c
+++ b/usr/src/uts/common/fs/zfs/zio_inject.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -101,7 +101,7 @@ zio_match_handler(zbookmark_t *zb, uint64_t type,
* specified by tag.
*/
void
-zio_handle_panic_injection(spa_t *spa, char *tag)
+zio_handle_panic_injection(spa_t *spa, char *tag, uint64_t type)
{
inject_handler_t *handler;
@@ -113,7 +113,8 @@ zio_handle_panic_injection(spa_t *spa, char *tag)
if (spa != handler->zi_spa)
continue;
- if (strcmp(tag, handler->zi_record.zi_func) == 0)
+ if (handler->zi_record.zi_type == type &&
+ strcmp(tag, handler->zi_record.zi_func) == 0)
panic("Panic requested in function %s\n", tag);
}
diff --git a/usr/src/uts/common/fs/zfs/zvol.c b/usr/src/uts/common/fs/zfs/zvol.c
index 3419d708c4..3733e495ec 100644
--- a/usr/src/uts/common/fs/zfs/zvol.c
+++ b/usr/src/uts/common/fs/zfs/zvol.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -456,7 +456,7 @@ zvol_create_minor(const char *name)
mutex_enter(&zvol_state_lock);
- if ((zv = zvol_minor_lookup(name)) != NULL) {
+ if (zvol_minor_lookup(name) != NULL) {
mutex_exit(&zvol_state_lock);
return (EEXIST);
}
diff --git a/usr/src/uts/common/sys/fs/zfs.h b/usr/src/uts/common/sys/fs/zfs.h
index 800f790453..3889fd9b12 100644
--- a/usr/src/uts/common/sys/fs/zfs.h
+++ b/usr/src/uts/common/sys/fs/zfs.h
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -454,6 +454,10 @@ typedef struct zpool_rewind_policy {
#define ZPOOL_CONFIG_DDT_HISTOGRAM "ddt_histogram"
#define ZPOOL_CONFIG_DDT_OBJ_STATS "ddt_object_stats"
#define ZPOOL_CONFIG_DDT_STATS "ddt_stats"
+#define ZPOOL_CONFIG_SPLIT "splitcfg"
+#define ZPOOL_CONFIG_ORIG_GUID "orig_guid"
+#define ZPOOL_CONFIG_SPLIT_GUID "split_guid"
+#define ZPOOL_CONFIG_SPLIT_LIST "guid_list"
#define ZPOOL_CONFIG_SUSPENDED "suspended" /* not stored on disk */
#define ZPOOL_CONFIG_TIMESTAMP "timestamp" /* not stored on disk */
#define ZPOOL_CONFIG_BOOTFS "bootfs" /* not stored on disk */
@@ -539,7 +543,8 @@ typedef enum vdev_aux {
VDEV_AUX_ERR_EXCEEDED, /* too many errors */
VDEV_AUX_IO_FAILURE, /* experienced I/O failure */
VDEV_AUX_BAD_LOG, /* cannot read log chain(s) */
- VDEV_AUX_EXTERNAL /* external diagnosis */
+ VDEV_AUX_EXTERNAL, /* external diagnosis */
+ VDEV_AUX_SPLIT_POOL /* vdev was split off into another pool */
} vdev_aux_t;
/*
@@ -707,7 +712,8 @@ typedef enum zfs_ioc {
ZFS_IOC_HOLD,
ZFS_IOC_RELEASE,
ZFS_IOC_GET_HOLDS,
- ZFS_IOC_OBJSET_RECVD_PROPS
+ ZFS_IOC_OBJSET_RECVD_PROPS,
+ ZFS_IOC_VDEV_SPLIT
} zfs_ioc_t;
/*
@@ -825,6 +831,7 @@ typedef enum history_internal_events {
LOG_POOL_SCRUB_DONE,
LOG_DS_USER_HOLD,
LOG_DS_USER_RELEASE,
+ LOG_POOL_SPLIT,
LOG_END
} history_internal_events_t;