summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--usr/src/cmd/zdb/zdb.c7
-rw-r--r--usr/src/cmd/zpool/zpool_main.c23
-rw-r--r--usr/src/cmd/ztest/ztest.c6
-rw-r--r--usr/src/lib/libzfs/common/libzfs.h2
-rw-r--r--usr/src/lib/libzfs/common/libzfs_pool.c136
-rw-r--r--usr/src/lib/libzfs/common/libzfs_util.c2
-rw-r--r--usr/src/uts/common/fs/zfs/spa.c290
-rw-r--r--usr/src/uts/common/fs/zfs/spa_config.c23
-rw-r--r--usr/src/uts/common/fs/zfs/spa_misc.c8
-rw-r--r--usr/src/uts/common/fs/zfs/sys/spa.h5
-rw-r--r--usr/src/uts/common/fs/zfs/sys/spa_impl.h3
-rw-r--r--usr/src/uts/common/fs/zfs/sys/vdev_impl.h1
-rw-r--r--usr/src/uts/common/fs/zfs/vdev.c48
-rw-r--r--usr/src/uts/common/fs/zfs/zfs_ioctl.c17
-rw-r--r--usr/src/uts/common/fs/zfs/zil.c26
-rw-r--r--usr/src/uts/common/sys/fs/zfs.h10
16 files changed, 419 insertions, 188 deletions
diff --git a/usr/src/cmd/zdb/zdb.c b/usr/src/cmd/zdb/zdb.c
index 56686fe30a..c6e219df9e 100644
--- a/usr/src/cmd/zdb/zdb.c
+++ b/usr/src/cmd/zdb/zdb.c
@@ -3076,8 +3076,11 @@ main(int argc, char **argv)
fatal("can't open '%s': %s",
target, strerror(ENOMEM));
}
- if ((error = spa_import(name, cfg, NULL)) != 0)
- error = spa_import_verbatim(name, cfg, NULL);
+ if ((error = spa_import(name, cfg, NULL,
+ ZFS_IMPORT_MISSING_LOG)) != 0) {
+ error = spa_import(name, cfg, NULL,
+ ZFS_IMPORT_VERBATIM);
+ }
}
}
diff --git a/usr/src/cmd/zpool/zpool_main.c b/usr/src/cmd/zpool/zpool_main.c
index fcbbbcd671..1c2565ce72 100644
--- a/usr/src/cmd/zpool/zpool_main.c
+++ b/usr/src/cmd/zpool/zpool_main.c
@@ -1499,7 +1499,7 @@ show_import(nvlist_t *config)
*/
static int
do_import(nvlist_t *config, const char *newname, const char *mntopts,
- int force, nvlist_t *props, boolean_t do_verbatim)
+ nvlist_t *props, int flags)
{
zpool_handle_t *zhp;
char *name;
@@ -1517,7 +1517,8 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts,
(void) fprintf(stderr, gettext("cannot import '%s': pool "
"is formatted using a newer ZFS version\n"), name);
return (1);
- } else if (state != POOL_STATE_EXPORTED && !force) {
+ } else if (state != POOL_STATE_EXPORTED &&
+ !(flags & ZFS_IMPORT_ANY_HOST)) {
uint64_t hostid;
if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID,
@@ -1551,7 +1552,7 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts,
}
}
- if (zpool_import_props(g_zfs, config, newname, props, do_verbatim) != 0)
+ if (zpool_import_props(g_zfs, config, newname, props, flags) != 0)
return (1);
if (newname != NULL)
@@ -1620,7 +1621,6 @@ zpool_do_import(int argc, char **argv)
boolean_t do_all = B_FALSE;
boolean_t do_destroyed = B_FALSE;
char *mntopts = NULL;
- boolean_t do_force = B_FALSE;
nvpair_t *elem;
nvlist_t *config;
uint64_t searchguid = 0;
@@ -1630,7 +1630,7 @@ zpool_do_import(int argc, char **argv)
nvlist_t *policy = NULL;
nvlist_t *props = NULL;
boolean_t first;
- boolean_t do_verbatim = B_FALSE;
+ int flags = ZFS_IMPORT_NORMAL;
uint32_t rewind_policy = ZPOOL_NO_REWIND;
boolean_t dryrun = B_FALSE;
boolean_t do_rewind = B_FALSE;
@@ -1640,7 +1640,7 @@ zpool_do_import(int argc, char **argv)
importargs_t idata = { 0 };
/* check options */
- while ((c = getopt(argc, argv, ":aCc:d:DEfFno:rR:VX")) != -1) {
+ while ((c = getopt(argc, argv, ":aCc:d:DEfFmno:rR:VX")) != -1) {
switch (c) {
case 'a':
do_all = B_TRUE;
@@ -1665,11 +1665,14 @@ zpool_do_import(int argc, char **argv)
do_destroyed = B_TRUE;
break;
case 'f':
- do_force = B_TRUE;
+ flags |= ZFS_IMPORT_ANY_HOST;
break;
case 'F':
do_rewind = B_TRUE;
break;
+ case 'm':
+ flags |= ZFS_IMPORT_MISSING_LOG;
+ break;
case 'n':
dryrun = B_TRUE;
break;
@@ -1697,7 +1700,7 @@ zpool_do_import(int argc, char **argv)
goto error;
break;
case 'V':
- do_verbatim = B_TRUE;
+ flags |= ZFS_IMPORT_VERBATIM;
break;
case 'X':
xtreme_rewind = B_TRUE;
@@ -1869,7 +1872,7 @@ zpool_do_import(int argc, char **argv)
if (do_all) {
err |= do_import(config, NULL, mntopts,
- do_force, props, do_verbatim);
+ props, flags);
} else {
show_import(config);
}
@@ -1918,7 +1921,7 @@ zpool_do_import(int argc, char **argv)
err = B_TRUE;
} else {
err |= do_import(found_config, argc == 1 ? NULL :
- argv[1], mntopts, do_force, props, do_verbatim);
+ argv[1], mntopts, props, flags);
}
}
diff --git a/usr/src/cmd/ztest/ztest.c b/usr/src/cmd/ztest/ztest.c
index 2ee44f150d..3ca0280c97 100644
--- a/usr/src/cmd/ztest/ztest.c
+++ b/usr/src/cmd/ztest/ztest.c
@@ -4847,19 +4847,19 @@ ztest_spa_import_export(char *oldname, char *newname)
/*
* Import it under the new name.
*/
- VERIFY3U(0, ==, spa_import(newname, config, NULL));
+ VERIFY3U(0, ==, spa_import(newname, config, NULL, 0));
ztest_walk_pool_directory("pools after import");
/*
* Try to import it again -- should fail with EEXIST.
*/
- VERIFY3U(EEXIST, ==, spa_import(newname, config, NULL));
+ VERIFY3U(EEXIST, ==, spa_import(newname, config, NULL, 0));
/*
* Try to import it under a different name -- should fail with EEXIST.
*/
- VERIFY3U(EEXIST, ==, spa_import(oldname, config, NULL));
+ VERIFY3U(EEXIST, ==, spa_import(oldname, config, NULL, 0));
/*
* Verify that the pool is no longer visible under the old name.
diff --git a/usr/src/lib/libzfs/common/libzfs.h b/usr/src/lib/libzfs/common/libzfs.h
index 5df5d9d1f7..2adbb58c9c 100644
--- a/usr/src/lib/libzfs/common/libzfs.h
+++ b/usr/src/lib/libzfs/common/libzfs.h
@@ -326,7 +326,7 @@ extern int zpool_export_force(zpool_handle_t *);
extern int zpool_import(libzfs_handle_t *, nvlist_t *, const char *,
char *altroot);
extern int zpool_import_props(libzfs_handle_t *, nvlist_t *, const char *,
- nvlist_t *, boolean_t);
+ nvlist_t *, int);
/*
* Search for pools to import
diff --git a/usr/src/lib/libzfs/common/libzfs_pool.c b/usr/src/lib/libzfs/common/libzfs_pool.c
index 158e20712d..6c3a0e0728 100644
--- a/usr/src/lib/libzfs/common/libzfs_pool.c
+++ b/usr/src/lib/libzfs/common/libzfs_pool.c
@@ -1202,19 +1202,23 @@ zpool_export_force(zpool_handle_t *zhp)
static void
zpool_rewind_exclaim(libzfs_handle_t *hdl, const char *name, boolean_t dryrun,
- nvlist_t *rbi)
+ nvlist_t *config)
{
+ nvlist_t *nv = NULL;
uint64_t rewindto;
int64_t loss = -1;
struct tm t;
char timestr[128];
- if (!hdl->libzfs_printerr || rbi == NULL)
+ if (!hdl->libzfs_printerr || config == NULL)
+ return;
+
+ if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, &nv) != 0)
return;
- if (nvlist_lookup_uint64(rbi, ZPOOL_CONFIG_LOAD_TIME, &rewindto) != 0)
+ if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_TIME, &rewindto) != 0)
return;
- (void) nvlist_lookup_int64(rbi, ZPOOL_CONFIG_REWIND_TIME, &loss);
+ (void) nvlist_lookup_int64(nv, ZPOOL_CONFIG_REWIND_TIME, &loss);
if (localtime_r((time_t *)&rewindto, &t) != NULL &&
strftime(timestr, 128, 0, &t) != 0) {
@@ -1249,6 +1253,7 @@ void
zpool_explain_recover(libzfs_handle_t *hdl, const char *name, int reason,
nvlist_t *config)
{
+ nvlist_t *nv = NULL;
int64_t loss = -1;
uint64_t edata = UINT64_MAX;
uint64_t rewindto;
@@ -1264,12 +1269,12 @@ zpool_explain_recover(libzfs_handle_t *hdl, const char *name, int reason,
(void) printf(dgettext(TEXT_DOMAIN, "\t"));
/* All attempted rewinds failed if ZPOOL_CONFIG_LOAD_TIME missing */
- if (nvlist_lookup_uint64(config,
- ZPOOL_CONFIG_LOAD_TIME, &rewindto) != 0)
+ if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, &nv) != 0 ||
+ nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_TIME, &rewindto) != 0)
goto no_info;
- (void) nvlist_lookup_int64(config, ZPOOL_CONFIG_REWIND_TIME, &loss);
- (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_LOAD_DATA_ERRORS,
+ (void) nvlist_lookup_int64(nv, ZPOOL_CONFIG_REWIND_TIME, &loss);
+ (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_DATA_ERRORS,
&edata);
(void) printf(dgettext(TEXT_DOMAIN,
@@ -1353,12 +1358,40 @@ zpool_import(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
}
}
- ret = zpool_import_props(hdl, config, newname, props, B_FALSE);
+ ret = zpool_import_props(hdl, config, newname, props,
+ ZFS_IMPORT_NORMAL);
if (props)
nvlist_free(props);
return (ret);
}
+static void
+print_vdev_tree(libzfs_handle_t *hdl, const char *name, nvlist_t *nv,
+ int indent)
+{
+ nvlist_t **child;
+ uint_t c, children;
+ char *vname;
+ uint64_t is_log = 0;
+
+ (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_LOG,
+ &is_log);
+
+ if (name != NULL)
+ (void) printf("\t%*s%s%s\n", indent, "", name,
+ is_log ? " [log]" : "");
+
+ if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
+ &child, &children) != 0)
+ return;
+
+ for (c = 0; c < children; c++) {
+ vname = zpool_vdev_name(hdl, NULL, child[c], B_TRUE);
+ print_vdev_tree(hdl, vname, child[c], indent + 2);
+ free(vname);
+ }
+}
+
/*
* Import the given pool using the known configuration and a list of
* properties to be set. The configuration should have come from
@@ -1367,15 +1400,17 @@ zpool_import(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
*/
int
zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
- nvlist_t *props, boolean_t importfaulted)
+ nvlist_t *props, int flags)
{
zfs_cmd_t zc = { 0 };
zpool_rewind_policy_t policy;
- nvlist_t *nvi = NULL;
+ nvlist_t *nv = NULL;
+ nvlist_t *nvinfo = NULL;
+ nvlist_t *missing = NULL;
char *thename;
char *origname;
- uint64_t returned_size;
int ret;
+ int error = 0;
char errbuf[1024];
verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
@@ -1418,27 +1453,36 @@ zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
nvlist_free(props);
return (-1);
}
- returned_size = zc.zc_nvlist_conf_size + 512;
- if (zcmd_alloc_dst_nvlist(hdl, &zc, returned_size) != 0) {
+ if (zcmd_alloc_dst_nvlist(hdl, &zc, 0) != 0) {
nvlist_free(props);
return (-1);
}
- zc.zc_cookie = (uint64_t)importfaulted;
- ret = 0;
- if (zfs_ioctl(hdl, ZFS_IOC_POOL_IMPORT, &zc) != 0) {
+ zc.zc_cookie = flags;
+ while ((ret = zfs_ioctl(hdl, ZFS_IOC_POOL_IMPORT, &zc)) != 0 &&
+ errno == ENOMEM) {
+ if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
+ zcmd_free_nvlists(&zc);
+ return (-1);
+ }
+ }
+ if (ret != 0)
+ error = errno;
+
+ (void) zcmd_read_dst_nvlist(hdl, &zc, &nv);
+ zpool_get_rewind_policy(config, &policy);
+
+ if (error) {
char desc[1024];
- (void) zcmd_read_dst_nvlist(hdl, &zc, &nvi);
- zpool_get_rewind_policy(config, &policy);
/*
* Dry-run failed, but we print out what success
* looks like if we found a best txg
*/
- if ((policy.zrp_request & ZPOOL_TRY_REWIND) && nvi) {
+ if (policy.zrp_request & ZPOOL_TRY_REWIND) {
zpool_rewind_exclaim(hdl, newname ? origname : thename,
- B_TRUE, nvi);
- nvlist_free(nvi);
+ B_TRUE, nv);
+ nvlist_free(nv);
return (-1);
}
@@ -1451,7 +1495,7 @@ zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
dgettext(TEXT_DOMAIN, "cannot import '%s' as '%s'"),
origname, thename);
- switch (errno) {
+ switch (error) {
case ENOTSUP:
/*
* Unsupported version.
@@ -1469,15 +1513,32 @@ zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
(void) zfs_error(hdl, EZFS_BADDEV, desc);
break;
+ case ENXIO:
+ if (nv && nvlist_lookup_nvlist(nv,
+ ZPOOL_CONFIG_LOAD_INFO, &nvinfo) == 0 &&
+ nvlist_lookup_nvlist(nvinfo,
+ ZPOOL_CONFIG_MISSING_DEVICES, &missing) == 0) {
+ (void) printf(dgettext(TEXT_DOMAIN,
+ "The devices below are missing, use "
+ "'-m' to import the pool anyway:\n"));
+ print_vdev_tree(hdl, NULL, missing, 2);
+ (void) printf("\n");
+ }
+ (void) zpool_standard_error(hdl, error, desc);
+ break;
+
+ case EEXIST:
+ (void) zpool_standard_error(hdl, error, desc);
+ break;
+
default:
- (void) zcmd_read_dst_nvlist(hdl, &zc, &nvi);
- (void) zpool_standard_error(hdl, errno, desc);
+ (void) zpool_standard_error(hdl, error, desc);
zpool_explain_recover(hdl,
- newname ? origname : thename, -errno, nvi);
- nvlist_free(nvi);
+ newname ? origname : thename, -error, nv);
break;
}
+ nvlist_free(nv);
ret = -1;
} else {
zpool_handle_t *zhp;
@@ -1489,15 +1550,12 @@ zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
ret = -1;
else if (zhp != NULL)
zpool_close(zhp);
- (void) zcmd_read_dst_nvlist(hdl, &zc, &nvi);
- zpool_get_rewind_policy(config, &policy);
if (policy.zrp_request &
(ZPOOL_DO_REWIND | ZPOOL_TRY_REWIND)) {
zpool_rewind_exclaim(hdl, newname ? origname : thename,
- ((policy.zrp_request & ZPOOL_TRY_REWIND) != 0),
- nvi);
+ ((policy.zrp_request & ZPOOL_TRY_REWIND) != 0), nv);
}
- nvlist_free(nvi);
+ nvlist_free(nv);
return (0);
}
@@ -2816,6 +2874,7 @@ zpool_clear(zpool_handle_t *zhp, const char *path, nvlist_t *rewindnvl)
boolean_t avail_spare, l2cache;
libzfs_handle_t *hdl = zhp->zpool_hdl;
nvlist_t *nvi = NULL;
+ int error;
if (path)
(void) snprintf(msg, sizeof (msg),
@@ -2846,14 +2905,21 @@ zpool_clear(zpool_handle_t *zhp, const char *path, nvlist_t *rewindnvl)
zpool_get_rewind_policy(rewindnvl, &policy);
zc.zc_cookie = policy.zrp_request;
- if (zcmd_alloc_dst_nvlist(hdl, &zc, 8192) != 0)
+ if (zcmd_alloc_dst_nvlist(hdl, &zc, 0) != 0)
return (-1);
if (zcmd_write_src_nvlist(zhp->zpool_hdl, &zc, rewindnvl) != 0)
return (-1);
- if (zfs_ioctl(hdl, ZFS_IOC_CLEAR, &zc) == 0 ||
- ((policy.zrp_request & ZPOOL_TRY_REWIND) &&
+ while ((error = zfs_ioctl(hdl, ZFS_IOC_CLEAR, &zc)) != 0 &&
+ errno == ENOMEM) {
+ if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
+ zcmd_free_nvlists(&zc);
+ return (-1);
+ }
+ }
+
+ if (!error || ((policy.zrp_request & ZPOOL_TRY_REWIND) &&
errno != EPERM && errno != EACCES)) {
if (policy.zrp_request &
(ZPOOL_DO_REWIND | ZPOOL_TRY_REWIND)) {
diff --git a/usr/src/lib/libzfs/common/libzfs_util.c b/usr/src/lib/libzfs/common/libzfs_util.c
index 2e73f76ea5..e3de88815c 100644
--- a/usr/src/lib/libzfs/common/libzfs_util.c
+++ b/usr/src/lib/libzfs/common/libzfs_util.c
@@ -692,7 +692,7 @@ int
zcmd_alloc_dst_nvlist(libzfs_handle_t *hdl, zfs_cmd_t *zc, size_t len)
{
if (len == 0)
- len = 4*1024;
+ len = 16 * 1024;
zc->zc_nvlist_dst_size = len;
if ((zc->zc_nvlist_dst = (uint64_t)(uintptr_t)
zfs_alloc(hdl, zc->zc_nvlist_dst_size)) == NULL)
diff --git a/usr/src/uts/common/fs/zfs/spa.c b/usr/src/uts/common/fs/zfs/spa.c
index 49937402b9..3e6f4ab2a7 100644
--- a/usr/src/uts/common/fs/zfs/spa.c
+++ b/usr/src/uts/common/fs/zfs/spa.c
@@ -1284,33 +1284,131 @@ spa_check_removed(vdev_t *vd)
}
/*
- * Load the slog device state from the config object since it's possible
- * that the label does not contain the most up-to-date information.
+ * Validate the current config against the MOS config
*/
-void
-spa_load_log_state(spa_t *spa, nvlist_t *nv)
+static boolean_t
+spa_config_valid(spa_t *spa, nvlist_t *config)
{
- vdev_t *ovd, *rvd = spa->spa_root_vdev;
+ vdev_t *mrvd, *rvd = spa->spa_root_vdev;
+ nvlist_t *nv;
+
+ VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nv) == 0);
+
+ spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
+ VERIFY(spa_config_parse(spa, &mrvd, nv, NULL, 0, VDEV_ALLOC_LOAD) == 0);
+
+ ASSERT3U(rvd->vdev_children, ==, mrvd->vdev_children);
/*
- * Load the original root vdev tree from the passed config.
+ * If we're doing a normal import, then build up any additional
+ * diagnostic information about missing devices in this config.
+ * We'll pass this up to the user for further processing.
*/
- spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
- VERIFY(spa_config_parse(spa, &ovd, nv, NULL, 0, VDEV_ALLOC_LOAD) == 0);
+ if (!(spa->spa_import_flags & ZFS_IMPORT_MISSING_LOG)) {
+ nvlist_t **child, *nv;
+ uint64_t idx = 0;
+
+ child = kmem_alloc(rvd->vdev_children * sizeof (nvlist_t **),
+ KM_SLEEP);
+ VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0);
+
+ for (int c = 0; c < rvd->vdev_children; c++) {
+ vdev_t *tvd = rvd->vdev_child[c];
+ vdev_t *mtvd = mrvd->vdev_child[c];
+
+ if (tvd->vdev_ops == &vdev_missing_ops &&
+ mtvd->vdev_ops != &vdev_missing_ops &&
+ mtvd->vdev_islog)
+ child[idx++] = vdev_config_generate(spa, mtvd,
+ B_FALSE, 0);
+ }
+
+ if (idx) {
+ VERIFY(nvlist_add_nvlist_array(nv,
+ ZPOOL_CONFIG_CHILDREN, child, idx) == 0);
+ VERIFY(nvlist_add_nvlist(spa->spa_load_info,
+ ZPOOL_CONFIG_MISSING_DEVICES, nv) == 0);
+
+ for (int i = 0; i < idx; i++)
+ nvlist_free(child[i]);
+ }
+ nvlist_free(nv);
+ kmem_free(child, rvd->vdev_children * sizeof (char **));
+ }
+ /*
+ * Compare the root vdev tree with the information we have
+ * from the MOS config (mrvd). Check each top-level vdev
+ * with the corresponding MOS config top-level (mtvd).
+ */
for (int c = 0; c < rvd->vdev_children; c++) {
- vdev_t *cvd = rvd->vdev_child[c];
- if (cvd->vdev_islog)
- vdev_load_log_state(cvd, ovd->vdev_child[c]);
+ vdev_t *tvd = rvd->vdev_child[c];
+ vdev_t *mtvd = mrvd->vdev_child[c];
+
+ /*
+ * Resolve any "missing" vdevs in the current configuration.
+ * If we find that the MOS config has more accurate information
+ * about the top-level vdev then use that vdev instead.
+ */
+ if (tvd->vdev_ops == &vdev_missing_ops &&
+ mtvd->vdev_ops != &vdev_missing_ops) {
+
+ if (!(spa->spa_import_flags & ZFS_IMPORT_MISSING_LOG))
+ continue;
+
+ /*
+ * Device specific actions.
+ */
+ if (mtvd->vdev_islog) {
+ spa_set_log_state(spa, SPA_LOG_CLEAR);
+ } else {
+ /*
+ * XXX - once we have 'readonly' pool
+ * support we should be able to handle
+ * missing data devices by transitioning
+ * the pool to readonly.
+ */
+ continue;
+ }
+
+ /*
+ * Swap the missing vdev with the data we were
+ * able to obtain from the MOS config.
+ */
+ vdev_remove_child(rvd, tvd);
+ vdev_remove_child(mrvd, mtvd);
+
+ vdev_add_child(rvd, mtvd);
+ vdev_add_child(mrvd, tvd);
+
+ spa_config_exit(spa, SCL_ALL, FTAG);
+ vdev_load(mtvd);
+ spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
+
+ vdev_reopen(rvd);
+ } else if (mtvd->vdev_islog) {
+ /*
+ * Load the slog device's state from the MOS config
+ * since it's possible that the label does not
+ * contain the most up-to-date information.
+ */
+ vdev_load_log_state(tvd, mtvd);
+ vdev_reopen(tvd);
+ }
}
- vdev_free(ovd);
+ vdev_free(mrvd);
spa_config_exit(spa, SCL_ALL, FTAG);
+
+ /*
+ * Ensure we were able to validate the config.
+ */
+ return (rvd->vdev_guid_sum == spa->spa_uberblock.ub_guid_sum);
}
/*
* Check for missing log devices
*/
-int
+static int
spa_check_logs(spa_t *spa)
{
switch (spa->spa_log_state) {
@@ -1474,9 +1572,19 @@ spa_load_verify(spa_t *spa)
if (!error && sle.sle_meta_count <= policy.zrp_maxmeta &&
sle.sle_data_count <= policy.zrp_maxdata) {
+ int64_t loss = 0;
+
verify_ok = B_TRUE;
spa->spa_load_txg = spa->spa_uberblock.ub_txg;
spa->spa_load_txg_ts = spa->spa_uberblock.ub_timestamp;
+
+ loss = spa->spa_last_ubsync_txg_ts - spa->spa_load_txg_ts;
+ VERIFY(nvlist_add_uint64(spa->spa_load_info,
+ ZPOOL_CONFIG_LOAD_TIME, spa->spa_load_txg_ts) == 0);
+ VERIFY(nvlist_add_int64(spa->spa_load_info,
+ ZPOOL_CONFIG_REWIND_TIME, loss) == 0);
+ VERIFY(nvlist_add_uint64(spa->spa_load_info,
+ ZPOOL_CONFIG_LOAD_DATA_ERRORS, sle.sle_data_count) == 0);
} else {
spa->spa_load_max_txg = spa->spa_uberblock.ub_txg;
}
@@ -1669,7 +1777,7 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config,
nvlist_t *nvroot = NULL;
vdev_t *rvd;
uberblock_t *ub = &spa->spa_uberblock;
- uint64_t config_cache_txg = spa->spa_config_txg;
+ uint64_t children, config_cache_txg = spa->spa_config_txg;
int orig_mode = spa->spa_mode;
int parse;
uint64_t obj;
@@ -1768,9 +1876,13 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config,
/*
* If the vdev guid sum doesn't match the uberblock, we have an
- * incomplete configuration.
+ * incomplete configuration. We first check to see if the pool
+ * is aware of the complete config (i.e ZPOOL_CONFIG_VDEV_CHILDREN).
+ * If it is, defer the vdev_guid_sum check till later so we
+ * can handle missing vdevs.
*/
- if (mosconfig && type != SPA_IMPORT_ASSEMBLE &&
+ if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_VDEV_CHILDREN,
+ &children) != 0 && mosconfig && type != SPA_IMPORT_ASSEMBLE &&
rvd->vdev_guid_sum != ub->ub_guid_sum)
return (spa_vdev_err(rvd, VDEV_AUX_BAD_GUID_SUM, ENXIO));
@@ -1990,13 +2102,6 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config,
spa_config_exit(spa, SCL_ALL, FTAG);
/*
- * Check the state of the root vdev. If it can't be opened, it
- * indicates one or more toplevel vdevs are faulted.
- */
- if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN)
- return (ENXIO);
-
- /*
* Load the DDTs (dedup tables).
*/
error = ddt_load(spa);
@@ -2005,16 +2110,12 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config,
spa_update_dspace(spa);
- if (state != SPA_LOAD_TRYIMPORT) {
- error = spa_load_verify(spa);
- if (error)
- return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA,
- error));
- }
-
/*
- * Load the intent log state and check log integrity. If we're
- * assembling a pool from a split, the log is not transferred over.
+ * Validate the config, using the MOS config to fill in any
+ * information which might be missing. If we fail to validate
+ * the config then declare the pool unfit for use. If we're
+ * assembling a pool from a split, the log is not transferred
+ * over.
*/
if (type != SPA_IMPORT_ASSEMBLE) {
nvlist_t *nvconfig;
@@ -2022,17 +2123,37 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config,
if (load_nvlist(spa, spa->spa_config_object, &nvconfig) != 0)
return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
- VERIFY(nvlist_lookup_nvlist(nvconfig, ZPOOL_CONFIG_VDEV_TREE,
- &nvroot) == 0);
- spa_load_log_state(spa, nvroot);
+ if (!spa_config_valid(spa, nvconfig)) {
+ nvlist_free(nvconfig);
+ return (spa_vdev_err(rvd, VDEV_AUX_BAD_GUID_SUM,
+ ENXIO));
+ }
nvlist_free(nvconfig);
+ /*
+ * Now that we've validate the config, check the state of the
+ * root vdev. If it can't be opened, it indicates one or
+ * more toplevel vdevs are faulted.
+ */
+ if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN)
+ return (ENXIO);
+
if (spa_check_logs(spa)) {
*ereport = FM_EREPORT_ZFS_LOG_REPLAY;
return (spa_vdev_err(rvd, VDEV_AUX_BAD_LOG, ENXIO));
}
}
+ /*
+ * We've successfully opened the pool, verify that we're ready
+ * to start pushing transactions.
+ */
+ if (state != SPA_LOAD_TRYIMPORT) {
+ if (error = spa_load_verify(spa))
+ return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA,
+ error));
+ }
+
if (spa_writeable(spa) && (state == SPA_LOAD_RECOVER ||
spa->spa_load_max_txg == UINT64_MAX)) {
dmu_tx_t *tx;
@@ -2074,12 +2195,13 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config,
* If the config cache is stale, or we have uninitialized
* metaslabs (see spa_vdev_add()), then update the config.
*
- * If spa_load_verbatim is true, trust the current
+ * If this is a verbatim import, trust the current
* in-core spa_config and update the disk labels.
*/
if (config_cache_txg != spa->spa_config_txg ||
- state == SPA_LOAD_IMPORT || spa->spa_load_verbatim ||
- state == SPA_LOAD_RECOVER)
+ state == SPA_LOAD_IMPORT ||
+ state == SPA_LOAD_RECOVER ||
+ (spa->spa_import_flags & ZFS_IMPORT_VERBATIM))
need_update = B_TRUE;
for (int c = 0; c < rvd->vdev_children; c++)
@@ -2181,9 +2303,6 @@ spa_load_best(spa_t *spa, spa_load_state_t state, int mosconfig,
rewind_error = spa_load_retry(spa, state, mosconfig);
}
- if (config)
- spa_rewind_data_to_nvlist(spa, config);
-
spa->spa_extreme_rewind = B_FALSE;
spa->spa_load_max_txg = UINT64_MAX;
@@ -2210,6 +2329,7 @@ spa_open_common(const char *pool, spa_t **spapp, void *tag, nvlist_t *nvpolicy,
nvlist_t **config)
{
spa_t *spa;
+ spa_load_state_t state = SPA_LOAD_OPEN;
int error;
int locked = B_FALSE;
@@ -2233,7 +2353,6 @@ spa_open_common(const char *pool, spa_t **spapp, void *tag, nvlist_t *nvpolicy,
}
if (spa->spa_state == POOL_STATE_UNINITIALIZED) {
- spa_load_state_t state = SPA_LOAD_OPEN;
zpool_rewind_policy_t policy;
zpool_get_rewind_policy(nvpolicy ? nvpolicy : spa->spa_config,
@@ -2272,9 +2391,13 @@ spa_open_common(const char *pool, spa_t **spapp, void *tag, nvlist_t *nvpolicy,
* information: the state of each vdev after the
* attempted vdev_open(). Return this to the user.
*/
- if (config != NULL && spa->spa_config)
+ if (config != NULL && spa->spa_config) {
VERIFY(nvlist_dup(spa->spa_config, config,
KM_SLEEP) == 0);
+ VERIFY(nvlist_add_nvlist(*config,
+ ZPOOL_CONFIG_LOAD_INFO,
+ spa->spa_load_info) == 0);
+ }
spa_unload(spa);
spa_deactivate(spa);
spa->spa_last_open_failed = error;
@@ -2283,7 +2406,6 @@ spa_open_common(const char *pool, spa_t **spapp, void *tag, nvlist_t *nvpolicy,
*spapp = NULL;
return (error);
}
-
}
spa_open_ref(spa, tag);
@@ -2291,6 +2413,15 @@ spa_open_common(const char *pool, spa_t **spapp, void *tag, nvlist_t *nvpolicy,
if (config != NULL)
*config = spa_config_generate(spa, NULL, -1ULL, B_TRUE);
+ /*
+ * If we've recovered the pool, pass back any information we
+ * gathered while doing the load.
+ */
+ if (state == SPA_LOAD_RECOVER) {
+ VERIFY(nvlist_add_nvlist(*config, ZPOOL_CONFIG_LOAD_INFO,
+ spa->spa_load_info) == 0);
+ }
+
if (locked) {
spa->spa_last_open_failed = 0;
spa->spa_last_ubsync_txg = 0;
@@ -3046,7 +3177,7 @@ spa_import_rootpool(char *devpath, char *devid)
spa = spa_add(pname, config, NULL);
spa->spa_is_root = B_TRUE;
- spa->spa_load_verbatim = B_TRUE;
+ spa->spa_import_flags = ZFS_IMPORT_VERBATIM;
/*
* Build up a vdev tree based on the boot device's label config.
@@ -3115,43 +3246,10 @@ out:
#endif
/*
- * Take a pool and insert it into the namespace as if it had been loaded at
- * boot.
- */
-int
-spa_import_verbatim(const char *pool, nvlist_t *config, nvlist_t *props)
-{
- spa_t *spa;
- char *altroot = NULL;
-
- mutex_enter(&spa_namespace_lock);
- if (spa_lookup(pool) != NULL) {
- mutex_exit(&spa_namespace_lock);
- return (EEXIST);
- }
-
- (void) nvlist_lookup_string(props,
- zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot);
- spa = spa_add(pool, config, altroot);
-
- spa->spa_load_verbatim = B_TRUE;
-
- if (props != NULL)
- spa_configfile_set(spa, props, B_FALSE);
-
- spa_config_sync(spa, B_FALSE, B_TRUE);
-
- mutex_exit(&spa_namespace_lock);
- spa_history_log_version(spa, LOG_POOL_IMPORT);
-
- return (0);
-}
-
-/*
* Import a non-root pool into the system.
*/
int
-spa_import(const char *pool, nvlist_t *config, nvlist_t *props)
+spa_import(const char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags)
{
spa_t *spa;
char *altroot = NULL;
@@ -3171,16 +3269,30 @@ spa_import(const char *pool, nvlist_t *config, nvlist_t *props)
return (EEXIST);
}
- zpool_get_rewind_policy(config, &policy);
- if (policy.zrp_request & ZPOOL_DO_REWIND)
- state = SPA_LOAD_RECOVER;
-
/*
* Create and initialize the spa structure.
*/
(void) nvlist_lookup_string(props,
zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot);
spa = spa_add(pool, config, altroot);
+ spa->spa_import_flags = flags;
+
+ /*
+ * Verbatim import - Take a pool and insert it into the namespace
+ * as if it had been loaded at boot.
+ */
+ if (spa->spa_import_flags & ZFS_IMPORT_VERBATIM) {
+ if (props != NULL)
+ spa_configfile_set(spa, props, B_FALSE);
+
+ spa_config_sync(spa, B_FALSE, B_TRUE);
+
+ mutex_exit(&spa_namespace_lock);
+ spa_history_log_version(spa, LOG_POOL_IMPORT);
+
+ return (0);
+ }
+
spa_activate(spa, spa_mode_global);
/*
@@ -3188,6 +3300,10 @@ spa_import(const char *pool, nvlist_t *config, nvlist_t *props)
*/
spa_async_suspend(spa);
+ zpool_get_rewind_policy(config, &policy);
+ if (policy.zrp_request & ZPOOL_DO_REWIND)
+ state = SPA_LOAD_RECOVER;
+
/*
* Pass off the heavy lifting to spa_load(). Pass TRUE for mosconfig
* because the user-supplied config is actually the one to trust when
@@ -3195,14 +3311,16 @@ spa_import(const char *pool, nvlist_t *config, nvlist_t *props)
*/
if (state != SPA_LOAD_RECOVER)
spa->spa_last_ubsync_txg = spa->spa_load_txg = 0;
+
error = spa_load_best(spa, state, B_TRUE, policy.zrp_txg,
policy.zrp_request);
/*
- * Propagate anything learned about failing or best txgs
- * back to caller
+ * Propagate anything learned while loading the pool and pass it
+ * back to caller (i.e. rewind info, missing devices, etc).
*/
- spa_rewind_data_to_nvlist(spa, config);
+ VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_LOAD_INFO,
+ spa->spa_load_info) == 0);
spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
/*
diff --git a/usr/src/uts/common/fs/zfs/spa_config.c b/usr/src/uts/common/fs/zfs/spa_config.c
index cdeda3f93c..69d57f66db 100644
--- a/usr/src/uts/common/fs/zfs/spa_config.c
+++ b/usr/src/uts/common/fs/zfs/spa_config.c
@@ -304,24 +304,6 @@ spa_config_set(spa_t *spa, nvlist_t *config)
mutex_exit(&spa->spa_props_lock);
}
-/* Add discovered rewind info, if any to the provided nvlist */
-void
-spa_rewind_data_to_nvlist(spa_t *spa, nvlist_t *tonvl)
-{
- int64_t loss = 0;
-
- if (tonvl == NULL || spa->spa_load_txg == 0)
- return;
-
- VERIFY(nvlist_add_uint64(tonvl, ZPOOL_CONFIG_LOAD_TIME,
- spa->spa_load_txg_ts) == 0);
- if (spa->spa_last_ubsync_txg)
- loss = spa->spa_last_ubsync_txg_ts - spa->spa_load_txg_ts;
- VERIFY(nvlist_add_int64(tonvl, ZPOOL_CONFIG_REWIND_TIME, loss) == 0);
- VERIFY(nvlist_add_uint64(tonvl, ZPOOL_CONFIG_LOAD_DATA_ERRORS,
- spa->spa_load_data_errors) == 0);
-}
-
/*
* Generate the pool's configuration based on the current in-core state.
* We infer whether to generate a complete config or just one top-level config
@@ -403,8 +385,7 @@ spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats)
/*
* Add the top-level config. We even add this on pools which
- * don't support holes in the namespace as older pools will
- * just ignore it.
+ * don't support holes in the namespace.
*/
vdev_top_config_generate(spa, config);
@@ -449,8 +430,6 @@ spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats)
kmem_free(dds, sizeof (ddt_stat_t));
}
- spa_rewind_data_to_nvlist(spa, config);
-
if (locked)
spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG);
diff --git a/usr/src/uts/common/fs/zfs/spa_misc.c b/usr/src/uts/common/fs/zfs/spa_misc.c
index 52af7fcb71..f2bd1bff3e 100644
--- a/usr/src/uts/common/fs/zfs/spa_misc.c
+++ b/usr/src/uts/common/fs/zfs/spa_misc.c
@@ -478,6 +478,9 @@ spa_add(const char *name, nvlist_t *config, const char *altroot)
dp->scd_path = altroot ? NULL : spa_strdup(spa_config_path);
list_insert_head(&spa->spa_config_list, dp);
+ VERIFY(nvlist_alloc(&spa->spa_load_info, NV_UNIQUE_NAME,
+ KM_SLEEP) == 0);
+
if (config != NULL)
VERIFY(nvlist_dup(config, &spa->spa_config, 0) == 0);
@@ -516,6 +519,7 @@ spa_remove(spa_t *spa)
list_destroy(&spa->spa_config_list);
+ nvlist_free(spa->spa_load_info);
spa_config_set(spa, NULL);
refcount_destroy(&spa->spa_refcount);
@@ -886,10 +890,6 @@ spa_vdev_config_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error, char *tag)
*/
vdev_dtl_reassess(spa->spa_root_vdev, 0, 0, B_FALSE);
- /*
- * If the config changed, notify the scrub that it must restart.
- * This will initiate a resilver if needed.
- */
if (error == 0 && !list_is_empty(&spa->spa_config_dirty_list)) {
config_changed = B_TRUE;
spa->spa_config_generation++;
diff --git a/usr/src/uts/common/fs/zfs/sys/spa.h b/usr/src/uts/common/fs/zfs/sys/spa.h
index 41a40300eb..e228455abb 100644
--- a/usr/src/uts/common/fs/zfs/sys/spa.h
+++ b/usr/src/uts/common/fs/zfs/sys/spa.h
@@ -418,8 +418,8 @@ extern int spa_get_stats(const char *pool, nvlist_t **config,
extern int spa_create(const char *pool, nvlist_t *config, nvlist_t *props,
const char *history_str, nvlist_t *zplprops);
extern int spa_import_rootpool(char *devpath, char *devid);
-extern int spa_import(const char *pool, nvlist_t *config, nvlist_t *props);
-extern int spa_import_verbatim(const char *, nvlist_t *, nvlist_t *);
+extern int spa_import(const char *pool, nvlist_t *config, nvlist_t *props,
+ uint64_t flags);
extern nvlist_t *spa_tryimport(nvlist_t *tryconfig);
extern int spa_destroy(char *pool);
extern int spa_export(char *pool, nvlist_t **oldconfig, boolean_t force,
@@ -620,7 +620,6 @@ extern uint64_t bp_get_dsize(spa_t *spa, const blkptr_t *bp);
extern boolean_t spa_has_slogs(spa_t *spa);
extern boolean_t spa_is_root(spa_t *spa);
extern boolean_t spa_writeable(spa_t *spa);
-extern void spa_rewind_data_to_nvlist(spa_t *spa, nvlist_t *to);
extern int spa_mode(spa_t *spa);
extern uint64_t strtonum(const char *str, char **nptr);
diff --git a/usr/src/uts/common/fs/zfs/sys/spa_impl.h b/usr/src/uts/common/fs/zfs/sys/spa_impl.h
index ec0a7e56f0..c965ffbbef 100644
--- a/usr/src/uts/common/fs/zfs/sys/spa_impl.h
+++ b/usr/src/uts/common/fs/zfs/sys/spa_impl.h
@@ -114,13 +114,14 @@ struct spa {
nvlist_t *spa_config; /* last synced config */
nvlist_t *spa_config_syncing; /* currently syncing config */
nvlist_t *spa_config_splitting; /* config for splitting */
+ nvlist_t *spa_load_info; /* info and errors from load */
uint64_t spa_config_txg; /* txg of last config change */
int spa_sync_pass; /* iterate-to-convergence */
pool_state_t spa_state; /* pool state */
int spa_inject_ref; /* injection references */
uint8_t spa_sync_on; /* sync threads are running */
spa_load_state_t spa_load_state; /* current load operation */
- boolean_t spa_load_verbatim; /* load the given config? */
+ uint64_t spa_import_flags; /* import specific flags */
taskq_t *spa_zio_taskq[ZIO_TYPES][ZIO_TASKQ_TYPES];
dsl_pool_t *spa_dsl_pool;
metaslab_class_t *spa_normal_class; /* normal data class */
diff --git a/usr/src/uts/common/fs/zfs/sys/vdev_impl.h b/usr/src/uts/common/fs/zfs/sys/vdev_impl.h
index 2b886bc588..63d1b3eeda 100644
--- a/usr/src/uts/common/fs/zfs/sys/vdev_impl.h
+++ b/usr/src/uts/common/fs/zfs/sys/vdev_impl.h
@@ -283,6 +283,7 @@ extern void vdev_remove_parent(vdev_t *cvd);
* vdev sync load and sync
*/
extern void vdev_load_log_state(vdev_t *nvd, vdev_t *ovd);
+extern boolean_t vdev_log_state_valid(vdev_t *vd);
extern void vdev_load(vdev_t *vd);
extern void vdev_sync(vdev_t *vd, uint64_t txg);
extern void vdev_sync_done(vdev_t *vd, uint64_t txg);
diff --git a/usr/src/uts/common/fs/zfs/vdev.c b/usr/src/uts/common/fs/zfs/vdev.c
index 5bf6eebcd7..7c6892b530 100644
--- a/usr/src/uts/common/fs/zfs/vdev.c
+++ b/usr/src/uts/common/fs/zfs/vdev.c
@@ -1369,10 +1369,10 @@ vdev_validate(vdev_t *vd)
nvlist_free(label);
/*
- * If spa->spa_load_verbatim is true, no need to check the
+ * If this is a verbatim import, no need to check the
* state of the pool.
*/
- if (!spa->spa_load_verbatim &&
+ if (!(spa->spa_import_flags & ZFS_IMPORT_VERBATIM) &&
spa_load_state(spa) == SPA_LOAD_OPEN &&
state != POOL_STATE_ACTIVE)
return (EBADF);
@@ -2064,7 +2064,7 @@ vdev_psize_to_asize(vdev_t *vd, uint64_t psize)
int
vdev_fault(spa_t *spa, uint64_t guid, vdev_aux_t aux)
{
- vdev_t *vd;
+ vdev_t *vd, *tvd;
spa_vdev_state_enter(spa, SCL_NONE);
@@ -2074,6 +2074,8 @@ vdev_fault(spa_t *spa, uint64_t guid, vdev_aux_t aux)
if (!vd->vdev_ops->vdev_op_leaf)
return (spa_vdev_state_exit(spa, NULL, ENOTSUP));
+ tvd = vd->vdev_top;
+
/*
* We don't directly use the aux state here, but if we do a
* vdev_reopen(), we need this value to be present to remember why we
@@ -2093,7 +2095,7 @@ vdev_fault(spa_t *spa, uint64_t guid, vdev_aux_t aux)
* If this device has the only valid copy of the data, then
* back off and simply mark the vdev as degraded instead.
*/
- if (!vd->vdev_islog && vd->vdev_aux == NULL && vdev_dtl_required(vd)) {
+ if (!tvd->vdev_islog && vd->vdev_aux == NULL && vdev_dtl_required(vd)) {
vd->vdev_degraded = 1ULL;
vd->vdev_faulted = 0ULL;
@@ -2101,7 +2103,7 @@ vdev_fault(spa_t *spa, uint64_t guid, vdev_aux_t aux)
* If we reopen the device and it's not dead, only then do we
* mark it degraded.
*/
- vdev_reopen(vd);
+ vdev_reopen(tvd);
if (vdev_readable(vd))
vdev_set_state(vd, B_FALSE, VDEV_STATE_DEGRADED, aux);
@@ -2343,7 +2345,7 @@ vdev_clear(spa_t *spa, vdev_t *vd)
*/
vd->vdev_forcefault = B_TRUE;
- vd->vdev_faulted = vd->vdev_degraded = 0;
+ vd->vdev_faulted = vd->vdev_degraded = 0ULL;
vd->vdev_cant_read = B_FALSE;
vd->vdev_cant_write = B_FALSE;
@@ -3036,32 +3038,52 @@ vdev_is_bootable(vdev_t *vd)
/*
* Load the state from the original vdev tree (ovd) which
* we've retrieved from the MOS config object. If the original
- * vdev was offline then we transfer that state to the device
- * in the current vdev tree (nvd).
+ * vdev was offline or faulted then we transfer that state to the
+ * device in the current vdev tree (nvd).
*/
void
vdev_load_log_state(vdev_t *nvd, vdev_t *ovd)
{
spa_t *spa = nvd->vdev_spa;
+ ASSERT(nvd->vdev_top->vdev_islog);
ASSERT(spa_config_held(spa, SCL_STATE_ALL, RW_WRITER) == SCL_STATE_ALL);
ASSERT3U(nvd->vdev_guid, ==, ovd->vdev_guid);
for (int c = 0; c < nvd->vdev_children; c++)
vdev_load_log_state(nvd->vdev_child[c], ovd->vdev_child[c]);
- if (nvd->vdev_ops->vdev_op_leaf && ovd->vdev_offline) {
+ if (nvd->vdev_ops->vdev_op_leaf) {
/*
- * It would be nice to call vdev_offline()
- * directly but the pool isn't fully loaded and
- * the txg threads have not been started yet.
+ * Restore the persistent vdev state
*/
nvd->vdev_offline = ovd->vdev_offline;
- vdev_reopen(nvd->vdev_top);
+ nvd->vdev_faulted = ovd->vdev_faulted;
+ nvd->vdev_degraded = ovd->vdev_degraded;
+ nvd->vdev_removed = ovd->vdev_removed;
}
}
/*
+ * Determine if a log device has valid content. If the vdev was
+ * removed or faulted in the MOS config then we know that
+ * the content on the log device has already been written to the pool.
+ */
+boolean_t
+vdev_log_state_valid(vdev_t *vd)
+{
+ if (vd->vdev_ops->vdev_op_leaf && !vd->vdev_faulted &&
+ !vd->vdev_removed)
+ return (B_TRUE);
+
+ for (int c = 0; c < vd->vdev_children; c++)
+ if (vdev_log_state_valid(vd->vdev_child[c]))
+ return (B_TRUE);
+
+ return (B_FALSE);
+}
+
+/*
* Expand a vdev if possible.
*/
void
diff --git a/usr/src/uts/common/fs/zfs/zfs_ioctl.c b/usr/src/uts/common/fs/zfs/zfs_ioctl.c
index 60605358a8..85f0a5e5cb 100644
--- a/usr/src/uts/common/fs/zfs/zfs_ioctl.c
+++ b/usr/src/uts/common/fs/zfs/zfs_ioctl.c
@@ -1206,13 +1206,15 @@ zfs_ioc_pool_import(zfs_cmd_t *zc)
if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &guid) != 0 ||
guid != zc->zc_guid)
error = EINVAL;
- else if (zc->zc_cookie)
- error = spa_import_verbatim(zc->zc_name, config, props);
else
- error = spa_import(zc->zc_name, config, props);
+ error = spa_import(zc->zc_name, config, props, zc->zc_cookie);
- if (zc->zc_nvlist_dst != 0)
- (void) put_nvlist(zc, config);
+ if (zc->zc_nvlist_dst != 0) {
+ int err;
+
+ if ((err = put_nvlist(zc, config)) != 0)
+ error = err;
+ }
nvlist_free(config);
@@ -3847,7 +3849,10 @@ zfs_ioc_clear(zfs_cmd_t *zc)
error = spa_open_rewind(zc->zc_name, &spa, FTAG,
policy, &config);
if (config != NULL) {
- (void) put_nvlist(zc, config);
+ int err;
+
+ if ((err = put_nvlist(zc, config)) != 0)
+ error = err;
nvlist_free(config);
}
nvlist_free(policy);
diff --git a/usr/src/uts/common/fs/zfs/zil.c b/usr/src/uts/common/fs/zfs/zil.c
index 358bc3857a..c66313ff6f 100644
--- a/usr/src/uts/common/fs/zfs/zil.c
+++ b/usr/src/uts/common/fs/zfs/zil.c
@@ -34,7 +34,7 @@
#include <sys/zil.h>
#include <sys/zil_impl.h>
#include <sys/dsl_dataset.h>
-#include <sys/vdev.h>
+#include <sys/vdev_impl.h>
#include <sys/dmu_tx.h>
#include <sys/dsl_pool.h>
@@ -640,6 +640,7 @@ zil_check_log_chain(const char *osname, void *tx)
{
zilog_t *zilog;
objset_t *os;
+ blkptr_t *bp;
int error;
ASSERT(tx == NULL);
@@ -651,6 +652,29 @@ zil_check_log_chain(const char *osname, void *tx)
}
zilog = dmu_objset_zil(os);
+ bp = (blkptr_t *)&zilog->zl_header->zh_log;
+
+ /*
+ * Check the first block and determine if it's on a log device
+ * which may have been removed or faulted prior to loading this
+ * pool. If so, there's no point in checking the rest of the log
+ * as its content should have already been synced to the pool.
+ */
+ if (!BP_IS_HOLE(bp)) {
+ vdev_t *vd;
+ boolean_t valid = B_TRUE;
+
+ spa_config_enter(os->os_spa, SCL_STATE, FTAG, RW_READER);
+ vd = vdev_lookup_top(os->os_spa, DVA_GET_VDEV(&bp->blk_dva[0]));
+ if (vd->vdev_islog && vdev_is_dead(vd))
+ valid = vdev_log_state_valid(vd);
+ spa_config_exit(os->os_spa, SCL_STATE, FTAG);
+
+ if (!valid) {
+ dmu_objset_rele(os, FTAG);
+ return (0);
+ }
+ }
/*
* Because tx == NULL, zil_claim_log_block() will not actually claim
diff --git a/usr/src/uts/common/sys/fs/zfs.h b/usr/src/uts/common/sys/fs/zfs.h
index 01aacb3a07..8b03fb0f9d 100644
--- a/usr/src/uts/common/sys/fs/zfs.h
+++ b/usr/src/uts/common/sys/fs/zfs.h
@@ -486,6 +486,8 @@ typedef struct zpool_rewind_policy {
#define ZPOOL_CONFIG_SUSPENDED "suspended" /* not stored on disk */
#define ZPOOL_CONFIG_TIMESTAMP "timestamp" /* not stored on disk */
#define ZPOOL_CONFIG_BOOTFS "bootfs" /* not stored on disk */
+#define ZPOOL_CONFIG_MISSING_DEVICES "missing_vdevs" /* not stored on disk */
+#define ZPOOL_CONFIG_LOAD_INFO "load_info" /* not stored on disk */
/*
* The persistent vdev state is stored as separate values rather than a single
* 'vdev_state' entry. This is because a device can be in multiple states, such
@@ -811,6 +813,14 @@ typedef enum {
#define ZFS_OFFLINE_TEMPORARY 0x1
/*
+ * Flags for ZFS_IOC_POOL_IMPORT
+ */
+#define ZFS_IMPORT_NORMAL 0x0
+#define ZFS_IMPORT_VERBATIM 0x1
+#define ZFS_IMPORT_ANY_HOST 0x2
+#define ZFS_IMPORT_MISSING_LOG 0x4
+
+/*
* Sysevent payload members. ZFS will generate the following sysevents with the
* given payloads:
*