diff options
author | George Wilson <George.Wilson@Sun.COM> | 2009-09-21 10:38:24 -0700 |
---|---|---|
committer | George Wilson <George.Wilson@Sun.COM> | 2009-09-21 10:38:24 -0700 |
commit | 88ecc943b4eb72f7c4fbbd8435997b85ef171fc3 (patch) | |
tree | ebceb7c59c849c35d63917995146dc8ad430fa31 /usr/src | |
parent | 53520bfd0d8e6401efee237b91e682ab66f77eef (diff) | |
download | illumos-joyent-88ecc943b4eb72f7c4fbbd8435997b85ef171fc3.tar.gz |
6574286 removing a slog doesn't work
6856566 zpool import -F can cause panic
6863456 system panic by load_nvlist(spa, spa->spa_config_object, &nv) == 0 while running zfs test suite
6882947 dump_nvlist() should live in libnvpair
Diffstat (limited to 'usr/src')
33 files changed, 1047 insertions, 381 deletions
diff --git a/usr/src/cmd/fm/modules/common/zfs-retire/zfs_retire.c b/usr/src/cmd/fm/modules/common/zfs-retire/zfs_retire.c index 3f0a6eee43..bc35ad9cfb 100644 --- a/usr/src/cmd/fm/modules/common/zfs-retire/zfs_retire.c +++ b/usr/src/cmd/fm/modules/common/zfs-retire/zfs_retire.c @@ -166,7 +166,7 @@ replace_with_spare(zpool_handle_t *zhp, nvlist_t *vdev) return; } - dev_name = zpool_vdev_name(NULL, zhp, vdev); + dev_name = zpool_vdev_name(NULL, zhp, vdev, B_FALSE); /* * Try to replace each spare, ending when we successfully diff --git a/usr/src/cmd/fstyp/fstyp.c b/usr/src/cmd/fstyp/fstyp.c index fb81b0edbb..464a3114a4 100644 --- a/usr/src/cmd/fstyp/fstyp.c +++ b/usr/src/cmd/fstyp/fstyp.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -50,7 +50,6 @@ static const char *getmodfsname(); static char *getexecpathname(); -static void dump_nvlist(nvlist_t *list, int indent); static boolean_t dos_to_dev(char *path, char **devpath, int *num); static boolean_t find_dos_drive(int fd, int num, off_t *offset); static void run_legacy_cmds(int fd, char *device, int vflag); @@ -177,151 +176,6 @@ out: } -#define NVP(elem, type, vtype, ptype, format) { \ - vtype value; \ -\ - (void) nvpair_value_##type(elem, &value); \ - (void) printf("%*s%s: " format "\n", indent, "", \ - nvpair_name(elem), (ptype)value); \ -} - -#define NVPA(elem, type, vtype, ptype, format) { \ - uint_t i, count; \ - vtype *value; \ -\ - (void) nvpair_value_##type(elem, &value, &count); \ - for (i = 0; i < count; i++) { \ - (void) printf("%*s%s[%d]: " format "\n", indent, "", \ - nvpair_name(elem), i, (ptype)value[i]); \ - } \ -} - -static void -dump_nvlist(nvlist_t *list, int indent) -{ - nvpair_t *elem = NULL; - boolean_t bool_value; - nvlist_t *nvlist_value; - nvlist_t **nvlist_array_value; - uint_t i, count; - - if (list == NULL) { - return; - } - - 
while ((elem = nvlist_next_nvpair(list, elem)) != NULL) { - switch (nvpair_type(elem)) { - case DATA_TYPE_BOOLEAN_VALUE: - (void) nvpair_value_boolean_value(elem, &bool_value); - (void) printf("%*s%s: %s\n", indent, "", - nvpair_name(elem), bool_value ? "true" : "false"); - break; - - case DATA_TYPE_BYTE: - NVP(elem, byte, uchar_t, int, "%u"); - break; - - case DATA_TYPE_INT8: - NVP(elem, int8, int8_t, int, "%d"); - break; - - case DATA_TYPE_UINT8: - NVP(elem, uint8, uint8_t, int, "%u"); - break; - - case DATA_TYPE_INT16: - NVP(elem, int16, int16_t, int, "%d"); - break; - - case DATA_TYPE_UINT16: - NVP(elem, uint16, uint16_t, int, "%u"); - break; - - case DATA_TYPE_INT32: - NVP(elem, int32, int32_t, long, "%ld"); - break; - - case DATA_TYPE_UINT32: - NVP(elem, uint32, uint32_t, ulong_t, "%lu"); - break; - - case DATA_TYPE_INT64: - NVP(elem, int64, int64_t, longlong_t, "%lld"); - break; - - case DATA_TYPE_UINT64: - NVP(elem, uint64, uint64_t, u_longlong_t, "%llu"); - break; - - case DATA_TYPE_STRING: - NVP(elem, string, char *, char *, "'%s'"); - break; - - case DATA_TYPE_BYTE_ARRAY: - NVPA(elem, byte_array, uchar_t, int, "%u"); - break; - - case DATA_TYPE_INT8_ARRAY: - NVPA(elem, int8_array, int8_t, int, "%d"); - break; - - case DATA_TYPE_UINT8_ARRAY: - NVPA(elem, uint8_array, uint8_t, int, "%u"); - break; - - case DATA_TYPE_INT16_ARRAY: - NVPA(elem, int16_array, int16_t, int, "%d"); - break; - - case DATA_TYPE_UINT16_ARRAY: - NVPA(elem, uint16_array, uint16_t, int, "%u"); - break; - - case DATA_TYPE_INT32_ARRAY: - NVPA(elem, int32_array, int32_t, long, "%ld"); - break; - - case DATA_TYPE_UINT32_ARRAY: - NVPA(elem, uint32_array, uint32_t, ulong_t, "%lu"); - break; - - case DATA_TYPE_INT64_ARRAY: - NVPA(elem, int64_array, int64_t, longlong_t, "%lld"); - break; - - case DATA_TYPE_UINT64_ARRAY: - NVPA(elem, uint64_array, uint64_t, u_longlong_t, - "%llu"); - break; - - case DATA_TYPE_STRING_ARRAY: - NVPA(elem, string_array, char *, char *, "'%s'"); - break; - - case 
DATA_TYPE_NVLIST: - (void) nvpair_value_nvlist(elem, &nvlist_value); - (void) printf("%*s%s:\n", indent, "", - nvpair_name(elem)); - dump_nvlist(nvlist_value, indent + 4); - break; - - case DATA_TYPE_NVLIST_ARRAY: - (void) nvpair_value_nvlist_array(elem, - &nvlist_array_value, &count); - for (i = 0; i < count; i++) { - (void) printf("%*s%s[%u]:\n", indent, "", - nvpair_name(elem), i); - dump_nvlist(nvlist_array_value[i], indent + 4); - } - break; - - default: - (void) printf(gettext("bad config type %d for %s\n"), - nvpair_type(elem), nvpair_name(elem)); - } - } -} - /* * If the executable is a fs-specific hardlink, /usr/lib/fs/<fsname>/fstyp, * return that fsname; otherwise return NULL. diff --git a/usr/src/cmd/power/handlers.c b/usr/src/cmd/power/handlers.c index 5d2d51851c..ba66f288ae 100644 --- a/usr/src/cmd/power/handlers.c +++ b/usr/src/cmd/power/handlers.c @@ -1043,7 +1043,7 @@ ztop(char *arg, char *diskname) libzfs_fini(lzfs); return (-1); } - vname = zpool_vdev_name(lzfs, zpool_handle, child[0]); + vname = zpool_vdev_name(lzfs, zpool_handle, child[0], B_FALSE); if (vname == NULL) { mesg(MERR, "couldn't determine vdev name\n"); zpool_close(zpool_handle); diff --git a/usr/src/cmd/zdb/zdb.c b/usr/src/cmd/zdb/zdb.c index 068dd228a8..f0d3fa77a4 100644 --- a/usr/src/cmd/zdb/zdb.c +++ b/usr/src/cmd/zdb/zdb.c @@ -146,68 +146,6 @@ fatal(const char *fmt, ...) 
exit(1); } -static void -dump_nvlist(nvlist_t *list, int indent) -{ - nvpair_t *elem = NULL; - - while ((elem = nvlist_next_nvpair(list, elem)) != NULL) { - switch (nvpair_type(elem)) { - case DATA_TYPE_STRING: - { - char *value; - - VERIFY(nvpair_value_string(elem, &value) == 0); - (void) printf("%*s%s='%s'\n", indent, "", - nvpair_name(elem), value); - } - break; - - case DATA_TYPE_UINT64: - { - uint64_t value; - - VERIFY(nvpair_value_uint64(elem, &value) == 0); - (void) printf("%*s%s=%llu\n", indent, "", - nvpair_name(elem), (u_longlong_t)value); - } - break; - - case DATA_TYPE_NVLIST: - { - nvlist_t *value; - - VERIFY(nvpair_value_nvlist(elem, &value) == 0); - (void) printf("%*s%s\n", indent, "", - nvpair_name(elem)); - dump_nvlist(value, indent + 4); - } - break; - - case DATA_TYPE_NVLIST_ARRAY: - { - nvlist_t **value; - uint_t c, count; - - VERIFY(nvpair_value_nvlist_array(elem, &value, - &count) == 0); - - for (c = 0; c < count; c++) { - (void) printf("%*s%s[%u]\n", indent, "", - nvpair_name(elem), c); - dump_nvlist(value[c], indent + 8); - } - } - break; - - default: - - (void) printf("bad config type %d for %s\n", - nvpair_type(elem), nvpair_name(elem)); - } - } -} - /* ARGSUSED */ static void dump_packed_nvlist(objset_t *os, uint64_t object, void *data, size_t size) diff --git a/usr/src/cmd/zinject/zinject.c b/usr/src/cmd/zinject/zinject.c index 09c377ef8d..5528ac330a 100644 --- a/usr/src/cmd/zinject/zinject.c +++ b/usr/src/cmd/zinject/zinject.c @@ -222,6 +222,11 @@ usage(void) "\t\tClear the particular record (if given a numeric ID), or\n" "\t\tall records if 'all' is specificed.\n" "\n" + "\tzinject -p <function name> pool\n" + "\t\tInject a panic fault at the specified function. Only \n" + "\t\tfunctions which call spa_vdev_config_exit(), or \n" + "\t\tspa_vdev_exit() will trigger a panic.\n" + "\n" "\tzinject -d device [-e errno] [-L <nvlist|uber>] [-F] pool\n" "\t\tInject a fault into a particular device or the device's\n" "\t\tlabel. 
Label injection can either be 'nvlist' or 'uber'.\n" @@ -295,7 +300,7 @@ print_data_handler(int id, const char *pool, zinject_record_t *record, { int *count = data; - if (record->zi_guid != 0) + if (record->zi_guid != 0 || record->zi_func[0] != '\0') return (0); if (*count == 0) { @@ -327,7 +332,7 @@ print_device_handler(int id, const char *pool, zinject_record_t *record, { int *count = data; - if (record->zi_guid == 0) + if (record->zi_guid == 0 || record->zi_func[0] != '\0') return (0); if (*count == 0) { @@ -343,6 +348,27 @@ print_device_handler(int id, const char *pool, zinject_record_t *record, return (0); } +static int +print_panic_handler(int id, const char *pool, zinject_record_t *record, + void *data) +{ + int *count = data; + + if (record->zi_func[0] == '\0') + return (0); + + if (*count == 0) { + (void) printf("%3s %-15s %s\n", "ID", "POOL", "FUNCTION"); + (void) printf("--- --------------- ----------------\n"); + } + + *count += 1; + + (void) printf("%3d %-15s %s\n", id, pool, record->zi_func); + + return (0); +} + /* * Print all registered error handlers. Returns the number of handlers * registered. 
@@ -356,6 +382,9 @@ print_all_handlers(void) (void) printf("\n"); count = 0; (void) iter_handlers(print_data_handler, &count); + (void) printf("\n"); + count = 0; + (void) iter_handlers(print_panic_handler, &count); return (count); } @@ -443,6 +472,9 @@ register_handler(const char *pool, int flags, zinject_record_t *record, if (record->zi_guid) { (void) printf(" vdev: %llx\n", (u_longlong_t)record->zi_guid); + } else if (record->zi_func[0] != '\0') { + (void) printf(" panic function: %s\n", + record->zi_func); } else { (void) printf("objset: %llu\n", (u_longlong_t)record->zi_objset); @@ -514,7 +546,7 @@ main(int argc, char **argv) return (0); } - while ((c = getopt(argc, argv, ":ab:d:f:Fqhc:t:l:mr:e:uL:")) != -1) { + while ((c = getopt(argc, argv, ":ab:d:f:Fqhc:t:l:mr:e:uL:p:")) != -1) { switch (c) { case 'a': flags |= ZINJECT_FLUSH_ARC; @@ -569,6 +601,10 @@ main(int argc, char **argv) case 'm': domount = 1; break; + case 'p': + (void) strlcpy(record.zi_func, optarg, + sizeof (record.zi_func)); + break; case 'q': quiet = 1; break; @@ -617,7 +653,7 @@ main(int argc, char **argv) * '-c' is invalid with any other options. */ if (raw != NULL || range != NULL || type != TYPE_INVAL || - level != 0) { + level != 0 || record.zi_func[0] != '\0') { (void) fprintf(stderr, "cancel (-c) incompatible with " "any other options\n"); usage(); @@ -649,7 +685,7 @@ main(int argc, char **argv) * for doing injection, so handle it separately here. 
*/ if (raw != NULL || range != NULL || type != TYPE_INVAL || - level != 0) { + level != 0 || record.zi_func[0] != '\0') { (void) fprintf(stderr, "device (-d) incompatible with " "data error injection\n"); usage(); @@ -677,7 +713,8 @@ main(int argc, char **argv) if (!error) error = ENXIO; } else if (raw != NULL) { - if (range != NULL || type != TYPE_INVAL || level != 0) { + if (range != NULL || type != TYPE_INVAL || level != 0 || + record.zi_func[0] != '\0') { (void) fprintf(stderr, "raw (-b) format with " "any other options\n"); usage(); @@ -704,10 +741,28 @@ main(int argc, char **argv) return (1); if (!error) error = EIO; + } else if (record.zi_func[0] != '\0') { + if (raw != NULL || range != NULL || type != TYPE_INVAL || + level != 0 || device != NULL) { + (void) fprintf(stderr, "panic (-p) incompatible with " + "other options\n"); + usage(); + return (2); + } + + if (argc != 1) { + (void) fprintf(stderr, "panic (-p) injection requires " + "a single pool name\n"); + usage(); + return (2); + } + + (void) strcpy(pool, argv[0]); + dataset[0] = '\0'; } else if (type == TYPE_INVAL) { if (flags == 0) { (void) fprintf(stderr, "at least one of '-b', '-d', " - "'-t', '-a', or '-u' must be specified\n"); + "'-t', '-a', '-p', or '-u' must be specified\n"); usage(); return (2); } diff --git a/usr/src/cmd/zpool/zpool_main.c b/usr/src/cmd/zpool/zpool_main.c index c78dc6f646..41bd4794c7 100644 --- a/usr/src/cmd/zpool/zpool_main.c +++ b/usr/src/cmd/zpool/zpool_main.c @@ -343,7 +343,7 @@ print_vdev_tree(zpool_handle_t *zhp, const char *name, nvlist_t *nv, int indent, if ((is_log && !print_logs) || (!is_log && print_logs)) continue; - vname = zpool_vdev_name(g_zfs, zhp, child[c]); + vname = zpool_vdev_name(g_zfs, zhp, child[c], B_FALSE); print_vdev_tree(zhp, vname, child[c], indent + 2, B_FALSE); free(vname); @@ -944,7 +944,7 @@ zpool_do_export(int argc, char **argv) static int max_width(zpool_handle_t *zhp, nvlist_t *nv, int depth, int max) { - char *name = zpool_vdev_name(g_zfs, 
zhp, nv); + char *name = zpool_vdev_name(g_zfs, zhp, nv, B_TRUE); nvlist_t **child; uint_t c, children; int ret; @@ -1144,14 +1144,16 @@ print_status_config(zpool_handle_t *zhp, const char *name, nvlist_t *nv, (void) printf("\n"); for (c = 0; c < children; c++) { - uint64_t is_log = B_FALSE; + uint64_t islog = B_FALSE, ishole = B_FALSE; - /* Don't print logs here */ + /* Don't print logs or holes here */ (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG, - &is_log); - if (is_log) + &islog); + (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_HOLE, + &ishole); + if (islog || ishole) continue; - vname = zpool_vdev_name(g_zfs, zhp, child[c]); + vname = zpool_vdev_name(g_zfs, zhp, child[c], B_TRUE); print_status_config(zhp, vname, child[c], namewidth, depth + 2, isspare); free(vname); @@ -1172,7 +1174,8 @@ print_import_config(const char *name, nvlist_t *nv, int namewidth, int depth) char *type, *vname; verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0); - if (strcmp(type, VDEV_TYPE_MISSING) == 0) + if (strcmp(type, VDEV_TYPE_MISSING) == 0 || + strcmp(type, VDEV_TYPE_HOLE) == 0) return; verify(nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_STATS, @@ -1224,7 +1227,7 @@ print_import_config(const char *name, nvlist_t *nv, int namewidth, int depth) if (is_log) continue; - vname = zpool_vdev_name(g_zfs, NULL, child[c]); + vname = zpool_vdev_name(g_zfs, NULL, child[c], B_TRUE); print_import_config(vname, child[c], namewidth, depth + 2); free(vname); } @@ -1233,7 +1236,7 @@ print_import_config(const char *name, nvlist_t *nv, int namewidth, int depth) &child, &children) == 0) { (void) printf(gettext("\tcache\n")); for (c = 0; c < children; c++) { - vname = zpool_vdev_name(g_zfs, NULL, child[c]); + vname = zpool_vdev_name(g_zfs, NULL, child[c], B_FALSE); (void) printf("\t %s\n", vname); free(vname); } @@ -1243,7 +1246,7 @@ print_import_config(const char *name, nvlist_t *nv, int namewidth, int depth) &child, &children) == 0) { (void) 
printf(gettext("\tspares\n")); for (c = 0; c < children; c++) { - vname = zpool_vdev_name(g_zfs, NULL, child[c]); + vname = zpool_vdev_name(g_zfs, NULL, child[c], B_FALSE); (void) printf("\t %s\n", vname); free(vname); } @@ -1278,7 +1281,7 @@ print_logs(zpool_handle_t *zhp, nvlist_t *nv, int namewidth, boolean_t verbose) &is_log); if (!is_log) continue; - name = zpool_vdev_name(g_zfs, zhp, child[c]); + name = zpool_vdev_name(g_zfs, zhp, child[c], B_TRUE); if (verbose) print_status_config(zhp, name, child[c], namewidth, 2, B_FALSE); @@ -1964,7 +1967,7 @@ print_vdev_stats(zpool_handle_t *zhp, const char *name, nvlist_t *oldnv, return; for (c = 0; c < children; c++) { - vname = zpool_vdev_name(g_zfs, zhp, newchild[c]); + vname = zpool_vdev_name(g_zfs, zhp, newchild[c], B_FALSE); print_vdev_stats(zhp, vname, oldnv ? oldchild[c] : NULL, newchild[c], cb, depth + 2); free(vname); @@ -1985,7 +1988,8 @@ print_vdev_stats(zpool_handle_t *zhp, const char *name, nvlist_t *oldnv, (void) printf("%-*s - - - - - " "-\n", cb->cb_namewidth, "cache"); for (c = 0; c < children; c++) { - vname = zpool_vdev_name(g_zfs, zhp, newchild[c]); + vname = zpool_vdev_name(g_zfs, zhp, newchild[c], + B_FALSE); print_vdev_stats(zhp, vname, oldnv ? 
oldchild[c] : NULL, newchild[c], cb, depth + 2); free(vname); @@ -2996,7 +3000,7 @@ print_spares(zpool_handle_t *zhp, nvlist_t **spares, uint_t nspares, (void) printf(gettext("\tspares\n")); for (i = 0; i < nspares; i++) { - name = zpool_vdev_name(g_zfs, zhp, spares[i]); + name = zpool_vdev_name(g_zfs, zhp, spares[i], B_FALSE); print_status_config(zhp, name, spares[i], namewidth, 2, B_TRUE); free(name); @@ -3016,7 +3020,7 @@ print_l2cache(zpool_handle_t *zhp, nvlist_t **l2cache, uint_t nl2cache, (void) printf(gettext("\tcache\n")); for (i = 0; i < nl2cache; i++) { - name = zpool_vdev_name(g_zfs, zhp, l2cache[i]); + name = zpool_vdev_name(g_zfs, zhp, l2cache[i], B_FALSE); print_status_config(zhp, name, l2cache[i], namewidth, 2, B_FALSE); free(name); @@ -3573,6 +3577,7 @@ zpool_do_upgrade(int argc, char **argv) (void) printf(gettext(" 16 stmf property support\n")); (void) printf(gettext(" 17 Triple-parity RAID-Z\n")); (void) printf(gettext(" 18 snapshot user holds\n")); + (void) printf(gettext(" 19 Log device removal\n")); (void) printf(gettext("For more information on a particular " "version, including supported releases, see:\n\n")); (void) printf("http://www.opensolaris.org/os/community/zfs/" diff --git a/usr/src/cmd/zpool/zpool_util.c b/usr/src/cmd/zpool/zpool_util.c index bc34e41a4c..c7a002efb1 100644 --- a/usr/src/cmd/zpool/zpool_util.c +++ b/usr/src/cmd/zpool/zpool_util.c @@ -49,22 +49,6 @@ safe_malloc(size_t size) } /* - * Same as above, but for strdup() - */ -char * -zpool_safe_strdup(const char *str) -{ - char *ret; - - if ((ret = strdup(str)) == NULL) { - (void) fprintf(stderr, "internal error: out of memory\n"); - exit(1); - } - - return (ret); -} - -/* * Display an out of memory error message and abort the current program. 
*/ void diff --git a/usr/src/cmd/zpool/zpool_util.h b/usr/src/cmd/zpool/zpool_util.h index 2bcefee62b..c86b2e7405 100644 --- a/usr/src/cmd/zpool/zpool_util.h +++ b/usr/src/cmd/zpool/zpool_util.h @@ -37,7 +37,6 @@ extern "C" { * Basic utility functions */ void *safe_malloc(size_t); -char *zpool_safe_strdup(const char *); void zpool_no_memory(void); uint_t num_logs(nvlist_t *nv); diff --git a/usr/src/cmd/ztest/ztest.c b/usr/src/cmd/ztest/ztest.c index 4cf36302c0..81b53a68bc 100644 --- a/usr/src/cmd/ztest/ztest.c +++ b/usr/src/cmd/ztest/ztest.c @@ -92,6 +92,7 @@ #include <sys/vdev_impl.h> #include <sys/vdev_file.h> #include <sys/spa_impl.h> +#include <sys/metaslab_impl.h> #include <sys/dsl_prop.h> #include <sys/dsl_dataset.h> #include <sys/refcount.h> @@ -231,7 +232,7 @@ ztest_info_t ztest_info[] = { typedef struct ztest_shared { mutex_t zs_vdev_lock; rwlock_t zs_name_lock; - uint64_t zs_vdev_primaries; + uint64_t zs_vdev_next_leaf; uint64_t zs_vdev_aux; uint64_t zs_enospc_count; hrtime_t zs_start_time; @@ -558,7 +559,7 @@ make_vdev_file(char *path, char *aux, size_t size, uint64_t ashift) (void) sprintf(path, ztest_aux_template, zopt_dir, zopt_pool, aux, vdev); } else { - vdev = ztest_shared->zs_vdev_primaries++; + vdev = ztest_shared->zs_vdev_next_leaf++; (void) sprintf(path, ztest_dev_template, zopt_dir, zopt_pool, vdev); } @@ -850,6 +851,26 @@ vdev_lookup_by_path(vdev_t *vd, const char *path) } /* + * Find the first available hole which can be used as a top-level. + */ +int +find_vdev_hole(spa_t *spa) +{ + vdev_t *rvd = spa->spa_root_vdev; + int c; + + ASSERT(spa_config_held(spa, SCL_VDEV, RW_READER) == SCL_VDEV); + + for (c = 0; c < rvd->vdev_children; c++) { + vdev_t *cvd = rvd->vdev_child[c]; + + if (cvd->vdev_ishole) + break; + } + return (c); +} + +/* * Verify that vdev_add() works as expected. 
*/ void @@ -857,6 +878,7 @@ ztest_vdev_add_remove(ztest_args_t *za) { spa_t *spa = za->za_spa; uint64_t leaves = MAX(zopt_mirrors, 1) * zopt_raidz; + uint64_t guid; nvlist_t *nvroot; int error; @@ -864,26 +886,52 @@ ztest_vdev_add_remove(ztest_args_t *za) spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); - ztest_shared->zs_vdev_primaries = - spa->spa_root_vdev->vdev_children * leaves; - - spa_config_exit(spa, SCL_VDEV, FTAG); + ztest_shared->zs_vdev_next_leaf = find_vdev_hole(spa) * leaves; /* - * Make 1/4 of the devices be log devices. + * If we have slogs then remove them 1/4 of the time. */ - nvroot = make_vdev_root(NULL, NULL, zopt_vdev_size, 0, - ztest_random(4) == 0, zopt_raidz, zopt_mirrors, 1); + if (spa_has_slogs(spa) && ztest_random(4) == 0) { + /* + * Grab the guid from the head of the log class rotor. + */ + guid = spa->spa_log_class->mc_rotor->mg_vd->vdev_guid; - error = spa_vdev_add(spa, nvroot); - nvlist_free(nvroot); + spa_config_exit(spa, SCL_VDEV, FTAG); - (void) mutex_unlock(&ztest_shared->zs_vdev_lock); + /* + * We have to grab the zs_name_lock as writer to + * prevent a race between removing a slog (dmu_objset_find) + * and destroying a dataset. Removing the slog will + * grab a reference on the dataset which may cause + * dmu_objset_destroy() to fail with EBUSY thus + * leaving the dataset in an inconsistent state. + */ + (void) rw_wrlock(&ztest_shared->zs_name_lock); + error = spa_vdev_remove(spa, guid, B_FALSE); + (void) rw_unlock(&ztest_shared->zs_name_lock); - if (error == ENOSPC) - ztest_record_enospc("spa_vdev_add"); - else if (error != 0) - fatal(0, "spa_vdev_add() = %d", error); + if (error && error != EEXIST) + fatal(0, "spa_vdev_remove() = %d", error); + } else { + spa_config_exit(spa, SCL_VDEV, FTAG); + + /* + * Make 1/4 of the devices be log devices. 
+ */ + nvroot = make_vdev_root(NULL, NULL, zopt_vdev_size, 0, + ztest_random(4) == 0, zopt_raidz, zopt_mirrors, 1); + + error = spa_vdev_add(spa, nvroot); + nvlist_free(nvroot); + + if (error == ENOSPC) + ztest_record_enospc("spa_vdev_add"); + else if (error != 0) + fatal(0, "spa_vdev_add() = %d", error); + } + + (void) mutex_unlock(&ztest_shared->zs_vdev_lock); } /* @@ -4004,7 +4052,7 @@ ztest_init(char *pool) * Create the storage pool. */ (void) spa_destroy(pool); - ztest_shared->zs_vdev_primaries = 0; + ztest_shared->zs_vdev_next_leaf = 0; nvroot = make_vdev_root(NULL, NULL, zopt_vdev_size, 0, 0, zopt_raidz, zopt_mirrors, 1); error = spa_create(pool, nvroot, NULL, NULL, NULL); diff --git a/usr/src/grub/capability b/usr/src/grub/capability index 5d81d0e2f3..25987fb4f7 100644 --- a/usr/src/grub/capability +++ b/usr/src/grub/capability @@ -40,7 +40,7 @@ # This file and the associated version are Solaris specific and are # not a part of the open source distribution of GRUB. # -VERSION=11 +VERSION=12 dboot xVM zfs diff --git a/usr/src/grub/grub-0.97/stage2/zfs-include/zfs.h b/usr/src/grub/grub-0.97/stage2/zfs-include/zfs.h index 4e4a72c139..612a0c4c2d 100644 --- a/usr/src/grub/grub-0.97/stage2/zfs-include/zfs.h +++ b/usr/src/grub/grub-0.97/stage2/zfs-include/zfs.h @@ -27,7 +27,7 @@ /* * On-disk version number. */ -#define SPA_VERSION 18ULL +#define SPA_VERSION 19ULL /* * The following are configuration names used in the nvlist describing a pool's @@ -61,6 +61,9 @@ #define ZPOOL_CONFIG_NPARITY "nparity" #define ZPOOL_CONFIG_PHYS_PATH "phys_path" #define ZPOOL_CONFIG_L2CACHE "l2cache" +#define ZPOOL_CONFIG_HOLE_ARRAY "hole_array" +#define ZPOOL_CONFIG_VDEV_CHILDREN "vdev_children" +#define ZPOOL_CONFIG_IS_HOLE "is_hole" /* * The persistent vdev state is stored as separate values rather than a single * 'vdev_state' entry. 
This is because a device can be in multiple states, such @@ -78,6 +81,7 @@ #define VDEV_TYPE_DISK "disk" #define VDEV_TYPE_FILE "file" #define VDEV_TYPE_MISSING "missing" +#define VDEV_TYPE_HOLE "hole" #define VDEV_TYPE_SPARE "spare" #define VDEV_TYPE_L2CACHE "l2cache" diff --git a/usr/src/lib/libnvpair/libnvpair.c b/usr/src/lib/libnvpair/libnvpair.c index 0845cb08cf..57915cd737 100644 --- a/usr/src/lib/libnvpair/libnvpair.c +++ b/usr/src/lib/libnvpair/libnvpair.c @@ -19,14 +19,13 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - #include <unistd.h> #include <strings.h> +#include <libintl.h> #include <sys/types.h> #include <sys/inttypes.h> #include "libnvpair.h" @@ -272,6 +271,156 @@ nvlist_print(FILE *fp, nvlist_t *nvl) nvlist_print_with_indent(fp, nvl, 0); } + +#define NVP(elem, type, vtype, ptype, format) { \ + vtype value; \ +\ + (void) nvpair_value_##type(elem, &value); \ + (void) printf("%*s%s: " format "\n", indent, "", \ + nvpair_name(elem), (ptype)value); \ +} + +#define NVPA(elem, type, vtype, ptype, format) { \ + uint_t i, count; \ + vtype *value; \ +\ + (void) nvpair_value_##type(elem, &value, &count); \ + for (i = 0; i < count; i++) { \ + (void) printf("%*s%s[%d]: " format "\n", indent, "", \ + nvpair_name(elem), i, (ptype)value[i]); \ + } \ +} + +/* + * Similar to nvlist_print() but handles arrays slightly differently. 
+ */ +void +dump_nvlist(nvlist_t *list, int indent) +{ + nvpair_t *elem = NULL; + boolean_t bool_value; + nvlist_t *nvlist_value; + nvlist_t **nvlist_array_value; + uint_t i, count; + + if (list == NULL) { + return; + } + + while ((elem = nvlist_next_nvpair(list, elem)) != NULL) { + switch (nvpair_type(elem)) { + case DATA_TYPE_BOOLEAN_VALUE: + (void) nvpair_value_boolean_value(elem, &bool_value); + (void) printf("%*s%s: %s\n", indent, "", + nvpair_name(elem), bool_value ? "true" : "false"); + break; + + case DATA_TYPE_BYTE: + NVP(elem, byte, uchar_t, int, "%u"); + break; + + case DATA_TYPE_INT8: + NVP(elem, int8, int8_t, int, "%d"); + break; + + case DATA_TYPE_UINT8: + NVP(elem, uint8, uint8_t, int, "%u"); + break; + + case DATA_TYPE_INT16: + NVP(elem, int16, int16_t, int, "%d"); + break; + + case DATA_TYPE_UINT16: + NVP(elem, uint16, uint16_t, int, "%u"); + break; + + case DATA_TYPE_INT32: + NVP(elem, int32, int32_t, long, "%ld"); + break; + + case DATA_TYPE_UINT32: + NVP(elem, uint32, uint32_t, ulong_t, "%lu"); + break; + + case DATA_TYPE_INT64: + NVP(elem, int64, int64_t, longlong_t, "%lld"); + break; + + case DATA_TYPE_UINT64: + NVP(elem, uint64, uint64_t, u_longlong_t, "%llu"); + break; + + case DATA_TYPE_STRING: + NVP(elem, string, char *, char *, "'%s'"); + break; + + case DATA_TYPE_BYTE_ARRAY: + NVPA(elem, byte_array, uchar_t, int, "%u"); + break; + + case DATA_TYPE_INT8_ARRAY: + NVPA(elem, int8_array, int8_t, int, "%d"); + break; + + case DATA_TYPE_UINT8_ARRAY: + NVPA(elem, uint8_array, uint8_t, int, "%u"); + break; + + case DATA_TYPE_INT16_ARRAY: + NVPA(elem, int16_array, int16_t, int, "%d"); + break; + + case DATA_TYPE_UINT16_ARRAY: + NVPA(elem, uint16_array, uint16_t, int, "%u"); + break; + + case DATA_TYPE_INT32_ARRAY: + NVPA(elem, int32_array, int32_t, long, "%ld"); + break; + + case DATA_TYPE_UINT32_ARRAY: + NVPA(elem, uint32_array, uint32_t, ulong_t, "%lu"); + break; + + case DATA_TYPE_INT64_ARRAY: + NVPA(elem, int64_array, int64_t, longlong_t, 
"%lld"); + break; + + case DATA_TYPE_UINT64_ARRAY: + NVPA(elem, uint64_array, uint64_t, u_longlong_t, + "%llu"); + break; + + case DATA_TYPE_STRING_ARRAY: + NVPA(elem, string_array, char *, char *, "'%s'"); + break; + + case DATA_TYPE_NVLIST: + (void) nvpair_value_nvlist(elem, &nvlist_value); + (void) printf("%*s%s:\n", indent, "", + nvpair_name(elem)); + dump_nvlist(nvlist_value, indent + 4); + break; + + case DATA_TYPE_NVLIST_ARRAY: + (void) nvpair_value_nvlist_array(elem, + &nvlist_array_value, &count); + for (i = 0; i < count; i++) { + (void) printf("%*s%s[%u]:\n", indent, "", + nvpair_name(elem), i); + dump_nvlist(nvlist_array_value[i], indent + 4); + } + break; + + default: + (void) printf(dgettext(TEXT_DOMAIN, "bad config type " + "%d for %s\n"), nvpair_type(elem), + nvpair_name(elem)); + } + } +} + /* * Determine if string 'value' matches 'nvp' value. The 'value' string is * converted, depending on the type of 'nvp', prior to match. For numeric diff --git a/usr/src/lib/libnvpair/libnvpair.h b/usr/src/lib/libnvpair/libnvpair.h index e655e0d406..15c1c78167 100644 --- a/usr/src/lib/libnvpair/libnvpair.h +++ b/usr/src/lib/libnvpair/libnvpair.h @@ -19,15 +19,13 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ #ifndef _LIBNVPAIR_H #define _LIBNVPAIR_H -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/nvpair.h> #include <stdlib.h> #include <stdio.h> @@ -40,6 +38,7 @@ extern "C" { void nvlist_print(FILE *, nvlist_t *); int nvpair_value_match(nvpair_t *, int, char *, char **); int nvpair_value_match_regex(nvpair_t *, int, char *, regex_t *, char **); +void dump_nvlist(nvlist_t *, int); #ifdef __cplusplus } diff --git a/usr/src/lib/libnvpair/mapfile-vers b/usr/src/lib/libnvpair/mapfile-vers index a6d56b5ca8..52f9fcfaec 100644 --- a/usr/src/lib/libnvpair/mapfile-vers +++ b/usr/src/lib/libnvpair/mapfile-vers @@ -166,6 +166,7 @@ SUNW_1.1 { SUNWprivate_1.1 { global: + dump_nvlist; nvlist_add_hrtime; nvlist_lookup_hrtime; nvlist_print; diff --git a/usr/src/lib/libzfs/common/libzfs.h b/usr/src/lib/libzfs/common/libzfs.h index 81f556d816..546c8e451f 100644 --- a/usr/src/lib/libzfs/common/libzfs.h +++ b/usr/src/lib/libzfs/common/libzfs.h @@ -332,7 +332,8 @@ extern nvlist_t *zpool_find_import_activeok(libzfs_handle_t *, int, char **); */ struct zfs_cmd; -extern char *zpool_vdev_name(libzfs_handle_t *, zpool_handle_t *, nvlist_t *); +extern char *zpool_vdev_name(libzfs_handle_t *, zpool_handle_t *, nvlist_t *, + boolean_t verbose); extern int zpool_upgrade(zpool_handle_t *, uint64_t); extern int zpool_get_history(zpool_handle_t *, nvlist_t **); extern void zpool_set_history_str(const char *subcommand, int argc, diff --git a/usr/src/lib/libzfs/common/libzfs_import.c b/usr/src/lib/libzfs/common/libzfs_import.c index d67776889d..f5793390ea 100644 --- a/usr/src/lib/libzfs/common/libzfs_import.c +++ b/usr/src/lib/libzfs/common/libzfs_import.c @@ -19,12 +19,10 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - /* * Pool import support functions. 
* @@ -388,8 +386,6 @@ refresh_config(libzfs_handle_t *hdl, nvlist_t *config) } if (err) { - (void) zpool_standard_error(hdl, errno, - dgettext(TEXT_DOMAIN, "cannot discover pools")); zcmd_free_nvlists(&zc); return (NULL); } @@ -404,6 +400,21 @@ refresh_config(libzfs_handle_t *hdl, nvlist_t *config) } /* + * Determine if the vdev id is a hole in the namespace. + */ +boolean_t +vdev_is_hole(uint64_t *hole_array, uint_t holes, uint_t id) +{ + for (int c = 0; c < holes; c++) { + + /* Top-level is a hole */ + if (hole_array[c] == id) + return (B_TRUE); + } + return (B_FALSE); +} + +/* * Convert our list of pools into the definitive set of configurations. We * start by picking the best config for each toplevel vdev. Once that's done, * we assemble the toplevel vdevs into a full config for the pool. We make a @@ -425,17 +436,20 @@ get_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok) uint64_t version, guid; uint_t children = 0; nvlist_t **child = NULL; + uint_t holes; + uint64_t *hole_array, max_id; uint_t c; boolean_t isactive; uint64_t hostid; nvlist_t *nvl; boolean_t found_one = B_FALSE; + boolean_t valid_top_config = B_FALSE; if (nvlist_alloc(&ret, 0, 0) != 0) goto nomem; for (pe = pl->pools; pe != NULL; pe = pe->pe_next) { - uint64_t id; + uint64_t id, max_txg = 0; if (nvlist_alloc(&config, NV_UNIQUE_NAME, 0) != 0) goto nomem; @@ -463,6 +477,42 @@ get_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok) } } + /* + * We rely on the fact that the max txg for the + * pool will contain the most up-to-date information + * about the valid top-levels in the vdev namespace. 
+ */ + if (best_txg > max_txg) { + (void) nvlist_remove(config, + ZPOOL_CONFIG_VDEV_CHILDREN, + DATA_TYPE_UINT64); + (void) nvlist_remove(config, + ZPOOL_CONFIG_HOLE_ARRAY, + DATA_TYPE_UINT64_ARRAY); + + max_txg = best_txg; + hole_array = NULL; + holes = 0; + max_id = 0; + valid_top_config = B_FALSE; + + if (nvlist_lookup_uint64(tmp, + ZPOOL_CONFIG_VDEV_CHILDREN, &max_id) == 0) { + verify(nvlist_add_uint64(config, + ZPOOL_CONFIG_VDEV_CHILDREN, + max_id) == 0); + valid_top_config = B_TRUE; + } + + if (nvlist_lookup_uint64_array(tmp, + ZPOOL_CONFIG_HOLE_ARRAY, &hole_array, + &holes) == 0) { + verify(nvlist_add_uint64_array(config, + ZPOOL_CONFIG_HOLE_ARRAY, + hole_array, holes) == 0); + } + } + if (!config_seen) { /* * Copy the relevant pieces of data to the pool @@ -522,6 +572,7 @@ get_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok) ZPOOL_CONFIG_VDEV_TREE, &nvtop) == 0); verify(nvlist_lookup_uint64(nvtop, ZPOOL_CONFIG_ID, &id) == 0); + if (id >= children) { nvlist_t **newchild; @@ -542,17 +593,82 @@ get_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok) } + /* + * If we have information about all the top-levels then + * clean up the nvlist which we've constructed. This + * means removing any extraneous devices that are + * beyond the valid range or adding devices to the end + * of our array which appear to be missing. + */ + if (valid_top_config) { + if (max_id < children) { + for (c = max_id; c < children; c++) + nvlist_free(child[c]); + children = max_id; + } else if (max_id > children) { + nvlist_t **newchild; + + newchild = zfs_alloc(hdl, (max_id) * + sizeof (nvlist_t *)); + if (newchild == NULL) + goto nomem; + + for (c = 0; c < children; c++) + newchild[c] = child[c]; + + free(child); + child = newchild; + children = max_id; + } + } + verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &guid) == 0); /* + * The vdev namespace may contain holes as a result of + * device removal. 
We must add them back into the vdev + * tree before we process any missing devices. + */ + if (holes > 0) { + ASSERT(valid_top_config); + + for (c = 0; c < children; c++) { + nvlist_t *holey; + + if (child[c] != NULL || + !vdev_is_hole(hole_array, holes, c)) + continue; + + if (nvlist_alloc(&holey, NV_UNIQUE_NAME, + 0) != 0) + goto nomem; + + /* + * Holes in the namespace are treated as + * "hole" top-level vdevs and have a + * special flag set on them. + */ + if (nvlist_add_string(holey, + ZPOOL_CONFIG_TYPE, + VDEV_TYPE_HOLE) != 0 || + nvlist_add_uint64(holey, + ZPOOL_CONFIG_ID, c) != 0 || + nvlist_add_uint64(holey, + ZPOOL_CONFIG_GUID, 0ULL) != 0) + goto nomem; + child[c] = holey; + } + } + + /* * Look for any missing top-level vdevs. If this is the case, * create a faked up 'missing' vdev as a placeholder. We cannot * simply compress the child array, because the kernel performs * certain checks to make sure the vdev IDs match their location * in the configuration. */ - for (c = 0; c < children; c++) + for (c = 0; c < children; c++) { if (child[c] == NULL) { nvlist_t *missing; if (nvlist_alloc(&missing, NV_UNIQUE_NAME, @@ -570,6 +686,7 @@ get_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok) } child[c] = missing; } + } /* * Put all of this pool's top-level vdevs into a root vdev. 
@@ -636,8 +753,11 @@ get_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok) continue; } - if ((nvl = refresh_config(hdl, config)) == NULL) - goto error; + if ((nvl = refresh_config(hdl, config)) == NULL) { + nvlist_free(config); + config = NULL; + continue; + } nvlist_free(config); config = nvl; diff --git a/usr/src/lib/libzfs/common/libzfs_pool.c b/usr/src/lib/libzfs/common/libzfs_pool.c index de6587ec40..da19f7a780 100644 --- a/usr/src/lib/libzfs/common/libzfs_pool.c +++ b/usr/src/lib/libzfs/common/libzfs_pool.c @@ -1063,7 +1063,8 @@ zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot) zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "device '%s' contains an EFI label and " "cannot be used on root pools."), - zpool_vdev_name(hdl, NULL, spares[s])); + zpool_vdev_name(hdl, NULL, spares[s], + B_FALSE)); return (zfs_error(hdl, EZFS_POOL_NOTSUP, msg)); } } @@ -1419,8 +1420,9 @@ vdev_to_nvlist_iter(nvlist_t *nv, nvlist_t *search, boolean_t *avail_spare, /* * Search for the requested value. We special case the search - * for ZPOOL_CONFIG_PATH when it's a wholedisk. Otherwise, - * all other searches are simple string compares. + * for ZPOOL_CONFIG_PATH when it's a wholedisk and when + * Looking for a top-level vdev name (i.e. ZPOOL_CONFIG_TYPE). + * Otherwise, all other searches are simple string compares. */ if (strcmp(srchkey, ZPOOL_CONFIG_PATH) == 0 && val) { uint64_t wholedisk = 0; @@ -1437,6 +1439,52 @@ vdev_to_nvlist_iter(nvlist_t *nv, nvlist_t *search, boolean_t *avail_spare, return (nv); break; } + } else if (strcmp(srchkey, ZPOOL_CONFIG_TYPE) == 0 && val) { + char *type, *idx, *end, *p; + uint64_t id, vdev_id; + + /* + * Determine our vdev type, keeping in mind + * that the srchval is composed of a type and + * vdev id pair (i.e. mirror-4). + */ + if ((type = strdup(srchval)) == NULL) + return (NULL); + + if ((p = strrchr(type, '-')) == NULL) { + free(type); + break; + } + idx = p + 1; + *p = '\0'; + + /* + * If the types don't match then keep looking. 
+ */ + if (strncmp(val, type, strlen(val)) != 0) { + free(type); + break; + } + + verify(strncmp(type, VDEV_TYPE_RAIDZ, + strlen(VDEV_TYPE_RAIDZ)) == 0 || + strncmp(type, VDEV_TYPE_MIRROR, + strlen(VDEV_TYPE_MIRROR)) == 0); + verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ID, + &id) == 0); + + errno = 0; + vdev_id = strtoull(idx, &end, 10); + + free(type); + if (errno != 0) + return (NULL); + + /* + * Now verify that we have the correct vdev id. + */ + if (vdev_id == id) + return (nv); } /* @@ -1522,6 +1570,18 @@ zpool_find_vdev_by_physpath(zpool_handle_t *zhp, const char *ppath, return (ret); } +/* + * Determine if we have an "interior" top-level vdev (i.e mirror/raidz). + */ +boolean_t +zpool_vdev_is_interior(const char *name) +{ + if (strncmp(name, VDEV_TYPE_RAIDZ, strlen(VDEV_TYPE_RAIDZ)) == 0 || + strncmp(name, VDEV_TYPE_MIRROR, strlen(VDEV_TYPE_MIRROR)) == 0) + return (B_TRUE); + return (B_FALSE); +} + nvlist_t * zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare, boolean_t *l2cache, boolean_t *log) @@ -1536,6 +1596,8 @@ zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare, guid = strtoull(path, &end, 10); if (guid != 0 && *end == '\0') { verify(nvlist_add_uint64(search, ZPOOL_CONFIG_GUID, guid) == 0); + } else if (zpool_vdev_is_interior(path)) { + verify(nvlist_add_string(search, ZPOOL_CONFIG_TYPE, path) == 0); } else if (path[0] != '/') { (void) snprintf(buf, sizeof (buf), "%s%s", "/dev/dsk/", path); verify(nvlist_add_string(search, ZPOOL_CONFIG_PATH, buf) == 0); @@ -2038,7 +2100,7 @@ zpool_vdev_attach(zpool_handle_t *zhp, verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL), ZPOOL_CONFIG_VDEV_TREE, &config_root) == 0); - if ((newname = zpool_vdev_name(NULL, NULL, child[0])) == NULL) + if ((newname = zpool_vdev_name(NULL, NULL, child[0], B_FALSE)) == NULL) return (-1); /* @@ -2235,24 +2297,34 @@ zpool_vdev_remove(zpool_handle_t *zhp, const char *path) zfs_cmd_t zc = { 0 }; char msg[1024]; nvlist_t *tgt; - 
boolean_t avail_spare, l2cache; + boolean_t avail_spare, l2cache, islog; libzfs_handle_t *hdl = zhp->zpool_hdl; + uint64_t version; (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, "cannot remove %s"), path); (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache, - NULL)) == 0) + &islog)) == 0) return (zfs_error(hdl, EZFS_NODEVICE, msg)); - - if (!avail_spare && !l2cache) { + /* + * XXX - this should just go away. + */ + if (!avail_spare && !l2cache && !islog) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "only inactive hot spares or cache devices " - "can be removed")); + "only inactive hot spares, cache, top-level, " + "or log devices can be removed")); return (zfs_error(hdl, EZFS_NODEVICE, msg)); } + version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL); + if (islog && version < SPA_VERSION_HOLES) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "pool must be upgrade to support log removal")); + return (zfs_error(hdl, EZFS_BADVERSION, msg)); + } + verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0); if (zfs_ioctl(hdl, ZFS_IOC_VDEV_REMOVE, &zc) == 0) @@ -2420,7 +2492,8 @@ set_path(zpool_handle_t *zhp, nvlist_t *nv, const char *path) * of these checks. */ char * -zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv) +zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv, + boolean_t verbose) { char *path, *devid; uint64_t value; @@ -2499,6 +2572,20 @@ zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv) (u_longlong_t)value); path = buf; } + + /* + * We identify each top-level vdev by using a <type-id> + * naming convention. 
+ */ + if (verbose) { + uint64_t id; + + verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ID, + &id) == 0); + (void) snprintf(buf, sizeof (buf), "%s-%llu", path, + (u_longlong_t)id); + path = buf; + } } return (zfs_strdup(hdl, path)); @@ -3036,6 +3123,7 @@ supported_dump_vdev_type(libzfs_handle_t *hdl, nvlist_t *config, char *errbuf) if (strcmp(type, VDEV_TYPE_RAIDZ) == 0 || strcmp(type, VDEV_TYPE_FILE) == 0 || strcmp(type, VDEV_TYPE_LOG) == 0 || + strcmp(type, VDEV_TYPE_HOLE) == 0 || strcmp(type, VDEV_TYPE_MISSING) == 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "vdev type '%s' is not supported"), type); diff --git a/usr/src/uts/common/fs/zfs/metaslab.c b/usr/src/uts/common/fs/zfs/metaslab.c index 77556ac5d7..3ebde10240 100644 --- a/usr/src/uts/common/fs/zfs/metaslab.c +++ b/usr/src/uts/common/fs/zfs/metaslab.c @@ -57,12 +57,13 @@ int metaslab_df_free_pct = 30; * ========================================================================== */ metaslab_class_t * -metaslab_class_create(space_map_ops_t *ops) +metaslab_class_create(spa_t *spa, space_map_ops_t *ops) { metaslab_class_t *mc; mc = kmem_zalloc(sizeof (metaslab_class_t), KM_SLEEP); + mc->mc_spa = spa; mc->mc_rotor = NULL; mc->mc_ops = ops; @@ -126,6 +127,32 @@ metaslab_class_remove(metaslab_class_t *mc, metaslab_group_t *mg) mg->mg_class = NULL; } +int +metaslab_class_validate(metaslab_class_t *mc) +{ + metaslab_group_t *mg; + vdev_t *vd; + + /* + * Must hold one of the spa_config locks. 
+ */ + ASSERT(spa_config_held(mc->mc_spa, SCL_ALL, RW_READER) || + spa_config_held(mc->mc_spa, SCL_ALL, RW_WRITER)); + + if ((mg = mc->mc_rotor) == NULL) + return (0); + + do { + vd = mg->mg_vd; + ASSERT(vd->vdev_mg != NULL); + ASSERT3P(vd->vdev_top, ==, vd); + ASSERT3P(mg->mg_class, ==, mc); + ASSERT3P(vd->vdev_ops, !=, &vdev_hole_ops); + } while ((mg = mg->mg_next) != mc->mc_rotor); + + return (0); +} + /* * ========================================================================== * Metaslab groups @@ -634,6 +661,8 @@ metaslab_sync(metaslab_t *msp, uint64_t txg) dmu_tx_t *tx; int t; + ASSERT(!vd->vdev_ishole); + tx = dmu_tx_create_assigned(spa_get_dsl(spa), txg); /* @@ -721,6 +750,8 @@ metaslab_sync_done(metaslab_t *msp, uint64_t txg) vdev_t *vd = mg->mg_vd; int t; + ASSERT(!vd->vdev_ishole); + mutex_enter(&msp->ms_lock); /* @@ -932,10 +963,21 @@ metaslab_alloc_dva(spa_t *spa, metaslab_class_t *mc, uint64_t psize, */ if (hintdva) { vd = vdev_lookup_top(spa, DVA_GET_VDEV(&hintdva[d])); - if (flags & METASLAB_HINTBP_AVOID) - mg = vd->vdev_mg->mg_next; - else + + /* + * It's possible the vdev we're using as the hint no + * longer exists (i.e. removed). Consult the rotor when + * all else fails. 
+ */ + if (vd != NULL && vd->vdev_mg != NULL) { mg = vd->vdev_mg; + + if (flags & METASLAB_HINTBP_AVOID && + mg->mg_next != NULL) + mg = mg->mg_next; + } else { + mg = mc->mc_rotor; + } } else if (d != 0) { vd = vdev_lookup_top(spa, DVA_GET_VDEV(&dva[d - 1])); mg = vd->vdev_mg->mg_next; diff --git a/usr/src/uts/common/fs/zfs/spa.c b/usr/src/uts/common/fs/zfs/spa.c index 53e1ac0f4a..f503592396 100644 --- a/usr/src/uts/common/fs/zfs/spa.c +++ b/usr/src/uts/common/fs/zfs/spa.c @@ -42,6 +42,7 @@ #include <sys/zil.h> #include <sys/vdev_impl.h> #include <sys/metaslab.h> +#include <sys/metaslab_impl.h> #include <sys/uberblock_impl.h> #include <sys/txg.h> #include <sys/avl.h> @@ -578,8 +579,8 @@ spa_activate(spa_t *spa, int mode) spa->spa_state = POOL_STATE_ACTIVE; spa->spa_mode = mode; - spa->spa_normal_class = metaslab_class_create(zfs_metaslab_ops); - spa->spa_log_class = metaslab_class_create(zfs_metaslab_ops); + spa->spa_normal_class = metaslab_class_create(spa, zfs_metaslab_ops); + spa->spa_log_class = metaslab_class_create(spa, zfs_metaslab_ops); for (int t = 0; t < ZIO_TYPES; t++) { const zio_taskq_info_t *ztip = &zio_taskqs[t]; @@ -1101,26 +1102,23 @@ spa_check_removed(vdev_t *vd) * that the label does not contain the most up-to-date information. */ void -spa_load_log_state(spa_t *spa) +spa_load_log_state(spa_t *spa, nvlist_t *nv) { - nvlist_t *nv, *nvroot, **child; - uint64_t is_log; - uint_t children; - vdev_t *rvd = spa->spa_root_vdev; + vdev_t *ovd, *rvd = spa->spa_root_vdev; - VERIFY(load_nvlist(spa, spa->spa_config_object, &nv) == 0); - VERIFY(nvlist_lookup_nvlist(nv, ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); - VERIFY(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, - &child, &children) == 0); - - for (int c = 0; c < children; c++) { - vdev_t *tvd = rvd->vdev_child[c]; + /* + * Load the original root vdev tree from the passed config. 
+ */ + spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); + VERIFY(spa_config_parse(spa, &ovd, nv, NULL, 0, VDEV_ALLOC_LOAD) == 0); - if (nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG, - &is_log) == 0 && is_log) - vdev_load_log_state(tvd, child[c]); + for (int c = 0; c < rvd->vdev_children; c++) { + vdev_t *cvd = rvd->vdev_child[c]; + if (cvd->vdev_islog) + vdev_load_log_state(cvd, ovd->vdev_child[c]); } - nvlist_free(nv); + vdev_free(ovd); + spa_config_exit(spa, SCL_ALL, FTAG); } /* @@ -1151,7 +1149,7 @@ static int spa_load(spa_t *spa, nvlist_t *config, spa_load_state_t state, int mosconfig) { int error = 0; - nvlist_t *nvroot = NULL; + nvlist_t *nvconfig, *nvroot = NULL; vdev_t *rvd; uberblock_t *ub = &spa->spa_uberblock; uint64_t config_cache_txg = spa->spa_config_txg; @@ -1306,23 +1304,22 @@ spa_load(spa_t *spa, nvlist_t *config, spa_load_state_t state, int mosconfig) goto out; } + if (load_nvlist(spa, spa->spa_config_object, &nvconfig) != 0) { + vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, + VDEV_AUX_CORRUPT_DATA); + error = EIO; + goto out; + } + if (!mosconfig) { - nvlist_t *newconfig; uint64_t hostid; - if (load_nvlist(spa, spa->spa_config_object, &newconfig) != 0) { - vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, - VDEV_AUX_CORRUPT_DATA); - error = EIO; - goto out; - } - - if (!spa_is_root(spa) && nvlist_lookup_uint64(newconfig, + if (!spa_is_root(spa) && nvlist_lookup_uint64(nvconfig, ZPOOL_CONFIG_HOSTID, &hostid) == 0) { char *hostname; unsigned long myhostid = 0; - VERIFY(nvlist_lookup_string(newconfig, + VERIFY(nvlist_lookup_string(nvconfig, ZPOOL_CONFIG_HOSTNAME, &hostname) == 0); #ifdef _KERNEL @@ -1347,12 +1344,12 @@ spa_load(spa_t *spa, nvlist_t *config, spa_load_state_t state, int mosconfig) } } - spa_config_set(spa, newconfig); + spa_config_set(spa, nvconfig); spa_unload(spa); spa_deactivate(spa); spa_activate(spa, orig_mode); - return (spa_load(spa, newconfig, state, B_TRUE)); + return (spa_load(spa, nvconfig, state, B_TRUE)); } if 
(zap_lookup(spa->spa_meta_objset, @@ -1471,7 +1468,10 @@ spa_load(spa_t *spa, nvlist_t *config, spa_load_state_t state, int mosconfig) spa_config_exit(spa, SCL_ALL, FTAG); } - spa_load_log_state(spa); + VERIFY(nvlist_lookup_nvlist(nvconfig, ZPOOL_CONFIG_VDEV_TREE, + &nvroot) == 0); + spa_load_log_state(spa, nvroot); + nvlist_free(nvconfig); if (spa_check_logs(spa)) { vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN, @@ -2910,7 +2910,7 @@ spa_reset(char *pool) int spa_vdev_add(spa_t *spa, nvlist_t *nvroot) { - uint64_t txg; + uint64_t txg, id; int error; vdev_t *rvd = spa->spa_root_vdev; vdev_t *vd, *tvd; @@ -2951,9 +2951,19 @@ spa_vdev_add(spa_t *spa, nvlist_t *nvroot) * Transfer each new top-level vdev from vd to rvd. */ for (int c = 0; c < vd->vdev_children; c++) { + + /* + * Set the vdev id to the first hole, if one exists. + */ + for (id = 0; id < rvd->vdev_children; id++) { + if (rvd->vdev_child[id]->vdev_ishole) { + vdev_free(rvd->vdev_child[id]); + break; + } + } tvd = vd->vdev_child[c]; vdev_remove_child(vd, tvd); - tvd->vdev_id = rvd->vdev_children; + tvd->vdev_id = id; vdev_add_child(rvd, tvd); vdev_config_dirty(tvd); } @@ -3136,6 +3146,7 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing) */ vdev_remove_child(newrootvd, newvd); newvd->vdev_id = pvd->vdev_children; + newvd->vdev_crtxg = oldvd->vdev_crtxg; vdev_add_child(pvd, newvd); tvd = newvd->vdev_top; @@ -3444,16 +3455,127 @@ spa_vdev_remove_aux(nvlist_t *config, char *name, nvlist_t **dev, int count, } /* + * Removing a device from the vdev namespace requires several steps + * and can take a significant amount of time. As a result we use + * the spa_vdev_config_[enter/exit] functions which allow us to + * grab and release the spa_config_lock while still holding the namespace + * lock. During each step the configuration is synced out. + */ + +/* + * Initial phase of device removal - stop future allocations from this device. 
+ */ +void +spa_vdev_remove_start(spa_t *spa, vdev_t *vd) +{ + metaslab_group_t *mg = vd->vdev_mg; + + ASSERT(MUTEX_HELD(&spa_namespace_lock)); + ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); + + /* + * Remove our vdev from the allocatable vdevs + */ + if (mg) + metaslab_class_remove(mg->mg_class, mg); +} + +/* + * Evacuate the device. + */ +int +spa_vdev_remove_evacuate(spa_t *spa, vdev_t *vd) +{ + uint64_t txg; + int error; + + ASSERT(MUTEX_HELD(&spa_namespace_lock)); + ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0); + + /* + * Evacuate the device. We don't hold the config lock as writer + * since we need to do I/O but we do keep the + * spa_namespace_lock held. Once this completes the device + * should no longer have any blocks allocated on it. + */ + if (vd->vdev_islog) { + /* + * Evacuate the device. + */ + if (error = dmu_objset_find(spa_name(spa), + zil_vdev_offline, NULL, DS_FIND_CHILDREN)) { + uint64_t txg; + + txg = spa_vdev_config_enter(spa); + metaslab_class_add(spa->spa_log_class, + vd->vdev_mg); + return (spa_vdev_exit(spa, NULL, txg, error)); + } + txg_wait_synced(spa_get_dsl(spa), 0); + } + + /* + * Remove any remaining MOS metadata associated with the device. + */ + txg = spa_vdev_config_enter(spa); + vd->vdev_removing = B_TRUE; + vdev_dirty(vd, 0, NULL, txg); + vdev_config_dirty(vd); + spa_vdev_config_exit(spa, NULL, txg, 0, FTAG); + + return (0); +} + +/* + * Complete the removal by cleaning up the namespace. + */ +void +spa_vdev_remove_done(spa_t *spa, vdev_t *vd) +{ + vdev_t *rvd = spa->spa_root_vdev; + metaslab_group_t *mg = vd->vdev_mg; + uint64_t id = vd->vdev_id; + boolean_t last_vdev = (id == (rvd->vdev_children - 1)); + + ASSERT(MUTEX_HELD(&spa_namespace_lock)); + ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); + + (void) vdev_label_init(vd, 0, VDEV_LABEL_REMOVE); + vdev_free(vd); + + /* + * It's possible that another thread is trying to do a spa_vdev_add() + * at the same time we're trying to remove it.
As a result the + * added vdev may not have initialized its metaslabs yet. + */ + if (mg != NULL) + metaslab_group_destroy(mg); + + if (last_vdev) { + vdev_compact_children(rvd); + } else { + vd = vdev_alloc_common(spa, id, 0, &vdev_hole_ops); + vdev_add_child(rvd, vd); + } + vdev_config_dirty(rvd); + + /* + * Reassess the health of our root vdev. + */ + vdev_reopen(rvd); +} + +/* * Remove a device from the pool. Currently, this supports removing only hot - * spares and level 2 ARC devices. + * spares, slogs, and level 2 ARC devices. */ int spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare) { vdev_t *vd; nvlist_t **spares, **l2cache, *nv; - uint_t nspares, nl2cache; uint64_t txg = 0; + uint_t nspares, nl2cache; int error = 0; boolean_t locked = MUTEX_HELD(&spa_namespace_lock); @@ -3489,6 +3611,29 @@ spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare) ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache, nv); spa_load_l2cache(spa); spa->spa_l2cache.sav_sync = B_TRUE; + } else if (vd != NULL && vd->vdev_islog) { + ASSERT(!locked); + + /* + * XXX - Once we have bp-rewrite this should + * become the common case. + */ + + /* + * 1. Stop allocations + * 2. Evacuate the device (i.e. kill off stubby and + * metadata) and wait for it to complete (i.e. sync). + * 3. Cleanup the vdev namespace. + */ + spa_vdev_remove_start(spa, vd); + + spa_vdev_config_exit(spa, NULL, txg, 0, FTAG); + if ((error = spa_vdev_remove_evacuate(spa, vd)) != 0) + return (error); + txg = spa_vdev_config_enter(spa); + + spa_vdev_remove_done(spa, vd); + } else if (vd != NULL) { /* * Normal vdevs cannot be removed (yet). 
diff --git a/usr/src/uts/common/fs/zfs/spa_config.c b/usr/src/uts/common/fs/zfs/spa_config.c index b2063bba13..d611e0aa9b 100644 --- a/usr/src/uts/common/fs/zfs/spa_config.c +++ b/usr/src/uts/common/fs/zfs/spa_config.c @@ -383,6 +383,13 @@ spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats) vd = vd->vdev_top; /* label contains top config */ } + /* + * Add the top-level config. We even add this on pools which + * don't support holes in the namespace as older pools will + * just ignore it. + */ + vdev_top_config_generate(spa, config); + nvroot = vdev_config_generate(spa, vd, getstats, B_FALSE, B_FALSE); VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0); nvlist_free(nvroot); diff --git a/usr/src/uts/common/fs/zfs/spa_misc.c b/usr/src/uts/common/fs/zfs/spa_misc.c index 9384db4ae9..38474c194d 100644 --- a/usr/src/uts/common/fs/zfs/spa_misc.c +++ b/usr/src/uts/common/fs/zfs/spa_misc.c @@ -836,6 +836,18 @@ uint64_t spa_vdev_enter(spa_t *spa) { mutex_enter(&spa_namespace_lock); + return (spa_vdev_config_enter(spa)); +} + +/* + * Internal implementation for spa_vdev_enter(). Used when a vdev + * operation requires multiple syncs (i.e. removing a device) while + * keeping the spa_namespace_lock held. + */ +uint64_t +spa_vdev_config_enter(spa_t *spa) +{ + ASSERT(MUTEX_HELD(&spa_namespace_lock)); spa_config_enter(spa, SCL_ALL, spa, RW_WRITER); @@ -843,14 +855,14 @@ spa_vdev_enter(spa_t *spa) } /* - * Unlock the spa_t after adding or removing a vdev. Besides undoing the - * locking of spa_vdev_enter(), we also want make sure the transactions have - * synced to disk, and then update the global configuration cache with the new - * information. + * Used in combination with spa_vdev_config_enter() to allow the syncing + * of multiple transactions without releasing the spa_namespace_lock. 
*/ -int -spa_vdev_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error) +void +spa_vdev_config_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error, char *tag) { + ASSERT(MUTEX_HELD(&spa_namespace_lock)); + int config_changed = B_FALSE; ASSERT(txg > spa_last_synced_txg(spa)); @@ -870,9 +882,23 @@ spa_vdev_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error) config_changed = B_TRUE; } + /* + * Verify the metaslab classes. + */ + ASSERT(metaslab_class_validate(spa->spa_normal_class) == 0); + ASSERT(metaslab_class_validate(spa->spa_log_class) == 0); + spa_config_exit(spa, SCL_ALL, spa); /* + * Panic the system if the specified tag requires it. This + * is useful for ensuring that configurations are updated + * transactionally. + */ + if (zio_injection_enabled) + zio_handle_panic_injection(spa, tag); + + /* * Note: this txg_wait_synced() is important because it ensures * that there won't be more than one config change per txg. * This allows us to use the txg as the generation number. @@ -892,7 +918,18 @@ spa_vdev_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error) */ if (config_changed) spa_config_sync(spa, B_FALSE, B_TRUE); +} +/* + * Unlock the spa_t after adding or removing a vdev. Besides undoing the + * locking of spa_vdev_enter(), we also want to make sure the transactions have + * synced to disk, and then update the global configuration cache with the new + * information.
+ */ +int +spa_vdev_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error) +{ + spa_vdev_config_exit(spa, vd, txg, error, FTAG); mutex_exit(&spa_namespace_lock); return (error); diff --git a/usr/src/uts/common/fs/zfs/sys/metaslab.h b/usr/src/uts/common/fs/zfs/sys/metaslab.h index 5d3e11c971..78a5f94952 100644 --- a/usr/src/uts/common/fs/zfs/sys/metaslab.h +++ b/usr/src/uts/common/fs/zfs/sys/metaslab.h @@ -57,10 +57,12 @@ extern void metaslab_free(spa_t *spa, const blkptr_t *bp, uint64_t txg, boolean_t now); extern int metaslab_claim(spa_t *spa, const blkptr_t *bp, uint64_t txg); -extern metaslab_class_t *metaslab_class_create(space_map_ops_t *ops); +extern metaslab_class_t *metaslab_class_create(spa_t *spa, + space_map_ops_t *ops); extern void metaslab_class_destroy(metaslab_class_t *mc); extern void metaslab_class_add(metaslab_class_t *mc, metaslab_group_t *mg); extern void metaslab_class_remove(metaslab_class_t *mc, metaslab_group_t *mg); +extern int metaslab_class_validate(metaslab_class_t *mc); extern metaslab_group_t *metaslab_group_create(metaslab_class_t *mc, vdev_t *vd); diff --git a/usr/src/uts/common/fs/zfs/sys/metaslab_impl.h b/usr/src/uts/common/fs/zfs/sys/metaslab_impl.h index d67dea7e97..bdf9559631 100644 --- a/usr/src/uts/common/fs/zfs/sys/metaslab_impl.h +++ b/usr/src/uts/common/fs/zfs/sys/metaslab_impl.h @@ -37,6 +37,7 @@ extern "C" { #endif struct metaslab_class { + spa_t *mc_spa; metaslab_group_t *mc_rotor; uint64_t mc_allocated; space_map_ops_t *mc_ops; diff --git a/usr/src/uts/common/fs/zfs/sys/spa.h b/usr/src/uts/common/fs/zfs/sys/spa.h index bccee25da9..b4165b24c8 100644 --- a/usr/src/uts/common/fs/zfs/sys/spa.h +++ b/usr/src/uts/common/fs/zfs/sys/spa.h @@ -430,6 +430,9 @@ extern int spa_config_held(spa_t *spa, int locks, krw_t rw); /* Pool vdev add/remove lock */ extern uint64_t spa_vdev_enter(spa_t *spa); +extern uint64_t spa_vdev_config_enter(spa_t *spa); +extern void spa_vdev_config_exit(spa_t *spa, vdev_t *vd, uint64_t txg, + int error, 
char *tag); extern int spa_vdev_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error); /* Pool vdev state change lock */ diff --git a/usr/src/uts/common/fs/zfs/sys/vdev.h b/usr/src/uts/common/fs/zfs/sys/vdev.h index a76cecb4b2..ecf6c2fe17 100644 --- a/usr/src/uts/common/fs/zfs/sys/vdev.h +++ b/usr/src/uts/common/fs/zfs/sys/vdev.h @@ -122,6 +122,7 @@ extern int vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg, extern void vdev_state_dirty(vdev_t *vd); extern void vdev_state_clean(vdev_t *vd); +extern void vdev_top_config_generate(spa_t *spa, nvlist_t *config); extern nvlist_t *vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats, boolean_t isspare, boolean_t isl2cache); diff --git a/usr/src/uts/common/fs/zfs/sys/vdev_impl.h b/usr/src/uts/common/fs/zfs/sys/vdev_impl.h index 23780430df..bb2f98c33e 100644 --- a/usr/src/uts/common/fs/zfs/sys/vdev_impl.h +++ b/usr/src/uts/common/fs/zfs/sys/vdev_impl.h @@ -129,6 +129,7 @@ struct vdev { boolean_t vdev_expanding; /* expand the vdev? */ int vdev_open_error; /* error on last open */ kthread_t *vdev_open_thread; /* thread opening children */ + uint64_t vdev_crtxg; /* txg when top-level was added */ /* * Top-level vdev state. @@ -143,10 +144,12 @@ struct vdev { txg_node_t vdev_txg_node; /* per-txg dirty vdev linkage */ boolean_t vdev_remove_wanted; /* async remove wanted? */ boolean_t vdev_probe_wanted; /* async probe wanted? */ + boolean_t vdev_removing; /* device is being removed? */ list_node_t vdev_config_dirty_node; /* config dirty list */ list_node_t vdev_state_dirty_node; /* state dirty list */ uint64_t vdev_deflate_ratio; /* deflation ratio (x512) */ uint64_t vdev_islog; /* is an intent log device */ + uint64_t vdev_ishole; /* is a hole in the namespace */ /* * Leaf vdev state. 
@@ -248,6 +251,8 @@ typedef struct vdev_label { /* * Allocate or free a vdev */ +extern vdev_t *vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, + vdev_ops_t *ops); extern int vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *config, vdev_t *parent, uint_t id, int alloctype); extern void vdev_free(vdev_t *vd); @@ -264,7 +269,7 @@ extern void vdev_remove_parent(vdev_t *cvd); /* * vdev sync load and sync */ -extern void vdev_load_log_state(vdev_t *vd, nvlist_t *nv); +extern void vdev_load_log_state(vdev_t *nvd, vdev_t *ovd); extern void vdev_load(vdev_t *vd); extern void vdev_sync(vdev_t *vd, uint64_t txg); extern void vdev_sync_done(vdev_t *vd, uint64_t txg); @@ -280,6 +285,7 @@ extern vdev_ops_t vdev_raidz_ops; extern vdev_ops_t vdev_disk_ops; extern vdev_ops_t vdev_file_ops; extern vdev_ops_t vdev_missing_ops; +extern vdev_ops_t vdev_hole_ops; extern vdev_ops_t vdev_spare_ops; /* diff --git a/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h b/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h index b7a2f57cbc..37615ba35f 100644 --- a/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h +++ b/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h @@ -117,6 +117,7 @@ typedef struct zinject_record { uint64_t zi_type; uint32_t zi_freq; uint32_t zi_failfast; + char zi_func[MAXNAMELEN]; } zinject_record_t; #define ZINJECT_NULL 0x1 diff --git a/usr/src/uts/common/fs/zfs/sys/zio.h b/usr/src/uts/common/fs/zfs/sys/zio.h index a85a1cdfcb..305c697697 100644 --- a/usr/src/uts/common/fs/zfs/sys/zio.h +++ b/usr/src/uts/common/fs/zfs/sys/zio.h @@ -442,6 +442,7 @@ extern int zio_inject_fault(char *name, int flags, int *id, extern int zio_inject_list_next(int *id, char *name, size_t buflen, struct zinject_record *record); extern int zio_clear_fault(int id); +extern void zio_handle_panic_injection(spa_t *spa, char *tag); extern int zio_handle_fault_injection(zio_t *zio, int error); extern int zio_handle_device_injection(vdev_t *vd, zio_t *zio, int error); extern int zio_handle_label_injection(zio_t *zio, int error); 
diff --git a/usr/src/uts/common/fs/zfs/vdev.c b/usr/src/uts/common/fs/zfs/vdev.c index 415cd4a9e9..9c8aa43425 100644 --- a/usr/src/uts/common/fs/zfs/vdev.c +++ b/usr/src/uts/common/fs/zfs/vdev.c @@ -54,6 +54,7 @@ static vdev_ops_t *vdev_ops_table[] = { &vdev_disk_ops, &vdev_file_ops, &vdev_missing_ops, + &vdev_hole_ops, NULL }; @@ -281,7 +282,7 @@ vdev_compact_children(vdev_t *pvd) /* * Allocate and minimally initialize a vdev_t. */ -static vdev_t * +vdev_t * vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops) { vdev_t *vd; @@ -293,7 +294,7 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops) spa->spa_root_vdev = vd; } - if (guid == 0) { + if (guid == 0 && ops != &vdev_hole_ops) { if (spa->spa_root_vdev == vd) { /* * The root vdev's guid will also be the pool guid, @@ -318,6 +319,7 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops) vd->vdev_guid_sum = guid; vd->vdev_ops = ops; vd->vdev_state = VDEV_STATE_CLOSED; + vd->vdev_ishole = (ops == &vdev_hole_ops); mutex_init(&vd->vdev_dtl_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&vd->vdev_stat_lock, NULL, MUTEX_DEFAULT, NULL); @@ -397,6 +399,9 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id, if (islog && spa_version(spa) < SPA_VERSION_SLOGS) return (ENOTSUP); + if (ops == &vdev_hole_ops && spa_version(spa) < SPA_VERSION_HOLES) + return (ENOTSUP); + /* * Set the nparity property for RAID-Z vdevs. */ @@ -472,6 +477,12 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id, (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ASHIFT, &vd->vdev_ashift); /* + * Retrieve the vdev creation time. + */ + (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_CREATE_TXG, + &vd->vdev_crtxg); + + /* * If we're a top-level vdev, try to load the allocation parameters. 
*/ if (parent && !parent->vdev_parent && alloctype == VDEV_ALLOC_LOAD) { @@ -705,6 +716,7 @@ vdev_add_parent(vdev_t *cvd, vdev_ops_t *ops) mvd->vdev_min_asize = cvd->vdev_min_asize; mvd->vdev_ashift = cvd->vdev_ashift; mvd->vdev_state = cvd->vdev_state; + mvd->vdev_crtxg = cvd->vdev_crtxg; vdev_remove_child(pvd, cvd); vdev_add_child(pvd, mvd); @@ -772,9 +784,14 @@ vdev_metaslab_init(vdev_t *vd, uint64_t txg) metaslab_t **mspp; int error; - if (vd->vdev_ms_shift == 0) /* not being allocated from yet */ + /* + * This vdev is not being allocated from yet or is a hole. + */ + if (vd->vdev_ms_shift == 0) return (0); + ASSERT(!vd->vdev_ishole); + /* * Compute the raidz-deflation ratio. Note, we hard-code * in 128k (1 << 17) because it is the current "typical" blocksize. @@ -1105,6 +1122,12 @@ vdev_open(vdev_t *vd) vd->vdev_state = VDEV_STATE_HEALTHY; } + /* + * For hole or missing vdevs we just return success. + */ + if (vd->vdev_ishole || vd->vdev_ops == &vdev_missing_ops) + return (0); + for (int c = 0; c < vd->vdev_children; c++) { if (vd->vdev_child[c]->vdev_state != VDEV_STATE_HEALTHY) { vdev_set_state(vd, B_TRUE, VDEV_STATE_DEGRADED, @@ -1393,6 +1416,7 @@ void vdev_dirty(vdev_t *vd, int flags, void *arg, uint64_t txg) { ASSERT(vd == vd->vdev_top); + ASSERT(!vd->vdev_ishole); ASSERT(ISP2(flags)); if (flags & VDD_METASLAB) @@ -1502,7 +1526,7 @@ vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg, int scrub_done) vdev_dtl_reassess(vd->vdev_child[c], txg, scrub_txg, scrub_done); - if (vd == spa->spa_root_vdev) + if (vd == spa->spa_root_vdev || vd->vdev_ishole) return; if (vd->vdev_ops->vdev_op_leaf) { @@ -1592,6 +1616,8 @@ vdev_dtl_load(vdev_t *vd) if (smo->smo_object == 0) return (0); + ASSERT(!vd->vdev_ishole); + if ((error = dmu_bonus_hold(mos, smo->smo_object, FTAG, &db)) != 0) return (error); @@ -1619,6 +1645,8 @@ vdev_dtl_sync(vdev_t *vd, uint64_t txg) dmu_buf_t *db; dmu_tx_t *tx; + ASSERT(!vd->vdev_ishole); + tx = 
dmu_tx_create_assigned(spa->spa_dsl_pool, txg); if (vd->vdev_detached) { @@ -1755,7 +1783,7 @@ vdev_load(vdev_t *vd) /* * If this is a top-level vdev, initialize its metaslabs. */ - if (vd == vd->vdev_top && + if (vd == vd->vdev_top && !vd->vdev_ishole && (vd->vdev_ashift == 0 || vd->vdev_asize == 0 || vdev_metaslab_init(vd, 0) != 0)) vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN, @@ -1812,10 +1840,48 @@ vdev_validate_aux(vdev_t *vd) } void +vdev_remove(vdev_t *vd, uint64_t txg) +{ + spa_t *spa = vd->vdev_spa; + objset_t *mos = spa->spa_meta_objset; + dmu_tx_t *tx; + + tx = dmu_tx_create_assigned(spa_get_dsl(spa), txg); + + if (vd->vdev_dtl_smo.smo_object) { + ASSERT3U(vd->vdev_dtl_smo.smo_alloc, ==, 0); + (void) dmu_object_free(mos, vd->vdev_dtl_smo.smo_object, tx); + vd->vdev_dtl_smo.smo_object = 0; + } + + if (vd->vdev_ms != NULL) { + for (int m = 0; m < vd->vdev_ms_count; m++) { + metaslab_t *msp = vd->vdev_ms[m]; + + if (msp == NULL || msp->ms_smo.smo_object == 0) + continue; + + ASSERT3U(msp->ms_smo.smo_alloc, ==, 0); + (void) dmu_object_free(mos, msp->ms_smo.smo_object, tx); + msp->ms_smo.smo_object = 0; + } + } + + if (vd->vdev_ms_array) { + (void) dmu_object_free(mos, vd->vdev_ms_array, tx); + vd->vdev_ms_array = 0; + vd->vdev_ms_shift = 0; + } + dmu_tx_commit(tx); +} + +void vdev_sync_done(vdev_t *vd, uint64_t txg) { metaslab_t *msp; + ASSERT(!vd->vdev_ishole); + while (msp = txg_list_remove(&vd->vdev_ms_list, TXG_CLEAN(txg))) metaslab_sync_done(msp, txg); } @@ -1828,6 +1894,8 @@ vdev_sync(vdev_t *vd, uint64_t txg) metaslab_t *msp; dmu_tx_t *tx; + ASSERT(!vd->vdev_ishole); + if (vd->vdev_ms_array == 0 && vd->vdev_ms_shift != 0) { ASSERT(vd == vd->vdev_top); tx = dmu_tx_create_assigned(spa->spa_dsl_pool, txg); @@ -1838,6 +1906,9 @@ vdev_sync(vdev_t *vd, uint64_t txg) dmu_tx_commit(tx); } + if (vd->vdev_removing) + vdev_remove(vd, txg); + while ((msp = txg_list_remove(&vd->vdev_ms_list, txg)) != NULL) { metaslab_sync(msp, txg); (void) 
txg_list_add(&vd->vdev_ms_list, msp, TXG_CLEAN(txg)); @@ -2110,7 +2181,15 @@ vdev_clear(spa_t *spa, vdev_t *vd) boolean_t vdev_is_dead(vdev_t *vd) { - return (vd->vdev_state < VDEV_STATE_DEGRADED); + /* + * Holes and missing devices are always considered "dead". + * This simplifies the code since we don't have to check for + * these types of devices in the various code paths. + * Instead we rely on the fact that we skip over dead devices + * before issuing I/O to them. + */ + return (vd->vdev_state < VDEV_STATE_DEGRADED || vd->vdev_ishole || + vd->vdev_ops == &vdev_missing_ops); } boolean_t @@ -2139,7 +2218,7 @@ vdev_allocatable(vdev_t *vd) * we're asking two separate questions about it. */ return (!(state < VDEV_STATE_DEGRADED && state != VDEV_STATE_CLOSED) && - !vd->vdev_cant_write); + !vd->vdev_cant_write && !vd->vdev_ishole && !vd->vdev_removing); } boolean_t @@ -2391,7 +2470,7 @@ vdev_space_update(vdev_t *vd, int64_t space_delta, int64_t alloc_delta, * Don't count non-normal (e.g. intent log) space as part of * the pool's capacity. */ - if (vd->vdev_mg->mg_class != spa->spa_normal_class) + if (vd->vdev_islog) return; mutex_enter(&rvd->vdev_stat_lock); @@ -2472,7 +2551,8 @@ vdev_config_dirty(vdev_t *vd) } else { ASSERT(vd == vd->vdev_top); - if (!list_link_active(&vd->vdev_config_dirty_node)) + if (!list_link_active(&vd->vdev_config_dirty_node) && + !vd->vdev_ishole) list_insert_head(&spa->spa_config_dirty_list, vd); } } @@ -2546,6 +2626,12 @@ vdev_propagate_state(vdev_t *vd) for (int c = 0; c < vd->vdev_children; c++) { child = vd->vdev_child[c]; + /* + * Don't factor holes into the decision. + */ + if (child->vdev_ishole) + continue; + if (!vdev_readable(child) || (!vdev_writeable(child) && spa_writeable(spa))) { /* @@ -2739,32 +2825,31 @@ vdev_is_bootable(vdev_t *vd) return (B_TRUE); } +/* + * Load the state from the original vdev tree (ovd) which + * we've retrieved from the MOS config object. 
If the original + * vdev was offline then we transfer that state to the device + * in the current vdev tree (nvd). + */ void -vdev_load_log_state(vdev_t *vd, nvlist_t *nv) +vdev_load_log_state(vdev_t *nvd, vdev_t *ovd) { - uint_t children; - nvlist_t **child; - uint64_t val; - spa_t *spa = vd->vdev_spa; + spa_t *spa = nvd->vdev_spa; - if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, - &child, &children) == 0) { - for (int c = 0; c < children; c++) - vdev_load_log_state(vd->vdev_child[c], child[c]); - } + ASSERT(spa_config_held(spa, SCL_STATE_ALL, RW_WRITER) == SCL_STATE_ALL); + ASSERT3U(nvd->vdev_guid, ==, ovd->vdev_guid); - if (vd->vdev_ops->vdev_op_leaf && nvlist_lookup_uint64(nv, - ZPOOL_CONFIG_OFFLINE, &val) == 0 && val) { + for (int c = 0; c < nvd->vdev_children; c++) + vdev_load_log_state(nvd->vdev_child[c], ovd->vdev_child[c]); + if (nvd->vdev_ops->vdev_op_leaf && ovd->vdev_offline) { /* * It would be nice to call vdev_offline() * directly but the pool isn't fully loaded and * the txg threads have not been started yet. 
*/ - spa_config_enter(spa, SCL_STATE_ALL, FTAG, RW_WRITER); - vd->vdev_offline = val; - vdev_reopen(vd->vdev_top); - spa_config_exit(spa, SCL_STATE_ALL, FTAG); + nvd->vdev_offline = ovd->vdev_offline; + vdev_reopen(nvd->vdev_top); } } diff --git a/usr/src/uts/common/fs/zfs/vdev_label.c b/usr/src/uts/common/fs/zfs/vdev_label.c index 06cb720128..87adc01622 100644 --- a/usr/src/uts/common/fs/zfs/vdev_label.c +++ b/usr/src/uts/common/fs/zfs/vdev_label.c @@ -287,6 +287,10 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats, VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_DTL, vd->vdev_dtl_smo.smo_object) == 0); + if (vd->vdev_crtxg) + VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_CREATE_TXG, + vd->vdev_crtxg) == 0); + if (getstats) { vdev_stat_t vs; vdev_get_stats(vd, &vs); @@ -298,6 +302,8 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats, nvlist_t **child; int c; + ASSERT(!vd->vdev_ishole); + child = kmem_alloc(vd->vdev_children * sizeof (nvlist_t *), KM_SLEEP); @@ -329,11 +335,45 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats, if (vd->vdev_unspare) VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_UNSPARE, B_TRUE) == 0); + if (vd->vdev_ishole) + VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_IS_HOLE, + B_TRUE) == 0); } return (nv); } +/* + * Generate a view of the top-level vdevs. If we currently have holes + * in the namespace, then generate an array which contains a list of holey + * vdevs. Additionally, add the number of top-level children that currently + * exist. 
+ */ +void +vdev_top_config_generate(spa_t *spa, nvlist_t *config) +{ + vdev_t *rvd = spa->spa_root_vdev; + uint64_t *array; + uint_t idx; + + array = kmem_alloc(rvd->vdev_children * sizeof (uint64_t), KM_SLEEP); + + idx = 0; + for (int c = 0; c < rvd->vdev_children; c++) { + vdev_t *tvd = rvd->vdev_child[c]; + + if (tvd->vdev_ishole) + array[idx++] = c; + } + + VERIFY(nvlist_add_uint64_array(config, ZPOOL_CONFIG_HOLE_ARRAY, + array, idx++) == 0); + VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_VDEV_CHILDREN, + rvd->vdev_children) == 0); + + kmem_free(array, rvd->vdev_children * sizeof (uint64_t)); +} + nvlist_t * vdev_label_read_config(vdev_t *vd) { @@ -516,6 +556,9 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason) crtxg, reason)) != 0) return (error); + /* Track the creation time for this vdev */ + vd->vdev_crtxg = crtxg; + if (!vd->vdev_ops->vdev_op_leaf) return (0); @@ -976,6 +1019,9 @@ vdev_label_sync_list(spa_t *spa, int l, uint64_t txg, int flags) for (vd = list_head(dl); vd != NULL; vd = list_next(dl, vd)) { uint64_t *good_writes = kmem_zalloc(sizeof (uint64_t), KM_SLEEP); + + ASSERT(!vd->vdev_ishole); + zio_t *vio = zio_null(zio, spa, NULL, (vd->vdev_islog || vd->vdev_aux != NULL) ? vdev_label_sync_ignore_done : vdev_label_sync_top_done, diff --git a/usr/src/uts/common/fs/zfs/vdev_missing.c b/usr/src/uts/common/fs/zfs/vdev_missing.c index 731f7d3dce..e1bf7d86a3 100644 --- a/usr/src/uts/common/fs/zfs/vdev_missing.c +++ b/usr/src/uts/common/fs/zfs/vdev_missing.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -48,8 +48,8 @@ vdev_missing_open(vdev_t *vd, uint64_t *psize, uint64_t *ashift) * VDEV_AUX_BAD_GUID_SUM. So we pretend to succeed, knowing that we * will fail the GUID sum check before ever trying to open the pool. 
*/ - *psize = SPA_MINDEVSIZE; - *ashift = SPA_MINBLOCKSHIFT; + *psize = 0; + *ashift = 0; return (0); } @@ -83,3 +83,14 @@ vdev_ops_t vdev_missing_ops = { VDEV_TYPE_MISSING, /* name of this vdev type */ B_TRUE /* leaf vdev */ }; + +vdev_ops_t vdev_hole_ops = { + vdev_missing_open, + vdev_missing_close, + vdev_default_asize, + vdev_missing_io_start, + vdev_missing_io_done, + NULL, + VDEV_TYPE_HOLE, /* name of this vdev type */ + B_TRUE /* leaf vdev */ +}; diff --git a/usr/src/uts/common/fs/zfs/zio_inject.c b/usr/src/uts/common/fs/zfs/zio_inject.c index f8e6880c90..c5ff55243a 100644 --- a/usr/src/uts/common/fs/zfs/zio_inject.c +++ b/usr/src/uts/common/fs/zfs/zio_inject.c @@ -96,6 +96,30 @@ zio_match_handler(zbookmark_t *zb, uint64_t type, } /* + * Panic the system when a config change happens in the function + * specified by tag. + */ +void +zio_handle_panic_injection(spa_t *spa, char *tag) +{ + inject_handler_t *handler; + + rw_enter(&inject_lock, RW_READER); + + for (handler = list_head(&inject_handlers); handler != NULL; + handler = list_next(&inject_handlers, handler)) { + + if (spa != handler->zi_spa) + continue; + + if (strcmp(tag, handler->zi_record.zi_func) == 0) + panic("Panic requested in function %s\n", tag); + } + + rw_exit(&inject_lock); +} + +/* * Determine if the I/O in question should return failure. Returns the errno * to be returned to the caller. 
*/ @@ -126,8 +150,9 @@ zio_handle_fault_injection(zio_t *zio, int error) if (zio->io_spa != handler->zi_spa) continue; - /* Ignore device errors */ - if (handler->zi_record.zi_guid != 0) + /* Ignore device errors and panic injection */ + if (handler->zi_record.zi_guid != 0 || + handler->zi_record.zi_func[0] != '\0') continue; /* If this handler matches, return EIO */ @@ -170,8 +195,9 @@ zio_handle_label_injection(zio_t *zio, int error) uint64_t start = handler->zi_record.zi_start; uint64_t end = handler->zi_record.zi_end; - /* Ignore device only faults */ - if (handler->zi_record.zi_start == 0) + /* Ignore device only faults or panic injection */ + if (handler->zi_record.zi_start == 0 || + handler->zi_record.zi_func[0] != '\0') continue; /* @@ -205,8 +231,9 @@ zio_handle_device_injection(vdev_t *vd, zio_t *zio, int error) for (handler = list_head(&inject_handlers); handler != NULL; handler = list_next(&inject_handlers, handler)) { - /* Ignore label specific faults */ - if (handler->zi_record.zi_start != 0) + /* Ignore label specific faults or panic injection */ + if (handler->zi_record.zi_start != 0 || + handler->zi_record.zi_func[0] != '\0') continue; if (vd->vdev_guid == handler->zi_record.zi_guid) { diff --git a/usr/src/uts/common/sys/fs/zfs.h b/usr/src/uts/common/sys/fs/zfs.h index b88fb5419a..de0d67176e 100644 --- a/usr/src/uts/common/sys/fs/zfs.h +++ b/usr/src/uts/common/sys/fs/zfs.h @@ -295,14 +295,15 @@ typedef enum zfs_cache_type { #define SPA_VERSION_16 16ULL #define SPA_VERSION_17 17ULL #define SPA_VERSION_18 18ULL +#define SPA_VERSION_19 19ULL /* * When bumping up SPA_VERSION, make sure GRUB ZFS understands the on-disk * format change. Go to usr/src/grub/grub-0.97/stage2/{zfs-include/, fsys_zfs*}, * and do the appropriate changes. Also bump the version number in * usr/src/grub/capability. 
*/ -#define SPA_VERSION SPA_VERSION_18 -#define SPA_VERSION_STRING "18" +#define SPA_VERSION SPA_VERSION_19 +#define SPA_VERSION_STRING "19" /* * Symbolic names for the changes that caused a SPA_VERSION switch. @@ -342,6 +343,7 @@ typedef enum zfs_cache_type { #define SPA_VERSION_STMF_PROP SPA_VERSION_16 #define SPA_VERSION_RAIDZ3 SPA_VERSION_17 #define SPA_VERSION_USERREFS SPA_VERSION_18 +#define SPA_VERSION_HOLES SPA_VERSION_19 /* * ZPL version - rev'd whenever an incompatible on-disk format change @@ -401,6 +403,9 @@ typedef enum zfs_cache_type { #define ZPOOL_CONFIG_PHYS_PATH "phys_path" #define ZPOOL_CONFIG_IS_LOG "is_log" #define ZPOOL_CONFIG_L2CACHE "l2cache" +#define ZPOOL_CONFIG_HOLE_ARRAY "hole_array" +#define ZPOOL_CONFIG_VDEV_CHILDREN "vdev_children" +#define ZPOOL_CONFIG_IS_HOLE "is_hole" #define ZPOOL_CONFIG_SUSPENDED "suspended" /* not stored on disk */ #define ZPOOL_CONFIG_TIMESTAMP "timestamp" /* not stored on disk */ #define ZPOOL_CONFIG_BOOTFS "bootfs" /* not stored on disk */ @@ -422,6 +427,7 @@ typedef enum zfs_cache_type { #define VDEV_TYPE_DISK "disk" #define VDEV_TYPE_FILE "file" #define VDEV_TYPE_MISSING "missing" +#define VDEV_TYPE_HOLE "hole" #define VDEV_TYPE_SPARE "spare" #define VDEV_TYPE_LOG "log" #define VDEV_TYPE_L2CACHE "l2cache" |