diff options
author | Jerry Jelinek <jerry.jelinek@joyent.com> | 2019-04-04 13:53:47 +0000 |
---|---|---|
committer | Jerry Jelinek <jerry.jelinek@joyent.com> | 2019-04-04 13:53:47 +0000 |
commit | ba8d35b8e828327d042f5b28a474b63bb8bad611 (patch) | |
tree | df1260825e1794778c3428c9c39cf7dd4883ff66 /usr | |
parent | da3e6b844dd737652a3d4fd1064a6bd15b4274ad (diff) | |
parent | 251a62bcb8f02bf0421d03de4ad180a37416657d (diff) | |
download | illumos-joyent-ba8d35b8e828327d042f5b28a474b63bb8bad611.tar.gz |
[illumos-gate merge]
commit 251a62bcb8f02bf0421d03de4ad180a37416657d
10590 Update tzdata to 2019a
commit 0c26abfe36e56b0d0ca74a046c85dc41f76d1d59
10597 would like a way to set NMI behavior at boot
commit e0f1c0afa46cc84d4b1e40124032a9a87310386e
10499 Multi-modifier protection (MMP)
Conflicts:
usr/src/uts/i86pc/sys/apic_common.h
usr/src/uts/i86pc/os/mlsetup.c
usr/src/uts/i86pc/io/pcplusmp/apic_common.c
Diffstat (limited to 'usr')
82 files changed, 4054 insertions, 450 deletions
diff --git a/usr/src/cmd/zdb/Makefile.com b/usr/src/cmd/zdb/Makefile.com index b90b5810e7..be85c6dbb5 100644 --- a/usr/src/cmd/zdb/Makefile.com +++ b/usr/src/cmd/zdb/Makefile.com @@ -52,7 +52,6 @@ CPPFLAGS += -D_LARGEFILE64_SOURCE=1 -D_REENTRANT $(INCS) -DDEBUG # in Makefile.master CERRWARN += -_gcc=-Wmissing-braces CERRWARN += -_gcc=-Wsign-compare -CERRWARN += -_gcc=-Wmissing-field-initializers SMOFF += 64bit_shift,all_func_returns diff --git a/usr/src/cmd/zdb/zdb.c b/usr/src/cmd/zdb/zdb.c index 489b3db63c..62f6adc330 100644 --- a/usr/src/cmd/zdb/zdb.c +++ b/usr/src/cmd/zdb/zdb.c @@ -24,6 +24,7 @@ * Copyright (c) 2011, 2017 by Delphix. All rights reserved. * Copyright (c) 2014 Integros [integros.com] * Copyright 2017 Nexenta Systems, Inc. + * Copyright (c) 2017, 2018 Lawrence Livermore National Security, LLC. * Copyright 2017 RackTop Systems. */ @@ -2411,6 +2412,13 @@ dump_uberblock(uberblock_t *ub, const char *header, const char *footer) (void) printf("\tguid_sum = %llu\n", (u_longlong_t)ub->ub_guid_sum); (void) printf("\ttimestamp = %llu UTC = %s", (u_longlong_t)ub->ub_timestamp, asctime(localtime(&timestamp))); + + (void) printf("\tmmp_magic = %016llx\n", + (u_longlong_t)ub->ub_mmp_magic); + if (ub->ub_mmp_magic == MMP_MAGIC) + (void) printf("\tmmp_delay = %0llu\n", + (u_longlong_t)ub->ub_mmp_delay); + if (dump_opt['u'] >= 3) { char blkbuf[BP_SPRINTF_LEN]; snprintf_blkptr(blkbuf, sizeof (blkbuf), &ub->ub_rootbp); @@ -2509,6 +2517,12 @@ dump_label_uberblocks(vdev_label_t *lbl, uint64_t ashift) if (uberblock_verify(ub)) continue; + + if ((dump_opt['u'] < 4) && + (ub->ub_mmp_magic == MMP_MAGIC) && ub->ub_mmp_delay && + (i >= VDEV_UBERBLOCK_COUNT(&vd) - MMP_BLOCKS_PER_LABEL)) + continue; + (void) snprintf(header, ZDB_MAX_UB_HEADER_SIZE, "Uberblock[%d]\n", i); dump_uberblock(ub, header, ""); @@ -4144,6 +4158,22 @@ verify_device_removal_feature_counts(spa_t *spa) return (ret); } +static void +zdb_set_skip_mmp(char *target) +{ + spa_t *spa; + + /* + * Disable the activity
check to allow examination of + * active pools. + */ + mutex_enter(&spa_namespace_lock); + if ((spa = spa_lookup(target)) != NULL) { + spa->spa_import_flags |= ZFS_IMPORT_SKIP_MMP; + } + mutex_exit(&spa_namespace_lock); +} + #define BOGUS_SUFFIX "_CHECKPOINTED_UNIVERSE" /* * Import the checkpointed state of the pool specified by the target @@ -4178,6 +4208,7 @@ import_checkpointed_state(char *target, nvlist_t *cfg, char **new_path) } if (cfg == NULL) { + zdb_set_skip_mmp(poolname); error = spa_get_stats(poolname, &cfg, NULL, 0); if (error != 0) { fatal("Tried to read config of pool \"%s\" but " @@ -4190,7 +4221,8 @@ import_checkpointed_state(char *target, nvlist_t *cfg, char **new_path) fnvlist_add_string(cfg, ZPOOL_CONFIG_POOL_NAME, bogus_name); error = spa_import(bogus_name, cfg, NULL, - ZFS_IMPORT_MISSING_LOG | ZFS_IMPORT_CHECKPOINT); + ZFS_IMPORT_MISSING_LOG | ZFS_IMPORT_CHECKPOINT | + ZFS_IMPORT_SKIP_MMP); if (error != 0) { fatal("Tried to import pool \"%s\" but spa_import() failed " "with error %d\n", bogus_name, error); @@ -5190,90 +5222,6 @@ zdb_embedded_block(char *thing) free(buf); } -static boolean_t -pool_match(nvlist_t *cfg, char *tgt) -{ - uint64_t v, guid = strtoull(tgt, NULL, 0); - char *s; - - if (guid != 0) { - if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID, &v) == 0) - return (v == guid); - } else { - if (nvlist_lookup_string(cfg, ZPOOL_CONFIG_POOL_NAME, &s) == 0) - return (strcmp(s, tgt) == 0); - } - return (B_FALSE); -} - -static char * -find_zpool(char **target, nvlist_t **configp, int dirc, char **dirv) -{ - nvlist_t *pools; - nvlist_t *match = NULL; - char *name = NULL; - char *sepp = NULL; - char sep = '\0'; - int count = 0; - importargs_t args; - - bzero(&args, sizeof (args)); - args.paths = dirc; - args.path = dirv; - args.can_be_active = B_TRUE; - - if ((sepp = strpbrk(*target, "/@")) != NULL) { - sep = *sepp; - *sepp = '\0'; - } - - pools = zpool_search_import(g_zfs, &args); - - if (pools != NULL) { - nvpair_t *elem = NULL; - 
while ((elem = nvlist_next_nvpair(pools, elem)) != NULL) { - verify(nvpair_value_nvlist(elem, configp) == 0); - if (pool_match(*configp, *target)) { - count++; - if (match != NULL) { - /* print previously found config */ - if (name != NULL) { - (void) printf("%s\n", name); - dump_nvlist(match, 8); - name = NULL; - } - (void) printf("%s\n", - nvpair_name(elem)); - dump_nvlist(*configp, 8); - } else { - match = *configp; - name = nvpair_name(elem); - } - } - } - } - if (count > 1) - (void) fatal("\tMatched %d pools - use pool GUID " - "instead of pool name or \n" - "\tpool name part of a dataset name to select pool", count); - - if (sepp) - *sepp = sep; - /* - * If pool GUID was specified for pool id, replace it with pool name - */ - if (name && (strstr(*target, name) != *target)) { - int sz = 1 + strlen(name) + ((sepp) ? strlen(sepp) : 0); - - *target = umem_alloc(sz, UMEM_NOFAIL); - (void) snprintf(*target, sz, "%s%s", name, sepp ? sepp : ""); - } - - *configp = name ? match : NULL; - - return (name); -} - int main(int argc, char **argv) { @@ -5286,7 +5234,7 @@ main(int argc, char **argv) int error = 0; char **searchdirs = NULL; int nsearch = 0; - char *target; + char *target, *target_pool; nvlist_t *policy = NULL; uint64_t max_txg = UINT64_MAX; int flags = ZFS_IMPORT_MISSING_LOG; @@ -5493,22 +5441,48 @@ main(int argc, char **argv) error = 0; target = argv[0]; + if (strpbrk(target, "/@") != NULL) { + size_t targetlen; + + target_pool = strdup(target); + *strpbrk(target_pool, "/@") = '\0'; + + target_is_spa = B_FALSE; + targetlen = strlen(target); + if (targetlen && target[targetlen - 1] == '/') + target[targetlen - 1] = '\0'; + } else { + target_pool = target; + } + if (dump_opt['e']) { - char *name = find_zpool(&target, &cfg, nsearch, searchdirs); + importargs_t args = { 0 }; - error = ENOENT; - if (name) { - if (dump_opt['C'] > 1) { - (void) printf("\nConfiguration for import:\n"); - dump_nvlist(cfg, 8); - } + args.paths = nsearch; + args.path = searchdirs; + 
args.can_be_active = B_TRUE; + + error = zpool_tryimport(g_zfs, target_pool, &cfg, &args); + + if (error == 0) { if (nvlist_add_nvlist(cfg, ZPOOL_LOAD_POLICY, policy) != 0) { fatal("can't open '%s': %s", target, strerror(ENOMEM)); } - error = spa_import(name, cfg, NULL, flags); + + if (dump_opt['C'] > 1) { + (void) printf("\nConfiguration for import:\n"); + dump_nvlist(cfg, 8); + } + + /* + * Disable the activity check to allow examination of + * active pools. + */ + error = spa_import(target_pool, cfg, NULL, + flags | ZFS_IMPORT_SKIP_MMP); } } @@ -5523,21 +5497,6 @@ main(int argc, char **argv) } - if (strpbrk(target, "/@") != NULL) { - size_t targetlen; - - target_is_spa = B_FALSE; - /* - * Remove any trailing slash. Later code would get confused - * by it, but we want to allow it so that "pool/" can - * indicate that we want to dump the topmost filesystem, - * rather than the whole pool. - */ - targetlen = strlen(target); - if (targetlen != 0 && target[targetlen - 1] == '/') - target[targetlen - 1] = '\0'; - } - if (error == 0) { if (dump_opt['k'] && (target_is_spa || dump_opt['R'])) { ASSERT(checkpoint_pool != NULL); @@ -5551,6 +5510,7 @@ main(int argc, char **argv) } } else if (target_is_spa || dump_opt['R']) { + zdb_set_skip_mmp(target); error = spa_open_rewind(target, &spa, FTAG, policy, NULL); if (error) { @@ -5573,6 +5533,7 @@ main(int argc, char **argv) } } } else { + zdb_set_skip_mmp(target); error = open_objset(target, DMU_OST_ANY, FTAG, &os); } } diff --git a/usr/src/cmd/zhack/zhack.c b/usr/src/cmd/zhack/zhack.c index 79ca3a7e6c..6b8e9dc47c 100644 --- a/usr/src/cmd/zhack/zhack.c +++ b/usr/src/cmd/zhack/zhack.c @@ -121,16 +121,11 @@ space_delta_cb(dmu_object_type_t bonustype, void *data, * Target is the dataset whose pool we want to open. 
*/ static void -import_pool(const char *target, boolean_t readonly) +zhack_import(char *target, boolean_t readonly) { nvlist_t *config; - nvlist_t *pools; - int error; - char *sepp; - spa_t *spa; - nvpair_t *elem; nvlist_t *props; - const char *name; + int error; kernel_init(readonly ? FREAD : (FREAD | FWRITE)); g_zfs = libzfs_init(); @@ -139,68 +134,40 @@ import_pool(const char *target, boolean_t readonly) dmu_objset_register_type(DMU_OST_ZFS, space_delta_cb); g_readonly = readonly; - - /* - * If we only want readonly access, it's OK if we find - * a potentially-active (ie, imported into the kernel) pool from the - * default cachefile. - */ - if (readonly && spa_open(target, &spa, FTAG) == 0) { - spa_close(spa, FTAG); - return; - } - g_importargs.unique = B_TRUE; g_importargs.can_be_active = readonly; g_pool = strdup(target); - if ((sepp = strpbrk(g_pool, "/@")) != NULL) - *sepp = '\0'; - g_importargs.poolname = g_pool; - pools = zpool_search_import(g_zfs, &g_importargs); - - if (nvlist_empty(pools)) { - if (!g_importargs.can_be_active) { - g_importargs.can_be_active = B_TRUE; - if (zpool_search_import(g_zfs, &g_importargs) != NULL || - spa_open(target, &spa, FTAG) == 0) { - fatal(spa, FTAG, "cannot import '%s': pool is " - "active; run " "\"zpool export %s\" " - "first\n", g_pool, g_pool); - } - } - fatal(NULL, FTAG, "cannot import '%s': no such pool " - "available\n", g_pool); - } - - elem = nvlist_next_nvpair(pools, NULL); - name = nvpair_name(elem); - verify(nvpair_value_nvlist(elem, &config) == 0); + error = zpool_tryimport(g_zfs, target, &config, &g_importargs); + if (error) + fatal(NULL, FTAG, "cannot import '%s': %s", target, + libzfs_error_description(g_zfs)); props = NULL; if (readonly) { - verify(nvlist_alloc(&props, NV_UNIQUE_NAME, 0) == 0); - verify(nvlist_add_uint64(props, + VERIFY(nvlist_alloc(&props, NV_UNIQUE_NAME, 0) == 0); + VERIFY(nvlist_add_uint64(props, zpool_prop_to_name(ZPOOL_PROP_READONLY), 1) == 0); } zfeature_checks_disable = B_TRUE; - 
error = spa_import(name, config, props, ZFS_IMPORT_NORMAL); + error = spa_import(target, config, props, + (readonly ? ZFS_IMPORT_SKIP_MMP : ZFS_IMPORT_NORMAL)); zfeature_checks_disable = B_FALSE; if (error == EEXIST) error = 0; if (error) - fatal(NULL, FTAG, "can't import '%s': %s", name, + fatal(NULL, FTAG, "can't import '%s': %s", target, strerror(error)); } static void -zhack_spa_open(const char *target, boolean_t readonly, void *tag, spa_t **spa) +zhack_spa_open(char *target, boolean_t readonly, void *tag, spa_t **spa) { int err; - import_pool(target, readonly); + zhack_import(target, readonly); zfeature_checks_disable = B_TRUE; err = spa_open(target, spa, tag); diff --git a/usr/src/cmd/zpool/zpool_main.c b/usr/src/cmd/zpool/zpool_main.c index d83d3854d0..161ea1792f 100644 --- a/usr/src/cmd/zpool/zpool_main.c +++ b/usr/src/cmd/zpool/zpool_main.c @@ -51,6 +51,7 @@ #include <zfs_prop.h> #include <sys/fs/zfs.h> #include <sys/stat.h> +#include <sys/debug.h> #include <libzfs.h> @@ -1616,6 +1617,10 @@ print_status_config(zpool_handle_t *zhp, const char *name, nvlist_t *nv, (void) printf(gettext("split into new pool")); break; + case VDEV_AUX_ACTIVE: + (void) printf(gettext("currently in use")); + break; + case VDEV_AUX_CHILDREN_OFFLINE: (void) printf(gettext("all children offline")); break; @@ -1744,6 +1749,10 @@ print_import_config(const char *name, nvlist_t *nv, int namewidth, int depth) (void) printf(gettext("too many errors")); break; + case VDEV_AUX_ACTIVE: + (void) printf(gettext("currently in use")); + break; + case VDEV_AUX_CHILDREN_OFFLINE: (void) printf(gettext("all children offline")); break; @@ -1841,8 +1850,10 @@ show_import(nvlist_t *config) vdev_stat_t *vs; char *name; uint64_t guid; + uint64_t hostid = 0; char *msgid; - nvlist_t *nvroot; + char *hostname = "unknown"; + nvlist_t *nvroot, *nvinfo; int reason; const char *health; uint_t vsc; @@ -1929,6 +1940,17 @@ show_import(nvlist_t *config) zpool_print_unsup_feat(config); break; + case 
ZPOOL_STATUS_HOSTID_ACTIVE: + (void) printf(gettext(" status: The pool is currently " + "imported by another system.\n")); + break; + + case ZPOOL_STATUS_HOSTID_REQUIRED: + (void) printf(gettext(" status: The pool has the " + "multihost property on. It cannot\n\tbe safely imported " + "when the system hostid is not set.\n")); + break; + case ZPOOL_STATUS_HOSTID_MISMATCH: (void) printf(gettext(" status: The pool was last accessed by " "another system.\n")); @@ -2009,6 +2031,27 @@ show_import(nvlist_t *config) "imported. Attach the missing\n\tdevices and try " "again.\n")); break; + case ZPOOL_STATUS_HOSTID_ACTIVE: + VERIFY0(nvlist_lookup_nvlist(config, + ZPOOL_CONFIG_LOAD_INFO, &nvinfo)); + + if (nvlist_exists(nvinfo, ZPOOL_CONFIG_MMP_HOSTNAME)) + hostname = fnvlist_lookup_string(nvinfo, + ZPOOL_CONFIG_MMP_HOSTNAME); + + if (nvlist_exists(nvinfo, ZPOOL_CONFIG_MMP_HOSTID)) + hostid = fnvlist_lookup_uint64(nvinfo, + ZPOOL_CONFIG_MMP_HOSTID); + + (void) printf(gettext(" action: The pool must be " + "exported from %s (hostid=%lx)\n\tbefore it " + "can be safely imported.\n"), hostname, + (unsigned long) hostid); + break; + case ZPOOL_STATUS_HOSTID_REQUIRED: + (void) printf(gettext(" action: Check the SMF " + "svc:/system/hostid service.\n")); + break; default: (void) printf(gettext(" action: The pool cannot be " "imported due to damaged devices or data.\n")); @@ -2056,6 +2099,31 @@ show_import(nvlist_t *config) } } +static boolean_t +zfs_force_import_required(nvlist_t *config) +{ + uint64_t state; + uint64_t hostid = 0; + nvlist_t *nvinfo; + + state = fnvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE); + (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID, &hostid); + + if (state != POOL_STATE_EXPORTED && hostid != get_system_hostid()) + return (B_TRUE); + + nvinfo = fnvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO); + if (nvlist_exists(nvinfo, ZPOOL_CONFIG_MMP_STATE)) { + mmp_state_t mmp_state = fnvlist_lookup_uint64(nvinfo, + ZPOOL_CONFIG_MMP_STATE); + + 
if (mmp_state != MMP_STATE_INACTIVE) + return (B_TRUE); + } + + return (B_FALSE); +} + /* * Perform the import for the given configuration. This passes the heavy * lifting off to zpool_import_props(), and then mounts the datasets contained @@ -2067,53 +2135,73 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts, { zpool_handle_t *zhp; char *name; - uint64_t state; uint64_t version; - verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME, - &name) == 0); + name = fnvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME); + version = fnvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION); - verify(nvlist_lookup_uint64(config, - ZPOOL_CONFIG_POOL_STATE, &state) == 0); - verify(nvlist_lookup_uint64(config, - ZPOOL_CONFIG_VERSION, &version) == 0); if (!SPA_VERSION_IS_SUPPORTED(version)) { (void) fprintf(stderr, gettext("cannot import '%s': pool " "is formatted using an unsupported ZFS version\n"), name); return (1); - } else if (state != POOL_STATE_EXPORTED && + } else if (zfs_force_import_required(config) && !(flags & ZFS_IMPORT_ANY_HOST)) { - uint64_t hostid; - - if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID, - &hostid) == 0) { - if ((unsigned long)hostid != gethostid()) { - char *hostname; - uint64_t timestamp; - time_t t; - - verify(nvlist_lookup_string(config, - ZPOOL_CONFIG_HOSTNAME, &hostname) == 0); - verify(nvlist_lookup_uint64(config, - ZPOOL_CONFIG_TIMESTAMP, &timestamp) == 0); - t = timestamp; - (void) fprintf(stderr, gettext("cannot import " - "'%s': pool may be in use from other " - "system, it was last accessed by %s " - "(hostid: 0x%lx) on %s"), name, hostname, - (unsigned long)hostid, - asctime(localtime(&t))); - (void) fprintf(stderr, gettext("use '-f' to " - "import anyway\n")); - return (1); - } + mmp_state_t mmp_state = MMP_STATE_INACTIVE; + nvlist_t *nvinfo; + + nvinfo = fnvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO); + if (nvlist_exists(nvinfo, ZPOOL_CONFIG_MMP_STATE)) + mmp_state = fnvlist_lookup_uint64(nvinfo, +
ZPOOL_CONFIG_MMP_STATE); + + if (mmp_state == MMP_STATE_ACTIVE) { + char *hostname = "<unknown>"; + uint64_t hostid = 0; + + if (nvlist_exists(nvinfo, ZPOOL_CONFIG_MMP_HOSTNAME)) + hostname = fnvlist_lookup_string(nvinfo, + ZPOOL_CONFIG_MMP_HOSTNAME); + + if (nvlist_exists(nvinfo, ZPOOL_CONFIG_MMP_HOSTID)) + hostid = fnvlist_lookup_uint64(nvinfo, + ZPOOL_CONFIG_MMP_HOSTID); + + (void) fprintf(stderr, gettext("cannot import '%s': " + "pool is imported on %s (hostid: " + "0x%lx)\nExport the pool on the other system, " + "then run 'zpool import'.\n"), + name, hostname, (unsigned long) hostid); + } else if (mmp_state == MMP_STATE_NO_HOSTID) { + (void) fprintf(stderr, gettext("Cannot import '%s': " + "pool has the multihost property on and the\n" + "system's hostid is not set.\n"), name); } else { + char *hostname = "<unknown>"; + uint64_t timestamp = 0; + uint64_t hostid = 0; + + if (nvlist_exists(config, ZPOOL_CONFIG_HOSTNAME)) + hostname = fnvlist_lookup_string(config, + ZPOOL_CONFIG_HOSTNAME); + + if (nvlist_exists(config, ZPOOL_CONFIG_TIMESTAMP)) + timestamp = fnvlist_lookup_uint64(config, + ZPOOL_CONFIG_TIMESTAMP); + + if (nvlist_exists(config, ZPOOL_CONFIG_HOSTID)) + hostid = fnvlist_lookup_uint64(config, + ZPOOL_CONFIG_HOSTID); + (void) fprintf(stderr, gettext("cannot import '%s': " - "pool may be in use from other system\n"), name); - (void) fprintf(stderr, gettext("use '-f' to import " - "anyway\n")); - return (1); + "pool was previously in use from another system.\n" + "Last accessed by %s (hostid=%lx) at %s" + "The pool can be imported, use 'zpool import -f' " + "to import the pool.\n"), name, hostname, + (unsigned long)hostid, ctime((time_t *)&timestamp)); + } + + return (1); } if (zpool_import_props(g_zfs, config, newname, props, flags) != 0) @@ -5055,6 +5143,15 @@ status_callback(zpool_handle_t *zhp, void *data) "to be recovered.\n")); break; + case ZPOOL_STATUS_IO_FAILURE_MMP: + (void) printf(gettext("status: The pool is suspended because " + "multihost
writes failed or were delayed;\n\tanother " + "system could import the pool undetected.\n")); + (void) printf(gettext("action: Make sure the pool's devices " + "are connected, then reboot your system and\n\timport the " + "pool.\n")); + break; + case ZPOOL_STATUS_IO_FAILURE_WAIT: case ZPOOL_STATUS_IO_FAILURE_CONTINUE: (void) printf(gettext("status: One or more devices are " diff --git a/usr/src/cmd/ztest/Makefile.com b/usr/src/cmd/ztest/Makefile.com index 4f0ea21a0f..f0f503dae8 100644 --- a/usr/src/cmd/ztest/Makefile.com +++ b/usr/src/cmd/ztest/Makefile.com @@ -22,7 +22,7 @@ # Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. # Copyright (c) 2012, 2016 by Delphix. All rights reserved. # Copyright 2017 RackTop Systems. -# Copyright (c) 2018, Joyent, Inc. +# Copyright 2019, Joyent, Inc. PROG= ztest OBJS= $(PROG).o @@ -36,7 +36,7 @@ INCS += -I../../../uts/common/fs/zfs INCS += -I../../../uts/common/fs/zfs/lua INCS += -I../../../common/zfs -LDLIBS += -lumem -lzpool -lcmdutils -lm -lnvpair -lfakekernel +LDLIBS += -lumem -lzpool -lcmdutils -lm -lnvpair -lfakekernel -lzfs CSTD= $(CSTD_GNU99) C99LMODE= -Xc99=%all diff --git a/usr/src/cmd/ztest/ztest.c b/usr/src/cmd/ztest/ztest.c index b2e99a4c6e..982a5e0ea8 100644 --- a/usr/src/cmd/ztest/ztest.c +++ b/usr/src/cmd/ztest/ztest.c @@ -126,6 +126,7 @@ #include <math.h> #include <sys/fs/zfs.h> #include <libnvpair.h> +#include <libzfs.h> #include <libcmdutils.h> static int ztest_fd_data = -1; @@ -164,6 +165,7 @@ typedef struct ztest_shared_opts { uint64_t zo_time; uint64_t zo_maxloops; uint64_t zo_metaslab_force_ganging; + int zo_mmp_test; } ztest_shared_opts_t; static const ztest_shared_opts_t ztest_opts_defaults = { @@ -182,6 +184,7 @@ static const ztest_shared_opts_t ztest_opts_defaults = { .zo_passtime = 60, /* 60 seconds */ .zo_killrate = 70, /* 70% kill rate */ .zo_verbose = 0, + .zo_mmp_test = 0, .zo_init = 1, .zo_time = 300, /* 5 minutes */ .zo_maxloops = 50, /* max loops during spa_freeze() */ 
@@ -341,6 +344,7 @@ ztest_func_t ztest_spa_create_destroy; ztest_func_t ztest_fault_inject; ztest_func_t ztest_ddt_repair; ztest_func_t ztest_dmu_snapshot_hold; +ztest_func_t ztest_mmp_enable_disable; ztest_func_t ztest_scrub; ztest_func_t ztest_dsl_dataset_promote_busy; ztest_func_t ztest_vdev_attach_detach; @@ -386,6 +390,7 @@ ztest_info_t ztest_info[] = { { ztest_fault_inject, 1, &zopt_sometimes }, { ztest_ddt_repair, 1, &zopt_sometimes }, { ztest_dmu_snapshot_hold, 1, &zopt_sometimes }, + { ztest_mmp_enable_disable, 1, &zopt_sometimes }, { ztest_reguid, 1, &zopt_rarely }, { ztest_scrub, 1, &zopt_rarely }, { ztest_spa_upgrade, 1, &zopt_rarely }, @@ -599,6 +604,7 @@ usage(boolean_t requested) "\t[-k kill_percentage (default: %llu%%)]\n" "\t[-p pool_name (default: %s)]\n" "\t[-f dir (default: %s)] file directory for vdev files\n" + "\t[-M] Multi-host simulate pool imported on remote host\n" "\t[-V] verbose (use multiple times for ever more blather)\n" "\t[-E] use existing pool instead of creating new one\n" "\t[-T time (default: %llu sec)] total run time\n" @@ -642,7 +648,7 @@ process_options(int argc, char **argv) bcopy(&ztest_opts_defaults, zo, sizeof (*zo)); while ((opt = getopt(argc, argv, - "v:s:a:m:r:R:d:t:g:i:k:p:f:VET:P:hF:B:o:")) != EOF) { + "v:s:a:m:r:R:d:t:g:i:k:p:f:MVET:P:hF:B:o:")) != EOF) { value = 0; switch (opt) { case 'v': @@ -711,6 +717,9 @@ process_options(int argc, char **argv) sizeof (zo->zo_dir)); } break; + case 'M': + zo->zo_mmp_test = 1; + break; case 'V': zo->zo_verbose++; break; @@ -2478,6 +2487,9 @@ ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id) spa_t *spa; nvlist_t *nvroot; + if (zo->zo_mmp_test) + return; + /* * Attempt to create using a bad file. */ @@ -2509,6 +2521,56 @@ ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id) rw_exit(&ztest_name_lock); } +/* + * Start and then stop the MMP threads to ensure the startup and shutdown code + * works properly. Actual protection and property-related code tested via ZTS. 
+ */ +/* ARGSUSED */ +void +ztest_mmp_enable_disable(ztest_ds_t *zd, uint64_t id) +{ + ztest_shared_opts_t *zo = &ztest_opts; + spa_t *spa = ztest_spa; + + if (zo->zo_mmp_test) + return; + + /* + * Since enabling MMP involves setting a property, it could not be done + * while the pool is suspended. + */ + if (spa_suspended(spa)) + return; + + spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); + mutex_enter(&spa->spa_props_lock); + + zfs_multihost_fail_intervals = 0; + + if (!spa_multihost(spa)) { + spa->spa_multihost = B_TRUE; + mmp_thread_start(spa); + } + + mutex_exit(&spa->spa_props_lock); + spa_config_exit(spa, SCL_CONFIG, FTAG); + + txg_wait_synced(spa_get_dsl(spa), 0); + mmp_signal_all_threads(); + txg_wait_synced(spa_get_dsl(spa), 0); + + spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); + mutex_enter(&spa->spa_props_lock); + + if (spa_multihost(spa)) { + mmp_thread_stop(spa); + spa->spa_multihost = B_FALSE; + } + + mutex_exit(&spa->spa_props_lock); + spa_config_exit(spa, SCL_CONFIG, FTAG); +} + /* ARGSUSED */ void ztest_spa_upgrade(ztest_ds_t *zd, uint64_t id) @@ -2519,6 +2581,9 @@ ztest_spa_upgrade(ztest_ds_t *zd, uint64_t id) nvlist_t *nvroot, *props; char *name; + if (ztest_opts.zo_mmp_test) + return; + mutex_enter(&ztest_vdev_lock); name = kmem_asprintf("%s_upgrade", ztest_opts.zo_pool); @@ -2687,6 +2752,9 @@ ztest_vdev_add_remove(ztest_ds_t *zd, uint64_t id) nvlist_t *nvroot; int error; + if (ztest_opts.zo_mmp_test) + return; + mutex_enter(&ztest_vdev_lock); leaves = MAX(zs->zs_mirrors + zs->zs_splits, 1) * ztest_opts.zo_raidz; @@ -2769,6 +2837,9 @@ ztest_vdev_aux_add_remove(ztest_ds_t *zd, uint64_t id) uint64_t guid = 0; int error; + if (ztest_opts.zo_mmp_test) + return; + if (ztest_random(2) == 0) { sav = &spa->spa_spares; aux = ZPOOL_CONFIG_SPARES; @@ -2864,6 +2935,9 @@ ztest_split_pool(ztest_ds_t *zd, uint64_t id) uint_t c, children, schildren = 0, lastlogid = 0; int error = 0; + if (ztest_opts.zo_mmp_test) + return; + 
mutex_enter(&ztest_vdev_lock); /* ensure we have a useable config; mirrors of raidz aren't supported */ @@ -2970,6 +3044,9 @@ ztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id) int oldvd_is_log; int error, expected_error; + if (ztest_opts.zo_mmp_test) + return; + mutex_enter(&ztest_vdev_lock); leaves = MAX(zs->zs_mirrors, 1) * ztest_opts.zo_raidz; @@ -5566,6 +5643,9 @@ ztest_reguid(ztest_ds_t *zd, uint64_t id) uint64_t orig, load; int error; + if (ztest_opts.zo_mmp_test) + return; + orig = spa_guid(spa); load = spa_load_guid(spa); @@ -6250,7 +6330,7 @@ ztest_run(ztest_shared_t *zs) * Verify that we can export the pool and reimport it under a * different name. */ - if (ztest_random(2) == 0) { + if ((ztest_random(2) == 0) && !ztest_opts.zo_mmp_test) { char name[ZFS_MAX_DATASET_NAME_LEN]; (void) snprintf(name, sizeof (name), "%s_import", ztest_opts.zo_pool); @@ -6399,6 +6479,56 @@ make_random_props() } /* + * Import a storage pool with the given name. + */ +static void +ztest_import(ztest_shared_t *zs) +{ + libzfs_handle_t *hdl; + importargs_t args = { 0 }; + spa_t *spa; + nvlist_t *cfg = NULL; + int nsearch = 1; + char *searchdirs[nsearch]; + char *name = ztest_opts.zo_pool; + int flags = ZFS_IMPORT_MISSING_LOG; + int error; + + mutex_init(&ztest_vdev_lock, NULL, MUTEX_DEFAULT, NULL); + rw_init(&ztest_name_lock, NULL, USYNC_THREAD, NULL); + + kernel_init(FREAD | FWRITE); + hdl = libzfs_init(); + + searchdirs[0] = ztest_opts.zo_dir; + args.paths = nsearch; + args.path = searchdirs; + args.can_be_active = B_FALSE; + + error = zpool_tryimport(hdl, name, &cfg, &args); + if (error) + (void) fatal(0, "No pools found\n"); + + VERIFY0(spa_import(name, cfg, NULL, flags)); + VERIFY0(spa_open(name, &spa, FTAG)); + zs->zs_metaslab_sz = + 1ULL << spa->spa_root_vdev->vdev_child[0]->vdev_ms_shift; + spa_close(spa, FTAG); + + libzfs_fini(hdl); + kernel_fini(); + + if (!ztest_opts.zo_mmp_test) { + ztest_run_zdb(ztest_opts.zo_pool); + ztest_freeze(); + 
ztest_run_zdb(ztest_opts.zo_pool); + } + + rw_destroy(&ztest_name_lock); + mutex_destroy(&ztest_vdev_lock); +} + +/* * Create a storage pool with the given name and initial vdev size. * Then test spa_freeze() functionality. */ @@ -6442,11 +6572,11 @@ ztest_init(ztest_shared_t *zs) kernel_fini(); - ztest_run_zdb(ztest_opts.zo_pool); - - ztest_freeze(); - - ztest_run_zdb(ztest_opts.zo_pool); + if (!ztest_opts.zo_mmp_test) { + ztest_run_zdb(ztest_opts.zo_pool); + ztest_freeze(); + ztest_run_zdb(ztest_opts.zo_pool); + } rw_destroy(&ztest_name_lock); mutex_destroy(&ztest_vdev_lock); @@ -6607,13 +6737,19 @@ ztest_run_init(void) { ztest_shared_t *zs = ztest_shared; - ASSERT(ztest_opts.zo_init != 0); - /* * Blow away any existing copy of zpool.cache */ (void) remove(spa_config_path); + if (ztest_opts.zo_init == 0) { + if (ztest_opts.zo_verbose >= 1) + (void) printf("Importing pool %s\n", + ztest_opts.zo_pool); + ztest_import(zs); + return; + } + /* * Create and initialize our storage pool. */ @@ -6820,7 +6956,8 @@ main(int argc, char **argv) (void) printf("\n"); } - ztest_run_zdb(ztest_opts.zo_pool); + if (!ztest_opts.zo_mmp_test) + ztest_run_zdb(ztest_opts.zo_pool); } if (ztest_opts.zo_verbose >= 1) { diff --git a/usr/src/common/zfs/zfs_comutil.h b/usr/src/common/zfs/zfs_comutil.h index 8cc098ada2..1c828e41e2 100644 --- a/usr/src/common/zfs/zfs_comutil.h +++ b/usr/src/common/zfs/zfs_comutil.h @@ -21,6 +21,7 @@ /* * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012, 2017 by Delphix. All rights reserved. + * Copyright 2019 Joyent, Inc. 
*/ #ifndef _ZFS_COMUTIL_H @@ -33,6 +34,9 @@ extern "C" { #endif +/* Needed for ZoL errno usage in MMP kernel and user code */ +#define EREMOTEIO EREMOTE + extern boolean_t zfs_allocatable_devs(nvlist_t *); extern void zpool_get_load_policy(nvlist_t *, zpool_load_policy_t *); diff --git a/usr/src/common/zfs/zpool_prop.c b/usr/src/common/zfs/zpool_prop.c index cbbd299185..d17c7fd980 100644 --- a/usr/src/common/zfs/zpool_prop.c +++ b/usr/src/common/zfs/zpool_prop.c @@ -125,6 +125,9 @@ zpool_prop_init(void) PROP_DEFAULT, ZFS_TYPE_POOL, "on | off", "EXPAND", boolean_table); zprop_register_index(ZPOOL_PROP_READONLY, "readonly", 0, PROP_DEFAULT, ZFS_TYPE_POOL, "on | off", "RDONLY", boolean_table); + zprop_register_index(ZPOOL_PROP_MULTIHOST, "multihost", 0, + PROP_DEFAULT, ZFS_TYPE_POOL, "on | off", "MULTIHOST", + boolean_table); /* default index properties */ zprop_register_index(ZPOOL_PROP_FAILUREMODE, "failmode", diff --git a/usr/src/data/zoneinfo/README.illumos b/usr/src/data/zoneinfo/README.illumos index b4bbd7ee38..8af2e7fc48 100644 --- a/usr/src/data/zoneinfo/README.illumos +++ b/usr/src/data/zoneinfo/README.illumos @@ -35,7 +35,7 @@ different names in our source tree. $ cp iso3166.tab $CODEMGR_WS/usr/src/data/zoneinfo/country.tab $ cp zone.tab $CODEMGR_WS/usr/src/data/zoneinfo/zone.tab.txt -Now, you need to manaully fix up the zone_sun.tab. zone_sun.tab has +Now, you need to manually fix up the zone_sun.tab. zone_sun.tab has additional different zone names that the original does not. As things are changed and removed, or coordinates updated, the same should be done in zone_sun.tab. The simplest way known to deal with it today is diff --git a/usr/src/data/zoneinfo/africa b/usr/src/data/zoneinfo/africa index a26b91bc11..a58c11c181 100644 --- a/usr/src/data/zoneinfo/africa +++ b/usr/src/data/zoneinfo/africa @@ -364,6 +364,11 @@ Zone Africa/Cairo 2:05:09 - LMT 1900 Oct # See Africa/Lagos. # Eritrea +# See Africa/Nairobi. 
+ +# Eswatini (formerly Swaziland) +# See Africa/Johannesburg. + # Ethiopia # See Africa/Nairobi. # @@ -847,8 +852,41 @@ Zone Indian/Mauritius 3:50:00 - LMT 1907 # Port Louis # From Mohamed Essedik Najd (2018-10-26): # Today, a Moroccan government council approved the perpetual addition # of 60 minutes to the regular Moroccan timezone. -# From Brian Inglis (2018-10-26): -# http://www.maroc.ma/fr/actualites/le-conseil-de-gouvernement-adopte-un-projet-de-decret-relatif-lheure-legale-stipulant-le +# From Matt Johnson (2018-10-28): +# http://www.sgg.gov.ma/Portals/1/BO/2018/BO_6720-bis_Ar.pdf +# +# From Maamar Abdelkader (2018-11-01): +# We usually move clocks back the previous week end and come back to the +1 +# the week end after.... The government does not announce yet the decision +# about this temporary change. But it s 99% sure that it will be the case, +# as in previous years. An unofficial survey was done these days, showing +# that 64% of asked peopke are ok for moving from +1 to +0 during Ramadan. +# https://leconomiste.com/article/1035870-enquete-l-economiste-sunergia-64-des-marocains-plebiscitent-le-gmt-pendant-ramadan +# +# From Paul Eggert (2018-11-01): +# For now, guess that Morocco will fall back at 03:00 the last Sunday +# before Ramadan, and spring forward at 02:00 the first Sunday after +# Ramadan, as this has been the practice since 2012. To implement this, +# transition dates for 2019 through 2037 were determined by running the +# following program under GNU Emacs 26.1. 
+# (let ((islamic-year 1440)) +# (require 'cal-islam) +# (while (< islamic-year 1460) +# (let ((a (calendar-islamic-to-absolute (list 9 1 islamic-year))) +# (b (calendar-islamic-to-absolute (list 10 1 islamic-year))) +# (sunday 0)) +# (while (/= sunday (mod (setq a (1- a)) 7))) +# (while (/= sunday (mod b 7)) +# (setq b (1+ b))) +# (setq a (calendar-gregorian-from-absolute a)) +# (setq b (calendar-gregorian-from-absolute b)) +# (insert +# (format +# (concat "Rule\tMorocco\t%d\tonly\t-\t%s\t%2d\t 3:00\t-1:00\t-\n" +# "Rule\tMorocco\t%d\tonly\t-\t%s\t%2d\t 2:00\t0\t-\n") +# (car (cdr (cdr a))) (calendar-month-name (car a) t) (car (cdr a)) +# (car (cdr (cdr b))) (calendar-month-name (car b) t) (car (cdr b))))) +# (setq islamic-year (+ 1 islamic-year)))) # RULE NAME FROM TO TYPE IN ON AT SAVE LETTER/S Rule Morocco 1939 only - Sep 12 0:00 1:00 - @@ -892,13 +930,53 @@ Rule Morocco 2017 only - May 21 3:00 0 - Rule Morocco 2017 only - Jul 2 2:00 1:00 - Rule Morocco 2018 only - May 13 3:00 0 - Rule Morocco 2018 only - Jun 17 2:00 1:00 - +Rule Morocco 2019 only - May 5 3:00 -1:00 - +Rule Morocco 2019 only - Jun 9 2:00 0 - +Rule Morocco 2020 only - Apr 19 3:00 -1:00 - +Rule Morocco 2020 only - May 24 2:00 0 - +Rule Morocco 2021 only - Apr 11 3:00 -1:00 - +Rule Morocco 2021 only - May 16 2:00 0 - +Rule Morocco 2022 only - Mar 27 3:00 -1:00 - +Rule Morocco 2022 only - May 8 2:00 0 - +Rule Morocco 2023 only - Mar 19 3:00 -1:00 - +Rule Morocco 2023 only - Apr 23 2:00 0 - +Rule Morocco 2024 only - Mar 10 3:00 -1:00 - +Rule Morocco 2024 only - Apr 14 2:00 0 - +Rule Morocco 2025 only - Feb 23 3:00 -1:00 - +Rule Morocco 2025 only - Apr 6 2:00 0 - +Rule Morocco 2026 only - Feb 15 3:00 -1:00 - +Rule Morocco 2026 only - Mar 22 2:00 0 - +Rule Morocco 2027 only - Feb 7 3:00 -1:00 - +Rule Morocco 2027 only - Mar 14 2:00 0 - +Rule Morocco 2028 only - Jan 23 3:00 -1:00 - +Rule Morocco 2028 only - Feb 27 2:00 0 - +Rule Morocco 2029 only - Jan 14 3:00 -1:00 - +Rule Morocco 2029 only - Feb 18 
2:00 0 - +Rule Morocco 2029 only - Dec 30 3:00 -1:00 - +Rule Morocco 2030 only - Feb 10 2:00 0 - +Rule Morocco 2030 only - Dec 22 3:00 -1:00 - +Rule Morocco 2031 only - Jan 26 2:00 0 - +Rule Morocco 2031 only - Dec 14 3:00 -1:00 - +Rule Morocco 2032 only - Jan 18 2:00 0 - +Rule Morocco 2032 only - Nov 28 3:00 -1:00 - +Rule Morocco 2033 only - Jan 9 2:00 0 - +Rule Morocco 2033 only - Nov 20 3:00 -1:00 - +Rule Morocco 2033 only - Dec 25 2:00 0 - +Rule Morocco 2034 only - Nov 5 3:00 -1:00 - +Rule Morocco 2034 only - Dec 17 2:00 0 - +Rule Morocco 2035 only - Oct 28 3:00 -1:00 - +Rule Morocco 2035 only - Dec 2 2:00 0 - +Rule Morocco 2036 only - Oct 19 3:00 -1:00 - +Rule Morocco 2036 only - Nov 23 2:00 0 - +Rule Morocco 2037 only - Oct 4 3:00 -1:00 - +Rule Morocco 2037 only - Nov 15 2:00 0 - # Zone NAME GMTOFF RULES FORMAT [UNTIL] Zone Africa/Casablanca -0:30:20 - LMT 1913 Oct 26 0:00 Morocco +00/+01 1984 Mar 16 1:00 - +01 1986 - 0:00 Morocco +00/+01 2018 Oct 27 - 1:00 - +01 + 0:00 Morocco +00/+01 2018 Oct 28 3:00 + 1:00 Morocco +01/+00 # Western Sahara # @@ -913,8 +991,8 @@ Zone Africa/Casablanca -0:30:20 - LMT 1913 Oct 26 Zone Africa/El_Aaiun -0:52:48 - LMT 1934 Jan # El Aaiún -1:00 - -01 1976 Apr 14 - 0:00 Morocco +00/+01 2018 Oct 27 - 1:00 - +01 + 0:00 Morocco +00/+01 2018 Oct 28 3:00 + 1:00 Morocco +01/+00 # Mozambique # @@ -1071,10 +1149,20 @@ Zone Indian/Reunion 3:41:52 - LMT 1911 Jun # Saint-Denis # the switch is from 01:00 to 02:00 ... [Decree No. 25/2017] # http://www.mnec.gov.st/index.php/publicacoes/documentos/file/90-decreto-lei-n-25-2017 +# From Vadim Nasardinov (2018-12-29): +# São Tomé and Príncipe is about to do the following on Jan 1, 2019: +# https://www.stp-press.st/2018/12/05/governo-jesus-ja-decidiu-repor-hora-legal-sao-tomense/ +# +# From Michael Deckers (2018-12-30): +# https://www.legis-palop.org/download.jsp?idFile=102818 +# ... 
[The legal time of the country, which coincides with universal +# coordinated time, will be restituted at 2 o'clock on day 1 of January, 2019.] + Zone Africa/Sao_Tome 0:26:56 - LMT 1884 -0:36:45 - LMT 1912 Jan 1 00:00u # Lisbon MT 0:00 - GMT 2018 Jan 1 01:00 - 1:00 - WAT + 1:00 - WAT 2019 Jan 1 02:00 + 0:00 - GMT # Senegal # See Africa/Abidjan. @@ -1105,7 +1193,7 @@ Zone Africa/Johannesburg 1:52:00 - LMT 1892 Feb 8 1:30 - SAST 1903 Mar 2:00 SA SAST Link Africa/Johannesburg Africa/Maseru # Lesotho -Link Africa/Johannesburg Africa/Mbabane # Swaziland +Link Africa/Johannesburg Africa/Mbabane # Eswatini # # Marion and Prince Edward Is # scientific station since 1947 @@ -1147,9 +1235,6 @@ Zone Africa/Juba 2:06:28 - LMT 1931 2:00 Sudan CA%sT 2000 Jan 15 12:00 3:00 - EAT -# Swaziland -# See Africa/Johannesburg. - # Tanzania # See Africa/Nairobi. diff --git a/usr/src/data/zoneinfo/asia b/usr/src/data/zoneinfo/asia index 48b4c65299..d790da5bf6 100644 --- a/usr/src/data/zoneinfo/asia +++ b/usr/src/data/zoneinfo/asia @@ -586,12 +586,82 @@ Zone Asia/Urumqi 5:50:20 - LMT 1928 # obtained from # http://www.hko.gov.hk/gts/time/Summertime.htm -# From Arthur David Olson (2009-10-28): +# From Phake Nick (2018-10-27): +# According to Singaporean newspaper +# http://eresources.nlb.gov.sg/newspapers/Digitised/Article/singfreepresswk19041102-1.2.37 +# the day that Hong Kong start using GMT+8 should be Oct 30, 1904. +# +# From Paul Eggert (2018-11-17): +# Hong Kong had a time ball near the Marine Police Station, Tsim Sha Tsui. +# "The ball was raised manually each day and dropped at exactly 1pm +# (except on Sundays and Government holidays)." +# Dyson AD. From Time Ball to Atomic Clock. Hong Kong Government. 1983. +# <https://www.hko.gov.hk/publica/gen_pub/timeball_atomic_clock.pdf> +# "From 1904 October 30 the time-ball at Hong Kong has been dropped by order +# of the Governor of the Colony at 17h 0m 0s G.M.T., which is 23m 18s.14 in +# advance of 1h 0m 0s of Hong Kong mean time." 
+# Hollis HP. Universal Time, Longitudes, and Geodesy. Mon Not R Astron Soc. +# 1905-02-10;65(4):405-6. https://doi.org/10.1093/mnras/65.4.382 +# +# From Joseph Myers (2018-11-18): +# An astronomer before 1925 referring to GMT would have been using the old +# astronomical convention where the day started at noon, not midnight. +# +# From Steve Allen (2018-11-17): +# Meteorological Observations made at the Hongkong Observatory in the year 1904 +# page 4 <https://books.google.com/books?id=kgw5AQAAMAAJ&pg=RA4-PA4> +# ... the log of drop times in Table II shows that on Sunday 1904-10-30 the +# ball was dropped. So that looks like a special case drop for the sake +# of broadcasting the new local time. +# +# From Phake Nick (2018-11-18): +# According to The Hong Kong Weekly Press, 1904-10-29, p.324, the +# governor of Hong Kong at the time stated that "We are further desired to +# make it known that the change will be effected by firing the gun and by the +# dropping of the Ball at 23min. 18sec. before one." +# From Paul Eggert (2018-11-18): +# See <https://mmis.hkpl.gov.hk> for this; unfortunately Flash is required. + +# From Phake Nick (2018-10-26): +# I went to check microfilm records stored at Hong Kong Public Library.... +# on September 30 1941, according to Ta Kung Pao (Hong Kong edition), it was +# stated that fallback would occur on the next day (the 1st)'s "03:00 am (Hong +# Kong Time 04:00 am)" and the clock will fall back for a half hour. (03:00 +# probably refer to the time commonly used in mainland China at the time given +# the paper's background) ... the sunrise/sunset time given by South China +# Morning Post for October 1st was indeed moved by half an hour compares to +# before. After that, in December, the battle to capture Hong Kong started and +# the library doesn't seems to have any record stored about press during that +# period of time. 
Some media resumed publication soon after that within the +# same month, but there were not much information about time there. Later they +# started including a radio program guide when they restored radio service, +# explicitly mentioning it use Tokyo standard time, and later added a note +# saying it's half an hour ahead of the old Hong Kong standard time, and it +# also seems to indicate that Hong Kong was not using GMT+8 when it was +# captured by Japan. +# +# Image of related sections on newspaper: +# * 1941-09-30, Ta Kung Pao (Hong Kong), "Winter Time start tomorrow". +# https://i.imgur.com/6waY51Z.jpg (Chinese) +# * 1941-09-29, South China Morning Post, Information on sunrise/sunset +# time and other things for September 30 and October 1. +# https://i.imgur.com/kCiUR78.jpg +# * 1942-02-05. The Hong Kong News, Radio Program Guide. +# https://i.imgur.com/eVvDMzS.jpg +# * 1941-06-14. Hong Kong Daily Press, Daylight Saving from 3am Tomorrow. +# https://i.imgur.com/05KkvtC.png +# * 1941-09-30, Hong Kong Daily Press, Winter Time Warning. +# https://i.imgur.com/dge4kFJ.png +# Also, the Liberation day of Hong Kong after WWII which British rule +# over the territory resumed was August 30, 1945, which I think should +# be the termination date for the use of JST in the territory.... 
+ +# From Paul Eggert (2018-11-17): # Here are the dates given at -# http://www.hko.gov.hk/gts/time/Summertime.htm -# as of 2009-10-28: +# https://www.hko.gov.hk/gts/time/Summertime.htm +# as of 2014-06-19: # Year Period -# 1941 1 Apr to 30 Sep +# 1941 15 Jun to 30 Sep # 1942 Whole year # 1943 Whole year # 1944 Whole year @@ -602,7 +672,7 @@ Zone Asia/Urumqi 5:50:20 - LMT 1928 # 1949 3 Apr to 30 Oct # 1950 2 Apr to 29 Oct # 1951 1 Apr to 28 Oct -# 1952 6 Apr to 25 Oct +# 1952 6 Apr to 2 Nov # 1953 5 Apr to 1 Nov # 1954 21 Mar to 31 Oct # 1955 20 Mar to 6 Nov @@ -631,25 +701,25 @@ Zone Asia/Urumqi 5:50:20 - LMT 1928 # 1978 Nil # 1979 13 May to 21 Oct # 1980 to Now Nil -# The page does not give start or end times of day. -# The page does not give a start date for 1942. -# The page does not givw an end date for 1945. -# The Japanese occupation of Hong Kong began on 1941-12-25. -# The Japanese surrender of Hong Kong was signed 1945-09-15. -# For lack of anything better, use start of those days as the transition times. +# The page does not give times of day for transitions, +# or dates for the 1942 and 1945 transitions. +# The Japanese occupation of Hong Kong began 1941-12-25. +# The Japanese surrender of Hong Kong was signed 1945-09-16; see: +# Heaver S. The days after the Pacific war ended: unsettling times +# in Hong Kong. Post Magazine. 2016-06-13. +# https://www.scmp.com/magazines/post-magazine/article/1852990/days-after-pacific-war-ended-unsettling-times-hong-kong +# For lack of anything better, use start of those days as the +# transition times. 
# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S -Rule HK 1941 only - Apr 1 3:30 1:00 S -Rule HK 1941 only - Sep 30 3:30 0 - Rule HK 1946 only - Apr 20 3:30 1:00 S Rule HK 1946 only - Dec 1 3:30 0 - Rule HK 1947 only - Apr 13 3:30 1:00 S Rule HK 1947 only - Dec 30 3:30 0 - Rule HK 1948 only - May 2 3:30 1:00 S Rule HK 1948 1951 - Oct lastSun 3:30 0 - -Rule HK 1952 only - Oct 25 3:30 0 - +Rule HK 1952 1953 - Nov Sun>=1 3:30 0 - Rule HK 1949 1953 - Apr Sun>=1 3:30 1:00 S -Rule HK 1953 only - Nov 1 3:30 0 - Rule HK 1954 1964 - Mar Sun>=18 3:30 1:00 S Rule HK 1954 only - Oct 31 3:30 0 - Rule HK 1955 1964 - Nov Sun>=1 3:30 0 - @@ -659,9 +729,11 @@ Rule HK 1973 only - Dec 30 3:30 1:00 S Rule HK 1979 only - May Sun>=8 3:30 1:00 S Rule HK 1979 only - Oct Sun>=16 3:30 0 - # Zone NAME GMTOFF RULES FORMAT [UNTIL] -Zone Asia/Hong_Kong 7:36:42 - LMT 1904 Oct 30 - 8:00 HK HK%sT 1941 Dec 25 - 9:00 - JST 1945 Sep 15 +Zone Asia/Hong_Kong 7:36:42 - LMT 1904 Oct 30 0:36:42 + 8:00 - HKT 1941 Jun 15 3:30 + 8:00 1:00 HKST 1941 Oct 1 4:00 + 8:30 - HKT 1941 Dec 25 + 9:00 - JST 1945 Sep 16 8:00 HK HK%sT ############################################################################### @@ -1057,6 +1129,16 @@ Zone Asia/Dili 8:22:20 - LMT 1912 Jan 1 # India +# British astronomer Henry Park Hollis disliked India Standard Time's offset: +# "A new time system has been proposed for India, Further India, and Burmah. +# The scheme suggested is that the times of the meridians 5½ and 6½ hours +# east of Greenwich should be adopted in these territories. No reason is +# given why hourly meridians five hours and six hours east should not be +# chosen; a plan which would bring the time of India into harmony with +# that of almost the whole of the civilised world." +# Hollis HP. Universal Time, Longitudes, and Geodesy. Mon Not R Astron Soc. +# 1905-02-10;65(4):405-6. https://doi.org/10.1093/mnras/65.4.382 + # From Ian P. 
Beacock, in "A brief history of (modern) time", The Atlantic # https://www.theatlantic.com/technology/archive/2015/12/the-creation-of-modern-time/421419/ # (2015-12-22): @@ -1227,12 +1309,65 @@ Zone Asia/Jayapura 9:22:48 - LMT 1932 Nov # leap year calculation involved. There has never been any serious # plan to change that law.... # -# From Paul Eggert (2006-03-22): +# From Paul Eggert (2018-11-30): # Go with Shanks & Pottenger before Sept. 1991, and with Pournader thereafter. -# I used Ed Reingold's cal-persia in GNU Emacs 21.2 to check Persian dates, -# stopping after 2037 when 32-bit time_t's overflow. -# That cal-persia used Birashk's approximation, which disagrees with the solar -# calendar predictions for the year 2025, so I corrected those dates by hand. +# I used the following code in GNU Emacs 26.1 to generate the "Rule Iran" +# lines from 2008 through 2087. Emacs 26.1 uses Ed Reingold's +# cal-persia implementation of Birashk's approximation, which in the +# 2008-2087 range disagrees with the astronomical Persian calendar +# for Persian years 1404 (Gregorian 2025) and 1437 (Gregorian 2058), +# so the following code special-cases those years. See Table 15.1, page 264, of: +# Edward M. Reingold and Nachum Dershowitz, Calendrical Calculations: +# The Ultimate Edition, Cambridge University Press (2018). +# https://www.cambridge.org/fr/academic/subjects/computer-science/computing-general-interest/calendrical-calculations-ultimate-edition-4th-edition +# Page 258, footnote 2, of this book says there is some dispute over what will +# happen in 2091 (and some other years after that), so this code +# stops in 2087, as 2088 and 2089 agree with the "max" rule below. 
+# (cl-loop +# initially (require 'cal-persia) +# with first-persian-year = 1387 +# with last-persian-year = 1466 +# ;; Exceptional years in the above range, +# ;; from Reingold & Dershowitz Table 15.1, page 264: +# with exceptional-persian-years = '(1404 1437) +# with range-start = nil +# for persian-year from first-persian-year to last-persian-year +# do +# (let* +# ((exceptional-year-offset +# (if (member persian-year exceptional-persian-years) 1 0)) +# (beg-dst-absolute +# (+ (calendar-persian-to-absolute (list 1 1 persian-year)) +# exceptional-year-offset)) +# (end-dst-absolute +# (+ (calendar-persian-to-absolute (list 6 30 persian-year)) +# exceptional-year-offset)) +# (next-year-beg-dst-absolute +# (+ (calendar-persian-to-absolute (list 1 1 (1+ persian-year))) +# (if (member (1+ persian-year) exceptional-persian-years) 1 0))) +# (beg-dst (calendar-gregorian-from-absolute beg-dst-absolute)) +# (end-dst (calendar-gregorian-from-absolute end-dst-absolute)) +# (next-year-beg-dst (calendar-gregorian-from-absolute +# next-year-beg-dst-absolute)) +# (year (calendar-extract-year beg-dst)) +# (range-end (if range-start year "only"))) +# (setq range-start (or range-start year)) +# (when (or (/= (calendar-extract-day beg-dst) +# (calendar-extract-day next-year-beg-dst)) +# (= persian-year last-persian-year)) +# (insert +# (format +# "Rule\tIran\t%d\t%s\t-\t%s\t%2d\t24:00\t1:00\t-\n" +# range-start range-end +# (calendar-month-name (calendar-extract-month beg-dst) t) +# (calendar-extract-day beg-dst))) +# (insert +# (format +# "Rule\tIran\t%d\t%s\t-\t%s\t%2d\t24:00\t0\t-\n" +# range-start range-end +# (calendar-month-name (calendar-extract-month end-dst) t) +# (calendar-extract-day end-dst))) +# (setq range-start nil)))) # # From Oscar van Vlijmen (2005-03-30), writing about future # discrepancies between cal-persia and the Iranian calendar: @@ -1267,61 +1402,113 @@ Zone Asia/Jayapura 9:22:48 - LMT 1932 Nov # thirtieth day of Shahrivar. 
# # Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S -Rule Iran 1978 1980 - Mar 21 0:00 1:00 - -Rule Iran 1978 only - Oct 21 0:00 0 - -Rule Iran 1979 only - Sep 19 0:00 0 - -Rule Iran 1980 only - Sep 23 0:00 0 - -Rule Iran 1991 only - May 3 0:00 1:00 - -Rule Iran 1992 1995 - Mar 22 0:00 1:00 - -Rule Iran 1991 1995 - Sep 22 0:00 0 - -Rule Iran 1996 only - Mar 21 0:00 1:00 - -Rule Iran 1996 only - Sep 21 0:00 0 - -Rule Iran 1997 1999 - Mar 22 0:00 1:00 - -Rule Iran 1997 1999 - Sep 22 0:00 0 - -Rule Iran 2000 only - Mar 21 0:00 1:00 - -Rule Iran 2000 only - Sep 21 0:00 0 - -Rule Iran 2001 2003 - Mar 22 0:00 1:00 - -Rule Iran 2001 2003 - Sep 22 0:00 0 - -Rule Iran 2004 only - Mar 21 0:00 1:00 - -Rule Iran 2004 only - Sep 21 0:00 0 - -Rule Iran 2005 only - Mar 22 0:00 1:00 - -Rule Iran 2005 only - Sep 22 0:00 0 - -Rule Iran 2008 only - Mar 21 0:00 1:00 - -Rule Iran 2008 only - Sep 21 0:00 0 - -Rule Iran 2009 2011 - Mar 22 0:00 1:00 - -Rule Iran 2009 2011 - Sep 22 0:00 0 - -Rule Iran 2012 only - Mar 21 0:00 1:00 - -Rule Iran 2012 only - Sep 21 0:00 0 - -Rule Iran 2013 2015 - Mar 22 0:00 1:00 - -Rule Iran 2013 2015 - Sep 22 0:00 0 - -Rule Iran 2016 only - Mar 21 0:00 1:00 - -Rule Iran 2016 only - Sep 21 0:00 0 - -Rule Iran 2017 2019 - Mar 22 0:00 1:00 - -Rule Iran 2017 2019 - Sep 22 0:00 0 - -Rule Iran 2020 only - Mar 21 0:00 1:00 - -Rule Iran 2020 only - Sep 21 0:00 0 - -Rule Iran 2021 2023 - Mar 22 0:00 1:00 - -Rule Iran 2021 2023 - Sep 22 0:00 0 - -Rule Iran 2024 only - Mar 21 0:00 1:00 - -Rule Iran 2024 only - Sep 21 0:00 0 - -Rule Iran 2025 2027 - Mar 22 0:00 1:00 - -Rule Iran 2025 2027 - Sep 22 0:00 0 - -Rule Iran 2028 2029 - Mar 21 0:00 1:00 - -Rule Iran 2028 2029 - Sep 21 0:00 0 - -Rule Iran 2030 2031 - Mar 22 0:00 1:00 - -Rule Iran 2030 2031 - Sep 22 0:00 0 - -Rule Iran 2032 2033 - Mar 21 0:00 1:00 - -Rule Iran 2032 2033 - Sep 21 0:00 0 - -Rule Iran 2034 2035 - Mar 22 0:00 1:00 - -Rule Iran 2034 2035 - Sep 22 0:00 0 - -# -# The following rules are approximations 
starting in the year 2038. -# These are the best post-2037 approximations available, given the -# restrictions of a single rule using a Gregorian-based data format. +Rule Iran 1978 1980 - Mar 20 24:00 1:00 - +Rule Iran 1978 only - Oct 20 24:00 0 - +Rule Iran 1979 only - Sep 18 24:00 0 - +Rule Iran 1980 only - Sep 22 24:00 0 - +Rule Iran 1991 only - May 2 24:00 1:00 - +Rule Iran 1992 1995 - Mar 21 24:00 1:00 - +Rule Iran 1991 1995 - Sep 21 24:00 0 - +Rule Iran 1996 only - Mar 20 24:00 1:00 - +Rule Iran 1996 only - Sep 20 24:00 0 - +Rule Iran 1997 1999 - Mar 21 24:00 1:00 - +Rule Iran 1997 1999 - Sep 21 24:00 0 - +Rule Iran 2000 only - Mar 20 24:00 1:00 - +Rule Iran 2000 only - Sep 20 24:00 0 - +Rule Iran 2001 2003 - Mar 21 24:00 1:00 - +Rule Iran 2001 2003 - Sep 21 24:00 0 - +Rule Iran 2004 only - Mar 20 24:00 1:00 - +Rule Iran 2004 only - Sep 20 24:00 0 - +Rule Iran 2005 only - Mar 21 24:00 1:00 - +Rule Iran 2005 only - Sep 21 24:00 0 - +Rule Iran 2008 only - Mar 20 24:00 1:00 - +Rule Iran 2008 only - Sep 20 24:00 0 - +Rule Iran 2009 2011 - Mar 21 24:00 1:00 - +Rule Iran 2009 2011 - Sep 21 24:00 0 - +Rule Iran 2012 only - Mar 20 24:00 1:00 - +Rule Iran 2012 only - Sep 20 24:00 0 - +Rule Iran 2013 2015 - Mar 21 24:00 1:00 - +Rule Iran 2013 2015 - Sep 21 24:00 0 - +Rule Iran 2016 only - Mar 20 24:00 1:00 - +Rule Iran 2016 only - Sep 20 24:00 0 - +Rule Iran 2017 2019 - Mar 21 24:00 1:00 - +Rule Iran 2017 2019 - Sep 21 24:00 0 - +Rule Iran 2020 only - Mar 20 24:00 1:00 - +Rule Iran 2020 only - Sep 20 24:00 0 - +Rule Iran 2021 2023 - Mar 21 24:00 1:00 - +Rule Iran 2021 2023 - Sep 21 24:00 0 - +Rule Iran 2024 only - Mar 20 24:00 1:00 - +Rule Iran 2024 only - Sep 20 24:00 0 - +Rule Iran 2025 2027 - Mar 21 24:00 1:00 - +Rule Iran 2025 2027 - Sep 21 24:00 0 - +Rule Iran 2028 2029 - Mar 20 24:00 1:00 - +Rule Iran 2028 2029 - Sep 20 24:00 0 - +Rule Iran 2030 2031 - Mar 21 24:00 1:00 - +Rule Iran 2030 2031 - Sep 21 24:00 0 - +Rule Iran 2032 2033 - Mar 20 24:00 1:00 - +Rule 
Iran 2032 2033 - Sep 20 24:00 0 - +Rule Iran 2034 2035 - Mar 21 24:00 1:00 - +Rule Iran 2034 2035 - Sep 21 24:00 0 - +Rule Iran 2036 2037 - Mar 20 24:00 1:00 - +Rule Iran 2036 2037 - Sep 20 24:00 0 - +Rule Iran 2038 2039 - Mar 21 24:00 1:00 - +Rule Iran 2038 2039 - Sep 21 24:00 0 - +Rule Iran 2040 2041 - Mar 20 24:00 1:00 - +Rule Iran 2040 2041 - Sep 20 24:00 0 - +Rule Iran 2042 2043 - Mar 21 24:00 1:00 - +Rule Iran 2042 2043 - Sep 21 24:00 0 - +Rule Iran 2044 2045 - Mar 20 24:00 1:00 - +Rule Iran 2044 2045 - Sep 20 24:00 0 - +Rule Iran 2046 2047 - Mar 21 24:00 1:00 - +Rule Iran 2046 2047 - Sep 21 24:00 0 - +Rule Iran 2048 2049 - Mar 20 24:00 1:00 - +Rule Iran 2048 2049 - Sep 20 24:00 0 - +Rule Iran 2050 2051 - Mar 21 24:00 1:00 - +Rule Iran 2050 2051 - Sep 21 24:00 0 - +Rule Iran 2052 2053 - Mar 20 24:00 1:00 - +Rule Iran 2052 2053 - Sep 20 24:00 0 - +Rule Iran 2054 2055 - Mar 21 24:00 1:00 - +Rule Iran 2054 2055 - Sep 21 24:00 0 - +Rule Iran 2056 2057 - Mar 20 24:00 1:00 - +Rule Iran 2056 2057 - Sep 20 24:00 0 - +Rule Iran 2058 2059 - Mar 21 24:00 1:00 - +Rule Iran 2058 2059 - Sep 21 24:00 0 - +Rule Iran 2060 2062 - Mar 20 24:00 1:00 - +Rule Iran 2060 2062 - Sep 20 24:00 0 - +Rule Iran 2063 only - Mar 21 24:00 1:00 - +Rule Iran 2063 only - Sep 21 24:00 0 - +Rule Iran 2064 2066 - Mar 20 24:00 1:00 - +Rule Iran 2064 2066 - Sep 20 24:00 0 - +Rule Iran 2067 only - Mar 21 24:00 1:00 - +Rule Iran 2067 only - Sep 21 24:00 0 - +Rule Iran 2068 2070 - Mar 20 24:00 1:00 - +Rule Iran 2068 2070 - Sep 20 24:00 0 - +Rule Iran 2071 only - Mar 21 24:00 1:00 - +Rule Iran 2071 only - Sep 21 24:00 0 - +Rule Iran 2072 2074 - Mar 20 24:00 1:00 - +Rule Iran 2072 2074 - Sep 20 24:00 0 - +Rule Iran 2075 only - Mar 21 24:00 1:00 - +Rule Iran 2075 only - Sep 21 24:00 0 - +Rule Iran 2076 2078 - Mar 20 24:00 1:00 - +Rule Iran 2076 2078 - Sep 20 24:00 0 - +Rule Iran 2079 only - Mar 21 24:00 1:00 - +Rule Iran 2079 only - Sep 21 24:00 0 - +Rule Iran 2080 2082 - Mar 20 24:00 1:00 - +Rule Iran 
2080 2082 - Sep 20 24:00 0 - +Rule Iran 2083 only - Mar 21 24:00 1:00 - +Rule Iran 2083 only - Sep 21 24:00 0 - +Rule Iran 2084 2086 - Mar 20 24:00 1:00 - +Rule Iran 2084 2086 - Sep 20 24:00 0 - +Rule Iran 2087 only - Mar 21 24:00 1:00 - +Rule Iran 2087 only - Sep 21 24:00 0 - +# +# The following rules are approximations starting in the year 2088. +# These are the best post-2088 approximations available, given the +# restrictions of a single rule using ordinary Gregorian dates. # At some point this table will need to be extended, though quite # possibly Iran will change the rules first. -Rule Iran 2036 max - Mar 21 0:00 1:00 - -Rule Iran 2036 max - Sep 21 0:00 0 - +Rule Iran 2088 max - Mar 20 24:00 1:00 - +Rule Iran 2088 max - Sep 20 24:00 0 - # Zone NAME GMTOFF RULES FORMAT [UNTIL] Zone Asia/Tehran 3:25:44 - LMT 1916 @@ -1433,6 +1620,24 @@ Rule Zion 1974 only - Jul 7 0:00 1:00 D Rule Zion 1974 only - Oct 13 0:00 0 S Rule Zion 1975 only - Apr 20 0:00 1:00 D Rule Zion 1975 only - Aug 31 0:00 0 S + +# From Alois Treindl (2019-03-06): +# http://www.moin.gov.il/Documents/שעון קיץ/clock-50-years-7-2014.pdf +# From Isaac Starkman (2019-03-06): +# Summer time was in that period in 1980 and 1984, see +# https://www.ynet.co.il/articles/0,7340,L-3951073,00.html +# You can of course read it in translation. +# I checked the local newspapers for that years. +# It started on midnight and end at 01.00 am. 
+# From Paul Eggert (2019-03-06): +# Also see this thread about the moin.gov.il URL: +# https://mm.icann.org/pipermail/tz/2018-November/027194.html +Rule Zion 1980 only - Aug 2 0:00 1:00 D +Rule Zion 1980 only - Sep 13 1:00 0 S +Rule Zion 1984 only - May 5 0:00 1:00 D +Rule Zion 1984 only - Aug 25 1:00 0 S + +# From Shanks & Pottenger: Rule Zion 1985 only - Apr 14 0:00 1:00 D Rule Zion 1985 only - Sep 15 0:00 0 S Rule Zion 1986 only - May 18 0:00 1:00 D @@ -1691,7 +1896,9 @@ Rule Japan 1950 1951 - May Sat>=1 24:00 1:00 D # Zone NAME GMTOFF RULES FORMAT [UNTIL] Zone Asia/Tokyo 9:18:59 - LMT 1887 Dec 31 15:00u 9:00 Japan J%sT -# Since 1938, all Japanese possessions have been like Asia/Tokyo. +# Since 1938, all Japanese possessions have been like Asia/Tokyo, +# except that Truk (Chuuk), Ponape (Pohnpei), and Jaluit (Kosrae) did not +# switch from +10 to +09 until 1941-04-01; see the 'australasia' file. # Jordan # @@ -1981,8 +2188,10 @@ Zone Asia/Amman 2:23:44 - LMT 1931 # and in Byalokoz) lists Ural river (plus 10 versts on its left bank) in # the third time belt (before 1930 this means +03). -# From Paul Eggert (2016-12-06): -# The tables below reflect Golosunov's remarks, with exceptions as noted. +# From Alexander Konzurovski (2018-12-20): +# Qyzyolrda Region (Asia/Qyzylorda) is changing its time zone from +# UTC+6 to UTC+5 effective December 21st, 2018. The legal document is +# located here: http://adilet.zan.kz/rus/docs/P1800000817 (russian language). # Zone NAME GMTOFF RULES FORMAT [UNTIL] # @@ -1996,8 +2205,6 @@ Zone Asia/Almaty 5:07:48 - LMT 1924 May 2 # or Alma-Ata 6:00 RussiaAsia +06/+07 2004 Oct 31 2:00s 6:00 - +06 # Qyzylorda (aka Kyzylorda, Kizilorda, Kzyl-Orda, etc.) (KZ-KZY) -# This currently includes Qostanay (aka Kostanay, Kustanay) (KZ-KUS); -# see comments below. 
Zone Asia/Qyzylorda 4:21:52 - LMT 1924 May 2 4:00 - +04 1930 Jun 21 5:00 - +05 1981 Apr 1 @@ -2008,21 +2215,22 @@ Zone Asia/Qyzylorda 4:21:52 - LMT 1924 May 2 5:00 RussiaAsia +05/+06 1992 Jan 19 2:00s 6:00 RussiaAsia +06/+07 1992 Mar 29 2:00s 5:00 RussiaAsia +05/+06 2004 Oct 31 2:00s - 6:00 - +06 -# The following zone is like Asia/Qyzylorda except for being one -# hour earlier from 1991-09-29 to 1992-03-29. The 1991/2 rules for -# Qostanay are unclear partly because of the 1997 Turgai -# reorganization, so this zone is commented out for now. -#Zone Asia/Qostanay 4:14:20 - LMT 1924 May 2 -# 4:00 - +04 1930 Jun 21 -# 5:00 - +05 1981 Apr 1 -# 5:00 1:00 +06 1981 Oct 1 -# 6:00 - +06 1982 Apr 1 -# 5:00 RussiaAsia +05/+06 1991 Mar 31 2:00s -# 4:00 RussiaAsia +04/+05 1992 Jan 19 2:00s -# 5:00 RussiaAsia +05/+06 2004 Oct 31 2:00s -# 6:00 - +06 + 6:00 - +06 2018 Dec 21 0:00 + 5:00 - +05 # +# Qostanay (aka Kostanay, Kustanay) (KZ-KUS) +# The 1991/2 rules are unclear partly because of the 1997 Turgai +# reorganization. +Zone Asia/Qostanay 4:14:28 - LMT 1924 May 2 + 4:00 - +04 1930 Jun 21 + 5:00 - +05 1981 Apr 1 + 5:00 1:00 +06 1981 Oct 1 + 6:00 - +06 1982 Apr 1 + 5:00 RussiaAsia +05/+06 1991 Mar 31 2:00s + 4:00 RussiaAsia +04/+05 1992 Jan 19 2:00s + 5:00 RussiaAsia +05/+06 2004 Oct 31 2:00s + 6:00 - +06 + # Aqtöbe (aka Aktobe, formerly Aktyubinsk) (KZ-AKT) Zone Asia/Aqtobe 3:48:40 - LMT 1924 May 2 4:00 - +04 1930 Jun 21 @@ -2116,21 +2324,43 @@ Zone Asia/Bishkek 4:58:24 - LMT 1924 May 2 # started at June 1 in that year. For another example, the article in # 1988 said that DST started at 2:00 AM in that year. +# From Phake Nick (2018-10-27): +# 1. 
According to official announcement from Korean government, the DST end +# date in South Korea should be +# 1955-09-08 without specifying time +# http://theme.archives.go.kr/next/common/viewEbook.do?singleData=N&archiveEventId=0027977557 +# 1956-09-29 without specifying time +# http://theme.archives.go.kr/next/common/viewEbook.do?singleData=N&archiveEventId=0027978341 +# 1957-09-21 24 o'clock +# http://theme.archives.go.kr/next/common/viewEbook.do?singleData=N&archiveEventId=0027979690#3 +# 1958-09-20 24 o'clock +# http://theme.archives.go.kr/next/common/viewEbook.do?singleData=N&archiveEventId=0027981189 +# 1959-09-19 24 o'clock +# http://theme.archives.go.kr/next/common/viewEbook.do?singleData=N&archiveEventId=0027982974#2 +# 1960-09-17 24 o'clock +# http://theme.archives.go.kr/next/common/viewEbook.do?singleData=N&archiveEventId=0028044104 +# ... +# 2.... https://namu.wiki/w/대한민국%20표준시 ... [says] +# when Korea was using GMT+8:30 as standard time, the international +# aviation/marine/meteorological industry in the country refused to +# follow and continued to use GMT+9:00 for interoperability. 
+ + # Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S -Rule ROK 1948 only - Jun 1 0:00 1:00 D -Rule ROK 1948 only - Sep 13 0:00 0 S -Rule ROK 1949 only - Apr 3 0:00 1:00 D -Rule ROK 1949 1951 - Sep Sun>=8 0:00 0 S -Rule ROK 1950 only - Apr 1 0:00 1:00 D -Rule ROK 1951 only - May 6 0:00 1:00 D -Rule ROK 1955 only - May 5 0:00 1:00 D -Rule ROK 1955 only - Sep 9 0:00 0 S -Rule ROK 1956 only - May 20 0:00 1:00 D -Rule ROK 1956 only - Sep 30 0:00 0 S -Rule ROK 1957 1960 - May Sun>=1 0:00 1:00 D -Rule ROK 1957 1960 - Sep Sun>=18 0:00 0 S -Rule ROK 1987 1988 - May Sun>=8 2:00 1:00 D -Rule ROK 1987 1988 - Oct Sun>=8 3:00 0 S +Rule ROK 1948 only - Jun 1 0:00 1:00 D +Rule ROK 1948 only - Sep 12 24:00 0 S +Rule ROK 1949 only - Apr 3 0:00 1:00 D +Rule ROK 1949 1951 - Sep Sat>=7 24:00 0 S +Rule ROK 1950 only - Apr 1 0:00 1:00 D +Rule ROK 1951 only - May 6 0:00 1:00 D +Rule ROK 1955 only - May 5 0:00 1:00 D +Rule ROK 1955 only - Sep 8 24:00 0 S +Rule ROK 1956 only - May 20 0:00 1:00 D +Rule ROK 1956 only - Sep 29 24:00 0 S +Rule ROK 1957 1960 - May Sun>=1 0:00 1:00 D +Rule ROK 1957 1960 - Sep Sat>=17 24:00 0 S +Rule ROK 1987 1988 - May Sun>=8 2:00 1:00 D +Rule ROK 1987 1988 - Oct Sun>=8 3:00 0 S # From Paul Eggert (2016-08-23): # The Korean Wikipedia entry gives the following sources for UT offsets: @@ -2859,9 +3089,15 @@ Zone Asia/Karachi 4:28:12 - LMT 1907 # the official website, though the decree did not specify the exact # time of the time shift. # http://www.palestinecabinet.gov.ps/Website/AR/NDecrees/ViewFile.ashx?ID=e7a42ab7-ee23-435a-b9c8-a4f7e81f3817 + +# From Even Scharning (2019-03-23): +# DST in Palestine will start on 30 March this year, not 23 March as the time +# zone database predicted. +# https://ramallah.news/post/123610 # -# From Paul Eggert (2018-03-16): -# For 2016 on, predict spring transitions on March's fourth Saturday at 01:00. 
+# From Tim Parenti (2019-03-23): +# Combining this with the rules observed since 2016, adjust our spring +# transition guess to Mar Sat>=24. # Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S Rule EgyptAsia 1957 only - May 10 0:00 1:00 S @@ -2892,7 +3128,7 @@ Rule Palestine 2012 only - Sep 21 1:00 0 - Rule Palestine 2013 only - Sep Fri>=21 0:00 0 - Rule Palestine 2014 2015 - Oct Fri>=21 0:00 0 - Rule Palestine 2015 only - Mar lastFri 24:00 1:00 S -Rule Palestine 2016 max - Mar Sat>=22 1:00 1:00 S +Rule Palestine 2016 max - Mar Sat>=24 1:00 1:00 S Rule Palestine 2016 max - Oct lastSat 1:00 0 - # Zone NAME GMTOFF RULES FORMAT [UNTIL] @@ -2920,6 +3156,11 @@ Zone Asia/Hebron 2:20:23 - LMT 1900 Oct # no information # Philippines + +# From Paul Eggert (2018-11-18): +# The Spanish initially used American (west-of-Greenwich) time. +# It is unknown what time Manila kept when the British occupied it from +# 1762-10-06 through 1764-04; for now assume it kept American time. # On 1844-08-16, Narciso Clavería, governor-general of the # Philippines, issued a proclamation announcing that 1844-12-30 was to # be immediately followed by 1845-01-01; see R.H. van Gent's @@ -3005,8 +3246,8 @@ Link Asia/Qatar Asia/Bahrain # going to run on Higgins Time.' And so, until last year, it did." See: # Antar E. Dinner at When? Saudi Aramco World, 1969 March/April. 2-3. # http://archive.aramcoworld.com/issue/196902/dinner.at.when.htm -# newspapers.com says a similar story about Higgins was published in the Port -# Angeles (WA) Evening News, 1965-03-10, page 5, but I lack access to the text. +# Also see: Antar EN. Arabian flying is confusing. +# Port Angeles (WA) Evening News. 1965-03-10. page 3. # # The TZ database cannot represent quasi-solar time; airline time is the best # we can do. The 1946 foreign air news digest of the U.S. 
Civil Aeronautics @@ -3379,5 +3620,17 @@ Zone Asia/Ho_Chi_Minh 7:06:40 - LMT 1906 Jul 1 8:00 - +08 1975 Jun 13 7:00 - +07 +# From Paul Eggert (2019-02-19): +# +# The Ho Chi Minh entry suffices for most purposes as it agrees with all of +# Vietnam since 1975-06-13. Presumably clocks often changed in south Vietnam +# in the early 1970s as locations changed hands during the war; however the +# details are unknown and would likely be too voluminous for this database. +# +# For timestamps in north Vietnam back to 1970 (the tzdb cutoff), +# use Asia/Bangkok; see the VN entries in the file zone1970.tab. +# For timestamps before 1970, see Asia/Hanoi in the file 'backzone'. + + # Yemen # See Asia/Riyadh. diff --git a/usr/src/data/zoneinfo/australasia b/usr/src/data/zoneinfo/australasia index 87ba620d9f..dfe73d3f9d 100644 --- a/usr/src/data/zoneinfo/australasia +++ b/usr/src/data/zoneinfo/australasia @@ -402,10 +402,44 @@ Zone Pacific/Tahiti -9:58:16 - LMT 1912 Oct # Papeete # it is uninhabited. # Guam + +# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# http://guamlegislature.com/Public_Laws_5th/PL05-025.pdf +# http://documents.guam.gov/wp-content/uploads/E.O.-59-7-Guam-Daylight-Savings-Time-May-6-1959.pdf +Rule Guam 1959 only - Jun 27 2:00 1:00 D +# http://documents.guam.gov/wp-content/uploads/E.O.-61-5-Revocation-of-Daylight-Saving-Time-and-Restoratio.pdf +Rule Guam 1961 only - Jan 29 2:00 0 S +# http://documents.guam.gov/wp-content/uploads/E.O.-67-13-Guam-Daylight-Savings-Time.pdf +Rule Guam 1967 only - Sep 1 2:00 1:00 D +# http://documents.guam.gov/wp-content/uploads/E.O.-69-2-Repeal-of-Guam-Daylight-Saving-Time.pdf +Rule Guam 1969 only - Jan 26 0:01 0 S +# http://documents.guam.gov/wp-content/uploads/E.O.-69-10-Guam-Daylight-Saving-Time.pdf +Rule Guam 1969 only - Jun 22 2:00 1:00 D +Rule Guam 1969 only - Aug 31 2:00 0 S +# http://documents.guam.gov/wp-content/uploads/E.O.-70-10-Guam-Daylight-Saving-Time.pdf +# 
http://documents.guam.gov/wp-content/uploads/E.O.-70-30-End-of-Guam-Daylight-Saving-Time.pdf +# http://documents.guam.gov/wp-content/uploads/E.O.-71-5-Guam-Daylight-Savings-Time.pdf +Rule Guam 1970 1971 - Apr lastSun 2:00 1:00 D +Rule Guam 1970 1971 - Sep Sun>=1 2:00 0 S +# http://documents.guam.gov/wp-content/uploads/E.O.-73-28.-Guam-Day-light-Saving-Time.pdf +Rule Guam 1973 only - Dec 16 2:00 1:00 D +# http://documents.guam.gov/wp-content/uploads/E.O.-74-7-Guam-Daylight-Savings-Time-Rescinded.pdf +Rule Guam 1974 only - Feb 24 2:00 0 S +# http://documents.guam.gov/wp-content/uploads/E.O.-76-13-Daylight-Savings-Time.pdf +Rule Guam 1976 only - May 26 2:00 1:00 D +# http://documents.guam.gov/wp-content/uploads/E.O.-76-25-Revocation-of-E.O.-76-13.pdf +Rule Guam 1976 only - Aug 22 2:01 0 S +# http://documents.guam.gov/wp-content/uploads/E.O.-77-4-Daylight-Savings-Time.pdf +Rule Guam 1977 only - Apr 24 2:00 1:00 D +# http://documents.guam.gov/wp-content/uploads/E.O.-77-18-Guam-Standard-Time.pdf +Rule Guam 1977 only - Aug 28 2:00 0 S + # Zone NAME GMTOFF RULES FORMAT [UNTIL] Zone Pacific/Guam -14:21:00 - LMT 1844 Dec 31 9:39:00 - LMT 1901 # Agana - 10:00 - GST 2000 Dec 23 # Guam + 10:00 - GST 1941 Dec 10 # Guam + 9:00 - +09 1944 Jul 31 + 10:00 Guam G%sT 2000 Dec 23 10:00 - ChST # Chamorro Standard Time Link Pacific/Guam Pacific/Saipan # N Mariana Is @@ -427,31 +461,56 @@ Zone Pacific/Kiritimati -10:29:20 - LMT 1901 # Marshall Is # Zone NAME GMTOFF RULES FORMAT [UNTIL] -Zone Pacific/Majuro 11:24:48 - LMT 1901 - 11:00 - +11 1969 Oct - 12:00 - +12 -Zone Pacific/Kwajalein 11:09:20 - LMT 1901 - 11:00 - +11 1969 Oct - -12:00 - -12 1993 Aug 20 - 12:00 - +12 +Zone Pacific/Majuro 11:24:48 - LMT 1901 + 11:00 - +11 1914 Oct + 9:00 - +09 1919 Feb 1 + 11:00 - +11 1937 + 10:00 - +10 1941 Apr 1 + 9:00 - +09 1944 Jan 30 + 11:00 - +11 1969 Oct + 12:00 - +12 +Zone Pacific/Kwajalein 11:09:20 - LMT 1901 + 11:00 - +11 1937 + 10:00 - +10 1941 Apr 1 + 9:00 - +09 1944 Feb 6 + 11:00 - +11 1969 
Oct + -12:00 - -12 1993 Aug 20 24:00 + 12:00 - +12 # Micronesia # Zone NAME GMTOFF RULES FORMAT [UNTIL] -Zone Pacific/Chuuk 10:07:08 - LMT 1901 - 10:00 - +10 -Zone Pacific/Pohnpei 10:32:52 - LMT 1901 # Kolonia - 11:00 - +11 -Zone Pacific/Kosrae 10:51:56 - LMT 1901 - 11:00 - +11 1969 Oct - 12:00 - +12 1999 - 11:00 - +11 +Zone Pacific/Chuuk -13:52:52 - LMT 1844 Dec 31 + 10:07:08 - LMT 1901 + 10:00 - +10 1914 Oct + 9:00 - +09 1919 Feb 1 + 10:00 - +10 1941 Apr 1 + 9:00 - +09 1945 Aug + 10:00 - +10 +Zone Pacific/Pohnpei -13:27:08 - LMT 1844 Dec 31 # Kolonia + 10:32:52 - LMT 1901 + 11:00 - +11 1914 Oct + 9:00 - +09 1919 Feb 1 + 11:00 - +11 1937 + 10:00 - +10 1941 Apr 1 + 9:00 - +09 1945 Aug + 11:00 - +11 +Zone Pacific/Kosrae -13:08:04 - LMT 1844 Dec 31 + 10:51:56 - LMT 1901 + 11:00 - +11 1914 Oct + 9:00 - +09 1919 Feb 1 + 11:00 - +11 1937 + 10:00 - +10 1941 Apr 1 + 9:00 - +09 1945 Aug + 11:00 - +11 1969 Oct + 12:00 - +12 1999 + 11:00 - +11 # Nauru # Zone NAME GMTOFF RULES FORMAT [UNTIL] Zone Pacific/Nauru 11:07:40 - LMT 1921 Jan 15 # Uaobe - 11:30 - +1130 1942 Mar 15 - 9:00 - +09 1944 Aug 15 - 11:30 - +1130 1979 May + 11:30 - +1130 1942 Aug 29 + 9:00 - +09 1945 Sep 8 + 11:30 - +1130 1979 Feb 10 2:00 12:00 - +12 # New Caledonia @@ -552,8 +611,9 @@ Zone Pacific/Norfolk 11:11:52 - LMT 1901 # Kingston # Palau (Belau) # Zone NAME GMTOFF RULES FORMAT [UNTIL] -Zone Pacific/Palau 8:57:56 - LMT 1901 # Koror - 9:00 - +09 +Zone Pacific/Palau -15:02:04 - LMT 1844 Dec 31 # Koror + 8:57:56 - LMT 1901 + 9:00 - +09 # Papua New Guinea # Zone NAME GMTOFF RULES FORMAT [UNTIL] @@ -815,7 +875,7 @@ Zone Pacific/Wallis 12:15:20 - LMT 1901 # tz@iana.org for general use in the future). For more, please see # the file CONTRIBUTING in the tz distribution. -# From Paul Eggert (2017-02-10): +# From Paul Eggert (2018-11-18): # # Unless otherwise specified, the source for data through 1990 is: # Thomas G. 
Shanks and Rique Pottenger, The International Atlas (6th edition), @@ -840,6 +900,7 @@ Zone Pacific/Wallis 12:15:20 - LMT 1901 # A reliable and entertaining source about time zones is # Derek Howse, Greenwich time and longitude, Philip Wilson Publishers (1997). # +# I invented the abbreviation marked "*". # The following abbreviations are from other sources. # Corrections are welcome! # std dst @@ -847,7 +908,7 @@ Zone Pacific/Wallis 12:15:20 - LMT 1901 # 8:00 AWST AWDT Western Australia # 9:30 ACST ACDT Central Australia # 10:00 AEST AEDT Eastern Australia -# 10:00 GST Guam through 2000 +# 10:00 GST GDT* Guam through 2000 # 10:00 ChST Chamorro # 11:30 NZMT NZST New Zealand through 1945 # 12:00 NZST NZDT New Zealand 1946-present @@ -1546,28 +1607,70 @@ Zone Pacific/Wallis 12:15:20 - LMT 1901 # Kwajalein -# In comp.risks 14.87 (26 August 1993), Peter Neumann writes: -# I wonder what happened in Kwajalein, where there was NO Friday, -# 1993-08-20. Thursday night at midnight Kwajalein switched sides with -# respect to the International Date Line, to rejoin its fellow islands, -# going from 11:59 p.m. Thursday to 12:00 m. Saturday in a blink. +# From an AP article (1993-08-22): +# "The nearly 3,000 Americans living on this remote Pacific atoll have a good +# excuse for not remembering Saturday night: there wasn't one. Residents were +# going to bed Friday night and waking up Sunday morning because at midnight +# -- 8 A.M. Eastern daylight time on Saturday -- Kwajalein was jumping from +# one side of the international date line to the other." +# "In Marshall Islands, Friday is followed by Sunday", NY Times. 1993-08-22. +# https://www.nytimes.com/1993/08/22/world/in-marshall-islands-friday-is-followed-by-sunday.html + +# From Phake Nick (2018-10-27): +# <https://wiki.suikawiki.org/n/南洋群島の標準時> ... 
pointed out that +# currently tzdata say Pacific/Kwajalein switched from GMT+11 to GMT-12 in +# 1969 October without explanation, however an 1993 article from NYT say it +# synchorized its day with US mainland about 40 years ago and thus the switch +# should occur at around 1950s instead. +# +# From Paul Eggert (2018-11-18): +# The NYT (actually, AP) article is vague and possibly wrong about this. +# The article says the earlier switch was "40 years ago when the United States +# Army established a missile test range here". However, the Kwajalein Test +# Center was established on 1960-10-01 and was run by the US Navy. It was +# transferred to the US Army on 1964-07-01. See "Seize the High Ground" +# <https://history.army.mil/html/books/070/70-88-1/cmhPub_70-88-1.pdf>. +# Given that Shanks was right on the money about the 1993 change, I'm inclined +# to take Shanks's word for the 1969 change unless we find better evidence. # N Mariana Is, Guam +# From Phake Nick (2018-10-27): +# Guam Island was briefly annexed by Japan during ... year 1941-1944 ... +# however there are no detailed information about what time it use during that +# period. It would probably be reasonable to assume Guam use GMT+9 during +# that period of time like the surrounding area. + +# From Paul Eggert (2018-11-18): # Howse writes (p 153) "The Spaniards, on the other hand, reached the # Philippines and the Ladrones from America," and implies that the Ladrones # (now called the Marianas) kept American date for quite some time. # For now, we assume the Ladrones switched at the same time as the Philippines; # see Asia/Manila. - +# +# Use 1941-12-10 and 1944-07-31 for Guam WWII transitions, as the rough start +# and end of Japanese control of Agana. We don't know whether the Northern +# Marianas followed Guam's DST rules from 1959 through 1977; for now, assume +# they did as that avoids the need for a separate zone due to our 1970 cutoff. 
+# # US Public Law 106-564 (2000-12-23) made UT +10 the official standard time, # under the name "Chamorro Standard Time". There is no official abbreviation, # but Congressman Robert A. Underwood, author of the bill that became law, # wrote in a press release (2000-12-27) that he will seek the use of "ChST". +# See also the commentary for Micronesia. -# Micronesia + +# Marshall Is +# See the commentary for Micronesia. + + +# Micronesia (and nearby) + +# From Paul Eggert (2018-11-18): +# Like the Ladrones (see Guam commentary), assume the Spanish East Indies +# kept American time until the Philippines switched at the end of 1844. # Alan Eugene Davis writes (1996-03-16), # "I am certain, having lived there for the past decade, that 'Truk' @@ -1583,6 +1686,95 @@ Zone Pacific/Wallis 12:15:20 - LMT 1901 # that Truk and Yap are UT +10, and Ponape and Kosrae are +11. # We don't know when Kosrae switched from +12; assume January 1 for now. +# From Phake Nick (2018-10-27): +# +# From a Japanese wiki site https://wiki.suikawiki.org/n/南洋群島の標準時 +# ... +# For "Southern Islands" (modern region of Mariana + Palau + Federation of +# Micronesia + Marshall Islands): +# +# A 1906 Japanese magazine shown the Caroline Islands and Mariana Islands +# who was occupied by Germany at the time as GMT+10, together with the like +# of German New Guinea. However there is a marking saying it have not been +# implemented (yet). No further information after that were found. +# +# Japan invaded those islands in 1914, and records shows that they were +# instructed to use JST at the time. +# +# 1915 January telecommunication record on the Jaluit Atoll shows they use +# the meridian of 170E as standard time (GMT+11:20), which is similar to the +# longitude of the atoll. +# 1915 February record say the 170E standard time is to be used until +# February 9 noon, and after February 9 noon they are to use JST. 
+# However these are time used within the Japanese Military at the time and +# probably does not reflect the time used by local resident at the time (that +# is if they keep their own time back then) +# +# In January 1919 the occupying force issued a command that split the area +# into three different timezone with meridian of 135E, 150E, 165E (JST+0, +1, +# +2), and the command was to become effective from February 1 of the same +# year. Despite the target of the command is still only for the occupying +# force itself, further publication have described the time as the standard +# time for the occupied area and thus it can probably be seen as such. +# * Area that use meridian of 135E: Palau and Yap civil administration area +# (Southern Islands Western Standard Time) +# * Area that use meridian of 150E: Truk (Chuuk) and Saipan civil +# administration area (Southern Islands Central Standard Time) +# * Area that use meridian of 165E: Ponape (Pohnpei) and Jaluit civil +# administration area (Southern Islands Eastern Standard Time). +# * In the next few years Japanese occupation of those islands have been +# formalized via League of Nation Mandate (South Pacific Mandate) and formal +# governance structure have been established, these district [become +# subprefectures] and timezone classification have been inherited as standard +# time of the area. +# * Saipan subprefecture include Mariana islands (exclude Guam which was +# occupied by America at the time), Palau and Yap subprefecture rule the +# Western Caroline Islands with 137E longitude as border, Truk and Ponape +# subprefecture rule the Eastern Caroline Islands with 154E as border, Ponape +# subprefecture also rule part of Marshall Islands to the west of 164E +# starting from (1918?) and Jaluit subprefecture rule the rest of the +# Marshall Islands. 
+# +# And then in year 1937, an announcement was made to change the time in the +# area into 2 timezones: +# * Area that use meridian of 135E: area administered by Palau, Yap and +# Saipan subprefecture (Southern Islands Western Standard Time) +# * Area that use meridian of 150E: area administered by Truk (Chuuk), +# Ponape (Pohnpei) and Jaluit subprefecture (Southern Islands Eastern +# Standard Time) +# +# Another announcement issued in 1941 say that on April 1 that year, +# standard time of the Southern Islands would be changed to use the meridian +# of 135E (GMT+9), and thus abolishing timezone different within the area. +# +# Then Pacific theater of WWII started and Japan slowly lose control on the +# island. The webpage I linked above contain no information during this +# period of time.... +# +# After the end of WWII, in 1946 February, a document written by the +# (former?) Japanese military personnel describe there are 3 hours time +# different between Caroline islands time/Wake island time and the Chungking +# time, which would mean the time being used there at the time was GMT+10. +# +# After that, the area become Trust Territories of the Pacific Islands +# under American administration from year 1947. The site listed some +# American/International books/maps/publications about time used in those +# area during this period of time but they doesn't seems to be reliable +# information so it would be the best if someone know where can more reliable +# information can be found. +# +# +# From Paul Eggert (2018-11-18): +# +# For the above, use vague dates like "1914" and "1945" for transitions that +# plausibly exist but for which the details are not known. The information +# for Wake is too sketchy to act on. +# +# The 1906 GMT+10 info about German-controlled islands might not have been +# done, so omit it from the data for now. +# +# The Jaluit info governs Kwajalein. 
+ # Midway @@ -1600,6 +1792,29 @@ Zone Pacific/Wallis 12:15:20 - LMT 1901 # started DST on June 3. Possibly DST was observed other years # in Midway, but we have no record of it. +# Nauru + +# From Phake Nick (2018-10-31): +# Currently, the tz database say Nauru use LMT until 1921, and then +# switched to GMT+11:30 for the next two decades. +# However, a number of timezone map published in America/Japan back then +# showed its timezone as GMT+11 per https://wiki.suikawiki.org/n/ナウルの標準時 +# And it would also be nice if the 1921 transition date could be sourced. +# ... +# The "Nauru Standard Time Act 1978 Time Change" +# http://ronlaw.gov.nr/nauru_lpms/files/gazettes/4b23a17d2030150404db7a5fa5872f52.pdf#page=3 +# based on "Nauru Standard Time Act 1978 Time Change" +# http://www.paclii.org/nr/legis/num_act/nsta1978207/ defined that "Nauru +# Alternative Time" (GMT+12) should be in effect from 1979 Feb. +# +# From Paul Eggert (2018-11-19): +# The 1921-01-15 introduction of standard time is in Shanks; it is also in +# "Standard Time Throughout the World", US National Bureau of Standards (1935), +# page 3, which does not give the UT offset. In response to a comment by +# Phake Nick I set the Nauru time of occupation by Japan to +# 1942-08-29/1945-09-08 by using dates from: +# https://en.wikipedia.org/wiki/Japanese_occupation_of_Nauru + # Norfolk # From Alexander Krivenyshev (2015-09-23): @@ -1615,6 +1830,9 @@ Zone Pacific/Wallis 12:15:20 - LMT 1901 # other than in 1974/5. See: # https://www.timeanddate.com/time/australia/norfolk-island.html +# Palau +# See commentary for Micronesia. + # Pitcairn # From Rives McDow (1999-11-08): @@ -1779,6 +1997,9 @@ Zone Pacific/Wallis 12:15:20 - LMT 1901 # From Paul Eggert (2003-03-23): # We have no other report of DST in Wake Island, so omit this info for now. +# See also the commentary for Micronesia. 
+ + ############################################################################### # The International Date Line diff --git a/usr/src/data/zoneinfo/backward b/usr/src/data/zoneinfo/backward index 51e10f4ce4..b4ae3cf9d3 100644 --- a/usr/src/data/zoneinfo/backward +++ b/usr/src/data/zoneinfo/backward @@ -77,6 +77,7 @@ Link Pacific/Easter Chile/EasterIsland Link America/Havana Cuba Link Africa/Cairo Egypt Link Europe/Dublin Eire +Link Etc/UTC Etc/UCT Link Europe/London Europe/Belfast Link Europe/Chisinau Europe/Tiraspol Link Europe/London GB @@ -111,7 +112,7 @@ Link Asia/Taipei ROC Link Asia/Seoul ROK Link Asia/Singapore Singapore Link Europe/Istanbul Turkey -Link Etc/UCT UCT +Link Etc/UTC UCT Link America/Anchorage US/Alaska Link America/Adak US/Aleutian Link America/Phoenix US/Arizona diff --git a/usr/src/data/zoneinfo/country.tab b/usr/src/data/zoneinfo/country.tab index c2e0f8eafc..a4ff61a4d3 100644 --- a/usr/src/data/zoneinfo/country.tab +++ b/usr/src/data/zoneinfo/country.tab @@ -9,8 +9,8 @@ # All text uses UTF-8 encoding. The columns of the table are as follows: # # 1. ISO 3166-1 alpha-2 country code, current as of -# ISO 3166-1 N905 (2016-11-15). See: Updates on ISO 3166-1 -# http://isotc.iso.org/livelink/livelink/Open/16944257 +# ISO 3166-1 N976 (2018-11-06). See: Updates on ISO 3166-1 +# https://isotc.iso.org/livelink/livelink/Open/16944257 # 2. The usual English name for the coded region, # chosen so that alphabetic sorting of subsets produces helpful lists. # This is not the same as the English name in the ISO 3166 tables. 
@@ -166,7 +166,7 @@ ME Montenegro MF St Martin (French) MG Madagascar MH Marshall Islands -MK Macedonia +MK North Macedonia ML Mali MM Myanmar (Burma) MN Mongolia @@ -235,7 +235,7 @@ ST Sao Tome & Principe SV El Salvador SX St Maarten (Dutch) SY Syria -SZ Swaziland +SZ Eswatini (Swaziland) TC Turks & Caicos Is TD Chad TF French Southern & Antarctic Lands diff --git a/usr/src/data/zoneinfo/etcetera b/usr/src/data/zoneinfo/etcetera index 91ded935c4..a1606bde84 100644 --- a/usr/src/data/zoneinfo/etcetera +++ b/usr/src/data/zoneinfo/etcetera @@ -19,7 +19,6 @@ Zone Etc/GMT 0 - GMT Zone Etc/UTC 0 - UTC -Zone Etc/UCT 0 - UCT # The following link uses older naming conventions, # but it belongs here, not in the file 'backward', diff --git a/usr/src/data/zoneinfo/europe b/usr/src/data/zoneinfo/europe index 587f7b03cc..b735a48d74 100644 --- a/usr/src/data/zoneinfo/europe +++ b/usr/src/data/zoneinfo/europe @@ -1855,7 +1855,7 @@ Zone Europe/Luxembourg 0:24:36 - LMT 1904 Jun 1:00 Belgium CE%sT 1977 1:00 EU CE%sT -# Macedonia +# North Macedonia # See Europe/Belgrade. # Malta @@ -3359,7 +3359,7 @@ Zone Europe/Belgrade 1:22:00 - LMT 1884 Link Europe/Belgrade Europe/Ljubljana # Slovenia Link Europe/Belgrade Europe/Podgorica # Montenegro Link Europe/Belgrade Europe/Sarajevo # Bosnia and Herzegovina -Link Europe/Belgrade Europe/Skopje # Macedonia +Link Europe/Belgrade Europe/Skopje # North Macedonia Link Europe/Belgrade Europe/Zagreb # Croatia # Slovakia diff --git a/usr/src/data/zoneinfo/northamerica b/usr/src/data/zoneinfo/northamerica index 9d5bad2f7b..eee8de0fbc 100644 --- a/usr/src/data/zoneinfo/northamerica +++ b/usr/src/data/zoneinfo/northamerica @@ -599,6 +599,26 @@ Zone America/Los_Angeles -7:52:58 - LMT 1883 Nov 18 12:07:02 # between AKST and AKDT from now on.... 
# https://www.krbd.org/2015/10/30/annette-island-times-they-are-a-changing/ +# From Ryan Stanley (2018-11-06): +# The Metlakatla community in Alaska has decided not to change its +# clock back an hour starting on November 4th, 2018 (day before yesterday). +# They will be gmtoff=-28800 year-round. +# https://www.facebook.com/141055983004923/photos/pb.141055983004923.-2207520000.1541465673./569081370202380/ + +# From Paul Eggert (2018-12-16): +# In a 2018-12-11 special election, Metlakatla voted to go back to +# Alaska time (including daylight saving time) starting next year. +# https://www.krbd.org/2018/12/12/metlakatla-to-follow-alaska-standard-time-allow-liquor-sales/ +# +# From Ryan Stanley (2019-01-11): +# The community will be changing back on the 20th of this month... +# From Tim Parenti (2019-01-11): +# Per an announcement on the Metlakatla community's official Facebook page, the +# "fall back" will be on Sunday 2019-01-20 at 02:00: +# https://www.facebook.com/141055983004923/photos/607150969728753/ +# So they won't be waiting for Alaska to join them on 2019-03-10, but will +# rather change their clocks twice in seven weeks. 
+ # Zone NAME GMTOFF RULES FORMAT [UNTIL] Zone America/Juneau 15:02:19 - LMT 1867 Oct 19 15:33:32 -8:57:41 - LMT 1900 Aug 20 12:00 @@ -625,6 +645,8 @@ Zone America/Metlakatla 15:13:42 - LMT 1867 Oct 19 15:44:55 -8:00 - PST 1969 -8:00 US P%sT 1983 Oct 30 2:00 -8:00 - PST 2015 Nov 1 2:00 + -9:00 US AK%sT 2018 Nov 4 2:00 + -8:00 - PST 2019 Jan 20 2:00 -9:00 US AK%sT Zone America/Yakutat 14:41:05 - LMT 1867 Oct 19 15:12:18 -9:18:55 - LMT 1900 Aug 20 12:00 @@ -785,6 +807,22 @@ Zone America/Boise -7:44:49 - LMT 1883 Nov 18 12:15:11 # For a map of Indiana's time zone regions, see: # https://en.wikipedia.org/wiki/Time_in_Indiana # +# From Paul Eggert (2018-11-30): +# A brief but entertaining history of time in Indiana describes a 1949 debate +# in the Indiana House where city legislators (who favored "fast time") +# tussled with farm legislators (who didn't) over a bill to outlaw DST: +# "Lacking enough votes, the city faction tries to filibuster until time runs +# out on the session at midnight, but rural champion Rep. Herbert Copeland, +# R-Madison, leans over the gallery railing and forces the official clock +# back to 9 p.m., breaking it in the process. The clock sticks on 9 as the +# debate rages on into the night. The filibuster finally dies out and the +# bill passes, while outside the chamber, clocks read 3:30 a.m. In the end, +# it doesn't matter which side won. The law has no enforcement powers and +# is simply ignored by fast-time communities." +# How Indiana went from 'God's time' to split zones and daylight-saving. +# Indianapolis Star. 2018-11-27 14:58 -05. 
+# https://www.indystar.com/story/news/politics/2018/11/27/indianapolis-indiana-time-zone-history-central-eastern-daylight-savings-time/2126300002/ +# # From Paul Eggert (2007-08-17): # Since 1970, most of Indiana has been like America/Indiana/Indianapolis, # with the following exceptions: diff --git a/usr/src/data/zoneinfo/zone.tab.txt b/usr/src/data/zoneinfo/zone.tab.txt index dcb6e1da60..27e1dee61e 100644 --- a/usr/src/data/zoneinfo/zone.tab.txt +++ b/usr/src/data/zoneinfo/zone.tab.txt @@ -239,6 +239,7 @@ KW +2920+04759 Asia/Kuwait KY +1918-08123 America/Cayman KZ +4315+07657 Asia/Almaty Kazakhstan (most areas) KZ +4448+06528 Asia/Qyzylorda Qyzylorda/Kyzylorda/Kzyl-Orda +KZ +5312+06337 Asia/Qostanay Qostanay/Kostanay/Kustanay KZ +5017+05710 Asia/Aqtobe Aqtobe/Aktobe KZ +4431+05016 Asia/Aqtau Mangghystau/Mankistau KZ +4707+05156 Asia/Atyrau Atyrau/Atirau/Gur'yev @@ -332,9 +333,9 @@ RS +4450+02030 Europe/Belgrade RU +5443+02030 Europe/Kaliningrad MSK-01 - Kaliningrad RU +554521+0373704 Europe/Moscow MSK+00 - Moscow area RU +4457+03406 Europe/Simferopol MSK+00 - Crimea -RU +4844+04425 Europe/Volgograd MSK+00 - Volgograd RU +5836+04939 Europe/Kirov MSK+00 - Kirov RU +4621+04803 Europe/Astrakhan MSK+01 - Astrakhan +RU +4844+04425 Europe/Volgograd MSK+01 - Volgograd RU +5134+04602 Europe/Saratov MSK+01 - Saratov RU +5420+04824 Europe/Ulyanovsk MSK+01 - Ulyanovsk RU +5312+05009 Europe/Samara MSK+01 - Samara, Udmurtia diff --git a/usr/src/data/zoneinfo/zone_sun.tab b/usr/src/data/zoneinfo/zone_sun.tab index 4ef71391f7..85fbb6418b 100644 --- a/usr/src/data/zoneinfo/zone_sun.tab +++ b/usr/src/data/zoneinfo/zone_sun.tab @@ -262,6 +262,7 @@ KW +2920+04759 Asia/Kuwait - KY +1918-08123 America/Cayman - KZ +4315+07657 Asia/Almaty - Kazakhstan (most areas) KZ +4448+06528 Asia/Qyzylorda Qyzylorda/Kyzylorda/Kzyl-Orda +KZ +5312+06337 Asia/Qostanay - Qostanay/Kostanay/Kustanay KZ +5017+05710 Asia/Aqtobe Aqtobe/Aktobe KZ +4431+05016 Asia/Aqtau - Mangghystau/Mankistau KZ +4707+05156 
Asia/Atyrau - Atyrau/Atirau/Gur'yev @@ -355,9 +356,9 @@ RS +4450+02030 Europe/Belgrade - RU +5443+02030 Europe/Kaliningrad - MSK-01 - Kaliningrad RU +554521+0373704 Europe/Moscow - MSK+00 - Moscow area RU +4457+03406 Europe/Simferopol - MSK+00 - Crimea -RU +4844+04425 Europe/Volgograd - MSK+00 - Volgograd RU +5836+04939 Europe/Kirov - MSK+00 - Kirov RU +4621+04803 Europe/Astrakhan - MSK+01 - Astrakhan +RU +4844+04425 Europe/Volgograd - MSK+01 - Volgograd RU +5134+04602 Europe/Saratov - MSK+01 - Saratov RU +5420+04824 Europe/Ulyanovsk - MSK+01 - Ulyanovsk RU +5312+05009 Europe/Samara - MSK+01 - Samara, Udmurtia diff --git a/usr/src/lib/libfakekernel/common/cond.c b/usr/src/lib/libfakekernel/common/cond.c index 08d0265dd2..859722cb27 100644 --- a/usr/src/lib/libfakekernel/common/cond.c +++ b/usr/src/lib/libfakekernel/common/cond.c @@ -12,6 +12,7 @@ /* * Copyright 2013 Nexenta Systems, Inc. All rights reserved. * Copyright 2017 RackTop Systems. + * Copyright 2019 Joyent, Inc. */ /* @@ -119,6 +120,15 @@ cv_timedwait_sig(kcondvar_t *cv, kmutex_t *mp, clock_t abstime) return (cv__twait(cv, mp, delta, 1, 0)); } +int +cv_timedwait_sig_hrtime(kcondvar_t *cv, kmutex_t *mp, hrtime_t tim) +{ + clock_t delta; + + delta = tim; + return (cv__twait(cv, mp, delta, 1, 1)); +} + /*ARGSUSED*/ clock_t cv_timedwait_hires(kcondvar_t *cv, kmutex_t *mp, hrtime_t tim, hrtime_t res, diff --git a/usr/src/lib/libfakekernel/common/mapfile-vers b/usr/src/lib/libfakekernel/common/mapfile-vers index c7aa5a8f7d..b33f6fc160 100644 --- a/usr/src/lib/libfakekernel/common/mapfile-vers +++ b/usr/src/lib/libfakekernel/common/mapfile-vers @@ -13,7 +13,7 @@ # Copyright 2015 Nexenta Systems, Inc. All rights reserved. # Copyright (c) 2017, Joyent, Inc. # Copyright 2017 RackTop Systems. -# Copyright 2019, Joyent, Inc. +# Copyright 2019 Joyent, Inc. 
# # @@ -70,6 +70,7 @@ SYMBOL_VERSION SUNWprivate_1.1 { cv_signal; cv_timedwait; cv_timedwait_sig; + cv_timedwait_sig_hrtime; cv_timedwait_hires; cv_wait; cv_wait_sig; diff --git a/usr/src/lib/libzfs/common/libzfs.h b/usr/src/lib/libzfs/common/libzfs.h index 09ce507dc0..5726530ee2 100644 --- a/usr/src/lib/libzfs/common/libzfs.h +++ b/usr/src/lib/libzfs/common/libzfs.h @@ -23,7 +23,7 @@ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011 Pawel Jakub Dawidek. All rights reserved. * Copyright (c) 2011, 2017 by Delphix. All rights reserved. - * Copyright (c) 2012, Joyent, Inc. All rights reserved. + * Copyright 2019 Joyent, Inc. * Copyright (c) 2013 Steven Hartland. All rights reserved. * Copyright (c) 2014 Integros [integros.com] * Copyright 2016 Nexenta Systems, Inc. @@ -131,6 +131,7 @@ typedef enum zfs_error { EZFS_DIFFDATA, /* bad zfs diff data */ EZFS_POOLREADONLY, /* pool is in read-only mode */ EZFS_SCRUB_PAUSED, /* scrub currently paused */ + EZFS_ACTIVE_POOL, /* pool is imported on a different system */ EZFS_NO_PENDING, /* cannot cancel, no operation is pending */ EZFS_CHECKPOINT_EXISTS, /* checkpoint exists */ EZFS_DISCARDING_CHECKPOINT, /* currently discarding a checkpoint */ @@ -315,6 +316,8 @@ typedef enum { /* * The following correspond to faults as defined in the (fault.fs.zfs.*) * event namespace. Each is associated with a corresponding message ID. + * This must be kept in sync with the zfs_msgid_table in + * lib/libzfs/libzfs_status.c. 
*/ ZPOOL_STATUS_CORRUPT_CACHE, /* corrupt /kernel/drv/zpool.cache */ ZPOOL_STATUS_MISSING_DEV_R, /* missing device with replicas */ @@ -327,8 +330,11 @@ typedef enum { ZPOOL_STATUS_FAILING_DEV, /* device experiencing errors */ ZPOOL_STATUS_VERSION_NEWER, /* newer on-disk version */ ZPOOL_STATUS_HOSTID_MISMATCH, /* last accessed by another system */ + ZPOOL_STATUS_HOSTID_ACTIVE, /* currently active on another system */ + ZPOOL_STATUS_HOSTID_REQUIRED, /* multihost=on and hostid=0 */ ZPOOL_STATUS_IO_FAILURE_WAIT, /* failed I/O, failmode 'wait' */ ZPOOL_STATUS_IO_FAILURE_CONTINUE, /* failed I/O, failmode 'continue' */ + ZPOOL_STATUS_IO_FAILURE_MMP, /* failed MMP, failmode not 'panic' */ ZPOOL_STATUS_BAD_LOG, /* cannot read log chain(s) */ /* @@ -406,6 +412,8 @@ typedef struct importargs { } importargs_t; extern nvlist_t *zpool_search_import(libzfs_handle_t *, importargs_t *); +extern int zpool_tryimport(libzfs_handle_t *hdl, char *target, + nvlist_t **configp, importargs_t *args); /* legacy pool search routines */ extern nvlist_t *zpool_find_import(libzfs_handle_t *, int, char **); @@ -725,6 +733,7 @@ extern boolean_t zfs_dataset_exists(libzfs_handle_t *, const char *, zfs_type_t); extern int zfs_spa_version(zfs_handle_t *, int *); extern boolean_t zfs_bookmark_exists(const char *path); +extern ulong_t get_system_hostid(void); /* * Mount support functions. 
diff --git a/usr/src/lib/libzfs/common/libzfs_dataset.c b/usr/src/lib/libzfs/common/libzfs_dataset.c index 71a72dd16b..940ed878f2 100644 --- a/usr/src/lib/libzfs/common/libzfs_dataset.c +++ b/usr/src/lib/libzfs/common/libzfs_dataset.c @@ -444,6 +444,8 @@ make_dataset_handle_common(zfs_handle_t *zhp, zfs_cmd_t *zc) zhp->zfs_head_type = ZFS_TYPE_VOLUME; else if (zhp->zfs_dmustats.dds_type == DMU_OST_ZFS) zhp->zfs_head_type = ZFS_TYPE_FILESYSTEM; + else if (zhp->zfs_dmustats.dds_type == DMU_OST_OTHER) + return (-1); else abort(); diff --git a/usr/src/lib/libzfs/common/libzfs_impl.h b/usr/src/lib/libzfs/common/libzfs_impl.h index 4c0c89e989..82c04798c7 100644 --- a/usr/src/lib/libzfs/common/libzfs_impl.h +++ b/usr/src/lib/libzfs/common/libzfs_impl.h @@ -141,7 +141,7 @@ typedef enum { SHARED_SMB = 0x4 } zfs_share_type_t; -#define CONFIG_BUF_MINSIZE 65536 +#define CONFIG_BUF_MINSIZE 262144 int zfs_error(libzfs_handle_t *, int, const char *); int zfs_error_fmt(libzfs_handle_t *, int, const char *, ...); diff --git a/usr/src/lib/libzfs/common/libzfs_import.c b/usr/src/lib/libzfs/common/libzfs_import.c index 5bd900ad00..ce5864a62b 100644 --- a/usr/src/lib/libzfs/common/libzfs_import.c +++ b/usr/src/lib/libzfs/common/libzfs_import.c @@ -1438,16 +1438,87 @@ name_or_guid_exists(zpool_handle_t *zhp, void *data) nvlist_t * zpool_search_import(libzfs_handle_t *hdl, importargs_t *import) { + nvlist_t *pools = NULL; + verify(import->poolname == NULL || import->guid == 0); if (import->unique) import->exists = zpool_iter(hdl, name_or_guid_exists, import); if (import->cachefile != NULL) - return (zpool_find_import_cached(hdl, import->cachefile, - import->poolname, import->guid)); + pools = zpool_find_import_cached(hdl, import->cachefile, + import->poolname, import->guid); + else + pools = zpool_find_import_impl(hdl, import); + + return (pools); +} + +static boolean_t +pool_match(nvlist_t *cfg, char *tgt) +{ + uint64_t v, guid = strtoull(tgt, NULL, 0); + char *s; + + if (guid != 0) { + 
if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID, &v) == 0) + return (v == guid); + } else { + if (nvlist_lookup_string(cfg, ZPOOL_CONFIG_POOL_NAME, &s) == 0) + return (strcmp(s, tgt) == 0); + } + return (B_FALSE); +} + +int +zpool_tryimport(libzfs_handle_t *hdl, char *target, nvlist_t **configp, + importargs_t *args) +{ + nvlist_t *pools; + nvlist_t *match = NULL; + nvlist_t *config = NULL; + char *sepp = NULL; + int count = 0; + char *targetdup = strdup(target); + + *configp = NULL; - return (zpool_find_import_impl(hdl, import)); + if ((sepp = strpbrk(targetdup, "/@")) != NULL) { + *sepp = '\0'; + } + + pools = zpool_search_import(hdl, args); + + if (pools != NULL) { + nvpair_t *elem = NULL; + while ((elem = nvlist_next_nvpair(pools, elem)) != NULL) { + VERIFY0(nvpair_value_nvlist(elem, &config)); + if (pool_match(config, targetdup)) { + count++; + if (match != NULL) { + /* multiple matches found */ + continue; + } else { + match = config; + } + } + } + } + + if (count == 0) { + free(targetdup); + return (ENOENT); + } + + if (count > 1) { + free(targetdup); + return (EINVAL); + } + + *configp = match; + free(targetdup); + + return (0); } boolean_t diff --git a/usr/src/lib/libzfs/common/libzfs_pool.c b/usr/src/lib/libzfs/common/libzfs_pool.c index eab6a920f0..2157b45edb 100644 --- a/usr/src/lib/libzfs/common/libzfs_pool.c +++ b/usr/src/lib/libzfs/common/libzfs_pool.c @@ -666,6 +666,15 @@ zpool_valid_proplist(libzfs_handle_t *hdl, const char *poolname, } break; + case ZPOOL_PROP_MULTIHOST: + if (get_system_hostid() == 0) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "requires a non-zero system hostid")); + (void) zfs_error(hdl, EZFS_BADPROP, errbuf); + goto error; + } + break; + default: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "property '%s'(%d) not defined"), propname, prop); @@ -1803,6 +1812,7 @@ zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname, if (error) { char desc[1024]; + char aux[256]; /* * Dry-run failed, but we print 
out what success @@ -1848,6 +1858,46 @@ zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname, (void) zfs_error(hdl, EZFS_BADVERSION, desc); break; + case EREMOTEIO: + if (nv != NULL && nvlist_lookup_nvlist(nv, + ZPOOL_CONFIG_LOAD_INFO, &nvinfo) == 0) { + char *hostname = "<unknown>"; + uint64_t hostid = 0; + mmp_state_t mmp_state; + + mmp_state = fnvlist_lookup_uint64(nvinfo, + ZPOOL_CONFIG_MMP_STATE); + + if (nvlist_exists(nvinfo, + ZPOOL_CONFIG_MMP_HOSTNAME)) + hostname = fnvlist_lookup_string(nvinfo, + ZPOOL_CONFIG_MMP_HOSTNAME); + + if (nvlist_exists(nvinfo, + ZPOOL_CONFIG_MMP_HOSTID)) + hostid = fnvlist_lookup_uint64(nvinfo, + ZPOOL_CONFIG_MMP_HOSTID); + + if (mmp_state == MMP_STATE_ACTIVE) { + (void) snprintf(aux, sizeof (aux), + dgettext(TEXT_DOMAIN, "pool is imp" + "orted on host '%s' (hostid=%lx).\n" + "Export the pool on the other " + "system, then run 'zpool import'."), + hostname, (unsigned long) hostid); + } else if (mmp_state == MMP_STATE_NO_HOSTID) { + (void) snprintf(aux, sizeof (aux), + dgettext(TEXT_DOMAIN, "pool has " + "the multihost property on and " + "the\nsystem's hostid is not " + "set.\n")); + } + + (void) zfs_error_aux(hdl, aux); + } + (void) zfs_error(hdl, EZFS_ACTIVE_POOL, desc); + break; + case EINVAL: (void) zfs_error(hdl, EZFS_INVALCONFIG, desc); break; @@ -2391,7 +2441,7 @@ zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare, } static int -vdev_online(nvlist_t *nv) +vdev_is_online(nvlist_t *nv) { uint64_t ival; @@ -2459,7 +2509,7 @@ vdev_get_physpaths(nvlist_t *nv, char *physpath, size_t phypath_size, return (EZFS_INVALCONFIG); } - if (vdev_online(nv)) { + if (vdev_is_online(nv)) { if ((ret = vdev_get_one_physpath(nv, physpath, phypath_size, rsz)) != 0) return (ret); diff --git a/usr/src/lib/libzfs/common/libzfs_status.c b/usr/src/lib/libzfs/common/libzfs_status.c index 71b27a1214..7a701b78c6 100644 --- a/usr/src/lib/libzfs/common/libzfs_status.c +++ 
b/usr/src/lib/libzfs/common/libzfs_status.c @@ -53,20 +53,36 @@ * of this table, and hence have no associated message ID. */ static char *zfs_msgid_table[] = { - "ZFS-8000-14", - "ZFS-8000-2Q", - "ZFS-8000-3C", - "ZFS-8000-4J", - "ZFS-8000-5E", - "ZFS-8000-6X", - "ZFS-8000-72", - "ZFS-8000-8A", - "ZFS-8000-9P", - "ZFS-8000-A5", - "ZFS-8000-EY", - "ZFS-8000-HC", - "ZFS-8000-JQ", - "ZFS-8000-K4", + "ZFS-8000-14", /* ZPOOL_STATUS_CORRUPT_CACHE */ + "ZFS-8000-2Q", /* ZPOOL_STATUS_MISSING_DEV_R */ + "ZFS-8000-3C", /* ZPOOL_STATUS_MISSING_DEV_NR */ + "ZFS-8000-4J", /* ZPOOL_STATUS_CORRUPT_LABEL_R */ + "ZFS-8000-5E", /* ZPOOL_STATUS_CORRUPT_LABEL_NR */ + "ZFS-8000-6X", /* ZPOOL_STATUS_BAD_GUID_SUM */ + "ZFS-8000-72", /* ZPOOL_STATUS_CORRUPT_POOL */ + "ZFS-8000-8A", /* ZPOOL_STATUS_CORRUPT_DATA */ + "ZFS-8000-9P", /* ZPOOL_STATUS_FAILING_DEV */ + "ZFS-8000-A5", /* ZPOOL_STATUS_VERSION_NEWER */ + "ZFS-8000-EY", /* ZPOOL_STATUS_HOSTID_MISMATCH */ + "ZFS-8000-EY", /* ZPOOL_STATUS_HOSTID_ACTIVE */ + "ZFS-8000-EY", /* ZPOOL_STATUS_HOSTID_REQUIRED */ + "ZFS-8000-HC", /* ZPOOL_STATUS_IO_FAILURE_WAIT */ + "ZFS-8000-JQ", /* ZPOOL_STATUS_IO_FAILURE_CONTINUE */ + "ZFS-8000-MM", /* ZPOOL_STATUS_IO_FAILURE_MMP */ + "ZFS-8000-K4", /* ZPOOL_STATUS_BAD_LOG */ + /* + * The following results have no message ID. 
+ * ZPOOL_STATUS_UNSUP_FEAT_READ + * ZPOOL_STATUS_UNSUP_FEAT_WRITE + * ZPOOL_STATUS_FAULTED_DEV_R + * ZPOOL_STATUS_FAULTED_DEV_NR + * ZPOOL_STATUS_VERSION_OLDER + * ZPOOL_STATUS_FEAT_DISABLED + * ZPOOL_STATUS_RESILVERING + * ZPOOL_STATUS_OFFLINE_DEV + * ZPOOL_STATUS_REMOVED_DEV + * ZPOOL_STATUS_OK + */ }; #define NMSGID (sizeof (zfs_msgid_table) / sizeof (zfs_msgid_table[0])) @@ -193,6 +209,7 @@ check_status(nvlist_t *config, boolean_t isimport) uint64_t stateval; uint64_t suspended; uint64_t hostid = 0; + unsigned long system_hostid = get_system_hostid(); verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &version) == 0); @@ -213,10 +230,30 @@ check_status(nvlist_t *config, boolean_t isimport) return (ZPOOL_STATUS_RESILVERING); /* + * The multihost property is set and the pool may be active. + */ + if (vs->vs_state == VDEV_STATE_CANT_OPEN && + vs->vs_aux == VDEV_AUX_ACTIVE) { + mmp_state_t mmp_state; + nvlist_t *nvinfo; + + nvinfo = fnvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO); + mmp_state = fnvlist_lookup_uint64(nvinfo, + ZPOOL_CONFIG_MMP_STATE); + + if (mmp_state == MMP_STATE_ACTIVE) + return (ZPOOL_STATUS_HOSTID_ACTIVE); + else if (mmp_state == MMP_STATE_NO_HOSTID) + return (ZPOOL_STATUS_HOSTID_REQUIRED); + else + return (ZPOOL_STATUS_HOSTID_MISMATCH); + } + + /* * Pool last accessed by another system. */ (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID, &hostid); - if (hostid != 0 && (unsigned long)hostid != gethostid() && + if (hostid != 0 && (unsigned long)hostid != system_hostid && stateval == POOL_STATE_ACTIVE) return (ZPOOL_STATUS_HOSTID_MISMATCH); @@ -249,10 +286,16 @@ check_status(nvlist_t *config, boolean_t isimport) return (ZPOOL_STATUS_BAD_GUID_SUM); /* - * Check whether the pool has suspended due to failed I/O. + * Check whether the pool has suspended. 
*/ if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_SUSPENDED, &suspended) == 0) { + uint64_t reason; + + if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_SUSPENDED_REASON, + &reason) == 0 && reason == ZIO_SUSPEND_MMP) + return (ZPOOL_STATUS_IO_FAILURE_MMP); + if (suspended == ZIO_FAILURE_MODE_CONTINUE) return (ZPOOL_STATUS_IO_FAILURE_CONTINUE); return (ZPOOL_STATUS_IO_FAILURE_WAIT); @@ -341,8 +384,9 @@ check_status(nvlist_t *config, boolean_t isimport) if (isimport) { feat = fnvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO); - feat = fnvlist_lookup_nvlist(feat, - ZPOOL_CONFIG_ENABLED_FEAT); + if (nvlist_exists(feat, ZPOOL_CONFIG_ENABLED_FEAT)) + feat = fnvlist_lookup_nvlist(feat, + ZPOOL_CONFIG_ENABLED_FEAT); } else { feat = fnvlist_lookup_nvlist(config, ZPOOL_CONFIG_FEATURE_STATS); diff --git a/usr/src/lib/libzfs/common/libzfs_util.c b/usr/src/lib/libzfs/common/libzfs_util.c index 3c00b33b02..91278b50b9 100644 --- a/usr/src/lib/libzfs/common/libzfs_util.c +++ b/usr/src/lib/libzfs/common/libzfs_util.c @@ -21,7 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2018 Joyent, Inc. + * Copyright 2019 Joyent, Inc. * Copyright (c) 2011, 2017 by Delphix. All rights reserved. * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com> * Copyright (c) 2017 Datto Inc. @@ -52,6 +52,7 @@ #include "libzfs_impl.h" #include "zfs_prop.h" +#include "zfs_comutil.h" #include "zfeature_common.h" int @@ -252,6 +253,9 @@ libzfs_error_description(libzfs_handle_t *hdl) return (dgettext(TEXT_DOMAIN, "device removal in progress")); case EZFS_VDEV_TOO_BIG: return (dgettext(TEXT_DOMAIN, "device exceeds supported size")); + case EZFS_ACTIVE_POOL: + return (dgettext(TEXT_DOMAIN, "pool is imported on a " + "different host")); case EZFS_TOOMANY: return (dgettext(TEXT_DOMAIN, "argument list too long")); case EZFS_INITIALIZING: @@ -421,6 +425,9 @@ zfs_standard_error_fmt(libzfs_handle_t *hdl, int error, const char *fmt, ...) 
"pool I/O is currently suspended")); zfs_verror(hdl, EZFS_POOLUNAVAIL, fmt, ap); break; + case EREMOTEIO: + zfs_verror(hdl, EZFS_ACTIVE_POOL, fmt, ap); + break; default: zfs_error_aux(hdl, strerror(error)); zfs_verror(hdl, EZFS_UNKNOWN, fmt, ap); @@ -508,6 +515,9 @@ zpool_standard_error_fmt(libzfs_handle_t *hdl, int error, const char *fmt, ...) case ENOTACTIVE: zfs_verror(hdl, EZFS_NO_PENDING, fmt, ap); break; + case EREMOTEIO: + zfs_verror(hdl, EZFS_ACTIVE_POOL, fmt, ap); + break; case ZFS_ERR_CHECKPOINT_EXISTS: zfs_verror(hdl, EZFS_CHECKPOINT_EXISTS, fmt, ap); break; @@ -1636,3 +1646,20 @@ zfs_get_hole_count(const char *path, uint64_t *count, uint64_t *bs) } return (0); } + +ulong_t +get_system_hostid(void) +{ + char *env; + + /* + * Allow the hostid to be subverted for testing. + */ + env = getenv("ZFS_HOSTID"); + if (env) { + ulong_t hostid = strtoull(env, NULL, 16); + return (hostid & 0xFFFFFFFF); + } + + return (gethostid()); +} diff --git a/usr/src/lib/libzfs/common/mapfile-vers b/usr/src/lib/libzfs/common/mapfile-vers index fa81e6440b..4a6e987bb4 100644 --- a/usr/src/lib/libzfs/common/mapfile-vers +++ b/usr/src/lib/libzfs/common/mapfile-vers @@ -50,6 +50,7 @@ SYMBOL_VERSION SUNWprivate_1.1 { fletcher_4_byteswap; fletcher_4_incremental_native; fletcher_4_incremental_byteswap; + get_system_hostid; libzfs_add_handle; libzfs_errno; libzfs_error_action; @@ -248,6 +249,7 @@ SYMBOL_VERSION SUNWprivate_1.1 { zpool_skip_pool; zpool_state_to_name; zpool_sync_one; + zpool_tryimport; zpool_unmount_datasets; zpool_upgrade; zpool_vdev_attach; diff --git a/usr/src/lib/libzpool/Makefile.com b/usr/src/lib/libzpool/Makefile.com index 57a171f483..47bdc10ffd 100644 --- a/usr/src/lib/libzpool/Makefile.com +++ b/usr/src/lib/libzpool/Makefile.com @@ -21,7 +21,7 @@ # # Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. # Copyright (c) 2013, 2016 by Delphix. All rights reserved. -# Copyright (c) 2018, Joyent, Inc. +# Copyright 2019, Joyent, Inc. 
# LIBRARY= libzpool.a @@ -69,7 +69,7 @@ C99LMODE= -Xc99=%all CFLAGS += $(CCGDEBUG) $(CCVERBOSE) $(CNOGLOBAL) CFLAGS64 += $(CCGDEBUG) $(CCVERBOSE) $(CNOGLOBAL) LDLIBS += -lcmdutils -lumem -lavl -lnvpair -lz -lc -lsysevent -lmd \ - -lfakekernel + -lfakekernel -lzfs CPPFLAGS.first = -I$(SRC)/lib/libfakekernel/common CPPFLAGS += $(INCS) -DDEBUG -D_FAKE_KERNEL diff --git a/usr/src/lib/libzpool/common/kernel.c b/usr/src/lib/libzpool/common/kernel.c index e74f14cc9b..fb14f88817 100644 --- a/usr/src/lib/libzpool/common/kernel.c +++ b/usr/src/lib/libzpool/common/kernel.c @@ -41,6 +41,7 @@ #include <sys/zmod.h> #include <sys/utsname.h> #include <sys/systeminfo.h> +#include <libzfs.h> extern void system_taskq_init(void); extern void system_taskq_fini(void); @@ -442,7 +443,7 @@ kernel_init(int mode) (double)physmem * sysconf(_SC_PAGE_SIZE) / (1ULL << 30)); (void) snprintf(hw_serial, sizeof (hw_serial), "%ld", - (mode & FWRITE) ? gethostid() : 0); + (mode & FWRITE) ? get_system_hostid() : 0); system_taskq_init(); diff --git a/usr/src/man/man1m/zpool.1m b/usr/src/man/man1m/zpool.1m index 52d68929f1..4d46ce3649 100644 --- a/usr/src/man/man1m/zpool.1m +++ b/usr/src/man/man1m/zpool.1m @@ -410,6 +410,11 @@ If a pool has a shared spare that is currently being used, the pool can not be exported since other pools may use this shared spare, which may lead to potential data corruption. .Pp +Shared spares add some risk. +If the pools are imported on different hosts, and both pools suffer a device +failure at the same time, both could attempt to use the spare at the same time. +This may not be detected, resulting in data corruption. +.Pp An in-progress spare replacement can be cancelled by detaching the hot spare. If the original faulted device is detached, then the hot spare assumes its place in the configuration, and is removed from the spare list of all active @@ -732,6 +737,31 @@ The default value is .Sy off . This property can also be referred to by its shortened name, .Sy listsnaps . 
+.It Sy multihost Ns = Ns Sy on Ns | Ns Sy off +Controls whether a pool activity check should be performed during +.Nm zpool Cm import . +When a pool is determined to be active it cannot be imported, even with the +.Fl f +option. +This property is intended to be used in failover configurations +where multiple hosts have access to a pool on shared storage. +.sp +Multihost provides protection on import only. +It does not protect against an +individual device being used in multiple pools, regardless of the type of vdev. +See the discussion under +.Sy zpool create . +.sp +When this property is on, periodic writes to storage occur to show the pool is +in use. +See +.Sy zfs_multihost_interval +in the +.Xr zfs-module-parameters 5 +man page. +In order to enable this property each host must set a unique hostid. +The default value is +.Sy off . .It Sy version Ns = Ns Ar version The current on-disk version of the pool. This can be increased, but never decreased. @@ -863,6 +893,10 @@ Clears device errors in a pool. If no arguments are specified, all device errors within the pool are cleared. If one or more devices is specified, only those errors associated with the specified device or devices are cleared. +If multihost is enabled, and the pool has been suspended, this will not +resume I/O. +While the pool was suspended, it may have been imported on +another host, and resuming I/O could result in pool damage. .It Xo .Nm .Cm create @@ -898,8 +932,24 @@ specification is described in the .Sx Virtual Devices section. .Pp -The command verifies that each device specified is accessible and not currently -in use by another subsystem. +The command attempts to verify that each device specified is accessible and not +currently in use by another subsystem. +However, this check is not robust enough +to detect simultaneous attempts to use a new device in different pools, even if +.Sy multihost +is +.Sy enabled .
+The +administrator must ensure that simultaneous invocations of any combination of +.Sy zpool replace , +.Sy zpool create , +.Sy zpool add , +or +.Sy zpool labelclear , +do not refer to the same device. +Using the same device in two pools will +result in pool corruption. +.sp +There are some uses, such as being currently mounted, or specified as the dedicated dump device, that prevent a device from ever being used by ZFS. Other uses, such as having a preexisting UFS file system, can be overridden with diff --git a/usr/src/pkg/manifests/system-data-zoneinfo.mf b/usr/src/pkg/manifests/system-data-zoneinfo.mf index 65d8b892fe..1234d7ca5b 100644 --- a/usr/src/pkg/manifests/system-data-zoneinfo.mf +++ b/usr/src/pkg/manifests/system-data-zoneinfo.mf @@ -13,11 +13,11 @@ # Copyright 2018 Nexenta Systems, Inc. # Copyright (c) 2014 Joyent, Inc. All rights reserved. # Copyright 2017 OmniTI Computer Consulting, Inc. All rights reserved. -# Copyright 2018 OmniOS Community Edition (OmniOSce) Association. +# Copyright 2019 OmniOS Community Edition (OmniOSce) Association.
# set name=pkg.fmri \ - value=pkg:/system/data/zoneinfo@2018.7,$(PKGVERS_BUILTON)-$(PKGVERS_BRANCH) + value=pkg:/system/data/zoneinfo@2019.1,$(PKGVERS_BUILTON)-$(PKGVERS_BRANCH) set name=pkg.description value="timezone information" set name=pkg.summary value="Timezone Information" set name=info.classification value=org.opensolaris.category.2008:System/Core @@ -275,6 +275,7 @@ file path=usr/share/lib/zoneinfo/Asia/Oral file path=usr/share/lib/zoneinfo/Asia/Pontianak file path=usr/share/lib/zoneinfo/Asia/Pyongyang file path=usr/share/lib/zoneinfo/Asia/Qatar +file path=usr/share/lib/zoneinfo/Asia/Qostanay file path=usr/share/lib/zoneinfo/Asia/Qyzylorda file path=usr/share/lib/zoneinfo/Asia/Riyadh file path=usr/share/lib/zoneinfo/Asia/Sakhalin @@ -351,7 +352,6 @@ file path=usr/share/lib/zoneinfo/Etc/GMT-6 file path=usr/share/lib/zoneinfo/Etc/GMT-7 file path=usr/share/lib/zoneinfo/Etc/GMT-8 file path=usr/share/lib/zoneinfo/Etc/GMT-9 -file path=usr/share/lib/zoneinfo/Etc/UCT file path=usr/share/lib/zoneinfo/Etc/UTC file path=usr/share/lib/zoneinfo/Europe/Amsterdam file path=usr/share/lib/zoneinfo/Europe/Andorra @@ -631,6 +631,7 @@ hardlink path=usr/share/lib/zoneinfo/Etc/GMT+0 target=../Etc/GMT hardlink path=usr/share/lib/zoneinfo/Etc/GMT-0 target=../Etc/GMT hardlink path=usr/share/lib/zoneinfo/Etc/GMT0 target=../Etc/GMT hardlink path=usr/share/lib/zoneinfo/Etc/Greenwich target=../Etc/GMT +hardlink path=usr/share/lib/zoneinfo/Etc/UCT target=UTC hardlink path=usr/share/lib/zoneinfo/Etc/Universal target=../Etc/UTC hardlink path=usr/share/lib/zoneinfo/Etc/Zulu target=../Etc/UTC hardlink path=usr/share/lib/zoneinfo/Europe/Belfast target=../Europe/London @@ -695,7 +696,7 @@ hardlink path=usr/share/lib/zoneinfo/ROC target=Asia/Taipei hardlink path=usr/share/lib/zoneinfo/ROK target=Asia/Seoul hardlink path=usr/share/lib/zoneinfo/Singapore target=Asia/Singapore hardlink path=usr/share/lib/zoneinfo/Turkey target=Europe/Istanbul -hardlink path=usr/share/lib/zoneinfo/UCT 
target=Etc/UCT +hardlink path=usr/share/lib/zoneinfo/UCT target=Etc/UTC hardlink path=usr/share/lib/zoneinfo/US/Alaska target=../America/Anchorage hardlink path=usr/share/lib/zoneinfo/US/Aleutian target=../America/Adak hardlink path=usr/share/lib/zoneinfo/US/Arizona target=../America/Phoenix diff --git a/usr/src/pkg/manifests/system-test-zfstest.mf b/usr/src/pkg/manifests/system-test-zfstest.mf index eed3016079..06386c5ac5 100644 --- a/usr/src/pkg/manifests/system-test-zfstest.mf +++ b/usr/src/pkg/manifests/system-test-zfstest.mf @@ -120,6 +120,7 @@ dir path=opt/zfs-tests/tests/functional/link_count dir path=opt/zfs-tests/tests/functional/mdb dir path=opt/zfs-tests/tests/functional/migration dir path=opt/zfs-tests/tests/functional/mmap +dir path=opt/zfs-tests/tests/functional/mmp dir path=opt/zfs-tests/tests/functional/mount dir path=opt/zfs-tests/tests/functional/mv_files dir path=opt/zfs-tests/tests/functional/nestedfs @@ -188,6 +189,7 @@ file path=opt/zfs-tests/bin/rename_dir mode=0555 file path=opt/zfs-tests/bin/rm_lnkcnt_zero_file mode=0555 file path=opt/zfs-tests/bin/zfstest mode=0555 file path=opt/zfs-tests/callbacks/zfs_dbgmsg mode=0555 +file path=opt/zfs-tests/callbacks/zfs_mmp mode=0555 file path=opt/zfs-tests/include/commands.cfg mode=0444 file path=opt/zfs-tests/include/default.cfg mode=0444 file path=opt/zfs-tests/include/libtest.shlib mode=0444 @@ -2270,6 +2272,22 @@ file path=opt/zfs-tests/tests/functional/mmap/cleanup mode=0555 file path=opt/zfs-tests/tests/functional/mmap/mmap_read_001_pos mode=0555 file path=opt/zfs-tests/tests/functional/mmap/mmap_write_001_pos mode=0555 file path=opt/zfs-tests/tests/functional/mmap/setup mode=0555 +file path=opt/zfs-tests/tests/functional/mmp/cleanup mode=0555 +file path=opt/zfs-tests/tests/functional/mmp/mmp.cfg mode=0444 +file path=opt/zfs-tests/tests/functional/mmp/mmp.kshlib mode=0444 +file path=opt/zfs-tests/tests/functional/mmp/mmp_active_import mode=0555 +file 
path=opt/zfs-tests/tests/functional/mmp/mmp_exported_import mode=0555 +file path=opt/zfs-tests/tests/functional/mmp/mmp_inactive_import mode=0555 +file path=opt/zfs-tests/tests/functional/mmp/mmp_interval mode=0555 +file path=opt/zfs-tests/tests/functional/mmp/mmp_on_off mode=0555 +file path=opt/zfs-tests/tests/functional/mmp/mmp_on_thread mode=0555 +file path=opt/zfs-tests/tests/functional/mmp/mmp_on_uberblocks mode=0555 +file path=opt/zfs-tests/tests/functional/mmp/mmp_on_zdb mode=0555 +file path=opt/zfs-tests/tests/functional/mmp/mmp_reset_interval mode=0555 +file path=opt/zfs-tests/tests/functional/mmp/mmp_write_distribution mode=0555 +file path=opt/zfs-tests/tests/functional/mmp/mmp_write_uberblocks mode=0555 +file path=opt/zfs-tests/tests/functional/mmp/multihost_history mode=0555 +file path=opt/zfs-tests/tests/functional/mmp/setup mode=0555 file path=opt/zfs-tests/tests/functional/mount/cleanup mode=0555 file path=opt/zfs-tests/tests/functional/mount/setup mode=0555 file path=opt/zfs-tests/tests/functional/mount/umount_001 mode=0555 diff --git a/usr/src/test/zfs-tests/callbacks/zfs_mmp.ksh b/usr/src/test/zfs-tests/callbacks/zfs_mmp.ksh new file mode 100755 index 0000000000..df2cd132d3 --- /dev/null +++ b/usr/src/test/zfs-tests/callbacks/zfs_mmp.ksh @@ -0,0 +1,37 @@ +#!/bin/ksh -p + +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2017 by Lawrence Livermore National Security. +# All rights reserved. 
+# + +# $1: number of lines to output (default: 40) +typeset lines=${1:-40} +typeset history=$(cat /sys/module/zfs/parameters/zfs_multihost_history) + +if [ $history -eq 0 ]; then + exit +fi + +for f in /proc/spl/kstat/zfs/*/multihost; do + echo "=================================================================" + echo " Last $lines lines of $f" + echo "=================================================================" + + sudo tail -n $lines $f +done + +echo "=================================================================" +echo " End of zfs multihost log" +echo "=================================================================" diff --git a/usr/src/test/zfs-tests/include/commands.cfg b/usr/src/test/zfs-tests/include/commands.cfg index 3b21caafb6..e5d8dbe403 100644 --- a/usr/src/test/zfs-tests/include/commands.cfg +++ b/usr/src/test/zfs-tests/include/commands.cfg @@ -56,6 +56,7 @@ export USR_BIN_FILES='awk getent grep head + hostid hostname id iostat diff --git a/usr/src/test/zfs-tests/include/libtest.shlib b/usr/src/test/zfs-tests/include/libtest.shlib index ed87c46317..39a60a0900 100644 --- a/usr/src/test/zfs-tests/include/libtest.shlib +++ b/usr/src/test/zfs-tests/include/libtest.shlib @@ -2704,3 +2704,89 @@ function mdb_ctf_set_int return 0 } + +# +# Set a global system tunable (64-bit value) +# +# $1 tunable name +# $2 tunable values +# +function set_tunable64 +{ + set_tunable_impl "$1" "$2" Z +} + +# +# Set a global system tunable (32-bit value) +# +# $1 tunable name +# $2 tunable values +# +function set_tunable32 +{ + set_tunable_impl "$1" "$2" W +} + +function set_tunable_impl +{ + typeset tunable="$1" + typeset value="$2" + typeset mdb_cmd="$3" + typeset module="${4:-zfs}" + + [[ -z "$tunable" ]] && return 1 + [[ -z "$value" ]] && return 1 + [[ -z "$mdb_cmd" ]] && return 1 + + case "$(uname)" in + Linux) + typeset zfs_tunables="/sys/module/$module/parameters" + [[ -w "$zfs_tunables/$tunable" ]] || return 1 + echo -n "$value" > "$zfs_tunables/$tunable" + 
return "$?" + ;; + SunOS) + [[ "$module" -eq "zfs" ]] || return 1 + echo "${tunable}/${mdb_cmd}0t${value}" | mdb -kw + return "$?" + ;; + esac +} + +# +# Get a global system tunable +# +# $1 tunable name +# +function get_tunable +{ + get_tunable_impl "$1" +} + +function get_tunable_impl +{ + typeset tunable="$1" + typeset module="${2:-zfs}" + + [[ -z "$tunable" ]] && return 1 + + case "$(uname)" in + Linux) + typeset zfs_tunables="/sys/module/$module/parameters" + [[ -f "$zfs_tunables/$tunable" ]] || return 1 + cat $zfs_tunables/$tunable + return "$?" + ;; + SunOS) + typeset value=$(mdb -k -e "$tunable/X | ::eval .=U") + if [[ $? -ne 0 ]]; then + log_fail "Failed to get value of '$tunable' from mdb." + return 1 + fi + echo $value + return 0 + ;; + esac + + return 1 +} diff --git a/usr/src/test/zfs-tests/runfiles/delphix.run b/usr/src/test/zfs-tests/runfiles/delphix.run index ed3961f323..41e6d30e8b 100644 --- a/usr/src/test/zfs-tests/runfiles/delphix.run +++ b/usr/src/test/zfs-tests/runfiles/delphix.run @@ -462,6 +462,12 @@ tests = ['migration_001_pos', 'migration_002_pos', 'migration_003_pos', [/opt/zfs-tests/tests/functional/mmap] tests = ['mmap_read_001_pos', 'mmap_write_001_pos'] +[/opt/zfs-tests/tests/functional/mmp] +tests = ['mmp_on_thread', 'mmp_on_uberblocks', 'mmp_on_off', 'mmp_interval', + 'mmp_active_import', 'mmp_inactive_import', 'mmp_exported_import', + 'mmp_write_uberblocks', 'mmp_reset_interval', 'multihost_history', + 'mmp_on_zdb', 'mmp_write_distribution'] + [/opt/zfs-tests/tests/functional/mount] tests = ['umount_001', 'umountall_001'] diff --git a/usr/src/test/zfs-tests/runfiles/omnios.run b/usr/src/test/zfs-tests/runfiles/omnios.run index 5847e0e980..e46c59416e 100644 --- a/usr/src/test/zfs-tests/runfiles/omnios.run +++ b/usr/src/test/zfs-tests/runfiles/omnios.run @@ -431,6 +431,12 @@ tests = ['migration_001_pos', 'migration_002_pos', 'migration_003_pos', [/opt/zfs-tests/tests/functional/mmap] tests = ['mmap_read_001_pos', 'mmap_write_001_pos'] 
+[/opt/zfs-tests/tests/functional/mmp] +tests = ['mmp_on_thread', 'mmp_on_uberblocks', 'mmp_on_off', 'mmp_interval', + 'mmp_active_import', 'mmp_inactive_import', 'mmp_exported_import', + 'mmp_write_uberblocks', 'mmp_reset_interval', 'multihost_history', + 'mmp_on_zdb', 'mmp_write_distribution'] + [/opt/zfs-tests/tests/functional/mount] tests = ['umount_001', 'umountall_001'] diff --git a/usr/src/test/zfs-tests/runfiles/openindiana.run b/usr/src/test/zfs-tests/runfiles/openindiana.run index 3c1202f751..188f9b24c2 100644 --- a/usr/src/test/zfs-tests/runfiles/openindiana.run +++ b/usr/src/test/zfs-tests/runfiles/openindiana.run @@ -431,6 +431,12 @@ tests = ['migration_001_pos', 'migration_002_pos', 'migration_003_pos', [/opt/zfs-tests/tests/functional/mmap] tests = ['mmap_read_001_pos', 'mmap_write_001_pos'] +[/opt/zfs-tests/tests/functional/mmp] +tests = ['mmp_on_thread', 'mmp_on_uberblocks', 'mmp_on_off', 'mmp_interval', + 'mmp_active_import', 'mmp_inactive_import', 'mmp_exported_import', + 'mmp_write_uberblocks', 'mmp_reset_interval', 'multihost_history', + 'mmp_on_zdb', 'mmp_write_distribution'] + [/opt/zfs-tests/tests/functional/mount] tests = ['umount_001', 'umountall_001'] diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg b/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg index ebf4e77ba8..90e723a8cc 100644 --- a/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg @@ -56,6 +56,7 @@ typeset -a properties=( "leaked" "bootsize" "checkpoint" + "multihost" "feature@async_destroy" "feature@empty_bpobj" "feature@lz4_compress" diff --git a/usr/src/test/zfs-tests/tests/functional/mmp/Makefile b/usr/src/test/zfs-tests/tests/functional/mmp/Makefile new file mode 100644 index 0000000000..7b26281882 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/mmp/Makefile @@ -0,0 +1,21 @@ +# +# This file and its 
contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2019 Joyent, Inc. +# + +include $(SRC)/Makefile.master + +ROOTOPTPKG = $(ROOT)/opt/zfs-tests +TARGETDIR = $(ROOTOPTPKG)/tests/functional/mmp + +include $(SRC)/test/zfs-tests/Makefile.com diff --git a/usr/src/test/zfs-tests/tests/functional/mmp/cleanup.ksh b/usr/src/test/zfs-tests/tests/functional/mmp/cleanup.ksh new file mode 100755 index 0000000000..82f06567cd --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/mmp/cleanup.ksh @@ -0,0 +1,36 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 by Lawrence Livermore National Security, LLC. +# Copyright 2019 Joyent, Inc. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/mmp/mmp.cfg +. 
$STF_SUITE/tests/functional/mmp/mmp.kshlib + +verify_runnable "global" + +case "$(uname)" in +SunOS) h=$(cat /var/tmp/zfs_test_hostid.txt) + mmp_set_hostid $h + rm /var/tmp/zfs_test_hostid.txt + ;; +esac +log_must set_tunable64 zfs_multihost_history 0 + +log_pass "mmp cleanup passed" diff --git a/usr/src/test/zfs-tests/tests/functional/mmp/mmp.cfg b/usr/src/test/zfs-tests/tests/functional/mmp/mmp.cfg new file mode 100644 index 0000000000..52680c275a --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/mmp/mmp.cfg @@ -0,0 +1,40 @@ +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 by Lawrence Livermore National Security, LLC. 
+# + +export PREV_UBER="$TEST_BASE_DIR/mmp-uber-prev.txt" +export CURR_UBER="$TEST_BASE_DIR/mmp-uber-curr.txt" +export DISK=${DISKS%% *} + +export HOSTID_FILE="/etc/hostid" +export HOSTID1=01234567 +export HOSTID2=89abcdef + +export TXG_TIMEOUT_LONG=5000 +export TXG_TIMEOUT_DEFAULT=5 + +export MMP_POOL=mmppool +export MMP_DIR=$TEST_BASE_DIR/mmp +export MMP_CACHE=$MMP_DIR/zpool.cache +export MMP_ZTEST_LOG=$MMP_DIR/ztest.log +export MMP_HISTORY=100 +export MMP_HISTORY_OFF=0 + +export MMP_INTERVAL_HOUR=$((60*60*1000)) +export MMP_INTERVAL_DEFAULT=1000 +export MMP_INTERVAL_MIN=100 diff --git a/usr/src/test/zfs-tests/tests/functional/mmp/mmp.kshlib b/usr/src/test/zfs-tests/tests/functional/mmp/mmp.kshlib new file mode 100644 index 0000000000..0dc255998b --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/mmp/mmp.kshlib @@ -0,0 +1,292 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 by Lawrence Livermore National Security, LLC. +# Use is subject to license terms. +# Copyright 2019 Joyent, Inc. +# + +. $STF_SUITE/include/libtest.shlib +. 
$STF_SUITE/tests/functional/mmp/mmp.cfg + + +function check_pool_import # pool opts token keyword +{ + typeset pool=${1:-$MMP_POOL} + typeset opts=$2 + typeset token=$3 + typeset keyword=$4 + + zpool import $opts 2>&1 | \ + nawk -v token="$token:" '($1==token) {print $0}' | \ + grep -i "$keyword" > /dev/null 2>&1 + + return $? +} + +function is_pool_imported # pool opts +{ + typeset pool=${1:-$MMP_POOL} + typeset opts=$2 + + check_pool_import "$pool" "$opts" "status" \ + "The pool is currently imported" + return $? +} + +function wait_pool_imported # pool opts +{ + typeset pool=${1:-$MMP_POOL} + typeset opts=$2 + + while is_pool_imported "$pool" "$opts"; do + log_must sleep 5 + done + + return 0 +} + +function try_pool_import # pool opts message +{ + typeset pool=${1:-$MMP_POOL} + typeset opts=$2 + typeset msg=$3 + + zpool import $opts $pool 2>&1 | grep -i "$msg" + + return $? +} + +function chr2ascii +{ + case "$1" in + 0) asc="30";; + 1) asc="31";; + 2) asc="32";; + 3) asc="33";; + 4) asc="34";; + 5) asc="35";; + 6) asc="36";; + 7) asc="37";; + 8) asc="38";; + 9) asc="39";; + a) asc="61";; + b) asc="62";; + c) asc="63";; + d) asc="64";; + e) asc="65";; + f) asc="66";; + esac +} + +function mmp_set_hostid +{ + typeset hostid=$1 + + case "$(uname)" in + Linux) + a=${hostid:6:2} + b=${hostid:4:2} + c=${hostid:2:2} + d=${hostid:0:2} + + printf "\\x$a\\x$b\\x$c\\x$d" >$HOSTID_FILE + + if [ $(hostid) != "$hostid" ]; then + return 1 + fi + ;; + SunOS) + # + # Given a hostid in hex, we have to convert to decimal, then + # save the ascii string representation in the kernel. The + # 'hostid' command will get the decimal SI_HW_SERIAL value via + # sysinfo, then print that as an 8 digit hex number. 
+ # + typeset dec=$(mdb -e "$hostid=E" | sed -e 's/ *//g') + typeset len=$(echo $dec | awk '{print length($0)}') + if [[ $len -lt 0 || $len -gt 10 ]]; then + return + fi + typeset pos=0 + while [[ $pos -lt $len ]]; do + chr2ascii ${dec:$pos:1} + echo "hw_serial+${pos}/v $asc" | mdb -kw >/dev/null 2>&1 + pos=$(($pos + 1)) + done + echo "hw_serial+${pos}/v 0" | mdb -kw >/dev/null 2>&1 + ;; + esac + + return 0 +} + +function mmp_clear_hostid +{ + case "$(uname)" in + Linux) rm -f $HOSTID_FILE;; + SunOS) mmp_set_hostid "00000000";; + esac +} + +function mmp_pool_create_simple # pool dir +{ + typeset pool=${1:-$MMP_POOL} + typeset dir=${2:-$MMP_DIR} + + log_must mkdir -p $dir + log_must rm -f $dir/* + log_must truncate -s $MINVDEVSIZE $dir/vdev1 $dir/vdev2 + + log_must mmp_set_hostid $HOSTID1 + log_must zpool create -f -o cachefile=$MMP_CACHE $pool \ + mirror $dir/vdev1 $dir/vdev2 + log_must zpool set multihost=on $pool +} + +function mmp_pool_create # pool dir +{ + typeset pool=${1:-$MMP_POOL} + typeset dir=${2:-$MMP_DIR} + typeset opts="-VVVVV -T120 -M -k0 -f $dir -E -p $pool" + + mmp_pool_create_simple $pool $dir + + log_must mv $MMP_CACHE ${MMP_CACHE}.stale + log_must zpool export $pool + log_must mmp_set_hostid $HOSTID2 + + log_note "Starting ztest in the background as hostid $HOSTID1" + log_must eval "ZFS_HOSTID=$HOSTID1 /usr/bin/ztest $opts >$MMP_ZTEST_LOG 2>&1 &" + + while ! 
is_pool_imported "$pool" "-d $dir"; do + log_must pgrep ztest + log_must sleep 5 + done +} + +function mmp_pool_destroy # pool dir +{ + typeset pool=${1:-$MMP_POOL} + typeset dir=${2:-$MMP_DIR} + + ZTESTPID=$(pgrep ztest) + if [ -n "$ZTESTPID" ]; then + log_must kill $ZTESTPID + wait $ZTESTPID + fi + + if poolexists $pool; then + destroy_pool $pool + fi + + log_must rm -f $dir/* + mmp_clear_hostid +} + +function mmp_pool_set_hostid # pool hostid +{ + typeset pool=$1 + typeset hostid=$2 + + log_must mmp_set_hostid $hostid + log_must zpool export $pool + log_must zpool import $pool + + return 0 +} + +# Return the number of seconds the activity check portion of the import process +# will take. Does not include the time to find devices and assemble the +# preliminary pool configuration passed into the kernel. +function seconds_mmp_waits_for_activity +{ + typeset import_intervals=$(get_tunable zfs_multihost_import_intervals) + typeset interval=$(get_tunable zfs_multihost_interval) + typeset seconds=$((interval*import_intervals/1000)) + + echo $seconds +} + +function import_no_activity_check # pool opts +{ + typeset pool=$1 + typeset opts=$2 + + typeset max_duration=$(seconds_mmp_waits_for_activity) + + SECONDS=0 + zpool import $opts $pool + typeset rc=$? + + if [[ $SECONDS -gt $max_duration ]]; then + log_fail "unexpected activity check (${SECONDS}s gt \ +$max_duration)" + fi + + return $rc +} + +function import_activity_check # pool opts +{ + typeset pool=$1 + typeset opts=$2 + + typeset min_duration=$(seconds_mmp_waits_for_activity) + + SECONDS=0 + zpool import $opts $pool + typeset rc=$? 
+ + if [[ $SECONDS -le $min_duration ]]; then + log_fail "expected activity check (${SECONDS}s le \ +$min_duration)" + fi + + return $rc +} + +function clear_mmp_history +{ + log_must set_tunable64 zfs_multihost_history $MMP_HISTORY_OFF + log_must set_tunable64 zfs_multihost_history $MMP_HISTORY +} + +function count_skipped_mmp_writes # pool duration +{ + typeset pool=$1 + typeset -i duration=$2 + typeset hist_path="/proc/spl/kstat/zfs/$pool/multihost" + + sleep $duration + awk 'BEGIN {count=0}; $NF == "-" {count++}; END {print count};' "$hist_path" +} + +function count_mmp_writes # pool duration +{ + typeset pool=$1 + typeset -i duration=$2 + typeset hist_path="/proc/spl/kstat/zfs/$pool/multihost" + + log_must sleep $duration + awk 'BEGIN {count=0}; $NF != "-" {count++}; END {print count};' "$hist_path" +} diff --git a/usr/src/test/zfs-tests/tests/functional/mmp/mmp_active_import.ksh b/usr/src/test/zfs-tests/tests/functional/mmp/mmp_active_import.ksh new file mode 100755 index 0000000000..59f1e1ef67 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/mmp/mmp_active_import.ksh @@ -0,0 +1,119 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 by Lawrence Livermore National Security, LLC. +# Copyright 2019 Joyent, Inc. +# + +# DESCRIPTION: +# Under no circumstances when multihost is active, should an active pool +# with one hostid be importable by a host with a different hostid. +# +# STRATEGY: +# 1. Simulate an active pool on another host with ztest. +# 2. Verify 'zpool import' reports an active pool. +# 3. 
Verify 'zpool import [-f] $MMP_POOL' cannot import the pool. +# 4. Kill ztest to make pool eligible for import. +# 5. Verify 'zpool import' fails with the expected error message. +# 6. Verify 'zpool import $MMP_POOL' fails with the expected message. +# 7. Verify 'zpool import -f $MMP_POOL' can now import the pool. +# 8. Verify pool may be exported/imported without -f argument. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/mmp/mmp.cfg +. $STF_SUITE/tests/functional/mmp/mmp.kshlib + +verify_runnable "both" + +function cleanup +{ + mmp_pool_destroy $MMP_POOL $MMP_DIR + log_must set_tunable64 zfs_multihost_interval $MMP_INTERVAL_DEFAULT + log_must mmp_clear_hostid +} + +log_assert "multihost=on|off active pool activity checks" +log_onexit cleanup + +# 1. Simulate an active pool on another host with ztest. +mmp_pool_destroy $MMP_POOL $MMP_DIR +mmp_pool_create $MMP_POOL $MMP_DIR + +# 2. Verify 'zpool import' reports an active pool. +log_must mmp_set_hostid $HOSTID2 +log_must set_tunable64 zfs_multihost_interval $MMP_INTERVAL_MIN +log_must is_pool_imported $MMP_POOL "-d $MMP_DIR" + +# 3. Verify 'zpool import [-f] $MMP_POOL' cannot import the pool. +MMP_IMPORTED_MSG="Cannot import '$MMP_POOL': pool is imported" + +log_must try_pool_import $MMP_POOL "-d $MMP_DIR" "$MMP_IMPORTED_MSG" +for i in {1..10}; do + log_must try_pool_import $MMP_POOL "-f -d $MMP_DIR" "$MMP_IMPORTED_MSG" +done + +log_must try_pool_import $MMP_POOL "-c ${MMP_CACHE}.stale" "$MMP_IMPORTED_MSG" + +for i in {1..10}; do + log_must try_pool_import $MMP_POOL "-f -c ${MMP_CACHE}.stale" \ + "$MMP_IMPORTED_MSG" +done + +# 4. Kill ztest to make pool eligible for import. Poll with 'zpool status'. +ZTESTPID=$(pgrep ztest) +if [ -n "$ZTESTPID" ]; then + log_must kill -9 $ZTESTPID +fi +log_must wait_pool_imported $MMP_POOL "-d $MMP_DIR" + +# 5. 
Verify 'zpool import' fails with the expected error message, when +# - hostid=0: - configuration error +# - hostid=matches - safe to import the pool +# - hostid=different - previously imported on a different system +# +log_must mmp_clear_hostid +case "$(uname)" in +Linux) MMP_IMPORTED_MSG="Set a unique system hostid";; +SunOS) MMP_IMPORTED_MSG="Check the SMF svc:/system/hostid service.";; +esac +log_must check_pool_import $MMP_POOL "-d $MMP_DIR" "action" "$MMP_IMPORTED_MSG" + +log_must mmp_set_hostid $HOSTID1 +MMP_IMPORTED_MSG="The pool can be imported" +log_must check_pool_import $MMP_POOL "-d $MMP_DIR" "action" "$MMP_IMPORTED_MSG" + +log_must mmp_clear_hostid +log_must mmp_set_hostid $HOSTID2 +MMP_IMPORTED_MSG="The pool was last accessed by another system." +log_must check_pool_import $MMP_POOL "-d $MMP_DIR" "status" "$MMP_IMPORTED_MSG" + +# 6. Verify 'zpool import $MMP_POOL' fails with the expected message. +MMP_IMPORTED_MSG="pool was previously in use from another system." +log_must try_pool_import $MMP_POOL "-d $MMP_DIR" "$MMP_IMPORTED_MSG" + +# 7. Verify 'zpool import -f $MMP_POOL' can now import the pool. +# Default interval results in minimum activity test 10s which +# makes detection of the activity test reliable. +log_must set_tunable64 zfs_multihost_interval $MMP_INTERVAL_DEFAULT +log_must import_activity_check $MMP_POOL "-f -d $MMP_DIR" + +# 8 Verify pool may be exported/imported without -f argument. 
+log_must zpool export $MMP_POOL +log_must import_no_activity_check $MMP_POOL "-d $MMP_DIR" + +log_pass "multihost=on|off active pool activity checks passed" diff --git a/usr/src/test/zfs-tests/tests/functional/mmp/mmp_exported_import.ksh b/usr/src/test/zfs-tests/tests/functional/mmp/mmp_exported_import.ksh new file mode 100755 index 0000000000..bf00e4b8d5 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/mmp/mmp_exported_import.ksh @@ -0,0 +1,110 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 by Lawrence Livermore National Security, LLC. +# Copyright 2019 Joyent, Inc. +# + +# DESCRIPTION: +# Verify import behavior for exported pool (no activity check) +# +# STRATEGY: +# 1. Create a zpool +# 2. Verify multihost=off and hostids match (no activity check) +# 3. Verify multihost=off and hostids differ (no activity check) +# 4. Verify multihost=off and hostid zero allowed (no activity check) +# 5. Verify multihost=on and hostids match (no activity check) +# 6. Verify multihost=on and hostids differ (no activity check) +# 7. Verify multihost=on and hostid zero fails (no activity check) +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/mmp/mmp.cfg +. $STF_SUITE/tests/functional/mmp/mmp.kshlib + +verify_runnable "both" + +function cleanup +{ + default_cleanup_noexit + log_must mmp_clear_hostid +} + +log_assert "multihost=on|off activity checks exported pool" +log_onexit cleanup + +# 1. Create a zpool +log_must mmp_set_hostid $HOSTID1 +default_setup_noexit $DISK + +# 2. 
Verify multihost=off and hostids match (no activity check) +log_must zpool set multihost=off $TESTPOOL + +for opt in "" "-f"; do + log_must zpool export $TESTPOOL + log_must import_no_activity_check $TESTPOOL $opt +done + +# 3. Verify multihost=off and hostids differ (no activity check) +for opt in "" "-f"; do + log_must mmp_pool_set_hostid $TESTPOOL $HOSTID1 + log_must zpool export $TESTPOOL + log_must mmp_clear_hostid + log_must mmp_set_hostid $HOSTID2 + log_must import_no_activity_check $TESTPOOL $opt +done + +# 4. Verify multihost=off and hostid zero allowed (no activity check) +log_must mmp_clear_hostid + +for opt in "" "-f"; do + log_must zpool export $TESTPOOL + log_must import_no_activity_check $TESTPOOL $opt +done + +# 5. Verify multihost=on and hostids match (no activity check) +log_must mmp_pool_set_hostid $TESTPOOL $HOSTID1 +log_must zpool set multihost=on $TESTPOOL + +for opt in "" "-f"; do + log_must zpool export $TESTPOOL + log_must import_no_activity_check $TESTPOOL $opt +done + +# 6. Verify multihost=on and hostids differ (no activity check) +for opt in "" "-f"; do + log_must mmp_pool_set_hostid $TESTPOOL $HOSTID1 + log_must zpool export $TESTPOOL + log_must mmp_clear_hostid + log_must mmp_set_hostid $HOSTID2 + log_must import_no_activity_check $TESTPOOL $opt +done + +# 7. 
Verify multihost=on and hostid zero fails (no activity check) +log_must zpool export $TESTPOOL +log_must mmp_clear_hostid + +for opt in "" "-f"; do + case "$(uname)" in + Linux) MMP_IMPORTED_MSG="Set a unique system hostid";; + SunOS) MMP_IMPORTED_MSG="Check the SMF svc:/system/hostid service.";; + esac + log_must check_pool_import $TESTPOOL "" "action" "$MMP_IMPORTED_MSG" + log_mustnot import_no_activity_check $TESTPOOL $opt +done + +log_pass "multihost=on|off exported pool activity checks passed" diff --git a/usr/src/test/zfs-tests/tests/functional/mmp/mmp_inactive_import.ksh b/usr/src/test/zfs-tests/tests/functional/mmp/mmp_inactive_import.ksh new file mode 100755 index 0000000000..59a2a3d09c --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/mmp/mmp_inactive_import.ksh @@ -0,0 +1,101 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 by Lawrence Livermore National Security, LLC. +# Copyright 2019 Joyent, Inc. +# + +# DESCRIPTION: +# Verify import behavior for inactive, but not exported, pools +# +# STRATEGY: +# 1. Create a zpool +# 2. Verify multihost=off and hostids match (no activity check) +# 3. Verify multihost=off and hostids differ (no activity check) +# 4. Verify multihost=off and hostid zero allowed (no activity check) +# 5. Verify multihost=on and hostids match (no activity check) +# 6. Verify multihost=on and hostids differ (activity check) +# 7. Verify multihost=on and hostid zero fails (no activity check) +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/mmp/mmp.cfg +. 
$STF_SUITE/tests/functional/mmp/mmp.kshlib + +verify_runnable "both" + +function cleanup +{ + default_cleanup_noexit + log_must mmp_clear_hostid +} + +log_assert "multihost=on|off inactive pool activity checks" +log_onexit cleanup + +# 1. Create a zpool +log_must mmp_set_hostid $HOSTID1 +default_setup_noexit $DISK + +# 2. Verify multihost=off and hostids match (no activity check) +log_must zpool set multihost=off $TESTPOOL + +for opt in "" "-f"; do + log_must zpool export -F $TESTPOOL + log_must import_no_activity_check $TESTPOOL $opt +done + +# 3. Verify multihost=off and hostids differ (no activity check) +log_must zpool export -F $TESTPOOL +log_must mmp_clear_hostid +log_must mmp_set_hostid $HOSTID2 +log_mustnot import_no_activity_check $TESTPOOL "" +log_must import_no_activity_check $TESTPOOL "-f" + +# 4. Verify multihost=off and hostid zero allowed (no activity check) +log_must zpool export -F $TESTPOOL +log_must mmp_clear_hostid +log_mustnot import_no_activity_check $TESTPOOL "" +log_must import_no_activity_check $TESTPOOL "-f" + +# 5. Verify multihost=on and hostids match (no activity check) +log_must mmp_pool_set_hostid $TESTPOOL $HOSTID1 +log_must zpool set multihost=on $TESTPOOL + +for opt in "" "-f"; do + log_must zpool export -F $TESTPOOL + log_must import_no_activity_check $TESTPOOL $opt +done + +# 6. Verify multihost=on and hostids differ (activity check) +log_must zpool export -F $TESTPOOL +log_must mmp_clear_hostid +log_must mmp_set_hostid $HOSTID2 +log_mustnot import_activity_check $TESTPOOL "" +log_must import_activity_check $TESTPOOL "-f" + +# 7. 
Verify multihost=on and hostid zero fails (no activity check) +log_must zpool export -F $TESTPOOL +log_must mmp_clear_hostid +case "$(uname)" in +Linux) MMP_IMPORTED_MSG="Set a unique system hostid";; +SunOS) MMP_IMPORTED_MSG="Check the SMF svc:/system/hostid service.";; +esac +log_must check_pool_import $TESTPOOL "-f" "action" "$MMP_IMPORTED_MSG" +log_mustnot import_no_activity_check $TESTPOOL "-f" + +log_pass "multihost=on|off inactive pool activity checks passed" diff --git a/usr/src/test/zfs-tests/tests/functional/mmp/mmp_interval.ksh b/usr/src/test/zfs-tests/tests/functional/mmp/mmp_interval.ksh new file mode 100755 index 0000000000..e07677c4f1 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/mmp/mmp_interval.ksh @@ -0,0 +1,50 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 by Lawrence Livermore National Security, LLC. +# Copyright 2019 Joyent, Inc. +# + +# DESCRIPTION: +# zfs_multihost_interval should only accept valid values. +# +# STRATEGY: +# 1. Set zfs_multihost_interval to invalid values (negative). +# 2. Set zfs_multihost_interval to valid values. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/mmp/mmp.cfg +. 
$STF_SUITE/tests/functional/mmp/mmp.kshlib + +verify_runnable "both" + +function cleanup +{ + log_must set_tunable64 zfs_multihost_interval $MMP_INTERVAL_DEFAULT +} + +log_assert "zfs_multihost_interval cannot be set to an invalid value" +log_onexit cleanup + +if [[ $(uname) == "Linux" ]]; then + log_mustnot set_tunable64 zfs_multihost_interval -1 +fi +log_must set_tunable64 zfs_multihost_interval $MMP_INTERVAL_MIN +log_must set_tunable64 zfs_multihost_interval $MMP_INTERVAL_DEFAULT + +log_pass "zfs_multihost_interval cannot be set to an invalid value" diff --git a/usr/src/test/zfs-tests/tests/functional/mmp/mmp_on_off.ksh b/usr/src/test/zfs-tests/tests/functional/mmp/mmp_on_off.ksh new file mode 100755 index 0000000000..8bef86a0ff --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/mmp/mmp_on_off.ksh @@ -0,0 +1,79 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 by Lawrence Livermore National Security, LLC. +# + +# DESCRIPTION: +# When multihost=off ensure that leaf vdev uberblocks are not updated. +# +# STRATEGY: +# 1. Set multihost=off (disables mmp) +# 2. Set zfs_txg_timeout to large value +# 3. Create a zpool +# 4. Find the current "best" uberblock +# 5. Sleep for enough time for uberblocks to change +# 6. Find the current "best" uberblock +# 7. If the uberblock changed, fail +# 8. Set multihost=on +# 9. Sleep for enough time for uberblocks to change +# 10. Find the current "best" uberblock +# 11. If uberblocks didn't change, fail +# + +. $STF_SUITE/include/libtest.shlib +. 
$STF_SUITE/tests/functional/mmp/mmp.cfg +. $STF_SUITE/tests/functional/mmp/mmp.kshlib + +verify_runnable "both" + +function cleanup +{ + default_cleanup_noexit + log_must set_tunable64 zfs_txg_timeout $TXG_TIMEOUT_DEFAULT + log_must set_tunable64 zfs_multihost_interval $MMP_INTERVAL_DEFAULT + log_must rm -f $PREV_UBER $CURR_UBER + log_must mmp_clear_hostid +} + +log_assert "mmp thread won't write uberblocks with multihost=off" +log_onexit cleanup + +log_must set_tunable64 zfs_multihost_interval $MMP_INTERVAL_MIN +log_must set_tunable64 zfs_txg_timeout $TXG_TIMEOUT_LONG +log_must mmp_set_hostid $HOSTID1 + +default_setup_noexit $DISK +log_must zpool set multihost=off $TESTPOOL + +log_must zdb -u $TESTPOOL > $PREV_UBER +log_must sleep 5 +log_must zdb -u $TESTPOOL > $CURR_UBER + +if ! diff "$CURR_UBER" "$PREV_UBER"; then + log_fail "mmp thread has updated an uberblock" +fi + +log_must zpool set multihost=on $TESTPOOL +log_must sleep 5 +log_must zdb -u $TESTPOOL > $CURR_UBER + +if diff "$CURR_UBER" "$PREV_UBER"; then + log_fail "mmp failed to update uberblocks" +fi + +log_pass "mmp thread won't write uberblocks with multihost=off passed" diff --git a/usr/src/test/zfs-tests/tests/functional/mmp/mmp_on_thread.ksh b/usr/src/test/zfs-tests/tests/functional/mmp/mmp_on_thread.ksh new file mode 100755 index 0000000000..07384c6231 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/mmp/mmp_on_thread.ksh @@ -0,0 +1,64 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 by Lawrence Livermore National Security, LLC. 
+# + +# DESCRIPTION: +# Ensure that the MMP thread is writing uberblocks. +# +# STRATEGY: +# 1. Set zfs_txg_timeout to large value +# 2. Create a zpool +# 3. Find the current "best" uberblock +# 4. Sleep for enough time for a potential uberblock update +# 5. Find the current "best" uberblock +# 6. If the uberblock never changed, fail +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/mmp/mmp.cfg +. $STF_SUITE/tests/functional/mmp/mmp.kshlib + +verify_runnable "both" + +function cleanup +{ + default_cleanup_noexit + log_must set_tunable64 zfs_txg_timeout $TXG_TIMEOUT_DEFAULT + log_must rm -f $PREV_UBER $CURR_UBER + log_must mmp_clear_hostid +} + +log_assert "mmp thread writes uberblocks (MMP)" +log_onexit cleanup + +log_must set_tunable64 zfs_txg_timeout $TXG_TIMEOUT_LONG +log_must mmp_set_hostid $HOSTID1 + +default_setup_noexit $DISK +log_must zpool set multihost=on $TESTPOOL + +log_must zdb -u $TESTPOOL > $PREV_UBER +log_must sleep 5 +log_must zdb -u $TESTPOOL > $CURR_UBER + +if diff -u "$CURR_UBER" "$PREV_UBER"; then + log_fail "mmp failed to update uberblocks" +fi + +log_pass "mmp thread writes uberblocks (MMP) passed" diff --git a/usr/src/test/zfs-tests/tests/functional/mmp/mmp_on_uberblocks.ksh b/usr/src/test/zfs-tests/tests/functional/mmp/mmp_on_uberblocks.ksh new file mode 100755 index 0000000000..0cb38f8899 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/mmp/mmp_on_uberblocks.ksh @@ -0,0 +1,73 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. 
+# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 by Lawrence Livermore National Security, LLC. +# + +# DESCRIPTION: +# Ensure that MMP updates uberblocks at the expected intervals. +# +# STRATEGY: +# 1. Set zfs_txg_timeout to large value +# 2. Create a zpool +# 3. Clear multihost history +# 4. Sleep, then collect count of uberblocks written +# 5. If number of changes seen is less than min threshold, then fail +# 6. If number of changes seen is more than max threshold, then fail +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/mmp/mmp.cfg +. $STF_SUITE/tests/functional/mmp/mmp.kshlib + +verify_runnable "both" + +UBER_CHANGES=0 +EXPECTED=$(($(echo $DISKS | wc -w) * 10)) +FUDGE=$((EXPECTED * 20 / 100)) +MIN=$((EXPECTED - FUDGE)) +MAX=$((EXPECTED + FUDGE)) + +function cleanup +{ + default_cleanup_noexit + log_must set_tunable64 zfs_txg_timeout $TXG_TIMEOUT_DEFAULT + log_must mmp_clear_hostid +} + +log_assert "Ensure MMP uberblocks update at the correct interval" +log_onexit cleanup + +log_must set_tunable64 zfs_txg_timeout $TXG_TIMEOUT_LONG +log_must mmp_set_hostid $HOSTID1 + +default_setup_noexit "$DISKS" +log_must zpool set multihost=on $TESTPOOL +clear_mmp_history +UBER_CHANGES=$(count_mmp_writes $TESTPOOL 10) + +log_note "Uberblock changed $UBER_CHANGES times" + +if [ $UBER_CHANGES -lt $MIN ]; then + log_fail "Fewer uberblock writes occurred than expected ($EXPECTED)" +fi + +if [ $UBER_CHANGES -gt $MAX ]; then + log_fail "More uberblock writes occurred than expected ($EXPECTED)" +fi + +log_pass "Ensure MMP uberblocks update at the correct interval passed" diff --git a/usr/src/test/zfs-tests/tests/functional/mmp/mmp_on_zdb.ksh b/usr/src/test/zfs-tests/tests/functional/mmp/mmp_on_zdb.ksh new file mode 100755 index 0000000000..131fd21e88 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/mmp/mmp_on_zdb.ksh @@ -0,0 +1,81 @@ +#!/bin/ksh + +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution 
License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2018 Lawrence Livermore National Security, LLC. +# Copyright (c) 2018 by Nutanix. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/mmp/mmp.cfg +. $STF_SUITE/tests/functional/mmp/mmp.kshlib + +# +# Description: +# zdb will work while multihost is enabled. +# +# Strategy: +# 1. Create a pool +# 2. Enable multihost +# 3. Run zdb -d with pool and dataset arguments. +# 4. Create a checkpoint +# 5. Run zdb -kd with pool and dataset arguments. +# 6. Discard the checkpoint +# 7. Export the pool +# 8. Run zdb -ed with pool and dataset arguments. +# + +function cleanup +{ + datasetexists $TESTPOOL && destroy_pool $TESTPOOL + for DISK in $DISKS; do + zpool labelclear -f $DEV_RDSKDIR/$DISK + done + log_must mmp_clear_hostid +} + +log_assert "Verify zdb -d works while multihost is enabled" +log_onexit cleanup + +verify_runnable "global" +verify_disk_count "$DISKS" 2 + +default_mirror_setup_noexit $DISKS +log_must mmp_set_hostid $HOSTID1 +log_must zpool set multihost=on $TESTPOOL +log_must zfs snap $TESTPOOL/$TESTFS@snap + +log_must zdb -d $TESTPOOL +log_must zdb -d $TESTPOOL/ +log_must zdb -d $TESTPOOL/$TESTFS +log_must zdb -d $TESTPOOL/$TESTFS@snap + +log_must zpool checkpoint $TESTPOOL +log_must zdb -kd $TESTPOOL +log_must zdb -kd $TESTPOOL/ +log_must zdb -kd $TESTPOOL/$TESTFS +log_must zdb -kd $TESTPOOL/$TESTFS@snap +log_must zpool checkpoint -d $TESTPOOL + +log_must zpool export $TESTPOOL + +log_must zdb -ed $TESTPOOL +log_must zdb -ed $TESTPOOL/ +log_must zdb -ed $TESTPOOL/$TESTFS +log_must zdb -ed $TESTPOOL/$TESTFS@snap + +log_must zpool import $TESTPOOL + +cleanup + +log_pass "zdb -d works while multihost is 
enabled" diff --git a/usr/src/test/zfs-tests/tests/functional/mmp/mmp_reset_interval.ksh b/usr/src/test/zfs-tests/tests/functional/mmp/mmp_reset_interval.ksh new file mode 100755 index 0000000000..3c8f00cde9 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/mmp/mmp_reset_interval.ksh @@ -0,0 +1,64 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 by Lawrence Livermore National Security, LLC. +# + +# DESCRIPTION: +# Ensure that the MMP thread is notified when zfs_multihost_interval is +# reduced. +# +# STRATEGY: +# 1. Set zfs_multihost_interval to much longer than the test duration +# 2. Create a zpool and enable multihost +# 3. Verify no MMP writes occurred +# 4. Set zfs_multihost_interval to 1 second +# 5. Sleep briefly +# 6. Verify MMP writes began +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/mmp/mmp.cfg +. 
$STF_SUITE/tests/functional/mmp/mmp.kshlib + +verify_runnable "both" + +function cleanup +{ + default_cleanup_noexit + log_must set_tunable64 zfs_multihost_interval $MMP_INTERVAL_DEFAULT + log_must mmp_clear_hostid +} + +log_assert "mmp threads notified when zfs_multihost_interval reduced" +log_onexit cleanup + +log_must set_tunable64 zfs_multihost_interval $MMP_INTERVAL_HOUR +log_must mmp_set_hostid $HOSTID1 + +default_setup_noexit $DISK +log_must zpool set multihost=on $TESTPOOL + +clear_mmp_history +log_must set_tunable64 zfs_multihost_interval $MMP_INTERVAL_DEFAULT +uber_count=$(count_mmp_writes $TESTPOOL 1) + +if [ $uber_count -eq 0 ]; then + log_fail "mmp writes did not start when zfs_multihost_interval reduced" +fi + +log_pass "mmp threads notified when zfs_multihost_interval reduced" diff --git a/usr/src/test/zfs-tests/tests/functional/mmp/mmp_write_distribution.ksh b/usr/src/test/zfs-tests/tests/functional/mmp/mmp_write_distribution.ksh new file mode 100755 index 0000000000..7504caa4d1 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/mmp/mmp_write_distribution.ksh @@ -0,0 +1,92 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 by Lawrence Livermore National Security, LLC. +# + +# DESCRIPTION: +# Verify MMP writes are distributed evenly among leaves +# +# STRATEGY: +# 1. Create an asymmetric mirrored pool +# 2. Enable multihost and multihost_history +# 3. Delay for MMP writes to occur +# 4. Verify the MMP writes are distributed evenly across leaf vdevs +# + +. $STF_SUITE/include/libtest.shlib +. 
$STF_SUITE/tests/functional/mmp/mmp.cfg +. $STF_SUITE/tests/functional/mmp/mmp.kshlib + +verify_runnable "both" + +function cleanup +{ + log_must zpool destroy $MMP_POOL + log_must rm $MMP_DIR/file.{0,1,2,3,4,5,6,7} + log_must rm $MMP_HISTORY_TMP + log_must rmdir $MMP_DIR + log_must mmp_clear_hostid +} + +log_assert "mmp writes are evenly distributed across leaf vdevs" +log_onexit cleanup + +MMP_HISTORY_TMP=$MMP_DIR/history +MMP_HISTORY=/proc/spl/kstat/zfs/$MMP_POOL/multihost + +# Step 1 +log_must mkdir -p $MMP_DIR +log_must truncate -s 128M $MMP_DIR/file.{0,1,2,3,4,5,6,7} +log_must zpool create -f $MMP_POOL mirror $MMP_DIR/file.{0,1} mirror $MMP_DIR/file.{2,3,4,5,6,7} + +# Step 2 +log_must mmp_set_hostid $HOSTID1 +log_must zpool set multihost=on $MMP_POOL +set_tunable64 zfs_multihost_history 0 +set_tunable64 zfs_multihost_history 40 + +# Step 3 +# default settings, every leaf written once/second +sleep 4 + +# Step 4 +typeset -i min_writes=999 +typeset -i max_writes=0 +typeset -i write_count +# copy to get as close to a consistent view as possible +cat $MMP_HISTORY > $MMP_HISTORY_TMP +for x in $(seq 0 7); do + write_count=$(grep -c file.${x} $MMP_HISTORY_TMP) + if [ $write_count -lt $min_writes ]; then + min_writes=$write_count + fi + if [ $write_count -gt $max_writes ]; then + max_writes=$write_count + fi +done +log_note "mmp min_writes $min_writes max_writes $max_writes" + +if [ $min_writes -lt 1 ]; then + log_fail "mmp writes were not counted correctly" +fi + +if [ $((max_writes - min_writes)) -gt 1 ]; then + log_fail "mmp writes were not evenly distributed across leaf vdevs" +fi + +log_pass "mmp writes were evenly distributed across leaf vdevs" diff --git a/usr/src/test/zfs-tests/tests/functional/mmp/mmp_write_uberblocks.ksh b/usr/src/test/zfs-tests/tests/functional/mmp/mmp_write_uberblocks.ksh new file mode 100755 index 0000000000..be387637ba --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/mmp/mmp_write_uberblocks.ksh @@ -0,0 +1,59 @@ +#!/bin/ksh 
-p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 by Lawrence Livermore National Security, LLC. +# Copyright 2019 Joyent, Inc. +# + +# DESCRIPTION: +# Verify MMP behaves correctly when failing to write uberblocks. +# +# STRATEGY: +# 1. Create a mirrored pool and enable multihost +# 2. Inject a 50% failure rate when writing uberblocks to a device +# 3. Delay briefly for additional MMP writes to complete +# 4. Verify the failed uberblock writes did not prevent MMP updates +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/mmp/mmp.cfg +. $STF_SUITE/tests/functional/mmp/mmp.kshlib + +verify_runnable "both" + +function cleanup +{ + zinject -c all + default_cleanup_noexit + log_must mmp_clear_hostid +} + +log_assert "mmp behaves correctly when failing to write uberblocks." 
+log_onexit cleanup + +log_must mmp_set_hostid $HOSTID1 +default_mirror_setup_noexit $DISKS +log_must zpool set multihost=on $TESTPOOL +log_must zinject -d ${DISK[0]} -e io -T write -f 50 -L uber $TESTPOOL +clear_mmp_history +uber_count=$(count_mmp_writes $TESTPOOL 3) + +if [ $uber_count -eq 0 ]; then + log_fail "mmp writes did not occur when uberblock IO errors injected" +fi + +log_pass "mmp correctly wrote uberblocks when IO errors injected" diff --git a/usr/src/test/zfs-tests/tests/functional/mmp/multihost_history.ksh b/usr/src/test/zfs-tests/tests/functional/mmp/multihost_history.ksh new file mode 100755 index 0000000000..e831475dbc --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/mmp/multihost_history.ksh @@ -0,0 +1,67 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 by Lawrence Livermore National Security, LLC. +# + +# DESCRIPTION: +# zfs_multihost_history should report both writes issued and gaps +# +# STRATEGY: +# 1. Create a 2-vdev pool with mmp enabled +# 2. Delay writes by 2*MMP_INTERVAL_DEFAULT +# 3. Check multihost_history for both issued writes, and for gaps where +# no write could be issued because all vdevs are busy +# +# During the first MMP_INTERVAL period 2 MMP writes will be issued - one to +# each vdev. At the third scheduled attempt to write, at time t0+MMP_INTERVAL, +# both vdevs will still have outstanding writes, so a skipped write entry will +# be recorded in the multihost_history. + + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/mmp/mmp.cfg +. 
$STF_SUITE/tests/functional/mmp/mmp.kshlib + +verify_runnable "both" + +function cleanup +{ + log_must zinject -c all + mmp_pool_destroy $MMP_POOL $MMP_DIR + log_must mmp_clear_hostid +} + +log_assert "zfs_multihost_history records writes and skipped writes" +log_onexit cleanup + +mmp_pool_create_simple $MMP_POOL $MMP_DIR +log_must zinject -d $MMP_DIR/vdev1 -D$((2*MMP_INTERVAL_DEFAULT)):10 $MMP_POOL +log_must zinject -d $MMP_DIR/vdev2 -D$((2*MMP_INTERVAL_DEFAULT)):10 $MMP_POOL + +mmp_writes=$(count_mmp_writes $MMP_POOL $((MMP_INTERVAL_DEFAULT/1000))) +mmp_skips=$(count_skipped_mmp_writes $MMP_POOL $((MMP_INTERVAL_DEFAULT/1000))) + +if [ $mmp_writes -lt 1 ]; then + log_fail "mmp writes entries missing when delays injected" +fi + +if [ $mmp_skips -lt 1 ]; then + log_fail "mmp skipped write entries missing when delays injected" +fi + +log_pass "zfs_multihost_history records writes and skipped writes" diff --git a/usr/src/test/zfs-tests/tests/functional/mmp/setup.ksh b/usr/src/test/zfs-tests/tests/functional/mmp/setup.ksh new file mode 100755 index 0000000000..c6f8175838 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/mmp/setup.ksh @@ -0,0 +1,38 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 by Lawrence Livermore National Security, LLC. +# Copyright 2019 Joyent, Inc. +# + +. $STF_SUITE/include/libtest.shlib +. 
$STF_SUITE/tests/functional/mmp/mmp.cfg + +verify_runnable "global" + +case "$(uname)" in +Linux) if [ -e $HOSTID_FILE ]; then + log_unsupported "System has existing $HOSTID_FILE file" + fi + log_must set_tunable64 zfs_multihost_history $MMP_HISTORY + ;; + +SunOS) hostid >/var/tmp/zfs_test_hostid.txt + ;; +esac + +log_pass "mmp setup pass" diff --git a/usr/src/uts/common/Makefile.files b/usr/src/uts/common/Makefile.files index 0f403efa5e..8c5d9c8c33 100644 --- a/usr/src/uts/common/Makefile.files +++ b/usr/src/uts/common/Makefile.files @@ -1383,6 +1383,7 @@ ZFS_COMMON_OBJS += \ lz4.o \ lzjb.o \ metaslab.o \ + mmp.o \ multilist.o \ range_tree.o \ refcount.o \ diff --git a/usr/src/uts/common/fs/zfs/dsl_pool.c b/usr/src/uts/common/fs/zfs/dsl_pool.c index 6bb34fd7af..136636bc8f 100644 --- a/usr/src/uts/common/fs/zfs/dsl_pool.c +++ b/usr/src/uts/common/fs/zfs/dsl_pool.c @@ -51,6 +51,7 @@ #include <sys/zfeature.h> #include <sys/zil_impl.h> #include <sys/dsl_userhold.h> +#include <sys/mmp.h> /* * ZFS Write Throttle @@ -193,6 +194,7 @@ dsl_pool_open_impl(spa_t *spa, uint64_t txg) dp->dp_meta_rootbp = *bp; rrw_init(&dp->dp_config_rwlock, B_TRUE); txg_init(dp, txg); + mmp_init(spa); txg_list_create(&dp->dp_dirty_datasets, spa, offsetof(dsl_dataset_t, ds_dirty_link)); @@ -394,6 +396,7 @@ dsl_pool_close(dsl_pool_t *dp) */ arc_flush(dp->dp_spa, FALSE); + mmp_fini(dp->dp_spa); txg_fini(dp); dsl_scan_fini(dp); dmu_buf_user_evict_wait(); diff --git a/usr/src/uts/common/fs/zfs/mmp.c b/usr/src/uts/common/fs/zfs/mmp.c new file mode 100644 index 0000000000..105e2bfdfd --- /dev/null +++ b/usr/src/uts/common/fs/zfs/mmp.c @@ -0,0 +1,582 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2017 by Lawrence Livermore National Security, LLC. + * Copyright 2019 Joyent, Inc. + */ + +#include <sys/abd.h> +#include <sys/mmp.h> +#include <sys/spa.h> +#include <sys/spa_impl.h> +#include <sys/time.h> +#include <sys/vdev.h> +#include <sys/vdev_impl.h> +#include <sys/zfs_context.h> +#include <sys/callb.h> + +/* + * Multi-Modifier Protection (MMP) attempts to prevent a user from importing + * or opening a pool on more than one host at a time. In particular, it + * prevents "zpool import -f" on a host from succeeding while the pool is + * already imported on another host. There are many other ways in which a + * device could be used by two hosts for different purposes at the same time + * resulting in pool damage. This implementation does not attempt to detect + * those cases. + * + * MMP operates by ensuring there are frequent visible changes on disk (a + * "heartbeat") at all times. And by altering the import process to check + * for these changes and failing the import when they are detected. This + * functionality is enabled by setting the 'multihost' pool property to on. + * + * Uberblocks written by the txg_sync thread always go into the first + * (N-MMP_BLOCKS_PER_LABEL) slots, the remaining slots are reserved for MMP. + * They are used to hold uberblocks which are exactly the same as the last + * synced uberblock except that the ub_timestamp is frequently updated. 
+ * Like all other uberblocks, the slot is written with an embedded checksum, + * and slots with invalid checksums are ignored. This provides the + * "heartbeat", with no risk of overwriting good uberblocks that must be + * preserved, e.g. previous txgs and associated block pointers. + * + * Two optional fields are added to the uberblock structure: ub_mmp_magic and + * ub_mmp_delay. The magic field allows zfs to tell whether ub_mmp_delay is + * valid. The delay field is a decaying average of the amount of time between + * completion of successive MMP writes, in nanoseconds. It is used to predict + * how long the import must wait to detect activity in the pool, before + * concluding it is not in use. + * + * During import an activity test may now be performed to determine if + * the pool is in use. The activity test is typically required if the + * ZPOOL_CONFIG_HOSTID does not match the system hostid, the pool state is + * POOL_STATE_ACTIVE, and the pool is not a root pool. + * + * The activity test finds the "best" uberblock (highest txg & timestamp), + * waits some time, and then finds the "best" uberblock again. If the txg + * and timestamp in both "best" uberblocks do not match, the pool is in use + * by another host and the import fails. Since the granularity of the + * timestamp is in seconds this activity test must take a bare minimum of one + * second. In order to assure the accuracy of the activity test, the default + * values result in an activity test duration of 10x the mmp write interval. + * + * The "zpool import" activity test can be expected to take a minimum time of + * zfs_multihost_import_intervals * zfs_multihost_interval milliseconds. If the + * "best" uberblock has a valid ub_mmp_delay field, then the duration of the + * test may take longer if MMP writes were occurring less frequently than + * expected. Additionally, the duration is then extended by a random 25% to + * attempt to detect simultaneous imports. 
For example, if both partner + * hosts are rebooted at the same time and automatically attempt to import the + * pool. + */ + +/* + * Used to control the frequency of mmp writes which are performed when the + * 'multihost' pool property is on. This is one factor used to determine the + * length of the activity check during import. + * + * The mmp write period is zfs_multihost_interval / leaf-vdevs milliseconds. + * This means that on average an mmp write will be issued for each leaf vdev + * every zfs_multihost_interval milliseconds. In practice, the observed period + * can vary with the I/O load and this observed value is the delay which is + * stored in the uberblock. The minimum allowed value is 100 ms. + */ +ulong_t zfs_multihost_interval = MMP_DEFAULT_INTERVAL; + +/* + * Used to control the duration of the activity test on import. Smaller values + * of zfs_multihost_import_intervals will reduce the import time but increase + * the risk of failing to detect an active pool. The total activity check time + * is never allowed to drop below one second. A value of 0 is ignored and + * treated as if it was set to 1. + */ +uint_t zfs_multihost_import_intervals = MMP_DEFAULT_IMPORT_INTERVALS; + +/* + * Controls the behavior of the pool when mmp write failures are detected. + * + * When zfs_multihost_fail_intervals = 0 then mmp write failures are ignored. + * The failures will still be reported to the ZED which depending on its + * configuration may take action such as suspending the pool or taking a + * device offline. + * + * When zfs_multihost_fail_intervals > 0 then sequential mmp write failures will + * cause the pool to be suspended. This occurs when + * zfs_multihost_fail_intervals * zfs_multihost_interval milliseconds have + * passed since the last successful mmp write. This guarantees the activity + * test will see mmp writes if the + * pool is imported. 
+ */ +uint_t zfs_multihost_fail_intervals = MMP_DEFAULT_FAIL_INTERVALS; + +char *mmp_tag = "mmp_write_uberblock"; +static void mmp_thread(void *arg); + +void +mmp_init(spa_t *spa) +{ + mmp_thread_t *mmp = &spa->spa_mmp; + + mutex_init(&mmp->mmp_thread_lock, NULL, MUTEX_DEFAULT, NULL); + cv_init(&mmp->mmp_thread_cv, NULL, CV_DEFAULT, NULL); + mutex_init(&mmp->mmp_io_lock, NULL, MUTEX_DEFAULT, NULL); + mmp->mmp_kstat_id = 1; +} + +void +mmp_fini(spa_t *spa) +{ + mmp_thread_t *mmp = &spa->spa_mmp; + + mutex_destroy(&mmp->mmp_thread_lock); + cv_destroy(&mmp->mmp_thread_cv); + mutex_destroy(&mmp->mmp_io_lock); +} + +static void +mmp_thread_enter(mmp_thread_t *mmp, callb_cpr_t *cpr) +{ + CALLB_CPR_INIT(cpr, &mmp->mmp_thread_lock, callb_generic_cpr, FTAG); + mutex_enter(&mmp->mmp_thread_lock); +} + +static void +mmp_thread_exit(mmp_thread_t *mmp, kthread_t **mpp, callb_cpr_t *cpr) +{ + ASSERT(*mpp != NULL); + *mpp = NULL; + cv_broadcast(&mmp->mmp_thread_cv); + CALLB_CPR_EXIT(cpr); /* drops &mmp->mmp_thread_lock */ + thread_exit(); +} + +void +mmp_thread_start(spa_t *spa) +{ + mmp_thread_t *mmp = &spa->spa_mmp; + + if (spa_writeable(spa)) { + mutex_enter(&mmp->mmp_thread_lock); + if (!mmp->mmp_thread) { + dprintf("mmp_thread_start pool %s\n", + spa->spa_name); + mmp->mmp_thread = thread_create(NULL, 0, mmp_thread, + spa, 0, &p0, TS_RUN, minclsyspri); + } + mutex_exit(&mmp->mmp_thread_lock); + } +} + +void +mmp_thread_stop(spa_t *spa) +{ + mmp_thread_t *mmp = &spa->spa_mmp; + + mutex_enter(&mmp->mmp_thread_lock); + mmp->mmp_thread_exiting = 1; + cv_broadcast(&mmp->mmp_thread_cv); + + while (mmp->mmp_thread) { + cv_wait(&mmp->mmp_thread_cv, &mmp->mmp_thread_lock); + } + mutex_exit(&mmp->mmp_thread_lock); + + ASSERT(mmp->mmp_thread == NULL); + mmp->mmp_thread_exiting = 0; +} + +typedef enum mmp_vdev_state_flag { + MMP_FAIL_NOT_WRITABLE = (1 << 0), + MMP_FAIL_WRITE_PENDING = (1 << 1), +} mmp_vdev_state_flag_t; + +/* + * Find a leaf vdev to write an MMP block to. 
It must not have an outstanding + * mmp write (if so a new write will also likely block). If there is no usable + * leaf, a nonzero error value is returned. The error value returned is a bit + * field. + * + * MMP_FAIL_WRITE_PENDING One or more leaf vdevs are writeable, but have an + * outstanding MMP write. + * MMP_FAIL_NOT_WRITABLE One or more leaf vdevs are not writeable. + */ + +static int +mmp_next_leaf(spa_t *spa) +{ + vdev_t *leaf; + vdev_t *starting_leaf; + int fail_mask = 0; + + ASSERT(MUTEX_HELD(&spa->spa_mmp.mmp_io_lock)); + ASSERT(spa_config_held(spa, SCL_STATE, RW_READER)); + ASSERT(list_link_active(&spa->spa_leaf_list.list_head) == B_TRUE); + ASSERT(!list_is_empty(&spa->spa_leaf_list)); + + if (spa->spa_mmp.mmp_leaf_last_gen != spa->spa_leaf_list_gen) { + spa->spa_mmp.mmp_last_leaf = list_head(&spa->spa_leaf_list); + spa->spa_mmp.mmp_leaf_last_gen = spa->spa_leaf_list_gen; + } + + leaf = spa->spa_mmp.mmp_last_leaf; + if (leaf == NULL) + leaf = list_head(&spa->spa_leaf_list); + starting_leaf = leaf; + + do { + leaf = list_next(&spa->spa_leaf_list, leaf); + if (leaf == NULL) + leaf = list_head(&spa->spa_leaf_list); + + if (!vdev_writeable(leaf)) { + fail_mask |= MMP_FAIL_NOT_WRITABLE; + } else if (leaf->vdev_mmp_pending != 0) { + fail_mask |= MMP_FAIL_WRITE_PENDING; + } else { + spa->spa_mmp.mmp_last_leaf = leaf; + return (0); + } + } while (leaf != starting_leaf); + + ASSERT(fail_mask); + + return (fail_mask); +} + +/* + * MMP writes are issued on a fixed schedule, but may complete at variable, + * much longer, intervals. The mmp_delay captures long periods between + * successful writes for any reason, including disk latency, scheduling delays, + * etc. + * + * The mmp_delay is usually calculated as a decaying average, but if the latest + * delay is higher we do not average it, so that we do not hide sudden spikes + * which the importing host must wait for. 
+ * + * If writes are occurring frequently, such as due to a high rate of txg syncs, + * the mmp_delay could become very small. Since those short delays depend on + * activity we cannot count on, we never allow mmp_delay to get lower than rate + * expected if only mmp_thread writes occur. + * + * If an mmp write was skipped or fails, and we have already waited longer than + * mmp_delay, we need to update it so the next write reflects the longer delay. + * + * Do not set mmp_delay if the multihost property is not on, so as not to + * trigger an activity check on import. + */ +static void +mmp_delay_update(spa_t *spa, boolean_t write_completed) +{ + mmp_thread_t *mts = &spa->spa_mmp; + hrtime_t delay = gethrtime() - mts->mmp_last_write; + + ASSERT(MUTEX_HELD(&mts->mmp_io_lock)); + + if (spa_multihost(spa) == B_FALSE) { + mts->mmp_delay = 0; + return; + } + + if (delay > mts->mmp_delay) + mts->mmp_delay = delay; + + if (write_completed == B_FALSE) + return; + + mts->mmp_last_write = gethrtime(); + + /* + * strictly less than, in case delay was changed above. + */ + if (delay < mts->mmp_delay) { + hrtime_t min_delay = MSEC2NSEC(zfs_multihost_interval) / + MAX(1, vdev_count_leaves(spa)); + mts->mmp_delay = MAX(((delay + mts->mmp_delay * 127) / 128), + min_delay); + } +} + +static void +mmp_write_done(zio_t *zio) +{ + spa_t *spa = zio->io_spa; + vdev_t *vd = zio->io_vd; + mmp_thread_t *mts = zio->io_private; + + mutex_enter(&mts->mmp_io_lock); + uint64_t mmp_kstat_id = vd->vdev_mmp_kstat_id; + hrtime_t mmp_write_duration = gethrtime() - vd->vdev_mmp_pending; + + mmp_delay_update(spa, (zio->io_error == 0)); + + vd->vdev_mmp_pending = 0; + vd->vdev_mmp_kstat_id = 0; + + mutex_exit(&mts->mmp_io_lock); + spa_config_exit(spa, SCL_STATE, mmp_tag); + + abd_free(zio->io_abd); +} + +/* + * When the uberblock on-disk is updated by a spa_sync, + * creating a new "best" uberblock, update the one stored + * in the mmp thread state, used for mmp writes. 
+ */ +void +mmp_update_uberblock(spa_t *spa, uberblock_t *ub) +{ + mmp_thread_t *mmp = &spa->spa_mmp; + + mutex_enter(&mmp->mmp_io_lock); + mmp->mmp_ub = *ub; + mmp->mmp_ub.ub_timestamp = gethrestime_sec(); + mmp_delay_update(spa, B_TRUE); + mutex_exit(&mmp->mmp_io_lock); +} + +/* + * Pick a leaf vdev with mmp_next_leaf() and a random label and MMP block + * in it, and write over that block with a copy of the last-synced + * uberblock, whose timestamp has been updated to show the pool is in use. + */ +static void +mmp_write_uberblock(spa_t *spa) +{ + int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL; + mmp_thread_t *mmp = &spa->spa_mmp; + uberblock_t *ub; + vdev_t *vd = NULL; + int label, error; + uint64_t offset; + + hrtime_t lock_acquire_time = gethrtime(); + spa_config_enter(spa, SCL_STATE, mmp_tag, RW_READER); + lock_acquire_time = gethrtime() - lock_acquire_time; + if (lock_acquire_time > (MSEC2NSEC(MMP_MIN_INTERVAL) / 10)) + zfs_dbgmsg("SCL_STATE acquisition took %llu ns\n", + (u_longlong_t)lock_acquire_time); + + mutex_enter(&mmp->mmp_io_lock); + + error = mmp_next_leaf(spa); + + /* + * spa_mmp_history has two types of entries: + * Issued MMP write: records time issued, error status, etc. + * Skipped MMP write: an MMP write could not be issued because no + * suitable leaf vdev was available. See comment above struct + * spa_mmp_history for details. 
+ */ + + if (error) { + mmp_delay_update(spa, B_FALSE); + if (mmp->mmp_skip_error == error) { + /* + * ZoL porting note: the following is TBD + * spa_mmp_history_set_skip(spa, mmp->mmp_kstat_id - 1); + */ + } else { + mmp->mmp_skip_error = error; + /* + * ZoL porting note: the following is TBD + * spa_mmp_history_add(spa, mmp->mmp_ub.ub_txg, + * gethrestime_sec(), mmp->mmp_delay, NULL, 0, + * mmp->mmp_kstat_id++, error); + */ + } + mutex_exit(&mmp->mmp_io_lock); + spa_config_exit(spa, SCL_STATE, mmp_tag); + return; + } + + vd = spa->spa_mmp.mmp_last_leaf; + mmp->mmp_skip_error = 0; + + if (mmp->mmp_zio_root == NULL) + mmp->mmp_zio_root = zio_root(spa, NULL, NULL, + flags | ZIO_FLAG_GODFATHER); + + ub = &mmp->mmp_ub; + ub->ub_timestamp = gethrestime_sec(); + ub->ub_mmp_magic = MMP_MAGIC; + ub->ub_mmp_delay = mmp->mmp_delay; + vd->vdev_mmp_pending = gethrtime(); + vd->vdev_mmp_kstat_id = mmp->mmp_kstat_id; + + zio_t *zio = zio_null(mmp->mmp_zio_root, spa, NULL, NULL, NULL, flags); + abd_t *ub_abd = abd_alloc_for_io(VDEV_UBERBLOCK_SIZE(vd), B_TRUE); + abd_zero(ub_abd, VDEV_UBERBLOCK_SIZE(vd)); + abd_copy_from_buf(ub_abd, ub, sizeof (uberblock_t)); + + mmp->mmp_kstat_id++; + mutex_exit(&mmp->mmp_io_lock); + + offset = VDEV_UBERBLOCK_OFFSET(vd, VDEV_UBERBLOCK_COUNT(vd) - + MMP_BLOCKS_PER_LABEL + spa_get_random(MMP_BLOCKS_PER_LABEL)); + + label = spa_get_random(VDEV_LABELS); + vdev_label_write(zio, vd, label, ub_abd, offset, + VDEV_UBERBLOCK_SIZE(vd), mmp_write_done, mmp, + flags | ZIO_FLAG_DONT_PROPAGATE); + + /* + * ZoL porting note: the following is TBD + * (void) spa_mmp_history_add(spa, ub->ub_txg, ub->ub_timestamp, + * ub->ub_mmp_delay, vd, label, vd->vdev_mmp_kstat_id, 0); + */ + + zio_nowait(zio); +} + +static void +mmp_thread(void *arg) +{ + spa_t *spa = (spa_t *)arg; + mmp_thread_t *mmp = &spa->spa_mmp; + boolean_t last_spa_suspended = spa_suspended(spa); + boolean_t last_spa_multihost = spa_multihost(spa); + callb_cpr_t cpr; + hrtime_t max_fail_ns = 
zfs_multihost_fail_intervals * + MSEC2NSEC(MAX(zfs_multihost_interval, MMP_MIN_INTERVAL)); + + mmp_thread_enter(mmp, &cpr); + + /* + * The mmp_write_done() function calculates mmp_delay based on the + * prior value of mmp_delay and the elapsed time since the last write. + * For the first mmp write, there is no "last write", so we start + * with fake, but reasonable, default non-zero values. + */ + mmp->mmp_delay = MSEC2NSEC(MAX(zfs_multihost_interval, + MMP_MIN_INTERVAL)) / MAX(vdev_count_leaves(spa), 1); + mmp->mmp_last_write = gethrtime() - mmp->mmp_delay; + + while (!mmp->mmp_thread_exiting) { + uint64_t mmp_fail_intervals = zfs_multihost_fail_intervals; + uint64_t mmp_interval = MSEC2NSEC( + MAX(zfs_multihost_interval, MMP_MIN_INTERVAL)); + boolean_t suspended = spa_suspended(spa); + boolean_t multihost = spa_multihost(spa); + hrtime_t next_time; + + if (multihost) + next_time = gethrtime() + mmp_interval / + MAX(vdev_count_leaves(spa), 1); + else + next_time = gethrtime() + + MSEC2NSEC(MMP_DEFAULT_INTERVAL); + + /* + * MMP off => on, or suspended => !suspended: + * No writes occurred recently. Update mmp_last_write to give + * us some time to try. + */ + if ((!last_spa_multihost && multihost) || + (last_spa_suspended && !suspended)) { + mutex_enter(&mmp->mmp_io_lock); + mmp->mmp_last_write = gethrtime(); + mutex_exit(&mmp->mmp_io_lock); + } + + /* + * MMP on => off: + * mmp_delay == 0 tells importing node to skip activity check. + */ + if (last_spa_multihost && !multihost) { + mutex_enter(&mmp->mmp_io_lock); + mmp->mmp_delay = 0; + mutex_exit(&mmp->mmp_io_lock); + } + last_spa_multihost = multihost; + last_spa_suspended = suspended; + + /* + * Smooth max_fail_ns when its factors are decreased, because + * making (max_fail_ns < mmp_interval) results in the pool being + * immediately suspended before writes can occur at the new + * higher frequency. 
+ */ + if ((mmp_interval * mmp_fail_intervals) < max_fail_ns) { + max_fail_ns = ((31 * max_fail_ns) + (mmp_interval * + mmp_fail_intervals)) / 32; + } else { + max_fail_ns = mmp_interval * mmp_fail_intervals; + } + + /* + * Suspend the pool if no MMP write has succeeded in over + * mmp_interval * mmp_fail_intervals nanoseconds. + */ + if (!suspended && mmp_fail_intervals && multihost && + (gethrtime() - mmp->mmp_last_write) > max_fail_ns) { + cmn_err(CE_WARN, "MMP writes to pool '%s' have not " + "succeeded in over %llus; suspending pool", + spa_name(spa), + NSEC2SEC(gethrtime() - mmp->mmp_last_write)); + zio_suspend(spa, NULL, ZIO_SUSPEND_MMP); + } + + if (multihost && !suspended) + mmp_write_uberblock(spa); + + CALLB_CPR_SAFE_BEGIN(&cpr); + (void) cv_timedwait_sig_hrtime(&mmp->mmp_thread_cv, + &mmp->mmp_thread_lock, next_time); + CALLB_CPR_SAFE_END(&cpr, &mmp->mmp_thread_lock); + } + + /* Outstanding writes are allowed to complete. */ + if (mmp->mmp_zio_root) + zio_wait(mmp->mmp_zio_root); + + mmp->mmp_zio_root = NULL; + mmp_thread_exit(mmp, &mmp->mmp_thread, &cpr); +} + +/* + * Signal the MMP thread to wake it, when it is sleeping on + * its cv. Used when some module parameter has changed and + * we want the thread to know about it. + * Only signal if the pool is active and mmp thread is + * running, otherwise there is no thread to wake. 
+ */ +static void +mmp_signal_thread(spa_t *spa) +{ + mmp_thread_t *mmp = &spa->spa_mmp; + + mutex_enter(&mmp->mmp_thread_lock); + if (mmp->mmp_thread) + cv_broadcast(&mmp->mmp_thread_cv); + mutex_exit(&mmp->mmp_thread_lock); +} + +void +mmp_signal_all_threads(void) +{ + spa_t *spa = NULL; + + mutex_enter(&spa_namespace_lock); + while ((spa = spa_next(spa))) { + if (spa->spa_state == POOL_STATE_ACTIVE) + mmp_signal_thread(spa); + } + mutex_exit(&spa_namespace_lock); +} diff --git a/usr/src/uts/common/fs/zfs/spa.c b/usr/src/uts/common/fs/zfs/spa.c index 9b331b2098..b1f028479c 100644 --- a/usr/src/uts/common/fs/zfs/spa.c +++ b/usr/src/uts/common/fs/zfs/spa.c @@ -57,6 +57,7 @@ #include <sys/vdev_initialize.h> #include <sys/metaslab.h> #include <sys/metaslab_impl.h> +#include <sys/mmp.h> #include <sys/uberblock_impl.h> #include <sys/txg.h> #include <sys/avl.h> @@ -553,6 +554,16 @@ spa_prop_validate(spa_t *spa, nvlist_t *props) error = SET_ERROR(EINVAL); break; + case ZPOOL_PROP_MULTIHOST: + error = nvpair_value_uint64(elem, &intval); + if (!error && intval > 1) + error = SET_ERROR(EINVAL); + + if (!error && !spa_get_hostid()) + error = SET_ERROR(ENOTSUP); + + break; + case ZPOOL_PROP_BOOTFS: /* * If the pool version is less than SPA_VERSION_BOOTFS, @@ -1360,6 +1371,9 @@ spa_unload(spa_t *spa) spa_config_exit(spa, SCL_ALL, spa); } + if (spa->spa_mmp.mmp_thread) + mmp_thread_stop(spa); + /* * Wait for any outstanding async I/O to complete. */ @@ -2329,6 +2343,205 @@ vdev_count_verify_zaps(vdev_t *vd) return (total); } +/* + * Determine whether the activity check is required. 
+ */ +static boolean_t +spa_activity_check_required(spa_t *spa, uberblock_t *ub, nvlist_t *label, + nvlist_t *config) +{ + uint64_t state = 0; + uint64_t hostid = 0; + uint64_t tryconfig_txg = 0; + uint64_t tryconfig_timestamp = 0; + nvlist_t *nvinfo; + + if (nvlist_exists(config, ZPOOL_CONFIG_LOAD_INFO)) { + nvinfo = fnvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO); + (void) nvlist_lookup_uint64(nvinfo, ZPOOL_CONFIG_MMP_TXG, + &tryconfig_txg); + (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_TIMESTAMP, + &tryconfig_timestamp); + } + + (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE, &state); + + /* + * Disable the MMP activity check - This is used by zdb which + * is intended to be used on potentially active pools. + */ + if (spa->spa_import_flags & ZFS_IMPORT_SKIP_MMP) + return (B_FALSE); + + /* + * Skip the activity check when the MMP feature is disabled. + */ + if (ub->ub_mmp_magic == MMP_MAGIC && ub->ub_mmp_delay == 0) + return (B_FALSE); + /* + * If the tryconfig_* values are nonzero, they are the results of an + * earlier tryimport. If they match the uberblock we just found, then + * the pool has not changed and we return false so we do not test a + * second time. + */ + if (tryconfig_txg && tryconfig_txg == ub->ub_txg && + tryconfig_timestamp && tryconfig_timestamp == ub->ub_timestamp) + return (B_FALSE); + + /* + * Allow the activity check to be skipped when importing the pool + * on the same host which last imported it. Since the hostid from + * configuration may be stale use the one read from the label. + */ + if (nvlist_exists(label, ZPOOL_CONFIG_HOSTID)) + hostid = fnvlist_lookup_uint64(label, ZPOOL_CONFIG_HOSTID); + + if (hostid == spa_get_hostid()) + return (B_FALSE); + + /* + * Skip the activity test when the pool was cleanly exported. + */ + if (state != POOL_STATE_ACTIVE) + return (B_FALSE); + + return (B_TRUE); +} + +/* + * Perform the import activity check. If the user canceled the import or + * we detected activity then fail. 
+ */ +static int +spa_activity_check(spa_t *spa, uberblock_t *ub, nvlist_t *config) +{ + uint64_t import_intervals = MAX(zfs_multihost_import_intervals, 1); + uint64_t txg = ub->ub_txg; + uint64_t timestamp = ub->ub_timestamp; + uint64_t import_delay = NANOSEC; + hrtime_t import_expire; + nvlist_t *mmp_label = NULL; + vdev_t *rvd = spa->spa_root_vdev; + kcondvar_t cv; + kmutex_t mtx; + int error = 0; + + cv_init(&cv, NULL, CV_DEFAULT, NULL); + mutex_init(&mtx, NULL, MUTEX_DEFAULT, NULL); + mutex_enter(&mtx); + + /* + * If ZPOOL_CONFIG_MMP_TXG is present an activity check was performed + * during the earlier tryimport. If the txg recorded there is 0 then + * the pool is known to be active on another host. + * + * Otherwise, the pool might be in use on another node. Check for + * changes in the uberblocks on disk if necessary. + */ + if (nvlist_exists(config, ZPOOL_CONFIG_LOAD_INFO)) { + nvlist_t *nvinfo = fnvlist_lookup_nvlist(config, + ZPOOL_CONFIG_LOAD_INFO); + + if (nvlist_exists(nvinfo, ZPOOL_CONFIG_MMP_TXG) && + fnvlist_lookup_uint64(nvinfo, ZPOOL_CONFIG_MMP_TXG) == 0) { + vdev_uberblock_load(rvd, ub, &mmp_label); + error = SET_ERROR(EREMOTEIO); + goto out; + } + } + + /* + * Preferentially use the zfs_multihost_interval from the node which + * last imported the pool. This value is stored in an MMP uberblock as. + * + * ub_mmp_delay * vdev_count_leaves() == zfs_multihost_interval + */ + if (ub->ub_mmp_magic == MMP_MAGIC && ub->ub_mmp_delay) + import_delay = MAX(import_delay, import_intervals * + ub->ub_mmp_delay * MAX(vdev_count_leaves(spa), 1)); + + /* Apply a floor using the local default values. 
*/ + import_delay = MAX(import_delay, import_intervals * + MSEC2NSEC(MAX(zfs_multihost_interval, MMP_MIN_INTERVAL))); + + zfs_dbgmsg("import_delay=%llu ub_mmp_delay=%llu import_intervals=%u " + "leaves=%u", import_delay, ub->ub_mmp_delay, import_intervals, + vdev_count_leaves(spa)); + + /* Add a small random factor in case of simultaneous imports (0-25%) */ + import_expire = gethrtime() + import_delay + + (import_delay * spa_get_random(250) / 1000); + + while (gethrtime() < import_expire) { + vdev_uberblock_load(rvd, ub, &mmp_label); + + if (txg != ub->ub_txg || timestamp != ub->ub_timestamp) { + error = SET_ERROR(EREMOTEIO); + break; + } + + if (mmp_label) { + nvlist_free(mmp_label); + mmp_label = NULL; + } + + error = cv_timedwait_sig(&cv, &mtx, ddi_get_lbolt() + hz); + if (error != -1) { + error = SET_ERROR(EINTR); + break; + } + error = 0; + } + +out: + mutex_exit(&mtx); + mutex_destroy(&mtx); + cv_destroy(&cv); + + /* + * If the pool is determined to be active store the status in the + * spa->spa_load_info nvlist. If the remote hostname or hostid are + * available from configuration read from disk store them as well. + * This allows 'zpool import' to generate a more useful message. 
+ * + * ZPOOL_CONFIG_MMP_STATE - observed pool status (mandatory) + * ZPOOL_CONFIG_MMP_HOSTNAME - hostname from the active pool + * ZPOOL_CONFIG_MMP_HOSTID - hostid from the active pool + */ + if (error == EREMOTEIO) { + char *hostname = "<unknown>"; + uint64_t hostid = 0; + + if (mmp_label) { + if (nvlist_exists(mmp_label, ZPOOL_CONFIG_HOSTNAME)) { + hostname = fnvlist_lookup_string(mmp_label, + ZPOOL_CONFIG_HOSTNAME); + fnvlist_add_string(spa->spa_load_info, + ZPOOL_CONFIG_MMP_HOSTNAME, hostname); + } + + if (nvlist_exists(mmp_label, ZPOOL_CONFIG_HOSTID)) { + hostid = fnvlist_lookup_uint64(mmp_label, + ZPOOL_CONFIG_HOSTID); + fnvlist_add_uint64(spa->spa_load_info, + ZPOOL_CONFIG_MMP_HOSTID, hostid); + } + } + + fnvlist_add_uint64(spa->spa_load_info, + ZPOOL_CONFIG_MMP_STATE, MMP_STATE_ACTIVE); + fnvlist_add_uint64(spa->spa_load_info, + ZPOOL_CONFIG_MMP_TXG, 0); + + error = spa_vdev_err(rvd, VDEV_AUX_ACTIVE, EREMOTEIO); + } + + if (mmp_label) + nvlist_free(mmp_label); + + return (error); +} + static int spa_verify_host(spa_t *spa, nvlist_t *mos_config) { @@ -2579,6 +2792,7 @@ spa_ld_select_uberblock(spa_t *spa, spa_import_type_t type) vdev_t *rvd = spa->spa_root_vdev; nvlist_t *label; uberblock_t *ub = &spa->spa_uberblock; + boolean_t activity_check = B_FALSE; /* * If we are opening the checkpointed state of the pool by @@ -2621,6 +2835,34 @@ spa_ld_select_uberblock(spa_t *spa, spa_import_type_t type) (u_longlong_t)ub->ub_txg); /* + * For pools which have the multihost property on determine if the + * pool is truly inactive and can be safely imported. Prevent + * hosts which don't have a hostid set from importing the pool. 
+ */ + activity_check = spa_activity_check_required(spa, ub, label, + spa->spa_config); + if (activity_check) { + if (ub->ub_mmp_magic == MMP_MAGIC && ub->ub_mmp_delay && + spa_get_hostid() == 0) { + nvlist_free(label); + fnvlist_add_uint64(spa->spa_load_info, + ZPOOL_CONFIG_MMP_STATE, MMP_STATE_NO_HOSTID); + return (spa_vdev_err(rvd, VDEV_AUX_ACTIVE, EREMOTEIO)); + } + + int error = spa_activity_check(spa, ub, spa->spa_config); + if (error) { + nvlist_free(label); + return (error); + } + + fnvlist_add_uint64(spa->spa_load_info, + ZPOOL_CONFIG_MMP_STATE, MMP_STATE_INACTIVE); + fnvlist_add_uint64(spa->spa_load_info, + ZPOOL_CONFIG_MMP_TXG, ub->ub_txg); + } + + /* * If the pool has an unsupported version we can't open it. */ if (!SPA_VERSION_IS_SUPPORTED(ub->ub_version)) { @@ -3175,6 +3417,7 @@ spa_ld_get_props(spa_t *spa) spa_prop_find(spa, ZPOOL_PROP_DELEGATION, &spa->spa_delegation); spa_prop_find(spa, ZPOOL_PROP_FAILUREMODE, &spa->spa_failmode); spa_prop_find(spa, ZPOOL_PROP_AUTOEXPAND, &spa->spa_autoexpand); + spa_prop_find(spa, ZPOOL_PROP_MULTIHOST, &spa->spa_multihost); spa_prop_find(spa, ZPOOL_PROP_DEDUPDITTO, &spa->spa_dedup_ditto); @@ -3263,6 +3506,18 @@ spa_ld_load_vdev_metadata(spa_t *spa) vdev_t *rvd = spa->spa_root_vdev; /* + * If the 'multihost' property is set, then never allow a pool to + * be imported when the system hostid is zero. The exception to + * this rule is zdb which is always allowed to access pools. + */ + if (spa_multihost(spa) && spa_get_hostid() == 0 && + (spa->spa_import_flags & ZFS_IMPORT_SKIP_MMP) == 0) { + fnvlist_add_uint64(spa->spa_load_info, + ZPOOL_CONFIG_MMP_STATE, MMP_STATE_NO_HOSTID); + return (spa_vdev_err(rvd, VDEV_AUX_ACTIVE, EREMOTEIO)); + } + + /* * If the 'autoreplace' property is set, then post a resource notifying * the ZFS DE that it should not issue any faults for unopenable * devices. 
We also iterate over the vdevs, and post a sysevent for any @@ -3862,6 +4117,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, char **ereport) */ spa->spa_sync_on = B_TRUE; txg_sync_start(spa->spa_dsl_pool); + mmp_thread_start(spa); /* * Wait for all claims to sync. We sync up to the highest @@ -4385,10 +4641,14 @@ spa_get_stats(const char *name, nvlist_t **config, ZPOOL_CONFIG_ERRCOUNT, spa_get_errlog_size(spa)) == 0); - if (spa_suspended(spa)) + if (spa_suspended(spa)) { VERIFY(nvlist_add_uint64(*config, ZPOOL_CONFIG_SUSPENDED, spa->spa_failmode) == 0); + VERIFY(nvlist_add_uint64(*config, + ZPOOL_CONFIG_SUSPENDED_REASON, + spa->spa_suspended) == 0); + } spa_add_spares(spa, *config); spa_add_l2cache(spa, *config); @@ -4475,18 +4735,6 @@ spa_validate_aux_devs(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode, goto out; } - /* - * The L2ARC currently only supports disk devices in - * kernel context. For user-level testing, we allow it. - */ -#ifdef _KERNEL - if ((strcmp(config, ZPOOL_CONFIG_L2CACHE) == 0) && - strcmp(vd->vdev_ops->vdev_op_type, VDEV_TYPE_DISK) != 0) { - error = SET_ERROR(ENOTBLK); - vdev_free(vd); - goto out; - } -#endif vd->vdev_top = vd; if ((error = vdev_open(vd)) == 0 && @@ -4831,6 +5079,7 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION); spa->spa_failmode = zpool_prop_default_numeric(ZPOOL_PROP_FAILUREMODE); spa->spa_autoexpand = zpool_prop_default_numeric(ZPOOL_PROP_AUTOEXPAND); + spa->spa_multihost = zpool_prop_default_numeric(ZPOOL_PROP_MULTIHOST); if (props != NULL) { spa_configfile_set(spa, props, B_FALSE); @@ -4841,6 +5090,7 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, spa->spa_sync_on = B_TRUE; txg_sync_start(spa->spa_dsl_pool); + mmp_thread_start(spa); /* * We explicitly wait for the first transaction to complete so that our @@ -7429,6 +7679,9 @@ spa_sync_props(void *arg, dmu_tx_t *tx) spa_async_request(spa, 
SPA_ASYNC_AUTOEXPAND); break; + case ZPOOL_PROP_MULTIHOST: + spa->spa_multihost = intval; + break; case ZPOOL_PROP_DEDUPDITTO: spa->spa_dedup_ditto = intval; break; @@ -7830,7 +8083,7 @@ spa_sync(spa_t *spa, uint64_t txg) if (error == 0) break; - zio_suspend(spa, NULL); + zio_suspend(spa, NULL, ZIO_SUSPEND_IOERR); zio_resume_wait(spa); } dmu_tx_commit(tx); diff --git a/usr/src/uts/common/fs/zfs/spa_config.c b/usr/src/uts/common/fs/zfs/spa_config.c index ad61dd0723..e01260f312 100644 --- a/usr/src/uts/common/fs/zfs/spa_config.c +++ b/usr/src/uts/common/fs/zfs/spa_config.c @@ -413,8 +413,7 @@ spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats) spa->spa_comment); } - hostid = zone_get_hostid(NULL); - + hostid = spa_get_hostid(); if (hostid != 0) { fnvlist_add_uint64(config, ZPOOL_CONFIG_HOSTID, hostid); } diff --git a/usr/src/uts/common/fs/zfs/spa_misc.c b/usr/src/uts/common/fs/zfs/spa_misc.c index 5acca63b01..d27514fd69 100644 --- a/usr/src/uts/common/fs/zfs/spa_misc.c +++ b/usr/src/uts/common/fs/zfs/spa_misc.c @@ -719,6 +719,9 @@ spa_add(const char *name, nvlist_t *config, const char *altroot) spa->spa_feat_refcount_cache[i] = SPA_FEATURE_DISABLED; } + list_create(&spa->spa_leaf_list, sizeof (vdev_t), + offsetof(vdev_t, vdev_leaf_node)); + return (spa); } @@ -763,6 +766,7 @@ spa_remove(spa_t *spa) sizeof (avl_tree_t)); list_destroy(&spa->spa_config_list); + list_destroy(&spa->spa_leaf_list); nvlist_free(spa->spa_label_features); nvlist_free(spa->spa_load_info); @@ -1443,6 +1447,9 @@ spa_get_random(uint64_t range) ASSERT(range != 0); + if (range == 1) + return (0); + (void) random_get_pseudo_bytes((void *)&r, sizeof (uint64_t)); return (r % range); @@ -1772,7 +1779,7 @@ spa_get_failmode(spa_t *spa) boolean_t spa_suspended(spa_t *spa) { - return (spa->spa_suspended); + return (spa->spa_suspended != ZIO_SUSPEND_NONE); } uint64_t @@ -2150,6 +2157,30 @@ spa_maxdnodesize(spa_t *spa) return (DNODE_MIN_SIZE); } +boolean_t +spa_multihost(spa_t *spa) +{ + 
return (spa->spa_multihost ? B_TRUE : B_FALSE); +} + +unsigned long +spa_get_hostid(void) +{ + unsigned long myhostid; + +#ifdef _KERNEL + myhostid = zone_get_hostid(NULL); +#else /* _KERNEL */ + /* + * We're emulating the system's hostid in userland, so + * we can't use zone_get_hostid(). + */ + (void) ddi_strtoul(hw_serial, NULL, 10, &myhostid); +#endif /* _KERNEL */ + + return (myhostid); +} + /* * Returns the txg that the last device removal completed. No indirect mappings * have been added since this txg. diff --git a/usr/src/uts/common/fs/zfs/sys/dsl_pool.h b/usr/src/uts/common/fs/zfs/sys/dsl_pool.h index c79c5bf735..b23d19eef5 100644 --- a/usr/src/uts/common/fs/zfs/sys/dsl_pool.h +++ b/usr/src/uts/common/fs/zfs/sys/dsl_pool.h @@ -39,6 +39,7 @@ #include <sys/bptree.h> #include <sys/rrwlock.h> #include <sys/dsl_synctask.h> +#include <sys/mmp.h> #ifdef __cplusplus extern "C" { diff --git a/usr/src/uts/common/fs/zfs/sys/mmp.h b/usr/src/uts/common/fs/zfs/sys/mmp.h new file mode 100644 index 0000000000..edb0d43470 --- /dev/null +++ b/usr/src/uts/common/fs/zfs/sys/mmp.h @@ -0,0 +1,68 @@ +/* + * CDDL HEADER START + * + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + * + * CDDL HEADER END + */ +/* + * Copyright (C) 2017 by Lawrence Livermore National Security, LLC. 
+ */ + +#ifndef _SYS_MMP_H +#define _SYS_MMP_H + +#include <sys/spa.h> +#include <sys/zfs_context.h> +#include <sys/uberblock_impl.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#define MMP_MIN_INTERVAL 100 /* ms */ +#define MMP_DEFAULT_INTERVAL 1000 /* ms */ +#define MMP_DEFAULT_IMPORT_INTERVALS 10 +#define MMP_DEFAULT_FAIL_INTERVALS 5 + +typedef struct mmp_thread { + kmutex_t mmp_thread_lock; /* protect thread mgmt fields */ + kcondvar_t mmp_thread_cv; + kthread_t *mmp_thread; + uint8_t mmp_thread_exiting; + kmutex_t mmp_io_lock; /* protect below */ + hrtime_t mmp_last_write; /* last successful MMP write */ + uint64_t mmp_delay; /* decaying avg ns between MMP writes */ + uberblock_t mmp_ub; /* last ub written by sync */ + zio_t *mmp_zio_root; /* root of mmp write zios */ + uint64_t mmp_kstat_id; /* unique id for next MMP write kstat */ + int mmp_skip_error; /* reason for last skipped write */ + vdev_t *mmp_last_leaf; /* last mmp write sent here */ + uint64_t mmp_leaf_last_gen; /* last mmp write sent here */ +} mmp_thread_t; + + +extern void mmp_init(struct spa *spa); +extern void mmp_fini(struct spa *spa); +extern void mmp_thread_start(struct spa *spa); +extern void mmp_thread_stop(struct spa *spa); +extern void mmp_update_uberblock(struct spa *spa, struct uberblock *ub); +extern void mmp_signal_all_threads(void); + +/* Global tuning */ +extern ulong_t zfs_multihost_interval; +extern uint_t zfs_multihost_fail_intervals; +extern uint_t zfs_multihost_import_intervals; + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_MMP_H */ diff --git a/usr/src/uts/common/fs/zfs/sys/spa.h b/usr/src/uts/common/fs/zfs/sys/spa.h index f044b994d0..a7a0341a3e 100644 --- a/usr/src/uts/common/fs/zfs/sys/spa.h +++ b/usr/src/uts/common/fs/zfs/sys/spa.h @@ -847,6 +847,8 @@ extern boolean_t spa_writeable(spa_t *spa); extern boolean_t spa_has_pending_synctask(spa_t *spa); extern int spa_maxblocksize(spa_t *spa); extern int spa_maxdnodesize(spa_t *spa); +extern boolean_t 
spa_multihost(spa_t *spa); +extern unsigned long spa_get_hostid(void); extern boolean_t spa_has_checkpoint(spa_t *spa); extern boolean_t spa_importing_readonly_checkpoint(spa_t *spa); extern boolean_t spa_suspend_async_destroy(spa_t *spa); diff --git a/usr/src/uts/common/fs/zfs/sys/spa_impl.h b/usr/src/uts/common/fs/zfs/sys/spa_impl.h index 473237a90f..c253bbe103 100644 --- a/usr/src/uts/common/fs/zfs/sys/spa_impl.h +++ b/usr/src/uts/common/fs/zfs/sys/spa_impl.h @@ -328,7 +328,7 @@ struct spa { zio_t *spa_txg_zio[TXG_SIZE]; /* spa_sync() waits for this */ kmutex_t spa_suspend_lock; /* protects suspend_zio_root */ kcondvar_t spa_suspend_cv; /* notification of resume */ - uint8_t spa_suspended; /* pool is suspended */ + zio_suspend_reason_t spa_suspended; /* pool is suspended */ uint8_t spa_claiming; /* pool is doing zil_claim() */ boolean_t spa_is_root; /* pool is root */ int spa_minref; /* num refs when first opened */ @@ -381,6 +381,11 @@ struct spa { hrtime_t spa_ccw_fail_time; /* Conf cache write fail time */ + uint64_t spa_multihost; /* multihost aware (mmp) */ + mmp_thread_t spa_mmp; /* multihost mmp thread */ + list_t spa_leaf_list; /* list of leaf vdevs */ + uint64_t spa_leaf_list_gen; /* track leaf_list changes */ + /* * spa_refcount & spa_config_lock must be the last elements * because refcount_t changes size based on compilation options. 
diff --git a/usr/src/uts/common/fs/zfs/sys/uberblock.h b/usr/src/uts/common/fs/zfs/sys/uberblock.h index 21e7ae0de7..044e438387 100644 --- a/usr/src/uts/common/fs/zfs/sys/uberblock.h +++ b/usr/src/uts/common/fs/zfs/sys/uberblock.h @@ -40,7 +40,8 @@ extern "C" { typedef struct uberblock uberblock_t; extern int uberblock_verify(uberblock_t *); -extern boolean_t uberblock_update(uberblock_t *, vdev_t *, uint64_t); +extern boolean_t uberblock_update(uberblock_t *ub, vdev_t *rvd, uint64_t txg, + uint64_t mmp_delay); #ifdef __cplusplus } diff --git a/usr/src/uts/common/fs/zfs/sys/uberblock_impl.h b/usr/src/uts/common/fs/zfs/sys/uberblock_impl.h index 9a3684577d..e649a4ccda 100644 --- a/usr/src/uts/common/fs/zfs/sys/uberblock_impl.h +++ b/usr/src/uts/common/fs/zfs/sys/uberblock_impl.h @@ -44,6 +44,7 @@ extern "C" { */ #define UBERBLOCK_MAGIC 0x00bab10c /* oo-ba-bloc! */ #define UBERBLOCK_SHIFT 10 /* up to 1K */ +#define MMP_MAGIC 0xa11cea11 /* all-see-all */ struct uberblock { uint64_t ub_magic; /* UBERBLOCK_MAGIC */ @@ -56,7 +57,7 @@ struct uberblock { /* highest SPA_VERSION supported by software that wrote this txg */ uint64_t ub_software_version; - /* These fields are reserved for features that are under development: */ + /* Maybe missing in uberblocks we read, but always written */ uint64_t ub_mmp_magic; uint64_t ub_mmp_delay; uint64_t ub_mmp_seq; diff --git a/usr/src/uts/common/fs/zfs/sys/vdev.h b/usr/src/uts/common/fs/zfs/sys/vdev.h index 688af34ccd..b45f0a2ca9 100644 --- a/usr/src/uts/common/fs/zfs/sys/vdev.h +++ b/usr/src/uts/common/fs/zfs/sys/vdev.h @@ -161,6 +161,8 @@ extern uint64_t vdev_label_offset(uint64_t psize, int l, uint64_t offset); extern int vdev_label_number(uint64_t psise, uint64_t offset); extern nvlist_t *vdev_label_read_config(vdev_t *vd, uint64_t txg); extern void vdev_uberblock_load(vdev_t *, struct uberblock *, nvlist_t **); +extern void vdev_label_write(zio_t *zio, vdev_t *vd, int l, abd_t *buf, uint64_t + offset, uint64_t size, 
zio_done_func_t *done, void *private, int flags); typedef enum { VDEV_LABEL_CREATE, /* create/add a new device */ diff --git a/usr/src/uts/common/fs/zfs/sys/vdev_impl.h b/usr/src/uts/common/fs/zfs/sys/vdev_impl.h index 2c5dee00e2..5a3fdefb92 100644 --- a/usr/src/uts/common/fs/zfs/sys/vdev_impl.h +++ b/usr/src/uts/common/fs/zfs/sys/vdev_impl.h @@ -243,7 +243,7 @@ struct vdev { /* pool checkpoint related */ space_map_t *vdev_checkpoint_sm; /* contains reserved blocks */ - + boolean_t vdev_initialize_exit_wanted; vdev_initializing_state_t vdev_initialize_state; kthread_t *vdev_initialize_thread; @@ -344,6 +344,9 @@ struct vdev { zio_t *vdev_probe_zio; /* root of current probe */ vdev_aux_t vdev_label_aux; /* on-disk aux state */ uint64_t vdev_leaf_zap; + hrtime_t vdev_mmp_pending; /* 0 if write finished */ + uint64_t vdev_mmp_kstat_id; /* to find kstat entry */ + list_node_t vdev_leaf_node; /* leaf vdev list */ /* * For DTrace to work in userland (libzpool) context, these fields must @@ -365,6 +368,12 @@ struct vdev { #define VDEV_PHYS_SIZE (112 << 10) #define VDEV_UBERBLOCK_RING (128 << 10) +/* + * MMP blocks occupy the last MMP_BLOCKS_PER_LABEL slots in the uberblock + * ring when MMP is enabled. + */ +#define MMP_BLOCKS_PER_LABEL 1 + /* The largest uberblock we support is 8k. 
*/ #define MAX_UBERBLOCK_SHIFT (13) #define VDEV_UBERBLOCK_SHIFT(vd) \ diff --git a/usr/src/uts/common/fs/zfs/sys/zio.h b/usr/src/uts/common/fs/zfs/sys/zio.h index f702cf1337..ead74f9cb7 100644 --- a/usr/src/uts/common/fs/zfs/sys/zio.h +++ b/usr/src/uts/common/fs/zfs/sys/zio.h @@ -138,6 +138,12 @@ enum zio_checksum { #define ZIO_FAILURE_MODE_CONTINUE 1 #define ZIO_FAILURE_MODE_PANIC 2 +typedef enum zio_suspend_reason { + ZIO_SUSPEND_NONE = 0, + ZIO_SUSPEND_IOERR, + ZIO_SUSPEND_MMP, +} zio_suspend_reason_t; + enum zio_flag { /* * Flags inherited by gang, ddt, and vdev children, @@ -224,7 +230,7 @@ enum zio_child { #define ZIO_CHILD_DDT_BIT ZIO_CHILD_BIT(ZIO_CHILD_DDT) #define ZIO_CHILD_LOGICAL_BIT ZIO_CHILD_BIT(ZIO_CHILD_LOGICAL) #define ZIO_CHILD_ALL_BITS \ - (ZIO_CHILD_VDEV_BIT | ZIO_CHILD_GANG_BIT | \ + (ZIO_CHILD_VDEV_BIT | ZIO_CHILD_GANG_BIT | \ ZIO_CHILD_DDT_BIT | ZIO_CHILD_LOGICAL_BIT) enum zio_wait_type { @@ -442,7 +448,7 @@ struct zio { avl_node_t io_queue_node; avl_node_t io_offset_node; avl_node_t io_alloc_node; - zio_alloc_list_t io_alloc_list; + zio_alloc_list_t io_alloc_list; /* Internal pipeline state */ enum zio_flag io_flags; @@ -575,7 +581,7 @@ extern enum zio_checksum zio_checksum_dedup_select(spa_t *spa, extern enum zio_compress zio_compress_select(spa_t *spa, enum zio_compress child, enum zio_compress parent); -extern void zio_suspend(spa_t *spa, zio_t *zio); +extern void zio_suspend(spa_t *spa, zio_t *zio, zio_suspend_reason_t); extern int zio_resume(spa_t *spa); extern void zio_resume_wait(spa_t *spa); diff --git a/usr/src/uts/common/fs/zfs/uberblock.c b/usr/src/uts/common/fs/zfs/uberblock.c index 8b198469e1..3b85260764 100644 --- a/usr/src/uts/common/fs/zfs/uberblock.c +++ b/usr/src/uts/common/fs/zfs/uberblock.c @@ -44,7 +44,7 @@ uberblock_verify(uberblock_t *ub) * transaction group. 
*/ boolean_t -uberblock_update(uberblock_t *ub, vdev_t *rvd, uint64_t txg) +uberblock_update(uberblock_t *ub, vdev_t *rvd, uint64_t txg, uint64_t mmp_delay) { ASSERT(ub->ub_txg < txg); @@ -57,6 +57,9 @@ uberblock_update(uberblock_t *ub, vdev_t *rvd, uint64_t txg) ub->ub_guid_sum = rvd->vdev_guid_sum; ub->ub_timestamp = gethrestime_sec(); ub->ub_software_version = SPA_VERSION; + ub->ub_mmp_magic = MMP_MAGIC; + ub->ub_mmp_delay = spa_multihost(rvd->vdev_spa) ? mmp_delay : 0; + ub->ub_mmp_seq = 0; ub->ub_checkpoint_txg = 0; return (ub->ub_rootbp.blk_birth == txg); diff --git a/usr/src/uts/common/fs/zfs/vdev.c b/usr/src/uts/common/fs/zfs/vdev.c index f5c3ff5d77..1c4f041072 100644 --- a/usr/src/uts/common/fs/zfs/vdev.c +++ b/usr/src/uts/common/fs/zfs/vdev.c @@ -353,6 +353,11 @@ vdev_add_child(vdev_t *pvd, vdev_t *cvd) */ for (; pvd != NULL; pvd = pvd->vdev_parent) pvd->vdev_guid_sum += cvd->vdev_guid_sum; + + if (cvd->vdev_ops->vdev_op_leaf) { + list_insert_head(&cvd->vdev_spa->spa_leaf_list, cvd); + cvd->vdev_spa->spa_leaf_list_gen++; + } } void @@ -382,6 +387,12 @@ vdev_remove_child(vdev_t *pvd, vdev_t *cvd) pvd->vdev_children = 0; } + if (cvd->vdev_ops->vdev_op_leaf) { + spa_t *spa = cvd->vdev_spa; + list_remove(&spa->spa_leaf_list, cvd); + spa->spa_leaf_list_gen++; + } + /* * Walk up all ancestors to update guid sum. */ @@ -466,6 +477,7 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops) mutex_init(&vd->vdev_obsolete_lock, NULL, MUTEX_DEFAULT, NULL); vd->vdev_obsolete_segments = range_tree_create(NULL, NULL); + list_link_init(&vd->vdev_leaf_node); mutex_init(&vd->vdev_dtl_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&vd->vdev_stat_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&vd->vdev_probe_lock, NULL, MUTEX_DEFAULT, NULL); @@ -786,6 +798,7 @@ vdev_free(vdev_t *vd) vdev_remove_child(vd->vdev_parent, vd); ASSERT(vd->vdev_parent == NULL); + ASSERT(!list_link_active(&vd->vdev_leaf_node)); /* * Clean up vdev structure. 
diff --git a/usr/src/uts/common/fs/zfs/vdev_label.c b/usr/src/uts/common/fs/zfs/vdev_label.c index 8d5f17c15f..17553607a5 100644 --- a/usr/src/uts/common/fs/zfs/vdev_label.c +++ b/usr/src/uts/common/fs/zfs/vdev_label.c @@ -22,6 +22,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012, 2018 by Delphix. All rights reserved. + * Copyright 2019 Joyent, Inc. */ /* @@ -193,14 +194,21 @@ vdev_label_read(zio_t *zio, vdev_t *vd, int l, abd_t *buf, uint64_t offset, ZIO_PRIORITY_SYNC_READ, flags, B_TRUE)); } -static void +void vdev_label_write(zio_t *zio, vdev_t *vd, int l, abd_t *buf, uint64_t offset, uint64_t size, zio_done_func_t *done, void *private, int flags) { +#ifdef _KERNEL + /* + * This assert is invalid in the user-level ztest MMP code because + * the ztest thread is not in dsl_pool_sync_context. ZoL does not + * build the user-level code with DEBUG so this is not an issue there. + */ ASSERT(spa_config_held(zio->io_spa, SCL_ALL, RW_WRITER) == SCL_ALL || (spa_config_held(zio->io_spa, SCL_CONFIG | SCL_STATE, RW_READER) == (SCL_CONFIG | SCL_STATE) && dsl_pool_sync_context(spa_get_dsl(zio->io_spa)))); +#endif ASSERT(flags & ZIO_FLAG_CONFIG_WRITER); zio_nowait(zio_write_phys(zio, vd, @@ -1142,7 +1150,8 @@ vdev_uberblock_sync(zio_t *zio, uint64_t *good_writes, if (!vdev_writeable(vd)) return; - int n = ub->ub_txg & (VDEV_UBERBLOCK_COUNT(vd) - 1); + int m = spa_multihost(vd->vdev_spa) ? MMP_BLOCKS_PER_LABEL : 0; + int n = ub->ub_txg % (VDEV_UBERBLOCK_COUNT(vd) - m); /* Copy the uberblock_t into the ABD */ abd_t *ub_abd = abd_alloc_for_io(VDEV_UBERBLOCK_SIZE(vd), B_TRUE); @@ -1360,10 +1369,13 @@ retry: * and the vdev configuration hasn't changed, * then there's nothing to do. 
*/ - if (ub->ub_txg < txg && - uberblock_update(ub, spa->spa_root_vdev, txg) == B_FALSE && - list_is_empty(&spa->spa_config_dirty_list)) - return (0); + if (ub->ub_txg < txg) { + boolean_t changed = uberblock_update(ub, spa->spa_root_vdev, + txg, spa->spa_mmp.mmp_delay); + + if (!changed && list_is_empty(&spa->spa_config_dirty_list)) + return (0); + } if (txg > spa_freeze_txg(spa)) return (0); @@ -1426,6 +1438,9 @@ retry: goto retry; } + if (spa_multihost(spa)) + mmp_update_uberblock(spa, ub); + /* * Sync out odd labels for every dirty vdev. If the system dies * in the middle of this process, the even labels and the new diff --git a/usr/src/uts/common/fs/zfs/zfs_ioctl.c b/usr/src/uts/common/fs/zfs/zfs_ioctl.c index 22ed061953..fb9ae56ae6 100644 --- a/usr/src/uts/common/fs/zfs/zfs_ioctl.c +++ b/usr/src/uts/common/fs/zfs/zfs_ioctl.c @@ -4885,6 +4885,13 @@ zfs_ioc_clear(zfs_cmd_t *zc) if (error != 0) return (error); + /* + * If multihost is enabled, resuming I/O is unsafe as another + * host may have imported the pool. + */ + if (spa_multihost(spa) && spa_suspended(spa)) + return (SET_ERROR(EINVAL)); + spa_vdev_state_enter(spa, SCL_NONE); if (zc->zc_guid == 0) { diff --git a/usr/src/uts/common/fs/zfs/zio.c b/usr/src/uts/common/fs/zfs/zio.c index 1e725f355a..6897fe8a9b 100644 --- a/usr/src/uts/common/fs/zfs/zio.c +++ b/usr/src/uts/common/fs/zfs/zio.c @@ -1784,7 +1784,7 @@ zio_reexecute(zio_t *pio) } void -zio_suspend(spa_t *spa, zio_t *zio) +zio_suspend(spa_t *spa, zio_t *zio, zio_suspend_reason_t reason) { if (spa_get_failmode(spa) == ZIO_FAILURE_MODE_PANIC) fm_panic("Pool '%s' has encountered an uncorrectable I/O " @@ -1800,7 +1800,7 @@ zio_suspend(spa_t *spa, zio_t *zio) ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | ZIO_FLAG_GODFATHER); - spa->spa_suspended = B_TRUE; + spa->spa_suspended = reason; if (zio != NULL) { ASSERT(!(zio->io_flags & ZIO_FLAG_GODFATHER)); @@ -1823,7 +1823,7 @@ zio_resume(spa_t *spa) * Reexecute all previously suspended i/o. 
*/ mutex_enter(&spa->spa_suspend_lock); - spa->spa_suspended = B_FALSE; + spa->spa_suspended = ZIO_SUSPEND_NONE; cv_broadcast(&spa->spa_suspend_cv); pio = spa->spa_suspend_zio_root; spa->spa_suspend_zio_root = NULL; @@ -3907,7 +3907,7 @@ zio_done(zio_t *zio) * We'd fail again if we reexecuted now, so suspend * until conditions improve (e.g. device comes online). */ - zio_suspend(spa, zio); + zio_suspend(zio->io_spa, zio, ZIO_SUSPEND_IOERR); } else { /* * Reexecution is potentially a huge amount of work. diff --git a/usr/src/uts/common/sys/fs/zfs.h b/usr/src/uts/common/sys/fs/zfs.h index 0728f42212..cf7c466314 100644 --- a/usr/src/uts/common/sys/fs/zfs.h +++ b/usr/src/uts/common/sys/fs/zfs.h @@ -213,6 +213,7 @@ typedef enum { ZPOOL_PROP_CHECKPOINT, ZPOOL_PROP_TNAME, ZPOOL_PROP_MAXDNODESIZE, + ZPOOL_PROP_MULTIHOST, ZPOOL_NUM_PROPS } zpool_prop_t; @@ -580,6 +581,7 @@ typedef struct zpool_load_policy { #define ZPOOL_CONFIG_RESILVER_TXG "resilver_txg" #define ZPOOL_CONFIG_COMMENT "comment" #define ZPOOL_CONFIG_SUSPENDED "suspended" /* not stored on disk */ +#define ZPOOL_CONFIG_SUSPENDED_REASON "suspended_reason" /* not stored */ #define ZPOOL_CONFIG_TIMESTAMP "timestamp" /* not stored on disk */ #define ZPOOL_CONFIG_BOOTFS "bootfs" /* not stored on disk */ #define ZPOOL_CONFIG_MISSING_DEVICES "missing_vdevs" /* not stored on disk */ @@ -594,6 +596,10 @@ typedef struct zpool_load_policy { #define ZPOOL_CONFIG_VDEV_LEAF_ZAP "com.delphix:vdev_zap_leaf" #define ZPOOL_CONFIG_HAS_PER_VDEV_ZAPS "com.delphix:has_per_vdev_zaps" #define ZPOOL_CONFIG_CACHEFILE "cachefile" /* not stored on disk */ +#define ZPOOL_CONFIG_MMP_STATE "mmp_state" /* not stored on disk */ +#define ZPOOL_CONFIG_MMP_TXG "mmp_txg" /* not stored on disk */ +#define ZPOOL_CONFIG_MMP_HOSTNAME "mmp_hostname" /* not stored on disk */ +#define ZPOOL_CONFIG_MMP_HOSTID "mmp_hostid" /* not stored on disk */ /* * The persistent vdev state is stored as separate values rather than a single * 'vdev_state' entry. 
This is because a device can be in multiple states, such @@ -704,6 +710,7 @@ typedef enum vdev_aux { VDEV_AUX_BAD_LOG, /* cannot read log chain(s) */ VDEV_AUX_EXTERNAL, /* external diagnosis */ VDEV_AUX_SPLIT_POOL, /* vdev was split off into another pool */ + VDEV_AUX_ACTIVE, /* vdev active on a different host */ VDEV_AUX_CHILDREN_OFFLINE /* all children are offline */ } vdev_aux_t; @@ -725,6 +732,16 @@ typedef enum pool_state { } pool_state_t; /* + * mmp state. The following states provide additional detail describing + * why a pool couldn't be safely imported. + */ +typedef enum mmp_state { + MMP_STATE_ACTIVE = 0, /* In active use */ + MMP_STATE_INACTIVE, /* Inactive and safe to import */ + MMP_STATE_NO_HOSTID /* System hostid is not set */ +} mmp_state_t; + +/* * Scan Functions. */ typedef enum pool_scan_func { @@ -1076,6 +1093,7 @@ typedef enum { #define ZFS_IMPORT_ONLY 0x8 #define ZFS_IMPORT_CHECKPOINT 0x10 #define ZFS_IMPORT_TEMP_NAME 0x20 +#define ZFS_IMPORT_SKIP_MMP 0x40 /* * Channel program argument/return nvlist keys and defaults. 
diff --git a/usr/src/uts/i86pc/io/pcplusmp/apic_common.c b/usr/src/uts/i86pc/io/pcplusmp/apic_common.c index b1a0b9c3d0..3e5eb92067 100644 --- a/usr/src/uts/i86pc/io/pcplusmp/apic_common.c +++ b/usr/src/uts/i86pc/io/pcplusmp/apic_common.c @@ -808,7 +808,7 @@ gethrtime_again: void apic_nmi_intr(caddr_t arg, struct regs *rp) { - int action = nmi_action; + nmi_action_t action = nmi_action; if (apic_shutdown_processors) { apic_disable_local_apic(); diff --git a/usr/src/uts/i86pc/os/mlsetup.c b/usr/src/uts/i86pc/os/mlsetup.c index 94d4695141..1f9149d5c4 100644 --- a/usr/src/uts/i86pc/os/mlsetup.c +++ b/usr/src/uts/i86pc/os/mlsetup.c @@ -81,7 +81,7 @@ extern uint32_t cpuid_feature_ecx_exclude; extern uint32_t cpuid_feature_edx_include; extern uint32_t cpuid_feature_edx_exclude; -int nmi_action = NMI_ACTION_UNSET; +nmi_action_t nmi_action = NMI_ACTION_UNSET; /* * Set console mode diff --git a/usr/src/uts/i86pc/sys/apic_common.h b/usr/src/uts/i86pc/sys/apic_common.h index 58b9bb93a6..eeee3c8a6a 100644 --- a/usr/src/uts/i86pc/sys/apic_common.h +++ b/usr/src/uts/i86pc/sys/apic_common.h @@ -201,12 +201,14 @@ extern int apic_msix_enable; extern uint32_t apic_get_localapicid(uint32_t cpuid); extern uchar_t apic_get_ioapicid(uchar_t ioapicindex); -#define NMI_ACTION_UNSET (0) -#define NMI_ACTION_PANIC (1) -#define NMI_ACTION_IGNORE (2) -#define NMI_ACTION_KMDB (3) - -extern int nmi_action; +typedef enum nmi_action { + NMI_ACTION_UNSET, + NMI_ACTION_PANIC, + NMI_ACTION_IGNORE, + NMI_ACTION_KMDB +} nmi_action_t; + +extern nmi_action_t nmi_action; #ifdef __cplusplus } |