diff options
author | Mark J Musante <Mark.Musante@Sun.COM> | 2010-08-06 13:53:14 -0600 |
---|---|---|
committer | Mark J Musante <Mark.Musante@Sun.COM> | 2010-08-06 13:53:14 -0600 |
commit | cb04b8739c50e3e6d12e89b790fa7b8d0d899865 (patch) | |
tree | c421a5b9f9c5c255e0a88f0c0dabeb04ffc65a2a | |
parent | 35f59e50e9ef37e6f1bd3b018289d678d5551cab (diff) | |
download | illumos-gate-cb04b8739c50e3e6d12e89b790fa7b8d0d899865.tar.gz |
6782540 zpool cannot replace a replacing device
-rw-r--r-- | usr/src/cmd/zinject/zinject.c | 24 | ||||
-rw-r--r-- | usr/src/cmd/zpool/zpool_main.c | 1 | ||||
-rw-r--r-- | usr/src/grub/capability | 2 | ||||
-rw-r--r-- | usr/src/grub/grub-0.97/stage2/zfs-include/zfs.h | 2 | ||||
-rw-r--r-- | usr/src/lib/libzfs/common/libzfs_pool.c | 76 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/spa.c | 160 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/sys/vdev_impl.h | 1 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/vdev.c | 17 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/vdev_label.c | 3 | ||||
-rw-r--r-- | usr/src/uts/common/sys/fs/zfs.h | 8 |
10 files changed, 179 insertions, 115 deletions
diff --git a/usr/src/cmd/zinject/zinject.c b/usr/src/cmd/zinject/zinject.c index ab04e422a9..60c53ceb3f 100644 --- a/usr/src/cmd/zinject/zinject.c +++ b/usr/src/cmd/zinject/zinject.c @@ -233,7 +233,7 @@ usage(void) "\t\tInject a fault into a particular device or the device's\n" "\t\tlabel. Label injection can either be 'nvlist', 'uber',\n " "\t\t'pad1', or 'pad2'.\n" - "\t\t'errno' can either be 'nxio' (the default) or 'io'.\n" + "\t\t'errno' can be 'nxio' (the default), 'io', or 'dtl'.\n" "\n" "\tzinject -d device -A <degrade|fault> pool\n" "\t\tPerform a specific action on a particular device\n" @@ -395,17 +395,25 @@ print_panic_handler(int id, const char *pool, zinject_record_t *record, static int print_all_handlers(void) { - int count = 0; + int count = 0, total = 0; (void) iter_handlers(print_device_handler, &count); - (void) printf("\n"); - count = 0; + if (count > 0) { + total += count; + (void) printf("\n"); + count = 0; + } + (void) iter_handlers(print_data_handler, &count); - (void) printf("\n"); - count = 0; + if (count > 0) { + total += count; + (void) printf("\n"); + count = 0; + } + (void) iter_handlers(print_panic_handler, &count); - return (count); + return (count + total); } /* ARGSUSED */ @@ -627,6 +635,8 @@ main(int argc, char **argv) error = ECKSUM; } else if (strcasecmp(optarg, "nxio") == 0) { error = ENXIO; + } else if (strcasecmp(optarg, "dtl") == 0) { + error = ECHILD; } else { (void) fprintf(stderr, "invalid error type " "'%s': must be 'io', 'checksum' or " diff --git a/usr/src/cmd/zpool/zpool_main.c b/usr/src/cmd/zpool/zpool_main.c index 1c2565ce72..0739d9bb2b 100644 --- a/usr/src/cmd/zpool/zpool_main.c +++ b/usr/src/cmd/zpool/zpool_main.c @@ -4014,6 +4014,7 @@ zpool_do_upgrade(int argc, char **argv) "performance\n")); (void) printf(gettext(" 27 Improved snapshot creation " "performance\n")); + (void) printf(gettext(" 28 Multiple vdev replacements\n")); (void) printf(gettext("\nFor more information on a particular " "version, including supported releases,\n")); (void) printf(gettext("see the ZFS Administration Guide.\n\n")); diff --git a/usr/src/grub/capability b/usr/src/grub/capability index 8039914e06..225ce5bb83 100644 --- a/usr/src/grub/capability +++ b/usr/src/grub/capability @@ -39,7 +39,7 @@ # This file and the associated version are Solaris specific and are # not a part of the open source distribution of GRUB. # -VERSION=19 +VERSION=20 dboot xVM zfs diff --git a/usr/src/grub/grub-0.97/stage2/zfs-include/zfs.h b/usr/src/grub/grub-0.97/stage2/zfs-include/zfs.h index 0834c0cf6e..9ad1367ed3 100644 --- a/usr/src/grub/grub-0.97/stage2/zfs-include/zfs.h +++ b/usr/src/grub/grub-0.97/stage2/zfs-include/zfs.h @@ -26,7 +26,7 @@ /* * On-disk version number. */ -#define SPA_VERSION 27ULL +#define SPA_VERSION 28ULL /* * The following are configuration names used in the nvlist describing a pool's diff --git a/usr/src/lib/libzfs/common/libzfs_pool.c b/usr/src/lib/libzfs/common/libzfs_pool.c index 23907b2090..8e5eff3dda 100644 --- a/usr/src/lib/libzfs/common/libzfs_pool.c +++ b/usr/src/lib/libzfs/common/libzfs_pool.c @@ -997,13 +997,12 @@ zpool_destroy(zpool_handle_t *zhp) char msg[1024]; if (zhp->zpool_state == POOL_STATE_ACTIVE && - (zfp = zfs_open(zhp->zpool_hdl, zhp->zpool_name, - ZFS_TYPE_FILESYSTEM)) == NULL) + (zfp = zfs_open(hdl, zhp->zpool_name, ZFS_TYPE_FILESYSTEM)) == NULL) return (-1); (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); - if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_DESTROY, &zc) != 0) { + if (zfs_ioctl(hdl, ZFS_IOC_POOL_DESTROY, &zc) != 0) { (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, "cannot destroy '%s'"), zhp->zpool_name); @@ -1086,7 +1085,7 @@ zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot) return (-1); (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); - if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_VDEV_ADD, &zc) != 0) { + if (zfs_ioctl(hdl, ZFS_IOC_VDEV_ADD, &zc) != 0) { switch (errno) { case EBUSY: /* @@ -1578,7 +1577,7 @@ zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func) (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); zc.zc_cookie = func; - if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_SCAN, &zc) == 0 || + if (zfs_ioctl(hdl, ZFS_IOC_POOL_SCAN, &zc) == 0 || (errno == ENOENT && func != POOL_SCAN_NONE)) return (0); @@ -1670,26 +1669,17 @@ vdev_to_nvlist_iter(nvlist_t *nv, nvlist_t *search, boolean_t *avail_spare, srchkey = nvpair_name(pair); switch (nvpair_type(pair)) { - case DATA_TYPE_UINT64: { - uint64_t srchval, theguid, present; - - verify(nvpair_value_uint64(pair, &srchval) == 0); + case DATA_TYPE_UINT64: if (strcmp(srchkey, ZPOOL_CONFIG_GUID) == 0) { - if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT, - &present) == 0) { - /* - * If the device has never been present since - * import, the only reliable way to match the - * vdev is by GUID. - */ - verify(nvlist_lookup_uint64(nv, - ZPOOL_CONFIG_GUID, &theguid) == 0); - if (theguid == srchval) - return (nv); - } + uint64_t srchval, theguid; + + verify(nvpair_value_uint64(pair, &srchval) == 0); + verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, + &theguid) == 0); + if (theguid == srchval) + return (nv); } break; - } case DATA_TYPE_STRING: { char *srchval, *val; @@ -1871,6 +1861,8 @@ zpool_find_vdev_by_physpath(zpool_handle_t *zhp, const char *ppath, &nvroot) == 0); *avail_spare = B_FALSE; + *l2cache = B_FALSE; + *log = B_FALSE; ret = vdev_to_nvlist_iter(nvroot, search, avail_spare, l2cache, log); nvlist_free(search); @@ -2166,14 +2158,14 @@ zpool_vdev_online(zpool_handle_t *zhp, const char *path, int flags, if (wholedisk) { pathname += strlen(DISK_ROOT) + 1; - (void) zpool_relabel_disk(zhp->zpool_hdl, pathname); + (void) zpool_relabel_disk(hdl, pathname); } } zc.zc_cookie = VDEV_STATE_ONLINE; zc.zc_obj = flags; - if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_VDEV_SET_STATE, &zc) != 0) { + if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) != 0) { if (errno == EINVAL) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "was split " "from this pool into a new one. Use '%s' " @@ -2215,7 +2207,7 @@ zpool_vdev_offline(zpool_handle_t *zhp, const char *path, boolean_t istmp) zc.zc_cookie = VDEV_STATE_OFFLINE; zc.zc_obj = istmp ? ZFS_OFFLINE_TEMPORARY : 0; - if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_VDEV_SET_STATE, &zc) == 0) + if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) == 0) return (0); switch (errno) { @@ -2255,7 +2247,7 @@ zpool_vdev_fault(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux) zc.zc_cookie = VDEV_STATE_FAULTED; zc.zc_obj = aux; - if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0) + if (ioctl(hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0) return (0); switch (errno) { @@ -2290,7 +2282,7 @@ zpool_vdev_degrade(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux) zc.zc_cookie = VDEV_STATE_DEGRADED; zc.zc_obj = aux; - if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0) + if (ioctl(hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0) return (0); return (zpool_standard_error(hdl, errno, msg)); @@ -2338,7 +2330,7 @@ zpool_vdev_attach(zpool_handle_t *zhp, nvlist_t *tgt; boolean_t avail_spare, l2cache, islog; uint64_t val; - char *path, *newname; + char *newname; nvlist_t **child; uint_t children; nvlist_t *config_root; @@ -2404,27 +2396,12 @@ zpool_vdev_attach(zpool_handle_t *zhp, return (zfs_error(hdl, EZFS_BADTARGET, msg)); } - /* - * If we are attempting to replace a spare, it canot be applied to an - * already spared device. - */ - if (replacing && - nvlist_lookup_string(child[0], ZPOOL_CONFIG_PATH, &path) == 0 && - zpool_find_vdev(zhp, newname, &avail_spare, - &l2cache, NULL) != NULL && avail_spare && - is_replacing_spare(config_root, tgt, 0)) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "device has already been replaced with a spare")); - free(newname); - return (zfs_error(hdl, EZFS_BADTARGET, msg)); - } - free(newname); if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0) return (-1); - ret = zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_VDEV_ATTACH, &zc); + ret = zfs_ioctl(hdl, ZFS_IOC_VDEV_ATTACH, &zc); zcmd_free_nvlists(&zc); @@ -2447,9 +2424,16 @@ zpool_vdev_attach(zpool_handle_t *zhp, * Can't attach to or replace this type of vdev. */ if (replacing) { + uint64_t version = zpool_get_prop_int(zhp, + ZPOOL_PROP_VERSION, NULL); + if (islog) zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot replace a log with a spare")); + else if (version >= SPA_VERSION_MULTI_REPLACE) + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "already in replacing/spare config; wait " + "for completion or use 'zpool detach'")); else zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot replace a replacing device")); @@ -2547,7 +2531,7 @@ zpool_vdev_detach(zpool_handle_t *zhp, const char *path) */ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "only " "applicable to mirror and replacing vdevs")); - (void) zfs_error(zhp->zpool_hdl, EZFS_BADTARGET, msg); + (void) zfs_error(hdl, EZFS_BADTARGET, msg); break; case EBUSY: @@ -2908,7 +2892,7 @@ zpool_clear(zpool_handle_t *zhp, const char *path, nvlist_t *rewindnvl) if (zcmd_alloc_dst_nvlist(hdl, &zc, zhp->zpool_config_size * 2) != 0) return (-1); - if (zcmd_write_src_nvlist(zhp->zpool_hdl, &zc, rewindnvl) != 0) + if (zcmd_write_src_nvlist(hdl, &zc, rewindnvl) != 0) return (-1); while ((error = zfs_ioctl(hdl, ZFS_IOC_CLEAR, &zc)) != 0 && diff --git a/usr/src/uts/common/fs/zfs/spa.c b/usr/src/uts/common/fs/zfs/spa.c index 3e6f4ab2a7..59c7656044 100644 --- a/usr/src/uts/common/fs/zfs/spa.c +++ b/usr/src/uts/common/fs/zfs/spa.c @@ -116,6 +116,7 @@ static boolean_t spa_has_active_shared_spare(spa_t *spa); static int spa_load_impl(spa_t *spa, uint64_t, nvlist_t *config, spa_load_state_t state, spa_import_type_t type, boolean_t mosconfig, char **ereport); +static void spa_vdev_resilver_done(spa_t *spa); uint_t zio_taskq_batch_pct = 100; /* 1 thread per cpu in pset */ id_t zio_taskq_psrset_bind = PS_NONE; @@ -3226,7 +3227,8 @@ spa_import_rootpool(char *devpath, char *devid) !bvd->vdev_isspare) { cmn_err(CE_NOTE, "The boot device is currently spared. Please " "try booting from '%s'", - bvd->vdev_parent->vdev_child[1]->vdev_path); + bvd->vdev_parent-> + vdev_child[bvd->vdev_parent->vdev_children - 1]->vdev_path); error = EINVAL; goto out; } @@ -3834,7 +3836,7 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing) * spares. */ if (pvd->vdev_ops == &vdev_spare_ops && - pvd->vdev_child[1] == oldvd && + oldvd->vdev_isspare && !spa_has_spare(spa, newvd->vdev_guid)) return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); @@ -3846,13 +3848,15 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing) * the same (spare replaces spare, non-spare replaces * non-spare). */ - if (pvd->vdev_ops == &vdev_replacing_ops) + if (pvd->vdev_ops == &vdev_replacing_ops && + spa_version(spa) < SPA_VERSION_MULTI_REPLACE) { return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); - else if (pvd->vdev_ops == &vdev_spare_ops && - newvd->vdev_isspare != oldvd->vdev_isspare) + } else if (pvd->vdev_ops == &vdev_spare_ops && + newvd->vdev_isspare != oldvd->vdev_isspare) { return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); - else if (pvd->vdev_ops != &vdev_spare_ops && - newvd->vdev_isspare) + } + + if (newvd->vdev_isspare) pvops = &vdev_spare_ops; else pvops = &vdev_replacing_ops; @@ -3887,6 +3891,9 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing) } } + /* mark the device being resilvered */ + newvd->vdev_resilvering = B_TRUE; + /* * If the parent is not a mirror, or if we're replacing, insert the new * mirror/replacing/spare vdev above oldvd. @@ -3975,7 +3982,6 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done) vdev_t *vd, *pvd, *cvd, *tvd; boolean_t unspare = B_FALSE; uint64_t unspare_guid; - size_t len; char *vdpath; txg = spa_vdev_enter(spa); @@ -4007,18 +4013,11 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done) return (spa_vdev_exit(spa, NULL, txg, EBUSY)); /* - * If replace_done is specified, only remove this device if it's - * the first child of a replacing vdev. For the 'spare' vdev, either - * disk can be removed. + * Only 'replacing' or 'spare' vdevs can be replaced. */ - if (replace_done) { - if (pvd->vdev_ops == &vdev_replacing_ops) { - if (vd->vdev_id != 0) - return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); - } else if (pvd->vdev_ops != &vdev_spare_ops) { - return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); - } - } + if (replace_done && pvd->vdev_ops != &vdev_replacing_ops && + pvd->vdev_ops != &vdev_spare_ops) + return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); ASSERT(pvd->vdev_ops != &vdev_spare_ops || spa_version(spa) >= SPA_VERSION_SPARES); @@ -4045,16 +4044,22 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done) * check to see if we changed the original vdev's path to have "/old" * at the end in spa_vdev_attach(). If so, undo that change now. */ - if (pvd->vdev_ops == &vdev_replacing_ops && vd->vdev_id == 1 && - pvd->vdev_child[0]->vdev_path != NULL && - pvd->vdev_child[1]->vdev_path != NULL) { - ASSERT(pvd->vdev_child[1] == vd); - cvd = pvd->vdev_child[0]; - len = strlen(vd->vdev_path); - if (strncmp(cvd->vdev_path, vd->vdev_path, len) == 0 && - strcmp(cvd->vdev_path + len, "/old") == 0) { - spa_strfree(cvd->vdev_path); - cvd->vdev_path = spa_strdup(vd->vdev_path); + if (pvd->vdev_ops == &vdev_replacing_ops && vd->vdev_id > 0 && + vd->vdev_path != NULL) { + size_t len = strlen(vd->vdev_path); + + for (int c = 0; c < pvd->vdev_children; c++) { + cvd = pvd->vdev_child[c]; + + if (cvd == vd || cvd->vdev_path == NULL) + continue; + + if (strncmp(cvd->vdev_path, vd->vdev_path, len) == 0 && + strcmp(cvd->vdev_path + len, "/old") == 0) { + spa_strfree(cvd->vdev_path); + cvd->vdev_path = spa_strdup(vd->vdev_path); + break; + } } } @@ -4064,7 +4069,8 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done) * active spare list for the pool. */ if (pvd->vdev_ops == &vdev_spare_ops && - vd->vdev_id == 0 && pvd->vdev_child[1]->vdev_isspare) + vd->vdev_id == 0 && + pvd->vdev_child[pvd->vdev_children - 1]->vdev_isspare) unspare = B_TRUE; /* @@ -4086,7 +4092,7 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done) /* * Remember one of the remaining children so we can get tvd below. */ - cvd = pvd->vdev_child[0]; + cvd = pvd->vdev_child[pvd->vdev_children - 1]; /* * If we need to remove the remaining child from the list of hot spares, @@ -4102,14 +4108,20 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done) spa_spare_remove(cvd); unspare_guid = cvd->vdev_guid; (void) spa_vdev_remove(spa, unspare_guid, B_TRUE); + cvd->vdev_unspare = B_TRUE; } /* * If the parent mirror/replacing vdev only has one child, * the parent is no longer needed. Remove it from the tree. */ - if (pvd->vdev_children == 1) + if (pvd->vdev_children == 1) { + if (pvd->vdev_ops == &vdev_spare_ops) + cvd->vdev_unspare = B_FALSE; vdev_remove_parent(cvd); + cvd->vdev_resilvering = B_FALSE; + } + /* * We don't set tvd until now because the parent we just removed @@ -4151,6 +4163,9 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done) spa_event_notify(spa, vd, ESC_ZFS_VDEV_REMOVE); + /* hang on to the spa before we release the lock */ + spa_open_ref(spa, FTAG); + error = spa_vdev_exit(spa, vd, txg, 0); spa_history_log_internal(LOG_POOL_VDEV_DETACH, spa, NULL, @@ -4163,24 +4178,31 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done) * list of every other pool. */ if (unspare) { - spa_t *myspa = spa; - spa = NULL; + spa_t *altspa = NULL; + mutex_enter(&spa_namespace_lock); - while ((spa = spa_next(spa)) != NULL) { - if (spa->spa_state != POOL_STATE_ACTIVE) - continue; - if (spa == myspa) + while ((altspa = spa_next(altspa)) != NULL) { + if (altspa->spa_state != POOL_STATE_ACTIVE || + altspa == spa) continue; - spa_open_ref(spa, FTAG); + + spa_open_ref(altspa, FTAG); mutex_exit(&spa_namespace_lock); - (void) spa_vdev_remove(spa, unspare_guid, - B_TRUE); + (void) spa_vdev_remove(altspa, unspare_guid, B_TRUE); mutex_enter(&spa_namespace_lock); - spa_close(spa, FTAG); + spa_close(altspa, FTAG); } mutex_exit(&spa_namespace_lock); + + /* search the rest of the vdevs for spares to remove */ + spa_vdev_resilver_done(spa); } + /* all done with the spa; OK to release */ + mutex_enter(&spa_namespace_lock); + spa_close(spa, FTAG); + mutex_exit(&spa_namespace_lock); + return (error); } @@ -4728,11 +4750,18 @@ spa_vdev_resilver_done_hunt(vdev_t *vd) } /* - * Check for a completed replacement. + * Check for a completed replacement. We always consider the first + * vdev in the list to be the oldest vdev, and the last one to be + * the newest (see spa_vdev_attach() for how that works). In + * the case where the newest vdev is faulted, we will not automatically + * remove it after a resilver completes. This is OK as it will require + * user intervention to determine which disk the admin wishes to keep. */ - if (vd->vdev_ops == &vdev_replacing_ops && vd->vdev_children == 2) { + if (vd->vdev_ops == &vdev_replacing_ops) { + ASSERT(vd->vdev_children > 1); + + newvd = vd->vdev_child[vd->vdev_children - 1]; oldvd = vd->vdev_child[0]; - newvd = vd->vdev_child[1]; if (vdev_dtl_empty(newvd, DTL_MISSING) && vdev_dtl_empty(newvd, DTL_OUTAGE) && @@ -4743,16 +4772,41 @@ spa_vdev_resilver_done_hunt(vdev_t *vd) /* * Check for a completed resilver with the 'unspare' flag set. */ - if (vd->vdev_ops == &vdev_spare_ops && vd->vdev_children == 2) { - newvd = vd->vdev_child[0]; - oldvd = vd->vdev_child[1]; + if (vd->vdev_ops == &vdev_spare_ops) { + vdev_t *first = vd->vdev_child[0]; + vdev_t *last = vd->vdev_child[vd->vdev_children - 1]; + + if (last->vdev_unspare) { + oldvd = first; + newvd = last; + } else if (first->vdev_unspare) { + oldvd = last; + newvd = first; + } else { + oldvd = NULL; + } - if (newvd->vdev_unspare && + if (oldvd != NULL && vdev_dtl_empty(newvd, DTL_MISSING) && vdev_dtl_empty(newvd, DTL_OUTAGE) && - !vdev_dtl_required(oldvd)) { - newvd->vdev_unspare = 0; + !vdev_dtl_required(oldvd)) return (oldvd); + + /* + * If there are more than two spares attached to a disk, + * and those spares are not required, then we want to + * attempt to free them up now so that they can be used + * by other pools. Once we're back down to a single + * disk+spare, we stop removing them. + */ + if (vd->vdev_children > 2) { + newvd = vd->vdev_child[1]; + + if (newvd->vdev_isspare && last->vdev_isspare && + vdev_dtl_empty(last, DTL_MISSING) && + vdev_dtl_empty(last, DTL_OUTAGE) && + !vdev_dtl_required(newvd)) + return (newvd); } } @@ -4779,9 +4833,9 @@ spa_vdev_resilver_done(spa_t *spa) * we need to detach the parent's first child (the original hot * spare) as well. */ - if (ppvd->vdev_ops == &vdev_spare_ops && pvd->vdev_id == 0) { + if (ppvd->vdev_ops == &vdev_spare_ops && pvd->vdev_id == 0 && + ppvd->vdev_children == 2) { ASSERT(pvd->vdev_ops == &vdev_replacing_ops); - ASSERT(ppvd->vdev_children == 2); sguid = ppvd->vdev_child[1]->vdev_guid; } spa_config_exit(spa, SCL_ALL, FTAG); diff --git a/usr/src/uts/common/fs/zfs/sys/vdev_impl.h b/usr/src/uts/common/fs/zfs/sys/vdev_impl.h index 63d1b3eeda..161bd21f05 100644 --- a/usr/src/uts/common/fs/zfs/sys/vdev_impl.h +++ b/usr/src/uts/common/fs/zfs/sys/vdev_impl.h @@ -169,6 +169,7 @@ struct vdev { uint64_t vdev_faulted; /* persistent faulted state */ uint64_t vdev_degraded; /* persistent degraded state */ uint64_t vdev_removed; /* persistent removed state */ + uint64_t vdev_resilvering; /* persistent resilvering state */ uint64_t vdev_nparity; /* number of parity devices for raidz */ char *vdev_path; /* vdev path (if any) */ char *vdev_devid; /* vdev devid (if any) */ diff --git a/usr/src/uts/common/fs/zfs/vdev.c b/usr/src/uts/common/fs/zfs/vdev.c index 7c6892b530..600da9a83d 100644 --- a/usr/src/uts/common/fs/zfs/vdev.c +++ b/usr/src/uts/common/fs/zfs/vdev.c @@ -518,6 +518,9 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id, (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_OFFLINE, &vd->vdev_offline); + (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_RESILVERING, + &vd->vdev_resilvering); + /* * When importing a pool, we want to ignore the persistent fault * state, as the diagnosis made on another system may not be @@ -1849,6 +1852,9 @@ vdev_dtl_required(vdev_t *vd) vd->vdev_cant_read = cant_read; vdev_dtl_reassess(tvd, 0, 0, B_FALSE); + if (!required && zio_injection_enabled) + required = !!zio_handle_device_injection(vd, NULL, ECHILD); + return (required); } @@ -2940,12 +2946,13 @@ vdev_set_state(vdev_t *vd, boolean_t isopen, vdev_state_t state, vdev_aux_t aux) vd->vdev_removed = B_TRUE; } else if (state == VDEV_STATE_CANT_OPEN) { /* - * If we fail to open a vdev during an import, we mark it as - * "not available", which signifies that it was never there to - * begin with. Failure to open such a device is not considered - * an error. + * If we fail to open a vdev during an import or recovery, we + * mark it as "not available", which signifies that it was + * never there to begin with. Failure to open such a device + * is not considered an error. */ - if (spa_load_state(spa) == SPA_LOAD_IMPORT && + if ((spa_load_state(spa) == SPA_LOAD_IMPORT || + spa_load_state(spa) == SPA_LOAD_RECOVER) && vd->vdev_ops->vdev_op_leaf) vd->vdev_not_present = 1; diff --git a/usr/src/uts/common/fs/zfs/vdev_label.c b/usr/src/uts/common/fs/zfs/vdev_label.c index 75ec545345..ec5ec6ee34 100644 --- a/usr/src/uts/common/fs/zfs/vdev_label.c +++ b/usr/src/uts/common/fs/zfs/vdev_label.c @@ -353,6 +353,9 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats, if (vd->vdev_offline && !vd->vdev_tmpoffline) VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_OFFLINE, B_TRUE) == 0); + if (vd->vdev_resilvering) + VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_RESILVERING, + B_TRUE) == 0); if (vd->vdev_faulted) VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_FAULTED, B_TRUE) == 0); diff --git a/usr/src/uts/common/sys/fs/zfs.h b/usr/src/uts/common/sys/fs/zfs.h index 8b03fb0f9d..a6a3484ffc 100644 --- a/usr/src/uts/common/sys/fs/zfs.h +++ b/usr/src/uts/common/sys/fs/zfs.h @@ -336,14 +336,16 @@ typedef enum { #define SPA_VERSION_25 25ULL #define SPA_VERSION_26 26ULL #define SPA_VERSION_27 27ULL +#define SPA_VERSION_28 28ULL + /* * When bumping up SPA_VERSION, make sure GRUB ZFS understands the on-disk * format change. Go to usr/src/grub/grub-0.97/stage2/{zfs-include/, fsys_zfs*}, * and do the appropriate changes. Also bump the version number in * usr/src/grub/capability. */ -#define SPA_VERSION SPA_VERSION_27 -#define SPA_VERSION_STRING "27" +#define SPA_VERSION SPA_VERSION_28 +#define SPA_VERSION_STRING "28" /* * Symbolic names for the changes that caused a SPA_VERSION switch. @@ -393,6 +395,7 @@ typedef enum { #define SPA_VERSION_DIR_CLONES SPA_VERSION_26 #define SPA_VERSION_DEADLISTS SPA_VERSION_26 #define SPA_VERSION_FAST_SNAP SPA_VERSION_27 +#define SPA_VERSION_MULTI_REPLACE SPA_VERSION_28 /* * ZPL version - rev'd whenever an incompatible on-disk format change @@ -483,6 +486,7 @@ typedef struct zpool_rewind_policy { #define ZPOOL_CONFIG_SPLIT_GUID "split_guid" #define ZPOOL_CONFIG_SPLIT_LIST "guid_list" #define ZPOOL_CONFIG_REMOVING "removing" +#define ZPOOL_CONFIG_RESILVERING "resilvering" #define ZPOOL_CONFIG_SUSPENDED "suspended" /* not stored on disk */ #define ZPOOL_CONFIG_TIMESTAMP "timestamp" /* not stored on disk */ #define ZPOOL_CONFIG_BOOTFS "bootfs" /* not stored on disk */ |