summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Mark J Musante <Mark.Musante@Sun.COM> 2010-08-06 13:53:14 -0600
committer Mark J Musante <Mark.Musante@Sun.COM> 2010-08-06 13:53:14 -0600
commit cb04b8739c50e3e6d12e89b790fa7b8d0d899865 (patch)
tree c421a5b9f9c5c255e0a88f0c0dabeb04ffc65a2a
parent 35f59e50e9ef37e6f1bd3b018289d678d5551cab (diff)
download illumos-gate-cb04b8739c50e3e6d12e89b790fa7b8d0d899865.tar.gz
6782540 zpool cannot replace a replacing device
-rw-r--r-- usr/src/cmd/zinject/zinject.c 24
-rw-r--r-- usr/src/cmd/zpool/zpool_main.c 1
-rw-r--r-- usr/src/grub/capability 2
-rw-r--r-- usr/src/grub/grub-0.97/stage2/zfs-include/zfs.h 2
-rw-r--r-- usr/src/lib/libzfs/common/libzfs_pool.c 76
-rw-r--r-- usr/src/uts/common/fs/zfs/spa.c 160
-rw-r--r-- usr/src/uts/common/fs/zfs/sys/vdev_impl.h 1
-rw-r--r-- usr/src/uts/common/fs/zfs/vdev.c 17
-rw-r--r-- usr/src/uts/common/fs/zfs/vdev_label.c 3
-rw-r--r-- usr/src/uts/common/sys/fs/zfs.h 8
10 files changed, 179 insertions, 115 deletions
diff --git a/usr/src/cmd/zinject/zinject.c b/usr/src/cmd/zinject/zinject.c
index ab04e422a9..60c53ceb3f 100644
--- a/usr/src/cmd/zinject/zinject.c
+++ b/usr/src/cmd/zinject/zinject.c
@@ -233,7 +233,7 @@ usage(void)
"\t\tInject a fault into a particular device or the device's\n"
"\t\tlabel. Label injection can either be 'nvlist', 'uber',\n "
"\t\t'pad1', or 'pad2'.\n"
- "\t\t'errno' can either be 'nxio' (the default) or 'io'.\n"
+ "\t\t'errno' can be 'nxio' (the default), 'io', or 'dtl'.\n"
"\n"
"\tzinject -d device -A <degrade|fault> pool\n"
"\t\tPerform a specific action on a particular device\n"
@@ -395,17 +395,25 @@ print_panic_handler(int id, const char *pool, zinject_record_t *record,
static int
print_all_handlers(void)
{
- int count = 0;
+ int count = 0, total = 0;
(void) iter_handlers(print_device_handler, &count);
- (void) printf("\n");
- count = 0;
+ if (count > 0) {
+ total += count;
+ (void) printf("\n");
+ count = 0;
+ }
+
(void) iter_handlers(print_data_handler, &count);
- (void) printf("\n");
- count = 0;
+ if (count > 0) {
+ total += count;
+ (void) printf("\n");
+ count = 0;
+ }
+
(void) iter_handlers(print_panic_handler, &count);
- return (count);
+ return (count + total);
}
/* ARGSUSED */
@@ -627,6 +635,8 @@ main(int argc, char **argv)
error = ECKSUM;
} else if (strcasecmp(optarg, "nxio") == 0) {
error = ENXIO;
+ } else if (strcasecmp(optarg, "dtl") == 0) {
+ error = ECHILD;
} else {
(void) fprintf(stderr, "invalid error type "
"'%s': must be 'io', 'checksum' or "
diff --git a/usr/src/cmd/zpool/zpool_main.c b/usr/src/cmd/zpool/zpool_main.c
index 1c2565ce72..0739d9bb2b 100644
--- a/usr/src/cmd/zpool/zpool_main.c
+++ b/usr/src/cmd/zpool/zpool_main.c
@@ -4014,6 +4014,7 @@ zpool_do_upgrade(int argc, char **argv)
"performance\n"));
(void) printf(gettext(" 27 Improved snapshot creation "
"performance\n"));
+ (void) printf(gettext(" 28 Multiple vdev replacements\n"));
(void) printf(gettext("\nFor more information on a particular "
"version, including supported releases,\n"));
(void) printf(gettext("see the ZFS Administration Guide.\n\n"));
diff --git a/usr/src/grub/capability b/usr/src/grub/capability
index 8039914e06..225ce5bb83 100644
--- a/usr/src/grub/capability
+++ b/usr/src/grub/capability
@@ -39,7 +39,7 @@
# This file and the associated version are Solaris specific and are
# not a part of the open source distribution of GRUB.
#
-VERSION=19
+VERSION=20
dboot
xVM
zfs
diff --git a/usr/src/grub/grub-0.97/stage2/zfs-include/zfs.h b/usr/src/grub/grub-0.97/stage2/zfs-include/zfs.h
index 0834c0cf6e..9ad1367ed3 100644
--- a/usr/src/grub/grub-0.97/stage2/zfs-include/zfs.h
+++ b/usr/src/grub/grub-0.97/stage2/zfs-include/zfs.h
@@ -26,7 +26,7 @@
/*
* On-disk version number.
*/
-#define SPA_VERSION 27ULL
+#define SPA_VERSION 28ULL
/*
* The following are configuration names used in the nvlist describing a pool's
diff --git a/usr/src/lib/libzfs/common/libzfs_pool.c b/usr/src/lib/libzfs/common/libzfs_pool.c
index 23907b2090..8e5eff3dda 100644
--- a/usr/src/lib/libzfs/common/libzfs_pool.c
+++ b/usr/src/lib/libzfs/common/libzfs_pool.c
@@ -997,13 +997,12 @@ zpool_destroy(zpool_handle_t *zhp)
char msg[1024];
if (zhp->zpool_state == POOL_STATE_ACTIVE &&
- (zfp = zfs_open(zhp->zpool_hdl, zhp->zpool_name,
- ZFS_TYPE_FILESYSTEM)) == NULL)
+ (zfp = zfs_open(hdl, zhp->zpool_name, ZFS_TYPE_FILESYSTEM)) == NULL)
return (-1);
(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
- if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_DESTROY, &zc) != 0) {
+ if (zfs_ioctl(hdl, ZFS_IOC_POOL_DESTROY, &zc) != 0) {
(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
"cannot destroy '%s'"), zhp->zpool_name);
@@ -1086,7 +1085,7 @@ zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot)
return (-1);
(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
- if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_VDEV_ADD, &zc) != 0) {
+ if (zfs_ioctl(hdl, ZFS_IOC_VDEV_ADD, &zc) != 0) {
switch (errno) {
case EBUSY:
/*
@@ -1578,7 +1577,7 @@ zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func)
(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
zc.zc_cookie = func;
- if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_SCAN, &zc) == 0 ||
+ if (zfs_ioctl(hdl, ZFS_IOC_POOL_SCAN, &zc) == 0 ||
(errno == ENOENT && func != POOL_SCAN_NONE))
return (0);
@@ -1670,26 +1669,17 @@ vdev_to_nvlist_iter(nvlist_t *nv, nvlist_t *search, boolean_t *avail_spare,
srchkey = nvpair_name(pair);
switch (nvpair_type(pair)) {
- case DATA_TYPE_UINT64: {
- uint64_t srchval, theguid, present;
-
- verify(nvpair_value_uint64(pair, &srchval) == 0);
+ case DATA_TYPE_UINT64:
if (strcmp(srchkey, ZPOOL_CONFIG_GUID) == 0) {
- if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT,
- &present) == 0) {
- /*
- * If the device has never been present since
- * import, the only reliable way to match the
- * vdev is by GUID.
- */
- verify(nvlist_lookup_uint64(nv,
- ZPOOL_CONFIG_GUID, &theguid) == 0);
- if (theguid == srchval)
- return (nv);
- }
+ uint64_t srchval, theguid;
+
+ verify(nvpair_value_uint64(pair, &srchval) == 0);
+ verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
+ &theguid) == 0);
+ if (theguid == srchval)
+ return (nv);
}
break;
- }
case DATA_TYPE_STRING: {
char *srchval, *val;
@@ -1871,6 +1861,8 @@ zpool_find_vdev_by_physpath(zpool_handle_t *zhp, const char *ppath,
&nvroot) == 0);
*avail_spare = B_FALSE;
+ *l2cache = B_FALSE;
+ *log = B_FALSE;
ret = vdev_to_nvlist_iter(nvroot, search, avail_spare, l2cache, log);
nvlist_free(search);
@@ -2166,14 +2158,14 @@ zpool_vdev_online(zpool_handle_t *zhp, const char *path, int flags,
if (wholedisk) {
pathname += strlen(DISK_ROOT) + 1;
- (void) zpool_relabel_disk(zhp->zpool_hdl, pathname);
+ (void) zpool_relabel_disk(hdl, pathname);
}
}
zc.zc_cookie = VDEV_STATE_ONLINE;
zc.zc_obj = flags;
- if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_VDEV_SET_STATE, &zc) != 0) {
+ if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) != 0) {
if (errno == EINVAL) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "was split "
"from this pool into a new one. Use '%s' "
@@ -2215,7 +2207,7 @@ zpool_vdev_offline(zpool_handle_t *zhp, const char *path, boolean_t istmp)
zc.zc_cookie = VDEV_STATE_OFFLINE;
zc.zc_obj = istmp ? ZFS_OFFLINE_TEMPORARY : 0;
- if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
+ if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
return (0);
switch (errno) {
@@ -2255,7 +2247,7 @@ zpool_vdev_fault(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux)
zc.zc_cookie = VDEV_STATE_FAULTED;
zc.zc_obj = aux;
- if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
+ if (ioctl(hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
return (0);
switch (errno) {
@@ -2290,7 +2282,7 @@ zpool_vdev_degrade(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux)
zc.zc_cookie = VDEV_STATE_DEGRADED;
zc.zc_obj = aux;
- if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
+ if (ioctl(hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
return (0);
return (zpool_standard_error(hdl, errno, msg));
@@ -2338,7 +2330,7 @@ zpool_vdev_attach(zpool_handle_t *zhp,
nvlist_t *tgt;
boolean_t avail_spare, l2cache, islog;
uint64_t val;
- char *path, *newname;
+ char *newname;
nvlist_t **child;
uint_t children;
nvlist_t *config_root;
@@ -2404,27 +2396,12 @@ zpool_vdev_attach(zpool_handle_t *zhp,
return (zfs_error(hdl, EZFS_BADTARGET, msg));
}
- /*
- * If we are attempting to replace a spare, it canot be applied to an
- * already spared device.
- */
- if (replacing &&
- nvlist_lookup_string(child[0], ZPOOL_CONFIG_PATH, &path) == 0 &&
- zpool_find_vdev(zhp, newname, &avail_spare,
- &l2cache, NULL) != NULL && avail_spare &&
- is_replacing_spare(config_root, tgt, 0)) {
- zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
- "device has already been replaced with a spare"));
- free(newname);
- return (zfs_error(hdl, EZFS_BADTARGET, msg));
- }
-
free(newname);
if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
return (-1);
- ret = zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_VDEV_ATTACH, &zc);
+ ret = zfs_ioctl(hdl, ZFS_IOC_VDEV_ATTACH, &zc);
zcmd_free_nvlists(&zc);
@@ -2447,9 +2424,16 @@ zpool_vdev_attach(zpool_handle_t *zhp,
* Can't attach to or replace this type of vdev.
*/
if (replacing) {
+ uint64_t version = zpool_get_prop_int(zhp,
+ ZPOOL_PROP_VERSION, NULL);
+
if (islog)
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"cannot replace a log with a spare"));
+ else if (version >= SPA_VERSION_MULTI_REPLACE)
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+ "already in replacing/spare config; wait "
+ "for completion or use 'zpool detach'"));
else
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"cannot replace a replacing device"));
@@ -2547,7 +2531,7 @@ zpool_vdev_detach(zpool_handle_t *zhp, const char *path)
*/
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "only "
"applicable to mirror and replacing vdevs"));
- (void) zfs_error(zhp->zpool_hdl, EZFS_BADTARGET, msg);
+ (void) zfs_error(hdl, EZFS_BADTARGET, msg);
break;
case EBUSY:
@@ -2908,7 +2892,7 @@ zpool_clear(zpool_handle_t *zhp, const char *path, nvlist_t *rewindnvl)
if (zcmd_alloc_dst_nvlist(hdl, &zc, zhp->zpool_config_size * 2) != 0)
return (-1);
- if (zcmd_write_src_nvlist(zhp->zpool_hdl, &zc, rewindnvl) != 0)
+ if (zcmd_write_src_nvlist(hdl, &zc, rewindnvl) != 0)
return (-1);
while ((error = zfs_ioctl(hdl, ZFS_IOC_CLEAR, &zc)) != 0 &&
diff --git a/usr/src/uts/common/fs/zfs/spa.c b/usr/src/uts/common/fs/zfs/spa.c
index 3e6f4ab2a7..59c7656044 100644
--- a/usr/src/uts/common/fs/zfs/spa.c
+++ b/usr/src/uts/common/fs/zfs/spa.c
@@ -116,6 +116,7 @@ static boolean_t spa_has_active_shared_spare(spa_t *spa);
static int spa_load_impl(spa_t *spa, uint64_t, nvlist_t *config,
spa_load_state_t state, spa_import_type_t type, boolean_t mosconfig,
char **ereport);
+static void spa_vdev_resilver_done(spa_t *spa);
uint_t zio_taskq_batch_pct = 100; /* 1 thread per cpu in pset */
id_t zio_taskq_psrset_bind = PS_NONE;
@@ -3226,7 +3227,8 @@ spa_import_rootpool(char *devpath, char *devid)
!bvd->vdev_isspare) {
cmn_err(CE_NOTE, "The boot device is currently spared. Please "
"try booting from '%s'",
- bvd->vdev_parent->vdev_child[1]->vdev_path);
+ bvd->vdev_parent->
+ vdev_child[bvd->vdev_parent->vdev_children - 1]->vdev_path);
error = EINVAL;
goto out;
}
@@ -3834,7 +3836,7 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing)
* spares.
*/
if (pvd->vdev_ops == &vdev_spare_ops &&
- pvd->vdev_child[1] == oldvd &&
+ oldvd->vdev_isspare &&
!spa_has_spare(spa, newvd->vdev_guid))
return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));
@@ -3846,13 +3848,15 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing)
* the same (spare replaces spare, non-spare replaces
* non-spare).
*/
- if (pvd->vdev_ops == &vdev_replacing_ops)
+ if (pvd->vdev_ops == &vdev_replacing_ops &&
+ spa_version(spa) < SPA_VERSION_MULTI_REPLACE) {
return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));
- else if (pvd->vdev_ops == &vdev_spare_ops &&
- newvd->vdev_isspare != oldvd->vdev_isspare)
+ } else if (pvd->vdev_ops == &vdev_spare_ops &&
+ newvd->vdev_isspare != oldvd->vdev_isspare) {
return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));
- else if (pvd->vdev_ops != &vdev_spare_ops &&
- newvd->vdev_isspare)
+ }
+
+ if (newvd->vdev_isspare)
pvops = &vdev_spare_ops;
else
pvops = &vdev_replacing_ops;
@@ -3887,6 +3891,9 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing)
}
}
+ /* mark the device being resilvered */
+ newvd->vdev_resilvering = B_TRUE;
+
/*
* If the parent is not a mirror, or if we're replacing, insert the new
* mirror/replacing/spare vdev above oldvd.
@@ -3975,7 +3982,6 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done)
vdev_t *vd, *pvd, *cvd, *tvd;
boolean_t unspare = B_FALSE;
uint64_t unspare_guid;
- size_t len;
char *vdpath;
txg = spa_vdev_enter(spa);
@@ -4007,18 +4013,11 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done)
return (spa_vdev_exit(spa, NULL, txg, EBUSY));
/*
- * If replace_done is specified, only remove this device if it's
- * the first child of a replacing vdev. For the 'spare' vdev, either
- * disk can be removed.
+ * Only 'replacing' or 'spare' vdevs can be replaced.
*/
- if (replace_done) {
- if (pvd->vdev_ops == &vdev_replacing_ops) {
- if (vd->vdev_id != 0)
- return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));
- } else if (pvd->vdev_ops != &vdev_spare_ops) {
- return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));
- }
- }
+ if (replace_done && pvd->vdev_ops != &vdev_replacing_ops &&
+ pvd->vdev_ops != &vdev_spare_ops)
+ return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));
ASSERT(pvd->vdev_ops != &vdev_spare_ops ||
spa_version(spa) >= SPA_VERSION_SPARES);
@@ -4045,16 +4044,22 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done)
* check to see if we changed the original vdev's path to have "/old"
* at the end in spa_vdev_attach(). If so, undo that change now.
*/
- if (pvd->vdev_ops == &vdev_replacing_ops && vd->vdev_id == 1 &&
- pvd->vdev_child[0]->vdev_path != NULL &&
- pvd->vdev_child[1]->vdev_path != NULL) {
- ASSERT(pvd->vdev_child[1] == vd);
- cvd = pvd->vdev_child[0];
- len = strlen(vd->vdev_path);
- if (strncmp(cvd->vdev_path, vd->vdev_path, len) == 0 &&
- strcmp(cvd->vdev_path + len, "/old") == 0) {
- spa_strfree(cvd->vdev_path);
- cvd->vdev_path = spa_strdup(vd->vdev_path);
+ if (pvd->vdev_ops == &vdev_replacing_ops && vd->vdev_id > 0 &&
+ vd->vdev_path != NULL) {
+ size_t len = strlen(vd->vdev_path);
+
+ for (int c = 0; c < pvd->vdev_children; c++) {
+ cvd = pvd->vdev_child[c];
+
+ if (cvd == vd || cvd->vdev_path == NULL)
+ continue;
+
+ if (strncmp(cvd->vdev_path, vd->vdev_path, len) == 0 &&
+ strcmp(cvd->vdev_path + len, "/old") == 0) {
+ spa_strfree(cvd->vdev_path);
+ cvd->vdev_path = spa_strdup(vd->vdev_path);
+ break;
+ }
}
}
@@ -4064,7 +4069,8 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done)
* active spare list for the pool.
*/
if (pvd->vdev_ops == &vdev_spare_ops &&
- vd->vdev_id == 0 && pvd->vdev_child[1]->vdev_isspare)
+ vd->vdev_id == 0 &&
+ pvd->vdev_child[pvd->vdev_children - 1]->vdev_isspare)
unspare = B_TRUE;
/*
@@ -4086,7 +4092,7 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done)
/*
* Remember one of the remaining children so we can get tvd below.
*/
- cvd = pvd->vdev_child[0];
+ cvd = pvd->vdev_child[pvd->vdev_children - 1];
/*
* If we need to remove the remaining child from the list of hot spares,
@@ -4102,14 +4108,20 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done)
spa_spare_remove(cvd);
unspare_guid = cvd->vdev_guid;
(void) spa_vdev_remove(spa, unspare_guid, B_TRUE);
+ cvd->vdev_unspare = B_TRUE;
}
/*
* If the parent mirror/replacing vdev only has one child,
* the parent is no longer needed. Remove it from the tree.
*/
- if (pvd->vdev_children == 1)
+ if (pvd->vdev_children == 1) {
+ if (pvd->vdev_ops == &vdev_spare_ops)
+ cvd->vdev_unspare = B_FALSE;
vdev_remove_parent(cvd);
+ cvd->vdev_resilvering = B_FALSE;
+ }
+
/*
* We don't set tvd until now because the parent we just removed
@@ -4151,6 +4163,9 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done)
spa_event_notify(spa, vd, ESC_ZFS_VDEV_REMOVE);
+ /* hang on to the spa before we release the lock */
+ spa_open_ref(spa, FTAG);
+
error = spa_vdev_exit(spa, vd, txg, 0);
spa_history_log_internal(LOG_POOL_VDEV_DETACH, spa, NULL,
@@ -4163,24 +4178,31 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done)
* list of every other pool.
*/
if (unspare) {
- spa_t *myspa = spa;
- spa = NULL;
+ spa_t *altspa = NULL;
+
mutex_enter(&spa_namespace_lock);
- while ((spa = spa_next(spa)) != NULL) {
- if (spa->spa_state != POOL_STATE_ACTIVE)
- continue;
- if (spa == myspa)
+ while ((altspa = spa_next(altspa)) != NULL) {
+ if (altspa->spa_state != POOL_STATE_ACTIVE ||
+ altspa == spa)
continue;
- spa_open_ref(spa, FTAG);
+
+ spa_open_ref(altspa, FTAG);
mutex_exit(&spa_namespace_lock);
- (void) spa_vdev_remove(spa, unspare_guid,
- B_TRUE);
+ (void) spa_vdev_remove(altspa, unspare_guid, B_TRUE);
mutex_enter(&spa_namespace_lock);
- spa_close(spa, FTAG);
+ spa_close(altspa, FTAG);
}
mutex_exit(&spa_namespace_lock);
+
+ /* search the rest of the vdevs for spares to remove */
+ spa_vdev_resilver_done(spa);
}
+ /* all done with the spa; OK to release */
+ mutex_enter(&spa_namespace_lock);
+ spa_close(spa, FTAG);
+ mutex_exit(&spa_namespace_lock);
+
return (error);
}
@@ -4728,11 +4750,18 @@ spa_vdev_resilver_done_hunt(vdev_t *vd)
}
/*
- * Check for a completed replacement.
+ * Check for a completed replacement. We always consider the first
+ * vdev in the list to be the oldest vdev, and the last one to be
+ * the newest (see spa_vdev_attach() for how that works). In
+ * the case where the newest vdev is faulted, we will not automatically
+ * remove it after a resilver completes. This is OK as it will require
+ * user intervention to determine which disk the admin wishes to keep.
*/
- if (vd->vdev_ops == &vdev_replacing_ops && vd->vdev_children == 2) {
+ if (vd->vdev_ops == &vdev_replacing_ops) {
+ ASSERT(vd->vdev_children > 1);
+
+ newvd = vd->vdev_child[vd->vdev_children - 1];
oldvd = vd->vdev_child[0];
- newvd = vd->vdev_child[1];
if (vdev_dtl_empty(newvd, DTL_MISSING) &&
vdev_dtl_empty(newvd, DTL_OUTAGE) &&
@@ -4743,16 +4772,41 @@ spa_vdev_resilver_done_hunt(vdev_t *vd)
/*
* Check for a completed resilver with the 'unspare' flag set.
*/
- if (vd->vdev_ops == &vdev_spare_ops && vd->vdev_children == 2) {
- newvd = vd->vdev_child[0];
- oldvd = vd->vdev_child[1];
+ if (vd->vdev_ops == &vdev_spare_ops) {
+ vdev_t *first = vd->vdev_child[0];
+ vdev_t *last = vd->vdev_child[vd->vdev_children - 1];
+
+ if (last->vdev_unspare) {
+ oldvd = first;
+ newvd = last;
+ } else if (first->vdev_unspare) {
+ oldvd = last;
+ newvd = first;
+ } else {
+ oldvd = NULL;
+ }
- if (newvd->vdev_unspare &&
+ if (oldvd != NULL &&
vdev_dtl_empty(newvd, DTL_MISSING) &&
vdev_dtl_empty(newvd, DTL_OUTAGE) &&
- !vdev_dtl_required(oldvd)) {
- newvd->vdev_unspare = 0;
+ !vdev_dtl_required(oldvd))
return (oldvd);
+
+ /*
+ * If there are more than two spares attached to a disk,
+ * and those spares are not required, then we want to
+ * attempt to free them up now so that they can be used
+ * by other pools. Once we're back down to a single
+ * disk+spare, we stop removing them.
+ */
+ if (vd->vdev_children > 2) {
+ newvd = vd->vdev_child[1];
+
+ if (newvd->vdev_isspare && last->vdev_isspare &&
+ vdev_dtl_empty(last, DTL_MISSING) &&
+ vdev_dtl_empty(last, DTL_OUTAGE) &&
+ !vdev_dtl_required(newvd))
+ return (newvd);
}
}
@@ -4779,9 +4833,9 @@ spa_vdev_resilver_done(spa_t *spa)
* we need to detach the parent's first child (the original hot
* spare) as well.
*/
- if (ppvd->vdev_ops == &vdev_spare_ops && pvd->vdev_id == 0) {
+ if (ppvd->vdev_ops == &vdev_spare_ops && pvd->vdev_id == 0 &&
+ ppvd->vdev_children == 2) {
ASSERT(pvd->vdev_ops == &vdev_replacing_ops);
- ASSERT(ppvd->vdev_children == 2);
sguid = ppvd->vdev_child[1]->vdev_guid;
}
spa_config_exit(spa, SCL_ALL, FTAG);
diff --git a/usr/src/uts/common/fs/zfs/sys/vdev_impl.h b/usr/src/uts/common/fs/zfs/sys/vdev_impl.h
index 63d1b3eeda..161bd21f05 100644
--- a/usr/src/uts/common/fs/zfs/sys/vdev_impl.h
+++ b/usr/src/uts/common/fs/zfs/sys/vdev_impl.h
@@ -169,6 +169,7 @@ struct vdev {
uint64_t vdev_faulted; /* persistent faulted state */
uint64_t vdev_degraded; /* persistent degraded state */
uint64_t vdev_removed; /* persistent removed state */
+ uint64_t vdev_resilvering; /* persistent resilvering state */
uint64_t vdev_nparity; /* number of parity devices for raidz */
char *vdev_path; /* vdev path (if any) */
char *vdev_devid; /* vdev devid (if any) */
diff --git a/usr/src/uts/common/fs/zfs/vdev.c b/usr/src/uts/common/fs/zfs/vdev.c
index 7c6892b530..600da9a83d 100644
--- a/usr/src/uts/common/fs/zfs/vdev.c
+++ b/usr/src/uts/common/fs/zfs/vdev.c
@@ -518,6 +518,9 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id,
(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_OFFLINE,
&vd->vdev_offline);
+ (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_RESILVERING,
+ &vd->vdev_resilvering);
+
/*
* When importing a pool, we want to ignore the persistent fault
* state, as the diagnosis made on another system may not be
@@ -1849,6 +1852,9 @@ vdev_dtl_required(vdev_t *vd)
vd->vdev_cant_read = cant_read;
vdev_dtl_reassess(tvd, 0, 0, B_FALSE);
+ if (!required && zio_injection_enabled)
+ required = !!zio_handle_device_injection(vd, NULL, ECHILD);
+
return (required);
}
@@ -2940,12 +2946,13 @@ vdev_set_state(vdev_t *vd, boolean_t isopen, vdev_state_t state, vdev_aux_t aux)
vd->vdev_removed = B_TRUE;
} else if (state == VDEV_STATE_CANT_OPEN) {
/*
- * If we fail to open a vdev during an import, we mark it as
- * "not available", which signifies that it was never there to
- * begin with. Failure to open such a device is not considered
- * an error.
+ * If we fail to open a vdev during an import or recovery, we
+ * mark it as "not available", which signifies that it was
+ * never there to begin with. Failure to open such a device
+ * is not considered an error.
*/
- if (spa_load_state(spa) == SPA_LOAD_IMPORT &&
+ if ((spa_load_state(spa) == SPA_LOAD_IMPORT ||
+ spa_load_state(spa) == SPA_LOAD_RECOVER) &&
vd->vdev_ops->vdev_op_leaf)
vd->vdev_not_present = 1;
diff --git a/usr/src/uts/common/fs/zfs/vdev_label.c b/usr/src/uts/common/fs/zfs/vdev_label.c
index 75ec545345..ec5ec6ee34 100644
--- a/usr/src/uts/common/fs/zfs/vdev_label.c
+++ b/usr/src/uts/common/fs/zfs/vdev_label.c
@@ -353,6 +353,9 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats,
if (vd->vdev_offline && !vd->vdev_tmpoffline)
VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_OFFLINE,
B_TRUE) == 0);
+ if (vd->vdev_resilvering)
+ VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_RESILVERING,
+ B_TRUE) == 0);
if (vd->vdev_faulted)
VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_FAULTED,
B_TRUE) == 0);
diff --git a/usr/src/uts/common/sys/fs/zfs.h b/usr/src/uts/common/sys/fs/zfs.h
index 8b03fb0f9d..a6a3484ffc 100644
--- a/usr/src/uts/common/sys/fs/zfs.h
+++ b/usr/src/uts/common/sys/fs/zfs.h
@@ -336,14 +336,16 @@ typedef enum {
#define SPA_VERSION_25 25ULL
#define SPA_VERSION_26 26ULL
#define SPA_VERSION_27 27ULL
+#define SPA_VERSION_28 28ULL
+
/*
* When bumping up SPA_VERSION, make sure GRUB ZFS understands the on-disk
* format change. Go to usr/src/grub/grub-0.97/stage2/{zfs-include/, fsys_zfs*},
* and do the appropriate changes. Also bump the version number in
* usr/src/grub/capability.
*/
-#define SPA_VERSION SPA_VERSION_27
-#define SPA_VERSION_STRING "27"
+#define SPA_VERSION SPA_VERSION_28
+#define SPA_VERSION_STRING "28"
/*
* Symbolic names for the changes that caused a SPA_VERSION switch.
@@ -393,6 +395,7 @@ typedef enum {
#define SPA_VERSION_DIR_CLONES SPA_VERSION_26
#define SPA_VERSION_DEADLISTS SPA_VERSION_26
#define SPA_VERSION_FAST_SNAP SPA_VERSION_27
+#define SPA_VERSION_MULTI_REPLACE SPA_VERSION_28
/*
* ZPL version - rev'd whenever an incompatible on-disk format change
@@ -483,6 +486,7 @@ typedef struct zpool_rewind_policy {
#define ZPOOL_CONFIG_SPLIT_GUID "split_guid"
#define ZPOOL_CONFIG_SPLIT_LIST "guid_list"
#define ZPOOL_CONFIG_REMOVING "removing"
+#define ZPOOL_CONFIG_RESILVERING "resilvering"
#define ZPOOL_CONFIG_SUSPENDED "suspended" /* not stored on disk */
#define ZPOOL_CONFIG_TIMESTAMP "timestamp" /* not stored on disk */
#define ZPOOL_CONFIG_BOOTFS "bootfs" /* not stored on disk */