diff options
| author | George Wilson <George.Wilson@Sun.COM> | 2009-06-08 10:35:50 -0700 |
|---|---|---|
| committer | George Wilson <George.Wilson@Sun.COM> | 2009-06-08 10:35:50 -0700 |
| commit | 573ca77e53dd31dcaebef023e7eb41969e6896c1 (patch) | |
| tree | d32384ceb45e66779de3cd1b448c9cc1a2a18236 /usr/src/cmd/ztest | |
| parent | b509e89b2befbaa42939abad9da1d7f5a8c6aaae (diff) | |
| download | illumos-joyent-573ca77e53dd31dcaebef023e7eb41969e6896c1.tar.gz | |
PSARC 2008/353 zpool autoexpand property
6475340 when lun expands, zfs should expand too
6563887 in-place replacement allows for smaller devices
6606879 should be able to grow pool without a reboot or export/import
6844090 zfs should be able to mirror to a smaller disk
Diffstat (limited to 'usr/src/cmd/ztest')
| -rw-r--r-- | usr/src/cmd/ztest/ztest.c | 211 |
1 files changed, 182 insertions, 29 deletions
diff --git a/usr/src/cmd/ztest/ztest.c b/usr/src/cmd/ztest/ztest.c index cb59116d42..746db0c070 100644 --- a/usr/src/cmd/ztest/ztest.c +++ b/usr/src/cmd/ztest/ztest.c @@ -954,7 +954,7 @@ ztest_vdev_aux_add_remove(ztest_args_t *za) * of devices that have pending state changes. */ if (ztest_random(2) == 0) - (void) vdev_online(spa, guid, B_FALSE, NULL); + (void) vdev_online(spa, guid, 0, NULL); error = spa_vdev_remove(spa, guid, B_FALSE); if (error != 0 && error != EBUSY) @@ -1032,7 +1032,7 @@ ztest_vdev_attach_detach(ztest_args_t *za) } oldguid = oldvd->vdev_guid; - oldsize = vdev_get_rsize(oldvd); + oldsize = vdev_get_min_asize(oldvd); oldvd_is_log = oldvd->vdev_top->vdev_islog; (void) strcpy(oldpath, oldvd->vdev_path); pvd = oldvd->vdev_parent; @@ -1068,7 +1068,7 @@ ztest_vdev_attach_detach(ztest_args_t *za) } if (newvd) { - newsize = vdev_get_rsize(newvd); + newsize = vdev_get_min_asize(newvd); } else { /* * Make newsize a little bigger or smaller than oldsize. @@ -1144,49 +1144,202 @@ ztest_vdev_attach_detach(ztest_args_t *za) } /* + * Callback function which expands the physical size of the vdev. + */ +vdev_t * +grow_vdev(vdev_t *vd, void *arg) +{ + spa_t *spa = vd->vdev_spa; + size_t *newsize = arg; + size_t fsize; + int fd; + + ASSERT(spa_config_held(spa, SCL_STATE, RW_READER) == SCL_STATE); + ASSERT(vd->vdev_ops->vdev_op_leaf); + + if ((fd = open(vd->vdev_path, O_RDWR)) == -1) + return (vd); + + fsize = lseek(fd, 0, SEEK_END); + (void) ftruncate(fd, *newsize); + + if (zopt_verbose >= 6) { + (void) printf("%s grew from %lu to %lu bytes\n", + vd->vdev_path, (ulong_t)fsize, (ulong_t)*newsize); + } + (void) close(fd); + return (NULL); +} + +/* + * Callback function which expands a given vdev by calling vdev_online(). + */ +/* ARGSUSED */ +vdev_t * +online_vdev(vdev_t *vd, void *arg) +{ + spa_t *spa = vd->vdev_spa; + vdev_t *tvd = vd->vdev_top; + vdev_t *pvd = vd->vdev_parent; + uint64_t guid = vd->vdev_guid; + + ASSERT(spa_config_held(spa, SCL_STATE, RW_READER) == SCL_STATE); + ASSERT(vd->vdev_ops->vdev_op_leaf); + + /* Calling vdev_online will initialize the new metaslabs */ + spa_config_exit(spa, SCL_STATE, spa); + (void) vdev_online(spa, guid, ZFS_ONLINE_EXPAND, NULL); + spa_config_enter(spa, SCL_STATE, spa, RW_READER); + + /* + * Since we dropped the lock we need to ensure that we're + * still talking to the original vdev. It's possible this + * vdev may have been detached/replaced while we were + * trying to online it. + */ + if (vd != vdev_lookup_by_guid(tvd, guid) || vd->vdev_parent != pvd) { + if (zopt_verbose >= 6) { + (void) printf("vdev %p has disappeared, was " + "guid %llu\n", (void *)vd, (u_longlong_t)guid); + } + return (vd); + } + return (NULL); +} + +/* + * Traverse the vdev tree calling the supplied function. + * We continue to walk the tree until we either have walked all + * children or we receive a non-NULL return from the callback. + * If a NULL callback is passed, then we just return back the first + * leaf vdev we encounter. + */ +vdev_t * +vdev_walk_tree(vdev_t *vd, vdev_t *(*func)(vdev_t *, void *), void *arg) +{ + if (vd->vdev_ops->vdev_op_leaf) { + if (func == NULL) + return (vd); + else + return (func(vd, arg)); + } + + for (uint_t c = 0; c < vd->vdev_children; c++) { + vdev_t *cvd = vd->vdev_child[c]; + if ((cvd = vdev_walk_tree(cvd, func, arg)) != NULL) + return (cvd); + } + return (NULL); +} + +/* * Verify that dynamic LUN growth works as expected. */ void ztest_vdev_LUN_growth(ztest_args_t *za) { spa_t *spa = za->za_spa; - char dev_name[MAXPATHLEN]; - uint64_t leaves = MAX(zopt_mirrors, 1) * zopt_raidz; - uint64_t vdev; - size_t fsize; - int fd; + vdev_t *vd, *tvd = NULL; + size_t psize, newsize; + uint64_t spa_newsize, spa_cursize, ms_count; (void) mutex_lock(&ztest_shared->zs_vdev_lock); + mutex_enter(&spa_namespace_lock); + spa_config_enter(spa, SCL_STATE, spa, RW_READER); + + while (tvd == NULL || tvd->vdev_islog) { + uint64_t vdev; + + vdev = ztest_random(spa->spa_root_vdev->vdev_children); + tvd = spa->spa_root_vdev->vdev_child[vdev]; + } /* - * Pick a random leaf vdev. + * Determine the size of the first leaf vdev associated with + * our top-level device. */ - spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); - vdev = ztest_random(spa->spa_root_vdev->vdev_children * leaves); - spa_config_exit(spa, SCL_VDEV, FTAG); + vd = vdev_walk_tree(tvd, NULL, NULL); + ASSERT3P(vd, !=, NULL); + ASSERT(vd->vdev_ops->vdev_op_leaf); - (void) sprintf(dev_name, ztest_dev_template, zopt_dir, zopt_pool, vdev); + psize = vd->vdev_psize; - if ((fd = open(dev_name, O_RDWR)) != -1) { - /* - * Determine the size. - */ - fsize = lseek(fd, 0, SEEK_END); + /* + * We only try to expand the vdev if it's less than 4x its + * original size and it has a valid psize. + */ + if (psize == 0 || psize >= 4 * zopt_vdev_size) { + spa_config_exit(spa, SCL_STATE, spa); + mutex_exit(&spa_namespace_lock); + (void) mutex_unlock(&ztest_shared->zs_vdev_lock); + return; + } + ASSERT(psize > 0); + newsize = psize + psize / 8; + ASSERT3U(newsize, >, psize); - /* - * If it's less than 2x the original size, grow by around 3%. - */ - if (fsize < 2 * zopt_vdev_size) { - size_t newsize = fsize + ztest_random(fsize / 32); - (void) ftruncate(fd, newsize); - if (zopt_verbose >= 6) { - (void) printf("%s grew from %lu to %lu bytes\n", - dev_name, (ulong_t)fsize, (ulong_t)newsize); - } + if (zopt_verbose >= 6) { + (void) printf("Expanding vdev %s from %lu to %lu\n", + vd->vdev_path, (ulong_t)psize, (ulong_t)newsize); + } + + spa_cursize = spa_get_space(spa); + ms_count = tvd->vdev_ms_count; + + /* + * Growing the vdev is a two step process: + * 1). expand the physical size (i.e. relabel) + * 2). online the vdev to create the new metaslabs + */ + if (vdev_walk_tree(tvd, grow_vdev, &newsize) != NULL || + vdev_walk_tree(tvd, online_vdev, NULL) != NULL || + tvd->vdev_state != VDEV_STATE_HEALTHY) { + if (zopt_verbose >= 5) { + (void) printf("Could not expand LUN because " + "some vdevs were not healthy\n"); } - (void) close(fd); + (void) spa_config_exit(spa, SCL_STATE, spa); + mutex_exit(&spa_namespace_lock); + (void) mutex_unlock(&ztest_shared->zs_vdev_lock); + return; } + (void) spa_config_exit(spa, SCL_STATE, spa); + mutex_exit(&spa_namespace_lock); + + /* + * Expanding the LUN will update the config asynchronously, + * thus we must wait for the async thread to complete any + * pending tasks before proceeding. + */ + mutex_enter(&spa->spa_async_lock); + while (spa->spa_async_thread != NULL || spa->spa_async_tasks) + cv_wait(&spa->spa_async_cv, &spa->spa_async_lock); + mutex_exit(&spa->spa_async_lock); + + spa_config_enter(spa, SCL_STATE, spa, RW_READER); + spa_newsize = spa_get_space(spa); + + /* + * Make sure we were able to grow the pool. + */ + if (ms_count >= tvd->vdev_ms_count || + spa_cursize >= spa_newsize) { + (void) printf("Top-level vdev metaslab count: " + "before %llu, after %llu\n", + (u_longlong_t)ms_count, + (u_longlong_t)tvd->vdev_ms_count); + fatal(0, "LUN expansion failed: before %llu, " + "after %llu\n", spa_cursize, spa_newsize); + } else if (zopt_verbose >= 5) { + char oldnumbuf[6], newnumbuf[6]; + + nicenum(spa_cursize, oldnumbuf); + nicenum(spa_newsize, newnumbuf); + (void) printf("%s grew from %s to %s\n", + spa->spa_name, oldnumbuf, newnumbuf); + } + spa_config_exit(spa, SCL_STATE, spa); (void) mutex_unlock(&ztest_shared->zs_vdev_lock); } |
