diff options
author | George Wilson <george.wilson@delphix.com> | 2013-08-07 10:24:34 -0800 |
---|---|---|
committer | Christopher Siden <chris.siden@delphix.com> | 2013-08-07 11:24:34 -0700 |
commit | 2c1e2b44148432fb7a509dd216a99299b6740250 (patch) | |
tree | bdf8fde7252bf0b5e6247b84f77c05ca42bc463c | |
parent | 98144673ce45bddc6d5dbe7e2afab720c660b5d7 (diff) | |
download | illumos-joyent-2c1e2b44148432fb7a509dd216a99299b6740250.tar.gz |
3949 ztest fault injection should avoid resilvering devices
3950 ztest: deadman fires when we're doing a scan
3951 ztest hang when running dedup test
3952 ztest: ztest_reguid test and ztest_fault_inject don't play nice together
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: Adam Leventhal <ahl@delphix.com>
Approved by: Richard Lowe <richlowe@richlowe.net>
-rw-r--r-- | usr/src/cmd/ztest/ztest.c | 52 | ||||
-rw-r--r-- | usr/src/lib/libzpool/common/llib-lzpool | 1 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/spa.c | 10 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/vdev_file.c | 5 |
4 files changed, 56 insertions, 12 deletions
diff --git a/usr/src/cmd/ztest/ztest.c b/usr/src/cmd/ztest/ztest.c index 3aca1fe0c9..22717410ea 100644 --- a/usr/src/cmd/ztest/ztest.c +++ b/usr/src/cmd/ztest/ztest.c @@ -184,6 +184,7 @@ static const ztest_shared_opts_t ztest_opts_defaults = { extern uint64_t metaslab_gang_bang; extern uint64_t metaslab_df_alloc_threshold; +extern uint64_t zfs_deadman_synctime; static ztest_shared_opts_t *ztest_shared_opts; static ztest_shared_opts_t ztest_opts; @@ -363,7 +364,7 @@ ztest_info_t ztest_info[] = { { ztest_fault_inject, 1, &zopt_sometimes }, { ztest_ddt_repair, 1, &zopt_sometimes }, { ztest_dmu_snapshot_hold, 1, &zopt_sometimes }, - { ztest_reguid, 1, &zopt_sometimes }, + { ztest_reguid, 1, &zopt_rarely }, { ztest_spa_rename, 1, &zopt_rarely }, { ztest_scrub, 1, &zopt_rarely }, { ztest_spa_upgrade, 1, &zopt_rarely }, @@ -4754,6 +4755,14 @@ ztest_fault_inject(ztest_ds_t *zd, uint64_t id) ASSERT(leaves >= 1); /* + * Grab the name lock as reader. There are some operations + * which don't like to have their vdevs changed while + * they are in progress (i.e. spa_change_guid). Those + * operations will have grabbed the name lock as writer. + */ + (void) rw_rdlock(&ztest_name_lock); + + /* * We need SCL_STATE here because we're going to look at vd0->vdev_tsd. */ spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); @@ -4782,7 +4791,14 @@ ztest_fault_inject(ztest_ds_t *zd, uint64_t id) if (vd0 != NULL && vd0->vdev_top->vdev_islog) islog = B_TRUE; - if (vd0 != NULL && maxfaults != 1) { + /* + * If the top-level vdev needs to be resilvered + * then we only allow faults on the device that is + * resilvering. 
+ */ + if (vd0 != NULL && maxfaults != 1 && + (!vdev_resilver_needed(vd0->vdev_top, NULL, NULL) || + vd0->vdev_resilvering)) { /* * Make vd0 explicitly claim to be unreadable, * or unwriteable, or reach behind its back @@ -4813,6 +4829,7 @@ ztest_fault_inject(ztest_ds_t *zd, uint64_t id) if (sav->sav_count == 0) { spa_config_exit(spa, SCL_STATE, FTAG); + (void) rw_unlock(&ztest_name_lock); return; } vd0 = sav->sav_vdevs[ztest_random(sav->sav_count)]; @@ -4826,6 +4843,7 @@ ztest_fault_inject(ztest_ds_t *zd, uint64_t id) } spa_config_exit(spa, SCL_STATE, FTAG); + (void) rw_unlock(&ztest_name_lock); /* * If we can tolerate two or more faults, or we're dealing @@ -5290,16 +5308,33 @@ static void * ztest_deadman_thread(void *arg) { ztest_shared_t *zs = arg; - int grace = 300; - hrtime_t delta; + spa_t *spa = ztest_spa; + hrtime_t delta, total = 0; - delta = (zs->zs_thread_stop - zs->zs_thread_start) / NANOSEC + grace; + for (;;) { + delta = (zs->zs_thread_stop - zs->zs_thread_start) / + NANOSEC + zfs_deadman_synctime; - (void) poll(NULL, 0, (int)(1000 * delta)); + (void) poll(NULL, 0, (int)(1000 * delta)); - fatal(0, "failed to complete within %d seconds of deadline", grace); + /* + * If the pool is suspended then fail immediately. Otherwise, + * check to see if the pool is making any progress. If + * vdev_deadman() discovers that there hasn't been any recent + * I/Os then it will end up aborting the tests. 
+ */ + if (spa_suspended(spa)) { + fatal(0, "aborting test after %llu seconds because " + "pool has transitioned to a suspended state.", + zfs_deadman_synctime); + return (NULL); + } + vdev_deadman(spa->spa_root_vdev); - return (NULL); + total += zfs_deadman_synctime; + (void) printf("ztest has been running for %lld seconds\n", + total); + } } static void @@ -6024,6 +6059,7 @@ main(int argc, char **argv) (void) setvbuf(stdout, NULL, _IOLBF, 0); dprintf_setup(&argc, argv); + zfs_deadman_synctime = 300; ztest_fd_rand = open("/dev/urandom", O_RDONLY); ASSERT3S(ztest_fd_rand, >=, 0); diff --git a/usr/src/lib/libzpool/common/llib-lzpool b/usr/src/lib/libzpool/common/llib-lzpool index d3864d2a9a..7e61b55a91 100644 --- a/usr/src/lib/libzpool/common/llib-lzpool +++ b/usr/src/lib/libzpool/common/llib-lzpool @@ -64,3 +64,4 @@ extern uint64_t metaslab_gang_bang; extern uint64_t metaslab_df_alloc_threshold; extern boolean_t zfeature_checks_disable; +extern uint64_t zfs_deadman_synctime; diff --git a/usr/src/uts/common/fs/zfs/spa.c b/usr/src/uts/common/fs/zfs/spa.c index 7334d39516..738a8a2a26 100644 --- a/usr/src/uts/common/fs/zfs/spa.c +++ b/usr/src/uts/common/fs/zfs/spa.c @@ -757,6 +757,7 @@ spa_change_guid(spa_t *spa) int error; uint64_t guid; + mutex_enter(&spa->spa_vdev_top_lock); mutex_enter(&spa_namespace_lock); guid = spa_generate_guid(NULL); @@ -769,6 +770,7 @@ spa_change_guid(spa_t *spa) } mutex_exit(&spa_namespace_lock); + mutex_exit(&spa->spa_vdev_top_lock); return (error); } @@ -4674,7 +4676,6 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done) if (pvd->vdev_ops == &vdev_spare_ops) cvd->vdev_unspare = B_FALSE; vdev_remove_parent(cvd); - cvd->vdev_resilvering = B_FALSE; } @@ -5302,6 +5303,13 @@ spa_vdev_resilver_done_hunt(vdev_t *vd) return (oldvd); } + if (vd->vdev_resilvering && vdev_dtl_empty(vd, DTL_MISSING) && + vdev_dtl_empty(vd, DTL_OUTAGE)) { + ASSERT(vd->vdev_ops->vdev_op_leaf); + vd->vdev_resilvering = B_FALSE; + 
vdev_config_dirty(vd->vdev_top); + } + /* * Check for a completed replacement. We always consider the first * vdev in the list to be the oldest vdev, and the last one to be diff --git a/usr/src/uts/common/fs/zfs/vdev_file.c b/usr/src/uts/common/fs/zfs/vdev_file.c index 2e67544345..a05abeb9d9 100644 --- a/usr/src/uts/common/fs/zfs/vdev_file.c +++ b/usr/src/uts/common/fs/zfs/vdev_file.c @@ -185,7 +185,6 @@ vdev_file_io_strategy(void *arg) static int vdev_file_io_start(zio_t *zio) { - spa_t *spa = zio->io_spa; vdev_t *vd = zio->io_vd; vdev_file_t *vf = vd->vdev_tsd; vdev_buf_t *vb; @@ -224,8 +223,8 @@ vdev_file_io_start(zio_t *zio) bp->b_private = vf->vf_vnode; bp->b_iodone = (int (*)())vdev_file_io_intr; - spa_taskq_dispatch_ent(spa, ZIO_TYPE_FREE, ZIO_TASKQ_ISSUE, - vdev_file_io_strategy, bp, 0, &zio->io_tqent); + VERIFY3U(taskq_dispatch(system_taskq, vdev_file_io_strategy, bp, + TQ_SLEEP), !=, 0); return (ZIO_PIPELINE_STOP); } |