diff options
author | Keith M Wesolowski <wesolows@foobazco.org> | 2013-08-07 22:53:10 +0000 |
---|---|---|
committer | Keith M Wesolowski <wesolows@foobazco.org> | 2013-08-07 22:53:13 +0000 |
commit | 8d105dfdc34afdf1754018fe761746fa9c7ddb29 (patch) | |
tree | f2ae1738ba44bd9942a8f94a9a0e965e327ef02b | |
parent | 1bfc1ecf10a7a4ecdd3e74fedc5fea9a61f9bd57 (diff) | |
parent | b4952e17e8858d3225793b28788278de9fe6038d (diff) | |
download | illumos-joyent-8d105dfdc34afdf1754018fe761746fa9c7ddb29.tar.gz |
[illumos-gate merge]
commit b4952e17e8858d3225793b28788278de9fe6038d
3956 ::vdev -r should work with pipelines
3957 ztest should update the cachefile before killing itself
3958 multiple scans can lead to partial resilvering
3959 ddt entries are not always resilvered
3960 dsl_scan can skip over dedup-ed blocks if physical birth != logical birth
3961 freed gang blocks are not resilvered and can cause pool to suspend
3962 ztest should print out zfs debug buffer before exiting
commit be9000cc677e0a8d04e5be45c61d7370fc8c7b54
3955 ztest failure: assertion refcount_count(&tx->tx_space_written) + delta <= tx->tx_space_towrite
commit 2c1e2b44148432fb7a509dd216a99299b6740250
3949 ztest fault injection should avoid resilvering devices
3950 ztest: deadman fires when we're doing a scan
3951 ztest hang when running dedup test
3952 ztest: ztest_reguid test and ztest_fault_inject don't play nice together
commit 98144673ce45bddc6d5dbe7e2afab720c660b5d7
3992 mdb ::stacks segv
-rw-r--r-- | usr/src/cmd/mdb/common/modules/genunix/findstack.c | 5 | ||||
-rw-r--r-- | usr/src/cmd/mdb/common/modules/zfs/zfs.c | 2 | ||||
-rw-r--r-- | usr/src/cmd/ztest/ztest.c | 74 | ||||
-rw-r--r-- | usr/src/lib/libzpool/common/llib-lzpool | 1 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/dmu_tx.c | 4 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/dsl_scan.c | 21 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/spa.c | 7 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/spa_config.c | 9 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/sys/dsl_scan.h | 31 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/sys/vdev_impl.h | 4 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/sys/zfs_debug.h | 3 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/vdev.c | 100 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/vdev_file.c | 5 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/vdev_label.c | 107 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/zfs_debug.c | 15 | ||||
-rw-r--r-- | usr/src/uts/common/sys/fs/zfs.h | 4 |
16 files changed, 289 insertions, 103 deletions
diff --git a/usr/src/cmd/mdb/common/modules/genunix/findstack.c b/usr/src/cmd/mdb/common/modules/genunix/findstack.c index 223764eda9..12b90b8f7d 100644 --- a/usr/src/cmd/mdb/common/modules/genunix/findstack.c +++ b/usr/src/cmd/mdb/common/modules/genunix/findstack.c @@ -21,6 +21,7 @@ /* * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013, Josef 'Jeff' Sipek <jeffpc@josefsipek.net> */ #include <mdb/mdb_modapi.h> @@ -340,6 +341,8 @@ stacks_cleanup(int force) mdb_free(stacks_array, stacks_array_size * sizeof (*stacks_array)); + mdb_free(stacks_hash, STACKS_HSIZE * sizeof (*stacks_hash)); + } else if (stacks_array != NULL) { for (idx = 0; idx < stacks_array_size; idx++) { if ((cur = stacks_array[idx]) != NULL) { @@ -360,6 +363,8 @@ stacks_cleanup(int force) stacks_array_size = 0; stacks_state = STACKS_STATE_CLEAN; + stacks_hash = NULL; + stacks_array = NULL; } /*ARGSUSED*/ diff --git a/usr/src/cmd/mdb/common/modules/zfs/zfs.c b/usr/src/cmd/mdb/common/modules/zfs/zfs.c index b04b05d75a..b3cf1d5f0b 100644 --- a/usr/src/cmd/mdb/common/modules/zfs/zfs.c +++ b/usr/src/cmd/mdb/common/modules/zfs/zfs.c @@ -1140,7 +1140,7 @@ do_print_vdev(uintptr_t addr, int flags, int depth, int stats, } if (flags & DCMD_PIPE_OUT) { - mdb_printf("%#lr", addr); + mdb_printf("%#lr\n", addr); } else { if (vdev.vdev_path != NULL) { if (mdb_readstr(desc, sizeof (desc), diff --git a/usr/src/cmd/ztest/ztest.c b/usr/src/cmd/ztest/ztest.c index 3aca1fe0c9..390061a8c5 100644 --- a/usr/src/cmd/ztest/ztest.c +++ b/usr/src/cmd/ztest/ztest.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2013 Steven Hartland. All rights reserved. 
*/ @@ -184,6 +184,7 @@ static const ztest_shared_opts_t ztest_opts_defaults = { extern uint64_t metaslab_gang_bang; extern uint64_t metaslab_df_alloc_threshold; +extern uint64_t zfs_deadman_synctime; static ztest_shared_opts_t *ztest_shared_opts; static ztest_shared_opts_t ztest_opts; @@ -363,7 +364,7 @@ ztest_info_t ztest_info[] = { { ztest_fault_inject, 1, &zopt_sometimes }, { ztest_ddt_repair, 1, &zopt_sometimes }, { ztest_dmu_snapshot_hold, 1, &zopt_sometimes }, - { ztest_reguid, 1, &zopt_sometimes }, + { ztest_reguid, 1, &zopt_rarely }, { ztest_spa_rename, 1, &zopt_rarely }, { ztest_scrub, 1, &zopt_rarely }, { ztest_spa_upgrade, 1, &zopt_rarely }, @@ -766,6 +767,16 @@ ztest_kill(ztest_shared_t *zs) { zs->zs_alloc = metaslab_class_get_alloc(spa_normal_class(ztest_spa)); zs->zs_space = metaslab_class_get_space(spa_normal_class(ztest_spa)); + + /* + * Before we kill off ztest, make sure that the config is updated. + * See comment above spa_config_sync(). + */ + mutex_enter(&spa_namespace_lock); + spa_config_sync(ztest_spa, B_FALSE, B_FALSE); + mutex_exit(&spa_namespace_lock); + + zfs_dbgmsg_print(FTAG); (void) kill(getpid(), SIGKILL); } @@ -2730,7 +2741,7 @@ ztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id) uint64_t leaf, top; uint64_t ashift = ztest_get_ashift(); uint64_t oldguid, pguid; - size_t oldsize, newsize; + uint64_t oldsize, newsize; char oldpath[MAXPATHLEN], newpath[MAXPATHLEN]; int replacing; int oldvd_has_siblings = B_FALSE; @@ -2889,8 +2900,8 @@ ztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id) if (error != expected_error && expected_error != EBUSY) { fatal(0, "attach (%s %llu, %s %llu, %d) " "returned %d, expected %d", - oldpath, (longlong_t)oldsize, newpath, - (longlong_t)newsize, replacing, error, expected_error); + oldpath, oldsize, newpath, + newsize, replacing, error, expected_error); } VERIFY(mutex_unlock(&ztest_vdev_lock) == 0); @@ -3604,6 +3615,9 @@ ztest_dmu_read_write(ztest_ds_t *zd, uint64_t id) else dmu_tx_hold_write(tx, bigobj, 
bigoff, bigsize); + /* This accounts for setting the checksum/compression. */ + dmu_tx_hold_bonus(tx, bigobj); + txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG); if (txg == 0) { umem_free(packbuf, packsize); @@ -4754,6 +4768,14 @@ ztest_fault_inject(ztest_ds_t *zd, uint64_t id) ASSERT(leaves >= 1); /* + * Grab the name lock as reader. There are some operations + * which don't like to have their vdevs changed while + * they are in progress (i.e. spa_change_guid). Those + * operations will have grabbed the name lock as writer. + */ + (void) rw_rdlock(&ztest_name_lock); + + /* * We need SCL_STATE here because we're going to look at vd0->vdev_tsd. */ spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); @@ -4782,7 +4804,14 @@ ztest_fault_inject(ztest_ds_t *zd, uint64_t id) if (vd0 != NULL && vd0->vdev_top->vdev_islog) islog = B_TRUE; - if (vd0 != NULL && maxfaults != 1) { + /* + * If the top-level vdev needs to be resilvered + * then we only allow faults on the device that is + * resilvering. + */ + if (vd0 != NULL && maxfaults != 1 && + (!vdev_resilver_needed(vd0->vdev_top, NULL, NULL) || + vd0->vdev_resilver_txg != 0)) { /* * Make vd0 explicitly claim to be unreadable, * or unwriteable, or reach behind its back @@ -4813,6 +4842,7 @@ ztest_fault_inject(ztest_ds_t *zd, uint64_t id) if (sav->sav_count == 0) { spa_config_exit(spa, SCL_STATE, FTAG); + (void) rw_unlock(&ztest_name_lock); return; } vd0 = sav->sav_vdevs[ztest_random(sav->sav_count)]; @@ -4826,6 +4856,7 @@ ztest_fault_inject(ztest_ds_t *zd, uint64_t id) } spa_config_exit(spa, SCL_STATE, FTAG); + (void) rw_unlock(&ztest_name_lock); /* * If we can tolerate two or more faults, or we're dealing @@ -5290,16 +5321,33 @@ static void * ztest_deadman_thread(void *arg) { ztest_shared_t *zs = arg; - int grace = 300; - hrtime_t delta; + spa_t *spa = ztest_spa; + hrtime_t delta, total = 0; - delta = (zs->zs_thread_stop - zs->zs_thread_start) / NANOSEC + grace; + for (;;) { + delta = (zs->zs_thread_stop - zs->zs_thread_start) 
/ + NANOSEC + zfs_deadman_synctime; - (void) poll(NULL, 0, (int)(1000 * delta)); + (void) poll(NULL, 0, (int)(1000 * delta)); - fatal(0, "failed to complete within %d seconds of deadline", grace); + /* + * If the pool is suspended then fail immediately. Otherwise, + * check to see if the pool is making any progress. If + * vdev_deadman() discovers that there hasn't been any recent + * I/Os then it will end up aborting the tests. + */ + if (spa_suspended(spa)) { + fatal(0, "aborting test after %llu seconds because " + "pool has transitioned to a suspended state.", + zfs_deadman_synctime); + return (NULL); + } + vdev_deadman(spa->spa_root_vdev); - return (NULL); + total += zfs_deadman_synctime; + (void) printf("ztest has been running for %lld seconds\n", + total); + } } static void @@ -5613,6 +5661,7 @@ ztest_run(ztest_shared_t *zs) zs->zs_alloc = metaslab_class_get_alloc(spa_normal_class(spa)); zs->zs_space = metaslab_class_get_space(spa_normal_class(spa)); + zfs_dbgmsg_print(FTAG); umem_free(tid, ztest_opts.zo_threads * sizeof (thread_t)); @@ -6024,6 +6073,7 @@ main(int argc, char **argv) (void) setvbuf(stdout, NULL, _IOLBF, 0); dprintf_setup(&argc, argv); + zfs_deadman_synctime = 300; ztest_fd_rand = open("/dev/urandom", O_RDONLY); ASSERT3S(ztest_fd_rand, >=, 0); diff --git a/usr/src/lib/libzpool/common/llib-lzpool b/usr/src/lib/libzpool/common/llib-lzpool index d3864d2a9a..7e61b55a91 100644 --- a/usr/src/lib/libzpool/common/llib-lzpool +++ b/usr/src/lib/libzpool/common/llib-lzpool @@ -64,3 +64,4 @@ extern uint64_t metaslab_gang_bang; extern uint64_t metaslab_df_alloc_threshold; extern boolean_t zfeature_checks_disable; +extern uint64_t zfs_deadman_synctime; diff --git a/usr/src/uts/common/fs/zfs/dmu_tx.c b/usr/src/uts/common/fs/zfs/dmu_tx.c index 84e518dfd1..d0dd730ed4 100644 --- a/usr/src/uts/common/fs/zfs/dmu_tx.c +++ b/usr/src/uts/common/fs/zfs/dmu_tx.c @@ -450,12 +450,12 @@ dmu_tx_count_free(dmu_tx_hold_t *txh, uint64_t off, uint64_t len) blkid = off >> 
dn->dn_datablkshift; nblks = (len + dn->dn_datablksz - 1) >> dn->dn_datablkshift; - if (blkid >= dn->dn_maxblkid) { + if (blkid > dn->dn_maxblkid) { rw_exit(&dn->dn_struct_rwlock); return; } if (blkid + nblks > dn->dn_maxblkid) - nblks = dn->dn_maxblkid - blkid; + nblks = dn->dn_maxblkid - blkid + 1; } l0span = nblks; /* save for later use to calc level > 1 overhead */ diff --git a/usr/src/uts/common/fs/zfs/dsl_scan.c b/usr/src/uts/common/fs/zfs/dsl_scan.c index 34816986a1..e1859cc34c 100644 --- a/usr/src/uts/common/fs/zfs/dsl_scan.c +++ b/usr/src/uts/common/fs/zfs/dsl_scan.c @@ -194,6 +194,7 @@ dsl_scan_setup_sync(void *arg, dmu_tx_t *tx) scn->scn_phys.scn_errors = 0; scn->scn_phys.scn_to_examine = spa->spa_root_vdev->vdev_stat.vs_alloc; scn->scn_restart_txg = 0; + scn->scn_done_txg = 0; spa_scan_stat_init(spa); if (DSL_SCAN_IS_SCRUB_RESILVER(scn)) { @@ -769,7 +770,7 @@ dsl_scan_visitbp(blkptr_t *bp, const zbookmark_t *zb, * Don't scan it now unless we need to because something * under it was modified. */ - if (bp->blk_birth <= scn->scn_phys.scn_cur_max_txg) { + if (BP_PHYSICAL_BIRTH(bp) <= scn->scn_phys.scn_cur_max_txg) { scan_funcs[scn->scn_phys.scn_func](dp, bp, zb); } if (buf) @@ -1214,7 +1215,7 @@ dsl_scan_ddt_entry(dsl_scan_t *scn, enum zio_checksum checksum, for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) { if (ddp->ddp_phys_birth == 0 || - ddp->ddp_phys_birth > scn->scn_phys.scn_cur_max_txg) + ddp->ddp_phys_birth > scn->scn_phys.scn_max_txg) continue; ddt_bp_create(checksum, ddk, ddp, &bp); @@ -1457,6 +1458,16 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx) if (scn->scn_phys.scn_state != DSS_SCANNING) return; + if (scn->scn_done_txg == tx->tx_txg) { + ASSERT(!scn->scn_pausing); + /* finished with scan. 
*/ + zfs_dbgmsg("txg %llu scan complete", tx->tx_txg); + dsl_scan_done(scn, B_TRUE, tx); + ASSERT3U(spa->spa_scrub_inflight, ==, 0); + dsl_scan_sync_state(scn, tx); + return; + } + if (scn->scn_phys.scn_ddt_bookmark.ddb_class <= scn->scn_phys.scn_ddt_class_max) { zfs_dbgmsg("doing scan sync txg %llu; " @@ -1492,9 +1503,9 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx) (longlong_t)NSEC2MSEC(gethrtime() - scn->scn_sync_start_time)); if (!scn->scn_pausing) { - /* finished with scan. */ - zfs_dbgmsg("finished scan txg %llu", (longlong_t)tx->tx_txg); - dsl_scan_done(scn, B_TRUE, tx); + scn->scn_done_txg = tx->tx_txg + 1; + zfs_dbgmsg("txg %llu traversal complete, waiting till txg %llu", + tx->tx_txg, scn->scn_done_txg); } if (DSL_SCAN_IS_SCRUB_RESILVER(scn)) { diff --git a/usr/src/uts/common/fs/zfs/spa.c b/usr/src/uts/common/fs/zfs/spa.c index 7334d39516..13eaaecbf7 100644 --- a/usr/src/uts/common/fs/zfs/spa.c +++ b/usr/src/uts/common/fs/zfs/spa.c @@ -757,6 +757,7 @@ spa_change_guid(spa_t *spa) int error; uint64_t guid; + mutex_enter(&spa->spa_vdev_top_lock); mutex_enter(&spa_namespace_lock); guid = spa_generate_guid(NULL); @@ -769,6 +770,7 @@ spa_change_guid(spa_t *spa) } mutex_exit(&spa_namespace_lock); + mutex_exit(&spa->spa_vdev_top_lock); return (error); } @@ -4444,7 +4446,7 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing) } /* mark the device being resilvered */ - newvd->vdev_resilvering = B_TRUE; + newvd->vdev_resilver_txg = txg; /* * If the parent is not a mirror, or if we're replacing, insert the new @@ -4674,7 +4676,6 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done) if (pvd->vdev_ops == &vdev_spare_ops) cvd->vdev_unspare = B_FALSE; vdev_remove_parent(cvd); - cvd->vdev_resilvering = B_FALSE; } @@ -5391,6 +5392,8 @@ spa_vdev_resilver_done(spa_t *spa) ASSERT(pvd->vdev_ops == &vdev_replacing_ops); sguid = ppvd->vdev_child[1]->vdev_guid; } + ASSERT(vd->vdev_resilver_txg == 0 || !vdev_dtl_required(vd)); + 
spa_config_exit(spa, SCL_ALL, FTAG); if (spa_vdev_detach(spa, guid, pguid, B_TRUE) != 0) return; diff --git a/usr/src/uts/common/fs/zfs/spa_config.c b/usr/src/uts/common/fs/zfs/spa_config.c index d97fc32fbf..47bb595908 100644 --- a/usr/src/uts/common/fs/zfs/spa_config.c +++ b/usr/src/uts/common/fs/zfs/spa_config.c @@ -22,7 +22,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. */ #include <sys/spa.h> @@ -198,7 +198,12 @@ spa_config_write(spa_config_dirent_t *dp, nvlist_t *nvl) /* * Synchronize pool configuration to disk. This must be called with the - * namespace lock held. + * namespace lock held. Synchronizing the pool cache is typically done after + * the configuration has been synced to the MOS. This exposes a window where + * the MOS config will have been updated but the cache file has not. If + * the system were to crash at that instant then the cached config may not + * contain the correct information to open the pool and an explicit import + * would be required. */ void spa_config_sync(spa_t *target, boolean_t removing, boolean_t postsysevent) diff --git a/usr/src/uts/common/fs/zfs/sys/dsl_scan.h b/usr/src/uts/common/fs/zfs/sys/dsl_scan.h index aae8c312af..bf8c5ac824 100644 --- a/usr/src/uts/common/fs/zfs/sys/dsl_scan.h +++ b/usr/src/uts/common/fs/zfs/sys/dsl_scan.h @@ -72,11 +72,42 @@ typedef enum dsl_scan_flags { DSF_VISIT_DS_AGAIN = 1<<0, } dsl_scan_flags_t; +/* + * Every pool will have one dsl_scan_t and this structure will contain + * in-memory information about the scan and a pointer to the on-disk + * representation (i.e. dsl_scan_phys_t). Most of the state of the scan + * is contained on-disk to allow the scan to resume in the event of a reboot + * or panic.
This structure maintains information about the behavior of a + * running scan, some caching information, and how it should traverse the pool. + * + * The following members of this structure direct the behavior of the scan: + * + * scn_pausing - a scan that cannot be completed in a single txg or + * has exceeded its allotted time will need to pause. + * When this flag is set the scanner will stop traversing + * the pool and write out the current state to disk. + * + * scn_restart_txg - directs the scanner to either restart or start a + * scan at the specified txg value. + * + * scn_done_txg - when a scan completes its traversal it will set + * the completion txg to the next txg. This is necessary + * to ensure that any blocks that were freed during + * the scan but have not yet been processed (i.e. deferred + * frees) are accounted for. + * + * This structure also maintains information about deferred frees which are + * a special kind of traversal. Deferred free can exist in either a bptree or + * a bpobj structure. The scn_is_bptree flag will indicate the type of + * deferred free that is in progress. If the deferred free is part of an + * asynchronous destroy then the scn_async_destroying flag will be set. + */ typedef struct dsl_scan { struct dsl_pool *scn_dp; boolean_t scn_pausing; uint64_t scn_restart_txg; + uint64_t scn_done_txg; uint64_t scn_sync_start_time; zio_t *scn_zio_root; diff --git a/usr/src/uts/common/fs/zfs/sys/vdev_impl.h b/usr/src/uts/common/fs/zfs/sys/vdev_impl.h index 02e3e838c3..9f68ac2968 100644 --- a/usr/src/uts/common/fs/zfs/sys/vdev_impl.h +++ b/usr/src/uts/common/fs/zfs/sys/vdev_impl.h @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved.
*/ #ifndef _SYS_VDEV_IMPL_H @@ -174,7 +174,7 @@ struct vdev { uint64_t vdev_faulted; /* persistent faulted state */ uint64_t vdev_degraded; /* persistent degraded state */ uint64_t vdev_removed; /* persistent removed state */ - uint64_t vdev_resilvering; /* persistent resilvering state */ + uint64_t vdev_resilver_txg; /* persistent resilvering state */ uint64_t vdev_nparity; /* number of parity devices for raidz */ char *vdev_path; /* vdev path (if any) */ char *vdev_devid; /* vdev devid (if any) */ diff --git a/usr/src/uts/common/fs/zfs/sys/zfs_debug.h b/usr/src/uts/common/fs/zfs/sys/zfs_debug.h index 14eb2abdc1..c4dcfaec65 100644 --- a/usr/src/uts/common/fs/zfs/sys/zfs_debug.h +++ b/usr/src/uts/common/fs/zfs/sys/zfs_debug.h @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. */ #ifndef _SYS_ZFS_DEBUG_H @@ -77,6 +77,7 @@ typedef struct zfs_dbgmsg { extern void zfs_dbgmsg_init(void); extern void zfs_dbgmsg_fini(void); extern void zfs_dbgmsg(const char *fmt, ...); +extern void zfs_dbgmsg_print(const char *tag); #ifndef _KERNEL extern int dprintf_find_string(const char *string); diff --git a/usr/src/uts/common/fs/zfs/vdev.c b/usr/src/uts/common/fs/zfs/vdev.c index 7a409bd7ed..dc5eb627f1 100644 --- a/usr/src/uts/common/fs/zfs/vdev.c +++ b/usr/src/uts/common/fs/zfs/vdev.c @@ -521,8 +521,8 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id, (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_OFFLINE, &vd->vdev_offline); - (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_RESILVERING, - &vd->vdev_resilvering); + (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_RESILVER_TXG, + &vd->vdev_resilver_txg); /* * When importing a pool, we want to ignore the persistent fault @@ -1663,6 +1663,75 @@ vdev_dtl_empty(vdev_t *vd, vdev_dtl_type_t t) } /* + * Returns the lowest txg in the DTL range. 
+ */ +static uint64_t +vdev_dtl_min(vdev_t *vd) +{ + space_seg_t *ss; + + ASSERT(MUTEX_HELD(&vd->vdev_dtl_lock)); + ASSERT3U(vd->vdev_dtl[DTL_MISSING].sm_space, !=, 0); + ASSERT0(vd->vdev_children); + + ss = avl_first(&vd->vdev_dtl[DTL_MISSING].sm_root); + return (ss->ss_start - 1); +} + +/* + * Returns the highest txg in the DTL. + */ +static uint64_t +vdev_dtl_max(vdev_t *vd) +{ + space_seg_t *ss; + + ASSERT(MUTEX_HELD(&vd->vdev_dtl_lock)); + ASSERT3U(vd->vdev_dtl[DTL_MISSING].sm_space, !=, 0); + ASSERT0(vd->vdev_children); + + ss = avl_last(&vd->vdev_dtl[DTL_MISSING].sm_root); + return (ss->ss_end); +} + +/* + * Determine if a resilvering vdev should remove any DTL entries from + * its range. If the vdev was resilvering for the entire duration of the + * scan then it should excise that range from its DTLs. Otherwise, this + * vdev is considered partially resilvered and should leave its DTL + * entries intact. The comment in vdev_dtl_reassess() describes how we + * excise the DTLs. + */ +static boolean_t +vdev_dtl_should_excise(vdev_t *vd) +{ + spa_t *spa = vd->vdev_spa; + dsl_scan_t *scn = spa->spa_dsl_pool->dp_scan; + + ASSERT0(scn->scn_phys.scn_errors); + ASSERT0(vd->vdev_children); + + if (vd->vdev_resilver_txg == 0 || + vd->vdev_dtl[DTL_MISSING].sm_space == 0) + return (B_TRUE); + + /* + * When a resilver is initiated the scan will assign the scn_max_txg + * value to the highest txg value that exists in all DTLs. If this + * device's max DTL is not part of this scan (i.e. it is not in + * the range (scn_min_txg, scn_max_txg] then it is not eligible + * for excision. + */ + if (vdev_dtl_max(vd) <= scn->scn_phys.scn_max_txg) { + ASSERT3U(scn->scn_phys.scn_min_txg, <=, vdev_dtl_min(vd)); + ASSERT3U(scn->scn_phys.scn_min_txg, <, vd->vdev_resilver_txg); + ASSERT3U(vd->vdev_resilver_txg, <=, scn->scn_phys.scn_max_txg); + return (B_TRUE); + } + return (B_FALSE); +} + +/* * Reassess DTLs after a config change or scrub completion. 
*/ void @@ -1685,9 +1754,17 @@ vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg, int scrub_done) dsl_scan_t *scn = spa->spa_dsl_pool->dp_scan; mutex_enter(&vd->vdev_dtl_lock); + + /* + * If we've completed a scan cleanly then determine + * if this vdev should remove any DTLs. We only want to + * excise regions on vdevs that were available during + * the entire duration of this scan. + */ if (scrub_txg != 0 && (spa->spa_scrub_started || - (scn && scn->scn_phys.scn_errors == 0))) { + (scn != NULL && scn->scn_phys.scn_errors == 0)) && + vdev_dtl_should_excise(vd)) { /* * We completed a scrub up to scrub_txg. If we * did it without rebooting, then the scrub dtl @@ -1726,6 +1803,16 @@ vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg, int scrub_done) else space_map_walk(&vd->vdev_dtl[DTL_MISSING], space_map_add, &vd->vdev_dtl[DTL_OUTAGE]); + + /* + * If the vdev was resilvering and no longer has any + * DTLs then reset its resilvering flag. + */ + if (vd->vdev_resilver_txg != 0 && + vd->vdev_dtl[DTL_MISSING].sm_space == 0 && + vd->vdev_dtl[DTL_OUTAGE].sm_space == 0) + vd->vdev_resilver_txg = 0; + mutex_exit(&vd->vdev_dtl_lock); if (txg != 0) @@ -1902,12 +1989,9 @@ vdev_resilver_needed(vdev_t *vd, uint64_t *minp, uint64_t *maxp) mutex_enter(&vd->vdev_dtl_lock); if (vd->vdev_dtl[DTL_MISSING].sm_space != 0 && vdev_writeable(vd)) { - space_seg_t *ss; - ss = avl_first(&vd->vdev_dtl[DTL_MISSING].sm_root); - thismin = ss->ss_start - 1; - ss = avl_last(&vd->vdev_dtl[DTL_MISSING].sm_root); - thismax = ss->ss_end; + thismin = vdev_dtl_min(vd); + thismax = vdev_dtl_max(vd); needed = B_TRUE; } mutex_exit(&vd->vdev_dtl_lock); diff --git a/usr/src/uts/common/fs/zfs/vdev_file.c b/usr/src/uts/common/fs/zfs/vdev_file.c index 2e67544345..a05abeb9d9 100644 --- a/usr/src/uts/common/fs/zfs/vdev_file.c +++ b/usr/src/uts/common/fs/zfs/vdev_file.c @@ -185,7 +185,6 @@ vdev_file_io_strategy(void *arg) static int vdev_file_io_start(zio_t *zio) { - spa_t *spa = 
zio->io_spa; vdev_t *vd = zio->io_vd; vdev_file_t *vf = vd->vdev_tsd; vdev_buf_t *vb; @@ -224,8 +223,8 @@ vdev_file_io_start(zio_t *zio) bp->b_private = vf->vf_vnode; bp->b_iodone = (int (*)())vdev_file_io_intr; - spa_taskq_dispatch_ent(spa, ZIO_TYPE_FREE, ZIO_TASKQ_ISSUE, - vdev_file_io_strategy, bp, 0, &zio->io_tqent); + VERIFY3U(taskq_dispatch(system_taskq, vdev_file_io_strategy, bp, + TQ_SLEEP), !=, 0); return (ZIO_PIPELINE_STOP); } diff --git a/usr/src/uts/common/fs/zfs/vdev_label.c b/usr/src/uts/common/fs/zfs/vdev_label.c index 904918c3a4..ee9921f94a 100644 --- a/usr/src/uts/common/fs/zfs/vdev_label.c +++ b/usr/src/uts/common/fs/zfs/vdev_label.c @@ -216,30 +216,25 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats, { nvlist_t *nv = NULL; - VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0); + nv = fnvlist_alloc(); - VERIFY(nvlist_add_string(nv, ZPOOL_CONFIG_TYPE, - vd->vdev_ops->vdev_op_type) == 0); + fnvlist_add_string(nv, ZPOOL_CONFIG_TYPE, vd->vdev_ops->vdev_op_type); if (!(flags & (VDEV_CONFIG_SPARE | VDEV_CONFIG_L2CACHE))) - VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_ID, vd->vdev_id) - == 0); - VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_GUID, vd->vdev_guid) == 0); + fnvlist_add_uint64(nv, ZPOOL_CONFIG_ID, vd->vdev_id); + fnvlist_add_uint64(nv, ZPOOL_CONFIG_GUID, vd->vdev_guid); if (vd->vdev_path != NULL) - VERIFY(nvlist_add_string(nv, ZPOOL_CONFIG_PATH, - vd->vdev_path) == 0); + fnvlist_add_string(nv, ZPOOL_CONFIG_PATH, vd->vdev_path); if (vd->vdev_devid != NULL) - VERIFY(nvlist_add_string(nv, ZPOOL_CONFIG_DEVID, - vd->vdev_devid) == 0); + fnvlist_add_string(nv, ZPOOL_CONFIG_DEVID, vd->vdev_devid); if (vd->vdev_physpath != NULL) - VERIFY(nvlist_add_string(nv, ZPOOL_CONFIG_PHYS_PATH, - vd->vdev_physpath) == 0); + fnvlist_add_string(nv, ZPOOL_CONFIG_PHYS_PATH, + vd->vdev_physpath); if (vd->vdev_fru != NULL) - VERIFY(nvlist_add_string(nv, ZPOOL_CONFIG_FRU, - vd->vdev_fru) == 0); + fnvlist_add_string(nv, ZPOOL_CONFIG_FRU, vd->vdev_fru); 
if (vd->vdev_nparity != 0) { ASSERT(strcmp(vd->vdev_ops->vdev_op_type, @@ -260,59 +255,54 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats, * that only support a single parity device -- older software * will just ignore it. */ - VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_NPARITY, - vd->vdev_nparity) == 0); + fnvlist_add_uint64(nv, ZPOOL_CONFIG_NPARITY, vd->vdev_nparity); } if (vd->vdev_wholedisk != -1ULL) - VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, - vd->vdev_wholedisk) == 0); + fnvlist_add_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, + vd->vdev_wholedisk); if (vd->vdev_not_present) - VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT, 1) == 0); + fnvlist_add_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT, 1); if (vd->vdev_isspare) - VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_IS_SPARE, 1) == 0); + fnvlist_add_uint64(nv, ZPOOL_CONFIG_IS_SPARE, 1); if (!(flags & (VDEV_CONFIG_SPARE | VDEV_CONFIG_L2CACHE)) && vd == vd->vdev_top) { - VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_METASLAB_ARRAY, - vd->vdev_ms_array) == 0); - VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_METASLAB_SHIFT, - vd->vdev_ms_shift) == 0); - VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_ASHIFT, - vd->vdev_ashift) == 0); - VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_ASIZE, - vd->vdev_asize) == 0); - VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_IS_LOG, - vd->vdev_islog) == 0); + fnvlist_add_uint64(nv, ZPOOL_CONFIG_METASLAB_ARRAY, + vd->vdev_ms_array); + fnvlist_add_uint64(nv, ZPOOL_CONFIG_METASLAB_SHIFT, + vd->vdev_ms_shift); + fnvlist_add_uint64(nv, ZPOOL_CONFIG_ASHIFT, vd->vdev_ashift); + fnvlist_add_uint64(nv, ZPOOL_CONFIG_ASIZE, + vd->vdev_asize); + fnvlist_add_uint64(nv, ZPOOL_CONFIG_IS_LOG, vd->vdev_islog); if (vd->vdev_removing) - VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_REMOVING, - vd->vdev_removing) == 0); + fnvlist_add_uint64(nv, ZPOOL_CONFIG_REMOVING, + vd->vdev_removing); } if (vd->vdev_dtl_smo.smo_object != 0) - VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_DTL, - vd->vdev_dtl_smo.smo_object) == 
0); + fnvlist_add_uint64(nv, ZPOOL_CONFIG_DTL, + vd->vdev_dtl_smo.smo_object); if (vd->vdev_crtxg) - VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_CREATE_TXG, - vd->vdev_crtxg) == 0); + fnvlist_add_uint64(nv, ZPOOL_CONFIG_CREATE_TXG, vd->vdev_crtxg); if (getstats) { vdev_stat_t vs; pool_scan_stat_t ps; vdev_get_stats(vd, &vs); - VERIFY(nvlist_add_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS, - (uint64_t *)&vs, sizeof (vs) / sizeof (uint64_t)) == 0); + fnvlist_add_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS, + (uint64_t *)&vs, sizeof (vs) / sizeof (uint64_t)); /* provide either current or previous scan information */ if (spa_scan_get_stats(spa, &ps) == 0) { - VERIFY(nvlist_add_uint64_array(nv, + fnvlist_add_uint64_array(nv, ZPOOL_CONFIG_SCAN_STATS, (uint64_t *)&ps, - sizeof (pool_scan_stat_t) / sizeof (uint64_t)) - == 0); + sizeof (pool_scan_stat_t) / sizeof (uint64_t)); } } @@ -342,8 +332,8 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats, } if (idx) { - VERIFY(nvlist_add_nvlist_array(nv, - ZPOOL_CONFIG_CHILDREN, child, idx) == 0); + fnvlist_add_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, + child, idx); } for (c = 0; c < idx; c++) @@ -355,26 +345,20 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats, const char *aux = NULL; if (vd->vdev_offline && !vd->vdev_tmpoffline) - VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_OFFLINE, - B_TRUE) == 0); - if (vd->vdev_resilvering) - VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_RESILVERING, - B_TRUE) == 0); + fnvlist_add_uint64(nv, ZPOOL_CONFIG_OFFLINE, B_TRUE); + if (vd->vdev_resilver_txg != 0) + fnvlist_add_uint64(nv, ZPOOL_CONFIG_RESILVER_TXG, + vd->vdev_resilver_txg); if (vd->vdev_faulted) - VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_FAULTED, - B_TRUE) == 0); + fnvlist_add_uint64(nv, ZPOOL_CONFIG_FAULTED, B_TRUE); if (vd->vdev_degraded) - VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_DEGRADED, - B_TRUE) == 0); + fnvlist_add_uint64(nv, ZPOOL_CONFIG_DEGRADED, B_TRUE); if (vd->vdev_removed) - 
VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_REMOVED, - B_TRUE) == 0); + fnvlist_add_uint64(nv, ZPOOL_CONFIG_REMOVED, B_TRUE); if (vd->vdev_unspare) - VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_UNSPARE, - B_TRUE) == 0); + fnvlist_add_uint64(nv, ZPOOL_CONFIG_UNSPARE, B_TRUE); if (vd->vdev_ishole) - VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_IS_HOLE, - B_TRUE) == 0); + fnvlist_add_uint64(nv, ZPOOL_CONFIG_IS_HOLE, B_TRUE); switch (vd->vdev_stat.vs_aux) { case VDEV_AUX_ERR_EXCEEDED: @@ -387,12 +371,11 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats, } if (aux != NULL) - VERIFY(nvlist_add_string(nv, ZPOOL_CONFIG_AUX_STATE, - aux) == 0); + fnvlist_add_string(nv, ZPOOL_CONFIG_AUX_STATE, aux); if (vd->vdev_splitting && vd->vdev_orig_guid != 0LL) { - VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_ORIG_GUID, - vd->vdev_orig_guid) == 0); + fnvlist_add_uint64(nv, ZPOOL_CONFIG_ORIG_GUID, + vd->vdev_orig_guid); } } diff --git a/usr/src/uts/common/fs/zfs/zfs_debug.c b/usr/src/uts/common/fs/zfs/zfs_debug.c index 44824e15a0..26ea561eb1 100644 --- a/usr/src/uts/common/fs/zfs/zfs_debug.c +++ b/usr/src/uts/common/fs/zfs/zfs_debug.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. */ #include <sys/zfs_context.h> @@ -94,3 +94,16 @@ zfs_dbgmsg(const char *fmt, ...) 
} mutex_exit(&zfs_dbgmsgs_lock); } + +void +zfs_dbgmsg_print(const char *tag) +{ + zfs_dbgmsg_t *zdm; + + (void) printf("ZFS_DBGMSG(%s):\n", tag); + mutex_enter(&zfs_dbgmsgs_lock); + for (zdm = list_head(&zfs_dbgmsgs); zdm; + zdm = list_next(&zfs_dbgmsgs, zdm)) + (void) printf("%s\n", zdm->zdm_msg); + mutex_exit(&zfs_dbgmsgs_lock); +} diff --git a/usr/src/uts/common/sys/fs/zfs.h b/usr/src/uts/common/sys/fs/zfs.h index 6be8aee742..f6eb9aa490 100644 --- a/usr/src/uts/common/sys/fs/zfs.h +++ b/usr/src/uts/common/sys/fs/zfs.h @@ -21,7 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2012, Joyent, Inc. All rights reserved. */ @@ -522,7 +522,7 @@ typedef struct zpool_rewind_policy { #define ZPOOL_CONFIG_SPLIT_GUID "split_guid" #define ZPOOL_CONFIG_SPLIT_LIST "guid_list" #define ZPOOL_CONFIG_REMOVING "removing" -#define ZPOOL_CONFIG_RESILVERING "resilvering" +#define ZPOOL_CONFIG_RESILVER_TXG "resilver_txg" #define ZPOOL_CONFIG_COMMENT "comment" #define ZPOOL_CONFIG_SUSPENDED "suspended" /* not stored on disk */ #define ZPOOL_CONFIG_TIMESTAMP "timestamp" /* not stored on disk */ |