summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJerry Jelinek <jerry.jelinek@joyent.com>2018-10-05 11:46:27 +0000
committerJerry Jelinek <jerry.jelinek@joyent.com>2018-10-05 11:46:27 +0000
commitb1917065992b99886121c8fbeeb447643cb0a1fb (patch)
tree983043c89e828e04f0469f6a7f149acd8213d1da
parent4c14c4cff5adaaa79d7099e153c3ace3a0d65148 (diff)
parentb86e7e3f0e50748bb5bb5cc91632d72ff17f08dd (diff)
downloadillumos-joyent-b1917065992b99886121c8fbeeb447643cb0a1fb.tar.gz
[illumos-gate merge]
commit b86e7e3f0e50748bb5bb5cc91632d72ff17f08dd 9738 9112 broke third block copy allocations within one metaslab group commit 4e75ba682600b2bf19d158577d528c3db65050e8 9690 metaslab of vdev with no space maps was flushed during removal commit 29bf2d68bef208274f5a54a14cc80c4a8cb76f53 9688 aggsum_fini leaks memory commit 6aee0ad76969eb0027131b3a338f2d94ae86f728 9681 ztest failure in spa_history_log_internal due to spa_rename() commit ade2c82828f0dca1f46919aa1bd936ea1a5a0047 9682 page fault in dsl_async_clone_destroy() while opening pool commit 84927f52bd837f6e4882a19e43fd026f1828d910 9862 fix typo in comment in vdev_impl.h
-rw-r--r--usr/src/cmd/zdb/zdb.c5
-rw-r--r--usr/src/cmd/ztest/ztest.c74
-rw-r--r--usr/src/uts/common/fs/zfs/aggsum.c3
-rw-r--r--usr/src/uts/common/fs/zfs/arc.c15
-rw-r--r--usr/src/uts/common/fs/zfs/metaslab.c12
-rw-r--r--usr/src/uts/common/fs/zfs/spa.c13
-rw-r--r--usr/src/uts/common/fs/zfs/spa_misc.c53
-rw-r--r--usr/src/uts/common/fs/zfs/sys/spa.h1
-rw-r--r--usr/src/uts/common/fs/zfs/sys/vdev_impl.h2
-rw-r--r--usr/src/uts/common/fs/zfs/vdev.c22
10 files changed, 44 insertions, 156 deletions
diff --git a/usr/src/cmd/zdb/zdb.c b/usr/src/cmd/zdb/zdb.c
index 2470545a52..de8485a7de 100644
--- a/usr/src/cmd/zdb/zdb.c
+++ b/usr/src/cmd/zdb/zdb.c
@@ -2243,10 +2243,11 @@ dump_dir(objset_t *os)
dmu_objset_name(os, osname);
(void) printf("Dataset %s [%s], ID %llu, cr_txg %llu, "
- "%s, %llu objects%s\n",
+ "%s, %llu objects%s%s\n",
osname, type, (u_longlong_t)dmu_objset_id(os),
(u_longlong_t)dds.dds_creation_txg,
- numbuf, (u_longlong_t)usedobjs, blkbuf);
+ numbuf, (u_longlong_t)usedobjs, blkbuf,
+ (dds.dds_inconsistent) ? " (inconsistent)" : "");
if (zopt_objects != 0) {
for (i = 0; i < zopt_objects; i++)
diff --git a/usr/src/cmd/ztest/ztest.c b/usr/src/cmd/ztest/ztest.c
index c10186aa38..ca1c7a3cc0 100644
--- a/usr/src/cmd/ztest/ztest.c
+++ b/usr/src/cmd/ztest/ztest.c
@@ -334,7 +334,6 @@ ztest_func_t ztest_spa_create_destroy;
ztest_func_t ztest_fault_inject;
ztest_func_t ztest_ddt_repair;
ztest_func_t ztest_dmu_snapshot_hold;
-ztest_func_t ztest_spa_rename;
ztest_func_t ztest_scrub;
ztest_func_t ztest_dsl_dataset_promote_busy;
ztest_func_t ztest_vdev_attach_detach;
@@ -379,7 +378,6 @@ ztest_info_t ztest_info[] = {
{ ztest_ddt_repair, 1, &zopt_sometimes },
{ ztest_dmu_snapshot_hold, 1, &zopt_sometimes },
{ ztest_reguid, 1, &zopt_rarely },
- { ztest_spa_rename, 1, &zopt_rarely },
{ ztest_scrub, 1, &zopt_rarely },
{ ztest_spa_upgrade, 1, &zopt_rarely },
{ ztest_dsl_dataset_promote_busy, 1, &zopt_rarely },
@@ -5423,59 +5421,6 @@ ztest_reguid(ztest_ds_t *zd, uint64_t id)
VERIFY3U(load, ==, spa_load_guid(spa));
}
-/*
- * Rename the pool to a different name and then rename it back.
- */
-/* ARGSUSED */
-void
-ztest_spa_rename(ztest_ds_t *zd, uint64_t id)
-{
- char *oldname, *newname;
- spa_t *spa;
-
- rw_enter(&ztest_name_lock, RW_WRITER);
-
- oldname = ztest_opts.zo_pool;
- newname = umem_alloc(strlen(oldname) + 5, UMEM_NOFAIL);
- (void) strcpy(newname, oldname);
- (void) strcat(newname, "_tmp");
-
- /*
- * Do the rename
- */
- VERIFY3U(0, ==, spa_rename(oldname, newname));
-
- /*
- * Try to open it under the old name, which shouldn't exist
- */
- VERIFY3U(ENOENT, ==, spa_open(oldname, &spa, FTAG));
-
- /*
- * Open it under the new name and make sure it's still the same spa_t.
- */
- VERIFY3U(0, ==, spa_open(newname, &spa, FTAG));
-
- ASSERT(spa == ztest_spa);
- spa_close(spa, FTAG);
-
- /*
- * Rename it back to the original
- */
- VERIFY3U(0, ==, spa_rename(newname, oldname));
-
- /*
- * Make sure it can still be opened
- */
- VERIFY3U(0, ==, spa_open(oldname, &spa, FTAG));
-
- ASSERT(spa == ztest_spa);
- spa_close(spa, FTAG);
-
- umem_free(newname, strlen(newname) + 1);
-
- rw_exit(&ztest_name_lock);
-}
-
static vdev_t *
ztest_random_concrete_vdev_leaf(vdev_t *vd)
{
@@ -6529,7 +6474,6 @@ main(int argc, char **argv)
ztest_shared_callstate_t *zc;
char timebuf[100];
char numbuf[NN_NUMBUF_SZ];
- spa_t *spa;
char *cmd;
boolean_t hasalt;
char *fd_data_str = getenv("ZTEST_FD_DATA");
@@ -6704,24 +6648,6 @@ main(int argc, char **argv)
(void) printf("\n");
}
- /*
- * It's possible that we killed a child during a rename test,
- * in which case we'll have a 'ztest_tmp' pool lying around
- * instead of 'ztest'. Do a blind rename in case this happened.
- */
- kernel_init(FREAD);
- if (spa_open(ztest_opts.zo_pool, &spa, FTAG) == 0) {
- spa_close(spa, FTAG);
- } else {
- char tmpname[ZFS_MAX_DATASET_NAME_LEN];
- kernel_fini();
- kernel_init(FREAD | FWRITE);
- (void) snprintf(tmpname, sizeof (tmpname), "%s_tmp",
- ztest_opts.zo_pool);
- (void) spa_rename(tmpname, ztest_opts.zo_pool);
- }
- kernel_fini();
-
ztest_run_zdb(ztest_opts.zo_pool);
}
diff --git a/usr/src/uts/common/fs/zfs/aggsum.c b/usr/src/uts/common/fs/zfs/aggsum.c
index 814f6c276b..c4e78e2a6d 100644
--- a/usr/src/uts/common/fs/zfs/aggsum.c
+++ b/usr/src/uts/common/fs/zfs/aggsum.c
@@ -13,7 +13,7 @@
* CDDL HEADER END
*/
/*
- * Copyright (c) 2017 by Delphix. All rights reserved.
+ * Copyright (c) 2017, 2018 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
@@ -99,6 +99,7 @@ aggsum_fini(aggsum_t *as)
{
for (int i = 0; i < as->as_numbuckets; i++)
mutex_destroy(&as->as_buckets[i].asc_lock);
+ kmem_free(as->as_buckets, as->as_numbuckets * sizeof (aggsum_bucket_t));
mutex_destroy(&as->as_lock);
}
diff --git a/usr/src/uts/common/fs/zfs/arc.c b/usr/src/uts/common/fs/zfs/arc.c
index af70eee950..d2a828b520 100644
--- a/usr/src/uts/common/fs/zfs/arc.c
+++ b/usr/src/uts/common/fs/zfs/arc.c
@@ -6168,6 +6168,14 @@ arc_state_fini(void)
multilist_destroy(arc_mru_ghost->arcs_list[ARC_BUFC_DATA]);
multilist_destroy(arc_mfu->arcs_list[ARC_BUFC_DATA]);
multilist_destroy(arc_mfu_ghost->arcs_list[ARC_BUFC_DATA]);
+
+ aggsum_fini(&arc_meta_used);
+ aggsum_fini(&arc_size);
+ aggsum_fini(&astat_data_size);
+ aggsum_fini(&astat_metadata_size);
+ aggsum_fini(&astat_hdr_size);
+ aggsum_fini(&astat_other_size);
+ aggsum_fini(&astat_l2_hdr_size);
}
uint64_t
@@ -6341,8 +6349,13 @@ arc_fini(void)
mutex_destroy(&arc_adjust_lock);
cv_destroy(&arc_adjust_waiters_cv);
- arc_state_fini();
+ /*
+ * buf_fini() must precede arc_state_fini() because buf_fini() may
+ * trigger the release of kmem magazines, which can call back to
+ * arc_space_return() which accesses aggsums freed in arc_state_fini().
+ */
buf_fini();
+ arc_state_fini();
ASSERT0(arc_loaned_bytes);
}
diff --git a/usr/src/uts/common/fs/zfs/metaslab.c b/usr/src/uts/common/fs/zfs/metaslab.c
index 17613c41f9..8863bdc824 100644
--- a/usr/src/uts/common/fs/zfs/metaslab.c
+++ b/usr/src/uts/common/fs/zfs/metaslab.c
@@ -3088,7 +3088,6 @@ metaslab_group_alloc_normal(metaslab_group_t *mg, zio_alloc_list_t *zal,
metaslab_t *msp = NULL;
uint64_t offset = -1ULL;
uint64_t activation_weight;
- boolean_t tertiary = B_FALSE;
activation_weight = METASLAB_WEIGHT_PRIMARY;
for (int i = 0; i < d; i++) {
@@ -3097,7 +3096,7 @@ metaslab_group_alloc_normal(metaslab_group_t *mg, zio_alloc_list_t *zal,
activation_weight = METASLAB_WEIGHT_SECONDARY;
} else if (activation_weight == METASLAB_WEIGHT_SECONDARY &&
DVA_GET_VDEV(&dva[i]) == mg->mg_vd->vdev_id) {
- tertiary = B_TRUE;
+ activation_weight = METASLAB_WEIGHT_CLAIM;
break;
}
}
@@ -3106,10 +3105,8 @@ metaslab_group_alloc_normal(metaslab_group_t *mg, zio_alloc_list_t *zal,
* If we don't have enough metaslabs active to fill the entire array, we
* just use the 0th slot.
*/
- if (mg->mg_ms_ready < mg->mg_allocators * 2) {
- tertiary = B_FALSE;
+ if (mg->mg_ms_ready < mg->mg_allocators * 3)
allocator = 0;
- }
ASSERT3U(mg->mg_vd->vdev_ms_count, >=, 2);
@@ -3135,7 +3132,7 @@ metaslab_group_alloc_normal(metaslab_group_t *mg, zio_alloc_list_t *zal,
msp = mg->mg_primaries[allocator];
was_active = B_TRUE;
} else if (activation_weight == METASLAB_WEIGHT_SECONDARY &&
- mg->mg_secondaries[allocator] != NULL && !tertiary) {
+ mg->mg_secondaries[allocator] != NULL) {
msp = mg->mg_secondaries[allocator];
was_active = B_TRUE;
} else {
@@ -3178,7 +3175,8 @@ metaslab_group_alloc_normal(metaslab_group_t *mg, zio_alloc_list_t *zal,
continue;
}
- if (msp->ms_weight & METASLAB_WEIGHT_CLAIM) {
+ if (msp->ms_weight & METASLAB_WEIGHT_CLAIM &&
+ activation_weight != METASLAB_WEIGHT_CLAIM) {
metaslab_passivate(msp, msp->ms_weight &
~METASLAB_WEIGHT_CLAIM);
mutex_exit(&msp->ms_lock);
diff --git a/usr/src/uts/common/fs/zfs/spa.c b/usr/src/uts/common/fs/zfs/spa.c
index 599ae9fade..2b19f71d36 100644
--- a/usr/src/uts/common/fs/zfs/spa.c
+++ b/usr/src/uts/common/fs/zfs/spa.c
@@ -3862,8 +3862,17 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, char **ereport)
*/
spa_history_log_version(spa, "open");
+ spa_restart_removal(spa);
+ spa_spawn_aux_threads(spa);
+
/*
* Delete any inconsistent datasets.
+ *
+ * Note:
+ * Since we may be issuing deletes for clones here,
+ * we make sure to do so after we've spawned all the
+ * auxiliary threads above (of which the livelist
+ * deletion zthr is a part).
*/
(void) dmu_objset_find(spa_name(spa),
dsl_destroy_inconsistent, NULL, DS_FIND_CHILDREN);
@@ -3873,10 +3882,6 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, char **ereport)
*/
dsl_pool_clean_tmp_userrefs(spa->spa_dsl_pool);
- spa_restart_removal(spa);
-
- spa_spawn_aux_threads(spa);
-
spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
vdev_initialize_restart(spa->spa_root_vdev);
spa_config_exit(spa, SCL_CONFIG, FTAG);
diff --git a/usr/src/uts/common/fs/zfs/spa_misc.c b/usr/src/uts/common/fs/zfs/spa_misc.c
index 87a95f0a36..8ba49fed41 100644
--- a/usr/src/uts/common/fs/zfs/spa_misc.c
+++ b/usr/src/uts/common/fs/zfs/spa_misc.c
@@ -222,9 +222,6 @@
* vdev state is protected by spa_vdev_state_enter() / spa_vdev_state_exit().
* Like spa_vdev_enter/exit, these are convenience wrappers -- the actual
* locking is, always, based on spa_namespace_lock and spa_config_lock[].
- *
- * spa_rename() is also implemented within this file since it requires
- * manipulation of the namespace.
*/
static avl_tree_t spa_namespace_avl;
@@ -1334,56 +1331,6 @@ spa_deactivate_mos_feature(spa_t *spa, const char *feature)
}
/*
- * Rename a spa_t.
- */
-int
-spa_rename(const char *name, const char *newname)
-{
- spa_t *spa;
- int err;
-
- /*
- * Lookup the spa_t and grab the config lock for writing. We need to
- * actually open the pool so that we can sync out the necessary labels.
- * It's OK to call spa_open() with the namespace lock held because we
- * allow recursive calls for other reasons.
- */
- mutex_enter(&spa_namespace_lock);
- if ((err = spa_open(name, &spa, FTAG)) != 0) {
- mutex_exit(&spa_namespace_lock);
- return (err);
- }
-
- spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
-
- avl_remove(&spa_namespace_avl, spa);
- (void) strlcpy(spa->spa_name, newname, sizeof (spa->spa_name));
- avl_add(&spa_namespace_avl, spa);
-
- /*
- * Sync all labels to disk with the new names by marking the root vdev
- * dirty and waiting for it to sync. It will pick up the new pool name
- * during the sync.
- */
- vdev_config_dirty(spa->spa_root_vdev);
-
- spa_config_exit(spa, SCL_ALL, FTAG);
-
- txg_wait_synced(spa->spa_dsl_pool, 0);
-
- /*
- * Sync the updated config cache.
- */
- spa_write_cachefile(spa, B_FALSE, B_TRUE);
-
- spa_close(spa, FTAG);
-
- mutex_exit(&spa_namespace_lock);
-
- return (0);
-}
-
-/*
* Return the spa_t associated with given pool_guid, if it exists. If
* device_guid is non-zero, determine whether the pool exists *and* contains
* a device with the specified device_guid.
diff --git a/usr/src/uts/common/fs/zfs/sys/spa.h b/usr/src/uts/common/fs/zfs/sys/spa.h
index 1acbe31377..dc5da8fd77 100644
--- a/usr/src/uts/common/fs/zfs/sys/spa.h
+++ b/usr/src/uts/common/fs/zfs/sys/spa.h
@@ -821,7 +821,6 @@ extern void spa_load_note(spa_t *spa, const char *fmt, ...);
extern void spa_activate_mos_feature(spa_t *spa, const char *feature,
dmu_tx_t *tx);
extern void spa_deactivate_mos_feature(spa_t *spa, const char *feature);
-extern int spa_rename(const char *oldname, const char *newname);
extern spa_t *spa_by_guid(uint64_t pool_guid, uint64_t device_guid);
extern boolean_t spa_guid_exists(uint64_t pool_guid, uint64_t device_guid);
extern char *spa_strdup(const char *);
diff --git a/usr/src/uts/common/fs/zfs/sys/vdev_impl.h b/usr/src/uts/common/fs/zfs/sys/vdev_impl.h
index 71753cf24f..2c5dee00e2 100644
--- a/usr/src/uts/common/fs/zfs/sys/vdev_impl.h
+++ b/usr/src/uts/common/fs/zfs/sys/vdev_impl.h
@@ -298,7 +298,7 @@ struct vdev {
/*
* The queue depth parameters determine how many async writes are
- * still pending (i.e. allocated by net yet issued to disk) per
+ * still pending (i.e. allocated but not yet issued to disk) per
* top-level (vdev_async_write_queue_depth) and the maximum allowed
* (vdev_max_async_write_queue_depth). These values only apply to
* top-level vdevs.
diff --git a/usr/src/uts/common/fs/zfs/vdev.c b/usr/src/uts/common/fs/zfs/vdev.c
index f6845f3dcb..27d1dcf97f 100644
--- a/usr/src/uts/common/fs/zfs/vdev.c
+++ b/usr/src/uts/common/fs/zfs/vdev.c
@@ -2823,11 +2823,11 @@ vdev_destroy_spacemaps(vdev_t *vd, dmu_tx_t *tx)
}
static void
-vdev_remove_empty(vdev_t *vd, uint64_t txg)
+vdev_remove_empty_log(vdev_t *vd, uint64_t txg)
{
spa_t *spa = vd->vdev_spa;
- dmu_tx_t *tx;
+ ASSERT(vd->vdev_islog);
ASSERT(vd == vd->vdev_top);
ASSERT3U(txg, ==, spa_syncing_txg(spa));
@@ -2871,13 +2871,14 @@ vdev_remove_empty(vdev_t *vd, uint64_t txg)
ASSERT0(mg->mg_histogram[i]);
}
- tx = dmu_tx_create_assigned(spa_get_dsl(spa), txg);
- vdev_destroy_spacemaps(vd, tx);
+ dmu_tx_t *tx = dmu_tx_create_assigned(spa_get_dsl(spa), txg);
- if (vd->vdev_islog && vd->vdev_top_zap != 0) {
+ vdev_destroy_spacemaps(vd, tx);
+ if (vd->vdev_top_zap != 0) {
vdev_destroy_unlink_zap(vd, vd->vdev_top_zap, tx);
vd->vdev_top_zap = 0;
}
+
dmu_tx_commit(tx);
}
@@ -2949,14 +2950,11 @@ vdev_sync(vdev_t *vd, uint64_t txg)
vdev_dtl_sync(lvd, txg);
/*
- * Remove the metadata associated with this vdev once it's empty.
- * Note that this is typically used for log/cache device removal;
- * we don't empty toplevel vdevs when removing them. But if
- * a toplevel happens to be emptied, this is not harmful.
+ * If this is an empty log device being removed, destroy the
+ * metadata associated with it.
*/
- if (vd->vdev_stat.vs_alloc == 0 && vd->vdev_removing) {
- vdev_remove_empty(vd, txg);
- }
+ if (vd->vdev_islog && vd->vdev_stat.vs_alloc == 0 && vd->vdev_removing)
+ vdev_remove_empty_log(vd, txg);
(void) txg_list_add(&spa->spa_vdev_txg_list, vd, TXG_CLEAN(txg));
}