author     Jerry Jelinek <jerry.jelinek@joyent.com>  2018-12-17 16:29:23 +0000
committer  Jerry Jelinek <jerry.jelinek@joyent.com>  2018-12-17 16:30:16 +0000
commit     f4cb444376b6efe8770e521115e58d5df2379c97 (patch)
tree       30d252b1da778d6a49e68f8bbee073c869c4da9e
parent     4b3d3b0096454cadc8c9c15703aa2dcc9d6675a9 (diff)
download   illumos-joyent-f4cb444376b6efe8770e521115e58d5df2379c97.tar.gz
OS-7151 ZFS loading and unloading metaslabs at audible frequency
Reviewed by: Joshua Clulow <jmc@joyent.com>
Reviewed by: Kody Kantor <kody.kantor@joyent.com>
Approved by: Kody Kantor <kody.kantor@joyent.com>
-rw-r--r--  usr/src/uts/common/fs/zfs/metaslab.c           71
-rw-r--r--  usr/src/uts/common/fs/zfs/sys/metaslab_impl.h   2
2 files changed, 52 insertions, 21 deletions
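In outline, the patch records a per-metaslab load timestamp (ms_loaded_txg) and, when metaslab_sync_done() finds a metaslab that is stale enough to unload but was loaded only recently, it widens metaslab_unload_delay by metaslab_load_window (capped at metaslab_unload_delay_max) instead of evicting it. The following standalone sketch restates that decision outside the kernel sources; the ms_state_t struct and should_unload() helper are illustrative names only, and the real logic operates on struct metaslab under ms_lock as shown in the diff below.

/*
 * Sketch of the thrash-avoidance decision added by OS-7151.  Names are
 * illustrative; the actual change lives in metaslab_sync_done().
 */
#include <stdint.h>
#include <stdbool.h>

static int load_window = 10;        /* mirrors metaslab_load_window */
static int unload_delay = 8;        /* mirrors metaslab_unload_delay (TXG_SIZE * 2) */
static int unload_delay_max = 256;  /* mirrors metaslab_unload_delay_max */

typedef struct {
    bool     loaded;
    uint64_t selected_txg;  /* last txg an allocation was attempted from it */
    uint64_t loaded_txg;    /* txg at which it was loaded, 0 if unloaded */
} ms_state_t;

/*
 * Return true if the metaslab should be unloaded in txg.  A metaslab that
 * is stale but was loaded recently is kept cached, and the unload delay is
 * ramped up (to at most unload_delay_max) to damp further thrashing.
 */
static bool
should_unload(ms_state_t *ms, uint64_t txg)
{
    if (!ms->loaded || ms->selected_txg + unload_delay >= txg)
        return (false);     /* recently used; keep it loaded */

    if (ms->loaded_txg != 0 &&
        ms->loaded_txg + unload_delay + load_window >= txg) {
        if (unload_delay + load_window <= unload_delay_max)
            unload_delay += load_window;
        return (false);     /* loaded too recently; back off instead */
    }

    return (true);          /* genuinely cold; unload it */
}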
diff --git a/usr/src/uts/common/fs/zfs/metaslab.c b/usr/src/uts/common/fs/zfs/metaslab.c
index 8863bdc824..5a39bf5352 100644
--- a/usr/src/uts/common/fs/zfs/metaslab.c
+++ b/usr/src/uts/common/fs/zfs/metaslab.c
@@ -156,6 +156,18 @@ int metaslab_load_pct = 50;
int metaslab_unload_delay = TXG_SIZE * 2;
/*
+ * Tunables used to reduce metaslab load/unload thrashing when selection
+ * algorithm is allocating across metaslabs very evenly. In addition to
+ * tracking when the slab was used for allocation (ms_selected_txg), we also
+ * track when it was loaded (ms_loaded_txg). If the slab would be unloaded,
+ * but the load txg is within the window of
+ * metaslab_unload_delay + metaslab_load_window
+ * then we ramp up metaslab_unload_delay instead of unloading the metaslab.
+ */
+int metaslab_load_window = 10;
+int metaslab_unload_delay_max = 256;
+
+/*
* Max number of metaslabs per group to preload.
*/
int metaslab_preload_limit = SPA_DVAS_PER_BP;
@@ -1533,6 +1545,7 @@ metaslab_unload(metaslab_t *msp)
ASSERT(MUTEX_HELD(&msp->ms_lock));
range_tree_vacate(msp->ms_allocatable, NULL, NULL);
msp->ms_loaded = B_FALSE;
+ msp->ms_loaded_txg = 0;
msp->ms_weight &= ~METASLAB_ACTIVE_MASK;
msp->ms_max_size = 0;
}
@@ -2077,7 +2090,8 @@ metaslab_activate_allocator(metaslab_group_t *mg, metaslab_t *msp,
}
static int
-metaslab_activate(metaslab_t *msp, int allocator, uint64_t activation_weight)
+metaslab_activate(metaslab_t *msp, int allocator, uint64_t activation_weight,
+ uint64_t txg)
{
ASSERT(MUTEX_HELD(&msp->ms_lock));
@@ -2089,6 +2103,8 @@ metaslab_activate(metaslab_t *msp, int allocator, uint64_t activation_weight)
metaslab_group_sort(msp->ms_group, msp, 0);
return (error);
}
+ VERIFY0(msp->ms_loaded_txg);
+ msp->ms_loaded_txg = txg;
}
if ((msp->ms_weight & METASLAB_ACTIVE_MASK) != 0) {
/*
@@ -2201,8 +2217,11 @@ metaslab_preload(void *arg)
mutex_enter(&msp->ms_lock);
metaslab_load_wait(msp);
- if (!msp->ms_loaded)
+ if (!msp->ms_loaded) {
(void) metaslab_load(msp);
+ VERIFY0(msp->ms_loaded_txg);
+ msp->ms_loaded_txg = spa_syncing_txg(spa);
+ }
msp->ms_selected_txg = spa_syncing_txg(spa);
mutex_exit(&msp->ms_lock);
}
@@ -2727,22 +2746,35 @@ metaslab_sync_done(metaslab_t *msp, uint64_t txg)
/*
* If the metaslab is loaded and we've not tried to load or allocate
- * from it in 'metaslab_unload_delay' txgs, then unload it.
+ * from it in 'metaslab_unload_delay' txgs, then we normally unload it.
+ * However, to prevent thrashing, if the metaslab was recently loaded,
+ * then instead of unloading it, we increase the unload delay (only up
+ * to the maximum).
*/
if (msp->ms_loaded &&
msp->ms_initializing == 0 &&
msp->ms_selected_txg + metaslab_unload_delay < txg) {
- for (int t = 1; t < TXG_CONCURRENT_STATES; t++) {
- VERIFY0(range_tree_space(
- msp->ms_allocating[(txg + t) & TXG_MASK]));
- }
- if (msp->ms_allocator != -1) {
- metaslab_passivate(msp, msp->ms_weight &
- ~METASLAB_ACTIVE_MASK);
- }
+ if (msp->ms_loaded_txg != 0 && msp->ms_loaded_txg +
+ metaslab_unload_delay + metaslab_load_window >= txg) {
+ if (metaslab_unload_delay + metaslab_load_window <=
+ metaslab_unload_delay_max) {
+ metaslab_unload_delay += metaslab_load_window;
+ }
+ DTRACE_PROBE1(zfs__metaslab__delay__unload,
+ metaslab_t *, msp);
+ } else {
+ for (int t = 1; t < TXG_CONCURRENT_STATES; t++) {
+ VERIFY0(range_tree_space(
+ msp->ms_allocating[(txg + t) & TXG_MASK]));
+ }
+ if (msp->ms_allocator != -1) {
+ metaslab_passivate(msp, msp->ms_weight &
+ ~METASLAB_ACTIVE_MASK);
+ }
- if (!metaslab_debug_unload)
- metaslab_unload(msp);
+ if (!metaslab_debug_unload)
+ metaslab_unload(msp);
+ }
}
ASSERT0(range_tree_space(msp->ms_allocating[txg & TXG_MASK]));
@@ -2997,8 +3029,6 @@ metaslab_block_alloc(metaslab_t *msp, uint64_t size, uint64_t txg)
range_tree_add(msp->ms_allocating[txg & TXG_MASK], start, size);
- /* Track the last successful allocation */
- msp->ms_alloc_txg = txg;
metaslab_verify_space(msp, txg);
}
@@ -3041,10 +3071,10 @@ find_valid_metaslab(metaslab_group_t *mg, uint64_t activation_weight,
}
/*
- * If the selected metaslab is condensing or being
- * initialized, skip it.
+ * If the selected metaslab is condensing or being
+ * initialized, skip it.
*/
- if (msp->ms_condensing || msp->ms_initializing > 0)
+ if (msp->ms_condensing || msp->ms_initializing > 0)
continue;
*was_active = msp->ms_allocator != -1;
@@ -3183,7 +3213,8 @@ metaslab_group_alloc_normal(metaslab_group_t *mg, zio_alloc_list_t *zal,
continue;
}
- if (metaslab_activate(msp, allocator, activation_weight) != 0) {
+ if (metaslab_activate(msp, allocator, activation_weight,
+ txg) != 0) {
mutex_exit(&msp->ms_lock);
continue;
}
@@ -3902,7 +3933,7 @@ metaslab_claim_concrete(vdev_t *vd, uint64_t offset, uint64_t size,
mutex_enter(&msp->ms_lock);
if ((txg != 0 && spa_writeable(spa)) || !msp->ms_loaded)
- error = metaslab_activate(msp, 0, METASLAB_WEIGHT_CLAIM);
+ error = metaslab_activate(msp, 0, METASLAB_WEIGHT_CLAIM, txg);
/*
* No need to fail in that case; someone else has activated the
* metaslab, but that doesn't preclude us from using it.
diff --git a/usr/src/uts/common/fs/zfs/sys/metaslab_impl.h b/usr/src/uts/common/fs/zfs/sys/metaslab_impl.h
index 3c4ce37303..7e51f7dda6 100644
--- a/usr/src/uts/common/fs/zfs/sys/metaslab_impl.h
+++ b/usr/src/uts/common/fs/zfs/sys/metaslab_impl.h
@@ -385,8 +385,8 @@ struct metaslab {
* stay cached.
*/
uint64_t ms_selected_txg;
+ uint64_t ms_loaded_txg; /* track when metaslab was loaded */
- uint64_t ms_alloc_txg; /* last successful alloc (debug only) */
uint64_t ms_max_size; /* maximum allocatable size */
/*
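Two practical notes on the change, both relying on standard illumos conventions rather than anything introduced by this patch: the DTRACE_PROBE1() added above should surface as an SDT probe with the double underscores mapped to dashes (i.e. sdt:::zfs-metaslab-delay-unload), so the delayed-unload path can be counted with dtrace while reproducing the original thrashing workload; and metaslab_load_window and metaslab_unload_delay_max are ordinary global int tunables in the zfs module, so they can be adjusted at boot with the usual set zfs:<variable>=<value> line in /etc/system if the defaults prove too tight or too generous.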