author     Jerry Jelinek <jerry.jelinek@joyent.com>  2018-12-17 16:29:23 +0000
committer  Jerry Jelinek <jerry.jelinek@joyent.com>  2018-12-17 16:30:16 +0000
commit     f4cb444376b6efe8770e521115e58d5df2379c97 (patch)
tree       30d252b1da778d6a49e68f8bbee073c869c4da9e
parent     4b3d3b0096454cadc8c9c15703aa2dcc9d6675a9 (diff)
download   illumos-joyent-f4cb444376b6efe8770e521115e58d5df2379c97.tar.gz
OS-7151 ZFS loading and unloading metaslabs at audible frequency
Reviewed by: Joshua Clulow <jmc@joyent.com>
Reviewed by: Kody Kantor <kody.kantor@joyent.com>
Approved by: Kody Kantor <kody.kantor@joyent.com>
-rw-r--r--  usr/src/uts/common/fs/zfs/metaslab.c           71
-rw-r--r--  usr/src/uts/common/fs/zfs/sys/metaslab_impl.h   2
2 files changed, 52 insertions, 21 deletions
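In outline, the patch records a per-metaslab load timestamp (ms_loaded_txg) and, when metaslab_sync_done() finds a metaslab that is stale enough to unload but was loaded only recently, it widens metaslab_unload_delay by metaslab_load_window (capped at metaslab_unload_delay_max) instead of evicting it. The following standalone sketch restates that decision outside the kernel sources; the ms_state_t struct and should_unload() helper are illustrative names only, and the real logic operates on struct metaslab under ms_lock as shown in the diff below.

/*
 * Sketch of the thrash-avoidance decision added by OS-7151.  Names are
 * illustrative; the actual change lives in metaslab_sync_done().
 */
#include <stdint.h>
#include <stdbool.h>

static int load_window = 10;        /* mirrors metaslab_load_window */
static int unload_delay = 8;        /* mirrors metaslab_unload_delay (TXG_SIZE * 2) */
static int unload_delay_max = 256;  /* mirrors metaslab_unload_delay_max */

typedef struct {
    bool     loaded;
    uint64_t selected_txg;  /* last txg an allocation was attempted from it */
    uint64_t loaded_txg;    /* txg at which it was loaded, 0 if unloaded */
} ms_state_t;

/*
 * Return true if the metaslab should be unloaded in txg.  A metaslab that
 * is stale but was loaded recently is kept cached, and the unload delay is
 * ramped up (to at most unload_delay_max) to damp further thrashing.
 */
static bool
should_unload(ms_state_t *ms, uint64_t txg)
{
    if (!ms->loaded || ms->selected_txg + unload_delay >= txg)
        return (false);     /* recently used; keep it loaded */

    if (ms->loaded_txg != 0 &&
        ms->loaded_txg + unload_delay + load_window >= txg) {
        if (unload_delay + load_window <= unload_delay_max)
            unload_delay += load_window;
        return (false);     /* loaded too recently; back off instead */
    }

    return (true);          /* genuinely cold; unload it */
}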
diff --git a/usr/src/uts/common/fs/zfs/metaslab.c b/usr/src/uts/common/fs/zfs/metaslab.c
index 8863bdc824..5a39bf5352 100644
--- a/usr/src/uts/common/fs/zfs/metaslab.c
+++ b/usr/src/uts/common/fs/zfs/metaslab.c
@@ -156,6 +156,18 @@ int metaslab_load_pct = 50;
int metaslab_unload_delay = TXG_SIZE * 2;
/*
+ * Tunables used to reduce metaslab load/unload thrashing when selection
+ * algorithm is allocating across metaslabs very evenly. In addition to
+ * tracking when the slab was used for allocation (ms_selected_txg), we also
+ * track when it was loaded (ms_loaded_txg). If the slab would be unloaded,
+ * but the load txg is within the window of
+ * metaslab_unload_delay + metaslab_load_window
+ * then we ramp up metaslab_unload_delay instead of unloading the metaslab.
+ */
+int metaslab_load_window = 10;
+int metaslab_unload_delay_max = 256;
+
+/*
* Max number of metaslabs per group to preload.
*/
int metaslab_preload_limit = SPA_DVAS_PER_BP;
@@ -1533,6 +1545,7 @@ metaslab_unload(metaslab_t *msp)
ASSERT(MUTEX_HELD(&msp->ms_lock));
range_tree_vacate(msp->ms_allocatable, NULL, NULL);
msp->ms_loaded = B_FALSE;
+ msp->ms_loaded_txg = 0;
msp->ms_weight &= ~METASLAB_ACTIVE_MASK;
msp->ms_max_size = 0;
}
@@ -2077,7 +2090,8 @@ metaslab_activate_allocator(metaslab_group_t *mg, metaslab_t *msp,
}
static int
-metaslab_activate(metaslab_t *msp, int allocator, uint64_t activation_weight)
+metaslab_activate(metaslab_t *msp, int allocator, uint64_t activation_weight,
+ uint64_t txg)
{
ASSERT(MUTEX_HELD(&msp->ms_lock));
@@ -2089,6 +2103,8 @@ metaslab_activate(metaslab_t *msp, int allocator, uint64_t activation_weight)
metaslab_group_sort(msp->ms_group, msp, 0);
return (error);
}
+ VERIFY0(msp->ms_loaded_txg);
+ msp->ms_loaded_txg = txg;
}
if ((msp->ms_weight & METASLAB_ACTIVE_MASK) != 0) {
/*
@@ -2201,8 +2217,11 @@ metaslab_preload(void *arg)
mutex_enter(&msp->ms_lock);
metaslab_load_wait(msp);
- if (!msp->ms_loaded)
+ if (!msp->ms_loaded) {
(void) metaslab_load(msp);
+ VERIFY0(msp->ms_loaded_txg);
+ msp->ms_loaded_txg = spa_syncing_txg(spa);
+ }
msp->ms_selected_txg = spa_syncing_txg(spa);
mutex_exit(&msp->ms_lock);
}
@@ -2727,22 +2746,35 @@ metaslab_sync_done(metaslab_t *msp, uint64_t txg)
/*
* If the metaslab is loaded and we've not tried to load or allocate
- * from it in 'metaslab_unload_delay' txgs, then unload it.
+ * from it in 'metaslab_unload_delay' txgs, then we normally unload it.
+ * However, to prevent thrashing, if the metaslab was recently loaded,
+ * then instead of unloading it, we increase the unload delay (only up
+ * to the maximum).
*/
if (msp->ms_loaded &&
msp->ms_initializing == 0 &&
msp->ms_selected_txg + metaslab_unload_delay < txg) {
- for (int t = 1; t < TXG_CONCURRENT_STATES; t++) {
- VERIFY0(range_tree_space(
- msp->ms_allocating[(txg + t) & TXG_MASK]));
- }
- if (msp->ms_allocator != -1) {
- metaslab_passivate(msp, msp->ms_weight &
- ~METASLAB_ACTIVE_MASK);
- }
+ if (msp->ms_loaded_txg != 0 && msp->ms_loaded_txg +
+ metaslab_unload_delay + metaslab_load_window >= txg) {
+ if (metaslab_unload_delay + metaslab_load_window <=
+ metaslab_unload_delay_max) {
+ metaslab_unload_delay += metaslab_load_window;
+ }
+ DTRACE_PROBE1(zfs__metaslab__delay__unload,
+ metaslab_t *, msp);
+ } else {
+ for (int t = 1; t < TXG_CONCURRENT_STATES; t++) {
+ VERIFY0(range_tree_space(
+ msp->ms_allocating[(txg + t) & TXG_MASK]));
+ }
+ if (msp->ms_allocator != -1) {
+ metaslab_passivate(msp, msp->ms_weight &
+ ~METASLAB_ACTIVE_MASK);
+ }
- if (!metaslab_debug_unload)
- metaslab_unload(msp);
+ if (!metaslab_debug_unload)
+ metaslab_unload(msp);
+ }
}
ASSERT0(range_tree_space(msp->ms_allocating[txg & TXG_MASK]));
@@ -2997,8 +3029,6 @@ metaslab_block_alloc(metaslab_t *msp, uint64_t size, uint64_t txg)
range_tree_add(msp->ms_allocating[txg & TXG_MASK], start, size);
- /* Track the last successful allocation */
- msp->ms_alloc_txg = txg;
metaslab_verify_space(msp, txg);
}
@@ -3041,10 +3071,10 @@ find_valid_metaslab(metaslab_group_t *mg, uint64_t activation_weight,
}
/*
- * If the selected metaslab is condensing or being
- * initialized, skip it.
+ * If the selected metaslab is condensing or being
+ * initialized, skip it.
*/
- if (msp->ms_condensing || msp->ms_initializing > 0)
+ if (msp->ms_condensing || msp->ms_initializing > 0)
continue;
*was_active = msp->ms_allocator != -1;
@@ -3183,7 +3213,8 @@ metaslab_group_alloc_normal(metaslab_group_t *mg, zio_alloc_list_t *zal,
continue;
}
- if (metaslab_activate(msp, allocator, activation_weight) != 0) {
+ if (metaslab_activate(msp, allocator, activation_weight,
+ txg) != 0) {
mutex_exit(&msp->ms_lock);
continue;
}
@@ -3902,7 +3933,7 @@ metaslab_claim_concrete(vdev_t *vd, uint64_t offset, uint64_t size,
mutex_enter(&msp->ms_lock);
if ((txg != 0 && spa_writeable(spa)) || !msp->ms_loaded)
- error = metaslab_activate(msp, 0, METASLAB_WEIGHT_CLAIM);
+ error = metaslab_activate(msp, 0, METASLAB_WEIGHT_CLAIM, txg);
/*
* No need to fail in that case; someone else has activated the
* metaslab, but that doesn't preclude us from using it.
diff --git a/usr/src/uts/common/fs/zfs/sys/metaslab_impl.h b/usr/src/uts/common/fs/zfs/sys/metaslab_impl.h
index 3c4ce37303..7e51f7dda6 100644
--- a/usr/src/uts/common/fs/zfs/sys/metaslab_impl.h
+++ b/usr/src/uts/common/fs/zfs/sys/metaslab_impl.h
@@ -385,8 +385,8 @@ struct metaslab {
* stay cached.
*/
uint64_t ms_selected_txg;
+ uint64_t ms_loaded_txg; /* track when metaslab was loaded */
- uint64_t ms_alloc_txg; /* last successful alloc (debug only) */
uint64_t ms_max_size; /* maximum allocatable size */
/*
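Two practical notes on the change, both relying on standard illumos conventions rather than anything introduced by this patch: the DTRACE_PROBE1() added above should surface as an SDT probe with the double underscores mapped to dashes (i.e. sdt:::zfs-metaslab-delay-unload), so the delayed-unload path can be counted with dtrace while reproducing the original thrashing workload; and metaslab_load_window and metaslab_unload_delay_max are ordinary global int tunables in the zfs module, so they can be adjusted at boot with the usual set zfs:<variable>=<value> line in /etc/system if the defaults prove too tight or too generous.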