summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Keith M Wesolowski <wesolows@foobazco.org> 2014-06-13 18:11:58 +0000
committer Keith M Wesolowski <wesolows@foobazco.org> 2014-06-13 18:11:58 +0000
commit cdc6060cfab8e07a11d17716c7e329f6abf49751 (patch)
tree 35d98def8a569fe57477cc02803c2692b21b6b5c
parent 9c8327afa8bab87bf0c263b4b462556f2a936089 (diff)
parent 30beaff42d8240ebf5386e8b7a14e3d137a1631f (diff)
download illumos-joyent-cdc6060cfab8e07a11d17716c7e329f6abf49751.tar.gz
[illumos-gate merge]
commit 30beaff42d8240ebf5386e8b7a14e3d137a1631f 4756 metaslab_group_preload() could deadlock
-rw-r--r-- usr/src/cmd/ztest/ztest.c 4
-rw-r--r-- usr/src/lib/libzpool/common/llib-lzpool 3
-rw-r--r-- usr/src/uts/common/fs/zfs/metaslab.c 25
3 files changed, 27 insertions, 5 deletions
diff --git a/usr/src/cmd/ztest/ztest.c b/usr/src/cmd/ztest/ztest.c
index d4dac71db8..c12eb098a3 100644
--- a/usr/src/cmd/ztest/ztest.c
+++ b/usr/src/cmd/ztest/ztest.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2013 Steven Hartland. All rights reserved.
*/
@@ -185,6 +185,7 @@ static const ztest_shared_opts_t ztest_opts_defaults = {
extern uint64_t metaslab_gang_bang;
extern uint64_t metaslab_df_alloc_threshold;
extern uint64_t zfs_deadman_synctime_ms;
+extern int metaslab_preload_limit;
static ztest_shared_opts_t *ztest_shared_opts;
static ztest_shared_opts_t ztest_opts;
@@ -5593,6 +5594,7 @@ ztest_run(ztest_shared_t *zs)
kernel_init(FREAD | FWRITE);
VERIFY0(spa_open(ztest_opts.zo_pool, &spa, FTAG));
spa->spa_debug = B_TRUE;
+ metaslab_preload_limit = ztest_random(20) + 1;
ztest_spa = spa;
VERIFY0(dmu_objset_own(ztest_opts.zo_pool,
diff --git a/usr/src/lib/libzpool/common/llib-lzpool b/usr/src/lib/libzpool/common/llib-lzpool
index 2872049eb5..e173e16658 100644
--- a/usr/src/lib/libzpool/common/llib-lzpool
+++ b/usr/src/lib/libzpool/common/llib-lzpool
@@ -24,7 +24,7 @@
*/
/*
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
*/
/* LINTLIBRARY */
@@ -66,3 +66,4 @@ extern uint64_t metaslab_gang_bang;
extern uint64_t metaslab_df_alloc_threshold;
extern boolean_t zfeature_checks_disable;
extern uint64_t zfs_deadman_synctime_ms;
+extern int metaslab_preload_limit;
diff --git a/usr/src/uts/common/fs/zfs/metaslab.c b/usr/src/uts/common/fs/zfs/metaslab.c
index d2193a67b1..2e2c972387 100644
--- a/usr/src/uts/common/fs/zfs/metaslab.c
+++ b/usr/src/uts/common/fs/zfs/metaslab.c
@@ -1212,6 +1212,8 @@ metaslab_preload(void *arg)
metaslab_t *msp = arg;
spa_t *spa = msp->ms_group->mg_vd->vdev_spa;
+ ASSERT(!MUTEX_HELD(&msp->ms_group->mg_lock));
+
mutex_enter(&msp->ms_lock);
metaslab_load_wait(msp);
if (!msp->ms_loaded)
@@ -1236,19 +1238,36 @@ metaslab_group_preload(metaslab_group_t *mg)
taskq_wait(mg->mg_taskq);
return;
}
- mutex_enter(&mg->mg_lock);
+ mutex_enter(&mg->mg_lock);
/*
- * Prefetch the next potential metaslabs
+ * Load the next potential metaslabs
*/
- for (msp = avl_first(t); msp != NULL; msp = AVL_NEXT(t, msp)) {
+ msp = avl_first(t);
+ while (msp != NULL) {
+ metaslab_t *msp_next = AVL_NEXT(t, msp);
/* If we have reached our preload limit then we're done */
if (++m > metaslab_preload_limit)
break;
+ /*
+ * We must drop the metaslab group lock here to preserve
+ * lock ordering with the ms_lock (when grabbing both
+ * the mg_lock and the ms_lock, the ms_lock must be taken
+ * first). As a result, it is possible that the ordering
+ * of the metaslabs within the avl tree may change before
+ * we reacquire the lock. The metaslab cannot be removed from
+ * the tree while we're in syncing context so it is safe to
+ * drop the mg_lock here. If the metaslabs are reordered
+ * nothing will break -- we just may end up loading a
+ * less than optimal one.
+ */
+ mutex_exit(&mg->mg_lock);
VERIFY(taskq_dispatch(mg->mg_taskq, metaslab_preload,
msp, TQ_SLEEP) != NULL);
+ mutex_enter(&mg->mg_lock);
+ msp = msp_next;
}
mutex_exit(&mg->mg_lock);
}