diff options
| author | George Wilson <george.wilson@delphix.com> | 2014-06-12 15:29:11 -0800 |
|---|---|---|
| committer | Christopher Siden <chris@delphix.com> | 2014-06-12 16:29:11 -0700 |
| commit | 30beaff42d8240ebf5386e8b7a14e3d137a1631f (patch) | |
| tree | 3cefa28276c6dce7c463d8f7b77766f0ee7d30f1 | |
| parent | b89e420ae1290e425c29db875ec0c0546006eec7 (diff) | |
| download | illumos-joyent-30beaff42d8240ebf5386e8b7a14e3d137a1631f.tar.gz | |
4756 metaslab_group_preload() could deadlock
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: Christopher Siden <christopher.siden@delphix.com>
Reviewed by: Dan McDonald <danmcd@omniti.com>
Reviewed by: Saso Kiselkov <saso.kiselkov@nexenta.com>
Approved by: Garrett D'Amore <garrett@damore.org>
| -rw-r--r-- | usr/src/cmd/ztest/ztest.c | 4 |
| -rw-r--r-- | usr/src/lib/libzpool/common/llib-lzpool | 3 |
| -rw-r--r-- | usr/src/uts/common/fs/zfs/metaslab.c | 25 |
3 files changed, 27 insertions, 5 deletions
```diff
diff --git a/usr/src/cmd/ztest/ztest.c b/usr/src/cmd/ztest/ztest.c
index d4dac71db8..c12eb098a3 100644
--- a/usr/src/cmd/ztest/ztest.c
+++ b/usr/src/cmd/ztest/ztest.c
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
  * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
  * Copyright (c) 2013 Steven Hartland. All rights reserved.
  */
@@ -185,6 +185,7 @@ static const ztest_shared_opts_t ztest_opts_defaults = {
 extern uint64_t metaslab_gang_bang;
 extern uint64_t metaslab_df_alloc_threshold;
 extern uint64_t zfs_deadman_synctime_ms;
+extern int metaslab_preload_limit;
 
 static ztest_shared_opts_t *ztest_shared_opts;
 static ztest_shared_opts_t ztest_opts;
@@ -5593,6 +5594,7 @@ ztest_run(ztest_shared_t *zs)
 	kernel_init(FREAD | FWRITE);
 	VERIFY0(spa_open(ztest_opts.zo_pool, &spa, FTAG));
 	spa->spa_debug = B_TRUE;
+	metaslab_preload_limit = ztest_random(20) + 1;
 	ztest_spa = spa;
 
 	VERIFY0(dmu_objset_own(ztest_opts.zo_pool,
diff --git a/usr/src/lib/libzpool/common/llib-lzpool b/usr/src/lib/libzpool/common/llib-lzpool
index 2872049eb5..e173e16658 100644
--- a/usr/src/lib/libzpool/common/llib-lzpool
+++ b/usr/src/lib/libzpool/common/llib-lzpool
@@ -24,7 +24,7 @@
  */
 
 /*
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
  */
 
 /* LINTLIBRARY */
@@ -66,3 +66,4 @@ extern uint64_t metaslab_gang_bang;
 extern uint64_t metaslab_df_alloc_threshold;
 extern boolean_t zfeature_checks_disable;
 extern uint64_t zfs_deadman_synctime_ms;
+extern int metaslab_preload_limit;
diff --git a/usr/src/uts/common/fs/zfs/metaslab.c b/usr/src/uts/common/fs/zfs/metaslab.c
index 4c854b7105..bba1dbf514 100644
--- a/usr/src/uts/common/fs/zfs/metaslab.c
+++ b/usr/src/uts/common/fs/zfs/metaslab.c
@@ -1207,6 +1207,8 @@ metaslab_preload(void *arg)
 	metaslab_t *msp = arg;
 	spa_t *spa = msp->ms_group->mg_vd->vdev_spa;
 
+	ASSERT(!MUTEX_HELD(&msp->ms_group->mg_lock));
+
 	mutex_enter(&msp->ms_lock);
 	metaslab_load_wait(msp);
 	if (!msp->ms_loaded)
@@ -1231,19 +1233,36 @@ metaslab_group_preload(metaslab_group_t *mg)
 		taskq_wait(mg->mg_taskq);
 		return;
 	}
-	mutex_enter(&mg->mg_lock);
 
+	mutex_enter(&mg->mg_lock);
 	/*
-	 * Prefetch the next potential metaslabs
+	 * Load the next potential metaslabs
 	 */
-	for (msp = avl_first(t); msp != NULL; msp = AVL_NEXT(t, msp)) {
+	msp = avl_first(t);
+	while (msp != NULL) {
+		metaslab_t *msp_next = AVL_NEXT(t, msp);
 
 		/* If we have reached our preload limit then we're done */
 		if (++m > metaslab_preload_limit)
 			break;
 
+		/*
+		 * We must drop the metaslab group lock here to preserve
+		 * lock ordering with the ms_lock (when grabbing both
+		 * the mg_lock and the ms_lock, the ms_lock must be taken
+		 * first). As a result, it is possible that the ordering
+		 * of the metaslabs within the avl tree may change before
+		 * we reacquire the lock. The metaslab cannot be removed from
+		 * the tree while we're in syncing context so it is safe to
+		 * drop the mg_lock here. If the metaslabs are reordered
+		 * nothing will break -- we just may end up loading a
+		 * less than optimal one.
+		 */
+		mutex_exit(&mg->mg_lock);
 		VERIFY(taskq_dispatch(mg->mg_taskq, metaslab_preload,
 		    msp, TQ_SLEEP) != NULL);
+		mutex_enter(&mg->mg_lock);
+		msp = msp_next;
 	}
 	mutex_exit(&mg->mg_lock);
 }
```
