diff options
Diffstat (limited to 'usr/src')
-rw-r--r-- | usr/src/cmd/mdb/common/modules/zfs/zfs.c | 2 | ||||
-rw-r--r-- | usr/src/cmd/zdb/Makefile.com | 4 | ||||
-rw-r--r-- | usr/src/cmd/zdb/zdb.c | 69 | ||||
-rw-r--r-- | usr/src/cmd/ztest/ztest.c | 7 | ||||
-rw-r--r-- | usr/src/lib/libzpool/common/llib-lzpool | 1 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/metaslab.c | 242 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/spa.c | 4 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/space_map.c | 37 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/sys/metaslab.h | 6 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/sys/metaslab_impl.h | 5 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/sys/space_map.h | 6 |
11 files changed, 316 insertions, 67 deletions
diff --git a/usr/src/cmd/mdb/common/modules/zfs/zfs.c b/usr/src/cmd/mdb/common/modules/zfs/zfs.c index 8a9be77c5f..d55ce1d0c5 100644 --- a/usr/src/cmd/mdb/common/modules/zfs/zfs.c +++ b/usr/src/cmd/mdb/common/modules/zfs/zfs.c @@ -377,6 +377,8 @@ zfs_params(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) "zil_disable", "zfs_nocacheflush", "metaslab_gang_bang", + "metaslab_df_alloc_threshold", + "metaslab_df_free_pct", "zio_injection_enabled", "zvol_immediate_write_sz", }; diff --git a/usr/src/cmd/zdb/Makefile.com b/usr/src/cmd/zdb/Makefile.com index f7aacf97ce..de7f5a4a24 100644 --- a/usr/src/cmd/zdb/Makefile.com +++ b/usr/src/cmd/zdb/Makefile.com @@ -20,7 +20,7 @@ # # -# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # @@ -33,7 +33,7 @@ include ../../Makefile.cmd INCS += -I../../../lib/libzpool/common INCS += -I../../../uts/common/fs/zfs -LDLIBS += -lzpool -lumem -lnvpair -lzfs +LDLIBS += -lzpool -lumem -lnvpair -lzfs -lavl C99MODE= -xc99=%all C99LMODE= -Xc99=%all diff --git a/usr/src/cmd/zdb/zdb.c b/usr/src/cmd/zdb/zdb.c index 74546c2cf0..9c84410aa1 100644 --- a/usr/src/cmd/zdb/zdb.c +++ b/usr/src/cmd/zdb/zdb.c @@ -102,6 +102,7 @@ usage(void) (void) fprintf(stderr, " -C cached pool configuration\n"); (void) fprintf(stderr, " -i intent logs\n"); (void) fprintf(stderr, " -b block statistics\n"); + (void) fprintf(stderr, " -m metaslabs\n"); (void) fprintf(stderr, " -c checksum all metadata (twice for " "all data) blocks\n"); (void) fprintf(stderr, " -s report stats on zdb's I/O\n"); @@ -473,6 +474,21 @@ dump_spacemap(objset_t *os, space_map_obj_t *smo, space_map_t *sm) } static void +dump_metaslab_stats(metaslab_t *msp) +{ + char maxbuf[5]; + space_map_t *sm = &msp->ms_map; + avl_tree_t *t = sm->sm_pp_root; + int free_pct = sm->sm_space * 100 / sm->sm_size; + + nicenum(space_map_maxsize(sm), maxbuf); + + (void) printf("\t %20s %10lu %7s %6s %4s 
%4d%%\n", + "segments", avl_numnodes(t), "maxsize", maxbuf, + "freepct", free_pct); +} + +static void dump_metaslab(metaslab_t *msp) { char freebuf[5]; @@ -482,22 +498,28 @@ dump_metaslab(metaslab_t *msp) nicenum(msp->ms_map.sm_size - smo->smo_alloc, freebuf); - if (dump_opt['d'] <= 5) { - (void) printf("\t%10llx %10llu %5s\n", - (u_longlong_t)msp->ms_map.sm_start, - (u_longlong_t)smo->smo_object, - freebuf); - return; - } - (void) printf( - "\tvdev %llu offset %08llx spacemap %4llu free %5s\n", + "\tvdev %5llu offset %12llx spacemap %6llu free %5s\n", (u_longlong_t)vd->vdev_id, (u_longlong_t)msp->ms_map.sm_start, (u_longlong_t)smo->smo_object, freebuf); - ASSERT(msp->ms_map.sm_size == (1ULL << vd->vdev_ms_shift)); + if (dump_opt['m'] > 1) { + mutex_enter(&msp->ms_lock); + VERIFY(space_map_load(&msp->ms_map, zfs_metaslab_ops, + SM_FREE, &msp->ms_smo, spa->spa_meta_objset) == 0); + dump_metaslab_stats(msp); + space_map_unload(&msp->ms_map); + mutex_exit(&msp->ms_lock); + } + + if (dump_opt['d'] > 5 || dump_opt['m'] > 2) { + ASSERT(msp->ms_map.sm_size == (1ULL << vd->vdev_ms_shift)); + + mutex_enter(&msp->ms_lock); + dump_spacemap(spa->spa_meta_objset, smo, &msp->ms_map); + mutex_exit(&msp->ms_lock); + } - dump_spacemap(spa->spa_meta_objset, smo, &msp->ms_map); } static void @@ -512,14 +534,12 @@ dump_metaslabs(spa_t *spa) for (c = 0; c < rvd->vdev_children; c++) { vd = rvd->vdev_child[c]; - (void) printf("\n vdev %llu\n\n", (u_longlong_t)vd->vdev_id); + (void) printf("\t%-10s %-19s %-15s %-10s\n", + "vdev", "offset", "spacemap", "free"); + (void) printf("\t%10s %19s %15s %10s\n", + "----------", "-------------------", + "---------------", "-------------"); - if (dump_opt['d'] <= 5) { - (void) printf("\t%10s %10s %5s\n", - "offset", "spacemap", "free"); - (void) printf("\t%10s %10s %5s\n", - "------", "--------", "----"); - } for (m = 0; m < vd->vdev_ms_count; m++) dump_metaslab(vd->vdev_ms[m]); (void) printf("\n"); @@ -1419,7 +1439,8 @@ static space_map_ops_t 
zdb_space_map_ops = { zdb_space_map_unload, NULL, /* alloc */ zdb_space_map_claim, - NULL /* free */ + NULL, /* free */ + NULL /* maxsize */ }; static void @@ -1809,14 +1830,17 @@ dump_zpool(spa_t *spa) if (dump_opt['u']) dump_uberblock(&spa->spa_uberblock); - if (dump_opt['d'] || dump_opt['i']) { + if (dump_opt['d'] || dump_opt['i'] || dump_opt['m']) { dump_dir(dp->dp_meta_objset); if (dump_opt['d'] >= 3) { dump_bplist(dp->dp_meta_objset, spa->spa_sync_bplist_obj, "Deferred frees"); dump_dtl(spa->spa_root_vdev, 0); - dump_metaslabs(spa); } + + if (dump_opt['d'] >= 3 || dump_opt['m']) + dump_metaslabs(spa); + (void) dmu_objset_find(spa_name(spa), dump_one_dir, NULL, DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN); } @@ -2292,13 +2316,14 @@ main(int argc, char **argv) dprintf_setup(&argc, argv); - while ((c = getopt(argc, argv, "udibcsvCLS:U:lRep:t:")) != -1) { + while ((c = getopt(argc, argv, "udibcmsvCLS:U:lRep:t:")) != -1) { switch (c) { case 'u': case 'd': case 'i': case 'b': case 'c': + case 'm': case 's': case 'C': case 'l': diff --git a/usr/src/cmd/ztest/ztest.c b/usr/src/cmd/ztest/ztest.c index c9beb00b20..304024c951 100644 --- a/usr/src/cmd/ztest/ztest.c +++ b/usr/src/cmd/ztest/ztest.c @@ -248,9 +248,11 @@ static ztest_shared_t *ztest_shared; static int ztest_random_fd; static int ztest_dump_core = 1; +static uint64_t metaslab_sz; static boolean_t ztest_exiting; extern uint64_t metaslab_gang_bang; +extern uint64_t metaslab_df_alloc_threshold; #define ZTEST_DIROBJ 1 #define ZTEST_MICROZAP_OBJ 2 @@ -3767,6 +3769,8 @@ ztest_init(char *pool) if (error) fatal(0, "spa_open() = %d", error); + metaslab_sz = 1ULL << spa->spa_root_vdev->vdev_child[0]->vdev_ms_shift; + if (zopt_verbose >= 3) show_pool_stats(spa); @@ -3858,6 +3862,9 @@ main(int argc, char **argv) zi->zi_call_time = 0; } + /* Set the allocation switch size */ + metaslab_df_alloc_threshold = ztest_random(metaslab_sz / 4) + 1; + pid = fork(); if (pid == -1) diff --git a/usr/src/lib/libzpool/common/llib-lzpool 
b/usr/src/lib/libzpool/common/llib-lzpool index 276c2eb6ad..44a7d4a28a 100644 --- a/usr/src/lib/libzpool/common/llib-lzpool +++ b/usr/src/lib/libzpool/common/llib-lzpool @@ -49,3 +49,4 @@ #include <sys/arc.h> extern uint64_t metaslab_gang_bang; +extern uint64_t metaslab_df_alloc_threshold; diff --git a/usr/src/uts/common/fs/zfs/metaslab.c b/usr/src/uts/common/fs/zfs/metaslab.c index 412832968d..77556ac5d7 100644 --- a/usr/src/uts/common/fs/zfs/metaslab.c +++ b/usr/src/uts/common/fs/zfs/metaslab.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -36,18 +36,35 @@ uint64_t metaslab_aliquot = 512ULL << 10; uint64_t metaslab_gang_bang = SPA_MAXBLOCKSIZE + 1; /* force gang blocks */ /* + * Minimum size which forces the dynamic allocator to change + * its allocation strategy. Once the space map cannot satisfy + * an allocation of this size then it switches to using a more + * aggressive strategy (i.e., search by size rather than offset). + */ +uint64_t metaslab_df_alloc_threshold = SPA_MAXBLOCKSIZE; + +/* + * The minimum free space, in percent, which must be available + * in a space map to continue allocations in a first-fit fashion. + * Once the space_map's free space drops below this level we dynamically + * switch to using best-fit allocations. 
+ */ +int metaslab_df_free_pct = 30; + +/* * ========================================================================== * Metaslab classes * ========================================================================== */ metaslab_class_t * -metaslab_class_create(void) +metaslab_class_create(space_map_ops_t *ops) { metaslab_class_t *mc; mc = kmem_zalloc(sizeof (metaslab_class_t), KM_SLEEP); mc->mc_rotor = NULL; + mc->mc_ops = ops; return (mc); } @@ -202,30 +219,14 @@ metaslab_group_sort(metaslab_group_t *mg, metaslab_t *msp, uint64_t weight) } /* - * ========================================================================== - * The first-fit block allocator - * ========================================================================== + * This is a helper function that can be used by the allocator to find + * a suitable block to allocate. This will search the specified AVL + * tree looking for a block that matches the specified criteria. */ -static void -metaslab_ff_load(space_map_t *sm) -{ - ASSERT(sm->sm_ppd == NULL); - sm->sm_ppd = kmem_zalloc(64 * sizeof (uint64_t), KM_SLEEP); -} - -static void -metaslab_ff_unload(space_map_t *sm) -{ - kmem_free(sm->sm_ppd, 64 * sizeof (uint64_t)); - sm->sm_ppd = NULL; -} - static uint64_t -metaslab_ff_alloc(space_map_t *sm, uint64_t size) +metaslab_block_picker(avl_tree_t *t, uint64_t *cursor, uint64_t size, + uint64_t align) { - avl_tree_t *t = &sm->sm_root; - uint64_t align = size & -size; - uint64_t *cursor = (uint64_t *)sm->sm_ppd + highbit(align) - 1; space_seg_t *ss, ssearch; avl_index_t where; @@ -254,7 +255,37 @@ metaslab_ff_alloc(space_map_t *sm, uint64_t size) return (-1ULL); *cursor = 0; - return (metaslab_ff_alloc(sm, size)); + return (metaslab_block_picker(t, cursor, size, align)); +} + +/* + * ========================================================================== + * The first-fit block allocator + * ========================================================================== + */ +static void 
+metaslab_ff_load(space_map_t *sm) +{ + ASSERT(sm->sm_ppd == NULL); + sm->sm_ppd = kmem_zalloc(64 * sizeof (uint64_t), KM_SLEEP); + sm->sm_pp_root = NULL; +} + +static void +metaslab_ff_unload(space_map_t *sm) +{ + kmem_free(sm->sm_ppd, 64 * sizeof (uint64_t)); + sm->sm_ppd = NULL; +} + +static uint64_t +metaslab_ff_alloc(space_map_t *sm, uint64_t size) +{ + avl_tree_t *t = &sm->sm_root; + uint64_t align = size & -size; + uint64_t *cursor = (uint64_t *)sm->sm_ppd + highbit(align) - 1; + + return (metaslab_block_picker(t, cursor, size, align)); } /* ARGSUSED */ @@ -276,9 +307,136 @@ static space_map_ops_t metaslab_ff_ops = { metaslab_ff_unload, metaslab_ff_alloc, metaslab_ff_claim, - metaslab_ff_free + metaslab_ff_free, + NULL /* maxsize */ +}; + +/* + * Dynamic block allocator - + * Uses the first fit allocation scheme until space gets low and then + * adjusts to a best fit allocation method. Uses metaslab_df_alloc_threshold + * and metaslab_df_free_pct to determine when to switch the allocation scheme. 
+ */ + +uint64_t +metaslab_df_maxsize(space_map_t *sm) +{ + avl_tree_t *t = sm->sm_pp_root; + space_seg_t *ss; + + if (t == NULL || (ss = avl_last(t)) == NULL) + return (0ULL); + + return (ss->ss_end - ss->ss_start); +} + +static int +metaslab_df_seg_compare(const void *x1, const void *x2) +{ + const space_seg_t *s1 = x1; + const space_seg_t *s2 = x2; + uint64_t ss_size1 = s1->ss_end - s1->ss_start; + uint64_t ss_size2 = s2->ss_end - s2->ss_start; + + if (ss_size1 < ss_size2) + return (-1); + if (ss_size1 > ss_size2) + return (1); + + if (s1->ss_start < s2->ss_start) + return (-1); + if (s1->ss_start > s2->ss_start) + return (1); + + return (0); +} + +static void +metaslab_df_load(space_map_t *sm) +{ + space_seg_t *ss; + + ASSERT(sm->sm_ppd == NULL); + sm->sm_ppd = kmem_zalloc(64 * sizeof (uint64_t), KM_SLEEP); + + sm->sm_pp_root = kmem_alloc(sizeof (avl_tree_t), KM_SLEEP); + avl_create(sm->sm_pp_root, metaslab_df_seg_compare, + sizeof (space_seg_t), offsetof(struct space_seg, ss_pp_node)); + + for (ss = avl_first(&sm->sm_root); ss; ss = AVL_NEXT(&sm->sm_root, ss)) + avl_add(sm->sm_pp_root, ss); +} + +static void +metaslab_df_unload(space_map_t *sm) +{ + void *cookie = NULL; + + kmem_free(sm->sm_ppd, 64 * sizeof (uint64_t)); + sm->sm_ppd = NULL; + + while (avl_destroy_nodes(sm->sm_pp_root, &cookie) != NULL) { + /* tear down the tree */ + } + + avl_destroy(sm->sm_pp_root); + kmem_free(sm->sm_pp_root, sizeof (avl_tree_t)); + sm->sm_pp_root = NULL; +} + +static uint64_t +metaslab_df_alloc(space_map_t *sm, uint64_t size) +{ + avl_tree_t *t = &sm->sm_root; + uint64_t align = size & -size; + uint64_t *cursor = (uint64_t *)sm->sm_ppd + highbit(align) - 1; + uint64_t max_size = metaslab_df_maxsize(sm); + int free_pct = sm->sm_space * 100 / sm->sm_size; + + ASSERT(MUTEX_HELD(sm->sm_lock)); + ASSERT3U(avl_numnodes(&sm->sm_root), ==, avl_numnodes(sm->sm_pp_root)); + + if (max_size < size) + return (-1ULL); + + /* + * If we're running low on space switch to using the size + * 
sorted AVL tree (best-fit). + */ + if (max_size < metaslab_df_alloc_threshold || + free_pct < metaslab_df_free_pct) { + t = sm->sm_pp_root; + *cursor = 0; + } + + return (metaslab_block_picker(t, cursor, size, 1ULL)); +} + +/* ARGSUSED */ +static void +metaslab_df_claim(space_map_t *sm, uint64_t start, uint64_t size) +{ + /* No need to update cursor */ +} + +/* ARGSUSED */ +static void +metaslab_df_free(space_map_t *sm, uint64_t start, uint64_t size) +{ + /* No need to update cursor */ +} + +static space_map_ops_t metaslab_df_ops = { + metaslab_df_load, + metaslab_df_unload, + metaslab_df_alloc, + metaslab_df_claim, + metaslab_df_free, + metaslab_df_maxsize }; +space_map_ops_t *zfs_metaslab_ops = &metaslab_df_ops; + /* * ========================================================================== * Metaslabs @@ -414,20 +572,28 @@ metaslab_weight(metaslab_t *msp) } static int -metaslab_activate(metaslab_t *msp, uint64_t activation_weight) +metaslab_activate(metaslab_t *msp, uint64_t activation_weight, uint64_t size) { space_map_t *sm = &msp->ms_map; + space_map_ops_t *sm_ops = msp->ms_group->mg_class->mc_ops; ASSERT(MUTEX_HELD(&msp->ms_lock)); if ((msp->ms_weight & METASLAB_ACTIVE_MASK) == 0) { - int error = space_map_load(sm, &metaslab_ff_ops, - SM_FREE, &msp->ms_smo, + int error = space_map_load(sm, sm_ops, SM_FREE, &msp->ms_smo, msp->ms_group->mg_vd->vdev_spa->spa_meta_objset); if (error) { metaslab_group_sort(msp->ms_group, msp, 0); return (error); } + + /* + * If we were able to load the map then make sure + * that this map is still able to satisfy our request. 
+ */ + if (msp->ms_weight < size) + return (ENOSPC); + metaslab_group_sort(msp->ms_group, msp, msp->ms_weight | activation_weight); } @@ -636,11 +802,16 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t size, uint64_t txg, int i; activation_weight = METASLAB_WEIGHT_PRIMARY; - for (i = 0; i < d; i++) - if (DVA_GET_VDEV(&dva[i]) == mg->mg_vd->vdev_id) + for (i = 0; i < d; i++) { + if (DVA_GET_VDEV(&dva[i]) == mg->mg_vd->vdev_id) { activation_weight = METASLAB_WEIGHT_SECONDARY; + break; + } + } for (;;) { + boolean_t was_active; + mutex_enter(&mg->mg_lock); for (msp = avl_first(t); msp; msp = AVL_NEXT(t, msp)) { if (msp->ms_weight < size) { @@ -648,6 +819,7 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t size, uint64_t txg, return (-1ULL); } + was_active = msp->ms_weight & METASLAB_ACTIVE_MASK; if (activation_weight == METASLAB_WEIGHT_PRIMARY) break; @@ -673,7 +845,9 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t size, uint64_t txg, * another thread may have changed the weight while we * were blocked on the metaslab lock. 
*/ - if (msp->ms_weight < size) { + if (msp->ms_weight < size || (was_active && + !(msp->ms_weight & METASLAB_ACTIVE_MASK) && + activation_weight == METASLAB_WEIGHT_PRIMARY)) { mutex_exit(&msp->ms_lock); continue; } @@ -686,7 +860,7 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t size, uint64_t txg, continue; } - if (metaslab_activate(msp, activation_weight) != 0) { + if (metaslab_activate(msp, activation_weight, size) != 0) { mutex_exit(&msp->ms_lock); continue; } @@ -869,7 +1043,7 @@ next: goto top; } - if (!zio_lock) { + if (!allocatable && !zio_lock) { dshift = 3; zio_lock = B_TRUE; goto top; @@ -955,7 +1129,7 @@ metaslab_claim_dva(spa_t *spa, const dva_t *dva, uint64_t txg) mutex_enter(&msp->ms_lock); - error = metaslab_activate(msp, METASLAB_WEIGHT_SECONDARY); + error = metaslab_activate(msp, METASLAB_WEIGHT_SECONDARY, 0); if (error || txg == 0) { /* txg == 0 indicates dry run */ mutex_exit(&msp->ms_lock); return (error); diff --git a/usr/src/uts/common/fs/zfs/spa.c b/usr/src/uts/common/fs/zfs/spa.c index 79f4bc91aa..b852bd039f 100644 --- a/usr/src/uts/common/fs/zfs/spa.c +++ b/usr/src/uts/common/fs/zfs/spa.c @@ -541,8 +541,8 @@ spa_activate(spa_t *spa, int mode) spa->spa_state = POOL_STATE_ACTIVE; spa->spa_mode = mode; - spa->spa_normal_class = metaslab_class_create(); - spa->spa_log_class = metaslab_class_create(); + spa->spa_normal_class = metaslab_class_create(zfs_metaslab_ops); + spa->spa_log_class = metaslab_class_create(zfs_metaslab_ops); for (int t = 0; t < ZIO_TYPES; t++) { for (int q = 0; q < ZIO_TASKQ_TYPES; q++) { diff --git a/usr/src/uts/common/fs/zfs/space_map.c b/usr/src/uts/common/fs/zfs/space_map.c index 1cdacc81da..4aa2394138 100644 --- a/usr/src/uts/common/fs/zfs/space_map.c +++ b/usr/src/uts/common/fs/zfs/space_map.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -116,12 +116,23 @@ space_map_add(space_map_t *sm, uint64_t start, uint64_t size) if (merge_before && merge_after) { avl_remove(&sm->sm_root, ss_before); + if (sm->sm_pp_root) { + avl_remove(sm->sm_pp_root, ss_before); + avl_remove(sm->sm_pp_root, ss_after); + } ss_after->ss_start = ss_before->ss_start; kmem_free(ss_before, sizeof (*ss_before)); + ss = ss_after; } else if (merge_before) { ss_before->ss_end = end; + if (sm->sm_pp_root) + avl_remove(sm->sm_pp_root, ss_before); + ss = ss_before; } else if (merge_after) { ss_after->ss_start = start; + if (sm->sm_pp_root) + avl_remove(sm->sm_pp_root, ss_after); + ss = ss_after; } else { ss = kmem_alloc(sizeof (*ss), KM_SLEEP); ss->ss_start = start; @@ -129,6 +140,9 @@ space_map_add(space_map_t *sm, uint64_t start, uint64_t size) avl_insert(&sm->sm_root, ss, where); } + if (sm->sm_pp_root) + avl_add(sm->sm_pp_root, ss); + sm->sm_space += size; } @@ -163,12 +177,17 @@ space_map_remove(space_map_t *sm, uint64_t start, uint64_t size) left_over = (ss->ss_start != start); right_over = (ss->ss_end != end); + if (sm->sm_pp_root) + avl_remove(sm->sm_pp_root, ss); + if (left_over && right_over) { newseg = kmem_alloc(sizeof (*newseg), KM_SLEEP); newseg->ss_start = end; newseg->ss_end = ss->ss_end; ss->ss_end = start; avl_insert_here(&sm->sm_root, newseg, ss, AVL_AFTER); + if (sm->sm_pp_root) + avl_add(sm->sm_pp_root, newseg); } else if (left_over) { ss->ss_end = start; } else if (right_over) { @@ -176,8 +195,12 @@ space_map_remove(space_map_t *sm, uint64_t start, uint64_t size) } else { avl_remove(&sm->sm_root, ss); kmem_free(ss, sizeof (*ss)); + ss = NULL; } + if (sm->sm_pp_root && ss != NULL) + avl_add(sm->sm_pp_root, ss); + sm->sm_space -= size; } @@ -315,6 +338,9 @@ space_map_load(space_map_t *sm, space_map_ops_t *ops, uint8_t maptype, if (ops != NULL) ops->smop_load(sm); } else { + if (ops != NULL) + ops->smop_unload(sm); + sm->sm_ops = NULL; space_map_vacate(sm, NULL, NULL); } @@ -342,6 +368,15 @@ 
space_map_unload(space_map_t *sm) } uint64_t +space_map_maxsize(space_map_t *sm) +{ + if (sm->sm_loaded && sm->sm_ops != NULL) + return (sm->sm_ops->smop_max(sm)); + else + return (-1ULL); +} + +uint64_t space_map_alloc(space_map_t *sm, uint64_t size) { uint64_t start; diff --git a/usr/src/uts/common/fs/zfs/sys/metaslab.h b/usr/src/uts/common/fs/zfs/sys/metaslab.h index 1c9d89e8fd..5d3e11c971 100644 --- a/usr/src/uts/common/fs/zfs/sys/metaslab.h +++ b/usr/src/uts/common/fs/zfs/sys/metaslab.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -39,6 +39,8 @@ extern "C" { typedef struct metaslab_class metaslab_class_t; typedef struct metaslab_group metaslab_group_t; +extern space_map_ops_t *zfs_metaslab_ops; + extern metaslab_t *metaslab_init(metaslab_group_t *mg, space_map_obj_t *smo, uint64_t start, uint64_t size, uint64_t txg); extern void metaslab_fini(metaslab_t *msp); @@ -55,7 +57,7 @@ extern void metaslab_free(spa_t *spa, const blkptr_t *bp, uint64_t txg, boolean_t now); extern int metaslab_claim(spa_t *spa, const blkptr_t *bp, uint64_t txg); -extern metaslab_class_t *metaslab_class_create(void); +extern metaslab_class_t *metaslab_class_create(space_map_ops_t *ops); extern void metaslab_class_destroy(metaslab_class_t *mc); extern void metaslab_class_add(metaslab_class_t *mc, metaslab_group_t *mg); extern void metaslab_class_remove(metaslab_class_t *mc, metaslab_group_t *mg); diff --git a/usr/src/uts/common/fs/zfs/sys/metaslab_impl.h b/usr/src/uts/common/fs/zfs/sys/metaslab_impl.h index 5980cbc843..d67dea7e97 100644 --- a/usr/src/uts/common/fs/zfs/sys/metaslab_impl.h +++ b/usr/src/uts/common/fs/zfs/sys/metaslab_impl.h @@ -19,15 +19,13 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 
* Use is subject to license terms. */ #ifndef _SYS_METASLAB_IMPL_H #define _SYS_METASLAB_IMPL_H -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/metaslab.h> #include <sys/space_map.h> #include <sys/vdev.h> @@ -41,6 +39,7 @@ extern "C" { struct metaslab_class { metaslab_group_t *mc_rotor; uint64_t mc_allocated; + space_map_ops_t *mc_ops; }; struct metaslab_group { diff --git a/usr/src/uts/common/fs/zfs/sys/space_map.h b/usr/src/uts/common/fs/zfs/sys/space_map.h index 8d7860660c..a682bbd409 100644 --- a/usr/src/uts/common/fs/zfs/sys/space_map.h +++ b/usr/src/uts/common/fs/zfs/sys/space_map.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -46,12 +46,14 @@ typedef struct space_map { uint8_t sm_loading; /* map loading? */ kcondvar_t sm_load_cv; /* map load completion */ space_map_ops_t *sm_ops; /* space map block picker ops vector */ + avl_tree_t *sm_pp_root; /* picker-private AVL tree */ void *sm_ppd; /* picker-private data */ kmutex_t *sm_lock; /* pointer to lock that protects map */ } space_map_t; typedef struct space_seg { avl_node_t ss_node; /* AVL node */ + avl_node_t ss_pp_node; /* AVL picker-private node */ uint64_t ss_start; /* starting offset of this segment */ uint64_t ss_end; /* ending offset (non-inclusive) */ } space_seg_t; @@ -74,6 +76,7 @@ struct space_map_ops { uint64_t (*smop_alloc)(space_map_t *sm, uint64_t size); void (*smop_claim)(space_map_t *sm, uint64_t start, uint64_t size); void (*smop_free)(space_map_t *sm, uint64_t start, uint64_t size); + uint64_t (*smop_max)(space_map_t *sm); }; /* @@ -152,6 +155,7 @@ extern void space_map_unload(space_map_t *sm); extern uint64_t space_map_alloc(space_map_t *sm, uint64_t size); extern void space_map_claim(space_map_t *sm, uint64_t start, uint64_t size); extern void space_map_free(space_map_t *sm, uint64_t start, uint64_t size); +extern uint64_t 
space_map_maxsize(space_map_t *sm); extern void space_map_sync(space_map_t *sm, uint8_t maptype, space_map_obj_t *smo, objset_t *os, dmu_tx_t *tx); |