diff options
Diffstat (limited to 'usr/src/cmd/mdb/common/modules/zfs/zfs.c')
-rw-r--r-- | usr/src/cmd/mdb/common/modules/zfs/zfs.c | 263 |
1 file changed, 240 insertions, 23 deletions
diff --git a/usr/src/cmd/mdb/common/modules/zfs/zfs.c b/usr/src/cmd/mdb/common/modules/zfs/zfs.c index 7cc12ccf0a..aaa29cdb17 100644 --- a/usr/src/cmd/mdb/common/modules/zfs/zfs.c +++ b/usr/src/cmd/mdb/common/modules/zfs/zfs.c @@ -55,6 +55,7 @@ #include <sys/zfs_acl.h> #include <sys/sa_impl.h> #include <sys/multilist.h> +#include <sys/btree.h> #ifdef _KERNEL #define ZFS_OBJ_NAME "zfs" @@ -1462,13 +1463,15 @@ spa_print_config(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) 0, NULL)); } - - typedef struct mdb_range_tree { struct { - uint64_t avl_numnodes; + uint64_t bt_num_elems; + uint64_t bt_num_nodes; } rt_root; uint64_t rt_space; + range_seg_type_t rt_type; + uint8_t rt_shift; + uint64_t rt_start; } mdb_range_tree_t; typedef struct mdb_metaslab_group { @@ -1566,15 +1569,13 @@ metaslab_stats(mdb_vdev_t *vd, int spa_flags) ms.ms_unflushed_frees, 0) == -1) return (DCMD_ERR); ufrees = rt.rt_space; - raw_uchanges_mem = rt.rt_root.avl_numnodes * - mdb_ctf_sizeof_by_name("range_seg_t"); + raw_uchanges_mem = rt.rt_root.bt_num_nodes * BTREE_LEAF_SIZE; if (mdb_ctf_vread(&rt, "range_tree_t", "mdb_range_tree_t", ms.ms_unflushed_allocs, 0) == -1) return (DCMD_ERR); uallocs = rt.rt_space; - raw_uchanges_mem += rt.rt_root.avl_numnodes * - mdb_ctf_sizeof_by_name("range_seg_t"); + raw_uchanges_mem += rt.rt_root.bt_num_nodes * BTREE_LEAF_SIZE; mdb_nicenum(raw_uchanges_mem, uchanges_mem); raw_free = ms.ms_size; @@ -1644,14 +1645,12 @@ metaslab_group_stats(mdb_vdev_t *vd, int spa_flags) if (mdb_ctf_vread(&rt, "range_tree_t", "mdb_range_tree_t", ms.ms_unflushed_frees, 0) == -1) return (DCMD_ERR); - raw_uchanges_mem += - rt.rt_root.avl_numnodes * sizeof (range_seg_t); + raw_uchanges_mem += rt.rt_root.bt_num_nodes * BTREE_LEAF_SIZE; if (mdb_ctf_vread(&rt, "range_tree_t", "mdb_range_tree_t", ms.ms_unflushed_allocs, 0) == -1) return (DCMD_ERR); - raw_uchanges_mem += - rt.rt_root.avl_numnodes * sizeof (range_seg_t); + raw_uchanges_mem += rt.rt_root.bt_num_nodes * 
BTREE_LEAF_SIZE; } mdb_nicenum(raw_uchanges_mem, uchanges_mem); mdb_printf("%10s\n", uchanges_mem); @@ -2669,6 +2668,202 @@ zio_state(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) return (mdb_pwalk_dcmd("zio_root", "zio", argc, argv, addr)); } + +typedef struct mdb_zfs_btree_hdr { + uintptr_t bth_parent; + boolean_t bth_core; + /* + * For both leaf and core nodes, represents the number of elements in + * the node. For core nodes, they will have bth_count + 1 children. + */ + uint32_t bth_count; +} mdb_zfs_btree_hdr_t; + +typedef struct mdb_zfs_btree_core { + mdb_zfs_btree_hdr_t btc_hdr; + uintptr_t btc_children[BTREE_CORE_ELEMS + 1]; + uint8_t btc_elems[]; +} mdb_zfs_btree_core_t; + +typedef struct mdb_zfs_btree_leaf { + mdb_zfs_btree_hdr_t btl_hdr; + uint8_t btl_elems[]; +} mdb_zfs_btree_leaf_t; + +typedef struct mdb_zfs_btree { + uintptr_t bt_root; + size_t bt_elem_size; +} mdb_zfs_btree_t; + +typedef struct btree_walk_data { + mdb_zfs_btree_t bwd_btree; + mdb_zfs_btree_hdr_t *bwd_node; + uint64_t bwd_offset; // In units of bt_node_size +} btree_walk_data_t; + +static uintptr_t +btree_leftmost_child(uintptr_t addr, mdb_zfs_btree_hdr_t *buf) +{ + size_t size = offsetof(zfs_btree_core_t, btc_children) + + sizeof (uintptr_t); + for (;;) { + if (mdb_vread(buf, size, addr) == -1) { + mdb_warn("failed to read at %p\n", addr); + return ((uintptr_t)0ULL); + } + if (!buf->bth_core) + return (addr); + mdb_zfs_btree_core_t *node = (mdb_zfs_btree_core_t *)buf; + addr = node->btc_children[0]; + } +} + +static int +btree_walk_step(mdb_walk_state_t *wsp) +{ + btree_walk_data_t *bwd = wsp->walk_data; + size_t elem_size = bwd->bwd_btree.bt_elem_size; + if (wsp->walk_addr == 0ULL) + return (WALK_DONE); + + if (!bwd->bwd_node->bth_core) { + /* + * For the first element in a leaf node, read in the full + * leaf, since we only had part of it read in before. 
+ */ + if (bwd->bwd_offset == 0) { + if (mdb_vread(bwd->bwd_node, BTREE_LEAF_SIZE, + wsp->walk_addr) == -1) { + mdb_warn("failed to read at %p\n", + wsp->walk_addr); + return (WALK_ERR); + } + } + + int status = wsp->walk_callback((uintptr_t)(wsp->walk_addr + + offsetof(mdb_zfs_btree_leaf_t, btl_elems) + + bwd->bwd_offset * elem_size), bwd->bwd_node, + wsp->walk_cbdata); + if (status != WALK_NEXT) + return (status); + bwd->bwd_offset++; + + /* Find the next element, if we're at the end of the leaf. */ + while (bwd->bwd_offset == bwd->bwd_node->bth_count) { + uintptr_t par = bwd->bwd_node->bth_parent; + uintptr_t cur = wsp->walk_addr; + wsp->walk_addr = par; + if (par == 0ULL) + return (WALK_NEXT); + + size_t size = sizeof (zfs_btree_core_t) + + BTREE_CORE_ELEMS * elem_size; + if (mdb_vread(bwd->bwd_node, size, wsp->walk_addr) == + -1) { + mdb_warn("failed to read at %p\n", + wsp->walk_addr); + return (WALK_ERR); + } + mdb_zfs_btree_core_t *node = + (mdb_zfs_btree_core_t *)bwd->bwd_node; + int i; + for (i = 0; i <= bwd->bwd_node->bth_count; i++) { + if (node->btc_children[i] == cur) + break; + } + if (i > bwd->bwd_node->bth_count) { + mdb_warn("btree parent/child mismatch at " + "%#lx\n", cur); + return (WALK_ERR); + } + bwd->bwd_offset = i; + } + return (WALK_NEXT); + } + + if (!bwd->bwd_node->bth_core) { + mdb_warn("Invalid btree node at %#lx\n", wsp->walk_addr); + return (WALK_ERR); + } + mdb_zfs_btree_core_t *node = (mdb_zfs_btree_core_t *)bwd->bwd_node; + int status = wsp->walk_callback((uintptr_t)(wsp->walk_addr + + offsetof(mdb_zfs_btree_core_t, btc_elems) + bwd->bwd_offset * + elem_size), bwd->bwd_node, wsp->walk_cbdata); + if (status != WALK_NEXT) + return (status); + + uintptr_t new_child = node->btc_children[bwd->bwd_offset + 1]; + wsp->walk_addr = btree_leftmost_child(new_child, bwd->bwd_node); + if (wsp->walk_addr == 0ULL) + return (WALK_ERR); + + bwd->bwd_offset = 0; + return (WALK_NEXT); +} + +static int +btree_walk_init(mdb_walk_state_t *wsp) +{ + 
btree_walk_data_t *bwd; + + if (wsp->walk_addr == 0ULL) { + mdb_warn("must supply address of zfs_btree_t\n"); + return (WALK_ERR); + } + + bwd = mdb_zalloc(sizeof (btree_walk_data_t), UM_SLEEP); + if (mdb_ctf_vread(&bwd->bwd_btree, "zfs_btree_t", "mdb_zfs_btree_t", + wsp->walk_addr, 0) == -1) { + mdb_free(bwd, sizeof (*bwd)); + return (WALK_ERR); + } + + if (bwd->bwd_btree.bt_elem_size == 0) { + mdb_warn("invalid or uninitialized btree at %#lx\n", + wsp->walk_addr); + mdb_free(bwd, sizeof (*bwd)); + return (WALK_ERR); + } + + size_t size = MAX(BTREE_LEAF_SIZE, sizeof (zfs_btree_core_t) + + BTREE_CORE_ELEMS * bwd->bwd_btree.bt_elem_size); + bwd->bwd_node = mdb_zalloc(size, UM_SLEEP); + + uintptr_t node = (uintptr_t)bwd->bwd_btree.bt_root; + if (node == 0ULL) { + wsp->walk_addr = 0ULL; + wsp->walk_data = bwd; + return (WALK_NEXT); + } + node = btree_leftmost_child(node, bwd->bwd_node); + if (node == 0ULL) { + mdb_free(bwd->bwd_node, size); + mdb_free(bwd, sizeof (*bwd)); + return (WALK_ERR); + } + bwd->bwd_offset = 0; + + wsp->walk_addr = node; + wsp->walk_data = bwd; + return (WALK_NEXT); +} + +static void +btree_walk_fini(mdb_walk_state_t *wsp) +{ + btree_walk_data_t *bwd = (btree_walk_data_t *)wsp->walk_data; + + if (bwd == NULL) + return; + + size_t size = MAX(BTREE_LEAF_SIZE, sizeof (zfs_btree_core_t) + + BTREE_CORE_ELEMS * bwd->bwd_btree.bt_elem_size); + if (bwd->bwd_node != NULL) + mdb_free(bwd->bwd_node, size); + + mdb_free(bwd, sizeof (*bwd)); +} + typedef struct mdb_multilist { uint64_t ml_num_sublists; uintptr_t ml_sublists; @@ -4170,23 +4365,43 @@ out: return (rc); } -typedef struct mdb_range_seg { +typedef struct mdb_range_seg64 { uint64_t rs_start; uint64_t rs_end; -} mdb_range_seg_t; +} mdb_range_seg64_t; + +typedef struct mdb_range_seg32 { + uint32_t rs_start; + uint32_t rs_end; +} mdb_range_seg32_t; /* ARGSUSED */ static int range_tree_cb(uintptr_t addr, const void *unknown, void *arg) { - mdb_range_seg_t rs; + mdb_range_tree_t *rt = 
(mdb_range_tree_t *)arg; + uint64_t start, end; - if (mdb_ctf_vread(&rs, ZFS_STRUCT "range_seg", "mdb_range_seg_t", - addr, 0) == -1) - return (DCMD_ERR); + if (rt->rt_type == RANGE_SEG64) { + mdb_range_seg64_t rs; + + if (mdb_ctf_vread(&rs, ZFS_STRUCT "range_seg64", + "mdb_range_seg64_t", addr, 0) == -1) + return (DCMD_ERR); + start = rs.rs_start; + end = rs.rs_end; + } else { + ASSERT3U(rt->rt_type, ==, RANGE_SEG32); + mdb_range_seg32_t rs; + + if (mdb_ctf_vread(&rs, ZFS_STRUCT "range_seg32", + "mdb_range_seg32_t", addr, 0) == -1) + return (DCMD_ERR); + start = ((uint64_t)rs.rs_start << rt->rt_shift) + rt->rt_start; + end = ((uint64_t)rs.rs_end << rt->rt_shift) + rt->rt_start; + } - mdb_printf("\t[%llx %llx) (length %llx)\n", - rs.rs_start, rs.rs_end, rs.rs_end - rs.rs_start); + mdb_printf("\t[%llx %llx) (length %llx)\n", start, end, end - start); return (0); } @@ -4197,7 +4412,7 @@ range_tree(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) { mdb_range_tree_t rt; - uintptr_t avl_addr; + uintptr_t btree_addr; if (!(flags & DCMD_ADDRSPEC)) return (DCMD_USAGE); @@ -4207,12 +4422,12 @@ range_tree(uintptr_t addr, uint_t flags, int argc, return (DCMD_ERR); mdb_printf("%p: range tree of %llu entries, %llu bytes\n", - addr, rt.rt_root.avl_numnodes, rt.rt_space); + addr, rt.rt_root.bt_num_elems, rt.rt_space); - avl_addr = addr + + btree_addr = addr + mdb_ctf_offsetof_by_name(ZFS_STRUCT "range_tree", "rt_root"); - if (mdb_pwalk("avl", range_tree_cb, NULL, avl_addr) != 0) { + if (mdb_pwalk("zfs_btree", range_tree_cb, &rt, btree_addr) != 0) { mdb_warn("can't walk range_tree segments"); return (DCMD_ERR); } @@ -4407,6 +4622,8 @@ static const mdb_walker_t walkers[] = { { "zfs_acl_node_aces0", "given a zfs_acl_node_t, walk all ACEs as ace_t", zfs_acl_node_aces0_walk_init, zfs_aces_walk_step, NULL }, + { "zfs_btree", "given a zfs_btree_t *, walk all entries", + btree_walk_init, btree_walk_step, btree_walk_fini }, { NULL } }; |