diff options
Diffstat (limited to 'usr/src/cmd')
-rw-r--r-- | usr/src/cmd/mdb/common/modules/zfs/zfs.c | 142 | ||||
-rw-r--r-- | usr/src/cmd/zdb/zdb.c | 125 | ||||
-rw-r--r-- | usr/src/cmd/ztest/ztest.c | 7 |
3 files changed, 183 insertions, 91 deletions
diff --git a/usr/src/cmd/mdb/common/modules/zfs/zfs.c b/usr/src/cmd/mdb/common/modules/zfs/zfs.c index 06ce396ef8..5d3af7ff3c 100644 --- a/usr/src/cmd/mdb/common/modules/zfs/zfs.c +++ b/usr/src/cmd/mdb/common/modules/zfs/zfs.c @@ -178,55 +178,110 @@ mdb_nicenum(uint64_t num, char *buf) } } -static int verbose; - +/* + * <addr>::sm_entries <buffer length in bytes> + * + * Treat the buffer specified by the given address as a buffer that contains + * space map entries. Iterate over the specified number of entries and print + * them in both encoded and decoded form. + */ +/* ARGSUSED */ static int -freelist_walk_init(mdb_walk_state_t *wsp) +sm_entries(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) { - if (wsp->walk_addr == NULL) { - mdb_warn("must supply starting address\n"); - return (WALK_ERR); + uint64_t bufsz = 0; + boolean_t preview = B_FALSE; + + if (!(flags & DCMD_ADDRSPEC)) + return (DCMD_USAGE); + + if (argc < 1) { + preview = B_TRUE; + bufsz = 2; + } else if (argc != 1) { + return (DCMD_USAGE); + } else { + switch (argv[0].a_type) { + case MDB_TYPE_STRING: + bufsz = mdb_strtoull(argv[0].a_un.a_str); + break; + case MDB_TYPE_IMMEDIATE: + bufsz = argv[0].a_un.a_val; + break; + default: + return (DCMD_USAGE); + } } - wsp->walk_data = 0; /* Index into the freelist */ - return (WALK_NEXT); -} + char *actions[] = { "ALLOC", "FREE", "INVALID" }; + for (uintptr_t bufend = addr + bufsz; addr < bufend; + addr += sizeof (uint64_t)) { + uint64_t nwords; + uint64_t start_addr = addr; -static int -freelist_walk_step(mdb_walk_state_t *wsp) -{ - uint64_t entry; - uintptr_t number = (uintptr_t)wsp->walk_data; - char *ddata[] = { "ALLOC", "FREE", "CONDENSE", "INVALID", - "INVALID", "INVALID", "INVALID", "INVALID" }; - int mapshift = SPA_MINBLOCKSHIFT; + uint64_t word = 0; + if (mdb_vread(&word, sizeof (word), addr) == -1) { + mdb_warn("failed to read space map entry %p", addr); + return (DCMD_ERR); + } - if (mdb_vread(&entry, sizeof (entry), wsp->walk_addr) == 
-1) { - mdb_warn("failed to read freelist entry %p", wsp->walk_addr); - return (WALK_DONE); - } - wsp->walk_addr += sizeof (entry); - wsp->walk_data = (void *)(number + 1); + if (SM_PREFIX_DECODE(word) == SM_DEBUG_PREFIX) { + (void) mdb_printf("\t [%6llu] %s: txg %llu, " + "pass %llu\n", + (u_longlong_t)(addr), + actions[SM_DEBUG_ACTION_DECODE(word)], + (u_longlong_t)SM_DEBUG_TXG_DECODE(word), + (u_longlong_t)SM_DEBUG_SYNCPASS_DECODE(word)); + continue; + } - if (SM_DEBUG_DECODE(entry)) { - mdb_printf("DEBUG: %3u %10s: txg=%llu pass=%llu\n", - number, - ddata[SM_DEBUG_ACTION_DECODE(entry)], - SM_DEBUG_TXG_DECODE(entry), - SM_DEBUG_SYNCPASS_DECODE(entry)); - } else { - mdb_printf("Entry: %3u offsets=%08llx-%08llx type=%c " - "size=%06llx", number, - SM_OFFSET_DECODE(entry) << mapshift, - (SM_OFFSET_DECODE(entry) + SM_RUN_DECODE(entry)) << - mapshift, - SM_TYPE_DECODE(entry) == SM_ALLOC ? 'A' : 'F', - SM_RUN_DECODE(entry) << mapshift); - if (verbose) - mdb_printf(" (raw=%012llx)\n", entry); - mdb_printf("\n"); + char entry_type; + uint64_t raw_offset, raw_run, vdev_id = SM_NO_VDEVID; + + if (SM_PREFIX_DECODE(word) != SM2_PREFIX) { + entry_type = (SM_TYPE_DECODE(word) == SM_ALLOC) ? + 'A' : 'F'; + raw_offset = SM_OFFSET_DECODE(word); + raw_run = SM_RUN_DECODE(word); + nwords = 1; + } else { + ASSERT3U(SM_PREFIX_DECODE(word), ==, SM2_PREFIX); + + raw_run = SM2_RUN_DECODE(word); + vdev_id = SM2_VDEV_DECODE(word); + + /* it is a two-word entry so we read another word */ + addr += sizeof (uint64_t); + if (addr >= bufend) { + mdb_warn("buffer ends in the middle of a two " + "word entry\n", addr); + return (DCMD_ERR); + } + + if (mdb_vread(&word, sizeof (word), addr) == -1) { + mdb_warn("failed to read space map entry %p", + addr); + return (DCMD_ERR); + } + + entry_type = (SM2_TYPE_DECODE(word) == SM_ALLOC) ? 
+ 'A' : 'F'; + raw_offset = SM2_OFFSET_DECODE(word); + nwords = 2; + } + + (void) mdb_printf("\t [%6llx] %c range:" + " %010llx-%010llx size: %06llx vdev: %06llu words: %llu\n", + (u_longlong_t)start_addr, + entry_type, (u_longlong_t)raw_offset, + (u_longlong_t)(raw_offset + raw_run), + (u_longlong_t)raw_run, + (u_longlong_t)vdev_id, (u_longlong_t)nwords); + + if (preview) + break; } - return (WALK_NEXT); + return (DCMD_OK); } static int @@ -3974,6 +4029,9 @@ static const mdb_dcmd_t dcmds[] = { "\t-M display metaslab group statistic\n" "\t-h display histogram (requires -m or -M)\n", "given a spa_t, print vdev summary", spa_vdevs }, + { "sm_entries", "<buffer length in bytes>", + "print out space map entries from a buffer decoded", + sm_entries}, { "vdev", ":[-remMh]\n" "\t-r display recursively\n" "\t-e display statistics\n" @@ -4024,8 +4082,6 @@ static const mdb_dcmd_t dcmds[] = { }; static const mdb_walker_t walkers[] = { - { "zms_freelist", "walk ZFS metaslab freelist", - freelist_walk_init, freelist_walk_step, NULL }, { "txg_list", "given any txg_list_t *, walk all entries in all txgs", txg_list_walk_init, txg_list_walk_step, NULL }, { "txg_list0", "given any txg_list_t *, walk all entries in txg 0", diff --git a/usr/src/cmd/zdb/zdb.c b/usr/src/cmd/zdb/zdb.c index aafb0aa29f..6ce4263db8 100644 --- a/usr/src/cmd/zdb/zdb.c +++ b/usr/src/cmd/zdb/zdb.c @@ -774,7 +774,6 @@ verify_spacemap_refcounts(spa_t *spa) static void dump_spacemap(objset_t *os, space_map_t *sm) { - uint64_t alloc, offset, entry; char *ddata[] = { "ALLOC", "FREE", "CONDENSE", "INVALID", "INVALID", "INVALID", "INVALID", "INVALID" }; @@ -791,41 +790,73 @@ dump_spacemap(objset_t *os, space_map_t *sm) /* * Print out the freelist entries in both encoded and decoded form. 
*/ - alloc = 0; - for (offset = 0; offset < space_map_length(sm); - offset += sizeof (entry)) { - uint8_t mapshift = sm->sm_shift; + uint8_t mapshift = sm->sm_shift; + int64_t alloc = 0; + uint64_t word; + for (uint64_t offset = 0; offset < space_map_length(sm); + offset += sizeof (word)) { VERIFY0(dmu_read(os, space_map_object(sm), offset, - sizeof (entry), &entry, DMU_READ_PREFETCH)); - if (SM_DEBUG_DECODE(entry)) { + sizeof (word), &word, DMU_READ_PREFETCH)); + if (sm_entry_is_debug(word)) { (void) printf("\t [%6llu] %s: txg %llu, pass %llu\n", - (u_longlong_t)(offset / sizeof (entry)), - ddata[SM_DEBUG_ACTION_DECODE(entry)], - (u_longlong_t)SM_DEBUG_TXG_DECODE(entry), - (u_longlong_t)SM_DEBUG_SYNCPASS_DECODE(entry)); + (u_longlong_t)(offset / sizeof (word)), + ddata[SM_DEBUG_ACTION_DECODE(word)], + (u_longlong_t)SM_DEBUG_TXG_DECODE(word), + (u_longlong_t)SM_DEBUG_SYNCPASS_DECODE(word)); + continue; + } + + uint8_t words; + char entry_type; + uint64_t entry_off, entry_run, entry_vdev = SM_NO_VDEVID; + + if (sm_entry_is_single_word(word)) { + entry_type = (SM_TYPE_DECODE(word) == SM_ALLOC) ? + 'A' : 'F'; + entry_off = (SM_OFFSET_DECODE(word) << mapshift) + + sm->sm_start; + entry_run = SM_RUN_DECODE(word) << mapshift; + words = 1; } else { - (void) printf("\t [%6llu] %c range:" - " %010llx-%010llx size: %06llx\n", - (u_longlong_t)(offset / sizeof (entry)), - SM_TYPE_DECODE(entry) == SM_ALLOC ? 
'A' : 'F', - (u_longlong_t)((SM_OFFSET_DECODE(entry) << - mapshift) + sm->sm_start), - (u_longlong_t)((SM_OFFSET_DECODE(entry) << - mapshift) + sm->sm_start + - (SM_RUN_DECODE(entry) << mapshift)), - (u_longlong_t)(SM_RUN_DECODE(entry) << mapshift)); - if (SM_TYPE_DECODE(entry) == SM_ALLOC) - alloc += SM_RUN_DECODE(entry) << mapshift; - else - alloc -= SM_RUN_DECODE(entry) << mapshift; + /* it is a two-word entry so we read another word */ + ASSERT(sm_entry_is_double_word(word)); + + uint64_t extra_word; + offset += sizeof (extra_word); + VERIFY0(dmu_read(os, space_map_object(sm), offset, + sizeof (extra_word), &extra_word, + DMU_READ_PREFETCH)); + + ASSERT3U(offset, <=, space_map_length(sm)); + + entry_run = SM2_RUN_DECODE(word) << mapshift; + entry_vdev = SM2_VDEV_DECODE(word); + entry_type = (SM2_TYPE_DECODE(extra_word) == SM_ALLOC) ? + 'A' : 'F'; + entry_off = (SM2_OFFSET_DECODE(extra_word) << + mapshift) + sm->sm_start; + words = 2; } + + (void) printf("\t [%6llu] %c range:" + " %010llx-%010llx size: %06llx vdev: %06llu words: %u\n", + (u_longlong_t)(offset / sizeof (word)), + entry_type, (u_longlong_t)entry_off, + (u_longlong_t)(entry_off + entry_run), + (u_longlong_t)entry_run, + (u_longlong_t)entry_vdev, words); + + if (entry_type == 'A') + alloc += entry_run; + else + alloc -= entry_run; } - if (alloc != space_map_allocated(sm)) { - (void) printf("space_map_object alloc (%llu) INCONSISTENT " - "with space map summary (%llu)\n", - (u_longlong_t)space_map_allocated(sm), (u_longlong_t)alloc); + if ((uint64_t)alloc != space_map_allocated(sm)) { + (void) printf("space_map_object alloc (%lld) INCONSISTENT " + "with space map summary (%lld)\n", + (longlong_t)space_map_allocated(sm), (longlong_t)alloc); } } @@ -1153,7 +1184,7 @@ dump_ddt(ddt_t *ddt, enum ddt_type type, enum ddt_class class) while ((error = ddt_object_walk(ddt, type, class, &walk, &dde)) == 0) dump_dde(ddt, &dde, walk); - ASSERT(error == ENOENT); + ASSERT3U(error, ==, ENOENT); (void) printf("\n"); 
} @@ -3070,15 +3101,14 @@ typedef struct checkpoint_sm_exclude_entry_arg { } checkpoint_sm_exclude_entry_arg_t; static int -checkpoint_sm_exclude_entry_cb(maptype_t type, uint64_t offset, uint64_t size, - void *arg) +checkpoint_sm_exclude_entry_cb(space_map_entry_t *sme, void *arg) { checkpoint_sm_exclude_entry_arg_t *cseea = arg; vdev_t *vd = cseea->cseea_vd; - metaslab_t *ms = vd->vdev_ms[offset >> vd->vdev_ms_shift]; - uint64_t end = offset + size; + metaslab_t *ms = vd->vdev_ms[sme->sme_offset >> vd->vdev_ms_shift]; + uint64_t end = sme->sme_offset + sme->sme_run; - ASSERT(type == SM_FREE); + ASSERT(sme->sme_type == SM_FREE); /* * Since the vdev_checkpoint_sm exists in the vdev level @@ -3096,7 +3126,7 @@ checkpoint_sm_exclude_entry_cb(maptype_t type, uint64_t offset, uint64_t size, * metaslab boundaries. So if needed we could add code * that handles metaslab-crossing segments in the future. */ - VERIFY3U(offset, >=, ms->ms_start); + VERIFY3U(sme->sme_offset, >=, ms->ms_start); VERIFY3U(end, <=, ms->ms_start + ms->ms_size); /* @@ -3104,10 +3134,10 @@ checkpoint_sm_exclude_entry_cb(maptype_t type, uint64_t offset, uint64_t size, * also verify that the entry is there to begin with. 
*/ mutex_enter(&ms->ms_lock); - range_tree_remove(ms->ms_allocatable, offset, size); + range_tree_remove(ms->ms_allocatable, sme->sme_offset, sme->sme_run); mutex_exit(&ms->ms_lock); - cseea->cseea_checkpoint_size += size; + cseea->cseea_checkpoint_size += sme->sme_run; return (0); } @@ -4082,15 +4112,14 @@ typedef struct verify_checkpoint_sm_entry_cb_arg { #define ENTRIES_PER_PROGRESS_UPDATE 10000 static int -verify_checkpoint_sm_entry_cb(maptype_t type, uint64_t offset, uint64_t size, - void *arg) +verify_checkpoint_sm_entry_cb(space_map_entry_t *sme, void *arg) { verify_checkpoint_sm_entry_cb_arg_t *vcsec = arg; vdev_t *vd = vcsec->vcsec_vd; - metaslab_t *ms = vd->vdev_ms[offset >> vd->vdev_ms_shift]; - uint64_t end = offset + size; + metaslab_t *ms = vd->vdev_ms[sme->sme_offset >> vd->vdev_ms_shift]; + uint64_t end = sme->sme_offset + sme->sme_run; - ASSERT(type == SM_FREE); + ASSERT(sme->sme_type == SM_FREE); if ((vcsec->vcsec_entryid % ENTRIES_PER_PROGRESS_UPDATE) == 0) { (void) fprintf(stderr, @@ -4104,7 +4133,7 @@ verify_checkpoint_sm_entry_cb(maptype_t type, uint64_t offset, uint64_t size, /* * See comment in checkpoint_sm_exclude_entry_cb() */ - VERIFY3U(offset, >=, ms->ms_start); + VERIFY3U(sme->sme_offset, >=, ms->ms_start); VERIFY3U(end, <=, ms->ms_start + ms->ms_size); /* @@ -4113,7 +4142,7 @@ verify_checkpoint_sm_entry_cb(maptype_t type, uint64_t offset, uint64_t size, * their respective ms_allocateable trees should not contain them. 
*/ mutex_enter(&ms->ms_lock); - range_tree_verify(ms->ms_allocatable, offset, size); + range_tree_verify(ms->ms_allocatable, sme->sme_offset, sme->sme_run); mutex_exit(&ms->ms_lock); return (0); @@ -4359,7 +4388,7 @@ verify_checkpoint(spa_t *spa) DMU_POOL_ZPOOL_CHECKPOINT, sizeof (uint64_t), sizeof (uberblock_t) / sizeof (uint64_t), &checkpoint); - if (error == ENOENT) { + if (error == ENOENT && !dump_opt['L']) { /* * If the feature is active but the uberblock is missing * then we must be in the middle of discarding the @@ -4382,7 +4411,7 @@ verify_checkpoint(spa_t *spa) error = 3; } - if (error == 0) + if (error == 0 && !dump_opt['L']) verify_checkpoint_blocks(spa); return (error); @@ -4488,7 +4517,7 @@ dump_zpool(spa_t *spa) if (dump_opt['h']) dump_history(spa); - if (rc == 0 && !dump_opt['L']) + if (rc == 0) rc = verify_checkpoint(spa); if (rc != 0) { diff --git a/usr/src/cmd/ztest/ztest.c b/usr/src/cmd/ztest/ztest.c index 1522c75485..ff45ab193e 100644 --- a/usr/src/cmd/ztest/ztest.c +++ b/usr/src/cmd/ztest/ztest.c @@ -193,6 +193,7 @@ extern uint64_t zfs_deadman_synctime_ms; extern int metaslab_preload_limit; extern boolean_t zfs_compressed_arc_enabled; extern boolean_t zfs_abd_scatter_enabled; +extern boolean_t zfs_force_some_double_word_sm_entries; static ztest_shared_opts_t *ztest_shared_opts; static ztest_shared_opts_t ztest_opts; @@ -6394,6 +6395,12 @@ main(int argc, char **argv) dprintf_setup(&argc, argv); zfs_deadman_synctime_ms = 300000; + /* + * As two-word space map entries may not come up often (especially + * if pool and vdev sizes are small) we want to force at least some + * of them so the feature gets tested. + */ + zfs_force_some_double_word_sm_entries = B_TRUE; ztest_fd_rand = open("/dev/urandom", O_RDONLY); ASSERT3S(ztest_fd_rand, >=, 0); |