summary | refs | log | tree | commit | diff
path: root/usr/src/cmd
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src/cmd')
-rw-r--r-- usr/src/cmd/mdb/common/modules/zfs/zfs.c 142
-rw-r--r-- usr/src/cmd/zdb/zdb.c 125
-rw-r--r-- usr/src/cmd/ztest/ztest.c 7
3 files changed, 183 insertions, 91 deletions
diff --git a/usr/src/cmd/mdb/common/modules/zfs/zfs.c b/usr/src/cmd/mdb/common/modules/zfs/zfs.c
index 06ce396ef8..5d3af7ff3c 100644
--- a/usr/src/cmd/mdb/common/modules/zfs/zfs.c
+++ b/usr/src/cmd/mdb/common/modules/zfs/zfs.c
@@ -178,55 +178,110 @@ mdb_nicenum(uint64_t num, char *buf)
}
}
-static int verbose;
-
+/*
+ * <addr>::sm_entries <buffer length in bytes>
+ *
+ * Treat the buffer specified by the given address as a buffer that contains
+ * space map entries. Iterate over the entries in the given number of bytes
+ * and print them in both encoded and decoded form.
+ */
+/* ARGSUSED */
static int
-freelist_walk_init(mdb_walk_state_t *wsp)
+sm_entries(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
- if (wsp->walk_addr == NULL) {
- mdb_warn("must supply starting address\n");
- return (WALK_ERR);
+ uint64_t bufsz = 0;
+ boolean_t preview = B_FALSE;
+
+ if (!(flags & DCMD_ADDRSPEC))
+ return (DCMD_USAGE);
+
+ if (argc < 1) {
+ preview = B_TRUE;
+ bufsz = 2;
+ } else if (argc != 1) {
+ return (DCMD_USAGE);
+ } else {
+ switch (argv[0].a_type) {
+ case MDB_TYPE_STRING:
+ bufsz = mdb_strtoull(argv[0].a_un.a_str);
+ break;
+ case MDB_TYPE_IMMEDIATE:
+ bufsz = argv[0].a_un.a_val;
+ break;
+ default:
+ return (DCMD_USAGE);
+ }
}
- wsp->walk_data = 0; /* Index into the freelist */
- return (WALK_NEXT);
-}
+ char *actions[] = { "ALLOC", "FREE", "INVALID" };
+ for (uintptr_t bufend = addr + bufsz; addr < bufend;
+ addr += sizeof (uint64_t)) {
+ uint64_t nwords;
+ uint64_t start_addr = addr;
-static int
-freelist_walk_step(mdb_walk_state_t *wsp)
-{
- uint64_t entry;
- uintptr_t number = (uintptr_t)wsp->walk_data;
- char *ddata[] = { "ALLOC", "FREE", "CONDENSE", "INVALID",
- "INVALID", "INVALID", "INVALID", "INVALID" };
- int mapshift = SPA_MINBLOCKSHIFT;
+ uint64_t word = 0;
+ if (mdb_vread(&word, sizeof (word), addr) == -1) {
+ mdb_warn("failed to read space map entry %p", addr);
+ return (DCMD_ERR);
+ }
- if (mdb_vread(&entry, sizeof (entry), wsp->walk_addr) == -1) {
- mdb_warn("failed to read freelist entry %p", wsp->walk_addr);
- return (WALK_DONE);
- }
- wsp->walk_addr += sizeof (entry);
- wsp->walk_data = (void *)(number + 1);
+ if (SM_PREFIX_DECODE(word) == SM_DEBUG_PREFIX) {
+ (void) mdb_printf("\t [%6llu] %s: txg %llu, "
+ "pass %llu\n",
+ (u_longlong_t)(addr),
+ actions[SM_DEBUG_ACTION_DECODE(word)],
+ (u_longlong_t)SM_DEBUG_TXG_DECODE(word),
+ (u_longlong_t)SM_DEBUG_SYNCPASS_DECODE(word));
+ continue;
+ }
- if (SM_DEBUG_DECODE(entry)) {
- mdb_printf("DEBUG: %3u %10s: txg=%llu pass=%llu\n",
- number,
- ddata[SM_DEBUG_ACTION_DECODE(entry)],
- SM_DEBUG_TXG_DECODE(entry),
- SM_DEBUG_SYNCPASS_DECODE(entry));
- } else {
- mdb_printf("Entry: %3u offsets=%08llx-%08llx type=%c "
- "size=%06llx", number,
- SM_OFFSET_DECODE(entry) << mapshift,
- (SM_OFFSET_DECODE(entry) + SM_RUN_DECODE(entry)) <<
- mapshift,
- SM_TYPE_DECODE(entry) == SM_ALLOC ? 'A' : 'F',
- SM_RUN_DECODE(entry) << mapshift);
- if (verbose)
- mdb_printf(" (raw=%012llx)\n", entry);
- mdb_printf("\n");
+ char entry_type;
+ uint64_t raw_offset, raw_run, vdev_id = SM_NO_VDEVID;
+
+ if (SM_PREFIX_DECODE(word) != SM2_PREFIX) {
+ entry_type = (SM_TYPE_DECODE(word) == SM_ALLOC) ?
+ 'A' : 'F';
+ raw_offset = SM_OFFSET_DECODE(word);
+ raw_run = SM_RUN_DECODE(word);
+ nwords = 1;
+ } else {
+ ASSERT3U(SM_PREFIX_DECODE(word), ==, SM2_PREFIX);
+
+ raw_run = SM2_RUN_DECODE(word);
+ vdev_id = SM2_VDEV_DECODE(word);
+
+ /* it is a two-word entry so we read another word */
+ addr += sizeof (uint64_t);
+ if (addr >= bufend) {
+ mdb_warn("buffer ends in the middle of a two "
+ "word entry\n", addr);
+ return (DCMD_ERR);
+ }
+
+ if (mdb_vread(&word, sizeof (word), addr) == -1) {
+ mdb_warn("failed to read space map entry %p",
+ addr);
+ return (DCMD_ERR);
+ }
+
+ entry_type = (SM2_TYPE_DECODE(word) == SM_ALLOC) ?
+ 'A' : 'F';
+ raw_offset = SM2_OFFSET_DECODE(word);
+ nwords = 2;
+ }
+
+ (void) mdb_printf("\t [%6llx] %c range:"
+ " %010llx-%010llx size: %06llx vdev: %06llu words: %llu\n",
+ (u_longlong_t)start_addr,
+ entry_type, (u_longlong_t)raw_offset,
+ (u_longlong_t)(raw_offset + raw_run),
+ (u_longlong_t)raw_run,
+ (u_longlong_t)vdev_id, (u_longlong_t)nwords);
+
+ if (preview)
+ break;
}
- return (WALK_NEXT);
+ return (DCMD_OK);
}
static int
@@ -3974,6 +4029,9 @@ static const mdb_dcmd_t dcmds[] = {
"\t-M display metaslab group statistic\n"
"\t-h display histogram (requires -m or -M)\n",
"given a spa_t, print vdev summary", spa_vdevs },
+ { "sm_entries", "<buffer length in bytes>",
+ "print out space map entries from a buffer decoded",
+ sm_entries},
{ "vdev", ":[-remMh]\n"
"\t-r display recursively\n"
"\t-e display statistics\n"
@@ -4024,8 +4082,6 @@ static const mdb_dcmd_t dcmds[] = {
};
static const mdb_walker_t walkers[] = {
- { "zms_freelist", "walk ZFS metaslab freelist",
- freelist_walk_init, freelist_walk_step, NULL },
{ "txg_list", "given any txg_list_t *, walk all entries in all txgs",
txg_list_walk_init, txg_list_walk_step, NULL },
{ "txg_list0", "given any txg_list_t *, walk all entries in txg 0",
diff --git a/usr/src/cmd/zdb/zdb.c b/usr/src/cmd/zdb/zdb.c
index aafb0aa29f..6ce4263db8 100644
--- a/usr/src/cmd/zdb/zdb.c
+++ b/usr/src/cmd/zdb/zdb.c
@@ -774,7 +774,6 @@ verify_spacemap_refcounts(spa_t *spa)
static void
dump_spacemap(objset_t *os, space_map_t *sm)
{
- uint64_t alloc, offset, entry;
char *ddata[] = { "ALLOC", "FREE", "CONDENSE", "INVALID",
"INVALID", "INVALID", "INVALID", "INVALID" };
@@ -791,41 +790,73 @@ dump_spacemap(objset_t *os, space_map_t *sm)
/*
* Print out the freelist entries in both encoded and decoded form.
*/
- alloc = 0;
- for (offset = 0; offset < space_map_length(sm);
- offset += sizeof (entry)) {
- uint8_t mapshift = sm->sm_shift;
+ uint8_t mapshift = sm->sm_shift;
+ int64_t alloc = 0;
+ uint64_t word;
+ for (uint64_t offset = 0; offset < space_map_length(sm);
+ offset += sizeof (word)) {
VERIFY0(dmu_read(os, space_map_object(sm), offset,
- sizeof (entry), &entry, DMU_READ_PREFETCH));
- if (SM_DEBUG_DECODE(entry)) {
+ sizeof (word), &word, DMU_READ_PREFETCH));
+ if (sm_entry_is_debug(word)) {
(void) printf("\t [%6llu] %s: txg %llu, pass %llu\n",
- (u_longlong_t)(offset / sizeof (entry)),
- ddata[SM_DEBUG_ACTION_DECODE(entry)],
- (u_longlong_t)SM_DEBUG_TXG_DECODE(entry),
- (u_longlong_t)SM_DEBUG_SYNCPASS_DECODE(entry));
+ (u_longlong_t)(offset / sizeof (word)),
+ ddata[SM_DEBUG_ACTION_DECODE(word)],
+ (u_longlong_t)SM_DEBUG_TXG_DECODE(word),
+ (u_longlong_t)SM_DEBUG_SYNCPASS_DECODE(word));
+ continue;
+ }
+
+ uint8_t words;
+ char entry_type;
+ uint64_t entry_off, entry_run, entry_vdev = SM_NO_VDEVID;
+
+ if (sm_entry_is_single_word(word)) {
+ entry_type = (SM_TYPE_DECODE(word) == SM_ALLOC) ?
+ 'A' : 'F';
+ entry_off = (SM_OFFSET_DECODE(word) << mapshift) +
+ sm->sm_start;
+ entry_run = SM_RUN_DECODE(word) << mapshift;
+ words = 1;
} else {
- (void) printf("\t [%6llu] %c range:"
- " %010llx-%010llx size: %06llx\n",
- (u_longlong_t)(offset / sizeof (entry)),
- SM_TYPE_DECODE(entry) == SM_ALLOC ? 'A' : 'F',
- (u_longlong_t)((SM_OFFSET_DECODE(entry) <<
- mapshift) + sm->sm_start),
- (u_longlong_t)((SM_OFFSET_DECODE(entry) <<
- mapshift) + sm->sm_start +
- (SM_RUN_DECODE(entry) << mapshift)),
- (u_longlong_t)(SM_RUN_DECODE(entry) << mapshift));
- if (SM_TYPE_DECODE(entry) == SM_ALLOC)
- alloc += SM_RUN_DECODE(entry) << mapshift;
- else
- alloc -= SM_RUN_DECODE(entry) << mapshift;
+ /* it is a two-word entry so we read another word */
+ ASSERT(sm_entry_is_double_word(word));
+
+ uint64_t extra_word;
+ offset += sizeof (extra_word);
+ VERIFY0(dmu_read(os, space_map_object(sm), offset,
+ sizeof (extra_word), &extra_word,
+ DMU_READ_PREFETCH));
+
+ ASSERT3U(offset, <=, space_map_length(sm));
+
+ entry_run = SM2_RUN_DECODE(word) << mapshift;
+ entry_vdev = SM2_VDEV_DECODE(word);
+ entry_type = (SM2_TYPE_DECODE(extra_word) == SM_ALLOC) ?
+ 'A' : 'F';
+ entry_off = (SM2_OFFSET_DECODE(extra_word) <<
+ mapshift) + sm->sm_start;
+ words = 2;
}
+
+ (void) printf("\t [%6llu] %c range:"
+ " %010llx-%010llx size: %06llx vdev: %06llu words: %u\n",
+ (u_longlong_t)(offset / sizeof (word)),
+ entry_type, (u_longlong_t)entry_off,
+ (u_longlong_t)(entry_off + entry_run),
+ (u_longlong_t)entry_run,
+ (u_longlong_t)entry_vdev, words);
+
+ if (entry_type == 'A')
+ alloc += entry_run;
+ else
+ alloc -= entry_run;
}
- if (alloc != space_map_allocated(sm)) {
- (void) printf("space_map_object alloc (%llu) INCONSISTENT "
- "with space map summary (%llu)\n",
- (u_longlong_t)space_map_allocated(sm), (u_longlong_t)alloc);
+ if ((uint64_t)alloc != space_map_allocated(sm)) {
+ (void) printf("space_map_object alloc (%lld) INCONSISTENT "
+ "with space map summary (%lld)\n",
+ (longlong_t)space_map_allocated(sm), (longlong_t)alloc);
}
}
@@ -1153,7 +1184,7 @@ dump_ddt(ddt_t *ddt, enum ddt_type type, enum ddt_class class)
while ((error = ddt_object_walk(ddt, type, class, &walk, &dde)) == 0)
dump_dde(ddt, &dde, walk);
- ASSERT(error == ENOENT);
+ ASSERT3U(error, ==, ENOENT);
(void) printf("\n");
}
@@ -3070,15 +3101,14 @@ typedef struct checkpoint_sm_exclude_entry_arg {
} checkpoint_sm_exclude_entry_arg_t;
static int
-checkpoint_sm_exclude_entry_cb(maptype_t type, uint64_t offset, uint64_t size,
- void *arg)
+checkpoint_sm_exclude_entry_cb(space_map_entry_t *sme, void *arg)
{
checkpoint_sm_exclude_entry_arg_t *cseea = arg;
vdev_t *vd = cseea->cseea_vd;
- metaslab_t *ms = vd->vdev_ms[offset >> vd->vdev_ms_shift];
- uint64_t end = offset + size;
+ metaslab_t *ms = vd->vdev_ms[sme->sme_offset >> vd->vdev_ms_shift];
+ uint64_t end = sme->sme_offset + sme->sme_run;
- ASSERT(type == SM_FREE);
+ ASSERT(sme->sme_type == SM_FREE);
/*
* Since the vdev_checkpoint_sm exists in the vdev level
@@ -3096,7 +3126,7 @@ checkpoint_sm_exclude_entry_cb(maptype_t type, uint64_t offset, uint64_t size,
* metaslab boundaries. So if needed we could add code
* that handles metaslab-crossing segments in the future.
*/
- VERIFY3U(offset, >=, ms->ms_start);
+ VERIFY3U(sme->sme_offset, >=, ms->ms_start);
VERIFY3U(end, <=, ms->ms_start + ms->ms_size);
/*
@@ -3104,10 +3134,10 @@ checkpoint_sm_exclude_entry_cb(maptype_t type, uint64_t offset, uint64_t size,
* also verify that the entry is there to begin with.
*/
mutex_enter(&ms->ms_lock);
- range_tree_remove(ms->ms_allocatable, offset, size);
+ range_tree_remove(ms->ms_allocatable, sme->sme_offset, sme->sme_run);
mutex_exit(&ms->ms_lock);
- cseea->cseea_checkpoint_size += size;
+ cseea->cseea_checkpoint_size += sme->sme_run;
return (0);
}
@@ -4082,15 +4112,14 @@ typedef struct verify_checkpoint_sm_entry_cb_arg {
#define ENTRIES_PER_PROGRESS_UPDATE 10000
static int
-verify_checkpoint_sm_entry_cb(maptype_t type, uint64_t offset, uint64_t size,
- void *arg)
+verify_checkpoint_sm_entry_cb(space_map_entry_t *sme, void *arg)
{
verify_checkpoint_sm_entry_cb_arg_t *vcsec = arg;
vdev_t *vd = vcsec->vcsec_vd;
- metaslab_t *ms = vd->vdev_ms[offset >> vd->vdev_ms_shift];
- uint64_t end = offset + size;
+ metaslab_t *ms = vd->vdev_ms[sme->sme_offset >> vd->vdev_ms_shift];
+ uint64_t end = sme->sme_offset + sme->sme_run;
- ASSERT(type == SM_FREE);
+ ASSERT(sme->sme_type == SM_FREE);
if ((vcsec->vcsec_entryid % ENTRIES_PER_PROGRESS_UPDATE) == 0) {
(void) fprintf(stderr,
@@ -4104,7 +4133,7 @@ verify_checkpoint_sm_entry_cb(maptype_t type, uint64_t offset, uint64_t size,
/*
* See comment in checkpoint_sm_exclude_entry_cb()
*/
- VERIFY3U(offset, >=, ms->ms_start);
+ VERIFY3U(sme->sme_offset, >=, ms->ms_start);
VERIFY3U(end, <=, ms->ms_start + ms->ms_size);
/*
@@ -4113,7 +4142,7 @@ verify_checkpoint_sm_entry_cb(maptype_t type, uint64_t offset, uint64_t size,
* their respective ms_allocateable trees should not contain them.
*/
mutex_enter(&ms->ms_lock);
- range_tree_verify(ms->ms_allocatable, offset, size);
+ range_tree_verify(ms->ms_allocatable, sme->sme_offset, sme->sme_run);
mutex_exit(&ms->ms_lock);
return (0);
@@ -4359,7 +4388,7 @@ verify_checkpoint(spa_t *spa)
DMU_POOL_ZPOOL_CHECKPOINT, sizeof (uint64_t),
sizeof (uberblock_t) / sizeof (uint64_t), &checkpoint);
- if (error == ENOENT) {
+ if (error == ENOENT && !dump_opt['L']) {
/*
* If the feature is active but the uberblock is missing
* then we must be in the middle of discarding the
@@ -4382,7 +4411,7 @@ verify_checkpoint(spa_t *spa)
error = 3;
}
- if (error == 0)
+ if (error == 0 && !dump_opt['L'])
verify_checkpoint_blocks(spa);
return (error);
@@ -4488,7 +4517,7 @@ dump_zpool(spa_t *spa)
if (dump_opt['h'])
dump_history(spa);
- if (rc == 0 && !dump_opt['L'])
+ if (rc == 0)
rc = verify_checkpoint(spa);
if (rc != 0) {
diff --git a/usr/src/cmd/ztest/ztest.c b/usr/src/cmd/ztest/ztest.c
index 1522c75485..ff45ab193e 100644
--- a/usr/src/cmd/ztest/ztest.c
+++ b/usr/src/cmd/ztest/ztest.c
@@ -193,6 +193,7 @@ extern uint64_t zfs_deadman_synctime_ms;
extern int metaslab_preload_limit;
extern boolean_t zfs_compressed_arc_enabled;
extern boolean_t zfs_abd_scatter_enabled;
+extern boolean_t zfs_force_some_double_word_sm_entries;
static ztest_shared_opts_t *ztest_shared_opts;
static ztest_shared_opts_t ztest_opts;
@@ -6394,6 +6395,12 @@ main(int argc, char **argv)
dprintf_setup(&argc, argv);
zfs_deadman_synctime_ms = 300000;
+ /*
+ * As two-word space map entries may not come up often (especially
+ * if pool and vdev sizes are small) we want to force at least some
+ * of them so the feature gets tested.
+ */
+ zfs_force_some_double_word_sm_entries = B_TRUE;
ztest_fd_rand = open("/dev/urandom", O_RDONLY);
ASSERT3S(ztest_fd_rand, >=, 0);