diff options
Diffstat (limited to 'usr/src/uts/common/fs/zfs/zap.c')
-rw-r--r-- | usr/src/uts/common/fs/zfs/zap.c | 46 |
1 files changed, 45 insertions, 1 deletions
diff --git a/usr/src/uts/common/fs/zfs/zap.c b/usr/src/uts/common/fs/zfs/zap.c index 7a1994f603..de8e212077 100644 --- a/usr/src/uts/common/fs/zfs/zap.c +++ b/usr/src/uts/common/fs/zfs/zap.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2016 by Delphix. All rights reserved. + * Copyright (c) 2012, 2018 by Delphix. All rights reserved. * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. */ @@ -49,6 +49,36 @@ #include <sys/zap_impl.h> #include <sys/zap_leaf.h> +/* + * If zap_iterate_prefetch is set, we will prefetch the entire ZAP object + * (all leaf blocks) when we start iterating over it. + * + * For zap_cursor_init(), the callers all intend to iterate through all the + * entries. There are a few cases where an error (typically i/o error) could + * cause it to bail out early. + * + * For zap_cursor_init_serialized(), there are callers that do the iteration + * outside of ZFS. Typically they would iterate over everything, but we + * don't have control of that. E.g. zfs_ioc_snapshot_list_next(), + * zcp_snapshots_iter(), and other iterators over things in the MOS - these + * are called by /sbin/zfs and channel programs. The other example is + * zfs_readdir() which iterates over directory entries for the getdents() + * syscall. /sbin/ls iterates to the end (unless it receives a signal), but + * userland doesn't have to. + * + * Given that the ZAP entries aren't returned in a specific order, the only + * legitimate use cases for partial iteration would be: + * + * 1. Pagination: e.g. you only want to display 100 entries at a time, so you + * get the first 100 and then wait for the user to hit "next page", which + * they may never do). + * + * 2. You want to know if there are more than X entries, without relying on + * the zfs-specific implementation of the directory's st_size (which is + * the number of entries). + */ +boolean_t zap_iterate_prefetch = B_TRUE; + int fzap_default_block_shift = 14; /* 16k blocksize */ extern inline zap_phys_t *zap_f_phys(zap_t *zap); @@ -1169,6 +1199,20 @@ fzap_cursor_retrieve(zap_t *zap, zap_cursor_t *zc, zap_attribute_t *za) /* retrieve the next entry at or after zc_hash/zc_cd */ /* if no entry, return ENOENT */ + /* + * If we are reading from the beginning, we're almost + * certain to iterate over the entire ZAP object. If there are + * multiple leaf blocks (freeblk > 2), prefetch the whole + * object, so that we read the leaf blocks concurrently. + * (Unless noprefetch was requested via zap_cursor_init_noprefetch()). + */ + if (zc->zc_hash == 0 && zap_iterate_prefetch && + zc->zc_prefetch && zap_f_phys(zap)->zap_freeblk > 2) { + dmu_prefetch(zc->zc_objset, zc->zc_zapobj, 0, 0, + zap_f_phys(zap)->zap_freeblk << FZAP_BLOCK_SHIFT(zap), + ZIO_PRIORITY_ASYNC_READ); + } + if (zc->zc_leaf && (ZAP_HASH_IDX(zc->zc_hash, zap_leaf_phys(zc->zc_leaf)->l_hdr.lh_prefix_len) != |