author    | Dan McDonald <danmcd@joyent.com> | 2021-05-14 12:00:48 -0400
committer | Dan McDonald <danmcd@joyent.com> | 2021-05-14 12:00:48 -0400
commit    | 1a25930b922d3fede4a252f4f2e0ecb8de2656cb (patch)
tree      | a2ff441f85489691a94f13a644cb7cfe10b81f9e /usr/src/uts/common/fs/zfs/arc.c
parent    | 3aa01401155d92a38a0d4e107043c130432e4a43 (diff)
parent    | 1cd083931cfd3fb8617c1178f62bce417cfa6af2 (diff)
[illumos-gate merge]

commit 1cd083931cfd3fb8617c1178f62bce417cfa6af2
	13780 Add support for rust v0 mangling format
commit 4fe48c6ec9f06cbcce19c4cf97f662b64efde582
	13798 loader: Update the EFI timer to be called once a second
commit 9e3493cb8a0cfe96c9aef9b7da42c6c9b5c24b43
	13374 Port L2ARC Improvements from OpenZFS
Diffstat (limited to 'usr/src/uts/common/fs/zfs/arc.c')
-rw-r--r-- | usr/src/uts/common/fs/zfs/arc.c | 281
1 file changed, 237 insertions(+), 44 deletions(-)
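Among the changes below, the merge adds new arcstats (l2_prefetch_asize, l2_mru_asize, l2_mfu_asize, l2_bufc_data_asize, l2_bufc_metadata_asize, and the evict_l2_eligible_mru/evict_l2_eligible_mfu pair) that break L2ARC space accounting down by ARC state and buffer type. As a minimal sketch of how they could be watched on a live system, illustrative only and not part of the commit, the reader below assumes the standard libkstat API and the usual zfs:0:arcstats kstat; the file name watch_l2stats.c is made up, and it builds with `cc watch_l2stats.c -lkstat`.

```c
/*
 * Illustrative reader for the new L2ARC kstats (not part of the commit).
 * Assumes the standard zfs:0:arcstats kstat; the stat names are taken
 * from the arc_stats table in the diff below.
 */
#include <stdio.h>
#include <kstat.h>

int
main(void)
{
	const char *stats[] = {
		"l2_prefetch_asize", "l2_mru_asize", "l2_mfu_asize",
		"l2_bufc_data_asize", "l2_bufc_metadata_asize",
		"evict_l2_eligible_mru", "evict_l2_eligible_mfu"
	};
	kstat_ctl_t *kc;
	kstat_t *ksp;

	if ((kc = kstat_open()) == NULL)
		return (1);
	/* Look up and refresh the ARC statistics kstat. */
	if ((ksp = kstat_lookup(kc, "zfs", 0, "arcstats")) == NULL ||
	    kstat_read(kc, ksp, NULL) == -1) {
		(void) kstat_close(kc);
		return (1);
	}
	for (size_t i = 0; i < sizeof (stats) / sizeof (stats[0]); i++) {
		kstat_named_t *kn = kstat_data_lookup(ksp, (char *)stats[i]);
		if (kn != NULL)
			(void) printf("%-24s %llu\n", stats[i],
			    (u_longlong_t)kn->value.ui64);
	}
	(void) kstat_close(kc);
	return (0);
}
```

From the shell, `kstat -p zfs:0:arcstats` would show the same counters.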
diff --git a/usr/src/uts/common/fs/zfs/arc.c b/usr/src/uts/common/fs/zfs/arc.c
index 1034481117..37ec80e4b3 100644
--- a/usr/src/uts/common/fs/zfs/arc.c
+++ b/usr/src/uts/common/fs/zfs/arc.c
@@ -26,6 +26,10 @@
  * Copyright 2017 Nexenta Systems, Inc. All rights reserved.
  * Copyright (c) 2011, 2019, Delphix. All rights reserved.
  * Copyright (c) 2020, George Amanakis. All rights reserved.
+ * Copyright (c) 2020, The FreeBSD Foundation [1]
+ *
+ * [1] Portions of this software were developed by Allan Jude
+ * under sponsorship from the FreeBSD Foundation.
  */
 
 /*
@@ -441,6 +445,8 @@ arc_stats_t arc_stats = {
 	{ "evict_not_enough",		KSTAT_DATA_UINT64 },
 	{ "evict_l2_cached",		KSTAT_DATA_UINT64 },
 	{ "evict_l2_eligible",		KSTAT_DATA_UINT64 },
+	{ "evict_l2_eligible_mfu",	KSTAT_DATA_UINT64 },
+	{ "evict_l2_eligible_mru",	KSTAT_DATA_UINT64 },
 	{ "evict_l2_ineligible",	KSTAT_DATA_UINT64 },
 	{ "evict_l2_skip",		KSTAT_DATA_UINT64 },
 	{ "hash_elements",		KSTAT_DATA_UINT64 },
@@ -477,6 +483,11 @@ arc_stats_t arc_stats = {
 	{ "mfu_ghost_evictable_metadata", KSTAT_DATA_UINT64 },
 	{ "l2_hits",			KSTAT_DATA_UINT64 },
 	{ "l2_misses",			KSTAT_DATA_UINT64 },
+	{ "l2_prefetch_asize",		KSTAT_DATA_UINT64 },
+	{ "l2_mru_asize",		KSTAT_DATA_UINT64 },
+	{ "l2_mfu_asize",		KSTAT_DATA_UINT64 },
+	{ "l2_bufc_data_asize",		KSTAT_DATA_UINT64 },
+	{ "l2_bufc_metadata_asize",	KSTAT_DATA_UINT64 },
 	{ "l2_feeds",			KSTAT_DATA_UINT64 },
 	{ "l2_rw_clash",		KSTAT_DATA_UINT64 },
 	{ "l2_read_bytes",		KSTAT_DATA_UINT64 },
@@ -702,6 +713,13 @@ uint64_t zfs_crc64_table[256];
 #define	L2ARC_FEED_SECS		1		/* caching interval secs */
 #define	L2ARC_FEED_MIN_MS	200		/* min caching interval ms */
 
+/*
+ * We can feed L2ARC from two states of ARC buffers, mru and mfu,
+ * and each of the state has two types: data and metadata.
+ */
+#define	L2ARC_FEED_TYPES	4
+
+
 #define	l2arc_writes_sent	ARCSTAT(arcstat_l2_writes_sent)
 #define	l2arc_writes_done	ARCSTAT(arcstat_l2_writes_done)
@@ -715,6 +733,7 @@ uint64_t l2arc_feed_min_ms = L2ARC_FEED_MIN_MS; /* min interval milliseconds */
 boolean_t l2arc_noprefetch = B_TRUE;		/* don't cache prefetch bufs */
 boolean_t l2arc_feed_again = B_TRUE;		/* turbo warmup */
 boolean_t l2arc_norw = B_TRUE;			/* no reads during writes */
+int l2arc_meta_percent = 33;			/* limit on headers size */
 
 /*
  * L2ARC Internals
@@ -785,6 +804,18 @@ static inline void arc_hdr_clear_flags(arc_buf_hdr_t *hdr, arc_flags_t flags);
 
 static boolean_t l2arc_write_eligible(uint64_t, arc_buf_hdr_t *);
 static void l2arc_read_done(zio_t *);
+static void l2arc_do_free_on_write(void);
+static void l2arc_hdr_arcstats_update(arc_buf_hdr_t *hdr, boolean_t incr,
+    boolean_t state_only);
+
+#define	l2arc_hdr_arcstats_increment(hdr) \
+	l2arc_hdr_arcstats_update((hdr), B_TRUE, B_FALSE)
+#define	l2arc_hdr_arcstats_decrement(hdr) \
+	l2arc_hdr_arcstats_update((hdr), B_FALSE, B_FALSE)
+#define	l2arc_hdr_arcstats_increment_state(hdr) \
+	l2arc_hdr_arcstats_update((hdr), B_TRUE, B_TRUE)
+#define	l2arc_hdr_arcstats_decrement_state(hdr) \
+	l2arc_hdr_arcstats_update((hdr), B_FALSE, B_TRUE)
 
 /*
  * The arc_all_memory function is a ZoL enhancement that lives in their OSL
@@ -931,6 +962,12 @@ buf_hash_remove(arc_buf_hdr_t *hdr)
 }
 
 /*
+ * l2arc_mfuonly : A ZFS module parameter that controls whether only MFU
+ * metadata and data are cached from ARC into L2ARC.
+ */
+int l2arc_mfuonly = 0;
+
+/*
  * Global data structures and functions for the buf kmem cache.
  */
@@ -2171,7 +2208,11 @@ add_reference(arc_buf_hdr_t *hdr, void *tag)
 			arc_evictable_space_decrement(hdr, state);
 		}
 		/* remove the prefetch flag if we get a reference */
+		if (HDR_HAS_L2HDR(hdr))
+			l2arc_hdr_arcstats_decrement_state(hdr);
 		arc_hdr_clear_flags(hdr, ARC_FLAG_PREFETCH);
+		if (HDR_HAS_L2HDR(hdr))
+			l2arc_hdr_arcstats_increment_state(hdr);
 	}
 }
@@ -2407,9 +2448,16 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr,
 		}
 	}
 
-	if (HDR_HAS_L1HDR(hdr))
+	if (HDR_HAS_L1HDR(hdr)) {
 		hdr->b_l1hdr.b_state = new_state;
 
+		if (HDR_HAS_L2HDR(hdr) && new_state != arc_l2c_only) {
+			l2arc_hdr_arcstats_decrement_state(hdr);
+			hdr->b_l2hdr.b_arcs_state = new_state->arcs_state;
+			l2arc_hdr_arcstats_increment_state(hdr);
+		}
+	}
+
 	/*
 	 * L2 headers should never be on the L2 state list since they don't
 	 * have L1 headers allocated.
@@ -2712,7 +2760,7 @@ static void l2arc_log_blk_fetch_abort(zio_t *zio);
 
 /* L2ARC persistence block restoration routines. */
 static void l2arc_log_blk_restore(l2arc_dev_t *dev,
-    const l2arc_log_blk_phys_t *lb, uint64_t lb_asize, uint64_t lb_daddr);
+    const l2arc_log_blk_phys_t *lb, uint64_t lb_asize);
 static void l2arc_hdr_restore(const l2arc_log_ent_phys_t *le,
     l2arc_dev_t *dev);
@@ -3437,7 +3485,8 @@ arc_alloc_buf(spa_t *spa, void *tag, arc_buf_contents_t type, int32_t size)
 arc_buf_hdr_t *
 arc_buf_alloc_l2only(size_t size, arc_buf_contents_t type, l2arc_dev_t *dev,
     dva_t dva, uint64_t daddr, int32_t psize, uint64_t birth,
-    enum zio_compress compress, boolean_t protected, boolean_t prefetch)
+    enum zio_compress compress, boolean_t protected,
+    boolean_t prefetch, arc_state_type_t arcs_state)
 {
 	arc_buf_hdr_t	*hdr;
@@ -3460,6 +3509,7 @@ arc_buf_alloc_l2only(size_t size, arc_buf_contents_t type, l2arc_dev_t *dev,
 
 	hdr->b_l2hdr.b_dev = dev;
 	hdr->b_l2hdr.b_daddr = daddr;
+	hdr->b_l2hdr.b_arcs_state = arcs_state;
 
 	return (hdr);
 }
@@ -3543,6 +3593,76 @@ arc_alloc_raw_buf(spa_t *spa, void *tag, uint64_t dsobj, boolean_t byteorder,
 }
 
 static void
+l2arc_hdr_arcstats_update(arc_buf_hdr_t *hdr, boolean_t incr,
+    boolean_t state_only)
+{
+	l2arc_buf_hdr_t *l2hdr = &hdr->b_l2hdr;
+	l2arc_dev_t *dev = l2hdr->b_dev;
+	uint64_t lsize = HDR_GET_LSIZE(hdr);
+	uint64_t psize = HDR_GET_PSIZE(hdr);
+	uint64_t asize = vdev_psize_to_asize(dev->l2ad_vdev, psize);
+	arc_buf_contents_t type = hdr->b_type;
+	int64_t lsize_s;
+	int64_t psize_s;
+	int64_t asize_s;
+
+	if (incr) {
+		lsize_s = lsize;
+		psize_s = psize;
+		asize_s = asize;
+	} else {
+		lsize_s = -lsize;
+		psize_s = -psize;
+		asize_s = -asize;
+	}
+
+	/* If the buffer is a prefetch, count it as such. */
+	if (HDR_PREFETCH(hdr)) {
+		ARCSTAT_INCR(arcstat_l2_prefetch_asize, asize_s);
+	} else {
+		/*
+		 * We use the value stored in the L2 header upon initial
+		 * caching in L2ARC. This value will be updated in case
+		 * an MRU/MRU_ghost buffer transitions to MFU but the L2ARC
+		 * metadata (log entry) cannot currently be updated. Having
+		 * the ARC state in the L2 header solves the problem of a
+		 * possibly absent L1 header (apparent in buffers restored
+		 * from persistent L2ARC).
+		 */
+		switch (hdr->b_l2hdr.b_arcs_state) {
+			case ARC_STATE_MRU_GHOST:
+			case ARC_STATE_MRU:
+				ARCSTAT_INCR(arcstat_l2_mru_asize, asize_s);
+				break;
+			case ARC_STATE_MFU_GHOST:
+			case ARC_STATE_MFU:
+				ARCSTAT_INCR(arcstat_l2_mfu_asize, asize_s);
+				break;
+			default:
+				break;
+		}
+	}
+
+	if (state_only)
+		return;
+
+	ARCSTAT_INCR(arcstat_l2_psize, psize_s);
+	ARCSTAT_INCR(arcstat_l2_lsize, lsize_s);
+
+	switch (type) {
+		case ARC_BUFC_DATA:
+			ARCSTAT_INCR(arcstat_l2_bufc_data_asize, asize_s);
+			break;
+		case ARC_BUFC_METADATA:
+			ARCSTAT_INCR(arcstat_l2_bufc_metadata_asize, asize_s);
+			break;
+		default:
+			break;
+	}
+}
+
+
+static void
 arc_hdr_l2hdr_destroy(arc_buf_hdr_t *hdr)
 {
 	l2arc_buf_hdr_t *l2hdr = &hdr->b_l2hdr;
@@ -3555,9 +3675,7 @@ arc_hdr_l2hdr_destroy(arc_buf_hdr_t *hdr)
 
 	list_remove(&dev->l2ad_buflist, hdr);
 
-	ARCSTAT_INCR(arcstat_l2_psize, -psize);
-	ARCSTAT_INCR(arcstat_l2_lsize, -HDR_GET_LSIZE(hdr));
-
+	l2arc_hdr_arcstats_decrement(hdr);
 	vdev_space_update(dev->l2ad_vdev, -asize, 0, 0);
 
 	(void) zfs_refcount_remove_many(&dev->l2ad_alloc, arc_hdr_size(hdr),
@@ -3767,6 +3885,21 @@ arc_evict_hdr(arc_buf_hdr_t *hdr, kmutex_t *hash_lock)
 		if (l2arc_write_eligible(hdr->b_spa, hdr)) {
 			ARCSTAT_INCR(arcstat_evict_l2_eligible,
 			    HDR_GET_LSIZE(hdr));
+
+			switch (state->arcs_state) {
+				case ARC_STATE_MRU:
+					ARCSTAT_INCR(
+					    arcstat_evict_l2_eligible_mru,
+					    HDR_GET_LSIZE(hdr));
+					break;
+				case ARC_STATE_MFU:
+					ARCSTAT_INCR(
+					    arcstat_evict_l2_eligible_mfu,
+					    HDR_GET_LSIZE(hdr));
+					break;
+				default:
+					break;
+			}
 		} else {
 			ARCSTAT_INCR(arcstat_evict_l2_ineligible,
 			    HDR_GET_LSIZE(hdr));
@@ -4824,9 +4957,6 @@ arc_adapt(int bytes, arc_state_t *state)
 	int64_t mrug_size = zfs_refcount_count(&arc_mru_ghost->arcs_size);
 	int64_t mfug_size = zfs_refcount_count(&arc_mfu_ghost->arcs_size);
 
-	if (state == arc_l2c_only)
-		return;
-
 	ASSERT(bytes > 0);
 	/*
 	 * Adapt the target size of the MRU list:
@@ -5121,10 +5251,14 @@ arc_access(arc_buf_hdr_t *hdr, kmutex_t *hash_lock)
 			ASSERT(multilist_link_active(
 			    &hdr->b_l1hdr.b_arc_node));
 		} else {
+			if (HDR_HAS_L2HDR(hdr))
+				l2arc_hdr_arcstats_decrement_state(hdr);
 			arc_hdr_clear_flags(hdr,
 			    ARC_FLAG_PREFETCH |
 			    ARC_FLAG_PRESCIENT_PREFETCH);
 			ARCSTAT_BUMP(arcstat_mru_hits);
+			if (HDR_HAS_L2HDR(hdr))
+				l2arc_hdr_arcstats_increment_state(hdr);
 		}
 		hdr->b_l1hdr.b_arc_access = now;
 		return;
@@ -5153,13 +5287,16 @@ arc_access(arc_buf_hdr_t *hdr, kmutex_t *hash_lock)
 		 * was evicted from the cache. Move it to the
 		 * MFU state.
 		 */
-
 		if (HDR_PREFETCH(hdr) || HDR_PRESCIENT_PREFETCH(hdr)) {
 			new_state = arc_mru;
 			if (zfs_refcount_count(&hdr->b_l1hdr.b_refcnt) > 0) {
+				if (HDR_HAS_L2HDR(hdr))
+					l2arc_hdr_arcstats_decrement_state(hdr);
 				arc_hdr_clear_flags(hdr,
 				    ARC_FLAG_PREFETCH |
 				    ARC_FLAG_PRESCIENT_PREFETCH);
+				if (HDR_HAS_L2HDR(hdr))
+					l2arc_hdr_arcstats_increment_state(hdr);
 			}
 			DTRACE_PROBE1(new_state__mru, arc_buf_hdr_t *, hdr);
 		} else {
@@ -5420,8 +5557,6 @@ arc_read_done(zio_t *zio)
 	}
 
 	arc_hdr_clear_flags(hdr, ARC_FLAG_L2_EVICTED);
-	if (l2arc_noprefetch && HDR_PREFETCH(hdr))
-		arc_hdr_clear_flags(hdr, ARC_FLAG_L2CACHE);
 
 	callback_list = hdr->b_l1hdr.b_acb;
 	ASSERT3P(callback_list, !=, NULL);
@@ -5747,8 +5882,12 @@ top:
 			ASSERT((zio_flags & ZIO_FLAG_SPECULATIVE) ||
 			    rc != EACCES);
 		} else if (*arc_flags & ARC_FLAG_PREFETCH &&
-		    zfs_refcount_count(&hdr->b_l1hdr.b_refcnt) == 0) {
+		    zfs_refcount_is_zero(&hdr->b_l1hdr.b_refcnt)) {
+			if (HDR_HAS_L2HDR(hdr))
+				l2arc_hdr_arcstats_decrement_state(hdr);
 			arc_hdr_set_flags(hdr, ARC_FLAG_PREFETCH);
+			if (HDR_HAS_L2HDR(hdr))
+				l2arc_hdr_arcstats_increment_state(hdr);
 		}
 		DTRACE_PROBE1(arc__hit, arc_buf_hdr_t *, hdr);
 		arc_access(hdr, hash_lock);
@@ -5871,8 +6010,13 @@ top:
 		}
 
 		if (*arc_flags & ARC_FLAG_PREFETCH &&
-		    zfs_refcount_is_zero(&hdr->b_l1hdr.b_refcnt))
+		    zfs_refcount_is_zero(&hdr->b_l1hdr.b_refcnt)) {
+			if (HDR_HAS_L2HDR(hdr))
+				l2arc_hdr_arcstats_decrement_state(hdr);
 			arc_hdr_set_flags(hdr, ARC_FLAG_PREFETCH);
+			if (HDR_HAS_L2HDR(hdr))
+				l2arc_hdr_arcstats_increment_state(hdr);
+		}
 		if (*arc_flags & ARC_FLAG_PRESCIENT_PREFETCH)
 			arc_hdr_set_flags(hdr, ARC_FLAG_PRESCIENT_PREFETCH);
@@ -5942,7 +6086,7 @@ top:
 		 * 3. This buffer isn't currently writing to the L2ARC.
 		 * 4. The L2ARC entry wasn't evicted, which may
 		 *    also have invalidated the vdev.
-		 * 5. This isn't prefetch and l2arc_noprefetch is set.
+		 * 5. This isn't prefetch or l2arc_noprefetch is 0.
		 */
 		if (HDR_HAS_L2HDR(hdr) &&
 		    !HDR_L2_WRITING(hdr) && !HDR_L2_EVICTED(hdr) &&
@@ -5961,6 +6105,17 @@ top:
 				cb->l2rcb_zb = *zb;
 				cb->l2rcb_flags = zio_flags;
 
+				/*
+				 * When Compressed ARC is disabled, but the
+				 * L2ARC block is compressed, arc_hdr_size()
+				 * will have returned LSIZE rather than PSIZE.
+				 */
+				if (HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF &&
+				    !HDR_COMPRESSION_ENABLED(hdr) &&
+				    HDR_GET_PSIZE(hdr) != 0) {
+					size = HDR_GET_PSIZE(hdr);
+				}
+
 				asize = vdev_psize_to_asize(vd, size);
 				if (asize != size) {
 					abd = abd_alloc_for_io(asize,
@@ -6980,6 +7135,13 @@ arc_state_init(void)
 	aggsum_init(&astat_hdr_size, 0);
 	aggsum_init(&astat_other_size, 0);
 	aggsum_init(&astat_l2_hdr_size, 0);
+
+	arc_anon->arcs_state = ARC_STATE_ANON;
+	arc_mru->arcs_state = ARC_STATE_MRU;
+	arc_mru_ghost->arcs_state = ARC_STATE_MRU_GHOST;
+	arc_mfu->arcs_state = ARC_STATE_MFU;
+	arc_mfu_ghost->arcs_state = ARC_STATE_MFU_GHOST;
+	arc_l2c_only->arcs_state = ARC_STATE_L2C_ONLY;
 }
 
 static void
@@ -7456,9 +7618,11 @@ l2arc_write_eligible(uint64_t spa_guid, arc_buf_hdr_t *hdr)
 	 * 2. is already cached on the L2ARC.
 	 * 3. has an I/O in progress (it may be an incomplete read).
 	 * 4. is flagged not eligible (zfs property).
+	 * 5. is a prefetch and l2arc_noprefetch is set.
	 */
 	if (hdr->b_spa != spa_guid || HDR_HAS_L2HDR(hdr) ||
-	    HDR_IO_IN_PROGRESS(hdr) || !HDR_L2CACHE(hdr))
+	    HDR_IO_IN_PROGRESS(hdr) || !HDR_L2CACHE(hdr) ||
+	    (l2arc_noprefetch && HDR_PREFETCH(hdr)))
 		return (B_FALSE);
 
 	return (B_TRUE);
@@ -7642,9 +7806,6 @@ l2arc_write_done(zio_t *zio)
 	DTRACE_PROBE2(l2arc__iodone, zio_t *, zio,
 	    l2arc_write_callback_t *, cb);
 
-	if (zio->io_error != 0)
-		ARCSTAT_BUMP(arcstat_l2_writes_error);
-
 	/*
 	 * All writes completed, or an error was hit.
 	 */
@@ -7704,8 +7865,7 @@ top:
 			arc_hdr_clear_flags(hdr, ARC_FLAG_HAS_L2HDR);
 
 			uint64_t psize = HDR_GET_PSIZE(hdr);
-			ARCSTAT_INCR(arcstat_l2_psize, -psize);
-			ARCSTAT_INCR(arcstat_l2_lsize, -HDR_GET_LSIZE(hdr));
+			l2arc_hdr_arcstats_decrement(hdr);
 
 			bytes_dropped +=
 			    vdev_psize_to_asize(dev->l2ad_vdev, psize);
@@ -7753,6 +7913,8 @@ top:
 	list_destroy(&cb->l2wcb_abd_list);
 
 	if (zio->io_error != 0) {
+		ARCSTAT_BUMP(arcstat_l2_writes_error);
+
 		/*
 		 * Restore the lbps array in the header to its previous state.
 		 * If the list of log block pointers is empty, zero out the
@@ -8045,7 +8207,7 @@ l2arc_sublist_lock(int list_num)
 	multilist_t *ml = NULL;
 	unsigned int idx;
 
-	ASSERT(list_num >= 0 && list_num <= 3);
+	ASSERT(list_num >= 0 && list_num < L2ARC_FEED_TYPES);
 
 	switch (list_num) {
 	case 0:
@@ -8060,6 +8222,8 @@ l2arc_sublist_lock(int list_num)
 	case 3:
 		ml = arc_mru->arcs_list[ARC_BUFC_DATA];
 		break;
+	default:
+		return (NULL);
 	}
 
 	/*
@@ -8447,7 +8611,16 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
 	/*
 	 * Copy buffers for L2ARC writing.
 	 */
-	for (int try = 0; try <= 3; try++) {
+	for (int try = 0; try < L2ARC_FEED_TYPES; try++) {
+		/*
+		 * If try == 1 or 3, we cache MRU metadata and data
+		 * respectively.
+		 */
+		if (l2arc_mfuonly) {
+			if (try == 1 || try == 3)
+				continue;
+		}
+
 		multilist_sublist_t *mls = l2arc_sublist_lock(try);
 		uint64_t passed_sz = 0;
@@ -8599,6 +8772,8 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
 
 			hdr->b_l2hdr.b_dev = dev;
 			hdr->b_l2hdr.b_daddr = dev->l2ad_hand;
+			hdr->b_l2hdr.b_arcs_state =
+			    hdr->b_l1hdr.b_state->arcs_state;
 			arc_hdr_set_flags(hdr,
 			    ARC_FLAG_L2_WRITING | ARC_FLAG_HAS_L2HDR);
@@ -8622,6 +8797,7 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
 			write_psize += psize;
 			write_asize += asize;
 			dev->l2ad_hand += asize;
+			l2arc_hdr_arcstats_increment(hdr);
 			vdev_space_update(dev->l2ad_vdev, asize, 0, 0);
 
 			mutex_exit(hash_lock);
@@ -8665,8 +8841,6 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
 	ASSERT3U(write_asize, <=, target_sz);
 	ARCSTAT_BUMP(arcstat_l2_writes_sent);
 	ARCSTAT_INCR(arcstat_l2_write_bytes, write_psize);
-	ARCSTAT_INCR(arcstat_l2_lsize, write_lsize);
-	ARCSTAT_INCR(arcstat_l2_psize, write_psize);
 
 	dev->l2ad_writing = B_TRUE;
 	(void) zio_wait(pio);
@@ -8682,6 +8856,15 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
 	return (write_asize);
 }
 
+static boolean_t
+l2arc_hdr_limit_reached(void)
+{
+	int64_t s = aggsum_upper_bound(&astat_l2_hdr_size);
+
+	return (arc_reclaim_needed() || (s > arc_meta_limit * 3 / 4) ||
+	    (s > (arc_warm ? arc_c : arc_c_max) * l2arc_meta_percent / 100));
+}
+
 /*
  * This thread feeds the L2ARC at regular intervals.  This is the beating
 * heart of the L2ARC.
@@ -8747,7 +8930,7 @@ l2arc_feed_thread(void *unused)
 		/*
 		 * Avoid contributing to memory pressure.
		 */
-		if (arc_reclaim_needed()) {
+		if (l2arc_hdr_limit_reached()) {
 			ARCSTAT_BUMP(arcstat_l2_abort_lowmem);
 			spa_config_exit(spa, SCL_L2ARC, dev);
 			continue;
@@ -8877,8 +9060,6 @@ l2arc_rebuild_vdev(vdev_t *vd, boolean_t reopen)
 	l2arc_dev_hdr_phys_t	*l2dhdr;
 	uint64_t		l2dhdr_asize;
 	spa_t			*spa;
-	int			err;
-	boolean_t		l2dhdr_valid = B_TRUE;
 
 	dev = l2arc_vdev_get(vd);
 	ASSERT3P(dev, !=, NULL);
@@ -8907,10 +9088,7 @@ l2arc_rebuild_vdev(vdev_t *vd, boolean_t reopen)
 	/*
 	 * Read the device header, if an error is returned do not rebuild L2ARC.
	 */
-	if ((err = l2arc_dev_hdr_read(dev)) != 0)
-		l2dhdr_valid = B_FALSE;
-
-	if (l2dhdr_valid && dev->l2ad_log_entries > 0) {
+	if (l2arc_dev_hdr_read(dev) == 0 && dev->l2ad_log_entries > 0) {
 		/*
 		 * If we are onlining a cache device (vdev_reopen) that was
 		 * still present (l2arc_vdev_present()) and rebuild is enabled,
@@ -9187,7 +9365,7 @@ l2arc_rebuild(l2arc_dev_t *dev)
 		 * online the L2ARC dev at a later time (or re-import the pool)
 		 * to reconstruct it (when there's less memory pressure).
		 */
-		if (arc_reclaim_needed()) {
+		if (l2arc_hdr_limit_reached()) {
 			ARCSTAT_BUMP(arcstat_l2_rebuild_abort_lowmem);
 			cmn_err(CE_NOTE, "System running low on memory, "
 			    "aborting L2ARC rebuild.");
@@ -9204,7 +9382,7 @@ l2arc_rebuild(l2arc_dev_t *dev)
 		 * L2BLK_GET_PSIZE returns aligned size for log blocks.
		 */
 		uint64_t asize = L2BLK_GET_PSIZE((&lbps[0])->lbp_prop);
-		l2arc_log_blk_restore(dev, this_lb, asize, lbps[0].lbp_daddr);
+		l2arc_log_blk_restore(dev, this_lb, asize);
 
 		/*
 		 * log block restored, include its pointer in the list of
@@ -9286,7 +9464,7 @@ l2arc_rebuild(l2arc_dev_t *dev)
 		PTR_SWAP(this_lb, next_lb);
 		this_io = next_io;
 		next_io = NULL;
-	}
+	}
 
 	if (this_io != NULL)
 		l2arc_log_blk_fetch_abort(this_io);
@@ -9314,6 +9492,13 @@ out:
 		    "no valid log blocks");
 		bzero(l2dhdr, dev->l2ad_dev_hdr_asize);
 		l2arc_dev_hdr_update(dev);
+	} else if (err == ECANCELED) {
+		/*
+		 * In case the rebuild was canceled do not log to spa history
+		 * log as the pool may be in the process of being removed.
+		 */
+		zfs_dbgmsg("L2ARC rebuild aborted, restored %llu blocks",
+		    zfs_refcount_count(&dev->l2ad_lb_count));
 	} else if (err != 0) {
 		spa_history_log_internal(spa, "L2ARC rebuild", NULL,
 		    "aborted, restored %llu blocks",
@@ -9346,7 +9531,7 @@ l2arc_dev_hdr_read(l2arc_dev_t *dev)
 
 	err = zio_wait(zio_read_phys(NULL, dev->l2ad_vdev,
 	    VDEV_LABEL_START_SIZE, l2dhdr_asize, abd,
-	    ZIO_CHECKSUM_LABEL, NULL, NULL, ZIO_PRIORITY_ASYNC_READ,
+	    ZIO_CHECKSUM_LABEL, NULL, NULL, ZIO_PRIORITY_SYNC_READ,
 	    ZIO_FLAG_DONT_CACHE | ZIO_FLAG_CANFAIL |
 	    ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY |
 	    ZIO_FLAG_SPECULATIVE, B_FALSE));
@@ -9515,11 +9700,18 @@ cleanup:
 */
 static void
 l2arc_log_blk_restore(l2arc_dev_t *dev, const l2arc_log_blk_phys_t *lb,
-    uint64_t lb_asize, uint64_t lb_daddr)
+    uint64_t lb_asize)
 {
 	uint64_t	size = 0, asize = 0;
 	uint64_t	log_entries = dev->l2ad_log_entries;
 
+	/*
+	 * Usually arc_adapt() is called only for data, not headers, but
+	 * since we may allocate significant amount of memory here, let ARC
+	 * grow its arc_c.
+	 */
+	arc_adapt(log_entries * HDR_L2ONLY_SIZE, arc_l2c_only);
+
 	for (int i = log_entries - 1; i >= 0; i--) {
 		/*
 		 * Restore goes in the reverse temporal direction to preserve
@@ -9582,19 +9774,18 @@ l2arc_hdr_restore(const l2arc_log_ent_phys_t *le, l2arc_dev_t *dev)
 	    L2BLK_GET_PSIZE((le)->le_prop), le->le_birth,
 	    L2BLK_GET_COMPRESS((le)->le_prop),
 	    L2BLK_GET_PROTECTED((le)->le_prop),
-	    L2BLK_GET_PREFETCH((le)->le_prop));
+	    L2BLK_GET_PREFETCH((le)->le_prop),
+	    L2BLK_GET_STATE((le)->le_prop));
 	asize = vdev_psize_to_asize(dev->l2ad_vdev,
 	    L2BLK_GET_PSIZE((le)->le_prop));
 
 	/*
 	 * vdev_space_update() has to be called before arc_hdr_destroy() to
-	 * avoid underflow since the latter also calls the former.
+	 * avoid underflow since the latter also calls vdev_space_update().
	 */
+	l2arc_hdr_arcstats_increment(hdr);
 	vdev_space_update(dev->l2ad_vdev, asize, 0, 0);
 
-	ARCSTAT_INCR(arcstat_l2_lsize, HDR_GET_LSIZE(hdr));
-	ARCSTAT_INCR(arcstat_l2_psize, HDR_GET_PSIZE(hdr));
-
 	mutex_enter(&dev->l2ad_mtx);
 	list_insert_tail(&dev->l2ad_buflist, hdr);
 	(void) zfs_refcount_add_many(&dev->l2ad_alloc, arc_hdr_size(hdr), hdr);
@@ -9614,14 +9805,15 @@ l2arc_hdr_restore(const l2arc_log_ent_phys_t *le, l2arc_dev_t *dev)
 			arc_hdr_set_flags(exists, ARC_FLAG_HAS_L2HDR);
 			exists->b_l2hdr.b_dev = dev;
 			exists->b_l2hdr.b_daddr = le->le_daddr;
+			exists->b_l2hdr.b_arcs_state =
+			    L2BLK_GET_STATE((le)->le_prop);
 			mutex_enter(&dev->l2ad_mtx);
 			list_insert_tail(&dev->l2ad_buflist, exists);
 			(void) zfs_refcount_add_many(&dev->l2ad_alloc,
 			    arc_hdr_size(exists), exists);
 			mutex_exit(&dev->l2ad_mtx);
+			l2arc_hdr_arcstats_increment(exists);
 			vdev_space_update(dev->l2ad_vdev, asize, 0, 0);
-			ARCSTAT_INCR(arcstat_l2_lsize, HDR_GET_LSIZE(exists));
-			ARCSTAT_INCR(arcstat_l2_psize, HDR_GET_PSIZE(exists));
 		}
 
 		ARCSTAT_BUMP(arcstat_l2_rebuild_bufs_precached);
@@ -9915,6 +10107,7 @@ l2arc_log_blk_insert(l2arc_dev_t *dev, const arc_buf_hdr_t *hdr)
 	L2BLK_SET_TYPE((le)->le_prop, hdr->b_type);
 	L2BLK_SET_PROTECTED((le)->le_prop, !!(HDR_PROTECTED(hdr)));
 	L2BLK_SET_PREFETCH((le)->le_prop, !!(HDR_PREFETCH(hdr)));
+	L2BLK_SET_STATE((le)->le_prop, hdr->b_l1hdr.b_state->arcs_state);
 
 	dev->l2ad_log_blk_payload_asize += vdev_psize_to_asize(dev->l2ad_vdev,
 	    HDR_GET_PSIZE(hdr));
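A closing note on the two knobs the merge introduces: l2arc_mfuonly gates the MRU passes of the feed loop, and l2arc_meta_percent bounds L2ARC header overhead via l2arc_hdr_limit_reached(). Both are plain kernel globals, so on illumos they would presumably be tuned the usual way for zfs module variables (for example `set zfs:l2arc_mfuonly = 1` in /etc/system). The toy program below, a model for illustration rather than illumos code, distills the signed-delta pattern of l2arc_hdr_arcstats_update() above: one routine serves as both increment and decrement by negating the delta, and the state_only flag lets arc_change_state() move a buffer between the MRU and MFU buckets without touching the size totals twice.

```c
/*
 * Toy model (not from the commit) of the l2arc_hdr_arcstats_update()
 * bookkeeping pattern. "incr" flips the sign of the delta; "state_only"
 * updates only the per-state bucket, which is what the paired
 * decrement_state/increment_state calls around state changes rely on.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdbool.h>

struct l2stats {
	int64_t mru_asize;
	int64_t mfu_asize;
	int64_t total_asize;
};

enum buf_state { B_MRU, B_MFU };

static void
stats_update(struct l2stats *st, enum buf_state state, int64_t asize,
    bool incr, bool state_only)
{
	int64_t asize_s = incr ? asize : -asize;

	if (state == B_MRU)
		st->mru_asize += asize_s;
	else
		st->mfu_asize += asize_s;

	/* State-only callers must not double-count the totals. */
	if (state_only)
		return;

	st->total_asize += asize_s;
}

int
main(void)
{
	struct l2stats st = { 0 };

	/* Buffer cached while on the MRU list: full increment. */
	stats_update(&st, B_MRU, 8192, true, false);

	/* It later transitions MRU -> MFU: state buckets only. */
	stats_update(&st, B_MRU, 8192, false, true);
	stats_update(&st, B_MFU, 8192, true, true);

	/* Eviction from L2ARC: full decrement from its current state. */
	stats_update(&st, B_MFU, 8192, false, false);

	(void) printf("mru=%lld mfu=%lld total=%lld\n",
	    (long long)st.mru_asize, (long long)st.mfu_asize,
	    (long long)st.total_asize);
	return (0);
}
```

Running it prints mru=0 mfu=0 total=0: every increment is balanced by a matching decrement, which is the invariant the increment/decrement macro pairs in the diff exist to preserve.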