summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Matthew Ahrens <mahrens@delphix.com> 2019-11-01 10:14:57 -0600
committer: Jerry Jelinek <jerry.jelinek@joyent.com> 2019-11-04 15:38:15 -0700
commit: 87d7b64204c06f7d85b6dfec442ff0aba82efe9a (patch)
tree: b7a114a5a36a7c63d41bcc26a666f608c6cc031c
parent: e5ace2968d69e625f7153013243d710d96efad76 (diff)
download: illumos-joyent-87d7b64204c06f7d85b6dfec442ff0aba82efe9a.tar.gz
11681 abd_alloc should use scatter for >1K allocations
Portions contributed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: George Melikov <mail@gmelikov.ru>
Reviewed by: DHE <git@dehacked.net>
Reviewed by: Chunwei Chen <tuxoko@gmail.com>
Reviewed by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed by: Don Brady <don.brady@delphix.com>
Reviewed by: Andy Stormont <AStormont@racktopsystems.com>
Reviewed by: Andy Fiddaman <andy@omniosce.org>
Approved by: Dan McDonald <danmcd@joyent.com>
-rw-r--r--usr/src/uts/common/fs/zfs/abd.c29
1 files changed, 27 insertions, 2 deletions
diff --git a/usr/src/uts/common/fs/zfs/abd.c b/usr/src/uts/common/fs/zfs/abd.c
index 5417514e41..596545afd9 100644
--- a/usr/src/uts/common/fs/zfs/abd.c
+++ b/usr/src/uts/common/fs/zfs/abd.c
@@ -11,7 +11,7 @@
/*
* Copyright (c) 2014 by Chunwei Chen. All rights reserved.
- * Copyright (c) 2016 by Delphix. All rights reserved.
+ * Copyright (c) 2019 by Delphix. All rights reserved.
*/
/*
@@ -141,6 +141,30 @@ static abd_stats_t abd_stats = {
boolean_t zfs_abd_scatter_enabled = B_TRUE;
/*
+ * zfs_abd_scatter_min_size is the minimum allocation size to use scatter
+ * ABD's. Smaller allocations will use linear ABD's, which use
+ * zio_[data_]buf_alloc().
+ *
+ * Scatter ABD's use at least one page each, so sub-page allocations waste
+ * some space when allocated as scatter (e.g. 2KB scatter allocation wastes
+ * half of each page). Using linear ABD's for small allocations means that
+ * they will be put on slabs which contain many allocations. This can
+ * improve memory efficiency, but it also makes it much harder for ARC
+ * evictions to actually free pages, because all the buffers on one slab need
+ * to be freed in order for the slab (and underlying pages) to be freed.
+ * Typically, 512B and 1KB kmem caches have 16 buffers per slab, so it's
+ * possible for them to actually waste more memory than scatter (one page per
+ * buf = wasting 3/4 for 1KB or 7/8 for 512B; only one live buf per slab =
+ * wasting 15/16).
+ *
+ * Spill blocks are typically 512B and are heavily used on systems running
+ * selinux with the default dnode size and the `xattr=sa` property set.
+ *
+ * By default we use linear allocations for 512B and 1KB, and scatter
+ * allocations for larger (1.5KB and up).
+ */
+int zfs_abd_scatter_min_size = 512 * 3;
+
+/*
* The size of the chunks ABD allocates. Because the sizes allocated from the
* kmem_cache can't change, this tunable can only be modified at boot. Changing
* it at runtime would cause ABD iteration to work incorrectly for ABDs which
@@ -277,7 +301,8 @@ abd_free_struct(abd_t *abd)
abd_t *
abd_alloc(size_t size, boolean_t is_metadata)
{
- if (!zfs_abd_scatter_enabled)
+ /* see the comment above zfs_abd_scatter_min_size */
+ if (!zfs_abd_scatter_enabled || size < zfs_abd_scatter_min_size)
return (abd_alloc_linear(size, is_metadata));
VERIFY3U(size, <=, SPA_MAXBLOCKSIZE);