author    Jerry Jelinek <jerry.jelinek@joyent.com>  2018-07-24 11:41:26 +0000
committer Jerry Jelinek <jerry.jelinek@joyent.com>  2018-07-24 11:41:26 +0000
commit    5b6d8a45d43cad27424fe3742fc5420e5e621038 (patch)
tree      e1a25d0e0f573bbaec69d67dac9a6b3c96224afc
parent    c67976a7d0f48374dab986535bad445ffb728e00 (diff)
parent    2ec7644aab2a726a64681fa66c6db8731b160de1 (diff)
download  illumos-joyent-5b6d8a45d43cad27424fe3742fc5420e5e621038.tar.gz
[illumos-gate merge]

commit 2ec7644aab2a726a64681fa66c6db8731b160de1
    9580 Add a hash-table on top of nvlist to speed-up operations
commit abe1fd01ce5a83718c5a840daeab4abdaec1c104
    9465 ARC check for 'anon_size > arc_c/2' can stall the system
-rw-r--r--  usr/src/common/nvpair/nvpair.c            366
-rw-r--r--  usr/src/lib/libnvpair/nvpair_json.c         3
-rw-r--r--  usr/src/uts/common/fs/zfs/arc.c            48
-rw-r--r--  usr/src/uts/common/fs/zfs/dsl_dir.c         2
-rw-r--r--  usr/src/uts/common/fs/zfs/spa_misc.c        6
-rw-r--r--  usr/src/uts/common/fs/zfs/sys/arc.h         2
-rw-r--r--  usr/src/uts/common/fs/zfs/sys/spa.h         1
-rw-r--r--  usr/src/uts/common/fs/zfs/sys/spa_impl.h    4
-rw-r--r--  usr/src/uts/common/sys/nvpair.h             3
-rw-r--r--  usr/src/uts/common/sys/nvpair_impl.h       29
10 files changed, 392 insertions(+), 72 deletions(-)
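
The first merged change (9580) adds a per-nvlist hash table alongside the existing doubly-linked list so that name lookups no longer walk the whole list. Below is a minimal userland sketch, not part of the patch, of the API usage this speeds up; the property names and the 10,000-pair count are illustrative only (build against libnvpair, e.g. cc example.c -lnvpair):

/*
 * Minimal userland sketch (not from the patch) of the nvlist operations
 * whose cost this merge reduces: with NV_UNIQUE_NAME, nvlist_add_*() and
 * nvlist_lookup_*() previously walked the pair list linearly; the new
 * per-nvlist hash table makes those name lookups roughly constant time.
 */
#include <stdio.h>
#include <libnvpair.h>

int
main(void)
{
        nvlist_t *nvl;
        char name[32];
        uint64_t val;

        if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0)
                return (1);

        /* Each add also implies a duplicate-name lookup. */
        for (uint64_t i = 0; i < 10000; i++) {
                (void) snprintf(name, sizeof (name), "prop%llu",
                    (u_longlong_t)i);
                if (nvlist_add_uint64(nvl, name, i) != 0)
                        return (1);
        }

        /* Lookups by name are the other hot path the hash table serves. */
        if (nvlist_lookup_uint64(nvl, "prop9999", &val) == 0)
                (void) printf("prop9999 = %llu\n", (u_longlong_t)val);

        nvlist_free(nvl);
        return (0);
}

Because NV_UNIQUE_NAME makes every add check for an existing pair of the same name, building a large nvlist was previously quadratic in the number of pairs; with the hash table the duplicate check is a bucket lookup instead of a list walk.
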
diff --git a/usr/src/common/nvpair/nvpair.c b/usr/src/common/nvpair/nvpair.c
index 5881ba54b7..bffae68ca3 100644
--- a/usr/src/common/nvpair/nvpair.c
+++ b/usr/src/common/nvpair/nvpair.c
@@ -144,6 +144,8 @@ int nvpair_max_recursion = 20;
int nvpair_max_recursion = 100;
#endif
+uint64_t nvlist_hashtable_init_size = (1 << 4);
+
int
nv_alloc_init(nv_alloc_t *nva, const nv_alloc_ops_t *nvo, /* args */ ...)
{
@@ -251,6 +253,291 @@ nv_priv_alloc_embedded(nvpriv_t *priv)
return (emb_priv);
}
+static int
+nvt_tab_alloc(nvpriv_t *priv, uint64_t buckets)
+{
+ ASSERT3P(priv->nvp_hashtable, ==, NULL);
+ ASSERT0(priv->nvp_nbuckets);
+ ASSERT0(priv->nvp_nentries);
+
+ i_nvp_t **tab = nv_mem_zalloc(priv, buckets * sizeof (i_nvp_t *));
+ if (tab == NULL)
+ return (ENOMEM);
+
+ priv->nvp_hashtable = tab;
+ priv->nvp_nbuckets = buckets;
+ return (0);
+}
+
+static void
+nvt_tab_free(nvpriv_t *priv)
+{
+ i_nvp_t **tab = priv->nvp_hashtable;
+ if (tab == NULL) {
+ ASSERT0(priv->nvp_nbuckets);
+ ASSERT0(priv->nvp_nentries);
+ return;
+ }
+
+ nv_mem_free(priv, tab, priv->nvp_nbuckets * sizeof (i_nvp_t *));
+
+ priv->nvp_hashtable = NULL;
+ priv->nvp_nbuckets = 0;
+ priv->nvp_nentries = 0;
+}
+
+static uint32_t
+nvt_hash(const char *p)
+{
+ uint32_t g, hval = 0;
+
+ while (*p) {
+ hval = (hval << 4) + *p++;
+ if ((g = (hval & 0xf0000000)) != 0)
+ hval ^= g >> 24;
+ hval &= ~g;
+ }
+ return (hval);
+}
+
+static boolean_t
+nvt_nvpair_match(nvpair_t *nvp1, nvpair_t *nvp2, uint32_t nvflag)
+{
+ boolean_t match = B_FALSE;
+ if (nvflag & NV_UNIQUE_NAME_TYPE) {
+ if (strcmp(NVP_NAME(nvp1), NVP_NAME(nvp2)) == 0 &&
+ NVP_TYPE(nvp1) == NVP_TYPE(nvp2))
+ match = B_TRUE;
+ } else {
+ ASSERT(nvflag == 0 || nvflag & NV_UNIQUE_NAME);
+ if (strcmp(NVP_NAME(nvp1), NVP_NAME(nvp2)) == 0)
+ match = B_TRUE;
+ }
+ return (match);
+}
+
+static nvpair_t *
+nvt_lookup_name_type(nvlist_t *nvl, const char *name, data_type_t type)
+{
+ nvpriv_t *priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv;
+ ASSERT(priv != NULL);
+
+ i_nvp_t **tab = priv->nvp_hashtable;
+
+ if (tab == NULL) {
+ ASSERT3P(priv->nvp_list, ==, NULL);
+ ASSERT0(priv->nvp_nbuckets);
+ ASSERT0(priv->nvp_nentries);
+ return (NULL);
+ } else {
+ ASSERT(priv->nvp_nbuckets != 0);
+ }
+
+ uint64_t hash = nvt_hash(name);
+ uint64_t index = hash & (priv->nvp_nbuckets - 1);
+
+ ASSERT3U(index, <, priv->nvp_nbuckets);
+ i_nvp_t *entry = tab[index];
+
+ for (i_nvp_t *e = entry; e != NULL; e = e->nvi_hashtable_next) {
+ if (strcmp(NVP_NAME(&e->nvi_nvp), name) == 0 &&
+ (type == DATA_TYPE_DONTCARE ||
+ NVP_TYPE(&e->nvi_nvp) == type))
+ return (&e->nvi_nvp);
+ }
+ return (NULL);
+}
+
+static nvpair_t *
+nvt_lookup_name(nvlist_t *nvl, const char *name)
+{
+ return (nvt_lookup_name_type(nvl, name, DATA_TYPE_DONTCARE));
+}
+
+static int
+nvt_resize(nvpriv_t *priv, uint32_t new_size)
+{
+ i_nvp_t **tab = priv->nvp_hashtable;
+
+ /*
+ * Migrate all the entries from the current table
+ * to a newly-allocated table with the new size by
+ * re-adjusting the pointers of their entries.
+ */
+ uint32_t size = priv->nvp_nbuckets;
+ uint32_t new_mask = new_size - 1;
+ ASSERT(ISP2(new_size));
+
+ i_nvp_t **new_tab = nv_mem_zalloc(priv, new_size * sizeof (i_nvp_t *));
+ if (new_tab == NULL)
+ return (ENOMEM);
+
+ uint32_t nentries = 0;
+ for (uint32_t i = 0; i < size; i++) {
+ i_nvp_t *next, *e = tab[i];
+
+ while (e != NULL) {
+ next = e->nvi_hashtable_next;
+
+ uint32_t hash = nvt_hash(NVP_NAME(&e->nvi_nvp));
+ uint32_t index = hash & new_mask;
+
+ e->nvi_hashtable_next = new_tab[index];
+ new_tab[index] = e;
+ nentries++;
+
+ e = next;
+ }
+ tab[i] = NULL;
+ }
+ ASSERT3U(nentries, ==, priv->nvp_nentries);
+
+ nvt_tab_free(priv);
+
+ priv->nvp_hashtable = new_tab;
+ priv->nvp_nbuckets = new_size;
+ priv->nvp_nentries = nentries;
+
+ return (0);
+}
+
+static boolean_t
+nvt_needs_togrow(nvpriv_t *priv)
+{
+ /*
+ * Grow only when we have more elements than buckets
+ * and the # of buckets doesn't overflow.
+ */
+ return (priv->nvp_nentries > priv->nvp_nbuckets &&
+ (UINT32_MAX >> 1) >= priv->nvp_nbuckets);
+}
+
+/*
+ * Allocate a new table that's twice the size of the old one,
+ * and migrate all the entries from the old one to the new
+ * one by re-adjusting their pointers.
+ */
+static int
+nvt_grow(nvpriv_t *priv)
+{
+ uint32_t current_size = priv->nvp_nbuckets;
+ /* ensure we won't overflow */
+ ASSERT3U(UINT32_MAX >> 1, >=, current_size);
+ return (nvt_resize(priv, current_size << 1));
+}
+
+static boolean_t
+nvt_needs_toshrink(nvpriv_t *priv)
+{
+ /*
+ * Shrink only when the # of elements is less than or
+ * equal to 1/4 the # of buckets. Never shrink less than
+ * nvlist_hashtable_init_size.
+ */
+ ASSERT3U(priv->nvp_nbuckets, >=, nvlist_hashtable_init_size);
+ if (priv->nvp_nbuckets == nvlist_hashtable_init_size)
+ return (B_FALSE);
+ return (priv->nvp_nentries <= (priv->nvp_nbuckets >> 2));
+}
+
+/*
+ * Allocate a new table that's half the size of the old one,
+ * and migrate all the entries from the old one to the new
+ * one by re-adjusting their pointers.
+ */
+static int
+nvt_shrink(nvpriv_t *priv)
+{
+ uint32_t current_size = priv->nvp_nbuckets;
+ /* ensure we won't overflow */
+ ASSERT3U(current_size, >=, nvlist_hashtable_init_size);
+ return (nvt_resize(priv, current_size >> 1));
+}
+
+static int
+nvt_remove_nvpair(nvlist_t *nvl, nvpair_t *nvp)
+{
+ nvpriv_t *priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv;
+
+ if (nvt_needs_toshrink(priv)) {
+ int err = nvt_shrink(priv);
+ if (err != 0)
+ return (err);
+ }
+ i_nvp_t **tab = priv->nvp_hashtable;
+
+ char *name = NVP_NAME(nvp);
+ uint64_t hash = nvt_hash(name);
+ uint64_t index = hash & (priv->nvp_nbuckets - 1);
+
+ ASSERT3U(index, <, priv->nvp_nbuckets);
+ i_nvp_t *bucket = tab[index];
+
+ for (i_nvp_t *prev = NULL, *e = bucket;
+ e != NULL; prev = e, e = e->nvi_hashtable_next) {
+ if (nvt_nvpair_match(&e->nvi_nvp, nvp, nvl->nvl_flag)) {
+ if (prev != NULL) {
+ prev->nvi_hashtable_next =
+ e->nvi_hashtable_next;
+ } else {
+ ASSERT3P(e, ==, bucket);
+ tab[index] = e->nvi_hashtable_next;
+ }
+ e->nvi_hashtable_next = NULL;
+ priv->nvp_nentries--;
+ break;
+ }
+ }
+
+ return (0);
+}
+
+static int
+nvt_add_nvpair(nvlist_t *nvl, nvpair_t *nvp)
+{
+ nvpriv_t *priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv;
+
+ /* initialize nvpair table now if it doesn't exist. */
+ if (priv->nvp_hashtable == NULL) {
+ int err = nvt_tab_alloc(priv, nvlist_hashtable_init_size);
+ if (err != 0)
+ return (err);
+ }
+
+ /*
+ * if we don't allow duplicate entries, make sure to
+ * unlink any existing entries from the table.
+ */
+ if (nvl->nvl_nvflag != 0) {
+ int err = nvt_remove_nvpair(nvl, nvp);
+ if (err != 0)
+ return (err);
+ }
+
+ if (nvt_needs_togrow(priv)) {
+ int err = nvt_grow(priv);
+ if (err != 0)
+ return (err);
+ }
+ i_nvp_t **tab = priv->nvp_hashtable;
+
+ char *name = NVP_NAME(nvp);
+ uint64_t hash = nvt_hash(name);
+ uint64_t index = hash & (priv->nvp_nbuckets - 1);
+
+ ASSERT3U(index, <, priv->nvp_nbuckets);
+ i_nvp_t *bucket = tab[index];
+
+ /* insert link at the beginning of the bucket */
+ i_nvp_t *new_entry = NVPAIR2I_NVP(nvp);
+ ASSERT3P(new_entry->nvi_hashtable_next, ==, NULL);
+ new_entry->nvi_hashtable_next = bucket;
+ tab[index] = new_entry;
+
+ priv->nvp_nentries++;
+ return (0);
+}
+
static void
nvlist_init(nvlist_t *nvl, uint32_t nvflag, nvpriv_t *priv)
{
@@ -583,6 +870,7 @@ nvlist_free(nvlist_t *nvl)
else
nvl->nvl_priv = 0;
+ nvt_tab_free(priv);
nv_mem_free(priv, priv, sizeof (nvpriv_t));
}
@@ -643,26 +931,14 @@ nvlist_xdup(nvlist_t *nvl, nvlist_t **nvlp, nv_alloc_t *nva)
int
nvlist_remove_all(nvlist_t *nvl, const char *name)
{
- nvpriv_t *priv;
- i_nvp_t *curr;
int error = ENOENT;
- if (nvl == NULL || name == NULL ||
- (priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv) == NULL)
+ if (nvl == NULL || name == NULL || nvl->nvl_priv == 0)
return (EINVAL);
- curr = priv->nvp_list;
- while (curr != NULL) {
- nvpair_t *nvp = &curr->nvi_nvp;
-
- curr = curr->nvi_next;
- if (strcmp(name, NVP_NAME(nvp)) != 0)
- continue;
-
- nvp_buf_unlink(nvl, nvp);
- nvpair_free(nvp);
- nvp_buf_free(nvl, nvp);
-
+ nvpair_t *nvp;
+ while ((nvp = nvt_lookup_name(nvl, name)) != NULL) {
+ VERIFY0(nvlist_remove_nvpair(nvl, nvp));
error = 0;
}
@@ -675,28 +951,14 @@ nvlist_remove_all(nvlist_t *nvl, const char *name)
int
nvlist_remove(nvlist_t *nvl, const char *name, data_type_t type)
{
- nvpriv_t *priv;
- i_nvp_t *curr;
-
- if (nvl == NULL || name == NULL ||
- (priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv) == NULL)
+ if (nvl == NULL || name == NULL || nvl->nvl_priv == 0)
return (EINVAL);
- curr = priv->nvp_list;
- while (curr != NULL) {
- nvpair_t *nvp = &curr->nvi_nvp;
-
- if (strcmp(name, NVP_NAME(nvp)) == 0 && NVP_TYPE(nvp) == type) {
- nvp_buf_unlink(nvl, nvp);
- nvpair_free(nvp);
- nvp_buf_free(nvl, nvp);
-
- return (0);
- }
- curr = curr->nvi_next;
- }
+ nvpair_t *nvp = nvt_lookup_name_type(nvl, name, type);
+ if (nvp == NULL)
+ return (ENOENT);
- return (ENOENT);
+ return (nvlist_remove_nvpair(nvl, nvp));
}
int
@@ -705,6 +967,10 @@ nvlist_remove_nvpair(nvlist_t *nvl, nvpair_t *nvp)
if (nvl == NULL || nvp == NULL)
return (EINVAL);
+ int err = nvt_remove_nvpair(nvl, nvp);
+ if (err != 0)
+ return (err);
+
nvp_buf_unlink(nvl, nvp);
nvpair_free(nvp);
nvp_buf_free(nvl, nvp);
@@ -982,6 +1248,12 @@ nvlist_add_common(nvlist_t *nvl, const char *name,
else if (nvl->nvl_nvflag & NV_UNIQUE_NAME_TYPE)
(void) nvlist_remove(nvl, name, type);
+ err = nvt_add_nvpair(nvl, nvp);
+ if (err != 0) {
+ nvpair_free(nvp);
+ nvp_buf_free(nvl, nvp);
+ return (err);
+ }
nvp_buf_link(nvl, nvp);
return (0);
@@ -1331,25 +1603,17 @@ static int
nvlist_lookup_common(nvlist_t *nvl, const char *name, data_type_t type,
uint_t *nelem, void *data)
{
- nvpriv_t *priv;
- nvpair_t *nvp;
- i_nvp_t *curr;
-
- if (name == NULL || nvl == NULL ||
- (priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv) == NULL)
+ if (name == NULL || nvl == NULL || nvl->nvl_priv == 0)
return (EINVAL);
if (!(nvl->nvl_nvflag & (NV_UNIQUE_NAME | NV_UNIQUE_NAME_TYPE)))
return (ENOTSUP);
- for (curr = priv->nvp_list; curr != NULL; curr = curr->nvi_next) {
- nvp = &curr->nvi_nvp;
-
- if (strcmp(name, NVP_NAME(nvp)) == 0 && NVP_TYPE(nvp) == type)
- return (nvpair_value_common(nvp, type, nelem, data));
- }
+ nvpair_t *nvp = nvt_lookup_name_type(nvl, name, type);
+ if (nvp == NULL)
+ return (ENOENT);
- return (ENOENT);
+ return (nvpair_value_common(nvp, type, nelem, data));
}
int
@@ -2107,6 +2371,12 @@ nvs_decode_pairs(nvstream_t *nvs, nvlist_t *nvl)
return (EFAULT);
}
+ err = nvt_add_nvpair(nvl, nvp);
+ if (err != 0) {
+ nvpair_free(nvp);
+ nvp_buf_free(nvl, nvp);
+ return (err);
+ }
nvp_buf_link(nvl, nvp);
}
return (err);
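
For reference, the hashing scheme introduced above can be restated outside the kernel: nvt_hash() is a PJW-style string hash, and because the bucket count stays a power of two the bucket index is a mask rather than a modulo. This standalone sketch is not from the patch, and the name "compression" is only an example:

/*
 * Standalone sketch (not from the patch) of the bucket selection used by
 * the new nvlist hash table: PJW-style string hash, power-of-two bucket
 * count, index taken with a mask.
 */
#include <stdio.h>
#include <stdint.h>

static uint32_t
nvt_hash_sketch(const char *p)
{
        uint32_t g, hval = 0;

        while (*p) {
                hval = (hval << 4) + *p++;
                if ((g = (hval & 0xf0000000)) != 0)
                        hval ^= g >> 24;
                hval &= ~g;
        }
        return (hval);
}

int
main(void)
{
        /* 16 buckets matches nvlist_hashtable_init_size = (1 << 4). */
        uint32_t nbuckets = 1 << 4;
        const char *name = "compression";
        uint32_t hash = nvt_hash_sketch(name);

        (void) printf("\"%s\" hashes to %u, bucket %u of %u\n",
            name, hash, hash & (nbuckets - 1), nbuckets);
        return (0);
}

As the hunks above show, the table doubles whenever the number of entries exceeds the number of buckets and halves when entries fall to a quarter of the buckets, never shrinking below the 16-bucket initial size, so chains stay short without constant reallocation.
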
diff --git a/usr/src/lib/libnvpair/nvpair_json.c b/usr/src/lib/libnvpair/nvpair_json.c
index 7ebd1be7a0..3205d229ab 100644
--- a/usr/src/lib/libnvpair/nvpair_json.c
+++ b/usr/src/lib/libnvpair/nvpair_json.c
@@ -10,6 +10,7 @@
*/
/*
* Copyright (c) 2014, Joyent, Inc.
+ * Copyright (c) 2017 by Delphix. All rights reserved.
*/
#include <stdio.h>
@@ -461,8 +462,10 @@ nvlist_do_json(nvlist_t *nvl, char **bufp, size_t *blen, off_t *offp)
}
case DATA_TYPE_UNKNOWN:
+ case DATA_TYPE_DONTCARE:
return (-1);
}
+
}
FPRINTF(bufp, blen, offp, "}");
diff --git a/usr/src/uts/common/fs/zfs/arc.c b/usr/src/uts/common/fs/zfs/arc.c
index 5a899aa1c7..af70eee950 100644
--- a/usr/src/uts/common/fs/zfs/arc.c
+++ b/usr/src/uts/common/fs/zfs/arc.c
@@ -380,6 +380,13 @@ int zfs_arc_shrink_shift = 0;
int zfs_arc_p_min_shift = 0;
int zfs_arc_average_blocksize = 8 * 1024; /* 8KB */
+/*
+ * ARC dirty data constraints for arc_tempreserve_space() throttle
+ */
+uint_t zfs_arc_dirty_limit_percent = 50; /* total dirty data limit */
+uint_t zfs_arc_anon_limit_percent = 25; /* anon block dirty limit */
+uint_t zfs_arc_pool_dirty_percent = 20; /* each pool's anon allowance */
+
boolean_t zfs_compressed_arc_enabled = B_TRUE;
/*
@@ -5848,12 +5855,10 @@ arc_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, arc_buf_t *buf,
}
static int
-arc_memory_throttle(uint64_t reserve, uint64_t txg)
+arc_memory_throttle(spa_t *spa, uint64_t reserve, uint64_t txg)
{
#ifdef _KERNEL
uint64_t available_memory = ptob(freemem);
- static uint64_t page_load = 0;
- static uint64_t last_txg = 0;
#if defined(__i386)
available_memory =
@@ -5863,9 +5868,9 @@ arc_memory_throttle(uint64_t reserve, uint64_t txg)
if (freemem > physmem * arc_lotsfree_percent / 100)
return (0);
- if (txg > last_txg) {
- last_txg = txg;
- page_load = 0;
+ if (txg > spa->spa_lowmem_last_txg) {
+ spa->spa_lowmem_last_txg = txg;
+ spa->spa_lowmem_page_load = 0;
}
/*
* If we are in pageout, we know that memory is already tight,
@@ -5873,18 +5878,19 @@ arc_memory_throttle(uint64_t reserve, uint64_t txg)
* continue to let page writes occur as quickly as possible.
*/
if (curproc == proc_pageout) {
- if (page_load > MAX(ptob(minfree), available_memory) / 4)
+ if (spa->spa_lowmem_page_load >
+ MAX(ptob(minfree), available_memory) / 4)
return (SET_ERROR(ERESTART));
/* Note: reserve is inflated, so we deflate */
- page_load += reserve / 8;
+ atomic_add_64(&spa->spa_lowmem_page_load, reserve / 8);
return (0);
- } else if (page_load > 0 && arc_reclaim_needed()) {
+ } else if (spa->spa_lowmem_page_load > 0 && arc_reclaim_needed()) {
/* memory is low, delay before restarting */
ARCSTAT_INCR(arcstat_memory_throttle_count, 1);
return (SET_ERROR(EAGAIN));
}
- page_load = 0;
-#endif
+ spa->spa_lowmem_page_load = 0;
+#endif /* _KERNEL */
return (0);
}
@@ -5896,7 +5902,7 @@ arc_tempreserve_clear(uint64_t reserve)
}
int
-arc_tempreserve_space(uint64_t reserve, uint64_t txg)
+arc_tempreserve_space(spa_t *spa, uint64_t reserve, uint64_t txg)
{
int error;
uint64_t anon_size;
@@ -5923,7 +5929,7 @@ arc_tempreserve_space(uint64_t reserve, uint64_t txg)
* in order to compress/encrypt/etc the data. We therefore need to
* make sure that there is sufficient available memory for this.
*/
- error = arc_memory_throttle(reserve, txg);
+ error = arc_memory_throttle(spa, reserve, txg);
if (error != 0)
return (error);
@@ -5931,12 +5937,24 @@ arc_tempreserve_space(uint64_t reserve, uint64_t txg)
* Throttle writes when the amount of dirty data in the cache
* gets too large. We try to keep the cache less than half full
* of dirty blocks so that our sync times don't grow too large.
+ *
+ * In the case of one pool being built on another pool, we want
+ * to make sure we don't end up throttling the lower (backing)
+ * pool when the upper pool is the majority contributor to dirty
+ * data. To insure we make forward progress during throttling, we
+ * also check the current pool's net dirty data and only throttle
+ * if it exceeds zfs_arc_pool_dirty_percent of the anonymous dirty
+ * data in the cache.
+ *
* Note: if two requests come in concurrently, we might let them
* both succeed, when one of them should fail. Not a huge deal.
*/
+ uint64_t total_dirty = reserve + arc_tempreserve + anon_size;
+ uint64_t spa_dirty_anon = spa_dirty_data(spa);
- if (reserve + arc_tempreserve + anon_size > arc_c / 2 &&
- anon_size > arc_c / 4) {
+ if (total_dirty > arc_c * zfs_arc_dirty_limit_percent / 100 &&
+ anon_size > arc_c * zfs_arc_anon_limit_percent / 100 &&
+ spa_dirty_anon > anon_size * zfs_arc_pool_dirty_percent / 100) {
uint64_t meta_esize =
refcount_count(&arc_anon->arcs_esize[ARC_BUFC_METADATA]);
uint64_t data_esize =
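
The second merged change (9465) replaces the fixed "anon_size > arc_c/2 ... arc_c/4" throttle in arc_tempreserve_space() with three tunable-percentage tests, adding a per-pool dirty-data check so a pool layered on top of another pool cannot stall the backing pool. A minimal sketch of the new predicate, using illustrative sizes rather than values from any real system:

/*
 * Sketch (not from the patch) of the reworked throttle test in
 * arc_tempreserve_space(): throttle only when total dirty data, anonymous
 * ARC data, and this pool's own dirty data all exceed their percentage
 * limits.  The byte counts below are made up for illustration.
 */
#include <stdio.h>
#include <stdint.h>

int
main(void)
{
        uint64_t arc_c = 4ULL << 30;            /* 4 GiB ARC target */
        uint64_t total_dirty = 3ULL << 30;      /* reserve + tempreserve + anon */
        uint64_t anon_size = 2ULL << 30;        /* anonymous (dirty) ARC data */
        uint64_t spa_dirty_anon = 1ULL << 30;   /* this pool's dirty data */

        uint32_t zfs_arc_dirty_limit_percent = 50;
        uint32_t zfs_arc_anon_limit_percent = 25;
        uint32_t zfs_arc_pool_dirty_percent = 20;

        int throttle =
            total_dirty > arc_c * zfs_arc_dirty_limit_percent / 100 &&
            anon_size > arc_c * zfs_arc_anon_limit_percent / 100 &&
            spa_dirty_anon > anon_size * zfs_arc_pool_dirty_percent / 100;

        (void) printf("throttle (ERESTART)? %s\n", throttle ? "yes" : "no");
        return (0);
}
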
diff --git a/usr/src/uts/common/fs/zfs/dsl_dir.c b/usr/src/uts/common/fs/zfs/dsl_dir.c
index 99bc468313..35e76e273e 100644
--- a/usr/src/uts/common/fs/zfs/dsl_dir.c
+++ b/usr/src/uts/common/fs/zfs/dsl_dir.c
@@ -1380,7 +1380,7 @@ dsl_dir_tempreserve_space(dsl_dir_t *dd, uint64_t lsize, uint64_t asize,
offsetof(struct tempreserve, tr_node));
ASSERT3S(asize, >, 0);
- err = arc_tempreserve_space(lsize, tx->tx_txg);
+ err = arc_tempreserve_space(dd->dd_pool->dp_spa, lsize, tx->tx_txg);
if (err == 0) {
struct tempreserve *tr;
diff --git a/usr/src/uts/common/fs/zfs/spa_misc.c b/usr/src/uts/common/fs/zfs/spa_misc.c
index 41342f37ea..87a95f0a36 100644
--- a/usr/src/uts/common/fs/zfs/spa_misc.c
+++ b/usr/src/uts/common/fs/zfs/spa_misc.c
@@ -1910,6 +1910,12 @@ bp_get_dsize(spa_t *spa, const blkptr_t *bp)
return (dsize);
}
+uint64_t
+spa_dirty_data(spa_t *spa)
+{
+ return (spa->spa_dsl_pool->dp_dirty_total);
+}
+
/*
* ==========================================================================
* Initialization and Termination
diff --git a/usr/src/uts/common/fs/zfs/sys/arc.h b/usr/src/uts/common/fs/zfs/sys/arc.h
index 10c920ff9d..45db7701e1 100644
--- a/usr/src/uts/common/fs/zfs/sys/arc.h
+++ b/usr/src/uts/common/fs/zfs/sys/arc.h
@@ -190,7 +190,7 @@ void arc_freed(spa_t *spa, const blkptr_t *bp);
void arc_flush(spa_t *spa, boolean_t retry);
void arc_tempreserve_clear(uint64_t reserve);
-int arc_tempreserve_space(uint64_t reserve, uint64_t txg);
+int arc_tempreserve_space(spa_t *spa, uint64_t reserve, uint64_t txg);
uint64_t arc_max_bytes(void);
void arc_init(void);
diff --git a/usr/src/uts/common/fs/zfs/sys/spa.h b/usr/src/uts/common/fs/zfs/sys/spa.h
index 34f02ed430..1acbe31377 100644
--- a/usr/src/uts/common/fs/zfs/sys/spa.h
+++ b/usr/src/uts/common/fs/zfs/sys/spa.h
@@ -813,6 +813,7 @@ extern uint64_t spa_bootfs(spa_t *spa);
extern uint64_t spa_delegation(spa_t *spa);
extern objset_t *spa_meta_objset(spa_t *spa);
extern uint64_t spa_deadman_synctime(spa_t *spa);
+extern uint64_t spa_dirty_data(spa_t *spa);
/* Miscellaneous support routines */
extern void spa_load_failed(spa_t *spa, const char *fmt, ...);
diff --git a/usr/src/uts/common/fs/zfs/sys/spa_impl.h b/usr/src/uts/common/fs/zfs/sys/spa_impl.h
index 4b90e96cfa..ea251cf0c6 100644
--- a/usr/src/uts/common/fs/zfs/sys/spa_impl.h
+++ b/usr/src/uts/common/fs/zfs/sys/spa_impl.h
@@ -373,6 +373,10 @@ struct spa {
int spa_queued;
} spa_queue_stats[ZIO_PRIORITY_NUM_QUEUEABLE];
+ /* arc_memory_throttle() parameters during low memory condition */
+ uint64_t spa_lowmem_page_load; /* memory load during txg */
+ uint64_t spa_lowmem_last_txg; /* txg window start */
+
hrtime_t spa_ccw_fail_time; /* Conf cache write fail time */
/*
diff --git a/usr/src/uts/common/sys/nvpair.h b/usr/src/uts/common/sys/nvpair.h
index e4d637b007..cf3f761c8c 100644
--- a/usr/src/uts/common/sys/nvpair.h
+++ b/usr/src/uts/common/sys/nvpair.h
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
*/
#ifndef _SYS_NVPAIR_H
@@ -40,6 +40,7 @@ extern "C" {
#endif
typedef enum {
+ DATA_TYPE_DONTCARE = -1,
DATA_TYPE_UNKNOWN = 0,
DATA_TYPE_BOOLEAN,
DATA_TYPE_BYTE,
diff --git a/usr/src/uts/common/sys/nvpair_impl.h b/usr/src/uts/common/sys/nvpair_impl.h
index f12dbbfe6e..c9874b3e4d 100644
--- a/usr/src/uts/common/sys/nvpair_impl.h
+++ b/usr/src/uts/common/sys/nvpair_impl.h
@@ -24,11 +24,13 @@
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2017 by Delphix. All rights reserved.
+ */
+
#ifndef _NVPAIR_IMPL_H
#define _NVPAIR_IMPL_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#ifdef __cplusplus
extern "C" {
#endif
@@ -47,16 +49,27 @@ typedef struct i_nvp i_nvp_t;
struct i_nvp {
union {
- uint64_t _nvi_align; /* ensure alignment */
+ /* ensure alignment */
+ uint64_t _nvi_align;
+
struct {
- i_nvp_t *_nvi_next; /* pointer to next nvpair */
- i_nvp_t *_nvi_prev; /* pointer to prev nvpair */
+ /* pointer to next nvpair */
+ i_nvp_t *_nvi_next;
+
+ /* pointer to prev nvpair */
+ i_nvp_t *_nvi_prev;
+
+ /* next pair in table bucket */
+ i_nvp_t *_nvi_hashtable_next;
} _nvi;
} _nvi_un;
- nvpair_t nvi_nvp; /* nvpair */
+
+ /* nvpair */
+ nvpair_t nvi_nvp;
};
#define nvi_next _nvi_un._nvi._nvi_next
#define nvi_prev _nvi_un._nvi._nvi_prev
+#define nvi_hashtable_next _nvi_un._nvi._nvi_hashtable_next
typedef struct {
i_nvp_t *nvp_list; /* linked list of nvpairs */
@@ -64,6 +77,10 @@ typedef struct {
i_nvp_t *nvp_curr; /* current walker nvpair */
nv_alloc_t *nvp_nva; /* pluggable allocator */
uint32_t nvp_stat; /* internal state */
+
+ i_nvp_t **nvp_hashtable; /* table of entries used for lookup */
+ uint32_t nvp_nbuckets; /* # of buckets in hash table */
+ uint32_t nvp_nentries; /* # of entries in hash table */
} nvpriv_t;
#ifdef __cplusplus