summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Tim Kordas <tim.kordas@joyent.com> 2017-10-13 17:10:52 -0700
committer: Tim Kordas <tim.kordas@joyent.com> 2017-10-25 16:47:48 +0000
commit: daa3911f02365820bf2df2a1cdf96602eda66912 (patch)
tree: 65b452006c58dc88f7955a09bfb3cb94dff79f8c
parent: e0c6772393ee04366494174e8e95ba7bcc980ed7 (diff)
download: illumos-joyent-release-20171026.tar.gz
OS-6363 system went to dark side of moon for ~467 seconds [release-20171026]
OS-6404 ARC reclaim should throttle its calls to arc_kmem_reap_now()
Reviewed by: Bryan Cantrill <bryan@joyent.com>
Reviewed by: Dan McDonald <danmcd@joyent.com>
Approved by: Bryan Cantrill <bryan@joyent.com>
-rw-r--r-- usr/src/lib/libfakekernel/common/kmem.c 13
-rw-r--r-- usr/src/lib/libfakekernel/common/mapfile-vers 5
-rw-r--r-- usr/src/lib/libfakekernel/common/taskq.c 13
-rw-r--r-- usr/src/lib/libzpool/common/sys/zfs_context.h 6
-rw-r--r-- usr/src/lib/libzpool/common/taskq.c 13
-rw-r--r-- usr/src/uts/common/fs/zfs/arc.c 47
-rw-r--r-- usr/src/uts/common/os/kmem.c 24
-rw-r--r-- usr/src/uts/common/os/taskq.c 17
-rw-r--r-- usr/src/uts/common/os/vmem.c 4
-rw-r--r-- usr/src/uts/common/sys/kmem.h 4
-rw-r--r-- usr/src/uts/common/sys/taskq.h 2
11 files changed, 127 insertions, 21 deletions
diff --git a/usr/src/lib/libfakekernel/common/kmem.c b/usr/src/lib/libfakekernel/common/kmem.c
index 82d1cfeaef..18ebb89885 100644
--- a/usr/src/lib/libfakekernel/common/kmem.c
+++ b/usr/src/lib/libfakekernel/common/kmem.c
@@ -11,6 +11,7 @@
/*
* Copyright 2013 Nexenta Systems, Inc. All rights reserved.
+ * Copyright (c) 2017, Joyent, Inc.
*/
#include <sys/kmem.h>
@@ -129,6 +130,18 @@ kmem_cache_set_move(kmem_cache_t *kc,
{
}
+boolean_t
+kmem_cache_reap_active(void)
+{
+ return (B_FALSE);
+}
+
+/* ARGSUSED */
+void
+kmem_cache_reap_soon(kmem_cache_t *kc)
+{
+}
+
/* ARGSUSED */
void
kmem_cache_reap_now(kmem_cache_t *kc)
diff --git a/usr/src/lib/libfakekernel/common/mapfile-vers b/usr/src/lib/libfakekernel/common/mapfile-vers
index db81dea59a..cae4b7b30b 100644
--- a/usr/src/lib/libfakekernel/common/mapfile-vers
+++ b/usr/src/lib/libfakekernel/common/mapfile-vers
@@ -11,6 +11,7 @@
#
# Copyright 2015 Nexenta Systems, Inc. All rights reserved.
+# Copyright (c) 2017, Joyent, Inc.
#
#
@@ -85,7 +86,8 @@ SYMBOL_VERSION SUNWprivate_1.1 {
kmem_cache_create;
kmem_cache_destroy;
kmem_cache_free;
- kmem_cache_reap_now;
+ kmem_cache_reap_active;
+ kmem_cache_reap_soon;
kmem_cache_set_move;
kmem_debugging;
kmem_free;
@@ -173,6 +175,7 @@ SYMBOL_VERSION SUNWprivate_1.1 {
taskq_dispatch;
taskq_dispatch_ent;
taskq_member;
+ taskq_empty;
taskq_wait;
thread_join;
diff --git a/usr/src/lib/libfakekernel/common/taskq.c b/usr/src/lib/libfakekernel/common/taskq.c
index a9320024ad..ffaa73fccc 100644
--- a/usr/src/lib/libfakekernel/common/taskq.c
+++ b/usr/src/lib/libfakekernel/common/taskq.c
@@ -25,6 +25,7 @@
/*
* Copyright 2012 Garrett D'Amore <garrett@damore.org>. All rights reserved.
* Copyright 2013 Nexenta Systems, Inc. All rights reserved.
+ * Copyright (c) 2017, Joyent, Inc.
*/
#include <sys/taskq_impl.h>
@@ -208,6 +209,18 @@ taskq_dispatch_ent(taskq_t *tq, task_func_t func, void *arg, uint_t flags,
mutex_exit(&tq->tq_lock);
}
+boolean_t
+taskq_empty(taskq_t *tq)
+{
+ boolean_t rv;
+
+ mutex_enter(&tq->tq_lock);
+ rv = (tq->tq_task.tqent_next == &tq->tq_task) && (tq->tq_active == 0);
+ mutex_exit(&tq->tq_lock);
+
+ return (rv);
+}
+
void
taskq_wait(taskq_t *tq)
{
diff --git a/usr/src/lib/libzpool/common/sys/zfs_context.h b/usr/src/lib/libzpool/common/sys/zfs_context.h
index 7bf4a3843d..45eeeaeb24 100644
--- a/usr/src/lib/libzpool/common/sys/zfs_context.h
+++ b/usr/src/lib/libzpool/common/sys/zfs_context.h
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
- * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2017, Joyent, Inc.
* Copyright (c) 2012, 2016 by Delphix. All rights reserved.
*/
@@ -333,7 +333,8 @@ extern void kstat_runq_back_to_waitq(kstat_io_t *);
#define kmem_cache_alloc(_c, _f) umem_cache_alloc(_c, _f)
#define kmem_cache_free(_c, _b) umem_cache_free(_c, _b)
#define kmem_debugging() 0
-#define kmem_cache_reap_now(_c) /* nothing */
+#define kmem_cache_reap_active() (B_FALSE)
+#define kmem_cache_reap_soon(_c) /* nothing */
#define kmem_cache_set_move(_c, _cb) /* nothing */
#define vmem_qcache_reap(_v) /* nothing */
#define POINTER_INVALIDATE(_pp) /* nothing */
@@ -391,6 +392,7 @@ extern taskqid_t taskq_dispatch(taskq_t *, task_func_t, void *, uint_t);
extern void taskq_dispatch_ent(taskq_t *, task_func_t, void *, uint_t,
taskq_ent_t *);
extern void taskq_destroy(taskq_t *);
+extern boolean_t taskq_empty(taskq_t *);
extern void taskq_wait(taskq_t *);
extern int taskq_member(taskq_t *, void *);
extern void system_taskq_init(void);
diff --git a/usr/src/lib/libzpool/common/taskq.c b/usr/src/lib/libzpool/common/taskq.c
index a4ab58963d..11a646d62f 100644
--- a/usr/src/lib/libzpool/common/taskq.c
+++ b/usr/src/lib/libzpool/common/taskq.c
@@ -26,6 +26,7 @@
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright 2012 Garrett D'Amore <garrett@damore.org>. All rights reserved.
* Copyright (c) 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2017, Joyent, Inc.
*/
#include <sys/zfs_context.h>
@@ -178,6 +179,18 @@ taskq_dispatch_ent(taskq_t *tq, task_func_t func, void *arg, uint_t flags,
mutex_exit(&tq->tq_lock);
}
+boolean_t
+taskq_empty(taskq_t *tq)
+{
+ boolean_t rv;
+
+ mutex_enter(&tq->tq_lock);
+ rv = (tq->tq_task.tqent_next == &tq->tq_task) && (tq->tq_active == 0);
+ mutex_exit(&tq->tq_lock);
+
+ return (rv);
+}
+
void
taskq_wait(taskq_t *tq)
{
diff --git a/usr/src/uts/common/fs/zfs/arc.c b/usr/src/uts/common/fs/zfs/arc.c
index fe60c09daa..87fecf5938 100644
--- a/usr/src/uts/common/fs/zfs/arc.c
+++ b/usr/src/uts/common/fs/zfs/arc.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2017, Joyent, Inc.
* Copyright (c) 2011, 2017 by Delphix. All rights reserved.
* Copyright (c) 2014 by Saso Kiselkov. All rights reserved.
* Copyright 2015 Nexenta Systems, Inc. All rights reserved.
@@ -302,6 +302,9 @@ int zfs_arc_evict_batch_limit = 10;
/* number of seconds before growing cache again */
static int arc_grow_retry = 60;
+/* number of milliseconds before attempting a kmem-cache-reap */
+static int arc_kmem_cache_reap_retry_ms = 1000;
+
/* shift of arc_c for calculating overflow limit in arc_get_data_impl */
int zfs_arc_overflow_shift = 3;
@@ -4047,21 +4050,31 @@ arc_kmem_reap_now(void)
#endif
#endif
+ /*
+ * If a kmem reap is already active, don't schedule more. We must
+ * check for this because kmem_cache_reap_soon() won't actually
+ * block on the cache being reaped (this is to prevent callers from
+ * becoming implicitly blocked by a system-wide kmem reap -- which,
+ * on a system with many, many full magazines, can take minutes).
+ */
+ if (kmem_cache_reap_active())
+ return;
+
for (i = 0; i < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT; i++) {
if (zio_buf_cache[i] != prev_cache) {
prev_cache = zio_buf_cache[i];
- kmem_cache_reap_now(zio_buf_cache[i]);
+ kmem_cache_reap_soon(zio_buf_cache[i]);
}
if (zio_data_buf_cache[i] != prev_data_cache) {
prev_data_cache = zio_data_buf_cache[i];
- kmem_cache_reap_now(zio_data_buf_cache[i]);
+ kmem_cache_reap_soon(zio_data_buf_cache[i]);
}
}
- kmem_cache_reap_now(abd_chunk_cache);
- kmem_cache_reap_now(buf_cache);
- kmem_cache_reap_now(hdr_full_cache);
- kmem_cache_reap_now(hdr_l2only_cache);
- kmem_cache_reap_now(range_seg_cache);
+ kmem_cache_reap_soon(abd_chunk_cache);
+ kmem_cache_reap_soon(buf_cache);
+ kmem_cache_reap_soon(hdr_full_cache);
+ kmem_cache_reap_soon(hdr_l2only_cache);
+ kmem_cache_reap_soon(range_seg_cache);
if (zio_arena != NULL) {
/*
@@ -4093,6 +4106,7 @@ static void
arc_reclaim_thread(void *unused)
{
hrtime_t growtime = 0;
+ hrtime_t kmem_reap_time = 0;
callb_cpr_t cpr;
CALLB_CPR_INIT(&cpr, &arc_reclaim_lock, callb_generic_cpr, FTAG);
@@ -4126,7 +4140,7 @@ arc_reclaim_thread(void *unused)
int64_t free_memory = arc_available_memory();
if (free_memory < 0) {
-
+ hrtime_t curtime = gethrtime();
arc_no_grow = B_TRUE;
arc_warm = B_TRUE;
@@ -4134,9 +4148,20 @@ arc_reclaim_thread(void *unused)
* Wait at least zfs_grow_retry (default 60) seconds
* before considering growing.
*/
- growtime = gethrtime() + SEC2NSEC(arc_grow_retry);
+ growtime = curtime + SEC2NSEC(arc_grow_retry);
- arc_kmem_reap_now();
+ /*
+ * Wait at least arc_kmem_cache_reap_retry_ms
+ * between arc_kmem_reap_now() calls. Without
+ * this check it is possible to end up in a
+ * situation where we spend lots of time
+ * reaping caches, while we're near arc_c_min.
+ */
+ if (curtime >= kmem_reap_time) {
+ arc_kmem_reap_now();
+ kmem_reap_time = gethrtime() +
+ MSEC2NSEC(arc_kmem_cache_reap_retry_ms);
+ }
/*
* If we are still low on memory, shrink the ARC
diff --git a/usr/src/uts/common/os/kmem.c b/usr/src/uts/common/os/kmem.c
index 1243d0fbbf..052bdb62ca 100644
--- a/usr/src/uts/common/os/kmem.c
+++ b/usr/src/uts/common/os/kmem.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2015 Joyent, Inc. All rights reserved.
+ * Copyright (c) 2017, Joyent, Inc.
* Copyright (c) 2012, 2017 by Delphix. All rights reserved.
* Copyright 2015 Nexenta Systems, Inc. All rights reserved.
*/
@@ -3287,10 +3287,27 @@ kmem_cache_magazine_enable(kmem_cache_t *cp)
}
/*
- * Reap (almost) everything right now.
+ * Allow our caller to determine if there are running reaps.
+ *
+ * This call is very conservative and may return B_TRUE even when
+ * reaping activity isn't active. If it returns B_FALSE, then reaping
+ * activity is definitely inactive.
+ */
+boolean_t
+kmem_cache_reap_active(void)
+{
+ return (!taskq_empty(kmem_taskq));
+}
+
+/*
+ * Reap (almost) everything soon.
+ *
+ * Note: this does not wait for the reap-tasks to complete. Caller
+ * should use kmem_cache_reap_active() (above) and/or moderation to
+ * avoid scheduling too many reap-tasks.
*/
void
-kmem_cache_reap_now(kmem_cache_t *cp)
+kmem_cache_reap_soon(kmem_cache_t *cp)
{
ASSERT(list_link_active(&cp->cache_link));
@@ -3298,7 +3315,6 @@ kmem_cache_reap_now(kmem_cache_t *cp)
(void) taskq_dispatch(kmem_taskq,
(task_func_t *)kmem_depot_ws_reap, cp, TQ_SLEEP);
- taskq_wait(kmem_taskq);
}
/*
diff --git a/usr/src/uts/common/os/taskq.c b/usr/src/uts/common/os/taskq.c
index 814b738581..7a38181cf7 100644
--- a/usr/src/uts/common/os/taskq.c
+++ b/usr/src/uts/common/os/taskq.c
@@ -26,6 +26,7 @@
/*
* Copyright 2015 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2017 by Delphix. All rights reserved.
+ * Copyright (c) 2017, Joyent, Inc.
*/
/*
@@ -1320,6 +1321,22 @@ taskq_dispatch_ent(taskq_t *tq, task_func_t func, void *arg, uint_t flags,
}
/*
+ * Allow our caller to ask if there are tasks pending on the queue.
+ */
+boolean_t
+taskq_empty(taskq_t *tq)
+{
+ boolean_t rv;
+
+ ASSERT3P(tq, !=, curthread->t_taskq);
+ mutex_enter(&tq->tq_lock);
+ rv = (tq->tq_task.tqent_next == &tq->tq_task) && (tq->tq_active == 0);
+ mutex_exit(&tq->tq_lock);
+
+ return (rv);
+}
+
+/*
* Wait for all pending tasks to complete.
* Calling taskq_wait from a task will cause deadlock.
*/
diff --git a/usr/src/uts/common/os/vmem.c b/usr/src/uts/common/os/vmem.c
index e3da4df247..cbe5b1f08c 100644
--- a/usr/src/uts/common/os/vmem.c
+++ b/usr/src/uts/common/os/vmem.c
@@ -25,7 +25,7 @@
/*
* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
- * Copyright (c) 2012, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2017, Joyent, Inc.
*/
/*
@@ -1745,7 +1745,7 @@ vmem_qcache_reap(vmem_t *vmp)
*/
for (i = 0; i < VMEM_NQCACHE_MAX; i++)
if (vmp->vm_qcache[i])
- kmem_cache_reap_now(vmp->vm_qcache[i]);
+ kmem_cache_reap_soon(vmp->vm_qcache[i]);
}
/*
diff --git a/usr/src/uts/common/sys/kmem.h b/usr/src/uts/common/sys/kmem.h
index e54d83e499..161fafb851 100644
--- a/usr/src/uts/common/sys/kmem.h
+++ b/usr/src/uts/common/sys/kmem.h
@@ -23,6 +23,7 @@
* Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
* Copyright 2013 Nexenta Systems, Inc. All rights reserved.
+ * Copyright (c) 2017, Joyent, Inc.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -128,7 +129,8 @@ extern void kmem_cache_destroy(kmem_cache_t *);
extern void *kmem_cache_alloc(kmem_cache_t *, int);
extern void kmem_cache_free(kmem_cache_t *, void *);
extern uint64_t kmem_cache_stat(kmem_cache_t *, char *);
-extern void kmem_cache_reap_now(kmem_cache_t *);
+extern boolean_t kmem_cache_reap_active(void);
+extern void kmem_cache_reap_soon(kmem_cache_t *);
extern void kmem_cache_move_notify(kmem_cache_t *, void *);
#endif /* _KERNEL */
diff --git a/usr/src/uts/common/sys/taskq.h b/usr/src/uts/common/sys/taskq.h
index 7548149ef6..41d542d849 100644
--- a/usr/src/uts/common/sys/taskq.h
+++ b/usr/src/uts/common/sys/taskq.h
@@ -23,6 +23,7 @@
* Use is subject to license terms.
*
* Copyright 2013 Nexenta Systems, Inc. All rights reserved.
+ * Copyright (c) 2017, Joyent, Inc.
*/
#ifndef _SYS_TASKQ_H
@@ -80,6 +81,7 @@ extern taskqid_t taskq_dispatch(taskq_t *, task_func_t, void *, uint_t);
extern void nulltask(void *);
extern void taskq_destroy(taskq_t *);
extern void taskq_wait(taskq_t *);
+extern boolean_t taskq_empty(taskq_t *);
extern void taskq_suspend(taskq_t *);
extern int taskq_suspended(taskq_t *);
extern void taskq_resume(taskq_t *);