diff options
author | Tim Kordas <tim.kordas@joyent.com> | 2017-10-13 17:10:52 -0700 |
---|---|---|
committer | Tim Kordas <tim.kordas@joyent.com> | 2017-10-25 16:47:48 +0000 |
commit | daa3911f02365820bf2df2a1cdf96602eda66912 (patch) | |
tree | 65b452006c58dc88f7955a09bfb3cb94dff79f8c | |
parent | e0c6772393ee04366494174e8e95ba7bcc980ed7 (diff) | |
download | illumos-joyent-release-20171026.tar.gz |
OS-6363 system went to dark side of moon for ~467 seconds [release-20171026]
OS-6404 ARC reclaim should throttle its calls to arc_kmem_reap_now()
Reviewed by: Bryan Cantrill <bryan@joyent.com>
Reviewed by: Dan McDonald <danmcd@joyent.com>
Approved by: Bryan Cantrill <bryan@joyent.com>
-rw-r--r-- | usr/src/lib/libfakekernel/common/kmem.c | 13 | ||||
-rw-r--r-- | usr/src/lib/libfakekernel/common/mapfile-vers | 5 | ||||
-rw-r--r-- | usr/src/lib/libfakekernel/common/taskq.c | 13 | ||||
-rw-r--r-- | usr/src/lib/libzpool/common/sys/zfs_context.h | 6 | ||||
-rw-r--r-- | usr/src/lib/libzpool/common/taskq.c | 13 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/arc.c | 47 | ||||
-rw-r--r-- | usr/src/uts/common/os/kmem.c | 24 | ||||
-rw-r--r-- | usr/src/uts/common/os/taskq.c | 17 | ||||
-rw-r--r-- | usr/src/uts/common/os/vmem.c | 4 | ||||
-rw-r--r-- | usr/src/uts/common/sys/kmem.h | 4 | ||||
-rw-r--r-- | usr/src/uts/common/sys/taskq.h | 2 |
11 files changed, 127 insertions, 21 deletions
diff --git a/usr/src/lib/libfakekernel/common/kmem.c b/usr/src/lib/libfakekernel/common/kmem.c index 82d1cfeaef..18ebb89885 100644 --- a/usr/src/lib/libfakekernel/common/kmem.c +++ b/usr/src/lib/libfakekernel/common/kmem.c @@ -11,6 +11,7 @@ /* * Copyright 2013 Nexenta Systems, Inc. All rights reserved. + * Copyright (c) 2017, Joyent, Inc. */ #include <sys/kmem.h> @@ -129,6 +130,18 @@ kmem_cache_set_move(kmem_cache_t *kc, { } +boolean_t +kmem_cache_reap_active(void) +{ + return (B_FALSE); +} + +/* ARGSUSED */ +void +kmem_cache_reap_soon(kmem_cache_t *kc) +{ +} + /* ARGSUSED */ void kmem_cache_reap_now(kmem_cache_t *kc) diff --git a/usr/src/lib/libfakekernel/common/mapfile-vers b/usr/src/lib/libfakekernel/common/mapfile-vers index db81dea59a..cae4b7b30b 100644 --- a/usr/src/lib/libfakekernel/common/mapfile-vers +++ b/usr/src/lib/libfakekernel/common/mapfile-vers @@ -11,6 +11,7 @@ # # Copyright 2015 Nexenta Systems, Inc. All rights reserved. +# Copyright (c) 2017, Joyent, Inc. # # @@ -85,7 +86,8 @@ SYMBOL_VERSION SUNWprivate_1.1 { kmem_cache_create; kmem_cache_destroy; kmem_cache_free; - kmem_cache_reap_now; + kmem_cache_reap_active; + kmem_cache_reap_soon; kmem_cache_set_move; kmem_debugging; kmem_free; @@ -173,6 +175,7 @@ SYMBOL_VERSION SUNWprivate_1.1 { taskq_dispatch; taskq_dispatch_ent; taskq_member; + taskq_empty; taskq_wait; thread_join; diff --git a/usr/src/lib/libfakekernel/common/taskq.c b/usr/src/lib/libfakekernel/common/taskq.c index a9320024ad..ffaa73fccc 100644 --- a/usr/src/lib/libfakekernel/common/taskq.c +++ b/usr/src/lib/libfakekernel/common/taskq.c @@ -25,6 +25,7 @@ /* * Copyright 2012 Garrett D'Amore <garrett@damore.org>. All rights reserved. * Copyright 2013 Nexenta Systems, Inc. All rights reserved. + * Copyright (c) 2017, Joyent, Inc. 
*/ #include <sys/taskq_impl.h> @@ -208,6 +209,18 @@ taskq_dispatch_ent(taskq_t *tq, task_func_t func, void *arg, uint_t flags, mutex_exit(&tq->tq_lock); } +boolean_t +taskq_empty(taskq_t *tq) +{ + boolean_t rv; + + mutex_enter(&tq->tq_lock); + rv = (tq->tq_task.tqent_next == &tq->tq_task) && (tq->tq_active == 0); + mutex_exit(&tq->tq_lock); + + return (rv); +} + void taskq_wait(taskq_t *tq) { diff --git a/usr/src/lib/libzpool/common/sys/zfs_context.h b/usr/src/lib/libzpool/common/sys/zfs_context.h index 7bf4a3843d..45eeeaeb24 100644 --- a/usr/src/lib/libzpool/common/sys/zfs_context.h +++ b/usr/src/lib/libzpool/common/sys/zfs_context.h @@ -21,7 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. - * Copyright (c) 2013, Joyent, Inc. All rights reserved. + * Copyright (c) 2017, Joyent, Inc. * Copyright (c) 2012, 2016 by Delphix. All rights reserved. */ @@ -333,7 +333,8 @@ extern void kstat_runq_back_to_waitq(kstat_io_t *); #define kmem_cache_alloc(_c, _f) umem_cache_alloc(_c, _f) #define kmem_cache_free(_c, _b) umem_cache_free(_c, _b) #define kmem_debugging() 0 -#define kmem_cache_reap_now(_c) /* nothing */ +#define kmem_cache_reap_active() (B_FALSE) +#define kmem_cache_reap_soon(_c) /* nothing */ #define kmem_cache_set_move(_c, _cb) /* nothing */ #define vmem_qcache_reap(_v) /* nothing */ #define POINTER_INVALIDATE(_pp) /* nothing */ @@ -391,6 +392,7 @@ extern taskqid_t taskq_dispatch(taskq_t *, task_func_t, void *, uint_t); extern void taskq_dispatch_ent(taskq_t *, task_func_t, void *, uint_t, taskq_ent_t *); extern void taskq_destroy(taskq_t *); +extern boolean_t taskq_empty(taskq_t *); extern void taskq_wait(taskq_t *); extern int taskq_member(taskq_t *, void *); extern void system_taskq_init(void); diff --git a/usr/src/lib/libzpool/common/taskq.c b/usr/src/lib/libzpool/common/taskq.c index a4ab58963d..11a646d62f 100644 --- a/usr/src/lib/libzpool/common/taskq.c +++ 
b/usr/src/lib/libzpool/common/taskq.c @@ -26,6 +26,7 @@ * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright 2012 Garrett D'Amore <garrett@damore.org>. All rights reserved. * Copyright (c) 2014 by Delphix. All rights reserved. + * Copyright (c) 2017, Joyent, Inc. */ #include <sys/zfs_context.h> @@ -178,6 +179,18 @@ taskq_dispatch_ent(taskq_t *tq, task_func_t func, void *arg, uint_t flags, mutex_exit(&tq->tq_lock); } +boolean_t +taskq_empty(taskq_t *tq) +{ + boolean_t rv; + + mutex_enter(&tq->tq_lock); + rv = (tq->tq_task.tqent_next == &tq->tq_task) && (tq->tq_active == 0); + mutex_exit(&tq->tq_lock); + + return (rv); +} + void taskq_wait(taskq_t *tq) { diff --git a/usr/src/uts/common/fs/zfs/arc.c b/usr/src/uts/common/fs/zfs/arc.c index fe60c09daa..87fecf5938 100644 --- a/usr/src/uts/common/fs/zfs/arc.c +++ b/usr/src/uts/common/fs/zfs/arc.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, Joyent, Inc. All rights reserved. + * Copyright (c) 2017, Joyent, Inc. * Copyright (c) 2011, 2017 by Delphix. All rights reserved. * Copyright (c) 2014 by Saso Kiselkov. All rights reserved. * Copyright 2015 Nexenta Systems, Inc. All rights reserved. @@ -302,6 +302,9 @@ int zfs_arc_evict_batch_limit = 10; /* number of seconds before growing cache again */ static int arc_grow_retry = 60; +/* number of milliseconds before attempting a kmem-cache-reap */ +static int arc_kmem_cache_reap_retry_ms = 1000; + /* shift of arc_c for calculating overflow limit in arc_get_data_impl */ int zfs_arc_overflow_shift = 3; @@ -4047,21 +4050,31 @@ arc_kmem_reap_now(void) #endif #endif + /* + * If a kmem reap is already active, don't schedule more. 
We must + * check for this because kmem_cache_reap_soon() won't actually + * block on the cache being reaped (this is to prevent callers from + * becoming implicitly blocked by a system-wide kmem reap -- which, + * on a system with many, many full magazines, can take minutes). + */ + if (kmem_cache_reap_active()) + return; + for (i = 0; i < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT; i++) { if (zio_buf_cache[i] != prev_cache) { prev_cache = zio_buf_cache[i]; - kmem_cache_reap_now(zio_buf_cache[i]); + kmem_cache_reap_soon(zio_buf_cache[i]); } if (zio_data_buf_cache[i] != prev_data_cache) { prev_data_cache = zio_data_buf_cache[i]; - kmem_cache_reap_now(zio_data_buf_cache[i]); + kmem_cache_reap_soon(zio_data_buf_cache[i]); } } - kmem_cache_reap_now(abd_chunk_cache); - kmem_cache_reap_now(buf_cache); - kmem_cache_reap_now(hdr_full_cache); - kmem_cache_reap_now(hdr_l2only_cache); - kmem_cache_reap_now(range_seg_cache); + kmem_cache_reap_soon(abd_chunk_cache); + kmem_cache_reap_soon(buf_cache); + kmem_cache_reap_soon(hdr_full_cache); + kmem_cache_reap_soon(hdr_l2only_cache); + kmem_cache_reap_soon(range_seg_cache); if (zio_arena != NULL) { /* @@ -4093,6 +4106,7 @@ static void arc_reclaim_thread(void *unused) { hrtime_t growtime = 0; + hrtime_t kmem_reap_time = 0; callb_cpr_t cpr; CALLB_CPR_INIT(&cpr, &arc_reclaim_lock, callb_generic_cpr, FTAG); @@ -4126,7 +4140,7 @@ arc_reclaim_thread(void *unused) int64_t free_memory = arc_available_memory(); if (free_memory < 0) { - + hrtime_t curtime = gethrtime(); arc_no_grow = B_TRUE; arc_warm = B_TRUE; @@ -4134,9 +4148,20 @@ arc_reclaim_thread(void *unused) * Wait at least zfs_grow_retry (default 60) seconds * before considering growing. */ - growtime = gethrtime() + SEC2NSEC(arc_grow_retry); + growtime = curtime + SEC2NSEC(arc_grow_retry); - arc_kmem_reap_now(); + /* + * Wait at least arc_kmem_cache_reap_retry_ms + * between arc_kmem_reap_now() calls. 
Without + * this check it is possible to end up in a + * situation where we spend lots of time + * reaping caches, while we're near arc_c_min. + */ + if (curtime >= kmem_reap_time) { + arc_kmem_reap_now(); + kmem_reap_time = gethrtime() + + MSEC2NSEC(arc_kmem_cache_reap_retry_ms); + } /* * If we are still low on memory, shrink the ARC diff --git a/usr/src/uts/common/os/kmem.c b/usr/src/uts/common/os/kmem.c index 1243d0fbbf..052bdb62ca 100644 --- a/usr/src/uts/common/os/kmem.c +++ b/usr/src/uts/common/os/kmem.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2015 Joyent, Inc. All rights reserved. + * Copyright (c) 2017, Joyent, Inc. * Copyright (c) 2012, 2017 by Delphix. All rights reserved. * Copyright 2015 Nexenta Systems, Inc. All rights reserved. */ @@ -3287,10 +3287,27 @@ kmem_cache_magazine_enable(kmem_cache_t *cp) } /* - * Reap (almost) everything right now. + * Allow our caller to determine if there are running reaps. + * + * This call is very conservative and may return B_TRUE even when + * reaping activity isn't active. If it returns B_FALSE, then reaping + * activity is definitely inactive. + */ +boolean_t +kmem_cache_reap_active(void) +{ + return (!taskq_empty(kmem_taskq)); +} + +/* + * Reap (almost) everything soon. + * + * Note: this does not wait for the reap-tasks to complete. Caller + * should use kmem_cache_reap_active() (above) and/or moderation to + * avoid scheduling too many reap-tasks. 
*/ void -kmem_cache_reap_now(kmem_cache_t *cp) +kmem_cache_reap_soon(kmem_cache_t *cp) { ASSERT(list_link_active(&cp->cache_link)); @@ -3298,7 +3315,6 @@ kmem_cache_reap_now(kmem_cache_t *cp) (void) taskq_dispatch(kmem_taskq, (task_func_t *)kmem_depot_ws_reap, cp, TQ_SLEEP); - taskq_wait(kmem_taskq); } /* diff --git a/usr/src/uts/common/os/taskq.c b/usr/src/uts/common/os/taskq.c index 814b738581..7a38181cf7 100644 --- a/usr/src/uts/common/os/taskq.c +++ b/usr/src/uts/common/os/taskq.c @@ -26,6 +26,7 @@ /* * Copyright 2015 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2017 by Delphix. All rights reserved. + * Copyright (c) 2017, Joyent, Inc. */ /* @@ -1320,6 +1321,22 @@ taskq_dispatch_ent(taskq_t *tq, task_func_t func, void *arg, uint_t flags, } /* + * Allow our caller to ask if there are tasks pending on the queue. + */ +boolean_t +taskq_empty(taskq_t *tq) +{ + boolean_t rv; + + ASSERT3P(tq, !=, curthread->t_taskq); + mutex_enter(&tq->tq_lock); + rv = (tq->tq_task.tqent_next == &tq->tq_task) && (tq->tq_active == 0); + mutex_exit(&tq->tq_lock); + + return (rv); +} + +/* * Wait for all pending tasks to complete. * Calling taskq_wait from a task will cause deadlock. */ diff --git a/usr/src/uts/common/os/vmem.c b/usr/src/uts/common/os/vmem.c index e3da4df247..cbe5b1f08c 100644 --- a/usr/src/uts/common/os/vmem.c +++ b/usr/src/uts/common/os/vmem.c @@ -25,7 +25,7 @@ /* * Copyright (c) 2012, 2015 by Delphix. All rights reserved. - * Copyright (c) 2012, Joyent, Inc. All rights reserved. + * Copyright (c) 2017, Joyent, Inc. 
*/ /* @@ -1745,7 +1745,7 @@ vmem_qcache_reap(vmem_t *vmp) */ for (i = 0; i < VMEM_NQCACHE_MAX; i++) if (vmp->vm_qcache[i]) - kmem_cache_reap_now(vmp->vm_qcache[i]); + kmem_cache_reap_soon(vmp->vm_qcache[i]); } /* diff --git a/usr/src/uts/common/sys/kmem.h b/usr/src/uts/common/sys/kmem.h index e54d83e499..161fafb851 100644 --- a/usr/src/uts/common/sys/kmem.h +++ b/usr/src/uts/common/sys/kmem.h @@ -23,6 +23,7 @@ * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012 by Delphix. All rights reserved. * Copyright 2013 Nexenta Systems, Inc. All rights reserved. + * Copyright (c) 2017, Joyent, Inc. */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ @@ -128,7 +129,8 @@ extern void kmem_cache_destroy(kmem_cache_t *); extern void *kmem_cache_alloc(kmem_cache_t *, int); extern void kmem_cache_free(kmem_cache_t *, void *); extern uint64_t kmem_cache_stat(kmem_cache_t *, char *); -extern void kmem_cache_reap_now(kmem_cache_t *); +extern boolean_t kmem_cache_reap_active(void); +extern void kmem_cache_reap_soon(kmem_cache_t *); extern void kmem_cache_move_notify(kmem_cache_t *, void *); #endif /* _KERNEL */ diff --git a/usr/src/uts/common/sys/taskq.h b/usr/src/uts/common/sys/taskq.h index 7548149ef6..41d542d849 100644 --- a/usr/src/uts/common/sys/taskq.h +++ b/usr/src/uts/common/sys/taskq.h @@ -23,6 +23,7 @@ * Use is subject to license terms. * * Copyright 2013 Nexenta Systems, Inc. All rights reserved. + * Copyright (c) 2017, Joyent, Inc. */ #ifndef _SYS_TASKQ_H @@ -80,6 +81,7 @@ extern taskqid_t taskq_dispatch(taskq_t *, task_func_t, void *, uint_t); extern void nulltask(void *); extern void taskq_destroy(taskq_t *); extern void taskq_wait(taskq_t *); +extern boolean_t taskq_empty(taskq_t *); extern void taskq_suspend(taskq_t *); extern int taskq_suspended(taskq_t *); extern void taskq_resume(taskq_t *); |