summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Patrick Mooney <pmooney@pfmooney.com> 2016-12-19 20:21:50 +0000
committer Patrick Mooney <pmooney@pfmooney.com> 2016-12-27 18:05:17 +0000
commit 68ebdf39ab401a6e0d6639b4e465ef44619e025f (patch)
tree 3033f54ac67cc522a2318e9ebae68c452a35d9ba
parent 9b80da806e4b1c1660c0885bd90e593444383e40 (diff)
downloadillumos-joyent-68ebdf39ab401a6e0d6639b4e465ef44619e025f.tar.gz
OS-5862 zoneadmd should perform page-outs in smaller pieces
Reviewed by: Ryan Zezeski <ryan.zeseski@joyent.com>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Approved by: Jerry Jelinek <jerry.jelinek@joyent.com>
-rw-r--r--usr/src/cmd/zoneadmd/mcap.c55
1 files changed, 40 insertions, 15 deletions
diff --git a/usr/src/cmd/zoneadmd/mcap.c b/usr/src/cmd/zoneadmd/mcap.c
index 16cd2dd07a..be094e4419 100644
--- a/usr/src/cmd/zoneadmd/mcap.c
+++ b/usr/src/cmd/zoneadmd/mcap.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Copyright 2014, Joyent, Inc. All rights reserved.
+ * Copyright 2016 Joyent, Inc.
*/
/*
@@ -139,6 +139,13 @@ uint64_t prev_fast_rss = 0;
uint64_t fast_rss = 0;
uint64_t accurate_rss = 0;
+/*
+ * Tunable for chunk size when breaking up large segment page-out ops.
+ * The initial value has been set at 64MB, trying to strike a balance between
+ * responsiveness and the load placed on locks.
+ */
+static size_t pageout_chunk_size = 0x4000000;
+
static char zoneproc[MAXPATHLEN];
static char debug_log[MAXPATHLEN];
static zoneid_t zid;
@@ -349,24 +356,45 @@ done:
/*
* Attempt to invalidate the entire mapping from within the given process's
- * address space. May return nonzero with errno as:
- * ESRCH - process not found
- * ENOMEM - segment not found
- * EINVAL - mapping exceeds a single segment
+ * address space.
*/
-static int
+static void
pageout_mapping(pid_t pid, prmap_t *pmp)
{
- int res;
+ uintptr_t base;
+ size_t remain;
if (pmp->pr_mflags & MA_ISM || pmp->pr_mflags & MA_SHM)
- return (0);
+ return;
errno = 0;
- res = syscall(SYS_rusagesys, _RUSAGESYS_INVALMAP, pid, pmp->pr_vaddr,
- pmp->pr_size);
+ base = pmp->pr_vaddr;
+ remain = pmp->pr_size;
+ while (remain > 0 && !shutting_down) {
+ size_t chunk;
- return (res);
+ /*
+ * The rusagesys(INVALMAP) call is split up into smaller chunks
+ * when applied to large mappings. This is meant to avoid the
+ * situation where large writable segments take an extremely
+ * long time to page out, keeping locks held in the process.
+ */
+ if (remain > pageout_chunk_size) {
+ chunk = pageout_chunk_size;
+ } else {
+ chunk = remain;
+ }
+
+ if (syscall(SYS_rusagesys, _RUSAGESYS_INVALMAP, pid, base,
+ chunk) != 0) {
+ debug("pid %ld: mapping 0x%p %ldkb unpageable (%d)\n",
+ pid, base, chunk / 1024, errno);
+ return;
+ }
+
+ base += chunk;
+ remain -= chunk;
+ }
}
/*
@@ -379,7 +407,6 @@ pageout_process(pid_t pid, int64_t excess)
int psfd;
prmap_t *pmap;
proc_map_t cur;
- int res;
int64_t sum_d_rss, d_rss;
int64_t old_rss;
int map_cnt;
@@ -431,9 +458,7 @@ pageout_process(pid_t pid, int64_t excess)
sum_d_rss = 0;
while (excess > 0 && pmap != NULL && !shutting_down) {
/* invalidate the entire mapping */
- if ((res = pageout_mapping(pid, pmap)) < 0)
- debug("pid %ld: mapping 0x%p %ldkb unpageable (%d)\n",
- pid, pmap->pr_vaddr, pmap->pr_size / 1024, errno);
+ pageout_mapping(pid, pmap);
map_cnt++;