summary refs log tree commit diff
path: root/usr/src/uts/common/vm
diff options
context:
space:
mode:
author Patrick Mooney <pmooney@pfmooney.com> 2021-05-28 21:07:11 +0000
committer Patrick Mooney <pmooney@oxide.computer> 2021-07-27 19:26:22 +0000
commit b57f5d3e6a2df8d435e606797cf3934811848343 (patch)
tree 31d0b366057848a88837b15524905a703c3bdf9c /usr/src/uts/common/vm
parent ed1e93792d7c9ea04a0cb44cffe34c24c135b002 (diff)
download illumos-joyent-b57f5d3e6a2df8d435e606797cf3934811848343.tar.gz
13833 want bhyve memory reservoir
13822 bhyve memory should exert memory pressure
13834 want extensible page_resv
13821 vmmctl ioctls should have more structure
Reviewed by: Andy Fiddaman <andy@omnios.org>
Reviewed by: Jason King <jason.brian.king@gmail.com>
Reviewed by: Dan Cross <cross@oxidecomputer.com>
Reviewed by: Hans Rosenfeld <rosenfeld@grumpf.hope-2000.org>
Reviewed by: Mike Zeller <mike.zeller@joyent.com>
Approved by: Dan McDonald <danmcd@joyent.com>
Diffstat (limited to 'usr/src/uts/common/vm')
-rw-r--r-- usr/src/uts/common/vm/page.h 12
-rw-r--r-- usr/src/uts/common/vm/vm_page.c 72
2 files changed, 63 insertions, 21 deletions
diff --git a/usr/src/uts/common/vm/page.h b/usr/src/uts/common/vm/page.h
index 8747b96acc..5b98acd24f 100644
--- a/usr/src/uts/common/vm/page.h
+++ b/usr/src/uts/common/vm/page.h
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 1986, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2021 Oxide Computer Company
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -340,13 +341,13 @@ struct as;
*
* So, as a quick summary:
*
- * pse_mutex[]'s protect the p_selock and p_cv fields.
+ * pse_mutex[]'s protect the p_selock and p_cv fields.
*
- * p_selock protects the p_free, p_age, p_vnode, p_offset and p_hash,
+ * p_selock protects the p_free, p_age, p_vnode, p_offset and p_hash,
*
- * ph_mutex[]'s protect the page_hash[] array and its chains.
+ * ph_mutex[]'s protect the page_hash[] array and its chains.
*
- * vph_mutex[]'s protect the v_pages field and the vp page chains.
+ * vph_mutex[]'s protect the v_pages field and the vp page chains.
*
* First lock the page, then the hash chain, then the vnode chain. When
* this is not possible `trylocks' must be used. Sleeping while holding
@@ -762,6 +763,7 @@ void page_lock_delete(page_t *);
int page_deleted(page_t *);
int page_pp_lock(page_t *, int, int);
void page_pp_unlock(page_t *, int, int);
+int page_xresv(pgcnt_t, uint_t, int (*)(void));
int page_resv(pgcnt_t, uint_t);
void page_unresv(pgcnt_t);
void page_pp_useclaim(page_t *, page_t *, uint_t);
@@ -1078,7 +1080,7 @@ typedef struct kpm_hlk {
* The state about how a kpm page is mapped and whether it is ready to go
* is indicated by the following 1 byte kpm_spage structure. This byte is
* split into two 4-bit parts - kp_mapped and kp_mapped_go.
- * - kp_mapped == 1 the page is mapped cacheable
+ * - kp_mapped == 1 the page is mapped cacheable
* - kp_mapped == 2 the page is mapped non-cacheable
* - kp_mapped_go == 1 the mapping is ready to be dropped in
* - kp_mapped_go == 0 the mapping is not ready to be dropped in.
diff --git a/usr/src/uts/common/vm/vm_page.c b/usr/src/uts/common/vm/vm_page.c
index bcc6d05d47..89751b7b2c 100644
--- a/usr/src/uts/common/vm/vm_page.c
+++ b/usr/src/uts/common/vm/vm_page.c
@@ -23,6 +23,7 @@
* Copyright (c) 2015, Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
* Copyright (c) 2015, 2016 by Delphix. All rights reserved.
* Copyright 2018 Joyent, Inc.
+ * Copyright 2021 Oxide Computer Company
*/
/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
@@ -3919,29 +3920,68 @@ page_pp_unlock(
}
/*
- * This routine reserves availrmem for npages;
- * flags: KM_NOSLEEP or KM_SLEEP
- * returns 1 on success or 0 on failure
+ * This routine reserves availrmem for npages.
+ * It returns 1 on success or 0 on failure.
+ *
+ * flags: KM_NOSLEEP or KM_SLEEP
+ * cb_wait: called to induce delay when KM_SLEEP reservation requires kmem
+ * reaping to potentially succeed. If the callback returns 0, the
+ * reservation attempts will cease to repeat and page_xresv() may
+ * report a failure. If cb_wait is NULL, the traditional delay(hz/4)
+ * behavior will be used while waiting for a reap.
*/
int
-page_resv(pgcnt_t npages, uint_t flags)
+page_xresv(pgcnt_t npages, uint_t flags, int (*cb_wait)(void))
{
mutex_enter(&freemem_lock);
- while (availrmem < tune.t_minarmem + npages) {
- if (flags & KM_NOSLEEP) {
- mutex_exit(&freemem_lock);
- return (0);
- }
+ if (availrmem >= tune.t_minarmem + npages) {
+ availrmem -= npages;
mutex_exit(&freemem_lock);
- page_needfree(npages);
- kmem_reap();
- delay(hz >> 2);
- page_needfree(-(spgcnt_t)npages);
- mutex_enter(&freemem_lock);
+ return (1);
+ } else if ((flags & KM_NOSLEEP) != 0) {
+ mutex_exit(&freemem_lock);
+ return (0);
}
- availrmem -= npages;
mutex_exit(&freemem_lock);
- return (1);
+
+ /*
+ * We signal memory pressure to the system by elevating 'needfree'.
+ * Processes such as kmem reaping, pageout, and ZFS ARC shrinking can
+ * then respond to said pressure by freeing pages.
+ */
+ page_needfree(npages);
+ int nobail = 1;
+ do {
+ kmem_reap();
+ if (cb_wait == NULL) {
+ delay(hz >> 2);
+ } else {
+ nobail = cb_wait();
+ }
+
+ mutex_enter(&freemem_lock);
+ if (availrmem >= tune.t_minarmem + npages) {
+ availrmem -= npages;
+ mutex_exit(&freemem_lock);
+ page_needfree(-(spgcnt_t)npages);
+ return (1);
+ }
+ mutex_exit(&freemem_lock);
+ } while (nobail != 0);
+ page_needfree(-(spgcnt_t)npages);
+
+ return (0);
+}
+
+/*
+ * This routine reserves availrmem for npages;
+ * flags: KM_NOSLEEP or KM_SLEEP
+ * returns 1 on success or 0 on failure
+ */
+int
+page_resv(pgcnt_t npages, uint_t flags)
+{
+ return (page_xresv(npages, flags, NULL));
}
/*