diff options
| author | Patrick Mooney <pmooney@pfmooney.com> | 2021-05-28 21:07:11 +0000 |
|---|---|---|
| committer | Patrick Mooney <pmooney@oxide.computer> | 2021-07-27 19:26:22 +0000 |
| commit | b57f5d3e6a2df8d435e606797cf3934811848343 (patch) | |
| tree | 31d0b366057848a88837b15524905a703c3bdf9c /usr/src/uts/common/vm | |
| parent | ed1e93792d7c9ea04a0cb44cffe34c24c135b002 (diff) | |
| download | illumos-joyent-b57f5d3e6a2df8d435e606797cf3934811848343.tar.gz | |
13833 want bhyve memory reservoir
13822 bhyve memory should exert memory pressure
13834 want extensible page_resv
13821 vmmctl ioctls should have more structure
Reviewed by: Andy Fiddaman <andy@omnios.org>
Reviewed by: Jason King <jason.brian.king@gmail.com>
Reviewed by: Dan Cross <cross@oxidecomputer.com>
Reviewed by: Hans Rosenfeld <rosenfeld@grumpf.hope-2000.org>
Reviewed by: Mike Zeller <mike.zeller@joyent.com>
Approved by: Dan McDonald <danmcd@joyent.com>
Diffstat (limited to 'usr/src/uts/common/vm')
| -rw-r--r-- | usr/src/uts/common/vm/page.h | 12 | ||||
| -rw-r--r-- | usr/src/uts/common/vm/vm_page.c | 72 |
2 files changed, 63 insertions, 21 deletions
diff --git a/usr/src/uts/common/vm/page.h b/usr/src/uts/common/vm/page.h index 8747b96acc..5b98acd24f 100644 --- a/usr/src/uts/common/vm/page.h +++ b/usr/src/uts/common/vm/page.h @@ -20,6 +20,7 @@ */ /* * Copyright (c) 1986, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2021 Oxide Computer Company */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ @@ -340,13 +341,13 @@ struct as; * * So, as a quick summary: * - * pse_mutex[]'s protect the p_selock and p_cv fields. + * pse_mutex[]'s protect the p_selock and p_cv fields. * - * p_selock protects the p_free, p_age, p_vnode, p_offset and p_hash, + * p_selock protects the p_free, p_age, p_vnode, p_offset and p_hash, * - * ph_mutex[]'s protect the page_hash[] array and its chains. + * ph_mutex[]'s protect the page_hash[] array and its chains. * - * vph_mutex[]'s protect the v_pages field and the vp page chains. + * vph_mutex[]'s protect the v_pages field and the vp page chains. * * First lock the page, then the hash chain, then the vnode chain. When * this is not possible `trylocks' must be used. Sleeping while holding @@ -762,6 +763,7 @@ void page_lock_delete(page_t *); int page_deleted(page_t *); int page_pp_lock(page_t *, int, int); void page_pp_unlock(page_t *, int, int); +int page_xresv(pgcnt_t, uint_t, int (*)(void)); int page_resv(pgcnt_t, uint_t); void page_unresv(pgcnt_t); void page_pp_useclaim(page_t *, page_t *, uint_t); @@ -1078,7 +1080,7 @@ typedef struct kpm_hlk { * The state about how a kpm page is mapped and whether it is ready to go * is indicated by the following 1 byte kpm_spage structure. This byte is * split into two 4-bit parts - kp_mapped and kp_mapped_go. - * - kp_mapped == 1 the page is mapped cacheable + * - kp_mapped == 1 the page is mapped cacheable * - kp_mapped == 2 the page is mapped non-cacheable * - kp_mapped_go == 1 the mapping is ready to be dropped in * - kp_mapped_go == 0 the mapping is not ready to be dropped in. 
diff --git a/usr/src/uts/common/vm/vm_page.c b/usr/src/uts/common/vm/vm_page.c index bcc6d05d47..89751b7b2c 100644 --- a/usr/src/uts/common/vm/vm_page.c +++ b/usr/src/uts/common/vm/vm_page.c @@ -23,6 +23,7 @@ * Copyright (c) 2015, Josef 'Jeff' Sipek <jeffpc@josefsipek.net> * Copyright (c) 2015, 2016 by Delphix. All rights reserved. * Copyright 2018 Joyent, Inc. + * Copyright 2021 Oxide Computer Company */ /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ @@ -3919,29 +3920,68 @@ page_pp_unlock( } /* - * This routine reserves availrmem for npages; - * flags: KM_NOSLEEP or KM_SLEEP - * returns 1 on success or 0 on failure + * This routine reserves availrmem for npages. + * It returns 1 on success or 0 on failure. + * + * flags: KM_NOSLEEP or KM_SLEEP + * cb_wait: called to induce delay when KM_SLEEP reservation requires kmem + * reaping to potentially succeed. If the callback returns 0, the + * reservation attempts will cease to repeat and page_xresv() may + * report a failure. If cb_wait is NULL, the traditional delay(hz >> 2) + * behavior will be used while waiting for a reap. */ int -page_resv(pgcnt_t npages, uint_t flags) +page_xresv(pgcnt_t npages, uint_t flags, int (*cb_wait)(void)) { mutex_enter(&freemem_lock); - while (availrmem < tune.t_minarmem + npages) { - if (flags & KM_NOSLEEP) { - mutex_exit(&freemem_lock); - return (0); - } + if (availrmem >= tune.t_minarmem + npages) { + availrmem -= npages; mutex_exit(&freemem_lock); - page_needfree(npages); - kmem_reap(); - delay(hz >> 2); - page_needfree(-(spgcnt_t)npages); - mutex_enter(&freemem_lock); + return (1); + } else if ((flags & KM_NOSLEEP) != 0) { + mutex_exit(&freemem_lock); + return (0); } - availrmem -= npages; mutex_exit(&freemem_lock); - return (1); + + /* + * We signal memory pressure to the system by elevating 'needfree'. + * Processes such as kmem reaping, pageout, and ZFS ARC shrinking can + * then respond to said pressure by freeing pages. 
+ */ + page_needfree(npages); + int nobail = 1; + do { + kmem_reap(); + if (cb_wait == NULL) { + delay(hz >> 2); + } else { + nobail = cb_wait(); + } + + mutex_enter(&freemem_lock); + if (availrmem >= tune.t_minarmem + npages) { + availrmem -= npages; + mutex_exit(&freemem_lock); + page_needfree(-(spgcnt_t)npages); + return (1); + } + mutex_exit(&freemem_lock); + } while (nobail != 0); + page_needfree(-(spgcnt_t)npages); + + return (0); +} + +/* + * This routine reserves availrmem for npages; + * flags: KM_NOSLEEP or KM_SLEEP + * returns 1 on success or 0 on failure + */ +int +page_resv(pgcnt_t npages, uint_t flags) +{ + return (page_xresv(npages, flags, NULL)); } /* |
