diff options
author | Jason King <jasonbking@users.noreply.github.com> | 2021-03-08 14:57:47 -0600 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-03-08 14:57:47 -0600 |
commit | a6033573eedd94118d2b9e65f45deca0bf4b42f7 (patch) | |
tree | 92b8a87d98df9c991d1713c6fd6282eb1f2ece20 | |
parent | a20b12c5450be0d2fc1b64450605364b6f188003 (diff) | |
download | illumos-joyent-a6033573eedd94118d2b9e65f45deca0bf4b42f7.tar.gz |
OS-8005 bhyve memory pressure needs to target ARC better (#354)
Reviewed by: Dan McDonald <danmcd@joyent.com>
Reviewed by: Mike Zeller <mike.zeller@joyent.com>
Approved by: Brian Bennett <brian.bennett@joyent.com>
-rw-r--r-- | usr/src/cmd/bhyve/bhyverun.c | 7 | ||||
-rw-r--r-- | usr/src/lib/libvmmapi/common/mapfile-vers | 3 | ||||
-rw-r--r-- | usr/src/lib/libvmmapi/common/vmmapi.c | 8 | ||||
-rw-r--r-- | usr/src/lib/libvmmapi/common/vmmapi.h | 3 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/arc.c | 56 | ||||
-rw-r--r-- | usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h | 4 | ||||
-rw-r--r-- | usr/src/uts/i86pc/io/vmm/vmm.c | 30 | ||||
-rw-r--r-- | usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c | 6 | ||||
-rw-r--r-- | usr/src/uts/i86pc/sys/vmm_dev.h | 3 | ||||
-rw-r--r-- | usr/src/uts/i86pc/vmm/Makefile | 5 |
10 files changed, 116 insertions, 9 deletions
diff --git a/usr/src/cmd/bhyve/bhyverun.c b/usr/src/cmd/bhyve/bhyverun.c index 0cbb2c78eb..53e10bbb55 100644 --- a/usr/src/cmd/bhyve/bhyverun.c +++ b/usr/src/cmd/bhyve/bhyverun.c @@ -1446,6 +1446,13 @@ main(int argc, char *argv[]) #ifdef __FreeBSD__ err = vm_setup_memory(ctx, memsize, VM_MMAP_ALL); #else + err = vm_arc_resv(ctx, memsize); + if (err != 0) { + (void) fprintf(stderr, "Could not shrink ARC: %s\n", + strerror(err)); + exit(4); + } + do { errno = 0; err = vm_setup_memory(ctx, memsize, VM_MMAP_ALL); diff --git a/usr/src/lib/libvmmapi/common/mapfile-vers b/usr/src/lib/libvmmapi/common/mapfile-vers index be0a055490..e6468c36fb 100644 --- a/usr/src/lib/libvmmapi/common/mapfile-vers +++ b/usr/src/lib/libvmmapi/common/mapfile-vers @@ -11,7 +11,7 @@ # # Copyright 2013 Pluribus Networks Inc. -# Copyright 2019 Joyent, Inc. +# Copyright 2020 Joyent, Inc. # Copyright 2020 Oxide Computer Company # @@ -37,6 +37,7 @@ SYMBOL_VERSION ILLUMOSprivate { vm_activate_cpu; vm_active_cpus; vm_apicid2vcpu; + vm_arc_resv; vm_assign_pptdev; vm_capability_name2type; vm_capability_type2name; diff --git a/usr/src/lib/libvmmapi/common/vmmapi.c b/usr/src/lib/libvmmapi/common/vmmapi.c index fcb098a74f..0d084b9cd5 100644 --- a/usr/src/lib/libvmmapi/common/vmmapi.c +++ b/usr/src/lib/libvmmapi/common/vmmapi.c @@ -1909,6 +1909,14 @@ vm_set_run_state(struct vmctx *ctx, int vcpu, enum vcpu_run_state state, return (0); } +int +vm_arc_resv(struct vmctx *ctx, size_t len) +{ + if (ioctl(ctx->fd, VM_ARC_RESV, (uint64_t)len) != 0) { + return (errno); + } + return (0); +} #endif /* __FreeBSD__ */ #ifdef __FreeBSD__ diff --git a/usr/src/lib/libvmmapi/common/vmmapi.h b/usr/src/lib/libvmmapi/common/vmmapi.h index 72e43a4e3d..cad555e498 100644 --- a/usr/src/lib/libvmmapi/common/vmmapi.h +++ b/usr/src/lib/libvmmapi/common/vmmapi.h @@ -38,7 +38,7 @@ * http://www.illumos.org/license/CDDL. * * Copyright 2015 Pluribus Networks Inc. - * Copyright 2019 Joyent, Inc. + * Copyright 2020 Joyent, Inc. 
* Copyright 2020 Oxide Computer Company */ @@ -310,6 +310,7 @@ int vm_get_run_state(struct vmctx *ctx, int vcpu, enum vcpu_run_state *state, uint8_t *sipi_vector); int vm_set_run_state(struct vmctx *ctx, int vcpu, enum vcpu_run_state state, uint8_t sipi_vector); +int vm_arc_resv(struct vmctx *ctx, size_t); #endif /* __FreeBSD__ */ #ifdef __FreeBSD__ diff --git a/usr/src/uts/common/fs/zfs/arc.c b/usr/src/uts/common/fs/zfs/arc.c index e88213a491..cc5143e4df 100644 --- a/usr/src/uts/common/fs/zfs/arc.c +++ b/usr/src/uts/common/fs/zfs/arc.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2019, Joyent, Inc. + * Copyright 2020 Joyent, Inc. * Copyright (c) 2011, 2018 by Delphix. All rights reserved. * Copyright (c) 2014 by Saso Kiselkov. All rights reserved. * Copyright 2017 Nexenta Systems, Inc. All rights reserved. @@ -4399,6 +4399,7 @@ typedef enum free_memory_reason_t { FMR_PAGES_PP_MAXIMUM, FMR_HEAP_ARENA, FMR_ZIO_ARENA, + FMR_VIRT_MACHINE, /* 'VM' seems ambiguous in this context */ } free_memory_reason_t; int64_t last_free_memory; @@ -4414,6 +4415,48 @@ int64_t arc_pages_pp_reserve = 64; */ int64_t arc_swapfs_reserve = 64; +static volatile uint64_t arc_virt_machine_reserved; + +/* + * XXX: A possible concern is that we allow arc_virt_machine_reserved to + * get so large that we cause the arc to perform a lot of additional + * work to keep the arc extremely small. We may want to set limits to + * the size of arc_virt_machine_reserved and disallow reservations + * beyond that limit. + */ +int +arc_virt_machine_reserve(size_t pages) +{ + uint64_t newv; + + newv = atomic_add_64_nv(&arc_virt_machine_reserved, pages); + + /* + * Since arc_virt_machine_reserved effectively lowers arc_c_max + * as needed for vmm memory, if this request would put the arc + * under arc_c_min, we reject it. 
arc_c_min should be a value that + * ensures reasonable performance for non-VMM stuff, as well as keeps + * us from dipping below lotsfree, which could trigger the pager + * (and send the system to a grinding halt while it pages). + * + * XXX: This is a bit hacky and might be better done w/ a mutex + * instead of atomic ops. + */ + if (newv + arc_c_min > arc_c_max) { + atomic_add_64(&arc_virt_machine_reserved, -(int64_t)pages); + return (ENOMEM); + } + + zthr_wakeup(arc_reap_zthr); + return (0); +} + +void +arc_virt_machine_release(size_t pages) +{ + atomic_add_64(&arc_virt_machine_reserved, -(int64_t)pages); +} + /* * Return the amount of memory that can be consumed before reclaim will be * needed. Positive if there is sufficient free memory, negative indicates @@ -4477,6 +4520,17 @@ arc_available_memory(void) r = FMR_PAGES_PP_MAXIMUM; } + /* + * Check that we have enough memory for any virtual machines that + * are running or starting. We add desfree to keep us out of + * particularly dire circumstances. 
+ */ + n = PAGESIZE * (availrmem - arc_virt_machine_reserved - desfree); + if (n < lowest) { + lowest = n; + r = FMR_VIRT_MACHINE; + } + #if defined(__i386) /* * If we're on an i386 platform, it's possible that we'll exhaust the diff --git a/usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h b/usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h index 09e9afd8a8..ec1707ea79 100644 --- a/usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h +++ b/usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h @@ -203,6 +203,10 @@ int vm_service_mmio_read(struct vm *vm, int cpuid, uint64_t gpa, uint64_t *rval, int vm_service_mmio_write(struct vm *vm, int cpuid, uint64_t gpa, uint64_t wval, int wsize); +#ifndef __FreeBSD__ +int vm_arc_resv(struct vm *vm, size_t); +#endif + #ifdef _SYS__CPUSET_H_ cpuset_t vm_active_cpus(struct vm *vm); cpuset_t vm_debug_cpus(struct vm *vm); diff --git a/usr/src/uts/i86pc/io/vmm/vmm.c b/usr/src/uts/i86pc/io/vmm/vmm.c index 1cd0b23a1c..ad3a9f548f 100644 --- a/usr/src/uts/i86pc/io/vmm/vmm.c +++ b/usr/src/uts/i86pc/io/vmm/vmm.c @@ -38,7 +38,7 @@ * http://www.illumos.org/license/CDDL. * * Copyright 2015 Pluribus Networks Inc. - * Copyright 2018 Joyent, Inc. + * Copyright 2021 Joyent, Inc. 
* Copyright 2020 Oxide Computer Company */ @@ -196,6 +196,7 @@ struct vm { uint16_t threads; /* (o) num of threads/core */ uint16_t maxcpus; /* (o) max pluggable cpus */ uint64_t boot_tsc_offset; /* (i) TSC offset at VM boot */ + size_t arc_resv; /* # of pages taken from ARC */ struct ioport_config ioports; /* (o) ioport handling */ }; @@ -287,6 +288,9 @@ static int vcpu_vector_sipi(struct vm *vm, int vcpuid, uint8_t vector); #ifndef __FreeBSD__ static void vm_clear_memseg(struct vm *, int); +extern int arc_virt_machine_reserve(size_t); +extern void arc_virt_machine_release(size_t); + /* Flags for vtc_status */ #define VTCS_FPU_RESTORED 1 /* guest FPU restored, host FPU saved */ #define VTCS_FPU_CTX_CRITICAL 2 /* in ctx where FPU restore cannot be lazy */ @@ -296,6 +300,7 @@ typedef struct vm_thread_ctx { int vtc_vcpuid; uint_t vtc_status; } vm_thread_ctx_t; + #endif /* __FreeBSD__ */ #ifdef KTR @@ -645,6 +650,12 @@ vm_cleanup(struct vm *vm, bool destroy) VMSPACE_FREE(vm->vmspace); vm->vmspace = NULL; + +#ifndef __FreeBSD__ + arc_virt_machine_release(vm->arc_resv); + vm->arc_resv = 0; +#endif + } #ifndef __FreeBSD__ else { @@ -3775,3 +3786,20 @@ vm_ioport_unhook(struct vm *vm, void **cookie) *cookie = NULL; } + +#ifndef __FreeBSD__ +int +vm_arc_resv(struct vm *vm, uint64_t len) +{ + /* Since we already have the compat macros included, we use those */ + size_t pages = (size_t)roundup2(len, PAGE_SIZE) >> PAGE_SHIFT; + int err = 0; + + err = arc_virt_machine_reserve(pages); + if (err != 0) + return (err); + + vm->arc_resv += pages; + return (0); +} +#endif /* __FreeBSD__ */ diff --git a/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c b/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c index 7b04aed61a..9267befd19 100644 --- a/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c +++ b/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c @@ -12,7 +12,7 @@ /* * Copyright 2015 Pluribus Networks Inc. - * Copyright 2019 Joyent, Inc. + * Copyright 2020 Joyent, Inc. 
* Copyright 2020 OmniOS Community Edition (OmniOSce) Association. * Copyright 2020 Oxide Computer Company */ @@ -471,6 +471,7 @@ vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md, case VM_MMAP_MEMSEG: case VM_WRLOCK_CYCLE: case VM_PMTMR_LOCATE: + case VM_ARC_RESV: vmm_write_lock(sc); lock_type = LOCK_WRITE_HOLD; break; @@ -1396,6 +1397,9 @@ vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md, */ break; } + case VM_ARC_RESV: + error = vm_arc_resv(sc->vmm_vm, (uint64_t)arg); + break; #endif default: error = ENOTTY; diff --git a/usr/src/uts/i86pc/sys/vmm_dev.h b/usr/src/uts/i86pc/sys/vmm_dev.h index f4a68636b3..3f20f1d5ee 100644 --- a/usr/src/uts/i86pc/sys/vmm_dev.h +++ b/usr/src/uts/i86pc/sys/vmm_dev.h @@ -38,7 +38,7 @@ * http://www.illumos.org/license/CDDL. * * Copyright 2015 Pluribus Networks Inc. - * Copyright 2019 Joyent, Inc. + * Copyright 2020 Joyent, Inc. */ #ifndef _VMM_DEV_H_ @@ -369,6 +369,7 @@ struct vm_run_state { #define VM_RESUME_CPU (VMM_IOC_BASE | 0x1e) #define VM_PPTDEV_DISABLE_MSIX (VMM_IOC_BASE | 0x1f) +#define VM_ARC_RESV (VMM_IOC_BASE | 0xfe) #define VM_DEVMEM_GETOFFSET (VMM_IOC_BASE | 0xff) diff --git a/usr/src/uts/i86pc/vmm/Makefile b/usr/src/uts/i86pc/vmm/Makefile index 0106dd0a0f..2c0843f165 100644 --- a/usr/src/uts/i86pc/vmm/Makefile +++ b/usr/src/uts/i86pc/vmm/Makefile @@ -11,7 +11,7 @@ # # Copyright 2013 Pluribus Networks Inc. -# Copyright 2019 Joyent, Inc. +# Copyright 2020 Joyent, Inc. # # @@ -43,7 +43,6 @@ INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE) # Overrides and additions # - # 3rd party code SMOFF += all_func_returns @@ -60,7 +59,7 @@ AS_INC_PATH += -I$(UTSBASE)/i86pc/io/vmm -I$(OBJS_DIR) # enable collection of VMM statistics CFLAGS += -DVMM_KEEP_STATS -LDFLAGS += -N misc/acpica -N misc/pcie -N fs/dev +LDFLAGS += -N misc/acpica -N misc/pcie -N fs/dev -N fs/zfs LDFLAGS += -z type=kmod -M $(MAPFILE) OFFSETS_VMX = $(CONF_SRCDIR)/intel/offsets.in |