summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJason King <jasonbking@users.noreply.github.com>2021-03-08 14:57:47 -0600
committerGitHub <noreply@github.com>2021-03-08 14:57:47 -0600
commita6033573eedd94118d2b9e65f45deca0bf4b42f7 (patch)
tree92b8a87d98df9c991d1713c6fd6282eb1f2ece20
parenta20b12c5450be0d2fc1b64450605364b6f188003 (diff)
downloadillumos-joyent-a6033573eedd94118d2b9e65f45deca0bf4b42f7.tar.gz
OS-8005 bhyve memory pressure needs to target ARC better (#354)
Reviewed by: Dan McDonald <danmcd@joyent.com> Reviewed by: Mike Zeller <mike.zeller@joyent.com> Approved by: Brian Bennett <brian.bennett@joyent.com>
-rw-r--r--usr/src/cmd/bhyve/bhyverun.c7
-rw-r--r--usr/src/lib/libvmmapi/common/mapfile-vers3
-rw-r--r--usr/src/lib/libvmmapi/common/vmmapi.c8
-rw-r--r--usr/src/lib/libvmmapi/common/vmmapi.h3
-rw-r--r--usr/src/uts/common/fs/zfs/arc.c56
-rw-r--r--usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h4
-rw-r--r--usr/src/uts/i86pc/io/vmm/vmm.c30
-rw-r--r--usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c6
-rw-r--r--usr/src/uts/i86pc/sys/vmm_dev.h3
-rw-r--r--usr/src/uts/i86pc/vmm/Makefile5
10 files changed, 116 insertions, 9 deletions
diff --git a/usr/src/cmd/bhyve/bhyverun.c b/usr/src/cmd/bhyve/bhyverun.c
index 0cbb2c78eb..53e10bbb55 100644
--- a/usr/src/cmd/bhyve/bhyverun.c
+++ b/usr/src/cmd/bhyve/bhyverun.c
@@ -1446,6 +1446,13 @@ main(int argc, char *argv[])
#ifdef __FreeBSD__
err = vm_setup_memory(ctx, memsize, VM_MMAP_ALL);
#else
+ err = vm_arc_resv(ctx, memsize);
+ if (err != 0) {
+ (void) fprintf(stderr, "Could not shrink ARC: %s\n",
+ strerror(err));
+ exit(4);
+ }
+
do {
errno = 0;
err = vm_setup_memory(ctx, memsize, VM_MMAP_ALL);
diff --git a/usr/src/lib/libvmmapi/common/mapfile-vers b/usr/src/lib/libvmmapi/common/mapfile-vers
index be0a055490..e6468c36fb 100644
--- a/usr/src/lib/libvmmapi/common/mapfile-vers
+++ b/usr/src/lib/libvmmapi/common/mapfile-vers
@@ -11,7 +11,7 @@
#
# Copyright 2013 Pluribus Networks Inc.
-# Copyright 2019 Joyent, Inc.
+# Copyright 2020 Joyent, Inc.
# Copyright 2020 Oxide Computer Company
#
@@ -37,6 +37,7 @@ SYMBOL_VERSION ILLUMOSprivate {
vm_activate_cpu;
vm_active_cpus;
vm_apicid2vcpu;
+ vm_arc_resv;
vm_assign_pptdev;
vm_capability_name2type;
vm_capability_type2name;
diff --git a/usr/src/lib/libvmmapi/common/vmmapi.c b/usr/src/lib/libvmmapi/common/vmmapi.c
index fcb098a74f..0d084b9cd5 100644
--- a/usr/src/lib/libvmmapi/common/vmmapi.c
+++ b/usr/src/lib/libvmmapi/common/vmmapi.c
@@ -1909,6 +1909,14 @@ vm_set_run_state(struct vmctx *ctx, int vcpu, enum vcpu_run_state state,
return (0);
}
+/*
+ * Issue the VM_ARC_RESV ioctl on ctx->fd, passing `len' widened to 64 bits
+ * as the ioctl argument.
+ *
+ * Returns 0 when the ioctl returns 0; otherwise returns the errno value left
+ * behind by the failed ioctl.
+ */
+int
+vm_arc_resv(struct vmctx *ctx, size_t len)
+{
+	const int rv = ioctl(ctx->fd, VM_ARC_RESV, (uint64_t)len);
+
+	return (rv == 0 ? 0 : errno);
+}
#endif /* __FreeBSD__ */
#ifdef __FreeBSD__
diff --git a/usr/src/lib/libvmmapi/common/vmmapi.h b/usr/src/lib/libvmmapi/common/vmmapi.h
index 72e43a4e3d..cad555e498 100644
--- a/usr/src/lib/libvmmapi/common/vmmapi.h
+++ b/usr/src/lib/libvmmapi/common/vmmapi.h
@@ -38,7 +38,7 @@
* http://www.illumos.org/license/CDDL.
*
* Copyright 2015 Pluribus Networks Inc.
- * Copyright 2019 Joyent, Inc.
+ * Copyright 2020 Joyent, Inc.
* Copyright 2020 Oxide Computer Company
*/
@@ -310,6 +310,7 @@ int vm_get_run_state(struct vmctx *ctx, int vcpu, enum vcpu_run_state *state,
uint8_t *sipi_vector);
int vm_set_run_state(struct vmctx *ctx, int vcpu, enum vcpu_run_state state,
uint8_t sipi_vector);
+int vm_arc_resv(struct vmctx *ctx, size_t);
#endif /* __FreeBSD__ */
#ifdef __FreeBSD__
diff --git a/usr/src/uts/common/fs/zfs/arc.c b/usr/src/uts/common/fs/zfs/arc.c
index e88213a491..cc5143e4df 100644
--- a/usr/src/uts/common/fs/zfs/arc.c
+++ b/usr/src/uts/common/fs/zfs/arc.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2019, Joyent, Inc.
+ * Copyright 2020 Joyent, Inc.
* Copyright (c) 2011, 2018 by Delphix. All rights reserved.
* Copyright (c) 2014 by Saso Kiselkov. All rights reserved.
* Copyright 2017 Nexenta Systems, Inc. All rights reserved.
@@ -4399,6 +4399,7 @@ typedef enum free_memory_reason_t {
FMR_PAGES_PP_MAXIMUM,
FMR_HEAP_ARENA,
FMR_ZIO_ARENA,
+ FMR_VIRT_MACHINE, /* 'VM' seems ambiguous in this context */
} free_memory_reason_t;
int64_t last_free_memory;
@@ -4414,6 +4415,48 @@ int64_t arc_pages_pp_reserve = 64;
*/
int64_t arc_swapfs_reserve = 64;
+static volatile uint64_t arc_virt_machine_reserved;
+
+/*
+ * Reserve `pages' pages of memory on behalf of a virtual machine by adding
+ * them to arc_virt_machine_reserved.  arc_available_memory() subtracts that
+ * count from availrmem, so a larger reservation makes the ARC see less
+ * available memory.
+ *
+ * Returns 0 on success, after waking the ARC reap thread.  Returns ENOMEM,
+ * with the reservation backed out, if honoring it would leave the ARC less
+ * than arc_c_min.
+ *
+ * XXX: A possible concern is that we allow arc_virt_machine_reserved to
+ * get so large that we cause the arc to perform a lot of additional
+ * work to keep the arc extremely small. We may want to set limits to
+ * the size of arc_virt_machine_reserved and disallow reservations
+ * beyond that limit.
+ */
+int
+arc_virt_machine_reserve(size_t pages)
+{
+	uint64_t newv;
+	uint64_t limit;
+
+	newv = atomic_add_64_nv(&arc_virt_machine_reserved, pages);
+
+	/*
+	 * Since arc_virt_machine_reserved effectively lowers arc_c_max
+	 * as needed for vmm memory, if this request would put the arc
+	 * under arc_c_min, we reject it. arc_c_min should be a value that
+	 * ensures reasonable performance for non-VMM stuff, as well as keep
+	 * us from dipping below lotsfree, which could trigger the pager
+	 * (and send the system to a grinding halt while it pages).
+	 *
+	 * arc_virt_machine_reserved is a count of pages, while arc_c_min
+	 * and arc_c_max are byte counts, so the headroom between them is
+	 * converted to pages before comparing.  Dividing the headroom
+	 * (rather than multiplying newv by PAGESIZE) cannot overflow.
+	 *
+	 * XXX: This is a bit hacky and might be better done w/ a mutex
+	 * instead of atomic ops.
+	 */
+	limit = (arc_c_max > arc_c_min) ?
+	    (arc_c_max - arc_c_min) / PAGESIZE : 0;
+	if (newv > limit) {
+		atomic_add_64(&arc_virt_machine_reserved, -(int64_t)pages);
+		return (ENOMEM);
+	}
+
+	zthr_wakeup(arc_reap_zthr);
+	return (0);
+}
+
+/*
+ * Give back `pages' pages previously added to arc_virt_machine_reserved.
+ * The counter is adjusted atomically; no check is made that `pages' does
+ * not exceed the amount currently reserved.
+ */
+void
+arc_virt_machine_release(size_t pages)
+{
+	const int64_t delta = -(int64_t)pages;
+
+	atomic_add_64(&arc_virt_machine_reserved, delta);
+}
+
/*
* Return the amount of memory that can be consumed before reclaim will be
* needed. Positive if there is sufficient free memory, negative indicates
@@ -4477,6 +4520,17 @@ arc_available_memory(void)
r = FMR_PAGES_PP_MAXIMUM;
}
+ /*
+ * Check that we have enough memory for any virtual machines that
+ * are running or starting. We add desfree to keep us out of
+ * particularly dire circumstances.
+ */
+ n = PAGESIZE * (availrmem - arc_virt_machine_reserved - desfree);
+ if (n < lowest) {
+ lowest = n;
+ r = FMR_VIRT_MACHINE;
+ }
+
#if defined(__i386)
/*
* If we're on an i386 platform, it's possible that we'll exhaust the
diff --git a/usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h b/usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h
index 09e9afd8a8..ec1707ea79 100644
--- a/usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h
+++ b/usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h
@@ -203,6 +203,10 @@ int vm_service_mmio_read(struct vm *vm, int cpuid, uint64_t gpa, uint64_t *rval,
int vm_service_mmio_write(struct vm *vm, int cpuid, uint64_t gpa, uint64_t wval,
int wsize);
+#ifndef __FreeBSD__
+int vm_arc_resv(struct vm *vm, size_t);
+#endif
+
#ifdef _SYS__CPUSET_H_
cpuset_t vm_active_cpus(struct vm *vm);
cpuset_t vm_debug_cpus(struct vm *vm);
diff --git a/usr/src/uts/i86pc/io/vmm/vmm.c b/usr/src/uts/i86pc/io/vmm/vmm.c
index 1cd0b23a1c..ad3a9f548f 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm.c
+++ b/usr/src/uts/i86pc/io/vmm/vmm.c
@@ -38,7 +38,7 @@
* http://www.illumos.org/license/CDDL.
*
* Copyright 2015 Pluribus Networks Inc.
- * Copyright 2018 Joyent, Inc.
+ * Copyright 2021 Joyent, Inc.
* Copyright 2020 Oxide Computer Company
*/
@@ -196,6 +196,7 @@ struct vm {
uint16_t threads; /* (o) num of threads/core */
uint16_t maxcpus; /* (o) max pluggable cpus */
uint64_t boot_tsc_offset; /* (i) TSC offset at VM boot */
+ size_t arc_resv; /* # of pages taken from ARC */
struct ioport_config ioports; /* (o) ioport handling */
};
@@ -287,6 +288,9 @@ static int vcpu_vector_sipi(struct vm *vm, int vcpuid, uint8_t vector);
#ifndef __FreeBSD__
static void vm_clear_memseg(struct vm *, int);
+extern int arc_virt_machine_reserve(size_t);
+extern void arc_virt_machine_release(size_t);
+
/* Flags for vtc_status */
#define VTCS_FPU_RESTORED 1 /* guest FPU restored, host FPU saved */
#define VTCS_FPU_CTX_CRITICAL 2 /* in ctx where FPU restore cannot be lazy */
@@ -296,6 +300,7 @@ typedef struct vm_thread_ctx {
int vtc_vcpuid;
uint_t vtc_status;
} vm_thread_ctx_t;
+
#endif /* __FreeBSD__ */
#ifdef KTR
@@ -645,6 +650,12 @@ vm_cleanup(struct vm *vm, bool destroy)
VMSPACE_FREE(vm->vmspace);
vm->vmspace = NULL;
+
+#ifndef __FreeBSD__
+ arc_virt_machine_release(vm->arc_resv);
+ vm->arc_resv = 0;
+#endif
+
}
#ifndef __FreeBSD__
else {
@@ -3775,3 +3786,20 @@ vm_ioport_unhook(struct vm *vm, void **cookie)
*cookie = NULL;
}
+
+#ifndef __FreeBSD__
+/*
+ * Round `len' bytes up to a whole number of pages and ask the ARC to
+ * reserve that many via arc_virt_machine_reserve().  On success the pages
+ * are added to vm->arc_resv.
+ *
+ * Returns 0 on success, or the error from arc_virt_machine_reserve(), in
+ * which case vm->arc_resv is left unchanged.
+ */
+int
+vm_arc_resv(struct vm *vm, uint64_t len)
+{
+	/* Since we already have the compat macros included, we use those */
+	const size_t npages = (size_t)roundup2(len, PAGE_SIZE) >> PAGE_SHIFT;
+	const int rv = arc_virt_machine_reserve(npages);
+
+	if (rv == 0)
+		vm->arc_resv += npages;
+
+	return (rv);
+}
+#endif /* __FreeBSD__ */
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c b/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c
index 7b04aed61a..9267befd19 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c
+++ b/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c
@@ -12,7 +12,7 @@
/*
* Copyright 2015 Pluribus Networks Inc.
- * Copyright 2019 Joyent, Inc.
+ * Copyright 2020 Joyent, Inc.
* Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
* Copyright 2020 Oxide Computer Company
*/
@@ -471,6 +471,7 @@ vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md,
case VM_MMAP_MEMSEG:
case VM_WRLOCK_CYCLE:
case VM_PMTMR_LOCATE:
+ case VM_ARC_RESV:
vmm_write_lock(sc);
lock_type = LOCK_WRITE_HOLD;
break;
@@ -1396,6 +1397,9 @@ vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md,
*/
break;
}
+ case VM_ARC_RESV:
+ error = vm_arc_resv(sc->vmm_vm, (uint64_t)arg);
+ break;
#endif
default:
error = ENOTTY;
diff --git a/usr/src/uts/i86pc/sys/vmm_dev.h b/usr/src/uts/i86pc/sys/vmm_dev.h
index f4a68636b3..3f20f1d5ee 100644
--- a/usr/src/uts/i86pc/sys/vmm_dev.h
+++ b/usr/src/uts/i86pc/sys/vmm_dev.h
@@ -38,7 +38,7 @@
* http://www.illumos.org/license/CDDL.
*
* Copyright 2015 Pluribus Networks Inc.
- * Copyright 2019 Joyent, Inc.
+ * Copyright 2020 Joyent, Inc.
*/
#ifndef _VMM_DEV_H_
@@ -369,6 +369,7 @@ struct vm_run_state {
#define VM_RESUME_CPU (VMM_IOC_BASE | 0x1e)
#define VM_PPTDEV_DISABLE_MSIX (VMM_IOC_BASE | 0x1f)
+#define VM_ARC_RESV (VMM_IOC_BASE | 0xfe)
#define VM_DEVMEM_GETOFFSET (VMM_IOC_BASE | 0xff)
diff --git a/usr/src/uts/i86pc/vmm/Makefile b/usr/src/uts/i86pc/vmm/Makefile
index 0106dd0a0f..2c0843f165 100644
--- a/usr/src/uts/i86pc/vmm/Makefile
+++ b/usr/src/uts/i86pc/vmm/Makefile
@@ -11,7 +11,7 @@
#
# Copyright 2013 Pluribus Networks Inc.
-# Copyright 2019 Joyent, Inc.
+# Copyright 2020 Joyent, Inc.
#
#
@@ -43,7 +43,6 @@ INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE)
# Overrides and additions
#
-
# 3rd party code
SMOFF += all_func_returns
@@ -60,7 +59,7 @@ AS_INC_PATH += -I$(UTSBASE)/i86pc/io/vmm -I$(OBJS_DIR)
# enable collection of VMM statistics
CFLAGS += -DVMM_KEEP_STATS
-LDFLAGS += -N misc/acpica -N misc/pcie -N fs/dev
+LDFLAGS += -N misc/acpica -N misc/pcie -N fs/dev -N fs/zfs
LDFLAGS += -z type=kmod -M $(MAPFILE)
OFFSETS_VMX = $(CONF_SRCDIR)/intel/offsets.in