diff options
author | Luqman Aden <luqman@oxide.computer> | 2022-11-11 10:40:00 -0800 |
---|---|---|
committer | Patrick Mooney <pmooney@oxide.computer> | 2022-11-15 16:19:45 +0000 |
commit | 4ac713da4ff2c45287699af975f8c98142bbd9d3 (patch) | |
tree | 1afaea7fcf37dcce5b2a8e9f17ce5d255b54c4f9 /usr | |
parent | cd434274bf9d8b5d054dee3415d2f1c28c33dd8f (diff) | |
download | illumos-gate-4ac713da4ff2c45287699af975f8c98142bbd9d3.tar.gz |
14251 operationalize bhyve dirty page tracking
Reviewed by: Patrick Mooney <pmooney@pfmooney.com>
Approved by: Dan McDonald <danmcd@mnx.io>
Diffstat (limited to 'usr')
19 files changed, 152 insertions, 38 deletions
diff --git a/usr/src/test/bhyve-tests/runfiles/default.run b/usr/src/test/bhyve-tests/runfiles/default.run index f424652655..93101811a5 100644 --- a/usr/src/test/bhyve-tests/runfiles/default.run +++ b/usr/src/test/bhyve-tests/runfiles/default.run @@ -57,7 +57,8 @@ tests = [ 'rdmsr', 'wrmsr', 'triple_fault', - 'exit_paging' + 'exit_paging', + 'page_dirty' ] [/opt/bhyve-tests/tests/viona] diff --git a/usr/src/test/bhyve-tests/tests/vmm/common.c b/usr/src/test/bhyve-tests/tests/common/common.c index ca34dc8cb2..ca34dc8cb2 100644 --- a/usr/src/test/bhyve-tests/tests/vmm/common.c +++ b/usr/src/test/bhyve-tests/tests/common/common.c diff --git a/usr/src/test/bhyve-tests/tests/vmm/common.h b/usr/src/test/bhyve-tests/tests/common/common.h index a1147395ef..a1147395ef 100644 --- a/usr/src/test/bhyve-tests/tests/vmm/common.h +++ b/usr/src/test/bhyve-tests/tests/common/common.h diff --git a/usr/src/test/bhyve-tests/tests/common/in_guest.c b/usr/src/test/bhyve-tests/tests/common/in_guest.c index 7d27cf194d..42df592bc8 100644 --- a/usr/src/test/bhyve-tests/tests/common/in_guest.c +++ b/usr/src/test/bhyve-tests/tests/common/in_guest.c @@ -123,7 +123,7 @@ populate_desc_tables(struct vmctx *ctx) } -static void +void test_cleanup(bool is_failure) { if (test_vmctx != NULL) { @@ -144,6 +144,7 @@ test_cleanup(bool is_failure) if (!is_failure || !keep_on_fail) { vm_destroy(test_vmctx); } + test_name = NULL; test_vmctx = NULL; } } @@ -309,6 +310,12 @@ load_payload(struct vmctx *ctx) struct vmctx * test_initialize(const char *tname) { + return (test_initialize_flags(tname, 0)); +} + +struct vmctx * +test_initialize_flags(const char *tname, uint64_t create_flags) +{ char vm_name[VM_MAX_NAMELEN]; int err; struct vmctx *ctx; @@ -320,7 +327,7 @@ test_initialize(const char *tname) (void) snprintf(vm_name, sizeof (vm_name), "bhyve-test-%s-%d", test_name, getpid()); - err = vm_create(vm_name, 0); + err = vm_create(vm_name, create_flags); if (err != 0) { test_fail_errno(err, "Could not create VM"); } diff --git a/usr/src/test/bhyve-tests/tests/common/in_guest.h b/usr/src/test/bhyve-tests/tests/common/in_guest.h index 8d6e04a6da..fdacd6f540 100644 --- a/usr/src/test/bhyve-tests/tests/common/in_guest.h +++ b/usr/src/test/bhyve-tests/tests/common/in_guest.h @@ -19,6 +19,8 @@ #include "payload_common.h" struct vmctx *test_initialize(const char *); +struct vmctx *test_initialize_flags(const char *, uint64_t); +void test_cleanup(bool); void test_fail_errno(int err, const char *msg); void test_fail_msg(const char *fmt, ...); void test_fail_vmexit(const struct vm_exit *vexit); diff --git a/usr/src/test/bhyve-tests/tests/inst_emul/Makefile b/usr/src/test/bhyve-tests/tests/inst_emul/Makefile index f01a9c3bee..88f4d17119 100644 --- a/usr/src/test/bhyve-tests/tests/inst_emul/Makefile +++ b/usr/src/test/bhyve-tests/tests/inst_emul/Makefile @@ -32,7 +32,7 @@ CPAYLOADS = cpuid PAYLOADS = $(PROG) include ../Makefile.in_guest -COMMON_OBJS = in_guest.o +COMMON_OBJS = in_guest.o common.o CLEANFILES = $(COMMON_OBJS) $(PAYLOAD_CLEANFILES) payload_utils.o CLOBBERFILES = $(PROG) diff --git a/usr/src/test/bhyve-tests/tests/inst_emul/page_dirty.c b/usr/src/test/bhyve-tests/tests/inst_emul/page_dirty.c index 2e3a06bf47..36b9721701 100644 --- a/usr/src/test/bhyve-tests/tests/inst_emul/page_dirty.c +++ b/usr/src/test/bhyve-tests/tests/inst_emul/page_dirty.c @@ -29,6 +29,7 @@ #include <sys/vmm_dev.h> #include <vmmapi.h> +#include "common.h" #include "in_guest.h" #define PAGE_SZ 4096 @@ -70,6 +71,73 @@ count_dirty_pages(const uint8_t *bitmap) return (count); } +void +check_supported(const char *test_suite_name) +{ + char name[VM_MAX_NAMELEN]; + int err; + + name_test_vm(test_suite_name, name); + + err = vm_create(name, VCF_TRACK_DIRTY); + if (err == 0) { + /* + * We created the VM successfully, so we know that dirty page + * tracking is supported. + */ + err = destroy_instance(test_suite_name); + if (err != 0) { + (void) fprintf(stderr, + "Could not destroy VM: %s\n", strerror(errno)); + (void) printf("FAIL %s\n", test_suite_name); + exit(EXIT_FAILURE); + } + } else if (errno == ENOTSUP) { + (void) printf( + "Skipping test: dirty page tracking not supported\n"); + (void) printf("PASS %s\n", test_suite_name); + exit(EXIT_SUCCESS); + } else { + /* + * Ignore any other errors, they'll be caught by subsequent + * test routines. + */ + } +} + +void +test_dirty_tracking_disabled(const char *test_suite_name) +{ + struct vmctx *ctx = NULL; + int err; + + uint8_t dirty_bitmap[DIRTY_BITMAP_SZ] = { 0 }; + struct vmm_dirty_tracker track = { + .vdt_start_gpa = 0, + .vdt_len = MEM_TOTAL_SZ, + .vdt_pfns = (void *)dirty_bitmap, + }; + + /* Create VM without VCF_TRACK_DIRTY flag */ + ctx = test_initialize_flags(test_suite_name, 0); + + err = test_setup_vcpu(ctx, 0, MEM_LOC_PAYLOAD, MEM_LOC_STACK); + if (err != 0) { + test_fail_errno(err, "Could not initialize vcpu0"); + } + + /* Try to query for dirty pages */ + err = ioctl(vm_get_device_fd(ctx), VM_TRACK_DIRTY_PAGES, &track); + if (err == 0) { + test_fail_msg("VM_TRACK_DIRTY_PAGES succeeded unexpectedly\n"); + } else if (errno != EPERM) { + test_fail_errno(errno, + "VM_TRACK_DIRTY_PAGES failed with unexpected error"); + } + + test_cleanup(false); +} + int main(int argc, char *argv[]) { @@ -77,12 +145,13 @@ main(int argc, char *argv[]) struct vmctx *ctx = NULL; int err; - ctx = test_initialize(test_suite_name); + /* Skip test if CPU doesn't support HW A/D tracking */ + check_supported(test_suite_name); + + /* Test for expected error with dirty tracking disabled */ + test_dirty_tracking_disabled(test_suite_name); - /* Until #14251 is fixed, warn the user of the test requirement */ - (void) fprintf(stderr, - "Ensure that 'gpt_track_dirty' is set to 1 via mdb -kw\n" - "The reasoning is described in illumos #14251\n"); + ctx = test_initialize_flags(test_suite_name, VCF_TRACK_DIRTY); err = test_setup_vcpu(ctx, 0, MEM_LOC_PAYLOAD, MEM_LOC_STACK); if (err != 0) { diff --git a/usr/src/test/bhyve-tests/tests/vmm/Makefile b/usr/src/test/bhyve-tests/tests/vmm/Makefile index 8f7f2d9624..e557aa2407 100644 --- a/usr/src/test/bhyve-tests/tests/vmm/Makefile +++ b/usr/src/test/bhyve-tests/tests/vmm/Makefile @@ -44,7 +44,8 @@ CPPFLAGS = -I$(COMPAT)/bhyve -I$(CONTRIB)/bhyve \ -I$(COMPAT)/bhyve/amd64 -I$(CONTRIB)/bhyve/amd64 \ $(CPPFLAGS.master) \ -I$(SRC)/uts/intel/io/vmm \ - -I$(SRC)/uts/intel + -I$(SRC)/uts/intel \ + -I../common $(PROG) := LDLIBS += -lvmmapi all: $(PROG) @@ -66,6 +67,10 @@ $(TESTDIR): $(TESTDIR)/%: % $(INS.file) +%.o: ../common/%.c + $(COMPILE.c) -o $@ $^ + $(POST_PROCESS) + %: %.o $(LINK.c) -o $@ $< $(COMMON_OBJS) $(LDLIBS) $(POST_PROCESS) diff --git a/usr/src/uts/intel/io/vmm/sys/vmm_gpt.h b/usr/src/uts/intel/io/vmm/sys/vmm_gpt.h index a425fb53ec..2692c6bec8 100644 --- a/usr/src/uts/intel/io/vmm/sys/vmm_gpt.h +++ b/usr/src/uts/intel/io/vmm/sys/vmm_gpt.h @@ -11,7 +11,7 @@ /* * Copyright 2019 Joyent, Inc. - * Copyright 2021 Oxide Computer Company + * Copyright 2022 Oxide Computer Company */ #ifndef _VMM_GPT_H @@ -52,6 +52,7 @@ enum vmm_gpt_node_level { * cleared. Returns non-zero if the previous value of the bit was set. * vpeo_get_pmtp: Generate a properly formatted PML4 (EPTP/nCR3), given the root * PFN for the GPT. + * vpeo_hw_ad_supported: Returns true IFF hardware A/D tracking is supported. */ typedef struct vmm_pte_ops vmm_pte_ops_t; struct vmm_pte_ops { @@ -62,7 +63,8 @@ struct vmm_pte_ops { uint_t (*vpeo_pte_prot)(uint64_t); uint_t (*vpeo_reset_dirty)(uint64_t *, bool); uint_t (*vpeo_reset_accessed)(uint64_t *, bool); - uint64_t (*vpeo_get_pmtp)(pfn_t); + uint64_t (*vpeo_get_pmtp)(pfn_t, bool); + bool (*vpeo_hw_ad_supported)(void); }; extern vmm_pte_ops_t ept_pte_ops; @@ -82,7 +84,7 @@ void vmm_gpt_vacate_region(vmm_gpt_t *, uint64_t, uint64_t); bool vmm_gpt_map(vmm_gpt_t *, uint64_t, pfn_t, uint_t, uint8_t); bool vmm_gpt_unmap(vmm_gpt_t *, uint64_t); size_t vmm_gpt_unmap_region(vmm_gpt_t *, uint64_t, uint64_t); -uint64_t vmm_gpt_get_pmtp(vmm_gpt_t *); +uint64_t vmm_gpt_get_pmtp(vmm_gpt_t *, bool); bool vmm_gpt_is_mapped(vmm_gpt_t *, uint64_t *, pfn_t *, uint_t *); uint_t vmm_gpt_reset_accessed(vmm_gpt_t *, uint64_t *, bool); diff --git a/usr/src/uts/intel/io/vmm/sys/vmm_kernel.h b/usr/src/uts/intel/io/vmm/sys/vmm_kernel.h index 1ef2d48adf..8af6df5c66 100644 --- a/usr/src/uts/intel/io/vmm/sys/vmm_kernel.h +++ b/usr/src/uts/intel/io/vmm/sys/vmm_kernel.h @@ -138,7 +138,7 @@ bool vm_is_paused(struct vm *); /* * APIs that race against hardware. */ -void vm_track_dirty_pages(struct vm *, uint64_t, size_t, uint8_t *); +int vm_track_dirty_pages(struct vm *, uint64_t, size_t, uint8_t *); /* * APIs that modify the guest memory map require all vcpus to be frozen. diff --git a/usr/src/uts/intel/io/vmm/sys/vmm_vm.h b/usr/src/uts/intel/io/vmm/sys/vmm_vm.h index 57d0ec8b00..6edba02bc5 100644 --- a/usr/src/uts/intel/io/vmm/sys/vmm_vm.h +++ b/usr/src/uts/intel/io/vmm/sys/vmm_vm.h @@ -12,7 +12,7 @@ /* * Copyright 2019 Joyent, Inc. - * Copyright 2021 Oxide Computer Company + * Copyright 2022 Oxide Computer Company */ #ifndef _VMM_VM_H @@ -40,7 +40,7 @@ vm_client_t *vmspace_client_alloc(vmspace_t *); uint64_t vmspace_table_root(vmspace_t *); uint64_t vmspace_table_gen(vmspace_t *); uint64_t vmspace_resident_count(vmspace_t *); -void vmspace_track_dirty(vmspace_t *, uint64_t, size_t, uint8_t *); +int vmspace_track_dirty(vmspace_t *, uint64_t, size_t, uint8_t *); /* vm_client_t operations */ vm_page_t *vmc_hold(vm_client_t *, uintptr_t, int); diff --git a/usr/src/uts/intel/io/vmm/vmm.c b/usr/src/uts/intel/io/vmm/vmm.c index 136c38c5ab..11f09c2102 100644 --- a/usr/src/uts/intel/io/vmm/vmm.c +++ b/usr/src/uts/intel/io/vmm/vmm.c @@ -551,12 +551,6 @@ vm_init(struct vm *vm, bool create) uint_t cores_per_package = 1; uint_t threads_per_core = 1; -/* - * Debugging tunable to enable dirty-page-tracking. - * (Remains off by default for now) - */ -bool gpt_track_dirty = false; - int vm_create(uint64_t flags, struct vm **retvm) { @@ -570,7 +564,11 @@ vm_create(uint64_t flags, struct vm **retvm) if (!vmm_initialized) return (ENXIO); - vmspace = vmspace_alloc(VM_MAXUSER_ADDRESS, pte_ops, gpt_track_dirty); + bool track_dirty = (flags & VCF_TRACK_DIRTY) != 0; + if (track_dirty && !pte_ops->vpeo_hw_ad_supported()) + return (ENOTSUP); + + vmspace = vmspace_alloc(VM_MAXUSER_ADDRESS, pte_ops, track_dirty); if (vmspace == NULL) return (ENOMEM); @@ -1357,11 +1355,11 @@ vm_set_run_state(struct vm *vm, int vcpuid, uint32_t state, uint8_t sipi_vec) return (0); } -void +int vm_track_dirty_pages(struct vm *vm, uint64_t gpa, size_t len, uint8_t *bitmap) { vmspace_t *vms = vm_get_vmspace(vm); - vmspace_track_dirty(vms, gpa, len, bitmap); + return (vmspace_track_dirty(vms, gpa, len, bitmap)); } static void diff --git a/usr/src/uts/intel/io/vmm/vmm_gpt.c b/usr/src/uts/intel/io/vmm/vmm_gpt.c index a542dba055..a464f8e964 100644 --- a/usr/src/uts/intel/io/vmm/vmm_gpt.c +++ b/usr/src/uts/intel/io/vmm/vmm_gpt.c @@ -579,7 +579,8 @@ vmm_gpt_reset_dirty(vmm_gpt_t *gpt, uint64_t *entry, bool on) * Get properly formatted PML4 (EPTP/nCR3) for GPT. */ uint64_t -vmm_gpt_get_pmtp(vmm_gpt_t *gpt) +vmm_gpt_get_pmtp(vmm_gpt_t *gpt, bool track_dirty) { - return (gpt->vgpt_pte_ops->vpeo_get_pmtp(gpt->vgpt_root->vgn_host_pfn)); + const pfn_t root_pfn = gpt->vgpt_root->vgn_host_pfn; + return (gpt->vgpt_pte_ops->vpeo_get_pmtp(root_pfn, track_dirty)); } diff --git a/usr/src/uts/intel/io/vmm/vmm_sol_dev.c b/usr/src/uts/intel/io/vmm/vmm_sol_dev.c index c66f1ce17a..d89bca5ce0 100644 --- a/usr/src/uts/intel/io/vmm/vmm_sol_dev.c +++ b/usr/src/uts/intel/io/vmm/vmm_sol_dev.c @@ -1703,9 +1703,10 @@ vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md, } len = roundup(tracker.vdt_len / PAGESIZE, 8) / 8; bitmap = kmem_zalloc(len, KM_SLEEP); - vm_track_dirty_pages(sc->vmm_vm, tracker.vdt_start_gpa, + error = vm_track_dirty_pages(sc->vmm_vm, tracker.vdt_start_gpa, tracker.vdt_len, bitmap); - if (ddi_copyout(bitmap, tracker.vdt_pfns, len, md) != 0) { + if (error == 0 && + ddi_copyout(bitmap, tracker.vdt_pfns, len, md) != 0) { error = EFAULT; } kmem_free(bitmap, len); diff --git a/usr/src/uts/intel/io/vmm/vmm_sol_ept.c b/usr/src/uts/intel/io/vmm/vmm_sol_ept.c index fde4a030ce..c34ec4684e 100644 --- a/usr/src/uts/intel/io/vmm/vmm_sol_ept.c +++ b/usr/src/uts/intel/io/vmm/vmm_sol_ept.c @@ -12,7 +12,7 @@ /* * Copyright 2019 Joyent, Inc. - * Copyright 2021 Oxide Computer Company + * Copyright 2022 Oxide Computer Company */ #include <sys/types.h> @@ -40,6 +40,8 @@ #define EPT_MAX_LEVELS 4 CTASSERT(EPT_MAX_LEVELS <= MAX_GPT_LEVEL); +#define EPTP_FLAG_ACCESSED_DIRTY (1 << 6) + CTASSERT(EPT_R == PROT_READ); CTASSERT(EPT_W == PROT_WRITE); CTASSERT(EPT_X == PROT_EXEC); @@ -120,13 +122,20 @@ ept_reset_accessed(uint64_t *entry, bool on) } static uint64_t -ept_get_pmtp(pfn_t root_pfn) +ept_get_pmtp(pfn_t root_pfn, bool track_dirty) { - /* TODO: enable AD tracking when required */ - return ((root_pfn << PAGESHIFT | + const uint64_t ad_flag = track_dirty ? EPTP_FLAG_ACCESSED_DIRTY : 0; + return ((root_pfn << PAGESHIFT | ad_flag | (EPT_MAX_LEVELS - 1) << 3 | MTRR_TYPE_WB)); } +static bool +ept_hw_ad_supported(void) +{ + uint64_t ept_caps = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP); + return ((ept_caps & IA32_VMX_EPT_VPID_HW_AD) != 0); +} + vmm_pte_ops_t ept_pte_ops = { .vpeo_map_table = ept_map_table, .vpeo_map_page = ept_map_page, @@ -136,4 +145,5 @@ vmm_pte_ops_t ept_pte_ops = { .vpeo_reset_dirty = ept_reset_dirty, .vpeo_reset_accessed = ept_reset_accessed, .vpeo_get_pmtp = ept_get_pmtp, + .vpeo_hw_ad_supported = ept_hw_ad_supported, }; diff --git a/usr/src/uts/intel/io/vmm/vmm_sol_rvi.c b/usr/src/uts/intel/io/vmm/vmm_sol_rvi.c index 8b45782d25..fb7afd069b 100644 --- a/usr/src/uts/intel/io/vmm/vmm_sol_rvi.c +++ b/usr/src/uts/intel/io/vmm/vmm_sol_rvi.c @@ -12,7 +12,7 @@ /* * Copyright 2019 Joyent, Inc. - * Copyright 2021 Oxide Computer Company + * Copyright 2022 Oxide Computer Company */ #include <sys/types.h> @@ -140,11 +140,18 @@ rvi_reset_accessed(uint64_t *entry, bool on) } static uint64_t -rvi_get_pmtp(pfn_t root_pfn) +rvi_get_pmtp(pfn_t root_pfn, bool track_dirty) { return (root_pfn << PAGESHIFT); } +static bool +rvi_hw_ad_supported(void) +{ + return (true); +} + + vmm_pte_ops_t rvi_pte_ops = { .vpeo_map_table = rvi_map_table, .vpeo_map_page = rvi_map_page, @@ -154,4 +161,5 @@ vmm_pte_ops_t rvi_pte_ops = { .vpeo_reset_dirty = rvi_reset_dirty, .vpeo_reset_accessed = rvi_reset_accessed, .vpeo_get_pmtp = rvi_get_pmtp, + .vpeo_hw_ad_supported = rvi_hw_ad_supported, }; diff --git a/usr/src/uts/intel/io/vmm/vmm_vm.c b/usr/src/uts/intel/io/vmm/vmm_vm.c index 42d963a53c..609f034d10 100644 --- a/usr/src/uts/intel/io/vmm/vmm_vm.c +++ b/usr/src/uts/intel/io/vmm/vmm_vm.c @@ -292,9 +292,12 @@ vmspace_resident_count(vmspace_t *vms) return (vms->vms_pages_mapped); } -void +int vmspace_track_dirty(vmspace_t *vms, uint64_t gpa, size_t len, uint8_t *bitmap) { + if (!vms->vms_track_dirty) + return (EPERM); + /* * Accumulate dirty bits into the given bit vector. Note that this * races both against hardware writes from running vCPUs and @@ -327,6 +330,8 @@ vmspace_track_dirty(vmspace_t *vms, uint64_t gpa, size_t len, uint8_t *bitmap) vmc_space_invalidate(vmc, gpa, len, vms->vms_pt_gen); } vmspace_hold_exit(vms, true); + + return (0); } static pfn_t @@ -839,7 +844,7 @@ vmspace_client_alloc(vmspace_t *vms) uint64_t vmspace_table_root(vmspace_t *vms) { - return (vmm_gpt_get_pmtp(vms->vms_gpt)); + return (vmm_gpt_get_pmtp(vms->vms_gpt, vms->vms_track_dirty)); } /* diff --git a/usr/src/uts/intel/sys/vmm.h b/usr/src/uts/intel/sys/vmm.h index 45e0fe8f34..8a0b45ca5c 100644 --- a/usr/src/uts/intel/sys/vmm.h +++ b/usr/src/uts/intel/sys/vmm.h @@ -404,6 +404,11 @@ enum vm_create_flags { * rather than attempting to create transient allocations. */ VCF_RESERVOIR_MEM = (1 << 0), + + /* + * Enable dirty page tracking for the guest. + */ + VCF_TRACK_DIRTY = (1 << 1), }; /* diff --git a/usr/src/uts/intel/sys/vmm_dev.h b/usr/src/uts/intel/sys/vmm_dev.h index 6fe04a633e..c0d07ad049 100644 --- a/usr/src/uts/intel/sys/vmm_dev.h +++ b/usr/src/uts/intel/sys/vmm_dev.h @@ -402,7 +402,7 @@ struct vm_legacy_cpuid { * best-effort activity. Nothing is to be inferred about the magnitude of a * change when the version is modified. It follows no rules like semver. */ -#define VMM_CURRENT_INTERFACE_VERSION 7 +#define VMM_CURRENT_INTERFACE_VERSION 8 #define VMMCTL_IOC_BASE (('V' << 16) | ('M' << 8)) |