diff options
Diffstat (limited to 'usr/src/uts')
-rw-r--r-- | usr/src/uts/intel/io/vmm/sys/vmm_gpt.h | 8 | ||||
-rw-r--r-- | usr/src/uts/intel/io/vmm/sys/vmm_kernel.h | 2 | ||||
-rw-r--r-- | usr/src/uts/intel/io/vmm/sys/vmm_vm.h | 4 | ||||
-rw-r--r-- | usr/src/uts/intel/io/vmm/vmm.c | 16 | ||||
-rw-r--r-- | usr/src/uts/intel/io/vmm/vmm_gpt.c | 5 | ||||
-rw-r--r-- | usr/src/uts/intel/io/vmm/vmm_sol_dev.c | 5 | ||||
-rw-r--r-- | usr/src/uts/intel/io/vmm/vmm_sol_ept.c | 18 | ||||
-rw-r--r-- | usr/src/uts/intel/io/vmm/vmm_sol_rvi.c | 12 | ||||
-rw-r--r-- | usr/src/uts/intel/io/vmm/vmm_vm.c | 9 | ||||
-rw-r--r-- | usr/src/uts/intel/sys/vmm.h | 5 | ||||
-rw-r--r-- | usr/src/uts/intel/sys/vmm_dev.h | 2 |
11 files changed, 58 insertions, 28 deletions
diff --git a/usr/src/uts/intel/io/vmm/sys/vmm_gpt.h b/usr/src/uts/intel/io/vmm/sys/vmm_gpt.h index a425fb53ec..2692c6bec8 100644 --- a/usr/src/uts/intel/io/vmm/sys/vmm_gpt.h +++ b/usr/src/uts/intel/io/vmm/sys/vmm_gpt.h @@ -11,7 +11,7 @@ /* * Copyright 2019 Joyent, Inc. - * Copyright 2021 Oxide Computer Company + * Copyright 2022 Oxide Computer Company */ #ifndef _VMM_GPT_H @@ -52,6 +52,7 @@ enum vmm_gpt_node_level { * cleared. Returns non-zero if the previous value of the bit was set. * vpeo_get_pmtp: Generate a properly formatted PML4 (EPTP/nCR3), given the root * PFN for the GPT. + * vpeo_hw_ad_supported: Returns true IFF hardware A/D tracking is supported. */ typedef struct vmm_pte_ops vmm_pte_ops_t; struct vmm_pte_ops { @@ -62,7 +63,8 @@ struct vmm_pte_ops { uint_t (*vpeo_pte_prot)(uint64_t); uint_t (*vpeo_reset_dirty)(uint64_t *, bool); uint_t (*vpeo_reset_accessed)(uint64_t *, bool); - uint64_t (*vpeo_get_pmtp)(pfn_t); + uint64_t (*vpeo_get_pmtp)(pfn_t, bool); + bool (*vpeo_hw_ad_supported)(void); }; extern vmm_pte_ops_t ept_pte_ops; @@ -82,7 +84,7 @@ void vmm_gpt_vacate_region(vmm_gpt_t *, uint64_t, uint64_t); bool vmm_gpt_map(vmm_gpt_t *, uint64_t, pfn_t, uint_t, uint8_t); bool vmm_gpt_unmap(vmm_gpt_t *, uint64_t); size_t vmm_gpt_unmap_region(vmm_gpt_t *, uint64_t, uint64_t); -uint64_t vmm_gpt_get_pmtp(vmm_gpt_t *); +uint64_t vmm_gpt_get_pmtp(vmm_gpt_t *, bool); bool vmm_gpt_is_mapped(vmm_gpt_t *, uint64_t *, pfn_t *, uint_t *); uint_t vmm_gpt_reset_accessed(vmm_gpt_t *, uint64_t *, bool); diff --git a/usr/src/uts/intel/io/vmm/sys/vmm_kernel.h b/usr/src/uts/intel/io/vmm/sys/vmm_kernel.h index 1ef2d48adf..8af6df5c66 100644 --- a/usr/src/uts/intel/io/vmm/sys/vmm_kernel.h +++ b/usr/src/uts/intel/io/vmm/sys/vmm_kernel.h @@ -138,7 +138,7 @@ bool vm_is_paused(struct vm *); /* * APIs that race against hardware. */ -void vm_track_dirty_pages(struct vm *, uint64_t, size_t, uint8_t *); +int vm_track_dirty_pages(struct vm *, uint64_t, size_t, uint8_t *); /* * APIs that modify the guest memory map require all vcpus to be frozen. diff --git a/usr/src/uts/intel/io/vmm/sys/vmm_vm.h b/usr/src/uts/intel/io/vmm/sys/vmm_vm.h index 57d0ec8b00..6edba02bc5 100644 --- a/usr/src/uts/intel/io/vmm/sys/vmm_vm.h +++ b/usr/src/uts/intel/io/vmm/sys/vmm_vm.h @@ -12,7 +12,7 @@ /* * Copyright 2019 Joyent, Inc. - * Copyright 2021 Oxide Computer Company + * Copyright 2022 Oxide Computer Company */ #ifndef _VMM_VM_H @@ -40,7 +40,7 @@ vm_client_t *vmspace_client_alloc(vmspace_t *); uint64_t vmspace_table_root(vmspace_t *); uint64_t vmspace_table_gen(vmspace_t *); uint64_t vmspace_resident_count(vmspace_t *); -void vmspace_track_dirty(vmspace_t *, uint64_t, size_t, uint8_t *); +int vmspace_track_dirty(vmspace_t *, uint64_t, size_t, uint8_t *); /* vm_client_t operations */ vm_page_t *vmc_hold(vm_client_t *, uintptr_t, int); diff --git a/usr/src/uts/intel/io/vmm/vmm.c b/usr/src/uts/intel/io/vmm/vmm.c index 136c38c5ab..11f09c2102 100644 --- a/usr/src/uts/intel/io/vmm/vmm.c +++ b/usr/src/uts/intel/io/vmm/vmm.c @@ -551,12 +551,6 @@ vm_init(struct vm *vm, bool create) uint_t cores_per_package = 1; uint_t threads_per_core = 1; -/* - * Debugging tunable to enable dirty-page-tracking. - * (Remains off by default for now) - */ -bool gpt_track_dirty = false; - int vm_create(uint64_t flags, struct vm **retvm) { @@ -570,7 +564,11 @@ vm_create(uint64_t flags, struct vm **retvm) if (!vmm_initialized) return (ENXIO); - vmspace = vmspace_alloc(VM_MAXUSER_ADDRESS, pte_ops, gpt_track_dirty); + bool track_dirty = (flags & VCF_TRACK_DIRTY) != 0; + if (track_dirty && !pte_ops->vpeo_hw_ad_supported()) + return (ENOTSUP); + + vmspace = vmspace_alloc(VM_MAXUSER_ADDRESS, pte_ops, track_dirty); if (vmspace == NULL) return (ENOMEM); @@ -1357,11 +1355,11 @@ vm_set_run_state(struct vm *vm, int vcpuid, uint32_t state, uint8_t sipi_vec) return (0); } -void +int vm_track_dirty_pages(struct vm *vm, uint64_t gpa, size_t len, uint8_t *bitmap) { vmspace_t *vms = vm_get_vmspace(vm); - vmspace_track_dirty(vms, gpa, len, bitmap); + return (vmspace_track_dirty(vms, gpa, len, bitmap)); } static void diff --git a/usr/src/uts/intel/io/vmm/vmm_gpt.c b/usr/src/uts/intel/io/vmm/vmm_gpt.c index a542dba055..a464f8e964 100644 --- a/usr/src/uts/intel/io/vmm/vmm_gpt.c +++ b/usr/src/uts/intel/io/vmm/vmm_gpt.c @@ -579,7 +579,8 @@ vmm_gpt_reset_dirty(vmm_gpt_t *gpt, uint64_t *entry, bool on) * Get properly formatted PML4 (EPTP/nCR3) for GPT. */ uint64_t -vmm_gpt_get_pmtp(vmm_gpt_t *gpt) +vmm_gpt_get_pmtp(vmm_gpt_t *gpt, bool track_dirty) { - return (gpt->vgpt_pte_ops->vpeo_get_pmtp(gpt->vgpt_root->vgn_host_pfn)); + const pfn_t root_pfn = gpt->vgpt_root->vgn_host_pfn; + return (gpt->vgpt_pte_ops->vpeo_get_pmtp(root_pfn, track_dirty)); } diff --git a/usr/src/uts/intel/io/vmm/vmm_sol_dev.c b/usr/src/uts/intel/io/vmm/vmm_sol_dev.c index c66f1ce17a..d89bca5ce0 100644 --- a/usr/src/uts/intel/io/vmm/vmm_sol_dev.c +++ b/usr/src/uts/intel/io/vmm/vmm_sol_dev.c @@ -1703,9 +1703,10 @@ vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md, } len = roundup(tracker.vdt_len / PAGESIZE, 8) / 8; bitmap = kmem_zalloc(len, KM_SLEEP); - vm_track_dirty_pages(sc->vmm_vm, tracker.vdt_start_gpa, + error = vm_track_dirty_pages(sc->vmm_vm, tracker.vdt_start_gpa, tracker.vdt_len, bitmap); - if (ddi_copyout(bitmap, tracker.vdt_pfns, len, md) != 0) { + if (error == 0 && + ddi_copyout(bitmap, tracker.vdt_pfns, len, md) != 0) { error = EFAULT; } kmem_free(bitmap, len); diff --git a/usr/src/uts/intel/io/vmm/vmm_sol_ept.c b/usr/src/uts/intel/io/vmm/vmm_sol_ept.c index fde4a030ce..c34ec4684e 100644 --- a/usr/src/uts/intel/io/vmm/vmm_sol_ept.c +++ b/usr/src/uts/intel/io/vmm/vmm_sol_ept.c @@ -12,7 +12,7 @@ /* * Copyright 2019 Joyent, Inc. - * Copyright 2021 Oxide Computer Company + * Copyright 2022 Oxide Computer Company */ #include <sys/types.h> @@ -40,6 +40,8 @@ #define EPT_MAX_LEVELS 4 CTASSERT(EPT_MAX_LEVELS <= MAX_GPT_LEVEL); +#define EPTP_FLAG_ACCESSED_DIRTY (1 << 6) + CTASSERT(EPT_R == PROT_READ); CTASSERT(EPT_W == PROT_WRITE); CTASSERT(EPT_X == PROT_EXEC); @@ -120,13 +122,20 @@ ept_reset_accessed(uint64_t *entry, bool on) } static uint64_t -ept_get_pmtp(pfn_t root_pfn) +ept_get_pmtp(pfn_t root_pfn, bool track_dirty) { - /* TODO: enable AD tracking when required */ - return ((root_pfn << PAGESHIFT | + const uint64_t ad_flag = track_dirty ? EPTP_FLAG_ACCESSED_DIRTY : 0; + return ((root_pfn << PAGESHIFT | ad_flag | (EPT_MAX_LEVELS - 1) << 3 | MTRR_TYPE_WB)); } +static bool +ept_hw_ad_supported(void) +{ + uint64_t ept_caps = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP); + return ((ept_caps & IA32_VMX_EPT_VPID_HW_AD) != 0); +} + vmm_pte_ops_t ept_pte_ops = { .vpeo_map_table = ept_map_table, .vpeo_map_page = ept_map_page, @@ -136,4 +145,5 @@ vmm_pte_ops_t ept_pte_ops = { .vpeo_reset_dirty = ept_reset_dirty, .vpeo_reset_accessed = ept_reset_accessed, .vpeo_get_pmtp = ept_get_pmtp, + .vpeo_hw_ad_supported = ept_hw_ad_supported, }; diff --git a/usr/src/uts/intel/io/vmm/vmm_sol_rvi.c b/usr/src/uts/intel/io/vmm/vmm_sol_rvi.c index 8b45782d25..fb7afd069b 100644 --- a/usr/src/uts/intel/io/vmm/vmm_sol_rvi.c +++ b/usr/src/uts/intel/io/vmm/vmm_sol_rvi.c @@ -12,7 +12,7 @@ /* * Copyright 2019 Joyent, Inc. - * Copyright 2021 Oxide Computer Company + * Copyright 2022 Oxide Computer Company */ #include <sys/types.h> @@ -140,11 +140,18 @@ rvi_reset_accessed(uint64_t *entry, bool on) } static uint64_t -rvi_get_pmtp(pfn_t root_pfn) +rvi_get_pmtp(pfn_t root_pfn, bool track_dirty) { return (root_pfn << PAGESHIFT); } +static bool +rvi_hw_ad_supported(void) +{ + return (true); +} + + vmm_pte_ops_t rvi_pte_ops = { .vpeo_map_table = rvi_map_table, .vpeo_map_page = rvi_map_page, @@ -154,4 +161,5 @@ vmm_pte_ops_t rvi_pte_ops = { .vpeo_reset_dirty = rvi_reset_dirty, .vpeo_reset_accessed = rvi_reset_accessed, .vpeo_get_pmtp = rvi_get_pmtp, + .vpeo_hw_ad_supported = rvi_hw_ad_supported, }; diff --git a/usr/src/uts/intel/io/vmm/vmm_vm.c b/usr/src/uts/intel/io/vmm/vmm_vm.c index 42d963a53c..609f034d10 100644 --- a/usr/src/uts/intel/io/vmm/vmm_vm.c +++ b/usr/src/uts/intel/io/vmm/vmm_vm.c @@ -292,9 +292,12 @@ vmspace_resident_count(vmspace_t *vms) return (vms->vms_pages_mapped); } -void +int vmspace_track_dirty(vmspace_t *vms, uint64_t gpa, size_t len, uint8_t *bitmap) { + if (!vms->vms_track_dirty) + return (EPERM); + /* * Accumulate dirty bits into the given bit vector. Note that this * races both against hardware writes from running vCPUs and @@ -327,6 +330,8 @@ vmspace_track_dirty(vmspace_t *vms, uint64_t gpa, size_t len, uint8_t *bitmap) vmc_space_invalidate(vmc, gpa, len, vms->vms_pt_gen); } vmspace_hold_exit(vms, true); + + return (0); } static pfn_t @@ -839,7 +844,7 @@ vmspace_client_alloc(vmspace_t *vms) uint64_t vmspace_table_root(vmspace_t *vms) { - return (vmm_gpt_get_pmtp(vms->vms_gpt)); + return (vmm_gpt_get_pmtp(vms->vms_gpt, vms->vms_track_dirty)); } /* diff --git a/usr/src/uts/intel/sys/vmm.h b/usr/src/uts/intel/sys/vmm.h index 45e0fe8f34..8a0b45ca5c 100644 --- a/usr/src/uts/intel/sys/vmm.h +++ b/usr/src/uts/intel/sys/vmm.h @@ -404,6 +404,11 @@ enum vm_create_flags { * rather than attempting to create transient allocations. */ VCF_RESERVOIR_MEM = (1 << 0), + + /* + * Enable dirty page tracking for the guest. + */ + VCF_TRACK_DIRTY = (1 << 1), }; /* diff --git a/usr/src/uts/intel/sys/vmm_dev.h b/usr/src/uts/intel/sys/vmm_dev.h index 6fe04a633e..c0d07ad049 100644 --- a/usr/src/uts/intel/sys/vmm_dev.h +++ b/usr/src/uts/intel/sys/vmm_dev.h @@ -402,7 +402,7 @@ struct vm_legacy_cpuid { * best-effort activity. Nothing is to be inferred about the magnitude of a * change when the version is modified. It follows no rules like semver. */ -#define VMM_CURRENT_INTERFACE_VERSION 7 +#define VMM_CURRENT_INTERFACE_VERSION 8 #define VMMCTL_IOC_BASE (('V' << 16) | ('M' << 8)) |