Diffstat (limited to 'usr/src/uts')
-rw-r--r--  usr/src/uts/intel/io/vmm/sys/vmm_gpt.h      8
-rw-r--r--  usr/src/uts/intel/io/vmm/sys/vmm_kernel.h   2
-rw-r--r--  usr/src/uts/intel/io/vmm/sys/vmm_vm.h       4
-rw-r--r--  usr/src/uts/intel/io/vmm/vmm.c             16
-rw-r--r--  usr/src/uts/intel/io/vmm/vmm_gpt.c          5
-rw-r--r--  usr/src/uts/intel/io/vmm/vmm_sol_dev.c      5
-rw-r--r--  usr/src/uts/intel/io/vmm/vmm_sol_ept.c     18
-rw-r--r--  usr/src/uts/intel/io/vmm/vmm_sol_rvi.c     12
-rw-r--r--  usr/src/uts/intel/io/vmm/vmm_vm.c           9
-rw-r--r--  usr/src/uts/intel/sys/vmm.h                 5
-rw-r--r--  usr/src/uts/intel/sys/vmm_dev.h             2
11 files changed, 58 insertions, 28 deletions
diff --git a/usr/src/uts/intel/io/vmm/sys/vmm_gpt.h b/usr/src/uts/intel/io/vmm/sys/vmm_gpt.h
index a425fb53ec..2692c6bec8 100644
--- a/usr/src/uts/intel/io/vmm/sys/vmm_gpt.h
+++ b/usr/src/uts/intel/io/vmm/sys/vmm_gpt.h
@@ -11,7 +11,7 @@
/*
* Copyright 2019 Joyent, Inc.
- * Copyright 2021 Oxide Computer Company
+ * Copyright 2022 Oxide Computer Company
*/
#ifndef _VMM_GPT_H
@@ -52,6 +52,7 @@ enum vmm_gpt_node_level {
* cleared. Returns non-zero if the previous value of the bit was set.
* vpeo_get_pmtp: Generate a properly formatted PML4 (EPTP/nCR3), given the root
* PFN for the GPT.
+ * vpeo_hw_ad_supported: Returns true IFF hardware A/D tracking is supported.
*/
typedef struct vmm_pte_ops vmm_pte_ops_t;
struct vmm_pte_ops {
@@ -62,7 +63,8 @@ struct vmm_pte_ops {
uint_t (*vpeo_pte_prot)(uint64_t);
uint_t (*vpeo_reset_dirty)(uint64_t *, bool);
uint_t (*vpeo_reset_accessed)(uint64_t *, bool);
- uint64_t (*vpeo_get_pmtp)(pfn_t);
+ uint64_t (*vpeo_get_pmtp)(pfn_t, bool);
+ bool (*vpeo_hw_ad_supported)(void);
};
extern vmm_pte_ops_t ept_pte_ops;
@@ -82,7 +84,7 @@ void vmm_gpt_vacate_region(vmm_gpt_t *, uint64_t, uint64_t);
bool vmm_gpt_map(vmm_gpt_t *, uint64_t, pfn_t, uint_t, uint8_t);
bool vmm_gpt_unmap(vmm_gpt_t *, uint64_t);
size_t vmm_gpt_unmap_region(vmm_gpt_t *, uint64_t, uint64_t);
-uint64_t vmm_gpt_get_pmtp(vmm_gpt_t *);
+uint64_t vmm_gpt_get_pmtp(vmm_gpt_t *, bool);
bool vmm_gpt_is_mapped(vmm_gpt_t *, uint64_t *, pfn_t *, uint_t *);
uint_t vmm_gpt_reset_accessed(vmm_gpt_t *, uint64_t *, bool);
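
A minimal sketch of how a consumer of the reworked vmm_pte_ops_t interface fits together; the function name here is hypothetical and the real change performs these steps inside vm_create() and vmspace_table_root():

/*
 * Sketch only (kernel context; vmm_gpt.h and vmm_kernel.h assumed).
 * Query the backend for hardware A/D support before requesting a
 * dirty-tracking-enabled table root.
 */
static uint64_t
example_table_root(vmm_gpt_t *gpt, vmm_pte_ops_t *ops, bool track_dirty)
{
	if (track_dirty && !ops->vpeo_hw_ad_supported()) {
		/* Caller is expected to fail the request with ENOTSUP. */
		return (0);
	}
	return (vmm_gpt_get_pmtp(gpt, track_dirty));
}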
diff --git a/usr/src/uts/intel/io/vmm/sys/vmm_kernel.h b/usr/src/uts/intel/io/vmm/sys/vmm_kernel.h
index 1ef2d48adf..8af6df5c66 100644
--- a/usr/src/uts/intel/io/vmm/sys/vmm_kernel.h
+++ b/usr/src/uts/intel/io/vmm/sys/vmm_kernel.h
@@ -138,7 +138,7 @@ bool vm_is_paused(struct vm *);
/*
* APIs that race against hardware.
*/
-void vm_track_dirty_pages(struct vm *, uint64_t, size_t, uint8_t *);
+int vm_track_dirty_pages(struct vm *, uint64_t, size_t, uint8_t *);
/*
* APIs that modify the guest memory map require all vcpus to be frozen.
diff --git a/usr/src/uts/intel/io/vmm/sys/vmm_vm.h b/usr/src/uts/intel/io/vmm/sys/vmm_vm.h
index 57d0ec8b00..6edba02bc5 100644
--- a/usr/src/uts/intel/io/vmm/sys/vmm_vm.h
+++ b/usr/src/uts/intel/io/vmm/sys/vmm_vm.h
@@ -12,7 +12,7 @@
/*
* Copyright 2019 Joyent, Inc.
- * Copyright 2021 Oxide Computer Company
+ * Copyright 2022 Oxide Computer Company
*/
#ifndef _VMM_VM_H
@@ -40,7 +40,7 @@ vm_client_t *vmspace_client_alloc(vmspace_t *);
uint64_t vmspace_table_root(vmspace_t *);
uint64_t vmspace_table_gen(vmspace_t *);
uint64_t vmspace_resident_count(vmspace_t *);
-void vmspace_track_dirty(vmspace_t *, uint64_t, size_t, uint8_t *);
+int vmspace_track_dirty(vmspace_t *, uint64_t, size_t, uint8_t *);
/* vm_client_t operations */
vm_page_t *vmc_hold(vm_client_t *, uintptr_t, int);
diff --git a/usr/src/uts/intel/io/vmm/vmm.c b/usr/src/uts/intel/io/vmm/vmm.c
index 136c38c5ab..11f09c2102 100644
--- a/usr/src/uts/intel/io/vmm/vmm.c
+++ b/usr/src/uts/intel/io/vmm/vmm.c
@@ -551,12 +551,6 @@ vm_init(struct vm *vm, bool create)
uint_t cores_per_package = 1;
uint_t threads_per_core = 1;
-/*
- * Debugging tunable to enable dirty-page-tracking.
- * (Remains off by default for now)
- */
-bool gpt_track_dirty = false;
-
int
vm_create(uint64_t flags, struct vm **retvm)
{
@@ -570,7 +564,11 @@ vm_create(uint64_t flags, struct vm **retvm)
if (!vmm_initialized)
return (ENXIO);
- vmspace = vmspace_alloc(VM_MAXUSER_ADDRESS, pte_ops, gpt_track_dirty);
+ bool track_dirty = (flags & VCF_TRACK_DIRTY) != 0;
+ if (track_dirty && !pte_ops->vpeo_hw_ad_supported())
+ return (ENOTSUP);
+
+ vmspace = vmspace_alloc(VM_MAXUSER_ADDRESS, pte_ops, track_dirty);
if (vmspace == NULL)
return (ENOMEM);
@@ -1357,11 +1355,11 @@ vm_set_run_state(struct vm *vm, int vcpuid, uint32_t state, uint8_t sipi_vec)
return (0);
}
-void
+int
vm_track_dirty_pages(struct vm *vm, uint64_t gpa, size_t len, uint8_t *bitmap)
{
vmspace_t *vms = vm_get_vmspace(vm);
- vmspace_track_dirty(vms, gpa, len, bitmap);
+ return (vmspace_track_dirty(vms, gpa, len, bitmap));
}
static void
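
With the global gpt_track_dirty tunable removed, dirty tracking becomes an opt-in, per-VM property selected at creation time. A sketch of the intended flow follows; the wrapper function is hypothetical, while vm_create() and VCF_TRACK_DIRTY are taken from this change:

/*
 * Sketch only: request dirty tracking at creation and fall back when
 * the hardware lacks A/D bit support (vm_create() returns ENOTSUP).
 */
static int
example_create_tracked_vm(struct vm **vmp)
{
	int err = vm_create(VCF_TRACK_DIRTY, vmp);

	if (err == ENOTSUP) {
		/* No hardware A/D tracking; create the VM without it. */
		err = vm_create(0, vmp);
	}
	return (err);
}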
diff --git a/usr/src/uts/intel/io/vmm/vmm_gpt.c b/usr/src/uts/intel/io/vmm/vmm_gpt.c
index a542dba055..a464f8e964 100644
--- a/usr/src/uts/intel/io/vmm/vmm_gpt.c
+++ b/usr/src/uts/intel/io/vmm/vmm_gpt.c
@@ -579,7 +579,8 @@ vmm_gpt_reset_dirty(vmm_gpt_t *gpt, uint64_t *entry, bool on)
* Get properly formatted PML4 (EPTP/nCR3) for GPT.
*/
uint64_t
-vmm_gpt_get_pmtp(vmm_gpt_t *gpt)
+vmm_gpt_get_pmtp(vmm_gpt_t *gpt, bool track_dirty)
{
- return (gpt->vgpt_pte_ops->vpeo_get_pmtp(gpt->vgpt_root->vgn_host_pfn));
+ const pfn_t root_pfn = gpt->vgpt_root->vgn_host_pfn;
+ return (gpt->vgpt_pte_ops->vpeo_get_pmtp(root_pfn, track_dirty));
}
diff --git a/usr/src/uts/intel/io/vmm/vmm_sol_dev.c b/usr/src/uts/intel/io/vmm/vmm_sol_dev.c
index c66f1ce17a..d89bca5ce0 100644
--- a/usr/src/uts/intel/io/vmm/vmm_sol_dev.c
+++ b/usr/src/uts/intel/io/vmm/vmm_sol_dev.c
@@ -1703,9 +1703,10 @@ vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md,
}
len = roundup(tracker.vdt_len / PAGESIZE, 8) / 8;
bitmap = kmem_zalloc(len, KM_SLEEP);
- vm_track_dirty_pages(sc->vmm_vm, tracker.vdt_start_gpa,
+ error = vm_track_dirty_pages(sc->vmm_vm, tracker.vdt_start_gpa,
tracker.vdt_len, bitmap);
- if (ddi_copyout(bitmap, tracker.vdt_pfns, len, md) != 0) {
+ if (error == 0 &&
+ ddi_copyout(bitmap, tracker.vdt_pfns, len, md) != 0) {
error = EFAULT;
}
kmem_free(bitmap, len);
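
The bitmap sizing above allots one bit per guest page, rounded up to whole bytes. A userspace-side sketch of driving this path is shown below; the struct and ioctl names (vmm_dirty_tracker, VM_TRACK_DIRTY_PAGES), the helper itself, and its parameters are assumptions not shown in this diff:

/*
 * Sketch only.  Assumes the vmm device headers for the tracker struct
 * and ioctl, plus <sys/sysmacros.h> for roundup() and PAGESIZE.
 */
static uint8_t *
example_fetch_dirty_bitmap(int vmfd, size_t guest_mem_len)
{
	struct vmm_dirty_tracker tracker = {
		.vdt_start_gpa = 0,
		.vdt_len = guest_mem_len,	/* page-aligned, in bytes */
	};
	/* Mirror the kernel's sizing: one bit per page, whole bytes. */
	size_t bitmap_len = roundup(guest_mem_len / PAGESIZE, 8) / 8;
	uint8_t *bitmap = calloc(bitmap_len, 1);

	if (bitmap == NULL)
		return (NULL);
	tracker.vdt_pfns = bitmap;
	/* EPERM now means the VM was created without VCF_TRACK_DIRTY. */
	if (ioctl(vmfd, VM_TRACK_DIRTY_PAGES, &tracker) != 0) {
		free(bitmap);
		return (NULL);
	}
	return (bitmap);
}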
diff --git a/usr/src/uts/intel/io/vmm/vmm_sol_ept.c b/usr/src/uts/intel/io/vmm/vmm_sol_ept.c
index fde4a030ce..c34ec4684e 100644
--- a/usr/src/uts/intel/io/vmm/vmm_sol_ept.c
+++ b/usr/src/uts/intel/io/vmm/vmm_sol_ept.c
@@ -12,7 +12,7 @@
/*
* Copyright 2019 Joyent, Inc.
- * Copyright 2021 Oxide Computer Company
+ * Copyright 2022 Oxide Computer Company
*/
#include <sys/types.h>
@@ -40,6 +40,8 @@
#define EPT_MAX_LEVELS 4
CTASSERT(EPT_MAX_LEVELS <= MAX_GPT_LEVEL);
+#define EPTP_FLAG_ACCESSED_DIRTY (1 << 6)
+
CTASSERT(EPT_R == PROT_READ);
CTASSERT(EPT_W == PROT_WRITE);
CTASSERT(EPT_X == PROT_EXEC);
@@ -120,13 +122,20 @@ ept_reset_accessed(uint64_t *entry, bool on)
}
static uint64_t
-ept_get_pmtp(pfn_t root_pfn)
+ept_get_pmtp(pfn_t root_pfn, bool track_dirty)
{
- /* TODO: enable AD tracking when required */
- return ((root_pfn << PAGESHIFT |
+ const uint64_t ad_flag = track_dirty ? EPTP_FLAG_ACCESSED_DIRTY : 0;
+ return ((root_pfn << PAGESHIFT | ad_flag |
(EPT_MAX_LEVELS - 1) << 3 | MTRR_TYPE_WB));
}
+static bool
+ept_hw_ad_supported(void)
+{
+ uint64_t ept_caps = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP);
+ return ((ept_caps & IA32_VMX_EPT_VPID_HW_AD) != 0);
+}
+
vmm_pte_ops_t ept_pte_ops = {
.vpeo_map_table = ept_map_table,
.vpeo_map_page = ept_map_page,
@@ -136,4 +145,5 @@ vmm_pte_ops_t ept_pte_ops = {
.vpeo_reset_dirty = ept_reset_dirty,
.vpeo_reset_accessed = ept_reset_accessed,
.vpeo_get_pmtp = ept_get_pmtp,
+ .vpeo_hw_ad_supported = ept_hw_ad_supported,
};
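
A standalone worked example of the EPTP layout built by ept_get_pmtp(): memory type in bits 2:0, (page-walk levels - 1) in bits 5:3, the accessed/dirty enable in bit 6, and the root table PFN from bit 12 up. The PFN value and the local copies of the constants are for illustration only:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define	MTRR_TYPE_WB			6	/* writeback memory type */
#define	EPT_MAX_LEVELS			4
#define	EPTP_FLAG_ACCESSED_DIRTY	(1 << 6)
#define	PAGESHIFT			12

int
main(void)
{
	uint64_t root_pfn = 0x1234;	/* arbitrary example PFN */
	uint64_t eptp = root_pfn << PAGESHIFT | EPTP_FLAG_ACCESSED_DIRTY |
	    (EPT_MAX_LEVELS - 1) << 3 | MTRR_TYPE_WB;

	/* Prints 0x123405e: WB (0x6), 4-level walk (0x18), A/D (0x40). */
	(void) printf("eptp = 0x%" PRIx64 "\n", eptp);
	return (0);
}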
diff --git a/usr/src/uts/intel/io/vmm/vmm_sol_rvi.c b/usr/src/uts/intel/io/vmm/vmm_sol_rvi.c
index 8b45782d25..fb7afd069b 100644
--- a/usr/src/uts/intel/io/vmm/vmm_sol_rvi.c
+++ b/usr/src/uts/intel/io/vmm/vmm_sol_rvi.c
@@ -12,7 +12,7 @@
/*
* Copyright 2019 Joyent, Inc.
- * Copyright 2021 Oxide Computer Company
+ * Copyright 2022 Oxide Computer Company
*/
#include <sys/types.h>
@@ -140,11 +140,18 @@ rvi_reset_accessed(uint64_t *entry, bool on)
}
static uint64_t
-rvi_get_pmtp(pfn_t root_pfn)
+rvi_get_pmtp(pfn_t root_pfn, bool track_dirty)
{
return (root_pfn << PAGESHIFT);
}
+static bool
+rvi_hw_ad_supported(void)
+{
+ return (true);
+}
+
+
vmm_pte_ops_t rvi_pte_ops = {
.vpeo_map_table = rvi_map_table,
.vpeo_map_page = rvi_map_page,
@@ -154,4 +161,5 @@ vmm_pte_ops_t rvi_pte_ops = {
.vpeo_reset_dirty = rvi_reset_dirty,
.vpeo_reset_accessed = rvi_reset_accessed,
.vpeo_get_pmtp = rvi_get_pmtp,
+ .vpeo_hw_ad_supported = rvi_hw_ad_supported,
};
diff --git a/usr/src/uts/intel/io/vmm/vmm_vm.c b/usr/src/uts/intel/io/vmm/vmm_vm.c
index 42d963a53c..609f034d10 100644
--- a/usr/src/uts/intel/io/vmm/vmm_vm.c
+++ b/usr/src/uts/intel/io/vmm/vmm_vm.c
@@ -292,9 +292,12 @@ vmspace_resident_count(vmspace_t *vms)
return (vms->vms_pages_mapped);
}
-void
+int
vmspace_track_dirty(vmspace_t *vms, uint64_t gpa, size_t len, uint8_t *bitmap)
{
+ if (!vms->vms_track_dirty)
+ return (EPERM);
+
/*
* Accumulate dirty bits into the given bit vector. Note that this
* races both against hardware writes from running vCPUs and
@@ -327,6 +330,8 @@ vmspace_track_dirty(vmspace_t *vms, uint64_t gpa, size_t len, uint8_t *bitmap)
vmc_space_invalidate(vmc, gpa, len, vms->vms_pt_gen);
}
vmspace_hold_exit(vms, true);
+
+ return (0);
}
static pfn_t
@@ -839,7 +844,7 @@ vmspace_client_alloc(vmspace_t *vms)
uint64_t
vmspace_table_root(vmspace_t *vms)
{
- return (vmm_gpt_get_pmtp(vms->vms_gpt));
+ return (vmm_gpt_get_pmtp(vms->vms_gpt, vms->vms_track_dirty));
}
/*
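
Once vmspace_track_dirty() has accumulated the dirty bits, the caller walks the bitmap with one bit per page of the requested range. A sketch follows, assuming the natural layout of bit (i % 8) in byte (i / 8); the callback type and function name are hypothetical:

/*
 * Sketch only (kernel context): invoke cb for every page in
 * [gpa, gpa + len) whose bit is set in the accumulated bitmap.
 */
typedef void (*dirty_page_cb_t)(uint64_t gpa);

static void
example_walk_dirty_bitmap(uint64_t gpa, size_t len, const uint8_t *bitmap,
    dirty_page_cb_t cb)
{
	const size_t npages = len / PAGESIZE;

	for (size_t i = 0; i < npages; i++) {
		if ((bitmap[i / 8] & (1 << (i % 8))) != 0)
			cb(gpa + i * PAGESIZE);
	}
}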
diff --git a/usr/src/uts/intel/sys/vmm.h b/usr/src/uts/intel/sys/vmm.h
index 45e0fe8f34..8a0b45ca5c 100644
--- a/usr/src/uts/intel/sys/vmm.h
+++ b/usr/src/uts/intel/sys/vmm.h
@@ -404,6 +404,11 @@ enum vm_create_flags {
* rather than attempting to create transient allocations.
*/
VCF_RESERVOIR_MEM = (1 << 0),
+
+ /*
+ * Enable dirty page tracking for the guest.
+ */
+ VCF_TRACK_DIRTY = (1 << 1),
};
/*
diff --git a/usr/src/uts/intel/sys/vmm_dev.h b/usr/src/uts/intel/sys/vmm_dev.h
index 6fe04a633e..c0d07ad049 100644
--- a/usr/src/uts/intel/sys/vmm_dev.h
+++ b/usr/src/uts/intel/sys/vmm_dev.h
@@ -402,7 +402,7 @@ struct vm_legacy_cpuid {
* best-effort activity. Nothing is to be inferred about the magnitude of a
* change when the version is modified. It follows no rules like semver.
*/
-#define VMM_CURRENT_INTERFACE_VERSION 7
+#define VMM_CURRENT_INTERFACE_VERSION 8
#define VMMCTL_IOC_BASE (('V' << 16) | ('M' << 8))