-rw-r--r--  usr/src/uts/i86pc/io/vmm/intel/vmx.c      143
-rw-r--r--  usr/src/uts/i86pc/io/vmm/intel/vmx.h        3
-rw-r--r--  usr/src/uts/i86pc/io/vmm/intel/vmx_msr.c   95
-rw-r--r--  usr/src/uts/i86pc/io/vmm/intel/vmx_msr.h   16
4 files changed, 131 insertions, 126 deletions
diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx.c b/usr/src/uts/i86pc/io/vmm/intel/vmx.c
index c716c02a34..61df90da3d 100644
--- a/usr/src/uts/i86pc/io/vmm/intel/vmx.c
+++ b/usr/src/uts/i86pc/io/vmm/intel/vmx.c
@@ -284,13 +284,6 @@ SDT_PROBE_DEFINE4(vmm, vmx, exit, return,
"struct vmx *", "int", "struct vm_exit *", "int");
/* END CSTYLED */
-/*
- * Use the last page below 4GB as the APIC access address. This address is
- * occupied by the boot firmware so it is guaranteed that it will not conflict
- * with a page in system memory.
- */
-#define APIC_ACCESS_ADDRESS 0xFFFFF000
-
static int vmx_getdesc(void *arg, int vcpu, int reg, struct seg_desc *desc);
static int vmx_getreg(void *arg, int vcpu, int reg, uint64_t *retval);
static void vmx_apply_tsc_adjust(struct vmx *, int);
@@ -298,40 +291,33 @@ static void vmx_apicv_sync_tmr(struct vlapic *vlapic);
static void vmx_tpr_shadow_enter(struct vlapic *vlapic);
static void vmx_tpr_shadow_exit(struct vlapic *vlapic);
-static int
-vmx_allow_x2apic_msrs(struct vmx *vmx)
+static void
+vmx_allow_x2apic_msrs(struct vmx *vmx, int vcpuid)
{
- int i, error;
-
- error = 0;
-
/*
* Allow readonly access to the following x2APIC MSRs from the guest.
*/
- error += guest_msr_ro(vmx, MSR_APIC_ID);
- error += guest_msr_ro(vmx, MSR_APIC_VERSION);
- error += guest_msr_ro(vmx, MSR_APIC_LDR);
- error += guest_msr_ro(vmx, MSR_APIC_SVR);
-
- for (i = 0; i < 8; i++)
- error += guest_msr_ro(vmx, MSR_APIC_ISR0 + i);
-
- for (i = 0; i < 8; i++)
- error += guest_msr_ro(vmx, MSR_APIC_TMR0 + i);
-
- for (i = 0; i < 8; i++)
- error += guest_msr_ro(vmx, MSR_APIC_IRR0 + i);
-
- error += guest_msr_ro(vmx, MSR_APIC_ESR);
- error += guest_msr_ro(vmx, MSR_APIC_LVT_TIMER);
- error += guest_msr_ro(vmx, MSR_APIC_LVT_THERMAL);
- error += guest_msr_ro(vmx, MSR_APIC_LVT_PCINT);
- error += guest_msr_ro(vmx, MSR_APIC_LVT_LINT0);
- error += guest_msr_ro(vmx, MSR_APIC_LVT_LINT1);
- error += guest_msr_ro(vmx, MSR_APIC_LVT_ERROR);
- error += guest_msr_ro(vmx, MSR_APIC_ICR_TIMER);
- error += guest_msr_ro(vmx, MSR_APIC_DCR_TIMER);
- error += guest_msr_ro(vmx, MSR_APIC_ICR);
+ guest_msr_ro(vmx, vcpuid, MSR_APIC_ID);
+ guest_msr_ro(vmx, vcpuid, MSR_APIC_VERSION);
+ guest_msr_ro(vmx, vcpuid, MSR_APIC_LDR);
+ guest_msr_ro(vmx, vcpuid, MSR_APIC_SVR);
+
+ for (uint_t i = 0; i < 8; i++) {
+ guest_msr_ro(vmx, vcpuid, MSR_APIC_ISR0 + i);
+ guest_msr_ro(vmx, vcpuid, MSR_APIC_TMR0 + i);
+ guest_msr_ro(vmx, vcpuid, MSR_APIC_IRR0 + i);
+ }
+
+ guest_msr_ro(vmx, vcpuid, MSR_APIC_ESR);
+ guest_msr_ro(vmx, vcpuid, MSR_APIC_LVT_TIMER);
+ guest_msr_ro(vmx, vcpuid, MSR_APIC_LVT_THERMAL);
+ guest_msr_ro(vmx, vcpuid, MSR_APIC_LVT_PCINT);
+ guest_msr_ro(vmx, vcpuid, MSR_APIC_LVT_LINT0);
+ guest_msr_ro(vmx, vcpuid, MSR_APIC_LVT_LINT1);
+ guest_msr_ro(vmx, vcpuid, MSR_APIC_LVT_ERROR);
+ guest_msr_ro(vmx, vcpuid, MSR_APIC_ICR_TIMER);
+ guest_msr_ro(vmx, vcpuid, MSR_APIC_DCR_TIMER);
+ guest_msr_ro(vmx, vcpuid, MSR_APIC_ICR);
/*
* Allow TPR, EOI and SELF_IPI MSRs to be read and written by the guest.
@@ -339,11 +325,9 @@ vmx_allow_x2apic_msrs(struct vmx *vmx)
* These registers get special treatment described in the section
* "Virtualizing MSR-Based APIC Accesses".
*/
- error += guest_msr_rw(vmx, MSR_APIC_TPR);
- error += guest_msr_rw(vmx, MSR_APIC_EOI);
- error += guest_msr_rw(vmx, MSR_APIC_SELF_IPI);
-
- return (error);
+ guest_msr_rw(vmx, vcpuid, MSR_APIC_TPR);
+ guest_msr_rw(vmx, vcpuid, MSR_APIC_EOI);
+ guest_msr_rw(vmx, vcpuid, MSR_APIC_SELF_IPI);
}
static ulong_t
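
[Editor's note: the reworked vmx_allow_x2apic_msrs() above takes a vcpuid and
touches only that vCPU's bitmap. For reference, the MSR_APIC_* numbers it
passes to guest_msr_ro()/guest_msr_rw() follow the architectural x2APIC
mapping from xAPIC MMIO offsets; a minimal sketch, with a hypothetical helper
name that is not part of this change:]

	/*
	 * Hypothetical helper, illustrative only: x2APIC MSRs live at
	 * 0x800 + (xAPIC MMIO offset >> 4) per the Intel SDM.
	 */
	static unsigned int
	x2apic_msr(unsigned int mmio_offset)
	{
		return (0x800 + (mmio_offset >> 4));
	}

	/*
	 * x2apic_msr(0x80)  == 0x808 -> TPR  (guest_msr_rw above)
	 * x2apic_msr(0xB0)  == 0x80B -> EOI  (guest_msr_rw above)
	 * x2apic_msr(0x100) == 0x810 -> ISR0 (first of the 8-register banks)
	 */
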
@@ -667,6 +651,7 @@ vmx_vminit(struct vm *vm, pmap_t pmap)
uint32_t exc_bitmap;
uint16_t maxcpus;
uint32_t proc_ctls, proc2_ctls, pin_ctls;
+ uint64_t apic_access_pa = UINT64_MAX;
vmx = malloc(sizeof (struct vmx), M_VMX, M_WAITOK | M_ZERO);
if ((uintptr_t)vmx & PAGE_MASK) {
@@ -688,36 +673,7 @@ vmx_vminit(struct vm *vm, pmap_t pmap)
*/
ept_invalidate_mappings(vmx->eptp);
- msr_bitmap_initialize(vmx->msr_bitmap);
-
- /*
- * It is safe to allow direct access to MSR_GSBASE and MSR_FSBASE.
- * The guest FSBASE and GSBASE are saved and restored during
- * vm-exit and vm-entry respectively. The host FSBASE and GSBASE are
- * always restored from the vmcs host state area on vm-exit.
- *
- * The SYSENTER_CS/ESP/EIP MSRs are identical to FS/GSBASE in
- * how they are saved/restored so can be directly accessed by the
- * guest.
- *
- * MSR_EFER is saved and restored in the guest VMCS area on a
- * VM exit and entry respectively. It is also restored from the
- * host VMCS area on a VM exit.
- *
- * The TSC MSR is exposed read-only. Writes are disallowed as
- * that will impact the host TSC. If the guest does a write
- * the "use TSC offsetting" execution control is enabled and the
- * difference between the host TSC and the guest TSC is written
- * into the TSC offset in the VMCS.
- */
- if (guest_msr_rw(vmx, MSR_GSBASE) ||
- guest_msr_rw(vmx, MSR_FSBASE) ||
- guest_msr_rw(vmx, MSR_SYSENTER_CS_MSR) ||
- guest_msr_rw(vmx, MSR_SYSENTER_ESP_MSR) ||
- guest_msr_rw(vmx, MSR_SYSENTER_EIP_MSR) ||
- guest_msr_rw(vmx, MSR_EFER) ||
- guest_msr_ro(vmx, MSR_TSC))
- panic("vmx_vminit: error setting guest msr access");
+ vmx_msr_bitmap_initialize(vmx);
vpid_alloc(vpid, VM_MAXCPU);
@@ -740,8 +696,17 @@ vmx_vminit(struct vm *vm, pmap_t pmap)
PROCBASED2_APIC_REGISTER_VIRTUALIZATION |
PROCBASED2_VIRTUAL_INTERRUPT_DELIVERY);
+ /*
+ * Allocate a page of memory to back the APIC access address for
+ * when APICv features are in use. Guest MMIO accesses should
+ * never actually reach this page, but rather be intercepted.
+ */
+ vmx->apic_access_page = kmem_zalloc(PAGESIZE, KM_SLEEP);
+ VERIFY3U((uintptr_t)vmx->apic_access_page & PAGEOFFSET, ==, 0);
+ apic_access_pa = vtophys(vmx->apic_access_page);
+
error = vm_map_mmio(vm, DEFAULT_APIC_BASE, PAGE_SIZE,
- APIC_ACCESS_ADDRESS);
+ apic_access_pa);
/* XXX this should really return an error to the caller */
KASSERT(error == 0, ("vm_map_mmio(apicbase) error %d", error));
}
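
[Editor's note: the hunk above retires the old APIC_ACCESS_ADDRESS constant
(the last page below 4 GiB, previously assumed to be occupied by boot
firmware) in favor of a real allocated backing page. A minimal sketch of the
pattern, assuming illumos kmem semantics where a page-sized kmem_zalloc()
request returns page-aligned memory:]

	/* Sketch only; mirrors the allocation in the hunk above. */
	void *apic_page = kmem_zalloc(PAGESIZE, KM_SLEEP);
	VERIFY3U((uintptr_t)apic_page & PAGEOFFSET, ==, 0);

	/* Back the guest-physical APIC base with the page's physical address. */
	int err = vm_map_mmio(vm, DEFAULT_APIC_BASE, PAGE_SIZE,
	    vtophys(apic_page));
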
@@ -759,7 +724,7 @@ vmx_vminit(struct vm *vm, pmap_t pmap)
* may be required inside the critical_enter() section implied
* by VMPTRLD() below.
*/
- vm_paddr_t msr_bitmap_pa = vtophys(vmx->msr_bitmap);
+ vm_paddr_t msr_bitmap_pa = vtophys(vmx->msr_bitmap[i]);
vm_paddr_t apic_page_pa = vtophys(&vmx->apic_page[i]);
vm_paddr_t pir_desc_pa = vtophys(&vmx->pir_desc[i]);
@@ -841,7 +806,7 @@ vmx_vminit(struct vm *vm, pmap_t pmap)
}
if (vmx_cap_en(vmx, VMX_CAP_APICV)) {
- vmcs_write(VMCS_APIC_ACCESS, APIC_ACCESS_ADDRESS);
+ vmcs_write(VMCS_APIC_ACCESS, apic_access_pa);
vmcs_write(VMCS_EOI_EXIT0, 0);
vmcs_write(VMCS_EOI_EXIT1, 0);
vmcs_write(VMCS_EOI_EXIT2, 0);
@@ -2870,8 +2835,14 @@ vmx_vmcleanup(void *arg)
struct vmx *vmx = arg;
uint16_t maxcpus;
- if (apic_access_virtualization(vmx, 0))
+ if (vmx_cap_en(vmx, VMX_CAP_APICV)) {
vm_unmap_mmio(vmx->vm, DEFAULT_APIC_BASE, PAGE_SIZE);
+ kmem_free(vmx->apic_access_page, PAGESIZE);
+ } else {
+ VERIFY3P(vmx->apic_access_page, ==, NULL);
+ }
+
+ vmx_msr_bitmap_destroy(vmx);
maxcpus = vm_get_maxcpus(vmx->vm);
for (i = 0; i < maxcpus; i++)
@@ -3436,7 +3407,7 @@ vmx_enable_x2apic_mode_vid(struct vlapic *vlapic)
{
struct vmx *vmx;
uint32_t proc_ctls2;
- int vcpuid, error;
+ int vcpuid;
vcpuid = vlapic->vcpuid;
vmx = ((struct vlapic_vtx *)vlapic)->vmx;
@@ -3453,23 +3424,7 @@ vmx_enable_x2apic_mode_vid(struct vlapic *vlapic)
vmcs_write(VMCS_SEC_PROC_BASED_CTLS, proc_ctls2);
vmcs_clear(vmx->vmcs_pa[vcpuid]);
- if (vlapic->vcpuid == 0) {
- /*
- * The nested page table mappings are shared by all vcpus
- * so unmap the APIC access page just once.
- */
- error = vm_unmap_mmio(vmx->vm, DEFAULT_APIC_BASE, PAGE_SIZE);
- KASSERT(error == 0, ("%s: vm_unmap_mmio error %d",
- __func__, error));
-
- /*
- * The MSR bitmap is shared by all vcpus so modify it only
- * once in the context of vcpu 0.
- */
- error = vmx_allow_x2apic_msrs(vmx);
- KASSERT(error == 0, ("%s: vmx_allow_x2apic_msrs error %d",
- __func__, error));
- }
+ vmx_allow_x2apic_msrs(vmx, vcpuid);
}
static void
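
[Editor's note: with one MSR bitmap per vCPU, vmx_enable_x2apic_mode_vid()
above no longer needs its vcpuid == 0 special case: each vCPU reprograms only
its own bitmap, so no cross-vCPU ordering is required. A hypothetical
caller-side view, names assumed:]

	/*
	 * Hypothetical: a vCPU entering x2APIC mode adjusts only
	 * vmx->msr_bitmap[vcpuid]; sibling vCPUs are unaffected.
	 */
	static void
	vcpu_set_x2apic(struct vmx *vmx, int vcpuid)
	{
		vmx_allow_x2apic_msrs(vmx, vcpuid);	/* no BSP check */
	}
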
diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx.h b/usr/src/uts/i86pc/io/vmm/intel/vmx.h
index f86d812104..c0d1fdd7fb 100644
--- a/usr/src/uts/i86pc/io/vmm/intel/vmx.h
+++ b/usr/src/uts/i86pc/io/vmm/intel/vmx.h
@@ -137,13 +137,14 @@ typedef enum {
struct vmx {
struct vmcs vmcs[VM_MAXCPU]; /* one vmcs per virtual cpu */
struct apic_page apic_page[VM_MAXCPU]; /* one apic page per vcpu */
- char msr_bitmap[PAGE_SIZE];
+ uint8_t *msr_bitmap[VM_MAXCPU]; /* one MSR bitmap per vCPU */
struct pir_desc pir_desc[VM_MAXCPU];
uint64_t guest_msrs[VM_MAXCPU][GUEST_MSR_NUM];
uint64_t host_msrs[VM_MAXCPU][GUEST_MSR_NUM];
uint64_t tsc_offset_active[VM_MAXCPU];
vmcs_state_t vmcs_state[VM_MAXCPU];
uintptr_t vmcs_pa[VM_MAXCPU];
+ void *apic_access_page;
struct vmxctx ctx[VM_MAXCPU];
struct vmxcap cap[VM_MAXCPU];
struct vmxstate state[VM_MAXCPU];
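
[Editor's note: the struct change above trades the single embedded bitmap
page for VM_MAXCPU pointers to per-vCPU pages, plus a pointer to the APIC
access backing page. A hypothetical accessor, not part of the change,
illustrates the new lookup:]

	/* Hypothetical convenience accessor for the per-vCPU bitmap. */
	static inline uint8_t *
	vmx_vcpu_msr_bitmap(struct vmx *vmx, int vcpuid)
	{
		ASSERT3S(vcpuid, <, VM_MAXCPU);
		return (vmx->msr_bitmap[vcpuid]);
	}
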
diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.c b/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.c
index df044fd09e..cf00426300 100644
--- a/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.c
+++ b/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.c
@@ -29,6 +29,7 @@
*/
/*
* Copyright 2020 Joyent, Inc.
+ * Copyright 2021 Oxide Computer Company
*/
#include <sys/cdefs.h>
@@ -138,38 +139,57 @@ vmx_set_ctlreg(int ctl_reg, int true_ctl_reg, uint32_t ones_mask,
}
void
-msr_bitmap_initialize(char *bitmap)
+vmx_msr_bitmap_initialize(struct vmx *vmx)
{
+ for (uint_t i = 0; i < VM_MAXCPU; i++) {
+ uint8_t *bitmap;
- memset(bitmap, 0xff, PAGE_SIZE);
+ bitmap = kmem_alloc(PAGESIZE, KM_SLEEP);
+ VERIFY3U((uintptr_t)bitmap & PAGEOFFSET, ==, 0);
+ memset(bitmap, 0xff, PAGESIZE);
+
+ vmx->msr_bitmap[i] = bitmap;
+ }
}
-int
-msr_bitmap_change_access(char *bitmap, uint_t msr, int access)
+void
+vmx_msr_bitmap_destroy(struct vmx *vmx)
+{
+ for (uint_t i = 0; i < VM_MAXCPU; i++) {
+ VERIFY3P(vmx->msr_bitmap[i], !=, NULL);
+ kmem_free(vmx->msr_bitmap[i], PAGESIZE);
+ vmx->msr_bitmap[i] = NULL;
+ }
+}
+
+void
+vmx_msr_bitmap_change_access(struct vmx *vmx, int vcpuid, uint_t msr, int acc)
{
+ uint8_t *bitmap = vmx->msr_bitmap[vcpuid];
int byte, bit;
- if (msr <= 0x00001FFF)
+ if (msr <= 0x00001FFF) {
byte = msr / 8;
- else if (msr >= 0xC0000000 && msr <= 0xC0001FFF)
+ } else if (msr >= 0xC0000000 && msr <= 0xC0001FFF) {
byte = 1024 + (msr - 0xC0000000) / 8;
- else
- return (EINVAL);
+ } else {
+ panic("Invalid MSR for bitmap: %x", msr);
+ }
bit = msr & 0x7;
- if (access & MSR_BITMAP_ACCESS_READ)
+ if (acc & MSR_BITMAP_ACCESS_READ) {
bitmap[byte] &= ~(1 << bit);
- else
+ } else {
bitmap[byte] |= 1 << bit;
+ }
byte += 2048;
- if (access & MSR_BITMAP_ACCESS_WRITE)
+ if (acc & MSR_BITMAP_ACCESS_WRITE) {
bitmap[byte] &= ~(1 << bit);
- else
+ } else {
bitmap[byte] |= 1 << bit;
-
- return (0);
+ }
}
static uint64_t misc_enable;
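
[Editor's note: the indexing in vmx_msr_bitmap_change_access() above follows
the Intel SDM MSR-bitmap layout: one 4 KiB page with read bits for MSRs
0x0-0x1FFF in bytes 0-1023, read bits for 0xC0000000-0xC0001FFF in bytes
1024-2047, and the matching write bitmaps 2048 bytes later. A worked example
for MSR_EFER (0xC0000080), derived from the code above; illustrative
user-space C:]

	#include <stdio.h>

	int
	main(void)
	{
		unsigned int msr = 0xC0000080;	/* MSR_EFER */
		unsigned int byte = 1024 + (msr - 0xC0000000) / 8; /* 1040 */
		unsigned int bit = msr & 0x7;	/* 0 */

		/* A clear bit passes the access through; a set bit exits. */
		printf("read: byte %u bit %u; write: byte %u bit %u\n",
		    byte, bit, byte + 2048, bit);
		return (0);
	}
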
@@ -306,21 +326,46 @@ vmx_msr_init(void)
void
vmx_msr_guest_init(struct vmx *vmx, int vcpuid)
{
- uint64_t *guest_msrs;
+ uint64_t *guest_msrs = vmx->guest_msrs[vcpuid];
- guest_msrs = vmx->guest_msrs[vcpuid];
+ /*
+ * It is safe to allow direct access to MSR_GSBASE and
+ * MSR_FSBASE. The guest FSBASE and GSBASE are saved and
+ * restored during vm-exit and vm-entry respectively. The host
+ * FSBASE and GSBASE are always restored from the vmcs host
+ * state area on vm-exit.
+ *
+ * The SYSENTER_CS/ESP/EIP MSRs are identical to FS/GSBASE in
+ * how they are saved/restored so can be directly accessed by
+ * the guest.
+ *
+ * MSR_EFER is saved and restored in the guest VMCS area on a VM
+ * exit and entry respectively. It is also restored from the
+ * host VMCS area on a VM exit.
+ *
+ * The TSC MSR is exposed read-only. Writes are disallowed as
+ * that will impact the host TSC. If the guest does a write the
+ * "use TSC offsetting" execution control is enabled and the
+ * difference between the host TSC and the guest TSC is written
+ * into the TSC offset in the VMCS.
+ */
+ guest_msr_rw(vmx, vcpuid, MSR_GSBASE);
+ guest_msr_rw(vmx, vcpuid, MSR_FSBASE);
+ guest_msr_rw(vmx, vcpuid, MSR_SYSENTER_CS_MSR);
+ guest_msr_rw(vmx, vcpuid, MSR_SYSENTER_ESP_MSR);
+ guest_msr_rw(vmx, vcpuid, MSR_SYSENTER_EIP_MSR);
+ guest_msr_rw(vmx, vcpuid, MSR_EFER);
+ guest_msr_ro(vmx, vcpuid, MSR_TSC);
/*
- * The permissions bitmap is shared between all vcpus so initialize it
- * once when initializing the vBSP.
+ * The guest may have direct access to these MSRs as they are
+ * saved/restored in vmx_msr_guest_enter() and vmx_msr_guest_exit().
*/
- if (vcpuid == 0) {
- guest_msr_rw(vmx, MSR_LSTAR);
- guest_msr_rw(vmx, MSR_CSTAR);
- guest_msr_rw(vmx, MSR_STAR);
- guest_msr_rw(vmx, MSR_SF_MASK);
- guest_msr_rw(vmx, MSR_KGSBASE);
- }
+ guest_msr_rw(vmx, vcpuid, MSR_LSTAR);
+ guest_msr_rw(vmx, vcpuid, MSR_CSTAR);
+ guest_msr_rw(vmx, vcpuid, MSR_STAR);
+ guest_msr_rw(vmx, vcpuid, MSR_SF_MASK);
+ guest_msr_rw(vmx, vcpuid, MSR_KGSBASE);
/*
* Initialize guest IA32_PAT MSR with default value after reset.
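
[Editor's note: vmx_msr_guest_init() above now configures pass-through
per vCPU instead of only on the vBSP. The TSC note in the relocated comment
reduces to: a trapped guest TSC write becomes a VMCS offset rather than a
host TSC write. A sketch under assumed names (vmcs_write()/VMCS_TSC_OFFSET
appear elsewhere in these sources; the TSC-read helper name is assumed):]

	/*
	 * Sketch: convert a trapped guest TSC write into a VMCS offset so
	 * guest reads observe host_tsc + offset == the desired value.
	 */
	static void
	handle_guest_tsc_write(uint64_t desired)
	{
		uint64_t host_tsc = rdtsc();	/* helper name assumed */

		vmcs_write(VMCS_TSC_OFFSET, desired - host_tsc);
	}
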
diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.h b/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.h
index 4bc43d74f9..5df9fc631d 100644
--- a/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.h
+++ b/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.h
@@ -27,6 +27,9 @@
*
* $FreeBSD$
*/
+/*
+ * Copyright 2021 Oxide Computer Company
+ */
#ifndef _VMX_MSR_H_
#define _VMX_MSR_H_
@@ -58,13 +61,14 @@ int vmx_set_ctlreg(int ctl_reg, int true_ctl_reg, uint32_t ones_mask,
#define MSR_BITMAP_ACCESS_READ 0x1
#define MSR_BITMAP_ACCESS_WRITE 0x2
#define MSR_BITMAP_ACCESS_RW (MSR_BITMAP_ACCESS_READ|MSR_BITMAP_ACCESS_WRITE)
-void msr_bitmap_initialize(char *bitmap);
-int msr_bitmap_change_access(char *bitmap, uint_t msr, int access);
+void vmx_msr_bitmap_initialize(struct vmx *);
+void vmx_msr_bitmap_destroy(struct vmx *);
+void vmx_msr_bitmap_change_access(struct vmx *, int, uint_t, int);
-#define guest_msr_rw(vmx, msr) \
- msr_bitmap_change_access((vmx)->msr_bitmap, (msr), MSR_BITMAP_ACCESS_RW)
+#define guest_msr_rw(vmx, vcpuid, msr) \
+ vmx_msr_bitmap_change_access((vmx), (vcpuid), (msr), MSR_BITMAP_ACCESS_RW)
-#define guest_msr_ro(vmx, msr) \
- msr_bitmap_change_access((vmx)->msr_bitmap, (msr), MSR_BITMAP_ACCESS_READ)
+#define guest_msr_ro(vmx, vcpuid, msr) \
+ vmx_msr_bitmap_change_access((vmx), (vcpuid), (msr), MSR_BITMAP_ACCESS_READ)
#endif
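
[Editor's note: for reference, the reworked macros are used as in the callers
shown in vmx.c and vmx_msr.c above; read-only pass-through still forces a VM
exit on writes:]

	guest_msr_rw(vmx, vcpuid, MSR_LSTAR);	/* reads and writes pass through */
	guest_msr_ro(vmx, vcpuid, MSR_TSC);	/* reads pass through; writes exit */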