| author    | Patrick Mooney <pmooney@pfmooney.com>    | 2021-06-10 04:23:06 +0000 |
| committer | Patrick Mooney <pmooney@oxide.computer>  | 2021-07-02 22:08:24 +0000 |
| commit    | 6b641d7a35808436d7283b7057a01791e2e2a4e2 | (patch)                   |
| tree      | 5baecaf22474262b4a296a0c4557d6758ea4f86e |                           |
| parent    | c21bd51d7acbaf77116c4cc3a23dfc6d16c637c2 | (diff)                    |
13861 bhyve apicv logic could take more care
Reviewed by: Dan Cross <cross@oxidecomputer.com>
Reviewed by: Mike Zeller <mike.zeller@joyent.com>
Approved by: Dan McDonald <danmcd@joyent.com>
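
The heart of the change shows in the diffstat below: the VMX MSR permission bitmap moves from one page shared by every vCPU to one page per vCPU, so vmx_allow_x2apic_msrs() can adjust permissions for just the vCPU entering x2APIC mode instead of mutating shared state only from vCPU 0. The bitmap layout itself is unchanged and follows the Intel SDM: read-intercept bits fill the first 2048 bytes of the page and write-intercept bits the second 2048, with MSRs 0x0-0x1FFF mapped from byte 0 and MSRs 0xC0000000-0xC0001FFF from byte 1024. A minimal user-space sketch of that layout (the helper name is illustrative, not part of the patch; a set bit forces a VM exit on access):

#include <stdint.h>

#define	MSR_BITMAP_WRITE_OFFSET	2048	/* write bitmaps follow the read bitmaps */

/*
 * Clear the read (and optionally the write) intercept bit for 'msr',
 * mirroring the byte/bit math in vmx_msr_bitmap_change_access() below.
 */
static void
allow_msr(uint8_t *bitmap, uint32_t msr, int allow_write)
{
	uint32_t byte;

	if (msr <= 0x00001FFF) {
		byte = msr / 8;				/* low MSR range */
	} else if (msr >= 0xC0000000 && msr <= 0xC0001FFF) {
		byte = 1024 + (msr - 0xC0000000) / 8;	/* high MSR range */
	} else {
		return;		/* MSR not representable in the bitmap */
	}

	bitmap[byte] &= ~(1 << (msr & 0x7));		/* permit reads */
	if (allow_write)
		bitmap[byte + MSR_BITMAP_WRITE_OFFSET] &= ~(1 << (msr & 0x7));
}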
| -rw-r--r-- | usr/src/uts/i86pc/io/vmm/intel/vmx.c     | 143 |
| -rw-r--r-- | usr/src/uts/i86pc/io/vmm/intel/vmx.h     |   3 |
| -rw-r--r-- | usr/src/uts/i86pc/io/vmm/intel/vmx_msr.c |  95 |
| -rw-r--r-- | usr/src/uts/i86pc/io/vmm/intel/vmx_msr.h |  16 |
4 files changed, 131 insertions, 126 deletions
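
Two resources that used to be shared or hard-coded are now allocated per VM at vmx_vminit() time: each vCPU receives its own MSR bitmap page, initialized to all-ones so every MSR access is intercepted until explicitly allowed, and a zeroed kernel page backs the APIC access address in place of the fixed APIC_ACCESS_ADDRESS of 0xFFFFF000. A rough user-space model of the per-vCPU bitmap lifecycle (heap allocation stands in for the kernel's kmem_alloc()/kmem_free(); the VM_MAXCPU value here is illustrative, the real one comes from the vmm headers):

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#define	VM_MAXCPU	32	/* illustrative stand-in */
#define	PAGESIZE	4096

static uint8_t *msr_bitmap[VM_MAXCPU];

/* Start fully intercepted: all-ones means every MSR access exits. */
static void
bitmaps_init(void)
{
	for (uint32_t i = 0; i < VM_MAXCPU; i++) {
		msr_bitmap[i] = malloc(PAGESIZE);
		memset(msr_bitmap[i], 0xff, PAGESIZE);
	}
}

/* Model of vmx_msr_bitmap_destroy(): free and clear every slot. */
static void
bitmaps_destroy(void)
{
	for (uint32_t i = 0; i < VM_MAXCPU; i++) {
		free(msr_bitmap[i]);
		msr_bitmap[i] = NULL;
	}
}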
diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx.c b/usr/src/uts/i86pc/io/vmm/intel/vmx.c
index c716c02a34..61df90da3d 100644
--- a/usr/src/uts/i86pc/io/vmm/intel/vmx.c
+++ b/usr/src/uts/i86pc/io/vmm/intel/vmx.c
@@ -284,13 +284,6 @@ SDT_PROBE_DEFINE4(vmm, vmx, exit, return,
     "struct vmx *", "int", "struct vm_exit *", "int");
 /* END CSTYLED */
 
-/*
- * Use the last page below 4GB as the APIC access address. This address is
- * occupied by the boot firmware so it is guaranteed that it will not conflict
- * with a page in system memory.
- */
-#define	APIC_ACCESS_ADDRESS	0xFFFFF000
-
 static int vmx_getdesc(void *arg, int vcpu, int reg, struct seg_desc *desc);
 static int vmx_getreg(void *arg, int vcpu, int reg, uint64_t *retval);
 static void vmx_apply_tsc_adjust(struct vmx *, int);
@@ -298,40 +291,33 @@ static void vmx_apicv_sync_tmr(struct vlapic *vlapic);
 static void vmx_tpr_shadow_enter(struct vlapic *vlapic);
 static void vmx_tpr_shadow_exit(struct vlapic *vlapic);
 
-static int
-vmx_allow_x2apic_msrs(struct vmx *vmx)
+static void
+vmx_allow_x2apic_msrs(struct vmx *vmx, int vcpuid)
 {
-	int i, error;
-
-	error = 0;
-
 	/*
 	 * Allow readonly access to the following x2APIC MSRs from the guest.
 	 */
-	error += guest_msr_ro(vmx, MSR_APIC_ID);
-	error += guest_msr_ro(vmx, MSR_APIC_VERSION);
-	error += guest_msr_ro(vmx, MSR_APIC_LDR);
-	error += guest_msr_ro(vmx, MSR_APIC_SVR);
-
-	for (i = 0; i < 8; i++)
-		error += guest_msr_ro(vmx, MSR_APIC_ISR0 + i);
-
-	for (i = 0; i < 8; i++)
-		error += guest_msr_ro(vmx, MSR_APIC_TMR0 + i);
-
-	for (i = 0; i < 8; i++)
-		error += guest_msr_ro(vmx, MSR_APIC_IRR0 + i);
-
-	error += guest_msr_ro(vmx, MSR_APIC_ESR);
-	error += guest_msr_ro(vmx, MSR_APIC_LVT_TIMER);
-	error += guest_msr_ro(vmx, MSR_APIC_LVT_THERMAL);
-	error += guest_msr_ro(vmx, MSR_APIC_LVT_PCINT);
-	error += guest_msr_ro(vmx, MSR_APIC_LVT_LINT0);
-	error += guest_msr_ro(vmx, MSR_APIC_LVT_LINT1);
-	error += guest_msr_ro(vmx, MSR_APIC_LVT_ERROR);
-	error += guest_msr_ro(vmx, MSR_APIC_ICR_TIMER);
-	error += guest_msr_ro(vmx, MSR_APIC_DCR_TIMER);
-	error += guest_msr_ro(vmx, MSR_APIC_ICR);
+	guest_msr_ro(vmx, vcpuid, MSR_APIC_ID);
+	guest_msr_ro(vmx, vcpuid, MSR_APIC_VERSION);
+	guest_msr_ro(vmx, vcpuid, MSR_APIC_LDR);
+	guest_msr_ro(vmx, vcpuid, MSR_APIC_SVR);
+
+	for (uint_t i = 0; i < 8; i++) {
+		guest_msr_ro(vmx, vcpuid, MSR_APIC_ISR0 + i);
+		guest_msr_ro(vmx, vcpuid, MSR_APIC_TMR0 + i);
+		guest_msr_ro(vmx, vcpuid, MSR_APIC_IRR0 + i);
+	}
+
+	guest_msr_ro(vmx, vcpuid, MSR_APIC_ESR);
+	guest_msr_ro(vmx, vcpuid, MSR_APIC_LVT_TIMER);
+	guest_msr_ro(vmx, vcpuid, MSR_APIC_LVT_THERMAL);
+	guest_msr_ro(vmx, vcpuid, MSR_APIC_LVT_PCINT);
+	guest_msr_ro(vmx, vcpuid, MSR_APIC_LVT_LINT0);
+	guest_msr_ro(vmx, vcpuid, MSR_APIC_LVT_LINT1);
+	guest_msr_ro(vmx, vcpuid, MSR_APIC_LVT_ERROR);
+	guest_msr_ro(vmx, vcpuid, MSR_APIC_ICR_TIMER);
+	guest_msr_ro(vmx, vcpuid, MSR_APIC_DCR_TIMER);
+	guest_msr_ro(vmx, vcpuid, MSR_APIC_ICR);
 
 	/*
 	 * Allow TPR, EOI and SELF_IPI MSRs to be read and written by the guest.
@@ -339,11 +325,9 @@ vmx_allow_x2apic_msrs(struct vmx *vmx)
 	 * These registers get special treatment described in the section
 	 * "Virtualizing MSR-Based APIC Accesses".
 	 */
-	error += guest_msr_rw(vmx, MSR_APIC_TPR);
-	error += guest_msr_rw(vmx, MSR_APIC_EOI);
-	error += guest_msr_rw(vmx, MSR_APIC_SELF_IPI);
-
-	return (error);
+	guest_msr_rw(vmx, vcpuid, MSR_APIC_TPR);
+	guest_msr_rw(vmx, vcpuid, MSR_APIC_EOI);
+	guest_msr_rw(vmx, vcpuid, MSR_APIC_SELF_IPI);
 }
 
 static ulong_t
@@ -667,6 +651,7 @@ vmx_vminit(struct vm *vm, pmap_t pmap)
 	uint32_t exc_bitmap;
 	uint16_t maxcpus;
 	uint32_t proc_ctls, proc2_ctls, pin_ctls;
+	uint64_t apic_access_pa = UINT64_MAX;
 
 	vmx = malloc(sizeof (struct vmx), M_VMX, M_WAITOK | M_ZERO);
 	if ((uintptr_t)vmx & PAGE_MASK) {
@@ -688,36 +673,7 @@ vmx_vminit(struct vm *vm, pmap_t pmap)
 	 */
 	ept_invalidate_mappings(vmx->eptp);
 
-	msr_bitmap_initialize(vmx->msr_bitmap);
-
-	/*
-	 * It is safe to allow direct access to MSR_GSBASE and MSR_FSBASE.
-	 * The guest FSBASE and GSBASE are saved and restored during
-	 * vm-exit and vm-entry respectively. The host FSBASE and GSBASE are
-	 * always restored from the vmcs host state area on vm-exit.
-	 *
-	 * The SYSENTER_CS/ESP/EIP MSRs are identical to FS/GSBASE in
-	 * how they are saved/restored so can be directly accessed by the
-	 * guest.
-	 *
-	 * MSR_EFER is saved and restored in the guest VMCS area on a
-	 * VM exit and entry respectively. It is also restored from the
-	 * host VMCS area on a VM exit.
-	 *
-	 * The TSC MSR is exposed read-only. Writes are disallowed as
-	 * that will impact the host TSC.  If the guest does a write
-	 * the "use TSC offsetting" execution control is enabled and the
-	 * difference between the host TSC and the guest TSC is written
-	 * into the TSC offset in the VMCS.
-	 */
-	if (guest_msr_rw(vmx, MSR_GSBASE) ||
-	    guest_msr_rw(vmx, MSR_FSBASE) ||
-	    guest_msr_rw(vmx, MSR_SYSENTER_CS_MSR) ||
-	    guest_msr_rw(vmx, MSR_SYSENTER_ESP_MSR) ||
-	    guest_msr_rw(vmx, MSR_SYSENTER_EIP_MSR) ||
-	    guest_msr_rw(vmx, MSR_EFER) ||
-	    guest_msr_ro(vmx, MSR_TSC))
-		panic("vmx_vminit: error setting guest msr access");
+	vmx_msr_bitmap_initialize(vmx);
 
 	vpid_alloc(vpid, VM_MAXCPU);
 
@@ -740,8 +696,17 @@ vmx_vminit(struct vm *vm, pmap_t pmap)
 		    PROCBASED2_APIC_REGISTER_VIRTUALIZATION |
 		    PROCBASED2_VIRTUAL_INTERRUPT_DELIVERY);
 
+		/*
+		 * Allocate a page of memory to back the APIC access address for
+		 * when APICv features are in use.  Guest MMIO accesses should
+		 * never actually reach this page, but rather be intercepted.
+		 */
+		vmx->apic_access_page = kmem_zalloc(PAGESIZE, KM_SLEEP);
+		VERIFY3U((uintptr_t)vmx->apic_access_page & PAGEOFFSET, ==, 0);
+		apic_access_pa = vtophys(vmx->apic_access_page);
+
 		error = vm_map_mmio(vm, DEFAULT_APIC_BASE, PAGE_SIZE,
-		    APIC_ACCESS_ADDRESS);
+		    apic_access_pa);
 		/* XXX this should really return an error to the caller */
 		KASSERT(error == 0, ("vm_map_mmio(apicbase) error %d", error));
 	}
@@ -759,7 +724,7 @@ vmx_vminit(struct vm *vm, pmap_t pmap)
 		 * may be required inside the critical_enter() section implied
 		 * by VMPTRLD() below.
 		 */
-		vm_paddr_t msr_bitmap_pa = vtophys(vmx->msr_bitmap);
+		vm_paddr_t msr_bitmap_pa = vtophys(vmx->msr_bitmap[i]);
 		vm_paddr_t apic_page_pa = vtophys(&vmx->apic_page[i]);
 		vm_paddr_t pir_desc_pa = vtophys(&vmx->pir_desc[i]);
 
@@ -841,7 +806,7 @@ vmx_vminit(struct vm *vm, pmap_t pmap)
 		}
 
 		if (vmx_cap_en(vmx, VMX_CAP_APICV)) {
-			vmcs_write(VMCS_APIC_ACCESS, APIC_ACCESS_ADDRESS);
+			vmcs_write(VMCS_APIC_ACCESS, apic_access_pa);
 			vmcs_write(VMCS_EOI_EXIT0, 0);
 			vmcs_write(VMCS_EOI_EXIT1, 0);
 			vmcs_write(VMCS_EOI_EXIT2, 0);
@@ -2870,8 +2835,14 @@ vmx_vmcleanup(void *arg)
 	struct vmx *vmx = arg;
 	uint16_t maxcpus;
 
-	if (apic_access_virtualization(vmx, 0))
+	if (vmx_cap_en(vmx, VMX_CAP_APICV)) {
 		vm_unmap_mmio(vmx->vm, DEFAULT_APIC_BASE, PAGE_SIZE);
+		kmem_free(vmx->apic_access_page, PAGESIZE);
+	} else {
+		VERIFY3P(vmx->apic_access_page, ==, NULL);
+	}
+
+	vmx_msr_bitmap_destroy(vmx);
 
 	maxcpus = vm_get_maxcpus(vmx->vm);
 	for (i = 0; i < maxcpus; i++)
@@ -3436,7 +3407,7 @@ vmx_enable_x2apic_mode_vid(struct vlapic *vlapic)
 {
 	struct vmx *vmx;
 	uint32_t proc_ctls2;
-	int vcpuid, error;
+	int vcpuid;
 
 	vcpuid = vlapic->vcpuid;
 	vmx = ((struct vlapic_vtx *)vlapic)->vmx;
@@ -3453,23 +3424,7 @@ vmx_enable_x2apic_mode_vid(struct vlapic *vlapic)
 	vmcs_write(VMCS_SEC_PROC_BASED_CTLS, proc_ctls2);
 	vmcs_clear(vmx->vmcs_pa[vcpuid]);
 
-	if (vlapic->vcpuid == 0) {
-		/*
-		 * The nested page table mappings are shared by all vcpus
-		 * so unmap the APIC access page just once.
-		 */
-		error = vm_unmap_mmio(vmx->vm, DEFAULT_APIC_BASE, PAGE_SIZE);
-		KASSERT(error == 0, ("%s: vm_unmap_mmio error %d",
-		    __func__, error));
-
-		/*
-		 * The MSR bitmap is shared by all vcpus so modify it only
-		 * once in the context of vcpu 0.
-		 */
-		error = vmx_allow_x2apic_msrs(vmx);
-		KASSERT(error == 0, ("%s: vmx_allow_x2apic_msrs error %d",
-		    __func__, error));
-	}
+	vmx_allow_x2apic_msrs(vmx, vcpuid);
 }
 
 static void
diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx.h b/usr/src/uts/i86pc/io/vmm/intel/vmx.h
index f86d812104..c0d1fdd7fb 100644
--- a/usr/src/uts/i86pc/io/vmm/intel/vmx.h
+++ b/usr/src/uts/i86pc/io/vmm/intel/vmx.h
@@ -137,13 +137,14 @@ typedef enum {
 struct vmx {
 	struct vmcs	vmcs[VM_MAXCPU];	/* one vmcs per virtual cpu */
 	struct apic_page apic_page[VM_MAXCPU];	/* one apic page per vcpu */
-	char		msr_bitmap[PAGE_SIZE];
+	uint8_t		*msr_bitmap[VM_MAXCPU];	/* one MSR bitmap per vCPU */
 	struct pir_desc	pir_desc[VM_MAXCPU];
 	uint64_t	guest_msrs[VM_MAXCPU][GUEST_MSR_NUM];
 	uint64_t	host_msrs[VM_MAXCPU][GUEST_MSR_NUM];
 	uint64_t	tsc_offset_active[VM_MAXCPU];
 	vmcs_state_t	vmcs_state[VM_MAXCPU];
 	uintptr_t	vmcs_pa[VM_MAXCPU];
+	void		*apic_access_page;
 	struct vmxctx	ctx[VM_MAXCPU];
 	struct vmxcap	cap[VM_MAXCPU];
 	struct vmxstate	state[VM_MAXCPU];
diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.c b/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.c
index df044fd09e..cf00426300 100644
--- a/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.c
+++ b/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.c
@@ -29,6 +29,7 @@
  */
 /*
  * Copyright 2020 Joyent, Inc.
+ * Copyright 2021 Oxide Computer Company
  */
 
 #include <sys/cdefs.h>
@@ -138,38 +139,57 @@ vmx_set_ctlreg(int ctl_reg, int true_ctl_reg, uint32_t ones_mask,
 }
 
 void
-msr_bitmap_initialize(char *bitmap)
+vmx_msr_bitmap_initialize(struct vmx *vmx)
 {
+	for (uint_t i = 0; i < VM_MAXCPU; i++) {
+		uint8_t *bitmap;
 
-	memset(bitmap, 0xff, PAGE_SIZE);
+		bitmap = kmem_alloc(PAGESIZE, KM_SLEEP);
+		VERIFY3U((uintptr_t)bitmap & PAGEOFFSET, ==, 0);
+		memset(bitmap, 0xff, PAGESIZE);
+
+		vmx->msr_bitmap[i] = bitmap;
+	}
 }
 
-int
-msr_bitmap_change_access(char *bitmap, uint_t msr, int access)
+void
+vmx_msr_bitmap_destroy(struct vmx *vmx)
+{
+	for (uint_t i = 0; i < VM_MAXCPU; i++) {
+		VERIFY3P(vmx->msr_bitmap[i], !=, NULL);
+		kmem_free(vmx->msr_bitmap[i], PAGESIZE);
+		vmx->msr_bitmap[i] = NULL;
+	}
+}
+
+void
+vmx_msr_bitmap_change_access(struct vmx *vmx, int vcpuid, uint_t msr, int acc)
 {
+	uint8_t *bitmap = vmx->msr_bitmap[vcpuid];
 	int byte, bit;
 
-	if (msr <= 0x00001FFF)
+	if (msr <= 0x00001FFF) {
 		byte = msr / 8;
-	else if (msr >= 0xC0000000 && msr <= 0xC0001FFF)
+	} else if (msr >= 0xC0000000 && msr <= 0xC0001FFF) {
 		byte = 1024 + (msr - 0xC0000000) / 8;
-	else
-		return (EINVAL);
+	} else {
+		panic("Invalid MSR for bitmap: %x", msr);
+	}
 
 	bit = msr & 0x7;
 
-	if (access & MSR_BITMAP_ACCESS_READ)
+	if (acc & MSR_BITMAP_ACCESS_READ) {
 		bitmap[byte] &= ~(1 << bit);
-	else
+	} else {
 		bitmap[byte] |= 1 << bit;
+	}
 
 	byte += 2048;
-	if (access & MSR_BITMAP_ACCESS_WRITE)
+	if (acc & MSR_BITMAP_ACCESS_WRITE) {
 		bitmap[byte] &= ~(1 << bit);
-	else
+	} else {
 		bitmap[byte] |= 1 << bit;
-
-	return (0);
+	}
 }
 
 static uint64_t misc_enable;
@@ -306,21 +326,46 @@ vmx_msr_init(void)
 void
 vmx_msr_guest_init(struct vmx *vmx, int vcpuid)
 {
-	uint64_t *guest_msrs;
+	uint64_t *guest_msrs = vmx->guest_msrs[vcpuid];
 
-	guest_msrs = vmx->guest_msrs[vcpuid];
+	/*
+	 * It is safe to allow direct access to MSR_GSBASE and
+	 * MSR_FSBASE.  The guest FSBASE and GSBASE are saved and
+	 * restored during vm-exit and vm-entry respectively. The host
+	 * FSBASE and GSBASE are always restored from the vmcs host
+	 * state area on vm-exit.
+	 *
+	 * The SYSENTER_CS/ESP/EIP MSRs are identical to FS/GSBASE in
+	 * how they are saved/restored so can be directly accessed by
+	 * the guest.
+	 *
+	 * MSR_EFER is saved and restored in the guest VMCS area on a VM
+	 * exit and entry respectively. It is also restored from the
+	 * host VMCS area on a VM exit.
+	 *
+	 * The TSC MSR is exposed read-only. Writes are disallowed as
+	 * that will impact the host TSC. If the guest does a write the
+	 * "use TSC offsetting" execution control is enabled and the
+	 * difference between the host TSC and the guest TSC is written
+	 * into the TSC offset in the VMCS.
+	 */
+	guest_msr_rw(vmx, vcpuid, MSR_GSBASE);
+	guest_msr_rw(vmx, vcpuid, MSR_FSBASE);
+	guest_msr_rw(vmx, vcpuid, MSR_SYSENTER_CS_MSR);
+	guest_msr_rw(vmx, vcpuid, MSR_SYSENTER_ESP_MSR);
+	guest_msr_rw(vmx, vcpuid, MSR_SYSENTER_EIP_MSR);
+	guest_msr_rw(vmx, vcpuid, MSR_EFER);
+	guest_msr_ro(vmx, vcpuid, MSR_TSC);
 
 	/*
-	 * The permissions bitmap is shared between all vcpus so initialize it
-	 * once when initializing the vBSP.
+	 * The guest may have direct access to these MSRs as they are
+	 * saved/restored in vmx_msr_guest_enter() and vmx_msr_guest_exit().
 	 */
-	if (vcpuid == 0) {
-		guest_msr_rw(vmx, MSR_LSTAR);
-		guest_msr_rw(vmx, MSR_CSTAR);
-		guest_msr_rw(vmx, MSR_STAR);
-		guest_msr_rw(vmx, MSR_SF_MASK);
-		guest_msr_rw(vmx, MSR_KGSBASE);
-	}
+	guest_msr_rw(vmx, vcpuid, MSR_LSTAR);
+	guest_msr_rw(vmx, vcpuid, MSR_CSTAR);
+	guest_msr_rw(vmx, vcpuid, MSR_STAR);
+	guest_msr_rw(vmx, vcpuid, MSR_SF_MASK);
+	guest_msr_rw(vmx, vcpuid, MSR_KGSBASE);
 
 	/*
 	 * Initialize guest IA32_PAT MSR with default value after reset.
diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.h b/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.h
index 4bc43d74f9..5df9fc631d 100644
--- a/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.h
+++ b/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.h
@@ -27,6 +27,9 @@
  *
  * $FreeBSD$
  */
+/*
+ * Copyright 2021 Oxide Computer Company
+ */
 
 #ifndef _VMX_MSR_H_
 #define	_VMX_MSR_H_
@@ -58,13 +61,14 @@ int vmx_set_ctlreg(int ctl_reg, int true_ctl_reg, uint32_t ones_mask,
 #define	MSR_BITMAP_ACCESS_READ	0x1
 #define	MSR_BITMAP_ACCESS_WRITE	0x2
 #define	MSR_BITMAP_ACCESS_RW	(MSR_BITMAP_ACCESS_READ|MSR_BITMAP_ACCESS_WRITE)
-void	msr_bitmap_initialize(char *bitmap);
-int	msr_bitmap_change_access(char *bitmap, uint_t msr, int access);
+void	vmx_msr_bitmap_initialize(struct vmx *);
+void	vmx_msr_bitmap_destroy(struct vmx *);
+void	vmx_msr_bitmap_change_access(struct vmx *, int, uint_t, int);
 
-#define	guest_msr_rw(vmx, msr) \
-	msr_bitmap_change_access((vmx)->msr_bitmap, (msr), MSR_BITMAP_ACCESS_RW)
+#define	guest_msr_rw(vmx, vcpuid, msr) \
+	vmx_msr_bitmap_change_access((vmx), (vcpuid), (msr), MSR_BITMAP_ACCESS_RW)
 
-#define	guest_msr_ro(vmx, msr) \
-	msr_bitmap_change_access((vmx)->msr_bitmap, (msr), MSR_BITMAP_ACCESS_READ)
+#define	guest_msr_ro(vmx, vcpuid, msr) \
+	vmx_msr_bitmap_change_access((vmx), (vcpuid), (msr), MSR_BITMAP_ACCESS_READ)
 
 #endif
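
With per-vCPU bitmaps in place, vmx_enable_x2apic_mode_vid() drops its vcpuid == 0 guard: each vCPU flips its own x2APIC MSR permissions as it enters x2APIC mode, and the APIC access mapping now persists for the life of the VM, torn down in vmx_vmcleanup() alongside the bitmaps. As a quick stand-alone check of where one affected register, the x2APIC TPR at MSR 0x808, lands in a permission bitmap (values follow the layout sketched earlier; purely illustrative):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define	MSR_APIC_TPR	0x808	/* x2APIC TPR, made read/write by the patch */

int
main(void)
{
	uint8_t *bitmap = malloc(4096);
	memset(bitmap, 0xff, 4096);		/* everything intercepted */

	/* Allow guest reads and writes of the TPR without a VM exit. */
	uint32_t byte = MSR_APIC_TPR / 8;	/* 0x101 */
	uint32_t bit = MSR_APIC_TPR & 0x7;	/* 0 */
	bitmap[byte] &= ~(1 << bit);		/* read bitmap */
	bitmap[byte + 2048] &= ~(1 << bit);	/* write bitmap */

	assert((bitmap[0x101] & 0x01) == 0);
	printf("TPR (0x%x) -> byte 0x%x, bit %u\n", MSR_APIC_TPR, byte, bit);
	free(bitmap);
	return (0);
}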
