author    Patrick Mooney <pmooney@pfmooney.com>   2020-08-19 02:31:33 +0000
committer Patrick Mooney <pmooney@oxide.computer> 2020-08-28 18:28:42 +0000
commit    007ca33219ffdc49281657f5f8a9ee1bbfc367ab (patch)
tree      0fb12165e1b770bdfb1c11900885fe3b0087f115
parent    638bc9f013400030354ab6566ae2a5726f7580fa (diff)
download  illumos-joyent-007ca33219ffdc49281657f5f8a9ee1bbfc367ab.tar.gz
13063 clarify VMCS interfaces in bhyve
13064 bhyve should not waste extra page for VMCS
Reviewed by: Robert Mustacchi <rm@fingolfin.org>
Reviewed by: Mike Zeller <mike.zeller@joyent.com>
Approved by: Dan McDonald <danmcd@joyent.com>
-rw-r--r--  usr/src/cmd/bhyve/bhyverun.c                 |    5
-rw-r--r--  usr/src/cmd/bhyvectl/bhyvectl.c              |   23
-rw-r--r--  usr/src/uts/i86pc/io/vmm/intel/ept.c         |   20
-rw-r--r--  usr/src/uts/i86pc/io/vmm/intel/offsets.in    |    1
-rw-r--r--  usr/src/uts/i86pc/io/vmm/intel/vmcs.c        |  448
-rw-r--r--  usr/src/uts/i86pc/io/vmm/intel/vmcs.h        |  125
-rw-r--r--  usr/src/uts/i86pc/io/vmm/intel/vmx.c         |  579
-rw-r--r--  usr/src/uts/i86pc/io/vmm/intel/vmx.h         |   28
-rw-r--r--  usr/src/uts/i86pc/io/vmm/intel/vmx_cpufunc.h |  244
-rw-r--r--  usr/src/uts/i86pc/io/vmm/intel/vmx_msr.c     |    7
-rw-r--r--  usr/src/uts/i86pc/io/vmm/intel/vmx_msr.h     |    2
11 files changed, 425 insertions, 1057 deletions
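
In outline, the change replaces the old error-returning VMCS accessors
(vmcs_getreg(), vmcs_setreg(), vmcs_getdesc(), vmcs_setdesc(),
vmcs_set_msr_save(), vmcs_init()) and the VMPTRLD()/VMCLEAR() macros with a
small panic-on-failure interface. The surviving surface, taken from the
vmcs.h hunk below:

	/* The VMCS interface after this change (from the vmcs.h hunk below). */
	uint32_t vmcs_field_encoding(int ident);
	void vmcs_seg_desc_encoding(int seg, uint32_t *base, uint32_t *lim,
	    uint32_t *acc);

	void vmcs_initialize(struct vmcs *vmcs, uintptr_t vmcs_pa);
	void vmcs_load(uintptr_t vmcs_pa);
	void vmcs_clear(uintptr_t vmcs_pa);

	uint64_t vmcs_read(uint32_t encoding);
	void vmcs_write(uint32_t encoding, uint64_t val);
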
diff --git a/usr/src/cmd/bhyve/bhyverun.c b/usr/src/cmd/bhyve/bhyverun.c
index d2a4032682..8b7166ce5e 100644
--- a/usr/src/cmd/bhyve/bhyverun.c
+++ b/usr/src/cmd/bhyve/bhyverun.c
@@ -737,7 +737,12 @@ vmexit_spinup_ap(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
return (VMEXIT_CONTINUE);
}
+#ifdef __FreeBSD__
#define DEBUG_EPT_MISCONFIG
+#else
+/* EPT misconfig debugging not possible now that raw VMCS access is gone */
+#endif
+
#ifdef DEBUG_EPT_MISCONFIG
#define VMCS_GUEST_PHYSICAL_ADDRESS 0x00002400
diff --git a/usr/src/cmd/bhyvectl/bhyvectl.c b/usr/src/cmd/bhyvectl/bhyvectl.c
index 22c72cf5df..e0041ede30 100644
--- a/usr/src/cmd/bhyvectl/bhyvectl.c
+++ b/usr/src/cmd/bhyvectl/bhyvectl.c
@@ -400,6 +400,7 @@ dump_vm_run_exitcode(struct vm_exit *vmexit, int vcpu)
#define MSR_AMD7TH_START 0xC0010000
#define MSR_AMD7TH_END 0xC0011FFF
+#ifdef __FreeBSD__
static const char *
msr_name(uint32_t msr)
{
@@ -563,6 +564,21 @@ vm_set_vmcs_field(struct vmctx *ctx, int vcpu, int field, uint64_t val)
return (vm_set_register(ctx, vcpu, VMCS_IDENT(field), val));
}
+#else /* __FreeBSD__ */
+/* VMCS does not allow arbitrary reads/writes */
+static int
+vm_get_vmcs_field(struct vmctx *ctx, int vcpu, int field, uint64_t *ret_val)
+{
+ *ret_val = 0;
+ return (0);
+}
+
+static int
+vm_set_vmcs_field(struct vmctx *ctx, int vcpu, int field, uint64_t val)
+{
+ return (EINVAL);
+}
+#endif /* __FreeBSD__ */
static int
vm_get_vmcb_field(struct vmctx *ctx, int vcpu, int off, int bytes,
@@ -2182,8 +2198,15 @@ main(int argc, char *argv[])
&addr);
}
+#ifdef __FreeBSD__
if (error == 0)
error = dump_msr_bitmap(vcpu, addr, cpu_intel);
+#else
+ /*
+ * Skip dumping the MSR bitmap since raw access to the VMCS is
+ * currently not possible.
+ */
+#endif /* __FreeBSD__ */
}
if (!error && (get_vpid_asid || get_all)) {
diff --git a/usr/src/uts/i86pc/io/vmm/intel/ept.c b/usr/src/uts/i86pc/io/vmm/intel/ept.c
index 5e5253780e..5e3bd6d309 100644
--- a/usr/src/uts/i86pc/io/vmm/intel/ept.c
+++ b/usr/src/uts/i86pc/io/vmm/intel/ept.c
@@ -59,7 +59,6 @@ __FBSDID("$FreeBSD$");
#include <machine/vmm.h>
-#include "vmx_cpufunc.h"
#include "ept.h"
#define EPT_SUPPORTS_EXEC_ONLY(cap) ((cap) & (1UL << 0))
@@ -171,31 +170,12 @@ ept_dump(uint64_t *ptp, int nlevels)
}
#endif
-#ifdef __FreeBSD__
-static void
-invept_single_context(void *arg)
-{
- struct invept_desc desc = *(struct invept_desc *)arg;
-
- invept(INVEPT_TYPE_SINGLE_CONTEXT, desc);
-}
-
-void
-ept_invalidate_mappings(u_long eptp)
-{
- struct invept_desc invept_desc = { 0 };
- invept_desc.eptp = eptp;
-
- smp_rendezvous(NULL, invept_single_context, NULL, &invept_desc);
-}
-#else /* __FreeBSD__ */
void
ept_invalidate_mappings(u_long eptp)
{
hma_vmx_invept_allcpus((uintptr_t)eptp);
}
-#endif /* __FreeBSD__ */
static int
ept_pinit(pmap_t pmap)
diff --git a/usr/src/uts/i86pc/io/vmm/intel/offsets.in b/usr/src/uts/i86pc/io/vmm/intel/offsets.in
index cc041eaefc..ca7f967f3b 100644
--- a/usr/src/uts/i86pc/io/vmm/intel/offsets.in
+++ b/usr/src/uts/i86pc/io/vmm/intel/offsets.in
@@ -22,7 +22,6 @@
#include <machine/pmap.h>
#include <machine/vmm.h>
-#include "intel/vmx_cpufunc.h"
#include "intel/vmx.h"
#include "vm/vm_glue.h"
diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmcs.c b/usr/src/uts/i86pc/io/vmm/intel/vmcs.c
index f1a08cc57d..51ae5fbd0c 100644
--- a/usr/src/uts/i86pc/io/vmm/intel/vmcs.c
+++ b/usr/src/uts/i86pc/io/vmm/intel/vmcs.c
@@ -39,59 +39,24 @@
*
* Copyright 2014 Pluribus Networks Inc.
* Copyright 2017 Joyent, Inc.
+ * Copyright 2020 Oxide Computer Company
*/
-#ifdef __FreeBSD__
-#include "opt_ddb.h"
-#endif
-
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
-#include <sys/sysctl.h>
#include <sys/systm.h>
-#include <sys/pcpu.h>
#include <vm/vm.h>
-#include <vm/pmap.h>
-#include <machine/segments.h>
#include <machine/vmm.h>
-#include "vmm_host.h"
-#include "vmx_cpufunc.h"
-#include "vmcs.h"
-#include "ept.h"
#include "vmx.h"
-#ifdef DDB
-#include <ddb/ddb.h>
-#endif
-
-SYSCTL_DECL(_hw_vmm_vmx);
-
-static int no_flush_rsb;
-SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, no_flush_rsb, CTLFLAG_RW,
- &no_flush_rsb, 0, "Do not flush RSB upon vmexit");
-
-static uint64_t
-vmcs_fix_regval(uint32_t encoding, uint64_t val)
-{
-
- switch (encoding) {
- case VMCS_GUEST_CR0:
- val = vmx_fix_cr0(val);
- break;
- case VMCS_GUEST_CR4:
- val = vmx_fix_cr4(val);
- break;
- default:
- break;
- }
- return (val);
-}
+/* Bits 0-30 of VMX_BASIC MSR contain VMCS revision identifier */
+#define VMX_BASIC_REVISION(v) ((v) & 0x7fffffff)
-static uint32_t
+uint32_t
vmcs_field_encoding(int ident)
{
switch (ident) {
@@ -138,15 +103,13 @@ vmcs_field_encoding(int ident)
case VM_REG_GUEST_ENTRY_INST_LENGTH:
return (VMCS_ENTRY_INST_LENGTH);
default:
- return (-1);
+ return (VMCS_INVALID_ENCODING);
}
-
}
-static int
+void
vmcs_seg_desc_encoding(int seg, uint32_t *base, uint32_t *lim, uint32_t *acc)
{
-
switch (seg) {
case VM_REG_GUEST_ES:
*base = VMCS_GUEST_ES_BASE;
@@ -199,364 +162,111 @@ vmcs_seg_desc_encoding(int seg, uint32_t *base, uint32_t *lim, uint32_t *acc)
*acc = VMCS_INVALID_ENCODING;
break;
default:
- return (EINVAL);
+ panic("invalid segment register %d", seg);
}
-
- return (0);
}
-int
-vmcs_getreg(struct vmcs *vmcs, int running, int ident, uint64_t *retval)
+void
+vmcs_clear(uintptr_t vmcs_pa)
{
- int error;
- uint32_t encoding;
-
- /*
- * If we need to get at vmx-specific state in the VMCS we can bypass
- * the translation of 'ident' to 'encoding' by simply setting the
- * sign bit. As it so happens the upper 16 bits are reserved (i.e
- * set to 0) in the encodings for the VMCS so we are free to use the
- * sign bit.
- */
- if (ident < 0)
- encoding = ident & 0x7fffffff;
- else
- encoding = vmcs_field_encoding(ident);
-
- if (encoding == (uint32_t)-1)
- return (EINVAL);
+ int err;
- if (!running)
- VMPTRLD(vmcs);
+ __asm __volatile("vmclear %[addr];"
+ VMX_SET_ERROR_CODE_ASM
+ : [error] "=r" (err)
+ : [addr] "m" (vmcs_pa)
+ : "memory");
- error = vmread(encoding, retval);
-
- if (!running)
- VMCLEAR(vmcs);
-
- return (error);
-}
-
-int
-vmcs_setreg(struct vmcs *vmcs, int running, int ident, uint64_t val)
-{
- int error;
- uint32_t encoding;
-
- if (ident < 0)
- encoding = ident & 0x7fffffff;
- else
- encoding = vmcs_field_encoding(ident);
-
- if (encoding == (uint32_t)-1)
- return (EINVAL);
-
- val = vmcs_fix_regval(encoding, val);
-
- if (!running)
- VMPTRLD(vmcs);
-
- error = vmwrite(encoding, val);
-
- if (!running)
- VMCLEAR(vmcs);
-
- return (error);
-}
-
-int
-vmcs_setdesc(struct vmcs *vmcs, int running, int seg, struct seg_desc *desc)
-{
- int error;
- uint32_t base, limit, access;
-
- error = vmcs_seg_desc_encoding(seg, &base, &limit, &access);
- if (error != 0)
- panic("vmcs_setdesc: invalid segment register %d", seg);
-
- if (!running)
- VMPTRLD(vmcs);
- if ((error = vmwrite(base, desc->base)) != 0)
- goto done;
-
- if ((error = vmwrite(limit, desc->limit)) != 0)
- goto done;
-
- if (access != VMCS_INVALID_ENCODING) {
- if ((error = vmwrite(access, desc->access)) != 0)
- goto done;
+ if (err != 0) {
+ panic("vmclear(%p) error %d", vmcs_pa, err);
}
-done:
- if (!running)
- VMCLEAR(vmcs);
- return (error);
-}
-
-int
-vmcs_getdesc(struct vmcs *vmcs, int running, int seg, struct seg_desc *desc)
-{
- int error;
- uint32_t base, limit, access;
- uint64_t u64;
-
- error = vmcs_seg_desc_encoding(seg, &base, &limit, &access);
- if (error != 0)
- panic("vmcs_getdesc: invalid segment register %d", seg);
- if (!running)
- VMPTRLD(vmcs);
- if ((error = vmread(base, &u64)) != 0)
- goto done;
- desc->base = u64;
-
- if ((error = vmread(limit, &u64)) != 0)
- goto done;
- desc->limit = u64;
-
- if (access != VMCS_INVALID_ENCODING) {
- if ((error = vmread(access, &u64)) != 0)
- goto done;
- desc->access = u64;
- }
-done:
- if (!running)
- VMCLEAR(vmcs);
- return (error);
+ /*
+ * A call to critical_enter() was made in vmcs_load() to prevent
+ * preemption. Now that the VMCS is unloaded, it is safe to relax that
+ * restriction.
+ */
+ critical_exit();
}
-int
-vmcs_set_msr_save(struct vmcs *vmcs, u_long g_area, u_int g_count)
+void
+vmcs_initialize(struct vmcs *vmcs, uintptr_t vmcs_pa)
{
- int error;
+ int err;
- VMPTRLD(vmcs);
+ /* set to VMCS revision */
+ vmcs->identifier = VMX_BASIC_REVISION(rdmsr(MSR_VMX_BASIC));
/*
- * Guest MSRs are saved in the VM-exit MSR-store area.
- * Guest MSRs are loaded from the VM-entry MSR-load area.
- * Both areas point to the same location in memory.
+ * Perform a vmclear on the VMCS, but without the critical section
+ * manipulation as done by vmcs_clear() above.
*/
- if ((error = vmwrite(VMCS_EXIT_MSR_STORE, g_area)) != 0)
- goto done;
- if ((error = vmwrite(VMCS_EXIT_MSR_STORE_COUNT, g_count)) != 0)
- goto done;
-
- if ((error = vmwrite(VMCS_ENTRY_MSR_LOAD, g_area)) != 0)
- goto done;
- if ((error = vmwrite(VMCS_ENTRY_MSR_LOAD_COUNT, g_count)) != 0)
- goto done;
-
- error = 0;
-done:
- VMCLEAR(vmcs);
- return (error);
+ __asm __volatile("vmclear %[addr];"
+ VMX_SET_ERROR_CODE_ASM
+ : [error] "=r" (err)
+ : [addr] "m" (vmcs_pa)
+ : "memory");
+
+ if (err != 0) {
+ panic("vmclear(%p) error %d", vmcs_pa, err);
+ }
}
-int
-vmcs_init(struct vmcs *vmcs)
+void
+vmcs_load(uintptr_t vmcs_pa)
{
- int error, codesel, datasel, tsssel;
- u_long cr0, cr4, efer;
- uint64_t pat;
-#ifdef __FreeBSD__
- uint64_t fsbase, idtrbase;
-#endif
-
- codesel = vmm_get_host_codesel();
- datasel = vmm_get_host_datasel();
- tsssel = vmm_get_host_tsssel();
+ int err;
/*
- * Make sure we have a "current" VMCS to work with.
+ * While the VMCS is loaded on the CPU for subsequent operations, it is
+ * important that the thread not be preempted. That is ensured with
+ * critical_enter() here, with a matching critical_exit() call in
+ * vmcs_clear() once the VMCS is unloaded.
*/
- VMPTRLD(vmcs);
-
- /* Host state */
-
- /* Initialize host IA32_PAT MSR */
- pat = vmm_get_host_pat();
- if ((error = vmwrite(VMCS_HOST_IA32_PAT, pat)) != 0)
- goto done;
+ critical_enter();
- /* Load the IA32_EFER MSR */
- efer = vmm_get_host_efer();
- if ((error = vmwrite(VMCS_HOST_IA32_EFER, efer)) != 0)
- goto done;
+ __asm __volatile("vmptrld %[addr];"
+ VMX_SET_ERROR_CODE_ASM
+ : [error] "=r" (err)
+ : [addr] "m" (vmcs_pa)
+ : "memory");
- /* Load the control registers */
-
- cr0 = vmm_get_host_cr0();
- if ((error = vmwrite(VMCS_HOST_CR0, cr0)) != 0)
- goto done;
-
- cr4 = vmm_get_host_cr4() | CR4_VMXE;
- if ((error = vmwrite(VMCS_HOST_CR4, cr4)) != 0)
- goto done;
-
- /* Load the segment selectors */
- if ((error = vmwrite(VMCS_HOST_ES_SELECTOR, datasel)) != 0)
- goto done;
-
- if ((error = vmwrite(VMCS_HOST_CS_SELECTOR, codesel)) != 0)
- goto done;
-
- if ((error = vmwrite(VMCS_HOST_SS_SELECTOR, datasel)) != 0)
- goto done;
-
- if ((error = vmwrite(VMCS_HOST_DS_SELECTOR, datasel)) != 0)
- goto done;
-
-#ifdef __FreeBSD__
- if ((error = vmwrite(VMCS_HOST_FS_SELECTOR, datasel)) != 0)
- goto done;
-
- if ((error = vmwrite(VMCS_HOST_GS_SELECTOR, datasel)) != 0)
- goto done;
-#else
- if ((error = vmwrite(VMCS_HOST_FS_SELECTOR, vmm_get_host_fssel())) != 0)
- goto done;
-
- if ((error = vmwrite(VMCS_HOST_GS_SELECTOR, vmm_get_host_gssel())) != 0)
- goto done;
-#endif
-
- if ((error = vmwrite(VMCS_HOST_TR_SELECTOR, tsssel)) != 0)
- goto done;
-
-#ifdef __FreeBSD__
- /*
- * Load the Base-Address for %fs and idtr.
- *
- * Note that we exclude %gs, tss and gdtr here because their base
- * address is pcpu specific.
- */
- fsbase = vmm_get_host_fsbase();
- if ((error = vmwrite(VMCS_HOST_FS_BASE, fsbase)) != 0)
- goto done;
-
- idtrbase = vmm_get_host_idtrbase();
- if ((error = vmwrite(VMCS_HOST_IDTR_BASE, idtrbase)) != 0)
- goto done;
+ if (err != 0) {
+ panic("vmptrld(%p) error %d", vmcs_pa, err);
+ }
+}
-#else /* __FreeBSD__ */
- /*
- * Configure host sysenter MSRs to be restored on VM exit.
- * The thread-specific MSR_INTC_SEP_ESP value is loaded in vmx_run.
- */
- if ((error = vmwrite(VMCS_HOST_IA32_SYSENTER_CS, KCS_SEL)) != 0)
- goto done;
- /* Natively defined as MSR_INTC_SEP_EIP */
- if ((error = vmwrite(VMCS_HOST_IA32_SYSENTER_EIP,
- rdmsr(MSR_SYSENTER_EIP_MSR))) != 0)
- goto done;
+uint64_t
+vmcs_read(uint32_t encoding)
+{
+ int error;
+ uint64_t val;
-#endif /* __FreeBSD__ */
+ __asm __volatile("vmread %[enc], %[val];"
+ VMX_SET_ERROR_CODE_ASM
+ : [error] "=r" (error), [val] "=r" (val)
+ : [enc] "r" ((uint64_t)encoding)
+ : "memory");
- /* instruction pointer */
- if (no_flush_rsb) {
- if ((error = vmwrite(VMCS_HOST_RIP,
- (u_long)vmx_exit_guest)) != 0)
- goto done;
- } else {
- if ((error = vmwrite(VMCS_HOST_RIP,
- (u_long)vmx_exit_guest_flush_rsb)) != 0)
- goto done;
+ if (error != 0) {
+ panic("vmread(%x) error %d", encoding, error);
}
- /* link pointer */
- if ((error = vmwrite(VMCS_LINK_POINTER, ~0)) != 0)
- goto done;
-done:
- VMCLEAR(vmcs);
- return (error);
+ return (val);
}
-#ifdef DDB
-extern int vmxon_enabled[];
-
-DB_SHOW_COMMAND(vmcs, db_show_vmcs)
+void
+vmcs_write(uint32_t encoding, uint64_t val)
{
- uint64_t cur_vmcs, val;
- uint32_t exit;
-
- if (!vmxon_enabled[curcpu]) {
- db_printf("VMX not enabled\n");
- return;
- }
+ int error;
- if (have_addr) {
- db_printf("Only current VMCS supported\n");
- return;
- }
+ __asm __volatile("vmwrite %[val], %[enc];"
+ VMX_SET_ERROR_CODE_ASM
+ : [error] "=r" (error)
+ : [val] "r" (val), [enc] "r" ((uint64_t)encoding)
+ : "memory");
- vmptrst(&cur_vmcs);
- if (cur_vmcs == VMCS_INITIAL) {
- db_printf("No current VM context\n");
- return;
- }
- db_printf("VMCS: %jx\n", cur_vmcs);
- db_printf("VPID: %lu\n", vmcs_read(VMCS_VPID));
- db_printf("Activity: ");
- val = vmcs_read(VMCS_GUEST_ACTIVITY);
- switch (val) {
- case 0:
- db_printf("Active");
- break;
- case 1:
- db_printf("HLT");
- break;
- case 2:
- db_printf("Shutdown");
- break;
- case 3:
- db_printf("Wait for SIPI");
- break;
- default:
- db_printf("Unknown: %#lx", val);
- }
- db_printf("\n");
- exit = vmcs_read(VMCS_EXIT_REASON);
- if (exit & 0x80000000)
- db_printf("Entry Failure Reason: %u\n", exit & 0xffff);
- else
- db_printf("Exit Reason: %u\n", exit & 0xffff);
- db_printf("Qualification: %#lx\n", vmcs_exit_qualification());
- db_printf("Guest Linear Address: %#lx\n",
- vmcs_read(VMCS_GUEST_LINEAR_ADDRESS));
- switch (exit & 0x8000ffff) {
- case EXIT_REASON_EXCEPTION:
- case EXIT_REASON_EXT_INTR:
- val = vmcs_read(VMCS_EXIT_INTR_INFO);
- db_printf("Interrupt Type: ");
- switch (val >> 8 & 0x7) {
- case 0:
- db_printf("external");
- break;
- case 2:
- db_printf("NMI");
- break;
- case 3:
- db_printf("HW exception");
- break;
- case 4:
- db_printf("SW exception");
- break;
- default:
- db_printf("?? %lu", val >> 8 & 0x7);
- break;
- }
- db_printf(" Vector: %lu", val & 0xff);
- if (val & 0x800)
- db_printf(" Error Code: %lx",
- vmcs_read(VMCS_EXIT_INTR_ERRCODE));
- db_printf("\n");
- break;
- case EXIT_REASON_EPT_FAULT:
- case EXIT_REASON_EPT_MISCONFIG:
- db_printf("Guest Physical Address: %#lx\n",
- vmcs_read(VMCS_GUEST_PHYSICAL_ADDRESS));
- break;
+ if (error != 0) {
+ panic("vmwrite(%x, %x) error %d", encoding, val, error);
}
- db_printf("VM-instruction error: %#lx\n", vmcs_instruction_error());
}
-#endif
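
One property of the new interface worth spelling out: vmcs_load() enters a
critical section that only vmcs_clear() exits, so every load must be matched
by a clear on the same thread before it can block or be preempted;
vmcs_initialize() performs a bare vmclear precisely because no load preceded
it. A minimal sketch of the pairing, with example_update_field() a
hypothetical caller:

	/* Hypothetical caller illustrating the load/clear pairing rule. */
	static void
	example_update_field(uintptr_t vmcs_pa, uint32_t encoding, uint64_t val)
	{
		vmcs_load(vmcs_pa);	/* critical_enter() + vmptrld */
		vmcs_write(encoding, val);
		vmcs_clear(vmcs_pa);	/* vmclear + critical_exit() */
	}
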
diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmcs.h b/usr/src/uts/i86pc/io/vmm/intel/vmcs.h
index edde5c6dd5..1713872556 100644
--- a/usr/src/uts/i86pc/io/vmm/intel/vmcs.h
+++ b/usr/src/uts/i86pc/io/vmm/intel/vmcs.h
@@ -30,6 +30,7 @@
/*
* Copyright 2017 Joyent, Inc.
+ * Copyright 2020 Oxide Computer Company
*/
#ifndef _VMCS_H_
@@ -41,125 +42,20 @@ struct vmcs {
uint32_t identifier;
uint32_t abort_code;
char _impl_specific[PAGE_SIZE - sizeof(uint32_t) * 2];
-#ifndef __FreeBSD__
- /*
- * Keep the physical address of the VMCS cached adjacent for the
- * structure so it can be referenced in contexts which are too delicate
- * for a call into the HAT. For the moment it means wasting a whole
- * page on padding for the PA value to maintain alignment, but it
- * allows the consumers of 'struct vmcs *' to easily access the value
- * without a significant change to the interface.
- */
- uint64_t vmcs_pa;
- char _pa_pad[PAGE_SIZE - sizeof (vm_paddr_t)];
-#endif
};
-#ifdef __FreeBSD__
-CTASSERT(sizeof(struct vmcs) == PAGE_SIZE);
-#else
-CTASSERT(sizeof(struct vmcs) == (2*PAGE_SIZE));
-#endif
+CTASSERT(sizeof (struct vmcs) == PAGE_SIZE);
-/* MSR save region is composed of an array of 'struct msr_entry' */
-struct msr_entry {
- uint32_t index;
- uint32_t reserved;
- uint64_t val;
+uint32_t vmcs_field_encoding(int ident);
+void vmcs_seg_desc_encoding(int seg, uint32_t *base, uint32_t *lim,
+ uint32_t *acc);
-};
+void vmcs_initialize(struct vmcs *vmcs, uintptr_t vmcs_pa);
-int vmcs_set_msr_save(struct vmcs *vmcs, u_long g_area, u_int g_count);
-int vmcs_init(struct vmcs *vmcs);
-int vmcs_getreg(struct vmcs *vmcs, int running, int ident, uint64_t *rv);
-int vmcs_setreg(struct vmcs *vmcs, int running, int ident, uint64_t val);
-int vmcs_getdesc(struct vmcs *vmcs, int running, int ident,
- struct seg_desc *desc);
-int vmcs_setdesc(struct vmcs *vmcs, int running, int ident,
- struct seg_desc *desc);
+void vmcs_load(uintptr_t vmcs_pa);
+void vmcs_clear(uintptr_t vmcs_pa);
-/*
- * Avoid header pollution caused by inline use of 'vtophys()' in vmx_cpufunc.h
- */
-#ifdef _VMX_CPUFUNC_H_
-static __inline uint64_t
-vmcs_read(uint32_t encoding)
-{
- int error;
- uint64_t val;
-
- error = vmread(encoding, &val);
- KASSERT(error == 0, ("vmcs_read(%u) error %d", encoding, error));
- return (val);
-}
-
-static __inline void
-vmcs_write(uint32_t encoding, uint64_t val)
-{
- int error;
-
- error = vmwrite(encoding, val);
- KASSERT(error == 0, ("vmcs_write(%u) error %d", encoding, error));
-}
-
-#ifndef __FreeBSD__
-/*
- * Due to header complexity combined with the need to cache the physical
- * address for the VMCS, these must be defined here rather than vmx_cpufunc.h.
- */
-static __inline int
-vmclear(struct vmcs *vmcs)
-{
- int error;
- uint64_t addr = vmcs->vmcs_pa;
-
- __asm __volatile("vmclear %[addr];"
- VMX_SET_ERROR_CODE
- : [error] "=r" (error)
- : [addr] "m" (*(uint64_t *)&addr)
- : "memory");
- return (error);
-}
-
-static __inline int
-vmptrld(struct vmcs *vmcs)
-{
- int error;
- uint64_t addr = vmcs->vmcs_pa;
-
- __asm __volatile("vmptrld %[addr];"
- VMX_SET_ERROR_CODE
- : [error] "=r" (error)
- : [addr] "m" (*(uint64_t *)&addr)
- : "memory");
- return (error);
-}
-
-static __inline void
-VMCLEAR(struct vmcs *vmcs)
-{
- int err;
-
- err = vmclear(vmcs);
- if (err != 0)
- panic("%s: vmclear(%p) error %d", __func__, vmcs, err);
-
- critical_exit();
-}
-
-static __inline void
-VMPTRLD(struct vmcs *vmcs)
-{
- int err;
-
- critical_enter();
-
- err = vmptrld(vmcs);
- if (err != 0)
- panic("%s: vmptrld(%p) error %d", __func__, vmcs, err);
-}
-#endif /* __FreeBSD__ */
-
-#endif /* _VMX_CPUFUNC_H_ */
+uint64_t vmcs_read(uint32_t encoding);
+void vmcs_write(uint32_t encoding, uint64_t val);
#define vmexit_instruction_length() vmcs_read(VMCS_EXIT_INSTRUCTION_LENGTH)
#define vmcs_guest_rip() vmcs_read(VMCS_GUEST_RIP)
@@ -177,7 +73,6 @@ VMPTRLD(struct vmcs *vmcs)
#define VMCS_INITIAL 0xffffffffffffffff
-#define VMCS_IDENT(encoding) ((encoding) | 0x80000000)
/*
* VMCS field encodings from Appendix H, Intel Architecture Manual Vol3B.
*/
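
Dropping VMCS_IDENT() is what eliminates "raw" VMCS access: userland could
previously OR the sign bit into a field encoding and pass it through
vm_get_register()/vm_set_register(), and vmcs_getreg()/vmcs_setreg() would
strip the bit and vmread/vmwrite the field directly. The stubbed helpers in
bhyvectl.c and the disabled DEBUG_EPT_MISCONFIG path in bhyverun.c above both
consumed that interface. The removed trick, for reference:

	/* Removed: the sign bit marked a raw encoding rather than an ident. */
	#define	VMCS_IDENT(encoding)	((encoding) | 0x80000000)

	if (ident < 0)
		encoding = ident & 0x7fffffff;	/* raw encoding from userland */
	else
		encoding = vmcs_field_encoding(ident);
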
diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx.c b/usr/src/uts/i86pc/io/vmm/intel/vmx.c
index d07e57aa7f..0af56c34e8 100644
--- a/usr/src/uts/i86pc/io/vmm/intel/vmx.c
+++ b/usr/src/uts/i86pc/io/vmm/intel/vmx.c
@@ -88,7 +88,6 @@ __FBSDID("$FreeBSD$");
#include "vlapic_priv.h"
#include "ept.h"
-#include "vmx_cpufunc.h"
#include "vmcs.h"
#include "vmx.h"
#include "vmx_msr.h"
@@ -172,11 +171,6 @@ SYSCTL_DECL(_hw_vmm);
SYSCTL_NODE(_hw_vmm, OID_AUTO, vmx, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL,
NULL);
-#ifdef __FreeBSD__
-int vmxon_enabled[MAXCPU];
-static char vmxon_region[MAXCPU][PAGE_SIZE] __aligned(PAGE_SIZE);
-#endif /*__FreeBSD__ */
-
static uint32_t pinbased_ctls, procbased_ctls, procbased_ctls2;
static uint32_t exit_ctls, entry_ctls;
@@ -196,10 +190,15 @@ static int vmx_initialized;
SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, initialized, CTLFLAG_RD,
&vmx_initialized, 0, "Intel VMX initialized");
+static int no_flush_rsb;
+SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, no_flush_rsb, CTLFLAG_RW,
+ &no_flush_rsb, 0, "Do not flush RSB upon vmexit");
+
/*
* Optional capabilities
*/
#ifdef __FreeBSD__
+SYSCTL_DECL(_hw_vmm_vmx);
static SYSCTL_NODE(_hw_vmm_vmx, OID_AUTO, cap,
CTLFLAG_RW | CTLFLAG_MPSAFE, NULL,
NULL);
@@ -242,6 +241,13 @@ int guest_l1d_flush_sw;
SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, l1d_flush_sw, CTLFLAG_RD,
&guest_l1d_flush_sw, 0, NULL);
+/* MSR save region is composed of an array of 'struct msr_entry' */
+struct msr_entry {
+ uint32_t index;
+ uint32_t reserved;
+ uint64_t val;
+};
+
static struct msr_entry msr_load_list[1] __aligned(16);
/*
@@ -332,11 +338,8 @@ SDT_PROBE_DEFINE4(vmm, vmx, exit, return,
static int vmx_getdesc(void *arg, int vcpu, int reg, struct seg_desc *desc);
static int vmx_getreg(void *arg, int vcpu, int reg, uint64_t *retval);
-static int vmxctx_setreg(struct vmxctx *vmxctx, int reg, uint64_t val);
static void vmx_inject_pir(struct vlapic *vlapic);
-#ifndef __FreeBSD__
-static int vmx_apply_tsc_adjust(struct vmx *, int);
-#endif /* __FreeBSD__ */
+static void vmx_apply_tsc_adjust(struct vmx *, int);
#ifdef KTR
static const char *
@@ -506,17 +509,15 @@ vmx_allow_x2apic_msrs(struct vmx *vmx)
return (error);
}
-u_long
+static u_long
vmx_fix_cr0(u_long cr0)
{
-
return ((cr0 | cr0_ones_mask) & ~cr0_zeros_mask);
}
-u_long
+static u_long
vmx_fix_cr4(u_long cr4)
{
-
return ((cr4 | cr4_ones_mask) & ~cr4_zeros_mask);
}
@@ -847,45 +848,12 @@ vmx_trigger_hostintr(int vector)
#endif /* __FreeBSD__ */
}
-static int
-vmx_setup_cr_shadow(int which, struct vmcs *vmcs, uint32_t initial)
-{
- int error, mask_ident, shadow_ident;
- uint64_t mask_value;
-
- if (which != 0 && which != 4)
- panic("vmx_setup_cr_shadow: unknown cr%d", which);
-
- if (which == 0) {
- mask_ident = VMCS_CR0_MASK;
- mask_value = cr0_ones_mask | cr0_zeros_mask;
- shadow_ident = VMCS_CR0_SHADOW;
- } else {
- mask_ident = VMCS_CR4_MASK;
- mask_value = cr4_ones_mask | cr4_zeros_mask;
- shadow_ident = VMCS_CR4_SHADOW;
- }
-
- error = vmcs_setreg(vmcs, 0, VMCS_IDENT(mask_ident), mask_value);
- if (error)
- return (error);
-
- error = vmcs_setreg(vmcs, 0, VMCS_IDENT(shadow_ident), initial);
- if (error)
- return (error);
-
- return (0);
-}
-#define vmx_setup_cr0_shadow(vmcs,init) vmx_setup_cr_shadow(0, (vmcs), (init))
-#define vmx_setup_cr4_shadow(vmcs,init) vmx_setup_cr_shadow(4, (vmcs), (init))
-
static void *
vmx_vminit(struct vm *vm, pmap_t pmap)
{
uint16_t vpid[VM_MAXCPU];
- int i, error;
+ int i, error, datasel;
struct vmx *vmx;
- struct vmcs *vmcs;
uint32_t exc_bitmap;
uint16_t maxcpus;
uint32_t proc_ctls, proc2_ctls, pin_ctls;
@@ -974,6 +942,7 @@ vmx_vminit(struct vm *vm, pmap_t pmap)
}
maxcpus = vm_get_maxcpus(vm);
+ datasel = vmm_get_host_datasel();
for (i = 0; i < maxcpus; i++) {
/*
* Cache physical address lookups for various components which
@@ -984,31 +953,58 @@ vmx_vminit(struct vm *vm, pmap_t pmap)
vm_paddr_t apic_page_pa = vtophys(&vmx->apic_page[i]);
vm_paddr_t pir_desc_pa = vtophys(&vmx->pir_desc[i]);
- vmcs = &vmx->vmcs[i];
- vmcs->identifier = vmx_revision();
- vmcs->vmcs_pa = (uint64_t)vtophys(vmcs);
- error = vmclear(vmcs);
- if (error != 0) {
- panic("vmx_vminit: vmclear error %d on vcpu %d\n",
- error, i);
- }
+ vmx->vmcs_pa[i] = (uintptr_t)vtophys(&vmx->vmcs[i]);
+ vmcs_initialize(&vmx->vmcs[i], vmx->vmcs_pa[i]);
vmx_msr_guest_init(vmx, i);
- error = vmcs_init(vmcs);
- KASSERT(error == 0, ("vmcs_init error %d", error));
+ vmcs_load(vmx->vmcs_pa[i]);
- VMPTRLD(vmcs);
- error = 0;
+ vmcs_write(VMCS_HOST_IA32_PAT, vmm_get_host_pat());
+ vmcs_write(VMCS_HOST_IA32_EFER, vmm_get_host_efer());
+
+ /* Load the control registers */
+ vmcs_write(VMCS_HOST_CR0, vmm_get_host_cr0());
+ vmcs_write(VMCS_HOST_CR4, vmm_get_host_cr4() | CR4_VMXE);
+
+ /* Load the segment selectors */
+ vmcs_write(VMCS_HOST_CS_SELECTOR, vmm_get_host_codesel());
+
+ vmcs_write(VMCS_HOST_ES_SELECTOR, datasel);
+ vmcs_write(VMCS_HOST_SS_SELECTOR, datasel);
+ vmcs_write(VMCS_HOST_DS_SELECTOR, datasel);
+
+ vmcs_write(VMCS_HOST_FS_SELECTOR, vmm_get_host_fssel());
+ vmcs_write(VMCS_HOST_GS_SELECTOR, vmm_get_host_gssel());
+ vmcs_write(VMCS_HOST_TR_SELECTOR, vmm_get_host_tsssel());
+
+ /*
+ * Configure host sysenter MSRs to be restored on VM exit.
+ * The thread-specific MSR_INTC_SEP_ESP value is loaded in vmx_run.
+ */
+ vmcs_write(VMCS_HOST_IA32_SYSENTER_CS, KCS_SEL);
+ vmcs_write(VMCS_HOST_IA32_SYSENTER_EIP,
+ rdmsr(MSR_SYSENTER_EIP_MSR));
+
+ /* instruction pointer */
+ if (no_flush_rsb) {
+ vmcs_write(VMCS_HOST_RIP, (uint64_t)vmx_exit_guest);
+ } else {
+ vmcs_write(VMCS_HOST_RIP,
+ (uint64_t)vmx_exit_guest_flush_rsb);
+ }
- error += vmwrite(VMCS_EPTP, vmx->eptp);
- error += vmwrite(VMCS_PIN_BASED_CTLS, pin_ctls);
- error += vmwrite(VMCS_PRI_PROC_BASED_CTLS, proc_ctls);
- error += vmwrite(VMCS_SEC_PROC_BASED_CTLS, proc2_ctls);
- error += vmwrite(VMCS_EXIT_CTLS, exit_ctls);
- error += vmwrite(VMCS_ENTRY_CTLS, entry_ctls);
- error += vmwrite(VMCS_MSR_BITMAP, msr_bitmap_pa);
- error += vmwrite(VMCS_VPID, vpid[i]);
+ /* link pointer */
+ vmcs_write(VMCS_LINK_POINTER, ~0);
+
+ vmcs_write(VMCS_EPTP, vmx->eptp);
+ vmcs_write(VMCS_PIN_BASED_CTLS, pin_ctls);
+ vmcs_write(VMCS_PRI_PROC_BASED_CTLS, proc_ctls);
+ vmcs_write(VMCS_SEC_PROC_BASED_CTLS, proc2_ctls);
+ vmcs_write(VMCS_EXIT_CTLS, exit_ctls);
+ vmcs_write(VMCS_ENTRY_CTLS, entry_ctls);
+ vmcs_write(VMCS_MSR_BITMAP, msr_bitmap_pa);
+ vmcs_write(VMCS_VPID, vpid[i]);
if (guest_l1d_flush && !guest_l1d_flush_sw) {
vmcs_write(VMCS_ENTRY_MSR_LOAD, pmap_kextract(
@@ -1024,28 +1020,39 @@ vmx_vminit(struct vm *vm, pmap_t pmap)
exc_bitmap = 0xffffffff;
else
exc_bitmap = 1 << IDT_MC;
- error += vmwrite(VMCS_EXCEPTION_BITMAP, exc_bitmap);
+ vmcs_write(VMCS_EXCEPTION_BITMAP, exc_bitmap);
vmx->ctx[i].guest_dr6 = DBREG_DR6_RESERVED1;
- error += vmwrite(VMCS_GUEST_DR7, DBREG_DR7_RESERVED1);
+ vmcs_write(VMCS_GUEST_DR7, DBREG_DR7_RESERVED1);
if (vmx_cap_en(vmx, VMX_CAP_TPR_SHADOW)) {
- error += vmwrite(VMCS_VIRTUAL_APIC, apic_page_pa);
+ vmcs_write(VMCS_VIRTUAL_APIC, apic_page_pa);
}
if (vmx_cap_en(vmx, VMX_CAP_APICV)) {
- error += vmwrite(VMCS_APIC_ACCESS, APIC_ACCESS_ADDRESS);
- error += vmwrite(VMCS_EOI_EXIT0, 0);
- error += vmwrite(VMCS_EOI_EXIT1, 0);
- error += vmwrite(VMCS_EOI_EXIT2, 0);
- error += vmwrite(VMCS_EOI_EXIT3, 0);
+ vmcs_write(VMCS_APIC_ACCESS, APIC_ACCESS_ADDRESS);
+ vmcs_write(VMCS_EOI_EXIT0, 0);
+ vmcs_write(VMCS_EOI_EXIT1, 0);
+ vmcs_write(VMCS_EOI_EXIT2, 0);
+ vmcs_write(VMCS_EOI_EXIT3, 0);
}
if (vmx_cap_en(vmx, VMX_CAP_APICV_PIR)) {
- error += vmwrite(VMCS_PIR_VECTOR, pirvec);
- error += vmwrite(VMCS_PIR_DESC, pir_desc_pa);
+ vmcs_write(VMCS_PIR_VECTOR, pirvec);
+ vmcs_write(VMCS_PIR_DESC, pir_desc_pa);
}
- VMCLEAR(vmcs);
- KASSERT(error == 0, ("vmx_vminit: error customizing the vmcs"));
+
+ /*
+ * Set up the CR0/4 masks and configure the read shadow state
+ * to the power-on register value from the Intel Sys Arch.
+ * CR0 - 0x60000010
+ * CR4 - 0
+ */
+ vmcs_write(VMCS_CR0_MASK, cr0_ones_mask | cr0_zeros_mask);
+ vmcs_write(VMCS_CR0_SHADOW, 0x60000010);
+ vmcs_write(VMCS_CR4_MASK, cr4_ones_mask | cr4_zeros_mask);
+ vmcs_write(VMCS_CR4_SHADOW, 0);
+
+ vmcs_clear(vmx->vmcs_pa[i]);
vmx->cap[i].set = 0;
vmx->cap[i].proc_ctls = proc_ctls;
@@ -1056,19 +1063,6 @@ vmx_vminit(struct vm *vm, pmap_t pmap)
vmx->state[i].lastcpu = NOCPU;
vmx->state[i].vpid = vpid[i];
- /*
- * Set up the CR0/4 shadows, and init the read shadow
- * to the power-on register value from the Intel Sys Arch.
- * CR0 - 0x60000010
- * CR4 - 0
- */
- error = vmx_setup_cr0_shadow(vmcs, 0x60000010);
- if (error != 0)
- panic("vmx_setup_cr0_shadow %d", error);
-
- error = vmx_setup_cr4_shadow(vmcs, 0);
- if (error != 0)
- panic("vmx_setup_cr4_shadow %d", error);
vmx->ctx[i].pmap = pmap;
}
@@ -1125,6 +1119,33 @@ vmx_astpending_trace(struct vmx *vmx, int vcpu, uint64_t rip)
static VMM_STAT_INTEL(VCPU_INVVPID_SAVED, "Number of vpid invalidations saved");
static VMM_STAT_INTEL(VCPU_INVVPID_DONE, "Number of vpid invalidations done");
+#define INVVPID_TYPE_ADDRESS 0UL
+#define INVVPID_TYPE_SINGLE_CONTEXT 1UL
+#define INVVPID_TYPE_ALL_CONTEXTS 2UL
+
+struct invvpid_desc {
+ uint16_t vpid;
+ uint16_t _res1;
+ uint32_t _res2;
+ uint64_t linear_addr;
+};
+CTASSERT(sizeof(struct invvpid_desc) == 16);
+
+static __inline void
+invvpid(uint64_t type, struct invvpid_desc desc)
+{
+ int error;
+
+ __asm __volatile("invvpid %[desc], %[type];"
+ VMX_SET_ERROR_CODE_ASM
+ : [error] "=r" (error)
+ : [desc] "m" (desc), [type] "r" (type)
+ : "memory");
+
+ if (error)
+ panic("invvpid error %d", error);
+}
+
/*
* Invalidate guest mappings identified by its vpid from the TLB.
*/
@@ -1192,7 +1213,6 @@ vmx_set_pcpu_defaults(struct vmx *vmx, int vcpu, pmap_t pmap)
{
struct vmxstate *vmxstate;
-#ifndef __FreeBSD__
/*
* Regardless of whether the VM appears to have migrated between CPUs,
* save the host sysenter stack pointer. As it points to the kernel
@@ -1205,8 +1225,7 @@ vmx_set_pcpu_defaults(struct vmx *vmx, int vcpu, pmap_t pmap)
* Perform any needed TSC_OFFSET adjustment based on TSC_MSR writes or
* migration between host CPUs with differing TSC values.
*/
- VERIFY0(vmx_apply_tsc_adjust(vmx, vcpu));
-#endif
+ vmx_apply_tsc_adjust(vmx, vcpu);
vmxstate = &vmx->state[vcpu];
if (vmxstate->lastcpu == curcpu)
@@ -1216,10 +1235,8 @@ vmx_set_pcpu_defaults(struct vmx *vmx, int vcpu, pmap_t pmap)
vmm_stat_incr(vmx->vm, vcpu, VCPU_MIGRATIONS, 1);
-#ifndef __FreeBSD__
/* Load the per-CPU IDT address */
vmcs_write(VMCS_HOST_IDTR_BASE, vmm_get_host_idtrbase());
-#endif
vmcs_write(VMCS_HOST_TR_BASE, vmm_get_host_trbase());
vmcs_write(VMCS_HOST_GDTR_BASE, vmm_get_host_gdtrbase());
vmcs_write(VMCS_HOST_GS_BASE, vmm_get_host_gsbase());
@@ -1275,23 +1292,6 @@ vmx_clear_nmi_window_exiting(struct vmx *vmx, int vcpu)
VCPU_CTR0(vmx->vm, vcpu, "Disabling NMI window exiting");
}
-#ifdef __FreeBSD__
-int
-vmx_set_tsc_offset(struct vmx *vmx, int vcpu, uint64_t offset)
-{
- int error;
-
- if ((vmx->cap[vcpu].proc_ctls & PROCBASED_TSC_OFFSET) == 0) {
- vmx->cap[vcpu].proc_ctls |= PROCBASED_TSC_OFFSET;
- vmcs_write(VMCS_PRI_PROC_BASED_CTLS, vmx->cap[vcpu].proc_ctls);
- VCPU_CTR0(vmx->vm, vcpu, "Enabling TSC offsetting");
- }
-
- error = vmwrite(VMCS_TSC_OFFSET, offset);
-
- return (error);
-}
-#else /* __FreeBSD__ */
/*
* Set the TSC adjustment, taking into account the offsets measured between
* host physical CPUs. This is required even if the guest has not set a TSC
@@ -1299,24 +1299,20 @@ vmx_set_tsc_offset(struct vmx *vmx, int vcpu, uint64_t offset)
* migrated onto. Without this mitigation, un-synched host TSCs will convey
* the appearance of TSC time-travel to the guest as its vCPUs migrate.
*/
-static int
+static void
vmx_apply_tsc_adjust(struct vmx *vmx, int vcpu)
{
extern hrtime_t tsc_gethrtime_tick_delta(void);
const uint64_t target_offset = (vcpu_tsc_offset(vmx->vm, vcpu) +
(uint64_t)tsc_gethrtime_tick_delta());
- int error = 0;
ASSERT(vmx->cap[vcpu].proc_ctls & PROCBASED_TSC_OFFSET);
if (vmx->tsc_offset_active[vcpu] != target_offset) {
- error = vmwrite(VMCS_TSC_OFFSET, target_offset);
+ vmcs_write(VMCS_TSC_OFFSET, target_offset);
vmx->tsc_offset_active[vcpu] = target_offset;
}
-
- return (error);
}
-#endif /* __FreeBSD__ */
#define NMI_BLOCKING (VMCS_INTERRUPTIBILITY_NMI_BLOCKING | \
VMCS_INTERRUPTIBILITY_MOVSS_BLOCKING)
@@ -2226,9 +2222,7 @@ emulate_wrmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t val, bool *retu)
static int
emulate_rdmsr(struct vmx *vmx, int vcpuid, u_int num, bool *retu)
{
- struct vmxctx *vmxctx;
uint64_t result;
- uint32_t eax, edx;
int error;
if (lapic_msr(num))
@@ -2237,14 +2231,8 @@ emulate_rdmsr(struct vmx *vmx, int vcpuid, u_int num, bool *retu)
error = vmx_rdmsr(vmx, vcpuid, num, &result, retu);
if (error == 0) {
- eax = result;
- vmxctx = &vmx->ctx[vcpuid];
- error = vmxctx_setreg(vmxctx, VM_REG_GUEST_RAX, eax);
- KASSERT(error == 0, ("vmxctx_setreg(rax) error %d", error));
-
- edx = result >> 32;
- error = vmxctx_setreg(vmxctx, VM_REG_GUEST_RDX, edx);
- KASSERT(error == 0, ("vmxctx_setreg(rdx) error %d", error));
+ vmx->ctx[vcpuid].guest_rax = (uint32_t)result;
+ vmx->ctx[vcpuid].guest_rdx = result >> 32;
}
return (error);
@@ -2582,9 +2570,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
}
if (intr_vec == IDT_PF) {
- error = vmxctx_setreg(vmxctx, VM_REG_GUEST_CR2, qual);
- KASSERT(error == 0, ("%s: vmxctx_setreg(cr2) error %d",
- __func__, error));
+ vmxctx->guest_cr2 = qual;
}
/*
@@ -2881,7 +2867,7 @@ vmx_run(void *arg, int vcpu, register_t rip, pmap_t pmap,
struct vmx *vmx;
struct vm *vm;
struct vmxctx *vmxctx;
- struct vmcs *vmcs;
+ uintptr_t vmcs_pa;
struct vm_exit *vmexit;
struct vlapic *vlapic;
uint32_t exit_reason;
@@ -2892,7 +2878,7 @@ vmx_run(void *arg, int vcpu, register_t rip, pmap_t pmap,
vmx = arg;
vm = vmx->vm;
- vmcs = &vmx->vmcs[vcpu];
+ vmcs_pa = vmx->vmcs_pa[vcpu];
vmxctx = &vmx->ctx[vcpu];
vlapic = vm_lapic(vm, vcpu);
vmexit = vm_exitinfo(vm, vcpu);
@@ -2903,7 +2889,7 @@ vmx_run(void *arg, int vcpu, register_t rip, pmap_t pmap,
vmx_msr_guest_enter(vmx, vcpu);
- VMPTRLD(vmcs);
+ vmcs_load(vmcs_pa);
#ifndef __FreeBSD__
VERIFY(vmx->vmcs_state[vcpu] == VS_NONE && curthread->t_preempt != 0);
@@ -3117,7 +3103,7 @@ vmx_run(void *arg, int vcpu, register_t rip, pmap_t pmap,
VCPU_CTR1(vm, vcpu, "returning from vmx_run: exitcode %d",
vmexit->exitcode);
- VMCLEAR(vmcs);
+ vmcs_clear(vmcs_pa);
vmx_msr_guest_exit(vmx, vcpu);
#ifndef __FreeBSD__
@@ -3150,7 +3136,6 @@ vmx_vmcleanup(void *arg)
static register_t *
vmxctx_regptr(struct vmxctx *vmxctx, int reg)
{
-
switch (reg) {
case VM_REG_GUEST_RAX:
return (&vmxctx->guest_rax);
@@ -3201,157 +3186,129 @@ vmxctx_regptr(struct vmxctx *vmxctx, int reg)
}
static int
-vmxctx_getreg(struct vmxctx *vmxctx, int reg, uint64_t *retval)
+vmx_getreg(void *arg, int vcpu, int reg, uint64_t *retval)
{
+ int running, hostcpu, err;
+ struct vmx *vmx = arg;
register_t *regp;
- if ((regp = vmxctx_regptr(vmxctx, reg)) != NULL) {
- *retval = *regp;
- return (0);
- } else
- return (EINVAL);
-}
-
-static int
-vmxctx_setreg(struct vmxctx *vmxctx, int reg, uint64_t val)
-{
- register_t *regp;
+ running = vcpu_is_running(vmx->vm, vcpu, &hostcpu);
+ if (running && hostcpu != curcpu)
+ panic("vmx_getreg: %s%d is running", vm_name(vmx->vm), vcpu);
- if ((regp = vmxctx_regptr(vmxctx, reg)) != NULL) {
- *regp = val;
+ /* VMCS access not required for ctx reads */
+ if ((regp = vmxctx_regptr(&vmx->ctx[vcpu], reg)) != NULL) {
+ *retval = *regp;
return (0);
- } else
- return (EINVAL);
-}
-
-static int
-vmx_get_intr_shadow(struct vmx *vmx, int vcpu, int running, uint64_t *retval)
-{
- uint64_t gi;
- int error;
-
- error = vmcs_getreg(&vmx->vmcs[vcpu], running,
- VMCS_IDENT(VMCS_GUEST_INTERRUPTIBILITY), &gi);
- *retval = (gi & HWINTR_BLOCKING) ? 1 : 0;
- return (error);
-}
-
-static int
-vmx_modify_intr_shadow(struct vmx *vmx, int vcpu, int running, uint64_t val)
-{
- struct vmcs *vmcs;
- uint64_t gi;
- int error, ident;
-
- /*
- * Forcing the vcpu into an interrupt shadow is not supported.
- */
- if (val) {
- error = EINVAL;
- goto done;
}
- vmcs = &vmx->vmcs[vcpu];
- ident = VMCS_IDENT(VMCS_GUEST_INTERRUPTIBILITY);
- error = vmcs_getreg(vmcs, running, ident, &gi);
- if (error == 0) {
- gi &= ~HWINTR_BLOCKING;
- error = vmcs_setreg(vmcs, running, ident, gi);
+ if (!running) {
+ vmcs_load(vmx->vmcs_pa[vcpu]);
}
-done:
- VCPU_CTR2(vmx->vm, vcpu, "Setting intr_shadow to %#lx %s", val,
- error ? "failed" : "succeeded");
- return (error);
-}
-
-static int
-vmx_shadow_reg(int reg)
-{
- int shreg;
- shreg = -1;
+ err = EINVAL;
+ if (reg == VM_REG_GUEST_INTR_SHADOW) {
+ uint64_t gi = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY);
+ *retval = (gi & HWINTR_BLOCKING) ? 1 : 0;
+ err = 0;
+ } else {
+ uint32_t encoding;
- switch (reg) {
- case VM_REG_GUEST_CR0:
- shreg = VMCS_CR0_SHADOW;
- break;
- case VM_REG_GUEST_CR4:
- shreg = VMCS_CR4_SHADOW;
- break;
- default:
- break;
+ encoding = vmcs_field_encoding(reg);
+ if (encoding != VMCS_INVALID_ENCODING) {
+ *retval = vmcs_read(encoding);
+ err = 0;
+ }
}
- return (shreg);
-}
-
-static int
-vmx_getreg(void *arg, int vcpu, int reg, uint64_t *retval)
-{
- int running, hostcpu;
- struct vmx *vmx = arg;
-
- running = vcpu_is_running(vmx->vm, vcpu, &hostcpu);
- if (running && hostcpu != curcpu)
- panic("vmx_getreg: %s%d is running", vm_name(vmx->vm), vcpu);
-
- if (reg == VM_REG_GUEST_INTR_SHADOW)
- return (vmx_get_intr_shadow(vmx, vcpu, running, retval));
-
- if (vmxctx_getreg(&vmx->ctx[vcpu], reg, retval) == 0)
- return (0);
+ if (!running) {
+ vmcs_clear(vmx->vmcs_pa[vcpu]);
+ }
- return (vmcs_getreg(&vmx->vmcs[vcpu], running, reg, retval));
+ return (err);
}
static int
vmx_setreg(void *arg, int vcpu, int reg, uint64_t val)
{
- int error, hostcpu, running, shadow;
- uint64_t ctls;
- pmap_t pmap;
+ int running, hostcpu, error;
struct vmx *vmx = arg;
+ register_t *regp;
running = vcpu_is_running(vmx->vm, vcpu, &hostcpu);
if (running && hostcpu != curcpu)
panic("vmx_setreg: %s%d is running", vm_name(vmx->vm), vcpu);
- if (reg == VM_REG_GUEST_INTR_SHADOW)
- return (vmx_modify_intr_shadow(vmx, vcpu, running, val));
-
- if (vmxctx_setreg(&vmx->ctx[vcpu], reg, val) == 0)
+ /* VMCS access not required for ctx writes */
+ if ((regp = vmxctx_regptr(&vmx->ctx[vcpu], reg)) != NULL) {
+ *regp = val;
return (0);
+ }
- error = vmcs_setreg(&vmx->vmcs[vcpu], running, reg, val);
-
- if (error == 0) {
- /*
- * If the "load EFER" VM-entry control is 1 then the
- * value of EFER.LMA must be identical to "IA-32e mode guest"
- * bit in the VM-entry control.
- */
- if ((entry_ctls & VM_ENTRY_LOAD_EFER) != 0 &&
- (reg == VM_REG_GUEST_EFER)) {
- vmcs_getreg(&vmx->vmcs[vcpu], running,
- VMCS_IDENT(VMCS_ENTRY_CTLS), &ctls);
- if (val & EFER_LMA)
- ctls |= VM_ENTRY_GUEST_LMA;
- else
- ctls &= ~VM_ENTRY_GUEST_LMA;
- vmcs_setreg(&vmx->vmcs[vcpu], running,
- VMCS_IDENT(VMCS_ENTRY_CTLS), ctls);
- }
+ if (!running) {
+ vmcs_load(vmx->vmcs_pa[vcpu]);
+ }
- shadow = vmx_shadow_reg(reg);
- if (shadow > 0) {
+ if (reg == VM_REG_GUEST_INTR_SHADOW) {
+ if (val != 0) {
/*
- * Store the unmodified value in the shadow
+ * Forcing the vcpu into an interrupt shadow is not
+ * presently supported.
*/
- error = vmcs_setreg(&vmx->vmcs[vcpu], running,
- VMCS_IDENT(shadow), val);
+ error = EINVAL;
+ } else {
+ uint64_t gi;
+
+ gi = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY);
+ gi &= ~HWINTR_BLOCKING;
+ vmcs_write(VMCS_GUEST_INTERRUPTIBILITY, gi);
+ error = 0;
}
+ } else {
+ uint32_t encoding;
- if (reg == VM_REG_GUEST_CR3) {
+ error = 0;
+ encoding = vmcs_field_encoding(reg);
+ switch (encoding) {
+ case VMCS_GUEST_IA32_EFER:
+ /*
+ * If the "load EFER" VM-entry control is 1 then the
+ * value of EFER.LMA must be identical to "IA-32e mode
+ * guest" bit in the VM-entry control.
+ */
+ if ((entry_ctls & VM_ENTRY_LOAD_EFER) != 0) {
+ uint64_t ctls;
+
+ ctls = vmcs_read(VMCS_ENTRY_CTLS);
+ if (val & EFER_LMA) {
+ ctls |= VM_ENTRY_GUEST_LMA;
+ } else {
+ ctls &= ~VM_ENTRY_GUEST_LMA;
+ }
+ vmcs_write(VMCS_ENTRY_CTLS, ctls);
+ }
+ vmcs_write(encoding, val);
+ break;
+ case VMCS_GUEST_CR0:
+ /*
+ * The guest is not allowed to modify certain bits in
+ * %cr0 and %cr4. To maintain the illusion of full
+ * control, they have shadow versions which contain the
+ * guest-perceived (via reads from the register) values
+ * as opposed to the guest-effective values.
+ *
+ * This is detailed in the SDM: Vol. 3 Ch. 24.6.6.
+ */
+ vmcs_write(VMCS_CR0_SHADOW, val);
+ vmcs_write(encoding, vmx_fix_cr0(val));
+ break;
+ case VMCS_GUEST_CR4:
+ /* See above for detail on %cr4 shadowing */
+ vmcs_write(VMCS_CR4_SHADOW, val);
+ vmcs_write(encoding, vmx_fix_cr4(val));
+ break;
+ case VMCS_GUEST_CR3:
+ vmcs_write(encoding, val);
/*
* Invalidate the guest vcpu's TLB mappings to emulate
* the behavior of updating %cr3.
@@ -3359,38 +3316,80 @@ vmx_setreg(void *arg, int vcpu, int reg, uint64_t val)
* XXX the processor retains global mappings when %cr3
* is updated but vmx_invvpid() does not.
*/
- pmap = vmx->ctx[vcpu].pmap;
- vmx_invvpid(vmx, vcpu, pmap, running);
+ vmx_invvpid(vmx, vcpu, vmx->ctx[vcpu].pmap, running);
+ break;
+ case VMCS_INVALID_ENCODING:
+ error = EINVAL;
+ break;
+ default:
+ vmcs_write(encoding, val);
+ break;
}
}
+ if (!running) {
+ vmcs_clear(vmx->vmcs_pa[vcpu]);
+ }
+
return (error);
}
static int
-vmx_getdesc(void *arg, int vcpu, int reg, struct seg_desc *desc)
+vmx_getdesc(void *arg, int vcpu, int seg, struct seg_desc *desc)
{
int hostcpu, running;
struct vmx *vmx = arg;
+ uint32_t base, limit, access;
running = vcpu_is_running(vmx->vm, vcpu, &hostcpu);
if (running && hostcpu != curcpu)
panic("vmx_getdesc: %s%d is running", vm_name(vmx->vm), vcpu);
- return (vmcs_getdesc(&vmx->vmcs[vcpu], running, reg, desc));
+ if (!running) {
+ vmcs_load(vmx->vmcs_pa[vcpu]);
+ }
+
+ vmcs_seg_desc_encoding(seg, &base, &limit, &access);
+ desc->base = vmcs_read(base);
+ desc->limit = vmcs_read(limit);
+ if (access != VMCS_INVALID_ENCODING) {
+ desc->access = vmcs_read(access);
+ } else {
+ desc->access = 0;
+ }
+
+ if (!running) {
+ vmcs_clear(vmx->vmcs_pa[vcpu]);
+ }
+ return (0);
}
static int
-vmx_setdesc(void *arg, int vcpu, int reg, struct seg_desc *desc)
+vmx_setdesc(void *arg, int vcpu, int seg, struct seg_desc *desc)
{
int hostcpu, running;
struct vmx *vmx = arg;
+ uint32_t base, limit, access;
running = vcpu_is_running(vmx->vm, vcpu, &hostcpu);
if (running && hostcpu != curcpu)
panic("vmx_setdesc: %s%d is running", vm_name(vmx->vm), vcpu);
- return (vmcs_setdesc(&vmx->vmcs[vcpu], running, reg, desc));
+ if (!running) {
+ vmcs_load(vmx->vmcs_pa[vcpu]);
+ }
+
+ vmcs_seg_desc_encoding(seg, &base, &limit, &access);
+ vmcs_write(base, desc->base);
+ vmcs_write(limit, desc->limit);
+ if (access != VMCS_INVALID_ENCODING) {
+ vmcs_write(access, desc->access);
+ }
+
+ if (!running) {
+ vmcs_clear(vmx->vmcs_pa[vcpu]);
+ }
+ return (0);
}
static int
@@ -3438,21 +3437,17 @@ static int
vmx_setcap(void *arg, int vcpu, int type, int val)
{
struct vmx *vmx = arg;
- struct vmcs *vmcs = &vmx->vmcs[vcpu];
- uint32_t baseval;
+ uint32_t baseval, reg, flag;
uint32_t *pptr;
int error;
- int flag;
- int reg;
- int retval;
- retval = ENOENT;
+ error = ENOENT;
pptr = NULL;
switch (type) {
case VM_CAP_HALT_EXIT:
if (cap_halt_exit) {
- retval = 0;
+ error = 0;
pptr = &vmx->cap[vcpu].proc_ctls;
baseval = *pptr;
flag = PROCBASED_HLT_EXITING;
@@ -3461,7 +3456,7 @@ vmx_setcap(void *arg, int vcpu, int type, int val)
break;
case VM_CAP_MTRAP_EXIT:
if (cap_monitor_trap) {
- retval = 0;
+ error = 0;
pptr = &vmx->cap[vcpu].proc_ctls;
baseval = *pptr;
flag = PROCBASED_MTF;
@@ -3470,7 +3465,7 @@ vmx_setcap(void *arg, int vcpu, int type, int val)
break;
case VM_CAP_PAUSE_EXIT:
if (cap_pause_exit) {
- retval = 0;
+ error = 0;
pptr = &vmx->cap[vcpu].proc_ctls;
baseval = *pptr;
flag = PROCBASED_PAUSE_EXITING;
@@ -3479,7 +3474,7 @@ vmx_setcap(void *arg, int vcpu, int type, int val)
break;
case VM_CAP_ENABLE_INVPCID:
if (cap_invpcid) {
- retval = 0;
+ error = 0;
pptr = &vmx->cap[vcpu].proc_ctls2;
baseval = *pptr;
flag = PROCBASED2_ENABLE_INVPCID;
@@ -3487,7 +3482,7 @@ vmx_setcap(void *arg, int vcpu, int type, int val)
}
break;
case VM_CAP_BPT_EXIT:
- retval = 0;
+ error = 0;
/* Don't change the bitmap if we are tracing all exceptions. */
if (vmx->cap[vcpu].exc_bitmap != 0xffffffff) {
@@ -3501,8 +3496,9 @@ vmx_setcap(void *arg, int vcpu, int type, int val)
break;
}
- if (retval)
- return (retval);
+ if (error != 0) {
+ return (error);
+ }
if (pptr != NULL) {
if (val) {
@@ -3510,12 +3506,9 @@ vmx_setcap(void *arg, int vcpu, int type, int val)
} else {
baseval &= ~flag;
}
- VMPTRLD(vmcs);
- error = vmwrite(reg, baseval);
- VMCLEAR(vmcs);
-
- if (error)
- return (error);
+ vmcs_load(vmx->vmcs_pa[vcpu]);
+ vmcs_write(reg, baseval);
+ vmcs_clear(vmx->vmcs_pa[vcpu]);
/*
* Update optional stored flags, and record
@@ -3717,13 +3710,11 @@ static void
vmx_enable_x2apic_mode_ts(struct vlapic *vlapic)
{
struct vmx *vmx;
- struct vmcs *vmcs;
uint32_t proc_ctls;
int vcpuid;
vcpuid = vlapic->vcpuid;
vmx = ((struct vlapic_vtx *)vlapic)->vmx;
- vmcs = &vmx->vmcs[vcpuid];
proc_ctls = vmx->cap[vcpuid].proc_ctls;
proc_ctls &= ~PROCBASED_USE_TPR_SHADOW;
@@ -3731,22 +3722,20 @@ vmx_enable_x2apic_mode_ts(struct vlapic *vlapic)
proc_ctls |= PROCBASED_CR8_STORE_EXITING;
vmx->cap[vcpuid].proc_ctls = proc_ctls;
- VMPTRLD(vmcs);
+ vmcs_load(vmx->vmcs_pa[vcpuid]);
vmcs_write(VMCS_PRI_PROC_BASED_CTLS, proc_ctls);
- VMCLEAR(vmcs);
+ vmcs_clear(vmx->vmcs_pa[vcpuid]);
}
static void
vmx_enable_x2apic_mode_vid(struct vlapic *vlapic)
{
struct vmx *vmx;
- struct vmcs *vmcs;
uint32_t proc_ctls2;
int vcpuid, error;
vcpuid = vlapic->vcpuid;
vmx = ((struct vlapic_vtx *)vlapic)->vmx;
- vmcs = &vmx->vmcs[vcpuid];
proc_ctls2 = vmx->cap[vcpuid].proc_ctls2;
KASSERT((proc_ctls2 & PROCBASED2_VIRTUALIZE_APIC_ACCESSES) != 0,
@@ -3756,9 +3745,9 @@ vmx_enable_x2apic_mode_vid(struct vlapic *vlapic)
proc_ctls2 |= PROCBASED2_VIRTUALIZE_X2APIC_MODE;
vmx->cap[vcpuid].proc_ctls2 = proc_ctls2;
- VMPTRLD(vmcs);
+ vmcs_load(vmx->vmcs_pa[vcpuid]);
vmcs_write(VMCS_SEC_PROC_BASED_CTLS, proc_ctls2);
- VMCLEAR(vmcs);
+ vmcs_clear(vmx->vmcs_pa[vcpuid]);
if (vlapic->vcpuid == 0) {
/*
@@ -3934,10 +3923,9 @@ static void
vmx_savectx(void *arg, int vcpu)
{
struct vmx *vmx = arg;
- struct vmcs *vmcs = &vmx->vmcs[vcpu];
if ((vmx->vmcs_state[vcpu] & VS_LOADED) != 0) {
- VERIFY3U(vmclear(vmcs), ==, 0);
+ vmcs_clear(vmx->vmcs_pa[vcpu]);
vmx_msr_guest_exit(vmx, vcpu);
/*
* Having VMCLEARed the VMCS, it can no longer be re-entered
@@ -3953,13 +3941,12 @@ static void
vmx_restorectx(void *arg, int vcpu)
{
struct vmx *vmx = arg;
- struct vmcs *vmcs = &vmx->vmcs[vcpu];
ASSERT0(vmx->vmcs_state[vcpu] & VS_LAUNCHED);
if ((vmx->vmcs_state[vcpu] & VS_LOADED) != 0) {
vmx_msr_guest_enter(vmx, vcpu);
- VERIFY3U(vmptrld(vmcs), ==, 0);
+ vmcs_load(vmx->vmcs_pa[vcpu]);
}
}
#endif /* __FreeBSD__ */
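
The %cr0/%cr4 handling folded into vmx_setreg() above maintains two values
per register: the shadow field holds what the guest reads back, while the
guest-effective field has the ones/zeros masks applied. A worked example of
a guest %cr0 write (the value is purely illustrative):

	/* Hypothetical guest write to %cr0 under the shadowing scheme. */
	uint64_t val = 0x80000031;			/* guest's intended %cr0 */
	vmcs_write(VMCS_CR0_SHADOW, val);		/* what the guest reads back */
	vmcs_write(VMCS_GUEST_CR0, vmx_fix_cr0(val));	/* (val | ones) & ~zeros */
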
diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx.h b/usr/src/uts/i86pc/io/vmm/intel/vmx.h
index 0fd723f9c9..a5647e0b87 100644
--- a/usr/src/uts/i86pc/io/vmm/intel/vmx.h
+++ b/usr/src/uts/i86pc/io/vmm/intel/vmx.h
@@ -156,6 +156,7 @@ struct vmx {
uint64_t host_msrs[VM_MAXCPU][GUEST_MSR_NUM];
uint64_t tsc_offset_active[VM_MAXCPU];
vmcs_state_t vmcs_state[VM_MAXCPU];
+ uintptr_t vmcs_pa[VM_MAXCPU];
#endif
struct vmxctx ctx[VM_MAXCPU];
struct vmxcap cap[VM_MAXCPU];
@@ -175,17 +176,38 @@ vmx_cap_en(const struct vmx *vmx, enum vmx_caps cap)
return ((vmx->vmx_caps & cap) == cap);
}
+
+/*
+ * Section 5.2 "Conventions" from Intel Architecture Manual 2B.
+ *
+ * error
+ * VMsucceed 0
+ * VMFailInvalid 1
+ * VMFailValid 2 see also VMCS VM-Instruction Error Field
+ */
+#define VM_SUCCESS 0
+#define VM_FAIL_INVALID 1
+#define VM_FAIL_VALID 2
+#define VMX_SET_ERROR_CODE_ASM \
+ " jnc 1f;" \
+ " mov $1, %[error];" /* CF: error = 1 */ \
+ " jmp 3f;" \
+ "1: jnz 2f;" \
+ " mov $2, %[error];" /* ZF: error = 2 */ \
+ " jmp 3f;" \
+ "2: mov $0, %[error];" \
+ "3:"
+
+
#define VMX_GUEST_VMEXIT 0
#define VMX_VMRESUME_ERROR 1
#define VMX_VMLAUNCH_ERROR 2
#define VMX_INVEPT_ERROR 3
#define VMX_VMWRITE_ERROR 4
+
int vmx_enter_guest(struct vmxctx *ctx, struct vmx *vmx, int launched);
void vmx_call_isr(uintptr_t entry);
-u_long vmx_fix_cr0(u_long cr0);
-u_long vmx_fix_cr4(u_long cr4);
-
int vmx_set_tsc_offset(struct vmx *vmx, int vcpu, uint64_t offset);
extern char vmx_exit_guest[];
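
VMX_SET_ERROR_CODE_ASM above encodes the SDM's status-flag convention: a VMX
instruction reports VMfailInvalid by setting CF, VMfailValid by setting ZF
(with specifics in the VM-instruction error field), and VMsucceed by clearing
both. The jnc/jnz ladder amounts to this C sketch, with cf and zf standing in
for the flags:

	/* C rendering of the CF/ZF decoding done by VMX_SET_ERROR_CODE_ASM. */
	static int
	vmx_status_to_error(int cf, int zf)
	{
		if (cf)
			return (VM_FAIL_INVALID);	/* e.g. bad VMCS pointer */
		if (zf)
			return (VM_FAIL_VALID);	/* see VM-instruction error field */
		return (VM_SUCCESS);
	}
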
diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx_cpufunc.h b/usr/src/uts/i86pc/io/vmm/intel/vmx_cpufunc.h
deleted file mode 100644
index f0c5ba7691..0000000000
--- a/usr/src/uts/i86pc/io/vmm/intel/vmx_cpufunc.h
+++ /dev/null
@@ -1,244 +0,0 @@
-/*-
- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
- *
- * Copyright (c) 2011 NetApp, Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-/*
- * This file and its contents are supplied under the terms of the
- * Common Development and Distribution License ("CDDL"), version 1.0.
- * You may only use this file in accordance with the terms of version
- * 1.0 of the CDDL.
- *
- * A full copy of the text of the CDDL should have accompanied this
- * source. A copy of the CDDL is also available via the Internet at
- * http://www.illumos.org/license/CDDL.
- *
- * Copyright 2014 Pluribus Networks Inc.
- * Copyright 2017 Joyent, Inc.
- */
-
-#ifndef _VMX_CPUFUNC_H_
-#define _VMX_CPUFUNC_H_
-
-struct vmcs;
-
-/*
- * Section 5.2 "Conventions" from Intel Architecture Manual 2B.
- *
- * error
- * VMsucceed 0
- * VMFailInvalid 1
- * VMFailValid 2 see also VMCS VM-Instruction Error Field
- */
-#define VM_SUCCESS 0
-#define VM_FAIL_INVALID 1
-#define VM_FAIL_VALID 2
-#define VMX_SET_ERROR_CODE \
- " jnc 1f;" \
- " mov $1, %[error];" /* CF: error = 1 */ \
- " jmp 3f;" \
- "1: jnz 2f;" \
- " mov $2, %[error];" /* ZF: error = 2 */ \
- " jmp 3f;" \
- "2: mov $0, %[error];" \
- "3:"
-
-/* returns 0 on success and non-zero on failure */
-static __inline int
-vmxon(char *region)
-{
- int error;
- uint64_t addr;
-
-#ifdef __FreeBSD__
- addr = vtophys(region);
-#else
- /* This is pre-translated in illumos */
- addr = (uint64_t)region;
-#endif
- __asm __volatile("vmxon %[addr];"
- VMX_SET_ERROR_CODE
- : [error] "=r" (error)
- : [addr] "m" (*(uint64_t *)&addr)
- : "memory");
-
- return (error);
-}
-
-#ifdef __FreeBSD__
-/* returns 0 on success and non-zero on failure */
-static __inline int
-vmclear(struct vmcs *vmcs)
-{
- int error;
- uint64_t addr;
-
- addr = vtophys(vmcs);
- __asm __volatile("vmclear %[addr];"
- VMX_SET_ERROR_CODE
- : [error] "=r" (error)
- : [addr] "m" (*(uint64_t *)&addr)
- : "memory");
- return (error);
-}
-#endif /* __FreeBSD__ */
-
-static __inline void
-vmxoff(void)
-{
-
- __asm __volatile("vmxoff");
-}
-
-static __inline void
-vmptrst(uint64_t *addr)
-{
-
- __asm __volatile("vmptrst %[addr]" :: [addr]"m" (*addr) : "memory");
-}
-
-#ifdef __FreeBSD__
-static __inline int
-vmptrld(struct vmcs *vmcs)
-{
- int error;
- uint64_t addr;
-
- addr = vtophys(vmcs);
- __asm __volatile("vmptrld %[addr];"
- VMX_SET_ERROR_CODE
- : [error] "=r" (error)
- : [addr] "m" (*(uint64_t *)&addr)
- : "memory");
- return (error);
-}
-#endif /* __FreeBSD__ */
-
-static __inline int
-vmwrite(uint64_t reg, uint64_t val)
-{
- int error;
-
- __asm __volatile("vmwrite %[val], %[reg];"
- VMX_SET_ERROR_CODE
- : [error] "=r" (error)
- : [val] "r" (val), [reg] "r" (reg)
- : "memory");
-
- return (error);
-}
-
-static __inline int
-vmread(uint64_t r, uint64_t *addr)
-{
- int error;
-
- __asm __volatile("vmread %[r], %[addr];"
- VMX_SET_ERROR_CODE
- : [error] "=r" (error)
- : [r] "r" (r), [addr] "m" (*addr)
- : "memory");
-
- return (error);
-}
-
-#ifdef __FreeBSD__
-static __inline void
-VMCLEAR(struct vmcs *vmcs)
-{
- int err;
-
- err = vmclear(vmcs);
- if (err != 0)
- panic("%s: vmclear(%p) error %d", __func__, vmcs, err);
-
- critical_exit();
-}
-
-static __inline void
-VMPTRLD(struct vmcs *vmcs)
-{
- int err;
-
- critical_enter();
-
- err = vmptrld(vmcs);
- if (err != 0)
- panic("%s: vmptrld(%p) error %d", __func__, vmcs, err);
-}
-#endif /* __FreeBSD__ */
-
-#define INVVPID_TYPE_ADDRESS 0UL
-#define INVVPID_TYPE_SINGLE_CONTEXT 1UL
-#define INVVPID_TYPE_ALL_CONTEXTS 2UL
-
-struct invvpid_desc {
- uint16_t vpid;
- uint16_t _res1;
- uint32_t _res2;
- uint64_t linear_addr;
-};
-CTASSERT(sizeof(struct invvpid_desc) == 16);
-
-static __inline void
-invvpid(uint64_t type, struct invvpid_desc desc)
-{
- int error;
-
- __asm __volatile("invvpid %[desc], %[type];"
- VMX_SET_ERROR_CODE
- : [error] "=r" (error)
- : [desc] "m" (desc), [type] "r" (type)
- : "memory");
-
- if (error)
- panic("invvpid error %d", error);
-}
-
-#define INVEPT_TYPE_SINGLE_CONTEXT 1UL
-#define INVEPT_TYPE_ALL_CONTEXTS 2UL
-struct invept_desc {
- uint64_t eptp;
- uint64_t _res;
-};
-CTASSERT(sizeof(struct invept_desc) == 16);
-
-static __inline void
-invept(uint64_t type, struct invept_desc desc)
-{
- int error;
-
- __asm __volatile("invept %[desc], %[type];"
- VMX_SET_ERROR_CODE
- : [error] "=r" (error)
- : [desc] "m" (desc), [type] "r" (type)
- : "memory");
-
- if (error)
- panic("invept error %d", error);
-}
-#endif
diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.c b/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.c
index 6c37c9c234..cfdf2bfe05 100644
--- a/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.c
+++ b/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.c
@@ -62,13 +62,6 @@ vmx_ctl_allows_zero_setting(uint64_t msr_val, int bitpos)
return ((msr_val & (1UL << bitpos)) == 0);
}
-uint32_t
-vmx_revision(void)
-{
-
- return (rdmsr(MSR_VMX_BASIC) & 0xffffffff);
-}
-
/*
* Generate a bitmask to be used for the VMCS execution control fields.
*
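
vmx_revision() masked the low 32 bits of MSR_VMX_BASIC; its replacement is
the VMX_BASIC_REVISION() macro in vmcs.c, which masks bits 0-30 to match the
SDM's definition of the revision identifier (bit 31 of a VMCS's identifier
word has a separate meaning, the shadow-VMCS indicator, and is always zero in
the MSR). The replacement in use:

	/* From vmcs.c above: bits 0-30 of MSR_VMX_BASIC hold the revision. */
	#define	VMX_BASIC_REVISION(v)	((v) & 0x7fffffff)

	vmcs->identifier = VMX_BASIC_REVISION(rdmsr(MSR_VMX_BASIC));
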
diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.h b/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.h
index ac2adb0dd1..848cdea26b 100644
--- a/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.h
+++ b/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.h
@@ -40,8 +40,6 @@ void vmx_msr_guest_exit(struct vmx *vmx, int vcpuid);
int vmx_rdmsr(struct vmx *, int vcpuid, u_int num, uint64_t *val, bool *retu);
int vmx_wrmsr(struct vmx *, int vcpuid, u_int num, uint64_t val, bool *retu);
-uint32_t vmx_revision(void);
-
int vmx_set_ctlreg(int ctl_reg, int true_ctl_reg, uint32_t ones_mask,
uint32_t zeros_mask, uint32_t *retval);