summaryrefslogtreecommitdiff
path: root/usr/src/uts/i86pc
diff options
context:
space:
mode:
authorPatrick Mooney <pmooney@pfmooney.com>2018-07-18 20:14:58 +0000
committerPatrick Mooney <pmooney@pfmooney.com>2018-08-21 17:54:16 +0000
commitbefffd577ca6c3a090d7d3c72d267a383c3a3c45 (patch)
tree3d1b28dc8970770089d4a47d74f4d028bb59d110 /usr/src/uts/i86pc
parentdce88dec0a5dd9849b881d131e2c0f78a5ba4c97 (diff)
downloadillumos-joyent-befffd577ca6c3a090d7d3c72d267a383c3a3c45.tar.gz
OS-7080 bhyve and KVM should coexist in peace
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: John Levon <john.levon@joyent.com>
Approved by: John Levon <john.levon@joyent.com>
Diffstat (limited to 'usr/src/uts/i86pc')
-rw-r--r--usr/src/uts/i86pc/Makefile.files3
-rw-r--r--usr/src/uts/i86pc/io/vmm/intel/vmx.c92
-rw-r--r--usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c134
-rw-r--r--usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c53
-rw-r--r--usr/src/uts/i86pc/ml/hma_asm.s (renamed from usr/src/uts/i86pc/sys/pc_hvm.h)39
-rw-r--r--usr/src/uts/i86pc/os/hma.c338
-rw-r--r--usr/src/uts/i86pc/os/mp_startup.c10
-rw-r--r--usr/src/uts/i86pc/os/pc_hvm.c65
-rw-r--r--usr/src/uts/i86pc/sys/hma.h22
9 files changed, 484 insertions, 272 deletions
diff --git a/usr/src/uts/i86pc/Makefile.files b/usr/src/uts/i86pc/Makefile.files
index 2a94505acb..2f3a525730 100644
--- a/usr/src/uts/i86pc/Makefile.files
+++ b/usr/src/uts/i86pc/Makefile.files
@@ -64,6 +64,8 @@ CORE_OBJS += \
hardclk.o \
hat_i86.o \
hat_kdi.o \
+ hma.o \
+ hma_asm.o \
hma_fpu.o \
hment.o \
hold_page.o \
@@ -100,7 +102,6 @@ CORE_OBJS += \
memscrub.o \
mpcore.o \
notes.o \
- pc_hvm.o \
pci_bios.o \
pci_cfgacc.o \
pci_cfgacc_x86.o \
diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx.c b/usr/src/uts/i86pc/io/vmm/intel/vmx.c
index e07ee0ea52..b119cde3c6 100644
--- a/usr/src/uts/i86pc/io/vmm/intel/vmx.c
+++ b/usr/src/uts/i86pc/io/vmm/intel/vmx.c
@@ -57,6 +57,7 @@ __FBSDID("$FreeBSD$");
#include <sys/x86_archext.h>
#include <sys/smp_impldefs.h>
#include <sys/ht.h>
+#include <sys/hma.h>
#endif
#include <vm/vm.h>
@@ -159,9 +160,10 @@ static MALLOC_DEFINE(M_VLAPIC, "vlapic", "vlapic");
SYSCTL_DECL(_hw_vmm);
SYSCTL_NODE(_hw_vmm, OID_AUTO, vmx, CTLFLAG_RW, NULL, NULL);
+#ifdef __FreeBSD__
int vmxon_enabled[MAXCPU];
static char vmxon_region[MAXCPU][PAGE_SIZE] __aligned(PAGE_SIZE);
-static char *vmxon_region_pa[MAXCPU];
+#endif /*__FreeBSD__ */
static uint32_t pinbased_ctls, procbased_ctls, procbased_ctls2;
static uint32_t exit_ctls, entry_ctls;
@@ -510,7 +512,11 @@ vpid_free(int vpid)
*/
if (vpid > VM_MAXCPU)
+#ifdef __FreeBSD__
free_unr(vpid_unr, vpid);
+#else
+ hma_vmx_vpid_free((uint16_t)vpid);
+#endif
}
static void
@@ -535,7 +541,14 @@ vpid_alloc(uint16_t *vpid, int num)
* Allocate a unique VPID for each vcpu from the unit number allocator.
*/
for (i = 0; i < num; i++) {
+#ifdef __FreeBSD__
x = alloc_unr(vpid_unr);
+#else
+ uint16_t tmp;
+
+ tmp = hma_vmx_vpid_alloc();
+ x = (tmp == 0) ? -1 : tmp;
+#endif
if (x == -1)
break;
else
@@ -564,6 +577,7 @@ vpid_alloc(uint16_t *vpid, int num)
}
}
+#ifdef __FreeBSD__
static void
vpid_init(void)
{
@@ -604,10 +618,8 @@ vmx_disable(void *arg __unused)
static int
vmx_cleanup(void)
{
-#ifdef __FreeBSD__
if (pirvec >= 0)
lapic_ipi_free(pirvec);
-#endif
if (vpid_unr != NULL) {
delete_unrhdr(vpid_unr);
@@ -636,11 +648,7 @@ vmx_enable(void *arg __unused)
load_cr4(rcr4() | CR4_VMXE);
*(uint32_t *)vmxon_region[curcpu] = vmx_revision();
-#ifdef __FreeBSD__
error = vmxon(vmxon_region[curcpu]);
-#else
- error = vmxon(vmxon_region_pa[curcpu]);
-#endif
if (error == 0)
vmxon_enabled[curcpu] = 1;
}
@@ -652,12 +660,30 @@ vmx_restore(void)
if (vmxon_enabled[curcpu])
vmxon(vmxon_region[curcpu]);
}
+#else /* __FreeBSD__ */
+static int
+vmx_cleanup(void)
+{
+ /* This is taken care of by the hma registration */
+ return (0);
+}
+
+static void
+vmx_restore(void)
+{
+ /* No-op on illumos */
+}
+#endif /* __FreeBSD__ */
static int
vmx_init(int ipinum)
{
int error, use_tpr_shadow;
+#ifdef __FreeBSD__
uint64_t basic, fixed0, fixed1, feature_control;
+#else
+ uint64_t fixed0, fixed1;
+#endif
uint32_t tmp, procbased2_vid_bits;
#ifdef __FreeBSD__
@@ -666,13 +692,6 @@ vmx_init(int ipinum)
printf("vmx_init: processor does not support VMX operation\n");
return (ENXIO);
}
-#else
- if (!is_x86_feature(x86_featureset, X86FSET_VMX)) {
- cmn_err(CE_WARN,
- "vmx_init: processor does not support VMX operation\n");
- return (ENXIO);
- }
-#endif
/*
* Verify that MSR_IA32_FEATURE_CONTROL lock and VMXON enable bits
@@ -695,6 +714,7 @@ vmx_init(int ipinum)
"capabilities\n");
return (EINVAL);
}
+#endif /* __FreeBSD__ */
/* Check support for primary processor-based VM-execution controls */
error = vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS,
@@ -890,23 +910,16 @@ vmx_init(int ipinum)
cr4_ones_mask = fixed0 & fixed1;
cr4_zeros_mask = ~fixed0 & ~fixed1;
+#ifdef __FreeBSD__
vpid_init();
+#endif
vmx_msr_init();
-#ifndef __FreeBSD__
- /*
- * Since vtophys requires locks to complete, cache the physical
- * addresses to the vmxon pages now, rather than attempting the
- * translation in the sensitive cross-call context.
- */
- for (uint_t i = 0; i < MAXCPU; i++) {
- vmxon_region_pa[i] = (char *)vtophys(vmxon_region[i]);
- }
-#endif /* __FreeBSD__ */
-
+#ifdef __FreeBSD__
/* enable VMX operation */
smp_rendezvous(NULL, vmx_enable, NULL, NULL);
+#endif
vmx_initialized = 1;
@@ -4006,37 +4019,12 @@ struct vmm_ops vmm_ops_intel = {
#ifndef __FreeBSD__
/* Side-effect free HW validation derived from checks in vmx_init. */
int
-vmx_x86_supported(char **msg)
+vmx_x86_supported(const char **msg)
{
int error;
- uint64_t basic, feature_control;
uint32_t tmp;
- if (!is_x86_feature(x86_featureset, X86FSET_VMX)) {
- *msg = "processor does not support VMX operation";
- return (ENXIO);
- }
-
- /*
- * Verify that MSR_IA32_FEATURE_CONTROL lock and VMXON enable bits
- * are set (bits 0 and 2 respectively).
- */
- feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL);
- if ((feature_control & IA32_FEATURE_CONTROL_LOCK) == 1 &&
- (feature_control & IA32_FEATURE_CONTROL_VMX_EN) == 0) {
- *msg = "VMX operation disabled by BIOS";
- return (ENXIO);
- }
-
- /*
- * Verify capabilities MSR_VMX_BASIC:
- * - bit 54 indicates support for INS/OUTS decoding
- */
- basic = rdmsr(MSR_VMX_BASIC);
- if ((basic & (1UL << 54)) == 0) {
- *msg = "processor does not support desired basic capabilities";
- return (EINVAL);
- }
+ ASSERT(msg != NULL);
/* Check support for primary processor-based VM-execution controls */
error = vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS,
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c b/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c
index 3c0d9beec2..817d88111f 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c
+++ b/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c
@@ -24,13 +24,14 @@
#include <sys/mkdev.h>
#include <sys/sunddi.h>
#include <sys/fs/dv_node.h>
-#include <sys/pc_hvm.h>
#include <sys/cpuset.h>
#include <sys/id_space.h>
#include <sys/fs/sdev_plugin.h>
#include <sys/ht.h>
#include <sys/kernel.h>
+#include <sys/hma.h>
+#include <sys/x86_archext.h>
#include <sys/vmm.h>
#include <sys/vmm_instruction_emul.h>
@@ -64,8 +65,7 @@ static void *vmm_statep;
static kmutex_t vmmdev_mtx;
static id_space_t *vmmdev_minors;
-static uint_t vmmdev_inst_count = 0;
-static boolean_t vmmdev_load_failure;
+static hma_reg_t *vmm_hma_reg;
static kmutex_t vmm_mtx;
static list_t vmmdev_list;
@@ -78,7 +78,7 @@ static const char *vmmdev_hvm_name = "bhyve";
static sdev_plugin_hdl_t vmm_sdev_hdl;
/* From uts/i86pc/io/vmm/intel/vmx.c */
-extern int vmx_x86_supported(char **);
+extern int vmx_x86_supported(const char **);
/* Holds and hooks from drivers external to vmm */
struct vmm_hold {
@@ -1161,49 +1161,6 @@ done:
return (error);
}
-static boolean_t
-vmmdev_mod_incr()
-{
- ASSERT(MUTEX_HELD(&vmmdev_mtx));
-
- if (vmmdev_inst_count == 0) {
- /*
- * If the HVM portions of the module failed initialize on a
- * previous attempt, do not bother with a retry. This tracker
- * is cleared on module attach, allowing subsequent attempts if
- * desired by the user.
- */
- if (vmmdev_load_failure) {
- return (B_FALSE);
- }
-
- if (!hvm_excl_hold(vmmdev_hvm_name)) {
- return (B_FALSE);
- }
- if (vmm_mod_load() != 0) {
- hvm_excl_rele(vmmdev_hvm_name);
- vmmdev_load_failure = B_TRUE;
- return (B_FALSE);
- }
- }
-
- vmmdev_inst_count++;
- return (B_TRUE);
-}
-
-static void
-vmmdev_mod_decr(void)
-{
- ASSERT(MUTEX_HELD(&vmmdev_mtx));
- ASSERT(vmmdev_inst_count > 0);
-
- vmmdev_inst_count--;
- if (vmmdev_inst_count == 0) {
- VERIFY0(vmm_mod_unload());
- hvm_excl_rele(vmmdev_hvm_name);
- }
-}
-
static vmm_softc_t *
vmm_lookup(const char *name)
{
@@ -1233,17 +1190,11 @@ vmmdev_do_vm_create(char *name, cred_t *cr)
}
mutex_enter(&vmmdev_mtx);
- if (!vmmdev_mod_incr()) {
- mutex_exit(&vmmdev_mtx);
- return (ENXIO);
- }
-
mutex_enter(&vmm_mtx);
/* Look for duplicates names */
if (vmm_lookup(name) != NULL) {
mutex_exit(&vmm_mtx);
- vmmdev_mod_decr();
mutex_exit(&vmmdev_mtx);
return (EEXIST);
}
@@ -1254,7 +1205,6 @@ vmmdev_do_vm_create(char *name, cred_t *cr)
sc = list_next(&vmmdev_list, sc)) {
if (sc->vmm_zone == curzone) {
mutex_exit(&vmm_mtx);
- vmmdev_mod_decr();
mutex_exit(&vmmdev_mtx);
return (EINVAL);
}
@@ -1296,7 +1246,6 @@ vmmdev_do_vm_create(char *name, cred_t *cr)
ddi_remove_minor_node(vmm_dip, name);
fail:
id_free(vmmdev_minors, minor);
- vmmdev_mod_decr();
if (sc != NULL) {
ddi_soft_state_free(vmm_statep, minor);
}
@@ -1548,7 +1497,6 @@ vmm_do_vm_destroy_locked(vmm_softc_t *sc, boolean_t clean_zsd)
ddi_soft_state_free(vmm_statep, minor);
id_free(vmmdev_minors, minor);
(void) devfs_clean(pdip, NULL, DV_CLEAN_FORCE);
- vmmdev_mod_decr();
return (0);
}
@@ -1662,7 +1610,7 @@ static int
vmm_is_supported(intptr_t arg)
{
int r;
- char *msg;
+ const char *msg;
if (!vmm_is_intel())
return (ENXIO);
@@ -1882,47 +1830,66 @@ vmm_info(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
static int
vmm_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
- switch (cmd) {
- case DDI_ATTACH:
- break;
- default:
+ sdev_plugin_hdl_t sph;
+ hma_reg_t *reg = NULL;
+ boolean_t vmm_loaded = B_FALSE;
+
+ if (cmd != DDI_ATTACH) {
+ return (DDI_FAILURE);
+ }
+
+ mutex_enter(&vmmdev_mtx);
+ /* Ensure we are not already attached. */
+ if (vmm_dip != NULL) {
+ mutex_exit(&vmmdev_mtx);
return (DDI_FAILURE);
}
vmm_sol_glue_init();
+ vmm_arena_init();
- /*
- * Create control node. Other nodes will be created on demand.
- */
+ if ((reg = hma_register(vmmdev_hvm_name)) == NULL) {
+ goto fail;
+ } else if (vmm_mod_load() != 0) {
+ goto fail;
+ }
+ vmm_loaded = B_TRUE;
+
+ /* Create control node. Other nodes will be created on demand. */
if (ddi_create_minor_node(dip, "ctl", S_IFCHR,
VMM_CTL_MINOR, DDI_PSEUDO, 0) != 0) {
- return (DDI_FAILURE);
+ goto fail;
}
- if ((vmm_sdev_hdl = sdev_plugin_register("vmm", &vmm_sdev_ops,
- NULL)) == NULL) {
+ if ((sph = sdev_plugin_register("vmm", &vmm_sdev_ops, NULL)) == NULL) {
ddi_remove_minor_node(dip, NULL);
- dip = NULL;
- return (DDI_FAILURE);
+ goto fail;
}
ddi_report_dev(dip);
-
- vmm_arena_init();
-
- vmmdev_load_failure = B_FALSE;
+ vmm_hma_reg = reg;
+ vmm_sdev_hdl = sph;
vmm_dip = dip;
-
+ mutex_exit(&vmmdev_mtx);
return (DDI_SUCCESS);
+
+fail:
+ if (vmm_loaded) {
+ VERIFY0(vmm_mod_unload());
+ }
+ if (reg != NULL) {
+ hma_unregister(reg);
+ }
+ vmm_sol_glue_cleanup();
+ vmm_arena_fini();
+ mutex_exit(&vmmdev_mtx);
+ return (DDI_FAILURE);
}
static int
vmm_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
- switch (cmd) {
- case DDI_DETACH:
- break;
- default:
+ if (cmd != DDI_DETACH) {
return (DDI_FAILURE);
}
@@ -1937,19 +1904,12 @@ vmm_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
if (mutex_tryenter(&vmmdev_mtx) == 0)
return (DDI_FAILURE);
- if (vmmdev_inst_count != 0) {
- mutex_exit(&vmmdev_mtx);
- return (DDI_FAILURE);
- }
-
mutex_enter(&vmm_mtx);
-
if (!list_is_empty(&vmmdev_list)) {
mutex_exit(&vmm_mtx);
mutex_exit(&vmmdev_mtx);
return (DDI_FAILURE);
}
-
mutex_exit(&vmm_mtx);
if (vmm_sdev_hdl != NULL && sdev_plugin_unregister(vmm_sdev_hdl) != 0) {
@@ -1961,6 +1921,10 @@ vmm_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
/* Remove the control node. */
ddi_remove_minor_node(dip, "ctl");
vmm_dip = NULL;
+
+ VERIFY0(vmm_mod_unload());
+ hma_unregister(vmm_hma_reg);
+ vmm_hma_reg = NULL;
vmm_sol_glue_cleanup();
vmm_arena_fini();
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c b/usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c
index b523b69bd9..b73c937fad 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c
+++ b/usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c
@@ -320,58 +320,6 @@ critical_exit(void)
kpreempt_enable();
}
-struct unrhdr;
-static kmutex_t unr_lock;
-static uint_t unr_idx;
-
-/*
- * Allocate a new unrheader set.
- *
- * Highest and lowest valid values given as parameters.
- */
-struct unrhdr *
-new_unrhdr(int low, int high, struct mtx *mtx)
-{
- id_space_t *ids;
- char name[] = "vmm_unr_00000000";
-
- ASSERT(mtx == NULL);
-
- mutex_enter(&unr_lock);
- /* Get a unique name for the id space */
- (void) snprintf(name, sizeof (name), "vmm_unr_%08X", unr_idx);
- VERIFY(++unr_idx != UINT_MAX);
- mutex_exit(&unr_lock);
-
- ids = id_space_create(name, low, high);
-
- return ((struct unrhdr *)ids);
-}
-
-void
-delete_unrhdr(struct unrhdr *uh)
-{
- id_space_t *ids = (id_space_t *)uh;
-
- id_space_destroy(ids);
-}
-
-int
-alloc_unr(struct unrhdr *uh)
-{
- id_space_t *ids = (id_space_t *)uh;
-
- return (id_alloc(ids));
-}
-
-void
-free_unr(struct unrhdr *uh, u_int item)
-{
- id_space_t *ids = (id_space_t *)uh;
-
- id_free(ids, item);
-}
-
static void
vmm_glue_callout_handler(void *arg)
@@ -571,7 +519,6 @@ vmm_sol_glue_init(void)
{
vmm_alloc_init();
vmm_cpuid_init();
- unr_idx = 0;
}
void
diff --git a/usr/src/uts/i86pc/sys/pc_hvm.h b/usr/src/uts/i86pc/ml/hma_asm.s
index 38acf052e4..3b5995bb1a 100644
--- a/usr/src/uts/i86pc/sys/pc_hvm.h
+++ b/usr/src/uts/i86pc/ml/hma_asm.s
@@ -10,26 +10,33 @@
*/
/*
- * Copyright 2017 Joyent, Inc.
+ * Copyright 2018 Joyent, Inc.
*/
-#ifndef _PC_HVM_H
-#define _PC_HVM_H
+#include <sys/asm_linkage.h>
-#ifdef __cplusplus
-extern "C" {
-#endif
+#if defined(__lint)
-#if defined(_KERNEL)
-
-extern boolean_t hvm_excl_hold(const char *);
-extern void hvm_excl_rele(const char *);
-
-#endif /* defined(_KERNEL) */
-
-#ifdef __cplusplus
+int
+hma_vmx_vmxon(uintptr_t arg)
+{
+ return (0);
}
-#endif
-#endif /* _PC_HVM_H */
+#else /* __lint */
+ ENTRY_NP(hma_vmx_vmxon)
+ push %rbp
+ movq %rsp, %rbp
+ pushq %rdi
+
+ xorl %eax, %eax
+ vmxon -0x8(%rbp)
+ ja 1f /* CF=0, ZF=0 (success) */
+ incl %eax
+1:
+
+ leave
+ ret
+ SET_SIZE(hma_vmx_vmxon)
+#endif /* __lint */
diff --git a/usr/src/uts/i86pc/os/hma.c b/usr/src/uts/i86pc/os/hma.c
new file mode 100644
index 0000000000..34ae85086d
--- /dev/null
+++ b/usr/src/uts/i86pc/os/hma.c
@@ -0,0 +1,338 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2018 Joyent, Inc.
+ */
+
+#include <sys/cpuvar.h>
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/machsystm.h>
+#include <sys/controlregs.h>
+#include <sys/x86_archext.h>
+#include <sys/id_space.h>
+#include <sys/hma.h>
+#include <sys/cmn_err.h>
+#include <vm/hat.h>
+#include <vm/as.h>
+
+struct hma_reg {
+ const char *hr_name;
+ list_node_t hr_node;
+};
+
+static kmutex_t hma_lock;
+static list_t hma_registrations;
+
+static boolean_t hma_vmx_ready = B_FALSE;
+static const char *hma_vmx_error = NULL;
+static id_space_t *hma_vmx_vpid;
+
+typedef enum vmx_cpu_state {
+ VCS_UNINITIALIZED = 0,
+ VCS_READY,
+ VCS_ERROR
+} vmx_cpu_state_t;
+
+/*
+ * The bulk of VMX-related HMA state is protected by cpu_lock, rather than a
+ * mutex specific to the module. It (cpu_lock) is already required for the
+ * state needed to perform setup on all CPUs, so it was a natural fit to
+ * protect this data too.
+ */
+static void *hma_vmx_vmxon_page[NCPU];
+static uintptr_t hma_vmx_vmxon_pa[NCPU];
+static vmx_cpu_state_t hma_vmx_status[NCPU];
+static uint32_t hma_vmx_revision;
+
+
+static int hma_vmx_init(void);
+static int hma_svm_init(void);
+
+void
+hma_init(void)
+{
+ mutex_init(&hma_lock, NULL, MUTEX_DEFAULT, NULL);
+ list_create(&hma_registrations, sizeof (struct hma_reg),
+ offsetof(struct hma_reg, hr_node));
+
+ switch (cpuid_getvendor(CPU)) {
+ case X86_VENDOR_Intel:
+ (void) hma_vmx_init();
+ break;
+ case X86_VENDOR_AMD:
+ (void) hma_svm_init();
+ break;
+ default:
+ break;
+ }
+}
+
+hma_reg_t *
+hma_register(const char *name)
+{
+ struct hma_reg *reg;
+ boolean_t is_ready;
+
+ VERIFY(name != NULL);
+
+ reg = kmem_zalloc(sizeof (*reg), KM_SLEEP);
+ reg->hr_name = name;
+
+ mutex_enter(&hma_lock);
+ switch (cpuid_getvendor(CPU)) {
+ case X86_VENDOR_Intel:
+ is_ready = hma_vmx_ready;
+ break;
+ case X86_VENDOR_AMD:
+ /* Punt on SVM support for now */
+ is_ready = B_FALSE;
+ break;
+ default:
+ is_ready = B_FALSE;
+ break;
+ }
+
+ if (!is_ready) {
+ kmem_free(reg, sizeof (*reg));
+ reg = NULL;
+ } else {
+ list_insert_tail(&hma_registrations, reg);
+ }
+ mutex_exit(&hma_lock);
+
+ return (reg);
+}
+
+void
+hma_unregister(hma_reg_t *reg)
+{
+ VERIFY(reg != NULL);
+ VERIFY(!list_is_empty(&hma_registrations));
+
+ mutex_enter(&hma_lock);
+ list_remove(&hma_registrations, reg);
+ mutex_exit(&hma_lock);
+ kmem_free(reg, sizeof (*reg));
+}
+
+/*
+ * VPID 0 is reserved for instances where VPID is disabled. Some hypervisors
+ * (read: bhyve) reserve lower-order VPIDs for use in fallback behavior if
+ * unique VPIDs could not be allocated for all the vCPUs belonging to a VM.
+ */
+#define HMA_VPID_RESERVED NCPU
+
+uint16_t
+hma_vmx_vpid_alloc(void)
+{
+ id_t res;
+
+ res = id_alloc_nosleep(hma_vmx_vpid);
+ if (res == -1) {
+ return (0);
+ } else {
+ ASSERT(res > HMA_VPID_RESERVED && res <= UINT16_MAX);
+ return (res);
+ }
+}
+
+void
+hma_vmx_vpid_free(uint16_t vpid)
+{
+ VERIFY(vpid > HMA_VPID_RESERVED);
+ id_free(hma_vmx_vpid, (id_t)vpid);
+}
+
+
+extern int hma_vmx_vmxon(uintptr_t);
+
+/* ARGSUSED */
+static int
+hma_vmx_cpu_vmxon(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3)
+{
+ uint64_t fctrl;
+ processorid_t id = CPU->cpu_seqid;
+ void *vmxon_region = hma_vmx_vmxon_page[id];
+ uintptr_t vmxon_pa = hma_vmx_vmxon_pa[id];
+
+ VERIFY(vmxon_region != NULL && vmxon_pa != 0);
+
+ /*
+ * Ensure that the VMX support and lock bits are enabled in the
+ * feature-control MSR.
+ */
+ fctrl = rdmsr(MSR_IA32_FEAT_CTRL);
+ if ((fctrl & IA32_FEAT_CTRL_LOCK) == 0 ||
+ (fctrl & IA32_FEAT_CTRL_VMX_EN) == 0) {
+ fctrl = fctrl | IA32_FEAT_CTRL_VMX_EN | IA32_FEAT_CTRL_LOCK;
+ wrmsr(MSR_IA32_FEAT_CTRL, fctrl);
+ }
+
+ setcr4(getcr4() | CR4_VMXE);
+
+ if (hma_vmx_vmxon(vmxon_pa) == 0) {
+ hma_vmx_status[id] = VCS_READY;
+ } else {
+ hma_vmx_status[id] = VCS_ERROR;
+
+ /*
+ * If VMX has already been marked active and available for the
+ * system, then failure to perform VMXON on a newly-onlined CPU
+ * represents a fatal problem. Continuing on would mean
+ * failure for any hypervisor thread which landed here.
+ */
+ if (hma_vmx_ready) {
+ panic("VMXON failure after VMX marked ready");
+ }
+ }
+ return (0);
+}
+
+/* ARGSUSED2 */
+static int
+hma_vmx_cpu_setup(cpu_setup_t what, int id, void *arg)
+{
+ ASSERT(MUTEX_HELD(&cpu_lock));
+ ASSERT(id >= 0 && id < NCPU);
+
+ switch (what) {
+ case CPU_CONFIG:
+ case CPU_ON:
+ case CPU_INIT:
+ break;
+ default:
+ /*
+ * Other events, such as CPU offlining, are of no interest.
+ * Letting the VMX state linger should not cause any harm.
+ *
+ * This logic assumes that any offlining activity is strictly
+ * administrative in nature and will not alter any existing
+ * configuration (such as %cr4 bits previously set).
+ */
+ return (0);
+ }
+
+ /* Perform initialization if it has not been previously attempted. */
+ if (hma_vmx_status[id] != VCS_UNINITIALIZED) {
+ return ((hma_vmx_status[id] == VCS_READY) ? 0 : -1);
+ }
+
+ /* Allocate the VMXON page for this CPU */
+ if (hma_vmx_vmxon_page[id] == NULL) {
+ caddr_t va;
+ pfn_t pfn;
+
+ va = kmem_alloc(PAGESIZE, KM_SLEEP);
+ VERIFY0((uintptr_t)va & PAGEOFFSET);
+ hma_vmx_vmxon_page[id] = va;
+
+ /* Initialize the VMX revision field as expected */
+ bcopy(&hma_vmx_revision, va, sizeof (hma_vmx_revision));
+
+ /*
+ * Cache the physical address of the VMXON page rather than
+ * looking it up later when the potential blocking of
+ * hat_getpfnum would be less acceptable.
+ */
+ pfn = hat_getpfnum(kas.a_hat, va);
+ hma_vmx_vmxon_pa[id] = (pfn << PAGESHIFT);
+ } else {
+ VERIFY(hma_vmx_vmxon_pa[id] != 0);
+ }
+
+ kpreempt_disable();
+ if (CPU->cpu_seqid == id) {
+ /* Perform vmxon setup directly if this CPU is the target */
+ (void) hma_vmx_cpu_vmxon(0, 0, 0);
+ kpreempt_enable();
+ } else {
+ cpuset_t set;
+
+ /* Use a cross-call if a remote CPU is the target */
+ kpreempt_enable();
+ cpuset_zero(&set);
+ cpuset_add(&set, id);
+ xc_sync(0, 0, 0, CPUSET2BV(set), hma_vmx_cpu_vmxon);
+ }
+
+ return (hma_vmx_status[id] != VCS_READY);
+}
+
+static int
+hma_vmx_init(void)
+{
+ cpu_t *cp;
+ uint64_t msr;
+ int err = 0;
+ const char *msg = NULL;
+
+ if (!is_x86_feature(x86_featureset, X86FSET_VMX)) {
+ msg = "CPU does not support VMX";
+ goto bail;
+ }
+
+ /* Has the BIOS set the feature-control lock bit without VMX enabled? */
+ msr = rdmsr(MSR_IA32_FEAT_CTRL);
+ if ((msr & IA32_FEAT_CTRL_LOCK) != 0 &&
+ (msr & IA32_FEAT_CTRL_VMX_EN) == 0) {
+ msg = "VMX support disabled by BIOS";
+ goto bail;
+ }
+
+ /* Does VMX support basic INS/OUTS functionality */
+ msr = rdmsr(MSR_IA32_VMX_BASIC);
+ if ((msr & IA32_VMX_BASIC_INS_OUTS) == 0) {
+ msg = "VMX does not support INS/OUTS";
+ goto bail;
+ }
+ /* Record the VMX revision for later VMXON usage */
+ hma_vmx_revision = (uint32_t)msr;
+
+ mutex_enter(&cpu_lock);
+ /* Perform VMX configuration for already-online CPUs. */
+ cp = cpu_active;
+ do {
+ err = hma_vmx_cpu_setup(CPU_ON, cp->cpu_seqid, NULL);
+ if (err != 0) {
+ msg = "failure during VMXON setup";
+ mutex_exit(&cpu_lock);
+ goto bail;
+ }
+ } while ((cp = cp->cpu_next_onln) != cpu_active);
+
+ /*
+ * Register callback for later-onlined CPUs and perform other remaining
+ * resource allocation.
+ */
+ register_cpu_setup_func(hma_vmx_cpu_setup, NULL);
+ mutex_exit(&cpu_lock);
+
+ hma_vmx_vpid = id_space_create("hma_vmx_vpid", HMA_VPID_RESERVED + 1,
+ UINT16_MAX);
+ hma_vmx_ready = B_TRUE;
+
+ return (0);
+
+bail:
+ hma_vmx_error = msg;
+ cmn_err(CE_NOTE, "hma_vmx_init: %s", msg);
+ return (-1);
+}
+
+
+static int
+hma_svm_init(void)
+{
+ /* punt on AMD for now */
+ return (ENOTSUP);
+}
diff --git a/usr/src/uts/i86pc/os/mp_startup.c b/usr/src/uts/i86pc/os/mp_startup.c
index fef8f2759d..f292b52fbe 100644
--- a/usr/src/uts/i86pc/os/mp_startup.c
+++ b/usr/src/uts/i86pc/os/mp_startup.c
@@ -77,6 +77,8 @@
#include <sys/sysmacros.h>
#if defined(__xpv)
#include <sys/hypervisor.h>
+#else
+#include <sys/hma.h>
#endif
#include <sys/cpu_module.h>
#include <sys/ontrap.h>
@@ -1610,6 +1612,14 @@ done:
workaround_errata_end();
cmi_post_mpstartup();
+#if !defined(__xpv)
+ /*
+ * Once other CPUs have completed startup procedures, perform
+ * initialization of hypervisor resources for HMA.
+ */
+ hma_init();
+#endif
+
if (use_mp && ncpus != boot_max_ncpus) {
cmn_err(CE_NOTE,
"System detected %d cpus, but "
diff --git a/usr/src/uts/i86pc/os/pc_hvm.c b/usr/src/uts/i86pc/os/pc_hvm.c
deleted file mode 100644
index 60f8e8d3ca..0000000000
--- a/usr/src/uts/i86pc/os/pc_hvm.c
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * This file and its contents are supplied under the terms of the
- * Common Development and Distribution License ("CDDL"), version 1.0.
- * You may only use this file in accordance with the terms of version
- * 1.0 of the CDDL.
- *
- * A full copy of the text of the CDDL should have accompanied this
- * source. A copy of the CDDL is also available via the Internet at
- * http://www.illumos.org/license/CDDL.
- */
-
-/*
- * Copyright 2018 Joyent, Inc.
- */
-
-#include <sys/cmn_err.h>
-#include <sys/ddi.h>
-#include <sys/sunddi.h>
-#include <sys/param.h>
-#include <sys/types.h>
-#include <sys/mutex.h>
-#include <sys/debug.h>
-
-static kmutex_t hvm_excl_lock;
-static const char *hvm_excl_holder = NULL;
-
-/*
- * HVM Exclusion Interface
- *
- * To avoid VMX/SVM conflicts from arising when multiple hypervisor providers
- * (eg. KVM, bhyve) are shipped with the system, this simple advisory locking
- * system is presented for their use. Until a proper hypervisor API, like the
- * one in OSX, is shipped in illumos, this will serve as opt-in regulation to
- * dictate that only a single hypervisor be allowed to configure the system and
- * run at any given time.
- */
-
-boolean_t
-hvm_excl_hold(const char *consumer)
-{
- boolean_t res;
-
- mutex_enter(&hvm_excl_lock);
- if (hvm_excl_holder == NULL) {
- hvm_excl_holder = consumer;
- res = B_TRUE;
- } else {
- cmn_err(CE_WARN, "zone '%s' cannot take HVM exclusion lock as "
- "'%s': held by '%s'", curproc->p_zone->zone_name, consumer,
- hvm_excl_holder);
- res = B_FALSE;
- }
- mutex_exit(&hvm_excl_lock);
-
- return (res);
-}
-
-void
-hvm_excl_rele(const char *consumer)
-{
- mutex_enter(&hvm_excl_lock);
- VERIFY(consumer == hvm_excl_holder);
- hvm_excl_holder = NULL;
- mutex_exit(&hvm_excl_lock);
-}
diff --git a/usr/src/uts/i86pc/sys/hma.h b/usr/src/uts/i86pc/sys/hma.h
index 00009cf439..0c6161fdfc 100644
--- a/usr/src/uts/i86pc/sys/hma.h
+++ b/usr/src/uts/i86pc/sys/hma.h
@@ -30,6 +30,25 @@
extern "C" {
#endif
+
+/*
+ * Register a hypervisor with HMA. On success, a pointer to the opaque
+ * registration token will be returned, indicating that proper host setup has
+ * occurred for further hypervisor actions.
+ */
+typedef struct hma_reg hma_reg_t;
+extern hma_reg_t *hma_register(const char *);
+extern void hma_unregister(hma_reg_t *);
+
+/*
+ * Allocate or free a VPID for use with VMX.
+ *
+ * This must not be performed by a hypervisor until it has successfully
+ * registered via hma_register().
+ */
+extern uint16_t hma_vmx_vpid_alloc(void);
+extern void hma_vmx_vpid_free(uint16_t);
+
/*
* FPU related management. These functions provide a set of APIs to manage the
* FPU state and switch between host and guest management of this state.
@@ -96,6 +115,9 @@ extern void hma_fpu_stop_guest(hma_fpu_t *);
extern void hma_fpu_get_fxsave_state(const hma_fpu_t *, struct fxsave_state *);
extern int hma_fpu_set_fxsave_state(hma_fpu_t *, const struct fxsave_state *);
+/* Perform HMA initialization steps during boot-up. */
+extern void hma_init(void);
+
#ifdef __cplusplus
}
#endif