diff options
| author | Patrick Mooney <pmooney@pfmooney.com> | 2018-07-18 20:14:58 +0000 |
|---|---|---|
| committer | Patrick Mooney <pmooney@pfmooney.com> | 2018-08-21 17:54:16 +0000 |
| commit | befffd577ca6c3a090d7d3c72d267a383c3a3c45 (patch) | |
| tree | 3d1b28dc8970770089d4a47d74f4d028bb59d110 /usr/src/uts/i86pc | |
| parent | dce88dec0a5dd9849b881d131e2c0f78a5ba4c97 (diff) | |
| download | illumos-joyent-befffd577ca6c3a090d7d3c72d267a383c3a3c45.tar.gz | |
OS-7080 bhyve and KVM should coexist in peace
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: John Levon <john.levon@joyent.com>
Approved by: John Levon <john.levon@joyent.com>
Diffstat (limited to 'usr/src/uts/i86pc')
| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | usr/src/uts/i86pc/Makefile.files | 3 |
| -rw-r--r-- | usr/src/uts/i86pc/io/vmm/intel/vmx.c | 92 |
| -rw-r--r-- | usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c | 134 |
| -rw-r--r-- | usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c | 53 |
| -rw-r--r-- | usr/src/uts/i86pc/ml/hma_asm.s (renamed from usr/src/uts/i86pc/sys/pc_hvm.h) | 39 |
| -rw-r--r-- | usr/src/uts/i86pc/os/hma.c | 338 |
| -rw-r--r-- | usr/src/uts/i86pc/os/mp_startup.c | 10 |
| -rw-r--r-- | usr/src/uts/i86pc/os/pc_hvm.c | 65 |
| -rw-r--r-- | usr/src/uts/i86pc/sys/hma.h | 22 |
9 files changed, 484 insertions, 272 deletions
diff --git a/usr/src/uts/i86pc/Makefile.files b/usr/src/uts/i86pc/Makefile.files index 2a94505acb..2f3a525730 100644 --- a/usr/src/uts/i86pc/Makefile.files +++ b/usr/src/uts/i86pc/Makefile.files @@ -64,6 +64,8 @@ CORE_OBJS += \ hardclk.o \ hat_i86.o \ hat_kdi.o \ + hma.o \ + hma_asm.o \ hma_fpu.o \ hment.o \ hold_page.o \ @@ -100,7 +102,6 @@ CORE_OBJS += \ memscrub.o \ mpcore.o \ notes.o \ - pc_hvm.o \ pci_bios.o \ pci_cfgacc.o \ pci_cfgacc_x86.o \ diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx.c b/usr/src/uts/i86pc/io/vmm/intel/vmx.c index e07ee0ea52..b119cde3c6 100644 --- a/usr/src/uts/i86pc/io/vmm/intel/vmx.c +++ b/usr/src/uts/i86pc/io/vmm/intel/vmx.c @@ -57,6 +57,7 @@ __FBSDID("$FreeBSD$"); #include <sys/x86_archext.h> #include <sys/smp_impldefs.h> #include <sys/ht.h> +#include <sys/hma.h> #endif #include <vm/vm.h> @@ -159,9 +160,10 @@ static MALLOC_DEFINE(M_VLAPIC, "vlapic", "vlapic"); SYSCTL_DECL(_hw_vmm); SYSCTL_NODE(_hw_vmm, OID_AUTO, vmx, CTLFLAG_RW, NULL, NULL); +#ifdef __FreeBSD__ int vmxon_enabled[MAXCPU]; static char vmxon_region[MAXCPU][PAGE_SIZE] __aligned(PAGE_SIZE); -static char *vmxon_region_pa[MAXCPU]; +#endif /*__FreeBSD__ */ static uint32_t pinbased_ctls, procbased_ctls, procbased_ctls2; static uint32_t exit_ctls, entry_ctls; @@ -510,7 +512,11 @@ vpid_free(int vpid) */ if (vpid > VM_MAXCPU) +#ifdef __FreeBSD__ free_unr(vpid_unr, vpid); +#else + hma_vmx_vpid_free((uint16_t)vpid); +#endif } static void @@ -535,7 +541,14 @@ vpid_alloc(uint16_t *vpid, int num) * Allocate a unique VPID for each vcpu from the unit number allocator. */ for (i = 0; i < num; i++) { +#ifdef __FreeBSD__ x = alloc_unr(vpid_unr); +#else + uint16_t tmp; + + tmp = hma_vmx_vpid_alloc(); + x = (tmp == 0) ? 
-1 : tmp; +#endif if (x == -1) break; else @@ -564,6 +577,7 @@ vpid_alloc(uint16_t *vpid, int num) } } +#ifdef __FreeBSD__ static void vpid_init(void) { @@ -604,10 +618,8 @@ vmx_disable(void *arg __unused) static int vmx_cleanup(void) { -#ifdef __FreeBSD__ if (pirvec >= 0) lapic_ipi_free(pirvec); -#endif if (vpid_unr != NULL) { delete_unrhdr(vpid_unr); @@ -636,11 +648,7 @@ vmx_enable(void *arg __unused) load_cr4(rcr4() | CR4_VMXE); *(uint32_t *)vmxon_region[curcpu] = vmx_revision(); -#ifdef __FreeBSD__ error = vmxon(vmxon_region[curcpu]); -#else - error = vmxon(vmxon_region_pa[curcpu]); -#endif if (error == 0) vmxon_enabled[curcpu] = 1; } @@ -652,12 +660,30 @@ vmx_restore(void) if (vmxon_enabled[curcpu]) vmxon(vmxon_region[curcpu]); } +#else /* __FreeBSD__ */ +static int +vmx_cleanup(void) +{ + /* This is taken care of by the hma registration */ + return (0); +} + +static void +vmx_restore(void) +{ + /* No-op on illumos */ +} +#endif /* __FreeBSD__ */ static int vmx_init(int ipinum) { int error, use_tpr_shadow; +#ifdef __FreeBSD__ uint64_t basic, fixed0, fixed1, feature_control; +#else + uint64_t fixed0, fixed1; +#endif uint32_t tmp, procbased2_vid_bits; #ifdef __FreeBSD__ @@ -666,13 +692,6 @@ vmx_init(int ipinum) printf("vmx_init: processor does not support VMX operation\n"); return (ENXIO); } -#else - if (!is_x86_feature(x86_featureset, X86FSET_VMX)) { - cmn_err(CE_WARN, - "vmx_init: processor does not support VMX operation\n"); - return (ENXIO); - } -#endif /* * Verify that MSR_IA32_FEATURE_CONTROL lock and VMXON enable bits @@ -695,6 +714,7 @@ vmx_init(int ipinum) "capabilities\n"); return (EINVAL); } +#endif /* __FreeBSD__ */ /* Check support for primary processor-based VM-execution controls */ error = vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS, @@ -890,23 +910,16 @@ vmx_init(int ipinum) cr4_ones_mask = fixed0 & fixed1; cr4_zeros_mask = ~fixed0 & ~fixed1; +#ifdef __FreeBSD__ vpid_init(); +#endif vmx_msr_init(); -#ifndef __FreeBSD__ - /* - * Since vtophys requires 
locks to complete, cache the physical - * addresses to the vmxon pages now, rather than attempting the - * translation in the sensitive cross-call context. - */ - for (uint_t i = 0; i < MAXCPU; i++) { - vmxon_region_pa[i] = (char *)vtophys(vmxon_region[i]); - } -#endif /* __FreeBSD__ */ - +#ifdef __FreeBSD__ /* enable VMX operation */ smp_rendezvous(NULL, vmx_enable, NULL, NULL); +#endif vmx_initialized = 1; @@ -4006,37 +4019,12 @@ struct vmm_ops vmm_ops_intel = { #ifndef __FreeBSD__ /* Side-effect free HW validation derived from checks in vmx_init. */ int -vmx_x86_supported(char **msg) +vmx_x86_supported(const char **msg) { int error; - uint64_t basic, feature_control; uint32_t tmp; - if (!is_x86_feature(x86_featureset, X86FSET_VMX)) { - *msg = "processor does not support VMX operation"; - return (ENXIO); - } - - /* - * Verify that MSR_IA32_FEATURE_CONTROL lock and VMXON enable bits - * are set (bits 0 and 2 respectively). - */ - feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL); - if ((feature_control & IA32_FEATURE_CONTROL_LOCK) == 1 && - (feature_control & IA32_FEATURE_CONTROL_VMX_EN) == 0) { - *msg = "VMX operation disabled by BIOS"; - return (ENXIO); - } - - /* - * Verify capabilities MSR_VMX_BASIC: - * - bit 54 indicates support for INS/OUTS decoding - */ - basic = rdmsr(MSR_VMX_BASIC); - if ((basic & (1UL << 54)) == 0) { - *msg = "processor does not support desired basic capabilities"; - return (EINVAL); - } + ASSERT(msg != NULL); /* Check support for primary processor-based VM-execution controls */ error = vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS, diff --git a/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c b/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c index 3c0d9beec2..817d88111f 100644 --- a/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c +++ b/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c @@ -24,13 +24,14 @@ #include <sys/mkdev.h> #include <sys/sunddi.h> #include <sys/fs/dv_node.h> -#include <sys/pc_hvm.h> #include <sys/cpuset.h> #include <sys/id_space.h> #include 
<sys/fs/sdev_plugin.h> #include <sys/ht.h> #include <sys/kernel.h> +#include <sys/hma.h> +#include <sys/x86_archext.h> #include <sys/vmm.h> #include <sys/vmm_instruction_emul.h> @@ -64,8 +65,7 @@ static void *vmm_statep; static kmutex_t vmmdev_mtx; static id_space_t *vmmdev_minors; -static uint_t vmmdev_inst_count = 0; -static boolean_t vmmdev_load_failure; +static hma_reg_t *vmm_hma_reg; static kmutex_t vmm_mtx; static list_t vmmdev_list; @@ -78,7 +78,7 @@ static const char *vmmdev_hvm_name = "bhyve"; static sdev_plugin_hdl_t vmm_sdev_hdl; /* From uts/i86pc/io/vmm/intel/vmx.c */ -extern int vmx_x86_supported(char **); +extern int vmx_x86_supported(const char **); /* Holds and hooks from drivers external to vmm */ struct vmm_hold { @@ -1161,49 +1161,6 @@ done: return (error); } -static boolean_t -vmmdev_mod_incr() -{ - ASSERT(MUTEX_HELD(&vmmdev_mtx)); - - if (vmmdev_inst_count == 0) { - /* - * If the HVM portions of the module failed initialize on a - * previous attempt, do not bother with a retry. This tracker - * is cleared on module attach, allowing subsequent attempts if - * desired by the user. 
- */ - if (vmmdev_load_failure) { - return (B_FALSE); - } - - if (!hvm_excl_hold(vmmdev_hvm_name)) { - return (B_FALSE); - } - if (vmm_mod_load() != 0) { - hvm_excl_rele(vmmdev_hvm_name); - vmmdev_load_failure = B_TRUE; - return (B_FALSE); - } - } - - vmmdev_inst_count++; - return (B_TRUE); -} - -static void -vmmdev_mod_decr(void) -{ - ASSERT(MUTEX_HELD(&vmmdev_mtx)); - ASSERT(vmmdev_inst_count > 0); - - vmmdev_inst_count--; - if (vmmdev_inst_count == 0) { - VERIFY0(vmm_mod_unload()); - hvm_excl_rele(vmmdev_hvm_name); - } -} - static vmm_softc_t * vmm_lookup(const char *name) { @@ -1233,17 +1190,11 @@ vmmdev_do_vm_create(char *name, cred_t *cr) } mutex_enter(&vmmdev_mtx); - if (!vmmdev_mod_incr()) { - mutex_exit(&vmmdev_mtx); - return (ENXIO); - } - mutex_enter(&vmm_mtx); /* Look for duplicates names */ if (vmm_lookup(name) != NULL) { mutex_exit(&vmm_mtx); - vmmdev_mod_decr(); mutex_exit(&vmmdev_mtx); return (EEXIST); } @@ -1254,7 +1205,6 @@ vmmdev_do_vm_create(char *name, cred_t *cr) sc = list_next(&vmmdev_list, sc)) { if (sc->vmm_zone == curzone) { mutex_exit(&vmm_mtx); - vmmdev_mod_decr(); mutex_exit(&vmmdev_mtx); return (EINVAL); } @@ -1296,7 +1246,6 @@ vmmdev_do_vm_create(char *name, cred_t *cr) ddi_remove_minor_node(vmm_dip, name); fail: id_free(vmmdev_minors, minor); - vmmdev_mod_decr(); if (sc != NULL) { ddi_soft_state_free(vmm_statep, minor); } @@ -1548,7 +1497,6 @@ vmm_do_vm_destroy_locked(vmm_softc_t *sc, boolean_t clean_zsd) ddi_soft_state_free(vmm_statep, minor); id_free(vmmdev_minors, minor); (void) devfs_clean(pdip, NULL, DV_CLEAN_FORCE); - vmmdev_mod_decr(); return (0); } @@ -1662,7 +1610,7 @@ static int vmm_is_supported(intptr_t arg) { int r; - char *msg; + const char *msg; if (!vmm_is_intel()) return (ENXIO); @@ -1882,47 +1830,66 @@ vmm_info(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result) static int vmm_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) { - switch (cmd) { - case DDI_ATTACH: - break; - default: + sdev_plugin_hdl_t sph; + 
hma_reg_t *reg = NULL; + boolean_t vmm_loaded = B_FALSE; + + if (cmd != DDI_ATTACH) { + return (DDI_FAILURE); + } + + mutex_enter(&vmmdev_mtx); + /* Ensure we are not already attached. */ + if (vmm_dip != NULL) { + mutex_exit(&vmmdev_mtx); return (DDI_FAILURE); } vmm_sol_glue_init(); + vmm_arena_init(); - /* - * Create control node. Other nodes will be created on demand. - */ + if ((reg = hma_register(vmmdev_hvm_name)) == NULL) { + goto fail; + } else if (vmm_mod_load() != 0) { + goto fail; + } + vmm_loaded = B_TRUE; + + /* Create control node. Other nodes will be created on demand. */ if (ddi_create_minor_node(dip, "ctl", S_IFCHR, VMM_CTL_MINOR, DDI_PSEUDO, 0) != 0) { - return (DDI_FAILURE); + goto fail; } - if ((vmm_sdev_hdl = sdev_plugin_register("vmm", &vmm_sdev_ops, - NULL)) == NULL) { + if ((sph = sdev_plugin_register("vmm", &vmm_sdev_ops, NULL)) == NULL) { ddi_remove_minor_node(dip, NULL); - dip = NULL; - return (DDI_FAILURE); + goto fail; } ddi_report_dev(dip); - - vmm_arena_init(); - - vmmdev_load_failure = B_FALSE; + vmm_hma_reg = reg; + vmm_sdev_hdl = sph; vmm_dip = dip; - + mutex_exit(&vmmdev_mtx); return (DDI_SUCCESS); + +fail: + if (vmm_loaded) { + VERIFY0(vmm_mod_unload()); + } + if (reg != NULL) { + hma_unregister(reg); + } + vmm_sol_glue_cleanup(); + vmm_arena_fini(); + mutex_exit(&vmmdev_mtx); + return (DDI_FAILURE); } static int vmm_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) { - switch (cmd) { - case DDI_DETACH: - break; - default: + if (cmd != DDI_DETACH) { return (DDI_FAILURE); } @@ -1937,19 +1904,12 @@ vmm_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) if (mutex_tryenter(&vmmdev_mtx) == 0) return (DDI_FAILURE); - if (vmmdev_inst_count != 0) { - mutex_exit(&vmmdev_mtx); - return (DDI_FAILURE); - } - mutex_enter(&vmm_mtx); - if (!list_is_empty(&vmmdev_list)) { mutex_exit(&vmm_mtx); mutex_exit(&vmmdev_mtx); return (DDI_FAILURE); } - mutex_exit(&vmm_mtx); if (vmm_sdev_hdl != NULL && sdev_plugin_unregister(vmm_sdev_hdl) != 0) { @@ -1961,6 
+1921,10 @@ vmm_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) /* Remove the control node. */ ddi_remove_minor_node(dip, "ctl"); vmm_dip = NULL; + + VERIFY0(vmm_mod_unload()); + hma_unregister(vmm_hma_reg); + vmm_hma_reg = NULL; vmm_sol_glue_cleanup(); vmm_arena_fini(); diff --git a/usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c b/usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c index b523b69bd9..b73c937fad 100644 --- a/usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c +++ b/usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c @@ -320,58 +320,6 @@ critical_exit(void) kpreempt_enable(); } -struct unrhdr; -static kmutex_t unr_lock; -static uint_t unr_idx; - -/* - * Allocate a new unrheader set. - * - * Highest and lowest valid values given as parameters. - */ -struct unrhdr * -new_unrhdr(int low, int high, struct mtx *mtx) -{ - id_space_t *ids; - char name[] = "vmm_unr_00000000"; - - ASSERT(mtx == NULL); - - mutex_enter(&unr_lock); - /* Get a unique name for the id space */ - (void) snprintf(name, sizeof (name), "vmm_unr_%08X", unr_idx); - VERIFY(++unr_idx != UINT_MAX); - mutex_exit(&unr_lock); - - ids = id_space_create(name, low, high); - - return ((struct unrhdr *)ids); -} - -void -delete_unrhdr(struct unrhdr *uh) -{ - id_space_t *ids = (id_space_t *)uh; - - id_space_destroy(ids); -} - -int -alloc_unr(struct unrhdr *uh) -{ - id_space_t *ids = (id_space_t *)uh; - - return (id_alloc(ids)); -} - -void -free_unr(struct unrhdr *uh, u_int item) -{ - id_space_t *ids = (id_space_t *)uh; - - id_free(ids, item); -} - static void vmm_glue_callout_handler(void *arg) @@ -571,7 +519,6 @@ vmm_sol_glue_init(void) { vmm_alloc_init(); vmm_cpuid_init(); - unr_idx = 0; } void diff --git a/usr/src/uts/i86pc/sys/pc_hvm.h b/usr/src/uts/i86pc/ml/hma_asm.s index 38acf052e4..3b5995bb1a 100644 --- a/usr/src/uts/i86pc/sys/pc_hvm.h +++ b/usr/src/uts/i86pc/ml/hma_asm.s @@ -10,26 +10,33 @@ */ /* - * Copyright 2017 Joyent, Inc. + * Copyright 2018 Joyent, Inc. 
*/ -#ifndef _PC_HVM_H -#define _PC_HVM_H +#include <sys/asm_linkage.h> -#ifdef __cplusplus -extern "C" { -#endif +#if defined(__lint) -#if defined(_KERNEL) - -extern boolean_t hvm_excl_hold(const char *); -extern void hvm_excl_rele(const char *); - -#endif /* defined(_KERNEL) */ - -#ifdef __cplusplus +int +hma_vmx_vmxon(uintptr_t arg) +{ + return (0); } -#endif -#endif /* _PC_HVM_H */ +#else /* __lint */ + ENTRY_NP(hma_vmx_vmxon) + push %rbp + movq %rsp, %rbp + pushq %rdi + + xorl %eax, %eax + vmxon -0x8(%rbp) + ja 1f /* CF=0, ZF=0 (success) */ + incl %eax +1: + + leave + ret + SET_SIZE(hma_vmx_vmxon) +#endif /* __lint */ diff --git a/usr/src/uts/i86pc/os/hma.c b/usr/src/uts/i86pc/os/hma.c new file mode 100644 index 0000000000..34ae85086d --- /dev/null +++ b/usr/src/uts/i86pc/os/hma.c @@ -0,0 +1,338 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2018 Joyent, Inc. 
+ */ + +#include <sys/cpuvar.h> +#include <sys/types.h> +#include <sys/errno.h> +#include <sys/machsystm.h> +#include <sys/controlregs.h> +#include <sys/x86_archext.h> +#include <sys/id_space.h> +#include <sys/hma.h> +#include <sys/cmn_err.h> +#include <vm/hat.h> +#include <vm/as.h> + +struct hma_reg { + const char *hr_name; + list_node_t hr_node; +}; + +static kmutex_t hma_lock; +static list_t hma_registrations; + +static boolean_t hma_vmx_ready = B_FALSE; +static const char *hma_vmx_error = NULL; +static id_space_t *hma_vmx_vpid; + +typedef enum vmx_cpu_state { + VCS_UNINITIALIZED = 0, + VCS_READY, + VCS_ERROR +} vmx_cpu_state_t; + +/* + * The bulk of VMX-related HMA state is protected by cpu_lock, rather than a + * mutex specific to the module. It (cpu_lock) is already required for the + * state needed to perform setup on all CPUs, so it was a natural fit to + * protect this data too. + */ +static void *hma_vmx_vmxon_page[NCPU]; +static uintptr_t hma_vmx_vmxon_pa[NCPU]; +static vmx_cpu_state_t hma_vmx_status[NCPU]; +static uint32_t hma_vmx_revision; + + +static int hma_vmx_init(void); +static int hma_svm_init(void); + +void +hma_init(void) +{ + mutex_init(&hma_lock, NULL, MUTEX_DEFAULT, NULL); + list_create(&hma_registrations, sizeof (struct hma_reg), + offsetof(struct hma_reg, hr_node)); + + switch (cpuid_getvendor(CPU)) { + case X86_VENDOR_Intel: + (void) hma_vmx_init(); + break; + case X86_VENDOR_AMD: + (void) hma_svm_init(); + break; + default: + break; + } +} + +hma_reg_t * +hma_register(const char *name) +{ + struct hma_reg *reg; + boolean_t is_ready; + + VERIFY(name != NULL); + + reg = kmem_zalloc(sizeof (*reg), KM_SLEEP); + reg->hr_name = name; + + mutex_enter(&hma_lock); + switch (cpuid_getvendor(CPU)) { + case X86_VENDOR_Intel: + is_ready = hma_vmx_ready; + break; + case X86_VENDOR_AMD: + /* Punt on SVM support for now */ + is_ready = B_FALSE; + break; + default: + is_ready = B_FALSE; + break; + } + + if (!is_ready) { + kmem_free(reg, sizeof (*reg)); + 
reg = NULL; + } else { + list_insert_tail(&hma_registrations, reg); + } + mutex_exit(&hma_lock); + + return (reg); +} + +void +hma_unregister(hma_reg_t *reg) +{ + VERIFY(reg != NULL); + VERIFY(!list_is_empty(&hma_registrations)); + + mutex_enter(&hma_lock); + list_remove(&hma_registrations, reg); + mutex_exit(&hma_lock); + kmem_free(reg, sizeof (*reg)); +} + +/* + * VPID 0 is reserved for instances where VPID is disabled. Some hypervisors + * (read: bhyve) reserve lower-order VPIDs for use in fallback behavior if + * unique VPIDs could not be allocated for all the vCPUs belonging to a VM. + */ +#define HMA_VPID_RESERVED NCPU + +uint16_t +hma_vmx_vpid_alloc(void) +{ + id_t res; + + res = id_alloc_nosleep(hma_vmx_vpid); + if (res == -1) { + return (0); + } else { + ASSERT(res > HMA_VPID_RESERVED && res <= UINT16_MAX); + return (res); + } +} + +void +hma_vmx_vpid_free(uint16_t vpid) +{ + VERIFY(vpid > HMA_VPID_RESERVED); + id_free(hma_vmx_vpid, (id_t)vpid); +} + + +extern int hma_vmx_vmxon(uintptr_t); + +/* ARGSUSED */ +static int +hma_vmx_cpu_vmxon(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3) +{ + uint64_t fctrl; + processorid_t id = CPU->cpu_seqid; + void *vmxon_region = hma_vmx_vmxon_page[id]; + uintptr_t vmxon_pa = hma_vmx_vmxon_pa[id]; + + VERIFY(vmxon_region != NULL && vmxon_pa != 0); + + /* + * Ensure that the VMX support and lock bits are enabled in the + * feature-control MSR. + */ + fctrl = rdmsr(MSR_IA32_FEAT_CTRL); + if ((fctrl & IA32_FEAT_CTRL_LOCK) == 0 || + (fctrl & IA32_FEAT_CTRL_VMX_EN) == 0) { + fctrl = fctrl | IA32_FEAT_CTRL_VMX_EN | IA32_FEAT_CTRL_LOCK; + wrmsr(MSR_IA32_FEAT_CTRL, fctrl); + } + + setcr4(getcr4() | CR4_VMXE); + + if (hma_vmx_vmxon(vmxon_pa) == 0) { + hma_vmx_status[id] = VCS_READY; + } else { + hma_vmx_status[id] = VCS_ERROR; + + /* + * If VMX has already been marked active and available for the + * system, then failure to perform VMXON on a newly-onlined CPU + * represents a fatal problem. 
Continuing on would mean + * failure for any hypervisor thread which landed here. + */ + if (hma_vmx_ready) { + panic("VMXON failure after VMX marked ready"); + } + } + return (0); +} + +/* ARGSUSED2 */ +static int +hma_vmx_cpu_setup(cpu_setup_t what, int id, void *arg) +{ + ASSERT(MUTEX_HELD(&cpu_lock)); + ASSERT(id >= 0 && id < NCPU); + + switch (what) { + case CPU_CONFIG: + case CPU_ON: + case CPU_INIT: + break; + default: + /* + * Other events, such as CPU offlining, are of no interest. + * Letting the VMX state linger should not cause any harm. + * + * This logic assumes that any offlining activity is strictly + * administrative in nature and will not alter any existing + * configuration (such as %cr4 bits previously set). + */ + return (0); + } + + /* Perform initialization if it has not been previously attempted. */ + if (hma_vmx_status[id] != VCS_UNINITIALIZED) { + return ((hma_vmx_status[id] == VCS_READY) ? 0 : -1); + } + + /* Allocate the VMXON page for this CPU */ + if (hma_vmx_vmxon_page[id] == NULL) { + caddr_t va; + pfn_t pfn; + + va = kmem_alloc(PAGESIZE, KM_SLEEP); + VERIFY0((uintptr_t)va & PAGEOFFSET); + hma_vmx_vmxon_page[id] = va; + + /* Initialize the VMX revision field as expected */ + bcopy(&hma_vmx_revision, va, sizeof (hma_vmx_revision)); + + /* + * Cache the physical address of the VMXON page rather than + * looking it up later when the potential blocking of + * hat_getpfnum would be less acceptable. 
+ */ + pfn = hat_getpfnum(kas.a_hat, va); + hma_vmx_vmxon_pa[id] = (pfn << PAGESHIFT); + } else { + VERIFY(hma_vmx_vmxon_pa[id] != 0); + } + + kpreempt_disable(); + if (CPU->cpu_seqid == id) { + /* Perform vmxon setup directly if this CPU is the target */ + (void) hma_vmx_cpu_vmxon(0, 0, 0); + kpreempt_enable(); + } else { + cpuset_t set; + + /* Use a cross-call if a remote CPU is the target */ + kpreempt_enable(); + cpuset_zero(&set); + cpuset_add(&set, id); + xc_sync(0, 0, 0, CPUSET2BV(set), hma_vmx_cpu_vmxon); + } + + return (hma_vmx_status[id] != VCS_READY); +} + +static int +hma_vmx_init(void) +{ + cpu_t *cp; + uint64_t msr; + int err = 0; + const char *msg = NULL; + + if (!is_x86_feature(x86_featureset, X86FSET_VMX)) { + msg = "CPU does not support VMX"; + goto bail; + } + + /* Has the BIOS set the feature-control lock bit without VMX enabled? */ + msr = rdmsr(MSR_IA32_FEAT_CTRL); + if ((msr & IA32_FEAT_CTRL_LOCK) != 0 && + (msr & IA32_FEAT_CTRL_VMX_EN) == 0) { + msg = "VMX support disabled by BIOS"; + goto bail; + } + + /* Does VMX support basic INS/OUTS functionality */ + msr = rdmsr(MSR_IA32_VMX_BASIC); + if ((msr & IA32_VMX_BASIC_INS_OUTS) == 0) { + msg = "VMX does not support INS/OUTS"; + goto bail; + } + /* Record the VMX revision for later VMXON usage */ + hma_vmx_revision = (uint32_t)msr; + + mutex_enter(&cpu_lock); + /* Perform VMX configuration for already-online CPUs. */ + cp = cpu_active; + do { + err = hma_vmx_cpu_setup(CPU_ON, cp->cpu_seqid, NULL); + if (err != 0) { + msg = "failure during VMXON setup"; + mutex_exit(&cpu_lock); + goto bail; + } + } while ((cp = cp->cpu_next_onln) != cpu_active); + + /* + * Register callback for later-onlined CPUs and perform other remaining + * resource allocation. 
+ */ + register_cpu_setup_func(hma_vmx_cpu_setup, NULL); + mutex_exit(&cpu_lock); + + hma_vmx_vpid = id_space_create("hma_vmx_vpid", HMA_VPID_RESERVED + 1, + UINT16_MAX); + hma_vmx_ready = B_TRUE; + + return (0); + +bail: + hma_vmx_error = msg; + cmn_err(CE_NOTE, "hma_vmx_init: %s", msg); + return (-1); +} + + +static int +hma_svm_init(void) +{ + /* punt on AMD for now */ + return (ENOTSUP); +} diff --git a/usr/src/uts/i86pc/os/mp_startup.c b/usr/src/uts/i86pc/os/mp_startup.c index fef8f2759d..f292b52fbe 100644 --- a/usr/src/uts/i86pc/os/mp_startup.c +++ b/usr/src/uts/i86pc/os/mp_startup.c @@ -77,6 +77,8 @@ #include <sys/sysmacros.h> #if defined(__xpv) #include <sys/hypervisor.h> +#else +#include <sys/hma.h> #endif #include <sys/cpu_module.h> #include <sys/ontrap.h> @@ -1610,6 +1612,14 @@ done: workaround_errata_end(); cmi_post_mpstartup(); +#if !defined(__xpv) + /* + * Once other CPUs have completed startup procedures, perform + * initialization of hypervisor resources for HMA. + */ + hma_init(); +#endif + if (use_mp && ncpus != boot_max_ncpus) { cmn_err(CE_NOTE, "System detected %d cpus, but " diff --git a/usr/src/uts/i86pc/os/pc_hvm.c b/usr/src/uts/i86pc/os/pc_hvm.c deleted file mode 100644 index 60f8e8d3ca..0000000000 --- a/usr/src/uts/i86pc/os/pc_hvm.c +++ /dev/null @@ -1,65 +0,0 @@ -/* - * This file and its contents are supplied under the terms of the - * Common Development and Distribution License ("CDDL"), version 1.0. - * You may only use this file in accordance with the terms of version - * 1.0 of the CDDL. - * - * A full copy of the text of the CDDL should have accompanied this - * source. A copy of the CDDL is also available via the Internet at - * http://www.illumos.org/license/CDDL. - */ - -/* - * Copyright 2018 Joyent, Inc. 
- */ - -#include <sys/cmn_err.h> -#include <sys/ddi.h> -#include <sys/sunddi.h> -#include <sys/param.h> -#include <sys/types.h> -#include <sys/mutex.h> -#include <sys/debug.h> - -static kmutex_t hvm_excl_lock; -static const char *hvm_excl_holder = NULL; - -/* - * HVM Exclusion Interface - * - * To avoid VMX/SVM conflicts from arising when multiple hypervisor providers - * (eg. KVM, bhyve) are shipped with the system, this simple advisory locking - * system is presented for their use. Until a proper hypervisor API, like the - * one in OSX, is shipped in illumos, this will serve as opt-in regulation to - * dictate that only a single hypervisor be allowed to configure the system and - * run at any given time. - */ - -boolean_t -hvm_excl_hold(const char *consumer) -{ - boolean_t res; - - mutex_enter(&hvm_excl_lock); - if (hvm_excl_holder == NULL) { - hvm_excl_holder = consumer; - res = B_TRUE; - } else { - cmn_err(CE_WARN, "zone '%s' cannot take HVM exclusion lock as " - "'%s': held by '%s'", curproc->p_zone->zone_name, consumer, - hvm_excl_holder); - res = B_FALSE; - } - mutex_exit(&hvm_excl_lock); - - return (res); -} - -void -hvm_excl_rele(const char *consumer) -{ - mutex_enter(&hvm_excl_lock); - VERIFY(consumer == hvm_excl_holder); - hvm_excl_holder = NULL; - mutex_exit(&hvm_excl_lock); -} diff --git a/usr/src/uts/i86pc/sys/hma.h b/usr/src/uts/i86pc/sys/hma.h index 00009cf439..0c6161fdfc 100644 --- a/usr/src/uts/i86pc/sys/hma.h +++ b/usr/src/uts/i86pc/sys/hma.h @@ -30,6 +30,25 @@ extern "C" { #endif + +/* + * Register a hypervisor with HMA. On success, a pointer to the opaque + * registration token will be returned, indicating that proper host setup has + * occurred for further hypervisor actions. + */ +typedef struct hma_reg hma_reg_t; +extern hma_reg_t *hma_register(const char *); +extern void hma_unregister(hma_reg_t *); + +/* + * Allocate or free a VPID for use with VMX. 
+ * + * This must not be performed by a hypervisor until it has successfully + * registered via hma_register(). + */ +extern uint16_t hma_vmx_vpid_alloc(void); +extern void hma_vmx_vpid_free(uint16_t); + /* * FPU related management. These functions provide a set of APIs to manage the * FPU state and switch between host and guest management of this state. @@ -96,6 +115,9 @@ extern void hma_fpu_stop_guest(hma_fpu_t *); extern void hma_fpu_get_fxsave_state(const hma_fpu_t *, struct fxsave_state *); extern int hma_fpu_set_fxsave_state(hma_fpu_t *, const struct fxsave_state *); +/* Perform HMA initialization steps during boot-up. */ +extern void hma_init(void); + #ifdef __cplusplus } #endif |
