| author | Patrick Mooney <pmooney@pfmooney.com> | 2022-06-10 23:05:32 +0000 |
|---|---|---|
| committer | Patrick Mooney <pmooney@oxide.computer> | 2022-06-27 23:20:35 +0000 |
| commit | 54cf5b63effe805271443d5dd7afd37ec184fbab (patch) | |
| tree | c2da22ed12b56879537c652b894932f257462d7b /usr | |
| parent | ea962d11118b10579c946c4ac15559148ddf3cf8 (diff) | |
| download | illumos-joyent-54cf5b63effe805271443d5dd7afd37ec184fbab.tar.gz | |
14635 bhyve should expose additional vcpu state
Reviewed by: Luqman Aden <luqman@oxide.computer>
Reviewed by: Jordan Paige Hendricks <jordan@oxidecomputer.com>
Approved by: Robert Mustacchi <rm@fingolfin.org>
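
Reviewer note: this change reworks the vmm data-transfer interface so that guest MSR state (along with XCR0 and the VM-wide timing fields) can be read and written from userspace. Consumers issue the VM_DATA_READ ioctl twice: once with a zero-length buffer, which fails with ENOSPC and reports the required size in vdx_result_len, and again with an adequately sized buffer. Below is a minimal sketch of that pattern, modeled on the show_msrs() helper added to bhyvectl.c in this diff; error handling is abbreviated, and the device fd is assumed to come from vm_get_device_fd().

```c
/*
 * Minimal sketch of the two-pass VDC_MSR read added in this change,
 * following show_msrs() from bhyvectl.c below.  Assumes the
 * <sys/vmm_dev.h> and <sys/vmm_data.h> definitions from this commit.
 */
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/vmm_dev.h>
#include <sys/vmm_data.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

static int
dump_guest_msrs(int vmfd, int vcpu)
{
	struct vm_data_xfer xfer = {
		.vdx_vcpuid = vcpu,
		.vdx_class = VDC_MSR,
		.vdx_version = 1,
		.vdx_len = 0,
		.vdx_data = NULL,
	};

	/* Pass 1: zero-length read; kernel sets vdx_result_len, ENOSPC */
	if (ioctl(vmfd, VM_DATA_READ, &xfer) == 0 || errno != ENOSPC)
		return (EINVAL);

	struct vdi_field_entry_v1 *msrs = calloc(1, xfer.vdx_result_len);
	if (msrs == NULL)
		return (ENOMEM);
	xfer.vdx_data = msrs;
	xfer.vdx_len = xfer.vdx_result_len;

	/* Pass 2: buffer is now adequately sized for every exported MSR */
	if (ioctl(vmfd, VM_DATA_READ, &xfer) != 0) {
		free(msrs);
		return (errno);
	}

	const uint_t count =
	    xfer.vdx_result_len / sizeof (struct vdi_field_entry_v1);
	for (uint_t i = 0; i < count; i++) {
		printf("msr[%08x] = %016lx\n",
		    msrs[i].vfe_ident, msrs[i].vfe_value);
	}
	free(msrs);
	return (0);
}
```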
Diffstat (limited to 'usr')
| mode | path | lines changed |
|---|---|---|
| -rw-r--r-- | usr/src/cmd/bhyvectl/bhyvectl.c | 447 |
| -rw-r--r-- | usr/src/uts/intel/io/vmm/amd/svm.c | 64 |
| -rw-r--r-- | usr/src/uts/intel/io/vmm/amd/vmcb.c | 61 |
| -rw-r--r-- | usr/src/uts/intel/io/vmm/amd/vmcb.h | 1 |
| -rw-r--r-- | usr/src/uts/intel/io/vmm/intel/vmcs.c | 28 |
| -rw-r--r-- | usr/src/uts/intel/io/vmm/intel/vmcs.h | 1 |
| -rw-r--r-- | usr/src/uts/intel/io/vmm/intel/vmx.c | 242 |
| -rw-r--r-- | usr/src/uts/intel/io/vmm/sys/vmm_kernel.h | 23 |
| -rw-r--r-- | usr/src/uts/intel/io/vmm/vmm.c | 482 |
| -rw-r--r-- | usr/src/uts/intel/io/vmm/vmm_sol_dev.c | 65 |
| -rw-r--r-- | usr/src/uts/intel/io/vmm/x86.c | 62 |
| -rw-r--r-- | usr/src/uts/intel/io/vmm/x86.h | 85 |
| -rw-r--r-- | usr/src/uts/intel/sys/vmm.h | 1 |
| -rw-r--r-- | usr/src/uts/intel/sys/vmm_data.h | 42 |
| -rw-r--r-- | usr/src/uts/intel/sys/vmm_dev.h | 3 |
15 files changed, 1029 insertions, 578 deletions
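
The new VDC_VMM_ARCH class carries the VM-wide timing state (VAI_TSC_BOOT_OFFSET, VAI_BOOT_HRTIME, VAI_TSC_FREQ) used when saving or restoring an instance. When VDX_FLAG_READ_COPYIN is set, the caller pre-populates vfe_ident in each entry and the kernel fills in only those values; without the flag, all defined fields are emitted (see vmm_data_read_vmm_arch() in the diff below). A hedged sketch of a targeted read follows; passing -1 for vdx_vcpuid is an assumption, on the basis that the vcpu id should be ignored for VM-wide classes.

```c
/*
 * Sketch: targeted read of VM-wide arch fields via VDX_FLAG_READ_COPYIN.
 * Assumes the same headers as the MSR sketch above.  The -1 vcpuid is an
 * assumption; VDC_VMM_ARCH is VM-wide in this version, so the vcpu id
 * should not be consulted (see vmm_data_from_class() below).
 */
static int
read_boot_time_fields(int vmfd, uint64_t *tsc_off, uint64_t *boot_hrt)
{
	struct vdi_field_entry_v1 fields[2] = {
		{ .vfe_ident = VAI_TSC_BOOT_OFFSET },
		{ .vfe_ident = VAI_BOOT_HRTIME },
	};
	struct vm_data_xfer xfer = {
		.vdx_vcpuid = -1,
		.vdx_class = VDC_VMM_ARCH,
		.vdx_version = 1,
		.vdx_flags = VDX_FLAG_READ_COPYIN,
		.vdx_len = sizeof (fields),
		.vdx_data = fields,
	};

	if (ioctl(vmfd, VM_DATA_READ, &xfer) != 0)
		return (errno);

	/* The kernel filled vfe_value for exactly the idents we named */
	*tsc_off = fields[0].vfe_value;
	*boot_hrt = fields[1].vfe_value;
	return (0);
}
```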
diff --git a/usr/src/cmd/bhyvectl/bhyvectl.c b/usr/src/cmd/bhyvectl/bhyvectl.c index a6c86fd5fc..3b3caf0d20 100644 --- a/usr/src/cmd/bhyvectl/bhyvectl.c +++ b/usr/src/cmd/bhyvectl/bhyvectl.c @@ -51,9 +51,7 @@ __FBSDID("$FreeBSD$"); #include <sys/errno.h> #include <sys/mman.h> #include <sys/cpuset.h> -#ifndef __FreeBSD__ #include <sys/fp.h> -#endif /* __FreeBSD__ */ #include <stdio.h> #include <stdlib.h> @@ -72,6 +70,7 @@ __FBSDID("$FreeBSD$"); #include <machine/specialreg.h> #include <machine/vmm.h> #include <machine/vmm_dev.h> +#include <sys/vmm_data.h> #include <vmmapi.h> #include "amd/vmcb.h" @@ -95,10 +94,8 @@ usage(bool cpu_intel) " [--cpu=<vcpu_number>]\n" " [--create]\n" " [--destroy]\n" -#ifndef __FreeBSD__ " [--pmtmr-port=ioport]\n" " [--wrlock-cycle]\n" -#endif " [--get-all]\n" " [--get-stats]\n" " [--set-desc-ds]\n" @@ -186,9 +183,6 @@ usage(bool cpu_intel) " [--get-ldtr]\n" " [--set-x2apic-state=<state>]\n" " [--get-x2apic-state]\n" -#ifdef __FreeBSD__ - " [--unassign-pptdev=<bus/slot/func>]\n" -#endif " [--set-mem=<memory in units of MB>]\n" " [--get-lowmem]\n" " [--get-highmem]\n" @@ -307,16 +301,11 @@ static int set_cs, set_ds, set_es, set_fs, set_gs, set_ss, set_tr, set_ldtr; static int get_cs, get_ds, get_es, get_fs, get_gs, get_ss, get_tr, get_ldtr; static int set_x2apic_state, get_x2apic_state; enum x2apic_state x2apic_state; -#ifdef __FreeBSD__ -static int unassign_pptdev, bus, slot, func; -#endif static int run; static int get_cpu_topology; -#ifndef __FreeBSD__ static int pmtmr_port; static int wrlock_cycle; static int get_fpu; -#endif /* * VMCB specific. @@ -339,12 +328,13 @@ static int get_cr4_mask, get_cr4_shadow; static int get_cr3_targets; static int get_apic_access_addr, get_virtual_apic_addr, get_tpr_threshold; static int get_msr_bitmap, get_msr_bitmap_address; +static int get_guest_msrs; static int get_vpid_asid; static int get_inst_err, get_exit_ctls, get_entry_ctls; static int get_host_cr0, get_host_cr3, get_host_cr4; static int get_host_rip, get_host_rsp; -static int get_guest_pat, get_host_pat; -static int get_guest_sysenter, get_vmcs_link; +static int get_host_pat; +static int get_vmcs_link; static int get_exit_reason, get_vmcs_exit_qualification; static int get_vmcs_exit_interruption_info, get_vmcs_exit_interruption_error; static int get_vmcs_exit_inst_length; @@ -406,172 +396,7 @@ dump_vm_run_exitcode(struct vm_exit *vmexit, int vcpu) #define MSR_AMD7TH_START 0xC0010000 #define MSR_AMD7TH_END 0xC0011FFF -#ifdef __FreeBSD__ -static const char * -msr_name(uint32_t msr) -{ - static char buf[32]; - - switch(msr) { - case MSR_TSC: - return ("MSR_TSC"); - case MSR_EFER: - return ("MSR_EFER"); - case MSR_STAR: - return ("MSR_STAR"); - case MSR_LSTAR: - return ("MSR_LSTAR"); - case MSR_CSTAR: - return ("MSR_CSTAR"); - case MSR_SF_MASK: - return ("MSR_SF_MASK"); - case MSR_FSBASE: - return ("MSR_FSBASE"); - case MSR_GSBASE: - return ("MSR_GSBASE"); - case MSR_KGSBASE: - return ("MSR_KGSBASE"); - case MSR_SYSENTER_CS_MSR: - return ("MSR_SYSENTER_CS_MSR"); - case MSR_SYSENTER_ESP_MSR: - return ("MSR_SYSENTER_ESP_MSR"); - case MSR_SYSENTER_EIP_MSR: - return ("MSR_SYSENTER_EIP_MSR"); - case MSR_PAT: - return ("MSR_PAT"); - } - snprintf(buf, sizeof(buf), "MSR %#08x", msr); - - return (buf); -} - -static inline void -print_msr_pm(uint64_t msr, int vcpu, int readable, int writeable) -{ - - if (readable || writeable) { - printf("%-20s[%d]\t\t%c%c\n", msr_name(msr), vcpu, - readable ? 'R' : '-', writeable ? 
'W' : '-'); - } -} - -/* - * Reference APM vol2, section 15.11 MSR Intercepts. - */ -static void -dump_amd_msr_pm(const char *bitmap, int vcpu) -{ - int byte, bit, readable, writeable; - uint32_t msr; - - for (msr = 0; msr < 0x2000; msr++) { - byte = msr / 4; - bit = (msr % 4) * 2; - - /* Look at MSRs in the range 0x00000000 to 0x00001FFF */ - readable = (bitmap[byte] & (1 << bit)) ? 0 : 1; - writeable = (bitmap[byte] & (2 << bit)) ? 0 : 1; - print_msr_pm(msr, vcpu, readable, writeable); - - /* Look at MSRs in the range 0xC0000000 to 0xC0001FFF */ - byte += 2048; - readable = (bitmap[byte] & (1 << bit)) ? 0 : 1; - writeable = (bitmap[byte] & (2 << bit)) ? 0 : 1; - print_msr_pm(msr + MSR_AMD6TH_START, vcpu, readable, - writeable); - - /* MSR 0xC0010000 to 0xC0011FF is only for AMD */ - byte += 4096; - readable = (bitmap[byte] & (1 << bit)) ? 0 : 1; - writeable = (bitmap[byte] & (2 << bit)) ? 0 : 1; - print_msr_pm(msr + MSR_AMD7TH_START, vcpu, readable, - writeable); - } -} - -/* - * Reference Intel SDM Vol3 Section 24.6.9 MSR-Bitmap Address - */ -static void -dump_intel_msr_pm(const char *bitmap, int vcpu) -{ - int byte, bit, readable, writeable; - uint32_t msr; - - for (msr = 0; msr < 0x2000; msr++) { - byte = msr / 8; - bit = msr & 0x7; - - /* Look at MSRs in the range 0x00000000 to 0x00001FFF */ - readable = (bitmap[byte] & (1 << bit)) ? 0 : 1; - writeable = (bitmap[2048 + byte] & (1 << bit)) ? 0 : 1; - print_msr_pm(msr, vcpu, readable, writeable); - - /* Look at MSRs in the range 0xC0000000 to 0xC0001FFF */ - byte += 1024; - readable = (bitmap[byte] & (1 << bit)) ? 0 : 1; - writeable = (bitmap[2048 + byte] & (1 << bit)) ? 0 : 1; - print_msr_pm(msr + MSR_AMD6TH_START, vcpu, readable, - writeable); - } -} - -static int -dump_msr_bitmap(int vcpu, uint64_t addr, bool cpu_intel) -{ - int error, fd, map_size; - const char *bitmap; - - error = -1; - bitmap = MAP_FAILED; - - fd = open("/dev/mem", O_RDONLY, 0); - if (fd < 0) { - perror("Couldn't open /dev/mem"); - goto done; - } - - if (cpu_intel) - map_size = PAGE_SIZE; - else - map_size = 2 * PAGE_SIZE; - - bitmap = mmap(NULL, map_size, PROT_READ, MAP_SHARED, fd, addr); - if (bitmap == MAP_FAILED) { - perror("mmap failed"); - goto done; - } - - if (cpu_intel) - dump_intel_msr_pm(bitmap, vcpu); - else - dump_amd_msr_pm(bitmap, vcpu); - - error = 0; -done: - if (bitmap != MAP_FAILED) - munmap((void *)bitmap, map_size); - if (fd >= 0) - close(fd); - - return (error); -} - -static int -vm_get_vmcs_field(struct vmctx *ctx, int vcpu, int field, uint64_t *ret_val) -{ - - return (vm_get_register(ctx, vcpu, VMCS_IDENT(field), ret_val)); -} - -static int -vm_set_vmcs_field(struct vmctx *ctx, int vcpu, int field, uint64_t val) -{ - - return (vm_set_register(ctx, vcpu, VMCS_IDENT(field), val)); -} -#else /* __FreeBSD__ */ -/* VMCS does not allow arbitrary reads/writes */ +/* Until a safe method is created, arbitrary VMCS reads/writes are forbidden */ static int vm_get_vmcs_field(struct vmctx *ctx, int vcpu, int field, uint64_t *ret_val) { @@ -584,29 +409,11 @@ vm_set_vmcs_field(struct vmctx *ctx, int vcpu, int field, uint64_t val) { return (EINVAL); } -#endif /* __FreeBSD__ */ - -#ifdef __FreeBSD__ -static int -vm_get_vmcb_field(struct vmctx *ctx, int vcpu, int off, int bytes, - uint64_t *ret_val) -{ - - return (vm_get_register(ctx, vcpu, VMCB_ACCESS(off, bytes), ret_val)); -} -static int -vm_set_vmcb_field(struct vmctx *ctx, int vcpu, int off, int bytes, - uint64_t val) -{ - - return (vm_set_register(ctx, vcpu, VMCB_ACCESS(off, bytes), val)); -} -#else 
/* __FreeBSD__ */ -/* Arbitrary VMCB read/write is not allowed */ +/* Until a safe method is created, arbitrary VMCB reads/writes are forbidden */ static int vm_get_vmcb_field(struct vmctx *ctx, int vcpu, int off, int bytes, - uint64_t *ret_val) + uint64_t *ret_val) { *ret_val = 0; return (0); @@ -614,11 +421,10 @@ vm_get_vmcb_field(struct vmctx *ctx, int vcpu, int off, int bytes, static int vm_set_vmcb_field(struct vmctx *ctx, int vcpu, int off, int bytes, - uint64_t val) + uint64_t val) { return (EINVAL); } -#endif /* __FreeBSD__ */ enum { VMNAME = 1000, /* avoid collision with return values from getopt */ @@ -661,9 +467,7 @@ enum { SET_RTC_TIME, SET_RTC_NVRAM, RTC_NVRAM_OFFSET, -#ifndef __FreeBSD__ PMTMR_PORT, -#endif }; static void @@ -686,38 +490,6 @@ print_cpus(const char *banner, const cpuset_t *cpus) printf("\n"); } -#ifdef __FreeBSD__ -static void -print_intinfo(const char *banner, uint64_t info) -{ - int type; - - printf("%s:\t", banner); - if (info & VM_INTINFO_VALID) { - type = info & VM_INTINFO_TYPE; - switch (type) { - case VM_INTINFO_HWINTR: - printf("extint"); - break; - case VM_INTINFO_NMI: - printf("nmi"); - break; - case VM_INTINFO_SWINTR: - printf("swint"); - break; - default: - printf("exception"); - break; - } - printf(" vector %d", (int)VM_INTINFO_VECTOR(info)); - if (info & VM_INTINFO_DEL_ERRCODE) - printf(" errcode %#x", (u_int)(info >> 32)); - } else { - printf("n/a"); - } - printf("\n"); -} -#else /* __FreeBSD__ */ static void print_intinfo(const char *banner, uint64_t info) { @@ -746,7 +518,6 @@ print_intinfo(const char *banner, uint64_t info) } printf("\n"); } -#endif /* __FreeBSD__ */ static bool cpu_vendor_intel(void) @@ -1141,7 +912,7 @@ get_misc_vmcs(struct vmctx *ctx, int vcpu) if (error == 0) printf("cr4_shadow[%d]\t\t0x%016lx\n", vcpu, cr4shadow); } - + if (!error && (get_cr3_targets || get_all)) { uint64_t target_count, target_addr; error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR3_TARGET_COUNT, @@ -1214,7 +985,7 @@ get_misc_vmcs(struct vmctx *ctx, int vcpu) printf("gpa[%d]\t\t0x%016lx\n", vcpu, u64); } - if (!error && (get_vmcs_entry_interruption_info || + if (!error && (get_vmcs_entry_interruption_info || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_ENTRY_INTR_INFO,&u64); if (error == 0) { @@ -1336,7 +1107,7 @@ get_misc_vmcs(struct vmctx *ctx, int vcpu) printf("vmcs_exit_qualification[%d]\t0x%016lx\n", vcpu, u64); } - + return (error); } @@ -1549,9 +1320,7 @@ setup_options(bool cpu_intel) NO_ARG, &get_msr_bitmap, 1 }, { "get-msr-bitmap-address", NO_ARG, &get_msr_bitmap_address, 1 }, - { "get-guest-pat", NO_ARG, &get_guest_pat, 1 }, - { "get-guest-sysenter", - NO_ARG, &get_guest_sysenter, 1 }, + { "get-guest-msrs", NO_ARG, &get_guest_msrs, 1 }, { "get-exit-reason", NO_ARG, &get_exit_reason, 1 }, { "get-x2apic-state", NO_ARG, &get_x2apic_state, 1 }, @@ -1566,11 +1335,9 @@ setup_options(bool cpu_intel) { "get-suspended-cpus", NO_ARG, &get_suspended_cpus, 1 }, { "get-intinfo", NO_ARG, &get_intinfo, 1 }, { "get-cpu-topology", NO_ARG, &get_cpu_topology, 1 }, -#ifndef __FreeBSD__ { "pmtmr-port", REQ_ARG, 0, PMTMR_PORT }, { "wrlock-cycle", NO_ARG, &wrlock_cycle, 1 }, { "get-fpu", NO_ARG, &get_fpu, 1 }, -#endif }; const struct option intel_opts[] = { @@ -1632,7 +1399,7 @@ setup_options(bool cpu_intel) const struct option amd_opts[] = { { "get-vmcb-intercepts", NO_ARG, &get_vmcb_intercept, 1 }, - { "get-vmcb-asid", + { "get-vmcb-asid", NO_ARG, &get_vpid_asid, 1 }, { "get-vmcb-exit-details", NO_ARG, &get_vmcb_exit_details, 1 }, @@ -1788,7 +1555,6 @@ 
show_memseg(struct vmctx *ctx) } } -#ifndef __FreeBSD__ static int show_fpu(struct vmctx *ctx, int vcpu) { @@ -1873,7 +1639,87 @@ show_fpu(struct vmctx *ctx, int vcpu) free(buf); return (0); } -#endif /*__FreeBSD__ */ + +static const char * +msr_name(uint32_t msr) +{ +#define MSR_IDENT_MAP(x) case x: return (#x); + switch (msr) { + MSR_IDENT_MAP(MSR_PAT) + MSR_IDENT_MAP(MSR_SYSENTER_CS_MSR) + MSR_IDENT_MAP(MSR_SYSENTER_ESP_MSR) + MSR_IDENT_MAP(MSR_SYSENTER_EIP_MSR) + MSR_IDENT_MAP(MSR_STAR) + MSR_IDENT_MAP(MSR_LSTAR) + MSR_IDENT_MAP(MSR_CSTAR) + MSR_IDENT_MAP(MSR_SF_MASK) + MSR_IDENT_MAP(MSR_FSBASE) + MSR_IDENT_MAP(MSR_GSBASE) + MSR_IDENT_MAP(MSR_KGSBASE) + MSR_IDENT_MAP(MSR_EFER) + MSR_IDENT_MAP(MSR_MTRRcap) + MSR_IDENT_MAP(MSR_MTRRdefType) + case MSR_TSC: + return ("MSR_TSC (offset from system boot)"); + default: + return (NULL); + } +} + +static int +show_msrs(struct vmctx *ctx, int vcpu) +{ + struct vdi_field_entry_v1 *msrs; + struct vm_data_xfer xfer = { + .vdx_vcpuid = vcpu, + .vdx_class = VDC_MSR, + .vdx_version = 1, + .vdx_len = 0, + .vdx_data = &msrs, + }; + int fd = vm_get_device_fd(ctx); + int res; + + /* Figure out how many entries we need to alloc for */ + res = ioctl(fd, VM_DATA_READ, &xfer); + if (res == 0) { + return (EINVAL); + } else if (errno != ENOSPC) { + return (errno); + } + const uint32_t len = xfer.vdx_result_len; + msrs = malloc(len); + if (msrs == NULL) { + return (ENOMEM); + } + bzero(msrs, len); + xfer.vdx_data = msrs; + xfer.vdx_len = len; + + /* Query the actual data, now that we should have an adequate buffer */ + res = ioctl(fd, VM_DATA_READ, &xfer); + if (res != 0) { + free(msrs); + return (errno); + } + + const uint_t count = + xfer.vdx_result_len / sizeof (struct vdi_field_entry_v1); + for (uint_t i = 0; i < count; i++) { + const uint32_t ident = msrs[i].vfe_ident; + const uint64_t value = msrs[i].vfe_value; + + const char *name = msr_name(ident); + + if (name != NULL) { + printf("msr[%s]\t = %x\n", name, value); + } else { + printf("msr[%08x]\t = %x\n", ident, value); + } + } + free(msrs); + return (0); +} int main(int argc, char *argv[]) @@ -1883,7 +1729,7 @@ main(int argc, char *argv[]) vm_paddr_t gpa_pmap; struct vm_exit vmexit; uint64_t rax, cr0, cr2, cr3, cr4, dr0, dr1, dr2, dr3, dr6, dr7; - uint64_t rsp, rip, rflags, efer, pat; + uint64_t rsp, rip, rflags, efer; uint64_t eptp, bm, addr, u64, pteval[4], *pte, info[2]; struct vmctx *ctx; cpuset_t cpus; @@ -2049,21 +1895,12 @@ main(int argc, char *argv[]) case CAPNAME: capname = optarg; break; -#ifdef __FreeBSD__ - case UNASSIGN_PPTDEV: - unassign_pptdev = 1; - if (sscanf(optarg, "%d/%d/%d", &bus, &slot, &func) != 3) - usage(cpu_intel); - break; -#endif case ASSERT_LAPIC_LVT: assert_lapic_lvt = atoi(optarg); break; -#ifndef __FreeBSD__ case PMTMR_PORT: pmtmr_port = strtoul(optarg, NULL, 16); break; -#endif default: usage(cpu_intel); } @@ -2076,13 +1913,8 @@ main(int argc, char *argv[]) error = 0; -#ifndef __FreeBSD__ if (!error && create) error = vm_create(vmname, 0); -# else - if (!error && create) - error = vm_create(vmname); -#endif /* __FreeBSD__ */ if (!error) { ctx = vm_open(vmname); @@ -2094,16 +1926,15 @@ main(int argc, char *argv[]) } } -#ifndef __FreeBSD__ if (!error && pmtmr_port) { error = vm_pmtmr_set_location(ctx, pmtmr_port); exit(error); } + if (!error && wrlock_cycle) { error = vm_wrlock_cycle(ctx); exit(error); } -#endif /* __FreeBSD__ */ if (!error && memsize) error = vm_setup_memory(ctx, memsize, VM_MMAP_ALL); @@ -2232,11 +2063,6 @@ main(int argc, char *argv[]) if (!error && 
set_x2apic_state) error = vm_set_x2apic_state(ctx, vcpu, x2apic_state); -#ifdef __FreeBSD__ - if (!error && unassign_pptdev) - error = vm_unassign_pptdev(ctx, bus, slot, func); -#endif /* __FreeBSD__ */ - if (!error && set_exception_bitmap) { if (cpu_intel) error = vm_set_vmcs_field(ctx, vcpu, @@ -2273,11 +2099,9 @@ main(int argc, char *argv[]) if (!error) error = get_all_segments(ctx, vcpu); -#ifndef __FreeBSD__ if (!error && (get_fpu || get_all)) { error = show_fpu(ctx, vcpu); } -#endif /* __FreeBSD__ */ if (!error) { if (cpu_intel) @@ -2285,7 +2109,7 @@ main(int argc, char *argv[]) else error = get_misc_vmcb(ctx, vcpu); } - + if (!error && (get_x2apic_state || get_all)) { error = vm_get_x2apic_state(ctx, vcpu, &x2apic_state); if (error == 0) @@ -2340,7 +2164,7 @@ main(int argc, char *argv[]) &tscoff); else error = vm_get_vmcb_field(ctx, vcpu, - VMCB_OFF_TSC_OFFSET, + VMCB_OFF_TSC_OFFSET, 8, &tscoff); if (error == 0) printf("tsc_offset[%d]\t0x%016lx\n", vcpu, tscoff); @@ -2348,7 +2172,7 @@ main(int argc, char *argv[]) if (!error && (get_msr_bitmap_address || get_all)) { if (cpu_intel) - error = vm_get_vmcs_field(ctx, vcpu, VMCS_MSR_BITMAP, + error = vm_get_vmcs_field(ctx, vcpu, VMCS_MSR_BITMAP, &addr); else error = vm_get_vmcb_field(ctx, vcpu, @@ -2357,90 +2181,27 @@ main(int argc, char *argv[]) printf("msr_bitmap[%d]\t\t%#lx\n", vcpu, addr); } - if (!error && (get_msr_bitmap || get_all)) { - if (cpu_intel) { - error = vm_get_vmcs_field(ctx, vcpu, - VMCS_MSR_BITMAP, &addr); - } else { - error = vm_get_vmcb_field(ctx, vcpu, - VMCB_OFF_MSR_PERM, 8, - &addr); - } - -#ifdef __FreeBSD__ - if (error == 0) - error = dump_msr_bitmap(vcpu, addr, cpu_intel); -#else - /* - * Skip dumping the MSR bitmap since raw access to the VMCS is - * currently not possible. - */ -#endif /* __FreeBSD__ */ - } - if (!error && (get_vpid_asid || get_all)) { uint64_t vpid; if (cpu_intel) error = vm_get_vmcs_field(ctx, vcpu, VMCS_VPID, &vpid); else - error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_ASID, + error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_ASID, 4, &vpid); if (error == 0) - printf("%s[%d]\t\t0x%04lx\n", + printf("%s[%d]\t\t0x%04lx\n", cpu_intel ? 
"vpid" : "asid", vcpu, vpid); } - if (!error && (get_guest_pat || get_all)) { - if (cpu_intel) - error = vm_get_vmcs_field(ctx, vcpu, - VMCS_GUEST_IA32_PAT, &pat); - else - error = vm_get_vmcb_field(ctx, vcpu, - VMCB_OFF_GUEST_PAT, 8, &pat); - if (error == 0) - printf("guest_pat[%d]\t\t0x%016lx\n", vcpu, pat); - } - - if (!error && (get_guest_sysenter || get_all)) { - if (cpu_intel) - error = vm_get_vmcs_field(ctx, vcpu, - VMCS_GUEST_IA32_SYSENTER_CS, - &cs); - else - error = vm_get_vmcb_field(ctx, vcpu, - VMCB_OFF_SYSENTER_CS, 8, - &cs); - - if (error == 0) - printf("guest_sysenter_cs[%d]\t%#lx\n", vcpu, cs); - if (cpu_intel) - error = vm_get_vmcs_field(ctx, vcpu, - VMCS_GUEST_IA32_SYSENTER_ESP, - &rsp); - else - error = vm_get_vmcb_field(ctx, vcpu, - VMCB_OFF_SYSENTER_ESP, 8, - &rsp); - - if (error == 0) - printf("guest_sysenter_sp[%d]\t%#lx\n", vcpu, rsp); - if (cpu_intel) - error = vm_get_vmcs_field(ctx, vcpu, - VMCS_GUEST_IA32_SYSENTER_EIP, - &rip); - else - error = vm_get_vmcb_field(ctx, vcpu, - VMCB_OFF_SYSENTER_EIP, 8, - &rip); - if (error == 0) - printf("guest_sysenter_ip[%d]\t%#lx\n", vcpu, rip); + if (!error && (get_guest_msrs || get_all)) { + error = show_msrs(ctx, vcpu); } if (!error && (get_exit_reason || get_all)) { if (cpu_intel) error = vm_get_vmcs_field(ctx, vcpu, VMCS_EXIT_REASON, &u64); - else + else error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_EXIT_REASON, 8, &u64); diff --git a/usr/src/uts/intel/io/vmm/amd/svm.c b/usr/src/uts/intel/io/vmm/amd/svm.c index b699d57991..de4a492ae9 100644 --- a/usr/src/uts/intel/io/vmm/amd/svm.c +++ b/usr/src/uts/intel/io/vmm/amd/svm.c @@ -72,7 +72,6 @@ __FBSDID("$FreeBSD$"); #include "vlapic.h" #include "vlapic_priv.h" -#include "x86.h" #include "vmcb.h" #include "svm.h" #include "svm_softc.h" @@ -2257,6 +2256,17 @@ svm_setdesc(void *arg, int vcpu, int reg, const struct seg_desc *desc) if (SEG_DESC_UNUSABLE(desc->access)) { seg->attrib &= ~0x80; } + /* + * Keep CPL synced with the DPL specified for %ss. + * + * KVM notes that a SYSRET to non-cpl-3 is possible on AMD + * (unlike Intel), but accepts such a possible deviation for + * what is otherwise unreasonable behavior for a guest OS, since + * they do the same synchronization. 
+ */ + if (reg == VM_REG_GUEST_SS) { + vmcb->state.cpl = SEG_DESC_DPL(desc->access); + } break; case VM_REG_GUEST_GDTR: @@ -2339,6 +2349,55 @@ svm_getdesc(void *arg, int vcpu, int reg, struct seg_desc *desc) } static int +svm_get_msr(void *arg, int vcpu, uint32_t msr, uint64_t *valp) +{ + struct svm_softc *sc = arg; + struct vmcb *vmcb = svm_get_vmcb(sc, vcpu); + const uint64_t *msrp = vmcb_msr_ptr(vmcb, msr, NULL); + + if (msrp != NULL) { + *valp = *msrp; + return (0); + } + + return (EINVAL); +} + +static int +svm_set_msr(void *arg, int vcpu, uint32_t msr, uint64_t val) +{ + struct svm_softc *sc = arg; + struct vmcb *vmcb = svm_get_vmcb(sc, vcpu); + + uint32_t dirty = 0; + uint64_t *msrp = vmcb_msr_ptr(vmcb, msr, &dirty); + if (msrp == NULL) { + return (EINVAL); + } + switch (msr) { + case MSR_EFER: + /* + * For now, just clone the logic from + * svm_setreg(): + * + * EFER_SVM must always be set when the guest is + * executing + */ + *msrp = val | EFER_SVM; + break; + /* TODO: other necessary MSR masking */ + default: + *msrp = val; + break; + } + if (dirty != 0) { + svm_set_dirty(sc, vcpu, dirty); + } + return (0); + +} + +static int svm_setcap(void *arg, int vcpu, int type, int val) { struct svm_softc *sc; @@ -2450,4 +2509,7 @@ struct vmm_ops vmm_ops_amd = { .vmsavectx = svm_savectx, .vmrestorectx = svm_restorectx, + + .vmgetmsr = svm_get_msr, + .vmsetmsr = svm_set_msr, }; diff --git a/usr/src/uts/intel/io/vmm/amd/vmcb.c b/usr/src/uts/intel/io/vmm/amd/vmcb.c index 5be5240129..ec2c9674c0 100644 --- a/usr/src/uts/intel/io/vmm/amd/vmcb.c +++ b/usr/src/uts/intel/io/vmm/amd/vmcb.c @@ -45,6 +45,7 @@ __FBSDID("$FreeBSD$"); #include <sys/param.h> #include <sys/systm.h> +#include <machine/specialreg.h> #include <machine/vmm.h> #include "vmcb.h" @@ -148,3 +149,63 @@ vmcb_regptr(struct vmcb *vmcb, int ident, uint32_t *dirtyp) } return (res); } + +uint64_t * +vmcb_msr_ptr(struct vmcb *vmcb, uint32_t msr, uint32_t *dirtyp) +{ + uint64_t *res = NULL; + uint32_t dirty = 0; + struct vmcb_state *state = &vmcb->state; + + switch (msr) { + case MSR_EFER: + res = &state->efer; + dirty = VMCB_CACHE_CR; + break; + + case MSR_GSBASE: + res = &state->gs.base; + dirty = VMCB_CACHE_SEG; + break; + case MSR_FSBASE: + res = &state->fs.base; + dirty = VMCB_CACHE_SEG; + break; + case MSR_KGSBASE: + res = &state->kernelgsbase; + break; + + case MSR_STAR: + res = &state->star; + break; + case MSR_LSTAR: + res = &state->lstar; + break; + case MSR_CSTAR: + res = &state->cstar; + break; + case MSR_SF_MASK: + res = &state->sfmask; + break; + + case MSR_SYSENTER_CS_MSR: + res = &state->sysenter_cs; + break; + case MSR_SYSENTER_ESP_MSR: + res = &state->sysenter_esp; + break; + case MSR_SYSENTER_EIP_MSR: + res = &state->sysenter_eip; + break; + + case MSR_PAT: + res = &state->g_pat; + dirty = VMCB_CACHE_NP; + break; + } + + if (res != NULL && dirtyp != NULL) { + *dirtyp = dirty; + } + return (res); +} diff --git a/usr/src/uts/intel/io/vmm/amd/vmcb.h b/usr/src/uts/intel/io/vmm/amd/vmcb.h index da0f08445c..7a57979d56 100644 --- a/usr/src/uts/intel/io/vmm/amd/vmcb.h +++ b/usr/src/uts/intel/io/vmm/amd/vmcb.h @@ -397,6 +397,7 @@ CTASSERT(offsetof(struct vmcb, state) == 0x400); struct vmcb_segment *vmcb_segptr(struct vmcb *vmcb, int type); uint64_t *vmcb_regptr(struct vmcb *vmcb, int ident, uint32_t *dirtyp); +uint64_t *vmcb_msr_ptr(struct vmcb *vmcb, uint32_t ident, uint32_t *dirtyp); #endif /* _KERNEL */ #endif /* _VMCB_H_ */ diff --git a/usr/src/uts/intel/io/vmm/intel/vmcs.c b/usr/src/uts/intel/io/vmm/intel/vmcs.c index 
7fabba79f7..b5bc8130d9 100644 --- a/usr/src/uts/intel/io/vmm/intel/vmcs.c +++ b/usr/src/uts/intel/io/vmm/intel/vmcs.c @@ -165,6 +165,34 @@ vmcs_seg_desc_encoding(int seg, uint32_t *base, uint32_t *lim, uint32_t *acc) } } +uint32_t +vmcs_msr_encoding(uint32_t msr) +{ + switch (msr) { + case MSR_PAT: + return (VMCS_GUEST_IA32_PAT); + case MSR_EFER: + return (VMCS_GUEST_IA32_EFER); + case MSR_SYSENTER_CS_MSR: + return (VMCS_GUEST_IA32_SYSENTER_CS); + case MSR_SYSENTER_ESP_MSR: + return (VMCS_GUEST_IA32_SYSENTER_ESP); + case MSR_SYSENTER_EIP_MSR: + return (VMCS_GUEST_IA32_SYSENTER_EIP); + /* + * While fsbase and gsbase are expected to be accessed (by the VMM) via + * the segment descriptor interfaces, we still make it available as MSR + * contents as well. + */ + case MSR_FSBASE: + return (VMCS_GUEST_FS_BASE); + case MSR_GSBASE: + return (VMCS_GUEST_GS_BASE); + default: + return (VMCS_INVALID_ENCODING); + } +} + void vmcs_clear(uintptr_t vmcs_pa) { diff --git a/usr/src/uts/intel/io/vmm/intel/vmcs.h b/usr/src/uts/intel/io/vmm/intel/vmcs.h index 24dc2dd574..9e4a9e3282 100644 --- a/usr/src/uts/intel/io/vmm/intel/vmcs.h +++ b/usr/src/uts/intel/io/vmm/intel/vmcs.h @@ -48,6 +48,7 @@ CTASSERT(sizeof (struct vmcs) == PAGE_SIZE); uint32_t vmcs_field_encoding(int ident); void vmcs_seg_desc_encoding(int seg, uint32_t *base, uint32_t *lim, uint32_t *acc); +uint32_t vmcs_msr_encoding(uint32_t msr); void vmcs_initialize(struct vmcs *vmcs, uintptr_t vmcs_pa); diff --git a/usr/src/uts/intel/io/vmm/intel/vmx.c b/usr/src/uts/intel/io/vmm/intel/vmx.c index 360cec1056..4ef51259ab 100644 --- a/usr/src/uts/intel/io/vmm/intel/vmx.c +++ b/usr/src/uts/intel/io/vmm/intel/vmx.c @@ -85,7 +85,6 @@ __FBSDID("$FreeBSD$"); #include "vmcs.h" #include "vmx.h" #include "vmx_msr.h" -#include "x86.h" #include "vmx_controls.h" #define PINBASED_CTLS_ONE_SETTING \ @@ -1629,6 +1628,25 @@ vmx_set_guest_reg(struct vmx *vmx, int vcpu, int ident, uint64_t regval) } } +static void +vmx_sync_efer_state(struct vmx *vmx, int vcpu, uint64_t efer) +{ + uint64_t ctrl; + + /* + * If the "load EFER" VM-entry control is 1 (which we require) then the + * value of EFER.LMA must be identical to "IA-32e mode guest" bit in the + * VM-entry control. + */ + ctrl = vmcs_read(VMCS_ENTRY_CTLS); + if ((efer & EFER_LMA) != 0) { + ctrl |= VM_ENTRY_GUEST_LMA; + } else { + ctrl &= ~VM_ENTRY_GUEST_LMA; + } + vmcs_write(VMCS_ENTRY_CTLS, ctrl); +} + static int vmx_emulate_cr0_access(struct vmx *vmx, int vcpu, uint64_t exitqual) { @@ -1655,20 +1673,14 @@ vmx_emulate_cr0_access(struct vmx *vmx, int vcpu, uint64_t exitqual) vmcs_write(VMCS_GUEST_CR0, crval); if (regval & CR0_PG) { - uint64_t efer, entry_ctls; + uint64_t efer; - /* - * If CR0.PG is 1 and EFER.LME is 1 then EFER.LMA and - * the "IA-32e mode guest" bit in VM-entry control must be - * equal. - */ + /* Keep EFER.LMA properly updated if paging is enabled */ efer = vmcs_read(VMCS_GUEST_IA32_EFER); if (efer & EFER_LME) { efer |= EFER_LMA; vmcs_write(VMCS_GUEST_IA32_EFER, efer); - entry_ctls = vmcs_read(VMCS_ENTRY_CTLS); - entry_ctls |= VM_ENTRY_GUEST_LMA; - vmcs_write(VMCS_ENTRY_CTLS, entry_ctls); + vmx_sync_efer_state(vmx, vcpu, efer); } } @@ -2934,6 +2946,44 @@ vmx_vmcleanup(void *arg) kmem_free(vmx, sizeof (*vmx)); } +/* + * Ensure that the VMCS for this vcpu is loaded. + * Returns true if a VMCS load was required. 
+ */ +static bool +vmx_vmcs_access_ensure(struct vmx *vmx, int vcpu) +{ + int hostcpu; + + if (vcpu_is_running(vmx->vm, vcpu, &hostcpu)) { + if (hostcpu != curcpu) { + panic("unexpected vcpu migration %d != %d", + hostcpu, curcpu); + } + /* Earlier logic already took care of the load */ + return (false); + } else { + vmcs_load(vmx->vmcs_pa[vcpu]); + return (true); + } +} + +static void +vmx_vmcs_access_done(struct vmx *vmx, int vcpu) +{ + int hostcpu; + + if (vcpu_is_running(vmx->vm, vcpu, &hostcpu)) { + if (hostcpu != curcpu) { + panic("unexpected vcpu migration %d != %d", + hostcpu, curcpu); + } + /* Later logic will take care of the unload */ + } else { + vmcs_clear(vmx->vmcs_pa[vcpu]); + } +} + static uint64_t * vmxctx_regptr(struct vmxctx *vmxctx, int reg) { @@ -2989,25 +3039,18 @@ vmxctx_regptr(struct vmxctx *vmxctx, int reg) static int vmx_getreg(void *arg, int vcpu, int reg, uint64_t *retval) { - int running, hostcpu, err; struct vmx *vmx = arg; uint64_t *regp; - running = vcpu_is_running(vmx->vm, vcpu, &hostcpu); - if (running && hostcpu != curcpu) - panic("vmx_getreg: %d is running", vcpu); - /* VMCS access not required for ctx reads */ if ((regp = vmxctx_regptr(&vmx->ctx[vcpu], reg)) != NULL) { *retval = *regp; return (0); } - if (!running) { - vmcs_load(vmx->vmcs_pa[vcpu]); - } + bool vmcs_loaded = vmx_vmcs_access_ensure(vmx, vcpu); + int err = 0; - err = 0; if (reg == VM_REG_GUEST_INTR_SHADOW) { uint64_t gi = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY); *retval = (gi & HWINTR_BLOCKING) ? 1 : 0; @@ -3035,33 +3078,26 @@ vmx_getreg(void *arg, int vcpu, int reg, uint64_t *retval) } } - if (!running) { - vmcs_clear(vmx->vmcs_pa[vcpu]); + if (vmcs_loaded) { + vmx_vmcs_access_done(vmx, vcpu); } - return (err); } static int vmx_setreg(void *arg, int vcpu, int reg, uint64_t val) { - int running, hostcpu, error; struct vmx *vmx = arg; uint64_t *regp; - running = vcpu_is_running(vmx->vm, vcpu, &hostcpu); - if (running && hostcpu != curcpu) - panic("vmx_setreg: %d is running", vcpu); - /* VMCS access not required for ctx writes */ if ((regp = vmxctx_regptr(&vmx->ctx[vcpu], reg)) != NULL) { *regp = val; return (0); } - if (!running) { - vmcs_load(vmx->vmcs_pa[vcpu]); - } + bool vmcs_loaded = vmx_vmcs_access_ensure(vmx, vcpu); + int err = 0; if (reg == VM_REG_GUEST_INTR_SHADOW) { if (val != 0) { @@ -3069,39 +3105,24 @@ vmx_setreg(void *arg, int vcpu, int reg, uint64_t val) * Forcing the vcpu into an interrupt shadow is not * presently supported. */ - error = EINVAL; + err = EINVAL; } else { uint64_t gi; gi = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY); gi &= ~HWINTR_BLOCKING; vmcs_write(VMCS_GUEST_INTERRUPTIBILITY, gi); - error = 0; + err = 0; } } else { uint32_t encoding; - error = 0; + err = 0; encoding = vmcs_field_encoding(reg); switch (encoding) { case VMCS_GUEST_IA32_EFER: - /* - * If the "load EFER" VM-entry control is 1 then the - * value of EFER.LMA must be identical to "IA-32e mode - * guest" bit in the VM-entry control. - */ - if ((entry_ctls & VM_ENTRY_LOAD_EFER) != 0) { - uint64_t ctls; - - ctls = vmcs_read(VMCS_ENTRY_CTLS); - if (val & EFER_LMA) { - ctls |= VM_ENTRY_GUEST_LMA; - } else { - ctls &= ~VM_ENTRY_GUEST_LMA; - } - vmcs_write(VMCS_ENTRY_CTLS, ctls); - } vmcs_write(encoding, val); + vmx_sync_efer_state(vmx, vcpu, val); break; case VMCS_GUEST_CR0: /* @@ -3130,10 +3151,11 @@ vmx_setreg(void *arg, int vcpu, int reg, uint64_t val) * XXX the processor retains global mappings when %cr3 * is updated but vmx_invvpid() does not. 
*/ - vmx_invvpid(vmx, vcpu, running); + vmx_invvpid(vmx, vcpu, + vcpu_is_running(vmx->vm, vcpu, NULL)); break; case VMCS_INVALID_ENCODING: - error = EINVAL; + err = EINVAL; break; default: vmcs_write(encoding, val); @@ -3141,27 +3163,19 @@ vmx_setreg(void *arg, int vcpu, int reg, uint64_t val) } } - if (!running) { - vmcs_clear(vmx->vmcs_pa[vcpu]); + if (vmcs_loaded) { + vmx_vmcs_access_done(vmx, vcpu); } - - return (error); + return (err); } static int vmx_getdesc(void *arg, int vcpu, int seg, struct seg_desc *desc) { - int hostcpu, running; struct vmx *vmx = arg; uint32_t base, limit, access; - running = vcpu_is_running(vmx->vm, vcpu, &hostcpu); - if (running && hostcpu != curcpu) - panic("vmx_getdesc: %d is running", vcpu); - - if (!running) { - vmcs_load(vmx->vmcs_pa[vcpu]); - } + bool vmcs_loaded = vmx_vmcs_access_ensure(vmx, vcpu); vmcs_seg_desc_encoding(seg, &base, &limit, &access); desc->base = vmcs_read(base); @@ -3172,8 +3186,8 @@ vmx_getdesc(void *arg, int vcpu, int seg, struct seg_desc *desc) desc->access = 0; } - if (!running) { - vmcs_clear(vmx->vmcs_pa[vcpu]); + if (vmcs_loaded) { + vmx_vmcs_access_done(vmx, vcpu); } return (0); } @@ -3181,17 +3195,10 @@ vmx_getdesc(void *arg, int vcpu, int seg, struct seg_desc *desc) static int vmx_setdesc(void *arg, int vcpu, int seg, const struct seg_desc *desc) { - int hostcpu, running; struct vmx *vmx = arg; uint32_t base, limit, access; - running = vcpu_is_running(vmx->vm, vcpu, &hostcpu); - if (running && hostcpu != curcpu) - panic("vmx_setdesc: %d is running", vcpu); - - if (!running) { - vmcs_load(vmx->vmcs_pa[vcpu]); - } + bool vmcs_loaded = vmx_vmcs_access_ensure(vmx, vcpu); vmcs_seg_desc_encoding(seg, &base, &limit, &access); vmcs_write(base, desc->base); @@ -3200,12 +3207,94 @@ vmx_setdesc(void *arg, int vcpu, int seg, const struct seg_desc *desc) vmcs_write(access, desc->access); } - if (!running) { - vmcs_clear(vmx->vmcs_pa[vcpu]); + if (vmcs_loaded) { + vmx_vmcs_access_done(vmx, vcpu); } return (0); } +static uint64_t * +vmx_msr_ptr(struct vmx *vmx, int vcpu, uint32_t msr) +{ + uint64_t *guest_msrs = vmx->guest_msrs[vcpu]; + + switch (msr) { + case MSR_LSTAR: + return (&guest_msrs[IDX_MSR_LSTAR]); + case MSR_CSTAR: + return (&guest_msrs[IDX_MSR_CSTAR]); + case MSR_STAR: + return (&guest_msrs[IDX_MSR_STAR]); + case MSR_SF_MASK: + return (&guest_msrs[IDX_MSR_SF_MASK]); + case MSR_KGSBASE: + return (&guest_msrs[IDX_MSR_KGSBASE]); + case MSR_PAT: + return (&guest_msrs[IDX_MSR_PAT]); + default: + return (NULL); + } +} + +static int +vmx_msr_get(void *arg, int vcpu, uint32_t msr, uint64_t *valp) +{ + struct vmx *vmx = arg; + + ASSERT(valp != NULL); + + const uint64_t *msrp = vmx_msr_ptr(vmx, vcpu, msr); + if (msrp != NULL) { + *valp = *msrp; + return (0); + } + + const uint32_t vmcs_enc = vmcs_msr_encoding(msr); + if (vmcs_enc != VMCS_INVALID_ENCODING) { + bool vmcs_loaded = vmx_vmcs_access_ensure(vmx, vcpu); + + *valp = vmcs_read(vmcs_enc); + + if (vmcs_loaded) { + vmx_vmcs_access_done(vmx, vcpu); + } + return (0); + } + + return (EINVAL); +} + +static int +vmx_msr_set(void *arg, int vcpu, uint32_t msr, uint64_t val) +{ + struct vmx *vmx = arg; + + /* TODO: mask value */ + + uint64_t *msrp = vmx_msr_ptr(vmx, vcpu, msr); + if (msrp != NULL) { + *msrp = val; + return (0); + } + + const uint32_t vmcs_enc = vmcs_msr_encoding(msr); + if (vmcs_enc != VMCS_INVALID_ENCODING) { + bool vmcs_loaded = vmx_vmcs_access_ensure(vmx, vcpu); + + vmcs_write(vmcs_enc, val); + + if (msr == MSR_EFER) { + vmx_sync_efer_state(vmx, vcpu, val); + } + + if 
(vmcs_loaded) { + vmx_vmcs_access_done(vmx, vcpu); + } + return (0); + } + return (EINVAL); +} + static int vmx_getcap(void *arg, int vcpu, int type, int *retval) { @@ -3711,6 +3800,9 @@ struct vmm_ops vmm_ops_intel = { .vmsavectx = vmx_savectx, .vmrestorectx = vmx_restorectx, + + .vmgetmsr = vmx_msr_get, + .vmsetmsr = vmx_msr_set, }; /* Side-effect free HW validation derived from checks in vmx_init. */ diff --git a/usr/src/uts/intel/io/vmm/sys/vmm_kernel.h b/usr/src/uts/intel/io/vmm/sys/vmm_kernel.h index bc7f1bb0f2..1dba79a7bf 100644 --- a/usr/src/uts/intel/io/vmm/sys/vmm_kernel.h +++ b/usr/src/uts/intel/io/vmm/sys/vmm_kernel.h @@ -90,6 +90,11 @@ typedef void (*vmi_vlapic_cleanup)(void *vmi, struct vlapic *vlapic); typedef void (*vmi_savectx)(void *vmi, int vcpu); typedef void (*vmi_restorectx)(void *vmi, int vcpu); +typedef int (*vmi_get_msr_t)(void *vmi, int vcpu, uint32_t msr, + uint64_t *valp); +typedef int (*vmi_set_msr_t)(void *vmi, int vcpu, uint32_t msr, + uint64_t val); + struct vmm_ops { vmm_init_func_t init; /* module wide initialization */ vmm_cleanup_func_t cleanup; @@ -109,6 +114,9 @@ struct vmm_ops { vmi_savectx vmsavectx; vmi_restorectx vmrestorectx; + + vmi_get_msr_t vmgetmsr; + vmi_set_msr_t vmsetmsr; }; extern struct vmm_ops vmm_ops_intel; @@ -379,6 +387,19 @@ typedef enum vm_msr_result { VMR_UNHANLDED = 2, /* handle in userspace, kernel cannot emulate */ } vm_msr_result_t; +enum vm_cpuid_capability { + VCC_NONE, + VCC_NO_EXECUTE, + VCC_FFXSR, + VCC_TCE, + VCC_LAST +}; + +int x86_emulate_cpuid(struct vm *, int, uint64_t *, uint64_t *, uint64_t *, + uint64_t *); +bool vm_cpuid_capability(struct vm *, int, enum vm_cpuid_capability); +bool validate_guest_xcr0(uint64_t, uint64_t); + void vmm_sol_glue_init(void); void vmm_sol_glue_cleanup(void); @@ -445,6 +466,7 @@ typedef struct vmm_data_req { uint32_t vdr_flags; uint32_t vdr_len; void *vdr_data; + uint32_t *vdr_result_len; } vmm_data_req_t; typedef struct vmm_data_req vmm_data_req_t; @@ -455,6 +477,7 @@ typedef struct vmm_data_version_entry { uint16_t vdve_class; uint16_t vdve_version; uint16_t vdve_len_expect; + uint16_t vdve_len_per_item; vmm_data_readf_t vdve_readf; vmm_data_writef_t vdve_writef; } vmm_data_version_entry_t; diff --git a/usr/src/uts/intel/io/vmm/vmm.c b/usr/src/uts/intel/io/vmm/vmm.c index 565dcbbe0a..e85d84d0b6 100644 --- a/usr/src/uts/intel/io/vmm/vmm.c +++ b/usr/src/uts/intel/io/vmm/vmm.c @@ -248,6 +248,8 @@ static struct vmm_ops vmm_ops_null = { .vlapic_cleanup = (vmi_vlapic_cleanup)nullop_panic, .vmsavectx = (vmi_savectx)nullop_panic, .vmrestorectx = (vmi_restorectx)nullop_panic, + .vmgetmsr = (vmi_get_msr_t)nullop_panic, + .vmsetmsr = (vmi_set_msr_t)nullop_panic, }; static struct vmm_ops *ops = &vmm_ops_null; @@ -1102,38 +1104,51 @@ vm_assign_pptdev(struct vm *vm, int pptfd) } int -vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval) +vm_get_register(struct vm *vm, int vcpuid, int reg, uint64_t *retval) { - - if (vcpu < 0 || vcpu >= vm->maxcpus) + if (vcpuid < 0 || vcpuid >= vm->maxcpus) return (EINVAL); if (reg >= VM_REG_LAST) return (EINVAL); - return (VMGETREG(vm->cookie, vcpu, reg, retval)); + struct vcpu *vcpu = &vm->vcpu[vcpuid]; + switch (reg) { + case VM_REG_GUEST_XCR0: + *retval = vcpu->guest_xcr0; + return (0); + default: + return (VMGETREG(vm->cookie, vcpuid, reg, retval)); + } } int vm_set_register(struct vm *vm, int vcpuid, int reg, uint64_t val) { - struct vcpu *vcpu; - int error; - if (vcpuid < 0 || vcpuid >= vm->maxcpus) return (EINVAL); if (reg >= VM_REG_LAST) 
return (EINVAL); - error = VMSETREG(vm->cookie, vcpuid, reg, val); - if (error || reg != VM_REG_GUEST_RIP) + int error; + struct vcpu *vcpu = &vm->vcpu[vcpuid]; + switch (reg) { + case VM_REG_GUEST_RIP: + error = VMSETREG(vm->cookie, vcpuid, reg, val); + if (error == 0) { + vcpu->nextrip = val; + } return (error); - - /* Set 'nextrip' to match the value of %rip */ - vcpu = &vm->vcpu[vcpuid]; - vcpu->nextrip = val; - return (0); + case VM_REG_GUEST_XCR0: + if (!validate_guest_xcr0(val, vmm_get_host_xcr0())) { + return (EINVAL); + } + vcpu->guest_xcr0 = val; + return (0); + default: + return (VMSETREG(vm->cookie, vcpuid, reg, val)); + } } static bool @@ -1864,7 +1879,7 @@ vm_handle_run_state(struct vm *vm, int vcpuid) } static int -vm_rdmtrr(struct vm_mtrr *mtrr, uint32_t num, uint64_t *val) +vm_rdmtrr(const struct vm_mtrr *mtrr, uint32_t num, uint64_t *val) { switch (num) { case MSR_MTRRcap: @@ -1945,6 +1960,22 @@ vm_wrmtrr(struct vm_mtrr *mtrr, uint32_t num, uint64_t val) return (0); } +static bool +is_mtrr_msr(uint32_t msr) +{ + switch (msr) { + case MSR_MTRRcap: + case MSR_MTRRdefType: + case MSR_MTRR4kBase ... MSR_MTRR4kBase + 7: + case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1: + case MSR_MTRR64kBase: + case MSR_MTRRVarBase ... MSR_MTRRVarBase + (VMM_MTRR_VAR_MAX * 2) - 1: + return (true); + default: + return (false); + } +} + static int vm_handle_rdmsr(struct vm *vm, int vcpuid, struct vm_exit *vme) { @@ -3702,17 +3733,20 @@ vmm_data_is_cpu_specific(uint16_t data_class) case VDC_MSR: case VDC_FPU: case VDC_LAPIC: - case VDC_VMM_ARCH: return (true); default: return (false); } } -static const vmm_data_version_entry_t * -vmm_data_find(const vmm_data_req_t *req, int *err) +static int +vmm_data_find(const vmm_data_req_t *req, const vmm_data_version_entry_t **resp) { const vmm_data_version_entry_t **vdpp, *vdp; + + ASSERT(resp != NULL); + ASSERT(req->vdr_result_len != NULL); + SET_FOREACH(vdpp, vmm_data_version_entries) { vdp = *vdpp; if (vdp->vdve_class == req->vdr_class && @@ -3722,15 +3756,15 @@ vmm_data_find(const vmm_data_req_t *req, int *err) * provider for this data. */ if (vdp->vdve_len_expect != 0 && - vdp->vdve_len_expect != req->vdr_len) { - *err = ENOSPC; - return (NULL); + vdp->vdve_len_expect > req->vdr_len) { + *req->vdr_result_len = vdp->vdve_len_expect; + return (ENOSPC); } - return (vdp); + *resp = vdp; + return (0); } } - *err = EINVAL; - return (NULL); + return (EINVAL); } static void * @@ -3740,10 +3774,11 @@ vmm_data_from_class(const vmm_data_req_t *req, struct vm *vm, int vcpuid) /* per-cpu data/devices */ case VDC_LAPIC: return (vm_lapic(vm, vcpuid)); + case VDC_VMM_ARCH: + return (vm); case VDC_FPU: case VDC_REGISTER: - case VDC_VMM_ARCH: case VDC_MSR: /* * These have per-CPU handling which is dispatched outside @@ -3771,6 +3806,356 @@ vmm_data_from_class(const vmm_data_req_t *req, struct vm *vm, int vcpuid) } } +const uint32_t arch_msr_iter[] = { + MSR_EFER, + + /* + * While gsbase and fsbase are accessible via the MSR accessors, they + * are not included in MSR iteration since they are covered by the + * segment descriptor interface too. 
+ */ + MSR_KGSBASE, + + MSR_STAR, + MSR_LSTAR, + MSR_CSTAR, + MSR_SF_MASK, + + MSR_SYSENTER_CS_MSR, + MSR_SYSENTER_ESP_MSR, + MSR_SYSENTER_EIP_MSR, + MSR_PAT, +}; +const uint32_t generic_msr_iter[] = { + MSR_TSC, + MSR_MTRRcap, + MSR_MTRRdefType, + + MSR_MTRR4kBase, MSR_MTRR4kBase + 1, MSR_MTRR4kBase + 2, + MSR_MTRR4kBase + 3, MSR_MTRR4kBase + 4, MSR_MTRR4kBase + 5, + MSR_MTRR4kBase + 6, MSR_MTRR4kBase + 7, + + MSR_MTRR16kBase, MSR_MTRR16kBase + 1, + + MSR_MTRR64kBase, +}; + +static int +vmm_data_read_msrs(struct vm *vm, int vcpuid, const vmm_data_req_t *req) +{ + VERIFY3U(req->vdr_class, ==, VDC_MSR); + VERIFY3U(req->vdr_version, ==, 1); + + const uint_t num_msrs = nitems(arch_msr_iter) + nitems(generic_msr_iter) + + (VMM_MTRR_VAR_MAX * 2); + const uint32_t output_len = + num_msrs * sizeof (struct vdi_field_entry_v1); + *req->vdr_result_len = output_len; + + if (req->vdr_len < output_len) { + return (ENOSPC); + } + + struct vdi_field_entry_v1 *entryp = req->vdr_data; + for (uint_t i = 0; i < nitems(arch_msr_iter); i++, entryp++) { + const uint32_t msr = arch_msr_iter[i]; + uint64_t val = 0; + + int err = ops->vmgetmsr(vm->cookie, vcpuid, msr, &val); + /* All of these MSRs are expected to work */ + VERIFY0(err); + entryp->vfe_ident = msr; + entryp->vfe_value = val; + } + + struct vm_mtrr *mtrr = &vm->vcpu[vcpuid].mtrr; + for (uint_t i = 0; i < nitems(generic_msr_iter); i++, entryp++) { + const uint32_t msr = generic_msr_iter[i]; + + entryp->vfe_ident = msr; + switch (msr) { + case MSR_TSC: + /* + * Communicate this as the difference from the VM-wide + * offset of the boot time. + */ + entryp->vfe_value = vm->vcpu[vcpuid].tsc_offset; + break; + case MSR_MTRRcap: + case MSR_MTRRdefType: + case MSR_MTRR4kBase ... MSR_MTRR4kBase + 7: + case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1: + case MSR_MTRR64kBase: { + int err = vm_rdmtrr(mtrr, msr, &entryp->vfe_value); + VERIFY0(err); + break; + } + default: + panic("unexpected msr export %x", msr); + } + } + /* Copy the variable MTRRs */ + for (uint_t i = 0; i < (VMM_MTRR_VAR_MAX * 2); i++, entryp++) { + const uint32_t msr = MSR_MTRRVarBase + i; + + entryp->vfe_ident = msr; + int err = vm_rdmtrr(mtrr, msr, &entryp->vfe_value); + VERIFY0(err); + } + return (0); +} + +static int +vmm_data_write_msrs(struct vm *vm, int vcpuid, const vmm_data_req_t *req) +{ + VERIFY3U(req->vdr_class, ==, VDC_MSR); + VERIFY3U(req->vdr_version, ==, 1); + + const struct vdi_field_entry_v1 *entryp = req->vdr_data; + const uint_t entry_count = + req->vdr_len / sizeof (struct vdi_field_entry_v1); + struct vm_mtrr *mtrr = &vm->vcpu[vcpuid].mtrr; + + /* + * First make sure that all of the MSRs can be manipulated. + * For now, this check is done by going though the getmsr handler + */ + for (uint_t i = 0; i < entry_count; i++, entryp++) { + const uint32_t msr = entryp->vfe_ident; + uint64_t val; + int err = 0; + + switch (msr) { + case MSR_TSC: + break; + default: + if (is_mtrr_msr(msr)) { + err = vm_rdmtrr(mtrr, msr, &val); + } else { + err = ops->vmgetmsr(vm->cookie, vcpuid, msr, + &val); + } + break; + } + if (err != 0) { + return (err); + } + } + + /* + * Fairly confident that all of the 'set' operations are at least + * targeting valid MSRs, continue on. 
+ */ + entryp = req->vdr_data; + for (uint_t i = 0; i < entry_count; i++, entryp++) { + const uint32_t msr = entryp->vfe_ident; + const uint64_t val = entryp->vfe_value; + int err = 0; + + switch (msr) { + case MSR_TSC: + vm->vcpu[vcpuid].tsc_offset = entryp->vfe_value; + break; + default: + if (is_mtrr_msr(msr)) { + if (msr == MSR_MTRRcap) { + /* + * MTRRcap is read-only. If the current + * value matches the incoming one, + * consider it a success + */ + uint64_t comp; + err = vm_rdmtrr(mtrr, msr, &comp); + if (err != 0 || comp != val) { + err = EINVAL; + } + } else { + err = vm_wrmtrr(mtrr, msr, val); + } + } else { + err = ops->vmsetmsr(vm->cookie, vcpuid, msr, + val); + } + break; + } + if (err != 0) { + return (err); + } + } + *req->vdr_result_len = entry_count * sizeof (struct vdi_field_entry_v1); + + return (0); +} + +static const vmm_data_version_entry_t msr_v1 = { + .vdve_class = VDC_MSR, + .vdve_version = 1, + .vdve_len_per_item = sizeof (struct vdi_field_entry_v1), + /* Requires backend-specific dispatch */ + .vdve_readf = NULL, + .vdve_writef = NULL, +}; +VMM_DATA_VERSION(msr_v1); + +static const uint32_t vmm_arch_v1_fields[] = { + VAI_TSC_BOOT_OFFSET, + VAI_BOOT_HRTIME, + VAI_TSC_FREQ, +}; + +static bool +vmm_read_arch_field(struct vm *vm, uint32_t ident, uint64_t *valp) +{ + ASSERT(valp != NULL); + + switch (ident) { + case VAI_TSC_BOOT_OFFSET: + *valp = vm->boot_tsc_offset; + return (true); + case VAI_BOOT_HRTIME: + *valp = vm->boot_hrtime; + return (true); + case VAI_TSC_FREQ: + /* + * Since the system TSC calibration is not public, just derive + * it from the scaling functions available. + */ + *valp = unscalehrtime(NANOSEC); + return (true); + default: + break; + } + return (false); +} + +static int +vmm_data_read_vmm_arch(void *arg, const vmm_data_req_t *req) +{ + struct vm *vm = arg; + + VERIFY3U(req->vdr_class, ==, VDC_VMM_ARCH); + VERIFY3U(req->vdr_version, ==, 1); + + struct vdi_field_entry_v1 *entryp = req->vdr_data; + + /* Specific fields requested */ + if ((req->vdr_flags & VDX_FLAG_READ_COPYIN) != 0) { + const uint_t count = + req->vdr_len / sizeof (struct vdi_field_entry_v1); + + for (uint_t i = 0; i < count; i++, entryp++) { + if (!vmm_read_arch_field(vm, entryp->vfe_ident, + &entryp->vfe_value)) { + return (EINVAL); + } + } + *req->vdr_result_len = + count * sizeof (struct vdi_field_entry_v1); + return (0); + } + + /* Emit all of the possible values */ + const uint32_t total_size = nitems(vmm_arch_v1_fields) * + sizeof (struct vdi_field_entry_v1); + *req->vdr_result_len = total_size; + if (req->vdr_len < total_size) { + return (ENOSPC); + } + for (uint_t i = 0; i < nitems(vmm_arch_v1_fields); i++, entryp++) { + entryp->vfe_ident = vmm_arch_v1_fields[i]; + VERIFY(vmm_read_arch_field(vm, entryp->vfe_ident, + &entryp->vfe_value)); + } + return (0); +} + +static int +vmm_data_write_vmm_arch(void *arg, const vmm_data_req_t *req) +{ + struct vm *vm = arg; + + VERIFY3U(req->vdr_class, ==, VDC_VMM_ARCH); + VERIFY3U(req->vdr_version, ==, 1); + + const struct vdi_field_entry_v1 *entryp = req->vdr_data; + const uint_t entry_count = + req->vdr_len / sizeof (struct vdi_field_entry_v1); + + for (uint_t i = 0; i < entry_count; i++, entryp++) { + const uint64_t val = entryp->vfe_value; + + switch (entryp->vfe_ident) { + case VAI_TSC_BOOT_OFFSET: + vm->boot_tsc_offset = val; + break; + case VAI_BOOT_HRTIME: + vm->boot_hrtime = val; + break; + case VAI_TSC_FREQ: + /* Guest TSC frequency not (currently) adjustable */ + return (EPERM); + default: + return (EINVAL); + } + } + 
*req->vdr_result_len = entry_count * sizeof (struct vdi_field_entry_v1); + return (0); +} + +static const vmm_data_version_entry_t vmm_arch_v1 = { + .vdve_class = VDC_VMM_ARCH, + .vdve_version = 1, + .vdve_len_per_item = sizeof (struct vdi_field_entry_v1), + .vdve_readf = vmm_data_read_vmm_arch, + .vdve_writef = vmm_data_write_vmm_arch, +}; +VMM_DATA_VERSION(vmm_arch_v1); + +static int +vmm_data_read_versions(void *arg, const vmm_data_req_t *req) +{ + VERIFY3U(req->vdr_class, ==, VDC_VERSION); + VERIFY3U(req->vdr_version, ==, 1); + + const uint32_t total_size = SET_COUNT(vmm_data_version_entries) * + sizeof (struct vdi_version_entry_v1); + + /* Make sure there is room for all of the entries */ + *req->vdr_result_len = total_size; + if (req->vdr_len < *req->vdr_result_len) { + return (ENOSPC); + } + + struct vdi_version_entry_v1 *entryp = req->vdr_data; + const vmm_data_version_entry_t **vdpp; + SET_FOREACH(vdpp, vmm_data_version_entries) { + const vmm_data_version_entry_t *vdp = *vdpp; + + entryp->vve_class = vdp->vdve_class; + entryp->vve_version = vdp->vdve_version; + entryp->vve_len_expect = vdp->vdve_len_expect; + entryp->vve_len_per_item = vdp->vdve_len_per_item; + entryp++; + } + return (0); +} + +static int +vmm_data_write_versions(void *arg, const vmm_data_req_t *req) +{ + /* Writing to the version information makes no sense */ + return (EPERM); +} + +static const vmm_data_version_entry_t versions_v1 = { + .vdve_class = VDC_VERSION, + .vdve_version = 1, + .vdve_len_per_item = sizeof (struct vdi_version_entry_v1), + .vdve_readf = vmm_data_read_versions, + .vdve_writef = vmm_data_write_versions, +}; +VMM_DATA_VERSION(versions_v1); + int vmm_data_read(struct vm *vm, int vcpuid, const vmm_data_req_t *req) { @@ -3782,28 +4167,34 @@ vmm_data_read(struct vm *vm, int vcpuid, const vmm_data_req_t *req) } } - const vmm_data_version_entry_t *entry; - entry = vmm_data_find(req, &err); - if (entry == NULL) { - ASSERT(err != 0); + const vmm_data_version_entry_t *entry = NULL; + err = vmm_data_find(req, &entry); + if (err != 0) { return (err); } + ASSERT(entry != NULL); void *datap = vmm_data_from_class(req, vm, vcpuid); if (datap != NULL) { err = entry->vdve_readf(datap, req); + + /* + * Successful reads of fixed-length data should populate the + * length of that result. + */ + if (err == 0 && entry->vdve_len_expect != 0) { + *req->vdr_result_len = entry->vdve_len_expect; + } } else { switch (req->vdr_class) { + case VDC_MSR: + err = vmm_data_read_msrs(vm, vcpuid, req); + break; case VDC_FPU: /* TODO: wire up to xsave export via hma_fpu iface */ err = EINVAL; break; case VDC_REGISTER: - case VDC_VMM_ARCH: - case VDC_MSR: - /* TODO: implement */ - err = EINVAL; - break; default: err = EINVAL; break; @@ -3824,28 +4215,33 @@ vmm_data_write(struct vm *vm, int vcpuid, const vmm_data_req_t *req) } } - const vmm_data_version_entry_t *entry; - entry = vmm_data_find(req, &err); - if (entry == NULL) { - ASSERT(err != 0); + const vmm_data_version_entry_t *entry = NULL; + err = vmm_data_find(req, &entry); + if (err != 0) { return (err); } + ASSERT(entry != NULL); void *datap = vmm_data_from_class(req, vm, vcpuid); if (datap != NULL) { err = entry->vdve_writef(datap, req); + /* + * Successful writes of fixed-length data should populate the + * length of that result. 
+ */ + if (err == 0 && entry->vdve_len_expect != 0) { + *req->vdr_result_len = entry->vdve_len_expect; + } } else { switch (req->vdr_class) { + case VDC_MSR: + err = vmm_data_write_msrs(vm, vcpuid, req); + break; case VDC_FPU: /* TODO: wire up to xsave import via hma_fpu iface */ err = EINVAL; break; case VDC_REGISTER: - case VDC_VMM_ARCH: - case VDC_MSR: - /* TODO: implement */ - err = EINVAL; - break; default: err = EINVAL; break; diff --git a/usr/src/uts/intel/io/vmm/vmm_sol_dev.c b/usr/src/uts/intel/io/vmm/vmm_sol_dev.c index 9a4693fc78..ee07779b21 100644 --- a/usr/src/uts/intel/io/vmm/vmm_sol_dev.c +++ b/usr/src/uts/intel/io/vmm/vmm_sol_dev.c @@ -1550,32 +1550,48 @@ vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md, } const size_t len = vdx.vdx_len; - void *buf = kmem_alloc(len, KM_SLEEP); - if ((vdx.vdx_flags & VDX_FLAG_READ_COPYIN) != 0) { - if (ddi_copyin(vdx.vdx_data, buf, len, md) != 0) { + void *buf = NULL; + if (len != 0) { + buf = kmem_alloc(len, KM_SLEEP); + if ((vdx.vdx_flags & VDX_FLAG_READ_COPYIN) != 0 && + ddi_copyin(vdx.vdx_data, buf, len, md) != 0) { kmem_free(buf, len); error = EFAULT; break; + } else { + bzero(buf, len); } - } else { - bzero(buf, len); } + vdx.vdx_result_len = 0; vmm_data_req_t req = { .vdr_class = vdx.vdx_class, .vdr_version = vdx.vdx_version, .vdr_flags = vdx.vdx_flags, - .vdr_len = vdx.vdx_len, + .vdr_len = len, .vdr_data = buf, + .vdr_result_len = &vdx.vdx_result_len, }; error = vmm_data_read(sc->vmm_vm, vdx.vdx_vcpuid, &req); - if (error == 0) { + if (error == 0 && buf != NULL) { if (ddi_copyout(buf, vdx.vdx_data, len, md) != 0) { error = EFAULT; } } - kmem_free(buf, len); + + /* + * Copy out the transfer request so that the value of + * vdx_result_len can be made available, regardless of any + * error(s) which may have occurred. + */ + if (ddi_copyout(&vdx, datap, sizeof (vdx), md) != 0) { + error = (error != 0) ? error : EFAULT; + } + + if (buf != NULL) { + kmem_free(buf, len); + } break; } case VM_DATA_WRITE: { @@ -1595,19 +1611,24 @@ vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md, } const size_t len = vdx.vdx_len; - void *buf = kmem_alloc(len, KM_SLEEP); - if (ddi_copyin(vdx.vdx_data, buf, len, md) != 0) { - kmem_free(buf, len); - error = EFAULT; - break; + void *buf = NULL; + if (len != 0) { + buf = kmem_alloc(len, KM_SLEEP); + if (ddi_copyin(vdx.vdx_data, buf, len, md) != 0) { + kmem_free(buf, len); + error = EFAULT; + break; + } } + vdx.vdx_result_len = 0; vmm_data_req_t req = { .vdr_class = vdx.vdx_class, .vdr_version = vdx.vdx_version, .vdr_flags = vdx.vdx_flags, - .vdr_len = vdx.vdx_len, + .vdr_len = len, .vdr_data = buf, + .vdr_result_len = &vdx.vdx_result_len, }; if (vmm_allow_state_writes == 0) { /* XXX: Play it safe for now */ @@ -1617,13 +1638,25 @@ vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md, &req); } - if (error == 0 && + if (error == 0 && buf != NULL && (vdx.vdx_flags & VDX_FLAG_WRITE_COPYOUT) != 0) { if (ddi_copyout(buf, vdx.vdx_data, len, md) != 0) { error = EFAULT; } } - kmem_free(buf, len); + + /* + * Copy out the transfer request so that the value of + * vdx_result_len can be made available, regardless of any + * error(s) which may have occurred. + */ + if (ddi_copyout(&vdx, datap, sizeof (vdx), md) != 0) { + error = (error != 0) ? 
error : EFAULT; + } + + if (buf != NULL) { + kmem_free(buf, len); + } break; } diff --git a/usr/src/uts/intel/io/vmm/x86.c b/usr/src/uts/intel/io/vmm/x86.c index de48ba1d48..e593e0c04e 100644 --- a/usr/src/uts/intel/io/vmm/x86.c +++ b/usr/src/uts/intel/io/vmm/x86.c @@ -58,10 +58,10 @@ __FBSDID("$FreeBSD$"); #include <machine/specialreg.h> #include <machine/vmm.h> +#include <sys/vmm_kernel.h> #include "vmm_host.h" #include "vmm_util.h" -#include "x86.h" SYSCTL_DECL(_hw_vmm); @@ -80,6 +80,42 @@ static int cpuid_leaf_b = 1; */ static int vmm_force_invariant_tsc = 0; +#define CPUID_0000_0000 (0x0) +#define CPUID_0000_0001 (0x1) +#define CPUID_0000_0002 (0x2) +#define CPUID_0000_0003 (0x3) +#define CPUID_0000_0004 (0x4) +#define CPUID_0000_0006 (0x6) +#define CPUID_0000_0007 (0x7) +#define CPUID_0000_000A (0xA) +#define CPUID_0000_000B (0xB) +#define CPUID_0000_000D (0xD) +#define CPUID_0000_000F (0xF) +#define CPUID_0000_0010 (0x10) +#define CPUID_0000_0015 (0x15) +#define CPUID_8000_0000 (0x80000000) +#define CPUID_8000_0001 (0x80000001) +#define CPUID_8000_0002 (0x80000002) +#define CPUID_8000_0003 (0x80000003) +#define CPUID_8000_0004 (0x80000004) +#define CPUID_8000_0006 (0x80000006) +#define CPUID_8000_0007 (0x80000007) +#define CPUID_8000_0008 (0x80000008) +#define CPUID_8000_001D (0x8000001D) +#define CPUID_8000_001E (0x8000001E) + +/* + * CPUID instruction Fn0000_0001: + */ +#define CPUID_0000_0001_APICID_MASK (0xff<<24) +#define CPUID_0000_0001_APICID_SHIFT 24 + +/* + * CPUID instruction Fn0000_0001 ECX + */ +#define CPUID_0000_0001_FEAT0_VMX (1<<5) + + /* * Round up to the next power of two, if necessary, and then take log2. * Returns -1 if argument is zero. @@ -649,6 +685,10 @@ default_leaf: return (1); } +/* + * Return 'true' if the capability 'cap' is enabled in this virtual cpu + * and 'false' otherwise. + */ bool vm_cpuid_capability(struct vm *vm, int vcpuid, enum vm_cpuid_capability cap) { @@ -690,3 +730,23 @@ vm_cpuid_capability(struct vm *vm, int vcpuid, enum vm_cpuid_capability cap) } return (rv); } + +bool +validate_guest_xcr0(uint64_t val, uint64_t limit_mask) +{ + /* x87 feature must be enabled */ + if ((val & XFEATURE_ENABLED_X87) == 0) { + return (false); + } + /* AVX cannot be enabled without SSE */ + if ((val & (XFEATURE_ENABLED_SSE | XFEATURE_ENABLED_AVX)) == + XFEATURE_ENABLED_SSE) { + return (false); + } + /* No bits should be outside what we dictate to be allowed */ + if ((val & ~limit_mask) != 0) { + return (false); + } + + return (true); +} diff --git a/usr/src/uts/intel/io/vmm/x86.h b/usr/src/uts/intel/io/vmm/x86.h deleted file mode 100644 index f3459e4f8a..0000000000 --- a/usr/src/uts/intel/io/vmm/x86.h +++ /dev/null @@ -1,85 +0,0 @@ -/*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD - * - * Copyright (c) 2011 NetApp, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD$ - */ - -#ifndef _X86_H_ -#define _X86_H_ - -#define CPUID_0000_0000 (0x0) -#define CPUID_0000_0001 (0x1) -#define CPUID_0000_0002 (0x2) -#define CPUID_0000_0003 (0x3) -#define CPUID_0000_0004 (0x4) -#define CPUID_0000_0006 (0x6) -#define CPUID_0000_0007 (0x7) -#define CPUID_0000_000A (0xA) -#define CPUID_0000_000B (0xB) -#define CPUID_0000_000D (0xD) -#define CPUID_0000_000F (0xF) -#define CPUID_0000_0010 (0x10) -#define CPUID_0000_0015 (0x15) -#define CPUID_8000_0000 (0x80000000) -#define CPUID_8000_0001 (0x80000001) -#define CPUID_8000_0002 (0x80000002) -#define CPUID_8000_0003 (0x80000003) -#define CPUID_8000_0004 (0x80000004) -#define CPUID_8000_0006 (0x80000006) -#define CPUID_8000_0007 (0x80000007) -#define CPUID_8000_0008 (0x80000008) -#define CPUID_8000_001D (0x8000001D) -#define CPUID_8000_001E (0x8000001E) - -/* - * CPUID instruction Fn0000_0001: - */ -#define CPUID_0000_0001_APICID_MASK (0xff<<24) -#define CPUID_0000_0001_APICID_SHIFT 24 - -/* - * CPUID instruction Fn0000_0001 ECX - */ -#define CPUID_0000_0001_FEAT0_VMX (1<<5) - -int x86_emulate_cpuid(struct vm *vm, int vcpu_id, uint64_t *rax, uint64_t *rbx, - uint64_t *rcx, uint64_t *rdx); - -enum vm_cpuid_capability { - VCC_NONE, - VCC_NO_EXECUTE, - VCC_FFXSR, - VCC_TCE, - VCC_LAST -}; - -/* - * Return 'true' if the capability 'cap' is enabled in this virtual cpu - * and 'false' otherwise. - */ -bool vm_cpuid_capability(struct vm *vm, int vcpuid, enum vm_cpuid_capability); -#endif diff --git a/usr/src/uts/intel/sys/vmm.h b/usr/src/uts/intel/sys/vmm.h index 268b2e82ce..50d76ab17c 100644 --- a/usr/src/uts/intel/sys/vmm.h +++ b/usr/src/uts/intel/sys/vmm.h @@ -103,6 +103,7 @@ enum vm_reg_name { VM_REG_GUEST_DR3, VM_REG_GUEST_DR6, VM_REG_GUEST_ENTRY_INST_LENGTH, + VM_REG_GUEST_XCR0, VM_REG_LAST }; diff --git a/usr/src/uts/intel/sys/vmm_data.h b/usr/src/uts/intel/sys/vmm_data.h index 1b8614543c..9ba385c5d6 100644 --- a/usr/src/uts/intel/sys/vmm_data.h +++ b/usr/src/uts/intel/sys/vmm_data.h @@ -18,7 +18,6 @@ #define _VMM_DATA_H_ /* VMM Data Classes */ -#define VDC_META 0 /* Meta information about data system */ #define VDC_VERSION 1 /* Version information for each data class */ /* Classes bearing per-CPU data */ @@ -42,21 +41,27 @@ /* VMM Data Identifiers */ - -/* - * VDC_REGISTER: - */ - /* - * VDC_MSR: + * Generic field encoding for 64-bit (or smaller) data which are identified by a + * 32-bit (or smaller) name. 
* - * Use MSR identifiers directly + * Used by the following classes/version: + * - VDC_REGISTER v1: `vm_reg_name` identifiers + * - VDC_MSR v1: MSR identifiers + * - VDC_VMM_ARCH v1: Identifiers described below */ - -struct vdi_msr_entry_v1 { - uint32_t vme_msr; +struct vdi_field_entry_v1 { + uint32_t vfe_ident; uint32_t _pad; - uint64_t vme_value; + uint64_t vfe_value; +}; + +/* VDC_VERSION */ +struct vdi_version_entry_v1 { + uint16_t vve_class; + uint16_t vve_version; + uint16_t vve_len_expect; + uint16_t vve_len_per_item; }; /* @@ -98,11 +103,22 @@ struct vdi_lapic_v1 { uint32_t vl_esr_pending; }; - /* * VDC_VMM_ARCH: */ +/* + * Version 1 identifiers: + */ + +/* Offset of guest TSC from system at time of boot */ +#define VAI_TSC_BOOT_OFFSET 1 +/* Time that guest (nominally) booted, as hrtime */ +#define VAI_BOOT_HRTIME 2 +/* Guest TSC frequency measured by hrtime (not effected by wall clock adj.) */ +#define VAI_TSC_FREQ 3 + + /* VDC_IOAPIC: */ struct vdi_ioapic_v1 { diff --git a/usr/src/uts/intel/sys/vmm_dev.h b/usr/src/uts/intel/sys/vmm_dev.h index fc8ccf406e..8d1b2713dd 100644 --- a/usr/src/uts/intel/sys/vmm_dev.h +++ b/usr/src/uts/intel/sys/vmm_dev.h @@ -366,6 +366,7 @@ struct vm_data_xfer { uint16_t vdx_version; uint32_t vdx_flags; uint32_t vdx_len; + uint32_t vdx_result_len; void *vdx_data; }; @@ -384,7 +385,7 @@ struct vm_data_xfer { * best-effort activity. Nothing is to be inferred about the magnitude of a * change when the version is modified. It follows no rules like semver. */ -#define VMM_CURRENT_INTERFACE_VERSION 2 +#define VMM_CURRENT_INTERFACE_VERSION 3 #define VMMCTL_IOC_BASE (('V' << 16) | ('M' << 8)) |
