author		Patrick Mooney <pmooney@pfmooney.com>	2022-06-10 23:05:32 +0000
committer	Patrick Mooney <pmooney@oxide.computer>	2022-06-27 23:20:35 +0000
commit		54cf5b63effe805271443d5dd7afd37ec184fbab (patch)
tree		c2da22ed12b56879537c652b894932f257462d7b /usr
parent		ea962d11118b10579c946c4ac15559148ddf3cf8 (diff)
download	illumos-joyent-54cf5b63effe805271443d5dd7afd37ec184fbab.tar.gz
14635 bhyve should expose additional vcpu state
Reviewed by: Luqman Aden <luqman@oxide.computer>
Reviewed by: Jordan Paige Hendricks <jordan@oxidecomputer.com>
Approved by: Robert Mustacchi <rm@fingolfin.org>
Diffstat (limited to 'usr')
-rw-r--r--	usr/src/cmd/bhyvectl/bhyvectl.c		| 447
-rw-r--r--	usr/src/uts/intel/io/vmm/amd/svm.c	| 64
-rw-r--r--	usr/src/uts/intel/io/vmm/amd/vmcb.c	| 61
-rw-r--r--	usr/src/uts/intel/io/vmm/amd/vmcb.h	| 1
-rw-r--r--	usr/src/uts/intel/io/vmm/intel/vmcs.c	| 28
-rw-r--r--	usr/src/uts/intel/io/vmm/intel/vmcs.h	| 1
-rw-r--r--	usr/src/uts/intel/io/vmm/intel/vmx.c	| 242
-rw-r--r--	usr/src/uts/intel/io/vmm/sys/vmm_kernel.h	| 23
-rw-r--r--	usr/src/uts/intel/io/vmm/vmm.c		| 482
-rw-r--r--	usr/src/uts/intel/io/vmm/vmm_sol_dev.c	| 65
-rw-r--r--	usr/src/uts/intel/io/vmm/x86.c		| 62
-rw-r--r--	usr/src/uts/intel/io/vmm/x86.h		| 85
-rw-r--r--	usr/src/uts/intel/sys/vmm.h		| 1
-rw-r--r--	usr/src/uts/intel/sys/vmm_data.h	| 42
-rw-r--r--	usr/src/uts/intel/sys/vmm_dev.h		| 3
15 files changed, 1029 insertions, 578 deletions
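
The core mechanism in this change is a sizing handshake on the VM_DATA_READ/VM_DATA_WRITE ioctls: a caller may probe with an undersized (even zero-length) buffer, in which case the kernel records the space it needs in the new vdx_result_len field and fails with ENOSPC, and the caller then retries with an adequately sized buffer. A minimal userspace sketch of that handshake, mirroring the show_msrs() addition below (the helper name and error handling here are illustrative, not part of the patch):

	#include <sys/types.h>
	#include <sys/ioctl.h>
	#include <sys/vmm_dev.h>
	#include <sys/vmm_data.h>
	#include <errno.h>
	#include <stdlib.h>

	/* Hypothetical helper: fetch all exported MSRs for one vcpu. */
	static int
	read_vcpu_msrs(int vmfd, int vcpu, struct vdi_field_entry_v1 **entriesp,
	    uint_t *countp)
	{
		struct vm_data_xfer xfer = {
			.vdx_vcpuid = vcpu,
			.vdx_class = VDC_MSR,
			.vdx_version = 1,
			.vdx_len = 0,
			.vdx_data = NULL,
		};

		/* Probe: expect ENOSPC with vdx_result_len set to the needed size */
		if (ioctl(vmfd, VM_DATA_READ, &xfer) == 0 || errno != ENOSPC)
			return (-1);

		struct vdi_field_entry_v1 *ents = calloc(1, xfer.vdx_result_len);
		if (ents == NULL)
			return (-1);

		xfer.vdx_data = ents;
		xfer.vdx_len = xfer.vdx_result_len;
		if (ioctl(vmfd, VM_DATA_READ, &xfer) != 0) {
			free(ents);
			return (-1);
		}

		*entriesp = ents;
		*countp = xfer.vdx_result_len / sizeof (struct vdi_field_entry_v1);
		return (0);
	}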
diff --git a/usr/src/cmd/bhyvectl/bhyvectl.c b/usr/src/cmd/bhyvectl/bhyvectl.c
index a6c86fd5fc..3b3caf0d20 100644
--- a/usr/src/cmd/bhyvectl/bhyvectl.c
+++ b/usr/src/cmd/bhyvectl/bhyvectl.c
@@ -51,9 +51,7 @@ __FBSDID("$FreeBSD$");
#include <sys/errno.h>
#include <sys/mman.h>
#include <sys/cpuset.h>
-#ifndef __FreeBSD__
#include <sys/fp.h>
-#endif /* __FreeBSD__ */
#include <stdio.h>
#include <stdlib.h>
@@ -72,6 +70,7 @@ __FBSDID("$FreeBSD$");
#include <machine/specialreg.h>
#include <machine/vmm.h>
#include <machine/vmm_dev.h>
+#include <sys/vmm_data.h>
#include <vmmapi.h>
#include "amd/vmcb.h"
@@ -95,10 +94,8 @@ usage(bool cpu_intel)
" [--cpu=<vcpu_number>]\n"
" [--create]\n"
" [--destroy]\n"
-#ifndef __FreeBSD__
" [--pmtmr-port=ioport]\n"
" [--wrlock-cycle]\n"
-#endif
" [--get-all]\n"
" [--get-stats]\n"
" [--set-desc-ds]\n"
@@ -186,9 +183,6 @@ usage(bool cpu_intel)
" [--get-ldtr]\n"
" [--set-x2apic-state=<state>]\n"
" [--get-x2apic-state]\n"
-#ifdef __FreeBSD__
- " [--unassign-pptdev=<bus/slot/func>]\n"
-#endif
" [--set-mem=<memory in units of MB>]\n"
" [--get-lowmem]\n"
" [--get-highmem]\n"
@@ -307,16 +301,11 @@ static int set_cs, set_ds, set_es, set_fs, set_gs, set_ss, set_tr, set_ldtr;
static int get_cs, get_ds, get_es, get_fs, get_gs, get_ss, get_tr, get_ldtr;
static int set_x2apic_state, get_x2apic_state;
enum x2apic_state x2apic_state;
-#ifdef __FreeBSD__
-static int unassign_pptdev, bus, slot, func;
-#endif
static int run;
static int get_cpu_topology;
-#ifndef __FreeBSD__
static int pmtmr_port;
static int wrlock_cycle;
static int get_fpu;
-#endif
/*
* VMCB specific.
@@ -339,12 +328,13 @@ static int get_cr4_mask, get_cr4_shadow;
static int get_cr3_targets;
static int get_apic_access_addr, get_virtual_apic_addr, get_tpr_threshold;
static int get_msr_bitmap, get_msr_bitmap_address;
+static int get_guest_msrs;
static int get_vpid_asid;
static int get_inst_err, get_exit_ctls, get_entry_ctls;
static int get_host_cr0, get_host_cr3, get_host_cr4;
static int get_host_rip, get_host_rsp;
-static int get_guest_pat, get_host_pat;
-static int get_guest_sysenter, get_vmcs_link;
+static int get_host_pat;
+static int get_vmcs_link;
static int get_exit_reason, get_vmcs_exit_qualification;
static int get_vmcs_exit_interruption_info, get_vmcs_exit_interruption_error;
static int get_vmcs_exit_inst_length;
@@ -406,172 +396,7 @@ dump_vm_run_exitcode(struct vm_exit *vmexit, int vcpu)
#define MSR_AMD7TH_START 0xC0010000
#define MSR_AMD7TH_END 0xC0011FFF
-#ifdef __FreeBSD__
-static const char *
-msr_name(uint32_t msr)
-{
- static char buf[32];
-
- switch(msr) {
- case MSR_TSC:
- return ("MSR_TSC");
- case MSR_EFER:
- return ("MSR_EFER");
- case MSR_STAR:
- return ("MSR_STAR");
- case MSR_LSTAR:
- return ("MSR_LSTAR");
- case MSR_CSTAR:
- return ("MSR_CSTAR");
- case MSR_SF_MASK:
- return ("MSR_SF_MASK");
- case MSR_FSBASE:
- return ("MSR_FSBASE");
- case MSR_GSBASE:
- return ("MSR_GSBASE");
- case MSR_KGSBASE:
- return ("MSR_KGSBASE");
- case MSR_SYSENTER_CS_MSR:
- return ("MSR_SYSENTER_CS_MSR");
- case MSR_SYSENTER_ESP_MSR:
- return ("MSR_SYSENTER_ESP_MSR");
- case MSR_SYSENTER_EIP_MSR:
- return ("MSR_SYSENTER_EIP_MSR");
- case MSR_PAT:
- return ("MSR_PAT");
- }
- snprintf(buf, sizeof(buf), "MSR %#08x", msr);
-
- return (buf);
-}
-
-static inline void
-print_msr_pm(uint64_t msr, int vcpu, int readable, int writeable)
-{
-
- if (readable || writeable) {
- printf("%-20s[%d]\t\t%c%c\n", msr_name(msr), vcpu,
- readable ? 'R' : '-', writeable ? 'W' : '-');
- }
-}
-
-/*
- * Reference APM vol2, section 15.11 MSR Intercepts.
- */
-static void
-dump_amd_msr_pm(const char *bitmap, int vcpu)
-{
- int byte, bit, readable, writeable;
- uint32_t msr;
-
- for (msr = 0; msr < 0x2000; msr++) {
- byte = msr / 4;
- bit = (msr % 4) * 2;
-
- /* Look at MSRs in the range 0x00000000 to 0x00001FFF */
- readable = (bitmap[byte] & (1 << bit)) ? 0 : 1;
- writeable = (bitmap[byte] & (2 << bit)) ? 0 : 1;
- print_msr_pm(msr, vcpu, readable, writeable);
-
- /* Look at MSRs in the range 0xC0000000 to 0xC0001FFF */
- byte += 2048;
- readable = (bitmap[byte] & (1 << bit)) ? 0 : 1;
- writeable = (bitmap[byte] & (2 << bit)) ? 0 : 1;
- print_msr_pm(msr + MSR_AMD6TH_START, vcpu, readable,
- writeable);
-
- /* MSR 0xC0010000 to 0xC0011FF is only for AMD */
- byte += 4096;
- readable = (bitmap[byte] & (1 << bit)) ? 0 : 1;
- writeable = (bitmap[byte] & (2 << bit)) ? 0 : 1;
- print_msr_pm(msr + MSR_AMD7TH_START, vcpu, readable,
- writeable);
- }
-}
-
-/*
- * Reference Intel SDM Vol3 Section 24.6.9 MSR-Bitmap Address
- */
-static void
-dump_intel_msr_pm(const char *bitmap, int vcpu)
-{
- int byte, bit, readable, writeable;
- uint32_t msr;
-
- for (msr = 0; msr < 0x2000; msr++) {
- byte = msr / 8;
- bit = msr & 0x7;
-
- /* Look at MSRs in the range 0x00000000 to 0x00001FFF */
- readable = (bitmap[byte] & (1 << bit)) ? 0 : 1;
- writeable = (bitmap[2048 + byte] & (1 << bit)) ? 0 : 1;
- print_msr_pm(msr, vcpu, readable, writeable);
-
- /* Look at MSRs in the range 0xC0000000 to 0xC0001FFF */
- byte += 1024;
- readable = (bitmap[byte] & (1 << bit)) ? 0 : 1;
- writeable = (bitmap[2048 + byte] & (1 << bit)) ? 0 : 1;
- print_msr_pm(msr + MSR_AMD6TH_START, vcpu, readable,
- writeable);
- }
-}
-
-static int
-dump_msr_bitmap(int vcpu, uint64_t addr, bool cpu_intel)
-{
- int error, fd, map_size;
- const char *bitmap;
-
- error = -1;
- bitmap = MAP_FAILED;
-
- fd = open("/dev/mem", O_RDONLY, 0);
- if (fd < 0) {
- perror("Couldn't open /dev/mem");
- goto done;
- }
-
- if (cpu_intel)
- map_size = PAGE_SIZE;
- else
- map_size = 2 * PAGE_SIZE;
-
- bitmap = mmap(NULL, map_size, PROT_READ, MAP_SHARED, fd, addr);
- if (bitmap == MAP_FAILED) {
- perror("mmap failed");
- goto done;
- }
-
- if (cpu_intel)
- dump_intel_msr_pm(bitmap, vcpu);
- else
- dump_amd_msr_pm(bitmap, vcpu);
-
- error = 0;
-done:
- if (bitmap != MAP_FAILED)
- munmap((void *)bitmap, map_size);
- if (fd >= 0)
- close(fd);
-
- return (error);
-}
-
-static int
-vm_get_vmcs_field(struct vmctx *ctx, int vcpu, int field, uint64_t *ret_val)
-{
-
- return (vm_get_register(ctx, vcpu, VMCS_IDENT(field), ret_val));
-}
-
-static int
-vm_set_vmcs_field(struct vmctx *ctx, int vcpu, int field, uint64_t val)
-{
-
- return (vm_set_register(ctx, vcpu, VMCS_IDENT(field), val));
-}
-#else /* __FreeBSD__ */
-/* VMCS does not allow arbitrary reads/writes */
+/* Until a safe method is created, arbitrary VMCS reads/writes are forbidden */
static int
vm_get_vmcs_field(struct vmctx *ctx, int vcpu, int field, uint64_t *ret_val)
{
@@ -584,29 +409,11 @@ vm_set_vmcs_field(struct vmctx *ctx, int vcpu, int field, uint64_t val)
{
return (EINVAL);
}
-#endif /* __FreeBSD__ */
-
-#ifdef __FreeBSD__
-static int
-vm_get_vmcb_field(struct vmctx *ctx, int vcpu, int off, int bytes,
- uint64_t *ret_val)
-{
-
- return (vm_get_register(ctx, vcpu, VMCB_ACCESS(off, bytes), ret_val));
-}
-static int
-vm_set_vmcb_field(struct vmctx *ctx, int vcpu, int off, int bytes,
- uint64_t val)
-{
-
- return (vm_set_register(ctx, vcpu, VMCB_ACCESS(off, bytes), val));
-}
-#else /* __FreeBSD__ */
-/* Arbitrary VMCB read/write is not allowed */
+/* Until a safe method is created, arbitrary VMCB reads/writes are forbidden */
static int
vm_get_vmcb_field(struct vmctx *ctx, int vcpu, int off, int bytes,
- uint64_t *ret_val)
+ uint64_t *ret_val)
{
*ret_val = 0;
return (0);
@@ -614,11 +421,10 @@ vm_get_vmcb_field(struct vmctx *ctx, int vcpu, int off, int bytes,
static int
vm_set_vmcb_field(struct vmctx *ctx, int vcpu, int off, int bytes,
- uint64_t val)
+ uint64_t val)
{
return (EINVAL);
}
-#endif /* __FreeBSD__ */
enum {
VMNAME = 1000, /* avoid collision with return values from getopt */
@@ -661,9 +467,7 @@ enum {
SET_RTC_TIME,
SET_RTC_NVRAM,
RTC_NVRAM_OFFSET,
-#ifndef __FreeBSD__
PMTMR_PORT,
-#endif
};
static void
@@ -686,38 +490,6 @@ print_cpus(const char *banner, const cpuset_t *cpus)
printf("\n");
}
-#ifdef __FreeBSD__
-static void
-print_intinfo(const char *banner, uint64_t info)
-{
- int type;
-
- printf("%s:\t", banner);
- if (info & VM_INTINFO_VALID) {
- type = info & VM_INTINFO_TYPE;
- switch (type) {
- case VM_INTINFO_HWINTR:
- printf("extint");
- break;
- case VM_INTINFO_NMI:
- printf("nmi");
- break;
- case VM_INTINFO_SWINTR:
- printf("swint");
- break;
- default:
- printf("exception");
- break;
- }
- printf(" vector %d", (int)VM_INTINFO_VECTOR(info));
- if (info & VM_INTINFO_DEL_ERRCODE)
- printf(" errcode %#x", (u_int)(info >> 32));
- } else {
- printf("n/a");
- }
- printf("\n");
-}
-#else /* __FreeBSD__ */
static void
print_intinfo(const char *banner, uint64_t info)
{
@@ -746,7 +518,6 @@ print_intinfo(const char *banner, uint64_t info)
}
printf("\n");
}
-#endif /* __FreeBSD__ */
static bool
cpu_vendor_intel(void)
@@ -1141,7 +912,7 @@ get_misc_vmcs(struct vmctx *ctx, int vcpu)
if (error == 0)
printf("cr4_shadow[%d]\t\t0x%016lx\n", vcpu, cr4shadow);
}
-
+
if (!error && (get_cr3_targets || get_all)) {
uint64_t target_count, target_addr;
error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR3_TARGET_COUNT,
@@ -1214,7 +985,7 @@ get_misc_vmcs(struct vmctx *ctx, int vcpu)
printf("gpa[%d]\t\t0x%016lx\n", vcpu, u64);
}
- if (!error && (get_vmcs_entry_interruption_info ||
+ if (!error && (get_vmcs_entry_interruption_info ||
get_all)) {
error = vm_get_vmcs_field(ctx, vcpu, VMCS_ENTRY_INTR_INFO,&u64);
if (error == 0) {
@@ -1336,7 +1107,7 @@ get_misc_vmcs(struct vmctx *ctx, int vcpu)
printf("vmcs_exit_qualification[%d]\t0x%016lx\n",
vcpu, u64);
}
-
+
return (error);
}
@@ -1549,9 +1320,7 @@ setup_options(bool cpu_intel)
NO_ARG, &get_msr_bitmap, 1 },
{ "get-msr-bitmap-address",
NO_ARG, &get_msr_bitmap_address, 1 },
- { "get-guest-pat", NO_ARG, &get_guest_pat, 1 },
- { "get-guest-sysenter",
- NO_ARG, &get_guest_sysenter, 1 },
+ { "get-guest-msrs", NO_ARG, &get_guest_msrs, 1 },
{ "get-exit-reason",
NO_ARG, &get_exit_reason, 1 },
{ "get-x2apic-state", NO_ARG, &get_x2apic_state, 1 },
@@ -1566,11 +1335,9 @@ setup_options(bool cpu_intel)
{ "get-suspended-cpus", NO_ARG, &get_suspended_cpus, 1 },
{ "get-intinfo", NO_ARG, &get_intinfo, 1 },
{ "get-cpu-topology", NO_ARG, &get_cpu_topology, 1 },
-#ifndef __FreeBSD__
{ "pmtmr-port", REQ_ARG, 0, PMTMR_PORT },
{ "wrlock-cycle", NO_ARG, &wrlock_cycle, 1 },
{ "get-fpu", NO_ARG, &get_fpu, 1 },
-#endif
};
const struct option intel_opts[] = {
@@ -1632,7 +1399,7 @@ setup_options(bool cpu_intel)
const struct option amd_opts[] = {
{ "get-vmcb-intercepts",
NO_ARG, &get_vmcb_intercept, 1 },
- { "get-vmcb-asid",
+ { "get-vmcb-asid",
NO_ARG, &get_vpid_asid, 1 },
{ "get-vmcb-exit-details",
NO_ARG, &get_vmcb_exit_details, 1 },
@@ -1788,7 +1555,6 @@ show_memseg(struct vmctx *ctx)
}
}
-#ifndef __FreeBSD__
static int
show_fpu(struct vmctx *ctx, int vcpu)
{
@@ -1873,7 +1639,87 @@ show_fpu(struct vmctx *ctx, int vcpu)
free(buf);
return (0);
}
-#endif /*__FreeBSD__ */
+
+static const char *
+msr_name(uint32_t msr)
+{
+#define MSR_IDENT_MAP(x) case x: return (#x);
+ switch (msr) {
+ MSR_IDENT_MAP(MSR_PAT)
+ MSR_IDENT_MAP(MSR_SYSENTER_CS_MSR)
+ MSR_IDENT_MAP(MSR_SYSENTER_ESP_MSR)
+ MSR_IDENT_MAP(MSR_SYSENTER_EIP_MSR)
+ MSR_IDENT_MAP(MSR_STAR)
+ MSR_IDENT_MAP(MSR_LSTAR)
+ MSR_IDENT_MAP(MSR_CSTAR)
+ MSR_IDENT_MAP(MSR_SF_MASK)
+ MSR_IDENT_MAP(MSR_FSBASE)
+ MSR_IDENT_MAP(MSR_GSBASE)
+ MSR_IDENT_MAP(MSR_KGSBASE)
+ MSR_IDENT_MAP(MSR_EFER)
+ MSR_IDENT_MAP(MSR_MTRRcap)
+ MSR_IDENT_MAP(MSR_MTRRdefType)
+ case MSR_TSC:
+ return ("MSR_TSC (offset from system boot)");
+ default:
+ return (NULL);
+ }
+}
+
+static int
+show_msrs(struct vmctx *ctx, int vcpu)
+{
+ struct vdi_field_entry_v1 *msrs;
+ struct vm_data_xfer xfer = {
+ .vdx_vcpuid = vcpu,
+ .vdx_class = VDC_MSR,
+ .vdx_version = 1,
+ .vdx_len = 0,
+ .vdx_data = &msrs,
+ };
+ int fd = vm_get_device_fd(ctx);
+ int res;
+
+ /* Figure out how many entries we need to alloc for */
+ res = ioctl(fd, VM_DATA_READ, &xfer);
+ if (res == 0) {
+ return (EINVAL);
+ } else if (errno != ENOSPC) {
+ return (errno);
+ }
+ const uint32_t len = xfer.vdx_result_len;
+ msrs = malloc(len);
+ if (msrs == NULL) {
+ return (ENOMEM);
+ }
+ bzero(msrs, len);
+ xfer.vdx_data = msrs;
+ xfer.vdx_len = len;
+
+ /* Query the actual data, now that we should have an adequate buffer */
+ res = ioctl(fd, VM_DATA_READ, &xfer);
+ if (res != 0) {
+ free(msrs);
+ return (errno);
+ }
+
+ const uint_t count =
+ xfer.vdx_result_len / sizeof (struct vdi_field_entry_v1);
+ for (uint_t i = 0; i < count; i++) {
+ const uint32_t ident = msrs[i].vfe_ident;
+ const uint64_t value = msrs[i].vfe_value;
+
+ const char *name = msr_name(ident);
+
+ if (name != NULL) {
+ printf("msr[%s]\t = %x\n", name, value);
+ } else {
+ printf("msr[%08x]\t = %x\n", ident, value);
+ }
+ }
+ free(msrs);
+ return (0);
+}
int
main(int argc, char *argv[])
@@ -1883,7 +1729,7 @@ main(int argc, char *argv[])
vm_paddr_t gpa_pmap;
struct vm_exit vmexit;
uint64_t rax, cr0, cr2, cr3, cr4, dr0, dr1, dr2, dr3, dr6, dr7;
- uint64_t rsp, rip, rflags, efer, pat;
+ uint64_t rsp, rip, rflags, efer;
uint64_t eptp, bm, addr, u64, pteval[4], *pte, info[2];
struct vmctx *ctx;
cpuset_t cpus;
@@ -2049,21 +1895,12 @@ main(int argc, char *argv[])
case CAPNAME:
capname = optarg;
break;
-#ifdef __FreeBSD__
- case UNASSIGN_PPTDEV:
- unassign_pptdev = 1;
- if (sscanf(optarg, "%d/%d/%d", &bus, &slot, &func) != 3)
- usage(cpu_intel);
- break;
-#endif
case ASSERT_LAPIC_LVT:
assert_lapic_lvt = atoi(optarg);
break;
-#ifndef __FreeBSD__
case PMTMR_PORT:
pmtmr_port = strtoul(optarg, NULL, 16);
break;
-#endif
default:
usage(cpu_intel);
}
@@ -2076,13 +1913,8 @@ main(int argc, char *argv[])
error = 0;
-#ifndef __FreeBSD__
if (!error && create)
error = vm_create(vmname, 0);
-# else
- if (!error && create)
- error = vm_create(vmname);
-#endif /* __FreeBSD__ */
if (!error) {
ctx = vm_open(vmname);
@@ -2094,16 +1926,15 @@ main(int argc, char *argv[])
}
}
-#ifndef __FreeBSD__
if (!error && pmtmr_port) {
error = vm_pmtmr_set_location(ctx, pmtmr_port);
exit(error);
}
+
if (!error && wrlock_cycle) {
error = vm_wrlock_cycle(ctx);
exit(error);
}
-#endif /* __FreeBSD__ */
if (!error && memsize)
error = vm_setup_memory(ctx, memsize, VM_MMAP_ALL);
@@ -2232,11 +2063,6 @@ main(int argc, char *argv[])
if (!error && set_x2apic_state)
error = vm_set_x2apic_state(ctx, vcpu, x2apic_state);
-#ifdef __FreeBSD__
- if (!error && unassign_pptdev)
- error = vm_unassign_pptdev(ctx, bus, slot, func);
-#endif /* __FreeBSD__ */
-
if (!error && set_exception_bitmap) {
if (cpu_intel)
error = vm_set_vmcs_field(ctx, vcpu,
@@ -2273,11 +2099,9 @@ main(int argc, char *argv[])
if (!error)
error = get_all_segments(ctx, vcpu);
-#ifndef __FreeBSD__
if (!error && (get_fpu || get_all)) {
error = show_fpu(ctx, vcpu);
}
-#endif /* __FreeBSD__ */
if (!error) {
if (cpu_intel)
@@ -2285,7 +2109,7 @@ main(int argc, char *argv[])
else
error = get_misc_vmcb(ctx, vcpu);
}
-
+
if (!error && (get_x2apic_state || get_all)) {
error = vm_get_x2apic_state(ctx, vcpu, &x2apic_state);
if (error == 0)
@@ -2340,7 +2164,7 @@ main(int argc, char *argv[])
&tscoff);
else
error = vm_get_vmcb_field(ctx, vcpu,
- VMCB_OFF_TSC_OFFSET,
+ VMCB_OFF_TSC_OFFSET,
8, &tscoff);
if (error == 0)
printf("tsc_offset[%d]\t0x%016lx\n", vcpu, tscoff);
@@ -2348,7 +2172,7 @@ main(int argc, char *argv[])
if (!error && (get_msr_bitmap_address || get_all)) {
if (cpu_intel)
- error = vm_get_vmcs_field(ctx, vcpu, VMCS_MSR_BITMAP,
+ error = vm_get_vmcs_field(ctx, vcpu, VMCS_MSR_BITMAP,
&addr);
else
error = vm_get_vmcb_field(ctx, vcpu,
@@ -2357,90 +2181,27 @@ main(int argc, char *argv[])
printf("msr_bitmap[%d]\t\t%#lx\n", vcpu, addr);
}
- if (!error && (get_msr_bitmap || get_all)) {
- if (cpu_intel) {
- error = vm_get_vmcs_field(ctx, vcpu,
- VMCS_MSR_BITMAP, &addr);
- } else {
- error = vm_get_vmcb_field(ctx, vcpu,
- VMCB_OFF_MSR_PERM, 8,
- &addr);
- }
-
-#ifdef __FreeBSD__
- if (error == 0)
- error = dump_msr_bitmap(vcpu, addr, cpu_intel);
-#else
- /*
- * Skip dumping the MSR bitmap since raw access to the VMCS is
- * currently not possible.
- */
-#endif /* __FreeBSD__ */
- }
-
if (!error && (get_vpid_asid || get_all)) {
uint64_t vpid;
if (cpu_intel)
error = vm_get_vmcs_field(ctx, vcpu, VMCS_VPID, &vpid);
else
- error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_ASID,
+ error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_ASID,
4, &vpid);
if (error == 0)
- printf("%s[%d]\t\t0x%04lx\n",
+ printf("%s[%d]\t\t0x%04lx\n",
cpu_intel ? "vpid" : "asid", vcpu, vpid);
}
- if (!error && (get_guest_pat || get_all)) {
- if (cpu_intel)
- error = vm_get_vmcs_field(ctx, vcpu,
- VMCS_GUEST_IA32_PAT, &pat);
- else
- error = vm_get_vmcb_field(ctx, vcpu,
- VMCB_OFF_GUEST_PAT, 8, &pat);
- if (error == 0)
- printf("guest_pat[%d]\t\t0x%016lx\n", vcpu, pat);
- }
-
- if (!error && (get_guest_sysenter || get_all)) {
- if (cpu_intel)
- error = vm_get_vmcs_field(ctx, vcpu,
- VMCS_GUEST_IA32_SYSENTER_CS,
- &cs);
- else
- error = vm_get_vmcb_field(ctx, vcpu,
- VMCB_OFF_SYSENTER_CS, 8,
- &cs);
-
- if (error == 0)
- printf("guest_sysenter_cs[%d]\t%#lx\n", vcpu, cs);
- if (cpu_intel)
- error = vm_get_vmcs_field(ctx, vcpu,
- VMCS_GUEST_IA32_SYSENTER_ESP,
- &rsp);
- else
- error = vm_get_vmcb_field(ctx, vcpu,
- VMCB_OFF_SYSENTER_ESP, 8,
- &rsp);
-
- if (error == 0)
- printf("guest_sysenter_sp[%d]\t%#lx\n", vcpu, rsp);
- if (cpu_intel)
- error = vm_get_vmcs_field(ctx, vcpu,
- VMCS_GUEST_IA32_SYSENTER_EIP,
- &rip);
- else
- error = vm_get_vmcb_field(ctx, vcpu,
- VMCB_OFF_SYSENTER_EIP, 8,
- &rip);
- if (error == 0)
- printf("guest_sysenter_ip[%d]\t%#lx\n", vcpu, rip);
+ if (!error && (get_guest_msrs || get_all)) {
+ error = show_msrs(ctx, vcpu);
}
if (!error && (get_exit_reason || get_all)) {
if (cpu_intel)
error = vm_get_vmcs_field(ctx, vcpu, VMCS_EXIT_REASON,
&u64);
- else
+ else
error = vm_get_vmcb_field(ctx, vcpu,
VMCB_OFF_EXIT_REASON, 8,
&u64);
diff --git a/usr/src/uts/intel/io/vmm/amd/svm.c b/usr/src/uts/intel/io/vmm/amd/svm.c
index b699d57991..de4a492ae9 100644
--- a/usr/src/uts/intel/io/vmm/amd/svm.c
+++ b/usr/src/uts/intel/io/vmm/amd/svm.c
@@ -72,7 +72,6 @@ __FBSDID("$FreeBSD$");
#include "vlapic.h"
#include "vlapic_priv.h"
-#include "x86.h"
#include "vmcb.h"
#include "svm.h"
#include "svm_softc.h"
@@ -2257,6 +2256,17 @@ svm_setdesc(void *arg, int vcpu, int reg, const struct seg_desc *desc)
if (SEG_DESC_UNUSABLE(desc->access)) {
seg->attrib &= ~0x80;
}
+ /*
+ * Keep CPL synced with the DPL specified for %ss.
+ *
+ * KVM notes that a SYSRET to non-cpl-3 is possible on AMD
+ * (unlike Intel), but accepts such a possible deviation for
+ * what is otherwise unreasonable behavior for a guest OS, since
+ * they do the same synchronization.
+ */
+ if (reg == VM_REG_GUEST_SS) {
+ vmcb->state.cpl = SEG_DESC_DPL(desc->access);
+ }
break;
case VM_REG_GUEST_GDTR:
@@ -2339,6 +2349,55 @@ svm_getdesc(void *arg, int vcpu, int reg, struct seg_desc *desc)
}
static int
+svm_get_msr(void *arg, int vcpu, uint32_t msr, uint64_t *valp)
+{
+ struct svm_softc *sc = arg;
+ struct vmcb *vmcb = svm_get_vmcb(sc, vcpu);
+ const uint64_t *msrp = vmcb_msr_ptr(vmcb, msr, NULL);
+
+ if (msrp != NULL) {
+ *valp = *msrp;
+ return (0);
+ }
+
+ return (EINVAL);
+}
+
+static int
+svm_set_msr(void *arg, int vcpu, uint32_t msr, uint64_t val)
+{
+ struct svm_softc *sc = arg;
+ struct vmcb *vmcb = svm_get_vmcb(sc, vcpu);
+
+ uint32_t dirty = 0;
+ uint64_t *msrp = vmcb_msr_ptr(vmcb, msr, &dirty);
+ if (msrp == NULL) {
+ return (EINVAL);
+ }
+ switch (msr) {
+ case MSR_EFER:
+ /*
+ * For now, just clone the logic from
+ * svm_setreg():
+ *
+ * EFER_SVM must always be set when the guest is
+ * executing
+ */
+ *msrp = val | EFER_SVM;
+ break;
+ /* TODO: other necessary MSR masking */
+ default:
+ *msrp = val;
+ break;
+ }
+ if (dirty != 0) {
+ svm_set_dirty(sc, vcpu, dirty);
+ }
+ return (0);
+
+}
+
+static int
svm_setcap(void *arg, int vcpu, int type, int val)
{
struct svm_softc *sc;
@@ -2450,4 +2509,7 @@ struct vmm_ops vmm_ops_amd = {
.vmsavectx = svm_savectx,
.vmrestorectx = svm_restorectx,
+
+ .vmgetmsr = svm_get_msr,
+ .vmsetmsr = svm_set_msr,
};
diff --git a/usr/src/uts/intel/io/vmm/amd/vmcb.c b/usr/src/uts/intel/io/vmm/amd/vmcb.c
index 5be5240129..ec2c9674c0 100644
--- a/usr/src/uts/intel/io/vmm/amd/vmcb.c
+++ b/usr/src/uts/intel/io/vmm/amd/vmcb.c
@@ -45,6 +45,7 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
+#include <machine/specialreg.h>
#include <machine/vmm.h>
#include "vmcb.h"
@@ -148,3 +149,63 @@ vmcb_regptr(struct vmcb *vmcb, int ident, uint32_t *dirtyp)
}
return (res);
}
+
+uint64_t *
+vmcb_msr_ptr(struct vmcb *vmcb, uint32_t msr, uint32_t *dirtyp)
+{
+ uint64_t *res = NULL;
+ uint32_t dirty = 0;
+ struct vmcb_state *state = &vmcb->state;
+
+ switch (msr) {
+ case MSR_EFER:
+ res = &state->efer;
+ dirty = VMCB_CACHE_CR;
+ break;
+
+ case MSR_GSBASE:
+ res = &state->gs.base;
+ dirty = VMCB_CACHE_SEG;
+ break;
+ case MSR_FSBASE:
+ res = &state->fs.base;
+ dirty = VMCB_CACHE_SEG;
+ break;
+ case MSR_KGSBASE:
+ res = &state->kernelgsbase;
+ break;
+
+ case MSR_STAR:
+ res = &state->star;
+ break;
+ case MSR_LSTAR:
+ res = &state->lstar;
+ break;
+ case MSR_CSTAR:
+ res = &state->cstar;
+ break;
+ case MSR_SF_MASK:
+ res = &state->sfmask;
+ break;
+
+ case MSR_SYSENTER_CS_MSR:
+ res = &state->sysenter_cs;
+ break;
+ case MSR_SYSENTER_ESP_MSR:
+ res = &state->sysenter_esp;
+ break;
+ case MSR_SYSENTER_EIP_MSR:
+ res = &state->sysenter_eip;
+ break;
+
+ case MSR_PAT:
+ res = &state->g_pat;
+ dirty = VMCB_CACHE_NP;
+ break;
+ }
+
+ if (res != NULL && dirtyp != NULL) {
+ *dirtyp = dirty;
+ }
+ return (res);
+}
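
vmcb_msr_ptr() pairs a pointer into VMCB guest state with the clean-bits mask that must be invalidated if the caller writes through that pointer. A hedged sketch of the write-side pattern (mirroring svm_set_msr() above; sc, vcpu, vmcb, and new_pat are assumed to come from the caller's context):

	uint32_t dirty = 0;
	uint64_t *valp = vmcb_msr_ptr(vmcb, MSR_PAT, &dirty);

	if (valp != NULL) {
		*valp = new_pat;
		if (dirty != 0) {
			/* Force re-read of the cached VMCB_CACHE_NP state */
			svm_set_dirty(sc, vcpu, dirty);
		}
	}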
diff --git a/usr/src/uts/intel/io/vmm/amd/vmcb.h b/usr/src/uts/intel/io/vmm/amd/vmcb.h
index da0f08445c..7a57979d56 100644
--- a/usr/src/uts/intel/io/vmm/amd/vmcb.h
+++ b/usr/src/uts/intel/io/vmm/amd/vmcb.h
@@ -397,6 +397,7 @@ CTASSERT(offsetof(struct vmcb, state) == 0x400);
struct vmcb_segment *vmcb_segptr(struct vmcb *vmcb, int type);
uint64_t *vmcb_regptr(struct vmcb *vmcb, int ident, uint32_t *dirtyp);
+uint64_t *vmcb_msr_ptr(struct vmcb *vmcb, uint32_t ident, uint32_t *dirtyp);
#endif /* _KERNEL */
#endif /* _VMCB_H_ */
diff --git a/usr/src/uts/intel/io/vmm/intel/vmcs.c b/usr/src/uts/intel/io/vmm/intel/vmcs.c
index 7fabba79f7..b5bc8130d9 100644
--- a/usr/src/uts/intel/io/vmm/intel/vmcs.c
+++ b/usr/src/uts/intel/io/vmm/intel/vmcs.c
@@ -165,6 +165,34 @@ vmcs_seg_desc_encoding(int seg, uint32_t *base, uint32_t *lim, uint32_t *acc)
}
}
+uint32_t
+vmcs_msr_encoding(uint32_t msr)
+{
+ switch (msr) {
+ case MSR_PAT:
+ return (VMCS_GUEST_IA32_PAT);
+ case MSR_EFER:
+ return (VMCS_GUEST_IA32_EFER);
+ case MSR_SYSENTER_CS_MSR:
+ return (VMCS_GUEST_IA32_SYSENTER_CS);
+ case MSR_SYSENTER_ESP_MSR:
+ return (VMCS_GUEST_IA32_SYSENTER_ESP);
+ case MSR_SYSENTER_EIP_MSR:
+ return (VMCS_GUEST_IA32_SYSENTER_EIP);
+ /*
+ * While fsbase and gsbase are expected to be accessed (by the VMM) via
+ * the segment descriptor interfaces, we still make them available as MSR
+ * contents as well.
+ */
+ case MSR_FSBASE:
+ return (VMCS_GUEST_FS_BASE);
+ case MSR_GSBASE:
+ return (VMCS_GUEST_GS_BASE);
+ default:
+ return (VMCS_INVALID_ENCODING);
+ }
+}
+
void
vmcs_clear(uintptr_t vmcs_pa)
{
diff --git a/usr/src/uts/intel/io/vmm/intel/vmcs.h b/usr/src/uts/intel/io/vmm/intel/vmcs.h
index 24dc2dd574..9e4a9e3282 100644
--- a/usr/src/uts/intel/io/vmm/intel/vmcs.h
+++ b/usr/src/uts/intel/io/vmm/intel/vmcs.h
@@ -48,6 +48,7 @@ CTASSERT(sizeof (struct vmcs) == PAGE_SIZE);
uint32_t vmcs_field_encoding(int ident);
void vmcs_seg_desc_encoding(int seg, uint32_t *base, uint32_t *lim,
uint32_t *acc);
+uint32_t vmcs_msr_encoding(uint32_t msr);
void vmcs_initialize(struct vmcs *vmcs, uintptr_t vmcs_pa);
diff --git a/usr/src/uts/intel/io/vmm/intel/vmx.c b/usr/src/uts/intel/io/vmm/intel/vmx.c
index 360cec1056..4ef51259ab 100644
--- a/usr/src/uts/intel/io/vmm/intel/vmx.c
+++ b/usr/src/uts/intel/io/vmm/intel/vmx.c
@@ -85,7 +85,6 @@ __FBSDID("$FreeBSD$");
#include "vmcs.h"
#include "vmx.h"
#include "vmx_msr.h"
-#include "x86.h"
#include "vmx_controls.h"
#define PINBASED_CTLS_ONE_SETTING \
@@ -1629,6 +1628,25 @@ vmx_set_guest_reg(struct vmx *vmx, int vcpu, int ident, uint64_t regval)
}
}
+static void
+vmx_sync_efer_state(struct vmx *vmx, int vcpu, uint64_t efer)
+{
+ uint64_t ctrl;
+
+ /*
+ * If the "load EFER" VM-entry control is 1 (which we require) then the
+ * value of EFER.LMA must be identical to "IA-32e mode guest" bit in the
+ * VM-entry control.
+ */
+ ctrl = vmcs_read(VMCS_ENTRY_CTLS);
+ if ((efer & EFER_LMA) != 0) {
+ ctrl |= VM_ENTRY_GUEST_LMA;
+ } else {
+ ctrl &= ~VM_ENTRY_GUEST_LMA;
+ }
+ vmcs_write(VMCS_ENTRY_CTLS, ctrl);
+}
+
static int
vmx_emulate_cr0_access(struct vmx *vmx, int vcpu, uint64_t exitqual)
{
@@ -1655,20 +1673,14 @@ vmx_emulate_cr0_access(struct vmx *vmx, int vcpu, uint64_t exitqual)
vmcs_write(VMCS_GUEST_CR0, crval);
if (regval & CR0_PG) {
- uint64_t efer, entry_ctls;
+ uint64_t efer;
- /*
- * If CR0.PG is 1 and EFER.LME is 1 then EFER.LMA and
- * the "IA-32e mode guest" bit in VM-entry control must be
- * equal.
- */
+ /* Keep EFER.LMA properly updated if paging is enabled */
efer = vmcs_read(VMCS_GUEST_IA32_EFER);
if (efer & EFER_LME) {
efer |= EFER_LMA;
vmcs_write(VMCS_GUEST_IA32_EFER, efer);
- entry_ctls = vmcs_read(VMCS_ENTRY_CTLS);
- entry_ctls |= VM_ENTRY_GUEST_LMA;
- vmcs_write(VMCS_ENTRY_CTLS, entry_ctls);
+ vmx_sync_efer_state(vmx, vcpu, efer);
}
}
@@ -2934,6 +2946,44 @@ vmx_vmcleanup(void *arg)
kmem_free(vmx, sizeof (*vmx));
}
+/*
+ * Ensure that the VMCS for this vcpu is loaded.
+ * Returns true if a VMCS load was required.
+ */
+static bool
+vmx_vmcs_access_ensure(struct vmx *vmx, int vcpu)
+{
+ int hostcpu;
+
+ if (vcpu_is_running(vmx->vm, vcpu, &hostcpu)) {
+ if (hostcpu != curcpu) {
+ panic("unexpected vcpu migration %d != %d",
+ hostcpu, curcpu);
+ }
+ /* Earlier logic already took care of the load */
+ return (false);
+ } else {
+ vmcs_load(vmx->vmcs_pa[vcpu]);
+ return (true);
+ }
+}
+
+static void
+vmx_vmcs_access_done(struct vmx *vmx, int vcpu)
+{
+ int hostcpu;
+
+ if (vcpu_is_running(vmx->vm, vcpu, &hostcpu)) {
+ if (hostcpu != curcpu) {
+ panic("unexpected vcpu migration %d != %d",
+ hostcpu, curcpu);
+ }
+ /* Later logic will take care of the unload */
+ } else {
+ vmcs_clear(vmx->vmcs_pa[vcpu]);
+ }
+}
+
static uint64_t *
vmxctx_regptr(struct vmxctx *vmxctx, int reg)
{
@@ -2989,25 +3039,18 @@ vmxctx_regptr(struct vmxctx *vmxctx, int reg)
static int
vmx_getreg(void *arg, int vcpu, int reg, uint64_t *retval)
{
- int running, hostcpu, err;
struct vmx *vmx = arg;
uint64_t *regp;
- running = vcpu_is_running(vmx->vm, vcpu, &hostcpu);
- if (running && hostcpu != curcpu)
- panic("vmx_getreg: %d is running", vcpu);
-
/* VMCS access not required for ctx reads */
if ((regp = vmxctx_regptr(&vmx->ctx[vcpu], reg)) != NULL) {
*retval = *regp;
return (0);
}
- if (!running) {
- vmcs_load(vmx->vmcs_pa[vcpu]);
- }
+ bool vmcs_loaded = vmx_vmcs_access_ensure(vmx, vcpu);
+ int err = 0;
- err = 0;
if (reg == VM_REG_GUEST_INTR_SHADOW) {
uint64_t gi = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY);
*retval = (gi & HWINTR_BLOCKING) ? 1 : 0;
@@ -3035,33 +3078,26 @@ vmx_getreg(void *arg, int vcpu, int reg, uint64_t *retval)
}
}
- if (!running) {
- vmcs_clear(vmx->vmcs_pa[vcpu]);
+ if (vmcs_loaded) {
+ vmx_vmcs_access_done(vmx, vcpu);
}
-
return (err);
}
static int
vmx_setreg(void *arg, int vcpu, int reg, uint64_t val)
{
- int running, hostcpu, error;
struct vmx *vmx = arg;
uint64_t *regp;
- running = vcpu_is_running(vmx->vm, vcpu, &hostcpu);
- if (running && hostcpu != curcpu)
- panic("vmx_setreg: %d is running", vcpu);
-
/* VMCS access not required for ctx writes */
if ((regp = vmxctx_regptr(&vmx->ctx[vcpu], reg)) != NULL) {
*regp = val;
return (0);
}
- if (!running) {
- vmcs_load(vmx->vmcs_pa[vcpu]);
- }
+ bool vmcs_loaded = vmx_vmcs_access_ensure(vmx, vcpu);
+ int err = 0;
if (reg == VM_REG_GUEST_INTR_SHADOW) {
if (val != 0) {
@@ -3069,39 +3105,24 @@ vmx_setreg(void *arg, int vcpu, int reg, uint64_t val)
* Forcing the vcpu into an interrupt shadow is not
* presently supported.
*/
- error = EINVAL;
+ err = EINVAL;
} else {
uint64_t gi;
gi = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY);
gi &= ~HWINTR_BLOCKING;
vmcs_write(VMCS_GUEST_INTERRUPTIBILITY, gi);
- error = 0;
+ err = 0;
}
} else {
uint32_t encoding;
- error = 0;
+ err = 0;
encoding = vmcs_field_encoding(reg);
switch (encoding) {
case VMCS_GUEST_IA32_EFER:
- /*
- * If the "load EFER" VM-entry control is 1 then the
- * value of EFER.LMA must be identical to "IA-32e mode
- * guest" bit in the VM-entry control.
- */
- if ((entry_ctls & VM_ENTRY_LOAD_EFER) != 0) {
- uint64_t ctls;
-
- ctls = vmcs_read(VMCS_ENTRY_CTLS);
- if (val & EFER_LMA) {
- ctls |= VM_ENTRY_GUEST_LMA;
- } else {
- ctls &= ~VM_ENTRY_GUEST_LMA;
- }
- vmcs_write(VMCS_ENTRY_CTLS, ctls);
- }
vmcs_write(encoding, val);
+ vmx_sync_efer_state(vmx, vcpu, val);
break;
case VMCS_GUEST_CR0:
/*
@@ -3130,10 +3151,11 @@ vmx_setreg(void *arg, int vcpu, int reg, uint64_t val)
* XXX the processor retains global mappings when %cr3
* is updated but vmx_invvpid() does not.
*/
- vmx_invvpid(vmx, vcpu, running);
+ vmx_invvpid(vmx, vcpu,
+ vcpu_is_running(vmx->vm, vcpu, NULL));
break;
case VMCS_INVALID_ENCODING:
- error = EINVAL;
+ err = EINVAL;
break;
default:
vmcs_write(encoding, val);
@@ -3141,27 +3163,19 @@ vmx_setreg(void *arg, int vcpu, int reg, uint64_t val)
}
}
- if (!running) {
- vmcs_clear(vmx->vmcs_pa[vcpu]);
+ if (vmcs_loaded) {
+ vmx_vmcs_access_done(vmx, vcpu);
}
-
- return (error);
+ return (err);
}
static int
vmx_getdesc(void *arg, int vcpu, int seg, struct seg_desc *desc)
{
- int hostcpu, running;
struct vmx *vmx = arg;
uint32_t base, limit, access;
- running = vcpu_is_running(vmx->vm, vcpu, &hostcpu);
- if (running && hostcpu != curcpu)
- panic("vmx_getdesc: %d is running", vcpu);
-
- if (!running) {
- vmcs_load(vmx->vmcs_pa[vcpu]);
- }
+ bool vmcs_loaded = vmx_vmcs_access_ensure(vmx, vcpu);
vmcs_seg_desc_encoding(seg, &base, &limit, &access);
desc->base = vmcs_read(base);
@@ -3172,8 +3186,8 @@ vmx_getdesc(void *arg, int vcpu, int seg, struct seg_desc *desc)
desc->access = 0;
}
- if (!running) {
- vmcs_clear(vmx->vmcs_pa[vcpu]);
+ if (vmcs_loaded) {
+ vmx_vmcs_access_done(vmx, vcpu);
}
return (0);
}
@@ -3181,17 +3195,10 @@ vmx_getdesc(void *arg, int vcpu, int seg, struct seg_desc *desc)
static int
vmx_setdesc(void *arg, int vcpu, int seg, const struct seg_desc *desc)
{
- int hostcpu, running;
struct vmx *vmx = arg;
uint32_t base, limit, access;
- running = vcpu_is_running(vmx->vm, vcpu, &hostcpu);
- if (running && hostcpu != curcpu)
- panic("vmx_setdesc: %d is running", vcpu);
-
- if (!running) {
- vmcs_load(vmx->vmcs_pa[vcpu]);
- }
+ bool vmcs_loaded = vmx_vmcs_access_ensure(vmx, vcpu);
vmcs_seg_desc_encoding(seg, &base, &limit, &access);
vmcs_write(base, desc->base);
@@ -3200,12 +3207,94 @@ vmx_setdesc(void *arg, int vcpu, int seg, const struct seg_desc *desc)
vmcs_write(access, desc->access);
}
- if (!running) {
- vmcs_clear(vmx->vmcs_pa[vcpu]);
+ if (vmcs_loaded) {
+ vmx_vmcs_access_done(vmx, vcpu);
}
return (0);
}
+static uint64_t *
+vmx_msr_ptr(struct vmx *vmx, int vcpu, uint32_t msr)
+{
+ uint64_t *guest_msrs = vmx->guest_msrs[vcpu];
+
+ switch (msr) {
+ case MSR_LSTAR:
+ return (&guest_msrs[IDX_MSR_LSTAR]);
+ case MSR_CSTAR:
+ return (&guest_msrs[IDX_MSR_CSTAR]);
+ case MSR_STAR:
+ return (&guest_msrs[IDX_MSR_STAR]);
+ case MSR_SF_MASK:
+ return (&guest_msrs[IDX_MSR_SF_MASK]);
+ case MSR_KGSBASE:
+ return (&guest_msrs[IDX_MSR_KGSBASE]);
+ case MSR_PAT:
+ return (&guest_msrs[IDX_MSR_PAT]);
+ default:
+ return (NULL);
+ }
+}
+
+static int
+vmx_msr_get(void *arg, int vcpu, uint32_t msr, uint64_t *valp)
+{
+ struct vmx *vmx = arg;
+
+ ASSERT(valp != NULL);
+
+ const uint64_t *msrp = vmx_msr_ptr(vmx, vcpu, msr);
+ if (msrp != NULL) {
+ *valp = *msrp;
+ return (0);
+ }
+
+ const uint32_t vmcs_enc = vmcs_msr_encoding(msr);
+ if (vmcs_enc != VMCS_INVALID_ENCODING) {
+ bool vmcs_loaded = vmx_vmcs_access_ensure(vmx, vcpu);
+
+ *valp = vmcs_read(vmcs_enc);
+
+ if (vmcs_loaded) {
+ vmx_vmcs_access_done(vmx, vcpu);
+ }
+ return (0);
+ }
+
+ return (EINVAL);
+}
+
+static int
+vmx_msr_set(void *arg, int vcpu, uint32_t msr, uint64_t val)
+{
+ struct vmx *vmx = arg;
+
+ /* TODO: mask value */
+
+ uint64_t *msrp = vmx_msr_ptr(vmx, vcpu, msr);
+ if (msrp != NULL) {
+ *msrp = val;
+ return (0);
+ }
+
+ const uint32_t vmcs_enc = vmcs_msr_encoding(msr);
+ if (vmcs_enc != VMCS_INVALID_ENCODING) {
+ bool vmcs_loaded = vmx_vmcs_access_ensure(vmx, vcpu);
+
+ vmcs_write(vmcs_enc, val);
+
+ if (msr == MSR_EFER) {
+ vmx_sync_efer_state(vmx, vcpu, val);
+ }
+
+ if (vmcs_loaded) {
+ vmx_vmcs_access_done(vmx, vcpu);
+ }
+ return (0);
+ }
+ return (EINVAL);
+}
+
static int
vmx_getcap(void *arg, int vcpu, int type, int *retval)
{
@@ -3711,6 +3800,9 @@ struct vmm_ops vmm_ops_intel = {
.vmsavectx = vmx_savectx,
.vmrestorectx = vmx_restorectx,
+
+ .vmgetmsr = vmx_msr_get,
+ .vmsetmsr = vmx_msr_set,
};
/* Side-effect free HW validation derived from checks in vmx_init. */
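
The vmx_vmcs_access_ensure()/vmx_vmcs_access_done() pair introduced above centralizes the vmcs_load()/vmcs_clear() bracketing that vmx_getreg(), vmx_setreg(), the descriptor accessors, and the new MSR accessors all share. A hypothetical accessor following the same pattern:

	static uint64_t
	vmx_read_guest_cr3(struct vmx *vmx, int vcpu)
	{
		/* Loads the VMCS unless the vcpu is already running on this CPU */
		bool vmcs_loaded = vmx_vmcs_access_ensure(vmx, vcpu);

		uint64_t val = vmcs_read(VMCS_GUEST_CR3);

		if (vmcs_loaded) {
			vmx_vmcs_access_done(vmx, vcpu);
		}
		return (val);
	}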
diff --git a/usr/src/uts/intel/io/vmm/sys/vmm_kernel.h b/usr/src/uts/intel/io/vmm/sys/vmm_kernel.h
index bc7f1bb0f2..1dba79a7bf 100644
--- a/usr/src/uts/intel/io/vmm/sys/vmm_kernel.h
+++ b/usr/src/uts/intel/io/vmm/sys/vmm_kernel.h
@@ -90,6 +90,11 @@ typedef void (*vmi_vlapic_cleanup)(void *vmi, struct vlapic *vlapic);
typedef void (*vmi_savectx)(void *vmi, int vcpu);
typedef void (*vmi_restorectx)(void *vmi, int vcpu);
+typedef int (*vmi_get_msr_t)(void *vmi, int vcpu, uint32_t msr,
+ uint64_t *valp);
+typedef int (*vmi_set_msr_t)(void *vmi, int vcpu, uint32_t msr,
+ uint64_t val);
+
struct vmm_ops {
vmm_init_func_t init; /* module wide initialization */
vmm_cleanup_func_t cleanup;
@@ -109,6 +114,9 @@ struct vmm_ops {
vmi_savectx vmsavectx;
vmi_restorectx vmrestorectx;
+
+ vmi_get_msr_t vmgetmsr;
+ vmi_set_msr_t vmsetmsr;
};
extern struct vmm_ops vmm_ops_intel;
@@ -379,6 +387,19 @@ typedef enum vm_msr_result {
VMR_UNHANLDED = 2, /* handle in userspace, kernel cannot emulate */
} vm_msr_result_t;
+enum vm_cpuid_capability {
+ VCC_NONE,
+ VCC_NO_EXECUTE,
+ VCC_FFXSR,
+ VCC_TCE,
+ VCC_LAST
+};
+
+int x86_emulate_cpuid(struct vm *, int, uint64_t *, uint64_t *, uint64_t *,
+ uint64_t *);
+bool vm_cpuid_capability(struct vm *, int, enum vm_cpuid_capability);
+bool validate_guest_xcr0(uint64_t, uint64_t);
+
void vmm_sol_glue_init(void);
void vmm_sol_glue_cleanup(void);
@@ -445,6 +466,7 @@ typedef struct vmm_data_req {
uint32_t vdr_flags;
uint32_t vdr_len;
void *vdr_data;
+ uint32_t *vdr_result_len;
} vmm_data_req_t;
typedef struct vmm_data_req vmm_data_req_t;
@@ -455,6 +477,7 @@ typedef struct vmm_data_version_entry {
uint16_t vdve_class;
uint16_t vdve_version;
uint16_t vdve_len_expect;
+ uint16_t vdve_len_per_item;
vmm_data_readf_t vdve_readf;
vmm_data_writef_t vdve_writef;
} vmm_data_version_entry_t;
diff --git a/usr/src/uts/intel/io/vmm/vmm.c b/usr/src/uts/intel/io/vmm/vmm.c
index 565dcbbe0a..e85d84d0b6 100644
--- a/usr/src/uts/intel/io/vmm/vmm.c
+++ b/usr/src/uts/intel/io/vmm/vmm.c
@@ -248,6 +248,8 @@ static struct vmm_ops vmm_ops_null = {
.vlapic_cleanup = (vmi_vlapic_cleanup)nullop_panic,
.vmsavectx = (vmi_savectx)nullop_panic,
.vmrestorectx = (vmi_restorectx)nullop_panic,
+ .vmgetmsr = (vmi_get_msr_t)nullop_panic,
+ .vmsetmsr = (vmi_set_msr_t)nullop_panic,
};
static struct vmm_ops *ops = &vmm_ops_null;
@@ -1102,38 +1104,51 @@ vm_assign_pptdev(struct vm *vm, int pptfd)
}
int
-vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval)
+vm_get_register(struct vm *vm, int vcpuid, int reg, uint64_t *retval)
{
-
- if (vcpu < 0 || vcpu >= vm->maxcpus)
+ if (vcpuid < 0 || vcpuid >= vm->maxcpus)
return (EINVAL);
if (reg >= VM_REG_LAST)
return (EINVAL);
- return (VMGETREG(vm->cookie, vcpu, reg, retval));
+ struct vcpu *vcpu = &vm->vcpu[vcpuid];
+ switch (reg) {
+ case VM_REG_GUEST_XCR0:
+ *retval = vcpu->guest_xcr0;
+ return (0);
+ default:
+ return (VMGETREG(vm->cookie, vcpuid, reg, retval));
+ }
}
int
vm_set_register(struct vm *vm, int vcpuid, int reg, uint64_t val)
{
- struct vcpu *vcpu;
- int error;
-
if (vcpuid < 0 || vcpuid >= vm->maxcpus)
return (EINVAL);
if (reg >= VM_REG_LAST)
return (EINVAL);
- error = VMSETREG(vm->cookie, vcpuid, reg, val);
- if (error || reg != VM_REG_GUEST_RIP)
+ int error;
+ struct vcpu *vcpu = &vm->vcpu[vcpuid];
+ switch (reg) {
+ case VM_REG_GUEST_RIP:
+ error = VMSETREG(vm->cookie, vcpuid, reg, val);
+ if (error == 0) {
+ vcpu->nextrip = val;
+ }
return (error);
-
- /* Set 'nextrip' to match the value of %rip */
- vcpu = &vm->vcpu[vcpuid];
- vcpu->nextrip = val;
- return (0);
+ case VM_REG_GUEST_XCR0:
+ if (!validate_guest_xcr0(val, vmm_get_host_xcr0())) {
+ return (EINVAL);
+ }
+ vcpu->guest_xcr0 = val;
+ return (0);
+ default:
+ return (VMSETREG(vm->cookie, vcpuid, reg, val));
+ }
}
static bool
@@ -1864,7 +1879,7 @@ vm_handle_run_state(struct vm *vm, int vcpuid)
}
static int
-vm_rdmtrr(struct vm_mtrr *mtrr, uint32_t num, uint64_t *val)
+vm_rdmtrr(const struct vm_mtrr *mtrr, uint32_t num, uint64_t *val)
{
switch (num) {
case MSR_MTRRcap:
@@ -1945,6 +1960,22 @@ vm_wrmtrr(struct vm_mtrr *mtrr, uint32_t num, uint64_t val)
return (0);
}
+static bool
+is_mtrr_msr(uint32_t msr)
+{
+ switch (msr) {
+ case MSR_MTRRcap:
+ case MSR_MTRRdefType:
+ case MSR_MTRR4kBase ... MSR_MTRR4kBase + 7:
+ case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1:
+ case MSR_MTRR64kBase:
+ case MSR_MTRRVarBase ... MSR_MTRRVarBase + (VMM_MTRR_VAR_MAX * 2) - 1:
+ return (true);
+ default:
+ return (false);
+ }
+}
+
static int
vm_handle_rdmsr(struct vm *vm, int vcpuid, struct vm_exit *vme)
{
@@ -3702,17 +3733,20 @@ vmm_data_is_cpu_specific(uint16_t data_class)
case VDC_MSR:
case VDC_FPU:
case VDC_LAPIC:
- case VDC_VMM_ARCH:
return (true);
default:
return (false);
}
}
-static const vmm_data_version_entry_t *
-vmm_data_find(const vmm_data_req_t *req, int *err)
+static int
+vmm_data_find(const vmm_data_req_t *req, const vmm_data_version_entry_t **resp)
{
const vmm_data_version_entry_t **vdpp, *vdp;
+
+ ASSERT(resp != NULL);
+ ASSERT(req->vdr_result_len != NULL);
+
SET_FOREACH(vdpp, vmm_data_version_entries) {
vdp = *vdpp;
if (vdp->vdve_class == req->vdr_class &&
@@ -3722,15 +3756,15 @@ vmm_data_find(const vmm_data_req_t *req, int *err)
* provider for this data.
*/
if (vdp->vdve_len_expect != 0 &&
- vdp->vdve_len_expect != req->vdr_len) {
- *err = ENOSPC;
- return (NULL);
+ vdp->vdve_len_expect > req->vdr_len) {
+ *req->vdr_result_len = vdp->vdve_len_expect;
+ return (ENOSPC);
}
- return (vdp);
+ *resp = vdp;
+ return (0);
}
}
- *err = EINVAL;
- return (NULL);
+ return (EINVAL);
}
static void *
@@ -3740,10 +3774,11 @@ vmm_data_from_class(const vmm_data_req_t *req, struct vm *vm, int vcpuid)
/* per-cpu data/devices */
case VDC_LAPIC:
return (vm_lapic(vm, vcpuid));
+ case VDC_VMM_ARCH:
+ return (vm);
case VDC_FPU:
case VDC_REGISTER:
- case VDC_VMM_ARCH:
case VDC_MSR:
/*
* These have per-CPU handling which is dispatched outside
@@ -3771,6 +3806,356 @@ vmm_data_from_class(const vmm_data_req_t *req, struct vm *vm, int vcpuid)
}
}
+const uint32_t arch_msr_iter[] = {
+ MSR_EFER,
+
+ /*
+ * While gsbase and fsbase are accessible via the MSR accessors, they
+ * are not included in MSR iteration since they are covered by the
+ * segment descriptor interface too.
+ */
+ MSR_KGSBASE,
+
+ MSR_STAR,
+ MSR_LSTAR,
+ MSR_CSTAR,
+ MSR_SF_MASK,
+
+ MSR_SYSENTER_CS_MSR,
+ MSR_SYSENTER_ESP_MSR,
+ MSR_SYSENTER_EIP_MSR,
+ MSR_PAT,
+};
+const uint32_t generic_msr_iter[] = {
+ MSR_TSC,
+ MSR_MTRRcap,
+ MSR_MTRRdefType,
+
+ MSR_MTRR4kBase, MSR_MTRR4kBase + 1, MSR_MTRR4kBase + 2,
+ MSR_MTRR4kBase + 3, MSR_MTRR4kBase + 4, MSR_MTRR4kBase + 5,
+ MSR_MTRR4kBase + 6, MSR_MTRR4kBase + 7,
+
+ MSR_MTRR16kBase, MSR_MTRR16kBase + 1,
+
+ MSR_MTRR64kBase,
+};
+
+static int
+vmm_data_read_msrs(struct vm *vm, int vcpuid, const vmm_data_req_t *req)
+{
+ VERIFY3U(req->vdr_class, ==, VDC_MSR);
+ VERIFY3U(req->vdr_version, ==, 1);
+
+ const uint_t num_msrs = nitems(arch_msr_iter) + nitems(generic_msr_iter)
+ + (VMM_MTRR_VAR_MAX * 2);
+ const uint32_t output_len =
+ num_msrs * sizeof (struct vdi_field_entry_v1);
+ *req->vdr_result_len = output_len;
+
+ if (req->vdr_len < output_len) {
+ return (ENOSPC);
+ }
+
+ struct vdi_field_entry_v1 *entryp = req->vdr_data;
+ for (uint_t i = 0; i < nitems(arch_msr_iter); i++, entryp++) {
+ const uint32_t msr = arch_msr_iter[i];
+ uint64_t val = 0;
+
+ int err = ops->vmgetmsr(vm->cookie, vcpuid, msr, &val);
+ /* All of these MSRs are expected to work */
+ VERIFY0(err);
+ entryp->vfe_ident = msr;
+ entryp->vfe_value = val;
+ }
+
+ struct vm_mtrr *mtrr = &vm->vcpu[vcpuid].mtrr;
+ for (uint_t i = 0; i < nitems(generic_msr_iter); i++, entryp++) {
+ const uint32_t msr = generic_msr_iter[i];
+
+ entryp->vfe_ident = msr;
+ switch (msr) {
+ case MSR_TSC:
+ /*
+ * Communicate this as the difference from the VM-wide
+ * offset of the boot time.
+ */
+ entryp->vfe_value = vm->vcpu[vcpuid].tsc_offset;
+ break;
+ case MSR_MTRRcap:
+ case MSR_MTRRdefType:
+ case MSR_MTRR4kBase ... MSR_MTRR4kBase + 7:
+ case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1:
+ case MSR_MTRR64kBase: {
+ int err = vm_rdmtrr(mtrr, msr, &entryp->vfe_value);
+ VERIFY0(err);
+ break;
+ }
+ default:
+ panic("unexpected msr export %x", msr);
+ }
+ }
+ /* Copy the variable MTRRs */
+ for (uint_t i = 0; i < (VMM_MTRR_VAR_MAX * 2); i++, entryp++) {
+ const uint32_t msr = MSR_MTRRVarBase + i;
+
+ entryp->vfe_ident = msr;
+ int err = vm_rdmtrr(mtrr, msr, &entryp->vfe_value);
+ VERIFY0(err);
+ }
+ return (0);
+}
+
+static int
+vmm_data_write_msrs(struct vm *vm, int vcpuid, const vmm_data_req_t *req)
+{
+ VERIFY3U(req->vdr_class, ==, VDC_MSR);
+ VERIFY3U(req->vdr_version, ==, 1);
+
+ const struct vdi_field_entry_v1 *entryp = req->vdr_data;
+ const uint_t entry_count =
+ req->vdr_len / sizeof (struct vdi_field_entry_v1);
+ struct vm_mtrr *mtrr = &vm->vcpu[vcpuid].mtrr;
+
+ /*
+ * First make sure that all of the MSRs can be manipulated.
+ * For now, this check is done by going through the getmsr handler.
+ */
+ for (uint_t i = 0; i < entry_count; i++, entryp++) {
+ const uint32_t msr = entryp->vfe_ident;
+ uint64_t val;
+ int err = 0;
+
+ switch (msr) {
+ case MSR_TSC:
+ break;
+ default:
+ if (is_mtrr_msr(msr)) {
+ err = vm_rdmtrr(mtrr, msr, &val);
+ } else {
+ err = ops->vmgetmsr(vm->cookie, vcpuid, msr,
+ &val);
+ }
+ break;
+ }
+ if (err != 0) {
+ return (err);
+ }
+ }
+
+ /*
+ * Fairly confident that all of the 'set' operations are at least
+ * targeting valid MSRs, continue on.
+ */
+ entryp = req->vdr_data;
+ for (uint_t i = 0; i < entry_count; i++, entryp++) {
+ const uint32_t msr = entryp->vfe_ident;
+ const uint64_t val = entryp->vfe_value;
+ int err = 0;
+
+ switch (msr) {
+ case MSR_TSC:
+ vm->vcpu[vcpuid].tsc_offset = entryp->vfe_value;
+ break;
+ default:
+ if (is_mtrr_msr(msr)) {
+ if (msr == MSR_MTRRcap) {
+ /*
+ * MTRRcap is read-only. If the current
+ * value matches the incoming one,
+ * consider it a success
+ */
+ uint64_t comp;
+ err = vm_rdmtrr(mtrr, msr, &comp);
+ if (err != 0 || comp != val) {
+ err = EINVAL;
+ }
+ } else {
+ err = vm_wrmtrr(mtrr, msr, val);
+ }
+ } else {
+ err = ops->vmsetmsr(vm->cookie, vcpuid, msr,
+ val);
+ }
+ break;
+ }
+ if (err != 0) {
+ return (err);
+ }
+ }
+ *req->vdr_result_len = entry_count * sizeof (struct vdi_field_entry_v1);
+
+ return (0);
+}
+
+static const vmm_data_version_entry_t msr_v1 = {
+ .vdve_class = VDC_MSR,
+ .vdve_version = 1,
+ .vdve_len_per_item = sizeof (struct vdi_field_entry_v1),
+ /* Requires backend-specific dispatch */
+ .vdve_readf = NULL,
+ .vdve_writef = NULL,
+};
+VMM_DATA_VERSION(msr_v1);
+
+static const uint32_t vmm_arch_v1_fields[] = {
+ VAI_TSC_BOOT_OFFSET,
+ VAI_BOOT_HRTIME,
+ VAI_TSC_FREQ,
+};
+
+static bool
+vmm_read_arch_field(struct vm *vm, uint32_t ident, uint64_t *valp)
+{
+ ASSERT(valp != NULL);
+
+ switch (ident) {
+ case VAI_TSC_BOOT_OFFSET:
+ *valp = vm->boot_tsc_offset;
+ return (true);
+ case VAI_BOOT_HRTIME:
+ *valp = vm->boot_hrtime;
+ return (true);
+ case VAI_TSC_FREQ:
+ /*
+ * Since the system TSC calibration is not public, just derive
+ * it from the scaling functions available.
+ */
+ *valp = unscalehrtime(NANOSEC);
+ return (true);
+ default:
+ break;
+ }
+ return (false);
+}
+
+static int
+vmm_data_read_vmm_arch(void *arg, const vmm_data_req_t *req)
+{
+ struct vm *vm = arg;
+
+ VERIFY3U(req->vdr_class, ==, VDC_VMM_ARCH);
+ VERIFY3U(req->vdr_version, ==, 1);
+
+ struct vdi_field_entry_v1 *entryp = req->vdr_data;
+
+ /* Specific fields requested */
+ if ((req->vdr_flags & VDX_FLAG_READ_COPYIN) != 0) {
+ const uint_t count =
+ req->vdr_len / sizeof (struct vdi_field_entry_v1);
+
+ for (uint_t i = 0; i < count; i++, entryp++) {
+ if (!vmm_read_arch_field(vm, entryp->vfe_ident,
+ &entryp->vfe_value)) {
+ return (EINVAL);
+ }
+ }
+ *req->vdr_result_len =
+ count * sizeof (struct vdi_field_entry_v1);
+ return (0);
+ }
+
+ /* Emit all of the possible values */
+ const uint32_t total_size = nitems(vmm_arch_v1_fields) *
+ sizeof (struct vdi_field_entry_v1);
+ *req->vdr_result_len = total_size;
+ if (req->vdr_len < total_size) {
+ return (ENOSPC);
+ }
+ for (uint_t i = 0; i < nitems(vmm_arch_v1_fields); i++, entryp++) {
+ entryp->vfe_ident = vmm_arch_v1_fields[i];
+ VERIFY(vmm_read_arch_field(vm, entryp->vfe_ident,
+ &entryp->vfe_value));
+ }
+ return (0);
+}
+
+static int
+vmm_data_write_vmm_arch(void *arg, const vmm_data_req_t *req)
+{
+ struct vm *vm = arg;
+
+ VERIFY3U(req->vdr_class, ==, VDC_VMM_ARCH);
+ VERIFY3U(req->vdr_version, ==, 1);
+
+ const struct vdi_field_entry_v1 *entryp = req->vdr_data;
+ const uint_t entry_count =
+ req->vdr_len / sizeof (struct vdi_field_entry_v1);
+
+ for (uint_t i = 0; i < entry_count; i++, entryp++) {
+ const uint64_t val = entryp->vfe_value;
+
+ switch (entryp->vfe_ident) {
+ case VAI_TSC_BOOT_OFFSET:
+ vm->boot_tsc_offset = val;
+ break;
+ case VAI_BOOT_HRTIME:
+ vm->boot_hrtime = val;
+ break;
+ case VAI_TSC_FREQ:
+ /* Guest TSC frequency not (currently) adjustable */
+ return (EPERM);
+ default:
+ return (EINVAL);
+ }
+ }
+ *req->vdr_result_len = entry_count * sizeof (struct vdi_field_entry_v1);
+ return (0);
+}
+
+static const vmm_data_version_entry_t vmm_arch_v1 = {
+ .vdve_class = VDC_VMM_ARCH,
+ .vdve_version = 1,
+ .vdve_len_per_item = sizeof (struct vdi_field_entry_v1),
+ .vdve_readf = vmm_data_read_vmm_arch,
+ .vdve_writef = vmm_data_write_vmm_arch,
+};
+VMM_DATA_VERSION(vmm_arch_v1);
+
+static int
+vmm_data_read_versions(void *arg, const vmm_data_req_t *req)
+{
+ VERIFY3U(req->vdr_class, ==, VDC_VERSION);
+ VERIFY3U(req->vdr_version, ==, 1);
+
+ const uint32_t total_size = SET_COUNT(vmm_data_version_entries) *
+ sizeof (struct vdi_version_entry_v1);
+
+ /* Make sure there is room for all of the entries */
+ *req->vdr_result_len = total_size;
+ if (req->vdr_len < *req->vdr_result_len) {
+ return (ENOSPC);
+ }
+
+ struct vdi_version_entry_v1 *entryp = req->vdr_data;
+ const vmm_data_version_entry_t **vdpp;
+ SET_FOREACH(vdpp, vmm_data_version_entries) {
+ const vmm_data_version_entry_t *vdp = *vdpp;
+
+ entryp->vve_class = vdp->vdve_class;
+ entryp->vve_version = vdp->vdve_version;
+ entryp->vve_len_expect = vdp->vdve_len_expect;
+ entryp->vve_len_per_item = vdp->vdve_len_per_item;
+ entryp++;
+ }
+ return (0);
+}
+
+static int
+vmm_data_write_versions(void *arg, const vmm_data_req_t *req)
+{
+ /* Writing to the version information makes no sense */
+ return (EPERM);
+}
+
+static const vmm_data_version_entry_t versions_v1 = {
+ .vdve_class = VDC_VERSION,
+ .vdve_version = 1,
+ .vdve_len_per_item = sizeof (struct vdi_version_entry_v1),
+ .vdve_readf = vmm_data_read_versions,
+ .vdve_writef = vmm_data_write_versions,
+};
+VMM_DATA_VERSION(versions_v1);
+
int
vmm_data_read(struct vm *vm, int vcpuid, const vmm_data_req_t *req)
{
@@ -3782,28 +4167,34 @@ vmm_data_read(struct vm *vm, int vcpuid, const vmm_data_req_t *req)
}
}
- const vmm_data_version_entry_t *entry;
- entry = vmm_data_find(req, &err);
- if (entry == NULL) {
- ASSERT(err != 0);
+ const vmm_data_version_entry_t *entry = NULL;
+ err = vmm_data_find(req, &entry);
+ if (err != 0) {
return (err);
}
+ ASSERT(entry != NULL);
void *datap = vmm_data_from_class(req, vm, vcpuid);
if (datap != NULL) {
err = entry->vdve_readf(datap, req);
+
+ /*
+ * Successful reads of fixed-length data should populate the
+ * length of that result.
+ */
+ if (err == 0 && entry->vdve_len_expect != 0) {
+ *req->vdr_result_len = entry->vdve_len_expect;
+ }
} else {
switch (req->vdr_class) {
+ case VDC_MSR:
+ err = vmm_data_read_msrs(vm, vcpuid, req);
+ break;
case VDC_FPU:
/* TODO: wire up to xsave export via hma_fpu iface */
err = EINVAL;
break;
case VDC_REGISTER:
- case VDC_VMM_ARCH:
- case VDC_MSR:
- /* TODO: implement */
- err = EINVAL;
- break;
default:
err = EINVAL;
break;
@@ -3824,28 +4215,33 @@ vmm_data_write(struct vm *vm, int vcpuid, const vmm_data_req_t *req)
}
}
- const vmm_data_version_entry_t *entry;
- entry = vmm_data_find(req, &err);
- if (entry == NULL) {
- ASSERT(err != 0);
+ const vmm_data_version_entry_t *entry = NULL;
+ err = vmm_data_find(req, &entry);
+ if (err != 0) {
return (err);
}
+ ASSERT(entry != NULL);
void *datap = vmm_data_from_class(req, vm, vcpuid);
if (datap != NULL) {
err = entry->vdve_writef(datap, req);
+ /*
+ * Successful writes of fixed-length data should populate the
+ * length of that result.
+ */
+ if (err == 0 && entry->vdve_len_expect != 0) {
+ *req->vdr_result_len = entry->vdve_len_expect;
+ }
} else {
switch (req->vdr_class) {
+ case VDC_MSR:
+ err = vmm_data_write_msrs(vm, vcpuid, req);
+ break;
case VDC_FPU:
/* TODO: wire up to xsave import via hma_fpu iface */
err = EINVAL;
break;
case VDC_REGISTER:
- case VDC_VMM_ARCH:
- case VDC_MSR:
- /* TODO: implement */
- err = EINVAL;
- break;
default:
err = EINVAL;
break;
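
Because versions_v1 above registers itself alongside every other data class, userspace can enumerate what the kernel supports before requesting anything else. An illustrative probe (vmfd is an open VM device fd; the 16-entry buffer is an arbitrary assumption):

	struct vdi_version_entry_v1 vers[16];
	struct vm_data_xfer xfer = {
		.vdx_class = VDC_VERSION,
		.vdx_version = 1,
		.vdx_len = sizeof (vers),
		.vdx_data = vers,
	};

	if (ioctl(vmfd, VM_DATA_READ, &xfer) == 0) {
		const uint_t n =
		    xfer.vdx_result_len / sizeof (struct vdi_version_entry_v1);
		for (uint_t i = 0; i < n; i++) {
			printf("class %u v%u len_expect %u per_item %u\n",
			    vers[i].vve_class, vers[i].vve_version,
			    vers[i].vve_len_expect, vers[i].vve_len_per_item);
		}
	}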
diff --git a/usr/src/uts/intel/io/vmm/vmm_sol_dev.c b/usr/src/uts/intel/io/vmm/vmm_sol_dev.c
index 9a4693fc78..ee07779b21 100644
--- a/usr/src/uts/intel/io/vmm/vmm_sol_dev.c
+++ b/usr/src/uts/intel/io/vmm/vmm_sol_dev.c
@@ -1550,32 +1550,48 @@ vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md,
}
const size_t len = vdx.vdx_len;
- void *buf = kmem_alloc(len, KM_SLEEP);
- if ((vdx.vdx_flags & VDX_FLAG_READ_COPYIN) != 0) {
- if (ddi_copyin(vdx.vdx_data, buf, len, md) != 0) {
+ void *buf = NULL;
+ if (len != 0) {
+ buf = kmem_alloc(len, KM_SLEEP);
+ if ((vdx.vdx_flags & VDX_FLAG_READ_COPYIN) != 0 &&
+ ddi_copyin(vdx.vdx_data, buf, len, md) != 0) {
kmem_free(buf, len);
error = EFAULT;
break;
+ } else {
+ bzero(buf, len);
}
- } else {
- bzero(buf, len);
}
+ vdx.vdx_result_len = 0;
vmm_data_req_t req = {
.vdr_class = vdx.vdx_class,
.vdr_version = vdx.vdx_version,
.vdr_flags = vdx.vdx_flags,
- .vdr_len = vdx.vdx_len,
+ .vdr_len = len,
.vdr_data = buf,
+ .vdr_result_len = &vdx.vdx_result_len,
};
error = vmm_data_read(sc->vmm_vm, vdx.vdx_vcpuid, &req);
- if (error == 0) {
+ if (error == 0 && buf != NULL) {
if (ddi_copyout(buf, vdx.vdx_data, len, md) != 0) {
error = EFAULT;
}
}
- kmem_free(buf, len);
+
+ /*
+ * Copy out the transfer request so that the value of
+ * vdx_result_len can be made available, regardless of any
+ * error(s) which may have occurred.
+ */
+ if (ddi_copyout(&vdx, datap, sizeof (vdx), md) != 0) {
+ error = (error != 0) ? error : EFAULT;
+ }
+
+ if (buf != NULL) {
+ kmem_free(buf, len);
+ }
break;
}
case VM_DATA_WRITE: {
@@ -1595,19 +1611,24 @@ vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md,
}
const size_t len = vdx.vdx_len;
- void *buf = kmem_alloc(len, KM_SLEEP);
- if (ddi_copyin(vdx.vdx_data, buf, len, md) != 0) {
- kmem_free(buf, len);
- error = EFAULT;
- break;
+ void *buf = NULL;
+ if (len != 0) {
+ buf = kmem_alloc(len, KM_SLEEP);
+ if (ddi_copyin(vdx.vdx_data, buf, len, md) != 0) {
+ kmem_free(buf, len);
+ error = EFAULT;
+ break;
+ }
}
+ vdx.vdx_result_len = 0;
vmm_data_req_t req = {
.vdr_class = vdx.vdx_class,
.vdr_version = vdx.vdx_version,
.vdr_flags = vdx.vdx_flags,
- .vdr_len = vdx.vdx_len,
+ .vdr_len = len,
.vdr_data = buf,
+ .vdr_result_len = &vdx.vdx_result_len,
};
if (vmm_allow_state_writes == 0) {
/* XXX: Play it safe for now */
@@ -1617,13 +1638,25 @@ vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md,
&req);
}
- if (error == 0 &&
+ if (error == 0 && buf != NULL &&
(vdx.vdx_flags & VDX_FLAG_WRITE_COPYOUT) != 0) {
if (ddi_copyout(buf, vdx.vdx_data, len, md) != 0) {
error = EFAULT;
}
}
- kmem_free(buf, len);
+
+ /*
+ * Copy out the transfer request so that the value of
+ * vdx_result_len can be made available, regardless of any
+ * error(s) which may have occurred.
+ */
+ if (ddi_copyout(&vdx, datap, sizeof (vdx), md) != 0) {
+ error = (error != 0) ? error : EFAULT;
+ }
+
+ if (buf != NULL) {
+ kmem_free(buf, len);
+ }
break;
}
diff --git a/usr/src/uts/intel/io/vmm/x86.c b/usr/src/uts/intel/io/vmm/x86.c
index de48ba1d48..e593e0c04e 100644
--- a/usr/src/uts/intel/io/vmm/x86.c
+++ b/usr/src/uts/intel/io/vmm/x86.c
@@ -58,10 +58,10 @@ __FBSDID("$FreeBSD$");
#include <machine/specialreg.h>
#include <machine/vmm.h>
+#include <sys/vmm_kernel.h>
#include "vmm_host.h"
#include "vmm_util.h"
-#include "x86.h"
SYSCTL_DECL(_hw_vmm);
@@ -80,6 +80,42 @@ static int cpuid_leaf_b = 1;
*/
static int vmm_force_invariant_tsc = 0;
+#define CPUID_0000_0000 (0x0)
+#define CPUID_0000_0001 (0x1)
+#define CPUID_0000_0002 (0x2)
+#define CPUID_0000_0003 (0x3)
+#define CPUID_0000_0004 (0x4)
+#define CPUID_0000_0006 (0x6)
+#define CPUID_0000_0007 (0x7)
+#define CPUID_0000_000A (0xA)
+#define CPUID_0000_000B (0xB)
+#define CPUID_0000_000D (0xD)
+#define CPUID_0000_000F (0xF)
+#define CPUID_0000_0010 (0x10)
+#define CPUID_0000_0015 (0x15)
+#define CPUID_8000_0000 (0x80000000)
+#define CPUID_8000_0001 (0x80000001)
+#define CPUID_8000_0002 (0x80000002)
+#define CPUID_8000_0003 (0x80000003)
+#define CPUID_8000_0004 (0x80000004)
+#define CPUID_8000_0006 (0x80000006)
+#define CPUID_8000_0007 (0x80000007)
+#define CPUID_8000_0008 (0x80000008)
+#define CPUID_8000_001D (0x8000001D)
+#define CPUID_8000_001E (0x8000001E)
+
+/*
+ * CPUID instruction Fn0000_0001:
+ */
+#define CPUID_0000_0001_APICID_MASK (0xff<<24)
+#define CPUID_0000_0001_APICID_SHIFT 24
+
+/*
+ * CPUID instruction Fn0000_0001 ECX
+ */
+#define CPUID_0000_0001_FEAT0_VMX (1<<5)
+
+
/*
* Round up to the next power of two, if necessary, and then take log2.
* Returns -1 if argument is zero.
@@ -649,6 +685,10 @@ default_leaf:
return (1);
}
+/*
+ * Return 'true' if the capability 'cap' is enabled in this virtual cpu
+ * and 'false' otherwise.
+ */
bool
vm_cpuid_capability(struct vm *vm, int vcpuid, enum vm_cpuid_capability cap)
{
@@ -690,3 +730,23 @@ vm_cpuid_capability(struct vm *vm, int vcpuid, enum vm_cpuid_capability cap)
}
return (rv);
}
+
+bool
+validate_guest_xcr0(uint64_t val, uint64_t limit_mask)
+{
+ /* x87 feature must be enabled */
+ if ((val & XFEATURE_ENABLED_X87) == 0) {
+ return (false);
+ }
+ /* AVX cannot be enabled without SSE */
+ if ((val & (XFEATURE_ENABLED_SSE | XFEATURE_ENABLED_AVX)) ==
+ XFEATURE_ENABLED_SSE) {
+ return (false);
+ }
+ /* No bits should be outside what we dictate to be allowed */
+ if ((val & ~limit_mask) != 0) {
+ return (false);
+ }
+
+ return (true);
+}
diff --git a/usr/src/uts/intel/io/vmm/x86.h b/usr/src/uts/intel/io/vmm/x86.h
deleted file mode 100644
index f3459e4f8a..0000000000
--- a/usr/src/uts/intel/io/vmm/x86.h
+++ /dev/null
@@ -1,85 +0,0 @@
-/*-
- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
- *
- * Copyright (c) 2011 NetApp, Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-
-#ifndef _X86_H_
-#define _X86_H_
-
-#define CPUID_0000_0000 (0x0)
-#define CPUID_0000_0001 (0x1)
-#define CPUID_0000_0002 (0x2)
-#define CPUID_0000_0003 (0x3)
-#define CPUID_0000_0004 (0x4)
-#define CPUID_0000_0006 (0x6)
-#define CPUID_0000_0007 (0x7)
-#define CPUID_0000_000A (0xA)
-#define CPUID_0000_000B (0xB)
-#define CPUID_0000_000D (0xD)
-#define CPUID_0000_000F (0xF)
-#define CPUID_0000_0010 (0x10)
-#define CPUID_0000_0015 (0x15)
-#define CPUID_8000_0000 (0x80000000)
-#define CPUID_8000_0001 (0x80000001)
-#define CPUID_8000_0002 (0x80000002)
-#define CPUID_8000_0003 (0x80000003)
-#define CPUID_8000_0004 (0x80000004)
-#define CPUID_8000_0006 (0x80000006)
-#define CPUID_8000_0007 (0x80000007)
-#define CPUID_8000_0008 (0x80000008)
-#define CPUID_8000_001D (0x8000001D)
-#define CPUID_8000_001E (0x8000001E)
-
-/*
- * CPUID instruction Fn0000_0001:
- */
-#define CPUID_0000_0001_APICID_MASK (0xff<<24)
-#define CPUID_0000_0001_APICID_SHIFT 24
-
-/*
- * CPUID instruction Fn0000_0001 ECX
- */
-#define CPUID_0000_0001_FEAT0_VMX (1<<5)
-
-int x86_emulate_cpuid(struct vm *vm, int vcpu_id, uint64_t *rax, uint64_t *rbx,
- uint64_t *rcx, uint64_t *rdx);
-
-enum vm_cpuid_capability {
- VCC_NONE,
- VCC_NO_EXECUTE,
- VCC_FFXSR,
- VCC_TCE,
- VCC_LAST
-};
-
-/*
- * Return 'true' if the capability 'cap' is enabled in this virtual cpu
- * and 'false' otherwise.
- */
-bool vm_cpuid_capability(struct vm *vm, int vcpuid, enum vm_cpuid_capability);
-#endif
diff --git a/usr/src/uts/intel/sys/vmm.h b/usr/src/uts/intel/sys/vmm.h
index 268b2e82ce..50d76ab17c 100644
--- a/usr/src/uts/intel/sys/vmm.h
+++ b/usr/src/uts/intel/sys/vmm.h
@@ -103,6 +103,7 @@ enum vm_reg_name {
VM_REG_GUEST_DR3,
VM_REG_GUEST_DR6,
VM_REG_GUEST_ENTRY_INST_LENGTH,
+ VM_REG_GUEST_XCR0,
VM_REG_LAST
};
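
The new VM_REG_GUEST_XCR0 entry makes guest %xcr0 part of the ordinary register get/set surface, with validate_guest_xcr0() guarding writes on the kernel side. Illustrative use through the libvmmapi accessors (ctx and vcpu are assumed caller context, and the assertion holds only for values that passed validation):

	uint64_t xcr0;

	if (vm_get_register(ctx, vcpu, VM_REG_GUEST_XCR0, &xcr0) == 0) {
		/* validate_guest_xcr0() requires the x87 bit on any write */
		assert((xcr0 & XFEATURE_ENABLED_X87) != 0);
	}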
diff --git a/usr/src/uts/intel/sys/vmm_data.h b/usr/src/uts/intel/sys/vmm_data.h
index 1b8614543c..9ba385c5d6 100644
--- a/usr/src/uts/intel/sys/vmm_data.h
+++ b/usr/src/uts/intel/sys/vmm_data.h
@@ -18,7 +18,6 @@
#define _VMM_DATA_H_
/* VMM Data Classes */
-#define VDC_META 0 /* Meta information about data system */
#define VDC_VERSION 1 /* Version information for each data class */
/* Classes bearing per-CPU data */
@@ -42,21 +41,27 @@
/* VMM Data Identifiers */
-
-/*
- * VDC_REGISTER:
- */
-
/*
- * VDC_MSR:
+ * Generic field encoding for 64-bit (or smaller) data which are identified by a
+ * 32-bit (or smaller) name.
*
- * Use MSR identifiers directly
+ * Used by the following classes/version:
+ * - VDC_REGISTER v1: `vm_reg_name` identifiers
+ * - VDC_MSR v1: MSR identifiers
+ * - VDC_VMM_ARCH v1: Identifiers described below
*/
-
-struct vdi_msr_entry_v1 {
- uint32_t vme_msr;
+struct vdi_field_entry_v1 {
+ uint32_t vfe_ident;
uint32_t _pad;
- uint64_t vme_value;
+ uint64_t vfe_value;
+};
+
+/* VDC_VERSION */
+struct vdi_version_entry_v1 {
+ uint16_t vve_class;
+ uint16_t vve_version;
+ uint16_t vve_len_expect;
+ uint16_t vve_len_per_item;
};
/*
@@ -98,11 +103,22 @@ struct vdi_lapic_v1 {
uint32_t vl_esr_pending;
};
-
/*
* VDC_VMM_ARCH:
*/
+/*
+ * Version 1 identifiers:
+ */
+
+/* Offset of guest TSC from system at time of boot */
+#define VAI_TSC_BOOT_OFFSET 1
+/* Time that guest (nominally) booted, as hrtime */
+#define VAI_BOOT_HRTIME 2
+/* Guest TSC frequency measured by hrtime (not affected by wall clock adj.) */
+#define VAI_TSC_FREQ 3
+
+
/* VDC_IOAPIC: */
struct vdi_ioapic_v1 {
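
The VDC_VMM_ARCH identifiers above can also be read selectively: pre-populate the vfe_ident fields and pass VDX_FLAG_READ_COPYIN rather than reading the full set. A sketch (vmfd assumed as before):

	struct vdi_field_entry_v1 entry = { .vfe_ident = VAI_TSC_FREQ };
	struct vm_data_xfer xfer = {
		.vdx_class = VDC_VMM_ARCH,
		.vdx_version = 1,
		.vdx_flags = VDX_FLAG_READ_COPYIN,
		.vdx_len = sizeof (entry),
		.vdx_data = &entry,
	};

	if (ioctl(vmfd, VM_DATA_READ, &xfer) == 0) {
		/* vfe_value holds the hrtime-derived guest TSC frequency */
		printf("guest TSC freq: %llu Hz\n",
		    (unsigned long long)entry.vfe_value);
	}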
diff --git a/usr/src/uts/intel/sys/vmm_dev.h b/usr/src/uts/intel/sys/vmm_dev.h
index fc8ccf406e..8d1b2713dd 100644
--- a/usr/src/uts/intel/sys/vmm_dev.h
+++ b/usr/src/uts/intel/sys/vmm_dev.h
@@ -366,6 +366,7 @@ struct vm_data_xfer {
uint16_t vdx_version;
uint32_t vdx_flags;
uint32_t vdx_len;
+ uint32_t vdx_result_len;
void *vdx_data;
};
@@ -384,7 +385,7 @@ struct vm_data_xfer {
* best-effort activity. Nothing is to be inferred about the magnitude of a
* change when the version is modified. It follows no rules like semver.
*/
-#define VMM_CURRENT_INTERFACE_VERSION 2
+#define VMM_CURRENT_INTERFACE_VERSION 3
#define VMMCTL_IOC_BASE (('V' << 16) | ('M' << 8))