diff options
author | Dan McDonald <danmcd@mnx.io> | 2022-06-28 09:57:47 -0400 |
---|---|---|
committer | Dan McDonald <danmcd@mnx.io> | 2022-06-28 09:57:47 -0400 |
commit | fd7948755fd156e375263ea9fb2742dbf6e210bc (patch) | |
tree | 185f5776972facf2410d3344af3253c432e4e2d5 | |
parent | be744fbba3271d21a8969d9ca80671ddeffdd23f (diff) | |
parent | 265cb9426334857d58c5981579dc08d38aaae3d9 (diff) | |
download | illumos-joyent-fd7948755fd156e375263ea9fb2742dbf6e210bc.tar.gz |
[illumos-gate merge]
commit 265cb9426334857d58c5981579dc08d38aaae3d9
14757 scsi_pkt(9s) references pkt_scdblen instead of pkt_scblen
commit 54cf5b63effe805271443d5dd7afd37ec184fbab
14635 bhyve should expose additional vcpu state
-rw-r--r-- | usr/src/cmd/bhyvectl/bhyvectl.c | 447 | ||||
-rw-r--r-- | usr/src/man/man9s/scsi_pkt.9s | 948 | ||||
-rw-r--r-- | usr/src/uts/intel/io/vmm/amd/svm.c | 64 | ||||
-rw-r--r-- | usr/src/uts/intel/io/vmm/amd/vmcb.c | 61 | ||||
-rw-r--r-- | usr/src/uts/intel/io/vmm/amd/vmcb.h | 1 | ||||
-rw-r--r-- | usr/src/uts/intel/io/vmm/intel/vmcs.c | 28 | ||||
-rw-r--r-- | usr/src/uts/intel/io/vmm/intel/vmcs.h | 1 | ||||
-rw-r--r-- | usr/src/uts/intel/io/vmm/intel/vmx.c | 242 | ||||
-rw-r--r-- | usr/src/uts/intel/io/vmm/sys/vmm_kernel.h | 23 | ||||
-rw-r--r-- | usr/src/uts/intel/io/vmm/vmm.c | 482 | ||||
-rw-r--r-- | usr/src/uts/intel/io/vmm/vmm_sol_dev.c | 65 | ||||
-rw-r--r-- | usr/src/uts/intel/io/vmm/x86.c | 62 | ||||
-rw-r--r-- | usr/src/uts/intel/io/vmm/x86.h | 85 | ||||
-rw-r--r-- | usr/src/uts/intel/sys/vmm.h | 1 | ||||
-rw-r--r-- | usr/src/uts/intel/sys/vmm_data.h | 42 | ||||
-rw-r--r-- | usr/src/uts/intel/sys/vmm_dev.h | 3 |
16 files changed, 1335 insertions, 1220 deletions
diff --git a/usr/src/cmd/bhyvectl/bhyvectl.c b/usr/src/cmd/bhyvectl/bhyvectl.c index a6c86fd5fc..3b3caf0d20 100644 --- a/usr/src/cmd/bhyvectl/bhyvectl.c +++ b/usr/src/cmd/bhyvectl/bhyvectl.c @@ -51,9 +51,7 @@ __FBSDID("$FreeBSD$"); #include <sys/errno.h> #include <sys/mman.h> #include <sys/cpuset.h> -#ifndef __FreeBSD__ #include <sys/fp.h> -#endif /* __FreeBSD__ */ #include <stdio.h> #include <stdlib.h> @@ -72,6 +70,7 @@ __FBSDID("$FreeBSD$"); #include <machine/specialreg.h> #include <machine/vmm.h> #include <machine/vmm_dev.h> +#include <sys/vmm_data.h> #include <vmmapi.h> #include "amd/vmcb.h" @@ -95,10 +94,8 @@ usage(bool cpu_intel) " [--cpu=<vcpu_number>]\n" " [--create]\n" " [--destroy]\n" -#ifndef __FreeBSD__ " [--pmtmr-port=ioport]\n" " [--wrlock-cycle]\n" -#endif " [--get-all]\n" " [--get-stats]\n" " [--set-desc-ds]\n" @@ -186,9 +183,6 @@ usage(bool cpu_intel) " [--get-ldtr]\n" " [--set-x2apic-state=<state>]\n" " [--get-x2apic-state]\n" -#ifdef __FreeBSD__ - " [--unassign-pptdev=<bus/slot/func>]\n" -#endif " [--set-mem=<memory in units of MB>]\n" " [--get-lowmem]\n" " [--get-highmem]\n" @@ -307,16 +301,11 @@ static int set_cs, set_ds, set_es, set_fs, set_gs, set_ss, set_tr, set_ldtr; static int get_cs, get_ds, get_es, get_fs, get_gs, get_ss, get_tr, get_ldtr; static int set_x2apic_state, get_x2apic_state; enum x2apic_state x2apic_state; -#ifdef __FreeBSD__ -static int unassign_pptdev, bus, slot, func; -#endif static int run; static int get_cpu_topology; -#ifndef __FreeBSD__ static int pmtmr_port; static int wrlock_cycle; static int get_fpu; -#endif /* * VMCB specific. 
@@ -339,12 +328,13 @@ static int get_cr4_mask, get_cr4_shadow; static int get_cr3_targets; static int get_apic_access_addr, get_virtual_apic_addr, get_tpr_threshold; static int get_msr_bitmap, get_msr_bitmap_address; +static int get_guest_msrs; static int get_vpid_asid; static int get_inst_err, get_exit_ctls, get_entry_ctls; static int get_host_cr0, get_host_cr3, get_host_cr4; static int get_host_rip, get_host_rsp; -static int get_guest_pat, get_host_pat; -static int get_guest_sysenter, get_vmcs_link; +static int get_host_pat; +static int get_vmcs_link; static int get_exit_reason, get_vmcs_exit_qualification; static int get_vmcs_exit_interruption_info, get_vmcs_exit_interruption_error; static int get_vmcs_exit_inst_length; @@ -406,172 +396,7 @@ dump_vm_run_exitcode(struct vm_exit *vmexit, int vcpu) #define MSR_AMD7TH_START 0xC0010000 #define MSR_AMD7TH_END 0xC0011FFF -#ifdef __FreeBSD__ -static const char * -msr_name(uint32_t msr) -{ - static char buf[32]; - - switch(msr) { - case MSR_TSC: - return ("MSR_TSC"); - case MSR_EFER: - return ("MSR_EFER"); - case MSR_STAR: - return ("MSR_STAR"); - case MSR_LSTAR: - return ("MSR_LSTAR"); - case MSR_CSTAR: - return ("MSR_CSTAR"); - case MSR_SF_MASK: - return ("MSR_SF_MASK"); - case MSR_FSBASE: - return ("MSR_FSBASE"); - case MSR_GSBASE: - return ("MSR_GSBASE"); - case MSR_KGSBASE: - return ("MSR_KGSBASE"); - case MSR_SYSENTER_CS_MSR: - return ("MSR_SYSENTER_CS_MSR"); - case MSR_SYSENTER_ESP_MSR: - return ("MSR_SYSENTER_ESP_MSR"); - case MSR_SYSENTER_EIP_MSR: - return ("MSR_SYSENTER_EIP_MSR"); - case MSR_PAT: - return ("MSR_PAT"); - } - snprintf(buf, sizeof(buf), "MSR %#08x", msr); - - return (buf); -} - -static inline void -print_msr_pm(uint64_t msr, int vcpu, int readable, int writeable) -{ - - if (readable || writeable) { - printf("%-20s[%d]\t\t%c%c\n", msr_name(msr), vcpu, - readable ? 'R' : '-', writeable ? 'W' : '-'); - } -} - -/* - * Reference APM vol2, section 15.11 MSR Intercepts. 
- */ -static void -dump_amd_msr_pm(const char *bitmap, int vcpu) -{ - int byte, bit, readable, writeable; - uint32_t msr; - - for (msr = 0; msr < 0x2000; msr++) { - byte = msr / 4; - bit = (msr % 4) * 2; - - /* Look at MSRs in the range 0x00000000 to 0x00001FFF */ - readable = (bitmap[byte] & (1 << bit)) ? 0 : 1; - writeable = (bitmap[byte] & (2 << bit)) ? 0 : 1; - print_msr_pm(msr, vcpu, readable, writeable); - - /* Look at MSRs in the range 0xC0000000 to 0xC0001FFF */ - byte += 2048; - readable = (bitmap[byte] & (1 << bit)) ? 0 : 1; - writeable = (bitmap[byte] & (2 << bit)) ? 0 : 1; - print_msr_pm(msr + MSR_AMD6TH_START, vcpu, readable, - writeable); - - /* MSR 0xC0010000 to 0xC0011FF is only for AMD */ - byte += 4096; - readable = (bitmap[byte] & (1 << bit)) ? 0 : 1; - writeable = (bitmap[byte] & (2 << bit)) ? 0 : 1; - print_msr_pm(msr + MSR_AMD7TH_START, vcpu, readable, - writeable); - } -} - -/* - * Reference Intel SDM Vol3 Section 24.6.9 MSR-Bitmap Address - */ -static void -dump_intel_msr_pm(const char *bitmap, int vcpu) -{ - int byte, bit, readable, writeable; - uint32_t msr; - - for (msr = 0; msr < 0x2000; msr++) { - byte = msr / 8; - bit = msr & 0x7; - - /* Look at MSRs in the range 0x00000000 to 0x00001FFF */ - readable = (bitmap[byte] & (1 << bit)) ? 0 : 1; - writeable = (bitmap[2048 + byte] & (1 << bit)) ? 0 : 1; - print_msr_pm(msr, vcpu, readable, writeable); - - /* Look at MSRs in the range 0xC0000000 to 0xC0001FFF */ - byte += 1024; - readable = (bitmap[byte] & (1 << bit)) ? 0 : 1; - writeable = (bitmap[2048 + byte] & (1 << bit)) ? 
0 : 1; - print_msr_pm(msr + MSR_AMD6TH_START, vcpu, readable, - writeable); - } -} - -static int -dump_msr_bitmap(int vcpu, uint64_t addr, bool cpu_intel) -{ - int error, fd, map_size; - const char *bitmap; - - error = -1; - bitmap = MAP_FAILED; - - fd = open("/dev/mem", O_RDONLY, 0); - if (fd < 0) { - perror("Couldn't open /dev/mem"); - goto done; - } - - if (cpu_intel) - map_size = PAGE_SIZE; - else - map_size = 2 * PAGE_SIZE; - - bitmap = mmap(NULL, map_size, PROT_READ, MAP_SHARED, fd, addr); - if (bitmap == MAP_FAILED) { - perror("mmap failed"); - goto done; - } - - if (cpu_intel) - dump_intel_msr_pm(bitmap, vcpu); - else - dump_amd_msr_pm(bitmap, vcpu); - - error = 0; -done: - if (bitmap != MAP_FAILED) - munmap((void *)bitmap, map_size); - if (fd >= 0) - close(fd); - - return (error); -} - -static int -vm_get_vmcs_field(struct vmctx *ctx, int vcpu, int field, uint64_t *ret_val) -{ - - return (vm_get_register(ctx, vcpu, VMCS_IDENT(field), ret_val)); -} - -static int -vm_set_vmcs_field(struct vmctx *ctx, int vcpu, int field, uint64_t val) -{ - - return (vm_set_register(ctx, vcpu, VMCS_IDENT(field), val)); -} -#else /* __FreeBSD__ */ -/* VMCS does not allow arbitrary reads/writes */ +/* Until a safe method is created, arbitrary VMCS reads/writes are forbidden */ static int vm_get_vmcs_field(struct vmctx *ctx, int vcpu, int field, uint64_t *ret_val) { @@ -584,29 +409,11 @@ vm_set_vmcs_field(struct vmctx *ctx, int vcpu, int field, uint64_t val) { return (EINVAL); } -#endif /* __FreeBSD__ */ - -#ifdef __FreeBSD__ -static int -vm_get_vmcb_field(struct vmctx *ctx, int vcpu, int off, int bytes, - uint64_t *ret_val) -{ - - return (vm_get_register(ctx, vcpu, VMCB_ACCESS(off, bytes), ret_val)); -} -static int -vm_set_vmcb_field(struct vmctx *ctx, int vcpu, int off, int bytes, - uint64_t val) -{ - - return (vm_set_register(ctx, vcpu, VMCB_ACCESS(off, bytes), val)); -} -#else /* __FreeBSD__ */ -/* Arbitrary VMCB read/write is not allowed */ +/* Until a safe method is 
created, arbitrary VMCB reads/writes are forbidden */ static int vm_get_vmcb_field(struct vmctx *ctx, int vcpu, int off, int bytes, - uint64_t *ret_val) + uint64_t *ret_val) { *ret_val = 0; return (0); @@ -614,11 +421,10 @@ vm_get_vmcb_field(struct vmctx *ctx, int vcpu, int off, int bytes, static int vm_set_vmcb_field(struct vmctx *ctx, int vcpu, int off, int bytes, - uint64_t val) + uint64_t val) { return (EINVAL); } -#endif /* __FreeBSD__ */ enum { VMNAME = 1000, /* avoid collision with return values from getopt */ @@ -661,9 +467,7 @@ enum { SET_RTC_TIME, SET_RTC_NVRAM, RTC_NVRAM_OFFSET, -#ifndef __FreeBSD__ PMTMR_PORT, -#endif }; static void @@ -686,38 +490,6 @@ print_cpus(const char *banner, const cpuset_t *cpus) printf("\n"); } -#ifdef __FreeBSD__ -static void -print_intinfo(const char *banner, uint64_t info) -{ - int type; - - printf("%s:\t", banner); - if (info & VM_INTINFO_VALID) { - type = info & VM_INTINFO_TYPE; - switch (type) { - case VM_INTINFO_HWINTR: - printf("extint"); - break; - case VM_INTINFO_NMI: - printf("nmi"); - break; - case VM_INTINFO_SWINTR: - printf("swint"); - break; - default: - printf("exception"); - break; - } - printf(" vector %d", (int)VM_INTINFO_VECTOR(info)); - if (info & VM_INTINFO_DEL_ERRCODE) - printf(" errcode %#x", (u_int)(info >> 32)); - } else { - printf("n/a"); - } - printf("\n"); -} -#else /* __FreeBSD__ */ static void print_intinfo(const char *banner, uint64_t info) { @@ -746,7 +518,6 @@ print_intinfo(const char *banner, uint64_t info) } printf("\n"); } -#endif /* __FreeBSD__ */ static bool cpu_vendor_intel(void) @@ -1141,7 +912,7 @@ get_misc_vmcs(struct vmctx *ctx, int vcpu) if (error == 0) printf("cr4_shadow[%d]\t\t0x%016lx\n", vcpu, cr4shadow); } - + if (!error && (get_cr3_targets || get_all)) { uint64_t target_count, target_addr; error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR3_TARGET_COUNT, @@ -1214,7 +985,7 @@ get_misc_vmcs(struct vmctx *ctx, int vcpu) printf("gpa[%d]\t\t0x%016lx\n", vcpu, u64); } - if (!error && 
(get_vmcs_entry_interruption_info || + if (!error && (get_vmcs_entry_interruption_info || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_ENTRY_INTR_INFO,&u64); if (error == 0) { @@ -1336,7 +1107,7 @@ get_misc_vmcs(struct vmctx *ctx, int vcpu) printf("vmcs_exit_qualification[%d]\t0x%016lx\n", vcpu, u64); } - + return (error); } @@ -1549,9 +1320,7 @@ setup_options(bool cpu_intel) NO_ARG, &get_msr_bitmap, 1 }, { "get-msr-bitmap-address", NO_ARG, &get_msr_bitmap_address, 1 }, - { "get-guest-pat", NO_ARG, &get_guest_pat, 1 }, - { "get-guest-sysenter", - NO_ARG, &get_guest_sysenter, 1 }, + { "get-guest-msrs", NO_ARG, &get_guest_msrs, 1 }, { "get-exit-reason", NO_ARG, &get_exit_reason, 1 }, { "get-x2apic-state", NO_ARG, &get_x2apic_state, 1 }, @@ -1566,11 +1335,9 @@ setup_options(bool cpu_intel) { "get-suspended-cpus", NO_ARG, &get_suspended_cpus, 1 }, { "get-intinfo", NO_ARG, &get_intinfo, 1 }, { "get-cpu-topology", NO_ARG, &get_cpu_topology, 1 }, -#ifndef __FreeBSD__ { "pmtmr-port", REQ_ARG, 0, PMTMR_PORT }, { "wrlock-cycle", NO_ARG, &wrlock_cycle, 1 }, { "get-fpu", NO_ARG, &get_fpu, 1 }, -#endif }; const struct option intel_opts[] = { @@ -1632,7 +1399,7 @@ setup_options(bool cpu_intel) const struct option amd_opts[] = { { "get-vmcb-intercepts", NO_ARG, &get_vmcb_intercept, 1 }, - { "get-vmcb-asid", + { "get-vmcb-asid", NO_ARG, &get_vpid_asid, 1 }, { "get-vmcb-exit-details", NO_ARG, &get_vmcb_exit_details, 1 }, @@ -1788,7 +1555,6 @@ show_memseg(struct vmctx *ctx) } } -#ifndef __FreeBSD__ static int show_fpu(struct vmctx *ctx, int vcpu) { @@ -1873,7 +1639,87 @@ show_fpu(struct vmctx *ctx, int vcpu) free(buf); return (0); } -#endif /*__FreeBSD__ */ + +static const char * +msr_name(uint32_t msr) +{ +#define MSR_IDENT_MAP(x) case x: return (#x); + switch (msr) { + MSR_IDENT_MAP(MSR_PAT) + MSR_IDENT_MAP(MSR_SYSENTER_CS_MSR) + MSR_IDENT_MAP(MSR_SYSENTER_ESP_MSR) + MSR_IDENT_MAP(MSR_SYSENTER_EIP_MSR) + MSR_IDENT_MAP(MSR_STAR) + MSR_IDENT_MAP(MSR_LSTAR) + 
MSR_IDENT_MAP(MSR_CSTAR) + MSR_IDENT_MAP(MSR_SF_MASK) + MSR_IDENT_MAP(MSR_FSBASE) + MSR_IDENT_MAP(MSR_GSBASE) + MSR_IDENT_MAP(MSR_KGSBASE) + MSR_IDENT_MAP(MSR_EFER) + MSR_IDENT_MAP(MSR_MTRRcap) + MSR_IDENT_MAP(MSR_MTRRdefType) + case MSR_TSC: + return ("MSR_TSC (offset from system boot)"); + default: + return (NULL); + } +} + +static int +show_msrs(struct vmctx *ctx, int vcpu) +{ + struct vdi_field_entry_v1 *msrs; + struct vm_data_xfer xfer = { + .vdx_vcpuid = vcpu, + .vdx_class = VDC_MSR, + .vdx_version = 1, + .vdx_len = 0, + .vdx_data = &msrs, + }; + int fd = vm_get_device_fd(ctx); + int res; + + /* Figure out how many entries we need to alloc for */ + res = ioctl(fd, VM_DATA_READ, &xfer); + if (res == 0) { + return (EINVAL); + } else if (errno != ENOSPC) { + return (errno); + } + const uint32_t len = xfer.vdx_result_len; + msrs = malloc(len); + if (msrs == NULL) { + return (ENOMEM); + } + bzero(msrs, len); + xfer.vdx_data = msrs; + xfer.vdx_len = len; + + /* Query the actual data, now that we should have an adequate buffer */ + res = ioctl(fd, VM_DATA_READ, &xfer); + if (res != 0) { + free(msrs); + return (errno); + } + + const uint_t count = + xfer.vdx_result_len / sizeof (struct vdi_field_entry_v1); + for (uint_t i = 0; i < count; i++) { + const uint32_t ident = msrs[i].vfe_ident; + const uint64_t value = msrs[i].vfe_value; + + const char *name = msr_name(ident); + + if (name != NULL) { + printf("msr[%s]\t = %x\n", name, value); + } else { + printf("msr[%08x]\t = %x\n", ident, value); + } + } + free(msrs); + return (0); +} int main(int argc, char *argv[]) @@ -1883,7 +1729,7 @@ main(int argc, char *argv[]) vm_paddr_t gpa_pmap; struct vm_exit vmexit; uint64_t rax, cr0, cr2, cr3, cr4, dr0, dr1, dr2, dr3, dr6, dr7; - uint64_t rsp, rip, rflags, efer, pat; + uint64_t rsp, rip, rflags, efer; uint64_t eptp, bm, addr, u64, pteval[4], *pte, info[2]; struct vmctx *ctx; cpuset_t cpus; @@ -2049,21 +1895,12 @@ main(int argc, char *argv[]) case CAPNAME: capname = optarg; 
break; -#ifdef __FreeBSD__ - case UNASSIGN_PPTDEV: - unassign_pptdev = 1; - if (sscanf(optarg, "%d/%d/%d", &bus, &slot, &func) != 3) - usage(cpu_intel); - break; -#endif case ASSERT_LAPIC_LVT: assert_lapic_lvt = atoi(optarg); break; -#ifndef __FreeBSD__ case PMTMR_PORT: pmtmr_port = strtoul(optarg, NULL, 16); break; -#endif default: usage(cpu_intel); } @@ -2076,13 +1913,8 @@ main(int argc, char *argv[]) error = 0; -#ifndef __FreeBSD__ if (!error && create) error = vm_create(vmname, 0); -# else - if (!error && create) - error = vm_create(vmname); -#endif /* __FreeBSD__ */ if (!error) { ctx = vm_open(vmname); @@ -2094,16 +1926,15 @@ main(int argc, char *argv[]) } } -#ifndef __FreeBSD__ if (!error && pmtmr_port) { error = vm_pmtmr_set_location(ctx, pmtmr_port); exit(error); } + if (!error && wrlock_cycle) { error = vm_wrlock_cycle(ctx); exit(error); } -#endif /* __FreeBSD__ */ if (!error && memsize) error = vm_setup_memory(ctx, memsize, VM_MMAP_ALL); @@ -2232,11 +2063,6 @@ main(int argc, char *argv[]) if (!error && set_x2apic_state) error = vm_set_x2apic_state(ctx, vcpu, x2apic_state); -#ifdef __FreeBSD__ - if (!error && unassign_pptdev) - error = vm_unassign_pptdev(ctx, bus, slot, func); -#endif /* __FreeBSD__ */ - if (!error && set_exception_bitmap) { if (cpu_intel) error = vm_set_vmcs_field(ctx, vcpu, @@ -2273,11 +2099,9 @@ main(int argc, char *argv[]) if (!error) error = get_all_segments(ctx, vcpu); -#ifndef __FreeBSD__ if (!error && (get_fpu || get_all)) { error = show_fpu(ctx, vcpu); } -#endif /* __FreeBSD__ */ if (!error) { if (cpu_intel) @@ -2285,7 +2109,7 @@ main(int argc, char *argv[]) else error = get_misc_vmcb(ctx, vcpu); } - + if (!error && (get_x2apic_state || get_all)) { error = vm_get_x2apic_state(ctx, vcpu, &x2apic_state); if (error == 0) @@ -2340,7 +2164,7 @@ main(int argc, char *argv[]) &tscoff); else error = vm_get_vmcb_field(ctx, vcpu, - VMCB_OFF_TSC_OFFSET, + VMCB_OFF_TSC_OFFSET, 8, &tscoff); if (error == 0) printf("tsc_offset[%d]\t0x%016lx\n", 
vcpu, tscoff); @@ -2348,7 +2172,7 @@ main(int argc, char *argv[]) if (!error && (get_msr_bitmap_address || get_all)) { if (cpu_intel) - error = vm_get_vmcs_field(ctx, vcpu, VMCS_MSR_BITMAP, + error = vm_get_vmcs_field(ctx, vcpu, VMCS_MSR_BITMAP, &addr); else error = vm_get_vmcb_field(ctx, vcpu, @@ -2357,90 +2181,27 @@ main(int argc, char *argv[]) printf("msr_bitmap[%d]\t\t%#lx\n", vcpu, addr); } - if (!error && (get_msr_bitmap || get_all)) { - if (cpu_intel) { - error = vm_get_vmcs_field(ctx, vcpu, - VMCS_MSR_BITMAP, &addr); - } else { - error = vm_get_vmcb_field(ctx, vcpu, - VMCB_OFF_MSR_PERM, 8, - &addr); - } - -#ifdef __FreeBSD__ - if (error == 0) - error = dump_msr_bitmap(vcpu, addr, cpu_intel); -#else - /* - * Skip dumping the MSR bitmap since raw access to the VMCS is - * currently not possible. - */ -#endif /* __FreeBSD__ */ - } - if (!error && (get_vpid_asid || get_all)) { uint64_t vpid; if (cpu_intel) error = vm_get_vmcs_field(ctx, vcpu, VMCS_VPID, &vpid); else - error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_ASID, + error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_ASID, 4, &vpid); if (error == 0) - printf("%s[%d]\t\t0x%04lx\n", + printf("%s[%d]\t\t0x%04lx\n", cpu_intel ? 
"vpid" : "asid", vcpu, vpid); } - if (!error && (get_guest_pat || get_all)) { - if (cpu_intel) - error = vm_get_vmcs_field(ctx, vcpu, - VMCS_GUEST_IA32_PAT, &pat); - else - error = vm_get_vmcb_field(ctx, vcpu, - VMCB_OFF_GUEST_PAT, 8, &pat); - if (error == 0) - printf("guest_pat[%d]\t\t0x%016lx\n", vcpu, pat); - } - - if (!error && (get_guest_sysenter || get_all)) { - if (cpu_intel) - error = vm_get_vmcs_field(ctx, vcpu, - VMCS_GUEST_IA32_SYSENTER_CS, - &cs); - else - error = vm_get_vmcb_field(ctx, vcpu, - VMCB_OFF_SYSENTER_CS, 8, - &cs); - - if (error == 0) - printf("guest_sysenter_cs[%d]\t%#lx\n", vcpu, cs); - if (cpu_intel) - error = vm_get_vmcs_field(ctx, vcpu, - VMCS_GUEST_IA32_SYSENTER_ESP, - &rsp); - else - error = vm_get_vmcb_field(ctx, vcpu, - VMCB_OFF_SYSENTER_ESP, 8, - &rsp); - - if (error == 0) - printf("guest_sysenter_sp[%d]\t%#lx\n", vcpu, rsp); - if (cpu_intel) - error = vm_get_vmcs_field(ctx, vcpu, - VMCS_GUEST_IA32_SYSENTER_EIP, - &rip); - else - error = vm_get_vmcb_field(ctx, vcpu, - VMCB_OFF_SYSENTER_EIP, 8, - &rip); - if (error == 0) - printf("guest_sysenter_ip[%d]\t%#lx\n", vcpu, rip); + if (!error && (get_guest_msrs || get_all)) { + error = show_msrs(ctx, vcpu); } if (!error && (get_exit_reason || get_all)) { if (cpu_intel) error = vm_get_vmcs_field(ctx, vcpu, VMCS_EXIT_REASON, &u64); - else + else error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_EXIT_REASON, 8, &u64); diff --git a/usr/src/man/man9s/scsi_pkt.9s b/usr/src/man/man9s/scsi_pkt.9s index 385075076a..06a4f66b84 100644 --- a/usr/src/man/man9s/scsi_pkt.9s +++ b/usr/src/man/man9s/scsi_pkt.9s @@ -1,698 +1,362 @@ -'\" te -.\" Copyright (c) 2009, Sun Microsystems, Inc. All Rights Reserved. -.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. -.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. 
See the License for the specific language governing permissions and limitations under the License. -.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] -.TH SCSI_PKT 9S "Jan 11, 2009" -.SH NAME -scsi_pkt \- SCSI packet structure -.SH SYNOPSIS -.nf -#include <sys/scsi/scsi.h> -.fi - -.SH INTERFACE LEVEL +.\" Copyright (c) 2009, Sun Microsystems, Inc. All Rights Reserved. +.\" Copyright 2022 Garrett D'Amore +.\" The contents of this file are subject to the terms of the +.\" Common Development and Distribution License (the "License"). +.\" You may not use this file except in compliance with the License. +.\" +.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +.\" or http://www.opensolaris.org/os/licensing. +.\" See the License for the specific language governing permissions +.\" and limitations under the License. +.\" +.\" When distributing Covered Code, include this CDDL HEADER in each +.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE. +.\" If applicable, add the following below this CDDL HEADER, with the +.\" fields enclosed by brackets "[]" replaced with your own identifying +.\" information: Portions Copyright [yyyy] [name of copyright owner] +.Dd June 21, 2022 +.Dt SCSI_PKT 9S +.Os +.Sh NAME +.Nm scsi_pkt +.Nd SCSI packet structure +.Sh SYNOPSIS +.In sys/scsi/scsi.h +.Sh INTERFACE LEVEL illumos DDI specific (illumos DDI). -.SH DESCRIPTION -A \fBscsi_pkt\fR structure defines the packet that is allocated by -\fBscsi_init_pkt\fR(9F). The target driver fills in some information and passes -it to \fBscsi_transport\fR(9F) for execution on the target. The host bus -adapter (\fBHBA\fR) fills in other information as the command is processed. 
+.Sh DESCRIPTION +A +.Nm +structure defines the packet that is allocated by +.Xr scsi_init_pkt 9F . +The target driver fills in some information and passes it to +.Xr scsi_transport 9F +for execution on the target. +The host bus adapter +.Pq HBA +fills in other information as the command is processed. When the command completes or can be taken no further, the completion function specified in the packet is called with a pointer to the packet as its argument. From fields within the packet, the target driver can determine the success or failure of the command. -.SH STRUCTURE MEMBERS -.in +2 -.nf -opaque_t pkt_ha_private; /* private data for - host adapter */ -struct scsi_address pkt_address; /* destination packet */ -opaque_t pkt_private; /* private data - for target driver */ -void (*pkt_comp)(struct scsi_pkt *); /* callback */ -uint_t pkt_flags; /* flags */ -int pkt_time; /* time allotted to - complete command */ -uchar_t *pkt_scbp; /* pointer to - status block */ -uchar_t *pkt_cdbp; /* pointer to - command block */ -ssize_t pkt_resid; /* number of bytes - not transferred */ -uint_t pkt_state; /* state of command */ -uint_t pkt_statistics; /* statistics */ -uchar_t pkt_reason; /* reason completion - called */ -uint_t pkt_cdblen; /* length of pkt_cdbp */ -uint_t pkt_scdblen; /* length of pkt_scbp */ -uint_t pkt_tgtlen; /* length of pkt_private */ -uint_t pkt_numcookies; /* number of DMA cookies */ -ddi_dma_cookie_t *pkt_cookies; /* array of DMA cookies */ -uint_t pkt_dma_flags; /* DMA flags */ -.fi -.in -2 - -.sp -.ne 2 -.na -\fB\fBpkt_ha_private\fR\fR -.ad -.RS 18n +.Sh STRUCTURE MEMBERS +.Bd -literal -offset indent +opaque_t pkt_ha_private; +struct scsi_address pkt_address; +opaque_t pkt_private; +void (*pkt_comp)(struct scsi_pkt *); +uint_t pkt_flags; +int pkt_time; +uchar_t *pkt_scbp; +uchar_t *pkt_cdbp; +ssize_t pkt_resid; +uint_t pkt_state; +uint_t pkt_statistics; +uchar_t pkt_reason; +uint_t pkt_cdblen; +uint_t pkt_scblen; +uint_t pkt_tgtlen; +uint_t 
pkt_numcookies; +ddi_dma_cookie_t *pkt_cookies; +uint_t pkt_dma_flags; +.Ed +.Pp +These members are described here: +.Bl -tag -width indent +.It Fa pkt_ha_private Opaque pointer that the HBA uses to reference a private data structure that -transfers \fBscsi_pkt\fR requests. -.RE - -.sp -.ne 2 -.na -\fB\fBpkt_address\fR\fR -.ad -.RS 18n -Initialized by \fBscsi_init_pkt\fR(9F), \fBpkt_address\fR records the intended -route and the recipient of a request. -.RE - -.sp -.ne 2 -.na -\fB\fBpkt_private\fR\fR -.ad -.RS 18n -Reserved for the use of the target driver, \fBpkt_private\fR is not changed by -the HBA driver. -.RE - -.sp -.ne 2 -.na -\fB\fBpkt_comp\fR\fR -.ad -.RS 18n -Specifies the command completion callback routine. When the host adapter driver -has gone as far as it can in transporting a command to a \fBSCSI\fR target, and -the command has either run to completion or can go no further for some other -reason, the host adapter driver calls the function pointed to by this field and -passes a pointer to the packet as argument. The callback routine itself is -called from interrupt context and must not sleep or call any function that -might sleep. -.RE - -.sp -.ne 2 -.na -\fB\fBpkt_flags\fR\fR -.ad -.RS 18n +transfers +.Nm +requests. +.It Fa pkt_address +Initialized by +.Xr scsi_init_pkt 9F , +.Fa pkt_address +records the intended route and the recipient of a request. +.It Fa pkt_private +Reserved for the use of the target driver, +.Fa pkt_private +is not changed by the HBA driver. +.It Fa pkt_comp +Specifies the command completion callback routine. +When the host adapter driver has gone as far as it can in transporting a +command to a SCSI target, and the command has either run to completion or +can go no further for some other reason, the host adapter driver calls the +function pointed to by this field and passes a pointer to the packet as +argument. 
+The callback routine itself is called from interrupt context and must not +sleep or call any function that might sleep. +.It Fa pkt_flags Provides additional information about how the target driver expects the command -to be executed. See \fBpkt_flag Definitions\fR. -.RE - -.sp -.ne 2 -.na -\fB\fBpkt_time\fR\fR -.ad -.RS 18n +to be executed. +See +.Sx pkt_flags Definitions . +.It Fa pkt_time Set by the target driver to represent the maximum time allowed in seconds for -this command to complete. Timeout starts when the command is transmitted on the -\fBSCSI\fR bus. The \fBpkt_time\fR may be \fB0\fR if no timeout is required. -.RE - -.sp -.ne 2 -.na -\fB\fBpkt_scbp\fR\fR -.ad -.RS 18n -Points to either a struct \fBscsi_status\fR(9S) or, if \fBauto-rqsense\fR is -enabled and \fBpkt_state\fR includes \fBSTATE_ARQ_DONE\fR, a struct -\fBscsi_arq_status\fR. If \fBscsi_status\fR is returned, the \fBSCSI\fR status -byte resulting from the requested command is available. If -\fBscsi_arq_status\fR(9S) is returned, the sense information is also available. -.RE - -.sp -.ne 2 -.na -\fB\fBpkt_cdbp\fR\fR -.ad -.RS 18n +this command to complete. +Timeout starts when the command is transmitted on the SCSI bus. +The +.Fa pkt_time +may be zero if no timeout is required. +.It Fa pkt_scbp +Points to either a struct +.Xr scsi_status 9S +or, if +.Fa auto-rqsense +is enabled and +.Fa pkt_state +includes +.Dv STATE_ARQ_DONE , +a struct +.Xr scsi_arq_status 9S . +If +.Xr scsi_status 9S +is returned, the SCSI status byte resulting from the requested command is +available. +If +.Xr scsi_arq_status 9S +is returned, the sense information is also available. +.It Fa pkt_cdbp Points to a kernel-addressable buffer with a length specified by a call to the -proper resource allocation routine, \fBscsi_init_pkt\fR(9F). -.RE - -.sp -.ne 2 -.na -\fB\fBpkt_resid\fR\fR -.ad -.RS 18n +proper resource allocation routine, +.Xr scsi_init_pkt 9F . 
+.It Fa pkt_resid Contains a residual count, either the number of data bytes that have not been -transferred (\fBscsi_transport\fR(9F)) or the number of data bytes for which -DMA resources could not be allocated \fBscsi_init_pkt\fR(9F). In the latter -case, partial DMA resources can be allocated only if \fBscsi_init_pkt\fR(9F) is -called with the \fBPKT_DMA_PARTIAL\fR flag. -.RE - -.sp -.ne 2 -.na -\fB\fBpkt_state\fR\fR -.ad -.RS 18n +transferred by +.Xr scsi_transport 9F +or the number of data bytes for which +DMA resources could not be allocated by +.Xr scsi_init_pkt 9F . +In the latter case, partial DMA resources can be allocated only if +.Xr scsi_init_pkt 9F +is called with the +.Dv PKT_DMA_PARTIAL +flag. +.It Fa pkt_state Has bit positions that represent the six most important states that a -\fBSCSI\fR command can go through. See \fBpkt_state Definitions\fR. -.RE - -.sp -.ne 2 -.na -\fB\fBpkt_statistics\fR\fR -.ad -.RS 18n -Maintains some transport-related statistics. See \fBpkt_statistics -Definitions\fR. -.RE - -.sp -.ne 2 -.na -\fB\fBpkt_reason\fR\fR -.ad -.RS 18n -Contains a completion code that indicates why the \fBpkt_comp\fR function was -called. See \fBpkt_reason Definitions\fR. -.RE - -.sp -.ne 2 -.na -\fB\fBpkt_cdblen\fR\fR -.ad -.RS 18n -Length of buffer pointed to by \fBpkt_cdbp\fR. See \fBtran_setup_pkt\fR. -.RE - -.sp -.ne 2 -.na -\fB\fBpkt_scblen\fR\fR -.ad -.RS 18n -Length of buffer pointed to by \fBpkt_scbp\fR. See \fBtran_setup_pkt\fR. -.RE - -.sp -.ne 2 -.na -\fB\fBpkt_tgtlen\fR\fR -.ad -.RS 18n -Length of buffer pointed to by \fBpkt_private\fR. See \fBtran_setup_pkt\fR. -.RE - -.sp -.ne 2 -.na -\fB\fBpkt_numcookies\fR\fR -.ad -.RS 18n -Length \fBpkt_cookies\fR array. See \fBtran_setup_pkt\fR. -.RE - -.sp -.ne 2 -.na -\fB\fBpkt_cookies\fR\fR -.ad -.RS 18n -Array of DMA cookies. See \fBtran_setup_pkt\fR. -.RE - -.sp -.ne 2 -.na -\fB\fBpkt_dma_flags\fR\fR -.ad -.RS 18n -DMA flags used, such as \fBDDI_DMA_READ\fR and \fBDDI_DMA_WRITE\fR. 
See -\fBtran_setup_pkt\fR. -.RE - -.sp -.LP -The host adapter driver will update the \fBpkt_resid\fR, \fBpkt_reason\fR, -\fBpkt_state\fR, and \fBpkt_statistics\fR fields. -.SS "\fBpkt_flags\fR Definitions" -The appropriate definitions for the structure member \fBpkt_flags\fR are: -.sp -.ne 2 -.na -\fB\fBFLAG_NOINTR\fR\fR -.ad -.RS 30n -Run command with no command completion callback. Command is complete upon -return from \fBscsi_transport\fR(9F). -.RE - -.sp -.ne 2 -.na -\fB\fBFLAG_NODISCON\fR\fR -.ad -.RS 30n +SCSI command can go through. +See +.Sx pkt_state Definitions . +.It Fa pkt_statistics +Maintains some transport-related statistics. +See +.Sx pkt_statistics Definitions . +.It Fa pkt_reason +Contains a completion code that indicates why the +.Fa pkt_comp +function was called. +See +.Sx pkt_reason Definitions . +.It Fa pkt_cdblen +Length of buffer pointed to by +.Fa pkt_cdbp . +See +.Xr tran_setup_pkt 9E . +.It Fa pkt_scblen +Length of buffer pointed to by +.Fa pkt_scbp . +See +.Xr tran_setup_pkt 9E . +.It Fa pkt_tgtlen +Length of buffer pointed to by +.Fa pkt_private . +See +.Xr tran_setup_pkt 9E . +.It Fa pkt_numcookies +Length of +.Fa pkt_cookies +array. +See +.Xr tran_setup_pkt 9E . +.It Fa pkt_cookies +Array of DMA cookies. +See +.Xr tran_setup_pkt 9E . +.It Fa pkt_dma_flags +DMA flags used, such as +.Dv DDI_DMA_READ +and +.Dv DDI_DMA_WRITE . +See +.Xr tran_setup_pkt 9E . +.El +.Pp +The host adapter driver will update the +.Fa pkt_resid , +.Fa pkt_reason , +.Fa pkt_state , +and +.Fa pkt_statistics +fields. +.Ss "pkt_flags Definitions" +The appropriate definitions for the structure member +.Fa pkt_flags +are: +.Bl -tag -width indent +.It Dv FLAG_NOINTR +Run command with no command completion callback. +Command is complete upon return from +.Xr scsi_transport 9F . +.It Dv FLAG_NODISCON Run command without disconnects. -.RE - -.sp -.ne 2 -.na -\fB\fBFLAG_NOPARITY\fR\fR -.ad -.RS 30n +.It Dv FLAG_NOPARITY Run command without parity checking. 
-.RE - -.sp -.ne 2 -.na -\fB\fBFLAG_HTAG\fR\fR -.ad -.RS 30n +.It Dv FLAG_HTAG Run command as the head-of-queue-tagged command. -.RE - -.sp -.ne 2 -.na -\fB\fBFLAG_OTAG\fR\fR -.ad -.RS 30n +.It Dv FLAG_OTAG Run command as an ordered-queue-tagged command. -.RE - -.sp -.ne 2 -.na -\fB\fBFLAG_STAG\fR\fR -.ad -.RS 30n +.It Dv FLAG_STAG Run command as a simple-queue-tagged command. -.RE - -.sp -.ne 2 -.na -\fB\fBFLAG_SENSING\fR\fR -.ad -.RS 30n +.It Dv FLAG_SENSING Indicates a request sense command. -.RE - -.sp -.ne 2 -.na -\fB\fBFLAG_HEAD\fR\fR -.ad -.RS 30n +.It Dv FLAG_HEAD Place command at the head of the queue. -.RE - -.sp -.ne 2 -.na -\fB\fBFLAG_RENEGOTIATE_WIDE_SYNC\fR\fR -.ad -.RS 30n +.It Dv FLAG_RENEGOTIATE_WIDE_SYNC Before transporting this command, the host adapter should initiate the -renegotiation of wide mode and synchronous transfer speed. Normally, the HBA -driver manages negotiations but under certain conditions forcing a -renegotiation is appropriate. Renegotiation is recommended before Request Sense -and Inquiry commands. Refer to the SCSI 2 standard, sections 6.6.21 and 6.6.23. -.sp +renegotiation of wide mode and synchronous transfer speed. +Normally, the HBA driver manages negotiations but under certain conditions +forcing a renegotiation is appropriate. +Renegotiation is recommended before +.Ql "Request Sense" +and +.Ql Inquiry +commands. +Refer to the SCSI 2 standard, sections 6.6.21 and 6.6.23. +.Pp This flag should not be set for every packet as this will severely impact performance. -.RE - -.SS "\fBpkt_reason\fR Definitions" -The appropriate definitions for the structure member \fBpkt_reason\fR are: -.sp -.ne 2 -.na -\fB\fBCMD_CMPLT\fR\fR -.ad -.RS 20n +.El +.Ss "pkt_reason Definitions" +The appropriate definitions for the structure member +.Fa pkt_reason +are: +.Bl -tag -width indent +.It Dv CMD_CMPLT No transport errors; normal completion. 
-.RE - -.sp -.ne 2 -.na -\fB\fBCMD_INCOMPLETE\fR\fR -.ad -.RS 20n +.It Dv CMD_INCOMPLETE Transport stopped with abnormal state. -.RE - -.sp -.ne 2 -.na -\fB\fBCMD_DMA_DERR\fR\fR -.ad -.RS 20n -\fBDMA\fRd irection error. -.RE - -.sp -.ne 2 -.na -\fB\fBCMD_TRAN_ERR\fR\fR -.ad -.RS 20n +.It Dv CMD_DMA_DERR +DMA direction error. +.It Dv CMD_TRAN_ERR Unspecified transport error. -.RE - -.sp -.ne 2 -.na -\fB\fBCMD_RESET\fR\fR -.ad -.RS 20n -\fBSCSI\fR bus reset destroyed command. -.RE - -.sp -.ne 2 -.na -\fB\fBCMD_ABORTED\fR\fR -.ad -.RS 20n +.It Dv CMD_RESET +SCSI bus reset destroyed command. +.It Dv CMD_ABORTED Command transport aborted on request. -.RE - -.sp -.ne 2 -.na -\fB\fBCMD_TIMEOUT\fR\fR -.ad -.RS 20n +.It Dv CMD_TIMEOUT Command timed out. -.RE - -.sp -.ne 2 -.na -\fB\fBCMD_DATA_OVR\fR\fR -.ad -.RS 20n +.It Dv CMD_DATA_OVR Data overrun. -.RE - -.sp -.ne 2 -.na -\fB\fBCMD_CMD_OVR\fR\fR -.ad -.RS 20n +.It Dv CMD_CMD_OVR Command overrun. -.RE - -.sp -.ne 2 -.na -\fB\fBCMD_STS_OVR\fR\fR -.ad -.RS 20n +.It Dv CMD_STS_OVR Status overrun. -.RE - -.sp -.ne 2 -.na -\fB\fBCMD_BADMSG\fR\fR -.ad -.RS 20n +.It Dv CMD_BADMSG Message not command complete. -.RE - -.sp -.ne 2 -.na -\fB\fBCMD_NOMSGOUT\fR\fR -.ad -.RS 20n +.It Dv CMD_NOMSGOUT Target refused to go to message out phase. -.RE - -.sp -.ne 2 -.na -\fB\fBCMD_XID_FAIL\fR\fR -.ad -.RS 20n +.It Dv CMD_XID_FAIL Extended identify message rejected. -.RE - -.sp -.ne 2 -.na -\fB\fBCMD_IDE_FAIL\fR\fR -.ad -.RS 20n +.It Dv CMD_IDE_FAIL "Initiator Detected Error" message rejected. -.RE - -.sp -.ne 2 -.na -\fB\fBCMD_ABORT_FAIL\fR\fR -.ad -.RS 20n +.It Dv CMD_ABORT_FAIL Abort message rejected. -.RE - -.sp -.ne 2 -.na -\fB\fBCMD_REJECT_FAIL\fR\fR -.ad -.RS 20n +.It Dv CMD_REJECT_FAIL Reject message rejected. -.RE - -.sp -.ne 2 -.na -\fB\fBCMD_NOP_FAIL\fR\fR -.ad -.RS 20n +.It Dv CMD_NOP_FAIL "No Operation" message rejected. 
-.RE - -.sp -.ne 2 -.na -\fB\fBCMD_PER_FAIL\fR\fR -.ad -.RS 20n +.It Dv CMD_PER_FAIL "Message Parity Error" message rejected. -.RE - -.sp -.ne 2 -.na -\fB\fBCMD_BDR_FAIL\fR\fR -.ad -.RS 20n +.It Dv CMD_BDR_FAIL "Bus Device Reset" message rejected. -.RE - -.sp -.ne 2 -.na -\fB\fBCMD_ID_FAIL\fR\fR -.ad -.RS 20n +.It Dv CMD_ID_FAIL Identify message rejected. -.RE - -.sp -.ne 2 -.na -\fB\fBCMD_UNX_BUS_FREE\fR\fR -.ad -.RS 20n +.It Dv CMD_UNX_BUS_FREE Unexpected bus free phase. -.RE - -.sp -.ne 2 -.na -\fB\fBCMD_TAG_REJECT\fR\fR -.ad -.RS 20n +.It Dv CMD_TAG_REJECT Target rejected the tag message. -.RE - -.sp -.ne 2 -.na -\fB\fBCMD_DEV_GONE\fR\fR -.ad -.RS 20n +.It Dv CMD_DEV_GONE The device has been removed. -.RE - -.SS "pkt_state Definitions" -The appropriate definitions for the structure member \fBpkt_state\fR are: -.sp -.ne 2 -.na -\fB\fBSTATE_GOT_BUS\fR\fR -.ad -.RS 22n +.El +.Ss "pkt_state Definitions" +The appropriate definitions for the structure member +.Fa pkt_state +are: +.Bl -tag -width indent +.It Dv STATE_GOT_BUS Bus arbitration succeeded. -.RE - -.sp -.ne 2 -.na -\fB\fBSTATE_GOT_TARGET\fR\fR -.ad -.RS 22n +.It Dv STATE_GOT_TARGET Target successfully selected. -.RE - -.sp -.ne 2 -.na -\fB\fBSTATE_SENT_CMD\fR\fR -.ad -.RS 22n +.It Dv STATE_SENT_CMD Command successfully sent. -.RE - -.sp -.ne 2 -.na -\fB\fBSTATE_XFERRED_DATA\fR\fR -.ad -.RS 22n +.It Dv STATE_XFERRED_DATA Data transfer took place. -.RE - -.sp -.ne 2 -.na -\fB\fBSTATE_GOT_STATUS\fR\fR -.ad -.RS 22n +.It Dv STATE_GOT_STATUS Status received. -.RE - -.sp -.ne 2 -.na -\fB\fBSTATE_ARQ_DONE\fR\fR -.ad -.RS 22n +.It Dv STATE_ARQ_DONE The command resulted in a check condition and the host adapter driver executed an automatic request sense command. -.RE - -.sp -.ne 2 -.na -\fB\fBSTATE_XARQ_DONE\fR\fR -.ad -.RS 22n -The command requested in extra sense data using a \fBPKT_XARQ\fR flag got a -check condition. The host adapter driver was able to successfully request and -return this. 
The \fBscsi_pkt.pkt_scbp->sts_rqpkt_resid\fR returns the sense -data residual based on the \fIstatuslen\fR parameter of the -\fBscsi_init_pkt\fR(9F) call. The sense data begins at -\fBscsi_pkt.pkt_scbp->sts_sensedata\fR. -.RE - -.SS "pkt_statistics Definitions" +.It Dv STATE_XARQ_DONE +The command requested in extra sense data using a +.Dv PKT_XARQ +flag got a check condition. +The host adapter driver was able to successfully request and return this. +The +.Sy scsi_pkt.pkt_scbp->sts_rqpkt_resid +returns the sense data residual based on the +.Fa statuslen +parameter of the +.Xr scsi_init_pkt 9F +call. +The sense data begins at +.Sy scsi_pkt.pkt_scbp->sts_sensedata . +.El +.Ss "pkt_statistics Definitions" The definitions that are appropriate for the structure member -\fBpkt_statistics\fR are: -.sp -.ne 2 -.na -\fB\fBSTAT_DISCON\fR\fR -.ad -.RS 18n +.Fa pkt_statistics +are: +.Bl -tag -width indent +.It Dv STAT_DISCON Device disconnect. -.RE - -.sp -.ne 2 -.na -\fB\fBSTAT_SYNC\fR\fR -.ad -.RS 18n +.It Dv STAT_SYNC Command did a synchronous data transfer. -.RE - -.sp -.ne 2 -.na -\fB\fBSTAT_PERR\fR\fR -.ad -.RS 18n -\fBSCSI\fR parity error. -.RE - -.sp -.ne 2 -.na -\fB\fBSTAT_BUS_RESET\fR\fR -.ad -.RS 18n +.It Dv STAT_PERR +SCSI parity error. +.It Dv STAT_BUS_RESET Bus reset. -.RE - -.sp -.ne 2 -.na -\fB\fBSTAT_DEV_RESET\fR\fR -.ad -.RS 18n +.It Dv STAT_DEV_RESET Device reset. -.RE - -.sp -.ne 2 -.na -\fB\fBSTAT_ABORTED\fR\fR -.ad -.RS 18n +.It Dv STAT_ABORTED Command was aborted. -.RE - -.sp -.ne 2 -.na -\fB\fBSTAT_TIMEOUT\fR\fR -.ad -.RS 18n +.It Dv STAT_TIMEOUT Command timed out. -.RE - -.SH SEE ALSO -.BR tran_init_pkt (9E), -.BR tran_setup_pkt (9E), -.BR scsi_hba_pkt_comp (9F), -.BR scsi_init_pkt (9F), -.BR scsi_transport (9F), -.BR scsi_arq_status (9S), -.BR scsi_status (9S) -.sp -.LP -\fIWriting Device Drivers\fR -.SH NOTES -HBA drivers should signal \fBscsi_pkt\fR completion by calling -\fBscsi_hba_pkt_comp\fR(9F). 
This is mandatory for HBA drivers that implement -\fBtran_setup_pkt\fR(9E). Failure to comply results in undefined behavior. +.El +.Sh SEE ALSO +.Xr tran_init_pkt 9E , +.Xr tran_setup_pkt 9E , +.Xr scsi_hba_pkt_comp 9F , +.Xr scsi_init_pkt 9F , +.Xr scsi_transport 9F , +.Xr scsi_arq_status 9S , +.Xr scsi_status 9S +.Pp +.Rs +.%T "Writing Device Drivers" +.Re +.Sh NOTES +HBA drivers should signal +.Nm +completion by calling +.Xr scsi_hba_pkt_comp 9F . +This is mandatory for HBA drivers that implement +.Xr tran_setup_pkt 9E . +Failure to comply results in undefined behavior. +.Pp +Drivers must not make assumptions about the size of the +.Nm +structure. +In particular, this structure must not be directly +inlined into other driver structures nor allocated +except by one of the specialized allocation functions +such as +.Xr scsi_init_pkt 9F . diff --git a/usr/src/uts/intel/io/vmm/amd/svm.c b/usr/src/uts/intel/io/vmm/amd/svm.c index b699d57991..de4a492ae9 100644 --- a/usr/src/uts/intel/io/vmm/amd/svm.c +++ b/usr/src/uts/intel/io/vmm/amd/svm.c @@ -72,7 +72,6 @@ __FBSDID("$FreeBSD$"); #include "vlapic.h" #include "vlapic_priv.h" -#include "x86.h" #include "vmcb.h" #include "svm.h" #include "svm_softc.h" @@ -2257,6 +2256,17 @@ svm_setdesc(void *arg, int vcpu, int reg, const struct seg_desc *desc) if (SEG_DESC_UNUSABLE(desc->access)) { seg->attrib &= ~0x80; } + /* + * Keep CPL synced with the DPL specified for %ss. + * + * KVM notes that a SYSRET to non-cpl-3 is possible on AMD + * (unlike Intel), but accepts such a possible deviation for + * what is otherwise unreasonable behavior for a guest OS, since + * they do the same synchronization. 
+ */ + if (reg == VM_REG_GUEST_SS) { + vmcb->state.cpl = SEG_DESC_DPL(desc->access); + } break; case VM_REG_GUEST_GDTR: @@ -2339,6 +2349,55 @@ svm_getdesc(void *arg, int vcpu, int reg, struct seg_desc *desc) } static int +svm_get_msr(void *arg, int vcpu, uint32_t msr, uint64_t *valp) +{ + struct svm_softc *sc = arg; + struct vmcb *vmcb = svm_get_vmcb(sc, vcpu); + const uint64_t *msrp = vmcb_msr_ptr(vmcb, msr, NULL); + + if (msrp != NULL) { + *valp = *msrp; + return (0); + } + + return (EINVAL); +} + +static int +svm_set_msr(void *arg, int vcpu, uint32_t msr, uint64_t val) +{ + struct svm_softc *sc = arg; + struct vmcb *vmcb = svm_get_vmcb(sc, vcpu); + + uint32_t dirty = 0; + uint64_t *msrp = vmcb_msr_ptr(vmcb, msr, &dirty); + if (msrp == NULL) { + return (EINVAL); + } + switch (msr) { + case MSR_EFER: + /* + * For now, just clone the logic from + * svm_setreg(): + * + * EFER_SVM must always be set when the guest is + * executing + */ + *msrp = val | EFER_SVM; + break; + /* TODO: other necessary MSR masking */ + default: + *msrp = val; + break; + } + if (dirty != 0) { + svm_set_dirty(sc, vcpu, dirty); + } + return (0); + +} + +static int svm_setcap(void *arg, int vcpu, int type, int val) { struct svm_softc *sc; @@ -2450,4 +2509,7 @@ struct vmm_ops vmm_ops_amd = { .vmsavectx = svm_savectx, .vmrestorectx = svm_restorectx, + + .vmgetmsr = svm_get_msr, + .vmsetmsr = svm_set_msr, }; diff --git a/usr/src/uts/intel/io/vmm/amd/vmcb.c b/usr/src/uts/intel/io/vmm/amd/vmcb.c index 5be5240129..ec2c9674c0 100644 --- a/usr/src/uts/intel/io/vmm/amd/vmcb.c +++ b/usr/src/uts/intel/io/vmm/amd/vmcb.c @@ -45,6 +45,7 @@ __FBSDID("$FreeBSD$"); #include <sys/param.h> #include <sys/systm.h> +#include <machine/specialreg.h> #include <machine/vmm.h> #include "vmcb.h" @@ -148,3 +149,63 @@ vmcb_regptr(struct vmcb *vmcb, int ident, uint32_t *dirtyp) } return (res); } + +uint64_t * +vmcb_msr_ptr(struct vmcb *vmcb, uint32_t msr, uint32_t *dirtyp) +{ + uint64_t *res = NULL; + uint32_t dirty = 0; + 
struct vmcb_state *state = &vmcb->state; + + switch (msr) { + case MSR_EFER: + res = &state->efer; + dirty = VMCB_CACHE_CR; + break; + + case MSR_GSBASE: + res = &state->gs.base; + dirty = VMCB_CACHE_SEG; + break; + case MSR_FSBASE: + res = &state->fs.base; + dirty = VMCB_CACHE_SEG; + break; + case MSR_KGSBASE: + res = &state->kernelgsbase; + break; + + case MSR_STAR: + res = &state->star; + break; + case MSR_LSTAR: + res = &state->lstar; + break; + case MSR_CSTAR: + res = &state->cstar; + break; + case MSR_SF_MASK: + res = &state->sfmask; + break; + + case MSR_SYSENTER_CS_MSR: + res = &state->sysenter_cs; + break; + case MSR_SYSENTER_ESP_MSR: + res = &state->sysenter_esp; + break; + case MSR_SYSENTER_EIP_MSR: + res = &state->sysenter_eip; + break; + + case MSR_PAT: + res = &state->g_pat; + dirty = VMCB_CACHE_NP; + break; + } + + if (res != NULL && dirtyp != NULL) { + *dirtyp = dirty; + } + return (res); +} diff --git a/usr/src/uts/intel/io/vmm/amd/vmcb.h b/usr/src/uts/intel/io/vmm/amd/vmcb.h index da0f08445c..7a57979d56 100644 --- a/usr/src/uts/intel/io/vmm/amd/vmcb.h +++ b/usr/src/uts/intel/io/vmm/amd/vmcb.h @@ -397,6 +397,7 @@ CTASSERT(offsetof(struct vmcb, state) == 0x400); struct vmcb_segment *vmcb_segptr(struct vmcb *vmcb, int type); uint64_t *vmcb_regptr(struct vmcb *vmcb, int ident, uint32_t *dirtyp); +uint64_t *vmcb_msr_ptr(struct vmcb *vmcb, uint32_t ident, uint32_t *dirtyp); #endif /* _KERNEL */ #endif /* _VMCB_H_ */ diff --git a/usr/src/uts/intel/io/vmm/intel/vmcs.c b/usr/src/uts/intel/io/vmm/intel/vmcs.c index 7fabba79f7..b5bc8130d9 100644 --- a/usr/src/uts/intel/io/vmm/intel/vmcs.c +++ b/usr/src/uts/intel/io/vmm/intel/vmcs.c @@ -165,6 +165,34 @@ vmcs_seg_desc_encoding(int seg, uint32_t *base, uint32_t *lim, uint32_t *acc) } } +uint32_t +vmcs_msr_encoding(uint32_t msr) +{ + switch (msr) { + case MSR_PAT: + return (VMCS_GUEST_IA32_PAT); + case MSR_EFER: + return (VMCS_GUEST_IA32_EFER); + case MSR_SYSENTER_CS_MSR: + return (VMCS_GUEST_IA32_SYSENTER_CS); 
+ case MSR_SYSENTER_ESP_MSR: + return (VMCS_GUEST_IA32_SYSENTER_ESP); + case MSR_SYSENTER_EIP_MSR: + return (VMCS_GUEST_IA32_SYSENTER_EIP); + /* + * While fsbase and gsbase are expected to be accessed (by the VMM) via + * the segment descriptor interfaces, we still make it available as MSR + * contents as well. + */ + case MSR_FSBASE: + return (VMCS_GUEST_FS_BASE); + case MSR_GSBASE: + return (VMCS_GUEST_GS_BASE); + default: + return (VMCS_INVALID_ENCODING); + } +} + void vmcs_clear(uintptr_t vmcs_pa) { diff --git a/usr/src/uts/intel/io/vmm/intel/vmcs.h b/usr/src/uts/intel/io/vmm/intel/vmcs.h index 24dc2dd574..9e4a9e3282 100644 --- a/usr/src/uts/intel/io/vmm/intel/vmcs.h +++ b/usr/src/uts/intel/io/vmm/intel/vmcs.h @@ -48,6 +48,7 @@ CTASSERT(sizeof (struct vmcs) == PAGE_SIZE); uint32_t vmcs_field_encoding(int ident); void vmcs_seg_desc_encoding(int seg, uint32_t *base, uint32_t *lim, uint32_t *acc); +uint32_t vmcs_msr_encoding(uint32_t msr); void vmcs_initialize(struct vmcs *vmcs, uintptr_t vmcs_pa); diff --git a/usr/src/uts/intel/io/vmm/intel/vmx.c b/usr/src/uts/intel/io/vmm/intel/vmx.c index 360cec1056..4ef51259ab 100644 --- a/usr/src/uts/intel/io/vmm/intel/vmx.c +++ b/usr/src/uts/intel/io/vmm/intel/vmx.c @@ -85,7 +85,6 @@ __FBSDID("$FreeBSD$"); #include "vmcs.h" #include "vmx.h" #include "vmx_msr.h" -#include "x86.h" #include "vmx_controls.h" #define PINBASED_CTLS_ONE_SETTING \ @@ -1629,6 +1628,25 @@ vmx_set_guest_reg(struct vmx *vmx, int vcpu, int ident, uint64_t regval) } } +static void +vmx_sync_efer_state(struct vmx *vmx, int vcpu, uint64_t efer) +{ + uint64_t ctrl; + + /* + * If the "load EFER" VM-entry control is 1 (which we require) then the + * value of EFER.LMA must be identical to "IA-32e mode guest" bit in the + * VM-entry control. 
+ */ + ctrl = vmcs_read(VMCS_ENTRY_CTLS); + if ((efer & EFER_LMA) != 0) { + ctrl |= VM_ENTRY_GUEST_LMA; + } else { + ctrl &= ~VM_ENTRY_GUEST_LMA; + } + vmcs_write(VMCS_ENTRY_CTLS, ctrl); +} + static int vmx_emulate_cr0_access(struct vmx *vmx, int vcpu, uint64_t exitqual) { @@ -1655,20 +1673,14 @@ vmx_emulate_cr0_access(struct vmx *vmx, int vcpu, uint64_t exitqual) vmcs_write(VMCS_GUEST_CR0, crval); if (regval & CR0_PG) { - uint64_t efer, entry_ctls; + uint64_t efer; - /* - * If CR0.PG is 1 and EFER.LME is 1 then EFER.LMA and - * the "IA-32e mode guest" bit in VM-entry control must be - * equal. - */ + /* Keep EFER.LMA properly updated if paging is enabled */ efer = vmcs_read(VMCS_GUEST_IA32_EFER); if (efer & EFER_LME) { efer |= EFER_LMA; vmcs_write(VMCS_GUEST_IA32_EFER, efer); - entry_ctls = vmcs_read(VMCS_ENTRY_CTLS); - entry_ctls |= VM_ENTRY_GUEST_LMA; - vmcs_write(VMCS_ENTRY_CTLS, entry_ctls); + vmx_sync_efer_state(vmx, vcpu, efer); } } @@ -2934,6 +2946,44 @@ vmx_vmcleanup(void *arg) kmem_free(vmx, sizeof (*vmx)); } +/* + * Ensure that the VMCS for this vcpu is loaded. + * Returns true if a VMCS load was required. 
+ */ +static bool +vmx_vmcs_access_ensure(struct vmx *vmx, int vcpu) +{ + int hostcpu; + + if (vcpu_is_running(vmx->vm, vcpu, &hostcpu)) { + if (hostcpu != curcpu) { + panic("unexpected vcpu migration %d != %d", + hostcpu, curcpu); + } + /* Earlier logic already took care of the load */ + return (false); + } else { + vmcs_load(vmx->vmcs_pa[vcpu]); + return (true); + } +} + +static void +vmx_vmcs_access_done(struct vmx *vmx, int vcpu) +{ + int hostcpu; + + if (vcpu_is_running(vmx->vm, vcpu, &hostcpu)) { + if (hostcpu != curcpu) { + panic("unexpected vcpu migration %d != %d", + hostcpu, curcpu); + } + /* Later logic will take care of the unload */ + } else { + vmcs_clear(vmx->vmcs_pa[vcpu]); + } +} + static uint64_t * vmxctx_regptr(struct vmxctx *vmxctx, int reg) { @@ -2989,25 +3039,18 @@ vmxctx_regptr(struct vmxctx *vmxctx, int reg) static int vmx_getreg(void *arg, int vcpu, int reg, uint64_t *retval) { - int running, hostcpu, err; struct vmx *vmx = arg; uint64_t *regp; - running = vcpu_is_running(vmx->vm, vcpu, &hostcpu); - if (running && hostcpu != curcpu) - panic("vmx_getreg: %d is running", vcpu); - /* VMCS access not required for ctx reads */ if ((regp = vmxctx_regptr(&vmx->ctx[vcpu], reg)) != NULL) { *retval = *regp; return (0); } - if (!running) { - vmcs_load(vmx->vmcs_pa[vcpu]); - } + bool vmcs_loaded = vmx_vmcs_access_ensure(vmx, vcpu); + int err = 0; - err = 0; if (reg == VM_REG_GUEST_INTR_SHADOW) { uint64_t gi = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY); *retval = (gi & HWINTR_BLOCKING) ? 
1 : 0; @@ -3035,33 +3078,26 @@ vmx_getreg(void *arg, int vcpu, int reg, uint64_t *retval) } } - if (!running) { - vmcs_clear(vmx->vmcs_pa[vcpu]); + if (vmcs_loaded) { + vmx_vmcs_access_done(vmx, vcpu); } - return (err); } static int vmx_setreg(void *arg, int vcpu, int reg, uint64_t val) { - int running, hostcpu, error; struct vmx *vmx = arg; uint64_t *regp; - running = vcpu_is_running(vmx->vm, vcpu, &hostcpu); - if (running && hostcpu != curcpu) - panic("vmx_setreg: %d is running", vcpu); - /* VMCS access not required for ctx writes */ if ((regp = vmxctx_regptr(&vmx->ctx[vcpu], reg)) != NULL) { *regp = val; return (0); } - if (!running) { - vmcs_load(vmx->vmcs_pa[vcpu]); - } + bool vmcs_loaded = vmx_vmcs_access_ensure(vmx, vcpu); + int err = 0; if (reg == VM_REG_GUEST_INTR_SHADOW) { if (val != 0) { @@ -3069,39 +3105,24 @@ vmx_setreg(void *arg, int vcpu, int reg, uint64_t val) * Forcing the vcpu into an interrupt shadow is not * presently supported. */ - error = EINVAL; + err = EINVAL; } else { uint64_t gi; gi = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY); gi &= ~HWINTR_BLOCKING; vmcs_write(VMCS_GUEST_INTERRUPTIBILITY, gi); - error = 0; + err = 0; } } else { uint32_t encoding; - error = 0; + err = 0; encoding = vmcs_field_encoding(reg); switch (encoding) { case VMCS_GUEST_IA32_EFER: - /* - * If the "load EFER" VM-entry control is 1 then the - * value of EFER.LMA must be identical to "IA-32e mode - * guest" bit in the VM-entry control. - */ - if ((entry_ctls & VM_ENTRY_LOAD_EFER) != 0) { - uint64_t ctls; - - ctls = vmcs_read(VMCS_ENTRY_CTLS); - if (val & EFER_LMA) { - ctls |= VM_ENTRY_GUEST_LMA; - } else { - ctls &= ~VM_ENTRY_GUEST_LMA; - } - vmcs_write(VMCS_ENTRY_CTLS, ctls); - } vmcs_write(encoding, val); + vmx_sync_efer_state(vmx, vcpu, val); break; case VMCS_GUEST_CR0: /* @@ -3130,10 +3151,11 @@ vmx_setreg(void *arg, int vcpu, int reg, uint64_t val) * XXX the processor retains global mappings when %cr3 * is updated but vmx_invvpid() does not. 
*/ - vmx_invvpid(vmx, vcpu, running); + vmx_invvpid(vmx, vcpu, + vcpu_is_running(vmx->vm, vcpu, NULL)); break; case VMCS_INVALID_ENCODING: - error = EINVAL; + err = EINVAL; break; default: vmcs_write(encoding, val); @@ -3141,27 +3163,19 @@ vmx_setreg(void *arg, int vcpu, int reg, uint64_t val) } } - if (!running) { - vmcs_clear(vmx->vmcs_pa[vcpu]); + if (vmcs_loaded) { + vmx_vmcs_access_done(vmx, vcpu); } - - return (error); + return (err); } static int vmx_getdesc(void *arg, int vcpu, int seg, struct seg_desc *desc) { - int hostcpu, running; struct vmx *vmx = arg; uint32_t base, limit, access; - running = vcpu_is_running(vmx->vm, vcpu, &hostcpu); - if (running && hostcpu != curcpu) - panic("vmx_getdesc: %d is running", vcpu); - - if (!running) { - vmcs_load(vmx->vmcs_pa[vcpu]); - } + bool vmcs_loaded = vmx_vmcs_access_ensure(vmx, vcpu); vmcs_seg_desc_encoding(seg, &base, &limit, &access); desc->base = vmcs_read(base); @@ -3172,8 +3186,8 @@ vmx_getdesc(void *arg, int vcpu, int seg, struct seg_desc *desc) desc->access = 0; } - if (!running) { - vmcs_clear(vmx->vmcs_pa[vcpu]); + if (vmcs_loaded) { + vmx_vmcs_access_done(vmx, vcpu); } return (0); } @@ -3181,17 +3195,10 @@ vmx_getdesc(void *arg, int vcpu, int seg, struct seg_desc *desc) static int vmx_setdesc(void *arg, int vcpu, int seg, const struct seg_desc *desc) { - int hostcpu, running; struct vmx *vmx = arg; uint32_t base, limit, access; - running = vcpu_is_running(vmx->vm, vcpu, &hostcpu); - if (running && hostcpu != curcpu) - panic("vmx_setdesc: %d is running", vcpu); - - if (!running) { - vmcs_load(vmx->vmcs_pa[vcpu]); - } + bool vmcs_loaded = vmx_vmcs_access_ensure(vmx, vcpu); vmcs_seg_desc_encoding(seg, &base, &limit, &access); vmcs_write(base, desc->base); @@ -3200,12 +3207,94 @@ vmx_setdesc(void *arg, int vcpu, int seg, const struct seg_desc *desc) vmcs_write(access, desc->access); } - if (!running) { - vmcs_clear(vmx->vmcs_pa[vcpu]); + if (vmcs_loaded) { + vmx_vmcs_access_done(vmx, vcpu); } return (0); } 
+static uint64_t * +vmx_msr_ptr(struct vmx *vmx, int vcpu, uint32_t msr) +{ + uint64_t *guest_msrs = vmx->guest_msrs[vcpu]; + + switch (msr) { + case MSR_LSTAR: + return (&guest_msrs[IDX_MSR_LSTAR]); + case MSR_CSTAR: + return (&guest_msrs[IDX_MSR_CSTAR]); + case MSR_STAR: + return (&guest_msrs[IDX_MSR_STAR]); + case MSR_SF_MASK: + return (&guest_msrs[IDX_MSR_SF_MASK]); + case MSR_KGSBASE: + return (&guest_msrs[IDX_MSR_KGSBASE]); + case MSR_PAT: + return (&guest_msrs[IDX_MSR_PAT]); + default: + return (NULL); + } +} + +static int +vmx_msr_get(void *arg, int vcpu, uint32_t msr, uint64_t *valp) +{ + struct vmx *vmx = arg; + + ASSERT(valp != NULL); + + const uint64_t *msrp = vmx_msr_ptr(vmx, vcpu, msr); + if (msrp != NULL) { + *valp = *msrp; + return (0); + } + + const uint32_t vmcs_enc = vmcs_msr_encoding(msr); + if (vmcs_enc != VMCS_INVALID_ENCODING) { + bool vmcs_loaded = vmx_vmcs_access_ensure(vmx, vcpu); + + *valp = vmcs_read(vmcs_enc); + + if (vmcs_loaded) { + vmx_vmcs_access_done(vmx, vcpu); + } + return (0); + } + + return (EINVAL); +} + +static int +vmx_msr_set(void *arg, int vcpu, uint32_t msr, uint64_t val) +{ + struct vmx *vmx = arg; + + /* TODO: mask value */ + + uint64_t *msrp = vmx_msr_ptr(vmx, vcpu, msr); + if (msrp != NULL) { + *msrp = val; + return (0); + } + + const uint32_t vmcs_enc = vmcs_msr_encoding(msr); + if (vmcs_enc != VMCS_INVALID_ENCODING) { + bool vmcs_loaded = vmx_vmcs_access_ensure(vmx, vcpu); + + vmcs_write(vmcs_enc, val); + + if (msr == MSR_EFER) { + vmx_sync_efer_state(vmx, vcpu, val); + } + + if (vmcs_loaded) { + vmx_vmcs_access_done(vmx, vcpu); + } + return (0); + } + return (EINVAL); +} + static int vmx_getcap(void *arg, int vcpu, int type, int *retval) { @@ -3711,6 +3800,9 @@ struct vmm_ops vmm_ops_intel = { .vmsavectx = vmx_savectx, .vmrestorectx = vmx_restorectx, + + .vmgetmsr = vmx_msr_get, + .vmsetmsr = vmx_msr_set, }; /* Side-effect free HW validation derived from checks in vmx_init. 
*/ diff --git a/usr/src/uts/intel/io/vmm/sys/vmm_kernel.h b/usr/src/uts/intel/io/vmm/sys/vmm_kernel.h index bc7f1bb0f2..1dba79a7bf 100644 --- a/usr/src/uts/intel/io/vmm/sys/vmm_kernel.h +++ b/usr/src/uts/intel/io/vmm/sys/vmm_kernel.h @@ -90,6 +90,11 @@ typedef void (*vmi_vlapic_cleanup)(void *vmi, struct vlapic *vlapic); typedef void (*vmi_savectx)(void *vmi, int vcpu); typedef void (*vmi_restorectx)(void *vmi, int vcpu); +typedef int (*vmi_get_msr_t)(void *vmi, int vcpu, uint32_t msr, + uint64_t *valp); +typedef int (*vmi_set_msr_t)(void *vmi, int vcpu, uint32_t msr, + uint64_t val); + struct vmm_ops { vmm_init_func_t init; /* module wide initialization */ vmm_cleanup_func_t cleanup; @@ -109,6 +114,9 @@ struct vmm_ops { vmi_savectx vmsavectx; vmi_restorectx vmrestorectx; + + vmi_get_msr_t vmgetmsr; + vmi_set_msr_t vmsetmsr; }; extern struct vmm_ops vmm_ops_intel; @@ -379,6 +387,19 @@ typedef enum vm_msr_result { VMR_UNHANLDED = 2, /* handle in userspace, kernel cannot emulate */ } vm_msr_result_t; +enum vm_cpuid_capability { + VCC_NONE, + VCC_NO_EXECUTE, + VCC_FFXSR, + VCC_TCE, + VCC_LAST +}; + +int x86_emulate_cpuid(struct vm *, int, uint64_t *, uint64_t *, uint64_t *, + uint64_t *); +bool vm_cpuid_capability(struct vm *, int, enum vm_cpuid_capability); +bool validate_guest_xcr0(uint64_t, uint64_t); + void vmm_sol_glue_init(void); void vmm_sol_glue_cleanup(void); @@ -445,6 +466,7 @@ typedef struct vmm_data_req { uint32_t vdr_flags; uint32_t vdr_len; void *vdr_data; + uint32_t *vdr_result_len; } vmm_data_req_t; typedef struct vmm_data_req vmm_data_req_t; @@ -455,6 +477,7 @@ typedef struct vmm_data_version_entry { uint16_t vdve_class; uint16_t vdve_version; uint16_t vdve_len_expect; + uint16_t vdve_len_per_item; vmm_data_readf_t vdve_readf; vmm_data_writef_t vdve_writef; } vmm_data_version_entry_t; diff --git a/usr/src/uts/intel/io/vmm/vmm.c b/usr/src/uts/intel/io/vmm/vmm.c index 565dcbbe0a..e85d84d0b6 100644 --- a/usr/src/uts/intel/io/vmm/vmm.c +++ 
b/usr/src/uts/intel/io/vmm/vmm.c @@ -248,6 +248,8 @@ static struct vmm_ops vmm_ops_null = { .vlapic_cleanup = (vmi_vlapic_cleanup)nullop_panic, .vmsavectx = (vmi_savectx)nullop_panic, .vmrestorectx = (vmi_restorectx)nullop_panic, + .vmgetmsr = (vmi_get_msr_t)nullop_panic, + .vmsetmsr = (vmi_set_msr_t)nullop_panic, }; static struct vmm_ops *ops = &vmm_ops_null; @@ -1102,38 +1104,51 @@ vm_assign_pptdev(struct vm *vm, int pptfd) } int -vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval) +vm_get_register(struct vm *vm, int vcpuid, int reg, uint64_t *retval) { - - if (vcpu < 0 || vcpu >= vm->maxcpus) + if (vcpuid < 0 || vcpuid >= vm->maxcpus) return (EINVAL); if (reg >= VM_REG_LAST) return (EINVAL); - return (VMGETREG(vm->cookie, vcpu, reg, retval)); + struct vcpu *vcpu = &vm->vcpu[vcpuid]; + switch (reg) { + case VM_REG_GUEST_XCR0: + *retval = vcpu->guest_xcr0; + return (0); + default: + return (VMGETREG(vm->cookie, vcpuid, reg, retval)); + } } int vm_set_register(struct vm *vm, int vcpuid, int reg, uint64_t val) { - struct vcpu *vcpu; - int error; - if (vcpuid < 0 || vcpuid >= vm->maxcpus) return (EINVAL); if (reg >= VM_REG_LAST) return (EINVAL); - error = VMSETREG(vm->cookie, vcpuid, reg, val); - if (error || reg != VM_REG_GUEST_RIP) + int error; + struct vcpu *vcpu = &vm->vcpu[vcpuid]; + switch (reg) { + case VM_REG_GUEST_RIP: + error = VMSETREG(vm->cookie, vcpuid, reg, val); + if (error == 0) { + vcpu->nextrip = val; + } return (error); - - /* Set 'nextrip' to match the value of %rip */ - vcpu = &vm->vcpu[vcpuid]; - vcpu->nextrip = val; - return (0); + case VM_REG_GUEST_XCR0: + if (!validate_guest_xcr0(val, vmm_get_host_xcr0())) { + return (EINVAL); + } + vcpu->guest_xcr0 = val; + return (0); + default: + return (VMSETREG(vm->cookie, vcpuid, reg, val)); + } } static bool @@ -1864,7 +1879,7 @@ vm_handle_run_state(struct vm *vm, int vcpuid) } static int -vm_rdmtrr(struct vm_mtrr *mtrr, uint32_t num, uint64_t *val) +vm_rdmtrr(const struct vm_mtrr 
*mtrr, uint32_t num, uint64_t *val) { switch (num) { case MSR_MTRRcap: @@ -1945,6 +1960,22 @@ vm_wrmtrr(struct vm_mtrr *mtrr, uint32_t num, uint64_t val) return (0); } +static bool +is_mtrr_msr(uint32_t msr) +{ + switch (msr) { + case MSR_MTRRcap: + case MSR_MTRRdefType: + case MSR_MTRR4kBase ... MSR_MTRR4kBase + 7: + case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1: + case MSR_MTRR64kBase: + case MSR_MTRRVarBase ... MSR_MTRRVarBase + (VMM_MTRR_VAR_MAX * 2) - 1: + return (true); + default: + return (false); + } +} + static int vm_handle_rdmsr(struct vm *vm, int vcpuid, struct vm_exit *vme) { @@ -3702,17 +3733,20 @@ vmm_data_is_cpu_specific(uint16_t data_class) case VDC_MSR: case VDC_FPU: case VDC_LAPIC: - case VDC_VMM_ARCH: return (true); default: return (false); } } -static const vmm_data_version_entry_t * -vmm_data_find(const vmm_data_req_t *req, int *err) +static int +vmm_data_find(const vmm_data_req_t *req, const vmm_data_version_entry_t **resp) { const vmm_data_version_entry_t **vdpp, *vdp; + + ASSERT(resp != NULL); + ASSERT(req->vdr_result_len != NULL); + SET_FOREACH(vdpp, vmm_data_version_entries) { vdp = *vdpp; if (vdp->vdve_class == req->vdr_class && @@ -3722,15 +3756,15 @@ vmm_data_find(const vmm_data_req_t *req, int *err) * provider for this data. 
*/ if (vdp->vdve_len_expect != 0 && - vdp->vdve_len_expect != req->vdr_len) { - *err = ENOSPC; - return (NULL); + vdp->vdve_len_expect > req->vdr_len) { + *req->vdr_result_len = vdp->vdve_len_expect; + return (ENOSPC); } - return (vdp); + *resp = vdp; + return (0); } } - *err = EINVAL; - return (NULL); + return (EINVAL); } static void * @@ -3740,10 +3774,11 @@ vmm_data_from_class(const vmm_data_req_t *req, struct vm *vm, int vcpuid) /* per-cpu data/devices */ case VDC_LAPIC: return (vm_lapic(vm, vcpuid)); + case VDC_VMM_ARCH: + return (vm); case VDC_FPU: case VDC_REGISTER: - case VDC_VMM_ARCH: case VDC_MSR: /* * These have per-CPU handling which is dispatched outside @@ -3771,6 +3806,356 @@ vmm_data_from_class(const vmm_data_req_t *req, struct vm *vm, int vcpuid) } } +const uint32_t arch_msr_iter[] = { + MSR_EFER, + + /* + * While gsbase and fsbase are accessible via the MSR accessors, they + * are not included in MSR iteration since they are covered by the + * segment descriptor interface too. 
+ */ + MSR_KGSBASE, + + MSR_STAR, + MSR_LSTAR, + MSR_CSTAR, + MSR_SF_MASK, + + MSR_SYSENTER_CS_MSR, + MSR_SYSENTER_ESP_MSR, + MSR_SYSENTER_EIP_MSR, + MSR_PAT, +}; +const uint32_t generic_msr_iter[] = { + MSR_TSC, + MSR_MTRRcap, + MSR_MTRRdefType, + + MSR_MTRR4kBase, MSR_MTRR4kBase + 1, MSR_MTRR4kBase + 2, + MSR_MTRR4kBase + 3, MSR_MTRR4kBase + 4, MSR_MTRR4kBase + 5, + MSR_MTRR4kBase + 6, MSR_MTRR4kBase + 7, + + MSR_MTRR16kBase, MSR_MTRR16kBase + 1, + + MSR_MTRR64kBase, +}; + +static int +vmm_data_read_msrs(struct vm *vm, int vcpuid, const vmm_data_req_t *req) +{ + VERIFY3U(req->vdr_class, ==, VDC_MSR); + VERIFY3U(req->vdr_version, ==, 1); + + const uint_t num_msrs = nitems(arch_msr_iter) + nitems(generic_msr_iter) + + (VMM_MTRR_VAR_MAX * 2); + const uint32_t output_len = + num_msrs * sizeof (struct vdi_field_entry_v1); + *req->vdr_result_len = output_len; + + if (req->vdr_len < output_len) { + return (ENOSPC); + } + + struct vdi_field_entry_v1 *entryp = req->vdr_data; + for (uint_t i = 0; i < nitems(arch_msr_iter); i++, entryp++) { + const uint32_t msr = arch_msr_iter[i]; + uint64_t val = 0; + + int err = ops->vmgetmsr(vm->cookie, vcpuid, msr, &val); + /* All of these MSRs are expected to work */ + VERIFY0(err); + entryp->vfe_ident = msr; + entryp->vfe_value = val; + } + + struct vm_mtrr *mtrr = &vm->vcpu[vcpuid].mtrr; + for (uint_t i = 0; i < nitems(generic_msr_iter); i++, entryp++) { + const uint32_t msr = generic_msr_iter[i]; + + entryp->vfe_ident = msr; + switch (msr) { + case MSR_TSC: + /* + * Communicate this as the difference from the VM-wide + * offset of the boot time. + */ + entryp->vfe_value = vm->vcpu[vcpuid].tsc_offset; + break; + case MSR_MTRRcap: + case MSR_MTRRdefType: + case MSR_MTRR4kBase ... MSR_MTRR4kBase + 7: + case MSR_MTRR16kBase ... 
MSR_MTRR16kBase + 1: + case MSR_MTRR64kBase: { + int err = vm_rdmtrr(mtrr, msr, &entryp->vfe_value); + VERIFY0(err); + break; + } + default: + panic("unexpected msr export %x", msr); + } + } + /* Copy the variable MTRRs */ + for (uint_t i = 0; i < (VMM_MTRR_VAR_MAX * 2); i++, entryp++) { + const uint32_t msr = MSR_MTRRVarBase + i; + + entryp->vfe_ident = msr; + int err = vm_rdmtrr(mtrr, msr, &entryp->vfe_value); + VERIFY0(err); + } + return (0); +} + +static int +vmm_data_write_msrs(struct vm *vm, int vcpuid, const vmm_data_req_t *req) +{ + VERIFY3U(req->vdr_class, ==, VDC_MSR); + VERIFY3U(req->vdr_version, ==, 1); + + const struct vdi_field_entry_v1 *entryp = req->vdr_data; + const uint_t entry_count = + req->vdr_len / sizeof (struct vdi_field_entry_v1); + struct vm_mtrr *mtrr = &vm->vcpu[vcpuid].mtrr; + + /* + * First make sure that all of the MSRs can be manipulated. + * For now, this check is done by going though the getmsr handler + */ + for (uint_t i = 0; i < entry_count; i++, entryp++) { + const uint32_t msr = entryp->vfe_ident; + uint64_t val; + int err = 0; + + switch (msr) { + case MSR_TSC: + break; + default: + if (is_mtrr_msr(msr)) { + err = vm_rdmtrr(mtrr, msr, &val); + } else { + err = ops->vmgetmsr(vm->cookie, vcpuid, msr, + &val); + } + break; + } + if (err != 0) { + return (err); + } + } + + /* + * Fairly confident that all of the 'set' operations are at least + * targeting valid MSRs, continue on. + */ + entryp = req->vdr_data; + for (uint_t i = 0; i < entry_count; i++, entryp++) { + const uint32_t msr = entryp->vfe_ident; + const uint64_t val = entryp->vfe_value; + int err = 0; + + switch (msr) { + case MSR_TSC: + vm->vcpu[vcpuid].tsc_offset = entryp->vfe_value; + break; + default: + if (is_mtrr_msr(msr)) { + if (msr == MSR_MTRRcap) { + /* + * MTRRcap is read-only. 
If the current + * value matches the incoming one, + * consider it a success + */ + uint64_t comp; + err = vm_rdmtrr(mtrr, msr, &comp); + if (err != 0 || comp != val) { + err = EINVAL; + } + } else { + err = vm_wrmtrr(mtrr, msr, val); + } + } else { + err = ops->vmsetmsr(vm->cookie, vcpuid, msr, + val); + } + break; + } + if (err != 0) { + return (err); + } + } + *req->vdr_result_len = entry_count * sizeof (struct vdi_field_entry_v1); + + return (0); +} + +static const vmm_data_version_entry_t msr_v1 = { + .vdve_class = VDC_MSR, + .vdve_version = 1, + .vdve_len_per_item = sizeof (struct vdi_field_entry_v1), + /* Requires backend-specific dispatch */ + .vdve_readf = NULL, + .vdve_writef = NULL, +}; +VMM_DATA_VERSION(msr_v1); + +static const uint32_t vmm_arch_v1_fields[] = { + VAI_TSC_BOOT_OFFSET, + VAI_BOOT_HRTIME, + VAI_TSC_FREQ, +}; + +static bool +vmm_read_arch_field(struct vm *vm, uint32_t ident, uint64_t *valp) +{ + ASSERT(valp != NULL); + + switch (ident) { + case VAI_TSC_BOOT_OFFSET: + *valp = vm->boot_tsc_offset; + return (true); + case VAI_BOOT_HRTIME: + *valp = vm->boot_hrtime; + return (true); + case VAI_TSC_FREQ: + /* + * Since the system TSC calibration is not public, just derive + * it from the scaling functions available. 
+ */ + *valp = unscalehrtime(NANOSEC); + return (true); + default: + break; + } + return (false); +} + +static int +vmm_data_read_vmm_arch(void *arg, const vmm_data_req_t *req) +{ + struct vm *vm = arg; + + VERIFY3U(req->vdr_class, ==, VDC_VMM_ARCH); + VERIFY3U(req->vdr_version, ==, 1); + + struct vdi_field_entry_v1 *entryp = req->vdr_data; + + /* Specific fields requested */ + if ((req->vdr_flags & VDX_FLAG_READ_COPYIN) != 0) { + const uint_t count = + req->vdr_len / sizeof (struct vdi_field_entry_v1); + + for (uint_t i = 0; i < count; i++, entryp++) { + if (!vmm_read_arch_field(vm, entryp->vfe_ident, + &entryp->vfe_value)) { + return (EINVAL); + } + } + *req->vdr_result_len = + count * sizeof (struct vdi_field_entry_v1); + return (0); + } + + /* Emit all of the possible values */ + const uint32_t total_size = nitems(vmm_arch_v1_fields) * + sizeof (struct vdi_field_entry_v1); + *req->vdr_result_len = total_size; + if (req->vdr_len < total_size) { + return (ENOSPC); + } + for (uint_t i = 0; i < nitems(vmm_arch_v1_fields); i++, entryp++) { + entryp->vfe_ident = vmm_arch_v1_fields[i]; + VERIFY(vmm_read_arch_field(vm, entryp->vfe_ident, + &entryp->vfe_value)); + } + return (0); +} + +static int +vmm_data_write_vmm_arch(void *arg, const vmm_data_req_t *req) +{ + struct vm *vm = arg; + + VERIFY3U(req->vdr_class, ==, VDC_VMM_ARCH); + VERIFY3U(req->vdr_version, ==, 1); + + const struct vdi_field_entry_v1 *entryp = req->vdr_data; + const uint_t entry_count = + req->vdr_len / sizeof (struct vdi_field_entry_v1); + + for (uint_t i = 0; i < entry_count; i++, entryp++) { + const uint64_t val = entryp->vfe_value; + + switch (entryp->vfe_ident) { + case VAI_TSC_BOOT_OFFSET: + vm->boot_tsc_offset = val; + break; + case VAI_BOOT_HRTIME: + vm->boot_hrtime = val; + break; + case VAI_TSC_FREQ: + /* Guest TSC frequency not (currently) adjustable */ + return (EPERM); + default: + return (EINVAL); + } + } + *req->vdr_result_len = entry_count * sizeof (struct vdi_field_entry_v1); + return 
(0); +} + +static const vmm_data_version_entry_t vmm_arch_v1 = { + .vdve_class = VDC_VMM_ARCH, + .vdve_version = 1, + .vdve_len_per_item = sizeof (struct vdi_field_entry_v1), + .vdve_readf = vmm_data_read_vmm_arch, + .vdve_writef = vmm_data_write_vmm_arch, +}; +VMM_DATA_VERSION(vmm_arch_v1); + +static int +vmm_data_read_versions(void *arg, const vmm_data_req_t *req) +{ + VERIFY3U(req->vdr_class, ==, VDC_VERSION); + VERIFY3U(req->vdr_version, ==, 1); + + const uint32_t total_size = SET_COUNT(vmm_data_version_entries) * + sizeof (struct vdi_version_entry_v1); + + /* Make sure there is room for all of the entries */ + *req->vdr_result_len = total_size; + if (req->vdr_len < *req->vdr_result_len) { + return (ENOSPC); + } + + struct vdi_version_entry_v1 *entryp = req->vdr_data; + const vmm_data_version_entry_t **vdpp; + SET_FOREACH(vdpp, vmm_data_version_entries) { + const vmm_data_version_entry_t *vdp = *vdpp; + + entryp->vve_class = vdp->vdve_class; + entryp->vve_version = vdp->vdve_version; + entryp->vve_len_expect = vdp->vdve_len_expect; + entryp->vve_len_per_item = vdp->vdve_len_per_item; + entryp++; + } + return (0); +} + +static int +vmm_data_write_versions(void *arg, const vmm_data_req_t *req) +{ + /* Writing to the version information makes no sense */ + return (EPERM); +} + +static const vmm_data_version_entry_t versions_v1 = { + .vdve_class = VDC_VERSION, + .vdve_version = 1, + .vdve_len_per_item = sizeof (struct vdi_version_entry_v1), + .vdve_readf = vmm_data_read_versions, + .vdve_writef = vmm_data_write_versions, +}; +VMM_DATA_VERSION(versions_v1); + int vmm_data_read(struct vm *vm, int vcpuid, const vmm_data_req_t *req) { @@ -3782,28 +4167,34 @@ vmm_data_read(struct vm *vm, int vcpuid, const vmm_data_req_t *req) } } - const vmm_data_version_entry_t *entry; - entry = vmm_data_find(req, &err); - if (entry == NULL) { - ASSERT(err != 0); + const vmm_data_version_entry_t *entry = NULL; + err = vmm_data_find(req, &entry); + if (err != 0) { return (err); } + 
ASSERT(entry != NULL); void *datap = vmm_data_from_class(req, vm, vcpuid); if (datap != NULL) { err = entry->vdve_readf(datap, req); + + /* + * Successful reads of fixed-length data should populate the + * length of that result. + */ + if (err == 0 && entry->vdve_len_expect != 0) { + *req->vdr_result_len = entry->vdve_len_expect; + } } else { switch (req->vdr_class) { + case VDC_MSR: + err = vmm_data_read_msrs(vm, vcpuid, req); + break; case VDC_FPU: /* TODO: wire up to xsave export via hma_fpu iface */ err = EINVAL; break; case VDC_REGISTER: - case VDC_VMM_ARCH: - case VDC_MSR: - /* TODO: implement */ - err = EINVAL; - break; default: err = EINVAL; break; @@ -3824,28 +4215,33 @@ vmm_data_write(struct vm *vm, int vcpuid, const vmm_data_req_t *req) } } - const vmm_data_version_entry_t *entry; - entry = vmm_data_find(req, &err); - if (entry == NULL) { - ASSERT(err != 0); + const vmm_data_version_entry_t *entry = NULL; + err = vmm_data_find(req, &entry); + if (err != 0) { return (err); } + ASSERT(entry != NULL); void *datap = vmm_data_from_class(req, vm, vcpuid); if (datap != NULL) { err = entry->vdve_writef(datap, req); + /* + * Successful writes of fixed-length data should populate the + * length of that result. 
+ */ + if (err == 0 && entry->vdve_len_expect != 0) { + *req->vdr_result_len = entry->vdve_len_expect; + } } else { switch (req->vdr_class) { + case VDC_MSR: + err = vmm_data_write_msrs(vm, vcpuid, req); + break; case VDC_FPU: /* TODO: wire up to xsave import via hma_fpu iface */ err = EINVAL; break; case VDC_REGISTER: - case VDC_VMM_ARCH: - case VDC_MSR: - /* TODO: implement */ - err = EINVAL; - break; default: err = EINVAL; break; diff --git a/usr/src/uts/intel/io/vmm/vmm_sol_dev.c b/usr/src/uts/intel/io/vmm/vmm_sol_dev.c index 9a4693fc78..ee07779b21 100644 --- a/usr/src/uts/intel/io/vmm/vmm_sol_dev.c +++ b/usr/src/uts/intel/io/vmm/vmm_sol_dev.c @@ -1550,32 +1550,48 @@ vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md, } const size_t len = vdx.vdx_len; - void *buf = kmem_alloc(len, KM_SLEEP); - if ((vdx.vdx_flags & VDX_FLAG_READ_COPYIN) != 0) { - if (ddi_copyin(vdx.vdx_data, buf, len, md) != 0) { + void *buf = NULL; + if (len != 0) { + buf = kmem_alloc(len, KM_SLEEP); + if ((vdx.vdx_flags & VDX_FLAG_READ_COPYIN) == 0) { + bzero(buf, len); /* nothing copied in */ + } else if (ddi_copyin(vdx.vdx_data, buf, len, + md) != 0) { kmem_free(buf, len); error = EFAULT; break; } - } else { - bzero(buf, len); } + vdx.vdx_result_len = 0; vmm_data_req_t req = { .vdr_class = vdx.vdx_class, .vdr_version = vdx.vdx_version, .vdr_flags = vdx.vdx_flags, - .vdr_len = vdx.vdx_len, + .vdr_len = len, .vdr_data = buf, + .vdr_result_len = &vdx.vdx_result_len, }; error = vmm_data_read(sc->vmm_vm, vdx.vdx_vcpuid, &req); - if (error == 0) { + if (error == 0 && buf != NULL) { if (ddi_copyout(buf, vdx.vdx_data, len, md) != 0) { error = EFAULT; } } - kmem_free(buf, len); + + /* + * Copy out the transfer request so that the value of + * vdx_result_len can be made available, regardless of any + * error(s) which may have occurred. + */ + if (ddi_copyout(&vdx, datap, sizeof (vdx), md) != 0) { + error = (error != 0) ? 
error : EFAULT; + } + + if (buf != NULL) { + kmem_free(buf, len); + } break; } case VM_DATA_WRITE: { @@ -1595,19 +1611,24 @@ vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md, } const size_t len = vdx.vdx_len; - void *buf = kmem_alloc(len, KM_SLEEP); - if (ddi_copyin(vdx.vdx_data, buf, len, md) != 0) { - kmem_free(buf, len); - error = EFAULT; - break; + void *buf = NULL; + if (len != 0) { + buf = kmem_alloc(len, KM_SLEEP); + if (ddi_copyin(vdx.vdx_data, buf, len, md) != 0) { + kmem_free(buf, len); + error = EFAULT; + break; + } } + vdx.vdx_result_len = 0; vmm_data_req_t req = { .vdr_class = vdx.vdx_class, .vdr_version = vdx.vdx_version, .vdr_flags = vdx.vdx_flags, - .vdr_len = vdx.vdx_len, + .vdr_len = len, .vdr_data = buf, + .vdr_result_len = &vdx.vdx_result_len, }; if (vmm_allow_state_writes == 0) { /* XXX: Play it safe for now */ @@ -1617,13 +1638,25 @@ vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md, &req); } - if (error == 0 && + if (error == 0 && buf != NULL && (vdx.vdx_flags & VDX_FLAG_WRITE_COPYOUT) != 0) { if (ddi_copyout(buf, vdx.vdx_data, len, md) != 0) { error = EFAULT; } } - kmem_free(buf, len); + + /* + * Copy out the transfer request so that the value of + * vdx_result_len can be made available, regardless of any + * error(s) which may have occurred. + */ + if (ddi_copyout(&vdx, datap, sizeof (vdx), md) != 0) { + error = (error != 0) ? 
error : EFAULT; + } + + if (buf != NULL) { + kmem_free(buf, len); + } break; } diff --git a/usr/src/uts/intel/io/vmm/x86.c b/usr/src/uts/intel/io/vmm/x86.c index de48ba1d48..e593e0c04e 100644 --- a/usr/src/uts/intel/io/vmm/x86.c +++ b/usr/src/uts/intel/io/vmm/x86.c @@ -58,10 +58,10 @@ __FBSDID("$FreeBSD$"); #include <machine/specialreg.h> #include <machine/vmm.h> +#include <sys/vmm_kernel.h> #include "vmm_host.h" #include "vmm_util.h" -#include "x86.h" SYSCTL_DECL(_hw_vmm); @@ -80,6 +80,42 @@ static int cpuid_leaf_b = 1; */ static int vmm_force_invariant_tsc = 0; +#define CPUID_0000_0000 (0x0) +#define CPUID_0000_0001 (0x1) +#define CPUID_0000_0002 (0x2) +#define CPUID_0000_0003 (0x3) +#define CPUID_0000_0004 (0x4) +#define CPUID_0000_0006 (0x6) +#define CPUID_0000_0007 (0x7) +#define CPUID_0000_000A (0xA) +#define CPUID_0000_000B (0xB) +#define CPUID_0000_000D (0xD) +#define CPUID_0000_000F (0xF) +#define CPUID_0000_0010 (0x10) +#define CPUID_0000_0015 (0x15) +#define CPUID_8000_0000 (0x80000000) +#define CPUID_8000_0001 (0x80000001) +#define CPUID_8000_0002 (0x80000002) +#define CPUID_8000_0003 (0x80000003) +#define CPUID_8000_0004 (0x80000004) +#define CPUID_8000_0006 (0x80000006) +#define CPUID_8000_0007 (0x80000007) +#define CPUID_8000_0008 (0x80000008) +#define CPUID_8000_001D (0x8000001D) +#define CPUID_8000_001E (0x8000001E) + +/* + * CPUID instruction Fn0000_0001: + */ +#define CPUID_0000_0001_APICID_MASK (0xff<<24) +#define CPUID_0000_0001_APICID_SHIFT 24 + +/* + * CPUID instruction Fn0000_0001 ECX + */ +#define CPUID_0000_0001_FEAT0_VMX (1<<5) + + /* * Round up to the next power of two, if necessary, and then take log2. * Returns -1 if argument is zero. @@ -649,6 +685,10 @@ default_leaf: return (1); } +/* + * Return 'true' if the capability 'cap' is enabled in this virtual cpu + * and 'false' otherwise. 
+ */ bool vm_cpuid_capability(struct vm *vm, int vcpuid, enum vm_cpuid_capability cap) { @@ -690,3 +730,23 @@ vm_cpuid_capability(struct vm *vm, int vcpuid, enum vm_cpuid_capability cap) } return (rv); } + +bool +validate_guest_xcr0(uint64_t val, uint64_t limit_mask) +{ + /* x87 feature must be enabled */ + if ((val & XFEATURE_ENABLED_X87) == 0) { + return (false); + } + /* AVX cannot be enabled without SSE */ + if ((val & (XFEATURE_ENABLED_SSE | XFEATURE_ENABLED_AVX)) == + XFEATURE_ENABLED_SSE) { + return (false); + } + /* No bits should be outside what we dictate to be allowed */ + if ((val & ~limit_mask) != 0) { + return (false); + } + + return (true); +} diff --git a/usr/src/uts/intel/io/vmm/x86.h b/usr/src/uts/intel/io/vmm/x86.h deleted file mode 100644 index f3459e4f8a..0000000000 --- a/usr/src/uts/intel/io/vmm/x86.h +++ /dev/null @@ -1,85 +0,0 @@ -/*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD - * - * Copyright (c) 2011 NetApp, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD$ - */ - -#ifndef _X86_H_ -#define _X86_H_ - -#define CPUID_0000_0000 (0x0) -#define CPUID_0000_0001 (0x1) -#define CPUID_0000_0002 (0x2) -#define CPUID_0000_0003 (0x3) -#define CPUID_0000_0004 (0x4) -#define CPUID_0000_0006 (0x6) -#define CPUID_0000_0007 (0x7) -#define CPUID_0000_000A (0xA) -#define CPUID_0000_000B (0xB) -#define CPUID_0000_000D (0xD) -#define CPUID_0000_000F (0xF) -#define CPUID_0000_0010 (0x10) -#define CPUID_0000_0015 (0x15) -#define CPUID_8000_0000 (0x80000000) -#define CPUID_8000_0001 (0x80000001) -#define CPUID_8000_0002 (0x80000002) -#define CPUID_8000_0003 (0x80000003) -#define CPUID_8000_0004 (0x80000004) -#define CPUID_8000_0006 (0x80000006) -#define CPUID_8000_0007 (0x80000007) -#define CPUID_8000_0008 (0x80000008) -#define CPUID_8000_001D (0x8000001D) -#define CPUID_8000_001E (0x8000001E) - -/* - * CPUID instruction Fn0000_0001: - */ -#define CPUID_0000_0001_APICID_MASK (0xff<<24) -#define CPUID_0000_0001_APICID_SHIFT 24 - -/* - * CPUID instruction Fn0000_0001 ECX - */ -#define CPUID_0000_0001_FEAT0_VMX (1<<5) - -int x86_emulate_cpuid(struct vm *vm, int vcpu_id, uint64_t *rax, uint64_t *rbx, - uint64_t *rcx, uint64_t *rdx); - -enum vm_cpuid_capability { - VCC_NONE, - VCC_NO_EXECUTE, - VCC_FFXSR, - VCC_TCE, - VCC_LAST -}; - -/* - * Return 'true' if the capability 'cap' is enabled in this virtual cpu - * and 'false' otherwise. 
- */ -bool vm_cpuid_capability(struct vm *vm, int vcpuid, enum vm_cpuid_capability); -#endif diff --git a/usr/src/uts/intel/sys/vmm.h b/usr/src/uts/intel/sys/vmm.h index 268b2e82ce..50d76ab17c 100644 --- a/usr/src/uts/intel/sys/vmm.h +++ b/usr/src/uts/intel/sys/vmm.h @@ -103,6 +103,7 @@ enum vm_reg_name { VM_REG_GUEST_DR3, VM_REG_GUEST_DR6, VM_REG_GUEST_ENTRY_INST_LENGTH, + VM_REG_GUEST_XCR0, VM_REG_LAST }; diff --git a/usr/src/uts/intel/sys/vmm_data.h b/usr/src/uts/intel/sys/vmm_data.h index 1b8614543c..9ba385c5d6 100644 --- a/usr/src/uts/intel/sys/vmm_data.h +++ b/usr/src/uts/intel/sys/vmm_data.h @@ -18,7 +18,6 @@ #define _VMM_DATA_H_ /* VMM Data Classes */ -#define VDC_META 0 /* Meta information about data system */ #define VDC_VERSION 1 /* Version information for each data class */ /* Classes bearing per-CPU data */ @@ -42,21 +41,27 @@ /* VMM Data Identifiers */ - -/* - * VDC_REGISTER: - */ - /* - * VDC_MSR: + * Generic field encoding for 64-bit (or smaller) data which are identified by a + * 32-bit (or smaller) name. * - * Use MSR identifiers directly + * Used by the following classes/version: + * - VDC_REGISTER v1: `vm_reg_name` identifiers + * - VDC_MSR v1: MSR identifiers + * - VDC_VMM_ARCH v1: Identifiers described below */ - -struct vdi_msr_entry_v1 { - uint32_t vme_msr; +struct vdi_field_entry_v1 { + uint32_t vfe_ident; uint32_t _pad; - uint64_t vme_value; + uint64_t vfe_value; +}; + +/* VDC_VERSION */ +struct vdi_version_entry_v1 { + uint16_t vve_class; + uint16_t vve_version; + uint16_t vve_len_expect; + uint16_t vve_len_per_item; }; /* @@ -98,11 +103,22 @@ struct vdi_lapic_v1 { uint32_t vl_esr_pending; }; - /* * VDC_VMM_ARCH: */ +/* + * Version 1 identifiers: + */ + +/* Offset of guest TSC from system at time of boot */ +#define VAI_TSC_BOOT_OFFSET 1 +/* Time that guest (nominally) booted, as hrtime */ +#define VAI_BOOT_HRTIME 2 +/* Guest TSC frequency measured by hrtime (not affected by wall clock adj.) 
*/ +#define VAI_TSC_FREQ 3 + + /* VDC_IOAPIC: */ struct vdi_ioapic_v1 { diff --git a/usr/src/uts/intel/sys/vmm_dev.h b/usr/src/uts/intel/sys/vmm_dev.h index fc8ccf406e..8d1b2713dd 100644 --- a/usr/src/uts/intel/sys/vmm_dev.h +++ b/usr/src/uts/intel/sys/vmm_dev.h @@ -366,6 +366,7 @@ struct vm_data_xfer { uint16_t vdx_version; uint32_t vdx_flags; uint32_t vdx_len; + uint32_t vdx_result_len; void *vdx_data; }; @@ -384,7 +385,7 @@ struct vm_data_xfer { * best-effort activity. Nothing is to be inferred about the magnitude of a * change when the version is modified. It follows no rules like semver. */ -#define VMM_CURRENT_INTERFACE_VERSION 2 +#define VMM_CURRENT_INTERFACE_VERSION 3 #define VMMCTL_IOC_BASE (('V' << 16) | ('M' << 8)) |