Commit metadata:
author:    Patrick Mooney <pmooney@pfmooney.com>, 2022-03-26 05:36:07 +0000
committer: Patrick Mooney <pmooney@oxide.computer>, 2022-04-11 21:17:51 +0000
commit:    d2f938fdf23aca7cabde8395625fdfaa2118133d (patch)
tree:      4ee383b7e19f9cfe76166ce39ebbafb287a1eaf8
parent:    6de0af11ed5afd6a7c0ff23e31c1954813e3bf88 (diff)
download:  illumos-joyent-d2f938fdf23aca7cabde8395625fdfaa2118133d.tar.gz
14598 bhyve vlapic should handle APICBASE changes
14599 simplify bhyve MSR handling
Reviewed by: Dan Cross <cross@oxidecomputer.com>
Reviewed by: Luqman Aden <luqman@oxide.computer>
Approved by: Dan McDonald <danmcd@joyent.com>
28 files changed, 1332 insertions, 607 deletions
diff --git a/usr/src/pkg/manifests/system-bhyve-tests.p5m b/usr/src/pkg/manifests/system-bhyve-tests.p5m index 4222b1d09b..1a0ba140ce 100644 --- a/usr/src/pkg/manifests/system-bhyve-tests.p5m +++ b/usr/src/pkg/manifests/system-bhyve-tests.p5m @@ -30,11 +30,16 @@ file path=opt/bhyve-tests/bin/bhyvetest mode=0555 dir path=opt/bhyve-tests/runfiles file path=opt/bhyve-tests/runfiles/default.run mode=0444 dir path=opt/bhyve-tests/tests +dir path=opt/bhyve-tests/tests/inst_emul +file path=opt/bhyve-tests/tests/inst_emul/rdmsr mode=0555 +file path=opt/bhyve-tests/tests/inst_emul/wrmsr mode=0555 dir path=opt/bhyve-tests/tests/kdev file path=opt/bhyve-tests/tests/kdev/vatpit_freq mode=0555 file path=opt/bhyve-tests/tests/kdev/vhpet_freq mode=0555 file path=opt/bhyve-tests/tests/kdev/vlapic_freq mode=0555 file path=opt/bhyve-tests/tests/kdev/vlapic_freq_periodic mode=0555 +file path=opt/bhyve-tests/tests/kdev/vlapic_mmio_access mode=0555 +file path=opt/bhyve-tests/tests/kdev/vlapic_msr_access mode=0555 file path=opt/bhyve-tests/tests/kdev/vpmtmr_freq mode=0555 dir path=opt/bhyve-tests/tests/mevent file path=opt/bhyve-tests/tests/mevent/lists_delete mode=0555 diff --git a/usr/src/test/bhyve-tests/runfiles/default.run b/usr/src/test/bhyve-tests/runfiles/default.run index c37bee591d..476fa5f839 100644 --- a/usr/src/test/bhyve-tests/runfiles/default.run +++ b/usr/src/test/bhyve-tests/runfiles/default.run @@ -35,9 +35,18 @@ tests = [ 'vhpet_freq', 'vlapic_freq', 'vlapic_freq_periodic', + 'vlapic_mmio_access', + 'vlapic_msr_access', 'vpmtmr_freq' ] +[/opt/bhyve-tests/tests/inst_emul] +user = root +tests = [ + 'rdmsr', + 'wrmsr' + ] + # Tests of userspace mevent system, built from cmd/bhyve [/opt/bhyve-tests/tests/mevent] tests = ['lists_delete', 'read_disable', 'read_pause', 'read_requeue', diff --git a/usr/src/test/bhyve-tests/tests/Makefile b/usr/src/test/bhyve-tests/tests/Makefile index 8d528c3f80..5dc416db01 100644 --- a/usr/src/test/bhyve-tests/tests/Makefile +++ 
b/usr/src/test/bhyve-tests/tests/Makefile @@ -15,6 +15,6 @@ .PARALLEL: $(SUBDIRS) -SUBDIRS = kdev vmm +SUBDIRS = inst_emul kdev vmm include $(SRC)/test/Makefile.com diff --git a/usr/src/test/bhyve-tests/tests/common/in_guest.c b/usr/src/test/bhyve-tests/tests/common/in_guest.c index 31bebc0665..dbd6bdf22a 100644 --- a/usr/src/test/bhyve-tests/tests/common/in_guest.c +++ b/usr/src/test/bhyve-tests/tests/common/in_guest.c @@ -172,6 +172,17 @@ test_fail_vmexit(const struct vm_exit *vexit) vexit->u.inout.bytes, vexit->u.inout.flags); break; + case VM_EXITCODE_RDMSR: + (void) fprintf(stderr, hdr_fmt, "RDMSR", vexit->rip); + (void) fprintf(stderr, "\tcode: %08x\n", vexit->u.msr.code); + break; + case VM_EXITCODE_WRMSR: + (void) fprintf(stderr, hdr_fmt, "WRMSR", vexit->rip); + (void) fprintf(stderr, + "\tcode: %08x\n" + "\twval: %016lx\n", + vexit->u.msr.code, vexit->u.msr.wval); + break; case VM_EXITCODE_MMIO: (void) fprintf(stderr, hdr_fmt, "MMIO", vexit->rip); (void) fprintf(stderr, @@ -437,7 +448,7 @@ which_exit_kind(struct vm_entry *ventry, const struct vm_exit *vexit) case VM_EXITCODE_INOUT: if (inout->port == IOP_TEST_RESULT && (inout->flags & INOUT_IN) == 0) { - if (inout->eax == 0) { + if (inout->eax == TEST_RESULT_PASS) { return (VEK_TEST_PASS); } else { return (VEK_TEST_FAIL); diff --git a/usr/src/test/bhyve-tests/tests/common/payload_common.h b/usr/src/test/bhyve-tests/tests/common/payload_common.h index 895364f18e..4141cec219 100644 --- a/usr/src/test/bhyve-tests/tests/common/payload_common.h +++ b/usr/src/test/bhyve-tests/tests/common/payload_common.h @@ -41,4 +41,8 @@ #define IOP_TEST_PARAM2 0xef22U #define IOP_TEST_PARAM3 0xef23U +/* Expected values emitted through IOP_TEST_RESULT */ +#define TEST_RESULT_PASS 0 +#define TEST_RESULT_FAIL 1 + #endif /* _PAYLOAD_COMMON_H_ */ diff --git a/usr/src/test/bhyve-tests/tests/inst_emul/Makefile b/usr/src/test/bhyve-tests/tests/inst_emul/Makefile new file mode 100644 index 0000000000..2a9d30e67b --- /dev/null +++ 
b/usr/src/test/bhyve-tests/tests/inst_emul/Makefile @@ -0,0 +1,72 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# Copyright 2022 Oxide Computer Company + +include $(SRC)/cmd/Makefile.cmd +include $(SRC)/cmd/Makefile.cmd.64 +include $(SRC)/test/Makefile.com + +PROG = rdmsr \ + wrmsr + +PAYLOADS = $(PROG) +include ../Makefile.in_guest + +COMMON_OBJS = in_guest.o + +CLEANFILES = $(COMMON_OBJS) $(PAYLOAD_CLEANFILES) +CLOBBERFILES = $(PROG) + +ROOTOPTPKG = $(ROOT)/opt/bhyve-tests +TESTDIR = $(ROOTOPTPKG)/tests/inst_emul + +CMDS = $(PROG:%=$(TESTDIR)/%) +$(CMDS) := FILEMODE = 0555 + +CSTD= $(CSTD_GNU99) +CPPFLAGS = -I$(COMPAT)/bhyve -I$(CONTRIB)/bhyve \ + -I$(COMPAT)/bhyve/amd64 -I$(CONTRIB)/bhyve/amd64 \ + $(CPPFLAGS.master) \ + -I$(SRC)/uts/i86pc/io/vmm \ + -I$(SRC)/uts/i86pc \ + -I../common + +ASFLAGS += -P -D__STDC__ -D_ASM + + +CFLAGS = -m64 +$(PROG) := LDLIBS += -lvmmapi + +all: $(PROG) + +install: all $(CMDS) + +clean: + -$(RM) $(CLEANFILES) +clobber: clean + -$(RM) $(CLOBBERFILES) + +$(CMDS): $(TESTDIR) $(PROG) + +$(TESTDIR): + $(INS.dir) + +$(TESTDIR)/%: % + $(INS.file) + +%: %.c pobj_%.o $(COMMON_OBJS) + $(LINK.c) -o $@ $^ $(LDLIBS) + $(POST_PROCESS) + +%: %.o + $(LINK.c) -o $@ $^ $(LDLIBS) + $(POST_PROCESS) diff --git a/usr/src/test/bhyve-tests/tests/inst_emul/payload_rdmsr.s b/usr/src/test/bhyve-tests/tests/inst_emul/payload_rdmsr.s new file mode 100644 index 0000000000..4802bbbd17 --- /dev/null +++ b/usr/src/test/bhyve-tests/tests/inst_emul/payload_rdmsr.s @@ -0,0 +1,26 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License 
("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2022 Oxide Computer Company + */ + +#include <sys/asm_linkage.h> + +ENTRY(start) + /* + * Pad test value with garbage to make sure it is properly trimmed off + * when the emulation handles the exit. + */ + movq $0xff01020304, %rcx + rdmsr + hlt +SET_SIZE(start) diff --git a/usr/src/test/bhyve-tests/tests/inst_emul/payload_wrmsr.s b/usr/src/test/bhyve-tests/tests/inst_emul/payload_wrmsr.s new file mode 100644 index 0000000000..2e09a31700 --- /dev/null +++ b/usr/src/test/bhyve-tests/tests/inst_emul/payload_wrmsr.s @@ -0,0 +1,28 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2022 Oxide Computer Company + */ + +#include <sys/asm_linkage.h> + +ENTRY(start) + /* + * Pad test values with garbage to make sure it is properly trimmed off + * when the emulation handles the exit. 
+ */ + movq $0xff01020304, %rcx + movq $0xff05060708, %rdx + movq $0xff090a0b0c, %rax + wrmsr + hlt +SET_SIZE(start) diff --git a/usr/src/test/bhyve-tests/tests/inst_emul/rdmsr.c b/usr/src/test/bhyve-tests/tests/inst_emul/rdmsr.c new file mode 100644 index 0000000000..6610b92552 --- /dev/null +++ b/usr/src/test/bhyve-tests/tests/inst_emul/rdmsr.c @@ -0,0 +1,75 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2022 Oxide Computer Company + */ + +#include <stdio.h> +#include <unistd.h> +#include <stdlib.h> +#include <strings.h> +#include <libgen.h> +#include <assert.h> + +#include <sys/types.h> +#include <sys/sysmacros.h> +#include <sys/debug.h> +#include <sys/vmm.h> +#include <sys/vmm_dev.h> +#include <vmmapi.h> + +#include "in_guest.h" + +int +main(int argc, char *argv[]) +{ + const char *test_suite_name = basename(argv[0]); + struct vmctx *ctx = NULL; + int err; + + ctx = test_initialize(test_suite_name); + + err = test_setup_vcpu(ctx, 0, MEM_LOC_PAYLOAD, MEM_LOC_STACK); + if (err != 0) { + test_fail_errno(err, "Could not initialize vcpu0"); + } + + struct vm_entry ventry = { 0 }; + struct vm_exit vexit = { 0 }; + const uint32_t expected_code = 0x01020304; + + do { + const enum vm_exit_kind kind = + test_run_vcpu(ctx, 0, &ventry, &vexit); + switch (kind) { + case VEK_REENTR: + break; + case VEK_UNHANDLED: + /* Look for wrmsr of test value */ + if (vexit.exitcode != VM_EXITCODE_RDMSR) { + test_fail_vmexit(&vexit); + } + if (vexit.u.msr.code != expected_code) { + test_fail_msg("code %08x != %08x\n", + vexit.u.msr.code, expected_code); + } + test_pass(); + break; + + case 
VEK_TEST_PASS: + case VEK_TEST_FAIL: + default: + test_fail_vmexit(&vexit); + break; + } + } while (true); +} diff --git a/usr/src/test/bhyve-tests/tests/inst_emul/wrmsr.c b/usr/src/test/bhyve-tests/tests/inst_emul/wrmsr.c new file mode 100644 index 0000000000..5eb88f38fc --- /dev/null +++ b/usr/src/test/bhyve-tests/tests/inst_emul/wrmsr.c @@ -0,0 +1,80 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2022 Oxide Computer Company + */ + +#include <stdio.h> +#include <unistd.h> +#include <stdlib.h> +#include <strings.h> +#include <libgen.h> +#include <assert.h> + +#include <sys/types.h> +#include <sys/sysmacros.h> +#include <sys/debug.h> +#include <sys/vmm.h> +#include <sys/vmm_dev.h> +#include <vmmapi.h> + +#include "in_guest.h" + +int +main(int argc, char *argv[]) +{ + const char *test_suite_name = basename(argv[0]); + struct vmctx *ctx = NULL; + int err; + + ctx = test_initialize(test_suite_name); + + err = test_setup_vcpu(ctx, 0, MEM_LOC_PAYLOAD, MEM_LOC_STACK); + if (err != 0) { + test_fail_errno(err, "Could not initialize vcpu0"); + } + + struct vm_entry ventry = { 0 }; + struct vm_exit vexit = { 0 }; + const uint32_t expected_code = 0x01020304; + const uint64_t expected_wval = 0x05060708090a0b0c; + + do { + const enum vm_exit_kind kind = + test_run_vcpu(ctx, 0, &ventry, &vexit); + switch (kind) { + case VEK_REENTR: + break; + case VEK_UNHANDLED: + /* Look for wrmsr of test value */ + if (vexit.exitcode != VM_EXITCODE_WRMSR) { + test_fail_vmexit(&vexit); + } + if (vexit.u.msr.code != expected_code) { + test_fail_msg("code %08x != %08x\n", + vexit.u.msr.code, 
expected_code); + } + if (vexit.u.msr.wval != expected_wval) { + test_fail_msg("wval %lx != %lx\n", + vexit.u.msr.wval, expected_wval); + } + test_pass(); + break; + + case VEK_TEST_PASS: + case VEK_TEST_FAIL: + default: + test_fail_vmexit(&vexit); + break; + } + } while (true); +} diff --git a/usr/src/test/bhyve-tests/tests/kdev/Makefile b/usr/src/test/bhyve-tests/tests/kdev/Makefile index 52f3c2576c..0ebc5d5459 100644 --- a/usr/src/test/bhyve-tests/tests/kdev/Makefile +++ b/usr/src/test/bhyve-tests/tests/kdev/Makefile @@ -19,6 +19,8 @@ PROG = vpmtmr_freq \ vhpet_freq \ vlapic_freq \ vlapic_freq_periodic \ + vlapic_mmio_access \ + vlapic_msr_access \ vatpit_freq PAYLOADS = $(PROG) diff --git a/usr/src/test/bhyve-tests/tests/kdev/payload_utils.h b/usr/src/test/bhyve-tests/tests/kdev/payload_utils.h index 8bd51023df..bbb168698a 100644 --- a/usr/src/test/bhyve-tests/tests/kdev/payload_utils.h +++ b/usr/src/test/bhyve-tests/tests/kdev/payload_utils.h @@ -17,6 +17,7 @@ #define _PAYLOAD_UTILS_H_ #include <sys/types.h> +#include <stdbool.h> void outb(uint16_t, uint8_t); void outw(uint16_t, uint16_t); @@ -24,5 +25,7 @@ void outl(uint16_t, uint32_t); uint8_t inb(uint16_t); uint16_t inw(uint16_t); uint32_t inl(uint16_t); +uint64_t rdmsr(uint32_t); +void wrmsr(uint32_t, uint64_t); #endif /* _PAYLOAD_UTILS_H_ */ diff --git a/usr/src/test/bhyve-tests/tests/kdev/payload_utils.s b/usr/src/test/bhyve-tests/tests/kdev/payload_utils.s index 8c8e745c17..e1114f5cb1 100644 --- a/usr/src/test/bhyve-tests/tests/kdev/payload_utils.s +++ b/usr/src/test/bhyve-tests/tests/kdev/payload_utils.s @@ -53,3 +53,20 @@ ENTRY(inl) inl (%dx) ret SET_SIZE(inl) + +ENTRY(rdmsr) + movl %edi, %ecx + rdmsr + shlq $32, %rdx + orq %rdx, %rax + ret +SET_SIZE(rdmsr) + +ENTRY(wrmsr) + movq %rsi, %rdx + shrq $32, %rdx + movl %esi, %eax + movl %edi, %ecx + wrmsr + ret +SET_SIZE(wrmsr) diff --git a/usr/src/test/bhyve-tests/tests/kdev/payload_vlapic_mmio_access.c 
b/usr/src/test/bhyve-tests/tests/kdev/payload_vlapic_mmio_access.c new file mode 100644 index 0000000000..af39f33ad3 --- /dev/null +++ b/usr/src/test/bhyve-tests/tests/kdev/payload_vlapic_mmio_access.c @@ -0,0 +1,85 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2022 Oxide Computer Company + */ + +#include "payload_common.h" +#include "payload_utils.h" +#include "test_defs.h" + +#define MSR_APICBASE 0x1b + +#define APICBASE_X2APIC (1 << 10) + + +static void +write_vlapic(uint_t reg, uint32_t value) +{ + volatile uint32_t *ptr = (uint32_t *)(MMIO_LAPIC_BASE + reg); + *ptr = value; +} + +static uint32_t +read_vlapic(uint_t reg) +{ + volatile uint32_t *ptr = (uint32_t *)(MMIO_LAPIC_BASE + reg); + return (*ptr); +} + +static void +barrier(void) +{ + asm volatile("": : :"memory"); +} + +void +start(void) +{ + uint64_t base = rdmsr(MSR_APICBASE); + if ((base & APICBASE_X2APIC) != 0) { + /* bail if the host has enabled x2apic for us */ + outb(IOP_TEST_RESULT, TEST_RESULT_FAIL); + } + + /* Access the "normal" register offsets */ + for (uint_t reg = 0; reg < 0x1000; reg += 16) { + uint32_t val; + + /* + * This ignores the fact that some register offsets are reserved + * (such as 0x3a0-0x3d0 and 0x3f0-0xff0) while others may be + * read-only or write-only. For the time being, we know that + * the emulation in bhyve will not emit errors or faults for + * such indiscretions committed via MMIO. + */ + val = read_vlapic(reg); + write_vlapic(reg, val); + } + + /* + * Scan through byte-wise, even though such behavior is undefined as far + * as a to-specification LAPIC is concerned. 
+ */ + for (uint_t off = 0; off < 0x1000; off++) { + volatile uint8_t *ptr = (uint8_t *)(MMIO_LAPIC_BASE + off); + + uint8_t val; + + val = *ptr; + barrier(); + *ptr = val; + } + + /* If we made it this far without an exception, it is a win */ + outb(IOP_TEST_RESULT, TEST_RESULT_PASS); +} diff --git a/usr/src/test/bhyve-tests/tests/kdev/payload_vlapic_msr_access.c b/usr/src/test/bhyve-tests/tests/kdev/payload_vlapic_msr_access.c new file mode 100644 index 0000000000..0598f70531 --- /dev/null +++ b/usr/src/test/bhyve-tests/tests/kdev/payload_vlapic_msr_access.c @@ -0,0 +1,132 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2022 Oxide Computer Company + */ + +#include "payload_common.h" +#include "payload_utils.h" +#include "test_defs.h" + +#define MSR_APICBASE 0x1b +#define MSR_X2APIC_BASE 0x800 +#define MSR_X2APIC_MAX 0x8ff + +#define APICBASE_X2APIC (1 << 10) + +static bool +reg_readable(uint32_t reg) +{ + switch (reg) { + case 0x802: /* ID */ + case 0x803: /* VER */ + + case 0x808: /* TPR */ + case 0x809: /* APR */ + case 0x80a: /* PPR */ + + case 0x80c: /* RRR */ + case 0x80d: /* LDR */ + case 0x80e: /* DFR */ + case 0x80f: /* SVR */ + + case 0x810 ... 0x817: /* ISR */ + case 0x818 ... 0x81f: /* TMR */ + case 0x820 ... 
0x827: /* IRR */ + + case 0x828: /* ESR */ + + case 0x82f: /* LVT_CMCI */ + case 0x830: /* ICR */ + + case 0x832: /* LVT_TIMER */ + case 0x833: /* LVT_THERMAL */ + case 0x834: /* LVT_PERF */ + case 0x835: /* LVT_LINT0 */ + case 0x836: /* LVT_LINT1 */ + case 0x837: /* LVT_ERROR */ + case 0x838: /* TIMER_ICR */ + case 0x839: /* TIMER_CCR */ + + case 0x83e: /* TIMER_DCR */ + return (true); + default: + return (false); + } +} + +static bool +reg_writable(uint32_t reg) +{ + switch (reg) { + case 0x802: /* ID */ + + case 0x808: /* TPR */ + + case 0x80b: /* EOI */ + + case 0x80d: /* LDR */ + case 0x80e: /* DFR */ + case 0x80f: /* SVR */ + + case 0x828: /* ESR */ + + case 0x82f: /* LVT_CMCI */ + case 0x830: /* ICR */ + + case 0x832: /* LVT_TIMER */ + case 0x833: /* LVT_THERMAL */ + case 0x834: /* LVT_PERF */ + case 0x835: /* LVT_LINT0 */ + case 0x836: /* LVT_LINT1 */ + case 0x837: /* LVT_ERROR */ + case 0x838: /* TIMER_ICR */ + + case 0x83e: /* TIMER_DCR */ + case 0x83f: /* SELF_IPI */ + return (true); + default: + return (false); + } +} + +void +start(void) +{ + uint64_t base = rdmsr(MSR_APICBASE); + if ((base & APICBASE_X2APIC) == 0) { + /* bail if the host has not enabled x2apic for us */ + outb(IOP_TEST_RESULT, TEST_RESULT_FAIL); + } + + for (uint32_t msr = MSR_X2APIC_BASE; msr <= MSR_X2APIC_MAX; msr++) { + uint64_t val = 0; + + if (reg_readable(msr)) { + val = rdmsr(msr); + } + + if (reg_writable(msr)) { + if (msr == 0x828) { + /* + * While the LAPIC is in x2APIC mode, writes to + * the ESR must carry a value of 0. 
+ */ + val = 0; + } + wrmsr(msr, val); + } + } + + /* If we made it this far without a #GP, it counts as a win */ + outb(IOP_TEST_RESULT, TEST_RESULT_PASS); +} diff --git a/usr/src/test/bhyve-tests/tests/kdev/vlapic_mmio_access.c b/usr/src/test/bhyve-tests/tests/kdev/vlapic_mmio_access.c new file mode 100644 index 0000000000..faee622a22 --- /dev/null +++ b/usr/src/test/bhyve-tests/tests/kdev/vlapic_mmio_access.c @@ -0,0 +1,116 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2022 Oxide Computer Company + */ + +#include <stdio.h> +#include <unistd.h> +#include <stdlib.h> +#include <strings.h> +#include <libgen.h> +#include <assert.h> + +#include <sys/types.h> +#include <sys/sysmacros.h> +#include <sys/debug.h> +#include <sys/vmm.h> +#include <sys/vmm_dev.h> +#include <vmmapi.h> + +#include "in_guest.h" +#include "test_defs.h" + +const char *strict_name = "STRICT_APICV"; + +static bool +strict_apicv(void) +{ + const char *strict_val; + + if ((strict_val = getenv(strict_name)) != NULL) { + if (strlen(strict_val) != 0 && + strcmp(strict_val, "0") != 0) { + return (true); + } + } + return (false); +} + +int +main(int argc, char *argv[]) +{ + const char *test_suite_name = basename(argv[0]); + struct vmctx *ctx = NULL; + int err; + + ctx = test_initialize(test_suite_name); + + err = test_setup_vcpu(ctx, 0, MEM_LOC_PAYLOAD, MEM_LOC_STACK); + if (err != 0) { + test_fail_errno(err, "Could not initialize vcpu0"); + } + + /* + * Although x2APIC should be off by default, make doubly sure by + * explicitly setting it so. 
+ */ + err = vm_set_x2apic_state(ctx, 0, X2APIC_DISABLED); + if (err != 0) { + test_fail_errno(err, "Could not disable x2apic on vcpu0"); + } + + struct vm_entry ventry = { 0 }; + struct vm_exit vexit = { 0 }; + + do { + const enum vm_exit_kind kind = + test_run_vcpu(ctx, 0, &ventry, &vexit); + switch (kind) { + case VEK_REENTR: + break; + case VEK_TEST_PASS: + test_pass(); + break; + case VEK_TEST_FAIL: + test_fail_msg("payload signaled failure"); + break; + case VEK_UNHANDLED: + /* + * Not all APICv-accelerated accesses are properly + * handled by the in-kernel emulation today. + * (See: illumos #13847). + * + * To allow this test to be useful on systems without + * APICv, we suppress such failures unless explicitly + * strict handling is requested. + */ + if (vexit.exitcode == VM_EXITCODE_VMX && + (vexit.u.vmx.exit_reason == 44 || + vexit.u.vmx.exit_reason == 56)) { + if (strict_apicv()) { + test_fail_vmexit(&vexit); + } + (void) fprintf(stderr, + "Ignoring APICv access issue\n" + "If strictness is desired, " + "run with %s=1 in env\n", strict_name); + test_pass(); + } + test_fail_vmexit(&vexit); + break; + default: + test_fail_vmexit(&vexit); + break; + } + } while (true); +} diff --git a/usr/src/test/bhyve-tests/tests/kdev/vlapic_msr_access.c b/usr/src/test/bhyve-tests/tests/kdev/vlapic_msr_access.c new file mode 100644 index 0000000000..33b0e5f081 --- /dev/null +++ b/usr/src/test/bhyve-tests/tests/kdev/vlapic_msr_access.c @@ -0,0 +1,73 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. 
+ */ + +/* + * Copyright 2022 Oxide Computer Company + */ + +#include <stdio.h> +#include <unistd.h> +#include <stdlib.h> +#include <strings.h> +#include <libgen.h> +#include <assert.h> + +#include <sys/types.h> +#include <sys/sysmacros.h> +#include <sys/debug.h> +#include <sys/vmm.h> +#include <sys/vmm_dev.h> +#include <vmmapi.h> + +#include "in_guest.h" +#include "test_defs.h" + +int +main(int argc, char *argv[]) +{ + const char *test_suite_name = basename(argv[0]); + struct vmctx *ctx = NULL; + int err; + + ctx = test_initialize(test_suite_name); + + err = test_setup_vcpu(ctx, 0, MEM_LOC_PAYLOAD, MEM_LOC_STACK); + if (err != 0) { + test_fail_errno(err, "Could not initialize vcpu0"); + } + + err = vm_set_x2apic_state(ctx, 0, X2APIC_ENABLED); + if (err != 0) { + test_fail_errno(err, "Could not enable x2apic on vcpu0"); + } + + struct vm_entry ventry = { 0 }; + struct vm_exit vexit = { 0 }; + + do { + const enum vm_exit_kind kind = + test_run_vcpu(ctx, 0, &ventry, &vexit); + switch (kind) { + case VEK_REENTR: + break; + case VEK_TEST_PASS: + test_pass(); + break; + case VEK_TEST_FAIL: + test_fail_msg("payload signaled failure"); + break; + case VEK_UNHANDLED: + default: + test_fail_vmexit(&vexit); + break; + } + } while (true); +} diff --git a/usr/src/uts/i86pc/io/vmm/amd/svm.c b/usr/src/uts/i86pc/io/vmm/amd/svm.c index 8ffc1c6557..11c1e9c249 100644 --- a/usr/src/uts/i86pc/io/vmm/amd/svm.c +++ b/usr/src/uts/i86pc/io/vmm/amd/svm.c @@ -63,6 +63,7 @@ __FBSDID("$FreeBSD$"); #include <machine/vmm_dev.h> #include <sys/vmm_instruction_emul.h> #include <sys/vmm_vm.h> +#include <sys/vmm_kernel.h> #include "vmm_lapic.h" #include "vmm_stat.h" @@ -1115,109 +1116,118 @@ svm_inject_irq(struct svm_softc *sc, int vcpu, int vector) #define EFER_MBZ_BITS 0xFFFFFFFFFFFF0200UL -static int +static vm_msr_result_t svm_write_efer(struct svm_softc *sc, int vcpu, uint64_t newval) { - struct vm_exit *vme; - struct vmcb_state *state; - uint64_t changed, lma, oldval; + struct vmcb_state 
*state = svm_get_vmcb_state(sc, vcpu); + uint64_t lma; int error; - state = svm_get_vmcb_state(sc, vcpu); - - oldval = state->efer; - VCPU_CTR2(sc->vm, vcpu, "wrmsr(efer) %lx/%lx", oldval, newval); - newval &= ~0xFE; /* clear the Read-As-Zero (RAZ) bits */ - changed = oldval ^ newval; - if (newval & EFER_MBZ_BITS) - goto gpf; + if (newval & EFER_MBZ_BITS) { + return (VMR_GP); + } /* APMv2 Table 14-5 "Long-Mode Consistency Checks" */ + const uint64_t changed = state->efer ^ newval; if (changed & EFER_LME) { - if (state->cr0 & CR0_PG) - goto gpf; + if (state->cr0 & CR0_PG) { + return (VMR_GP); + } } /* EFER.LMA = EFER.LME & CR0.PG */ - if ((newval & EFER_LME) != 0 && (state->cr0 & CR0_PG) != 0) + if ((newval & EFER_LME) != 0 && (state->cr0 & CR0_PG) != 0) { lma = EFER_LMA; - else + } else { lma = 0; + } + if ((newval & EFER_LMA) != lma) { + return (VMR_GP); + } - if ((newval & EFER_LMA) != lma) - goto gpf; - - if (newval & EFER_NXE) { - if (!vm_cpuid_capability(sc->vm, vcpu, VCC_NO_EXECUTE)) - goto gpf; + if ((newval & EFER_NXE) != 0 && + !vm_cpuid_capability(sc->vm, vcpu, VCC_NO_EXECUTE)) { + return (VMR_GP); + } + if ((newval & EFER_FFXSR) != 0 && + !vm_cpuid_capability(sc->vm, vcpu, VCC_FFXSR)) { + return (VMR_GP); + } + if ((newval & EFER_TCE) != 0 && + !vm_cpuid_capability(sc->vm, vcpu, VCC_TCE)) { + return (VMR_GP); } /* - * XXX bhyve does not enforce segment limits in 64-bit mode. Until - * this is fixed flag guest attempt to set EFER_LMSLE as an error. + * Until bhyve has proper support for long-mode segment limits, just + * toss a #GP at the guest if they attempt to use it. 
*/ if (newval & EFER_LMSLE) { - vme = vm_exitinfo(sc->vm, vcpu); - vm_exit_svm(vme, VMCB_EXIT_MSR, 1, 0); - return (-1); - } - - if (newval & EFER_FFXSR) { - if (!vm_cpuid_capability(sc->vm, vcpu, VCC_FFXSR)) - goto gpf; - } - - if (newval & EFER_TCE) { - if (!vm_cpuid_capability(sc->vm, vcpu, VCC_TCE)) - goto gpf; + return (VMR_GP); } error = svm_setreg(sc, vcpu, VM_REG_GUEST_EFER, newval); - KASSERT(error == 0, ("%s: error %d updating efer", __func__, error)); - return (0); -gpf: - vm_inject_gp(sc->vm, vcpu); - return (0); + VERIFY0(error); + return (VMR_OK); } static int -emulate_wrmsr(struct svm_softc *sc, int vcpu, uint_t num, uint64_t val) +svm_handle_msr(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit, + bool is_wrmsr) { - int error; + struct vmcb_state *state = svm_get_vmcb_state(svm_sc, vcpu); + struct svm_regctx *ctx = svm_get_guest_regctx(svm_sc, vcpu); + const uint32_t ecx = ctx->sctx_rcx; + vm_msr_result_t res; + uint64_t val = 0; - if (lapic_msr(num)) - error = lapic_wrmsr(sc->vm, vcpu, num, val); - else if (num == MSR_EFER) - error = svm_write_efer(sc, vcpu, val); - else - error = svm_wrmsr(sc, vcpu, num, val); + if (is_wrmsr) { + vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_WRMSR, 1); + val = ctx->sctx_rdx << 32 | (uint32_t)state->rax; - return (error); -} + if (vlapic_owned_msr(ecx)) { + struct vlapic *vlapic = vm_lapic(svm_sc->vm, vcpu); -static int -emulate_rdmsr(struct svm_softc *sc, int vcpu, uint_t num) -{ - struct vmcb_state *state; - struct svm_regctx *ctx; - uint64_t result; - int error; + res = vlapic_wrmsr(vlapic, ecx, val); + } else if (ecx == MSR_EFER) { + res = svm_write_efer(svm_sc, vcpu, val); + } else { + res = svm_wrmsr(svm_sc, vcpu, ecx, val); + } + } else { + vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_RDMSR, 1); - if (lapic_msr(num)) - error = lapic_rdmsr(sc->vm, vcpu, num, &result); - else - error = svm_rdmsr(sc, vcpu, num, &result); + if (vlapic_owned_msr(ecx)) { + struct vlapic *vlapic = vm_lapic(svm_sc->vm, vcpu); - if 
(error == 0) { - state = svm_get_vmcb_state(sc, vcpu); - ctx = svm_get_guest_regctx(sc, vcpu); - state->rax = result & 0xffffffff; - ctx->sctx_rdx = result >> 32; + res = vlapic_rdmsr(vlapic, ecx, &val); + } else { + res = svm_rdmsr(svm_sc, vcpu, ecx, &val); + } } - return (error); + switch (res) { + case VMR_OK: + /* Store rdmsr result in the appropriate registers */ + if (!is_wrmsr) { + state->rax = (uint32_t)val; + ctx->sctx_rdx = val >> 32; + } + return (1); + case VMR_GP: + vm_inject_gp(svm_sc->vm, vcpu); + return (1); + case VMR_UNHANLDED: + vmexit->exitcode = is_wrmsr ? + VM_EXITCODE_WRMSR : VM_EXITCODE_RDMSR; + vmexit->u.msr.code = ecx; + vmexit->u.msr.wval = val; + return (0); + default: + panic("unexpected msr result %u\n", res); + } } /* @@ -1253,8 +1263,7 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit) struct vmcb_state *state; struct vmcb_ctrl *ctrl; struct svm_regctx *ctx; - uint64_t code, info1, info2, val; - uint32_t eax, ecx, edx; + uint64_t code, info1, info2; int error, errcode_valid = 0, handled, idtvec, reflect; ctx = svm_get_guest_regctx(svm_sc, vcpu); @@ -1426,41 +1435,8 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit) } handled = 1; break; - case VMCB_EXIT_MSR: /* MSR access. 
*/ - eax = state->rax; - ecx = ctx->sctx_rcx; - edx = ctx->sctx_rdx; - - if (info1) { - vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_WRMSR, 1); - val = (uint64_t)edx << 32 | eax; - VCPU_CTR2(svm_sc->vm, vcpu, "wrmsr %x val %lx", - ecx, val); - error = emulate_wrmsr(svm_sc, vcpu, ecx, val); - if (error == 0) { - handled = 1; - } else if (error > 0) { - vmexit->exitcode = VM_EXITCODE_WRMSR; - vmexit->u.msr.code = ecx; - vmexit->u.msr.wval = val; - } else { - KASSERT(vmexit->exitcode != VM_EXITCODE_BOGUS, - ("emulate_wrmsr retu with bogus exitcode")); - } - } else { - VCPU_CTR1(svm_sc->vm, vcpu, "rdmsr %x", ecx); - vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_RDMSR, 1); - error = emulate_rdmsr(svm_sc, vcpu, ecx); - if (error == 0) { - handled = 1; - } else if (error > 0) { - vmexit->exitcode = VM_EXITCODE_RDMSR; - vmexit->u.msr.code = ecx; - } else { - KASSERT(vmexit->exitcode != VM_EXITCODE_BOGUS, - ("emulate_rdmsr retu with bogus exitcode")); - } - } + case VMCB_EXIT_MSR: + handled = svm_handle_msr(svm_sc, vcpu, vmexit, info1 != 0); break; case VMCB_EXIT_IO: handled = svm_handle_inout(svm_sc, vcpu, vmexit); diff --git a/usr/src/uts/i86pc/io/vmm/amd/svm_msr.c b/usr/src/uts/i86pc/io/vmm/amd/svm_msr.c index ab1accbd7a..4fa7826fbf 100644 --- a/usr/src/uts/i86pc/io/vmm/amd/svm_msr.c +++ b/usr/src/uts/i86pc/io/vmm/amd/svm_msr.c @@ -50,6 +50,7 @@ __FBSDID("$FreeBSD$"); #include <machine/cpufunc.h> #include <machine/specialreg.h> #include <machine/vmm.h> +#include <sys/vmm_kernel.h> #include "svm.h" #include "vmcb.h" @@ -121,11 +122,9 @@ svm_msr_guest_exit(struct svm_softc *sc, int vcpu) /* MSR_KGSBASE will be restored on the way back to userspace */ } -int -svm_rdmsr(struct svm_softc *sc, int vcpu, uint_t num, uint64_t *result) +vm_msr_result_t +svm_rdmsr(struct svm_softc *sc, int vcpu, uint32_t num, uint64_t *result) { - int error = 0; - switch (num) { case MSR_SYSCFG: case MSR_AMDK8_IPM: @@ -145,18 +144,14 @@ svm_rdmsr(struct svm_softc *sc, int vcpu, uint_t num, uint64_t *result) } 
break; default: - error = EINVAL; - break; + return (VMR_UNHANLDED); } - - return (error); + return (VMR_OK); } -int -svm_wrmsr(struct svm_softc *sc, int vcpu, uint_t num, uint64_t val) +vm_msr_result_t +svm_wrmsr(struct svm_softc *sc, int vcpu, uint32_t num, uint64_t val) { - int error = 0; - switch (num) { case MSR_SYSCFG: /* Ignore writes */ @@ -177,9 +172,8 @@ svm_wrmsr(struct svm_softc *sc, int vcpu, uint_t num, uint64_t val) case MSR_EXTFEATURES: break; default: - error = EINVAL; - break; + return (VMR_UNHANLDED); } - return (error); + return (VMR_OK); } diff --git a/usr/src/uts/i86pc/io/vmm/amd/svm_msr.h b/usr/src/uts/i86pc/io/vmm/amd/svm_msr.h index 03ef2acfe7..8f0d14e6b9 100644 --- a/usr/src/uts/i86pc/io/vmm/amd/svm_msr.h +++ b/usr/src/uts/i86pc/io/vmm/amd/svm_msr.h @@ -38,7 +38,7 @@ void svm_msr_guest_init(struct svm_softc *sc, int vcpu); void svm_msr_guest_enter(struct svm_softc *sc, int vcpu); void svm_msr_guest_exit(struct svm_softc *sc, int vcpu); -int svm_wrmsr(struct svm_softc *sc, int vcpu, uint_t num, uint64_t val); -int svm_rdmsr(struct svm_softc *sc, int vcpu, uint_t num, uint64_t *result); +vm_msr_result_t svm_wrmsr(struct svm_softc *, int, uint32_t, uint64_t); +vm_msr_result_t svm_rdmsr(struct svm_softc *, int, uint32_t, uint64_t *); #endif /* _SVM_MSR_H_ */ diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx.c b/usr/src/uts/i86pc/io/vmm/intel/vmx.c index d07a8142e3..a44c90dcbe 100644 --- a/usr/src/uts/i86pc/io/vmm/intel/vmx.c +++ b/usr/src/uts/i86pc/io/vmm/intel/vmx.c @@ -69,6 +69,7 @@ __FBSDID("$FreeBSD$"); #include <machine/specialreg.h> #include <machine/vmparam.h> #include <sys/vmm_vm.h> +#include <sys/vmm_kernel.h> #include <machine/vmm.h> #include <machine/vmm_dev.h> @@ -1901,11 +1902,7 @@ static int vmx_handle_apic_write(struct vmx *vmx, int vcpuid, struct vlapic *vlapic, uint64_t qual) { - int handled, offset; - uint32_t *apic_regs, vector; - - handled = HANDLED; - offset = APIC_WRITE_OFFSET(qual); + const uint_t offset = 
APIC_WRITE_OFFSET(qual); if (!apic_access_virtualization(vmx, vcpuid)) { /* @@ -1917,8 +1914,11 @@ vmx_handle_apic_write(struct vmx *vmx, int vcpuid, struct vlapic *vlapic, */ if (x2apic_virtualization(vmx, vcpuid) && offset == APIC_OFFSET_SELF_IPI) { - apic_regs = (uint32_t *)(vlapic->apic_page); - vector = apic_regs[APIC_OFFSET_SELF_IPI / 4]; + const uint32_t *apic_regs = + (uint32_t *)(vlapic->apic_page); + const uint32_t vector = + apic_regs[APIC_OFFSET_SELF_IPI / 4]; + vlapic_self_ipi_handler(vlapic, vector); return (HANDLED); } else @@ -1942,9 +1942,7 @@ vmx_handle_apic_write(struct vmx *vmx, int vcpuid, struct vlapic *vlapic, vlapic_esr_write_handler(vlapic); break; case APIC_OFFSET_ICR_LOW: - if (vlapic_icrlo_write_handler(vlapic) != 0) { - handled = UNHANDLED; - } + vlapic_icrlo_write_handler(vlapic); break; case APIC_OFFSET_CMCI_LVT: case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT: @@ -1957,10 +1955,9 @@ vmx_handle_apic_write(struct vmx *vmx, int vcpuid, struct vlapic *vlapic, vlapic_dcr_write_handler(vlapic); break; default: - handled = UNHANDLED; - break; + return (UNHANDLED); } - return (handled); + return (HANDLED); } static bool @@ -2063,35 +2060,57 @@ vmx_task_switch_reason(uint64_t qual) } static int -emulate_wrmsr(struct vmx *vmx, int vcpuid, uint_t num, uint64_t val) +vmx_handle_msr(struct vmx *vmx, int vcpuid, struct vm_exit *vmexit, + bool is_wrmsr) { - int error; + struct vmxctx *vmxctx = &vmx->ctx[vcpuid]; + const uint32_t ecx = vmxctx->guest_rcx; + vm_msr_result_t res; + uint64_t val = 0; - if (lapic_msr(num)) - error = lapic_wrmsr(vmx->vm, vcpuid, num, val); - else - error = vmx_wrmsr(vmx, vcpuid, num, val); + if (is_wrmsr) { + vmm_stat_incr(vmx->vm, vcpuid, VMEXIT_WRMSR, 1); + val = vmxctx->guest_rdx << 32 | (uint32_t)vmxctx->guest_rax; - return (error); -} + if (vlapic_owned_msr(ecx)) { + struct vlapic *vlapic = vm_lapic(vmx->vm, vcpuid); -static int -emulate_rdmsr(struct vmx *vmx, int vcpuid, uint_t num) -{ - uint64_t result; - int 
error; + res = vlapic_wrmsr(vlapic, ecx, val); + } else { + res = vmx_wrmsr(vmx, vcpuid, ecx, val); + } + } else { + vmm_stat_incr(vmx->vm, vcpuid, VMEXIT_RDMSR, 1); - if (lapic_msr(num)) - error = lapic_rdmsr(vmx->vm, vcpuid, num, &result); - else - error = vmx_rdmsr(vmx, vcpuid, num, &result); + if (vlapic_owned_msr(ecx)) { + struct vlapic *vlapic = vm_lapic(vmx->vm, vcpuid); - if (error == 0) { - vmx->ctx[vcpuid].guest_rax = (uint32_t)result; - vmx->ctx[vcpuid].guest_rdx = result >> 32; + res = vlapic_rdmsr(vlapic, ecx, &val); + } else { + res = vmx_rdmsr(vmx, vcpuid, ecx, &val); + } } - return (error); + switch (res) { + case VMR_OK: + /* Store rdmsr result in the appropriate registers */ + if (!is_wrmsr) { + vmxctx->guest_rax = (uint32_t)val; + vmxctx->guest_rdx = val >> 32; + } + return (HANDLED); + case VMR_GP: + vm_inject_gp(vmx->vm, vcpuid); + return (HANDLED); + case VMR_UNHANLDED: + vmexit->exitcode = is_wrmsr ? + VM_EXITCODE_WRMSR : VM_EXITCODE_RDMSR; + vmexit->u.msr.code = ecx; + vmexit->u.msr.wval = val; + return (UNHANDLED); + default: + panic("unexpected msr result %u\n", res); + } } static int @@ -2102,7 +2121,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) struct vie *vie; struct vlapic *vlapic; struct vm_task_switch *ts; - uint32_t eax, ecx, edx, idtvec_info, idtvec_err, intr_info; + uint32_t idtvec_info, idtvec_err, intr_info; uint32_t intr_type, intr_vec, reason; uint64_t exitintinfo, qual, gpa; @@ -2242,44 +2261,9 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) } break; case EXIT_REASON_RDMSR: - vmm_stat_incr(vmx->vm, vcpu, VMEXIT_RDMSR, 1); - ecx = vmxctx->guest_rcx; - VCPU_CTR1(vmx->vm, vcpu, "rdmsr 0x%08x", ecx); - SDT_PROBE4(vmm, vmx, exit, rdmsr, vmx, vcpu, vmexit, ecx); - error = emulate_rdmsr(vmx, vcpu, ecx); - if (error == 0) { - handled = HANDLED; - } else if (error > 0) { - vmexit->exitcode = VM_EXITCODE_RDMSR; - vmexit->u.msr.code = ecx; - } else { - /* Return to userspace with a valid 
exitcode */ - KASSERT(vmexit->exitcode != VM_EXITCODE_BOGUS, - ("emulate_rdmsr retu with bogus exitcode")); - } - break; case EXIT_REASON_WRMSR: - vmm_stat_incr(vmx->vm, vcpu, VMEXIT_WRMSR, 1); - eax = vmxctx->guest_rax; - ecx = vmxctx->guest_rcx; - edx = vmxctx->guest_rdx; - VCPU_CTR2(vmx->vm, vcpu, "wrmsr 0x%08x value 0x%016lx", - ecx, (uint64_t)edx << 32 | eax); - SDT_PROBE5(vmm, vmx, exit, wrmsr, vmx, vmexit, vcpu, ecx, - (uint64_t)edx << 32 | eax); - error = emulate_wrmsr(vmx, vcpu, ecx, - (uint64_t)edx << 32 | eax); - if (error == 0) { - handled = HANDLED; - } else if (error > 0) { - vmexit->exitcode = VM_EXITCODE_WRMSR; - vmexit->u.msr.code = ecx; - vmexit->u.msr.wval = (uint64_t)edx << 32 | eax; - } else { - /* Return to userspace with a valid exitcode */ - KASSERT(vmexit->exitcode != VM_EXITCODE_BOGUS, - ("emulate_wrmsr retu with bogus exitcode")); - } + handled = vmx_handle_msr(vmx, vcpu, vmexit, + reason == EXIT_REASON_WRMSR); break; case EXIT_REASON_HLT: vmm_stat_incr(vmx->vm, vcpu, VMEXIT_HLT, 1); diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.c b/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.c index 844e8b9708..f9c292f659 100644 --- a/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.c +++ b/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.c @@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$"); #include <machine/md_var.h> #include <machine/specialreg.h> #include <machine/vmm.h> +#include <sys/vmm_kernel.h> #include "vmx.h" #include "vmx_msr.h" @@ -421,14 +422,10 @@ vmx_msr_guest_exit(struct vmx *vmx, int vcpuid) /* MSR_KGSBASE will be restored on the way back to userspace */ } -int -vmx_rdmsr(struct vmx *vmx, int vcpuid, uint_t num, uint64_t *val) +vm_msr_result_t +vmx_rdmsr(struct vmx *vmx, int vcpuid, uint32_t num, uint64_t *val) { - const uint64_t *guest_msrs; - int error; - - guest_msrs = vmx->guest_msrs[vcpuid]; - error = 0; + const uint64_t *guest_msrs = vmx->guest_msrs[vcpuid]; switch (num) { case MSR_IA32_FEATURE_CONTROL: @@ -453,21 +450,16 @@ vmx_rdmsr(struct vmx *vmx, int 
vcpuid, uint_t num, uint64_t *val) *val = guest_msrs[IDX_MSR_PAT]; break; default: - error = EINVAL; - break; + return (VMR_UNHANLDED); } - return (error); + return (VMR_OK); } -int -vmx_wrmsr(struct vmx *vmx, int vcpuid, uint_t num, uint64_t val) +vm_msr_result_t +vmx_wrmsr(struct vmx *vmx, int vcpuid, uint32_t num, uint64_t val) { - uint64_t *guest_msrs; + uint64_t *guest_msrs = vmx->guest_msrs[vcpuid]; uint64_t changed; - int error; - - guest_msrs = vmx->guest_msrs[vcpuid]; - error = 0; switch (num) { case MSR_IA32_MISC_ENABLE: @@ -486,20 +478,19 @@ vmx_wrmsr(struct vmx *vmx, int vcpuid, uint_t num, uint64_t val) /* * Punt to userspace if any other bits are being modified. */ - if (changed) - error = EINVAL; - + if (changed) { + return (VMR_UNHANLDED); + } break; case MSR_PAT: - if (pat_valid(val)) - guest_msrs[IDX_MSR_PAT] = val; - else - vm_inject_gp(vmx->vm, vcpuid); + if (!pat_valid(val)) { + return (VMR_GP); + } + guest_msrs[IDX_MSR_PAT] = val; break; default: - error = EINVAL; - break; + return (VMR_UNHANLDED); } - return (error); + return (VMR_OK); } diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.h b/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.h index 5df9fc631d..551f2d659a 100644 --- a/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.h +++ b/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.h @@ -34,14 +34,12 @@ #ifndef _VMX_MSR_H_ #define _VMX_MSR_H_ -struct vmx; - void vmx_msr_init(void); void vmx_msr_guest_init(struct vmx *vmx, int vcpuid); void vmx_msr_guest_enter(struct vmx *vmx, int vcpuid); void vmx_msr_guest_exit(struct vmx *vmx, int vcpuid); -int vmx_rdmsr(struct vmx *, int vcpuid, uint_t num, uint64_t *val); -int vmx_wrmsr(struct vmx *, int vcpuid, uint_t num, uint64_t val); +vm_msr_result_t vmx_rdmsr(struct vmx *, int, uint32_t, uint64_t *); +vm_msr_result_t vmx_wrmsr(struct vmx *, int, uint32_t, uint64_t); int vmx_set_ctlreg(int ctl_reg, int true_ctl_reg, uint32_t ones_mask, uint32_t zeros_mask, uint32_t *retval); diff --git a/usr/src/uts/i86pc/io/vmm/io/vlapic.c 
b/usr/src/uts/i86pc/io/vmm/io/vlapic.c index e88438da0d..12a20257a9 100644 --- a/usr/src/uts/i86pc/io/vmm/io/vlapic.c +++ b/usr/src/uts/i86pc/io/vmm/io/vlapic.c @@ -60,6 +60,7 @@ __FBSDID("$FreeBSD$"); #include <machine/clock.h> #include <machine/vmm.h> +#include <sys/vmm_kernel.h> #include "vmm_lapic.h" #include "vmm_ktr.h" @@ -79,8 +80,6 @@ __FBSDID("$FreeBSD$"); #define VLAPIC_VERSION (16) -#define x2apic(vlapic) (((vlapic)->msr_apicbase & APICBASE_X2APIC) ? 1 : 0) - /* * The 'vlapic->timer_mtx' is used to provide mutual exclusion between the * vlapic_callout_handler() and vcpu accesses to: @@ -98,6 +97,8 @@ __FBSDID("$FreeBSD$"); */ #define VLAPIC_BUS_FREQ (128 * 1024 * 1024) +#define APICBASE_ADDR_MASK 0xfffffffffffff000UL + static void vlapic_set_error(struct vlapic *, uint32_t, bool); static void vlapic_callout_handler(void *arg); @@ -107,11 +108,38 @@ static void vlapic_isrstk_eoi(struct vlapic *, int); static void vlapic_isrstk_verify(const struct vlapic *); #endif /* __ISRVEC_DEBUG */ + +static __inline bool +vlapic_x2mode(const struct vlapic *vlapic) +{ + return ((vlapic->msr_apicbase & APICBASE_X2APIC) != 0); +} + +static __inline bool +vlapic_hw_disabled(const struct vlapic *vlapic) +{ + return ((vlapic->msr_apicbase & APICBASE_ENABLED) == 0); +} + +static __inline bool +vlapic_sw_disabled(const struct vlapic *vlapic) +{ + const struct LAPIC *lapic = vlapic->apic_page; + + return ((lapic->svr & APIC_SVR_ENABLE) == 0); +} + +static __inline bool +vlapic_enabled(const struct vlapic *vlapic) +{ + return (!vlapic_hw_disabled(vlapic) && !vlapic_sw_disabled(vlapic)); +} + static __inline uint32_t vlapic_get_id(struct vlapic *vlapic) { - if (x2apic(vlapic)) + if (vlapic_x2mode(vlapic)) return (vlapic->vcpuid); else return (vlapic->vcpuid << 24); @@ -135,7 +163,7 @@ vlapic_dfr_write_handler(struct vlapic *vlapic) struct LAPIC *lapic; lapic = vlapic->apic_page; - if (x2apic(vlapic)) { + if (vlapic_x2mode(vlapic)) { VM_CTR1(vlapic->vm, "ignoring write to DFR in 
x2apic mode: %#x", lapic->dfr); lapic->dfr = 0; @@ -154,7 +182,7 @@ vlapic_ldr_write_handler(struct vlapic *vlapic) lapic = vlapic->apic_page; /* LDR is read-only in x2apic mode */ - if (x2apic(vlapic)) { + if (vlapic_x2mode(vlapic)) { VLAPIC_CTR1(vlapic, "ignoring write to LDR in x2apic mode: %#x", lapic->ldr); lapic->ldr = x2apic_ldr(vlapic); @@ -687,8 +715,7 @@ static VMM_STAT_ARRAY(LVTS_TRIGGERRED, VLAPIC_MAXLVT_INDEX + 1, int vlapic_trigger_lvt(struct vlapic *vlapic, int vector) { - - if (vlapic_enabled(vlapic) == false) { + if (!vlapic_enabled(vlapic)) { /* * When the local APIC is global/hardware disabled, * LINT[1:0] pins are configured as INTR and NMI pins, @@ -866,7 +893,7 @@ vlapic_calcdest(struct vm *vm, cpuset_t *dmask, uint32_t dest, bool phys, mda_ldest = mda_flat_ldest; } else if ((dfr & APIC_DFR_MODEL_MASK) == APIC_DFR_MODEL_CLUSTER) { - if (x2apic(vlapic)) { + if (vlapic_x2mode(vlapic)) { cluster = ldr >> 16; ldest = ldr & 0xffff; } else { @@ -911,14 +938,6 @@ vlapic_set_tpr(struct vlapic *vlapic, uint8_t val) } } -static uint8_t -vlapic_get_tpr(struct vlapic *vlapic) -{ - struct LAPIC *lapic = vlapic->apic_page; - - return (lapic->tpr); -} - void vlapic_set_cr8(struct vlapic *vlapic, uint64_t val) { @@ -936,13 +955,12 @@ vlapic_set_cr8(struct vlapic *vlapic, uint64_t val) uint64_t vlapic_get_cr8(struct vlapic *vlapic) { - uint8_t tpr; + const struct LAPIC *lapic = vlapic->apic_page; - tpr = vlapic_get_tpr(vlapic); - return (tpr >> 4); + return (lapic->tpr >> 4); } -int +void vlapic_icrlo_write_handler(struct vlapic *vlapic) { int i; @@ -955,7 +973,7 @@ vlapic_icrlo_write_handler(struct vlapic *vlapic) lapic->icr_lo &= ~APIC_DELSTAT_PEND; icrval = ((uint64_t)lapic->icr_hi << 32) | lapic->icr_lo; - if (x2apic(vlapic)) + if (vlapic_x2mode(vlapic)) dest = icrval >> 32; else dest = icrval >> (32 + 24); @@ -965,12 +983,12 @@ vlapic_icrlo_write_handler(struct vlapic *vlapic) if (mode == APIC_DELMODE_FIXED && vec < 16) { vlapic_set_error(vlapic, 
APIC_ESR_SEND_ILLEGAL_VECTOR, false); - return (0); + return; } if (mode == APIC_DELMODE_INIT && (icrval & APIC_LEVEL_MASK) == APIC_LEVEL_DEASSERT) { /* No work required to deassert INIT */ - return (0); + return; } if ((mode == APIC_DELMODE_STARTUP || mode == APIC_DELMODE_INIT) && !(dsh == APIC_DEST_DESTFLD || dsh == APIC_DEST_ALLESELF)) { @@ -978,15 +996,18 @@ vlapic_icrlo_write_handler(struct vlapic *vlapic) * While Intel makes no mention of restrictions for destination * shorthand when sending INIT or SIPI, AMD requires either a * specific destination or all-excluding self. Common use seems - * to be restricted to those two cases. + * to be restricted to those two cases. Until handling is in + * place to halt a guest which makes such a frivolous request, + * we will ignore them. */ - return (-1); + return; } switch (dsh) { case APIC_DEST_DESTFLD: vlapic_calcdest(vlapic->vm, &dmask, dest, - (icrval & APIC_DESTMODE_LOG) == 0, false, x2apic(vlapic)); + (icrval & APIC_DESTMODE_LOG) == 0, false, + vlapic_x2mode(vlapic)); break; case APIC_DEST_SELF: CPU_SETOF(vlapic->vcpuid, &dmask); @@ -1033,17 +1054,16 @@ vlapic_icrlo_write_handler(struct vlapic *vlapic) break; } } - return (0); } void -vlapic_self_ipi_handler(struct vlapic *vlapic, uint64_t val) +vlapic_self_ipi_handler(struct vlapic *vlapic, uint32_t val) { - int vec; + const int vec = val & 0xff; - KASSERT(x2apic(vlapic), ("SELF_IPI does not exist in xAPIC mode")); + /* self-IPI is only exposed via x2APIC */ + ASSERT(vlapic_x2mode(vlapic)); - vec = val & 0xff; lapic_intr_edge(vlapic->vm, vlapic->vcpuid, vec); vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_IPI_SEND, 1); vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_IPI_RECV, 1); @@ -1155,231 +1175,185 @@ vlapic_svr_write_handler(struct vlapic *vlapic) } } -int -vlapic_read(struct vlapic *vlapic, int mmio_access, uint64_t offset, - uint64_t *data) +static bool +vlapic_read(struct vlapic *vlapic, uint16_t offset, uint32_t *outp) { - struct LAPIC *lapic = 
vlapic->apic_page; - uint32_t *reg; - int i; - - /* Ignore MMIO accesses in x2APIC mode */ - if (x2apic(vlapic) && mmio_access) { - VLAPIC_CTR1(vlapic, "MMIO read from offset %#lx in x2APIC mode", - offset); - *data = 0; - goto done; - } - - if (!x2apic(vlapic) && !mmio_access) { - /* - * XXX Generate GP fault for MSR accesses in xAPIC mode - */ - VLAPIC_CTR1(vlapic, "x2APIC MSR read from offset %#lx in " - "xAPIC mode", offset); - *data = 0; - goto done; - } + struct LAPIC *lapic = vlapic->apic_page; + uint32_t *reg; + int i; - if (offset > sizeof (*lapic)) { - *data = 0; - goto done; - } + ASSERT3U(offset & 0x3, ==, 0); + ASSERT3U(offset, <, PAGESIZE); + ASSERT3P(outp, !=, NULL); - offset &= ~3; + uint32_t data = 0; switch (offset) { - case APIC_OFFSET_ID: - *data = lapic->id; - break; - case APIC_OFFSET_VER: - *data = lapic->version; - break; - case APIC_OFFSET_TPR: - *data = vlapic_get_tpr(vlapic); - break; - case APIC_OFFSET_APR: - *data = lapic->apr; - break; - case APIC_OFFSET_PPR: - *data = lapic->ppr; - break; - case APIC_OFFSET_EOI: - *data = lapic->eoi; - break; - case APIC_OFFSET_LDR: - *data = lapic->ldr; - break; - case APIC_OFFSET_DFR: - *data = lapic->dfr; - break; - case APIC_OFFSET_SVR: - *data = lapic->svr; - break; - case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7: - i = (offset - APIC_OFFSET_ISR0) >> 2; - reg = &lapic->isr0; - *data = *(reg + i); - break; - case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7: - i = (offset - APIC_OFFSET_TMR0) >> 2; - reg = &lapic->tmr0; - *data = *(reg + i); - break; - case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7: - i = (offset - APIC_OFFSET_IRR0) >> 2; - reg = &lapic->irr0; - *data = atomic_load_acq_int(reg + i); - break; - case APIC_OFFSET_ESR: - *data = lapic->esr; - break; - case APIC_OFFSET_ICR_LOW: - *data = lapic->icr_lo; - if (x2apic(vlapic)) - *data |= (uint64_t)lapic->icr_hi << 32; - break; - case APIC_OFFSET_ICR_HI: - *data = lapic->icr_hi; - break; - case APIC_OFFSET_CMCI_LVT: - case APIC_OFFSET_TIMER_LVT ... 
APIC_OFFSET_ERROR_LVT: - *data = vlapic_get_lvt(vlapic, offset); + case APIC_OFFSET_ID: + data = lapic->id; + break; + case APIC_OFFSET_VER: + data = lapic->version; + break; + case APIC_OFFSET_TPR: + data = lapic->tpr; + break; + case APIC_OFFSET_APR: + data = lapic->apr; + break; + case APIC_OFFSET_PPR: + data = lapic->ppr; + break; + case APIC_OFFSET_LDR: + data = lapic->ldr; + break; + case APIC_OFFSET_DFR: + data = lapic->dfr; + break; + case APIC_OFFSET_SVR: + data = lapic->svr; + break; + case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7: + i = (offset - APIC_OFFSET_ISR0) >> 2; + reg = &lapic->isr0; + data = *(reg + i); + break; + case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7: + i = (offset - APIC_OFFSET_TMR0) >> 2; + reg = &lapic->tmr0; + data = *(reg + i); + break; + case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7: + i = (offset - APIC_OFFSET_IRR0) >> 2; + reg = &lapic->irr0; + data = atomic_load_acq_int(reg + i); + break; + case APIC_OFFSET_ESR: + data = lapic->esr; + break; + case APIC_OFFSET_ICR_LOW: + data = lapic->icr_lo; + break; + case APIC_OFFSET_ICR_HI: + data = lapic->icr_hi; + break; + case APIC_OFFSET_CMCI_LVT: + case APIC_OFFSET_TIMER_LVT ... 
APIC_OFFSET_ERROR_LVT: + data = vlapic_get_lvt(vlapic, offset); #ifdef INVARIANTS - reg = vlapic_get_lvtptr(vlapic, offset); - KASSERT(*data == *reg, ("inconsistent lvt value at " - "offset %#lx: %#lx/%#x", offset, *data, *reg)); + reg = vlapic_get_lvtptr(vlapic, offset); + ASSERT3U(data, ==, *reg); #endif - break; - case APIC_OFFSET_TIMER_ICR: - *data = lapic->icr_timer; - break; - case APIC_OFFSET_TIMER_CCR: - *data = vlapic_get_ccr(vlapic); - break; - case APIC_OFFSET_TIMER_DCR: - *data = lapic->dcr_timer; - break; - case APIC_OFFSET_SELF_IPI: - /* - * XXX generate a GP fault if vlapic is in x2apic mode - */ - *data = 0; - break; - case APIC_OFFSET_RRR: - default: - *data = 0; - break; + break; + case APIC_OFFSET_TIMER_ICR: + data = lapic->icr_timer; + break; + case APIC_OFFSET_TIMER_CCR: + data = vlapic_get_ccr(vlapic); + break; + case APIC_OFFSET_TIMER_DCR: + data = lapic->dcr_timer; + break; + case APIC_OFFSET_RRR: + data = 0; + break; + + case APIC_OFFSET_SELF_IPI: + case APIC_OFFSET_EOI: + /* Write-only register */ + *outp = 0; + return (false); + + default: + /* Invalid register */ + *outp = 0; + return (false); } -done: - VLAPIC_CTR2(vlapic, "vlapic read offset %#x, data %#lx", offset, *data); - return (0); + + *outp = data; + return (true); } -int -vlapic_write(struct vlapic *vlapic, int mmio_access, uint64_t offset, - uint64_t data) +static bool +vlapic_write(struct vlapic *vlapic, uint16_t offset, uint32_t data) { struct LAPIC *lapic = vlapic->apic_page; uint32_t *regptr; - int retval; - - KASSERT((offset & 0xf) == 0 && offset < PAGE_SIZE, - ("vlapic_write: invalid offset %lx", offset)); - VLAPIC_CTR2(vlapic, "vlapic write offset %#lx, data %#lx", - offset, data); + ASSERT3U(offset & 0xf, ==, 0); + ASSERT3U(offset, <, PAGESIZE); - if (offset > sizeof (*lapic)) - return (0); - - /* Ignore MMIO accesses in x2APIC mode */ - if (x2apic(vlapic) && mmio_access) { - VLAPIC_CTR2(vlapic, "MMIO write of %#lx to offset %#lx " - "in x2APIC mode", data, offset); - 
return (0); - } - - /* - * XXX Generate GP fault for MSR accesses in xAPIC mode - */ - if (!x2apic(vlapic) && !mmio_access) { - VLAPIC_CTR2(vlapic, "x2APIC MSR write of %#lx to offset %#lx " - "in xAPIC mode", data, offset); - return (0); - } - - retval = 0; switch (offset) { - case APIC_OFFSET_ID: - lapic->id = data; - vlapic_id_write_handler(vlapic); - break; - case APIC_OFFSET_TPR: - vlapic_set_tpr(vlapic, data & 0xff); - break; - case APIC_OFFSET_EOI: - vlapic_process_eoi(vlapic); - break; - case APIC_OFFSET_LDR: - lapic->ldr = data; - vlapic_ldr_write_handler(vlapic); - break; - case APIC_OFFSET_DFR: - lapic->dfr = data; - vlapic_dfr_write_handler(vlapic); - break; - case APIC_OFFSET_SVR: - lapic->svr = data; - vlapic_svr_write_handler(vlapic); - break; - case APIC_OFFSET_ICR_LOW: - lapic->icr_lo = data; - if (x2apic(vlapic)) - lapic->icr_hi = data >> 32; - retval = vlapic_icrlo_write_handler(vlapic); - break; - case APIC_OFFSET_ICR_HI: - lapic->icr_hi = data; - break; - case APIC_OFFSET_CMCI_LVT: - case APIC_OFFSET_TIMER_LVT ... 
APIC_OFFSET_ERROR_LVT: - regptr = vlapic_get_lvtptr(vlapic, offset); - *regptr = data; - vlapic_lvt_write_handler(vlapic, offset); - break; - case APIC_OFFSET_TIMER_ICR: - lapic->icr_timer = data; - vlapic_icrtmr_write_handler(vlapic); - break; + case APIC_OFFSET_ID: + lapic->id = data; + vlapic_id_write_handler(vlapic); + break; + case APIC_OFFSET_TPR: + vlapic_set_tpr(vlapic, data & 0xff); + break; + case APIC_OFFSET_EOI: + vlapic_process_eoi(vlapic); + break; + case APIC_OFFSET_LDR: + lapic->ldr = data; + vlapic_ldr_write_handler(vlapic); + break; + case APIC_OFFSET_DFR: + lapic->dfr = data; + vlapic_dfr_write_handler(vlapic); + break; + case APIC_OFFSET_SVR: + lapic->svr = data; + vlapic_svr_write_handler(vlapic); + break; + case APIC_OFFSET_ICR_LOW: + lapic->icr_lo = data; + vlapic_icrlo_write_handler(vlapic); + break; + case APIC_OFFSET_ICR_HI: + lapic->icr_hi = data; + break; + case APIC_OFFSET_CMCI_LVT: + case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT: + regptr = vlapic_get_lvtptr(vlapic, offset); + *regptr = data; + vlapic_lvt_write_handler(vlapic, offset); + break; + case APIC_OFFSET_TIMER_ICR: + lapic->icr_timer = data; + vlapic_icrtmr_write_handler(vlapic); + break; - case APIC_OFFSET_TIMER_DCR: - lapic->dcr_timer = data; - vlapic_dcr_write_handler(vlapic); - break; + case APIC_OFFSET_TIMER_DCR: + lapic->dcr_timer = data; + vlapic_dcr_write_handler(vlapic); + break; - case APIC_OFFSET_ESR: - vlapic_esr_write_handler(vlapic); - break; + case APIC_OFFSET_ESR: + vlapic_esr_write_handler(vlapic); + break; - case APIC_OFFSET_SELF_IPI: - if (x2apic(vlapic)) - vlapic_self_ipi_handler(vlapic, data); - break; + case APIC_OFFSET_SELF_IPI: + if (vlapic_x2mode(vlapic)) + vlapic_self_ipi_handler(vlapic, data); + break; - case APIC_OFFSET_VER: - case APIC_OFFSET_APR: - case APIC_OFFSET_PPR: - case APIC_OFFSET_RRR: - case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7: - case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7: - case APIC_OFFSET_IRR0 ... 
APIC_OFFSET_IRR7: - case APIC_OFFSET_TIMER_CCR: - default: - // Read only. - break; + case APIC_OFFSET_VER: + case APIC_OFFSET_APR: + case APIC_OFFSET_PPR: + case APIC_OFFSET_RRR: + case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7: + case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7: + case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7: + case APIC_OFFSET_TIMER_CCR: + /* Read-only register */ + return (false); + + default: + /* Invalid register */ + return (false); } - return (retval); + return (true); } void @@ -1407,6 +1381,10 @@ vlapic_reset(struct vlapic *vlapic) (*vlapic->ops.sync_state)(vlapic); } + vlapic->msr_apicbase = DEFAULT_APIC_BASE | APICBASE_ENABLED; + if (vlapic->vcpuid == 0) + vlapic->msr_apicbase |= APICBASE_BSP; + lapic->id = vlapic_get_id(vlapic); lapic->version = VLAPIC_VERSION; lapic->version |= (VLAPIC_MAXLVT_INDEX << MAXLVTSHIFT); @@ -1470,41 +1448,215 @@ vlapic_init(struct vlapic *vlapic) mtx_init(&vlapic->timer_mtx, "vlapic timer mtx", NULL, MTX_SPIN); callout_init(&vlapic->callout, 1); - vlapic->msr_apicbase = DEFAULT_APIC_BASE | APICBASE_ENABLED; - - if (vlapic->vcpuid == 0) - vlapic->msr_apicbase |= APICBASE_BSP; - vlapic_reset(vlapic); } void vlapic_cleanup(struct vlapic *vlapic) { - callout_drain(&vlapic->callout); } -uint64_t -vlapic_get_apicbase(struct vlapic *vlapic) +int +vlapic_mmio_read(struct vlapic *vlapic, uint64_t gpa, uint64_t *valp, + uint_t size) { + ASSERT3U(gpa, >=, DEFAULT_APIC_BASE); + ASSERT3U(gpa, <, DEFAULT_APIC_BASE + PAGE_SIZE); - return (vlapic->msr_apicbase); + /* Ignore MMIO accesses when in x2APIC mode or hardware disabled */ + if (vlapic_x2mode(vlapic) || vlapic_hw_disabled(vlapic)) { + *valp = UINT64_MAX; + return (0); + } + + const uint16_t off = gpa - DEFAULT_APIC_BASE; + uint32_t raw = 0; + (void) vlapic_read(vlapic, off & ~0xf, &raw); + + /* Shift and mask reads which are small and/or unaligned */ + const uint8_t align = off & 0xf; + if (align < 4) { + *valp = (uint64_t)raw << (align * 8); + } else { + *valp = 0; + } + + 
return (0); } int -vlapic_set_apicbase(struct vlapic *vlapic, uint64_t new) +vlapic_mmio_write(struct vlapic *vlapic, uint64_t gpa, uint64_t val, + uint_t size) { + ASSERT3U(gpa, >=, DEFAULT_APIC_BASE); + ASSERT3U(gpa, <, DEFAULT_APIC_BASE + PAGE_SIZE); - if (vlapic->msr_apicbase != new) { - VLAPIC_CTR2(vlapic, "Changing APIC_BASE MSR from %#lx to %#lx " - "not supported", vlapic->msr_apicbase, new); - return (-1); + /* Ignore MMIO accesses when in x2APIC mode or hardware disabled */ + if (vlapic_x2mode(vlapic) || vlapic_hw_disabled(vlapic)) { + return (0); + } + + const uint16_t off = gpa - DEFAULT_APIC_BASE; + /* Ignore writes which are not 32-bits wide and 16-byte aligned */ + if ((off & 0xf) != 0 || size != 4) { + return (0); } + (void) vlapic_write(vlapic, off, (uint32_t)val); return (0); } +/* Should attempts to change the APIC base address be rejected with a #GP? */ +int vlapic_gp_on_addr_change = 1; + +static vm_msr_result_t +vlapic_set_apicbase(struct vlapic *vlapic, uint64_t val) +{ + const uint64_t diff = vlapic->msr_apicbase ^ val; + + /* + * Until the LAPIC emulation for switching between xAPIC and x2APIC + * modes is more polished, it will remain off-limits from being altered + * by the guest. + */ + const uint64_t reserved_bits = APICBASE_RESERVED | APICBASE_X2APIC | + APICBASE_BSP; + if ((diff & reserved_bits) != 0) { + return (VMR_GP); + } + + /* We do not presently allow the LAPIC access address to be modified. */ + if ((diff & APICBASE_ADDR_MASK) != 0) { + /* + * Explicitly rebuffing such requests with a #GP is the most + * straightforward way to handle the situation, but certain + * consumers (such as the KVM unit tests) may balk at the + * otherwise unexpected exception. + */ + if (vlapic_gp_on_addr_change) { + return (VMR_GP); + } + + /* If silence is required, just ignore the address change. 
*/ + val = (val & ~APICBASE_ADDR_MASK) | DEFAULT_APIC_BASE; + } + + vlapic->msr_apicbase = val; + return (VMR_OK); +} + +static __inline uint16_t +vlapic_msr_to_regoff(uint32_t msr) +{ + ASSERT3U(msr, >=, MSR_APIC_000); + ASSERT3U(msr, <, (MSR_APIC_000 + 0x100)); + + return ((msr - MSR_APIC_000) << 4); +} + +bool +vlapic_owned_msr(uint32_t msr) +{ + if (msr == MSR_APICBASE) { + return (true); + } + if (msr >= MSR_APIC_000 && + msr < (MSR_APIC_000 + 0x100)) { + return (true); + } + return (false); +} + +vm_msr_result_t +vlapic_rdmsr(struct vlapic *vlapic, uint32_t msr, uint64_t *valp) +{ + ASSERT(vlapic_owned_msr(msr)); + ASSERT3P(valp, !=, NULL); + + if (msr == MSR_APICBASE) { + *valp = vlapic->msr_apicbase; + return (VMR_OK); + } + + /* #GP for x2APIC MSR accesses in xAPIC mode */ + if (!vlapic_x2mode(vlapic)) { + return (VMR_GP); + } + + uint64_t out = 0; + const uint16_t reg = vlapic_msr_to_regoff(msr); + switch (reg) { + case APIC_OFFSET_ICR_LOW: { + /* Read from ICR register gets entire (64-bit) value */ + uint32_t low = 0, high = 0; + bool valid; + + valid = vlapic_read(vlapic, APIC_OFFSET_ICR_HI, &high); + VERIFY(valid); + valid = vlapic_read(vlapic, APIC_OFFSET_ICR_LOW, &low); + VERIFY(valid); + + *valp = ((uint64_t)high << 32) | low; + return (VMR_OK); + } + case APIC_OFFSET_ICR_HI: + /* Already covered by ICR_LOW */ + return (VMR_GP); + default: + break; + } + if (!vlapic_read(vlapic, reg, (uint32_t *)&out)) { + return (VMR_GP); + } + *valp = out; + return (VMR_OK); +} + +vm_msr_result_t +vlapic_wrmsr(struct vlapic *vlapic, uint32_t msr, uint64_t val) +{ + ASSERT(vlapic_owned_msr(msr)); + + if (msr == MSR_APICBASE) { + return (vlapic_set_apicbase(vlapic, val)); + } + + /* #GP for x2APIC MSR accesses in xAPIC mode */ + if (!vlapic_x2mode(vlapic)) { + return (VMR_GP); + } + + const uint16_t reg = vlapic_msr_to_regoff(msr); + switch (reg) { + case APIC_OFFSET_ICR_LOW: { + /* Write to ICR register sets entire (64-bit) value */ + bool valid; + + valid = 
vlapic_write(vlapic, APIC_OFFSET_ICR_HI, val >> 32); + VERIFY(valid); + valid = vlapic_write(vlapic, APIC_OFFSET_ICR_LOW, val); + VERIFY(valid); + return (VMR_OK); + } + case APIC_OFFSET_ICR_HI: + /* Already covered by ICR_LOW */ + return (VMR_GP); + case APIC_OFFSET_ESR: + /* Only 0 may be written from x2APIC mode */ + if (val != 0) { + return (VMR_GP); + } + break; + default: + break; + } + if (!vlapic_write(vlapic, reg, val)) { + return (VMR_GP); + } + return (VMR_OK); +} + void vlapic_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state) { @@ -1526,7 +1678,7 @@ vlapic_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state) */ lapic = vlapic->apic_page; lapic->id = vlapic_get_id(vlapic); - if (x2apic(vlapic)) { + if (vlapic_x2mode(vlapic)) { lapic->ldr = x2apic_ldr(vlapic); lapic->dfr = 0; } else { @@ -1592,18 +1744,6 @@ vlapic_post_intr(struct vlapic *vlapic, int hostcpu) poke_cpu(hostcpu); } -bool -vlapic_enabled(struct vlapic *vlapic) -{ - struct LAPIC *lapic = vlapic->apic_page; - - if ((vlapic->msr_apicbase & APICBASE_ENABLED) != 0 && - (lapic->svr & APIC_SVR_ENABLE) != 0) - return (true); - else - return (false); -} - void vlapic_localize_resources(struct vlapic *vlapic) { diff --git a/usr/src/uts/i86pc/io/vmm/io/vlapic.h b/usr/src/uts/i86pc/io/vmm/io/vlapic.h index a46bae9d34..dd1970cb6a 100644 --- a/usr/src/uts/i86pc/io/vmm/io/vlapic.h +++ b/usr/src/uts/i86pc/io/vmm/io/vlapic.h @@ -36,15 +36,14 @@ #ifndef _VLAPIC_H_ #define _VLAPIC_H_ -struct vm; -enum x2apic_state; - void vlapic_reset(struct vlapic *vlapic); -int vlapic_write(struct vlapic *vlapic, int mmio_access, uint64_t offset, - uint64_t data); -int vlapic_read(struct vlapic *vlapic, int mmio_access, uint64_t offset, - uint64_t *data); +int vlapic_mmio_write(struct vlapic *, uint64_t, uint64_t, uint_t); +int vlapic_mmio_read(struct vlapic *, uint64_t, uint64_t *, uint_t); + +bool vlapic_owned_msr(uint32_t); +vm_msr_result_t vlapic_rdmsr(struct vlapic *, uint32_t, 
uint64_t *); +vm_msr_result_t vlapic_wrmsr(struct vlapic *, uint32_t, uint64_t); /* * Returns 0 if there is no eligible vector that can be delivered to the @@ -81,10 +80,7 @@ int vlapic_trigger_lvt(struct vlapic *vlapic, int vector); void vlapic_sync_tpr(struct vlapic *vlapic); -uint64_t vlapic_get_apicbase(struct vlapic *vlapic); -int vlapic_set_apicbase(struct vlapic *vlapic, uint64_t val); void vlapic_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state s); -bool vlapic_enabled(struct vlapic *vlapic); void vlapic_deliver_intr(struct vm *vm, bool level, uint32_t dest, bool phys, int delmode, int vec); @@ -101,11 +97,11 @@ void vlapic_ldr_write_handler(struct vlapic *vlapic); void vlapic_dfr_write_handler(struct vlapic *vlapic); void vlapic_svr_write_handler(struct vlapic *vlapic); void vlapic_esr_write_handler(struct vlapic *vlapic); -int vlapic_icrlo_write_handler(struct vlapic *vlapic); +void vlapic_icrlo_write_handler(struct vlapic *vlapic); void vlapic_icrtmr_write_handler(struct vlapic *vlapic); void vlapic_dcr_write_handler(struct vlapic *vlapic); void vlapic_lvt_write_handler(struct vlapic *vlapic, uint32_t offset); -void vlapic_self_ipi_handler(struct vlapic *vlapic, uint64_t val); +void vlapic_self_ipi_handler(struct vlapic *vlapic, uint32_t val); void vlapic_localize_resources(struct vlapic *vlapic); diff --git a/usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h b/usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h index e94f7a876b..c84b33dc2e 100644 --- a/usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h +++ b/usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h @@ -373,6 +373,13 @@ enum event_inject_state { EIS_REQ_EXIT = (1 << 15), }; +/* Possible result codes for MSR access emulation */ +typedef enum vm_msr_result { + VMR_OK = 0, /* succesfully emulated */ + VMR_GP = 1, /* #GP should be injected */ + VMR_UNHANLDED = 2, /* handle in userspace, kernel cannot emulate */ +} vm_msr_result_t; + void vmm_sol_glue_init(void); void vmm_sol_glue_cleanup(void); diff --git 
a/usr/src/uts/i86pc/io/vmm/vmm.c b/usr/src/uts/i86pc/io/vmm/vmm.c index 78a810880d..ae80146a12 100644 --- a/usr/src/uts/i86pc/io/vmm/vmm.c +++ b/usr/src/uts/i86pc/io/vmm/vmm.c @@ -1506,7 +1506,9 @@ vm_service_mmio_read(struct vm *vm, int cpuid, uint64_t gpa, uint64_t *rval, int err = ESRCH; if (gpa >= DEFAULT_APIC_BASE && gpa < DEFAULT_APIC_BASE + PAGE_SIZE) { - err = lapic_mmio_read(vm, cpuid, gpa, rval, rsize); + struct vlapic *vlapic = vm_lapic(vm, cpuid); + + err = vlapic_mmio_read(vlapic, gpa, rval, rsize); } else if (gpa >= VIOAPIC_BASE && gpa < VIOAPIC_BASE + VIOAPIC_SIZE) { err = vioapic_mmio_read(vm, cpuid, gpa, rval, rsize); } else if (gpa >= VHPET_BASE && gpa < VHPET_BASE + VHPET_SIZE) { @@ -1523,7 +1525,9 @@ vm_service_mmio_write(struct vm *vm, int cpuid, uint64_t gpa, uint64_t wval, int err = ESRCH; if (gpa >= DEFAULT_APIC_BASE && gpa < DEFAULT_APIC_BASE + PAGE_SIZE) { - err = lapic_mmio_write(vm, cpuid, gpa, wval, wsize); + struct vlapic *vlapic = vm_lapic(vm, cpuid); + + err = vlapic_mmio_write(vlapic, gpa, wval, wsize); } else if (gpa >= VIOAPIC_BASE && gpa < VIOAPIC_BASE + VIOAPIC_SIZE) { err = vioapic_mmio_write(vm, cpuid, gpa, wval, wsize); } else if (gpa >= VHPET_BASE && gpa < VHPET_BASE + VHPET_SIZE) { diff --git a/usr/src/uts/i86pc/io/vmm/vmm_lapic.c b/usr/src/uts/i86pc/io/vmm/vmm_lapic.c index e95f444051..8ef1c851d0 100644 --- a/usr/src/uts/i86pc/io/vmm/vmm_lapic.c +++ b/usr/src/uts/i86pc/io/vmm/vmm_lapic.c @@ -149,106 +149,3 @@ lapic_intr_msi(struct vm *vm, uint64_t addr, uint64_t msg) vlapic_deliver_intr(vm, LAPIC_TRIG_EDGE, dest, phys, delmode, vec); return (0); } - -static bool -x2apic_msr(uint_t msr) -{ - return (msr >= 0x800 && msr <= 0xBFF); -} - -static uint_t -x2apic_msr_to_regoff(uint_t msr) -{ - - return ((msr - 0x800) << 4); -} - -bool -lapic_msr(uint_t msr) -{ - - return (x2apic_msr(msr) || msr == MSR_APICBASE); -} - -int -lapic_rdmsr(struct vm *vm, int cpu, uint_t msr, uint64_t *rval) -{ - int error; - uint_t offset; - struct 
vlapic *vlapic; - - vlapic = vm_lapic(vm, cpu); - - if (msr == MSR_APICBASE) { - *rval = vlapic_get_apicbase(vlapic); - error = 0; - } else { - offset = x2apic_msr_to_regoff(msr); - error = vlapic_read(vlapic, 0, offset, rval); - } - - return (error); -} - -int -lapic_wrmsr(struct vm *vm, int cpu, uint_t msr, uint64_t val) -{ - int error; - uint_t offset; - struct vlapic *vlapic; - - vlapic = vm_lapic(vm, cpu); - - if (msr == MSR_APICBASE) { - error = vlapic_set_apicbase(vlapic, val); - } else { - offset = x2apic_msr_to_regoff(msr); - error = vlapic_write(vlapic, 0, offset, val); - } - - return (error); -} - -int -lapic_mmio_write(struct vm *vm, int cpu, uint64_t gpa, uint64_t wval, int size) -{ - int error; - uint64_t off; - struct vlapic *vlapic; - - off = gpa - DEFAULT_APIC_BASE; - - /* - * Memory mapped local apic accesses must be 4 bytes wide and - * aligned on a 16-byte boundary. - */ - if (size != 4 || off & 0xf) - return (EINVAL); - - vlapic = vm_lapic(vm, cpu); - error = vlapic_write(vlapic, 1, off, wval); - return (error); -} - -int -lapic_mmio_read(struct vm *vm, int cpu, uint64_t gpa, uint64_t *rval, int size) -{ - int error; - uint64_t off; - struct vlapic *vlapic; - - off = gpa - DEFAULT_APIC_BASE; - - /* - * Memory mapped local apic accesses should be aligned on a - * 16-byte boundary. They are also suggested to be 4 bytes - * wide, alas not all OSes follow suggestions. - */ - off &= ~3; - if (off & 0xf) - return (EINVAL); - - vlapic = vm_lapic(vm, cpu); - error = vlapic_read(vlapic, 1, off, rval); - return (error); -} |