summaryrefslogtreecommitdiff
path: root/usr/src
diff options
context:
space:
mode:
authorPatrick Mooney <pmooney@pfmooney.com>2020-07-18 19:49:51 +0000
committerPatrick Mooney <pmooney@oxide.computer>2020-08-20 19:19:51 +0000
commite0c0d44e917080841514d0dd031a696c74e8c435 (patch)
tree232fc454b760fd2fdced128fec0e84b065e4f025 /usr/src
parent76f19f5fdc974fe5be5c82a556e43a4df93f1de1 (diff)
downloadillumos-joyent-e0c0d44e917080841514d0dd031a696c74e8c435.tar.gz
12989 improve interface boundary for bhyve MMIO
12990 improve interface boundary for bhyve ins/outs 12991 bhyve vlapic should SIPI more carefully Reviewed by: Mike Zeller <mike.zeller@joyent.com> Reviewed by: Joshua M. Clulow <josh@sysmgr.org> Reviewed by: Robert Mustacchi <rm@fingolfin.org> Approved by: Gordon Ross <gordon.w.ross@gmail.com>
Diffstat (limited to 'usr/src')
-rw-r--r--usr/src/cmd/bhyve/Makefile6
-rw-r--r--usr/src/cmd/bhyve/bhyverun.c202
-rw-r--r--usr/src/cmd/bhyve/inout.c160
-rw-r--r--usr/src/cmd/bhyve/inout.h7
-rw-r--r--usr/src/cmd/bhyve/mem.c59
-rw-r--r--usr/src/cmd/bhyve/mem.h4
-rw-r--r--usr/src/cmd/bhyve/task_switch.c161
-rw-r--r--usr/src/cmd/bhyvectl/bhyvectl.c25
-rw-r--r--usr/src/lib/libvmmapi/common/vmmapi.c15
-rw-r--r--usr/src/lib/libvmmapi/common/vmmapi.h3
-rw-r--r--usr/src/uts/i86pc/io/vmm/amd/svm.c215
-rw-r--r--usr/src/uts/i86pc/io/vmm/intel/vmx.c185
-rw-r--r--usr/src/uts/i86pc/io/vmm/io/vatpic.c12
-rw-r--r--usr/src/uts/i86pc/io/vmm/io/vatpic.h12
-rw-r--r--usr/src/uts/i86pc/io/vmm/io/vatpit.c6
-rw-r--r--usr/src/uts/i86pc/io/vmm/io/vatpit.h8
-rw-r--r--usr/src/uts/i86pc/io/vmm/io/vlapic.c9
-rw-r--r--usr/src/uts/i86pc/io/vmm/io/vpmtmr.c2
-rw-r--r--usr/src/uts/i86pc/io/vmm/io/vpmtmr.h4
-rw-r--r--usr/src/uts/i86pc/io/vmm/io/vrtc.c8
-rw-r--r--usr/src/uts/i86pc/io/vmm/io/vrtc.h8
-rw-r--r--usr/src/uts/i86pc/io/vmm/sys/vmm_instruction_emul.h (renamed from usr/src/uts/i86pc/sys/vmm_instruction_emul.h)98
-rw-r--r--usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h9
-rw-r--r--usr/src/uts/i86pc/io/vmm/vmm.c376
-rw-r--r--usr/src/uts/i86pc/io/vmm/vmm_instruction_emul.c1075
-rw-r--r--usr/src/uts/i86pc/io/vmm/vmm_ioport.c156
-rw-r--r--usr/src/uts/i86pc/io/vmm/vmm_ioport.h5
-rw-r--r--usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c59
-rw-r--r--usr/src/uts/i86pc/io/vmm/vmm_stat.c2
-rw-r--r--usr/src/uts/i86pc/io/vmm/vmm_stat.h2
-rw-r--r--usr/src/uts/i86pc/sys/vmm.h167
-rw-r--r--usr/src/uts/i86pc/sys/vmm_dev.h5
32 files changed, 1897 insertions, 1168 deletions
diff --git a/usr/src/cmd/bhyve/Makefile b/usr/src/cmd/bhyve/Makefile
index 348a8988fe..7126fdda17 100644
--- a/usr/src/cmd/bhyve/Makefile
+++ b/usr/src/cmd/bhyve/Makefile
@@ -12,6 +12,7 @@
#
# Copyright 2014 Pluribus Networks Inc.
# Copyright 2019 Joyent, Inc.
+# Copyright 2020 Oxide Computer Company
#
PROG = bhyve
@@ -74,7 +75,6 @@ SRCS = acpi.c \
usb_mouse.c \
vga.c \
virtio.c \
- vmm_instruction_emul.c \
vmgenc.c \
xmsr.c \
spinup_ap.c \
@@ -161,7 +161,3 @@ $(SUBDIRS): FRC
@cd $@; pwd; $(MAKE) $(TARGET)
FRC:
-
-%.o: $(SRC)/uts/i86pc/io/vmm/%.c
- $(COMPILE.c) $<
- $(POST_PROCESS_O)
diff --git a/usr/src/cmd/bhyve/bhyverun.c b/usr/src/cmd/bhyve/bhyverun.c
index fbc9fab6b1..d2a4032682 100644
--- a/usr/src/cmd/bhyve/bhyverun.c
+++ b/usr/src/cmd/bhyve/bhyverun.c
@@ -213,6 +213,7 @@ static cpuset_t cpumask;
static void vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip);
static struct vm_exit vmexit[VM_MAXCPU];
+static struct vm_entry vmentry[VM_MAXCPU];
struct bhyvestats {
uint64_t vmexit_bogus;
@@ -220,15 +221,18 @@ struct bhyvestats {
uint64_t vmexit_hlt;
uint64_t vmexit_pause;
uint64_t vmexit_mtrap;
- uint64_t vmexit_inst_emul;
+ uint64_t vmexit_mmio;
+ uint64_t vmexit_inout;
uint64_t cpu_switch_rotate;
uint64_t cpu_switch_direct;
+ uint64_t mmio_unhandled;
} stats;
struct mt_vmm_info {
pthread_t mt_thr;
struct vmctx *mt_ctx;
- int mt_vcpu;
+ int mt_vcpu;
+ uint64_t mt_startrip;
} mt_vmm_info[VM_MAXCPU];
#ifdef __FreeBSD__
@@ -498,7 +502,7 @@ fbsdrun_start_thread(void *param)
if (gdb_port != 0)
gdb_cpu_add(vcpu);
- vm_loop(mtp->mt_ctx, vcpu, vmexit[vcpu].rip);
+ vm_loop(mtp->mt_ctx, vcpu, mtp->mt_startrip);
/* not reached */
exit(1);
@@ -539,11 +543,9 @@ fbsdrun_addcpu(struct vmctx *ctx, int fromcpu, int newcpu, uint64_t rip,
* Set up the vmexit struct to allow execution to start
* at the given RIP
*/
- vmexit[newcpu].rip = rip;
- vmexit[newcpu].inst_length = 0;
-
mt_vmm_info[newcpu].mt_ctx = ctx;
mt_vmm_info[newcpu].mt_vcpu = newcpu;
+ mt_vmm_info[newcpu].mt_startrip = rip;
error = pthread_create(&mt_vmm_info[newcpu].mt_thr, NULL,
fbsdrun_start_thread, &mt_vmm_info[newcpu]);
@@ -563,6 +565,66 @@ fbsdrun_deletecpu(struct vmctx *ctx, int vcpu)
return (CPU_EMPTY(&cpumask));
}
+static void
+vmentry_mmio_read(int vcpu, uint64_t gpa, uint8_t bytes, uint64_t data)
+{
+ struct vm_entry *entry = &vmentry[vcpu];
+ struct vm_mmio *mmio = &entry->u.mmio;
+
+ assert(entry->cmd == VEC_DEFAULT);
+
+ entry->cmd = VEC_COMPLETE_MMIO;
+ mmio->bytes = bytes;
+ mmio->read = 1;
+ mmio->gpa = gpa;
+ mmio->data = data;
+}
+
+static void
+vmentry_mmio_write(int vcpu, uint64_t gpa, uint8_t bytes)
+{
+ struct vm_entry *entry = &vmentry[vcpu];
+ struct vm_mmio *mmio = &entry->u.mmio;
+
+ assert(entry->cmd == VEC_DEFAULT);
+
+ entry->cmd = VEC_COMPLETE_MMIO;
+ mmio->bytes = bytes;
+ mmio->read = 0;
+ mmio->gpa = gpa;
+ mmio->data = 0;
+}
+
+static void
+vmentry_inout_read(int vcpu, uint16_t port, uint8_t bytes, uint32_t data)
+{
+ struct vm_entry *entry = &vmentry[vcpu];
+ struct vm_inout *inout = &entry->u.inout;
+
+ assert(entry->cmd == VEC_DEFAULT);
+
+ entry->cmd = VEC_COMPLETE_INOUT;
+ inout->bytes = bytes;
+ inout->flags = INOUT_IN;
+ inout->port = port;
+ inout->eax = data;
+}
+
+static void
+vmentry_inout_write(int vcpu, uint16_t port, uint8_t bytes)
+{
+ struct vm_entry *entry = &vmentry[vcpu];
+ struct vm_inout *inout = &entry->u.inout;
+
+ assert(entry->cmd == VEC_DEFAULT);
+
+ entry->cmd = VEC_COMPLETE_INOUT;
+ inout->bytes = bytes;
+ inout->flags = 0;
+ inout->port = port;
+ inout->eax = 0;
+}
+
static int
vmexit_handle_notify(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu,
uint32_t eax)
@@ -579,30 +641,42 @@ static int
vmexit_inout(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
{
int error;
- int bytes, port, in, out;
int vcpu;
+ struct vm_inout inout;
+ bool in;
+ uint8_t bytes;
- vcpu = *pvcpu;
+ stats.vmexit_inout++;
- port = vme->u.inout.port;
- bytes = vme->u.inout.bytes;
- in = vme->u.inout.in;
- out = !in;
+ vcpu = *pvcpu;
+ inout = vme->u.inout;
+ in = (inout.flags & INOUT_IN) != 0;
+ bytes = inout.bytes;
/* Extra-special case of host notifications */
- if (out && port == GUEST_NIO_PORT) {
- error = vmexit_handle_notify(ctx, vme, pvcpu, vme->u.inout.eax);
+ if (!in && inout.port == GUEST_NIO_PORT) {
+ error = vmexit_handle_notify(ctx, vme, pvcpu, inout.eax);
+ vmentry_inout_write(vcpu, inout.port, bytes);
return (error);
}
- error = emulate_inout(ctx, vcpu, vme, strictio);
+ error = emulate_inout(ctx, vcpu, &inout, strictio != 0);
if (error) {
fprintf(stderr, "Unhandled %s%c 0x%04x at 0x%lx\n",
in ? "in" : "out",
bytes == 1 ? 'b' : (bytes == 2 ? 'w' : 'l'),
- port, vmexit->rip);
+ inout.port, vmexit->rip);
return (VMEXIT_ABORT);
} else {
+ /*
+ * Communicate the status of the inout operation back to the
+ * in-kernel instruction emulation.
+ */
+ if (in) {
+ vmentry_inout_read(vcpu, inout.port, bytes, inout.eax);
+ } else {
+ vmentry_inout_write(vcpu, inout.port, bytes);
+ }
return (VMEXIT_CONTINUE);
}
}
@@ -792,29 +866,70 @@ vmexit_mtrap(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
static int
vmexit_inst_emul(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
{
- int err, i;
- struct vie *vie;
+ uint8_t i, valid;
+
+ fprintf(stderr, "Failed to emulate instruction sequence ");
+
+ valid = vmexit->u.inst_emul.num_valid;
+ if (valid != 0) {
+ assert(valid <= sizeof (vmexit->u.inst_emul.inst));
+ fprintf(stderr, "[");
+ for (i = 0; i < valid; i++) {
+ if (i == 0) {
+ fprintf(stderr, "%02x",
+ vmexit->u.inst_emul.inst[i]);
+ } else {
+ fprintf(stderr, ", %02x",
+ vmexit->u.inst_emul.inst[i]);
+ }
+ }
+ fprintf(stderr, "] ");
+ }
+ fprintf(stderr, "@ %rip = %x\n", vmexit->rip);
- stats.vmexit_inst_emul++;
+ return (VMEXIT_ABORT);
+}
- vie = &vmexit->u.inst_emul.vie;
- err = emulate_mem(ctx, *pvcpu, vmexit->u.inst_emul.gpa,
- vie, &vmexit->u.inst_emul.paging);
+static int
+vmexit_mmio(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
+{
+ int vcpu, err;
+ struct vm_mmio mmio;
+ bool is_read;
- if (err) {
- if (err == ESRCH) {
- EPRINTLN("Unhandled memory access to 0x%lx\n",
- vmexit->u.inst_emul.gpa);
- }
+ stats.vmexit_mmio++;
- fprintf(stderr, "Failed to emulate instruction sequence [ ");
- for (i = 0; i < vie->num_valid; i++)
- fprintf(stderr, "%02x", vie->inst[i]);
- FPRINTLN(stderr, " ] at 0x%lx", vmexit->rip);
- return (VMEXIT_ABORT);
+ vcpu = *pvcpu;
+ mmio = vmexit->u.mmio;
+ is_read = (mmio.read != 0);
+
+ err = emulate_mem(ctx, vcpu, &mmio);
+
+ if (err == ESRCH) {
+ fprintf(stderr, "Unhandled memory access to 0x%lx\n", mmio.gpa);
+ stats.mmio_unhandled++;
+
+ /*
+ * Access to non-existent physical addresses is not likely to
+ * result in fatal errors on hardware machines, but rather reads
+ * of all-ones or discarded-but-acknowledged writes.
+ */
+ mmio.data = ~0UL;
+ err = 0;
}
- return (VMEXIT_CONTINUE);
+ if (err == 0) {
+ if (is_read) {
+ vmentry_mmio_read(vcpu, mmio.gpa, mmio.bytes,
+ mmio.data);
+ } else {
+ vmentry_mmio_write(vcpu, mmio.gpa, mmio.bytes);
+ }
+ return (VMEXIT_CONTINUE);
+ }
+
+ fprintf(stderr, "Unhandled mmio error to 0x%lx: %d\n", mmio.gpa, err);
+ return (VMEXIT_ABORT);
}
static pthread_mutex_t resetcpu_mtx = PTHREAD_MUTEX_INITIALIZER;
@@ -884,7 +999,7 @@ vmexit_breakpoint(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
static vmexit_handler_t handler[VM_EXITCODE_MAX] = {
[VM_EXITCODE_INOUT] = vmexit_inout,
- [VM_EXITCODE_INOUT_STR] = vmexit_inout,
+ [VM_EXITCODE_MMIO] = vmexit_mmio,
[VM_EXITCODE_VMX] = vmexit_vmx,
[VM_EXITCODE_SVM] = vmexit_svm,
[VM_EXITCODE_BOGUS] = vmexit_bogus,
@@ -906,6 +1021,8 @@ vm_loop(struct vmctx *ctx, int vcpu, uint64_t startrip)
int error, rc;
enum vm_exitcode exitcode;
cpuset_t active_cpus;
+ struct vm_exit *vexit;
+ struct vm_entry *ventry;
#ifdef __FreeBSD__
if (vcpumap[vcpu] != NULL) {
@@ -920,19 +1037,30 @@ vm_loop(struct vmctx *ctx, int vcpu, uint64_t startrip)
error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RIP, startrip);
assert(error == 0);
+ ventry = &vmentry[vcpu];
+ vexit = &vmexit[vcpu];
+
while (1) {
- error = vm_run(ctx, vcpu, &vmexit[vcpu]);
+ error = vm_run(ctx, vcpu, ventry, vexit);
if (error != 0)
break;
- exitcode = vmexit[vcpu].exitcode;
+ if (ventry->cmd != VEC_DEFAULT) {
+ /*
+ * Discard any lingering entry state after it has been
+ * submitted via vm_run().
+ */
+ bzero(ventry, sizeof (*ventry));
+ }
+
+ exitcode = vexit->exitcode;
if (exitcode >= VM_EXITCODE_MAX || handler[exitcode] == NULL) {
fprintf(stderr, "vm_loop: unexpected exitcode 0x%x\n",
exitcode);
exit(4);
}
- rc = (*handler[exitcode])(ctx, &vmexit[vcpu], &vcpu);
+ rc = (*handler[exitcode])(ctx, vexit, &vcpu);
switch (rc) {
case VMEXIT_CONTINUE:
diff --git a/usr/src/cmd/bhyve/inout.c b/usr/src/cmd/bhyve/inout.c
index b460ee2988..27068023d3 100644
--- a/usr/src/cmd/bhyve/inout.c
+++ b/usr/src/cmd/bhyve/inout.c
@@ -27,6 +27,18 @@
*
* $FreeBSD$
*/
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * Copyright 2020 Oxide Computer Company
+ */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
@@ -40,7 +52,6 @@ __FBSDID("$FreeBSD$");
#include <x86/segments.h>
#include <machine/vmm.h>
-#include <machine/vmm_instruction_emul.h>
#include <vmmapi.h>
#include <stdio.h>
@@ -57,12 +68,14 @@ SET_DECLARE(inout_port_set, struct inout_port);
#define VERIFY_IOPORT(port, size) \
assert((port) >= 0 && (size) > 0 && ((port) + (size)) <= MAX_IOPORTS)
-static struct {
+struct inout_handler {
const char *name;
int flags;
inout_func_t handler;
void *arg;
-} inout_handlers[MAX_IOPORTS];
+};
+
+static struct inout_handler inout_handlers[MAX_IOPORTS];
static int
default_inout(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
@@ -85,11 +98,11 @@ default_inout(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
return (0);
}
-static void
+static void
register_default_iohandler(int start, int size)
{
struct inout_port iop;
-
+
VERIFY_IOPORT(start, size);
bzero(&iop, sizeof(iop));
@@ -103,136 +116,37 @@ register_default_iohandler(int start, int size)
}
int
-emulate_inout(struct vmctx *ctx, int vcpu, struct vm_exit *vmexit, int strict)
+emulate_inout(struct vmctx *ctx, int vcpu, struct vm_inout *inout, bool strict)
{
- int addrsize, bytes, flags, in, port, prot, rep;
- uint32_t eax, val;
- inout_func_t handler;
- void *arg;
- int error, fault, retval;
- enum vm_reg_name idxreg;
- uint64_t gla, index, iterations, count;
- struct vm_inout_str *vis;
- struct iovec iov[2];
-
- bytes = vmexit->u.inout.bytes;
- in = vmexit->u.inout.in;
- port = vmexit->u.inout.port;
-
- assert(port < MAX_IOPORTS);
+ struct inout_handler handler;
+ inout_func_t hfunc;
+ void *harg;
+ int error;
+ uint8_t bytes;
+ bool in;
+
+ bytes = inout->bytes;
+ in = (inout->flags & INOUT_IN) != 0;
+
assert(bytes == 1 || bytes == 2 || bytes == 4);
- handler = inout_handlers[port].handler;
+ handler = inout_handlers[inout->port];
+ hfunc = handler.handler;
+ harg = handler.arg;
- if (strict && handler == default_inout)
+ if (strict && hfunc == default_inout)
return (-1);
- flags = inout_handlers[port].flags;
- arg = inout_handlers[port].arg;
-
if (in) {
- if (!(flags & IOPORT_F_IN))
+ if (!(handler.flags & IOPORT_F_IN))
return (-1);
} else {
- if (!(flags & IOPORT_F_OUT))
+ if (!(handler.flags & IOPORT_F_OUT))
return (-1);
}
- retval = 0;
- if (vmexit->u.inout.string) {
- vis = &vmexit->u.inout_str;
- rep = vis->inout.rep;
- addrsize = vis->addrsize;
- prot = in ? PROT_WRITE : PROT_READ;
- assert(addrsize == 2 || addrsize == 4 || addrsize == 8);
-
- /* Index register */
- idxreg = in ? VM_REG_GUEST_RDI : VM_REG_GUEST_RSI;
- index = vis->index & vie_size2mask(addrsize);
-
- /* Count register */
- count = vis->count & vie_size2mask(addrsize);
-
- /* Limit number of back-to-back in/out emulations to 16 */
- iterations = MIN(count, 16);
- while (iterations > 0) {
- assert(retval == 0);
- if (vie_calculate_gla(vis->paging.cpu_mode,
- vis->seg_name, &vis->seg_desc, index, bytes,
- addrsize, prot, &gla)) {
- vm_inject_gp(ctx, vcpu);
- break;
- }
-
- error = vm_copy_setup(ctx, vcpu, &vis->paging, gla,
- bytes, prot, iov, nitems(iov), &fault);
- if (error) {
- retval = -1; /* Unrecoverable error */
- break;
- } else if (fault) {
- retval = 0; /* Resume guest to handle fault */
- break;
- }
-
- if (vie_alignment_check(vis->paging.cpl, bytes,
- vis->cr0, vis->rflags, gla)) {
- vm_inject_ac(ctx, vcpu, 0);
- break;
- }
-
- val = 0;
- if (!in)
- vm_copyin(ctx, vcpu, iov, &val, bytes);
-
- retval = handler(ctx, vcpu, in, port, bytes, &val, arg);
- if (retval != 0)
- break;
-
- if (in)
- vm_copyout(ctx, vcpu, &val, iov, bytes);
-
- /* Update index */
- if (vis->rflags & PSL_D)
- index -= bytes;
- else
- index += bytes;
-
- count--;
- iterations--;
- }
-
- /* Update index register */
- error = vie_update_register(ctx, vcpu, idxreg, index, addrsize);
- assert(error == 0);
-
- /*
- * Update count register only if the instruction had a repeat
- * prefix.
- */
- if (rep) {
- error = vie_update_register(ctx, vcpu, VM_REG_GUEST_RCX,
- count, addrsize);
- assert(error == 0);
- }
-
- /* Restart the instruction if more iterations remain */
- if (retval == 0 && count != 0) {
- error = vm_restart_instruction(ctx, vcpu);
- assert(error == 0);
- }
- } else {
- eax = vmexit->u.inout.eax;
- val = eax & vie_size2mask(bytes);
- retval = handler(ctx, vcpu, in, port, bytes, &val, arg);
- if (retval == 0 && in) {
- eax &= ~vie_size2mask(bytes);
- eax |= val & vie_size2mask(bytes);
- error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RAX,
- eax);
- assert(error == 0);
- }
- }
- return (retval);
+ error = hfunc(ctx, vcpu, in, inout->port, bytes, &inout->eax, harg);
+ return (error);
}
void
diff --git a/usr/src/cmd/bhyve/inout.h b/usr/src/cmd/bhyve/inout.h
index b72ee5d93e..b026e18e92 100644
--- a/usr/src/cmd/bhyve/inout.h
+++ b/usr/src/cmd/bhyve/inout.h
@@ -47,6 +47,7 @@
struct vmctx;
struct vm_exit;
+struct vm_inout;
/*
* inout emulation handlers return 0 on success and -1 on failure.
@@ -82,10 +83,10 @@ struct inout_port {
0 \
}; \
DATA_SET(inout_port_set, __CONCAT(__inout_port, __LINE__))
-
+
void init_inout(void);
-int emulate_inout(struct vmctx *, int vcpu, struct vm_exit *vmexit,
- int strict);
+int emulate_inout(struct vmctx *, int vcpu, struct vm_inout *inout,
+ bool strict);
int register_inout(struct inout_port *iop);
int unregister_inout(struct inout_port *iop);
void init_bvmcons(void);
diff --git a/usr/src/cmd/bhyve/mem.c b/usr/src/cmd/bhyve/mem.c
index 90aefe45c8..1afc8bf5f0 100644
--- a/usr/src/cmd/bhyve/mem.c
+++ b/usr/src/cmd/bhyve/mem.c
@@ -27,6 +27,18 @@
*
* $FreeBSD$
*/
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * Copyright 2020 Oxide Computer Company
+ */
/*
* Memory ranges are represented with an RB tree. On insertion, the range
@@ -41,7 +53,6 @@ __FBSDID("$FreeBSD$");
#include <sys/errno.h>
#include <sys/tree.h>
#include <machine/vmm.h>
-#include <machine/vmm_instruction_emul.h>
#include <assert.h>
#include <err.h>
@@ -96,7 +107,7 @@ mmio_rb_lookup(struct mmio_rb_tree *rbt, uint64_t addr,
*entry = res;
return (0);
}
-
+
return (ENOENT);
}
@@ -170,7 +181,7 @@ access_memory(struct vmctx *ctx, int vcpu, uint64_t paddr, mem_cb_t *cb,
{
struct mmio_rb_range *entry;
int err, perror, immutable;
-
+
pthread_rwlock_rdlock(&mmio_rwlock);
/*
* First check the per-vCPU cache
@@ -185,7 +196,7 @@ access_memory(struct vmctx *ctx, int vcpu, uint64_t paddr, mem_cb_t *cb,
if (entry == NULL) {
if (mmio_rb_lookup(&mmio_rb_root, paddr, &entry) == 0) {
/* Update the per-vCPU cache */
- mmio_hint[vcpu] = entry;
+ mmio_hint[vcpu] = entry;
} else if (mmio_rb_lookup(&mmio_rb_fallback, paddr, &entry)) {
perror = pthread_rwlock_unlock(&mmio_rwlock);
assert(perror == 0);
@@ -223,32 +234,28 @@ access_memory(struct vmctx *ctx, int vcpu, uint64_t paddr, mem_cb_t *cb,
return (err);
}
-struct emulate_mem_args {
- struct vie *vie;
- struct vm_guest_paging *paging;
-};
-
static int
emulate_mem_cb(struct vmctx *ctx, int vcpu, uint64_t paddr, struct mem_range *mr,
void *arg)
{
- struct emulate_mem_args *ema;
+ struct vm_mmio *mmio;
+ int err = 0;
+
+ mmio = arg;
- ema = arg;
- return (vmm_emulate_instruction(ctx, vcpu, paddr, ema->vie, ema->paging,
- mem_read, mem_write, mr));
+ if (mmio->read != 0) {
+ err = mem_read(ctx, vcpu, paddr, &mmio->data, mmio->bytes, mr);
+ } else {
+ err = mem_write(ctx, vcpu, paddr, mmio->data, mmio->bytes, mr);
+ }
+
+ return (err);
}
int
-emulate_mem(struct vmctx *ctx, int vcpu, uint64_t paddr, struct vie *vie,
- struct vm_guest_paging *paging)
-
+emulate_mem(struct vmctx *ctx, int vcpu, struct vm_mmio *mmio)
{
- struct emulate_mem_args ema;
-
- ema.vie = vie;
- ema.paging = paging;
- return (access_memory(ctx, vcpu, paddr, emulate_mem_cb, &ema));
+ return (access_memory(ctx, vcpu, mmio->gpa, emulate_mem_cb, mmio));
}
struct rw_mem_args {
@@ -333,23 +340,23 @@ register_mem_fallback(struct mem_range *memp)
return (register_mem_int(&mmio_rb_fallback, memp));
}
-int
+int
unregister_mem(struct mem_range *memp)
{
struct mem_range *mr;
struct mmio_rb_range *entry = NULL;
int err, perror, i;
-
+
pthread_rwlock_wrlock(&mmio_rwlock);
err = mmio_rb_lookup(&mmio_rb_root, memp->base, &entry);
if (err == 0) {
mr = &entry->mr_param;
assert(mr->name == memp->name);
- assert(mr->base == memp->base && mr->size == memp->size);
+ assert(mr->base == memp->base && mr->size == memp->size);
assert((mr->flags & MEM_F_IMMUTABLE) == 0);
RB_REMOVE(mmio_rb_tree, &mmio_rb_root, entry);
- /* flush Per-vCPU cache */
+ /* flush Per-vCPU cache */
for (i=0; i < VM_MAXCPU; i++) {
if (mmio_hint[i] == entry)
mmio_hint[i] = NULL;
@@ -360,7 +367,7 @@ unregister_mem(struct mem_range *memp)
if (entry)
free(entry);
-
+
return (err);
}
diff --git a/usr/src/cmd/bhyve/mem.h b/usr/src/cmd/bhyve/mem.h
index 38d773c43f..8b81b93a02 100644
--- a/usr/src/cmd/bhyve/mem.h
+++ b/usr/src/cmd/bhyve/mem.h
@@ -53,8 +53,8 @@ struct mem_range {
#define MEM_F_IMMUTABLE 0x4 /* mem_range cannot be unregistered */
void init_mem(void);
-int emulate_mem(struct vmctx *, int vcpu, uint64_t paddr, struct vie *vie,
- struct vm_guest_paging *paging);
+
+int emulate_mem(struct vmctx *ctx, int vcpu, struct vm_mmio *mmio);
int read_mem(struct vmctx *ctx, int vcpu, uint64_t gpa, uint64_t *rval,
int size);
diff --git a/usr/src/cmd/bhyve/task_switch.c b/usr/src/cmd/bhyve/task_switch.c
index f1b564d560..c4a087b54f 100644
--- a/usr/src/cmd/bhyve/task_switch.c
+++ b/usr/src/cmd/bhyve/task_switch.c
@@ -25,6 +25,18 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * Copyright 2020 Oxide Computer Company
+ */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
@@ -37,7 +49,6 @@ __FBSDID("$FreeBSD$");
#include <x86/segments.h>
#include <x86/specialreg.h>
#include <machine/vmm.h>
-#include <machine/vmm_instruction_emul.h>
#include <assert.h>
#include <errno.h>
@@ -618,6 +629,150 @@ tss32_restore(struct vmctx *ctx, int vcpu, struct vm_task_switch *ts,
return (0);
}
+
+/*
+ * Copy of vie_alignment_check() from vmm_instruction_emul.c
+ */
+static int
+alignment_check(int cpl, int size, uint64_t cr0, uint64_t rf, uint64_t gla)
+{
+ assert(size == 1 || size == 2 || size == 4 || size == 8);
+ assert(cpl >= 0 && cpl <= 3);
+
+ if (cpl != 3 || (cr0 & CR0_AM) == 0 || (rf & PSL_AC) == 0)
+ return (0);
+
+ return ((gla & (size - 1)) ? 1 : 0);
+}
+
+/*
+ * Copy of vie_size2mask() from vmm_instruction_emul.c
+ */
+static uint64_t
+size2mask(int size)
+{
+ switch (size) {
+ case 1:
+ return (0xff);
+ case 2:
+ return (0xffff);
+ case 4:
+ return (0xffffffff);
+ case 8:
+ return (0xffffffffffffffff);
+ default:
+ assert(0);
+ /* not reached */
+ return (0);
+ }
+}
+
+/*
+ * Copy of vie_calculate_gla() from vmm_instruction_emul.c
+ */
+static int
+calculate_gla(enum vm_cpu_mode cpu_mode, enum vm_reg_name seg,
+ struct seg_desc *desc, uint64_t offset, int length, int addrsize,
+ int prot, uint64_t *gla)
+{
+ uint64_t firstoff, low_limit, high_limit, segbase;
+ int glasize, type;
+
+ assert(seg >= VM_REG_GUEST_ES && seg <= VM_REG_GUEST_GS);
+ assert((length == 1 || length == 2 || length == 4 || length == 8));
+ assert((prot & ~(PROT_READ | PROT_WRITE)) == 0);
+
+ firstoff = offset;
+ if (cpu_mode == CPU_MODE_64BIT) {
+ assert(addrsize == 4 || addrsize == 8);
+ glasize = 8;
+ } else {
+ assert(addrsize == 2 || addrsize == 4);
+ glasize = 4;
+ /*
+ * If the segment selector is loaded with a NULL selector
+ * then the descriptor is unusable and attempting to use
+ * it results in a #GP(0).
+ */
+ if (SEG_DESC_UNUSABLE(desc->access))
+ return (-1);
+
+ /*
+ * The processor generates a #NP exception when a segment
+ * register is loaded with a selector that points to a
+ * descriptor that is not present. If this was the case then
+ * it would have been checked before the VM-exit.
+ */
+ assert(SEG_DESC_PRESENT(desc->access));
+
+ /*
+ * The descriptor type must indicate a code/data segment.
+ */
+ type = SEG_DESC_TYPE(desc->access);
+ assert(type >= 16 && type <= 31);
+
+ if (prot & PROT_READ) {
+ /* #GP on a read access to a exec-only code segment */
+ if ((type & 0xA) == 0x8)
+ return (-1);
+ }
+
+ if (prot & PROT_WRITE) {
+ /*
+ * #GP on a write access to a code segment or a
+ * read-only data segment.
+ */
+ if (type & 0x8) /* code segment */
+ return (-1);
+
+ if ((type & 0xA) == 0) /* read-only data seg */
+ return (-1);
+ }
+
+ /*
+ * 'desc->limit' is fully expanded taking granularity into
+ * account.
+ */
+ if ((type & 0xC) == 0x4) {
+ /* expand-down data segment */
+ low_limit = desc->limit + 1;
+ high_limit = SEG_DESC_DEF32(desc->access) ?
+ 0xffffffff : 0xffff;
+ } else {
+ /* code segment or expand-up data segment */
+ low_limit = 0;
+ high_limit = desc->limit;
+ }
+
+ while (length > 0) {
+ offset &= size2mask(addrsize);
+ if (offset < low_limit || offset > high_limit)
+ return (-1);
+ offset++;
+ length--;
+ }
+ }
+
+ /*
+ * In 64-bit mode all segments except %fs and %gs have a segment
+ * base address of 0.
+ */
+ if (cpu_mode == CPU_MODE_64BIT && seg != VM_REG_GUEST_FS &&
+ seg != VM_REG_GUEST_GS) {
+ segbase = 0;
+ } else {
+ segbase = desc->base;
+ }
+
+ /*
+ * Truncate 'firstoff' to the effective address size before adding
+ * it to the segment base.
+ */
+ firstoff &= size2mask(addrsize);
+ *gla = (segbase + firstoff) & size2mask(glasize);
+ return (0);
+}
+
/*
* Push an error code on the stack of the new task. This is needed if the
* task switch was triggered by a hardware exception that causes an error
@@ -667,14 +822,14 @@ push_errcode(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
esp = GETREG(ctx, vcpu, VM_REG_GUEST_RSP);
esp -= bytes;
- if (vie_calculate_gla(paging->cpu_mode, VM_REG_GUEST_SS,
+ if (calculate_gla(paging->cpu_mode, VM_REG_GUEST_SS,
&seg_desc, esp, bytes, stacksize, PROT_WRITE, &gla)) {
sel_exception(ctx, vcpu, IDT_SS, stacksel, 1);
*faultptr = 1;
return (0);
}
- if (vie_alignment_check(paging->cpl, bytes, cr0, rflags, gla)) {
+ if (alignment_check(paging->cpl, bytes, cr0, rflags, gla)) {
vm_inject_ac(ctx, vcpu, 1);
*faultptr = 1;
return (0);
diff --git a/usr/src/cmd/bhyvectl/bhyvectl.c b/usr/src/cmd/bhyvectl/bhyvectl.c
index 5299791091..22c72cf5df 100644
--- a/usr/src/cmd/bhyvectl/bhyvectl.c
+++ b/usr/src/cmd/bhyvectl/bhyvectl.c
@@ -36,11 +36,10 @@
* A full copy of the text of the CDDL should have accompanied this
* source. A copy of the CDDL is also available via the Internet at
* http://www.illumos.org/license/CDDL.
- */
-
-/*
+ *
* Copyright 2015 Pluribus Networks Inc.
* Copyright 2019 Joyent, Inc.
+ * Copyright 2020 Oxide Computer Company
*/
#include <sys/cdefs.h>
@@ -358,14 +357,20 @@ dump_vm_run_exitcode(struct vm_exit *vmexit, int vcpu)
switch (vmexit->exitcode) {
case VM_EXITCODE_INOUT:
printf("\treason\t\tINOUT\n");
- printf("\tdirection\t%s\n", vmexit->u.inout.in ? "IN" : "OUT");
+ printf("\tdirection\t%s\n",
+ (vmexit->u.inout.flags & INOUT_IN) ? "IN" : "OUT");
printf("\tbytes\t\t%d\n", vmexit->u.inout.bytes);
- printf("\tflags\t\t%s%s\n",
- vmexit->u.inout.string ? "STRING " : "",
- vmexit->u.inout.rep ? "REP " : "");
printf("\tport\t\t0x%04x\n", vmexit->u.inout.port);
printf("\teax\t\t0x%08x\n", vmexit->u.inout.eax);
break;
+ case VM_EXITCODE_MMIO:
+ printf("\treason\t\tMMIO\n");
+ printf("\toperation\t%s\n",
+ vmexit->u.mmio.read ? "READ" : "WRITE");
+ printf("\tbytes\t\t%d\n", vmexit->u.mmio.bytes);
+ printf("\tgpa\t\t0x%08x\n", vmexit->u.mmio.gpa);
+ printf("\tdata\t\t0x%08x\n", vmexit->u.mmio.data);
+ break;
case VM_EXITCODE_VMX:
printf("\treason\t\tVMX\n");
printf("\tstatus\t\t%d\n", vmexit->u.vmx.status);
@@ -2366,7 +2371,11 @@ main(int argc, char *argv[])
}
if (!error && run) {
- error = vm_run(ctx, vcpu, &vmexit);
+ struct vm_entry entry;
+
+ bzero(&entry, sizeof (entry));
+
+ error = vm_run(ctx, vcpu, &entry, &vmexit);
if (error == 0)
dump_vm_run_exitcode(&vmexit, vcpu);
else
diff --git a/usr/src/lib/libvmmapi/common/vmmapi.c b/usr/src/lib/libvmmapi/common/vmmapi.c
index 7d3446a845..6d5145431e 100644
--- a/usr/src/lib/libvmmapi/common/vmmapi.c
+++ b/usr/src/lib/libvmmapi/common/vmmapi.c
@@ -772,17 +772,16 @@ vm_get_register_set(struct vmctx *ctx, int vcpu, unsigned int count,
}
int
-vm_run(struct vmctx *ctx, int vcpu, struct vm_exit *vmexit)
+vm_run(struct vmctx *ctx, int vcpu, const struct vm_entry *vm_entry,
+ struct vm_exit *vm_exit)
{
- int error;
- struct vm_run vmrun;
+ struct vm_entry entry;
- bzero(&vmrun, sizeof(vmrun));
- vmrun.cpuid = vcpu;
+ bcopy(vm_entry, &entry, sizeof (entry));
+ entry.cpuid = vcpu;
+ entry.exit_data = vm_exit;
- error = ioctl(ctx->fd, VM_RUN, &vmrun);
- bcopy(&vmrun.vm_exit, vmexit, sizeof(struct vm_exit));
- return (error);
+ return (ioctl(ctx->fd, VM_RUN, &entry));
}
int
diff --git a/usr/src/lib/libvmmapi/common/vmmapi.h b/usr/src/lib/libvmmapi/common/vmmapi.h
index 997267b8cc..4656f417b4 100644
--- a/usr/src/lib/libvmmapi/common/vmmapi.h
+++ b/usr/src/lib/libvmmapi/common/vmmapi.h
@@ -165,7 +165,8 @@ int vm_set_register_set(struct vmctx *ctx, int vcpu, unsigned int count,
const int *regnums, uint64_t *regvals);
int vm_get_register_set(struct vmctx *ctx, int vcpu, unsigned int count,
const int *regnums, uint64_t *regvals);
-int vm_run(struct vmctx *ctx, int vcpu, struct vm_exit *ret_vmexit);
+int vm_run(struct vmctx *ctx, int vcpu, const struct vm_entry *vm_entry,
+ struct vm_exit *vm_exit);
int vm_suspend(struct vmctx *ctx, enum vm_suspend_how how);
int vm_reinit(struct vmctx *ctx);
int vm_apicid2vcpu(struct vmctx *ctx, int apicid);
diff --git a/usr/src/uts/i86pc/io/vmm/amd/svm.c b/usr/src/uts/i86pc/io/vmm/amd/svm.c
index 615d3cd029..e78d401e68 100644
--- a/usr/src/uts/i86pc/io/vmm/amd/svm.c
+++ b/usr/src/uts/i86pc/io/vmm/amd/svm.c
@@ -68,7 +68,7 @@ __FBSDID("$FreeBSD$");
#include <machine/smp.h>
#include <machine/vmm.h>
#include <machine/vmm_dev.h>
-#include <machine/vmm_instruction_emul.h>
+#include <sys/vmm_instruction_emul.h>
#include "vmm_lapic.h"
#include "vmm_stat.h"
@@ -717,61 +717,6 @@ svm_paging_mode(uint64_t cr0, uint64_t cr4, uint64_t efer)
/*
* ins/outs utility routines
*/
-static uint64_t
-svm_inout_str_index(struct svm_regctx *regs, int in)
-{
- uint64_t val;
-
- val = in ? regs->sctx_rdi : regs->sctx_rsi;
-
- return (val);
-}
-
-static uint64_t
-svm_inout_str_count(struct svm_regctx *regs, int rep)
-{
- uint64_t val;
-
- val = rep ? regs->sctx_rcx : 1;
-
- return (val);
-}
-
-static void
-svm_inout_str_seginfo(struct svm_softc *svm_sc, int vcpu, int64_t info1,
- int in, struct vm_inout_str *vis)
-{
- int error, s;
-
- if (in) {
- vis->seg_name = VM_REG_GUEST_ES;
- } else {
- /* The segment field has standard encoding */
- s = (info1 >> 10) & 0x7;
- vis->seg_name = vm_segment_name(s);
- }
-
- error = vmcb_getdesc(svm_sc, vcpu, vis->seg_name, &vis->seg_desc);
- KASSERT(error == 0, ("%s: svm_getdesc error %d", __func__, error));
-}
-
-static int
-svm_inout_str_addrsize(uint64_t info1)
-{
- uint32_t size;
-
- size = (info1 >> 7) & 0x7;
- switch (size) {
- case 1:
- return (2); /* 16 bit */
- case 2:
- return (4); /* 32 bit */
- case 4:
- return (8); /* 64 bit */
- default:
- panic("%s: invalid size encoding %d", __func__, size);
- }
-}
static void
svm_paging_info(struct vmcb *vmcb, struct vm_guest_paging *paging)
@@ -792,53 +737,78 @@ svm_paging_info(struct vmcb *vmcb, struct vm_guest_paging *paging)
* Handle guest I/O intercept.
*/
static int
-svm_handle_io(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit)
+svm_handle_inout(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit)
{
struct vmcb_ctrl *ctrl;
struct vmcb_state *state;
- struct svm_regctx *regs;
- struct vm_inout_str *vis;
+ struct vm_inout *inout;
+ struct vie *vie;
uint64_t info1;
- int inout_string;
+ struct vm_guest_paging paging;
state = svm_get_vmcb_state(svm_sc, vcpu);
- ctrl = svm_get_vmcb_ctrl(svm_sc, vcpu);
- regs = svm_get_guest_regctx(svm_sc, vcpu);
-
+ ctrl = svm_get_vmcb_ctrl(svm_sc, vcpu);
+ inout = &vmexit->u.inout;
info1 = ctrl->exitinfo1;
- inout_string = info1 & BIT(2) ? 1 : 0;
- /*
- * The effective segment number in EXITINFO1[12:10] is populated
- * only if the processor has the DecodeAssist capability.
- *
- * XXX this is not specified explicitly in APMv2 but can be verified
- * empirically.
- */
- if (inout_string && !decode_assist())
- return (UNHANDLED);
-
- vmexit->exitcode = VM_EXITCODE_INOUT;
- vmexit->u.inout.in = (info1 & BIT(0)) ? 1 : 0;
- vmexit->u.inout.string = inout_string;
- vmexit->u.inout.rep = (info1 & BIT(3)) ? 1 : 0;
- vmexit->u.inout.bytes = (info1 >> 4) & 0x7;
- vmexit->u.inout.port = (uint16_t)(info1 >> 16);
- vmexit->u.inout.eax = (uint32_t)(state->rax);
-
- if (inout_string) {
- vmexit->exitcode = VM_EXITCODE_INOUT_STR;
- vis = &vmexit->u.inout_str;
- svm_paging_info(svm_get_vmcb(svm_sc, vcpu), &vis->paging);
- vis->rflags = state->rflags;
- vis->cr0 = state->cr0;
- vis->index = svm_inout_str_index(regs, vmexit->u.inout.in);
- vis->count = svm_inout_str_count(regs, vmexit->u.inout.rep);
- vis->addrsize = svm_inout_str_addrsize(info1);
- svm_inout_str_seginfo(svm_sc, vcpu, info1,
- vmexit->u.inout.in, vis);
+ inout->bytes = (info1 >> 4) & 0x7;
+ inout->flags = 0;
+ inout->flags |= (info1 & BIT(0)) ? INOUT_IN : 0;
+ inout->flags |= (info1 & BIT(3)) ? INOUT_REP : 0;
+ inout->flags |= (info1 & BIT(2)) ? INOUT_STR : 0;
+ inout->port = (uint16_t)(info1 >> 16);
+ inout->eax = (uint32_t)(state->rax);
+
+ if ((inout->flags & INOUT_STR) != 0) {
+ /*
+ * The effective segment number in EXITINFO1[12:10] is populated
+ * only if the processor has the DecodeAssist capability.
+ *
+ * This is not specified explicitly in APMv2 but can be verified
+ * empirically.
+ */
+ if (!decode_assist()) {
+ /*
+ * Without decoding assistance, force the task of
+ * emulating the ins/outs on userspace.
+ */
+ vmexit->exitcode = VM_EXITCODE_INST_EMUL;
+ bzero(&vmexit->u.inst_emul,
+ sizeof (vmexit->u.inst_emul));
+ return (UNHANDLED);
+ }
+
+ /*
+ * Bits 7-9 encode the address size of ins/outs operations where
+ * the 1/2/4 values correspond to 16/32/64 bit sizes.
+ */
+ inout->addrsize = 2 * ((info1 >> 7) & 0x7);
+ VERIFY(inout->addrsize == 2 || inout->addrsize == 4 ||
+ inout->addrsize == 8);
+
+ if (inout->flags & INOUT_IN) {
+ /*
+ * For INS instructions, %es (encoded as 0) is the
+ * implied segment for the operation.
+ */
+ inout->segment = 0;
+ } else {
+ /*
+ * Bits 10-12 encode the segment for OUTS.
+ * This value follows the standard x86 segment order.
+ */
+ inout->segment = (info1 >> 10) & 0x7;
+ }
}
+ vmexit->exitcode = VM_EXITCODE_INOUT;
+ svm_paging_info(svm_get_vmcb(svm_sc, vcpu), &paging);
+ vie = vm_vie_ctx(svm_sc->vm, vcpu);
+ vie_init_inout(vie, inout, vmexit->inst_length, &paging);
+
+ /* The in/out emulation will handle advancing %rip */
+ vmexit->inst_length = 0;
+
return (UNHANDLED);
}
@@ -857,7 +827,6 @@ npf_fault_type(uint64_t exitinfo1)
static bool
svm_npf_emul_fault(uint64_t exitinfo1)
{
-
if (exitinfo1 & VMCB_NPF_INFO1_ID) {
return (false);
}
@@ -870,48 +839,52 @@ svm_npf_emul_fault(uint64_t exitinfo1)
return (false);
}
- return (true);
+ return (true);
}
static void
-svm_handle_inst_emul(struct vmcb *vmcb, uint64_t gpa, struct vm_exit *vmexit)
+svm_handle_mmio_emul(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit,
+ uint64_t gpa)
{
- struct vm_guest_paging *paging;
- struct vmcb_segment seg;
struct vmcb_ctrl *ctrl;
- char *inst_bytes;
- int error, inst_len;
+ struct vmcb *vmcb;
+ struct vie *vie;
+ struct vm_guest_paging paging;
+ struct vmcb_segment seg;
+ char *inst_bytes = NULL;
+ uint8_t inst_len = 0;
+ int error;
+ vmcb = svm_get_vmcb(svm_sc, vcpu);
ctrl = &vmcb->ctrl;
- paging = &vmexit->u.inst_emul.paging;
- vmexit->exitcode = VM_EXITCODE_INST_EMUL;
- vmexit->u.inst_emul.gpa = gpa;
- vmexit->u.inst_emul.gla = VIE_INVALID_GLA;
- svm_paging_info(vmcb, paging);
+ vmexit->exitcode = VM_EXITCODE_MMIO_EMUL;
+ vmexit->u.mmio_emul.gpa = gpa;
+ vmexit->u.mmio_emul.gla = VIE_INVALID_GLA;
+ svm_paging_info(vmcb, &paging);
error = vmcb_seg(vmcb, VM_REG_GUEST_CS, &seg);
KASSERT(error == 0, ("%s: vmcb_seg(CS) error %d", __func__, error));
- switch(paging->cpu_mode) {
+ switch (paging.cpu_mode) {
case CPU_MODE_REAL:
- vmexit->u.inst_emul.cs_base = seg.base;
- vmexit->u.inst_emul.cs_d = 0;
+ vmexit->u.mmio_emul.cs_base = seg.base;
+ vmexit->u.mmio_emul.cs_d = 0;
break;
case CPU_MODE_PROTECTED:
case CPU_MODE_COMPATIBILITY:
- vmexit->u.inst_emul.cs_base = seg.base;
+ vmexit->u.mmio_emul.cs_base = seg.base;
/*
* Section 4.8.1 of APM2, Default Operand Size or D bit.
*/
- vmexit->u.inst_emul.cs_d = (seg.attrib & VMCB_CS_ATTRIB_D) ?
+ vmexit->u.mmio_emul.cs_d = (seg.attrib & VMCB_CS_ATTRIB_D) ?
1 : 0;
break;
default:
- vmexit->u.inst_emul.cs_base = 0;
- vmexit->u.inst_emul.cs_d = 0;
- break;
+ vmexit->u.mmio_emul.cs_base = 0;
+ vmexit->u.mmio_emul.cs_d = 0;
+ break;
}
/*
@@ -920,11 +893,9 @@ svm_handle_inst_emul(struct vmcb *vmcb, uint64_t gpa, struct vm_exit *vmexit)
if (decode_assist() && !disable_npf_assist) {
inst_len = ctrl->inst_len;
inst_bytes = (char *)ctrl->inst_bytes;
- } else {
- inst_len = 0;
- inst_bytes = NULL;
}
- vie_init(&vmexit->u.inst_emul.vie, inst_bytes, inst_len);
+ vie = vm_vie_ctx(svm_sc->vm, vcpu);
+ vie_init_mmio(vie, inst_bytes, inst_len, &paging, gpa);
}
#ifdef KTR
@@ -1520,7 +1491,7 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit)
}
break;
case VMCB_EXIT_IO:
- handled = svm_handle_io(svm_sc, vcpu, vmexit);
+ handled = svm_handle_inout(svm_sc, vcpu, vmexit);
vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_INOUT, 1);
break;
case VMCB_EXIT_CPUID:
@@ -1552,9 +1523,9 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit)
"on gpa %#lx/%#lx at rip %#lx",
info2, info1, state->rip);
} else if (svm_npf_emul_fault(info1)) {
- svm_handle_inst_emul(vmcb, info2, vmexit);
- vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_INST_EMUL, 1);
- VCPU_CTR3(svm_sc->vm, vcpu, "inst_emul fault "
+ svm_handle_mmio_emul(svm_sc, vcpu, vmexit, info2);
+ vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_MMIO_EMUL, 1);
+ VCPU_CTR3(svm_sc->vm, vcpu, "mmio_emul fault "
"for gpa %#lx/%#lx at rip %#lx",
info2, info1, state->rip);
}
@@ -1568,7 +1539,7 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit)
default:
vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_UNKNOWN, 1);
break;
- }
+ }
VCPU_CTR4(svm_sc->vm, vcpu, "%s %s vmexit at %#lx/%d",
handled ? "handled" : "unhandled", exit_reason_to_str(code),
diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx.c b/usr/src/uts/i86pc/io/vmm/intel/vmx.c
index 83f149c6b7..8156121571 100644
--- a/usr/src/uts/i86pc/io/vmm/intel/vmx.c
+++ b/usr/src/uts/i86pc/io/vmm/intel/vmx.c
@@ -77,7 +77,7 @@ __FBSDID("$FreeBSD$");
#include <machine/vmm.h>
#include <machine/vmm_dev.h>
-#include <machine/vmm_instruction_emul.h>
+#include <sys/vmm_instruction_emul.h>
#include "vmm_lapic.h"
#include "vmm_host.h"
#include "vmm_ioport.h"
@@ -1887,69 +1887,6 @@ vmx_paging_mode(void)
return (PAGING_MODE_PAE);
}
-static uint64_t
-inout_str_index(struct vmx *vmx, int vcpuid, int in)
-{
- uint64_t val;
- int error;
- enum vm_reg_name reg;
-
- reg = in ? VM_REG_GUEST_RDI : VM_REG_GUEST_RSI;
- error = vmx_getreg(vmx, vcpuid, reg, &val);
- KASSERT(error == 0, ("%s: vmx_getreg error %d", __func__, error));
- return (val);
-}
-
-static uint64_t
-inout_str_count(struct vmx *vmx, int vcpuid, int rep)
-{
- uint64_t val;
- int error;
-
- if (rep) {
- error = vmx_getreg(vmx, vcpuid, VM_REG_GUEST_RCX, &val);
- KASSERT(!error, ("%s: vmx_getreg error %d", __func__, error));
- } else {
- val = 1;
- }
- return (val);
-}
-
-static int
-inout_str_addrsize(uint32_t inst_info)
-{
- uint32_t size;
-
- size = (inst_info >> 7) & 0x7;
- switch (size) {
- case 0:
- return (2); /* 16 bit */
- case 1:
- return (4); /* 32 bit */
- case 2:
- return (8); /* 64 bit */
- default:
- panic("%s: invalid size encoding %d", __func__, size);
- }
-}
-
-static void
-inout_str_seginfo(struct vmx *vmx, int vcpuid, uint32_t inst_info, int in,
- struct vm_inout_str *vis)
-{
- int error, s;
-
- if (in) {
- vis->seg_name = VM_REG_GUEST_ES;
- } else {
- s = (inst_info >> 15) & 0x7;
- vis->seg_name = vm_segment_name(s);
- }
-
- error = vmx_getdesc(vmx, vcpuid, vis->seg_name, &vis->seg_desc);
- KASSERT(error == 0, ("%s: vmx_getdesc error %d", __func__, error));
-}
-
static void
vmx_paging_info(struct vm_guest_paging *paging)
{
@@ -1960,35 +1897,89 @@ vmx_paging_info(struct vm_guest_paging *paging)
}
static void
-vmexit_inst_emul(struct vm_exit *vmexit, uint64_t gpa, uint64_t gla)
+vmexit_mmio_emul(struct vm_exit *vmexit, struct vie *vie, uint64_t gpa,
+ uint64_t gla)
{
- struct vm_guest_paging *paging;
+ struct vm_guest_paging paging;
uint32_t csar;
- paging = &vmexit->u.inst_emul.paging;
-
- vmexit->exitcode = VM_EXITCODE_INST_EMUL;
+ vmexit->exitcode = VM_EXITCODE_MMIO_EMUL;
vmexit->inst_length = 0;
- vmexit->u.inst_emul.gpa = gpa;
- vmexit->u.inst_emul.gla = gla;
- vmx_paging_info(paging);
- switch (paging->cpu_mode) {
+ vmexit->u.mmio_emul.gpa = gpa;
+ vmexit->u.mmio_emul.gla = gla;
+ vmx_paging_info(&paging);
+
+ switch (paging.cpu_mode) {
case CPU_MODE_REAL:
- vmexit->u.inst_emul.cs_base = vmcs_read(VMCS_GUEST_CS_BASE);
- vmexit->u.inst_emul.cs_d = 0;
+ vmexit->u.mmio_emul.cs_base = vmcs_read(VMCS_GUEST_CS_BASE);
+ vmexit->u.mmio_emul.cs_d = 0;
break;
case CPU_MODE_PROTECTED:
case CPU_MODE_COMPATIBILITY:
- vmexit->u.inst_emul.cs_base = vmcs_read(VMCS_GUEST_CS_BASE);
+ vmexit->u.mmio_emul.cs_base = vmcs_read(VMCS_GUEST_CS_BASE);
csar = vmcs_read(VMCS_GUEST_CS_ACCESS_RIGHTS);
- vmexit->u.inst_emul.cs_d = SEG_DESC_DEF32(csar);
+ vmexit->u.mmio_emul.cs_d = SEG_DESC_DEF32(csar);
break;
default:
- vmexit->u.inst_emul.cs_base = 0;
- vmexit->u.inst_emul.cs_d = 0;
+ vmexit->u.mmio_emul.cs_base = 0;
+ vmexit->u.mmio_emul.cs_d = 0;
break;
}
- vie_init(&vmexit->u.inst_emul.vie, NULL, 0);
+
+ vie_init_mmio(vie, NULL, 0, &paging, gpa);
+}
+
+static void
+vmexit_inout(struct vm_exit *vmexit, struct vie *vie, uint64_t qual,
+ uint32_t eax)
+{
+ struct vm_guest_paging paging;
+ struct vm_inout *inout;
+
+ inout = &vmexit->u.inout;
+
+ inout->bytes = (qual & 0x7) + 1;
+ inout->flags = 0;
+ inout->flags |= (qual & 0x8) ? INOUT_IN : 0;
+ inout->flags |= (qual & 0x10) ? INOUT_STR : 0;
+ inout->flags |= (qual & 0x20) ? INOUT_REP : 0;
+ inout->port = (uint16_t)(qual >> 16);
+ inout->eax = eax;
+ if (inout->flags & INOUT_STR) {
+ uint64_t inst_info;
+
+ inst_info = vmcs_read(VMCS_EXIT_INSTRUCTION_INFO);
+
+ /*
+ * Bits 7-9 encode the address size of ins/outs operations where
+ * the 0/1/2 values correspond to 16/32/64 bit sizes.
+ */
+ inout->addrsize = 2 << ((inst_info >> 7) & 0x3);
+ VERIFY(inout->addrsize == 2 || inout->addrsize == 4 ||
+ inout->addrsize == 8);
+
+ if (inout->flags & INOUT_IN) {
+ /*
+ * The bits describing the segment in INSTRUCTION_INFO
+ * are not defined for ins, leaving it to system
+ * software to assume %es (encoded as 0)
+ */
+ inout->segment = 0;
+ } else {
+ /*
+ * Bits 15-17 encode the segment for OUTS.
+ * This value follows the standard x86 segment order.
+ */
+ inout->segment = (inst_info >> 15) & 0x7;
+ }
+ }
+
+ vmexit->exitcode = VM_EXITCODE_INOUT;
+ vmx_paging_info(&paging);
+ vie_init_inout(vie, inout, vmexit->inst_length, &paging);
+
+ /* The in/out emulation will handle advancing %rip */
+ vmexit->inst_length = 0;
}
static int
@@ -2136,6 +2127,7 @@ vmx_handle_apic_access(struct vmx *vmx, int vcpuid, struct vm_exit *vmexit)
{
uint64_t qual;
int access_type, offset, allowed;
+ struct vie *vie;
if (!apic_access_virtualization(vmx, vcpuid))
return (UNHANDLED);
@@ -2182,7 +2174,8 @@ vmx_handle_apic_access(struct vmx *vmx, int vcpuid, struct vm_exit *vmexit)
}
if (allowed) {
- vmexit_inst_emul(vmexit, DEFAULT_APIC_BASE + offset,
+ vie = vm_vie_ctx(vmx->vm, vcpuid);
+ vmexit_mmio_emul(vmexit, vie, DEFAULT_APIC_BASE + offset,
VIE_INVALID_GLA);
}
@@ -2264,10 +2257,10 @@ emulate_rdmsr(struct vmx *vmx, int vcpuid, u_int num, bool *retu)
static int
vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
{
- int error, errcode, errcode_valid, handled, in;
+ int error, errcode, errcode_valid, handled;
struct vmxctx *vmxctx;
+ struct vie *vie;
struct vlapic *vlapic;
- struct vm_inout_str *vis;
struct vm_task_switch *ts;
uint32_t eax, ecx, edx, idtvec_info, idtvec_err, intr_info, inst_info;
uint32_t intr_type, intr_vec, reason;
@@ -2524,25 +2517,8 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
return (1);
case EXIT_REASON_INOUT:
vmm_stat_incr(vmx->vm, vcpu, VMEXIT_INOUT, 1);
- vmexit->exitcode = VM_EXITCODE_INOUT;
- vmexit->u.inout.bytes = (qual & 0x7) + 1;
- vmexit->u.inout.in = in = (qual & 0x8) ? 1 : 0;
- vmexit->u.inout.string = (qual & 0x10) ? 1 : 0;
- vmexit->u.inout.rep = (qual & 0x20) ? 1 : 0;
- vmexit->u.inout.port = (uint16_t)(qual >> 16);
- vmexit->u.inout.eax = (uint32_t)(vmxctx->guest_rax);
- if (vmexit->u.inout.string) {
- inst_info = vmcs_read(VMCS_EXIT_INSTRUCTION_INFO);
- vmexit->exitcode = VM_EXITCODE_INOUT_STR;
- vis = &vmexit->u.inout_str;
- vmx_paging_info(&vis->paging);
- vis->rflags = vmcs_read(VMCS_GUEST_RFLAGS);
- vis->cr0 = vmcs_read(VMCS_GUEST_CR0);
- vis->index = inout_str_index(vmx, vcpu, in);
- vis->count = inout_str_count(vmx, vcpu, vis->inout.rep);
- vis->addrsize = inout_str_addrsize(inst_info);
- inout_str_seginfo(vmx, vcpu, inst_info, in, vis);
- }
+ vie = vm_vie_ctx(vmx->vm, vcpu);
+ vmexit_inout(vmexit, vie, qual, (uint32_t)vmxctx->guest_rax);
SDT_PROBE3(vmm, vmx, exit, inout, vmx, vcpu, vmexit);
break;
case EXIT_REASON_CPUID:
@@ -2653,8 +2629,9 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
SDT_PROBE5(vmm, vmx, exit, nestedfault,
vmx, vcpu, vmexit, gpa, qual);
} else if (ept_emulation_fault(qual)) {
- vmexit_inst_emul(vmexit, gpa, vmcs_gla());
- vmm_stat_incr(vmx->vm, vcpu, VMEXIT_INST_EMUL, 1);
+ vie = vm_vie_ctx(vmx->vm, vcpu);
+ vmexit_mmio_emul(vmexit, vie, gpa, vmcs_gla());
+ vmm_stat_incr(vmx->vm, vcpu, VMEXIT_MMIO_EMUL, 1);
SDT_PROBE4(vmm, vmx, exit, mmiofault,
vmx, vcpu, vmexit, gpa);
}
diff --git a/usr/src/uts/i86pc/io/vmm/io/vatpic.c b/usr/src/uts/i86pc/io/vmm/io/vatpic.c
index ba4cd7785e..817c815fd6 100644
--- a/usr/src/uts/i86pc/io/vmm/io/vatpic.c
+++ b/usr/src/uts/i86pc/io/vmm/io/vatpic.c
@@ -709,8 +709,8 @@ vatpic_write(struct vatpic *vatpic, struct atpic *atpic, bool in, int port,
}
int
-vatpic_master_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
- uint32_t *eax)
+vatpic_master_handler(struct vm *vm, int vcpuid, bool in, uint16_t port,
+ uint8_t bytes, uint32_t *eax)
{
struct vatpic *vatpic;
struct atpic *atpic;
@@ -729,8 +729,8 @@ vatpic_master_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
}
int
-vatpic_slave_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
- uint32_t *eax)
+vatpic_slave_handler(struct vm *vm, int vcpuid, bool in, uint16_t port,
+ uint8_t bytes, uint32_t *eax)
{
struct vatpic *vatpic;
struct atpic *atpic;
@@ -749,8 +749,8 @@ vatpic_slave_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
}
int
-vatpic_elc_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
- uint32_t *eax)
+vatpic_elc_handler(struct vm *vm, int vcpuid, bool in, uint16_t port,
+ uint8_t bytes, uint32_t *eax)
{
struct vatpic *vatpic;
bool is_master;
diff --git a/usr/src/uts/i86pc/io/vmm/io/vatpic.h b/usr/src/uts/i86pc/io/vmm/io/vatpic.h
index d4a1be1820..dcb8ea6c6f 100644
--- a/usr/src/uts/i86pc/io/vmm/io/vatpic.h
+++ b/usr/src/uts/i86pc/io/vmm/io/vatpic.h
@@ -39,12 +39,12 @@
struct vatpic *vatpic_init(struct vm *vm);
void vatpic_cleanup(struct vatpic *vatpic);
-int vatpic_master_handler(struct vm *vm, int vcpuid, bool in, int port,
- int bytes, uint32_t *eax);
-int vatpic_slave_handler(struct vm *vm, int vcpuid, bool in, int port,
- int bytes, uint32_t *eax);
-int vatpic_elc_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
- uint32_t *eax);
+int vatpic_master_handler(struct vm *vm, int vcpuid, bool in, uint16_t port,
+ uint8_t bytes, uint32_t *eax);
+int vatpic_slave_handler(struct vm *vm, int vcpuid, bool in, uint16_t port,
+ uint8_t bytes, uint32_t *eax);
+int vatpic_elc_handler(struct vm *vm, int vcpuid, bool in, uint16_t port,
+ uint8_t bytes, uint32_t *eax);
int vatpic_assert_irq(struct vm *vm, int irq);
int vatpic_deassert_irq(struct vm *vm, int irq);
diff --git a/usr/src/uts/i86pc/io/vmm/io/vatpit.c b/usr/src/uts/i86pc/io/vmm/io/vatpit.c
index 03f63798e7..47cb40f9bd 100644
--- a/usr/src/uts/i86pc/io/vmm/io/vatpit.c
+++ b/usr/src/uts/i86pc/io/vmm/io/vatpit.c
@@ -336,7 +336,7 @@ vatpit_update_mode(struct vatpit *vatpit, uint8_t val)
}
int
-vatpit_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
+vatpit_handler(struct vm *vm, int vcpuid, bool in, uint16_t port, uint8_t bytes,
uint32_t *eax)
{
struct vatpit *vatpit;
@@ -419,8 +419,8 @@ vatpit_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
}
int
-vatpit_nmisc_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
- uint32_t *eax)
+vatpit_nmisc_handler(struct vm *vm, int vcpuid, bool in, uint16_t port,
+ uint8_t bytes, uint32_t *eax)
{
struct vatpit *vatpit;
diff --git a/usr/src/uts/i86pc/io/vmm/io/vatpit.h b/usr/src/uts/i86pc/io/vmm/io/vatpit.h
index 4bf9fe048d..512ce20735 100644
--- a/usr/src/uts/i86pc/io/vmm/io/vatpit.h
+++ b/usr/src/uts/i86pc/io/vmm/io/vatpit.h
@@ -39,10 +39,10 @@
struct vatpit *vatpit_init(struct vm *vm);
void vatpit_cleanup(struct vatpit *vatpit);
-int vatpit_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
- uint32_t *eax);
-int vatpit_nmisc_handler(struct vm *vm, int vcpuid, bool in, int port,
- int bytes, uint32_t *eax);
+int vatpit_handler(struct vm *vm, int vcpuid, bool in, uint16_t port,
+ uint8_t bytes, uint32_t *eax);
+int vatpit_nmisc_handler(struct vm *vm, int vcpuid, bool in, uint16_t port,
+ uint8_t bytes, uint32_t *eax);
#ifndef __FreeBSD__
void vatpit_localize_resources(struct vatpit *);
diff --git a/usr/src/uts/i86pc/io/vmm/io/vlapic.c b/usr/src/uts/i86pc/io/vmm/io/vlapic.c
index af902ba40e..60fc907b85 100644
--- a/usr/src/uts/i86pc/io/vmm/io/vlapic.c
+++ b/usr/src/uts/i86pc/io/vmm/io/vlapic.c
@@ -988,7 +988,6 @@ vlapic_icrlo_write_handler(struct vlapic *vlapic, bool *retu)
uint64_t icrval;
uint32_t dest, vec, mode;
struct vlapic *vlapic2;
- struct vm_exit *vmexit;
struct LAPIC *lapic;
uint16_t maxcpus;
@@ -1082,13 +1081,7 @@ vlapic_icrlo_write_handler(struct vlapic *vlapic, bool *retu)
return (0);
vlapic2->boot_state = BS_RUNNING;
-
- *retu = true;
- vmexit = vm_exitinfo(vlapic->vm, vlapic->vcpuid);
- vmexit->exitcode = VM_EXITCODE_SPINUP_AP;
- vmexit->u.spinup_ap.vcpu = dest;
- vmexit->u.spinup_ap.rip = vec << PAGE_SHIFT;
-
+ vm_req_spinup_ap(vlapic->vm, dest, vec << PAGE_SHIFT);
return (0);
}
}
diff --git a/usr/src/uts/i86pc/io/vmm/io/vpmtmr.c b/usr/src/uts/i86pc/io/vmm/io/vpmtmr.c
index 4df909777d..0dce2b0a1f 100644
--- a/usr/src/uts/i86pc/io/vmm/io/vpmtmr.c
+++ b/usr/src/uts/i86pc/io/vmm/io/vpmtmr.c
@@ -80,7 +80,7 @@ vpmtmr_cleanup(struct vpmtmr *vpmtmr)
}
int
-vpmtmr_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
+vpmtmr_handler(struct vm *vm, int vcpuid, bool in, uint16_t port, uint8_t bytes,
uint32_t *val)
{
struct vpmtmr *vpmtmr;
diff --git a/usr/src/uts/i86pc/io/vmm/io/vpmtmr.h b/usr/src/uts/i86pc/io/vmm/io/vpmtmr.h
index e6562da5c0..c06825b970 100644
--- a/usr/src/uts/i86pc/io/vmm/io/vpmtmr.h
+++ b/usr/src/uts/i86pc/io/vmm/io/vpmtmr.h
@@ -38,7 +38,7 @@ struct vpmtmr;
struct vpmtmr *vpmtmr_init(struct vm *vm);
void vpmtmr_cleanup(struct vpmtmr *pmtmr);
-int vpmtmr_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
- uint32_t *val);
+int vpmtmr_handler(struct vm *vm, int vcpuid, bool in, uint16_t port,
+ uint8_t bytes, uint32_t *val);
#endif
diff --git a/usr/src/uts/i86pc/io/vmm/io/vrtc.c b/usr/src/uts/i86pc/io/vmm/io/vrtc.c
index a3635fc9f0..7a98cd75ad 100644
--- a/usr/src/uts/i86pc/io/vmm/io/vrtc.c
+++ b/usr/src/uts/i86pc/io/vmm/io/vrtc.c
@@ -874,8 +874,8 @@ vrtc_nvram_read(struct vm *vm, int offset, uint8_t *retval)
}
int
-vrtc_addr_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
- uint32_t *val)
+vrtc_addr_handler(struct vm *vm, int vcpuid, bool in, uint16_t port,
+ uint8_t bytes, uint32_t *val)
{
struct vrtc *vrtc;
@@ -897,8 +897,8 @@ vrtc_addr_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
}
int
-vrtc_data_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
- uint32_t *val)
+vrtc_data_handler(struct vm *vm, int vcpuid, bool in, uint16_t port,
+ uint8_t bytes, uint32_t *val)
{
struct vrtc *vrtc;
struct rtcdev *rtc;
diff --git a/usr/src/uts/i86pc/io/vmm/io/vrtc.h b/usr/src/uts/i86pc/io/vmm/io/vrtc.h
index 13abbedeb9..92a060cb8e 100644
--- a/usr/src/uts/i86pc/io/vmm/io/vrtc.h
+++ b/usr/src/uts/i86pc/io/vmm/io/vrtc.h
@@ -48,10 +48,10 @@ int vrtc_set_time(struct vm *vm, time_t secs);
int vrtc_nvram_write(struct vm *vm, int offset, uint8_t value);
int vrtc_nvram_read(struct vm *vm, int offset, uint8_t *retval);
-int vrtc_addr_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
- uint32_t *val);
-int vrtc_data_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
- uint32_t *val);
+int vrtc_addr_handler(struct vm *vm, int vcpuid, bool in, uint16_t port,
+ uint8_t bytes, uint32_t *val);
+int vrtc_data_handler(struct vm *vm, int vcpuid, bool in, uint16_t port,
+ uint8_t bytes, uint32_t *val);
#ifndef __FreeBSD__
void vrtc_localize_resources(struct vrtc *);
diff --git a/usr/src/uts/i86pc/sys/vmm_instruction_emul.h b/usr/src/uts/i86pc/io/vmm/sys/vmm_instruction_emul.h
index d084301aee..d3a07b0f99 100644
--- a/usr/src/uts/i86pc/sys/vmm_instruction_emul.h
+++ b/usr/src/uts/i86pc/io/vmm/sys/vmm_instruction_emul.h
@@ -27,64 +27,57 @@
*
* $FreeBSD$
*/
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * Copyright 2020 Oxide Computer Company
+ */
#ifndef _VMM_INSTRUCTION_EMUL_H_
#define _VMM_INSTRUCTION_EMUL_H_
#include <sys/mman.h>
+#include <machine/vmm.h>
-/*
- * Callback functions to read and write memory regions.
- */
-typedef int (*mem_region_read_t)(void *vm, int cpuid, uint64_t gpa,
- uint64_t *rval, int rsize, void *arg);
-
-typedef int (*mem_region_write_t)(void *vm, int cpuid, uint64_t gpa,
- uint64_t wval, int wsize, void *arg);
+struct vie;
-/*
- * Emulate the decoded 'vie' instruction.
- *
- * The callbacks 'mrr' and 'mrw' emulate reads and writes to the memory region
- * containing 'gpa'. 'mrarg' is an opaque argument that is passed into the
- * callback functions.
- *
- * 'void *vm' should be 'struct vm *' when called from kernel context and
- * 'struct vmctx *' when called from user context.
- * s
- */
-int vmm_emulate_instruction(void *vm, int cpuid, uint64_t gpa, struct vie *vie,
- struct vm_guest_paging *paging, mem_region_read_t mrr,
- mem_region_write_t mrw, void *mrarg);
+struct vie *vie_alloc();
+void vie_free(struct vie *);
-int vie_update_register(void *vm, int vcpuid, enum vm_reg_name reg,
- uint64_t val, int size);
+void vie_init_mmio(struct vie *vie, const char *inst_bytes, uint8_t inst_length,
+ const struct vm_guest_paging *paging, uint64_t gpa);
+void vie_init_inout(struct vie *vie, const struct vm_inout *inout,
+ uint8_t inst_len, const struct vm_guest_paging *paging);
-/*
- * Returns 1 if an alignment check exception should be injected and 0 otherwise.
- */
-int vie_alignment_check(int cpl, int operand_size, uint64_t cr0,
- uint64_t rflags, uint64_t gla);
+int vie_fulfill_mmio(struct vie *vie, const struct vm_mmio *res);
+int vie_fulfill_inout(struct vie *vie, const struct vm_inout *res);
-/* Returns 1 if the 'gla' is not canonical and 0 otherwise. */
-int vie_canonical_check(enum vm_cpu_mode cpu_mode, uint64_t gla);
+bool vie_needs_fetch(const struct vie *vie);
+bool vie_pending(const struct vie *vie);
+uint64_t vie_mmio_gpa(const struct vie *vie);
+void vie_exitinfo(const struct vie *vie, struct vm_exit *vme);
+void vie_fallback_exitinfo(const struct vie *vie, struct vm_exit *vme);
-uint64_t vie_size2mask(int size);
+void vie_reset(struct vie *vie);
+void vie_advance_pc(struct vie *vie, uint64_t *nextrip);
-int vie_calculate_gla(enum vm_cpu_mode cpu_mode, enum vm_reg_name seg,
- struct seg_desc *desc, uint64_t off, int length, int addrsize, int prot,
- uint64_t *gla);
+int vie_emulate_mmio(struct vie *vie, void *vm, int vcpuid);
+int vie_emulate_inout(struct vie *vie, struct vm *vm, int vcpuid);
-#ifdef _KERNEL
/*
* APIs to fetch and decode the instruction from nested page fault handler.
*
- * 'vie' must be initialized before calling 'vmm_fetch_instruction()'
+ * 'vie' must be initialized before calling 'vie_fetch_instruction()'
*/
-int vmm_fetch_instruction(struct vm *vm, int cpuid,
- struct vm_guest_paging *guest_paging,
- uint64_t rip, int inst_length, struct vie *vie,
- int *is_fault);
+int vie_fetch_instruction(struct vie *vie, struct vm *vm, int cpuid,
+ uint64_t rip, int *is_fault);
/*
* Translate the guest linear address 'gla' to a guest physical address.
@@ -101,34 +94,23 @@ int vm_gla2gpa(struct vm *vm, int vcpuid, struct vm_guest_paging *paging,
* Like vm_gla2gpa, but no exceptions are injected into the guest and
* PTEs are not changed.
*/
-int vm_gla2gpa_nofault(struct vm *vm, int vcpuid, struct vm_guest_paging *paging,
- uint64_t gla, int prot, uint64_t *gpa, int *is_fault);
-#endif /* _KERNEL */
-
-void vie_init(struct vie *vie, const char *inst_bytes, int inst_length);
+int vm_gla2gpa_nofault(struct vm *vm, int vcpuid,
+ struct vm_guest_paging *paging, uint64_t gla, int prot, uint64_t *gpa,
+ int *is_fault);
+int vie_verify_gla(struct vie *vie, struct vm *vm, int cpuid, uint64_t gla);
/*
* Decode the instruction fetched into 'vie' so it can be emulated.
*
* 'gla' is the guest linear address provided by the hardware assist
* that caused the nested page table fault. It is used to verify that
* the software instruction decoding is in agreement with the hardware.
- *
+ *
* Some hardware assists do not provide the 'gla' to the hypervisor.
* To skip the 'gla' verification for this or any other reason pass
* in VIE_INVALID_GLA instead.
*/
-#ifdef _KERNEL
#define VIE_INVALID_GLA (1UL << 63) /* a non-canonical address */
-int vmm_decode_instruction(struct vm *vm, int cpuid, uint64_t gla,
- enum vm_cpu_mode cpu_mode, int csd, struct vie *vie);
-#else /* !_KERNEL */
-/*
- * Permit instruction decoding logic to be compiled outside of the kernel for
- * rapid iteration and validation. No GLA validation is performed, obviously.
- */
-int vmm_decode_instruction(enum vm_cpu_mode cpu_mode, int csd,
- struct vie *vie);
-#endif /* _KERNEL */
+int vie_decode_instruction(struct vie *vie, struct vm *vm, int cpuid, int csd);
#endif /* _VMM_INSTRUCTION_EMUL_H_ */
diff --git a/usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h b/usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h
index 2a884e6e0e..fbd2884b84 100644
--- a/usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h
+++ b/usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h
@@ -54,6 +54,7 @@ struct vm;
struct vm_exception;
struct seg_desc;
struct vm_exit;
+struct vie;
struct vm_run;
struct vhpet;
struct vioapic;
@@ -171,7 +172,7 @@ int vm_get_seg_desc(struct vm *vm, int vcpu, int reg,
struct seg_desc *ret_desc);
int vm_set_seg_desc(struct vm *vm, int vcpu, int reg,
struct seg_desc *desc);
-int vm_run(struct vm *vm, struct vm_run *vmrun);
+int vm_run(struct vm *vm, int vcpuid, const struct vm_entry *);
int vm_suspend(struct vm *vm, enum vm_suspend_how how);
int vm_inject_nmi(struct vm *vm, int vcpu);
int vm_nmi_pending(struct vm *vm, int vcpuid);
@@ -191,11 +192,17 @@ int vm_activate_cpu(struct vm *vm, int vcpu);
int vm_suspend_cpu(struct vm *vm, int vcpu);
int vm_resume_cpu(struct vm *vm, int vcpu);
struct vm_exit *vm_exitinfo(struct vm *vm, int vcpuid);
+struct vie *vm_vie_ctx(struct vm *vm, int vcpuid);
void vm_exit_suspended(struct vm *vm, int vcpuid, uint64_t rip);
void vm_exit_debug(struct vm *vm, int vcpuid, uint64_t rip);
void vm_exit_runblock(struct vm *vm, int vcpuid, uint64_t rip);
void vm_exit_astpending(struct vm *vm, int vcpuid, uint64_t rip);
void vm_exit_reqidle(struct vm *vm, int vcpuid, uint64_t rip);
+int vm_service_mmio_read(struct vm *vm, int cpuid, uint64_t gpa, uint64_t *rval,
+ int rsize);
+int vm_service_mmio_write(struct vm *vm, int cpuid, uint64_t gpa, uint64_t wval,
+ int wsize);
+void vm_req_spinup_ap(struct vm *vm, int req_vcpuid, uint64_t req_rip);
#ifdef _SYS__CPUSET_H_
cpuset_t vm_active_cpus(struct vm *vm);
diff --git a/usr/src/uts/i86pc/io/vmm/vmm.c b/usr/src/uts/i86pc/io/vmm/vmm.c
index 579ca12e84..f4c22c13dd 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm.c
+++ b/usr/src/uts/i86pc/io/vmm/vmm.c
@@ -79,7 +79,7 @@ __FBSDID("$FreeBSD$");
#include <machine/vmm.h>
#include <machine/vmm_dev.h>
-#include <machine/vmm_instruction_emul.h>
+#include <sys/vmm_instruction_emul.h>
#include "vmm_ioport.h"
#include "vmm_ktr.h"
@@ -135,6 +135,7 @@ struct vcpu {
void *stats; /* (a,i) statistics */
struct vm_exit exitinfo; /* (x) exit reason and collateral */
uint64_t nextrip; /* (x) next instruction to execute */
+ struct vie *vie_ctx; /* (x) instruction emulation context */
#ifndef __FreeBSD__
uint64_t tsc_offset; /* (x) offset from host TSC */
#endif
@@ -200,6 +201,14 @@ struct vm {
#ifndef __FreeBSD__
list_t ioport_hooks;
#endif /* __FreeBSD__ */
+ bool sipi_req; /* (i) SIPI requested */
+ int sipi_req_vcpu; /* (i) SIPI destination */
+ uint64_t sipi_req_rip; /* (i) SIPI start %rip */
+
+ /* Miscellaneous VM-wide statistics and counters */
+ struct vm_wide_stats {
+ uint64_t sipi_supersede;
+ } stats;
};
static int vmm_initialized;
@@ -341,6 +350,8 @@ vcpu_cleanup(struct vm *vm, int i, bool destroy)
if (destroy) {
vmm_stat_free(vcpu->stats);
fpu_save_area_free(vcpu->guestfpu);
+ vie_free(vcpu->vie_ctx);
+ vcpu->vie_ctx = NULL;
}
}
@@ -367,6 +378,10 @@ vcpu_init(struct vm *vm, int vcpu_id, bool create)
#endif
vcpu->guestfpu = fpu_save_area_alloc();
vcpu->stats = vmm_stat_alloc();
+ vcpu->vie_ctx = vie_alloc();
+ } else {
+ vie_reset(vcpu->vie_ctx);
+ bzero(&vcpu->exitinfo, sizeof (vcpu->exitinfo));
}
vcpu->vlapic = VLAPIC_INIT(vm->cookie, vcpu_id);
@@ -402,6 +417,15 @@ vm_exitinfo(struct vm *vm, int cpuid)
return (&vcpu->exitinfo);
}
+struct vie *
+vm_vie_ctx(struct vm *vm, int cpuid)
+{
+ if (cpuid < 0 || cpuid >= vm->maxcpus)
+ panic("vm_vie_ctx: invalid cpuid %d", cpuid);
+
+ return (vm->vcpu[cpuid].vie_ctx);
+}
+
static int
vmm_init(void)
{
@@ -1558,85 +1582,190 @@ done:
return (0);
}
+int
+vm_service_mmio_read(struct vm *vm, int cpuid, uint64_t gpa, uint64_t *rval,
+ int rsize)
+{
+ int err = ESRCH;
+ void *arg = NULL;
+
+ if (gpa >= DEFAULT_APIC_BASE && gpa < DEFAULT_APIC_BASE + PAGE_SIZE) {
+ err = lapic_mmio_read(vm, cpuid, gpa, rval, rsize, &arg);
+ } else if (gpa >= VIOAPIC_BASE && gpa < VIOAPIC_BASE + VIOAPIC_SIZE) {
+ err = vioapic_mmio_read(vm, cpuid, gpa, rval, rsize, &arg);
+ } else if (gpa >= VHPET_BASE && gpa < VHPET_BASE + VHPET_SIZE) {
+ err = vhpet_mmio_read(vm, cpuid, gpa, rval, rsize, &arg);
+ }
+
+ return (err);
+}
+
+int
+vm_service_mmio_write(struct vm *vm, int cpuid, uint64_t gpa, uint64_t wval,
+ int wsize)
+{
+ int err = ESRCH;
+ void *arg = NULL;
+
+ if (gpa >= DEFAULT_APIC_BASE && gpa < DEFAULT_APIC_BASE + PAGE_SIZE) {
+ err = lapic_mmio_write(vm, cpuid, gpa, wval, wsize, &arg);
+ } else if (gpa >= VIOAPIC_BASE && gpa < VIOAPIC_BASE + VIOAPIC_SIZE) {
+ err = vioapic_mmio_write(vm, cpuid, gpa, wval, wsize, &arg);
+ } else if (gpa >= VHPET_BASE && gpa < VHPET_BASE + VHPET_SIZE) {
+ err = vhpet_mmio_write(vm, cpuid, gpa, wval, wsize, &arg);
+ }
+
+ return (err);
+}
+
static int
-vm_handle_inst_emul(struct vm *vm, int vcpuid, bool *retu)
+vm_handle_mmio_emul(struct vm *vm, int vcpuid, bool *retu)
{
struct vie *vie;
struct vcpu *vcpu;
struct vm_exit *vme;
- uint64_t gla, gpa, cs_base;
- struct vm_guest_paging *paging;
- mem_region_read_t mread;
- mem_region_write_t mwrite;
- enum vm_cpu_mode cpu_mode;
- int cs_d, error, fault;
+ uint64_t inst_addr;
+ int error, fault, cs_d;
vcpu = &vm->vcpu[vcpuid];
vme = &vcpu->exitinfo;
+ vie = vcpu->vie_ctx;
KASSERT(vme->inst_length == 0, ("%s: invalid inst_length %d",
__func__, vme->inst_length));
- gla = vme->u.inst_emul.gla;
- gpa = vme->u.inst_emul.gpa;
- cs_base = vme->u.inst_emul.cs_base;
- cs_d = vme->u.inst_emul.cs_d;
- vie = &vme->u.inst_emul.vie;
- paging = &vme->u.inst_emul.paging;
- cpu_mode = paging->cpu_mode;
+ inst_addr = vme->rip + vme->u.mmio_emul.cs_base;
+ cs_d = vme->u.mmio_emul.cs_d;
- VCPU_CTR1(vm, vcpuid, "inst_emul fault accessing gpa %#lx", gpa);
+ VCPU_CTR1(vm, vcpuid, "inst_emul fault accessing gpa %#lx",
+ vme->u.mmio_emul.gpa);
- /* Fetch, decode and emulate the faulting instruction */
- if (vie->num_valid == 0) {
- error = vmm_fetch_instruction(vm, vcpuid, paging, vme->rip +
- cs_base, VIE_INST_SIZE, vie, &fault);
- } else {
- /*
- * The instruction bytes have already been copied into 'vie'
- */
- error = fault = 0;
+ /* Fetch the faulting instruction */
+ if (vie_needs_fetch(vie)) {
+ error = vie_fetch_instruction(vie, vm, vcpuid, inst_addr,
+ &fault);
+ if (error != 0) {
+ return (error);
+ } else if (fault) {
+ /*
+			 * If a fault during instruction fetch was encountered, it
+ * will have asserted that the appropriate exception be
+ * injected at next entry. No further work is required.
+ */
+ return (0);
+ }
}
- if (error || fault)
- return (error);
- if (vmm_decode_instruction(vm, vcpuid, gla, cpu_mode, cs_d, vie) != 0) {
+ if (vie_decode_instruction(vie, vm, vcpuid, cs_d) != 0) {
VCPU_CTR1(vm, vcpuid, "Error decoding instruction at %#lx",
- vme->rip + cs_base);
- *retu = true; /* dump instruction bytes in userspace */
+ inst_addr);
+ /* Dump (unrecognized) instruction bytes in userspace */
+ vie_fallback_exitinfo(vie, vme);
+ *retu = true;
return (0);
}
-
- /*
- * Update 'nextrip' based on the length of the emulated instruction.
- */
- vme->inst_length = vie->num_processed;
- vcpu->nextrip += vie->num_processed;
- VCPU_CTR1(vm, vcpuid, "nextrip updated to %#lx after instruction "
- "decoding", vcpu->nextrip);
-
- /* return to userland unless this is an in-kernel emulated device */
- if (gpa >= DEFAULT_APIC_BASE && gpa < DEFAULT_APIC_BASE + PAGE_SIZE) {
- mread = lapic_mmio_read;
- mwrite = lapic_mmio_write;
- } else if (gpa >= VIOAPIC_BASE && gpa < VIOAPIC_BASE + VIOAPIC_SIZE) {
- mread = vioapic_mmio_read;
- mwrite = vioapic_mmio_write;
- } else if (gpa >= VHPET_BASE && gpa < VHPET_BASE + VHPET_SIZE) {
- mread = vhpet_mmio_read;
- mwrite = vhpet_mmio_write;
- } else {
+ if (vme->u.mmio_emul.gla != VIE_INVALID_GLA &&
+ vie_verify_gla(vie, vm, vcpuid, vme->u.mmio_emul.gla) != 0) {
+ /* Decoded GLA does not match GLA from VM exit state */
+ vie_fallback_exitinfo(vie, vme);
*retu = true;
return (0);
}
- error = vmm_emulate_instruction(vm, vcpuid, gpa, vie, paging,
- mread, mwrite, retu);
-
+repeat:
+ error = vie_emulate_mmio(vie, vm, vcpuid);
+ if (error < 0) {
+ /*
+ * MMIO not handled by any of the in-kernel-emulated devices, so
+ * make a trip out to userspace for it.
+ */
+ vie_exitinfo(vie, vme);
+ *retu = true;
+ error = 0;
+ } else if (error == EAGAIN) {
+ /*
+ * Continue emulating the rep-prefixed instruction, which has
+ * not completed its iterations.
+ *
+ * In case this can be emulated in-kernel and has a high
+ * repetition count (causing a tight spin), it should be
+ * deferential to yield conditions.
+ */
+ if (!vcpu_should_yield(vm, vcpuid)) {
+ goto repeat;
+ } else {
+ /*
+ * Defer to the contending load by making a trip to
+ * userspace with a no-op (BOGUS) exit reason.
+ */
+ vie_reset(vie);
+ vme->exitcode = VM_EXITCODE_BOGUS;
+ *retu = true;
+ return (0);
+ }
+ } else if (error == 0) {
+ /* Update %rip now that instruction has been emulated */
+ vie_advance_pc(vie, &vcpu->nextrip);
+ }
return (error);
}
static int
+vm_handle_inout(struct vm *vm, int vcpuid, struct vm_exit *vme, bool *retu)
+{
+ struct vcpu *vcpu;
+ struct vie *vie;
+ int err;
+
+ vcpu = &vm->vcpu[vcpuid];
+ vie = vcpu->vie_ctx;
+
+repeat:
+ err = vie_emulate_inout(vie, vm, vcpuid);
+
+ if (err < 0) {
+ /*
+ * In/out not handled by any of the in-kernel-emulated devices,
+ * so make a trip out to userspace for it.
+ */
+ vie_exitinfo(vie, vme);
+ *retu = true;
+ return (0);
+ } else if (err == EAGAIN) {
+ /*
+ * Continue emulating the rep-prefixed ins/outs, which has not
+ * completed its iterations.
+ *
+ * In case this can be emulated in-kernel and has a high
+ * repetition count (causing a tight spin), it should be
+ * deferential to yield conditions.
+ */
+ if (!vcpu_should_yield(vm, vcpuid)) {
+ goto repeat;
+ } else {
+ /*
+ * Defer to the contending load by making a trip to
+ * userspace with a no-op (BOGUS) exit reason.
+ */
+ vie_reset(vie);
+ vme->exitcode = VM_EXITCODE_BOGUS;
+ *retu = true;
+ return (0);
+ }
+ } else if (err != 0) {
+ /* Emulation failure. Bail all the way out to userspace. */
+ vme->exitcode = VM_EXITCODE_INST_EMUL;
+ bzero(&vme->u.inst_emul, sizeof (vme->u.inst_emul));
+ *retu = true;
+ return (0);
+ }
+
+ vie_advance_pc(vie, &vcpu->nextrip);
+ *retu = false;
+ return (0);
+}
+
+static int
vm_handle_suspend(struct vm *vm, int vcpuid, bool *retu)
{
#ifdef __FreeBSD__
@@ -1768,6 +1897,18 @@ vm_handle_wrmsr(struct vm *vm, int vcpuid, struct vm_exit *vme)
}
#endif /* __FreeBSD__ */
+void
+vm_req_spinup_ap(struct vm *vm, int req_vcpuid, uint64_t req_rip)
+{
+ if (vm->sipi_req) {
+ /* This should never occur if userspace is doing its job. */
+ vm->stats.sipi_supersede++;
+ }
+ vm->sipi_req = true;
+ vm->sipi_req_vcpu = req_vcpuid;
+ vm->sipi_req_rip = req_rip;
+}
+
int
vm_suspend(struct vm *vm, enum vm_suspend_how how)
{
@@ -1960,11 +2101,104 @@ vmm_freectx(void *arg, int isexec)
#endif /* __FreeBSD */
+static int
+vm_entry_actions(struct vm *vm, int vcpuid, const struct vm_entry *entry,
+ struct vm_exit *vme)
+{
+ struct vcpu *vcpu;
+ struct vie *vie;
+ int err;
+
+ vcpu = &vm->vcpu[vcpuid];
+ vie = vcpu->vie_ctx;
+ err = 0;
+
+ switch (entry->cmd) {
+ case VEC_DEFAULT:
+ return (0);
+ case VEC_DISCARD_INSTR:
+ vie_reset(vie);
+ return (0);
+ case VEC_COMPLETE_MMIO:
+ err = vie_fulfill_mmio(vie, &entry->u.mmio);
+ if (err == 0) {
+ err = vie_emulate_mmio(vie, vm, vcpuid);
+ if (err == 0) {
+ vie_advance_pc(vie, &vcpu->nextrip);
+ } else if (err < 0) {
+ vie_exitinfo(vie, vme);
+ } else if (err == EAGAIN) {
+ /*
+ * Clear the instruction emulation state in
+ * order to re-enter VM context and continue
+ * this 'rep <instruction>'
+ */
+ vie_reset(vie);
+ err = 0;
+ }
+ }
+ break;
+ case VEC_COMPLETE_INOUT:
+ err = vie_fulfill_inout(vie, &entry->u.inout);
+ if (err == 0) {
+ err = vie_emulate_inout(vie, vm, vcpuid);
+ if (err == 0) {
+ vie_advance_pc(vie, &vcpu->nextrip);
+ } else if (err < 0) {
+ vie_exitinfo(vie, vme);
+ } else if (err == EAGAIN) {
+ /*
+ * Clear the instruction emulation state in
+ * order to re-enter VM context and continue
+ * this 'rep ins/outs'
+ */
+ vie_reset(vie);
+ err = 0;
+ }
+ }
+ break;
+ default:
+ return (EINVAL);
+ }
+ return (err);
+}
+
+static int
+vm_loop_checks(struct vm *vm, int vcpuid, struct vm_exit *vme)
+{
+ struct vie *vie;
+
+ vie = vm->vcpu[vcpuid].vie_ctx;
+
+ if (vie_pending(vie)) {
+ /*
+ * Userspace has not fulfilled the pending needs of the
+ * instruction emulation, so bail back out.
+ */
+ vie_exitinfo(vie, vme);
+ return (-1);
+ }
+
+ if (vcpuid == 0 && vm->sipi_req) {
+ /* The boot vCPU has sent a SIPI to one of the other CPUs */
+ vme->exitcode = VM_EXITCODE_SPINUP_AP;
+ vme->u.spinup_ap.vcpu = vm->sipi_req_vcpu;
+ vme->u.spinup_ap.rip = vm->sipi_req_rip;
+
+ vm->sipi_req = false;
+ vm->sipi_req_vcpu = 0;
+ vm->sipi_req_rip = 0;
+ return (-1);
+ }
+
+ return (0);
+}
+
int
-vm_run(struct vm *vm, struct vm_run *vmrun)
+vm_run(struct vm *vm, int vcpuid, const struct vm_entry *entry)
{
struct vm_eventinfo evinfo;
- int error, vcpuid;
+ int error;
struct vcpu *vcpu;
#ifdef __FreeBSD__
struct pcb *pcb;
@@ -1978,8 +2212,6 @@ vm_run(struct vm *vm, struct vm_run *vmrun)
int affinity_type = CPU_CURRENT;
#endif
- vcpuid = vmrun->cpuid;
-
if (vcpuid < 0 || vcpuid >= vm->maxcpus)
return (EINVAL);
@@ -2005,7 +2237,21 @@ vm_run(struct vm *vm, struct vm_run *vmrun)
NULL, vmm_freectx);
#endif
+ error = vm_entry_actions(vm, vcpuid, entry, vme);
+ if (error < 0) {
+ /* Exit condition to be serviced by userspace */
+ error = 0;
+ goto exit;
+ } else if (error != 0) {
+ goto exit;
+ }
+
restart:
+ if (vm_loop_checks(vm, vcpuid, vme) != 0) {
+ error = 0;
+ goto exit;
+ }
+
#ifndef __FreeBSD__
thread_affinity_set(curthread, affinity_type);
/*
@@ -2091,11 +2337,10 @@ restart:
case VM_EXITCODE_PAGING:
error = vm_handle_paging(vm, vcpuid, &retu);
break;
- case VM_EXITCODE_INST_EMUL:
- error = vm_handle_inst_emul(vm, vcpuid, &retu);
+ case VM_EXITCODE_MMIO_EMUL:
+ error = vm_handle_mmio_emul(vm, vcpuid, &retu);
break;
case VM_EXITCODE_INOUT:
- case VM_EXITCODE_INOUT_STR:
error = vm_handle_inout(vm, vcpuid, vme, &retu);
break;
case VM_EXITCODE_MONITOR:
@@ -2114,12 +2359,12 @@ restart:
affinity_type = CPU_BEST;
break;
}
+#endif
case VM_EXITCODE_MTRAP:
vm_suspend_cpu(vm, vcpuid);
retu = true;
break;
-#endif
default:
retu = true; /* handled in userland */
break;
@@ -2129,6 +2374,7 @@ restart:
if (error == 0 && retu == false)
goto restart;
+exit:
#ifndef __FreeBSD__
removectx(curthread, &vtc, vmm_savectx, vmm_restorectx, NULL, NULL,
NULL, vmm_freectx);
@@ -2136,8 +2382,6 @@ restart:
VCPU_CTR2(vm, vcpuid, "retu %d/%d", error, vme->exitcode);
- /* copy the exit information */
- bcopy(vme, &vmrun->vm_exit, sizeof (struct vm_exit));
return (error);
}
@@ -3206,21 +3450,21 @@ vm_ioport_handle_hook(struct vm *vm, int cpuid, bool in, int port, int bytes,
}
}
if (hook == NULL) {
- return (ENOENT);
+ return (ESRCH);
}
if (in) {
uint64_t tval;
if (hook->vmih_rmem_cb == NULL) {
- return (ENOENT);
+ return (ESRCH);
}
err = hook->vmih_rmem_cb(hook->vmih_arg, (uintptr_t)port,
(uint_t)bytes, &tval);
*val = (uint32_t)tval;
} else {
if (hook->vmih_wmem_cb == NULL) {
- return (ENOENT);
+ return (ESRCH);
}
err = hook->vmih_wmem_cb(hook->vmih_arg, (uintptr_t)port,
(uint_t)bytes, (uint64_t)*val);
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_instruction_emul.c b/usr/src/uts/i86pc/io/vmm/vmm_instruction_emul.c
index 0d32fe0b9a..f8bb7a1646 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm_instruction_emul.c
+++ b/usr/src/uts/i86pc/io/vmm/vmm_instruction_emul.c
@@ -40,12 +40,12 @@
*
* Copyright 2015 Pluribus Networks Inc.
* Copyright 2018 Joyent, Inc.
+ * Copyright 2020 Oxide Computer Company
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
-#ifdef _KERNEL
#include <sys/param.h>
#include <sys/pcpu.h>
#include <sys/systm.h>
@@ -56,27 +56,109 @@ __FBSDID("$FreeBSD$");
#include <machine/vmparam.h>
#include <machine/vmm.h>
-#else /* !_KERNEL */
-#include <sys/types.h>
-#include <sys/errno.h>
-#include <sys/_iovec.h>
+#include <sys/vmm_kernel.h>
-#include <machine/vmm.h>
-
-#include <err.h>
-#include <assert.h>
-#include <stdbool.h>
-#include <stdio.h>
-#include <strings.h>
-#include <vmmapi.h>
-#define KASSERT(exp,msg) assert((exp))
-#define panic(...) errx(4, __VA_ARGS__)
-#endif /* _KERNEL */
-
-#include <machine/vmm_instruction_emul.h>
+#include <sys/vmm_instruction_emul.h>
#include <x86/psl.h>
#include <x86/specialreg.h>
+#include "vmm_ioport.h"
+#include "vmm_ktr.h"
+
+enum vie_status {
+ VIES_INIT = (1U << 0),
+ VIES_MMIO = (1U << 1),
+ VIES_INOUT = (1U << 2),
+ VIES_INST_FETCH = (1U << 3),
+ VIES_INST_DECODE = (1U << 4),
+ VIES_PENDING_MMIO = (1U << 5),
+ VIES_PENDING_INOUT = (1U << 6),
+ VIES_REPEAT = (1U << 7),
+ VIES_COMPLETE = (1U << 8),
+};
+
+/* State of request to perform emulated access (inout or MMIO) */
+enum vie_req {
+ VR_NONE,
+ VR_PENDING,
+ VR_DONE,
+};
+
+struct vie_mmio {
+ uint64_t data;
+ uint64_t gpa;
+ uint8_t bytes;
+ enum vie_req state;
+};
+
+struct vie_op {
+ uint8_t op_byte; /* actual opcode byte */
+ uint8_t op_type; /* type of operation (e.g. MOV) */
+ uint16_t op_flags;
+};
+
+#define VIE_INST_SIZE 15
+struct vie {
+ uint8_t inst[VIE_INST_SIZE]; /* instruction bytes */
+ uint8_t num_valid; /* size of the instruction */
+ uint8_t num_processed;
+
+ uint8_t addrsize:4, opsize:4; /* address and operand sizes */
+ uint8_t rex_w:1, /* REX prefix */
+ rex_r:1,
+ rex_x:1,
+ rex_b:1,
+ rex_present:1,
+ repz_present:1, /* REP/REPE/REPZ prefix */
+ repnz_present:1, /* REPNE/REPNZ prefix */
+ opsize_override:1, /* Operand size override */
+ addrsize_override:1, /* Address size override */
+ segment_override:1; /* Segment override */
+
+ uint8_t mod:2, /* ModRM byte */
+ reg:4,
+ rm:4;
+
+ uint8_t ss:2, /* SIB byte */
+ vex_present:1, /* VEX prefixed */
+ vex_l:1, /* L bit */
+ index:4, /* SIB byte */
+ base:4; /* SIB byte */
+
+ uint8_t disp_bytes;
+ uint8_t imm_bytes;
+
+ uint8_t scale;
+
+ uint8_t vex_reg:4, /* vvvv: first source register specifier */
+ vex_pp:2, /* pp */
+ _sparebits:2;
+
+ uint8_t _sparebytes[2];
+
+ int base_register; /* VM_REG_GUEST_xyz */
+ int index_register; /* VM_REG_GUEST_xyz */
+ int segment_register; /* VM_REG_GUEST_xyz */
+
+ int64_t displacement; /* optional addr displacement */
+ int64_t immediate; /* optional immediate operand */
+
+ struct vie_op op; /* opcode description */
+
+ enum vie_status status;
+
+ struct vm_guest_paging paging; /* guest paging state */
+
+ uint64_t mmio_gpa; /* faulting GPA */
+ struct vie_mmio mmio_req_read;
+ struct vie_mmio mmio_req_write;
+
+ struct vm_inout inout; /* active in/out op */
+ enum vie_req inout_req_state;
+ uint32_t inout_req_val; /* value from userspace */
+};
+
+
/* struct vie_op.op_type */
enum {
VIE_OP_TYPE_NONE = 0,
@@ -299,14 +381,29 @@ static uint64_t size2mask[] = {
[8] = 0xffffffffffffffff,
};
-static int
-vie_read_register(void *vm, int vcpuid, enum vm_reg_name reg, uint64_t *rval)
-{
- int error;
- error = vm_get_register(vm, vcpuid, reg, rval);
+static int vie_mmio_read(struct vie *vie, struct vm *vm, int cpuid,
+ uint64_t gpa, uint64_t *rval, int bytes);
+static int vie_mmio_write(struct vie *vie, struct vm *vm, int cpuid,
+ uint64_t gpa, uint64_t wval, int bytes);
+static int vie_calculate_gla(enum vm_cpu_mode cpu_mode, enum vm_reg_name seg,
+ struct seg_desc *desc, uint64_t offset, int length, int addrsize,
+ int prot, uint64_t *gla);
+static int vie_canonical_check(enum vm_cpu_mode cpu_mode, uint64_t gla);
+static int vie_alignment_check(int cpl, int size, uint64_t cr0, uint64_t rf,
+ uint64_t gla);
+static uint64_t vie_size2mask(int size);
+
+struct vie *
+vie_alloc()
+{
+ return (kmem_zalloc(sizeof (struct vie), KM_SLEEP));
+}
- return (error);
+void
+vie_free(struct vie *vie)
+{
+ kmem_free(vie, sizeof (struct vie));
}
static void
@@ -336,7 +433,7 @@ vie_calc_bytereg(struct vie *vie, enum vm_reg_name *reg, int *lhbr)
}
static int
-vie_read_bytereg(void *vm, int vcpuid, struct vie *vie, uint8_t *rval)
+vie_read_bytereg(struct vie *vie, void *vm, int vcpuid, uint8_t *rval)
{
uint64_t val;
int error, lhbr;
@@ -357,7 +454,7 @@ vie_read_bytereg(void *vm, int vcpuid, struct vie *vie, uint8_t *rval)
}
static int
-vie_write_bytereg(void *vm, int vcpuid, struct vie *vie, uint8_t byte)
+vie_write_bytereg(struct vie *vie, void *vm, int vcpuid, uint8_t byte)
{
uint64_t origval, val, mask;
int error, lhbr;
@@ -382,9 +479,9 @@ vie_write_bytereg(void *vm, int vcpuid, struct vie *vie, uint8_t byte)
return (error);
}
-int
-vie_update_register(void *vm, int vcpuid, enum vm_reg_name reg,
- uint64_t val, int size)
+static int
+vie_update_register(void *vm, int vcpuid, enum vm_reg_name reg, uint64_t val,
+ int size)
{
int error;
uint64_t origval;
@@ -392,7 +489,7 @@ vie_update_register(void *vm, int vcpuid, enum vm_reg_name reg,
switch (size) {
case 1:
case 2:
- error = vie_read_register(vm, vcpuid, reg, &origval);
+ error = vm_get_register(vm, vcpuid, reg, &origval);
if (error)
return (error);
val &= size2mask[size];
@@ -411,6 +508,29 @@ vie_update_register(void *vm, int vcpuid, enum vm_reg_name reg,
return (error);
}
+static int
+vie_repeat(struct vie *vie)
+{
+ vie->status |= VIES_REPEAT;
+
+ /*
+ * Clear out any cached operation values so the repeated instruction can
+ * begin without using that stale state. Other state, such as the
+ * decoding results, are kept around as it will not vary between
+ * iterations of a rep-prefixed instruction.
+ */
+ if ((vie->status & VIES_MMIO) != 0) {
+ vie->mmio_req_read.state = VR_NONE;
+ vie->mmio_req_write.state = VR_NONE;
+ } else if ((vie->status & VIES_INOUT) != 0) {
+ vie->inout_req_state = VR_NONE;
+ } else {
+ panic("unexpected emulation state");
+ }
+
+ return (EAGAIN);
+}
+
#define RFLAGS_STATUS_BITS (PSL_C | PSL_PF | PSL_AF | PSL_Z | PSL_N | PSL_V)
/*
@@ -519,8 +639,7 @@ getandflags(int opsize, uint64_t x, uint64_t y)
}
static int
-emulate_mov(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
- mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
+emulate_mov(void *vm, int vcpuid, uint64_t gpa, struct vie *vie)
{
int error, size;
enum vm_reg_name reg;
@@ -538,9 +657,9 @@ emulate_mov(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
* REX + 88/r: mov r/m8, r8 (%ah, %ch, %dh, %bh not available)
*/
size = 1; /* override for byte operation */
- error = vie_read_bytereg(vm, vcpuid, vie, &byte);
+ error = vie_read_bytereg(vie, vm, vcpuid, &byte);
if (error == 0)
- error = memwrite(vm, vcpuid, gpa, byte, size, arg);
+ error = vie_mmio_write(vie, vm, vcpuid, gpa, byte, size);
break;
case 0x89:
/*
@@ -550,10 +669,10 @@ emulate_mov(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
* REX.W + 89/r mov r/m64, r64
*/
reg = gpr_map[vie->reg];
- error = vie_read_register(vm, vcpuid, reg, &val);
+ error = vm_get_register(vm, vcpuid, reg, &val);
if (error == 0) {
val &= size2mask[size];
- error = memwrite(vm, vcpuid, gpa, val, size, arg);
+ error = vie_mmio_write(vie, vm, vcpuid, gpa, val, size);
}
break;
case 0x8A:
@@ -563,9 +682,9 @@ emulate_mov(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
* REX + 8A/r: mov r8, r/m8
*/
size = 1; /* override for byte operation */
- error = memread(vm, vcpuid, gpa, &val, size, arg);
+ error = vie_mmio_read(vie, vm, vcpuid, gpa, &val, size);
if (error == 0)
- error = vie_write_bytereg(vm, vcpuid, vie, val);
+ error = vie_write_bytereg(vie, vm, vcpuid, val);
break;
case 0x8B:
/*
@@ -574,7 +693,7 @@ emulate_mov(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
* 8B/r: mov r32, r/m32
* REX.W 8B/r: mov r64, r/m64
*/
- error = memread(vm, vcpuid, gpa, &val, size, arg);
+ error = vie_mmio_read(vie, vm, vcpuid, gpa, &val, size);
if (error == 0) {
reg = gpr_map[vie->reg];
error = vie_update_register(vm, vcpuid, reg, val, size);
@@ -587,7 +706,7 @@ emulate_mov(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
* A1: mov EAX, moffs32
* REX.W + A1: mov RAX, moffs64
*/
- error = memread(vm, vcpuid, gpa, &val, size, arg);
+ error = vie_mmio_read(vie, vm, vcpuid, gpa, &val, size);
if (error == 0) {
reg = VM_REG_GUEST_RAX;
error = vie_update_register(vm, vcpuid, reg, val, size);
@@ -597,13 +716,13 @@ emulate_mov(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
/*
* MOV from AX/EAX/RAX to seg:moffset
* A3: mov moffs16, AX
- * A3: mov moffs32, EAX
+ * A3: mov moffs32, EAX
* REX.W + A3: mov moffs64, RAX
*/
- error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RAX, &val);
+ error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RAX, &val);
if (error == 0) {
val &= size2mask[size];
- error = memwrite(vm, vcpuid, gpa, val, size, arg);
+ error = vie_mmio_write(vie, vm, vcpuid, gpa, val, size);
}
break;
case 0xC6:
@@ -613,7 +732,8 @@ emulate_mov(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
* REX + C6/0 mov r/m8, imm8
*/
size = 1; /* override for byte operation */
- error = memwrite(vm, vcpuid, gpa, vie->immediate, size, arg);
+ val = vie->immediate;
+ error = vie_mmio_write(vie, vm, vcpuid, gpa, val, size);
break;
case 0xC7:
/*
@@ -623,7 +743,7 @@ emulate_mov(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
* REX.W + C7/0 mov r/m64, imm32 (sign-extended to 64-bits)
*/
val = vie->immediate & size2mask[size];
- error = memwrite(vm, vcpuid, gpa, val, size, arg);
+ error = vie_mmio_write(vie, vm, vcpuid, gpa, val, size);
break;
default:
break;
@@ -633,9 +753,7 @@ emulate_mov(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
}
static int
-emulate_movx(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
- mem_region_read_t memread, mem_region_write_t memwrite,
- void *arg)
+emulate_movx(void *vm, int vcpuid, uint64_t gpa, struct vie *vie)
{
int error, size;
enum vm_reg_name reg;
@@ -656,7 +774,7 @@ emulate_movx(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
*/
/* get the first operand */
- error = memread(vm, vcpuid, gpa, &val, 1, arg);
+ error = vie_mmio_read(vie, vm, vcpuid, gpa, &val, 1);
if (error)
break;
@@ -677,7 +795,7 @@ emulate_movx(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
* 0F B7/r movzx r32, r/m16
* REX.W + 0F B7/r movzx r64, r/m16
*/
- error = memread(vm, vcpuid, gpa, &val, 2, arg);
+ error = vie_mmio_read(vie, vm, vcpuid, gpa, &val, 2);
if (error)
return (error);
@@ -699,7 +817,7 @@ emulate_movx(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
*/
/* get the first operand */
- error = memread(vm, vcpuid, gpa, &val, 1, arg);
+ error = vie_mmio_read(vie, vm, vcpuid, gpa, &val, 1);
if (error)
break;
@@ -722,25 +840,27 @@ emulate_movx(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
* Helper function to calculate and validate a linear address.
*/
static int
-get_gla(void *vm, int vcpuid, struct vie *vie, struct vm_guest_paging *paging,
- int opsize, int addrsize, int prot, enum vm_reg_name seg,
- enum vm_reg_name gpr, uint64_t *gla, int *fault)
+vie_get_gla(struct vie *vie, void *vm, int vcpuid, int opsize, int addrsize,
+ int prot, enum vm_reg_name seg, enum vm_reg_name gpr, uint64_t *gla)
{
struct seg_desc desc;
uint64_t cr0, val, rflags;
int error;
+ struct vm_guest_paging *paging;
+
+ paging = &vie->paging;
- error = vie_read_register(vm, vcpuid, VM_REG_GUEST_CR0, &cr0);
+ error = vm_get_register(vm, vcpuid, VM_REG_GUEST_CR0, &cr0);
KASSERT(error == 0, ("%s: error %d getting cr0", __func__, error));
- error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
+ error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
KASSERT(error == 0, ("%s: error %d getting rflags", __func__, error));
error = vm_get_seg_desc(vm, vcpuid, seg, &desc);
KASSERT(error == 0, ("%s: error %d getting segment descriptor %d",
__func__, error, seg));
- error = vie_read_register(vm, vcpuid, gpr, &val);
+ error = vm_get_register(vm, vcpuid, gpr, &val);
KASSERT(error == 0, ("%s: error %d getting register %d", __func__,
error, gpr));
@@ -750,7 +870,7 @@ get_gla(void *vm, int vcpuid, struct vie *vie, struct vm_guest_paging *paging,
vm_inject_ss(vm, vcpuid, 0);
else
vm_inject_gp(vm, vcpuid);
- goto guest_fault;
+ return (-1);
}
if (vie_canonical_check(paging->cpu_mode, *gla)) {
@@ -758,39 +878,30 @@ get_gla(void *vm, int vcpuid, struct vie *vie, struct vm_guest_paging *paging,
vm_inject_ss(vm, vcpuid, 0);
else
vm_inject_gp(vm, vcpuid);
- goto guest_fault;
+ return (-1);
}
if (vie_alignment_check(paging->cpl, opsize, cr0, rflags, *gla)) {
vm_inject_ac(vm, vcpuid, 0);
- goto guest_fault;
+ return (-1);
}
- *fault = 0;
- return (0);
-
-guest_fault:
- *fault = 1;
return (0);
}
static int
-emulate_movs(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
- struct vm_guest_paging *paging, mem_region_read_t memread,
- mem_region_write_t memwrite, void *arg)
+emulate_movs(void *vm, int vcpuid, uint64_t gpa, struct vie *vie)
{
-#ifdef _KERNEL
struct vm_copyinfo copyinfo[2];
-#else
- struct iovec copyinfo[2];
-#endif
uint64_t dstaddr, srcaddr, dstgpa, srcgpa, val;
uint64_t rcx, rdi, rsi, rflags;
int error, fault, opsize, seg, repeat;
+ struct vm_guest_paging *paging;
opsize = (vie->op.op_byte == 0xA4) ? 1 : vie->opsize;
val = 0;
error = 0;
+ paging = &vie->paging;
/*
* XXX although the MOVS instruction is only supposed to be used with
@@ -802,7 +913,7 @@ emulate_movs(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
repeat = vie->repz_present | vie->repnz_present;
if (repeat) {
- error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RCX, &rcx);
+ error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RCX, &rcx);
KASSERT(!error, ("%s: error %d getting rcx", __func__, error));
/*
@@ -832,10 +943,10 @@ emulate_movs(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
*/
seg = vie->segment_override ? vie->segment_register : VM_REG_GUEST_DS;
- error = get_gla(vm, vcpuid, vie, paging, opsize, vie->addrsize,
- PROT_READ, seg, VM_REG_GUEST_RSI, &srcaddr, &fault);
- if (error || fault)
+ if (vie_get_gla(vie, vm, vcpuid, opsize, vie->addrsize, PROT_READ, seg,
+ VM_REG_GUEST_RSI, &srcaddr) != 0) {
goto done;
+ }
error = vm_copy_setup(vm, vcpuid, paging, srcaddr, opsize, PROT_READ,
copyinfo, nitems(copyinfo), &fault);
@@ -848,7 +959,7 @@ emulate_movs(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
*/
vm_copyin(vm, vcpuid, copyinfo, &val, opsize);
vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo));
- error = memwrite(vm, vcpuid, gpa, val, opsize, arg);
+ error = vie_mmio_write(vie, vm, vcpuid, gpa, val, opsize);
if (error)
goto done;
} else {
@@ -857,11 +968,11 @@ emulate_movs(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
* if 'srcaddr' is in the mmio space.
*/
- error = get_gla(vm, vcpuid, vie, paging, opsize, vie->addrsize,
- PROT_WRITE, VM_REG_GUEST_ES, VM_REG_GUEST_RDI, &dstaddr,
- &fault);
- if (error || fault)
+ if (vie_get_gla(vie, vm, vcpuid, opsize, vie->addrsize,
+ PROT_WRITE, VM_REG_GUEST_ES, VM_REG_GUEST_RDI,
+ &dstaddr) != 0) {
goto done;
+ }
error = vm_copy_setup(vm, vcpuid, paging, dstaddr, opsize,
PROT_WRITE, copyinfo, nitems(copyinfo), &fault);
@@ -878,7 +989,8 @@ emulate_movs(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
* injected into the guest then it will happen
* before the MMIO read is attempted.
*/
- error = memread(vm, vcpuid, gpa, &val, opsize, arg);
+ error = vie_mmio_read(vie, vm, vcpuid, gpa, &val,
+ opsize);
if (error)
goto done;
@@ -903,23 +1015,25 @@ emulate_movs(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
if (error || fault)
goto done;
- error = memread(vm, vcpuid, srcgpa, &val, opsize, arg);
+ error = vie_mmio_read(vie, vm, vcpuid, srcgpa, &val,
+ opsize);
if (error)
goto done;
- error = memwrite(vm, vcpuid, dstgpa, val, opsize, arg);
+ error = vie_mmio_write(vie, vm, vcpuid, dstgpa, val,
+ opsize);
if (error)
goto done;
}
}
- error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RSI, &rsi);
+ error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RSI, &rsi);
KASSERT(error == 0, ("%s: error %d getting rsi", __func__, error));
- error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RDI, &rdi);
+ error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RDI, &rdi);
KASSERT(error == 0, ("%s: error %d getting rdi", __func__, error));
- error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
+ error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
KASSERT(error == 0, ("%s: error %d getting rflags", __func__, error));
if (rflags & PSL_D) {
@@ -948,18 +1062,14 @@ emulate_movs(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
* Repeat the instruction if the count register is not zero.
*/
if ((rcx & vie_size2mask(vie->addrsize)) != 0)
- vm_restart_instruction(vm, vcpuid);
+ return (vie_repeat(vie));
}
done:
- KASSERT(error == 0 || error == EFAULT, ("%s: unexpected error %d",
- __func__, error));
return (error);
}
static int
-emulate_stos(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
- struct vm_guest_paging *paging, mem_region_read_t memread,
- mem_region_write_t memwrite, void *arg)
+emulate_stos(void *vm, int vcpuid, uint64_t gpa, struct vie *vie)
{
int error, opsize, repeat;
uint64_t val;
@@ -969,7 +1079,7 @@ emulate_stos(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
repeat = vie->repz_present | vie->repnz_present;
if (repeat) {
- error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RCX, &rcx);
+ error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RCX, &rcx);
KASSERT(!error, ("%s: error %d getting rcx", __func__, error));
/*
@@ -980,17 +1090,17 @@ emulate_stos(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
return (0);
}
- error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RAX, &val);
+ error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RAX, &val);
KASSERT(!error, ("%s: error %d getting rax", __func__, error));
- error = memwrite(vm, vcpuid, gpa, val, opsize, arg);
+ error = vie_mmio_write(vie, vm, vcpuid, gpa, val, opsize);
if (error)
return (error);
- error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RDI, &rdi);
+ error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RDI, &rdi);
KASSERT(error == 0, ("%s: error %d getting rdi", __func__, error));
- error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
+ error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
KASSERT(error == 0, ("%s: error %d getting rflags", __func__, error));
if (rflags & PSL_D)
@@ -1012,15 +1122,14 @@ emulate_stos(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
* Repeat the instruction if the count register is not zero.
*/
if ((rcx & vie_size2mask(vie->addrsize)) != 0)
- vm_restart_instruction(vm, vcpuid);
+ return (vie_repeat(vie));
}
return (0);
}
static int
-emulate_and(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
- mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
+emulate_and(void *vm, int vcpuid, uint64_t gpa, struct vie *vie)
{
int error, size;
enum vm_reg_name reg;
@@ -1042,12 +1151,12 @@ emulate_and(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
/* get the first operand */
reg = gpr_map[vie->reg];
- error = vie_read_register(vm, vcpuid, reg, &val1);
+ error = vm_get_register(vm, vcpuid, reg, &val1);
if (error)
break;
/* get the second operand */
- error = memread(vm, vcpuid, gpa, &val2, size, arg);
+ error = vie_mmio_read(vie, vm, vcpuid, gpa, &val2, size);
if (error)
break;
@@ -1071,7 +1180,7 @@ emulate_and(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
*/
/* get the first operand */
- error = memread(vm, vcpuid, gpa, &val1, size, arg);
+ error = vie_mmio_read(vie, vm, vcpuid, gpa, &val1, size);
if (error)
break;
@@ -1080,7 +1189,7 @@ emulate_and(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
* operand and write the result
*/
result = val1 & vie->immediate;
- error = memwrite(vm, vcpuid, gpa, result, size, arg);
+ error = vie_mmio_write(vie, vm, vcpuid, gpa, result, size);
break;
default:
break;
@@ -1088,7 +1197,7 @@ emulate_and(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
if (error)
return (error);
- error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
+ error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
if (error)
return (error);
@@ -1107,8 +1216,7 @@ emulate_and(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
}
static int
-emulate_or(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
- mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
+emulate_or(void *vm, int vcpuid, uint64_t gpa, struct vie *vie)
{
int error, size;
enum vm_reg_name reg;
@@ -1130,12 +1238,12 @@ emulate_or(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
/* get the first operand */
reg = gpr_map[vie->reg];
- error = vie_read_register(vm, vcpuid, reg, &val1);
+ error = vm_get_register(vm, vcpuid, reg, &val1);
if (error)
break;
-
+
/* get the second operand */
- error = memread(vm, vcpuid, gpa, &val2, size, arg);
+ error = vie_mmio_read(vie, vm, vcpuid, gpa, &val2, size);
if (error)
break;
@@ -1159,7 +1267,7 @@ emulate_or(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
*/
/* get the first operand */
- error = memread(vm, vcpuid, gpa, &val1, size, arg);
+ error = vie_mmio_read(vie, vm, vcpuid, gpa, &val1, size);
if (error)
break;
@@ -1168,7 +1276,7 @@ emulate_or(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
* operand and write the result
*/
result = val1 | vie->immediate;
- error = memwrite(vm, vcpuid, gpa, result, size, arg);
+ error = vie_mmio_write(vie, vm, vcpuid, gpa, result, size);
break;
default:
break;
@@ -1176,7 +1284,7 @@ emulate_or(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
if (error)
return (error);
- error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
+ error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
if (error)
return (error);
@@ -1195,8 +1303,7 @@ emulate_or(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
}
static int
-emulate_cmp(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
- mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
+emulate_cmp(void *vm, int vcpuid, uint64_t gpa, struct vie *vie)
{
int error, size;
uint64_t regop, memop, op1, op2, rflags, rflags2;
@@ -1223,12 +1330,12 @@ emulate_cmp(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
/* Get the register operand */
reg = gpr_map[vie->reg];
- error = vie_read_register(vm, vcpuid, reg, &regop);
+ error = vm_get_register(vm, vcpuid, reg, &regop);
if (error)
return (error);
/* Get the memory operand */
- error = memread(vm, vcpuid, gpa, &memop, size, arg);
+ error = vie_mmio_read(vie, vm, vcpuid, gpa, &memop, size);
if (error)
return (error);
@@ -1267,7 +1374,7 @@ emulate_cmp(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
size = 1;
/* get the first operand */
- error = memread(vm, vcpuid, gpa, &op1, size, arg);
+ error = vie_mmio_read(vie, vm, vcpuid, gpa, &op1, size);
if (error)
return (error);
@@ -1276,7 +1383,7 @@ emulate_cmp(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
default:
return (EINVAL);
}
- error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
+ error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
if (error)
return (error);
rflags &= ~RFLAGS_STATUS_BITS;
@@ -1287,8 +1394,7 @@ emulate_cmp(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
}
static int
-emulate_test(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
- mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
+emulate_test(void *vm, int vcpuid, uint64_t gpa, struct vie *vie)
{
int error, size;
uint64_t op1, rflags, rflags2;
@@ -1311,7 +1417,7 @@ emulate_test(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
if ((vie->reg & 7) != 0)
return (EINVAL);
- error = memread(vm, vcpuid, gpa, &op1, size, arg);
+ error = vie_mmio_read(vie, vm, vcpuid, gpa, &op1, size);
if (error)
return (error);
@@ -1320,7 +1426,7 @@ emulate_test(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
default:
return (EINVAL);
}
- error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
+ error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
if (error)
return (error);
@@ -1336,16 +1442,16 @@ emulate_test(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
}
static int
-emulate_bextr(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
- struct vm_guest_paging *paging, mem_region_read_t memread,
- mem_region_write_t memwrite, void *arg)
+emulate_bextr(void *vm, int vcpuid, uint64_t gpa, struct vie *vie)
{
uint64_t src1, src2, dst, rflags;
unsigned start, len;
int error, size;
+ struct vm_guest_paging *paging;
size = vie->opsize;
error = EINVAL;
+ paging = &vie->paging;
/*
* VEX.LZ.0F38.W0 F7 /r BEXTR r32a, r/m32, r32b
@@ -1364,13 +1470,13 @@ emulate_bextr(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
* operand) using an index and length specified in the second /source/
* operand (third operand).
*/
- error = memread(vm, vcpuid, gpa, &src1, size, arg);
+ error = vie_mmio_read(vie, vm, vcpuid, gpa, &src1, size);
if (error)
return (error);
- error = vie_read_register(vm, vcpuid, gpr_map[vie->vex_reg], &src2);
+ error = vm_get_register(vm, vcpuid, gpr_map[vie->vex_reg], &src2);
if (error)
return (error);
- error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
+ error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
if (error)
return (error);
@@ -1413,8 +1519,7 @@ done:
}
static int
-emulate_add(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
- mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
+emulate_add(void *vm, int vcpuid, uint64_t gpa, struct vie *vie)
{
int error, size;
uint64_t nval, rflags, rflags2, val1, val2;
@@ -1435,12 +1540,12 @@ emulate_add(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
/* get the first operand */
reg = gpr_map[vie->reg];
- error = vie_read_register(vm, vcpuid, reg, &val1);
+ error = vm_get_register(vm, vcpuid, reg, &val1);
if (error)
break;
/* get the second operand */
- error = memread(vm, vcpuid, gpa, &val2, size, arg);
+ error = vie_mmio_read(vie, vm, vcpuid, gpa, &val2, size);
if (error)
break;
@@ -1454,7 +1559,7 @@ emulate_add(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
if (!error) {
rflags2 = getaddflags(size, val1, val2);
- error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS,
+ error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RFLAGS,
&rflags);
if (error)
return (error);
@@ -1469,8 +1574,7 @@ emulate_add(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
}
static int
-emulate_sub(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
- mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
+emulate_sub(void *vm, int vcpuid, uint64_t gpa, struct vie *vie)
{
int error, size;
uint64_t nval, rflags, rflags2, val1, val2;
@@ -1483,7 +1587,7 @@ emulate_sub(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
case 0x2B:
/*
* SUB r/m from r and store the result in r
- *
+ *
* 2B/r SUB r16, r/m16
* 2B/r SUB r32, r/m32
* REX.W + 2B/r SUB r64, r/m64
@@ -1491,12 +1595,12 @@ emulate_sub(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
/* get the first operand */
reg = gpr_map[vie->reg];
- error = vie_read_register(vm, vcpuid, reg, &val1);
+ error = vm_get_register(vm, vcpuid, reg, &val1);
if (error)
break;
/* get the second operand */
- error = memread(vm, vcpuid, gpa, &val2, size, arg);
+ error = vie_mmio_read(vie, vm, vcpuid, gpa, &val2, size);
if (error)
break;
@@ -1510,7 +1614,7 @@ emulate_sub(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
if (!error) {
rflags2 = getcc(size, val1, val2);
- error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS,
+ error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RFLAGS,
&rflags);
if (error)
return (error);
@@ -1525,22 +1629,18 @@ emulate_sub(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
}
static int
-emulate_stack_op(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie,
- struct vm_guest_paging *paging, mem_region_read_t memread,
- mem_region_write_t memwrite, void *arg)
+emulate_stack_op(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie)
{
-#ifdef _KERNEL
struct vm_copyinfo copyinfo[2];
-#else
- struct iovec copyinfo[2];
-#endif
struct seg_desc ss_desc;
uint64_t cr0, rflags, rsp, stack_gla, val;
int error, fault, size, stackaddrsize, pushop;
+ struct vm_guest_paging *paging;
val = 0;
size = vie->opsize;
pushop = (vie->op.op_type == VIE_OP_TYPE_PUSH) ? 1 : 0;
+ paging = &vie->paging;
/*
* From "Address-Size Attributes for Stack Accesses", Intel SDL, Vol 1
@@ -1572,13 +1672,13 @@ emulate_stack_op(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie,
stackaddrsize = 2;
}
- error = vie_read_register(vm, vcpuid, VM_REG_GUEST_CR0, &cr0);
+ error = vm_get_register(vm, vcpuid, VM_REG_GUEST_CR0, &cr0);
KASSERT(error == 0, ("%s: error %d getting cr0", __func__, error));
- error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
+ error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
KASSERT(error == 0, ("%s: error %d getting rflags", __func__, error));
- error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RSP, &rsp);
+ error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RSP, &rsp);
KASSERT(error == 0, ("%s: error %d getting rsp", __func__, error));
if (pushop) {
rsp -= size;
@@ -1608,12 +1708,12 @@ emulate_stack_op(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie,
return (error);
if (pushop) {
- error = memread(vm, vcpuid, mmio_gpa, &val, size, arg);
+ error = vie_mmio_read(vie, vm, vcpuid, mmio_gpa, &val, size);
if (error == 0)
vm_copyout(vm, vcpuid, &val, copyinfo, size);
} else {
vm_copyin(vm, vcpuid, copyinfo, &val, size);
- error = memwrite(vm, vcpuid, mmio_gpa, val, size, arg);
+ error = vie_mmio_write(vie, vm, vcpuid, mmio_gpa, val, size);
rsp += size;
}
vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo));
@@ -1627,9 +1727,7 @@ emulate_stack_op(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie,
}
static int
-emulate_push(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie,
- struct vm_guest_paging *paging, mem_region_read_t memread,
- mem_region_write_t memwrite, void *arg)
+emulate_push(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie)
{
int error;
@@ -1642,15 +1740,12 @@ emulate_push(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie,
if ((vie->reg & 7) != 6)
return (EINVAL);
- error = emulate_stack_op(vm, vcpuid, mmio_gpa, vie, paging, memread,
- memwrite, arg);
+ error = emulate_stack_op(vm, vcpuid, mmio_gpa, vie);
return (error);
}
static int
-emulate_pop(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie,
- struct vm_guest_paging *paging, mem_region_read_t memread,
- mem_region_write_t memwrite, void *arg)
+emulate_pop(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie)
{
int error;
@@ -1663,30 +1758,24 @@ emulate_pop(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie,
if ((vie->reg & 7) != 0)
return (EINVAL);
- error = emulate_stack_op(vm, vcpuid, mmio_gpa, vie, paging, memread,
- memwrite, arg);
+ error = emulate_stack_op(vm, vcpuid, mmio_gpa, vie);
return (error);
}
static int
-emulate_group1(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
- struct vm_guest_paging *paging, mem_region_read_t memread,
- mem_region_write_t memwrite, void *memarg)
+emulate_group1(void *vm, int vcpuid, uint64_t gpa, struct vie *vie)
{
int error;
switch (vie->reg & 7) {
case 0x1: /* OR */
- error = emulate_or(vm, vcpuid, gpa, vie,
- memread, memwrite, memarg);
+ error = emulate_or(vm, vcpuid, gpa, vie);
break;
case 0x4: /* AND */
- error = emulate_and(vm, vcpuid, gpa, vie,
- memread, memwrite, memarg);
+ error = emulate_and(vm, vcpuid, gpa, vie);
break;
case 0x7: /* CMP */
- error = emulate_cmp(vm, vcpuid, gpa, vie,
- memread, memwrite, memarg);
+ error = emulate_cmp(vm, vcpuid, gpa, vie);
break;
default:
error = EINVAL;
@@ -1697,8 +1786,7 @@ emulate_group1(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
}
static int
-emulate_bittest(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
- mem_region_read_t memread, mem_region_write_t memwrite, void *memarg)
+emulate_bittest(void *vm, int vcpuid, uint64_t gpa, struct vie *vie)
{
uint64_t val, rflags;
int error, bitmask, bitoff;
@@ -1712,10 +1800,10 @@ emulate_bittest(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
if ((vie->reg & 7) != 4)
return (EINVAL);
- error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
+ error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
KASSERT(error == 0, ("%s: error %d getting rflags", __func__, error));
- error = memread(vm, vcpuid, gpa, &val, vie->opsize, memarg);
+ error = vie_mmio_read(vie, vm, vcpuid, gpa, &val, vie->opsize);
if (error)
return (error);
@@ -1739,8 +1827,7 @@ emulate_bittest(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
}
static int
-emulate_twob_group15(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
- mem_region_read_t memread, mem_region_write_t memwrite, void *memarg)
+emulate_twob_group15(void *vm, int vcpuid, uint64_t gpa, struct vie *vie)
{
int error;
uint64_t buf;
@@ -1758,7 +1845,7 @@ emulate_twob_group15(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
* CLFLUSH, CLFLUSHOPT. Only check for access
* rights.
*/
- error = memread(vm, vcpuid, gpa, &buf, 1, memarg);
+ error = vie_mmio_read(vie, vm, vcpuid, gpa, &buf, 1);
}
break;
default:
@@ -1769,91 +1856,460 @@ emulate_twob_group15(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
return (error);
}
+static int
+vie_mmio_read(struct vie *vie, struct vm *vm, int cpuid, uint64_t gpa,
+ uint64_t *rval, int bytes)
+{
+ int err;
+
+ if (vie->mmio_req_read.state == VR_DONE) {
+ ASSERT(vie->mmio_req_read.bytes == bytes);
+ ASSERT(vie->mmio_req_read.gpa == gpa);
+
+ *rval = vie->mmio_req_read.data;
+ return (0);
+ }
+
+ err = vm_service_mmio_read(vm, cpuid, gpa, rval, bytes);
+ if (err == 0) {
+ /*
+ * A successful read from an in-kernel-emulated device may come
+ * with side effects, so stash the result in case it's used for
+ * an instruction which subsequently needs to issue an MMIO
+ * write to userspace.
+ */
+ ASSERT(vie->mmio_req_read.state == VR_NONE);
+
+ vie->mmio_req_read.bytes = bytes;
+ vie->mmio_req_read.gpa = gpa;
+ vie->mmio_req_read.data = *rval;
+ vie->mmio_req_read.state = VR_DONE;
+
+ } else if (err == ESRCH) {
+ /* Hope that userspace emulation can fulfill this read */
+ vie->mmio_req_read.bytes = bytes;
+ vie->mmio_req_read.gpa = gpa;
+ vie->mmio_req_read.state = VR_PENDING;
+ vie->status |= VIES_PENDING_MMIO;
+ }
+ return (err);
+}
+
+static int
+vie_mmio_write(struct vie *vie, struct vm *vm, int cpuid, uint64_t gpa,
+ uint64_t wval, int bytes)
+{
+ int err;
+
+ if (vie->mmio_req_write.state == VR_DONE) {
+ ASSERT(vie->mmio_req_write.bytes == bytes);
+ ASSERT(vie->mmio_req_write.gpa == gpa);
+
+ return (0);
+ }
+
+ err = vm_service_mmio_write(vm, cpuid, gpa, wval, bytes);
+ if (err == 0) {
+ /*
+ * A successful write to an in-kernel-emulated device probably
+ * results in side effects, so stash the fact that such a write
+ * succeeded in case the operation requires other work.
+ */
+ vie->mmio_req_write.bytes = bytes;
+ vie->mmio_req_write.gpa = gpa;
+ vie->mmio_req_write.data = wval;
+ vie->mmio_req_write.state = VR_DONE;
+ } else if (err == ESRCH) {
+ /* Hope that userspace emulation can fulfill this write */
+ vie->mmio_req_write.bytes = bytes;
+ vie->mmio_req_write.gpa = gpa;
+ vie->mmio_req_write.data = wval;
+ vie->mmio_req_write.state = VR_PENDING;
+ vie->status |= VIES_PENDING_MMIO;
+ }
+ return (err);
+}
+
int
-vmm_emulate_instruction(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
- struct vm_guest_paging *paging, mem_region_read_t memread,
- mem_region_write_t memwrite, void *memarg)
+vie_emulate_mmio(struct vie *vie, void *vm, int vcpuid)
{
int error;
+ uint64_t gpa;
- if (!vie->decoded)
+ if ((vie->status & (VIES_INST_DECODE | VIES_MMIO)) !=
+ (VIES_INST_DECODE | VIES_MMIO)) {
return (EINVAL);
+ }
+
+ gpa = vie->mmio_gpa;
switch (vie->op.op_type) {
case VIE_OP_TYPE_GROUP1:
- error = emulate_group1(vm, vcpuid, gpa, vie, paging, memread,
- memwrite, memarg);
+ error = emulate_group1(vm, vcpuid, gpa, vie);
break;
case VIE_OP_TYPE_POP:
- error = emulate_pop(vm, vcpuid, gpa, vie, paging, memread,
- memwrite, memarg);
+ error = emulate_pop(vm, vcpuid, gpa, vie);
break;
case VIE_OP_TYPE_PUSH:
- error = emulate_push(vm, vcpuid, gpa, vie, paging, memread,
- memwrite, memarg);
+ error = emulate_push(vm, vcpuid, gpa, vie);
break;
case VIE_OP_TYPE_CMP:
- error = emulate_cmp(vm, vcpuid, gpa, vie,
- memread, memwrite, memarg);
+ error = emulate_cmp(vm, vcpuid, gpa, vie);
break;
case VIE_OP_TYPE_MOV:
- error = emulate_mov(vm, vcpuid, gpa, vie,
- memread, memwrite, memarg);
+ error = emulate_mov(vm, vcpuid, gpa, vie);
break;
case VIE_OP_TYPE_MOVSX:
case VIE_OP_TYPE_MOVZX:
- error = emulate_movx(vm, vcpuid, gpa, vie,
- memread, memwrite, memarg);
+ error = emulate_movx(vm, vcpuid, gpa, vie);
break;
case VIE_OP_TYPE_MOVS:
- error = emulate_movs(vm, vcpuid, gpa, vie, paging, memread,
- memwrite, memarg);
+ error = emulate_movs(vm, vcpuid, gpa, vie);
break;
case VIE_OP_TYPE_STOS:
- error = emulate_stos(vm, vcpuid, gpa, vie, paging, memread,
- memwrite, memarg);
+ error = emulate_stos(vm, vcpuid, gpa, vie);
break;
case VIE_OP_TYPE_AND:
- error = emulate_and(vm, vcpuid, gpa, vie,
- memread, memwrite, memarg);
+ error = emulate_and(vm, vcpuid, gpa, vie);
break;
case VIE_OP_TYPE_OR:
- error = emulate_or(vm, vcpuid, gpa, vie,
- memread, memwrite, memarg);
+ error = emulate_or(vm, vcpuid, gpa, vie);
break;
case VIE_OP_TYPE_SUB:
- error = emulate_sub(vm, vcpuid, gpa, vie,
- memread, memwrite, memarg);
+ error = emulate_sub(vm, vcpuid, gpa, vie);
break;
case VIE_OP_TYPE_BITTEST:
- error = emulate_bittest(vm, vcpuid, gpa, vie,
- memread, memwrite, memarg);
+ error = emulate_bittest(vm, vcpuid, gpa, vie);
break;
case VIE_OP_TYPE_TWOB_GRP15:
- error = emulate_twob_group15(vm, vcpuid, gpa, vie,
- memread, memwrite, memarg);
+ error = emulate_twob_group15(vm, vcpuid, gpa, vie);
break;
case VIE_OP_TYPE_ADD:
- error = emulate_add(vm, vcpuid, gpa, vie, memread,
- memwrite, memarg);
+ error = emulate_add(vm, vcpuid, gpa, vie);
break;
case VIE_OP_TYPE_TEST:
- error = emulate_test(vm, vcpuid, gpa, vie,
- memread, memwrite, memarg);
+ error = emulate_test(vm, vcpuid, gpa, vie);
break;
case VIE_OP_TYPE_BEXTR:
- error = emulate_bextr(vm, vcpuid, gpa, vie, paging,
- memread, memwrite, memarg);
+ error = emulate_bextr(vm, vcpuid, gpa, vie);
break;
default:
error = EINVAL;
break;
}
+ if (error == ESRCH) {
+ /* Return to userspace with the mmio request */
+ return (-1);
+ }
+
return (error);
}
+static int
+vie_emulate_inout_port(struct vie *vie, struct vm *vm, int vcpuid)
+{
+ uint32_t mask, val;
+ bool in;
+ int err;
+
+ mask = vie_size2mask(vie->inout.bytes);
+ in = (vie->inout.flags & INOUT_IN) != 0;
+
+ if (!in) {
+ val = vie->inout.eax & mask;
+ }
+
+ if (vie->inout_req_state != VR_DONE) {
+ err = vm_inout_access(vm, vcpuid, in, vie->inout.port,
+ vie->inout.bytes, &val);
+ } else {
+ /*
+ * This port access was handled in userspace and the result was
+ * injected in to be handled now.
+ */
+ val = vie->inout_req_val;
+ vie->inout_req_state = VR_NONE;
+ err = 0;
+ }
+
+ if (err == ESRCH) {
+ vie->status |= VIES_PENDING_INOUT;
+ vie->inout_req_state = VR_PENDING;
+ return (err);
+ } else if (err != 0) {
+ return (err);
+ }
+
+ if (in) {
+ val &= mask;
+ val |= (vie->inout.eax & ~mask);
+ err = vm_set_register(vm, vcpuid, VM_REG_GUEST_RAX, val);
+ KASSERT(err == 0, ("emulate_ioport: error %d setting guest "
+ "rax register", err));
+ }
+ return (0);
+}
+
+static enum vm_reg_name
+vie_inout_segname(const struct vie *vie)
+{
+ uint8_t segidx = vie->inout.segment;
+ const enum vm_reg_name segmap[] = {
+ VM_REG_GUEST_ES,
+ VM_REG_GUEST_CS,
+ VM_REG_GUEST_SS,
+ VM_REG_GUEST_DS,
+ VM_REG_GUEST_FS,
+ VM_REG_GUEST_GS,
+ };
+ const uint8_t maxidx = (sizeof (segmap) / sizeof (segmap[0]));
+
+ if (segidx >= maxidx) {
+ panic("unexpected segment index %u", segidx);
+ }
+ return (segmap[segidx]);
+}
+
+static int
+vie_emulate_inout_str(struct vie *vie, struct vm *vm, int vcpuid)
+{
+ uint8_t bytes, addrsize;
+ uint64_t index, count = 0, gla, rflags;
+ int prot, err, fault;
+ bool in, repeat;
+ enum vm_reg_name seg_reg, idx_reg;
+ struct vm_copyinfo copyinfo[2];
+
+ in = (vie->inout.flags & INOUT_IN) != 0;
+ bytes = vie->inout.bytes;
+ addrsize = vie->inout.addrsize;
+ prot = in ? PROT_WRITE : PROT_READ;
+
+ ASSERT(bytes == 1 || bytes == 2 || bytes == 4);
+ ASSERT(addrsize == 2 || addrsize == 4 || addrsize == 8);
+
+ idx_reg = (in) ? VM_REG_GUEST_RDI : VM_REG_GUEST_RSI;
+ seg_reg = vie_inout_segname(vie);
+ err = vm_get_register(vm, vcpuid, idx_reg, &index);
+ ASSERT(err == 0);
+ index = index & vie_size2mask(addrsize);
+
+ repeat = (vie->inout.flags & INOUT_REP) != 0;
+
+ /* Count register */
+ if (repeat) {
+ err = vm_get_register(vm, vcpuid, VM_REG_GUEST_RCX, &count);
+ count &= vie_size2mask(addrsize);
+
+ if (count == 0) {
+ /*
+ * If we were asked to emulate a REP INS/OUTS when the
+ * count register is zero, no further work is required.
+ */
+ return (0);
+ }
+ } else {
+ count = 1;
+ }
+
+ gla = 0;
+ if (vie_get_gla(vie, vm, vcpuid, bytes, addrsize, prot, seg_reg,
+ idx_reg, &gla) != 0) {
+ /* vie_get_gla() already injected the appropriate fault */
+ return (0);
+ }
+
+ /*
+	 * The INS/OUTS emulation currently assumes that the memory target resides
+ * within the guest system memory, rather than a device MMIO region. If
+ * such a case becomes a necessity, that additional handling could be
+ * put in place.
+ */
+ err = vm_copy_setup(vm, vcpuid, &vie->paging, gla, bytes, prot,
+ copyinfo, nitems(copyinfo), &fault);
+
+ if (err) {
+ /* Unrecoverable error */
+ return (err);
+ } else if (fault) {
+ /* Resume guest to handle fault */
+ return (0);
+ }
+
+ if (!in) {
+ vm_copyin(vm, vcpuid, copyinfo, &vie->inout.eax, bytes);
+ }
+
+ err = vie_emulate_inout_port(vie, vm, vcpuid);
+
+ if (err == 0 && in) {
+ vm_copyout(vm, vcpuid, &vie->inout.eax, copyinfo, bytes);
+ }
+
+ vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo));
+
+ if (err == 0) {
+ err = vm_get_register(vm, vcpuid, VM_REG_GUEST_RFLAGS,
+ &rflags);
+ ASSERT(err == 0);
+
+ /* Update index */
+ if (rflags & PSL_D) {
+ index -= bytes;
+ } else {
+ index += bytes;
+ }
+
+ /* Update index register */
+ err = vie_update_register(vm, vcpuid, idx_reg, index, addrsize);
+ ASSERT(err == 0);
+
+ /*
+ * Update count register only if the instruction had a repeat
+ * prefix.
+ */
+ if ((vie->inout.flags & INOUT_REP) != 0) {
+ count--;
+ err = vie_update_register(vm, vcpuid, VM_REG_GUEST_RCX,
+ count, addrsize);
+ ASSERT(err == 0);
+
+ if (count != 0) {
+ return (vie_repeat(vie));
+ }
+ }
+ }
+
+ return (err);
+}
+
int
+vie_emulate_inout(struct vie *vie, struct vm *vm, int vcpuid)
+{
+ int err = 0;
+
+ if ((vie->status & VIES_INOUT) == 0) {
+ return (EINVAL);
+ }
+
+ if ((vie->inout.flags & INOUT_STR) == 0) {
+ /*
+ * For now, using the 'rep' prefixes with plain (non-string)
+ * in/out is not supported.
+ */
+ if ((vie->inout.flags & INOUT_REP) != 0) {
+ return (EINVAL);
+ }
+
+ err = vie_emulate_inout_port(vie, vm, vcpuid);
+
+ if (err == ESRCH) {
+ ASSERT(vie->status & VIES_PENDING_INOUT);
+ /* Return to userspace with the in/out request */
+ err = -1;
+ }
+ } else {
+ vie->status &= ~VIES_REPEAT;
+ err = vie_emulate_inout_str(vie, vm, vcpuid);
+
+ if (err == ESRCH) {
+ ASSERT(vie->status & VIES_PENDING_INOUT);
+ /* Return to userspace with the in/out request */
+ err = -1;
+ }
+ }
+
+ return (err);
+}
+
+void
+vie_reset(struct vie *vie)
+{
+ vie->status = 0;
+ vie->num_processed = vie->num_valid = 0;
+}
+
+void
+vie_advance_pc(struct vie *vie, uint64_t *nextrip)
+{
+ VERIFY((vie->status & VIES_REPEAT) == 0);
+
+ *nextrip += vie->num_processed;
+ vie_reset(vie);
+}
+
+void
+vie_exitinfo(const struct vie *vie, struct vm_exit *vme)
+{
+ if (vie->status & VIES_MMIO) {
+ vme->exitcode = VM_EXITCODE_MMIO;
+ if (vie->mmio_req_read.state == VR_PENDING) {
+ vme->u.mmio.gpa = vie->mmio_req_read.gpa;
+ vme->u.mmio.data = 0;
+ vme->u.mmio.bytes = vie->mmio_req_read.bytes;
+ vme->u.mmio.read = 1;
+ } else if (vie->mmio_req_write.state == VR_PENDING) {
+ vme->u.mmio.gpa = vie->mmio_req_write.gpa;
+ vme->u.mmio.data = vie->mmio_req_write.data &
+ vie_size2mask(vie->mmio_req_write.bytes);
+ vme->u.mmio.bytes = vie->mmio_req_write.bytes;
+ vme->u.mmio.read = 0;
+ } else {
+ panic("bad pending MMIO state");
+ }
+ } else if (vie->status & VIES_INOUT) {
+ vme->exitcode = VM_EXITCODE_INOUT;
+ vme->u.inout.port = vie->inout.port;
+ vme->u.inout.bytes = vie->inout.bytes;
+ if ((vie->inout.flags & INOUT_IN) != 0) {
+ vme->u.inout.flags = INOUT_IN;
+ vme->u.inout.eax = 0;
+ } else {
+ vme->u.inout.flags = 0;
+ vme->u.inout.eax = vie->inout.eax &
+ vie_size2mask(vie->inout.bytes);
+ }
+ } else {
+ panic("no pending operation");
+ }
+}
+
+/*
+ * In the case of a decoding or verification failure, bailing out to userspace
+ * to do the instruction emulation is our only option for now.
+ */
+void
+vie_fallback_exitinfo(const struct vie *vie, struct vm_exit *vme)
+{
+ if ((vie->status & VIES_INST_FETCH) == 0) {
+ bzero(&vme->u.inst_emul, sizeof (vme->u.inst_emul));
+ } else {
+ ASSERT(sizeof (vie->inst) == sizeof (vme->u.inst_emul.inst));
+
+ bcopy(vie->inst, vme->u.inst_emul.inst, sizeof (vie->inst));
+ vme->u.inst_emul.num_valid = vie->num_valid;
+ }
+ vme->exitcode = VM_EXITCODE_INST_EMUL;
+}
+
+bool
+vie_pending(const struct vie *vie)
+{
+ return ((vie->status & (VIES_PENDING_MMIO|VIES_PENDING_INOUT)) != 0);
+}
+
+bool
+vie_needs_fetch(const struct vie *vie)
+{
+ if (vie->status & VIES_INST_FETCH) {
+ ASSERT(vie->num_valid != 0);
+ return (false);
+ }
+ return (true);
+}
+
+static int
vie_alignment_check(int cpl, int size, uint64_t cr0, uint64_t rf, uint64_t gla)
{
KASSERT(size == 1 || size == 2 || size == 4 || size == 8,
@@ -1866,7 +2322,7 @@ vie_alignment_check(int cpl, int size, uint64_t cr0, uint64_t rf, uint64_t gla)
return ((gla & (size - 1)) ? 1 : 0);
}
-int
+static int
vie_canonical_check(enum vm_cpu_mode cpu_mode, uint64_t gla)
{
uint64_t mask;
@@ -1885,7 +2341,7 @@ vie_canonical_check(enum vm_cpu_mode cpu_mode, uint64_t gla)
return ((gla & mask) != 0);
}
-uint64_t
+static uint64_t
vie_size2mask(int size)
{
KASSERT(size == 1 || size == 2 || size == 4 || size == 8,
@@ -1893,7 +2349,7 @@ vie_size2mask(int size)
return (size2mask[size]);
}
-int
+static int
vie_calculate_gla(enum vm_cpu_mode cpu_mode, enum vm_reg_name seg,
struct seg_desc *desc, uint64_t offset, int length, int addrsize,
int prot, uint64_t *gla)
@@ -1905,13 +2361,8 @@ vie_calculate_gla(enum vm_cpu_mode cpu_mode, enum vm_reg_name seg,
("%s: invalid segment %d", __func__, seg));
KASSERT(length == 1 || length == 2 || length == 4 || length == 8,
("%s: invalid operand size %d", __func__, length));
-#ifdef __FreeBSD__
- KASSERT((prot & ~(PROT_READ | PROT_WRITE)) == 0,
- ("%s: invalid prot %#x", __func__, prot));
-#else
KASSERT((prot & ~(PROT_READ | PROT_WRITE)) == 0,
("%s: invalid prot %x", __func__, prot));
-#endif
firstoff = offset;
if (cpu_mode == CPU_MODE_64BIT) {
@@ -1930,31 +2381,21 @@ vie_calculate_gla(enum vm_cpu_mode cpu_mode, enum vm_reg_name seg,
if (SEG_DESC_UNUSABLE(desc->access))
return (-1);
- /*
+ /*
* The processor generates a #NP exception when a segment
* register is loaded with a selector that points to a
* descriptor that is not present. If this was the case then
* it would have been checked before the VM-exit.
*/
-#ifdef __FreeBSD__
- KASSERT(SEG_DESC_PRESENT(desc->access),
- ("segment %d not present: %#x", seg, desc->access));
-#else
KASSERT(SEG_DESC_PRESENT(desc->access),
("segment %d not present: %x", seg, desc->access));
-#endif
/*
* The descriptor type must indicate a code/data segment.
*/
type = SEG_DESC_TYPE(desc->access);
-#ifdef __FreeBSD__
- KASSERT(type >= 16 && type <= 31, ("segment %d has invalid "
- "descriptor type %#x", seg, type));
-#else
KASSERT(type >= 16 && type <= 31, ("segment %d has invalid "
"descriptor type %x", seg, type));
-#endif
if (prot & PROT_READ) {
/* #GP on a read access to a exec-only code segment */
@@ -2019,24 +2460,107 @@ vie_calculate_gla(enum vm_cpu_mode cpu_mode, enum vm_reg_name seg,
}
void
-vie_init(struct vie *vie, const char *inst_bytes, int inst_length)
+vie_init_mmio(struct vie *vie, const char *inst_bytes, uint8_t inst_length,
+ const struct vm_guest_paging *paging, uint64_t gpa)
{
- KASSERT(inst_length >= 0 && inst_length <= VIE_INST_SIZE,
+ KASSERT(inst_length <= VIE_INST_SIZE,
("%s: invalid instruction length (%d)", __func__, inst_length));
- bzero(vie, sizeof(struct vie));
+ bzero(vie, sizeof (struct vie));
vie->base_register = VM_REG_LAST;
vie->index_register = VM_REG_LAST;
vie->segment_register = VM_REG_LAST;
+ vie->status = VIES_INIT | VIES_MMIO;
- if (inst_length) {
+ if (inst_length != 0) {
bcopy(inst_bytes, vie->inst, inst_length);
vie->num_valid = inst_length;
+ vie->status |= VIES_INST_FETCH;
+ }
+
+ vie->paging = *paging;
+ vie->mmio_gpa = gpa;
+}
+
+void
+vie_init_inout(struct vie *vie, const struct vm_inout *inout, uint8_t inst_len,
+ const struct vm_guest_paging *paging)
+{
+ bzero(vie, sizeof (struct vie));
+
+ vie->status = VIES_INIT | VIES_INOUT;
+
+ vie->inout = *inout;
+ vie->paging = *paging;
+
+ /*
+ * Since VMX/SVM assists already decoded the nature of the in/out
+ * instruction, let the status reflect that.
+ */
+ vie->status |= VIES_INST_FETCH | VIES_INST_DECODE;
+ vie->num_processed = inst_len;
+}
+
+int
+vie_fulfill_mmio(struct vie *vie, const struct vm_mmio *result)
+{
+ struct vie_mmio *pending;
+
+ if ((vie->status & VIES_MMIO) == 0 ||
+ (vie->status & VIES_PENDING_MMIO) == 0) {
+ return (EINVAL);
+ }
+
+ if (result->read) {
+ pending = &vie->mmio_req_read;
+ } else {
+ pending = &vie->mmio_req_write;
+ }
+
+ if (pending->state != VR_PENDING ||
+ pending->bytes != result->bytes || pending->gpa != result->gpa) {
+ return (EINVAL);
+ }
+
+ if (result->read) {
+ pending->data = result->data & vie_size2mask(pending->bytes);
+ }
+ pending->state = VR_DONE;
+ vie->status &= ~VIES_PENDING_MMIO;
+
+ return (0);
+}
+
+int
+vie_fulfill_inout(struct vie *vie, const struct vm_inout *result)
+{
+ if ((vie->status & VIES_INOUT) == 0 ||
+ (vie->status & VIES_PENDING_INOUT) == 0) {
+ return (EINVAL);
}
+ if ((vie->inout.flags & INOUT_IN) != (result->flags & INOUT_IN) ||
+ vie->inout.bytes != result->bytes ||
+ vie->inout.port != result->port) {
+ return (EINVAL);
+ }
+
+ if (result->flags & INOUT_IN) {
+ vie->inout_req_val = result->eax &
+ vie_size2mask(vie->inout.bytes);
+ }
+ vie->inout_req_state = VR_DONE;
+ vie->status &= ~(VIES_PENDING_INOUT);
+
+ return (0);
+}
+
+uint64_t
+vie_mmio_gpa(const struct vie *vie)
+{
+ return (vie->mmio_gpa);
}
-#ifdef _KERNEL
static int
pf_error_code(int usermode, int prot, int rsvd, uint64_t pte)
{
@@ -2299,27 +2823,28 @@ vm_gla2gpa_nofault(struct vm *vm, int vcpuid, struct vm_guest_paging *paging,
}
int
-vmm_fetch_instruction(struct vm *vm, int vcpuid, struct vm_guest_paging *paging,
- uint64_t rip, int inst_length, struct vie *vie, int *faultptr)
+vie_fetch_instruction(struct vie *vie, struct vm *vm, int vcpuid, uint64_t rip,
+ int *faultptr)
{
struct vm_copyinfo copyinfo[2];
int error, prot;
- if (inst_length > VIE_INST_SIZE)
- panic("vmm_fetch_instruction: invalid length %d", inst_length);
+ if (vie->status != (VIES_INIT|VIES_MMIO)) {
+ return (EINVAL);
+ }
prot = PROT_READ | PROT_EXEC;
- error = vm_copy_setup(vm, vcpuid, paging, rip, inst_length, prot,
- copyinfo, nitems(copyinfo), faultptr);
+ error = vm_copy_setup(vm, vcpuid, &vie->paging, rip, VIE_INST_SIZE,
+ prot, copyinfo, nitems(copyinfo), faultptr);
if (error || *faultptr)
return (error);
- vm_copyin(vm, vcpuid, copyinfo, vie->inst, inst_length);
+ vm_copyin(vm, vcpuid, copyinfo, vie->inst, VIE_INST_SIZE);
vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo));
- vie->num_valid = inst_length;
+ vie->num_valid = VIE_INST_SIZE;
+ vie->status |= VIES_INST_FETCH;
return (0);
}
-#endif /* _KERNEL */
static int
vie_peek(struct vie *vie, uint8_t *x)
@@ -2821,23 +3346,28 @@ decode_moffset(struct vie *vie)
return (0);
}
-#ifdef _KERNEL
/*
* Verify that the 'guest linear address' provided as collateral of the nested
* page table fault matches with our instruction decoding.
*/
-static int
-verify_gla(struct vm *vm, int cpuid, uint64_t gla, struct vie *vie,
- enum vm_cpu_mode cpu_mode)
+int
+vie_verify_gla(struct vie *vie, struct vm *vm, int cpuid, uint64_t gla)
{
int error;
uint64_t base, segbase, idx, gla2;
enum vm_reg_name seg;
struct seg_desc desc;
- /* Skip 'gla' verification */
- if (gla == VIE_INVALID_GLA)
+ ASSERT((vie->status & VIES_INST_DECODE) != 0);
+
+ /*
+ * If there was no valid GLA context with the exit, or the decoded
+ * instruction acts on more than one address, verification is done.
+ */
+ if (gla == VIE_INVALID_GLA ||
+ (vie->op.op_flags & VIE_OP_F_NO_GLA_VERIFICATION) != 0) {
return (0);
+ }
base = 0;
if (vie->base_register != VM_REG_LAST) {
@@ -2879,15 +3409,16 @@ verify_gla(struct vm *vm, int cpuid, uint64_t gla, struct vie *vie,
* string destination the DS segment is the default. These
* can be overridden to allow other segments to be accessed.
*/
- if (vie->segment_override)
+ if (vie->segment_override) {
seg = vie->segment_register;
- else if (vie->base_register == VM_REG_GUEST_RSP ||
- vie->base_register == VM_REG_GUEST_RBP)
+ } else if (vie->base_register == VM_REG_GUEST_RSP ||
+ vie->base_register == VM_REG_GUEST_RBP) {
seg = VM_REG_GUEST_SS;
- else
+ } else {
seg = VM_REG_GUEST_DS;
- if (cpu_mode == CPU_MODE_64BIT && seg != VM_REG_GUEST_FS &&
- seg != VM_REG_GUEST_GS) {
+ }
+ if (vie->paging.cpu_mode == CPU_MODE_64BIT &&
+ seg != VM_REG_GUEST_FS && seg != VM_REG_GUEST_GS) {
segbase = 0;
} else {
error = vm_get_seg_desc(vm, cpuid, seg, &desc);
@@ -2913,16 +3444,17 @@ verify_gla(struct vm *vm, int cpuid, uint64_t gla, struct vie *vie,
return (0);
}
-#endif /* _KERNEL */
int
-#ifdef _KERNEL
-vmm_decode_instruction(struct vm *vm, int cpuid, uint64_t gla,
- enum vm_cpu_mode cpu_mode, int cs_d, struct vie *vie)
-#else
-vmm_decode_instruction(enum vm_cpu_mode cpu_mode, int cs_d, struct vie *vie)
-#endif
+vie_decode_instruction(struct vie *vie, struct vm *vm, int cpuid, int cs_d)
{
+ enum vm_cpu_mode cpu_mode;
+
+ if ((vie->status & VIES_INST_FETCH) == 0) {
+ return (EINVAL);
+ }
+
+ cpu_mode = vie->paging.cpu_mode;
if (decode_prefixes(vie, cpu_mode, cs_d))
return (-1);
@@ -2945,14 +3477,7 @@ vmm_decode_instruction(enum vm_cpu_mode cpu_mode, int cs_d, struct vie *vie)
if (decode_moffset(vie))
return (-1);
-#ifdef _KERNEL
- if ((vie->op.op_flags & VIE_OP_F_NO_GLA_VERIFICATION) == 0) {
- if (verify_gla(vm, cpuid, gla, vie, cpu_mode))
- return (-1);
- }
-#endif
-
- vie->decoded = 1; /* success */
+ vie->status |= VIES_INST_DECODE;
return (0);
}
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_ioport.c b/usr/src/uts/i86pc/io/vmm/vmm_ioport.c
index 3d08fd5e85..01fae7d584 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm_ioport.c
+++ b/usr/src/uts/i86pc/io/vmm/vmm_ioport.c
@@ -25,6 +25,18 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * Copyright 2020 Oxide Computer Company
+ */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
@@ -33,18 +45,16 @@ __FBSDID("$FreeBSD$");
#include <sys/systm.h>
#include <machine/vmm.h>
-#include <machine/vmm_instruction_emul.h>
#include "vatpic.h"
#include "vatpit.h"
#include "vpmtmr.h"
#include "vrtc.h"
#include "vmm_ioport.h"
-#include "vmm_ktr.h"
#define MAX_IOPORTS 1280
-ioport_handler_func_t ioport_handler[MAX_IOPORTS] = {
+static ioport_handler_func_t ioport_handler[MAX_IOPORTS] = {
[TIMER_MODE] = vatpit_handler,
[TIMER_CNTR0] = vatpit_handler,
[TIMER_CNTR1] = vatpit_handler,
@@ -61,144 +71,24 @@ ioport_handler_func_t ioport_handler[MAX_IOPORTS] = {
[IO_RTC + 1] = vrtc_data_handler,
};
-#ifdef KTR
-static const char *
-inout_instruction(struct vm_exit *vmexit)
-{
- int index;
-
- static const char *iodesc[] = {
- "outb", "outw", "outl",
- "inb", "inw", "inl",
- "outsb", "outsw", "outsd",
- "insb", "insw", "insd",
- };
-
- switch (vmexit->u.inout.bytes) {
- case 1:
- index = 0;
- break;
- case 2:
- index = 1;
- break;
- default:
- index = 2;
- break;
- }
-
- if (vmexit->u.inout.in)
- index += 3;
-
- if (vmexit->u.inout.string)
- index += 6;
-
- KASSERT(index < nitems(iodesc), ("%s: invalid index %d",
- __func__, index));
-
- return (iodesc[index]);
-}
-#endif /* KTR */
-
-static int
-emulate_inout_port(struct vm *vm, int vcpuid, struct vm_exit *vmexit,
- bool *retu)
+int
+vm_inout_access(struct vm *vm, int vcpuid, bool in, uint16_t port,
+ uint8_t bytes, uint32_t *val)
{
ioport_handler_func_t handler;
- uint32_t mask, val;
int error;
-#ifdef __FreeBSD__
- /*
- * If there is no handler for the I/O port then punt to userspace.
- */
- if (vmexit->u.inout.port >= MAX_IOPORTS ||
- (handler = ioport_handler[vmexit->u.inout.port]) == NULL) {
- *retu = true;
- return (0);
- }
-#else /* __FreeBSD__ */
handler = NULL;
- if (vmexit->u.inout.port < MAX_IOPORTS) {
- handler = ioport_handler[vmexit->u.inout.port];
+ if (port < MAX_IOPORTS) {
+ handler = ioport_handler[port];
}
- /* Look for hooks, if a standard handler is not present */
- if (handler == NULL) {
- mask = vie_size2mask(vmexit->u.inout.bytes);
- if (!vmexit->u.inout.in) {
- val = vmexit->u.inout.eax & mask;
- }
- error = vm_ioport_handle_hook(vm, vcpuid, vmexit->u.inout.in,
- vmexit->u.inout.port, vmexit->u.inout.bytes, &val);
- if (error == 0) {
- goto finish;
- }
- *retu = true;
- return (0);
+ if (handler != NULL) {
+ error = (*handler)(vm, vcpuid, in, port, bytes, val);
+ } else {
+ /* Look for hooks, if a standard handler is not present */
+ error = vm_ioport_handle_hook(vm, vcpuid, in, port, bytes, val);
}
-#endif /* __FreeBSD__ */
-
- mask = vie_size2mask(vmexit->u.inout.bytes);
-
- if (!vmexit->u.inout.in) {
- val = vmexit->u.inout.eax & mask;
- }
-
- error = (*handler)(vm, vcpuid, vmexit->u.inout.in,
- vmexit->u.inout.port, vmexit->u.inout.bytes, &val);
- if (error) {
- /*
- * The value returned by this function is also the return value
- * of vm_run(). This needs to be a positive number otherwise it
- * can be interpreted as a "pseudo-error" like ERESTART.
- *
- * Enforce this by mapping all errors to EIO.
- */
- return (EIO);
- }
-
-#ifndef __FreeBSD__
-finish:
-#endif /* __FreeBSD__ */
- if (vmexit->u.inout.in) {
- vmexit->u.inout.eax &= ~mask;
- vmexit->u.inout.eax |= val & mask;
- error = vm_set_register(vm, vcpuid, VM_REG_GUEST_RAX,
- vmexit->u.inout.eax);
- KASSERT(error == 0, ("emulate_ioport: error %d setting guest "
- "rax register", error));
- }
- *retu = false;
- return (0);
-}
-
-static int
-emulate_inout_str(struct vm *vm, int vcpuid, struct vm_exit *vmexit, bool *retu)
-{
- *retu = true;
- return (0); /* Return to userspace to finish emulation */
-}
-
-int
-vm_handle_inout(struct vm *vm, int vcpuid, struct vm_exit *vmexit, bool *retu)
-{
- int bytes, error;
-
- bytes = vmexit->u.inout.bytes;
- KASSERT(bytes == 1 || bytes == 2 || bytes == 4,
- ("vm_handle_inout: invalid operand size %d", bytes));
-
- if (vmexit->u.inout.string)
- error = emulate_inout_str(vm, vcpuid, vmexit, retu);
- else
- error = emulate_inout_port(vm, vcpuid, vmexit, retu);
-
- VCPU_CTR4(vm, vcpuid, "%s%s 0x%04x: %s",
- vmexit->u.inout.rep ? "rep " : "",
- inout_instruction(vmexit),
- vmexit->u.inout.port,
- error ? "error" : (*retu ? "userspace" : "handled"));
-
return (error);
}
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_ioport.h b/usr/src/uts/i86pc/io/vmm/vmm_ioport.h
index 14e315f400..7c51906e85 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm_ioport.h
+++ b/usr/src/uts/i86pc/io/vmm/vmm_ioport.h
@@ -32,8 +32,9 @@
#define _VMM_IOPORT_H_
typedef int (*ioport_handler_func_t)(struct vm *vm, int vcpuid,
- bool in, int port, int bytes, uint32_t *val);
+ bool in, uint16_t port, uint8_t bytes, uint32_t *val);
-int vm_handle_inout(struct vm *vm, int vcpuid, struct vm_exit *vme, bool *retu);
+int vm_inout_access(struct vm *vm, int vcpuid, bool in, uint16_t port,
+ uint8_t bytes, uint32_t *val);
#endif /* _VMM_IOPORT_H_ */
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c b/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c
index 6526188b1c..3fd7f862d1 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c
+++ b/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c
@@ -500,25 +500,27 @@ vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md,
/* Execute the primary logic for the ioctl. */
switch (cmd) {
case VM_RUN: {
- struct vm_run vmrun;
+ struct vm_entry entry;
- if (ddi_copyin(datap, &vmrun, sizeof (vmrun), md)) {
+ if (ddi_copyin(datap, &entry, sizeof (entry), md)) {
error = EFAULT;
break;
}
- vmrun.cpuid = vcpu;
if (!(curthread->t_schedflag & TS_VCPU))
smt_mark_as_vcpu();
- error = vm_run(sc->vmm_vm, &vmrun);
- /*
- * XXXJOY: I think it's necessary to do copyout, even in the
- * face of errors, since the exit state is communicated out.
- */
- if (ddi_copyout(&vmrun, datap, sizeof (vmrun), md)) {
- error = EFAULT;
- break;
+ error = vm_run(sc->vmm_vm, vcpu, &entry);
+
+ if (error == 0) {
+ const struct vm_exit *vme;
+ void *outp = entry.exit_data;
+
+ vme = vm_exitinfo(sc->vmm_vm, vcpu);
+ if (ddi_copyout(vme, outp, sizeof (*vme), md)) {
+ error = EFAULT;
+ break;
+ }
}
break;
}
@@ -982,9 +984,6 @@ vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md,
case VM_GET_KERNEMU_DEV: {
struct vm_readwrite_kernemu_device kemu;
size_t size = 0;
- mem_region_write_t mwrite = NULL;
- mem_region_read_t mread = NULL;
- uint64_t ignored = 0;
if (ddi_copyin(datap, &kemu, sizeof (kemu), md)) {
error = EFAULT;
@@ -998,31 +997,12 @@ vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md,
size = (1 << kemu.access_width);
ASSERT(size >= 1 && size <= 8);
- if (kemu.gpa >= DEFAULT_APIC_BASE &&
- kemu.gpa < DEFAULT_APIC_BASE + PAGE_SIZE) {
- mread = lapic_mmio_read;
- mwrite = lapic_mmio_write;
- } else if (kemu.gpa >= VIOAPIC_BASE &&
- kemu.gpa < VIOAPIC_BASE + VIOAPIC_SIZE) {
- mread = vioapic_mmio_read;
- mwrite = vioapic_mmio_write;
- } else if (kemu.gpa >= VHPET_BASE &&
- kemu.gpa < VHPET_BASE + VHPET_SIZE) {
- mread = vhpet_mmio_read;
- mwrite = vhpet_mmio_write;
- } else {
- error = EINVAL;
- break;
- }
-
if (cmd == VM_SET_KERNEMU_DEV) {
- VERIFY(mwrite != NULL);
- error = mwrite(sc->vmm_vm, vcpu, kemu.gpa, kemu.value,
- size, &ignored);
+ error = vm_service_mmio_write(sc->vmm_vm, vcpu,
+ kemu.gpa, kemu.value, size);
} else {
- VERIFY(mread != NULL);
- error = mread(sc->vmm_vm, vcpu, kemu.gpa, &kemu.value,
- size, &ignored);
+ error = vm_service_mmio_read(sc->vmm_vm, vcpu,
+ kemu.gpa, &kemu.value, size);
}
if (error == 0) {
@@ -2004,6 +1984,11 @@ vmm_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
vmm_softc_t *sc;
minor_t minor;
+ /* The structs in bhyve ioctls assume a 64-bit datamodel */
+ if (ddi_model_convert_from(mode & FMODELS) != DDI_MODEL_NONE) {
+ return (ENOTSUP);
+ }
+
minor = getminor(dev);
if (minor == VMM_CTL_MINOR) {
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_stat.c b/usr/src/uts/i86pc/io/vmm/vmm_stat.c
index a6af75e40a..42d6f8cfa3 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm_stat.c
+++ b/usr/src/uts/i86pc/io/vmm/vmm_stat.c
@@ -163,7 +163,7 @@ VMM_STAT(VMEXIT_NMI_WINDOW, "vm exits due to nmi window opening");
VMM_STAT(VMEXIT_INOUT, "number of times in/out was intercepted");
VMM_STAT(VMEXIT_CPUID, "number of times cpuid was intercepted");
VMM_STAT(VMEXIT_NESTED_FAULT, "vm exits due to nested page fault");
-VMM_STAT(VMEXIT_INST_EMUL, "vm exits for instruction emulation");
+VMM_STAT(VMEXIT_MMIO_EMUL, "vm exits for mmio emulation");
VMM_STAT(VMEXIT_UNKNOWN, "number of vm exits for unknown reason");
VMM_STAT(VMEXIT_ASTPENDING, "number of times astpending at exit");
VMM_STAT(VMEXIT_REQIDLE, "number of times idle requested at exit");
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_stat.h b/usr/src/uts/i86pc/io/vmm/vmm_stat.h
index 3232e23888..bfe35e9f67 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm_stat.h
+++ b/usr/src/uts/i86pc/io/vmm/vmm_stat.h
@@ -162,7 +162,7 @@ VMM_STAT_DECLARE(VMEXIT_NMI_WINDOW);
VMM_STAT_DECLARE(VMEXIT_INOUT);
VMM_STAT_DECLARE(VMEXIT_CPUID);
VMM_STAT_DECLARE(VMEXIT_NESTED_FAULT);
-VMM_STAT_DECLARE(VMEXIT_INST_EMUL);
+VMM_STAT_DECLARE(VMEXIT_MMIO_EMUL);
VMM_STAT_DECLARE(VMEXIT_UNKNOWN);
VMM_STAT_DECLARE(VMEXIT_ASTPENDING);
VMM_STAT_DECLARE(VMEXIT_USERSPACE);
diff --git a/usr/src/uts/i86pc/sys/vmm.h b/usr/src/uts/i86pc/sys/vmm.h
index 45838e343e..d6d24f0c37 100644
--- a/usr/src/uts/i86pc/sys/vmm.h
+++ b/usr/src/uts/i86pc/sys/vmm.h
@@ -122,31 +122,13 @@ enum x2apic_state {
#define VM_INTINFO_HWEXCEPTION (3 << 8)
#define VM_INTINFO_SWINTR (4 << 8)
-#ifndef __FreeBSD__
/*
* illumos doesn't have a limitation based on SPECNAMELEN like FreeBSD does.
* Instead of picking an arbitrary value we will just rely on the same
* calculation that's made below. If this calculation ever changes we need to
* update the the VM_MAX_NAMELEN mapping in the bhyve brand's boot.c file.
*/
-#else
-/*
- * The VM name has to fit into the pathname length constraints of devfs,
- * governed primarily by SPECNAMELEN. The length is the total number of
- * characters in the full path, relative to the mount point and not
- * including any leading '/' characters.
- * A prefix and a suffix are added to the name specified by the user.
- * The prefix is usually "vmm/" or "vmm.io/", but can be a few characters
- * longer for future use.
- * The suffix is a string that identifies a bootrom image or some similar
- * image that is attached to the VM. A separator character gets added to
- * the suffix automatically when generating the full path, so it must be
- * accounted for, reducing the effective length by 1.
- * The effective length of a VM name is 229 bytes for FreeBSD 13 and 37
- * bytes for FreeBSD 12. A minimum length is set for safety and supports
- * a SPECNAMELEN as small as 32 on old systems.
- */
-#endif
+
#define VM_MAX_PREFIXLEN 10
#define VM_MAX_SUFFIXLEN 15
#define VM_MIN_NAMELEN 6
@@ -224,76 +206,6 @@ struct vm_guest_paging {
enum vm_paging_mode paging_mode;
};
-/*
- * The data structures 'vie' and 'vie_op' are meant to be opaque to the
- * consumers of instruction decoding. The only reason why their contents
- * need to be exposed is because they are part of the 'vm_exit' structure.
- */
-struct vie_op {
- uint8_t op_byte; /* actual opcode byte */
- uint8_t op_type; /* type of operation (e.g. MOV) */
- uint16_t op_flags;
-};
-_Static_assert(sizeof(struct vie_op) == 4, "ABI");
-_Static_assert(_Alignof(struct vie_op) == 2, "ABI");
-
-#define VIE_INST_SIZE 15
-struct vie {
- uint8_t inst[VIE_INST_SIZE]; /* instruction bytes */
- uint8_t num_valid; /* size of the instruction */
- uint8_t num_processed;
-
- uint8_t addrsize:4, opsize:4; /* address and operand sizes */
- uint8_t rex_w:1, /* REX prefix */
- rex_r:1,
- rex_x:1,
- rex_b:1,
- rex_present:1,
- repz_present:1, /* REP/REPE/REPZ prefix */
- repnz_present:1, /* REPNE/REPNZ prefix */
- opsize_override:1, /* Operand size override */
- addrsize_override:1, /* Address size override */
- segment_override:1; /* Segment override */
-
- uint8_t mod:2, /* ModRM byte */
- reg:4,
- rm:4;
-
- uint8_t ss:2, /* SIB byte */
- vex_present:1, /* VEX prefixed */
- vex_l:1, /* L bit */
- index:4, /* SIB byte */
- base:4; /* SIB byte */
-
- uint8_t disp_bytes;
- uint8_t imm_bytes;
-
- uint8_t scale;
-
- uint8_t vex_reg:4, /* vvvv: first source register specifier */
- vex_pp:2, /* pp */
- _sparebits:2;
-
- uint8_t _sparebytes[2];
-
- int base_register; /* VM_REG_GUEST_xyz */
- int index_register; /* VM_REG_GUEST_xyz */
- int segment_register; /* VM_REG_GUEST_xyz */
-
- int64_t displacement; /* optional addr displacement */
- int64_t immediate; /* optional immediate operand */
-
- uint8_t decoded; /* set to 1 if successfully decoded */
-
- uint8_t _sparebyte;
-
- struct vie_op op; /* opcode description */
-};
-_Static_assert(sizeof(struct vie) == 64, "ABI");
-_Static_assert(__offsetof(struct vie, disp_bytes) == 22, "ABI");
-_Static_assert(__offsetof(struct vie, scale) == 24, "ABI");
-_Static_assert(__offsetof(struct vie, base_register) == 28, "ABI");
-
enum vm_exitcode {
VM_EXITCODE_INOUT,
VM_EXITCODE_VMX,
@@ -306,11 +218,11 @@ enum vm_exitcode {
VM_EXITCODE_PAGING,
VM_EXITCODE_INST_EMUL,
VM_EXITCODE_SPINUP_AP,
- VM_EXITCODE_DEPRECATED1, /* used to be SPINDOWN_CPU */
+ VM_EXITCODE_MMIO_EMUL,
VM_EXITCODE_RUNBLOCK,
VM_EXITCODE_IOAPIC_EOI,
VM_EXITCODE_SUSPENDED,
- VM_EXITCODE_INOUT_STR,
+ VM_EXITCODE_MMIO,
VM_EXITCODE_TASK_SWITCH,
VM_EXITCODE_MONITOR,
VM_EXITCODE_MWAIT,
@@ -325,25 +237,38 @@ enum vm_exitcode {
VM_EXITCODE_MAX
};
+enum inout_flags {
+ INOUT_IN = (1U << 0), /* direction: 'in' when set, else 'out' */
+
+ /*
+ * The following flags are used only for in-kernel emulation logic and
+ * are not exposed to userspace.
+ */
+ INOUT_STR = (1U << 1), /* ins/outs operation */
+ INOUT_REP = (1U << 2), /* 'rep' prefix present on instruction */
+};
+
struct vm_inout {
- uint16_t bytes:3; /* 1 or 2 or 4 */
- uint16_t in:1;
- uint16_t string:1;
- uint16_t rep:1;
+ uint32_t eax;
uint16_t port;
- uint32_t eax; /* valid for out */
+ uint8_t bytes; /* 1 or 2 or 4 */
+ uint8_t flags; /* see: inout_flags */
+
+ /*
+ * The address size and segment are relevant to INS/OUTS operations.
+ * Userspace is not concerned with them since the in-kernel emulation
+ * handles those specific aspects.
+ */
+ uint8_t addrsize;
+ uint8_t segment;
};
-struct vm_inout_str {
- struct vm_inout inout; /* must be the first element */
- struct vm_guest_paging paging;
- uint64_t rflags;
- uint64_t cr0;
- uint64_t index;
- uint64_t count; /* rep=1 (%rcx), rep=0 (1) */
- int addrsize;
- enum vm_reg_name seg_name;
- struct seg_desc seg_desc;
+struct vm_mmio {
+ uint8_t bytes; /* 1/2/4/8 bytes */
+ uint8_t read; /* read: 1, write: 0 */
+ uint16_t _pad[3];
+ uint64_t gpa;
+ uint64_t data;
};
enum task_switch_reason {
@@ -368,18 +293,25 @@ struct vm_exit {
uint64_t rip;
union {
struct vm_inout inout;
- struct vm_inout_str inout_str;
+ struct vm_mmio mmio;
struct {
uint64_t gpa;
int fault_type;
} paging;
+ /*
+ * Kernel-internal MMIO decoding and emulation.
+ * Userspace should not expect to see this, but rather a
+ * VM_EXITCODE_MMIO with the above 'mmio' context.
+ */
struct {
uint64_t gpa;
uint64_t gla;
uint64_t cs_base;
int cs_d; /* CS.D */
- struct vm_guest_paging paging;
- struct vie vie;
+ } mmio_emul;
+ struct {
+ uint8_t inst[15];
+ uint8_t num_valid;
} inst_emul;
/*
* VMX specific payload. Used when there is no "better"
@@ -433,6 +365,23 @@ struct vm_exit {
} u;
};
+enum vm_entry_cmds {
+ VEC_DEFAULT = 0,
+ VEC_DISCARD_INSTR, /* discard inst emul state */
+ VEC_COMPLETE_MMIO, /* entry includes result for mmio emul */
+ VEC_COMPLETE_INOUT, /* entry includes result for inout emul */
+};
+
+struct vm_entry {
+ int cpuid;
+ uint_t cmd; /* see: vm_entry_cmds */
+ void *exit_data;
+ union {
+ struct vm_inout inout;
+ struct vm_mmio mmio;
+ } u;
+};
+
void vm_inject_pf(void *vm, int vcpuid, int error_code, uint64_t cr2);
int vm_restart_instruction(void *vm, int vcpuid);
diff --git a/usr/src/uts/i86pc/sys/vmm_dev.h b/usr/src/uts/i86pc/sys/vmm_dev.h
index 40e0857945..4e89b712dc 100644
--- a/usr/src/uts/i86pc/sys/vmm_dev.h
+++ b/usr/src/uts/i86pc/sys/vmm_dev.h
@@ -83,11 +83,6 @@ struct vm_register_set {
uint64_t *regvals;
};
-struct vm_run {
- int cpuid;
- struct vm_exit vm_exit;
-};
-
struct vm_exception {
int cpuid;
int vector;