summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJerry Jelinek <jerry.jelinek@joyent.com>2020-08-24 22:26:14 +0000
committerJerry Jelinek <jerry.jelinek@joyent.com>2020-08-24 22:26:14 +0000
commit360b48fad3374e5a37647e51de850a2b5cc8122a (patch)
tree97b8ae5c55b535b71a341ea9cbebad633c229453
parentb691ee30a82b4152e726474e6ee079bea99c48c0 (diff)
parent0a9a25a293d437b1563e1d8479fef8f3795ba817 (diff)
downloadillumos-joyent-360b48fad3374e5a37647e51de850a2b5cc8122a.tar.gz
[illumos-gate merge]
commit 0a9a25a293d437b1563e1d8479fef8f3795ba817 13021 Invalid state if bindtextdomain() fails during re-binding commit 5fae793b9b02afd1f3f434f3a915a64c08edc7b2 13062 loader: make env_discard() public commit b89fb824168e36795c325b5e0a4d1e71dd2d8d65 13068 gptzfsboot: command from /boot/config should be nul terminated commit 8548ec78d85644539a23c4262ed2b9512a47865e 13065 log when suspending a zpool commit 84971882a96ac0fecd538b02208054a872ff8af3 12996 bhyve kernel should be wscheck clean commit e0c0d44e917080841514d0dd031a696c74e8c435 12989 improve interface boundary for bhyve MMIO 12990 improve interface boundary for bhyve ins/outs 12991 bhyve vlapic should SIPI more carefully commit 76f19f5fdc974fe5be5c82a556e43a4df93f1de1 12983 Want additional prototypes for manual pages commit 341c5f490806c8b3e6e31512923db4c0e1b464b1 13041 i40e_get_available_resources() broken again for X722 part commit dcbbe9e03d9d35c714d909a1f4767ce83c885e80 13049 cstyle should tolerate freebsd headers
-rw-r--r--exception_lists/copyright1
-rw-r--r--exception_lists/cstyle2
-rw-r--r--exception_lists/hdrchk1
-rw-r--r--exception_lists/wscheck21
-rw-r--r--usr/src/boot/Makefile.version2
-rw-r--r--usr/src/boot/lib/libstand/environment.c247
-rw-r--r--usr/src/boot/lib/libstand/stand.h1
-rw-r--r--usr/src/boot/sys/boot/i386/gptzfsboot/zfsboot.c7
-rw-r--r--usr/src/cmd/bhyve/Makefile2
-rw-r--r--usr/src/cmd/bhyve/bhyverun.c202
-rw-r--r--usr/src/cmd/bhyve/inout.c160
-rw-r--r--usr/src/cmd/bhyve/inout.h7
-rw-r--r--usr/src/cmd/bhyve/mem.c59
-rw-r--r--usr/src/cmd/bhyve/mem.h4
-rw-r--r--usr/src/cmd/bhyve/task_switch.c161
-rw-r--r--usr/src/cmd/bhyvectl/bhyvectl.c25
-rw-r--r--usr/src/compat/bhyve/amd64/machine/cpufunc.h6
-rw-r--r--usr/src/compat/bhyve/amd64/machine/pmap.h2
-rw-r--r--usr/src/compat/bhyve/net/ieee_oui.h6
-rw-r--r--usr/src/compat/bhyve/x86/_types.h2
-rw-r--r--usr/src/lib/libc/port/i18n/gettext_real.c12
-rw-r--r--usr/src/lib/libvmmapi/common/vmmapi.c15
-rw-r--r--usr/src/lib/libvmmapi/common/vmmapi.h3
-rw-r--r--usr/src/pkg/manifests/system-test-libctest.mf4
-rw-r--r--usr/src/prototypes/prototype.man1104
-rw-r--r--usr/src/prototypes/prototype.man3x79
-rw-r--r--usr/src/prototypes/prototype.man7d49
-rw-r--r--usr/src/prototypes/prototype.man9e79
-rw-r--r--usr/src/prototypes/prototype.man9f78
-rw-r--r--usr/src/test/libc-tests/runfiles/default.run2
-rw-r--r--usr/src/test/libc-tests/tests/Makefile1
-rw-r--r--usr/src/test/libc-tests/tests/i18n/Makefile25
-rw-r--r--usr/src/test/libc-tests/tests/i18n/bindtextdomain_test.c143
-rw-r--r--usr/src/tools/scripts/cstyle.pl13
-rw-r--r--usr/src/uts/common/fs/zfs/zio.c5
-rw-r--r--usr/src/uts/common/io/i40e/core/i40e_common.c41
-rw-r--r--usr/src/uts/i86pc/io/vmm/amd/amdvi_hw.c18
-rw-r--r--usr/src/uts/i86pc/io/vmm/amd/amdvi_priv.h54
-rw-r--r--usr/src/uts/i86pc/io/vmm/amd/ivrs_drv.c38
-rw-r--r--usr/src/uts/i86pc/io/vmm/amd/npt.c7
-rw-r--r--usr/src/uts/i86pc/io/vmm/amd/npt.h2
-rw-r--r--usr/src/uts/i86pc/io/vmm/amd/svm.c249
-rw-r--r--usr/src/uts/i86pc/io/vmm/amd/svm_msr.c2
-rw-r--r--usr/src/uts/i86pc/io/vmm/amd/svm_softc.h6
-rw-r--r--usr/src/uts/i86pc/io/vmm/intel/ept.c2
-rw-r--r--usr/src/uts/i86pc/io/vmm/intel/vmcs.c2
-rw-r--r--usr/src/uts/i86pc/io/vmm/intel/vmx.c187
-rw-r--r--usr/src/uts/i86pc/io/vmm/intel/vmx_msr.c2
-rw-r--r--usr/src/uts/i86pc/io/vmm/intel/vtd.c6
-rw-r--r--usr/src/uts/i86pc/io/vmm/io/ppt.c2
-rw-r--r--usr/src/uts/i86pc/io/vmm/io/vatpic.c16
-rw-r--r--usr/src/uts/i86pc/io/vmm/io/vatpic.h12
-rw-r--r--usr/src/uts/i86pc/io/vmm/io/vatpit.c6
-rw-r--r--usr/src/uts/i86pc/io/vmm/io/vatpit.h8
-rw-r--r--usr/src/uts/i86pc/io/vmm/io/vhpet.c14
-rw-r--r--usr/src/uts/i86pc/io/vmm/io/vhpet.h2
-rw-r--r--usr/src/uts/i86pc/io/vmm/io/vlapic.c43
-rw-r--r--usr/src/uts/i86pc/io/vmm/io/vpmtmr.c2
-rw-r--r--usr/src/uts/i86pc/io/vmm/io/vpmtmr.h4
-rw-r--r--usr/src/uts/i86pc/io/vmm/io/vrtc.c10
-rw-r--r--usr/src/uts/i86pc/io/vmm/io/vrtc.h8
-rw-r--r--usr/src/uts/i86pc/io/vmm/sys/vmm_instruction_emul.h (renamed from usr/src/uts/i86pc/sys/vmm_instruction_emul.h)98
-rw-r--r--usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h9
-rw-r--r--usr/src/uts/i86pc/io/vmm/vmm.c394
-rw-r--r--usr/src/uts/i86pc/io/vmm/vmm_instruction_emul.c1075
-rw-r--r--usr/src/uts/i86pc/io/vmm/vmm_ioport.c156
-rw-r--r--usr/src/uts/i86pc/io/vmm/vmm_ioport.h5
-rw-r--r--usr/src/uts/i86pc/io/vmm/vmm_mem.c2
-rw-r--r--usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c59
-rw-r--r--usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c4
-rw-r--r--usr/src/uts/i86pc/io/vmm/vmm_stat.c2
-rw-r--r--usr/src/uts/i86pc/io/vmm/vmm_stat.h10
-rw-r--r--usr/src/uts/i86pc/sys/vmm.h167
-rw-r--r--usr/src/uts/i86pc/sys/vmm_dev.h11
74 files changed, 2786 insertions, 1479 deletions
diff --git a/exception_lists/copyright b/exception_lists/copyright
index 05d12b3fbd..56b86378c8 100644
--- a/exception_lists/copyright
+++ b/exception_lists/copyright
@@ -579,7 +579,6 @@ usr/src/uts/i86pc/io/vmm/vmx_assym.s
usr/src/uts/i86pc/io/vmm/x86.[ch]
usr/src/uts/i86pc/sys/vmm.h
usr/src/uts/i86pc/sys/vmm_dev.h
-usr/src/uts/i86pc/sys/vmm_instruction_emul.h
usr/src/cmd/bhyve/README.license
usr/src/cmd/bhyvectl/README.license
usr/src/lib/libvmmapi/README.license
diff --git a/exception_lists/cstyle b/exception_lists/cstyle
index c03401a64f..9dc6d45d49 100644
--- a/exception_lists/cstyle
+++ b/exception_lists/cstyle
@@ -1434,6 +1434,7 @@ usr/src/uts/i86pc/io/vmm/amd/*.[ch]
usr/src/uts/i86pc/io/vmm/intel/*.[chs]
usr/src/uts/i86pc/io/vmm/io/*.[ch]
usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h
+usr/src/uts/i86pc/io/vmm/sys/vmm_instruction_emul.h
usr/src/uts/i86pc/io/vmm/vmm.c
usr/src/uts/i86pc/io/vmm/vmm_host.[ch]
usr/src/uts/i86pc/io/vmm/vmm_instruction_emul.c
@@ -1449,4 +1450,3 @@ usr/src/uts/i86pc/io/vmm/vmx_assym.s
usr/src/uts/i86pc/io/vmm/x86.[ch]
usr/src/uts/i86pc/sys/vmm.h
usr/src/uts/i86pc/sys/vmm_dev.h
-usr/src/uts/i86pc/sys/vmm_instruction_emul.h
diff --git a/exception_lists/hdrchk b/exception_lists/hdrchk
index 58a8a9d209..9440c114b5 100644
--- a/exception_lists/hdrchk
+++ b/exception_lists/hdrchk
@@ -437,4 +437,3 @@ usr/src/uts/i86pc/io/vmm/vmm_util.h
usr/src/uts/i86pc/io/vmm/x86.h
usr/src/uts/i86pc/sys/vmm.h
usr/src/uts/i86pc/sys/vmm_dev.h
-usr/src/uts/i86pc/sys/vmm_instruction_emul.h
diff --git a/exception_lists/wscheck b/exception_lists/wscheck
index cfba871041..a06ff9f3e1 100644
--- a/exception_lists/wscheck
+++ b/exception_lists/wscheck
@@ -174,27 +174,6 @@ usr/src/cmd/bhyve/usb_mouse.c
usr/src/cmd/bhyve/vga.[ch]
usr/src/cmd/bhyve/virtio.[ch]
usr/src/cmd/bhyve/xmsr.[ch]
-usr/src/cmd/bhyveconsole/bhyveconsole.c
usr/src/cmd/bhyvectl/bhyvectl.c
-usr/src/compat/bhyve/*
usr/src/contrib/bhyve/*
usr/src/lib/libvmmapi/common/vmmapi.[ch]
-usr/src/uts/i86pc/io/vmm/amd/*.[ch]
-usr/src/uts/i86pc/io/vmm/intel/*.[chs]
-usr/src/uts/i86pc/io/vmm/io/*.[ch]
-usr/src/uts/i86pc/io/vmm/vmm.c
-usr/src/uts/i86pc/io/vmm/vmm_host.[ch]
-usr/src/uts/i86pc/io/vmm/vmm_instruction_emul.c
-usr/src/uts/i86pc/io/vmm/vmm_ioport.[ch]
-usr/src/uts/i86pc/io/vmm/vmm_ipi.h
-usr/src/uts/i86pc/io/vmm/vmm_ktr.h
-usr/src/uts/i86pc/io/vmm/vmm_lapic.[ch]
-usr/src/uts/i86pc/io/vmm/vmm_mem.[ch]
-usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c
-usr/src/uts/i86pc/io/vmm/vmm_stat.[ch]
-usr/src/uts/i86pc/io/vmm/vmm_util.[ch]
-usr/src/uts/i86pc/io/vmm/vmx_assym.s
-usr/src/uts/i86pc/io/vmm/x86.[ch]
-usr/src/uts/i86pc/sys/vmm.h
-usr/src/uts/i86pc/sys/vmm_dev.h
-usr/src/uts/i86pc/sys/vmm_instruction_emul.h
diff --git a/usr/src/boot/Makefile.version b/usr/src/boot/Makefile.version
index 5d93ec0e76..d1fd2200d6 100644
--- a/usr/src/boot/Makefile.version
+++ b/usr/src/boot/Makefile.version
@@ -33,4 +33,4 @@ LOADER_VERSION = 1.1
# Use date like formatting here, YYYY.MM.DD.XX, without leading zeroes.
# The version is processed from left to right, the version number can only
# be increased.
-BOOT_VERSION = $(LOADER_VERSION)-2020.08.05.1
+BOOT_VERSION = $(LOADER_VERSION)-2020.08.22.1
diff --git a/usr/src/boot/lib/libstand/environment.c b/usr/src/boot/lib/libstand/environment.c
index 291e330044..d3130d292e 100644
--- a/usr/src/boot/lib/libstand/environment.c
+++ b/usr/src/boot/lib/libstand/environment.c
@@ -1,4 +1,4 @@
-/*
+/*
* Copyright (c) 1998 Michael Smith.
* All rights reserved.
*
@@ -25,7 +25,6 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
/*
* Manage an environment-like space in which string variables may be stored.
@@ -37,9 +36,7 @@ __FBSDID("$FreeBSD$");
#include <string.h>
-static void env_discard(struct env_var *ev);
-
-struct env_var *environ = NULL;
+struct env_var *environ = NULL;
/*
* Look up (name) and return it's env_var structure.
@@ -47,12 +44,12 @@ struct env_var *environ = NULL;
struct env_var *
env_getenv(const char *name)
{
- struct env_var *ev;
-
- for (ev = environ; ev != NULL; ev = ev->ev_next)
- if (!strcmp(ev->ev_name, name))
- break;
- return(ev);
+ struct env_var *ev;
+
+ for (ev = environ; ev != NULL; ev = ev->ev_next)
+ if (strcmp(ev->ev_name, name) == 0)
+ break;
+ return (ev);
}
/*
@@ -65,159 +62,157 @@ env_getenv(const char *name)
*/
int
env_setenv(const char *name, int flags, const void *value,
- ev_sethook_t sethook, ev_unsethook_t unsethook)
+ ev_sethook_t sethook, ev_unsethook_t unsethook)
{
- struct env_var *ev, *curr, *last;
-
- if ((ev = env_getenv(name)) != NULL) {
- /*
- * If there's a set hook, let it do the work (unless we are working
- * for one already.
- */
- if ((ev->ev_sethook != NULL) && !(flags & EV_NOHOOK))
- return (ev->ev_sethook(ev, flags, value));
-
- /* If there is data in the variable, discard it. */
- if (ev->ev_value != NULL && (ev->ev_flags & EV_DYNAMIC) != 0)
- free(ev->ev_value);
- ev->ev_value = NULL;
- ev->ev_flags &= ~EV_DYNAMIC;
-
- } else {
-
- /*
- * New variable; create and sort into list
- */
- ev = malloc(sizeof(struct env_var));
- ev->ev_name = strdup(name);
- ev->ev_value = NULL;
- ev->ev_flags = 0;
- /* hooks can only be set when the variable is instantiated */
- ev->ev_sethook = sethook;
- ev->ev_unsethook = unsethook;
-
- /* Sort into list */
- ev->ev_prev = NULL;
- ev->ev_next = NULL;
- /* Search for the record to insert before */
- for (last = NULL, curr = environ;
- curr != NULL;
- last = curr, curr = curr->ev_next) {
-
- if (strcmp(ev->ev_name, curr->ev_name) < 0) {
- if (curr->ev_prev) {
- curr->ev_prev->ev_next = ev;
- } else {
- environ = ev;
+ struct env_var *ev, *curr, *last;
+
+ if ((ev = env_getenv(name)) != NULL) {
+ /*
+ * If there's a set hook, let it do the work
+ * (unless we are working for one already).
+ */
+ if ((ev->ev_sethook != NULL) && !(flags & EV_NOHOOK))
+ return (ev->ev_sethook(ev, flags, value));
+
+ /* If there is data in the variable, discard it. */
+ if (ev->ev_value != NULL && (ev->ev_flags & EV_DYNAMIC) != 0)
+ free(ev->ev_value);
+ ev->ev_value = NULL;
+ ev->ev_flags &= ~EV_DYNAMIC;
+
+ } else {
+ /*
+ * New variable; create and sort into list
+ */
+ ev = malloc(sizeof (struct env_var));
+ ev->ev_name = strdup(name);
+ ev->ev_value = NULL;
+ ev->ev_flags = 0;
+ /* hooks can only be set when the variable is instantiated */
+ ev->ev_sethook = sethook;
+ ev->ev_unsethook = unsethook;
+
+ /* Sort into list */
+ ev->ev_prev = NULL;
+ ev->ev_next = NULL;
+ /* Search for the record to insert before */
+ for (last = NULL, curr = environ; curr != NULL;
+ last = curr, curr = curr->ev_next) {
+
+ if (strcmp(ev->ev_name, curr->ev_name) < 0) {
+ if (curr->ev_prev) {
+ curr->ev_prev->ev_next = ev;
+ } else {
+ environ = ev;
+ }
+ ev->ev_next = curr;
+ ev->ev_prev = curr->ev_prev;
+ curr->ev_prev = ev;
+ break;
+ }
+ }
+ if (curr == NULL) {
+ if (last == NULL) {
+ environ = ev;
+ } else {
+ last->ev_next = ev;
+ ev->ev_prev = last;
+ }
}
- ev->ev_next = curr;
- ev->ev_prev = curr->ev_prev;
- curr->ev_prev = ev;
- break;
- }
}
- if (curr == NULL) {
- if (last == NULL) {
- environ = ev;
- } else {
- last->ev_next = ev;
- ev->ev_prev = last;
- }
+
+ /* If we have a new value, use it */
+ if (flags & EV_VOLATILE) {
+ ev->ev_value = strdup(value);
+ ev->ev_flags |= EV_DYNAMIC;
+ } else {
+ ev->ev_value = (char *)value;
+ ev->ev_flags |= flags & EV_DYNAMIC;
}
- }
-
- /* If we have a new value, use it */
- if (flags & EV_VOLATILE) {
- ev->ev_value = strdup(value);
- ev->ev_flags |= EV_DYNAMIC;
- } else {
- ev->ev_value = (char *)value;
- ev->ev_flags |= flags & EV_DYNAMIC;
- }
-
- return(0);
+
+ return (0);
}
char *
getenv(const char *name)
{
- struct env_var *ev;
-
- /* Set but no value gives empty string */
- if ((ev = env_getenv(name)) != NULL) {
- if (ev->ev_value != NULL)
- return(ev->ev_value);
- return("");
- }
- return(NULL);
+ struct env_var *ev;
+
+ /* Set but no value gives empty string */
+ if ((ev = env_getenv(name)) != NULL) {
+ if (ev->ev_value != NULL)
+ return (ev->ev_value);
+ return ("");
+ }
+ return (NULL);
}
int
setenv(const char *name, const char *value, int overwrite)
{
- /* No guarantees about state, always assume volatile */
- if (overwrite || (env_getenv(name) == NULL))
- return(env_setenv(name, EV_VOLATILE, value, NULL, NULL));
- return(0);
+ /* No guarantees about state, always assume volatile */
+ if (overwrite || (env_getenv(name) == NULL))
+ return (env_setenv(name, EV_VOLATILE, value, NULL, NULL));
+ return (0);
}
int
putenv(const char *string)
{
- char *value, *copy;
- int result;
-
- copy = strdup(string);
- if ((value = strchr(copy, '=')) != NULL)
- *(value++) = 0;
- result = setenv(copy, value, 1);
- free(copy);
- return(result);
+ char *value, *copy;
+ int result;
+
+ copy = strdup(string);
+ if ((value = strchr(copy, '=')) != NULL)
+ *(value++) = 0;
+ result = setenv(copy, value, 1);
+ free(copy);
+ return (result);
}
int
unsetenv(const char *name)
{
- struct env_var *ev;
- int err;
-
- err = 0;
- if ((ev = env_getenv(name)) == NULL) {
- err = ENOENT;
- } else {
- if (ev->ev_unsethook != NULL)
- err = ev->ev_unsethook(ev);
- if (err == 0) {
- env_discard(ev);
+ struct env_var *ev;
+ int err;
+
+ err = 0;
+ if ((ev = env_getenv(name)) == NULL) {
+ err = ENOENT;
+ } else {
+ if (ev->ev_unsethook != NULL)
+ err = ev->ev_unsethook(ev);
+ if (err == 0) {
+ env_discard(ev);
+ }
}
- }
- return(err);
+ return (err);
}
-static void
+void
env_discard(struct env_var *ev)
{
- if (ev->ev_prev)
- ev->ev_prev->ev_next = ev->ev_next;
- if (ev->ev_next)
- ev->ev_next->ev_prev = ev->ev_prev;
- if (environ == ev)
- environ = ev->ev_next;
- free(ev->ev_name);
- if (ev->ev_value != NULL && (ev->ev_flags & EV_DYNAMIC) != 0)
- free(ev->ev_value);
- free(ev);
+ if (ev->ev_prev)
+ ev->ev_prev->ev_next = ev->ev_next;
+ if (ev->ev_next)
+ ev->ev_next->ev_prev = ev->ev_prev;
+ if (environ == ev)
+ environ = ev->ev_next;
+ free(ev->ev_name);
+ if (ev->ev_value != NULL && (ev->ev_flags & EV_DYNAMIC) != 0)
+ free(ev->ev_value);
+ free(ev);
}
int
env_noset(struct env_var *ev __unused, int flags __unused,
const void *value __unused)
{
- return(EPERM);
+ return (EPERM);
}
int
env_nounset(struct env_var *ev __unused)
{
- return(EPERM);
+ return (EPERM);
}
diff --git a/usr/src/boot/lib/libstand/stand.h b/usr/src/boot/lib/libstand/stand.h
index 5f94a7fa58..63595f2956 100644
--- a/usr/src/boot/lib/libstand/stand.h
+++ b/usr/src/boot/lib/libstand/stand.h
@@ -345,6 +345,7 @@ extern struct env_var *env_getenv(const char *name);
extern int env_setenv(const char *name, int flags,
const void *value, ev_sethook_t sethook,
ev_unsethook_t unsethook);
+extern void env_discard(struct env_var *);
extern char *getenv(const char *name);
extern int setenv(const char *name, const char *value,
int overwrite);
diff --git a/usr/src/boot/sys/boot/i386/gptzfsboot/zfsboot.c b/usr/src/boot/sys/boot/i386/gptzfsboot/zfsboot.c
index 872ad3d3bc..63ba02968a 100644
--- a/usr/src/boot/sys/boot/i386/gptzfsboot/zfsboot.c
+++ b/usr/src/boot/sys/boot/i386/gptzfsboot/zfsboot.c
@@ -205,7 +205,12 @@ main(void)
fd = open(PATH_DOTCONFIG, O_RDONLY);
if (fd != -1) {
- read(fd, cmd, sizeof (cmd));
+ ssize_t cmdlen;
+
+ if ((cmdlen = read(fd, cmd, sizeof (cmd))) > 0)
+ cmd[cmdlen] = '\0';
+ else
+ *cmd = '\0';
close(fd);
}
diff --git a/usr/src/cmd/bhyve/Makefile b/usr/src/cmd/bhyve/Makefile
index 914f41d1f4..4dc737c768 100644
--- a/usr/src/cmd/bhyve/Makefile
+++ b/usr/src/cmd/bhyve/Makefile
@@ -12,6 +12,7 @@
#
# Copyright 2014 Pluribus Networks Inc.
# Copyright 2020 Joyent, Inc.
+# Copyright 2020 Oxide Computer Company
#
PROG = bhyve
@@ -74,7 +75,6 @@ SRCS = acpi.c \
usb_mouse.c \
vga.c \
virtio.c \
- vmm_instruction_emul.c \
vmgenc.c \
xmsr.c \
spinup_ap.c \
diff --git a/usr/src/cmd/bhyve/bhyverun.c b/usr/src/cmd/bhyve/bhyverun.c
index 18bfda76f0..bb3e0721c8 100644
--- a/usr/src/cmd/bhyve/bhyverun.c
+++ b/usr/src/cmd/bhyve/bhyverun.c
@@ -217,6 +217,7 @@ static cpuset_t cpumask;
static void vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip);
static struct vm_exit vmexit[VM_MAXCPU];
+static struct vm_entry vmentry[VM_MAXCPU];
struct bhyvestats {
uint64_t vmexit_bogus;
@@ -224,15 +225,18 @@ struct bhyvestats {
uint64_t vmexit_hlt;
uint64_t vmexit_pause;
uint64_t vmexit_mtrap;
- uint64_t vmexit_inst_emul;
+ uint64_t vmexit_mmio;
+ uint64_t vmexit_inout;
uint64_t cpu_switch_rotate;
uint64_t cpu_switch_direct;
+ uint64_t mmio_unhandled;
} stats;
struct mt_vmm_info {
pthread_t mt_thr;
struct vmctx *mt_ctx;
- int mt_vcpu;
+ int mt_vcpu;
+ uint64_t mt_startrip;
} mt_vmm_info[VM_MAXCPU];
#ifdef __FreeBSD__
@@ -502,7 +506,7 @@ fbsdrun_start_thread(void *param)
if (gdb_port != 0)
gdb_cpu_add(vcpu);
- vm_loop(mtp->mt_ctx, vcpu, vmexit[vcpu].rip);
+ vm_loop(mtp->mt_ctx, vcpu, mtp->mt_startrip);
/* not reached */
exit(1);
@@ -543,11 +547,9 @@ fbsdrun_addcpu(struct vmctx *ctx, int fromcpu, int newcpu, uint64_t rip,
* Set up the vmexit struct to allow execution to start
* at the given RIP
*/
- vmexit[newcpu].rip = rip;
- vmexit[newcpu].inst_length = 0;
-
mt_vmm_info[newcpu].mt_ctx = ctx;
mt_vmm_info[newcpu].mt_vcpu = newcpu;
+ mt_vmm_info[newcpu].mt_startrip = rip;
error = pthread_create(&mt_vmm_info[newcpu].mt_thr, NULL,
fbsdrun_start_thread, &mt_vmm_info[newcpu]);
@@ -567,6 +569,66 @@ fbsdrun_deletecpu(struct vmctx *ctx, int vcpu)
return (CPU_EMPTY(&cpumask));
}
+static void
+vmentry_mmio_read(int vcpu, uint64_t gpa, uint8_t bytes, uint64_t data)
+{
+ struct vm_entry *entry = &vmentry[vcpu];
+ struct vm_mmio *mmio = &entry->u.mmio;
+
+ assert(entry->cmd == VEC_DEFAULT);
+
+ entry->cmd = VEC_COMPLETE_MMIO;
+ mmio->bytes = bytes;
+ mmio->read = 1;
+ mmio->gpa = gpa;
+ mmio->data = data;
+}
+
+static void
+vmentry_mmio_write(int vcpu, uint64_t gpa, uint8_t bytes)
+{
+ struct vm_entry *entry = &vmentry[vcpu];
+ struct vm_mmio *mmio = &entry->u.mmio;
+
+ assert(entry->cmd == VEC_DEFAULT);
+
+ entry->cmd = VEC_COMPLETE_MMIO;
+ mmio->bytes = bytes;
+ mmio->read = 0;
+ mmio->gpa = gpa;
+ mmio->data = 0;
+}
+
+static void
+vmentry_inout_read(int vcpu, uint16_t port, uint8_t bytes, uint32_t data)
+{
+ struct vm_entry *entry = &vmentry[vcpu];
+ struct vm_inout *inout = &entry->u.inout;
+
+ assert(entry->cmd == VEC_DEFAULT);
+
+ entry->cmd = VEC_COMPLETE_INOUT;
+ inout->bytes = bytes;
+ inout->flags = INOUT_IN;
+ inout->port = port;
+ inout->eax = data;
+}
+
+static void
+vmentry_inout_write(int vcpu, uint16_t port, uint8_t bytes)
+{
+ struct vm_entry *entry = &vmentry[vcpu];
+ struct vm_inout *inout = &entry->u.inout;
+
+ assert(entry->cmd == VEC_DEFAULT);
+
+ entry->cmd = VEC_COMPLETE_INOUT;
+ inout->bytes = bytes;
+ inout->flags = 0;
+ inout->port = port;
+ inout->eax = 0;
+}
+
static int
vmexit_handle_notify(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu,
uint32_t eax)
@@ -583,30 +645,42 @@ static int
vmexit_inout(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
{
int error;
- int bytes, port, in, out;
int vcpu;
+ struct vm_inout inout;
+ bool in;
+ uint8_t bytes;
- vcpu = *pvcpu;
+ stats.vmexit_inout++;
- port = vme->u.inout.port;
- bytes = vme->u.inout.bytes;
- in = vme->u.inout.in;
- out = !in;
+ vcpu = *pvcpu;
+ inout = vme->u.inout;
+ in = (inout.flags & INOUT_IN) != 0;
+ bytes = inout.bytes;
/* Extra-special case of host notifications */
- if (out && port == GUEST_NIO_PORT) {
- error = vmexit_handle_notify(ctx, vme, pvcpu, vme->u.inout.eax);
+ if (!in && inout.port == GUEST_NIO_PORT) {
+ error = vmexit_handle_notify(ctx, vme, pvcpu, inout.eax);
+ vmentry_inout_write(vcpu, inout.port, bytes);
return (error);
}
- error = emulate_inout(ctx, vcpu, vme, strictio);
+ error = emulate_inout(ctx, vcpu, &inout, strictio != 0);
if (error) {
fprintf(stderr, "Unhandled %s%c 0x%04x at 0x%lx\n",
in ? "in" : "out",
bytes == 1 ? 'b' : (bytes == 2 ? 'w' : 'l'),
- port, vmexit->rip);
+ inout.port, vmexit->rip);
return (VMEXIT_ABORT);
} else {
+ /*
+ * Communicate the status of the inout operation back to the
+ * in-kernel instruction emulation.
+ */
+ if (in) {
+ vmentry_inout_read(vcpu, inout.port, bytes, inout.eax);
+ } else {
+ vmentry_inout_write(vcpu, inout.port, bytes);
+ }
return (VMEXIT_CONTINUE);
}
}
@@ -796,29 +870,70 @@ vmexit_mtrap(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
static int
vmexit_inst_emul(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
{
- int err, i;
- struct vie *vie;
+ uint8_t i, valid;
+
+ fprintf(stderr, "Failed to emulate instruction sequence ");
+
+ valid = vmexit->u.inst_emul.num_valid;
+ if (valid != 0) {
+ assert(valid <= sizeof (vmexit->u.inst_emul.inst));
+ fprintf(stderr, "[");
+ for (i = 0; i < valid; i++) {
+ if (i == 0) {
+ fprintf(stderr, "%02x",
+ vmexit->u.inst_emul.inst[i]);
+ } else {
+ fprintf(stderr, ", %02x",
+ vmexit->u.inst_emul.inst[i]);
+ }
+ }
+ fprintf(stderr, "] ");
+ }
+ fprintf(stderr, "@ %rip = %x\n", vmexit->rip);
- stats.vmexit_inst_emul++;
+ return (VMEXIT_ABORT);
+}
- vie = &vmexit->u.inst_emul.vie;
- err = emulate_mem(ctx, *pvcpu, vmexit->u.inst_emul.gpa,
- vie, &vmexit->u.inst_emul.paging);
+static int
+vmexit_mmio(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
+{
+ int vcpu, err;
+ struct vm_mmio mmio;
+ bool is_read;
- if (err) {
- if (err == ESRCH) {
- EPRINTLN("Unhandled memory access to 0x%lx\n",
- vmexit->u.inst_emul.gpa);
- }
+ stats.vmexit_mmio++;
- fprintf(stderr, "Failed to emulate instruction sequence [ ");
- for (i = 0; i < vie->num_valid; i++)
- fprintf(stderr, "%02x", vie->inst[i]);
- FPRINTLN(stderr, " ] at 0x%lx", vmexit->rip);
- return (VMEXIT_ABORT);
+ vcpu = *pvcpu;
+ mmio = vmexit->u.mmio;
+ is_read = (mmio.read != 0);
+
+ err = emulate_mem(ctx, vcpu, &mmio);
+
+ if (err == ESRCH) {
+ fprintf(stderr, "Unhandled memory access to 0x%lx\n", mmio.gpa);
+ stats.mmio_unhandled++;
+
+ /*
+ * Access to non-existent physical addresses is not likely to
+ * result in fatal errors on hardware machines, but rather reads
+ * of all-ones or discarded-but-acknowledged writes.
+ */
+ mmio.data = ~0UL;
+ err = 0;
}
- return (VMEXIT_CONTINUE);
+ if (err == 0) {
+ if (is_read) {
+ vmentry_mmio_read(vcpu, mmio.gpa, mmio.bytes,
+ mmio.data);
+ } else {
+ vmentry_mmio_write(vcpu, mmio.gpa, mmio.bytes);
+ }
+ return (VMEXIT_CONTINUE);
+ }
+
+ fprintf(stderr, "Unhandled mmio error to 0x%lx: %d\n", mmio.gpa, err);
+ return (VMEXIT_ABORT);
}
static pthread_mutex_t resetcpu_mtx = PTHREAD_MUTEX_INITIALIZER;
@@ -888,7 +1003,7 @@ vmexit_breakpoint(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
static vmexit_handler_t handler[VM_EXITCODE_MAX] = {
[VM_EXITCODE_INOUT] = vmexit_inout,
- [VM_EXITCODE_INOUT_STR] = vmexit_inout,
+ [VM_EXITCODE_MMIO] = vmexit_mmio,
[VM_EXITCODE_VMX] = vmexit_vmx,
[VM_EXITCODE_SVM] = vmexit_svm,
[VM_EXITCODE_BOGUS] = vmexit_bogus,
@@ -910,6 +1025,8 @@ vm_loop(struct vmctx *ctx, int vcpu, uint64_t startrip)
int error, rc;
enum vm_exitcode exitcode;
cpuset_t active_cpus;
+ struct vm_exit *vexit;
+ struct vm_entry *ventry;
#ifdef __FreeBSD__
if (vcpumap[vcpu] != NULL) {
@@ -924,19 +1041,30 @@ vm_loop(struct vmctx *ctx, int vcpu, uint64_t startrip)
error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RIP, startrip);
assert(error == 0);
+ ventry = &vmentry[vcpu];
+ vexit = &vmexit[vcpu];
+
while (1) {
- error = vm_run(ctx, vcpu, &vmexit[vcpu]);
+ error = vm_run(ctx, vcpu, ventry, vexit);
if (error != 0)
break;
- exitcode = vmexit[vcpu].exitcode;
+ if (ventry->cmd != VEC_DEFAULT) {
+ /*
+ * Discard any lingering entry state after it has been
+ * submitted via vm_run().
+ */
+ bzero(ventry, sizeof (*ventry));
+ }
+
+ exitcode = vexit->exitcode;
if (exitcode >= VM_EXITCODE_MAX || handler[exitcode] == NULL) {
fprintf(stderr, "vm_loop: unexpected exitcode 0x%x\n",
exitcode);
exit(4);
}
- rc = (*handler[exitcode])(ctx, &vmexit[vcpu], &vcpu);
+ rc = (*handler[exitcode])(ctx, vexit, &vcpu);
switch (rc) {
case VMEXIT_CONTINUE:
diff --git a/usr/src/cmd/bhyve/inout.c b/usr/src/cmd/bhyve/inout.c
index b460ee2988..27068023d3 100644
--- a/usr/src/cmd/bhyve/inout.c
+++ b/usr/src/cmd/bhyve/inout.c
@@ -27,6 +27,18 @@
*
* $FreeBSD$
*/
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * Copyright 2020 Oxide Computer Company
+ */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
@@ -40,7 +52,6 @@ __FBSDID("$FreeBSD$");
#include <x86/segments.h>
#include <machine/vmm.h>
-#include <machine/vmm_instruction_emul.h>
#include <vmmapi.h>
#include <stdio.h>
@@ -57,12 +68,14 @@ SET_DECLARE(inout_port_set, struct inout_port);
#define VERIFY_IOPORT(port, size) \
assert((port) >= 0 && (size) > 0 && ((port) + (size)) <= MAX_IOPORTS)
-static struct {
+struct inout_handler {
const char *name;
int flags;
inout_func_t handler;
void *arg;
-} inout_handlers[MAX_IOPORTS];
+};
+
+static struct inout_handler inout_handlers[MAX_IOPORTS];
static int
default_inout(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
@@ -85,11 +98,11 @@ default_inout(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
return (0);
}
-static void
+static void
register_default_iohandler(int start, int size)
{
struct inout_port iop;
-
+
VERIFY_IOPORT(start, size);
bzero(&iop, sizeof(iop));
@@ -103,136 +116,37 @@ register_default_iohandler(int start, int size)
}
int
-emulate_inout(struct vmctx *ctx, int vcpu, struct vm_exit *vmexit, int strict)
+emulate_inout(struct vmctx *ctx, int vcpu, struct vm_inout *inout, bool strict)
{
- int addrsize, bytes, flags, in, port, prot, rep;
- uint32_t eax, val;
- inout_func_t handler;
- void *arg;
- int error, fault, retval;
- enum vm_reg_name idxreg;
- uint64_t gla, index, iterations, count;
- struct vm_inout_str *vis;
- struct iovec iov[2];
-
- bytes = vmexit->u.inout.bytes;
- in = vmexit->u.inout.in;
- port = vmexit->u.inout.port;
-
- assert(port < MAX_IOPORTS);
+ struct inout_handler handler;
+ inout_func_t hfunc;
+ void *harg;
+ int error;
+ uint8_t bytes;
+ bool in;
+
+ bytes = inout->bytes;
+ in = (inout->flags & INOUT_IN) != 0;
+
assert(bytes == 1 || bytes == 2 || bytes == 4);
- handler = inout_handlers[port].handler;
+ handler = inout_handlers[inout->port];
+ hfunc = handler.handler;
+ harg = handler.arg;
- if (strict && handler == default_inout)
+ if (strict && hfunc == default_inout)
return (-1);
- flags = inout_handlers[port].flags;
- arg = inout_handlers[port].arg;
-
if (in) {
- if (!(flags & IOPORT_F_IN))
+ if (!(handler.flags & IOPORT_F_IN))
return (-1);
} else {
- if (!(flags & IOPORT_F_OUT))
+ if (!(handler.flags & IOPORT_F_OUT))
return (-1);
}
- retval = 0;
- if (vmexit->u.inout.string) {
- vis = &vmexit->u.inout_str;
- rep = vis->inout.rep;
- addrsize = vis->addrsize;
- prot = in ? PROT_WRITE : PROT_READ;
- assert(addrsize == 2 || addrsize == 4 || addrsize == 8);
-
- /* Index register */
- idxreg = in ? VM_REG_GUEST_RDI : VM_REG_GUEST_RSI;
- index = vis->index & vie_size2mask(addrsize);
-
- /* Count register */
- count = vis->count & vie_size2mask(addrsize);
-
- /* Limit number of back-to-back in/out emulations to 16 */
- iterations = MIN(count, 16);
- while (iterations > 0) {
- assert(retval == 0);
- if (vie_calculate_gla(vis->paging.cpu_mode,
- vis->seg_name, &vis->seg_desc, index, bytes,
- addrsize, prot, &gla)) {
- vm_inject_gp(ctx, vcpu);
- break;
- }
-
- error = vm_copy_setup(ctx, vcpu, &vis->paging, gla,
- bytes, prot, iov, nitems(iov), &fault);
- if (error) {
- retval = -1; /* Unrecoverable error */
- break;
- } else if (fault) {
- retval = 0; /* Resume guest to handle fault */
- break;
- }
-
- if (vie_alignment_check(vis->paging.cpl, bytes,
- vis->cr0, vis->rflags, gla)) {
- vm_inject_ac(ctx, vcpu, 0);
- break;
- }
-
- val = 0;
- if (!in)
- vm_copyin(ctx, vcpu, iov, &val, bytes);
-
- retval = handler(ctx, vcpu, in, port, bytes, &val, arg);
- if (retval != 0)
- break;
-
- if (in)
- vm_copyout(ctx, vcpu, &val, iov, bytes);
-
- /* Update index */
- if (vis->rflags & PSL_D)
- index -= bytes;
- else
- index += bytes;
-
- count--;
- iterations--;
- }
-
- /* Update index register */
- error = vie_update_register(ctx, vcpu, idxreg, index, addrsize);
- assert(error == 0);
-
- /*
- * Update count register only if the instruction had a repeat
- * prefix.
- */
- if (rep) {
- error = vie_update_register(ctx, vcpu, VM_REG_GUEST_RCX,
- count, addrsize);
- assert(error == 0);
- }
-
- /* Restart the instruction if more iterations remain */
- if (retval == 0 && count != 0) {
- error = vm_restart_instruction(ctx, vcpu);
- assert(error == 0);
- }
- } else {
- eax = vmexit->u.inout.eax;
- val = eax & vie_size2mask(bytes);
- retval = handler(ctx, vcpu, in, port, bytes, &val, arg);
- if (retval == 0 && in) {
- eax &= ~vie_size2mask(bytes);
- eax |= val & vie_size2mask(bytes);
- error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RAX,
- eax);
- assert(error == 0);
- }
- }
- return (retval);
+ error = hfunc(ctx, vcpu, in, inout->port, bytes, &inout->eax, harg);
+ return (error);
}
void
diff --git a/usr/src/cmd/bhyve/inout.h b/usr/src/cmd/bhyve/inout.h
index b72ee5d93e..b026e18e92 100644
--- a/usr/src/cmd/bhyve/inout.h
+++ b/usr/src/cmd/bhyve/inout.h
@@ -47,6 +47,7 @@
struct vmctx;
struct vm_exit;
+struct vm_inout;
/*
* inout emulation handlers return 0 on success and -1 on failure.
@@ -82,10 +83,10 @@ struct inout_port {
0 \
}; \
DATA_SET(inout_port_set, __CONCAT(__inout_port, __LINE__))
-
+
void init_inout(void);
-int emulate_inout(struct vmctx *, int vcpu, struct vm_exit *vmexit,
- int strict);
+int emulate_inout(struct vmctx *, int vcpu, struct vm_inout *inout,
+ bool strict);
int register_inout(struct inout_port *iop);
int unregister_inout(struct inout_port *iop);
void init_bvmcons(void);
diff --git a/usr/src/cmd/bhyve/mem.c b/usr/src/cmd/bhyve/mem.c
index 90aefe45c8..1afc8bf5f0 100644
--- a/usr/src/cmd/bhyve/mem.c
+++ b/usr/src/cmd/bhyve/mem.c
@@ -27,6 +27,18 @@
*
* $FreeBSD$
*/
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * Copyright 2020 Oxide Computer Company
+ */
/*
* Memory ranges are represented with an RB tree. On insertion, the range
@@ -41,7 +53,6 @@ __FBSDID("$FreeBSD$");
#include <sys/errno.h>
#include <sys/tree.h>
#include <machine/vmm.h>
-#include <machine/vmm_instruction_emul.h>
#include <assert.h>
#include <err.h>
@@ -96,7 +107,7 @@ mmio_rb_lookup(struct mmio_rb_tree *rbt, uint64_t addr,
*entry = res;
return (0);
}
-
+
return (ENOENT);
}
@@ -170,7 +181,7 @@ access_memory(struct vmctx *ctx, int vcpu, uint64_t paddr, mem_cb_t *cb,
{
struct mmio_rb_range *entry;
int err, perror, immutable;
-
+
pthread_rwlock_rdlock(&mmio_rwlock);
/*
* First check the per-vCPU cache
@@ -185,7 +196,7 @@ access_memory(struct vmctx *ctx, int vcpu, uint64_t paddr, mem_cb_t *cb,
if (entry == NULL) {
if (mmio_rb_lookup(&mmio_rb_root, paddr, &entry) == 0) {
/* Update the per-vCPU cache */
- mmio_hint[vcpu] = entry;
+ mmio_hint[vcpu] = entry;
} else if (mmio_rb_lookup(&mmio_rb_fallback, paddr, &entry)) {
perror = pthread_rwlock_unlock(&mmio_rwlock);
assert(perror == 0);
@@ -223,32 +234,28 @@ access_memory(struct vmctx *ctx, int vcpu, uint64_t paddr, mem_cb_t *cb,
return (err);
}
-struct emulate_mem_args {
- struct vie *vie;
- struct vm_guest_paging *paging;
-};
-
static int
emulate_mem_cb(struct vmctx *ctx, int vcpu, uint64_t paddr, struct mem_range *mr,
void *arg)
{
- struct emulate_mem_args *ema;
+ struct vm_mmio *mmio;
+ int err = 0;
+
+ mmio = arg;
- ema = arg;
- return (vmm_emulate_instruction(ctx, vcpu, paddr, ema->vie, ema->paging,
- mem_read, mem_write, mr));
+ if (mmio->read != 0) {
+ err = mem_read(ctx, vcpu, paddr, &mmio->data, mmio->bytes, mr);
+ } else {
+ err = mem_write(ctx, vcpu, paddr, mmio->data, mmio->bytes, mr);
+ }
+
+ return (err);
}
int
-emulate_mem(struct vmctx *ctx, int vcpu, uint64_t paddr, struct vie *vie,
- struct vm_guest_paging *paging)
-
+emulate_mem(struct vmctx *ctx, int vcpu, struct vm_mmio *mmio)
{
- struct emulate_mem_args ema;
-
- ema.vie = vie;
- ema.paging = paging;
- return (access_memory(ctx, vcpu, paddr, emulate_mem_cb, &ema));
+ return (access_memory(ctx, vcpu, mmio->gpa, emulate_mem_cb, mmio));
}
struct rw_mem_args {
@@ -333,23 +340,23 @@ register_mem_fallback(struct mem_range *memp)
return (register_mem_int(&mmio_rb_fallback, memp));
}
-int
+int
unregister_mem(struct mem_range *memp)
{
struct mem_range *mr;
struct mmio_rb_range *entry = NULL;
int err, perror, i;
-
+
pthread_rwlock_wrlock(&mmio_rwlock);
err = mmio_rb_lookup(&mmio_rb_root, memp->base, &entry);
if (err == 0) {
mr = &entry->mr_param;
assert(mr->name == memp->name);
- assert(mr->base == memp->base && mr->size == memp->size);
+ assert(mr->base == memp->base && mr->size == memp->size);
assert((mr->flags & MEM_F_IMMUTABLE) == 0);
RB_REMOVE(mmio_rb_tree, &mmio_rb_root, entry);
- /* flush Per-vCPU cache */
+ /* flush Per-vCPU cache */
for (i=0; i < VM_MAXCPU; i++) {
if (mmio_hint[i] == entry)
mmio_hint[i] = NULL;
@@ -360,7 +367,7 @@ unregister_mem(struct mem_range *memp)
if (entry)
free(entry);
-
+
return (err);
}
diff --git a/usr/src/cmd/bhyve/mem.h b/usr/src/cmd/bhyve/mem.h
index 38d773c43f..8b81b93a02 100644
--- a/usr/src/cmd/bhyve/mem.h
+++ b/usr/src/cmd/bhyve/mem.h
@@ -53,8 +53,8 @@ struct mem_range {
#define MEM_F_IMMUTABLE 0x4 /* mem_range cannot be unregistered */
void init_mem(void);
-int emulate_mem(struct vmctx *, int vcpu, uint64_t paddr, struct vie *vie,
- struct vm_guest_paging *paging);
+
+int emulate_mem(struct vmctx *ctx, int vcpu, struct vm_mmio *mmio);
int read_mem(struct vmctx *ctx, int vcpu, uint64_t gpa, uint64_t *rval,
int size);
diff --git a/usr/src/cmd/bhyve/task_switch.c b/usr/src/cmd/bhyve/task_switch.c
index f1b564d560..c4a087b54f 100644
--- a/usr/src/cmd/bhyve/task_switch.c
+++ b/usr/src/cmd/bhyve/task_switch.c
@@ -25,6 +25,18 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * Copyright 2020 Oxide Computer Company
+ */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
@@ -37,7 +49,6 @@ __FBSDID("$FreeBSD$");
#include <x86/segments.h>
#include <x86/specialreg.h>
#include <machine/vmm.h>
-#include <machine/vmm_instruction_emul.h>
#include <assert.h>
#include <errno.h>
@@ -618,6 +629,150 @@ tss32_restore(struct vmctx *ctx, int vcpu, struct vm_task_switch *ts,
return (0);
}
+
+/*
+ * Copy of vie_alignment_check() from vmm_instruction_emul.c
+ */
+static int
+alignment_check(int cpl, int size, uint64_t cr0, uint64_t rf, uint64_t gla)
+{
+ assert(size == 1 || size == 2 || size == 4 || size == 8);
+ assert(cpl >= 0 && cpl <= 3);
+
+ if (cpl != 3 || (cr0 & CR0_AM) == 0 || (rf & PSL_AC) == 0)
+ return (0);
+
+ return ((gla & (size - 1)) ? 1 : 0);
+}
+
+/*
+ * Copy of vie_size2mask() from vmm_instruction_emul.c
+ */
+static uint64_t
+size2mask(int size)
+{
+ switch (size) {
+ case 1:
+ return (0xff);
+ case 2:
+ return (0xffff);
+ case 4:
+ return (0xffffffff);
+ case 8:
+ return (0xffffffffffffffff);
+ default:
+ assert(0);
+ /* not reached */
+ return (0);
+ }
+}
+
+/*
+ * Copy of vie_calculate_gla() from vmm_instruction_emul.c
+ */
+static int
+calculate_gla(enum vm_cpu_mode cpu_mode, enum vm_reg_name seg,
+ struct seg_desc *desc, uint64_t offset, int length, int addrsize,
+ int prot, uint64_t *gla)
+{
+ uint64_t firstoff, low_limit, high_limit, segbase;
+ int glasize, type;
+
+ assert(seg >= VM_REG_GUEST_ES && seg <= VM_REG_GUEST_GS);
+ assert((length == 1 || length == 2 || length == 4 || length == 8));
+ assert((prot & ~(PROT_READ | PROT_WRITE)) == 0);
+
+ firstoff = offset;
+ if (cpu_mode == CPU_MODE_64BIT) {
+ assert(addrsize == 4 || addrsize == 8);
+ glasize = 8;
+ } else {
+ assert(addrsize == 2 || addrsize == 4);
+ glasize = 4;
+ /*
+ * If the segment selector is loaded with a NULL selector
+ * then the descriptor is unusable and attempting to use
+ * it results in a #GP(0).
+ */
+ if (SEG_DESC_UNUSABLE(desc->access))
+ return (-1);
+
+ /*
+ * The processor generates a #NP exception when a segment
+ * register is loaded with a selector that points to a
+ * descriptor that is not present. If this was the case then
+ * it would have been checked before the VM-exit.
+ */
+ assert(SEG_DESC_PRESENT(desc->access));
+
+ /*
+ * The descriptor type must indicate a code/data segment.
+ */
+ type = SEG_DESC_TYPE(desc->access);
+ assert(type >= 16 && type <= 31);
+
+ if (prot & PROT_READ) {
+ /* #GP on a read access to a exec-only code segment */
+ if ((type & 0xA) == 0x8)
+ return (-1);
+ }
+
+ if (prot & PROT_WRITE) {
+ /*
+ * #GP on a write access to a code segment or a
+ * read-only data segment.
+ */
+ if (type & 0x8) /* code segment */
+ return (-1);
+
+ if ((type & 0xA) == 0) /* read-only data seg */
+ return (-1);
+ }
+
+ /*
+ * 'desc->limit' is fully expanded taking granularity into
+ * account.
+ */
+ if ((type & 0xC) == 0x4) {
+ /* expand-down data segment */
+ low_limit = desc->limit + 1;
+ high_limit = SEG_DESC_DEF32(desc->access) ?
+ 0xffffffff : 0xffff;
+ } else {
+ /* code segment or expand-up data segment */
+ low_limit = 0;
+ high_limit = desc->limit;
+ }
+
+ while (length > 0) {
+ offset &= size2mask(addrsize);
+ if (offset < low_limit || offset > high_limit)
+ return (-1);
+ offset++;
+ length--;
+ }
+ }
+
+ /*
+ * In 64-bit mode all segments except %fs and %gs have a segment
+ * base address of 0.
+ */
+ if (cpu_mode == CPU_MODE_64BIT && seg != VM_REG_GUEST_FS &&
+ seg != VM_REG_GUEST_GS) {
+ segbase = 0;
+ } else {
+ segbase = desc->base;
+ }
+
+ /*
+ * Truncate 'firstoff' to the effective address size before adding
+ * it to the segment base.
+ */
+ firstoff &= size2mask(addrsize);
+ *gla = (segbase + firstoff) & size2mask(glasize);
+ return (0);
+}
+
/*
* Push an error code on the stack of the new task. This is needed if the
* task switch was triggered by a hardware exception that causes an error
@@ -667,14 +822,14 @@ push_errcode(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
esp = GETREG(ctx, vcpu, VM_REG_GUEST_RSP);
esp -= bytes;
- if (vie_calculate_gla(paging->cpu_mode, VM_REG_GUEST_SS,
+ if (calculate_gla(paging->cpu_mode, VM_REG_GUEST_SS,
&seg_desc, esp, bytes, stacksize, PROT_WRITE, &gla)) {
sel_exception(ctx, vcpu, IDT_SS, stacksel, 1);
*faultptr = 1;
return (0);
}
- if (vie_alignment_check(paging->cpl, bytes, cr0, rflags, gla)) {
+ if (alignment_check(paging->cpl, bytes, cr0, rflags, gla)) {
vm_inject_ac(ctx, vcpu, 1);
*faultptr = 1;
return (0);
diff --git a/usr/src/cmd/bhyvectl/bhyvectl.c b/usr/src/cmd/bhyvectl/bhyvectl.c
index 5299791091..22c72cf5df 100644
--- a/usr/src/cmd/bhyvectl/bhyvectl.c
+++ b/usr/src/cmd/bhyvectl/bhyvectl.c
@@ -36,11 +36,10 @@
* A full copy of the text of the CDDL should have accompanied this
* source. A copy of the CDDL is also available via the Internet at
* http://www.illumos.org/license/CDDL.
- */
-
-/*
+ *
* Copyright 2015 Pluribus Networks Inc.
* Copyright 2019 Joyent, Inc.
+ * Copyright 2020 Oxide Computer Company
*/
#include <sys/cdefs.h>
@@ -358,14 +357,20 @@ dump_vm_run_exitcode(struct vm_exit *vmexit, int vcpu)
switch (vmexit->exitcode) {
case VM_EXITCODE_INOUT:
printf("\treason\t\tINOUT\n");
- printf("\tdirection\t%s\n", vmexit->u.inout.in ? "IN" : "OUT");
+ printf("\tdirection\t%s\n",
+ (vmexit->u.inout.flags & INOUT_IN) ? "IN" : "OUT");
printf("\tbytes\t\t%d\n", vmexit->u.inout.bytes);
- printf("\tflags\t\t%s%s\n",
- vmexit->u.inout.string ? "STRING " : "",
- vmexit->u.inout.rep ? "REP " : "");
printf("\tport\t\t0x%04x\n", vmexit->u.inout.port);
printf("\teax\t\t0x%08x\n", vmexit->u.inout.eax);
break;
+ case VM_EXITCODE_MMIO:
+ printf("\treason\t\tMMIO\n");
+ printf("\toperation\t%s\n",
+ vmexit->u.mmio.read ? "READ" : "WRITE");
+ printf("\tbytes\t\t%d\n", vmexit->u.mmio.bytes);
+ printf("\tgpa\t\t0x%08x\n", vmexit->u.mmio.gpa);
+ printf("\tdata\t\t0x%08x\n", vmexit->u.mmio.data);
+ break;
case VM_EXITCODE_VMX:
printf("\treason\t\tVMX\n");
printf("\tstatus\t\t%d\n", vmexit->u.vmx.status);
@@ -2366,7 +2371,11 @@ main(int argc, char *argv[])
}
if (!error && run) {
- error = vm_run(ctx, vcpu, &vmexit);
+ struct vm_entry entry;
+
+ bzero(&entry, sizeof (entry));
+
+ error = vm_run(ctx, vcpu, &entry, &vmexit);
if (error == 0)
dump_vm_run_exitcode(&vmexit, vcpu);
else
diff --git a/usr/src/compat/bhyve/amd64/machine/cpufunc.h b/usr/src/compat/bhyve/amd64/machine/cpufunc.h
index 0b7bcdaa59..bb79ac3ce9 100644
--- a/usr/src/compat/bhyve/amd64/machine/cpufunc.h
+++ b/usr/src/compat/bhyve/amd64/machine/cpufunc.h
@@ -116,7 +116,7 @@ static __inline uint64_t
rdmsr(u_int msr)
{
uint32_t low, high;
-
+
__asm __volatile("rdmsr" : "=a" (low), "=d" (high) : "c" (msr));
return (low | ((uint64_t)high << 32));
}
@@ -150,7 +150,7 @@ static __inline u_long
rcr0(void)
{
u_long data;
-
+
__asm __volatile("movq %%cr0,%0" : "=r" (data));
return (data);
}
@@ -174,7 +174,7 @@ static __inline u_long
rcr4(void)
{
u_long data;
-
+
__asm __volatile("movq %%cr4,%0" : "=r" (data));
return (data);
}
diff --git a/usr/src/compat/bhyve/amd64/machine/pmap.h b/usr/src/compat/bhyve/amd64/machine/pmap.h
index ce3185629b..3b94d1b1a9 100644
--- a/usr/src/compat/bhyve/amd64/machine/pmap.h
+++ b/usr/src/compat/bhyve/amd64/machine/pmap.h
@@ -153,7 +153,7 @@
#define PGEX_RSV 0x08 /* reserved PTE field is non-zero */
#define PGEX_I 0x10 /* during an instruction fetch */
-/*
+/*
* undef the PG_xx macros that define bits in the regular x86 PTEs that
* have a different position in nested PTEs. This is done when compiling
* code that needs to be aware of the differences between regular x86 and
diff --git a/usr/src/compat/bhyve/net/ieee_oui.h b/usr/src/compat/bhyve/net/ieee_oui.h
index 068328d833..115e6a44f8 100644
--- a/usr/src/compat/bhyve/net/ieee_oui.h
+++ b/usr/src/compat/bhyve/net/ieee_oui.h
@@ -37,14 +37,14 @@
#define OUI_FREEBSD_BASE 0x589cfc000000
#define OUI_FREEBSD(nic) (OUI_FREEBSD_BASE | (nic))
-/*
+/*
* OUIs are most often used to uniquely identify network interfaces
* and occupy the first 3 bytes of both destination and source MAC
* addresses. The following allocations exist so that various
* software systems associated with FreeBSD can have unique IDs in the
* absence of hardware. The use of OUIs for this purpose is not fully
* fleshed out but is now in common use in virtualization technology.
- *
+ *
* Allocations from this range are expected to be made using COMMON
* SENSE by developers. Do NOT take a large range just because
* they're currently wide open. Take the smallest useful range for
@@ -53,7 +53,7 @@
*
* In the event of a conflict arbitration of allocation in this file
* is subject to core@ approval.
- *
+ *
* Applications are differentiated based on the high order bit(s) of
* the remaining three bytes. Our first allocation has all 0s, the
* next allocation has the highest bit set. Allocating in this way
diff --git a/usr/src/compat/bhyve/x86/_types.h b/usr/src/compat/bhyve/x86/_types.h
index 8bbae549d8..0263c33d5f 100644
--- a/usr/src/compat/bhyve/x86/_types.h
+++ b/usr/src/compat/bhyve/x86/_types.h
@@ -33,7 +33,7 @@ typedef long long __int64_t;
typedef unsigned long long __uint64_t;
#endif
-/*
+/*
* Standard type definitions.
*/
#ifdef _LP64
diff --git a/usr/src/lib/libc/port/i18n/gettext_real.c b/usr/src/lib/libc/port/i18n/gettext_real.c
index 6045d000fe..6e5b8054ae 100644
--- a/usr/src/lib/libc/port/i18n/gettext_real.c
+++ b/usr/src/lib/libc/port/i18n/gettext_real.c
@@ -58,7 +58,7 @@ char *
_real_gettext_u(const char *domain, const char *msgid1, const char *msgid2,
unsigned long int ln, int category, int plural, locale_t loc)
{
- char msgfile[MAXPATHLEN]; /* 1024 */
+ char msgfile[MAXPATHLEN]; /* 1024 */
char mydomain[TEXTDOMAINMAX + 1]; /* 256 + 1 */
char *cur_binding; /* points to current binding in list */
const char *cur_locale;
@@ -326,7 +326,7 @@ static int
process_nlspath(const char *cur_domain, const char *cur_msgloc,
const char *nlspath, char **binding)
{
- char *s; /* generic string ptr */
+ char *s; /* generic string ptr */
char *territory; /* our current territory element */
char *codeset; /* our current codeset element */
char *s1; /* for handling territory */
@@ -684,12 +684,12 @@ _real_bindtextdomain_u(const char *domain, const char *binding,
return (*binding_addr);
}
/* replace existing binding with new binding */
- if (*binding_addr) {
- free(*binding_addr);
- }
- if ((*binding_addr = strdup(binding)) == NULL) {
+ char *new_binding = strdup(binding);
+ if (new_binding == NULL) {
return (NULL);
}
+ free(*binding_addr);
+ *binding_addr = new_binding;
#ifdef GETTEXT_DEBUG
printlist();
#endif
diff --git a/usr/src/lib/libvmmapi/common/vmmapi.c b/usr/src/lib/libvmmapi/common/vmmapi.c
index 7d3446a845..6d5145431e 100644
--- a/usr/src/lib/libvmmapi/common/vmmapi.c
+++ b/usr/src/lib/libvmmapi/common/vmmapi.c
@@ -772,17 +772,16 @@ vm_get_register_set(struct vmctx *ctx, int vcpu, unsigned int count,
}
int
-vm_run(struct vmctx *ctx, int vcpu, struct vm_exit *vmexit)
+vm_run(struct vmctx *ctx, int vcpu, const struct vm_entry *vm_entry,
+ struct vm_exit *vm_exit)
{
- int error;
- struct vm_run vmrun;
+ struct vm_entry entry;
- bzero(&vmrun, sizeof(vmrun));
- vmrun.cpuid = vcpu;
+ bcopy(vm_entry, &entry, sizeof (entry));
+ entry.cpuid = vcpu;
+ entry.exit_data = vm_exit;
- error = ioctl(ctx->fd, VM_RUN, &vmrun);
- bcopy(&vmrun.vm_exit, vmexit, sizeof(struct vm_exit));
- return (error);
+ return (ioctl(ctx->fd, VM_RUN, &entry));
}
int
diff --git a/usr/src/lib/libvmmapi/common/vmmapi.h b/usr/src/lib/libvmmapi/common/vmmapi.h
index 997267b8cc..4656f417b4 100644
--- a/usr/src/lib/libvmmapi/common/vmmapi.h
+++ b/usr/src/lib/libvmmapi/common/vmmapi.h
@@ -165,7 +165,8 @@ int vm_set_register_set(struct vmctx *ctx, int vcpu, unsigned int count,
const int *regnums, uint64_t *regvals);
int vm_get_register_set(struct vmctx *ctx, int vcpu, unsigned int count,
const int *regnums, uint64_t *regvals);
-int vm_run(struct vmctx *ctx, int vcpu, struct vm_exit *ret_vmexit);
+int vm_run(struct vmctx *ctx, int vcpu, const struct vm_entry *vm_entry,
+ struct vm_exit *vm_exit);
int vm_suspend(struct vmctx *ctx, enum vm_suspend_how how);
int vm_reinit(struct vmctx *ctx);
int vm_apicid2vcpu(struct vmctx *ctx, int apicid);
diff --git a/usr/src/pkg/manifests/system-test-libctest.mf b/usr/src/pkg/manifests/system-test-libctest.mf
index c3d9b87bdb..58ffa45ad9 100644
--- a/usr/src/pkg/manifests/system-test-libctest.mf
+++ b/usr/src/pkg/manifests/system-test-libctest.mf
@@ -28,6 +28,7 @@ dir path=opt/libc-tests/cfg
dir path=opt/libc-tests/cfg/symbols
dir path=opt/libc-tests/runfiles
dir path=opt/libc-tests/tests
+dir path=opt/libc-tests/tests/i18n
dir path=opt/libc-tests/tests/random
dir path=opt/libc-tests/tests/regex
dir path=opt/libc-tests/tests/regex/data
@@ -95,6 +96,9 @@ file path=opt/libc-tests/tests/fnmatch.64 mode=0555
file path=opt/libc-tests/tests/fpround_test mode=0555
file path=opt/libc-tests/tests/fpround_test.$(ARCH) mode=0555
file path=opt/libc-tests/tests/fpround_test.$(ARCH64) mode=0555
+file path=opt/libc-tests/tests/i18n/bindtextdomain_test mode=0555
+file path=opt/libc-tests/tests/i18n/bindtextdomain_test.$(ARCH) mode=0555
+file path=opt/libc-tests/tests/i18n/bindtextdomain_test.$(ARCH64) mode=0555
file path=opt/libc-tests/tests/memset_s.32 mode=0555
file path=opt/libc-tests/tests/memset_s.64 mode=0555
file path=opt/libc-tests/tests/newlocale_test mode=0555
diff --git a/usr/src/prototypes/prototype.man1 b/usr/src/prototypes/prototype.man1
new file mode 100644
index 0000000000..2ab3d426dc
--- /dev/null
+++ b/usr/src/prototypes/prototype.man1
@@ -0,0 +1,104 @@
+.\"
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source. A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.\"
+.\"
+.\" Copyright 2020 <contributor>
+.\"
+.Dd Month Day, Year
+.Dt COMMAND 1
+.\" Here and in other places "COMMAND" and "command" are place holders
+.\" that should be replaced with the name of the actual command that
+.\" this is documenting.
+.Os
+.Sh NAME
+.Nm command
+.Nd short description
+.Sh SYNOPSIS
+.Nm
+.\" Each of the following lines should use the Fl and Ar options to
+.\" indicate the set of supported options and flags. There should be one
+.\" option and argument per line. If there are independent ways of
+.\" invoking the command or independent sub-commands ala zfs(1M) or
+.\" dladm(1M), there should be a fresh '.Nm' to indicate that and the
+.\" sub-command should use .Cm.
+.Sh DESCRIPTION
+.\" Describe the purpose of the utility, what it does and how it
+.\" operates. If there are certain privileges or other considerations
+.\" for using this, indicate that here.
+.Sh OPTIONS
+The following options are supported:
+.Bl -tag -width Ar
+.It Fl flag1
+Description of the above flag1
+.It Fl flag2
+Description of the above flag2
+.El
+.Sh OPERANDS
+.\" This should be a list of non-flag arguments that are supported
+.\" and what they do in the following form.
+The following operands are supported:
+.Bl -tag -width Ar
+.It Ar oper1
+Description of what oper1 is.
+.It Ar oper2
+Description of what oper2 is.
+.El
+.Sh EXIT STATUS
+.\" This section should indicate the set of exit codes one can expect.
+.\" In general, do not use the '.Ex' macro and instead call out the
+.\" different error values. One would expect at least a difference
+.\" between an error during execution and an error in the usage.
+.Sh EXAMPLES
+.\" There should be multiple examples present that describe how to use
+.\" different parts of the command. This section should not be skipped
+.\" and it's good to have a number of them.
+.\" .Sh ENVIRONMENT
+.\" If the program reacts to environment variables, most often locale
+.\" related ones, document those here. If they are just the standard
+.\" locale ones, use the following text, adjusting it for the exact set
+.\" of locale specific values that impact it:
+.\" See
+.\" .Xr environ 5
+.\" for descriptions of the following environment variables
+.\" that affect the execution of
+.\" .Nm :
+.\" .Ev LANG ,
+.\" .Ev LC_ALL ,
+.\" .Ev LC_MESSAGES ,
+.\" .Ev LC_NUMERIC ,
+.\" and
+.\" .Ev NLSPATH .
+.\" .Sh CODE SET INDEPENDENCE
+.\" If there are issues around the code set, indicate so here. See
+.\" attributes(5).
+.Sh INTERFACE STABILITY
+.\" When documenting the stability of commands it's useful to
+.\" distinguish between the stability of the options and the command's
+.\" actual output. For most commands, output stability should only be
+.\" guaranteed if there's an explicit parseable option that controls the
+.\" type of data. You can use the following template:
+.\" The command line interface of
+.\" .Nm
+.\" is
+.\" .Sy Committed .
+.\" .Sy Evolving .
+.\" .Sy Volatile .
+.\" .Sy Private .
+.\" The output of
+.\" .Nm
+.\" is
+.\" .Sy Not-An-Interface
+.\" and may change at any time.
+.Sh SEE ALSO
+.\" List other commands that are related to this. For programs that are
+.\" are primarily wrappers around libc functionality or a particular
+.\" library, it's helpful to mention those here so the reader has an
+.\" idea of what is used to implement this. For example, the sleep
+.\" command would mention nanosleep(3C).
diff --git a/usr/src/prototypes/prototype.man3x b/usr/src/prototypes/prototype.man3x
new file mode 100644
index 0000000000..598315f959
--- /dev/null
+++ b/usr/src/prototypes/prototype.man3x
@@ -0,0 +1,79 @@
+.\"
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source. A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.\"
+.\"
+.\" Copyright 2020 <contributor>
+.\"
+.Dd Month Day, Year
+.Dt MANUALPAGE 3SECTION
+.\" MANUALPAGE should be replaced with the primary function that you are
+.\" documenting. If there is more than one function, then you should either
+.\" use the primary function of the group or a more general name. Every
+.\" documented function will appear in the NAME section below.
+.\"
+.\" The SECTION generally refers to the name of the library that this
+.\" appears in. For example for something in libc this would be 3C. In
+.\" general this is the capitalized version of the libraries shared
+.\" object name.
+.Os
+.Sh NAME
+.Nm funcname ,
+.Nm funcname2
+.Nd short description
+.Sh LIBRARY
+.Lb libname
+.Sh SYNOPSIS
+.\" Insert any required pre-processor macros with Dv
+.\" .In headerfile.h
+.\" .Ft return type
+.\" .Fo function name
+.\" .Fa "func arg 0 type and name"
+.\" .Fa "func arg 1 type and name"
+.\" .Fc
+.\" Repeat above for each function
+.Sh DESCRIPTION
+.\" Describe how the functions operate and what they do. Provide
+.\" background for the reader. Don't assume that they know how
+.\" everything works. Be clear about the semantics and the why.
+.Sh RETURN VALUES
+Upon successful completion, the
+.Fn funcname
+function returns XXX and <state side effects>. Otherwise XXX is returned
+and <if errno is set describe it below>.
+.Sh EXAMPLES
+.Sh ERRORS
+The
+.fn funcname
+functions will fail if:
+.Bl -tag -width Er
+.It Er ERRNO1
+A Reason why ERRNO1 could occur.
+.It Er ERRNO2
+A Reason why ERRNO2 could occur.
+.El
+.Sh INTERFACE STABILITY
+.\" Indicate the stability per attribute(5). One of:
+.\" .Sy Committed
+.\" .Sy Uncommitted
+.\" .Sy Volatile
+.\" .Sy Private
+.Sh MT-LEVEL
+.\" Indicate the MT-Level per attributes(5). If there are exceptions,
+.\" start with the level and go from there.
+.\" .Sy Safe
+.\" .Sy Unsafe
+.\" .Sy MT-Safe
+.\" .Sy Aysnc-Signal-Safe
+.Sh SEE ALSO
+.\" A list of mentioned manuals or others that are relevant to this
+.\" function. If there is a user command that exercises this, for
+.\" example, sleep(1) if documenting sleep(3C), list this here.
+.\" Generally an overview page for a library in 3LIB should also be
+.\" referenced.
diff --git a/usr/src/prototypes/prototype.man7d b/usr/src/prototypes/prototype.man7d
new file mode 100644
index 0000000000..ee51f21a64
--- /dev/null
+++ b/usr/src/prototypes/prototype.man7d
@@ -0,0 +1,49 @@
+.\"
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source. A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.\"
+.\"
+.\" Copyright 2020 <contributor>
+.\"
+.Dd Month Day, Year
+.Dt DRIVERNAME 7D
+.\" Here and in other places "DRIVERNAME" and "drivername" are place
+.\" holders that should be replaced with the name of the actual driver
+.\" that this is documenting.
+.Os
+.Sh NAME
+.Nm drivername
+.Nd driverdesc
+.Sh SYNOPSIS
+.Pa /dev/node/path
+.Sh DESCRIPTION
+The
+.Nm
+driver <fill out what it does and what devices it supports>. <Describe
+the functionality supported by the driver, e.g. for a NIC TSO, etc.>.
+.\" .Sh APPLICATION PROGRAMMING INTERFACE
+.\" If the user may interact with this driver in a specific way,
+.\" document it. The user may not because this driver is part of a
+.\" broader framework.
+.\" .Sh IOCTLS
+.\" If the driver has a non-standard ioctl interface, document it. If it
+.\" just implements the ones to support a framework, leave this out.
+.\" .Sh CONFIGURATION
+.\" If there is a driver.conf file, please describe the different
+.\" options that can be set and their expected stability.
+.\" .Sh ARCHITECTURE
+.\" If this driver is supported on particular architectures (usually not
+.\" the case for pseudo-devices), then include that.
+.Sh FILES
+.\" List the actual installation path of the driver and a configuration
+.\" file.
+.Sh SEE ALSO
+.\" This list should include user programs or libraries that are relevant to
+.\" the program. A nic might have dladm, a storage device, diskinfo, a
+.\" sensor, fmtopo.
diff --git a/usr/src/prototypes/prototype.man9e b/usr/src/prototypes/prototype.man9e
new file mode 100644
index 0000000000..dc229ad6fd
--- /dev/null
+++ b/usr/src/prototypes/prototype.man9e
@@ -0,0 +1,79 @@
+.\"
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source. A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.\"
+.\"
+.\" Copyright 2020 <contributor>
+.\"
+.Dd Month Day, Year
+.Dt ENTRYNAME 9E
+.\" ENTRYNAME should be replaced with the primary entry point that you are
+.\" documenting. If there is more than one function, then you should either
+.\" use the primary function of the group or a more general name. Every
+.\" documented entry point will appear in the NAME section below.
+.Os
+.Sh NAME
+.Nm entryname ,
+.Nm entryname2
+.Nd description
+.Sh SYNOPSIS
+.\" .In sys/header.h
+.\" .Ft "return type"
+.\" .Fo entryname
+.\" .Fa "arg 0 type and name"
+.\" .Fa "arg 1 type and name"
+.\" .Fc
+.\" Repeat above for each documented entry
+.Sh INTERFACE LEVEL
+.\" This should be one of the following:
+.\" .Sy Committed
+.\" .Sy Volatile -
+.\" This interface is still evolving in illumos.
+.\" API and ABI stability is not guaranteed.
+.Sh PARAMETERS
+.Bl -tag -width Fa
+.It Fa arg0
+Description of the parameter arg0.
+.It Fa arg1
+Description of the parameter arg1.
+.El
+.Sh DESCRIPTION
+.\" This should include a description of the entry point. This includes
+.\" information such as what framework it is a part of. What it is
+.\" expected to actually do. Any constraints on what it should validate.
+.\" Any concerns around locking or callbacks into the broader
+.\" frameworks. Whether it can be called in parallel by multiple
+.\" threads, etc.
+.Sh CONTEXT
+.\" Indicates the context in which this framework function is called.
+.\" Generally this is one or more of:
+.\" .Sy user
+.\" .Sy kernel
+.\" .Sy interrupt
+.\" However sometimes there may be something specific. Such as this is
+.\" only called or used during attach or detach.
+.Sh RETURN VALUES
+.\" This should indicate what the driver should return on successful
+.\" completion and what it should have done. Otherwise it should
+.\" indicate the class of error returned.
+.Sh ERRORS
+.\" This should be a list of recommended errors and causes. If the
+.\" interface only supports returning DDI_SUCCESS or DDI_FAILURE,
+.\" then this section should not be used. If it should only return these
+.\" specific errors and this section shouldn't be used as a guide,
+.\" indicate that.
+.Bl -tag -width Er
+.It Er ERRNO1
+Description of when to return ERRNO1.
+.It Er ERRNO2
+Description of when to return ERRNO2.
+.El
+.Sh SEE ALSO
+.\" A list of other manual pages related to the general framework or
+.\" section 9f functions they should call.
diff --git a/usr/src/prototypes/prototype.man9f b/usr/src/prototypes/prototype.man9f
new file mode 100644
index 0000000000..29e7c76d89
--- /dev/null
+++ b/usr/src/prototypes/prototype.man9f
@@ -0,0 +1,78 @@
+.\"
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source. A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.\"
+.\"
+.\" Copyright 2020 <contributor>
+.\"
+.Dd Month Day, Year
+.Dt FUNCNAME 9F
+.\" FUNCNAME should be replaced with the primary function that you are
+.\" documenting. If there is more than one function, then you should either
+.\" use the primary function of the group or a more general name. Every
+.\" documented function will appear in the NAME section below.
+.Os
+.Sh NAME
+.Nm funcname ,
+.Nm funcname2
+.Nd short description
+.Sh SYNOPSIS
+.\" .In sys/header.h
+.\" .Ft "return type"
+.\" .Fo funcname
+.\" .Fa "arg 0 type and name"
+.\" .Fa "arg 1 type and name"
+.\" .Fc
+.\" Repeat above for each documented entry
+.Sh INTERFACE LEVEL
+.\" This should be one of the following:
+.\" .Sy Committed
+.\" .Sy Volatile -
+.\" This interface is still evolving in illumos.
+.\" API and ABI stability is not guaranteed.
+.Sh PARAMETERS
+.Bl -tag -width Fa
+.It Fa arg0
+Description of the parameter arg0.
+.It Fa arg1
+Description of the parameter arg1.
+.El
+.Sh DESCRIPTION
+.\" This should include a description of the function, how to use it,
+.\" and why someone might use it. Any concerns around locking or
+.\" callbacks into the broader frameworks. Whether it can be called in
+.\" parallel by multiple threads, etc.
+.Sh CONTEXT
+.\" Indicates the context in which this framework function is called.
+.\" Generally this is one or more of:
+.\" .Sy user
+.\" .Sy kernel
+.\" .Sy interrupt
+.\" However sometimes there may be something specific. Such as this is
+.\" only called or used during attach or detach.
+.Sh EXAMPLES
+.\" Include examples of how someone might use this kernel function.
+.Sh RETURN VALUES
+.\" This should indicate what the function will return on successful
+.\" completion and what it should have done. Otherwise it should
+.\" indicate the class of error returned.
+.Sh ERRORS
+.\" If the function returns a set of errors, often errnos (not the case
+.\" for just returning DDI_SUCCESS/DDI_FAILURE), or any other
+.\" positive/negative indicator, then one should consider including the
+.\" meaning of the errors.
+.Bl -tag -width Er
+.It Er ERRNO1
+Description of what would cause ERRNO1.
+.It Er ERRNO2
+Description of what would cause ERRNO2.
+.El
+.Sh SEE ALSO
+.\" A list of other manual pages related to the general framework or
+.\" section 9f functions they might call.
diff --git a/usr/src/test/libc-tests/runfiles/default.run b/usr/src/test/libc-tests/runfiles/default.run
index 2556c6916c..c819079ef6 100644
--- a/usr/src/test/libc-tests/runfiles/default.run
+++ b/usr/src/test/libc-tests/runfiles/default.run
@@ -38,6 +38,8 @@ outputdir = /var/tmp/test_results
[/opt/libc-tests/tests/wcsncasecmp-7350.32]
[/opt/libc-tests/tests/wcsncasecmp-7350.64]
+[/opt/libc-tests/tests/i18n/bindtextdomain_test]
+
[/opt/libc-tests/tests/random/getrandom]
[/opt/libc-tests/tests/random/getentropy]
[/opt/libc-tests/tests/random/chacha]
diff --git a/usr/src/test/libc-tests/tests/Makefile b/usr/src/test/libc-tests/tests/Makefile
index 9ea35b5525..63f108e83c 100644
--- a/usr/src/test/libc-tests/tests/Makefile
+++ b/usr/src/test/libc-tests/tests/Makefile
@@ -18,6 +18,7 @@
SUBDIRS = \
catopen \
fpround \
+ i18n \
newlocale \
nl_langinfo \
priv_gettext \
diff --git a/usr/src/test/libc-tests/tests/i18n/Makefile b/usr/src/test/libc-tests/tests/i18n/Makefile
new file mode 100644
index 0000000000..56410d23a3
--- /dev/null
+++ b/usr/src/test/libc-tests/tests/i18n/Makefile
@@ -0,0 +1,25 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2020 Richard Hansen <rhansen@rhansen.org>
+#
+
+include $(SRC)/Makefile.master
+
+TESTSUBDIR = i18n
+PROG = bindtextdomain_test
+ARCHPROG = bindtextdomain_test
+
+include ../Makefile.com
+
+LDLIBS += -lumem
+LDLIBS64 += -lumem
diff --git a/usr/src/test/libc-tests/tests/i18n/bindtextdomain_test.c b/usr/src/test/libc-tests/tests/i18n/bindtextdomain_test.c
new file mode 100644
index 0000000000..bb608e0328
--- /dev/null
+++ b/usr/src/test/libc-tests/tests/i18n/bindtextdomain_test.c
@@ -0,0 +1,143 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2020 Richard Hansen <rhansen@rhansen.org>
+ */
+
+#include <errno.h>
+#include <libintl.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/sysmacros.h>
+#include <umem.h>
+#include <unistd.h>
+#include "test_common.h"
+
+const char *
+_umem_debug_init(void)
+{
+ return ("default");
+}
+
+int
+main(int argc, char *argv[])
+{
+ int ret = 0;
+ int optc;
+ while ((optc = getopt(argc, argv, "df")) != -1) {
+ switch (optc) {
+ case 'd':
+ test_set_debug();
+ break;
+ case 'f':
+ test_set_force();
+ break;
+ default:
+ (void) fprintf(stderr, "Usage: %s [-df]\n", argv[0]);
+ exit(1);
+ }
+ }
+
+ struct {
+ const char *name;
+ const char *dir;
+ bool malloc_fail;
+ const char *want;
+ int want_errno;
+ } test_cases[] = {
+ {
+ .name = "unbound query",
+ .dir = NULL,
+ .want = "/usr/lib/locale/",
+ },
+ {
+ .name = "bind malloc fail",
+ .dir = "/bounddir1",
+ .malloc_fail = true,
+ .want = NULL,
+ .want_errno = EAGAIN,
+ },
+ {
+ .name = "query after bind malloc fail",
+ .dir = NULL,
+ .want = "/usr/lib/locale/",
+ },
+ {
+ .name = "normal bind",
+ .dir = "/bounddir2",
+ .want = "/bounddir2",
+ },
+ {
+ .name = "query after normal bind",
+ .dir = NULL,
+ .want = "/bounddir2",
+ },
+ {
+ .name = "rebind to same",
+ .dir = "/bounddir2",
+ .want = "/bounddir2",
+ },
+ {
+ .name = "query after rebind to same",
+ .dir = NULL,
+ .want = "/bounddir2",
+ },
+ {
+ .name = "rebind to new",
+ .dir = "/bounddir3",
+ .want = "/bounddir3",
+ },
+ {
+ .name = "query after rebind to new",
+ .dir = NULL,
+ .want = "/bounddir3",
+ },
+ {
+ .name = "rebind malloc fail",
+ .dir = "/bounddir4",
+ .malloc_fail = true,
+ .want = NULL,
+ .want_errno = EAGAIN,
+ },
+ {
+ .name = "query after rebind malloc fail",
+ .dir = NULL,
+ .want = "/bounddir3",
+ },
+ }, *tc;
+
+ for (size_t i = 0; i < ARRAY_SIZE(test_cases); ++i) {
+ tc = &test_cases[i];
+ test_t t = test_start(tc->name);
+ umem_setmtbf((uint_t)tc->malloc_fail);
+ errno = 0;
+ const char *got = bindtextdomain("domain", tc->dir);
+ int got_errno = errno;
+ umem_setmtbf(0);
+ if (((got == NULL) != (tc->want == NULL)) ||
+ ((got != NULL) && strcmp(got, tc->want))) {
+ test_failed(t, "returned %s, want %s",
+ got != NULL ? got : "<NULL>",
+ tc->want != NULL ? tc->want : "<NULL>");
+ ret = 1;
+ }
+ if (got_errno != tc->want_errno) {
+ test_failed(t, "got errno %d, want %d",
+ got_errno, tc->want_errno);
+ ret = 1;
+ }
+ test_passed(t);
+ }
+ test_summary();
+ return (ret);
+}
diff --git a/usr/src/tools/scripts/cstyle.pl b/usr/src/tools/scripts/cstyle.pl
index 5c474cfe28..e4d3694f3b 100644
--- a/usr/src/tools/scripts/cstyle.pl
+++ b/usr/src/tools/scripts/cstyle.pl
@@ -21,6 +21,7 @@
#
# Copyright 2015 Toomas Soome <tsoome@me.com>
# Copyright 2016 Nexenta Systems, Inc.
+# Copyright 2020 Oxide Computer Company
#
# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
@@ -121,6 +122,16 @@ if ($doxygen_comments) {
$hdr_comment_start = qr/^\s*\/\*$/;
}
+# FreeBSD uses comments styled as such for their license headers:
+# /*-
+# * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+# *
+# ...
+#
+# In order to apply other cstyle checks to those files without stumbling over
+# the license header, tolerate such comment openings as well.
+my $fbsd_comment_start = qr/^\s*\/\*-$/;
+
# Note, following must be in single quotes so that \s and \w work right.
my $typename = '(int|char|short|long|unsigned|float|double' .
'|\w+_t|struct\s+\w+|union\s+\w+|FILE)';
@@ -463,7 +474,7 @@ line: while (<$filehandle>) {
$comment_done = 0;
}
# does this looks like the start of a block comment?
- if (/$hdr_comment_start/) {
+ if (/$hdr_comment_start/ || /$fbsd_comment_start/) {
if (!/^\t*\/\*/) {
err("block comment not indented by tabs");
}
diff --git a/usr/src/uts/common/fs/zfs/zio.c b/usr/src/uts/common/fs/zfs/zio.c
index b02363e7eb..5215a58bf2 100644
--- a/usr/src/uts/common/fs/zfs/zio.c
+++ b/usr/src/uts/common/fs/zfs/zio.c
@@ -26,6 +26,7 @@
* Copyright (c) 2014 Integros [integros.com]
* Copyright (c) 2019 Joyent, Inc.
* Copyright (c) 2017, Intel Corporation.
+ * Copyright 2020 Joyent, Inc.
*/
#include <sys/sysmacros.h>
@@ -1990,6 +1991,10 @@ zio_suspend(spa_t *spa, zio_t *zio, zio_suspend_reason_t reason)
"failure and the failure mode property for this pool "
"is set to panic.", spa_name(spa));
+ cmn_err(CE_WARN, "Pool '%s' has encountered an uncorrectable I/O "
+ "failure and has been suspended; `zpool clear` will be required "
+ "before the pool can be written to.", spa_name(spa));
+
zfs_ereport_post(FM_EREPORT_ZFS_IO_FAILURE, spa, NULL,
NULL, NULL, 0, 0);
diff --git a/usr/src/uts/common/io/i40e/core/i40e_common.c b/usr/src/uts/common/io/i40e/core/i40e_common.c
index fd32e0204c..f750bf69ce 100644
--- a/usr/src/uts/common/io/i40e/core/i40e_common.c
+++ b/usr/src/uts/common/io/i40e/core/i40e_common.c
@@ -3885,10 +3885,17 @@ static void i40e_parse_discover_capabilities(struct i40e_hw *hw, void *buff,
/* Always disable FCoE if compiled without the I40E_FCOE_ENA flag */
p->fcoe = FALSE;
+ valid_functions = p->valid_functions;
+ num_functions = 0;
+ while (valid_functions) {
+ if (valid_functions & 1)
+ num_functions++;
+ valid_functions >>= 1;
+ }
+
/* count the enabled ports (aka the "not disabled" ports) */
hw->num_ports = 0;
for (i = 0; i < 4; i++) {
- enum i40e_status_code status;
u32 port_cfg_reg = I40E_PRTGEN_STATUS + (4 * i);
u64 port_cfg = 0;
@@ -3907,6 +3914,16 @@ static void i40e_parse_discover_capabilities(struct i40e_hw *hw, void *buff,
* Those cards have 4 PFs at minimum, so using PRTGEN_CNF for counting
* physical ports results in wrong partition id calculation and thus
* not supporting WoL.
+ *
+ * Porting note: the above comment is no longer directly relevant: we
+ * read PRTGEN_STATUS instead now, as PRTGEN_CNF was not reliable for
+ * these parts. In addition, the claim about having 4 PFs is not
+ * correct. For example, an X557-T2 is a dual port mezz card. Forcing
+ * ports to four here will cause ->num_partitions to be zero.
+ *
+ * On the presumption that the hard-coded value is meaningful in some
+ * cases, though, we'll take the minimal approach of ensuring that we
+ * never have more ports than functions.
*/
if (hw->mac.type == I40E_MAC_X722) {
if (i40e_acquire_nvm(hw, I40E_RESOURCE_READ) == I40E_SUCCESS) {
@@ -3914,21 +3931,25 @@ static void i40e_parse_discover_capabilities(struct i40e_hw *hw, void *buff,
2 * I40E_SR_OCP_CFG_WORD0,
sizeof(ocp_cfg_word0),
&ocp_cfg_word0, TRUE, NULL);
+#ifdef __sun__
+ if (status == I40E_SUCCESS &&
+ (ocp_cfg_word0 & I40E_SR_OCP_ENABLED)) {
+ hw->num_ports = 4;
+ if (hw->num_ports > num_functions) {
+ hw->num_ports = num_functions;
+ DEBUGOUT1("clamped 4 OCP ports to %d\n",
+ (int)hw->num_ports);
+ }
+ }
+#else
if (status == I40E_SUCCESS &&
(ocp_cfg_word0 & I40E_SR_OCP_ENABLED))
hw->num_ports = 4;
+#endif
i40e_release_nvm(hw);
}
}
- valid_functions = p->valid_functions;
- num_functions = 0;
- while (valid_functions) {
- if (valid_functions & 1)
- num_functions++;
- valid_functions >>= 1;
- }
-
/* partition id is 1-based, and functions are evenly spread
* across the ports as partitions
*/
@@ -3937,6 +3958,8 @@ static void i40e_parse_discover_capabilities(struct i40e_hw *hw, void *buff,
hw->num_partitions = num_functions / hw->num_ports;
}
+ VERIFY(hw->num_partitions > 0);
+
/* additional HW specific goodies that might
* someday be HW version specific
*/
diff --git a/usr/src/uts/i86pc/io/vmm/amd/amdvi_hw.c b/usr/src/uts/i86pc/io/vmm/amd/amdvi_hw.c
index 9cf9200b3d..a01b06446d 100644
--- a/usr/src/uts/i86pc/io/vmm/amd/amdvi_hw.c
+++ b/usr/src/uts/i86pc/io/vmm/amd/amdvi_hw.c
@@ -1003,8 +1003,8 @@ amdvi_teardown_hw(struct amdvi_softc *softc)
dev = softc->dev;
- /*
- * Called after disable, h/w is stopped by now, free all the resources.
+ /*
+ * Called after disable, h/w is stopped by now, free all the resources.
*/
amdvi_free_evt_intr_res(dev);
@@ -1026,7 +1026,7 @@ amdvi_init(void)
}
if (!amdvi_enable_user && ivhd_count) {
printf("bhyve: Found %d AMD-Vi/IOMMU device(s), "
- "use hw.vmm.amdvi.enable=1 to enable pass-through.\n",
+ "use hw.vmm.amdvi.enable=1 to enable pass-through.\n",
ivhd_count);
return (EINVAL);
}
@@ -1304,7 +1304,7 @@ amdvi_set_dte(struct amdvi_domain *domain, uint16_t devid, bool enable)
struct amdvi_dte* temp;
KASSERT(domain, ("domain is NULL for pci_rid:0x%x\n", devid));
-
+
softc = amdvi_find_iommu(devid);
KASSERT(softc, ("softc is NULL for pci_rid:0x%x\n", devid));
@@ -1397,11 +1397,11 @@ amdvi_enable(void)
ctrl = softc->ctrl;
KASSERT(ctrl, ("ctrl is NULL\n"));
- val = ( AMDVI_CTRL_EN |
- AMDVI_CTRL_CMD |
- AMDVI_CTRL_ELOG |
- AMDVI_CTRL_ELOGINT |
- AMDVI_CTRL_INV_TO_1S);
+ val = ( AMDVI_CTRL_EN |
+ AMDVI_CTRL_CMD |
+ AMDVI_CTRL_ELOG |
+ AMDVI_CTRL_ELOGINT |
+ AMDVI_CTRL_INV_TO_1S);
if (softc->ivhd_flag & IVHD_FLAG_COH)
val |= AMDVI_CTRL_COH;
diff --git a/usr/src/uts/i86pc/io/vmm/amd/amdvi_priv.h b/usr/src/uts/i86pc/io/vmm/amd/amdvi_priv.h
index 2db6914f08..5d47142a72 100644
--- a/usr/src/uts/i86pc/io/vmm/amd/amdvi_priv.h
+++ b/usr/src/uts/i86pc/io/vmm/amd/amdvi_priv.h
@@ -35,7 +35,7 @@
#define BIT(n) (1ULL << (n))
/* Return value of bits[n:m] where n and (n >= ) m are bit positions. */
-#define REG_BITS(x, n, m) (((x) >> (m)) & \
+#define REG_BITS(x, n, m) (((x) >> (m)) & \
((1 << (((n) - (m)) + 1)) - 1))
/*
@@ -111,10 +111,10 @@ CTASSERT(sizeof(struct amdvi_dte) == 32);
* IOMMU command entry.
*/
struct amdvi_cmd {
- uint32_t word0;
- uint32_t word1:28;
+ uint32_t word0;
+ uint32_t word1:28;
uint8_t opcode:4;
- uint64_t addr;
+ uint64_t addr;
} __attribute__((__packed__));
/* Command opcodes. */
@@ -150,12 +150,12 @@ struct amdvi_cmd {
* IOMMU event entry.
*/
struct amdvi_event {
- uint16_t devid;
- uint16_t pasid_hi;
- uint16_t pasid_domid; /* PASID low or DomainID */
- uint16_t flag:12;
+ uint16_t devid;
+ uint16_t pasid_hi;
+ uint16_t pasid_domid; /* PASID low or DomainID */
+ uint16_t flag:12;
uint8_t opcode:4;
- uint64_t addr;
+ uint64_t addr;
} __attribute__((__packed__));
CTASSERT(sizeof(struct amdvi_event) == 16);
@@ -210,8 +210,8 @@ struct amdvi_ctrl {
uint64_t limit:40;
uint16_t :12;
} excl;
- /*
- * Revision 2 only.
+ /*
+ * Revision 2 only.
*/
uint64_t ex_feature;
struct {
@@ -252,8 +252,8 @@ CTASSERT(offsetof(struct amdvi_ctrl, pad2)== 0x2028);
CTASSERT(offsetof(struct amdvi_ctrl, pad3)== 0x2040);
#define AMDVI_MMIO_V1_SIZE (4 * PAGE_SIZE) /* v1 size */
-/*
- * AMF IOMMU v2 size including event counters
+/*
+ * AMF IOMMU v2 size including event counters
*/
#define AMDVI_MMIO_V2_SIZE (8 * PAGE_SIZE)
@@ -374,38 +374,38 @@ enum IvrsType
*/
struct amdvi_softc {
struct amdvi_ctrl *ctrl; /* Control area. */
- device_t dev; /* IOMMU device. */
+ device_t dev; /* IOMMU device. */
enum IvrsType ivhd_type; /* IOMMU IVHD type. */
bool iotlb; /* IOTLB supported by IOMMU */
struct amdvi_cmd *cmd; /* Command descriptor area. */
- int cmd_max; /* Max number of commands. */
+ int cmd_max; /* Max number of commands. */
uint64_t cmp_data; /* Command completion write back. */
struct amdvi_event *event; /* Event descriptor area. */
struct resource *event_res; /* Event interrupt resource. */
- void *event_tag; /* Event interrupt tag. */
+ void *event_tag; /* Event interrupt tag. */
int event_max; /* Max number of events. */
int event_irq;
int event_rid;
/* ACPI various flags. */
- uint32_t ivhd_flag; /* ACPI IVHD flag. */
- uint32_t ivhd_feature; /* ACPI v1 Reserved or v2 attribute. */
- uint64_t ext_feature; /* IVHD EFR */
+ uint32_t ivhd_flag; /* ACPI IVHD flag. */
+ uint32_t ivhd_feature; /* ACPI v1 Reserved or v2 attribute. */
+ uint64_t ext_feature; /* IVHD EFR */
/* PCI related. */
- uint16_t cap_off; /* PCI Capability offset. */
+ uint16_t cap_off; /* PCI Capability offset. */
uint8_t pci_cap; /* PCI capability. */
- uint16_t pci_seg; /* IOMMU PCI domain/segment. */
- uint16_t pci_rid; /* PCI BDF of IOMMU */
+ uint16_t pci_seg; /* IOMMU PCI domain/segment. */
+ uint16_t pci_rid; /* PCI BDF of IOMMU */
/* Device range under this IOMMU. */
- uint16_t start_dev_rid; /* First device under this IOMMU. */
- uint16_t end_dev_rid; /* Last device under this IOMMU. */
+ uint16_t start_dev_rid; /* First device under this IOMMU. */
+ uint16_t end_dev_rid; /* Last device under this IOMMU. */
/* BIOS provided device configuration for end points. */
- struct ivhd_dev_cfg dev_cfg[10];
+ struct ivhd_dev_cfg dev_cfg[10];
int dev_cfg_cnt;
/* Software statistics. */
- uint64_t event_intr_cnt; /* Total event INTR count. */
- uint64_t total_cmd; /* Total number of commands. */
+ uint64_t event_intr_cnt; /* Total event INTR count. */
+ uint64_t total_cmd; /* Total number of commands. */
};
int amdvi_setup_hw(struct amdvi_softc *softc);
diff --git a/usr/src/uts/i86pc/io/vmm/amd/ivrs_drv.c b/usr/src/uts/i86pc/io/vmm/amd/ivrs_drv.c
index b754058c07..11925582ef 100644
--- a/usr/src/uts/i86pc/io/vmm/amd/ivrs_drv.c
+++ b/usr/src/uts/i86pc/io/vmm/amd/ivrs_drv.c
@@ -50,11 +50,11 @@ __FBSDID("$FreeBSD$");
device_t *ivhd_devs; /* IVHD or AMD-Vi device list. */
int ivhd_count; /* Number of IVHD header. */
-/*
+/*
* Cached IVHD header list.
* Single entry for each IVHD, filtered the legacy one.
*/
-ACPI_IVRS_HARDWARE1 *ivhd_hdrs[10];
+ACPI_IVRS_HARDWARE1 *ivhd_hdrs[10];
extern int amdvi_ptp_level; /* Page table levels. */
@@ -218,7 +218,7 @@ ivhd_dev_parse(ACPI_IVRS_HARDWARE1 *ivhd, struct amdvi_softc *softc)
break;
default:
- device_printf(softc->dev,
+ device_printf(softc->dev,
"unknown type: 0x%x\n", ivhd->Header.Type);
return (-1);
}
@@ -346,7 +346,7 @@ ivhd_identify(driver_t *driver, device_t parent)
ivrs_ivinfo = ivrs->Info;
printf("AMD-Vi: IVRS Info VAsize = %d PAsize = %d GVAsize = %d"
" flags:%b\n",
- REG_BITS(ivrs_ivinfo, 21, 15), REG_BITS(ivrs_ivinfo, 14, 8),
+ REG_BITS(ivrs_ivinfo, 21, 15), REG_BITS(ivrs_ivinfo, 14, 8),
REG_BITS(ivrs_ivinfo, 7, 5), REG_BITS(ivrs_ivinfo, 22, 22),
"\020\001EFRSup");
@@ -360,17 +360,17 @@ ivhd_identify(driver_t *driver, device_t parent)
ivhd_hdrs[i] = ivhd;
}
- /*
+ /*
* Scan for presence of legacy and non-legacy device type
* for same AMD-Vi device and override the old one.
*/
for (i = ivhd_count - 1 ; i > 0 ; i--){
- if (ivhd_is_newer(&ivhd_hdrs[i-1]->Header,
+ if (ivhd_is_newer(&ivhd_hdrs[i-1]->Header,
&ivhd_hdrs[i]->Header)) {
ivhd_hdrs[i-1] = ivhd_hdrs[i];
ivhd_count--;
}
- }
+ }
ivhd_devs = malloc(sizeof(device_t) * ivhd_count, M_DEVBUF,
M_WAITOK | M_ZERO);
@@ -415,7 +415,7 @@ ivhd_probe(device_t dev)
return (ENXIO);
unit = device_get_unit(dev);
- KASSERT((unit < ivhd_count),
+ KASSERT((unit < ivhd_count),
("ivhd unit %d > count %d", unit, ivhd_count));
ivhd = ivhd_hdrs[unit];
KASSERT(ivhd, ("ivhd is NULL"));
@@ -424,7 +424,7 @@ ivhd_probe(device_t dev)
case IVRS_TYPE_HARDWARE_EFR:
device_set_desc(dev, "AMD-Vi/IOMMU ivhd with EFR");
break;
-
+
case IVRS_TYPE_HARDWARE_MIXED:
device_set_desc(dev, "AMD-Vi/IOMMU ivhd in mixed format");
break;
@@ -482,7 +482,7 @@ ivhd_print_flag(device_t dev, enum IvrsType ivhd_type, uint8_t flag)
* Feature in legacy IVHD type(0x10) and attribute in newer type(0x11 and 0x40).
*/
static void
-ivhd_print_feature(device_t dev, enum IvrsType ivhd_type, uint32_t feature)
+ivhd_print_feature(device_t dev, enum IvrsType ivhd_type, uint32_t feature)
{
switch (ivhd_type) {
case IVRS_TYPE_HARDWARE_LEGACY:
@@ -576,9 +576,9 @@ ivhd_print_ext_feature(device_t dev, uint64_t ext_feature)
"\020AttrFWSup"
"\021HDSup"
"\023InvIotlbSup",
- REG_BITS(ext_high, 5, 0),
- REG_BITS(ext_high, 8, 7),
- REG_BITS(ext_high, 11, 10));
+ REG_BITS(ext_high, 5, 0),
+ REG_BITS(ext_high, 8, 7),
+ REG_BITS(ext_high, 11, 10));
}
static int
@@ -588,7 +588,7 @@ ivhd_print_cap(struct amdvi_softc *softc, ACPI_IVRS_HARDWARE1 * ivhd)
int max_ptp_level;
dev = softc->dev;
-
+
ivhd_print_flag(dev, softc->ivhd_type, softc->ivhd_flag);
ivhd_print_feature(dev, softc->ivhd_type, softc->ivhd_feature);
ivhd_print_ext_feature(dev, softc->ext_feature);
@@ -600,7 +600,7 @@ ivhd_print_cap(struct amdvi_softc *softc, ACPI_IVRS_HARDWARE1 * ivhd)
return (EINVAL);
} else {
device_printf(softc->dev, "supported paging level:%d, will use only: %d\n",
- max_ptp_level, amdvi_ptp_level);
+ max_ptp_level, amdvi_ptp_level);
}
device_printf(softc->dev, "device range: 0x%x - 0x%x\n",
@@ -618,7 +618,7 @@ ivhd_attach(device_t dev)
int status, unit;
unit = device_get_unit(dev);
- KASSERT((unit < ivhd_count),
+ KASSERT((unit < ivhd_count),
("ivhd unit %d > count %d", unit, ivhd_count));
/* Make sure its same device for which attach is called. */
KASSERT((ivhd_devs[unit] == dev),
@@ -633,12 +633,12 @@ ivhd_attach(device_t dev)
softc->pci_seg = ivhd->PciSegmentGroup;
softc->pci_rid = ivhd->Header.DeviceId;
softc->ivhd_flag = ivhd->Header.Flags;
- /*
+ /*
	 * On legacy IVHD type(0x10), it is documented as feature
* but in newer type it is attribute.
*/
softc->ivhd_feature = ivhd->FeatureReporting;
- /*
+ /*
* PCI capability has more capabilities that are not part of IVRS.
*/
softc->cap_off = ivhd->CapabilityOffset;
@@ -669,7 +669,7 @@ ivhd_attach(device_t dev)
status = amdvi_setup_hw(softc);
if (status != 0) {
- device_printf(dev, "couldn't be initialised, error=%d\n",
+ device_printf(dev, "couldn't be initialised, error=%d\n",
status);
return (status);
}
diff --git a/usr/src/uts/i86pc/io/vmm/amd/npt.c b/usr/src/uts/i86pc/io/vmm/amd/npt.c
index 862f6a0ecf..3f143a5d8f 100644
--- a/usr/src/uts/i86pc/io/vmm/amd/npt.c
+++ b/usr/src/uts/i86pc/io/vmm/amd/npt.c
@@ -61,28 +61,25 @@ svm_npt_init(int ipinum)
npt_flags = ipinum & NPT_IPIMASK;
TUNABLE_INT_FETCH("hw.vmm.npt.enable_superpage", &enable_superpage);
if (enable_superpage)
- npt_flags |= PMAP_PDE_SUPERPAGE;
-
+ npt_flags |= PMAP_PDE_SUPERPAGE;
+
return (0);
}
static int
npt_pinit(pmap_t pmap)
{
-
return (pmap_pinit_type(pmap, PT_RVI, npt_flags));
}
struct vmspace *
svm_npt_alloc(vm_offset_t min, vm_offset_t max)
{
-
return (vmspace_alloc(min, max, npt_pinit));
}
void
svm_npt_free(struct vmspace *vmspace)
{
-
vmspace_free(vmspace);
}
diff --git a/usr/src/uts/i86pc/io/vmm/amd/npt.h b/usr/src/uts/i86pc/io/vmm/amd/npt.h
index 35530d7833..d90a1b14b2 100644
--- a/usr/src/uts/i86pc/io/vmm/amd/npt.h
+++ b/usr/src/uts/i86pc/io/vmm/amd/npt.h
@@ -31,7 +31,7 @@
#ifndef _SVM_NPT_H_
#define _SVM_NPT_H_
-int svm_npt_init(int ipinum);
+int svm_npt_init(int ipinum);
struct vmspace *svm_npt_alloc(vm_offset_t min, vm_offset_t max);
void svm_npt_free(struct vmspace *vmspace);
diff --git a/usr/src/uts/i86pc/io/vmm/amd/svm.c b/usr/src/uts/i86pc/io/vmm/amd/svm.c
index 615d3cd029..1046a54126 100644
--- a/usr/src/uts/i86pc/io/vmm/amd/svm.c
+++ b/usr/src/uts/i86pc/io/vmm/amd/svm.c
@@ -68,7 +68,7 @@ __FBSDID("$FreeBSD$");
#include <machine/smp.h>
#include <machine/vmm.h>
#include <machine/vmm_dev.h>
-#include <machine/vmm_instruction_emul.h>
+#include <sys/vmm_instruction_emul.h>
#include "vmm_lapic.h"
#include "vmm_stat.h"
@@ -104,7 +104,7 @@ SYSCTL_NODE(_hw_vmm, OID_AUTO, svm, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL,
#define AMD_CPUID_SVM_PAUSE_FTH BIT(12) /* Pause filter threshold */
#define AMD_CPUID_SVM_AVIC BIT(13) /* AVIC present */
-#define VMCB_CACHE_DEFAULT (VMCB_CACHE_ASID | \
+#define VMCB_CACHE_DEFAULT (VMCB_CACHE_ASID | \
VMCB_CACHE_IOPM | \
VMCB_CACHE_I | \
VMCB_CACHE_TPR | \
@@ -139,7 +139,7 @@ SYSCTL_UINT(_hw_vmm_svm, OID_AUTO, num_asids, CTLFLAG_RDTUN, &nasid, 0,
/* Current ASID generation for each host cpu */
static struct asid asid[MAXCPU];
-/*
+/*
* SVM host state saved area of size 4KB for each core.
*/
static uint8_t hsave[MAXCPU][PAGE_SIZE] __aligned(PAGE_SIZE);
@@ -154,14 +154,12 @@ static int svm_setreg(void *arg, int vcpu, int ident, uint64_t val);
static __inline int
flush_by_asid(void)
{
-
return (svm_feature & AMD_CPUID_SVM_FLUSH_BY_ASID);
}
static __inline int
decode_assist(void)
{
-
return (svm_feature & AMD_CPUID_SVM_DECODE_ASSIST);
}
@@ -307,7 +305,7 @@ svm_restore(void)
{
svm_enable(NULL);
-}
+}
#else /* __FreeBSD__ */
static int
svm_cleanup(void)
@@ -335,14 +333,14 @@ svm_restore(void)
#endif /* __FreeBSD__ */
/* Pentium compatible MSRs */
-#define MSR_PENTIUM_START 0
-#define MSR_PENTIUM_END 0x1FFF
+#define MSR_PENTIUM_START 0
+#define MSR_PENTIUM_END 0x1FFF
/* AMD 6th generation and Intel compatible MSRs */
-#define MSR_AMD6TH_START 0xC0000000UL
-#define MSR_AMD6TH_END 0xC0001FFFUL
+#define MSR_AMD6TH_START 0xC0000000UL
+#define MSR_AMD6TH_END 0xC0001FFFUL
/* AMD 7th and 8th generation compatible MSRs */
-#define MSR_AMD7TH_START 0xC0010000UL
-#define MSR_AMD7TH_END 0xC0011FFFUL
+#define MSR_AMD7TH_START 0xC0010000UL
+#define MSR_AMD7TH_END 0xC0011FFFUL
/*
* Get the index and bit position for a MSR in permission bitmap.
@@ -362,12 +360,12 @@ svm_msr_index(uint64_t msr, int *index, int *bit)
return (0);
}
- base += (MSR_PENTIUM_END - MSR_PENTIUM_START + 1);
+ base += (MSR_PENTIUM_END - MSR_PENTIUM_START + 1);
if (msr >= MSR_AMD6TH_START && msr <= MSR_AMD6TH_END) {
- off = (msr - MSR_AMD6TH_START);
+ off = (msr - MSR_AMD6TH_START);
*index = (off + base) / 4;
return (0);
- }
+ }
base += (MSR_AMD6TH_END - MSR_AMD6TH_START + 1);
if (msr >= MSR_AMD7TH_START && msr <= MSR_AMD7TH_END) {
@@ -717,61 +715,6 @@ svm_paging_mode(uint64_t cr0, uint64_t cr4, uint64_t efer)
/*
* ins/outs utility routines
*/
-static uint64_t
-svm_inout_str_index(struct svm_regctx *regs, int in)
-{
- uint64_t val;
-
- val = in ? regs->sctx_rdi : regs->sctx_rsi;
-
- return (val);
-}
-
-static uint64_t
-svm_inout_str_count(struct svm_regctx *regs, int rep)
-{
- uint64_t val;
-
- val = rep ? regs->sctx_rcx : 1;
-
- return (val);
-}
-
-static void
-svm_inout_str_seginfo(struct svm_softc *svm_sc, int vcpu, int64_t info1,
- int in, struct vm_inout_str *vis)
-{
- int error, s;
-
- if (in) {
- vis->seg_name = VM_REG_GUEST_ES;
- } else {
- /* The segment field has standard encoding */
- s = (info1 >> 10) & 0x7;
- vis->seg_name = vm_segment_name(s);
- }
-
- error = vmcb_getdesc(svm_sc, vcpu, vis->seg_name, &vis->seg_desc);
- KASSERT(error == 0, ("%s: svm_getdesc error %d", __func__, error));
-}
-
-static int
-svm_inout_str_addrsize(uint64_t info1)
-{
- uint32_t size;
-
- size = (info1 >> 7) & 0x7;
- switch (size) {
- case 1:
- return (2); /* 16 bit */
- case 2:
- return (4); /* 32 bit */
- case 4:
- return (8); /* 64 bit */
- default:
- panic("%s: invalid size encoding %d", __func__, size);
- }
-}
static void
svm_paging_info(struct vmcb *vmcb, struct vm_guest_paging *paging)
@@ -792,53 +735,78 @@ svm_paging_info(struct vmcb *vmcb, struct vm_guest_paging *paging)
* Handle guest I/O intercept.
*/
static int
-svm_handle_io(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit)
+svm_handle_inout(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit)
{
struct vmcb_ctrl *ctrl;
struct vmcb_state *state;
- struct svm_regctx *regs;
- struct vm_inout_str *vis;
+ struct vm_inout *inout;
+ struct vie *vie;
uint64_t info1;
- int inout_string;
+ struct vm_guest_paging paging;
state = svm_get_vmcb_state(svm_sc, vcpu);
- ctrl = svm_get_vmcb_ctrl(svm_sc, vcpu);
- regs = svm_get_guest_regctx(svm_sc, vcpu);
-
+ ctrl = svm_get_vmcb_ctrl(svm_sc, vcpu);
+ inout = &vmexit->u.inout;
info1 = ctrl->exitinfo1;
- inout_string = info1 & BIT(2) ? 1 : 0;
- /*
- * The effective segment number in EXITINFO1[12:10] is populated
- * only if the processor has the DecodeAssist capability.
- *
- * XXX this is not specified explicitly in APMv2 but can be verified
- * empirically.
- */
- if (inout_string && !decode_assist())
- return (UNHANDLED);
-
- vmexit->exitcode = VM_EXITCODE_INOUT;
- vmexit->u.inout.in = (info1 & BIT(0)) ? 1 : 0;
- vmexit->u.inout.string = inout_string;
- vmexit->u.inout.rep = (info1 & BIT(3)) ? 1 : 0;
- vmexit->u.inout.bytes = (info1 >> 4) & 0x7;
- vmexit->u.inout.port = (uint16_t)(info1 >> 16);
- vmexit->u.inout.eax = (uint32_t)(state->rax);
-
- if (inout_string) {
- vmexit->exitcode = VM_EXITCODE_INOUT_STR;
- vis = &vmexit->u.inout_str;
- svm_paging_info(svm_get_vmcb(svm_sc, vcpu), &vis->paging);
- vis->rflags = state->rflags;
- vis->cr0 = state->cr0;
- vis->index = svm_inout_str_index(regs, vmexit->u.inout.in);
- vis->count = svm_inout_str_count(regs, vmexit->u.inout.rep);
- vis->addrsize = svm_inout_str_addrsize(info1);
- svm_inout_str_seginfo(svm_sc, vcpu, info1,
- vmexit->u.inout.in, vis);
+ inout->bytes = (info1 >> 4) & 0x7;
+ inout->flags = 0;
+ inout->flags |= (info1 & BIT(0)) ? INOUT_IN : 0;
+ inout->flags |= (info1 & BIT(3)) ? INOUT_REP : 0;
+ inout->flags |= (info1 & BIT(2)) ? INOUT_STR : 0;
+ inout->port = (uint16_t)(info1 >> 16);
+ inout->eax = (uint32_t)(state->rax);
+
+ if ((inout->flags & INOUT_STR) != 0) {
+ /*
+ * The effective segment number in EXITINFO1[12:10] is populated
+ * only if the processor has the DecodeAssist capability.
+ *
+ * This is not specified explicitly in APMv2 but can be verified
+ * empirically.
+ */
+ if (!decode_assist()) {
+ /*
+ * Without decoding assistance, force the task of
+ * emulating the ins/outs on userspace.
+ */
+ vmexit->exitcode = VM_EXITCODE_INST_EMUL;
+ bzero(&vmexit->u.inst_emul,
+ sizeof (vmexit->u.inst_emul));
+ return (UNHANDLED);
+ }
+
+ /*
+ * Bits 7-9 encode the address size of ins/outs operations where
+ * the 1/2/4 values correspond to 16/32/64 bit sizes.
+ */
+ inout->addrsize = 2 * ((info1 >> 7) & 0x7);
+ VERIFY(inout->addrsize == 2 || inout->addrsize == 4 ||
+ inout->addrsize == 8);
+
+ if (inout->flags & INOUT_IN) {
+ /*
+ * For INS instructions, %es (encoded as 0) is the
+ * implied segment for the operation.
+ */
+ inout->segment = 0;
+ } else {
+ /*
+ * Bits 10-12 encode the segment for OUTS.
+ * This value follows the standard x86 segment order.
+ */
+ inout->segment = (info1 >> 10) & 0x7;
+ }
}
+ vmexit->exitcode = VM_EXITCODE_INOUT;
+ svm_paging_info(svm_get_vmcb(svm_sc, vcpu), &paging);
+ vie = vm_vie_ctx(svm_sc->vm, vcpu);
+ vie_init_inout(vie, inout, vmexit->inst_length, &paging);
+
+ /* The in/out emulation will handle advancing %rip */
+ vmexit->inst_length = 0;
+
return (UNHANDLED);
}
@@ -857,7 +825,6 @@ npf_fault_type(uint64_t exitinfo1)
static bool
svm_npf_emul_fault(uint64_t exitinfo1)
{
-
if (exitinfo1 & VMCB_NPF_INFO1_ID) {
return (false);
}
@@ -870,48 +837,52 @@ svm_npf_emul_fault(uint64_t exitinfo1)
return (false);
}
- return (true);
+ return (true);
}
static void
-svm_handle_inst_emul(struct vmcb *vmcb, uint64_t gpa, struct vm_exit *vmexit)
+svm_handle_mmio_emul(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit,
+ uint64_t gpa)
{
- struct vm_guest_paging *paging;
- struct vmcb_segment seg;
struct vmcb_ctrl *ctrl;
- char *inst_bytes;
- int error, inst_len;
+ struct vmcb *vmcb;
+ struct vie *vie;
+ struct vm_guest_paging paging;
+ struct vmcb_segment seg;
+ char *inst_bytes = NULL;
+ uint8_t inst_len = 0;
+ int error;
+ vmcb = svm_get_vmcb(svm_sc, vcpu);
ctrl = &vmcb->ctrl;
- paging = &vmexit->u.inst_emul.paging;
- vmexit->exitcode = VM_EXITCODE_INST_EMUL;
- vmexit->u.inst_emul.gpa = gpa;
- vmexit->u.inst_emul.gla = VIE_INVALID_GLA;
- svm_paging_info(vmcb, paging);
+ vmexit->exitcode = VM_EXITCODE_MMIO_EMUL;
+ vmexit->u.mmio_emul.gpa = gpa;
+ vmexit->u.mmio_emul.gla = VIE_INVALID_GLA;
+ svm_paging_info(vmcb, &paging);
error = vmcb_seg(vmcb, VM_REG_GUEST_CS, &seg);
KASSERT(error == 0, ("%s: vmcb_seg(CS) error %d", __func__, error));
- switch(paging->cpu_mode) {
+ switch (paging.cpu_mode) {
case CPU_MODE_REAL:
- vmexit->u.inst_emul.cs_base = seg.base;
- vmexit->u.inst_emul.cs_d = 0;
+ vmexit->u.mmio_emul.cs_base = seg.base;
+ vmexit->u.mmio_emul.cs_d = 0;
break;
case CPU_MODE_PROTECTED:
case CPU_MODE_COMPATIBILITY:
- vmexit->u.inst_emul.cs_base = seg.base;
+ vmexit->u.mmio_emul.cs_base = seg.base;
/*
* Section 4.8.1 of APM2, Default Operand Size or D bit.
*/
- vmexit->u.inst_emul.cs_d = (seg.attrib & VMCB_CS_ATTRIB_D) ?
+ vmexit->u.mmio_emul.cs_d = (seg.attrib & VMCB_CS_ATTRIB_D) ?
1 : 0;
break;
default:
- vmexit->u.inst_emul.cs_base = 0;
- vmexit->u.inst_emul.cs_d = 0;
- break;
+ vmexit->u.mmio_emul.cs_base = 0;
+ vmexit->u.mmio_emul.cs_d = 0;
+ break;
}
/*
@@ -920,11 +891,9 @@ svm_handle_inst_emul(struct vmcb *vmcb, uint64_t gpa, struct vm_exit *vmexit)
if (decode_assist() && !disable_npf_assist) {
inst_len = ctrl->inst_len;
inst_bytes = (char *)ctrl->inst_bytes;
- } else {
- inst_len = 0;
- inst_bytes = NULL;
}
- vie_init(&vmexit->u.inst_emul.vie, inst_bytes, inst_len);
+ vie = vm_vie_ctx(svm_sc->vm, vcpu);
+ vie_init_mmio(vie, inst_bytes, inst_len, &paging, gpa);
}
#ifdef KTR
@@ -1014,7 +983,7 @@ svm_save_intinfo(struct svm_softc *svm_sc, int vcpu)
uint64_t intinfo;
ctrl = svm_get_vmcb_ctrl(svm_sc, vcpu);
- intinfo = ctrl->exitintinfo;
+ intinfo = ctrl->exitintinfo;
if (!VMCB_EXITINTINFO_VALID(intinfo))
return;
@@ -1488,7 +1457,7 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit)
eax = state->rax;
ecx = ctx->sctx_rcx;
edx = ctx->sctx_rdx;
- retu = false;
+ retu = false;
if (info1) {
vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_WRMSR, 1);
@@ -1520,7 +1489,7 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit)
}
break;
case VMCB_EXIT_IO:
- handled = svm_handle_io(svm_sc, vcpu, vmexit);
+ handled = svm_handle_inout(svm_sc, vcpu, vmexit);
vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_INOUT, 1);
break;
case VMCB_EXIT_CPUID:
@@ -1552,9 +1521,9 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit)
"on gpa %#lx/%#lx at rip %#lx",
info2, info1, state->rip);
} else if (svm_npf_emul_fault(info1)) {
- svm_handle_inst_emul(vmcb, info2, vmexit);
- vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_INST_EMUL, 1);
- VCPU_CTR3(svm_sc->vm, vcpu, "inst_emul fault "
+ svm_handle_mmio_emul(svm_sc, vcpu, vmexit, info2);
+ vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_MMIO_EMUL, 1);
+ VCPU_CTR3(svm_sc->vm, vcpu, "mmio_emul fault "
"for gpa %#lx/%#lx at rip %#lx",
info2, info1, state->rip);
}
@@ -1568,7 +1537,7 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit)
default:
vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_UNKNOWN, 1);
break;
- }
+ }
VCPU_CTR4(svm_sc->vm, vcpu, "%s %s vmexit at %#lx/%d",
handled ? "handled" : "unhandled", exit_reason_to_str(code),
@@ -1999,7 +1968,7 @@ svm_dr_leave_guest(struct svm_regctx *gctx)
* Start vcpu with specified RIP.
*/
static int
-svm_vmrun(void *arg, int vcpu, register_t rip, pmap_t pmap,
+svm_vmrun(void *arg, int vcpu, register_t rip, pmap_t pmap,
struct vm_eventinfo *evinfo)
{
struct svm_regctx *gctx;
@@ -2153,7 +2122,7 @@ svm_vmrun(void *arg, int vcpu, register_t rip, pmap_t pmap,
/* Restore host LDTR. */
lldt(ldt_sel);
- /* #VMEXIT disables interrupts so re-enable them here. */
+ /* #VMEXIT disables interrupts so re-enable them here. */
enable_gintr();
/* Update 'nextrip' */
diff --git a/usr/src/uts/i86pc/io/vmm/amd/svm_msr.c b/usr/src/uts/i86pc/io/vmm/amd/svm_msr.c
index 75502d3c8e..ea344165dd 100644
--- a/usr/src/uts/i86pc/io/vmm/amd/svm_msr.c
+++ b/usr/src/uts/i86pc/io/vmm/amd/svm_msr.c
@@ -72,7 +72,7 @@ static uint64_t host_msrs[HOST_MSR_NUM];
void
svm_msr_init(void)
{
- /*
+ /*
* It is safe to cache the values of the following MSRs because they
* don't change based on curcpu, curproc or curthread.
*/
diff --git a/usr/src/uts/i86pc/io/vmm/amd/svm_softc.h b/usr/src/uts/i86pc/io/vmm/amd/svm_softc.h
index b5ac1903e7..0b996d0ab4 100644
--- a/usr/src/uts/i86pc/io/vmm/amd/svm_softc.h
+++ b/usr/src/uts/i86pc/io/vmm/amd/svm_softc.h
@@ -72,9 +72,9 @@ struct svm_vcpu {
struct svm_softc {
uint8_t apic_page[VM_MAXCPU][PAGE_SIZE];
struct svm_vcpu vcpu[VM_MAXCPU];
- vm_offset_t nptp; /* nested page table */
- uint8_t *iopm_bitmap; /* shared by all vcpus */
- uint8_t *msr_bitmap; /* shared by all vcpus */
+ vm_offset_t nptp; /* nested page table */
+ uint8_t *iopm_bitmap; /* shared by all vcpus */
+ uint8_t *msr_bitmap; /* shared by all vcpus */
struct vm *vm;
#ifndef __FreeBSD__
uint64_t host_msrs[VM_MAXCPU][SVM_HOST_MSR_NUM];
diff --git a/usr/src/uts/i86pc/io/vmm/intel/ept.c b/usr/src/uts/i86pc/io/vmm/intel/ept.c
index dcc4e3c330..5e5253780e 100644
--- a/usr/src/uts/i86pc/io/vmm/intel/ept.c
+++ b/usr/src/uts/i86pc/io/vmm/intel/ept.c
@@ -157,7 +157,7 @@ ept_dump(uint64_t *ptp, int nlevels)
if (ptpval == 0)
continue;
-
+
for (t = 0; t < tabs; t++)
printf("\t");
printf("%3d 0x%016lx\n", i, ptpval);
diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmcs.c b/usr/src/uts/i86pc/io/vmm/intel/vmcs.c
index bb7ee45048..f1a08cc57d 100644
--- a/usr/src/uts/i86pc/io/vmm/intel/vmcs.c
+++ b/usr/src/uts/i86pc/io/vmm/intel/vmcs.c
@@ -388,7 +388,7 @@ vmcs_init(struct vmcs *vmcs)
cr0 = vmm_get_host_cr0();
if ((error = vmwrite(VMCS_HOST_CR0, cr0)) != 0)
goto done;
-
+
cr4 = vmm_get_host_cr4() | CR4_VMXE;
if ((error = vmwrite(VMCS_HOST_CR4, cr4)) != 0)
goto done;
diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx.c b/usr/src/uts/i86pc/io/vmm/intel/vmx.c
index 8469c99f33..c46560948e 100644
--- a/usr/src/uts/i86pc/io/vmm/intel/vmx.c
+++ b/usr/src/uts/i86pc/io/vmm/intel/vmx.c
@@ -77,7 +77,7 @@ __FBSDID("$FreeBSD$");
#include <machine/vmm.h>
#include <machine/vmm_dev.h>
-#include <machine/vmm_instruction_emul.h>
+#include <sys/vmm_instruction_emul.h>
#include "vmm_lapic.h"
#include "vmm_host.h"
#include "vmm_ioport.h"
@@ -117,7 +117,7 @@ __FBSDID("$FreeBSD$");
PROCBASED_CR8_STORE_EXITING)
#else
/* We consider TSC offset a necessity for unsynched TSC handling */
-#define PROCBASED_CTLS_ONE_SETTING \
+#define PROCBASED_CTLS_ONE_SETTING \
(PROCBASED_SECONDARY_CONTROLS | \
PROCBASED_TSC_OFFSET | \
PROCBASED_MWAIT_EXITING | \
@@ -1885,69 +1885,6 @@ vmx_paging_mode(void)
return (PAGING_MODE_PAE);
}
-static uint64_t
-inout_str_index(struct vmx *vmx, int vcpuid, int in)
-{
- uint64_t val;
- int error;
- enum vm_reg_name reg;
-
- reg = in ? VM_REG_GUEST_RDI : VM_REG_GUEST_RSI;
- error = vmx_getreg(vmx, vcpuid, reg, &val);
- KASSERT(error == 0, ("%s: vmx_getreg error %d", __func__, error));
- return (val);
-}
-
-static uint64_t
-inout_str_count(struct vmx *vmx, int vcpuid, int rep)
-{
- uint64_t val;
- int error;
-
- if (rep) {
- error = vmx_getreg(vmx, vcpuid, VM_REG_GUEST_RCX, &val);
- KASSERT(!error, ("%s: vmx_getreg error %d", __func__, error));
- } else {
- val = 1;
- }
- return (val);
-}
-
-static int
-inout_str_addrsize(uint32_t inst_info)
-{
- uint32_t size;
-
- size = (inst_info >> 7) & 0x7;
- switch (size) {
- case 0:
- return (2); /* 16 bit */
- case 1:
- return (4); /* 32 bit */
- case 2:
- return (8); /* 64 bit */
- default:
- panic("%s: invalid size encoding %d", __func__, size);
- }
-}
-
-static void
-inout_str_seginfo(struct vmx *vmx, int vcpuid, uint32_t inst_info, int in,
- struct vm_inout_str *vis)
-{
- int error, s;
-
- if (in) {
- vis->seg_name = VM_REG_GUEST_ES;
- } else {
- s = (inst_info >> 15) & 0x7;
- vis->seg_name = vm_segment_name(s);
- }
-
- error = vmx_getdesc(vmx, vcpuid, vis->seg_name, &vis->seg_desc);
- KASSERT(error == 0, ("%s: vmx_getdesc error %d", __func__, error));
-}
-
static void
vmx_paging_info(struct vm_guest_paging *paging)
{
@@ -1958,35 +1895,89 @@ vmx_paging_info(struct vm_guest_paging *paging)
}
static void
-vmexit_inst_emul(struct vm_exit *vmexit, uint64_t gpa, uint64_t gla)
+vmexit_mmio_emul(struct vm_exit *vmexit, struct vie *vie, uint64_t gpa,
+ uint64_t gla)
{
- struct vm_guest_paging *paging;
+ struct vm_guest_paging paging;
uint32_t csar;
- paging = &vmexit->u.inst_emul.paging;
-
- vmexit->exitcode = VM_EXITCODE_INST_EMUL;
+ vmexit->exitcode = VM_EXITCODE_MMIO_EMUL;
vmexit->inst_length = 0;
- vmexit->u.inst_emul.gpa = gpa;
- vmexit->u.inst_emul.gla = gla;
- vmx_paging_info(paging);
- switch (paging->cpu_mode) {
+ vmexit->u.mmio_emul.gpa = gpa;
+ vmexit->u.mmio_emul.gla = gla;
+ vmx_paging_info(&paging);
+
+ switch (paging.cpu_mode) {
case CPU_MODE_REAL:
- vmexit->u.inst_emul.cs_base = vmcs_read(VMCS_GUEST_CS_BASE);
- vmexit->u.inst_emul.cs_d = 0;
+ vmexit->u.mmio_emul.cs_base = vmcs_read(VMCS_GUEST_CS_BASE);
+ vmexit->u.mmio_emul.cs_d = 0;
break;
case CPU_MODE_PROTECTED:
case CPU_MODE_COMPATIBILITY:
- vmexit->u.inst_emul.cs_base = vmcs_read(VMCS_GUEST_CS_BASE);
+ vmexit->u.mmio_emul.cs_base = vmcs_read(VMCS_GUEST_CS_BASE);
csar = vmcs_read(VMCS_GUEST_CS_ACCESS_RIGHTS);
- vmexit->u.inst_emul.cs_d = SEG_DESC_DEF32(csar);
+ vmexit->u.mmio_emul.cs_d = SEG_DESC_DEF32(csar);
break;
default:
- vmexit->u.inst_emul.cs_base = 0;
- vmexit->u.inst_emul.cs_d = 0;
+ vmexit->u.mmio_emul.cs_base = 0;
+ vmexit->u.mmio_emul.cs_d = 0;
break;
}
- vie_init(&vmexit->u.inst_emul.vie, NULL, 0);
+
+ vie_init_mmio(vie, NULL, 0, &paging, gpa);
+}
+
+static void
+vmexit_inout(struct vm_exit *vmexit, struct vie *vie, uint64_t qual,
+ uint32_t eax)
+{
+ struct vm_guest_paging paging;
+ struct vm_inout *inout;
+
+ inout = &vmexit->u.inout;
+
+ inout->bytes = (qual & 0x7) + 1;
+ inout->flags = 0;
+ inout->flags |= (qual & 0x8) ? INOUT_IN : 0;
+ inout->flags |= (qual & 0x10) ? INOUT_STR : 0;
+ inout->flags |= (qual & 0x20) ? INOUT_REP : 0;
+ inout->port = (uint16_t)(qual >> 16);
+ inout->eax = eax;
+ if (inout->flags & INOUT_STR) {
+ uint64_t inst_info;
+
+ inst_info = vmcs_read(VMCS_EXIT_INSTRUCTION_INFO);
+
+ /*
+ * Bits 7-9 encode the address size of ins/outs operations where
+ * the 0/1/2 values correspond to 16/32/64 bit sizes.
+ */
+ inout->addrsize = 2 << (1 + ((inst_info >> 7) & 0x3));
+ VERIFY(inout->addrsize == 2 || inout->addrsize == 4 ||
+ inout->addrsize == 8);
+
+ if (inout->flags & INOUT_IN) {
+ /*
+ * The bits describing the segment in INSTRUCTION_INFO
+ * are not defined for ins, leaving it to system
+ * software to assume %es (encoded as 0)
+ */
+ inout->segment = 0;
+ } else {
+ /*
+ * Bits 15-17 encode the segment for OUTS.
+ * This value follows the standard x86 segment order.
+ */
+ inout->segment = (inst_info >> 15) & 0x7;
+ }
+ }
+
+ vmexit->exitcode = VM_EXITCODE_INOUT;
+ vmx_paging_info(&paging);
+ vie_init_inout(vie, inout, vmexit->inst_length, &paging);
+
+ /* The in/out emulation will handle advancing %rip */
+ vmexit->inst_length = 0;
}
static int
@@ -2134,6 +2125,7 @@ vmx_handle_apic_access(struct vmx *vmx, int vcpuid, struct vm_exit *vmexit)
{
uint64_t qual;
int access_type, offset, allowed;
+ struct vie *vie;
if (!apic_access_virtualization(vmx, vcpuid))
return (UNHANDLED);
@@ -2180,7 +2172,8 @@ vmx_handle_apic_access(struct vmx *vmx, int vcpuid, struct vm_exit *vmexit)
}
if (allowed) {
- vmexit_inst_emul(vmexit, DEFAULT_APIC_BASE + offset,
+ vie = vm_vie_ctx(vmx->vm, vcpuid);
+ vmexit_mmio_emul(vmexit, vie, DEFAULT_APIC_BASE + offset,
VIE_INVALID_GLA);
}
@@ -2262,10 +2255,10 @@ emulate_rdmsr(struct vmx *vmx, int vcpuid, u_int num, bool *retu)
static int
vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
{
- int error, errcode, errcode_valid, handled, in;
+ int error, errcode, errcode_valid, handled;
struct vmxctx *vmxctx;
+ struct vie *vie;
struct vlapic *vlapic;
- struct vm_inout_str *vis;
struct vm_task_switch *ts;
uint32_t eax, ecx, edx, idtvec_info, idtvec_err, intr_info, inst_info;
uint32_t intr_type, intr_vec, reason;
@@ -2522,25 +2515,8 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
return (1);
case EXIT_REASON_INOUT:
vmm_stat_incr(vmx->vm, vcpu, VMEXIT_INOUT, 1);
- vmexit->exitcode = VM_EXITCODE_INOUT;
- vmexit->u.inout.bytes = (qual & 0x7) + 1;
- vmexit->u.inout.in = in = (qual & 0x8) ? 1 : 0;
- vmexit->u.inout.string = (qual & 0x10) ? 1 : 0;
- vmexit->u.inout.rep = (qual & 0x20) ? 1 : 0;
- vmexit->u.inout.port = (uint16_t)(qual >> 16);
- vmexit->u.inout.eax = (uint32_t)(vmxctx->guest_rax);
- if (vmexit->u.inout.string) {
- inst_info = vmcs_read(VMCS_EXIT_INSTRUCTION_INFO);
- vmexit->exitcode = VM_EXITCODE_INOUT_STR;
- vis = &vmexit->u.inout_str;
- vmx_paging_info(&vis->paging);
- vis->rflags = vmcs_read(VMCS_GUEST_RFLAGS);
- vis->cr0 = vmcs_read(VMCS_GUEST_CR0);
- vis->index = inout_str_index(vmx, vcpu, in);
- vis->count = inout_str_count(vmx, vcpu, vis->inout.rep);
- vis->addrsize = inout_str_addrsize(inst_info);
- inout_str_seginfo(vmx, vcpu, inst_info, in, vis);
- }
+ vie = vm_vie_ctx(vmx->vm, vcpu);
+ vmexit_inout(vmexit, vie, qual, (uint32_t)vmxctx->guest_rax);
SDT_PROBE3(vmm, vmx, exit, inout, vmx, vcpu, vmexit);
break;
case EXIT_REASON_CPUID:
@@ -2651,8 +2627,9 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
SDT_PROBE5(vmm, vmx, exit, nestedfault,
vmx, vcpu, vmexit, gpa, qual);
} else if (ept_emulation_fault(qual)) {
- vmexit_inst_emul(vmexit, gpa, vmcs_gla());
- vmm_stat_incr(vmx->vm, vcpu, VMEXIT_INST_EMUL, 1);
+ vie = vm_vie_ctx(vmx->vm, vcpu);
+ vmexit_mmio_emul(vmexit, vie, gpa, vmcs_gla());
+ vmm_stat_incr(vmx->vm, vcpu, VMEXIT_MMIO_EMUL, 1);
SDT_PROBE4(vmm, vmx, exit, mmiofault,
vmx, vcpu, vmexit, gpa);
}
diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.c b/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.c
index 9121e46b40..6c37c9c234 100644
--- a/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.c
+++ b/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.c
@@ -451,7 +451,7 @@ vmx_wrmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t val, bool *retu)
uint64_t *guest_msrs;
uint64_t changed;
int error;
-
+
guest_msrs = vmx->guest_msrs[vcpuid];
error = 0;
diff --git a/usr/src/uts/i86pc/io/vmm/intel/vtd.c b/usr/src/uts/i86pc/io/vmm/intel/vtd.c
index 41c2c5b2f8..50c0934ace 100644
--- a/usr/src/uts/i86pc/io/vmm/intel/vtd.c
+++ b/usr/src/uts/i86pc/io/vmm/intel/vtd.c
@@ -176,7 +176,7 @@ domain_id(void)
if (dom == NULL)
break; /* found it */
}
-
+
if (id >= max_domains)
panic("domain ids exhausted");
@@ -279,7 +279,7 @@ vtd_iotlb_global_invalidate(struct vtdmap *vtdmap)
offset = VTD_ECAP_IRO(vtdmap->ext_cap) * 16;
iotlb_reg = (volatile uint64_t *)((caddr_t)vtdmap + offset + 8);
-
+
*iotlb_reg = VTD_IIR_IVT | VTD_IIR_IIRG_GLOBAL |
VTD_IIR_DRAIN_READS | VTD_IIR_DRAIN_WRITES;
@@ -851,7 +851,7 @@ static void
vtd_destroy_domain(void *arg)
{
struct domain *dom;
-
+
dom = arg;
SLIST_REMOVE(&domhead, dom, domain, next);
diff --git a/usr/src/uts/i86pc/io/vmm/io/ppt.c b/usr/src/uts/i86pc/io/vmm/io/ppt.c
index a71ce86c2d..2f715bcc42 100644
--- a/usr/src/uts/i86pc/io/vmm/io/ppt.c
+++ b/usr/src/uts/i86pc/io/vmm/io/ppt.c
@@ -1321,7 +1321,7 @@ ppt_setup_msix(struct vm *vm, int vcpu, int pptfd, int idx, uint64_t addr,
/*
* First-time configuration:
- * Allocate the MSI-X table
+ * Allocate the MSI-X table
* Allocate the IRQ resources
* Set up some variables in ppt->msix
*/
diff --git a/usr/src/uts/i86pc/io/vmm/io/vatpic.c b/usr/src/uts/i86pc/io/vmm/io/vatpic.c
index ba4cd7785e..b81259647c 100644
--- a/usr/src/uts/i86pc/io/vmm/io/vatpic.c
+++ b/usr/src/uts/i86pc/io/vmm/io/vatpic.c
@@ -709,8 +709,8 @@ vatpic_write(struct vatpic *vatpic, struct atpic *atpic, bool in, int port,
}
int
-vatpic_master_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
- uint32_t *eax)
+vatpic_master_handler(struct vm *vm, int vcpuid, bool in, uint16_t port,
+ uint8_t bytes, uint32_t *eax)
{
struct vatpic *vatpic;
struct atpic *atpic;
@@ -720,17 +720,17 @@ vatpic_master_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
if (bytes != 1)
return (-1);
-
+
if (in) {
return (vatpic_read(vatpic, atpic, in, port, bytes, eax));
}
-
+
return (vatpic_write(vatpic, atpic, in, port, bytes, eax));
}
int
-vatpic_slave_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
- uint32_t *eax)
+vatpic_slave_handler(struct vm *vm, int vcpuid, bool in, uint16_t port,
+ uint8_t bytes, uint32_t *eax)
{
struct vatpic *vatpic;
struct atpic *atpic;
@@ -749,8 +749,8 @@ vatpic_slave_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
}
int
-vatpic_elc_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
- uint32_t *eax)
+vatpic_elc_handler(struct vm *vm, int vcpuid, bool in, uint16_t port,
+ uint8_t bytes, uint32_t *eax)
{
struct vatpic *vatpic;
bool is_master;
diff --git a/usr/src/uts/i86pc/io/vmm/io/vatpic.h b/usr/src/uts/i86pc/io/vmm/io/vatpic.h
index d4a1be1820..dcb8ea6c6f 100644
--- a/usr/src/uts/i86pc/io/vmm/io/vatpic.h
+++ b/usr/src/uts/i86pc/io/vmm/io/vatpic.h
@@ -39,12 +39,12 @@
struct vatpic *vatpic_init(struct vm *vm);
void vatpic_cleanup(struct vatpic *vatpic);
-int vatpic_master_handler(struct vm *vm, int vcpuid, bool in, int port,
- int bytes, uint32_t *eax);
-int vatpic_slave_handler(struct vm *vm, int vcpuid, bool in, int port,
- int bytes, uint32_t *eax);
-int vatpic_elc_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
- uint32_t *eax);
+int vatpic_master_handler(struct vm *vm, int vcpuid, bool in, uint16_t port,
+ uint8_t bytes, uint32_t *eax);
+int vatpic_slave_handler(struct vm *vm, int vcpuid, bool in, uint16_t port,
+ uint8_t bytes, uint32_t *eax);
+int vatpic_elc_handler(struct vm *vm, int vcpuid, bool in, uint16_t port,
+ uint8_t bytes, uint32_t *eax);
int vatpic_assert_irq(struct vm *vm, int irq);
int vatpic_deassert_irq(struct vm *vm, int irq);
diff --git a/usr/src/uts/i86pc/io/vmm/io/vatpit.c b/usr/src/uts/i86pc/io/vmm/io/vatpit.c
index 03f63798e7..47cb40f9bd 100644
--- a/usr/src/uts/i86pc/io/vmm/io/vatpit.c
+++ b/usr/src/uts/i86pc/io/vmm/io/vatpit.c
@@ -336,7 +336,7 @@ vatpit_update_mode(struct vatpit *vatpit, uint8_t val)
}
int
-vatpit_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
+vatpit_handler(struct vm *vm, int vcpuid, bool in, uint16_t port, uint8_t bytes,
uint32_t *eax)
{
struct vatpit *vatpit;
@@ -419,8 +419,8 @@ vatpit_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
}
int
-vatpit_nmisc_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
- uint32_t *eax)
+vatpit_nmisc_handler(struct vm *vm, int vcpuid, bool in, uint16_t port,
+ uint8_t bytes, uint32_t *eax)
{
struct vatpit *vatpit;
diff --git a/usr/src/uts/i86pc/io/vmm/io/vatpit.h b/usr/src/uts/i86pc/io/vmm/io/vatpit.h
index 4bf9fe048d..512ce20735 100644
--- a/usr/src/uts/i86pc/io/vmm/io/vatpit.h
+++ b/usr/src/uts/i86pc/io/vmm/io/vatpit.h
@@ -39,10 +39,10 @@
struct vatpit *vatpit_init(struct vm *vm);
void vatpit_cleanup(struct vatpit *vatpit);
-int vatpit_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
- uint32_t *eax);
-int vatpit_nmisc_handler(struct vm *vm, int vcpuid, bool in, int port,
- int bytes, uint32_t *eax);
+int vatpit_handler(struct vm *vm, int vcpuid, bool in, uint16_t port,
+ uint8_t bytes, uint32_t *eax);
+int vatpit_nmisc_handler(struct vm *vm, int vcpuid, bool in, uint16_t port,
+ uint8_t bytes, uint32_t *eax);
#ifndef __FreeBSD__
void vatpit_localize_resources(struct vatpit *);
diff --git a/usr/src/uts/i86pc/io/vmm/io/vhpet.c b/usr/src/uts/i86pc/io/vmm/io/vhpet.c
index c82b4626bd..29e9188b77 100644
--- a/usr/src/uts/i86pc/io/vmm/io/vhpet.c
+++ b/usr/src/uts/i86pc/io/vmm/io/vhpet.c
@@ -61,10 +61,10 @@ static MALLOC_DEFINE(M_VHPET, "vhpet", "bhyve virtual hpet");
#define FS_PER_S 1000000000000000ul
/* Timer N Configuration and Capabilities Register */
-#define HPET_TCAP_RO_MASK (HPET_TCAP_INT_ROUTE | \
- HPET_TCAP_FSB_INT_DEL | \
- HPET_TCAP_SIZE | \
- HPET_TCAP_PER_INT)
+#define HPET_TCAP_RO_MASK (HPET_TCAP_INT_ROUTE | \
+ HPET_TCAP_FSB_INT_DEL | \
+ HPET_TCAP_SIZE | \
+ HPET_TCAP_PER_INT)
/*
* HPET requires at least 3 timers and up to 32 timers per block.
*/
@@ -242,7 +242,7 @@ vhpet_timer_interrupt(struct vhpet *vhpet, int n)
lapic_intr_msi(vhpet->vm, vhpet->timer[n].msireg >> 32,
vhpet->timer[n].msireg & 0xffffffff);
return;
- }
+ }
pin = vhpet_timer_ioapic_pin(vhpet, n);
if (pin == 0) {
@@ -504,7 +504,7 @@ vhpet_mmio_write(void *vm, int vcpuid, uint64_t gpa, uint64_t val, int size,
if ((offset & 0x4) != 0) {
mask <<= 32;
data <<= 32;
- }
+ }
break;
default:
VM_CTR2(vhpet->vm, "hpet invalid mmio write: "
@@ -658,7 +658,7 @@ vhpet_mmio_read(void *vm, int vcpuid, uint64_t gpa, uint64_t *rval, int size,
if (offset == HPET_CAPABILITIES || offset == HPET_CAPABILITIES + 4) {
data = vhpet_capabilities();
- goto done;
+ goto done;
}
if (offset == HPET_CONFIG || offset == HPET_CONFIG + 4) {
diff --git a/usr/src/uts/i86pc/io/vmm/io/vhpet.h b/usr/src/uts/i86pc/io/vmm/io/vhpet.h
index 8e28241b32..e6ded31a66 100644
--- a/usr/src/uts/i86pc/io/vmm/io/vhpet.h
+++ b/usr/src/uts/i86pc/io/vmm/io/vhpet.h
@@ -40,7 +40,7 @@
#define VHPET_SIZE 1024
struct vhpet *vhpet_init(struct vm *vm);
-void vhpet_cleanup(struct vhpet *vhpet);
+void vhpet_cleanup(struct vhpet *vhpet);
int vhpet_mmio_write(void *vm, int vcpuid, uint64_t gpa, uint64_t val,
int size, void *arg);
int vhpet_mmio_read(void *vm, int vcpuid, uint64_t gpa, uint64_t *val,
diff --git a/usr/src/uts/i86pc/io/vmm/io/vlapic.c b/usr/src/uts/i86pc/io/vmm/io/vlapic.c
index af902ba40e..c1825f4264 100644
--- a/usr/src/uts/i86pc/io/vmm/io/vlapic.c
+++ b/usr/src/uts/i86pc/io/vmm/io/vlapic.c
@@ -164,7 +164,7 @@ void
vlapic_id_write_handler(struct vlapic *vlapic)
{
struct LAPIC *lapic;
-
+
/*
* We don't allow the ID register to be modified so reset it back to
* its default value.
@@ -214,7 +214,7 @@ vlapic_get_ccr(struct vlapic *vlapic)
struct bintime bt_now, bt_rem;
struct LAPIC *lapic;
uint32_t ccr;
-
+
ccr = 0;
lapic = vlapic->apic_page;
@@ -250,7 +250,7 @@ vlapic_dcr_write_handler(struct vlapic *vlapic)
{
struct LAPIC *lapic;
int divisor;
-
+
lapic = vlapic->apic_page;
VLAPIC_TIMER_LOCK(vlapic);
@@ -275,7 +275,7 @@ void
vlapic_esr_write_handler(struct vlapic *vlapic)
{
struct LAPIC *lapic;
-
+
lapic = vlapic->apic_page;
lapic->esr = vlapic->esr_pending;
vlapic->esr_pending = 0;
@@ -333,7 +333,7 @@ static __inline uint32_t *
vlapic_get_lvtptr(struct vlapic *vlapic, uint32_t offset)
{
struct LAPIC *lapic = vlapic->apic_page;
- int i;
+ int i;
switch (offset) {
case APIC_OFFSET_CMCI_LVT:
@@ -405,9 +405,9 @@ vlapic_lvt_write_handler(struct vlapic *vlapic, uint32_t offset)
uint32_t *lvtptr, mask, val;
struct LAPIC *lapic;
int idx;
-
+
lapic = vlapic->apic_page;
- lvtptr = vlapic_get_lvtptr(vlapic, offset);
+ lvtptr = vlapic_get_lvtptr(vlapic, offset);
val = *lvtptr;
idx = lvt_off_to_idx(offset);
@@ -635,7 +635,7 @@ static __inline int
vlapic_periodic_timer(struct vlapic *vlapic)
{
uint32_t lvt;
-
+
lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT);
return (vlapic_get_lvt_field(lvt, APIC_LVTT_TM_PERIODIC));
@@ -988,7 +988,6 @@ vlapic_icrlo_write_handler(struct vlapic *vlapic, bool *retu)
uint64_t icrval;
uint32_t dest, vec, mode;
struct vlapic *vlapic2;
- struct vm_exit *vmexit;
struct LAPIC *lapic;
uint16_t maxcpus;
@@ -1082,13 +1081,7 @@ vlapic_icrlo_write_handler(struct vlapic *vlapic, bool *retu)
return (0);
vlapic2->boot_state = BS_RUNNING;
-
- *retu = true;
- vmexit = vm_exitinfo(vlapic->vm, vlapic->vcpuid);
- vmexit->exitcode = VM_EXITCODE_SPINUP_AP;
- vmexit->u.spinup_ap.vcpu = dest;
- vmexit->u.spinup_ap.rip = vec << PAGE_SHIFT;
-
+ vm_req_spinup_ap(vlapic->vm, dest, vec << PAGE_SHIFT);
return (0);
}
}
@@ -1117,7 +1110,7 @@ int
vlapic_pending_intr(struct vlapic *vlapic, int *vecptr)
{
struct LAPIC *lapic = vlapic->apic_page;
- int idx, i, bitpos, vector;
+ int idx, i, bitpos, vector;
uint32_t *irrptr, val;
vlapic_update_ppr(vlapic);
@@ -1138,7 +1131,7 @@ vlapic_pending_intr(struct vlapic *vlapic, int *vecptr)
if (vecptr != NULL)
*vecptr = vector;
return (1);
- } else
+ } else
break;
}
}
@@ -1156,7 +1149,7 @@ vlapic_intr_accepted(struct vlapic *vlapic, int vector)
return ((*vlapic->ops.intr_accepted)(vlapic, vector));
/*
- * clear the ready bit for vector being accepted in irr
+ * clear the ready bit for vector being accepted in irr
* and set the vector as in service in isr.
*/
idx = (vector / 32) * 4;
@@ -1247,7 +1240,7 @@ vlapic_read(struct vlapic *vlapic, int mmio_access, uint64_t offset,
*data = 0;
goto done;
}
-
+
offset &= ~3;
switch(offset)
{
@@ -1296,17 +1289,17 @@ vlapic_read(struct vlapic *vlapic, int mmio_access, uint64_t offset,
case APIC_OFFSET_ESR:
*data = lapic->esr;
break;
- case APIC_OFFSET_ICR_LOW:
+ case APIC_OFFSET_ICR_LOW:
*data = lapic->icr_lo;
if (x2apic(vlapic))
*data |= (uint64_t)lapic->icr_hi << 32;
break;
- case APIC_OFFSET_ICR_HI:
+ case APIC_OFFSET_ICR_HI:
*data = lapic->icr_hi;
break;
case APIC_OFFSET_CMCI_LVT:
case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
- *data = vlapic_get_lvt(vlapic, offset);
+ *data = vlapic_get_lvt(vlapic, offset);
#ifdef INVARIANTS
reg = vlapic_get_lvtptr(vlapic, offset);
KASSERT(*data == *reg, ("inconsistent lvt value at "
@@ -1401,7 +1394,7 @@ vlapic_write(struct vlapic *vlapic, int mmio_access, uint64_t offset,
lapic->svr = data;
vlapic_svr_write_handler(vlapic);
break;
- case APIC_OFFSET_ICR_LOW:
+ case APIC_OFFSET_ICR_LOW:
lapic->icr_lo = data;
if (x2apic(vlapic))
lapic->icr_hi = data >> 32;
@@ -1455,7 +1448,7 @@ static void
vlapic_reset(struct vlapic *vlapic)
{
struct LAPIC *lapic;
-
+
lapic = vlapic->apic_page;
bzero(lapic, sizeof(struct LAPIC));
diff --git a/usr/src/uts/i86pc/io/vmm/io/vpmtmr.c b/usr/src/uts/i86pc/io/vmm/io/vpmtmr.c
index 4df909777d..0dce2b0a1f 100644
--- a/usr/src/uts/i86pc/io/vmm/io/vpmtmr.c
+++ b/usr/src/uts/i86pc/io/vmm/io/vpmtmr.c
@@ -80,7 +80,7 @@ vpmtmr_cleanup(struct vpmtmr *vpmtmr)
}
int
-vpmtmr_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
+vpmtmr_handler(struct vm *vm, int vcpuid, bool in, uint16_t port, uint8_t bytes,
uint32_t *val)
{
struct vpmtmr *vpmtmr;
diff --git a/usr/src/uts/i86pc/io/vmm/io/vpmtmr.h b/usr/src/uts/i86pc/io/vmm/io/vpmtmr.h
index e6562da5c0..c06825b970 100644
--- a/usr/src/uts/i86pc/io/vmm/io/vpmtmr.h
+++ b/usr/src/uts/i86pc/io/vmm/io/vpmtmr.h
@@ -38,7 +38,7 @@ struct vpmtmr;
struct vpmtmr *vpmtmr_init(struct vm *vm);
void vpmtmr_cleanup(struct vpmtmr *pmtmr);
-int vpmtmr_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
- uint32_t *val);
+int vpmtmr_handler(struct vm *vm, int vcpuid, bool in, uint16_t port,
+ uint8_t bytes, uint32_t *val);
#endif
diff --git a/usr/src/uts/i86pc/io/vmm/io/vrtc.c b/usr/src/uts/i86pc/io/vmm/io/vrtc.c
index a3635fc9f0..343ad9c37a 100644
--- a/usr/src/uts/i86pc/io/vmm/io/vrtc.c
+++ b/usr/src/uts/i86pc/io/vmm/io/vrtc.c
@@ -359,7 +359,7 @@ rtc_to_secs(struct vrtc *vrtc)
/*
* Ignore 'rtc->dow' because some guests like Linux don't bother
- * setting it at all while others like OpenBSD/i386 set it incorrectly.
+ * setting it at all while others like OpenBSD/i386 set it incorrectly.
*
* clock_ct_to_ts() does not depend on 'ct.dow' anyways so ignore it.
*/
@@ -874,8 +874,8 @@ vrtc_nvram_read(struct vm *vm, int offset, uint8_t *retval)
}
int
-vrtc_addr_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
- uint32_t *val)
+vrtc_addr_handler(struct vm *vm, int vcpuid, bool in, uint16_t port,
+ uint8_t bytes, uint32_t *val)
{
struct vrtc *vrtc;
@@ -897,8 +897,8 @@ vrtc_addr_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
}
int
-vrtc_data_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
- uint32_t *val)
+vrtc_data_handler(struct vm *vm, int vcpuid, bool in, uint16_t port,
+ uint8_t bytes, uint32_t *val)
{
struct vrtc *vrtc;
struct rtcdev *rtc;
diff --git a/usr/src/uts/i86pc/io/vmm/io/vrtc.h b/usr/src/uts/i86pc/io/vmm/io/vrtc.h
index 13abbedeb9..92a060cb8e 100644
--- a/usr/src/uts/i86pc/io/vmm/io/vrtc.h
+++ b/usr/src/uts/i86pc/io/vmm/io/vrtc.h
@@ -48,10 +48,10 @@ int vrtc_set_time(struct vm *vm, time_t secs);
int vrtc_nvram_write(struct vm *vm, int offset, uint8_t value);
int vrtc_nvram_read(struct vm *vm, int offset, uint8_t *retval);
-int vrtc_addr_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
- uint32_t *val);
-int vrtc_data_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
- uint32_t *val);
+int vrtc_addr_handler(struct vm *vm, int vcpuid, bool in, uint16_t port,
+ uint8_t bytes, uint32_t *val);
+int vrtc_data_handler(struct vm *vm, int vcpuid, bool in, uint16_t port,
+ uint8_t bytes, uint32_t *val);
#ifndef __FreeBSD__
void vrtc_localize_resources(struct vrtc *);
diff --git a/usr/src/uts/i86pc/sys/vmm_instruction_emul.h b/usr/src/uts/i86pc/io/vmm/sys/vmm_instruction_emul.h
index d084301aee..d3a07b0f99 100644
--- a/usr/src/uts/i86pc/sys/vmm_instruction_emul.h
+++ b/usr/src/uts/i86pc/io/vmm/sys/vmm_instruction_emul.h
@@ -27,64 +27,57 @@
*
* $FreeBSD$
*/
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * Copyright 2020 Oxide Computer Company
+ */
#ifndef _VMM_INSTRUCTION_EMUL_H_
#define _VMM_INSTRUCTION_EMUL_H_
#include <sys/mman.h>
+#include <machine/vmm.h>
-/*
- * Callback functions to read and write memory regions.
- */
-typedef int (*mem_region_read_t)(void *vm, int cpuid, uint64_t gpa,
- uint64_t *rval, int rsize, void *arg);
-
-typedef int (*mem_region_write_t)(void *vm, int cpuid, uint64_t gpa,
- uint64_t wval, int wsize, void *arg);
+struct vie;
-/*
- * Emulate the decoded 'vie' instruction.
- *
- * The callbacks 'mrr' and 'mrw' emulate reads and writes to the memory region
- * containing 'gpa'. 'mrarg' is an opaque argument that is passed into the
- * callback functions.
- *
- * 'void *vm' should be 'struct vm *' when called from kernel context and
- * 'struct vmctx *' when called from user context.
- * s
- */
-int vmm_emulate_instruction(void *vm, int cpuid, uint64_t gpa, struct vie *vie,
- struct vm_guest_paging *paging, mem_region_read_t mrr,
- mem_region_write_t mrw, void *mrarg);
+struct vie *vie_alloc();
+void vie_free(struct vie *);
-int vie_update_register(void *vm, int vcpuid, enum vm_reg_name reg,
- uint64_t val, int size);
+void vie_init_mmio(struct vie *vie, const char *inst_bytes, uint8_t inst_length,
+ const struct vm_guest_paging *paging, uint64_t gpa);
+void vie_init_inout(struct vie *vie, const struct vm_inout *inout,
+ uint8_t inst_len, const struct vm_guest_paging *paging);
-/*
- * Returns 1 if an alignment check exception should be injected and 0 otherwise.
- */
-int vie_alignment_check(int cpl, int operand_size, uint64_t cr0,
- uint64_t rflags, uint64_t gla);
+int vie_fulfill_mmio(struct vie *vie, const struct vm_mmio *res);
+int vie_fulfill_inout(struct vie *vie, const struct vm_inout *res);
-/* Returns 1 if the 'gla' is not canonical and 0 otherwise. */
-int vie_canonical_check(enum vm_cpu_mode cpu_mode, uint64_t gla);
+bool vie_needs_fetch(const struct vie *vie);
+bool vie_pending(const struct vie *vie);
+uint64_t vie_mmio_gpa(const struct vie *vie);
+void vie_exitinfo(const struct vie *vie, struct vm_exit *vme);
+void vie_fallback_exitinfo(const struct vie *vie, struct vm_exit *vme);
-uint64_t vie_size2mask(int size);
+void vie_reset(struct vie *vie);
+void vie_advance_pc(struct vie *vie, uint64_t *nextrip);
-int vie_calculate_gla(enum vm_cpu_mode cpu_mode, enum vm_reg_name seg,
- struct seg_desc *desc, uint64_t off, int length, int addrsize, int prot,
- uint64_t *gla);
+int vie_emulate_mmio(struct vie *vie, void *vm, int vcpuid);
+int vie_emulate_inout(struct vie *vie, struct vm *vm, int vcpuid);
-#ifdef _KERNEL
/*
* APIs to fetch and decode the instruction from nested page fault handler.
*
- * 'vie' must be initialized before calling 'vmm_fetch_instruction()'
+ * 'vie' must be initialized before calling 'vie_fetch_instruction()'
*/
-int vmm_fetch_instruction(struct vm *vm, int cpuid,
- struct vm_guest_paging *guest_paging,
- uint64_t rip, int inst_length, struct vie *vie,
- int *is_fault);
+int vie_fetch_instruction(struct vie *vie, struct vm *vm, int cpuid,
+ uint64_t rip, int *is_fault);
/*
* Translate the guest linear address 'gla' to a guest physical address.
@@ -101,34 +94,23 @@ int vm_gla2gpa(struct vm *vm, int vcpuid, struct vm_guest_paging *paging,
* Like vm_gla2gpa, but no exceptions are injected into the guest and
* PTEs are not changed.
*/
-int vm_gla2gpa_nofault(struct vm *vm, int vcpuid, struct vm_guest_paging *paging,
- uint64_t gla, int prot, uint64_t *gpa, int *is_fault);
-#endif /* _KERNEL */
-
-void vie_init(struct vie *vie, const char *inst_bytes, int inst_length);
+int vm_gla2gpa_nofault(struct vm *vm, int vcpuid,
+ struct vm_guest_paging *paging, uint64_t gla, int prot, uint64_t *gpa,
+ int *is_fault);
+int vie_verify_gla(struct vie *vie, struct vm *vm, int cpuid, uint64_t gla);
/*
* Decode the instruction fetched into 'vie' so it can be emulated.
*
* 'gla' is the guest linear address provided by the hardware assist
* that caused the nested page table fault. It is used to verify that
* the software instruction decoding is in agreement with the hardware.
- *
+ *
* Some hardware assists do not provide the 'gla' to the hypervisor.
* To skip the 'gla' verification for this or any other reason pass
* in VIE_INVALID_GLA instead.
*/
-#ifdef _KERNEL
#define VIE_INVALID_GLA (1UL << 63) /* a non-canonical address */
-int vmm_decode_instruction(struct vm *vm, int cpuid, uint64_t gla,
- enum vm_cpu_mode cpu_mode, int csd, struct vie *vie);
-#else /* !_KERNEL */
-/*
- * Permit instruction decoding logic to be compiled outside of the kernel for
- * rapid iteration and validation. No GLA validation is performed, obviously.
- */
-int vmm_decode_instruction(enum vm_cpu_mode cpu_mode, int csd,
- struct vie *vie);
-#endif /* _KERNEL */
+int vie_decode_instruction(struct vie *vie, struct vm *vm, int cpuid, int csd);
#endif /* _VMM_INSTRUCTION_EMUL_H_ */
diff --git a/usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h b/usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h
index 2a884e6e0e..fbd2884b84 100644
--- a/usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h
+++ b/usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h
@@ -54,6 +54,7 @@ struct vm;
struct vm_exception;
struct seg_desc;
struct vm_exit;
+struct vie;
struct vm_run;
struct vhpet;
struct vioapic;
@@ -171,7 +172,7 @@ int vm_get_seg_desc(struct vm *vm, int vcpu, int reg,
struct seg_desc *ret_desc);
int vm_set_seg_desc(struct vm *vm, int vcpu, int reg,
struct seg_desc *desc);
-int vm_run(struct vm *vm, struct vm_run *vmrun);
+int vm_run(struct vm *vm, int vcpuid, const struct vm_entry *);
int vm_suspend(struct vm *vm, enum vm_suspend_how how);
int vm_inject_nmi(struct vm *vm, int vcpu);
int vm_nmi_pending(struct vm *vm, int vcpuid);
@@ -191,11 +192,17 @@ int vm_activate_cpu(struct vm *vm, int vcpu);
int vm_suspend_cpu(struct vm *vm, int vcpu);
int vm_resume_cpu(struct vm *vm, int vcpu);
struct vm_exit *vm_exitinfo(struct vm *vm, int vcpuid);
+struct vie *vm_vie_ctx(struct vm *vm, int vcpuid);
void vm_exit_suspended(struct vm *vm, int vcpuid, uint64_t rip);
void vm_exit_debug(struct vm *vm, int vcpuid, uint64_t rip);
void vm_exit_runblock(struct vm *vm, int vcpuid, uint64_t rip);
void vm_exit_astpending(struct vm *vm, int vcpuid, uint64_t rip);
void vm_exit_reqidle(struct vm *vm, int vcpuid, uint64_t rip);
+int vm_service_mmio_read(struct vm *vm, int cpuid, uint64_t gpa, uint64_t *rval,
+ int rsize);
+int vm_service_mmio_write(struct vm *vm, int cpuid, uint64_t gpa, uint64_t wval,
+ int wsize);
+void vm_req_spinup_ap(struct vm *vm, int req_vcpuid, uint64_t req_rip);
#ifdef _SYS__CPUSET_H_
cpuset_t vm_active_cpus(struct vm *vm);
diff --git a/usr/src/uts/i86pc/io/vmm/vmm.c b/usr/src/uts/i86pc/io/vmm/vmm.c
index 579ca12e84..7a47cd0cd1 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm.c
+++ b/usr/src/uts/i86pc/io/vmm/vmm.c
@@ -79,7 +79,7 @@ __FBSDID("$FreeBSD$");
#include <machine/vmm.h>
#include <machine/vmm_dev.h>
-#include <machine/vmm_instruction_emul.h>
+#include <sys/vmm_instruction_emul.h>
#include "vmm_ioport.h"
#include "vmm_ktr.h"
@@ -109,7 +109,7 @@ struct vlapic;
* (x) initialized before use
*/
struct vcpu {
- struct mtx mtx; /* (o) protects 'state' and 'hostcpu' */
+ struct mtx mtx; /* (o) protects 'state' and 'hostcpu' */
enum vcpu_state state; /* (o) vcpu state */
#ifndef __FreeBSD__
kcondvar_t vcpu_cv; /* (o) cpu waiter cv */
@@ -135,6 +135,7 @@ struct vcpu {
void *stats; /* (a,i) statistics */
struct vm_exit exitinfo; /* (x) exit reason and collateral */
uint64_t nextrip; /* (x) next instruction to execute */
+ struct vie *vie_ctx; /* (x) instruction emulation context */
#ifndef __FreeBSD__
uint64_t tsc_offset; /* (x) offset from host TSC */
#endif
@@ -185,7 +186,7 @@ struct vm {
volatile cpuset_t active_cpus; /* (i) active vcpus */
volatile cpuset_t debug_cpus; /* (i) vcpus stopped for debug */
int suspend; /* (i) stop VM execution */
- volatile cpuset_t suspended_cpus; /* (i) suspended vcpus */
+ volatile cpuset_t suspended_cpus; /* (i) suspended vcpus */
volatile cpuset_t halted_cpus; /* (x) cpus in a hard halt */
struct mem_map mem_maps[VM_MAX_MEMMAPS]; /* (i) guest address space */
struct mem_seg mem_segs[VM_MAX_MEMSEGS]; /* (o) guest memory regions */
@@ -200,6 +201,14 @@ struct vm {
#ifndef __FreeBSD__
list_t ioport_hooks;
#endif /* __FreeBSD__ */
+ bool sipi_req; /* (i) SIPI requested */
+ int sipi_req_vcpu; /* (i) SIPI destination */
+ uint64_t sipi_req_rip; /* (i) SIPI start %rip */
+
+ /* Miscellaneous VM-wide statistics and counters */
+ struct vm_wide_stats {
+ uint64_t sipi_supersede;
+ } stats;
};
static int vmm_initialized;
@@ -341,6 +350,8 @@ vcpu_cleanup(struct vm *vm, int i, bool destroy)
if (destroy) {
vmm_stat_free(vcpu->stats);
fpu_save_area_free(vcpu->guestfpu);
+ vie_free(vcpu->vie_ctx);
+ vcpu->vie_ctx = NULL;
}
}
@@ -367,6 +378,10 @@ vcpu_init(struct vm *vm, int vcpu_id, bool create)
#endif
vcpu->guestfpu = fpu_save_area_alloc();
vcpu->stats = vmm_stat_alloc();
+ vcpu->vie_ctx = vie_alloc();
+ } else {
+ vie_reset(vcpu->vie_ctx);
+ bzero(&vcpu->exitinfo, sizeof (vcpu->exitinfo));
}
vcpu->vlapic = VLAPIC_INIT(vm->cookie, vcpu_id);
@@ -402,6 +417,15 @@ vm_exitinfo(struct vm *vm, int cpuid)
return (&vcpu->exitinfo);
}
+struct vie *
+vm_vie_ctx(struct vm *vm, int cpuid)
+{
+ if (cpuid < 0 || cpuid >= vm->maxcpus)
+ panic("vm_vie_ctx: invalid cpuid %d", cpuid);
+
+ return (vm->vcpu[cpuid].vie_ctx);
+}
+
static int
vmm_init(void)
{
@@ -1198,7 +1222,6 @@ vm_set_register(struct vm *vm, int vcpuid, int reg, uint64_t val)
static bool
is_descriptor_table(int reg)
{
-
switch (reg) {
case VM_REG_GUEST_IDTR:
case VM_REG_GUEST_GDTR:
@@ -1211,7 +1234,6 @@ is_descriptor_table(int reg)
static bool
is_segment_register(int reg)
{
-
switch (reg) {
case VM_REG_GUEST_ES:
case VM_REG_GUEST_CS:
@@ -1558,85 +1580,190 @@ done:
return (0);
}
+int
+vm_service_mmio_read(struct vm *vm, int cpuid, uint64_t gpa, uint64_t *rval,
+ int rsize)
+{
+ int err = ESRCH;
+ void *arg = NULL;
+
+ if (gpa >= DEFAULT_APIC_BASE && gpa < DEFAULT_APIC_BASE + PAGE_SIZE) {
+ err = lapic_mmio_read(vm, cpuid, gpa, rval, rsize, &arg);
+ } else if (gpa >= VIOAPIC_BASE && gpa < VIOAPIC_BASE + VIOAPIC_SIZE) {
+ err = vioapic_mmio_read(vm, cpuid, gpa, rval, rsize, &arg);
+ } else if (gpa >= VHPET_BASE && gpa < VHPET_BASE + VHPET_SIZE) {
+ err = vhpet_mmio_read(vm, cpuid, gpa, rval, rsize, &arg);
+ }
+
+ return (err);
+}
+
+int
+vm_service_mmio_write(struct vm *vm, int cpuid, uint64_t gpa, uint64_t wval,
+ int wsize)
+{
+ int err = ESRCH;
+ void *arg = NULL;
+
+ if (gpa >= DEFAULT_APIC_BASE && gpa < DEFAULT_APIC_BASE + PAGE_SIZE) {
+ err = lapic_mmio_write(vm, cpuid, gpa, wval, wsize, &arg);
+ } else if (gpa >= VIOAPIC_BASE && gpa < VIOAPIC_BASE + VIOAPIC_SIZE) {
+ err = vioapic_mmio_write(vm, cpuid, gpa, wval, wsize, &arg);
+ } else if (gpa >= VHPET_BASE && gpa < VHPET_BASE + VHPET_SIZE) {
+ err = vhpet_mmio_write(vm, cpuid, gpa, wval, wsize, &arg);
+ }
+
+ return (err);
+}
+
static int
-vm_handle_inst_emul(struct vm *vm, int vcpuid, bool *retu)
+vm_handle_mmio_emul(struct vm *vm, int vcpuid, bool *retu)
{
struct vie *vie;
struct vcpu *vcpu;
struct vm_exit *vme;
- uint64_t gla, gpa, cs_base;
- struct vm_guest_paging *paging;
- mem_region_read_t mread;
- mem_region_write_t mwrite;
- enum vm_cpu_mode cpu_mode;
- int cs_d, error, fault;
+ uint64_t inst_addr;
+ int error, fault, cs_d;
vcpu = &vm->vcpu[vcpuid];
vme = &vcpu->exitinfo;
+ vie = vcpu->vie_ctx;
KASSERT(vme->inst_length == 0, ("%s: invalid inst_length %d",
__func__, vme->inst_length));
- gla = vme->u.inst_emul.gla;
- gpa = vme->u.inst_emul.gpa;
- cs_base = vme->u.inst_emul.cs_base;
- cs_d = vme->u.inst_emul.cs_d;
- vie = &vme->u.inst_emul.vie;
- paging = &vme->u.inst_emul.paging;
- cpu_mode = paging->cpu_mode;
+ inst_addr = vme->rip + vme->u.mmio_emul.cs_base;
+ cs_d = vme->u.mmio_emul.cs_d;
- VCPU_CTR1(vm, vcpuid, "inst_emul fault accessing gpa %#lx", gpa);
+ VCPU_CTR1(vm, vcpuid, "inst_emul fault accessing gpa %#lx",
+ vme->u.mmio_emul.gpa);
- /* Fetch, decode and emulate the faulting instruction */
- if (vie->num_valid == 0) {
- error = vmm_fetch_instruction(vm, vcpuid, paging, vme->rip +
- cs_base, VIE_INST_SIZE, vie, &fault);
- } else {
- /*
- * The instruction bytes have already been copied into 'vie'
- */
- error = fault = 0;
+ /* Fetch the faulting instruction */
+ if (vie_needs_fetch(vie)) {
+ error = vie_fetch_instruction(vie, vm, vcpuid, inst_addr,
+ &fault);
+ if (error != 0) {
+ return (error);
+ } else if (fault) {
+ /*
+			 * If a fault during instruction fetch was encountered, it
+ * will have asserted that the appropriate exception be
+ * injected at next entry. No further work is required.
+ */
+ return (0);
+ }
}
- if (error || fault)
- return (error);
- if (vmm_decode_instruction(vm, vcpuid, gla, cpu_mode, cs_d, vie) != 0) {
+ if (vie_decode_instruction(vie, vm, vcpuid, cs_d) != 0) {
VCPU_CTR1(vm, vcpuid, "Error decoding instruction at %#lx",
- vme->rip + cs_base);
- *retu = true; /* dump instruction bytes in userspace */
+ inst_addr);
+ /* Dump (unrecognized) instruction bytes in userspace */
+ vie_fallback_exitinfo(vie, vme);
+ *retu = true;
return (0);
}
-
- /*
- * Update 'nextrip' based on the length of the emulated instruction.
- */
- vme->inst_length = vie->num_processed;
- vcpu->nextrip += vie->num_processed;
- VCPU_CTR1(vm, vcpuid, "nextrip updated to %#lx after instruction "
- "decoding", vcpu->nextrip);
-
- /* return to userland unless this is an in-kernel emulated device */
- if (gpa >= DEFAULT_APIC_BASE && gpa < DEFAULT_APIC_BASE + PAGE_SIZE) {
- mread = lapic_mmio_read;
- mwrite = lapic_mmio_write;
- } else if (gpa >= VIOAPIC_BASE && gpa < VIOAPIC_BASE + VIOAPIC_SIZE) {
- mread = vioapic_mmio_read;
- mwrite = vioapic_mmio_write;
- } else if (gpa >= VHPET_BASE && gpa < VHPET_BASE + VHPET_SIZE) {
- mread = vhpet_mmio_read;
- mwrite = vhpet_mmio_write;
- } else {
+ if (vme->u.mmio_emul.gla != VIE_INVALID_GLA &&
+ vie_verify_gla(vie, vm, vcpuid, vme->u.mmio_emul.gla) != 0) {
+ /* Decoded GLA does not match GLA from VM exit state */
+ vie_fallback_exitinfo(vie, vme);
*retu = true;
return (0);
}
- error = vmm_emulate_instruction(vm, vcpuid, gpa, vie, paging,
- mread, mwrite, retu);
-
+repeat:
+ error = vie_emulate_mmio(vie, vm, vcpuid);
+ if (error < 0) {
+ /*
+ * MMIO not handled by any of the in-kernel-emulated devices, so
+ * make a trip out to userspace for it.
+ */
+ vie_exitinfo(vie, vme);
+ *retu = true;
+ error = 0;
+ } else if (error == EAGAIN) {
+ /*
+ * Continue emulating the rep-prefixed instruction, which has
+ * not completed its iterations.
+ *
+ * In case this can be emulated in-kernel and has a high
+ * repetition count (causing a tight spin), it should be
+ * deferential to yield conditions.
+ */
+ if (!vcpu_should_yield(vm, vcpuid)) {
+ goto repeat;
+ } else {
+ /*
+ * Defer to the contending load by making a trip to
+ * userspace with a no-op (BOGUS) exit reason.
+ */
+ vie_reset(vie);
+ vme->exitcode = VM_EXITCODE_BOGUS;
+ *retu = true;
+ return (0);
+ }
+ } else if (error == 0) {
+ /* Update %rip now that instruction has been emulated */
+ vie_advance_pc(vie, &vcpu->nextrip);
+ }
return (error);
}
static int
+vm_handle_inout(struct vm *vm, int vcpuid, struct vm_exit *vme, bool *retu)
+{
+ struct vcpu *vcpu;
+ struct vie *vie;
+ int err;
+
+ vcpu = &vm->vcpu[vcpuid];
+ vie = vcpu->vie_ctx;
+
+repeat:
+ err = vie_emulate_inout(vie, vm, vcpuid);
+
+ if (err < 0) {
+ /*
+ * In/out not handled by any of the in-kernel-emulated devices,
+ * so make a trip out to userspace for it.
+ */
+ vie_exitinfo(vie, vme);
+ *retu = true;
+ return (0);
+ } else if (err == EAGAIN) {
+ /*
+ * Continue emulating the rep-prefixed ins/outs, which has not
+ * completed its iterations.
+ *
+ * In case this can be emulated in-kernel and has a high
+ * repetition count (causing a tight spin), it should be
+ * deferential to yield conditions.
+ */
+ if (!vcpu_should_yield(vm, vcpuid)) {
+ goto repeat;
+ } else {
+ /*
+ * Defer to the contending load by making a trip to
+ * userspace with a no-op (BOGUS) exit reason.
+ */
+ vie_reset(vie);
+ vme->exitcode = VM_EXITCODE_BOGUS;
+ *retu = true;
+ return (0);
+ }
+ } else if (err != 0) {
+ /* Emulation failure. Bail all the way out to userspace. */
+ vme->exitcode = VM_EXITCODE_INST_EMUL;
+ bzero(&vme->u.inst_emul, sizeof (vme->u.inst_emul));
+ *retu = true;
+ return (0);
+ }
+
+ vie_advance_pc(vie, &vcpu->nextrip);
+ *retu = false;
+ return (0);
+}
+
+static int
vm_handle_suspend(struct vm *vm, int vcpuid, bool *retu)
{
#ifdef __FreeBSD__
@@ -1768,6 +1895,18 @@ vm_handle_wrmsr(struct vm *vm, int vcpuid, struct vm_exit *vme)
}
#endif /* __FreeBSD__ */
+void
+vm_req_spinup_ap(struct vm *vm, int req_vcpuid, uint64_t req_rip)
+{
+ if (vm->sipi_req) {
+ /* This should never occur if userspace is doing its job. */
+ vm->stats.sipi_supersede++;
+ }
+ vm->sipi_req = true;
+ vm->sipi_req_vcpu = req_vcpuid;
+ vm->sipi_req_rip = req_rip;
+}
+
int
vm_suspend(struct vm *vm, enum vm_suspend_how how)
{
@@ -1960,11 +2099,104 @@ vmm_freectx(void *arg, int isexec)
#endif /* __FreeBSD */
+static int
+vm_entry_actions(struct vm *vm, int vcpuid, const struct vm_entry *entry,
+ struct vm_exit *vme)
+{
+ struct vcpu *vcpu;
+ struct vie *vie;
+ int err;
+
+ vcpu = &vm->vcpu[vcpuid];
+ vie = vcpu->vie_ctx;
+ err = 0;
+
+ switch (entry->cmd) {
+ case VEC_DEFAULT:
+ return (0);
+ case VEC_DISCARD_INSTR:
+ vie_reset(vie);
+ return (0);
+ case VEC_COMPLETE_MMIO:
+ err = vie_fulfill_mmio(vie, &entry->u.mmio);
+ if (err == 0) {
+ err = vie_emulate_mmio(vie, vm, vcpuid);
+ if (err == 0) {
+ vie_advance_pc(vie, &vcpu->nextrip);
+ } else if (err < 0) {
+ vie_exitinfo(vie, vme);
+ } else if (err == EAGAIN) {
+ /*
+ * Clear the instruction emulation state in
+ * order to re-enter VM context and continue
+ * this 'rep <instruction>'
+ */
+ vie_reset(vie);
+ err = 0;
+ }
+ }
+ break;
+ case VEC_COMPLETE_INOUT:
+ err = vie_fulfill_inout(vie, &entry->u.inout);
+ if (err == 0) {
+ err = vie_emulate_inout(vie, vm, vcpuid);
+ if (err == 0) {
+ vie_advance_pc(vie, &vcpu->nextrip);
+ } else if (err < 0) {
+ vie_exitinfo(vie, vme);
+ } else if (err == EAGAIN) {
+ /*
+ * Clear the instruction emulation state in
+ * order to re-enter VM context and continue
+ * this 'rep ins/outs'
+ */
+ vie_reset(vie);
+ err = 0;
+ }
+ }
+ break;
+ default:
+ return (EINVAL);
+ }
+ return (err);
+}
+
+static int
+vm_loop_checks(struct vm *vm, int vcpuid, struct vm_exit *vme)
+{
+ struct vie *vie;
+
+ vie = vm->vcpu[vcpuid].vie_ctx;
+
+ if (vie_pending(vie)) {
+ /*
+ * Userspace has not fulfilled the pending needs of the
+ * instruction emulation, so bail back out.
+ */
+ vie_exitinfo(vie, vme);
+ return (-1);
+ }
+
+ if (vcpuid == 0 && vm->sipi_req) {
+ /* The boot vCPU has sent a SIPI to one of the other CPUs */
+ vme->exitcode = VM_EXITCODE_SPINUP_AP;
+ vme->u.spinup_ap.vcpu = vm->sipi_req_vcpu;
+ vme->u.spinup_ap.rip = vm->sipi_req_rip;
+
+ vm->sipi_req = false;
+ vm->sipi_req_vcpu = 0;
+ vm->sipi_req_rip = 0;
+ return (-1);
+ }
+
+ return (0);
+}
+
int
-vm_run(struct vm *vm, struct vm_run *vmrun)
+vm_run(struct vm *vm, int vcpuid, const struct vm_entry *entry)
{
struct vm_eventinfo evinfo;
- int error, vcpuid;
+ int error;
struct vcpu *vcpu;
#ifdef __FreeBSD__
struct pcb *pcb;
@@ -1978,8 +2210,6 @@ vm_run(struct vm *vm, struct vm_run *vmrun)
int affinity_type = CPU_CURRENT;
#endif
- vcpuid = vmrun->cpuid;
-
if (vcpuid < 0 || vcpuid >= vm->maxcpus)
return (EINVAL);
@@ -2005,7 +2235,21 @@ vm_run(struct vm *vm, struct vm_run *vmrun)
NULL, vmm_freectx);
#endif
+ error = vm_entry_actions(vm, vcpuid, entry, vme);
+ if (error < 0) {
+ /* Exit condition to be serviced by userspace */
+ error = 0;
+ goto exit;
+ } else if (error != 0) {
+ goto exit;
+ }
+
restart:
+ if (vm_loop_checks(vm, vcpuid, vme) != 0) {
+ error = 0;
+ goto exit;
+ }
+
#ifndef __FreeBSD__
thread_affinity_set(curthread, affinity_type);
/*
@@ -2091,11 +2335,10 @@ restart:
case VM_EXITCODE_PAGING:
error = vm_handle_paging(vm, vcpuid, &retu);
break;
- case VM_EXITCODE_INST_EMUL:
- error = vm_handle_inst_emul(vm, vcpuid, &retu);
+ case VM_EXITCODE_MMIO_EMUL:
+ error = vm_handle_mmio_emul(vm, vcpuid, &retu);
break;
case VM_EXITCODE_INOUT:
- case VM_EXITCODE_INOUT_STR:
error = vm_handle_inout(vm, vcpuid, vme, &retu);
break;
case VM_EXITCODE_MONITOR:
@@ -2114,12 +2357,12 @@ restart:
affinity_type = CPU_BEST;
break;
}
+#endif
case VM_EXITCODE_MTRAP:
vm_suspend_cpu(vm, vcpuid);
retu = true;
break;
-#endif
default:
retu = true; /* handled in userland */
break;
@@ -2129,6 +2372,7 @@ restart:
if (error == 0 && retu == false)
goto restart;
+exit:
#ifndef __FreeBSD__
removectx(curthread, &vtc, vmm_savectx, vmm_restorectx, NULL, NULL,
NULL, vmm_freectx);
@@ -2136,8 +2380,6 @@ restart:
VCPU_CTR2(vm, vcpuid, "retu %d/%d", error, vme->exitcode);
- /* copy the exit information */
- bcopy(vme, &vmrun->vm_exit, sizeof (struct vm_exit));
return (error);
}
@@ -2672,7 +2914,7 @@ vmm_is_pptdev(int bus, int slot, int func)
found = true;
break;
}
-
+
if (cp2 != NULL)
*cp2++ = ' ';
@@ -3082,7 +3324,7 @@ vm_copyin(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo, void *kaddr,
{
char *dst;
int idx;
-
+
dst = kaddr;
idx = 0;
while (len > 0) {
@@ -3123,8 +3365,8 @@ vm_get_rescnt(struct vm *vm, int vcpu, struct vmm_stat_type *stat)
if (vcpu == 0) {
vmm_stat_set(vm, vcpu, VMM_MEM_RESIDENT,
- PAGE_SIZE * vmspace_resident_count(vm->vmspace));
- }
+ PAGE_SIZE * vmspace_resident_count(vm->vmspace));
+ }
}
static void
@@ -3133,8 +3375,8 @@ vm_get_wiredcnt(struct vm *vm, int vcpu, struct vmm_stat_type *stat)
if (vcpu == 0) {
vmm_stat_set(vm, vcpu, VMM_MEM_WIRED,
- PAGE_SIZE * pmap_wired_count(vmspace_pmap(vm->vmspace)));
- }
+ PAGE_SIZE * pmap_wired_count(vmspace_pmap(vm->vmspace)));
+ }
}
VMM_STAT_FUNC(VMM_MEM_RESIDENT, "Resident memory", vm_get_rescnt);
@@ -3206,21 +3448,21 @@ vm_ioport_handle_hook(struct vm *vm, int cpuid, bool in, int port, int bytes,
}
}
if (hook == NULL) {
- return (ENOENT);
+ return (ESRCH);
}
if (in) {
uint64_t tval;
if (hook->vmih_rmem_cb == NULL) {
- return (ENOENT);
+ return (ESRCH);
}
err = hook->vmih_rmem_cb(hook->vmih_arg, (uintptr_t)port,
(uint_t)bytes, &tval);
*val = (uint32_t)tval;
} else {
if (hook->vmih_wmem_cb == NULL) {
- return (ENOENT);
+ return (ESRCH);
}
err = hook->vmih_wmem_cb(hook->vmih_arg, (uintptr_t)port,
(uint_t)bytes, (uint64_t)*val);
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_instruction_emul.c b/usr/src/uts/i86pc/io/vmm/vmm_instruction_emul.c
index 0d32fe0b9a..f8bb7a1646 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm_instruction_emul.c
+++ b/usr/src/uts/i86pc/io/vmm/vmm_instruction_emul.c
@@ -40,12 +40,12 @@
*
* Copyright 2015 Pluribus Networks Inc.
* Copyright 2018 Joyent, Inc.
+ * Copyright 2020 Oxide Computer Company
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
-#ifdef _KERNEL
#include <sys/param.h>
#include <sys/pcpu.h>
#include <sys/systm.h>
@@ -56,27 +56,109 @@ __FBSDID("$FreeBSD$");
#include <machine/vmparam.h>
#include <machine/vmm.h>
-#else /* !_KERNEL */
-#include <sys/types.h>
-#include <sys/errno.h>
-#include <sys/_iovec.h>
+#include <sys/vmm_kernel.h>
-#include <machine/vmm.h>
-
-#include <err.h>
-#include <assert.h>
-#include <stdbool.h>
-#include <stdio.h>
-#include <strings.h>
-#include <vmmapi.h>
-#define KASSERT(exp,msg) assert((exp))
-#define panic(...) errx(4, __VA_ARGS__)
-#endif /* _KERNEL */
-
-#include <machine/vmm_instruction_emul.h>
+#include <sys/vmm_instruction_emul.h>
#include <x86/psl.h>
#include <x86/specialreg.h>
+#include "vmm_ioport.h"
+#include "vmm_ktr.h"
+
+enum vie_status {
+ VIES_INIT = (1U << 0),
+ VIES_MMIO = (1U << 1),
+ VIES_INOUT = (1U << 2),
+ VIES_INST_FETCH = (1U << 3),
+ VIES_INST_DECODE = (1U << 4),
+ VIES_PENDING_MMIO = (1U << 5),
+ VIES_PENDING_INOUT = (1U << 6),
+ VIES_REPEAT = (1U << 7),
+ VIES_COMPLETE = (1U << 8),
+};
+
+/* State of request to perform emulated access (inout or MMIO) */
+enum vie_req {
+ VR_NONE,
+ VR_PENDING,
+ VR_DONE,
+};
+
+struct vie_mmio {
+ uint64_t data;
+ uint64_t gpa;
+ uint8_t bytes;
+ enum vie_req state;
+};
+
+struct vie_op {
+ uint8_t op_byte; /* actual opcode byte */
+ uint8_t op_type; /* type of operation (e.g. MOV) */
+ uint16_t op_flags;
+};
+
+#define VIE_INST_SIZE 15
+struct vie {
+ uint8_t inst[VIE_INST_SIZE]; /* instruction bytes */
+ uint8_t num_valid; /* size of the instruction */
+ uint8_t num_processed;
+
+ uint8_t addrsize:4, opsize:4; /* address and operand sizes */
+ uint8_t rex_w:1, /* REX prefix */
+ rex_r:1,
+ rex_x:1,
+ rex_b:1,
+ rex_present:1,
+ repz_present:1, /* REP/REPE/REPZ prefix */
+ repnz_present:1, /* REPNE/REPNZ prefix */
+ opsize_override:1, /* Operand size override */
+ addrsize_override:1, /* Address size override */
+ segment_override:1; /* Segment override */
+
+ uint8_t mod:2, /* ModRM byte */
+ reg:4,
+ rm:4;
+
+ uint8_t ss:2, /* SIB byte */
+ vex_present:1, /* VEX prefixed */
+ vex_l:1, /* L bit */
+ index:4, /* SIB byte */
+ base:4; /* SIB byte */
+
+ uint8_t disp_bytes;
+ uint8_t imm_bytes;
+
+ uint8_t scale;
+
+ uint8_t vex_reg:4, /* vvvv: first source register specifier */
+ vex_pp:2, /* pp */
+ _sparebits:2;
+
+ uint8_t _sparebytes[2];
+
+ int base_register; /* VM_REG_GUEST_xyz */
+ int index_register; /* VM_REG_GUEST_xyz */
+ int segment_register; /* VM_REG_GUEST_xyz */
+
+ int64_t displacement; /* optional addr displacement */
+ int64_t immediate; /* optional immediate operand */
+
+ struct vie_op op; /* opcode description */
+
+ enum vie_status status;
+
+ struct vm_guest_paging paging; /* guest paging state */
+
+ uint64_t mmio_gpa; /* faulting GPA */
+ struct vie_mmio mmio_req_read;
+ struct vie_mmio mmio_req_write;
+
+ struct vm_inout inout; /* active in/out op */
+ enum vie_req inout_req_state;
+ uint32_t inout_req_val; /* value from userspace */
+};
+
+
/* struct vie_op.op_type */
enum {
VIE_OP_TYPE_NONE = 0,
@@ -299,14 +381,29 @@ static uint64_t size2mask[] = {
[8] = 0xffffffffffffffff,
};
-static int
-vie_read_register(void *vm, int vcpuid, enum vm_reg_name reg, uint64_t *rval)
-{
- int error;
- error = vm_get_register(vm, vcpuid, reg, rval);
+static int vie_mmio_read(struct vie *vie, struct vm *vm, int cpuid,
+ uint64_t gpa, uint64_t *rval, int bytes);
+static int vie_mmio_write(struct vie *vie, struct vm *vm, int cpuid,
+ uint64_t gpa, uint64_t wval, int bytes);
+static int vie_calculate_gla(enum vm_cpu_mode cpu_mode, enum vm_reg_name seg,
+ struct seg_desc *desc, uint64_t offset, int length, int addrsize,
+ int prot, uint64_t *gla);
+static int vie_canonical_check(enum vm_cpu_mode cpu_mode, uint64_t gla);
+static int vie_alignment_check(int cpl, int size, uint64_t cr0, uint64_t rf,
+ uint64_t gla);
+static uint64_t vie_size2mask(int size);
+
+struct vie *
+vie_alloc()
+{
+ return (kmem_zalloc(sizeof (struct vie), KM_SLEEP));
+}
- return (error);
+void
+vie_free(struct vie *vie)
+{
+ kmem_free(vie, sizeof (struct vie));
}
static void
@@ -336,7 +433,7 @@ vie_calc_bytereg(struct vie *vie, enum vm_reg_name *reg, int *lhbr)
}
static int
-vie_read_bytereg(void *vm, int vcpuid, struct vie *vie, uint8_t *rval)
+vie_read_bytereg(struct vie *vie, void *vm, int vcpuid, uint8_t *rval)
{
uint64_t val;
int error, lhbr;
@@ -357,7 +454,7 @@ vie_read_bytereg(void *vm, int vcpuid, struct vie *vie, uint8_t *rval)
}
static int
-vie_write_bytereg(void *vm, int vcpuid, struct vie *vie, uint8_t byte)
+vie_write_bytereg(struct vie *vie, void *vm, int vcpuid, uint8_t byte)
{
uint64_t origval, val, mask;
int error, lhbr;
@@ -382,9 +479,9 @@ vie_write_bytereg(void *vm, int vcpuid, struct vie *vie, uint8_t byte)
return (error);
}
-int
-vie_update_register(void *vm, int vcpuid, enum vm_reg_name reg,
- uint64_t val, int size)
+static int
+vie_update_register(void *vm, int vcpuid, enum vm_reg_name reg, uint64_t val,
+ int size)
{
int error;
uint64_t origval;
@@ -392,7 +489,7 @@ vie_update_register(void *vm, int vcpuid, enum vm_reg_name reg,
switch (size) {
case 1:
case 2:
- error = vie_read_register(vm, vcpuid, reg, &origval);
+ error = vm_get_register(vm, vcpuid, reg, &origval);
if (error)
return (error);
val &= size2mask[size];
@@ -411,6 +508,29 @@ vie_update_register(void *vm, int vcpuid, enum vm_reg_name reg,
return (error);
}
+static int
+vie_repeat(struct vie *vie)
+{
+ vie->status |= VIES_REPEAT;
+
+ /*
+ * Clear out any cached operation values so the repeated instruction can
+ * begin without using that stale state. Other state, such as the
+ * decoding results, are kept around as it will not vary between
+ * iterations of a rep-prefixed instruction.
+ */
+ if ((vie->status & VIES_MMIO) != 0) {
+ vie->mmio_req_read.state = VR_NONE;
+ vie->mmio_req_write.state = VR_NONE;
+ } else if ((vie->status & VIES_INOUT) != 0) {
+ vie->inout_req_state = VR_NONE;
+ } else {
+ panic("unexpected emulation state");
+ }
+
+ return (EAGAIN);
+}
+
#define RFLAGS_STATUS_BITS (PSL_C | PSL_PF | PSL_AF | PSL_Z | PSL_N | PSL_V)
/*
@@ -519,8 +639,7 @@ getandflags(int opsize, uint64_t x, uint64_t y)
}
static int
-emulate_mov(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
- mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
+emulate_mov(void *vm, int vcpuid, uint64_t gpa, struct vie *vie)
{
int error, size;
enum vm_reg_name reg;
@@ -538,9 +657,9 @@ emulate_mov(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
* REX + 88/r: mov r/m8, r8 (%ah, %ch, %dh, %bh not available)
*/
size = 1; /* override for byte operation */
- error = vie_read_bytereg(vm, vcpuid, vie, &byte);
+ error = vie_read_bytereg(vie, vm, vcpuid, &byte);
if (error == 0)
- error = memwrite(vm, vcpuid, gpa, byte, size, arg);
+ error = vie_mmio_write(vie, vm, vcpuid, gpa, byte, size);
break;
case 0x89:
/*
@@ -550,10 +669,10 @@ emulate_mov(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
* REX.W + 89/r mov r/m64, r64
*/
reg = gpr_map[vie->reg];
- error = vie_read_register(vm, vcpuid, reg, &val);
+ error = vm_get_register(vm, vcpuid, reg, &val);
if (error == 0) {
val &= size2mask[size];
- error = memwrite(vm, vcpuid, gpa, val, size, arg);
+ error = vie_mmio_write(vie, vm, vcpuid, gpa, val, size);
}
break;
case 0x8A:
@@ -563,9 +682,9 @@ emulate_mov(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
* REX + 8A/r: mov r8, r/m8
*/
size = 1; /* override for byte operation */
- error = memread(vm, vcpuid, gpa, &val, size, arg);
+ error = vie_mmio_read(vie, vm, vcpuid, gpa, &val, size);
if (error == 0)
- error = vie_write_bytereg(vm, vcpuid, vie, val);
+ error = vie_write_bytereg(vie, vm, vcpuid, val);
break;
case 0x8B:
/*
@@ -574,7 +693,7 @@ emulate_mov(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
* 8B/r: mov r32, r/m32
* REX.W 8B/r: mov r64, r/m64
*/
- error = memread(vm, vcpuid, gpa, &val, size, arg);
+ error = vie_mmio_read(vie, vm, vcpuid, gpa, &val, size);
if (error == 0) {
reg = gpr_map[vie->reg];
error = vie_update_register(vm, vcpuid, reg, val, size);
@@ -587,7 +706,7 @@ emulate_mov(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
* A1: mov EAX, moffs32
* REX.W + A1: mov RAX, moffs64
*/
- error = memread(vm, vcpuid, gpa, &val, size, arg);
+ error = vie_mmio_read(vie, vm, vcpuid, gpa, &val, size);
if (error == 0) {
reg = VM_REG_GUEST_RAX;
error = vie_update_register(vm, vcpuid, reg, val, size);
@@ -597,13 +716,13 @@ emulate_mov(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
/*
* MOV from AX/EAX/RAX to seg:moffset
* A3: mov moffs16, AX
- * A3: mov moffs32, EAX
+ * A3: mov moffs32, EAX
* REX.W + A3: mov moffs64, RAX
*/
- error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RAX, &val);
+ error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RAX, &val);
if (error == 0) {
val &= size2mask[size];
- error = memwrite(vm, vcpuid, gpa, val, size, arg);
+ error = vie_mmio_write(vie, vm, vcpuid, gpa, val, size);
}
break;
case 0xC6:
@@ -613,7 +732,8 @@ emulate_mov(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
* REX + C6/0 mov r/m8, imm8
*/
size = 1; /* override for byte operation */
- error = memwrite(vm, vcpuid, gpa, vie->immediate, size, arg);
+ val = vie->immediate;
+ error = vie_mmio_write(vie, vm, vcpuid, gpa, val, size);
break;
case 0xC7:
/*
@@ -623,7 +743,7 @@ emulate_mov(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
* REX.W + C7/0 mov r/m64, imm32 (sign-extended to 64-bits)
*/
val = vie->immediate & size2mask[size];
- error = memwrite(vm, vcpuid, gpa, val, size, arg);
+ error = vie_mmio_write(vie, vm, vcpuid, gpa, val, size);
break;
default:
break;
@@ -633,9 +753,7 @@ emulate_mov(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
}
static int
-emulate_movx(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
- mem_region_read_t memread, mem_region_write_t memwrite,
- void *arg)
+emulate_movx(void *vm, int vcpuid, uint64_t gpa, struct vie *vie)
{
int error, size;
enum vm_reg_name reg;
@@ -656,7 +774,7 @@ emulate_movx(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
*/
/* get the first operand */
- error = memread(vm, vcpuid, gpa, &val, 1, arg);
+ error = vie_mmio_read(vie, vm, vcpuid, gpa, &val, 1);
if (error)
break;
@@ -677,7 +795,7 @@ emulate_movx(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
* 0F B7/r movzx r32, r/m16
* REX.W + 0F B7/r movzx r64, r/m16
*/
- error = memread(vm, vcpuid, gpa, &val, 2, arg);
+ error = vie_mmio_read(vie, vm, vcpuid, gpa, &val, 2);
if (error)
return (error);
@@ -699,7 +817,7 @@ emulate_movx(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
*/
/* get the first operand */
- error = memread(vm, vcpuid, gpa, &val, 1, arg);
+ error = vie_mmio_read(vie, vm, vcpuid, gpa, &val, 1);
if (error)
break;
@@ -722,25 +840,27 @@ emulate_movx(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
* Helper function to calculate and validate a linear address.
*/
static int
-get_gla(void *vm, int vcpuid, struct vie *vie, struct vm_guest_paging *paging,
- int opsize, int addrsize, int prot, enum vm_reg_name seg,
- enum vm_reg_name gpr, uint64_t *gla, int *fault)
+vie_get_gla(struct vie *vie, void *vm, int vcpuid, int opsize, int addrsize,
+ int prot, enum vm_reg_name seg, enum vm_reg_name gpr, uint64_t *gla)
{
struct seg_desc desc;
uint64_t cr0, val, rflags;
int error;
+ struct vm_guest_paging *paging;
+
+ paging = &vie->paging;
- error = vie_read_register(vm, vcpuid, VM_REG_GUEST_CR0, &cr0);
+ error = vm_get_register(vm, vcpuid, VM_REG_GUEST_CR0, &cr0);
KASSERT(error == 0, ("%s: error %d getting cr0", __func__, error));
- error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
+ error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
KASSERT(error == 0, ("%s: error %d getting rflags", __func__, error));
error = vm_get_seg_desc(vm, vcpuid, seg, &desc);
KASSERT(error == 0, ("%s: error %d getting segment descriptor %d",
__func__, error, seg));
- error = vie_read_register(vm, vcpuid, gpr, &val);
+ error = vm_get_register(vm, vcpuid, gpr, &val);
KASSERT(error == 0, ("%s: error %d getting register %d", __func__,
error, gpr));
@@ -750,7 +870,7 @@ get_gla(void *vm, int vcpuid, struct vie *vie, struct vm_guest_paging *paging,
vm_inject_ss(vm, vcpuid, 0);
else
vm_inject_gp(vm, vcpuid);
- goto guest_fault;
+ return (-1);
}
if (vie_canonical_check(paging->cpu_mode, *gla)) {
@@ -758,39 +878,30 @@ get_gla(void *vm, int vcpuid, struct vie *vie, struct vm_guest_paging *paging,
vm_inject_ss(vm, vcpuid, 0);
else
vm_inject_gp(vm, vcpuid);
- goto guest_fault;
+ return (-1);
}
if (vie_alignment_check(paging->cpl, opsize, cr0, rflags, *gla)) {
vm_inject_ac(vm, vcpuid, 0);
- goto guest_fault;
+ return (-1);
}
- *fault = 0;
- return (0);
-
-guest_fault:
- *fault = 1;
return (0);
}
static int
-emulate_movs(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
- struct vm_guest_paging *paging, mem_region_read_t memread,
- mem_region_write_t memwrite, void *arg)
+emulate_movs(void *vm, int vcpuid, uint64_t gpa, struct vie *vie)
{
-#ifdef _KERNEL
struct vm_copyinfo copyinfo[2];
-#else
- struct iovec copyinfo[2];
-#endif
uint64_t dstaddr, srcaddr, dstgpa, srcgpa, val;
uint64_t rcx, rdi, rsi, rflags;
int error, fault, opsize, seg, repeat;
+ struct vm_guest_paging *paging;
opsize = (vie->op.op_byte == 0xA4) ? 1 : vie->opsize;
val = 0;
error = 0;
+ paging = &vie->paging;
/*
* XXX although the MOVS instruction is only supposed to be used with
@@ -802,7 +913,7 @@ emulate_movs(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
repeat = vie->repz_present | vie->repnz_present;
if (repeat) {
- error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RCX, &rcx);
+ error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RCX, &rcx);
KASSERT(!error, ("%s: error %d getting rcx", __func__, error));
/*
@@ -832,10 +943,10 @@ emulate_movs(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
*/
seg = vie->segment_override ? vie->segment_register : VM_REG_GUEST_DS;
- error = get_gla(vm, vcpuid, vie, paging, opsize, vie->addrsize,
- PROT_READ, seg, VM_REG_GUEST_RSI, &srcaddr, &fault);
- if (error || fault)
+ if (vie_get_gla(vie, vm, vcpuid, opsize, vie->addrsize, PROT_READ, seg,
+ VM_REG_GUEST_RSI, &srcaddr) != 0) {
goto done;
+ }
error = vm_copy_setup(vm, vcpuid, paging, srcaddr, opsize, PROT_READ,
copyinfo, nitems(copyinfo), &fault);
@@ -848,7 +959,7 @@ emulate_movs(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
*/
vm_copyin(vm, vcpuid, copyinfo, &val, opsize);
vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo));
- error = memwrite(vm, vcpuid, gpa, val, opsize, arg);
+ error = vie_mmio_write(vie, vm, vcpuid, gpa, val, opsize);
if (error)
goto done;
} else {
@@ -857,11 +968,11 @@ emulate_movs(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
* if 'srcaddr' is in the mmio space.
*/
- error = get_gla(vm, vcpuid, vie, paging, opsize, vie->addrsize,
- PROT_WRITE, VM_REG_GUEST_ES, VM_REG_GUEST_RDI, &dstaddr,
- &fault);
- if (error || fault)
+ if (vie_get_gla(vie, vm, vcpuid, opsize, vie->addrsize,
+ PROT_WRITE, VM_REG_GUEST_ES, VM_REG_GUEST_RDI,
+ &dstaddr) != 0) {
goto done;
+ }
error = vm_copy_setup(vm, vcpuid, paging, dstaddr, opsize,
PROT_WRITE, copyinfo, nitems(copyinfo), &fault);
@@ -878,7 +989,8 @@ emulate_movs(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
* injected into the guest then it will happen
* before the MMIO read is attempted.
*/
- error = memread(vm, vcpuid, gpa, &val, opsize, arg);
+ error = vie_mmio_read(vie, vm, vcpuid, gpa, &val,
+ opsize);
if (error)
goto done;
@@ -903,23 +1015,25 @@ emulate_movs(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
if (error || fault)
goto done;
- error = memread(vm, vcpuid, srcgpa, &val, opsize, arg);
+ error = vie_mmio_read(vie, vm, vcpuid, srcgpa, &val,
+ opsize);
if (error)
goto done;
- error = memwrite(vm, vcpuid, dstgpa, val, opsize, arg);
+ error = vie_mmio_write(vie, vm, vcpuid, dstgpa, val,
+ opsize);
if (error)
goto done;
}
}
- error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RSI, &rsi);
+ error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RSI, &rsi);
KASSERT(error == 0, ("%s: error %d getting rsi", __func__, error));
- error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RDI, &rdi);
+ error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RDI, &rdi);
KASSERT(error == 0, ("%s: error %d getting rdi", __func__, error));
- error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
+ error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
KASSERT(error == 0, ("%s: error %d getting rflags", __func__, error));
if (rflags & PSL_D) {
@@ -948,18 +1062,14 @@ emulate_movs(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
* Repeat the instruction if the count register is not zero.
*/
if ((rcx & vie_size2mask(vie->addrsize)) != 0)
- vm_restart_instruction(vm, vcpuid);
+ return (vie_repeat(vie));
}
done:
- KASSERT(error == 0 || error == EFAULT, ("%s: unexpected error %d",
- __func__, error));
return (error);
}
static int
-emulate_stos(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
- struct vm_guest_paging *paging, mem_region_read_t memread,
- mem_region_write_t memwrite, void *arg)
+emulate_stos(void *vm, int vcpuid, uint64_t gpa, struct vie *vie)
{
int error, opsize, repeat;
uint64_t val;
@@ -969,7 +1079,7 @@ emulate_stos(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
repeat = vie->repz_present | vie->repnz_present;
if (repeat) {
- error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RCX, &rcx);
+ error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RCX, &rcx);
KASSERT(!error, ("%s: error %d getting rcx", __func__, error));
/*
@@ -980,17 +1090,17 @@ emulate_stos(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
return (0);
}
- error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RAX, &val);
+ error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RAX, &val);
KASSERT(!error, ("%s: error %d getting rax", __func__, error));
- error = memwrite(vm, vcpuid, gpa, val, opsize, arg);
+ error = vie_mmio_write(vie, vm, vcpuid, gpa, val, opsize);
if (error)
return (error);
- error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RDI, &rdi);
+ error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RDI, &rdi);
KASSERT(error == 0, ("%s: error %d getting rdi", __func__, error));
- error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
+ error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
KASSERT(error == 0, ("%s: error %d getting rflags", __func__, error));
if (rflags & PSL_D)
@@ -1012,15 +1122,14 @@ emulate_stos(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
* Repeat the instruction if the count register is not zero.
*/
if ((rcx & vie_size2mask(vie->addrsize)) != 0)
- vm_restart_instruction(vm, vcpuid);
+ return (vie_repeat(vie));
}
return (0);
}
static int
-emulate_and(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
- mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
+emulate_and(void *vm, int vcpuid, uint64_t gpa, struct vie *vie)
{
int error, size;
enum vm_reg_name reg;
@@ -1042,12 +1151,12 @@ emulate_and(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
/* get the first operand */
reg = gpr_map[vie->reg];
- error = vie_read_register(vm, vcpuid, reg, &val1);
+ error = vm_get_register(vm, vcpuid, reg, &val1);
if (error)
break;
/* get the second operand */
- error = memread(vm, vcpuid, gpa, &val2, size, arg);
+ error = vie_mmio_read(vie, vm, vcpuid, gpa, &val2, size);
if (error)
break;
@@ -1071,7 +1180,7 @@ emulate_and(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
*/
/* get the first operand */
- error = memread(vm, vcpuid, gpa, &val1, size, arg);
+ error = vie_mmio_read(vie, vm, vcpuid, gpa, &val1, size);
if (error)
break;
@@ -1080,7 +1189,7 @@ emulate_and(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
* operand and write the result
*/
result = val1 & vie->immediate;
- error = memwrite(vm, vcpuid, gpa, result, size, arg);
+ error = vie_mmio_write(vie, vm, vcpuid, gpa, result, size);
break;
default:
break;
@@ -1088,7 +1197,7 @@ emulate_and(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
if (error)
return (error);
- error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
+ error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
if (error)
return (error);
@@ -1107,8 +1216,7 @@ emulate_and(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
}
static int
-emulate_or(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
- mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
+emulate_or(void *vm, int vcpuid, uint64_t gpa, struct vie *vie)
{
int error, size;
enum vm_reg_name reg;
@@ -1130,12 +1238,12 @@ emulate_or(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
/* get the first operand */
reg = gpr_map[vie->reg];
- error = vie_read_register(vm, vcpuid, reg, &val1);
+ error = vm_get_register(vm, vcpuid, reg, &val1);
if (error)
break;
-
+
/* get the second operand */
- error = memread(vm, vcpuid, gpa, &val2, size, arg);
+ error = vie_mmio_read(vie, vm, vcpuid, gpa, &val2, size);
if (error)
break;
@@ -1159,7 +1267,7 @@ emulate_or(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
*/
/* get the first operand */
- error = memread(vm, vcpuid, gpa, &val1, size, arg);
+ error = vie_mmio_read(vie, vm, vcpuid, gpa, &val1, size);
if (error)
break;
@@ -1168,7 +1276,7 @@ emulate_or(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
* operand and write the result
*/
result = val1 | vie->immediate;
- error = memwrite(vm, vcpuid, gpa, result, size, arg);
+ error = vie_mmio_write(vie, vm, vcpuid, gpa, result, size);
break;
default:
break;
@@ -1176,7 +1284,7 @@ emulate_or(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
if (error)
return (error);
- error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
+ error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
if (error)
return (error);
@@ -1195,8 +1303,7 @@ emulate_or(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
}
static int
-emulate_cmp(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
- mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
+emulate_cmp(void *vm, int vcpuid, uint64_t gpa, struct vie *vie)
{
int error, size;
uint64_t regop, memop, op1, op2, rflags, rflags2;
@@ -1223,12 +1330,12 @@ emulate_cmp(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
/* Get the register operand */
reg = gpr_map[vie->reg];
- error = vie_read_register(vm, vcpuid, reg, &regop);
+ error = vm_get_register(vm, vcpuid, reg, &regop);
if (error)
return (error);
/* Get the memory operand */
- error = memread(vm, vcpuid, gpa, &memop, size, arg);
+ error = vie_mmio_read(vie, vm, vcpuid, gpa, &memop, size);
if (error)
return (error);
@@ -1267,7 +1374,7 @@ emulate_cmp(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
size = 1;
/* get the first operand */
- error = memread(vm, vcpuid, gpa, &op1, size, arg);
+ error = vie_mmio_read(vie, vm, vcpuid, gpa, &op1, size);
if (error)
return (error);
@@ -1276,7 +1383,7 @@ emulate_cmp(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
default:
return (EINVAL);
}
- error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
+ error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
if (error)
return (error);
rflags &= ~RFLAGS_STATUS_BITS;
@@ -1287,8 +1394,7 @@ emulate_cmp(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
}
static int
-emulate_test(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
- mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
+emulate_test(void *vm, int vcpuid, uint64_t gpa, struct vie *vie)
{
int error, size;
uint64_t op1, rflags, rflags2;
@@ -1311,7 +1417,7 @@ emulate_test(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
if ((vie->reg & 7) != 0)
return (EINVAL);
- error = memread(vm, vcpuid, gpa, &op1, size, arg);
+ error = vie_mmio_read(vie, vm, vcpuid, gpa, &op1, size);
if (error)
return (error);
@@ -1320,7 +1426,7 @@ emulate_test(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
default:
return (EINVAL);
}
- error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
+ error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
if (error)
return (error);
@@ -1336,16 +1442,16 @@ emulate_test(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
}
static int
-emulate_bextr(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
- struct vm_guest_paging *paging, mem_region_read_t memread,
- mem_region_write_t memwrite, void *arg)
+emulate_bextr(void *vm, int vcpuid, uint64_t gpa, struct vie *vie)
{
uint64_t src1, src2, dst, rflags;
unsigned start, len;
int error, size;
+ struct vm_guest_paging *paging;
size = vie->opsize;
error = EINVAL;
+ paging = &vie->paging;
/*
* VEX.LZ.0F38.W0 F7 /r BEXTR r32a, r/m32, r32b
@@ -1364,13 +1470,13 @@ emulate_bextr(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
* operand) using an index and length specified in the second /source/
* operand (third operand).
*/
- error = memread(vm, vcpuid, gpa, &src1, size, arg);
+ error = vie_mmio_read(vie, vm, vcpuid, gpa, &src1, size);
if (error)
return (error);
- error = vie_read_register(vm, vcpuid, gpr_map[vie->vex_reg], &src2);
+ error = vm_get_register(vm, vcpuid, gpr_map[vie->vex_reg], &src2);
if (error)
return (error);
- error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
+ error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
if (error)
return (error);
@@ -1413,8 +1519,7 @@ done:
}
static int
-emulate_add(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
- mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
+emulate_add(void *vm, int vcpuid, uint64_t gpa, struct vie *vie)
{
int error, size;
uint64_t nval, rflags, rflags2, val1, val2;
@@ -1435,12 +1540,12 @@ emulate_add(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
/* get the first operand */
reg = gpr_map[vie->reg];
- error = vie_read_register(vm, vcpuid, reg, &val1);
+ error = vm_get_register(vm, vcpuid, reg, &val1);
if (error)
break;
/* get the second operand */
- error = memread(vm, vcpuid, gpa, &val2, size, arg);
+ error = vie_mmio_read(vie, vm, vcpuid, gpa, &val2, size);
if (error)
break;
@@ -1454,7 +1559,7 @@ emulate_add(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
if (!error) {
rflags2 = getaddflags(size, val1, val2);
- error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS,
+ error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RFLAGS,
&rflags);
if (error)
return (error);
@@ -1469,8 +1574,7 @@ emulate_add(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
}
static int
-emulate_sub(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
- mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
+emulate_sub(void *vm, int vcpuid, uint64_t gpa, struct vie *vie)
{
int error, size;
uint64_t nval, rflags, rflags2, val1, val2;
@@ -1483,7 +1587,7 @@ emulate_sub(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
case 0x2B:
/*
* SUB r/m from r and store the result in r
- *
+ *
* 2B/r SUB r16, r/m16
* 2B/r SUB r32, r/m32
* REX.W + 2B/r SUB r64, r/m64
@@ -1491,12 +1595,12 @@ emulate_sub(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
/* get the first operand */
reg = gpr_map[vie->reg];
- error = vie_read_register(vm, vcpuid, reg, &val1);
+ error = vm_get_register(vm, vcpuid, reg, &val1);
if (error)
break;
/* get the second operand */
- error = memread(vm, vcpuid, gpa, &val2, size, arg);
+ error = vie_mmio_read(vie, vm, vcpuid, gpa, &val2, size);
if (error)
break;
@@ -1510,7 +1614,7 @@ emulate_sub(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
if (!error) {
rflags2 = getcc(size, val1, val2);
- error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS,
+ error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RFLAGS,
&rflags);
if (error)
return (error);
@@ -1525,22 +1629,18 @@ emulate_sub(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
}
static int
-emulate_stack_op(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie,
- struct vm_guest_paging *paging, mem_region_read_t memread,
- mem_region_write_t memwrite, void *arg)
+emulate_stack_op(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie)
{
-#ifdef _KERNEL
struct vm_copyinfo copyinfo[2];
-#else
- struct iovec copyinfo[2];
-#endif
struct seg_desc ss_desc;
uint64_t cr0, rflags, rsp, stack_gla, val;
int error, fault, size, stackaddrsize, pushop;
+ struct vm_guest_paging *paging;
val = 0;
size = vie->opsize;
pushop = (vie->op.op_type == VIE_OP_TYPE_PUSH) ? 1 : 0;
+ paging = &vie->paging;
/*
* From "Address-Size Attributes for Stack Accesses", Intel SDL, Vol 1
@@ -1572,13 +1672,13 @@ emulate_stack_op(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie,
stackaddrsize = 2;
}
- error = vie_read_register(vm, vcpuid, VM_REG_GUEST_CR0, &cr0);
+ error = vm_get_register(vm, vcpuid, VM_REG_GUEST_CR0, &cr0);
KASSERT(error == 0, ("%s: error %d getting cr0", __func__, error));
- error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
+ error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
KASSERT(error == 0, ("%s: error %d getting rflags", __func__, error));
- error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RSP, &rsp);
+ error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RSP, &rsp);
KASSERT(error == 0, ("%s: error %d getting rsp", __func__, error));
if (pushop) {
rsp -= size;
@@ -1608,12 +1708,12 @@ emulate_stack_op(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie,
return (error);
if (pushop) {
- error = memread(vm, vcpuid, mmio_gpa, &val, size, arg);
+ error = vie_mmio_read(vie, vm, vcpuid, mmio_gpa, &val, size);
if (error == 0)
vm_copyout(vm, vcpuid, &val, copyinfo, size);
} else {
vm_copyin(vm, vcpuid, copyinfo, &val, size);
- error = memwrite(vm, vcpuid, mmio_gpa, val, size, arg);
+ error = vie_mmio_write(vie, vm, vcpuid, mmio_gpa, val, size);
rsp += size;
}
vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo));
@@ -1627,9 +1727,7 @@ emulate_stack_op(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie,
}
static int
-emulate_push(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie,
- struct vm_guest_paging *paging, mem_region_read_t memread,
- mem_region_write_t memwrite, void *arg)
+emulate_push(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie)
{
int error;
@@ -1642,15 +1740,12 @@ emulate_push(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie,
if ((vie->reg & 7) != 6)
return (EINVAL);
- error = emulate_stack_op(vm, vcpuid, mmio_gpa, vie, paging, memread,
- memwrite, arg);
+ error = emulate_stack_op(vm, vcpuid, mmio_gpa, vie);
return (error);
}
static int
-emulate_pop(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie,
- struct vm_guest_paging *paging, mem_region_read_t memread,
- mem_region_write_t memwrite, void *arg)
+emulate_pop(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie)
{
int error;
@@ -1663,30 +1758,24 @@ emulate_pop(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie,
if ((vie->reg & 7) != 0)
return (EINVAL);
- error = emulate_stack_op(vm, vcpuid, mmio_gpa, vie, paging, memread,
- memwrite, arg);
+ error = emulate_stack_op(vm, vcpuid, mmio_gpa, vie);
return (error);
}
static int
-emulate_group1(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
- struct vm_guest_paging *paging, mem_region_read_t memread,
- mem_region_write_t memwrite, void *memarg)
+emulate_group1(void *vm, int vcpuid, uint64_t gpa, struct vie *vie)
{
int error;
switch (vie->reg & 7) {
case 0x1: /* OR */
- error = emulate_or(vm, vcpuid, gpa, vie,
- memread, memwrite, memarg);
+ error = emulate_or(vm, vcpuid, gpa, vie);
break;
case 0x4: /* AND */
- error = emulate_and(vm, vcpuid, gpa, vie,
- memread, memwrite, memarg);
+ error = emulate_and(vm, vcpuid, gpa, vie);
break;
case 0x7: /* CMP */
- error = emulate_cmp(vm, vcpuid, gpa, vie,
- memread, memwrite, memarg);
+ error = emulate_cmp(vm, vcpuid, gpa, vie);
break;
default:
error = EINVAL;
@@ -1697,8 +1786,7 @@ emulate_group1(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
}
static int
-emulate_bittest(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
- mem_region_read_t memread, mem_region_write_t memwrite, void *memarg)
+emulate_bittest(void *vm, int vcpuid, uint64_t gpa, struct vie *vie)
{
uint64_t val, rflags;
int error, bitmask, bitoff;
@@ -1712,10 +1800,10 @@ emulate_bittest(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
if ((vie->reg & 7) != 4)
return (EINVAL);
- error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
+ error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
KASSERT(error == 0, ("%s: error %d getting rflags", __func__, error));
- error = memread(vm, vcpuid, gpa, &val, vie->opsize, memarg);
+ error = vie_mmio_read(vie, vm, vcpuid, gpa, &val, vie->opsize);
if (error)
return (error);
@@ -1739,8 +1827,7 @@ emulate_bittest(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
}
static int
-emulate_twob_group15(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
- mem_region_read_t memread, mem_region_write_t memwrite, void *memarg)
+emulate_twob_group15(void *vm, int vcpuid, uint64_t gpa, struct vie *vie)
{
int error;
uint64_t buf;
@@ -1758,7 +1845,7 @@ emulate_twob_group15(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
* CLFLUSH, CLFLUSHOPT. Only check for access
* rights.
*/
- error = memread(vm, vcpuid, gpa, &buf, 1, memarg);
+ error = vie_mmio_read(vie, vm, vcpuid, gpa, &buf, 1);
}
break;
default:
@@ -1769,91 +1856,460 @@ emulate_twob_group15(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
return (error);
}
+static int
+vie_mmio_read(struct vie *vie, struct vm *vm, int cpuid, uint64_t gpa,
+ uint64_t *rval, int bytes)
+{
+ int err;
+
+ if (vie->mmio_req_read.state == VR_DONE) {
+ ASSERT(vie->mmio_req_read.bytes == bytes);
+ ASSERT(vie->mmio_req_read.gpa == gpa);
+
+ *rval = vie->mmio_req_read.data;
+ return (0);
+ }
+
+ err = vm_service_mmio_read(vm, cpuid, gpa, rval, bytes);
+ if (err == 0) {
+ /*
+ * A successful read from an in-kernel-emulated device may come
+ * with side effects, so stash the result in case it's used for
+ * an instruction which subsequently needs to issue an MMIO
+ * write to userspace.
+ */
+ ASSERT(vie->mmio_req_read.state == VR_NONE);
+
+ vie->mmio_req_read.bytes = bytes;
+ vie->mmio_req_read.gpa = gpa;
+ vie->mmio_req_read.data = *rval;
+ vie->mmio_req_read.state = VR_DONE;
+
+ } else if (err == ESRCH) {
+ /* Hope that userspace emulation can fulfill this read */
+ vie->mmio_req_read.bytes = bytes;
+ vie->mmio_req_read.gpa = gpa;
+ vie->mmio_req_read.state = VR_PENDING;
+ vie->status |= VIES_PENDING_MMIO;
+ }
+ return (err);
+}
+
+static int
+vie_mmio_write(struct vie *vie, struct vm *vm, int cpuid, uint64_t gpa,
+ uint64_t wval, int bytes)
+{
+ int err;
+
+ if (vie->mmio_req_write.state == VR_DONE) {
+ ASSERT(vie->mmio_req_write.bytes == bytes);
+ ASSERT(vie->mmio_req_write.gpa == gpa);
+
+ return (0);
+ }
+
+ err = vm_service_mmio_write(vm, cpuid, gpa, wval, bytes);
+ if (err == 0) {
+ /*
+ * A successful write to an in-kernel-emulated device probably
+ * results in side effects, so stash the fact that such a write
+ * succeeded in case the operation requires other work.
+ */
+ vie->mmio_req_write.bytes = bytes;
+ vie->mmio_req_write.gpa = gpa;
+ vie->mmio_req_write.data = wval;
+ vie->mmio_req_write.state = VR_DONE;
+ } else if (err == ESRCH) {
+ /* Hope that userspace emulation can fulfill this write */
+ vie->mmio_req_write.bytes = bytes;
+ vie->mmio_req_write.gpa = gpa;
+ vie->mmio_req_write.data = wval;
+ vie->mmio_req_write.state = VR_PENDING;
+ vie->status |= VIES_PENDING_MMIO;
+ }
+ return (err);
+}
+
int
-vmm_emulate_instruction(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
- struct vm_guest_paging *paging, mem_region_read_t memread,
- mem_region_write_t memwrite, void *memarg)
+vie_emulate_mmio(struct vie *vie, void *vm, int vcpuid)
{
int error;
+ uint64_t gpa;
- if (!vie->decoded)
+ if ((vie->status & (VIES_INST_DECODE | VIES_MMIO)) !=
+ (VIES_INST_DECODE | VIES_MMIO)) {
return (EINVAL);
+ }
+
+ gpa = vie->mmio_gpa;
switch (vie->op.op_type) {
case VIE_OP_TYPE_GROUP1:
- error = emulate_group1(vm, vcpuid, gpa, vie, paging, memread,
- memwrite, memarg);
+ error = emulate_group1(vm, vcpuid, gpa, vie);
break;
case VIE_OP_TYPE_POP:
- error = emulate_pop(vm, vcpuid, gpa, vie, paging, memread,
- memwrite, memarg);
+ error = emulate_pop(vm, vcpuid, gpa, vie);
break;
case VIE_OP_TYPE_PUSH:
- error = emulate_push(vm, vcpuid, gpa, vie, paging, memread,
- memwrite, memarg);
+ error = emulate_push(vm, vcpuid, gpa, vie);
break;
case VIE_OP_TYPE_CMP:
- error = emulate_cmp(vm, vcpuid, gpa, vie,
- memread, memwrite, memarg);
+ error = emulate_cmp(vm, vcpuid, gpa, vie);
break;
case VIE_OP_TYPE_MOV:
- error = emulate_mov(vm, vcpuid, gpa, vie,
- memread, memwrite, memarg);
+ error = emulate_mov(vm, vcpuid, gpa, vie);
break;
case VIE_OP_TYPE_MOVSX:
case VIE_OP_TYPE_MOVZX:
- error = emulate_movx(vm, vcpuid, gpa, vie,
- memread, memwrite, memarg);
+ error = emulate_movx(vm, vcpuid, gpa, vie);
break;
case VIE_OP_TYPE_MOVS:
- error = emulate_movs(vm, vcpuid, gpa, vie, paging, memread,
- memwrite, memarg);
+ error = emulate_movs(vm, vcpuid, gpa, vie);
break;
case VIE_OP_TYPE_STOS:
- error = emulate_stos(vm, vcpuid, gpa, vie, paging, memread,
- memwrite, memarg);
+ error = emulate_stos(vm, vcpuid, gpa, vie);
break;
case VIE_OP_TYPE_AND:
- error = emulate_and(vm, vcpuid, gpa, vie,
- memread, memwrite, memarg);
+ error = emulate_and(vm, vcpuid, gpa, vie);
break;
case VIE_OP_TYPE_OR:
- error = emulate_or(vm, vcpuid, gpa, vie,
- memread, memwrite, memarg);
+ error = emulate_or(vm, vcpuid, gpa, vie);
break;
case VIE_OP_TYPE_SUB:
- error = emulate_sub(vm, vcpuid, gpa, vie,
- memread, memwrite, memarg);
+ error = emulate_sub(vm, vcpuid, gpa, vie);
break;
case VIE_OP_TYPE_BITTEST:
- error = emulate_bittest(vm, vcpuid, gpa, vie,
- memread, memwrite, memarg);
+ error = emulate_bittest(vm, vcpuid, gpa, vie);
break;
case VIE_OP_TYPE_TWOB_GRP15:
- error = emulate_twob_group15(vm, vcpuid, gpa, vie,
- memread, memwrite, memarg);
+ error = emulate_twob_group15(vm, vcpuid, gpa, vie);
break;
case VIE_OP_TYPE_ADD:
- error = emulate_add(vm, vcpuid, gpa, vie, memread,
- memwrite, memarg);
+ error = emulate_add(vm, vcpuid, gpa, vie);
break;
case VIE_OP_TYPE_TEST:
- error = emulate_test(vm, vcpuid, gpa, vie,
- memread, memwrite, memarg);
+ error = emulate_test(vm, vcpuid, gpa, vie);
break;
case VIE_OP_TYPE_BEXTR:
- error = emulate_bextr(vm, vcpuid, gpa, vie, paging,
- memread, memwrite, memarg);
+ error = emulate_bextr(vm, vcpuid, gpa, vie);
break;
default:
error = EINVAL;
break;
}
+ if (error == ESRCH) {
+ /* Return to userspace with the mmio request */
+ return (-1);
+ }
+
return (error);
}
+static int
+vie_emulate_inout_port(struct vie *vie, struct vm *vm, int vcpuid)
+{
+ uint32_t mask, val;
+ bool in;
+ int err;
+
+ mask = vie_size2mask(vie->inout.bytes);
+ in = (vie->inout.flags & INOUT_IN) != 0;
+
+ if (!in) {
+ val = vie->inout.eax & mask;
+ }
+
+ if (vie->inout_req_state != VR_DONE) {
+ err = vm_inout_access(vm, vcpuid, in, vie->inout.port,
+ vie->inout.bytes, &val);
+ } else {
+ /*
+ * This port access was handled in userspace and the result was
+ * injected in to be handled now.
+ */
+ val = vie->inout_req_val;
+ vie->inout_req_state = VR_NONE;
+ err = 0;
+ }
+
+ if (err == ESRCH) {
+ vie->status |= VIES_PENDING_INOUT;
+ vie->inout_req_state = VR_PENDING;
+ return (err);
+ } else if (err != 0) {
+ return (err);
+ }
+
+ if (in) {
+ val &= mask;
+ val |= (vie->inout.eax & ~mask);
+ err = vm_set_register(vm, vcpuid, VM_REG_GUEST_RAX, val);
+ KASSERT(err == 0, ("emulate_ioport: error %d setting guest "
+ "rax register", err));
+ }
+ return (0);
+}
+
+static enum vm_reg_name
+vie_inout_segname(const struct vie *vie)
+{
+ uint8_t segidx = vie->inout.segment;
+ const enum vm_reg_name segmap[] = {
+ VM_REG_GUEST_ES,
+ VM_REG_GUEST_CS,
+ VM_REG_GUEST_SS,
+ VM_REG_GUEST_DS,
+ VM_REG_GUEST_FS,
+ VM_REG_GUEST_GS,
+ };
+ const uint8_t maxidx = (sizeof (segmap) / sizeof (segmap[0]));
+
+ if (segidx >= maxidx) {
+ panic("unexpected segment index %u", segidx);
+ }
+ return (segmap[segidx]);
+}
+
+static int
+vie_emulate_inout_str(struct vie *vie, struct vm *vm, int vcpuid)
+{
+ uint8_t bytes, addrsize;
+ uint64_t index, count = 0, gla, rflags;
+ int prot, err, fault;
+ bool in, repeat;
+ enum vm_reg_name seg_reg, idx_reg;
+ struct vm_copyinfo copyinfo[2];
+
+ in = (vie->inout.flags & INOUT_IN) != 0;
+ bytes = vie->inout.bytes;
+ addrsize = vie->inout.addrsize;
+ prot = in ? PROT_WRITE : PROT_READ;
+
+ ASSERT(bytes == 1 || bytes == 2 || bytes == 4);
+ ASSERT(addrsize == 2 || addrsize == 4 || addrsize == 8);
+
+ idx_reg = (in) ? VM_REG_GUEST_RDI : VM_REG_GUEST_RSI;
+ seg_reg = vie_inout_segname(vie);
+ err = vm_get_register(vm, vcpuid, idx_reg, &index);
+ ASSERT(err == 0);
+ index = index & vie_size2mask(addrsize);
+
+ repeat = (vie->inout.flags & INOUT_REP) != 0;
+
+ /* Count register */
+ if (repeat) {
+ err = vm_get_register(vm, vcpuid, VM_REG_GUEST_RCX, &count);
+ count &= vie_size2mask(addrsize);
+
+ if (count == 0) {
+ /*
+ * If we were asked to emulate a REP INS/OUTS when the
+ * count register is zero, no further work is required.
+ */
+ return (0);
+ }
+ } else {
+ count = 1;
+ }
+
+ gla = 0;
+ if (vie_get_gla(vie, vm, vcpuid, bytes, addrsize, prot, seg_reg,
+ idx_reg, &gla) != 0) {
+ /* vie_get_gla() already injected the appropriate fault */
+ return (0);
+ }
+
+ /*
+ * The INS/OUTS emulate currently assumes that the memory target resides
+ * within the guest system memory, rather than a device MMIO region. If
+ * such a case becomes a necessity, that additional handling could be
+ * put in place.
+ */
+ err = vm_copy_setup(vm, vcpuid, &vie->paging, gla, bytes, prot,
+ copyinfo, nitems(copyinfo), &fault);
+
+ if (err) {
+ /* Unrecoverable error */
+ return (err);
+ } else if (fault) {
+ /* Resume guest to handle fault */
+ return (0);
+ }
+
+ if (!in) {
+ vm_copyin(vm, vcpuid, copyinfo, &vie->inout.eax, bytes);
+ }
+
+ err = vie_emulate_inout_port(vie, vm, vcpuid);
+
+ if (err == 0 && in) {
+ vm_copyout(vm, vcpuid, &vie->inout.eax, copyinfo, bytes);
+ }
+
+ vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo));
+
+ if (err == 0) {
+ err = vm_get_register(vm, vcpuid, VM_REG_GUEST_RFLAGS,
+ &rflags);
+ ASSERT(err == 0);
+
+ /* Update index */
+ if (rflags & PSL_D) {
+ index -= bytes;
+ } else {
+ index += bytes;
+ }
+
+ /* Update index register */
+ err = vie_update_register(vm, vcpuid, idx_reg, index, addrsize);
+ ASSERT(err == 0);
+
+ /*
+ * Update count register only if the instruction had a repeat
+ * prefix.
+ */
+ if ((vie->inout.flags & INOUT_REP) != 0) {
+ count--;
+ err = vie_update_register(vm, vcpuid, VM_REG_GUEST_RCX,
+ count, addrsize);
+ ASSERT(err == 0);
+
+ if (count != 0) {
+ return (vie_repeat(vie));
+ }
+ }
+ }
+
+ return (err);
+}
+
int
+vie_emulate_inout(struct vie *vie, struct vm *vm, int vcpuid)
+{
+ int err = 0;
+
+ if ((vie->status & VIES_INOUT) == 0) {
+ return (EINVAL);
+ }
+
+ if ((vie->inout.flags & INOUT_STR) == 0) {
+ /*
+ * For now, using the 'rep' prefixes with plain (non-string)
+ * in/out is not supported.
+ */
+ if ((vie->inout.flags & INOUT_REP) != 0) {
+ return (EINVAL);
+ }
+
+ err = vie_emulate_inout_port(vie, vm, vcpuid);
+
+ if (err == ESRCH) {
+ ASSERT(vie->status & VIES_PENDING_INOUT);
+ /* Return to userspace with the in/out request */
+ err = -1;
+ }
+ } else {
+ vie->status &= ~VIES_REPEAT;
+ err = vie_emulate_inout_str(vie, vm, vcpuid);
+
+ if (err == ESRCH) {
+ ASSERT(vie->status & VIES_PENDING_INOUT);
+ /* Return to userspace with the in/out request */
+ err = -1;
+ }
+ }
+
+ return (err);
+}
+
+void
+vie_reset(struct vie *vie)
+{
+ vie->status = 0;
+ vie->num_processed = vie->num_valid = 0;
+}
+
+void
+vie_advance_pc(struct vie *vie, uint64_t *nextrip)
+{
+ VERIFY((vie->status & VIES_REPEAT) == 0);
+
+ *nextrip += vie->num_processed;
+ vie_reset(vie);
+}
+
+void
+vie_exitinfo(const struct vie *vie, struct vm_exit *vme)
+{
+ if (vie->status & VIES_MMIO) {
+ vme->exitcode = VM_EXITCODE_MMIO;
+ if (vie->mmio_req_read.state == VR_PENDING) {
+ vme->u.mmio.gpa = vie->mmio_req_read.gpa;
+ vme->u.mmio.data = 0;
+ vme->u.mmio.bytes = vie->mmio_req_read.bytes;
+ vme->u.mmio.read = 1;
+ } else if (vie->mmio_req_write.state == VR_PENDING) {
+ vme->u.mmio.gpa = vie->mmio_req_write.gpa;
+ vme->u.mmio.data = vie->mmio_req_write.data &
+ vie_size2mask(vie->mmio_req_write.bytes);
+ vme->u.mmio.bytes = vie->mmio_req_write.bytes;
+ vme->u.mmio.read = 0;
+ } else {
+ panic("bad pending MMIO state");
+ }
+ } else if (vie->status & VIES_INOUT) {
+ vme->exitcode = VM_EXITCODE_INOUT;
+ vme->u.inout.port = vie->inout.port;
+ vme->u.inout.bytes = vie->inout.bytes;
+ if ((vie->inout.flags & INOUT_IN) != 0) {
+ vme->u.inout.flags = INOUT_IN;
+ vme->u.inout.eax = 0;
+ } else {
+ vme->u.inout.flags = 0;
+ vme->u.inout.eax = vie->inout.eax &
+ vie_size2mask(vie->inout.bytes);
+ }
+ } else {
+ panic("no pending operation");
+ }
+}
+
+/*
+ * In the case of a decoding or verification failure, bailing out to userspace
+ * to do the instruction emulation is our only option for now.
+ */
+void
+vie_fallback_exitinfo(const struct vie *vie, struct vm_exit *vme)
+{
+ if ((vie->status & VIES_INST_FETCH) == 0) {
+ bzero(&vme->u.inst_emul, sizeof (vme->u.inst_emul));
+ } else {
+ ASSERT(sizeof (vie->inst) == sizeof (vme->u.inst_emul.inst));
+
+ bcopy(vie->inst, vme->u.inst_emul.inst, sizeof (vie->inst));
+ vme->u.inst_emul.num_valid = vie->num_valid;
+ }
+ vme->exitcode = VM_EXITCODE_INST_EMUL;
+}
+
+bool
+vie_pending(const struct vie *vie)
+{
+ return ((vie->status & (VIES_PENDING_MMIO|VIES_PENDING_INOUT)) != 0);
+}
+
+bool
+vie_needs_fetch(const struct vie *vie)
+{
+ if (vie->status & VIES_INST_FETCH) {
+ ASSERT(vie->num_valid != 0);
+ return (false);
+ }
+ return (true);
+}
+
+static int
vie_alignment_check(int cpl, int size, uint64_t cr0, uint64_t rf, uint64_t gla)
{
KASSERT(size == 1 || size == 2 || size == 4 || size == 8,
@@ -1866,7 +2322,7 @@ vie_alignment_check(int cpl, int size, uint64_t cr0, uint64_t rf, uint64_t gla)
return ((gla & (size - 1)) ? 1 : 0);
}
-int
+static int
vie_canonical_check(enum vm_cpu_mode cpu_mode, uint64_t gla)
{
uint64_t mask;
@@ -1885,7 +2341,7 @@ vie_canonical_check(enum vm_cpu_mode cpu_mode, uint64_t gla)
return ((gla & mask) != 0);
}
-uint64_t
+static uint64_t
vie_size2mask(int size)
{
KASSERT(size == 1 || size == 2 || size == 4 || size == 8,
@@ -1893,7 +2349,7 @@ vie_size2mask(int size)
return (size2mask[size]);
}
-int
+static int
vie_calculate_gla(enum vm_cpu_mode cpu_mode, enum vm_reg_name seg,
struct seg_desc *desc, uint64_t offset, int length, int addrsize,
int prot, uint64_t *gla)
@@ -1905,13 +2361,8 @@ vie_calculate_gla(enum vm_cpu_mode cpu_mode, enum vm_reg_name seg,
("%s: invalid segment %d", __func__, seg));
KASSERT(length == 1 || length == 2 || length == 4 || length == 8,
("%s: invalid operand size %d", __func__, length));
-#ifdef __FreeBSD__
- KASSERT((prot & ~(PROT_READ | PROT_WRITE)) == 0,
- ("%s: invalid prot %#x", __func__, prot));
-#else
KASSERT((prot & ~(PROT_READ | PROT_WRITE)) == 0,
("%s: invalid prot %x", __func__, prot));
-#endif
firstoff = offset;
if (cpu_mode == CPU_MODE_64BIT) {
@@ -1930,31 +2381,21 @@ vie_calculate_gla(enum vm_cpu_mode cpu_mode, enum vm_reg_name seg,
if (SEG_DESC_UNUSABLE(desc->access))
return (-1);
- /*
+ /*
* The processor generates a #NP exception when a segment
* register is loaded with a selector that points to a
* descriptor that is not present. If this was the case then
* it would have been checked before the VM-exit.
*/
-#ifdef __FreeBSD__
- KASSERT(SEG_DESC_PRESENT(desc->access),
- ("segment %d not present: %#x", seg, desc->access));
-#else
KASSERT(SEG_DESC_PRESENT(desc->access),
("segment %d not present: %x", seg, desc->access));
-#endif
/*
* The descriptor type must indicate a code/data segment.
*/
type = SEG_DESC_TYPE(desc->access);
-#ifdef __FreeBSD__
- KASSERT(type >= 16 && type <= 31, ("segment %d has invalid "
- "descriptor type %#x", seg, type));
-#else
KASSERT(type >= 16 && type <= 31, ("segment %d has invalid "
"descriptor type %x", seg, type));
-#endif
if (prot & PROT_READ) {
/* #GP on a read access to a exec-only code segment */
@@ -2019,24 +2460,107 @@ vie_calculate_gla(enum vm_cpu_mode cpu_mode, enum vm_reg_name seg,
}
void
-vie_init(struct vie *vie, const char *inst_bytes, int inst_length)
+vie_init_mmio(struct vie *vie, const char *inst_bytes, uint8_t inst_length,
+ const struct vm_guest_paging *paging, uint64_t gpa)
{
- KASSERT(inst_length >= 0 && inst_length <= VIE_INST_SIZE,
+ KASSERT(inst_length <= VIE_INST_SIZE,
("%s: invalid instruction length (%d)", __func__, inst_length));
- bzero(vie, sizeof(struct vie));
+ bzero(vie, sizeof (struct vie));
vie->base_register = VM_REG_LAST;
vie->index_register = VM_REG_LAST;
vie->segment_register = VM_REG_LAST;
+ vie->status = VIES_INIT | VIES_MMIO;
- if (inst_length) {
+ if (inst_length != 0) {
bcopy(inst_bytes, vie->inst, inst_length);
vie->num_valid = inst_length;
+ vie->status |= VIES_INST_FETCH;
+ }
+
+ vie->paging = *paging;
+ vie->mmio_gpa = gpa;
+}
+
+void
+vie_init_inout(struct vie *vie, const struct vm_inout *inout, uint8_t inst_len,
+ const struct vm_guest_paging *paging)
+{
+ bzero(vie, sizeof (struct vie));
+
+ vie->status = VIES_INIT | VIES_INOUT;
+
+ vie->inout = *inout;
+ vie->paging = *paging;
+
+ /*
+ * Since VMX/SVM assists already decoded the nature of the in/out
+ * instruction, let the status reflect that.
+ */
+ vie->status |= VIES_INST_FETCH | VIES_INST_DECODE;
+ vie->num_processed = inst_len;
+}
+
+int
+vie_fulfill_mmio(struct vie *vie, const struct vm_mmio *result)
+{
+ struct vie_mmio *pending;
+
+ if ((vie->status & VIES_MMIO) == 0 ||
+ (vie->status & VIES_PENDING_MMIO) == 0) {
+ return (EINVAL);
+ }
+
+ if (result->read) {
+ pending = &vie->mmio_req_read;
+ } else {
+ pending = &vie->mmio_req_write;
+ }
+
+ if (pending->state != VR_PENDING ||
+ pending->bytes != result->bytes || pending->gpa != result->gpa) {
+ return (EINVAL);
+ }
+
+ if (result->read) {
+ pending->data = result->data & vie_size2mask(pending->bytes);
+ }
+ pending->state = VR_DONE;
+ vie->status &= ~VIES_PENDING_MMIO;
+
+ return (0);
+}
+
+int
+vie_fulfill_inout(struct vie *vie, const struct vm_inout *result)
+{
+ if ((vie->status & VIES_INOUT) == 0 ||
+ (vie->status & VIES_PENDING_INOUT) == 0) {
+ return (EINVAL);
}
+ if ((vie->inout.flags & INOUT_IN) != (result->flags & INOUT_IN) ||
+ vie->inout.bytes != result->bytes ||
+ vie->inout.port != result->port) {
+ return (EINVAL);
+ }
+
+ if (result->flags & INOUT_IN) {
+ vie->inout_req_val = result->eax &
+ vie_size2mask(vie->inout.bytes);
+ }
+ vie->inout_req_state = VR_DONE;
+ vie->status &= ~(VIES_PENDING_INOUT);
+
+ return (0);
+}
+
+uint64_t
+vie_mmio_gpa(const struct vie *vie)
+{
+ return (vie->mmio_gpa);
}
-#ifdef _KERNEL
static int
pf_error_code(int usermode, int prot, int rsvd, uint64_t pte)
{
@@ -2299,27 +2823,28 @@ vm_gla2gpa_nofault(struct vm *vm, int vcpuid, struct vm_guest_paging *paging,
}
int
-vmm_fetch_instruction(struct vm *vm, int vcpuid, struct vm_guest_paging *paging,
- uint64_t rip, int inst_length, struct vie *vie, int *faultptr)
+vie_fetch_instruction(struct vie *vie, struct vm *vm, int vcpuid, uint64_t rip,
+ int *faultptr)
{
struct vm_copyinfo copyinfo[2];
int error, prot;
- if (inst_length > VIE_INST_SIZE)
- panic("vmm_fetch_instruction: invalid length %d", inst_length);
+ if (vie->status != (VIES_INIT|VIES_MMIO)) {
+ return (EINVAL);
+ }
prot = PROT_READ | PROT_EXEC;
- error = vm_copy_setup(vm, vcpuid, paging, rip, inst_length, prot,
- copyinfo, nitems(copyinfo), faultptr);
+ error = vm_copy_setup(vm, vcpuid, &vie->paging, rip, VIE_INST_SIZE,
+ prot, copyinfo, nitems(copyinfo), faultptr);
if (error || *faultptr)
return (error);
- vm_copyin(vm, vcpuid, copyinfo, vie->inst, inst_length);
+ vm_copyin(vm, vcpuid, copyinfo, vie->inst, VIE_INST_SIZE);
vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo));
- vie->num_valid = inst_length;
+ vie->num_valid = VIE_INST_SIZE;
+ vie->status |= VIES_INST_FETCH;
return (0);
}
-#endif /* _KERNEL */
static int
vie_peek(struct vie *vie, uint8_t *x)
@@ -2821,23 +3346,28 @@ decode_moffset(struct vie *vie)
return (0);
}
-#ifdef _KERNEL
/*
* Verify that the 'guest linear address' provided as collateral of the nested
* page table fault matches with our instruction decoding.
*/
-static int
-verify_gla(struct vm *vm, int cpuid, uint64_t gla, struct vie *vie,
- enum vm_cpu_mode cpu_mode)
+int
+vie_verify_gla(struct vie *vie, struct vm *vm, int cpuid, uint64_t gla)
{
int error;
uint64_t base, segbase, idx, gla2;
enum vm_reg_name seg;
struct seg_desc desc;
- /* Skip 'gla' verification */
- if (gla == VIE_INVALID_GLA)
+ ASSERT((vie->status & VIES_INST_DECODE) != 0);
+
+ /*
+ * If there was no valid GLA context with the exit, or the decoded
+ * instruction acts on more than one address, verification is done.
+ */
+ if (gla == VIE_INVALID_GLA ||
+ (vie->op.op_flags & VIE_OP_F_NO_GLA_VERIFICATION) != 0) {
return (0);
+ }
base = 0;
if (vie->base_register != VM_REG_LAST) {
@@ -2879,15 +3409,16 @@ verify_gla(struct vm *vm, int cpuid, uint64_t gla, struct vie *vie,
* string destination the DS segment is the default. These
* can be overridden to allow other segments to be accessed.
*/
- if (vie->segment_override)
+ if (vie->segment_override) {
seg = vie->segment_register;
- else if (vie->base_register == VM_REG_GUEST_RSP ||
- vie->base_register == VM_REG_GUEST_RBP)
+ } else if (vie->base_register == VM_REG_GUEST_RSP ||
+ vie->base_register == VM_REG_GUEST_RBP) {
seg = VM_REG_GUEST_SS;
- else
+ } else {
seg = VM_REG_GUEST_DS;
- if (cpu_mode == CPU_MODE_64BIT && seg != VM_REG_GUEST_FS &&
- seg != VM_REG_GUEST_GS) {
+ }
+ if (vie->paging.cpu_mode == CPU_MODE_64BIT &&
+ seg != VM_REG_GUEST_FS && seg != VM_REG_GUEST_GS) {
segbase = 0;
} else {
error = vm_get_seg_desc(vm, cpuid, seg, &desc);
@@ -2913,16 +3444,17 @@ verify_gla(struct vm *vm, int cpuid, uint64_t gla, struct vie *vie,
return (0);
}
-#endif /* _KERNEL */
int
-#ifdef _KERNEL
-vmm_decode_instruction(struct vm *vm, int cpuid, uint64_t gla,
- enum vm_cpu_mode cpu_mode, int cs_d, struct vie *vie)
-#else
-vmm_decode_instruction(enum vm_cpu_mode cpu_mode, int cs_d, struct vie *vie)
-#endif
+vie_decode_instruction(struct vie *vie, struct vm *vm, int cpuid, int cs_d)
{
+ enum vm_cpu_mode cpu_mode;
+
+ if ((vie->status & VIES_INST_FETCH) == 0) {
+ return (EINVAL);
+ }
+
+ cpu_mode = vie->paging.cpu_mode;
if (decode_prefixes(vie, cpu_mode, cs_d))
return (-1);
@@ -2945,14 +3477,7 @@ vmm_decode_instruction(enum vm_cpu_mode cpu_mode, int cs_d, struct vie *vie)
if (decode_moffset(vie))
return (-1);
-#ifdef _KERNEL
- if ((vie->op.op_flags & VIE_OP_F_NO_GLA_VERIFICATION) == 0) {
- if (verify_gla(vm, cpuid, gla, vie, cpu_mode))
- return (-1);
- }
-#endif
-
- vie->decoded = 1; /* success */
+ vie->status |= VIES_INST_DECODE;
return (0);
}
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_ioport.c b/usr/src/uts/i86pc/io/vmm/vmm_ioport.c
index 3d08fd5e85..01fae7d584 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm_ioport.c
+++ b/usr/src/uts/i86pc/io/vmm/vmm_ioport.c
@@ -25,6 +25,18 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * Copyright 2020 Oxide Computer Company
+ */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
@@ -33,18 +45,16 @@ __FBSDID("$FreeBSD$");
#include <sys/systm.h>
#include <machine/vmm.h>
-#include <machine/vmm_instruction_emul.h>
#include "vatpic.h"
#include "vatpit.h"
#include "vpmtmr.h"
#include "vrtc.h"
#include "vmm_ioport.h"
-#include "vmm_ktr.h"
#define MAX_IOPORTS 1280
-ioport_handler_func_t ioport_handler[MAX_IOPORTS] = {
+static ioport_handler_func_t ioport_handler[MAX_IOPORTS] = {
[TIMER_MODE] = vatpit_handler,
[TIMER_CNTR0] = vatpit_handler,
[TIMER_CNTR1] = vatpit_handler,
@@ -61,144 +71,24 @@ ioport_handler_func_t ioport_handler[MAX_IOPORTS] = {
[IO_RTC + 1] = vrtc_data_handler,
};
-#ifdef KTR
-static const char *
-inout_instruction(struct vm_exit *vmexit)
-{
- int index;
-
- static const char *iodesc[] = {
- "outb", "outw", "outl",
- "inb", "inw", "inl",
- "outsb", "outsw", "outsd",
- "insb", "insw", "insd",
- };
-
- switch (vmexit->u.inout.bytes) {
- case 1:
- index = 0;
- break;
- case 2:
- index = 1;
- break;
- default:
- index = 2;
- break;
- }
-
- if (vmexit->u.inout.in)
- index += 3;
-
- if (vmexit->u.inout.string)
- index += 6;
-
- KASSERT(index < nitems(iodesc), ("%s: invalid index %d",
- __func__, index));
-
- return (iodesc[index]);
-}
-#endif /* KTR */
-
-static int
-emulate_inout_port(struct vm *vm, int vcpuid, struct vm_exit *vmexit,
- bool *retu)
+int
+vm_inout_access(struct vm *vm, int vcpuid, bool in, uint16_t port,
+ uint8_t bytes, uint32_t *val)
{
ioport_handler_func_t handler;
- uint32_t mask, val;
int error;
-#ifdef __FreeBSD__
- /*
- * If there is no handler for the I/O port then punt to userspace.
- */
- if (vmexit->u.inout.port >= MAX_IOPORTS ||
- (handler = ioport_handler[vmexit->u.inout.port]) == NULL) {
- *retu = true;
- return (0);
- }
-#else /* __FreeBSD__ */
handler = NULL;
- if (vmexit->u.inout.port < MAX_IOPORTS) {
- handler = ioport_handler[vmexit->u.inout.port];
+ if (port < MAX_IOPORTS) {
+ handler = ioport_handler[port];
}
- /* Look for hooks, if a standard handler is not present */
- if (handler == NULL) {
- mask = vie_size2mask(vmexit->u.inout.bytes);
- if (!vmexit->u.inout.in) {
- val = vmexit->u.inout.eax & mask;
- }
- error = vm_ioport_handle_hook(vm, vcpuid, vmexit->u.inout.in,
- vmexit->u.inout.port, vmexit->u.inout.bytes, &val);
- if (error == 0) {
- goto finish;
- }
- *retu = true;
- return (0);
+ if (handler != NULL) {
+ error = (*handler)(vm, vcpuid, in, port, bytes, val);
+ } else {
+ /* Look for hooks, if a standard handler is not present */
+ error = vm_ioport_handle_hook(vm, vcpuid, in, port, bytes, val);
}
-#endif /* __FreeBSD__ */
-
- mask = vie_size2mask(vmexit->u.inout.bytes);
-
- if (!vmexit->u.inout.in) {
- val = vmexit->u.inout.eax & mask;
- }
-
- error = (*handler)(vm, vcpuid, vmexit->u.inout.in,
- vmexit->u.inout.port, vmexit->u.inout.bytes, &val);
- if (error) {
- /*
- * The value returned by this function is also the return value
- * of vm_run(). This needs to be a positive number otherwise it
- * can be interpreted as a "pseudo-error" like ERESTART.
- *
- * Enforce this by mapping all errors to EIO.
- */
- return (EIO);
- }
-
-#ifndef __FreeBSD__
-finish:
-#endif /* __FreeBSD__ */
- if (vmexit->u.inout.in) {
- vmexit->u.inout.eax &= ~mask;
- vmexit->u.inout.eax |= val & mask;
- error = vm_set_register(vm, vcpuid, VM_REG_GUEST_RAX,
- vmexit->u.inout.eax);
- KASSERT(error == 0, ("emulate_ioport: error %d setting guest "
- "rax register", error));
- }
- *retu = false;
- return (0);
-}
-
-static int
-emulate_inout_str(struct vm *vm, int vcpuid, struct vm_exit *vmexit, bool *retu)
-{
- *retu = true;
- return (0); /* Return to userspace to finish emulation */
-}
-
-int
-vm_handle_inout(struct vm *vm, int vcpuid, struct vm_exit *vmexit, bool *retu)
-{
- int bytes, error;
-
- bytes = vmexit->u.inout.bytes;
- KASSERT(bytes == 1 || bytes == 2 || bytes == 4,
- ("vm_handle_inout: invalid operand size %d", bytes));
-
- if (vmexit->u.inout.string)
- error = emulate_inout_str(vm, vcpuid, vmexit, retu);
- else
- error = emulate_inout_port(vm, vcpuid, vmexit, retu);
-
- VCPU_CTR4(vm, vcpuid, "%s%s 0x%04x: %s",
- vmexit->u.inout.rep ? "rep " : "",
- inout_instruction(vmexit),
- vmexit->u.inout.port,
- error ? "error" : (*retu ? "userspace" : "handled"));
-
return (error);
}
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_ioport.h b/usr/src/uts/i86pc/io/vmm/vmm_ioport.h
index 14e315f400..7c51906e85 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm_ioport.h
+++ b/usr/src/uts/i86pc/io/vmm/vmm_ioport.h
@@ -32,8 +32,9 @@
#define _VMM_IOPORT_H_
typedef int (*ioport_handler_func_t)(struct vm *vm, int vcpuid,
- bool in, int port, int bytes, uint32_t *val);
+ bool in, uint16_t port, uint8_t bytes, uint32_t *val);
-int vm_handle_inout(struct vm *vm, int vcpuid, struct vm_exit *vme, bool *retu);
+int vm_inout_access(struct vm *vm, int vcpuid, bool in, uint16_t port,
+ uint8_t bytes, uint32_t *val);
#endif /* _VMM_IOPORT_H_ */
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_mem.c b/usr/src/uts/i86pc/io/vmm/vmm_mem.c
index a736d94bba..cd894dc84d 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm_mem.c
+++ b/usr/src/uts/i86pc/io/vmm/vmm_mem.c
@@ -100,7 +100,7 @@ vmm_mmio_alloc(struct vmspace *vmspace, vm_paddr_t gpa, size_t len,
* has incremented the reference count on the sglist. Dropping the
* initial reference count ensures that the sglist will be freed
* when the object is deallocated.
- *
+ *
* If the object could not be allocated then we end up freeing the
* sglist.
*/
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c b/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c
index 2f84ac5e95..f05600d6c3 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c
+++ b/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c
@@ -500,25 +500,27 @@ vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md,
/* Execute the primary logic for the ioctl. */
switch (cmd) {
case VM_RUN: {
- struct vm_run vmrun;
+ struct vm_entry entry;
- if (ddi_copyin(datap, &vmrun, sizeof (vmrun), md)) {
+ if (ddi_copyin(datap, &entry, sizeof (entry), md)) {
error = EFAULT;
break;
}
- vmrun.cpuid = vcpu;
if (!(curthread->t_schedflag & TS_VCPU))
smt_mark_as_vcpu();
- error = vm_run(sc->vmm_vm, &vmrun);
- /*
- * XXXJOY: I think it's necessary to do copyout, even in the
- * face of errors, since the exit state is communicated out.
- */
- if (ddi_copyout(&vmrun, datap, sizeof (vmrun), md)) {
- error = EFAULT;
- break;
+ error = vm_run(sc->vmm_vm, vcpu, &entry);
+
+ if (error == 0) {
+ const struct vm_exit *vme;
+ void *outp = entry.exit_data;
+
+ vme = vm_exitinfo(sc->vmm_vm, vcpu);
+ if (ddi_copyout(vme, outp, sizeof (*vme), md)) {
+ error = EFAULT;
+ break;
+ }
}
break;
}
@@ -982,9 +984,6 @@ vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md,
case VM_GET_KERNEMU_DEV: {
struct vm_readwrite_kernemu_device kemu;
size_t size = 0;
- mem_region_write_t mwrite = NULL;
- mem_region_read_t mread = NULL;
- uint64_t ignored = 0;
if (ddi_copyin(datap, &kemu, sizeof (kemu), md)) {
error = EFAULT;
@@ -998,31 +997,12 @@ vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md,
size = (1 << kemu.access_width);
ASSERT(size >= 1 && size <= 8);
- if (kemu.gpa >= DEFAULT_APIC_BASE &&
- kemu.gpa < DEFAULT_APIC_BASE + PAGE_SIZE) {
- mread = lapic_mmio_read;
- mwrite = lapic_mmio_write;
- } else if (kemu.gpa >= VIOAPIC_BASE &&
- kemu.gpa < VIOAPIC_BASE + VIOAPIC_SIZE) {
- mread = vioapic_mmio_read;
- mwrite = vioapic_mmio_write;
- } else if (kemu.gpa >= VHPET_BASE &&
- kemu.gpa < VHPET_BASE + VHPET_SIZE) {
- mread = vhpet_mmio_read;
- mwrite = vhpet_mmio_write;
- } else {
- error = EINVAL;
- break;
- }
-
if (cmd == VM_SET_KERNEMU_DEV) {
- VERIFY(mwrite != NULL);
- error = mwrite(sc->vmm_vm, vcpu, kemu.gpa, kemu.value,
- size, &ignored);
+ error = vm_service_mmio_write(sc->vmm_vm, vcpu,
+ kemu.gpa, kemu.value, size);
} else {
- VERIFY(mread != NULL);
- error = mread(sc->vmm_vm, vcpu, kemu.gpa, &kemu.value,
- size, &ignored);
+ error = vm_service_mmio_read(sc->vmm_vm, vcpu,
+ kemu.gpa, &kemu.value, size);
}
if (error == 0) {
@@ -2004,6 +1984,11 @@ vmm_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
vmm_softc_t *sc;
minor_t minor;
+ /* The structs in bhyve ioctls assume a 64-bit datamodel */
+ if (ddi_model_convert_from(mode & FMODELS) != DDI_MODEL_NONE) {
+ return (ENOTSUP);
+ }
+
minor = getminor(dev);
if (minor == VMM_CTL_MINOR) {
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c b/usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c
index 2401774ab7..4dcaba8a82 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c
+++ b/usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c
@@ -572,7 +572,7 @@ vmm_sol_glue_cleanup(void)
*/
#define FEBRUARY 2
-#define days_in_year(y) (leapyear(y) ? 366 : 365)
+#define days_in_year(y) (leapyear(y) ? 366 : 365)
#define days_in_month(y, m) \
(month_days[(m) - 1] + (m == FEBRUARY ? leapyear(y) : 0))
/* Day of week. Days are counted from 1/1/1970, which was a Thursday */
@@ -644,7 +644,7 @@ clock_ct_to_ts(struct clocktime *ct, struct timespec *ts)
/* Months */
for (i = 1; i < ct->mon; i++)
- days += days_in_month(year, i);
+ days += days_in_month(year, i);
days += (ct->day - 1);
ts->tv_sec = (((time_t)days * 24 + ct->hour) * 60 + ct->min) * 60 +
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_stat.c b/usr/src/uts/i86pc/io/vmm/vmm_stat.c
index a6af75e40a..42d6f8cfa3 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm_stat.c
+++ b/usr/src/uts/i86pc/io/vmm/vmm_stat.c
@@ -163,7 +163,7 @@ VMM_STAT(VMEXIT_NMI_WINDOW, "vm exits due to nmi window opening");
VMM_STAT(VMEXIT_INOUT, "number of times in/out was intercepted");
VMM_STAT(VMEXIT_CPUID, "number of times cpuid was intercepted");
VMM_STAT(VMEXIT_NESTED_FAULT, "vm exits due to nested page fault");
-VMM_STAT(VMEXIT_INST_EMUL, "vm exits for instruction emulation");
+VMM_STAT(VMEXIT_MMIO_EMUL, "vm exits for mmio emulation");
VMM_STAT(VMEXIT_UNKNOWN, "number of vm exits for unknown reason");
VMM_STAT(VMEXIT_ASTPENDING, "number of times astpending at exit");
VMM_STAT(VMEXIT_REQIDLE, "number of times idle requested at exit");
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_stat.h b/usr/src/uts/i86pc/io/vmm/vmm_stat.h
index 3232e23888..a214ba0fe9 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm_stat.h
+++ b/usr/src/uts/i86pc/io/vmm/vmm_stat.h
@@ -73,7 +73,7 @@ void vmm_stat_register(void *arg);
}; \
SYSINIT(type##_stat, SI_SUB_KLD, SI_ORDER_ANY, vmm_stat_register, type)
-#define VMM_STAT_DEFINE(type, nelems, desc, scope) \
+#define VMM_STAT_DEFINE(type, nelems, desc, scope) \
VMM_STAT_FDEFINE(type, nelems, desc, NULL, scope)
#define VMM_STAT_DECLARE(type) \
@@ -94,7 +94,7 @@ void vmm_stat_register(void *arg);
void *vmm_stat_alloc(void);
void vmm_stat_init(void *vp);
-void vmm_stat_free(void *vp);
+void vmm_stat_free(void *vp);
/*
* 'buf' should be at least fit 'MAX_VMM_STAT_TYPES' entries
@@ -108,7 +108,7 @@ vmm_stat_array_incr(struct vm *vm, int vcpu, struct vmm_stat_type *vst,
{
#ifdef VMM_KEEP_STATS
uint64_t *stats;
-
+
stats = vcpu_stats(vm, vcpu);
if (vst->index >= 0 && statidx < vst->nelems)
@@ -122,7 +122,7 @@ vmm_stat_array_set(struct vm *vm, int vcpu, struct vmm_stat_type *vst,
{
#ifdef VMM_KEEP_STATS
uint64_t *stats;
-
+
stats = vcpu_stats(vm, vcpu);
if (vst->index >= 0 && statidx < vst->nelems)
@@ -162,7 +162,7 @@ VMM_STAT_DECLARE(VMEXIT_NMI_WINDOW);
VMM_STAT_DECLARE(VMEXIT_INOUT);
VMM_STAT_DECLARE(VMEXIT_CPUID);
VMM_STAT_DECLARE(VMEXIT_NESTED_FAULT);
-VMM_STAT_DECLARE(VMEXIT_INST_EMUL);
+VMM_STAT_DECLARE(VMEXIT_MMIO_EMUL);
VMM_STAT_DECLARE(VMEXIT_UNKNOWN);
VMM_STAT_DECLARE(VMEXIT_ASTPENDING);
VMM_STAT_DECLARE(VMEXIT_USERSPACE);
diff --git a/usr/src/uts/i86pc/sys/vmm.h b/usr/src/uts/i86pc/sys/vmm.h
index 45838e343e..d6d24f0c37 100644
--- a/usr/src/uts/i86pc/sys/vmm.h
+++ b/usr/src/uts/i86pc/sys/vmm.h
@@ -122,31 +122,13 @@ enum x2apic_state {
#define VM_INTINFO_HWEXCEPTION (3 << 8)
#define VM_INTINFO_SWINTR (4 << 8)
-#ifndef __FreeBSD__
/*
* illumos doesn't have a limitation based on SPECNAMELEN like FreeBSD does.
* Instead of picking an arbitrary value we will just rely on the same
* calculation that's made below. If this calculation ever changes we need to
* update the the VM_MAX_NAMELEN mapping in the bhyve brand's boot.c file.
*/
-#else
-/*
- * The VM name has to fit into the pathname length constraints of devfs,
- * governed primarily by SPECNAMELEN. The length is the total number of
- * characters in the full path, relative to the mount point and not
- * including any leading '/' characters.
- * A prefix and a suffix are added to the name specified by the user.
- * The prefix is usually "vmm/" or "vmm.io/", but can be a few characters
- * longer for future use.
- * The suffix is a string that identifies a bootrom image or some similar
- * image that is attached to the VM. A separator character gets added to
- * the suffix automatically when generating the full path, so it must be
- * accounted for, reducing the effective length by 1.
- * The effective length of a VM name is 229 bytes for FreeBSD 13 and 37
- * bytes for FreeBSD 12. A minimum length is set for safety and supports
- * a SPECNAMELEN as small as 32 on old systems.
- */
-#endif
+
#define VM_MAX_PREFIXLEN 10
#define VM_MAX_SUFFIXLEN 15
#define VM_MIN_NAMELEN 6
@@ -224,76 +206,6 @@ struct vm_guest_paging {
enum vm_paging_mode paging_mode;
};
-/*
- * The data structures 'vie' and 'vie_op' are meant to be opaque to the
- * consumers of instruction decoding. The only reason why their contents
- * need to be exposed is because they are part of the 'vm_exit' structure.
- */
-struct vie_op {
- uint8_t op_byte; /* actual opcode byte */
- uint8_t op_type; /* type of operation (e.g. MOV) */
- uint16_t op_flags;
-};
-_Static_assert(sizeof(struct vie_op) == 4, "ABI");
-_Static_assert(_Alignof(struct vie_op) == 2, "ABI");
-
-#define VIE_INST_SIZE 15
-struct vie {
- uint8_t inst[VIE_INST_SIZE]; /* instruction bytes */
- uint8_t num_valid; /* size of the instruction */
- uint8_t num_processed;
-
- uint8_t addrsize:4, opsize:4; /* address and operand sizes */
- uint8_t rex_w:1, /* REX prefix */
- rex_r:1,
- rex_x:1,
- rex_b:1,
- rex_present:1,
- repz_present:1, /* REP/REPE/REPZ prefix */
- repnz_present:1, /* REPNE/REPNZ prefix */
- opsize_override:1, /* Operand size override */
- addrsize_override:1, /* Address size override */
- segment_override:1; /* Segment override */
-
- uint8_t mod:2, /* ModRM byte */
- reg:4,
- rm:4;
-
- uint8_t ss:2, /* SIB byte */
- vex_present:1, /* VEX prefixed */
- vex_l:1, /* L bit */
- index:4, /* SIB byte */
- base:4; /* SIB byte */
-
- uint8_t disp_bytes;
- uint8_t imm_bytes;
-
- uint8_t scale;
-
- uint8_t vex_reg:4, /* vvvv: first source register specifier */
- vex_pp:2, /* pp */
- _sparebits:2;
-
- uint8_t _sparebytes[2];
-
- int base_register; /* VM_REG_GUEST_xyz */
- int index_register; /* VM_REG_GUEST_xyz */
- int segment_register; /* VM_REG_GUEST_xyz */
-
- int64_t displacement; /* optional addr displacement */
- int64_t immediate; /* optional immediate operand */
-
- uint8_t decoded; /* set to 1 if successfully decoded */
-
- uint8_t _sparebyte;
-
- struct vie_op op; /* opcode description */
-};
-_Static_assert(sizeof(struct vie) == 64, "ABI");
-_Static_assert(__offsetof(struct vie, disp_bytes) == 22, "ABI");
-_Static_assert(__offsetof(struct vie, scale) == 24, "ABI");
-_Static_assert(__offsetof(struct vie, base_register) == 28, "ABI");
-
enum vm_exitcode {
VM_EXITCODE_INOUT,
VM_EXITCODE_VMX,
@@ -306,11 +218,11 @@ enum vm_exitcode {
VM_EXITCODE_PAGING,
VM_EXITCODE_INST_EMUL,
VM_EXITCODE_SPINUP_AP,
- VM_EXITCODE_DEPRECATED1, /* used to be SPINDOWN_CPU */
+ VM_EXITCODE_MMIO_EMUL,
VM_EXITCODE_RUNBLOCK,
VM_EXITCODE_IOAPIC_EOI,
VM_EXITCODE_SUSPENDED,
- VM_EXITCODE_INOUT_STR,
+ VM_EXITCODE_MMIO,
VM_EXITCODE_TASK_SWITCH,
VM_EXITCODE_MONITOR,
VM_EXITCODE_MWAIT,
@@ -325,25 +237,38 @@ enum vm_exitcode {
VM_EXITCODE_MAX
};
+enum inout_flags {
+ INOUT_IN = (1U << 0), /* direction: 'in' when set, else 'out' */
+
+ /*
+ * The following flags are used only for in-kernel emulation logic and
+ * are not exposed to userspace.
+ */
+ INOUT_STR = (1U << 1), /* ins/outs operation */
+ INOUT_REP = (1U << 2), /* 'rep' prefix present on instruction */
+};
+
struct vm_inout {
- uint16_t bytes:3; /* 1 or 2 or 4 */
- uint16_t in:1;
- uint16_t string:1;
- uint16_t rep:1;
+ uint32_t eax;
uint16_t port;
- uint32_t eax; /* valid for out */
+ uint8_t bytes; /* 1 or 2 or 4 */
+ uint8_t flags; /* see: inout_flags */
+
+ /*
+ * The address size and segment are relevant to INS/OUTS operations.
+ * Userspace is not concerned with them since the in-kernel emulation
+ * handles those specific aspects.
+ */
+ uint8_t addrsize;
+ uint8_t segment;
};
-struct vm_inout_str {
- struct vm_inout inout; /* must be the first element */
- struct vm_guest_paging paging;
- uint64_t rflags;
- uint64_t cr0;
- uint64_t index;
- uint64_t count; /* rep=1 (%rcx), rep=0 (1) */
- int addrsize;
- enum vm_reg_name seg_name;
- struct seg_desc seg_desc;
+struct vm_mmio {
+ uint8_t bytes; /* 1/2/4/8 bytes */
+ uint8_t read; /* read: 1, write: 0 */
+ uint16_t _pad[3];
+ uint64_t gpa;
+ uint64_t data;
};
enum task_switch_reason {
@@ -368,18 +293,25 @@ struct vm_exit {
uint64_t rip;
union {
struct vm_inout inout;
- struct vm_inout_str inout_str;
+ struct vm_mmio mmio;
struct {
uint64_t gpa;
int fault_type;
} paging;
+ /*
+ * Kernel-internal MMIO decoding and emulation.
+ * Userspace should not expect to see this, but rather a
+ * VM_EXITCODE_MMIO with the above 'mmio' context.
+ */
struct {
uint64_t gpa;
uint64_t gla;
uint64_t cs_base;
int cs_d; /* CS.D */
- struct vm_guest_paging paging;
- struct vie vie;
+ } mmio_emul;
+ struct {
+ uint8_t inst[15];
+ uint8_t num_valid;
} inst_emul;
/*
* VMX specific payload. Used when there is no "better"
@@ -433,6 +365,23 @@ struct vm_exit {
} u;
};
+enum vm_entry_cmds {
+ VEC_DEFAULT = 0,
+ VEC_DISCARD_INSTR, /* discard inst emul state */
+ VEC_COMPLETE_MMIO, /* entry includes result for mmio emul */
+ VEC_COMPLETE_INOUT, /* entry includes result for inout emul */
+};
+
+struct vm_entry {
+ int cpuid;
+ uint_t cmd; /* see: vm_entry_cmds */
+ void *exit_data;
+ union {
+ struct vm_inout inout;
+ struct vm_mmio mmio;
+ } u;
+};
+
void vm_inject_pf(void *vm, int vcpuid, int error_code, uint64_t cr2);
int vm_restart_instruction(void *vm, int vcpuid);
diff --git a/usr/src/uts/i86pc/sys/vmm_dev.h b/usr/src/uts/i86pc/sys/vmm_dev.h
index 40e0857945..090e82ed29 100644
--- a/usr/src/uts/i86pc/sys/vmm_dev.h
+++ b/usr/src/uts/i86pc/sys/vmm_dev.h
@@ -83,11 +83,6 @@ struct vm_register_set {
uint64_t *regvals;
};
-struct vm_run {
- int cpuid;
- struct vm_exit vm_exit;
-};
-
struct vm_exception {
int cpuid;
int vector;
@@ -204,7 +199,7 @@ struct vm_suspend {
struct vm_gla2gpa {
int vcpuid; /* inputs */
- int prot; /* PROT_READ or PROT_WRITE */
+ int prot; /* PROT_READ or PROT_WRITE */
uint64_t gla;
struct vm_guest_paging paging;
int fault; /* outputs */
@@ -312,8 +307,8 @@ _Static_assert(sizeof(struct vm_readwrite_kernemu_device) == 24, "ABI");
#define VM_GET_MEMSEG (VMM_IOC_BASE | 0x02)
#define VM_MMAP_GETNEXT (VMM_IOC_BASE | 0x03)
-#define VM_LAPIC_IRQ (VMM_IOC_BASE | 0x04)
-#define VM_LAPIC_LOCAL_IRQ (VMM_IOC_BASE | 0x05)
+#define VM_LAPIC_IRQ (VMM_IOC_BASE | 0x04)
+#define VM_LAPIC_LOCAL_IRQ (VMM_IOC_BASE | 0x05)
#define VM_LAPIC_MSI (VMM_IOC_BASE | 0x06)
#define VM_IOAPIC_ASSERT_IRQ (VMM_IOC_BASE | 0x07)