diff options
author | Jerry Jelinek <jerry.jelinek@joyent.com> | 2020-08-24 22:26:14 +0000 |
---|---|---|
committer | Jerry Jelinek <jerry.jelinek@joyent.com> | 2020-08-24 22:26:14 +0000 |
commit | 360b48fad3374e5a37647e51de850a2b5cc8122a (patch) | |
tree | 97b8ae5c55b535b71a341ea9cbebad633c229453 | |
parent | b691ee30a82b4152e726474e6ee079bea99c48c0 (diff) | |
parent | 0a9a25a293d437b1563e1d8479fef8f3795ba817 (diff) | |
download | illumos-joyent-360b48fad3374e5a37647e51de850a2b5cc8122a.tar.gz |
[illumos-gate merge]
commit 0a9a25a293d437b1563e1d8479fef8f3795ba817
13021 Invalid state if bindtextdomain() fails during re-binding
commit 5fae793b9b02afd1f3f434f3a915a64c08edc7b2
13062 loader: make env_discard() public
commit b89fb824168e36795c325b5e0a4d1e71dd2d8d65
13068 gptzfsboot: command from /boot/config should be nul terminated
commit 8548ec78d85644539a23c4262ed2b9512a47865e
13065 log when suspending a zpool
commit 84971882a96ac0fecd538b02208054a872ff8af3
12996 bhyve kernel should be wscheck clean
commit e0c0d44e917080841514d0dd031a696c74e8c435
12989 improve interface boundary for bhyve MMIO
12990 improve interface boundary for bhyve ins/outs
12991 bhyve vlapic should SIPI more carefully
commit 76f19f5fdc974fe5be5c82a556e43a4df93f1de1
12983 Want additional prototypes for manual pages
commit 341c5f490806c8b3e6e31512923db4c0e1b464b1
13041 i40e_get_available_resources() broken again for X722 part
commit dcbbe9e03d9d35c714d909a1f4767ce83c885e80
13049 cstyle should tolerate freebsd headers
74 files changed, 2786 insertions, 1479 deletions
diff --git a/exception_lists/copyright b/exception_lists/copyright index 05d12b3fbd..56b86378c8 100644 --- a/exception_lists/copyright +++ b/exception_lists/copyright @@ -579,7 +579,6 @@ usr/src/uts/i86pc/io/vmm/vmx_assym.s usr/src/uts/i86pc/io/vmm/x86.[ch] usr/src/uts/i86pc/sys/vmm.h usr/src/uts/i86pc/sys/vmm_dev.h -usr/src/uts/i86pc/sys/vmm_instruction_emul.h usr/src/cmd/bhyve/README.license usr/src/cmd/bhyvectl/README.license usr/src/lib/libvmmapi/README.license diff --git a/exception_lists/cstyle b/exception_lists/cstyle index c03401a64f..9dc6d45d49 100644 --- a/exception_lists/cstyle +++ b/exception_lists/cstyle @@ -1434,6 +1434,7 @@ usr/src/uts/i86pc/io/vmm/amd/*.[ch] usr/src/uts/i86pc/io/vmm/intel/*.[chs] usr/src/uts/i86pc/io/vmm/io/*.[ch] usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h +usr/src/uts/i86pc/io/vmm/sys/vmm_instruction_emul.h usr/src/uts/i86pc/io/vmm/vmm.c usr/src/uts/i86pc/io/vmm/vmm_host.[ch] usr/src/uts/i86pc/io/vmm/vmm_instruction_emul.c @@ -1449,4 +1450,3 @@ usr/src/uts/i86pc/io/vmm/vmx_assym.s usr/src/uts/i86pc/io/vmm/x86.[ch] usr/src/uts/i86pc/sys/vmm.h usr/src/uts/i86pc/sys/vmm_dev.h -usr/src/uts/i86pc/sys/vmm_instruction_emul.h diff --git a/exception_lists/hdrchk b/exception_lists/hdrchk index 58a8a9d209..9440c114b5 100644 --- a/exception_lists/hdrchk +++ b/exception_lists/hdrchk @@ -437,4 +437,3 @@ usr/src/uts/i86pc/io/vmm/vmm_util.h usr/src/uts/i86pc/io/vmm/x86.h usr/src/uts/i86pc/sys/vmm.h usr/src/uts/i86pc/sys/vmm_dev.h -usr/src/uts/i86pc/sys/vmm_instruction_emul.h diff --git a/exception_lists/wscheck b/exception_lists/wscheck index cfba871041..a06ff9f3e1 100644 --- a/exception_lists/wscheck +++ b/exception_lists/wscheck @@ -174,27 +174,6 @@ usr/src/cmd/bhyve/usb_mouse.c usr/src/cmd/bhyve/vga.[ch] usr/src/cmd/bhyve/virtio.[ch] usr/src/cmd/bhyve/xmsr.[ch] -usr/src/cmd/bhyveconsole/bhyveconsole.c usr/src/cmd/bhyvectl/bhyvectl.c -usr/src/compat/bhyve/* usr/src/contrib/bhyve/* usr/src/lib/libvmmapi/common/vmmapi.[ch] 
-usr/src/uts/i86pc/io/vmm/amd/*.[ch] -usr/src/uts/i86pc/io/vmm/intel/*.[chs] -usr/src/uts/i86pc/io/vmm/io/*.[ch] -usr/src/uts/i86pc/io/vmm/vmm.c -usr/src/uts/i86pc/io/vmm/vmm_host.[ch] -usr/src/uts/i86pc/io/vmm/vmm_instruction_emul.c -usr/src/uts/i86pc/io/vmm/vmm_ioport.[ch] -usr/src/uts/i86pc/io/vmm/vmm_ipi.h -usr/src/uts/i86pc/io/vmm/vmm_ktr.h -usr/src/uts/i86pc/io/vmm/vmm_lapic.[ch] -usr/src/uts/i86pc/io/vmm/vmm_mem.[ch] -usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c -usr/src/uts/i86pc/io/vmm/vmm_stat.[ch] -usr/src/uts/i86pc/io/vmm/vmm_util.[ch] -usr/src/uts/i86pc/io/vmm/vmx_assym.s -usr/src/uts/i86pc/io/vmm/x86.[ch] -usr/src/uts/i86pc/sys/vmm.h -usr/src/uts/i86pc/sys/vmm_dev.h -usr/src/uts/i86pc/sys/vmm_instruction_emul.h diff --git a/usr/src/boot/Makefile.version b/usr/src/boot/Makefile.version index 5d93ec0e76..d1fd2200d6 100644 --- a/usr/src/boot/Makefile.version +++ b/usr/src/boot/Makefile.version @@ -33,4 +33,4 @@ LOADER_VERSION = 1.1 # Use date like formatting here, YYYY.MM.DD.XX, without leading zeroes. # The version is processed from left to right, the version number can only # be increased. -BOOT_VERSION = $(LOADER_VERSION)-2020.08.05.1 +BOOT_VERSION = $(LOADER_VERSION)-2020.08.22.1 diff --git a/usr/src/boot/lib/libstand/environment.c b/usr/src/boot/lib/libstand/environment.c index 291e330044..d3130d292e 100644 --- a/usr/src/boot/lib/libstand/environment.c +++ b/usr/src/boot/lib/libstand/environment.c @@ -1,4 +1,4 @@ -/* +/* * Copyright (c) 1998 Michael Smith. * All rights reserved. * @@ -25,7 +25,6 @@ */ #include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); /* * Manage an environment-like space in which string variables may be stored. @@ -37,9 +36,7 @@ __FBSDID("$FreeBSD$"); #include <string.h> -static void env_discard(struct env_var *ev); - -struct env_var *environ = NULL; +struct env_var *environ = NULL; /* * Look up (name) and return it's env_var structure. 
@@ -47,12 +44,12 @@ struct env_var *environ = NULL; struct env_var * env_getenv(const char *name) { - struct env_var *ev; - - for (ev = environ; ev != NULL; ev = ev->ev_next) - if (!strcmp(ev->ev_name, name)) - break; - return(ev); + struct env_var *ev; + + for (ev = environ; ev != NULL; ev = ev->ev_next) + if (strcmp(ev->ev_name, name) == 0) + break; + return (ev); } /* @@ -65,159 +62,157 @@ env_getenv(const char *name) */ int env_setenv(const char *name, int flags, const void *value, - ev_sethook_t sethook, ev_unsethook_t unsethook) + ev_sethook_t sethook, ev_unsethook_t unsethook) { - struct env_var *ev, *curr, *last; - - if ((ev = env_getenv(name)) != NULL) { - /* - * If there's a set hook, let it do the work (unless we are working - * for one already. - */ - if ((ev->ev_sethook != NULL) && !(flags & EV_NOHOOK)) - return (ev->ev_sethook(ev, flags, value)); - - /* If there is data in the variable, discard it. */ - if (ev->ev_value != NULL && (ev->ev_flags & EV_DYNAMIC) != 0) - free(ev->ev_value); - ev->ev_value = NULL; - ev->ev_flags &= ~EV_DYNAMIC; - - } else { - - /* - * New variable; create and sort into list - */ - ev = malloc(sizeof(struct env_var)); - ev->ev_name = strdup(name); - ev->ev_value = NULL; - ev->ev_flags = 0; - /* hooks can only be set when the variable is instantiated */ - ev->ev_sethook = sethook; - ev->ev_unsethook = unsethook; - - /* Sort into list */ - ev->ev_prev = NULL; - ev->ev_next = NULL; - /* Search for the record to insert before */ - for (last = NULL, curr = environ; - curr != NULL; - last = curr, curr = curr->ev_next) { - - if (strcmp(ev->ev_name, curr->ev_name) < 0) { - if (curr->ev_prev) { - curr->ev_prev->ev_next = ev; - } else { - environ = ev; + struct env_var *ev, *curr, *last; + + if ((ev = env_getenv(name)) != NULL) { + /* + * If there's a set hook, let it do the work + * (unless we are working for one already). 
+ */ + if ((ev->ev_sethook != NULL) && !(flags & EV_NOHOOK)) + return (ev->ev_sethook(ev, flags, value)); + + /* If there is data in the variable, discard it. */ + if (ev->ev_value != NULL && (ev->ev_flags & EV_DYNAMIC) != 0) + free(ev->ev_value); + ev->ev_value = NULL; + ev->ev_flags &= ~EV_DYNAMIC; + + } else { + /* + * New variable; create and sort into list + */ + ev = malloc(sizeof (struct env_var)); + ev->ev_name = strdup(name); + ev->ev_value = NULL; + ev->ev_flags = 0; + /* hooks can only be set when the variable is instantiated */ + ev->ev_sethook = sethook; + ev->ev_unsethook = unsethook; + + /* Sort into list */ + ev->ev_prev = NULL; + ev->ev_next = NULL; + /* Search for the record to insert before */ + for (last = NULL, curr = environ; curr != NULL; + last = curr, curr = curr->ev_next) { + + if (strcmp(ev->ev_name, curr->ev_name) < 0) { + if (curr->ev_prev) { + curr->ev_prev->ev_next = ev; + } else { + environ = ev; + } + ev->ev_next = curr; + ev->ev_prev = curr->ev_prev; + curr->ev_prev = ev; + break; + } + } + if (curr == NULL) { + if (last == NULL) { + environ = ev; + } else { + last->ev_next = ev; + ev->ev_prev = last; + } } - ev->ev_next = curr; - ev->ev_prev = curr->ev_prev; - curr->ev_prev = ev; - break; - } } - if (curr == NULL) { - if (last == NULL) { - environ = ev; - } else { - last->ev_next = ev; - ev->ev_prev = last; - } + + /* If we have a new value, use it */ + if (flags & EV_VOLATILE) { + ev->ev_value = strdup(value); + ev->ev_flags |= EV_DYNAMIC; + } else { + ev->ev_value = (char *)value; + ev->ev_flags |= flags & EV_DYNAMIC; } - } - - /* If we have a new value, use it */ - if (flags & EV_VOLATILE) { - ev->ev_value = strdup(value); - ev->ev_flags |= EV_DYNAMIC; - } else { - ev->ev_value = (char *)value; - ev->ev_flags |= flags & EV_DYNAMIC; - } - - return(0); + + return (0); } char * getenv(const char *name) { - struct env_var *ev; - - /* Set but no value gives empty string */ - if ((ev = env_getenv(name)) != NULL) { - if (ev->ev_value 
!= NULL) - return(ev->ev_value); - return(""); - } - return(NULL); + struct env_var *ev; + + /* Set but no value gives empty string */ + if ((ev = env_getenv(name)) != NULL) { + if (ev->ev_value != NULL) + return (ev->ev_value); + return (""); + } + return (NULL); } int setenv(const char *name, const char *value, int overwrite) { - /* No guarantees about state, always assume volatile */ - if (overwrite || (env_getenv(name) == NULL)) - return(env_setenv(name, EV_VOLATILE, value, NULL, NULL)); - return(0); + /* No guarantees about state, always assume volatile */ + if (overwrite || (env_getenv(name) == NULL)) + return (env_setenv(name, EV_VOLATILE, value, NULL, NULL)); + return (0); } int putenv(const char *string) { - char *value, *copy; - int result; - - copy = strdup(string); - if ((value = strchr(copy, '=')) != NULL) - *(value++) = 0; - result = setenv(copy, value, 1); - free(copy); - return(result); + char *value, *copy; + int result; + + copy = strdup(string); + if ((value = strchr(copy, '=')) != NULL) + *(value++) = 0; + result = setenv(copy, value, 1); + free(copy); + return (result); } int unsetenv(const char *name) { - struct env_var *ev; - int err; - - err = 0; - if ((ev = env_getenv(name)) == NULL) { - err = ENOENT; - } else { - if (ev->ev_unsethook != NULL) - err = ev->ev_unsethook(ev); - if (err == 0) { - env_discard(ev); + struct env_var *ev; + int err; + + err = 0; + if ((ev = env_getenv(name)) == NULL) { + err = ENOENT; + } else { + if (ev->ev_unsethook != NULL) + err = ev->ev_unsethook(ev); + if (err == 0) { + env_discard(ev); + } } - } - return(err); + return (err); } -static void +void env_discard(struct env_var *ev) { - if (ev->ev_prev) - ev->ev_prev->ev_next = ev->ev_next; - if (ev->ev_next) - ev->ev_next->ev_prev = ev->ev_prev; - if (environ == ev) - environ = ev->ev_next; - free(ev->ev_name); - if (ev->ev_value != NULL && (ev->ev_flags & EV_DYNAMIC) != 0) - free(ev->ev_value); - free(ev); + if (ev->ev_prev) + ev->ev_prev->ev_next = 
ev->ev_next; + if (ev->ev_next) + ev->ev_next->ev_prev = ev->ev_prev; + if (environ == ev) + environ = ev->ev_next; + free(ev->ev_name); + if (ev->ev_value != NULL && (ev->ev_flags & EV_DYNAMIC) != 0) + free(ev->ev_value); + free(ev); } int env_noset(struct env_var *ev __unused, int flags __unused, const void *value __unused) { - return(EPERM); + return (EPERM); } int env_nounset(struct env_var *ev __unused) { - return(EPERM); + return (EPERM); } diff --git a/usr/src/boot/lib/libstand/stand.h b/usr/src/boot/lib/libstand/stand.h index 5f94a7fa58..63595f2956 100644 --- a/usr/src/boot/lib/libstand/stand.h +++ b/usr/src/boot/lib/libstand/stand.h @@ -345,6 +345,7 @@ extern struct env_var *env_getenv(const char *name); extern int env_setenv(const char *name, int flags, const void *value, ev_sethook_t sethook, ev_unsethook_t unsethook); +extern void env_discard(struct env_var *); extern char *getenv(const char *name); extern int setenv(const char *name, const char *value, int overwrite); diff --git a/usr/src/boot/sys/boot/i386/gptzfsboot/zfsboot.c b/usr/src/boot/sys/boot/i386/gptzfsboot/zfsboot.c index 872ad3d3bc..63ba02968a 100644 --- a/usr/src/boot/sys/boot/i386/gptzfsboot/zfsboot.c +++ b/usr/src/boot/sys/boot/i386/gptzfsboot/zfsboot.c @@ -205,7 +205,12 @@ main(void) fd = open(PATH_DOTCONFIG, O_RDONLY); if (fd != -1) { - read(fd, cmd, sizeof (cmd)); + ssize_t cmdlen; + + if ((cmdlen = read(fd, cmd, sizeof (cmd))) > 0) + cmd[cmdlen] = '\0'; + else + *cmd = '\0'; close(fd); } diff --git a/usr/src/cmd/bhyve/Makefile b/usr/src/cmd/bhyve/Makefile index 914f41d1f4..4dc737c768 100644 --- a/usr/src/cmd/bhyve/Makefile +++ b/usr/src/cmd/bhyve/Makefile @@ -12,6 +12,7 @@ # # Copyright 2014 Pluribus Networks Inc. # Copyright 2020 Joyent, Inc. 
+# Copyright 2020 Oxide Computer Company # PROG = bhyve @@ -74,7 +75,6 @@ SRCS = acpi.c \ usb_mouse.c \ vga.c \ virtio.c \ - vmm_instruction_emul.c \ vmgenc.c \ xmsr.c \ spinup_ap.c \ diff --git a/usr/src/cmd/bhyve/bhyverun.c b/usr/src/cmd/bhyve/bhyverun.c index 18bfda76f0..bb3e0721c8 100644 --- a/usr/src/cmd/bhyve/bhyverun.c +++ b/usr/src/cmd/bhyve/bhyverun.c @@ -217,6 +217,7 @@ static cpuset_t cpumask; static void vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip); static struct vm_exit vmexit[VM_MAXCPU]; +static struct vm_entry vmentry[VM_MAXCPU]; struct bhyvestats { uint64_t vmexit_bogus; @@ -224,15 +225,18 @@ struct bhyvestats { uint64_t vmexit_hlt; uint64_t vmexit_pause; uint64_t vmexit_mtrap; - uint64_t vmexit_inst_emul; + uint64_t vmexit_mmio; + uint64_t vmexit_inout; uint64_t cpu_switch_rotate; uint64_t cpu_switch_direct; + uint64_t mmio_unhandled; } stats; struct mt_vmm_info { pthread_t mt_thr; struct vmctx *mt_ctx; - int mt_vcpu; + int mt_vcpu; + uint64_t mt_startrip; } mt_vmm_info[VM_MAXCPU]; #ifdef __FreeBSD__ @@ -502,7 +506,7 @@ fbsdrun_start_thread(void *param) if (gdb_port != 0) gdb_cpu_add(vcpu); - vm_loop(mtp->mt_ctx, vcpu, vmexit[vcpu].rip); + vm_loop(mtp->mt_ctx, vcpu, mtp->mt_startrip); /* not reached */ exit(1); @@ -543,11 +547,9 @@ fbsdrun_addcpu(struct vmctx *ctx, int fromcpu, int newcpu, uint64_t rip, * Set up the vmexit struct to allow execution to start * at the given RIP */ - vmexit[newcpu].rip = rip; - vmexit[newcpu].inst_length = 0; - mt_vmm_info[newcpu].mt_ctx = ctx; mt_vmm_info[newcpu].mt_vcpu = newcpu; + mt_vmm_info[newcpu].mt_startrip = rip; error = pthread_create(&mt_vmm_info[newcpu].mt_thr, NULL, fbsdrun_start_thread, &mt_vmm_info[newcpu]); @@ -567,6 +569,66 @@ fbsdrun_deletecpu(struct vmctx *ctx, int vcpu) return (CPU_EMPTY(&cpumask)); } +static void +vmentry_mmio_read(int vcpu, uint64_t gpa, uint8_t bytes, uint64_t data) +{ + struct vm_entry *entry = &vmentry[vcpu]; + struct vm_mmio *mmio = &entry->u.mmio; + + assert(entry->cmd 
== VEC_DEFAULT); + + entry->cmd = VEC_COMPLETE_MMIO; + mmio->bytes = bytes; + mmio->read = 1; + mmio->gpa = gpa; + mmio->data = data; +} + +static void +vmentry_mmio_write(int vcpu, uint64_t gpa, uint8_t bytes) +{ + struct vm_entry *entry = &vmentry[vcpu]; + struct vm_mmio *mmio = &entry->u.mmio; + + assert(entry->cmd == VEC_DEFAULT); + + entry->cmd = VEC_COMPLETE_MMIO; + mmio->bytes = bytes; + mmio->read = 0; + mmio->gpa = gpa; + mmio->data = 0; +} + +static void +vmentry_inout_read(int vcpu, uint16_t port, uint8_t bytes, uint32_t data) +{ + struct vm_entry *entry = &vmentry[vcpu]; + struct vm_inout *inout = &entry->u.inout; + + assert(entry->cmd == VEC_DEFAULT); + + entry->cmd = VEC_COMPLETE_INOUT; + inout->bytes = bytes; + inout->flags = INOUT_IN; + inout->port = port; + inout->eax = data; +} + +static void +vmentry_inout_write(int vcpu, uint16_t port, uint8_t bytes) +{ + struct vm_entry *entry = &vmentry[vcpu]; + struct vm_inout *inout = &entry->u.inout; + + assert(entry->cmd == VEC_DEFAULT); + + entry->cmd = VEC_COMPLETE_INOUT; + inout->bytes = bytes; + inout->flags = 0; + inout->port = port; + inout->eax = 0; +} + static int vmexit_handle_notify(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu, uint32_t eax) @@ -583,30 +645,42 @@ static int vmexit_inout(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) { int error; - int bytes, port, in, out; int vcpu; + struct vm_inout inout; + bool in; + uint8_t bytes; - vcpu = *pvcpu; + stats.vmexit_inout++; - port = vme->u.inout.port; - bytes = vme->u.inout.bytes; - in = vme->u.inout.in; - out = !in; + vcpu = *pvcpu; + inout = vme->u.inout; + in = (inout.flags & INOUT_IN) != 0; + bytes = inout.bytes; /* Extra-special case of host notifications */ - if (out && port == GUEST_NIO_PORT) { - error = vmexit_handle_notify(ctx, vme, pvcpu, vme->u.inout.eax); + if (!in && inout.port == GUEST_NIO_PORT) { + error = vmexit_handle_notify(ctx, vme, pvcpu, inout.eax); + vmentry_inout_write(vcpu, inout.port, bytes); return (error); } 
- error = emulate_inout(ctx, vcpu, vme, strictio); + error = emulate_inout(ctx, vcpu, &inout, strictio != 0); if (error) { fprintf(stderr, "Unhandled %s%c 0x%04x at 0x%lx\n", in ? "in" : "out", bytes == 1 ? 'b' : (bytes == 2 ? 'w' : 'l'), - port, vmexit->rip); + inout.port, vmexit->rip); return (VMEXIT_ABORT); } else { + /* + * Communicate the status of the inout operation back to the + * in-kernel instruction emulation. + */ + if (in) { + vmentry_inout_read(vcpu, inout.port, bytes, inout.eax); + } else { + vmentry_inout_write(vcpu, inout.port, bytes); + } return (VMEXIT_CONTINUE); } } @@ -796,29 +870,70 @@ vmexit_mtrap(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) static int vmexit_inst_emul(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) { - int err, i; - struct vie *vie; + uint8_t i, valid; + + fprintf(stderr, "Failed to emulate instruction sequence "); + + valid = vmexit->u.inst_emul.num_valid; + if (valid != 0) { + assert(valid <= sizeof (vmexit->u.inst_emul.inst)); + fprintf(stderr, "["); + for (i = 0; i < valid; i++) { + if (i == 0) { + fprintf(stderr, "%02x", + vmexit->u.inst_emul.inst[i]); + } else { + fprintf(stderr, ", %02x", + vmexit->u.inst_emul.inst[i]); + } + } + fprintf(stderr, "] "); + } + fprintf(stderr, "@ %rip = %x\n", vmexit->rip); - stats.vmexit_inst_emul++; + return (VMEXIT_ABORT); +} - vie = &vmexit->u.inst_emul.vie; - err = emulate_mem(ctx, *pvcpu, vmexit->u.inst_emul.gpa, - vie, &vmexit->u.inst_emul.paging); +static int +vmexit_mmio(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) +{ + int vcpu, err; + struct vm_mmio mmio; + bool is_read; - if (err) { - if (err == ESRCH) { - EPRINTLN("Unhandled memory access to 0x%lx\n", - vmexit->u.inst_emul.gpa); - } + stats.vmexit_mmio++; - fprintf(stderr, "Failed to emulate instruction sequence [ "); - for (i = 0; i < vie->num_valid; i++) - fprintf(stderr, "%02x", vie->inst[i]); - FPRINTLN(stderr, " ] at 0x%lx", vmexit->rip); - return (VMEXIT_ABORT); + vcpu = *pvcpu; + mmio = 
vmexit->u.mmio; + is_read = (mmio.read != 0); + + err = emulate_mem(ctx, vcpu, &mmio); + + if (err == ESRCH) { + fprintf(stderr, "Unhandled memory access to 0x%lx\n", mmio.gpa); + stats.mmio_unhandled++; + + /* + * Access to non-existent physical addresses is not likely to + * result in fatal errors on hardware machines, but rather reads + * of all-ones or discarded-but-acknowledged writes. + */ + mmio.data = ~0UL; + err = 0; } - return (VMEXIT_CONTINUE); + if (err == 0) { + if (is_read) { + vmentry_mmio_read(vcpu, mmio.gpa, mmio.bytes, + mmio.data); + } else { + vmentry_mmio_write(vcpu, mmio.gpa, mmio.bytes); + } + return (VMEXIT_CONTINUE); + } + + fprintf(stderr, "Unhandled mmio error to 0x%lx: %d\n", mmio.gpa, err); + return (VMEXIT_ABORT); } static pthread_mutex_t resetcpu_mtx = PTHREAD_MUTEX_INITIALIZER; @@ -888,7 +1003,7 @@ vmexit_breakpoint(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) static vmexit_handler_t handler[VM_EXITCODE_MAX] = { [VM_EXITCODE_INOUT] = vmexit_inout, - [VM_EXITCODE_INOUT_STR] = vmexit_inout, + [VM_EXITCODE_MMIO] = vmexit_mmio, [VM_EXITCODE_VMX] = vmexit_vmx, [VM_EXITCODE_SVM] = vmexit_svm, [VM_EXITCODE_BOGUS] = vmexit_bogus, @@ -910,6 +1025,8 @@ vm_loop(struct vmctx *ctx, int vcpu, uint64_t startrip) int error, rc; enum vm_exitcode exitcode; cpuset_t active_cpus; + struct vm_exit *vexit; + struct vm_entry *ventry; #ifdef __FreeBSD__ if (vcpumap[vcpu] != NULL) { @@ -924,19 +1041,30 @@ vm_loop(struct vmctx *ctx, int vcpu, uint64_t startrip) error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RIP, startrip); assert(error == 0); + ventry = &vmentry[vcpu]; + vexit = &vmexit[vcpu]; + while (1) { - error = vm_run(ctx, vcpu, &vmexit[vcpu]); + error = vm_run(ctx, vcpu, ventry, vexit); if (error != 0) break; - exitcode = vmexit[vcpu].exitcode; + if (ventry->cmd != VEC_DEFAULT) { + /* + * Discard any lingering entry state after it has been + * submitted via vm_run(). 
+ */ + bzero(ventry, sizeof (*ventry)); + } + + exitcode = vexit->exitcode; if (exitcode >= VM_EXITCODE_MAX || handler[exitcode] == NULL) { fprintf(stderr, "vm_loop: unexpected exitcode 0x%x\n", exitcode); exit(4); } - rc = (*handler[exitcode])(ctx, &vmexit[vcpu], &vcpu); + rc = (*handler[exitcode])(ctx, vexit, &vcpu); switch (rc) { case VMEXIT_CONTINUE: diff --git a/usr/src/cmd/bhyve/inout.c b/usr/src/cmd/bhyve/inout.c index b460ee2988..27068023d3 100644 --- a/usr/src/cmd/bhyve/inout.c +++ b/usr/src/cmd/bhyve/inout.c @@ -27,6 +27,18 @@ * * $FreeBSD$ */ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + * + * Copyright 2020 Oxide Computer Company + */ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); @@ -40,7 +52,6 @@ __FBSDID("$FreeBSD$"); #include <x86/segments.h> #include <machine/vmm.h> -#include <machine/vmm_instruction_emul.h> #include <vmmapi.h> #include <stdio.h> @@ -57,12 +68,14 @@ SET_DECLARE(inout_port_set, struct inout_port); #define VERIFY_IOPORT(port, size) \ assert((port) >= 0 && (size) > 0 && ((port) + (size)) <= MAX_IOPORTS) -static struct { +struct inout_handler { const char *name; int flags; inout_func_t handler; void *arg; -} inout_handlers[MAX_IOPORTS]; +}; + +static struct inout_handler inout_handlers[MAX_IOPORTS]; static int default_inout(struct vmctx *ctx, int vcpu, int in, int port, int bytes, @@ -85,11 +98,11 @@ default_inout(struct vmctx *ctx, int vcpu, int in, int port, int bytes, return (0); } -static void +static void register_default_iohandler(int start, int size) { struct inout_port iop; - + VERIFY_IOPORT(start, size); bzero(&iop, sizeof(iop)); @@ -103,136 +116,37 @@ 
register_default_iohandler(int start, int size) } int -emulate_inout(struct vmctx *ctx, int vcpu, struct vm_exit *vmexit, int strict) +emulate_inout(struct vmctx *ctx, int vcpu, struct vm_inout *inout, bool strict) { - int addrsize, bytes, flags, in, port, prot, rep; - uint32_t eax, val; - inout_func_t handler; - void *arg; - int error, fault, retval; - enum vm_reg_name idxreg; - uint64_t gla, index, iterations, count; - struct vm_inout_str *vis; - struct iovec iov[2]; - - bytes = vmexit->u.inout.bytes; - in = vmexit->u.inout.in; - port = vmexit->u.inout.port; - - assert(port < MAX_IOPORTS); + struct inout_handler handler; + inout_func_t hfunc; + void *harg; + int error; + uint8_t bytes; + bool in; + + bytes = inout->bytes; + in = (inout->flags & INOUT_IN) != 0; + assert(bytes == 1 || bytes == 2 || bytes == 4); - handler = inout_handlers[port].handler; + handler = inout_handlers[inout->port]; + hfunc = handler.handler; + harg = handler.arg; - if (strict && handler == default_inout) + if (strict && hfunc == default_inout) return (-1); - flags = inout_handlers[port].flags; - arg = inout_handlers[port].arg; - if (in) { - if (!(flags & IOPORT_F_IN)) + if (!(handler.flags & IOPORT_F_IN)) return (-1); } else { - if (!(flags & IOPORT_F_OUT)) + if (!(handler.flags & IOPORT_F_OUT)) return (-1); } - retval = 0; - if (vmexit->u.inout.string) { - vis = &vmexit->u.inout_str; - rep = vis->inout.rep; - addrsize = vis->addrsize; - prot = in ? PROT_WRITE : PROT_READ; - assert(addrsize == 2 || addrsize == 4 || addrsize == 8); - - /* Index register */ - idxreg = in ? 
VM_REG_GUEST_RDI : VM_REG_GUEST_RSI; - index = vis->index & vie_size2mask(addrsize); - - /* Count register */ - count = vis->count & vie_size2mask(addrsize); - - /* Limit number of back-to-back in/out emulations to 16 */ - iterations = MIN(count, 16); - while (iterations > 0) { - assert(retval == 0); - if (vie_calculate_gla(vis->paging.cpu_mode, - vis->seg_name, &vis->seg_desc, index, bytes, - addrsize, prot, &gla)) { - vm_inject_gp(ctx, vcpu); - break; - } - - error = vm_copy_setup(ctx, vcpu, &vis->paging, gla, - bytes, prot, iov, nitems(iov), &fault); - if (error) { - retval = -1; /* Unrecoverable error */ - break; - } else if (fault) { - retval = 0; /* Resume guest to handle fault */ - break; - } - - if (vie_alignment_check(vis->paging.cpl, bytes, - vis->cr0, vis->rflags, gla)) { - vm_inject_ac(ctx, vcpu, 0); - break; - } - - val = 0; - if (!in) - vm_copyin(ctx, vcpu, iov, &val, bytes); - - retval = handler(ctx, vcpu, in, port, bytes, &val, arg); - if (retval != 0) - break; - - if (in) - vm_copyout(ctx, vcpu, &val, iov, bytes); - - /* Update index */ - if (vis->rflags & PSL_D) - index -= bytes; - else - index += bytes; - - count--; - iterations--; - } - - /* Update index register */ - error = vie_update_register(ctx, vcpu, idxreg, index, addrsize); - assert(error == 0); - - /* - * Update count register only if the instruction had a repeat - * prefix. 
- */ - if (rep) { - error = vie_update_register(ctx, vcpu, VM_REG_GUEST_RCX, - count, addrsize); - assert(error == 0); - } - - /* Restart the instruction if more iterations remain */ - if (retval == 0 && count != 0) { - error = vm_restart_instruction(ctx, vcpu); - assert(error == 0); - } - } else { - eax = vmexit->u.inout.eax; - val = eax & vie_size2mask(bytes); - retval = handler(ctx, vcpu, in, port, bytes, &val, arg); - if (retval == 0 && in) { - eax &= ~vie_size2mask(bytes); - eax |= val & vie_size2mask(bytes); - error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RAX, - eax); - assert(error == 0); - } - } - return (retval); + error = hfunc(ctx, vcpu, in, inout->port, bytes, &inout->eax, harg); + return (error); } void diff --git a/usr/src/cmd/bhyve/inout.h b/usr/src/cmd/bhyve/inout.h index b72ee5d93e..b026e18e92 100644 --- a/usr/src/cmd/bhyve/inout.h +++ b/usr/src/cmd/bhyve/inout.h @@ -47,6 +47,7 @@ struct vmctx; struct vm_exit; +struct vm_inout; /* * inout emulation handlers return 0 on success and -1 on failure. @@ -82,10 +83,10 @@ struct inout_port { 0 \ }; \ DATA_SET(inout_port_set, __CONCAT(__inout_port, __LINE__)) - + void init_inout(void); -int emulate_inout(struct vmctx *, int vcpu, struct vm_exit *vmexit, - int strict); +int emulate_inout(struct vmctx *, int vcpu, struct vm_inout *inout, + bool strict); int register_inout(struct inout_port *iop); int unregister_inout(struct inout_port *iop); void init_bvmcons(void); diff --git a/usr/src/cmd/bhyve/mem.c b/usr/src/cmd/bhyve/mem.c index 90aefe45c8..1afc8bf5f0 100644 --- a/usr/src/cmd/bhyve/mem.c +++ b/usr/src/cmd/bhyve/mem.c @@ -27,6 +27,18 @@ * * $FreeBSD$ */ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. 
A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + * + * Copyright 2020 Oxide Computer Company + */ /* * Memory ranges are represented with an RB tree. On insertion, the range @@ -41,7 +53,6 @@ __FBSDID("$FreeBSD$"); #include <sys/errno.h> #include <sys/tree.h> #include <machine/vmm.h> -#include <machine/vmm_instruction_emul.h> #include <assert.h> #include <err.h> @@ -96,7 +107,7 @@ mmio_rb_lookup(struct mmio_rb_tree *rbt, uint64_t addr, *entry = res; return (0); } - + return (ENOENT); } @@ -170,7 +181,7 @@ access_memory(struct vmctx *ctx, int vcpu, uint64_t paddr, mem_cb_t *cb, { struct mmio_rb_range *entry; int err, perror, immutable; - + pthread_rwlock_rdlock(&mmio_rwlock); /* * First check the per-vCPU cache @@ -185,7 +196,7 @@ access_memory(struct vmctx *ctx, int vcpu, uint64_t paddr, mem_cb_t *cb, if (entry == NULL) { if (mmio_rb_lookup(&mmio_rb_root, paddr, &entry) == 0) { /* Update the per-vCPU cache */ - mmio_hint[vcpu] = entry; + mmio_hint[vcpu] = entry; } else if (mmio_rb_lookup(&mmio_rb_fallback, paddr, &entry)) { perror = pthread_rwlock_unlock(&mmio_rwlock); assert(perror == 0); @@ -223,32 +234,28 @@ access_memory(struct vmctx *ctx, int vcpu, uint64_t paddr, mem_cb_t *cb, return (err); } -struct emulate_mem_args { - struct vie *vie; - struct vm_guest_paging *paging; -}; - static int emulate_mem_cb(struct vmctx *ctx, int vcpu, uint64_t paddr, struct mem_range *mr, void *arg) { - struct emulate_mem_args *ema; + struct vm_mmio *mmio; + int err = 0; + + mmio = arg; - ema = arg; - return (vmm_emulate_instruction(ctx, vcpu, paddr, ema->vie, ema->paging, - mem_read, mem_write, mr)); + if (mmio->read != 0) { + err = mem_read(ctx, vcpu, paddr, &mmio->data, mmio->bytes, mr); + } else { + err = mem_write(ctx, vcpu, paddr, mmio->data, mmio->bytes, mr); + } + + return (err); } int -emulate_mem(struct vmctx *ctx, int vcpu, uint64_t paddr, struct vie *vie, - struct vm_guest_paging *paging) - +emulate_mem(struct vmctx 
*ctx, int vcpu, struct vm_mmio *mmio) { - struct emulate_mem_args ema; - - ema.vie = vie; - ema.paging = paging; - return (access_memory(ctx, vcpu, paddr, emulate_mem_cb, &ema)); + return (access_memory(ctx, vcpu, mmio->gpa, emulate_mem_cb, mmio)); } struct rw_mem_args { @@ -333,23 +340,23 @@ register_mem_fallback(struct mem_range *memp) return (register_mem_int(&mmio_rb_fallback, memp)); } -int +int unregister_mem(struct mem_range *memp) { struct mem_range *mr; struct mmio_rb_range *entry = NULL; int err, perror, i; - + pthread_rwlock_wrlock(&mmio_rwlock); err = mmio_rb_lookup(&mmio_rb_root, memp->base, &entry); if (err == 0) { mr = &entry->mr_param; assert(mr->name == memp->name); - assert(mr->base == memp->base && mr->size == memp->size); + assert(mr->base == memp->base && mr->size == memp->size); assert((mr->flags & MEM_F_IMMUTABLE) == 0); RB_REMOVE(mmio_rb_tree, &mmio_rb_root, entry); - /* flush Per-vCPU cache */ + /* flush Per-vCPU cache */ for (i=0; i < VM_MAXCPU; i++) { if (mmio_hint[i] == entry) mmio_hint[i] = NULL; @@ -360,7 +367,7 @@ unregister_mem(struct mem_range *memp) if (entry) free(entry); - + return (err); } diff --git a/usr/src/cmd/bhyve/mem.h b/usr/src/cmd/bhyve/mem.h index 38d773c43f..8b81b93a02 100644 --- a/usr/src/cmd/bhyve/mem.h +++ b/usr/src/cmd/bhyve/mem.h @@ -53,8 +53,8 @@ struct mem_range { #define MEM_F_IMMUTABLE 0x4 /* mem_range cannot be unregistered */ void init_mem(void); -int emulate_mem(struct vmctx *, int vcpu, uint64_t paddr, struct vie *vie, - struct vm_guest_paging *paging); + +int emulate_mem(struct vmctx *ctx, int vcpu, struct vm_mmio *mmio); int read_mem(struct vmctx *ctx, int vcpu, uint64_t gpa, uint64_t *rval, int size); diff --git a/usr/src/cmd/bhyve/task_switch.c b/usr/src/cmd/bhyve/task_switch.c index f1b564d560..c4a087b54f 100644 --- a/usr/src/cmd/bhyve/task_switch.c +++ b/usr/src/cmd/bhyve/task_switch.c @@ -25,6 +25,18 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + * + * Copyright 2020 Oxide Computer Company + */ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); @@ -37,7 +49,6 @@ __FBSDID("$FreeBSD$"); #include <x86/segments.h> #include <x86/specialreg.h> #include <machine/vmm.h> -#include <machine/vmm_instruction_emul.h> #include <assert.h> #include <errno.h> @@ -618,6 +629,150 @@ tss32_restore(struct vmctx *ctx, int vcpu, struct vm_task_switch *ts, return (0); } + +/* + * Copy of vie_alignment_check() from vmm_instruction_emul.c + */ +static int +alignment_check(int cpl, int size, uint64_t cr0, uint64_t rf, uint64_t gla) +{ + assert(size == 1 || size == 2 || size == 4 || size == 8); + assert(cpl >= 0 && cpl <= 3); + + if (cpl != 3 || (cr0 & CR0_AM) == 0 || (rf & PSL_AC) == 0) + return (0); + + return ((gla & (size - 1)) ? 
1 : 0); +} + +/* + * Copy of vie_size2mask() from vmm_instruction_emul.c + */ +static uint64_t +size2mask(int size) +{ + switch (size) { + case 1: + return (0xff); + case 2: + return (0xffff); + case 4: + return (0xffffffff); + case 8: + return (0xffffffffffffffff); + default: + assert(0); + /* not reached */ + return (0); + } +} + +/* + * Copy of vie_calculate_gla() from vmm_instruction_emul.c + */ +static int +calculate_gla(enum vm_cpu_mode cpu_mode, enum vm_reg_name seg, + struct seg_desc *desc, uint64_t offset, int length, int addrsize, + int prot, uint64_t *gla) +{ + uint64_t firstoff, low_limit, high_limit, segbase; + int glasize, type; + + assert(seg >= VM_REG_GUEST_ES && seg <= VM_REG_GUEST_GS); + assert((length == 1 || length == 2 || length == 4 || length == 8)); + assert((prot & ~(PROT_READ | PROT_WRITE)) == 0); + + firstoff = offset; + if (cpu_mode == CPU_MODE_64BIT) { + assert(addrsize == 4 || addrsize == 8); + glasize = 8; + } else { + assert(addrsize == 2 || addrsize == 4); + glasize = 4; + /* + * If the segment selector is loaded with a NULL selector + * then the descriptor is unusable and attempting to use + * it results in a #GP(0). + */ + if (SEG_DESC_UNUSABLE(desc->access)) + return (-1); + + /* + * The processor generates a #NP exception when a segment + * register is loaded with a selector that points to a + * descriptor that is not present. If this was the case then + * it would have been checked before the VM-exit. + */ + assert(SEG_DESC_PRESENT(desc->access)); + + /* + * The descriptor type must indicate a code/data segment. + */ + type = SEG_DESC_TYPE(desc->access); + assert(type >= 16 && type <= 31); + + if (prot & PROT_READ) { + /* #GP on a read access to a exec-only code segment */ + if ((type & 0xA) == 0x8) + return (-1); + } + + if (prot & PROT_WRITE) { + /* + * #GP on a write access to a code segment or a + * read-only data segment. 
+ */ + if (type & 0x8) /* code segment */ + return (-1); + + if ((type & 0xA) == 0) /* read-only data seg */ + return (-1); + } + + /* + * 'desc->limit' is fully expanded taking granularity into + * account. + */ + if ((type & 0xC) == 0x4) { + /* expand-down data segment */ + low_limit = desc->limit + 1; + high_limit = SEG_DESC_DEF32(desc->access) ? + 0xffffffff : 0xffff; + } else { + /* code segment or expand-up data segment */ + low_limit = 0; + high_limit = desc->limit; + } + + while (length > 0) { + offset &= size2mask(addrsize); + if (offset < low_limit || offset > high_limit) + return (-1); + offset++; + length--; + } + } + + /* + * In 64-bit mode all segments except %fs and %gs have a segment + * base address of 0. + */ + if (cpu_mode == CPU_MODE_64BIT && seg != VM_REG_GUEST_FS && + seg != VM_REG_GUEST_GS) { + segbase = 0; + } else { + segbase = desc->base; + } + + /* + * Truncate 'firstoff' to the effective address size before adding + * it to the segment base. + */ + firstoff &= size2mask(addrsize); + *gla = (segbase + firstoff) & size2mask(glasize); + return (0); +} + /* * Push an error code on the stack of the new task. 
This is needed if the * task switch was triggered by a hardware exception that causes an error @@ -667,14 +822,14 @@ push_errcode(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging, esp = GETREG(ctx, vcpu, VM_REG_GUEST_RSP); esp -= bytes; - if (vie_calculate_gla(paging->cpu_mode, VM_REG_GUEST_SS, + if (calculate_gla(paging->cpu_mode, VM_REG_GUEST_SS, &seg_desc, esp, bytes, stacksize, PROT_WRITE, &gla)) { sel_exception(ctx, vcpu, IDT_SS, stacksel, 1); *faultptr = 1; return (0); } - if (vie_alignment_check(paging->cpl, bytes, cr0, rflags, gla)) { + if (alignment_check(paging->cpl, bytes, cr0, rflags, gla)) { vm_inject_ac(ctx, vcpu, 1); *faultptr = 1; return (0); diff --git a/usr/src/cmd/bhyvectl/bhyvectl.c b/usr/src/cmd/bhyvectl/bhyvectl.c index 5299791091..22c72cf5df 100644 --- a/usr/src/cmd/bhyvectl/bhyvectl.c +++ b/usr/src/cmd/bhyvectl/bhyvectl.c @@ -36,11 +36,10 @@ * A full copy of the text of the CDDL should have accompanied this * source. A copy of the CDDL is also available via the Internet at * http://www.illumos.org/license/CDDL. - */ - -/* + * * Copyright 2015 Pluribus Networks Inc. * Copyright 2019 Joyent, Inc. + * Copyright 2020 Oxide Computer Company */ #include <sys/cdefs.h> @@ -358,14 +357,20 @@ dump_vm_run_exitcode(struct vm_exit *vmexit, int vcpu) switch (vmexit->exitcode) { case VM_EXITCODE_INOUT: printf("\treason\t\tINOUT\n"); - printf("\tdirection\t%s\n", vmexit->u.inout.in ? "IN" : "OUT"); + printf("\tdirection\t%s\n", + (vmexit->u.inout.flags & INOUT_IN) ? "IN" : "OUT"); printf("\tbytes\t\t%d\n", vmexit->u.inout.bytes); - printf("\tflags\t\t%s%s\n", - vmexit->u.inout.string ? "STRING " : "", - vmexit->u.inout.rep ? "REP " : ""); printf("\tport\t\t0x%04x\n", vmexit->u.inout.port); printf("\teax\t\t0x%08x\n", vmexit->u.inout.eax); break; + case VM_EXITCODE_MMIO: + printf("\treason\t\tMMIO\n"); + printf("\toperation\t%s\n", + vmexit->u.mmio.read ? 
"READ" : "WRITE"); + printf("\tbytes\t\t%d\n", vmexit->u.mmio.bytes); + printf("\tgpa\t\t0x%08x\n", vmexit->u.mmio.gpa); + printf("\tdata\t\t0x%08x\n", vmexit->u.mmio.data); + break; case VM_EXITCODE_VMX: printf("\treason\t\tVMX\n"); printf("\tstatus\t\t%d\n", vmexit->u.vmx.status); @@ -2366,7 +2371,11 @@ main(int argc, char *argv[]) } if (!error && run) { - error = vm_run(ctx, vcpu, &vmexit); + struct vm_entry entry; + + bzero(&entry, sizeof (entry)); + + error = vm_run(ctx, vcpu, &entry, &vmexit); if (error == 0) dump_vm_run_exitcode(&vmexit, vcpu); else diff --git a/usr/src/compat/bhyve/amd64/machine/cpufunc.h b/usr/src/compat/bhyve/amd64/machine/cpufunc.h index 0b7bcdaa59..bb79ac3ce9 100644 --- a/usr/src/compat/bhyve/amd64/machine/cpufunc.h +++ b/usr/src/compat/bhyve/amd64/machine/cpufunc.h @@ -116,7 +116,7 @@ static __inline uint64_t rdmsr(u_int msr) { uint32_t low, high; - + __asm __volatile("rdmsr" : "=a" (low), "=d" (high) : "c" (msr)); return (low | ((uint64_t)high << 32)); } @@ -150,7 +150,7 @@ static __inline u_long rcr0(void) { u_long data; - + __asm __volatile("movq %%cr0,%0" : "=r" (data)); return (data); } @@ -174,7 +174,7 @@ static __inline u_long rcr4(void) { u_long data; - + __asm __volatile("movq %%cr4,%0" : "=r" (data)); return (data); } diff --git a/usr/src/compat/bhyve/amd64/machine/pmap.h b/usr/src/compat/bhyve/amd64/machine/pmap.h index ce3185629b..3b94d1b1a9 100644 --- a/usr/src/compat/bhyve/amd64/machine/pmap.h +++ b/usr/src/compat/bhyve/amd64/machine/pmap.h @@ -153,7 +153,7 @@ #define PGEX_RSV 0x08 /* reserved PTE field is non-zero */ #define PGEX_I 0x10 /* during an instruction fetch */ -/* +/* * undef the PG_xx macros that define bits in the regular x86 PTEs that * have a different position in nested PTEs. 
This is done when compiling * code that needs to be aware of the differences between regular x86 and diff --git a/usr/src/compat/bhyve/net/ieee_oui.h b/usr/src/compat/bhyve/net/ieee_oui.h index 068328d833..115e6a44f8 100644 --- a/usr/src/compat/bhyve/net/ieee_oui.h +++ b/usr/src/compat/bhyve/net/ieee_oui.h @@ -37,14 +37,14 @@ #define OUI_FREEBSD_BASE 0x589cfc000000 #define OUI_FREEBSD(nic) (OUI_FREEBSD_BASE | (nic)) -/* +/* * OUIs are most often used to uniquely identify network interfaces * and occupy the first 3 bytes of both destination and source MAC * addresses. The following allocations exist so that various * software systems associated with FreeBSD can have unique IDs in the * absence of hardware. The use of OUIs for this purpose is not fully * fleshed out but is now in common use in virtualization technology. - * + * * Allocations from this range are expected to be made using COMMON * SENSE by developers. Do NOT take a large range just because * they're currently wide open. Take the smallest useful range for @@ -53,7 +53,7 @@ * * In the event of a conflict arbitration of allocation in this file * is subject to core@ approval. - * + * * Applications are differentiated based on the high order bit(s) of * the remaining three bytes. Our first allocation has all 0s, the * next allocation has the highest bit set. Allocating in this way diff --git a/usr/src/compat/bhyve/x86/_types.h b/usr/src/compat/bhyve/x86/_types.h index 8bbae549d8..0263c33d5f 100644 --- a/usr/src/compat/bhyve/x86/_types.h +++ b/usr/src/compat/bhyve/x86/_types.h @@ -33,7 +33,7 @@ typedef long long __int64_t; typedef unsigned long long __uint64_t; #endif -/* +/* * Standard type definitions. 
*/ #ifdef _LP64 diff --git a/usr/src/lib/libc/port/i18n/gettext_real.c b/usr/src/lib/libc/port/i18n/gettext_real.c index 6045d000fe..6e5b8054ae 100644 --- a/usr/src/lib/libc/port/i18n/gettext_real.c +++ b/usr/src/lib/libc/port/i18n/gettext_real.c @@ -58,7 +58,7 @@ char * _real_gettext_u(const char *domain, const char *msgid1, const char *msgid2, unsigned long int ln, int category, int plural, locale_t loc) { - char msgfile[MAXPATHLEN]; /* 1024 */ + char msgfile[MAXPATHLEN]; /* 1024 */ char mydomain[TEXTDOMAINMAX + 1]; /* 256 + 1 */ char *cur_binding; /* points to current binding in list */ const char *cur_locale; @@ -326,7 +326,7 @@ static int process_nlspath(const char *cur_domain, const char *cur_msgloc, const char *nlspath, char **binding) { - char *s; /* generic string ptr */ + char *s; /* generic string ptr */ char *territory; /* our current territory element */ char *codeset; /* our current codeset element */ char *s1; /* for handling territory */ @@ -684,12 +684,12 @@ _real_bindtextdomain_u(const char *domain, const char *binding, return (*binding_addr); } /* replace existing binding with new binding */ - if (*binding_addr) { - free(*binding_addr); - } - if ((*binding_addr = strdup(binding)) == NULL) { + char *new_binding = strdup(binding); + if (new_binding == NULL) { return (NULL); } + free(*binding_addr); + *binding_addr = new_binding; #ifdef GETTEXT_DEBUG printlist(); #endif diff --git a/usr/src/lib/libvmmapi/common/vmmapi.c b/usr/src/lib/libvmmapi/common/vmmapi.c index 7d3446a845..6d5145431e 100644 --- a/usr/src/lib/libvmmapi/common/vmmapi.c +++ b/usr/src/lib/libvmmapi/common/vmmapi.c @@ -772,17 +772,16 @@ vm_get_register_set(struct vmctx *ctx, int vcpu, unsigned int count, } int -vm_run(struct vmctx *ctx, int vcpu, struct vm_exit *vmexit) +vm_run(struct vmctx *ctx, int vcpu, const struct vm_entry *vm_entry, + struct vm_exit *vm_exit) { - int error; - struct vm_run vmrun; + struct vm_entry entry; - bzero(&vmrun, sizeof(vmrun)); - vmrun.cpuid = vcpu; + 
bcopy(vm_entry, &entry, sizeof (entry)); + entry.cpuid = vcpu; + entry.exit_data = vm_exit; - error = ioctl(ctx->fd, VM_RUN, &vmrun); - bcopy(&vmrun.vm_exit, vmexit, sizeof(struct vm_exit)); - return (error); + return (ioctl(ctx->fd, VM_RUN, &entry)); } int diff --git a/usr/src/lib/libvmmapi/common/vmmapi.h b/usr/src/lib/libvmmapi/common/vmmapi.h index 997267b8cc..4656f417b4 100644 --- a/usr/src/lib/libvmmapi/common/vmmapi.h +++ b/usr/src/lib/libvmmapi/common/vmmapi.h @@ -165,7 +165,8 @@ int vm_set_register_set(struct vmctx *ctx, int vcpu, unsigned int count, const int *regnums, uint64_t *regvals); int vm_get_register_set(struct vmctx *ctx, int vcpu, unsigned int count, const int *regnums, uint64_t *regvals); -int vm_run(struct vmctx *ctx, int vcpu, struct vm_exit *ret_vmexit); +int vm_run(struct vmctx *ctx, int vcpu, const struct vm_entry *vm_entry, + struct vm_exit *vm_exit); int vm_suspend(struct vmctx *ctx, enum vm_suspend_how how); int vm_reinit(struct vmctx *ctx); int vm_apicid2vcpu(struct vmctx *ctx, int apicid); diff --git a/usr/src/pkg/manifests/system-test-libctest.mf b/usr/src/pkg/manifests/system-test-libctest.mf index c3d9b87bdb..58ffa45ad9 100644 --- a/usr/src/pkg/manifests/system-test-libctest.mf +++ b/usr/src/pkg/manifests/system-test-libctest.mf @@ -28,6 +28,7 @@ dir path=opt/libc-tests/cfg dir path=opt/libc-tests/cfg/symbols dir path=opt/libc-tests/runfiles dir path=opt/libc-tests/tests +dir path=opt/libc-tests/tests/i18n dir path=opt/libc-tests/tests/random dir path=opt/libc-tests/tests/regex dir path=opt/libc-tests/tests/regex/data @@ -95,6 +96,9 @@ file path=opt/libc-tests/tests/fnmatch.64 mode=0555 file path=opt/libc-tests/tests/fpround_test mode=0555 file path=opt/libc-tests/tests/fpround_test.$(ARCH) mode=0555 file path=opt/libc-tests/tests/fpround_test.$(ARCH64) mode=0555 +file path=opt/libc-tests/tests/i18n/bindtextdomain_test mode=0555 +file path=opt/libc-tests/tests/i18n/bindtextdomain_test.$(ARCH) mode=0555 +file 
path=opt/libc-tests/tests/i18n/bindtextdomain_test.$(ARCH64) mode=0555 file path=opt/libc-tests/tests/memset_s.32 mode=0555 file path=opt/libc-tests/tests/memset_s.64 mode=0555 file path=opt/libc-tests/tests/newlocale_test mode=0555 diff --git a/usr/src/prototypes/prototype.man1 b/usr/src/prototypes/prototype.man1 new file mode 100644 index 0000000000..2ab3d426dc --- /dev/null +++ b/usr/src/prototypes/prototype.man1 @@ -0,0 +1,104 @@ +.\" +.\" This file and its contents are supplied under the terms of the +.\" Common Development and Distribution License ("CDDL"), version 1.0. +.\" You may only use this file in accordance with the terms of version +.\" 1.0 of the CDDL. +.\" +.\" A full copy of the text of the CDDL should have accompanied this +.\" source. A copy of the CDDL is also available via the Internet at +.\" http://www.illumos.org/license/CDDL. +.\" +.\" +.\" Copyright 2020 <contributor> +.\" +.Dd Month Day, Year +.Dt COMMAND 1 +.\" Here and in other places "COMMAND" and "command" are place holders +.\" that should be replaced with the name of the actual command that +.\" this is documenting. +.Os +.Sh NAME +.Nm command +.Nd short description +.Sh SYNOPSIS +.Nm +.\" Each of the following lines should use the Fl and Ar options to +.\" indicate the set of supported options and flags. There should be one +.\" option and argument per line. If there are independent ways of +.\" invoking the command or independent sub-commands ala zfs(1M) or +.\" dladm(1M), there should be a fresh '.Nm' to indicate that and the +.\" sub-command should use .Cm. +.Sh DESCRIPTION +.\" Describe the purpose of the utility, what it does and how it +.\" operates. If there are certain privileges or other considerations +.\" for using this, indicate that here. 
+.Sh OPTIONS +The following options are supported: +.Bl -tag -width Ar +.It Fl flag1 +Description of the above flag1 +.It Fl flag2 +Description of the above flag2 +.El +.Sh OPERANDS +.\" This should be a list of non-flag arguments that are supported +.\" and what they do in the following form. +The following operands are supported: +.Bl -tag -width Ar +.It Ar oper1 +Description of what oper1 is. +.It Ar oper2 +Description of what oper2 is. +.El +.Sh EXIT STATUS +.\" This section should indicate the set of exit codes one can expect. +.\" In general, do not use the '.Ex' macro and instead call out the +.\" different error values. One would expect at least a difference +.\" between an error during execution and an error in the usage. +.Sh EXAMPLES +.\" There should be multiple examples present that describe how to use +.\" different parts of the command. This section should not be skipped +.\" and it's good to have a number of them. +.\" .Sh ENVIRONMENT +.\" If the program reacts to environment variables, most often locale +.\" related ones, document those here. If they are just the standard +.\" locale ones, use the following text, adjusting it for the exact set +.\" of locale specific values that impact it: +.\" See +.\" .Xr environ 5 +.\" for descriptions of the following environment variables +.\" that affect the execution of +.\" .Nm : +.\" .Ev LANG , +.\" .Ev LC_ALL , +.\" .Ev LC_MESSAGES , +.\" .Ev LC_NUMERIC , +.\" and +.\" .Ev NLSPATH . +.\" .Sh CODE SET INDEPENDENCE +.\" If there are issues around the code set, indicate so here. See +.\" attributes(5). +.Sh INTERFACE STABILITY +.\" When documenting the stability of commands it's useful to +.\" distinguish between the stability of the options and the command's +.\" actual output. For most commands, output stability should only be +.\" guaranteed if there's an explicit parseable option that controls the +.\" type of data. 
You can use the following template: +.\" The command line interface of +.\" .Nm +.\" is +.\" .Sy Committed . +.\" .Sy Evolving . +.\" .Sy Volatile . +.\" .Sy Private . +.\" The output of +.\" .Nm +.\" is +.\" .Sy Not-An-Interface +.\" and may change at any time. +.Sh SEE ALSO +.\" List other commands that are related to this. For programs that are +.\" primarily wrappers around libc functionality or a particular +.\" library, it's helpful to mention those here so the reader has an +.\" idea of what is used to implement this. For example, the sleep +.\" command would mention nanosleep(3C). diff --git a/usr/src/prototypes/prototype.man3x b/usr/src/prototypes/prototype.man3x new file mode 100644 index 0000000000..598315f959 --- /dev/null +++ b/usr/src/prototypes/prototype.man3x @@ -0,0 +1,79 @@ +.\" +.\" This file and its contents are supplied under the terms of the +.\" Common Development and Distribution License ("CDDL"), version 1.0. +.\" You may only use this file in accordance with the terms of version +.\" 1.0 of the CDDL. +.\" +.\" A full copy of the text of the CDDL should have accompanied this +.\" source. A copy of the CDDL is also available via the Internet at +.\" http://www.illumos.org/license/CDDL. +.\" +.\" +.\" Copyright 2020 <contributor> +.\" +.Dd Month Day, Year +.Dt MANUALPAGE 3SECTION +.\" MANUALPAGE should be replaced with the primary function that you are +.\" documenting. If there is more than one function, then you should either +.\" use the primary function of the group or a more general name. Every +.\" documented function will appear in the NAME section below. +.\" +.\" The SECTION generally refers to the name of the library that this +.\" appears in. For example for something in libc this would be 3C. In +.\" general this is the capitalized version of the library's shared +.\" object name. 
+.Os +.Sh NAME +.Nm funcname , +.Nm funcname2 +.Nd short description +.Sh LIBRARY +.Lb libname +.Sh SYNOPSIS +.\" Insert any required pre-processor macros with Dv +.\" .In headerfile.h +.\" .Ft return type +.\" .Fo function name +.\" .Fa "func arg 0 type and name" +.\" .Fa "func arg 1 type and name" +.\" .Fc +.\" Repeat above for each function +.Sh DESCRIPTION +.\" Describe how the functions operate and what they do. Provide +.\" background for the reader. Don't assume that they know how +.\" everything works. Be clear about the semantics and the why. +.Sh RETURN VALUES +Upon successful completion, the +.Fn funcname +function returns XXX and <state side effects>. Otherwise XXX is returned +and <if errno is set describe it below>. +.Sh EXAMPLES +.Sh ERRORS +The +.Fn funcname +functions will fail if: +.Bl -tag -width Er +.It Er ERRNO1 +A Reason why ERRNO1 could occur. +.It Er ERRNO2 +A Reason why ERRNO2 could occur. +.El +.Sh INTERFACE STABILITY +.\" Indicate the stability per attributes(5). One of: +.\" .Sy Committed +.\" .Sy Uncommitted +.\" .Sy Volatile +.\" .Sy Private +.Sh MT-LEVEL +.\" Indicate the MT-Level per attributes(5). If there are exceptions, +.\" start with the level and go from there. +.\" .Sy Safe +.\" .Sy Unsafe +.\" .Sy MT-Safe +.\" .Sy Async-Signal-Safe +.Sh SEE ALSO +.\" A list of mentioned manuals or others that are relevant to this +.\" function. If there is a user command that exercises this, for +.\" example, sleep(1) if documenting sleep(3C), list this here. +.\" Generally an overview page for a library in 3LIB should also be +.\" referenced. diff --git a/usr/src/prototypes/prototype.man7d b/usr/src/prototypes/prototype.man7d new file mode 100644 index 0000000000..ee51f21a64 --- /dev/null +++ b/usr/src/prototypes/prototype.man7d @@ -0,0 +1,49 @@ +.\" +.\" This file and its contents are supplied under the terms of the +.\" Common Development and Distribution License ("CDDL"), version 1.0. 
+.\" You may only use this file in accordance with the terms of version +.\" 1.0 of the CDDL. +.\" +.\" A full copy of the text of the CDDL should have accompanied this +.\" source. A copy of the CDDL is also available via the Internet at +.\" http://www.illumos.org/license/CDDL. +.\" +.\" +.\" Copyright 2020 <contributor> +.\" +.Dd Month Day, Year +.Dt DRIVERNAME 7D +.\" Here and in other places "DRIVERNAME" and "drivername" are place +.\" holders that should be replaced with the name of the actual driver +.\" that this is documenting. +.Os +.Sh NAME +.Nm drivername +.Nd driverdesc +.Sh SYNOPSIS +.Pa /dev/node/path +.Sh DESCRIPTION +The +.Nm +driver <fill out what it does and what devices it supports>. <Describe +the functionality supported by the driver, e.g. for a NIC TSO, etc.>. +.\" .Sh APPLICATION PROGRAMMING INTERFACE +.\" If the user may interact with this driver in a specific way, +.\" document it. The user may not because this driver is part of a +.\" broader framework. +.\" .Sh IOCTLS +.\" If the driver has a non-standard ioctl interface, document it. If it +.\" just implements the ones to support a framework, leave this out. +.\" .Sh CONFIGURATION +.\" If there is a driver.conf file, please describe the different +.\" options that can be set and their expected stability. +.\" .Sh ARCHITECTURE +.\" If this driver is supported on particular architectures (usually not +.\" the case for pseudo-devices), then include that. +.Sh FILES +.\" List the actual installation path of the driver and a configuration +.\" file. +.Sh SEE ALSO +.\" This list should include user programs or libraries that are relevant to +.\" the program. A nic might have dladm, a storage device, diskinfo, a +.\" sensor, fmtopo. 
diff --git a/usr/src/prototypes/prototype.man9e b/usr/src/prototypes/prototype.man9e new file mode 100644 index 0000000000..dc229ad6fd --- /dev/null +++ b/usr/src/prototypes/prototype.man9e @@ -0,0 +1,79 @@ +.\" +.\" This file and its contents are supplied under the terms of the +.\" Common Development and Distribution License ("CDDL"), version 1.0. +.\" You may only use this file in accordance with the terms of version +.\" 1.0 of the CDDL. +.\" +.\" A full copy of the text of the CDDL should have accompanied this +.\" source. A copy of the CDDL is also available via the Internet at +.\" http://www.illumos.org/license/CDDL. +.\" +.\" +.\" Copyright 2020 <contributor> +.\" +.Dd Month Day, Year +.Dt ENTRYNAME 9E +.\" ENTRYNAME should be replaced with the primary entry point that you are +.\" documenting. If there is more than one function, then you should either +.\" use the primary function of the group or a more general name. Every +.\" documented entry point will appear in the NAME section below. +.Os +.Sh NAME +.Nm entryname , +.Nm entryname2 +.Nd description +.Sh SYNOPSIS +.\" .In sys/header.h +.\" .Ft "return type" +.\" .Fo entryname +.\" .Fa "arg 0 type and name" +.\" .Fa "arg 1 type and name" +.\" .Fc +.\" Repeat above for each documented entry +.Sh INTERFACE LEVEL +.\" This should be one of the following: +.\" .Sy Committed +.\" .Sy Volatile - +.\" This interface is still evolving in illumos. +.\" API and ABI stability is not guaranteed. +.Sh PARAMETERS +.Bl -tag -width Fa +.It Fa arg0 +Description of the parameter arg0. +.It Fa arg1 +Description of the parameter arg1. +.El +.Sh DESCRIPTION +.\" This should include a description of the entry point. This includes +.\" information such as what framework it is a part of. What it is +.\" expected to actually do. Any constraints on what it should validate. +.\" Any concerns around locking or callbacks into the broader +.\" frameworks. Whether it can be called in parallel by multiple +.\" threads, etc. 
+.Sh CONTEXT +.\" Indicates the context in which this framework function is called. +.\" Generally this is one or more of: +.\" .Sy user +.\" .Sy kernel +.\" .Sy interrupt +.\" However sometimes there may be something specific. Such as this is +.\" only called or used during attach or detach. +.Sh RETURN VALUES +.\" This should indicate what the driver should return on successful +.\" completion and what it should have done. Otherwise it should +.\" indicate the class of error returned. +.Sh ERRORS +.\" This should be a list of recommended errors and causes. If the +.\" interface only supports returning DDI_SUCCESS or DDI_FAILURE, +.\" then this section should not be used. If it should only return these +.\" specific errors and this section shouldn't be used as a guide, +.\" indicate that. +.Bl -tag -width Er +.It Er ERRNO1 +Description of when to return ERRNO1. +.It Er ERRNO2 +Description of when to return ERRNO2. +.El +.Sh SEE ALSO +.\" A list of other manual pages related to the general framework or +.\" section 9f functions they should call. diff --git a/usr/src/prototypes/prototype.man9f b/usr/src/prototypes/prototype.man9f new file mode 100644 index 0000000000..29e7c76d89 --- /dev/null +++ b/usr/src/prototypes/prototype.man9f @@ -0,0 +1,78 @@ +.\" +.\" This file and its contents are supplied under the terms of the +.\" Common Development and Distribution License ("CDDL"), version 1.0. +.\" You may only use this file in accordance with the terms of version +.\" 1.0 of the CDDL. +.\" +.\" A full copy of the text of the CDDL should have accompanied this +.\" source. A copy of the CDDL is also available via the Internet at +.\" http://www.illumos.org/license/CDDL. +.\" +.\" +.\" Copyright 2020 <contributor> +.\" +.Dd Month Day, Year +.Dt FUNCNAME 9F +.\" FUNCNAME should be replaced with the primary function that you are +.\" documenting. If there is more than one function, then you should either +.\" use the primary function of the group or a more general name. 
Every +.\" documented function will appear in the NAME section below. +.Os +.Sh NAME +.Nm funcname , +.Nm funcname2 +.Nd short description +.Sh SYNOPSIS +.\" .In sys/header.h +.\" .Ft "return type" +.\" .Fo funcname +.\" .Fa "arg 0 type and name" +.\" .Fa "arg 1 type and name" +.\" .Fc +.\" Repeat above for each documented entry +.Sh INTERFACE LEVEL +.\" This should be one of the following: +.\" .Sy Committed +.\" .Sy Volatile - +.\" This interface is still evolving in illumos. +.\" API and ABI stability is not guaranteed. +.Sh PARAMETERS +.Bl -tag -width Fa +.It Fa arg0 +Description of the parameter arg0. +.It Fa arg1 +Description of the parameter arg1. +.El +.Sh DESCRIPTION +.\" This should include a description of the function, how to use it, +.\" and why someone might use it. Any concerns around locking or +.\" callbacks into the broader frameworks. Whether it can be called in +.\" parallel by multiple threads, etc. +.Sh CONTEXT +.\" Indicates the context in which this framework function is called. +.\" Generally this is one or more of: +.\" .Sy user +.\" .Sy kernel +.\" .Sy interrupt +.\" However sometimes there may be something specific. Such as this is +.\" only called or used during attach or detach. +.Sh EXAMPLES +.\" Include examples of how someone might use this kernel function. +.Sh RETURN VALUES +.\" This should indicate what the function will return on successful +.\" completion and what it should have done. Otherwise it should +.\" indicate the class of error returned. +.Sh ERRORS +.\" If the function returns a set of errors, often errnos (not the case +.\" for just returning DDI_SUCCESS/DDI_FAILURE), or any other +.\" positive/negative indicator, then one should consider including the +.\" meaning of the errors. +.Bl -tag -width Er +.It Er ERRNO1 +Description of what would cause ERRNO1. +.It Er ERRNO2 +Description of what would cause ERRNO2. 
+.El +.Sh SEE ALSO +.\" A list of other manual pages related to the general framework or +.\" section 9f functions they might call. diff --git a/usr/src/test/libc-tests/runfiles/default.run b/usr/src/test/libc-tests/runfiles/default.run index 2556c6916c..c819079ef6 100644 --- a/usr/src/test/libc-tests/runfiles/default.run +++ b/usr/src/test/libc-tests/runfiles/default.run @@ -38,6 +38,8 @@ outputdir = /var/tmp/test_results [/opt/libc-tests/tests/wcsncasecmp-7350.32] [/opt/libc-tests/tests/wcsncasecmp-7350.64] +[/opt/libc-tests/tests/i18n/bindtextdomain_test] + [/opt/libc-tests/tests/random/getrandom] [/opt/libc-tests/tests/random/getentropy] [/opt/libc-tests/tests/random/chacha] diff --git a/usr/src/test/libc-tests/tests/Makefile b/usr/src/test/libc-tests/tests/Makefile index 9ea35b5525..63f108e83c 100644 --- a/usr/src/test/libc-tests/tests/Makefile +++ b/usr/src/test/libc-tests/tests/Makefile @@ -18,6 +18,7 @@ SUBDIRS = \ catopen \ fpround \ + i18n \ newlocale \ nl_langinfo \ priv_gettext \ diff --git a/usr/src/test/libc-tests/tests/i18n/Makefile b/usr/src/test/libc-tests/tests/i18n/Makefile new file mode 100644 index 0000000000..56410d23a3 --- /dev/null +++ b/usr/src/test/libc-tests/tests/i18n/Makefile @@ -0,0 +1,25 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. 
+# + +# +# Copyright 2020 Richard Hansen <rhansen@rhansen.org> +# + +include $(SRC)/Makefile.master + +TESTSUBDIR = i18n +PROG = bindtextdomain_test +ARCHPROG = bindtextdomain_test + +include ../Makefile.com + +LDLIBS += -lumem +LDLIBS64 += -lumem diff --git a/usr/src/test/libc-tests/tests/i18n/bindtextdomain_test.c b/usr/src/test/libc-tests/tests/i18n/bindtextdomain_test.c new file mode 100644 index 0000000000..bb608e0328 --- /dev/null +++ b/usr/src/test/libc-tests/tests/i18n/bindtextdomain_test.c @@ -0,0 +1,143 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2020 Richard Hansen <rhansen@rhansen.org> + */ + +#include <errno.h> +#include <libintl.h> +#include <stdbool.h> +#include <stdio.h> +#include <string.h> +#include <sys/sysmacros.h> +#include <umem.h> +#include <unistd.h> +#include "test_common.h" + +const char * +_umem_debug_init(void) +{ + return ("default"); +} + +int +main(int argc, char *argv[]) +{ + int ret = 0; + int optc; + while ((optc = getopt(argc, argv, "df")) != -1) { + switch (optc) { + case 'd': + test_set_debug(); + break; + case 'f': + test_set_force(); + break; + default: + (void) fprintf(stderr, "Usage: %s [-df]\n", argv[0]); + exit(1); + } + } + + struct { + const char *name; + const char *dir; + bool malloc_fail; + const char *want; + int want_errno; + } test_cases[] = { + { + .name = "unbound query", + .dir = NULL, + .want = "/usr/lib/locale/", + }, + { + .name = "bind malloc fail", + .dir = "/bounddir1", + .malloc_fail = true, + .want = NULL, + .want_errno = EAGAIN, + }, + { + .name = "query after bind malloc fail", + .dir = NULL, + .want = 
"/usr/lib/locale/", + }, + { + .name = "normal bind", + .dir = "/bounddir2", + .want = "/bounddir2", + }, + { + .name = "query after normal bind", + .dir = NULL, + .want = "/bounddir2", + }, + { + .name = "rebind to same", + .dir = "/bounddir2", + .want = "/bounddir2", + }, + { + .name = "query after rebind to same", + .dir = NULL, + .want = "/bounddir2", + }, + { + .name = "rebind to new", + .dir = "/bounddir3", + .want = "/bounddir3", + }, + { + .name = "query after rebind to new", + .dir = NULL, + .want = "/bounddir3", + }, + { + .name = "rebind malloc fail", + .dir = "/bounddir4", + .malloc_fail = true, + .want = NULL, + .want_errno = EAGAIN, + }, + { + .name = "query after rebind malloc fail", + .dir = NULL, + .want = "/bounddir3", + }, + }, *tc; + + for (size_t i = 0; i < ARRAY_SIZE(test_cases); ++i) { + tc = &test_cases[i]; + test_t t = test_start(tc->name); + umem_setmtbf((uint_t)tc->malloc_fail); + errno = 0; + const char *got = bindtextdomain("domain", tc->dir); + int got_errno = errno; + umem_setmtbf(0); + if (((got == NULL) != (tc->want == NULL)) || + ((got != NULL) && strcmp(got, tc->want))) { + test_failed(t, "returned %s, want %s", + got != NULL ? got : "<NULL>", + tc->want != NULL ? tc->want : "<NULL>"); + ret = 1; + } + if (got_errno != tc->want_errno) { + test_failed(t, "got errno %d, want %d", + got_errno, tc->want_errno); + ret = 1; + } + test_passed(t); + } + test_summary(); + return (ret); +} diff --git a/usr/src/tools/scripts/cstyle.pl b/usr/src/tools/scripts/cstyle.pl index 5c474cfe28..e4d3694f3b 100644 --- a/usr/src/tools/scripts/cstyle.pl +++ b/usr/src/tools/scripts/cstyle.pl @@ -21,6 +21,7 @@ # # Copyright 2015 Toomas Soome <tsoome@me.com> # Copyright 2016 Nexenta Systems, Inc. +# Copyright 2020 Oxide Computer Company # # Copyright 2008 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. 
@@ -121,6 +122,16 @@ if ($doxygen_comments) { $hdr_comment_start = qr/^\s*\/\*$/; } +# FreeBSD uses comments styled as such for their license headers: +# /*- +# * SPDX-License-Identifier: BSD-2-Clause-FreeBSD +# * +# ... +# +# In order to apply other cstyle checks to those files without stumbling over +# the license header, tolerate such comment openings as well. +my $fbsd_comment_start = qr/^\s*\/\*-$/; + # Note, following must be in single quotes so that \s and \w work right. my $typename = '(int|char|short|long|unsigned|float|double' . '|\w+_t|struct\s+\w+|union\s+\w+|FILE)'; @@ -463,7 +474,7 @@ line: while (<$filehandle>) { $comment_done = 0; } # does this looks like the start of a block comment? - if (/$hdr_comment_start/) { + if (/$hdr_comment_start/ || /$fbsd_comment_start/) { if (!/^\t*\/\*/) { err("block comment not indented by tabs"); } diff --git a/usr/src/uts/common/fs/zfs/zio.c b/usr/src/uts/common/fs/zfs/zio.c index b02363e7eb..5215a58bf2 100644 --- a/usr/src/uts/common/fs/zfs/zio.c +++ b/usr/src/uts/common/fs/zfs/zio.c @@ -26,6 +26,7 @@ * Copyright (c) 2014 Integros [integros.com] * Copyright (c) 2019 Joyent, Inc. * Copyright (c) 2017, Intel Corporation. + * Copyright 2020 Joyent, Inc. 
*/ #include <sys/sysmacros.h> @@ -1990,6 +1991,10 @@ zio_suspend(spa_t *spa, zio_t *zio, zio_suspend_reason_t reason) "failure and the failure mode property for this pool " "is set to panic.", spa_name(spa)); + cmn_err(CE_WARN, "Pool '%s' has encountered an uncorrectable I/O " + "failure and has been suspended; `zpool clear` will be required " + "before the pool can be written to.", spa_name(spa)); + zfs_ereport_post(FM_EREPORT_ZFS_IO_FAILURE, spa, NULL, NULL, NULL, 0, 0); diff --git a/usr/src/uts/common/io/i40e/core/i40e_common.c b/usr/src/uts/common/io/i40e/core/i40e_common.c index fd32e0204c..f750bf69ce 100644 --- a/usr/src/uts/common/io/i40e/core/i40e_common.c +++ b/usr/src/uts/common/io/i40e/core/i40e_common.c @@ -3885,10 +3885,17 @@ static void i40e_parse_discover_capabilities(struct i40e_hw *hw, void *buff, /* Always disable FCoE if compiled without the I40E_FCOE_ENA flag */ p->fcoe = FALSE; + valid_functions = p->valid_functions; + num_functions = 0; + while (valid_functions) { + if (valid_functions & 1) + num_functions++; + valid_functions >>= 1; + } + /* count the enabled ports (aka the "not disabled" ports) */ hw->num_ports = 0; for (i = 0; i < 4; i++) { - enum i40e_status_code status; u32 port_cfg_reg = I40E_PRTGEN_STATUS + (4 * i); u64 port_cfg = 0; @@ -3907,6 +3914,16 @@ static void i40e_parse_discover_capabilities(struct i40e_hw *hw, void *buff, * Those cards have 4 PFs at minimum, so using PRTGEN_CNF for counting * physical ports results in wrong partition id calculation and thus * not supporting WoL. + * + * Porting note: the above comment is no longer directly relevant: we + * read PRTGEN_STATUS instead now, as PRTGEN_CNF was not reliable for + * these parts. In addition, the claim about having 4 PFs is not + * correct. For example, an X557-T2 is a dual port mezz card. Forcing + * ports to four here will cause ->num_partitions to be zero. 
+ * + * On the presumption that the hard-coded value is meaningful in some + * cases, though, we'll take the minimal approach of ensuring that we + * never have more ports than functions. */ if (hw->mac.type == I40E_MAC_X722) { if (i40e_acquire_nvm(hw, I40E_RESOURCE_READ) == I40E_SUCCESS) { @@ -3914,21 +3931,25 @@ static void i40e_parse_discover_capabilities(struct i40e_hw *hw, void *buff, 2 * I40E_SR_OCP_CFG_WORD0, sizeof(ocp_cfg_word0), &ocp_cfg_word0, TRUE, NULL); +#ifdef __sun__ + if (status == I40E_SUCCESS && + (ocp_cfg_word0 & I40E_SR_OCP_ENABLED)) { + hw->num_ports = 4; + if (hw->num_ports > num_functions) { + hw->num_ports = num_functions; + DEBUGOUT1("clamped 4 OCP ports to %d\n", + (int)hw->num_ports); + } + } +#else if (status == I40E_SUCCESS && (ocp_cfg_word0 & I40E_SR_OCP_ENABLED)) hw->num_ports = 4; +#endif i40e_release_nvm(hw); } } - valid_functions = p->valid_functions; - num_functions = 0; - while (valid_functions) { - if (valid_functions & 1) - num_functions++; - valid_functions >>= 1; - } - /* partition id is 1-based, and functions are evenly spread * across the ports as partitions */ @@ -3937,6 +3958,8 @@ static void i40e_parse_discover_capabilities(struct i40e_hw *hw, void *buff, hw->num_partitions = num_functions / hw->num_ports; } + VERIFY(hw->num_partitions > 0); + /* additional HW specific goodies that might * someday be HW version specific */ diff --git a/usr/src/uts/i86pc/io/vmm/amd/amdvi_hw.c b/usr/src/uts/i86pc/io/vmm/amd/amdvi_hw.c index 9cf9200b3d..a01b06446d 100644 --- a/usr/src/uts/i86pc/io/vmm/amd/amdvi_hw.c +++ b/usr/src/uts/i86pc/io/vmm/amd/amdvi_hw.c @@ -1003,8 +1003,8 @@ amdvi_teardown_hw(struct amdvi_softc *softc) dev = softc->dev; - /* - * Called after disable, h/w is stopped by now, free all the resources. + /* + * Called after disable, h/w is stopped by now, free all the resources. 
*/ amdvi_free_evt_intr_res(dev); @@ -1026,7 +1026,7 @@ amdvi_init(void) } if (!amdvi_enable_user && ivhd_count) { printf("bhyve: Found %d AMD-Vi/IOMMU device(s), " - "use hw.vmm.amdvi.enable=1 to enable pass-through.\n", + "use hw.vmm.amdvi.enable=1 to enable pass-through.\n", ivhd_count); return (EINVAL); } @@ -1304,7 +1304,7 @@ amdvi_set_dte(struct amdvi_domain *domain, uint16_t devid, bool enable) struct amdvi_dte* temp; KASSERT(domain, ("domain is NULL for pci_rid:0x%x\n", devid)); - + softc = amdvi_find_iommu(devid); KASSERT(softc, ("softc is NULL for pci_rid:0x%x\n", devid)); @@ -1397,11 +1397,11 @@ amdvi_enable(void) ctrl = softc->ctrl; KASSERT(ctrl, ("ctrl is NULL\n")); - val = ( AMDVI_CTRL_EN | - AMDVI_CTRL_CMD | - AMDVI_CTRL_ELOG | - AMDVI_CTRL_ELOGINT | - AMDVI_CTRL_INV_TO_1S); + val = ( AMDVI_CTRL_EN | + AMDVI_CTRL_CMD | + AMDVI_CTRL_ELOG | + AMDVI_CTRL_ELOGINT | + AMDVI_CTRL_INV_TO_1S); if (softc->ivhd_flag & IVHD_FLAG_COH) val |= AMDVI_CTRL_COH; diff --git a/usr/src/uts/i86pc/io/vmm/amd/amdvi_priv.h b/usr/src/uts/i86pc/io/vmm/amd/amdvi_priv.h index 2db6914f08..5d47142a72 100644 --- a/usr/src/uts/i86pc/io/vmm/amd/amdvi_priv.h +++ b/usr/src/uts/i86pc/io/vmm/amd/amdvi_priv.h @@ -35,7 +35,7 @@ #define BIT(n) (1ULL << (n)) /* Return value of bits[n:m] where n and (n >= ) m are bit positions. */ -#define REG_BITS(x, n, m) (((x) >> (m)) & \ +#define REG_BITS(x, n, m) (((x) >> (m)) & \ ((1 << (((n) - (m)) + 1)) - 1)) /* @@ -111,10 +111,10 @@ CTASSERT(sizeof(struct amdvi_dte) == 32); * IOMMU command entry. */ struct amdvi_cmd { - uint32_t word0; - uint32_t word1:28; + uint32_t word0; + uint32_t word1:28; uint8_t opcode:4; - uint64_t addr; + uint64_t addr; } __attribute__((__packed__)); /* Command opcodes. */ @@ -150,12 +150,12 @@ struct amdvi_cmd { * IOMMU event entry. 
*/ struct amdvi_event { - uint16_t devid; - uint16_t pasid_hi; - uint16_t pasid_domid; /* PASID low or DomainID */ - uint16_t flag:12; + uint16_t devid; + uint16_t pasid_hi; + uint16_t pasid_domid; /* PASID low or DomainID */ + uint16_t flag:12; uint8_t opcode:4; - uint64_t addr; + uint64_t addr; } __attribute__((__packed__)); CTASSERT(sizeof(struct amdvi_event) == 16); @@ -210,8 +210,8 @@ struct amdvi_ctrl { uint64_t limit:40; uint16_t :12; } excl; - /* - * Revision 2 only. + /* + * Revision 2 only. */ uint64_t ex_feature; struct { @@ -252,8 +252,8 @@ CTASSERT(offsetof(struct amdvi_ctrl, pad2)== 0x2028); CTASSERT(offsetof(struct amdvi_ctrl, pad3)== 0x2040); #define AMDVI_MMIO_V1_SIZE (4 * PAGE_SIZE) /* v1 size */ -/* - * AMF IOMMU v2 size including event counters +/* + * AMF IOMMU v2 size including event counters */ #define AMDVI_MMIO_V2_SIZE (8 * PAGE_SIZE) @@ -374,38 +374,38 @@ enum IvrsType */ struct amdvi_softc { struct amdvi_ctrl *ctrl; /* Control area. */ - device_t dev; /* IOMMU device. */ + device_t dev; /* IOMMU device. */ enum IvrsType ivhd_type; /* IOMMU IVHD type. */ bool iotlb; /* IOTLB supported by IOMMU */ struct amdvi_cmd *cmd; /* Command descriptor area. */ - int cmd_max; /* Max number of commands. */ + int cmd_max; /* Max number of commands. */ uint64_t cmp_data; /* Command completion write back. */ struct amdvi_event *event; /* Event descriptor area. */ struct resource *event_res; /* Event interrupt resource. */ - void *event_tag; /* Event interrupt tag. */ + void *event_tag; /* Event interrupt tag. */ int event_max; /* Max number of events. */ int event_irq; int event_rid; /* ACPI various flags. */ - uint32_t ivhd_flag; /* ACPI IVHD flag. */ - uint32_t ivhd_feature; /* ACPI v1 Reserved or v2 attribute. */ - uint64_t ext_feature; /* IVHD EFR */ + uint32_t ivhd_flag; /* ACPI IVHD flag. */ + uint32_t ivhd_feature; /* ACPI v1 Reserved or v2 attribute. */ + uint64_t ext_feature; /* IVHD EFR */ /* PCI related. 
*/ - uint16_t cap_off; /* PCI Capability offset. */ + uint16_t cap_off; /* PCI Capability offset. */ uint8_t pci_cap; /* PCI capability. */ - uint16_t pci_seg; /* IOMMU PCI domain/segment. */ - uint16_t pci_rid; /* PCI BDF of IOMMU */ + uint16_t pci_seg; /* IOMMU PCI domain/segment. */ + uint16_t pci_rid; /* PCI BDF of IOMMU */ /* Device range under this IOMMU. */ - uint16_t start_dev_rid; /* First device under this IOMMU. */ - uint16_t end_dev_rid; /* Last device under this IOMMU. */ + uint16_t start_dev_rid; /* First device under this IOMMU. */ + uint16_t end_dev_rid; /* Last device under this IOMMU. */ /* BIOS provided device configuration for end points. */ - struct ivhd_dev_cfg dev_cfg[10]; + struct ivhd_dev_cfg dev_cfg[10]; int dev_cfg_cnt; /* Software statistics. */ - uint64_t event_intr_cnt; /* Total event INTR count. */ - uint64_t total_cmd; /* Total number of commands. */ + uint64_t event_intr_cnt; /* Total event INTR count. */ + uint64_t total_cmd; /* Total number of commands. */ }; int amdvi_setup_hw(struct amdvi_softc *softc); diff --git a/usr/src/uts/i86pc/io/vmm/amd/ivrs_drv.c b/usr/src/uts/i86pc/io/vmm/amd/ivrs_drv.c index b754058c07..11925582ef 100644 --- a/usr/src/uts/i86pc/io/vmm/amd/ivrs_drv.c +++ b/usr/src/uts/i86pc/io/vmm/amd/ivrs_drv.c @@ -50,11 +50,11 @@ __FBSDID("$FreeBSD$"); device_t *ivhd_devs; /* IVHD or AMD-Vi device list. */ int ivhd_count; /* Number of IVHD header. */ -/* +/* * Cached IVHD header list. * Single entry for each IVHD, filtered the legacy one. */ -ACPI_IVRS_HARDWARE1 *ivhd_hdrs[10]; +ACPI_IVRS_HARDWARE1 *ivhd_hdrs[10]; extern int amdvi_ptp_level; /* Page table levels. 
*/ @@ -218,7 +218,7 @@ ivhd_dev_parse(ACPI_IVRS_HARDWARE1 *ivhd, struct amdvi_softc *softc) break; default: - device_printf(softc->dev, + device_printf(softc->dev, "unknown type: 0x%x\n", ivhd->Header.Type); return (-1); } @@ -346,7 +346,7 @@ ivhd_identify(driver_t *driver, device_t parent) ivrs_ivinfo = ivrs->Info; printf("AMD-Vi: IVRS Info VAsize = %d PAsize = %d GVAsize = %d" " flags:%b\n", - REG_BITS(ivrs_ivinfo, 21, 15), REG_BITS(ivrs_ivinfo, 14, 8), + REG_BITS(ivrs_ivinfo, 21, 15), REG_BITS(ivrs_ivinfo, 14, 8), REG_BITS(ivrs_ivinfo, 7, 5), REG_BITS(ivrs_ivinfo, 22, 22), "\020\001EFRSup"); @@ -360,17 +360,17 @@ ivhd_identify(driver_t *driver, device_t parent) ivhd_hdrs[i] = ivhd; } - /* + /* * Scan for presence of legacy and non-legacy device type * for same AMD-Vi device and override the old one. */ for (i = ivhd_count - 1 ; i > 0 ; i--){ - if (ivhd_is_newer(&ivhd_hdrs[i-1]->Header, + if (ivhd_is_newer(&ivhd_hdrs[i-1]->Header, &ivhd_hdrs[i]->Header)) { ivhd_hdrs[i-1] = ivhd_hdrs[i]; ivhd_count--; } - } + } ivhd_devs = malloc(sizeof(device_t) * ivhd_count, M_DEVBUF, M_WAITOK | M_ZERO); @@ -415,7 +415,7 @@ ivhd_probe(device_t dev) return (ENXIO); unit = device_get_unit(dev); - KASSERT((unit < ivhd_count), + KASSERT((unit < ivhd_count), ("ivhd unit %d > count %d", unit, ivhd_count)); ivhd = ivhd_hdrs[unit]; KASSERT(ivhd, ("ivhd is NULL")); @@ -424,7 +424,7 @@ ivhd_probe(device_t dev) case IVRS_TYPE_HARDWARE_EFR: device_set_desc(dev, "AMD-Vi/IOMMU ivhd with EFR"); break; - + case IVRS_TYPE_HARDWARE_MIXED: device_set_desc(dev, "AMD-Vi/IOMMU ivhd in mixed format"); break; @@ -482,7 +482,7 @@ ivhd_print_flag(device_t dev, enum IvrsType ivhd_type, uint8_t flag) * Feature in legacy IVHD type(0x10) and attribute in newer type(0x11 and 0x40). 
*/ static void -ivhd_print_feature(device_t dev, enum IvrsType ivhd_type, uint32_t feature) +ivhd_print_feature(device_t dev, enum IvrsType ivhd_type, uint32_t feature) { switch (ivhd_type) { case IVRS_TYPE_HARDWARE_LEGACY: @@ -576,9 +576,9 @@ ivhd_print_ext_feature(device_t dev, uint64_t ext_feature) "\020AttrFWSup" "\021HDSup" "\023InvIotlbSup", - REG_BITS(ext_high, 5, 0), - REG_BITS(ext_high, 8, 7), - REG_BITS(ext_high, 11, 10)); + REG_BITS(ext_high, 5, 0), + REG_BITS(ext_high, 8, 7), + REG_BITS(ext_high, 11, 10)); } static int @@ -588,7 +588,7 @@ ivhd_print_cap(struct amdvi_softc *softc, ACPI_IVRS_HARDWARE1 * ivhd) int max_ptp_level; dev = softc->dev; - + ivhd_print_flag(dev, softc->ivhd_type, softc->ivhd_flag); ivhd_print_feature(dev, softc->ivhd_type, softc->ivhd_feature); ivhd_print_ext_feature(dev, softc->ext_feature); @@ -600,7 +600,7 @@ ivhd_print_cap(struct amdvi_softc *softc, ACPI_IVRS_HARDWARE1 * ivhd) return (EINVAL); } else { device_printf(softc->dev, "supported paging level:%d, will use only: %d\n", - max_ptp_level, amdvi_ptp_level); + max_ptp_level, amdvi_ptp_level); } device_printf(softc->dev, "device range: 0x%x - 0x%x\n", @@ -618,7 +618,7 @@ ivhd_attach(device_t dev) int status, unit; unit = device_get_unit(dev); - KASSERT((unit < ivhd_count), + KASSERT((unit < ivhd_count), ("ivhd unit %d > count %d", unit, ivhd_count)); /* Make sure its same device for which attach is called. */ KASSERT((ivhd_devs[unit] == dev), @@ -633,12 +633,12 @@ ivhd_attach(device_t dev) softc->pci_seg = ivhd->PciSegmentGroup; softc->pci_rid = ivhd->Header.DeviceId; softc->ivhd_flag = ivhd->Header.Flags; - /* + /* * On lgeacy IVHD type(0x10), it is documented as feature * but in newer type it is attribute. */ softc->ivhd_feature = ivhd->FeatureReporting; - /* + /* * PCI capability has more capabilities that are not part of IVRS. 
*/ softc->cap_off = ivhd->CapabilityOffset; @@ -669,7 +669,7 @@ ivhd_attach(device_t dev) status = amdvi_setup_hw(softc); if (status != 0) { - device_printf(dev, "couldn't be initialised, error=%d\n", + device_printf(dev, "couldn't be initialised, error=%d\n", status); return (status); } diff --git a/usr/src/uts/i86pc/io/vmm/amd/npt.c b/usr/src/uts/i86pc/io/vmm/amd/npt.c index 862f6a0ecf..3f143a5d8f 100644 --- a/usr/src/uts/i86pc/io/vmm/amd/npt.c +++ b/usr/src/uts/i86pc/io/vmm/amd/npt.c @@ -61,28 +61,25 @@ svm_npt_init(int ipinum) npt_flags = ipinum & NPT_IPIMASK; TUNABLE_INT_FETCH("hw.vmm.npt.enable_superpage", &enable_superpage); if (enable_superpage) - npt_flags |= PMAP_PDE_SUPERPAGE; - + npt_flags |= PMAP_PDE_SUPERPAGE; + return (0); } static int npt_pinit(pmap_t pmap) { - return (pmap_pinit_type(pmap, PT_RVI, npt_flags)); } struct vmspace * svm_npt_alloc(vm_offset_t min, vm_offset_t max) { - return (vmspace_alloc(min, max, npt_pinit)); } void svm_npt_free(struct vmspace *vmspace) { - vmspace_free(vmspace); } diff --git a/usr/src/uts/i86pc/io/vmm/amd/npt.h b/usr/src/uts/i86pc/io/vmm/amd/npt.h index 35530d7833..d90a1b14b2 100644 --- a/usr/src/uts/i86pc/io/vmm/amd/npt.h +++ b/usr/src/uts/i86pc/io/vmm/amd/npt.h @@ -31,7 +31,7 @@ #ifndef _SVM_NPT_H_ #define _SVM_NPT_H_ -int svm_npt_init(int ipinum); +int svm_npt_init(int ipinum); struct vmspace *svm_npt_alloc(vm_offset_t min, vm_offset_t max); void svm_npt_free(struct vmspace *vmspace); diff --git a/usr/src/uts/i86pc/io/vmm/amd/svm.c b/usr/src/uts/i86pc/io/vmm/amd/svm.c index 615d3cd029..1046a54126 100644 --- a/usr/src/uts/i86pc/io/vmm/amd/svm.c +++ b/usr/src/uts/i86pc/io/vmm/amd/svm.c @@ -68,7 +68,7 @@ __FBSDID("$FreeBSD$"); #include <machine/smp.h> #include <machine/vmm.h> #include <machine/vmm_dev.h> -#include <machine/vmm_instruction_emul.h> +#include <sys/vmm_instruction_emul.h> #include "vmm_lapic.h" #include "vmm_stat.h" @@ -104,7 +104,7 @@ SYSCTL_NODE(_hw_vmm, OID_AUTO, svm, CTLFLAG_RW | CTLFLAG_MPSAFE, 
NULL, #define AMD_CPUID_SVM_PAUSE_FTH BIT(12) /* Pause filter threshold */ #define AMD_CPUID_SVM_AVIC BIT(13) /* AVIC present */ -#define VMCB_CACHE_DEFAULT (VMCB_CACHE_ASID | \ +#define VMCB_CACHE_DEFAULT (VMCB_CACHE_ASID | \ VMCB_CACHE_IOPM | \ VMCB_CACHE_I | \ VMCB_CACHE_TPR | \ @@ -139,7 +139,7 @@ SYSCTL_UINT(_hw_vmm_svm, OID_AUTO, num_asids, CTLFLAG_RDTUN, &nasid, 0, /* Current ASID generation for each host cpu */ static struct asid asid[MAXCPU]; -/* +/* * SVM host state saved area of size 4KB for each core. */ static uint8_t hsave[MAXCPU][PAGE_SIZE] __aligned(PAGE_SIZE); @@ -154,14 +154,12 @@ static int svm_setreg(void *arg, int vcpu, int ident, uint64_t val); static __inline int flush_by_asid(void) { - return (svm_feature & AMD_CPUID_SVM_FLUSH_BY_ASID); } static __inline int decode_assist(void) { - return (svm_feature & AMD_CPUID_SVM_DECODE_ASSIST); } @@ -307,7 +305,7 @@ svm_restore(void) { svm_enable(NULL); -} +} #else /* __FreeBSD__ */ static int svm_cleanup(void) @@ -335,14 +333,14 @@ svm_restore(void) #endif /* __FreeBSD__ */ /* Pentium compatible MSRs */ -#define MSR_PENTIUM_START 0 -#define MSR_PENTIUM_END 0x1FFF +#define MSR_PENTIUM_START 0 +#define MSR_PENTIUM_END 0x1FFF /* AMD 6th generation and Intel compatible MSRs */ -#define MSR_AMD6TH_START 0xC0000000UL -#define MSR_AMD6TH_END 0xC0001FFFUL +#define MSR_AMD6TH_START 0xC0000000UL +#define MSR_AMD6TH_END 0xC0001FFFUL /* AMD 7th and 8th generation compatible MSRs */ -#define MSR_AMD7TH_START 0xC0010000UL -#define MSR_AMD7TH_END 0xC0011FFFUL +#define MSR_AMD7TH_START 0xC0010000UL +#define MSR_AMD7TH_END 0xC0011FFFUL /* * Get the index and bit position for a MSR in permission bitmap. 
@@ -362,12 +360,12 @@ svm_msr_index(uint64_t msr, int *index, int *bit) return (0); } - base += (MSR_PENTIUM_END - MSR_PENTIUM_START + 1); + base += (MSR_PENTIUM_END - MSR_PENTIUM_START + 1); if (msr >= MSR_AMD6TH_START && msr <= MSR_AMD6TH_END) { - off = (msr - MSR_AMD6TH_START); + off = (msr - MSR_AMD6TH_START); *index = (off + base) / 4; return (0); - } + } base += (MSR_AMD6TH_END - MSR_AMD6TH_START + 1); if (msr >= MSR_AMD7TH_START && msr <= MSR_AMD7TH_END) { @@ -717,61 +715,6 @@ svm_paging_mode(uint64_t cr0, uint64_t cr4, uint64_t efer) /* * ins/outs utility routines */ -static uint64_t -svm_inout_str_index(struct svm_regctx *regs, int in) -{ - uint64_t val; - - val = in ? regs->sctx_rdi : regs->sctx_rsi; - - return (val); -} - -static uint64_t -svm_inout_str_count(struct svm_regctx *regs, int rep) -{ - uint64_t val; - - val = rep ? regs->sctx_rcx : 1; - - return (val); -} - -static void -svm_inout_str_seginfo(struct svm_softc *svm_sc, int vcpu, int64_t info1, - int in, struct vm_inout_str *vis) -{ - int error, s; - - if (in) { - vis->seg_name = VM_REG_GUEST_ES; - } else { - /* The segment field has standard encoding */ - s = (info1 >> 10) & 0x7; - vis->seg_name = vm_segment_name(s); - } - - error = vmcb_getdesc(svm_sc, vcpu, vis->seg_name, &vis->seg_desc); - KASSERT(error == 0, ("%s: svm_getdesc error %d", __func__, error)); -} - -static int -svm_inout_str_addrsize(uint64_t info1) -{ - uint32_t size; - - size = (info1 >> 7) & 0x7; - switch (size) { - case 1: - return (2); /* 16 bit */ - case 2: - return (4); /* 32 bit */ - case 4: - return (8); /* 64 bit */ - default: - panic("%s: invalid size encoding %d", __func__, size); - } -} static void svm_paging_info(struct vmcb *vmcb, struct vm_guest_paging *paging) @@ -792,53 +735,78 @@ svm_paging_info(struct vmcb *vmcb, struct vm_guest_paging *paging) * Handle guest I/O intercept. 
*/ static int -svm_handle_io(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit) +svm_handle_inout(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit) { struct vmcb_ctrl *ctrl; struct vmcb_state *state; - struct svm_regctx *regs; - struct vm_inout_str *vis; + struct vm_inout *inout; + struct vie *vie; uint64_t info1; - int inout_string; + struct vm_guest_paging paging; state = svm_get_vmcb_state(svm_sc, vcpu); - ctrl = svm_get_vmcb_ctrl(svm_sc, vcpu); - regs = svm_get_guest_regctx(svm_sc, vcpu); - + ctrl = svm_get_vmcb_ctrl(svm_sc, vcpu); + inout = &vmexit->u.inout; info1 = ctrl->exitinfo1; - inout_string = info1 & BIT(2) ? 1 : 0; - /* - * The effective segment number in EXITINFO1[12:10] is populated - * only if the processor has the DecodeAssist capability. - * - * XXX this is not specified explicitly in APMv2 but can be verified - * empirically. - */ - if (inout_string && !decode_assist()) - return (UNHANDLED); - - vmexit->exitcode = VM_EXITCODE_INOUT; - vmexit->u.inout.in = (info1 & BIT(0)) ? 1 : 0; - vmexit->u.inout.string = inout_string; - vmexit->u.inout.rep = (info1 & BIT(3)) ? 1 : 0; - vmexit->u.inout.bytes = (info1 >> 4) & 0x7; - vmexit->u.inout.port = (uint16_t)(info1 >> 16); - vmexit->u.inout.eax = (uint32_t)(state->rax); - - if (inout_string) { - vmexit->exitcode = VM_EXITCODE_INOUT_STR; - vis = &vmexit->u.inout_str; - svm_paging_info(svm_get_vmcb(svm_sc, vcpu), &vis->paging); - vis->rflags = state->rflags; - vis->cr0 = state->cr0; - vis->index = svm_inout_str_index(regs, vmexit->u.inout.in); - vis->count = svm_inout_str_count(regs, vmexit->u.inout.rep); - vis->addrsize = svm_inout_str_addrsize(info1); - svm_inout_str_seginfo(svm_sc, vcpu, info1, - vmexit->u.inout.in, vis); + inout->bytes = (info1 >> 4) & 0x7; + inout->flags = 0; + inout->flags |= (info1 & BIT(0)) ? INOUT_IN : 0; + inout->flags |= (info1 & BIT(3)) ? INOUT_REP : 0; + inout->flags |= (info1 & BIT(2)) ? 
INOUT_STR : 0; + inout->port = (uint16_t)(info1 >> 16); + inout->eax = (uint32_t)(state->rax); + + if ((inout->flags & INOUT_STR) != 0) { + /* + * The effective segment number in EXITINFO1[12:10] is populated + * only if the processor has the DecodeAssist capability. + * + * This is not specified explicitly in APMv2 but can be verified + * empirically. + */ + if (!decode_assist()) { + /* + * Without decoding assistance, force the task of + * emulating the ins/outs on userspace. + */ + vmexit->exitcode = VM_EXITCODE_INST_EMUL; + bzero(&vmexit->u.inst_emul, + sizeof (vmexit->u.inst_emul)); + return (UNHANDLED); + } + + /* + * Bits 7-9 encode the address size of ins/outs operations where + * the 1/2/4 values correspond to 16/32/64 bit sizes. + */ + inout->addrsize = 2 * ((info1 >> 7) & 0x7); + VERIFY(inout->addrsize == 2 || inout->addrsize == 4 || + inout->addrsize == 8); + + if (inout->flags & INOUT_IN) { + /* + * For INS instructions, %es (encoded as 0) is the + * implied segment for the operation. + */ + inout->segment = 0; + } else { + /* + * Bits 10-12 encode the segment for OUTS. + * This value follows the standard x86 segment order. 
+ */ + inout->segment = (info1 >> 10) & 0x7; + } } + vmexit->exitcode = VM_EXITCODE_INOUT; + svm_paging_info(svm_get_vmcb(svm_sc, vcpu), &paging); + vie = vm_vie_ctx(svm_sc->vm, vcpu); + vie_init_inout(vie, inout, vmexit->inst_length, &paging); + + /* The in/out emulation will handle advancing %rip */ + vmexit->inst_length = 0; + return (UNHANDLED); } @@ -857,7 +825,6 @@ npf_fault_type(uint64_t exitinfo1) static bool svm_npf_emul_fault(uint64_t exitinfo1) { - if (exitinfo1 & VMCB_NPF_INFO1_ID) { return (false); } @@ -870,48 +837,52 @@ svm_npf_emul_fault(uint64_t exitinfo1) return (false); } - return (true); + return (true); } static void -svm_handle_inst_emul(struct vmcb *vmcb, uint64_t gpa, struct vm_exit *vmexit) +svm_handle_mmio_emul(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit, + uint64_t gpa) { - struct vm_guest_paging *paging; - struct vmcb_segment seg; struct vmcb_ctrl *ctrl; - char *inst_bytes; - int error, inst_len; + struct vmcb *vmcb; + struct vie *vie; + struct vm_guest_paging paging; + struct vmcb_segment seg; + char *inst_bytes = NULL; + uint8_t inst_len = 0; + int error; + vmcb = svm_get_vmcb(svm_sc, vcpu); ctrl = &vmcb->ctrl; - paging = &vmexit->u.inst_emul.paging; - vmexit->exitcode = VM_EXITCODE_INST_EMUL; - vmexit->u.inst_emul.gpa = gpa; - vmexit->u.inst_emul.gla = VIE_INVALID_GLA; - svm_paging_info(vmcb, paging); + vmexit->exitcode = VM_EXITCODE_MMIO_EMUL; + vmexit->u.mmio_emul.gpa = gpa; + vmexit->u.mmio_emul.gla = VIE_INVALID_GLA; + svm_paging_info(vmcb, &paging); error = vmcb_seg(vmcb, VM_REG_GUEST_CS, &seg); KASSERT(error == 0, ("%s: vmcb_seg(CS) error %d", __func__, error)); - switch(paging->cpu_mode) { + switch (paging.cpu_mode) { case CPU_MODE_REAL: - vmexit->u.inst_emul.cs_base = seg.base; - vmexit->u.inst_emul.cs_d = 0; + vmexit->u.mmio_emul.cs_base = seg.base; + vmexit->u.mmio_emul.cs_d = 0; break; case CPU_MODE_PROTECTED: case CPU_MODE_COMPATIBILITY: - vmexit->u.inst_emul.cs_base = seg.base; + vmexit->u.mmio_emul.cs_base 
= seg.base; /* * Section 4.8.1 of APM2, Default Operand Size or D bit. */ - vmexit->u.inst_emul.cs_d = (seg.attrib & VMCB_CS_ATTRIB_D) ? + vmexit->u.mmio_emul.cs_d = (seg.attrib & VMCB_CS_ATTRIB_D) ? 1 : 0; break; default: - vmexit->u.inst_emul.cs_base = 0; - vmexit->u.inst_emul.cs_d = 0; - break; + vmexit->u.mmio_emul.cs_base = 0; + vmexit->u.mmio_emul.cs_d = 0; + break; } /* @@ -920,11 +891,9 @@ svm_handle_inst_emul(struct vmcb *vmcb, uint64_t gpa, struct vm_exit *vmexit) if (decode_assist() && !disable_npf_assist) { inst_len = ctrl->inst_len; inst_bytes = (char *)ctrl->inst_bytes; - } else { - inst_len = 0; - inst_bytes = NULL; } - vie_init(&vmexit->u.inst_emul.vie, inst_bytes, inst_len); + vie = vm_vie_ctx(svm_sc->vm, vcpu); + vie_init_mmio(vie, inst_bytes, inst_len, &paging, gpa); } #ifdef KTR @@ -1014,7 +983,7 @@ svm_save_intinfo(struct svm_softc *svm_sc, int vcpu) uint64_t intinfo; ctrl = svm_get_vmcb_ctrl(svm_sc, vcpu); - intinfo = ctrl->exitintinfo; + intinfo = ctrl->exitintinfo; if (!VMCB_EXITINTINFO_VALID(intinfo)) return; @@ -1488,7 +1457,7 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit) eax = state->rax; ecx = ctx->sctx_rcx; edx = ctx->sctx_rdx; - retu = false; + retu = false; if (info1) { vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_WRMSR, 1); @@ -1520,7 +1489,7 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit) } break; case VMCB_EXIT_IO: - handled = svm_handle_io(svm_sc, vcpu, vmexit); + handled = svm_handle_inout(svm_sc, vcpu, vmexit); vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_INOUT, 1); break; case VMCB_EXIT_CPUID: @@ -1552,9 +1521,9 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit) "on gpa %#lx/%#lx at rip %#lx", info2, info1, state->rip); } else if (svm_npf_emul_fault(info1)) { - svm_handle_inst_emul(vmcb, info2, vmexit); - vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_INST_EMUL, 1); - VCPU_CTR3(svm_sc->vm, vcpu, "inst_emul fault " + svm_handle_mmio_emul(svm_sc, vcpu, vmexit, info2); + 
vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_MMIO_EMUL, 1); + VCPU_CTR3(svm_sc->vm, vcpu, "mmio_emul fault " "for gpa %#lx/%#lx at rip %#lx", info2, info1, state->rip); } @@ -1568,7 +1537,7 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit) default: vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_UNKNOWN, 1); break; - } + } VCPU_CTR4(svm_sc->vm, vcpu, "%s %s vmexit at %#lx/%d", handled ? "handled" : "unhandled", exit_reason_to_str(code), @@ -1999,7 +1968,7 @@ svm_dr_leave_guest(struct svm_regctx *gctx) * Start vcpu with specified RIP. */ static int -svm_vmrun(void *arg, int vcpu, register_t rip, pmap_t pmap, +svm_vmrun(void *arg, int vcpu, register_t rip, pmap_t pmap, struct vm_eventinfo *evinfo) { struct svm_regctx *gctx; @@ -2153,7 +2122,7 @@ svm_vmrun(void *arg, int vcpu, register_t rip, pmap_t pmap, /* Restore host LDTR. */ lldt(ldt_sel); - /* #VMEXIT disables interrupts so re-enable them here. */ + /* #VMEXIT disables interrupts so re-enable them here. */ enable_gintr(); /* Update 'nextrip' */ diff --git a/usr/src/uts/i86pc/io/vmm/amd/svm_msr.c b/usr/src/uts/i86pc/io/vmm/amd/svm_msr.c index 75502d3c8e..ea344165dd 100644 --- a/usr/src/uts/i86pc/io/vmm/amd/svm_msr.c +++ b/usr/src/uts/i86pc/io/vmm/amd/svm_msr.c @@ -72,7 +72,7 @@ static uint64_t host_msrs[HOST_MSR_NUM]; void svm_msr_init(void) { - /* + /* * It is safe to cache the values of the following MSRs because they * don't change based on curcpu, curproc or curthread. 
*/ diff --git a/usr/src/uts/i86pc/io/vmm/amd/svm_softc.h b/usr/src/uts/i86pc/io/vmm/amd/svm_softc.h index b5ac1903e7..0b996d0ab4 100644 --- a/usr/src/uts/i86pc/io/vmm/amd/svm_softc.h +++ b/usr/src/uts/i86pc/io/vmm/amd/svm_softc.h @@ -72,9 +72,9 @@ struct svm_vcpu { struct svm_softc { uint8_t apic_page[VM_MAXCPU][PAGE_SIZE]; struct svm_vcpu vcpu[VM_MAXCPU]; - vm_offset_t nptp; /* nested page table */ - uint8_t *iopm_bitmap; /* shared by all vcpus */ - uint8_t *msr_bitmap; /* shared by all vcpus */ + vm_offset_t nptp; /* nested page table */ + uint8_t *iopm_bitmap; /* shared by all vcpus */ + uint8_t *msr_bitmap; /* shared by all vcpus */ struct vm *vm; #ifndef __FreeBSD__ uint64_t host_msrs[VM_MAXCPU][SVM_HOST_MSR_NUM]; diff --git a/usr/src/uts/i86pc/io/vmm/intel/ept.c b/usr/src/uts/i86pc/io/vmm/intel/ept.c index dcc4e3c330..5e5253780e 100644 --- a/usr/src/uts/i86pc/io/vmm/intel/ept.c +++ b/usr/src/uts/i86pc/io/vmm/intel/ept.c @@ -157,7 +157,7 @@ ept_dump(uint64_t *ptp, int nlevels) if (ptpval == 0) continue; - + for (t = 0; t < tabs; t++) printf("\t"); printf("%3d 0x%016lx\n", i, ptpval); diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmcs.c b/usr/src/uts/i86pc/io/vmm/intel/vmcs.c index bb7ee45048..f1a08cc57d 100644 --- a/usr/src/uts/i86pc/io/vmm/intel/vmcs.c +++ b/usr/src/uts/i86pc/io/vmm/intel/vmcs.c @@ -388,7 +388,7 @@ vmcs_init(struct vmcs *vmcs) cr0 = vmm_get_host_cr0(); if ((error = vmwrite(VMCS_HOST_CR0, cr0)) != 0) goto done; - + cr4 = vmm_get_host_cr4() | CR4_VMXE; if ((error = vmwrite(VMCS_HOST_CR4, cr4)) != 0) goto done; diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx.c b/usr/src/uts/i86pc/io/vmm/intel/vmx.c index 8469c99f33..c46560948e 100644 --- a/usr/src/uts/i86pc/io/vmm/intel/vmx.c +++ b/usr/src/uts/i86pc/io/vmm/intel/vmx.c @@ -77,7 +77,7 @@ __FBSDID("$FreeBSD$"); #include <machine/vmm.h> #include <machine/vmm_dev.h> -#include <machine/vmm_instruction_emul.h> +#include <sys/vmm_instruction_emul.h> #include "vmm_lapic.h" #include "vmm_host.h" #include 
"vmm_ioport.h" @@ -117,7 +117,7 @@ __FBSDID("$FreeBSD$"); PROCBASED_CR8_STORE_EXITING) #else /* We consider TSC offset a necessity for unsynched TSC handling */ -#define PROCBASED_CTLS_ONE_SETTING \ +#define PROCBASED_CTLS_ONE_SETTING \ (PROCBASED_SECONDARY_CONTROLS | \ PROCBASED_TSC_OFFSET | \ PROCBASED_MWAIT_EXITING | \ @@ -1885,69 +1885,6 @@ vmx_paging_mode(void) return (PAGING_MODE_PAE); } -static uint64_t -inout_str_index(struct vmx *vmx, int vcpuid, int in) -{ - uint64_t val; - int error; - enum vm_reg_name reg; - - reg = in ? VM_REG_GUEST_RDI : VM_REG_GUEST_RSI; - error = vmx_getreg(vmx, vcpuid, reg, &val); - KASSERT(error == 0, ("%s: vmx_getreg error %d", __func__, error)); - return (val); -} - -static uint64_t -inout_str_count(struct vmx *vmx, int vcpuid, int rep) -{ - uint64_t val; - int error; - - if (rep) { - error = vmx_getreg(vmx, vcpuid, VM_REG_GUEST_RCX, &val); - KASSERT(!error, ("%s: vmx_getreg error %d", __func__, error)); - } else { - val = 1; - } - return (val); -} - -static int -inout_str_addrsize(uint32_t inst_info) -{ - uint32_t size; - - size = (inst_info >> 7) & 0x7; - switch (size) { - case 0: - return (2); /* 16 bit */ - case 1: - return (4); /* 32 bit */ - case 2: - return (8); /* 64 bit */ - default: - panic("%s: invalid size encoding %d", __func__, size); - } -} - -static void -inout_str_seginfo(struct vmx *vmx, int vcpuid, uint32_t inst_info, int in, - struct vm_inout_str *vis) -{ - int error, s; - - if (in) { - vis->seg_name = VM_REG_GUEST_ES; - } else { - s = (inst_info >> 15) & 0x7; - vis->seg_name = vm_segment_name(s); - } - - error = vmx_getdesc(vmx, vcpuid, vis->seg_name, &vis->seg_desc); - KASSERT(error == 0, ("%s: vmx_getdesc error %d", __func__, error)); -} - static void vmx_paging_info(struct vm_guest_paging *paging) { @@ -1958,35 +1895,89 @@ vmx_paging_info(struct vm_guest_paging *paging) } static void -vmexit_inst_emul(struct vm_exit *vmexit, uint64_t gpa, uint64_t gla) +vmexit_mmio_emul(struct vm_exit *vmexit, struct vie 
*vie, uint64_t gpa, + uint64_t gla) { - struct vm_guest_paging *paging; + struct vm_guest_paging paging; uint32_t csar; - paging = &vmexit->u.inst_emul.paging; - - vmexit->exitcode = VM_EXITCODE_INST_EMUL; + vmexit->exitcode = VM_EXITCODE_MMIO_EMUL; vmexit->inst_length = 0; - vmexit->u.inst_emul.gpa = gpa; - vmexit->u.inst_emul.gla = gla; - vmx_paging_info(paging); - switch (paging->cpu_mode) { + vmexit->u.mmio_emul.gpa = gpa; + vmexit->u.mmio_emul.gla = gla; + vmx_paging_info(&paging); + + switch (paging.cpu_mode) { case CPU_MODE_REAL: - vmexit->u.inst_emul.cs_base = vmcs_read(VMCS_GUEST_CS_BASE); - vmexit->u.inst_emul.cs_d = 0; + vmexit->u.mmio_emul.cs_base = vmcs_read(VMCS_GUEST_CS_BASE); + vmexit->u.mmio_emul.cs_d = 0; break; case CPU_MODE_PROTECTED: case CPU_MODE_COMPATIBILITY: - vmexit->u.inst_emul.cs_base = vmcs_read(VMCS_GUEST_CS_BASE); + vmexit->u.mmio_emul.cs_base = vmcs_read(VMCS_GUEST_CS_BASE); csar = vmcs_read(VMCS_GUEST_CS_ACCESS_RIGHTS); - vmexit->u.inst_emul.cs_d = SEG_DESC_DEF32(csar); + vmexit->u.mmio_emul.cs_d = SEG_DESC_DEF32(csar); break; default: - vmexit->u.inst_emul.cs_base = 0; - vmexit->u.inst_emul.cs_d = 0; + vmexit->u.mmio_emul.cs_base = 0; + vmexit->u.mmio_emul.cs_d = 0; break; } - vie_init(&vmexit->u.inst_emul.vie, NULL, 0); + + vie_init_mmio(vie, NULL, 0, &paging, gpa); +} + +static void +vmexit_inout(struct vm_exit *vmexit, struct vie *vie, uint64_t qual, + uint32_t eax) +{ + struct vm_guest_paging paging; + struct vm_inout *inout; + + inout = &vmexit->u.inout; + + inout->bytes = (qual & 0x7) + 1; + inout->flags = 0; + inout->flags |= (qual & 0x8) ? INOUT_IN : 0; + inout->flags |= (qual & 0x10) ? INOUT_STR : 0; + inout->flags |= (qual & 0x20) ? 
INOUT_REP : 0; + inout->port = (uint16_t)(qual >> 16); + inout->eax = eax; + if (inout->flags & INOUT_STR) { + uint64_t inst_info; + + inst_info = vmcs_read(VMCS_EXIT_INSTRUCTION_INFO); + + /* + * Bits 7-9 encode the address size of ins/outs operations where + * the 0/1/2 values correspond to 16/32/64 bit sizes. + */ + inout->addrsize = 2 << (1 + ((inst_info >> 7) & 0x3)); + VERIFY(inout->addrsize == 2 || inout->addrsize == 4 || + inout->addrsize == 8); + + if (inout->flags & INOUT_IN) { + /* + * The bits describing the segment in INSTRUCTION_INFO + * are not defined for ins, leaving it to system + * software to assume %es (encoded as 0) + */ + inout->segment = 0; + } else { + /* + * Bits 15-17 encode the segment for OUTS. + * This value follows the standard x86 segment order. + */ + inout->segment = (inst_info >> 15) & 0x7; + } + } + + vmexit->exitcode = VM_EXITCODE_INOUT; + vmx_paging_info(&paging); + vie_init_inout(vie, inout, vmexit->inst_length, &paging); + + /* The in/out emulation will handle advancing %rip */ + vmexit->inst_length = 0; } static int @@ -2134,6 +2125,7 @@ vmx_handle_apic_access(struct vmx *vmx, int vcpuid, struct vm_exit *vmexit) { uint64_t qual; int access_type, offset, allowed; + struct vie *vie; if (!apic_access_virtualization(vmx, vcpuid)) return (UNHANDLED); @@ -2180,7 +2172,8 @@ vmx_handle_apic_access(struct vmx *vmx, int vcpuid, struct vm_exit *vmexit) } if (allowed) { - vmexit_inst_emul(vmexit, DEFAULT_APIC_BASE + offset, + vie = vm_vie_ctx(vmx->vm, vcpuid); + vmexit_mmio_emul(vmexit, vie, DEFAULT_APIC_BASE + offset, VIE_INVALID_GLA); } @@ -2262,10 +2255,10 @@ emulate_rdmsr(struct vmx *vmx, int vcpuid, u_int num, bool *retu) static int vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) { - int error, errcode, errcode_valid, handled, in; + int error, errcode, errcode_valid, handled; struct vmxctx *vmxctx; + struct vie *vie; struct vlapic *vlapic; - struct vm_inout_str *vis; struct vm_task_switch *ts; uint32_t eax, ecx, 
edx, idtvec_info, idtvec_err, intr_info, inst_info; uint32_t intr_type, intr_vec, reason; @@ -2522,25 +2515,8 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) return (1); case EXIT_REASON_INOUT: vmm_stat_incr(vmx->vm, vcpu, VMEXIT_INOUT, 1); - vmexit->exitcode = VM_EXITCODE_INOUT; - vmexit->u.inout.bytes = (qual & 0x7) + 1; - vmexit->u.inout.in = in = (qual & 0x8) ? 1 : 0; - vmexit->u.inout.string = (qual & 0x10) ? 1 : 0; - vmexit->u.inout.rep = (qual & 0x20) ? 1 : 0; - vmexit->u.inout.port = (uint16_t)(qual >> 16); - vmexit->u.inout.eax = (uint32_t)(vmxctx->guest_rax); - if (vmexit->u.inout.string) { - inst_info = vmcs_read(VMCS_EXIT_INSTRUCTION_INFO); - vmexit->exitcode = VM_EXITCODE_INOUT_STR; - vis = &vmexit->u.inout_str; - vmx_paging_info(&vis->paging); - vis->rflags = vmcs_read(VMCS_GUEST_RFLAGS); - vis->cr0 = vmcs_read(VMCS_GUEST_CR0); - vis->index = inout_str_index(vmx, vcpu, in); - vis->count = inout_str_count(vmx, vcpu, vis->inout.rep); - vis->addrsize = inout_str_addrsize(inst_info); - inout_str_seginfo(vmx, vcpu, inst_info, in, vis); - } + vie = vm_vie_ctx(vmx->vm, vcpu); + vmexit_inout(vmexit, vie, qual, (uint32_t)vmxctx->guest_rax); SDT_PROBE3(vmm, vmx, exit, inout, vmx, vcpu, vmexit); break; case EXIT_REASON_CPUID: @@ -2651,8 +2627,9 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) SDT_PROBE5(vmm, vmx, exit, nestedfault, vmx, vcpu, vmexit, gpa, qual); } else if (ept_emulation_fault(qual)) { - vmexit_inst_emul(vmexit, gpa, vmcs_gla()); - vmm_stat_incr(vmx->vm, vcpu, VMEXIT_INST_EMUL, 1); + vie = vm_vie_ctx(vmx->vm, vcpu); + vmexit_mmio_emul(vmexit, vie, gpa, vmcs_gla()); + vmm_stat_incr(vmx->vm, vcpu, VMEXIT_MMIO_EMUL, 1); SDT_PROBE4(vmm, vmx, exit, mmiofault, vmx, vcpu, vmexit, gpa); } diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.c b/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.c index 9121e46b40..6c37c9c234 100644 --- a/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.c +++ 
b/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.c @@ -451,7 +451,7 @@ vmx_wrmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t val, bool *retu) uint64_t *guest_msrs; uint64_t changed; int error; - + guest_msrs = vmx->guest_msrs[vcpuid]; error = 0; diff --git a/usr/src/uts/i86pc/io/vmm/intel/vtd.c b/usr/src/uts/i86pc/io/vmm/intel/vtd.c index 41c2c5b2f8..50c0934ace 100644 --- a/usr/src/uts/i86pc/io/vmm/intel/vtd.c +++ b/usr/src/uts/i86pc/io/vmm/intel/vtd.c @@ -176,7 +176,7 @@ domain_id(void) if (dom == NULL) break; /* found it */ } - + if (id >= max_domains) panic("domain ids exhausted"); @@ -279,7 +279,7 @@ vtd_iotlb_global_invalidate(struct vtdmap *vtdmap) offset = VTD_ECAP_IRO(vtdmap->ext_cap) * 16; iotlb_reg = (volatile uint64_t *)((caddr_t)vtdmap + offset + 8); - + *iotlb_reg = VTD_IIR_IVT | VTD_IIR_IIRG_GLOBAL | VTD_IIR_DRAIN_READS | VTD_IIR_DRAIN_WRITES; @@ -851,7 +851,7 @@ static void vtd_destroy_domain(void *arg) { struct domain *dom; - + dom = arg; SLIST_REMOVE(&domhead, dom, domain, next); diff --git a/usr/src/uts/i86pc/io/vmm/io/ppt.c b/usr/src/uts/i86pc/io/vmm/io/ppt.c index a71ce86c2d..2f715bcc42 100644 --- a/usr/src/uts/i86pc/io/vmm/io/ppt.c +++ b/usr/src/uts/i86pc/io/vmm/io/ppt.c @@ -1321,7 +1321,7 @@ ppt_setup_msix(struct vm *vm, int vcpu, int pptfd, int idx, uint64_t addr, /* * First-time configuration: - * Allocate the MSI-X table + * Allocate the MSI-X table * Allocate the IRQ resources * Set up some variables in ppt->msix */ diff --git a/usr/src/uts/i86pc/io/vmm/io/vatpic.c b/usr/src/uts/i86pc/io/vmm/io/vatpic.c index ba4cd7785e..b81259647c 100644 --- a/usr/src/uts/i86pc/io/vmm/io/vatpic.c +++ b/usr/src/uts/i86pc/io/vmm/io/vatpic.c @@ -709,8 +709,8 @@ vatpic_write(struct vatpic *vatpic, struct atpic *atpic, bool in, int port, } int -vatpic_master_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes, - uint32_t *eax) +vatpic_master_handler(struct vm *vm, int vcpuid, bool in, uint16_t port, + uint8_t bytes, uint32_t *eax) { struct vatpic 
*vatpic; struct atpic *atpic; @@ -720,17 +720,17 @@ vatpic_master_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes, if (bytes != 1) return (-1); - + if (in) { return (vatpic_read(vatpic, atpic, in, port, bytes, eax)); } - + return (vatpic_write(vatpic, atpic, in, port, bytes, eax)); } int -vatpic_slave_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes, - uint32_t *eax) +vatpic_slave_handler(struct vm *vm, int vcpuid, bool in, uint16_t port, + uint8_t bytes, uint32_t *eax) { struct vatpic *vatpic; struct atpic *atpic; @@ -749,8 +749,8 @@ vatpic_slave_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes, } int -vatpic_elc_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes, - uint32_t *eax) +vatpic_elc_handler(struct vm *vm, int vcpuid, bool in, uint16_t port, + uint8_t bytes, uint32_t *eax) { struct vatpic *vatpic; bool is_master; diff --git a/usr/src/uts/i86pc/io/vmm/io/vatpic.h b/usr/src/uts/i86pc/io/vmm/io/vatpic.h index d4a1be1820..dcb8ea6c6f 100644 --- a/usr/src/uts/i86pc/io/vmm/io/vatpic.h +++ b/usr/src/uts/i86pc/io/vmm/io/vatpic.h @@ -39,12 +39,12 @@ struct vatpic *vatpic_init(struct vm *vm); void vatpic_cleanup(struct vatpic *vatpic); -int vatpic_master_handler(struct vm *vm, int vcpuid, bool in, int port, - int bytes, uint32_t *eax); -int vatpic_slave_handler(struct vm *vm, int vcpuid, bool in, int port, - int bytes, uint32_t *eax); -int vatpic_elc_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes, - uint32_t *eax); +int vatpic_master_handler(struct vm *vm, int vcpuid, bool in, uint16_t port, + uint8_t bytes, uint32_t *eax); +int vatpic_slave_handler(struct vm *vm, int vcpuid, bool in, uint16_t port, + uint8_t bytes, uint32_t *eax); +int vatpic_elc_handler(struct vm *vm, int vcpuid, bool in, uint16_t port, + uint8_t bytes, uint32_t *eax); int vatpic_assert_irq(struct vm *vm, int irq); int vatpic_deassert_irq(struct vm *vm, int irq); diff --git a/usr/src/uts/i86pc/io/vmm/io/vatpit.c 
b/usr/src/uts/i86pc/io/vmm/io/vatpit.c index 03f63798e7..47cb40f9bd 100644 --- a/usr/src/uts/i86pc/io/vmm/io/vatpit.c +++ b/usr/src/uts/i86pc/io/vmm/io/vatpit.c @@ -336,7 +336,7 @@ vatpit_update_mode(struct vatpit *vatpit, uint8_t val) } int -vatpit_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes, +vatpit_handler(struct vm *vm, int vcpuid, bool in, uint16_t port, uint8_t bytes, uint32_t *eax) { struct vatpit *vatpit; @@ -419,8 +419,8 @@ vatpit_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes, } int -vatpit_nmisc_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes, - uint32_t *eax) +vatpit_nmisc_handler(struct vm *vm, int vcpuid, bool in, uint16_t port, + uint8_t bytes, uint32_t *eax) { struct vatpit *vatpit; diff --git a/usr/src/uts/i86pc/io/vmm/io/vatpit.h b/usr/src/uts/i86pc/io/vmm/io/vatpit.h index 4bf9fe048d..512ce20735 100644 --- a/usr/src/uts/i86pc/io/vmm/io/vatpit.h +++ b/usr/src/uts/i86pc/io/vmm/io/vatpit.h @@ -39,10 +39,10 @@ struct vatpit *vatpit_init(struct vm *vm); void vatpit_cleanup(struct vatpit *vatpit); -int vatpit_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes, - uint32_t *eax); -int vatpit_nmisc_handler(struct vm *vm, int vcpuid, bool in, int port, - int bytes, uint32_t *eax); +int vatpit_handler(struct vm *vm, int vcpuid, bool in, uint16_t port, + uint8_t bytes, uint32_t *eax); +int vatpit_nmisc_handler(struct vm *vm, int vcpuid, bool in, uint16_t port, + uint8_t bytes, uint32_t *eax); #ifndef __FreeBSD__ void vatpit_localize_resources(struct vatpit *); diff --git a/usr/src/uts/i86pc/io/vmm/io/vhpet.c b/usr/src/uts/i86pc/io/vmm/io/vhpet.c index c82b4626bd..29e9188b77 100644 --- a/usr/src/uts/i86pc/io/vmm/io/vhpet.c +++ b/usr/src/uts/i86pc/io/vmm/io/vhpet.c @@ -61,10 +61,10 @@ static MALLOC_DEFINE(M_VHPET, "vhpet", "bhyve virtual hpet"); #define FS_PER_S 1000000000000000ul /* Timer N Configuration and Capabilities Register */ -#define HPET_TCAP_RO_MASK (HPET_TCAP_INT_ROUTE | \ - 
HPET_TCAP_FSB_INT_DEL | \ - HPET_TCAP_SIZE | \ - HPET_TCAP_PER_INT) +#define HPET_TCAP_RO_MASK (HPET_TCAP_INT_ROUTE | \ + HPET_TCAP_FSB_INT_DEL | \ + HPET_TCAP_SIZE | \ + HPET_TCAP_PER_INT) /* * HPET requires at least 3 timers and up to 32 timers per block. */ @@ -242,7 +242,7 @@ vhpet_timer_interrupt(struct vhpet *vhpet, int n) lapic_intr_msi(vhpet->vm, vhpet->timer[n].msireg >> 32, vhpet->timer[n].msireg & 0xffffffff); return; - } + } pin = vhpet_timer_ioapic_pin(vhpet, n); if (pin == 0) { @@ -504,7 +504,7 @@ vhpet_mmio_write(void *vm, int vcpuid, uint64_t gpa, uint64_t val, int size, if ((offset & 0x4) != 0) { mask <<= 32; data <<= 32; - } + } break; default: VM_CTR2(vhpet->vm, "hpet invalid mmio write: " @@ -658,7 +658,7 @@ vhpet_mmio_read(void *vm, int vcpuid, uint64_t gpa, uint64_t *rval, int size, if (offset == HPET_CAPABILITIES || offset == HPET_CAPABILITIES + 4) { data = vhpet_capabilities(); - goto done; + goto done; } if (offset == HPET_CONFIG || offset == HPET_CONFIG + 4) { diff --git a/usr/src/uts/i86pc/io/vmm/io/vhpet.h b/usr/src/uts/i86pc/io/vmm/io/vhpet.h index 8e28241b32..e6ded31a66 100644 --- a/usr/src/uts/i86pc/io/vmm/io/vhpet.h +++ b/usr/src/uts/i86pc/io/vmm/io/vhpet.h @@ -40,7 +40,7 @@ #define VHPET_SIZE 1024 struct vhpet *vhpet_init(struct vm *vm); -void vhpet_cleanup(struct vhpet *vhpet); +void vhpet_cleanup(struct vhpet *vhpet); int vhpet_mmio_write(void *vm, int vcpuid, uint64_t gpa, uint64_t val, int size, void *arg); int vhpet_mmio_read(void *vm, int vcpuid, uint64_t gpa, uint64_t *val, diff --git a/usr/src/uts/i86pc/io/vmm/io/vlapic.c b/usr/src/uts/i86pc/io/vmm/io/vlapic.c index af902ba40e..c1825f4264 100644 --- a/usr/src/uts/i86pc/io/vmm/io/vlapic.c +++ b/usr/src/uts/i86pc/io/vmm/io/vlapic.c @@ -164,7 +164,7 @@ void vlapic_id_write_handler(struct vlapic *vlapic) { struct LAPIC *lapic; - + /* * We don't allow the ID register to be modified so reset it back to * its default value. 
@@ -214,7 +214,7 @@ vlapic_get_ccr(struct vlapic *vlapic) struct bintime bt_now, bt_rem; struct LAPIC *lapic; uint32_t ccr; - + ccr = 0; lapic = vlapic->apic_page; @@ -250,7 +250,7 @@ vlapic_dcr_write_handler(struct vlapic *vlapic) { struct LAPIC *lapic; int divisor; - + lapic = vlapic->apic_page; VLAPIC_TIMER_LOCK(vlapic); @@ -275,7 +275,7 @@ void vlapic_esr_write_handler(struct vlapic *vlapic) { struct LAPIC *lapic; - + lapic = vlapic->apic_page; lapic->esr = vlapic->esr_pending; vlapic->esr_pending = 0; @@ -333,7 +333,7 @@ static __inline uint32_t * vlapic_get_lvtptr(struct vlapic *vlapic, uint32_t offset) { struct LAPIC *lapic = vlapic->apic_page; - int i; + int i; switch (offset) { case APIC_OFFSET_CMCI_LVT: @@ -405,9 +405,9 @@ vlapic_lvt_write_handler(struct vlapic *vlapic, uint32_t offset) uint32_t *lvtptr, mask, val; struct LAPIC *lapic; int idx; - + lapic = vlapic->apic_page; - lvtptr = vlapic_get_lvtptr(vlapic, offset); + lvtptr = vlapic_get_lvtptr(vlapic, offset); val = *lvtptr; idx = lvt_off_to_idx(offset); @@ -635,7 +635,7 @@ static __inline int vlapic_periodic_timer(struct vlapic *vlapic) { uint32_t lvt; - + lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT); return (vlapic_get_lvt_field(lvt, APIC_LVTT_TM_PERIODIC)); @@ -988,7 +988,6 @@ vlapic_icrlo_write_handler(struct vlapic *vlapic, bool *retu) uint64_t icrval; uint32_t dest, vec, mode; struct vlapic *vlapic2; - struct vm_exit *vmexit; struct LAPIC *lapic; uint16_t maxcpus; @@ -1082,13 +1081,7 @@ vlapic_icrlo_write_handler(struct vlapic *vlapic, bool *retu) return (0); vlapic2->boot_state = BS_RUNNING; - - *retu = true; - vmexit = vm_exitinfo(vlapic->vm, vlapic->vcpuid); - vmexit->exitcode = VM_EXITCODE_SPINUP_AP; - vmexit->u.spinup_ap.vcpu = dest; - vmexit->u.spinup_ap.rip = vec << PAGE_SHIFT; - + vm_req_spinup_ap(vlapic->vm, dest, vec << PAGE_SHIFT); return (0); } } @@ -1117,7 +1110,7 @@ int vlapic_pending_intr(struct vlapic *vlapic, int *vecptr) { struct LAPIC *lapic = vlapic->apic_page; - int 
idx, i, bitpos, vector; + int idx, i, bitpos, vector; uint32_t *irrptr, val; vlapic_update_ppr(vlapic); @@ -1138,7 +1131,7 @@ vlapic_pending_intr(struct vlapic *vlapic, int *vecptr) if (vecptr != NULL) *vecptr = vector; return (1); - } else + } else break; } } @@ -1156,7 +1149,7 @@ vlapic_intr_accepted(struct vlapic *vlapic, int vector) return ((*vlapic->ops.intr_accepted)(vlapic, vector)); /* - * clear the ready bit for vector being accepted in irr + * clear the ready bit for vector being accepted in irr * and set the vector as in service in isr. */ idx = (vector / 32) * 4; @@ -1247,7 +1240,7 @@ vlapic_read(struct vlapic *vlapic, int mmio_access, uint64_t offset, *data = 0; goto done; } - + offset &= ~3; switch(offset) { @@ -1296,17 +1289,17 @@ vlapic_read(struct vlapic *vlapic, int mmio_access, uint64_t offset, case APIC_OFFSET_ESR: *data = lapic->esr; break; - case APIC_OFFSET_ICR_LOW: + case APIC_OFFSET_ICR_LOW: *data = lapic->icr_lo; if (x2apic(vlapic)) *data |= (uint64_t)lapic->icr_hi << 32; break; - case APIC_OFFSET_ICR_HI: + case APIC_OFFSET_ICR_HI: *data = lapic->icr_hi; break; case APIC_OFFSET_CMCI_LVT: case APIC_OFFSET_TIMER_LVT ... 
APIC_OFFSET_ERROR_LVT: - *data = vlapic_get_lvt(vlapic, offset); + *data = vlapic_get_lvt(vlapic, offset); #ifdef INVARIANTS reg = vlapic_get_lvtptr(vlapic, offset); KASSERT(*data == *reg, ("inconsistent lvt value at " @@ -1401,7 +1394,7 @@ vlapic_write(struct vlapic *vlapic, int mmio_access, uint64_t offset, lapic->svr = data; vlapic_svr_write_handler(vlapic); break; - case APIC_OFFSET_ICR_LOW: + case APIC_OFFSET_ICR_LOW: lapic->icr_lo = data; if (x2apic(vlapic)) lapic->icr_hi = data >> 32; @@ -1455,7 +1448,7 @@ static void vlapic_reset(struct vlapic *vlapic) { struct LAPIC *lapic; - + lapic = vlapic->apic_page; bzero(lapic, sizeof(struct LAPIC)); diff --git a/usr/src/uts/i86pc/io/vmm/io/vpmtmr.c b/usr/src/uts/i86pc/io/vmm/io/vpmtmr.c index 4df909777d..0dce2b0a1f 100644 --- a/usr/src/uts/i86pc/io/vmm/io/vpmtmr.c +++ b/usr/src/uts/i86pc/io/vmm/io/vpmtmr.c @@ -80,7 +80,7 @@ vpmtmr_cleanup(struct vpmtmr *vpmtmr) } int -vpmtmr_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes, +vpmtmr_handler(struct vm *vm, int vcpuid, bool in, uint16_t port, uint8_t bytes, uint32_t *val) { struct vpmtmr *vpmtmr; diff --git a/usr/src/uts/i86pc/io/vmm/io/vpmtmr.h b/usr/src/uts/i86pc/io/vmm/io/vpmtmr.h index e6562da5c0..c06825b970 100644 --- a/usr/src/uts/i86pc/io/vmm/io/vpmtmr.h +++ b/usr/src/uts/i86pc/io/vmm/io/vpmtmr.h @@ -38,7 +38,7 @@ struct vpmtmr; struct vpmtmr *vpmtmr_init(struct vm *vm); void vpmtmr_cleanup(struct vpmtmr *pmtmr); -int vpmtmr_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes, - uint32_t *val); +int vpmtmr_handler(struct vm *vm, int vcpuid, bool in, uint16_t port, + uint8_t bytes, uint32_t *val); #endif diff --git a/usr/src/uts/i86pc/io/vmm/io/vrtc.c b/usr/src/uts/i86pc/io/vmm/io/vrtc.c index a3635fc9f0..343ad9c37a 100644 --- a/usr/src/uts/i86pc/io/vmm/io/vrtc.c +++ b/usr/src/uts/i86pc/io/vmm/io/vrtc.c @@ -359,7 +359,7 @@ rtc_to_secs(struct vrtc *vrtc) /* * Ignore 'rtc->dow' because some guests like Linux don't bother - * setting it at 
all while others like OpenBSD/i386 set it incorrectly. + * setting it at all while others like OpenBSD/i386 set it incorrectly. * * clock_ct_to_ts() does not depend on 'ct.dow' anyways so ignore it. */ @@ -874,8 +874,8 @@ vrtc_nvram_read(struct vm *vm, int offset, uint8_t *retval) } int -vrtc_addr_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes, - uint32_t *val) +vrtc_addr_handler(struct vm *vm, int vcpuid, bool in, uint16_t port, + uint8_t bytes, uint32_t *val) { struct vrtc *vrtc; @@ -897,8 +897,8 @@ vrtc_addr_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes, } int -vrtc_data_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes, - uint32_t *val) +vrtc_data_handler(struct vm *vm, int vcpuid, bool in, uint16_t port, + uint8_t bytes, uint32_t *val) { struct vrtc *vrtc; struct rtcdev *rtc; diff --git a/usr/src/uts/i86pc/io/vmm/io/vrtc.h b/usr/src/uts/i86pc/io/vmm/io/vrtc.h index 13abbedeb9..92a060cb8e 100644 --- a/usr/src/uts/i86pc/io/vmm/io/vrtc.h +++ b/usr/src/uts/i86pc/io/vmm/io/vrtc.h @@ -48,10 +48,10 @@ int vrtc_set_time(struct vm *vm, time_t secs); int vrtc_nvram_write(struct vm *vm, int offset, uint8_t value); int vrtc_nvram_read(struct vm *vm, int offset, uint8_t *retval); -int vrtc_addr_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes, - uint32_t *val); -int vrtc_data_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes, - uint32_t *val); +int vrtc_addr_handler(struct vm *vm, int vcpuid, bool in, uint16_t port, + uint8_t bytes, uint32_t *val); +int vrtc_data_handler(struct vm *vm, int vcpuid, bool in, uint16_t port, + uint8_t bytes, uint32_t *val); #ifndef __FreeBSD__ void vrtc_localize_resources(struct vrtc *); diff --git a/usr/src/uts/i86pc/sys/vmm_instruction_emul.h b/usr/src/uts/i86pc/io/vmm/sys/vmm_instruction_emul.h index d084301aee..d3a07b0f99 100644 --- a/usr/src/uts/i86pc/sys/vmm_instruction_emul.h +++ b/usr/src/uts/i86pc/io/vmm/sys/vmm_instruction_emul.h @@ -27,64 +27,57 @@ * * 
$FreeBSD$ */ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + * + * Copyright 2020 Oxide Computer Company + */ #ifndef _VMM_INSTRUCTION_EMUL_H_ #define _VMM_INSTRUCTION_EMUL_H_ #include <sys/mman.h> +#include <machine/vmm.h> -/* - * Callback functions to read and write memory regions. - */ -typedef int (*mem_region_read_t)(void *vm, int cpuid, uint64_t gpa, - uint64_t *rval, int rsize, void *arg); - -typedef int (*mem_region_write_t)(void *vm, int cpuid, uint64_t gpa, - uint64_t wval, int wsize, void *arg); +struct vie; -/* - * Emulate the decoded 'vie' instruction. - * - * The callbacks 'mrr' and 'mrw' emulate reads and writes to the memory region - * containing 'gpa'. 'mrarg' is an opaque argument that is passed into the - * callback functions. - * - * 'void *vm' should be 'struct vm *' when called from kernel context and - * 'struct vmctx *' when called from user context. - * s - */ -int vmm_emulate_instruction(void *vm, int cpuid, uint64_t gpa, struct vie *vie, - struct vm_guest_paging *paging, mem_region_read_t mrr, - mem_region_write_t mrw, void *mrarg); +struct vie *vie_alloc(); +void vie_free(struct vie *); -int vie_update_register(void *vm, int vcpuid, enum vm_reg_name reg, - uint64_t val, int size); +void vie_init_mmio(struct vie *vie, const char *inst_bytes, uint8_t inst_length, + const struct vm_guest_paging *paging, uint64_t gpa); +void vie_init_inout(struct vie *vie, const struct vm_inout *inout, + uint8_t inst_len, const struct vm_guest_paging *paging); -/* - * Returns 1 if an alignment check exception should be injected and 0 otherwise. 
- */ -int vie_alignment_check(int cpl, int operand_size, uint64_t cr0, - uint64_t rflags, uint64_t gla); +int vie_fulfill_mmio(struct vie *vie, const struct vm_mmio *res); +int vie_fulfill_inout(struct vie *vie, const struct vm_inout *res); -/* Returns 1 if the 'gla' is not canonical and 0 otherwise. */ -int vie_canonical_check(enum vm_cpu_mode cpu_mode, uint64_t gla); +bool vie_needs_fetch(const struct vie *vie); +bool vie_pending(const struct vie *vie); +uint64_t vie_mmio_gpa(const struct vie *vie); +void vie_exitinfo(const struct vie *vie, struct vm_exit *vme); +void vie_fallback_exitinfo(const struct vie *vie, struct vm_exit *vme); -uint64_t vie_size2mask(int size); +void vie_reset(struct vie *vie); +void vie_advance_pc(struct vie *vie, uint64_t *nextrip); -int vie_calculate_gla(enum vm_cpu_mode cpu_mode, enum vm_reg_name seg, - struct seg_desc *desc, uint64_t off, int length, int addrsize, int prot, - uint64_t *gla); +int vie_emulate_mmio(struct vie *vie, void *vm, int vcpuid); +int vie_emulate_inout(struct vie *vie, struct vm *vm, int vcpuid); -#ifdef _KERNEL /* * APIs to fetch and decode the instruction from nested page fault handler. * - * 'vie' must be initialized before calling 'vmm_fetch_instruction()' + * 'vie' must be initialized before calling 'vie_fetch_instruction()' */ -int vmm_fetch_instruction(struct vm *vm, int cpuid, - struct vm_guest_paging *guest_paging, - uint64_t rip, int inst_length, struct vie *vie, - int *is_fault); +int vie_fetch_instruction(struct vie *vie, struct vm *vm, int cpuid, + uint64_t rip, int *is_fault); /* * Translate the guest linear address 'gla' to a guest physical address. @@ -101,34 +94,23 @@ int vm_gla2gpa(struct vm *vm, int vcpuid, struct vm_guest_paging *paging, * Like vm_gla2gpa, but no exceptions are injected into the guest and * PTEs are not changed. 
*/ -int vm_gla2gpa_nofault(struct vm *vm, int vcpuid, struct vm_guest_paging *paging, - uint64_t gla, int prot, uint64_t *gpa, int *is_fault); -#endif /* _KERNEL */ - -void vie_init(struct vie *vie, const char *inst_bytes, int inst_length); +int vm_gla2gpa_nofault(struct vm *vm, int vcpuid, + struct vm_guest_paging *paging, uint64_t gla, int prot, uint64_t *gpa, + int *is_fault); +int vie_verify_gla(struct vie *vie, struct vm *vm, int cpuid, uint64_t gla); /* * Decode the instruction fetched into 'vie' so it can be emulated. * * 'gla' is the guest linear address provided by the hardware assist * that caused the nested page table fault. It is used to verify that * the software instruction decoding is in agreement with the hardware. - * + * * Some hardware assists do not provide the 'gla' to the hypervisor. * To skip the 'gla' verification for this or any other reason pass * in VIE_INVALID_GLA instead. */ -#ifdef _KERNEL #define VIE_INVALID_GLA (1UL << 63) /* a non-canonical address */ -int vmm_decode_instruction(struct vm *vm, int cpuid, uint64_t gla, - enum vm_cpu_mode cpu_mode, int csd, struct vie *vie); -#else /* !_KERNEL */ -/* - * Permit instruction decoding logic to be compiled outside of the kernel for - * rapid iteration and validation. No GLA validation is performed, obviously. 
- */ -int vmm_decode_instruction(enum vm_cpu_mode cpu_mode, int csd, - struct vie *vie); -#endif /* _KERNEL */ +int vie_decode_instruction(struct vie *vie, struct vm *vm, int cpuid, int csd); #endif /* _VMM_INSTRUCTION_EMUL_H_ */ diff --git a/usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h b/usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h index 2a884e6e0e..fbd2884b84 100644 --- a/usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h +++ b/usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h @@ -54,6 +54,7 @@ struct vm; struct vm_exception; struct seg_desc; struct vm_exit; +struct vie; struct vm_run; struct vhpet; struct vioapic; @@ -171,7 +172,7 @@ int vm_get_seg_desc(struct vm *vm, int vcpu, int reg, struct seg_desc *ret_desc); int vm_set_seg_desc(struct vm *vm, int vcpu, int reg, struct seg_desc *desc); -int vm_run(struct vm *vm, struct vm_run *vmrun); +int vm_run(struct vm *vm, int vcpuid, const struct vm_entry *); int vm_suspend(struct vm *vm, enum vm_suspend_how how); int vm_inject_nmi(struct vm *vm, int vcpu); int vm_nmi_pending(struct vm *vm, int vcpuid); @@ -191,11 +192,17 @@ int vm_activate_cpu(struct vm *vm, int vcpu); int vm_suspend_cpu(struct vm *vm, int vcpu); int vm_resume_cpu(struct vm *vm, int vcpu); struct vm_exit *vm_exitinfo(struct vm *vm, int vcpuid); +struct vie *vm_vie_ctx(struct vm *vm, int vcpuid); void vm_exit_suspended(struct vm *vm, int vcpuid, uint64_t rip); void vm_exit_debug(struct vm *vm, int vcpuid, uint64_t rip); void vm_exit_runblock(struct vm *vm, int vcpuid, uint64_t rip); void vm_exit_astpending(struct vm *vm, int vcpuid, uint64_t rip); void vm_exit_reqidle(struct vm *vm, int vcpuid, uint64_t rip); +int vm_service_mmio_read(struct vm *vm, int cpuid, uint64_t gpa, uint64_t *rval, + int rsize); +int vm_service_mmio_write(struct vm *vm, int cpuid, uint64_t gpa, uint64_t wval, + int wsize); +void vm_req_spinup_ap(struct vm *vm, int req_vcpuid, uint64_t req_rip); #ifdef _SYS__CPUSET_H_ cpuset_t vm_active_cpus(struct vm *vm); diff --git a/usr/src/uts/i86pc/io/vmm/vmm.c 
b/usr/src/uts/i86pc/io/vmm/vmm.c index 579ca12e84..7a47cd0cd1 100644 --- a/usr/src/uts/i86pc/io/vmm/vmm.c +++ b/usr/src/uts/i86pc/io/vmm/vmm.c @@ -79,7 +79,7 @@ __FBSDID("$FreeBSD$"); #include <machine/vmm.h> #include <machine/vmm_dev.h> -#include <machine/vmm_instruction_emul.h> +#include <sys/vmm_instruction_emul.h> #include "vmm_ioport.h" #include "vmm_ktr.h" @@ -109,7 +109,7 @@ struct vlapic; * (x) initialized before use */ struct vcpu { - struct mtx mtx; /* (o) protects 'state' and 'hostcpu' */ + struct mtx mtx; /* (o) protects 'state' and 'hostcpu' */ enum vcpu_state state; /* (o) vcpu state */ #ifndef __FreeBSD__ kcondvar_t vcpu_cv; /* (o) cpu waiter cv */ @@ -135,6 +135,7 @@ struct vcpu { void *stats; /* (a,i) statistics */ struct vm_exit exitinfo; /* (x) exit reason and collateral */ uint64_t nextrip; /* (x) next instruction to execute */ + struct vie *vie_ctx; /* (x) instruction emulation context */ #ifndef __FreeBSD__ uint64_t tsc_offset; /* (x) offset from host TSC */ #endif @@ -185,7 +186,7 @@ struct vm { volatile cpuset_t active_cpus; /* (i) active vcpus */ volatile cpuset_t debug_cpus; /* (i) vcpus stopped for debug */ int suspend; /* (i) stop VM execution */ - volatile cpuset_t suspended_cpus; /* (i) suspended vcpus */ + volatile cpuset_t suspended_cpus; /* (i) suspended vcpus */ volatile cpuset_t halted_cpus; /* (x) cpus in a hard halt */ struct mem_map mem_maps[VM_MAX_MEMMAPS]; /* (i) guest address space */ struct mem_seg mem_segs[VM_MAX_MEMSEGS]; /* (o) guest memory regions */ @@ -200,6 +201,14 @@ struct vm { #ifndef __FreeBSD__ list_t ioport_hooks; #endif /* __FreeBSD__ */ + bool sipi_req; /* (i) SIPI requested */ + int sipi_req_vcpu; /* (i) SIPI destination */ + uint64_t sipi_req_rip; /* (i) SIPI start %rip */ + + /* Miscellaneous VM-wide statistics and counters */ + struct vm_wide_stats { + uint64_t sipi_supersede; + } stats; }; static int vmm_initialized; @@ -341,6 +350,8 @@ vcpu_cleanup(struct vm *vm, int i, bool destroy) if (destroy) { 
vmm_stat_free(vcpu->stats); fpu_save_area_free(vcpu->guestfpu); + vie_free(vcpu->vie_ctx); + vcpu->vie_ctx = NULL; } } @@ -367,6 +378,10 @@ vcpu_init(struct vm *vm, int vcpu_id, bool create) #endif vcpu->guestfpu = fpu_save_area_alloc(); vcpu->stats = vmm_stat_alloc(); + vcpu->vie_ctx = vie_alloc(); + } else { + vie_reset(vcpu->vie_ctx); + bzero(&vcpu->exitinfo, sizeof (vcpu->exitinfo)); } vcpu->vlapic = VLAPIC_INIT(vm->cookie, vcpu_id); @@ -402,6 +417,15 @@ vm_exitinfo(struct vm *vm, int cpuid) return (&vcpu->exitinfo); } +struct vie * +vm_vie_ctx(struct vm *vm, int cpuid) +{ + if (cpuid < 0 || cpuid >= vm->maxcpus) + panic("vm_vie_ctx: invalid cpuid %d", cpuid); + + return (vm->vcpu[cpuid].vie_ctx); +} + static int vmm_init(void) { @@ -1198,7 +1222,6 @@ vm_set_register(struct vm *vm, int vcpuid, int reg, uint64_t val) static bool is_descriptor_table(int reg) { - switch (reg) { case VM_REG_GUEST_IDTR: case VM_REG_GUEST_GDTR: @@ -1211,7 +1234,6 @@ is_descriptor_table(int reg) static bool is_segment_register(int reg) { - switch (reg) { case VM_REG_GUEST_ES: case VM_REG_GUEST_CS: @@ -1558,85 +1580,190 @@ done: return (0); } +int +vm_service_mmio_read(struct vm *vm, int cpuid, uint64_t gpa, uint64_t *rval, + int rsize) +{ + int err = ESRCH; + void *arg = NULL; + + if (gpa >= DEFAULT_APIC_BASE && gpa < DEFAULT_APIC_BASE + PAGE_SIZE) { + err = lapic_mmio_read(vm, cpuid, gpa, rval, rsize, &arg); + } else if (gpa >= VIOAPIC_BASE && gpa < VIOAPIC_BASE + VIOAPIC_SIZE) { + err = vioapic_mmio_read(vm, cpuid, gpa, rval, rsize, &arg); + } else if (gpa >= VHPET_BASE && gpa < VHPET_BASE + VHPET_SIZE) { + err = vhpet_mmio_read(vm, cpuid, gpa, rval, rsize, &arg); + } + + return (err); +} + +int +vm_service_mmio_write(struct vm *vm, int cpuid, uint64_t gpa, uint64_t wval, + int wsize) +{ + int err = ESRCH; + void *arg = NULL; + + if (gpa >= DEFAULT_APIC_BASE && gpa < DEFAULT_APIC_BASE + PAGE_SIZE) { + err = lapic_mmio_write(vm, cpuid, gpa, wval, wsize, &arg); + } else if (gpa >= 
VIOAPIC_BASE && gpa < VIOAPIC_BASE + VIOAPIC_SIZE) { + err = vioapic_mmio_write(vm, cpuid, gpa, wval, wsize, &arg); + } else if (gpa >= VHPET_BASE && gpa < VHPET_BASE + VHPET_SIZE) { + err = vhpet_mmio_write(vm, cpuid, gpa, wval, wsize, &arg); + } + + return (err); +} + static int -vm_handle_inst_emul(struct vm *vm, int vcpuid, bool *retu) +vm_handle_mmio_emul(struct vm *vm, int vcpuid, bool *retu) { struct vie *vie; struct vcpu *vcpu; struct vm_exit *vme; - uint64_t gla, gpa, cs_base; - struct vm_guest_paging *paging; - mem_region_read_t mread; - mem_region_write_t mwrite; - enum vm_cpu_mode cpu_mode; - int cs_d, error, fault; + uint64_t inst_addr; + int error, fault, cs_d; vcpu = &vm->vcpu[vcpuid]; vme = &vcpu->exitinfo; + vie = vcpu->vie_ctx; KASSERT(vme->inst_length == 0, ("%s: invalid inst_length %d", __func__, vme->inst_length)); - gla = vme->u.inst_emul.gla; - gpa = vme->u.inst_emul.gpa; - cs_base = vme->u.inst_emul.cs_base; - cs_d = vme->u.inst_emul.cs_d; - vie = &vme->u.inst_emul.vie; - paging = &vme->u.inst_emul.paging; - cpu_mode = paging->cpu_mode; + inst_addr = vme->rip + vme->u.mmio_emul.cs_base; + cs_d = vme->u.mmio_emul.cs_d; - VCPU_CTR1(vm, vcpuid, "inst_emul fault accessing gpa %#lx", gpa); + VCPU_CTR1(vm, vcpuid, "inst_emul fault accessing gpa %#lx", + vme->u.mmio_emul.gpa); - /* Fetch, decode and emulate the faulting instruction */ - if (vie->num_valid == 0) { - error = vmm_fetch_instruction(vm, vcpuid, paging, vme->rip + - cs_base, VIE_INST_SIZE, vie, &fault); - } else { - /* - * The instruction bytes have already been copied into 'vie' - */ - error = fault = 0; + /* Fetch the faulting instruction */ + if (vie_needs_fetch(vie)) { + error = vie_fetch_instruction(vie, vm, vcpuid, inst_addr, + &fault); + if (error != 0) { + return (error); + } else if (fault) { + /* + * If a fault during instruction fetch was encounted, it + * will have asserted that the appropriate exception be + * injected at next entry. No further work is required. 
+ */ + return (0); + } } - if (error || fault) - return (error); - if (vmm_decode_instruction(vm, vcpuid, gla, cpu_mode, cs_d, vie) != 0) { + if (vie_decode_instruction(vie, vm, vcpuid, cs_d) != 0) { VCPU_CTR1(vm, vcpuid, "Error decoding instruction at %#lx", - vme->rip + cs_base); - *retu = true; /* dump instruction bytes in userspace */ + inst_addr); + /* Dump (unrecognized) instruction bytes in userspace */ + vie_fallback_exitinfo(vie, vme); + *retu = true; return (0); } - - /* - * Update 'nextrip' based on the length of the emulated instruction. - */ - vme->inst_length = vie->num_processed; - vcpu->nextrip += vie->num_processed; - VCPU_CTR1(vm, vcpuid, "nextrip updated to %#lx after instruction " - "decoding", vcpu->nextrip); - - /* return to userland unless this is an in-kernel emulated device */ - if (gpa >= DEFAULT_APIC_BASE && gpa < DEFAULT_APIC_BASE + PAGE_SIZE) { - mread = lapic_mmio_read; - mwrite = lapic_mmio_write; - } else if (gpa >= VIOAPIC_BASE && gpa < VIOAPIC_BASE + VIOAPIC_SIZE) { - mread = vioapic_mmio_read; - mwrite = vioapic_mmio_write; - } else if (gpa >= VHPET_BASE && gpa < VHPET_BASE + VHPET_SIZE) { - mread = vhpet_mmio_read; - mwrite = vhpet_mmio_write; - } else { + if (vme->u.mmio_emul.gla != VIE_INVALID_GLA && + vie_verify_gla(vie, vm, vcpuid, vme->u.mmio_emul.gla) != 0) { + /* Decoded GLA does not match GLA from VM exit state */ + vie_fallback_exitinfo(vie, vme); *retu = true; return (0); } - error = vmm_emulate_instruction(vm, vcpuid, gpa, vie, paging, - mread, mwrite, retu); - +repeat: + error = vie_emulate_mmio(vie, vm, vcpuid); + if (error < 0) { + /* + * MMIO not handled by any of the in-kernel-emulated devices, so + * make a trip out to userspace for it. + */ + vie_exitinfo(vie, vme); + *retu = true; + error = 0; + } else if (error == EAGAIN) { + /* + * Continue emulating the rep-prefixed instruction, which has + * not completed its iterations. 
+ * + * In case this can be emulated in-kernel and has a high + * repetition count (causing a tight spin), it should be + * deferential to yield conditions. + */ + if (!vcpu_should_yield(vm, vcpuid)) { + goto repeat; + } else { + /* + * Defer to the contending load by making a trip to + * userspace with a no-op (BOGUS) exit reason. + */ + vie_reset(vie); + vme->exitcode = VM_EXITCODE_BOGUS; + *retu = true; + return (0); + } + } else if (error == 0) { + /* Update %rip now that instruction has been emulated */ + vie_advance_pc(vie, &vcpu->nextrip); + } return (error); } static int +vm_handle_inout(struct vm *vm, int vcpuid, struct vm_exit *vme, bool *retu) +{ + struct vcpu *vcpu; + struct vie *vie; + int err; + + vcpu = &vm->vcpu[vcpuid]; + vie = vcpu->vie_ctx; + +repeat: + err = vie_emulate_inout(vie, vm, vcpuid); + + if (err < 0) { + /* + * In/out not handled by any of the in-kernel-emulated devices, + * so make a trip out to userspace for it. + */ + vie_exitinfo(vie, vme); + *retu = true; + return (0); + } else if (err == EAGAIN) { + /* + * Continue emulating the rep-prefixed ins/outs, which has not + * completed its iterations. + * + * In case this can be emulated in-kernel and has a high + * repetition count (causing a tight spin), it should be + * deferential to yield conditions. + */ + if (!vcpu_should_yield(vm, vcpuid)) { + goto repeat; + } else { + /* + * Defer to the contending load by making a trip to + * userspace with a no-op (BOGUS) exit reason. + */ + vie_reset(vie); + vme->exitcode = VM_EXITCODE_BOGUS; + *retu = true; + return (0); + } + } else if (err != 0) { + /* Emulation failure. Bail all the way out to userspace. 
*/ + vme->exitcode = VM_EXITCODE_INST_EMUL; + bzero(&vme->u.inst_emul, sizeof (vme->u.inst_emul)); + *retu = true; + return (0); + } + + vie_advance_pc(vie, &vcpu->nextrip); + *retu = false; + return (0); +} + +static int vm_handle_suspend(struct vm *vm, int vcpuid, bool *retu) { #ifdef __FreeBSD__ @@ -1768,6 +1895,18 @@ vm_handle_wrmsr(struct vm *vm, int vcpuid, struct vm_exit *vme) } #endif /* __FreeBSD__ */ +void +vm_req_spinup_ap(struct vm *vm, int req_vcpuid, uint64_t req_rip) +{ + if (vm->sipi_req) { + /* This should never occur if userspace is doing its job. */ + vm->stats.sipi_supersede++; + } + vm->sipi_req = true; + vm->sipi_req_vcpu = req_vcpuid; + vm->sipi_req_rip = req_rip; +} + int vm_suspend(struct vm *vm, enum vm_suspend_how how) { @@ -1960,11 +2099,104 @@ vmm_freectx(void *arg, int isexec) #endif /* __FreeBSD */ +static int +vm_entry_actions(struct vm *vm, int vcpuid, const struct vm_entry *entry, + struct vm_exit *vme) +{ + struct vcpu *vcpu; + struct vie *vie; + int err; + + vcpu = &vm->vcpu[vcpuid]; + vie = vcpu->vie_ctx; + err = 0; + + switch (entry->cmd) { + case VEC_DEFAULT: + return (0); + case VEC_DISCARD_INSTR: + vie_reset(vie); + return (0); + case VEC_COMPLETE_MMIO: + err = vie_fulfill_mmio(vie, &entry->u.mmio); + if (err == 0) { + err = vie_emulate_mmio(vie, vm, vcpuid); + if (err == 0) { + vie_advance_pc(vie, &vcpu->nextrip); + } else if (err < 0) { + vie_exitinfo(vie, vme); + } else if (err == EAGAIN) { + /* + * Clear the instruction emulation state in + * order to re-enter VM context and continue + * this 'rep <instruction>' + */ + vie_reset(vie); + err = 0; + } + } + break; + case VEC_COMPLETE_INOUT: + err = vie_fulfill_inout(vie, &entry->u.inout); + if (err == 0) { + err = vie_emulate_inout(vie, vm, vcpuid); + if (err == 0) { + vie_advance_pc(vie, &vcpu->nextrip); + } else if (err < 0) { + vie_exitinfo(vie, vme); + } else if (err == EAGAIN) { + /* + * Clear the instruction emulation state in + * order to re-enter VM context and 
continue + * this 'rep ins/outs' + */ + vie_reset(vie); + err = 0; + } + } + break; + default: + return (EINVAL); + } + return (err); +} + +static int +vm_loop_checks(struct vm *vm, int vcpuid, struct vm_exit *vme) +{ + struct vie *vie; + + vie = vm->vcpu[vcpuid].vie_ctx; + + if (vie_pending(vie)) { + /* + * Userspace has not fulfilled the pending needs of the + * instruction emulation, so bail back out. + */ + vie_exitinfo(vie, vme); + return (-1); + } + + if (vcpuid == 0 && vm->sipi_req) { + /* The boot vCPU has sent a SIPI to one of the other CPUs */ + vme->exitcode = VM_EXITCODE_SPINUP_AP; + vme->u.spinup_ap.vcpu = vm->sipi_req_vcpu; + vme->u.spinup_ap.rip = vm->sipi_req_rip; + + vm->sipi_req = false; + vm->sipi_req_vcpu = 0; + vm->sipi_req_rip = 0; + return (-1); + } + + return (0); +} + int -vm_run(struct vm *vm, struct vm_run *vmrun) +vm_run(struct vm *vm, int vcpuid, const struct vm_entry *entry) { struct vm_eventinfo evinfo; - int error, vcpuid; + int error; struct vcpu *vcpu; #ifdef __FreeBSD__ struct pcb *pcb; @@ -1978,8 +2210,6 @@ vm_run(struct vm *vm, struct vm_run *vmrun) int affinity_type = CPU_CURRENT; #endif - vcpuid = vmrun->cpuid; - if (vcpuid < 0 || vcpuid >= vm->maxcpus) return (EINVAL); @@ -2005,7 +2235,21 @@ vm_run(struct vm *vm, struct vm_run *vmrun) NULL, vmm_freectx); #endif + error = vm_entry_actions(vm, vcpuid, entry, vme); + if (error < 0) { + /* Exit condition to be serviced by userspace */ + error = 0; + goto exit; + } else if (error != 0) { + goto exit; + } + restart: + if (vm_loop_checks(vm, vcpuid, vme) != 0) { + error = 0; + goto exit; + } + #ifndef __FreeBSD__ thread_affinity_set(curthread, affinity_type); /* @@ -2091,11 +2335,10 @@ restart: case VM_EXITCODE_PAGING: error = vm_handle_paging(vm, vcpuid, &retu); break; - case VM_EXITCODE_INST_EMUL: - error = vm_handle_inst_emul(vm, vcpuid, &retu); + case VM_EXITCODE_MMIO_EMUL: + error = vm_handle_mmio_emul(vm, vcpuid, &retu); break; case VM_EXITCODE_INOUT: - case 
VM_EXITCODE_INOUT_STR: error = vm_handle_inout(vm, vcpuid, vme, &retu); break; case VM_EXITCODE_MONITOR: @@ -2114,12 +2357,12 @@ restart: affinity_type = CPU_BEST; break; } +#endif case VM_EXITCODE_MTRAP: vm_suspend_cpu(vm, vcpuid); retu = true; break; -#endif default: retu = true; /* handled in userland */ break; @@ -2129,6 +2372,7 @@ restart: if (error == 0 && retu == false) goto restart; +exit: #ifndef __FreeBSD__ removectx(curthread, &vtc, vmm_savectx, vmm_restorectx, NULL, NULL, NULL, vmm_freectx); @@ -2136,8 +2380,6 @@ restart: VCPU_CTR2(vm, vcpuid, "retu %d/%d", error, vme->exitcode); - /* copy the exit information */ - bcopy(vme, &vmrun->vm_exit, sizeof (struct vm_exit)); return (error); } @@ -2672,7 +2914,7 @@ vmm_is_pptdev(int bus, int slot, int func) found = true; break; } - + if (cp2 != NULL) *cp2++ = ' '; @@ -3082,7 +3324,7 @@ vm_copyin(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo, void *kaddr, { char *dst; int idx; - + dst = kaddr; idx = 0; while (len > 0) { @@ -3123,8 +3365,8 @@ vm_get_rescnt(struct vm *vm, int vcpu, struct vmm_stat_type *stat) if (vcpu == 0) { vmm_stat_set(vm, vcpu, VMM_MEM_RESIDENT, - PAGE_SIZE * vmspace_resident_count(vm->vmspace)); - } + PAGE_SIZE * vmspace_resident_count(vm->vmspace)); + } } static void @@ -3133,8 +3375,8 @@ vm_get_wiredcnt(struct vm *vm, int vcpu, struct vmm_stat_type *stat) if (vcpu == 0) { vmm_stat_set(vm, vcpu, VMM_MEM_WIRED, - PAGE_SIZE * pmap_wired_count(vmspace_pmap(vm->vmspace))); - } + PAGE_SIZE * pmap_wired_count(vmspace_pmap(vm->vmspace))); + } } VMM_STAT_FUNC(VMM_MEM_RESIDENT, "Resident memory", vm_get_rescnt); @@ -3206,21 +3448,21 @@ vm_ioport_handle_hook(struct vm *vm, int cpuid, bool in, int port, int bytes, } } if (hook == NULL) { - return (ENOENT); + return (ESRCH); } if (in) { uint64_t tval; if (hook->vmih_rmem_cb == NULL) { - return (ENOENT); + return (ESRCH); } err = hook->vmih_rmem_cb(hook->vmih_arg, (uintptr_t)port, (uint_t)bytes, &tval); *val = (uint32_t)tval; } else { if 
(hook->vmih_wmem_cb == NULL) { - return (ENOENT); + return (ESRCH); } err = hook->vmih_wmem_cb(hook->vmih_arg, (uintptr_t)port, (uint_t)bytes, (uint64_t)*val); diff --git a/usr/src/uts/i86pc/io/vmm/vmm_instruction_emul.c b/usr/src/uts/i86pc/io/vmm/vmm_instruction_emul.c index 0d32fe0b9a..f8bb7a1646 100644 --- a/usr/src/uts/i86pc/io/vmm/vmm_instruction_emul.c +++ b/usr/src/uts/i86pc/io/vmm/vmm_instruction_emul.c @@ -40,12 +40,12 @@ * * Copyright 2015 Pluribus Networks Inc. * Copyright 2018 Joyent, Inc. + * Copyright 2020 Oxide Computer Company */ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); -#ifdef _KERNEL #include <sys/param.h> #include <sys/pcpu.h> #include <sys/systm.h> @@ -56,27 +56,109 @@ __FBSDID("$FreeBSD$"); #include <machine/vmparam.h> #include <machine/vmm.h> -#else /* !_KERNEL */ -#include <sys/types.h> -#include <sys/errno.h> -#include <sys/_iovec.h> +#include <sys/vmm_kernel.h> -#include <machine/vmm.h> - -#include <err.h> -#include <assert.h> -#include <stdbool.h> -#include <stdio.h> -#include <strings.h> -#include <vmmapi.h> -#define KASSERT(exp,msg) assert((exp)) -#define panic(...) errx(4, __VA_ARGS__) -#endif /* _KERNEL */ - -#include <machine/vmm_instruction_emul.h> +#include <sys/vmm_instruction_emul.h> #include <x86/psl.h> #include <x86/specialreg.h> +#include "vmm_ioport.h" +#include "vmm_ktr.h" + +enum vie_status { + VIES_INIT = (1U << 0), + VIES_MMIO = (1U << 1), + VIES_INOUT = (1U << 2), + VIES_INST_FETCH = (1U << 3), + VIES_INST_DECODE = (1U << 4), + VIES_PENDING_MMIO = (1U << 5), + VIES_PENDING_INOUT = (1U << 6), + VIES_REPEAT = (1U << 7), + VIES_COMPLETE = (1U << 8), +}; + +/* State of request to perform emulated access (inout or MMIO) */ +enum vie_req { + VR_NONE, + VR_PENDING, + VR_DONE, +}; + +struct vie_mmio { + uint64_t data; + uint64_t gpa; + uint8_t bytes; + enum vie_req state; +}; + +struct vie_op { + uint8_t op_byte; /* actual opcode byte */ + uint8_t op_type; /* type of operation (e.g. 
MOV) */ + uint16_t op_flags; +}; + +#define VIE_INST_SIZE 15 +struct vie { + uint8_t inst[VIE_INST_SIZE]; /* instruction bytes */ + uint8_t num_valid; /* size of the instruction */ + uint8_t num_processed; + + uint8_t addrsize:4, opsize:4; /* address and operand sizes */ + uint8_t rex_w:1, /* REX prefix */ + rex_r:1, + rex_x:1, + rex_b:1, + rex_present:1, + repz_present:1, /* REP/REPE/REPZ prefix */ + repnz_present:1, /* REPNE/REPNZ prefix */ + opsize_override:1, /* Operand size override */ + addrsize_override:1, /* Address size override */ + segment_override:1; /* Segment override */ + + uint8_t mod:2, /* ModRM byte */ + reg:4, + rm:4; + + uint8_t ss:2, /* SIB byte */ + vex_present:1, /* VEX prefixed */ + vex_l:1, /* L bit */ + index:4, /* SIB byte */ + base:4; /* SIB byte */ + + uint8_t disp_bytes; + uint8_t imm_bytes; + + uint8_t scale; + + uint8_t vex_reg:4, /* vvvv: first source register specifier */ + vex_pp:2, /* pp */ + _sparebits:2; + + uint8_t _sparebytes[2]; + + int base_register; /* VM_REG_GUEST_xyz */ + int index_register; /* VM_REG_GUEST_xyz */ + int segment_register; /* VM_REG_GUEST_xyz */ + + int64_t displacement; /* optional addr displacement */ + int64_t immediate; /* optional immediate operand */ + + struct vie_op op; /* opcode description */ + + enum vie_status status; + + struct vm_guest_paging paging; /* guest paging state */ + + uint64_t mmio_gpa; /* faulting GPA */ + struct vie_mmio mmio_req_read; + struct vie_mmio mmio_req_write; + + struct vm_inout inout; /* active in/out op */ + enum vie_req inout_req_state; + uint32_t inout_req_val; /* value from userspace */ +}; + + /* struct vie_op.op_type */ enum { VIE_OP_TYPE_NONE = 0, @@ -299,14 +381,29 @@ static uint64_t size2mask[] = { [8] = 0xffffffffffffffff, }; -static int -vie_read_register(void *vm, int vcpuid, enum vm_reg_name reg, uint64_t *rval) -{ - int error; - error = vm_get_register(vm, vcpuid, reg, rval); +static int vie_mmio_read(struct vie *vie, struct vm *vm, int cpuid, + uint64_t 
gpa, uint64_t *rval, int bytes); +static int vie_mmio_write(struct vie *vie, struct vm *vm, int cpuid, + uint64_t gpa, uint64_t wval, int bytes); +static int vie_calculate_gla(enum vm_cpu_mode cpu_mode, enum vm_reg_name seg, + struct seg_desc *desc, uint64_t offset, int length, int addrsize, + int prot, uint64_t *gla); +static int vie_canonical_check(enum vm_cpu_mode cpu_mode, uint64_t gla); +static int vie_alignment_check(int cpl, int size, uint64_t cr0, uint64_t rf, + uint64_t gla); +static uint64_t vie_size2mask(int size); + +struct vie * +vie_alloc() +{ + return (kmem_zalloc(sizeof (struct vie), KM_SLEEP)); +} - return (error); +void +vie_free(struct vie *vie) +{ + kmem_free(vie, sizeof (struct vie)); } static void @@ -336,7 +433,7 @@ vie_calc_bytereg(struct vie *vie, enum vm_reg_name *reg, int *lhbr) } static int -vie_read_bytereg(void *vm, int vcpuid, struct vie *vie, uint8_t *rval) +vie_read_bytereg(struct vie *vie, void *vm, int vcpuid, uint8_t *rval) { uint64_t val; int error, lhbr; @@ -357,7 +454,7 @@ vie_read_bytereg(void *vm, int vcpuid, struct vie *vie, uint8_t *rval) } static int -vie_write_bytereg(void *vm, int vcpuid, struct vie *vie, uint8_t byte) +vie_write_bytereg(struct vie *vie, void *vm, int vcpuid, uint8_t byte) { uint64_t origval, val, mask; int error, lhbr; @@ -382,9 +479,9 @@ vie_write_bytereg(void *vm, int vcpuid, struct vie *vie, uint8_t byte) return (error); } -int -vie_update_register(void *vm, int vcpuid, enum vm_reg_name reg, - uint64_t val, int size) +static int +vie_update_register(void *vm, int vcpuid, enum vm_reg_name reg, uint64_t val, + int size) { int error; uint64_t origval; @@ -392,7 +489,7 @@ vie_update_register(void *vm, int vcpuid, enum vm_reg_name reg, switch (size) { case 1: case 2: - error = vie_read_register(vm, vcpuid, reg, &origval); + error = vm_get_register(vm, vcpuid, reg, &origval); if (error) return (error); val &= size2mask[size]; @@ -411,6 +508,29 @@ vie_update_register(void *vm, int vcpuid, enum vm_reg_name 
reg, return (error); } +static int +vie_repeat(struct vie *vie) +{ + vie->status |= VIES_REPEAT; + + /* + * Clear out any cached operation values so the repeated instruction can + * begin without using that stale state. Other state, such as the + * decoding results, are kept around as it will not vary between + * iterations of a rep-prefixed instruction. + */ + if ((vie->status & VIES_MMIO) != 0) { + vie->mmio_req_read.state = VR_NONE; + vie->mmio_req_write.state = VR_NONE; + } else if ((vie->status & VIES_INOUT) != 0) { + vie->inout_req_state = VR_NONE; + } else { + panic("unexpected emulation state"); + } + + return (EAGAIN); +} + #define RFLAGS_STATUS_BITS (PSL_C | PSL_PF | PSL_AF | PSL_Z | PSL_N | PSL_V) /* @@ -519,8 +639,7 @@ getandflags(int opsize, uint64_t x, uint64_t y) } static int -emulate_mov(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, - mem_region_read_t memread, mem_region_write_t memwrite, void *arg) +emulate_mov(void *vm, int vcpuid, uint64_t gpa, struct vie *vie) { int error, size; enum vm_reg_name reg; @@ -538,9 +657,9 @@ emulate_mov(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, * REX + 88/r: mov r/m8, r8 (%ah, %ch, %dh, %bh not available) */ size = 1; /* override for byte operation */ - error = vie_read_bytereg(vm, vcpuid, vie, &byte); + error = vie_read_bytereg(vie, vm, vcpuid, &byte); if (error == 0) - error = memwrite(vm, vcpuid, gpa, byte, size, arg); + error = vie_mmio_write(vie, vm, vcpuid, gpa, byte, size); break; case 0x89: /* @@ -550,10 +669,10 @@ emulate_mov(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, * REX.W + 89/r mov r/m64, r64 */ reg = gpr_map[vie->reg]; - error = vie_read_register(vm, vcpuid, reg, &val); + error = vm_get_register(vm, vcpuid, reg, &val); if (error == 0) { val &= size2mask[size]; - error = memwrite(vm, vcpuid, gpa, val, size, arg); + error = vie_mmio_write(vie, vm, vcpuid, gpa, val, size); } break; case 0x8A: @@ -563,9 +682,9 @@ emulate_mov(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, 
* REX + 8A/r: mov r8, r/m8 */ size = 1; /* override for byte operation */ - error = memread(vm, vcpuid, gpa, &val, size, arg); + error = vie_mmio_read(vie, vm, vcpuid, gpa, &val, size); if (error == 0) - error = vie_write_bytereg(vm, vcpuid, vie, val); + error = vie_write_bytereg(vie, vm, vcpuid, val); break; case 0x8B: /* @@ -574,7 +693,7 @@ emulate_mov(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, * 8B/r: mov r32, r/m32 * REX.W 8B/r: mov r64, r/m64 */ - error = memread(vm, vcpuid, gpa, &val, size, arg); + error = vie_mmio_read(vie, vm, vcpuid, gpa, &val, size); if (error == 0) { reg = gpr_map[vie->reg]; error = vie_update_register(vm, vcpuid, reg, val, size); @@ -587,7 +706,7 @@ emulate_mov(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, * A1: mov EAX, moffs32 * REX.W + A1: mov RAX, moffs64 */ - error = memread(vm, vcpuid, gpa, &val, size, arg); + error = vie_mmio_read(vie, vm, vcpuid, gpa, &val, size); if (error == 0) { reg = VM_REG_GUEST_RAX; error = vie_update_register(vm, vcpuid, reg, val, size); @@ -597,13 +716,13 @@ emulate_mov(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, /* * MOV from AX/EAX/RAX to seg:moffset * A3: mov moffs16, AX - * A3: mov moffs32, EAX + * A3: mov moffs32, EAX * REX.W + A3: mov moffs64, RAX */ - error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RAX, &val); + error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RAX, &val); if (error == 0) { val &= size2mask[size]; - error = memwrite(vm, vcpuid, gpa, val, size, arg); + error = vie_mmio_write(vie, vm, vcpuid, gpa, val, size); } break; case 0xC6: @@ -613,7 +732,8 @@ emulate_mov(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, * REX + C6/0 mov r/m8, imm8 */ size = 1; /* override for byte operation */ - error = memwrite(vm, vcpuid, gpa, vie->immediate, size, arg); + val = vie->immediate; + error = vie_mmio_write(vie, vm, vcpuid, gpa, val, size); break; case 0xC7: /* @@ -623,7 +743,7 @@ emulate_mov(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, * REX.W + C7/0 mov 
r/m64, imm32 (sign-extended to 64-bits) */ val = vie->immediate & size2mask[size]; - error = memwrite(vm, vcpuid, gpa, val, size, arg); + error = vie_mmio_write(vie, vm, vcpuid, gpa, val, size); break; default: break; @@ -633,9 +753,7 @@ emulate_mov(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, } static int -emulate_movx(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, - mem_region_read_t memread, mem_region_write_t memwrite, - void *arg) +emulate_movx(void *vm, int vcpuid, uint64_t gpa, struct vie *vie) { int error, size; enum vm_reg_name reg; @@ -656,7 +774,7 @@ emulate_movx(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, */ /* get the first operand */ - error = memread(vm, vcpuid, gpa, &val, 1, arg); + error = vie_mmio_read(vie, vm, vcpuid, gpa, &val, 1); if (error) break; @@ -677,7 +795,7 @@ emulate_movx(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, * 0F B7/r movzx r32, r/m16 * REX.W + 0F B7/r movzx r64, r/m16 */ - error = memread(vm, vcpuid, gpa, &val, 2, arg); + error = vie_mmio_read(vie, vm, vcpuid, gpa, &val, 2); if (error) return (error); @@ -699,7 +817,7 @@ emulate_movx(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, */ /* get the first operand */ - error = memread(vm, vcpuid, gpa, &val, 1, arg); + error = vie_mmio_read(vie, vm, vcpuid, gpa, &val, 1); if (error) break; @@ -722,25 +840,27 @@ emulate_movx(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, * Helper function to calculate and validate a linear address. 
*/ static int -get_gla(void *vm, int vcpuid, struct vie *vie, struct vm_guest_paging *paging, - int opsize, int addrsize, int prot, enum vm_reg_name seg, - enum vm_reg_name gpr, uint64_t *gla, int *fault) +vie_get_gla(struct vie *vie, void *vm, int vcpuid, int opsize, int addrsize, + int prot, enum vm_reg_name seg, enum vm_reg_name gpr, uint64_t *gla) { struct seg_desc desc; uint64_t cr0, val, rflags; int error; + struct vm_guest_paging *paging; + + paging = &vie->paging; - error = vie_read_register(vm, vcpuid, VM_REG_GUEST_CR0, &cr0); + error = vm_get_register(vm, vcpuid, VM_REG_GUEST_CR0, &cr0); KASSERT(error == 0, ("%s: error %d getting cr0", __func__, error)); - error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags); + error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags); KASSERT(error == 0, ("%s: error %d getting rflags", __func__, error)); error = vm_get_seg_desc(vm, vcpuid, seg, &desc); KASSERT(error == 0, ("%s: error %d getting segment descriptor %d", __func__, error, seg)); - error = vie_read_register(vm, vcpuid, gpr, &val); + error = vm_get_register(vm, vcpuid, gpr, &val); KASSERT(error == 0, ("%s: error %d getting register %d", __func__, error, gpr)); @@ -750,7 +870,7 @@ get_gla(void *vm, int vcpuid, struct vie *vie, struct vm_guest_paging *paging, vm_inject_ss(vm, vcpuid, 0); else vm_inject_gp(vm, vcpuid); - goto guest_fault; + return (-1); } if (vie_canonical_check(paging->cpu_mode, *gla)) { @@ -758,39 +878,30 @@ get_gla(void *vm, int vcpuid, struct vie *vie, struct vm_guest_paging *paging, vm_inject_ss(vm, vcpuid, 0); else vm_inject_gp(vm, vcpuid); - goto guest_fault; + return (-1); } if (vie_alignment_check(paging->cpl, opsize, cr0, rflags, *gla)) { vm_inject_ac(vm, vcpuid, 0); - goto guest_fault; + return (-1); } - *fault = 0; - return (0); - -guest_fault: - *fault = 1; return (0); } static int -emulate_movs(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, - struct vm_guest_paging *paging, mem_region_read_t memread, - 
mem_region_write_t memwrite, void *arg) +emulate_movs(void *vm, int vcpuid, uint64_t gpa, struct vie *vie) { -#ifdef _KERNEL struct vm_copyinfo copyinfo[2]; -#else - struct iovec copyinfo[2]; -#endif uint64_t dstaddr, srcaddr, dstgpa, srcgpa, val; uint64_t rcx, rdi, rsi, rflags; int error, fault, opsize, seg, repeat; + struct vm_guest_paging *paging; opsize = (vie->op.op_byte == 0xA4) ? 1 : vie->opsize; val = 0; error = 0; + paging = &vie->paging; /* * XXX although the MOVS instruction is only supposed to be used with @@ -802,7 +913,7 @@ emulate_movs(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, repeat = vie->repz_present | vie->repnz_present; if (repeat) { - error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RCX, &rcx); + error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RCX, &rcx); KASSERT(!error, ("%s: error %d getting rcx", __func__, error)); /* @@ -832,10 +943,10 @@ emulate_movs(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, */ seg = vie->segment_override ? vie->segment_register : VM_REG_GUEST_DS; - error = get_gla(vm, vcpuid, vie, paging, opsize, vie->addrsize, - PROT_READ, seg, VM_REG_GUEST_RSI, &srcaddr, &fault); - if (error || fault) + if (vie_get_gla(vie, vm, vcpuid, opsize, vie->addrsize, PROT_READ, seg, + VM_REG_GUEST_RSI, &srcaddr) != 0) { goto done; + } error = vm_copy_setup(vm, vcpuid, paging, srcaddr, opsize, PROT_READ, copyinfo, nitems(copyinfo), &fault); @@ -848,7 +959,7 @@ emulate_movs(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, */ vm_copyin(vm, vcpuid, copyinfo, &val, opsize); vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo)); - error = memwrite(vm, vcpuid, gpa, val, opsize, arg); + error = vie_mmio_write(vie, vm, vcpuid, gpa, val, opsize); if (error) goto done; } else { @@ -857,11 +968,11 @@ emulate_movs(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, * if 'srcaddr' is in the mmio space. 
*/ - error = get_gla(vm, vcpuid, vie, paging, opsize, vie->addrsize, - PROT_WRITE, VM_REG_GUEST_ES, VM_REG_GUEST_RDI, &dstaddr, - &fault); - if (error || fault) + if (vie_get_gla(vie, vm, vcpuid, opsize, vie->addrsize, + PROT_WRITE, VM_REG_GUEST_ES, VM_REG_GUEST_RDI, + &dstaddr) != 0) { goto done; + } error = vm_copy_setup(vm, vcpuid, paging, dstaddr, opsize, PROT_WRITE, copyinfo, nitems(copyinfo), &fault); @@ -878,7 +989,8 @@ emulate_movs(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, * injected into the guest then it will happen * before the MMIO read is attempted. */ - error = memread(vm, vcpuid, gpa, &val, opsize, arg); + error = vie_mmio_read(vie, vm, vcpuid, gpa, &val, + opsize); if (error) goto done; @@ -903,23 +1015,25 @@ emulate_movs(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, if (error || fault) goto done; - error = memread(vm, vcpuid, srcgpa, &val, opsize, arg); + error = vie_mmio_read(vie, vm, vcpuid, srcgpa, &val, + opsize); if (error) goto done; - error = memwrite(vm, vcpuid, dstgpa, val, opsize, arg); + error = vie_mmio_write(vie, vm, vcpuid, dstgpa, val, + opsize); if (error) goto done; } } - error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RSI, &rsi); + error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RSI, &rsi); KASSERT(error == 0, ("%s: error %d getting rsi", __func__, error)); - error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RDI, &rdi); + error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RDI, &rdi); KASSERT(error == 0, ("%s: error %d getting rdi", __func__, error)); - error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags); + error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags); KASSERT(error == 0, ("%s: error %d getting rflags", __func__, error)); if (rflags & PSL_D) { @@ -948,18 +1062,14 @@ emulate_movs(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, * Repeat the instruction if the count register is not zero. 
*/ if ((rcx & vie_size2mask(vie->addrsize)) != 0) - vm_restart_instruction(vm, vcpuid); + return (vie_repeat(vie)); } done: - KASSERT(error == 0 || error == EFAULT, ("%s: unexpected error %d", - __func__, error)); return (error); } static int -emulate_stos(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, - struct vm_guest_paging *paging, mem_region_read_t memread, - mem_region_write_t memwrite, void *arg) +emulate_stos(void *vm, int vcpuid, uint64_t gpa, struct vie *vie) { int error, opsize, repeat; uint64_t val; @@ -969,7 +1079,7 @@ emulate_stos(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, repeat = vie->repz_present | vie->repnz_present; if (repeat) { - error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RCX, &rcx); + error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RCX, &rcx); KASSERT(!error, ("%s: error %d getting rcx", __func__, error)); /* @@ -980,17 +1090,17 @@ emulate_stos(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, return (0); } - error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RAX, &val); + error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RAX, &val); KASSERT(!error, ("%s: error %d getting rax", __func__, error)); - error = memwrite(vm, vcpuid, gpa, val, opsize, arg); + error = vie_mmio_write(vie, vm, vcpuid, gpa, val, opsize); if (error) return (error); - error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RDI, &rdi); + error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RDI, &rdi); KASSERT(error == 0, ("%s: error %d getting rdi", __func__, error)); - error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags); + error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags); KASSERT(error == 0, ("%s: error %d getting rflags", __func__, error)); if (rflags & PSL_D) @@ -1012,15 +1122,14 @@ emulate_stos(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, * Repeat the instruction if the count register is not zero. 
*/ if ((rcx & vie_size2mask(vie->addrsize)) != 0) - vm_restart_instruction(vm, vcpuid); + return (vie_repeat(vie)); } return (0); } static int -emulate_and(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, - mem_region_read_t memread, mem_region_write_t memwrite, void *arg) +emulate_and(void *vm, int vcpuid, uint64_t gpa, struct vie *vie) { int error, size; enum vm_reg_name reg; @@ -1042,12 +1151,12 @@ emulate_and(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, /* get the first operand */ reg = gpr_map[vie->reg]; - error = vie_read_register(vm, vcpuid, reg, &val1); + error = vm_get_register(vm, vcpuid, reg, &val1); if (error) break; /* get the second operand */ - error = memread(vm, vcpuid, gpa, &val2, size, arg); + error = vie_mmio_read(vie, vm, vcpuid, gpa, &val2, size); if (error) break; @@ -1071,7 +1180,7 @@ emulate_and(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, */ /* get the first operand */ - error = memread(vm, vcpuid, gpa, &val1, size, arg); + error = vie_mmio_read(vie, vm, vcpuid, gpa, &val1, size); if (error) break; @@ -1080,7 +1189,7 @@ emulate_and(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, * operand and write the result */ result = val1 & vie->immediate; - error = memwrite(vm, vcpuid, gpa, result, size, arg); + error = vie_mmio_write(vie, vm, vcpuid, gpa, result, size); break; default: break; @@ -1088,7 +1197,7 @@ emulate_and(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, if (error) return (error); - error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags); + error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags); if (error) return (error); @@ -1107,8 +1216,7 @@ emulate_and(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, } static int -emulate_or(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, - mem_region_read_t memread, mem_region_write_t memwrite, void *arg) +emulate_or(void *vm, int vcpuid, uint64_t gpa, struct vie *vie) { int error, size; enum vm_reg_name reg; @@ -1130,12 +1238,12 @@ 
emulate_or(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, /* get the first operand */ reg = gpr_map[vie->reg]; - error = vie_read_register(vm, vcpuid, reg, &val1); + error = vm_get_register(vm, vcpuid, reg, &val1); if (error) break; - + /* get the second operand */ - error = memread(vm, vcpuid, gpa, &val2, size, arg); + error = vie_mmio_read(vie, vm, vcpuid, gpa, &val2, size); if (error) break; @@ -1159,7 +1267,7 @@ emulate_or(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, */ /* get the first operand */ - error = memread(vm, vcpuid, gpa, &val1, size, arg); + error = vie_mmio_read(vie, vm, vcpuid, gpa, &val1, size); if (error) break; @@ -1168,7 +1276,7 @@ emulate_or(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, * operand and write the result */ result = val1 | vie->immediate; - error = memwrite(vm, vcpuid, gpa, result, size, arg); + error = vie_mmio_write(vie, vm, vcpuid, gpa, result, size); break; default: break; @@ -1176,7 +1284,7 @@ emulate_or(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, if (error) return (error); - error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags); + error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags); if (error) return (error); @@ -1195,8 +1303,7 @@ emulate_or(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, } static int -emulate_cmp(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, - mem_region_read_t memread, mem_region_write_t memwrite, void *arg) +emulate_cmp(void *vm, int vcpuid, uint64_t gpa, struct vie *vie) { int error, size; uint64_t regop, memop, op1, op2, rflags, rflags2; @@ -1223,12 +1330,12 @@ emulate_cmp(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, /* Get the register operand */ reg = gpr_map[vie->reg]; - error = vie_read_register(vm, vcpuid, reg, ®op); + error = vm_get_register(vm, vcpuid, reg, ®op); if (error) return (error); /* Get the memory operand */ - error = memread(vm, vcpuid, gpa, &memop, size, arg); + error = vie_mmio_read(vie, vm, vcpuid, gpa, 
&memop, size); if (error) return (error); @@ -1267,7 +1374,7 @@ emulate_cmp(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, size = 1; /* get the first operand */ - error = memread(vm, vcpuid, gpa, &op1, size, arg); + error = vie_mmio_read(vie, vm, vcpuid, gpa, &op1, size); if (error) return (error); @@ -1276,7 +1383,7 @@ emulate_cmp(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, default: return (EINVAL); } - error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags); + error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags); if (error) return (error); rflags &= ~RFLAGS_STATUS_BITS; @@ -1287,8 +1394,7 @@ emulate_cmp(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, } static int -emulate_test(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, - mem_region_read_t memread, mem_region_write_t memwrite, void *arg) +emulate_test(void *vm, int vcpuid, uint64_t gpa, struct vie *vie) { int error, size; uint64_t op1, rflags, rflags2; @@ -1311,7 +1417,7 @@ emulate_test(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, if ((vie->reg & 7) != 0) return (EINVAL); - error = memread(vm, vcpuid, gpa, &op1, size, arg); + error = vie_mmio_read(vie, vm, vcpuid, gpa, &op1, size); if (error) return (error); @@ -1320,7 +1426,7 @@ emulate_test(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, default: return (EINVAL); } - error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags); + error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags); if (error) return (error); @@ -1336,16 +1442,16 @@ emulate_test(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, } static int -emulate_bextr(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, - struct vm_guest_paging *paging, mem_region_read_t memread, - mem_region_write_t memwrite, void *arg) +emulate_bextr(void *vm, int vcpuid, uint64_t gpa, struct vie *vie) { uint64_t src1, src2, dst, rflags; unsigned start, len; int error, size; + struct vm_guest_paging *paging; size = vie->opsize; error 
= EINVAL; + paging = &vie->paging; /* * VEX.LZ.0F38.W0 F7 /r BEXTR r32a, r/m32, r32b @@ -1364,13 +1470,13 @@ emulate_bextr(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, * operand) using an index and length specified in the second /source/ * operand (third operand). */ - error = memread(vm, vcpuid, gpa, &src1, size, arg); + error = vie_mmio_read(vie, vm, vcpuid, gpa, &src1, size); if (error) return (error); - error = vie_read_register(vm, vcpuid, gpr_map[vie->vex_reg], &src2); + error = vm_get_register(vm, vcpuid, gpr_map[vie->vex_reg], &src2); if (error) return (error); - error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags); + error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags); if (error) return (error); @@ -1413,8 +1519,7 @@ done: } static int -emulate_add(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, - mem_region_read_t memread, mem_region_write_t memwrite, void *arg) +emulate_add(void *vm, int vcpuid, uint64_t gpa, struct vie *vie) { int error, size; uint64_t nval, rflags, rflags2, val1, val2; @@ -1435,12 +1540,12 @@ emulate_add(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, /* get the first operand */ reg = gpr_map[vie->reg]; - error = vie_read_register(vm, vcpuid, reg, &val1); + error = vm_get_register(vm, vcpuid, reg, &val1); if (error) break; /* get the second operand */ - error = memread(vm, vcpuid, gpa, &val2, size, arg); + error = vie_mmio_read(vie, vm, vcpuid, gpa, &val2, size); if (error) break; @@ -1454,7 +1559,7 @@ emulate_add(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, if (!error) { rflags2 = getaddflags(size, val1, val2); - error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, + error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags); if (error) return (error); @@ -1469,8 +1574,7 @@ emulate_add(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, } static int -emulate_sub(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, - mem_region_read_t memread, mem_region_write_t 
memwrite, void *arg) +emulate_sub(void *vm, int vcpuid, uint64_t gpa, struct vie *vie) { int error, size; uint64_t nval, rflags, rflags2, val1, val2; @@ -1483,7 +1587,7 @@ emulate_sub(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, case 0x2B: /* * SUB r/m from r and store the result in r - * + * * 2B/r SUB r16, r/m16 * 2B/r SUB r32, r/m32 * REX.W + 2B/r SUB r64, r/m64 @@ -1491,12 +1595,12 @@ emulate_sub(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, /* get the first operand */ reg = gpr_map[vie->reg]; - error = vie_read_register(vm, vcpuid, reg, &val1); + error = vm_get_register(vm, vcpuid, reg, &val1); if (error) break; /* get the second operand */ - error = memread(vm, vcpuid, gpa, &val2, size, arg); + error = vie_mmio_read(vie, vm, vcpuid, gpa, &val2, size); if (error) break; @@ -1510,7 +1614,7 @@ emulate_sub(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, if (!error) { rflags2 = getcc(size, val1, val2); - error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, + error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags); if (error) return (error); @@ -1525,22 +1629,18 @@ emulate_sub(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, } static int -emulate_stack_op(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie, - struct vm_guest_paging *paging, mem_region_read_t memread, - mem_region_write_t memwrite, void *arg) +emulate_stack_op(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie) { -#ifdef _KERNEL struct vm_copyinfo copyinfo[2]; -#else - struct iovec copyinfo[2]; -#endif struct seg_desc ss_desc; uint64_t cr0, rflags, rsp, stack_gla, val; int error, fault, size, stackaddrsize, pushop; + struct vm_guest_paging *paging; val = 0; size = vie->opsize; pushop = (vie->op.op_type == VIE_OP_TYPE_PUSH) ? 
1 : 0; + paging = &vie->paging; /* * From "Address-Size Attributes for Stack Accesses", Intel SDL, Vol 1 @@ -1572,13 +1672,13 @@ emulate_stack_op(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie, stackaddrsize = 2; } - error = vie_read_register(vm, vcpuid, VM_REG_GUEST_CR0, &cr0); + error = vm_get_register(vm, vcpuid, VM_REG_GUEST_CR0, &cr0); KASSERT(error == 0, ("%s: error %d getting cr0", __func__, error)); - error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags); + error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags); KASSERT(error == 0, ("%s: error %d getting rflags", __func__, error)); - error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RSP, &rsp); + error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RSP, &rsp); KASSERT(error == 0, ("%s: error %d getting rsp", __func__, error)); if (pushop) { rsp -= size; @@ -1608,12 +1708,12 @@ emulate_stack_op(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie, return (error); if (pushop) { - error = memread(vm, vcpuid, mmio_gpa, &val, size, arg); + error = vie_mmio_read(vie, vm, vcpuid, mmio_gpa, &val, size); if (error == 0) vm_copyout(vm, vcpuid, &val, copyinfo, size); } else { vm_copyin(vm, vcpuid, copyinfo, &val, size); - error = memwrite(vm, vcpuid, mmio_gpa, val, size, arg); + error = vie_mmio_write(vie, vm, vcpuid, mmio_gpa, val, size); rsp += size; } vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo)); @@ -1627,9 +1727,7 @@ emulate_stack_op(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie, } static int -emulate_push(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie, - struct vm_guest_paging *paging, mem_region_read_t memread, - mem_region_write_t memwrite, void *arg) +emulate_push(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie) { int error; @@ -1642,15 +1740,12 @@ emulate_push(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie, if ((vie->reg & 7) != 6) return (EINVAL); - error = emulate_stack_op(vm, vcpuid, mmio_gpa, vie, paging, memread, - 
memwrite, arg); + error = emulate_stack_op(vm, vcpuid, mmio_gpa, vie); return (error); } static int -emulate_pop(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie, - struct vm_guest_paging *paging, mem_region_read_t memread, - mem_region_write_t memwrite, void *arg) +emulate_pop(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie) { int error; @@ -1663,30 +1758,24 @@ emulate_pop(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie, if ((vie->reg & 7) != 0) return (EINVAL); - error = emulate_stack_op(vm, vcpuid, mmio_gpa, vie, paging, memread, - memwrite, arg); + error = emulate_stack_op(vm, vcpuid, mmio_gpa, vie); return (error); } static int -emulate_group1(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, - struct vm_guest_paging *paging, mem_region_read_t memread, - mem_region_write_t memwrite, void *memarg) +emulate_group1(void *vm, int vcpuid, uint64_t gpa, struct vie *vie) { int error; switch (vie->reg & 7) { case 0x1: /* OR */ - error = emulate_or(vm, vcpuid, gpa, vie, - memread, memwrite, memarg); + error = emulate_or(vm, vcpuid, gpa, vie); break; case 0x4: /* AND */ - error = emulate_and(vm, vcpuid, gpa, vie, - memread, memwrite, memarg); + error = emulate_and(vm, vcpuid, gpa, vie); break; case 0x7: /* CMP */ - error = emulate_cmp(vm, vcpuid, gpa, vie, - memread, memwrite, memarg); + error = emulate_cmp(vm, vcpuid, gpa, vie); break; default: error = EINVAL; @@ -1697,8 +1786,7 @@ emulate_group1(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, } static int -emulate_bittest(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, - mem_region_read_t memread, mem_region_write_t memwrite, void *memarg) +emulate_bittest(void *vm, int vcpuid, uint64_t gpa, struct vie *vie) { uint64_t val, rflags; int error, bitmask, bitoff; @@ -1712,10 +1800,10 @@ emulate_bittest(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, if ((vie->reg & 7) != 4) return (EINVAL); - error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags); + error = 
vm_get_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags); KASSERT(error == 0, ("%s: error %d getting rflags", __func__, error)); - error = memread(vm, vcpuid, gpa, &val, vie->opsize, memarg); + error = vie_mmio_read(vie, vm, vcpuid, gpa, &val, vie->opsize); if (error) return (error); @@ -1739,8 +1827,7 @@ emulate_bittest(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, } static int -emulate_twob_group15(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, - mem_region_read_t memread, mem_region_write_t memwrite, void *memarg) +emulate_twob_group15(void *vm, int vcpuid, uint64_t gpa, struct vie *vie) { int error; uint64_t buf; @@ -1758,7 +1845,7 @@ emulate_twob_group15(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, * CLFLUSH, CLFLUSHOPT. Only check for access * rights. */ - error = memread(vm, vcpuid, gpa, &buf, 1, memarg); + error = vie_mmio_read(vie, vm, vcpuid, gpa, &buf, 1); } break; default: @@ -1769,91 +1856,460 @@ emulate_twob_group15(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, return (error); } +static int +vie_mmio_read(struct vie *vie, struct vm *vm, int cpuid, uint64_t gpa, + uint64_t *rval, int bytes) +{ + int err; + + if (vie->mmio_req_read.state == VR_DONE) { + ASSERT(vie->mmio_req_read.bytes == bytes); + ASSERT(vie->mmio_req_read.gpa == gpa); + + *rval = vie->mmio_req_read.data; + return (0); + } + + err = vm_service_mmio_read(vm, cpuid, gpa, rval, bytes); + if (err == 0) { + /* + * A successful read from an in-kernel-emulated device may come + * with side effects, so stash the result in case it's used for + * an instruction which subsequently needs to issue an MMIO + * write to userspace. 
+ */ + ASSERT(vie->mmio_req_read.state == VR_NONE); + + vie->mmio_req_read.bytes = bytes; + vie->mmio_req_read.gpa = gpa; + vie->mmio_req_read.data = *rval; + vie->mmio_req_read.state = VR_DONE; + + } else if (err == ESRCH) { + /* Hope that userspace emulation can fulfill this read */ + vie->mmio_req_read.bytes = bytes; + vie->mmio_req_read.gpa = gpa; + vie->mmio_req_read.state = VR_PENDING; + vie->status |= VIES_PENDING_MMIO; + } + return (err); +} + +static int +vie_mmio_write(struct vie *vie, struct vm *vm, int cpuid, uint64_t gpa, + uint64_t wval, int bytes) +{ + int err; + + if (vie->mmio_req_write.state == VR_DONE) { + ASSERT(vie->mmio_req_write.bytes == bytes); + ASSERT(vie->mmio_req_write.gpa == gpa); + + return (0); + } + + err = vm_service_mmio_write(vm, cpuid, gpa, wval, bytes); + if (err == 0) { + /* + * A successful write to an in-kernel-emulated device probably + * results in side effects, so stash the fact that such a write + * succeeded in case the operation requires other work. 
+ */ + vie->mmio_req_write.bytes = bytes; + vie->mmio_req_write.gpa = gpa; + vie->mmio_req_write.data = wval; + vie->mmio_req_write.state = VR_DONE; + } else if (err == ESRCH) { + /* Hope that userspace emulation can fulfill this write */ + vie->mmio_req_write.bytes = bytes; + vie->mmio_req_write.gpa = gpa; + vie->mmio_req_write.data = wval; + vie->mmio_req_write.state = VR_PENDING; + vie->status |= VIES_PENDING_MMIO; + } + return (err); +} + int -vmm_emulate_instruction(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, - struct vm_guest_paging *paging, mem_region_read_t memread, - mem_region_write_t memwrite, void *memarg) +vie_emulate_mmio(struct vie *vie, void *vm, int vcpuid) { int error; + uint64_t gpa; - if (!vie->decoded) + if ((vie->status & (VIES_INST_DECODE | VIES_MMIO)) != + (VIES_INST_DECODE | VIES_MMIO)) { return (EINVAL); + } + + gpa = vie->mmio_gpa; switch (vie->op.op_type) { case VIE_OP_TYPE_GROUP1: - error = emulate_group1(vm, vcpuid, gpa, vie, paging, memread, - memwrite, memarg); + error = emulate_group1(vm, vcpuid, gpa, vie); break; case VIE_OP_TYPE_POP: - error = emulate_pop(vm, vcpuid, gpa, vie, paging, memread, - memwrite, memarg); + error = emulate_pop(vm, vcpuid, gpa, vie); break; case VIE_OP_TYPE_PUSH: - error = emulate_push(vm, vcpuid, gpa, vie, paging, memread, - memwrite, memarg); + error = emulate_push(vm, vcpuid, gpa, vie); break; case VIE_OP_TYPE_CMP: - error = emulate_cmp(vm, vcpuid, gpa, vie, - memread, memwrite, memarg); + error = emulate_cmp(vm, vcpuid, gpa, vie); break; case VIE_OP_TYPE_MOV: - error = emulate_mov(vm, vcpuid, gpa, vie, - memread, memwrite, memarg); + error = emulate_mov(vm, vcpuid, gpa, vie); break; case VIE_OP_TYPE_MOVSX: case VIE_OP_TYPE_MOVZX: - error = emulate_movx(vm, vcpuid, gpa, vie, - memread, memwrite, memarg); + error = emulate_movx(vm, vcpuid, gpa, vie); break; case VIE_OP_TYPE_MOVS: - error = emulate_movs(vm, vcpuid, gpa, vie, paging, memread, - memwrite, memarg); + error = emulate_movs(vm, vcpuid, 
gpa, vie); break; case VIE_OP_TYPE_STOS: - error = emulate_stos(vm, vcpuid, gpa, vie, paging, memread, - memwrite, memarg); + error = emulate_stos(vm, vcpuid, gpa, vie); break; case VIE_OP_TYPE_AND: - error = emulate_and(vm, vcpuid, gpa, vie, - memread, memwrite, memarg); + error = emulate_and(vm, vcpuid, gpa, vie); break; case VIE_OP_TYPE_OR: - error = emulate_or(vm, vcpuid, gpa, vie, - memread, memwrite, memarg); + error = emulate_or(vm, vcpuid, gpa, vie); break; case VIE_OP_TYPE_SUB: - error = emulate_sub(vm, vcpuid, gpa, vie, - memread, memwrite, memarg); + error = emulate_sub(vm, vcpuid, gpa, vie); break; case VIE_OP_TYPE_BITTEST: - error = emulate_bittest(vm, vcpuid, gpa, vie, - memread, memwrite, memarg); + error = emulate_bittest(vm, vcpuid, gpa, vie); break; case VIE_OP_TYPE_TWOB_GRP15: - error = emulate_twob_group15(vm, vcpuid, gpa, vie, - memread, memwrite, memarg); + error = emulate_twob_group15(vm, vcpuid, gpa, vie); break; case VIE_OP_TYPE_ADD: - error = emulate_add(vm, vcpuid, gpa, vie, memread, - memwrite, memarg); + error = emulate_add(vm, vcpuid, gpa, vie); break; case VIE_OP_TYPE_TEST: - error = emulate_test(vm, vcpuid, gpa, vie, - memread, memwrite, memarg); + error = emulate_test(vm, vcpuid, gpa, vie); break; case VIE_OP_TYPE_BEXTR: - error = emulate_bextr(vm, vcpuid, gpa, vie, paging, - memread, memwrite, memarg); + error = emulate_bextr(vm, vcpuid, gpa, vie); break; default: error = EINVAL; break; } + if (error == ESRCH) { + /* Return to userspace with the mmio request */ + return (-1); + } + return (error); } +static int +vie_emulate_inout_port(struct vie *vie, struct vm *vm, int vcpuid) +{ + uint32_t mask, val; + bool in; + int err; + + mask = vie_size2mask(vie->inout.bytes); + in = (vie->inout.flags & INOUT_IN) != 0; + + if (!in) { + val = vie->inout.eax & mask; + } + + if (vie->inout_req_state != VR_DONE) { + err = vm_inout_access(vm, vcpuid, in, vie->inout.port, + vie->inout.bytes, &val); + } else { + /* + * This port access was handled 
in userspace and the result was + * injected in to be handled now. + */ + val = vie->inout_req_val; + vie->inout_req_state = VR_NONE; + err = 0; + } + + if (err == ESRCH) { + vie->status |= VIES_PENDING_INOUT; + vie->inout_req_state = VR_PENDING; + return (err); + } else if (err != 0) { + return (err); + } + + if (in) { + val &= mask; + val |= (vie->inout.eax & ~mask); + err = vm_set_register(vm, vcpuid, VM_REG_GUEST_RAX, val); + KASSERT(err == 0, ("emulate_ioport: error %d setting guest " + "rax register", err)); + } + return (0); +} + +static enum vm_reg_name +vie_inout_segname(const struct vie *vie) +{ + uint8_t segidx = vie->inout.segment; + const enum vm_reg_name segmap[] = { + VM_REG_GUEST_ES, + VM_REG_GUEST_CS, + VM_REG_GUEST_SS, + VM_REG_GUEST_DS, + VM_REG_GUEST_FS, + VM_REG_GUEST_GS, + }; + const uint8_t maxidx = (sizeof (segmap) / sizeof (segmap[0])); + + if (segidx >= maxidx) { + panic("unexpected segment index %u", segidx); + } + return (segmap[segidx]); +} + +static int +vie_emulate_inout_str(struct vie *vie, struct vm *vm, int vcpuid) +{ + uint8_t bytes, addrsize; + uint64_t index, count = 0, gla, rflags; + int prot, err, fault; + bool in, repeat; + enum vm_reg_name seg_reg, idx_reg; + struct vm_copyinfo copyinfo[2]; + + in = (vie->inout.flags & INOUT_IN) != 0; + bytes = vie->inout.bytes; + addrsize = vie->inout.addrsize; + prot = in ? PROT_WRITE : PROT_READ; + + ASSERT(bytes == 1 || bytes == 2 || bytes == 4); + ASSERT(addrsize == 2 || addrsize == 4 || addrsize == 8); + + idx_reg = (in) ? 
VM_REG_GUEST_RDI : VM_REG_GUEST_RSI; + seg_reg = vie_inout_segname(vie); + err = vm_get_register(vm, vcpuid, idx_reg, &index); + ASSERT(err == 0); + index = index & vie_size2mask(addrsize); + + repeat = (vie->inout.flags & INOUT_REP) != 0; + + /* Count register */ + if (repeat) { + err = vm_get_register(vm, vcpuid, VM_REG_GUEST_RCX, &count); + count &= vie_size2mask(addrsize); + + if (count == 0) { + /* + * If we were asked to emulate a REP INS/OUTS when the + * count register is zero, no further work is required. + */ + return (0); + } + } else { + count = 1; + } + + gla = 0; + if (vie_get_gla(vie, vm, vcpuid, bytes, addrsize, prot, seg_reg, + idx_reg, &gla) != 0) { + /* vie_get_gla() already injected the appropriate fault */ + return (0); + } + + /* + * The INS/OUTS emulate currently assumes that the memory target resides + * within the guest system memory, rather than a device MMIO region. If + * such a case becomes a necessity, that additional handling could be + * put in place. + */ + err = vm_copy_setup(vm, vcpuid, &vie->paging, gla, bytes, prot, + copyinfo, nitems(copyinfo), &fault); + + if (err) { + /* Unrecoverable error */ + return (err); + } else if (fault) { + /* Resume guest to handle fault */ + return (0); + } + + if (!in) { + vm_copyin(vm, vcpuid, copyinfo, &vie->inout.eax, bytes); + } + + err = vie_emulate_inout_port(vie, vm, vcpuid); + + if (err == 0 && in) { + vm_copyout(vm, vcpuid, &vie->inout.eax, copyinfo, bytes); + } + + vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo)); + + if (err == 0) { + err = vm_get_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, + &rflags); + ASSERT(err == 0); + + /* Update index */ + if (rflags & PSL_D) { + index -= bytes; + } else { + index += bytes; + } + + /* Update index register */ + err = vie_update_register(vm, vcpuid, idx_reg, index, addrsize); + ASSERT(err == 0); + + /* + * Update count register only if the instruction had a repeat + * prefix. 
+ */ + if ((vie->inout.flags & INOUT_REP) != 0) { + count--; + err = vie_update_register(vm, vcpuid, VM_REG_GUEST_RCX, + count, addrsize); + ASSERT(err == 0); + + if (count != 0) { + return (vie_repeat(vie)); + } + } + } + + return (err); +} + int +vie_emulate_inout(struct vie *vie, struct vm *vm, int vcpuid) +{ + int err = 0; + + if ((vie->status & VIES_INOUT) == 0) { + return (EINVAL); + } + + if ((vie->inout.flags & INOUT_STR) == 0) { + /* + * For now, using the 'rep' prefixes with plain (non-string) + * in/out is not supported. + */ + if ((vie->inout.flags & INOUT_REP) != 0) { + return (EINVAL); + } + + err = vie_emulate_inout_port(vie, vm, vcpuid); + + if (err == ESRCH) { + ASSERT(vie->status & VIES_PENDING_INOUT); + /* Return to userspace with the in/out request */ + err = -1; + } + } else { + vie->status &= ~VIES_REPEAT; + err = vie_emulate_inout_str(vie, vm, vcpuid); + + if (err == ESRCH) { + ASSERT(vie->status & VIES_PENDING_INOUT); + /* Return to userspace with the in/out request */ + err = -1; + } + } + + return (err); +} + +void +vie_reset(struct vie *vie) +{ + vie->status = 0; + vie->num_processed = vie->num_valid = 0; +} + +void +vie_advance_pc(struct vie *vie, uint64_t *nextrip) +{ + VERIFY((vie->status & VIES_REPEAT) == 0); + + *nextrip += vie->num_processed; + vie_reset(vie); +} + +void +vie_exitinfo(const struct vie *vie, struct vm_exit *vme) +{ + if (vie->status & VIES_MMIO) { + vme->exitcode = VM_EXITCODE_MMIO; + if (vie->mmio_req_read.state == VR_PENDING) { + vme->u.mmio.gpa = vie->mmio_req_read.gpa; + vme->u.mmio.data = 0; + vme->u.mmio.bytes = vie->mmio_req_read.bytes; + vme->u.mmio.read = 1; + } else if (vie->mmio_req_write.state == VR_PENDING) { + vme->u.mmio.gpa = vie->mmio_req_write.gpa; + vme->u.mmio.data = vie->mmio_req_write.data & + vie_size2mask(vie->mmio_req_write.bytes); + vme->u.mmio.bytes = vie->mmio_req_write.bytes; + vme->u.mmio.read = 0; + } else { + panic("bad pending MMIO state"); + } + } else if (vie->status & VIES_INOUT) { 
+ vme->exitcode = VM_EXITCODE_INOUT; + vme->u.inout.port = vie->inout.port; + vme->u.inout.bytes = vie->inout.bytes; + if ((vie->inout.flags & INOUT_IN) != 0) { + vme->u.inout.flags = INOUT_IN; + vme->u.inout.eax = 0; + } else { + vme->u.inout.flags = 0; + vme->u.inout.eax = vie->inout.eax & + vie_size2mask(vie->inout.bytes); + } + } else { + panic("no pending operation"); + } +} + +/* + * In the case of a decoding or verification failure, bailing out to userspace + * to do the instruction emulation is our only option for now. + */ +void +vie_fallback_exitinfo(const struct vie *vie, struct vm_exit *vme) +{ + if ((vie->status & VIES_INST_FETCH) == 0) { + bzero(&vme->u.inst_emul, sizeof (vme->u.inst_emul)); + } else { + ASSERT(sizeof (vie->inst) == sizeof (vme->u.inst_emul.inst)); + + bcopy(vie->inst, vme->u.inst_emul.inst, sizeof (vie->inst)); + vme->u.inst_emul.num_valid = vie->num_valid; + } + vme->exitcode = VM_EXITCODE_INST_EMUL; +} + +bool +vie_pending(const struct vie *vie) +{ + return ((vie->status & (VIES_PENDING_MMIO|VIES_PENDING_INOUT)) != 0); +} + +bool +vie_needs_fetch(const struct vie *vie) +{ + if (vie->status & VIES_INST_FETCH) { + ASSERT(vie->num_valid != 0); + return (false); + } + return (true); +} + +static int vie_alignment_check(int cpl, int size, uint64_t cr0, uint64_t rf, uint64_t gla) { KASSERT(size == 1 || size == 2 || size == 4 || size == 8, @@ -1866,7 +2322,7 @@ vie_alignment_check(int cpl, int size, uint64_t cr0, uint64_t rf, uint64_t gla) return ((gla & (size - 1)) ? 
1 : 0); } -int +static int vie_canonical_check(enum vm_cpu_mode cpu_mode, uint64_t gla) { uint64_t mask; @@ -1885,7 +2341,7 @@ vie_canonical_check(enum vm_cpu_mode cpu_mode, uint64_t gla) return ((gla & mask) != 0); } -uint64_t +static uint64_t vie_size2mask(int size) { KASSERT(size == 1 || size == 2 || size == 4 || size == 8, @@ -1893,7 +2349,7 @@ vie_size2mask(int size) return (size2mask[size]); } -int +static int vie_calculate_gla(enum vm_cpu_mode cpu_mode, enum vm_reg_name seg, struct seg_desc *desc, uint64_t offset, int length, int addrsize, int prot, uint64_t *gla) @@ -1905,13 +2361,8 @@ vie_calculate_gla(enum vm_cpu_mode cpu_mode, enum vm_reg_name seg, ("%s: invalid segment %d", __func__, seg)); KASSERT(length == 1 || length == 2 || length == 4 || length == 8, ("%s: invalid operand size %d", __func__, length)); -#ifdef __FreeBSD__ - KASSERT((prot & ~(PROT_READ | PROT_WRITE)) == 0, - ("%s: invalid prot %#x", __func__, prot)); -#else KASSERT((prot & ~(PROT_READ | PROT_WRITE)) == 0, ("%s: invalid prot %x", __func__, prot)); -#endif firstoff = offset; if (cpu_mode == CPU_MODE_64BIT) { @@ -1930,31 +2381,21 @@ vie_calculate_gla(enum vm_cpu_mode cpu_mode, enum vm_reg_name seg, if (SEG_DESC_UNUSABLE(desc->access)) return (-1); - /* + /* * The processor generates a #NP exception when a segment * register is loaded with a selector that points to a * descriptor that is not present. If this was the case then * it would have been checked before the VM-exit. */ -#ifdef __FreeBSD__ - KASSERT(SEG_DESC_PRESENT(desc->access), - ("segment %d not present: %#x", seg, desc->access)); -#else KASSERT(SEG_DESC_PRESENT(desc->access), ("segment %d not present: %x", seg, desc->access)); -#endif /* * The descriptor type must indicate a code/data segment. 
*/ type = SEG_DESC_TYPE(desc->access); -#ifdef __FreeBSD__ - KASSERT(type >= 16 && type <= 31, ("segment %d has invalid " - "descriptor type %#x", seg, type)); -#else KASSERT(type >= 16 && type <= 31, ("segment %d has invalid " "descriptor type %x", seg, type)); -#endif if (prot & PROT_READ) { /* #GP on a read access to a exec-only code segment */ @@ -2019,24 +2460,107 @@ vie_calculate_gla(enum vm_cpu_mode cpu_mode, enum vm_reg_name seg, } void -vie_init(struct vie *vie, const char *inst_bytes, int inst_length) +vie_init_mmio(struct vie *vie, const char *inst_bytes, uint8_t inst_length, + const struct vm_guest_paging *paging, uint64_t gpa) { - KASSERT(inst_length >= 0 && inst_length <= VIE_INST_SIZE, + KASSERT(inst_length <= VIE_INST_SIZE, ("%s: invalid instruction length (%d)", __func__, inst_length)); - bzero(vie, sizeof(struct vie)); + bzero(vie, sizeof (struct vie)); vie->base_register = VM_REG_LAST; vie->index_register = VM_REG_LAST; vie->segment_register = VM_REG_LAST; + vie->status = VIES_INIT | VIES_MMIO; - if (inst_length) { + if (inst_length != 0) { bcopy(inst_bytes, vie->inst, inst_length); vie->num_valid = inst_length; + vie->status |= VIES_INST_FETCH; + } + + vie->paging = *paging; + vie->mmio_gpa = gpa; +} + +void +vie_init_inout(struct vie *vie, const struct vm_inout *inout, uint8_t inst_len, + const struct vm_guest_paging *paging) +{ + bzero(vie, sizeof (struct vie)); + + vie->status = VIES_INIT | VIES_INOUT; + + vie->inout = *inout; + vie->paging = *paging; + + /* + * Since VMX/SVM assists already decoded the nature of the in/out + * instruction, let the status reflect that. 
+ */ + vie->status |= VIES_INST_FETCH | VIES_INST_DECODE; + vie->num_processed = inst_len; +} + +int +vie_fulfill_mmio(struct vie *vie, const struct vm_mmio *result) +{ + struct vie_mmio *pending; + + if ((vie->status & VIES_MMIO) == 0 || + (vie->status & VIES_PENDING_MMIO) == 0) { + return (EINVAL); + } + + if (result->read) { + pending = &vie->mmio_req_read; + } else { + pending = &vie->mmio_req_write; + } + + if (pending->state != VR_PENDING || + pending->bytes != result->bytes || pending->gpa != result->gpa) { + return (EINVAL); + } + + if (result->read) { + pending->data = result->data & vie_size2mask(pending->bytes); + } + pending->state = VR_DONE; + vie->status &= ~VIES_PENDING_MMIO; + + return (0); +} + +int +vie_fulfill_inout(struct vie *vie, const struct vm_inout *result) +{ + if ((vie->status & VIES_INOUT) == 0 || + (vie->status & VIES_PENDING_INOUT) == 0) { + return (EINVAL); } + if ((vie->inout.flags & INOUT_IN) != (result->flags & INOUT_IN) || + vie->inout.bytes != result->bytes || + vie->inout.port != result->port) { + return (EINVAL); + } + + if (result->flags & INOUT_IN) { + vie->inout_req_val = result->eax & + vie_size2mask(vie->inout.bytes); + } + vie->inout_req_state = VR_DONE; + vie->status &= ~(VIES_PENDING_INOUT); + + return (0); +} + +uint64_t +vie_mmio_gpa(const struct vie *vie) +{ + return (vie->mmio_gpa); } -#ifdef _KERNEL static int pf_error_code(int usermode, int prot, int rsvd, uint64_t pte) { @@ -2299,27 +2823,28 @@ vm_gla2gpa_nofault(struct vm *vm, int vcpuid, struct vm_guest_paging *paging, } int -vmm_fetch_instruction(struct vm *vm, int vcpuid, struct vm_guest_paging *paging, - uint64_t rip, int inst_length, struct vie *vie, int *faultptr) +vie_fetch_instruction(struct vie *vie, struct vm *vm, int vcpuid, uint64_t rip, + int *faultptr) { struct vm_copyinfo copyinfo[2]; int error, prot; - if (inst_length > VIE_INST_SIZE) - panic("vmm_fetch_instruction: invalid length %d", inst_length); + if (vie->status != (VIES_INIT|VIES_MMIO)) { + 
return (EINVAL); + } prot = PROT_READ | PROT_EXEC; - error = vm_copy_setup(vm, vcpuid, paging, rip, inst_length, prot, - copyinfo, nitems(copyinfo), faultptr); + error = vm_copy_setup(vm, vcpuid, &vie->paging, rip, VIE_INST_SIZE, + prot, copyinfo, nitems(copyinfo), faultptr); if (error || *faultptr) return (error); - vm_copyin(vm, vcpuid, copyinfo, vie->inst, inst_length); + vm_copyin(vm, vcpuid, copyinfo, vie->inst, VIE_INST_SIZE); vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo)); - vie->num_valid = inst_length; + vie->num_valid = VIE_INST_SIZE; + vie->status |= VIES_INST_FETCH; return (0); } -#endif /* _KERNEL */ static int vie_peek(struct vie *vie, uint8_t *x) @@ -2821,23 +3346,28 @@ decode_moffset(struct vie *vie) return (0); } -#ifdef _KERNEL /* * Verify that the 'guest linear address' provided as collateral of the nested * page table fault matches with our instruction decoding. */ -static int -verify_gla(struct vm *vm, int cpuid, uint64_t gla, struct vie *vie, - enum vm_cpu_mode cpu_mode) +int +vie_verify_gla(struct vie *vie, struct vm *vm, int cpuid, uint64_t gla) { int error; uint64_t base, segbase, idx, gla2; enum vm_reg_name seg; struct seg_desc desc; - /* Skip 'gla' verification */ - if (gla == VIE_INVALID_GLA) + ASSERT((vie->status & VIES_INST_DECODE) != 0); + + /* + * If there was no valid GLA context with the exit, or the decoded + * instruction acts on more than one address, verification is done. + */ + if (gla == VIE_INVALID_GLA || + (vie->op.op_flags & VIE_OP_F_NO_GLA_VERIFICATION) != 0) { return (0); + } base = 0; if (vie->base_register != VM_REG_LAST) { @@ -2879,15 +3409,16 @@ verify_gla(struct vm *vm, int cpuid, uint64_t gla, struct vie *vie, * string destination the DS segment is the default. These * can be overridden to allow other segments to be accessed. 
*/ - if (vie->segment_override) + if (vie->segment_override) { seg = vie->segment_register; - else if (vie->base_register == VM_REG_GUEST_RSP || - vie->base_register == VM_REG_GUEST_RBP) + } else if (vie->base_register == VM_REG_GUEST_RSP || + vie->base_register == VM_REG_GUEST_RBP) { seg = VM_REG_GUEST_SS; - else + } else { seg = VM_REG_GUEST_DS; - if (cpu_mode == CPU_MODE_64BIT && seg != VM_REG_GUEST_FS && - seg != VM_REG_GUEST_GS) { + } + if (vie->paging.cpu_mode == CPU_MODE_64BIT && + seg != VM_REG_GUEST_FS && seg != VM_REG_GUEST_GS) { segbase = 0; } else { error = vm_get_seg_desc(vm, cpuid, seg, &desc); @@ -2913,16 +3444,17 @@ verify_gla(struct vm *vm, int cpuid, uint64_t gla, struct vie *vie, return (0); } -#endif /* _KERNEL */ int -#ifdef _KERNEL -vmm_decode_instruction(struct vm *vm, int cpuid, uint64_t gla, - enum vm_cpu_mode cpu_mode, int cs_d, struct vie *vie) -#else -vmm_decode_instruction(enum vm_cpu_mode cpu_mode, int cs_d, struct vie *vie) -#endif +vie_decode_instruction(struct vie *vie, struct vm *vm, int cpuid, int cs_d) { + enum vm_cpu_mode cpu_mode; + + if ((vie->status & VIES_INST_FETCH) == 0) { + return (EINVAL); + } + + cpu_mode = vie->paging.cpu_mode; if (decode_prefixes(vie, cpu_mode, cs_d)) return (-1); @@ -2945,14 +3477,7 @@ vmm_decode_instruction(enum vm_cpu_mode cpu_mode, int cs_d, struct vie *vie) if (decode_moffset(vie)) return (-1); -#ifdef _KERNEL - if ((vie->op.op_flags & VIE_OP_F_NO_GLA_VERIFICATION) == 0) { - if (verify_gla(vm, cpuid, gla, vie, cpu_mode)) - return (-1); - } -#endif - - vie->decoded = 1; /* success */ + vie->status |= VIES_INST_DECODE; return (0); } diff --git a/usr/src/uts/i86pc/io/vmm/vmm_ioport.c b/usr/src/uts/i86pc/io/vmm/vmm_ioport.c index 3d08fd5e85..01fae7d584 100644 --- a/usr/src/uts/i86pc/io/vmm/vmm_ioport.c +++ b/usr/src/uts/i86pc/io/vmm/vmm_ioport.c @@ -25,6 +25,18 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + * + * Copyright 2020 Oxide Computer Company + */ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); @@ -33,18 +45,16 @@ __FBSDID("$FreeBSD$"); #include <sys/systm.h> #include <machine/vmm.h> -#include <machine/vmm_instruction_emul.h> #include "vatpic.h" #include "vatpit.h" #include "vpmtmr.h" #include "vrtc.h" #include "vmm_ioport.h" -#include "vmm_ktr.h" #define MAX_IOPORTS 1280 -ioport_handler_func_t ioport_handler[MAX_IOPORTS] = { +static ioport_handler_func_t ioport_handler[MAX_IOPORTS] = { [TIMER_MODE] = vatpit_handler, [TIMER_CNTR0] = vatpit_handler, [TIMER_CNTR1] = vatpit_handler, @@ -61,144 +71,24 @@ ioport_handler_func_t ioport_handler[MAX_IOPORTS] = { [IO_RTC + 1] = vrtc_data_handler, }; -#ifdef KTR -static const char * -inout_instruction(struct vm_exit *vmexit) -{ - int index; - - static const char *iodesc[] = { - "outb", "outw", "outl", - "inb", "inw", "inl", - "outsb", "outsw", "outsd", - "insb", "insw", "insd", - }; - - switch (vmexit->u.inout.bytes) { - case 1: - index = 0; - break; - case 2: - index = 1; - break; - default: - index = 2; - break; - } - - if (vmexit->u.inout.in) - index += 3; - - if (vmexit->u.inout.string) - index += 6; - - KASSERT(index < nitems(iodesc), ("%s: invalid index %d", - __func__, index)); - - return (iodesc[index]); -} -#endif /* KTR */ - -static int -emulate_inout_port(struct vm *vm, int vcpuid, struct vm_exit *vmexit, - bool *retu) +int +vm_inout_access(struct vm *vm, int vcpuid, bool in, uint16_t port, + uint8_t bytes, uint32_t *val) { ioport_handler_func_t handler; - uint32_t mask, val; int error; -#ifdef 
__FreeBSD__ - /* - * If there is no handler for the I/O port then punt to userspace. - */ - if (vmexit->u.inout.port >= MAX_IOPORTS || - (handler = ioport_handler[vmexit->u.inout.port]) == NULL) { - *retu = true; - return (0); - } -#else /* __FreeBSD__ */ handler = NULL; - if (vmexit->u.inout.port < MAX_IOPORTS) { - handler = ioport_handler[vmexit->u.inout.port]; + if (port < MAX_IOPORTS) { + handler = ioport_handler[port]; } - /* Look for hooks, if a standard handler is not present */ - if (handler == NULL) { - mask = vie_size2mask(vmexit->u.inout.bytes); - if (!vmexit->u.inout.in) { - val = vmexit->u.inout.eax & mask; - } - error = vm_ioport_handle_hook(vm, vcpuid, vmexit->u.inout.in, - vmexit->u.inout.port, vmexit->u.inout.bytes, &val); - if (error == 0) { - goto finish; - } - *retu = true; - return (0); + if (handler != NULL) { + error = (*handler)(vm, vcpuid, in, port, bytes, val); + } else { + /* Look for hooks, if a standard handler is not present */ + error = vm_ioport_handle_hook(vm, vcpuid, in, port, bytes, val); } -#endif /* __FreeBSD__ */ - - mask = vie_size2mask(vmexit->u.inout.bytes); - - if (!vmexit->u.inout.in) { - val = vmexit->u.inout.eax & mask; - } - - error = (*handler)(vm, vcpuid, vmexit->u.inout.in, - vmexit->u.inout.port, vmexit->u.inout.bytes, &val); - if (error) { - /* - * The value returned by this function is also the return value - * of vm_run(). This needs to be a positive number otherwise it - * can be interpreted as a "pseudo-error" like ERESTART. - * - * Enforce this by mapping all errors to EIO. 
- */ - return (EIO); - } - -#ifndef __FreeBSD__ -finish: -#endif /* __FreeBSD__ */ - if (vmexit->u.inout.in) { - vmexit->u.inout.eax &= ~mask; - vmexit->u.inout.eax |= val & mask; - error = vm_set_register(vm, vcpuid, VM_REG_GUEST_RAX, - vmexit->u.inout.eax); - KASSERT(error == 0, ("emulate_ioport: error %d setting guest " - "rax register", error)); - } - *retu = false; - return (0); -} - -static int -emulate_inout_str(struct vm *vm, int vcpuid, struct vm_exit *vmexit, bool *retu) -{ - *retu = true; - return (0); /* Return to userspace to finish emulation */ -} - -int -vm_handle_inout(struct vm *vm, int vcpuid, struct vm_exit *vmexit, bool *retu) -{ - int bytes, error; - - bytes = vmexit->u.inout.bytes; - KASSERT(bytes == 1 || bytes == 2 || bytes == 4, - ("vm_handle_inout: invalid operand size %d", bytes)); - - if (vmexit->u.inout.string) - error = emulate_inout_str(vm, vcpuid, vmexit, retu); - else - error = emulate_inout_port(vm, vcpuid, vmexit, retu); - - VCPU_CTR4(vm, vcpuid, "%s%s 0x%04x: %s", - vmexit->u.inout.rep ? "rep " : "", - inout_instruction(vmexit), - vmexit->u.inout.port, - error ? "error" : (*retu ? 
"userspace" : "handled")); - return (error); } diff --git a/usr/src/uts/i86pc/io/vmm/vmm_ioport.h b/usr/src/uts/i86pc/io/vmm/vmm_ioport.h index 14e315f400..7c51906e85 100644 --- a/usr/src/uts/i86pc/io/vmm/vmm_ioport.h +++ b/usr/src/uts/i86pc/io/vmm/vmm_ioport.h @@ -32,8 +32,9 @@ #define _VMM_IOPORT_H_ typedef int (*ioport_handler_func_t)(struct vm *vm, int vcpuid, - bool in, int port, int bytes, uint32_t *val); + bool in, uint16_t port, uint8_t bytes, uint32_t *val); -int vm_handle_inout(struct vm *vm, int vcpuid, struct vm_exit *vme, bool *retu); +int vm_inout_access(struct vm *vm, int vcpuid, bool in, uint16_t port, + uint8_t bytes, uint32_t *val); #endif /* _VMM_IOPORT_H_ */ diff --git a/usr/src/uts/i86pc/io/vmm/vmm_mem.c b/usr/src/uts/i86pc/io/vmm/vmm_mem.c index a736d94bba..cd894dc84d 100644 --- a/usr/src/uts/i86pc/io/vmm/vmm_mem.c +++ b/usr/src/uts/i86pc/io/vmm/vmm_mem.c @@ -100,7 +100,7 @@ vmm_mmio_alloc(struct vmspace *vmspace, vm_paddr_t gpa, size_t len, * has incremented the reference count on the sglist. Dropping the * initial reference count ensures that the sglist will be freed * when the object is deallocated. - * + * * If the object could not be allocated then we end up freeing the * sglist. */ diff --git a/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c b/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c index 2f84ac5e95..f05600d6c3 100644 --- a/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c +++ b/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c @@ -500,25 +500,27 @@ vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md, /* Execute the primary logic for the ioctl. 
*/ switch (cmd) { case VM_RUN: { - struct vm_run vmrun; + struct vm_entry entry; - if (ddi_copyin(datap, &vmrun, sizeof (vmrun), md)) { + if (ddi_copyin(datap, &entry, sizeof (entry), md)) { error = EFAULT; break; } - vmrun.cpuid = vcpu; if (!(curthread->t_schedflag & TS_VCPU)) smt_mark_as_vcpu(); - error = vm_run(sc->vmm_vm, &vmrun); - /* - * XXXJOY: I think it's necessary to do copyout, even in the - * face of errors, since the exit state is communicated out. - */ - if (ddi_copyout(&vmrun, datap, sizeof (vmrun), md)) { - error = EFAULT; - break; + error = vm_run(sc->vmm_vm, vcpu, &entry); + + if (error == 0) { + const struct vm_exit *vme; + void *outp = entry.exit_data; + + vme = vm_exitinfo(sc->vmm_vm, vcpu); + if (ddi_copyout(vme, outp, sizeof (*vme), md)) { + error = EFAULT; + break; + } } break; } @@ -982,9 +984,6 @@ vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md, case VM_GET_KERNEMU_DEV: { struct vm_readwrite_kernemu_device kemu; size_t size = 0; - mem_region_write_t mwrite = NULL; - mem_region_read_t mread = NULL; - uint64_t ignored = 0; if (ddi_copyin(datap, &kemu, sizeof (kemu), md)) { error = EFAULT; @@ -998,31 +997,12 @@ vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md, size = (1 << kemu.access_width); ASSERT(size >= 1 && size <= 8); - if (kemu.gpa >= DEFAULT_APIC_BASE && - kemu.gpa < DEFAULT_APIC_BASE + PAGE_SIZE) { - mread = lapic_mmio_read; - mwrite = lapic_mmio_write; - } else if (kemu.gpa >= VIOAPIC_BASE && - kemu.gpa < VIOAPIC_BASE + VIOAPIC_SIZE) { - mread = vioapic_mmio_read; - mwrite = vioapic_mmio_write; - } else if (kemu.gpa >= VHPET_BASE && - kemu.gpa < VHPET_BASE + VHPET_SIZE) { - mread = vhpet_mmio_read; - mwrite = vhpet_mmio_write; - } else { - error = EINVAL; - break; - } - if (cmd == VM_SET_KERNEMU_DEV) { - VERIFY(mwrite != NULL); - error = mwrite(sc->vmm_vm, vcpu, kemu.gpa, kemu.value, - size, &ignored); + error = vm_service_mmio_write(sc->vmm_vm, vcpu, + kemu.gpa, kemu.value, size); } else { - 
VERIFY(mread != NULL); - error = mread(sc->vmm_vm, vcpu, kemu.gpa, &kemu.value, - size, &ignored); + error = vm_service_mmio_read(sc->vmm_vm, vcpu, + kemu.gpa, &kemu.value, size); } if (error == 0) { @@ -2004,6 +1984,11 @@ vmm_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, vmm_softc_t *sc; minor_t minor; + /* The structs in bhyve ioctls assume a 64-bit datamodel */ + if (ddi_model_convert_from(mode & FMODELS) != DDI_MODEL_NONE) { + return (ENOTSUP); + } + minor = getminor(dev); if (minor == VMM_CTL_MINOR) { diff --git a/usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c b/usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c index 2401774ab7..4dcaba8a82 100644 --- a/usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c +++ b/usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c @@ -572,7 +572,7 @@ vmm_sol_glue_cleanup(void) */ #define FEBRUARY 2 -#define days_in_year(y) (leapyear(y) ? 366 : 365) +#define days_in_year(y) (leapyear(y) ? 366 : 365) #define days_in_month(y, m) \ (month_days[(m) - 1] + (m == FEBRUARY ? leapyear(y) : 0)) /* Day of week. 
Days are counted from 1/1/1970, which was a Thursday */ @@ -644,7 +644,7 @@ clock_ct_to_ts(struct clocktime *ct, struct timespec *ts) /* Months */ for (i = 1; i < ct->mon; i++) - days += days_in_month(year, i); + days += days_in_month(year, i); days += (ct->day - 1); ts->tv_sec = (((time_t)days * 24 + ct->hour) * 60 + ct->min) * 60 + diff --git a/usr/src/uts/i86pc/io/vmm/vmm_stat.c b/usr/src/uts/i86pc/io/vmm/vmm_stat.c index a6af75e40a..42d6f8cfa3 100644 --- a/usr/src/uts/i86pc/io/vmm/vmm_stat.c +++ b/usr/src/uts/i86pc/io/vmm/vmm_stat.c @@ -163,7 +163,7 @@ VMM_STAT(VMEXIT_NMI_WINDOW, "vm exits due to nmi window opening"); VMM_STAT(VMEXIT_INOUT, "number of times in/out was intercepted"); VMM_STAT(VMEXIT_CPUID, "number of times cpuid was intercepted"); VMM_STAT(VMEXIT_NESTED_FAULT, "vm exits due to nested page fault"); -VMM_STAT(VMEXIT_INST_EMUL, "vm exits for instruction emulation"); +VMM_STAT(VMEXIT_MMIO_EMUL, "vm exits for mmio emulation"); VMM_STAT(VMEXIT_UNKNOWN, "number of vm exits for unknown reason"); VMM_STAT(VMEXIT_ASTPENDING, "number of times astpending at exit"); VMM_STAT(VMEXIT_REQIDLE, "number of times idle requested at exit"); diff --git a/usr/src/uts/i86pc/io/vmm/vmm_stat.h b/usr/src/uts/i86pc/io/vmm/vmm_stat.h index 3232e23888..a214ba0fe9 100644 --- a/usr/src/uts/i86pc/io/vmm/vmm_stat.h +++ b/usr/src/uts/i86pc/io/vmm/vmm_stat.h @@ -73,7 +73,7 @@ void vmm_stat_register(void *arg); }; \ SYSINIT(type##_stat, SI_SUB_KLD, SI_ORDER_ANY, vmm_stat_register, type) -#define VMM_STAT_DEFINE(type, nelems, desc, scope) \ +#define VMM_STAT_DEFINE(type, nelems, desc, scope) \ VMM_STAT_FDEFINE(type, nelems, desc, NULL, scope) #define VMM_STAT_DECLARE(type) \ @@ -94,7 +94,7 @@ void vmm_stat_register(void *arg); void *vmm_stat_alloc(void); void vmm_stat_init(void *vp); -void vmm_stat_free(void *vp); +void vmm_stat_free(void *vp); /* * 'buf' should be at least fit 'MAX_VMM_STAT_TYPES' entries @@ -108,7 +108,7 @@ vmm_stat_array_incr(struct vm *vm, int vcpu, struct 
vmm_stat_type *vst, { #ifdef VMM_KEEP_STATS uint64_t *stats; - + stats = vcpu_stats(vm, vcpu); if (vst->index >= 0 && statidx < vst->nelems) @@ -122,7 +122,7 @@ vmm_stat_array_set(struct vm *vm, int vcpu, struct vmm_stat_type *vst, { #ifdef VMM_KEEP_STATS uint64_t *stats; - + stats = vcpu_stats(vm, vcpu); if (vst->index >= 0 && statidx < vst->nelems) @@ -162,7 +162,7 @@ VMM_STAT_DECLARE(VMEXIT_NMI_WINDOW); VMM_STAT_DECLARE(VMEXIT_INOUT); VMM_STAT_DECLARE(VMEXIT_CPUID); VMM_STAT_DECLARE(VMEXIT_NESTED_FAULT); -VMM_STAT_DECLARE(VMEXIT_INST_EMUL); +VMM_STAT_DECLARE(VMEXIT_MMIO_EMUL); VMM_STAT_DECLARE(VMEXIT_UNKNOWN); VMM_STAT_DECLARE(VMEXIT_ASTPENDING); VMM_STAT_DECLARE(VMEXIT_USERSPACE); diff --git a/usr/src/uts/i86pc/sys/vmm.h b/usr/src/uts/i86pc/sys/vmm.h index 45838e343e..d6d24f0c37 100644 --- a/usr/src/uts/i86pc/sys/vmm.h +++ b/usr/src/uts/i86pc/sys/vmm.h @@ -122,31 +122,13 @@ enum x2apic_state { #define VM_INTINFO_HWEXCEPTION (3 << 8) #define VM_INTINFO_SWINTR (4 << 8) -#ifndef __FreeBSD__ /* * illumos doesn't have a limitation based on SPECNAMELEN like FreeBSD does. * Instead of picking an arbitrary value we will just rely on the same * calculation that's made below. If this calculation ever changes we need to * update the the VM_MAX_NAMELEN mapping in the bhyve brand's boot.c file. */ -#else -/* - * The VM name has to fit into the pathname length constraints of devfs, - * governed primarily by SPECNAMELEN. The length is the total number of - * characters in the full path, relative to the mount point and not - * including any leading '/' characters. - * A prefix and a suffix are added to the name specified by the user. - * The prefix is usually "vmm/" or "vmm.io/", but can be a few characters - * longer for future use. - * The suffix is a string that identifies a bootrom image or some similar - * image that is attached to the VM. 
A separator character gets added to - * the suffix automatically when generating the full path, so it must be - * accounted for, reducing the effective length by 1. - * The effective length of a VM name is 229 bytes for FreeBSD 13 and 37 - * bytes for FreeBSD 12. A minimum length is set for safety and supports - * a SPECNAMELEN as small as 32 on old systems. - */ -#endif + #define VM_MAX_PREFIXLEN 10 #define VM_MAX_SUFFIXLEN 15 #define VM_MIN_NAMELEN 6 @@ -224,76 +206,6 @@ struct vm_guest_paging { enum vm_paging_mode paging_mode; }; -/* - * The data structures 'vie' and 'vie_op' are meant to be opaque to the - * consumers of instruction decoding. The only reason why their contents - * need to be exposed is because they are part of the 'vm_exit' structure. - */ -struct vie_op { - uint8_t op_byte; /* actual opcode byte */ - uint8_t op_type; /* type of operation (e.g. MOV) */ - uint16_t op_flags; -}; -_Static_assert(sizeof(struct vie_op) == 4, "ABI"); -_Static_assert(_Alignof(struct vie_op) == 2, "ABI"); - -#define VIE_INST_SIZE 15 -struct vie { - uint8_t inst[VIE_INST_SIZE]; /* instruction bytes */ - uint8_t num_valid; /* size of the instruction */ - uint8_t num_processed; - - uint8_t addrsize:4, opsize:4; /* address and operand sizes */ - uint8_t rex_w:1, /* REX prefix */ - rex_r:1, - rex_x:1, - rex_b:1, - rex_present:1, - repz_present:1, /* REP/REPE/REPZ prefix */ - repnz_present:1, /* REPNE/REPNZ prefix */ - opsize_override:1, /* Operand size override */ - addrsize_override:1, /* Address size override */ - segment_override:1; /* Segment override */ - - uint8_t mod:2, /* ModRM byte */ - reg:4, - rm:4; - - uint8_t ss:2, /* SIB byte */ - vex_present:1, /* VEX prefixed */ - vex_l:1, /* L bit */ - index:4, /* SIB byte */ - base:4; /* SIB byte */ - - uint8_t disp_bytes; - uint8_t imm_bytes; - - uint8_t scale; - - uint8_t vex_reg:4, /* vvvv: first source register specifier */ - vex_pp:2, /* pp */ - _sparebits:2; - - uint8_t _sparebytes[2]; - - int base_register; /* 
VM_REG_GUEST_xyz */ - int index_register; /* VM_REG_GUEST_xyz */ - int segment_register; /* VM_REG_GUEST_xyz */ - - int64_t displacement; /* optional addr displacement */ - int64_t immediate; /* optional immediate operand */ - - uint8_t decoded; /* set to 1 if successfully decoded */ - - uint8_t _sparebyte; - - struct vie_op op; /* opcode description */ -}; -_Static_assert(sizeof(struct vie) == 64, "ABI"); -_Static_assert(__offsetof(struct vie, disp_bytes) == 22, "ABI"); -_Static_assert(__offsetof(struct vie, scale) == 24, "ABI"); -_Static_assert(__offsetof(struct vie, base_register) == 28, "ABI"); - enum vm_exitcode { VM_EXITCODE_INOUT, VM_EXITCODE_VMX, @@ -306,11 +218,11 @@ enum vm_exitcode { VM_EXITCODE_PAGING, VM_EXITCODE_INST_EMUL, VM_EXITCODE_SPINUP_AP, - VM_EXITCODE_DEPRECATED1, /* used to be SPINDOWN_CPU */ + VM_EXITCODE_MMIO_EMUL, VM_EXITCODE_RUNBLOCK, VM_EXITCODE_IOAPIC_EOI, VM_EXITCODE_SUSPENDED, - VM_EXITCODE_INOUT_STR, + VM_EXITCODE_MMIO, VM_EXITCODE_TASK_SWITCH, VM_EXITCODE_MONITOR, VM_EXITCODE_MWAIT, @@ -325,25 +237,38 @@ enum vm_exitcode { VM_EXITCODE_MAX }; +enum inout_flags { + INOUT_IN = (1U << 0), /* direction: 'in' when set, else 'out' */ + + /* + * The following flags are used only for in-kernel emulation logic and + * are not exposed to userspace. + */ + INOUT_STR = (1U << 1), /* ins/outs operation */ + INOUT_REP = (1U << 2), /* 'rep' prefix present on instruction */ +}; + struct vm_inout { - uint16_t bytes:3; /* 1 or 2 or 4 */ - uint16_t in:1; - uint16_t string:1; - uint16_t rep:1; + uint32_t eax; uint16_t port; - uint32_t eax; /* valid for out */ + uint8_t bytes; /* 1 or 2 or 4 */ + uint8_t flags; /* see: inout_flags */ + + /* + * The address size and segment are relevant to INS/OUTS operations. + * Userspace is not concerned with them since the in-kernel emulation + * handles those specific aspects. 
+ */ + uint8_t addrsize; + uint8_t segment; }; -struct vm_inout_str { - struct vm_inout inout; /* must be the first element */ - struct vm_guest_paging paging; - uint64_t rflags; - uint64_t cr0; - uint64_t index; - uint64_t count; /* rep=1 (%rcx), rep=0 (1) */ - int addrsize; - enum vm_reg_name seg_name; - struct seg_desc seg_desc; +struct vm_mmio { + uint8_t bytes; /* 1/2/4/8 bytes */ + uint8_t read; /* read: 1, write: 0 */ + uint16_t _pad[3]; + uint64_t gpa; + uint64_t data; }; enum task_switch_reason { @@ -368,18 +293,25 @@ struct vm_exit { uint64_t rip; union { struct vm_inout inout; - struct vm_inout_str inout_str; + struct vm_mmio mmio; struct { uint64_t gpa; int fault_type; } paging; + /* + * Kernel-internal MMIO decoding and emulation. + * Userspace should not expect to see this, but rather a + * VM_EXITCODE_MMIO with the above 'mmio' context. + */ struct { uint64_t gpa; uint64_t gla; uint64_t cs_base; int cs_d; /* CS.D */ - struct vm_guest_paging paging; - struct vie vie; + } mmio_emul; + struct { + uint8_t inst[15]; + uint8_t num_valid; } inst_emul; /* * VMX specific payload. 
Used when there is no "better" @@ -433,6 +365,23 @@ struct vm_exit { } u; }; +enum vm_entry_cmds { + VEC_DEFAULT = 0, + VEC_DISCARD_INSTR, /* discard inst emul state */ + VEC_COMPLETE_MMIO, /* entry includes result for mmio emul */ + VEC_COMPLETE_INOUT, /* entry includes result for inout emul */ +}; + +struct vm_entry { + int cpuid; + uint_t cmd; /* see: vm_entry_cmds */ + void *exit_data; + union { + struct vm_inout inout; + struct vm_mmio mmio; + } u; +}; + void vm_inject_pf(void *vm, int vcpuid, int error_code, uint64_t cr2); int vm_restart_instruction(void *vm, int vcpuid); diff --git a/usr/src/uts/i86pc/sys/vmm_dev.h b/usr/src/uts/i86pc/sys/vmm_dev.h index 40e0857945..090e82ed29 100644 --- a/usr/src/uts/i86pc/sys/vmm_dev.h +++ b/usr/src/uts/i86pc/sys/vmm_dev.h @@ -83,11 +83,6 @@ struct vm_register_set { uint64_t *regvals; }; -struct vm_run { - int cpuid; - struct vm_exit vm_exit; -}; - struct vm_exception { int cpuid; int vector; @@ -204,7 +199,7 @@ struct vm_suspend { struct vm_gla2gpa { int vcpuid; /* inputs */ - int prot; /* PROT_READ or PROT_WRITE */ + int prot; /* PROT_READ or PROT_WRITE */ uint64_t gla; struct vm_guest_paging paging; int fault; /* outputs */ @@ -312,8 +307,8 @@ _Static_assert(sizeof(struct vm_readwrite_kernemu_device) == 24, "ABI"); #define VM_GET_MEMSEG (VMM_IOC_BASE | 0x02) #define VM_MMAP_GETNEXT (VMM_IOC_BASE | 0x03) -#define VM_LAPIC_IRQ (VMM_IOC_BASE | 0x04) -#define VM_LAPIC_LOCAL_IRQ (VMM_IOC_BASE | 0x05) +#define VM_LAPIC_IRQ (VMM_IOC_BASE | 0x04) +#define VM_LAPIC_LOCAL_IRQ (VMM_IOC_BASE | 0x05) #define VM_LAPIC_MSI (VMM_IOC_BASE | 0x06) #define VM_IOAPIC_ASSERT_IRQ (VMM_IOC_BASE | 0x07) |