diff options
| author | Patrick Mooney <pmooney@pfmooney.com> | 2018-05-15 03:06:09 +0000 |
|---|---|---|
| committer | Patrick Mooney <pmooney@pfmooney.com> | 2018-05-24 18:07:37 +0000 |
| commit | adfeb11ce94f7c9b78db3f67388fb704c2d8673a (patch) | |
| tree | dcf8186b69efcb05af66a3fe35cea2234efd9e12 | |
| parent | 28825f0c4ec7c0a9a0a1b599ed767e9c155d1230 (diff) | |
| download | illumos-joyent-adfeb11ce94f7c9b78db3f67388fb704c2d8673a.tar.gz | |
OS-6954 bhyve upstream sync
Reviewed by: Mike Gerdts <mike.gerdts@joyent.com>
Reviewed by: Hans Rosenfeld <hans.rosenfeld@joyent.com>
Approved by: Ryan Zezeski <rpz@joyent.com>
26 files changed, 2125 insertions, 82 deletions
diff --git a/exception_lists/cstyle b/exception_lists/cstyle index 27dc9ec2be..bd8816b589 100644 --- a/exception_lists/cstyle +++ b/exception_lists/cstyle @@ -1401,6 +1401,7 @@ usr/src/cmd/bhyve/console.[ch] usr/src/cmd/bhyve/consport.c usr/src/cmd/bhyve/dbgport.[ch] usr/src/cmd/bhyve/fwctl.[ch] +usr/src/cmd/bhyve/gdb.[ch] usr/src/cmd/bhyve/inout.[ch] usr/src/cmd/bhyve/ioapic.[ch] usr/src/cmd/bhyve/mem.[ch] diff --git a/usr/src/cmd/bhyve/Makefile b/usr/src/cmd/bhyve/Makefile index 181ddf2946..554603d4f8 100644 --- a/usr/src/cmd/bhyve/Makefile +++ b/usr/src/cmd/bhyve/Makefile @@ -38,6 +38,7 @@ SRCS = acpi.c \ consport.c \ dbgport.c \ fwctl.c \ + gdb.c \ inout.c \ ioapic.c \ mem.c \ diff --git a/usr/src/cmd/bhyve/bhyverun.c b/usr/src/cmd/bhyve/bhyverun.c index 949e537738..b12fba0800 100644 --- a/usr/src/cmd/bhyve/bhyverun.c +++ b/usr/src/cmd/bhyve/bhyverun.c @@ -70,6 +70,7 @@ __FBSDID("$FreeBSD$"); #include <pthread_np.h> #include <sysexits.h> #include <stdbool.h> +#include <stdint.h> #include <machine/vmm.h> #ifndef WITHOUT_CAPSICUM @@ -88,6 +89,7 @@ __FBSDID("$FreeBSD$"); #include "inout.h" #include "dbgport.h" #include "fwctl.h" +#include "gdb.h" #include "ioapic.h" #include "mem.h" #include "mevent.h" @@ -113,6 +115,8 @@ extern int vmexit_task_switch(struct vmctx *, struct vm_exit *, int *vcpu); char *vmname; int guest_ncpus; +uint16_t cores, maxcpus, sockets, threads; + char *guest_uuid_str; static int guest_vmexit_on_hlt, guest_vmexit_on_pause; @@ -166,15 +170,17 @@ usage(int code) { fprintf(stderr, - "Usage: %s [-abehuwxACHIPSWY] [-c vcpus] [-g <gdb port>] [-l <lpc>]\n" + "Usage: %s [-abehuwxACHPSWY]\n" + " %*s [-c [[cpus=]numcpus][,sockets=n][,cores=n][,threads=n]]\n" + " %*s [-g <gdb port>] [-l <lpc>]\n" #ifdef __FreeBSD__ - " %*s [-m memsize[K|k|M|m|G|g|T|t] [-p vcpu:hostcpu] [-s <pci>] [-U uuid] <vm>\n" + " %*s [-m mem] [-p vcpu:hostcpu] [-s <pci>] [-U uuid] <vm>\n" #else - " %*s [-s <pci>] [-U uuid] <vm>\n" + " %*s [-m mem] [-s <pci>] [-U uuid] <vm>\n" #endif " -a: local apic is in xAPIC mode (deprecated)\n" " -A: create ACPI tables\n" - " -c: # cpus (default 1)\n" + " -c: number of cpus and/or topology specification" " -C: include guest memory in core file\n" " -e: exit on unhandled I/O access\n" " -g: gdb port\n" @@ -194,11 +200,91 @@ usage(int code) " -W: force virtio to use single-vector MSI\n" " -x: local apic is in x2APIC mode\n" " -Y: disable MPtable generation\n", - progname, (int)strlen(progname), ""); + progname, (int)strlen(progname), "", (int)strlen(progname), "", + (int)strlen(progname), ""); exit(code); } +/* + * XXX This parser is known to have the following issues: + * 1. It accepts null key=value tokens ",,". + * 2. It accepts whitespace after = and before value. + * 3. Values out of range of INT are silently wrapped. + * 4. It doesn't check non-final values. + * 5. The apparently bogus limits of UINT16_MAX are for future expansion. + * + * The acceptance of a null specification ('-c ""') is by design to match the + * manual page syntax specification, this results in a topology of 1 vCPU. + */ +static int +topology_parse(const char *opt) +{ + uint64_t ncpus; + int c, chk, n, s, t, tmp; + char *cp, *str; + bool ns, scts; + + c = 1, n = 1, s = 1, t = 1; + ns = false, scts = false; + str = strdup(opt); + + while ((cp = strsep(&str, ",")) != NULL) { + if (sscanf(cp, "%i%n", &tmp, &chk) == 1) { + n = tmp; + ns = true; + } else if (sscanf(cp, "cpus=%i%n", &tmp, &chk) == 1) { + n = tmp; + ns = true; + } else if (sscanf(cp, "sockets=%i%n", &tmp, &chk) == 1) { + s = tmp; + scts = true; + } else if (sscanf(cp, "cores=%i%n", &tmp, &chk) == 1) { + c = tmp; + scts = true; + } else if (sscanf(cp, "threads=%i%n", &tmp, &chk) == 1) { + t = tmp; + scts = true; +#ifdef notyet /* Do not expose this until vmm.ko implements it */ + } else if (sscanf(cp, "maxcpus=%i%n", &tmp, &chk) == 1) { + m = tmp; +#endif + /* Skip the empty argument case from -c "" */ + } else if (cp[0] == '\0') + continue; + else + return (-1); + /* Any trailing garbage causes an error */ + if (cp[chk] != '\0') + return (-1); + } + /* + * Range check 1 <= n <= UINT16_MAX all values + */ + if (n < 1 || s < 1 || c < 1 || t < 1 || + n > UINT16_MAX || s > UINT16_MAX || c > UINT16_MAX || + t > UINT16_MAX) + return (-1); + + /* If only the cpus was specified, use that as sockets */ + if (!scts) + s = n; + /* + * Compute sockets * cores * threads avoiding overflow + * The range check above insures these are 16 bit values + * If n was specified check it against computed ncpus + */ + ncpus = (uint64_t)s * c * t; + if (ncpus > UINT16_MAX || (ns && n != ncpus)) + return (-1); + + guest_ncpus = ncpus; + sockets = s; + cores = c; + threads = t; + return(0); +} + #ifndef WITHOUT_CAPSICUM /* * 11-stable capsicum helpers @@ -327,6 +413,8 @@ fbsdrun_start_thread(void *param) snprintf(tname, sizeof(tname), "vcpu %d", vcpu); pthread_set_name_np(mtp->mt_thr, tname); + gdb_cpu_add(vcpu); + vm_loop(mtp->mt_ctx, vcpu, vmexit[vcpu].rip); /* not reached */ @@ -590,6 +678,8 @@ vmexit_mtrap(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) stats.vmexit_mtrap++; + gdb_cpu_mtrap(*pvcpu); + return (VMEXIT_CONTINUE); } @@ -664,6 +754,14 @@ vmexit_suspend(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) return (0); /* NOTREACHED */ } +static int +vmexit_debug(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) +{ + + gdb_cpu_suspend(*pvcpu); + return (VMEXIT_CONTINUE); +} + static vmexit_handler_t handler[VM_EXITCODE_MAX] = { [VM_EXITCODE_INOUT] = vmexit_inout, [VM_EXITCODE_INOUT_STR] = vmexit_inout, @@ -678,6 +776,7 @@ static vmexit_handler_t handler[VM_EXITCODE_MAX] = { [VM_EXITCODE_SPINUP_AP] = vmexit_spinup_ap, [VM_EXITCODE_SUSPENDED] = vmexit_suspend, [VM_EXITCODE_TASK_SWITCH] = vmexit_task_switch, + [VM_EXITCODE_DEBUG] = vmexit_debug, }; static void @@ -860,6 +959,9 @@ do_open(const char *vmname) exit(1); } } + error = vm_set_topology(ctx, sockets, cores, threads, maxcpus); + if (error) + errx(EX_OSERR, "vm_set_topology"); return (ctx); } @@ -888,9 +990,10 @@ mark_provisioned(void) int main(int argc, char *argv[]) { - int c, error, gdb_port, err, bvmcons; + int c, error, dbg_port, gdb_port, err, bvmcons; int max_vcpus, mptgen, memflags; int rtc_localtime; + bool gdb_stop; struct vmctx *ctx; uint64_t rip; size_t memsize; @@ -898,17 +1001,21 @@ main(int argc, char *argv[]) bvmcons = 0; progname = basename(argv[0]); + dbg_port = 0; gdb_port = 0; + gdb_stop = false; guest_ncpus = 1; + sockets = cores = threads = 1; + maxcpus = 0; memsize = 256 * MB; mptgen = 1; rtc_localtime = 1; memflags = 0; #ifdef __FreeBSD__ - optstr = "abehuwxACHIPSWYp:g:c:s:m:l:B:U:"; + optstr = "abehuwxACHIPSWYp:g:G:c:s:m:l:B:U:"; #else - optstr = "abehuwxACHIPSWYg:c:s:m:l:B:U:"; + optstr = "abehuwxACHIPSWY:g:G:c:s:m:l:B:U:"; #endif while ((c = getopt(argc, argv, optstr)) != -1) { switch (c) { @@ -936,12 +1043,22 @@ main(int argc, char *argv[]) break; #endif case 'c': - guest_ncpus = atoi(optarg); + if (topology_parse(optarg) != 0) { + errx(EX_USAGE, "invalid cpu topology " + "'%s'", optarg); + } break; case 'C': memflags |= VM_MEM_F_INCORE; break; case 'g': + dbg_port = atoi(optarg); + break; + case 'G': + if (optarg[0] == 'w') { + gdb_stop = true; + optarg++; + } gdb_port = atoi(optarg); break; case 'l': @@ -1014,11 +1131,6 @@ main(int argc, char *argv[]) vmname = argv[0]; ctx = do_open(vmname); - if (guest_ncpus < 1) { - fprintf(stderr, "Invalid guest vCPUs (%d)\n", guest_ncpus); - exit(1); - } - max_vcpus = num_vcpus_allowed(ctx); if (guest_ncpus > max_vcpus) { fprintf(stderr, "%d vCPUs requested but only %d available\n", @@ -1069,8 +1181,11 @@ main(int argc, char *argv[]) if (init_pci(ctx) != 0) exit(1); + if (dbg_port != 0) + init_dbgport(dbg_port); + if (gdb_port != 0) - init_dbgport(gdb_port); + init_gdb(ctx, gdb_port, gdb_stop); if (bvmcons) init_bvmcons(); diff --git a/usr/src/cmd/bhyve/gdb.c b/usr/src/cmd/bhyve/gdb.c new file mode 100644 index 0000000000..4414a05e27 --- /dev/null +++ b/usr/src/cmd/bhyve/gdb.c @@ -0,0 +1,1328 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2017-2018 John H. Baldwin <jhb@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#ifndef WITHOUT_CAPSICUM +#include <sys/capsicum.h> +#endif +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <sys/socket.h> +#include <machine/atomic.h> +#include <machine/specialreg.h> +#include <machine/vmm.h> +#include <netinet/in.h> +#include <assert.h> +#ifndef WITHOUT_CAPSICUM +#include <capsicum_helpers.h> +#endif +#include <err.h> +#include <errno.h> +#include <fcntl.h> +#include <pthread.h> +#include <pthread_np.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sysexits.h> +#include <unistd.h> +#include <vmmapi.h> + +#include "bhyverun.h" +#include "mem.h" +#include "mevent.h" + +/* + * GDB_SIGNAL_* numbers are part of the GDB remote protocol. Most stops + * use SIGTRAP. + */ +#define GDB_SIGNAL_TRAP 5 + +static void gdb_resume_vcpus(void); +static void check_command(int fd); + +static struct mevent *read_event, *write_event; + +static cpuset_t vcpus_active, vcpus_suspended, vcpus_waiting; +static pthread_mutex_t gdb_lock; +static pthread_cond_t idle_vcpus; +static bool stop_pending, first_stop; +static int stepping_vcpu, stopped_vcpu; + +/* + * An I/O buffer contains 'capacity' bytes of room at 'data'. For a + * read buffer, 'start' is unused and 'len' contains the number of + * valid bytes in the buffer. For a write buffer, 'start' is set to + * the index of the next byte in 'data' to send, and 'len' contains + * the remaining number of valid bytes to send. + */ +struct io_buffer { + uint8_t *data; + size_t capacity; + size_t start; + size_t len; +}; + +static struct io_buffer cur_comm, cur_resp; +static uint8_t cur_csum; +static int cur_vcpu; +static struct vmctx *ctx; +static int cur_fd = -1; + +const int gdb_regset[] = { + VM_REG_GUEST_RAX, + VM_REG_GUEST_RBX, + VM_REG_GUEST_RCX, + VM_REG_GUEST_RDX, + VM_REG_GUEST_RSI, + VM_REG_GUEST_RDI, + VM_REG_GUEST_RBP, + VM_REG_GUEST_RSP, + VM_REG_GUEST_R8, + VM_REG_GUEST_R9, + VM_REG_GUEST_R10, + VM_REG_GUEST_R11, + VM_REG_GUEST_R12, + VM_REG_GUEST_R13, + VM_REG_GUEST_R14, + VM_REG_GUEST_R15, + VM_REG_GUEST_RIP, + VM_REG_GUEST_RFLAGS, + VM_REG_GUEST_CS, + VM_REG_GUEST_SS, + VM_REG_GUEST_DS, + VM_REG_GUEST_ES, + VM_REG_GUEST_FS, + VM_REG_GUEST_GS +}; + +const int gdb_regsize[] = { + 8, + 8, + 8, + 8, + 8, + 8, + 8, + 8, + 8, + 8, + 8, + 8, + 8, + 8, + 8, + 8, + 8, + 4, + 4, + 4, + 4, + 4, + 4, + 4 +}; + +#ifdef GDB_LOG +#include <stdarg.h> +#include <stdio.h> + +static void __printflike(1, 2) +debug(const char *fmt, ...) +{ + static FILE *logfile; + va_list ap; + + if (logfile == NULL) { + logfile = fopen("/tmp/bhyve_gdb.log", "w"); + if (logfile == NULL) + return; +#ifndef WITHOUT_CAPSICUM + if (caph_limit_stream(fileno(logfile), CAPH_WRITE) == -1) { + fclose(logfile); + logfile = NULL; + return; + } +#endif + setlinebuf(logfile); + } + va_start(ap, fmt); + vfprintf(logfile, fmt, ap); + va_end(ap); +} +#else +#define debug(...) +#endif + +static int +guest_paging_info(int vcpu, struct vm_guest_paging *paging) +{ + uint64_t regs[4]; + const int regset[4] = { + VM_REG_GUEST_CR0, + VM_REG_GUEST_CR3, + VM_REG_GUEST_CR4, + VM_REG_GUEST_EFER + }; + + if (vm_get_register_set(ctx, vcpu, nitems(regset), regset, regs) == -1) + return (-1); + + /* + * For the debugger, always pretend to be the kernel (CPL 0), + * and if long-mode is enabled, always parse addresses as if + * in 64-bit mode. + */ + paging->cr3 = regs[1]; + paging->cpl = 0; + if (regs[3] & EFER_LMA) + paging->cpu_mode = CPU_MODE_64BIT; + else if (regs[0] & CR0_PE) + paging->cpu_mode = CPU_MODE_PROTECTED; + else + paging->cpu_mode = CPU_MODE_REAL; + if (!(regs[0] & CR0_PG)) + paging->paging_mode = PAGING_MODE_FLAT; + else if (!(regs[2] & CR4_PAE)) + paging->paging_mode = PAGING_MODE_32; + else if (regs[3] & EFER_LME) + paging->paging_mode = PAGING_MODE_64; + else + paging->paging_mode = PAGING_MODE_PAE; + return (0); +} + +/* + * Map a guest virtual address to a physical address (for a given vcpu). + * If a guest virtual address is valid, return 1. If the address is + * not valid, return 0. If an error occurs obtaining the mapping, + * return -1. + */ +static int +guest_vaddr2paddr(int vcpu, uint64_t vaddr, uint64_t *paddr) +{ + struct vm_guest_paging paging; + int fault; + + if (guest_paging_info(vcpu, &paging) == -1) + return (-1); + + /* + * Always use PROT_READ. We really care if the VA is + * accessible, not if the current vCPU can write. + */ + if (vm_gla2gpa_nofault(ctx, vcpu, &paging, vaddr, PROT_READ, paddr, + &fault) == -1) + return (-1); + if (fault) + return (0); + return (1); +} + +static void +io_buffer_reset(struct io_buffer *io) +{ + + io->start = 0; + io->len = 0; +} + +/* Available room for adding data. */ +static size_t +io_buffer_avail(struct io_buffer *io) +{ + + return (io->capacity - (io->start + io->len)); +} + +static uint8_t * +io_buffer_head(struct io_buffer *io) +{ + + return (io->data + io->start); +} + +static uint8_t * +io_buffer_tail(struct io_buffer *io) +{ + + return (io->data + io->start + io->len); +} + +static void +io_buffer_advance(struct io_buffer *io, size_t amount) +{ + + assert(amount <= io->len); + io->start += amount; + io->len -= amount; +} + +static void +io_buffer_consume(struct io_buffer *io, size_t amount) +{ + + io_buffer_advance(io, amount); + if (io->len == 0) { + io->start = 0; + return; + } + + /* + * XXX: Consider making this move optional and compacting on a + * future read() before realloc(). + */ + memmove(io->data, io_buffer_head(io), io->len); + io->start = 0; +} + +static void +io_buffer_grow(struct io_buffer *io, size_t newsize) +{ + uint8_t *new_data; + size_t avail, new_cap; + + avail = io_buffer_avail(io); + if (newsize <= avail) + return; + + new_cap = io->capacity + (newsize - avail); + new_data = realloc(io->data, new_cap); + if (new_data == NULL) + err(1, "Failed to grow GDB I/O buffer"); + io->data = new_data; + io->capacity = new_cap; +} + +static bool +response_pending(void) +{ + + if (cur_resp.start == 0 && cur_resp.len == 0) + return (false); + if (cur_resp.start + cur_resp.len == 1 && cur_resp.data[0] == '+') + return (false); + return (true); +} + +static void +close_connection(void) +{ + + /* + * XXX: This triggers a warning because mevent does the close + * before the EV_DELETE. + */ + pthread_mutex_lock(&gdb_lock); + mevent_delete(write_event); + mevent_delete_close(read_event); + write_event = NULL; + read_event = NULL; + io_buffer_reset(&cur_comm); + io_buffer_reset(&cur_resp); + cur_fd = -1; + + /* Resume any stopped vCPUs. */ + gdb_resume_vcpus(); + pthread_mutex_unlock(&gdb_lock); +} + +static uint8_t +hex_digit(uint8_t nibble) +{ + + if (nibble <= 9) + return (nibble + '0'); + else + return (nibble + 'a' - 10); +} + +static uint8_t +parse_digit(uint8_t v) +{ + + if (v >= '0' && v <= '9') + return (v - '0'); + if (v >= 'a' && v <= 'f') + return (v - 'a' + 10); + if (v >= 'A' && v <= 'F') + return (v - 'A' + 10); + return (0xF); +} + +/* Parses big-endian hexadecimal. */ +static uintmax_t +parse_integer(const uint8_t *p, size_t len) +{ + uintmax_t v; + + v = 0; + while (len > 0) { + v <<= 4; + v |= parse_digit(*p); + p++; + len--; + } + return (v); +} + +static uint8_t +parse_byte(const uint8_t *p) +{ + + return (parse_digit(p[0]) << 4 | parse_digit(p[1])); +} + +static void +send_pending_data(int fd) +{ + ssize_t nwritten; + + if (cur_resp.len == 0) { + mevent_disable(write_event); + return; + } + nwritten = write(fd, io_buffer_head(&cur_resp), cur_resp.len); + if (nwritten == -1) { + warn("Write to GDB socket failed"); + close_connection(); + } else { + io_buffer_advance(&cur_resp, nwritten); + if (cur_resp.len == 0) + mevent_disable(write_event); + else + mevent_enable(write_event); + } +} + +/* Append a single character to the output buffer. */ +static void +send_char(uint8_t data) +{ + io_buffer_grow(&cur_resp, 1); + *io_buffer_tail(&cur_resp) = data; + cur_resp.len++; +} + +/* Append an array of bytes to the output buffer. */ +static void +send_data(const uint8_t *data, size_t len) +{ + + io_buffer_grow(&cur_resp, len); + memcpy(io_buffer_tail(&cur_resp), data, len); + cur_resp.len += len; +} + +static void +format_byte(uint8_t v, uint8_t *buf) +{ + + buf[0] = hex_digit(v >> 4); + buf[1] = hex_digit(v & 0xf); +} + +/* + * Append a single byte (formatted as two hex characters) to the + * output buffer. + */ +static void +send_byte(uint8_t v) +{ + uint8_t buf[2]; + + format_byte(v, buf); + send_data(buf, sizeof(buf)); +} + +static void +start_packet(void) +{ + + send_char('$'); + cur_csum = 0; +} + +static void +finish_packet(void) +{ + + send_char('#'); + send_byte(cur_csum); + debug("-> %.*s\n", (int)cur_resp.len, io_buffer_head(&cur_resp)); +} + +/* + * Append a single character (for the packet payload) and update the + * checksum. + */ +static void +append_char(uint8_t v) +{ + + send_char(v); + cur_csum += v; +} + +/* + * Append an array of bytes (for the packet payload) and update the + * checksum. + */ +static void +append_packet_data(const uint8_t *data, size_t len) +{ + + send_data(data, len); + while (len > 0) { + cur_csum += *data; + data++; + len--; + } +} + +static void +append_string(const char *str) +{ + +#ifdef __FreeBSD__ + append_packet_data(str, strlen(str)); +#else + append_packet_data((const uint8_t *)str, strlen(str)); +#endif +} + +static void +append_byte(uint8_t v) +{ + uint8_t buf[2]; + + format_byte(v, buf); + append_packet_data(buf, sizeof(buf)); +} + +static void +append_unsigned_native(uintmax_t value, size_t len) +{ + size_t i; + + for (i = 0; i < len; i++) { + append_byte(value); + value >>= 8; + } +} + +static void +append_unsigned_be(uintmax_t value, size_t len) +{ + char buf[len * 2]; + size_t i; + + for (i = 0; i < len; i++) { +#ifdef __FreeBSD__ + format_byte(value, buf + (len - i - 1) * 2); +#else + format_byte(value, (uint8_t *)(buf + (len - i - 1) * 2)); +#endif + value >>= 8; + } +#ifdef __FreeBSD__ + append_packet_data(buf, sizeof(buf)); +#else + append_packet_data((const uint8_t *)buf, sizeof(buf)); +#endif +} + +static void +append_integer(unsigned int value) +{ + + if (value == 0) + append_char('0'); + else + append_unsigned_be(value, fls(value) + 7 / 8); +} + +static void +append_asciihex(const char *str) +{ + + while (*str != '\0') { + append_byte(*str); + str++; + } +} + +static void +send_empty_response(void) +{ + + start_packet(); + finish_packet(); +} + +static void +send_error(int error) +{ + + start_packet(); + append_char('E'); + append_byte(error); + finish_packet(); +} + +static void +send_ok(void) +{ + + start_packet(); + append_string("OK"); + finish_packet(); +} + +static int +parse_threadid(const uint8_t *data, size_t len) +{ + + if (len == 1 && *data == '0') + return (0); + if (len == 2 && memcmp(data, "-1", 2) == 0) + return (-1); + if (len == 0) + return (-2); + return (parse_integer(data, len)); +} + +static void +report_stop(void) +{ + + start_packet(); + if (stopped_vcpu == -1) + append_char('S'); + else + append_char('T'); + append_byte(GDB_SIGNAL_TRAP); + if (stopped_vcpu != -1) { + append_string("thread:"); + append_integer(stopped_vcpu + 1); + append_char(';'); + } + stopped_vcpu = -1; + finish_packet(); +} + +static void +gdb_finish_suspend_vcpus(void) +{ + + if (first_stop) { + first_stop = false; + stopped_vcpu = -1; + } else if (response_pending()) + stop_pending = true; + else { + report_stop(); + send_pending_data(cur_fd); + } +} + +static void +_gdb_cpu_suspend(int vcpu, bool report_stop) +{ + + debug("$vCPU %d suspending\n", vcpu); + CPU_SET(vcpu, &vcpus_waiting); + if (report_stop && CPU_CMP(&vcpus_waiting, &vcpus_suspended) == 0) + gdb_finish_suspend_vcpus(); + while (CPU_ISSET(vcpu, &vcpus_suspended) && vcpu != stepping_vcpu) + pthread_cond_wait(&idle_vcpus, &gdb_lock); + CPU_CLR(vcpu, &vcpus_waiting); + debug("$vCPU %d resuming\n", vcpu); +} + +void +gdb_cpu_add(int vcpu) +{ + + debug("$vCPU %d starting\n", vcpu); + pthread_mutex_lock(&gdb_lock); + CPU_SET(vcpu, &vcpus_active); + + /* + * If a vcpu is added while vcpus are stopped, suspend the new + * vcpu so that it will pop back out with a debug exit before + * executing the first instruction. + */ + if (!CPU_EMPTY(&vcpus_suspended)) { + CPU_SET(vcpu, &vcpus_suspended); + _gdb_cpu_suspend(vcpu, false); + } + pthread_mutex_unlock(&gdb_lock); +} + +void +gdb_cpu_suspend(int vcpu) +{ + + pthread_mutex_lock(&gdb_lock); + _gdb_cpu_suspend(vcpu, true); + pthread_mutex_unlock(&gdb_lock); +} + +void +gdb_cpu_mtrap(int vcpu) +{ + + debug("$vCPU %d MTRAP\n", vcpu); + pthread_mutex_lock(&gdb_lock); + if (vcpu == stepping_vcpu) { + stepping_vcpu = -1; + vm_set_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, 0); + vm_suspend_cpu(ctx, vcpu); + assert(stopped_vcpu == -1); + stopped_vcpu = vcpu; + _gdb_cpu_suspend(vcpu, true); + } + pthread_mutex_unlock(&gdb_lock); +} + +static void +gdb_suspend_vcpus(void) +{ + + assert(pthread_mutex_isowned_np(&gdb_lock)); + debug("suspending all CPUs\n"); + vcpus_suspended = vcpus_active; + vm_suspend_cpu(ctx, -1); + if (CPU_CMP(&vcpus_waiting, &vcpus_suspended) == 0) + gdb_finish_suspend_vcpus(); +} + +static bool +gdb_step_vcpu(int vcpu) +{ + int error, val; + + debug("$vCPU %d step\n", vcpu); + error = vm_get_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, &val); + if (error < 0) + return (false); + error = vm_set_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, 1); + vm_resume_cpu(ctx, vcpu); + stepping_vcpu = vcpu; + pthread_cond_broadcast(&idle_vcpus); + return (true); +} + +static void +gdb_resume_vcpus(void) +{ + + assert(pthread_mutex_isowned_np(&gdb_lock)); + vm_resume_cpu(ctx, -1); + debug("resuming all CPUs\n"); + CPU_ZERO(&vcpus_suspended); + pthread_cond_broadcast(&idle_vcpus); +} + +static void +gdb_read_regs(void) +{ + uint64_t regvals[nitems(gdb_regset)]; + int i; + + if (vm_get_register_set(ctx, cur_vcpu, nitems(gdb_regset), + gdb_regset, regvals) == -1) { + send_error(errno); + return; + } + start_packet(); + for (i = 0; i < nitems(regvals); i++) + append_unsigned_native(regvals[i], gdb_regsize[i]); + finish_packet(); +} + +static void +gdb_read_mem(const uint8_t *data, size_t len) +{ + uint64_t gpa, gva, val; + uint8_t *cp; + size_t resid, todo, bytes; + bool started; + int error; + + cp = memchr(data, ',', len); + if (cp == NULL) { + send_error(EINVAL); + return; + } + gva = parse_integer(data + 1, cp - (data + 1)); + resid = parse_integer(cp + 1, len - (cp + 1 - data)); + started = false; + + while (resid > 0) { + error = guest_vaddr2paddr(cur_vcpu, gva, &gpa); + if (error == -1) { + if (started) + finish_packet(); + else + send_error(errno); + return; + } + if (error == 0) { + if (started) + finish_packet(); + else + send_error(EFAULT); + return; + } + + /* Read bytes from current page. */ + todo = getpagesize() - gpa % getpagesize(); + if (todo > resid) + todo = resid; + + cp = paddr_guest2host(ctx, gpa, todo); + if (cp != NULL) { + /* + * If this page is guest RAM, read it a byte + * at a time. + */ + if (!started) { + start_packet(); + started = true; + } + while (todo > 0) { + append_byte(*cp); + cp++; + gpa++; + gva++; + resid--; + todo--; + } + } else { + /* + * If this page isn't guest RAM, try to handle + * it via MMIO. For MMIO requests, use + * aligned reads of words when possible. + */ + while (todo > 0) { + if (gpa & 1 || todo == 1) + bytes = 1; + else if (gpa & 2 || todo == 2) + bytes = 2; + else + bytes = 4; + error = read_mem(ctx, cur_vcpu, gpa, &val, + bytes); + if (error == 0) { + if (!started) { + start_packet(); + started = true; + } + gpa += bytes; + gva += bytes; + resid -= bytes; + todo -= bytes; + while (bytes > 0) { + append_byte(val); + val >>= 8; + bytes--; + } + } else { + if (started) + finish_packet(); + else + send_error(EFAULT); + return; + } + } + } + assert(resid == 0 || gpa % getpagesize() == 0); + } + if (!started) + start_packet(); + finish_packet(); +} + +static bool +command_equals(const uint8_t *data, size_t len, const char *cmd) +{ + + if (strlen(cmd) > len) + return (false); + return (memcmp(data, cmd, strlen(cmd)) == 0); +} + +static void +gdb_query(const uint8_t *data, size_t len) +{ + + /* + * TODO: + * - qSearch + * - qSupported + */ + if (command_equals(data, len, "qAttached")) { + start_packet(); + append_char('1'); + finish_packet(); + } else if (command_equals(data, len, "qC")) { + start_packet(); + append_string("QC"); + append_integer(cur_vcpu + 1); + finish_packet(); + } else if (command_equals(data, len, "qfThreadInfo")) { + cpuset_t mask; + bool first; + int vcpu; + + if (CPU_EMPTY(&vcpus_active)) { + send_error(EINVAL); + return; + } + mask = vcpus_active; + start_packet(); + append_char('m'); + first = true; + while (!CPU_EMPTY(&mask)) { + vcpu = CPU_FFS(&mask) - 1; + CPU_CLR(vcpu, &mask); + if (first) + first = false; + else + append_char(','); + append_integer(vcpu + 1); + } + finish_packet(); + } else if (command_equals(data, len, "qsThreadInfo")) { + start_packet(); + append_char('l'); + finish_packet(); + } else if (command_equals(data, len, "qThreadExtraInfo")) { + char buf[16]; + int tid; + + data += strlen("qThreadExtraInfo"); + len -= strlen("qThreadExtraInfo"); + if (*data != ',') { + send_error(EINVAL); + return; + } + tid = parse_threadid(data + 1, len - 1); + if (tid <= 0 || !CPU_ISSET(tid - 1, &vcpus_active)) { + send_error(EINVAL); + return; + } + + snprintf(buf, sizeof(buf), "vCPU %d", tid - 1); + start_packet(); + append_asciihex(buf); + finish_packet(); + } else + send_empty_response(); +} + +static void +handle_command(const uint8_t *data, size_t len) +{ + + /* Reject packets with a sequence-id. */ + if (len >= 3 && data[0] >= '0' && data[0] <= '9' && + data[0] >= '0' && data[0] <= '9' && data[2] == ':') { + send_empty_response(); + return; + } + + switch (*data) { + case 'c': + if (len != 1) { + send_error(EINVAL); + break; + } + + /* Don't send a reply until a stop occurs. */ + gdb_resume_vcpus(); + break; + case 'D': + send_ok(); + + /* TODO: Resume any stopped CPUs. */ + break; + case 'g': { + gdb_read_regs(); + break; + } + case 'H': { + int tid; + + if (data[1] != 'g' && data[1] != 'c') { + send_error(EINVAL); + break; + } + tid = parse_threadid(data + 2, len - 2); + if (tid == -2) { + send_error(EINVAL); + break; + } + + if (CPU_EMPTY(&vcpus_active)) { + send_error(EINVAL); + break; + } + if (tid == -1 || tid == 0) + cur_vcpu = CPU_FFS(&vcpus_active) - 1; + else if (CPU_ISSET(tid - 1, &vcpus_active)) + cur_vcpu = tid - 1; + else { + send_error(EINVAL); + break; + } + send_ok(); + break; + } + case 'm': + gdb_read_mem(data, len); + break; + case 'T': { + int tid; + + tid = parse_threadid(data + 1, len - 1); + if (tid <= 0 || !CPU_ISSET(tid - 1, &vcpus_active)) { + send_error(EINVAL); + return; + } + send_ok(); + break; + } + case 'q': + gdb_query(data, len); + break; + case 's': + if (len != 1) { + send_error(EINVAL); + break; + } + + /* Don't send a reply until a stop occurs. */ + if (!gdb_step_vcpu(cur_vcpu)) { + send_error(EOPNOTSUPP); + break; + } + break; + case '?': + /* XXX: Only if stopped? */ + /* For now, just report that we are always stopped. */ + start_packet(); + append_char('S'); + append_byte(GDB_SIGNAL_TRAP); + finish_packet(); + break; + case 'G': /* TODO */ + case 'M': /* TODO */ + case 'v': + /* Handle 'vCont' */ + /* 'vCtrlC' */ + case 'p': /* TODO */ + case 'P': /* TODO */ + case 'Q': /* TODO */ + case 't': /* TODO */ + case 'X': /* TODO */ + case 'z': /* TODO */ + case 'Z': /* TODO */ + default: + send_empty_response(); + } +} + +/* Check for a valid packet in the command buffer. */ +static void +check_command(int fd) +{ + uint8_t *head, *hash, *p, sum; + size_t avail, plen; + + for (;;) { + avail = cur_comm.len; + if (avail == 0) + return; + head = io_buffer_head(&cur_comm); + switch (*head) { + case 0x03: + debug("<- Ctrl-C\n"); + io_buffer_consume(&cur_comm, 1); + + gdb_suspend_vcpus(); + break; + case '+': + /* ACK of previous response. */ + debug("<- +\n"); + if (response_pending()) + io_buffer_reset(&cur_resp); + io_buffer_consume(&cur_comm, 1); + if (stop_pending) { + stop_pending = false; + report_stop(); + send_pending_data(fd); + } + break; + case '-': + /* NACK of previous response. */ + debug("<- -\n"); + if (response_pending()) { + cur_resp.len += cur_resp.start; + cur_resp.start = 0; + if (cur_resp.data[0] == '+') + io_buffer_advance(&cur_resp, 1); + debug("-> %.*s\n", (int)cur_resp.len, + io_buffer_head(&cur_resp)); + } + io_buffer_consume(&cur_comm, 1); + send_pending_data(fd); + break; + case '$': + /* Packet. */ + + if (response_pending()) { + warnx("New GDB command while response in " + "progress"); + io_buffer_reset(&cur_resp); + } + + /* Is packet complete? */ + hash = memchr(head, '#', avail); + if (hash == NULL) + return; + plen = (hash - head + 1) + 2; + if (avail < plen) + return; + debug("<- %.*s\n", (int)plen, head); + + /* Verify checksum. */ + for (sum = 0, p = head + 1; p < hash; p++) + sum += *p; + if (sum != parse_byte(hash + 1)) { + io_buffer_consume(&cur_comm, plen); + debug("-> -\n"); + send_char('-'); + send_pending_data(fd); + break; + } + send_char('+'); + + handle_command(head + 1, hash - (head + 1)); + io_buffer_consume(&cur_comm, plen); + if (!response_pending()) { + debug("-> +\n"); + } + send_pending_data(fd); + break; + default: + /* XXX: Possibly drop connection instead. */ + debug("-> %02x\n", *head); + io_buffer_consume(&cur_comm, 1); + break; + } + } +} + +static void +gdb_readable(int fd, enum ev_type event, void *arg) +{ + ssize_t nread; + int pending; + + if (ioctl(fd, FIONREAD, &pending) == -1) { + warn("FIONREAD on GDB socket"); + return; + } + + /* + * 'pending' might be zero due to EOF. We need to call read + * with a non-zero length to detect EOF. + */ + if (pending == 0) + pending = 1; + + /* Ensure there is room in the command buffer. */ + io_buffer_grow(&cur_comm, pending); + assert(io_buffer_avail(&cur_comm) >= pending); + + nread = read(fd, io_buffer_tail(&cur_comm), io_buffer_avail(&cur_comm)); + if (nread == 0) { + close_connection(); + } else if (nread == -1) { + if (errno == EAGAIN) + return; + + warn("Read from GDB socket"); + close_connection(); + } else { + cur_comm.len += nread; + pthread_mutex_lock(&gdb_lock); + check_command(fd); + pthread_mutex_unlock(&gdb_lock); + } +} + +static void +gdb_writable(int fd, enum ev_type event, void *arg) +{ + + send_pending_data(fd); +} + +static void +new_connection(int fd, enum ev_type event, void *arg) +{ + int optval, s; + + s = accept4(fd, NULL, NULL, SOCK_NONBLOCK); + if (s == -1) { + if (arg != NULL) + err(1, "Failed accepting initial GDB connection"); + + /* Silently ignore errors post-startup. */ + return; + } + + optval = 1; + if (setsockopt(s, SOL_SOCKET, SO_NOSIGPIPE, &optval, sizeof(optval)) == + -1) { + warn("Failed to disable SIGPIPE for GDB connection"); + close(s); + return; + } + + pthread_mutex_lock(&gdb_lock); + if (cur_fd != -1) { + close(s); + warnx("Ignoring additional GDB connection."); + } + + read_event = mevent_add(s, EVF_READ, gdb_readable, NULL); + if (read_event == NULL) { + if (arg != NULL) + err(1, "Failed to setup initial GDB connection"); + pthread_mutex_unlock(&gdb_lock); + return; + } + write_event = mevent_add(s, EVF_WRITE, gdb_writable, NULL); + if (write_event == NULL) { + if (arg != NULL) + err(1, "Failed to setup initial GDB connection"); + mevent_delete_close(read_event); + read_event = NULL; + } + + cur_fd = s; + cur_vcpu = 0; + stepping_vcpu = -1; + stopped_vcpu = -1; + stop_pending = false; + + /* Break on attach. */ + first_stop = true; + gdb_suspend_vcpus(); + pthread_mutex_unlock(&gdb_lock); +} + +#ifndef WITHOUT_CAPSICUM +void +limit_gdb_socket(int s) +{ + cap_rights_t rights; + unsigned long ioctls[] = { FIONREAD }; + + cap_rights_init(&rights, CAP_ACCEPT, CAP_EVENT, CAP_READ, CAP_WRITE, + CAP_SETSOCKOPT, CAP_IOCTL); + if (cap_rights_limit(s, &rights) == -1 && errno != ENOSYS) + errx(EX_OSERR, "Unable to apply rights for sandbox"); + if (cap_ioctls_limit(s, ioctls, nitems(ioctls)) == -1 && errno != ENOSYS) + errx(EX_OSERR, "Unable to apply rights for sandbox"); +} +#endif + +void +init_gdb(struct vmctx *_ctx, int sport, bool wait) +{ + struct sockaddr_in sin; + int error, flags, s; + + debug("==> starting on %d, %swaiting\n", sport, wait ? "" : "not "); + + error = pthread_mutex_init(&gdb_lock, NULL); + if (error != 0) + errc(1, error, "gdb mutex init"); + error = pthread_cond_init(&idle_vcpus, NULL); + if (error != 0) + errc(1, error, "gdb cv init"); + + ctx = _ctx; + s = socket(PF_INET, SOCK_STREAM, 0); + if (s < 0) + err(1, "gdb socket create"); + +#ifdef __FreeBSD__ + sin.sin_len = sizeof(sin); +#endif + sin.sin_family = AF_INET; + sin.sin_addr.s_addr = htonl(INADDR_ANY); + sin.sin_port = htons(sport); + + if (bind(s, (struct sockaddr *)&sin, sizeof(sin)) < 0) + err(1, "gdb socket bind"); + + if (listen(s, 1) < 0) + err(1, "gdb socket listen"); + + if (wait) { + /* + * Set vcpu 0 in vcpus_suspended. This will trigger the + * logic in gdb_cpu_add() to suspend the first vcpu before + * it starts execution. The vcpu will remain suspended + * until a debugger connects. + */ + stepping_vcpu = -1; + stopped_vcpu = -1; + CPU_SET(0, &vcpus_suspended); + } + + flags = fcntl(s, F_GETFL); + if (fcntl(s, F_SETFL, flags | O_NONBLOCK) == -1) + err(1, "Failed to mark gdb socket non-blocking"); + +#ifndef WITHOUT_CAPSICUM + limit_gdb_socket(s); +#endif + mevent_add(s, EVF_READ, new_connection, NULL); +} diff --git a/usr/src/cmd/bhyve/gdb.h b/usr/src/cmd/bhyve/gdb.h new file mode 100644 index 0000000000..fa2184df16 --- /dev/null +++ b/usr/src/cmd/bhyve/gdb.h @@ -0,0 +1,39 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2017 John H. Baldwin <jhb@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef __GDB_H__ +#define __GDB_H__ + +void gdb_cpu_add(int vcpu); +void gdb_cpu_mtrap(int vcpu); +void gdb_cpu_suspend(int vcpu); +void init_gdb(struct vmctx *ctx, int sport, bool wait); + +#endif /* !__GDB_H__ */ diff --git a/usr/src/cmd/bhyve/mem.c b/usr/src/cmd/bhyve/mem.c index e01d617a89..105d37cf56 100644 --- a/usr/src/cmd/bhyve/mem.c +++ b/usr/src/cmd/bhyve/mem.c @@ -136,6 +136,9 @@ mmio_rb_dump(struct mmio_rb_tree *rbt) RB_GENERATE(mmio_rb_tree, mmio_rb_range, mr_link, mmio_rb_range_compare); +typedef int (mem_cb_t)(struct vmctx *ctx, int vcpu, uint64_t gpa, + struct mem_range *mr, void *arg); + static int mem_read(void *ctx, int vcpu, uint64_t gpa, uint64_t *rval, int size, void *arg) { @@ -158,10 +161,9 @@ mem_write(void *ctx, int vcpu, uint64_t gpa, uint64_t wval, int size, void *arg) return (error); } -int -emulate_mem(struct vmctx *ctx, int vcpu, uint64_t paddr, struct vie *vie, - struct vm_guest_paging *paging) - +static int +access_memory(struct vmctx *ctx, int vcpu, uint64_t paddr, mem_cb_t *cb, + void *arg) { struct mmio_rb_range *entry; int err, immutable; @@ -204,8 +206,7 @@ emulate_mem(struct vmctx *ctx, int vcpu, uint64_t paddr, struct vie *vie, if (immutable) pthread_rwlock_unlock(&mmio_rwlock); - err = vmm_emulate_instruction(ctx, vcpu, paddr, vie, paging, - mem_read, mem_write, &entry->mr_param); + err = cb(ctx, vcpu, paddr, &entry->mr_param, arg); if (!immutable) pthread_rwlock_unlock(&mmio_rwlock); @@ -213,6 +214,60 @@ emulate_mem(struct vmctx *ctx, int vcpu, uint64_t paddr, struct vie *vie, return (err); } +struct emulate_mem_args { + struct vie *vie; + struct vm_guest_paging *paging; +}; + +static int +emulate_mem_cb(struct vmctx *ctx, int vcpu, uint64_t paddr, struct mem_range *mr, + void *arg) +{ + struct emulate_mem_args *ema; + + ema = arg; + return (vmm_emulate_instruction(ctx, vcpu, paddr, ema->vie, ema->paging, + mem_read, mem_write, mr)); +} + +int +emulate_mem(struct vmctx *ctx, int vcpu, uint64_t paddr, struct vie *vie, + struct vm_guest_paging *paging) + +{ + struct emulate_mem_args ema; + + ema.vie = vie; + ema.paging = paging; + return (access_memory(ctx, vcpu, paddr, emulate_mem_cb, &ema)); +} + +struct read_mem_args { + uint64_t *rval; + int size; +}; + +static int +read_mem_cb(struct vmctx *ctx, int vcpu, uint64_t paddr, struct mem_range *mr, + void *arg) +{ + struct read_mem_args *rma; + + rma = arg; + return (mr->handler(ctx, vcpu, MEM_F_READ, paddr, rma->size, + rma->rval, mr->arg1, mr->arg2)); +} + +int +read_mem(struct vmctx *ctx, int vcpu, uint64_t gpa, uint64_t *rval, int size) +{ + struct read_mem_args rma; + + rma.rval = rval; + rma.size = size; + return (access_memory(ctx, vcpu, gpa, read_mem_cb, &rma)); +} + static int register_mem_int(struct mmio_rb_tree *rbt, struct mem_range *memp) { diff --git a/usr/src/cmd/bhyve/mem.h b/usr/src/cmd/bhyve/mem.h index f9f86fa4a0..f386d67749 100644 --- a/usr/src/cmd/bhyve/mem.h +++ b/usr/src/cmd/bhyve/mem.h @@ -56,6 +56,8 @@ void init_mem(void); int emulate_mem(struct vmctx *, int vcpu, uint64_t paddr, struct vie *vie, struct vm_guest_paging *paging); +int read_mem(struct vmctx *ctx, int vcpu, uint64_t gpa, uint64_t *rval, + int size); int register_mem(struct mem_range *memp); int register_mem_fallback(struct mem_range *memp); int unregister_mem(struct mem_range *memp); diff --git a/usr/src/cmd/bhyve/rfb.c b/usr/src/cmd/bhyve/rfb.c index 96712a6acc..d96b45c5da 100644 --- a/usr/src/cmd/bhyve/rfb.c +++ b/usr/src/cmd/bhyve/rfb.c @@ -546,16 +546,21 @@ rfb_send_screen(struct rfb_softc *rc, int cfd, int all) } for (x = 0; x < xcells; x++) { + if (x == (xcells - 1) && rem_x > 0) + cellwidth = rem_x; + else + cellwidth = PIX_PER_CELL; + if (rc->hw_crc) crc_p[x] = fast_crc32(p, - PIX_PER_CELL * sizeof(uint32_t), + cellwidth * sizeof(uint32_t), crc_p[x]); else crc_p[x] = (uint32_t)crc32(crc_p[x], (Bytef *)p, - PIX_PER_CELL * sizeof(uint32_t)); + cellwidth * sizeof(uint32_t)); - p += PIX_PER_CELL; + p += cellwidth; /* check for crc delta if last row in cell */ if ((y & PIXCELL_MASK) == PIXCELL_MASK || y == (h-1)) { @@ -568,28 +573,6 @@ rfb_send_screen(struct rfb_softc *rc, int cfd, int all) } } } - - if (rem_x) { - if (rc->hw_crc) - crc_p[x] = fast_crc32(p, - rem_x * sizeof(uint32_t), - crc_p[x]); - else - crc_p[x] = (uint32_t)crc32(crc_p[x], - (Bytef *)p, - rem_x * sizeof(uint32_t)); - p += rem_x; - - if ((y & PIXCELL_MASK) == PIXCELL_MASK || y == (h-1)) { - if (orig_crc[x] != crc_p[x]) { - orig_crc[x] = crc_p[x]; - crc_p[x] = 1; - changes++; - } else { - crc_p[x] = 0; - } - } - } } /* If number of changes is > THRESH percent, send the whole screen */ diff --git a/usr/src/cmd/bhyvectl/bhyvectl.c b/usr/src/cmd/bhyvectl/bhyvectl.c index 0cebc77b05..5f8932efa8 100644 --- a/usr/src/cmd/bhyvectl/bhyvectl.c +++ b/usr/src/cmd/bhyvectl/bhyvectl.c @@ -208,7 +208,8 @@ usage(bool cpu_intel) " [--get-msr-bitmap]\n" " [--get-msr-bitmap-address]\n" " [--get-guest-sysenter]\n" - " [--get-exit-reason]\n", + " [--get-exit-reason]\n" + " [--get-cpu-topology]\n", progname); if (cpu_intel) { @@ -304,6 +305,7 @@ enum x2apic_state x2apic_state; static int unassign_pptdev, bus, slot, func; #endif static int run; +static int get_cpu_topology; /* * VMCB specific. @@ -1476,6 +1478,7 @@ setup_options(bool cpu_intel) { "get-active-cpus", NO_ARG, &get_active_cpus, 1 }, { "get-suspended-cpus", NO_ARG, &get_suspended_cpus, 1 }, { "get-intinfo", NO_ARG, &get_intinfo, 1 }, + { "get-cpu-topology", NO_ARG, &get_cpu_topology, 1 }, }; const struct option intel_opts[] = { @@ -2336,6 +2339,14 @@ main(int argc, char *argv[]) } } + if (!error && (get_cpu_topology || get_all)) { + uint16_t sockets, cores, threads, maxcpus; + + vm_get_topology(ctx, &sockets, &cores, &threads, &maxcpus); + printf("cpu_topology:\tsockets=%hu, cores=%hu, threads=%hu, " + "maxcpus=%hu\n", sockets, cores, threads, maxcpus); + } + if (!error && run) { error = vm_run(ctx, vcpu, &vmexit); if (error == 0) diff --git a/usr/src/compat/freebsd/err.h b/usr/src/compat/freebsd/err.h new file mode 100644 index 0000000000..40d144e025 --- /dev/null +++ b/usr/src/compat/freebsd/err.h @@ -0,0 +1,23 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2018 Joyent, Inc. + */ + +#ifndef _COMPAT_FREEBSD_ERR_H_ +#define _COMPAT_FREEBSD_ERR_H_ + +#define errc(code, num, ...) err(code, __VA_ARGS__) + +#include_next <err.h> + +#endif /* _COMPAT_FREEBSD_ERR_H_ */ diff --git a/usr/src/compat/freebsd/sys/cpuset.h b/usr/src/compat/freebsd/sys/cpuset.h index 4328ebcc46..dadadf15b2 100644 --- a/usr/src/compat/freebsd/sys/cpuset.h +++ b/usr/src/compat/freebsd/sys/cpuset.h @@ -52,6 +52,7 @@ int cpusetobj_ffs(const cpuset_t *set); #include <sys/bitmap.h> #include <machine/atomic.h> +#include <machine/cpufunc.h> /* For now, assume NCPU of 256 */ #define CPU_SETSIZE (256) @@ -60,7 +61,8 @@ typedef struct { ulong_t _bits[BT_BITOUL(CPU_SETSIZE)]; } cpuset_t; -static __inline int cpuset_empty(const cpuset_t *set) +static __inline int +cpuset_isempty(const cpuset_t *set) { uint_t i; @@ -71,9 +73,54 @@ static __inline int cpuset_empty(const cpuset_t *set) return (1); } +static __inline void +cpuset_zero(cpuset_t *dst) +{ + uint_t i; + + for (i = 0; i < BT_BITOUL(CPU_SETSIZE); i++) { + dst->_bits[i] = 0; + } +} + +static __inline int +cpuset_isequal(cpuset_t *s1, cpuset_t *s2) +{ + uint_t i; + + for (i = 0; i < BT_BITOUL(CPU_SETSIZE); i++) { + if (s1->_bits[i] != s2->_bits[i]) + return (0); + } + return (1); +} + +static __inline uint_t +cpusetobj_ffs(const cpuset_t *set) +{ + uint_t i, cbit; + cbit = 0; + for (i = 0; i < BT_BITOUL(CPU_SETSIZE); i++) { + if (set->_bits[i] != 0) { + cbit = ffsl(set->_bits[i]); + cbit += i * sizeof (set->_bits[0]); + break; + } + } + return (cbit); +} + + +#define CPU_SET(cpu, setp) BT_SET((setp)->_bits, cpu) +#define CPU_CLR(cpu, setp) BT_CLEAR((setp)->_bits, cpu) +#define CPU_ZERO(setp) cpuset_zero((setp)) +#define CPU_CMP(set1, set2) (cpuset_isequal( \ + (cpuset_t *)(set1), \ + (cpuset_t *)(set2)) == 0) +#define CPU_FFS(set) cpusetobj_ffs(set) #define CPU_ISSET(cpu, setp) BT_TEST((setp)->_bits, cpu) -#define CPU_EMPTY(setp) cpuset_empty((setp)) +#define CPU_EMPTY(setp) cpuset_isempty((setp)) #define CPU_SET_ATOMIC(cpu, setp) \ atomic_set_long(&(BT_WIM((setp)->_bits, cpu)), BT_BIW(cpu)) #define CPU_CLR_ATOMIC(cpu, setp) \ diff --git a/usr/src/compat/freebsd/sys/ioctl.h b/usr/src/compat/freebsd/sys/ioctl.h index e223e1e4c7..72a46b8085 100644 --- a/usr/src/compat/freebsd/sys/ioctl.h +++ b/usr/src/compat/freebsd/sys/ioctl.h @@ -17,6 +17,8 @@ #define _COMPAT_FREEBSD_SYS_IOCTL_H_ #include <sys/ioccom.h> +/* Get BSD compatibility from the ioctl header */ +#define BSD_COMP #include_next <sys/ioctl.h> #endif /* _COMPAT_FREEBSD_SYS_IOCTL_H_ */ diff --git a/usr/src/compat/freebsd/sys/sdt.h b/usr/src/compat/freebsd/sys/sdt.h new file mode 100644 index 0000000000..32d887c0d8 --- /dev/null +++ b/usr/src/compat/freebsd/sys/sdt.h @@ -0,0 +1,37 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2018 Joyent, Inc. + */ + +#ifndef _COMPAT_FREEBSD_SYS_SDT_H_ +#define _COMPAT_FREEBSD_SYS_SDT_H_ + +/* Empty macros to cover FreeBSD's SDT linker tricks */ + +#define SDT_PROVIDER_DECLARE(mod) +#define SDT_PROVIDER_DEFINE(mod) + +#define SDT_PROBE_DEFINE1(...) +#define SDT_PROBE_DEFINE2(...) +#define SDT_PROBE_DEFINE3(...) +#define SDT_PROBE_DEFINE4(...) +#define SDT_PROBE_DEFINE5(...) +#define SDT_PROBE1(...) +#define SDT_PROBE2(...) +#define SDT_PROBE3(...) +#define SDT_PROBE4(...) +#define SDT_PROBE5(...) + +#include_next <sys/sdt.h> + +#endif /* _COMPAT_FREEBSD_SYS_SDT_H_ */ diff --git a/usr/src/lib/libvmmapi/common/mapfile-vers b/usr/src/lib/libvmmapi/common/mapfile-vers index 8979fac4cb..ad47407281 100644 --- a/usr/src/lib/libvmmapi/common/mapfile-vers +++ b/usr/src/lib/libvmmapi/common/mapfile-vers @@ -47,6 +47,7 @@ SYMBOL_VERSION ILLUMOSprivate { vm_create_devmem; vm_create; vm_create_devmem; + vm_debug_cpus; vm_destroy; vm_destroy; vm_get_capability; @@ -66,6 +67,7 @@ SYMBOL_VERSION ILLUMOSprivate { vm_get_seg_desc; vm_get_stat_desc; vm_get_stats; + vm_get_topology; vm_get_x2apic_state; vm_gla2gpa; vm_gla2gpa_nofault; @@ -105,12 +107,15 @@ SYMBOL_VERSION ILLUMOSprivate { vm_set_memflags; vm_set_register; vm_set_register_set; + vm_set_topology; vm_set_x2apic_state; vm_setup_memory; vm_setup_pptdev_msi; vm_setup_pptdev_msix; vm_suspend; + vm_suspend_cpu; vm_suspended_cpus; + vm_resume_cpu; vm_unassign_pptdev; local: diff --git a/usr/src/lib/libvmmapi/common/vmmapi.c b/usr/src/lib/libvmmapi/common/vmmapi.c index 3cc4df93c3..de86e2b9bd 100644 --- a/usr/src/lib/libvmmapi/common/vmmapi.c +++ b/usr/src/lib/libvmmapi/common/vmmapi.c @@ -1545,6 +1545,13 @@ vm_suspended_cpus(struct vmctx *ctx, cpuset_t *cpus) } int +vm_debug_cpus(struct vmctx *ctx, cpuset_t *cpus) +{ + + return (vm_get_cpus(ctx, VM_DEBUG_CPUS, cpus)); +} + +int vm_activate_cpu(struct vmctx *ctx, int vcpu) { struct vm_activate_cpu ac; @@ -1557,6 +1564,30 @@ vm_activate_cpu(struct vmctx *ctx, int vcpu) } int +vm_suspend_cpu(struct vmctx *ctx, int vcpu) +{ + struct vm_activate_cpu ac; + int error; + + bzero(&ac, sizeof(struct vm_activate_cpu)); + ac.vcpuid = vcpu; + error = ioctl(ctx->fd, VM_SUSPEND_CPU, &ac); + return (error); +} + +int +vm_resume_cpu(struct vmctx *ctx, int vcpu) +{ + struct vm_activate_cpu ac; + int error; + + bzero(&ac, sizeof(struct vm_activate_cpu)); + ac.vcpuid = vcpu; + error = ioctl(ctx->fd, VM_RESUME_CPU, &ac); + return (error); +} + +int vm_get_intinfo(struct vmctx *ctx, int vcpu, uint64_t *info1, uint64_t *info2) { struct vm_intinfo vmii; @@ -1646,6 +1677,38 @@ vm_restart_instruction(void *arg, int vcpu) } int +vm_set_topology(struct vmctx *ctx, + uint16_t sockets, uint16_t cores, uint16_t threads, uint16_t maxcpus) +{ + struct vm_cpu_topology topology; + + bzero(&topology, sizeof (struct vm_cpu_topology)); + topology.sockets = sockets; + topology.cores = cores; + topology.threads = threads; + topology.maxcpus = maxcpus; + return (ioctl(ctx->fd, VM_SET_TOPOLOGY, &topology)); +} + +int +vm_get_topology(struct vmctx *ctx, + uint16_t *sockets, uint16_t *cores, uint16_t *threads, uint16_t *maxcpus) +{ + struct vm_cpu_topology topology; + int error; + + bzero(&topology, sizeof (struct vm_cpu_topology)); + error = ioctl(ctx->fd, VM_GET_TOPOLOGY, &topology); + if (error == 0) { + *sockets = topology.sockets; + *cores = topology.cores; + *threads = topology.threads; + *maxcpus = topology.maxcpus; + } + return (error); +} + +int vm_get_device_fd(struct vmctx *ctx) { @@ -1673,9 +1736,10 @@ vm_get_ioctls(size_t *len) VM_SET_X2APIC_STATE, VM_GET_X2APIC_STATE, VM_GET_HPET_CAPABILITIES, VM_GET_GPA_PMAP, VM_GLA2GPA, VM_GLA2GPA_NOFAULT, - VM_ACTIVATE_CPU, VM_GET_CPUS, VM_SET_INTINFO, VM_GET_INTINFO, + VM_ACTIVATE_CPU, VM_GET_CPUS, VM_SUSPEND_CPU, VM_RESUME_CPU, + VM_SET_INTINFO, VM_GET_INTINFO, VM_RTC_WRITE, VM_RTC_READ, VM_RTC_SETTIME, VM_RTC_GETTIME, - VM_RESTART_INSTRUCTION }; + VM_RESTART_INSTRUCTION, VM_SET_TOPOLOGY, VM_GET_TOPOLOGY }; if (len == NULL) { cmds = malloc(sizeof(vm_ioctl_cmds)); diff --git a/usr/src/lib/libvmmapi/common/vmmapi.h b/usr/src/lib/libvmmapi/common/vmmapi.h index ae8bb5d3b8..cfceafc6f4 100644 --- a/usr/src/lib/libvmmapi/common/vmmapi.h +++ b/usr/src/lib/libvmmapi/common/vmmapi.h @@ -245,7 +245,16 @@ int vcpu_reset(struct vmctx *ctx, int vcpu); int vm_active_cpus(struct vmctx *ctx, cpuset_t *cpus); int vm_suspended_cpus(struct vmctx *ctx, cpuset_t *cpus); +int vm_debug_cpus(struct vmctx *ctx, cpuset_t *cpus); int vm_activate_cpu(struct vmctx *ctx, int vcpu); +int vm_suspend_cpu(struct vmctx *ctx, int vcpu); +int vm_resume_cpu(struct vmctx *ctx, int vcpu); + +/* CPU topology */ +int vm_set_topology(struct vmctx *ctx, uint16_t sockets, uint16_t cores, + uint16_t threads, uint16_t maxcpus); +int vm_get_topology(struct vmctx *ctx, uint16_t *sockets, uint16_t *cores, + uint16_t *threads, uint16_t *maxcpus); #ifdef __FreeBSD__ /* diff --git a/usr/src/uts/i86pc/io/vmm/README.sync b/usr/src/uts/i86pc/io/vmm/README.sync index d7e281f250..667f34b9de 100644 --- a/usr/src/uts/i86pc/io/vmm/README.sync +++ b/usr/src/uts/i86pc/io/vmm/README.sync @@ -1,15 +1,22 @@ The bhyve kernel module and its associated userland consumers have been updated to the latest upstream FreeBSD sources as of: -commit 32e2f94b53c0599c7c674ff88c75b289f714c7c9 +commit 0fac2150fc0f1befa5803ca010ed63a6335847ad Author: grehan <grehan@FreeBSD.org> -Date: Sun Mar 11 08:27:11 2018 +0000 +Date: Fri May 4 01:36:49 2018 +0000 - Add CR2 get/set support. + Allow arbitrary numbers of columns for VNC server screen resolution. - Reported/Tested by: Fabian Freyer - Reviewed by: araujo - Differential Revision: https://reviews.freebsd.org/D14648 + The prior code only allowed multiples of 32 for the + numbers of columns. Remove this restriction to allow + a forthcoming UEFI firmware update to allow arbitrary + x,y resolutions. + + (the code for handling rows already supported non mult-32 values) + + Reviewed by: Leon Dang (original author) MFC after: 3 weeks + Differential Revision: https://reviews.freebsd.org/D15274 + -Which corresponds to SVN revision: 330764 +Which corresponds to SVN revision: 333235 diff --git a/usr/src/uts/i86pc/io/vmm/amd/svm.c b/usr/src/uts/i86pc/io/vmm/amd/svm.c index 3b4a279627..f3ce78148b 100644 --- a/usr/src/uts/i86pc/io/vmm/amd/svm.c +++ b/usr/src/uts/i86pc/io/vmm/amd/svm.c @@ -2039,6 +2039,12 @@ svm_vmrun(void *arg, int vcpu, register_t rip, pmap_t pmap, break; } + if (vcpu_debugged(vm, vcpu)) { + enable_gintr(); + vm_exit_debug(vm, vcpu, state->rip); + break; + } + svm_inj_interrupts(svm_sc, vcpu, vlapic); /* Activate the nested pmap on 'curcpu' */ diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx.c b/usr/src/uts/i86pc/io/vmm/intel/vmx.c index af63e0a426..94c588a5c3 100644 --- a/usr/src/uts/i86pc/io/vmm/intel/vmx.c +++ b/usr/src/uts/i86pc/io/vmm/intel/vmx.c @@ -224,6 +224,82 @@ SYSCTL_UINT(_hw_vmm_vmx, OID_AUTO, vpid_alloc_failed, CTLFLAG_RD, &vpid_alloc_failed, 0, NULL); /* + * The definitions of SDT probes for VMX. + */ + +SDT_PROBE_DEFINE3(vmm, vmx, exit, entry, + "struct vmx *", "int", "struct vm_exit *"); + +SDT_PROBE_DEFINE4(vmm, vmx, exit, taskswitch, + "struct vmx *", "int", "struct vm_exit *", "struct vm_task_switch *"); + +SDT_PROBE_DEFINE4(vmm, vmx, exit, craccess, + "struct vmx *", "int", "struct vm_exit *", "uint64_t"); + +SDT_PROBE_DEFINE4(vmm, vmx, exit, rdmsr, + "struct vmx *", "int", "struct vm_exit *", "uint32_t"); + +SDT_PROBE_DEFINE5(vmm, vmx, exit, wrmsr, + "struct vmx *", "int", "struct vm_exit *", "uint32_t", "uint64_t"); + +SDT_PROBE_DEFINE3(vmm, vmx, exit, halt, + "struct vmx *", "int", "struct vm_exit *"); + +SDT_PROBE_DEFINE3(vmm, vmx, exit, mtrap, + "struct vmx *", "int", "struct vm_exit *"); + +SDT_PROBE_DEFINE3(vmm, vmx, exit, pause, + "struct vmx *", "int", "struct vm_exit *"); + +SDT_PROBE_DEFINE3(vmm, vmx, exit, intrwindow, + "struct vmx *", "int", "struct vm_exit *"); + +SDT_PROBE_DEFINE4(vmm, vmx, exit, interrupt, + "struct vmx *", "int", "struct vm_exit *", "uint32_t"); + +SDT_PROBE_DEFINE3(vmm, vmx, exit, nmiwindow, + "struct vmx *", "int", "struct vm_exit *"); + +SDT_PROBE_DEFINE3(vmm, vmx, exit, inout, + "struct vmx *", "int", "struct vm_exit *"); + +SDT_PROBE_DEFINE3(vmm, vmx, exit, cpuid, + "struct vmx *", "int", "struct vm_exit *"); + +SDT_PROBE_DEFINE5(vmm, vmx, exit, exception, + "struct vmx *", "int", "struct vm_exit *", "uint32_t", "int"); + +SDT_PROBE_DEFINE5(vmm, vmx, exit, nestedfault, + "struct vmx *", "int", "struct vm_exit *", "uint64_t", "uint64_t"); + +SDT_PROBE_DEFINE4(vmm, vmx, exit, mmiofault, + "struct vmx *", "int", "struct vm_exit *", "uint64_t"); + +SDT_PROBE_DEFINE3(vmm, vmx, exit, eoi, + "struct vmx *", "int", "struct vm_exit *"); + +SDT_PROBE_DEFINE3(vmm, vmx, exit, apicaccess, + "struct vmx *", "int", "struct vm_exit *"); + +SDT_PROBE_DEFINE4(vmm, vmx, exit, apicwrite, + "struct vmx *", "int", "struct vm_exit *", "struct vlapic *"); + +SDT_PROBE_DEFINE3(vmm, vmx, exit, xsetbv, + "struct vmx *", "int", "struct vm_exit *"); + +SDT_PROBE_DEFINE3(vmm, vmx, exit, monitor, + "struct vmx *", "int", "struct vm_exit *"); + +SDT_PROBE_DEFINE3(vmm, vmx, exit, mwait, + "struct vmx *", "int", "struct vm_exit *"); + +SDT_PROBE_DEFINE4(vmm, vmx, exit, unknown, + "struct vmx *", "int", "struct vm_exit *", "uint32_t"); + +SDT_PROBE_DEFINE4(vmm, vmx, exit, return, + "struct vmx *", "int", "struct vm_exit *", "int"); + +/* * Use the last page below 4GB as the APIC access address. This address is * occupied by the boot firmware so it is guaranteed that it will not conflict * with a page in system memory. @@ -2302,6 +2378,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) vmexit->exitcode = VM_EXITCODE_BOGUS; vmm_stat_incr(vmx->vm, vcpu, VMEXIT_COUNT, 1); + SDT_PROBE3(vmm, vmx, exit, entry, vmx, vcpu, vmexit); /* * VM-entry failures during or after loading guest state. @@ -2408,6 +2485,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) } } vmexit->exitcode = VM_EXITCODE_TASK_SWITCH; + SDT_PROBE4(vmm, vmx, exit, taskswitch, vmx, vcpu, vmexit, ts); VCPU_CTR4(vmx->vm, vcpu, "task switch reason %d, tss 0x%04x, " "%s errcode 0x%016lx", ts->reason, ts->tsssel, ts->ext ? "external" : "internal", @@ -2415,6 +2493,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) break; case EXIT_REASON_CR_ACCESS: vmm_stat_incr(vmx->vm, vcpu, VMEXIT_CR_ACCESS, 1); + SDT_PROBE4(vmm, vmx, exit, craccess, vmx, vcpu, vmexit, qual); switch (qual & 0xf) { case 0: handled = vmx_emulate_cr0_access(vmx, vcpu, qual); @@ -2432,6 +2511,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) retu = false; ecx = vmxctx->guest_rcx; VCPU_CTR1(vmx->vm, vcpu, "rdmsr 0x%08x", ecx); + SDT_PROBE4(vmm, vmx, exit, rdmsr, vmx, vcpu, vmexit, ecx); error = emulate_rdmsr(vmx, vcpu, ecx, &retu); if (error) { vmexit->exitcode = VM_EXITCODE_RDMSR; @@ -2452,6 +2532,8 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) edx = vmxctx->guest_rdx; VCPU_CTR2(vmx->vm, vcpu, "wrmsr 0x%08x value 0x%016lx", ecx, (uint64_t)edx << 32 | eax); + SDT_PROBE5(vmm, vmx, exit, wrmsr, vmx, vmexit, vcpu, ecx, + (uint64_t)edx << 32 | eax); error = emulate_wrmsr(vmx, vcpu, ecx, (uint64_t)edx << 32 | eax, &retu); if (error) { @@ -2468,6 +2550,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) break; case EXIT_REASON_HLT: vmm_stat_incr(vmx->vm, vcpu, VMEXIT_HLT, 1); + SDT_PROBE3(vmm, vmx, exit, halt, vmx, vcpu, vmexit); vmexit->exitcode = VM_EXITCODE_HLT; vmexit->u.hlt.rflags = vmcs_read(VMCS_GUEST_RFLAGS); if (virtual_interrupt_delivery) @@ -2478,15 +2561,18 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) break; case EXIT_REASON_MTF: vmm_stat_incr(vmx->vm, vcpu, VMEXIT_MTRAP, 1); + SDT_PROBE3(vmm, vmx, exit, mtrap, vmx, vcpu, vmexit); vmexit->exitcode = VM_EXITCODE_MTRAP; vmexit->inst_length = 0; break; case EXIT_REASON_PAUSE: vmm_stat_incr(vmx->vm, vcpu, VMEXIT_PAUSE, 1); + SDT_PROBE3(vmm, vmx, exit, pause, vmx, vcpu, vmexit); vmexit->exitcode = VM_EXITCODE_PAUSE; break; case EXIT_REASON_INTR_WINDOW: vmm_stat_incr(vmx->vm, vcpu, VMEXIT_INTR_WINDOW, 1); + SDT_PROBE3(vmm, vmx, exit, intrwindow, vmx, vcpu, vmexit); vmx_clear_int_window_exiting(vmx, vcpu); return (1); case EXIT_REASON_EXT_INTR: @@ -2500,6 +2586,8 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) * this virtual interrupt during the subsequent VM enter. */ intr_info = vmcs_read(VMCS_EXIT_INTR_INFO); + SDT_PROBE4(vmm, vmx, exit, interrupt, + vmx, vcpu, vmexit, intr_info); /* * XXX: Ignore this exit if VMCS_INTR_VALID is not set. @@ -2519,6 +2607,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) vmm_stat_incr(vmx->vm, vcpu, VMEXIT_EXTINT, 1); return (1); case EXIT_REASON_NMI_WINDOW: + SDT_PROBE3(vmm, vmx, exit, nmiwindow, vmx, vcpu, vmexit); /* Exit to allow the pending virtual NMI to be injected */ if (vm_nmi_pending(vmx->vm, vcpu)) vmx_inject_nmi(vmx, vcpu); @@ -2546,9 +2635,11 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) vis->addrsize = inout_str_addrsize(inst_info); inout_str_seginfo(vmx, vcpu, inst_info, in, vis); } + SDT_PROBE3(vmm, vmx, exit, inout, vmx, vcpu, vmexit); break; case EXIT_REASON_CPUID: vmm_stat_incr(vmx->vm, vcpu, VMEXIT_CPUID, 1); + SDT_PROBE3(vmm, vmx, exit, cpuid, vmx, vcpu, vmexit); handled = vmx_handle_cpuid(vmx->vm, vcpu, vmxctx); break; case EXIT_REASON_EXCEPTION: @@ -2617,6 +2708,8 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) } VCPU_CTR2(vmx->vm, vcpu, "Reflecting exception %d/%#x into " "the guest", intr_vec, errcode); + SDT_PROBE5(vmm, vmx, exit, exception, + vmx, vcpu, vmexit, intr_vec, errcode); error = vm_inject_exception(vmx->vm, vcpu, intr_vec, errcode_valid, errcode, 0); KASSERT(error == 0, ("%s: vm_inject_exception error %d", @@ -2637,9 +2730,13 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) vmexit->u.paging.gpa = gpa; vmexit->u.paging.fault_type = ept_fault_type(qual); vmm_stat_incr(vmx->vm, vcpu, VMEXIT_NESTED_FAULT, 1); + SDT_PROBE5(vmm, vmx, exit, nestedfault, + vmx, vcpu, vmexit, gpa, qual); } else if (ept_emulation_fault(qual)) { vmexit_inst_emul(vmexit, gpa, vmcs_gla()); vmm_stat_incr(vmx->vm, vcpu, VMEXIT_INST_EMUL, 1); + SDT_PROBE4(vmm, vmx, exit, mmiofault, + vmx, vcpu, vmexit, gpa); } /* * If Virtual NMIs control is 1 and the VM-exit is due to an @@ -2656,9 +2753,11 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) case EXIT_REASON_VIRTUALIZED_EOI: vmexit->exitcode = VM_EXITCODE_IOAPIC_EOI; vmexit->u.ioapic_eoi.vector = qual & 0xFF; + SDT_PROBE3(vmm, vmx, exit, eoi, vmx, vcpu, vmexit); vmexit->inst_length = 0; /* trap-like */ break; case EXIT_REASON_APIC_ACCESS: + SDT_PROBE3(vmm, vmx, exit, apicaccess, vmx, vcpu, vmexit); handled = vmx_handle_apic_access(vmx, vcpu, vmexit); break; case EXIT_REASON_APIC_WRITE: @@ -2668,18 +2767,25 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) */ vmexit->inst_length = 0; vlapic = vm_lapic(vmx->vm, vcpu); + SDT_PROBE4(vmm, vmx, exit, apicwrite, + vmx, vcpu, vmexit, vlapic); handled = vmx_handle_apic_write(vmx, vcpu, vlapic, qual); break; case EXIT_REASON_XSETBV: + SDT_PROBE3(vmm, vmx, exit, xsetbv, vmx, vcpu, vmexit); handled = vmx_emulate_xsetbv(vmx, vcpu, vmexit); break; case EXIT_REASON_MONITOR: + SDT_PROBE3(vmm, vmx, exit, monitor, vmx, vcpu, vmexit); vmexit->exitcode = VM_EXITCODE_MONITOR; break; case EXIT_REASON_MWAIT: + SDT_PROBE3(vmm, vmx, exit, mwait, vmx, vcpu, vmexit); vmexit->exitcode = VM_EXITCODE_MWAIT; break; default: + SDT_PROBE4(vmm, vmx, exit, unknown, + vmx, vcpu, vmexit, reason); vmm_stat_incr(vmx->vm, vcpu, VMEXIT_UNKNOWN, 1); break; } @@ -2715,6 +2821,9 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) */ } } + + SDT_PROBE4(vmm, vmx, exit, return, + vmx, vcpu, vmexit, handled); return (handled); } @@ -2951,6 +3060,12 @@ vmx_run(void *arg, int vcpu, register_t rip, pmap_t pmap, break; } + if (vcpu_debugged(vm, vcpu)) { + enable_intr(); + vm_exit_debug(vmx->vm, vcpu, rip); + break; + } + vmx_run_trace(vmx, vcpu); vmx_dr_enter_guest(vmxctx); rc = vmx_enter_guest(vmxctx, vmx, launched); diff --git a/usr/src/uts/i86pc/io/vmm/io/ppt.c b/usr/src/uts/i86pc/io/vmm/io/ppt.c index dabbe584fd..89e9f35c10 100644 --- a/usr/src/uts/i86pc/io/vmm/io/ppt.c +++ b/usr/src/uts/i86pc/io/vmm/io/ppt.c @@ -817,6 +817,10 @@ ppt_flr(dev_info_t *dip, boolean_t force) return (B_TRUE); fail: + /* + * TODO: If the FLR fails for some reason, we should attempt a reset + * using the PCI power management facilities (if possible). + */ pci_config_teardown(&hdl); return (B_FALSE); } diff --git a/usr/src/uts/i86pc/io/vmm/vmm.c b/usr/src/uts/i86pc/io/vmm/vmm.c index 050d8e752c..c4988c6d72 100644 --- a/usr/src/uts/i86pc/io/vmm/vmm.c +++ b/usr/src/uts/i86pc/io/vmm/vmm.c @@ -174,6 +174,7 @@ struct vm { struct vpmtmr *vpmtmr; /* (i) virtual ACPI PM timer */ struct vrtc *vrtc; /* (o) virtual RTC */ volatile cpuset_t active_cpus; /* (i) active vcpus */ + volatile cpuset_t debug_cpus; /* (i) vcpus stopped for debug */ int suspend; /* (i) stop VM execution */ volatile cpuset_t suspended_cpus; /* (i) suspended vcpus */ volatile cpuset_t halted_cpus; /* (x) cpus in a hard halt */ @@ -190,6 +191,11 @@ struct vm { struct vmspace *vmspace; /* (o) guest's address space */ char name[VM_MAX_NAMELEN]; /* (o) virtual machine name */ struct vcpu vcpu[VM_MAXCPU]; /* (i) guest vcpus */ + /* The following describe the vm cpu topology */ + uint16_t sockets; /* (o) num of sockets */ + uint16_t cores; /* (o) num of cores/socket */ + uint16_t threads; /* (o) num of threads/core */ + uint16_t maxcpus; /* (o) max pluggable cpus */ #ifndef __FreeBSD__ krwlock_t ioport_rwlock; list_t ioport_hooks; @@ -231,6 +237,8 @@ static struct vmm_ops *ops; #define fpu_start_emulating() load_cr0(rcr0() | CR0_TS) #define fpu_stop_emulating() clts() +SDT_PROVIDER_DEFINE(vmm); + static MALLOC_DEFINE(M_VM, "vm", "vm"); /* statistics */ @@ -520,6 +528,7 @@ vm_init(struct vm *vm, bool create) #endif /* __FreeBSD__ */ CPU_ZERO(&vm->active_cpus); + CPU_ZERO(&vm->debug_cpus); vm->suspend = 0; CPU_ZERO(&vm->suspended_cpus); @@ -528,6 +537,12 @@ vm_init(struct vm *vm, bool create) vcpu_init(vm, i, create); } +/* + * The default CPU topology is a single thread per package. + */ +u_int cores_per_package = 1; +u_int threads_per_core = 1; + int vm_create(const char *name, struct vm **retvm) { @@ -553,12 +568,43 @@ vm_create(const char *name, struct vm **retvm) vm->vmspace = vmspace; mtx_init(&vm->rendezvous_mtx, "vm rendezvous lock", 0, MTX_DEF); + vm->sockets = 1; + vm->cores = cores_per_package; /* XXX backwards compatibility */ + vm->threads = threads_per_core; /* XXX backwards compatibility */ + vm->maxcpus = 0; /* XXX not implemented */ + vm_init(vm, true); *retvm = vm; return (0); } +void +vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores, + uint16_t *threads, uint16_t *maxcpus) +{ + *sockets = vm->sockets; + *cores = vm->cores; + *threads = vm->threads; + *maxcpus = vm->maxcpus; +} + +int +vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores, + uint16_t threads, uint16_t maxcpus) +{ + if (maxcpus != 0) + return (EINVAL); /* XXX remove when supported */ + if ((sockets * cores * threads) > VM_MAXCPU) + return (EINVAL); + /* XXX need to check sockets * cores * threads == vCPU, how? */ + vm->sockets = sockets; + vm->cores = cores; + vm->threads = threads; + vm->maxcpus = maxcpus; + return(0); +} + static void vm_cleanup(struct vm *vm, bool destroy) { @@ -1435,6 +1481,9 @@ vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled, bool *retu) if (vcpu_should_yield(vm, vcpuid)) break; + if (vcpu_debugged(vm, vcpuid)) + break; + /* * Some Linux guests implement "halt" by having all vcpus * execute HLT with interrupts disabled. 'halted_cpus' keeps @@ -1718,6 +1767,17 @@ vm_exit_suspended(struct vm *vm, int vcpuid, uint64_t rip) } void +vm_exit_debug(struct vm *vm, int vcpuid, uint64_t rip) +{ + struct vm_exit *vmexit; + + vmexit = vm_exitinfo(vm, vcpuid); + vmexit->rip = rip; + vmexit->inst_length = 0; + vmexit->exitcode = VM_EXITCODE_DEBUG; +} + +void vm_exit_rendezvous(struct vm *vm, int vcpuid, uint64_t rip) { struct vm_exit *vmexit; @@ -2609,6 +2669,55 @@ vm_activate_cpu(struct vm *vm, int vcpuid) return (0); } +int +vm_suspend_cpu(struct vm *vm, int vcpuid) +{ + int i; + + if (vcpuid < -1 || vcpuid >= VM_MAXCPU) + return (EINVAL); + + if (vcpuid == -1) { + vm->debug_cpus = vm->active_cpus; + for (i = 0; i < VM_MAXCPU; i++) { + if (CPU_ISSET(i, &vm->active_cpus)) + vcpu_notify_event(vm, i, false); + } + } else { + if (!CPU_ISSET(vcpuid, &vm->active_cpus)) + return (EINVAL); + + CPU_SET_ATOMIC(vcpuid, &vm->debug_cpus); + vcpu_notify_event(vm, vcpuid, false); + } + return (0); +} + +int +vm_resume_cpu(struct vm *vm, int vcpuid) +{ + + if (vcpuid < -1 || vcpuid >= VM_MAXCPU) + return (EINVAL); + + if (vcpuid == -1) { + CPU_ZERO(&vm->debug_cpus); + } else { + if (!CPU_ISSET(vcpuid, &vm->debug_cpus)) + return (EINVAL); + + CPU_CLR_ATOMIC(vcpuid, &vm->debug_cpus); + } + return (0); +} + +int +vcpu_debugged(struct vm *vm, int vcpuid) +{ + + return (CPU_ISSET(vcpuid, &vm->debug_cpus)); +} + cpuset_t vm_active_cpus(struct vm *vm) { @@ -2617,6 +2726,13 @@ vm_active_cpus(struct vm *vm) } cpuset_t +vm_debug_cpus(struct vm *vm) +{ + + return (vm->debug_cpus); +} + +cpuset_t vm_suspended_cpus(struct vm *vm) { diff --git a/usr/src/uts/i86pc/io/vmm/vmm_host.c b/usr/src/uts/i86pc/io/vmm/vmm_host.c index 639de087d0..9e390c93dd 100644 --- a/usr/src/uts/i86pc/io/vmm/vmm_host.c +++ b/usr/src/uts/i86pc/io/vmm/vmm_host.c @@ -73,7 +73,16 @@ vmm_host_state_init(void) */ vmm_host_cr0 = rcr0() | CR0_TS; - vmm_host_cr4 = rcr4(); + /* + * On non-PCID or PCID but without INVPCID support machines, + * we flush kernel i.e. global TLB entries, by temporary + * clearing the CR4.PGE bit, see invltlb_glob(). If + * preemption occurs at the wrong time, cached vmm_host_cr4 + * might store the value with CR4.PGE cleared. Since FreeBSD + * requires support for PG_G on amd64, just set it + * unconditionally. + */ + vmm_host_cr4 = rcr4() | CR4_PGE; /* * Only permit a guest to use XSAVE if the host is using diff --git a/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c b/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c index 66d5ce3b5d..a8381a9c0a 100644 --- a/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c +++ b/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c @@ -964,6 +964,12 @@ vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md, case VM_ACTIVATE_CPU: error = vm_activate_cpu(sc->vmm_vm, vcpu); break; + case VM_SUSPEND_CPU: + error = vm_suspend_cpu(sc->vmm_vm, vcpu); + break; + case VM_RESUME_CPU: + error = vm_resume_cpu(sc->vmm_vm, vcpu); + break; case VM_GET_CPUS: { struct vm_cpuset vm_cpuset; @@ -993,6 +999,8 @@ vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md, tempset = vm_active_cpus(sc->vmm_vm); } else if (vm_cpuset.which == VM_SUSPENDED_CPUS) { tempset = vm_suspended_cpus(sc->vmm_vm); + } else if (vm_cpuset.which == VM_DEBUG_CPUS) { + tempset = vm_debug_cpus(sc->vmm_vm); } else { error = EINVAL; } @@ -1080,6 +1088,29 @@ vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md, error = vm_restart_instruction(sc->vmm_vm, vcpu); break; + case VM_SET_TOPOLOGY: { + struct vm_cpu_topology topo; + + if (ddi_copyin(datap, &topo, sizeof (topo), md) != 0) { + error = EFAULT; + break; + } + error = vm_set_topology(sc->vmm_vm, topo.sockets, topo.cores, + topo.threads, topo.maxcpus); + break; + } + case VM_GET_TOPOLOGY: { + struct vm_cpu_topology topo; + + vm_get_topology(sc->vmm_vm, &topo.sockets, &topo.cores, + &topo.threads, &topo.maxcpus); + if (ddi_copyout(&topo, datap, sizeof (topo), md) != 0) { + error = EFAULT; + break; + } + break; + } + #ifndef __FreeBSD__ case VM_DEVMEM_GETOFFSET: { struct vm_devmem_offset vdo; diff --git a/usr/src/uts/i86pc/io/vmm/x86.c b/usr/src/uts/i86pc/io/vmm/x86.c index 0d59c119a8..d6426bde44 100644 --- a/usr/src/uts/i86pc/io/vmm/x86.c +++ b/usr/src/uts/i86pc/io/vmm/x86.c @@ -73,17 +73,6 @@ static uint64_t bhyve_xcpuids; SYSCTL_ULONG(_hw_vmm, OID_AUTO, bhyve_xcpuids, CTLFLAG_RW, &bhyve_xcpuids, 0, "Number of times an unknown cpuid leaf was accessed"); -/* - * The default CPU topology is a single thread per package. - */ -static u_int threads_per_core = 1; -SYSCTL_UINT(_hw_vmm_topology, OID_AUTO, threads_per_core, CTLFLAG_RDTUN, - &threads_per_core, 0, NULL); - -static u_int cores_per_package = 1; -SYSCTL_UINT(_hw_vmm_topology, OID_AUTO, cores_per_package, CTLFLAG_RDTUN, - &cores_per_package, 0, NULL); - static int cpuid_leaf_b = 1; SYSCTL_INT(_hw_vmm_topology, OID_AUTO, cpuid_leaf_b, CTLFLAG_RDTUN, &cpuid_leaf_b, 0, NULL); @@ -106,8 +95,9 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id, const struct xsave_limits *limits; uint64_t cr4; int error, enable_invpcid, level, width = 0, x2apic_id = 0; - unsigned int func, regs[4], logical_cpus; + unsigned int func, regs[4], logical_cpus = 0; enum x2apic_state x2apic_state; + uint16_t cores, maxcpus, sockets, threads; VCPU_CTR2(vm, vcpu_id, "cpuid %#x,%#x", *eax, *ecx); @@ -155,11 +145,11 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id, * * However this matches the logical cpus as * advertised by leaf 0x1 and will work even - * if the 'threads_per_core' tunable is set - * incorrectly on an AMD host. + * if threads is set incorrectly on an AMD host. */ - logical_cpus = threads_per_core * - cores_per_package; + vm_get_topology(vm, &sockets, &cores, &threads, + &maxcpus); + logical_cpus = threads * cores; regs[2] = logical_cpus - 1; } break; @@ -321,7 +311,9 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id, */ regs[3] |= (CPUID_MCA | CPUID_MCE | CPUID_MTRR); - logical_cpus = threads_per_core * cores_per_package; + vm_get_topology(vm, &sockets, &cores, &threads, + &maxcpus); + logical_cpus = threads * cores; regs[1] &= ~CPUID_HTT_CORES; regs[1] |= (logical_cpus & 0xff) << 16; regs[3] |= CPUID_HTT; @@ -331,8 +323,10 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id, cpuid_count(*eax, *ecx, regs); if (regs[0] || regs[1] || regs[2] || regs[3]) { + vm_get_topology(vm, &sockets, &cores, &threads, + &maxcpus); regs[0] &= 0x3ff; - regs[0] |= (cores_per_package - 1) << 26; + regs[0] |= (cores - 1) << 26; /* * Cache topology: * - L1 and L2 are shared only by the logical @@ -340,10 +334,10 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id, * - L3 and above are shared by all logical * processors in the package. */ - logical_cpus = threads_per_core; + logical_cpus = threads; level = (regs[0] >> 5) & 0x7; if (level >= 3) - logical_cpus *= cores_per_package; + logical_cpus *= cores; regs[0] |= (logical_cpus - 1) << 14; } break; @@ -405,16 +399,17 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id, /* * Processor topology enumeration */ + vm_get_topology(vm, &sockets, &cores, &threads, + &maxcpus); if (*ecx == 0) { - logical_cpus = threads_per_core; + logical_cpus = threads; width = log2(logical_cpus); level = CPUID_TYPE_SMT; x2apic_id = vcpu_id; } if (*ecx == 1) { - logical_cpus = threads_per_core * - cores_per_package; + logical_cpus = threads * cores; width = log2(logical_cpus); level = CPUID_TYPE_CORE; x2apic_id = vcpu_id; diff --git a/usr/src/uts/i86pc/sys/vmm.h b/usr/src/uts/i86pc/sys/vmm.h index 43a9d36de3..dea60c5f76 100644 --- a/usr/src/uts/i86pc/sys/vmm.h +++ b/usr/src/uts/i86pc/sys/vmm.h @@ -44,8 +44,13 @@ #ifndef _VMM_H_ #define _VMM_H_ +#include <sys/sdt.h> #include <x86/segments.h> +#ifdef _KERNEL +SDT_PROVIDER_DECLARE(vmm); +#endif + enum vm_suspend_how { VM_SUSPEND_NONE, VM_SUSPEND_RESET, @@ -204,6 +209,10 @@ int vm_create(const char *name, struct vm **retvm); void vm_destroy(struct vm *vm); int vm_reinit(struct vm *vm); const char *vm_name(struct vm *vm); +void vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores, + uint16_t *threads, uint16_t *maxcpus); +int vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores, + uint16_t threads, uint16_t maxcpus); /* * APIs that modify the guest memory map require all vcpus to be frozen. @@ -259,8 +268,11 @@ int vm_get_x2apic_state(struct vm *vm, int vcpu, enum x2apic_state *state); int vm_set_x2apic_state(struct vm *vm, int vcpu, enum x2apic_state state); int vm_apicid2vcpuid(struct vm *vm, int apicid); int vm_activate_cpu(struct vm *vm, int vcpu); +int vm_suspend_cpu(struct vm *vm, int vcpu); +int vm_resume_cpu(struct vm *vm, int vcpu); struct vm_exit *vm_exitinfo(struct vm *vm, int vcpuid); void vm_exit_suspended(struct vm *vm, int vcpuid, uint64_t rip); +void vm_exit_debug(struct vm *vm, int vcpuid, uint64_t rip); void vm_exit_rendezvous(struct vm *vm, int vcpuid, uint64_t rip); void vm_exit_astpending(struct vm *vm, int vcpuid, uint64_t rip); void vm_exit_reqidle(struct vm *vm, int vcpuid, uint64_t rip); @@ -284,6 +296,7 @@ typedef void (*vm_rendezvous_func_t)(struct vm *vm, int vcpuid, void *arg); void vm_smp_rendezvous(struct vm *vm, int vcpuid, cpuset_t dest, vm_rendezvous_func_t func, void *arg); cpuset_t vm_active_cpus(struct vm *vm); +cpuset_t vm_debug_cpus(struct vm *vm); cpuset_t vm_suspended_cpus(struct vm *vm); #endif /* _SYS__CPUSET_H_ */ @@ -308,6 +321,8 @@ vcpu_reqidle(struct vm_eventinfo *info) return (*info->iptr); } +int vcpu_debugged(struct vm *vm, int vcpuid); + /* * Return 1 if device indicated by bus/slot/func is supposed to be a * pci passthrough device. @@ -568,6 +583,7 @@ enum vm_exitcode { VM_EXITCODE_MWAIT, VM_EXITCODE_SVM, VM_EXITCODE_REQIDLE, + VM_EXITCODE_DEBUG, VM_EXITCODE_MAX }; diff --git a/usr/src/uts/i86pc/sys/vmm_dev.h b/usr/src/uts/i86pc/sys/vmm_dev.h index a737ab1ad5..63ccc36dc6 100644 --- a/usr/src/uts/i86pc/sys/vmm_dev.h +++ b/usr/src/uts/i86pc/sys/vmm_dev.h @@ -272,6 +272,7 @@ struct vm_cpuset { }; #define VM_ACTIVE_CPUS 0 #define VM_SUSPENDED_CPUS 1 +#define VM_DEBUG_CPUS 2 struct vm_intinfo { int vcpuid; @@ -295,6 +296,13 @@ struct vm_devmem_offset { }; #endif +struct vm_cpu_topology { + uint16_t sockets; + uint16_t cores; + uint16_t threads; + uint16_t maxcpus; +}; + enum { /* general routines */ IOCNUM_ABIVERS = 0, @@ -354,6 +362,10 @@ enum { IOCNUM_GET_X2APIC_STATE = 61, IOCNUM_GET_HPET_CAPABILITIES = 62, + /* CPU Topology */ + IOCNUM_SET_TOPOLOGY = 63, + IOCNUM_GET_TOPOLOGY = 64, + /* legacy interrupt injection */ IOCNUM_ISA_ASSERT_IRQ = 80, IOCNUM_ISA_DEASSERT_IRQ = 81, @@ -363,6 +375,8 @@ enum { /* vm_cpuset */ IOCNUM_ACTIVATE_CPU = 90, IOCNUM_GET_CPUSET = 91, + IOCNUM_SUSPEND_CPU = 92, + IOCNUM_RESUME_CPU = 93, /* RTC */ IOCNUM_RTC_READ = 100, @@ -454,6 +468,10 @@ enum { _IOWR('v', IOCNUM_GET_X2APIC_STATE, struct vm_x2apic) #define VM_GET_HPET_CAPABILITIES \ _IOR('v', IOCNUM_GET_HPET_CAPABILITIES, struct vm_hpet_cap) +#define VM_SET_TOPOLOGY \ + _IOW('v', IOCNUM_SET_TOPOLOGY, struct vm_cpu_topology) +#define VM_GET_TOPOLOGY \ + _IOR('v', IOCNUM_GET_TOPOLOGY, struct vm_cpu_topology) #define VM_GET_GPA_PMAP \ _IOWR('v', IOCNUM_GET_GPA_PMAP, struct vm_gpa_pte) #define VM_GLA2GPA \ @@ -464,6 +482,10 @@ enum { _IOW('v', IOCNUM_ACTIVATE_CPU, struct vm_activate_cpu) #define VM_GET_CPUS \ _IOW('v', IOCNUM_GET_CPUSET, struct vm_cpuset) +#define VM_SUSPEND_CPU \ + _IOW('v', IOCNUM_SUSPEND_CPU, struct vm_activate_cpu) +#define VM_RESUME_CPU \ + _IOW('v', IOCNUM_RESUME_CPU, struct vm_activate_cpu) #define VM_SET_INTINFO \ _IOW('v', IOCNUM_SET_INTINFO, struct vm_intinfo) #define VM_GET_INTINFO \ |
