summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPatrick Mooney <pmooney@pfmooney.com>2018-05-15 03:06:09 +0000
committerPatrick Mooney <pmooney@pfmooney.com>2018-05-24 18:07:37 +0000
commitadfeb11ce94f7c9b78db3f67388fb704c2d8673a (patch)
treedcf8186b69efcb05af66a3fe35cea2234efd9e12
parent28825f0c4ec7c0a9a0a1b599ed767e9c155d1230 (diff)
downloadillumos-joyent-adfeb11ce94f7c9b78db3f67388fb704c2d8673a.tar.gz
OS-6954 bhyve upstream sync
Reviewed by: Mike Gerdts <mike.gerdts@joyent.com> Reviewed by: Hans Rosenfeld <hans.rosenfeld@joyent.com> Approved by: Ryan Zezeski <rpz@joyent.com>
-rw-r--r--exception_lists/cstyle1
-rw-r--r--usr/src/cmd/bhyve/Makefile1
-rw-r--r--usr/src/cmd/bhyve/bhyverun.c145
-rw-r--r--usr/src/cmd/bhyve/gdb.c1328
-rw-r--r--usr/src/cmd/bhyve/gdb.h39
-rw-r--r--usr/src/cmd/bhyve/mem.c67
-rw-r--r--usr/src/cmd/bhyve/mem.h2
-rw-r--r--usr/src/cmd/bhyve/rfb.c33
-rw-r--r--usr/src/cmd/bhyvectl/bhyvectl.c13
-rw-r--r--usr/src/compat/freebsd/err.h23
-rw-r--r--usr/src/compat/freebsd/sys/cpuset.h51
-rw-r--r--usr/src/compat/freebsd/sys/ioctl.h2
-rw-r--r--usr/src/compat/freebsd/sys/sdt.h37
-rw-r--r--usr/src/lib/libvmmapi/common/mapfile-vers5
-rw-r--r--usr/src/lib/libvmmapi/common/vmmapi.c68
-rw-r--r--usr/src/lib/libvmmapi/common/vmmapi.h9
-rw-r--r--usr/src/uts/i86pc/io/vmm/README.sync21
-rw-r--r--usr/src/uts/i86pc/io/vmm/amd/svm.c6
-rw-r--r--usr/src/uts/i86pc/io/vmm/intel/vmx.c115
-rw-r--r--usr/src/uts/i86pc/io/vmm/io/ppt.c4
-rw-r--r--usr/src/uts/i86pc/io/vmm/vmm.c116
-rw-r--r--usr/src/uts/i86pc/io/vmm/vmm_host.c11
-rw-r--r--usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c31
-rw-r--r--usr/src/uts/i86pc/io/vmm/x86.c41
-rw-r--r--usr/src/uts/i86pc/sys/vmm.h16
-rw-r--r--usr/src/uts/i86pc/sys/vmm_dev.h22
26 files changed, 2125 insertions, 82 deletions
diff --git a/exception_lists/cstyle b/exception_lists/cstyle
index 27dc9ec2be..bd8816b589 100644
--- a/exception_lists/cstyle
+++ b/exception_lists/cstyle
@@ -1401,6 +1401,7 @@ usr/src/cmd/bhyve/console.[ch]
usr/src/cmd/bhyve/consport.c
usr/src/cmd/bhyve/dbgport.[ch]
usr/src/cmd/bhyve/fwctl.[ch]
+usr/src/cmd/bhyve/gdb.[ch]
usr/src/cmd/bhyve/inout.[ch]
usr/src/cmd/bhyve/ioapic.[ch]
usr/src/cmd/bhyve/mem.[ch]
diff --git a/usr/src/cmd/bhyve/Makefile b/usr/src/cmd/bhyve/Makefile
index 181ddf2946..554603d4f8 100644
--- a/usr/src/cmd/bhyve/Makefile
+++ b/usr/src/cmd/bhyve/Makefile
@@ -38,6 +38,7 @@ SRCS = acpi.c \
consport.c \
dbgport.c \
fwctl.c \
+ gdb.c \
inout.c \
ioapic.c \
mem.c \
diff --git a/usr/src/cmd/bhyve/bhyverun.c b/usr/src/cmd/bhyve/bhyverun.c
index 949e537738..b12fba0800 100644
--- a/usr/src/cmd/bhyve/bhyverun.c
+++ b/usr/src/cmd/bhyve/bhyverun.c
@@ -70,6 +70,7 @@ __FBSDID("$FreeBSD$");
#include <pthread_np.h>
#include <sysexits.h>
#include <stdbool.h>
+#include <stdint.h>
#include <machine/vmm.h>
#ifndef WITHOUT_CAPSICUM
@@ -88,6 +89,7 @@ __FBSDID("$FreeBSD$");
#include "inout.h"
#include "dbgport.h"
#include "fwctl.h"
+#include "gdb.h"
#include "ioapic.h"
#include "mem.h"
#include "mevent.h"
@@ -113,6 +115,8 @@ extern int vmexit_task_switch(struct vmctx *, struct vm_exit *, int *vcpu);
char *vmname;
int guest_ncpus;
+uint16_t cores, maxcpus, sockets, threads;
+
char *guest_uuid_str;
static int guest_vmexit_on_hlt, guest_vmexit_on_pause;
@@ -166,15 +170,17 @@ usage(int code)
{
fprintf(stderr,
- "Usage: %s [-abehuwxACHIPSWY] [-c vcpus] [-g <gdb port>] [-l <lpc>]\n"
+ "Usage: %s [-abehuwxACHPSWY]\n"
+ " %*s [-c [[cpus=]numcpus][,sockets=n][,cores=n][,threads=n]]\n"
+ " %*s [-g <gdb port>] [-l <lpc>]\n"
#ifdef __FreeBSD__
- " %*s [-m memsize[K|k|M|m|G|g|T|t] [-p vcpu:hostcpu] [-s <pci>] [-U uuid] <vm>\n"
+ " %*s [-m mem] [-p vcpu:hostcpu] [-s <pci>] [-U uuid] <vm>\n"
#else
- " %*s [-s <pci>] [-U uuid] <vm>\n"
+ " %*s [-m mem] [-s <pci>] [-U uuid] <vm>\n"
#endif
" -a: local apic is in xAPIC mode (deprecated)\n"
" -A: create ACPI tables\n"
- " -c: # cpus (default 1)\n"
+ " -c: number of cpus and/or topology specification"
" -C: include guest memory in core file\n"
" -e: exit on unhandled I/O access\n"
" -g: gdb port\n"
@@ -194,11 +200,91 @@ usage(int code)
" -W: force virtio to use single-vector MSI\n"
" -x: local apic is in x2APIC mode\n"
" -Y: disable MPtable generation\n",
- progname, (int)strlen(progname), "");
+ progname, (int)strlen(progname), "", (int)strlen(progname), "",
+ (int)strlen(progname), "");
exit(code);
}
+/*
+ * Parse the argument of the -c option and record the guest CPU topology.
+ *
+ * 'opt' is a comma-separated list of tokens, each one of: a bare number,
+ * "cpus=N", "sockets=N", "cores=N" or "threads=N".  On success the globals
+ * guest_ncpus, sockets, cores and threads are updated and 0 is returned.
+ * On a malformed token, an out-of-range value, a cpu count that does not
+ * match sockets * cores * threads, or an allocation failure, -1 is returned
+ * and the globals are left untouched.
+ *
+ * XXX This parser is known to have the following issues:
+ * 1. It accepts null key=value tokens ",,".
+ * 2. It accepts whitespace after = and before value.
+ * 3. Values out of range of INT are silently wrapped.
+ * 4. It doesn't check non-final values.
+ * 5. The apparently bogus limits of UINT16_MAX are for future expansion.
+ *
+ * The acceptance of a null specification ('-c ""') is by design to match the
+ * manual page syntax specification, this results in a topology of 1 vCPU.
+ */
+static int
+topology_parse(const char *opt)
+{
+	uint64_t ncpus;
+	int c, chk, n, ret, s, t, tmp;
+	char *cp, *str, *tofree;
+	bool ns, scts;
+
+	c = 1, n = 1, s = 1, t = 1;
+	ns = false, scts = false;
+	ret = -1;
+
+	/*
+	 * strsep() advances 'str' as it consumes tokens, so remember the
+	 * start of the copy in order to release it on every exit path.
+	 */
+	tofree = str = strdup(opt);
+	if (str == NULL)
+		return (-1);
+
+	while ((cp = strsep(&str, ",")) != NULL) {
+		if (sscanf(cp, "%i%n", &tmp, &chk) == 1) {
+			n = tmp;
+			ns = true;
+		} else if (sscanf(cp, "cpus=%i%n", &tmp, &chk) == 1) {
+			n = tmp;
+			ns = true;
+		} else if (sscanf(cp, "sockets=%i%n", &tmp, &chk) == 1) {
+			s = tmp;
+			scts = true;
+		} else if (sscanf(cp, "cores=%i%n", &tmp, &chk) == 1) {
+			c = tmp;
+			scts = true;
+		} else if (sscanf(cp, "threads=%i%n", &tmp, &chk) == 1) {
+			t = tmp;
+			scts = true;
+#ifdef notyet  /* Do not expose this until vmm.ko implements it */
+		} else if (sscanf(cp, "maxcpus=%i%n", &tmp, &chk) == 1) {
+			m = tmp;
+#endif
+		/* Skip the empty argument case from -c "" */
+		} else if (cp[0] == '\0')
+			continue;
+		else
+			goto out;
+		/* Any trailing garbage causes an error */
+		if (cp[chk] != '\0')
+			goto out;
+	}
+	/*
+	 * Range check 1 <= n <= UINT16_MAX all values
+	 */
+	if (n < 1 || s < 1 || c < 1 || t < 1 ||
+	    n > UINT16_MAX || s > UINT16_MAX || c > UINT16_MAX ||
+	    t > UINT16_MAX)
+		goto out;
+
+	/* If only the cpus was specified, use that as sockets */
+	if (!scts)
+		s = n;
+	/*
+	 * Compute sockets * cores * threads avoiding overflow
+	 * The range check above insures these are 16 bit values
+	 * If n was specified check it against computed ncpus
+	 */
+	ncpus = (uint64_t)s * c * t;
+	if (ncpus > UINT16_MAX || (ns && n != ncpus))
+		goto out;
+
+	guest_ncpus = ncpus;
+	sockets = s;
+	cores = c;
+	threads = t;
+	ret = 0;
+out:
+	free(tofree);
+	return (ret);
+}
+
#ifndef WITHOUT_CAPSICUM
/*
* 11-stable capsicum helpers
@@ -327,6 +413,8 @@ fbsdrun_start_thread(void *param)
snprintf(tname, sizeof(tname), "vcpu %d", vcpu);
pthread_set_name_np(mtp->mt_thr, tname);
+ gdb_cpu_add(vcpu);
+
vm_loop(mtp->mt_ctx, vcpu, vmexit[vcpu].rip);
/* not reached */
@@ -590,6 +678,8 @@ vmexit_mtrap(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
stats.vmexit_mtrap++;
+ gdb_cpu_mtrap(*pvcpu);
+
return (VMEXIT_CONTINUE);
}
@@ -664,6 +754,14 @@ vmexit_suspend(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
return (0); /* NOTREACHED */
}
+/*
+ * Exit handler registered for VM_EXITCODE_DEBUG: hand the exiting vCPU to
+ * the GDB stub and continue the run loop once gdb_cpu_suspend() returns.
+ */
+static int
+vmexit_debug(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
+{
+	const int vcpu = *pvcpu;
+
+	gdb_cpu_suspend(vcpu);
+
+	return (VMEXIT_CONTINUE);
+}
+
static vmexit_handler_t handler[VM_EXITCODE_MAX] = {
[VM_EXITCODE_INOUT] = vmexit_inout,
[VM_EXITCODE_INOUT_STR] = vmexit_inout,
@@ -678,6 +776,7 @@ static vmexit_handler_t handler[VM_EXITCODE_MAX] = {
[VM_EXITCODE_SPINUP_AP] = vmexit_spinup_ap,
[VM_EXITCODE_SUSPENDED] = vmexit_suspend,
[VM_EXITCODE_TASK_SWITCH] = vmexit_task_switch,
+ [VM_EXITCODE_DEBUG] = vmexit_debug,
};
static void
@@ -860,6 +959,9 @@ do_open(const char *vmname)
exit(1);
}
}
+ error = vm_set_topology(ctx, sockets, cores, threads, maxcpus);
+ if (error)
+ errx(EX_OSERR, "vm_set_topology");
return (ctx);
}
@@ -888,9 +990,10 @@ mark_provisioned(void)
int
main(int argc, char *argv[])
{
- int c, error, gdb_port, err, bvmcons;
+ int c, error, dbg_port, gdb_port, err, bvmcons;
int max_vcpus, mptgen, memflags;
int rtc_localtime;
+ bool gdb_stop;
struct vmctx *ctx;
uint64_t rip;
size_t memsize;
@@ -898,17 +1001,21 @@ main(int argc, char *argv[])
bvmcons = 0;
progname = basename(argv[0]);
+ dbg_port = 0;
gdb_port = 0;
+ gdb_stop = false;
guest_ncpus = 1;
+ sockets = cores = threads = 1;
+ maxcpus = 0;
memsize = 256 * MB;
mptgen = 1;
rtc_localtime = 1;
memflags = 0;
#ifdef __FreeBSD__
- optstr = "abehuwxACHIPSWYp:g:c:s:m:l:B:U:";
+ optstr = "abehuwxACHIPSWYp:g:G:c:s:m:l:B:U:";
#else
- optstr = "abehuwxACHIPSWYg:c:s:m:l:B:U:";
+ optstr = "abehuwxACHIPSWY:g:G:c:s:m:l:B:U:";
#endif
while ((c = getopt(argc, argv, optstr)) != -1) {
switch (c) {
@@ -936,12 +1043,22 @@ main(int argc, char *argv[])
break;
#endif
case 'c':
- guest_ncpus = atoi(optarg);
+ if (topology_parse(optarg) != 0) {
+ errx(EX_USAGE, "invalid cpu topology "
+ "'%s'", optarg);
+ }
break;
case 'C':
memflags |= VM_MEM_F_INCORE;
break;
case 'g':
+ dbg_port = atoi(optarg);
+ break;
+ case 'G':
+ if (optarg[0] == 'w') {
+ gdb_stop = true;
+ optarg++;
+ }
gdb_port = atoi(optarg);
break;
case 'l':
@@ -1014,11 +1131,6 @@ main(int argc, char *argv[])
vmname = argv[0];
ctx = do_open(vmname);
- if (guest_ncpus < 1) {
- fprintf(stderr, "Invalid guest vCPUs (%d)\n", guest_ncpus);
- exit(1);
- }
-
max_vcpus = num_vcpus_allowed(ctx);
if (guest_ncpus > max_vcpus) {
fprintf(stderr, "%d vCPUs requested but only %d available\n",
@@ -1069,8 +1181,11 @@ main(int argc, char *argv[])
if (init_pci(ctx) != 0)
exit(1);
+ if (dbg_port != 0)
+ init_dbgport(dbg_port);
+
if (gdb_port != 0)
- init_dbgport(gdb_port);
+ init_gdb(ctx, gdb_port, gdb_stop);
if (bvmcons)
init_bvmcons();
diff --git a/usr/src/cmd/bhyve/gdb.c b/usr/src/cmd/bhyve/gdb.c
new file mode 100644
index 0000000000..4414a05e27
--- /dev/null
+++ b/usr/src/cmd/bhyve/gdb.c
@@ -0,0 +1,1328 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2017-2018 John H. Baldwin <jhb@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#ifndef WITHOUT_CAPSICUM
+#include <sys/capsicum.h>
+#endif
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <machine/atomic.h>
+#include <machine/specialreg.h>
+#include <machine/vmm.h>
+#include <netinet/in.h>
+#include <assert.h>
+#ifndef WITHOUT_CAPSICUM
+#include <capsicum_helpers.h>
+#endif
+#include <err.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <pthread.h>
+#include <pthread_np.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sysexits.h>
+#include <unistd.h>
+#include <vmmapi.h>
+
+#include "bhyverun.h"
+#include "mem.h"
+#include "mevent.h"
+
+/*
+ * GDB_SIGNAL_* numbers are part of the GDB remote protocol. Most stops
+ * use SIGTRAP.
+ */
+#define GDB_SIGNAL_TRAP 5
+
+/* Forward declarations. */
+static void gdb_resume_vcpus(void);
+static void check_command(int fd);
+
+/*
+ * mevent handles for the current debugger connection; both are reset to
+ * NULL by close_connection().
+ */
+static struct mevent *read_event, *write_event;
+
+/*
+ * vcpus_active: vCPUs registered through gdb_cpu_add().
+ * vcpus_suspended: vCPUs that have been asked to stop; cleared by
+ * gdb_resume_vcpus().
+ * vcpus_waiting: vCPUs currently parked inside _gdb_cpu_suspend().
+ */
+static cpuset_t vcpus_active, vcpus_suspended, vcpus_waiting;
+/* Mutex taken around accesses to the vCPU sets and stop/step state. */
+static pthread_mutex_t gdb_lock;
+/* Parked vCPUs wait on this; it is broadcast on resume and on step. */
+static pthread_cond_t idle_vcpus;
+/*
+ * stop_pending: a stop report is owed but had to wait because an earlier
+ * response was still pending.
+ * first_stop: when set, the first completed stop is consumed without
+ * sending a stop report.
+ */
+static bool stop_pending, first_stop;
+/*
+ * stepping_vcpu: vCPU being single-stepped, reset to -1 when its trap
+ * arrives.
+ * stopped_vcpu: vCPU to name in the next stop report, or -1 for none.
+ */
+static int stepping_vcpu, stopped_vcpu;
+
+/*
+ * An I/O buffer contains 'capacity' bytes of room at 'data'. For a
+ * read buffer, 'start' is unused and 'len' contains the number of
+ * valid bytes in the buffer. For a write buffer, 'start' is set to
+ * the index of the next byte in 'data' to send, and 'len' contains
+ * the remaining number of valid bytes to send.
+ */
+struct io_buffer {
+ uint8_t *data; /* backing storage, resized by io_buffer_grow() */
+ size_t capacity; /* size of the storage at 'data' */
+ size_t start; /* offset of the first valid byte */
+ size_t len; /* number of valid bytes from 'start' */
+};
+
+/* cur_comm holds bytes read from the debugger; cur_resp holds the reply. */
+static struct io_buffer cur_comm, cur_resp;
+/* Running checksum of the packet currently being appended to cur_resp. */
+static uint8_t cur_csum;
+/* vCPU that register and memory commands operate on (chosen via 'H'). */
+static int cur_vcpu;
+/* VM context handed to the libvmmapi calls in this file. */
+static struct vmctx *ctx;
+/* Descriptor of the current connection; -1 when there is none. */
+static int cur_fd = -1;
+
+/*
+ * Guest registers sent in reply to a 'g' packet, in the order in which
+ * gdb_read_regs() emits them.
+ */
+const int gdb_regset[] = {
+ VM_REG_GUEST_RAX,
+ VM_REG_GUEST_RBX,
+ VM_REG_GUEST_RCX,
+ VM_REG_GUEST_RDX,
+ VM_REG_GUEST_RSI,
+ VM_REG_GUEST_RDI,
+ VM_REG_GUEST_RBP,
+ VM_REG_GUEST_RSP,
+ VM_REG_GUEST_R8,
+ VM_REG_GUEST_R9,
+ VM_REG_GUEST_R10,
+ VM_REG_GUEST_R11,
+ VM_REG_GUEST_R12,
+ VM_REG_GUEST_R13,
+ VM_REG_GUEST_R14,
+ VM_REG_GUEST_R15,
+ VM_REG_GUEST_RIP,
+ VM_REG_GUEST_RFLAGS,
+ VM_REG_GUEST_CS,
+ VM_REG_GUEST_SS,
+ VM_REG_GUEST_DS,
+ VM_REG_GUEST_ES,
+ VM_REG_GUEST_FS,
+ VM_REG_GUEST_GS
+};
+
+/*
+ * Number of bytes emitted for each entry of gdb_regset; the two arrays
+ * correspond index for index (RAX..RIP are 8 bytes, the rest 4).
+ */
+const int gdb_regsize[] = {
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 8,
+ 4,
+ 4,
+ 4,
+ 4,
+ 4,
+ 4,
+ 4
+};
+
+#ifdef GDB_LOG
+#include <stdarg.h>
+#include <stdio.h>
+
+/*
+ * Debug logging, compiled in only when GDB_LOG is defined. Messages are
+ * formatted printf-style and written to /tmp/bhyve_gdb.log, which is
+ * opened (and made line-buffered) on first use. If the log cannot be
+ * opened, or with Capsicum cannot be limited to writing, the message is
+ * dropped and the open is retried on the next call.
+ */
+static void __printflike(1, 2)
+debug(const char *fmt, ...)
+{
+ static FILE *logfile;
+ va_list ap;
+
+ if (logfile == NULL) {
+ logfile = fopen("/tmp/bhyve_gdb.log", "w");
+ if (logfile == NULL)
+ return;
+#ifndef WITHOUT_CAPSICUM
+ if (caph_limit_stream(fileno(logfile), CAPH_WRITE) == -1) {
+ fclose(logfile);
+ logfile = NULL;
+ return;
+ }
+#endif
+ setlinebuf(logfile);
+ }
+ va_start(ap, fmt);
+ vfprintf(logfile, fmt, ap);
+ va_end(ap);
+}
+#else
+/* Without GDB_LOG every debug() call expands to nothing. */
+#define debug(...)
+#endif
+
+/*
+ * Fill in 'paging' with the paging state of 'vcpu', derived from its CR0,
+ * CR3, CR4 and EFER registers.  Returns 0 on success, or -1 if the
+ * registers could not be fetched.
+ */
+static int
+guest_paging_info(int vcpu, struct vm_guest_paging *paging)
+{
+	const int regset[4] = {
+		VM_REG_GUEST_CR0,
+		VM_REG_GUEST_CR3,
+		VM_REG_GUEST_CR4,
+		VM_REG_GUEST_EFER
+	};
+	uint64_t regs[4];
+	uint64_t cr0, cr3, cr4, efer;
+
+	if (vm_get_register_set(ctx, vcpu, nitems(regset), regset, regs) == -1)
+		return (-1);
+	cr0 = regs[0];
+	cr3 = regs[1];
+	cr4 = regs[2];
+	efer = regs[3];
+
+	/*
+	 * The debugger always acts as the kernel (CPL 0), and whenever
+	 * long mode is active addresses are interpreted as 64-bit.
+	 */
+	paging->cr3 = cr3;
+	paging->cpl = 0;
+
+	if ((efer & EFER_LMA) != 0)
+		paging->cpu_mode = CPU_MODE_64BIT;
+	else if ((cr0 & CR0_PE) != 0)
+		paging->cpu_mode = CPU_MODE_PROTECTED;
+	else
+		paging->cpu_mode = CPU_MODE_REAL;
+
+	if ((cr0 & CR0_PG) == 0)
+		paging->paging_mode = PAGING_MODE_FLAT;
+	else if ((cr4 & CR4_PAE) == 0)
+		paging->paging_mode = PAGING_MODE_32;
+	else if ((efer & EFER_LME) != 0)
+		paging->paging_mode = PAGING_MODE_64;
+	else
+		paging->paging_mode = PAGING_MODE_PAE;
+
+	return (0);
+}
+
+/*
+ * Translate guest virtual address 'vaddr' to a guest physical address
+ * as seen by 'vcpu', storing the result in '*paddr'.
+ *
+ * Returns 1 when the address is mapped, 0 when the translation faults,
+ * and -1 when the mapping could not be obtained at all.
+ */
+static int
+guest_vaddr2paddr(int vcpu, uint64_t vaddr, uint64_t *paddr)
+{
+	struct vm_guest_paging pg;
+	int faulted, rc;
+
+	rc = guest_paging_info(vcpu, &pg);
+	if (rc == -1)
+		return (-1);
+
+	/*
+	 * Translate with PROT_READ only: what matters is whether the
+	 * address is accessible, not whether this vCPU may write to it.
+	 */
+	rc = vm_gla2gpa_nofault(ctx, vcpu, &pg, vaddr, PROT_READ, paddr,
+	    &faulted);
+	if (rc == -1)
+		return (-1);
+
+	return (faulted ? 0 : 1);
+}
+
+/* Discard everything held in 'io'; the storage itself is kept. */
+static void
+io_buffer_reset(struct io_buffer *io)
+{
+
+	io->len = 0;
+	io->start = 0;
+}
+
+/* Number of bytes that can still be appended after the valid data. */
+static size_t
+io_buffer_avail(struct io_buffer *io)
+{
+	size_t used;
+
+	used = io->start + io->len;
+	return (io->capacity - used);
+}
+
+/* Address of the first valid byte in 'io'. */
+static uint8_t *
+io_buffer_head(struct io_buffer *io)
+{
+
+	return (&io->data[io->start]);
+}
+
+/* Address just past the last valid byte in 'io', where new data goes. */
+static uint8_t *
+io_buffer_tail(struct io_buffer *io)
+{
+
+	return (&io->data[io->start + io->len]);
+}
+
+/*
+ * Drop 'amount' bytes from the front of the valid data in 'io' without
+ * moving anything.  'amount' must not exceed the valid length.
+ */
+static void
+io_buffer_advance(struct io_buffer *io, size_t amount)
+{
+
+	assert(io->len >= amount);
+	io->len -= amount;
+	io->start += amount;
+}
+
+/*
+ * Drop 'amount' bytes from the front of 'io' and slide whatever remains
+ * down to the beginning of the storage.
+ */
+static void
+io_buffer_consume(struct io_buffer *io, size_t amount)
+{
+
+	io_buffer_advance(io, amount);
+
+	/*
+	 * XXX: Consider making this move optional and compacting on a
+	 * future read() before realloc().
+	 */
+	if (io->len != 0)
+		memmove(io->data, io_buffer_head(io), io->len);
+	io->start = 0;
+}
+
+/*
+ * Make sure at least 'newsize' bytes can be appended to 'io', enlarging
+ * the storage by the shortfall when needed.  Terminates the process via
+ * err() if the reallocation fails.
+ */
+static void
+io_buffer_grow(struct io_buffer *io, size_t newsize)
+{
+	uint8_t *bigger;
+	size_t room, wanted;
+
+	room = io_buffer_avail(io);
+	if (room >= newsize)
+		return;
+
+	wanted = io->capacity + (newsize - room);
+	bigger = realloc(io->data, wanted);
+	if (bigger == NULL)
+		err(1, "Failed to grow GDB I/O buffer");
+	io->capacity = wanted;
+	io->data = bigger;
+}
+
+/*
+ * Report whether the response buffer holds (or held) a packet that the
+ * debugger has not yet acknowledged.  An untouched buffer, or one whose
+ * only content is a lone '+' acknowledgement, does not count.
+ */
+static bool
+response_pending(void)
+{
+	bool ack_only;
+
+	/* Nothing has been queued since the last reset. */
+	if (cur_resp.start == 0 && cur_resp.len == 0)
+		return (false);
+
+	ack_only = (cur_resp.start + cur_resp.len == 1 &&
+	    cur_resp.data[0] == '+');
+	return (!ack_only);
+}
+
+/*
+ * Tear down the current debugger connection: remove both mevents (the
+ * read event via mevent_delete_close()), empty both I/O buffers, mark
+ * cur_fd as unused and let any stopped vCPUs run again. Takes gdb_lock
+ * itself.
+ *
+ * NOTE(review): send_pending_data() calls this on a write error and is
+ * itself reached from check_command(), which gdb_readable() runs with
+ * gdb_lock already held -- confirm the mutex tolerates being locked
+ * again by the same thread.
+ */
+static void
+close_connection(void)
+{
+
+ /*
+ * XXX: This triggers a warning because mevent does the close
+ * before the EV_DELETE.
+ */
+ pthread_mutex_lock(&gdb_lock);
+ mevent_delete(write_event);
+ mevent_delete_close(read_event);
+ write_event = NULL;
+ read_event = NULL;
+ io_buffer_reset(&cur_comm);
+ io_buffer_reset(&cur_resp);
+ cur_fd = -1;
+
+ /* Resume any stopped vCPUs. */
+ gdb_resume_vcpus();
+ pthread_mutex_unlock(&gdb_lock);
+}
+
+/* Convert a value in the range 0-15 to its lower-case hex character. */
+static uint8_t
+hex_digit(uint8_t nibble)
+{
+
+	return (nibble <= 9 ? nibble + '0' : nibble + 'a' - 10);
+}
+
+/*
+ * Convert one hex character (either case) to its value.  Characters
+ * that are not hex digits yield 0xF.
+ */
+static uint8_t
+parse_digit(uint8_t v)
+{
+	uint8_t val;
+
+	val = 0xF;
+	if (v >= '0' && v <= '9')
+		val = v - '0';
+	else if (v >= 'a' && v <= 'f')
+		val = v - 'a' + 10;
+	else if (v >= 'A' && v <= 'F')
+		val = v - 'A' + 10;
+	return (val);
+}
+
+/*
+ * Parse 'len' characters at 'p' as a big-endian hexadecimal number.
+ * An empty input yields 0.
+ */
+static uintmax_t
+parse_integer(const uint8_t *p, size_t len)
+{
+	uintmax_t acc;
+	size_t i;
+
+	acc = 0;
+	for (i = 0; i < len; i++)
+		acc = (acc << 4) | parse_digit(p[i]);
+	return (acc);
+}
+
+/* Parse the two hex characters at 'p' into a single byte. */
+static uint8_t
+parse_byte(const uint8_t *p)
+{
+	uint8_t hi, lo;
+
+	hi = parse_digit(p[0]);
+	lo = parse_digit(p[1]);
+	return (hi << 4 | lo);
+}
+
+/*
+ * Try to write whatever is queued in cur_resp to 'fd'. The write event
+ * is disabled once nothing is left to send and enabled while unsent
+ * bytes remain; a failed write closes the connection.
+ *
+ * NOTE(review): every write() failure is treated as fatal for the
+ * connection. The socket is accepted with SOCK_NONBLOCK, so confirm
+ * that EAGAIN cannot be returned here.
+ */
+static void
+send_pending_data(int fd)
+{
+ ssize_t nwritten;
+
+ if (cur_resp.len == 0) {
+ mevent_disable(write_event);
+ return;
+ }
+ nwritten = write(fd, io_buffer_head(&cur_resp), cur_resp.len);
+ if (nwritten == -1) {
+ warn("Write to GDB socket failed");
+ close_connection();
+ } else {
+ /* Sent bytes stay in the buffer so a NACK can replay them. */
+ io_buffer_advance(&cur_resp, nwritten);
+ if (cur_resp.len == 0)
+ mevent_disable(write_event);
+ else
+ mevent_enable(write_event);
+ }
+}
+
+/* Queue one raw byte at the end of the output buffer. */
+static void
+send_char(uint8_t data)
+{
+	uint8_t *slot;
+
+	io_buffer_grow(&cur_resp, 1);
+	slot = io_buffer_tail(&cur_resp);
+	*slot = data;
+	cur_resp.len += 1;
+}
+
+/* Queue 'len' raw bytes from 'data' at the end of the output buffer. */
+static void
+send_data(const uint8_t *data, size_t len)
+{
+	uint8_t *dst;
+
+	io_buffer_grow(&cur_resp, len);
+	dst = io_buffer_tail(&cur_resp);
+	memcpy(dst, data, len);
+	cur_resp.len += len;
+}
+
+/*
+ * Write the two lower-case hex characters for 'v' into buf[0] (high
+ * nibble) and buf[1] (low nibble).  No terminator is written.
+ */
+static void
+format_byte(uint8_t v, uint8_t *buf)
+{
+	const uint8_t hi = v >> 4;
+	const uint8_t lo = v & 0xf;
+
+	buf[0] = hex_digit(hi);
+	buf[1] = hex_digit(lo);
+}
+
+/*
+ * Queue one byte on the output buffer as two hex characters, without
+ * touching the packet checksum.
+ */
+static void
+send_byte(uint8_t v)
+{
+	uint8_t hex[2];
+
+	format_byte(v, hex);
+	send_data(hex, sizeof(hex));
+}
+
+/* Begin a new reply packet: clear the checksum and queue the '$'. */
+static void
+start_packet(void)
+{
+
+	/* The leading '$' is not part of the checksummed payload. */
+	cur_csum = 0;
+	send_char('$');
+}
+
+/*
+ * Terminate the reply packet under construction: queue '#' followed by
+ * the accumulated checksum as two hex characters.
+ */
+static void
+finish_packet(void)
+{
+	uint8_t csum_hex[2];
+
+	format_byte(cur_csum, csum_hex);
+	send_char('#');
+	send_data(csum_hex, sizeof(csum_hex));
+	debug("-> %.*s\n", (int)cur_resp.len, io_buffer_head(&cur_resp));
+}
+
+/*
+ * Add one payload character to the packet under construction, folding
+ * it into the checksum.
+ */
+static void
+append_char(uint8_t v)
+{
+
+	cur_csum += v;
+	send_char(v);
+}
+
+/*
+ * Add 'len' payload bytes to the packet under construction, folding
+ * each of them into the checksum.
+ */
+static void
+append_packet_data(const uint8_t *data, size_t len)
+{
+	size_t i;
+
+	send_data(data, len);
+	for (i = 0; i < len; i++)
+		cur_csum += data[i];
+}
+
+/* Add a NUL-terminated string (without the NUL) to the packet payload. */
+static void
+append_string(const char *str)
+{
+	size_t n;
+
+	n = strlen(str);
+#ifdef __FreeBSD__
+	append_packet_data(str, n);
+#else
+	append_packet_data((const uint8_t *)str, n);
+#endif
+}
+
+/* Add one byte to the packet payload as two hex characters. */
+static void
+append_byte(uint8_t v)
+{
+	uint8_t hex[2];
+
+	format_byte(v, hex);
+	append_packet_data(hex, sizeof(hex));
+}
+
+/*
+ * Add the low 'len' bytes of 'value' to the packet payload as hex,
+ * least significant byte first.
+ */
+static void
+append_unsigned_native(uintmax_t value, size_t len)
+{
+	size_t remaining;
+
+	for (remaining = len; remaining > 0; remaining--) {
+		append_byte(value & 0xff);
+		value >>= 8;
+	}
+}
+
+/*
+ * Add the low 'len' bytes of 'value' to the packet payload as hex,
+ * most significant byte first.
+ */
+static void
+append_unsigned_be(uintmax_t value, size_t len)
+{
+	char hex[len * 2];
+	char *slot;
+	size_t n;
+
+	/* Fill from the last byte slot back towards the first. */
+	slot = hex + len * 2;
+	for (n = len; n > 0; n--) {
+		slot -= 2;
+#ifdef __FreeBSD__
+		format_byte(value, slot);
+#else
+		format_byte(value, (uint8_t *)slot);
+#endif
+		value >>= 8;
+	}
+#ifdef __FreeBSD__
+	append_packet_data(hex, sizeof(hex));
+#else
+	append_packet_data((const uint8_t *)hex, sizeof(hex));
+#endif
+}
+
+/*
+ * Add 'value' to the packet payload as big-endian hex using the smallest
+ * whole number of bytes that holds it; zero is sent as the single
+ * character '0'.
+ */
+static void
+append_integer(unsigned int value)
+{
+
+	if (value == 0)
+		append_char('0');
+	else {
+		/*
+		 * fls() gives the number of significant bits; round that
+		 * up to bytes.  The parentheses matter: without them
+		 * "7 / 8" is evaluated first and yields 0, making the
+		 * byte count equal to the bit count.
+		 */
+		append_unsigned_be(value, (fls(value) + 7) / 8);
+	}
+}
+
+/*
+ * Add a NUL-terminated string to the packet payload with every
+ * character encoded as two hex digits.
+ */
+static void
+append_asciihex(const char *str)
+{
+	const char *cp;
+
+	for (cp = str; *cp != '\0'; cp++)
+		append_byte(*cp);
+}
+
+/*
+ * Queue a packet with an empty payload; handle_command() and gdb_query()
+ * use this as the reply to requests they do not implement.
+ */
+static void
+send_empty_response(void)
+{
+
+ start_packet();
+ finish_packet();
+}
+
+/*
+ * Queue an error reply: 'E' followed by the low byte of 'error' as two
+ * hex digits.
+ */
+static void
+send_error(int error)
+{
+	const uint8_t code = error;
+
+	start_packet();
+	append_char('E');
+	append_byte(code);
+	finish_packet();
+}
+
+/* Queue the "OK" success reply. */
+static void
+send_ok(void)
+{
+
+	start_packet();
+	append_char('O');
+	append_char('K');
+	finish_packet();
+}
+
+/*
+ * Parse a thread-id field of 'len' characters.  Returns 0 for "0", -1
+ * for "-1", -2 when the field is empty, and otherwise the field parsed
+ * as a hexadecimal number.
+ */
+static int
+parse_threadid(const uint8_t *data, size_t len)
+{
+
+	switch (len) {
+	case 0:
+		return (-2);
+	case 1:
+		if (*data == '0')
+			return (0);
+		break;
+	case 2:
+		if (memcmp(data, "-1", 2) == 0)
+			return (-1);
+		break;
+	default:
+		break;
+	}
+	return (parse_integer(data, len));
+}
+
+/*
+ * Queue a stop reply carrying SIGTRAP.  When a specific vCPU caused the
+ * stop a 'T' reply naming it as "thread:<vcpu + 1>;" is used, otherwise
+ * a plain 'S' reply.  stopped_vcpu is cleared afterwards.
+ */
+static void
+report_stop(void)
+{
+	const bool have_thread = (stopped_vcpu != -1);
+
+	start_packet();
+	append_char(have_thread ? 'T' : 'S');
+	append_byte(GDB_SIGNAL_TRAP);
+	if (have_thread) {
+		append_string("thread:");
+		append_integer(stopped_vcpu + 1);
+		append_char(';');
+	}
+	stopped_vcpu = -1;
+	finish_packet();
+}
+
+/*
+ * Called once every suspended vCPU has parked.  The first stop is
+ * consumed silently; otherwise a stop reply is sent right away, or
+ * deferred through stop_pending while an earlier response is still
+ * outstanding.
+ */
+static void
+gdb_finish_suspend_vcpus(void)
+{
+
+	if (first_stop) {
+		first_stop = false;
+		stopped_vcpu = -1;
+		return;
+	}
+	if (response_pending()) {
+		stop_pending = true;
+		return;
+	}
+	report_stop();
+	send_pending_data(cur_fd);
+}
+
+/*
+ * Park the calling vCPU thread until it is no longer a member of
+ * vcpus_suspended or it becomes the stepping vCPU. Every caller in this
+ * file takes gdb_lock first; pthread_cond_wait() releases it while the
+ * thread is blocked.
+ *
+ * When 'report_stop' is true and this vCPU is the last one to arrive
+ * (the waiting set now equals the suspended set), the stop is finalized
+ * through gdb_finish_suspend_vcpus() before blocking. Note that the
+ * parameter shadows the report_stop() function within this body.
+ */
+static void
+_gdb_cpu_suspend(int vcpu, bool report_stop)
+{
+
+ debug("$vCPU %d suspending\n", vcpu);
+ CPU_SET(vcpu, &vcpus_waiting);
+ if (report_stop && CPU_CMP(&vcpus_waiting, &vcpus_suspended) == 0)
+ gdb_finish_suspend_vcpus();
+ while (CPU_ISSET(vcpu, &vcpus_suspended) && vcpu != stepping_vcpu)
+ pthread_cond_wait(&idle_vcpus, &gdb_lock);
+ CPU_CLR(vcpu, &vcpus_waiting);
+ debug("$vCPU %d resuming\n", vcpu);
+}
+
+/*
+ * Register 'vcpu' with the GDB stub by adding it to vcpus_active; called
+ * from the vCPU's own thread before it enters the run loop. If other
+ * vCPUs are currently suspended the new one is suspended as well and
+ * parks here, without triggering a stop report.
+ */
+void
+gdb_cpu_add(int vcpu)
+{
+
+ debug("$vCPU %d starting\n", vcpu);
+ pthread_mutex_lock(&gdb_lock);
+ CPU_SET(vcpu, &vcpus_active);
+
+ /*
+ * If a vcpu is added while vcpus are stopped, suspend the new
+ * vcpu so that it will pop back out with a debug exit before
+ * executing the first instruction.
+ */
+ if (!CPU_EMPTY(&vcpus_suspended)) {
+ CPU_SET(vcpu, &vcpus_suspended);
+ _gdb_cpu_suspend(vcpu, false);
+ }
+ pthread_mutex_unlock(&gdb_lock);
+}
+
+/*
+ * Entry point used by the VM_EXITCODE_DEBUG exit handler: park 'vcpu'
+ * under gdb_lock, reporting the stop if it is the last vCPU to arrive.
+ */
+void
+gdb_cpu_suspend(int vcpu)
+{
+
+ pthread_mutex_lock(&gdb_lock);
+ _gdb_cpu_suspend(vcpu, true);
+ pthread_mutex_unlock(&gdb_lock);
+}
+
+/*
+ * Called from the monitor-trap exit handler. If 'vcpu' is the one being
+ * single-stepped, the step is complete: clear stepping_vcpu, turn the
+ * MTRAP exit capability back off, suspend the vCPU again, record it as
+ * the cause of the stop and park it. Traps on any other vCPU are
+ * ignored.
+ */
+void
+gdb_cpu_mtrap(int vcpu)
+{
+
+ debug("$vCPU %d MTRAP\n", vcpu);
+ pthread_mutex_lock(&gdb_lock);
+ if (vcpu == stepping_vcpu) {
+ stepping_vcpu = -1;
+ vm_set_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, 0);
+ vm_suspend_cpu(ctx, vcpu);
+ assert(stopped_vcpu == -1);
+ stopped_vcpu = vcpu;
+ _gdb_cpu_suspend(vcpu, true);
+ }
+ pthread_mutex_unlock(&gdb_lock);
+}
+
+/*
+ * Ask every active vCPU to stop. The caller must hold gdb_lock. If all
+ * of them are already parked (the waiting set equals the new suspended
+ * set) the stop is finalized immediately; otherwise the last vCPU to
+ * park does so from _gdb_cpu_suspend().
+ */
+static void
+gdb_suspend_vcpus(void)
+{
+
+ assert(pthread_mutex_isowned_np(&gdb_lock));
+ debug("suspending all CPUs\n");
+ vcpus_suspended = vcpus_active;
+ vm_suspend_cpu(ctx, -1);
+ if (CPU_CMP(&vcpus_waiting, &vcpus_suspended) == 0)
+ gdb_finish_suspend_vcpus();
+}
+
+/*
+ * Start a single step of 'vcpu': arm the MTRAP exit capability, resume
+ * the vCPU and wake the parked threads so that it can run.  The step
+ * completes in gdb_cpu_mtrap().
+ *
+ * Returns false, leaving the vCPU stopped, if the capability cannot be
+ * queried or enabled; true once the step is under way.
+ */
+static bool
+gdb_step_vcpu(int vcpu)
+{
+	int error, val;
+
+	debug("$vCPU %d step\n", vcpu);
+	error = vm_get_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, &val);
+	if (error < 0)
+		return (false);
+
+	/*
+	 * Do not resume the vCPU unless the trap is really armed: without
+	 * it the guest would run freely while the debugger waits for a
+	 * stop reply.
+	 */
+	error = vm_set_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, 1);
+	if (error != 0)
+		return (false);
+
+	vm_resume_cpu(ctx, vcpu);
+	stepping_vcpu = vcpu;
+	pthread_cond_broadcast(&idle_vcpus);
+	return (true);
+}
+
+/*
+ * Let all vCPUs run again: resume them in the VM, empty vcpus_suspended
+ * and wake every thread parked on idle_vcpus. The caller must hold
+ * gdb_lock.
+ */
+static void
+gdb_resume_vcpus(void)
+{
+
+ assert(pthread_mutex_isowned_np(&gdb_lock));
+ vm_resume_cpu(ctx, -1);
+ debug("resuming all CPUs\n");
+ CPU_ZERO(&vcpus_suspended);
+ pthread_cond_broadcast(&idle_vcpus);
+}
+
+/*
+ * Reply to a 'g' packet: fetch every register listed in gdb_regset from
+ * the current vCPU and send each one as little-endian hex using the
+ * width given by gdb_regsize.  Sends an errno-based error reply if the
+ * registers cannot be read.
+ */
+static void
+gdb_read_regs(void)
+{
+	uint64_t regvals[nitems(gdb_regset)];
+	int idx, rc;
+
+	rc = vm_get_register_set(ctx, cur_vcpu, nitems(gdb_regset),
+	    gdb_regset, regvals);
+	if (rc == -1) {
+		send_error(errno);
+		return;
+	}
+
+	start_packet();
+	for (idx = 0; idx < nitems(regvals); idx++)
+		append_unsigned_native(regvals[idx], gdb_regsize[idx]);
+	finish_packet();
+}
+
+/*
+ * Reply to an 'm' packet. 'data' holds the 'len' payload bytes, starting
+ * with the 'm' itself, followed by a hex guest virtual address, a comma
+ * and a hex byte count.
+ *
+ * The range is walked one page at a time on the current vCPU. Pages
+ * that paddr_guest2host() can map are copied a byte at a time; the rest
+ * are read through read_mem() in 1, 2 or 4 byte accesses. The reply is
+ * the data as hex. If a translation or read fails after some data has
+ * already been produced, the packet is finished with what was read so
+ * far; if nothing has been produced yet an error reply is sent instead.
+ */
+static void
+gdb_read_mem(const uint8_t *data, size_t len)
+{
+ uint64_t gpa, gva, val;
+ uint8_t *cp;
+ size_t resid, todo, bytes;
+ bool started;
+ int error;
+
+ cp = memchr(data, ',', len);
+ if (cp == NULL) {
+ send_error(EINVAL);
+ return;
+ }
+ /* Address sits between the command letter and the comma. */
+ gva = parse_integer(data + 1, cp - (data + 1));
+ /* Length is everything after the comma. */
+ resid = parse_integer(cp + 1, len - (cp + 1 - data));
+ started = false;
+
+ while (resid > 0) {
+ error = guest_vaddr2paddr(cur_vcpu, gva, &gpa);
+ if (error == -1) {
+ if (started)
+ finish_packet();
+ else
+ send_error(errno);
+ return;
+ }
+ if (error == 0) {
+ if (started)
+ finish_packet();
+ else
+ send_error(EFAULT);
+ return;
+ }
+
+ /* Read bytes from current page. */
+ todo = getpagesize() - gpa % getpagesize();
+ if (todo > resid)
+ todo = resid;
+
+ cp = paddr_guest2host(ctx, gpa, todo);
+ if (cp != NULL) {
+ /*
+ * If this page is guest RAM, read it a byte
+ * at a time.
+ */
+ if (!started) {
+ start_packet();
+ started = true;
+ }
+ while (todo > 0) {
+ append_byte(*cp);
+ cp++;
+ gpa++;
+ gva++;
+ resid--;
+ todo--;
+ }
+ } else {
+ /*
+ * If this page isn't guest RAM, try to handle
+ * it via MMIO. For MMIO requests, use
+ * aligned reads of words when possible.
+ */
+ while (todo > 0) {
+ if (gpa & 1 || todo == 1)
+ bytes = 1;
+ else if (gpa & 2 || todo == 2)
+ bytes = 2;
+ else
+ bytes = 4;
+ error = read_mem(ctx, cur_vcpu, gpa, &val,
+ bytes);
+ if (error == 0) {
+ if (!started) {
+ start_packet();
+ started = true;
+ }
+ gpa += bytes;
+ gva += bytes;
+ resid -= bytes;
+ todo -= bytes;
+ /* Emit the value low byte first. */
+ while (bytes > 0) {
+ append_byte(val);
+ val >>= 8;
+ bytes--;
+ }
+ } else {
+ if (started)
+ finish_packet();
+ else
+ send_error(EFAULT);
+ return;
+ }
+ }
+ }
+ assert(resid == 0 || gpa % getpagesize() == 0);
+ }
+ /* A zero-length request still gets an (empty) reply packet. */
+ if (!started)
+ start_packet();
+ finish_packet();
+}
+
+/*
+ * Report whether the 'len' bytes at 'data' begin with the string 'cmd'.
+ * This is a prefix match: bytes after the command name are ignored.
+ */
+static bool
+command_equals(const uint8_t *data, size_t len, const char *cmd)
+{
+	size_t cmdlen;
+
+	cmdlen = strlen(cmd);
+	if (cmdlen > len)
+		return (false);
+	return (memcmp(data, cmd, cmdlen) == 0);
+}
+
+/*
+ * Reply to a 'q' (general query) packet; 'data' holds the 'len' payload
+ * bytes including the leading 'q'. Thread ids exchanged with the
+ * debugger are vCPU numbers plus one. Queries that are not recognized
+ * get an empty response.
+ */
+static void
+gdb_query(const uint8_t *data, size_t len)
+{
+
+ /*
+ * TODO:
+ * - qSearch
+ * - qSupported
+ */
+ if (command_equals(data, len, "qAttached")) {
+ start_packet();
+ append_char('1');
+ finish_packet();
+ } else if (command_equals(data, len, "qC")) {
+ /* Current thread: the selected vCPU. */
+ start_packet();
+ append_string("QC");
+ append_integer(cur_vcpu + 1);
+ finish_packet();
+ } else if (command_equals(data, len, "qfThreadInfo")) {
+ cpuset_t mask;
+ bool first;
+ int vcpu;
+
+ if (CPU_EMPTY(&vcpus_active)) {
+ send_error(EINVAL);
+ return;
+ }
+ /* List every active vCPU in a single comma-separated reply. */
+ mask = vcpus_active;
+ start_packet();
+ append_char('m');
+ first = true;
+ while (!CPU_EMPTY(&mask)) {
+ vcpu = CPU_FFS(&mask) - 1;
+ CPU_CLR(vcpu, &mask);
+ if (first)
+ first = false;
+ else
+ append_char(',');
+ append_integer(vcpu + 1);
+ }
+ finish_packet();
+ } else if (command_equals(data, len, "qsThreadInfo")) {
+ /* The whole list was sent by qfThreadInfo; 'l' ends it. */
+ start_packet();
+ append_char('l');
+ finish_packet();
+ } else if (command_equals(data, len, "qThreadExtraInfo")) {
+ char buf[16];
+ int tid;
+
+ data += strlen("qThreadExtraInfo");
+ len -= strlen("qThreadExtraInfo");
+ if (*data != ',') {
+ send_error(EINVAL);
+ return;
+ }
+ tid = parse_threadid(data + 1, len - 1);
+ if (tid <= 0 || !CPU_ISSET(tid - 1, &vcpus_active)) {
+ send_error(EINVAL);
+ return;
+ }
+
+ /* The description is sent hex-encoded. */
+ snprintf(buf, sizeof(buf), "vCPU %d", tid - 1);
+ start_packet();
+ append_asciihex(buf);
+ finish_packet();
+ } else
+ send_empty_response();
+}
+
+/*
+ * Act on one packet payload whose checksum has already been verified.
+ * 'data' points at the first payload byte (the command letter) and 'len'
+ * is the payload length, excluding the '$' and the "#xx" trailer.
+ *
+ * A reply is queued for every command except 'c' and a successful 's',
+ * which are answered later by a stop reply.  Commands that are not
+ * implemented get an empty response.
+ */
+static void
+handle_command(const uint8_t *data, size_t len)
+{
+
+	/*
+	 * Reject packets with a sequence-id, i.e. two digits followed by
+	 * a colon.  Both leading characters have to be checked.
+	 */
+	if (len >= 3 && data[0] >= '0' && data[0] <= '9' &&
+	    data[1] >= '0' && data[1] <= '9' && data[2] == ':') {
+		send_empty_response();
+		return;
+	}
+
+	switch (*data) {
+	case 'c':
+		if (len != 1) {
+			send_error(EINVAL);
+			break;
+		}
+
+		/* Don't send a reply until a stop occurs. */
+		gdb_resume_vcpus();
+		break;
+	case 'D':
+		send_ok();
+
+		/* TODO: Resume any stopped CPUs. */
+		break;
+	case 'g': {
+		gdb_read_regs();
+		break;
+	}
+	case 'H': {
+		int tid;
+
+		if (data[1] != 'g' && data[1] != 'c') {
+			send_error(EINVAL);
+			break;
+		}
+		tid = parse_threadid(data + 2, len - 2);
+		if (tid == -2) {
+			send_error(EINVAL);
+			break;
+		}
+
+		if (CPU_EMPTY(&vcpus_active)) {
+			send_error(EINVAL);
+			break;
+		}
+		/* "Any" or "all" threads select the first active vCPU. */
+		if (tid == -1 || tid == 0)
+			cur_vcpu = CPU_FFS(&vcpus_active) - 1;
+		else if (CPU_ISSET(tid - 1, &vcpus_active))
+			cur_vcpu = tid - 1;
+		else {
+			send_error(EINVAL);
+			break;
+		}
+		send_ok();
+		break;
+	}
+	case 'm':
+		gdb_read_mem(data, len);
+		break;
+	case 'T': {
+		int tid;
+
+		/* Thread-alive check: the id must name an active vCPU. */
+		tid = parse_threadid(data + 1, len - 1);
+		if (tid <= 0 || !CPU_ISSET(tid - 1, &vcpus_active)) {
+			send_error(EINVAL);
+			return;
+		}
+		send_ok();
+		break;
+	}
+	case 'q':
+		gdb_query(data, len);
+		break;
+	case 's':
+		if (len != 1) {
+			send_error(EINVAL);
+			break;
+		}
+
+		/* Don't send a reply until a stop occurs. */
+		if (!gdb_step_vcpu(cur_vcpu)) {
+			send_error(EOPNOTSUPP);
+			break;
+		}
+		break;
+	case '?':
+		/* XXX: Only if stopped? */
+		/* For now, just report that we are always stopped. */
+		start_packet();
+		append_char('S');
+		append_byte(GDB_SIGNAL_TRAP);
+		finish_packet();
+		break;
+	case 'G': /* TODO */
+	case 'M': /* TODO */
+	case 'v':
+		/* Handle 'vCont' */
+		/* 'vCtrlC' */
+	case 'p': /* TODO */
+	case 'P': /* TODO */
+	case 'Q': /* TODO */
+	case 't': /* TODO */
+	case 'X': /* TODO */
+	case 'z': /* TODO */
+	case 'Z': /* TODO */
+	default:
+		send_empty_response();
+	}
+}
+
+/*
+ * Check for a valid packet in the command buffer.
+ *
+ * Consumes input from cur_comm until it is empty or ends in an
+ * incomplete packet, dispatching on the first byte each time: 0x03
+ * (Ctrl-C) suspends all vCPUs, '+' and '-' acknowledge or reject the
+ * previous response, '$' begins a command packet, and any other byte is
+ * dropped. 'fd' is the connection that replies are written to.
+ * gdb_readable() calls this with gdb_lock held.
+ */
+static void
+check_command(int fd)
+{
+ uint8_t *head, *hash, *p, sum;
+ size_t avail, plen;
+
+ for (;;) {
+ avail = cur_comm.len;
+ if (avail == 0)
+ return;
+ head = io_buffer_head(&cur_comm);
+ switch (*head) {
+ case 0x03:
+ debug("<- Ctrl-C\n");
+ io_buffer_consume(&cur_comm, 1);
+
+ gdb_suspend_vcpus();
+ break;
+ case '+':
+ /* ACK of previous response. */
+ debug("<- +\n");
+ if (response_pending())
+ io_buffer_reset(&cur_resp);
+ io_buffer_consume(&cur_comm, 1);
+ /* A stop report deferred behind that response can go now. */
+ if (stop_pending) {
+ stop_pending = false;
+ report_stop();
+ send_pending_data(fd);
+ }
+ break;
+ case '-':
+ /* NACK of previous response. */
+ debug("<- -\n");
+ if (response_pending()) {
+ /* Rewind so the whole response is sent again. */
+ cur_resp.len += cur_resp.start;
+ cur_resp.start = 0;
+ /* Skip our own leading ACK when replaying. */
+ if (cur_resp.data[0] == '+')
+ io_buffer_advance(&cur_resp, 1);
+ debug("-> %.*s\n", (int)cur_resp.len,
+ io_buffer_head(&cur_resp));
+ }
+ io_buffer_consume(&cur_comm, 1);
+ send_pending_data(fd);
+ break;
+ case '$':
+ /* Packet. */
+
+ if (response_pending()) {
+ warnx("New GDB command while response in "
+ "progress");
+ io_buffer_reset(&cur_resp);
+ }
+
+ /* Is packet complete? */
+ hash = memchr(head, '#', avail);
+ if (hash == NULL)
+ return;
+ /* '$' through '#' inclusive, plus two checksum digits. */
+ plen = (hash - head + 1) + 2;
+ if (avail < plen)
+ return;
+ debug("<- %.*s\n", (int)plen, head);
+
+ /* Verify checksum. */
+ for (sum = 0, p = head + 1; p < hash; p++)
+ sum += *p;
+ if (sum != parse_byte(hash + 1)) {
+ io_buffer_consume(&cur_comm, plen);
+ debug("-> -\n");
+ send_char('-');
+ send_pending_data(fd);
+ break;
+ }
+ /* Queue the ACK ahead of whatever the command replies. */
+ send_char('+');
+
+ handle_command(head + 1, hash - (head + 1));
+ io_buffer_consume(&cur_comm, plen);
+ if (!response_pending()) {
+ debug("-> +\n");
+ }
+ send_pending_data(fd);
+ break;
+ default:
+ /* XXX: Possibly drop connection instead. */
+ debug("-> %02x\n", *head);
+ io_buffer_consume(&cur_comm, 1);
+ break;
+ }
+ }
+}
+
+/*
+ * Read callback for the debugger socket (registered with EVF_READ by
+ * new_connection()).  Appends whatever is available on 'fd' to cur_comm and
+ * then parses it with check_command() under gdb_lock.  A zero-length read or
+ * a read error other than EAGAIN closes the connection.  'event' and 'arg'
+ * are unused.
+ */
+static void
+gdb_readable(int fd, enum ev_type event, void *arg)
+{
+ ssize_t nread;
+ int pending;
+
+ if (ioctl(fd, FIONREAD, &pending) == -1) {
+ warn("FIONREAD on GDB socket");
+ return;
+ }
+
+ /*
+ * 'pending' might be zero due to EOF. We need to call read
+ * with a non-zero length to detect EOF.
+ */
+ if (pending == 0)
+ pending = 1;
+
+ /* Ensure there is room in the command buffer. */
+ io_buffer_grow(&cur_comm, pending);
+ assert(io_buffer_avail(&cur_comm) >= pending);
+
+ nread = read(fd, io_buffer_tail(&cur_comm), io_buffer_avail(&cur_comm));
+ if (nread == 0) {
+ close_connection();
+ } else if (nread == -1) {
+ /* Nothing to read right now; wait for the next event. */
+ if (errno == EAGAIN)
+ return;
+
+ warn("Read from GDB socket");
+ close_connection();
+ } else {
+ cur_comm.len += nread;
+ pthread_mutex_lock(&gdb_lock);
+ check_command(fd);
+ pthread_mutex_unlock(&gdb_lock);
+ }
+}
+
+/*
+ * Write callback for the debugger socket (registered with EVF_WRITE by
+ * new_connection()); simply hands 'fd' to send_pending_data().  'event' and
+ * 'arg' are unused.
+ *
+ * NOTE(review): unlike gdb_readable(), this path does not take gdb_lock
+ * before calling send_pending_data() -- confirm that is intentional.
+ */
+static void
+gdb_writable(int fd, enum ev_type event, void *arg)
+{
+
+ send_pending_data(fd);
+}
+
+/*
+ * Accept callback for the listening GDB socket.
+ *
+ * Accepts one non-blocking connection on 'fd', disables SIGPIPE for it, and,
+ * if no debugger is currently attached, registers the read/write callbacks,
+ * resets the per-connection state and suspends the vcpus ("break on
+ * attach").  Only one debugger may be attached at a time; additional
+ * connections are closed immediately.
+ *
+ * A non-NULL 'arg' marks the initial connection, for which failures are
+ * fatal; otherwise failures just drop the new connection.  'event' is
+ * unused.
+ */
+static void
+new_connection(int fd, enum ev_type event, void *arg)
+{
+	int optval, s;
+
+	s = accept4(fd, NULL, NULL, SOCK_NONBLOCK);
+	if (s == -1) {
+		if (arg != NULL)
+			err(1, "Failed accepting initial GDB connection");
+
+		/* Silently ignore errors post-startup. */
+		return;
+	}
+
+	optval = 1;
+	if (setsockopt(s, SOL_SOCKET, SO_NOSIGPIPE, &optval, sizeof(optval)) ==
+	    -1) {
+		warn("Failed to disable SIGPIPE for GDB connection");
+		close(s);
+		return;
+	}
+
+	pthread_mutex_lock(&gdb_lock);
+	if (cur_fd != -1) {
+		/*
+		 * A debugger is already attached.  Drop the newcomer and
+		 * leave the existing connection's events and state alone.
+		 */
+		close(s);
+		warnx("Ignoring additional GDB connection.");
+		pthread_mutex_unlock(&gdb_lock);
+		return;
+	}
+
+	read_event = mevent_add(s, EVF_READ, gdb_readable, NULL);
+	if (read_event == NULL) {
+		if (arg != NULL)
+			err(1, "Failed to setup initial GDB connection");
+		/* Nothing owns 's' yet, so close it here to avoid a leak. */
+		close(s);
+		pthread_mutex_unlock(&gdb_lock);
+		return;
+	}
+	write_event = mevent_add(s, EVF_WRITE, gdb_writable, NULL);
+	if (write_event == NULL) {
+		if (arg != NULL)
+			err(1, "Failed to setup initial GDB connection");
+		/*
+		 * Tear down the read event; mevent_delete_close() is expected
+		 * to close 's' as well, so the connection must not be
+		 * published in cur_fd below.
+		 */
+		mevent_delete_close(read_event);
+		read_event = NULL;
+		pthread_mutex_unlock(&gdb_lock);
+		return;
+	}
+
+	cur_fd = s;
+	cur_vcpu = 0;
+	stepping_vcpu = -1;
+	stopped_vcpu = -1;
+	stop_pending = false;
+
+	/* Break on attach. */
+	first_stop = true;
+	gdb_suspend_vcpus();
+	pthread_mutex_unlock(&gdb_lock);
+}
+
+#ifndef WITHOUT_CAPSICUM
+/*
+ * Restrict the capability rights on GDB socket 's' to accept, event
+ * notification, read, write, setsockopt and ioctl, and restrict the permitted
+ * ioctls to FIONREAD.  A failure other than ENOSYS terminates the process.
+ */
+void
+limit_gdb_socket(int s)
+{
+ cap_rights_t rights;
+ unsigned long ioctls[] = { FIONREAD };
+
+ cap_rights_init(&rights, CAP_ACCEPT, CAP_EVENT, CAP_READ, CAP_WRITE,
+ CAP_SETSOCKOPT, CAP_IOCTL);
+ if (cap_rights_limit(s, &rights) == -1 && errno != ENOSYS)
+ errx(EX_OSERR, "Unable to apply rights for sandbox");
+ if (cap_ioctls_limit(s, ioctls, nitems(ioctls)) == -1 && errno != ENOSYS)
+ errx(EX_OSERR, "Unable to apply rights for sandbox");
+}
+#endif
+
+/*
+ * Initialize the GDB stub.
+ *
+ * Sets up gdb_lock and the idle_vcpus condition variable, records the VM
+ * context, and starts listening for a debugger on TCP port 'sport' (bound to
+ * INADDR_ANY).  When 'wait' is true, vcpu 0 is marked suspended so that it
+ * does not start executing until a debugger connects.  Any setup failure is
+ * fatal.
+ */
+void
+init_gdb(struct vmctx *_ctx, int sport, bool wait)
+{
+	struct sockaddr_in sin;
+	int error, flags, s;
+
+	debug("==> starting on %d, %swaiting\n", sport, wait ? "" : "not ");
+
+	error = pthread_mutex_init(&gdb_lock, NULL);
+	if (error != 0)
+		errc(1, error, "gdb mutex init");
+	error = pthread_cond_init(&idle_vcpus, NULL);
+	if (error != 0)
+		errc(1, error, "gdb cv init");
+
+	ctx = _ctx;
+	s = socket(PF_INET, SOCK_STREAM, 0);
+	if (s < 0)
+		err(1, "gdb socket create");
+
+	/* Zero the address first so no member (e.g. sin_zero) is garbage. */
+	memset(&sin, 0, sizeof(sin));
+#ifdef __FreeBSD__
+	sin.sin_len = sizeof(sin);
+#endif
+	sin.sin_family = AF_INET;
+	sin.sin_addr.s_addr = htonl(INADDR_ANY);
+	sin.sin_port = htons(sport);
+
+	if (bind(s, (struct sockaddr *)&sin, sizeof(sin)) < 0)
+		err(1, "gdb socket bind");
+
+	if (listen(s, 1) < 0)
+		err(1, "gdb socket listen");
+
+	if (wait) {
+		/*
+		 * Set vcpu 0 in vcpus_suspended.  This will trigger the
+		 * logic in gdb_cpu_add() to suspend the first vcpu before
+		 * it starts execution.  The vcpu will remain suspended
+		 * until a debugger connects.
+		 */
+		stepping_vcpu = -1;
+		stopped_vcpu = -1;
+		CPU_SET(0, &vcpus_suspended);
+	}
+
+	/*
+	 * Check F_GETFL separately: on failure it returns -1, and OR-ing
+	 * O_NONBLOCK into that would hand a bogus flag word to F_SETFL.
+	 */
+	flags = fcntl(s, F_GETFL);
+	if (flags == -1 || fcntl(s, F_SETFL, flags | O_NONBLOCK) == -1)
+		err(1, "Failed to mark gdb socket non-blocking");
+
+#ifndef WITHOUT_CAPSICUM
+	limit_gdb_socket(s);
+#endif
+	/* Without this event no debugger could ever attach; fail loudly. */
+	if (mevent_add(s, EVF_READ, new_connection, NULL) == NULL)
+		errx(1, "Failed to register gdb listen socket event");
+}
diff --git a/usr/src/cmd/bhyve/gdb.h b/usr/src/cmd/bhyve/gdb.h
new file mode 100644
index 0000000000..fa2184df16
--- /dev/null
+++ b/usr/src/cmd/bhyve/gdb.h
@@ -0,0 +1,39 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2017 John H. Baldwin <jhb@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef __GDB_H__
+#define __GDB_H__
+
+void gdb_cpu_add(int vcpu);
+void gdb_cpu_mtrap(int vcpu);
+void gdb_cpu_suspend(int vcpu);
+void init_gdb(struct vmctx *ctx, int sport, bool wait);
+
+#endif /* !__GDB_H__ */
diff --git a/usr/src/cmd/bhyve/mem.c b/usr/src/cmd/bhyve/mem.c
index e01d617a89..105d37cf56 100644
--- a/usr/src/cmd/bhyve/mem.c
+++ b/usr/src/cmd/bhyve/mem.c
@@ -136,6 +136,9 @@ mmio_rb_dump(struct mmio_rb_tree *rbt)
RB_GENERATE(mmio_rb_tree, mmio_rb_range, mr_link, mmio_rb_range_compare);
+typedef int (mem_cb_t)(struct vmctx *ctx, int vcpu, uint64_t gpa,
+ struct mem_range *mr, void *arg);
+
static int
mem_read(void *ctx, int vcpu, uint64_t gpa, uint64_t *rval, int size, void *arg)
{
@@ -158,10 +161,9 @@ mem_write(void *ctx, int vcpu, uint64_t gpa, uint64_t wval, int size, void *arg)
return (error);
}
-int
-emulate_mem(struct vmctx *ctx, int vcpu, uint64_t paddr, struct vie *vie,
- struct vm_guest_paging *paging)
-
+static int
+access_memory(struct vmctx *ctx, int vcpu, uint64_t paddr, mem_cb_t *cb,
+ void *arg)
{
struct mmio_rb_range *entry;
int err, immutable;
@@ -204,8 +206,7 @@ emulate_mem(struct vmctx *ctx, int vcpu, uint64_t paddr, struct vie *vie,
if (immutable)
pthread_rwlock_unlock(&mmio_rwlock);
- err = vmm_emulate_instruction(ctx, vcpu, paddr, vie, paging,
- mem_read, mem_write, &entry->mr_param);
+ err = cb(ctx, vcpu, paddr, &entry->mr_param, arg);
if (!immutable)
pthread_rwlock_unlock(&mmio_rwlock);
@@ -213,6 +214,60 @@ emulate_mem(struct vmctx *ctx, int vcpu, uint64_t paddr, struct vie *vie,
return (err);
}
+/* Extra arguments carried from emulate_mem() through to emulate_mem_cb(). */
+struct emulate_mem_args {
+ struct vie *vie;
+ struct vm_guest_paging *paging;
+};
+
+/*
+ * mem_cb_t callback used by emulate_mem(): runs vmm_emulate_instruction()
+ * for 'paddr' with mem_read/mem_write as the access routines and 'mr' as
+ * their argument.  'arg' must point to a struct emulate_mem_args.
+ */
+static int
+emulate_mem_cb(struct vmctx *ctx, int vcpu, uint64_t paddr, struct mem_range *mr,
+ void *arg)
+{
+ struct emulate_mem_args *ema;
+
+ ema = arg;
+ return (vmm_emulate_instruction(ctx, vcpu, paddr, ema->vie, ema->paging,
+ mem_read, mem_write, mr));
+}
+
+/*
+ * Emulate the instruction described by 'vie' that accessed guest physical
+ * address 'paddr'.  Packs 'vie' and 'paging' into an emulate_mem_args and
+ * defers to access_memory() with emulate_mem_cb as the callback; returns
+ * whatever access_memory() returns.
+ */
+int
+emulate_mem(struct vmctx *ctx, int vcpu, uint64_t paddr, struct vie *vie,
+ struct vm_guest_paging *paging)
+
+{
+ struct emulate_mem_args ema;
+
+ ema.vie = vie;
+ ema.paging = paging;
+ return (access_memory(ctx, vcpu, paddr, emulate_mem_cb, &ema));
+}
+
+/* Extra arguments carried from read_mem() through to read_mem_cb(). */
+struct read_mem_args {
+ uint64_t *rval;
+ int size;
+};
+
+/*
+ * mem_cb_t callback used by read_mem(): invokes the range's handler with
+ * MEM_F_READ for 'paddr', passing the size and result pointer taken from the
+ * struct read_mem_args that 'arg' points to.
+ */
+static int
+read_mem_cb(struct vmctx *ctx, int vcpu, uint64_t paddr, struct mem_range *mr,
+ void *arg)
+{
+ struct read_mem_args *rma;
+
+ rma = arg;
+ return (mr->handler(ctx, vcpu, MEM_F_READ, paddr, rma->size,
+ rma->rval, mr->arg1, mr->arg2));
+}
+
+/*
+ * Read 'size' bytes at guest physical address 'gpa' through the registered
+ * memory-range handler, storing the result via 'rval'.  Defers to
+ * access_memory() with read_mem_cb as the callback and returns its result.
+ */
+int
+read_mem(struct vmctx *ctx, int vcpu, uint64_t gpa, uint64_t *rval, int size)
+{
+ struct read_mem_args rma;
+
+ rma.rval = rval;
+ rma.size = size;
+ return (access_memory(ctx, vcpu, gpa, read_mem_cb, &rma));
+}
+
static int
register_mem_int(struct mmio_rb_tree *rbt, struct mem_range *memp)
{
diff --git a/usr/src/cmd/bhyve/mem.h b/usr/src/cmd/bhyve/mem.h
index f9f86fa4a0..f386d67749 100644
--- a/usr/src/cmd/bhyve/mem.h
+++ b/usr/src/cmd/bhyve/mem.h
@@ -56,6 +56,8 @@ void init_mem(void);
int emulate_mem(struct vmctx *, int vcpu, uint64_t paddr, struct vie *vie,
struct vm_guest_paging *paging);
+int read_mem(struct vmctx *ctx, int vcpu, uint64_t gpa, uint64_t *rval,
+ int size);
int register_mem(struct mem_range *memp);
int register_mem_fallback(struct mem_range *memp);
int unregister_mem(struct mem_range *memp);
diff --git a/usr/src/cmd/bhyve/rfb.c b/usr/src/cmd/bhyve/rfb.c
index 96712a6acc..d96b45c5da 100644
--- a/usr/src/cmd/bhyve/rfb.c
+++ b/usr/src/cmd/bhyve/rfb.c
@@ -546,16 +546,21 @@ rfb_send_screen(struct rfb_softc *rc, int cfd, int all)
}
for (x = 0; x < xcells; x++) {
+ if (x == (xcells - 1) && rem_x > 0)
+ cellwidth = rem_x;
+ else
+ cellwidth = PIX_PER_CELL;
+
if (rc->hw_crc)
crc_p[x] = fast_crc32(p,
- PIX_PER_CELL * sizeof(uint32_t),
+ cellwidth * sizeof(uint32_t),
crc_p[x]);
else
crc_p[x] = (uint32_t)crc32(crc_p[x],
(Bytef *)p,
- PIX_PER_CELL * sizeof(uint32_t));
+ cellwidth * sizeof(uint32_t));
- p += PIX_PER_CELL;
+ p += cellwidth;
/* check for crc delta if last row in cell */
if ((y & PIXCELL_MASK) == PIXCELL_MASK || y == (h-1)) {
@@ -568,28 +573,6 @@ rfb_send_screen(struct rfb_softc *rc, int cfd, int all)
}
}
}
-
- if (rem_x) {
- if (rc->hw_crc)
- crc_p[x] = fast_crc32(p,
- rem_x * sizeof(uint32_t),
- crc_p[x]);
- else
- crc_p[x] = (uint32_t)crc32(crc_p[x],
- (Bytef *)p,
- rem_x * sizeof(uint32_t));
- p += rem_x;
-
- if ((y & PIXCELL_MASK) == PIXCELL_MASK || y == (h-1)) {
- if (orig_crc[x] != crc_p[x]) {
- orig_crc[x] = crc_p[x];
- crc_p[x] = 1;
- changes++;
- } else {
- crc_p[x] = 0;
- }
- }
- }
}
/* If number of changes is > THRESH percent, send the whole screen */
diff --git a/usr/src/cmd/bhyvectl/bhyvectl.c b/usr/src/cmd/bhyvectl/bhyvectl.c
index 0cebc77b05..5f8932efa8 100644
--- a/usr/src/cmd/bhyvectl/bhyvectl.c
+++ b/usr/src/cmd/bhyvectl/bhyvectl.c
@@ -208,7 +208,8 @@ usage(bool cpu_intel)
" [--get-msr-bitmap]\n"
" [--get-msr-bitmap-address]\n"
" [--get-guest-sysenter]\n"
- " [--get-exit-reason]\n",
+ " [--get-exit-reason]\n"
+ " [--get-cpu-topology]\n",
progname);
if (cpu_intel) {
@@ -304,6 +305,7 @@ enum x2apic_state x2apic_state;
static int unassign_pptdev, bus, slot, func;
#endif
static int run;
+static int get_cpu_topology;
/*
* VMCB specific.
@@ -1476,6 +1478,7 @@ setup_options(bool cpu_intel)
{ "get-active-cpus", NO_ARG, &get_active_cpus, 1 },
{ "get-suspended-cpus", NO_ARG, &get_suspended_cpus, 1 },
{ "get-intinfo", NO_ARG, &get_intinfo, 1 },
+ { "get-cpu-topology", NO_ARG, &get_cpu_topology, 1 },
};
const struct option intel_opts[] = {
@@ -2336,6 +2339,14 @@ main(int argc, char *argv[])
}
}
+	if (!error && (get_cpu_topology || get_all)) {
+		uint16_t sockets, cores, threads, maxcpus;
+
+		/*
+		 * vm_get_topology() only fills in its outputs on success, so
+		 * record the result and print nothing if the query failed.
+		 */
+		error = vm_get_topology(ctx, &sockets, &cores, &threads,
+		    &maxcpus);
+		if (error == 0) {
+			printf("cpu_topology:\tsockets=%hu, cores=%hu, "
+			    "threads=%hu, maxcpus=%hu\n", sockets, cores,
+			    threads, maxcpus);
+		}
+	}
+
if (!error && run) {
error = vm_run(ctx, vcpu, &vmexit);
if (error == 0)
diff --git a/usr/src/compat/freebsd/err.h b/usr/src/compat/freebsd/err.h
new file mode 100644
index 0000000000..40d144e025
--- /dev/null
+++ b/usr/src/compat/freebsd/err.h
@@ -0,0 +1,23 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2018 Joyent, Inc.
+ */
+
+#ifndef _COMPAT_FREEBSD_ERR_H_
+#define	_COMPAT_FREEBSD_ERR_H_
+
+#include <errno.h>
+
+/*
+ * FreeBSD's errc() behaves like err() but reports the error code passed in
+ * 'num' rather than the current errno.  Emulate that by loading 'num' into
+ * errno before handing off to err(), so the message describes the caller's
+ * error and not whatever errno happened to hold.
+ */
+#define	errc(code, num, ...)	(errno = (num), err(code, __VA_ARGS__))
+
+#include_next <err.h>
+
+#endif /* _COMPAT_FREEBSD_ERR_H_ */
diff --git a/usr/src/compat/freebsd/sys/cpuset.h b/usr/src/compat/freebsd/sys/cpuset.h
index 4328ebcc46..dadadf15b2 100644
--- a/usr/src/compat/freebsd/sys/cpuset.h
+++ b/usr/src/compat/freebsd/sys/cpuset.h
@@ -52,6 +52,7 @@ int cpusetobj_ffs(const cpuset_t *set);
#include <sys/bitmap.h>
#include <machine/atomic.h>
+#include <machine/cpufunc.h>
/* For now, assume NCPU of 256 */
#define CPU_SETSIZE (256)
@@ -60,7 +61,8 @@ typedef struct {
ulong_t _bits[BT_BITOUL(CPU_SETSIZE)];
} cpuset_t;
-static __inline int cpuset_empty(const cpuset_t *set)
+static __inline int
+cpuset_isempty(const cpuset_t *set)
{
uint_t i;
@@ -71,9 +73,54 @@ static __inline int cpuset_empty(const cpuset_t *set)
return (1);
}
+/* Clear every bit in 'dst', leaving an empty CPU set. */
+static __inline void
+cpuset_zero(cpuset_t *dst)
+{
+ uint_t i;
+
+ for (i = 0; i < BT_BITOUL(CPU_SETSIZE); i++) {
+ dst->_bits[i] = 0;
+ }
+}
+
+/*
+ * Compare two CPU sets word by word.  Returns 1 when every word matches and
+ * 0 at the first difference.
+ */
+static __inline int
+cpuset_isequal(cpuset_t *s1, cpuset_t *s2)
+{
+ uint_t i;
+
+ for (i = 0; i < BT_BITOUL(CPU_SETSIZE); i++) {
+ if (s1->_bits[i] != s2->_bits[i])
+ return (0);
+ }
+ return (1);
+}
+
+/*
+ * Find the lowest-numbered CPU present in 'set'.
+ *
+ * Follows the ffs() convention: the result is the 1-based position of the
+ * first set bit across the whole set, or 0 when the set is empty.
+ */
+static __inline uint_t
+cpusetobj_ffs(const cpuset_t *set)
+{
+	uint_t i, cbit;
+
+	cbit = 0;
+	for (i = 0; i < BT_BITOUL(CPU_SETSIZE); i++) {
+		if (set->_bits[i] != 0) {
+			cbit = ffsl(set->_bits[i]);
+			/*
+			 * Each preceding word accounts for BT_NBIPUL bits
+			 * (not sizeof bytes), so scale the word index by
+			 * the number of bits per ulong_t.
+			 */
+			cbit += i * BT_NBIPUL;
+			break;
+		}
+	}
+	return (cbit);
+}
+
+
+#define CPU_SET(cpu, setp) BT_SET((setp)->_bits, cpu)
+#define CPU_CLR(cpu, setp) BT_CLEAR((setp)->_bits, cpu)
+#define CPU_ZERO(setp) cpuset_zero((setp))
+#define CPU_CMP(set1, set2) (cpuset_isequal( \
+ (cpuset_t *)(set1), \
+ (cpuset_t *)(set2)) == 0)
+#define CPU_FFS(set) cpusetobj_ffs(set)
#define CPU_ISSET(cpu, setp) BT_TEST((setp)->_bits, cpu)
-#define CPU_EMPTY(setp) cpuset_empty((setp))
+#define CPU_EMPTY(setp) cpuset_isempty((setp))
#define CPU_SET_ATOMIC(cpu, setp) \
atomic_set_long(&(BT_WIM((setp)->_bits, cpu)), BT_BIW(cpu))
#define CPU_CLR_ATOMIC(cpu, setp) \
diff --git a/usr/src/compat/freebsd/sys/ioctl.h b/usr/src/compat/freebsd/sys/ioctl.h
index e223e1e4c7..72a46b8085 100644
--- a/usr/src/compat/freebsd/sys/ioctl.h
+++ b/usr/src/compat/freebsd/sys/ioctl.h
@@ -17,6 +17,8 @@
#define _COMPAT_FREEBSD_SYS_IOCTL_H_
#include <sys/ioccom.h>
+/* Get BSD compatibility from the ioctl header */
+#define BSD_COMP
#include_next <sys/ioctl.h>
#endif /* _COMPAT_FREEBSD_SYS_IOCTL_H_ */
diff --git a/usr/src/compat/freebsd/sys/sdt.h b/usr/src/compat/freebsd/sys/sdt.h
new file mode 100644
index 0000000000..32d887c0d8
--- /dev/null
+++ b/usr/src/compat/freebsd/sys/sdt.h
@@ -0,0 +1,37 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2018 Joyent, Inc.
+ */
+
+#ifndef _COMPAT_FREEBSD_SYS_SDT_H_
+#define _COMPAT_FREEBSD_SYS_SDT_H_
+
+/* Empty macros to cover FreeBSD's SDT linker tricks */
+
+#define SDT_PROVIDER_DECLARE(mod)
+#define SDT_PROVIDER_DEFINE(mod)
+
+#define SDT_PROBE_DEFINE1(...)
+#define SDT_PROBE_DEFINE2(...)
+#define SDT_PROBE_DEFINE3(...)
+#define SDT_PROBE_DEFINE4(...)
+#define SDT_PROBE_DEFINE5(...)
+#define SDT_PROBE1(...)
+#define SDT_PROBE2(...)
+#define SDT_PROBE3(...)
+#define SDT_PROBE4(...)
+#define SDT_PROBE5(...)
+
+#include_next <sys/sdt.h>
+
+#endif /* _COMPAT_FREEBSD_SYS_SDT_H_ */
diff --git a/usr/src/lib/libvmmapi/common/mapfile-vers b/usr/src/lib/libvmmapi/common/mapfile-vers
index 8979fac4cb..ad47407281 100644
--- a/usr/src/lib/libvmmapi/common/mapfile-vers
+++ b/usr/src/lib/libvmmapi/common/mapfile-vers
@@ -47,6 +47,7 @@ SYMBOL_VERSION ILLUMOSprivate {
vm_create_devmem;
vm_create;
vm_create_devmem;
+ vm_debug_cpus;
vm_destroy;
vm_destroy;
vm_get_capability;
@@ -66,6 +67,7 @@ SYMBOL_VERSION ILLUMOSprivate {
vm_get_seg_desc;
vm_get_stat_desc;
vm_get_stats;
+ vm_get_topology;
vm_get_x2apic_state;
vm_gla2gpa;
vm_gla2gpa_nofault;
@@ -105,12 +107,15 @@ SYMBOL_VERSION ILLUMOSprivate {
vm_set_memflags;
vm_set_register;
vm_set_register_set;
+ vm_set_topology;
vm_set_x2apic_state;
vm_setup_memory;
vm_setup_pptdev_msi;
vm_setup_pptdev_msix;
vm_suspend;
+ vm_suspend_cpu;
vm_suspended_cpus;
+ vm_resume_cpu;
vm_unassign_pptdev;
local:
diff --git a/usr/src/lib/libvmmapi/common/vmmapi.c b/usr/src/lib/libvmmapi/common/vmmapi.c
index 3cc4df93c3..de86e2b9bd 100644
--- a/usr/src/lib/libvmmapi/common/vmmapi.c
+++ b/usr/src/lib/libvmmapi/common/vmmapi.c
@@ -1545,6 +1545,13 @@ vm_suspended_cpus(struct vmctx *ctx, cpuset_t *cpus)
}
+/*
+ * Retrieve the VM_DEBUG_CPUS set into 'cpus' via vm_get_cpus() and return
+ * its result.
+ */
 int
+vm_debug_cpus(struct vmctx *ctx, cpuset_t *cpus)
+{
+
+ return (vm_get_cpus(ctx, VM_DEBUG_CPUS, cpus));
+}
+
+int
vm_activate_cpu(struct vmctx *ctx, int vcpu)
{
struct vm_activate_cpu ac;
@@ -1557,6 +1564,30 @@ vm_activate_cpu(struct vmctx *ctx, int vcpu)
}
+/*
+ * Issue the VM_SUSPEND_CPU ioctl for 'vcpu'.  The request reuses
+ * struct vm_activate_cpu, zeroed apart from the vcpu id.  Returns the
+ * ioctl() result.
+ */
 int
+vm_suspend_cpu(struct vmctx *ctx, int vcpu)
+{
+ struct vm_activate_cpu ac;
+ int error;
+
+ bzero(&ac, sizeof(struct vm_activate_cpu));
+ ac.vcpuid = vcpu;
+ error = ioctl(ctx->fd, VM_SUSPEND_CPU, &ac);
+ return (error);
+}
+
+/*
+ * Issue the VM_RESUME_CPU ioctl for 'vcpu'.  The request reuses
+ * struct vm_activate_cpu, zeroed apart from the vcpu id.  Returns the
+ * ioctl() result.
+ */
+int
+vm_resume_cpu(struct vmctx *ctx, int vcpu)
+{
+ struct vm_activate_cpu ac;
+ int error;
+
+ bzero(&ac, sizeof(struct vm_activate_cpu));
+ ac.vcpuid = vcpu;
+ error = ioctl(ctx->fd, VM_RESUME_CPU, &ac);
+ return (error);
+}
+
+int
vm_get_intinfo(struct vmctx *ctx, int vcpu, uint64_t *info1, uint64_t *info2)
{
struct vm_intinfo vmii;
@@ -1646,6 +1677,38 @@ vm_restart_instruction(void *arg, int vcpu)
}
+/*
+ * Set the VM's CPU topology (sockets, cores, threads and maxcpus) with the
+ * VM_SET_TOPOLOGY ioctl.  Returns the ioctl() result.
+ */
 int
+vm_set_topology(struct vmctx *ctx,
+ uint16_t sockets, uint16_t cores, uint16_t threads, uint16_t maxcpus)
+{
+ struct vm_cpu_topology topology;
+
+ bzero(&topology, sizeof (struct vm_cpu_topology));
+ topology.sockets = sockets;
+ topology.cores = cores;
+ topology.threads = threads;
+ topology.maxcpus = maxcpus;
+ return (ioctl(ctx->fd, VM_SET_TOPOLOGY, &topology));
+}
+
+/*
+ * Query the VM's CPU topology with the VM_GET_TOPOLOGY ioctl.
+ *
+ * Returns the ioctl() result.  The four output parameters are written only
+ * when that result is 0; on failure they are left untouched, so callers must
+ * check the return value before using them.
+ */
+int
+vm_get_topology(struct vmctx *ctx,
+ uint16_t *sockets, uint16_t *cores, uint16_t *threads, uint16_t *maxcpus)
+{
+ struct vm_cpu_topology topology;
+ int error;
+
+ bzero(&topology, sizeof (struct vm_cpu_topology));
+ error = ioctl(ctx->fd, VM_GET_TOPOLOGY, &topology);
+ if (error == 0) {
+ *sockets = topology.sockets;
+ *cores = topology.cores;
+ *threads = topology.threads;
+ *maxcpus = topology.maxcpus;
+ }
+ return (error);
+}
+
+int
vm_get_device_fd(struct vmctx *ctx)
{
@@ -1673,9 +1736,10 @@ vm_get_ioctls(size_t *len)
VM_SET_X2APIC_STATE, VM_GET_X2APIC_STATE,
VM_GET_HPET_CAPABILITIES, VM_GET_GPA_PMAP, VM_GLA2GPA,
VM_GLA2GPA_NOFAULT,
- VM_ACTIVATE_CPU, VM_GET_CPUS, VM_SET_INTINFO, VM_GET_INTINFO,
+ VM_ACTIVATE_CPU, VM_GET_CPUS, VM_SUSPEND_CPU, VM_RESUME_CPU,
+ VM_SET_INTINFO, VM_GET_INTINFO,
VM_RTC_WRITE, VM_RTC_READ, VM_RTC_SETTIME, VM_RTC_GETTIME,
- VM_RESTART_INSTRUCTION };
+ VM_RESTART_INSTRUCTION, VM_SET_TOPOLOGY, VM_GET_TOPOLOGY };
if (len == NULL) {
cmds = malloc(sizeof(vm_ioctl_cmds));
diff --git a/usr/src/lib/libvmmapi/common/vmmapi.h b/usr/src/lib/libvmmapi/common/vmmapi.h
index ae8bb5d3b8..cfceafc6f4 100644
--- a/usr/src/lib/libvmmapi/common/vmmapi.h
+++ b/usr/src/lib/libvmmapi/common/vmmapi.h
@@ -245,7 +245,16 @@ int vcpu_reset(struct vmctx *ctx, int vcpu);
int vm_active_cpus(struct vmctx *ctx, cpuset_t *cpus);
int vm_suspended_cpus(struct vmctx *ctx, cpuset_t *cpus);
+int vm_debug_cpus(struct vmctx *ctx, cpuset_t *cpus);
int vm_activate_cpu(struct vmctx *ctx, int vcpu);
+int vm_suspend_cpu(struct vmctx *ctx, int vcpu);
+int vm_resume_cpu(struct vmctx *ctx, int vcpu);
+
+/* CPU topology */
+int vm_set_topology(struct vmctx *ctx, uint16_t sockets, uint16_t cores,
+ uint16_t threads, uint16_t maxcpus);
+int vm_get_topology(struct vmctx *ctx, uint16_t *sockets, uint16_t *cores,
+ uint16_t *threads, uint16_t *maxcpus);
#ifdef __FreeBSD__
/*
diff --git a/usr/src/uts/i86pc/io/vmm/README.sync b/usr/src/uts/i86pc/io/vmm/README.sync
index d7e281f250..667f34b9de 100644
--- a/usr/src/uts/i86pc/io/vmm/README.sync
+++ b/usr/src/uts/i86pc/io/vmm/README.sync
@@ -1,15 +1,22 @@
The bhyve kernel module and its associated userland consumers have been updated
to the latest upstream FreeBSD sources as of:
-commit 32e2f94b53c0599c7c674ff88c75b289f714c7c9
+commit 0fac2150fc0f1befa5803ca010ed63a6335847ad
Author: grehan <grehan@FreeBSD.org>
-Date: Sun Mar 11 08:27:11 2018 +0000
+Date: Fri May 4 01:36:49 2018 +0000
- Add CR2 get/set support.
+ Allow arbitrary numbers of columns for VNC server screen resolution.
- Reported/Tested by: Fabian Freyer
- Reviewed by: araujo
- Differential Revision: https://reviews.freebsd.org/D14648
+ The prior code only allowed multiples of 32 for the
+ numbers of columns. Remove this restriction to allow
+ a forthcoming UEFI firmware update to allow arbitrary
+ x,y resolutions.
+
+ (the code for handling rows already supported non mult-32 values)
+
+ Reviewed by: Leon Dang (original author)
MFC after: 3 weeks
+ Differential Revision: https://reviews.freebsd.org/D15274
+
-Which corresponds to SVN revision: 330764
+Which corresponds to SVN revision: 333235
diff --git a/usr/src/uts/i86pc/io/vmm/amd/svm.c b/usr/src/uts/i86pc/io/vmm/amd/svm.c
index 3b4a279627..f3ce78148b 100644
--- a/usr/src/uts/i86pc/io/vmm/amd/svm.c
+++ b/usr/src/uts/i86pc/io/vmm/amd/svm.c
@@ -2039,6 +2039,12 @@ svm_vmrun(void *arg, int vcpu, register_t rip, pmap_t pmap,
break;
}
+ if (vcpu_debugged(vm, vcpu)) {
+ enable_gintr();
+ vm_exit_debug(vm, vcpu, state->rip);
+ break;
+ }
+
svm_inj_interrupts(svm_sc, vcpu, vlapic);
/* Activate the nested pmap on 'curcpu' */
diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx.c b/usr/src/uts/i86pc/io/vmm/intel/vmx.c
index af63e0a426..94c588a5c3 100644
--- a/usr/src/uts/i86pc/io/vmm/intel/vmx.c
+++ b/usr/src/uts/i86pc/io/vmm/intel/vmx.c
@@ -224,6 +224,82 @@ SYSCTL_UINT(_hw_vmm_vmx, OID_AUTO, vpid_alloc_failed, CTLFLAG_RD,
&vpid_alloc_failed, 0, NULL);
/*
+ * The definitions of SDT probes for VMX.
+ */
+
+SDT_PROBE_DEFINE3(vmm, vmx, exit, entry,
+ "struct vmx *", "int", "struct vm_exit *");
+
+SDT_PROBE_DEFINE4(vmm, vmx, exit, taskswitch,
+ "struct vmx *", "int", "struct vm_exit *", "struct vm_task_switch *");
+
+SDT_PROBE_DEFINE4(vmm, vmx, exit, craccess,
+ "struct vmx *", "int", "struct vm_exit *", "uint64_t");
+
+SDT_PROBE_DEFINE4(vmm, vmx, exit, rdmsr,
+ "struct vmx *", "int", "struct vm_exit *", "uint32_t");
+
+SDT_PROBE_DEFINE5(vmm, vmx, exit, wrmsr,
+ "struct vmx *", "int", "struct vm_exit *", "uint32_t", "uint64_t");
+
+SDT_PROBE_DEFINE3(vmm, vmx, exit, halt,
+ "struct vmx *", "int", "struct vm_exit *");
+
+SDT_PROBE_DEFINE3(vmm, vmx, exit, mtrap,
+ "struct vmx *", "int", "struct vm_exit *");
+
+SDT_PROBE_DEFINE3(vmm, vmx, exit, pause,
+ "struct vmx *", "int", "struct vm_exit *");
+
+SDT_PROBE_DEFINE3(vmm, vmx, exit, intrwindow,
+ "struct vmx *", "int", "struct vm_exit *");
+
+SDT_PROBE_DEFINE4(vmm, vmx, exit, interrupt,
+ "struct vmx *", "int", "struct vm_exit *", "uint32_t");
+
+SDT_PROBE_DEFINE3(vmm, vmx, exit, nmiwindow,
+ "struct vmx *", "int", "struct vm_exit *");
+
+SDT_PROBE_DEFINE3(vmm, vmx, exit, inout,
+ "struct vmx *", "int", "struct vm_exit *");
+
+SDT_PROBE_DEFINE3(vmm, vmx, exit, cpuid,
+ "struct vmx *", "int", "struct vm_exit *");
+
+SDT_PROBE_DEFINE5(vmm, vmx, exit, exception,
+ "struct vmx *", "int", "struct vm_exit *", "uint32_t", "int");
+
+SDT_PROBE_DEFINE5(vmm, vmx, exit, nestedfault,
+ "struct vmx *", "int", "struct vm_exit *", "uint64_t", "uint64_t");
+
+SDT_PROBE_DEFINE4(vmm, vmx, exit, mmiofault,
+ "struct vmx *", "int", "struct vm_exit *", "uint64_t");
+
+SDT_PROBE_DEFINE3(vmm, vmx, exit, eoi,
+ "struct vmx *", "int", "struct vm_exit *");
+
+SDT_PROBE_DEFINE3(vmm, vmx, exit, apicaccess,
+ "struct vmx *", "int", "struct vm_exit *");
+
+SDT_PROBE_DEFINE4(vmm, vmx, exit, apicwrite,
+ "struct vmx *", "int", "struct vm_exit *", "struct vlapic *");
+
+SDT_PROBE_DEFINE3(vmm, vmx, exit, xsetbv,
+ "struct vmx *", "int", "struct vm_exit *");
+
+SDT_PROBE_DEFINE3(vmm, vmx, exit, monitor,
+ "struct vmx *", "int", "struct vm_exit *");
+
+SDT_PROBE_DEFINE3(vmm, vmx, exit, mwait,
+ "struct vmx *", "int", "struct vm_exit *");
+
+SDT_PROBE_DEFINE4(vmm, vmx, exit, unknown,
+ "struct vmx *", "int", "struct vm_exit *", "uint32_t");
+
+SDT_PROBE_DEFINE4(vmm, vmx, exit, return,
+ "struct vmx *", "int", "struct vm_exit *", "int");
+
+/*
* Use the last page below 4GB as the APIC access address. This address is
* occupied by the boot firmware so it is guaranteed that it will not conflict
* with a page in system memory.
@@ -2302,6 +2378,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
vmexit->exitcode = VM_EXITCODE_BOGUS;
vmm_stat_incr(vmx->vm, vcpu, VMEXIT_COUNT, 1);
+ SDT_PROBE3(vmm, vmx, exit, entry, vmx, vcpu, vmexit);
/*
* VM-entry failures during or after loading guest state.
@@ -2408,6 +2485,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
}
}
vmexit->exitcode = VM_EXITCODE_TASK_SWITCH;
+ SDT_PROBE4(vmm, vmx, exit, taskswitch, vmx, vcpu, vmexit, ts);
VCPU_CTR4(vmx->vm, vcpu, "task switch reason %d, tss 0x%04x, "
"%s errcode 0x%016lx", ts->reason, ts->tsssel,
ts->ext ? "external" : "internal",
@@ -2415,6 +2493,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
break;
case EXIT_REASON_CR_ACCESS:
vmm_stat_incr(vmx->vm, vcpu, VMEXIT_CR_ACCESS, 1);
+ SDT_PROBE4(vmm, vmx, exit, craccess, vmx, vcpu, vmexit, qual);
switch (qual & 0xf) {
case 0:
handled = vmx_emulate_cr0_access(vmx, vcpu, qual);
@@ -2432,6 +2511,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
retu = false;
ecx = vmxctx->guest_rcx;
VCPU_CTR1(vmx->vm, vcpu, "rdmsr 0x%08x", ecx);
+ SDT_PROBE4(vmm, vmx, exit, rdmsr, vmx, vcpu, vmexit, ecx);
error = emulate_rdmsr(vmx, vcpu, ecx, &retu);
if (error) {
vmexit->exitcode = VM_EXITCODE_RDMSR;
@@ -2452,6 +2532,8 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
edx = vmxctx->guest_rdx;
VCPU_CTR2(vmx->vm, vcpu, "wrmsr 0x%08x value 0x%016lx",
ecx, (uint64_t)edx << 32 | eax);
+		/* Probe arguments: vmx, vcpu, vmexit, MSR number, value. */
+		SDT_PROBE5(vmm, vmx, exit, wrmsr, vmx, vcpu, vmexit, ecx,
+		    (uint64_t)edx << 32 | eax);
error = emulate_wrmsr(vmx, vcpu, ecx,
(uint64_t)edx << 32 | eax, &retu);
if (error) {
@@ -2468,6 +2550,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
break;
case EXIT_REASON_HLT:
vmm_stat_incr(vmx->vm, vcpu, VMEXIT_HLT, 1);
+ SDT_PROBE3(vmm, vmx, exit, halt, vmx, vcpu, vmexit);
vmexit->exitcode = VM_EXITCODE_HLT;
vmexit->u.hlt.rflags = vmcs_read(VMCS_GUEST_RFLAGS);
if (virtual_interrupt_delivery)
@@ -2478,15 +2561,18 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
break;
case EXIT_REASON_MTF:
vmm_stat_incr(vmx->vm, vcpu, VMEXIT_MTRAP, 1);
+ SDT_PROBE3(vmm, vmx, exit, mtrap, vmx, vcpu, vmexit);
vmexit->exitcode = VM_EXITCODE_MTRAP;
vmexit->inst_length = 0;
break;
case EXIT_REASON_PAUSE:
vmm_stat_incr(vmx->vm, vcpu, VMEXIT_PAUSE, 1);
+ SDT_PROBE3(vmm, vmx, exit, pause, vmx, vcpu, vmexit);
vmexit->exitcode = VM_EXITCODE_PAUSE;
break;
case EXIT_REASON_INTR_WINDOW:
vmm_stat_incr(vmx->vm, vcpu, VMEXIT_INTR_WINDOW, 1);
+ SDT_PROBE3(vmm, vmx, exit, intrwindow, vmx, vcpu, vmexit);
vmx_clear_int_window_exiting(vmx, vcpu);
return (1);
case EXIT_REASON_EXT_INTR:
@@ -2500,6 +2586,8 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
* this virtual interrupt during the subsequent VM enter.
*/
intr_info = vmcs_read(VMCS_EXIT_INTR_INFO);
+ SDT_PROBE4(vmm, vmx, exit, interrupt,
+ vmx, vcpu, vmexit, intr_info);
/*
* XXX: Ignore this exit if VMCS_INTR_VALID is not set.
@@ -2519,6 +2607,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
vmm_stat_incr(vmx->vm, vcpu, VMEXIT_EXTINT, 1);
return (1);
case EXIT_REASON_NMI_WINDOW:
+ SDT_PROBE3(vmm, vmx, exit, nmiwindow, vmx, vcpu, vmexit);
/* Exit to allow the pending virtual NMI to be injected */
if (vm_nmi_pending(vmx->vm, vcpu))
vmx_inject_nmi(vmx, vcpu);
@@ -2546,9 +2635,11 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
vis->addrsize = inout_str_addrsize(inst_info);
inout_str_seginfo(vmx, vcpu, inst_info, in, vis);
}
+ SDT_PROBE3(vmm, vmx, exit, inout, vmx, vcpu, vmexit);
break;
case EXIT_REASON_CPUID:
vmm_stat_incr(vmx->vm, vcpu, VMEXIT_CPUID, 1);
+ SDT_PROBE3(vmm, vmx, exit, cpuid, vmx, vcpu, vmexit);
handled = vmx_handle_cpuid(vmx->vm, vcpu, vmxctx);
break;
case EXIT_REASON_EXCEPTION:
@@ -2617,6 +2708,8 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
}
VCPU_CTR2(vmx->vm, vcpu, "Reflecting exception %d/%#x into "
"the guest", intr_vec, errcode);
+ SDT_PROBE5(vmm, vmx, exit, exception,
+ vmx, vcpu, vmexit, intr_vec, errcode);
error = vm_inject_exception(vmx->vm, vcpu, intr_vec,
errcode_valid, errcode, 0);
KASSERT(error == 0, ("%s: vm_inject_exception error %d",
@@ -2637,9 +2730,13 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
vmexit->u.paging.gpa = gpa;
vmexit->u.paging.fault_type = ept_fault_type(qual);
vmm_stat_incr(vmx->vm, vcpu, VMEXIT_NESTED_FAULT, 1);
+ SDT_PROBE5(vmm, vmx, exit, nestedfault,
+ vmx, vcpu, vmexit, gpa, qual);
} else if (ept_emulation_fault(qual)) {
vmexit_inst_emul(vmexit, gpa, vmcs_gla());
vmm_stat_incr(vmx->vm, vcpu, VMEXIT_INST_EMUL, 1);
+ SDT_PROBE4(vmm, vmx, exit, mmiofault,
+ vmx, vcpu, vmexit, gpa);
}
/*
* If Virtual NMIs control is 1 and the VM-exit is due to an
@@ -2656,9 +2753,11 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
case EXIT_REASON_VIRTUALIZED_EOI:
vmexit->exitcode = VM_EXITCODE_IOAPIC_EOI;
vmexit->u.ioapic_eoi.vector = qual & 0xFF;
+ SDT_PROBE3(vmm, vmx, exit, eoi, vmx, vcpu, vmexit);
vmexit->inst_length = 0; /* trap-like */
break;
case EXIT_REASON_APIC_ACCESS:
+ SDT_PROBE3(vmm, vmx, exit, apicaccess, vmx, vcpu, vmexit);
handled = vmx_handle_apic_access(vmx, vcpu, vmexit);
break;
case EXIT_REASON_APIC_WRITE:
@@ -2668,18 +2767,25 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
*/
vmexit->inst_length = 0;
vlapic = vm_lapic(vmx->vm, vcpu);
+ SDT_PROBE4(vmm, vmx, exit, apicwrite,
+ vmx, vcpu, vmexit, vlapic);
handled = vmx_handle_apic_write(vmx, vcpu, vlapic, qual);
break;
case EXIT_REASON_XSETBV:
+ SDT_PROBE3(vmm, vmx, exit, xsetbv, vmx, vcpu, vmexit);
handled = vmx_emulate_xsetbv(vmx, vcpu, vmexit);
break;
case EXIT_REASON_MONITOR:
+ SDT_PROBE3(vmm, vmx, exit, monitor, vmx, vcpu, vmexit);
vmexit->exitcode = VM_EXITCODE_MONITOR;
break;
case EXIT_REASON_MWAIT:
+ SDT_PROBE3(vmm, vmx, exit, mwait, vmx, vcpu, vmexit);
vmexit->exitcode = VM_EXITCODE_MWAIT;
break;
default:
+ SDT_PROBE4(vmm, vmx, exit, unknown,
+ vmx, vcpu, vmexit, reason);
vmm_stat_incr(vmx->vm, vcpu, VMEXIT_UNKNOWN, 1);
break;
}
@@ -2715,6 +2821,9 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
*/
}
}
+
+ SDT_PROBE4(vmm, vmx, exit, return,
+ vmx, vcpu, vmexit, handled);
return (handled);
}
@@ -2951,6 +3060,12 @@ vmx_run(void *arg, int vcpu, register_t rip, pmap_t pmap,
break;
}
+ if (vcpu_debugged(vm, vcpu)) {
+ enable_intr();
+ vm_exit_debug(vmx->vm, vcpu, rip);
+ break;
+ }
+
vmx_run_trace(vmx, vcpu);
vmx_dr_enter_guest(vmxctx);
rc = vmx_enter_guest(vmxctx, vmx, launched);
diff --git a/usr/src/uts/i86pc/io/vmm/io/ppt.c b/usr/src/uts/i86pc/io/vmm/io/ppt.c
index dabbe584fd..89e9f35c10 100644
--- a/usr/src/uts/i86pc/io/vmm/io/ppt.c
+++ b/usr/src/uts/i86pc/io/vmm/io/ppt.c
@@ -817,6 +817,10 @@ ppt_flr(dev_info_t *dip, boolean_t force)
return (B_TRUE);
fail:
+ /*
+ * TODO: If the FLR fails for some reason, we should attempt a reset
+ * using the PCI power management facilities (if possible).
+ */
pci_config_teardown(&hdl);
return (B_FALSE);
}
diff --git a/usr/src/uts/i86pc/io/vmm/vmm.c b/usr/src/uts/i86pc/io/vmm/vmm.c
index 050d8e752c..c4988c6d72 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm.c
+++ b/usr/src/uts/i86pc/io/vmm/vmm.c
@@ -174,6 +174,7 @@ struct vm {
struct vpmtmr *vpmtmr; /* (i) virtual ACPI PM timer */
struct vrtc *vrtc; /* (o) virtual RTC */
volatile cpuset_t active_cpus; /* (i) active vcpus */
+ volatile cpuset_t debug_cpus; /* (i) vcpus stopped for debug */
int suspend; /* (i) stop VM execution */
volatile cpuset_t suspended_cpus; /* (i) suspended vcpus */
volatile cpuset_t halted_cpus; /* (x) cpus in a hard halt */
@@ -190,6 +191,11 @@ struct vm {
struct vmspace *vmspace; /* (o) guest's address space */
char name[VM_MAX_NAMELEN]; /* (o) virtual machine name */
struct vcpu vcpu[VM_MAXCPU]; /* (i) guest vcpus */
+ /* The following describe the vm cpu topology */
+ uint16_t sockets; /* (o) num of sockets */
+ uint16_t cores; /* (o) num of cores/socket */
+ uint16_t threads; /* (o) num of threads/core */
+ uint16_t maxcpus; /* (o) max pluggable cpus */
#ifndef __FreeBSD__
krwlock_t ioport_rwlock;
list_t ioport_hooks;
@@ -231,6 +237,8 @@ static struct vmm_ops *ops;
#define fpu_start_emulating() load_cr0(rcr0() | CR0_TS)
#define fpu_stop_emulating() clts()
+SDT_PROVIDER_DEFINE(vmm);
+
static MALLOC_DEFINE(M_VM, "vm", "vm");
/* statistics */
@@ -520,6 +528,7 @@ vm_init(struct vm *vm, bool create)
#endif /* __FreeBSD__ */
CPU_ZERO(&vm->active_cpus);
+ CPU_ZERO(&vm->debug_cpus);
vm->suspend = 0;
CPU_ZERO(&vm->suspended_cpus);
@@ -528,6 +537,12 @@ vm_init(struct vm *vm, bool create)
vcpu_init(vm, i, create);
}
+/*
+ * The default CPU topology is a single thread per package.
+ */
+u_int cores_per_package = 1;
+u_int threads_per_core = 1;
+
int
vm_create(const char *name, struct vm **retvm)
{
@@ -553,12 +568,43 @@ vm_create(const char *name, struct vm **retvm)
vm->vmspace = vmspace;
mtx_init(&vm->rendezvous_mtx, "vm rendezvous lock", 0, MTX_DEF);
+ vm->sockets = 1;
+ vm->cores = cores_per_package; /* XXX backwards compatibility */
+ vm->threads = threads_per_core; /* XXX backwards compatibility */
+ vm->maxcpus = 0; /* XXX not implemented */
+
vm_init(vm, true);
*retvm = vm;
return (0);
}
+/*
+ * Copy the guest CPU topology recorded in 'vm' out to the caller.  All four
+ * output pointers are written unconditionally, so none of them may be NULL.
+ */
+void
+vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores,
+    uint16_t *threads, uint16_t *maxcpus)
+{
+	const struct vm *src = vm;
+
+	*sockets = src->sockets;
+	*cores = src->cores;
+	*threads = src->threads;
+	*maxcpus = src->maxcpus;
+}
+
+/*
+ * Record a new guest CPU topology in 'vm'.
+ *
+ * Returns EINVAL, leaving the stored topology untouched, when 'maxcpus' is
+ * non-zero (not yet supported) or when sockets * cores * threads exceeds
+ * VM_MAXCPU.  Returns 0 once the new values have been stored.
+ */
+int
+vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
+    uint16_t threads, uint16_t maxcpus)
+{
+	uint64_t ncpus;
+
+	if (maxcpus != 0)
+		return (EINVAL);	/* XXX remove when supported */
+
+	/*
+	 * Widen before multiplying: the uint16_t operands would otherwise be
+	 * promoted to int, where a large product overflows and could wrap to
+	 * a value that slips past the limit check below.
+	 */
+	ncpus = (uint64_t)sockets * cores * threads;
+	if (ncpus > VM_MAXCPU)
+		return (EINVAL);
+	/* XXX need to check sockets * cores * threads == vCPU, how? */
+	vm->sockets = sockets;
+	vm->cores = cores;
+	vm->threads = threads;
+	vm->maxcpus = maxcpus;
+	return (0);
+}
+
static void
vm_cleanup(struct vm *vm, bool destroy)
{
@@ -1435,6 +1481,9 @@ vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled, bool *retu)
if (vcpu_should_yield(vm, vcpuid))
break;
+ if (vcpu_debugged(vm, vcpuid))
+ break;
+
/*
* Some Linux guests implement "halt" by having all vcpus
* execute HLT with interrupts disabled. 'halted_cpus' keeps
@@ -1718,6 +1767,17 @@ vm_exit_suspended(struct vm *vm, int vcpuid, uint64_t rip)
}
+/*
+ * Fill in the vcpu's exit record so that it reports VM_EXITCODE_DEBUG at
+ * 'rip', with an instruction length of 0.
+ */
void
+vm_exit_debug(struct vm *vm, int vcpuid, uint64_t rip)
+{
+	struct vm_exit *info = vm_exitinfo(vm, vcpuid);
+
+	info->exitcode = VM_EXITCODE_DEBUG;
+	info->inst_length = 0;
+	info->rip = rip;
+}
+
+void
vm_exit_rendezvous(struct vm *vm, int vcpuid, uint64_t rip)
{
struct vm_exit *vmexit;
@@ -2609,6 +2669,55 @@ vm_activate_cpu(struct vm *vm, int vcpuid)
return (0);
}
+/*
+ * Add one vcpu to the VM's debug set, or every active vcpu when 'vcpuid' is
+ * -1, and call vcpu_notify_event() on each vcpu so marked.
+ *
+ * Returns EINVAL if 'vcpuid' is out of range or names a vcpu that is not in
+ * the active set; returns 0 otherwise.
+ */
+int
+vm_suspend_cpu(struct vm *vm, int vcpuid)
+{
+	int cpu;
+
+	if (vcpuid >= VM_MAXCPU || vcpuid < -1)
+		return (EINVAL);
+
+	if (vcpuid != -1) {
+		/* Single vcpu: it must already be active. */
+		if (!CPU_ISSET(vcpuid, &vm->active_cpus))
+			return (EINVAL);
+
+		CPU_SET_ATOMIC(vcpuid, &vm->debug_cpus);
+		vcpu_notify_event(vm, vcpuid, false);
+		return (0);
+	}
+
+	/* vcpuid == -1: the debug set becomes a copy of the active set. */
+	vm->debug_cpus = vm->active_cpus;
+	for (cpu = 0; cpu < VM_MAXCPU; cpu++) {
+		if (!CPU_ISSET(cpu, &vm->active_cpus))
+			continue;
+		vcpu_notify_event(vm, cpu, false);
+	}
+	return (0);
+}
+
+/*
+ * Remove one vcpu from the VM's debug set, or empty the set entirely when
+ * 'vcpuid' is -1.
+ *
+ * Returns EINVAL if 'vcpuid' is out of range or names a vcpu that is not
+ * currently in the debug set; returns 0 otherwise.
+ */
+int
+vm_resume_cpu(struct vm *vm, int vcpuid)
+{
+	if (vcpuid >= VM_MAXCPU || vcpuid < -1)
+		return (EINVAL);
+
+	if (vcpuid == -1) {
+		CPU_ZERO(&vm->debug_cpus);
+		return (0);
+	}
+
+	if (!CPU_ISSET(vcpuid, &vm->debug_cpus))
+		return (EINVAL);
+
+	CPU_CLR_ATOMIC(vcpuid, &vm->debug_cpus);
+	return (0);
+}
+
+/*
+ * Report whether 'vcpuid' is a member of the VM's debug cpu set; the value
+ * returned is whatever CPU_ISSET() yields for that membership test.
+ */
+int
+vcpu_debugged(struct vm *vm, int vcpuid)
+{
+	int stopped;
+
+	stopped = CPU_ISSET(vcpuid, &vm->debug_cpus);
+	return (stopped);
+}
+
cpuset_t
vm_active_cpus(struct vm *vm)
{
@@ -2617,6 +2726,13 @@ vm_active_cpus(struct vm *vm)
}
+/*
+ * Return, by value, a copy of the VM's debug cpu set.
+ */
cpuset_t
+vm_debug_cpus(struct vm *vm)
+{
+	cpuset_t stopped = vm->debug_cpus;
+
+	return (stopped);
+}
+
+cpuset_t
vm_suspended_cpus(struct vm *vm)
{
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_host.c b/usr/src/uts/i86pc/io/vmm/vmm_host.c
index 639de087d0..9e390c93dd 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm_host.c
+++ b/usr/src/uts/i86pc/io/vmm/vmm_host.c
@@ -73,7 +73,16 @@ vmm_host_state_init(void)
*/
vmm_host_cr0 = rcr0() | CR0_TS;
- vmm_host_cr4 = rcr4();
+ /*
+ * On machines without PCID, or with PCID but without INVPCID
+ * support, we flush kernel (i.e. global) TLB entries by temporarily
+ * clearing the CR4.PGE bit, see invltlb_glob(). If
+ * preemption occurs at the wrong time, cached vmm_host_cr4
+ * might store the value with CR4.PGE cleared. Since FreeBSD
+ * requires support for PG_G on amd64, just set it
+ * unconditionally.
+ */
+ vmm_host_cr4 = rcr4() | CR4_PGE;
/*
* Only permit a guest to use XSAVE if the host is using
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c b/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c
index 66d5ce3b5d..a8381a9c0a 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c
+++ b/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c
@@ -964,6 +964,12 @@ vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md,
case VM_ACTIVATE_CPU:
error = vm_activate_cpu(sc->vmm_vm, vcpu);
break;
+ case VM_SUSPEND_CPU:
+ error = vm_suspend_cpu(sc->vmm_vm, vcpu);
+ break;
+ case VM_RESUME_CPU:
+ error = vm_resume_cpu(sc->vmm_vm, vcpu);
+ break;
case VM_GET_CPUS: {
struct vm_cpuset vm_cpuset;
@@ -993,6 +999,8 @@ vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md,
tempset = vm_active_cpus(sc->vmm_vm);
} else if (vm_cpuset.which == VM_SUSPENDED_CPUS) {
tempset = vm_suspended_cpus(sc->vmm_vm);
+ } else if (vm_cpuset.which == VM_DEBUG_CPUS) {
+ tempset = vm_debug_cpus(sc->vmm_vm);
} else {
error = EINVAL;
}
@@ -1080,6 +1088,29 @@ vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md,
error = vm_restart_instruction(sc->vmm_vm, vcpu);
break;
+ case VM_SET_TOPOLOGY: {
+ struct vm_cpu_topology topo;
+
+ if (ddi_copyin(datap, &topo, sizeof (topo), md) != 0) {
+ error = EFAULT;
+ break;
+ }
+ error = vm_set_topology(sc->vmm_vm, topo.sockets, topo.cores,
+ topo.threads, topo.maxcpus);
+ break;
+ }
+ case VM_GET_TOPOLOGY: {
+ struct vm_cpu_topology topo;
+
+ vm_get_topology(sc->vmm_vm, &topo.sockets, &topo.cores,
+ &topo.threads, &topo.maxcpus);
+ if (ddi_copyout(&topo, datap, sizeof (topo), md) != 0) {
+ error = EFAULT;
+ break;
+ }
+ break;
+ }
+
#ifndef __FreeBSD__
case VM_DEVMEM_GETOFFSET: {
struct vm_devmem_offset vdo;
diff --git a/usr/src/uts/i86pc/io/vmm/x86.c b/usr/src/uts/i86pc/io/vmm/x86.c
index 0d59c119a8..d6426bde44 100644
--- a/usr/src/uts/i86pc/io/vmm/x86.c
+++ b/usr/src/uts/i86pc/io/vmm/x86.c
@@ -73,17 +73,6 @@ static uint64_t bhyve_xcpuids;
SYSCTL_ULONG(_hw_vmm, OID_AUTO, bhyve_xcpuids, CTLFLAG_RW, &bhyve_xcpuids, 0,
"Number of times an unknown cpuid leaf was accessed");
-/*
- * The default CPU topology is a single thread per package.
- */
-static u_int threads_per_core = 1;
-SYSCTL_UINT(_hw_vmm_topology, OID_AUTO, threads_per_core, CTLFLAG_RDTUN,
- &threads_per_core, 0, NULL);
-
-static u_int cores_per_package = 1;
-SYSCTL_UINT(_hw_vmm_topology, OID_AUTO, cores_per_package, CTLFLAG_RDTUN,
- &cores_per_package, 0, NULL);
-
static int cpuid_leaf_b = 1;
SYSCTL_INT(_hw_vmm_topology, OID_AUTO, cpuid_leaf_b, CTLFLAG_RDTUN,
&cpuid_leaf_b, 0, NULL);
@@ -106,8 +95,9 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id,
const struct xsave_limits *limits;
uint64_t cr4;
int error, enable_invpcid, level, width = 0, x2apic_id = 0;
- unsigned int func, regs[4], logical_cpus;
+ unsigned int func, regs[4], logical_cpus = 0;
enum x2apic_state x2apic_state;
+ uint16_t cores, maxcpus, sockets, threads;
VCPU_CTR2(vm, vcpu_id, "cpuid %#x,%#x", *eax, *ecx);
@@ -155,11 +145,11 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id,
*
* However this matches the logical cpus as
* advertised by leaf 0x1 and will work even
- * if the 'threads_per_core' tunable is set
- * incorrectly on an AMD host.
+ * if threads is set incorrectly on an AMD host.
*/
- logical_cpus = threads_per_core *
- cores_per_package;
+ vm_get_topology(vm, &sockets, &cores, &threads,
+ &maxcpus);
+ logical_cpus = threads * cores;
regs[2] = logical_cpus - 1;
}
break;
@@ -321,7 +311,9 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id,
*/
regs[3] |= (CPUID_MCA | CPUID_MCE | CPUID_MTRR);
- logical_cpus = threads_per_core * cores_per_package;
+ vm_get_topology(vm, &sockets, &cores, &threads,
+ &maxcpus);
+ logical_cpus = threads * cores;
regs[1] &= ~CPUID_HTT_CORES;
regs[1] |= (logical_cpus & 0xff) << 16;
regs[3] |= CPUID_HTT;
@@ -331,8 +323,10 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id,
cpuid_count(*eax, *ecx, regs);
if (regs[0] || regs[1] || regs[2] || regs[3]) {
+ vm_get_topology(vm, &sockets, &cores, &threads,
+ &maxcpus);
regs[0] &= 0x3ff;
- regs[0] |= (cores_per_package - 1) << 26;
+ regs[0] |= (cores - 1) << 26;
/*
* Cache topology:
* - L1 and L2 are shared only by the logical
@@ -340,10 +334,10 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id,
* - L3 and above are shared by all logical
* processors in the package.
*/
- logical_cpus = threads_per_core;
+ logical_cpus = threads;
level = (regs[0] >> 5) & 0x7;
if (level >= 3)
- logical_cpus *= cores_per_package;
+ logical_cpus *= cores;
regs[0] |= (logical_cpus - 1) << 14;
}
break;
@@ -405,16 +399,17 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id,
/*
* Processor topology enumeration
*/
+ vm_get_topology(vm, &sockets, &cores, &threads,
+ &maxcpus);
if (*ecx == 0) {
- logical_cpus = threads_per_core;
+ logical_cpus = threads;
width = log2(logical_cpus);
level = CPUID_TYPE_SMT;
x2apic_id = vcpu_id;
}
if (*ecx == 1) {
- logical_cpus = threads_per_core *
- cores_per_package;
+ logical_cpus = threads * cores;
width = log2(logical_cpus);
level = CPUID_TYPE_CORE;
x2apic_id = vcpu_id;
diff --git a/usr/src/uts/i86pc/sys/vmm.h b/usr/src/uts/i86pc/sys/vmm.h
index 43a9d36de3..dea60c5f76 100644
--- a/usr/src/uts/i86pc/sys/vmm.h
+++ b/usr/src/uts/i86pc/sys/vmm.h
@@ -44,8 +44,13 @@
#ifndef _VMM_H_
#define _VMM_H_
+#include <sys/sdt.h>
#include <x86/segments.h>
+#ifdef _KERNEL
+SDT_PROVIDER_DECLARE(vmm);
+#endif
+
enum vm_suspend_how {
VM_SUSPEND_NONE,
VM_SUSPEND_RESET,
@@ -204,6 +209,10 @@ int vm_create(const char *name, struct vm **retvm);
void vm_destroy(struct vm *vm);
int vm_reinit(struct vm *vm);
const char *vm_name(struct vm *vm);
+void vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores,
+ uint16_t *threads, uint16_t *maxcpus);
+int vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
+ uint16_t threads, uint16_t maxcpus);
/*
* APIs that modify the guest memory map require all vcpus to be frozen.
@@ -259,8 +268,11 @@ int vm_get_x2apic_state(struct vm *vm, int vcpu, enum x2apic_state *state);
int vm_set_x2apic_state(struct vm *vm, int vcpu, enum x2apic_state state);
int vm_apicid2vcpuid(struct vm *vm, int apicid);
int vm_activate_cpu(struct vm *vm, int vcpu);
+int vm_suspend_cpu(struct vm *vm, int vcpu);
+int vm_resume_cpu(struct vm *vm, int vcpu);
struct vm_exit *vm_exitinfo(struct vm *vm, int vcpuid);
void vm_exit_suspended(struct vm *vm, int vcpuid, uint64_t rip);
+void vm_exit_debug(struct vm *vm, int vcpuid, uint64_t rip);
void vm_exit_rendezvous(struct vm *vm, int vcpuid, uint64_t rip);
void vm_exit_astpending(struct vm *vm, int vcpuid, uint64_t rip);
void vm_exit_reqidle(struct vm *vm, int vcpuid, uint64_t rip);
@@ -284,6 +296,7 @@ typedef void (*vm_rendezvous_func_t)(struct vm *vm, int vcpuid, void *arg);
void vm_smp_rendezvous(struct vm *vm, int vcpuid, cpuset_t dest,
vm_rendezvous_func_t func, void *arg);
cpuset_t vm_active_cpus(struct vm *vm);
+cpuset_t vm_debug_cpus(struct vm *vm);
cpuset_t vm_suspended_cpus(struct vm *vm);
#endif /* _SYS__CPUSET_H_ */
@@ -308,6 +321,8 @@ vcpu_reqidle(struct vm_eventinfo *info)
return (*info->iptr);
}
+int vcpu_debugged(struct vm *vm, int vcpuid);
+
/*
* Return 1 if device indicated by bus/slot/func is supposed to be a
* pci passthrough device.
@@ -568,6 +583,7 @@ enum vm_exitcode {
VM_EXITCODE_MWAIT,
VM_EXITCODE_SVM,
VM_EXITCODE_REQIDLE,
+ VM_EXITCODE_DEBUG,
VM_EXITCODE_MAX
};
diff --git a/usr/src/uts/i86pc/sys/vmm_dev.h b/usr/src/uts/i86pc/sys/vmm_dev.h
index a737ab1ad5..63ccc36dc6 100644
--- a/usr/src/uts/i86pc/sys/vmm_dev.h
+++ b/usr/src/uts/i86pc/sys/vmm_dev.h
@@ -272,6 +272,7 @@ struct vm_cpuset {
};
#define VM_ACTIVE_CPUS 0
#define VM_SUSPENDED_CPUS 1
+#define VM_DEBUG_CPUS 2
struct vm_intinfo {
int vcpuid;
@@ -295,6 +296,13 @@ struct vm_devmem_offset {
};
#endif
+struct vm_cpu_topology {
+ uint16_t sockets; /* num of sockets */
+ uint16_t cores; /* num of cores/socket */
+ uint16_t threads; /* num of threads/core */
+ uint16_t maxcpus; /* max pluggable cpus; must be 0 for VM_SET_TOPOLOGY */
+};
+
enum {
/* general routines */
IOCNUM_ABIVERS = 0,
@@ -354,6 +362,10 @@ enum {
IOCNUM_GET_X2APIC_STATE = 61,
IOCNUM_GET_HPET_CAPABILITIES = 62,
+ /* CPU Topology */
+ IOCNUM_SET_TOPOLOGY = 63,
+ IOCNUM_GET_TOPOLOGY = 64,
+
/* legacy interrupt injection */
IOCNUM_ISA_ASSERT_IRQ = 80,
IOCNUM_ISA_DEASSERT_IRQ = 81,
@@ -363,6 +375,8 @@ enum {
/* vm_cpuset */
IOCNUM_ACTIVATE_CPU = 90,
IOCNUM_GET_CPUSET = 91,
+ IOCNUM_SUSPEND_CPU = 92,
+ IOCNUM_RESUME_CPU = 93,
/* RTC */
IOCNUM_RTC_READ = 100,
@@ -454,6 +468,10 @@ enum {
_IOWR('v', IOCNUM_GET_X2APIC_STATE, struct vm_x2apic)
#define VM_GET_HPET_CAPABILITIES \
_IOR('v', IOCNUM_GET_HPET_CAPABILITIES, struct vm_hpet_cap)
+#define VM_SET_TOPOLOGY \
+ _IOW('v', IOCNUM_SET_TOPOLOGY, struct vm_cpu_topology)
+#define VM_GET_TOPOLOGY \
+ _IOR('v', IOCNUM_GET_TOPOLOGY, struct vm_cpu_topology)
#define VM_GET_GPA_PMAP \
_IOWR('v', IOCNUM_GET_GPA_PMAP, struct vm_gpa_pte)
#define VM_GLA2GPA \
@@ -464,6 +482,10 @@ enum {
_IOW('v', IOCNUM_ACTIVATE_CPU, struct vm_activate_cpu)
#define VM_GET_CPUS \
_IOW('v', IOCNUM_GET_CPUSET, struct vm_cpuset)
+#define VM_SUSPEND_CPU \
+ _IOW('v', IOCNUM_SUSPEND_CPU, struct vm_activate_cpu)
+#define VM_RESUME_CPU \
+ _IOW('v', IOCNUM_RESUME_CPU, struct vm_activate_cpu)
#define VM_SET_INTINFO \
_IOW('v', IOCNUM_SET_INTINFO, struct vm_intinfo)
#define VM_GET_INTINFO \