OS-6954 bhyve upstream sync

Reviewed by: Mike Gerdts <mike.gerdts@joyent.com> Reviewed by: Hans Rosenfeld <hans.rosenfeld@joyent.com> Approved by: Ryan Zezeski <rpz@joyent.com>
author: Patrick Mooney <pmooney@pfmooney.com> 2018-05-15 03:06:09 +0000
committer: Patrick Mooney <pmooney@pfmooney.com> 2018-05-24 18:07:37 +0000
commit: adfeb11ce94f7c9b78db3f67388fb704c2d8673a (patch)
tree: dcf8186b69efcb05af66a3fe35cea2234efd9e12
parent: 28825f0c4ec7c0a9a0a1b599ed767e9c155d1230 (diff)
download: illumos-joyent-adfeb11ce94f7c9b78db3f67388fb704c2d8673a.tar.gz
26 files changed, 2125 insertions, 82 deletions
diff --git a/exception_lists/cstyle b/exception_lists/cstyle
index 27dc9ec2be..bd8816b589 100644
--- a/exception_lists/cstyle
+++ b/exception_lists/cstyle
@@ -1401,6 +1401,7 @@ usr/src/cmd/bhyve/console.[ch]
 usr/src/cmd/bhyve/consport.c
 usr/src/cmd/bhyve/dbgport.[ch]
 usr/src/cmd/bhyve/fwctl.[ch]
+usr/src/cmd/bhyve/gdb.[ch]
 usr/src/cmd/bhyve/inout.[ch]
 usr/src/cmd/bhyve/ioapic.[ch]
 usr/src/cmd/bhyve/mem.[ch]
diff --git a/usr/src/cmd/bhyve/Makefile b/usr/src/cmd/bhyve/Makefile
index 181ddf2946..554603d4f8 100644
--- a/usr/src/cmd/bhyve/Makefile
+++ b/usr/src/cmd/bhyve/Makefile
@@ -38,6 +38,7 @@ SRCS =	acpi.c			\
 	consport.c		\
 	dbgport.c		\
 	fwctl.c			\
+	gdb.c			\
 	inout.c			\
 	ioapic.c		\
 	mem.c			\
diff --git a/usr/src/cmd/bhyve/bhyverun.c b/usr/src/cmd/bhyve/bhyverun.c
index 949e537738..b12fba0800 100644
--- a/usr/src/cmd/bhyve/bhyverun.c
+++ b/usr/src/cmd/bhyve/bhyverun.c
@@ -70,6 +70,7 @@ __FBSDID("$FreeBSD$");
 #include <pthread_np.h>
 #include <sysexits.h>
 #include <stdbool.h>
+#include <stdint.h>
 
 #include <machine/vmm.h>
 #ifndef WITHOUT_CAPSICUM
@@ -88,6 +89,7 @@ __FBSDID("$FreeBSD$");
 #include "inout.h"
 #include "dbgport.h"
 #include "fwctl.h"
+#include "gdb.h"
 #include "ioapic.h"
 #include "mem.h"
 #include "mevent.h"
@@ -113,6 +115,8 @@ extern int vmexit_task_switch(struct vmctx *, struct vm_exit *, int *vcpu);
 char *vmname;
 
 int guest_ncpus;
+uint16_t cores, maxcpus, sockets, threads;
+
 char *guest_uuid_str;
 
 static int guest_vmexit_on_hlt, guest_vmexit_on_pause;
@@ -166,15 +170,17 @@ usage(int code)
 {
 
         fprintf(stderr,
-                "Usage: %s [-abehuwxACHIPSWY] [-c vcpus] [-g <gdb port>] [-l <lpc>]\n"
+		"Usage: %s [-abehuwxACHPSWY]\n"
+		"       %*s [-c [[cpus=]numcpus][,sockets=n][,cores=n][,threads=n]]\n"
+		"       %*s [-g <gdb port>] [-l <lpc>]\n"
 #ifdef	__FreeBSD__
-		"       %*s [-m memsize[K|k|M|m|G|g|T|t] [-p vcpu:hostcpu] [-s <pci>] [-U uuid] <vm>\n"
+		"       %*s [-m mem] [-p vcpu:hostcpu] [-s <pci>] [-U uuid] <vm>\n"
 #else
-		"       %*s [-s <pci>] [-U uuid] <vm>\n"
+		"       %*s [-m mem] [-s <pci>] [-U uuid] <vm>\n"
 #endif
 		"       -a: local apic is in xAPIC mode (deprecated)\n"
 		"       -A: create ACPI tables\n"
-		"       -c: # cpus (default 1)\n"
+		"       -c: number of cpus and/or topology specification\n"
 		"       -C: include guest memory in core file\n"
 		"       -e: exit on unhandled I/O access\n"
 		"       -g: gdb port\n"
@@ -194,11 +200,91 @@ usage(int code)
 		"       -W: force virtio to use single-vector MSI\n"
 		"       -x: local apic is in x2APIC mode\n"
 		"       -Y: disable MPtable generation\n",
-		progname, (int)strlen(progname), "");
+		progname, (int)strlen(progname), "", (int)strlen(progname), "",
+		(int)strlen(progname), "");
 
 	exit(code);
 }
 
+/*
+ * XXX This parser is known to have the following issues:
+ * 1.  It accepts null key=value tokens ",,".
+ * 2.  It accepts whitespace after = and before value.
+ * 3.  Values out of range of INT are silently wrapped.
+ * 4.  It doesn't check non-final values.
+ * 5.  The apparently bogus limits of UINT16_MAX are for future expansion.
+ *
+ * The acceptance of a null specification ('-c ""') is by design to match the
+ * manual page syntax specification, this results in a topology of 1 vCPU.
+ */
+/*
+ * Parse the argument to -c: a comma-separated list made up of an
+ * optional bare CPU count and/or cpus=, sockets=, cores= and threads=
+ * settings.
+ *
+ * Returns 0 on success, after storing the result in the globals
+ * guest_ncpus, sockets, cores and threads.  Returns -1, leaving those
+ * globals untouched, if the specification is malformed or out of range
+ * or if the working copy of 'opt' cannot be allocated.
+ */
+static int
+topology_parse(const char *opt)
+{
+	uint64_t ncpus;
+	int c, chk, error, n, s, t, tmp;
+	char *cp, *str, *tofree;
+	bool ns, scts;
+
+	c = 1, n = 1, s = 1, t = 1;
+	ns = false, scts = false;
+	error = -1;
+
+	/*
+	 * strsep() advances 'str' as it consumes tokens, so remember the
+	 * start of the allocation in order to free it on every exit path.
+	 */
+	tofree = str = strdup(opt);
+	if (str == NULL)
+		return (-1);
+
+	while ((cp = strsep(&str, ",")) != NULL) {
+		if (sscanf(cp, "%i%n", &tmp, &chk) == 1) {
+			n = tmp;
+			ns = true;
+		} else if (sscanf(cp, "cpus=%i%n", &tmp, &chk) == 1) {
+			n = tmp;
+			ns = true;
+		} else if (sscanf(cp, "sockets=%i%n", &tmp, &chk) == 1) {
+			s = tmp;
+			scts = true;
+		} else if (sscanf(cp, "cores=%i%n", &tmp, &chk) == 1) {
+			c = tmp;
+			scts = true;
+		} else if (sscanf(cp, "threads=%i%n", &tmp, &chk) == 1) {
+			t = tmp;
+			scts = true;
+#ifdef notyet  /* Do not expose this until vmm.ko implements it */
+		} else if (sscanf(cp, "maxcpus=%i%n", &tmp, &chk) == 1) {
+			m = tmp;
+#endif
+		/* Skip the empty argument case from -c "" */
+		} else if (cp[0] == '\0')
+			continue;
+		else
+			goto out;
+		/* Any trailing garbage causes an error */
+		if (cp[chk] != '\0')
+			goto out;
+	}
+	/*
+	 * Range check 1 <= n <= UINT16_MAX all values
+	 */
+	if (n < 1 || s < 1 || c < 1 || t < 1 ||
+	    n > UINT16_MAX || s > UINT16_MAX || c > UINT16_MAX  ||
+	    t > UINT16_MAX)
+		goto out;
+
+	/* If only the cpus was specified, use that as sockets */
+	if (!scts)
+		s = n;
+	/*
+	 * Compute sockets * cores * threads avoiding overflow
+	 * The range check above insures these are 16 bit values
+	 * If n was specified check it against computed ncpus
+	 */
+	ncpus = (uint64_t)s * c * t;
+	if (ncpus > UINT16_MAX || (ns && n != ncpus))
+		goto out;
+
+	guest_ncpus = ncpus;
+	sockets = s;
+	cores = c;
+	threads = t;
+	error = 0;
+out:
+	free(tofree);
+	return (error);
+}
+
 #ifndef WITHOUT_CAPSICUM
 /*
  * 11-stable capsicum helpers
@@ -327,6 +413,8 @@ fbsdrun_start_thread(void *param)
 	snprintf(tname, sizeof(tname), "vcpu %d", vcpu);
 	pthread_set_name_np(mtp->mt_thr, tname);
 
+	gdb_cpu_add(vcpu);
+
 	vm_loop(mtp->mt_ctx, vcpu, vmexit[vcpu].rip);
 
 	/* not reached */
@@ -590,6 +678,8 @@ vmexit_mtrap(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
 
 	stats.vmexit_mtrap++;
 
+	gdb_cpu_mtrap(*pvcpu);
+
 	return (VMEXIT_CONTINUE);
 }
 
@@ -664,6 +754,14 @@ vmexit_suspend(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
 	return (0);	/* NOTREACHED */
 }
 
+/*
+ * Exit handler registered for VM_EXITCODE_DEBUG.  Passes the exiting
+ * vCPU (*pvcpu) to gdb_cpu_suspend() and then returns VMEXIT_CONTINUE.
+ * The 'ctx' and 'vmexit' arguments are not used.
+ */
+static int
+vmexit_debug(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
+{
+
+	gdb_cpu_suspend(*pvcpu);
+	return (VMEXIT_CONTINUE);
+}
+
 static vmexit_handler_t handler[VM_EXITCODE_MAX] = {
 	[VM_EXITCODE_INOUT]  = vmexit_inout,
 	[VM_EXITCODE_INOUT_STR]  = vmexit_inout,
@@ -678,6 +776,7 @@ static vmexit_handler_t handler[VM_EXITCODE_MAX] = {
 	[VM_EXITCODE_SPINUP_AP] = vmexit_spinup_ap,
 	[VM_EXITCODE_SUSPENDED] = vmexit_suspend,
 	[VM_EXITCODE_TASK_SWITCH] = vmexit_task_switch,
+	[VM_EXITCODE_DEBUG] = vmexit_debug,
 };
 
 static void
@@ -860,6 +959,9 @@ do_open(const char *vmname)
 			exit(1);
 		}
 	}
+	error = vm_set_topology(ctx, sockets, cores, threads, maxcpus);
+	if (error)
+		errx(EX_OSERR, "vm_set_topology");
 	return (ctx);
 }
 
@@ -888,9 +990,10 @@ mark_provisioned(void)
 int
 main(int argc, char *argv[])
 {
-	int c, error, gdb_port, err, bvmcons;
+	int c, error, dbg_port, gdb_port, err, bvmcons;
 	int max_vcpus, mptgen, memflags;
 	int rtc_localtime;
+	bool gdb_stop;
 	struct vmctx *ctx;
 	uint64_t rip;
 	size_t memsize;
@@ -898,17 +1001,21 @@ main(int argc, char *argv[])
 
 	bvmcons = 0;
 	progname = basename(argv[0]);
+	dbg_port = 0;
 	gdb_port = 0;
+	gdb_stop = false;
 	guest_ncpus = 1;
+	sockets = cores = threads = 1;
+	maxcpus = 0;
 	memsize = 256 * MB;
 	mptgen = 1;
 	rtc_localtime = 1;
 	memflags = 0;
 
 #ifdef	__FreeBSD__
-	optstr = "abehuwxACHIPSWYp:g:c:s:m:l:B:U:";
+	optstr = "abehuwxACHIPSWYp:g:G:c:s:m:l:B:U:";
 #else
-	optstr = "abehuwxACHIPSWYg:c:s:m:l:B:U:";
+	optstr = "abehuwxACHIPSWYg:G:c:s:m:l:B:U:";
 #endif
 	while ((c = getopt(argc, argv, optstr)) != -1) {
 		switch (c) {
@@ -936,12 +1043,22 @@ main(int argc, char *argv[])
 			break;
 #endif
                 case 'c':
-			guest_ncpus = atoi(optarg);
+			if (topology_parse(optarg) != 0) {
+			    errx(EX_USAGE, "invalid cpu topology "
+				"'%s'", optarg);
+			}
 			break;
 		case 'C':
 			memflags |= VM_MEM_F_INCORE;
 			break;
 		case 'g':
+			dbg_port = atoi(optarg);
+			break;
+		case 'G':
+			if (optarg[0] == 'w') {
+				gdb_stop = true;
+				optarg++;
+			}
 			gdb_port = atoi(optarg);
 			break;
 		case 'l':
@@ -1014,11 +1131,6 @@ main(int argc, char *argv[])
 	vmname = argv[0];
 	ctx = do_open(vmname);
 
-	if (guest_ncpus < 1) {
-		fprintf(stderr, "Invalid guest vCPUs (%d)\n", guest_ncpus);
-		exit(1);
-	}
-
 	max_vcpus = num_vcpus_allowed(ctx);
 	if (guest_ncpus > max_vcpus) {
 		fprintf(stderr, "%d vCPUs requested but only %d available\n",
@@ -1069,8 +1181,11 @@ main(int argc, char *argv[])
 	if (init_pci(ctx) != 0)
 		exit(1);
 
+	if (dbg_port != 0)
+		init_dbgport(dbg_port);
+
 	if (gdb_port != 0)
-		init_dbgport(gdb_port);
+		init_gdb(ctx, gdb_port, gdb_stop);
 
 	if (bvmcons)
 		init_bvmcons();
diff --git a/usr/src/cmd/bhyve/gdb.c b/usr/src/cmd/bhyve/gdb.c
new file mode 100644
index 0000000000..4414a05e27
--- /dev/null
+++ b/usr/src/cmd/bhyve/gdb.c
@@ -0,0 +1,1328 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2017-2018 John H. Baldwin <jhb@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#ifndef WITHOUT_CAPSICUM
+#include <sys/capsicum.h>
+#endif
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <machine/atomic.h>
+#include <machine/specialreg.h>
+#include <machine/vmm.h>
+#include <netinet/in.h>
+#include <assert.h>
+#ifndef WITHOUT_CAPSICUM
+#include <capsicum_helpers.h>
+#endif
+#include <err.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <pthread.h>
+#include <pthread_np.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sysexits.h>
+#include <unistd.h>
+#include <vmmapi.h>
+
+#include "bhyverun.h"
+#include "mem.h"
+#include "mevent.h"
+
+/*
+ * GDB_SIGNAL_* numbers are part of the GDB remote protocol.  Most stops
+ * use SIGTRAP.
+ */
+#define	GDB_SIGNAL_TRAP		5
+
+static void gdb_resume_vcpus(void);
+static void check_command(int fd);
+
+static struct mevent *read_event, *write_event;
+
+static cpuset_t vcpus_active, vcpus_suspended, vcpus_waiting;
+static pthread_mutex_t gdb_lock;
+static pthread_cond_t idle_vcpus;
+static bool stop_pending, first_stop;
+static int stepping_vcpu, stopped_vcpu;
+
+/*
+ * An I/O buffer contains 'capacity' bytes of room at 'data'.  For a
+ * read buffer, 'start' is unused and 'len' contains the number of
+ * valid bytes in the buffer.  For a write buffer, 'start' is set to
+ * the index of the next byte in 'data' to send, and 'len' contains
+ * the remaining number of valid bytes to send.
+ */
+struct io_buffer {
+	uint8_t *data;		/* backing storage, resized with realloc() */
+	size_t capacity;	/* bytes of room at 'data' */
+	size_t start;		/* write buffers: index of next byte to send */
+	size_t len;		/* number of valid bytes from 'start' */
+};
+
+static struct io_buffer cur_comm, cur_resp;
+static uint8_t cur_csum;
+static int cur_vcpu;
+static struct vmctx *ctx;
+static int cur_fd = -1;
+
+/*
+ * Registers fetched for the 'g' (read registers) command, in the order
+ * they are emitted in the reply.  Entry i here pairs with entry i of
+ * gdb_regsize[].
+ */
+const int gdb_regset[] = {
+	VM_REG_GUEST_RAX,
+	VM_REG_GUEST_RBX,
+	VM_REG_GUEST_RCX,
+	VM_REG_GUEST_RDX,
+	VM_REG_GUEST_RSI,
+	VM_REG_GUEST_RDI,
+	VM_REG_GUEST_RBP,
+	VM_REG_GUEST_RSP,
+	VM_REG_GUEST_R8,
+	VM_REG_GUEST_R9,
+	VM_REG_GUEST_R10,
+	VM_REG_GUEST_R11,
+	VM_REG_GUEST_R12,
+	VM_REG_GUEST_R13,
+	VM_REG_GUEST_R14,
+	VM_REG_GUEST_R15,
+	VM_REG_GUEST_RIP,
+	VM_REG_GUEST_RFLAGS,
+	VM_REG_GUEST_CS,
+	VM_REG_GUEST_SS,
+	VM_REG_GUEST_DS,
+	VM_REG_GUEST_ES,
+	VM_REG_GUEST_FS,
+	VM_REG_GUEST_GS
+};
+
+/*
+ * Number of bytes emitted for each register in gdb_regset[], indexed
+ * identically: 8 for RAX..RIP, 4 for RFLAGS and the segment registers.
+ */
+const int gdb_regsize[] = {
+	8,
+	8,
+	8,
+	8,
+	8,
+	8,
+	8,
+	8,
+	8,
+	8,
+	8,
+	8,
+	8,
+	8,
+	8,
+	8,
+	8,
+	4,
+	4,
+	4,
+	4,
+	4,
+	4,
+	4
+};
+
+#ifdef GDB_LOG
+#include <stdarg.h>
+#include <stdio.h>
+
+/*
+ * printf-style logger, only built when GDB_LOG is defined.  The log
+ * file /tmp/bhyve_gdb.log is opened (truncating) on the first call and
+ * switched to line buffering.  If the file cannot be opened, or its
+ * capabilities cannot be limited, the message is silently dropped and
+ * the open is retried on the next call.
+ */
+static void __printflike(1, 2)
+debug(const char *fmt, ...)
+{
+	static FILE *logfile;
+	va_list ap;
+
+	if (logfile == NULL) {
+		logfile = fopen("/tmp/bhyve_gdb.log", "w");
+		if (logfile == NULL)
+			return;
+#ifndef WITHOUT_CAPSICUM
+		if (caph_limit_stream(fileno(logfile), CAPH_WRITE) == -1) {
+			fclose(logfile);
+			logfile = NULL;
+			return;
+		}
+#endif
+		setlinebuf(logfile);
+	}
+	va_start(ap, fmt);
+	vfprintf(logfile, fmt, ap);
+	va_end(ap);
+}
+#else
+#define debug(...)
+#endif
+
+/*
+ * Fill in '*paging' for 'vcpu' from the guest's CR0, CR3, CR4 and EFER.
+ * Returns 0 on success or -1 if the registers could not be fetched.
+ */
+static int
+guest_paging_info(int vcpu, struct vm_guest_paging *paging)
+{
+	uint64_t regs[4];
+	/* regs[] below is filled in this order: CR0, CR3, CR4, EFER. */
+	const int regset[4] = {
+		VM_REG_GUEST_CR0,
+		VM_REG_GUEST_CR3,
+		VM_REG_GUEST_CR4,
+		VM_REG_GUEST_EFER
+	};
+
+	if (vm_get_register_set(ctx, vcpu, nitems(regset), regset, regs) == -1)
+		return (-1);
+
+	/*
+	 * For the debugger, always pretend to be the kernel (CPL 0),
+	 * and if long-mode is enabled, always parse addresses as if
+	 * in 64-bit mode.
+	 */
+	paging->cr3 = regs[1];
+	paging->cpl = 0;
+	/* CPU mode: EFER.LMA -> 64-bit, else CR0.PE -> protected, else real. */
+	if (regs[3] & EFER_LMA)
+		paging->cpu_mode = CPU_MODE_64BIT;
+	else if (regs[0] & CR0_PE)
+		paging->cpu_mode = CPU_MODE_PROTECTED;
+	else
+		paging->cpu_mode = CPU_MODE_REAL;
+	/* Paging mode is derived from CR0.PG, CR4.PAE and EFER.LME. */
+	if (!(regs[0] & CR0_PG))
+		paging->paging_mode = PAGING_MODE_FLAT;
+	else if (!(regs[2] & CR4_PAE))
+		paging->paging_mode = PAGING_MODE_32;
+	else if (regs[3] & EFER_LME)
+		paging->paging_mode = PAGING_MODE_64;
+	else
+		paging->paging_mode = PAGING_MODE_PAE;
+	return (0);
+}
+
+/*
+ * Map a guest virtual address to a physical address (for a given vcpu).
+ * If a guest virtual address is valid, return 1.  If the address is
+ * not valid, return 0.  If an error occurs obtaining the mapping,
+ * return -1.
+ */
+static int
+guest_vaddr2paddr(int vcpu, uint64_t vaddr, uint64_t *paddr)
+{
+	struct vm_guest_paging paging;
+	int fault;
+
+	/* The translation needs the vCPU's current paging state. */
+	if (guest_paging_info(vcpu, &paging) == -1)
+		return (-1);
+
+	/*
+	 * Always use PROT_READ.  We really care if the VA is
+	 * accessible, not if the current vCPU can write.
+	 */
+	if (vm_gla2gpa_nofault(ctx, vcpu, &paging, vaddr, PROT_READ, paddr,
+	    &fault) == -1)
+		return (-1);
+	/* A non-zero 'fault' is reported to the caller as "not valid" (0). */
+	if (fault)
+		return (0);
+	return (1);
+}
+
+/* Mark the buffer empty; the storage and capacity are kept. */
+static void
+io_buffer_reset(struct io_buffer *io)
+{
+
+	io->start = 0;
+	io->len = 0;
+}
+
+/*
+ * Available room for adding data: the bytes between the end of the
+ * valid region (start + len) and the end of the allocation.
+ */
+static size_t
+io_buffer_avail(struct io_buffer *io)
+{
+
+	return (io->capacity - (io->start + io->len));
+}
+
+/* Pointer to the first valid byte in the buffer (data + start). */
+static uint8_t *
+io_buffer_head(struct io_buffer *io)
+{
+
+	return (io->data + io->start);
+}
+
+/* Pointer just past the last valid byte, i.e. where new data is added. */
+static uint8_t *
+io_buffer_tail(struct io_buffer *io)
+{
+
+	return (io->data + io->start + io->len);
+}
+
+/*
+ * Skip 'amount' bytes at the head of the buffer without moving any
+ * data.  'amount' must not exceed the number of valid bytes.
+ */
+static void
+io_buffer_advance(struct io_buffer *io, size_t amount)
+{
+
+	assert(amount <= io->len);
+	io->start += amount;
+	io->len -= amount;
+}
+
+/*
+ * Drop 'amount' bytes from the head of the buffer and move whatever
+ * remains to the front of the storage, so 'start' is 0 on return.
+ */
+static void
+io_buffer_consume(struct io_buffer *io, size_t amount)
+{
+
+	io_buffer_advance(io, amount);
+	/* Nothing left to move; just rewind. */
+	if (io->len == 0) {
+		io->start = 0;
+		return;
+	}
+
+	/*
+	 * XXX: Consider making this move optional and compacting on a
+	 * future read() before realloc().
+	 */
+	memmove(io->data, io_buffer_head(io), io->len);
+	io->start = 0;
+}
+
+/*
+ * Ensure at least 'newsize' bytes of room are available at the tail of
+ * the buffer, enlarging the allocation by exactly the shortfall when
+ * necessary.  Exits the process via err() if realloc() fails.
+ */
+static void
+io_buffer_grow(struct io_buffer *io, size_t newsize)
+{
+	uint8_t *new_data;
+	size_t avail, new_cap;
+
+	avail = io_buffer_avail(io);
+	if (newsize <= avail)
+		return;
+
+	new_cap = io->capacity + (newsize - avail);
+	new_data = realloc(io->data, new_cap);
+	if (new_data == NULL)
+		err(1, "Failed to grow GDB I/O buffer");
+	io->data = new_data;
+	io->capacity = new_cap;
+}
+
+/*
+ * Report whether the response buffer holds a reply.  A buffer that is
+ * empty, or whose entire contents are a single '+' acknowledgement
+ * character, does not count as a pending response.
+ */
+static bool
+response_pending(void)
+{
+
+	if (cur_resp.start == 0 && cur_resp.len == 0)
+		return (false);
+	if (cur_resp.start + cur_resp.len == 1 && cur_resp.data[0] == '+')
+		return (false);
+	return (true);
+}
+
+/*
+ * Tear down the current debugger connection: remove both mevents
+ * (closing the descriptor via the read event), discard any buffered
+ * command and response data, and resume all vCPUs.
+ *
+ * NOTE(review): this acquires gdb_lock itself, yet it is reachable
+ * from send_pending_data(), which check_command() calls while
+ * gdb_readable() already holds gdb_lock.  Confirm how gdb_lock is
+ * initialised and whether that path can self-deadlock.
+ */
+static void
+close_connection(void)
+{
+
+	/*
+	 * XXX: This triggers a warning because mevent does the close
+	 * before the EV_DELETE.
+	 */
+	pthread_mutex_lock(&gdb_lock);
+	mevent_delete(write_event);
+	mevent_delete_close(read_event);
+	write_event = NULL;
+	read_event = NULL;
+	io_buffer_reset(&cur_comm);
+	io_buffer_reset(&cur_resp);
+	cur_fd = -1;
+
+	/* Resume any stopped vCPUs. */
+	gdb_resume_vcpus();
+	pthread_mutex_unlock(&gdb_lock);
+}
+
+/*
+ * Convert a nibble to its ASCII hex digit: '0'-'9' for values up to 9,
+ * lowercase letters starting at 'a' for 10 and above.  The argument is
+ * not range-checked.
+ */
+static uint8_t
+hex_digit(uint8_t nibble)
+{
+
+	if (nibble <= 9)
+		return (nibble + '0');
+	else
+		return (nibble + 'a' - 10);
+}
+
+/*
+ * Convert one ASCII hex digit (either case) to its value.  Any other
+ * character yields 0xF, so invalid input cannot be told apart from
+ * 'f' or 'F' by the caller.
+ */
+static uint8_t
+parse_digit(uint8_t v)
+{
+
+	if (v >= '0' && v <= '9')
+		return (v - '0');
+	if (v >= 'a' && v <= 'f')
+		return (v - 'a' + 10);
+	if (v >= 'A' && v <= 'F')
+		return (v - 'A' + 10);
+	return (0xF);
+}
+
+/*
+ * Parses big-endian hexadecimal: 'len' characters starting at 'p', most
+ * significant digit first.  Returns 0 when 'len' is 0.  There is no
+ * overflow check; digits shifted past the width of uintmax_t are lost.
+ */
+static uintmax_t
+parse_integer(const uint8_t *p, size_t len)
+{
+	uintmax_t v;
+
+	v = 0;
+	while (len > 0) {
+		v <<= 4;
+		v |= parse_digit(*p);
+		p++;
+		len--;
+	}
+	return (v);
+}
+
+/* Parse two hex characters at 'p' (high nibble first) into one byte. */
+static uint8_t
+parse_byte(const uint8_t *p)
+{
+
+	return (parse_digit(p[0]) << 4 | parse_digit(p[1]));
+}
+
+/*
+ * Try to write the unsent part of the response buffer to 'fd'.  The
+ * write event is enabled while data remains to be sent and disabled
+ * once the buffer has drained.  Any write() failure is logged and the
+ * connection is closed.
+ */
+static void
+send_pending_data(int fd)
+{
+	ssize_t nwritten;
+
+	if (cur_resp.len == 0) {
+		mevent_disable(write_event);
+		return;
+	}
+	nwritten = write(fd, io_buffer_head(&cur_resp), cur_resp.len);
+	if (nwritten == -1) {
+		warn("Write to GDB socket failed");
+		close_connection();
+	} else {
+		/* Sent bytes stay in 'data' behind 'start' for a resend. */
+		io_buffer_advance(&cur_resp, nwritten);
+		if (cur_resp.len == 0)
+			mevent_disable(write_event);
+		else
+			mevent_enable(write_event);
+	}
+}
+
+/* Append a single character to the output buffer. */
+static void
+send_char(uint8_t data)
+{
+	/* Make room first; io_buffer_grow() may move the storage. */
+	io_buffer_grow(&cur_resp, 1);
+	*io_buffer_tail(&cur_resp) = data;
+	cur_resp.len++;
+}
+
+/* Append an array of bytes to the output buffer. */
+static void
+send_data(const uint8_t *data, size_t len)
+{
+
+	/* Make room first; io_buffer_grow() may move the storage. */
+	io_buffer_grow(&cur_resp, len);
+	memcpy(io_buffer_tail(&cur_resp), data, len);
+	cur_resp.len += len;
+}
+
+/*
+ * Write the two-character hex form of 'v' into buf[0] (high nibble)
+ * and buf[1] (low nibble).  No terminator is written.
+ */
+static void
+format_byte(uint8_t v, uint8_t *buf)
+{
+
+	buf[0] = hex_digit(v >> 4);
+	buf[1] = hex_digit(v & 0xf);
+}
+
+/*
+ * Append a single byte (formatted as two hex characters) to the
+ * output buffer.
+ */
+static void
+send_byte(uint8_t v)
+{
+	uint8_t buf[2];
+
+	/* Unlike append_byte(), this does not touch the checksum. */
+	format_byte(v, buf);
+	send_data(buf, sizeof(buf));
+}
+
+/* Begin a response packet: emit '$' and reset the running checksum. */
+static void
+start_packet(void)
+{
+
+	send_char('$');
+	cur_csum = 0;
+}
+
+/*
+ * End a response packet: emit '#' followed by the accumulated checksum
+ * as two hex characters.  The packet is only queued here, not written
+ * to the socket.
+ */
+static void
+finish_packet(void)
+{
+
+	send_char('#');
+	send_byte(cur_csum);
+	debug("-> %.*s\n", (int)cur_resp.len, io_buffer_head(&cur_resp));
+}
+
+/*
+ * Append a single character (for the packet payload) and update the
+ * checksum.
+ */
+static void
+append_char(uint8_t v)
+{
+
+	send_char(v);
+	/* cur_csum is a uint8_t, so the sum wraps modulo 256. */
+	cur_csum += v;
+}
+
+/*
+ * Append an array of bytes (for the packet payload) and update the
+ * checksum.
+ */
+static void
+append_packet_data(const uint8_t *data, size_t len)
+{
+
+	send_data(data, len);
+	/* Fold every payload byte into the running 8-bit checksum. */
+	while (len > 0) {
+		cur_csum += *data;
+		data++;
+		len--;
+	}
+}
+
+/*
+ * Append a NUL-terminated string (without the terminator) to the
+ * packet payload and update the checksum.
+ */
+static void
+append_string(const char *str)
+{
+
+#ifdef __FreeBSD__
+	append_packet_data(str, strlen(str));
+#else
+	/* Explicit cast: the payload helper takes const uint8_t *. */
+	append_packet_data((const uint8_t *)str, strlen(str));
+#endif
+}
+
+/*
+ * Append one byte to the packet payload as two hex characters and
+ * update the checksum.
+ */
+static void
+append_byte(uint8_t v)
+{
+	uint8_t buf[2];
+
+	format_byte(v, buf);
+	append_packet_data(buf, sizeof(buf));
+}
+
+/*
+ * Append the low 'len' bytes of 'value' to the packet payload as hex,
+ * least significant byte first.
+ */
+static void
+append_unsigned_native(uintmax_t value, size_t len)
+{
+	size_t i;
+
+	for (i = 0; i < len; i++) {
+		/* append_byte() takes a uint8_t, so only the low byte is used. */
+		append_byte(value);
+		value >>= 8;
+	}
+}
+
+/*
+ * Append the low 'len' bytes of 'value' to the packet payload as hex,
+ * most significant byte first.  The digits are assembled in a
+ * variable-length array of 2 * len characters.
+ */
+static void
+append_unsigned_be(uintmax_t value, size_t len)
+{
+	char buf[len * 2];
+	size_t i;
+
+	/* Fill the array from the end so the lowest byte lands last. */
+	for (i = 0; i < len; i++) {
+#ifdef __FreeBSD__
+		format_byte(value, buf + (len - i - 1) * 2);
+#else
+		format_byte(value, (uint8_t *)(buf + (len - i - 1) * 2));
+#endif
+		value >>= 8;
+	}
+#ifdef __FreeBSD__
+	append_packet_data(buf, sizeof(buf));
+#else
+	append_packet_data((const uint8_t *)buf, sizeof(buf));
+#endif
+}
+
+/*
+ * Append 'value' to the packet payload as big-endian hex using the
+ * smallest whole number of bytes that can hold it.  Zero is emitted as
+ * the single character '0'.
+ */
+static void
+append_integer(unsigned int value)
+{
+
+	if (value == 0)
+		append_char('0');
+	else
+		/* Round the bit count from fls() up to whole bytes. */
+		append_unsigned_be(value, (fls(value) + 7) / 8);
+}
+
+/*
+ * Append a NUL-terminated string to the packet payload with each
+ * character encoded as two hex digits.
+ */
+static void
+append_asciihex(const char *str)
+{
+
+	while (*str != '\0') {
+		append_byte(*str);
+		str++;
+	}
+}
+
+/*
+ * Queue a packet with an empty payload.  handle_command() and
+ * gdb_query() use this as the reply to commands they do not implement.
+ */
+static void
+send_empty_response(void)
+{
+
+	start_packet();
+	finish_packet();
+}
+
+/*
+ * Queue an error reply: 'E' followed by the low byte of 'error' as two
+ * hex digits.  Callers pass errno values.
+ */
+static void
+send_error(int error)
+{
+
+	start_packet();
+	append_char('E');
+	append_byte(error);
+	finish_packet();
+}
+
+/* Queue the success reply, a packet whose payload is "OK". */
+static void
+send_ok(void)
+{
+
+	start_packet();
+	append_string("OK");
+	finish_packet();
+}
+
+/*
+ * Parse a thread-id field of 'len' characters.  Returns 0 for "0", -1
+ * for "-1", -2 when the field is empty, and otherwise the field parsed
+ * as hexadecimal.  Callers in this file subtract 1 from a positive
+ * result to obtain a vCPU number.
+ */
+static int
+parse_threadid(const uint8_t *data, size_t len)
+{
+
+	if (len == 1 && *data == '0')
+		return (0);
+	if (len == 2 && memcmp(data, "-1", 2) == 0)
+		return (-1);
+	if (len == 0)
+		return (-2);
+	return (parse_integer(data, len));
+}
+
+/*
+ * Queue a stop reply.  With no specific vCPU recorded (stopped_vcpu is
+ * -1) this is 'S' plus the SIGTRAP number; otherwise it is 'T' plus
+ * the SIGTRAP number followed by "thread:<id>;", where <id> is the
+ * vCPU number plus one.  stopped_vcpu is reset to -1 afterwards.
+ */
+static void
+report_stop(void)
+{
+
+	start_packet();
+	if (stopped_vcpu == -1)
+		append_char('S');
+	else
+		append_char('T');
+	append_byte(GDB_SIGNAL_TRAP);
+	if (stopped_vcpu != -1) {
+		append_string("thread:");
+		append_integer(stopped_vcpu + 1);
+		append_char(';');
+	}
+	stopped_vcpu = -1;
+	finish_packet();
+}
+
+/*
+ * Called once every suspended vCPU is waiting.  The very first stop is
+ * not reported at all.  Otherwise the stop reply is either sent now
+ * or, if an earlier response is still outstanding, deferred by setting
+ * stop_pending; check_command() sends it when the '+' arrives.
+ */
+static void
+gdb_finish_suspend_vcpus(void)
+{
+
+	if (first_stop) {
+		first_stop = false;
+		stopped_vcpu = -1;
+	} else if (response_pending())
+		stop_pending = true;
+	else {
+		report_stop();
+		send_pending_data(cur_fd);
+	}
+}
+
+/*
+ * Park the calling vCPU thread until it is no longer suspended or it
+ * becomes the vCPU being single-stepped.  pthread_cond_wait() is
+ * passed gdb_lock, so the caller must hold that lock.
+ *
+ * When 'report_stop' is true and this vCPU completes the set of
+ * waiting vCPUs, the stop is reported via gdb_finish_suspend_vcpus().
+ * Note that the parameter name shadows the report_stop() function
+ * inside this body.
+ */
+static void
+_gdb_cpu_suspend(int vcpu, bool report_stop)
+{
+
+	debug("$vCPU %d suspending\n", vcpu);
+	CPU_SET(vcpu, &vcpus_waiting);
+	if (report_stop && CPU_CMP(&vcpus_waiting, &vcpus_suspended) == 0)
+		gdb_finish_suspend_vcpus();
+	while (CPU_ISSET(vcpu, &vcpus_suspended) && vcpu != stepping_vcpu)
+		pthread_cond_wait(&idle_vcpus, &gdb_lock);
+	CPU_CLR(vcpu, &vcpus_waiting);
+	debug("$vCPU %d resuming\n", vcpu);
+}
+
+/*
+ * Register 'vcpu' as active with the debugger stub.  Called from the
+ * vCPU's own thread before it enters vm_loop().  May block if other
+ * vCPUs are currently suspended.
+ */
+void
+gdb_cpu_add(int vcpu)
+{
+
+	debug("$vCPU %d starting\n", vcpu);
+	pthread_mutex_lock(&gdb_lock);
+	CPU_SET(vcpu, &vcpus_active);
+
+	/*
+	 * If a vcpu is added while vcpus are stopped, suspend the new
+	 * vcpu so that it will pop back out with a debug exit before
+	 * executing the first instruction.
+	 */
+	if (!CPU_EMPTY(&vcpus_suspended)) {
+		CPU_SET(vcpu, &vcpus_suspended);
+		_gdb_cpu_suspend(vcpu, false);
+	}
+	pthread_mutex_unlock(&gdb_lock);
+}
+
+/*
+ * Entry point for a vCPU thread that has taken a debug exit: take
+ * gdb_lock and wait in _gdb_cpu_suspend(), reporting the stop if this
+ * vCPU is the last one to arrive.
+ */
+void
+gdb_cpu_suspend(int vcpu)
+{
+
+	pthread_mutex_lock(&gdb_lock);
+	_gdb_cpu_suspend(vcpu, true);
+	pthread_mutex_unlock(&gdb_lock);
+}
+
+/*
+ * Called from the MTRAP exit handler.  If 'vcpu' is the one being
+ * single-stepped, the step is finished: MTRAP exiting is switched off,
+ * the vCPU is suspended again and recorded as the stopped vCPU, and
+ * the thread waits in _gdb_cpu_suspend().  For any other vCPU this
+ * does nothing beyond taking and releasing gdb_lock.
+ */
+void
+gdb_cpu_mtrap(int vcpu)
+{
+
+	debug("$vCPU %d MTRAP\n", vcpu);
+	pthread_mutex_lock(&gdb_lock);
+	if (vcpu == stepping_vcpu) {
+		stepping_vcpu = -1;
+		vm_set_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, 0);
+		vm_suspend_cpu(ctx, vcpu);
+		assert(stopped_vcpu == -1);
+		stopped_vcpu = vcpu;
+		_gdb_cpu_suspend(vcpu, true);
+	}
+	pthread_mutex_unlock(&gdb_lock);
+}
+
+/*
+ * Request that every active vCPU stop.  The caller must hold gdb_lock.
+ * If all of them are already waiting, the stop is reported right away;
+ * otherwise the last vCPU to arrive in _gdb_cpu_suspend() reports it.
+ */
+static void
+gdb_suspend_vcpus(void)
+{
+
+	assert(pthread_mutex_isowned_np(&gdb_lock));
+	debug("suspending all CPUs\n");
+	vcpus_suspended = vcpus_active;
+	vm_suspend_cpu(ctx, -1);
+	if (CPU_CMP(&vcpus_waiting, &vcpus_suspended) == 0)
+		gdb_finish_suspend_vcpus();
+}
+
+/*
+ * Begin single-stepping 'vcpu': enable MTRAP exiting, resume the vCPU
+ * and wake the waiting threads so it can leave _gdb_cpu_suspend().
+ * Returns false, changing nothing, if the MTRAP capability cannot be
+ * queried; returns true otherwise.
+ */
+static bool
+gdb_step_vcpu(int vcpu)
+{
+	int error, val;
+
+	debug("$vCPU %d step\n", vcpu);
+	/* Only the success of the query matters; 'val' is not used. */
+	error = vm_get_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, &val);
+	if (error < 0)
+		return (false);
+	/* NOTE(review): the result stored in 'error' here is not checked. */
+	error = vm_set_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, 1);
+	vm_resume_cpu(ctx, vcpu);
+	stepping_vcpu = vcpu;
+	pthread_cond_broadcast(&idle_vcpus);
+	return (true);
+}
+
+/*
+ * Let every vCPU run again: resume them all, clear the suspended set
+ * and wake the threads blocked in _gdb_cpu_suspend().  The caller must
+ * hold gdb_lock.
+ */
+static void
+gdb_resume_vcpus(void)
+{
+
+	assert(pthread_mutex_isowned_np(&gdb_lock));
+	vm_resume_cpu(ctx, -1);
+	debug("resuming all CPUs\n");
+	CPU_ZERO(&vcpus_suspended);
+	pthread_cond_broadcast(&idle_vcpus);
+}
+
+/*
+ * Handle the 'g' command: reply with the registers listed in
+ * gdb_regset[] for the current vCPU, each emitted least significant
+ * byte first using the width given in gdb_regsize[].  An error reply
+ * carrying errno is sent if the registers cannot be fetched.
+ */
+static void
+gdb_read_regs(void)
+{
+	uint64_t regvals[nitems(gdb_regset)];
+	int i;
+
+	if (vm_get_register_set(ctx, cur_vcpu, nitems(gdb_regset),
+	    gdb_regset, regvals) == -1) {
+		send_error(errno);
+		return;
+	}
+	start_packet();
+	for (i = 0; i < nitems(regvals); i++)
+		append_unsigned_native(regvals[i], gdb_regsize[i]);
+	finish_packet();
+}
+
+/*
+ * Handle the 'm' command.  'data' is the packet payload, beginning
+ * with the 'm' itself, in the form "m<addr>,<length>" with both fields
+ * in hex.  Guest virtual memory is read on behalf of the current vCPU
+ * one page at a time and returned as hex bytes.
+ *
+ * If a failure occurs before any byte has been produced, an error
+ * reply is sent.  If it occurs later, the packet is finished with the
+ * bytes gathered so far, giving a short read.
+ */
+static void
+gdb_read_mem(const uint8_t *data, size_t len)
+{
+	uint64_t gpa, gva, val;
+	uint8_t *cp;
+	size_t resid, todo, bytes;
+	bool started;
+	int error;
+
+	cp = memchr(data, ',', len);
+	if (cp == NULL) {
+		send_error(EINVAL);
+		return;
+	}
+	/* Skip the leading 'm'; the address runs up to the comma. */
+	gva = parse_integer(data + 1, cp - (data + 1));
+	resid = parse_integer(cp + 1, len - (cp + 1 - data));
+	started = false;
+
+	while (resid > 0) {
+		error = guest_vaddr2paddr(cur_vcpu, gva, &gpa);
+		if (error == -1) {
+			if (started)
+				finish_packet();
+			else
+				send_error(errno);
+			return;
+		}
+		if (error == 0) {
+			if (started)
+				finish_packet();
+			else
+				send_error(EFAULT);
+			return;
+		}
+
+		/* Read bytes from current page. */
+		todo = getpagesize() - gpa % getpagesize();
+		if (todo > resid)
+			todo = resid;
+
+		cp = paddr_guest2host(ctx, gpa, todo);
+		if (cp != NULL) {
+			/*
+			 * If this page is guest RAM, read it a byte
+			 * at a time.
+			 */
+			if (!started) {
+				start_packet();
+				started = true;
+			}
+			while (todo > 0) {
+				append_byte(*cp);
+				cp++;
+				gpa++;
+				gva++;
+				resid--;
+				todo--;
+			}
+		} else {
+			/*
+			 * If this page isn't guest RAM, try to handle
+			 * it via MMIO.  For MMIO requests, use
+			 * aligned reads of words when possible.
+			 */
+			while (todo > 0) {
+				if (gpa & 1 || todo == 1)
+					bytes = 1;
+				else if (gpa & 2 || todo == 2)
+					bytes = 2;
+				else
+					bytes = 4;
+				error = read_mem(ctx, cur_vcpu, gpa, &val,
+				    bytes);
+				if (error == 0) {
+					if (!started) {
+						start_packet();
+						started = true;
+					}
+					gpa += bytes;
+					gva += bytes;
+					resid -= bytes;
+					todo -= bytes;
+					/* Emit the value low byte first. */
+					while (bytes > 0) {
+						append_byte(val);
+						val >>= 8;
+						bytes--;
+					}
+				} else {
+					if (started)
+						finish_packet();
+					else
+						send_error(EFAULT);
+					return;
+				}
+			}
+		}
+		/* Each pass ends on a page boundary unless the read is done. */
+		assert(resid == 0 || gpa % getpagesize() == 0);
+	}
+	/* A zero-length request yields an empty packet. */
+	if (!started)
+		start_packet();
+	finish_packet();
+}
+
+/*
+ * Return true if the first strlen(cmd) bytes of 'data' equal 'cmd'.
+ * Despite the name this is a prefix match: 'data' may be longer than
+ * 'cmd'.
+ */
+static bool
+command_equals(const uint8_t *data, size_t len, const char *cmd)
+{
+
+	if (strlen(cmd) > len)
+		return (false);
+	return (memcmp(data, cmd, strlen(cmd)) == 0);
+}
+
+/*
+ * Handle 'q' (general query) packets.  'data' is the payload including
+ * the leading 'q'.  Thread ids on the wire are vCPU numbers plus one.
+ * Queries not recognised here get an empty response.
+ */
+static void
+gdb_query(const uint8_t *data, size_t len)
+{
+
+	/*
+	 * TODO:
+	 * - qSearch
+	 * - qSupported
+	 */
+	if (command_equals(data, len, "qAttached")) {
+		start_packet();
+		append_char('1');
+		finish_packet();
+	} else if (command_equals(data, len, "qC")) {
+		/* Current thread. */
+		start_packet();
+		append_string("QC");
+		append_integer(cur_vcpu + 1);
+		finish_packet();
+	} else if (command_equals(data, len, "qfThreadInfo")) {
+		cpuset_t mask;
+		bool first;
+		int vcpu;
+
+		if (CPU_EMPTY(&vcpus_active)) {
+			send_error(EINVAL);
+			return;
+		}
+		/* Reply 'm' plus a comma-separated list of all active ids. */
+		mask = vcpus_active;
+		start_packet();
+		append_char('m');
+		first = true;
+		while (!CPU_EMPTY(&mask)) {
+			vcpu = CPU_FFS(&mask) - 1;
+			CPU_CLR(vcpu, &mask);
+			if (first)
+				first = false;
+			else
+				append_char(',');
+			append_integer(vcpu + 1);
+		}
+		finish_packet();
+	} else if (command_equals(data, len, "qsThreadInfo")) {
+		/* The whole list was sent for qfThreadInfo; 'l' ends it. */
+		start_packet();
+		append_char('l');
+		finish_packet();
+	} else if (command_equals(data, len, "qThreadExtraInfo")) {
+		char buf[16];
+		int tid;
+
+		data += strlen("qThreadExtraInfo");
+		len -= strlen("qThreadExtraInfo");
+		if (*data != ',') {
+			send_error(EINVAL);
+			return;
+		}
+		tid = parse_threadid(data + 1, len - 1);
+		if (tid <= 0 || !CPU_ISSET(tid - 1, &vcpus_active)) {
+			send_error(EINVAL);
+			return;
+		}
+
+		/* The description is sent hex-encoded. */
+		snprintf(buf, sizeof(buf), "vCPU %d", tid - 1);
+		start_packet();
+		append_asciihex(buf);
+		finish_packet();
+	} else
+		send_empty_response();
+}
+
+/*
+ * Dispatch one checksum-verified packet.  'data' points at the payload
+ * (the bytes between '$' and '#') and 'len' is its length.  A reply is
+ * queued in the response buffer, except for 'c' and a successful 's',
+ * which are answered later by a stop reply.  Commands that are not
+ * implemented get an empty response.
+ */
+static void
+handle_command(const uint8_t *data, size_t len)
+{
+
+	/* Reject packets with a sequence-id (two digits and a colon). */
+	if (len >= 3 && data[0] >= '0' && data[0] <= '9' &&
+	    data[1] >= '0' && data[1] <= '9' && data[2] == ':') {
+		send_empty_response();
+		return;
+	}
+
+	switch (*data) {
+	case 'c':
+		if (len != 1) {
+			send_error(EINVAL);
+			break;
+		}
+
+		/* Don't send a reply until a stop occurs. */
+		gdb_resume_vcpus();
+		break;
+	case 'D':
+		send_ok();
+
+		/* TODO: Resume any stopped CPUs. */
+		break;
+	case 'g': {
+		gdb_read_regs();
+		break;
+	}
+	case 'H': {
+		int tid;
+
+		/* Only the 'g' and 'c' operation selectors are accepted. */
+		if (data[1] != 'g' && data[1] != 'c') {
+			send_error(EINVAL);
+			break;
+		}
+		tid = parse_threadid(data + 2, len - 2);
+		if (tid == -2) {
+			send_error(EINVAL);
+			break;
+		}
+
+		if (CPU_EMPTY(&vcpus_active)) {
+			send_error(EINVAL);
+			break;
+		}
+		/* "Any" (0) and "all" (-1) both select the first active vCPU. */
+		if (tid == -1 || tid == 0)
+			cur_vcpu = CPU_FFS(&vcpus_active) - 1;
+		else if (CPU_ISSET(tid - 1, &vcpus_active))
+			cur_vcpu = tid - 1;
+		else {
+			send_error(EINVAL);
+			break;
+		}
+		send_ok();
+		break;
+	}
+	case 'm':
+		gdb_read_mem(data, len);
+		break;
+	case 'T': {
+		int tid;
+
+		/* Thread-alive check. */
+		tid = parse_threadid(data + 1, len - 1);
+		if (tid <= 0 || !CPU_ISSET(tid - 1, &vcpus_active)) {
+			send_error(EINVAL);
+			return;
+		}
+		send_ok();
+		break;
+	}
+	case 'q':
+		gdb_query(data, len);
+		break;
+	case 's':
+		if (len != 1) {
+			send_error(EINVAL);
+			break;
+		}
+
+		/* Don't send a reply until a stop occurs. */
+		if (!gdb_step_vcpu(cur_vcpu)) {
+			send_error(EOPNOTSUPP);
+			break;
+		}
+		break;
+	case '?':
+		/* XXX: Only if stopped? */
+		/* For now, just report that we are always stopped. */
+		start_packet();
+		append_char('S');
+		append_byte(GDB_SIGNAL_TRAP);
+		finish_packet();
+		break;
+	case 'G': /* TODO */
+	case 'M': /* TODO */
+	case 'v':
+		/* Handle 'vCont' */
+		/* 'vCtrlC' */
+	case 'p': /* TODO */
+	case 'P': /* TODO */
+	case 'Q': /* TODO */
+	case 't': /* TODO */
+	case 'X': /* TODO */
+	case 'z': /* TODO */
+	case 'Z': /* TODO */
+	default:
+		send_empty_response();
+	}
+}
+
+/*
+ * Check for a valid packet in the command buffer.
+ *
+ * Consumes input from the head of cur_comm until the buffer is empty
+ * or holds only part of a packet.  The first byte selects the action:
+ * 0x03 (Ctrl-C), '+' (ACK), '-' (NACK) or '$' (start of a packet);
+ * any other byte is discarded.  gdb_readable() calls this with
+ * gdb_lock held.
+ */
+static void
+check_command(int fd)
+{
+	uint8_t *head, *hash, *p, sum;
+	size_t avail, plen;
+
+	for (;;) {
+		avail = cur_comm.len;
+		if (avail == 0)
+			return;
+		head = io_buffer_head(&cur_comm);
+		switch (*head) {
+		case 0x03:
+			debug("<- Ctrl-C\n");
+			io_buffer_consume(&cur_comm, 1);
+
+			gdb_suspend_vcpus();
+			break;
+		case '+':
+			/* ACK of previous response. */
+			debug("<- +\n");
+			if (response_pending())
+				io_buffer_reset(&cur_resp);
+			io_buffer_consume(&cur_comm, 1);
+			/* A stop that was held back can be reported now. */
+			if (stop_pending) {
+				stop_pending = false;
+				report_stop();
+				send_pending_data(fd);
+			}
+			break;
+		case '-':
+			/* NACK of previous response. */
+			debug("<- -\n");
+			if (response_pending()) {
+				/* Rewind so the whole response is sent again. */
+				cur_resp.len += cur_resp.start;
+				cur_resp.start = 0;
+				/* Skip the leading ACK when resending. */
+				if (cur_resp.data[0] == '+')
+					io_buffer_advance(&cur_resp, 1);
+				debug("-> %.*s\n", (int)cur_resp.len,
+				    io_buffer_head(&cur_resp));
+			}
+			io_buffer_consume(&cur_comm, 1);
+			send_pending_data(fd);
+			break;
+		case '$':
+			/* Packet. */
+
+			if (response_pending()) {
+				warnx("New GDB command while response in "
+				    "progress");
+				io_buffer_reset(&cur_resp);
+			}
+
+			/* Is packet complete? */
+			hash = memchr(head, '#', avail);
+			if (hash == NULL)
+				return;
+			/* '$' through '#' inclusive, plus two checksum digits. */
+			plen = (hash - head + 1) + 2;
+			if (avail < plen)
+				return;
+			debug("<- %.*s\n", (int)plen, head);
+
+			/* Verify checksum. */
+			for (sum = 0, p = head + 1; p < hash; p++)
+				sum += *p;
+			if (sum != parse_byte(hash + 1)) {
+				io_buffer_consume(&cur_comm, plen);
+				debug("-> -\n");
+				send_char('-');
+				send_pending_data(fd);
+				break;
+			}
+			send_char('+');
+
+			/* The packet is consumed only after it is handled. */
+			handle_command(head + 1, hash - (head + 1));
+			io_buffer_consume(&cur_comm, plen);
+			if (!response_pending()) {
+				debug("-> +\n");
+			}
+			send_pending_data(fd);
+			break;
+		default:
+			/* XXX: Possibly drop connection instead. */
+			debug("-> %02x\n", *head);
+			io_buffer_consume(&cur_comm, 1);
+			break;
+		}
+	}
+}
+
+/*
+ * mevent read callback for the debugger socket: pull whatever is
+ * available into cur_comm and let check_command() process it under
+ * gdb_lock.  EOF and read errors other than EAGAIN close the connection.
+ */
+static void
+gdb_readable(int fd, enum ev_type event, void *arg)
+{
+	int want;
+	ssize_t got;
+
+	if (ioctl(fd, FIONREAD, &want) == -1) {
+		warn("FIONREAD on GDB socket");
+		return;
+	}
+
+	/*
+	 * FIONREAD may report zero because of EOF; read() only reveals
+	 * EOF when asked for at least one byte.
+	 */
+	if (want == 0)
+		want = 1;
+
+	/* Make sure the command buffer can take that many bytes. */
+	io_buffer_grow(&cur_comm, want);
+	assert(io_buffer_avail(&cur_comm) >= want);
+
+	got = read(fd, io_buffer_tail(&cur_comm), io_buffer_avail(&cur_comm));
+	if (got > 0) {
+		cur_comm.len += got;
+		pthread_mutex_lock(&gdb_lock);
+		check_command(fd);
+		pthread_mutex_unlock(&gdb_lock);
+		return;
+	}
+
+	/* Nothing to read right now; try again on the next event. */
+	if (got == -1 && errno == EAGAIN)
+		return;
+
+	if (got == -1)
+		warn("Read from GDB socket");
+	close_connection();
+}
+
+/*
+ * mevent write callback (registered by new_connection() for EVF_WRITE):
+ * hands the socket to send_pending_data().
+ */
+static void
+gdb_writable(int fd, enum ev_type event, void *arg)
+{
+	send_pending_data(fd);
+}
+
+/*
+ * mevent callback for the listening socket: accept a debugger connection
+ * and make it the current GDB session.
+ *
+ * Only one session is handled at a time.  A connection that arrives while
+ * cur_fd is in use is closed and the existing session is left untouched.
+ * If either mevent registration fails, the new socket is released and no
+ * session state is changed.  A non-NULL 'arg' turns accept and
+ * registration failures into fatal errors.  On success the per-session
+ * state is reset and the vcpus are suspended (break on attach).
+ */
+static void
+new_connection(int fd, enum ev_type event, void *arg)
+{
+	int optval, s;
+
+	s = accept4(fd, NULL, NULL, SOCK_NONBLOCK);
+	if (s == -1) {
+		if (arg != NULL)
+			err(1, "Failed accepting initial GDB connection");
+
+		/* Silently ignore errors post-startup. */
+		return;
+	}
+
+	optval = 1;
+	if (setsockopt(s, SOL_SOCKET, SO_NOSIGPIPE, &optval, sizeof(optval)) ==
+	    -1) {
+		warn("Failed to disable SIGPIPE for GDB connection");
+		close(s);
+		return;
+	}
+
+	pthread_mutex_lock(&gdb_lock);
+	if (cur_fd != -1) {
+		/*
+		 * A session is already active.  Drop the newcomer and stop
+		 * here so that the live session's events and state are not
+		 * overwritten with a closed descriptor.
+		 */
+		close(s);
+		warnx("Ignoring additional GDB connection.");
+		pthread_mutex_unlock(&gdb_lock);
+		return;
+	}
+
+	read_event = mevent_add(s, EVF_READ, gdb_readable, NULL);
+	if (read_event == NULL) {
+		if (arg != NULL)
+			err(1, "Failed to setup initial GDB connection");
+		/* Nothing owns 's' yet, so release it here. */
+		close(s);
+		pthread_mutex_unlock(&gdb_lock);
+		return;
+	}
+	write_event = mevent_add(s, EVF_WRITE, gdb_writable, NULL);
+	if (write_event == NULL) {
+		if (arg != NULL)
+			err(1, "Failed to setup initial GDB connection");
+		/*
+		 * mevent_delete_close() is expected to close 's' along with
+		 * the read event, so the socket must not be adopted as the
+		 * current session afterwards.
+		 */
+		mevent_delete_close(read_event);
+		read_event = NULL;
+		pthread_mutex_unlock(&gdb_lock);
+		return;
+	}
+
+	cur_fd = s;
+	cur_vcpu = 0;
+	stepping_vcpu = -1;
+	stopped_vcpu = -1;
+	stop_pending = false;
+
+	/* Break on attach. */
+	first_stop = true;
+	gdb_suspend_vcpus();
+	pthread_mutex_unlock(&gdb_lock);
+}
+
+#ifndef WITHOUT_CAPSICUM
+/*
+ * Restrict socket 's' to the capability rights listed below and to the
+ * FIONREAD ioctl.  A failure other than ENOSYS is fatal.
+ */
+void
+limit_gdb_socket(int s)
+{
+	unsigned long allowed_ioctls[] = { FIONREAD };
+	cap_rights_t caps;
+
+	cap_rights_init(&caps, CAP_ACCEPT, CAP_EVENT, CAP_READ, CAP_WRITE,
+	    CAP_SETSOCKOPT, CAP_IOCTL);
+	if (cap_rights_limit(s, &caps) == -1 && errno != ENOSYS)
+		errx(EX_OSERR, "Unable to apply rights for sandbox");
+
+	if (cap_ioctls_limit(s, allowed_ioctls, nitems(allowed_ioctls)) == -1 &&
+	    errno != ENOSYS)
+		errx(EX_OSERR, "Unable to apply rights for sandbox");
+}
+#endif
+
+/*
+ * Start the GDB stub.
+ *
+ * Initializes gdb_lock and the idle_vcpus condition variable, records the
+ * VM context, and creates a non-blocking TCP socket listening on port
+ * 'sport' of every local IPv4 address.  new_connection() is registered to
+ * run when that socket becomes readable.  When 'wait' is true, vcpu 0 is
+ * marked suspended up front so that it does not start executing until a
+ * debugger connects.  Every setup failure is fatal.
+ */
+void
+init_gdb(struct vmctx *_ctx, int sport, bool wait)
+{
+	struct sockaddr_in sin;
+	int error, flags, s;
+
+	debug("==> starting on %d, %swaiting\n", sport, wait ? "" : "not ");
+
+	error = pthread_mutex_init(&gdb_lock, NULL);
+	if (error != 0)
+		errc(1, error, "gdb mutex init");
+	error = pthread_cond_init(&idle_vcpus, NULL);
+	if (error != 0)
+		errc(1, error, "gdb cv init");
+
+	ctx = _ctx;
+	s = socket(PF_INET, SOCK_STREAM, 0);
+	if (s < 0)
+		err(1, "gdb socket create");
+
+	/* Clear the whole address so no stack garbage reaches bind(). */
+	memset(&sin, 0, sizeof(sin));
+#ifdef __FreeBSD__
+	sin.sin_len = sizeof(sin);
+#endif
+	sin.sin_family = AF_INET;
+	sin.sin_addr.s_addr = htonl(INADDR_ANY);
+	sin.sin_port = htons(sport);
+
+	if (bind(s, (struct sockaddr *)&sin, sizeof(sin)) < 0)
+		err(1, "gdb socket bind");
+
+	if (listen(s, 1) < 0)
+		err(1, "gdb socket listen");
+
+	if (wait) {
+		/*
+		 * Set vcpu 0 in vcpus_suspended.  This will trigger the
+		 * logic in gdb_cpu_add() to suspend the first vcpu before
+		 * it starts execution.  The vcpu will remain suspended
+		 * until a debugger connects.
+		 */
+		stepping_vcpu = -1;
+		stopped_vcpu = -1;
+		CPU_SET(0, &vcpus_suspended);
+	}
+
+	/* A failed F_GETFL must not be OR-ed into the new flag word. */
+	flags = fcntl(s, F_GETFL);
+	if (flags == -1 || fcntl(s, F_SETFL, flags | O_NONBLOCK) == -1)
+		err(1, "Failed to mark gdb socket non-blocking");
+
+#ifndef WITHOUT_CAPSICUM
+	limit_gdb_socket(s);
+#endif
+	/*
+	 * Without this event no debugger can ever attach, and a vcpu
+	 * suspended for 'wait' would never be released.
+	 */
+	if (mevent_add(s, EVF_READ, new_connection, NULL) == NULL)
+		errx(1, "Failed to register gdb socket event");
+}
diff --git a/usr/src/cmd/bhyve/gdb.h b/usr/src/cmd/bhyve/gdb.h
new file mode 100644
index 0000000000..fa2184df16
--- /dev/null
+++ b/usr/src/cmd/bhyve/gdb.h
@@ -0,0 +1,39 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2017 John H. Baldwin <jhb@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef __GDB_H__
+#define	__GDB_H__
+
+/*
+ * Interface to the GDB stub.  The header pulls in <stdbool.h> and
+ * forward-declares struct vmctx so that it can be included on its own.
+ */
+#include <stdbool.h>
+
+struct vmctx;
+
+void	gdb_cpu_add(int vcpu);
+void	gdb_cpu_mtrap(int vcpu);
+void	gdb_cpu_suspend(int vcpu);
+
+/*
+ * Start the stub for the VM 'ctx', listening on TCP port 'sport'.  When
+ * 'wait' is true the first vcpu stays suspended until a debugger
+ * connects.
+ */
+void	init_gdb(struct vmctx *ctx, int sport, bool wait);
+
+#endif /* !__GDB_H__ */
diff --git a/usr/src/cmd/bhyve/mem.c b/usr/src/cmd/bhyve/mem.c
index e01d617a89..105d37cf56 100644
--- a/usr/src/cmd/bhyve/mem.c
+++ b/usr/src/cmd/bhyve/mem.c
@@ -136,6 +136,9 @@ mmio_rb_dump(struct mmio_rb_tree *rbt)
 
 RB_GENERATE(mmio_rb_tree, mmio_rb_range, mr_link, mmio_rb_range_compare);
 
+typedef int (mem_cb_t)(struct vmctx *ctx, int vcpu, uint64_t gpa,
+    struct mem_range *mr, void *arg);
+
 static int
 mem_read(void *ctx, int vcpu, uint64_t gpa, uint64_t *rval, int size, void *arg)
 {
@@ -158,10 +161,9 @@ mem_write(void *ctx, int vcpu, uint64_t gpa, uint64_t wval, int size, void *arg)
 	return (error);
 }
 
-int
-emulate_mem(struct vmctx *ctx, int vcpu, uint64_t paddr, struct vie *vie,
-    struct vm_guest_paging *paging)
-
+static int
+access_memory(struct vmctx *ctx, int vcpu, uint64_t paddr, mem_cb_t *cb,
+    void *arg)
 {
 	struct mmio_rb_range *entry;
 	int err, immutable;
@@ -204,8 +206,7 @@ emulate_mem(struct vmctx *ctx, int vcpu, uint64_t paddr, struct vie *vie,
 	if (immutable)
 		pthread_rwlock_unlock(&mmio_rwlock);
 
-	err = vmm_emulate_instruction(ctx, vcpu, paddr, vie, paging,
-				      mem_read, mem_write, &entry->mr_param);
+	err = cb(ctx, vcpu, paddr, &entry->mr_param, arg);
 
 	if (!immutable)
 		pthread_rwlock_unlock(&mmio_rwlock);
@@ -213,6 +214,60 @@ emulate_mem(struct vmctx *ctx, int vcpu, uint64_t paddr, struct vie *vie,
 	return (err);
 }
 
+/*
+ * Arguments carried from emulate_mem() to emulate_mem_cb() through the
+ * opaque 'arg' pointer of access_memory(); both fields are forwarded
+ * unchanged to vmm_emulate_instruction().
+ */
+struct emulate_mem_args {
+	struct vie *vie;
+	struct vm_guest_paging *paging;
+};
+
+/*
+ * access_memory() callback used by emulate_mem(): unpacks the
+ * emulate_mem_args in 'arg' and runs vmm_emulate_instruction() with
+ * mem_read/mem_write as the access handlers and 'mr' as their argument.
+ */
+static int
+emulate_mem_cb(struct vmctx *ctx, int vcpu, uint64_t paddr, struct mem_range *mr,
+    void *arg)
+{
+	struct emulate_mem_args *args = arg;
+	int rc;
+
+	rc = vmm_emulate_instruction(ctx, vcpu, paddr, args->vie, args->paging,
+	    mem_read, mem_write, mr);
+	return (rc);
+}
+
+/*
+ * Emulate an instruction that accessed guest physical address 'paddr':
+ * packs 'vie' and 'paging' into an emulate_mem_args and lets
+ * access_memory() invoke emulate_mem_cb() with it.  Returns whatever
+ * access_memory() returns.
+ */
+int
+emulate_mem(struct vmctx *ctx, int vcpu, uint64_t paddr, struct vie *vie,
+    struct vm_guest_paging *paging)
+{
+	struct emulate_mem_args args;
+
+	args.paging = paging;
+	args.vie = vie;
+	return (access_memory(ctx, vcpu, paddr, emulate_mem_cb, &args));
+}
+
+/*
+ * Arguments carried from read_mem() to read_mem_cb() through the opaque
+ * 'arg' pointer of access_memory(): 'rval' and 'size' are passed on to
+ * the range's handler.
+ */
+struct read_mem_args {
+	uint64_t *rval;
+	int size;
+};
+
+/*
+ * access_memory() callback used by read_mem(): calls the handler of the
+ * range 'mr' with MEM_F_READ, passing the size and result pointer from
+ * the read_mem_args in 'arg'.
+ */
+static int
+read_mem_cb(struct vmctx *ctx, int vcpu, uint64_t paddr, struct mem_range *mr,
+    void *arg)
+{
+	struct read_mem_args *args = arg;
+
+	return ((*mr->handler)(ctx, vcpu, MEM_F_READ, paddr, args->size,
+	    args->rval, mr->arg1, mr->arg2));
+}
+
+/*
+ * Read 'size' bytes at guest physical address 'gpa' into '*rval' by way
+ * of the handler registered for that address: the request is packed into
+ * a read_mem_args and dispatched through access_memory()/read_mem_cb().
+ * Returns whatever access_memory() returns.
+ */
+int
+read_mem(struct vmctx *ctx, int vcpu, uint64_t gpa, uint64_t *rval, int size)
+{
+	struct read_mem_args args;
+
+	args.size = size;
+	args.rval = rval;
+	return (access_memory(ctx, vcpu, gpa, read_mem_cb, &args));
+}
+
 static int
 register_mem_int(struct mmio_rb_tree *rbt, struct mem_range *memp)
 {
diff --git a/usr/src/cmd/bhyve/mem.h b/usr/src/cmd/bhyve/mem.h
index f9f86fa4a0..f386d67749 100644
--- a/usr/src/cmd/bhyve/mem.h
+++ b/usr/src/cmd/bhyve/mem.h
@@ -56,6 +56,8 @@ void	init_mem(void);
 int     emulate_mem(struct vmctx *, int vcpu, uint64_t paddr, struct vie *vie,
 		    struct vm_guest_paging *paging);
 		    
+int	read_mem(struct vmctx *ctx, int vcpu, uint64_t gpa, uint64_t *rval,
+		 int size);
 int	register_mem(struct mem_range *memp);
 int	register_mem_fallback(struct mem_range *memp);
 int	unregister_mem(struct mem_range *memp);
diff --git a/usr/src/cmd/bhyve/rfb.c b/usr/src/cmd/bhyve/rfb.c
index 96712a6acc..d96b45c5da 100644
--- a/usr/src/cmd/bhyve/rfb.c
+++ b/usr/src/cmd/bhyve/rfb.c
@@ -546,16 +546,21 @@ rfb_send_screen(struct rfb_softc *rc, int cfd, int all)
 		}
 
 		for (x = 0; x < xcells; x++) {
+			if (x == (xcells - 1) && rem_x > 0)
+				cellwidth = rem_x;
+			else
+				cellwidth = PIX_PER_CELL;
+
 			if (rc->hw_crc)
 				crc_p[x] = fast_crc32(p,
-				             PIX_PER_CELL * sizeof(uint32_t),
+				             cellwidth * sizeof(uint32_t),
 				             crc_p[x]);
 			else
 				crc_p[x] = (uint32_t)crc32(crc_p[x],
 				             (Bytef *)p,
-				             PIX_PER_CELL * sizeof(uint32_t));
+				             cellwidth * sizeof(uint32_t));
 
-			p += PIX_PER_CELL;
+			p += cellwidth;
 
 			/* check for crc delta if last row in cell */
 			if ((y & PIXCELL_MASK) == PIXCELL_MASK || y == (h-1)) {
@@ -568,28 +573,6 @@ rfb_send_screen(struct rfb_softc *rc, int cfd, int all)
 				}
 			}
 		}
-
-		if (rem_x) {
-			if (rc->hw_crc)
-				crc_p[x] = fast_crc32(p,
-				                    rem_x * sizeof(uint32_t),
-				                    crc_p[x]);
-			else
-				crc_p[x] = (uint32_t)crc32(crc_p[x],
-				                    (Bytef *)p,
-				                    rem_x * sizeof(uint32_t));
-			p += rem_x;
-
-			if ((y & PIXCELL_MASK) == PIXCELL_MASK || y == (h-1)) {
-				if (orig_crc[x] != crc_p[x]) {
-					orig_crc[x] = crc_p[x];
-					crc_p[x] = 1;
-					changes++;
-				} else {
-					crc_p[x] = 0;
-				}
-			}
-		}
 	}
 
 	/* If number of changes is > THRESH percent, send the whole screen */
diff --git a/usr/src/cmd/bhyvectl/bhyvectl.c b/usr/src/cmd/bhyvectl/bhyvectl.c
index 0cebc77b05..5f8932efa8 100644
--- a/usr/src/cmd/bhyvectl/bhyvectl.c
+++ b/usr/src/cmd/bhyvectl/bhyvectl.c
@@ -208,7 +208,8 @@ usage(bool cpu_intel)
 	"       [--get-msr-bitmap]\n"
 	"       [--get-msr-bitmap-address]\n"
 	"       [--get-guest-sysenter]\n"
-	"       [--get-exit-reason]\n",
+	"       [--get-exit-reason]\n"
+	"       [--get-cpu-topology]\n",
 	progname);
 
 	if (cpu_intel) {
@@ -304,6 +305,7 @@ enum x2apic_state x2apic_state;
 static int unassign_pptdev, bus, slot, func;
 #endif
 static int run;
+static int get_cpu_topology;
 
 /*
  * VMCB specific.
@@ -1476,6 +1478,7 @@ setup_options(bool cpu_intel)
 		{ "get-active-cpus", 	NO_ARG,	&get_active_cpus, 	1 },
 		{ "get-suspended-cpus", NO_ARG,	&get_suspended_cpus, 	1 },
 		{ "get-intinfo", 	NO_ARG,	&get_intinfo,		1 },
+		{ "get-cpu-topology",	NO_ARG, &get_cpu_topology,	1 },
 	};
 
 	const struct option intel_opts[] = {
@@ -2336,6 +2339,14 @@ main(int argc, char *argv[])
 		}
 	}
 
+	/*
+	 * Report the CPU topology.  The result of the query is kept in
+	 * 'error' and the values are printed only on success, so a failed
+	 * query never prints uninitialized values.
+	 */
+	if (!error && (get_cpu_topology || get_all)) {
+		uint16_t sockets, cores, threads, maxcpus;
+
+		error = vm_get_topology(ctx, &sockets, &cores, &threads,
+		    &maxcpus);
+		if (error == 0) {
+			printf("cpu_topology:\tsockets=%hu, cores=%hu, "
+			    "threads=%hu, maxcpus=%hu\n",
+			    sockets, cores, threads, maxcpus);
+		}
+	}
+
+
 	if (!error && run) {
 		error = vm_run(ctx, vcpu, &vmexit);
 		if (error == 0)
diff --git a/usr/src/compat/freebsd/err.h b/usr/src/compat/freebsd/err.h
new file mode 100644
index 0000000000..40d144e025
--- /dev/null
+++ b/usr/src/compat/freebsd/err.h
@@ -0,0 +1,23 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2018 Joyent, Inc.
+ */
+
+#ifndef _COMPAT_FREEBSD_ERR_H_
+#define	_COMPAT_FREEBSD_ERR_H_
+
+#include <errno.h>
+#include_next <err.h>
+
+/*
+ * Stand-in for FreeBSD's errc(3), which reports the error number passed
+ * in 'num' rather than the current errno.  err() formats its message from
+ * errno, so 'num' is stored there first; otherwise callers that pass a
+ * returned error code (e.g. from pthread_mutex_init()) would have an
+ * unrelated errno value reported.
+ */
+#define	errc(code, num, ...)	(errno = (num), err(code, __VA_ARGS__))
+
+#endif	/* _COMPAT_FREEBSD_ERR_H_ */
diff --git a/usr/src/compat/freebsd/sys/cpuset.h b/usr/src/compat/freebsd/sys/cpuset.h
index 4328ebcc46..dadadf15b2 100644
--- a/usr/src/compat/freebsd/sys/cpuset.h
+++ b/usr/src/compat/freebsd/sys/cpuset.h
@@ -52,6 +52,7 @@ int	cpusetobj_ffs(const cpuset_t *set);
 
 #include <sys/bitmap.h>
 #include <machine/atomic.h>
+#include <machine/cpufunc.h>
 
 /* For now, assume NCPU of 256 */
 #define	CPU_SETSIZE			(256)
@@ -60,7 +61,8 @@ typedef struct {
 	ulong_t _bits[BT_BITOUL(CPU_SETSIZE)];
 } cpuset_t;
 
-static __inline int cpuset_empty(const cpuset_t *set)
+static __inline int
+cpuset_isempty(const cpuset_t *set)
 {
 	uint_t i;
 
@@ -71,9 +73,54 @@ static __inline int cpuset_empty(const cpuset_t *set)
 	return (1);
 }
 
+/* Clear every word of 'dst', leaving the set empty. */
+static __inline void
+cpuset_zero(cpuset_t *dst)
+{
+	ulong_t *word = dst->_bits;
+	ulong_t *const end = word + BT_BITOUL(CPU_SETSIZE);
+
+	while (word < end)
+		*word++ = 0;
+}
+
+/*
+ * Compare two sets word by word.  Returns 1 when every word matches and
+ * 0 otherwise.  Neither set is modified, so both are taken as const
+ * (matching cpuset_isempty()).
+ */
+static __inline int
+cpuset_isequal(const cpuset_t *s1, const cpuset_t *s2)
+{
+	uint_t i;
+
+	for (i = 0; i < BT_BITOUL(CPU_SETSIZE); i++) {
+		if (s1->_bits[i] != s2->_bits[i])
+			return (0);
+	}
+	return (1);
+}
+
+/*
+ * Return the 1-based index of the lowest set bit in 'set', or 0 when no
+ * bit is set.
+ */
+static __inline uint_t
+cpusetobj_ffs(const cpuset_t *set)
+{
+	uint_t i, cbit;
 
+	cbit = 0;
+	for (i = 0; i < BT_BITOUL(CPU_SETSIZE); i++) {
+		if (set->_bits[i] != 0) {
+			cbit = ffsl(set->_bits[i]);
+			/* Each earlier word accounts for BT_NBIPUL bits. */
+			cbit += i * BT_NBIPUL;
+			break;
+		}
+	}
+	return (cbit);
+}
+
+
+/*
+ * FreeBSD-style cpuset operations layered on the bitmap macros and the
+ * inline helpers above.  CPU_CMP() evaluates to non-zero when the two
+ * sets differ, and CPU_FFS() yields the value of cpusetobj_ffs().
+ */
+#define	CPU_SET(cpu, setp)		BT_SET((setp)->_bits, cpu)
+#define	CPU_CLR(cpu, setp)		BT_CLEAR((setp)->_bits, cpu)
+#define	CPU_ZERO(setp)			cpuset_zero((setp))
+#define	CPU_CMP(set1, set2)		(cpuset_isequal(		\
+						(cpuset_t *)(set1),	\
+						(cpuset_t *)(set2)) == 0)
+#define	CPU_FFS(set)			cpusetobj_ffs(set)
 #define	CPU_ISSET(cpu, setp)		BT_TEST((setp)->_bits, cpu)
-#define	CPU_EMPTY(setp)			cpuset_empty((setp))
+#define	CPU_EMPTY(setp)			cpuset_isempty((setp))
 #define	CPU_SET_ATOMIC(cpu, setp)	\
 	atomic_set_long(&(BT_WIM((setp)->_bits, cpu)), BT_BIW(cpu))
 #define	CPU_CLR_ATOMIC(cpu, setp)	\
diff --git a/usr/src/compat/freebsd/sys/ioctl.h b/usr/src/compat/freebsd/sys/ioctl.h
index e223e1e4c7..72a46b8085 100644
--- a/usr/src/compat/freebsd/sys/ioctl.h
+++ b/usr/src/compat/freebsd/sys/ioctl.h
@@ -17,6 +17,8 @@
 #define	_COMPAT_FREEBSD_SYS_IOCTL_H_
 
 #include <sys/ioccom.h>
+/* Get BSD compatibility from the ioctl header */
+#define	BSD_COMP
 #include_next <sys/ioctl.h>
 
 #endif	/* _COMPAT_FREEBSD_SYS_IOCTL_H_ */
diff --git a/usr/src/compat/freebsd/sys/sdt.h b/usr/src/compat/freebsd/sys/sdt.h
new file mode 100644
index 0000000000..32d887c0d8
--- /dev/null
+++ b/usr/src/compat/freebsd/sys/sdt.h
@@ -0,0 +1,37 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2018 Joyent, Inc.
+ */
+
+#ifndef _COMPAT_FREEBSD_SYS_SDT_H_
+#define	_COMPAT_FREEBSD_SYS_SDT_H_
+
+/*
+ * Empty macros to cover FreeBSD's SDT linker tricks.
+ *
+ * Every FreeBSD-style provider and probe macro listed here expands to
+ * nothing, so provider/probe definitions and probe sites written against
+ * that interface compile away.
+ */
+
+#define	SDT_PROVIDER_DECLARE(mod)
+#define	SDT_PROVIDER_DEFINE(mod)
+
+#define	SDT_PROBE_DEFINE1(...)
+#define	SDT_PROBE_DEFINE2(...)
+#define	SDT_PROBE_DEFINE3(...)
+#define	SDT_PROBE_DEFINE4(...)
+#define	SDT_PROBE_DEFINE5(...)
+#define	SDT_PROBE1(...)
+#define	SDT_PROBE2(...)
+#define	SDT_PROBE3(...)
+#define	SDT_PROBE4(...)
+#define	SDT_PROBE5(...)
+
+#include_next <sys/sdt.h>
+
+#endif /* _COMPAT_FREEBSD_SYS_SDT_H_ */
diff --git a/usr/src/lib/libvmmapi/common/mapfile-vers b/usr/src/lib/libvmmapi/common/mapfile-vers
index 8979fac4cb..ad47407281 100644
--- a/usr/src/lib/libvmmapi/common/mapfile-vers
+++ b/usr/src/lib/libvmmapi/common/mapfile-vers
@@ -47,6 +47,7 @@ SYMBOL_VERSION ILLUMOSprivate {
 		vm_create_devmem;
 		vm_create;
 		vm_create_devmem;
+		vm_debug_cpus;
 		vm_destroy;
 		vm_destroy;
 		vm_get_capability;
@@ -66,6 +67,7 @@ SYMBOL_VERSION ILLUMOSprivate {
 		vm_get_seg_desc;
 		vm_get_stat_desc;
 		vm_get_stats;
+		vm_get_topology;
 		vm_get_x2apic_state;
 		vm_gla2gpa;
 		vm_gla2gpa_nofault;
@@ -105,12 +107,15 @@ SYMBOL_VERSION ILLUMOSprivate {
 		vm_set_memflags;
 		vm_set_register;
 		vm_set_register_set;
+		vm_set_topology;
 		vm_set_x2apic_state;
 		vm_setup_memory;
 		vm_setup_pptdev_msi;
 		vm_setup_pptdev_msix;
 		vm_suspend;
+		vm_suspend_cpu;
 		vm_suspended_cpus;
+		vm_resume_cpu;
 		vm_unassign_pptdev;
 
 	local:
diff --git a/usr/src/lib/libvmmapi/common/vmmapi.c b/usr/src/lib/libvmmapi/common/vmmapi.c
index 3cc4df93c3..de86e2b9bd 100644
--- a/usr/src/lib/libvmmapi/common/vmmapi.c
+++ b/usr/src/lib/libvmmapi/common/vmmapi.c
@@ -1545,6 +1545,13 @@ vm_suspended_cpus(struct vmctx *ctx, cpuset_t *cpus)
 }
 
 int
+vm_debug_cpus(struct vmctx *ctx, cpuset_t *cpus)
+{
+
+	return (vm_get_cpus(ctx, VM_DEBUG_CPUS, cpus));
+}
+
+int
 vm_activate_cpu(struct vmctx *ctx, int vcpu)
 {
 	struct vm_activate_cpu ac;
@@ -1557,6 +1564,30 @@ vm_activate_cpu(struct vmctx *ctx, int vcpu)
 }
 
 int
+vm_suspend_cpu(struct vmctx *ctx, int vcpu)
+{
+	struct vm_activate_cpu ac;
+	int error;
+
+	bzero(&ac, sizeof(struct vm_activate_cpu));
+	ac.vcpuid = vcpu;
+	error = ioctl(ctx->fd, VM_SUSPEND_CPU, &ac);
+	return (error);
+}
+
+/*
+ * Issue the VM_RESUME_CPU ioctl for 'vcpu' on the VM's device descriptor
+ * and return the ioctl result.
+ */
+int
+vm_resume_cpu(struct vmctx *ctx, int vcpu)
+{
+	struct vm_activate_cpu req;
+
+	bzero(&req, sizeof(req));
+	req.vcpuid = vcpu;
+	return (ioctl(ctx->fd, VM_RESUME_CPU, &req));
+}
+
+int
 vm_get_intinfo(struct vmctx *ctx, int vcpu, uint64_t *info1, uint64_t *info2)
 {
 	struct vm_intinfo vmii;
@@ -1646,6 +1677,38 @@ vm_restart_instruction(void *arg, int vcpu)
 }
 
 int
+vm_set_topology(struct vmctx *ctx,
+    uint16_t sockets, uint16_t cores, uint16_t threads, uint16_t maxcpus)
+{
+	struct vm_cpu_topology topology;
+
+	bzero(&topology, sizeof (struct vm_cpu_topology));
+	topology.sockets = sockets;
+	topology.cores = cores;
+	topology.threads = threads;
+	topology.maxcpus = maxcpus;
+	return (ioctl(ctx->fd, VM_SET_TOPOLOGY, &topology));
+}
+
+/*
+ * Query the topology with the VM_GET_TOPOLOGY ioctl.  On success (ioctl
+ * result 0) the four output parameters are filled in; on failure they
+ * are left untouched and the ioctl result is returned.
+ */
+int
+vm_get_topology(struct vmctx *ctx,
+    uint16_t *sockets, uint16_t *cores, uint16_t *threads, uint16_t *maxcpus)
+{
+	struct vm_cpu_topology topo;
+	int rc;
+
+	bzero(&topo, sizeof (topo));
+	rc = ioctl(ctx->fd, VM_GET_TOPOLOGY, &topo);
+	if (rc != 0)
+		return (rc);
+
+	*sockets = topo.sockets;
+	*cores = topo.cores;
+	*threads = topo.threads;
+	*maxcpus = topo.maxcpus;
+	return (0);
+}
+
+int
 vm_get_device_fd(struct vmctx *ctx)
 {
 
@@ -1673,9 +1736,10 @@ vm_get_ioctls(size_t *len)
 	    VM_SET_X2APIC_STATE, VM_GET_X2APIC_STATE,
 	    VM_GET_HPET_CAPABILITIES, VM_GET_GPA_PMAP, VM_GLA2GPA,
 	    VM_GLA2GPA_NOFAULT,
-	    VM_ACTIVATE_CPU, VM_GET_CPUS, VM_SET_INTINFO, VM_GET_INTINFO,
+	    VM_ACTIVATE_CPU, VM_GET_CPUS, VM_SUSPEND_CPU, VM_RESUME_CPU,
+	    VM_SET_INTINFO, VM_GET_INTINFO,
 	    VM_RTC_WRITE, VM_RTC_READ, VM_RTC_SETTIME, VM_RTC_GETTIME,
-	    VM_RESTART_INSTRUCTION };
+	    VM_RESTART_INSTRUCTION, VM_SET_TOPOLOGY, VM_GET_TOPOLOGY };
 
 	if (len == NULL) {
 		cmds = malloc(sizeof(vm_ioctl_cmds));
diff --git a/usr/src/lib/libvmmapi/common/vmmapi.h b/usr/src/lib/libvmmapi/common/vmmapi.h
index ae8bb5d3b8..cfceafc6f4 100644
--- a/usr/src/lib/libvmmapi/common/vmmapi.h
+++ b/usr/src/lib/libvmmapi/common/vmmapi.h
@@ -245,7 +245,16 @@ int	vcpu_reset(struct vmctx *ctx, int vcpu);
 
 int	vm_active_cpus(struct vmctx *ctx, cpuset_t *cpus);
 int	vm_suspended_cpus(struct vmctx *ctx, cpuset_t *cpus);
+int	vm_debug_cpus(struct vmctx *ctx, cpuset_t *cpus);
 int	vm_activate_cpu(struct vmctx *ctx, int vcpu);
+int	vm_suspend_cpu(struct vmctx *ctx, int vcpu);
+int	vm_resume_cpu(struct vmctx *ctx, int vcpu);
+
+/* CPU topology */
+int	vm_set_topology(struct vmctx *ctx, uint16_t sockets, uint16_t cores,
+	    uint16_t threads, uint16_t maxcpus);
+int	vm_get_topology(struct vmctx *ctx, uint16_t *sockets, uint16_t *cores,
+	    uint16_t *threads, uint16_t *maxcpus);
 
 #ifdef	__FreeBSD__
 /*
diff --git a/usr/src/uts/i86pc/io/vmm/README.sync b/usr/src/uts/i86pc/io/vmm/README.sync
index d7e281f250..667f34b9de 100644
--- a/usr/src/uts/i86pc/io/vmm/README.sync
+++ b/usr/src/uts/i86pc/io/vmm/README.sync
@@ -1,15 +1,22 @@
 The bhyve kernel module and its associated userland consumers have been updated
 to the latest upstream FreeBSD sources as of:
 
-commit 32e2f94b53c0599c7c674ff88c75b289f714c7c9
+commit 0fac2150fc0f1befa5803ca010ed63a6335847ad
 Author: grehan <grehan@FreeBSD.org>
-Date:   Sun Mar 11 08:27:11 2018 +0000
+Date:   Fri May 4 01:36:49 2018 +0000
 
-    Add CR2 get/set support.
+    Allow arbitrary numbers of columns for VNC server screen resolution.
 
-    Reported/Tested by:  Fabian Freyer
-    Reviewed by:    araujo
-    Differential Revision:  https://reviews.freebsd.org/D14648
+    The prior code only allowed multiples of 32 for the
+    numbers of columns. Remove this restriction to allow
+    a forthcoming UEFI firmware update to allow arbitrary
+    x,y resolutions.
+
+    (the code for handling rows already supported non mult-32 values)
+
+    Reviewed by:    Leon Dang (original author)
     MFC after:      3 weeks
+    Differential Revision:  https://reviews.freebsd.org/D15274
+
 
-Which corresponds to SVN revision: 330764
+Which corresponds to SVN revision: 333235
diff --git a/usr/src/uts/i86pc/io/vmm/amd/svm.c b/usr/src/uts/i86pc/io/vmm/amd/svm.c
index 3b4a279627..f3ce78148b 100644
--- a/usr/src/uts/i86pc/io/vmm/amd/svm.c
+++ b/usr/src/uts/i86pc/io/vmm/amd/svm.c
@@ -2039,6 +2039,12 @@ svm_vmrun(void *arg, int vcpu, register_t rip, pmap_t pmap,
 			break;
 		}
 
+		if (vcpu_debugged(vm, vcpu)) {
+			enable_gintr();
+			vm_exit_debug(vm, vcpu, state->rip);
+			break;
+		}
+
 		svm_inj_interrupts(svm_sc, vcpu, vlapic);
 
 		/* Activate the nested pmap on 'curcpu' */
diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx.c b/usr/src/uts/i86pc/io/vmm/intel/vmx.c
index af63e0a426..94c588a5c3 100644
--- a/usr/src/uts/i86pc/io/vmm/intel/vmx.c
+++ b/usr/src/uts/i86pc/io/vmm/intel/vmx.c
@@ -224,6 +224,82 @@ SYSCTL_UINT(_hw_vmm_vmx, OID_AUTO, vpid_alloc_failed, CTLFLAG_RD,
 	    &vpid_alloc_failed, 0, NULL);
 
 /*
+ * The definitions of SDT probes for VMX.
+ */
+
+SDT_PROBE_DEFINE3(vmm, vmx, exit, entry,
+    "struct vmx *", "int", "struct vm_exit *");
+
+SDT_PROBE_DEFINE4(vmm, vmx, exit, taskswitch,
+    "struct vmx *", "int", "struct vm_exit *", "struct vm_task_switch *");
+
+SDT_PROBE_DEFINE4(vmm, vmx, exit, craccess,
+    "struct vmx *", "int", "struct vm_exit *", "uint64_t");
+
+SDT_PROBE_DEFINE4(vmm, vmx, exit, rdmsr,
+    "struct vmx *", "int", "struct vm_exit *", "uint32_t");
+
+SDT_PROBE_DEFINE5(vmm, vmx, exit, wrmsr,
+    "struct vmx *", "int", "struct vm_exit *", "uint32_t", "uint64_t");
+
+SDT_PROBE_DEFINE3(vmm, vmx, exit, halt,
+    "struct vmx *", "int", "struct vm_exit *");
+
+SDT_PROBE_DEFINE3(vmm, vmx, exit, mtrap,
+    "struct vmx *", "int", "struct vm_exit *");
+
+SDT_PROBE_DEFINE3(vmm, vmx, exit, pause,
+    "struct vmx *", "int", "struct vm_exit *");
+
+SDT_PROBE_DEFINE3(vmm, vmx, exit, intrwindow,
+    "struct vmx *", "int", "struct vm_exit *");
+
+SDT_PROBE_DEFINE4(vmm, vmx, exit, interrupt,
+    "struct vmx *", "int", "struct vm_exit *", "uint32_t");
+
+SDT_PROBE_DEFINE3(vmm, vmx, exit, nmiwindow,
+    "struct vmx *", "int", "struct vm_exit *");
+
+SDT_PROBE_DEFINE3(vmm, vmx, exit, inout,
+    "struct vmx *", "int", "struct vm_exit *");
+
+SDT_PROBE_DEFINE3(vmm, vmx, exit, cpuid,
+    "struct vmx *", "int", "struct vm_exit *");
+
+SDT_PROBE_DEFINE5(vmm, vmx, exit, exception,
+    "struct vmx *", "int", "struct vm_exit *", "uint32_t", "int");
+
+SDT_PROBE_DEFINE5(vmm, vmx, exit, nestedfault,
+    "struct vmx *", "int", "struct vm_exit *", "uint64_t", "uint64_t");
+
+SDT_PROBE_DEFINE4(vmm, vmx, exit, mmiofault,
+    "struct vmx *", "int", "struct vm_exit *", "uint64_t");
+
+SDT_PROBE_DEFINE3(vmm, vmx, exit, eoi,
+    "struct vmx *", "int", "struct vm_exit *");
+
+SDT_PROBE_DEFINE3(vmm, vmx, exit, apicaccess,
+    "struct vmx *", "int", "struct vm_exit *");
+
+SDT_PROBE_DEFINE4(vmm, vmx, exit, apicwrite,
+    "struct vmx *", "int", "struct vm_exit *", "struct vlapic *");
+
+SDT_PROBE_DEFINE3(vmm, vmx, exit, xsetbv,
+    "struct vmx *", "int", "struct vm_exit *");
+
+SDT_PROBE_DEFINE3(vmm, vmx, exit, monitor,
+    "struct vmx *", "int", "struct vm_exit *");
+
+SDT_PROBE_DEFINE3(vmm, vmx, exit, mwait,
+    "struct vmx *", "int", "struct vm_exit *");
+
+SDT_PROBE_DEFINE4(vmm, vmx, exit, unknown,
+    "struct vmx *", "int", "struct vm_exit *", "uint32_t");
+
+SDT_PROBE_DEFINE4(vmm, vmx, exit, return,
+    "struct vmx *", "int", "struct vm_exit *", "int");
+
+/*
  * Use the last page below 4GB as the APIC access address. This address is
  * occupied by the boot firmware so it is guaranteed that it will not conflict
  * with a page in system memory.
@@ -2302,6 +2378,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
 	vmexit->exitcode = VM_EXITCODE_BOGUS;
 
 	vmm_stat_incr(vmx->vm, vcpu, VMEXIT_COUNT, 1);
+	SDT_PROBE3(vmm, vmx, exit, entry, vmx, vcpu, vmexit);
 
 	/*
 	 * VM-entry failures during or after loading guest state.
@@ -2408,6 +2485,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
 			}
 		}
 		vmexit->exitcode = VM_EXITCODE_TASK_SWITCH;
+		SDT_PROBE4(vmm, vmx, exit, taskswitch, vmx, vcpu, vmexit, ts);
 		VCPU_CTR4(vmx->vm, vcpu, "task switch reason %d, tss 0x%04x, "
 		    "%s errcode 0x%016lx", ts->reason, ts->tsssel,
 		    ts->ext ? "external" : "internal",
@@ -2415,6 +2493,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
 		break;
 	case EXIT_REASON_CR_ACCESS:
 		vmm_stat_incr(vmx->vm, vcpu, VMEXIT_CR_ACCESS, 1);
+		SDT_PROBE4(vmm, vmx, exit, craccess, vmx, vcpu, vmexit, qual);
 		switch (qual & 0xf) {
 		case 0:
 			handled = vmx_emulate_cr0_access(vmx, vcpu, qual);
@@ -2432,6 +2511,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
 		retu = false;
 		ecx = vmxctx->guest_rcx;
 		VCPU_CTR1(vmx->vm, vcpu, "rdmsr 0x%08x", ecx);
+		SDT_PROBE4(vmm, vmx, exit, rdmsr, vmx, vcpu, vmexit, ecx);
 		error = emulate_rdmsr(vmx, vcpu, ecx, &retu);
 		if (error) {
 			vmexit->exitcode = VM_EXITCODE_RDMSR;
@@ -2452,6 +2532,8 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
 		edx = vmxctx->guest_rdx;
 		VCPU_CTR2(vmx->vm, vcpu, "wrmsr 0x%08x value 0x%016lx",
 		    ecx, (uint64_t)edx << 32 | eax);
+		/* Arguments follow the probe definition: vmx, vcpu, vmexit. */
+		SDT_PROBE5(vmm, vmx, exit, wrmsr, vmx, vcpu, vmexit, ecx,
+		    (uint64_t)edx << 32 | eax);
 		error = emulate_wrmsr(vmx, vcpu, ecx,
 		    (uint64_t)edx << 32 | eax, &retu);
 		if (error) {
@@ -2468,6 +2550,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
 		break;
 	case EXIT_REASON_HLT:
 		vmm_stat_incr(vmx->vm, vcpu, VMEXIT_HLT, 1);
+		SDT_PROBE3(vmm, vmx, exit, halt, vmx, vcpu, vmexit);
 		vmexit->exitcode = VM_EXITCODE_HLT;
 		vmexit->u.hlt.rflags = vmcs_read(VMCS_GUEST_RFLAGS);
 		if (virtual_interrupt_delivery)
@@ -2478,15 +2561,18 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
 		break;
 	case EXIT_REASON_MTF:
 		vmm_stat_incr(vmx->vm, vcpu, VMEXIT_MTRAP, 1);
+		SDT_PROBE3(vmm, vmx, exit, mtrap, vmx, vcpu, vmexit);
 		vmexit->exitcode = VM_EXITCODE_MTRAP;
 		vmexit->inst_length = 0;
 		break;
 	case EXIT_REASON_PAUSE:
 		vmm_stat_incr(vmx->vm, vcpu, VMEXIT_PAUSE, 1);
+		SDT_PROBE3(vmm, vmx, exit, pause, vmx, vcpu, vmexit);
 		vmexit->exitcode = VM_EXITCODE_PAUSE;
 		break;
 	case EXIT_REASON_INTR_WINDOW:
 		vmm_stat_incr(vmx->vm, vcpu, VMEXIT_INTR_WINDOW, 1);
+		SDT_PROBE3(vmm, vmx, exit, intrwindow, vmx, vcpu, vmexit);
 		vmx_clear_int_window_exiting(vmx, vcpu);
 		return (1);
 	case EXIT_REASON_EXT_INTR:
@@ -2500,6 +2586,8 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
 		 * this virtual interrupt during the subsequent VM enter.
 		 */
 		intr_info = vmcs_read(VMCS_EXIT_INTR_INFO);
+		SDT_PROBE4(vmm, vmx, exit, interrupt,
+		    vmx, vcpu, vmexit, intr_info);
 
 		/*
 		 * XXX: Ignore this exit if VMCS_INTR_VALID is not set.
@@ -2519,6 +2607,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
 		vmm_stat_incr(vmx->vm, vcpu, VMEXIT_EXTINT, 1);
 		return (1);
 	case EXIT_REASON_NMI_WINDOW:
+		SDT_PROBE3(vmm, vmx, exit, nmiwindow, vmx, vcpu, vmexit);
 		/* Exit to allow the pending virtual NMI to be injected */
 		if (vm_nmi_pending(vmx->vm, vcpu))
 			vmx_inject_nmi(vmx, vcpu);
@@ -2546,9 +2635,11 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
 			vis->addrsize = inout_str_addrsize(inst_info);
 			inout_str_seginfo(vmx, vcpu, inst_info, in, vis);
 		}
+		SDT_PROBE3(vmm, vmx, exit, inout, vmx, vcpu, vmexit);
 		break;
 	case EXIT_REASON_CPUID:
 		vmm_stat_incr(vmx->vm, vcpu, VMEXIT_CPUID, 1);
+		SDT_PROBE3(vmm, vmx, exit, cpuid, vmx, vcpu, vmexit);
 		handled = vmx_handle_cpuid(vmx->vm, vcpu, vmxctx);
 		break;
 	case EXIT_REASON_EXCEPTION:
@@ -2617,6 +2708,8 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
 		}
 		VCPU_CTR2(vmx->vm, vcpu, "Reflecting exception %d/%#x into "
 		    "the guest", intr_vec, errcode);
+		SDT_PROBE5(vmm, vmx, exit, exception,
+		    vmx, vcpu, vmexit, intr_vec, errcode);
 		error = vm_inject_exception(vmx->vm, vcpu, intr_vec,
 		    errcode_valid, errcode, 0);
 		KASSERT(error == 0, ("%s: vm_inject_exception error %d",
@@ -2637,9 +2730,13 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
 			vmexit->u.paging.gpa = gpa;
 			vmexit->u.paging.fault_type = ept_fault_type(qual);
 			vmm_stat_incr(vmx->vm, vcpu, VMEXIT_NESTED_FAULT, 1);
+			SDT_PROBE5(vmm, vmx, exit, nestedfault,
+			    vmx, vcpu, vmexit, gpa, qual);
 		} else if (ept_emulation_fault(qual)) {
 			vmexit_inst_emul(vmexit, gpa, vmcs_gla());
 			vmm_stat_incr(vmx->vm, vcpu, VMEXIT_INST_EMUL, 1);
+			SDT_PROBE4(vmm, vmx, exit, mmiofault,
+			    vmx, vcpu, vmexit, gpa);
 		}
 		/*
 		 * If Virtual NMIs control is 1 and the VM-exit is due to an
@@ -2656,9 +2753,11 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
 	case EXIT_REASON_VIRTUALIZED_EOI:
 		vmexit->exitcode = VM_EXITCODE_IOAPIC_EOI;
 		vmexit->u.ioapic_eoi.vector = qual & 0xFF;
+		SDT_PROBE3(vmm, vmx, exit, eoi, vmx, vcpu, vmexit);
 		vmexit->inst_length = 0;	/* trap-like */
 		break;
 	case EXIT_REASON_APIC_ACCESS:
+		SDT_PROBE3(vmm, vmx, exit, apicaccess, vmx, vcpu, vmexit);
 		handled = vmx_handle_apic_access(vmx, vcpu, vmexit);
 		break;
 	case EXIT_REASON_APIC_WRITE:
@@ -2668,18 +2767,25 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
 		 */
 		vmexit->inst_length = 0;
 		vlapic = vm_lapic(vmx->vm, vcpu);
+		SDT_PROBE4(vmm, vmx, exit, apicwrite,
+		    vmx, vcpu, vmexit, vlapic);
 		handled = vmx_handle_apic_write(vmx, vcpu, vlapic, qual);
 		break;
 	case EXIT_REASON_XSETBV:
+		SDT_PROBE3(vmm, vmx, exit, xsetbv, vmx, vcpu, vmexit);
 		handled = vmx_emulate_xsetbv(vmx, vcpu, vmexit);
 		break;
 	case EXIT_REASON_MONITOR:
+		SDT_PROBE3(vmm, vmx, exit, monitor, vmx, vcpu, vmexit);
 		vmexit->exitcode = VM_EXITCODE_MONITOR;
 		break;
 	case EXIT_REASON_MWAIT:
+		SDT_PROBE3(vmm, vmx, exit, mwait, vmx, vcpu, vmexit);
 		vmexit->exitcode = VM_EXITCODE_MWAIT;
 		break;
 	default:
+		SDT_PROBE4(vmm, vmx, exit, unknown,
+		    vmx, vcpu, vmexit, reason);
 		vmm_stat_incr(vmx->vm, vcpu, VMEXIT_UNKNOWN, 1);
 		break;
 	}
@@ -2715,6 +2821,9 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
 			 */
 		}
 	}
+
+	SDT_PROBE4(vmm, vmx, exit, return,
+	    vmx, vcpu, vmexit, handled);
 	return (handled);
 }
 
@@ -2951,6 +3060,12 @@ vmx_run(void *arg, int vcpu, register_t rip, pmap_t pmap,
 			break;
 		}
 
+		if (vcpu_debugged(vm, vcpu)) {
+			enable_intr();
+			vm_exit_debug(vmx->vm, vcpu, rip);
+			break;
+		}
+
 		vmx_run_trace(vmx, vcpu);
 		vmx_dr_enter_guest(vmxctx);
 		rc = vmx_enter_guest(vmxctx, vmx, launched);
diff --git a/usr/src/uts/i86pc/io/vmm/io/ppt.c b/usr/src/uts/i86pc/io/vmm/io/ppt.c
index dabbe584fd..89e9f35c10 100644
--- a/usr/src/uts/i86pc/io/vmm/io/ppt.c
+++ b/usr/src/uts/i86pc/io/vmm/io/ppt.c
@@ -817,6 +817,10 @@ ppt_flr(dev_info_t *dip, boolean_t force)
 	return (B_TRUE);
 
 fail:
+	/*
+	 * TODO: If the FLR fails for some reason, we should attempt a reset
+	 * using the PCI power management facilities (if possible).
+	 */
 	pci_config_teardown(&hdl);
 	return (B_FALSE);
 }
diff --git a/usr/src/uts/i86pc/io/vmm/vmm.c b/usr/src/uts/i86pc/io/vmm/vmm.c
index 050d8e752c..c4988c6d72 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm.c
+++ b/usr/src/uts/i86pc/io/vmm/vmm.c
@@ -174,6 +174,7 @@ struct vm {
 	struct vpmtmr	*vpmtmr;		/* (i) virtual ACPI PM timer */
 	struct vrtc	*vrtc;			/* (o) virtual RTC */
 	volatile cpuset_t active_cpus;		/* (i) active vcpus */
+	volatile cpuset_t debug_cpus;		/* (i) vcpus stopped for debug */
 	int		suspend;		/* (i) stop VM execution */
 	volatile cpuset_t suspended_cpus; 	/* (i) suspended vcpus */
 	volatile cpuset_t halted_cpus;		/* (x) cpus in a hard halt */
@@ -190,6 +191,11 @@ struct vm {
 	struct vmspace	*vmspace;		/* (o) guest's address space */
 	char		name[VM_MAX_NAMELEN];	/* (o) virtual machine name */
 	struct vcpu	vcpu[VM_MAXCPU];	/* (i) guest vcpus */
+	/* The following describe the vm cpu topology */
+	uint16_t	sockets;		/* (o) num of sockets */
+	uint16_t	cores;			/* (o) num of cores/socket */
+	uint16_t	threads;		/* (o) num of threads/core */
+	uint16_t	maxcpus;		/* (o) max pluggable cpus */
 #ifndef __FreeBSD__
 	krwlock_t	ioport_rwlock;
 	list_t		ioport_hooks;
@@ -231,6 +237,8 @@ static struct vmm_ops *ops;
 #define	fpu_start_emulating()	load_cr0(rcr0() | CR0_TS)
 #define	fpu_stop_emulating()	clts()
 
+SDT_PROVIDER_DEFINE(vmm);
+
 static MALLOC_DEFINE(M_VM, "vm", "vm");
 
 /* statistics */
@@ -520,6 +528,7 @@ vm_init(struct vm *vm, bool create)
 #endif /* __FreeBSD__ */
 
 	CPU_ZERO(&vm->active_cpus);
+	CPU_ZERO(&vm->debug_cpus);
 
 	vm->suspend = 0;
 	CPU_ZERO(&vm->suspended_cpus);
@@ -528,6 +537,12 @@ vm_init(struct vm *vm, bool create)
 		vcpu_init(vm, i, create);
 }
 
+/*
+ * The default CPU topology is a single thread per package.
+ */
+u_int cores_per_package = 1;
+u_int threads_per_core = 1;
+
 int
 vm_create(const char *name, struct vm **retvm)
 {
@@ -553,12 +568,43 @@ vm_create(const char *name, struct vm **retvm)
 	vm->vmspace = vmspace;
 	mtx_init(&vm->rendezvous_mtx, "vm rendezvous lock", 0, MTX_DEF);
 
+	vm->sockets = 1;
+	vm->cores = cores_per_package;	/* XXX backwards compatibility */
+	vm->threads = threads_per_core;	/* XXX backwards compatibility */
+	vm->maxcpus = 0;		/* XXX not implemented */
+
 	vm_init(vm, true);
 
 	*retvm = vm;
 	return (0);
 }
 
+void
+vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores,
+    uint16_t *threads, uint16_t *maxcpus)
+{
+	*sockets = vm->sockets;		/* num of sockets */
+	*cores = vm->cores;		/* num of cores/socket */
+	*threads = vm->threads;		/* num of threads/core */
+	*maxcpus = vm->maxcpus;		/* max pluggable cpus */
+}
+
+int
+vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
+    uint16_t threads, uint16_t maxcpus)
+{
+	if (maxcpus != 0)
+		return (EINVAL);	/* XXX remove when supported */
+	if (((uint64_t)sockets * cores * threads) > VM_MAXCPU)
+		return (EINVAL);	/* widened: int product can wrap */
+	/* XXX need to check sockets * cores * threads == vCPU, how? */
+	vm->sockets = sockets;
+	vm->cores = cores;
+	vm->threads = threads;
+	vm->maxcpus = maxcpus;
+	return (0);
+}
+
 static void
 vm_cleanup(struct vm *vm, bool destroy)
 {
@@ -1435,6 +1481,9 @@ vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled, bool *retu)
 		if (vcpu_should_yield(vm, vcpuid))
 			break;
 
+		if (vcpu_debugged(vm, vcpuid))
+			break;
+
 		/*
 		 * Some Linux guests implement "halt" by having all vcpus
 		 * execute HLT with interrupts disabled. 'halted_cpus' keeps
@@ -1718,6 +1767,17 @@ vm_exit_suspended(struct vm *vm, int vcpuid, uint64_t rip)
 }
 
 void
+vm_exit_debug(struct vm *vm, int vcpuid, uint64_t rip)
+{
+	struct vm_exit *vmexit;
+	/* Record a VM_EXITCODE_DEBUG exit at rip with inst_length 0. */
+	vmexit = vm_exitinfo(vm, vcpuid);
+	vmexit->rip = rip;
+	vmexit->inst_length = 0;
+	vmexit->exitcode = VM_EXITCODE_DEBUG;
+}
+
+void
 vm_exit_rendezvous(struct vm *vm, int vcpuid, uint64_t rip)
 {
 	struct vm_exit *vmexit;
@@ -2609,6 +2669,55 @@ vm_activate_cpu(struct vm *vm, int vcpuid)
 	return (0);
 }
 
+int
+vm_suspend_cpu(struct vm *vm, int vcpuid)
+{
+	int i;
+	/* vcpuid -1 selects every active vcpu; otherwise one active vcpu. */
+	if (vcpuid < -1 || vcpuid >= VM_MAXCPU)
+		return (EINVAL);
+
+	if (vcpuid == -1) {
+		vm->debug_cpus = vm->active_cpus;	/* plain assignment */
+		for (i = 0; i < VM_MAXCPU; i++) {
+			if (CPU_ISSET(i, &vm->active_cpus))
+				vcpu_notify_event(vm, i, false);
+		}
+	} else {
+		if (!CPU_ISSET(vcpuid, &vm->active_cpus))
+			return (EINVAL);	/* vcpu is not active */
+
+		CPU_SET_ATOMIC(vcpuid, &vm->debug_cpus);
+		vcpu_notify_event(vm, vcpuid, false);
+	}
+	return (0);
+}
+
+int
+vm_resume_cpu(struct vm *vm, int vcpuid)
+{
+	/* Clear one vcpu, or all of them (vcpuid -1), from debug_cpus. */
+	if (vcpuid < -1 || vcpuid >= VM_MAXCPU)
+		return (EINVAL);
+
+	if (vcpuid == -1) {
+		CPU_ZERO(&vm->debug_cpus);
+	} else {
+		if (!CPU_ISSET(vcpuid, &vm->debug_cpus))
+			return (EINVAL);	/* not in debug_cpus */
+
+		CPU_CLR_ATOMIC(vcpuid, &vm->debug_cpus);
+	}
+	return (0);
+}
+
+int
+vcpu_debugged(struct vm *vm, int vcpuid)
+{
+	/* Reports whether vcpuid is set in vm->debug_cpus. */
+	return (CPU_ISSET(vcpuid, &vm->debug_cpus));
+}
+
 cpuset_t
 vm_active_cpus(struct vm *vm)
 {
@@ -2617,6 +2726,13 @@ vm_active_cpus(struct vm *vm)
 }
 
 cpuset_t
+vm_debug_cpus(struct vm *vm)
+{
+	/* Hands back a by-value copy of the debug_cpus set. */
+	return (vm->debug_cpus);
+}
+
+cpuset_t
 vm_suspended_cpus(struct vm *vm)
 {
 
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_host.c b/usr/src/uts/i86pc/io/vmm/vmm_host.c
index 639de087d0..9e390c93dd 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm_host.c
+++ b/usr/src/uts/i86pc/io/vmm/vmm_host.c
@@ -73,7 +73,16 @@ vmm_host_state_init(void)
 	 */
 	vmm_host_cr0 = rcr0() | CR0_TS;
 
-	vmm_host_cr4 = rcr4();
+	/*
+	 * On machines without PCID, or with PCID but without INVPCID,
+	 * we flush kernel (i.e. global) TLB entries by temporarily
+	 * clearing the CR4.PGE bit, see invltlb_glob().  If
+	 * preemption occurs at the wrong time, cached vmm_host_cr4
+	 * might store the value with CR4.PGE cleared.  Since FreeBSD
+	 * requires support for PG_G on amd64, just set it
+	 * unconditionally.
+	 */
+	vmm_host_cr4 = rcr4() | CR4_PGE;
 
 	/*
 	 * Only permit a guest to use XSAVE if the host is using
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c b/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c
index 66d5ce3b5d..a8381a9c0a 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c
+++ b/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c
@@ -964,6 +964,12 @@ vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md,
 	case VM_ACTIVATE_CPU:
 		error = vm_activate_cpu(sc->vmm_vm, vcpu);
 		break;
+	case VM_SUSPEND_CPU:
+		error = vm_suspend_cpu(sc->vmm_vm, vcpu);
+		break;
+	case VM_RESUME_CPU:
+		error = vm_resume_cpu(sc->vmm_vm, vcpu);
+		break;
 
 	case VM_GET_CPUS: {
 		struct vm_cpuset vm_cpuset;
@@ -993,6 +999,8 @@ vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md,
 			tempset = vm_active_cpus(sc->vmm_vm);
 		} else if (vm_cpuset.which == VM_SUSPENDED_CPUS) {
 			tempset = vm_suspended_cpus(sc->vmm_vm);
+		} else if (vm_cpuset.which == VM_DEBUG_CPUS) {
+			tempset = vm_debug_cpus(sc->vmm_vm);
 		} else {
 			error = EINVAL;
 		}
@@ -1080,6 +1088,29 @@ vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md,
 		error = vm_restart_instruction(sc->vmm_vm, vcpu);
 		break;
 
+	case VM_SET_TOPOLOGY: {
+		struct vm_cpu_topology topo;
+
+		if (ddi_copyin(datap, &topo, sizeof (topo), md) != 0) {
+			error = EFAULT;
+			break;
+		}
+		error = vm_set_topology(sc->vmm_vm, topo.sockets, topo.cores,
+		    topo.threads, topo.maxcpus);
+		break;
+	}
+	case VM_GET_TOPOLOGY: {
+		struct vm_cpu_topology topo;
+
+		vm_get_topology(sc->vmm_vm, &topo.sockets, &topo.cores,
+		    &topo.threads, &topo.maxcpus);
+		if (ddi_copyout(&topo, datap, sizeof (topo), md) != 0) {
+			error = EFAULT;
+			break;
+		}
+		break;
+	}
+
 #ifndef __FreeBSD__
 	case VM_DEVMEM_GETOFFSET: {
 		struct vm_devmem_offset vdo;
diff --git a/usr/src/uts/i86pc/io/vmm/x86.c b/usr/src/uts/i86pc/io/vmm/x86.c
index 0d59c119a8..d6426bde44 100644
--- a/usr/src/uts/i86pc/io/vmm/x86.c
+++ b/usr/src/uts/i86pc/io/vmm/x86.c
@@ -73,17 +73,6 @@ static uint64_t bhyve_xcpuids;
 SYSCTL_ULONG(_hw_vmm, OID_AUTO, bhyve_xcpuids, CTLFLAG_RW, &bhyve_xcpuids, 0,
     "Number of times an unknown cpuid leaf was accessed");
 
-/*
- * The default CPU topology is a single thread per package.
- */
-static u_int threads_per_core = 1;
-SYSCTL_UINT(_hw_vmm_topology, OID_AUTO, threads_per_core, CTLFLAG_RDTUN,
-    &threads_per_core, 0, NULL);
-
-static u_int cores_per_package = 1;
-SYSCTL_UINT(_hw_vmm_topology, OID_AUTO, cores_per_package, CTLFLAG_RDTUN,
-    &cores_per_package, 0, NULL);
-
 static int cpuid_leaf_b = 1;
 SYSCTL_INT(_hw_vmm_topology, OID_AUTO, cpuid_leaf_b, CTLFLAG_RDTUN,
     &cpuid_leaf_b, 0, NULL);
@@ -106,8 +95,9 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id,
 	const struct xsave_limits *limits;
 	uint64_t cr4;
 	int error, enable_invpcid, level, width = 0, x2apic_id = 0;
-	unsigned int func, regs[4], logical_cpus;
+	unsigned int func, regs[4], logical_cpus = 0;
 	enum x2apic_state x2apic_state;
+	uint16_t cores, maxcpus, sockets, threads;
 
 	VCPU_CTR2(vm, vcpu_id, "cpuid %#x,%#x", *eax, *ecx);
 
@@ -155,11 +145,11 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id,
 				 *
 				 * However this matches the logical cpus as
 				 * advertised by leaf 0x1 and will work even
-				 * if the 'threads_per_core' tunable is set
-				 * incorrectly on an AMD host.
+				 * if threads is set incorrectly on an AMD host.
 				 */
-				logical_cpus = threads_per_core *
-				    cores_per_package;
+				vm_get_topology(vm, &sockets, &cores, &threads,
+				    &maxcpus);
+				logical_cpus = threads * cores;
 				regs[2] = logical_cpus - 1;
 			}
 			break;
@@ -321,7 +311,9 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id,
 			 */
 			regs[3] |= (CPUID_MCA | CPUID_MCE | CPUID_MTRR);
 
-			logical_cpus = threads_per_core * cores_per_package;
+			vm_get_topology(vm, &sockets, &cores, &threads,
+			    &maxcpus);
+			logical_cpus = threads * cores;
 			regs[1] &= ~CPUID_HTT_CORES;
 			regs[1] |= (logical_cpus & 0xff) << 16;
 			regs[3] |= CPUID_HTT;
@@ -331,8 +323,10 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id,
 			cpuid_count(*eax, *ecx, regs);
 
 			if (regs[0] || regs[1] || regs[2] || regs[3]) {
+				vm_get_topology(vm, &sockets, &cores, &threads,
+				    &maxcpus);
 				regs[0] &= 0x3ff;
-				regs[0] |= (cores_per_package - 1) << 26;
+				regs[0] |= (cores - 1) << 26;
 				/*
 				 * Cache topology:
 				 * - L1 and L2 are shared only by the logical
@@ -340,10 +334,10 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id,
 				 * - L3 and above are shared by all logical
 				 *   processors in the package.
 				 */
-				logical_cpus = threads_per_core;
+				logical_cpus = threads;
 				level = (regs[0] >> 5) & 0x7;
 				if (level >= 3)
-					logical_cpus *= cores_per_package;
+					logical_cpus *= cores;
 				regs[0] |= (logical_cpus - 1) << 14;
 			}
 			break;
@@ -405,16 +399,17 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id,
 			/*
 			 * Processor topology enumeration
 			 */
+			vm_get_topology(vm, &sockets, &cores, &threads,
+			    &maxcpus);
 			if (*ecx == 0) {
-				logical_cpus = threads_per_core;
+				logical_cpus = threads;
 				width = log2(logical_cpus);
 				level = CPUID_TYPE_SMT;
 				x2apic_id = vcpu_id;
 			}
 
 			if (*ecx == 1) {
-				logical_cpus = threads_per_core *
-				    cores_per_package;
+				logical_cpus = threads * cores;
 				width = log2(logical_cpus);
 				level = CPUID_TYPE_CORE;
 				x2apic_id = vcpu_id;
diff --git a/usr/src/uts/i86pc/sys/vmm.h b/usr/src/uts/i86pc/sys/vmm.h
index 43a9d36de3..dea60c5f76 100644
--- a/usr/src/uts/i86pc/sys/vmm.h
+++ b/usr/src/uts/i86pc/sys/vmm.h
@@ -44,8 +44,13 @@
 #ifndef _VMM_H_
 #define	_VMM_H_
 
+#include <sys/sdt.h>
 #include <x86/segments.h>
 
+#ifdef _KERNEL
+SDT_PROVIDER_DECLARE(vmm);
+#endif
+
 enum vm_suspend_how {
 	VM_SUSPEND_NONE,
 	VM_SUSPEND_RESET,
@@ -204,6 +209,10 @@ int vm_create(const char *name, struct vm **retvm);
 void vm_destroy(struct vm *vm);
 int vm_reinit(struct vm *vm);
 const char *vm_name(struct vm *vm);
+void vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores,
+    uint16_t *threads, uint16_t *maxcpus);
+int vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
+    uint16_t threads, uint16_t maxcpus);
 
 /*
  * APIs that modify the guest memory map require all vcpus to be frozen.
@@ -259,8 +268,11 @@ int vm_get_x2apic_state(struct vm *vm, int vcpu, enum x2apic_state *state);
 int vm_set_x2apic_state(struct vm *vm, int vcpu, enum x2apic_state state);
 int vm_apicid2vcpuid(struct vm *vm, int apicid);
 int vm_activate_cpu(struct vm *vm, int vcpu);
+int vm_suspend_cpu(struct vm *vm, int vcpu);
+int vm_resume_cpu(struct vm *vm, int vcpu);
 struct vm_exit *vm_exitinfo(struct vm *vm, int vcpuid);
 void vm_exit_suspended(struct vm *vm, int vcpuid, uint64_t rip);
+void vm_exit_debug(struct vm *vm, int vcpuid, uint64_t rip);
 void vm_exit_rendezvous(struct vm *vm, int vcpuid, uint64_t rip);
 void vm_exit_astpending(struct vm *vm, int vcpuid, uint64_t rip);
 void vm_exit_reqidle(struct vm *vm, int vcpuid, uint64_t rip);
@@ -284,6 +296,7 @@ typedef void (*vm_rendezvous_func_t)(struct vm *vm, int vcpuid, void *arg);
 void vm_smp_rendezvous(struct vm *vm, int vcpuid, cpuset_t dest,
     vm_rendezvous_func_t func, void *arg);
 cpuset_t vm_active_cpus(struct vm *vm);
+cpuset_t vm_debug_cpus(struct vm *vm);
 cpuset_t vm_suspended_cpus(struct vm *vm);
 #endif	/* _SYS__CPUSET_H_ */
 
@@ -308,6 +321,8 @@ vcpu_reqidle(struct vm_eventinfo *info)
 	return (*info->iptr);
 }
 
+int vcpu_debugged(struct vm *vm, int vcpuid);
+
 /*
  * Return 1 if device indicated by bus/slot/func is supposed to be a
  * pci passthrough device.
@@ -568,6 +583,7 @@ enum vm_exitcode {
 	VM_EXITCODE_MWAIT,
 	VM_EXITCODE_SVM,
 	VM_EXITCODE_REQIDLE,
+	VM_EXITCODE_DEBUG,
 	VM_EXITCODE_MAX
 };
 
diff --git a/usr/src/uts/i86pc/sys/vmm_dev.h b/usr/src/uts/i86pc/sys/vmm_dev.h
index a737ab1ad5..63ccc36dc6 100644
--- a/usr/src/uts/i86pc/sys/vmm_dev.h
+++ b/usr/src/uts/i86pc/sys/vmm_dev.h
@@ -272,6 +272,7 @@ struct vm_cpuset {
 };
 #define	VM_ACTIVE_CPUS		0
 #define	VM_SUSPENDED_CPUS	1
+#define	VM_DEBUG_CPUS		2
 
 struct vm_intinfo {
 	int		vcpuid;
@@ -295,6 +296,13 @@ struct vm_devmem_offset {
 };
 #endif
 
+struct vm_cpu_topology {
+	uint16_t	sockets;	/* num of sockets */
+	uint16_t	cores;		/* num of cores/socket */
+	uint16_t	threads;	/* num of threads/core */
+	uint16_t	maxcpus;	/* max pluggable cpus (0 for now) */
+};
+
 enum {
 	/* general routines */
 	IOCNUM_ABIVERS = 0,
@@ -354,6 +362,10 @@ enum {
 	IOCNUM_GET_X2APIC_STATE = 61,
 	IOCNUM_GET_HPET_CAPABILITIES = 62,
 
+	/* CPU Topology */
+	IOCNUM_SET_TOPOLOGY = 63,
+	IOCNUM_GET_TOPOLOGY = 64,
+
 	/* legacy interrupt injection */
 	IOCNUM_ISA_ASSERT_IRQ = 80,
 	IOCNUM_ISA_DEASSERT_IRQ = 81,
@@ -363,6 +375,8 @@ enum {
 	/* vm_cpuset */
 	IOCNUM_ACTIVATE_CPU = 90,
 	IOCNUM_GET_CPUSET = 91,
+	IOCNUM_SUSPEND_CPU = 92,
+	IOCNUM_RESUME_CPU = 93,
 
 	/* RTC */
 	IOCNUM_RTC_READ = 100,
@@ -454,6 +468,10 @@ enum {
 	_IOWR('v', IOCNUM_GET_X2APIC_STATE, struct vm_x2apic)
 #define	VM_GET_HPET_CAPABILITIES \
 	_IOR('v', IOCNUM_GET_HPET_CAPABILITIES, struct vm_hpet_cap)
+#define	VM_SET_TOPOLOGY \
+	_IOW('v', IOCNUM_SET_TOPOLOGY, struct vm_cpu_topology)
+#define	VM_GET_TOPOLOGY \
+	_IOR('v', IOCNUM_GET_TOPOLOGY, struct vm_cpu_topology)
 #define	VM_GET_GPA_PMAP \
 	_IOWR('v', IOCNUM_GET_GPA_PMAP, struct vm_gpa_pte)
 #define	VM_GLA2GPA	\
@@ -464,6 +482,10 @@ enum {
 	_IOW('v', IOCNUM_ACTIVATE_CPU, struct vm_activate_cpu)
 #define	VM_GET_CPUS	\
 	_IOW('v', IOCNUM_GET_CPUSET, struct vm_cpuset)
+#define	VM_SUSPEND_CPU \
+	_IOW('v', IOCNUM_SUSPEND_CPU, struct vm_activate_cpu)
+#define	VM_RESUME_CPU \
+	_IOW('v', IOCNUM_RESUME_CPU, struct vm_activate_cpu)
 #define	VM_SET_INTINFO	\
 	_IOW('v', IOCNUM_SET_INTINFO, struct vm_intinfo)
 #define	VM_GET_INTINFO	\
author	Patrick Mooney <pmooney@pfmooney.com>	2018-05-15 03:06:09 +0000
committer	Patrick Mooney <pmooney@pfmooney.com>	2018-05-24 18:07:37 +0000
commit	adfeb11ce94f7c9b78db3f67388fb704c2d8673a (patch)
tree	dcf8186b69efcb05af66a3fe35cea2234efd9e12
parent	28825f0c4ec7c0a9a0a1b599ed767e9c155d1230 (diff)
download	illumos-joyent-adfeb11ce94f7c9b78db3f67388fb704c2d8673a.tar.gz