author     Patrick Mooney <pmooney@pfmooney.com>    2022-09-06 11:35:56 -0500
committer  Patrick Mooney <pmooney@oxide.computer>  2022-09-26 20:06:16 +0000
commit     42640e499ab380f595753ffb5c3607d030e5cff3
tree       c43fc7ca7b9d413efe780ea5cfce6794ce840ccb
parent     1b6debbb96f2c66931b6ec520e8dd07ece57d2dd
download   illumos-joyent-42640e499ab380f595753ffb5c3607d030e5cff3.tar.gz
14963 bhyve destroy should be more robust
14968 bhyve needs self-destroy ioctl
Reviewed by: Andy Fiddaman <illumos@fiddaman.net>
Reviewed by: Michael Zeller <mike@mikezeller.net>
Approved by: Dan McDonald <danmcd@mnx.io>
-rw-r--r--  usr/src/lib/libvmmapi/common/vmmapi.c                 8
-rw-r--r--  usr/src/pkg/manifests/system-bhyve-tests.p5m          2
-rw-r--r--  usr/src/test/bhyve-tests/runfiles/default.run         4
-rw-r--r--  usr/src/test/bhyve-tests/tests/vmm/Makefile           2
-rw-r--r--  usr/src/test/bhyve-tests/tests/vmm/auto_destruct.c    103
-rw-r--r--  usr/src/test/bhyve-tests/tests/vmm/common.c           67
-rw-r--r--  usr/src/test/bhyve-tests/tests/vmm/common.h           3
-rw-r--r--  usr/src/test/bhyve-tests/tests/vmm/legacy_destruct.c  71
-rw-r--r--  usr/src/test/bhyve-tests/tests/vmm/self_destruct.c    90
-rw-r--r--  usr/src/uts/intel/io/vmm/sys/vmm_impl.h               13
-rw-r--r--  usr/src/uts/intel/io/vmm/vmm_drv_test.c               4
-rw-r--r--  usr/src/uts/intel/io/vmm/vmm_sol_dev.c                343
-rw-r--r--  usr/src/uts/intel/io/vmm/vmm_zsd.c                    11
-rw-r--r--  usr/src/uts/intel/sys/vmm_dev.h                       2
14 files changed, 512 insertions, 211 deletions
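
For context before the diff itself: vm_destroy() in libvmmapi now issues the new VM_DESTROY_SELF ioctl on the instance's own file descriptor instead of the name-based VMM_DESTROY_VM request against the vmmctl device, and VM_DESTROY_PENDING lets callers ask whether destruction of an instance is already in flight. Below is a minimal, hypothetical sketch of that flow, mirroring the patterns added in vmmapi.c and the new tests; the instance name "demo-vm" and the standalone program are illustrative only and not part of this change.

/*
 * Hypothetical usage sketch (not part of the commit): exercise the two new
 * ioctls from userspace.  The instance "demo-vm" is assumed to have been
 * created beforehand (e.g. with libvmmapi's vm_create()).
 */
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <err.h>

#include <sys/vmm.h>
#include <sys/vmm_dev.h>

int
main(void)
{
	int fd = open("/dev/vmm/demo-vm", O_RDWR);
	if (fd < 0) {
		err(EXIT_FAILURE, "could not open instance");
	}

	/* Initiate destruction via the instance fd itself; no name lookup. */
	if (ioctl(fd, VM_DESTROY_SELF, 0) != 0) {
		err(EXIT_FAILURE, "ioctl(VM_DESTROY_SELF) failed");
	}

	/*
	 * While this fd (or any vmm_drv hold) remains, the instance lingers
	 * with destruction pending; the driver-set rval is 1 in that case
	 * and 0 when no destroy is in progress.
	 */
	if (ioctl(fd, VM_DESTROY_PENDING, 0) == 1) {
		(void) printf("destroy pending, waiting on open handles\n");
	}

	/* Dropping the last reference lets destruction complete. */
	(void) close(fd);
	return (EXIT_SUCCESS);
}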
diff --git a/usr/src/lib/libvmmapi/common/vmmapi.c b/usr/src/lib/libvmmapi/common/vmmapi.c
index 540cbf0bc5..a3c00d616b 100644
--- a/usr/src/lib/libvmmapi/common/vmmapi.c
+++ b/usr/src/lib/libvmmapi/common/vmmapi.c
@@ -39,7 +39,7 @@
*
* Copyright 2015 Pluribus Networks Inc.
* Copyright 2019 Joyent, Inc.
- * Copyright 2021 Oxide Computer Company
+ * Copyright 2022 Oxide Computer Company
*/
#include <sys/cdefs.h>
@@ -182,18 +182,14 @@ vm_close(struct vmctx *vm)
void
vm_destroy(struct vmctx *vm)
{
- struct vm_destroy_req req;
-
assert(vm != NULL);
if (vm->fd >= 0) {
+ (void) ioctl(vm->fd, VM_DESTROY_SELF, 0);
(void) close(vm->fd);
vm->fd = -1;
}
- (void) strncpy(req.name, vm->name, VM_MAX_NAMELEN);
- (void) vm_do_ctl(VMM_DESTROY_VM, &req);
-
free(vm);
}
#endif
diff --git a/usr/src/pkg/manifests/system-bhyve-tests.p5m b/usr/src/pkg/manifests/system-bhyve-tests.p5m
index 778a2e9a97..a5ba4e375e 100644
--- a/usr/src/pkg/manifests/system-bhyve-tests.p5m
+++ b/usr/src/pkg/manifests/system-bhyve-tests.p5m
@@ -59,9 +59,11 @@ file path=opt/bhyve-tests/tests/vmm/cpuid_ioctl mode=0555
file path=opt/bhyve-tests/tests/vmm/drv_hold mode=0555
file path=opt/bhyve-tests/tests/vmm/fpu_getset mode=0555
file path=opt/bhyve-tests/tests/vmm/interface_version mode=0555
+file path=opt/bhyve-tests/tests/vmm/legacy_destruct mode=0555
file path=opt/bhyve-tests/tests/vmm/mem_devmem mode=0555
file path=opt/bhyve-tests/tests/vmm/mem_partial mode=0555
file path=opt/bhyve-tests/tests/vmm/mem_seg_map mode=0555
+file path=opt/bhyve-tests/tests/vmm/self_destruct mode=0555
dir path=usr/kernel/drv group=sys
dir path=usr/kernel/drv/$(ARCH64) group=sys
file path=usr/kernel/drv/$(ARCH64)/vmm_drv_test
diff --git a/usr/src/test/bhyve-tests/runfiles/default.run b/usr/src/test/bhyve-tests/runfiles/default.run
index ff16ff1975..9402a3051b 100644
--- a/usr/src/test/bhyve-tests/runfiles/default.run
+++ b/usr/src/test/bhyve-tests/runfiles/default.run
@@ -27,9 +27,11 @@ tests = [
'drv_hold',
'fpu_getset',
'interface_version',
+ 'legacy_destruct',
'mem_devmem',
'mem_partial',
- 'mem_seg_map'
+ 'mem_seg_map',
+ 'self_destruct'
]
[/opt/bhyve-tests/tests/kdev]
diff --git a/usr/src/test/bhyve-tests/tests/vmm/Makefile b/usr/src/test/bhyve-tests/tests/vmm/Makefile
index 6c75f1cd1b..409791d027 100644
--- a/usr/src/test/bhyve-tests/tests/vmm/Makefile
+++ b/usr/src/test/bhyve-tests/tests/vmm/Makefile
@@ -22,6 +22,8 @@ PROG = mem_partial \
interface_version \
check_iommu \
auto_destruct \
+ legacy_destruct \
+ self_destruct \
drv_hold \
cpuid_ioctl
diff --git a/usr/src/test/bhyve-tests/tests/vmm/auto_destruct.c b/usr/src/test/bhyve-tests/tests/vmm/auto_destruct.c
index 428f489160..69658dcc32 100644
--- a/usr/src/test/bhyve-tests/tests/vmm/auto_destruct.c
+++ b/usr/src/test/bhyve-tests/tests/vmm/auto_destruct.c
@@ -18,8 +18,8 @@
#include <stdlib.h>
#include <fcntl.h>
#include <libgen.h>
-#include <sys/stat.h>
#include <errno.h>
+#include <err.h>
#include <assert.h>
#include <sys/vmm.h>
@@ -29,41 +29,6 @@
#include "common.h"
-bool
-test_for_instance(const char *suite_name)
-{
- char vm_name[VM_MAX_NAMELEN];
- char vm_path[MAXPATHLEN];
-
- name_test_vm(suite_name, vm_name);
- (void) snprintf(vm_path, sizeof (vm_path), "/dev/vmm/%s", vm_name);
-
- struct stat buf;
- return (stat(vm_path, &buf) == 0);
-}
-
-int
-destroy_instance(const char *suite_name)
-{
- int ctl_fd = open(VMM_CTL_DEV, O_EXCL | O_RDWR);
- if (ctl_fd < 0) {
- return (-1);
- }
-
- struct vm_destroy_req req;
- name_test_vm(suite_name, req.name);
-
- if (ioctl(ctl_fd, VMM_DESTROY_VM, &req) != 0) {
- /* Preserve the destroy error across the close() */
- int err = errno;
- (void) close(ctl_fd);
- errno = err;
- return (-1);
- } else {
- (void) close(ctl_fd);
- return (0);
- }
-}
int
main(int argc, char *argv[])
@@ -73,51 +38,44 @@ main(int argc, char *argv[])
ctx = create_test_vm(suite_name);
if (ctx == NULL) {
- perror("could open test VM");
- return (EXIT_FAILURE);
+ errx(EXIT_FAILURE, "could open test VM");
}
/*
* It would be odd if we had the freshly created VM instance, but it did
* not appear to exist.
*/
- assert(test_for_instance(suite_name));
+ assert(check_instance_usable(suite_name));
/* Make sure that auto-destruct is off */
if (ioctl(vm_get_device_fd(ctx), VM_SET_AUTODESTRUCT, 0) != 0) {
- perror("could not disable auto-destruct");
- return (EXIT_FAILURE);
+ errx(EXIT_FAILURE, "could not disable auto-destruct");
}
vm_close(ctx);
- if (!test_for_instance(suite_name)) {
- perror("instance missing after close");
- return (EXIT_FAILURE);
+ if (!check_instance_usable(suite_name)) {
+ err(EXIT_FAILURE, "instance missing after close");
}
ctx = NULL;
if (destroy_instance(suite_name) != 0) {
- perror("could not clean up instance");
- return (EXIT_FAILURE);
+ errx(EXIT_FAILURE, "could not clean up instance");
}
/* Now repeat that process, but enable auto-destruct */
ctx = create_test_vm(suite_name);
if (ctx == NULL) {
- perror("could open test VM");
- return (EXIT_FAILURE);
+ errx(EXIT_FAILURE, "could open test VM");
}
if (ioctl(vm_get_device_fd(ctx), VM_SET_AUTODESTRUCT, 1) != 0) {
- perror("could not enable auto-destruct");
- return (EXIT_FAILURE);
+ errx(EXIT_FAILURE, "could not enable auto-destruct");
}
vm_close(ctx);
ctx = NULL;
/* At this point, the instance should be gone */
- if (test_for_instance(suite_name)) {
- (void) fprintf(stderr,
+ if (check_instance_usable(suite_name)) {
+ err(EXIT_FAILURE,
"instance did not auto-destruct as expected");
- return (EXIT_FAILURE);
}
/*
@@ -126,37 +84,40 @@ main(int argc, char *argv[])
*/
ctx = create_test_vm(suite_name);
if (ctx == NULL) {
- perror("could open test VM");
- return (EXIT_FAILURE);
+ errx(EXIT_FAILURE, "could open test VM");
}
if (ioctl(vm_get_device_fd(ctx), VM_SET_AUTODESTRUCT, 1) != 0) {
- perror("could not enable auto-destruct");
- return (EXIT_FAILURE);
+ errx(EXIT_FAILURE, "could not enable auto-destruct");
}
int vdtfd = open_drv_test();
if (vdtfd < 0) {
- perror("could open drv_test device");
- return (EXIT_FAILURE);
+ errx(EXIT_FAILURE, "could open drv_test device");
}
if (ioctl(vdtfd, VDT_IOC_HOLD, vm_get_device_fd(ctx)) != 0) {
- perror("could not hold VM from vmm_drv device");
- return (EXIT_FAILURE);
+ errx(EXIT_FAILURE, "could not hold VM from vmm_drv device");
}
vm_close(ctx);
ctx = NULL;
- if (!test_for_instance(suite_name)) {
- (void) fprintf(stderr,
- "instance auto-destructed despite existing vmm_drv hold");
- return (EXIT_FAILURE);
+
+ /*
+ * With the vmm_drv hold remaining on the instance, we expect it to
+ * exist, but not be usable (due to in-progress destroy).
+ */
+ if (!check_instance_exists(suite_name)) {
+ err(EXIT_FAILURE, "instance completed auto-destruct despite "
+ "existing vmm_drv hold");
}
+ if (check_instance_usable(suite_name)) {
+ err(EXIT_FAILURE, "instance still usable despite close() after "
+ "auto-destroy configured");
+ }
+
if (ioctl(vdtfd, VDT_IOC_RELE, 0) != 0) {
- perror("could not release VM from vmm_drv device");
- return (EXIT_FAILURE);
+ errx(EXIT_FAILURE, "could not release VM from vmm_drv device");
}
- if (test_for_instance(suite_name)) {
- (void) fprintf(stderr,
- "instance did not auto-destructed after vmm_drv release");
- return (EXIT_FAILURE);
+ if (check_instance_usable(suite_name)) {
+ err(EXIT_FAILURE, "instance did not complete destruction "
+ "after vmm_drv release");
}
(void) printf("%s\tPASS\n", suite_name);
diff --git a/usr/src/test/bhyve-tests/tests/vmm/common.c b/usr/src/test/bhyve-tests/tests/vmm/common.c
index a4351c47d7..ca34dc8cb2 100644
--- a/usr/src/test/bhyve-tests/tests/vmm/common.c
+++ b/usr/src/test/bhyve-tests/tests/vmm/common.c
@@ -17,6 +17,7 @@
#include <unistd.h>
#include <strings.h>
#include <fcntl.h>
+#include <errno.h>
#include <sys/types.h>
#include <sys/vmm.h>
@@ -79,3 +80,69 @@ open_drv_test(void)
{
return (open("/dev/vmm_drv_test", O_RDWR));
}
+
+
+/*
+ * Test if VMM instance exists (and is not being destroyed).
+ */
+bool
+check_instance_usable(const char *suite_name)
+{
+ char vm_name[VM_MAX_NAMELEN];
+ char vm_path[MAXPATHLEN];
+
+ name_test_vm(suite_name, vm_name);
+ (void) snprintf(vm_path, sizeof (vm_path), "/dev/vmm/%s", vm_name);
+
+ int fd = open(vm_path, O_RDWR, 0);
+ if (fd < 0) {
+ return (false);
+ }
+
+ const int destroy_pending = ioctl(fd, VM_DESTROY_PENDING, 0);
+ (void) close(fd);
+
+ return (destroy_pending == 0);
+}
+
+/*
+ * Does an instance exist in /dev/vmm? (No check for in-progress destroy)
+ */
+bool
+check_instance_exists(const char *suite_name)
+{
+ char vm_name[VM_MAX_NAMELEN];
+ char vm_path[MAXPATHLEN];
+
+ name_test_vm(suite_name, vm_name);
+ (void) snprintf(vm_path, sizeof (vm_path), "/dev/vmm/%s", vm_name);
+
+ return (access(vm_path, F_OK) == 0);
+}
+
+
+/*
+ * Destroy a VMM instance via the vmmctl device.
+ */
+int
+destroy_instance(const char *suite_name)
+{
+ int ctl_fd = open(VMM_CTL_DEV, O_EXCL | O_RDWR);
+ if (ctl_fd < 0) {
+ return (-1);
+ }
+
+ struct vm_destroy_req req;
+ name_test_vm(suite_name, req.name);
+
+ if (ioctl(ctl_fd, VMM_DESTROY_VM, &req) != 0) {
+ /* Preserve the destroy error across the close() */
+ int err = errno;
+ (void) close(ctl_fd);
+ errno = err;
+ return (-1);
+ } else {
+ (void) close(ctl_fd);
+ return (0);
+ }
+}
diff --git a/usr/src/test/bhyve-tests/tests/vmm/common.h b/usr/src/test/bhyve-tests/tests/vmm/common.h
index 9370b8d981..a1147395ef 100644
--- a/usr/src/test/bhyve-tests/tests/vmm/common.h
+++ b/usr/src/test/bhyve-tests/tests/vmm/common.h
@@ -20,6 +20,9 @@ void name_test_vm(const char *, char *);
struct vmctx *create_test_vm(const char *);
int alloc_memseg(struct vmctx *, int, size_t, const char *);
int open_drv_test(void);
+bool check_instance_usable(const char *);
+bool check_instance_exists(const char *);
+int destroy_instance(const char *);
#define PROT_ALL (PROT_READ | PROT_WRITE | PROT_EXEC)
diff --git a/usr/src/test/bhyve-tests/tests/vmm/legacy_destruct.c b/usr/src/test/bhyve-tests/tests/vmm/legacy_destruct.c
new file mode 100644
index 0000000000..9a21de2664
--- /dev/null
+++ b/usr/src/test/bhyve-tests/tests/vmm/legacy_destruct.c
@@ -0,0 +1,71 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2022 Oxide Computer Company
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <libgen.h>
+#include <sys/stat.h>
+#include <errno.h>
+#include <err.h>
+#include <assert.h>
+
+#include <sys/vmm.h>
+#include <sys/vmm_dev.h>
+#include <vmmapi.h>
+
+#include "common.h"
+
+int
+main(int argc, char *argv[])
+{
+ const char *suite_name = basename(argv[0]);
+ struct vmctx *ctx;
+
+ ctx = create_test_vm(suite_name);
+ if (ctx == NULL) {
+ errx(EXIT_FAILURE, "could open test VM");
+ }
+
+ /*
+ * It would be odd if we had the freshly created VM instance, but it did
+ * not appear to exist.
+ */
+ assert(check_instance_usable(suite_name));
+
+ vm_close(ctx);
+
+ /* Instance should remain, even though we closed it */
+ if (!check_instance_usable(suite_name)) {
+ err(EXIT_FAILURE, "instance missing after vm_close()");
+ }
+
+ /*
+ * The common destroy_instance() uses the "legacy" destruction mechanism
+ * via the vmmctl device.
+ */
+ if (destroy_instance(suite_name) != 0) {
+ errx(EXIT_FAILURE, "ioctl(VMM_DESTROY_VM) failed");
+ }
+
+ /* Instance should be gone at this point */
+ if (check_instance_usable(suite_name)) {
+ err(EXIT_FAILURE, "instance still accessible after destroy");
+ }
+
+ (void) printf("%s\tPASS\n", suite_name);
+ return (EXIT_SUCCESS);
+}
diff --git a/usr/src/test/bhyve-tests/tests/vmm/self_destruct.c b/usr/src/test/bhyve-tests/tests/vmm/self_destruct.c
new file mode 100644
index 0000000000..02727834f8
--- /dev/null
+++ b/usr/src/test/bhyve-tests/tests/vmm/self_destruct.c
@@ -0,0 +1,90 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2022 Oxide Computer Company
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <libgen.h>
+#include <sys/stat.h>
+#include <errno.h>
+#include <err.h>
+#include <assert.h>
+
+#include <sys/vmm.h>
+#include <sys/vmm_dev.h>
+#include <vmmapi.h>
+
+#include "common.h"
+
+int
+main(int argc, char *argv[])
+{
+ const char *suite_name = basename(argv[0]);
+ struct vmctx *ctx;
+
+ ctx = create_test_vm(suite_name);
+ if (ctx == NULL) {
+ errx(EXIT_FAILURE, "could open test VM");
+ }
+
+ /*
+ * It would be odd if we had the freshly created VM instance, but it did
+ * not appear to exist.
+ */
+ assert(check_instance_usable(suite_name));
+
+ /* Ensure that auto-destruct is off */
+ if (ioctl(vm_get_device_fd(ctx), VM_SET_AUTODESTRUCT, 0) != 0) {
+ errx(EXIT_FAILURE, "could not disable auto-destruct");
+ }
+
+ if (ioctl(vm_get_device_fd(ctx), VM_DESTROY_SELF, 0) != 0) {
+ errx(EXIT_FAILURE, "ioctl(VM_DESTROY_SELF) failed");
+ }
+
+ /*
+ * Since we still hold the instance open, we expect it to still exist in
+ * /dev/vmm, but be useless for further operations
+ */
+ if (!check_instance_exists(suite_name)) {
+ err(EXIT_FAILURE,
+ "instance missing after unfinished destroy");
+ }
+
+ /* Attempt an operation on our still-open handle */
+ uint64_t reg = 0;
+ if (vm_get_register(ctx, 0, VM_REG_GUEST_RAX, &reg) == 0) {
+ err(EXIT_FAILURE,
+ "VM_GET_REGISTER succeeded despite instance destruction");
+ }
+ /* Check usability via the dedicated ioctl */
+ if (check_instance_usable(suite_name)) {
+ err(EXIT_FAILURE,
+ "instance not reporting in-progress destruction");
+ }
+
+
+ vm_close(ctx);
+ ctx = NULL;
+
+ /* Make doubly-sure the VM is gone after close */
+ if (check_instance_exists(suite_name)) {
+ err(EXIT_FAILURE, "instance still accessible after destroy");
+ }
+
+ (void) printf("%s\tPASS\n", suite_name);
+ return (EXIT_SUCCESS);
+}
diff --git a/usr/src/uts/intel/io/vmm/sys/vmm_impl.h b/usr/src/uts/intel/io/vmm/sys/vmm_impl.h
index 2638fa06b1..d7cec8bc78 100644
--- a/usr/src/uts/intel/io/vmm/sys/vmm_impl.h
+++ b/usr/src/uts/intel/io/vmm/sys/vmm_impl.h
@@ -50,10 +50,10 @@ typedef struct vmm_zsd vmm_zsd_t;
enum vmm_softc_state {
VMM_HELD = 1, /* external driver(s) possess hold on the VM */
- VMM_CLEANUP = 2, /* request that holds are released */
- VMM_PURGED = 4, /* all hold have been released */
- VMM_BLOCK_HOOK = 8, /* mem hook install temporarily blocked */
- VMM_DESTROY = 16 /* VM is destroyed, softc still around */
+ VMM_BLOCK_HOOK = 2, /* mem hook install temporarily blocked */
+ VMM_DESTROY = 4, /* VM destruction initiated */
+ VMM_IS_OPEN = 8, /* VM device is open */
+ VMM_AUTODESTROY = 16, /* auto-destroy instance on close */
};
struct vmm_softc {
@@ -66,8 +66,7 @@ struct vmm_softc {
kcondvar_t vmm_cv;
list_t vmm_holds;
uint_t vmm_flags;
- boolean_t vmm_is_open;
- boolean_t vmm_autodestruct;
+ uint_t vmm_destroy_waiters;
kmutex_t vmm_lease_lock;
list_t vmm_lease_list;
@@ -89,7 +88,7 @@ void vmm_zsd_init(void);
void vmm_zsd_fini(void);
int vmm_zsd_add_vm(vmm_softc_t *sc);
void vmm_zsd_rem_vm(vmm_softc_t *sc);
-int vmm_zone_vm_destroy(vmm_softc_t *);
+void vmm_zone_vm_destroy(vmm_softc_t *);
#define VMM_MODULE_NAME "vmm"
diff --git a/usr/src/uts/intel/io/vmm/vmm_drv_test.c b/usr/src/uts/intel/io/vmm/vmm_drv_test.c
index 728bc820d0..c138d4b7af 100644
--- a/usr/src/uts/intel/io/vmm/vmm_drv_test.c
+++ b/usr/src/uts/intel/io/vmm/vmm_drv_test.c
@@ -80,6 +80,10 @@ vdt_close(dev_t dev, int flag, int otype, cred_t *cr)
return (ENXIO);
}
+ if (ss->vss_hold != NULL) {
+ vmm_drv_rele(ss->vss_hold);
+ ss->vss_hold = NULL;
+ }
mutex_destroy(&ss->vss_lock);
ddi_soft_state_free(vdt_state, minor);
id_free(vdt_minors, minor);
diff --git a/usr/src/uts/intel/io/vmm/vmm_sol_dev.c b/usr/src/uts/intel/io/vmm/vmm_sol_dev.c
index 72e8e6e94a..26b58dff79 100644
--- a/usr/src/uts/intel/io/vmm/vmm_sol_dev.c
+++ b/usr/src/uts/intel/io/vmm/vmm_sol_dev.c
@@ -77,7 +77,6 @@ static sdev_plugin_hdl_t vmmdev_sdev_hdl;
static kmutex_t vmm_mtx;
static list_t vmm_list;
-static list_t vmm_destroy_list;
static id_space_t *vmm_minors;
static void *vmm_statep;
@@ -115,22 +114,22 @@ struct vmm_lease {
typedef enum vmm_destroy_opts {
VDO_DEFAULT = 0,
/*
- * Request that zone-specific-data associated with this VM not be
+ * Indicate that zone-specific-data associated with this VM not be
* cleaned up as part of the destroy. Skipping ZSD clean-up is
* necessary when VM is being destroyed as part of zone destruction,
* when said ZSD is already being cleaned up.
*/
VDO_NO_CLEAN_ZSD = (1 << 0),
/*
- * Skip any attempt to wait for vmm_drv consumers when attempting to
- * purge them from the instance. When performing an auto-destruct, it
- * is not desirable to wait, since said consumer might exist in a
- * "higher" file descriptor which has not yet been closed.
+ * Attempt to wait for VM destruction to complete. This is opt-in,
+ * since there are many normal conditions which could lead to
+ * destruction being stalled pending other clean-up.
*/
- VDO_NO_PURGE_WAIT = (1 << 1),
+ VDO_ATTEMPT_WAIT = (1 << 1),
} vmm_destroy_opts_t;
-static int vmm_destroy_locked(vmm_softc_t *, vmm_destroy_opts_t, boolean_t *);
+static void vmm_hma_release(void);
+static int vmm_destroy_locked(vmm_softc_t *, vmm_destroy_opts_t, bool *);
static int vmm_drv_block_hook(vmm_softc_t *, boolean_t);
static void vmm_lease_block(vmm_softc_t *);
static void vmm_lease_unblock(vmm_softc_t *);
@@ -250,6 +249,10 @@ vmmdev_devmem_segid(vmm_softc_t *sc, off_t off, off_t len, int *segidp,
return (B_FALSE);
}
+/*
+ * When an instance is being destroyed, the devmem list of named memory objects
+ * can be torn down, as no new mappings are allowed.
+ */
static void
vmmdev_devmem_purge(vmm_softc_t *sc)
{
@@ -526,6 +529,8 @@ vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md,
case VM_SUSPEND:
case VM_DESC_FPU_AREA:
case VM_SET_AUTODESTRUCT:
+ case VM_DESTROY_SELF:
+ case VM_DESTROY_PENDING:
default:
break;
}
@@ -866,8 +871,41 @@ vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md,
* than the vcpu-centric or rwlock exclusion mechanisms.
*/
mutex_enter(&vmm_mtx);
- sc->vmm_autodestruct = (arg != 0);
+ if (arg != 0) {
+ sc->vmm_flags |= VMM_AUTODESTROY;
+ } else {
+ sc->vmm_flags &= ~VMM_AUTODESTROY;
+ }
+ mutex_exit(&vmm_mtx);
+ break;
+ }
+ case VM_DESTROY_SELF: {
+ bool hma_release = false;
+
+ /*
+ * Just like VMM_DESTROY_VM, but on the instance file descriptor
+ * itself, rather than having to perform a racy name lookup as
+ * part of the destroy process.
+ *
+ * Since vmm_destroy_locked() performs vCPU lock acquisition in
+ * order to kick the vCPUs out of guest context as part of any
+ * destruction, we do not need to worry about it ourselves using
+ * the `lock_type` logic here.
+ */
+ mutex_enter(&vmm_mtx);
+ VERIFY0(vmm_destroy_locked(sc, VDO_DEFAULT, &hma_release));
mutex_exit(&vmm_mtx);
+ if (hma_release) {
+ vmm_hma_release();
+ }
+ break;
+ }
+ case VM_DESTROY_PENDING: {
+ /*
+ * If we have made it this far, then destruction of the instance
+ * has not been initiated.
+ */
+ *rvalp = 0;
break;
}
@@ -2045,7 +2083,7 @@ vmm_drv_hold(file_t *fp, cred_t *cr, vmm_hold_t **holdp)
}
/* XXXJOY: check cred permissions against instance */
- if ((sc->vmm_flags & (VMM_CLEANUP|VMM_PURGED|VMM_DESTROY)) != 0) {
+ if ((sc->vmm_flags & VMM_DESTROY) != 0) {
err = EBUSY;
goto out;
}
@@ -2067,7 +2105,7 @@ void
vmm_drv_rele(vmm_hold_t *hold)
{
vmm_softc_t *sc;
- boolean_t hma_release = B_FALSE;
+ bool hma_release = false;
ASSERT(hold != NULL);
ASSERT(hold->vmh_sc != NULL);
@@ -2076,24 +2114,22 @@ vmm_drv_rele(vmm_hold_t *hold)
mutex_enter(&vmm_mtx);
sc = hold->vmh_sc;
list_remove(&sc->vmm_holds, hold);
+ kmem_free(hold, sizeof (*hold));
+
if (list_is_empty(&sc->vmm_holds)) {
sc->vmm_flags &= ~VMM_HELD;
- cv_broadcast(&sc->vmm_cv);
/*
- * If pending hold(s) had prevented an auto-destruct of the
- * instance when it was closed, finish that clean-up now.
+ * Since outstanding holds would prevent instance destruction
+ * from completing, attempt to finish it now if it was already
+ * set in motion.
*/
- if (sc->vmm_autodestruct && !sc->vmm_is_open) {
- int err = vmm_destroy_locked(sc,
- VDO_NO_PURGE_WAIT, &hma_release);
-
- VERIFY0(err);
- VERIFY(hma_release);
+ if ((sc->vmm_flags & VMM_DESTROY) != 0) {
+ VERIFY0(vmm_destroy_locked(sc, VDO_DEFAULT,
+ &hma_release));
}
}
mutex_exit(&vmm_mtx);
- kmem_free(hold, sizeof (*hold));
if (hma_release) {
vmm_hma_release();
@@ -2389,15 +2425,14 @@ vmm_drv_ioport_unhook(vmm_hold_t *hold, void **cookie)
mutex_exit(&vmm_mtx);
}
-static int
-vmm_drv_purge(vmm_softc_t *sc, boolean_t no_wait)
+static void
+vmm_drv_purge(vmm_softc_t *sc)
{
ASSERT(MUTEX_HELD(&vmm_mtx));
if ((sc->vmm_flags & VMM_HELD) != 0) {
vmm_hold_t *hold;
- sc->vmm_flags |= VMM_CLEANUP;
for (hold = list_head(&sc->vmm_holds); hold != NULL;
hold = list_next(&sc->vmm_holds, hold)) {
hold->vmh_release_req = B_TRUE;
@@ -2415,28 +2450,7 @@ vmm_drv_purge(vmm_softc_t *sc, boolean_t no_wait)
vmm_lease_block(sc);
vmm_lease_unblock(sc);
mutex_enter(&vmm_mtx);
-
- /*
- * With all of the leases broken, we can proceed in an orderly
- * fashion to waiting for any lingering holds to be dropped.
- */
- while ((sc->vmm_flags & VMM_HELD) != 0) {
- /*
- * Some holds remain, so wait (if acceptable) for them
- * to be cleaned up.
- */
- if (no_wait ||
- cv_wait_sig(&sc->vmm_cv, &vmm_mtx) <= 0) {
- sc->vmm_flags &= ~VMM_CLEANUP;
- return (EINTR);
- }
- }
- sc->vmm_flags &= ~VMM_CLEANUP;
}
-
- VERIFY(list_is_empty(&sc->vmm_holds));
- sc->vmm_flags |= VMM_PURGED;
- return (0);
}
static int
@@ -2471,77 +2485,176 @@ done:
return (err);
}
-static int
-vmm_destroy_locked(vmm_softc_t *sc, vmm_destroy_opts_t opts,
- boolean_t *hma_release)
-{
- dev_info_t *pdip = ddi_get_parent(vmmdev_dip);
- minor_t minor;
+static void
+vmm_destroy_begin(vmm_softc_t *sc, vmm_destroy_opts_t opts)
+{
ASSERT(MUTEX_HELD(&vmm_mtx));
+ ASSERT0(sc->vmm_flags & VMM_DESTROY);
- *hma_release = B_FALSE;
+ sc->vmm_flags |= VMM_DESTROY;
- if (vmm_drv_purge(sc, (opts & VDO_NO_PURGE_WAIT) != 0) != 0) {
- return (EINTR);
+ /*
+ * Lock and unlock all of the vCPUs to ensure that they are kicked out
+ * of guest context, being unable to return now that the instance is
+ * marked for destruction.
+ */
+ const int maxcpus = vm_get_maxcpus(sc->vmm_vm);
+ for (int vcpu = 0; vcpu < maxcpus; vcpu++) {
+ vcpu_lock_one(sc, vcpu);
+ vcpu_unlock_one(sc, vcpu);
}
+ vmmdev_devmem_purge(sc);
if ((opts & VDO_NO_CLEAN_ZSD) == 0) {
+ /*
+ * The ZSD should be cleaned up now, unless destruction of the
+ * instance was initiated by destruction of the containing zone,
+ * in which case the ZSD has already been removed.
+ */
vmm_zsd_rem_vm(sc);
}
+ zone_rele(sc->vmm_zone);
- /* Clean up devmem entries */
- vmmdev_devmem_purge(sc);
+ vmm_drv_purge(sc);
+}
+
+static bool
+vmm_destroy_ready(vmm_softc_t *sc)
+{
+ ASSERT(MUTEX_HELD(&vmm_mtx));
+
+ if ((sc->vmm_flags & (VMM_HELD | VMM_IS_OPEN)) == 0) {
+ VERIFY(list_is_empty(&sc->vmm_holds));
+ return (true);
+ }
+
+ return (false);
+}
+
+static void
+vmm_destroy_finish(vmm_softc_t *sc)
+{
+ ASSERT(MUTEX_HELD(&vmm_mtx));
+ ASSERT(vmm_destroy_ready(sc));
list_remove(&vmm_list, sc);
+ vmm_kstat_fini(sc);
+ vm_destroy(sc->vmm_vm);
ddi_remove_minor_node(vmmdev_dip, sc->vmm_name);
- minor = sc->vmm_minor;
- zone_rele(sc->vmm_zone);
- if (sc->vmm_is_open) {
- list_insert_tail(&vmm_destroy_list, sc);
- sc->vmm_flags |= VMM_DESTROY;
- } else {
- vmm_kstat_fini(sc);
- vm_destroy(sc->vmm_vm);
- ddi_soft_state_free(vmm_statep, minor);
- id_free(vmm_minors, minor);
- *hma_release = B_TRUE;
+ (void) devfs_clean(ddi_get_parent(vmmdev_dip), NULL, DV_CLEAN_FORCE);
+
+ const minor_t minor = sc->vmm_minor;
+ ddi_soft_state_free(vmm_statep, minor);
+ id_free(vmm_minors, minor);
+}
+
+/*
+ * Initiate or attempt to finish destruction of a VMM instance.
+ *
+ * This is called from several contexts:
+ * - An explicit destroy ioctl is made
+ * - A vmm_drv consumer releases its hold (being the last on the instance)
+ * - The vmm device is closed, and auto-destruct is enabled
+ */
+static int
+vmm_destroy_locked(vmm_softc_t *sc, vmm_destroy_opts_t opts,
+ bool *hma_release)
+{
+ ASSERT(MUTEX_HELD(&vmm_mtx));
+
+ *hma_release = false;
+
+ /*
+ * When instance destruction begins, it is so marked such that any
+ * further requests to operate the instance will fail.
+ */
+ if ((sc->vmm_flags & VMM_DESTROY) == 0) {
+ vmm_destroy_begin(sc, opts);
}
- (void) devfs_clean(pdip, NULL, DV_CLEAN_FORCE);
- return (0);
+ if (vmm_destroy_ready(sc)) {
+
+ /*
+ * Notify anyone waiting for the destruction to finish. They
+ * must be clear before we can safely tear down the softc.
+ */
+ if (sc->vmm_destroy_waiters != 0) {
+ cv_broadcast(&sc->vmm_cv);
+ while (sc->vmm_destroy_waiters != 0) {
+ cv_wait(&sc->vmm_cv, &vmm_mtx);
+ }
+ }
+
+ /*
+ * Finish destruction of instance. After this point, the softc
+ * is freed and cannot be accessed again.
+ *
+ * With destruction complete, the HMA hold can be released
+ */
+ vmm_destroy_finish(sc);
+ *hma_release = true;
+ return (0);
+ } else if ((opts & VDO_ATTEMPT_WAIT) != 0) {
+ int err = 0;
+
+ sc->vmm_destroy_waiters++;
+ while (!vmm_destroy_ready(sc) && err == 0) {
+ if (cv_wait_sig(&sc->vmm_cv, &vmm_mtx) <= 0) {
+ err = EINTR;
+ }
+ }
+ sc->vmm_destroy_waiters--;
+
+ if (sc->vmm_destroy_waiters == 0) {
+ /*
+ * If we were the last waiter, it could be that VM
+ * destruction is waiting on _us_ to proceed with the
+ * final clean-up.
+ */
+ cv_signal(&sc->vmm_cv);
+ }
+ return (err);
+ } else {
+ /*
+ * Since the instance is not ready for destruction, and the
+ * caller did not ask to wait, consider it a success for now.
+ */
+ return (0);
+ }
}
-int
+void
vmm_zone_vm_destroy(vmm_softc_t *sc)
{
- boolean_t hma_release = B_FALSE;
- int err;
+ bool hma_release = false;
+ int err;
mutex_enter(&vmm_mtx);
err = vmm_destroy_locked(sc, VDO_NO_CLEAN_ZSD, &hma_release);
mutex_exit(&vmm_mtx);
- if (hma_release)
- vmm_hma_release();
+ VERIFY0(err);
- return (err);
+ if (hma_release) {
+ vmm_hma_release();
+ }
}
-/* ARGSUSED */
static int
vmmdev_do_vm_destroy(const struct vm_destroy_req *req, cred_t *cr)
{
- boolean_t hma_release = B_FALSE;
- vmm_softc_t *sc;
- int err;
+ vmm_softc_t *sc;
+ bool hma_release = false;
+ int err;
- if (crgetuid(cr) != 0)
+ if (crgetuid(cr) != 0) {
return (EPERM);
+ }
mutex_enter(&vmm_mtx);
-
- if ((sc = vmm_lookup(req->name)) == NULL) {
+ sc = vmm_lookup(req->name);
+ if (sc == NULL) {
mutex_exit(&vmm_mtx);
return (ENOENT);
}
@@ -2553,12 +2666,13 @@ vmmdev_do_vm_destroy(const struct vm_destroy_req *req, cred_t *cr)
mutex_exit(&vmm_mtx);
return (EPERM);
}
- err = vmm_destroy_locked(sc, VDO_DEFAULT, &hma_release);
+ err = vmm_destroy_locked(sc, VDO_ATTEMPT_WAIT, &hma_release);
mutex_exit(&vmm_mtx);
- if (hma_release)
+ if (hma_release) {
vmm_hma_release();
+ }
return (err);
}
@@ -2720,7 +2834,7 @@ vmm_open(dev_t *devp, int flag, int otyp, cred_t *credp)
return (ENXIO);
}
- sc->vmm_is_open = B_TRUE;
+ sc->vmm_flags |= VMM_IS_OPEN;
mutex_exit(&vmm_mtx);
return (0);
@@ -2729,13 +2843,13 @@ vmm_open(dev_t *devp, int flag, int otyp, cred_t *credp)
static int
vmm_close(dev_t dev, int flag, int otyp, cred_t *credp)
{
- minor_t minor;
- vmm_softc_t *sc;
- boolean_t hma_release = B_FALSE;
+ const minor_t minor = getminor(dev);
+ vmm_softc_t *sc;
+ bool hma_release = false;
- minor = getminor(dev);
- if (minor == VMM_CTL_MINOR)
+ if (minor == VMM_CTL_MINOR) {
return (0);
+ }
mutex_enter(&vmm_mtx);
sc = ddi_get_soft_state(vmm_statep, minor);
@@ -2744,35 +2858,23 @@ vmm_close(dev_t dev, int flag, int otyp, cred_t *credp)
return (ENXIO);
}
- VERIFY(sc->vmm_is_open);
- sc->vmm_is_open = B_FALSE;
+ VERIFY3U(sc->vmm_flags & VMM_IS_OPEN, !=, 0);
+ sc->vmm_flags &= ~VMM_IS_OPEN;
/*
- * If this VM was destroyed while the vmm device was open, then
- * clean it up now that it is closed.
+ * If the instance was marked for auto-destruction, begin that now. Instance
+ * destruction may have been initiated already, so try to make progress
+ * in that case, since closure of the device is one of its requirements.
*/
- if (sc->vmm_flags & VMM_DESTROY) {
- list_remove(&vmm_destroy_list, sc);
- vmm_kstat_fini(sc);
- vm_destroy(sc->vmm_vm);
- ddi_soft_state_free(vmm_statep, minor);
- id_free(vmm_minors, minor);
- hma_release = B_TRUE;
- } else if (sc->vmm_autodestruct) {
- /*
- * Attempt auto-destruct on instance if requested.
- *
- * Do not wait for existing holds to be purged from the
- * instance, since there is no guarantee that will happen in a
- * timely manner. Auto-destruction will resume when the last
- * hold is released. (See: vmm_drv_rele)
- */
- (void) vmm_destroy_locked(sc, VDO_NO_PURGE_WAIT, &hma_release);
+ if ((sc->vmm_flags & VMM_DESTROY) != 0 ||
+ (sc->vmm_flags & VMM_AUTODESTROY) != 0) {
+ VERIFY0(vmm_destroy_locked(sc, VDO_DEFAULT, &hma_release));
}
mutex_exit(&vmm_mtx);
- if (hma_release)
+ if (hma_release) {
vmm_hma_release();
+ }
return (0);
}
@@ -2879,10 +2981,19 @@ vmm_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
}
sc = ddi_get_soft_state(vmm_statep, minor);
- ASSERT(sc);
+ ASSERT(sc != NULL);
- if (sc->vmm_flags & VMM_DESTROY)
+ /*
+ * Turn away any ioctls against an instance when it is being destroyed.
+ * (Except for the ioctl inquiring about that destroy-in-progress.)
+ */
+ if ((sc->vmm_flags & VMM_DESTROY) != 0) {
+ if (cmd == VM_DESTROY_PENDING) {
+ *rvalp = 1;
+ return (0);
+ }
return (ENXIO);
+ }
return (vmmdev_do_ioctl(sc, cmd, arg, mode, credp, rvalp));
}
@@ -3113,7 +3224,7 @@ vmm_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
return (DDI_FAILURE);
mutex_enter(&vmm_mtx);
- if (!list_is_empty(&vmm_list) || !list_is_empty(&vmm_destroy_list)) {
+ if (!list_is_empty(&vmm_list)) {
mutex_exit(&vmm_mtx);
mutex_exit(&vmmdev_mtx);
return (DDI_FAILURE);
@@ -3199,8 +3310,6 @@ _init(void)
mutex_init(&vmm_mtx, NULL, MUTEX_DRIVER, NULL);
list_create(&vmm_list, sizeof (vmm_softc_t),
offsetof(vmm_softc_t, vmm_node));
- list_create(&vmm_destroy_list, sizeof (vmm_softc_t),
- offsetof(vmm_softc_t, vmm_node));
vmm_minors = id_space_create("vmm_minors", VMM_CTL_MINOR + 1, MAXMIN32);
error = ddi_soft_state_init(&vmm_statep, sizeof (vmm_softc_t), 0);
diff --git a/usr/src/uts/intel/io/vmm/vmm_zsd.c b/usr/src/uts/intel/io/vmm/vmm_zsd.c
index 16f05ea173..eca6d25969 100644
--- a/usr/src/uts/intel/io/vmm/vmm_zsd.c
+++ b/usr/src/uts/intel/io/vmm/vmm_zsd.c
@@ -141,7 +141,7 @@ vmm_zsd_create(zoneid_t zid)
}
/*
- * Tells all runing VMs in the zone to poweroff. This does not reclaim guest
+ * Tells all running VMs in the zone to poweroff. This does not reclaim guest
* resources (memory, etc.).
*/
static void
@@ -183,14 +183,7 @@ vmm_zsd_destroy(zoneid_t zid, void *data)
ASSERT(!zsd->vz_active);
while ((sc = list_remove_head(&zsd->vz_vmms)) != NULL) {
- int err;
-
- /*
- * This frees all resources associated with the vm, including
- * sc.
- */
- err = vmm_zone_vm_destroy(sc);
- ASSERT3S(err, ==, 0);
+ vmm_zone_vm_destroy(sc);
}
mutex_exit(&zsd->vz_lock);
diff --git a/usr/src/uts/intel/sys/vmm_dev.h b/usr/src/uts/intel/sys/vmm_dev.h
index 80b8c2d7ba..ce5d47fba9 100644
--- a/usr/src/uts/intel/sys/vmm_dev.h
+++ b/usr/src/uts/intel/sys/vmm_dev.h
@@ -515,6 +515,8 @@ struct vm_legacy_cpuid {
#define VM_DATA_WRITE (VMM_IOC_BASE | 0x23)
#define VM_SET_AUTODESTRUCT (VMM_IOC_BASE | 0x24)
+#define VM_DESTROY_SELF (VMM_IOC_BASE | 0x25)
+#define VM_DESTROY_PENDING (VMM_IOC_BASE | 0x26)
#define VM_DEVMEM_GETOFFSET (VMM_IOC_BASE | 0xff)