author     Patrick Mooney <pmooney@pfmooney.com>   2022-01-28 21:26:53 +0000
committer  Patrick Mooney <pmooney@oxide.computer> 2022-02-18 02:23:05 +0000
commit     957246c9e6c47389c40079995d73eebcc659fb29 (patch)
tree       0abb0e86445192958a251fd636adcc436195cf74
parent     4dde95dacc64b35aa9882fcbd0a847355d130734 (diff)
download   illumos-gate-957246c9e6c47389c40079995d73eebcc659fb29.tar.gz
14456 bhyve needs fpu import/export
Reviewed by: Dan Cross <cross@oxidecomputer.com>
Reviewed by: C Fraire <cfraire@me.com>
Reviewed by: Robert Mustacchi <rm@fingolfin.org>
Approved by: Dan McDonald <danmcd@joyent.com>
-rw-r--r--  usr/src/cmd/bhyvectl/Makefile                       3
-rw-r--r--  usr/src/cmd/bhyvectl/bhyvectl.c                   100
-rw-r--r--  usr/src/compat/bhyve/amd64/machine/fpu.h           28
-rw-r--r--  usr/src/compat/bhyve/amd64/machine/pcb.h           21
-rw-r--r--  usr/src/pkg/manifests/system-bhyve-tests.p5m        1
-rw-r--r--  usr/src/test/bhyve-tests/runfiles/default.run       2
-rw-r--r--  usr/src/test/bhyve-tests/tests/vmm/Makefile         3
-rw-r--r--  usr/src/test/bhyve-tests/tests/vmm/common.c         5
-rw-r--r--  usr/src/test/bhyve-tests/tests/vmm/common.h         2
-rw-r--r--  usr/src/test/bhyve-tests/tests/vmm/fpu_getset.c   333
-rw-r--r--  usr/src/test/bhyve-tests/tests/vmm/mem_partial.c    5
-rw-r--r--  usr/src/test/bhyve-tests/tests/vmm/mem_seg_map.c    5
-rw-r--r--  usr/src/uts/i86pc/io/vmm/intel/vmx_msr.c            1
-rw-r--r--  usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h           4
-rw-r--r--  usr/src/uts/i86pc/io/vmm/vmm.c                     69
-rw-r--r--  usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c             95
-rw-r--r--  usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c            62
-rw-r--r--  usr/src/uts/i86pc/os/hma_fpu.c                    264
-rw-r--r--  usr/src/uts/i86pc/sys/hma.h                        38
-rw-r--r--  usr/src/uts/i86pc/sys/vmm_dev.h                    22
-rw-r--r--  usr/src/uts/intel/os/archdep.c                      2
-rw-r--r--  usr/src/uts/intel/os/fpu.c                         41
-rw-r--r--  usr/src/uts/intel/sys/fp.h                         45
23 files changed, 967 insertions, 184 deletions
diff --git a/usr/src/cmd/bhyvectl/Makefile b/usr/src/cmd/bhyvectl/Makefile
index 01d331c823..486f39da31 100644
--- a/usr/src/cmd/bhyvectl/Makefile
+++ b/usr/src/cmd/bhyvectl/Makefile
@@ -35,6 +35,9 @@ CPPFLAGS = -I$(COMPAT)/bhyve -I$(CONTRIB)/bhyve \
-I$(SRC)/uts/i86pc
LDLIBS += -lvmmapi
+# Force c99 for everything
+CSTD= $(CSTD_GNU99)
+
CERRWARN += -_gcc=-Wno-uninitialized
# main() is too hairy for smatch
diff --git a/usr/src/cmd/bhyvectl/bhyvectl.c b/usr/src/cmd/bhyvectl/bhyvectl.c
index 4fc6ddc251..cbe779a4ea 100644
--- a/usr/src/cmd/bhyvectl/bhyvectl.c
+++ b/usr/src/cmd/bhyvectl/bhyvectl.c
@@ -39,7 +39,7 @@
*
* Copyright 2015 Pluribus Networks Inc.
* Copyright 2019 Joyent, Inc.
- * Copyright 2021 Oxide Computer Company
+ * Copyright 2022 Oxide Computer Company
*/
#include <sys/cdefs.h>
@@ -51,6 +51,9 @@ __FBSDID("$FreeBSD$");
#include <sys/errno.h>
#include <sys/mman.h>
#include <sys/cpuset.h>
+#ifndef __FreeBSD__
+#include <sys/fp.h>
+#endif /* __FreeBSD__ */
#include <stdio.h>
#include <stdlib.h>
@@ -312,6 +315,7 @@ static int get_cpu_topology;
#ifndef __FreeBSD__
static int pmtmr_port;
static int wrlock_cycle;
+static int get_fpu;
#endif
/*
@@ -1534,6 +1538,7 @@ setup_options(bool cpu_intel)
#ifndef __FreeBSD__
{ "pmtmr-port", REQ_ARG, 0, PMTMR_PORT },
{ "wrlock-cycle", NO_ARG, &wrlock_cycle, 1 },
+ { "get-fpu", NO_ARG, &get_fpu, 1 },
#endif
};
@@ -1752,6 +1757,93 @@ show_memseg(struct vmctx *ctx)
}
}
+#ifndef __FreeBSD__
+static int
+show_fpu(struct vmctx *ctx, int vcpu)
+{
+ int res, fd;
+
+ struct vm_fpu_desc_entry entries[64];
+ struct vm_fpu_desc desc = {
+ .vfd_entry_data = entries,
+ .vfd_num_entries = 64,
+ };
+ fd = vm_get_device_fd(ctx);
+ res = ioctl(fd, VM_DESC_FPU_AREA, &desc);
+ if (res != 0) {
+ return (errno);
+ }
+ for (uint_t i = 0; i < desc.vfd_num_entries; i++) {
+ const struct vm_fpu_desc_entry *entry = &entries[i];
+
+ /* confirm that AVX fields are where we expect */
+ if (entry->vfde_feature == XFEATURE_AVX) {
+ if (entry->vfde_size != 0x100 ||
+ entry->vfde_off != 0x240) {
+ (void) fprintf(stderr,
+ "show_fpu: unexpected AVX size/placement "
+ "- size:%x off:%x\n",
+ entry->vfde_size, entry->vfde_off);
+ return (EINVAL);
+ }
+ }
+ }
+ void *buf = malloc(desc.vfd_req_size);
+ if (buf == NULL) {
+ return (ENOMEM);
+ }
+ struct vm_fpu_state req = {
+ .vcpuid = vcpu,
+ .buf = buf,
+ .len = desc.vfd_req_size,
+ };
+ res = ioctl(fd, VM_GET_FPU, &req);
+ if (res != 0) {
+ res = errno;
+ free(buf);
+ return (res);
+ }
+
+ const struct xsave_state *state = buf;
+ const struct fxsave_state *fx = &state->xs_fxsave;
+ (void) printf("fpu_fcw[%d]\t\t0x%04x\n", vcpu, fx->fx_fcw);
+ (void) printf("fpu_fsw[%d]\t\t0x%04x\n", vcpu, fx->fx_fsw);
+ (void) printf("fpu_ftw[%d]\t\t0x%04x\n", vcpu, fx->fx_fctw);
+ (void) printf("fpu_fop[%d]\t\t0x%04x\n", vcpu, fx->fx_fop);
+ (void) printf("fpu_rip[%d]\t\t0x%016lx\n", vcpu, fx->fx_rip);
+ (void) printf("fpu_rdp[%d]\t\t0x%016lx\n", vcpu, fx->fx_rdp);
+ (void) printf("fpu_mxcsr[%d]\t\t0x%08x\n", vcpu, fx->fx_mxcsr);
+ (void) printf("fpu_mxcsr_mask[%d]\t0x%08x\n", vcpu,
+ fx->fx_mxcsr_mask);
+ /* ST/MMX regs */
+ for (uint_t i = 0; i < 8; i++) {
+ (void) printf("fpu_st%u[%d]\t\t0x%08x%08x%08x%08x\n", vcpu, i,
+ fx->fx_st[i].__fpr_pad[0], fx->fx_st[i].__fpr_pad[1],
+ fx->fx_st[i].__fpr_pad[2], fx->fx_st[i].__fpr_pad[3]);
+ }
+ /* SSE regs */
+ for (uint_t i = 0; i < 16; i++) {
+ (void) printf("fpu_xmm%u[%d]\t\t0x%08x%08x%08x%08x\n",
+ i, vcpu,
+ fx->fx_xmm[i]._l[0], fx->fx_xmm[i]._l[1],
+ fx->fx_xmm[i]._l[2], fx->fx_xmm[i]._l[3]);
+ }
+
+ if (state->xs_header.xsh_xstate_bv & XFEATURE_AVX) {
+ /* AVX regs */
+ for (uint_t i = 0; i < 16; i++) {
+ (void) printf("fpu_ymm%u[%d]\t\t0x%08x%08x%08x%08x\n",
+ i, vcpu,
+ state->xs_ymm[i]._l[0], state->xs_ymm[i]._l[1],
+ state->xs_ymm[i]._l[2], state->xs_ymm[i]._l[3]);
+ }
+ }
+
+ free(buf);
+ return (0);
+}
+#endif /* __FreeBSD__ */
+
int
main(int argc, char *argv[])
{
@@ -2150,6 +2242,12 @@ main(int argc, char *argv[])
if (!error)
error = get_all_segments(ctx, vcpu);
+#ifndef __FreeBSD__
+ if (!error && (get_fpu || get_all)) {
+ error = show_fpu(ctx, vcpu);
+ }
+#endif /* __FreeBSD__ */
+
if (!error) {
if (cpu_intel)
error = get_misc_vmcs(ctx, vcpu);
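As a usage note: with this change in place, the new flag should be reachable through the usual bhyvectl invocation pattern, e.g. (VM name hypothetical):

	# bhyvectl --vm=mytestvm --get-fpu

which exercises VM_DESC_FPU_AREA and VM_GET_FPU via the show_fpu() path added above, printing the x87/SSE state and, when present in xstate_bv, the AVX ymm registers. Passing --get-all should trigger the same path, per the (get_fpu || get_all) condition in main().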
diff --git a/usr/src/compat/bhyve/amd64/machine/fpu.h b/usr/src/compat/bhyve/amd64/machine/fpu.h
deleted file mode 100644
index 6bc651d996..0000000000
--- a/usr/src/compat/bhyve/amd64/machine/fpu.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * This file and its contents are supplied under the terms of the
- * Common Development and Distribution License ("CDDL"), version 1.0.
- * You may only use this file in accordance with the terms of version
- * 1.0 of the CDDL.
- *
- * A full copy of the text of the CDDL should have accompanied this
- * source. A copy of the CDDL is also available via the Internet at
- * http://www.illumos.org/license/CDDL.
- */
-
-/*
- * Copyright 2014 Pluribus Networks Inc.
- * Copyright (c) 2018, Joyent, Inc.
- */
-
-#ifndef _COMPAT_FREEBSD_AMD64_MACHINE_FPU_H_
-#define _COMPAT_FREEBSD_AMD64_MACHINE_FPU_H_
-
-void fpuexit(kthread_t *td);
-void fpurestore(void *);
-void fpusave(void *);
-
-struct savefpu *fpu_save_area_alloc(void);
-void fpu_save_area_free(struct savefpu *fsa);
-void fpu_save_area_reset(struct savefpu *fsa);
-
-#endif /* _COMPAT_FREEBSD_AMD64_MACHINE_FPU_H_ */
diff --git a/usr/src/compat/bhyve/amd64/machine/pcb.h b/usr/src/compat/bhyve/amd64/machine/pcb.h
deleted file mode 100644
index 75b5de640c..0000000000
--- a/usr/src/compat/bhyve/amd64/machine/pcb.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * This file and its contents are supplied under the terms of the
- * Common Development and Distribution License ("CDDL"), version 1.0.
- * You may only use this file in accordance with the terms of version
- * 1.0 of the CDDL.
- *
- * A full copy of the text of the CDDL should have accompanied this
- * source. A copy of the CDDL is also available via the Internet at
- * http://www.illumos.org/license/CDDL.
- */
-
-/*
- * Copyright 2014 Pluribus Networks Inc.
- */
-
-#ifndef _COMPAT_FREEBSD_AMD64_MACHINE_PCB_H_
-#define _COMPAT_FREEBSD_AMD64_MACHINE_PCB_H_
-
-#include <machine/fpu.h>
-
-#endif /* _COMPAT_FREEBSD_AMD64_MACHINE_PCB_H_ */
diff --git a/usr/src/pkg/manifests/system-bhyve-tests.p5m b/usr/src/pkg/manifests/system-bhyve-tests.p5m
index 5b4a7351c4..823ed69a60 100644
--- a/usr/src/pkg/manifests/system-bhyve-tests.p5m
+++ b/usr/src/pkg/manifests/system-bhyve-tests.p5m
@@ -38,6 +38,7 @@ file path=opt/bhyve-tests/tests/mevent/read_requeue mode=0555
file path=opt/bhyve-tests/tests/mevent/vnode_file mode=0555
file path=opt/bhyve-tests/tests/mevent/vnode_zvol mode=0555
dir path=opt/bhyve-tests/tests/vmm
+file path=opt/bhyve-tests/tests/vmm/fpu_getset mode=0555
file path=opt/bhyve-tests/tests/vmm/mem_partial mode=0555
file path=opt/bhyve-tests/tests/vmm/mem_seg_map mode=0555
license lic_CDDL license=lic_CDDL
diff --git a/usr/src/test/bhyve-tests/runfiles/default.run b/usr/src/test/bhyve-tests/runfiles/default.run
index babfa0f7e9..3055f3e2d8 100644
--- a/usr/src/test/bhyve-tests/runfiles/default.run
+++ b/usr/src/test/bhyve-tests/runfiles/default.run
@@ -20,7 +20,7 @@ post =
outputdir = /var/tmp/test_results
[/opt/bhyve-tests/tests/vmm]
-tests = ['mem_partial', 'mem_seg_map']
+tests = ['mem_partial', 'mem_seg_map', 'fpu_getset']
# Tests of userspace mevent system, built from cmd/bhyve
[/opt/bhyve-tests/tests/mevent]
diff --git a/usr/src/test/bhyve-tests/tests/vmm/Makefile b/usr/src/test/bhyve-tests/tests/vmm/Makefile
index c91ed9a7e4..30d06a0f6b 100644
--- a/usr/src/test/bhyve-tests/tests/vmm/Makefile
+++ b/usr/src/test/bhyve-tests/tests/vmm/Makefile
@@ -16,7 +16,8 @@ include $(SRC)/cmd/Makefile.cmd.64
include $(SRC)/test/Makefile.com
PROG = mem_partial \
- mem_seg_map
+ mem_seg_map \
+ fpu_getset
COMMON_OBJS = common.o
CLEAN_OBJS = $(PROG:%=%.o)
diff --git a/usr/src/test/bhyve-tests/tests/vmm/common.c b/usr/src/test/bhyve-tests/tests/vmm/common.c
index b7f0a30ed0..622a14c61f 100644
--- a/usr/src/test/bhyve-tests/tests/vmm/common.c
+++ b/usr/src/test/bhyve-tests/tests/vmm/common.c
@@ -23,12 +23,13 @@
#include <vmmapi.h>
struct vmctx *
-create_test_vm(void)
+create_test_vm(const char *test_suite_name)
{
char name[VM_MAX_NAMELEN];
int res;
- (void) snprintf(name, sizeof (name), "bhyve-test-memmap-%d", getpid());
+ (void) snprintf(name, sizeof (name), "bhyve-test-%s-%d",
+ test_suite_name, getpid());
res = vm_create(name, 0);
if (res != 0) {
diff --git a/usr/src/test/bhyve-tests/tests/vmm/common.h b/usr/src/test/bhyve-tests/tests/vmm/common.h
index 7b64574cf2..f210408b71 100644
--- a/usr/src/test/bhyve-tests/tests/vmm/common.h
+++ b/usr/src/test/bhyve-tests/tests/vmm/common.h
@@ -16,7 +16,7 @@
#ifndef _COMMON_H_
#define _COMMON_H_
-struct vmctx *create_test_vm(void);
+struct vmctx *create_test_vm(const char *);
int alloc_memseg(struct vmctx *, int, size_t, const char *);
#define PROT_ALL (PROT_READ | PROT_WRITE | PROT_EXEC)
diff --git a/usr/src/test/bhyve-tests/tests/vmm/fpu_getset.c b/usr/src/test/bhyve-tests/tests/vmm/fpu_getset.c
new file mode 100644
index 0000000000..814e15dec3
--- /dev/null
+++ b/usr/src/test/bhyve-tests/tests/vmm/fpu_getset.c
@@ -0,0 +1,333 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2022 Oxide Computer Company
+ */
+
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stropts.h>
+#include <strings.h>
+#include <signal.h>
+#include <setjmp.h>
+#include <libgen.h>
+#include <sys/debug.h>
+#include <sys/fp.h>
+
+#include <sys/vmm.h>
+#include <sys/vmm_dev.h>
+#include <sys/x86_archext.h>
+#include <vmmapi.h>
+
+#include "common.h"
+
+/* Minimal xsave state area (sans any AVX storage) */
+struct xsave_min {
+ struct fxsave_state legacy;
+ struct xsave_header header;
+};
+
+CTASSERT(sizeof (struct xsave_min) == MIN_XSAVE_SIZE);
+
+struct avx_state {
+ /* 16 x 128-bit: high portions of the ymm registers */
+ uint64_t ymm[32];
+};
+
+static bool
+get_fpu(int fd, struct vm_fpu_state *req)
+{
+ int res = ioctl(fd, VM_GET_FPU, req);
+ if (res != 0) {
+ perror("could not read FPU for vCPU");
+ return (false);
+ }
+ return (true);
+}
+
+static bool
+set_fpu(int fd, struct vm_fpu_state *req)
+{
+ int res = ioctl(fd, VM_SET_FPU, req);
+ if (res != 0) {
+ perror("could not write FPU for vCPU");
+ return (false);
+ }
+ return (true);
+}
+
+static bool
+check_sse(int fd, const struct vm_fpu_desc *desc, void *fpu_area,
+ size_t fpu_size)
+{
+ /* Make sure the x87/MMX/SSE state is described as present */
+ bool found_fp = false, found_sse = false;
+ for (uint_t i = 0; i < desc->vfd_num_entries; i++) {
+ const struct vm_fpu_desc_entry *ent = &desc->vfd_entry_data[i];
+
+ switch (ent->vfde_feature) {
+ case XFEATURE_LEGACY_FP:
+ found_fp = true;
+ if (ent->vfde_off != 0 ||
+ ent->vfde_size != sizeof (struct fxsave_state)) {
+ (void) fprintf(stderr,
+ "unexpected entity for %x: "
+ "size=%x off=%x\n", ent->vfde_feature,
+ ent->vfde_size, ent->vfde_off);
+ return (false);
+ }
+ break;
+ case XFEATURE_SSE:
+ found_sse = true;
+ if (ent->vfde_off != 0 ||
+ ent->vfde_size != sizeof (struct fxsave_state)) {
+ (void) fprintf(stderr,
+ "unexpected entity for %x: "
+ "size=%x off=%x\n", ent->vfde_feature,
+ ent->vfde_size, ent->vfde_off);
+ return (false);
+ }
+ break;
+ }
+ }
+
+ if (!found_fp || !found_sse) {
+ (void) fprintf(stderr, "did not find x87 and SSE area "
+ "descriptors as expected in initial FPU\n");
+ return (false);
+ }
+
+ struct vm_fpu_state req = {
+ .vcpuid = 0,
+ .buf = fpu_area,
+ .len = fpu_size,
+ };
+
+ if (!get_fpu(fd, &req)) {
+ return (false);
+ }
+
+ struct xsave_min *xs = fpu_area;
+ /*
+ * Executing this test on a freshly-created instance, we expect the FPU
+ * to only have the legacy and SSE features present in its active state.
+ */
+ if (xs->header.xsh_xstate_bv != (XFEATURE_LEGACY_FP | XFEATURE_SSE)) {
+ (void) fprintf(stderr, "bad xstate_bv %lx, expected %lx\n",
+ xs->header.xsh_xstate_bv,
+ (XFEATURE_LEGACY_FP | XFEATURE_SSE));
+ return (false);
+ }
+
+ /* load some SSE values to check for a get/set cycle */
+ uint64_t *xmm = (void *)&xs->legacy.fx_xmm[0];
+ xmm[0] = UINT64_MAX;
+ xmm[2] = 1;
+
+ if (!set_fpu(fd, &req)) {
+ return (false);
+ }
+
+ /* check that those values made it in/out of the guest FPU */
+ bzero(fpu_area, fpu_size);
+ if (!get_fpu(fd, &req)) {
+ return (false);
+ }
+ if (xmm[0] != UINT64_MAX || xmm[2] != 1) {
+ (void) fprintf(stderr, "SSE test registers not saved\n");
+ return (false);
+ }
+
+ /* Make sure that a bogus MXCSR value is rejected */
+ xs->legacy.fx_mxcsr = UINT32_MAX;
+ int res = ioctl(fd, VM_SET_FPU, &req);
+ if (res == 0) {
+ (void) fprintf(stderr,
+ "write of invalid MXCSR erroneously allowed\n");
+ return (false);
+ }
+
+ return (true);
+}
+
+static bool
+check_avx(int fd, const struct vm_fpu_desc *desc, void *fpu_area,
+ size_t fpu_size)
+{
+ bool found_avx = false;
+ size_t avx_size, avx_off;
+ for (uint_t i = 0; i < desc->vfd_num_entries; i++) {
+ const struct vm_fpu_desc_entry *ent = &desc->vfd_entry_data[i];
+
+ if (ent->vfde_feature == XFEATURE_AVX) {
+ found_avx = true;
+ avx_size = ent->vfde_size;
+ avx_off = ent->vfde_off;
+ break;
+ }
+ }
+
+ if (!found_avx) {
+ (void) printf("AVX capability not found on host CPU, "
+ "skipping related tests\n");
+ return (true);
+ }
+
+ if (avx_size != sizeof (struct avx_state)) {
+ (void) fprintf(stderr, "unexpected AVX state size: %x, "
+ "expected %x\n", avx_size, sizeof (struct avx_state));
+ return (false);
+ }
+ if ((avx_off + avx_size) > fpu_size) {
+ (void) fprintf(stderr, "AVX data falls outside fpu size: "
+ "%x > %x\n", avx_off + avx_size, fpu_size);
+ return (false);
+ }
+
+ struct xsave_min *xs = fpu_area;
+ struct avx_state *avx = fpu_area + avx_off;
+
+ /* do a simple data round-trip */
+ struct vm_fpu_state req = {
+ .vcpuid = 0,
+ .buf = fpu_area,
+ .len = fpu_size,
+ };
+ if (!get_fpu(fd, &req)) {
+ return (false);
+ }
+
+ /* With AVX unused so far, we expect it to be absent from the BV */
+ if (xs->header.xsh_xstate_bv != (XFEATURE_LEGACY_FP | XFEATURE_SSE)) {
+ (void) fprintf(stderr, "bad xstate_bv %lx, expected %lx\n",
+ xs->header.xsh_xstate_bv,
+ (XFEATURE_LEGACY_FP | XFEATURE_SSE));
+ return (false);
+ }
+
+ avx->ymm[0] = UINT64_MAX;
+ avx->ymm[2] = 2;
+
+ /* first write without asserting AVX in BV */
+ if (!set_fpu(fd, &req)) {
+ return (false);
+ }
+
+ /* And check that the AVX state stays empty */
+ bzero(fpu_area, fpu_size);
+ if (!get_fpu(fd, &req)) {
+ return (false);
+ }
+ if (xs->header.xsh_xstate_bv != (XFEATURE_LEGACY_FP | XFEATURE_SSE)) {
+ (void) fprintf(stderr, "xstate_bv changed unexpectedly %lx\n",
+ xs->header.xsh_xstate_bv);
+ return (false);
+ }
+ if (avx->ymm[0] != 0 || avx->ymm[2] != 0) {
+ (void) fprintf(stderr, "YMM state changed unexpectedly "
+ "%lx %lx\n", avx->ymm[0], avx->ymm[2]);
+ return (false);
+ }
+
+ /* Now write YMM and set the appropriate AVX BV state */
+ avx->ymm[0] = UINT64_MAX;
+ avx->ymm[2] = 2;
+ xs->header.xsh_xstate_bv |= XFEATURE_AVX;
+ if (!set_fpu(fd, &req)) {
+ return (false);
+ }
+
+ /* ... and now check that it stuck */
+ bzero(fpu_area, fpu_size);
+ if (!get_fpu(fd, &req)) {
+ return (false);
+ }
+ if ((xs->header.xsh_xstate_bv & XFEATURE_AVX) == 0) {
+ (void) fprintf(stderr, "AVX missing from xstate_bv %lx\n",
+ xs->header.xsh_xstate_bv);
+ return (false);
+ }
+ if (avx->ymm[0] != UINT64_MAX || avx->ymm[2] != 2) {
+ (void) fprintf(stderr, "YMM state not preserved "
+ "%lx != %lx | %lx != %lx\n",
+ avx->ymm[0], UINT64_MAX, avx->ymm[2], 2);
+ return (false);
+ }
+
+
+ return (true);
+}
+
+int
+main(int argc, char *argv[])
+{
+ struct vmctx *ctx;
+ int res, fd;
+ const char *suite_name = basename(argv[0]);
+
+ ctx = create_test_vm(suite_name);
+ if (ctx == NULL) {
+ perror("could not open test VM");
+ return (EXIT_FAILURE);
+ }
+ fd = vm_get_device_fd(ctx);
+
+ struct vm_fpu_desc_entry entries[64];
+ struct vm_fpu_desc desc = {
+ .vfd_entry_data = entries,
+ .vfd_num_entries = 64,
+ };
+
+ res = ioctl(fd, VM_DESC_FPU_AREA, &desc);
+ if (res != 0) {
+ perror("could not query fpu area description");
+ goto bail;
+ }
+
+ /* Make sure the XSAVE area described for this machine is reasonable */
+ if (desc.vfd_num_entries == 0) {
+ (void) fprintf(stderr, "no FPU description entries found\n");
+ goto bail;
+ }
+ if (desc.vfd_req_size < MIN_XSAVE_SIZE) {
+ (void) fprintf(stderr, "required XSAVE size %lu < "
+ "expected %lu\n", desc.vfd_req_size, MIN_XSAVE_SIZE);
+ goto bail;
+ }
+
+ const size_t fpu_size = desc.vfd_req_size;
+ void *fpu_area = malloc(fpu_size);
+ if (fpu_area == NULL) {
+ perror("could not allocate fpu area");
+ goto bail;
+ }
+ bzero(fpu_area, fpu_size);
+
+ if (!check_sse(fd, &desc, fpu_area, fpu_size)) {
+ goto bail;
+ }
+ if (!check_avx(fd, &desc, fpu_area, fpu_size)) {
+ goto bail;
+ }
+
+ /* mission accomplished */
+ vm_destroy(ctx);
+ (void) printf("%s\tPASS\n", suite_name);
+ return (EXIT_SUCCESS);
+
+bail:
+ vm_destroy(ctx);
+ (void) printf("%s\tFAIL\n", suite_name);
+ return (EXIT_FAILURE);
+}
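Per the packaging manifest above, this test installs to /opt/bhyve-tests/tests/vmm/fpu_getset and is picked up by the default runfile. For a quick manual check it can presumably also be run directly on a vmm-capable host with sufficient privilege:

	# /opt/bhyve-tests/tests/vmm/fpu_getset
	fpu_getset	PASS

(The PASS/FAIL line comes from the printf at the end of main(), keyed off basename(argv[0]).)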
diff --git a/usr/src/test/bhyve-tests/tests/vmm/mem_partial.c b/usr/src/test/bhyve-tests/tests/vmm/mem_partial.c
index b410c673ab..964fdf95c5 100644
--- a/usr/src/test/bhyve-tests/tests/vmm/mem_partial.c
+++ b/usr/src/test/bhyve-tests/tests/vmm/mem_partial.c
@@ -57,8 +57,9 @@ main(int argc, char *argv[])
struct vmctx *ctx;
int res, fd;
void *guest_mem;
+ const char *suite_name = basename(argv[0]);
- ctx = create_test_vm();
+ ctx = create_test_vm(suite_name);
if (ctx == NULL) {
perror("could open test VM");
return (1);
@@ -192,7 +193,7 @@ main(int argc, char *argv[])
}
/* mission accomplished */
- (void) printf("%s\tPASS\n", basename(argv[0]));
+ (void) printf("%s\tPASS\n", suite_name);
vm_destroy(ctx);
return (0);
diff --git a/usr/src/test/bhyve-tests/tests/vmm/mem_seg_map.c b/usr/src/test/bhyve-tests/tests/vmm/mem_seg_map.c
index e80f18547e..92d90bbf28 100644
--- a/usr/src/test/bhyve-tests/tests/vmm/mem_seg_map.c
+++ b/usr/src/test/bhyve-tests/tests/vmm/mem_seg_map.c
@@ -40,8 +40,9 @@ main(int argc, char *argv[])
struct vmctx *ctx;
int res, fd;
void *seg_obj, *guest_mem;
+ const char *suite_name = basename(argv[0]);
- ctx = create_test_vm();
+ ctx = create_test_vm(suite_name);
if (ctx == NULL) {
perror("could open test VM");
return (1);
@@ -129,7 +130,7 @@ main(int argc, char *argv[])
/* mission accomplished */
vm_destroy(ctx);
- (void) printf("%s\tPASS\n", basename(argv[0]));
+ (void) printf("%s\tPASS\n", suite_name);
return (0);
bail:
diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.c b/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.c
index cf00426300..844e8b9708 100644
--- a/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.c
+++ b/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.c
@@ -42,7 +42,6 @@ __FBSDID("$FreeBSD$");
#include <machine/clock.h>
#include <machine/cpufunc.h>
#include <machine/md_var.h>
-#include <machine/pcb.h>
#include <machine/specialreg.h>
#include <machine/vmm.h>
diff --git a/usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h b/usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h
index 7584213d39..e94f7a876b 100644
--- a/usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h
+++ b/usr/src/uts/i86pc/io/vmm/sys/vmm_kernel.h
@@ -39,7 +39,7 @@
*
* Copyright 2015 Pluribus Networks Inc.
* Copyright 2019 Joyent, Inc.
- * Copyright 2021 Oxide Computer Company
+ * Copyright 2022 Oxide Computer Company
* Copyright 2021 OmniOS Community Edition (OmniOSce) Association.
*/
@@ -161,6 +161,8 @@ int vm_get_run_state(struct vm *vm, int vcpuid, uint32_t *state,
uint8_t *sipi_vec);
int vm_set_run_state(struct vm *vm, int vcpuid, uint32_t state,
uint8_t sipi_vec);
+int vm_get_fpu(struct vm *vm, int vcpuid, void *buf, size_t len);
+int vm_set_fpu(struct vm *vm, int vcpuid, void *buf, size_t len);
int vm_run(struct vm *vm, int vcpuid, const struct vm_entry *);
int vm_suspend(struct vm *vm, enum vm_suspend_how how);
int vm_inject_nmi(struct vm *vm, int vcpu);
diff --git a/usr/src/uts/i86pc/io/vmm/vmm.c b/usr/src/uts/i86pc/io/vmm/vmm.c
index 16acc1ea2c..78a810880d 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm.c
+++ b/usr/src/uts/i86pc/io/vmm/vmm.c
@@ -60,8 +60,8 @@ __FBSDID("$FreeBSD$");
#include <sys/sched.h>
#include <sys/systm.h>
#include <sys/sunddi.h>
+#include <sys/hma.h>
-#include <machine/pcb.h>
#include <machine/md_var.h>
#include <x86/psl.h>
#include <x86/apicreg.h>
@@ -132,7 +132,7 @@ struct vcpu {
int exc_errcode_valid;
uint32_t exc_errcode;
uint8_t sipi_vector; /* (i) SIPI vector */
- struct savefpu *guestfpu; /* (a,i) guest fpu state */
+ hma_fpu_t *guestfpu; /* (a,i) guest fpu state */
uint64_t guest_xcr0; /* (i) guest %xcr0 register */
void *stats; /* (a,i) statistics */
struct vm_exit exitinfo; /* (x) exit reason and collateral */
@@ -318,7 +318,8 @@ vcpu_cleanup(struct vm *vm, int i, bool destroy)
VLAPIC_CLEANUP(vm->cookie, vcpu->vlapic);
if (destroy) {
vmm_stat_free(vcpu->stats);
- fpu_save_area_free(vcpu->guestfpu);
+ hma_fpu_free(vcpu->guestfpu);
+ vcpu->guestfpu = NULL;
vie_free(vcpu->vie_ctx);
vcpu->vie_ctx = NULL;
vmc_destroy(vcpu->vmclient);
@@ -342,7 +343,7 @@ vcpu_init(struct vm *vm, int vcpu_id, bool create)
vcpu->state = VCPU_IDLE;
vcpu->hostcpu = NOCPU;
vcpu->lastloccpu = NOCPU;
- vcpu->guestfpu = fpu_save_area_alloc();
+ vcpu->guestfpu = hma_fpu_alloc(KM_SLEEP);
vcpu->stats = vmm_stat_alloc();
vcpu->vie_ctx = vie_alloc();
@@ -369,7 +370,7 @@ vcpu_init(struct vm *vm, int vcpu_id, bool create)
vcpu->extint_pending = 0;
vcpu->exception_pending = 0;
vcpu->guest_xcr0 = XFEATURE_ENABLED_X87;
- fpu_save_area_reset(vcpu->guestfpu);
+ hma_fpu_init(vcpu->guestfpu);
vmm_stat_init(vcpu->stats);
vcpu->tsc_offset = 0;
}
@@ -1168,6 +1169,50 @@ vm_set_seg_desc(struct vm *vm, int vcpu, int reg, const struct seg_desc *desc)
return (VMSETDESC(vm->cookie, vcpu, reg, desc));
}
+static int
+translate_hma_xsave_result(hma_fpu_xsave_result_t res)
+{
+ switch (res) {
+ case HFXR_OK:
+ return (0);
+ case HFXR_NO_SPACE:
+ return (ENOSPC);
+ case HFXR_BAD_ALIGN:
+ case HFXR_UNSUP_FMT:
+ case HFXR_UNSUP_FEAT:
+ case HFXR_INVALID_DATA:
+ return (EINVAL);
+ default:
+ panic("unexpected xsave result");
+ }
+}
+
+int
+vm_get_fpu(struct vm *vm, int vcpuid, void *buf, size_t len)
+{
+ if (vcpuid < 0 || vcpuid >= vm->maxcpus)
+ return (EINVAL);
+
+ struct vcpu *vcpu = &vm->vcpu[vcpuid];
+ hma_fpu_xsave_result_t res;
+
+ res = hma_fpu_get_xsave_state(vcpu->guestfpu, buf, len);
+ return (translate_hma_xsave_result(res));
+}
+
+int
+vm_set_fpu(struct vm *vm, int vcpuid, void *buf, size_t len)
+{
+ if (vcpuid < 0 || vcpuid >= vm->maxcpus)
+ return (EINVAL);
+
+ struct vcpu *vcpu = &vm->vcpu[vcpuid];
+ hma_fpu_xsave_result_t res;
+
+ res = hma_fpu_set_xsave_state(vcpu->guestfpu, buf, len);
+ return (translate_hma_xsave_result(res));
+}
+
int
vm_get_run_state(struct vm *vm, int vcpuid, uint32_t *state, uint8_t *sipi_vec)
{
@@ -1220,13 +1265,9 @@ vm_track_dirty_pages(struct vm *vm, uint64_t gpa, size_t len, uint8_t *bitmap)
static void
restore_guest_fpustate(struct vcpu *vcpu)
{
-
- /* flush host state to the pcb */
- fpuexit(curthread);
-
- /* restore guest FPU state */
+ /* Save host FPU and restore guest FPU */
fpu_stop_emulating();
- fpurestore(vcpu->guestfpu);
+ hma_fpu_start_guest(vcpu->guestfpu);
/* restore guest XCR0 if XSAVE is enabled in the host */
if (rcr4() & CR4_XSAVE)
@@ -1252,9 +1293,9 @@ save_guest_fpustate(struct vcpu *vcpu)
load_xcr(0, vmm_get_host_xcr0());
}
- /* save guest FPU state */
+ /* save guest FPU and restore host FPU */
fpu_stop_emulating();
- fpusave(vcpu->guestfpu);
+ hma_fpu_stop_guest(vcpu->guestfpu);
/*
* When the host state has been restored, we should not re-enable
* CR0.TS on illumos for eager FPU.
@@ -2912,7 +2953,7 @@ vcpu_arch_reset(struct vm *vm, int vcpuid, bool init_only)
*/
if (!init_only) {
vcpu->guest_xcr0 = XFEATURE_ENABLED_X87;
- fpu_save_area_reset(vcpu->guestfpu);
+ hma_fpu_init(vcpu->guestfpu);
/* XXX: clear MSRs and other pieces */
}
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c b/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c
index a83989e9eb..4ef2e5f583 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c
+++ b/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c
@@ -414,6 +414,8 @@ vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md,
case VM_RESET_CPU:
case VM_GET_RUN_STATE:
case VM_SET_RUN_STATE:
+ case VM_GET_FPU:
+ case VM_SET_FPU:
/*
* Copy in the ID of the vCPU chosen for this operation.
* Since a nefarious caller could update their struct between
@@ -469,6 +471,7 @@ vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md,
case VM_GET_GPA_PMAP:
case VM_IOAPIC_PINCOUNT:
case VM_SUSPEND:
+ case VM_DESC_FPU_AREA:
default:
break;
}
@@ -755,6 +758,53 @@ vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md,
}
break;
}
+ case VM_DESC_FPU_AREA: {
+ struct vm_fpu_desc desc;
+ void *buf = NULL;
+
+ if (ddi_copyin(datap, &desc, sizeof (desc), md)) {
+ error = EFAULT;
+ break;
+ }
+ if (desc.vfd_num_entries > 64) {
+ error = EINVAL;
+ break;
+ }
+ const size_t buf_sz = sizeof (struct vm_fpu_desc_entry) *
+ desc.vfd_num_entries;
+ if (buf_sz != 0) {
+ buf = kmem_zalloc(buf_sz, KM_SLEEP);
+ }
+
+ /*
+ * For now, we are depending on vm_fpu_desc_entry and
+ * hma_xsave_state_desc_t having the same format.
+ */
+ CTASSERT(sizeof (struct vm_fpu_desc_entry) ==
+ sizeof (hma_xsave_state_desc_t));
+
+ size_t req_size;
+ const uint_t max_entries = hma_fpu_describe_xsave_state(
+ (hma_xsave_state_desc_t *)buf,
+ desc.vfd_num_entries,
+ &req_size);
+
+ desc.vfd_req_size = req_size;
+ desc.vfd_num_entries = max_entries;
+ if (buf_sz != 0) {
+ if (ddi_copyout(buf, desc.vfd_entry_data, buf_sz, md)) {
+ error = EFAULT;
+ }
+ kmem_free(buf, buf_sz);
+ }
+
+ if (error == 0) {
+ if (ddi_copyout(&desc, datap, sizeof (desc), md)) {
+ error = EFAULT;
+ }
+ }
+ break;
+ }
case VM_ISA_ASSERT_IRQ: {
struct vm_isa_irq isa_irq;
@@ -1040,6 +1090,51 @@ vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md,
vrs.sipi_vector);
break;
}
+ case VM_GET_FPU: {
+ struct vm_fpu_state req;
+ const size_t max_len = (PAGESIZE * 2);
+ void *kbuf;
+
+ if (ddi_copyin(datap, &req, sizeof (req), md)) {
+ error = EFAULT;
+ break;
+ }
+ if (req.len > max_len || req.len == 0) {
+ error = EINVAL;
+ break;
+ }
+ kbuf = kmem_zalloc(req.len, KM_SLEEP);
+ error = vm_get_fpu(sc->vmm_vm, vcpu, kbuf, req.len);
+ if (error == 0) {
+ if (ddi_copyout(kbuf, req.buf, req.len, md)) {
+ error = EFAULT;
+ }
+ }
+ kmem_free(kbuf, req.len);
+ break;
+ }
+ case VM_SET_FPU: {
+ struct vm_fpu_state req;
+ const size_t max_len = (PAGESIZE * 2);
+ void *kbuf;
+
+ if (ddi_copyin(datap, &req, sizeof (req), md)) {
+ error = EFAULT;
+ break;
+ }
+ if (req.len > max_len || req.len == 0) {
+ error = EINVAL;
+ break;
+ }
+ kbuf = kmem_alloc(req.len, KM_SLEEP);
+ if (ddi_copyin(req.buf, kbuf, req.len, md)) {
+ error = EFAULT;
+ } else {
+ error = vm_set_fpu(sc->vmm_vm, vcpu, kbuf, req.len);
+ }
+ kmem_free(kbuf, req.len);
+ break;
+ }
case VM_SET_KERNEMU_DEV:
case VM_GET_KERNEMU_DEV: {
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c b/usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c
index f78db731d6..cdcebc71d4 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c
+++ b/usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c
@@ -58,7 +58,6 @@
#include <sys/x86_archext.h>
#include <machine/cpufunc.h>
-#include <machine/fpu.h>
#include <machine/md_var.h>
#include <machine/specialreg.h>
#include <machine/vmm.h>
@@ -434,67 +433,6 @@ vmm_cpuid_init(void)
cpu_exthigh = regs[0];
}
-/*
- * FreeBSD uses the struct savefpu for managing the FPU state. That is mimicked
- * by our hypervisor multiplexor framework structure.
- */
-struct savefpu *
-fpu_save_area_alloc(void)
-{
- return ((struct savefpu *)hma_fpu_alloc(KM_SLEEP));
-}
-
-void
-fpu_save_area_free(struct savefpu *fsa)
-{
- hma_fpu_t *fpu = (hma_fpu_t *)fsa;
- hma_fpu_free(fpu);
-}
-
-void
-fpu_save_area_reset(struct savefpu *fsa)
-{
- hma_fpu_t *fpu = (hma_fpu_t *)fsa;
- hma_fpu_init(fpu);
-}
-
-/*
- * This glue function is supposed to save the host's FPU state. This is always
- * paired in the general bhyve code with a call to fpusave. Therefore, we treat
- * this as a nop and do all the work in fpusave(), which will have the context
- * argument that we want anyways.
- */
-void
-fpuexit(kthread_t *td)
-{
-}
-
-/*
- * This glue function is supposed to restore the guest's FPU state from the save
- * area back to the host. In FreeBSD, it is assumed that the host state has
- * already been saved by a call to fpuexit(); however, we do both here.
- */
-void
-fpurestore(void *arg)
-{
- hma_fpu_t *fpu = arg;
-
- hma_fpu_start_guest(fpu);
-}
-
-/*
- * This glue function is supposed to save the guest's FPU state. The host's FPU
- * state is not expected to be restored necessarily due to the use of FPU
- * emulation through CR0.TS. However, we can and do restore it here.
- */
-void
-fpusave(void *arg)
-{
- hma_fpu_t *fpu = arg;
-
- hma_fpu_stop_guest(fpu);
-}
-
void
vmm_sol_glue_init(void)
{
diff --git a/usr/src/uts/i86pc/os/hma_fpu.c b/usr/src/uts/i86pc/os/hma_fpu.c
index 14cfa8baed..138af7a32a 100644
--- a/usr/src/uts/i86pc/os/hma_fpu.c
+++ b/usr/src/uts/i86pc/os/hma_fpu.c
@@ -11,6 +11,7 @@
/*
* Copyright (c) 2018, Joyent, Inc.
+ * Copyright 2022 Oxide Computer Company
*/
/*
@@ -28,6 +29,12 @@
#include <sys/hma.h>
#include <sys/x86_archext.h>
#include <sys/archsystm.h>
+#include <sys/controlregs.h>
+#include <sys/sysmacros.h>
+#include <sys/stdbool.h>
+#include <sys/ontrap.h>
+#include <sys/cpuvar.h>
+#include <sys/disp.h>
struct hma_fpu {
fpu_ctx_t hf_guest_fpu;
@@ -57,7 +64,7 @@ hma_fpu_init(hma_fpu_t *fpu)
xs = fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_xs;
bzero(xs, cpuid_get_xsave_size());
bcopy(&avx_initial, xs, sizeof (*xs));
- xs->xs_xstate_bv = XFEATURE_LEGACY_FP | XFEATURE_SSE;
+ xs->xs_header.xsh_xstate_bv = XFEATURE_LEGACY_FP | XFEATURE_SSE;
fpu->hf_guest_fpu.fpu_xsave_mask = XFEATURE_FP_ALL;
break;
default:
@@ -140,6 +147,36 @@ hma_fpu_start_guest(hma_fpu_t *fpu)
fpu->hf_guest_fpu.fpu_flags &= ~FPU_VALID;
}
+/*
+ * Since fp_save() assumes a thread-centric view of the FPU usage -- it will
+ * assert if attempting to save elsewhere than the thread PCB, and will elide
+ * action if the FPU is not enabled -- we cannot use it for the manual saving of
+ * FPU contents. To work around that, we call the save mechanism directly.
+ */
+static void
+do_fp_save(fpu_ctx_t *fpu)
+{
+ /*
+ * For our manual saving, we expect that the thread PCB never be the
+ * landing zone for the data.
+ */
+ ASSERT(curthread->t_lwp == NULL ||
+ fpu != &curthread->t_lwp->lwp_pcb.pcb_fpu);
+
+ switch (fp_save_mech) {
+ case FP_FXSAVE:
+ fpxsave(fpu->fpu_regs.kfpu_u.kfpu_fx);
+ break;
+ case FP_XSAVE:
+ xsavep(fpu->fpu_regs.kfpu_u.kfpu_xs, fpu->fpu_xsave_mask);
+ break;
+ default:
+ panic("Invalid fp_save_mech");
+ }
+ fpu->fpu_flags |= FPU_VALID;
+}
+
+
void
hma_fpu_stop_guest(hma_fpu_t *fpu)
{
@@ -148,29 +185,232 @@ hma_fpu_stop_guest(hma_fpu_t *fpu)
ASSERT3U(fpu->hf_guest_fpu.fpu_flags & FPU_EN, !=, 0);
ASSERT3U(fpu->hf_guest_fpu.fpu_flags & FPU_VALID, ==, 0);
+ do_fp_save(&fpu->hf_guest_fpu);
+
+ fp_restore(&curthread->t_lwp->lwp_pcb.pcb_fpu);
+
+ fpu->hf_inguest = B_FALSE;
+ fpu->hf_curthread = NULL;
+}
+
+/*
+ * Will output up to `ndesc` records into `descp`. The required size for an
+ * XSAVE area containing all of the data fields supported by the host will be
+ * placed in `req_sizep` (if non-NULL). Returns the number of feature bits
+ * supported by the host.
+ */
+uint_t
+hma_fpu_describe_xsave_state(hma_xsave_state_desc_t *descp, uint_t ndesc,
+ size_t *req_sizep)
+{
+ uint64_t features;
+
+ switch (fp_save_mech) {
+ case FP_FXSAVE:
+ /*
+ * Even without xsave support, the FPU will have legacy x87
+ * float and SSE state contained within.
+ */
+ features = XFEATURE_LEGACY_FP | XFEATURE_SSE;
+ break;
+ case FP_XSAVE:
+ features = get_xcr(XFEATURE_ENABLED_MASK);
+ break;
+ default:
+ panic("Invalid fp_save_mech");
+ }
+
+ uint_t count, pos;
+ uint_t max_size = MIN_XSAVE_SIZE;
+ for (count = 0, pos = 0; pos <= 63; pos++) {
+ const uint64_t bit = (1 << pos);
+ uint32_t size, off;
+
+ if ((features & bit) == 0) {
+ continue;
+ }
+
+ if (bit == XFEATURE_LEGACY_FP || bit == XFEATURE_SSE) {
+ size = sizeof (struct fxsave_state);
+ off = 0;
+ } else {
+ /*
+ * Size and position of data types within the XSAVE area
+ * are described in leaf 0xD in the subfunction
+ * corresponding to the bit position (for pos > 1).
+ */
+ struct cpuid_regs regs = {
+ .cp_eax = 0xD,
+ .cp_ecx = pos,
+ };
+
+ ASSERT3U(pos, >, 1);
+
+ (void) __cpuid_insn(&regs);
+ size = regs.cp_eax;
+ off = regs.cp_ebx;
+ }
+ max_size = MAX(max_size, off + size);
+
+ if (count < ndesc) {
+ hma_xsave_state_desc_t *desc = &descp[count];
+
+ desc->hxsd_bit = bit;
+ desc->hxsd_size = size;
+ desc->hxsd_off = off;
+ }
+ count++;
+ }
+ if (req_sizep != NULL) {
+ *req_sizep = max_size;
+ }
+ return (count);
+}
+
+hma_fpu_xsave_result_t
+hma_fpu_get_xsave_state(const hma_fpu_t *fpu, void *buf, size_t len)
+{
+ ASSERT(!fpu->hf_inguest);
+
+ size_t valid_len;
+ switch (fp_save_mech) {
+ case FP_FXSAVE: {
+ if (len < MIN_XSAVE_SIZE) {
+ return (HFXR_NO_SPACE);
+ }
+ bcopy(fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_generic, buf,
+ sizeof (struct fxsave_state));
+
+ struct xsave_header hdr = {
+ .xsh_xstate_bv = XFEATURE_LEGACY_FP | XFEATURE_SSE,
+ };
+ bcopy(&hdr, buf + sizeof (struct fxsave_state), sizeof (hdr));
+
+ break;
+ }
+ case FP_XSAVE:
+ (void) hma_fpu_describe_xsave_state(NULL, 0, &valid_len);
+ if (len < valid_len) {
+ return (HFXR_NO_SPACE);
+ }
+ bcopy(fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_generic, buf,
+ valid_len);
+ break;
+ default:
+ panic("Invalid fp_save_mech");
+ }
+
+ return (HFXR_OK);
+}
+
+hma_fpu_xsave_result_t
+hma_fpu_set_xsave_state(hma_fpu_t *fpu, void *buf, size_t len)
+{
+ ASSERT(!fpu->hf_inguest);
+
+ if (len < MIN_XSAVE_SIZE) {
+ return (HFXR_NO_SPACE);
+ }
+ /* 64-byte alignment is demanded of the FPU-related operations */
+ if (((uintptr_t)buf & 63) != 0) {
+ return (HFXR_BAD_ALIGN);
+ }
+
+ struct xsave_header *hdr = buf + sizeof (struct fxsave_state);
+ if (hdr->xsh_xcomp_bv != 0) {
+ /* XSAVEC formatting not supported at this time */
+ return (HFXR_UNSUP_FMT);
+ }
+
+ uint64_t allowed_bits;
+ size_t save_area_size;
+ switch (fp_save_mech) {
+ case FP_FXSAVE:
+ allowed_bits = XFEATURE_LEGACY_FP | XFEATURE_SSE;
+ save_area_size = sizeof (struct fxsave_state);
+ break;
+ case FP_XSAVE:
+ allowed_bits = get_xcr(XFEATURE_ENABLED_MASK);
+ save_area_size = cpuid_get_xsave_size();
+ break;
+ default:
+ panic("Invalid fp_save_mech");
+ }
+ if ((hdr->xsh_xstate_bv & ~(allowed_bits)) != 0) {
+ return (HFXR_UNSUP_FEAT);
+ }
+
/*
- * Note, we can't use fp_save because it assumes that we're saving to
- * the thread's PCB and not somewhere else. Because this is a different
- * FPU context, we instead have to do this ourselves.
+ * We validate the incoming state with the FPU itself prior to saving it
+ * into the guest FPU context area. In order to preserve any state
+ * currently housed in the FPU, we save it to a temporarily allocated
+ * FPU context. It is important to note that we are not following the
+ * normal rules around state management detailed in uts/intel/os/fpu.c.
+ * This saving is unconditional, uncaring about the state in the FPU or
+ * the value of CR0_TS, simplifying our process before returning to the
+ * caller (without needing to check for an lwp, etc). To prevent
+ * interrupting threads from encountering this unusual FPU state, we
+ * keep interrupts disabled for the duration.
*/
+ fpu_ctx_t temp_ctx = {
+ .fpu_xsave_mask = XFEATURE_FP_ALL,
+ };
+ temp_ctx.fpu_regs.kfpu_u.kfpu_generic =
+ kmem_cache_alloc(fpsave_cachep, KM_SLEEP);
+ bzero(temp_ctx.fpu_regs.kfpu_u.kfpu_generic, save_area_size);
+
+ ulong_t iflag;
+ iflag = intr_clear();
+ bool disable_when_done = (getcr0() & CR0_TS) != 0;
+ do_fp_save(&temp_ctx);
+
+ /*
+ * If the provided data is invalid, it will cause a #GP when we attempt
+ * to load it into the FPU, so protect against that with on_trap().
+ * Should the data load successfully, we can then be confident that its
+ * later use via hma_fpu_start_guest() will be safe.
+ */
+ on_trap_data_t otd;
+ volatile hma_fpu_xsave_result_t res = HFXR_OK;
+ if (on_trap(&otd, OT_DATA_EC) != 0) {
+ res = HFXR_INVALID_DATA;
+ goto done;
+ }
+
switch (fp_save_mech) {
case FP_FXSAVE:
- fpxsave(fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_fx);
+ if (hdr->xsh_xstate_bv == 0) {
+ /*
+ * An empty xstate_bv means we can simply load the
+ * legacy FP/SSE area with their initial state.
+ */
+ bcopy(&sse_initial,
+ fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_fx,
+ sizeof (sse_initial));
+ } else {
+ fpxrestore(buf);
+ fpxsave(fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_fx);
+ }
break;
case FP_XSAVE:
+ xrestore(buf, XFEATURE_FP_ALL);
xsavep(fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_xs,
fpu->hf_guest_fpu.fpu_xsave_mask);
break;
default:
panic("Invalid fp_save_mech");
- /*NOTREACHED*/
}
- fpu->hf_guest_fpu.fpu_flags |= FPU_VALID;
- fp_restore(&curthread->t_lwp->lwp_pcb.pcb_fpu);
+done:
+ no_trap();
+ fp_restore(&temp_ctx);
+ if (disable_when_done) {
+ fpdisable();
+ }
+ intr_restore(iflag);
+ kmem_cache_free(fpsave_cachep, temp_ctx.fpu_regs.kfpu_u.kfpu_generic);
- fpu->hf_inguest = B_FALSE;
- fpu->hf_curthread = NULL;
+ return (res);
}
void
@@ -214,11 +454,11 @@ hma_fpu_set_fxsave_state(hma_fpu_t *fpu, const struct fxsave_state *fx)
gxs = fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_xs;
bzero(gxs, cpuid_get_xsave_size());
bcopy(fx, &gxs->xs_fxsave, sizeof (*fx));
- gxs->xs_xstate_bv = XFEATURE_LEGACY_FP | XFEATURE_SSE;
+ gxs->xs_header.xsh_xstate_bv =
+ XFEATURE_LEGACY_FP | XFEATURE_SSE;
break;
default:
panic("Invalid fp_save_mech");
- /* NOTREACHED */
}
return (0);
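As a worked example of what hma_fpu_describe_xsave_state() reports on an XSAVE-capable host with AVX enabled (values implied by the fxsave layout handling above and by the AVX placement that bhyvectl's show_fpu() asserts; other hardware and feature sets will differ):

	bit                 size   off
	XFEATURE_LEGACY_FP  0x200  0x0    (the 512-byte fxsave area)
	XFEATURE_SSE        0x200  0x0    (shares the fxsave area)
	XFEATURE_AVX        0x100  0x240  (from CPUID leaf 0xD, subfunction 2)

The reported required size would then be MAX(MIN_XSAVE_SIZE, 0x240 + 0x100) = 0x340 (832) bytes, matching the AVX_XSAVE_SIZE figure cited in fp.h below.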
diff --git a/usr/src/uts/i86pc/sys/hma.h b/usr/src/uts/i86pc/sys/hma.h
index 16ab708896..e15cd60d5e 100644
--- a/usr/src/uts/i86pc/sys/hma.h
+++ b/usr/src/uts/i86pc/sys/hma.h
@@ -11,6 +11,7 @@
/*
* Copyright 2019 Joyent, Inc.
+ * Copyright 2022 Oxide Computer Company
*/
#ifndef _SYS_HMA_H
@@ -117,6 +118,43 @@ extern void hma_fpu_start_guest(hma_fpu_t *);
*/
extern void hma_fpu_stop_guest(hma_fpu_t *);
+typedef enum {
+ HFXR_OK = 0,
+ HFXR_NO_SPACE, /* buffer is not large enough */
+ HFXR_BAD_ALIGN, /* buffer is not properly (64-byte) aligned */
+ HFXR_UNSUP_FMT, /* data using unsupported (compressed) format */
+ HFXR_UNSUP_FEAT, /* data has unsupported features set */
+ HFXR_INVALID_DATA, /* CPU determined xsave data is invalid */
+} hma_fpu_xsave_result_t;
+
+/*
+ * Get and set the contents of the FPU save area, formatted as XSAVE-style
+ * information. If XSAVE is not supported by the host, the input and output
+ * values will be translated to and from the FXSAVE format. Attempts to set
+ * XSAVE values not supported by the host will result in an error.
+ *
+ * These functions cannot be called while the FPU is in use by the guest. It is
+ * up to callers to guarantee this invariant.
+ */
+extern hma_fpu_xsave_result_t hma_fpu_get_xsave_state(const hma_fpu_t *, void *,
+ size_t);
+extern hma_fpu_xsave_result_t hma_fpu_set_xsave_state(hma_fpu_t *, void *,
+ size_t);
+
+typedef struct hma_xsave_state_desc {
+ uint64_t hxsd_bit;
+ uint32_t hxsd_size;
+ uint32_t hxsd_off;
+} hma_xsave_state_desc_t;
+
+/*
+ * Get a description of the data fields supported by the host via the XSAVE APIs
+ * for getting/setting guest FPU data. See the function definition for more
+ * detailed parameter usage.
+ */
+extern uint_t hma_fpu_describe_xsave_state(hma_xsave_state_desc_t *, uint_t,
+ size_t *);
+
/*
* Get and set the contents of the FPU save area. This sets the fxsave style
* information. In all cases when this is in use, if an XSAVE state is actually
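To make the calling convention concrete, here is a minimal kernel-side sketch of the describe-then-get pattern (assuming a valid hma_fpu_t *fpu not currently in guest use, with error handling elided; it condenses how vmm_sol_dev.c drives this API rather than being part of the change itself):

	size_t req_size;
	void *buf;

	/* Passing NULL/0 only asks for the required buffer size. */
	(void) hma_fpu_describe_xsave_state(NULL, 0, &req_size);

	buf = kmem_zalloc(req_size, KM_SLEEP);
	if (hma_fpu_get_xsave_state(fpu, buf, req_size) != HFXR_OK) {
		/* handle HFXR_NO_SPACE, etc. */
	}
	/* hma_fpu_set_xsave_state() additionally demands 64-byte alignment */
	kmem_free(buf, req_size);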
diff --git a/usr/src/uts/i86pc/sys/vmm_dev.h b/usr/src/uts/i86pc/sys/vmm_dev.h
index 3282fa86bf..027a7da214 100644
--- a/usr/src/uts/i86pc/sys/vmm_dev.h
+++ b/usr/src/uts/i86pc/sys/vmm_dev.h
@@ -302,6 +302,25 @@ struct vm_run_state {
uint8_t _pad[3];
};
+/* Transfer data for VM_GET_FPU and VM_SET_FPU */
+struct vm_fpu_state {
+ int vcpuid;
+ void *buf;
+ size_t len;
+};
+
+struct vm_fpu_desc_entry {
+ uint64_t vfde_feature;
+ uint32_t vfde_size;
+ uint32_t vfde_off;
+};
+
+struct vm_fpu_desc {
+ struct vm_fpu_desc_entry *vfd_entry_data;
+ size_t vfd_req_size;
+ uint32_t vfd_num_entries;
+};
+
struct vmm_resv_query {
size_t vrq_free_sz;
size_t vrq_alloc_sz;
@@ -370,6 +389,8 @@ struct vmm_dirty_tracker {
#define VM_RESET_CPU (VMM_CPU_IOC_BASE | 0x16)
#define VM_GET_RUN_STATE (VMM_CPU_IOC_BASE | 0x17)
#define VM_SET_RUN_STATE (VMM_CPU_IOC_BASE | 0x18)
+#define VM_GET_FPU (VMM_CPU_IOC_BASE | 0x19)
+#define VM_SET_FPU (VMM_CPU_IOC_BASE | 0x1a)
/* Operations requiring write-locking the VM */
#define VM_REINIT (VMM_LOCK_IOC_BASE | 0x01)
@@ -428,6 +449,7 @@ struct vmm_dirty_tracker {
/* Note: forces a barrier on a flush operation before returning. */
#define VM_TRACK_DIRTY_PAGES (VMM_IOC_BASE | 0x20)
+#define VM_DESC_FPU_AREA (VMM_IOC_BASE | 0x21)
#define VM_DEVMEM_GETOFFSET (VMM_IOC_BASE | 0xff)
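A minimal userspace sketch of the two-phase protocol these structures support (fd being the VM device descriptor from vm_get_device_fd(), includes and error handling elided; this condenses the same pattern used by show_fpu() and the fpu_getset test above):

	struct vm_fpu_desc_entry ents[64];
	struct vm_fpu_desc desc = {
		.vfd_entry_data = ents,
		.vfd_num_entries = 64,	/* kernel rejects counts above 64 */
	};

	if (ioctl(fd, VM_DESC_FPU_AREA, &desc) == 0) {
		/* desc.vfd_req_size is the buffer length VM_GET_FPU expects */
		struct vm_fpu_state req = {
			.vcpuid = 0,
			.buf = malloc(desc.vfd_req_size),
			.len = desc.vfd_req_size,
		};
		if (ioctl(fd, VM_GET_FPU, &req) == 0) {
			/* req.buf now holds an uncompressed XSAVE image */
		}
	}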
diff --git a/usr/src/uts/intel/os/archdep.c b/usr/src/uts/intel/os/archdep.c
index 3d2996880d..9ef480a69a 100644
--- a/usr/src/uts/intel/os/archdep.c
+++ b/usr/src/uts/intel/os/archdep.c
@@ -269,7 +269,7 @@ setfpregs(klwp_t *lwp, fpregset_t *fp)
&fpu->fpu_regs.kfpu_u.kfpu_xs->xs_fxsave);
fpu->fpu_regs.kfpu_xstatus =
fp->fp_reg_set.fpchip_state.xstatus;
- fpu->fpu_regs.kfpu_u.kfpu_xs->xs_xstate_bv |=
+ fpu->fpu_regs.kfpu_u.kfpu_xs->xs_header.xsh_xstate_bv |=
(XFEATURE_LEGACY_FP | XFEATURE_SSE);
break;
default:
diff --git a/usr/src/uts/intel/os/fpu.c b/usr/src/uts/intel/os/fpu.c
index 0a9b828288..9644282429 100644
--- a/usr/src/uts/intel/os/fpu.c
+++ b/usr/src/uts/intel/os/fpu.c
@@ -22,7 +22,7 @@
* Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2021 Joyent, Inc.
* Copyright 2021 RackTop Systems, Inc.
- * Copyright 2021 Oxide Computer Company
+ * Copyright 2022 Oxide Computer Company
*/
/* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
@@ -528,23 +528,18 @@ const struct xsave_state avx_initial = {
* The definition below needs to be identical with sse_initial
* defined above.
*/
- {
- FPU_CW_INIT, /* fx_fcw */
- 0, /* fx_fsw */
- 0, /* fx_fctw */
- 0, /* fx_fop */
- 0, /* fx_rip */
- 0, /* fx_rdp */
- SSE_MXCSR_INIT /* fx_mxcsr */
- /* rest of structure is zero */
+ .xs_fxsave = {
+ .fx_fcw = FPU_CW_INIT,
+ .fx_mxcsr = SSE_MXCSR_INIT,
+ },
+ .xs_header = {
+ /*
+ * bit0 = 1 for XSTATE_BV to indicate that legacy fields are
+ * valid, and CPU should initialize XMM/YMM.
+ */
+ .xsh_xstate_bv = 1,
+ .xsh_xcomp_bv = 0,
},
- /*
- * bit0 = 1 for XSTATE_BV to indicate that legacy fields are valid,
- * and CPU should initialize XMM/YMM.
- */
- 1,
- 0 /* xs_xcomp_bv */
- /* rest of structure is zero */
};
/*
@@ -656,8 +651,8 @@ fp_new_lwp(void *parent, void *child)
bcopy(&avx_initial, cxs, sizeof (*cxs));
cfx->fx_mxcsr = fx->fx_mxcsr & ~SSE_MXCSR_EFLAGS;
cfx->fx_fcw = fx->fx_fcw;
- cxs->xs_xstate_bv |= (get_xcr(XFEATURE_ENABLED_MASK) &
- XFEATURE_FP_INITIAL);
+ cxs->xs_header.xsh_xstate_bv |=
+ (get_xcr(XFEATURE_ENABLED_MASK) & XFEATURE_FP_INITIAL);
break;
default:
panic("Invalid fp_save_mech");
@@ -973,7 +968,8 @@ fpexterrflt(struct regs *rp)
* Always set LEGACY_FP as it may have been cleared by XSAVE
* instruction
*/
- fp->fpu_regs.kfpu_u.kfpu_xs->xs_xstate_bv |= XFEATURE_LEGACY_FP;
+ fp->fpu_regs.kfpu_u.kfpu_xs->xs_header.xsh_xstate_bv |=
+ XFEATURE_LEGACY_FP;
break;
default:
panic("Invalid fp_save_mech");
@@ -1154,7 +1150,8 @@ fpsetcw(uint16_t fcw, uint32_t mxcsr)
* Always set LEGACY_FP as it may have been cleared by XSAVE
* instruction
*/
- fp->fpu_regs.kfpu_u.kfpu_xs->xs_xstate_bv |= XFEATURE_LEGACY_FP;
+ fp->fpu_regs.kfpu_u.kfpu_xs->xs_header.xsh_xstate_bv |=
+ XFEATURE_LEGACY_FP;
break;
default:
panic("Invalid fp_save_mech");
@@ -1177,7 +1174,7 @@ kernel_fpu_fpstate_init(kfpu_state_t *kfpu)
xs = kfpu->kfpu_ctx.fpu_regs.kfpu_u.kfpu_xs;
bzero(xs, cpuid_get_xsave_size());
bcopy(&avx_initial, xs, sizeof (*xs));
- xs->xs_xstate_bv = XFEATURE_LEGACY_FP | XFEATURE_SSE;
+ xs->xs_header.xsh_xstate_bv = XFEATURE_LEGACY_FP | XFEATURE_SSE;
kfpu->kfpu_ctx.fpu_xsave_mask = XFEATURE_FP_ALL;
break;
default:
diff --git a/usr/src/uts/intel/sys/fp.h b/usr/src/uts/intel/sys/fp.h
index dfbcf7dc1c..7423444c60 100644
--- a/usr/src/uts/intel/sys/fp.h
+++ b/usr/src/uts/intel/sys/fp.h
@@ -21,6 +21,7 @@
/*
* Copyright 2015 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2018, Joyent, Inc.
+ * Copyright 2022 Oxide Computer Company
*
* Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
*/
@@ -230,11 +231,23 @@ struct fxsave_state {
} __aligned(16); /* 512 bytes */
/*
+ * This structure represents the header portion of the data layout used by the
+ * 'xsave' instruction variants. It is documented in section 13.4.2 of the
+ * Intel 64 and IA-32 Architectures Software Developer’s Manual, Volume 1
+ * (IASDv1). Although "header" is somewhat of a misnomer, considering the data
+ * begins at offset 512 of the xsave area, its contents dictate which portions
+ * of the area are present and how they may be formatted.
+ */
+struct xsave_header {
+ uint64_t xsh_xstate_bv;
+ uint64_t xsh_xcomp_bv;
+ uint64_t xsh_reserved[6];
+};
+
+/*
* This structure is written to memory by one of the 'xsave' instruction
* variants. The first 512 bytes are compatible with the format of the 'fxsave'
- * area. The header portion of the xsave layout is documented in section
- * 13.4.2 of the Intel 64 and IA-32 Architectures Software Developer’s Manual,
- * Volume 1 (IASDv1). The extended portion is documented in section 13.4.3.
+ * area. The extended portion is documented in section 13.4.3.
*
* Our size is at least AVX_XSAVE_SIZE (832 bytes), which is asserted
* statically. Enabling additional xsave-related CPU features requires an
@@ -245,9 +258,10 @@ struct fxsave_state {
* determined dynamically by querying the CPU. See the xsave_info structure in
* cpuid.c.
*
- * xsave component usage is tracked using bits in the xs_xstate_bv field. The
- * components are documented in section 13.1 of IASDv1. For easy reference,
- * this is a summary of the currently defined component bit definitions:
+ * xsave component usage is tracked using bits in the xstate_bv field of the
+ * header. The components are documented in section 13.1 of IASDv1. For easy
+ * reference, this is a summary of the currently defined component bit
+ * definitions:
* x87 0x0001
* SSE 0x0002
* AVX 0x0004
@@ -259,21 +273,28 @@ struct fxsave_state {
* PT 0x0100
* PKRU 0x0200
* When xsaveopt_ctxt is being used to save into the xsave_state area, the
- * xs_xstate_bv field is updated by the xsaveopt instruction to indicate which
+ * xstate_bv field is updated by the xsaveopt instruction to indicate which
* elements of the xsave area are active.
*
- * xs_xcomp_bv should always be 0, since we do not currently use the compressed
- * form of xsave (xsavec).
+ * The xcomp_bv field should always be 0, since we do not currently use the
+ * compressed form of xsave (xsavec).
*/
struct xsave_state {
struct fxsave_state xs_fxsave; /* 0-511 legacy region */
- uint64_t xs_xstate_bv; /* 512-519 start xsave header */
- uint64_t xs_xcomp_bv; /* 520-527 */
- uint64_t xs_reserved[6]; /* 528-575 end xsave header */
+ struct xsave_header xs_header; /* 512-575 XSAVE header */
upad128_t xs_ymm[16]; /* 576 AVX component */
} __aligned(64);
/*
+ * While AVX_XSAVE_SIZE is the smallest the kernel will allocate for FPU
+ * state-saving, other consumers may constrain themselves to the minimum
+ * possible xsave state structure, which features only the legacy area and the
+ * bare xsave header.
+ */
+#define MIN_XSAVE_SIZE (sizeof (struct fxsave_state) + \
+ sizeof (struct xsave_header))
+
+/*
* Kernel's FPU save area
*/
typedef struct {
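For reference on the arithmetic: struct fxsave_state is 512 bytes and struct xsave_header is 64 (two 8-byte bitmap words plus six reserved 8-byte words), so MIN_XSAVE_SIZE works out to 576 bytes. A compile-time sanity check, in the style of the CTASSERT already used by fpu_getset.c above, would be:

	CTASSERT(MIN_XSAVE_SIZE == 576);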