From f7d0086cff41f34796511cdbb454eedbaa8ee9c8 Mon Sep 17 00:00:00 2001 From: Robert Mustacchi Date: Wed, 23 May 2018 21:56:51 +0000 Subject: OS-6992 Want hypervisor API for FPU management OS-6999 bhyve should use HMA FPU framework Reviewed by: Patrick Mooney Reviewed by: John Levon Approved by: Patrick Mooney --- usr/src/compat/freebsd/amd64/machine/fpu.h | 3 +- usr/src/uts/i86pc/Makefile.files | 1 + usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c | 157 +++++--------------- usr/src/uts/i86pc/os/hma_fpu.c | 224 +++++++++++++++++++++++++++++ usr/src/uts/i86pc/sys/hma.h | 103 +++++++++++++ usr/src/uts/intel/ia32/os/archdep.c | 5 +- usr/src/uts/intel/sys/fp.h | 3 + 7 files changed, 365 insertions(+), 131 deletions(-) create mode 100644 usr/src/uts/i86pc/os/hma_fpu.c create mode 100644 usr/src/uts/i86pc/sys/hma.h diff --git a/usr/src/compat/freebsd/amd64/machine/fpu.h b/usr/src/compat/freebsd/amd64/machine/fpu.h index 48e686780c..6bc651d996 100644 --- a/usr/src/compat/freebsd/amd64/machine/fpu.h +++ b/usr/src/compat/freebsd/amd64/machine/fpu.h @@ -11,13 +11,12 @@ /* * Copyright 2014 Pluribus Networks Inc. + * Copyright (c) 2018, Joyent, Inc. 
*/ #ifndef _COMPAT_FREEBSD_AMD64_MACHINE_FPU_H_ #define _COMPAT_FREEBSD_AMD64_MACHINE_FPU_H_ -#define XSAVE_AREA_ALIGN 64 - void fpuexit(kthread_t *td); void fpurestore(void *); void fpusave(void *); diff --git a/usr/src/uts/i86pc/Makefile.files b/usr/src/uts/i86pc/Makefile.files index 768aa390b9..fcf9820fd8 100644 --- a/usr/src/uts/i86pc/Makefile.files +++ b/usr/src/uts/i86pc/Makefile.files @@ -64,6 +64,7 @@ CORE_OBJS += \ hardclk.o \ hat_i86.o \ hat_kdi.o \ + hma_fpu.o \ hment.o \ hold_page.o \ hrtimers.o \ diff --git a/usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c b/usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c index 4e23ab4399..b523b69bd9 100644 --- a/usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c +++ b/usr/src/uts/i86pc/io/vmm/vmm_sol_glue.c @@ -52,6 +52,7 @@ #include #include #include +#include #include @@ -504,157 +505,65 @@ vmm_cpuid_init(void) cpu_exthigh = regs[0]; } -struct savefpu { - fpu_ctx_t fsa_fp_ctx; -}; - -static vmem_t *fpu_save_area_arena; - -static void -fpu_save_area_init(void) -{ - fpu_save_area_arena = vmem_create("fpu_save_area", - NULL, 0, XSAVE_AREA_ALIGN, - segkmem_alloc, segkmem_free, heap_arena, 0, VM_BESTFIT | VM_SLEEP); -} - -static void -fpu_save_area_cleanup(void) -{ - vmem_destroy(fpu_save_area_arena); -} - +/* + * FreeBSD uses the struct savefpu for managing the FPU state. That is mimicked + * by our hypervisor multiplexor framework structure. 
+ */ struct savefpu * fpu_save_area_alloc(void) { - struct savefpu *fsa = vmem_alloc(fpu_save_area_arena, - sizeof (struct savefpu), VM_SLEEP); - - bzero(fsa, sizeof (struct savefpu)); - fsa->fsa_fp_ctx.fpu_regs.kfpu_u.kfpu_generic = - kmem_cache_alloc(fpsave_cachep, KM_SLEEP); - - return (fsa); + return ((struct savefpu *)hma_fpu_alloc(KM_SLEEP)); } void fpu_save_area_free(struct savefpu *fsa) { - kmem_cache_free(fpsave_cachep, - fsa->fsa_fp_ctx.fpu_regs.kfpu_u.kfpu_generic); - vmem_free(fpu_save_area_arena, fsa, sizeof (struct savefpu)); + hma_fpu_t *fpu = (hma_fpu_t *)fsa; + hma_fpu_free(fpu); } void fpu_save_area_reset(struct savefpu *fsa) { - extern const struct fxsave_state sse_initial; - extern const struct xsave_state avx_initial; - struct fpu_ctx *fp; - struct fxsave_state *fx; - struct xsave_state *xs; - - fp = &fsa->fsa_fp_ctx; - - fp->fpu_regs.kfpu_status = 0; - fp->fpu_regs.kfpu_xstatus = 0; - - switch (fp_save_mech) { - case FP_FXSAVE: - fx = fp->fpu_regs.kfpu_u.kfpu_fx; - bcopy(&sse_initial, fx, sizeof (*fx)); - break; - case FP_XSAVE: - fp->fpu_xsave_mask = (XFEATURE_ENABLED_X87 | - XFEATURE_ENABLED_SSE | XFEATURE_ENABLED_AVX); - xs = fp->fpu_regs.kfpu_u.kfpu_xs; - bcopy(&avx_initial, xs, sizeof (*xs)); - break; - default: - panic("Invalid fp_save_mech"); - /*NOTREACHED*/ - } + hma_fpu_t *fpu = (hma_fpu_t *)fsa; + hma_fpu_init(fpu); } +/* + * This glue function is supposed to save the host's FPU state. This is always + * paired in the general bhyve code with a call to fpurestore(). Therefore, we + * treat this as a nop and do all the work in fpurestore(), which will have the + * context argument that we want anyway.
+ */ void fpuexit(kthread_t *td) { - fp_save(&curthread->t_lwp->lwp_pcb.pcb_fpu); -} - -static __inline void -vmm_fxrstor(struct fxsave_state *addr) -{ - __asm __volatile("fxrstor %0" : : "m" (*(addr))); -} - -static __inline void -vmm_fxsave(struct fxsave_state *addr) -{ - __asm __volatile("fxsave %0" : "=m" (*(addr))); -} - -static __inline void -vmm_xrstor(struct xsave_state *addr, uint64_t mask) -{ - uint32_t low, hi; - - low = mask; - hi = mask >> 32; - __asm __volatile("xrstor %0" : : "m" (*addr), "a" (low), "d" (hi)); -} - -static __inline void -vmm_xsave(struct xsave_state *addr, uint64_t mask) -{ - uint32_t low, hi; - - low = mask; - hi = mask >> 32; - __asm __volatile("xsave %0" : "=m" (*addr) : "a" (low), "d" (hi) : - "memory"); } +/* + * This glue function is supposed to restore the guest's FPU state from the save + * area back to the host. In FreeBSD, it is assumed that the host state has + * already been saved by a call to fpuexit(); however, we do both here. + */ void fpurestore(void *arg) { - struct savefpu *fsa = (struct savefpu *)arg; - struct fpu_ctx *fp; - - fp = &fsa->fsa_fp_ctx; - - switch (fp_save_mech) { - case FP_FXSAVE: - vmm_fxrstor(fp->fpu_regs.kfpu_u.kfpu_fx); - break; - case FP_XSAVE: - vmm_xrstor(fp->fpu_regs.kfpu_u.kfpu_xs, fp->fpu_xsave_mask); - break; - default: - panic("Invalid fp_save_mech"); - /*NOTREACHED*/ - } + hma_fpu_t *fpu = arg; + + hma_fpu_start_guest(fpu); } +/* + * This glue function is supposed to save the guest's FPU state. The host's FPU + * state is not expected to be restored necessarily due to the use of FPU + * emulation through CR0.TS. However, we can and do restore it here. 
+ */ void fpusave(void *arg) { - struct savefpu *fsa = (struct savefpu *)arg; - struct fpu_ctx *fp; - - fp = &fsa->fsa_fp_ctx; - - switch (fp_save_mech) { - case FP_FXSAVE: - vmm_fxsave(fp->fpu_regs.kfpu_u.kfpu_fx); - break; - case FP_XSAVE: - vmm_xsave(fp->fpu_regs.kfpu_u.kfpu_xs, fp->fpu_xsave_mask); - break; - default: - panic("Invalid fp_save_mech"); - /*NOTREACHED*/ - } + hma_fpu_t *fpu = arg; + + hma_fpu_stop_guest(fpu); } void @@ -662,14 +571,12 @@ vmm_sol_glue_init(void) { vmm_alloc_init(); vmm_cpuid_init(); - fpu_save_area_init(); unr_idx = 0; } void vmm_sol_glue_cleanup(void) { - fpu_save_area_cleanup(); vmm_alloc_cleanup(); } diff --git a/usr/src/uts/i86pc/os/hma_fpu.c b/usr/src/uts/i86pc/os/hma_fpu.c new file mode 100644 index 0000000000..f207470e85 --- /dev/null +++ b/usr/src/uts/i86pc/os/hma_fpu.c @@ -0,0 +1,224 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright (c) 2018, Joyent, Inc. + */ + +/* + * This implements the hypervisor multiplexor FPU API. Its purpose is to make it + * easy to switch between the host and guest hypervisor while hiding all the + * details about CR0.TS and how to save the host's state as required. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct hma_fpu { + fpu_ctx_t hf_guest_fpu; + kthread_t *hf_curthread; + boolean_t hf_inguest; +}; + +int +hma_fpu_init(hma_fpu_t *fpu) +{ + struct xsave_state *xs; + + ASSERT0(fpu->hf_inguest); + + switch (fp_save_mech) { + case FP_FXSAVE: + bcopy(&sse_initial, fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_fx, + sizeof (struct fxsave_state)); + fpu->hf_guest_fpu.fpu_xsave_mask = 0; + break; + case FP_XSAVE: + /* + * Zero everything in the xsave case as we may have data in + * the structure that's not part of the initial value (which + * only really deals with a small portion of the xsave state). + */ + xs = fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_xs; + bzero(xs, cpuid_get_xsave_size()); + bcopy(&avx_initial, xs, sizeof (*xs)); + xs->xs_xstate_bv = XFEATURE_LEGACY_FP | XFEATURE_SSE; + fpu->hf_guest_fpu.fpu_xsave_mask = XFEATURE_FP_ALL; + break; + default: + panic("Invalid fp_save_mech"); + } + + fpu->hf_guest_fpu.fpu_flags = FPU_EN | FPU_VALID; + + return (0); +} + +void +hma_fpu_free(hma_fpu_t *fpu) +{ + if (fpu == NULL) + return; + + ASSERT3P(fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_generic, !=, NULL); + kmem_cache_free(fpsave_cachep, + fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_generic); + kmem_free(fpu, sizeof (*fpu)); +} + +hma_fpu_t * +hma_fpu_alloc(int kmflag) +{ + hma_fpu_t *fpu; + + fpu = kmem_zalloc(sizeof (hma_fpu_t), kmflag); + if (fpu == NULL) + return (NULL); + + fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_generic = + kmem_cache_alloc(fpsave_cachep, kmflag); + if (fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_generic == NULL) { + kmem_free(fpu, sizeof (hma_fpu_t)); + return (NULL); + } + fpu->hf_inguest = B_FALSE; + + /* + * Make sure the entire structure is zero. 
+ */ + switch (fp_save_mech) { + case FP_FXSAVE: + bzero(fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_generic, + sizeof (struct fxsave_state)); + break; + case FP_XSAVE: + bzero(fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_generic, + cpuid_get_xsave_size()); + break; + default: + panic("Invalid fp_save_mech"); + } + + return (fpu); +} + +void +hma_fpu_start_guest(hma_fpu_t *fpu) +{ + /* + * Note, we don't check / assert whether or not t_preempt is true because + * there are contexts where this is safe to call (from a context op) + * where t_preempt may not be set. + */ + ASSERT3S(fpu->hf_inguest, ==, B_FALSE); + ASSERT3P(fpu->hf_curthread, ==, NULL); + ASSERT3P(curthread->t_lwp, !=, NULL); + ASSERT3U(fpu->hf_guest_fpu.fpu_flags & FPU_EN, !=, 0); + ASSERT3U(fpu->hf_guest_fpu.fpu_flags & FPU_VALID, !=, 0); + + fpu->hf_inguest = B_TRUE; + fpu->hf_curthread = curthread; + + + fp_save(&curthread->t_lwp->lwp_pcb.pcb_fpu); + fp_restore(&fpu->hf_guest_fpu); + fpu->hf_guest_fpu.fpu_flags &= ~FPU_VALID; +} + +void +hma_fpu_stop_guest(hma_fpu_t *fpu) +{ + ASSERT3S(fpu->hf_inguest, ==, B_TRUE); + ASSERT3P(fpu->hf_curthread, ==, curthread); + ASSERT3U(fpu->hf_guest_fpu.fpu_flags & FPU_EN, !=, 0); + ASSERT3U(fpu->hf_guest_fpu.fpu_flags & FPU_VALID, ==, 0); + + /* + * Note, we can't use fp_save because it assumes that we're saving to + * the thread's PCB and not somewhere else. Because this is a different + * FPU context, we instead have to do this ourselves.
+ */ + switch (fp_save_mech) { + case FP_FXSAVE: + fpxsave(fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_fx); + break; + case FP_XSAVE: + xsavep(fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_xs, + fpu->hf_guest_fpu.fpu_xsave_mask); + break; + default: + panic("Invalid fp_save_mech"); + /*NOTREACHED*/ + } + fpu->hf_guest_fpu.fpu_flags |= FPU_VALID; + + fp_restore(&curthread->t_lwp->lwp_pcb.pcb_fpu); + + fpu->hf_inguest = B_FALSE; + fpu->hf_curthread = NULL; +} + +void +hma_fpu_get_fxsave_state(const hma_fpu_t *fpu, struct fxsave_state *fx) +{ + const struct fxsave_state *guest; + + ASSERT3S(fpu->hf_inguest, ==, B_FALSE); + + guest = fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_fx; + bcopy(guest, fx, sizeof (*fx)); +} + +int +hma_fpu_set_fxsave_state(hma_fpu_t *fpu, const struct fxsave_state *fx) +{ + struct fxsave_state *gfx; + struct xsave_state *gxs; + + ASSERT3S(fpu->hf_inguest, ==, B_FALSE); + + /* + * If reserved bits are set in fx_mxcsr, then we will take a #GP when + * we restore them. Reject this outright. + * + * We do not need to check if we are dealing with state that has pending + * exceptions. This was only the case with the original FPU save and + * restore mechanisms (fsave/frstor). When using fxsave/fxrstor and + * xsave/xrstor they will be deferred to the user using the FPU, which + * is what we'd want here (they'd be used in guest context). 
+ */ + if ((fx->fx_mxcsr & ~sse_mxcsr_mask) != 0) + return (EINVAL); + + switch (fp_save_mech) { + case FP_FXSAVE: + gfx = fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_fx; + bcopy(fx, gfx, sizeof (*fx)); + break; + case FP_XSAVE: + gxs = fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_xs; + bzero(gxs, cpuid_get_xsave_size()); + bcopy(fx, &gxs->xs_fxsave, sizeof (*fx)); + gxs->xs_xstate_bv = XFEATURE_LEGACY_FP | XFEATURE_SSE; + break; + default: + panic("Invalid fp_save_mech"); + /* NOTREACHED */ + } + + return (0); +} diff --git a/usr/src/uts/i86pc/sys/hma.h b/usr/src/uts/i86pc/sys/hma.h new file mode 100644 index 0000000000..00009cf439 --- /dev/null +++ b/usr/src/uts/i86pc/sys/hma.h @@ -0,0 +1,103 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright (c) 2018, Joyent, Inc. + */ + +#ifndef _SYS_HMA_H +#define _SYS_HMA_H + +/* + * Hypervisor Multiplexor API + * + * This provides a set of APIs that are usable by hypervisor implementations + * that allows them to coexist and to make sure that they are all in a + * consistent state. + */ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * FPU related management. These functions provide a set of APIs to manage the + * FPU state and switch between host and guest management of this state. + */ + +typedef struct hma_fpu hma_fpu_t; + +/* + * Allocate and free FPU state management structures. + */ +extern hma_fpu_t *hma_fpu_alloc(int); +extern void hma_fpu_free(hma_fpu_t *); + +/* + * Resets the FPU to the standard x86 default state. 
This should be called after + * allocation and whenever the guest needs to logically reset the state (when + * the CPU is reset, etc.). If the system supports xsave, then the xbv state + * will be set to have the x87 and SSE portions as valid and the rest will be + * set to their initial states (regardless of whether or not they will be + * advertised in the host). + */ +extern int hma_fpu_init(hma_fpu_t *); + +/* + * Save the current host's FPU state and restore the guest's state in the FPU. + * At this point, CR0.TS will not be set. The caller must not use the FPU in any + * way before entering the guest. + * + * This should be used in normal operation before entering the guest. It should + * also be used in a thread context operation when the thread is being scheduled + * again. This interface has an implicit assumption that a given guest state + * will be mapped to only one specific OS thread at any given time. + * + * This must be called with preemption disabled. + */ +extern void hma_fpu_start_guest(hma_fpu_t *); + +/* + * Save the current guest's FPU state and restore the host's state in the FPU. + * By the time the thread returns to userland, the FPU will be in a usable + * state; however, the FPU will not be usable while inside the kernel (CR0.TS + * will be set). + * + * This should be used in normal operation after leaving the guest and returning + * to user land. It should also be used in a thread context operation when the + * thread is being descheduled. Like the hma_fpu_start_guest() interface, this + * interface has an implicit assumption that a given guest state will be mapped + * to only a single OS thread at any given time. + * + * This must be called with preemption disabled. + */ +extern void hma_fpu_stop_guest(hma_fpu_t *); + +/* + * Get and set the contents of the FPU save area. This sets the fxsave style + * information. 
In all cases when this is in use, if an XSAVE state is actually + * used by the host, then this will end up zeroing all of the non-fxsave state + * and it will reset the xbv to indicate that the legacy x87 and SSE portions + * are valid. + * + * These functions cannot be called while the FPU is in use by the guest. It is + * up to callers to guarantee this fact. + */ +extern void hma_fpu_get_fxsave_state(const hma_fpu_t *, struct fxsave_state *); +extern int hma_fpu_set_fxsave_state(hma_fpu_t *, const struct fxsave_state *); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_HMA_H */ diff --git a/usr/src/uts/intel/ia32/os/archdep.c b/usr/src/uts/intel/ia32/os/archdep.c index fa65b59267..e610f1fb09 100644 --- a/usr/src/uts/intel/ia32/os/archdep.c +++ b/usr/src/uts/intel/ia32/os/archdep.c @@ -25,7 +25,7 @@ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ /* All Rights Reserved */ /* - * Copyright 2017 Joyent, Inc. + * Copyright (c) 2018, Joyent, Inc. * Copyright 2012 Nexenta Systems, Inc. All rights reserved. */ @@ -68,9 +68,6 @@ #include #include -extern const struct fnsave_state x87_initial; -extern const struct fxsave_state sse_initial; - /* * Map an fnsave-formatted save area into an fxsave-formatted save area. * diff --git a/usr/src/uts/intel/sys/fp.h b/usr/src/uts/intel/sys/fp.h index 7f08f8c8f8..fe5471e855 100644 --- a/usr/src/uts/intel/sys/fp.h +++ b/usr/src/uts/intel/sys/fp.h @@ -345,6 +345,9 @@ extern void fp_lwp_init(struct _klwp *); extern void fp_lwp_cleanup(struct _klwp *); extern void fp_lwp_dup(struct _klwp *); +extern const struct fxsave_state sse_initial; +extern const struct xsave_state avx_initial; + #endif /* _KERNEL */ #ifdef __cplusplus -- cgit v1.2.3