author      Kuriakose Kuruvilla <kuriakose.kuruvilla@oracle.com>    2010-08-16 19:36:08 -0700
committer   Kuriakose Kuruvilla <kuriakose.kuruvilla@oracle.com>    2010-08-16 19:36:08 -0700
commit      7af88ac71631ebf259c6c4c22a9f649ddff3e270 (patch)
tree        3d0109c3dff3677bbff5901e1a9616ace0fae4da /usr/src
parent      315e695527b211489a44386ec695c6ccd3af4e6e (diff)
download    illumos-joyent-7af88ac71631ebf259c6c4c22a9f649ddff3e270.tar.gz
6958308 XSAVE/XRSTOR mechanism to save and restore processor state
Contributed by Lejun Zhu <lejun.zhu@intel.com>
Diffstat (limited to 'usr/src')
23 files changed, 941 insertions, 190 deletions
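The hunks below thread the new mechanism through three layers: CPUID detection and XCR0 (XFEATURE_ENABLED_MASK) setup in the kernel, an xsave/xrstor-based FPU context-switch path, and the user-visible AV_386_XSAVE/AV_386_AVX hardware-capability bits. As a sketch of the user-facing end only (not part of the patch itself), the following checks those aux-vector bits with getisax(3C); it assumes an illumos/Solaris x86 system where <sys/auxv.h> exposes the AV_386_* flags added by the sys/auxv_386.h hunk.

#include <sys/types.h>
#include <sys/auxv.h>
#include <stdio.h>

int
main(void)
{
        uint32_t hw;

        (void) getisax(&hw, 1);         /* first word of AV_386_* flags */
        (void) printf("xsave: %s\n", (hw & AV_386_XSAVE) ? "yes" : "no");
        (void) printf("avx:   %s\n", (hw & AV_386_AVX) ? "yes" : "no");
        return (0);
}

Note that, per the cpuid_pass4() hunk below, AV_386_XSAVE is advertised only when CPUID reports both XSAVE and OSXSAVE, i.e. only after the kernel has actually enabled the feature in CR4 and XCR0.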
diff --git a/usr/src/common/elfcap/elfcap.c b/usr/src/common/elfcap/elfcap.c index 55cd3e116d..0e1558468a 100644 --- a/usr/src/common/elfcap/elfcap.c +++ b/usr/src/common/elfcap/elfcap.c @@ -20,8 +20,7 @@ */ /* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. */ /* LINTLIBRARY */ @@ -288,6 +287,14 @@ static const elfcap_desc_t hw1_386[ELFCAP_NUM_HW1_386] = { { /* 0x08000000 */ AV_386_PCLMULQDQ, STRDESC("AV_386_PCLMULQDQ"), STRDESC("PCLMULQDQ"), STRDESC("pclmulqdq"), + }, + { /* 0x10000000 */ + AV_386_XSAVE, STRDESC("AV_386_XSAVE"), + STRDESC("XSAVE"), STRDESC("xsave"), + }, + { /* 0x20000000 */ + AV_386_AVX, STRDESC("AV_386_AVX"), + STRDESC("AVX"), STRDESC("avx"), } }; diff --git a/usr/src/common/elfcap/elfcap.h b/usr/src/common/elfcap/elfcap.h index 6cd68b53f2..9f0ef25f09 100644 --- a/usr/src/common/elfcap/elfcap.h +++ b/usr/src/common/elfcap/elfcap.h @@ -20,8 +20,7 @@ */ /* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. */ #ifndef _ELFCAP_DOT_H @@ -114,7 +113,7 @@ typedef enum { */ #define ELFCAP_NUM_SF1 3 #define ELFCAP_NUM_HW1_SPARC 17 -#define ELFCAP_NUM_HW1_386 28 +#define ELFCAP_NUM_HW1_386 30 /* diff --git a/usr/src/uts/common/disp/thread.c b/usr/src/uts/common/disp/thread.c index 245ee27e04..5ed9110251 100644 --- a/usr/src/uts/common/disp/thread.c +++ b/usr/src/uts/common/disp/thread.c @@ -20,8 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved. */ #include <sys/types.h> @@ -186,11 +185,11 @@ thread_init(void) /* * "struct _klwp" includes a "struct pcb", which includes a - * "struct fpu", which needs to be 16-byte aligned on amd64 - * (and even on i386 for fxsave/fxrstor). + * "struct fpu", which needs to be 64-byte aligned on amd64 + * (and even on i386) for xsave/xrstor. */ lwp_cache = kmem_cache_create("lwp_cache", sizeof (klwp_t), - 16, NULL, NULL, NULL, NULL, NULL, 0); + 64, NULL, NULL, NULL, NULL, NULL, 0); #else /* * Allocate thread structures from static_arena. This prevents diff --git a/usr/src/uts/common/sys/auxv_386.h b/usr/src/uts/common/sys/auxv_386.h index 1e7afc3a22..f9b4867f10 100644 --- a/usr/src/uts/common/sys/auxv_386.h +++ b/usr/src/uts/common/sys/auxv_386.h @@ -19,8 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. */ #ifndef _SYS_AUXV_386_H @@ -68,9 +67,12 @@ extern "C" { #define AV_386_MOVBE 0x2000000 /* Intel MOVBE insns */ #define AV_386_AES 0x4000000 /* Intel AES insns */ #define AV_386_PCLMULQDQ 0x8000000 /* Intel PCLMULQDQ insn */ +#define AV_386_XSAVE 0x10000000 /* Intel XSAVE/XRSTOR insns */ +#define AV_386_AVX 0x20000000 /* Intel AVX insns */ #define FMT_AV_386 \ "\20" \ + "\36avx\35xsave" \ "\34pclmulqdq\33aes" \ "\32movbe\31sse4.2" \ "\30sse4.1\27ssse3\26amd_lzcnt\25popcnt" \ diff --git a/usr/src/uts/i86pc/ml/genassym.c b/usr/src/uts/i86pc/ml/genassym.c index 4836628401..a34ca50669 100644 --- a/usr/src/uts/i86pc/ml/genassym.c +++ b/usr/src/uts/i86pc/ml/genassym.c @@ -19,8 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 
- * Use is subject to license terms. + * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved. */ #ifndef _GENASSYM @@ -123,6 +122,10 @@ main(int argc, char *argv[]) printf("#define\tFP_387 0x%x\n", FP_387); printf("#define\t__FP_SSE 0x%x\n", __FP_SSE); + printf("#define\tFP_FNSAVE 0x%x\n", FP_FNSAVE); + printf("#define\tFP_FXSAVE 0x%x\n", FP_FXSAVE); + printf("#define\tFP_XSAVE 0x%x\n", FP_XSAVE); + printf("#define\tAV_INT_SPURIOUS 0x%x\n", AV_INT_SPURIOUS); printf("#define\tCPU_READY 0x%x\n", CPU_READY); diff --git a/usr/src/uts/i86pc/ml/offsets.in b/usr/src/uts/i86pc/ml/offsets.in index 4b5d4fc694..20e0c972d4 100644 --- a/usr/src/uts/i86pc/ml/offsets.in +++ b/usr/src/uts/i86pc/ml/offsets.in @@ -165,6 +165,7 @@ _klwp fpu_ctx fpu_regs FPU_CTX_FPU_REGS fpu_flags FPU_CTX_FPU_FLAGS + fpu_xsave_mask FPU_CTX_FPU_XSAVE_MASK fxsave_state FXSAVE_STATE_SIZE fx_fsw FXSAVE_STATE_FSW diff --git a/usr/src/uts/i86pc/os/cpr_impl.c b/usr/src/uts/i86pc/os/cpr_impl.c index 8f57ca7366..555ed9f842 100644 --- a/usr/src/uts/i86pc/os/cpr_impl.c +++ b/usr/src/uts/i86pc/os/cpr_impl.c @@ -65,6 +65,7 @@ #include <sys/reboot.h> #include <sys/acpi/acpi.h> #include <sys/acpica.h> +#include <sys/fp.h> #define AFMT "%lx" @@ -944,6 +945,13 @@ i_cpr_start_cpu(void) pat_sync(); /* + * If we use XSAVE, we need to restore XFEATURE_ENABLE_MASK register. + */ + if (fp_save_mech == FP_XSAVE) { + setup_xfem(); + } + + /* * Initialize this CPU's syscall handlers */ init_cpu_syscall(cp); diff --git a/usr/src/uts/i86pc/os/cpuid.c b/usr/src/uts/i86pc/os/cpuid.c index 44db76c814..19e505917d 100644 --- a/usr/src/uts/i86pc/os/cpuid.c +++ b/usr/src/uts/i86pc/os/cpuid.c @@ -118,7 +118,7 @@ uint_t x86_clflush_size = 0; uint_t pentiumpro_bug4046376; uint_t pentiumpro_bug4064495; -#define NUM_X86_FEATURES 33 +#define NUM_X86_FEATURES 35 void *x86_featureset; ulong_t x86_featureset0[BT_SIZEOFMAP(NUM_X86_FEATURES)]; @@ -155,7 +155,9 @@ char *x86_feature_names[NUM_X86_FEATURES] = { "clfsh", "64", "aes", - "pclmulqdq" }; + "pclmulqdq", + "xsave", + "avx" }; static void * init_x86_featureset(void) @@ -217,6 +219,11 @@ print_x86_featureset(void *featureset) } uint_t enable486; + +static size_t xsave_state_size = 0; +uint64_t xsave_bv_all = (XFEATURE_LEGACY_FP | XFEATURE_SSE); +boolean_t xsave_force_disable = B_FALSE; + /* * This is set to platform type Solaris is running on. */ @@ -247,6 +254,23 @@ struct mwait_info { }; /* + * xsave/xrestor info. + * + * This structure contains HW feature bits and size of the xsave save area. + * Note: the kernel will use the maximum size required for all hardware + * features. It is not optimized for potential memory savings if features at + * the end of the save area are not enabled. + */ +struct xsave_info { + uint32_t xsav_hw_features_low; /* Supported HW features */ + uint32_t xsav_hw_features_high; /* Supported HW features */ + size_t xsav_max_size; /* max size save area for HW features */ + size_t ymm_size; /* AVX: size of ymm save area */ + size_t ymm_offset; /* AVX: offset for ymm save area */ +}; + + /* * These constants determine how many of the elements of the * cpuid we cache in the cpuid_info data structure; the * remaining elements are accessible via the cpuid instruction.
@@ -327,6 +351,8 @@ struct cpuid_info { uint_t cpi_procnodeid; /* AMD: nodeID on HT, Intel: chipid */ uint_t cpi_procnodes_per_pkg; /* AMD: # of nodes in the package */ /* Intel: 1 */ + + struct xsave_info cpi_xsave; /* fn D: xsave/xrestor info */ }; @@ -429,6 +455,12 @@ static struct cpuid_info cpuid_info0; BITX((cpi)->cpi_std[5].cp_edx, c_state + 3, c_state) /* + * XSAVE leaf 0xD enumeration + */ +#define CPUID_LEAFD_2_YMM_OFFSET 576 +#define CPUID_LEAFD_2_YMM_SIZE 256 + +/* * Functions we consume from cpuid_subr.c; don't publish these in a header * file to try and keep people using the expected cpuid_* interfaces. */ @@ -815,6 +847,27 @@ cpuid_amd_getids(cpu_t *cpu) } } +/* + * Setup XFeature_Enabled_Mask register. Required by xsave feature. + */ +void +setup_xfem(void) +{ + uint64_t flags = XFEATURE_LEGACY_FP; + + ASSERT(is_x86_feature(x86_featureset, X86FSET_XSAVE)); + + if (is_x86_feature(x86_featureset, X86FSET_SSE)) + flags |= XFEATURE_SSE; + + if (is_x86_feature(x86_featureset, X86FSET_AVX)) + flags |= XFEATURE_AVX; + + set_xcr(XFEATURE_ENABLED_MASK, flags); + + xsave_bv_all = flags; +} + void * cpuid_pass1(cpu_t *cpu) { @@ -827,7 +880,6 @@ cpuid_pass1(cpu_t *cpu) extern int idle_cpu_prefer_mwait; #endif - #if !defined(__xpv) determine_platform(); #endif @@ -1082,8 +1134,18 @@ cpuid_pass1(cpu_t *cpu) * Do not support MONITOR/MWAIT under a hypervisor */ mask_ecx &= ~CPUID_INTC_ECX_MON; + /* + * Do not support XSAVE under a hypervisor for now + */ + xsave_force_disable = B_TRUE; + #endif /* __xpv */ + if (xsave_force_disable) { + mask_ecx &= ~CPUID_INTC_ECX_XSAVE; + mask_ecx &= ~CPUID_INTC_ECX_AVX; + } + /* * Now we've figured out the masks that determine * which bits we choose to believe, apply the masks @@ -1180,6 +1242,15 @@ cpuid_pass1(cpu_t *cpu) if (cp->cp_ecx & CPUID_INTC_ECX_PCLMULQDQ) { add_x86_feature(featureset, X86FSET_PCLMULQDQ); } + + if (cp->cp_ecx & CPUID_INTC_ECX_XSAVE) { + add_x86_feature(featureset, X86FSET_XSAVE); + /* We only test AVX when there is XSAVE */ + if (cp->cp_ecx & CPUID_INTC_ECX_AVX) { + add_x86_feature(featureset, + X86FSET_AVX); + } + } } } if (cp->cp_edx & CPUID_INTC_EDX_DE) { @@ -1724,6 +1795,92 @@ cpuid_pass2(cpu_t *cpu) cp = NULL; } + /* + * XSAVE enumeration + */ + if (cpi->cpi_maxeax >= 0xD && cpi->cpi_vendor == X86_VENDOR_Intel) { + struct cpuid_regs regs; + boolean_t cpuid_d_valid = B_TRUE; + + cp = &regs; + cp->cp_eax = 0xD; + cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0; + + (void) __cpuid_insn(cp); + + /* + * Sanity checks for debug + */ + if ((cp->cp_eax & XFEATURE_LEGACY_FP) == 0 || + (cp->cp_eax & XFEATURE_SSE) == 0) { + cpuid_d_valid = B_FALSE; + } + + cpi->cpi_xsave.xsav_hw_features_low = cp->cp_eax; + cpi->cpi_xsave.xsav_hw_features_high = cp->cp_edx; + cpi->cpi_xsave.xsav_max_size = cp->cp_ecx; + + /* + * If the hw supports AVX, get the size and offset in the save + * area for the ymm state.
+ */ + if (cpi->cpi_xsave.xsav_hw_features_low & XFEATURE_AVX) { + cp->cp_eax = 0xD; + cp->cp_ecx = 2; + cp->cp_edx = cp->cp_ebx = 0; + + (void) __cpuid_insn(cp); + + if (cp->cp_ebx != CPUID_LEAFD_2_YMM_OFFSET || + cp->cp_eax != CPUID_LEAFD_2_YMM_SIZE) { + cpuid_d_valid = B_FALSE; + } + + cpi->cpi_xsave.ymm_size = cp->cp_eax; + cpi->cpi_xsave.ymm_offset = cp->cp_ebx; + } + + if (!is_x86_feature(x86_featureset, X86FSET_XSAVE)) { + xsave_state_size = 0; + } else if (cpuid_d_valid) { + xsave_state_size = cpi->cpi_xsave.xsav_max_size; + } else { + /* Broken CPUID 0xD, probably in HVM */ + cmn_err(CE_WARN, "cpu%d: CPUID.0xD returns invalid " + "value: hw_low = %d, hw_high = %d, xsave_size = %d" + ", ymm_size = %d, ymm_offset = %d\n", + cpu->cpu_id, cpi->cpi_xsave.xsav_hw_features_low, + cpi->cpi_xsave.xsav_hw_features_high, + (int)cpi->cpi_xsave.xsav_max_size, + (int)cpi->cpi_xsave.ymm_size, + (int)cpi->cpi_xsave.ymm_offset); + + if (xsave_state_size != 0) { + /* + * This must be a non-boot CPU. We cannot + * continue, because boot cpu has already + * enabled XSAVE. + */ + ASSERT(cpu->cpu_id != 0); + cmn_err(CE_PANIC, "cpu%d: we have already " + "enabled XSAVE on boot cpu, cannot " + "continue.", cpu->cpu_id); + } else { + /* + * Must be from boot CPU, OK to disable XSAVE. + */ + ASSERT(cpu->cpu_id == 0); + remove_x86_feature(x86_featureset, + X86FSET_XSAVE); + remove_x86_feature(x86_featureset, X86FSET_AVX); + CPI_FEATURES_ECX(cpi) &= ~CPUID_INTC_ECX_XSAVE; + CPI_FEATURES_ECX(cpi) &= ~CPUID_INTC_ECX_AVX; + xsave_force_disable = B_TRUE; + } + } + } + + if ((cpi->cpi_xmaxeax & 0x80000000) == 0) goto pass2_done; @@ -2386,6 +2543,11 @@ cpuid_pass4(cpu_t *cpu) *ecx &= ~CPUID_INTC_ECX_AES; if (!is_x86_feature(x86_featureset, X86FSET_PCLMULQDQ)) *ecx &= ~CPUID_INTC_ECX_PCLMULQDQ; + if (!is_x86_feature(x86_featureset, X86FSET_XSAVE)) + *ecx &= ~(CPUID_INTC_ECX_XSAVE | + CPUID_INTC_ECX_OSXSAVE); + if (!is_x86_feature(x86_featureset, X86FSET_AVX)) + *ecx &= ~CPUID_INTC_ECX_AVX; } /* @@ -2419,6 +2581,9 @@ cpuid_pass4(cpu_t *cpu) hwcap_flags |= AV_386_AES; if (*ecx & CPUID_INTC_ECX_PCLMULQDQ) hwcap_flags |= AV_386_PCLMULQDQ; + if ((*ecx & CPUID_INTC_ECX_XSAVE) && + (*ecx & CPUID_INTC_ECX_OSXSAVE)) + hwcap_flags |= AV_386_XSAVE; } if (*ecx & CPUID_INTC_ECX_POPCNT) hwcap_flags |= AV_386_POPCNT; @@ -4273,6 +4438,31 @@ post_startup_cpu_fixups(void) } /* + * Setup necessary registers to enable XSAVE feature on this processor. + * This function needs to be called early enough, so that no xsave/xrstor + * ops will execute on the processor before the MSRs are properly set up. + * + * Current implementation has the following assumption: + * - cpuid_pass1() is done, so that X86 features are known. + * - fpu_probe() is done, so that fp_save_mech is chosen. + */ +void +xsave_setup_msr(cpu_t *cpu) +{ + ASSERT(fp_save_mech == FP_XSAVE); + ASSERT(is_x86_feature(x86_featureset, X86FSET_XSAVE)); + + /* Enable OSXSAVE in CR4. */ + setcr4(getcr4() | CR4_OSXSAVE); + /* + * Update SW copy of ECX, so that /dev/cpu/self/cpuid will report + * correct value. + */ + cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_ecx |= CPUID_INTC_ECX_OSXSAVE; + setup_xfem(); +} + +/* + * Starting with the Westmere processor the local * APIC timer will continue running in all C-states, * including the deepest C-states.
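The cpuid_pass2() enumeration above is the kernel-side form of a probe any code can perform. Below is a minimal user-level sketch of the same CPUID leaf 0xD walk; it assumes a GCC/clang-style <cpuid.h> (__get_cpuid_count() is a compiler helper, not an illumos interface), with the XFEATURE_* constants copied from the x86_archext.h hunk later in this patch.

#include <cpuid.h>
#include <stdio.h>

#define XFEATURE_LEGACY_FP      0x1
#define XFEATURE_SSE            0x2
#define XFEATURE_AVX            0x4

int
main(void)
{
        unsigned int eax, ebx, ecx, edx;

        /* sub-leaf 0: supported XCR0 bits (edx:eax) and save-area sizes */
        if (!__get_cpuid_count(0xD, 0, &eax, &ebx, &ecx, &edx))
                return (1);
        (void) printf("hw features 0x%x:0x%x, max save area %u bytes\n",
            edx, eax, ecx);

        if (eax & XFEATURE_AVX) {
                /* sub-leaf 2: size and offset of the AVX ymm state */
                (void) __get_cpuid_count(0xD, 2, &eax, &ebx, &ecx, &edx);
                (void) printf("ymm state %u bytes at offset %u\n", eax, ebx);
        }
        return (0);
}

On the hardware this patch targets, sub-leaf 2 is expected to answer 256 bytes at offset 576, which is exactly what the CPUID_LEAFD_2_YMM_SIZE/CPUID_LEAFD_2_YMM_OFFSET sanity checks above verify.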
diff --git a/usr/src/uts/i86pc/os/fpu_subr.c b/usr/src/uts/i86pc/os/fpu_subr.c index 7bb68f7168..0598b913f1 100644 --- a/usr/src/uts/i86pc/os/fpu_subr.c +++ b/usr/src/uts/i86pc/os/fpu_subr.c @@ -48,6 +48,15 @@ int fpu_exists = 1; int fp_kind = FP_387; /* + * Mechanism to save FPU state. + */ +#if defined(__amd64) +int fp_save_mech = FP_FXSAVE; +#elif defined(__i386) +int fp_save_mech = FP_FNSAVE; +#endif + +/* * The variable fpu_ignored is provided to allow other code to * determine whether emulation is being done because there is * no FPU or because of an override requested via /etc/system. @@ -141,8 +150,20 @@ fpu_probe(void) */ if (is_x86_feature(x86_featureset, X86FSET_SSE) && is_x86_feature(x86_featureset, X86FSET_SSE2)) { - fp_kind = __FP_SSE; + fp_kind |= __FP_SSE; ENABLE_SSE(); + + if (is_x86_feature(x86_featureset, X86FSET_AVX)) { + ASSERT(is_x86_feature(x86_featureset, + X86FSET_XSAVE)); + fp_kind |= __FP_AVX; + } + + if (is_x86_feature(x86_featureset, X86FSET_XSAVE)) { + fp_save_mech = FP_XSAVE; + fpsave_ctxt = xsave_ctxt; + patch_xsave(); + } } #elif defined(__i386) /* @@ -150,15 +171,37 @@ fpu_probe(void) * code to exploit it when present. */ if (is_x86_feature(x86_featureset, X86FSET_SSE)) { - fp_kind = __FP_SSE; + fp_kind |= __FP_SSE; + ENABLE_SSE(); + fp_save_mech = FP_FXSAVE; fpsave_ctxt = fpxsave_ctxt; - patch_sse(); - if (is_x86_feature(x86_featureset, X86FSET_SSE2)) + + if (is_x86_feature(x86_featureset, X86FSET_SSE2)) { patch_sse2(); - ENABLE_SSE(); + } + + if (is_x86_feature(x86_featureset, X86FSET_AVX)) { + ASSERT(is_x86_feature(x86_featureset, + X86FSET_XSAVE)); + fp_kind |= __FP_AVX; + } + + if (is_x86_feature(x86_featureset, X86FSET_XSAVE)) { + fp_save_mech = FP_XSAVE; + fpsave_ctxt = xsave_ctxt; + patch_xsave(); + } else { + patch_sse(); /* use fxrstor */ + } } else { remove_x86_feature(x86_featureset, X86FSET_SSE2); /* + * We will not likely to have a chip with AVX but not + * SSE. But to be safe we disable AVX if SSE is not + * enabled. + */ + remove_x86_feature(x86_featureset, X86FSET_AVX); + /* * (Just in case the BIOS decided we wanted SSE * enabled when we didn't. See 4965674.) */ @@ -169,7 +212,7 @@ fpu_probe(void) use_sse_pagecopy = use_sse_pagezero = use_sse_copy = 1; } - if (fp_kind == __FP_SSE) { + if (fp_kind & __FP_SSE) { struct fxsave_state *fx; uint8_t fxsave_state[sizeof (struct fxsave_state) + XMM_ALIGN]; diff --git a/usr/src/uts/i86pc/os/mp_startup.c b/usr/src/uts/i86pc/os/mp_startup.c index f52c320c4b..2979ddb0fc 100644 --- a/usr/src/uts/i86pc/os/mp_startup.c +++ b/usr/src/uts/i86pc/os/mp_startup.c @@ -1711,6 +1711,13 @@ mp_startup_common(boolean_t boot) */ cp->cpu_flags &= ~(CPU_POWEROFF | CPU_QUIESCED); + /* + * Setup this processor for XSAVE. + */ + if (fp_save_mech == FP_XSAVE) { + xsave_setup_msr(cp); + } + cpuid_pass2(cp); cpuid_pass3(cp); (void) cpuid_pass4(cp); diff --git a/usr/src/uts/i86pc/os/startup.c b/usr/src/uts/i86pc/os/startup.c index e5afdcc014..d8facc92e7 100644 --- a/usr/src/uts/i86pc/os/startup.c +++ b/usr/src/uts/i86pc/os/startup.c @@ -2193,6 +2193,13 @@ startup_end(void) PRM_POINT("configure() done"); /* + * We can now setup for XSAVE because fpu_probe is done in configure(). + */ + if (fp_save_mech == FP_XSAVE) { + xsave_setup_msr(CPU); + } + + /* * Set the isa_list string to the defined instruction sets we * support. 
*/ diff --git a/usr/src/uts/intel/ia32/ml/exception.s b/usr/src/uts/intel/ia32/ml/exception.s index d7236f9585..ab24e46548 100644 --- a/usr/src/uts/intel/ia32/ml/exception.s +++ b/usr/src/uts/intel/ia32/ml/exception.s @@ -1,6 +1,5 @@ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. */ /* @@ -637,30 +636,36 @@ _emul_done: * after a context switch -- we do the frequent path in ndptrap_frstor * below; for all other cases, we let the trap code handle it */ - LOADCPU(%rbx) /* swapgs handled in hypervisor */ + LOADCPU(%rax) /* swapgs handled in hypervisor */ cmpl $0, fpu_exists(%rip) je .handle_in_trap /* let trap handle no fp case */ - movq CPU_THREAD(%rbx), %r15 /* %r15 = curthread */ - movl $FPU_EN, %ebx - movq T_LWP(%r15), %r15 /* %r15 = lwp */ - testq %r15, %r15 + movq CPU_THREAD(%rax), %rbx /* %rbx = curthread */ + movl $FPU_EN, %eax + movq T_LWP(%rbx), %rbx /* %rbx = lwp */ + testq %rbx, %rbx jz .handle_in_trap /* should not happen? */ #if LWP_PCB_FPU != 0 - addq $LWP_PCB_FPU, %r15 /* &lwp->lwp_pcb.pcb_fpu */ + addq $LWP_PCB_FPU, %rbx /* &lwp->lwp_pcb.pcb_fpu */ #endif - testl %ebx, PCB_FPU_FLAGS(%r15) + testl %eax, PCB_FPU_FLAGS(%rbx) jz .handle_in_trap /* must be the first fault */ CLTS - andl $_BITNOT(FPU_VALID), PCB_FPU_FLAGS(%r15) + andl $_BITNOT(FPU_VALID), PCB_FPU_FLAGS(%rbx) #if FPU_CTX_FPU_REGS != 0 - addq $FPU_CTX_FPU_REGS, %r15 + addq $FPU_CTX_FPU_REGS, %rbx #endif + + movl FPU_CTX_FPU_XSAVE_MASK(%rbx), %eax /* for xrstor */ + movl FPU_CTX_FPU_XSAVE_MASK+4(%rbx), %edx /* for xrstor */ + /* * the label below is used in trap.c to detect FP faults in * kernel due to user fault. */ ALTENTRY(ndptrap_frstor) - FXRSTORQ ((%r15)) + .globl _patch_xrstorq_rbx +_patch_xrstorq_rbx: + FXRSTORQ ((%rbx)) cmpw $KCS_SEL, REGOFF_CS(%rsp) je .return_to_kernel @@ -694,42 +699,56 @@ _emul_done: pushq %rbx cmpw $KCS_SEL, 24(%rsp) /* did we come from kernel mode? */ jne 1f - LOADCPU(%rbx) /* if yes, don't swapgs */ + LOADCPU(%rax) /* if yes, don't swapgs */ jmp 2f -1: +1: SWAPGS /* if from user, need swapgs */ - LOADCPU(%rbx) + LOADCPU(%rax) SWAPGS -2: +2: + /* + * Xrstor needs to use edx as part of its flag. + * NOTE: have to push rdx after "cmpw ...24(%rsp)", otherwise rsp+$24 + * will not point to CS. + */ + pushq %rdx cmpl $0, fpu_exists(%rip) je .handle_in_trap /* let trap handle no fp case */ - movq CPU_THREAD(%rbx), %rax /* %rax = curthread */ - movl $FPU_EN, %ebx - movq T_LWP(%rax), %rax /* %rax = lwp */ - testq %rax, %rax + movq CPU_THREAD(%rax), %rbx /* %rbx = curthread */ + movl $FPU_EN, %eax + movq T_LWP(%rbx), %rbx /* %rbx = lwp */ + testq %rbx, %rbx jz .handle_in_trap /* should not happen? */ #if LWP_PCB_FPU != 0 - addq $LWP_PCB_FPU, %rax /* &lwp->lwp_pcb.pcb_fpu */ + addq $LWP_PCB_FPU, %rbx /* &lwp->lwp_pcb.pcb_fpu */ #endif - testl %ebx, PCB_FPU_FLAGS(%rax) + testl %eax, PCB_FPU_FLAGS(%rbx) jz .handle_in_trap /* must be the first fault */ clts - andl $_BITNOT(FPU_VALID), PCB_FPU_FLAGS(%rax) + andl $_BITNOT(FPU_VALID), PCB_FPU_FLAGS(%rbx) #if FPU_CTX_FPU_REGS != 0 - addq $FPU_CTX_FPU_REGS, %rax + addq $FPU_CTX_FPU_REGS, %rbx #endif + + movl FPU_CTX_FPU_XSAVE_MASK(%rbx), %eax /* for xrstor */ + movl FPU_CTX_FPU_XSAVE_MASK+4(%rbx), %edx /* for xrstor */ + /* * the label below is used in trap.c to detect FP faults in * kernel due to user fault. 
*/ ALTENTRY(ndptrap_frstor) - FXRSTORQ ((%rax)) + .globl _patch_xrstorq_rbx +_patch_xrstorq_rbx: + FXRSTORQ ((%rbx)) + popq %rdx popq %rbx popq %rax IRET /*NOTREACHED*/ .handle_in_trap: + popq %rdx popq %rbx popq %rax TRAP_NOERR(T_NOEXTFLT) /* $7 */ @@ -749,6 +768,7 @@ _emul_done: */ pushl %eax pushl %ebx + pushl %edx /* for xrstor */ pushl %ds pushl %gs movl $KDS_SEL, %ebx @@ -773,17 +793,24 @@ _emul_done: #if FPU_CTX_FPU_REGS != 0 addl $FPU_CTX_FPU_REGS, %ebx #endif + + movl FPU_CTX_FPU_XSAVE_MASK(%ebx), %eax /* for xrstor */ + movl FPU_CTX_FPU_XSAVE_MASK+4(%ebx), %edx /* for xrstor */ + /* * the label below is used in trap.c to detect FP faults in kernel * due to user fault. */ ALTENTRY(ndptrap_frstor) - .globl _patch_fxrstor_ebx + .globl _patch_fxrstor_ebx _patch_fxrstor_ebx: + .globl _patch_xrstor_ebx +_patch_xrstor_ebx: frstor (%ebx) /* may be patched to fxrstor */ nop /* (including this byte) */ popl %gs popl %ds + popl %edx popl %ebx popl %eax IRET @@ -791,6 +818,7 @@ _patch_fxrstor_ebx: .handle_in_trap: popl %gs popl %ds + popl %edx popl %ebx popl %eax TRAP_NOERR(T_NOEXTFLT) /* $7 */ diff --git a/usr/src/uts/intel/ia32/ml/float.s b/usr/src/uts/intel/ia32/ml/float.s index 7214f1ec64..5a8962c9ff 100644 --- a/usr/src/uts/intel/ia32/ml/float.s +++ b/usr/src/uts/intel/ia32/ml/float.s @@ -20,8 +20,7 @@ */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved. */ /* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */ @@ -31,7 +30,10 @@ /* Copyright (c) 1987, 1988 Microsoft Corporation */ /* All Rights Reserved */ -#pragma ident "%Z%%M% %I% %E% SMI" +/* + * Copyright (c) 2009, Intel Corporation. + * All rights reserved. + */ #include <sys/asm_linkage.h> #include <sys/asm_misc.h> @@ -152,6 +154,10 @@ void patch_sse2(void) {} +void +patch_xsave(void) +{} + #else /* __lint */ ENTRY_NP(patch_sse) @@ -188,10 +194,74 @@ _lfence_ret_insn: / see membar_consumer() ret SET_SIZE(patch_sse2) + /* + * Patch lazy fp restore instructions in the trap handler + * to use xrstor instead of frstor + */ + ENTRY_NP(patch_xsave) + _HOT_PATCH_PROLOG + / + / frstor (%ebx); nop -> xrstor (%ebx) + / + _HOT_PATCH(_xrstor_ebx_insn, _patch_xrstor_ebx, 3) + _HOT_PATCH_EPILOG + ret +_xrstor_ebx_insn: / see ndptrap_frstor() + #xrstor (%ebx) + .byte 0x0f, 0xae, 0x2b + SET_SIZE(patch_xsave) + #endif /* __lint */ #endif /* __i386 */ +#if defined(__amd64) +#if defined(__lint) + +void +patch_xsave(void) +{} + +#else /* __lint */ + + /* + * Patch lazy fp restore instructions in the trap handler + * to use xrstor instead of fxrstorq + */ + ENTRY_NP(patch_xsave) + pushq %rbx + pushq %rbp + pushq %r15 + / + / FXRSTORQ (%rbx); -> xrstor (%rbx) + / hot_patch(_xrstor_rbx_insn, _patch_xrstorq_rbx, 4) + / + leaq _patch_xrstorq_rbx(%rip), %rbx + leaq _xrstor_rbx_insn(%rip), %rbp + movq $4, %r15 +1: + movq %rbx, %rdi /* patch address */ + movzbq (%rbp), %rsi /* instruction byte */ + movq $1, %rdx /* count */ + call hot_patch_kernel_text + addq $1, %rbx + addq $1, %rbp + subq $1, %r15 + jnz 1b + popq %r15 + popq %rbp + popq %rbx + ret + +_xrstor_rbx_insn: / see ndptrap_frstor() + #rex.W=1 (.byte 0x48) + #xrstor (%rbx) + .byte 0x48, 0x0f, 0xae, 0x2b + SET_SIZE(patch_xsave) + +#endif /* __lint */ +#endif /* __amd64 */ + /* * One of these routines is called from any lwp with floating * point context as part of the prolog of a context switch. 
@@ -201,6 +271,11 @@ _lfence_ret_insn: / see membar_consumer() /*ARGSUSED*/ void +xsave_ctxt(void *arg) +{} + +/*ARGSUSED*/ +void fpxsave_ctxt(void *arg) {} @@ -242,6 +317,33 @@ fpnsave_ctxt(void *arg) /* AMD Software Optimization Guide - Section 6.2 */ SET_SIZE(fpxsave_ctxt) + ENTRY_NP(xsave_ctxt) + cmpl $FPU_EN, FPU_CTX_FPU_FLAGS(%rdi) + jne 1f + movl $_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%rdi) + /* + * Setup xsave flags in EDX:EAX + */ + movl FPU_CTX_FPU_XSAVE_MASK(%rdi), %eax + movl FPU_CTX_FPU_XSAVE_MASK+4(%rdi), %edx + leaq FPU_CTX_FPU_REGS(%rdi), %rsi + #xsave (%rsi) + .byte 0x0f, 0xae, 0x26 + + /* + * (see notes above about "exception pointers") + * TODO: does it apply to any machine that uses xsave? + */ + btw $7, FXSAVE_STATE_FSW(%rdi) /* Test saved ES bit */ + jnc 0f /* jump if ES = 0 */ + fnclex /* clear pending x87 exceptions */ +0: ffree %st(7) /* clear tag bit to remove possible stack overflow */ + fildl .fpzero_const(%rip) + /* dummy load changes all exception pointers */ + STTS(%rsi) /* trap on next fpu touch */ +1: ret + SET_SIZE(xsave_ctxt) + #elif defined(__i386) ENTRY_NP(fpnsave_ctxt) @@ -276,6 +378,32 @@ fpnsave_ctxt(void *arg) /* AMD Software Optimization Guide - Section 6.2 */ SET_SIZE(fpxsave_ctxt) + ENTRY_NP(xsave_ctxt) + movl 4(%esp), %ecx /* a struct fpu_ctx */ + cmpl $FPU_EN, FPU_CTX_FPU_FLAGS(%ecx) + jne 1f + + movl $_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%ecx) + movl FPU_CTX_FPU_XSAVE_MASK(%ecx), %eax + movl FPU_CTX_FPU_XSAVE_MASK+4(%ecx), %edx + leal FPU_CTX_FPU_REGS(%ecx), %ecx + #xsave (%ecx) + .byte 0x0f, 0xae, 0x21 + + /* + * (see notes above about "exception pointers") + * TODO: does it apply to any machine that uses xsave? + */ + btw $7, FXSAVE_STATE_FSW(%ecx) /* Test saved ES bit */ + jnc 0f /* jump if ES = 0 */ + fnclex /* clear pending x87 exceptions */ +0: ffree %st(7) /* clear tag bit to remove possible stack overflow */ + fildl .fpzero_const + /* dummy load changes all exception pointers */ + STTS(%edx) /* trap on next fpu touch */ +1: ret + SET_SIZE(xsave_ctxt) + #endif /* __i386 */ .align 8 @@ -298,6 +426,11 @@ void fpxsave(struct fxsave_state *f) {} +/*ARGSUSED*/ +void +xsave(struct xsave_state *f, uint64_t m) +{} + #else /* __lint */ #if defined(__amd64) @@ -310,6 +443,19 @@ fpxsave(struct fxsave_state *f) ret SET_SIZE(fpxsave) + ENTRY_NP(xsave) + CLTS + movl %esi, %eax /* bv mask */ + movq %rsi, %rdx + shrq $32, %rdx + #xsave (%rdi) + .byte 0x0f, 0xae, 0x27 + + fninit /* clear exceptions, init x87 tags */ + STTS(%rdi) /* set TS bit in %cr0 (disable FPU) */ + ret + SET_SIZE(xsave) + #elif defined(__i386) ENTRY_NP(fpsave) @@ -329,6 +475,19 @@ fpxsave(struct fxsave_state *f) ret SET_SIZE(fpxsave) + ENTRY_NP(xsave) + CLTS + movl 4(%esp), %ecx + movl 8(%esp), %eax + movl 12(%esp), %edx + #xsave (%ecx) + .byte 0x0f, 0xae, 0x21 + + fninit /* clear exceptions, init x87 tags */ + STTS(%eax) /* set TS bit in %cr0 (disable FPU) */ + ret + SET_SIZE(xsave) + #endif /* __i386 */ #endif /* __lint */ @@ -344,6 +503,11 @@ void fpxrestore(struct fxsave_state *f) {} +/*ARGSUSED*/ +void +xrestore(struct xsave_state *f, uint64_t m) +{} + #else /* __lint */ #if defined(__amd64) @@ -354,6 +518,16 @@ fpxrestore(struct fxsave_state *f) ret SET_SIZE(fpxrestore) + ENTRY_NP(xrestore) + CLTS + movl %esi, %eax /* bv mask */ + movq %rsi, %rdx + shrq $32, %rdx + #xrstor (%rdi) + .byte 0x0f, 0xae, 0x2f + ret + SET_SIZE(xrestore) + #elif defined(__i386) ENTRY_NP(fprestore) @@ -370,6 +544,16 @@ fpxrestore(struct fxsave_state *f) ret SET_SIZE(fpxrestore) + 
ENTRY_NP(xrestore) + CLTS + movl 4(%esp), %ecx + movl 8(%esp), %eax + movl 12(%esp), %edx + #xrstor (%ecx) + .byte 0x0f, 0xae, 0x29 + ret + SET_SIZE(xrestore) + #endif /* __i386 */ #endif /* __lint */ @@ -418,26 +602,56 @@ fpinit(void) ENTRY_NP(fpinit) CLTS + cmpl $FP_XSAVE, fp_save_mech + je 1f + + /* fxsave */ leaq sse_initial(%rip), %rax FXRSTORQ ((%rax)) /* load clean initial state */ ret + +1: /* xsave */ + leaq avx_initial(%rip), %rcx + xorl %edx, %edx + movl $XFEATURE_AVX, %eax + bt $X86FSET_AVX, x86_featureset + cmovael %edx, %eax + orl $(XFEATURE_LEGACY_FP | XFEATURE_SSE), %eax + /* xrstor (%rcx) */ + .byte 0x0f, 0xae, 0x29 /* load clean initial state */ + ret SET_SIZE(fpinit) #elif defined(__i386) ENTRY_NP(fpinit) CLTS - cmpl $__FP_SSE, fp_kind + cmpl $FP_FXSAVE, fp_save_mech je 1f + cmpl $FP_XSAVE, fp_save_mech + je 2f + /* fnsave */ fninit movl $x87_initial, %eax frstor (%eax) /* load clean initial state */ ret -1: + +1: /* fxsave */ movl $sse_initial, %eax fxrstor (%eax) /* load clean initial state */ ret + +2: /* xsave */ + movl $avx_initial, %ecx + xorl %edx, %edx + movl $XFEATURE_AVX, %eax + bt $X86FSET_AVX, x86_featureset + cmovael %edx, %eax + orl $(XFEATURE_LEGACY_FP | XFEATURE_SSE), %eax + /* xrstor (%ecx) */ + .byte 0x0f, 0xae, 0x29 /* load clean initial state */ + ret SET_SIZE(fpinit) #endif /* __i386 */ diff --git a/usr/src/uts/intel/ia32/ml/i86_subr.s b/usr/src/uts/intel/ia32/ml/i86_subr.s index a4406b276d..e79eabd119 100644 --- a/usr/src/uts/intel/ia32/ml/i86_subr.s +++ b/usr/src/uts/intel/ia32/ml/i86_subr.s @@ -30,6 +30,11 @@ */ /* + * Copyright (c) 2009, Intel Corporation. + * All rights reserved. + */ + +/* * General assembly language routines. * It is the intent of this file to contain routines that are * independent of the specific kernel architecture, and those that are @@ -2867,6 +2872,16 @@ void invalidate_cache(void) {} +/*ARGSUSED*/ +uint64_t +get_xcr(uint_t r) +{ return (0); } + +/*ARGSUSED*/ +void +set_xcr(uint_t r, const uint64_t val) +{} + #else /* __lint */ #define XMSR_ACCESS_VAL $0x9c5a203a @@ -2914,7 +2929,26 @@ invalidate_cache(void) leave ret SET_SIZE(xwrmsr) - + + ENTRY(get_xcr) + movl %edi, %ecx + #xgetbv + .byte 0x0f,0x01,0xd0 + shlq $32, %rdx + orq %rdx, %rax + ret + SET_SIZE(get_xcr) + + ENTRY(set_xcr) + movq %rsi, %rdx + shrq $32, %rdx + movl %esi, %eax + movl %edi, %ecx + #xsetbv + .byte 0x0f,0x01,0xd1 + ret + SET_SIZE(set_xcr) + #elif defined(__i386) ENTRY(rdmsr) @@ -2957,6 +2991,22 @@ invalidate_cache(void) ret SET_SIZE(xwrmsr) + ENTRY(get_xcr) + movl 4(%esp), %ecx + #xgetbv + .byte 0x0f,0x01,0xd0 + ret + SET_SIZE(get_xcr) + + ENTRY(set_xcr) + movl 4(%esp), %ecx + movl 8(%esp), %eax + movl 12(%esp), %edx + #xsetbv + .byte 0x0f,0x01,0xd1 + ret + SET_SIZE(set_xcr) + #endif /* __i386 */ ENTRY(invalidate_cache) diff --git a/usr/src/uts/intel/ia32/os/archdep.c b/usr/src/uts/intel/ia32/os/archdep.c index 506fbaf66e..7603ccb97d 100644 --- a/usr/src/uts/intel/ia32/os/archdep.c +++ b/usr/src/uts/intel/ia32/os/archdep.c @@ -62,6 +62,7 @@ #include <sys/dtrace.h> #include <sys/brand.h> #include <sys/machbrand.h> +#include <sys/cmn_err.h> extern const struct fnsave_state x87_initial; extern const struct fxsave_state sse_initial; @@ -278,41 +279,43 @@ setfpregs(klwp_t *lwp, fpregset_t *fp) */ fp_free(fpu, 0); } -#if !defined(__amd64) - if (fp_kind == __FP_SSE) { -#endif - fpregset_to_fxsave(fp, &fpu->fpu_regs.kfpu_u.kfpu_fx); - fpu->fpu_regs.kfpu_xstatus = - fp->fp_reg_set.fpchip_state.xstatus; -#if !defined(__amd64) - } else - bcopy(fp, 
&fpu->fpu_regs.kfpu_u.kfpu_fn, - sizeof (fpu->fpu_regs.kfpu_u.kfpu_fn)); -#endif - fpu->fpu_regs.kfpu_status = fp->fp_reg_set.fpchip_state.status; - fpu->fpu_flags |= FPU_VALID; - } else { - /* - * If we are trying to change the FPU state of a thread which - * hasn't yet initialized floating point, store the state in - * the pcb and indicate that the state is valid. When the - * thread enables floating point, it will use this state instead - * of the default state. - */ -#if !defined(__amd64) - if (fp_kind == __FP_SSE) { -#endif - fpregset_to_fxsave(fp, &fpu->fpu_regs.kfpu_u.kfpu_fx); - fpu->fpu_regs.kfpu_xstatus = - fp->fp_reg_set.fpchip_state.xstatus; -#if !defined(__amd64) - } else - bcopy(fp, &fpu->fpu_regs.kfpu_u.kfpu_fn, - sizeof (fpu->fpu_regs.kfpu_u.kfpu_fn)); + } + /* + * Else: if we are trying to change the FPU state of a thread which + * hasn't yet initialized floating point, store the state in + * the pcb and indicate that the state is valid. When the + * thread enables floating point, it will use this state instead + * of the default state. + */ + + switch (fp_save_mech) { +#if defined(__i386) + case FP_FNSAVE: + bcopy(fp, &fpu->fpu_regs.kfpu_u.kfpu_fn, + sizeof (fpu->fpu_regs.kfpu_u.kfpu_fn)); + break; #endif - fpu->fpu_regs.kfpu_status = fp->fp_reg_set.fpchip_state.status; - fpu->fpu_flags |= FPU_VALID; + case FP_FXSAVE: + fpregset_to_fxsave(fp, &fpu->fpu_regs.kfpu_u.kfpu_fx); + fpu->fpu_regs.kfpu_xstatus = + fp->fp_reg_set.fpchip_state.xstatus; + break; + + case FP_XSAVE: + fpregset_to_fxsave(fp, + &fpu->fpu_regs.kfpu_u.kfpu_xs.xs_fxsave); + fpu->fpu_regs.kfpu_xstatus = + fp->fp_reg_set.fpchip_state.xstatus; + fpu->fpu_regs.kfpu_u.kfpu_xs.xs_xstate_bv |= + (XFEATURE_LEGACY_FP | XFEATURE_SSE); + break; + default: + panic("Invalid fp_save_mech"); + /*NOTREACHED*/ } + + fpu->fpu_regs.kfpu_status = fp->fp_reg_set.fpchip_state.status; + fpu->fpu_flags |= FPU_VALID; } /* @@ -349,32 +352,54 @@ getfpregs(klwp_t *lwp, fpregset_t *fp) /* * Cases 1 and 3. */ -#if !defined(__amd64) - if (fp_kind == __FP_SSE) { + switch (fp_save_mech) { +#if defined(__i386) + case FP_FNSAVE: + bcopy(&fpu->fpu_regs.kfpu_u.kfpu_fn, fp, + sizeof (fpu->fpu_regs.kfpu_u.kfpu_fn)); + break; #endif + case FP_FXSAVE: fxsave_to_fpregset(&fpu->fpu_regs.kfpu_u.kfpu_fx, fp); fp->fp_reg_set.fpchip_state.xstatus = fpu->fpu_regs.kfpu_xstatus; -#if !defined(__amd64) - } else - bcopy(&fpu->fpu_regs.kfpu_u.kfpu_fn, fp, - sizeof (fpu->fpu_regs.kfpu_u.kfpu_fn)); -#endif + break; + case FP_XSAVE: + fxsave_to_fpregset( + &fpu->fpu_regs.kfpu_u.kfpu_xs.xs_fxsave, fp); + fp->fp_reg_set.fpchip_state.xstatus = + fpu->fpu_regs.kfpu_xstatus; + break; + default: + panic("Invalid fp_save_mech"); + /*NOTREACHED*/ + } fp->fp_reg_set.fpchip_state.status = fpu->fpu_regs.kfpu_status; } else { /* * Case 2. */ -#if !defined(__amd64) - if (fp_kind == __FP_SSE) { + switch (fp_save_mech) { +#if defined(__i386) + case FP_FNSAVE: + bcopy(&x87_initial, fp, sizeof (x87_initial)); + break; #endif + case FP_FXSAVE: + case FP_XSAVE: + /* + * For now, we don't have any AVX specific field in ABI. + * If we add any in the future, we need to initial them + * as well. 
+ */ fxsave_to_fpregset(&sse_initial, fp); fp->fp_reg_set.fpchip_state.xstatus = fpu->fpu_regs.kfpu_xstatus; -#if !defined(__amd64) - } else - bcopy(&x87_initial, fp, sizeof (x87_initial)); -#endif + break; + default: + panic("Invalid fp_save_mech"); + /*NOTREACHED*/ + } fp->fp_reg_set.fpchip_state.status = fpu->fpu_regs.kfpu_status; } kpreempt_enable(); diff --git a/usr/src/uts/intel/ia32/os/fpu.c b/usr/src/uts/intel/ia32/os/fpu.c index 3c2a3bae65..b7022cf0e5 100644 --- a/usr/src/uts/intel/ia32/os/fpu.c +++ b/usr/src/uts/intel/ia32/os/fpu.c @@ -19,8 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved. */ /* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */ @@ -30,7 +29,10 @@ /* Copyright (c) 1987, 1988 Microsoft Corporation */ /* All Rights Reserved */ -#pragma ident "%Z%%M% %I% %E% SMI" +/* + * Copyright (c) 2009, Intel Corporation. + * All rights reserved. + */ #include <sys/types.h> #include <sys/param.h> @@ -56,6 +58,10 @@ #include <sys/debug.h> #include <sys/x86_archext.h> #include <sys/sysmacros.h> +#include <sys/cmn_err.h> + +/* Legacy fxsave layout + xsave header + ymm */ +#define AVX_XSAVE_SIZE (512 + 64 + 256) /*CSTYLED*/ #pragma align 16 (sse_initial) @@ -83,6 +89,45 @@ const struct fxsave_state sse_initial = { /* rest of structure is zero */ }; +/*CSTYLED*/ +#pragma align 64 (avx_initial) + +/* + * Initial kfpu state for AVX used by fpinit() + */ +const struct xsave_state avx_initial = { + /* + * The definition below needs to be identical with sse_initial + * defined above. + */ + { + FPU_CW_INIT, /* fx_fcw */ + 0, /* fx_fsw */ + 0, /* fx_fctw */ + 0, /* fx_fop */ +#if defined(__amd64) + 0, /* fx_rip */ + 0, /* fx_rdp */ +#else + 0, /* fx_eip */ + 0, /* fx_cs */ + 0, /* __fx_ign0 */ + 0, /* fx_dp */ + 0, /* fx_ds */ + 0, /* __fx_ign1 */ +#endif /* __amd64 */ + SSE_MXCSR_INIT /* fx_mxcsr */ + /* rest of structure is zero */ + }, + /* + * bit0 = 1 for XSTATE_BV to indicate that legacy fields are valid, + * and CPU should initialize XMM/YMM. + */ + 1, + {0, 0} /* These 2 bytes must be zero */ + /* rest of structure is zero */ +}; + /* * mxcsr_mask value (possibly reset in fpu_probe); used to avoid * the #gp exception caused by setting unsupported bits in the @@ -103,11 +148,16 @@ const struct fnsave_state x87_initial = { }; #if defined(__amd64) -#define fpsave_ctxt fpxsave_ctxt +/* + * This vector is patched to xsave_ctxt() if we discover we have an + * XSAVE-capable chip in fpu_probe. + */ +void (*fpsave_ctxt)(void *) = fpxsave_ctxt; #elif defined(__i386) /* - * This vector is patched to fpxsave_ctxt() if we discover - * we have an SSE-capable chip in fpu_probe(). + * This vector is patched to fpxsave_ctxt() if we discover we have an + * SSE-capable chip in fpu_probe(). It is patched to xsave_ctxt + * if we discover we have an XSAVE-capable chip in fpu_probe. 
*/ void (*fpsave_ctxt)(void *) = fpnsave_ctxt; #endif @@ -129,6 +179,10 @@ fp_new_lwp(kthread_id_t t, kthread_id_t ct) struct fpu_ctx *fp; /* parent fpu context */ struct fpu_ctx *cfp; /* new fpu context */ struct fxsave_state *fx, *cfx; +#if defined(__i386) + struct fnsave_state *fn, *cfn; +#endif + struct xsave_state *cxs; ASSERT(fp_kind != FP_NO); @@ -145,27 +199,41 @@ fp_new_lwp(kthread_id_t t, kthread_id_t ct) cfp->fpu_regs.kfpu_status = 0; cfp->fpu_regs.kfpu_xstatus = 0; -#if defined(__amd64) - fx = &fp->fpu_regs.kfpu_u.kfpu_fx; - cfx = &cfp->fpu_regs.kfpu_u.kfpu_fx; - bcopy(&sse_initial, cfx, sizeof (*cfx)); - cfx->fx_mxcsr = fx->fx_mxcsr & ~SSE_MXCSR_EFLAGS; - cfx->fx_fcw = fx->fx_fcw; -#else - if (fp_kind == __FP_SSE) { + switch (fp_save_mech) { +#if defined(__i386) + case FP_FNSAVE: + fn = &fp->fpu_regs.kfpu_u.kfpu_fn; + cfn = &cfp->fpu_regs.kfpu_u.kfpu_fn; + bcopy(&x87_initial, cfn, sizeof (*cfn)); + cfn->f_fcw = fn->f_fcw; + break; +#endif + case FP_FXSAVE: fx = &fp->fpu_regs.kfpu_u.kfpu_fx; cfx = &cfp->fpu_regs.kfpu_u.kfpu_fx; bcopy(&sse_initial, cfx, sizeof (*cfx)); cfx->fx_mxcsr = fx->fx_mxcsr & ~SSE_MXCSR_EFLAGS; cfx->fx_fcw = fx->fx_fcw; - } else { - struct fnsave_state *fn = &fp->fpu_regs.kfpu_u.kfpu_fn; - struct fnsave_state *cfn = &cfp->fpu_regs.kfpu_u.kfpu_fn; + break; - bcopy(&x87_initial, cfn, sizeof (*cfn)); - cfn->f_fcw = fn->f_fcw; + case FP_XSAVE: + cfp->fpu_xsave_mask = fp->fpu_xsave_mask; + + fx = &fp->fpu_regs.kfpu_u.kfpu_xs.xs_fxsave; + cxs = &cfp->fpu_regs.kfpu_u.kfpu_xs; + cfx = &cxs->xs_fxsave; + + bcopy(&avx_initial, cxs, sizeof (*cxs)); + cfx->fx_mxcsr = fx->fx_mxcsr & ~SSE_MXCSR_EFLAGS; + cfx->fx_fcw = fx->fx_fcw; + cxs->xs_xstate_bv |= (get_xcr(XFEATURE_ENABLED_MASK) & + XFEATURE_FP_ALL); + break; + default: + panic("Invalid fp_save_mech"); + /*NOTREACHED*/ } -#endif + installctx(ct, cfp, fpsave_ctxt, NULL, fp_new_lwp, fp_new_lwp, NULL, fp_free); /* @@ -212,7 +280,7 @@ fp_free(struct fpu_ctx *fp, int isexec) if (curthread->t_lwp && fp == &curthread->t_lwp->lwp_pcb.pcb_fpu) { /* Clear errors if any to prevent frstor from complaining */ (void) fperr_reset(); - if (fp_kind == __FP_SSE) + if (fp_kind & __FP_SSE) (void) fpxerr_reset(); fpdisable(); } @@ -234,18 +302,24 @@ fp_save(struct fpu_ctx *fp) } ASSERT(curthread->t_lwp && fp == &curthread->t_lwp->lwp_pcb.pcb_fpu); -#if defined(__amd64) - fpxsave(&fp->fpu_regs.kfpu_u.kfpu_fx); -#else - switch (fp_kind) { - case __FP_SSE: + switch (fp_save_mech) { +#if defined(__i386) + case FP_FNSAVE: + fpsave(&fp->fpu_regs.kfpu_u.kfpu_fn); + break; +#endif + case FP_FXSAVE: fpxsave(&fp->fpu_regs.kfpu_u.kfpu_fx); break; - default: - fpsave(&fp->fpu_regs.kfpu_u.kfpu_fn); + + case FP_XSAVE: + xsave(&fp->fpu_regs.kfpu_u.kfpu_xs, fp->fpu_xsave_mask); break; + default: + panic("Invalid fp_save_mech"); + /*NOTREACHED*/ } -#endif + fp->fpu_flags |= FPU_VALID; kpreempt_enable(); } @@ -259,15 +333,24 @@ fp_save(struct fpu_ctx *fp) void fp_restore(struct fpu_ctx *fp) { -#if defined(__amd64) - fpxrestore(&fp->fpu_regs.kfpu_u.kfpu_fx); -#else - /* case 2 */ - if (fp_kind == __FP_SSE) - fpxrestore(&fp->fpu_regs.kfpu_u.kfpu_fx); - else + switch (fp_save_mech) { +#if defined(__i386) + case FP_FNSAVE: fprestore(&fp->fpu_regs.kfpu_u.kfpu_fn); + break; #endif + case FP_FXSAVE: + fpxrestore(&fp->fpu_regs.kfpu_u.kfpu_fx); + break; + + case FP_XSAVE: + xrestore(&fp->fpu_regs.kfpu_u.kfpu_xs, fp->fpu_xsave_mask); + break; + default: + panic("Invalid fp_save_mech"); + /*NOTREACHED*/ + } + fp->fpu_flags &= ~FPU_VALID; } @@ -289,6 +372,11 @@ 
fp_seed(void) /* * Always initialize a new context and initialize the hardware. */ + if (fp_save_mech == FP_XSAVE) { + fp->fpu_xsave_mask = get_xcr(XFEATURE_ENABLED_MASK) & + XFEATURE_FP_ALL; + } + installctx(curthread, fp, fpsave_ctxt, NULL, fp_new_lwp, fp_new_lwp, NULL, fp_free); fpinit(); @@ -324,6 +412,9 @@ fpnoextflt(struct regs *rp) ASSERT(sizeof (struct fxsave_state) == 512 && sizeof (struct fnsave_state) == 108); ASSERT((offsetof(struct fxsave_state, fx_xmm[0]) & 0xf) == 0); + + ASSERT(sizeof (struct xsave_state) >= AVX_XSAVE_SIZE); + #if defined(__i386) ASSERT(sizeof (struct fpu) == sizeof (struct __old_fpu)); #endif /* __i386 */ @@ -375,8 +466,9 @@ fpnoextflt(struct regs *rp) * configured to enable fully fledged (%xmm) fxsave/fxrestor on * this CPU. For the non-SSE case, ensure that it isn't. */ - ASSERT((fp_kind == __FP_SSE && (getcr4() & CR4_OSFXSR) == CR4_OSFXSR) || - (fp_kind != __FP_SSE && + ASSERT(((fp_kind & __FP_SSE) && + (getcr4() & CR4_OSFXSR) == CR4_OSFXSR) || + (!(fp_kind & __FP_SSE) && (getcr4() & (CR4_OSXMMEXCPT|CR4_OSFXSR)) == 0)); #endif @@ -451,25 +543,36 @@ fpexterrflt(struct regs *rp) fp_save(fp); /* clear exception flags in saved state, as if by fnclex */ -#if defined(__amd64) - fpsw = fp->fpu_regs.kfpu_u.kfpu_fx.fx_fsw; - fpcw = fp->fpu_regs.kfpu_u.kfpu_fx.fx_fcw; - fp->fpu_regs.kfpu_u.kfpu_fx.fx_fsw &= ~FPS_SW_EFLAGS; -#else - switch (fp_kind) { - case __FP_SSE: - fpsw = fp->fpu_regs.kfpu_u.kfpu_fx.fx_fsw; - fpcw = fp->fpu_regs.kfpu_u.kfpu_fx.fx_fcw; - fp->fpu_regs.kfpu_u.kfpu_fx.fx_fsw &= ~FPS_SW_EFLAGS; - break; - default: - fpsw = fp->fpu_regs.kfpu_u.kfpu_fn.f_fsw; - fpcw = fp->fpu_regs.kfpu_u.kfpu_fn.f_fcw; - fp->fpu_regs.kfpu_u.kfpu_fn.f_fsw &= ~FPS_SW_EFLAGS; - break; - } + switch (fp_save_mech) { +#if defined(__i386) + case FP_FNSAVE: + fpsw = fp->fpu_regs.kfpu_u.kfpu_fn.f_fsw; + fpcw = fp->fpu_regs.kfpu_u.kfpu_fn.f_fcw; + fp->fpu_regs.kfpu_u.kfpu_fn.f_fsw &= ~FPS_SW_EFLAGS; + break; #endif + case FP_FXSAVE: + fpsw = fp->fpu_regs.kfpu_u.kfpu_fx.fx_fsw; + fpcw = fp->fpu_regs.kfpu_u.kfpu_fx.fx_fcw; + fp->fpu_regs.kfpu_u.kfpu_fx.fx_fsw &= ~FPS_SW_EFLAGS; + break; + + case FP_XSAVE: + fpsw = fp->fpu_regs.kfpu_u.kfpu_xs.xs_fxsave.fx_fsw; + fpcw = fp->fpu_regs.kfpu_u.kfpu_xs.xs_fxsave.fx_fcw; + fp->fpu_regs.kfpu_u.kfpu_xs.xs_fxsave.fx_fsw &= ~FPS_SW_EFLAGS; + /* + * Always set LEGACY_FP as it may have been cleared by XSAVE + * instruction + */ + fp->fpu_regs.kfpu_u.kfpu_xs.xs_xstate_bv |= XFEATURE_LEGACY_FP; + break; + default: + panic("Invalid fp_save_mech"); + /*NOTREACHED*/ + } + fp->fpu_regs.kfpu_status = fpsw; if ((fpsw & FPS_ES) == 0) @@ -493,7 +596,7 @@ fpsimderrflt(struct regs *rp) uint32_t mxcsr, xmask; fpu_ctx_t *fp = &ttolwp(curthread)->lwp_pcb.pcb_fpu; - ASSERT(fp_kind == __FP_SSE); + ASSERT(fp_kind & __FP_SSE); /* * NOTE: Interrupts are disabled during execution of this @@ -625,20 +728,30 @@ fpsetcw(uint16_t fcw, uint32_t mxcsr) */ fp_save(fp); -#if defined(__amd64) - fx = &fp->fpu_regs.kfpu_u.kfpu_fx; - fx->fx_fcw = fcw; - fx->fx_mxcsr = sse_mxcsr_mask & mxcsr; -#else - switch (fp_kind) { - case __FP_SSE: + switch (fp_save_mech) { +#if defined(__i386) + case FP_FNSAVE: + fp->fpu_regs.kfpu_u.kfpu_fn.f_fcw = fcw; + break; +#endif + case FP_FXSAVE: fx = &fp->fpu_regs.kfpu_u.kfpu_fx; fx->fx_fcw = fcw; fx->fx_mxcsr = sse_mxcsr_mask & mxcsr; break; - default: - fp->fpu_regs.kfpu_u.kfpu_fn.f_fcw = fcw; + + case FP_XSAVE: + fx = &fp->fpu_regs.kfpu_u.kfpu_xs.xs_fxsave; + fx->fx_fcw = fcw; + fx->fx_mxcsr = sse_mxcsr_mask & mxcsr; + /* + * Always set 
LEGACY_FP as it may have been cleared by XSAVE + * instruction + */ + fp->fpu_regs.kfpu_u.kfpu_xs.xs_xstate_bv |= XFEATURE_LEGACY_FP; break; + default: + panic("Invalid fp_save_mech"); + /*NOTREACHED*/ } -#endif } diff --git a/usr/src/uts/intel/ia32/os/sysi86.c b/usr/src/uts/intel/ia32/os/sysi86.c index e677ba68fa..308cdddf4f 100644 --- a/usr/src/uts/intel/ia32/os/sysi86.c +++ b/usr/src/uts/intel/ia32/os/sysi86.c @@ -19,8 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved. */ /* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */ @@ -30,8 +29,6 @@ /* Copyright (c) 1987, 1988 Microsoft Corporation */ /* All Rights Reserved */ -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/param.h> #include <sys/types.h> #include <sys/sysmacros.h> @@ -170,7 +167,7 @@ sysi86(short cmd, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3) break; } fpsetcw((uint16_t)arg2, (uint32_t)arg3); - return (fp_kind == __FP_SSE ? 1 : 0); + return ((fp_kind & __FP_SSE) ? 1 : 0); /* real time clock management commands */ diff --git a/usr/src/uts/intel/sys/archsystm.h b/usr/src/uts/intel/sys/archsystm.h index 1821eb1dc7..a39221b353 100644 --- a/usr/src/uts/intel/sys/archsystm.h +++ b/usr/src/uts/intel/sys/archsystm.h @@ -58,6 +58,8 @@ extern void patch_sse(void); extern void patch_sse2(void); #endif +extern void patch_xsave(void); + extern void cli(void); extern void sti(void); @@ -193,6 +195,7 @@ extern void patch_tsc_read(int); #if defined(__amd64) && !defined(__xpv) extern void patch_memops(uint_t); #endif /* defined(__amd64) && !defined(__xpv) */ +extern void setup_xfem(void); #define cpr_dprintf prom_printf #define IN_XPV_PANIC() (__lintzero) #endif diff --git a/usr/src/uts/intel/sys/controlregs.h b/usr/src/uts/intel/sys/controlregs.h index dc8ec9c8c5..aa9ab14a89 100644 --- a/usr/src/uts/intel/sys/controlregs.h +++ b/usr/src/uts/intel/sys/controlregs.h @@ -19,8 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. */ #ifndef _SYS_CONTROLREGS_H @@ -108,9 +107,10 @@ extern "C" { /* 0x1000 reserved */ #define CR4_VMXE 0x2000 #define CR4_SMXE 0x4000 +#define CR4_OSXSAVE 0x40000 /* OS xsave/xrestore support */ #define FMT_CR4 \ - "\20\17smxe\16vmxe\13xmme\12fxsr\11pce\10pge" \ + "\20\23osxsav\17smxe\16vmxe\13xmme\12fxsr\11pce\10pge" \ "\7mce\6pae\5pse\4de\3tsd\2pvi\1vme" /* diff --git a/usr/src/uts/intel/sys/fp.h b/usr/src/uts/intel/sys/fp.h index 02fb34fc65..4956e2d318 100644 --- a/usr/src/uts/intel/sys/fp.h +++ b/usr/src/uts/intel/sys/fp.h @@ -19,8 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved. */ /* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */ @@ -30,8 +29,6 @@ #ifndef _SYS_FP_H #define _SYS_FP_H -#pragma ident "%Z%%M% %I% %E% SMI" - #ifdef __cplusplus extern "C" { #endif @@ -50,7 +47,20 @@ extern "C" { #define FP_387 3 /* 80387 chip present */ #define FP_487 6 /* 80487 chip present */ #define FP_486 6 /* 80486 chip present */ -#define __FP_SSE 0x103 /* x87 plus SSE-capable CPU */ +/* + * The following values are bit flags instead of actual values. + * E.g. 
to know if we are using SSE, test (value & __FP_SSE) instead + * of (value == __FP_SSE). + */ +#define __FP_SSE 0x100 /* .. plus SSE-capable CPU */ +#define __FP_AVX 0x200 /* .. plus AVX-capable CPU */ + +/* + * values that go into fp_save_mech + */ +#define FP_FNSAVE 1 /* fnsave/frstor instructions */ +#define FP_FXSAVE 2 /* fxsave/fxrstor instructions */ +#define FP_XSAVE 3 /* xsave/xrstor instructions */ /* * masks for 80387 control word @@ -159,6 +169,7 @@ extern "C" { "\10im\7daz\6pe\5ue\4oe\3ze\2de\1ie" extern int fp_kind; /* kind of fp support */ +extern int fp_save_mech; /* fp save/restore mechanism */ extern int fpu_exists; /* FPU hw exists */ #ifdef _KERNEL @@ -174,15 +185,19 @@ extern int fpu_probe_pentium_fdivbug(void); extern void fpnsave_ctxt(void *); extern void fpxsave_ctxt(void *); +extern void xsave_ctxt(void *); extern void (*fpsave_ctxt)(void *); struct fnsave_state; struct fxsave_state; +struct xsave_state; extern void fxsave_insn(struct fxsave_state *); extern void fpsave(struct fnsave_state *); extern void fprestore(struct fnsave_state *); extern void fpxsave(struct fxsave_state *); extern void fpxrestore(struct fxsave_state *); +extern void xsave(struct xsave_state *, uint64_t); +extern void xrestore(struct xsave_state *, uint64_t); extern void fpenable(void); extern void fpdisable(void); diff --git a/usr/src/uts/intel/sys/pcb.h b/usr/src/uts/intel/sys/pcb.h index ec5dea501c..3a690bd980 100644 --- a/usr/src/uts/intel/sys/pcb.h +++ b/usr/src/uts/intel/sys/pcb.h @@ -20,8 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved. */ #ifndef _SYS_PCB_H @@ -37,6 +36,10 @@ extern "C" { #ifndef _ASM typedef struct fpu_ctx { kfpu_t fpu_regs; /* kernel save area for FPU */ + uint64_t fpu_xsave_mask; /* xsave mask for FPU/SSE/AVX */ +#if defined(__i386) + uint64_t fpu_padding; /* fix 32bit libmicro regression */ +#endif uint_t fpu_flags; /* FPU state flags */ } fpu_ctx_t; diff --git a/usr/src/uts/intel/sys/regset.h b/usr/src/uts/intel/sys/regset.h index ff0044e317..5436ae0be3 100644 --- a/usr/src/uts/intel/sys/regset.h +++ b/usr/src/uts/intel/sys/regset.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,10 +19,8 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved. */ - /* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ @@ -32,8 +29,6 @@ #ifndef _SYS_REGSET_H #define _SYS_REGSET_H -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/feature_tests.h> #if !defined(_ASM) @@ -246,6 +241,18 @@ struct fxsave_state { #endif }; /* 512 bytes */ +/* + * This structure is written to memory by an 'xsave' instruction. + * First 512 byte is compatible with the format of an 'fxsave' area. 
+ */ +struct xsave_state { + struct fxsave_state xs_fxsave; + uint64_t xs_xstate_bv; /* 512 */ + uint64_t xs_rsv_mbz[2]; + uint64_t xs_reserved[5]; + upad128_t xs_ymm[16]; /* avx - 576 */ +}; /* 832 bytes, asserted in fpnoextflt() */ + #if defined(__amd64) typedef struct fpu { @@ -352,6 +359,7 @@ typedef struct { #if defined(__i386) struct fnsave_state kfpu_fn; #endif + struct xsave_state kfpu_xs; } kfpu_u; uint32_t kfpu_status; /* saved at #mf exception */ uint32_t kfpu_xstatus; /* saved at #xm exception */ diff --git a/usr/src/uts/intel/sys/x86_archext.h b/usr/src/uts/intel/sys/x86_archext.h index 896fce14e1..fba9c6e896 100644 --- a/usr/src/uts/intel/sys/x86_archext.h +++ b/usr/src/uts/intel/sys/x86_archext.h @@ -112,9 +112,13 @@ extern "C" { #define CPUID_INTC_ECX_MOVBE 0x00400000 /* MOVBE insn */ #define CPUID_INTC_ECX_POPCNT 0x00800000 /* POPCNT insn */ #define CPUID_INTC_ECX_AES 0x02000000 /* AES insns */ +#define CPUID_INTC_ECX_XSAVE 0x04000000 /* XSAVE/XRESTOR insns */ +#define CPUID_INTC_ECX_OSXSAVE 0x08000000 /* OS supports XSAVE insns */ +#define CPUID_INTC_ECX_AVX 0x10000000 /* AVX supported */ #define FMT_CPUID_INTC_ECX \ "\20" \ + "\35avx\34osxsav\33xsave" \ "\32aes" \ "\30popcnt\27movbe\25sse4.2\24sse4.1\23dca" \ "\20\17etprd\16cx16\13cid\12ssse3\11tm2" \ @@ -356,6 +360,8 @@ extern "C" { #define X86FSET_64 30 #define X86FSET_AES 31 #define X86FSET_PCLMULQDQ 32 +#define X86FSET_XSAVE 33 +#define X86FSET_AVX 34 /* * flags to patch tsc_read routine. @@ -561,6 +567,20 @@ extern "C" { #define X86_SOCKET_ASB2 _X86_SOCKET_MKVAL(X86_VENDOR_AMD, 0x001000) #define X86_SOCKET_C32 _X86_SOCKET_MKVAL(X86_VENDOR_AMD, 0x002000) +/* + * xgetbv/xsetbv support + */ + +#define XFEATURE_ENABLED_MASK 0x0 +/* + * XFEATURE_ENABLED_MASK values (eax) + */ +#define XFEATURE_LEGACY_FP 0x1 +#define XFEATURE_SSE 0x2 +#define XFEATURE_AVX 0x4 +#define XFEATURE_MAX XFEATURE_AVX +#define XFEATURE_FP_ALL (XFEATURE_LEGACY_FP|XFEATURE_SSE|XFEATURE_AVX) + #if !defined(_ASM) #if defined(_KERNEL) || defined(_KMEMUSER) @@ -601,6 +621,13 @@ struct cpuid_regs { uint32_t cp_edx; }; +/* + * Utility functions to get/set extended control registers (XCR) + * Initial use is to get/set the contents of the XFEATURE_ENABLED_MASK. + */ +extern uint64_t get_xcr(uint_t); +extern void set_xcr(uint_t, uint64_t); + extern uint64_t rdmsr(uint_t); extern void wrmsr(uint_t, const uint64_t); extern uint64_t xrdmsr(uint_t); @@ -732,6 +759,8 @@ extern void patch_workaround_6323525(void); extern int get_hwenv(void); extern int is_controldom(void); +extern void xsave_setup_msr(struct cpu *); + /* * Defined hardware environments */ |
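The get_xcr()/set_xcr() routines in i86_subr.s emit xgetbv/xsetbv as raw opcode bytes because assemblers of the era lacked the mnemonics. For reference, here is a hedged user-level analogue of get_xcr() (a sketch, not part of the patch); xsetbv is privileged, so set_xcr() has no user-level counterpart, and executing xgetbv at all presumes CPUID reports OSXSAVE, since the instruction raises #UD otherwise.

#include <stdint.h>
#include <stdio.h>

/* XCR 0 is the XFEATURE_ENABLED_MASK described above */
static uint64_t
xgetbv(uint32_t xcr)
{
        uint32_t eax, edx;

        __asm__ volatile(".byte 0x0f, 0x01, 0xd0"       /* xgetbv */
            : "=a" (eax), "=d" (edx)
            : "c" (xcr));
        return (((uint64_t)edx << 32) | eax);
}

int
main(void)
{
        /* bit 0 = x87, bit 1 = SSE, bit 2 = AVX, per XFEATURE_* */
        (void) printf("XCR0 = 0x%llx\n", (unsigned long long)xgetbv(0));
        return (0);
}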