author	Kuriakose Kuruvilla <kuriakose.kuruvilla@oracle.com>	2010-08-16 19:36:08 -0700
committer	Kuriakose Kuruvilla <kuriakose.kuruvilla@oracle.com>	2010-08-16 19:36:08 -0700
commit	7af88ac71631ebf259c6c4c22a9f649ddff3e270 (patch)
tree	3d0109c3dff3677bbff5901e1a9616ace0fae4da /usr/src
parent	315e695527b211489a44386ec695c6ccd3af4e6e (diff)
6958308 XSAVE/XRSTOR mechanism to save and restore processor state
Contributed by Lejun Zhu <lejun.zhu@intel.com>
Diffstat (limited to 'usr/src')
-rw-r--r--	usr/src/common/elfcap/elfcap.c	11
-rw-r--r--	usr/src/common/elfcap/elfcap.h	5
-rw-r--r--	usr/src/uts/common/disp/thread.c	9
-rw-r--r--	usr/src/uts/common/sys/auxv_386.h	6
-rw-r--r--	usr/src/uts/i86pc/ml/genassym.c	7
-rw-r--r--	usr/src/uts/i86pc/ml/offsets.in	1
-rw-r--r--	usr/src/uts/i86pc/os/cpr_impl.c	8
-rw-r--r--	usr/src/uts/i86pc/os/cpuid.c	196
-rw-r--r--	usr/src/uts/i86pc/os/fpu_subr.c	55
-rw-r--r--	usr/src/uts/i86pc/os/mp_startup.c	7
-rw-r--r--	usr/src/uts/i86pc/os/startup.c	7
-rw-r--r--	usr/src/uts/intel/ia32/ml/exception.s	80
-rw-r--r--	usr/src/uts/intel/ia32/ml/float.s	224
-rw-r--r--	usr/src/uts/intel/ia32/ml/i86_subr.s	52
-rw-r--r--	usr/src/uts/intel/ia32/os/archdep.c	117
-rw-r--r--	usr/src/uts/intel/ia32/os/fpu.c	245
-rw-r--r--	usr/src/uts/intel/ia32/os/sysi86.c	7
-rw-r--r--	usr/src/uts/intel/sys/archsystm.h	3
-rw-r--r--	usr/src/uts/intel/sys/controlregs.h	6
-rw-r--r--	usr/src/uts/intel/sys/fp.h	25
-rw-r--r--	usr/src/uts/intel/sys/pcb.h	7
-rw-r--r--	usr/src/uts/intel/sys/regset.h	24
-rw-r--r--	usr/src/uts/intel/sys/x86_archext.h	29
23 files changed, 941 insertions, 190 deletions
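
The change, in short: when CPUID advertises XSAVE, the kernel switches its FPU save/restore mechanism (fp_save_mech) to xsave/xrstor, sizes and aligns the save area for the AVX ymm state, and publishes AV_386_XSAVE/AV_386_AVX to userland. As a minimal illustration of the CPUID.1:ECX bits the kernel keys off below, here is a userland sketch using GCC's <cpuid.h> (not part of this change):

	#include <cpuid.h>
	#include <stdio.h>

	/* CPUID.1:ECX feature bits used by this change */
	#define	ECX_XSAVE	(1u << 26)	/* XSAVE/XRSTOR supported */
	#define	ECX_OSXSAVE	(1u << 27)	/* OS has set CR4.OSXSAVE */
	#define	ECX_AVX		(1u << 28)	/* AVX supported */

	int
	main(void)
	{
		unsigned int a, b, c, d;

		if (!__get_cpuid(1, &a, &b, &c, &d))
			return (1);
		printf("xsave=%d osxsave=%d avx=%d\n",
		    (c & ECX_XSAVE) != 0, (c & ECX_OSXSAVE) != 0,
		    (c & ECX_AVX) != 0);
		return (0);
	}
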
diff --git a/usr/src/common/elfcap/elfcap.c b/usr/src/common/elfcap/elfcap.c
index 55cd3e116d..0e1558468a 100644
--- a/usr/src/common/elfcap/elfcap.c
+++ b/usr/src/common/elfcap/elfcap.c
@@ -20,8 +20,7 @@
*/
/*
- * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/* LINTLIBRARY */
@@ -288,6 +287,14 @@ static const elfcap_desc_t hw1_386[ELFCAP_NUM_HW1_386] = {
{ /* 0x08000000 */
AV_386_PCLMULQDQ, STRDESC("AV_386_PCLMULQDQ"),
STRDESC("PCLMULQDQ"), STRDESC("pclmulqdq"),
+ },
+ { /* 0x10000000 */
+ AV_386_XSAVE, STRDESC("AV_386_XSAVE"),
+ STRDESC("XSAVE"), STRDESC("xsave"),
+ },
+ { /* 0x20000000 */
+ AV_386_AVX, STRDESC("AV_386_AVX"),
+ STRDESC("AVX"), STRDESC("avx"),
}
};
diff --git a/usr/src/common/elfcap/elfcap.h b/usr/src/common/elfcap/elfcap.h
index 6cd68b53f2..9f0ef25f09 100644
--- a/usr/src/common/elfcap/elfcap.h
+++ b/usr/src/common/elfcap/elfcap.h
@@ -20,8 +20,7 @@
*/
/*
- * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _ELFCAP_DOT_H
@@ -114,7 +113,7 @@ typedef enum {
*/
#define ELFCAP_NUM_SF1 3
#define ELFCAP_NUM_HW1_SPARC 17
-#define ELFCAP_NUM_HW1_386 28
+#define ELFCAP_NUM_HW1_386 30
/*
diff --git a/usr/src/uts/common/disp/thread.c b/usr/src/uts/common/disp/thread.c
index 245ee27e04..5ed9110251 100644
--- a/usr/src/uts/common/disp/thread.c
+++ b/usr/src/uts/common/disp/thread.c
@@ -20,8 +20,7 @@
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#include <sys/types.h>
@@ -186,11 +185,11 @@ thread_init(void)
/*
* "struct _klwp" includes a "struct pcb", which includes a
- * "struct fpu", which needs to be 16-byte aligned on amd64
- * (and even on i386 for fxsave/fxrstor).
+ * "struct fpu", which needs to be 64-byte aligned on amd64
+ * (and even on i386) for xsave/xrstor.
*/
lwp_cache = kmem_cache_create("lwp_cache", sizeof (klwp_t),
- 16, NULL, NULL, NULL, NULL, NULL, 0);
+ 64, NULL, NULL, NULL, NULL, NULL, 0);
#else
/*
* Allocate thread structures from static_arena. This prevents
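
The jump from 16- to 64-byte alignment above is not cosmetic: xsave/xrstor fault with #GP unless their save area is 64-byte aligned, where fxsave/fxrstor only required 16. A sketch of the invariant the new cache alignment buys (hypothetical assertion; it relies on the FPU context sitting at a 64-byte-aligned offset within the klwp, as this patch arranges):

	klwp_t *lwp = kmem_cache_alloc(lwp_cache, KM_SLEEP);
	/* xsave/xrstor #GP unless their operand is 64-byte aligned */
	ASSERT(((uintptr_t)&lwp->lwp_pcb.pcb_fpu.fpu_regs & 63) == 0);
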
diff --git a/usr/src/uts/common/sys/auxv_386.h b/usr/src/uts/common/sys/auxv_386.h
index 1e7afc3a22..f9b4867f10 100644
--- a/usr/src/uts/common/sys/auxv_386.h
+++ b/usr/src/uts/common/sys/auxv_386.h
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _SYS_AUXV_386_H
@@ -68,9 +67,12 @@ extern "C" {
#define AV_386_MOVBE 0x2000000 /* Intel MOVBE insns */
#define AV_386_AES 0x4000000 /* Intel AES insns */
#define AV_386_PCLMULQDQ 0x8000000 /* Intel PCLMULQDQ insn */
+#define AV_386_XSAVE 0x10000000 /* Intel XSAVE/XRSTOR insns */
+#define AV_386_AVX 0x20000000 /* Intel AVX insns */
#define FMT_AV_386 \
"\20" \
+ "\36avx\35xsave" \
"\34pclmulqdq\33aes" \
"\32movbe\31sse4.2" \
"\30sse4.1\27ssse3\26amd_lzcnt\25popcnt" \
diff --git a/usr/src/uts/i86pc/ml/genassym.c b/usr/src/uts/i86pc/ml/genassym.c
index 4836628401..a34ca50669 100644
--- a/usr/src/uts/i86pc/ml/genassym.c
+++ b/usr/src/uts/i86pc/ml/genassym.c
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _GENASSYM
@@ -123,6 +122,10 @@ main(int argc, char *argv[])
printf("#define\tFP_387 0x%x\n", FP_387);
printf("#define\t__FP_SSE 0x%x\n", __FP_SSE);
+ printf("#define\tFP_FNSAVE 0x%x\n", FP_FNSAVE);
+ printf("#define\tFP_FXSAVE 0x%x\n", FP_FXSAVE);
+ printf("#define\tFP_XSAVE 0x%x\n", FP_XSAVE);
+
printf("#define\tAV_INT_SPURIOUS 0x%x\n", AV_INT_SPURIOUS);
printf("#define\tCPU_READY 0x%x\n", CPU_READY);
diff --git a/usr/src/uts/i86pc/ml/offsets.in b/usr/src/uts/i86pc/ml/offsets.in
index 4b5d4fc694..20e0c972d4 100644
--- a/usr/src/uts/i86pc/ml/offsets.in
+++ b/usr/src/uts/i86pc/ml/offsets.in
@@ -165,6 +165,7 @@ _klwp
fpu_ctx
fpu_regs FPU_CTX_FPU_REGS
fpu_flags FPU_CTX_FPU_FLAGS
+ fpu_xsave_mask FPU_CTX_FPU_XSAVE_MASK
fxsave_state FXSAVE_STATE_SIZE
fx_fsw FXSAVE_STATE_FSW
diff --git a/usr/src/uts/i86pc/os/cpr_impl.c b/usr/src/uts/i86pc/os/cpr_impl.c
index 8f57ca7366..555ed9f842 100644
--- a/usr/src/uts/i86pc/os/cpr_impl.c
+++ b/usr/src/uts/i86pc/os/cpr_impl.c
@@ -65,6 +65,7 @@
#include <sys/reboot.h>
#include <sys/acpi/acpi.h>
#include <sys/acpica.h>
+#include <sys/fp.h>
#define AFMT "%lx"
@@ -944,6 +945,13 @@ i_cpr_start_cpu(void)
pat_sync();
/*
+ * If we use XSAVE, we need to restore the XFEATURE_ENABLED_MASK register.
+ */
+ if (fp_save_mech == FP_XSAVE) {
+ setup_xfem();
+ }
+
+ /*
* Initialize this CPU's syscall handlers
*/
init_cpu_syscall(cp);
diff --git a/usr/src/uts/i86pc/os/cpuid.c b/usr/src/uts/i86pc/os/cpuid.c
index 44db76c814..19e505917d 100644
--- a/usr/src/uts/i86pc/os/cpuid.c
+++ b/usr/src/uts/i86pc/os/cpuid.c
@@ -118,7 +118,7 @@ uint_t x86_clflush_size = 0;
uint_t pentiumpro_bug4046376;
uint_t pentiumpro_bug4064495;
-#define NUM_X86_FEATURES 33
+#define NUM_X86_FEATURES 35
void *x86_featureset;
ulong_t x86_featureset0[BT_SIZEOFMAP(NUM_X86_FEATURES)];
@@ -155,7 +155,9 @@ char *x86_feature_names[NUM_X86_FEATURES] = {
"clfsh",
"64",
"aes",
- "pclmulqdq" };
+ "pclmulqdq",
+ "xsave",
+ "avx" };
static void *
init_x86_featureset(void)
@@ -217,6 +219,11 @@ print_x86_featureset(void *featureset)
}
uint_t enable486;
+
+static size_t xsave_state_size = 0;
+uint64_t xsave_bv_all = (XFEATURE_LEGACY_FP | XFEATURE_SSE);
+boolean_t xsave_force_disable = B_FALSE;
+
/*
* This is set to platform type Solaris is running on.
*/
@@ -247,6 +254,23 @@ struct mwait_info {
};
/*
+ * xsave/xrstor info.
+ *
+ * This structure contains HW feature bits and size of the xsave save area.
+ * Note: the kernel will use the maximum size required for all hardware
+ * features. It is not optimized for potential memory savings if features at
+ * the end of the save area are not enabled.
+ */
+struct xsave_info {
+ uint32_t xsav_hw_features_low; /* Supported HW features */
+ uint32_t xsav_hw_features_high; /* Supported HW features */
+ size_t xsav_max_size; /* max size save area for HW features */
+ size_t ymm_size; /* AVX: size of ymm save area */
+ size_t ymm_offset; /* AVX: offset for ymm save area */
+};
+
+
+/*
* These constants determine how many of the elements of the
* cpuid we cache in the cpuid_info data structure; the
* remaining elements are accessible via the cpuid instruction.
@@ -327,6 +351,8 @@ struct cpuid_info {
uint_t cpi_procnodeid; /* AMD: nodeID on HT, Intel: chipid */
uint_t cpi_procnodes_per_pkg; /* AMD: # of nodes in the package */
/* Intel: 1 */
+
+ struct xsave_info cpi_xsave; /* fn D: xsave/xrstor info */
};
@@ -429,6 +455,12 @@ static struct cpuid_info cpuid_info0;
BITX((cpi)->cpi_std[5].cp_edx, c_state + 3, c_state)
/*
+ * XSAVE leaf 0xD enumeration
+ */
+#define CPUID_LEAFD_2_YMM_OFFSET 576
+#define CPUID_LEAFD_2_YMM_SIZE 256
+
+/*
* Functions we consume from cpuid_subr.c; don't publish these in a header
* file to try and keep people using the expected cpuid_* interfaces.
*/
@@ -815,6 +847,27 @@ cpuid_amd_getids(cpu_t *cpu)
}
}
+/*
+ * Set up the XFeature_Enabled_Mask register. Required by the xsave feature.
+ */
+void
+setup_xfem(void)
+{
+ uint64_t flags = XFEATURE_LEGACY_FP;
+
+ ASSERT(is_x86_feature(x86_featureset, X86FSET_XSAVE));
+
+ if (is_x86_feature(x86_featureset, X86FSET_SSE))
+ flags |= XFEATURE_SSE;
+
+ if (is_x86_feature(x86_featureset, X86FSET_AVX))
+ flags |= XFEATURE_AVX;
+
+ set_xcr(XFEATURE_ENABLED_MASK, flags);
+
+ xsave_bv_all = flags;
+}
+
void *
cpuid_pass1(cpu_t *cpu)
{
@@ -827,7 +880,6 @@ cpuid_pass1(cpu_t *cpu)
extern int idle_cpu_prefer_mwait;
#endif
-
#if !defined(__xpv)
determine_platform();
#endif
@@ -1082,8 +1134,18 @@ cpuid_pass1(cpu_t *cpu)
* Do not support MONITOR/MWAIT under a hypervisor
*/
mask_ecx &= ~CPUID_INTC_ECX_MON;
+ /*
+ * Do not support XSAVE under a hypervisor for now
+ */
+ xsave_force_disable = B_TRUE;
+
#endif /* __xpv */
+ if (xsave_force_disable) {
+ mask_ecx &= ~CPUID_INTC_ECX_XSAVE;
+ mask_ecx &= ~CPUID_INTC_ECX_AVX;
+ }
+
/*
* Now we've figured out the masks that determine
* which bits we choose to believe, apply the masks
@@ -1180,6 +1242,15 @@ cpuid_pass1(cpu_t *cpu)
if (cp->cp_ecx & CPUID_INTC_ECX_PCLMULQDQ) {
add_x86_feature(featureset, X86FSET_PCLMULQDQ);
}
+
+ if (cp->cp_ecx & CPUID_INTC_ECX_XSAVE) {
+ add_x86_feature(featureset, X86FSET_XSAVE);
+ /* We only test AVX when there is XSAVE */
+ if (cp->cp_ecx & CPUID_INTC_ECX_AVX) {
+ add_x86_feature(featureset,
+ X86FSET_AVX);
+ }
+ }
}
}
if (cp->cp_edx & CPUID_INTC_EDX_DE) {
@@ -1724,6 +1795,92 @@ cpuid_pass2(cpu_t *cpu)
cp = NULL;
}
+ /*
+ * XSAVE enumeration
+ */
+ if (cpi->cpi_maxeax >= 0xD && cpi->cpi_vendor == X86_VENDOR_Intel) {
+ struct cpuid_regs regs;
+ boolean_t cpuid_d_valid = B_TRUE;
+
+ cp = &regs;
+ cp->cp_eax = 0xD;
+ cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;
+
+ (void) __cpuid_insn(cp);
+
+ /*
+ * Sanity checks for debug
+ */
+ if ((cp->cp_eax & XFEATURE_LEGACY_FP) == 0 ||
+ (cp->cp_eax & XFEATURE_SSE) == 0) {
+ cpuid_d_valid = B_FALSE;
+ }
+
+ cpi->cpi_xsave.xsav_hw_features_low = cp->cp_eax;
+ cpi->cpi_xsave.xsav_hw_features_high = cp->cp_edx;
+ cpi->cpi_xsave.xsav_max_size = cp->cp_ecx;
+
+ /*
+ * If the hw supports AVX, get the size and offset in the save
+ * area for the ymm state.
+ */
+ if (cpi->cpi_xsave.xsav_hw_features_low & XFEATURE_AVX) {
+ cp->cp_eax = 0xD;
+ cp->cp_ecx = 2;
+ cp->cp_edx = cp->cp_ebx = 0;
+
+ (void) __cpuid_insn(cp);
+
+ if (cp->cp_ebx != CPUID_LEAFD_2_YMM_OFFSET ||
+ cp->cp_eax != CPUID_LEAFD_2_YMM_SIZE) {
+ cpuid_d_valid = B_FALSE;
+ }
+
+ cpi->cpi_xsave.ymm_size = cp->cp_eax;
+ cpi->cpi_xsave.ymm_offset = cp->cp_ebx;
+ }
+
+ if (!is_x86_feature(x86_featureset, X86FSET_XSAVE)) {
+ xsave_state_size = 0;
+ } else if (cpuid_d_valid) {
+ xsave_state_size = cpi->cpi_xsave.xsav_max_size;
+ } else {
+ /* Broken CPUID 0xD, probably in HVM */
+ cmn_err(CE_WARN, "cpu%d: CPUID.0xD returns invalid "
+ "value: hw_low = %d, hw_high = %d, xsave_size = %d"
+ ", ymm_size = %d, ymm_offset = %d\n",
+ cpu->cpu_id, cpi->cpi_xsave.xsav_hw_features_low,
+ cpi->cpi_xsave.xsav_hw_features_high,
+ (int)cpi->cpi_xsave.xsav_max_size,
+ (int)cpi->cpi_xsave.ymm_size,
+ (int)cpi->cpi_xsave.ymm_offset);
+
+ if (xsave_state_size != 0) {
+ /*
+ * This must be a non-boot CPU. We cannot
+ * continue, because boot cpu has already
+ * enabled XSAVE.
+ */
+ ASSERT(cpu->cpu_id != 0);
+ cmn_err(CE_PANIC, "cpu%d: we have already "
+ "enabled XSAVE on boot cpu, cannot "
+ "continue.", cpu->cpu_id);
+ } else {
+ /*
+ * Must be from boot CPU, OK to disable XSAVE.
+ */
+ ASSERT(cpu->cpu_id == 0);
+ remove_x86_feature(x86_featureset,
+ X86FSET_XSAVE);
+ remove_x86_feature(x86_featureset, X86FSET_AVX);
+ CPI_FEATURES_ECX(cpi) &= ~CPUID_INTC_ECX_XSAVE;
+ CPI_FEATURES_ECX(cpi) &= ~CPUID_INTC_ECX_AVX;
+ xsave_force_disable = B_TRUE;
+ }
+ }
+ }
+
+
if ((cpi->cpi_xmaxeax & 0x80000000) == 0)
goto pass2_done;
@@ -2386,6 +2543,11 @@ cpuid_pass4(cpu_t *cpu)
*ecx &= ~CPUID_INTC_ECX_AES;
if (!is_x86_feature(x86_featureset, X86FSET_PCLMULQDQ))
*ecx &= ~CPUID_INTC_ECX_PCLMULQDQ;
+ if (!is_x86_feature(x86_featureset, X86FSET_XSAVE))
+ *ecx &= ~(CPUID_INTC_ECX_XSAVE |
+ CPUID_INTC_ECX_OSXSAVE);
+ if (!is_x86_feature(x86_featureset, X86FSET_AVX))
+ *ecx &= ~CPUID_INTC_ECX_AVX;
}
/*
@@ -2419,6 +2581,9 @@ cpuid_pass4(cpu_t *cpu)
hwcap_flags |= AV_386_AES;
if (*ecx & CPUID_INTC_ECX_PCLMULQDQ)
hwcap_flags |= AV_386_PCLMULQDQ;
+ if ((*ecx & CPUID_INTC_ECX_XSAVE) &&
+ (*ecx & CPUID_INTC_ECX_OSXSAVE))
+ hwcap_flags |= AV_386_XSAVE;
}
if (*ecx & CPUID_INTC_ECX_POPCNT)
hwcap_flags |= AV_386_POPCNT;
@@ -4273,6 +4438,31 @@ post_startup_cpu_fixups(void)
}
/*
+ * Set up the registers necessary to enable the XSAVE feature on this processor.
+ * This function needs to be called early enough, so that no xsave/xrstor
+ * ops will execute on the processor before the MSRs are properly set up.
+ *
+ * The current implementation makes the following assumptions:
+ * - cpuid_pass1() is done, so that X86 features are known.
+ * - fpu_probe() is done, so that fp_save_mech is chosen.
+ */
+void
+xsave_setup_msr(cpu_t *cpu)
+{
+ ASSERT(fp_save_mech == FP_XSAVE);
+ ASSERT(is_x86_feature(x86_featureset, X86FSET_XSAVE));
+
+ /* Enable OSXSAVE in CR4. */
+ setcr4(getcr4() | CR4_OSXSAVE);
+ /*
+ * Update SW copy of ECX, so that /dev/cpu/self/cpuid will report
+ * correct value.
+ */
+ cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_ecx |= CPUID_INTC_ECX_OSXSAVE;
+ setup_xfem();
+}
+
+/*
* Starting with the Westmere processor the local
* APIC timer will continue running in all C-states,
* including the deepest C-states.
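
The leaf-0xD probe above is easy to reproduce from userland, which is also a handy way to sanity-check the CPUID_LEAFD_2_YMM_* constants against real hardware. A sketch using GCC's <cpuid.h> (not illumos code):

	#include <cpuid.h>
	#include <stdio.h>

	int
	main(void)
	{
		unsigned int a, b, c, d;

		/* sub-leaf 0: feature mask in EDX:EAX, max save size in ECX */
		__cpuid_count(0xD, 0, a, b, c, d);
		printf("features=0x%x:%08x max_size=%u\n", d, a, c);

		/* sub-leaf 2: AVX (ymm) component size and offset */
		__cpuid_count(0xD, 2, a, b, c, d);
		printf("ymm_size=%u ymm_offset=%u\n", a, b);	/* 256, 576 */
		return (0);
	}
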
diff --git a/usr/src/uts/i86pc/os/fpu_subr.c b/usr/src/uts/i86pc/os/fpu_subr.c
index 7bb68f7168..0598b913f1 100644
--- a/usr/src/uts/i86pc/os/fpu_subr.c
+++ b/usr/src/uts/i86pc/os/fpu_subr.c
@@ -48,6 +48,15 @@ int fpu_exists = 1;
int fp_kind = FP_387;
/*
+ * Mechanism to save FPU state.
+ */
+#if defined(__amd64)
+int fp_save_mech = FP_FXSAVE;
+#elif defined(__i386)
+int fp_save_mech = FP_FNSAVE;
+#endif
+
+/*
* The variable fpu_ignored is provided to allow other code to
* determine whether emulation is being done because there is
* no FPU or because of an override requested via /etc/system.
@@ -141,8 +150,20 @@ fpu_probe(void)
*/
if (is_x86_feature(x86_featureset, X86FSET_SSE) &&
is_x86_feature(x86_featureset, X86FSET_SSE2)) {
- fp_kind = __FP_SSE;
+ fp_kind |= __FP_SSE;
ENABLE_SSE();
+
+ if (is_x86_feature(x86_featureset, X86FSET_AVX)) {
+ ASSERT(is_x86_feature(x86_featureset,
+ X86FSET_XSAVE));
+ fp_kind |= __FP_AVX;
+ }
+
+ if (is_x86_feature(x86_featureset, X86FSET_XSAVE)) {
+ fp_save_mech = FP_XSAVE;
+ fpsave_ctxt = xsave_ctxt;
+ patch_xsave();
+ }
}
#elif defined(__i386)
/*
@@ -150,15 +171,37 @@ fpu_probe(void)
* code to exploit it when present.
*/
if (is_x86_feature(x86_featureset, X86FSET_SSE)) {
- fp_kind = __FP_SSE;
+ fp_kind |= __FP_SSE;
+ ENABLE_SSE();
+ fp_save_mech = FP_FXSAVE;
fpsave_ctxt = fpxsave_ctxt;
- patch_sse();
- if (is_x86_feature(x86_featureset, X86FSET_SSE2))
+
+ if (is_x86_feature(x86_featureset, X86FSET_SSE2)) {
patch_sse2();
- ENABLE_SSE();
+ }
+
+ if (is_x86_feature(x86_featureset, X86FSET_AVX)) {
+ ASSERT(is_x86_feature(x86_featureset,
+ X86FSET_XSAVE));
+ fp_kind |= __FP_AVX;
+ }
+
+ if (is_x86_feature(x86_featureset, X86FSET_XSAVE)) {
+ fp_save_mech = FP_XSAVE;
+ fpsave_ctxt = xsave_ctxt;
+ patch_xsave();
+ } else {
+ patch_sse(); /* use fxrstor */
+ }
} else {
remove_x86_feature(x86_featureset, X86FSET_SSE2);
/*
+ * We are unlikely to have a chip with AVX but not
+ * SSE. But to be safe we disable AVX if SSE is not
+ * enabled.
+ */
+ remove_x86_feature(x86_featureset, X86FSET_AVX);
+ /*
* (Just in case the BIOS decided we wanted SSE
* enabled when we didn't. See 4965674.)
*/
@@ -169,7 +212,7 @@ fpu_probe(void)
use_sse_pagecopy = use_sse_pagezero = use_sse_copy = 1;
}
- if (fp_kind == __FP_SSE) {
+ if (fp_kind & __FP_SSE) {
struct fxsave_state *fx;
uint8_t fxsave_state[sizeof (struct fxsave_state) +
XMM_ALIGN];
diff --git a/usr/src/uts/i86pc/os/mp_startup.c b/usr/src/uts/i86pc/os/mp_startup.c
index f52c320c4b..2979ddb0fc 100644
--- a/usr/src/uts/i86pc/os/mp_startup.c
+++ b/usr/src/uts/i86pc/os/mp_startup.c
@@ -1711,6 +1711,13 @@ mp_startup_common(boolean_t boot)
*/
cp->cpu_flags &= ~(CPU_POWEROFF | CPU_QUIESCED);
+ /*
+ * Set up this processor for XSAVE.
+ */
+ if (fp_save_mech == FP_XSAVE) {
+ xsave_setup_msr(cp);
+ }
+
cpuid_pass2(cp);
cpuid_pass3(cp);
(void) cpuid_pass4(cp);
diff --git a/usr/src/uts/i86pc/os/startup.c b/usr/src/uts/i86pc/os/startup.c
index e5afdcc014..d8facc92e7 100644
--- a/usr/src/uts/i86pc/os/startup.c
+++ b/usr/src/uts/i86pc/os/startup.c
@@ -2193,6 +2193,13 @@ startup_end(void)
PRM_POINT("configure() done");
/*
+ * We can now set up for XSAVE because fpu_probe() is done in configure().
+ */
+ if (fp_save_mech == FP_XSAVE) {
+ xsave_setup_msr(CPU);
+ }
+
+ /*
* Set the isa_list string to the defined instruction sets we
* support.
*/
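
Putting the boot-time hooks together, the boot-CPU ordering these changes assume looks roughly like this (a call-order sketch, not verbatim kernel code):

	/* boot CPU */
	cpuid_pass1(CPU);		/* X86FSET_XSAVE / X86FSET_AVX known */
	fpu_probe();			/* inside configure(); picks fp_save_mech */
	if (fp_save_mech == FP_XSAVE)
		xsave_setup_msr(CPU);	/* CR4.OSXSAVE, then XCR0 via setup_xfem() */
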
diff --git a/usr/src/uts/intel/ia32/ml/exception.s b/usr/src/uts/intel/ia32/ml/exception.s
index d7236f9585..ab24e46548 100644
--- a/usr/src/uts/intel/ia32/ml/exception.s
+++ b/usr/src/uts/intel/ia32/ml/exception.s
@@ -1,6 +1,5 @@
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/*
@@ -637,30 +636,36 @@ _emul_done:
* after a context switch -- we do the frequent path in ndptrap_frstor
* below; for all other cases, we let the trap code handle it
*/
- LOADCPU(%rbx) /* swapgs handled in hypervisor */
+ LOADCPU(%rax) /* swapgs handled in hypervisor */
cmpl $0, fpu_exists(%rip)
je .handle_in_trap /* let trap handle no fp case */
- movq CPU_THREAD(%rbx), %r15 /* %r15 = curthread */
- movl $FPU_EN, %ebx
- movq T_LWP(%r15), %r15 /* %r15 = lwp */
- testq %r15, %r15
+ movq CPU_THREAD(%rax), %rbx /* %rbx = curthread */
+ movl $FPU_EN, %eax
+ movq T_LWP(%rbx), %rbx /* %rbx = lwp */
+ testq %rbx, %rbx
jz .handle_in_trap /* should not happen? */
#if LWP_PCB_FPU != 0
- addq $LWP_PCB_FPU, %r15 /* &lwp->lwp_pcb.pcb_fpu */
+ addq $LWP_PCB_FPU, %rbx /* &lwp->lwp_pcb.pcb_fpu */
#endif
- testl %ebx, PCB_FPU_FLAGS(%r15)
+ testl %eax, PCB_FPU_FLAGS(%rbx)
jz .handle_in_trap /* must be the first fault */
CLTS
- andl $_BITNOT(FPU_VALID), PCB_FPU_FLAGS(%r15)
+ andl $_BITNOT(FPU_VALID), PCB_FPU_FLAGS(%rbx)
#if FPU_CTX_FPU_REGS != 0
- addq $FPU_CTX_FPU_REGS, %r15
+ addq $FPU_CTX_FPU_REGS, %rbx
#endif
+
+ movl FPU_CTX_FPU_XSAVE_MASK(%rbx), %eax /* for xrstor */
+ movl FPU_CTX_FPU_XSAVE_MASK+4(%rbx), %edx /* for xrstor */
+
/*
* the label below is used in trap.c to detect FP faults in
* kernel due to user fault.
*/
ALTENTRY(ndptrap_frstor)
- FXRSTORQ ((%r15))
+ .globl _patch_xrstorq_rbx
+_patch_xrstorq_rbx:
+ FXRSTORQ ((%rbx))
cmpw $KCS_SEL, REGOFF_CS(%rsp)
je .return_to_kernel
@@ -694,42 +699,56 @@ _emul_done:
pushq %rbx
cmpw $KCS_SEL, 24(%rsp) /* did we come from kernel mode? */
jne 1f
- LOADCPU(%rbx) /* if yes, don't swapgs */
+ LOADCPU(%rax) /* if yes, don't swapgs */
jmp 2f
-1:
+1:
SWAPGS /* if from user, need swapgs */
- LOADCPU(%rbx)
+ LOADCPU(%rax)
SWAPGS
-2:
+2:
+ /*
+ * xrstor takes the upper half of its feature mask in %edx.
+ * NOTE: have to push rdx after "cmpw ...24(%rsp)", otherwise rsp+$24
+ * will not point to CS.
+ */
+ pushq %rdx
cmpl $0, fpu_exists(%rip)
je .handle_in_trap /* let trap handle no fp case */
- movq CPU_THREAD(%rbx), %rax /* %rax = curthread */
- movl $FPU_EN, %ebx
- movq T_LWP(%rax), %rax /* %rax = lwp */
- testq %rax, %rax
+ movq CPU_THREAD(%rax), %rbx /* %rbx = curthread */
+ movl $FPU_EN, %eax
+ movq T_LWP(%rbx), %rbx /* %rbx = lwp */
+ testq %rbx, %rbx
jz .handle_in_trap /* should not happen? */
#if LWP_PCB_FPU != 0
- addq $LWP_PCB_FPU, %rax /* &lwp->lwp_pcb.pcb_fpu */
+ addq $LWP_PCB_FPU, %rbx /* &lwp->lwp_pcb.pcb_fpu */
#endif
- testl %ebx, PCB_FPU_FLAGS(%rax)
+ testl %eax, PCB_FPU_FLAGS(%rbx)
jz .handle_in_trap /* must be the first fault */
clts
- andl $_BITNOT(FPU_VALID), PCB_FPU_FLAGS(%rax)
+ andl $_BITNOT(FPU_VALID), PCB_FPU_FLAGS(%rbx)
#if FPU_CTX_FPU_REGS != 0
- addq $FPU_CTX_FPU_REGS, %rax
+ addq $FPU_CTX_FPU_REGS, %rbx
#endif
+
+ movl FPU_CTX_FPU_XSAVE_MASK(%rbx), %eax /* for xrstor */
+ movl FPU_CTX_FPU_XSAVE_MASK+4(%rbx), %edx /* for xrstor */
+
/*
* the label below is used in trap.c to detect FP faults in
* kernel due to user fault.
*/
ALTENTRY(ndptrap_frstor)
- FXRSTORQ ((%rax))
+ .globl _patch_xrstorq_rbx
+_patch_xrstorq_rbx:
+ FXRSTORQ ((%rbx))
+ popq %rdx
popq %rbx
popq %rax
IRET
/*NOTREACHED*/
.handle_in_trap:
+ popq %rdx
popq %rbx
popq %rax
TRAP_NOERR(T_NOEXTFLT) /* $7 */
@@ -749,6 +768,7 @@ _emul_done:
*/
pushl %eax
pushl %ebx
+ pushl %edx /* for xrstor */
pushl %ds
pushl %gs
movl $KDS_SEL, %ebx
@@ -773,17 +793,24 @@ _emul_done:
#if FPU_CTX_FPU_REGS != 0
addl $FPU_CTX_FPU_REGS, %ebx
#endif
+
+ movl FPU_CTX_FPU_XSAVE_MASK(%ebx), %eax /* for xrstor */
+ movl FPU_CTX_FPU_XSAVE_MASK+4(%ebx), %edx /* for xrstor */
+
/*
* the label below is used in trap.c to detect FP faults in kernel
* due to user fault.
*/
ALTENTRY(ndptrap_frstor)
- .globl _patch_fxrstor_ebx
+ .globl _patch_fxrstor_ebx
_patch_fxrstor_ebx:
+ .globl _patch_xrstor_ebx
+_patch_xrstor_ebx:
frstor (%ebx) /* may be patched to fxrstor */
nop /* (including this byte) */
popl %gs
popl %ds
+ popl %edx
popl %ebx
popl %eax
IRET
@@ -791,6 +818,7 @@ _patch_fxrstor_ebx:
.handle_in_trap:
popl %gs
popl %ds
+ popl %edx
popl %ebx
popl %eax
TRAP_NOERR(T_NOEXTFLT) /* $7 */
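
For readers tracking the register reshuffle above, here is a C-level sketch of what the patched #NM fast path now does (illustrative only; the real path runs at trap level with the register juggling shown in the assembly):

	fpu_ctx_t *fp = &curthread->t_lwp->lwp_pcb.pcb_fpu;

	if (fpu_exists && (fp->fpu_flags & FPU_EN)) {
		fp->fpu_flags &= ~FPU_VALID;	/* in-pcb copy now stale */
		/*
		 * The restore site is hot-patched: frstor or fxrstorq by
		 * default, xrstor (with edx:eax = fpu_xsave_mask) after
		 * patch_xsave().
		 */
		xrestore(&fp->fpu_regs.kfpu_u.kfpu_xs, fp->fpu_xsave_mask);
	}
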
diff --git a/usr/src/uts/intel/ia32/ml/float.s b/usr/src/uts/intel/ia32/ml/float.s
index 7214f1ec64..5a8962c9ff 100644
--- a/usr/src/uts/intel/ia32/ml/float.s
+++ b/usr/src/uts/intel/ia32/ml/float.s
@@ -20,8 +20,7 @@
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
@@ -31,7 +30,10 @@
/* Copyright (c) 1987, 1988 Microsoft Corporation */
/* All Rights Reserved */
-#pragma ident "%Z%%M% %I% %E% SMI"
+/*
+ * Copyright (c) 2009, Intel Corporation.
+ * All rights reserved.
+ */
#include <sys/asm_linkage.h>
#include <sys/asm_misc.h>
@@ -152,6 +154,10 @@ void
patch_sse2(void)
{}
+void
+patch_xsave(void)
+{}
+
#else /* __lint */
ENTRY_NP(patch_sse)
@@ -188,10 +194,74 @@ _lfence_ret_insn: / see membar_consumer()
ret
SET_SIZE(patch_sse2)
+ /*
+ * Patch lazy fp restore instructions in the trap handler
+ * to use xrstor instead of frstor
+ */
+ ENTRY_NP(patch_xsave)
+ _HOT_PATCH_PROLOG
+ /
+ / frstor (%ebx); nop -> xrstor (%ebx)
+ /
+ _HOT_PATCH(_xrstor_ebx_insn, _patch_xrstor_ebx, 3)
+ _HOT_PATCH_EPILOG
+ ret
+_xrstor_ebx_insn: / see ndptrap_frstor()
+ #xrstor (%ebx)
+ .byte 0x0f, 0xae, 0x2b
+ SET_SIZE(patch_xsave)
+
#endif /* __lint */
#endif /* __i386 */
+#if defined(__amd64)
+#if defined(__lint)
+
+void
+patch_xsave(void)
+{}
+
+#else /* __lint */
+
+ /*
+ * Patch lazy fp restore instructions in the trap handler
+ * to use xrstor instead of fxrstorq
+ */
+ ENTRY_NP(patch_xsave)
+ pushq %rbx
+ pushq %rbp
+ pushq %r15
+ /
+ / FXRSTORQ (%rbx); -> xrstor (%rbx)
+ / hot_patch(_xrstor_rbx_insn, _patch_xrstorq_rbx, 4)
+ /
+ leaq _patch_xrstorq_rbx(%rip), %rbx
+ leaq _xrstor_rbx_insn(%rip), %rbp
+ movq $4, %r15
+1:
+ movq %rbx, %rdi /* patch address */
+ movzbq (%rbp), %rsi /* instruction byte */
+ movq $1, %rdx /* count */
+ call hot_patch_kernel_text
+ addq $1, %rbx
+ addq $1, %rbp
+ subq $1, %r15
+ jnz 1b
+ popq %r15
+ popq %rbp
+ popq %rbx
+ ret
+
+_xrstor_rbx_insn: / see ndptrap_frstor()
+ #rex.W=1 (.byte 0x48)
+ #xrstor (%rbx)
+ .byte 0x48, 0x0f, 0xae, 0x2b
+ SET_SIZE(patch_xsave)
+
+#endif /* __lint */
+#endif /* __amd64 */
+
/*
* One of these routines is called from any lwp with floating
* point context as part of the prolog of a context switch.
@@ -201,6 +271,11 @@ _lfence_ret_insn: / see membar_consumer()
/*ARGSUSED*/
void
+xsave_ctxt(void *arg)
+{}
+
+/*ARGSUSED*/
+void
fpxsave_ctxt(void *arg)
{}
@@ -242,6 +317,33 @@ fpnsave_ctxt(void *arg)
/* AMD Software Optimization Guide - Section 6.2 */
SET_SIZE(fpxsave_ctxt)
+ ENTRY_NP(xsave_ctxt)
+ cmpl $FPU_EN, FPU_CTX_FPU_FLAGS(%rdi)
+ jne 1f
+ movl $_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%rdi)
+ /*
+ * Setup xsave flags in EDX:EAX
+ */
+ movl FPU_CTX_FPU_XSAVE_MASK(%rdi), %eax
+ movl FPU_CTX_FPU_XSAVE_MASK+4(%rdi), %edx
+ leaq FPU_CTX_FPU_REGS(%rdi), %rsi
+ #xsave (%rsi)
+ .byte 0x0f, 0xae, 0x26
+
+ /*
+ * (see notes above about "exception pointers")
+ * TODO: does it apply to any machine that uses xsave?
+ */
+ btw $7, FXSAVE_STATE_FSW(%rdi) /* Test saved ES bit */
+ jnc 0f /* jump if ES = 0 */
+ fnclex /* clear pending x87 exceptions */
+0: ffree %st(7) /* clear tag bit to remove possible stack overflow */
+ fildl .fpzero_const(%rip)
+ /* dummy load changes all exception pointers */
+ STTS(%rsi) /* trap on next fpu touch */
+1: ret
+ SET_SIZE(xsave_ctxt)
+
#elif defined(__i386)
ENTRY_NP(fpnsave_ctxt)
@@ -276,6 +378,32 @@ fpnsave_ctxt(void *arg)
/* AMD Software Optimization Guide - Section 6.2 */
SET_SIZE(fpxsave_ctxt)
+ ENTRY_NP(xsave_ctxt)
+ movl 4(%esp), %ecx /* a struct fpu_ctx */
+ cmpl $FPU_EN, FPU_CTX_FPU_FLAGS(%ecx)
+ jne 1f
+
+ movl $_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%ecx)
+ movl FPU_CTX_FPU_XSAVE_MASK(%ecx), %eax
+ movl FPU_CTX_FPU_XSAVE_MASK+4(%ecx), %edx
+ leal FPU_CTX_FPU_REGS(%ecx), %ecx
+ #xsave (%ecx)
+ .byte 0x0f, 0xae, 0x21
+
+ /*
+ * (see notes above about "exception pointers")
+ * TODO: does it apply to any machine that uses xsave?
+ */
+ btw $7, FXSAVE_STATE_FSW(%ecx) /* Test saved ES bit */
+ jnc 0f /* jump if ES = 0 */
+ fnclex /* clear pending x87 exceptions */
+0: ffree %st(7) /* clear tag bit to remove possible stack overflow */
+ fildl .fpzero_const
+ /* dummy load changes all exception pointers */
+ STTS(%edx) /* trap on next fpu touch */
+1: ret
+ SET_SIZE(xsave_ctxt)
+
#endif /* __i386 */
.align 8
@@ -298,6 +426,11 @@ void
fpxsave(struct fxsave_state *f)
{}
+/*ARGSUSED*/
+void
+xsave(struct xsave_state *f, uint64_t m)
+{}
+
#else /* __lint */
#if defined(__amd64)
@@ -310,6 +443,19 @@ fpxsave(struct fxsave_state *f)
ret
SET_SIZE(fpxsave)
+ ENTRY_NP(xsave)
+ CLTS
+ movl %esi, %eax /* bv mask */
+ movq %rsi, %rdx
+ shrq $32, %rdx
+ #xsave (%rdi)
+ .byte 0x0f, 0xae, 0x27
+
+ fninit /* clear exceptions, init x87 tags */
+ STTS(%rdi) /* set TS bit in %cr0 (disable FPU) */
+ ret
+ SET_SIZE(xsave)
+
#elif defined(__i386)
ENTRY_NP(fpsave)
@@ -329,6 +475,19 @@ fpxsave(struct fxsave_state *f)
ret
SET_SIZE(fpxsave)
+ ENTRY_NP(xsave)
+ CLTS
+ movl 4(%esp), %ecx
+ movl 8(%esp), %eax
+ movl 12(%esp), %edx
+ #xsave (%ecx)
+ .byte 0x0f, 0xae, 0x21
+
+ fninit /* clear exceptions, init x87 tags */
+ STTS(%eax) /* set TS bit in %cr0 (disable FPU) */
+ ret
+ SET_SIZE(xsave)
+
#endif /* __i386 */
#endif /* __lint */
@@ -344,6 +503,11 @@ void
fpxrestore(struct fxsave_state *f)
{}
+/*ARGSUSED*/
+void
+xrestore(struct xsave_state *f, uint64_t m)
+{}
+
#else /* __lint */
#if defined(__amd64)
@@ -354,6 +518,16 @@ fpxrestore(struct fxsave_state *f)
ret
SET_SIZE(fpxrestore)
+ ENTRY_NP(xrestore)
+ CLTS
+ movl %esi, %eax /* bv mask */
+ movq %rsi, %rdx
+ shrq $32, %rdx
+ #xrstor (%rdi)
+ .byte 0x0f, 0xae, 0x2f
+ ret
+ SET_SIZE(xrestore)
+
#elif defined(__i386)
ENTRY_NP(fprestore)
@@ -370,6 +544,16 @@ fpxrestore(struct fxsave_state *f)
ret
SET_SIZE(fpxrestore)
+ ENTRY_NP(xrestore)
+ CLTS
+ movl 4(%esp), %ecx
+ movl 8(%esp), %eax
+ movl 12(%esp), %edx
+ #xrstor (%ecx)
+ .byte 0x0f, 0xae, 0x29
+ ret
+ SET_SIZE(xrestore)
+
#endif /* __i386 */
#endif /* __lint */
@@ -418,26 +602,56 @@ fpinit(void)
ENTRY_NP(fpinit)
CLTS
+ cmpl $FP_XSAVE, fp_save_mech
+ je 1f
+
+ /* fxsave */
leaq sse_initial(%rip), %rax
FXRSTORQ ((%rax)) /* load clean initial state */
ret
+
+1: /* xsave */
+ leaq avx_initial(%rip), %rcx
+ xorl %edx, %edx
+ movl $XFEATURE_AVX, %eax
+ bt $X86FSET_AVX, x86_featureset
+ cmovael %edx, %eax
+ orl $(XFEATURE_LEGACY_FP | XFEATURE_SSE), %eax
+ /* xrstor (%rcx) */
+ .byte 0x0f, 0xae, 0x29 /* load clean initial state */
+ ret
SET_SIZE(fpinit)
#elif defined(__i386)
ENTRY_NP(fpinit)
CLTS
- cmpl $__FP_SSE, fp_kind
+ cmpl $FP_FXSAVE, fp_save_mech
je 1f
+ cmpl $FP_XSAVE, fp_save_mech
+ je 2f
+ /* fnsave */
fninit
movl $x87_initial, %eax
frstor (%eax) /* load clean initial state */
ret
-1:
+
+1: /* fxsave */
movl $sse_initial, %eax
fxrstor (%eax) /* load clean initial state */
ret
+
+2: /* xsave */
+ movl $avx_initial, %ecx
+ xorl %edx, %edx
+ movl $XFEATURE_AVX, %eax
+ bt $X86FSET_AVX, x86_featureset
+ cmovael %edx, %eax
+ orl $(XFEATURE_LEGACY_FP | XFEATURE_SSE), %eax
+ /* xrstor (%ecx) */
+ .byte 0x0f, 0xae, 0x29 /* load clean initial state */
+ ret
SET_SIZE(fpinit)
#endif /* __i386 */
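
The raw .byte sequences throughout this file exist because the assembler in use predates the xsave/xrstor mnemonics; each is a hand-assembled ModRM form of 0F AE /4 (xsave) or 0F AE /5 (xrstor). Decoded, for reference:

	/*
	 * 0x0f 0xae 0x26		xsave  (%rsi)
	 * 0x0f 0xae 0x27		xsave  (%rdi)
	 * 0x0f 0xae 0x21		xsave  (%ecx)
	 * 0x0f 0xae 0x29		xrstor (%ecx) or (%rcx)
	 * 0x0f 0xae 0x2b		xrstor (%ebx)
	 * 0x48 0x0f 0xae 0x2b	xrstor (%rbx)  (REX.W)
	 * 0x0f 0xae 0x2f		xrstor (%rdi)
	 */
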
diff --git a/usr/src/uts/intel/ia32/ml/i86_subr.s b/usr/src/uts/intel/ia32/ml/i86_subr.s
index a4406b276d..e79eabd119 100644
--- a/usr/src/uts/intel/ia32/ml/i86_subr.s
+++ b/usr/src/uts/intel/ia32/ml/i86_subr.s
@@ -30,6 +30,11 @@
*/
/*
+ * Copyright (c) 2009, Intel Corporation.
+ * All rights reserved.
+ */
+
+/*
* General assembly language routines.
* It is the intent of this file to contain routines that are
* independent of the specific kernel architecture, and those that are
@@ -2867,6 +2872,16 @@ void
invalidate_cache(void)
{}
+/*ARGSUSED*/
+uint64_t
+get_xcr(uint_t r)
+{ return (0); }
+
+/*ARGSUSED*/
+void
+set_xcr(uint_t r, const uint64_t val)
+{}
+
#else /* __lint */
#define XMSR_ACCESS_VAL $0x9c5a203a
@@ -2914,7 +2929,26 @@ invalidate_cache(void)
leave
ret
SET_SIZE(xwrmsr)
-
+
+ ENTRY(get_xcr)
+ movl %edi, %ecx
+ #xgetbv
+ .byte 0x0f,0x01,0xd0
+ shlq $32, %rdx
+ orq %rdx, %rax
+ ret
+ SET_SIZE(get_xcr)
+
+ ENTRY(set_xcr)
+ movq %rsi, %rdx
+ shrq $32, %rdx
+ movl %esi, %eax
+ movl %edi, %ecx
+ #xsetbv
+ .byte 0x0f,0x01,0xd1
+ ret
+ SET_SIZE(set_xcr)
+
#elif defined(__i386)
ENTRY(rdmsr)
@@ -2957,6 +2991,22 @@ invalidate_cache(void)
ret
SET_SIZE(xwrmsr)
+ ENTRY(get_xcr)
+ movl 4(%esp), %ecx
+ #xgetbv
+ .byte 0x0f,0x01,0xd0
+ ret
+ SET_SIZE(get_xcr)
+
+ ENTRY(set_xcr)
+ movl 4(%esp), %ecx
+ movl 8(%esp), %eax
+ movl 12(%esp), %edx
+ #xsetbv
+ .byte 0x0f,0x01,0xd1
+ ret
+ SET_SIZE(set_xcr)
+
#endif /* __i386 */
ENTRY(invalidate_cache)
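
get_xcr() and set_xcr() wrap xgetbv (0f 01 d0) and xsetbv (0f 01 d1), which move the XCR selected by %ecx to and from edx:eax. Typical kernel usage, mirroring setup_xfem() and fp_seed() elsewhere in this change:

	uint64_t xcr0;

	/* read XCR0 (the XFEATURE_ENABLED_MASK register) */
	xcr0 = get_xcr(XFEATURE_ENABLED_MASK);
	/* enable x87 and SSE state management, as setup_xfem() does */
	set_xcr(XFEATURE_ENABLED_MASK,
	    xcr0 | XFEATURE_LEGACY_FP | XFEATURE_SSE);
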
diff --git a/usr/src/uts/intel/ia32/os/archdep.c b/usr/src/uts/intel/ia32/os/archdep.c
index 506fbaf66e..7603ccb97d 100644
--- a/usr/src/uts/intel/ia32/os/archdep.c
+++ b/usr/src/uts/intel/ia32/os/archdep.c
@@ -62,6 +62,7 @@
#include <sys/dtrace.h>
#include <sys/brand.h>
#include <sys/machbrand.h>
+#include <sys/cmn_err.h>
extern const struct fnsave_state x87_initial;
extern const struct fxsave_state sse_initial;
@@ -278,41 +279,43 @@ setfpregs(klwp_t *lwp, fpregset_t *fp)
*/
fp_free(fpu, 0);
}
-#if !defined(__amd64)
- if (fp_kind == __FP_SSE) {
-#endif
- fpregset_to_fxsave(fp, &fpu->fpu_regs.kfpu_u.kfpu_fx);
- fpu->fpu_regs.kfpu_xstatus =
- fp->fp_reg_set.fpchip_state.xstatus;
-#if !defined(__amd64)
- } else
- bcopy(fp, &fpu->fpu_regs.kfpu_u.kfpu_fn,
- sizeof (fpu->fpu_regs.kfpu_u.kfpu_fn));
-#endif
- fpu->fpu_regs.kfpu_status = fp->fp_reg_set.fpchip_state.status;
- fpu->fpu_flags |= FPU_VALID;
- } else {
- /*
- * If we are trying to change the FPU state of a thread which
- * hasn't yet initialized floating point, store the state in
- * the pcb and indicate that the state is valid. When the
- * thread enables floating point, it will use this state instead
- * of the default state.
- */
-#if !defined(__amd64)
- if (fp_kind == __FP_SSE) {
-#endif
- fpregset_to_fxsave(fp, &fpu->fpu_regs.kfpu_u.kfpu_fx);
- fpu->fpu_regs.kfpu_xstatus =
- fp->fp_reg_set.fpchip_state.xstatus;
-#if !defined(__amd64)
- } else
- bcopy(fp, &fpu->fpu_regs.kfpu_u.kfpu_fn,
- sizeof (fpu->fpu_regs.kfpu_u.kfpu_fn));
+ }
+ /*
+ * Else: if we are trying to change the FPU state of a thread which
+ * hasn't yet initialized floating point, store the state in
+ * the pcb and indicate that the state is valid. When the
+ * thread enables floating point, it will use this state instead
+ * of the default state.
+ */
+
+ switch (fp_save_mech) {
+#if defined(__i386)
+ case FP_FNSAVE:
+ bcopy(fp, &fpu->fpu_regs.kfpu_u.kfpu_fn,
+ sizeof (fpu->fpu_regs.kfpu_u.kfpu_fn));
+ break;
#endif
- fpu->fpu_regs.kfpu_status = fp->fp_reg_set.fpchip_state.status;
- fpu->fpu_flags |= FPU_VALID;
+ case FP_FXSAVE:
+ fpregset_to_fxsave(fp, &fpu->fpu_regs.kfpu_u.kfpu_fx);
+ fpu->fpu_regs.kfpu_xstatus =
+ fp->fp_reg_set.fpchip_state.xstatus;
+ break;
+
+ case FP_XSAVE:
+ fpregset_to_fxsave(fp,
+ &fpu->fpu_regs.kfpu_u.kfpu_xs.xs_fxsave);
+ fpu->fpu_regs.kfpu_xstatus =
+ fp->fp_reg_set.fpchip_state.xstatus;
+ fpu->fpu_regs.kfpu_u.kfpu_xs.xs_xstate_bv |=
+ (XFEATURE_LEGACY_FP | XFEATURE_SSE);
+ break;
+ default:
+ panic("Invalid fp_save_mech");
+ /*NOTREACHED*/
}
+
+ fpu->fpu_regs.kfpu_status = fp->fp_reg_set.fpchip_state.status;
+ fpu->fpu_flags |= FPU_VALID;
}
/*
@@ -349,32 +352,54 @@ getfpregs(klwp_t *lwp, fpregset_t *fp)
/*
* Cases 1 and 3.
*/
-#if !defined(__amd64)
- if (fp_kind == __FP_SSE) {
+ switch (fp_save_mech) {
+#if defined(__i386)
+ case FP_FNSAVE:
+ bcopy(&fpu->fpu_regs.kfpu_u.kfpu_fn, fp,
+ sizeof (fpu->fpu_regs.kfpu_u.kfpu_fn));
+ break;
#endif
+ case FP_FXSAVE:
fxsave_to_fpregset(&fpu->fpu_regs.kfpu_u.kfpu_fx, fp);
fp->fp_reg_set.fpchip_state.xstatus =
fpu->fpu_regs.kfpu_xstatus;
-#if !defined(__amd64)
- } else
- bcopy(&fpu->fpu_regs.kfpu_u.kfpu_fn, fp,
- sizeof (fpu->fpu_regs.kfpu_u.kfpu_fn));
-#endif
+ break;
+ case FP_XSAVE:
+ fxsave_to_fpregset(
+ &fpu->fpu_regs.kfpu_u.kfpu_xs.xs_fxsave, fp);
+ fp->fp_reg_set.fpchip_state.xstatus =
+ fpu->fpu_regs.kfpu_xstatus;
+ break;
+ default:
+ panic("Invalid fp_save_mech");
+ /*NOTREACHED*/
+ }
fp->fp_reg_set.fpchip_state.status = fpu->fpu_regs.kfpu_status;
} else {
/*
* Case 2.
*/
-#if !defined(__amd64)
- if (fp_kind == __FP_SSE) {
+ switch (fp_save_mech) {
+#if defined(__i386)
+ case FP_FNSAVE:
+ bcopy(&x87_initial, fp, sizeof (x87_initial));
+ break;
#endif
+ case FP_FXSAVE:
+ case FP_XSAVE:
+ /*
+ * For now, we don't have any AVX-specific fields in the ABI.
+ * If we add any in the future, we need to initialize them
+ * as well.
+ */
fxsave_to_fpregset(&sse_initial, fp);
fp->fp_reg_set.fpchip_state.xstatus =
fpu->fpu_regs.kfpu_xstatus;
-#if !defined(__amd64)
- } else
- bcopy(&x87_initial, fp, sizeof (x87_initial));
-#endif
+ break;
+ default:
+ panic("Invalid fp_save_mech");
+ /*NOTREACHED*/
+ }
fp->fp_reg_set.fpchip_state.status = fpu->fpu_regs.kfpu_status;
}
kpreempt_enable();
diff --git a/usr/src/uts/intel/ia32/os/fpu.c b/usr/src/uts/intel/ia32/os/fpu.c
index 3c2a3bae65..b7022cf0e5 100644
--- a/usr/src/uts/intel/ia32/os/fpu.c
+++ b/usr/src/uts/intel/ia32/os/fpu.c
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
@@ -30,7 +29,10 @@
/* Copyright (c) 1987, 1988 Microsoft Corporation */
/* All Rights Reserved */
-#pragma ident "%Z%%M% %I% %E% SMI"
+/*
+ * Copyright (c) 2009, Intel Corporation.
+ * All rights reserved.
+ */
#include <sys/types.h>
#include <sys/param.h>
@@ -56,6 +58,10 @@
#include <sys/debug.h>
#include <sys/x86_archext.h>
#include <sys/sysmacros.h>
+#include <sys/cmn_err.h>
+
+/* Legacy fxsave layout + xsave header + ymm */
+#define AVX_XSAVE_SIZE (512 + 64 + 256)
/*CSTYLED*/
#pragma align 16 (sse_initial)
@@ -83,6 +89,45 @@ const struct fxsave_state sse_initial = {
/* rest of structure is zero */
};
+/*CSTYLED*/
+#pragma align 64 (avx_initial)
+
+/*
+ * Initial kfpu state for AVX used by fpinit()
+ */
+const struct xsave_state avx_initial = {
+ /*
+ * The definition below needs to be identical to sse_initial
+ * defined above.
+ */
+ {
+ FPU_CW_INIT, /* fx_fcw */
+ 0, /* fx_fsw */
+ 0, /* fx_fctw */
+ 0, /* fx_fop */
+#if defined(__amd64)
+ 0, /* fx_rip */
+ 0, /* fx_rdp */
+#else
+ 0, /* fx_eip */
+ 0, /* fx_cs */
+ 0, /* __fx_ign0 */
+ 0, /* fx_dp */
+ 0, /* fx_ds */
+ 0, /* __fx_ign1 */
+#endif /* __amd64 */
+ SSE_MXCSR_INIT /* fx_mxcsr */
+ /* rest of structure is zero */
+ },
+ /*
+ * bit0 = 1 for XSTATE_BV to indicate that legacy fields are valid,
+ * and the CPU should initialize XMM/YMM.
+ */
+ 1,
+ {0, 0} /* these two 64-bit fields must be zero */
+ /* rest of structure is zero */
+};
+
/*
* mxcsr_mask value (possibly reset in fpu_probe); used to avoid
* the #gp exception caused by setting unsupported bits in the
@@ -103,11 +148,16 @@ const struct fnsave_state x87_initial = {
};
#if defined(__amd64)
-#define fpsave_ctxt fpxsave_ctxt
+/*
+ * This vector is patched to xsave_ctxt() if we discover we have an
+ * XSAVE-capable chip in fpu_probe().
+ */
+void (*fpsave_ctxt)(void *) = fpxsave_ctxt;
#elif defined(__i386)
/*
- * This vector is patched to fpxsave_ctxt() if we discover
- * we have an SSE-capable chip in fpu_probe().
+ * This vector is patched to fpxsave_ctxt() if we discover we have an
+ * SSE-capable chip in fpu_probe(). It is patched to xsave_ctxt()
+ * if we discover we have an XSAVE-capable chip in fpu_probe().
*/
void (*fpsave_ctxt)(void *) = fpnsave_ctxt;
#endif
@@ -129,6 +179,10 @@ fp_new_lwp(kthread_id_t t, kthread_id_t ct)
struct fpu_ctx *fp; /* parent fpu context */
struct fpu_ctx *cfp; /* new fpu context */
struct fxsave_state *fx, *cfx;
+#if defined(__i386)
+ struct fnsave_state *fn, *cfn;
+#endif
+ struct xsave_state *cxs;
ASSERT(fp_kind != FP_NO);
@@ -145,27 +199,41 @@ fp_new_lwp(kthread_id_t t, kthread_id_t ct)
cfp->fpu_regs.kfpu_status = 0;
cfp->fpu_regs.kfpu_xstatus = 0;
-#if defined(__amd64)
- fx = &fp->fpu_regs.kfpu_u.kfpu_fx;
- cfx = &cfp->fpu_regs.kfpu_u.kfpu_fx;
- bcopy(&sse_initial, cfx, sizeof (*cfx));
- cfx->fx_mxcsr = fx->fx_mxcsr & ~SSE_MXCSR_EFLAGS;
- cfx->fx_fcw = fx->fx_fcw;
-#else
- if (fp_kind == __FP_SSE) {
+ switch (fp_save_mech) {
+#if defined(__i386)
+ case FP_FNSAVE:
+ fn = &fp->fpu_regs.kfpu_u.kfpu_fn;
+ cfn = &cfp->fpu_regs.kfpu_u.kfpu_fn;
+ bcopy(&x87_initial, cfn, sizeof (*cfn));
+ cfn->f_fcw = fn->f_fcw;
+ break;
+#endif
+ case FP_FXSAVE:
fx = &fp->fpu_regs.kfpu_u.kfpu_fx;
cfx = &cfp->fpu_regs.kfpu_u.kfpu_fx;
bcopy(&sse_initial, cfx, sizeof (*cfx));
cfx->fx_mxcsr = fx->fx_mxcsr & ~SSE_MXCSR_EFLAGS;
cfx->fx_fcw = fx->fx_fcw;
- } else {
- struct fnsave_state *fn = &fp->fpu_regs.kfpu_u.kfpu_fn;
- struct fnsave_state *cfn = &cfp->fpu_regs.kfpu_u.kfpu_fn;
+ break;
- bcopy(&x87_initial, cfn, sizeof (*cfn));
- cfn->f_fcw = fn->f_fcw;
+ case FP_XSAVE:
+ cfp->fpu_xsave_mask = fp->fpu_xsave_mask;
+
+ fx = &fp->fpu_regs.kfpu_u.kfpu_xs.xs_fxsave;
+ cxs = &cfp->fpu_regs.kfpu_u.kfpu_xs;
+ cfx = &cxs->xs_fxsave;
+
+ bcopy(&avx_initial, cxs, sizeof (*cxs));
+ cfx->fx_mxcsr = fx->fx_mxcsr & ~SSE_MXCSR_EFLAGS;
+ cfx->fx_fcw = fx->fx_fcw;
+ cxs->xs_xstate_bv |= (get_xcr(XFEATURE_ENABLED_MASK) &
+ XFEATURE_FP_ALL);
+ break;
+ default:
+ panic("Invalid fp_save_mech");
+ /*NOTREACHED*/
}
-#endif
+
installctx(ct, cfp,
fpsave_ctxt, NULL, fp_new_lwp, fp_new_lwp, NULL, fp_free);
/*
@@ -212,7 +280,7 @@ fp_free(struct fpu_ctx *fp, int isexec)
if (curthread->t_lwp && fp == &curthread->t_lwp->lwp_pcb.pcb_fpu) {
/* Clear errors if any to prevent frstor from complaining */
(void) fperr_reset();
- if (fp_kind == __FP_SSE)
+ if (fp_kind & __FP_SSE)
(void) fpxerr_reset();
fpdisable();
}
@@ -234,18 +302,24 @@ fp_save(struct fpu_ctx *fp)
}
ASSERT(curthread->t_lwp && fp == &curthread->t_lwp->lwp_pcb.pcb_fpu);
-#if defined(__amd64)
- fpxsave(&fp->fpu_regs.kfpu_u.kfpu_fx);
-#else
- switch (fp_kind) {
- case __FP_SSE:
+ switch (fp_save_mech) {
+#if defined(__i386)
+ case FP_FNSAVE:
+ fpsave(&fp->fpu_regs.kfpu_u.kfpu_fn);
+ break;
+#endif
+ case FP_FXSAVE:
fpxsave(&fp->fpu_regs.kfpu_u.kfpu_fx);
break;
- default:
- fpsave(&fp->fpu_regs.kfpu_u.kfpu_fn);
+
+ case FP_XSAVE:
+ xsave(&fp->fpu_regs.kfpu_u.kfpu_xs, fp->fpu_xsave_mask);
break;
+ default:
+ panic("Invalid fp_save_mech");
+ /*NOTREACHED*/
}
-#endif
+
fp->fpu_flags |= FPU_VALID;
kpreempt_enable();
}
@@ -259,15 +333,24 @@ fp_save(struct fpu_ctx *fp)
void
fp_restore(struct fpu_ctx *fp)
{
-#if defined(__amd64)
- fpxrestore(&fp->fpu_regs.kfpu_u.kfpu_fx);
-#else
- /* case 2 */
- if (fp_kind == __FP_SSE)
- fpxrestore(&fp->fpu_regs.kfpu_u.kfpu_fx);
- else
+ switch (fp_save_mech) {
+#if defined(__i386)
+ case FP_FNSAVE:
fprestore(&fp->fpu_regs.kfpu_u.kfpu_fn);
+ break;
#endif
+ case FP_FXSAVE:
+ fpxrestore(&fp->fpu_regs.kfpu_u.kfpu_fx);
+ break;
+
+ case FP_XSAVE:
+ xrestore(&fp->fpu_regs.kfpu_u.kfpu_xs, fp->fpu_xsave_mask);
+ break;
+ default:
+ panic("Invalid fp_save_mech");
+ /*NOTREACHED*/
+ }
+
fp->fpu_flags &= ~FPU_VALID;
}
@@ -289,6 +372,11 @@ fp_seed(void)
/*
* Always initialize a new context and initialize the hardware.
*/
+ if (fp_save_mech == FP_XSAVE) {
+ fp->fpu_xsave_mask = get_xcr(XFEATURE_ENABLED_MASK) &
+ XFEATURE_FP_ALL;
+ }
+
installctx(curthread, fp,
fpsave_ctxt, NULL, fp_new_lwp, fp_new_lwp, NULL, fp_free);
fpinit();
@@ -324,6 +412,9 @@ fpnoextflt(struct regs *rp)
ASSERT(sizeof (struct fxsave_state) == 512 &&
sizeof (struct fnsave_state) == 108);
ASSERT((offsetof(struct fxsave_state, fx_xmm[0]) & 0xf) == 0);
+
+ ASSERT(sizeof (struct xsave_state) >= AVX_XSAVE_SIZE);
+
#if defined(__i386)
ASSERT(sizeof (struct fpu) == sizeof (struct __old_fpu));
#endif /* __i386 */
@@ -375,8 +466,9 @@ fpnoextflt(struct regs *rp)
* configured to enable fully fledged (%xmm) fxsave/fxrestor on
* this CPU. For the non-SSE case, ensure that it isn't.
*/
- ASSERT((fp_kind == __FP_SSE && (getcr4() & CR4_OSFXSR) == CR4_OSFXSR) ||
- (fp_kind != __FP_SSE &&
+ ASSERT(((fp_kind & __FP_SSE) &&
+ (getcr4() & CR4_OSFXSR) == CR4_OSFXSR) ||
+ (!(fp_kind & __FP_SSE) &&
(getcr4() & (CR4_OSXMMEXCPT|CR4_OSFXSR)) == 0));
#endif
@@ -451,25 +543,36 @@ fpexterrflt(struct regs *rp)
fp_save(fp);
/* clear exception flags in saved state, as if by fnclex */
-#if defined(__amd64)
- fpsw = fp->fpu_regs.kfpu_u.kfpu_fx.fx_fsw;
- fpcw = fp->fpu_regs.kfpu_u.kfpu_fx.fx_fcw;
- fp->fpu_regs.kfpu_u.kfpu_fx.fx_fsw &= ~FPS_SW_EFLAGS;
-#else
- switch (fp_kind) {
- case __FP_SSE:
- fpsw = fp->fpu_regs.kfpu_u.kfpu_fx.fx_fsw;
- fpcw = fp->fpu_regs.kfpu_u.kfpu_fx.fx_fcw;
- fp->fpu_regs.kfpu_u.kfpu_fx.fx_fsw &= ~FPS_SW_EFLAGS;
- break;
- default:
- fpsw = fp->fpu_regs.kfpu_u.kfpu_fn.f_fsw;
- fpcw = fp->fpu_regs.kfpu_u.kfpu_fn.f_fcw;
- fp->fpu_regs.kfpu_u.kfpu_fn.f_fsw &= ~FPS_SW_EFLAGS;
- break;
- }
+ switch (fp_save_mech) {
+#if defined(__i386)
+ case FP_FNSAVE:
+ fpsw = fp->fpu_regs.kfpu_u.kfpu_fn.f_fsw;
+ fpcw = fp->fpu_regs.kfpu_u.kfpu_fn.f_fcw;
+ fp->fpu_regs.kfpu_u.kfpu_fn.f_fsw &= ~FPS_SW_EFLAGS;
+ break;
#endif
+ case FP_FXSAVE:
+ fpsw = fp->fpu_regs.kfpu_u.kfpu_fx.fx_fsw;
+ fpcw = fp->fpu_regs.kfpu_u.kfpu_fx.fx_fcw;
+ fp->fpu_regs.kfpu_u.kfpu_fx.fx_fsw &= ~FPS_SW_EFLAGS;
+ break;
+
+ case FP_XSAVE:
+ fpsw = fp->fpu_regs.kfpu_u.kfpu_xs.xs_fxsave.fx_fsw;
+ fpcw = fp->fpu_regs.kfpu_u.kfpu_xs.xs_fxsave.fx_fcw;
+ fp->fpu_regs.kfpu_u.kfpu_xs.xs_fxsave.fx_fsw &= ~FPS_SW_EFLAGS;
+ /*
+ * Always set LEGACY_FP as it may have been cleared by XSAVE
+ * instruction
+ */
+ fp->fpu_regs.kfpu_u.kfpu_xs.xs_xstate_bv |= XFEATURE_LEGACY_FP;
+ break;
+ default:
+ panic("Invalid fp_save_mech");
+ /*NOTREACHED*/
+ }
+
fp->fpu_regs.kfpu_status = fpsw;
if ((fpsw & FPS_ES) == 0)
@@ -493,7 +596,7 @@ fpsimderrflt(struct regs *rp)
uint32_t mxcsr, xmask;
fpu_ctx_t *fp = &ttolwp(curthread)->lwp_pcb.pcb_fpu;
- ASSERT(fp_kind == __FP_SSE);
+ ASSERT(fp_kind & __FP_SSE);
/*
* NOTE: Interrupts are disabled during execution of this
@@ -625,20 +728,30 @@ fpsetcw(uint16_t fcw, uint32_t mxcsr)
*/
fp_save(fp);
-#if defined(__amd64)
- fx = &fp->fpu_regs.kfpu_u.kfpu_fx;
- fx->fx_fcw = fcw;
- fx->fx_mxcsr = sse_mxcsr_mask & mxcsr;
-#else
- switch (fp_kind) {
- case __FP_SSE:
+ switch (fp_save_mech) {
+#if defined(__i386)
+ case FP_FNSAVE:
+ fp->fpu_regs.kfpu_u.kfpu_fn.f_fcw = fcw;
+ break;
+#endif
+ case FP_FXSAVE:
fx = &fp->fpu_regs.kfpu_u.kfpu_fx;
fx->fx_fcw = fcw;
fx->fx_mxcsr = sse_mxcsr_mask & mxcsr;
break;
- default:
- fp->fpu_regs.kfpu_u.kfpu_fn.f_fcw = fcw;
+
+ case FP_XSAVE:
+ fx = &fp->fpu_regs.kfpu_u.kfpu_xs.xs_fxsave;
+ fx->fx_fcw = fcw;
+ fx->fx_mxcsr = sse_mxcsr_mask & mxcsr;
+ /*
+ * Always set LEGACY_FP as it may have been cleared by XSAVE
+ * instruction
+ */
+ fp->fpu_regs.kfpu_u.kfpu_xs.xs_xstate_bv |= XFEATURE_LEGACY_FP;
break;
+ default:
+ panic("Invalid fp_save_mech");
+ /*NOTREACHED*/
}
-#endif
}
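
Several hunks above OR XFEATURE_LEGACY_FP (or the full XFEATURE_FP_ALL mask) back into xs_xstate_bv after touching saved state. The reason is xsave's init optimization: the hardware may leave a component's bit clear in XSTATE_BV when that component is in its init state, and a later xrstor then reinitializes the component instead of loading it. In pseudo-C (a sketch of the architected xrstor behavior, not illumos code; the names are illustrative):

	/* rfbm: the requested-feature bitmap passed in edx:eax */
	for (i = 0; i < 63; i++) {
		if ((rfbm & (1ULL << i)) == 0)
			continue;		/* component untouched */
		if (xs->xs_xstate_bv & (1ULL << i))
			load_component(i, xs);	/* load from save area */
		else
			init_component(i);	/* hardware init values */
	}
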
diff --git a/usr/src/uts/intel/ia32/os/sysi86.c b/usr/src/uts/intel/ia32/os/sysi86.c
index e677ba68fa..308cdddf4f 100644
--- a/usr/src/uts/intel/ia32/os/sysi86.c
+++ b/usr/src/uts/intel/ia32/os/sysi86.c
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
@@ -30,8 +29,6 @@
/* Copyright (c) 1987, 1988 Microsoft Corporation */
/* All Rights Reserved */
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/param.h>
#include <sys/types.h>
#include <sys/sysmacros.h>
@@ -170,7 +167,7 @@ sysi86(short cmd, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3)
break;
}
fpsetcw((uint16_t)arg2, (uint32_t)arg3);
- return (fp_kind == __FP_SSE ? 1 : 0);
+ return ((fp_kind & __FP_SSE) ? 1 : 0);
/* real time clock management commands */
diff --git a/usr/src/uts/intel/sys/archsystm.h b/usr/src/uts/intel/sys/archsystm.h
index 1821eb1dc7..a39221b353 100644
--- a/usr/src/uts/intel/sys/archsystm.h
+++ b/usr/src/uts/intel/sys/archsystm.h
@@ -58,6 +58,8 @@ extern void patch_sse(void);
extern void patch_sse2(void);
#endif
+extern void patch_xsave(void);
+
extern void cli(void);
extern void sti(void);
@@ -193,6 +195,7 @@ extern void patch_tsc_read(int);
#if defined(__amd64) && !defined(__xpv)
extern void patch_memops(uint_t);
#endif /* defined(__amd64) && !defined(__xpv) */
+extern void setup_xfem(void);
#define cpr_dprintf prom_printf
#define IN_XPV_PANIC() (__lintzero)
#endif
diff --git a/usr/src/uts/intel/sys/controlregs.h b/usr/src/uts/intel/sys/controlregs.h
index dc8ec9c8c5..aa9ab14a89 100644
--- a/usr/src/uts/intel/sys/controlregs.h
+++ b/usr/src/uts/intel/sys/controlregs.h
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _SYS_CONTROLREGS_H
@@ -108,9 +107,10 @@ extern "C" {
/* 0x1000 reserved */
#define CR4_VMXE 0x2000
#define CR4_SMXE 0x4000
+#define CR4_OSXSAVE 0x40000 /* OS xsave/xrstor support */
#define FMT_CR4 \
- "\20\17smxe\16vmxe\13xmme\12fxsr\11pce\10pge" \
+ "\20\23osxsav\17smxe\16vmxe\13xmme\12fxsr\11pce\10pge" \
"\7mce\6pae\5pse\4de\3tsd\2pvi\1vme"
/*
diff --git a/usr/src/uts/intel/sys/fp.h b/usr/src/uts/intel/sys/fp.h
index 02fb34fc65..4956e2d318 100644
--- a/usr/src/uts/intel/sys/fp.h
+++ b/usr/src/uts/intel/sys/fp.h
@@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
@@ -30,8 +29,6 @@
#ifndef _SYS_FP_H
#define _SYS_FP_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#ifdef __cplusplus
extern "C" {
#endif
@@ -50,7 +47,20 @@ extern "C" {
#define FP_387 3 /* 80387 chip present */
#define FP_487 6 /* 80487 chip present */
#define FP_486 6 /* 80486 chip present */
-#define __FP_SSE 0x103 /* x87 plus SSE-capable CPU */
+/*
+ * The following values are bit flags instead of actual values.
+ * E.g. to know if we are using SSE, test (value & __FP_SSE) instead
+ * of (value == __FP_SSE).
+ */
+#define __FP_SSE 0x100 /* .. plus SSE-capable CPU */
+#define __FP_AVX 0x200 /* .. plus AVX-capable CPU */
+
+/*
+ * values that go into fp_save_mech
+ */
+#define FP_FNSAVE 1 /* fnsave/frstor instructions */
+#define FP_FXSAVE 2 /* fxsave/fxrstor instructions */
+#define FP_XSAVE 3 /* xsave/xrstor instructions */
/*
* masks for 80387 control word
@@ -159,6 +169,7 @@ extern "C" {
"\10im\7daz\6pe\5ue\4oe\3ze\2de\1ie"
extern int fp_kind; /* kind of fp support */
+extern int fp_save_mech; /* fp save/restore mechanism */
extern int fpu_exists; /* FPU hw exists */
#ifdef _KERNEL
@@ -174,15 +185,19 @@ extern int fpu_probe_pentium_fdivbug(void);
extern void fpnsave_ctxt(void *);
extern void fpxsave_ctxt(void *);
+extern void xsave_ctxt(void *);
extern void (*fpsave_ctxt)(void *);
struct fnsave_state;
struct fxsave_state;
+struct xsave_state;
extern void fxsave_insn(struct fxsave_state *);
extern void fpsave(struct fnsave_state *);
extern void fprestore(struct fnsave_state *);
extern void fpxsave(struct fxsave_state *);
extern void fpxrestore(struct fxsave_state *);
+extern void xsave(struct xsave_state *, uint64_t);
+extern void xrestore(struct xsave_state *, uint64_t);
extern void fpenable(void);
extern void fpdisable(void);
diff --git a/usr/src/uts/intel/sys/pcb.h b/usr/src/uts/intel/sys/pcb.h
index ec5dea501c..3a690bd980 100644
--- a/usr/src/uts/intel/sys/pcb.h
+++ b/usr/src/uts/intel/sys/pcb.h
@@ -20,8 +20,7 @@
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _SYS_PCB_H
@@ -37,6 +36,10 @@ extern "C" {
#ifndef _ASM
typedef struct fpu_ctx {
kfpu_t fpu_regs; /* kernel save area for FPU */
+ uint64_t fpu_xsave_mask; /* xsave mask for FPU/SSE/AVX */
+#if defined(__i386)
+ uint64_t fpu_padding; /* fix 32bit libmicro regression */
+#endif
uint_t fpu_flags; /* FPU state flags */
} fpu_ctx_t;
diff --git a/usr/src/uts/intel/sys/regset.h b/usr/src/uts/intel/sys/regset.h
index ff0044e317..5436ae0be3 100644
--- a/usr/src/uts/intel/sys/regset.h
+++ b/usr/src/uts/intel/sys/regset.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,10 +19,8 @@
* CDDL HEADER END
*/
/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
*/
-
/* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -32,8 +29,6 @@
#ifndef _SYS_REGSET_H
#define _SYS_REGSET_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/feature_tests.h>
#if !defined(_ASM)
@@ -246,6 +241,18 @@ struct fxsave_state {
#endif
}; /* 512 bytes */
+/*
+ * This structure is written to memory by an 'xsave' instruction.
+ * The first 512 bytes are compatible with the format of an 'fxsave' area.
+ */
+struct xsave_state {
+ struct fxsave_state xs_fxsave;
+ uint64_t xs_xstate_bv; /* 512 */
+ uint64_t xs_rsv_mbz[2];
+ uint64_t xs_reserved[5];
+ upad128_t xs_ymm[16]; /* avx - 576 */
+}; /* 832 bytes, asserted in fpnoextflt() */
+
#if defined(__amd64)
typedef struct fpu {
@@ -352,6 +359,7 @@ typedef struct {
#if defined(__i386)
struct fnsave_state kfpu_fn;
#endif
+ struct xsave_state kfpu_xs;
} kfpu_u;
uint32_t kfpu_status; /* saved at #mf exception */
uint32_t kfpu_xstatus; /* saved at #xm exception */
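
The arithmetic behind the 832-byte comment: 512 bytes of legacy fxsave area, a 64-byte xsave header (xs_xstate_bv plus the must-be-zero words), then 16 x 16 bytes of ymm upper halves at offset 576, matching what CPUID leaf 0xD sub-leaf 2 reports. Compile-time checks one could write for this (a sketch; the kernel itself only asserts sizeof >= 832, in fpnoextflt()):

	CTASSERT(sizeof (struct fxsave_state) == 512);
	CTASSERT(offsetof(struct xsave_state, xs_xstate_bv) == 512);
	CTASSERT(offsetof(struct xsave_state, xs_ymm) == 576);
	CTASSERT(sizeof (struct xsave_state) == 832);
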
diff --git a/usr/src/uts/intel/sys/x86_archext.h b/usr/src/uts/intel/sys/x86_archext.h
index 896fce14e1..fba9c6e896 100644
--- a/usr/src/uts/intel/sys/x86_archext.h
+++ b/usr/src/uts/intel/sys/x86_archext.h
@@ -112,9 +112,13 @@ extern "C" {
#define CPUID_INTC_ECX_MOVBE 0x00400000 /* MOVBE insn */
#define CPUID_INTC_ECX_POPCNT 0x00800000 /* POPCNT insn */
#define CPUID_INTC_ECX_AES 0x02000000 /* AES insns */
+#define CPUID_INTC_ECX_XSAVE 0x04000000 /* XSAVE/XRSTOR insns */
+#define CPUID_INTC_ECX_OSXSAVE 0x08000000 /* OS supports XSAVE insns */
+#define CPUID_INTC_ECX_AVX 0x10000000 /* AVX supported */
#define FMT_CPUID_INTC_ECX \
"\20" \
+ "\35avx\34osxsav\33xsave" \
"\32aes" \
"\30popcnt\27movbe\25sse4.2\24sse4.1\23dca" \
"\20\17etprd\16cx16\13cid\12ssse3\11tm2" \
@@ -356,6 +360,8 @@ extern "C" {
#define X86FSET_64 30
#define X86FSET_AES 31
#define X86FSET_PCLMULQDQ 32
+#define X86FSET_XSAVE 33
+#define X86FSET_AVX 34
/*
* flags to patch tsc_read routine.
@@ -561,6 +567,20 @@ extern "C" {
#define X86_SOCKET_ASB2 _X86_SOCKET_MKVAL(X86_VENDOR_AMD, 0x001000)
#define X86_SOCKET_C32 _X86_SOCKET_MKVAL(X86_VENDOR_AMD, 0x002000)
+/*
+ * xgetbv/xsetbv support
+ */
+
+#define XFEATURE_ENABLED_MASK 0x0
+/*
+ * XFEATURE_ENABLED_MASK values (eax)
+ */
+#define XFEATURE_LEGACY_FP 0x1
+#define XFEATURE_SSE 0x2
+#define XFEATURE_AVX 0x4
+#define XFEATURE_MAX XFEATURE_AVX
+#define XFEATURE_FP_ALL (XFEATURE_LEGACY_FP|XFEATURE_SSE|XFEATURE_AVX)
+
#if !defined(_ASM)
#if defined(_KERNEL) || defined(_KMEMUSER)
@@ -601,6 +621,13 @@ struct cpuid_regs {
uint32_t cp_edx;
};
+/*
+ * Utility functions to get/set extended control registers (XCR)
+ * Initial use is to get/set the contents of the XFEATURE_ENABLED_MASK.
+ */
+extern uint64_t get_xcr(uint_t);
+extern void set_xcr(uint_t, uint64_t);
+
extern uint64_t rdmsr(uint_t);
extern void wrmsr(uint_t, const uint64_t);
extern uint64_t xrdmsr(uint_t);
@@ -732,6 +759,8 @@ extern void patch_workaround_6323525(void);
extern int get_hwenv(void);
extern int is_controldom(void);
+extern void xsave_setup_msr(struct cpu *);
+
/*
* Defined hardware environments
*/