summaryrefslogtreecommitdiff
path: root/usr/src/uts/intel/ml/float.s
diff options
context:
space:
mode:
authorDan McDonald <danmcd@mnx.io>2022-05-13 17:20:24 -0400
committerGitHub <noreply@github.com>2022-05-13 17:20:24 -0400
commitbb7d6c9b47695f41cbacbcf6662baf3d0e152fdf (patch)
tree75f2d0cab5fb92f97f2ab2c3186a0b5d1579a33a /usr/src/uts/intel/ml/float.s
parent8ca5534c77e93c25d2c1f777499b12da0f7cc0cd (diff)
parent402559e299331588f209b3a9693e3bcd6a83d22c (diff)
downloadillumos-joyent-OS-8149.tar.gz
Merge branch 'master' into OS-8149OS-8149
Diffstat (limited to 'usr/src/uts/intel/ml/float.s')
-rw-r--r--usr/src/uts/intel/ml/float.s347
1 file changed, 347 insertions, 0 deletions
diff --git a/usr/src/uts/intel/ml/float.s b/usr/src/uts/intel/ml/float.s
new file mode 100644
index 0000000000..807647f553
--- /dev/null
+++ b/usr/src/uts/intel/ml/float.s
@@ -0,0 +1,347 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, Joyent, Inc.
+ * Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
+ */
+
+/* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
+/* Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T */
+/* All Rights Reserved */
+
+/* Copyright (c) 1987, 1988 Microsoft Corporation */
+/* All Rights Reserved */
+
+/*
+ * Copyright (c) 2009, Intel Corporation.
+ * All rights reserved.
+ */
+
+#include <sys/asm_linkage.h>
+#include <sys/asm_misc.h>
+#include <sys/regset.h>
+#include <sys/privregs.h>
+#include <sys/x86_archext.h>
+
+#include "assym.h"
+
+ /*
+ * Returns zero if x87 "chip" is present(!)
+ */
+	ENTRY_NP(fpu_initial_probe)
+	CLTS				/* clear CR0.TS so FPU insns don't trap */
+	fninit				/* initialize x87; clears status word */
+	fnstsw	%ax			/* fetch status word into %ax */
+	movzbl	%al, %eax		/* return low byte: 0 => init succeeded */
+	ret
+	SET_SIZE(fpu_initial_probe)
+
+	/*
+	 * void fxsave_insn(struct fxsave_state *fx)
+	 *
+	 * Store the x87/SSE state to *%rdi with the 64-bit form of
+	 * fxsave (the buffer must be 16-byte aligned per the ISA).
+	 */
+	ENTRY_NP(fxsave_insn)
+	fxsaveq	(%rdi)
+	ret
+	SET_SIZE(fxsave_insn)
+
+/*
+ * One of these routines is called from any lwp with floating
+ * point context as part of the prolog of a context switch.
+ */
+
+/*
+ * These three functions define the Intel "xsave" handling for CPUs with
+ * different features. Newer AMD CPUs can also use these functions. See the
+ * 'exception pointers' comment below.
+ */
+	ENTRY_NP(fpxsave_ctxt)	/* %rdi is a struct fpu_ctx */
+	cmpl	$FPU_EN, FPU_CTX_FPU_FLAGS(%rdi)	/* flags exactly FPU_EN? */
+	jne	1f			/* no: already saved or not enabled */
+	movl	$_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%rdi) /* mark saved */
+	movq	FPU_CTX_FPU_REGS(%rdi), %rdi /* fpu_regs.kfpu_u.kfpu_fx ptr */
+	fxsaveq	(%rdi)
+	STTS(%rsi)	/* trap on next fpu touch */
+1:	rep;	ret	/* use 2 byte return instruction when branch target */
+			/* AMD Software Optimization Guide - Section 6.2 */
+	SET_SIZE(fpxsave_ctxt)
+
+	ENTRY_NP(xsave_ctxt)	/* %rdi is a struct fpu_ctx */
+	cmpl	$FPU_EN, FPU_CTX_FPU_FLAGS(%rdi)	/* flags exactly FPU_EN? */
+	jne	1f			/* no: already saved or not enabled */
+	movl	$_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%rdi) /* mark saved */
+	movl	FPU_CTX_FPU_XSAVE_MASK(%rdi), %eax	/* xsave flags in EDX:EAX */
+	movl	FPU_CTX_FPU_XSAVE_MASK+4(%rdi), %edx
+	movq	FPU_CTX_FPU_REGS(%rdi), %rsi /* fpu_regs.kfpu_u.kfpu_xs ptr */
+	xsave	(%rsi)
+	STTS(%rsi)	/* trap on next fpu touch */
+1:	ret
+	SET_SIZE(xsave_ctxt)
+
+	ENTRY_NP(xsaveopt_ctxt)	/* %rdi is a struct fpu_ctx */
+	cmpl	$FPU_EN, FPU_CTX_FPU_FLAGS(%rdi)	/* flags exactly FPU_EN? */
+	jne	1f			/* no: already saved or not enabled */
+	movl	$_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%rdi) /* mark saved */
+	movl	FPU_CTX_FPU_XSAVE_MASK(%rdi), %eax	/* xsave flags in EDX:EAX */
+	movl	FPU_CTX_FPU_XSAVE_MASK+4(%rdi), %edx
+	movq	FPU_CTX_FPU_REGS(%rdi), %rsi /* fpu_regs.kfpu_u.kfpu_xs ptr */
+	xsaveopt (%rsi)		/* like xsave, but may skip unmodified state */
+	STTS(%rsi)	/* trap on next fpu touch */
+1:	ret
+	SET_SIZE(xsaveopt_ctxt)
+
+/*
+ * On certain AMD processors, the "exception pointers" (i.e. the last
+ * instruction pointer, last data pointer, and last opcode) are saved by the
+ * fxsave, xsave or xsaveopt instruction ONLY if the exception summary bit is
+ * set.
+ *
+ * On newer CPUs, AMD has changed their behavior to mirror the Intel behavior.
+ * We can detect this via an AMD specific cpuid feature bit
+ * (CPUID_AMD_EBX_ERR_PTR_ZERO) and use the simpler Intel-oriented functions.
+ * Otherwise we use these more complex functions on AMD CPUs. All three follow
+ * the same logic after the xsave* instruction.
+ */
+	ENTRY_NP(fpxsave_excp_clr_ctxt)	/* %rdi is a struct fpu_ctx */
+	cmpl	$FPU_EN, FPU_CTX_FPU_FLAGS(%rdi)	/* flags exactly FPU_EN? */
+	jne	1f			/* no: already saved or not enabled */
+	movl	$_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%rdi) /* mark saved */
+	movq	FPU_CTX_FPU_REGS(%rdi), %rdi /* fpu_regs.kfpu_u.kfpu_fx ptr */
+	fxsaveq	(%rdi)
+	/*
+	 * To ensure that we don't leak these values into the next context
+	 * on the cpu, we could just issue an fninit here, but that's
+	 * rather slow and so we issue an instruction sequence that
+	 * clears them more quickly, if a little obscurely.
+	 */
+	btw	$7, FXSAVE_STATE_FSW(%rdi)	/* Test saved ES bit */
+	jnc	0f				/* jump if ES = 0 */
+	fnclex		/* clear pending x87 exceptions */
+0:	ffree	%st(7)	/* clear tag bit to remove possible stack overflow */
+	fildl	.fpzero_const(%rip)
+			/* dummy load changes all exception pointers */
+	STTS(%rsi)	/* trap on next fpu touch */
+1:	rep;	ret	/* use 2 byte return instruction when branch target */
+			/* AMD Software Optimization Guide - Section 6.2 */
+	SET_SIZE(fpxsave_excp_clr_ctxt)
+
+	ENTRY_NP(xsave_excp_clr_ctxt)	/* %rdi is a struct fpu_ctx */
+	cmpl	$FPU_EN, FPU_CTX_FPU_FLAGS(%rdi)	/* flags exactly FPU_EN? */
+	jne	1f			/* no: already saved or not enabled */
+	movl	$_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%rdi) /* mark saved */
+	movl	FPU_CTX_FPU_XSAVE_MASK(%rdi), %eax	/* xsave flags in EDX:EAX */
+	movl	FPU_CTX_FPU_XSAVE_MASK+4(%rdi), %edx
+	movq	FPU_CTX_FPU_REGS(%rdi), %rsi /* fpu_regs.kfpu_u.kfpu_xs ptr */
+	xsave	(%rsi)
+	btw	$7, FXSAVE_STATE_FSW(%rsi)	/* Test saved ES bit */
+	jnc	0f				/* jump if ES = 0 */
+	fnclex		/* clear pending x87 exceptions */
+0:	ffree	%st(7)	/* clear tag bit to remove possible stack overflow */
+	fildl	.fpzero_const(%rip)	/* dummy load changes all excp. pointers */
+	STTS(%rsi)	/* trap on next fpu touch */
+1:	ret
+	SET_SIZE(xsave_excp_clr_ctxt)
+
+	ENTRY_NP(xsaveopt_excp_clr_ctxt)	/* %rdi is a struct fpu_ctx */
+	cmpl	$FPU_EN, FPU_CTX_FPU_FLAGS(%rdi)	/* flags exactly FPU_EN? */
+	jne	1f			/* no: already saved or not enabled */
+	movl	$_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%rdi) /* mark saved */
+	movl	FPU_CTX_FPU_XSAVE_MASK(%rdi), %eax	/* xsave flags in EDX:EAX */
+	movl	FPU_CTX_FPU_XSAVE_MASK+4(%rdi), %edx
+	movq	FPU_CTX_FPU_REGS(%rdi), %rsi /* fpu_regs.kfpu_u.kfpu_xs ptr */
+	xsaveopt (%rsi)		/* like xsave, but may skip unmodified state */
+	btw	$7, FXSAVE_STATE_FSW(%rsi)	/* Test saved ES bit */
+	jnc	0f				/* jump if ES = 0 */
+	fnclex		/* clear pending x87 exceptions */
+0:	ffree	%st(7)	/* clear tag bit to remove possible stack overflow */
+	fildl	.fpzero_const(%rip)	/* dummy load changes all excp. pointers */
+	STTS(%rsi)	/* trap on next fpu touch */
+1:	ret
+	SET_SIZE(xsaveopt_excp_clr_ctxt)
+
+	.align	8
+/*
+ * 64-bit zero constant; the fildl dummy loads in the *_excp_clr_ctxt
+ * routines read its low 32 bits to reset the x87 exception pointers.
+ */
+.fpzero_const:
+	.4byte	0x0
+	.4byte	0x0
+
+
+	/*
+	 * Unconditionally save the x87/SSE state to *%rdi, reinitialize
+	 * the x87 unit, and leave the FPU disabled (CR0.TS set).
+	 */
+	ENTRY_NP(fpxsave)
+	CLTS			/* allow FPU insns without trapping */
+	fxsaveq	(%rdi)
+	fninit			/* clear exceptions, init x87 tags */
+	STTS(%rdi)		/* set TS bit in %cr0 (disable FPU) */
+	ret
+	SET_SIZE(fpxsave)
+
+	/*
+	 * In: %rdi = xsave area, %rsi = 64-bit feature bitmap.
+	 * The xsave instruction takes the bitmap in %edx:%eax, so the
+	 * 64-bit mask in %rsi is split across the two registers.
+	 */
+	ENTRY_NP(xsave)
+	CLTS			/* allow FPU insns without trapping */
+	movl	%esi, %eax	/* bv mask, low 32 bits */
+	movq	%rsi, %rdx
+	shrq	$32, %rdx	/* bv mask, high 32 bits */
+	xsave	(%rdi)
+
+	fninit			/* clear exceptions, init x87 tags */
+	STTS(%rdi)		/* set TS bit in %cr0 (disable FPU) */
+	ret
+	SET_SIZE(xsave)
+
+	/*
+	 * In: %rdi = xsave area, %rsi = 64-bit feature bitmap.
+	 * Same as xsave above, but uses xsaveopt, which may skip
+	 * writing state components that are unmodified.
+	 */
+	ENTRY_NP(xsaveopt)
+	CLTS			/* allow FPU insns without trapping */
+	movl	%esi, %eax	/* bv mask, low 32 bits */
+	movq	%rsi, %rdx
+	shrq	$32, %rdx	/* bv mask, high 32 bits */
+	xsaveopt (%rdi)
+
+	fninit			/* clear exceptions, init x87 tags */
+	STTS(%rdi)		/* set TS bit in %cr0 (disable FPU) */
+	ret
+	SET_SIZE(xsaveopt)
+
+/*
+ * These functions are used when restoring the FPU as part of the epilogue of a
+ * context switch.
+ */
+
+	ENTRY(fpxrestore_ctxt)	/* %rdi is a struct fpu_ctx */
+	cmpl	$_CONST(FPU_EN|FPU_VALID), FPU_CTX_FPU_FLAGS(%rdi)
+	jne	1f		/* nothing saved and valid to restore */
+	movl	$_CONST(FPU_EN), FPU_CTX_FPU_FLAGS(%rdi)  /* clear FPU_VALID */
+	movq	FPU_CTX_FPU_REGS(%rdi), %rdi /* fpu_regs.kfpu_u.kfpu_fx ptr */
+	CLTS			/* enable FPU before the restore */
+	fxrstorq	(%rdi)
+1:
+	ret
+	SET_SIZE(fpxrestore_ctxt)
+
+	ENTRY(xrestore_ctxt)	/* %rdi is a struct fpu_ctx */
+	cmpl	$_CONST(FPU_EN|FPU_VALID), FPU_CTX_FPU_FLAGS(%rdi)
+	jne	1f		/* nothing saved and valid to restore */
+	movl	$_CONST(FPU_EN), FPU_CTX_FPU_FLAGS(%rdi)  /* clear FPU_VALID */
+	movl	FPU_CTX_FPU_XSAVE_MASK(%rdi), %eax	/* xsave flags in EDX:EAX */
+	movl	FPU_CTX_FPU_XSAVE_MASK+4(%rdi), %edx
+	movq	FPU_CTX_FPU_REGS(%rdi), %rdi /* fpu_regs.kfpu_u.kfpu_xs ptr */
+	CLTS			/* enable FPU before the restore */
+	xrstor	(%rdi)
+1:
+	ret
+	SET_SIZE(xrestore_ctxt)
+
+
+	/*
+	 * Unconditionally restore the x87/SSE state from *%rdi,
+	 * enabling the FPU first.
+	 */
+	ENTRY_NP(fpxrestore)
+	CLTS
+	fxrstorq	(%rdi)
+	ret
+	SET_SIZE(fpxrestore)
+
+	/*
+	 * In: %rdi = xsave area, %rsi = 64-bit feature bitmap.
+	 * xrstor takes the bitmap in %edx:%eax, so split %rsi.
+	 */
+	ENTRY_NP(xrestore)
+	CLTS
+	movl	%esi, %eax	/* bv mask, low 32 bits */
+	movq	%rsi, %rdx
+	shrq	$32, %rdx	/* bv mask, high 32 bits */
+	xrstor	(%rdi)
+	ret
+	SET_SIZE(xrestore)
+
+/*
+ * Disable the floating point unit.
+ */
+
+	ENTRY_NP(fpdisable)
+	STTS(%rdi)		/* set TS bit in %cr0 (disable FPU) */
+	ret			/* %rdi is only a scratch reg for STTS */
+	SET_SIZE(fpdisable)
+
+/*
+ * Initialize the fpu hardware.
+ */
+
+	ENTRY_NP(fpinit)
+	CLTS			/* allow FPU insns without trapping */
+	cmpl	$FP_XSAVE, fp_save_mech		/* xsave-based machine? */
+	je	1f
+
+	/* fxsave */
+	leaq	sse_initial(%rip), %rax
+	fxrstorq	(%rax)			/* load clean initial state */
+	ret
+
+1:	/* xsave */
+	leaq	avx_initial(%rip), %rcx
+	xorl	%edx, %edx		/* high half of feature mask = 0 */
+	movl	$XFEATURE_AVX, %eax
+	btl	$X86FSET_AVX, x86_featureset	/* CPU supports AVX? */
+	cmovael	%edx, %eax		/* CF clear: drop XFEATURE_AVX */
+	orl	$(XFEATURE_LEGACY_FP | XFEATURE_SSE), %eax
+	xrstor	(%rcx)			/* load clean initial state */
+	ret
+	SET_SIZE(fpinit)
+
+/*
+ * Clears FPU exception state.
+ * Returns the FP status word.
+ */
+
+	ENTRY_NP(fperr_reset)
+	CLTS			/* allow FPU insns without trapping */
+	xorl	%eax, %eax	/* pre-zero %eax: fnstsw writes only %ax */
+	fnstsw	%ax		/* return the current x87 status word */
+	fnclex			/* clear pending x87 exceptions */
+	ret
+	SET_SIZE(fperr_reset)
+
+	/*
+	 * Returns the current MXCSR value in %eax, then clears the SSE
+	 * exception flag bits and reloads MXCSR.
+	 */
+	ENTRY_NP(fpxerr_reset)
+	pushq	%rbp
+	movq	%rsp, %rbp
+	subq	$0x10, %rsp		/* make some temporary space */
+	CLTS
+	stmxcsr	(%rsp)			/* store current MXCSR */
+	movl	(%rsp), %eax		/* return value = old MXCSR */
+	andl	$_BITNOT(SSE_MXCSR_EFLAGS), (%rsp)	/* mask off flags */
+	ldmxcsr	(%rsp)			/* clear processor exceptions */
+	leave
+	ret
+	SET_SIZE(fpxerr_reset)
+
+	/*
+	 * Returns the x87 control word in the high 16 bits of %eax and
+	 * the status word in the low 16 bits (they are stored adjacently
+	 * on the stack and loaded with a single 32-bit read).
+	 */
+	ENTRY_NP(fpgetcwsw)
+	pushq	%rbp
+	movq	%rsp, %rbp
+	subq	$0x10, %rsp		/* make some temporary space */
+	CLTS
+	fnstsw	(%rsp)			/* store the status word */
+	fnstcw	2(%rsp)			/* store the control word */
+	movl	(%rsp), %eax		/* put both in %eax */
+	leave
+	ret
+	SET_SIZE(fpgetcwsw)
+
+/*
+ * Returns the MXCSR register.
+ */
+
+	ENTRY_NP(fpgetmxcsr)
+	pushq	%rbp
+	movq	%rsp, %rbp
+	subq	$0x10, %rsp		/* make some temporary space */
+	CLTS
+	stmxcsr	(%rsp)			/* store MXCSR to the scratch slot */
+	movl	(%rsp), %eax		/* return it in %eax */
+	leave
+	ret
+	SET_SIZE(fpgetmxcsr)
+