author    | Robert Mustacchi <rm@joyent.com> | 2018-06-13 20:20:20 +0000
committer | Robert Mustacchi <rm@joyent.com> | 2018-06-19 19:34:37 +0000
commit    | 4c28a617e3922d92a58e813a5b955eb526b9c386 (patch)
tree      | 0a6e6f67434e0fcc9d872b3e612b53cf8e2f766e
parent    | a32a1f376ed9360264e4a374608fdcc5c4927d63 (diff)
download  | illumos-joyent-4c28a617e3922d92a58e813a5b955eb526b9c386.tar.gz
9598 Need Eager FPU
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
Reviewed by: Bryan Cantrill <bryan@joyent.com>
Reviewed by: John Levon <john.levon@joyent.com>
Reviewed by: Toomas Soome <tsoome@me.com>
Approved by: Richard Lowe <richlowe@richlowe.net>
-rw-r--r-- | usr/src/cmd/mdb/intel/modules/genunix/gcore_isadep.c | 3
-rw-r--r-- | usr/src/uts/common/brand/solaris10/s10_brand.c | 3
-rw-r--r-- | usr/src/uts/i86pc/ml/kpti_trampolines.s | 28
-rw-r--r-- | usr/src/uts/i86pc/ml/locore.s | 107
-rw-r--r-- | usr/src/uts/i86pc/ml/syscall_asm_amd64.s | 40
-rw-r--r-- | usr/src/uts/i86pc/os/fpu_subr.c | 5
-rw-r--r-- | usr/src/uts/i86pc/os/intr.c | 33
-rw-r--r-- | usr/src/uts/i86pc/os/trap.c | 57
-rw-r--r-- | usr/src/uts/intel/ia32/ml/exception.s | 228
-rw-r--r-- | usr/src/uts/intel/ia32/ml/float.s | 516
-rw-r--r-- | usr/src/uts/intel/ia32/os/archdep.c | 9
-rw-r--r-- | usr/src/uts/intel/ia32/os/fpu.c | 601
-rw-r--r-- | usr/src/uts/intel/ia32/os/sundep.c | 55
-rw-r--r-- | usr/src/uts/intel/ia32/os/sysi86.c | 2
-rw-r--r-- | usr/src/uts/intel/ia32/syscall/lwp_private.c | 15
-rw-r--r-- | usr/src/uts/intel/sys/archsystm.h | 3
-rw-r--r-- | usr/src/uts/intel/sys/fp.h | 24
-rw-r--r-- | usr/src/uts/intel/sys/pcb.h | 18
18 files changed, 668 insertions, 1079 deletions
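
Before the per-file hunks: this change replaces lazy (#NM-trap driven) FPU restore with an eager restore performed by a new context operation, fprestore_ctxt(), paired with the existing fpsave_ctxt(), plus PCB flags that force a slow return to user land when the saved image must be reloaded. The sketch below is only a rough, self-contained model of that save-on-switch-out / restore-on-switch-in flow, not the kernel code itself: the names thread_fpu_ctx_t, sim_fxsave(), sim_fxrstor(), switch_out() and switch_in() are invented for illustration, and memcpy() stands in for the fxsave/xsave and fxrstor/xrstor instructions so the example compiles anywhere.

/*
 * Minimal user-space model of the eager FPU switch path.  Illustrative
 * names only; the real kernel uses struct fpu_ctx, fpsave_ctxt(),
 * fprestore_ctxt(), and the PCB_*_UPDATE_FPU flags seen in the diff below.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define	FPU_EN		0x1	/* thread has FPU state */
#define	FPU_VALID	0x2	/* save area holds the current register image */

typedef struct thread_fpu_ctx {
	uint32_t flags;
	uint8_t save_area[512];	/* stand-in for struct fxsave_state */
} thread_fpu_ctx_t;

static uint8_t hw_fpu[512];	/* stand-in for the live FPU registers */

static void
sim_fxsave(uint8_t *dst)	/* models fxsave/xsave */
{
	memcpy(dst, hw_fpu, sizeof (hw_fpu));
}

static void
sim_fxrstor(const uint8_t *src)	/* models fxrstor/xrstor */
{
	memcpy(hw_fpu, src, sizeof (hw_fpu));
}

/* Switch-out hook: like fpsave_ctxt(), save the image and mark it valid. */
static void
switch_out(thread_fpu_ctx_t *fp)
{
	if ((fp->flags & FPU_EN) == 0)
		return;
	sim_fxsave(fp->save_area);
	fp->flags |= FPU_VALID;
}

/*
 * Switch-in hook: like fprestore_ctxt(), eagerly reload the saved image so
 * the thread never takes a #NM trap on its first FPU use after the switch.
 */
static void
switch_in(thread_fpu_ctx_t *fp)
{
	if ((fp->flags & (FPU_EN | FPU_VALID)) != (FPU_EN | FPU_VALID))
		return;
	sim_fxrstor(fp->save_area);
	fp->flags &= ~FPU_VALID;	/* registers are live again */
}

int
main(void)
{
	thread_fpu_ctx_t a = { FPU_EN, { 0 } }, b = { FPU_EN, { 0 } };

	memset(hw_fpu, 0xAA, sizeof (hw_fpu));	/* thread A "uses" the FPU */
	switch_out(&a);				/* A scheduled off CPU */
	switch_in(&b);				/* B scheduled on CPU */
	memset(hw_fpu, 0xBB, sizeof (hw_fpu));	/* thread B "uses" the FPU */
	switch_out(&b);
	switch_in(&a);				/* A's image restored eagerly */
	(void) printf("thread A sees 0x%02x\n", hw_fpu[0]);
	return (0);
}
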
diff --git a/usr/src/cmd/mdb/intel/modules/genunix/gcore_isadep.c b/usr/src/cmd/mdb/intel/modules/genunix/gcore_isadep.c index 73d5ecbb94..d4837bd475 100644 --- a/usr/src/cmd/mdb/intel/modules/genunix/gcore_isadep.c +++ b/usr/src/cmd/mdb/intel/modules/genunix/gcore_isadep.c @@ -10,6 +10,7 @@ */ /* * Copyright (c) 2013 by Delphix. All rights reserved. + * Copyright (c) 2018, Joyent, Inc. */ #include <mdb/mdb_modapi.h> @@ -71,7 +72,7 @@ gcore_getgregs(mdb_klwp_t *lwp, gregset_t grp) grp[REG_R15] = rp->r_r15; grp[REG_FSBASE] = pcb->pcb_fsbase; grp[REG_GSBASE] = pcb->pcb_gsbase; - if (pcb->pcb_rupdate == 1) { + if (PCB_NEED_UPDATE_SEGS(pcb)) { grp[REG_DS] = pcb->pcb_ds; grp[REG_ES] = pcb->pcb_es; grp[REG_FS] = pcb->pcb_fs; diff --git a/usr/src/uts/common/brand/solaris10/s10_brand.c b/usr/src/uts/common/brand/solaris10/s10_brand.c index f24b864eef..0841f02e51 100644 --- a/usr/src/uts/common/brand/solaris10/s10_brand.c +++ b/usr/src/uts/common/brand/solaris10/s10_brand.c @@ -22,6 +22,7 @@ /* * Copyright (c) 2013, OmniTI Computer Consulting, Inc. All rights reserved. * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2018, Joyent, Inc. */ #include <sys/errno.h> @@ -195,7 +196,7 @@ s10_amd64_correct_fsreg(klwp_t *l) if (lwp_getdatamodel(l) == DATAMODEL_NATIVE) { kpreempt_disable(); l->lwp_pcb.pcb_fs = LWPFS_SEL; - l->lwp_pcb.pcb_rupdate = 1; + PCB_SET_UPDATE_SEGS(&l->lwp_pcb); lwptot(l)->t_post_sys = 1; /* Guarantee update_sregs() */ kpreempt_enable(); } diff --git a/usr/src/uts/i86pc/ml/kpti_trampolines.s b/usr/src/uts/i86pc/ml/kpti_trampolines.s index 7e72841e32..036c254d08 100644 --- a/usr/src/uts/i86pc/ml/kpti_trampolines.s +++ b/usr/src/uts/i86pc/ml/kpti_trampolines.s @@ -137,6 +137,12 @@ DGDEF3(kpti_enable, 8, 8) .fill 1, 8, 1 +#if DEBUG + .data +_bad_ts_panic_msg: + .string "kpti_trampolines.s: tr_iret_user but CR0.TS set" +#endif + .section ".text"; .align MMU_PAGESIZE @@ -523,6 +529,28 @@ tr_intr_ret_start: SET_SIZE(tr_iret_kernel) ENTRY_NP(tr_iret_user) +#if DEBUG + /* + * Ensure that we return to user land with CR0.TS clear. We do this + * before we trampoline back and pivot the stack and %cr3. This way + * we're still on the kernel stack and kernel %cr3, though we are on the + * user GSBASE. + */ + pushq %rax + mov %cr0, %rax + testq $CR0_TS, %rax + jz 1f + swapgs + popq %rax + leaq _bad_ts_panic_msg(%rip), %rdi + xorl %eax, %eax + pushq %rbp + movq %rsp, %rbp + call panic +1: + popq %rax +#endif + cmpq $1, kpti_enable jne 1f diff --git a/usr/src/uts/i86pc/ml/locore.s b/usr/src/uts/i86pc/ml/locore.s index 4626dd1492..acd96e271a 100644 --- a/usr/src/uts/i86pc/ml/locore.s +++ b/usr/src/uts/i86pc/ml/locore.s @@ -239,6 +239,11 @@ __return_from_main: __unsupported_cpu: .string "486 style cpu detected - no longer supported!" 
+#if defined(DEBUG) +_no_pending_updates: + .string "locore.s:%d lwp_rtt(lwp %p) but pcb_rupdate != 1" +#endif + #endif /* !__lint */ #if !defined(__amd64) @@ -1505,8 +1510,6 @@ _sys_rtt(void) #else /* __lint */ -#if defined(__amd64) - ENTRY_NP(lwp_rtt_initial) movq %gs:CPU_THREAD, %r15 movq T_STACK(%r15), %rsp /* switch to the thread stack */ @@ -1549,8 +1552,6 @@ _lwp_rtt: movq %r14, %rdx xorl %eax, %eax call panic -_no_pending_updates: - .string "locore.s:%d lwp_rtt(lwp %p) but pcb_rupdate != 1" 1: #endif @@ -1571,11 +1572,6 @@ _no_pending_updates: call post_syscall /* post_syscall(rval1, rval2) */ /* - * set up to take fault on first use of fp - */ - STTS(%rdi) - - /* * XXX - may want a fast path that avoids sys_rtt_common in the * most common case. */ @@ -1636,99 +1632,6 @@ _sys_rtt_end: SET_SIZE(sys_rtt_syscall) SET_SIZE(sys_rtt_syscall32) -#elif defined(__i386) - - ENTRY_NP(lwp_rtt_initial) - movl %gs:CPU_THREAD, %eax - movl T_STACK(%eax), %esp /* switch to the thread stack */ - movl %esp, %ebp - call __dtrace_probe___proc_start - jmp _lwp_rtt - - ENTRY_NP(lwp_rtt) - movl %gs:CPU_THREAD, %eax - movl T_STACK(%eax), %esp /* switch to the thread stack */ - movl %esp, %ebp -_lwp_rtt: - call __dtrace_probe___proc_lwp__start - - /* - * If agent lwp, clear %fs and %gs. - */ - movl %gs:CPU_LWP, %eax - movl LWP_PROCP(%eax), %edx - - cmpl %eax, P_AGENTTP(%edx) - jne 1f - movl $0, REGOFF_FS(%esp) - movl $0, REGOFF_GS(%esp) -1: - call dtrace_systrace_rtt - movl REGOFF_EDX(%esp), %edx - movl REGOFF_EAX(%esp), %eax - pushl %edx - pushl %eax - call post_syscall /* post_syscall(rval1, rval2) */ - addl $8, %esp - - /* - * set up to take fault on first use of fp - */ - STTS(%eax) - - /* - * XXX - may want a fast path that avoids sys_rtt_common in the - * most common case. - */ - ALTENTRY(_sys_rtt) - CLI(%eax) /* disable interrupts */ - ALTENTRY(_sys_rtt_ints_disabled) - pushl %esp /* pass rp to sys_rtt_common */ - call sys_rtt_common - addl $4, %esp /* pop arg */ - testl %eax, %eax /* test for return to user mode */ - jz sr_sup - - /* - * Return to User. - */ - ALTENTRY(sys_rtt_syscall) - INTR_POP_USER - - /* - * There can be no instructions between this label and IRET or - * we could end up breaking linux brand support. See label usage - * in lx_brand_int80_callback for an example. - */ - ALTENTRY(nopop_sys_rtt_syscall) - IRET - /*NOTREACHED*/ - SET_SIZE(nopop_sys_rtt_syscall) - - ALTENTRY(_sys_rtt_end) - - /* - * Return to supervisor - */ - ALTENTRY(sr_sup) - - /* - * Restore regs before doing iret to kernel mode - */ - INTR_POP_KERNEL - IRET - /*NOTREACHED*/ - - SET_SIZE(sr_sup) - SET_SIZE(_sys_rtt_end) - SET_SIZE(lwp_rtt) - SET_SIZE(lwp_rtt_initial) - SET_SIZE(_sys_rtt_ints_disabled) - SET_SIZE(_sys_rtt) - SET_SIZE(sys_rtt_syscall) - -#endif /* __i386 */ - #endif /* __lint */ #if defined(__lint) diff --git a/usr/src/uts/i86pc/ml/syscall_asm_amd64.s b/usr/src/uts/i86pc/ml/syscall_asm_amd64.s index be6a94c61b..b09b4f1fdc 100644 --- a/usr/src/uts/i86pc/ml/syscall_asm_amd64.s +++ b/usr/src/uts/i86pc/ml/syscall_asm_amd64.s @@ -271,7 +271,18 @@ * between entering privileged mode and performing the assertion, * otherwise we may perform a context switch on the thread, which * will end up setting pcb_rupdate to 1 again. + * + * ASSERT(%cr0 & CR0_TS == 0); + * Preconditions: + * (%rsp is ready for normal call sequence) + * Postconditions (if assertion is true): + * (specified register is clobbered) + * + * Check to make sure that we are returning to user land and that CR0.TS + * is not set. 
This is required as part of the eager FPU (see + * uts/intel/ia32/os/fpu.c for more information). */ + #if defined(DEBUG) #if !defined(__lint) @@ -285,6 +296,9 @@ __codesel_msg: __no_rupdate_msg: .string "syscall_asm_amd64.s:%d lwp %p, pcb_rupdate != 0" +__bad_ts_msg: + .string "sysscall_asm_amd64.s:%d CR0.TS set on user return" + #endif /* !__lint */ #define ASSERT_LWPTOREGS(lwp, rp) \ @@ -310,9 +324,20 @@ __no_rupdate_msg: call panic; \ 8: +#define ASSERT_CR0TS_ZERO(reg) \ + movq %cr0, reg; \ + testq $CR0_TS, reg; \ + jz 9f; \ + leaq __bad_ts_msg(%rip), %rdi; \ + movl $__LINE__, %esi; \ + xorl %eax, %eax; \ + call panic; \ +9: + #else #define ASSERT_LWPTOREGS(lwp, rp) #define ASSERT_NO_RUPDATE_PENDING(lwp) +#define ASSERT_CR0TS_ZERO(reg) #endif /* @@ -614,6 +639,11 @@ _syscall_invoke: movq %r13, REGOFF_RDX(%rsp) /* + * Clobber %r11 as we check CR0.TS. + */ + ASSERT_CR0TS_ZERO(%r11) + + /* * To get back to userland, we need the return %rip in %rcx and * the return %rfl in %r11d. The sysretq instruction also arranges * to fix up %cs and %ss; everything else is our responsibility. @@ -897,6 +927,11 @@ _syscall32_save: SIMPLE_SYSCALL_POSTSYS(%r15, %r14, %bx) /* + * Clobber %r11 as we check CR0.TS. + */ + ASSERT_CR0TS_ZERO(%r11) + + /* * To get back to userland, we need to put the return %rip in %rcx and * the return %rfl in %r11d. The sysret instruction also arranges * to fix up %cs and %ss; everything else is our responsibility. @@ -1183,6 +1218,11 @@ sys_sysenter() andq $_BITNOT(PS_IE), REGOFF_RFL(%rsp) /* + * Clobber %r11 as we check CR0.TS. + */ + ASSERT_CR0TS_ZERO(%r11) + + /* * (There's no point in loading up %edx because the sysexit * mechanism smashes it.) */ diff --git a/usr/src/uts/i86pc/os/fpu_subr.c b/usr/src/uts/i86pc/os/fpu_subr.c index 3e027269fb..5c57bdcb8c 100644 --- a/usr/src/uts/i86pc/os/fpu_subr.c +++ b/usr/src/uts/i86pc/os/fpu_subr.c @@ -148,8 +148,7 @@ fpu_probe(void) ENABLE_SSE(); if (is_x86_feature(x86_featureset, X86FSET_AVX)) { - ASSERT(is_x86_feature(x86_featureset, - X86FSET_XSAVE)); + ASSERT(is_x86_feature(x86_featureset, X86FSET_XSAVE)); fp_kind |= __FP_AVX; } @@ -180,7 +179,7 @@ fpu_probe(void) fpsave_ctxt = xsave_ctxt; } } - patch_xsave(); + fprestore_ctxt = xrestore_ctxt; fpsave_cachep = kmem_cache_create("xsave_cache", cpuid_get_xsave_size(), XSAVE_ALIGN, NULL, NULL, NULL, NULL, NULL, 0); diff --git a/usr/src/uts/i86pc/os/intr.c b/usr/src/uts/i86pc/os/intr.c index f66f0e69e8..29fa78109c 100644 --- a/usr/src/uts/i86pc/os/intr.c +++ b/usr/src/uts/i86pc/os/intr.c @@ -1446,6 +1446,8 @@ loop: */ tp = CPU->cpu_thread; if (USERMODE(rp->r_cs)) { + pcb_t *pcb; + /* * Check if AST pending. */ @@ -1460,14 +1462,29 @@ loop: goto loop; } -#if defined(__amd64) + pcb = &tp->t_lwp->lwp_pcb; + + /* + * Check to see if we need to initialize the FPU for this + * thread. This should be an uncommon occurrence, but may happen + * in the case where the system creates an lwp through an + * abnormal path such as the agent lwp. Make sure that we still + * happen to have the FPU in a good state. + */ + if ((pcb->pcb_fpu.fpu_flags & FPU_EN) == 0) { + kpreempt_disable(); + fp_seed(); + kpreempt_enable(); + PCB_SET_UPDATE_FPU(pcb); + } + /* * We are done if segment registers do not need updating. */ - if (tp->t_lwp->lwp_pcb.pcb_rupdate == 0) + if (!PCB_NEED_UPDATE(pcb)) return (1); - if (update_sregs(rp, tp->t_lwp)) { + if (PCB_NEED_UPDATE_SEGS(pcb) && update_sregs(rp, tp->t_lwp)) { /* * 1 or more of the selectors is bad. * Deliver a SIGSEGV. 
@@ -1482,9 +1499,15 @@ loop: tp->t_sig_check = 1; cli(); } - tp->t_lwp->lwp_pcb.pcb_rupdate = 0; + PCB_CLEAR_UPDATE_SEGS(pcb); + + if (PCB_NEED_UPDATE_FPU(pcb)) { + fprestore_ctxt(&pcb->pcb_fpu); + } + PCB_CLEAR_UPDATE_FPU(pcb); + + ASSERT0(PCB_NEED_UPDATE(pcb)); -#endif /* __amd64 */ return (1); } diff --git a/usr/src/uts/i86pc/os/trap.c b/usr/src/uts/i86pc/os/trap.c index 1b14e61b11..be5ad49f0e 100644 --- a/usr/src/uts/i86pc/os/trap.c +++ b/usr/src/uts/i86pc/os/trap.c @@ -993,50 +993,25 @@ trap(struct regs *rp, caddr_t addr, processorid_t cpuid) fault = FLTIOVF; break; + /* + * When using an eager FPU on x86, the #NM trap is no longer meaningful. + * Userland should not be able to trigger it. Anything that does + * represents a fatal error in the kernel and likely in the register + * state of the system. User FPU state should always be valid. + */ case T_NOEXTFLT + USER: /* math coprocessor not available */ - if (tudebug && tudebugfpe) - showregs(type, rp, addr); - if (fpnoextflt(rp)) { - siginfo.si_signo = SIGILL; - siginfo.si_code = ILL_ILLOPC; - siginfo.si_addr = (caddr_t)rp->r_pc; - fault = FLTILL; - } - break; - - case T_EXTOVRFLT: /* extension overrun fault */ - /* check if we took a kernel trap on behalf of user */ - { - extern void ndptrap_frstor(void); - if (rp->r_pc != (uintptr_t)ndptrap_frstor) { - sti(); /* T_EXTOVRFLT comes in via cmninttrap */ - (void) die(type, rp, addr, cpuid); - } - type |= USER; - } - /*FALLTHROUGH*/ - case T_EXTOVRFLT + USER: /* extension overrun fault */ - if (tudebug && tudebugfpe) - showregs(type, rp, addr); - if (fpextovrflt(rp)) { - siginfo.si_signo = SIGSEGV; - siginfo.si_code = SEGV_MAPERR; - siginfo.si_addr = (caddr_t)rp->r_pc; - fault = FLTBOUNDS; - } + case T_NOEXTFLT: + (void) die(type, rp, addr, cpuid); break; + /* + * Kernel threads leveraging floating point need to mask the exceptions + * or ensure that they cannot happen. There is no recovery from this. + */ case T_EXTERRFLT: /* x87 floating point exception pending */ - /* check if we took a kernel trap on behalf of user */ - { - extern void ndptrap_frstor(void); - if (rp->r_pc != (uintptr_t)ndptrap_frstor) { - sti(); /* T_EXTERRFLT comes in via cmninttrap */ - (void) die(type, rp, addr, cpuid); - } - type |= USER; - } - /*FALLTHROUGH*/ + sti(); /* T_EXTERRFLT comes in via cmninttrap */ + (void) die(type, rp, addr, cpuid); + break; case T_EXTERRFLT + USER: /* x87 floating point exception pending */ if (tudebug && tudebugfpe) @@ -1939,7 +1914,7 @@ kern_gpfault(struct regs *rp) } #if defined(__amd64) - if (trp == NULL && lwp->lwp_pcb.pcb_rupdate != 0) { + if (trp == NULL && PCB_NEED_UPDATE_SEGS(&lwp->lwp_pcb)) { /* * This is the common case -- we're trying to load diff --git a/usr/src/uts/intel/ia32/ml/exception.s b/usr/src/uts/intel/ia32/ml/exception.s index 82d449f31c..66eda34c14 100644 --- a/usr/src/uts/intel/ia32/ml/exception.s +++ b/usr/src/uts/intel/ia32/ml/exception.s @@ -51,17 +51,7 @@ #include <sys/traptrace.h> #include <sys/machparam.h> -/* - * only one routine in this file is interesting to lint - */ - -#if defined(__lint) - -void -ndptrap_frstor(void) -{} - -#else +#if !defined(__lint) #include "assym.h" @@ -643,220 +633,16 @@ _emul_done: #endif /* __i386 */ -#if defined(__amd64) - /* * #NM */ -#if defined(__xpv) ENTRY_NP(ndptrap) - /* - * (On the hypervisor we must make a hypercall so we might as well - * save everything and handle as in a normal trap.) 
- */ - TRAP_NOERR(T_NOEXTFLT) /* $7 */ - INTR_PUSH - - /* - * We want to do this quickly as every lwp using fp will take this - * after a context switch -- we do the frequent path in ndptrap_frstor - * below; for all other cases, we let the trap code handle it - */ - LOADCPU(%rax) /* swapgs handled in hypervisor */ - cmpl $0, fpu_exists(%rip) - je .handle_in_trap /* let trap handle no fp case */ - movq CPU_THREAD(%rax), %rbx /* %rbx = curthread */ - movl $FPU_EN, %eax - movq T_LWP(%rbx), %rbx /* %rbx = lwp */ - testq %rbx, %rbx - jz .handle_in_trap /* should not happen? */ -#if LWP_PCB_FPU != 0 - addq $LWP_PCB_FPU, %rbx /* &lwp->lwp_pcb.pcb_fpu */ -#endif - testl %eax, PCB_FPU_FLAGS(%rbx) - jz .handle_in_trap /* must be the first fault */ - CLTS - andl $_BITNOT(FPU_VALID), PCB_FPU_FLAGS(%rbx) -#if FPU_CTX_FPU_REGS != 0 - addq $FPU_CTX_FPU_REGS, %rbx -#endif - - movl FPU_CTX_FPU_XSAVE_MASK(%rbx), %eax /* for xrstor */ - movl FPU_CTX_FPU_XSAVE_MASK+4(%rbx), %edx /* for xrstor */ - - /* - * the label below is used in trap.c to detect FP faults in - * kernel due to user fault. - */ - ALTENTRY(ndptrap_frstor) - movq (%rbx), %rbx /* fpu_regs.kfpu_u.kfpu_XX pointer */ - .globl _patch_xrstorq_rbx -_patch_xrstorq_rbx: - fxrstorq (%rbx) - cmpw $KCS_SEL, REGOFF_CS(%rsp) - je .return_to_kernel - - ASSERT_UPCALL_MASK_IS_SET - USER_POP - IRET /* return to user mode */ - /*NOTREACHED*/ - -.return_to_kernel: - INTR_POP - IRET - /*NOTREACHED*/ - -.handle_in_trap: - INTR_POP - pushq $0 /* can not use TRAP_NOERR */ - pushq $T_NOEXTFLT - jmp cmninttrap - SET_SIZE(ndptrap_frstor) - SET_SIZE(ndptrap) - -#else /* __xpv */ - - ENTRY_NP(ndptrap) - /* - * We want to do this quickly as every lwp using fp will take this - * after a context switch -- we do the frequent path in ndptrap_frstor - * below; for all other cases, we let the trap code handle it - */ - pushq %rax - pushq %rbx - cmpw $KCS_SEL, 24(%rsp) /* did we come from kernel mode? */ - jne 1f - LOADCPU(%rax) /* if yes, don't swapgs */ - jmp 2f -1: - SWAPGS /* if from user, need swapgs */ - LOADCPU(%rax) - SWAPGS -2: - /* - * Xrstor needs to use edx as part of its flag. - * NOTE: have to push rdx after "cmpw ...24(%rsp)", otherwise rsp+$24 - * will not point to CS. - */ - pushq %rdx - cmpl $0, fpu_exists(%rip) - je .handle_in_trap /* let trap handle no fp case */ - movq CPU_THREAD(%rax), %rbx /* %rbx = curthread */ - movl $FPU_EN, %eax - movq T_LWP(%rbx), %rbx /* %rbx = lwp */ - testq %rbx, %rbx - jz .handle_in_trap /* should not happen? */ -#if LWP_PCB_FPU != 0 - addq $LWP_PCB_FPU, %rbx /* &lwp->lwp_pcb.pcb_fpu */ -#endif - testl %eax, PCB_FPU_FLAGS(%rbx) - jz .handle_in_trap /* must be the first fault */ - clts - andl $_BITNOT(FPU_VALID), PCB_FPU_FLAGS(%rbx) -#if FPU_CTX_FPU_REGS != 0 - addq $FPU_CTX_FPU_REGS, %rbx -#endif - - movl FPU_CTX_FPU_XSAVE_MASK(%rbx), %eax /* for xrstor */ - movl FPU_CTX_FPU_XSAVE_MASK+4(%rbx), %edx /* for xrstor */ - - /* - * the label below is used in trap.c to detect FP faults in - * kernel due to user fault. 
- */ - ALTENTRY(ndptrap_frstor) - movq (%rbx), %rbx /* fpu_regs.kfpu_u.kfpu_XX pointer */ - .globl _patch_xrstorq_rbx -_patch_xrstorq_rbx: - fxrstorq (%rbx) - popq %rdx - popq %rbx - popq %rax - jmp tr_iret_auto - /*NOTREACHED*/ - -.handle_in_trap: - popq %rdx - popq %rbx - popq %rax - TRAP_NOERR(T_NOEXTFLT) /* $7 */ - jmp cmninttrap - SET_SIZE(ndptrap_frstor) - SET_SIZE(ndptrap) - -#endif /* __xpv */ - -#elif defined(__i386) - - ENTRY_NP(ndptrap) - /* - * We want to do this quickly as every lwp using fp will take this - * after a context switch -- we do the frequent path in fpnoextflt - * below; for all other cases, we let the trap code handle it - */ - pushl %eax - pushl %ebx - pushl %edx /* for xrstor */ - pushl %ds - pushl %gs - movl $KDS_SEL, %ebx - movw %bx, %ds - movl $KGS_SEL, %eax - movw %ax, %gs - LOADCPU(%eax) - cmpl $0, fpu_exists - je .handle_in_trap /* let trap handle no fp case */ - movl CPU_THREAD(%eax), %ebx /* %ebx = curthread */ - movl $FPU_EN, %eax - movl T_LWP(%ebx), %ebx /* %ebx = lwp */ - testl %ebx, %ebx - jz .handle_in_trap /* should not happen? */ -#if LWP_PCB_FPU != 0 - addl $LWP_PCB_FPU, %ebx /* &lwp->lwp_pcb.pcb_fpu */ -#endif - testl %eax, PCB_FPU_FLAGS(%ebx) - jz .handle_in_trap /* must be the first fault */ - CLTS - andl $_BITNOT(FPU_VALID), PCB_FPU_FLAGS(%ebx) -#if FPU_CTX_FPU_REGS != 0 - addl $FPU_CTX_FPU_REGS, %ebx -#endif - - movl FPU_CTX_FPU_XSAVE_MASK(%ebx), %eax /* for xrstor */ - movl FPU_CTX_FPU_XSAVE_MASK+4(%ebx), %edx /* for xrstor */ - - /* - * the label below is used in trap.c to detect FP faults in kernel - * due to user fault. - */ - ALTENTRY(ndptrap_frstor) - movl (%ebx), %ebx /* fpu_regs.kfpu_u.kfpu_XX pointer */ - .globl _patch_fxrstor_ebx -_patch_fxrstor_ebx: - .globl _patch_xrstor_ebx -_patch_xrstor_ebx: - frstor (%ebx) /* may be patched to fxrstor or xrstor */ - popl %gs - popl %ds - popl %edx - popl %ebx - popl %eax - IRET - -.handle_in_trap: - popl %gs - popl %ds - popl %edx - popl %ebx - popl %eax - TRAP_NOERR(T_NOEXTFLT) /* $7 */ - jmp cmninttrap - SET_SIZE(ndptrap_frstor) + TRAP_NOERR(T_NOEXTFLT) /* $0 */ + SET_CPU_GSBASE + jmp cmntrap SET_SIZE(ndptrap) -#endif /* __i386 */ - #if !defined(__xpv) #if defined(__amd64) @@ -1036,12 +822,6 @@ make_frame: #endif /* __i386 */ #endif /* !__xpv */ - ENTRY_NP(overrun) - push $0 - TRAP_NOERR(T_EXTOVRFLT) /* $9 i386 only - not generated */ - jmp cmninttrap - SET_SIZE(overrun) - /* * #TS */ diff --git a/usr/src/uts/intel/ia32/ml/float.s b/usr/src/uts/intel/ia32/ml/float.s index f154a96851..0a242e0475 100644 --- a/usr/src/uts/intel/ia32/ml/float.s +++ b/usr/src/uts/intel/ia32/ml/float.s @@ -21,7 +21,7 @@ /* * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2017, Joyent, Inc. + * Copyright (c) 2018, Joyent, Inc. */ /* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */ @@ -79,191 +79,12 @@ fxsave_insn(struct fxsave_state *fx) #else /* __lint */ -#if defined(__amd64) - ENTRY_NP(fxsave_insn) fxsaveq (%rdi) ret SET_SIZE(fxsave_insn) -#elif defined(__i386) - - ENTRY_NP(fxsave_insn) - movl 4(%esp), %eax - fxsave (%eax) - ret - SET_SIZE(fxsave_insn) - -#endif - -#endif /* __lint */ - -#if defined(__i386) - -/* - * If (num1/num2 > num1/num3) the FPU has the FDIV bug. 
- */ - -#if defined(__lint) - -int -fpu_probe_pentium_fdivbug(void) -{ return (0); } - -#else /* __lint */ - - ENTRY_NP(fpu_probe_pentium_fdivbug) - fldl .num1 - fldl .num2 - fdivr %st(1), %st - fxch %st(1) - fdivl .num3 - fcompp - fstsw %ax - sahf - jae 0f - movl $1, %eax - ret - -0: xorl %eax, %eax - ret - - .align 4 -.num1: .4byte 0xbce4217d /* 4.999999 */ - .4byte 0x4013ffff -.num2: .4byte 0x0 /* 15.0 */ - .4byte 0x402e0000 -.num3: .4byte 0xde7210bf /* 14.999999 */ - .4byte 0x402dffff - SET_SIZE(fpu_probe_pentium_fdivbug) - -#endif /* __lint */ - -/* - * To cope with processors that do not implement fxsave/fxrstor - * instructions, patch hot paths in the kernel to use them only - * when that feature has been detected. - */ - -#if defined(__lint) - -void -patch_sse(void) -{} - -void -patch_sse2(void) -{} - -void -patch_xsave(void) -{} - -#else /* __lint */ - - ENTRY_NP(patch_sse) - _HOT_PATCH_PROLOG - / - / frstor (%ebx); nop -> fxrstor (%ebx) - / - _HOT_PATCH(_fxrstor_ebx_insn, _patch_fxrstor_ebx, 3) - / - / lock; xorl $0, (%esp) -> sfence; ret - / - _HOT_PATCH(_sfence_ret_insn, _patch_sfence_ret, 4) - _HOT_PATCH_EPILOG - ret -_fxrstor_ebx_insn: / see ndptrap_frstor() - fxrstor (%ebx) -_ldmxcsr_ebx_insn: / see resume_from_zombie() - ldmxcsr (%ebx) -_sfence_ret_insn: / see membar_producer() - sfence - ret - SET_SIZE(patch_sse) - - ENTRY_NP(patch_sse2) - _HOT_PATCH_PROLOG - / - / lock; xorl $0, (%esp) -> lfence; ret - / - _HOT_PATCH(_lfence_ret_insn, _patch_lfence_ret, 4) - _HOT_PATCH_EPILOG - ret -_lfence_ret_insn: / see membar_consumer() - lfence - ret - SET_SIZE(patch_sse2) - - /* - * Patch lazy fp restore instructions in the trap handler - * to use xrstor instead of frstor - */ - ENTRY_NP(patch_xsave) - _HOT_PATCH_PROLOG - / - / frstor (%ebx); nop -> xrstor (%ebx) - / - _HOT_PATCH(_xrstor_ebx_insn, _patch_xrstor_ebx, 3) - _HOT_PATCH_EPILOG - ret -_xrstor_ebx_insn: / see ndptrap_frstor() - xrstor (%ebx) - SET_SIZE(patch_xsave) - -#endif /* __lint */ -#endif /* __i386 */ - -#if defined(__amd64) -#if defined(__lint) - -void -patch_xsave(void) -{} - -#else /* __lint */ - - /* - * Patch lazy fp restore instructions in the trap handler - * to use xrstor instead of fxrstorq - */ - ENTRY_NP(patch_xsave) - pushq %rbx - pushq %rbp - pushq %r15 - / - / fxrstorq (%rbx); -> nop; xrstor (%rbx) - / loop doing the following for 4 bytes: - / hot_patch_kernel_text(_patch_xrstorq_rbx, _xrstor_rbx_insn, 1) - / - leaq _patch_xrstorq_rbx(%rip), %rbx - leaq _xrstor_rbx_insn(%rip), %rbp - movq $4, %r15 -1: - movq %rbx, %rdi /* patch address */ - movzbq (%rbp), %rsi /* instruction byte */ - movq $1, %rdx /* count */ - call hot_patch_kernel_text - addq $1, %rbx - addq $1, %rbp - subq $1, %r15 - jnz 1b - - popq %r15 - popq %rbp - popq %rbx - ret - -_xrstor_rbx_insn: / see ndptrap_frstor() - # Because the fxrstorq instruction we're patching is 4 bytes long, due - # to the 0x48 prefix (indicating 64-bit operand size), we patch 4 bytes - # too. - nop - xrstor (%rbx) - SET_SIZE(patch_xsave) - #endif /* __lint */ -#endif /* __amd64 */ /* * One of these routines is called from any lwp with floating @@ -287,15 +108,8 @@ void fpxsave_ctxt(void *arg) {} -/*ARGSUSED*/ -void -fpnsave_ctxt(void *arg) -{} - #else /* __lint */ -#if defined(__amd64) - /* * These three functions define the Intel "xsave" handling for CPUs with * different features. Newer AMD CPUs can also use these functions. 
See the @@ -305,7 +119,7 @@ fpnsave_ctxt(void *arg) cmpl $FPU_EN, FPU_CTX_FPU_FLAGS(%rdi) jne 1f movl $_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%rdi) - movq FPU_CTX_FPU_REGS(%rdi), %rdi /* fpu_regs.kfpu_u.kfpu_fn ptr */ + movq FPU_CTX_FPU_REGS(%rdi), %rdi /* fpu_regs.kfpu_u.kfpu_fx ptr */ fxsaveq (%rdi) STTS(%rsi) /* trap on next fpu touch */ 1: rep; ret /* use 2 byte return instruction when branch target */ @@ -352,7 +166,7 @@ fpnsave_ctxt(void *arg) cmpl $FPU_EN, FPU_CTX_FPU_FLAGS(%rdi) jne 1f movl $_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%rdi) - movq FPU_CTX_FPU_REGS(%rdi), %rdi /* fpu_regs.kfpu_u.kfpu_fn ptr */ + movq FPU_CTX_FPU_REGS(%rdi), %rdi /* fpu_regs.kfpu_u.kfpu_fx ptr */ fxsaveq (%rdi) /* * To ensure that we don't leak these values into the next context @@ -405,126 +219,6 @@ fpnsave_ctxt(void *arg) 1: ret SET_SIZE(xsaveopt_excp_clr_ctxt) -#elif defined(__i386) - - ENTRY_NP(fpnsave_ctxt) - movl 4(%esp), %eax /* a struct fpu_ctx */ - cmpl $FPU_EN, FPU_CTX_FPU_FLAGS(%eax) - jne 1f - movl $_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%eax) - movl FPU_CTX_FPU_REGS(%eax), %eax /* fpu_regs.kfpu_u.kfpu_fx ptr */ - fnsave (%eax) - /* (fnsave also reinitializes x87 state) */ - STTS(%edx) /* trap on next fpu touch */ -1: rep; ret /* use 2 byte return instruction when branch target */ - /* AMD Software Optimization Guide - Section 6.2 */ - SET_SIZE(fpnsave_ctxt) - - ENTRY_NP(fpxsave_ctxt) - movl 4(%esp), %eax /* a struct fpu_ctx */ - cmpl $FPU_EN, FPU_CTX_FPU_FLAGS(%eax) - jne 1f - movl $_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%eax) - movl FPU_CTX_FPU_REGS(%eax), %eax /* fpu_regs.kfpu_u.kfpu_fn ptr */ - fxsave (%eax) - STTS(%edx) /* trap on next fpu touch */ -1: rep; ret /* use 2 byte return instruction when branch target */ - /* AMD Software Optimization Guide - Section 6.2 */ - SET_SIZE(fpxsave_ctxt) - - ENTRY_NP(xsave_ctxt) - movl 4(%esp), %ecx /* a struct fpu_ctx */ - cmpl $FPU_EN, FPU_CTX_FPU_FLAGS(%ecx) - jne 1f - movl $_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%ecx) - movl FPU_CTX_FPU_XSAVE_MASK(%ecx), %eax - movl FPU_CTX_FPU_XSAVE_MASK+4(%ecx), %edx - movl FPU_CTX_FPU_REGS(%ecx), %ecx /* fpu_regs.kfpu_u.kfpu_xs ptr */ - xsave (%ecx) - STTS(%edx) /* trap on next fpu touch */ -1: ret - SET_SIZE(xsave_ctxt) - - ENTRY_NP(xsaveopt_ctxt) - movl 4(%esp), %ecx /* a struct fpu_ctx */ - cmpl $FPU_EN, FPU_CTX_FPU_FLAGS(%ecx) - jne 1f - movl $_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%ecx) - movl FPU_CTX_FPU_XSAVE_MASK(%ecx), %eax - movl FPU_CTX_FPU_XSAVE_MASK+4(%ecx), %edx - movl FPU_CTX_FPU_REGS(%ecx), %ecx /* fpu_regs.kfpu_u.kfpu_xs ptr */ - xsaveopt (%ecx) - STTS(%edx) /* trap on next fpu touch */ -1: ret - SET_SIZE(xsaveopt_ctxt) - -/* - * See comment above the __amd64 implementation of fpxsave_excp_clr_ctxt() - * for details about the following threee functions for AMD "exception pointer" - * handling. 
- */ - - ENTRY_NP(fpxsave_excp_clr_ctxt) - movl 4(%esp), %eax /* a struct fpu_ctx */ - cmpl $FPU_EN, FPU_CTX_FPU_FLAGS(%eax) - jne 1f - - movl $_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%eax) - movl FPU_CTX_FPU_REGS(%eax), %eax /* fpu_regs.kfpu_u.kfpu_fn ptr */ - fxsave (%eax) - btw $7, FXSAVE_STATE_FSW(%eax) /* Test saved ES bit */ - jnc 0f /* jump if ES = 0 */ - fnclex /* clear pending x87 exceptions */ -0: ffree %st(7) /* clear tag bit to remove possible stack overflow */ - fildl .fpzero_const - /* dummy load changes all exception pointers */ - STTS(%edx) /* trap on next fpu touch */ -1: rep; ret /* use 2 byte return instruction when branch target */ - /* AMD Software Optimization Guide - Section 6.2 */ - SET_SIZE(fpxsave_excp_clr_ctxt) - - ENTRY_NP(xsave_excp_clr_ctxt) - movl 4(%esp), %ecx /* a struct fpu_ctx */ - cmpl $FPU_EN, FPU_CTX_FPU_FLAGS(%ecx) - jne 1f - - movl $_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%ecx) - movl FPU_CTX_FPU_XSAVE_MASK(%ecx), %eax - movl FPU_CTX_FPU_XSAVE_MASK+4(%ecx), %edx - movl FPU_CTX_FPU_REGS(%ecx), %ecx /* fpu_regs.kfpu_u.kfpu_xs ptr */ - xsave (%ecx) - btw $7, FXSAVE_STATE_FSW(%ecx) /* Test saved ES bit */ - jnc 0f /* jump if ES = 0 */ - fnclex /* clear pending x87 exceptions */ -0: ffree %st(7) /* clear tag bit to remove possible stack overflow */ - fildl .fpzero_const - /* dummy load changes all exception pointers */ - STTS(%edx) /* trap on next fpu touch */ -1: ret - SET_SIZE(xsave_excp_clr_ctxt) - - ENTRY_NP(xsaveopt_excp_clr_ctxt) - movl 4(%esp), %ecx /* a struct fpu_ctx */ - cmpl $FPU_EN, FPU_CTX_FPU_FLAGS(%ecx) - jne 1f - - movl $_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%ecx) - movl FPU_CTX_FPU_XSAVE_MASK(%ecx), %eax - movl FPU_CTX_FPU_XSAVE_MASK+4(%ecx), %edx - movl FPU_CTX_FPU_REGS(%ecx), %ecx /* fpu_regs.kfpu_u.kfpu_xs ptr */ - xsaveopt (%ecx) - btw $7, FXSAVE_STATE_FSW(%ecx) /* Test saved ES bit */ - jnc 0f /* jump if ES = 0 */ - fnclex /* clear pending x87 exceptions */ -0: ffree %st(7) /* clear tag bit to remove possible stack overflow */ - fildl .fpzero_const - /* dummy load changes all exception pointers */ - STTS(%edx) /* trap on next fpu touch */ -1: ret - SET_SIZE(xsaveopt_excp_clr_ctxt) - -#endif /* __i386 */ - .align 8 .fpzero_const: .4byte 0x0 @@ -557,8 +251,6 @@ xsaveopt(struct xsave_state *f, uint64_t m) #else /* __lint */ -#if defined(__amd64) - ENTRY_NP(fpxsave) CLTS fxsaveq (%rdi) @@ -591,58 +283,55 @@ xsaveopt(struct xsave_state *f, uint64_t m) ret SET_SIZE(xsaveopt) -#elif defined(__i386) +#endif /* __lint */ - ENTRY_NP(fpsave) - CLTS - movl 4(%esp), %eax - fnsave (%eax) - STTS(%eax) /* set TS bit in %cr0 (disable FPU) */ - ret - SET_SIZE(fpsave) +/* + * These functions are used when restoring the FPU as part of the epilogue of a + * context switch. 
+ */ - ENTRY_NP(fpxsave) - CLTS - movl 4(%esp), %eax - fxsave (%eax) - fninit /* clear exceptions, init x87 tags */ - STTS(%eax) /* set TS bit in %cr0 (disable FPU) */ - ret - SET_SIZE(fpxsave) +#if defined(__lint) - ENTRY_NP(xsave) - CLTS - movl 4(%esp), %ecx - movl 8(%esp), %eax - movl 12(%esp), %edx - xsave (%ecx) +/*ARGSUSED*/ +void +fpxrestore_ctxt(void *arg) +{} - fninit /* clear exceptions, init x87 tags */ - STTS(%eax) /* set TS bit in %cr0 (disable FPU) */ - ret - SET_SIZE(xsave) +/*ARGSUSED*/ +void +xrestore_ctxt(void *arg) +{} - ENTRY_NP(xsaveopt) +#else /* __lint */ + + ENTRY(fpxrestore_ctxt) + cmpl $_CONST(FPU_EN|FPU_VALID), FPU_CTX_FPU_FLAGS(%rdi) + jne 1f + movl $_CONST(FPU_EN), FPU_CTX_FPU_FLAGS(%rdi) + movq FPU_CTX_FPU_REGS(%rdi), %rdi /* fpu_regs.kfpu_u.kfpu_fx ptr */ CLTS - movl 4(%esp), %ecx - movl 8(%esp), %eax - movl 12(%esp), %edx - xsaveopt (%ecx) + fxrstorq (%rdi) +1: + ret + SET_SIZE(fpxrestore_ctxt) - fninit /* clear exceptions, init x87 tags */ - STTS(%eax) /* set TS bit in %cr0 (disable FPU) */ + ENTRY(xrestore_ctxt) + cmpl $_CONST(FPU_EN|FPU_VALID), FPU_CTX_FPU_FLAGS(%rdi) + jne 1f + movl $_CONST(FPU_EN), FPU_CTX_FPU_FLAGS(%rdi) + movl FPU_CTX_FPU_XSAVE_MASK(%rdi), %eax /* xsave flags in EDX:EAX */ + movl FPU_CTX_FPU_XSAVE_MASK+4(%rdi), %edx + movq FPU_CTX_FPU_REGS(%rdi), %rdi /* fpu_regs.kfpu_u.kfpu_xs ptr */ + CLTS + xrstor (%rdi) +1: ret - SET_SIZE(xsaveopt) + SET_SIZE(xrestore_ctxt) -#endif /* __i386 */ #endif /* __lint */ -#if defined(__lint) -/*ARGSUSED*/ -void -fprestore(struct fnsave_state *f) -{} +#if defined(__lint) /*ARGSUSED*/ void @@ -656,8 +345,6 @@ xrestore(struct xsave_state *f, uint64_t m) #else /* __lint */ -#if defined(__amd64) - ENTRY_NP(fpxrestore) CLTS fxrstorq (%rdi) @@ -673,32 +360,6 @@ xrestore(struct xsave_state *f, uint64_t m) ret SET_SIZE(xrestore) -#elif defined(__i386) - - ENTRY_NP(fprestore) - CLTS - movl 4(%esp), %eax - frstor (%eax) - ret - SET_SIZE(fprestore) - - ENTRY_NP(fpxrestore) - CLTS - movl 4(%esp), %eax - fxrstor (%eax) - ret - SET_SIZE(fpxrestore) - - ENTRY_NP(xrestore) - CLTS - movl 4(%esp), %ecx - movl 8(%esp), %eax - movl 12(%esp), %edx - xrstor (%ecx) - ret - SET_SIZE(xrestore) - -#endif /* __i386 */ #endif /* __lint */ /* @@ -713,21 +374,11 @@ fpdisable(void) #else /* __lint */ -#if defined(__amd64) - ENTRY_NP(fpdisable) STTS(%rdi) /* set TS bit in %cr0 (disable FPU) */ ret SET_SIZE(fpdisable) -#elif defined(__i386) - - ENTRY_NP(fpdisable) - STTS(%eax) - ret - SET_SIZE(fpdisable) - -#endif /* __i386 */ #endif /* __lint */ /* @@ -742,8 +393,6 @@ fpinit(void) #else /* __lint */ -#if defined(__amd64) - ENTRY_NP(fpinit) CLTS cmpl $FP_XSAVE, fp_save_mech @@ -765,38 +414,6 @@ fpinit(void) ret SET_SIZE(fpinit) -#elif defined(__i386) - - ENTRY_NP(fpinit) - CLTS - cmpl $FP_FXSAVE, fp_save_mech - je 1f - cmpl $FP_XSAVE, fp_save_mech - je 2f - - /* fnsave */ - fninit - movl $x87_initial, %eax - frstor (%eax) /* load clean initial state */ - ret - -1: /* fxsave */ - movl $sse_initial, %eax - fxrstor (%eax) /* load clean initial state */ - ret - -2: /* xsave */ - movl $avx_initial, %ecx - xorl %edx, %edx - movl $XFEATURE_AVX, %eax - bt $X86FSET_AVX, x86_featureset - cmovael %edx, %eax - orl $(XFEATURE_LEGACY_FP | XFEATURE_SSE), %eax - xrstor (%ecx) - ret - SET_SIZE(fpinit) - -#endif /* __i386 */ #endif /* __lint */ /* @@ -816,8 +433,6 @@ fpxerr_reset(void) #else /* __lint */ -#if defined(__amd64) - ENTRY_NP(fperr_reset) CLTS xorl %eax, %eax @@ -839,28 +454,6 @@ fpxerr_reset(void) ret SET_SIZE(fpxerr_reset) -#elif 
defined(__i386) - - ENTRY_NP(fperr_reset) - CLTS - xorl %eax, %eax - fnstsw %ax - fnclex - ret - SET_SIZE(fperr_reset) - - ENTRY_NP(fpxerr_reset) - CLTS - subl $4, %esp /* make some temporary space */ - stmxcsr (%esp) - movl (%esp), %eax - andl $_BITNOT(SSE_MXCSR_EFLAGS), (%esp) - ldmxcsr (%esp) /* clear processor exceptions */ - addl $4, %esp - ret - SET_SIZE(fpxerr_reset) - -#endif /* __i386 */ #endif /* __lint */ #if defined(__lint) @@ -873,8 +466,6 @@ fpgetcwsw(void) #else /* __lint */ -#if defined(__amd64) - ENTRY_NP(fpgetcwsw) pushq %rbp movq %rsp, %rbp @@ -887,19 +478,6 @@ fpgetcwsw(void) ret SET_SIZE(fpgetcwsw) -#elif defined(__i386) - - ENTRY_NP(fpgetcwsw) - CLTS - subl $4, %esp /* make some temporary space */ - fnstsw (%esp) /* store the status word */ - fnstcw 2(%esp) /* store the control word */ - movl (%esp), %eax /* put both in %eax */ - addl $4, %esp - ret - SET_SIZE(fpgetcwsw) - -#endif /* __i386 */ #endif /* __lint */ /* @@ -916,8 +494,6 @@ fpgetmxcsr(void) #else /* __lint */ -#if defined(__amd64) - ENTRY_NP(fpgetmxcsr) pushq %rbp movq %rsp, %rbp @@ -929,16 +505,4 @@ fpgetmxcsr(void) ret SET_SIZE(fpgetmxcsr) -#elif defined(__i386) - - ENTRY_NP(fpgetmxcsr) - CLTS - subl $4, %esp /* make some temporary space */ - stmxcsr (%esp) - movl (%esp), %eax - addl $4, %esp - ret - SET_SIZE(fpgetmxcsr) - -#endif /* __i386 */ #endif /* __lint */ diff --git a/usr/src/uts/intel/ia32/os/archdep.c b/usr/src/uts/intel/ia32/os/archdep.c index 6f99571a5a..4c8d1acd9f 100644 --- a/usr/src/uts/intel/ia32/os/archdep.c +++ b/usr/src/uts/intel/ia32/os/archdep.c @@ -317,6 +317,7 @@ setfpregs(klwp_t *lwp, fpregset_t *fp) fpu->fpu_regs.kfpu_status = fp->fp_reg_set.fpchip_state.status; fpu->fpu_flags |= FPU_VALID; + PCB_SET_UPDATE_FPU(&lwp->lwp_pcb); } /* @@ -464,7 +465,7 @@ getgregs(klwp_t *lwp, gregset_t grp) grp[REG_GSBASE] = pcb->pcb_gsbase; if (thisthread) kpreempt_disable(); - if (pcb->pcb_rupdate == 1) { + if (PCB_NEED_UPDATE_SEGS(pcb)) { grp[REG_DS] = pcb->pcb_ds; grp[REG_ES] = pcb->pcb_es; grp[REG_FS] = pcb->pcb_fs; @@ -500,7 +501,7 @@ getgregs32(klwp_t *lwp, gregset32_t grp) if (thisthread) kpreempt_disable(); - if (pcb->pcb_rupdate == 1) { + if (PCB_NEED_UPDATE_SEGS(pcb)) { grp[GS] = (uint16_t)pcb->pcb_gs; grp[FS] = (uint16_t)pcb->pcb_fs; grp[DS] = (uint16_t)pcb->pcb_ds; @@ -753,7 +754,7 @@ setgregs(klwp_t *lwp, gregset_t grp) /* * Ensure that we go out via update_sregs */ - pcb->pcb_rupdate = 1; + PCB_SET_UPDATE_SEGS(pcb); lwptot(lwp)->t_post_sys = 1; if (thisthread) kpreempt_enable(); @@ -790,7 +791,7 @@ setgregs(klwp_t *lwp, gregset_t grp) /* * Ensure that we go out via update_sregs */ - pcb->pcb_rupdate = 1; + PCB_SET_UPDATE_SEGS(pcb); lwptot(lwp)->t_post_sys = 1; if (thisthread) kpreempt_enable(); diff --git a/usr/src/uts/intel/ia32/os/fpu.c b/usr/src/uts/intel/ia32/os/fpu.c index c307c97957..651f4a745f 100644 --- a/usr/src/uts/intel/ia32/os/fpu.c +++ b/usr/src/uts/intel/ia32/os/fpu.c @@ -61,11 +61,388 @@ #include <sys/sysmacros.h> #include <sys/cmn_err.h> +/* + * FPU Management Overview + * ----------------------- + * + * The x86 FPU has evolved substantially since its days as the x87 coprocessor; + * however, many aspects of its life as a coprocessor are still around in x86. + * + * Today, when we refer to the 'FPU', we don't just mean the original x87 FPU. + * While that state still exists, there is much more that is covered by the FPU. + * Today, this includes not just traditional FPU state, but also supervisor only + * state. 
The following state is currently managed and covered logically by the + * idea of the FPU registers: + * + * o Traditional x87 FPU + * o Vector Registers (%xmm, %ymm, %zmm) + * o Memory Protection Extensions (MPX) Bounds Registers + * o Protected Key Rights Registers (PKRU) + * o Processor Trace data + * + * The rest of this covers how the FPU is managed and controlled, how state is + * saved and restored between threads, interactions with hypervisors, and other + * information exported to user land through aux vectors. A lot of background + * information is here to synthesize major parts of the Intel SDM, but + * unfortunately, it is not a replacement for reading it. + * + * FPU Control Registers + * --------------------- + * + * Because the x87 FPU began its life as a co-processor and the FPU was + * optional there are several bits that show up in %cr0 that we have to + * manipulate when dealing with the FPU. These are: + * + * o CR0.ET The 'extension type' bit. This was used originally to indicate + * that the FPU co-processor was present. Now it is forced on for + * compatibility. This is often used to verify whether or not the + * FPU is present. + * + * o CR0.NE The 'native error' bit. Used to indicate that native error + * mode should be enabled. This indicates that we should take traps + * on FPU errors. The OS enables this early in boot. + * + * o CR0.MP The 'Monitor Coprocessor' bit. Used to control whether or not + * wait/fwait instructions generate a #NM if CR0.TS is set. + * + * o CR0.EM The 'Emulation' bit. This is used to cause floating point + * operations (x87 through SSE4) to trap with a #UD so they can be + * emulated. The system never sets this bit, but makes sure it is + * clear on processor start up. + * + * o CR0.TS The 'Task Switched' bit. When this is turned on, a floating + * point operation will generate a #NM. An fwait will as well, + * depending on the value in CR0.MP. + * + * Our general policy is that CR0.ET, CR0.NE, and CR0.MP are always set by + * the system. Similarly CR0.EM is always unset by the system. CR0.TS has a more + * complicated role. Historically it has been used to allow running systems to + * restore the FPU registers lazily. This will be discussed in greater depth + * later on. + * + * %cr4 is also used as part of the FPU control. Specifically we need to worry + * about the following bits in the system: + * + * o CR4.OSFXSR This bit is used to indicate that the OS understands and + * supports the execution of the fxsave and fxrstor + * instructions. This bit is required to be set to enable + * the use of the SSE->SSE4 instructions. + * + * o CR4.OSXMMEXCPT This bit is used to indicate that the OS can understand + * and take a SIMD floating point exception (#XM). This bit + * is always enabled by the system. + * + * o CR4.OSXSAVE This bit is used to indicate that the OS understands and + * supports the execution of the xsave and xrstor family of + * instructions. This bit is required to use any of the AVX + * and newer feature sets. + * + * Because all supported processors are 64-bit, they'll always support the XMM + * extensions and we will enable both CR4.OXFXSR and CR4.OSXMMEXCPT in boot. + * CR4.OSXSAVE will be enabled and used whenever xsave is reported in cpuid. + * + * %xcr0 is used to manage the behavior of the xsave feature set and is only + * present on the system if xsave is supported. %xcr0 is read and written to + * through by the xgetbv and xsetbv instructions. 
This register is present + * whenever the xsave feature set is supported. Each bit in %xcr0 refers to a + * different component of the xsave state and controls whether or not that + * information is saved and restored. For newer feature sets like AVX and MPX, + * it also controls whether or not the corresponding instructions can be + * executed (much like CR0.OSFXSR does for the SSE feature sets). + * + * Everything in %xcr0 is around features available to users. There is also the + * IA32_XSS MSR which is used to control supervisor-only features that are still + * part of the xsave state. Bits that can be set in %xcr0 are reserved in + * IA32_XSS and vice versa. This is an important property that is particularly + * relevant to how the xsave instructions operate. + * + * Save Mechanisms + * --------------- + * + * When switching between running threads the FPU state needs to be saved and + * restored by the OS. If this state was not saved, users would rightfully + * complain about corrupt state. There are three mechanisms that exist on the + * processor for saving and restoring these state images: + * + * o fsave + * o fxsave + * o xsave + * + * fsave saves and restores only the x87 FPU and is the oldest of these + * mechanisms. This mechanism is never used in the kernel today because we are + * always running on systems that support fxsave. + * + * The fxsave and fxrstor mechanism allows the x87 FPU and the SSE register + * state to be saved and restored to and from a struct fxsave_state. This is the + * default mechanism that is used to save and restore the FPU on amd64. An + * important aspect of fxsave that was different from the original i386 fsave + * mechanism is that the restoring of FPU state with pending exceptions will not + * generate an exception, it will be deferred to the next use of the FPU. + * + * The final and by far the most complex mechanism is that of the xsave set. + * xsave allows for saving and restoring all of the traditional x86 pieces (x87 + * and SSE), while allowing for extensions that will save the %ymm, %zmm, etc. + * registers. + * + * Data is saved and restored into and out of a struct xsave_state. The first + * part of the struct xsave_state is equivalent to the struct fxsave_state. + * After that, there is a header which is used to describe the remaining + * portions of the state. The header is a 64-byte value of which the first two + * uint64_t values are defined and the rest are reserved and must be zero. The + * first uint64_t is the xstate_bv member. This describes which values in the + * xsave_state are actually valid and present. This is updated on a save and + * used on restore. The second member is the xcomp_bv member. Its last bit + * determines whether or not a compressed version of the structure is used. + * + * When the uncompressed structure is used (currently the only format we + * support), then each state component is at a fixed offset in the structure, + * even if it is not being used. For example, if you only saved the AVX related + * state, but did not save the MPX related state, the offset would not change + * for any component. With the compressed format, components that aren't used + * are all elided (though the x87 and SSE state are always there). + * + * Unlike fxsave which saves all state, the xsave family does not always save + * and restore all the state that could be covered by the xsave_state. The + * instructions all take an argument which is a mask of what to consider. 
This + * is the same mask that will be used in the xstate_bv vector and it is also the + * same values that are present in %xcr0 and IA32_XSS. Though IA32_XSS is only + * considered with the xsaves and xrstors instructions. + * + * When a save or restore is requested, a bitwise and is performed between the + * requested bits and those that have been enabled in %xcr0. Only the bits that + * match that are then saved or restored. Others will be silently ignored by + * the processor. This idea is used often in the OS. We will always request that + * we save and restore all of the state, but only those portions that are + * actually enabled in %xcr0 will be touched. + * + * If a feature has been asked to be restored that is not set in the xstate_bv + * feature vector of the save state, then it will be set to its initial state by + * the processor (usually zeros). Also, when asked to save state, the processor + * may not write out data that is in its initial state as an optimization. This + * optimization only applies to saving data and not to restoring data. + * + * There are a few different variants of the xsave and xrstor instruction. They + * are: + * + * o xsave This is the original save instruction. It will save all of the + * requested data in the xsave state structure. It only saves data + * in the uncompressed (xcomp_bv[63] is zero) format. It may be + * executed at all privilege levels. + * + * o xrstor This is the original restore instruction. It will restore all of + * the requested data. The xrstor function can handle both the + * compressed and uncompressed formats. It may be executed at all + * privilege levels. + * + * o xsaveopt This is a variant of the xsave instruction that employs + * optimizations to try and only write out state that has been + * modified since the last time an xrstor instruction was called. + * The processor tracks a tuple of information about the last + * xrstor and tries to ensure that the same buffer is being used + * when this optimization is being used. However, because of the + * way that it tracks the xrstor buffer based on the address of it, + * it is not suitable for use if that buffer can be easily reused. + * The most common case is trying to save data to the stack in + * rtld. It may be executed at all privilege levels. + * + * o xsavec This is a variant of the xsave instruction that writes out the + * compressed form of the xsave_state. Otherwise it behaves as + * xsave. It may be executed at all privilege levels. + * + * o xsaves This is a variant of the xsave instruction. It is similar to + * xsavec in that it always writes the compressed form of the + * buffer. Unlike all the other forms, this instruction looks at + * both the user (%xcr0) and supervisor (IA32_XSS MSR) to determine + * what to save and restore. xsaves also implements the same + * optimization that xsaveopt does around modified pieces. User + * land may not execute the instruction. + * + * o xrstors This is a variant of the xrstor instruction. Similar to xsaves + * it can save and restore both the user and privileged states. + * Unlike xrstor it can only operate on the compressed form. + * User land may not execute the instruction. + * + * Based on all of these, the kernel has a precedence for what it will use. + * Basically, xsaves (not supported) is preferred to xsaveopt, which is + * preferred to xsave. A similar scheme is used when informing rtld (more later) + * about what it should use. xsavec is preferred to xsave. 
xsaveopt is not + * recommended due to the modified optimization not being appropriate for this + * use. + * + * Finally, there is one last gotcha with the xsave state. Importantly some AMD + * processors did not always save and restore some of the FPU exception state in + * some cases like Intel did. In those cases the OS will make up for this fact + * itself. + * + * FPU Initialization + * ------------------ + * + * One difference with the FPU registers is that not all threads have FPU state, + * only those that have an lwp. Generally this means kernel threads, which all + * share p0 and its lwp, do not have FPU state. Though there are definitely + * exceptions such as kcfpoold. In the rest of this discussion we'll use thread + * and lwp interchangeably, just think of thread meaning a thread that has a + * lwp. + * + * Each lwp has its FPU state allocated in its pcb (process control block). The + * actual storage comes from the fpsave_cachep kmem cache. This cache is sized + * dynamically at start up based on the save mechanism that we're using and the + * amount of memory required for it. This is dynamic because the xsave_state + * size varies based on the supported feature set. + * + * The hardware side of the FPU is initialized early in boot before we mount the + * root file system. This is effectively done in fpu_probe(). This is where we + * make the final decision about what the save and restore mechanisms we should + * use are, create the fpsave_cachep kmem cache, and initialize a number of + * function pointers that use save and restoring logic. + * + * The thread/lwp side is a a little more involved. There are two different + * things that we need to concern ourselves with. The first is how the FPU + * resources are allocated and the second is how the FPU state is initialized + * for a given lwp. + * + * We allocate the FPU save state from our kmem cache as part of lwp_fp_init(). + * This is always called unconditionally by the system as part of creating an + * LWP. + * + * There are three different initialization paths that we deal with. The first + * is when we are executing a new process. As part of exec all of the register + * state is reset. The exec case is particularly important because init is born + * like Athena, sprouting from the head of the kernel, without any true parent + * to fork from. The second is used whenever we fork or create a new lwp. The + * third is to deal with special lwps like the agent lwp. + * + * During exec, we will call fp_exec() which will initialize and set up the FPU + * state for the process. That will fill in the initial state for the FPU and + * also set that state in the FPU itself. As part of fp_exec() we also install a + * thread context operations vector that takes care of dealing with the saving + * and restoring of the FPU. These context handlers will also be called whenever + * an lwp is created or forked. In those cases, to initialize the FPU we will + * call fp_new_lwp(). Like fp_exec(), fp_new_lwp() will install a context + * operations vector for the new thread. + * + * Next we'll end up in the context operation fp_new_lwp(). This saves the + * current thread's state, initializes the new thread's state, and copies over + * the relevant parts of the originating thread's state. It's as this point that + * we also install the FPU context operations into the new thread, which ensures + * that all future threads that are descendants of the current one get the + * thread context operations (unless they call exec). 
+ * + * To deal with some things like the agent lwp, we double check the state of the + * FPU in sys_rtt_common() to make sure that it has been enabled before + * returning to user land. In general, this path should be rare, but it's useful + * for the odd lwp here and there. + * + * The FPU state will remain valid most of the time. There are times that + * the state will be rewritten. For example in restorecontext, due to /proc, or + * the lwp calls exec(). Whether the context is being freed or we are resetting + * the state, we will call fp_free() to disable the FPU and our context. + * + * Finally, when the lwp is destroyed, it will actually destroy and free the FPU + * state by calling fp_lwp_cleanup(). + * + * Kernel FPU Multiplexing + * ----------------------- + * + * Just as the kernel has to maintain all of the general purpose registers when + * switching between scheduled threads, the same is true of the FPU registers. + * + * When a thread has FPU state, it also has a set of context operations + * installed. These context operations take care of making sure that the FPU is + * properly saved and restored during a context switch (fpsave_ctxt and + * fprestore_ctxt respectively). This means that the current implementation of + * the FPU is 'eager', when a thread is running the CPU will have its FPU state + * loaded. While this is always true when executing in userland, there are a few + * cases where this is not true in the kernel. + * + * This was not always the case. Traditionally on x86 a 'lazy' FPU restore was + * employed. This meant that the FPU would be saved on a context switch and the + * CR0.TS bit would be set. When a thread next tried to use the FPU, it would + * then take a #NM trap, at which point we would restore the FPU from the save + * area and return to user land. Given the frequency of use of the FPU alone by + * libc, there's no point returning to user land just to trap again. + * + * There are a few cases though where the FPU state may need to be changed for a + * thread on its behalf. The most notable cases are in the case of processes + * using /proc, restorecontext, forking, etc. In all of these cases the kernel + * will force a threads FPU state to be saved into the PCB through the fp_save() + * function. Whenever the FPU is saved, then the FPU_VALID flag is set on the + * pcb. This indicates that the save state holds currently valid data. As a side + * effect of this, CR0.TS will be set. To make sure that all of the state is + * updated before returning to user land, in these cases, we set a flag on the + * PCB that says the FPU needs to be updated. This will make sure that we take + * the slow path out of a system call to fix things up for the thread. Due to + * the fact that this is a rather rare case, effectively setting the equivalent + * of t_postsys is acceptable. + * + * CR0.TS will be set after a save occurs and cleared when a restore occurs. + * Generally this means it will be cleared immediately by the new thread that is + * running in a context switch. However, this isn't the case for kernel threads. + * They currently operate with CR0.TS set as no kernel state is restored for + * them. This means that using the FPU will cause a #NM and panic. + * + * The FPU_VALID flag on the currently executing thread's pcb is meant to track + * what the value of CR0.TS should be. If it is set, then CR0.TS will be set. 
+ * However, because we eagerly restore, the only time that CR0.TS should be set + * for a non-kernel thread is during operations where it will be cleared before + * returning to user land and importantly, the only data that is in it is its + * own. + * + * FPU Exceptions + * -------------- + * + * Certain operations can cause the kernel to take traps due to FPU activity. + * Generally these events will cause a user process to receive a SIGFPU and if + * the kernel receives it in kernel context, we will die. Traditionally the #NM + * (Device Not Available / No Math) exception generated by CR0.TS would have + * caused us to restore the FPU. Now it is a fatal event regardless of whether + * or not user land causes it. + * + * While there are some cases where the kernel uses the FPU, it is up to the + * kernel to use the FPU in a way such that it cannot receive a trap or to use + * the appropriate trap protection mechanisms. + * + * Hypervisors + * ----------- + * + * When providing support for hypervisors things are a little bit more + * complicated because the FPU is not virtualized at all. This means that they + * need to save and restore the FPU and %xcr0 across entry and exit to the + * guest. To facilitate this, we provide a series of APIs in <sys/hma.h>. These + * allow us to use the full native state to make sure that we are always saving + * and restoring the full FPU that the host sees, even when the guest is using a + * subset. + * + * One tricky aspect of this is that the guest may be using a subset of %xcr0 + * and therefore changing our %xcr0 on the fly. It is vital that when we're + * saving and restoring the FPU that we always use the largest %xcr0 contents + * otherwise we will end up leaving behind data in it. + * + * ELF PLT Support + * --------------- + * + * rtld has to preserve a subset of the FPU when it is saving and restoring + * registers due to the amd64 SYS V ABI. See cmd/sgs/rtld/amd64/boot_elf.s for + * more information. As a result, we set up an aux vector that contains + * information about what save and restore mechanisms it should be using and + * the sizing thereof based on what the kernel supports. This is passed down in + * a series of aux vectors SUN_AT_FPTYPE and SUN_AT_FPSIZE. This information is + * initialized in fpu_subr.c. + */ + kmem_cache_t *fpsave_cachep; /* Legacy fxsave layout + xsave header + ymm */ #define AVX_XSAVE_SIZE (512 + 64 + 256) +/* + * Various sanity checks. + */ +CTASSERT(sizeof (struct fxsave_state) == 512); +CTASSERT(sizeof (struct fnsave_state) == 108); +CTASSERT((offsetof(struct fxsave_state, fx_xmm[0]) & 0xf) == 0); +CTASSERT(sizeof (struct xsave_state) >= AVX_XSAVE_SIZE); + /*CSTYLED*/ #pragma align 16 (sse_initial) @@ -150,20 +527,12 @@ const struct fnsave_state x87_initial = { /* rest of structure is zero */ }; -#if defined(__amd64) /* * This vector is patched to xsave_ctxt() if we discover we have an * XSAVE-capable chip in fpu_probe. */ void (*fpsave_ctxt)(void *) = fpxsave_ctxt; -#elif defined(__i386) -/* - * This vector is patched to fpxsave_ctxt() if we discover we have an - * SSE-capable chip in fpu_probe(). It is patched to xsave_ctxt - * if we discover we have an XSAVE-capable chip in fpu_probe. - */ -void (*fpsave_ctxt)(void *) = fpnsave_ctxt; -#endif +void (*fprestore_ctxt)(void *) = fpxrestore_ctxt; /* * This function pointer is changed to xsaveopt if the CPU is xsaveopt capable. 
@@ -187,9 +556,6 @@ fp_new_lwp(kthread_id_t t, kthread_id_t ct) struct fpu_ctx *fp; /* parent fpu context */ struct fpu_ctx *cfp; /* new fpu context */ struct fxsave_state *fx, *cfx; -#if defined(__i386) - struct fnsave_state *fn, *cfn; -#endif struct xsave_state *cxs; ASSERT(fp_kind != FP_NO); @@ -207,15 +573,13 @@ fp_new_lwp(kthread_id_t t, kthread_id_t ct) cfp->fpu_regs.kfpu_status = 0; cfp->fpu_regs.kfpu_xstatus = 0; + /* + * Make sure that the child's FPU is cleaned up and made ready for user + * land. + */ + PCB_SET_UPDATE_FPU(&ct->t_lwp->lwp_pcb); + switch (fp_save_mech) { -#if defined(__i386) - case FP_FNSAVE: - fn = fp->fpu_regs.kfpu_u.kfpu_fn; - cfn = cfp->fpu_regs.kfpu_u.kfpu_fn; - bcopy(&x87_initial, cfn, sizeof (*cfn)); - cfn->f_fcw = fn->f_fcw; - break; -#endif case FP_FXSAVE: fx = fp->fpu_regs.kfpu_u.kfpu_fx; cfx = cfp->fpu_regs.kfpu_u.kfpu_fx; @@ -244,14 +608,13 @@ fp_new_lwp(kthread_id_t t, kthread_id_t ct) /*NOTREACHED*/ } - installctx(ct, cfp, - fpsave_ctxt, NULL, fp_new_lwp, fp_new_lwp, NULL, fp_free); /* - * Now, when the new lwp starts running, it will take a trap - * that will be handled inline in the trap table to cause - * the appropriate f*rstor instruction to load the save area we - * constructed above directly into the hardware. + * Mark that both the parent and child need to have the FPU cleaned up + * before returning to user land. */ + + installctx(ct, cfp, fpsave_ctxt, fprestore_ctxt, fp_new_lwp, + fp_new_lwp, NULL, fp_free); } /* @@ -313,11 +676,6 @@ fp_save(struct fpu_ctx *fp) ASSERT(curthread->t_lwp && fp == &curthread->t_lwp->lwp_pcb.pcb_fpu); switch (fp_save_mech) { -#if defined(__i386) - case FP_FNSAVE: - fpsave(fp->fpu_regs.kfpu_u.kfpu_fn); - break; -#endif case FP_FXSAVE: fpxsave(fp->fpu_regs.kfpu_u.kfpu_fx); break; @@ -331,6 +689,18 @@ fp_save(struct fpu_ctx *fp) } fp->fpu_flags |= FPU_VALID; + + /* + * We save the FPU as part of forking, execing, modifications via /proc, + * restorecontext, etc. As such, we need to make sure that we return to + * userland with valid state in the FPU. If we're context switched out + * before we hit sys_rtt_common() we'll end up having restored the FPU + * as part of the context ops operations. The restore logic always makes + * sure that FPU_VALID is set before doing a restore so we don't restore + * it a second time. + */ + PCB_SET_UPDATE_FPU(&curthread->t_lwp->lwp_pcb); + kpreempt_enable(); } @@ -344,11 +714,6 @@ void fp_restore(struct fpu_ctx *fp) { switch (fp_save_mech) { -#if defined(__i386) - case FP_FNSAVE: - fprestore(fp->fpu_regs.kfpu_u.kfpu_fn); - break; -#endif case FP_FXSAVE: fpxrestore(fp->fpu_regs.kfpu_u.kfpu_fx); break; @@ -364,6 +729,33 @@ fp_restore(struct fpu_ctx *fp) fp->fpu_flags &= ~FPU_VALID; } +/* + * Reset the FPU such that it is in a valid state for a new thread that is + * coming out of exec. The FPU will be in a usable state at this point. At this + * point we know that the FPU state has already been allocated and if this + * wasn't an init process, then it will have had fp_free() previously called. + */ +void +fp_exec(void) +{ + struct fpu_ctx *fp = &ttolwp(curthread)->lwp_pcb.pcb_fpu; + + if (fp_save_mech == FP_XSAVE) { + fp->fpu_xsave_mask = XFEATURE_FP_ALL; + } + + /* + * Make sure that we're not preempted in the middle of initializing the + * FPU on CPU. 
+ */ + kpreempt_disable(); + installctx(curthread, fp, fpsave_ctxt, fprestore_ctxt, fp_new_lwp, + fp_new_lwp, NULL, fp_free); + fpinit(); + fp->fpu_flags = FPU_EN; + kpreempt_enable(); +} + /* * Seeds the initial state for the current thread. The possibilities are: @@ -371,7 +763,7 @@ fp_restore(struct fpu_ctx *fp) * initialization: Load the FPU state from the LWP state. * 2. The FPU state has not been externally modified: Load a clean state. */ -static void +void fp_seed(void) { struct fpu_ctx *fp = &ttolwp(curthread)->lwp_pcb.pcb_fpu; @@ -386,8 +778,8 @@ fp_seed(void) fp->fpu_xsave_mask = XFEATURE_FP_ALL; } - installctx(curthread, fp, - fpsave_ctxt, NULL, fp_new_lwp, fp_new_lwp, NULL, fp_free); + installctx(curthread, fp, fpsave_ctxt, fprestore_ctxt, fp_new_lwp, + fp_new_lwp, NULL, fp_free); fpinit(); /* @@ -452,11 +844,6 @@ fp_lwp_dup(struct _klwp *lwp) size_t sz; switch (fp_save_mech) { -#if defined(__i386) - case FP_FNSAVE: - sz = sizeof (struct fnsave_state); - break; -#endif case FP_FXSAVE: sz = sizeof (struct fxsave_state); break; @@ -474,119 +861,6 @@ fp_lwp_dup(struct _klwp *lwp) lwp->lwp_pcb.pcb_fpu.fpu_regs.kfpu_u.kfpu_generic = xp; } - -/* - * This routine is called from trap() when User thread takes No Extension - * Fault. The possiblities are: - * 1. User thread has executed a FP instruction for the first time. - * Save current FPU context if any. Initialize FPU, setup FPU - * context for the thread and enable FP hw. - * 2. Thread's pcb has a valid FPU state: Restore the FPU state and - * enable FP hw. - * - * Note that case #2 is inlined in the trap table. - */ -int -fpnoextflt(struct regs *rp) -{ - struct fpu_ctx *fp = &ttolwp(curthread)->lwp_pcb.pcb_fpu; - -#if !defined(__lint) - ASSERT(sizeof (struct fxsave_state) == 512 && - sizeof (struct fnsave_state) == 108); - ASSERT((offsetof(struct fxsave_state, fx_xmm[0]) & 0xf) == 0); - - ASSERT(sizeof (struct xsave_state) >= AVX_XSAVE_SIZE); - -#if defined(__i386) - ASSERT(sizeof (struct _fpu) == sizeof (struct __old_fpu)); -#endif /* __i386 */ -#endif /* !__lint */ - - kpreempt_disable(); - /* - * Now we can enable the interrupts. - * (NOTE: fp-no-coprocessor comes thru interrupt gate) - */ - sti(); - - if (!fpu_exists) { /* check for FPU hw exists */ - if (fp_kind == FP_NO) { - uint32_t inst; - - /* - * When the system has no floating point support, - * i.e. no FP hardware and no emulator, skip the - * two kinds of FP instruction that occur in - * fpstart. Allows processes that do no real FP - * to run normally. - */ - if (fuword32((void *)rp->r_pc, &inst) != -1 && - ((inst & 0xFFFF) == 0x7dd9 || - (inst & 0xFFFF) == 0x6dd9)) { - rp->r_pc += 3; - kpreempt_enable(); - return (0); - } - } - - /* - * If we have neither a processor extension nor - * an emulator, kill the process OR panic the kernel. - */ - kpreempt_enable(); - return (1); /* error */ - } - -#if !defined(__xpv) /* XXPV Is this ifdef needed now? */ - /* - * A paranoid cross-check: for the SSE case, ensure that %cr4 is - * configured to enable fully fledged (%xmm) fxsave/fxrestor on - * this CPU. For the non-SSE case, ensure that it isn't. - */ - ASSERT(((fp_kind & __FP_SSE) && - (getcr4() & CR4_OSFXSR) == CR4_OSFXSR) || - (!(fp_kind & __FP_SSE) && - (getcr4() & (CR4_OSXMMEXCPT|CR4_OSFXSR)) == 0)); -#endif - - if (fp->fpu_flags & FPU_EN) { - /* case 2 */ - fp_restore(fp); - } else { - /* case 1 */ - fp_seed(); - } - kpreempt_enable(); - return (0); -} - - -/* - * Handle a processor extension overrun fault - * Returns non zero for error. 
- * - * XXX Shouldn't this just be abolished given that we're not supporting - * anything prior to Pentium? - */ - -/* ARGSUSED */ -int -fpextovrflt(struct regs *rp) -{ -#if !defined(__xpv) /* XXPV Do we need this ifdef either */ - ulong_t cur_cr0; - - ASSERT(fp_kind != FP_NO); - - cur_cr0 = getcr0(); - fpinit(); /* initialize the FPU hardware */ - setcr0(cur_cr0); -#endif - sti(); - return (1); /* error, send SIGSEGV signal to the thread */ -} - /* * Handle a processor extension error fault * Returns non zero for error. @@ -622,14 +896,6 @@ fpexterrflt(struct regs *rp) /* clear exception flags in saved state, as if by fnclex */ switch (fp_save_mech) { -#if defined(__i386) - case FP_FNSAVE: - fpsw = fp->fpu_regs.kfpu_u.kfpu_fn->f_fsw; - fpcw = fp->fpu_regs.kfpu_u.kfpu_fn->f_fcw; - fp->fpu_regs.kfpu_u.kfpu_fn->f_fsw &= ~FPS_SW_EFLAGS; - break; -#endif - case FP_FXSAVE: fpsw = fp->fpu_regs.kfpu_u.kfpu_fx->fx_fsw; fpcw = fp->fpu_regs.kfpu_u.kfpu_fx->fx_fcw; @@ -811,11 +1077,6 @@ fpsetcw(uint16_t fcw, uint32_t mxcsr) fp_save(fp); switch (fp_save_mech) { -#if defined(__i386) - case FP_FNSAVE: - fp->fpu_regs.kfpu_u.kfpu_fn->f_fcw = fcw; - break; -#endif case FP_FXSAVE: fx = fp->fpu_regs.kfpu_u.kfpu_fx; fx->fx_fcw = fcw; diff --git a/usr/src/uts/intel/ia32/os/sundep.c b/usr/src/uts/intel/ia32/os/sundep.c index 3911d6ebaa..cfb4552287 100644 --- a/usr/src/uts/intel/ia32/os/sundep.c +++ b/usr/src/uts/intel/ia32/os/sundep.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2017 Joyent, Inc. + * Copyright 2018 Joyent, Inc. */ /* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */ @@ -393,12 +393,12 @@ lwp_forkregs(klwp_t *lwp, klwp_t *clwp) struct pcb *pcb = &clwp->lwp_pcb; struct regs *rp = lwptoregs(lwp); - if (pcb->pcb_rupdate == 0) { + if (!PCB_NEED_UPDATE_SEGS(pcb)) { pcb->pcb_ds = rp->r_ds; pcb->pcb_es = rp->r_es; pcb->pcb_fs = rp->r_fs; pcb->pcb_gs = rp->r_gs; - pcb->pcb_rupdate = 1; + PCB_SET_UPDATE_SEGS(pcb); lwptot(clwp)->t_post_sys = 1; } ASSERT(lwptot(clwp)->t_post_sys); @@ -436,22 +436,22 @@ lwp_pcb_exit(void) * as a segment-not-present trap. * * Here we save the current values from the lwp regs into the pcb - * and set pcb->pcb_rupdate to 1 to tell the rest of the kernel - * that the pcb copy of the segment registers is the current one. - * This ensures the lwp's next trip to user land via update_sregs. - * Finally we set t_post_sys to ensure that no system call fast-path's - * its way out of the kernel via sysret. + * and or PCB_UPDATE_SEGS (1) in pcb->pcb_rupdate to tell the rest + * of the kernel that the pcb copy of the segment registers is the + * current one. This ensures the lwp's next trip to user land via + * update_sregs. Finally we set t_post_sys to ensure that no + * system call fast-path's its way out of the kernel via sysret. * - * (This means that we need to have interrupts disabled when we test - * t->t_post_sys in the syscall handlers; if the test fails, we need - * to keep interrupts disabled until we return to userland so we can't - * be switched away.) + * (This means that we need to have interrupts disabled when we + * test t->t_post_sys in the syscall handlers; if the test fails, + * we need to keep interrupts disabled until we return to userland + * so we can't be switched away.) * - * As a result of all this, we don't really have to do a whole lot if - * the thread is just mucking about in the kernel, switching on and - * off the cpu for whatever reason it feels like. 
And yet we still - * preserve fast syscalls, cause if we -don't- get descheduled, - * we never come here either. + * As a result of all this, we don't really have to do a whole lot + * if the thread is just mucking about in the kernel, switching on + * and off the cpu for whatever reason it feels like. And yet we + * still preserve fast syscalls, cause if we -don't- get + * descheduled, we never come here either. */ #define VALID_LWP_DESC(udp) ((udp)->usd_type == SDT_MEMRWA && \ @@ -468,7 +468,7 @@ lwp_segregs_save(klwp_t *lwp) ASSERT(VALID_LWP_DESC(&pcb->pcb_fsdesc)); ASSERT(VALID_LWP_DESC(&pcb->pcb_gsdesc)); - if (pcb->pcb_rupdate == 0) { + if (!PCB_NEED_UPDATE_SEGS(pcb)) { rp = lwptoregs(lwp); /* @@ -482,7 +482,7 @@ lwp_segregs_save(klwp_t *lwp) pcb->pcb_es = rp->r_es; pcb->pcb_fs = rp->r_fs; pcb->pcb_gs = rp->r_gs; - pcb->pcb_rupdate = 1; + PCB_SET_UPDATE_SEGS(pcb); lwp->lwp_thread->t_post_sys = 1; } #endif /* __amd64 */ @@ -833,7 +833,8 @@ lwp_installctx(klwp_t *lwp) * On the amd64 kernel, the context handlers are responsible for * virtualizing %ds, %es, %fs, and %gs to the lwp. The register * values are only ever changed via sys_rtt when the - * pcb->pcb_rupdate == 1. Only sys_rtt gets to clear the bit. + * PCB_UPDATE_SEGS bit (1) is set in pcb->pcb_rupdate. Only + * sys_rtt gets to clear the bit. * * On the i386 kernel, the context handlers are responsible for * virtualizing %gs/%fs to the lwp by updating the per-cpu GDTs @@ -964,7 +965,7 @@ setregs(uarg_t *args) pcb->pcb_ds = rp->r_ds; pcb->pcb_es = rp->r_es; - pcb->pcb_rupdate = 1; + PCB_SET_UPDATE_SEGS(pcb); #elif defined(__i386) @@ -991,17 +992,15 @@ setregs(uarg_t *args) t->t_post_sys = 1; /* - * Here we initialize minimal fpu state. - * The rest is done at the first floating - * point instruction that a process executes. - */ - pcb->pcb_fpu.fpu_flags = 0; - - /* * Add the lwp context handlers that virtualize segment registers, * and/or system call stacks etc. */ lwp_installctx(lwp); + + /* + * Reset the FPU flags and then initialize the FPU for this lwp. + */ + fp_exec(); } user_desc_t * diff --git a/usr/src/uts/intel/ia32/os/sysi86.c b/usr/src/uts/intel/ia32/os/sysi86.c index 4573d62fad..bdb66e3e1f 100644 --- a/usr/src/uts/intel/ia32/os/sysi86.c +++ b/usr/src/uts/intel/ia32/os/sysi86.c @@ -624,7 +624,7 @@ setdscr(struct ssd *ssd) } #if defined(__amd64) - if (pcb->pcb_rupdate == 1) { + if (PCB_NEED_UPDATE_SEGS(pcb)) { if (ssd->sel == pcb->pcb_ds || ssd->sel == pcb->pcb_es || ssd->sel == pcb->pcb_fs || diff --git a/usr/src/uts/intel/ia32/syscall/lwp_private.c b/usr/src/uts/intel/ia32/syscall/lwp_private.c index 79e9076ee0..479a800d9a 100644 --- a/usr/src/uts/intel/ia32/syscall/lwp_private.c +++ b/usr/src/uts/intel/ia32/syscall/lwp_private.c @@ -21,10 +21,9 @@ /* * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright (c) 2018, Joyent, Inc. */ -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/param.h> #include <sys/types.h> #include <sys/disp.h> @@ -72,12 +71,12 @@ lwp_setprivate(klwp_t *lwp, int which, uintptr_t base) * of zero for %fs and %gs to use the 64-bit fs_base and gs_base * respectively. 
*/ - if (pcb->pcb_rupdate == 0) { + if (!PCB_NEED_UPDATE_SEGS(pcb)) { pcb->pcb_ds = rp->r_ds; pcb->pcb_es = rp->r_es; pcb->pcb_fs = rp->r_fs; pcb->pcb_gs = rp->r_gs; - pcb->pcb_rupdate = 1; + PCB_SET_UPDATE_SEGS(pcb); t->t_post_sys = 1; } ASSERT(t->t_post_sys); @@ -171,7 +170,7 @@ lwp_getprivate(klwp_t *lwp, int which, uintptr_t base) case _LWP_FSBASE: if ((sbase = pcb->pcb_fsbase) != 0) { if (lwp_getdatamodel(lwp) == DATAMODEL_NATIVE) { - if (pcb->pcb_rupdate == 1) { + if (PCB_NEED_UPDATE_SEGS(pcb)) { if (pcb->pcb_fs == 0) break; } else { @@ -179,7 +178,7 @@ lwp_getprivate(klwp_t *lwp, int which, uintptr_t base) break; } } else { - if (pcb->pcb_rupdate == 1) { + if (PCB_NEED_UPDATE_SEGS(pcb)) { if (pcb->pcb_fs == LWPFS_SEL) break; } else { @@ -193,7 +192,7 @@ lwp_getprivate(klwp_t *lwp, int which, uintptr_t base) case _LWP_GSBASE: if ((sbase = pcb->pcb_gsbase) != 0) { if (lwp_getdatamodel(lwp) == DATAMODEL_NATIVE) { - if (pcb->pcb_rupdate == 1) { + if (PCB_NEED_UPDATE_SEGS(pcb)) { if (pcb->pcb_gs == 0) break; } else { @@ -201,7 +200,7 @@ lwp_getprivate(klwp_t *lwp, int which, uintptr_t base) break; } } else { - if (pcb->pcb_rupdate == 1) { + if (PCB_NEED_UPDATE_SEGS(pcb)) { if (pcb->pcb_gs == LWPGS_SEL) break; } else { diff --git a/usr/src/uts/intel/sys/archsystm.h b/usr/src/uts/intel/sys/archsystm.h index 8da14c1a75..93fed4e87d 100644 --- a/usr/src/uts/intel/sys/archsystm.h +++ b/usr/src/uts/intel/sys/archsystm.h @@ -55,11 +55,8 @@ extern void mfence_insn(void); extern uint16_t getgs(void); extern void setgs(uint16_t); -extern void patch_sse(void); -extern void patch_sse2(void); #endif -extern void patch_xsave(void); extern kmem_cache_t *fpsave_cachep; extern void cli(void); diff --git a/usr/src/uts/intel/sys/fp.h b/usr/src/uts/intel/sys/fp.h index fe5471e855..9841fe1c3b 100644 --- a/usr/src/uts/intel/sys/fp.h +++ b/usr/src/uts/intel/sys/fp.h @@ -236,13 +236,14 @@ struct fxsave_state { * 13.4.2 of the Intel 64 and IA-32 Architectures Software Developer’s Manual, * Volume 1 (IASDv1). The extended portion is documented in section 13.4.3. * - * Our size is at least AVX_XSAVE_SIZE (832 bytes), asserted in fpnoextflt(). - * Enabling additional xsave-related CPU features requires an increase in the - * size. We dynamically allocate the per-lwp xsave area at runtime, based on - * the size needed for the CPU-specific features. This xsave_state structure - * simply defines our historical layout for the beginning of the xsave area. The - * locations and size of new, extended, components is determined dynamically by - * querying the CPU. See the xsave_info structure in cpuid.c. + * Our size is at least AVX_XSAVE_SIZE (832 bytes), which is asserted + * statically. Enabling additional xsave-related CPU features requires an + * increase in the size. We dynamically allocate the per-lwp xsave area at + * runtime, based on the size needed for the CPU-specific features. This + * xsave_state structure simply defines our historical layout for the beginning + * of the xsave area. The locations and size of new, extended, components is + * determined dynamically by querying the CPU. See the xsave_info structure in + * cpuid.c. * * xsave component usage is tracked using bits in the xs_xstate_bv field. The * components are documented in section 13.1 of IASDv1. 
For easy reference, @@ -301,7 +302,6 @@ extern uint32_t sse_mxcsr_mask; extern void fpu_probe(void); extern uint_t fpu_initial_probe(void); -extern int fpu_probe_pentium_fdivbug(void); extern void fpu_auxv_info(int *, size_t *); @@ -315,6 +315,10 @@ extern void xsaveopt_excp_clr_ctxt(void *); extern void (*fpsave_ctxt)(void *); extern void (*xsavep)(struct xsave_state *, uint64_t); +extern void fpxrestore_ctxt(void *); +extern void xrestore_ctxt(void *); +extern void (*fprestore_ctxt)(void *); + extern void fxsave_insn(struct fxsave_state *); extern void fpsave(struct fnsave_state *); extern void fprestore(struct fnsave_state *); @@ -335,11 +339,11 @@ extern uint32_t fpgetcwsw(void); extern uint32_t fpgetmxcsr(void); struct regs; -extern int fpnoextflt(struct regs *); -extern int fpextovrflt(struct regs *); extern int fpexterrflt(struct regs *); extern int fpsimderrflt(struct regs *); extern void fpsetcw(uint16_t, uint32_t); +extern void fp_seed(void); +extern void fp_exec(void); struct _klwp; extern void fp_lwp_init(struct _klwp *); extern void fp_lwp_cleanup(struct _klwp *); diff --git a/usr/src/uts/intel/sys/pcb.h b/usr/src/uts/intel/sys/pcb.h index defd116eba..e7e2e2cdce 100644 --- a/usr/src/uts/intel/sys/pcb.h +++ b/usr/src/uts/intel/sys/pcb.h @@ -21,6 +21,7 @@ /* * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, Joyent, Inc. */ #ifndef _SYS_PCB_H @@ -51,7 +52,6 @@ typedef struct pcb { uint_t pcb_flags; /* state flags; cleared on fork */ greg_t pcb_drstat; /* status debug register (%dr6) */ unsigned char pcb_instr; /* /proc: instruction at stop */ -#if defined(__amd64) unsigned char pcb_rupdate; /* new register values in pcb -> regs */ uintptr_t pcb_fsbase; uintptr_t pcb_gsbase; @@ -59,7 +59,6 @@ typedef struct pcb { selector_t pcb_es; selector_t pcb_fs; selector_t pcb_gs; -#endif /* __amd64 */ user_desc_t pcb_fsdesc; /* private per-lwp %fs descriptors */ user_desc_t pcb_gsdesc; /* private per-lwp %gs descriptors */ } pcb_t; @@ -77,6 +76,21 @@ typedef struct pcb { #define REQUEST_NOSTEP 0x200 /* request pending to disable single-step */ #define ASYNC_HWERR 0x400 /* hardware error has corrupted context */ +/* pcb_rupdate values */ +#define PCB_UPDATE_SEGS 0x01 /* Update segment registers */ +#define PCB_UPDATE_FPU 0x02 /* Update FPU registers */ + +#define PCB_SET_UPDATE_SEGS(pcb) ((pcb)->pcb_rupdate |= PCB_UPDATE_SEGS) +#define PCB_SET_UPDATE_FPU(pcb) ((pcb)->pcb_rupdate |= PCB_UPDATE_FPU) +#define PCB_NEED_UPDATE_SEGS(pcb) \ + (((pcb)->pcb_rupdate & PCB_UPDATE_SEGS) != 0) +#define PCB_NEED_UPDATE_FPU(pcb) \ + (((pcb)->pcb_rupdate & PCB_UPDATE_FPU) != 0) +#define PCB_NEED_UPDATE(pcb) \ + (PCB_NEED_UPDATE_FPU(pcb) || PCB_NEED_UPDATE_SEGS(pcb)) +#define PCB_CLEAR_UPDATE_SEGS(pcb) ((pcb)->pcb_rupdate &= ~PCB_UPDATE_SEGS) +#define PCB_CLEAR_UPDATE_FPU(pcb) ((pcb)->pcb_rupdate &= ~PCB_UPDATE_FPU) + /* fpu_flags */ #define FPU_EN 0x1 /* flag signifying fpu in use */ #define FPU_VALID 0x2 /* fpu_regs has valid fpu state */ |
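As an illustration of how the new pcb_rupdate flags are intended to compose, the following stand-alone sketch copies the macros above into a user-space model of the return-to-userland check. The model_* helpers are stubs introduced only for this example; in the kernel the actual work is done by update_sregs() and the FPU restore logic reached from sys_rtt_common(), and the real checks differ in detail.

/*
 * Minimal user-space model of a return-to-userland path consuming the
 * pcb_rupdate flags. Handler bodies are stubs for illustration only.
 */
#include <stdio.h>

typedef struct pcb {
	unsigned char pcb_rupdate;
} pcb_t;

#define	PCB_UPDATE_SEGS	0x01
#define	PCB_UPDATE_FPU	0x02

#define	PCB_SET_UPDATE_SEGS(pcb)	((pcb)->pcb_rupdate |= PCB_UPDATE_SEGS)
#define	PCB_SET_UPDATE_FPU(pcb)		((pcb)->pcb_rupdate |= PCB_UPDATE_FPU)
#define	PCB_NEED_UPDATE_SEGS(pcb)	\
	(((pcb)->pcb_rupdate & PCB_UPDATE_SEGS) != 0)
#define	PCB_NEED_UPDATE_FPU(pcb)	\
	(((pcb)->pcb_rupdate & PCB_UPDATE_FPU) != 0)
#define	PCB_NEED_UPDATE(pcb)	\
	(PCB_NEED_UPDATE_FPU(pcb) || PCB_NEED_UPDATE_SEGS(pcb))
#define	PCB_CLEAR_UPDATE_SEGS(pcb)	((pcb)->pcb_rupdate &= ~PCB_UPDATE_SEGS)
#define	PCB_CLEAR_UPDATE_FPU(pcb)	((pcb)->pcb_rupdate &= ~PCB_UPDATE_FPU)

static void
model_restore_fpu(pcb_t *pcb)
{
	printf("restore FPU from the pcb save area\n");
	PCB_CLEAR_UPDATE_FPU(pcb);
}

static void
model_update_sregs(pcb_t *pcb)
{
	printf("reload %%ds/%%es/%%fs/%%gs from the pcb\n");
	PCB_CLEAR_UPDATE_SEGS(pcb);
}

/* Slow path taken only when some post-syscall fix-up is pending. */
static void
model_sys_rtt(pcb_t *pcb)
{
	if (!PCB_NEED_UPDATE(pcb))
		return;
	if (PCB_NEED_UPDATE_FPU(pcb))
		model_restore_fpu(pcb);
	if (PCB_NEED_UPDATE_SEGS(pcb))
		model_update_sregs(pcb);
}

int
main(void)
{
	pcb_t pcb = { 0 };

	PCB_SET_UPDATE_FPU(&pcb);	/* e.g. fp_save() was called */
	model_sys_rtt(&pcb);
	return (0);
}

Keeping both conditions behind the combined PCB_NEED_UPDATE() check mirrors the intent described earlier: the fast syscall return path stays untouched unless a segment or FPU fix-up is actually pending.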