author		Robert Mustacchi <rm@joyent.com>	2018-06-12 21:19:02 +0000
committer	Robert Mustacchi <rm@joyent.com>	2018-06-13 22:24:41 +0000
commit		f48e1a4f1d835afe9a2607058270907434313e19 (patch)
tree		6a8ac10bccd61f3ec0bf6bfba1be4bbe31a2fbaf
parent		7dd56b107bd476ae57c659b6b9eb5ddd7bb26a2f (diff)
download	illumos-joyent-f48e1a4f1d835afe9a2607058270907434313e19.tar.gz
OS-7000 Need Eager FPU
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
Reviewed by: Bryan Cantrill <bryan@joyent.com>
Reviewed by: John Levon <john.levon@joyent.com>
Approved by: Bryan Cantrill <bryan@joyent.com>
-rw-r--r--	usr/src/cmd/mdb/intel/modules/genunix/gcore_isadep.c	|   3
-rw-r--r--	usr/src/uts/common/brand/lx/syscall/lx_thread_area.c	|   6
-rw-r--r--	usr/src/uts/common/brand/solaris10/s10_brand.c	|   4
-rw-r--r--	usr/src/uts/i86pc/io/vmm/vmm.c	|   9
-rw-r--r--	usr/src/uts/i86pc/ml/kpti_trampolines.s	|  28
-rw-r--r--	usr/src/uts/i86pc/ml/locore.s	| 107
-rw-r--r--	usr/src/uts/i86pc/ml/syscall_asm_amd64.s	|  40
-rw-r--r--	usr/src/uts/i86pc/os/fpu_subr.c	|   5
-rw-r--r--	usr/src/uts/i86pc/os/intr.c	|  33
-rw-r--r--	usr/src/uts/i86pc/os/trap.c	|  57
-rw-r--r--	usr/src/uts/intel/brand/lx/lx_archdep.c	|  18
-rw-r--r--	usr/src/uts/intel/ia32/ml/exception.s	| 228
-rw-r--r--	usr/src/uts/intel/ia32/ml/float.s	| 516
-rw-r--r--	usr/src/uts/intel/ia32/os/archdep.c	|   9
-rw-r--r--	usr/src/uts/intel/ia32/os/fpu.c	| 601
-rw-r--r--	usr/src/uts/intel/ia32/os/sundep.c	|  55
-rw-r--r--	usr/src/uts/intel/ia32/os/sysi86.c	|   2
-rw-r--r--	usr/src/uts/intel/ia32/syscall/lwp_private.c	|  15
-rw-r--r--	usr/src/uts/intel/sys/archsystm.h	|   3
-rw-r--r--	usr/src/uts/intel/sys/fp.h	|  24
-rw-r--r--	usr/src/uts/intel/sys/pcb.h	|  18
21 files changed, 688 insertions, 1093 deletions
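
The substance of this change is a switch from lazy FPU handling (set CR0.TS at context switch and reload state from the resulting #NM trap) to eager handling (reload the saved state as part of switching a thread back onto a CPU). A minimal sketch of the eager path follows; fprestore_ctxt, struct fpu_ctx, and FPU_EN come from this patch, while the wrapper function and its name are illustrative only and not part of the commit.

	/*
	 * Illustrative sketch: eager restore in the context-switch epilogue.
	 * fprestore_ctxt is the function pointer initialized in fpu_probe()
	 * (fpxrestore_ctxt or xrestore_ctxt); fp is the incoming thread's
	 * struct fpu_ctx from its pcb.
	 */
	static void
	eager_fpu_restore(struct fpu_ctx *fp)
	{
		if (fp->fpu_flags & FPU_EN) {
			/*
			 * Clears CR0.TS and reloads the saved registers, so
			 * the thread never takes a #NM on its first FPU use.
			 */
			fprestore_ctxt(fp);
		}
	}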
diff --git a/usr/src/cmd/mdb/intel/modules/genunix/gcore_isadep.c b/usr/src/cmd/mdb/intel/modules/genunix/gcore_isadep.c
index 73d5ecbb94..d4837bd475 100644
--- a/usr/src/cmd/mdb/intel/modules/genunix/gcore_isadep.c
+++ b/usr/src/cmd/mdb/intel/modules/genunix/gcore_isadep.c
@@ -10,6 +10,7 @@
*/
/*
* Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2018, Joyent, Inc.
*/
#include <mdb/mdb_modapi.h>
@@ -71,7 +72,7 @@ gcore_getgregs(mdb_klwp_t *lwp, gregset_t grp)
grp[REG_R15] = rp->r_r15;
grp[REG_FSBASE] = pcb->pcb_fsbase;
grp[REG_GSBASE] = pcb->pcb_gsbase;
- if (pcb->pcb_rupdate == 1) {
+ if (PCB_NEED_UPDATE_SEGS(pcb)) {
grp[REG_DS] = pcb->pcb_ds;
grp[REG_ES] = pcb->pcb_es;
grp[REG_FS] = pcb->pcb_fs;
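
PCB_NEED_UPDATE_SEGS() and its companions replace the old pcb_rupdate boolean with a small set of flags, so the same fix-up-before-returning-to-user-land path can cover both the segment registers and, with this change, the FPU. The pcb.h change is listed in the diffstat but not reproduced in this excerpt; the sketch below shows one plausible shape for those macros (flag names and values here are illustrative, not copied from the patch).

	/*
	 * Illustrative only: a flags-based replacement for a boolean
	 * pcb_rupdate. The authoritative definitions are in the pcb.h diff,
	 * which is not shown here.
	 */
	#define	PCB_UPDATE_SEGS	0x01	/* reload %ds/%es/%fs/%gs on return */
	#define	PCB_UPDATE_FPU	0x02	/* reload FPU state on return */

	#define	PCB_SET_UPDATE_SEGS(pcb)	((pcb)->pcb_rupdate |= PCB_UPDATE_SEGS)
	#define	PCB_SET_UPDATE_FPU(pcb)		((pcb)->pcb_rupdate |= PCB_UPDATE_FPU)
	#define	PCB_CLEAR_UPDATE_SEGS(pcb)	((pcb)->pcb_rupdate &= ~PCB_UPDATE_SEGS)
	#define	PCB_CLEAR_UPDATE_FPU(pcb)	((pcb)->pcb_rupdate &= ~PCB_UPDATE_FPU)
	#define	PCB_NEED_UPDATE_SEGS(pcb)	(((pcb)->pcb_rupdate & PCB_UPDATE_SEGS) != 0)
	#define	PCB_NEED_UPDATE_FPU(pcb)	(((pcb)->pcb_rupdate & PCB_UPDATE_FPU) != 0)
	#define	PCB_NEED_UPDATE(pcb)		((pcb)->pcb_rupdate != 0)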
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_thread_area.c b/usr/src/uts/common/brand/lx/syscall/lx_thread_area.c
index 98b1e44780..a84c17e139 100644
--- a/usr/src/uts/common/brand/lx/syscall/lx_thread_area.c
+++ b/usr/src/uts/common/brand/lx/syscall/lx_thread_area.c
@@ -21,7 +21,7 @@
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
- * Copyright 2016 Joyent, Inc.
+ * Copyright 2018 Joyent, Inc.
*/
#include <sys/types.h>
@@ -65,7 +65,7 @@ lx_arch_prctl(int code, ulong_t addr)
/*
* Ensure we go out via update_sregs.
*/
- pcb->pcb_rupdate = 1;
+ PCB_SET_UPDATE_SEGS(pcb);
}
kpreempt_enable();
break;
@@ -87,7 +87,7 @@ lx_arch_prctl(int code, ulong_t addr)
/*
* Ensure we go out via update_sregs.
*/
- pcb->pcb_rupdate = 1;
+ PCB_SET_UPDATE_SEGS(pcb);
}
kpreempt_enable();
break;
diff --git a/usr/src/uts/common/brand/solaris10/s10_brand.c b/usr/src/uts/common/brand/solaris10/s10_brand.c
index 9da0855cc3..c5a9d10f58 100644
--- a/usr/src/uts/common/brand/solaris10/s10_brand.c
+++ b/usr/src/uts/common/brand/solaris10/s10_brand.c
@@ -22,7 +22,7 @@
/*
* Copyright (c) 2013, OmniTI Computer Consulting, Inc. All rights reserved.
* Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2017, Joyent, Inc.
+ * Copyright 2018, Joyent, Inc.
*/
#include <sys/errno.h>
@@ -229,7 +229,7 @@ s10_amd64_correct_fsreg(klwp_t *l)
if (lwp_getdatamodel(l) == DATAMODEL_NATIVE) {
kpreempt_disable();
l->lwp_pcb.pcb_fs = LWPFS_SEL;
- l->lwp_pcb.pcb_rupdate = 1;
+ PCB_SET_UPDATE_SEGS(&l->lwp_pcb);
lwptot(l)->t_post_sys = 1; /* Guarantee update_sregs() */
kpreempt_enable();
}
diff --git a/usr/src/uts/i86pc/io/vmm/vmm.c b/usr/src/uts/i86pc/io/vmm/vmm.c
index 4367be0df1..13a23dfc85 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm.c
+++ b/usr/src/uts/i86pc/io/vmm/vmm.c
@@ -1282,7 +1282,14 @@ save_guest_fpustate(struct vcpu *vcpu)
/* save guest FPU state */
fpu_stop_emulating();
fpusave(vcpu->guestfpu);
+#ifdef __FreeBSD__
fpu_start_emulating();
+#else
+ /*
+ * When the host state has been restored, we should not re-enable
+ * CR0.TS on illumos for eager FPU.
+ */
+#endif
}
static VMM_STAT(VCPU_IDLE_TICKS, "number of ticks vcpu was idle");
@@ -2015,7 +2022,7 @@ restart:
set_pcb_flags(pcb, PCB_FULL_IRET);
#else
/* Force a trip through update_sregs to reload %fs/%gs and friends */
- ttolwp(curthread)->lwp_pcb.pcb_rupdate = 1;
+ PCB_SET_UPDATE_SEGS(&ttolwp(curthread)->lwp_pcb);
#endif
#ifdef __FreeBSD__
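
For context on the vmm.c hunk above: the FreeBSD-derived fpu_stop_emulating()/fpu_start_emulating() helpers amount to toggling CR0.TS around guest FPU handling. Below is a simplified sketch of that behavior using the illumos getcr0()/setcr0() accessors (the bodies are an approximation, not the actual bhyve code). With eager FPU the host expects CR0.TS to remain clear once its own state is restored, which is why the illumos path skips the re-enable after fpusave().

	/* Approximate behavior only; see the real definitions in the vmm code. */
	static void
	fpu_stop_emulating(void)
	{
		setcr0(getcr0() & ~CR0_TS);	/* clear CR0.TS: FPU usable */
	}

	static void
	fpu_start_emulating(void)
	{
		setcr0(getcr0() | CR0_TS);	/* set CR0.TS: next FPU use traps #NM */
	}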
diff --git a/usr/src/uts/i86pc/ml/kpti_trampolines.s b/usr/src/uts/i86pc/ml/kpti_trampolines.s
index f8486c7403..e2e32bf092 100644
--- a/usr/src/uts/i86pc/ml/kpti_trampolines.s
+++ b/usr/src/uts/i86pc/ml/kpti_trampolines.s
@@ -137,6 +137,12 @@
DGDEF3(kpti_enable, 8, 8)
.fill 1, 8, 1
+#if DEBUG
+ .data
+_bad_ts_panic_msg:
+ .string "kpti_trampolines.s: tr_iret_user but CR0.TS set"
+#endif
+
.section ".text";
.align MMU_PAGESIZE
@@ -523,6 +529,28 @@ tr_intr_ret_start:
SET_SIZE(tr_iret_kernel)
ENTRY_NP(tr_iret_user)
+#if DEBUG
+ /*
+ * Ensure that we return to user land with CR0.TS clear. We do this
+ * before we trampoline back and pivot the stack and %cr3. This way
+ * we're still on the kernel stack and kernel %cr3, though we are on the
+ * user GSBASE.
+ */
+ pushq %rax
+ mov %cr0, %rax
+ testq $CR0_TS, %rax
+ jz 1f
+ swapgs
+ popq %rax
+ leaq _bad_ts_panic_msg(%rip), %rdi
+ xorl %eax, %eax
+ pushq %rbp
+ movq %rsp, %rbp
+ call panic
+1:
+ popq %rax
+#endif
+
cmpq $1, kpti_enable
jne 1f
diff --git a/usr/src/uts/i86pc/ml/locore.s b/usr/src/uts/i86pc/ml/locore.s
index 4626dd1492..acd96e271a 100644
--- a/usr/src/uts/i86pc/ml/locore.s
+++ b/usr/src/uts/i86pc/ml/locore.s
@@ -239,6 +239,11 @@ __return_from_main:
__unsupported_cpu:
.string "486 style cpu detected - no longer supported!"
+#if defined(DEBUG)
+_no_pending_updates:
+ .string "locore.s:%d lwp_rtt(lwp %p) but pcb_rupdate != 1"
+#endif
+
#endif /* !__lint */
#if !defined(__amd64)
@@ -1505,8 +1510,6 @@ _sys_rtt(void)
#else /* __lint */
-#if defined(__amd64)
-
ENTRY_NP(lwp_rtt_initial)
movq %gs:CPU_THREAD, %r15
movq T_STACK(%r15), %rsp /* switch to the thread stack */
@@ -1549,8 +1552,6 @@ _lwp_rtt:
movq %r14, %rdx
xorl %eax, %eax
call panic
-_no_pending_updates:
- .string "locore.s:%d lwp_rtt(lwp %p) but pcb_rupdate != 1"
1:
#endif
@@ -1571,11 +1572,6 @@ _no_pending_updates:
call post_syscall /* post_syscall(rval1, rval2) */
/*
- * set up to take fault on first use of fp
- */
- STTS(%rdi)
-
- /*
* XXX - may want a fast path that avoids sys_rtt_common in the
* most common case.
*/
@@ -1636,99 +1632,6 @@ _sys_rtt_end:
SET_SIZE(sys_rtt_syscall)
SET_SIZE(sys_rtt_syscall32)
-#elif defined(__i386)
-
- ENTRY_NP(lwp_rtt_initial)
- movl %gs:CPU_THREAD, %eax
- movl T_STACK(%eax), %esp /* switch to the thread stack */
- movl %esp, %ebp
- call __dtrace_probe___proc_start
- jmp _lwp_rtt
-
- ENTRY_NP(lwp_rtt)
- movl %gs:CPU_THREAD, %eax
- movl T_STACK(%eax), %esp /* switch to the thread stack */
- movl %esp, %ebp
-_lwp_rtt:
- call __dtrace_probe___proc_lwp__start
-
- /*
- * If agent lwp, clear %fs and %gs.
- */
- movl %gs:CPU_LWP, %eax
- movl LWP_PROCP(%eax), %edx
-
- cmpl %eax, P_AGENTTP(%edx)
- jne 1f
- movl $0, REGOFF_FS(%esp)
- movl $0, REGOFF_GS(%esp)
-1:
- call dtrace_systrace_rtt
- movl REGOFF_EDX(%esp), %edx
- movl REGOFF_EAX(%esp), %eax
- pushl %edx
- pushl %eax
- call post_syscall /* post_syscall(rval1, rval2) */
- addl $8, %esp
-
- /*
- * set up to take fault on first use of fp
- */
- STTS(%eax)
-
- /*
- * XXX - may want a fast path that avoids sys_rtt_common in the
- * most common case.
- */
- ALTENTRY(_sys_rtt)
- CLI(%eax) /* disable interrupts */
- ALTENTRY(_sys_rtt_ints_disabled)
- pushl %esp /* pass rp to sys_rtt_common */
- call sys_rtt_common
- addl $4, %esp /* pop arg */
- testl %eax, %eax /* test for return to user mode */
- jz sr_sup
-
- /*
- * Return to User.
- */
- ALTENTRY(sys_rtt_syscall)
- INTR_POP_USER
-
- /*
- * There can be no instructions between this label and IRET or
- * we could end up breaking linux brand support. See label usage
- * in lx_brand_int80_callback for an example.
- */
- ALTENTRY(nopop_sys_rtt_syscall)
- IRET
- /*NOTREACHED*/
- SET_SIZE(nopop_sys_rtt_syscall)
-
- ALTENTRY(_sys_rtt_end)
-
- /*
- * Return to supervisor
- */
- ALTENTRY(sr_sup)
-
- /*
- * Restore regs before doing iret to kernel mode
- */
- INTR_POP_KERNEL
- IRET
- /*NOTREACHED*/
-
- SET_SIZE(sr_sup)
- SET_SIZE(_sys_rtt_end)
- SET_SIZE(lwp_rtt)
- SET_SIZE(lwp_rtt_initial)
- SET_SIZE(_sys_rtt_ints_disabled)
- SET_SIZE(_sys_rtt)
- SET_SIZE(sys_rtt_syscall)
-
-#endif /* __i386 */
-
#endif /* __lint */
#if defined(__lint)
diff --git a/usr/src/uts/i86pc/ml/syscall_asm_amd64.s b/usr/src/uts/i86pc/ml/syscall_asm_amd64.s
index 98f8c8f8da..9727110109 100644
--- a/usr/src/uts/i86pc/ml/syscall_asm_amd64.s
+++ b/usr/src/uts/i86pc/ml/syscall_asm_amd64.s
@@ -271,7 +271,18 @@
* between entering privileged mode and performing the assertion,
* otherwise we may perform a context switch on the thread, which
* will end up setting pcb_rupdate to 1 again.
+ *
+ * ASSERT(%cr0 & CR0_TS == 0);
+ * Preconditions:
+ * (%rsp is ready for normal call sequence)
+ * Postconditions (if assertion is true):
+ * (specified register is clobbered)
+ *
+ * Check to make sure that we are returning to user land and that CR0.TS
+ * is not set. This is required as part of the eager FPU (see
+ * uts/intel/ia32/os/fpu.c for more information).
*/
+
#if defined(DEBUG)
#if !defined(__lint)
@@ -285,6 +296,9 @@ __codesel_msg:
__no_rupdate_msg:
.string "syscall_asm_amd64.s:%d lwp %p, pcb_rupdate != 0"
+__bad_ts_msg:
+	.string "syscall_asm_amd64.s:%d CR0.TS set on user return"
+
#endif /* !__lint */
#define ASSERT_LWPTOREGS(lwp, rp) \
@@ -310,9 +324,20 @@ __no_rupdate_msg:
call panic; \
8:
+#define ASSERT_CR0TS_ZERO(reg) \
+ movq %cr0, reg; \
+ testq $CR0_TS, reg; \
+ jz 9f; \
+ leaq __bad_ts_msg(%rip), %rdi; \
+ movl $__LINE__, %esi; \
+ xorl %eax, %eax; \
+ call panic; \
+9:
+
#else
#define ASSERT_LWPTOREGS(lwp, rp)
#define ASSERT_NO_RUPDATE_PENDING(lwp)
+#define ASSERT_CR0TS_ZERO(reg)
#endif
/*
@@ -648,6 +673,11 @@ _syscall_after_brand:
movq %r13, REGOFF_RDX(%rsp)
/*
+ * Clobber %r11 as we check CR0.TS.
+ */
+ ASSERT_CR0TS_ZERO(%r11)
+
+ /*
* To get back to userland, we need the return %rip in %rcx and
* the return %rfl in %r11d. The sysretq instruction also arranges
* to fix up %cs and %ss; everything else is our responsibility.
@@ -972,6 +1002,11 @@ _syscall32_after_brand:
SIMPLE_SYSCALL_POSTSYS(%r15, %r14, %bx)
/*
+ * Clobber %r11 as we check CR0.TS.
+ */
+ ASSERT_CR0TS_ZERO(%r11)
+
+ /*
* To get back to userland, we need to put the return %rip in %rcx and
* the return %rfl in %r11d. The sysret instruction also arranges
* to fix up %cs and %ss; everything else is our responsibility.
@@ -1263,6 +1298,11 @@ sys_sysenter()
andq $_BITNOT(PS_IE), REGOFF_RFL(%rsp)
/*
+ * Clobber %r11 as we check CR0.TS.
+ */
+ ASSERT_CR0TS_ZERO(%r11)
+
+ /*
* (There's no point in loading up %edx because the sysexit
* mechanism smashes it.)
*/
diff --git a/usr/src/uts/i86pc/os/fpu_subr.c b/usr/src/uts/i86pc/os/fpu_subr.c
index 3e027269fb..5c57bdcb8c 100644
--- a/usr/src/uts/i86pc/os/fpu_subr.c
+++ b/usr/src/uts/i86pc/os/fpu_subr.c
@@ -148,8 +148,7 @@ fpu_probe(void)
ENABLE_SSE();
if (is_x86_feature(x86_featureset, X86FSET_AVX)) {
- ASSERT(is_x86_feature(x86_featureset,
- X86FSET_XSAVE));
+ ASSERT(is_x86_feature(x86_featureset, X86FSET_XSAVE));
fp_kind |= __FP_AVX;
}
@@ -180,7 +179,7 @@ fpu_probe(void)
fpsave_ctxt = xsave_ctxt;
}
}
- patch_xsave();
+ fprestore_ctxt = xrestore_ctxt;
fpsave_cachep = kmem_cache_create("xsave_cache",
cpuid_get_xsave_size(), XSAVE_ALIGN,
NULL, NULL, NULL, NULL, NULL, 0);
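
The fpu_subr.c hunk above replaces the old patch_xsave() hot-patching of the #NM handler with a plain function-pointer assignment: the restore routine is now chosen once at boot, next to the save routine. A simplified sketch of that selection follows (the wrapper name is invented for illustration; the real fpu_probe() logic also handles xsaveopt and the AMD exception-pointer variants).

	/*
	 * Illustrative sketch of choosing the context save/restore hooks in
	 * fpu_probe(), based on the detected save mechanism.
	 */
	static void
	fp_choose_hooks(void)
	{
		if (fp_save_mech == FP_XSAVE) {
			fpsave_ctxt = xsave_ctxt;
			fprestore_ctxt = xrestore_ctxt;
		} else {
			fpsave_ctxt = fpxsave_ctxt;
			fprestore_ctxt = fpxrestore_ctxt;
		}
	}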
diff --git a/usr/src/uts/i86pc/os/intr.c b/usr/src/uts/i86pc/os/intr.c
index f66f0e69e8..29fa78109c 100644
--- a/usr/src/uts/i86pc/os/intr.c
+++ b/usr/src/uts/i86pc/os/intr.c
@@ -1446,6 +1446,8 @@ loop:
*/
tp = CPU->cpu_thread;
if (USERMODE(rp->r_cs)) {
+ pcb_t *pcb;
+
/*
* Check if AST pending.
*/
@@ -1460,14 +1462,29 @@ loop:
goto loop;
}
-#if defined(__amd64)
+ pcb = &tp->t_lwp->lwp_pcb;
+
+ /*
+ * Check to see if we need to initialize the FPU for this
+ * thread. This should be an uncommon occurrence, but may happen
+ * in the case where the system creates an lwp through an
+ * abnormal path such as the agent lwp. Make sure that we still
+ * happen to have the FPU in a good state.
+ */
+ if ((pcb->pcb_fpu.fpu_flags & FPU_EN) == 0) {
+ kpreempt_disable();
+ fp_seed();
+ kpreempt_enable();
+ PCB_SET_UPDATE_FPU(pcb);
+ }
+
/*
* We are done if segment registers do not need updating.
*/
- if (tp->t_lwp->lwp_pcb.pcb_rupdate == 0)
+ if (!PCB_NEED_UPDATE(pcb))
return (1);
- if (update_sregs(rp, tp->t_lwp)) {
+ if (PCB_NEED_UPDATE_SEGS(pcb) && update_sregs(rp, tp->t_lwp)) {
/*
* 1 or more of the selectors is bad.
* Deliver a SIGSEGV.
@@ -1482,9 +1499,15 @@ loop:
tp->t_sig_check = 1;
cli();
}
- tp->t_lwp->lwp_pcb.pcb_rupdate = 0;
+ PCB_CLEAR_UPDATE_SEGS(pcb);
+
+ if (PCB_NEED_UPDATE_FPU(pcb)) {
+ fprestore_ctxt(&pcb->pcb_fpu);
+ }
+ PCB_CLEAR_UPDATE_FPU(pcb);
+
+ ASSERT0(PCB_NEED_UPDATE(pcb));
-#endif /* __amd64 */
return (1);
}
diff --git a/usr/src/uts/i86pc/os/trap.c b/usr/src/uts/i86pc/os/trap.c
index 8142ef8491..cbb7026067 100644
--- a/usr/src/uts/i86pc/os/trap.c
+++ b/usr/src/uts/i86pc/os/trap.c
@@ -1005,50 +1005,25 @@ trap(struct regs *rp, caddr_t addr, processorid_t cpuid)
fault = FLTIOVF;
break;
+ /*
+ * When using an eager FPU on x86, the #NM trap is no longer meaningful.
+ * Userland should not be able to trigger it. Anything that does
+ * represents a fatal error in the kernel and likely in the register
+ * state of the system. User FPU state should always be valid.
+ */
case T_NOEXTFLT + USER: /* math coprocessor not available */
- if (tudebug && tudebugfpe)
- showregs(type, rp, addr);
- if (fpnoextflt(rp)) {
- siginfo.si_signo = SIGILL;
- siginfo.si_code = ILL_ILLOPC;
- siginfo.si_addr = (caddr_t)rp->r_pc;
- fault = FLTILL;
- }
- break;
-
- case T_EXTOVRFLT: /* extension overrun fault */
- /* check if we took a kernel trap on behalf of user */
- {
- extern void ndptrap_frstor(void);
- if (rp->r_pc != (uintptr_t)ndptrap_frstor) {
- sti(); /* T_EXTOVRFLT comes in via cmninttrap */
- (void) die(type, rp, addr, cpuid);
- }
- type |= USER;
- }
- /*FALLTHROUGH*/
- case T_EXTOVRFLT + USER: /* extension overrun fault */
- if (tudebug && tudebugfpe)
- showregs(type, rp, addr);
- if (fpextovrflt(rp)) {
- siginfo.si_signo = SIGSEGV;
- siginfo.si_code = SEGV_MAPERR;
- siginfo.si_addr = (caddr_t)rp->r_pc;
- fault = FLTBOUNDS;
- }
+ case T_NOEXTFLT:
+ (void) die(type, rp, addr, cpuid);
break;
+ /*
+ * Kernel threads leveraging floating point need to mask the exceptions
+ * or ensure that they cannot happen. There is no recovery from this.
+ */
case T_EXTERRFLT: /* x87 floating point exception pending */
- /* check if we took a kernel trap on behalf of user */
- {
- extern void ndptrap_frstor(void);
- if (rp->r_pc != (uintptr_t)ndptrap_frstor) {
- sti(); /* T_EXTERRFLT comes in via cmninttrap */
- (void) die(type, rp, addr, cpuid);
- }
- type |= USER;
- }
- /*FALLTHROUGH*/
+ sti(); /* T_EXTERRFLT comes in via cmninttrap */
+ (void) die(type, rp, addr, cpuid);
+ break;
case T_EXTERRFLT + USER: /* x87 floating point exception pending */
if (tudebug && tudebugfpe)
@@ -1951,7 +1926,7 @@ kern_gpfault(struct regs *rp)
}
#if defined(__amd64)
- if (trp == NULL && lwp->lwp_pcb.pcb_rupdate != 0) {
+ if (trp == NULL && PCB_NEED_UPDATE_SEGS(&lwp->lwp_pcb)) {
/*
* This is the common case -- we're trying to load
diff --git a/usr/src/uts/intel/brand/lx/lx_archdep.c b/usr/src/uts/intel/brand/lx/lx_archdep.c
index 5d2c33ab9f..24f3d2c446 100644
--- a/usr/src/uts/intel/brand/lx/lx_archdep.c
+++ b/usr/src/uts/intel/brand/lx/lx_archdep.c
@@ -10,7 +10,7 @@
*/
/*
- * Copyright 2016 Joyent, Inc.
+ * Copyright 2018 Joyent, Inc.
*/
/*
@@ -391,7 +391,7 @@ lx_get_user_regs32(lx_lwp_data_t *lwpd, lx_user_regs32_t *lxrp)
lxrp->lxur_xss = (int32_t)rp->r_ss;
kpreempt_disable();
- if (pcb->pcb_rupdate == 1) {
+ if (PCB_NEED_UPDATE_SEGS(pcb)) {
lxrp->lxur_xds = pcb->pcb_ds;
lxrp->lxur_xes = pcb->pcb_es;
lxrp->lxur_xfs = pcb->pcb_fs;
@@ -523,7 +523,7 @@ lx_set_user_regs32(lx_lwp_data_t *lwpd, lx_user_regs32_t *lxrp)
DATAMODEL_ILP32);
kpreempt_disable();
- pcb->pcb_rupdate = 1;
+ PCB_SET_UPDATE_SEGS(pcb);
pcb->pcb_ds = fix_segreg(lxrp->lxur_xds, IS_NOT_CS, DATAMODEL_ILP32);
pcb->pcb_es = fix_segreg(lxrp->lxur_xes, IS_NOT_CS, DATAMODEL_ILP32);
pcb->pcb_fs = fix_segreg(lxrp->lxur_xfs, IS_NOT_CS, DATAMODEL_ILP32);
@@ -738,7 +738,7 @@ lx_get_user_regs64(lx_lwp_data_t *lwpd, lx_user_regs64_t *lxrp)
}
kpreempt_disable();
- if (pcb->pcb_rupdate == 1) {
+ if (PCB_NEED_UPDATE_SEGS(pcb)) {
lxrp->lxur_xds = pcb->pcb_ds;
lxrp->lxur_xes = pcb->pcb_es;
lxrp->lxur_xfs = pcb->pcb_fs;
@@ -915,7 +915,7 @@ lx_set_user_regs64(lx_lwp_data_t *lwpd, lx_user_regs64_t *lxrp)
pcb->pcb_gsbase = lxrp->lxur_xgs_base;
kpreempt_disable();
- pcb->pcb_rupdate = 1;
+ PCB_SET_UPDATE_SEGS(pcb);
pcb->pcb_ds = fix_segreg(lxrp->lxur_xds, IS_NOT_CS, DATAMODEL_LP64);
pcb->pcb_es = fix_segreg(lxrp->lxur_xes, IS_NOT_CS, DATAMODEL_LP64);
pcb->pcb_fs = fix_segreg(lxrp->lxur_xfs, IS_NOT_CS, DATAMODEL_LP64);
@@ -1271,7 +1271,7 @@ lx_switch_to_native(klwp_t *lwp)
* is loaded:
*/
kpreempt_disable();
- if (pcb->pcb_rupdate == 1) {
+ if (PCB_NEED_UPDATE_SEGS(pcb)) {
/*
* If we are already flushing the segment registers,
* then ensure we are flushing the native %gs.
@@ -1290,7 +1290,7 @@ lx_switch_to_native(klwp_t *lwp)
/*
* Ensure we go out via update_sregs.
*/
- pcb->pcb_rupdate = 1;
+ PCB_SET_UPDATE_SEGS(pcb);
}
}
kpreempt_enable();
@@ -1314,7 +1314,7 @@ lx_switch_to_native(klwp_t *lwp)
/*
* Ensure we go out via update_sregs.
*/
- pcb->pcb_rupdate = 1;
+ PCB_SET_UPDATE_SEGS(pcb);
}
kpreempt_enable();
}
@@ -1331,7 +1331,7 @@ lx_switch_to_native(klwp_t *lwp)
/*
* Ensure we go out via update_sregs.
*/
- pcb->pcb_rupdate = 1;
+ PCB_SET_UPDATE_SEGS(pcb);
}
kpreempt_enable();
}
diff --git a/usr/src/uts/intel/ia32/ml/exception.s b/usr/src/uts/intel/ia32/ml/exception.s
index 82d449f31c..66eda34c14 100644
--- a/usr/src/uts/intel/ia32/ml/exception.s
+++ b/usr/src/uts/intel/ia32/ml/exception.s
@@ -51,17 +51,7 @@
#include <sys/traptrace.h>
#include <sys/machparam.h>
-/*
- * only one routine in this file is interesting to lint
- */
-
-#if defined(__lint)
-
-void
-ndptrap_frstor(void)
-{}
-
-#else
+#if !defined(__lint)
#include "assym.h"
@@ -643,220 +633,16 @@ _emul_done:
#endif /* __i386 */
-#if defined(__amd64)
-
/*
* #NM
*/
-#if defined(__xpv)
ENTRY_NP(ndptrap)
- /*
- * (On the hypervisor we must make a hypercall so we might as well
- * save everything and handle as in a normal trap.)
- */
- TRAP_NOERR(T_NOEXTFLT) /* $7 */
- INTR_PUSH
-
- /*
- * We want to do this quickly as every lwp using fp will take this
- * after a context switch -- we do the frequent path in ndptrap_frstor
- * below; for all other cases, we let the trap code handle it
- */
- LOADCPU(%rax) /* swapgs handled in hypervisor */
- cmpl $0, fpu_exists(%rip)
- je .handle_in_trap /* let trap handle no fp case */
- movq CPU_THREAD(%rax), %rbx /* %rbx = curthread */
- movl $FPU_EN, %eax
- movq T_LWP(%rbx), %rbx /* %rbx = lwp */
- testq %rbx, %rbx
- jz .handle_in_trap /* should not happen? */
-#if LWP_PCB_FPU != 0
- addq $LWP_PCB_FPU, %rbx /* &lwp->lwp_pcb.pcb_fpu */
-#endif
- testl %eax, PCB_FPU_FLAGS(%rbx)
- jz .handle_in_trap /* must be the first fault */
- CLTS
- andl $_BITNOT(FPU_VALID), PCB_FPU_FLAGS(%rbx)
-#if FPU_CTX_FPU_REGS != 0
- addq $FPU_CTX_FPU_REGS, %rbx
-#endif
-
- movl FPU_CTX_FPU_XSAVE_MASK(%rbx), %eax /* for xrstor */
- movl FPU_CTX_FPU_XSAVE_MASK+4(%rbx), %edx /* for xrstor */
-
- /*
- * the label below is used in trap.c to detect FP faults in
- * kernel due to user fault.
- */
- ALTENTRY(ndptrap_frstor)
- movq (%rbx), %rbx /* fpu_regs.kfpu_u.kfpu_XX pointer */
- .globl _patch_xrstorq_rbx
-_patch_xrstorq_rbx:
- fxrstorq (%rbx)
- cmpw $KCS_SEL, REGOFF_CS(%rsp)
- je .return_to_kernel
-
- ASSERT_UPCALL_MASK_IS_SET
- USER_POP
- IRET /* return to user mode */
- /*NOTREACHED*/
-
-.return_to_kernel:
- INTR_POP
- IRET
- /*NOTREACHED*/
-
-.handle_in_trap:
- INTR_POP
- pushq $0 /* can not use TRAP_NOERR */
- pushq $T_NOEXTFLT
- jmp cmninttrap
- SET_SIZE(ndptrap_frstor)
- SET_SIZE(ndptrap)
-
-#else /* __xpv */
-
- ENTRY_NP(ndptrap)
- /*
- * We want to do this quickly as every lwp using fp will take this
- * after a context switch -- we do the frequent path in ndptrap_frstor
- * below; for all other cases, we let the trap code handle it
- */
- pushq %rax
- pushq %rbx
- cmpw $KCS_SEL, 24(%rsp) /* did we come from kernel mode? */
- jne 1f
- LOADCPU(%rax) /* if yes, don't swapgs */
- jmp 2f
-1:
- SWAPGS /* if from user, need swapgs */
- LOADCPU(%rax)
- SWAPGS
-2:
- /*
- * Xrstor needs to use edx as part of its flag.
- * NOTE: have to push rdx after "cmpw ...24(%rsp)", otherwise rsp+$24
- * will not point to CS.
- */
- pushq %rdx
- cmpl $0, fpu_exists(%rip)
- je .handle_in_trap /* let trap handle no fp case */
- movq CPU_THREAD(%rax), %rbx /* %rbx = curthread */
- movl $FPU_EN, %eax
- movq T_LWP(%rbx), %rbx /* %rbx = lwp */
- testq %rbx, %rbx
- jz .handle_in_trap /* should not happen? */
-#if LWP_PCB_FPU != 0
- addq $LWP_PCB_FPU, %rbx /* &lwp->lwp_pcb.pcb_fpu */
-#endif
- testl %eax, PCB_FPU_FLAGS(%rbx)
- jz .handle_in_trap /* must be the first fault */
- clts
- andl $_BITNOT(FPU_VALID), PCB_FPU_FLAGS(%rbx)
-#if FPU_CTX_FPU_REGS != 0
- addq $FPU_CTX_FPU_REGS, %rbx
-#endif
-
- movl FPU_CTX_FPU_XSAVE_MASK(%rbx), %eax /* for xrstor */
- movl FPU_CTX_FPU_XSAVE_MASK+4(%rbx), %edx /* for xrstor */
-
- /*
- * the label below is used in trap.c to detect FP faults in
- * kernel due to user fault.
- */
- ALTENTRY(ndptrap_frstor)
- movq (%rbx), %rbx /* fpu_regs.kfpu_u.kfpu_XX pointer */
- .globl _patch_xrstorq_rbx
-_patch_xrstorq_rbx:
- fxrstorq (%rbx)
- popq %rdx
- popq %rbx
- popq %rax
- jmp tr_iret_auto
- /*NOTREACHED*/
-
-.handle_in_trap:
- popq %rdx
- popq %rbx
- popq %rax
- TRAP_NOERR(T_NOEXTFLT) /* $7 */
- jmp cmninttrap
- SET_SIZE(ndptrap_frstor)
- SET_SIZE(ndptrap)
-
-#endif /* __xpv */
-
-#elif defined(__i386)
-
- ENTRY_NP(ndptrap)
- /*
- * We want to do this quickly as every lwp using fp will take this
- * after a context switch -- we do the frequent path in fpnoextflt
- * below; for all other cases, we let the trap code handle it
- */
- pushl %eax
- pushl %ebx
- pushl %edx /* for xrstor */
- pushl %ds
- pushl %gs
- movl $KDS_SEL, %ebx
- movw %bx, %ds
- movl $KGS_SEL, %eax
- movw %ax, %gs
- LOADCPU(%eax)
- cmpl $0, fpu_exists
- je .handle_in_trap /* let trap handle no fp case */
- movl CPU_THREAD(%eax), %ebx /* %ebx = curthread */
- movl $FPU_EN, %eax
- movl T_LWP(%ebx), %ebx /* %ebx = lwp */
- testl %ebx, %ebx
- jz .handle_in_trap /* should not happen? */
-#if LWP_PCB_FPU != 0
- addl $LWP_PCB_FPU, %ebx /* &lwp->lwp_pcb.pcb_fpu */
-#endif
- testl %eax, PCB_FPU_FLAGS(%ebx)
- jz .handle_in_trap /* must be the first fault */
- CLTS
- andl $_BITNOT(FPU_VALID), PCB_FPU_FLAGS(%ebx)
-#if FPU_CTX_FPU_REGS != 0
- addl $FPU_CTX_FPU_REGS, %ebx
-#endif
-
- movl FPU_CTX_FPU_XSAVE_MASK(%ebx), %eax /* for xrstor */
- movl FPU_CTX_FPU_XSAVE_MASK+4(%ebx), %edx /* for xrstor */
-
- /*
- * the label below is used in trap.c to detect FP faults in kernel
- * due to user fault.
- */
- ALTENTRY(ndptrap_frstor)
- movl (%ebx), %ebx /* fpu_regs.kfpu_u.kfpu_XX pointer */
- .globl _patch_fxrstor_ebx
-_patch_fxrstor_ebx:
- .globl _patch_xrstor_ebx
-_patch_xrstor_ebx:
- frstor (%ebx) /* may be patched to fxrstor or xrstor */
- popl %gs
- popl %ds
- popl %edx
- popl %ebx
- popl %eax
- IRET
-
-.handle_in_trap:
- popl %gs
- popl %ds
- popl %edx
- popl %ebx
- popl %eax
- TRAP_NOERR(T_NOEXTFLT) /* $7 */
- jmp cmninttrap
- SET_SIZE(ndptrap_frstor)
+ TRAP_NOERR(T_NOEXTFLT) /* $0 */
+ SET_CPU_GSBASE
+ jmp cmntrap
SET_SIZE(ndptrap)
-#endif /* __i386 */
-
#if !defined(__xpv)
#if defined(__amd64)
@@ -1036,12 +822,6 @@ make_frame:
#endif /* __i386 */
#endif /* !__xpv */
- ENTRY_NP(overrun)
- push $0
- TRAP_NOERR(T_EXTOVRFLT) /* $9 i386 only - not generated */
- jmp cmninttrap
- SET_SIZE(overrun)
-
/*
* #TS
*/
diff --git a/usr/src/uts/intel/ia32/ml/float.s b/usr/src/uts/intel/ia32/ml/float.s
index f154a96851..0a242e0475 100644
--- a/usr/src/uts/intel/ia32/ml/float.s
+++ b/usr/src/uts/intel/ia32/ml/float.s
@@ -21,7 +21,7 @@
/*
* Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2017, Joyent, Inc.
+ * Copyright (c) 2018, Joyent, Inc.
*/
/* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
@@ -79,191 +79,12 @@ fxsave_insn(struct fxsave_state *fx)
#else /* __lint */
-#if defined(__amd64)
-
ENTRY_NP(fxsave_insn)
fxsaveq (%rdi)
ret
SET_SIZE(fxsave_insn)
-#elif defined(__i386)
-
- ENTRY_NP(fxsave_insn)
- movl 4(%esp), %eax
- fxsave (%eax)
- ret
- SET_SIZE(fxsave_insn)
-
-#endif
-
-#endif /* __lint */
-
-#if defined(__i386)
-
-/*
- * If (num1/num2 > num1/num3) the FPU has the FDIV bug.
- */
-
-#if defined(__lint)
-
-int
-fpu_probe_pentium_fdivbug(void)
-{ return (0); }
-
-#else /* __lint */
-
- ENTRY_NP(fpu_probe_pentium_fdivbug)
- fldl .num1
- fldl .num2
- fdivr %st(1), %st
- fxch %st(1)
- fdivl .num3
- fcompp
- fstsw %ax
- sahf
- jae 0f
- movl $1, %eax
- ret
-
-0: xorl %eax, %eax
- ret
-
- .align 4
-.num1: .4byte 0xbce4217d /* 4.999999 */
- .4byte 0x4013ffff
-.num2: .4byte 0x0 /* 15.0 */
- .4byte 0x402e0000
-.num3: .4byte 0xde7210bf /* 14.999999 */
- .4byte 0x402dffff
- SET_SIZE(fpu_probe_pentium_fdivbug)
-
-#endif /* __lint */
-
-/*
- * To cope with processors that do not implement fxsave/fxrstor
- * instructions, patch hot paths in the kernel to use them only
- * when that feature has been detected.
- */
-
-#if defined(__lint)
-
-void
-patch_sse(void)
-{}
-
-void
-patch_sse2(void)
-{}
-
-void
-patch_xsave(void)
-{}
-
-#else /* __lint */
-
- ENTRY_NP(patch_sse)
- _HOT_PATCH_PROLOG
- /
- / frstor (%ebx); nop -> fxrstor (%ebx)
- /
- _HOT_PATCH(_fxrstor_ebx_insn, _patch_fxrstor_ebx, 3)
- /
- / lock; xorl $0, (%esp) -> sfence; ret
- /
- _HOT_PATCH(_sfence_ret_insn, _patch_sfence_ret, 4)
- _HOT_PATCH_EPILOG
- ret
-_fxrstor_ebx_insn: / see ndptrap_frstor()
- fxrstor (%ebx)
-_ldmxcsr_ebx_insn: / see resume_from_zombie()
- ldmxcsr (%ebx)
-_sfence_ret_insn: / see membar_producer()
- sfence
- ret
- SET_SIZE(patch_sse)
-
- ENTRY_NP(patch_sse2)
- _HOT_PATCH_PROLOG
- /
- / lock; xorl $0, (%esp) -> lfence; ret
- /
- _HOT_PATCH(_lfence_ret_insn, _patch_lfence_ret, 4)
- _HOT_PATCH_EPILOG
- ret
-_lfence_ret_insn: / see membar_consumer()
- lfence
- ret
- SET_SIZE(patch_sse2)
-
- /*
- * Patch lazy fp restore instructions in the trap handler
- * to use xrstor instead of frstor
- */
- ENTRY_NP(patch_xsave)
- _HOT_PATCH_PROLOG
- /
- / frstor (%ebx); nop -> xrstor (%ebx)
- /
- _HOT_PATCH(_xrstor_ebx_insn, _patch_xrstor_ebx, 3)
- _HOT_PATCH_EPILOG
- ret
-_xrstor_ebx_insn: / see ndptrap_frstor()
- xrstor (%ebx)
- SET_SIZE(patch_xsave)
-
-#endif /* __lint */
-#endif /* __i386 */
-
-#if defined(__amd64)
-#if defined(__lint)
-
-void
-patch_xsave(void)
-{}
-
-#else /* __lint */
-
- /*
- * Patch lazy fp restore instructions in the trap handler
- * to use xrstor instead of fxrstorq
- */
- ENTRY_NP(patch_xsave)
- pushq %rbx
- pushq %rbp
- pushq %r15
- /
- / fxrstorq (%rbx); -> nop; xrstor (%rbx)
- / loop doing the following for 4 bytes:
- / hot_patch_kernel_text(_patch_xrstorq_rbx, _xrstor_rbx_insn, 1)
- /
- leaq _patch_xrstorq_rbx(%rip), %rbx
- leaq _xrstor_rbx_insn(%rip), %rbp
- movq $4, %r15
-1:
- movq %rbx, %rdi /* patch address */
- movzbq (%rbp), %rsi /* instruction byte */
- movq $1, %rdx /* count */
- call hot_patch_kernel_text
- addq $1, %rbx
- addq $1, %rbp
- subq $1, %r15
- jnz 1b
-
- popq %r15
- popq %rbp
- popq %rbx
- ret
-
-_xrstor_rbx_insn: / see ndptrap_frstor()
- # Because the fxrstorq instruction we're patching is 4 bytes long, due
- # to the 0x48 prefix (indicating 64-bit operand size), we patch 4 bytes
- # too.
- nop
- xrstor (%rbx)
- SET_SIZE(patch_xsave)
-
#endif /* __lint */
-#endif /* __amd64 */
/*
* One of these routines is called from any lwp with floating
@@ -287,15 +108,8 @@ void
fpxsave_ctxt(void *arg)
{}
-/*ARGSUSED*/
-void
-fpnsave_ctxt(void *arg)
-{}
-
#else /* __lint */
-#if defined(__amd64)
-
/*
* These three functions define the Intel "xsave" handling for CPUs with
* different features. Newer AMD CPUs can also use these functions. See the
@@ -305,7 +119,7 @@ fpnsave_ctxt(void *arg)
cmpl $FPU_EN, FPU_CTX_FPU_FLAGS(%rdi)
jne 1f
movl $_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%rdi)
- movq FPU_CTX_FPU_REGS(%rdi), %rdi /* fpu_regs.kfpu_u.kfpu_fn ptr */
+ movq FPU_CTX_FPU_REGS(%rdi), %rdi /* fpu_regs.kfpu_u.kfpu_fx ptr */
fxsaveq (%rdi)
STTS(%rsi) /* trap on next fpu touch */
1: rep; ret /* use 2 byte return instruction when branch target */
@@ -352,7 +166,7 @@ fpnsave_ctxt(void *arg)
cmpl $FPU_EN, FPU_CTX_FPU_FLAGS(%rdi)
jne 1f
movl $_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%rdi)
- movq FPU_CTX_FPU_REGS(%rdi), %rdi /* fpu_regs.kfpu_u.kfpu_fn ptr */
+ movq FPU_CTX_FPU_REGS(%rdi), %rdi /* fpu_regs.kfpu_u.kfpu_fx ptr */
fxsaveq (%rdi)
/*
* To ensure that we don't leak these values into the next context
@@ -405,126 +219,6 @@ fpnsave_ctxt(void *arg)
1: ret
SET_SIZE(xsaveopt_excp_clr_ctxt)
-#elif defined(__i386)
-
- ENTRY_NP(fpnsave_ctxt)
- movl 4(%esp), %eax /* a struct fpu_ctx */
- cmpl $FPU_EN, FPU_CTX_FPU_FLAGS(%eax)
- jne 1f
- movl $_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%eax)
- movl FPU_CTX_FPU_REGS(%eax), %eax /* fpu_regs.kfpu_u.kfpu_fx ptr */
- fnsave (%eax)
- /* (fnsave also reinitializes x87 state) */
- STTS(%edx) /* trap on next fpu touch */
-1: rep; ret /* use 2 byte return instruction when branch target */
- /* AMD Software Optimization Guide - Section 6.2 */
- SET_SIZE(fpnsave_ctxt)
-
- ENTRY_NP(fpxsave_ctxt)
- movl 4(%esp), %eax /* a struct fpu_ctx */
- cmpl $FPU_EN, FPU_CTX_FPU_FLAGS(%eax)
- jne 1f
- movl $_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%eax)
- movl FPU_CTX_FPU_REGS(%eax), %eax /* fpu_regs.kfpu_u.kfpu_fn ptr */
- fxsave (%eax)
- STTS(%edx) /* trap on next fpu touch */
-1: rep; ret /* use 2 byte return instruction when branch target */
- /* AMD Software Optimization Guide - Section 6.2 */
- SET_SIZE(fpxsave_ctxt)
-
- ENTRY_NP(xsave_ctxt)
- movl 4(%esp), %ecx /* a struct fpu_ctx */
- cmpl $FPU_EN, FPU_CTX_FPU_FLAGS(%ecx)
- jne 1f
- movl $_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%ecx)
- movl FPU_CTX_FPU_XSAVE_MASK(%ecx), %eax
- movl FPU_CTX_FPU_XSAVE_MASK+4(%ecx), %edx
- movl FPU_CTX_FPU_REGS(%ecx), %ecx /* fpu_regs.kfpu_u.kfpu_xs ptr */
- xsave (%ecx)
- STTS(%edx) /* trap on next fpu touch */
-1: ret
- SET_SIZE(xsave_ctxt)
-
- ENTRY_NP(xsaveopt_ctxt)
- movl 4(%esp), %ecx /* a struct fpu_ctx */
- cmpl $FPU_EN, FPU_CTX_FPU_FLAGS(%ecx)
- jne 1f
- movl $_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%ecx)
- movl FPU_CTX_FPU_XSAVE_MASK(%ecx), %eax
- movl FPU_CTX_FPU_XSAVE_MASK+4(%ecx), %edx
- movl FPU_CTX_FPU_REGS(%ecx), %ecx /* fpu_regs.kfpu_u.kfpu_xs ptr */
- xsaveopt (%ecx)
- STTS(%edx) /* trap on next fpu touch */
-1: ret
- SET_SIZE(xsaveopt_ctxt)
-
-/*
- * See comment above the __amd64 implementation of fpxsave_excp_clr_ctxt()
- * for details about the following threee functions for AMD "exception pointer"
- * handling.
- */
-
- ENTRY_NP(fpxsave_excp_clr_ctxt)
- movl 4(%esp), %eax /* a struct fpu_ctx */
- cmpl $FPU_EN, FPU_CTX_FPU_FLAGS(%eax)
- jne 1f
-
- movl $_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%eax)
- movl FPU_CTX_FPU_REGS(%eax), %eax /* fpu_regs.kfpu_u.kfpu_fn ptr */
- fxsave (%eax)
- btw $7, FXSAVE_STATE_FSW(%eax) /* Test saved ES bit */
- jnc 0f /* jump if ES = 0 */
- fnclex /* clear pending x87 exceptions */
-0: ffree %st(7) /* clear tag bit to remove possible stack overflow */
- fildl .fpzero_const
- /* dummy load changes all exception pointers */
- STTS(%edx) /* trap on next fpu touch */
-1: rep; ret /* use 2 byte return instruction when branch target */
- /* AMD Software Optimization Guide - Section 6.2 */
- SET_SIZE(fpxsave_excp_clr_ctxt)
-
- ENTRY_NP(xsave_excp_clr_ctxt)
- movl 4(%esp), %ecx /* a struct fpu_ctx */
- cmpl $FPU_EN, FPU_CTX_FPU_FLAGS(%ecx)
- jne 1f
-
- movl $_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%ecx)
- movl FPU_CTX_FPU_XSAVE_MASK(%ecx), %eax
- movl FPU_CTX_FPU_XSAVE_MASK+4(%ecx), %edx
- movl FPU_CTX_FPU_REGS(%ecx), %ecx /* fpu_regs.kfpu_u.kfpu_xs ptr */
- xsave (%ecx)
- btw $7, FXSAVE_STATE_FSW(%ecx) /* Test saved ES bit */
- jnc 0f /* jump if ES = 0 */
- fnclex /* clear pending x87 exceptions */
-0: ffree %st(7) /* clear tag bit to remove possible stack overflow */
- fildl .fpzero_const
- /* dummy load changes all exception pointers */
- STTS(%edx) /* trap on next fpu touch */
-1: ret
- SET_SIZE(xsave_excp_clr_ctxt)
-
- ENTRY_NP(xsaveopt_excp_clr_ctxt)
- movl 4(%esp), %ecx /* a struct fpu_ctx */
- cmpl $FPU_EN, FPU_CTX_FPU_FLAGS(%ecx)
- jne 1f
-
- movl $_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%ecx)
- movl FPU_CTX_FPU_XSAVE_MASK(%ecx), %eax
- movl FPU_CTX_FPU_XSAVE_MASK+4(%ecx), %edx
- movl FPU_CTX_FPU_REGS(%ecx), %ecx /* fpu_regs.kfpu_u.kfpu_xs ptr */
- xsaveopt (%ecx)
- btw $7, FXSAVE_STATE_FSW(%ecx) /* Test saved ES bit */
- jnc 0f /* jump if ES = 0 */
- fnclex /* clear pending x87 exceptions */
-0: ffree %st(7) /* clear tag bit to remove possible stack overflow */
- fildl .fpzero_const
- /* dummy load changes all exception pointers */
- STTS(%edx) /* trap on next fpu touch */
-1: ret
- SET_SIZE(xsaveopt_excp_clr_ctxt)
-
-#endif /* __i386 */
-
.align 8
.fpzero_const:
.4byte 0x0
@@ -557,8 +251,6 @@ xsaveopt(struct xsave_state *f, uint64_t m)
#else /* __lint */
-#if defined(__amd64)
-
ENTRY_NP(fpxsave)
CLTS
fxsaveq (%rdi)
@@ -591,58 +283,55 @@ xsaveopt(struct xsave_state *f, uint64_t m)
ret
SET_SIZE(xsaveopt)
-#elif defined(__i386)
+#endif /* __lint */
- ENTRY_NP(fpsave)
- CLTS
- movl 4(%esp), %eax
- fnsave (%eax)
- STTS(%eax) /* set TS bit in %cr0 (disable FPU) */
- ret
- SET_SIZE(fpsave)
+/*
+ * These functions are used when restoring the FPU as part of the epilogue of a
+ * context switch.
+ */
- ENTRY_NP(fpxsave)
- CLTS
- movl 4(%esp), %eax
- fxsave (%eax)
- fninit /* clear exceptions, init x87 tags */
- STTS(%eax) /* set TS bit in %cr0 (disable FPU) */
- ret
- SET_SIZE(fpxsave)
+#if defined(__lint)
- ENTRY_NP(xsave)
- CLTS
- movl 4(%esp), %ecx
- movl 8(%esp), %eax
- movl 12(%esp), %edx
- xsave (%ecx)
+/*ARGSUSED*/
+void
+fpxrestore_ctxt(void *arg)
+{}
- fninit /* clear exceptions, init x87 tags */
- STTS(%eax) /* set TS bit in %cr0 (disable FPU) */
- ret
- SET_SIZE(xsave)
+/*ARGSUSED*/
+void
+xrestore_ctxt(void *arg)
+{}
- ENTRY_NP(xsaveopt)
+#else /* __lint */
+
+ ENTRY(fpxrestore_ctxt)
+ cmpl $_CONST(FPU_EN|FPU_VALID), FPU_CTX_FPU_FLAGS(%rdi)
+ jne 1f
+ movl $_CONST(FPU_EN), FPU_CTX_FPU_FLAGS(%rdi)
+ movq FPU_CTX_FPU_REGS(%rdi), %rdi /* fpu_regs.kfpu_u.kfpu_fx ptr */
CLTS
- movl 4(%esp), %ecx
- movl 8(%esp), %eax
- movl 12(%esp), %edx
- xsaveopt (%ecx)
+ fxrstorq (%rdi)
+1:
+ ret
+ SET_SIZE(fpxrestore_ctxt)
- fninit /* clear exceptions, init x87 tags */
- STTS(%eax) /* set TS bit in %cr0 (disable FPU) */
+ ENTRY(xrestore_ctxt)
+ cmpl $_CONST(FPU_EN|FPU_VALID), FPU_CTX_FPU_FLAGS(%rdi)
+ jne 1f
+ movl $_CONST(FPU_EN), FPU_CTX_FPU_FLAGS(%rdi)
+ movl FPU_CTX_FPU_XSAVE_MASK(%rdi), %eax /* xsave flags in EDX:EAX */
+ movl FPU_CTX_FPU_XSAVE_MASK+4(%rdi), %edx
+ movq FPU_CTX_FPU_REGS(%rdi), %rdi /* fpu_regs.kfpu_u.kfpu_xs ptr */
+ CLTS
+ xrstor (%rdi)
+1:
ret
- SET_SIZE(xsaveopt)
+ SET_SIZE(xrestore_ctxt)
-#endif /* __i386 */
#endif /* __lint */
-#if defined(__lint)
-/*ARGSUSED*/
-void
-fprestore(struct fnsave_state *f)
-{}
+#if defined(__lint)
/*ARGSUSED*/
void
@@ -656,8 +345,6 @@ xrestore(struct xsave_state *f, uint64_t m)
#else /* __lint */
-#if defined(__amd64)
-
ENTRY_NP(fpxrestore)
CLTS
fxrstorq (%rdi)
@@ -673,32 +360,6 @@ xrestore(struct xsave_state *f, uint64_t m)
ret
SET_SIZE(xrestore)
-#elif defined(__i386)
-
- ENTRY_NP(fprestore)
- CLTS
- movl 4(%esp), %eax
- frstor (%eax)
- ret
- SET_SIZE(fprestore)
-
- ENTRY_NP(fpxrestore)
- CLTS
- movl 4(%esp), %eax
- fxrstor (%eax)
- ret
- SET_SIZE(fpxrestore)
-
- ENTRY_NP(xrestore)
- CLTS
- movl 4(%esp), %ecx
- movl 8(%esp), %eax
- movl 12(%esp), %edx
- xrstor (%ecx)
- ret
- SET_SIZE(xrestore)
-
-#endif /* __i386 */
#endif /* __lint */
/*
@@ -713,21 +374,11 @@ fpdisable(void)
#else /* __lint */
-#if defined(__amd64)
-
ENTRY_NP(fpdisable)
STTS(%rdi) /* set TS bit in %cr0 (disable FPU) */
ret
SET_SIZE(fpdisable)
-#elif defined(__i386)
-
- ENTRY_NP(fpdisable)
- STTS(%eax)
- ret
- SET_SIZE(fpdisable)
-
-#endif /* __i386 */
#endif /* __lint */
/*
@@ -742,8 +393,6 @@ fpinit(void)
#else /* __lint */
-#if defined(__amd64)
-
ENTRY_NP(fpinit)
CLTS
cmpl $FP_XSAVE, fp_save_mech
@@ -765,38 +414,6 @@ fpinit(void)
ret
SET_SIZE(fpinit)
-#elif defined(__i386)
-
- ENTRY_NP(fpinit)
- CLTS
- cmpl $FP_FXSAVE, fp_save_mech
- je 1f
- cmpl $FP_XSAVE, fp_save_mech
- je 2f
-
- /* fnsave */
- fninit
- movl $x87_initial, %eax
- frstor (%eax) /* load clean initial state */
- ret
-
-1: /* fxsave */
- movl $sse_initial, %eax
- fxrstor (%eax) /* load clean initial state */
- ret
-
-2: /* xsave */
- movl $avx_initial, %ecx
- xorl %edx, %edx
- movl $XFEATURE_AVX, %eax
- bt $X86FSET_AVX, x86_featureset
- cmovael %edx, %eax
- orl $(XFEATURE_LEGACY_FP | XFEATURE_SSE), %eax
- xrstor (%ecx)
- ret
- SET_SIZE(fpinit)
-
-#endif /* __i386 */
#endif /* __lint */
/*
@@ -816,8 +433,6 @@ fpxerr_reset(void)
#else /* __lint */
-#if defined(__amd64)
-
ENTRY_NP(fperr_reset)
CLTS
xorl %eax, %eax
@@ -839,28 +454,6 @@ fpxerr_reset(void)
ret
SET_SIZE(fpxerr_reset)
-#elif defined(__i386)
-
- ENTRY_NP(fperr_reset)
- CLTS
- xorl %eax, %eax
- fnstsw %ax
- fnclex
- ret
- SET_SIZE(fperr_reset)
-
- ENTRY_NP(fpxerr_reset)
- CLTS
- subl $4, %esp /* make some temporary space */
- stmxcsr (%esp)
- movl (%esp), %eax
- andl $_BITNOT(SSE_MXCSR_EFLAGS), (%esp)
- ldmxcsr (%esp) /* clear processor exceptions */
- addl $4, %esp
- ret
- SET_SIZE(fpxerr_reset)
-
-#endif /* __i386 */
#endif /* __lint */
#if defined(__lint)
@@ -873,8 +466,6 @@ fpgetcwsw(void)
#else /* __lint */
-#if defined(__amd64)
-
ENTRY_NP(fpgetcwsw)
pushq %rbp
movq %rsp, %rbp
@@ -887,19 +478,6 @@ fpgetcwsw(void)
ret
SET_SIZE(fpgetcwsw)
-#elif defined(__i386)
-
- ENTRY_NP(fpgetcwsw)
- CLTS
- subl $4, %esp /* make some temporary space */
- fnstsw (%esp) /* store the status word */
- fnstcw 2(%esp) /* store the control word */
- movl (%esp), %eax /* put both in %eax */
- addl $4, %esp
- ret
- SET_SIZE(fpgetcwsw)
-
-#endif /* __i386 */
#endif /* __lint */
/*
@@ -916,8 +494,6 @@ fpgetmxcsr(void)
#else /* __lint */
-#if defined(__amd64)
-
ENTRY_NP(fpgetmxcsr)
pushq %rbp
movq %rsp, %rbp
@@ -929,16 +505,4 @@ fpgetmxcsr(void)
ret
SET_SIZE(fpgetmxcsr)
-#elif defined(__i386)
-
- ENTRY_NP(fpgetmxcsr)
- CLTS
- subl $4, %esp /* make some temporary space */
- stmxcsr (%esp)
- movl (%esp), %eax
- addl $4, %esp
- ret
- SET_SIZE(fpgetmxcsr)
-
-#endif /* __i386 */
#endif /* __lint */
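
The new xrestore_ctxt above loads the xsave request mask into %edx:%eax before executing xrstor; that register pair is how the instruction is told which state components to restore. For reference, the same convention expressed from C with inline assembly (this helper is illustrative; the kernel uses the assembly routines in the diff above).

	#include <stdint.h>

	struct xsave_state;	/* layout comes from the FPU headers */

	/*
	 * Illustrative only: restore the components named in 'mask' from an
	 * xsave area. The mask is split across %edx:%eax as xrstor requires.
	 */
	static inline void
	xrstor_mask(struct xsave_state *xs, uint64_t mask)
	{
		uint32_t lo = (uint32_t)mask;
		uint32_t hi = (uint32_t)(mask >> 32);

		__asm__ __volatile__("xrstor (%0)"
		    : : "r" (xs), "a" (lo), "d" (hi) : "memory");
	}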
diff --git a/usr/src/uts/intel/ia32/os/archdep.c b/usr/src/uts/intel/ia32/os/archdep.c
index e610f1fb09..830daa0af7 100644
--- a/usr/src/uts/intel/ia32/os/archdep.c
+++ b/usr/src/uts/intel/ia32/os/archdep.c
@@ -317,6 +317,7 @@ setfpregs(klwp_t *lwp, fpregset_t *fp)
fpu->fpu_regs.kfpu_status = fp->fp_reg_set.fpchip_state.status;
fpu->fpu_flags |= FPU_VALID;
+ PCB_SET_UPDATE_FPU(&lwp->lwp_pcb);
}
/*
@@ -464,7 +465,7 @@ getgregs(klwp_t *lwp, gregset_t grp)
grp[REG_GSBASE] = pcb->pcb_gsbase;
if (thisthread)
kpreempt_disable();
- if (pcb->pcb_rupdate == 1) {
+ if (PCB_NEED_UPDATE_SEGS(pcb)) {
grp[REG_DS] = pcb->pcb_ds;
grp[REG_ES] = pcb->pcb_es;
grp[REG_FS] = pcb->pcb_fs;
@@ -500,7 +501,7 @@ getgregs32(klwp_t *lwp, gregset32_t grp)
if (thisthread)
kpreempt_disable();
- if (pcb->pcb_rupdate == 1) {
+ if (PCB_NEED_UPDATE_SEGS(pcb)) {
grp[GS] = (uint16_t)pcb->pcb_gs;
grp[FS] = (uint16_t)pcb->pcb_fs;
grp[DS] = (uint16_t)pcb->pcb_ds;
@@ -775,7 +776,7 @@ setgregs(klwp_t *lwp, gregset_t grp)
/*
* Ensure that we go out via update_sregs
*/
- pcb->pcb_rupdate = 1;
+ PCB_SET_UPDATE_SEGS(pcb);
lwptot(lwp)->t_post_sys = 1;
if (thisthread)
kpreempt_enable();
@@ -812,7 +813,7 @@ setgregs(klwp_t *lwp, gregset_t grp)
/*
* Ensure that we go out via update_sregs
*/
- pcb->pcb_rupdate = 1;
+ PCB_SET_UPDATE_SEGS(pcb);
lwptot(lwp)->t_post_sys = 1;
if (thisthread)
kpreempt_enable();
diff --git a/usr/src/uts/intel/ia32/os/fpu.c b/usr/src/uts/intel/ia32/os/fpu.c
index c307c97957..ddfc821579 100644
--- a/usr/src/uts/intel/ia32/os/fpu.c
+++ b/usr/src/uts/intel/ia32/os/fpu.c
@@ -61,11 +61,388 @@
#include <sys/sysmacros.h>
#include <sys/cmn_err.h>
+/*
+ * FPU Management Overview
+ * -----------------------
+ *
+ * The x86 FPU has evolved substantially since its days as the x87 coprocessor;
+ * however, many aspects of its life as a coprocessor are still around in x86.
+ *
+ * Today, when we refer to the 'FPU', we don't just mean the original x87 FPU.
+ * While that state still exists, there is much more that is covered by the FPU.
+ * Today, this includes not just traditional FPU state, but also supervisor only
+ * state. The following state is currently managed and covered logically by the
+ * idea of the FPU registers:
+ *
+ * o Traditional x87 FPU
+ * o Vector Registers (%xmm, %ymm, %zmm)
+ * o Memory Protection Extensions (MPX) Bounds Registers
+ * o Protected Key Rights Registers (PKRU)
+ * o Processor Trace data
+ *
+ * The rest of this covers how the FPU is managed and controlled, how state is
+ * saved and restored between threads, interactions with hypervisors, and other
+ * information exported to user land through aux vectors. A lot of background
+ * information is here to synthesize major parts of the Intel SDM, but
+ * unfortunately, it is not a replacement for reading it.
+ *
+ * FPU Control Registers
+ * ---------------------
+ *
+ * Because the x87 FPU began its life as a co-processor and the FPU was
+ * optional there are several bits that show up in %cr0 that we have to
+ * manipulate when dealing with the FPU. These are:
+ *
+ * o CR0.ET The 'extension type' bit. This was used originally to indicate
+ * that the FPU co-processor was present. Now it is forced on for
+ * compatibility. This is often used to verify whether or not the
+ * FPU is present.
+ *
+ * o CR0.NE The 'native error' bit. Used to indicate that native error
+ * mode should be enabled. This indicates that we should take traps
+ * on FPU errors. The OS enables this early in boot.
+ *
+ * o CR0.MP The 'Monitor Coprocessor' bit. Used to control whether or not
+ * wait/fwait instructions generate a #NM if CR0.TS is set.
+ *
+ * o CR0.EM The 'Emulation' bit. This is used to cause floating point
+ * operations (x87 through SSE4) to trap with a #UD so they can be
+ * emulated. The system never sets this bit, but makes sure it is
+ * clear on processor start up.
+ *
+ * o CR0.TS The 'Task Switched' bit. When this is turned on, a floating
+ * point operation will generate a #NM. An fwait will as well,
+ * depending on the value in CR0.MP.
+ *
+ * Our general policy is that CR0.ET, CR0.NE, and CR0.MP are always set by
+ * the system. Similarly CR0.EM is always unset by the system. CR0.TS has a more
+ * complicated role. Historically it has been used to allow running systems to
+ * restore the FPU registers lazily. This will be discussed in greater depth
+ * later on.
+ *
+ * %cr4 is also used as part of the FPU control. Specifically we need to worry
+ * about the following bits in the system:
+ *
+ * o CR4.OSFXSR This bit is used to indicate that the OS understands and
+ * supports the execution of the fxsave and fxrstor
+ * instructions. This bit is required to be set to enable
+ * the use of the SSE->SSE4 instructions.
+ *
+ * o CR4.OSXMMEXCPT This bit is used to indicate that the OS can understand
+ * and take a SIMD floating point exception (#XM). This bit
+ * is always enabled by the system.
+ *
+ * o CR4.OSXSAVE This bit is used to indicate that the OS understands and
+ * supports the execution of the xsave and xrstor family of
+ * instructions. This bit is required to use any of the AVX
+ * and newer feature sets.
+ *
+ * Because all supported processors are 64-bit, they'll always support the XMM
+ * extensions and we will enable both CR4.OSFXSR and CR4.OSXMMEXCPT in boot.
+ * CR4.OSXSAVE will be enabled and used whenever xsave is reported in cpuid.
+ *
+ * %xcr0 is used to manage the behavior of the xsave feature set and is only
+ * present on the system if xsave is supported. %xcr0 is read and written
+ * through the xgetbv and xsetbv instructions. This register is present
+ * whenever the xsave feature set is supported. Each bit in %xcr0 refers to a
+ * different component of the xsave state and controls whether or not that
+ * information is saved and restored. For newer feature sets like AVX and MPX,
+ * it also controls whether or not the corresponding instructions can be
+ * executed (much like CR4.OSFXSR does for the SSE feature sets).
+ *
+ * Everything in %xcr0 is around features available to users. There is also the
+ * IA32_XSS MSR which is used to control supervisor-only features that are still
+ * part of the xsave state. Bits that can be set in %xcr0 are reserved in
+ * IA32_XSS and vice versa. This is an important property that is particularly
+ * relevant to how the xsave instructions operate.
+ *
+ * Save Mechanisms
+ * ---------------
+ *
+ * When switching between running threads the FPU state needs to be saved and
+ * restored by the OS. If this state was not saved, users would rightfully
+ * complain about corrupt state. There are three mechanisms that exist on the
+ * processor for saving and restoring these state images:
+ *
+ * o fsave
+ * o fxsave
+ * o xsave
+ *
+ * fsave saves and restores only the x87 FPU and is the oldest of these
+ * mechanisms. This mechanism is never used in the kernel today because we are
+ * always running on systems that support fxsave.
+ *
+ * The fxsave and fxrstor mechanism allows the x87 FPU and the SSE register
+ * state to be saved and restored to and from a struct fxsave_state. This is the
+ * default mechanism that is used to save and restore the FPU on amd64. An
+ * important aspect of fxsave that was different from the original i386 fsave
+ * mechanism is that the restoring of FPU state with pending exceptions will not
+ * generate an exception, it will be deferred to the next use of the FPU.
+ *
+ * The final and by far the most complex mechanism is that of the xsave set.
+ * xsave allows for saving and restoring all of the traditional x86 pieces (x87
+ * and SSE), while allowing for extensions that will save the %ymm, %zmm, etc.
+ * registers.
+ *
+ * Data is saved and restored into and out of a struct xsave_state. The first
+ * part of the struct xsave_state is equivalent to the struct fxsave_state.
+ * After that, there is a header which is used to describe the remaining
+ * portions of the state. The header is a 64-byte value of which the first two
+ * uint64_t values are defined and the rest are reserved and must be zero. The
+ * first uint64_t is the xstate_bv member. This describes which values in the
+ * xsave_state are actually valid and present. This is updated on a save and
+ * used on restore. The second member is the xcomp_bv member. Its last bit
+ * determines whether or not a compressed version of the structure is used.
+ *
+ * When the uncompressed structure is used (currently the only format we
+ * support), then each state component is at a fixed offset in the structure,
+ * even if it is not being used. For example, if you only saved the AVX related
+ * state, but did not save the MPX related state, the offset would not change
+ * for any component. With the compressed format, components that aren't used
+ * are all elided (though the x87 and SSE state are always there).
+ *
+ * Unlike fxsave which saves all state, the xsave family does not always save
+ * and restore all the state that could be covered by the xsave_state. The
+ * instructions all take an argument which is a mask of what to consider. This
+ * is the same mask that will be used in the xstate_bv vector and it is also the
+ * same values that are present in %xcr0 and IA32_XSS. Though IA32_XSS is only
+ * considered with the xsaves and xrstors instructions.
+ *
+ * When a save or restore is requested, a bitwise and is performed between the
+ * requested bits and those that have been enabled in %xcr0. Only the bits that
+ * match that are then saved or restored. Others will be silently ignored by
+ * the processor. This idea is used often in the OS. We will always request that
+ * we save and restore all of the state, but only those portions that are
+ * actually enabled in %xcr0 will be touched.
+ *
+ * If a feature has been asked to be restored that is not set in the xstate_bv
+ * feature vector of the save state, then it will be set to its initial state by
+ * the processor (usually zeros). Also, when asked to save state, the processor
+ * may not write out data that is in its initial state as an optimization. This
+ * optimization only applies to saving data and not to restoring data.
+ *
+ * There are a few different variants of the xsave and xrstor instruction. They
+ * are:
+ *
+ * o xsave This is the original save instruction. It will save all of the
+ * requested data in the xsave state structure. It only saves data
+ * in the uncompressed (xcomp_bv[63] is zero) format. It may be
+ * executed at all privilege levels.
+ *
+ * o xrstor This is the original restore instruction. It will restore all of
+ * the requested data. The xrstor function can handle both the
+ * compressed and uncompressed formats. It may be executed at all
+ * privilege levels.
+ *
+ * o xsaveopt This is a variant of the xsave instruction that employs
+ * optimizations to try and only write out state that has been
+ * modified since the last time an xrstor instruction was called.
+ * The processor tracks a tuple of information about the last
+ * xrstor and tries to ensure that the same buffer is being used
+ * when this optimization is being used. However, because of the
+ * way that it tracks the xrstor buffer based on the address of it,
+ * it is not suitable for use if that buffer can be easily reused.
+ * The most common case is trying to save data to the stack in
+ * rtld. It may be executed at all privilege levels.
+ *
+ * o xsavec This is a variant of the xsave instruction that writes out the
+ * compressed form of the xsave_state. Otherwise it behaves as
+ * xsave. It may be executed at all privilege levels.
+ *
+ * o xsaves This is a variant of the xsave instruction. It is similar to
+ * xsavec in that it always writes the compressed form of the
+ * buffer. Unlike all the other forms, this instruction looks at
+ * both the user (%xcr0) and supervisor (IA32_XSS MSR) to determine
+ * what to save and restore. xsaves also implements the same
+ * optimization that xsaveopt does around modified pieces. User
+ * land may not execute the instruction.
+ *
+ * o xrstors This is a variant of the xrstor instruction. Similar to xsaves
+ * it can save and restore both the user and privileged states.
+ * Unlike xrstor it can only operate on the compressed form.
+ * User land may not execute the instruction.
+ *
+ * Based on all of these, the kernel has a precedence for what it will use.
+ * Basically, xsaves (not supported) is preferred to xsaveopt, which is
+ * preferred to xsave. A similar scheme is used when informing rtld (more later)
+ * about what it should use. xsavec is preferred to xsave. xsaveopt is not
+ * recommended due to the modified optimization not being appropriate for this
+ * use.
+ *
+ * Finally, there is one last gotcha with the xsave state. Importantly some AMD
+ * processors did not always save and restore some of the FPU exception state in
+ * some cases like Intel did. In those cases the OS will make up for this fact
+ * itself.
+ *
+ * FPU Initialization
+ * ------------------
+ *
+ * One difference with the FPU registers is that not all threads have FPU state,
+ * only those that have an lwp. Generally this means kernel threads, which all
+ * share p0 and its lwp, do not have FPU state. Though there are definitely
+ * exceptions such as kcfpoold. In the rest of this discussion we'll use thread
+ * and lwp interchangeably, just think of thread meaning a thread that has a
+ * lwp.
+ *
+ * Each lwp has its FPU state allocated in its pcb (process control block). The
+ * actual storage comes from the fpsave_cachep kmem cache. This cache is sized
+ * dynamically at start up based on the save mechanism that we're using and the
+ * amount of memory required for it. This is dynamic because the xsave_state
+ * size varies based on the supported feature set.
+ *
+ * The hardware side of the FPU is initialized early in boot before we mount the
+ * root file system. This is effectively done in fpu_probe(). This is where we
+ * make the final decision about what the save and restore mechanisms we should
+ * use are, create the fpsave_cachep kmem cache, and initialize a number of
+ * function pointers that use save and restoring logic.
+ *
+ * The thread/lwp side is a little more involved. There are two different
+ * things that we need to concern ourselves with. The first is how the FPU
+ * resources are allocated and the second is how the FPU state is initialized
+ * for a given lwp.
+ *
+ * We allocate the FPU save state from our kmem cache as part of lwp_fp_init().
+ * This is always called unconditionally by the system as part of creating an
+ * LWP.
+ *
+ * There are three different initialization paths that we deal with. The first
+ * is when we are executing a new process. As part of exec all of the register
+ * state is reset. The exec case is particularly important because init is born
+ * like Athena, sprouting from the head of the kernel, without any true parent
+ * to fork from. The second is used whenever we fork or create a new lwp. The
+ * third is to deal with special lwps like the agent lwp.
+ *
+ * During exec, we will call fp_exec() which will initialize and set up the FPU
+ * state for the process. That will fill in the initial state for the FPU and
+ * also set that state in the FPU itself. As part of fp_exec() we also install a
+ * thread context operations vector that takes care of dealing with the saving
+ * and restoring of the FPU. These context handlers will also be called whenever
+ * an lwp is created or forked. In those cases, to initialize the FPU we will
+ * call fp_new_lwp(). Like fp_exec(), fp_new_lwp() will install a context
+ * operations vector for the new thread.
+ *
+ * Next we'll end up in the context operation fp_new_lwp(). This saves the
+ * current thread's state, initializes the new thread's state, and copies over
+ * the relevant parts of the originating thread's state. It's at this point that
+ * we also install the FPU context operations into the new thread, which ensures
+ * that all future threads that are descendants of the current one get the
+ * thread context operations (unless they call exec).
+ *
+ * To deal with some things like the agent lwp, we double check the state of the
+ * FPU in sys_rtt_common() to make sure that it has been enabled before
+ * returning to user land. In general, this path should be rare, but it's useful
+ * for the odd lwp here and there.
+ *
+ * The FPU state will remain valid most of the time. There are times that
+ * the state will be rewritten. For example in restorecontext, due to /proc, or
+ * when the lwp calls exec(). Whether the context is being freed or we are resetting
+ * the state, we will call fp_free() to disable the FPU and our context.
+ *
+ * Finally, when the lwp is destroyed, it will actually destroy and free the FPU
+ * state by calling fp_lwp_cleanup().
+ *
+ * Kernel FPU Multiplexing
+ * -----------------------
+ *
+ * Just as the kernel has to maintain all of the general purpose registers when
+ * switching between scheduled threads, the same is true of the FPU registers.
+ *
+ * When a thread has FPU state, it also has a set of context operations
+ * installed. These context operations take care of making sure that the FPU is
+ * properly saved and restored during a context switch (fpsave_ctxt and
+ * fprestore_ctxt respectively). This means that the current implementation of
+ * the FPU is 'eager': when a thread is running, the CPU will have its FPU
+ * state loaded. While this is always true when executing in userland, there
+ * are a few cases where this is not true in the kernel.
+ *
+ * This was not always the case. Traditionally on x86 a 'lazy' FPU restore was
+ * employed. This meant that the FPU would be saved on a context switch and the
+ * CR0.TS bit would be set. When a thread next tried to use the FPU, it would
+ * then take a #NM trap, at which point we would restore the FPU from the save
+ * area and return to user land. Given how frequently the FPU is used by libc
+ * alone, there's no point returning to user land just to trap again.
+ *
+ * There are a few cases, though, where the kernel may need to change a
+ * thread's FPU state on its behalf. The most notable cases are modification
+ * via /proc, restorecontext, forking, etc. In all of these cases the kernel
+ * will force the thread's FPU state to be saved into the PCB through the
+ * fp_save() function. Whenever the FPU is saved, the FPU_VALID flag is set on
+ * the pcb. This indicates that the save area holds currently valid data. As a
+ * side effect of this, CR0.TS will be set. To make sure that all of the state
+ * is updated before returning to user land, in these cases, we set a flag on
+ * the PCB that says the FPU needs to be updated. This will make sure that we
+ * take the slow path out of a system call to fix things up for the thread.
+ * Because this is a rather rare case, effectively setting the equivalent of
+ * t_post_sys is acceptable.
+ *
+ * CR0.TS will be set after a save occurs and cleared when a restore occurs.
+ * Generally this means it will be cleared immediately by the new thread that is
+ * running in a context switch. However, this isn't the case for kernel
+ * threads. They currently operate with CR0.TS set because no FPU state is
+ * restored for them. This means that a kernel thread using the FPU will
+ * cause a #NM trap and panic.
+ *
+ * The FPU_VALID flag on the currently executing thread's pcb is meant to track
+ * what the value of CR0.TS should be. If it is set, then CR0.TS will be set.
+ * However, because we eagerly restore, the only time that CR0.TS should be
+ * set for a non-kernel thread is during operations where it will be cleared
+ * before returning to user land; importantly, the only data in the FPU at
+ * that point is the thread's own.
+ *
+ * FPU Exceptions
+ * --------------
+ *
+ * Certain operations can cause the kernel to take traps due to FPU activity.
+ * Generally these events will cause a user process to receive a SIGFPE; if
+ * such a trap is taken in kernel context, we will panic. Traditionally the
+ * #NM (Device Not Available / No Math) exception generated by CR0.TS would
+ * have caused us to restore the FPU. Now it is a fatal event regardless of
+ * whether user land or the kernel causes it.
+ *
+ * While there are some cases where the kernel uses the FPU, it is up to the
+ * kernel to use the FPU in such a way that it cannot receive a trap, or to use
+ * the appropriate trap protection mechanisms.
+ *
+ * Hypervisors
+ * -----------
+ *
+ * When providing support for hypervisors, things are a little bit more
+ * complicated because the FPU is not virtualized at all. This means that the
+ * hypervisor needs to save and restore the FPU and %xcr0 across every entry
+ * to and exit from the guest. To facilitate this, we provide a series of
+ * APIs in <sys/hma.h>. These
+ * allow us to use the full native state to make sure that we are always saving
+ * and restoring the full FPU that the host sees, even when the guest is using a
+ * subset.
+ *
+ * One tricky aspect of this is that the guest may be using a subset of %xcr0
+ * and therefore changing our %xcr0 on the fly. It is vital that, when we're
+ * saving and restoring the FPU, we always use the largest %xcr0 contents;
+ * otherwise we will end up leaving data behind in the FPU.
+ *
+ * ELF PLT Support
+ * ---------------
+ *
+ * rtld has to preserve a subset of the FPU when it is saving and restoring
+ * registers due to the amd64 System V ABI. See cmd/sgs/rtld/amd64/boot_elf.s
+ * for more information. As a result, we tell rtld which save and restore
+ * mechanism it should use and how large the save area is, based on what the
+ * kernel supports. This is passed down in the aux vector entries
+ * SUN_AT_FPTYPE and SUN_AT_FPSIZE. This information is initialized in
+ * fpu_subr.c.
+ */
+
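[Editorial aside, not part of this change: the flag handshake described above can be modeled in a few lines of ordinary C. The sketch below reuses the FPU_EN, FPU_VALID and PCB_*_UPDATE_FPU names that this change introduces, but model_pcb, model_fp_save() and model_sys_rtt() are hypothetical stand-ins for pcb_t, fp_save() and sys_rtt_common().]

/*
 * Editorial sketch only: a user-space model of "the kernel rewrote the FPU
 * save area, fix things up on the way back to user land".  The flag names
 * mirror this change; the struct and functions are stand-ins.
 */
#include <stdio.h>

#define	FPU_EN		0x1	/* FPU in use by the lwp */
#define	FPU_VALID	0x2	/* save area holds current state */

#define	PCB_UPDATE_FPU	0x02
#define	PCB_SET_UPDATE_FPU(p)	((p)->pcb_rupdate |= PCB_UPDATE_FPU)
#define	PCB_NEED_UPDATE_FPU(p)	(((p)->pcb_rupdate & PCB_UPDATE_FPU) != 0)
#define	PCB_CLEAR_UPDATE_FPU(p)	((p)->pcb_rupdate &= ~PCB_UPDATE_FPU)

struct model_pcb {
	unsigned char	pcb_rupdate;	/* update flags, as in pcb_t */
	int		fpu_flags;	/* stand-in for pcb_fpu.fpu_flags */
};

/*
 * The kernel rewrote the save area (fork, /proc, restorecontext, ...): mark
 * the saved copy valid and flag the pcb so the slow return path pushes it
 * back into the hardware.
 */
static void
model_fp_save(struct model_pcb *p)
{
	p->fpu_flags |= FPU_VALID;
	PCB_SET_UPDATE_FPU(p);
}

/*
 * Slow path out of a system call: if the pcb holds newer state than the
 * hardware, restore it before returning to user land.
 */
static void
model_sys_rtt(struct model_pcb *p)
{
	if (PCB_NEED_UPDATE_FPU(p)) {
		/* fp_restore() would run here; it clears FPU_VALID */
		p->fpu_flags &= ~FPU_VALID;
		PCB_CLEAR_UPDATE_FPU(p);
	}
}

int
main(void)
{
	struct model_pcb p = { 0, FPU_EN };

	model_fp_save(&p);
	model_sys_rtt(&p);
	(void) printf("fpu_flags = 0x%x\n", p.fpu_flags);	/* 0x1, FPU_EN only */
	return (0);
}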
kmem_cache_t *fpsave_cachep;
/* Legacy fxsave layout + xsave header + ymm */
#define AVX_XSAVE_SIZE (512 + 64 + 256)
+/*
+ * Various sanity checks.
+ */
+CTASSERT(sizeof (struct fxsave_state) == 512);
+CTASSERT(sizeof (struct fnsave_state) == 108);
+CTASSERT((offsetof(struct fxsave_state, fx_xmm[0]) & 0xf) == 0);
+CTASSERT(sizeof (struct xsave_state) >= AVX_XSAVE_SIZE);
+
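[Editorial note: the 832-byte figure behind AVX_XSAVE_SIZE and the assertions above breaks down as 512 bytes of legacy fxsave image, a 64-byte xsave header, and 256 bytes for the YMM_Hi128 component, i.e. the upper 128 bits of each of the sixteen %ymm registers. A stand-alone restatement:]

/* Editorial restatement of the AVX_XSAVE_SIZE arithmetic; not kernel code. */
#include <assert.h>

#define	LEGACY_FXSAVE_SIZE	512		/* struct fxsave_state */
#define	XSAVE_HEADER_SIZE	64		/* xsave header */
#define	YMM_HI128_SIZE		(16 * 16)	/* high halves of %ymm0-%ymm15 */

static_assert(LEGACY_FXSAVE_SIZE + XSAVE_HEADER_SIZE + YMM_HI128_SIZE == 832,
    "the AVX-era xsave area is 832 bytes");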
/*CSTYLED*/
#pragma align 16 (sse_initial)
@@ -150,20 +527,12 @@ const struct fnsave_state x87_initial = {
/* rest of structure is zero */
};
-#if defined(__amd64)
/*
* This vector is patched to xsave_ctxt() if we discover we have an
* XSAVE-capable chip in fpu_probe.
*/
void (*fpsave_ctxt)(void *) = fpxsave_ctxt;
-#elif defined(__i386)
-/*
- * This vector is patched to fpxsave_ctxt() if we discover we have an
- * SSE-capable chip in fpu_probe(). It is patched to xsave_ctxt
- * if we discover we have an XSAVE-capable chip in fpu_probe.
- */
-void (*fpsave_ctxt)(void *) = fpnsave_ctxt;
-#endif
+void (*fprestore_ctxt)(void *) = fpxrestore_ctxt;
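[Editorial aside: fpsave_ctxt and fprestore_ctxt are function pointers because of the probe-time patching idiom noted in the comment above: start with the baseline fxsave/fxrstor handlers and overwrite the vectors in fpu_probe() once the CPU's XSAVE support is known. A hypothetical miniature of the idiom:]

/* Editorial sketch of probe-time vector patching; all names hypothetical. */
static void
model_fxrestore(void *arg)
{
	(void) arg;	/* an fxrstor-based restore would run here */
}

static void
model_xrestore(void *arg)
{
	(void) arg;	/* an xrstor-based restore would run here */
}

/* Default to the baseline handler... */
static void (*model_restore_vec)(void *) = model_fxrestore;

/* ...and patch the vector once hardware capabilities are known. */
static void
model_probe(int has_xsave)
{
	if (has_xsave)
		model_restore_vec = model_xrestore;
}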
/*
* This function pointer is changed to xsaveopt if the CPU is xsaveopt capable.
@@ -187,9 +556,6 @@ fp_new_lwp(kthread_id_t t, kthread_id_t ct)
struct fpu_ctx *fp; /* parent fpu context */
struct fpu_ctx *cfp; /* new fpu context */
struct fxsave_state *fx, *cfx;
-#if defined(__i386)
- struct fnsave_state *fn, *cfn;
-#endif
struct xsave_state *cxs;
ASSERT(fp_kind != FP_NO);
@@ -207,15 +573,13 @@ fp_new_lwp(kthread_id_t t, kthread_id_t ct)
cfp->fpu_regs.kfpu_status = 0;
cfp->fpu_regs.kfpu_xstatus = 0;
+ /*
+ * Make sure that the child's FPU is cleaned up and made ready for user
+ * land.
+ */
+ PCB_SET_UPDATE_FPU(&ct->t_lwp->lwp_pcb);
+
switch (fp_save_mech) {
-#if defined(__i386)
- case FP_FNSAVE:
- fn = fp->fpu_regs.kfpu_u.kfpu_fn;
- cfn = cfp->fpu_regs.kfpu_u.kfpu_fn;
- bcopy(&x87_initial, cfn, sizeof (*cfn));
- cfn->f_fcw = fn->f_fcw;
- break;
-#endif
case FP_FXSAVE:
fx = fp->fpu_regs.kfpu_u.kfpu_fx;
cfx = cfp->fpu_regs.kfpu_u.kfpu_fx;
@@ -244,14 +608,13 @@ fp_new_lwp(kthread_id_t t, kthread_id_t ct)
/*NOTREACHED*/
}
- installctx(ct, cfp,
- fpsave_ctxt, NULL, fp_new_lwp, fp_new_lwp, NULL, fp_free);
/*
- * Now, when the new lwp starts running, it will take a trap
- * that will be handled inline in the trap table to cause
- * the appropriate f*rstor instruction to load the save area we
- * constructed above directly into the hardware.
+ * Mark that both the parent and child need to have the FPU cleaned up
+ * before returning to user land.
*/
+
+ installctx(ct, cfp, fpsave_ctxt, fprestore_ctxt, fp_new_lwp,
+ fp_new_lwp, NULL, fp_free);
}
/*
@@ -313,11 +676,6 @@ fp_save(struct fpu_ctx *fp)
ASSERT(curthread->t_lwp && fp == &curthread->t_lwp->lwp_pcb.pcb_fpu);
switch (fp_save_mech) {
-#if defined(__i386)
- case FP_FNSAVE:
- fpsave(fp->fpu_regs.kfpu_u.kfpu_fn);
- break;
-#endif
case FP_FXSAVE:
fpxsave(fp->fpu_regs.kfpu_u.kfpu_fx);
break;
@@ -331,6 +689,18 @@ fp_save(struct fpu_ctx *fp)
}
fp->fpu_flags |= FPU_VALID;
+
+ /*
+ * We save the FPU as part of forking, execing, modifications via /proc,
+ * restorecontext, etc. As such, we need to make sure that we return to
+ * userland with valid state in the FPU. If we're context switched out
+ * before we hit sys_rtt_common() we'll end up having restored the FPU
+ * as part of the context ops. The restore logic always makes
+ * sure that FPU_VALID is set before doing a restore so we don't restore
+ * it a second time.
+ */
+ PCB_SET_UPDATE_FPU(&curthread->t_lwp->lwp_pcb);
+
kpreempt_enable();
}
@@ -344,11 +714,6 @@ void
fp_restore(struct fpu_ctx *fp)
{
switch (fp_save_mech) {
-#if defined(__i386)
- case FP_FNSAVE:
- fprestore(fp->fpu_regs.kfpu_u.kfpu_fn);
- break;
-#endif
case FP_FXSAVE:
fpxrestore(fp->fpu_regs.kfpu_u.kfpu_fx);
break;
@@ -364,6 +729,33 @@ fp_restore(struct fpu_ctx *fp)
fp->fpu_flags &= ~FPU_VALID;
}
+/*
+ * Reset the FPU such that it is in a valid state for a new thread that is
+ * coming out of exec. The FPU will be in a usable state at this point; we
+ * know that the FPU state has already been allocated and, unless this is the
+ * init process, fp_free() will previously have been called on it.
+ */
+void
+fp_exec(void)
+{
+ struct fpu_ctx *fp = &ttolwp(curthread)->lwp_pcb.pcb_fpu;
+
+ if (fp_save_mech == FP_XSAVE) {
+ fp->fpu_xsave_mask = XFEATURE_FP_ALL;
+ }
+
+ /*
+ * Make sure that we're not preempted in the middle of initializing the
+ * FPU on CPU.
+ */
+ kpreempt_disable();
+ installctx(curthread, fp, fpsave_ctxt, fprestore_ctxt, fp_new_lwp,
+ fp_new_lwp, NULL, fp_free);
+ fpinit();
+ fp->fpu_flags = FPU_EN;
+ kpreempt_enable();
+}
+
/*
* Seeds the initial state for the current thread. The possibilities are:
@@ -371,7 +763,7 @@ fp_restore(struct fpu_ctx *fp)
* initialization: Load the FPU state from the LWP state.
* 2. The FPU state has not been externally modified: Load a clean state.
*/
-static void
+void
fp_seed(void)
{
struct fpu_ctx *fp = &ttolwp(curthread)->lwp_pcb.pcb_fpu;
@@ -386,8 +778,8 @@ fp_seed(void)
fp->fpu_xsave_mask = XFEATURE_FP_ALL;
}
- installctx(curthread, fp,
- fpsave_ctxt, NULL, fp_new_lwp, fp_new_lwp, NULL, fp_free);
+ installctx(curthread, fp, fpsave_ctxt, fprestore_ctxt, fp_new_lwp,
+ fp_new_lwp, NULL, fp_free);
fpinit();
/*
@@ -452,11 +844,6 @@ fp_lwp_dup(struct _klwp *lwp)
size_t sz;
switch (fp_save_mech) {
-#if defined(__i386)
- case FP_FNSAVE:
- sz = sizeof (struct fnsave_state);
- break;
-#endif
case FP_FXSAVE:
sz = sizeof (struct fxsave_state);
break;
@@ -474,119 +861,6 @@ fp_lwp_dup(struct _klwp *lwp)
lwp->lwp_pcb.pcb_fpu.fpu_regs.kfpu_u.kfpu_generic = xp;
}
-
-/*
- * This routine is called from trap() when User thread takes No Extension
- * Fault. The possiblities are:
- * 1. User thread has executed a FP instruction for the first time.
- * Save current FPU context if any. Initialize FPU, setup FPU
- * context for the thread and enable FP hw.
- * 2. Thread's pcb has a valid FPU state: Restore the FPU state and
- * enable FP hw.
- *
- * Note that case #2 is inlined in the trap table.
- */
-int
-fpnoextflt(struct regs *rp)
-{
- struct fpu_ctx *fp = &ttolwp(curthread)->lwp_pcb.pcb_fpu;
-
-#if !defined(__lint)
- ASSERT(sizeof (struct fxsave_state) == 512 &&
- sizeof (struct fnsave_state) == 108);
- ASSERT((offsetof(struct fxsave_state, fx_xmm[0]) & 0xf) == 0);
-
- ASSERT(sizeof (struct xsave_state) >= AVX_XSAVE_SIZE);
-
-#if defined(__i386)
- ASSERT(sizeof (struct _fpu) == sizeof (struct __old_fpu));
-#endif /* __i386 */
-#endif /* !__lint */
-
- kpreempt_disable();
- /*
- * Now we can enable the interrupts.
- * (NOTE: fp-no-coprocessor comes thru interrupt gate)
- */
- sti();
-
- if (!fpu_exists) { /* check for FPU hw exists */
- if (fp_kind == FP_NO) {
- uint32_t inst;
-
- /*
- * When the system has no floating point support,
- * i.e. no FP hardware and no emulator, skip the
- * two kinds of FP instruction that occur in
- * fpstart. Allows processes that do no real FP
- * to run normally.
- */
- if (fuword32((void *)rp->r_pc, &inst) != -1 &&
- ((inst & 0xFFFF) == 0x7dd9 ||
- (inst & 0xFFFF) == 0x6dd9)) {
- rp->r_pc += 3;
- kpreempt_enable();
- return (0);
- }
- }
-
- /*
- * If we have neither a processor extension nor
- * an emulator, kill the process OR panic the kernel.
- */
- kpreempt_enable();
- return (1); /* error */
- }
-
-#if !defined(__xpv) /* XXPV Is this ifdef needed now? */
- /*
- * A paranoid cross-check: for the SSE case, ensure that %cr4 is
- * configured to enable fully fledged (%xmm) fxsave/fxrestor on
- * this CPU. For the non-SSE case, ensure that it isn't.
- */
- ASSERT(((fp_kind & __FP_SSE) &&
- (getcr4() & CR4_OSFXSR) == CR4_OSFXSR) ||
- (!(fp_kind & __FP_SSE) &&
- (getcr4() & (CR4_OSXMMEXCPT|CR4_OSFXSR)) == 0));
-#endif
-
- if (fp->fpu_flags & FPU_EN) {
- /* case 2 */
- fp_restore(fp);
- } else {
- /* case 1 */
- fp_seed();
- }
- kpreempt_enable();
- return (0);
-}
-
-
-/*
- * Handle a processor extension overrun fault
- * Returns non zero for error.
- *
- * XXX Shouldn't this just be abolished given that we're not supporting
- * anything prior to Pentium?
- */
-
-/* ARGSUSED */
-int
-fpextovrflt(struct regs *rp)
-{
-#if !defined(__xpv) /* XXPV Do we need this ifdef either */
- ulong_t cur_cr0;
-
- ASSERT(fp_kind != FP_NO);
-
- cur_cr0 = getcr0();
- fpinit(); /* initialize the FPU hardware */
- setcr0(cur_cr0);
-#endif
- sti();
- return (1); /* error, send SIGSEGV signal to the thread */
-}
-
/*
* Handle a processor extension error fault
* Returns non zero for error.
@@ -622,14 +896,6 @@ fpexterrflt(struct regs *rp)
/* clear exception flags in saved state, as if by fnclex */
switch (fp_save_mech) {
-#if defined(__i386)
- case FP_FNSAVE:
- fpsw = fp->fpu_regs.kfpu_u.kfpu_fn->f_fsw;
- fpcw = fp->fpu_regs.kfpu_u.kfpu_fn->f_fcw;
- fp->fpu_regs.kfpu_u.kfpu_fn->f_fsw &= ~FPS_SW_EFLAGS;
- break;
-#endif
-
case FP_FXSAVE:
fpsw = fp->fpu_regs.kfpu_u.kfpu_fx->fx_fsw;
fpcw = fp->fpu_regs.kfpu_u.kfpu_fx->fx_fcw;
@@ -811,11 +1077,6 @@ fpsetcw(uint16_t fcw, uint32_t mxcsr)
fp_save(fp);
switch (fp_save_mech) {
-#if defined(__i386)
- case FP_FNSAVE:
- fp->fpu_regs.kfpu_u.kfpu_fn->f_fcw = fcw;
- break;
-#endif
case FP_FXSAVE:
fx = fp->fpu_regs.kfpu_u.kfpu_fx;
fx->fx_fcw = fcw;
diff --git a/usr/src/uts/intel/ia32/os/sundep.c b/usr/src/uts/intel/ia32/os/sundep.c
index 3911d6ebaa..cfb4552287 100644
--- a/usr/src/uts/intel/ia32/os/sundep.c
+++ b/usr/src/uts/intel/ia32/os/sundep.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2017 Joyent, Inc.
+ * Copyright 2018 Joyent, Inc.
*/
/* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
@@ -393,12 +393,12 @@ lwp_forkregs(klwp_t *lwp, klwp_t *clwp)
struct pcb *pcb = &clwp->lwp_pcb;
struct regs *rp = lwptoregs(lwp);
- if (pcb->pcb_rupdate == 0) {
+ if (!PCB_NEED_UPDATE_SEGS(pcb)) {
pcb->pcb_ds = rp->r_ds;
pcb->pcb_es = rp->r_es;
pcb->pcb_fs = rp->r_fs;
pcb->pcb_gs = rp->r_gs;
- pcb->pcb_rupdate = 1;
+ PCB_SET_UPDATE_SEGS(pcb);
lwptot(clwp)->t_post_sys = 1;
}
ASSERT(lwptot(clwp)->t_post_sys);
@@ -436,22 +436,22 @@ lwp_pcb_exit(void)
* as a segment-not-present trap.
*
* Here we save the current values from the lwp regs into the pcb
- * and set pcb->pcb_rupdate to 1 to tell the rest of the kernel
- * that the pcb copy of the segment registers is the current one.
- * This ensures the lwp's next trip to user land via update_sregs.
- * Finally we set t_post_sys to ensure that no system call fast-path's
- * its way out of the kernel via sysret.
+ * and OR PCB_UPDATE_SEGS (0x01) into pcb->pcb_rupdate to tell the rest
+ * of the kernel that the pcb copy of the segment registers is the
+ * current one. This ensures the lwp's next trip to user land goes via
+ * update_sregs. Finally we set t_post_sys to ensure that no system
+ * call fast-paths its way out of the kernel via sysret.
*
- * (This means that we need to have interrupts disabled when we test
- * t->t_post_sys in the syscall handlers; if the test fails, we need
- * to keep interrupts disabled until we return to userland so we can't
- * be switched away.)
+ * (This means that we need to have interrupts disabled when we
+ * test t->t_post_sys in the syscall handlers; if the test fails,
+ * we need to keep interrupts disabled until we return to userland
+ * so we can't be switched away.)
*
- * As a result of all this, we don't really have to do a whole lot if
- * the thread is just mucking about in the kernel, switching on and
- * off the cpu for whatever reason it feels like. And yet we still
- * preserve fast syscalls, cause if we -don't- get descheduled,
- * we never come here either.
+ * As a result of all this, we don't really have to do a whole lot
+ * if the thread is just mucking about in the kernel, switching on
+ * and off the cpu for whatever reason it feels like. And yet we
+ * still preserve fast syscalls, cause if we -don't- get
+ * descheduled, we never come here either.
*/
#define VALID_LWP_DESC(udp) ((udp)->usd_type == SDT_MEMRWA && \
@@ -468,7 +468,7 @@ lwp_segregs_save(klwp_t *lwp)
ASSERT(VALID_LWP_DESC(&pcb->pcb_fsdesc));
ASSERT(VALID_LWP_DESC(&pcb->pcb_gsdesc));
- if (pcb->pcb_rupdate == 0) {
+ if (!PCB_NEED_UPDATE_SEGS(pcb)) {
rp = lwptoregs(lwp);
/*
@@ -482,7 +482,7 @@ lwp_segregs_save(klwp_t *lwp)
pcb->pcb_es = rp->r_es;
pcb->pcb_fs = rp->r_fs;
pcb->pcb_gs = rp->r_gs;
- pcb->pcb_rupdate = 1;
+ PCB_SET_UPDATE_SEGS(pcb);
lwp->lwp_thread->t_post_sys = 1;
}
#endif /* __amd64 */
@@ -833,7 +833,8 @@ lwp_installctx(klwp_t *lwp)
* On the amd64 kernel, the context handlers are responsible for
* virtualizing %ds, %es, %fs, and %gs to the lwp. The register
* values are only ever changed via sys_rtt when the
- * pcb->pcb_rupdate == 1. Only sys_rtt gets to clear the bit.
+ * PCB_UPDATE_SEGS bit (1) is set in pcb->pcb_rupdate. Only
+ * sys_rtt gets to clear the bit.
*
* On the i386 kernel, the context handlers are responsible for
* virtualizing %gs/%fs to the lwp by updating the per-cpu GDTs
@@ -964,7 +965,7 @@ setregs(uarg_t *args)
pcb->pcb_ds = rp->r_ds;
pcb->pcb_es = rp->r_es;
- pcb->pcb_rupdate = 1;
+ PCB_SET_UPDATE_SEGS(pcb);
#elif defined(__i386)
@@ -991,17 +992,15 @@ setregs(uarg_t *args)
t->t_post_sys = 1;
/*
- * Here we initialize minimal fpu state.
- * The rest is done at the first floating
- * point instruction that a process executes.
- */
- pcb->pcb_fpu.fpu_flags = 0;
-
- /*
* Add the lwp context handlers that virtualize segment registers,
* and/or system call stacks etc.
*/
lwp_installctx(lwp);
+
+ /*
+ * Reset the FPU flags and then initialize the FPU for this lwp.
+ */
+ fp_exec();
}
user_desc_t *
diff --git a/usr/src/uts/intel/ia32/os/sysi86.c b/usr/src/uts/intel/ia32/os/sysi86.c
index f0cba7d7d5..e3f4e2608c 100644
--- a/usr/src/uts/intel/ia32/os/sysi86.c
+++ b/usr/src/uts/intel/ia32/os/sysi86.c
@@ -620,7 +620,7 @@ setdscr(struct ssd *ssd)
}
#if defined(__amd64)
- if (pcb->pcb_rupdate == 1) {
+ if (PCB_NEED_UPDATE_SEGS(pcb)) {
if (ssd->sel == pcb->pcb_ds ||
ssd->sel == pcb->pcb_es ||
ssd->sel == pcb->pcb_fs ||
diff --git a/usr/src/uts/intel/ia32/syscall/lwp_private.c b/usr/src/uts/intel/ia32/syscall/lwp_private.c
index 79e9076ee0..479a800d9a 100644
--- a/usr/src/uts/intel/ia32/syscall/lwp_private.c
+++ b/usr/src/uts/intel/ia32/syscall/lwp_private.c
@@ -21,10 +21,9 @@
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright (c) 2018, Joyent, Inc.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/param.h>
#include <sys/types.h>
#include <sys/disp.h>
@@ -72,12 +71,12 @@ lwp_setprivate(klwp_t *lwp, int which, uintptr_t base)
* of zero for %fs and %gs to use the 64-bit fs_base and gs_base
* respectively.
*/
- if (pcb->pcb_rupdate == 0) {
+ if (!PCB_NEED_UPDATE_SEGS(pcb)) {
pcb->pcb_ds = rp->r_ds;
pcb->pcb_es = rp->r_es;
pcb->pcb_fs = rp->r_fs;
pcb->pcb_gs = rp->r_gs;
- pcb->pcb_rupdate = 1;
+ PCB_SET_UPDATE_SEGS(pcb);
t->t_post_sys = 1;
}
ASSERT(t->t_post_sys);
@@ -171,7 +170,7 @@ lwp_getprivate(klwp_t *lwp, int which, uintptr_t base)
case _LWP_FSBASE:
if ((sbase = pcb->pcb_fsbase) != 0) {
if (lwp_getdatamodel(lwp) == DATAMODEL_NATIVE) {
- if (pcb->pcb_rupdate == 1) {
+ if (PCB_NEED_UPDATE_SEGS(pcb)) {
if (pcb->pcb_fs == 0)
break;
} else {
@@ -179,7 +178,7 @@ lwp_getprivate(klwp_t *lwp, int which, uintptr_t base)
break;
}
} else {
- if (pcb->pcb_rupdate == 1) {
+ if (PCB_NEED_UPDATE_SEGS(pcb)) {
if (pcb->pcb_fs == LWPFS_SEL)
break;
} else {
@@ -193,7 +192,7 @@ lwp_getprivate(klwp_t *lwp, int which, uintptr_t base)
case _LWP_GSBASE:
if ((sbase = pcb->pcb_gsbase) != 0) {
if (lwp_getdatamodel(lwp) == DATAMODEL_NATIVE) {
- if (pcb->pcb_rupdate == 1) {
+ if (PCB_NEED_UPDATE_SEGS(pcb)) {
if (pcb->pcb_gs == 0)
break;
} else {
@@ -201,7 +200,7 @@ lwp_getprivate(klwp_t *lwp, int which, uintptr_t base)
break;
}
} else {
- if (pcb->pcb_rupdate == 1) {
+ if (PCB_NEED_UPDATE_SEGS(pcb)) {
if (pcb->pcb_gs == LWPGS_SEL)
break;
} else {
diff --git a/usr/src/uts/intel/sys/archsystm.h b/usr/src/uts/intel/sys/archsystm.h
index 4d14e58880..0c9ceac7be 100644
--- a/usr/src/uts/intel/sys/archsystm.h
+++ b/usr/src/uts/intel/sys/archsystm.h
@@ -55,11 +55,8 @@ extern void mfence_insn(void);
extern uint16_t getgs(void);
extern void setgs(uint16_t);
-extern void patch_sse(void);
-extern void patch_sse2(void);
#endif
-extern void patch_xsave(void);
extern kmem_cache_t *fpsave_cachep;
extern void cli(void);
diff --git a/usr/src/uts/intel/sys/fp.h b/usr/src/uts/intel/sys/fp.h
index fe5471e855..9841fe1c3b 100644
--- a/usr/src/uts/intel/sys/fp.h
+++ b/usr/src/uts/intel/sys/fp.h
@@ -236,13 +236,14 @@ struct fxsave_state {
* 13.4.2 of the Intel 64 and IA-32 Architectures Software Developer’s Manual,
* Volume 1 (IASDv1). The extended portion is documented in section 13.4.3.
*
- * Our size is at least AVX_XSAVE_SIZE (832 bytes), asserted in fpnoextflt().
- * Enabling additional xsave-related CPU features requires an increase in the
- * size. We dynamically allocate the per-lwp xsave area at runtime, based on
- * the size needed for the CPU-specific features. This xsave_state structure
- * simply defines our historical layout for the beginning of the xsave area. The
- * locations and size of new, extended, components is determined dynamically by
- * querying the CPU. See the xsave_info structure in cpuid.c.
+ * Our size is at least AVX_XSAVE_SIZE (832 bytes), which is asserted
+ * statically. Enabling additional xsave-related CPU features requires an
+ * increase in the size. We dynamically allocate the per-lwp xsave area at
+ * runtime, based on the size needed for the CPU-specific features. This
+ * xsave_state structure simply defines our historical layout for the beginning
+ * of the xsave area. The locations and sizes of new, extended components are
+ * determined dynamically by querying the CPU. See the xsave_info structure in
+ * cpuid.c.
*
* xsave component usage is tracked using bits in the xs_xstate_bv field. The
* components are documented in section 13.1 of IASDv1. For easy reference,
@@ -301,7 +302,6 @@ extern uint32_t sse_mxcsr_mask;
extern void fpu_probe(void);
extern uint_t fpu_initial_probe(void);
-extern int fpu_probe_pentium_fdivbug(void);
extern void fpu_auxv_info(int *, size_t *);
@@ -315,6 +315,10 @@ extern void xsaveopt_excp_clr_ctxt(void *);
extern void (*fpsave_ctxt)(void *);
extern void (*xsavep)(struct xsave_state *, uint64_t);
+extern void fpxrestore_ctxt(void *);
+extern void xrestore_ctxt(void *);
+extern void (*fprestore_ctxt)(void *);
+
extern void fxsave_insn(struct fxsave_state *);
extern void fpsave(struct fnsave_state *);
extern void fprestore(struct fnsave_state *);
@@ -335,11 +339,11 @@ extern uint32_t fpgetcwsw(void);
extern uint32_t fpgetmxcsr(void);
struct regs;
-extern int fpnoextflt(struct regs *);
-extern int fpextovrflt(struct regs *);
extern int fpexterrflt(struct regs *);
extern int fpsimderrflt(struct regs *);
extern void fpsetcw(uint16_t, uint32_t);
+extern void fp_seed(void);
+extern void fp_exec(void);
struct _klwp;
extern void fp_lwp_init(struct _klwp *);
extern void fp_lwp_cleanup(struct _klwp *);
diff --git a/usr/src/uts/intel/sys/pcb.h b/usr/src/uts/intel/sys/pcb.h
index defd116eba..e7e2e2cdce 100644
--- a/usr/src/uts/intel/sys/pcb.h
+++ b/usr/src/uts/intel/sys/pcb.h
@@ -21,6 +21,7 @@
/*
* Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, Joyent, Inc.
*/
#ifndef _SYS_PCB_H
@@ -51,7 +52,6 @@ typedef struct pcb {
uint_t pcb_flags; /* state flags; cleared on fork */
greg_t pcb_drstat; /* status debug register (%dr6) */
unsigned char pcb_instr; /* /proc: instruction at stop */
-#if defined(__amd64)
unsigned char pcb_rupdate; /* new register values in pcb -> regs */
uintptr_t pcb_fsbase;
uintptr_t pcb_gsbase;
@@ -59,7 +59,6 @@ typedef struct pcb {
selector_t pcb_es;
selector_t pcb_fs;
selector_t pcb_gs;
-#endif /* __amd64 */
user_desc_t pcb_fsdesc; /* private per-lwp %fs descriptors */
user_desc_t pcb_gsdesc; /* private per-lwp %gs descriptors */
} pcb_t;
@@ -77,6 +76,21 @@ typedef struct pcb {
#define REQUEST_NOSTEP 0x200 /* request pending to disable single-step */
#define ASYNC_HWERR 0x400 /* hardware error has corrupted context */
+/* pcb_rupdate values */
+#define PCB_UPDATE_SEGS 0x01 /* Update segment registers */
+#define PCB_UPDATE_FPU 0x02 /* Update FPU registers */
+
+#define PCB_SET_UPDATE_SEGS(pcb) ((pcb)->pcb_rupdate |= PCB_UPDATE_SEGS)
+#define PCB_SET_UPDATE_FPU(pcb) ((pcb)->pcb_rupdate |= PCB_UPDATE_FPU)
+#define PCB_NEED_UPDATE_SEGS(pcb) \
+ (((pcb)->pcb_rupdate & PCB_UPDATE_SEGS) != 0)
+#define PCB_NEED_UPDATE_FPU(pcb) \
+ (((pcb)->pcb_rupdate & PCB_UPDATE_FPU) != 0)
+#define PCB_NEED_UPDATE(pcb) \
+ (PCB_NEED_UPDATE_FPU(pcb) || PCB_NEED_UPDATE_SEGS(pcb))
+#define PCB_CLEAR_UPDATE_SEGS(pcb) ((pcb)->pcb_rupdate &= ~PCB_UPDATE_SEGS)
+#define PCB_CLEAR_UPDATE_FPU(pcb) ((pcb)->pcb_rupdate &= ~PCB_UPDATE_FPU)
+
/* fpu_flags */
#define FPU_EN 0x1 /* flag signifying fpu in use */
#define FPU_VALID 0x2 /* fpu_regs has valid fpu state */
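[Editorial footnote: this bit-field layout is why the pcb_rupdate == 1 comparisons elsewhere in this change become PCB_NEED_UPDATE_SEGS() tests. Once both flags can be set at once, an equality test against 1 would miss a pending segment update whenever an FPU update is also pending. A stand-alone illustration; stub_pcb is a stub, not the real pcb_t:]

#include <assert.h>

#define	PCB_UPDATE_SEGS	0x01
#define	PCB_UPDATE_FPU	0x02

#define	PCB_NEED_UPDATE_SEGS(pcb) \
	(((pcb)->pcb_rupdate & PCB_UPDATE_SEGS) != 0)

struct stub_pcb {
	unsigned char pcb_rupdate;	/* stand-in for the pcb_t member */
};

int
main(void)
{
	struct stub_pcb pcb = { PCB_UPDATE_SEGS | PCB_UPDATE_FPU };

	/* The old equality test would now give the wrong answer... */
	assert(!(pcb.pcb_rupdate == 1));
	/* ...while the flag test still sees the pending segment update. */
	assert(PCB_NEED_UPDATE_SEGS(&pcb));
	return (0);
}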