From d0158222a5936ac26b7a03241b7d2df18cc544c8 Mon Sep 17 00:00:00 2001 From: Robert Mustacchi Date: Wed, 13 Jun 2018 17:09:14 +0000 Subject: 9596 Initial xsave xstate_bv should not include all features 9595 rtld should conditionally save AVX-512 state Reviewed by: John Levon Reviewed by: Patrick Mooney Reviewed by: Jerry Jelinek Reviewed by: Bryan Cantrill Reviewed by: Toomas Soome Approved by: Richard Lowe --- usr/src/cmd/ptools/pargs/pargs.c | 24 +- usr/src/cmd/sgs/elfdump/common/corenote.c | 4 +- usr/src/cmd/sgs/libconv/common/corenote.c | 18 +- usr/src/cmd/sgs/libconv/common/corenote.msg | 10 +- usr/src/cmd/sgs/rtld/amd64/_setup.c | 79 ++- usr/src/cmd/sgs/rtld/amd64/boot_elf.s | 942 ++++++++++++---------------- usr/src/uts/common/exec/elf/elf.c | 27 +- usr/src/uts/common/sys/auxv.h | 7 +- usr/src/uts/common/sys/auxv_386.h | 15 +- usr/src/uts/common/sys/user.h | 4 +- usr/src/uts/i86pc/os/fpu_subr.c | 327 ++++------ usr/src/uts/intel/ia32/os/fpu.c | 7 +- usr/src/uts/intel/sys/fp.h | 10 +- usr/src/uts/intel/sys/x86_archext.h | 11 + 14 files changed, 698 insertions(+), 787 deletions(-) diff --git a/usr/src/cmd/ptools/pargs/pargs.c b/usr/src/cmd/ptools/pargs/pargs.c index 4eee215547..aa7b74dce9 100644 --- a/usr/src/cmd/ptools/pargs/pargs.c +++ b/usr/src/cmd/ptools/pargs/pargs.c @@ -23,7 +23,7 @@ * Use is subject to license terms. */ /* - * Copyright 2016 Joyent, Inc. + * Copyright (c) 2018, Joyent, Inc. */ /* @@ -306,15 +306,15 @@ unctrl_str(const char *src, int escape_slash, int *unprintable) * a set of safe characters and adding those few common punctuation characters * which are known to be safe. The rules are: * - * If this is a printable character (graph), and not punctuation, it is - * safe to leave unquoted. + * If this is a printable character (graph), and not punctuation, it is + * safe to leave unquoted. * - * If it's one of known hard-coded safe characters, it's also safe to leave - * unquoted. + * If it's one of the known hard-coded safe characters, it's also safe to + * leave unquoted. * - * Otherwise, the entire argument must be quoted. + * Otherwise, the entire argument must be quoted. * - * This will cause some strings to be unecessarily quoted, but it is safer than + * This will cause some strings to be unnecessarily quoted, but it is safer than * having a character unintentionally interpreted by the shell. */ static int @@ -357,12 +357,12 @@ quote_string_ascii(pargs_data_t *datap, char *src) * with by unctrl_str(). We make the following subtitution when we * encounter a single quote: * - * ' = '"'"' + * ' = '"'"' * * In addition, we put single quotes around the entire argument. 
For * example: * - * foo'bar = 'foo'"'"'bar' + * foo'bar = 'foo'"'"'bar' */ dstlen = strlen(src) + 3 + 4 * quote_count; dst = safe_zalloc(dstlen); @@ -711,7 +711,7 @@ at_str(long val, char *instr, size_t n, char *str) */ #define FMT_AV(s, n, hwcap, mask, name) \ - if ((hwcap) & (mask)) \ + if ((hwcap) & (mask)) \ (void) snprintf(s, n, "%s" name " | ", s) /*ARGSUSED*/ @@ -837,7 +837,9 @@ static struct aux_id aux_arr[] = { { AT_SUN_BRAND_AUX1, "AT_SUN_BRAND_AUX1", at_null }, { AT_SUN_BRAND_AUX2, "AT_SUN_BRAND_AUX2", at_null }, { AT_SUN_BRAND_AUX3, "AT_SUN_BRAND_AUX3", at_null }, - { AT_SUN_COMMPAGE, "AT_SUN_COMMPAGE", at_null } + { AT_SUN_COMMPAGE, "AT_SUN_COMMPAGE", at_null }, + { AT_SUN_FPTYPE, "AT_SUN_FPTYPE", at_null }, + { AT_SUN_FPSIZE, "AT_SUN_FPSIZE", at_null } }; #define N_AT_ENTS (sizeof (aux_arr) / sizeof (struct aux_id)) diff --git a/usr/src/cmd/sgs/elfdump/common/corenote.c b/usr/src/cmd/sgs/elfdump/common/corenote.c index 7de5e9dfcc..0777025523 100644 --- a/usr/src/cmd/sgs/elfdump/common/corenote.c +++ b/usr/src/cmd/sgs/elfdump/common/corenote.c @@ -25,7 +25,7 @@ */ /* * Copyright 2012 DEY Storage Systems, Inc. All rights reserved. - * Copyright (c) 2013, Joyent, Inc. All rights reserved. + * Copyright (c) 2018, Joyent, Inc. */ #include @@ -498,6 +498,8 @@ dump_auxv(note_state_t *state, const char *title) case AT_SUN_GID: case AT_SUN_RGID: case AT_SUN_LPAGESZ: + case AT_SUN_FPSIZE: + case AT_SUN_FPTYPE: num_fmt = SL_FMT_NUM_DEC; break; diff --git a/usr/src/cmd/sgs/libconv/common/corenote.c b/usr/src/cmd/sgs/libconv/common/corenote.c index 578d0e2774..02b3e5d59b 100644 --- a/usr/src/cmd/sgs/libconv/common/corenote.c +++ b/usr/src/cmd/sgs/libconv/common/corenote.c @@ -25,7 +25,7 @@ */ /* * Copyright 2012 DEY Storage Systems, Inc. All rights reserved. - * Copyright 2016 Joyent, Inc. + * Copyright (c) 2018 Joyent, Inc. */ /* @@ -105,20 +105,22 @@ conv_cnote_auxv_type(Word type, Conv_fmt_flags_t fmt_flags, static const conv_ds_msg_t ds_types_2000_2011 = { CONV_DS_MSG_INIT(2000, types_2000_2011) }; - static const Msg types_2014_2024[] = { + static const Msg types_2014_2028[] = { MSG_AUXV_AT_SUN_EXECNAME, MSG_AUXV_AT_SUN_MMU, MSG_AUXV_AT_SUN_LDDATA, MSG_AUXV_AT_SUN_AUXFLAGS, MSG_AUXV_AT_SUN_EMULATOR, MSG_AUXV_AT_SUN_BRANDNAME, MSG_AUXV_AT_SUN_BRAND_AUX1, MSG_AUXV_AT_SUN_BRAND_AUX2, MSG_AUXV_AT_SUN_BRAND_AUX3, MSG_AUXV_AT_SUN_HWCAP2, - MSG_AUXV_AT_SUN_COMMPAGE + NULL, NULL, + MSG_AUXV_AT_SUN_COMMPAGE, MSG_AUXV_AT_SUN_FPTYPE, + MSG_AUXV_AT_SUN_FPSIZE }; - static const conv_ds_msg_t ds_types_2014_2024 = { - CONV_DS_MSG_INIT(2014, types_2014_2024) }; + static const conv_ds_msg_t ds_types_2014_2028 = { + CONV_DS_MSG_INIT(2014, types_2014_2028) }; static const conv_ds_t *ds[] = { CONV_DS_ADDR(ds_types_0_22), CONV_DS_ADDR(ds_types_2000_2011), - CONV_DS_ADDR(ds_types_2014_2024), NULL }; + CONV_DS_ADDR(ds_types_2014_2028), NULL }; return (conv_map_ds(ELFOSABI_NONE, EM_NONE, type, ds, fmt_flags, inv_buf)); @@ -1172,7 +1174,7 @@ conv_cnote_pr_flags(int flags, Conv_fmt_flags_t fmt_flags, Conv_cnote_pr_flags_buf_t *cnote_pr_flags_buf) { static const Val_desc vda[] = { - { PR_STOPPED, MSG_PR_FLAGS_STOPPED }, + { PR_STOPPED, MSG_PR_FLAGS_STOPPED }, { PR_ISTOP, MSG_PR_FLAGS_ISTOP }, { PR_DSTOP, MSG_PR_FLAGS_DSTOP }, { PR_STEP, MSG_PR_FLAGS_STEP }, @@ -1331,7 +1333,7 @@ conv_cnote_proc_flag(int flags, Conv_fmt_flags_t fmt_flags, * their numeric value. 
*/ static const Val_desc vda[] = { - { 0x00000001, MSG_PROC_FLAG_SSYS }, + { 0x00000001, MSG_PROC_FLAG_SSYS }, { 0x02000000, MSG_PROC_FLAG_SMSACCT }, { 0, 0 } }; diff --git a/usr/src/cmd/sgs/libconv/common/corenote.msg b/usr/src/cmd/sgs/libconv/common/corenote.msg index b5562577ae..b55e67ec07 100644 --- a/usr/src/cmd/sgs/libconv/common/corenote.msg +++ b/usr/src/cmd/sgs/libconv/common/corenote.msg @@ -24,7 +24,7 @@ # Use is subject to license terms. # # Copyright 2012 DEY Storage Systems, Inc. All rights reserved. -# Copyright 2016 Joyent, Inc. +# Copyright (c) 2018 Joyent, Inc. # @ MSG_NT_PRSTATUS "[ NT_PRSTATUS ]" @@ -102,6 +102,8 @@ @ MSG_AUXV_AT_SUN_BRAND_AUX3 "SUN_BRAND_AUX3" @ MSG_AUXV_AT_SUN_HWCAP2 "SUN_HWCAP2" @ MSG_AUXV_AT_SUN_COMMPAGE "SUN_COMMPAGE" +@ MSG_AUXV_AT_SUN_FPTYPE "SUN_FPTYPE" +@ MSG_AUXV_AT_SUN_FPSIZE "SUN_FPSIZE" @ MSG_CC_CONTENT_STACK "STACK" @@ -339,7 +341,7 @@ @ MSG_REG_AMD64_R12 "[ r12 ]" @ MSG_REG_AMD64_R11 "[ r11 ]" @ MSG_REG_AMD64_R10 "[ r10 ]" -@ MSG_REG_AMD64_R9 "[ r9 ]" +@ MSG_REG_AMD64_R9 "[ r9 ]" @ MSG_REG_AMD64_R8 "[ r8 ]" @ MSG_REG_AMD64_RDI "[ rdi ]" @ MSG_REG_AMD64_RSI "[ rsi ]" @@ -706,7 +708,7 @@ @ MSG_SYS_FCNTL "[ fcntl ]" # 62 @ MSG_SYS_FCNTL_ALT "fcntl" @ MSG_SYS_ULIMIT "[ ulimit ]" # 63 -@ MSG_SYS_ULIMIT_ALT "ulimit" +@ MSG_SYS_ULIMIT_ALT "ulimit" @ MSG_SYS_RENAMEAT "[ renameat ]" # 64 @ MSG_SYS_RENAMEAT_ALT "renameat" @ MSG_SYS_UNLINKAT "[ unlinkat ]" # 65 @@ -730,7 +732,7 @@ @ MSG_SYS_RCTLSYS "[ rctlsys ]" # 74 @ MSG_SYS_RCTLSYS_ALT "rctlsys " @ MSG_SYS_SIDSYS "[ sidsys ]" # 75 -@ MSG_SYS_SIDSYS_ALT "sidsys" +@ MSG_SYS_SIDSYS_ALT "sidsys" @ MSG_SYS_76 "76" # 76 (u) @ MSG_SYS_LWP_PARK "[ lwp_park ]" # 77 @ MSG_SYS_LWP_PARK_ALT "lwp_park" diff --git a/usr/src/cmd/sgs/rtld/amd64/_setup.c b/usr/src/cmd/sgs/rtld/amd64/_setup.c index cd47f3b91f..2a92d72565 100644 --- a/usr/src/cmd/sgs/rtld/amd64/_setup.c +++ b/usr/src/cmd/sgs/rtld/amd64/_setup.c @@ -24,7 +24,7 @@ * Use is subject to license terms. */ /* - * Copyright (c) 2012, Joyent, Inc. All rights reserved. + * Copyright (c) 2018, Joyent, Inc. */ /* @@ -49,6 +49,68 @@ #include "_audit.h" #include "msg.h" +/* + * Number of bytes to save for register usage. + */ +uint_t _plt_save_size; +void (*_plt_fp_save)(void *); +void (*_plt_fp_restore)(void *); + +extern void _elf_rtbndr_fp_save_orig(void *); +extern void _elf_rtbndr_fp_restore_orig(void *); +extern void _elf_rtbndr_fp_fxsave(void *); +extern void _elf_rtbndr_fp_fxrestore(void *); +extern void _elf_rtbndr_fp_xsave(void *); +extern void _elf_rtbndr_fp_xrestore(void *); + +/* + * Based on what the kernel has told us, go through and set up the various + * pointers that we'll need for elf_rtbndr for the FPU. + */ +static void +_setup_plt_fpu(int kind, size_t len) +{ + /* + * If we didn't get a length for some reason, fall back to the old + * implementation. + */ + if (len == 0) + kind = -1; + + switch (kind) { + case AT_386_FPINFO_FXSAVE: + _plt_fp_save = _elf_rtbndr_fp_fxsave; + _plt_fp_restore = _elf_rtbndr_fp_fxrestore; + _plt_save_size = len; + break; + /* + * We can treat processors that don't correctly handle the exception + * information in xsave the same way we do others. The information + * that may or may not be properly saved and restored should not be + * relevant to us because of the ABI. 
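 *
 * (Editorial summary, not part of the original change -- the overall mapping
 * this function implements is roughly:
 *
 *	AT_386_FPINFO_FXSAVE             fxsave glue,   AT_SUN_FPSIZE bytes
 *	AT_386_FPINFO_XSAVE / XSAVE_AMD  xsave glue,    AT_SUN_FPSIZE bytes
 *	anything else, or no size        original glue, 8 x 64 bytes (8 %zmm regs)
 *
 * where the XSAVE_AMD kind is what the kernel advertises when it must work
 * around processors that do not save the FPU exception information correctly;
 * as described above, rtld treats it exactly like plain XSAVE.)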
+ */ + case AT_386_FPINFO_XSAVE: + case AT_386_FPINFO_XSAVE_AMD: + _plt_fp_save = _elf_rtbndr_fp_xsave; + _plt_fp_restore = _elf_rtbndr_fp_xrestore; + _plt_save_size = len; + break; + default: + _plt_fp_save = _elf_rtbndr_fp_save_orig; + _plt_fp_restore = _elf_rtbndr_fp_restore_orig; + /* + * The ABI says that 8 floating point registers are used for + * passing arguments (%xmm0 through %xmm7). Because these + * registers on some platforms may shadow the %ymm and %zmm + * registers, we end up needing to size this for the maximally + * sized register we care about, a 512-bit (64-byte) zmm + * register. + */ + _plt_save_size = 64 * 8; + break; + } +} + /* VARARGS */ unsigned long _setup(Boot *ebp, Dyn *ld_dyn) @@ -67,7 +129,8 @@ _setup(Boot *ebp, Dyn *ld_dyn) uid_t uid = (uid_t)-1, euid = (uid_t)-1; gid_t gid = (gid_t)-1, egid = (gid_t)-1; char *_platform = NULL, *_execname = NULL, *_emulator = NULL; - int auxflags = -1; + int auxflags = -1, fpkind = -1; + size_t fpsize = 0; /* * Scan the bootstrap structure to pick up the basics. @@ -158,6 +221,12 @@ _setup(Boot *ebp, Dyn *ld_dyn) /* name of emulation library, if any */ _emulator = auxv->a_un.a_ptr; break; + case AT_SUN_FPTYPE: + fpkind = (int)auxv->a_un.a_val; + break; + case AT_SUN_FPSIZE: + fpsize = (size_t)auxv->a_un.a_val; + break; } } @@ -230,6 +299,12 @@ _setup(Boot *ebp, Dyn *ld_dyn) sizeof (uintptr_t) + sizeof (uintptr_t) + sizeof (ulong_t) + sizeof (ulong_t) + sizeof (Sym); + /* + * Initialize the amd64 specific PLT relocation constants based on the + * FP information that we have. + */ + _setup_plt_fpu(fpkind, fpsize); + /* * Continue with generic startup processing. */ diff --git a/usr/src/cmd/sgs/rtld/amd64/boot_elf.s b/usr/src/cmd/sgs/rtld/amd64/boot_elf.s index 67301a0c96..36f136e31d 100644 --- a/usr/src/cmd/sgs/rtld/amd64/boot_elf.s +++ b/usr/src/cmd/sgs/rtld/amd64/boot_elf.s @@ -22,7 +22,86 @@ /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. - * Copyright (c) 2017 Joyent, Inc. All rights reserved. + * Copyright (c) 2018 Joyent, Inc. All rights reserved. + */ + +/* + * Welcome to the magic behind the PLT (procedure linkage table). When rtld + * fills out the PLT entries, it will refer initially to the functions in this + * file. As such our goal is simple: + * + * The lie of the function call must be preserved at all costs. + * + * This means that we need to prepare the system for an arbitrary series of + * instructions to be called. For example, as a side effect of resolving a + * symbol we may need to open a shared object which will cause any _init + * functions to be called. Those functions can use any and all of the ABI state + * that they desire (for example, the FPU registers). Therefore we must save and + * restore all the ABI mandated registers here. + * + * For the full information about what we need to save and restore and why, + * please see the System V amd64 PS ABI '3.2.3 Parameter Passing'. For general + * purpose registers, we need to take care of the following: + * + * %rax - Used for information about the number of vector arguments + * %rdi - arg0 + * %rsi - arg1 + * %rdx - arg2 + * %rcx - arg3 + * %r8 - arg4 + * %r9 - arg5 + * %r10 - static chain pointer + * + * Unfortunately, the world of the FPU is more complicated. + * + * The ABI mandates that we must save %xmm0-%xmm7. On newer Intel processors, + * %xmm0-%xmm7 shadow %ymm0-%ymm7 and %zmm0-%zmm7. Historically, when saving the + * FPU, we only saved and restored these eight registers. 
Unfortunately, this + * process itself ended up having side effects. Because the registers shadow one + * another, if we saved a full %zmm register when only a %xmm register was + * valid, we would end up causing the processor to think that the full %zmm + * register was valid. Once it believed that this was the case, it would then + * degrade performance of code that only used the %xmm registers. + * + * One way to tackle this problem would have been to use xgetbv with ecx=1 to + * get information about what was actually in use and only save and restore + * that. You can imagine that this logic roughly ends up as something like: + * + * if (zmm_inuse) + * save_zmm() + * if (ymm_inuse) + * save_ymm() + * save_xmm() + * + * However, this logic leaves us at the mercy of the branch predictor. This + * means that all of our efforts can end up still causing the CPU to execute + * things to make it think that some of these other FPU registers are in use and + * thus defeat the optimizations that it has. + * + * To deal with this problem, Intel has suggested using the xsave family of + * instructions. The kernel provides information about the size required for the + * floating point registers as well as which of several methods we need to + * employ through the aux vector. This gets us out of trying to look at the + * hardware capabilities and make decisions every time. As part of the + * amd64-specific portion of rtld, it will process those values and determine + * the functions on an as-needed basis. + * + * There are two different functions that we export. The first is elf_rtbndr(). + * This is basically the glue that gets us into the PLT and to perform + * relocations. elf_rtbndr() determines the address of the function that we must + * call and arranges its stack such that when we return from elf_rtbndr() we + * will instead jump to the actual relocated function which will return to the + * original caller. Because of this, we must preserve all of the registers that + * are used for arguments and restore them before returning. + * + * The second function we export is elf_plt_trace(). This is used to add support + * for audit libraries among other things. elf_plt_trace() may or may not call + * the underlying function as a side effect or merely set up its return to it. + * This changes how we handle %rax. If we call the function ourself, then we end + * up making sure that %rax is the return value versus the initial value. In + * addition, because we get %r11 from the surrounding PLT code, we opt to + * preserve it in case some of the relocation logic ever ends up calling back + * into us again. */ #if defined(lint) @@ -34,22 +113,170 @@ #include #include -/* ARGSUSED0 */ -int -elf_plt_trace() -{ - return (0); -} #else #include #include <_audit.h> #include #include +#include + +/* + * This macro is used to zero the xsave header. The contents of scratchreg will + * be destroyed. locreg should contain the starting address of the xsave header. + */ +#define XSAVE_HEADER_ZERO(scratch, loc) \ + xorq scratch, scratch; \ + movq scratch, 0x200(loc); \ + movq scratch, 0x208(loc); \ + movq scratch, 0x210(loc); \ + movq scratch, 0x218(loc); \ + movq scratch, 0x220(loc); \ + movq scratch, 0x228(loc); \ + movq scratch, 0x230(loc); \ + movq scratch, 0x238(loc) + .file "boot_elf.s" .text +/* + * This section of the code contains glue functions that are used to take care + * of saving and restoring the FPU. We deal with this in a few different ways + * based on the hardware support and what exists. 
Historically we've only saved + * and restored the first 8 floating point registers rather than the entire FPU. + * That implementation still exists here and is kept around mostly as an + * insurance policy. + */ + ENTRY(_elf_rtbndr_fp_save_orig) + movq org_scapset@GOTPCREL(%rip),%r11 + movq (%r11),%r11 /* Syscapset_t pointer */ + movl 8(%r11),%edx /* sc_hw_2 */ + testl $AV_386_2_AVX512F,%edx + jne .save_zmm + movl (%r11),%edx /* sc_hw_1 */ + testl $AV_386_AVX,%edx + jne .save_ymm + movdqa %xmm0, (%rdi) + movdqa %xmm1, 64(%rdi) + movdqa %xmm2, 128(%rdi) + movdqa %xmm3, 192(%rdi) + movdqa %xmm4, 256(%rdi) + movdqa %xmm5, 320(%rdi) + movdqa %xmm6, 384(%rdi) + movdqa %xmm7, 448(%rdi) + jmp .save_finish + +.save_ymm: + vmovdqa %ymm0, (%rdi) + vmovdqa %ymm1, 64(%rdi) + vmovdqa %ymm2, 128(%rdi) + vmovdqa %ymm3, 192(%rdi) + vmovdqa %ymm4, 256(%rdi) + vmovdqa %ymm5, 320(%rdi) + vmovdqa %ymm6, 384(%rdi) + vmovdqa %ymm7, 448(%rdi) + jmp .save_finish + +.save_zmm: + vmovdqa64 %zmm0, (%rdi) + vmovdqa64 %zmm1, 64(%rdi) + vmovdqa64 %zmm2, 128(%rdi) + vmovdqa64 %zmm3, 192(%rdi) + vmovdqa64 %zmm4, 256(%rdi) + vmovdqa64 %zmm5, 320(%rdi) + vmovdqa64 %zmm6, 384(%rdi) + vmovdqa64 %zmm7, 448(%rdi) + +.save_finish: + ret + SET_SIZE(_elf_rtbndr_fp_save_orig) + + ENTRY(_elf_rtbndr_fp_restore_orig) + movq org_scapset@GOTPCREL(%rip),%r11 + movq (%r11),%r11 /* Syscapset_t pointer */ + movl 8(%r11),%edx /* sc_hw_2 */ + testl $AV_386_2_AVX512F,%edx + jne .restore_zmm + movl (%r11),%edx /* sc_hw_1 */ + testl $AV_386_AVX,%edx + jne .restore_ymm + + movdqa (%rdi), %xmm0 + movdqa 64(%rdi), %xmm1 + movdqa 128(%rdi), %xmm2 + movdqa 192(%rdi), %xmm3 + movdqa 256(%rdi), %xmm4 + movdqa 320(%rdi), %xmm5 + movdqa 384(%rdi), %xmm6 + movdqa 448(%rdi), %xmm7 + jmp .restore_finish + +.restore_ymm: + vmovdqa (%rdi), %ymm0 + vmovdqa 64(%rdi), %ymm1 + vmovdqa 128(%rdi), %ymm2 + vmovdqa 192(%rdi), %ymm3 + vmovdqa 256(%rdi), %ymm4 + vmovdqa 320(%rdi), %ymm5 + vmovdqa 384(%rdi), %ymm6 + vmovdqa 448(%rdi), %ymm7 + jmp .restore_finish + +.restore_zmm: + vmovdqa64 (%rdi), %zmm0 + vmovdqa64 64(%rdi), %zmm1 + vmovdqa64 128(%rdi), %zmm2 + vmovdqa64 192(%rdi), %zmm3 + vmovdqa64 256(%rdi), %zmm4 + vmovdqa64 320(%rdi), %zmm5 + vmovdqa64 384(%rdi), %zmm6 + vmovdqa64 448(%rdi), %zmm7 + +.restore_finish: + ret + SET_SIZE(_elf_rtbndr_fp_restore_orig) + + ENTRY(_elf_rtbndr_fp_fxsave) + fxsaveq (%rdi) + ret + SET_SIZE(_elf_rtbndr_fp_fxsave) + + ENTRY(_elf_rtbndr_fp_fxrestore) + fxrstor (%rdi) + ret + SET_SIZE(_elf_rtbndr_fp_fxrestore) + + ENTRY(_elf_rtbndr_fp_xsave) + XSAVE_HEADER_ZERO(%rdx, %rdi) + movq $_CONST(XFEATURE_FP_ALL), %rdx + movl %edx, %eax + shrq $32, %rdx + xsave (%rdi) /* save data */ + ret + SET_SIZE(_elf_rtbndr_fp_xsave) + + ENTRY(_elf_rtbndr_fp_xrestore) + movq $_CONST(XFEATURE_FP_ALL), %rdx + movl %edx, %eax + shrq $32, %rdx + xrstor (%rdi) /* save data */ + ret + SET_SIZE(_elf_rtbndr_fp_xrestore) + +#endif + +#if defined(lint) + +/* ARGSUSED0 */ +int +elf_plt_trace() +{ + return (0); +} + +#else + /* * On entry the 'glue code' has already done the following: * @@ -79,173 +306,80 @@ elf_plt_trace() * the second 8 bytes are to align the stack and are available * for use. 
*/ -#define REFLMP_OFF 0x0 -#define DEFLMP_OFF 0x8 +#define REFLMP_OFF 0x0 +#define DEFLMP_OFF 0x8 #define SYMNDX_OFF 0x10 #define SBFLAGS_OFF 0x14 #define SYMDEF_OFF 0x18 #define SYMDEF_VALUE_OFF 0x20 + /* - * Local stack space storage for elf_plt_trace is allocated - * as follows: - * - * First - before we got here - %rsp has been decremented - * by 0x10 to make space for the dyndata ptr (and another - * free word). In addition to that, we create space - * for the following: + * Next, we need to create a bunch of local storage. First, we have to preserve + * the standard registers per the amd64 ABI. This means we need to deal with: + * %rax - Used for information about the number of vector arguments + * %rdi - arg0 + * %rsi - arg1 + * %rdx - arg2 + * %rcx - arg3 + * %r8 - arg4 + * %r9 - arg5 + * %r10 - static chain pointer + * %r11 - PLT Interwork register, our caller is using this, so it's not + * a temporary for us. * - * La_amd64_regs 8 * 8: 64 - * prev_stack_size 8 8 - * Saved regs: - * %rdi 8 - * %rsi 8 - * %rdx 8 - * %rcx 8 - * %r8 8 - * %r9 8 - * %r10 8 - * %r11 8 - * %rax 8 - * ======= - * Subtotal: 144 (64byte aligned) - * - * The amd64 ABI says that the first 8 floating point registers are used to - * pass arguments. Because we may end up loading a DSO which has an _init - * section which uses these registers, we must save them. While various - * CPUs have different sized floating point registers, the largest are - * AVX512 512-bit (64-byte) %zmm registers. These need to be 64-byte aligned. - * Although the example below is showing offsets for %rbp, we actually save the - * registers offset from %rsp, which is forced into 64-bit alignment. - * - * Saved Media Regs (used to pass floating point args): - * %zmm0 - %zmm7 64 * 8: 512 - * ======= - * Total: 656 (64byte aligned) - * - * So - will subtract the following to create enough space - * - * -8(%rbp) store dyndata ptr - * -16(%rbp) store call destination - * -80(%rbp) space for La_amd64_regs - * -88(%rbp) prev stack size - * The next %rbp offsets are only true if the caller had correct stack - * alignment. See note above SPRDIOFF for why we use %rsp alignment to - * access these stack fields. - * -96(%rbp) entering %rdi - * -104(%rbp) entering %rsi - * -112(%rbp) entering %rdx - * -120(%rbp) entering %rcx - * -128(%rbp) entering %r8 - * -136(%rbp) entering %r9 - * -144(%rbp) entering %r10 - * -152(%rbp) entering %r11 - * -160(%rbp) entering %rax - * -224(%rbp) entering %xmm0 - * -288(%rbp) entering %xmm1 - * -352(%rbp) entering %xmm2 - * -288(%rbp) entering %xmm3 - * -416(%rbp) entering %xmm4 - * -480(%rbp) entering %xmm5 - * -544(%rbp) entering %xmm6 - * -608(%rbp) entering %xmm7 + * In addition, we need to save the amd64 ABI floating point arguments. Finally, + * we need to deal with our local storage. We need a La_amd64_regs and a + * uint64_t for the previous stack size. * + * To deal with this and the potentially variable size of the FPU regs, we have + * to play a few different games. We refer to all of the standard registers, the + * previous stack size, and La_amd64_regs structure off of %rbp. These are all + * values that are below %rbp. */ -#define SPDYNOFF -8 -#define SPDESTOFF -16 -#define SPLAREGOFF -80 -#define SPPRVSTKOFF -88 - -/* - * The next set of offsets are relative to %rsp. - * We guarantee %rsp is 64-byte aligned. This guarantees the zmm registers are - * saved to 64-byte aligned addresses. - * %rbp may only be 8 byte aligned if we came in from non-ABI compliant code. 
- */ -#define SPRDIOFF 576 -#define SPRSIOFF 568 -#define SPRDXOFF 560 -#define SPRCXOFF 552 -#define SPR8OFF 544 -#define SPR9OFF 536 -#define SPR10OFF 528 -#define SPR11OFF 520 -#define SPRAXOFF 512 -#define SPXMM0OFF 448 -#define SPXMM1OFF 384 -#define SPXMM2OFF 320 -#define SPXMM3OFF 256 -#define SPXMM4OFF 192 -#define SPXMM5OFF 128 -#define SPXMM6OFF 64 -#define SPXMM7OFF 0 - - /* See elf_rtbndr for explanation behind org_scapset */ - .extern org_scapset - .globl elf_plt_trace - .type elf_plt_trace,@function - .align 16 -elf_plt_trace: +#define SPDYNOFF -8 +#define SPDESTOFF -16 +#define SPPRVSTKOFF -24 +#define SPLAREGOFF -88 +#define ORIG_RDI -96 +#define ORIG_RSI -104 +#define ORIG_RDX -112 +#define ORIG_RCX -120 +#define ORIG_R8 -128 +#define ORIG_R9 -136 +#define ORIG_R10 -144 +#define ORIG_R11 -152 +#define ORIG_RAX -160 +#define PLT_SAVE_OFF 168 + + ENTRY(elf_plt_trace) /* - * Enforce 64-byte stack alignment here. - * The next andq instruction essentially does this: - * if necessary, subtract N bytes from %rsp to align on 64 bytes. + * Save our static registers. After that 64-byte align us and subtract + * the appropriate amount for the FPU. The frame pointer has already + * been pushed for us by the glue code. */ - andq $-64, %rsp /* enforce 64-byte stack alignment */ - subq $656,%rsp / create some local storage - - movq %rdi, SPRDIOFF(%rsp) - movq %rsi, SPRSIOFF(%rsp) - movq %rdx, SPRDXOFF(%rsp) - movq %rcx, SPRCXOFF(%rsp) - movq %r8, SPR8OFF(%rsp) - movq %r9, SPR9OFF(%rsp) - movq %r10, SPR10OFF(%rsp) - movq %r11, SPR11OFF(%rsp) - movq %rax, SPRAXOFF(%rsp) - - movq org_scapset@GOTPCREL(%rip),%r9 - movq (%r9),%r9 /* Syscapset_t pointer */ - movl 8(%r9),%edx /* sc_hw_2 */ - testl $AV_386_2_AVX512F,%edx - jne .trace_save_zmm - movl (%r9),%edx /* sc_hw_1 */ - testl $AV_386_AVX,%edx - jne .trace_save_ymm - -.trace_save_xmm: - movdqa %xmm0, SPXMM0OFF(%rsp) - movdqa %xmm1, SPXMM1OFF(%rsp) - movdqa %xmm2, SPXMM2OFF(%rsp) - movdqa %xmm3, SPXMM3OFF(%rsp) - movdqa %xmm4, SPXMM4OFF(%rsp) - movdqa %xmm5, SPXMM5OFF(%rsp) - movdqa %xmm6, SPXMM6OFF(%rsp) - movdqa %xmm7, SPXMM7OFF(%rsp) - jmp .trace_save_finish - -.trace_save_ymm: - vmovdqa %ymm0, SPXMM0OFF(%rsp) - vmovdqa %ymm1, SPXMM1OFF(%rsp) - vmovdqa %ymm2, SPXMM2OFF(%rsp) - vmovdqa %ymm3, SPXMM3OFF(%rsp) - vmovdqa %ymm4, SPXMM4OFF(%rsp) - vmovdqa %ymm5, SPXMM5OFF(%rsp) - vmovdqa %ymm6, SPXMM6OFF(%rsp) - vmovdqa %ymm7, SPXMM7OFF(%rsp) - jmp .trace_save_finish - -.trace_save_zmm: - vmovdqa64 %zmm0, SPXMM0OFF(%rsp) - vmovdqa64 %zmm1, SPXMM1OFF(%rsp) - vmovdqa64 %zmm2, SPXMM2OFF(%rsp) - vmovdqa64 %zmm3, SPXMM3OFF(%rsp) - vmovdqa64 %zmm4, SPXMM4OFF(%rsp) - vmovdqa64 %zmm5, SPXMM5OFF(%rsp) - vmovdqa64 %zmm6, SPXMM6OFF(%rsp) - vmovdqa64 %zmm7, SPXMM7OFF(%rsp) - -.trace_save_finish: + movq %rdi, ORIG_RDI(%rbp) + movq %rsi, ORIG_RSI(%rbp) + movq %rdx, ORIG_RDX(%rbp) + movq %rcx, ORIG_RCX(%rbp) + movq %r8, ORIG_R8(%rbp) + movq %r9, ORIG_R9(%rbp) + movq %r10, ORIG_R10(%rbp) + movq %r11, ORIG_R11(%rbp) + movq %rax, ORIG_RAX(%rbp) + + subq $PLT_SAVE_OFF, %rsp + + movq _plt_save_size@GOTPCREL(%rip),%r9 + movq _plt_fp_save@GOTPCREL(%rip),%r10 + subq (%r9), %rsp + andq $-64, %rsp + movq %rsp, %rdi + call *(%r10) + /* + * Now that we've saved all of our registers, figure out what we need to + * do next. 
+ */ movq SPDYNOFF(%rbp), %rax / %rax = dyndata testb $LA_SYMB_NOPLTENTER, SBFLAGS_OFF(%rax) / je .start_pltenter @@ -262,17 +396,17 @@ elf_plt_trace: movq %rdi, 0(%rsi) / la_rsp movq 0(%rbp), %rdi movq %rdi, 8(%rsi) / la_rbp - movq SPRDIOFF(%rsp), %rdi + movq ORIG_RDI(%rbp), %rdi movq %rdi, 16(%rsi) / la_rdi - movq SPRSIOFF(%rsp), %rdi + movq ORIG_RSI(%rbp), %rdi movq %rdi, 24(%rsi) / la_rsi - movq SPRDXOFF(%rsp), %rdi + movq ORIG_RDX(%rbp), %rdi movq %rdi, 32(%rsi) / la_rdx - movq SPRCXOFF(%rsp), %rdi + movq ORIG_RCX(%rbp), %rdi movq %rdi, 40(%rsi) / la_rcx - movq SPR8OFF(%rsp), %rdi + movq ORIG_R8(%rbp), %rdi movq %rdi, 48(%rsi) / la_r8 - movq SPR9OFF(%rsp), %rdi + movq ORIG_R9(%rbp), %rdi movq %rdi, 56(%rsi) / la_r9 /* @@ -316,60 +450,21 @@ elf_plt_trace: movq SPDESTOFF(%rbp), %r11 / r11 == calling destination movq %r11, 0(%rbp) / store destination at top - / - / Restore registers - / - movq org_scapset@GOTPCREL(%rip),%r9 - movq (%r9),%r9 /* Syscapset_t pointer */ - movl 8(%r9),%edx /* sc_hw_2 */ - testl $AV_386_2_AVX512F,%edx - jne .trace_restore_zmm - movl (%r9),%edx /* sc_hw_1 */ - testl $AV_386_AVX,%edx - jne .trace_restore_ymm - -.trace_restore_xmm: - movdqa SPXMM0OFF(%rsp), %xmm0 - movdqa SPXMM1OFF(%rsp), %xmm1 - movdqa SPXMM2OFF(%rsp), %xmm2 - movdqa SPXMM3OFF(%rsp), %xmm3 - movdqa SPXMM4OFF(%rsp), %xmm4 - movdqa SPXMM5OFF(%rsp), %xmm5 - movdqa SPXMM6OFF(%rsp), %xmm6 - movdqa SPXMM7OFF(%rsp), %xmm7 - jmp .trace_restore_finish - -.trace_restore_ymm: - vmovdqa SPXMM0OFF(%rsp), %ymm0 - vmovdqa SPXMM1OFF(%rsp), %ymm1 - vmovdqa SPXMM2OFF(%rsp), %ymm2 - vmovdqa SPXMM3OFF(%rsp), %ymm3 - vmovdqa SPXMM4OFF(%rsp), %ymm4 - vmovdqa SPXMM5OFF(%rsp), %ymm5 - vmovdqa SPXMM6OFF(%rsp), %ymm6 - vmovdqa SPXMM7OFF(%rsp), %ymm7 - jmp .trace_restore_finish - -.trace_restore_zmm: - vmovdqa64 SPXMM0OFF(%rsp), %zmm0 - vmovdqa64 SPXMM1OFF(%rsp), %zmm1 - vmovdqa64 SPXMM2OFF(%rsp), %zmm2 - vmovdqa64 SPXMM3OFF(%rsp), %zmm3 - vmovdqa64 SPXMM4OFF(%rsp), %zmm4 - vmovdqa64 SPXMM5OFF(%rsp), %zmm5 - vmovdqa64 SPXMM6OFF(%rsp), %zmm6 - vmovdqa64 SPXMM7OFF(%rsp), %zmm7 - -.trace_restore_finish: - movq SPRDIOFF(%rsp), %rdi - movq SPRSIOFF(%rsp), %rsi - movq SPRDXOFF(%rsp), %rdx - movq SPRCXOFF(%rsp), %rcx - movq SPR8OFF(%rsp), %r8 - movq SPR9OFF(%rsp), %r9 - movq SPR10OFF(%rsp), %r10 - movq SPR11OFF(%rsp), %r11 - movq SPRAXOFF(%rsp), %rax + /* Restore FPU */ + movq _plt_fp_restore@GOTPCREL(%rip),%r10 + + movq %rsp, %rdi + call *(%r10) + + movq ORIG_RDI(%rbp), %rdi + movq ORIG_RSI(%rbp), %rsi + movq ORIG_RDX(%rbp), %rdx + movq ORIG_RCX(%rbp), %rcx + movq ORIG_R8(%rbp), %r8 + movq ORIG_R9(%rbp), %r9 + movq ORIG_R10(%rbp), %r10 + movq ORIG_R11(%rbp), %r11 + movq ORIG_RAX(%rbp), %rax subq $8, %rbp / adjust %rbp for 'ret' movq %rbp, %rsp / @@ -417,13 +512,10 @@ elf_plt_trace: /* * Grow the stack and duplicate the arguements of the * original caller. - * - * We save %rsp in %r11 since we need to use the current rsp for - * accessing the registers saved in our stack frame. */ .grow_stack: movq %rsp, %r11 - subq %rdx, %rsp / grow the stack + subq %rdx, %rsp / grow the stack movq %rdx, SPPRVSTKOFF(%rbp) / -88(%rbp) == prev frame sz movq %rsp, %rcx / %rcx = dest addq %rcx, %rdx / %rdx == tail of dest @@ -445,59 +537,20 @@ elf_plt_trace: / Restore registers using %r11 which contains our old %rsp value / before growing the stack. / - - / Yes, we have to do this dance again. Sorry. 
- movq org_scapset@GOTPCREL(%rip),%r9 - movq (%r9),%r9 /* Syscapset_t pointer */ - movl 8(%r9),%edx /* sc_hw_2 */ - testl $AV_386_2_AVX512F,%edx - jne .trace_r2_zmm - movl (%r9),%edx /* sc_hw_1 */ - testl $AV_386_AVX,%edx - jne .trace_r2_ymm - -.trace_r2_xmm: - movdqa SPXMM0OFF(%r11), %xmm0 - movdqa SPXMM1OFF(%r11), %xmm1 - movdqa SPXMM2OFF(%r11), %xmm2 - movdqa SPXMM3OFF(%r11), %xmm3 - movdqa SPXMM4OFF(%r11), %xmm4 - movdqa SPXMM5OFF(%r11), %xmm5 - movdqa SPXMM6OFF(%r11), %xmm6 - movdqa SPXMM7OFF(%r11), %xmm7 - jmp .trace_r2_finish - -.trace_r2_ymm: - vmovdqa SPXMM0OFF(%r11), %ymm0 - vmovdqa SPXMM1OFF(%r11), %ymm1 - vmovdqa SPXMM2OFF(%r11), %ymm2 - vmovdqa SPXMM3OFF(%r11), %ymm3 - vmovdqa SPXMM4OFF(%r11), %ymm4 - vmovdqa SPXMM5OFF(%r11), %ymm5 - vmovdqa SPXMM6OFF(%r11), %ymm6 - vmovdqa SPXMM7OFF(%r11), %ymm7 - jmp .trace_r2_finish - -.trace_r2_zmm: - vmovdqa64 SPXMM0OFF(%r11), %zmm0 - vmovdqa64 SPXMM1OFF(%r11), %zmm1 - vmovdqa64 SPXMM2OFF(%r11), %zmm2 - vmovdqa64 SPXMM3OFF(%r11), %zmm3 - vmovdqa64 SPXMM4OFF(%r11), %zmm4 - vmovdqa64 SPXMM5OFF(%r11), %zmm5 - vmovdqa64 SPXMM6OFF(%r11), %zmm6 - vmovdqa64 SPXMM7OFF(%r11), %zmm7 + movq _plt_fp_restore@GOTPCREL(%rip),%r10 + movq %r11, %rdi + call *(%r10) .trace_r2_finish: - movq SPRDIOFF(%r11), %rdi - movq SPRSIOFF(%r11), %rsi - movq SPRDXOFF(%r11), %rdx - movq SPRCXOFF(%r11), %rcx - movq SPR8OFF(%r11), %r8 - movq SPR9OFF(%r11), %r9 - movq SPR10OFF(%r11), %r10 - movq SPRAXOFF(%r11), %rax - movq SPR11OFF(%r11), %r11 / retore %r11 last + movq ORIG_RDI(%rbp), %rdi + movq ORIG_RSI(%rbp), %rsi + movq ORIG_RDX(%rbp), %rdx + movq ORIG_RCX(%rbp), %rcx + movq ORIG_R8(%rbp), %r8 + movq ORIG_R9(%rbp), %r9 + movq ORIG_R10(%rbp), %r10 + movq ORIG_RAX(%rbp), %rax + movq ORIG_R11(%rbp), %r11 /* * Call to desitnation function - we'll return here @@ -517,72 +570,32 @@ elf_plt_trace: movq REFLMP_OFF(%r11), %rsi / arg2 (rlmp) movq %rax, %rdi / arg1 (returnval) call audit_pltexit@PLT - + /* * Clean up after ourselves and return to the - * original calling procedure. + * original calling procedure. Make sure to restore + * registers. 
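 *
 * (Editorial note, not part of the original change: at this point %rax holds
 * the value returned by audit_pltexit(), which becomes the return value seen
 * by the original caller.  The indirect call to the FPU restore routine below
 * may clobber %rax (the xrstor flavor loads the feature mask into %edx:%eax),
 * so the code parks it in the SPPRVSTKOFF(%rbp) slot, whose previous-stack-size
 * value is no longer needed here, and reloads it after the restore.)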
*/ - / - / Restore registers - / - - movq org_scapset@GOTPCREL(%rip),%r9 - movq (%r9),%r9 /* Syscapset_t pointer */ - movl 8(%r9),%edx /* sc_hw_2 */ - testl $AV_386_2_AVX512F,%edx - jne .trace_r3_zmm - movl (%r9),%edx /* sc_hw_1 */ - testl $AV_386_AVX,%edx - jne .trace_r3_ymm - -.trace_r3_xmm: - movdqa SPXMM0OFF(%rsp), %xmm0 - movdqa SPXMM1OFF(%rsp), %xmm1 - movdqa SPXMM2OFF(%rsp), %xmm2 - movdqa SPXMM3OFF(%rsp), %xmm3 - movdqa SPXMM4OFF(%rsp), %xmm4 - movdqa SPXMM5OFF(%rsp), %xmm5 - movdqa SPXMM6OFF(%rsp), %xmm6 - movdqa SPXMM7OFF(%rsp), %xmm7 - jmp .trace_r3_finish - -.trace_r3_ymm: - vmovdqa SPXMM0OFF(%rsp), %ymm0 - vmovdqa SPXMM1OFF(%rsp), %ymm1 - vmovdqa SPXMM2OFF(%rsp), %ymm2 - vmovdqa SPXMM3OFF(%rsp), %ymm3 - vmovdqa SPXMM4OFF(%rsp), %ymm4 - vmovdqa SPXMM5OFF(%rsp), %ymm5 - vmovdqa SPXMM6OFF(%rsp), %ymm6 - vmovdqa SPXMM7OFF(%rsp), %ymm7 - jmp .trace_r3_finish - -.trace_r3_zmm: - vmovdqa64 SPXMM0OFF(%rsp), %zmm0 - vmovdqa64 SPXMM1OFF(%rsp), %zmm1 - vmovdqa64 SPXMM2OFF(%rsp), %zmm2 - vmovdqa64 SPXMM3OFF(%rsp), %zmm3 - vmovdqa64 SPXMM4OFF(%rsp), %zmm4 - vmovdqa64 SPXMM5OFF(%rsp), %zmm5 - vmovdqa64 SPXMM6OFF(%rsp), %zmm6 - vmovdqa64 SPXMM7OFF(%rsp), %zmm7 - -.trace_r3_finish: - movq SPRDIOFF(%rsp), %rdi - movq SPRSIOFF(%rsp), %rsi - movq SPRDXOFF(%rsp), %rdx - movq SPRCXOFF(%rsp), %rcx - movq SPR8OFF(%rsp), %r8 - movq SPR9OFF(%rsp), %r9 - movq SPR10OFF(%rsp), %r10 - movq SPR11OFF(%rsp), %r11 - // rax already contains return value + movq _plt_fp_restore@GOTPCREL(%rip),%r10 + movq %rsp, %rdi + movq %rax, SPPRVSTKOFF(%rbp) + call *(%r10) + + movq ORIG_RDI(%rbp), %rdi + movq ORIG_RSI(%rbp), %rsi + movq ORIG_RDX(%rbp), %rdx + movq ORIG_RCX(%rbp), %rcx + movq ORIG_R8(%rbp), %r8 + movq ORIG_R9(%rbp), %r9 + movq ORIG_R10(%rbp), %r10 + movq ORIG_R11(%rbp), %r11 + movq SPPRVSTKOFF(%rbp), %rax movq %rbp, %rsp / popq %rbp / ret / return to caller - .size elf_plt_trace, .-elf_plt_trace + SET_SIZE(elf_plt_trace) #endif /* @@ -640,227 +653,91 @@ elf_rtbndr(Rt_map * lmp, unsigned long reloc, caddr_t pc) #define LBRPCOFF 24 /* arg3 - pc of caller */ /* - * Possible arguments for the resolved function are in registers as per - * the AMD64 ABI. We must save on the local stack all possible register - * arguments before interposing functions to resolve the called function. - * Possible arguments must be restored before invoking the resolved function. - * - * Before the AVX and AVX512 instruction set enhancements to AMD64, there were - * no changes in the set of registers and their sizes across different - * processors. With AVX, the xmm registers became the lower 128 bits of the ymm - * registers. With AVX512, the ymm registers became the lower 256 bits of the - * zmm registers. Because of this, we need to conditionally save either 256 bits - * or 512 bits, instead of 128 bits. Regardless of whether we have zmm registers - * or not, we're always going to push the stack space assuming that we do, to - * simplify the code. - * - * Local stack space storage for elf_rtbndr is allocated as follows: - * - * Saved regs: - * %rax 8 - * %rdi 8 - * %rsi 8 - * %rdx 8 - * %rcx 8 - * %r8 8 - * %r9 8 - * %r10 8 - * ======= - * Subtotal: 64 (64byte aligned) + * With the above in place, we must now proceed to preserve all temporary + * registers that are also used for passing arguments. 
Specifically this + * means: * - * Saved Media Regs (used to pass floating point args): - * %zmm0 - %zmm7 64 * 8 512 - * ======= - * Total: 576 (64byte aligned) - * - * So - will subtract the following to create enough space + * %rax - Used for information about the number of vector arguments + * %rdi - arg0 + * %rsi - arg1 + * %rdx - arg2 + * %rcx - arg3 + * %r8 - arg4 + * %r9 - arg5 + * %r10 - static chain pointer * - * 0(%rsp) save %rax - * 8(%rsp) save %rdi - * 16(%rsp) save %rsi - * 24(%rsp) save %rdx - * 32(%rsp) save %rcx - * 40(%rsp) save %r8 - * 48(%rsp) save %r9 - * 56(%rsp) save %r10 - * 64(%rsp) save %zmm0 - * 128(%rsp) save %zmm1 - * 192(%rsp) save %zmm2 - * 256(%rsp) save %zmm3 - * 320(%rsp) save %zmm4 - * 384(%rsp) save %zmm5 - * 448(%rsp) save %zmm6 - * 512(%rsp) save %zmm7 - * - * Note: Some callers may use 8-byte stack alignment. We use %rsp offsets to - * save/restore registers because %rbp may not be 64-byte aligned. We guarantee - * %rsp is 64-byte aligned in the function preamble. + * While we don't have to preserve %r11, we do have to preserve the FPU + * registers. The FPU logic is delegated to a specific function that we'll call. + * However, it requires that its stack is 64-byte aligned. We defer the + * alignment to that point. This will also take care of the fact that a caller + * may not call us with a correctly aligned stack pointer per the amd64 ABI. */ -/* - * As the registers may either be xmm, ymm or zmm, we've left the name as xmm, - * but increased the offset between them to always cover the zmm case. - */ -#define LS_SIZE $576 /* local stack space to save all possible arguments */ -#define LSRAXOFF 0 /* for SSE register count */ -#define LSRDIOFF 8 /* arg 0 ... */ -#define LSRSIOFF 16 -#define LSRDXOFF 24 -#define LSRCXOFF 32 -#define LSR8OFF 40 -#define LSR9OFF 48 -#define LSR10OFF 56 /* ... arg 5 */ -#define LSXMM0OFF 64 /* SSE arg 0 ... */ -#define LSXMM1OFF 128 -#define LSXMM2OFF 192 -#define LSXMM3OFF 256 -#define LSXMM4OFF 320 -#define LSXMM5OFF 384 -#define LSXMM6OFF 448 -#define LSXMM7OFF 512 /* ... SSE arg 7 */ - /* - * The org_scapset is a global variable that is a part of rtld. It - * contains the capabilities that the kernel has told us are supported - * (auxv_hwcap). This is necessary for determining whether or not we - * need to save and restore AVX registers or simple SSE registers. Note, - * that the field we care about is currently at offset 0, if that - * changes, this code will have to be updated. - */ - .extern org_scapset + .extern _plt_save_size + .extern _plt_fp_save + .extern plt_fp_restore + .weak _elf_rtbndr _elf_rtbndr = elf_rtbndr ENTRY(elf_rtbndr) - - pushq %rbp + pushq %rbp /* Establish stack frame */ movq %rsp, %rbp /* - * Some libraries may (incorrectly) use non-ABI compliant 8-byte stack - * alignment. Enforce 64-byte stack alignment here. - * The next andq instruction essentially does this: - * if necessary, subtract N bytes from %rsp to align on 64 bytes. + * Save basic regs. */ - andq $-64, %rsp /* enforce 64-byte stack alignment */ - - subq LS_SIZE, %rsp /* save all ABI defined argument registers */ - - movq %rax, LSRAXOFF(%rsp) /* for SSE register count */ - movq %rdi, LSRDIOFF(%rsp) /* arg 0 .. */ - movq %rsi, LSRSIOFF(%rsp) - movq %rdx, LSRDXOFF(%rsp) - movq %rcx, LSRCXOFF(%rsp) - movq %r8, LSR8OFF(%rsp) - movq %r9, LSR9OFF(%rsp) /* .. 
arg 5 */ - movq %r10, LSR10OFF(%rsp) /* call chain reg */ + pushq %rax + pushq %rdi + pushq %rsi + pushq %rdx + pushq %rcx + pushq %r8 + pushq %r9 + pushq %r10 + pushq %r12 /* - * Our xmm registers could secretly be ymm or zmm registers in disguise. + * Save the amount of space we need for the FPU registers and call that + * function. Save %rsp before we manipulate it to make restore easier. */ - movq org_scapset@GOTPCREL(%rip),%r9 - movq (%r9),%r9 /* Syscapset_t pointer */ - movl 8(%r9),%edx /* sc_hw_2 */ - testl $AV_386_2_AVX512F,%edx - jne .save_zmm - movl (%r9),%edx /* sc_hw_1 */ - testl $AV_386_AVX,%edx - jne .save_ymm - -.save_xmm: - movdqa %xmm0, LSXMM0OFF(%rsp) /* SSE arg 0 ... */ - movdqa %xmm1, LSXMM1OFF(%rsp) - movdqa %xmm2, LSXMM2OFF(%rsp) - movdqa %xmm3, LSXMM3OFF(%rsp) - movdqa %xmm4, LSXMM4OFF(%rsp) - movdqa %xmm5, LSXMM5OFF(%rsp) - movdqa %xmm6, LSXMM6OFF(%rsp) - movdqa %xmm7, LSXMM7OFF(%rsp) /* ... SSE arg 7 */ - jmp .save_finish + movq %rsp, %r12 + movq _plt_save_size@GOTPCREL(%rip),%r9 + movq _plt_fp_save@GOTPCREL(%rip),%r10 + subq (%r9), %rsp + andq $-64, %rsp -.save_ymm: - vmovdqa %ymm0, LSXMM0OFF(%rsp) /* SSE arg 0 ... */ - vmovdqa %ymm1, LSXMM1OFF(%rsp) - vmovdqa %ymm2, LSXMM2OFF(%rsp) - vmovdqa %ymm3, LSXMM3OFF(%rsp) - vmovdqa %ymm4, LSXMM4OFF(%rsp) - vmovdqa %ymm5, LSXMM5OFF(%rsp) - vmovdqa %ymm6, LSXMM6OFF(%rsp) - vmovdqa %ymm7, LSXMM7OFF(%rsp) /* ... SSE arg 7 */ - jmp .save_finish + movq %rsp, %rdi + call *(%r10) -.save_zmm: - vmovdqa64 %zmm0, LSXMM0OFF(%rsp) /* SSE arg 0 ... */ - vmovdqa64 %zmm1, LSXMM1OFF(%rsp) - vmovdqa64 %zmm2, LSXMM2OFF(%rsp) - vmovdqa64 %zmm3, LSXMM3OFF(%rsp) - vmovdqa64 %zmm4, LSXMM4OFF(%rsp) - vmovdqa64 %zmm5, LSXMM5OFF(%rsp) - vmovdqa64 %zmm6, LSXMM6OFF(%rsp) - vmovdqa64 %zmm7, LSXMM7OFF(%rsp) /* ... SSE arg 7 */ - -.save_finish: + /* + * Perform actual PLT logic. Note that the plt related arguments are + * located at an offset relative to %rbp. + */ movq LBPLMPOFF(%rbp), %rdi /* arg1 - *lmp */ movq LBPRELOCOFF(%rbp), %rsi /* arg2 - reloc index */ movq LBRPCOFF(%rbp), %rdx /* arg3 - pc of caller */ call elf_bndr@PLT /* call elf_rtbndr(lmp, relndx, pc) */ movq %rax, LBPRELOCOFF(%rbp) /* store final destination */ - /* - * Restore possible arguments before invoking resolved function. We - * check the xmm, ymm, vs. zmm regs first so we can use the others. 
- */ - movq org_scapset@GOTPCREL(%rip),%r9 - movq (%r9),%r9 /* Syscapset_t pointer */ - movl 8(%r9),%edx /* sc_hw_2 */ - testl $AV_386_2_AVX512F,%edx - jne .restore_zmm - movl (%r9),%edx /* sc_hw_1 */ - testl $AV_386_AVX,%edx - jne .restore_ymm + /* Restore FPU */ + movq _plt_fp_restore@GOTPCREL(%rip),%r10 -.restore_xmm: - movdqa LSXMM0OFF(%rsp), %xmm0 - movdqa LSXMM1OFF(%rsp), %xmm1 - movdqa LSXMM2OFF(%rsp), %xmm2 - movdqa LSXMM3OFF(%rsp), %xmm3 - movdqa LSXMM4OFF(%rsp), %xmm4 - movdqa LSXMM5OFF(%rsp), %xmm5 - movdqa LSXMM6OFF(%rsp), %xmm6 - movdqa LSXMM7OFF(%rsp), %xmm7 - jmp .restore_finish + movq %rsp, %rdi + call *(%r10) -.restore_ymm: - vmovdqa LSXMM0OFF(%rsp), %ymm0 - vmovdqa LSXMM1OFF(%rsp), %ymm1 - vmovdqa LSXMM2OFF(%rsp), %ymm2 - vmovdqa LSXMM3OFF(%rsp), %ymm3 - vmovdqa LSXMM4OFF(%rsp), %ymm4 - vmovdqa LSXMM5OFF(%rsp), %ymm5 - vmovdqa LSXMM6OFF(%rsp), %ymm6 - vmovdqa LSXMM7OFF(%rsp), %ymm7 - jmp .restore_finish + movq %r12, %rsp + popq %r12 + popq %r10 + popq %r9 + popq %r8 + popq %rcx + popq %rdx + popq %rsi + popq %rdi + popq %rax -.restore_zmm: - vmovdqa64 LSXMM0OFF(%rsp), %zmm0 - vmovdqa64 LSXMM1OFF(%rsp), %zmm1 - vmovdqa64 LSXMM2OFF(%rsp), %zmm2 - vmovdqa64 LSXMM3OFF(%rsp), %zmm3 - vmovdqa64 LSXMM4OFF(%rsp), %zmm4 - vmovdqa64 LSXMM5OFF(%rsp), %zmm5 - vmovdqa64 LSXMM6OFF(%rsp), %zmm6 - vmovdqa64 LSXMM7OFF(%rsp), %zmm7 - -.restore_finish: - movq LSRAXOFF(%rsp), %rax - movq LSRDIOFF(%rsp), %rdi - movq LSRSIOFF(%rsp), %rsi - movq LSRDXOFF(%rsp), %rdx - movq LSRCXOFF(%rsp), %rcx - movq LSR8OFF(%rsp), %r8 - movq LSR9OFF(%rsp), %r9 - movq LSR10OFF(%rsp), %r10 - - movq %rbp, %rsp + movq %rbp, %rsp /* Restore our stack frame */ popq %rbp addq $8, %rsp /* pop 1st plt-pushed args */ @@ -869,5 +746,6 @@ elf_rtbndr(Rt_map * lmp, unsigned long reloc, caddr_t pc) /* final destination */ ret /* invoke resolved function */ - .size elf_rtbndr, .-elf_rtbndr + + SET_SIZE(elf_rtbndr) #endif diff --git a/usr/src/uts/common/exec/elf/elf.c b/usr/src/uts/common/exec/elf/elf.c index 5c8d03ae11..141baa4aeb 100644 --- a/usr/src/uts/common/exec/elf/elf.c +++ b/usr/src/uts/common/exec/elf/elf.c @@ -26,7 +26,7 @@ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ /* All Rights Reserved */ /* - * Copyright 2016 Joyent, Inc. + * Copyright (c) 2018, Joyent, Inc. */ #include @@ -70,6 +70,7 @@ #if defined(__x86) #include +#include #endif /* defined(__x86) */ @@ -289,8 +290,8 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap, int brand_action) { caddr_t phdrbase = NULL; - caddr_t bssbase = 0; - caddr_t brkbase = 0; + caddr_t bssbase = 0; + caddr_t brkbase = 0; size_t brksize = 0; ssize_t dlnsize; aux_entry_t *aux; @@ -522,9 +523,12 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap, * On supported kernels (x86_64) make room in the auxv for the * AT_SUN_COMMPAGE entry. This will go unpopulated on i86xpv systems * which do not provide such functionality. + * + * Additionally cover the floating point information AT_SUN_FPSIZE and + * AT_SUN_FPTYPE. 
*/ #if defined(__amd64) - args->auxsize += sizeof (aux_entry_t); + args->auxsize += 3 * sizeof (aux_entry_t); #endif /* defined(__amd64) */ if ((brand_action != EBA_NATIVE) && (PROC_IS_BRANDED(p))) { @@ -833,6 +837,8 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap, if (hasauxv) { int auxf = AF_SUN_HWCAPVERIFY; + size_t fpsize; + int fptype; /* * Note: AT_SUN_PLATFORM and AT_SUN_EXECNAME were filled in via @@ -910,7 +916,8 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap, } /* - * Add the comm page auxv entry, mapping it in if needed. + * Add the comm page auxv entry, mapping it in if needed. Also + * take care of the FPU entries. */ #if defined(__amd64) if (args->commpage != NULL || @@ -923,6 +930,16 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap, */ ADDAUX(aux, AT_NULL, 0) } + + fptype = AT_386_FPINFO_NONE; + fpu_auxv_info(&fptype, &fpsize); + if (fptype != AT_386_FPINFO_NONE) { + ADDAUX(aux, AT_SUN_FPTYPE, fptype) + ADDAUX(aux, AT_SUN_FPSIZE, fpsize) + } else { + ADDAUX(aux, AT_NULL, 0) + ADDAUX(aux, AT_NULL, 0) + } #endif /* defined(__amd64) */ ADDAUX(aux, AT_NULL, 0) diff --git a/usr/src/uts/common/sys/auxv.h b/usr/src/uts/common/sys/auxv.h index 5212a2efb6..1fb5011970 100644 --- a/usr/src/uts/common/sys/auxv.h +++ b/usr/src/uts/common/sys/auxv.h @@ -29,7 +29,7 @@ * Use is subject to license terms. */ /* - * Copyright 2016 Joyent, Inc. + * Copyright (c) 2018, Joyent, Inc. */ #ifndef _SYS_AUXV_H @@ -193,8 +193,11 @@ extern uint_t getisax(uint32_t *, uint_t); #define AT_SUN_COMMPAGE 2026 /* - * Note that 2023 is reserved for the AT_SUN_HWCAP2 word defined above. + * These two AUX vectors are generally used to define information about the FPU. + * These values are currently only used on the x86 platform. */ +#define AT_SUN_FPTYPE 2027 +#define AT_SUN_FPSIZE 2028 /* * The kernel is in a better position to determine whether a process needs to diff --git a/usr/src/uts/common/sys/auxv_386.h b/usr/src/uts/common/sys/auxv_386.h index d94eb18351..dbc6116c25 100644 --- a/usr/src/uts/common/sys/auxv_386.h +++ b/usr/src/uts/common/sys/auxv_386.h @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2017, Joyent, Inc. + * Copyright (c) 2018, Joyent, Inc. */ #ifndef _SYS_AUXV_386_H @@ -113,6 +113,19 @@ extern "C" { "\015avx512er\014avx512pf\013avx512ifma\012avx512dq\011avx512f" \ "\010rdseed\07adx\06avx2\05fma\04bmi2\03bmi1\02rdrand\01f16c" +/* + * Flags used in AT_SUN_FPTYPE on x86. + * + * We don't currently support xsavec in illumos. However, when we do, then we + * should add this to the type list and extend our primary consumer (rtld) to + * use it. xsaveopt is not in this list because it is not appropriate for the + * stack based storage. + */ +#define AT_386_FPINFO_NONE 0 +#define AT_386_FPINFO_FXSAVE 1 +#define AT_386_FPINFO_XSAVE 2 +#define AT_386_FPINFO_XSAVE_AMD 3 + #ifdef __cplusplus } #endif diff --git a/usr/src/uts/common/sys/user.h b/usr/src/uts/common/sys/user.h index 7ae8bdfa4b..0b997c518c 100644 --- a/usr/src/uts/common/sys/user.h +++ b/usr/src/uts/common/sys/user.h @@ -26,7 +26,7 @@ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ /* All Rights Reserved */ /* - * Copyright 2016 Joyent, Inc. + * Copyright (c) 2018, Joyent, Inc. 
*/ @@ -187,7 +187,7 @@ typedef struct { /* kernel syscall set type */ #if defined(__sparc) #define __KERN_NAUXV_IMPL 20 #elif defined(__i386) || defined(__amd64) -#define __KERN_NAUXV_IMPL 22 +#define __KERN_NAUXV_IMPL 25 #endif struct execsw; diff --git a/usr/src/uts/i86pc/os/fpu_subr.c b/usr/src/uts/i86pc/os/fpu_subr.c index 5ff8fdb655..3e027269fb 100644 --- a/usr/src/uts/i86pc/os/fpu_subr.c +++ b/usr/src/uts/i86pc/os/fpu_subr.c @@ -21,7 +21,7 @@ /* * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2017 Joyent, Inc. + * Copyright (c) 2018, Joyent, Inc. */ /* @@ -35,6 +35,7 @@ #include #include #include +#include #define XMM_ALIGN 16 @@ -60,14 +61,16 @@ int fpu_exists = 1; int fp_kind = FP_387; +/* + * The kind of FPU we advertise to rtld so it knows what to do on context + * switch. + */ +int fp_elf = AT_386_FPINFO_FXSAVE; + /* * Mechanism to save FPU state. */ -#if defined(__amd64) int fp_save_mech = FP_FXSAVE; -#elif defined(__i386) -int fp_save_mech = FP_FNSAVE; -#endif /* * The variable fpu_ignored is provided to allow other code to @@ -84,16 +87,6 @@ int use_sse_pagecopy = 0; int use_sse_pagezero = 0; int use_sse_copy = 0; -#if defined(__i386) - -/* - * The variable fpu_pentium_fdivbug is provided to allow other code to - * determine whether the system contains a Pentium with the FDIV problem. - */ -int fpu_pentium_fdivbug = 0; - -#endif - #if defined(__xpv) /* @@ -117,230 +110,142 @@ int fpu_pentium_fdivbug = 0; void fpu_probe(void) { - do { - if (fpu_initial_probe() != 0) - continue; - - if (fpu_exists == 0) { - fpu_ignored = 1; - continue; - } + if (fpu_initial_probe() != 0) + goto nofpu; -#if defined(__i386) - fpu_pentium_fdivbug = fpu_probe_pentium_fdivbug(); - /* - * The test does some real floating point operations. - * Reset it back to previous state. - */ - (void) fpu_initial_probe(); - - if (fpu_pentium_fdivbug != 0) { - fpu_ignored = 1; - continue; - } -#endif + if (fpu_exists == 0) { + fpu_ignored = 1; + goto nofpu; + } #ifndef __xpv - /* - * Check and see if the fpu is present by looking - * at the "extension type" bit. (While this used to - * indicate a 387DX coprocessor in days gone by, - * it's forced on by modern implementations for - * compatibility.) - */ - if ((getcr0() & CR0_ET) == 0) - continue; + /* + * Check and see if the fpu is present by looking + * at the "extension type" bit. (While this used to + * indicate a 387DX coprocessor in days gone by, + * it's forced on by modern implementations for + * compatibility.) + */ + if ((getcr0() & CR0_ET) == 0) + goto nofpu; #endif -#if defined(__amd64) - /* Use the more complex exception clearing code if necessary */ - if (cpuid_need_fp_excp_handling()) - fpsave_ctxt = fpxsave_excp_clr_ctxt; + /* Use the more complex exception clearing code if necessary */ + if (cpuid_need_fp_excp_handling()) + fpsave_ctxt = fpxsave_excp_clr_ctxt; - /* - * SSE and SSE2 are required for the 64-bit ABI. - * - * If they're not present, we can in principal run - * 32-bit userland, though 64-bit processes will be hosed. - * - * (Perhaps we should complain more about this case!) - */ - if (is_x86_feature(x86_featureset, X86FSET_SSE) && - is_x86_feature(x86_featureset, X86FSET_SSE2)) { - fp_kind |= __FP_SSE; - ENABLE_SSE(); - - if (is_x86_feature(x86_featureset, X86FSET_AVX)) { - ASSERT(is_x86_feature(x86_featureset, - X86FSET_XSAVE)); - fp_kind |= __FP_AVX; - } + /* + * SSE and SSE2 are required for the 64-bit ABI. 
+ * + * If they're not present, we can in principal run + * 32-bit userland, though 64-bit processes will be hosed. + * + * (Perhaps we should complain more about this case!) + */ + if (is_x86_feature(x86_featureset, X86FSET_SSE) && + is_x86_feature(x86_featureset, X86FSET_SSE2)) { + fp_kind |= __FP_SSE; + ENABLE_SSE(); + + if (is_x86_feature(x86_featureset, X86FSET_AVX)) { + ASSERT(is_x86_feature(x86_featureset, + X86FSET_XSAVE)); + fp_kind |= __FP_AVX; + } - if (is_x86_feature(x86_featureset, X86FSET_XSAVE)) { - fp_save_mech = FP_XSAVE; - if (is_x86_feature(x86_featureset, - X86FSET_XSAVEOPT)) { - /* - * Use the more complex exception - * clearing code if necessary. - */ - if (cpuid_need_fp_excp_handling()) { - fpsave_ctxt = - xsaveopt_excp_clr_ctxt; - } else { - fpsave_ctxt = xsaveopt_ctxt; - } - xsavep = xsaveopt; + if (is_x86_feature(x86_featureset, X86FSET_XSAVE)) { + fp_save_mech = FP_XSAVE; + fp_elf = AT_386_FPINFO_XSAVE; + if (is_x86_feature(x86_featureset, X86FSET_XSAVEOPT)) { + /* + * Use the more complex exception + * clearing code if necessary. + */ + if (cpuid_need_fp_excp_handling()) { + fpsave_ctxt = xsaveopt_excp_clr_ctxt; + fp_elf = AT_386_FPINFO_XSAVE_AMD; } else { - /* - * Use the more complex exception - * clearing code if necessary. - */ - if (cpuid_need_fp_excp_handling()) { - fpsave_ctxt = - xsave_excp_clr_ctxt; - } else { - fpsave_ctxt = xsave_ctxt; - } + fpsave_ctxt = xsaveopt_ctxt; } - patch_xsave(); - fpsave_cachep = kmem_cache_create("xsave_cache", - cpuid_get_xsave_size(), XSAVE_ALIGN, - NULL, NULL, NULL, NULL, NULL, 0); + xsavep = xsaveopt; } else { - /* fp_save_mech defaults to FP_FXSAVE */ - fpsave_cachep = - kmem_cache_create("fxsave_cache", - sizeof (struct fxsave_state), FXSAVE_ALIGN, - NULL, NULL, NULL, NULL, NULL, 0); - } - } -#elif defined(__i386) - /* - * SSE and SSE2 are both optional, and we patch kernel - * code to exploit it when present. - */ - if (is_x86_feature(x86_featureset, X86FSET_SSE)) { - fp_kind |= __FP_SSE; - ENABLE_SSE(); - fp_save_mech = FP_FXSAVE; - /* - * Use the more complex exception clearing code if - * necessary. - */ - if (cpuid_need_fp_excp_handling()) { - fpsave_ctxt = fpxsave_excp_clr_ctxt; - } else { - fpsave_ctxt = fpxsave_ctxt; - } - - if (is_x86_feature(x86_featureset, X86FSET_SSE2)) { - patch_sse2(); - } - - if (is_x86_feature(x86_featureset, X86FSET_AVX)) { - ASSERT(is_x86_feature(x86_featureset, - X86FSET_XSAVE)); - fp_kind |= __FP_AVX; - } - - if (is_x86_feature(x86_featureset, X86FSET_XSAVE)) { - fp_save_mech = FP_XSAVE; - if (is_x86_feature(x86_featureset, - X86FSET_XSAVEOPT)) { - /* - * Use the more complex exception - * clearing code if necessary. - */ - if (cpuid_need_fp_excp_handling()) { - fpsave_ctxt = - xsaveopt_excp_clr_ctxt; - } else { - fpsave_ctxt = xsaveopt_ctxt; - } - xsavep = xsaveopt; + /* + * Use the more complex exception + * clearing code if necessary. + */ + if (cpuid_need_fp_excp_handling()) { + fpsave_ctxt = xsave_excp_clr_ctxt; + fp_elf = AT_386_FPINFO_XSAVE_AMD; } else { - /* - * Use the more complex exception - * clearing code if necessary. 
- */ - if (cpuid_need_fp_excp_handling()) { - fpsave_ctxt = - xsave_excp_clr_ctxt; - } else { - fpsave_ctxt = xsave_ctxt; - } + fpsave_ctxt = xsave_ctxt; } - patch_xsave(); - fpsave_cachep = kmem_cache_create("xsave_cache", - cpuid_get_xsave_size(), XSAVE_ALIGN, - NULL, NULL, NULL, NULL, NULL, 0); - } else { - patch_sse(); /* use fxrstor */ - fpsave_cachep = - kmem_cache_create("fxsave_cache", - sizeof (struct fxsave_state), FXSAVE_ALIGN, - NULL, NULL, NULL, NULL, NULL, 0); } + patch_xsave(); + fpsave_cachep = kmem_cache_create("xsave_cache", + cpuid_get_xsave_size(), XSAVE_ALIGN, + NULL, NULL, NULL, NULL, NULL, 0); } else { - remove_x86_feature(x86_featureset, X86FSET_SSE2); - /* - * We will not likely to have a chip with AVX but not - * SSE. But to be safe we disable AVX if SSE is not - * enabled. - */ - remove_x86_feature(x86_featureset, X86FSET_AVX); - /* - * (Just in case the BIOS decided we wanted SSE - * enabled when we didn't. See 4965674.) - */ - DISABLE_SSE(); - - /* - * fp_save_mech defaults to FP_FNSAVE. Use the same - * alignment as used for fxsave (preserves legacy - * behavior). - */ - fpsave_cachep = kmem_cache_create("fnsave_cache", - sizeof (struct fnsave_state), FXSAVE_ALIGN, + /* fp_save_mech defaults to FP_FXSAVE */ + fpsave_cachep = kmem_cache_create("fxsave_cache", + sizeof (struct fxsave_state), FXSAVE_ALIGN, NULL, NULL, NULL, NULL, NULL, 0); + fp_elf = AT_386_FPINFO_FXSAVE; } -#endif - if (is_x86_feature(x86_featureset, X86FSET_SSE2)) { - use_sse_pagecopy = use_sse_pagezero = use_sse_copy = 1; - } + } - if (fp_kind & __FP_SSE) { - struct fxsave_state *fx; - uint8_t fxsave_state[sizeof (struct fxsave_state) + - XMM_ALIGN]; + if (is_x86_feature(x86_featureset, X86FSET_SSE2)) { + use_sse_pagecopy = use_sse_pagezero = use_sse_copy = 1; + } + if (fp_kind & __FP_SSE) { + struct fxsave_state *fx; + uint8_t fxsave_state[sizeof (struct fxsave_state) + XMM_ALIGN]; + + /* + * Extract the mxcsr mask from our first fxsave + */ + fx = (void *)(((uintptr_t)(&fxsave_state[0]) + + XMM_ALIGN) & ~(XMM_ALIGN - 1ul)); + + fx->fx_mxcsr_mask = 0; + fxsave_insn(fx); + if (fx->fx_mxcsr_mask != 0) { /* - * Extract the mxcsr mask from our first fxsave + * Override default mask initialized in fpu.c */ - fx = (void *)(((uintptr_t)(&fxsave_state[0]) + - XMM_ALIGN) & ~(XMM_ALIGN - 1ul)); - - fx->fx_mxcsr_mask = 0; - fxsave_insn(fx); - if (fx->fx_mxcsr_mask != 0) { - /* - * Override default mask initialized in fpu.c - */ - sse_mxcsr_mask = fx->fx_mxcsr_mask; - } + sse_mxcsr_mask = fx->fx_mxcsr_mask; } + } - setcr0(CR0_ENABLE_FPU_FLAGS(getcr0())); - return; - /*CONSTANTCONDITION*/ - } while (0); + setcr0(CR0_ENABLE_FPU_FLAGS(getcr0())); + return; /* * No FPU hardware present */ +nofpu: setcr0(CR0_DISABLE_FPU_FLAGS(getcr0())); DISABLE_SSE(); fp_kind = FP_NO; fpu_exists = 0; } + +/* + * Fill in FPU information that is required by exec. + */ +void +fpu_auxv_info(int *typep, size_t *lenp) +{ + *typep = fp_elf; + switch (fp_save_mech) { + case FP_FXSAVE: + *lenp = sizeof (struct fxsave_state); + break; + case FP_XSAVE: + *lenp = cpuid_get_xsave_size(); + break; + default: + *lenp = 0; + break; + } +} diff --git a/usr/src/uts/intel/ia32/os/fpu.c b/usr/src/uts/intel/ia32/os/fpu.c index 694d0f9feb..c307c97957 100644 --- a/usr/src/uts/intel/ia32/os/fpu.c +++ b/usr/src/uts/intel/ia32/os/fpu.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2017 Joyent, Inc. + * Copyright (c) 2018, Joyent, Inc. 
*/ /* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */ @@ -237,7 +237,7 @@ fp_new_lwp(kthread_id_t t, kthread_id_t ct) cfx->fx_mxcsr = fx->fx_mxcsr & ~SSE_MXCSR_EFLAGS; cfx->fx_fcw = fx->fx_fcw; cxs->xs_xstate_bv |= (get_xcr(XFEATURE_ENABLED_MASK) & - XFEATURE_FP_ALL); + XFEATURE_FP_INITIAL); break; default: panic("Invalid fp_save_mech"); @@ -383,8 +383,7 @@ fp_seed(void) * Always initialize a new context and initialize the hardware. */ if (fp_save_mech == FP_XSAVE) { - fp->fpu_xsave_mask = get_xcr(XFEATURE_ENABLED_MASK) & - XFEATURE_FP_ALL; + fp->fpu_xsave_mask = XFEATURE_FP_ALL; } installctx(curthread, fp, diff --git a/usr/src/uts/intel/sys/fp.h b/usr/src/uts/intel/sys/fp.h index e6d482fdd8..7f08f8c8f8 100644 --- a/usr/src/uts/intel/sys/fp.h +++ b/usr/src/uts/intel/sys/fp.h @@ -20,7 +20,7 @@ */ /* * Copyright 2015 Nexenta Systems, Inc. All rights reserved. - * Copyright 2017 Joyent, Inc. + * Copyright (c) 2018, Joyent, Inc. * * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved. */ @@ -140,8 +140,8 @@ extern "C" { /* * masks and flags for SSE/SSE2 MXCSR */ -#define SSE_IE 0x00000001 /* invalid operation */ -#define SSE_DE 0x00000002 /* denormalized operand */ +#define SSE_IE 0x00000001 /* invalid operation */ +#define SSE_DE 0x00000002 /* denormalized operand */ #define SSE_ZE 0x00000004 /* zero divide */ #define SSE_OE 0x00000008 /* overflow */ #define SSE_UE 0x00000010 /* underflow */ @@ -156,7 +156,7 @@ extern "C" { #define SSE_RC 0x00006000 /* rounding control */ #define SSE_RD 0x00002000 /* rounding control: round down */ #define SSE_RU 0x00004000 /* rounding control: round up */ -#define SSE_FZ 0x00008000 /* flush to zero for masked underflow */ +#define SSE_FZ 0x00008000 /* flush to zero for masked underflow */ #define SSE_MXCSR_EFLAGS \ (SSE_IE|SSE_DE|SSE_ZE|SSE_OE|SSE_UE|SSE_PE) /* 0x3f */ @@ -303,6 +303,8 @@ extern void fpu_probe(void); extern uint_t fpu_initial_probe(void); extern int fpu_probe_pentium_fdivbug(void); +extern void fpu_auxv_info(int *, size_t *); + extern void fpnsave_ctxt(void *); extern void fpxsave_ctxt(void *); extern void xsave_ctxt(void *); diff --git a/usr/src/uts/intel/sys/x86_archext.h b/usr/src/uts/intel/sys/x86_archext.h index 33c4cba41f..c90026f6fe 100644 --- a/usr/src/uts/intel/sys/x86_archext.h +++ b/usr/src/uts/intel/sys/x86_archext.h @@ -690,6 +690,17 @@ extern "C" { (XFEATURE_LEGACY_FP | XFEATURE_SSE | XFEATURE_AVX | XFEATURE_MPX | \ XFEATURE_AVX512 | XFEATURE_PKRU) +/* + * Define the set of xfeature flags that should be considered valid in the xsave + * state vector when we initialize an lwp. This is distinct from the full set so + * that all of the processor's normal logic and tracking of the xsave state is + * usable. This should correspond to the state that's been initialized by the + * ABI to hold meaningful values. Adding additional bits here can have serious + * performance implications and cause performance degradations when using the + * FPU vector (xmm) registers. 
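+ *
+ * For example, a newly initialized lwp is no longer created with every
+ * processor-supported component marked valid in its xstate_bv; only the
+ * legacy x87 and SSE components are treated as holding meaningful initial
+ * state, and the remaining components become valid only once a thread
+ * actually uses them.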
+ */ +#define XFEATURE_FP_INITIAL (XFEATURE_LEGACY_FP | XFEATURE_SSE) + #if !defined(_ASM) #if defined(_KERNEL) || defined(_KMEMUSER) -- cgit v1.2.3 From a32a1f376ed9360264e4a374608fdcc5c4927d63 Mon Sep 17 00:00:00 2001 From: Robert Mustacchi Date: Wed, 13 Jun 2018 18:40:29 +0000 Subject: 9597 Want hypervisor API for FPU management Reviewed by: Patrick Mooney Reviewed by: John Levon Reviewed by: Toomas Soome Approved by: Richard Lowe --- usr/src/uts/i86pc/Makefile.files | 1 + usr/src/uts/i86pc/os/hma_fpu.c | 225 ++++++++++++++++++++++++++++++++++++ usr/src/uts/i86pc/sys/hma.h | 103 +++++++++++++++++ usr/src/uts/intel/ia32/os/archdep.c | 5 +- usr/src/uts/intel/sys/fp.h | 3 + 5 files changed, 333 insertions(+), 4 deletions(-) create mode 100644 usr/src/uts/i86pc/os/hma_fpu.c create mode 100644 usr/src/uts/i86pc/sys/hma.h diff --git a/usr/src/uts/i86pc/Makefile.files b/usr/src/uts/i86pc/Makefile.files index d6eee2b807..b63d059c06 100644 --- a/usr/src/uts/i86pc/Makefile.files +++ b/usr/src/uts/i86pc/Makefile.files @@ -64,6 +64,7 @@ CORE_OBJS += \ hardclk.o \ hat_i86.o \ hat_kdi.o \ + hma_fpu.o \ hment.o \ hold_page.o \ hrtimers.o \ diff --git a/usr/src/uts/i86pc/os/hma_fpu.c b/usr/src/uts/i86pc/os/hma_fpu.c new file mode 100644 index 0000000000..14cfa8baed --- /dev/null +++ b/usr/src/uts/i86pc/os/hma_fpu.c @@ -0,0 +1,225 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright (c) 2018, Joyent, Inc. + */ + +/* + * This implements the hypervisor multiplexor FPU API. Its purpose is to make it + * easy to switch between the host and guest hypervisor while hiding all the + * details about CR0.TS and how to save the host's state as required. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct hma_fpu { + fpu_ctx_t hf_guest_fpu; + kthread_t *hf_curthread; + boolean_t hf_inguest; +}; + +int +hma_fpu_init(hma_fpu_t *fpu) +{ + struct xsave_state *xs; + + ASSERT0(fpu->hf_inguest); + + switch (fp_save_mech) { + case FP_FXSAVE: + bcopy(&sse_initial, fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_fx, + sizeof (struct fxsave_state)); + fpu->hf_guest_fpu.fpu_xsave_mask = 0; + break; + case FP_XSAVE: + /* + * Zero everything in the xsave case as we may have data in + * the structure that's not part of the initial value (which + * only really deals with a small portion of the xsave state). 
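+		 * For example, on hardware with AVX-512 enabled, the area
+		 * sized by cpuid_get_xsave_size() is considerably larger than
+		 * the initial image copied in below, so stale data could
+		 * otherwise linger in the extended region.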
+ */ + xs = fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_xs; + bzero(xs, cpuid_get_xsave_size()); + bcopy(&avx_initial, xs, sizeof (*xs)); + xs->xs_xstate_bv = XFEATURE_LEGACY_FP | XFEATURE_SSE; + fpu->hf_guest_fpu.fpu_xsave_mask = XFEATURE_FP_ALL; + break; + default: + panic("Invalid fp_save_mech"); + } + + fpu->hf_guest_fpu.fpu_flags = FPU_EN | FPU_VALID; + + return (0); +} + +void +hma_fpu_free(hma_fpu_t *fpu) +{ + if (fpu == NULL) + return; + + ASSERT3P(fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_generic, !=, NULL); + kmem_cache_free(fpsave_cachep, + fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_generic); + kmem_free(fpu, sizeof (*fpu)); +} + +hma_fpu_t * +hma_fpu_alloc(int kmflag) +{ + hma_fpu_t *fpu; + + fpu = kmem_zalloc(sizeof (hma_fpu_t), kmflag); + if (fpu == NULL) + return (NULL); + + fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_generic = + kmem_cache_alloc(fpsave_cachep, kmflag); + if (fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_generic == NULL) { + kmem_free(fpu, sizeof (hma_fpu_t)); + return (NULL); + } + fpu->hf_inguest = B_FALSE; + + /* + * Make sure the entire structure is zero. + */ + switch (fp_save_mech) { + case FP_FXSAVE: + bzero(fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_generic, + sizeof (struct fxsave_state)); + break; + case FP_XSAVE: + bzero(fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_generic, + cpuid_get_xsave_size()); + break; + default: + panic("Invalid fp_save_mech"); + } + + return (fpu); +} + +void +hma_fpu_start_guest(hma_fpu_t *fpu) +{ + /* + * Note, we don't check / assert whether or not t_prempt is true because + * there are contexts where this is safe to call (from a context op) + * where t_preempt may not be set. + */ + ASSERT3S(fpu->hf_inguest, ==, B_FALSE); + ASSERT3P(fpu->hf_curthread, ==, NULL); + ASSERT3P(curthread->t_lwp, !=, NULL); + ASSERT3U(fpu->hf_guest_fpu.fpu_flags & FPU_EN, !=, 0); + ASSERT3U(fpu->hf_guest_fpu.fpu_flags & FPU_VALID, !=, 0); + + fpu->hf_inguest = B_TRUE; + fpu->hf_curthread = curthread; + + + fp_save(&curthread->t_lwp->lwp_pcb.pcb_fpu); + fp_restore(&fpu->hf_guest_fpu); + fpu->hf_guest_fpu.fpu_flags &= ~FPU_VALID; +} + +void +hma_fpu_stop_guest(hma_fpu_t *fpu) +{ + ASSERT3S(fpu->hf_inguest, ==, B_TRUE); + ASSERT3P(fpu->hf_curthread, ==, curthread); + ASSERT3U(fpu->hf_guest_fpu.fpu_flags & FPU_EN, !=, 0); + ASSERT3U(fpu->hf_guest_fpu.fpu_flags & FPU_VALID, ==, 0); + + /* + * Note, we can't use fp_save because it assumes that we're saving to + * the thread's PCB and not somewhere else. Because this is a different + * FPU context, we instead have to do this ourselves. + */ + switch (fp_save_mech) { + case FP_FXSAVE: + fpxsave(fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_fx); + break; + case FP_XSAVE: + xsavep(fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_xs, + fpu->hf_guest_fpu.fpu_xsave_mask); + break; + default: + panic("Invalid fp_save_mech"); + /*NOTREACHED*/ + } + fpu->hf_guest_fpu.fpu_flags |= FPU_VALID; + + fp_restore(&curthread->t_lwp->lwp_pcb.pcb_fpu); + + fpu->hf_inguest = B_FALSE; + fpu->hf_curthread = NULL; +} + +void +hma_fpu_get_fxsave_state(const hma_fpu_t *fpu, struct fxsave_state *fx) +{ + const struct fxsave_state *guest; + + ASSERT3S(fpu->hf_inguest, ==, B_FALSE); + + guest = fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_fx; + bcopy(guest, fx, sizeof (*fx)); +} + +int +hma_fpu_set_fxsave_state(hma_fpu_t *fpu, const struct fxsave_state *fx) +{ + struct fxsave_state *gfx; + struct xsave_state *gxs; + + ASSERT3S(fpu->hf_inguest, ==, B_FALSE); + + /* + * If reserved bits are set in fx_mxcsr, then we will take a #GP when + * we restore them. Reject this outright. 
+ * + * We do not need to check if we are dealing with state that has pending + * exceptions. This was only the case with the original FPU save and + * restore mechanisms (fsave/frstor). When using fxsave/fxrstor and + * xsave/xrstor they will be deferred to the user using the FPU, which + * is what we'd want here (they'd be used in guest context). + */ + if ((fx->fx_mxcsr & ~sse_mxcsr_mask) != 0) + return (EINVAL); + + switch (fp_save_mech) { + case FP_FXSAVE: + gfx = fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_fx; + bcopy(fx, gfx, sizeof (*fx)); + break; + case FP_XSAVE: + gxs = fpu->hf_guest_fpu.fpu_regs.kfpu_u.kfpu_xs; + bzero(gxs, cpuid_get_xsave_size()); + bcopy(fx, &gxs->xs_fxsave, sizeof (*fx)); + gxs->xs_xstate_bv = XFEATURE_LEGACY_FP | XFEATURE_SSE; + break; + default: + panic("Invalid fp_save_mech"); + /* NOTREACHED */ + } + + return (0); +} diff --git a/usr/src/uts/i86pc/sys/hma.h b/usr/src/uts/i86pc/sys/hma.h new file mode 100644 index 0000000000..00009cf439 --- /dev/null +++ b/usr/src/uts/i86pc/sys/hma.h @@ -0,0 +1,103 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright (c) 2018, Joyent, Inc. + */ + +#ifndef _SYS_HMA_H +#define _SYS_HMA_H + +/* + * Hypervisor Multiplexor API + * + * This provides a set of APIs that are usable by hypervisor implementations + * that allows them to coexist and to make sure that they are all in a + * consistent state. + */ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * FPU related management. These functions provide a set of APIs to manage the + * FPU state and switch between host and guest management of this state. + */ + +typedef struct hma_fpu hma_fpu_t; + +/* + * Allocate and free FPU state management structures. + */ +extern hma_fpu_t *hma_fpu_alloc(int); +extern void hma_fpu_free(hma_fpu_t *); + +/* + * Resets the FPU to the standard x86 default state. This should be called after + * allocation and whenever the guest needs to logically reset the state (when + * the CPU is reset, etc.). If the system supports xsave, then the xbv state + * will be set to have the x87 and SSE portions as valid and the rest will be + * set to their initial states (regardless of whether or not they will be + * advertised in the host). + */ +extern int hma_fpu_init(hma_fpu_t *); + +/* + * Save the current host's FPU state and restore the guest's state in the FPU. + * At this point, CR0.TS will not be set. The caller must not use the FPU in any + * way before entering the guest. + * + * This should be used in normal operation before entering the guest. It should + * also be used in a thread context operation when the thread is being scheduled + * again. This interface has an implicit assumption that a given guest state + * will be mapped to only one specific OS thread at any given time. + * + * This must be called with preemption disabled. + */ +extern void hma_fpu_start_guest(hma_fpu_t *); + +/* + * Save the current guest's FPU state and restore the host's state in the FPU. + * By the time the thread returns to userland, the FPU will be in a usable + * state; however, the FPU will not be usable while inside the kernel (CR0.TS + * will be set). 
+ * + * This should be used in normal operation after leaving the guest and returning + * to user land. It should also be used in a thread context operation when the + * thread is being descheduled. Like the hma_fpu_start_guest() interface, this + * interface has an implicit assumption that a given guest state will be mapped + * to only a single OS thread at any given time. + * + * This must be called with preemption disabled. + */ +extern void hma_fpu_stop_guest(hma_fpu_t *); + +/* + * Get and set the contents of the FPU save area. This sets the fxsave style + * information. In all cases when this is in use, if an XSAVE state is actually + * used by the host, then this will end up zeroing all of the non-fxsave state + * and it will reset the xbv to indicate that the legacy x87 and SSE portions + * are valid. + * + * These functions cannot be called while the FPU is in use by the guest. It is + * up to callers to guarantee this fact. + */ +extern void hma_fpu_get_fxsave_state(const hma_fpu_t *, struct fxsave_state *); +extern int hma_fpu_set_fxsave_state(hma_fpu_t *, const struct fxsave_state *); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_HMA_H */ diff --git a/usr/src/uts/intel/ia32/os/archdep.c b/usr/src/uts/intel/ia32/os/archdep.c index fb4267665b..6f99571a5a 100644 --- a/usr/src/uts/intel/ia32/os/archdep.c +++ b/usr/src/uts/intel/ia32/os/archdep.c @@ -25,7 +25,7 @@ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ /* All Rights Reserved */ /* - * Copyright 2017 Joyent, Inc. + * Copyright (c) 2018, Joyent, Inc. * Copyright 2012 Nexenta Systems, Inc. All rights reserved. */ @@ -68,9 +68,6 @@ #include #include -extern const struct fnsave_state x87_initial; -extern const struct fxsave_state sse_initial; - /* * Map an fnsave-formatted save area into an fxsave-formatted save area. 
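
Before moving on to the remaining kernel changes, here is a usage illustration
of the hma_fpu interfaces introduced in sys/hma.h above. Only the hma_fpu_*
calls come from this change; the vcpu structure, the guest-entry step, and the
include list are hypothetical and approximate, so treat this as a sketch rather
than code from the series.

	#include <sys/types.h>
	#include <sys/errno.h>
	#include <sys/kmem.h>
	#include <sys/systm.h>
	#include <sys/hma.h>

	typedef struct vcpu {
		hma_fpu_t	*vc_fpu;	/* hypothetical per-vcpu FPU state */
	} vcpu_t;

	static int
	vcpu_fpu_setup(vcpu_t *vc)
	{
		vc->vc_fpu = hma_fpu_alloc(KM_SLEEP);
		if (vc->vc_fpu == NULL)
			return (ENOMEM);
		/* Reset the guest FPU image to the architecturally defined state. */
		return (hma_fpu_init(vc->vc_fpu));
	}

	static void
	vcpu_run_once(vcpu_t *vc)
	{
		/* Both transitions must be made with preemption disabled. */
		kpreempt_disable();
		hma_fpu_start_guest(vc->vc_fpu);	/* host saved, guest loaded */

		/* ... enter and later exit the guest here ... */

		hma_fpu_stop_guest(vc->vc_fpu);		/* guest saved, host restored */
		kpreempt_enable();
	}

	static void
	vcpu_fpu_teardown(vcpu_t *vc)
	{
		hma_fpu_free(vc->vc_fpu);
		vc->vc_fpu = NULL;
	}
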
* diff --git a/usr/src/uts/intel/sys/fp.h b/usr/src/uts/intel/sys/fp.h index 7f08f8c8f8..fe5471e855 100644 --- a/usr/src/uts/intel/sys/fp.h +++ b/usr/src/uts/intel/sys/fp.h @@ -345,6 +345,9 @@ extern void fp_lwp_init(struct _klwp *); extern void fp_lwp_cleanup(struct _klwp *); extern void fp_lwp_dup(struct _klwp *); +extern const struct fxsave_state sse_initial; +extern const struct xsave_state avx_initial; + #endif /* _KERNEL */ #ifdef __cplusplus -- cgit v1.2.3 From 4c28a617e3922d92a58e813a5b955eb526b9c386 Mon Sep 17 00:00:00 2001 From: Robert Mustacchi Date: Wed, 13 Jun 2018 20:20:20 +0000 Subject: 9598 Need Eager FPU Reviewed by: Jerry Jelinek Reviewed by: Patrick Mooney Reviewed by: Bryan Cantrill Reviewed by: John Levon Reviewed by: Toomas Soome Approved by: Richard Lowe --- .../cmd/mdb/intel/modules/genunix/gcore_isadep.c | 3 +- usr/src/uts/common/brand/solaris10/s10_brand.c | 3 +- usr/src/uts/i86pc/ml/kpti_trampolines.s | 28 + usr/src/uts/i86pc/ml/locore.s | 107 +--- usr/src/uts/i86pc/ml/syscall_asm_amd64.s | 40 ++ usr/src/uts/i86pc/os/fpu_subr.c | 5 +- usr/src/uts/i86pc/os/intr.c | 33 +- usr/src/uts/i86pc/os/trap.c | 57 +- usr/src/uts/intel/ia32/ml/exception.s | 228 +------- usr/src/uts/intel/ia32/ml/float.s | 516 ++---------------- usr/src/uts/intel/ia32/os/archdep.c | 9 +- usr/src/uts/intel/ia32/os/fpu.c | 601 +++++++++++++++------ usr/src/uts/intel/ia32/os/sundep.c | 55 +- usr/src/uts/intel/ia32/os/sysi86.c | 2 +- usr/src/uts/intel/ia32/syscall/lwp_private.c | 15 +- usr/src/uts/intel/sys/archsystm.h | 3 - usr/src/uts/intel/sys/fp.h | 24 +- usr/src/uts/intel/sys/pcb.h | 18 +- 18 files changed, 668 insertions(+), 1079 deletions(-) diff --git a/usr/src/cmd/mdb/intel/modules/genunix/gcore_isadep.c b/usr/src/cmd/mdb/intel/modules/genunix/gcore_isadep.c index 73d5ecbb94..d4837bd475 100644 --- a/usr/src/cmd/mdb/intel/modules/genunix/gcore_isadep.c +++ b/usr/src/cmd/mdb/intel/modules/genunix/gcore_isadep.c @@ -10,6 +10,7 @@ */ /* * Copyright (c) 2013 by Delphix. All rights reserved. + * Copyright (c) 2018, Joyent, Inc. */ #include @@ -71,7 +72,7 @@ gcore_getgregs(mdb_klwp_t *lwp, gregset_t grp) grp[REG_R15] = rp->r_r15; grp[REG_FSBASE] = pcb->pcb_fsbase; grp[REG_GSBASE] = pcb->pcb_gsbase; - if (pcb->pcb_rupdate == 1) { + if (PCB_NEED_UPDATE_SEGS(pcb)) { grp[REG_DS] = pcb->pcb_ds; grp[REG_ES] = pcb->pcb_es; grp[REG_FS] = pcb->pcb_fs; diff --git a/usr/src/uts/common/brand/solaris10/s10_brand.c b/usr/src/uts/common/brand/solaris10/s10_brand.c index f24b864eef..0841f02e51 100644 --- a/usr/src/uts/common/brand/solaris10/s10_brand.c +++ b/usr/src/uts/common/brand/solaris10/s10_brand.c @@ -22,6 +22,7 @@ /* * Copyright (c) 2013, OmniTI Computer Consulting, Inc. All rights reserved. * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2018, Joyent, Inc. 
*/ #include @@ -195,7 +196,7 @@ s10_amd64_correct_fsreg(klwp_t *l) if (lwp_getdatamodel(l) == DATAMODEL_NATIVE) { kpreempt_disable(); l->lwp_pcb.pcb_fs = LWPFS_SEL; - l->lwp_pcb.pcb_rupdate = 1; + PCB_SET_UPDATE_SEGS(&l->lwp_pcb); lwptot(l)->t_post_sys = 1; /* Guarantee update_sregs() */ kpreempt_enable(); } diff --git a/usr/src/uts/i86pc/ml/kpti_trampolines.s b/usr/src/uts/i86pc/ml/kpti_trampolines.s index 7e72841e32..036c254d08 100644 --- a/usr/src/uts/i86pc/ml/kpti_trampolines.s +++ b/usr/src/uts/i86pc/ml/kpti_trampolines.s @@ -137,6 +137,12 @@ DGDEF3(kpti_enable, 8, 8) .fill 1, 8, 1 +#if DEBUG + .data +_bad_ts_panic_msg: + .string "kpti_trampolines.s: tr_iret_user but CR0.TS set" +#endif + .section ".text"; .align MMU_PAGESIZE @@ -523,6 +529,28 @@ tr_intr_ret_start: SET_SIZE(tr_iret_kernel) ENTRY_NP(tr_iret_user) +#if DEBUG + /* + * Ensure that we return to user land with CR0.TS clear. We do this + * before we trampoline back and pivot the stack and %cr3. This way + * we're still on the kernel stack and kernel %cr3, though we are on the + * user GSBASE. + */ + pushq %rax + mov %cr0, %rax + testq $CR0_TS, %rax + jz 1f + swapgs + popq %rax + leaq _bad_ts_panic_msg(%rip), %rdi + xorl %eax, %eax + pushq %rbp + movq %rsp, %rbp + call panic +1: + popq %rax +#endif + cmpq $1, kpti_enable jne 1f diff --git a/usr/src/uts/i86pc/ml/locore.s b/usr/src/uts/i86pc/ml/locore.s index 4626dd1492..acd96e271a 100644 --- a/usr/src/uts/i86pc/ml/locore.s +++ b/usr/src/uts/i86pc/ml/locore.s @@ -239,6 +239,11 @@ __return_from_main: __unsupported_cpu: .string "486 style cpu detected - no longer supported!" +#if defined(DEBUG) +_no_pending_updates: + .string "locore.s:%d lwp_rtt(lwp %p) but pcb_rupdate != 1" +#endif + #endif /* !__lint */ #if !defined(__amd64) @@ -1505,8 +1510,6 @@ _sys_rtt(void) #else /* __lint */ -#if defined(__amd64) - ENTRY_NP(lwp_rtt_initial) movq %gs:CPU_THREAD, %r15 movq T_STACK(%r15), %rsp /* switch to the thread stack */ @@ -1549,8 +1552,6 @@ _lwp_rtt: movq %r14, %rdx xorl %eax, %eax call panic -_no_pending_updates: - .string "locore.s:%d lwp_rtt(lwp %p) but pcb_rupdate != 1" 1: #endif @@ -1570,11 +1571,6 @@ _no_pending_updates: movq REGOFF_RAX(%rsp), %rdi call post_syscall /* post_syscall(rval1, rval2) */ - /* - * set up to take fault on first use of fp - */ - STTS(%rdi) - /* * XXX - may want a fast path that avoids sys_rtt_common in the * most common case. @@ -1636,99 +1632,6 @@ _sys_rtt_end: SET_SIZE(sys_rtt_syscall) SET_SIZE(sys_rtt_syscall32) -#elif defined(__i386) - - ENTRY_NP(lwp_rtt_initial) - movl %gs:CPU_THREAD, %eax - movl T_STACK(%eax), %esp /* switch to the thread stack */ - movl %esp, %ebp - call __dtrace_probe___proc_start - jmp _lwp_rtt - - ENTRY_NP(lwp_rtt) - movl %gs:CPU_THREAD, %eax - movl T_STACK(%eax), %esp /* switch to the thread stack */ - movl %esp, %ebp -_lwp_rtt: - call __dtrace_probe___proc_lwp__start - - /* - * If agent lwp, clear %fs and %gs. - */ - movl %gs:CPU_LWP, %eax - movl LWP_PROCP(%eax), %edx - - cmpl %eax, P_AGENTTP(%edx) - jne 1f - movl $0, REGOFF_FS(%esp) - movl $0, REGOFF_GS(%esp) -1: - call dtrace_systrace_rtt - movl REGOFF_EDX(%esp), %edx - movl REGOFF_EAX(%esp), %eax - pushl %edx - pushl %eax - call post_syscall /* post_syscall(rval1, rval2) */ - addl $8, %esp - - /* - * set up to take fault on first use of fp - */ - STTS(%eax) - - /* - * XXX - may want a fast path that avoids sys_rtt_common in the - * most common case. 
- */ - ALTENTRY(_sys_rtt) - CLI(%eax) /* disable interrupts */ - ALTENTRY(_sys_rtt_ints_disabled) - pushl %esp /* pass rp to sys_rtt_common */ - call sys_rtt_common - addl $4, %esp /* pop arg */ - testl %eax, %eax /* test for return to user mode */ - jz sr_sup - - /* - * Return to User. - */ - ALTENTRY(sys_rtt_syscall) - INTR_POP_USER - - /* - * There can be no instructions between this label and IRET or - * we could end up breaking linux brand support. See label usage - * in lx_brand_int80_callback for an example. - */ - ALTENTRY(nopop_sys_rtt_syscall) - IRET - /*NOTREACHED*/ - SET_SIZE(nopop_sys_rtt_syscall) - - ALTENTRY(_sys_rtt_end) - - /* - * Return to supervisor - */ - ALTENTRY(sr_sup) - - /* - * Restore regs before doing iret to kernel mode - */ - INTR_POP_KERNEL - IRET - /*NOTREACHED*/ - - SET_SIZE(sr_sup) - SET_SIZE(_sys_rtt_end) - SET_SIZE(lwp_rtt) - SET_SIZE(lwp_rtt_initial) - SET_SIZE(_sys_rtt_ints_disabled) - SET_SIZE(_sys_rtt) - SET_SIZE(sys_rtt_syscall) - -#endif /* __i386 */ - #endif /* __lint */ #if defined(__lint) diff --git a/usr/src/uts/i86pc/ml/syscall_asm_amd64.s b/usr/src/uts/i86pc/ml/syscall_asm_amd64.s index be6a94c61b..b09b4f1fdc 100644 --- a/usr/src/uts/i86pc/ml/syscall_asm_amd64.s +++ b/usr/src/uts/i86pc/ml/syscall_asm_amd64.s @@ -271,7 +271,18 @@ * between entering privileged mode and performing the assertion, * otherwise we may perform a context switch on the thread, which * will end up setting pcb_rupdate to 1 again. + * + * ASSERT(%cr0 & CR0_TS == 0); + * Preconditions: + * (%rsp is ready for normal call sequence) + * Postconditions (if assertion is true): + * (specified register is clobbered) + * + * Check to make sure that we are returning to user land and that CR0.TS + * is not set. This is required as part of the eager FPU (see + * uts/intel/ia32/os/fpu.c for more information). */ + #if defined(DEBUG) #if !defined(__lint) @@ -285,6 +296,9 @@ __codesel_msg: __no_rupdate_msg: .string "syscall_asm_amd64.s:%d lwp %p, pcb_rupdate != 0" +__bad_ts_msg: + .string "sysscall_asm_amd64.s:%d CR0.TS set on user return" + #endif /* !__lint */ #define ASSERT_LWPTOREGS(lwp, rp) \ @@ -310,9 +324,20 @@ __no_rupdate_msg: call panic; \ 8: +#define ASSERT_CR0TS_ZERO(reg) \ + movq %cr0, reg; \ + testq $CR0_TS, reg; \ + jz 9f; \ + leaq __bad_ts_msg(%rip), %rdi; \ + movl $__LINE__, %esi; \ + xorl %eax, %eax; \ + call panic; \ +9: + #else #define ASSERT_LWPTOREGS(lwp, rp) #define ASSERT_NO_RUPDATE_PENDING(lwp) +#define ASSERT_CR0TS_ZERO(reg) #endif /* @@ -613,6 +638,11 @@ _syscall_invoke: movq %r12, REGOFF_RAX(%rsp) movq %r13, REGOFF_RDX(%rsp) + /* + * Clobber %r11 as we check CR0.TS. + */ + ASSERT_CR0TS_ZERO(%r11) + /* * To get back to userland, we need the return %rip in %rcx and * the return %rfl in %r11d. The sysretq instruction also arranges @@ -896,6 +926,11 @@ _syscall32_save: jne _full_syscall_postsys32 SIMPLE_SYSCALL_POSTSYS(%r15, %r14, %bx) + /* + * Clobber %r11 as we check CR0.TS. + */ + ASSERT_CR0TS_ZERO(%r11) + /* * To get back to userland, we need to put the return %rip in %rcx and * the return %rfl in %r11d. The sysret instruction also arranges @@ -1182,6 +1217,11 @@ sys_sysenter() */ andq $_BITNOT(PS_IE), REGOFF_RFL(%rsp) + /* + * Clobber %r11 as we check CR0.TS. + */ + ASSERT_CR0TS_ZERO(%r11) + /* * (There's no point in loading up %edx because the sysexit * mechanism smashes it.) 
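
For readers following the assembly above, a rough C rendering of the new
DEBUG-only invariant is sketched here. It is illustrative only: the function
name is hypothetical, and it assumes the existing getcr0() accessor and CR0_TS
definition (the include paths are approximate).

	#include <sys/controlregs.h>	/* CR0_TS */
	#include <sys/archsystm.h>	/* getcr0() */
	#include <sys/cmn_err.h>	/* panic() */

	/*
	 * With the eager FPU, user register state is always live in the
	 * hardware, so CR0.TS must never be set when returning to userland.
	 */
	static void
	assert_cr0_ts_clear_on_user_return(void)
	{
		if ((getcr0() & CR0_TS) != 0)
			panic("CR0.TS set on return to userland");
	}
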
diff --git a/usr/src/uts/i86pc/os/fpu_subr.c b/usr/src/uts/i86pc/os/fpu_subr.c index 3e027269fb..5c57bdcb8c 100644 --- a/usr/src/uts/i86pc/os/fpu_subr.c +++ b/usr/src/uts/i86pc/os/fpu_subr.c @@ -148,8 +148,7 @@ fpu_probe(void) ENABLE_SSE(); if (is_x86_feature(x86_featureset, X86FSET_AVX)) { - ASSERT(is_x86_feature(x86_featureset, - X86FSET_XSAVE)); + ASSERT(is_x86_feature(x86_featureset, X86FSET_XSAVE)); fp_kind |= __FP_AVX; } @@ -180,7 +179,7 @@ fpu_probe(void) fpsave_ctxt = xsave_ctxt; } } - patch_xsave(); + fprestore_ctxt = xrestore_ctxt; fpsave_cachep = kmem_cache_create("xsave_cache", cpuid_get_xsave_size(), XSAVE_ALIGN, NULL, NULL, NULL, NULL, NULL, 0); diff --git a/usr/src/uts/i86pc/os/intr.c b/usr/src/uts/i86pc/os/intr.c index f66f0e69e8..29fa78109c 100644 --- a/usr/src/uts/i86pc/os/intr.c +++ b/usr/src/uts/i86pc/os/intr.c @@ -1446,6 +1446,8 @@ loop: */ tp = CPU->cpu_thread; if (USERMODE(rp->r_cs)) { + pcb_t *pcb; + /* * Check if AST pending. */ @@ -1460,14 +1462,29 @@ loop: goto loop; } -#if defined(__amd64) + pcb = &tp->t_lwp->lwp_pcb; + + /* + * Check to see if we need to initialize the FPU for this + * thread. This should be an uncommon occurrence, but may happen + * in the case where the system creates an lwp through an + * abnormal path such as the agent lwp. Make sure that we still + * happen to have the FPU in a good state. + */ + if ((pcb->pcb_fpu.fpu_flags & FPU_EN) == 0) { + kpreempt_disable(); + fp_seed(); + kpreempt_enable(); + PCB_SET_UPDATE_FPU(pcb); + } + /* * We are done if segment registers do not need updating. */ - if (tp->t_lwp->lwp_pcb.pcb_rupdate == 0) + if (!PCB_NEED_UPDATE(pcb)) return (1); - if (update_sregs(rp, tp->t_lwp)) { + if (PCB_NEED_UPDATE_SEGS(pcb) && update_sregs(rp, tp->t_lwp)) { /* * 1 or more of the selectors is bad. * Deliver a SIGSEGV. @@ -1482,9 +1499,15 @@ loop: tp->t_sig_check = 1; cli(); } - tp->t_lwp->lwp_pcb.pcb_rupdate = 0; + PCB_CLEAR_UPDATE_SEGS(pcb); + + if (PCB_NEED_UPDATE_FPU(pcb)) { + fprestore_ctxt(&pcb->pcb_fpu); + } + PCB_CLEAR_UPDATE_FPU(pcb); + + ASSERT0(PCB_NEED_UPDATE(pcb)); -#endif /* __amd64 */ return (1); } diff --git a/usr/src/uts/i86pc/os/trap.c b/usr/src/uts/i86pc/os/trap.c index 1b14e61b11..be5ad49f0e 100644 --- a/usr/src/uts/i86pc/os/trap.c +++ b/usr/src/uts/i86pc/os/trap.c @@ -993,50 +993,25 @@ trap(struct regs *rp, caddr_t addr, processorid_t cpuid) fault = FLTIOVF; break; + /* + * When using an eager FPU on x86, the #NM trap is no longer meaningful. + * Userland should not be able to trigger it. Anything that does + * represents a fatal error in the kernel and likely in the register + * state of the system. User FPU state should always be valid. 
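+ *
+ * The lazy #NM path that previously restored an lwp's saved state on its
+ * first use of the FPU (ndptrap_frstor) is removed along with this change;
+ * the handler now treats the trap as fatal in both user and kernel context.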
+ */ case T_NOEXTFLT + USER: /* math coprocessor not available */ - if (tudebug && tudebugfpe) - showregs(type, rp, addr); - if (fpnoextflt(rp)) { - siginfo.si_signo = SIGILL; - siginfo.si_code = ILL_ILLOPC; - siginfo.si_addr = (caddr_t)rp->r_pc; - fault = FLTILL; - } - break; - - case T_EXTOVRFLT: /* extension overrun fault */ - /* check if we took a kernel trap on behalf of user */ - { - extern void ndptrap_frstor(void); - if (rp->r_pc != (uintptr_t)ndptrap_frstor) { - sti(); /* T_EXTOVRFLT comes in via cmninttrap */ - (void) die(type, rp, addr, cpuid); - } - type |= USER; - } - /*FALLTHROUGH*/ - case T_EXTOVRFLT + USER: /* extension overrun fault */ - if (tudebug && tudebugfpe) - showregs(type, rp, addr); - if (fpextovrflt(rp)) { - siginfo.si_signo = SIGSEGV; - siginfo.si_code = SEGV_MAPERR; - siginfo.si_addr = (caddr_t)rp->r_pc; - fault = FLTBOUNDS; - } + case T_NOEXTFLT: + (void) die(type, rp, addr, cpuid); break; + /* + * Kernel threads leveraging floating point need to mask the exceptions + * or ensure that they cannot happen. There is no recovery from this. + */ case T_EXTERRFLT: /* x87 floating point exception pending */ - /* check if we took a kernel trap on behalf of user */ - { - extern void ndptrap_frstor(void); - if (rp->r_pc != (uintptr_t)ndptrap_frstor) { - sti(); /* T_EXTERRFLT comes in via cmninttrap */ - (void) die(type, rp, addr, cpuid); - } - type |= USER; - } - /*FALLTHROUGH*/ + sti(); /* T_EXTERRFLT comes in via cmninttrap */ + (void) die(type, rp, addr, cpuid); + break; case T_EXTERRFLT + USER: /* x87 floating point exception pending */ if (tudebug && tudebugfpe) @@ -1939,7 +1914,7 @@ kern_gpfault(struct regs *rp) } #if defined(__amd64) - if (trp == NULL && lwp->lwp_pcb.pcb_rupdate != 0) { + if (trp == NULL && PCB_NEED_UPDATE_SEGS(&lwp->lwp_pcb)) { /* * This is the common case -- we're trying to load diff --git a/usr/src/uts/intel/ia32/ml/exception.s b/usr/src/uts/intel/ia32/ml/exception.s index 82d449f31c..66eda34c14 100644 --- a/usr/src/uts/intel/ia32/ml/exception.s +++ b/usr/src/uts/intel/ia32/ml/exception.s @@ -51,17 +51,7 @@ #include #include -/* - * only one routine in this file is interesting to lint - */ - -#if defined(__lint) - -void -ndptrap_frstor(void) -{} - -#else +#if !defined(__lint) #include "assym.h" @@ -643,220 +633,16 @@ _emul_done: #endif /* __i386 */ -#if defined(__amd64) - /* * #NM */ -#if defined(__xpv) ENTRY_NP(ndptrap) - /* - * (On the hypervisor we must make a hypercall so we might as well - * save everything and handle as in a normal trap.) - */ - TRAP_NOERR(T_NOEXTFLT) /* $7 */ - INTR_PUSH - - /* - * We want to do this quickly as every lwp using fp will take this - * after a context switch -- we do the frequent path in ndptrap_frstor - * below; for all other cases, we let the trap code handle it - */ - LOADCPU(%rax) /* swapgs handled in hypervisor */ - cmpl $0, fpu_exists(%rip) - je .handle_in_trap /* let trap handle no fp case */ - movq CPU_THREAD(%rax), %rbx /* %rbx = curthread */ - movl $FPU_EN, %eax - movq T_LWP(%rbx), %rbx /* %rbx = lwp */ - testq %rbx, %rbx - jz .handle_in_trap /* should not happen? 
*/ -#if LWP_PCB_FPU != 0 - addq $LWP_PCB_FPU, %rbx /* &lwp->lwp_pcb.pcb_fpu */ -#endif - testl %eax, PCB_FPU_FLAGS(%rbx) - jz .handle_in_trap /* must be the first fault */ - CLTS - andl $_BITNOT(FPU_VALID), PCB_FPU_FLAGS(%rbx) -#if FPU_CTX_FPU_REGS != 0 - addq $FPU_CTX_FPU_REGS, %rbx -#endif - - movl FPU_CTX_FPU_XSAVE_MASK(%rbx), %eax /* for xrstor */ - movl FPU_CTX_FPU_XSAVE_MASK+4(%rbx), %edx /* for xrstor */ - - /* - * the label below is used in trap.c to detect FP faults in - * kernel due to user fault. - */ - ALTENTRY(ndptrap_frstor) - movq (%rbx), %rbx /* fpu_regs.kfpu_u.kfpu_XX pointer */ - .globl _patch_xrstorq_rbx -_patch_xrstorq_rbx: - fxrstorq (%rbx) - cmpw $KCS_SEL, REGOFF_CS(%rsp) - je .return_to_kernel - - ASSERT_UPCALL_MASK_IS_SET - USER_POP - IRET /* return to user mode */ - /*NOTREACHED*/ - -.return_to_kernel: - INTR_POP - IRET - /*NOTREACHED*/ - -.handle_in_trap: - INTR_POP - pushq $0 /* can not use TRAP_NOERR */ - pushq $T_NOEXTFLT - jmp cmninttrap - SET_SIZE(ndptrap_frstor) - SET_SIZE(ndptrap) - -#else /* __xpv */ - - ENTRY_NP(ndptrap) - /* - * We want to do this quickly as every lwp using fp will take this - * after a context switch -- we do the frequent path in ndptrap_frstor - * below; for all other cases, we let the trap code handle it - */ - pushq %rax - pushq %rbx - cmpw $KCS_SEL, 24(%rsp) /* did we come from kernel mode? */ - jne 1f - LOADCPU(%rax) /* if yes, don't swapgs */ - jmp 2f -1: - SWAPGS /* if from user, need swapgs */ - LOADCPU(%rax) - SWAPGS -2: - /* - * Xrstor needs to use edx as part of its flag. - * NOTE: have to push rdx after "cmpw ...24(%rsp)", otherwise rsp+$24 - * will not point to CS. - */ - pushq %rdx - cmpl $0, fpu_exists(%rip) - je .handle_in_trap /* let trap handle no fp case */ - movq CPU_THREAD(%rax), %rbx /* %rbx = curthread */ - movl $FPU_EN, %eax - movq T_LWP(%rbx), %rbx /* %rbx = lwp */ - testq %rbx, %rbx - jz .handle_in_trap /* should not happen? */ -#if LWP_PCB_FPU != 0 - addq $LWP_PCB_FPU, %rbx /* &lwp->lwp_pcb.pcb_fpu */ -#endif - testl %eax, PCB_FPU_FLAGS(%rbx) - jz .handle_in_trap /* must be the first fault */ - clts - andl $_BITNOT(FPU_VALID), PCB_FPU_FLAGS(%rbx) -#if FPU_CTX_FPU_REGS != 0 - addq $FPU_CTX_FPU_REGS, %rbx -#endif - - movl FPU_CTX_FPU_XSAVE_MASK(%rbx), %eax /* for xrstor */ - movl FPU_CTX_FPU_XSAVE_MASK+4(%rbx), %edx /* for xrstor */ - - /* - * the label below is used in trap.c to detect FP faults in - * kernel due to user fault. - */ - ALTENTRY(ndptrap_frstor) - movq (%rbx), %rbx /* fpu_regs.kfpu_u.kfpu_XX pointer */ - .globl _patch_xrstorq_rbx -_patch_xrstorq_rbx: - fxrstorq (%rbx) - popq %rdx - popq %rbx - popq %rax - jmp tr_iret_auto - /*NOTREACHED*/ - -.handle_in_trap: - popq %rdx - popq %rbx - popq %rax - TRAP_NOERR(T_NOEXTFLT) /* $7 */ - jmp cmninttrap - SET_SIZE(ndptrap_frstor) - SET_SIZE(ndptrap) - -#endif /* __xpv */ - -#elif defined(__i386) - - ENTRY_NP(ndptrap) - /* - * We want to do this quickly as every lwp using fp will take this - * after a context switch -- we do the frequent path in fpnoextflt - * below; for all other cases, we let the trap code handle it - */ - pushl %eax - pushl %ebx - pushl %edx /* for xrstor */ - pushl %ds - pushl %gs - movl $KDS_SEL, %ebx - movw %bx, %ds - movl $KGS_SEL, %eax - movw %ax, %gs - LOADCPU(%eax) - cmpl $0, fpu_exists - je .handle_in_trap /* let trap handle no fp case */ - movl CPU_THREAD(%eax), %ebx /* %ebx = curthread */ - movl $FPU_EN, %eax - movl T_LWP(%ebx), %ebx /* %ebx = lwp */ - testl %ebx, %ebx - jz .handle_in_trap /* should not happen? 
*/ -#if LWP_PCB_FPU != 0 - addl $LWP_PCB_FPU, %ebx /* &lwp->lwp_pcb.pcb_fpu */ -#endif - testl %eax, PCB_FPU_FLAGS(%ebx) - jz .handle_in_trap /* must be the first fault */ - CLTS - andl $_BITNOT(FPU_VALID), PCB_FPU_FLAGS(%ebx) -#if FPU_CTX_FPU_REGS != 0 - addl $FPU_CTX_FPU_REGS, %ebx -#endif - - movl FPU_CTX_FPU_XSAVE_MASK(%ebx), %eax /* for xrstor */ - movl FPU_CTX_FPU_XSAVE_MASK+4(%ebx), %edx /* for xrstor */ - - /* - * the label below is used in trap.c to detect FP faults in kernel - * due to user fault. - */ - ALTENTRY(ndptrap_frstor) - movl (%ebx), %ebx /* fpu_regs.kfpu_u.kfpu_XX pointer */ - .globl _patch_fxrstor_ebx -_patch_fxrstor_ebx: - .globl _patch_xrstor_ebx -_patch_xrstor_ebx: - frstor (%ebx) /* may be patched to fxrstor or xrstor */ - popl %gs - popl %ds - popl %edx - popl %ebx - popl %eax - IRET - -.handle_in_trap: - popl %gs - popl %ds - popl %edx - popl %ebx - popl %eax - TRAP_NOERR(T_NOEXTFLT) /* $7 */ - jmp cmninttrap - SET_SIZE(ndptrap_frstor) + TRAP_NOERR(T_NOEXTFLT) /* $0 */ + SET_CPU_GSBASE + jmp cmntrap SET_SIZE(ndptrap) -#endif /* __i386 */ - #if !defined(__xpv) #if defined(__amd64) @@ -1036,12 +822,6 @@ make_frame: #endif /* __i386 */ #endif /* !__xpv */ - ENTRY_NP(overrun) - push $0 - TRAP_NOERR(T_EXTOVRFLT) /* $9 i386 only - not generated */ - jmp cmninttrap - SET_SIZE(overrun) - /* * #TS */ diff --git a/usr/src/uts/intel/ia32/ml/float.s b/usr/src/uts/intel/ia32/ml/float.s index f154a96851..0a242e0475 100644 --- a/usr/src/uts/intel/ia32/ml/float.s +++ b/usr/src/uts/intel/ia32/ml/float.s @@ -21,7 +21,7 @@ /* * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2017, Joyent, Inc. + * Copyright (c) 2018, Joyent, Inc. */ /* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */ @@ -79,191 +79,12 @@ fxsave_insn(struct fxsave_state *fx) #else /* __lint */ -#if defined(__amd64) - ENTRY_NP(fxsave_insn) fxsaveq (%rdi) ret SET_SIZE(fxsave_insn) -#elif defined(__i386) - - ENTRY_NP(fxsave_insn) - movl 4(%esp), %eax - fxsave (%eax) - ret - SET_SIZE(fxsave_insn) - -#endif - -#endif /* __lint */ - -#if defined(__i386) - -/* - * If (num1/num2 > num1/num3) the FPU has the FDIV bug. - */ - -#if defined(__lint) - -int -fpu_probe_pentium_fdivbug(void) -{ return (0); } - -#else /* __lint */ - - ENTRY_NP(fpu_probe_pentium_fdivbug) - fldl .num1 - fldl .num2 - fdivr %st(1), %st - fxch %st(1) - fdivl .num3 - fcompp - fstsw %ax - sahf - jae 0f - movl $1, %eax - ret - -0: xorl %eax, %eax - ret - - .align 4 -.num1: .4byte 0xbce4217d /* 4.999999 */ - .4byte 0x4013ffff -.num2: .4byte 0x0 /* 15.0 */ - .4byte 0x402e0000 -.num3: .4byte 0xde7210bf /* 14.999999 */ - .4byte 0x402dffff - SET_SIZE(fpu_probe_pentium_fdivbug) - -#endif /* __lint */ - -/* - * To cope with processors that do not implement fxsave/fxrstor - * instructions, patch hot paths in the kernel to use them only - * when that feature has been detected. 
- */ - -#if defined(__lint) - -void -patch_sse(void) -{} - -void -patch_sse2(void) -{} - -void -patch_xsave(void) -{} - -#else /* __lint */ - - ENTRY_NP(patch_sse) - _HOT_PATCH_PROLOG - / - / frstor (%ebx); nop -> fxrstor (%ebx) - / - _HOT_PATCH(_fxrstor_ebx_insn, _patch_fxrstor_ebx, 3) - / - / lock; xorl $0, (%esp) -> sfence; ret - / - _HOT_PATCH(_sfence_ret_insn, _patch_sfence_ret, 4) - _HOT_PATCH_EPILOG - ret -_fxrstor_ebx_insn: / see ndptrap_frstor() - fxrstor (%ebx) -_ldmxcsr_ebx_insn: / see resume_from_zombie() - ldmxcsr (%ebx) -_sfence_ret_insn: / see membar_producer() - sfence - ret - SET_SIZE(patch_sse) - - ENTRY_NP(patch_sse2) - _HOT_PATCH_PROLOG - / - / lock; xorl $0, (%esp) -> lfence; ret - / - _HOT_PATCH(_lfence_ret_insn, _patch_lfence_ret, 4) - _HOT_PATCH_EPILOG - ret -_lfence_ret_insn: / see membar_consumer() - lfence - ret - SET_SIZE(patch_sse2) - - /* - * Patch lazy fp restore instructions in the trap handler - * to use xrstor instead of frstor - */ - ENTRY_NP(patch_xsave) - _HOT_PATCH_PROLOG - / - / frstor (%ebx); nop -> xrstor (%ebx) - / - _HOT_PATCH(_xrstor_ebx_insn, _patch_xrstor_ebx, 3) - _HOT_PATCH_EPILOG - ret -_xrstor_ebx_insn: / see ndptrap_frstor() - xrstor (%ebx) - SET_SIZE(patch_xsave) - -#endif /* __lint */ -#endif /* __i386 */ - -#if defined(__amd64) -#if defined(__lint) - -void -patch_xsave(void) -{} - -#else /* __lint */ - - /* - * Patch lazy fp restore instructions in the trap handler - * to use xrstor instead of fxrstorq - */ - ENTRY_NP(patch_xsave) - pushq %rbx - pushq %rbp - pushq %r15 - / - / fxrstorq (%rbx); -> nop; xrstor (%rbx) - / loop doing the following for 4 bytes: - / hot_patch_kernel_text(_patch_xrstorq_rbx, _xrstor_rbx_insn, 1) - / - leaq _patch_xrstorq_rbx(%rip), %rbx - leaq _xrstor_rbx_insn(%rip), %rbp - movq $4, %r15 -1: - movq %rbx, %rdi /* patch address */ - movzbq (%rbp), %rsi /* instruction byte */ - movq $1, %rdx /* count */ - call hot_patch_kernel_text - addq $1, %rbx - addq $1, %rbp - subq $1, %r15 - jnz 1b - - popq %r15 - popq %rbp - popq %rbx - ret - -_xrstor_rbx_insn: / see ndptrap_frstor() - # Because the fxrstorq instruction we're patching is 4 bytes long, due - # to the 0x48 prefix (indicating 64-bit operand size), we patch 4 bytes - # too. - nop - xrstor (%rbx) - SET_SIZE(patch_xsave) - #endif /* __lint */ -#endif /* __amd64 */ /* * One of these routines is called from any lwp with floating @@ -287,15 +108,8 @@ void fpxsave_ctxt(void *arg) {} -/*ARGSUSED*/ -void -fpnsave_ctxt(void *arg) -{} - #else /* __lint */ -#if defined(__amd64) - /* * These three functions define the Intel "xsave" handling for CPUs with * different features. Newer AMD CPUs can also use these functions. 
See the @@ -305,7 +119,7 @@ fpnsave_ctxt(void *arg) cmpl $FPU_EN, FPU_CTX_FPU_FLAGS(%rdi) jne 1f movl $_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%rdi) - movq FPU_CTX_FPU_REGS(%rdi), %rdi /* fpu_regs.kfpu_u.kfpu_fn ptr */ + movq FPU_CTX_FPU_REGS(%rdi), %rdi /* fpu_regs.kfpu_u.kfpu_fx ptr */ fxsaveq (%rdi) STTS(%rsi) /* trap on next fpu touch */ 1: rep; ret /* use 2 byte return instruction when branch target */ @@ -352,7 +166,7 @@ fpnsave_ctxt(void *arg) cmpl $FPU_EN, FPU_CTX_FPU_FLAGS(%rdi) jne 1f movl $_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%rdi) - movq FPU_CTX_FPU_REGS(%rdi), %rdi /* fpu_regs.kfpu_u.kfpu_fn ptr */ + movq FPU_CTX_FPU_REGS(%rdi), %rdi /* fpu_regs.kfpu_u.kfpu_fx ptr */ fxsaveq (%rdi) /* * To ensure that we don't leak these values into the next context @@ -405,126 +219,6 @@ fpnsave_ctxt(void *arg) 1: ret SET_SIZE(xsaveopt_excp_clr_ctxt) -#elif defined(__i386) - - ENTRY_NP(fpnsave_ctxt) - movl 4(%esp), %eax /* a struct fpu_ctx */ - cmpl $FPU_EN, FPU_CTX_FPU_FLAGS(%eax) - jne 1f - movl $_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%eax) - movl FPU_CTX_FPU_REGS(%eax), %eax /* fpu_regs.kfpu_u.kfpu_fx ptr */ - fnsave (%eax) - /* (fnsave also reinitializes x87 state) */ - STTS(%edx) /* trap on next fpu touch */ -1: rep; ret /* use 2 byte return instruction when branch target */ - /* AMD Software Optimization Guide - Section 6.2 */ - SET_SIZE(fpnsave_ctxt) - - ENTRY_NP(fpxsave_ctxt) - movl 4(%esp), %eax /* a struct fpu_ctx */ - cmpl $FPU_EN, FPU_CTX_FPU_FLAGS(%eax) - jne 1f - movl $_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%eax) - movl FPU_CTX_FPU_REGS(%eax), %eax /* fpu_regs.kfpu_u.kfpu_fn ptr */ - fxsave (%eax) - STTS(%edx) /* trap on next fpu touch */ -1: rep; ret /* use 2 byte return instruction when branch target */ - /* AMD Software Optimization Guide - Section 6.2 */ - SET_SIZE(fpxsave_ctxt) - - ENTRY_NP(xsave_ctxt) - movl 4(%esp), %ecx /* a struct fpu_ctx */ - cmpl $FPU_EN, FPU_CTX_FPU_FLAGS(%ecx) - jne 1f - movl $_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%ecx) - movl FPU_CTX_FPU_XSAVE_MASK(%ecx), %eax - movl FPU_CTX_FPU_XSAVE_MASK+4(%ecx), %edx - movl FPU_CTX_FPU_REGS(%ecx), %ecx /* fpu_regs.kfpu_u.kfpu_xs ptr */ - xsave (%ecx) - STTS(%edx) /* trap on next fpu touch */ -1: ret - SET_SIZE(xsave_ctxt) - - ENTRY_NP(xsaveopt_ctxt) - movl 4(%esp), %ecx /* a struct fpu_ctx */ - cmpl $FPU_EN, FPU_CTX_FPU_FLAGS(%ecx) - jne 1f - movl $_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%ecx) - movl FPU_CTX_FPU_XSAVE_MASK(%ecx), %eax - movl FPU_CTX_FPU_XSAVE_MASK+4(%ecx), %edx - movl FPU_CTX_FPU_REGS(%ecx), %ecx /* fpu_regs.kfpu_u.kfpu_xs ptr */ - xsaveopt (%ecx) - STTS(%edx) /* trap on next fpu touch */ -1: ret - SET_SIZE(xsaveopt_ctxt) - -/* - * See comment above the __amd64 implementation of fpxsave_excp_clr_ctxt() - * for details about the following threee functions for AMD "exception pointer" - * handling. 
- */ - - ENTRY_NP(fpxsave_excp_clr_ctxt) - movl 4(%esp), %eax /* a struct fpu_ctx */ - cmpl $FPU_EN, FPU_CTX_FPU_FLAGS(%eax) - jne 1f - - movl $_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%eax) - movl FPU_CTX_FPU_REGS(%eax), %eax /* fpu_regs.kfpu_u.kfpu_fn ptr */ - fxsave (%eax) - btw $7, FXSAVE_STATE_FSW(%eax) /* Test saved ES bit */ - jnc 0f /* jump if ES = 0 */ - fnclex /* clear pending x87 exceptions */ -0: ffree %st(7) /* clear tag bit to remove possible stack overflow */ - fildl .fpzero_const - /* dummy load changes all exception pointers */ - STTS(%edx) /* trap on next fpu touch */ -1: rep; ret /* use 2 byte return instruction when branch target */ - /* AMD Software Optimization Guide - Section 6.2 */ - SET_SIZE(fpxsave_excp_clr_ctxt) - - ENTRY_NP(xsave_excp_clr_ctxt) - movl 4(%esp), %ecx /* a struct fpu_ctx */ - cmpl $FPU_EN, FPU_CTX_FPU_FLAGS(%ecx) - jne 1f - - movl $_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%ecx) - movl FPU_CTX_FPU_XSAVE_MASK(%ecx), %eax - movl FPU_CTX_FPU_XSAVE_MASK+4(%ecx), %edx - movl FPU_CTX_FPU_REGS(%ecx), %ecx /* fpu_regs.kfpu_u.kfpu_xs ptr */ - xsave (%ecx) - btw $7, FXSAVE_STATE_FSW(%ecx) /* Test saved ES bit */ - jnc 0f /* jump if ES = 0 */ - fnclex /* clear pending x87 exceptions */ -0: ffree %st(7) /* clear tag bit to remove possible stack overflow */ - fildl .fpzero_const - /* dummy load changes all exception pointers */ - STTS(%edx) /* trap on next fpu touch */ -1: ret - SET_SIZE(xsave_excp_clr_ctxt) - - ENTRY_NP(xsaveopt_excp_clr_ctxt) - movl 4(%esp), %ecx /* a struct fpu_ctx */ - cmpl $FPU_EN, FPU_CTX_FPU_FLAGS(%ecx) - jne 1f - - movl $_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%ecx) - movl FPU_CTX_FPU_XSAVE_MASK(%ecx), %eax - movl FPU_CTX_FPU_XSAVE_MASK+4(%ecx), %edx - movl FPU_CTX_FPU_REGS(%ecx), %ecx /* fpu_regs.kfpu_u.kfpu_xs ptr */ - xsaveopt (%ecx) - btw $7, FXSAVE_STATE_FSW(%ecx) /* Test saved ES bit */ - jnc 0f /* jump if ES = 0 */ - fnclex /* clear pending x87 exceptions */ -0: ffree %st(7) /* clear tag bit to remove possible stack overflow */ - fildl .fpzero_const - /* dummy load changes all exception pointers */ - STTS(%edx) /* trap on next fpu touch */ -1: ret - SET_SIZE(xsaveopt_excp_clr_ctxt) - -#endif /* __i386 */ - .align 8 .fpzero_const: .4byte 0x0 @@ -557,8 +251,6 @@ xsaveopt(struct xsave_state *f, uint64_t m) #else /* __lint */ -#if defined(__amd64) - ENTRY_NP(fpxsave) CLTS fxsaveq (%rdi) @@ -591,58 +283,55 @@ xsaveopt(struct xsave_state *f, uint64_t m) ret SET_SIZE(xsaveopt) -#elif defined(__i386) +#endif /* __lint */ - ENTRY_NP(fpsave) - CLTS - movl 4(%esp), %eax - fnsave (%eax) - STTS(%eax) /* set TS bit in %cr0 (disable FPU) */ - ret - SET_SIZE(fpsave) +/* + * These functions are used when restoring the FPU as part of the epilogue of a + * context switch. 
+ */ - ENTRY_NP(fpxsave) - CLTS - movl 4(%esp), %eax - fxsave (%eax) - fninit /* clear exceptions, init x87 tags */ - STTS(%eax) /* set TS bit in %cr0 (disable FPU) */ - ret - SET_SIZE(fpxsave) +#if defined(__lint) - ENTRY_NP(xsave) - CLTS - movl 4(%esp), %ecx - movl 8(%esp), %eax - movl 12(%esp), %edx - xsave (%ecx) +/*ARGSUSED*/ +void +fpxrestore_ctxt(void *arg) +{} - fninit /* clear exceptions, init x87 tags */ - STTS(%eax) /* set TS bit in %cr0 (disable FPU) */ - ret - SET_SIZE(xsave) +/*ARGSUSED*/ +void +xrestore_ctxt(void *arg) +{} - ENTRY_NP(xsaveopt) +#else /* __lint */ + + ENTRY(fpxrestore_ctxt) + cmpl $_CONST(FPU_EN|FPU_VALID), FPU_CTX_FPU_FLAGS(%rdi) + jne 1f + movl $_CONST(FPU_EN), FPU_CTX_FPU_FLAGS(%rdi) + movq FPU_CTX_FPU_REGS(%rdi), %rdi /* fpu_regs.kfpu_u.kfpu_fx ptr */ CLTS - movl 4(%esp), %ecx - movl 8(%esp), %eax - movl 12(%esp), %edx - xsaveopt (%ecx) + fxrstorq (%rdi) +1: + ret + SET_SIZE(fpxrestore_ctxt) - fninit /* clear exceptions, init x87 tags */ - STTS(%eax) /* set TS bit in %cr0 (disable FPU) */ + ENTRY(xrestore_ctxt) + cmpl $_CONST(FPU_EN|FPU_VALID), FPU_CTX_FPU_FLAGS(%rdi) + jne 1f + movl $_CONST(FPU_EN), FPU_CTX_FPU_FLAGS(%rdi) + movl FPU_CTX_FPU_XSAVE_MASK(%rdi), %eax /* xsave flags in EDX:EAX */ + movl FPU_CTX_FPU_XSAVE_MASK+4(%rdi), %edx + movq FPU_CTX_FPU_REGS(%rdi), %rdi /* fpu_regs.kfpu_u.kfpu_xs ptr */ + CLTS + xrstor (%rdi) +1: ret - SET_SIZE(xsaveopt) + SET_SIZE(xrestore_ctxt) -#endif /* __i386 */ #endif /* __lint */ -#if defined(__lint) -/*ARGSUSED*/ -void -fprestore(struct fnsave_state *f) -{} +#if defined(__lint) /*ARGSUSED*/ void @@ -656,8 +345,6 @@ xrestore(struct xsave_state *f, uint64_t m) #else /* __lint */ -#if defined(__amd64) - ENTRY_NP(fpxrestore) CLTS fxrstorq (%rdi) @@ -673,32 +360,6 @@ xrestore(struct xsave_state *f, uint64_t m) ret SET_SIZE(xrestore) -#elif defined(__i386) - - ENTRY_NP(fprestore) - CLTS - movl 4(%esp), %eax - frstor (%eax) - ret - SET_SIZE(fprestore) - - ENTRY_NP(fpxrestore) - CLTS - movl 4(%esp), %eax - fxrstor (%eax) - ret - SET_SIZE(fpxrestore) - - ENTRY_NP(xrestore) - CLTS - movl 4(%esp), %ecx - movl 8(%esp), %eax - movl 12(%esp), %edx - xrstor (%ecx) - ret - SET_SIZE(xrestore) - -#endif /* __i386 */ #endif /* __lint */ /* @@ -713,21 +374,11 @@ fpdisable(void) #else /* __lint */ -#if defined(__amd64) - ENTRY_NP(fpdisable) STTS(%rdi) /* set TS bit in %cr0 (disable FPU) */ ret SET_SIZE(fpdisable) -#elif defined(__i386) - - ENTRY_NP(fpdisable) - STTS(%eax) - ret - SET_SIZE(fpdisable) - -#endif /* __i386 */ #endif /* __lint */ /* @@ -742,8 +393,6 @@ fpinit(void) #else /* __lint */ -#if defined(__amd64) - ENTRY_NP(fpinit) CLTS cmpl $FP_XSAVE, fp_save_mech @@ -765,38 +414,6 @@ fpinit(void) ret SET_SIZE(fpinit) -#elif defined(__i386) - - ENTRY_NP(fpinit) - CLTS - cmpl $FP_FXSAVE, fp_save_mech - je 1f - cmpl $FP_XSAVE, fp_save_mech - je 2f - - /* fnsave */ - fninit - movl $x87_initial, %eax - frstor (%eax) /* load clean initial state */ - ret - -1: /* fxsave */ - movl $sse_initial, %eax - fxrstor (%eax) /* load clean initial state */ - ret - -2: /* xsave */ - movl $avx_initial, %ecx - xorl %edx, %edx - movl $XFEATURE_AVX, %eax - bt $X86FSET_AVX, x86_featureset - cmovael %edx, %eax - orl $(XFEATURE_LEGACY_FP | XFEATURE_SSE), %eax - xrstor (%ecx) - ret - SET_SIZE(fpinit) - -#endif /* __i386 */ #endif /* __lint */ /* @@ -816,8 +433,6 @@ fpxerr_reset(void) #else /* __lint */ -#if defined(__amd64) - ENTRY_NP(fperr_reset) CLTS xorl %eax, %eax @@ -839,28 +454,6 @@ fpxerr_reset(void) ret SET_SIZE(fpxerr_reset) -#elif 
defined(__i386) - - ENTRY_NP(fperr_reset) - CLTS - xorl %eax, %eax - fnstsw %ax - fnclex - ret - SET_SIZE(fperr_reset) - - ENTRY_NP(fpxerr_reset) - CLTS - subl $4, %esp /* make some temporary space */ - stmxcsr (%esp) - movl (%esp), %eax - andl $_BITNOT(SSE_MXCSR_EFLAGS), (%esp) - ldmxcsr (%esp) /* clear processor exceptions */ - addl $4, %esp - ret - SET_SIZE(fpxerr_reset) - -#endif /* __i386 */ #endif /* __lint */ #if defined(__lint) @@ -873,8 +466,6 @@ fpgetcwsw(void) #else /* __lint */ -#if defined(__amd64) - ENTRY_NP(fpgetcwsw) pushq %rbp movq %rsp, %rbp @@ -887,19 +478,6 @@ fpgetcwsw(void) ret SET_SIZE(fpgetcwsw) -#elif defined(__i386) - - ENTRY_NP(fpgetcwsw) - CLTS - subl $4, %esp /* make some temporary space */ - fnstsw (%esp) /* store the status word */ - fnstcw 2(%esp) /* store the control word */ - movl (%esp), %eax /* put both in %eax */ - addl $4, %esp - ret - SET_SIZE(fpgetcwsw) - -#endif /* __i386 */ #endif /* __lint */ /* @@ -916,8 +494,6 @@ fpgetmxcsr(void) #else /* __lint */ -#if defined(__amd64) - ENTRY_NP(fpgetmxcsr) pushq %rbp movq %rsp, %rbp @@ -929,16 +505,4 @@ fpgetmxcsr(void) ret SET_SIZE(fpgetmxcsr) -#elif defined(__i386) - - ENTRY_NP(fpgetmxcsr) - CLTS - subl $4, %esp /* make some temporary space */ - stmxcsr (%esp) - movl (%esp), %eax - addl $4, %esp - ret - SET_SIZE(fpgetmxcsr) - -#endif /* __i386 */ #endif /* __lint */ diff --git a/usr/src/uts/intel/ia32/os/archdep.c b/usr/src/uts/intel/ia32/os/archdep.c index 6f99571a5a..4c8d1acd9f 100644 --- a/usr/src/uts/intel/ia32/os/archdep.c +++ b/usr/src/uts/intel/ia32/os/archdep.c @@ -317,6 +317,7 @@ setfpregs(klwp_t *lwp, fpregset_t *fp) fpu->fpu_regs.kfpu_status = fp->fp_reg_set.fpchip_state.status; fpu->fpu_flags |= FPU_VALID; + PCB_SET_UPDATE_FPU(&lwp->lwp_pcb); } /* @@ -464,7 +465,7 @@ getgregs(klwp_t *lwp, gregset_t grp) grp[REG_GSBASE] = pcb->pcb_gsbase; if (thisthread) kpreempt_disable(); - if (pcb->pcb_rupdate == 1) { + if (PCB_NEED_UPDATE_SEGS(pcb)) { grp[REG_DS] = pcb->pcb_ds; grp[REG_ES] = pcb->pcb_es; grp[REG_FS] = pcb->pcb_fs; @@ -500,7 +501,7 @@ getgregs32(klwp_t *lwp, gregset32_t grp) if (thisthread) kpreempt_disable(); - if (pcb->pcb_rupdate == 1) { + if (PCB_NEED_UPDATE_SEGS(pcb)) { grp[GS] = (uint16_t)pcb->pcb_gs; grp[FS] = (uint16_t)pcb->pcb_fs; grp[DS] = (uint16_t)pcb->pcb_ds; @@ -753,7 +754,7 @@ setgregs(klwp_t *lwp, gregset_t grp) /* * Ensure that we go out via update_sregs */ - pcb->pcb_rupdate = 1; + PCB_SET_UPDATE_SEGS(pcb); lwptot(lwp)->t_post_sys = 1; if (thisthread) kpreempt_enable(); @@ -790,7 +791,7 @@ setgregs(klwp_t *lwp, gregset_t grp) /* * Ensure that we go out via update_sregs */ - pcb->pcb_rupdate = 1; + PCB_SET_UPDATE_SEGS(pcb); lwptot(lwp)->t_post_sys = 1; if (thisthread) kpreempt_enable(); diff --git a/usr/src/uts/intel/ia32/os/fpu.c b/usr/src/uts/intel/ia32/os/fpu.c index c307c97957..651f4a745f 100644 --- a/usr/src/uts/intel/ia32/os/fpu.c +++ b/usr/src/uts/intel/ia32/os/fpu.c @@ -61,11 +61,388 @@ #include #include +/* + * FPU Management Overview + * ----------------------- + * + * The x86 FPU has evolved substantially since its days as the x87 coprocessor; + * however, many aspects of its life as a coprocessor are still around in x86. + * + * Today, when we refer to the 'FPU', we don't just mean the original x87 FPU. + * While that state still exists, there is much more that is covered by the FPU. + * Today, this includes not just traditional FPU state, but also supervisor only + * state. 
The following state is currently managed and covered logically by the + * idea of the FPU registers: + * + * o Traditional x87 FPU + * o Vector Registers (%xmm, %ymm, %zmm) + * o Memory Protection Extensions (MPX) Bounds Registers + * o Protected Key Rights Registers (PKRU) + * o Processor Trace data + * + * The rest of this covers how the FPU is managed and controlled, how state is + * saved and restored between threads, interactions with hypervisors, and other + * information exported to user land through aux vectors. A lot of background + * information is here to synthesize major parts of the Intel SDM, but + * unfortunately, it is not a replacement for reading it. + * + * FPU Control Registers + * --------------------- + * + * Because the x87 FPU began its life as a co-processor and the FPU was + * optional there are several bits that show up in %cr0 that we have to + * manipulate when dealing with the FPU. These are: + * + * o CR0.ET The 'extension type' bit. This was used originally to indicate + * that the FPU co-processor was present. Now it is forced on for + * compatibility. This is often used to verify whether or not the + * FPU is present. + * + * o CR0.NE The 'native error' bit. Used to indicate that native error + * mode should be enabled. This indicates that we should take traps + * on FPU errors. The OS enables this early in boot. + * + * o CR0.MP The 'Monitor Coprocessor' bit. Used to control whether or not + * wait/fwait instructions generate a #NM if CR0.TS is set. + * + * o CR0.EM The 'Emulation' bit. This is used to cause floating point + * operations (x87 through SSE4) to trap with a #UD so they can be + * emulated. The system never sets this bit, but makes sure it is + * clear on processor start up. + * + * o CR0.TS The 'Task Switched' bit. When this is turned on, a floating + * point operation will generate a #NM. An fwait will as well, + * depending on the value in CR0.MP. + * + * Our general policy is that CR0.ET, CR0.NE, and CR0.MP are always set by + * the system. Similarly CR0.EM is always unset by the system. CR0.TS has a more + * complicated role. Historically it has been used to allow running systems to + * restore the FPU registers lazily. This will be discussed in greater depth + * later on. + * + * %cr4 is also used as part of the FPU control. Specifically we need to worry + * about the following bits in the system: + * + * o CR4.OSFXSR This bit is used to indicate that the OS understands and + * supports the execution of the fxsave and fxrstor + * instructions. This bit is required to be set to enable + * the use of the SSE->SSE4 instructions. + * + * o CR4.OSXMMEXCPT This bit is used to indicate that the OS can understand + * and take a SIMD floating point exception (#XM). This bit + * is always enabled by the system. + * + * o CR4.OSXSAVE This bit is used to indicate that the OS understands and + * supports the execution of the xsave and xrstor family of + * instructions. This bit is required to use any of the AVX + * and newer feature sets. + * + * Because all supported processors are 64-bit, they'll always support the XMM + * extensions and we will enable both CR4.OXFXSR and CR4.OSXMMEXCPT in boot. + * CR4.OSXSAVE will be enabled and used whenever xsave is reported in cpuid. + * + * %xcr0 is used to manage the behavior of the xsave feature set and is only + * present on the system if xsave is supported. %xcr0 is read and written to + * through by the xgetbv and xsetbv instructions. 
This register is present + * whenever the xsave feature set is supported. Each bit in %xcr0 refers to a + * different component of the xsave state and controls whether or not that + * information is saved and restored. For newer feature sets like AVX and MPX, + * it also controls whether or not the corresponding instructions can be + * executed (much like CR0.OSFXSR does for the SSE feature sets). + * + * Everything in %xcr0 is around features available to users. There is also the + * IA32_XSS MSR which is used to control supervisor-only features that are still + * part of the xsave state. Bits that can be set in %xcr0 are reserved in + * IA32_XSS and vice versa. This is an important property that is particularly + * relevant to how the xsave instructions operate. + * + * Save Mechanisms + * --------------- + * + * When switching between running threads the FPU state needs to be saved and + * restored by the OS. If this state was not saved, users would rightfully + * complain about corrupt state. There are three mechanisms that exist on the + * processor for saving and restoring these state images: + * + * o fsave + * o fxsave + * o xsave + * + * fsave saves and restores only the x87 FPU and is the oldest of these + * mechanisms. This mechanism is never used in the kernel today because we are + * always running on systems that support fxsave. + * + * The fxsave and fxrstor mechanism allows the x87 FPU and the SSE register + * state to be saved and restored to and from a struct fxsave_state. This is the + * default mechanism that is used to save and restore the FPU on amd64. An + * important aspect of fxsave that was different from the original i386 fsave + * mechanism is that the restoring of FPU state with pending exceptions will not + * generate an exception, it will be deferred to the next use of the FPU. + * + * The final and by far the most complex mechanism is that of the xsave set. + * xsave allows for saving and restoring all of the traditional x86 pieces (x87 + * and SSE), while allowing for extensions that will save the %ymm, %zmm, etc. + * registers. + * + * Data is saved and restored into and out of a struct xsave_state. The first + * part of the struct xsave_state is equivalent to the struct fxsave_state. + * After that, there is a header which is used to describe the remaining + * portions of the state. The header is a 64-byte value of which the first two + * uint64_t values are defined and the rest are reserved and must be zero. The + * first uint64_t is the xstate_bv member. This describes which values in the + * xsave_state are actually valid and present. This is updated on a save and + * used on restore. The second member is the xcomp_bv member. Its last bit + * determines whether or not a compressed version of the structure is used. + * + * When the uncompressed structure is used (currently the only format we + * support), then each state component is at a fixed offset in the structure, + * even if it is not being used. For example, if you only saved the AVX related + * state, but did not save the MPX related state, the offset would not change + * for any component. With the compressed format, components that aren't used + * are all elided (though the x87 and SSE state are always there). + * + * Unlike fxsave which saves all state, the xsave family does not always save + * and restore all the state that could be covered by the xsave_state. The + * instructions all take an argument which is a mask of what to consider. 
This + * is the same mask that will be used in the xstate_bv vector and it is also the + * same values that are present in %xcr0 and IA32_XSS. Though IA32_XSS is only + * considered with the xsaves and xrstors instructions. + * + * When a save or restore is requested, a bitwise and is performed between the + * requested bits and those that have been enabled in %xcr0. Only the bits that + * match that are then saved or restored. Others will be silently ignored by + * the processor. This idea is used often in the OS. We will always request that + * we save and restore all of the state, but only those portions that are + * actually enabled in %xcr0 will be touched. + * + * If a feature has been asked to be restored that is not set in the xstate_bv + * feature vector of the save state, then it will be set to its initial state by + * the processor (usually zeros). Also, when asked to save state, the processor + * may not write out data that is in its initial state as an optimization. This + * optimization only applies to saving data and not to restoring data. + * + * There are a few different variants of the xsave and xrstor instruction. They + * are: + * + * o xsave This is the original save instruction. It will save all of the + * requested data in the xsave state structure. It only saves data + * in the uncompressed (xcomp_bv[63] is zero) format. It may be + * executed at all privilege levels. + * + * o xrstor This is the original restore instruction. It will restore all of + * the requested data. The xrstor function can handle both the + * compressed and uncompressed formats. It may be executed at all + * privilege levels. + * + * o xsaveopt This is a variant of the xsave instruction that employs + * optimizations to try and only write out state that has been + * modified since the last time an xrstor instruction was called. + * The processor tracks a tuple of information about the last + * xrstor and tries to ensure that the same buffer is being used + * when this optimization is being used. However, because of the + * way that it tracks the xrstor buffer based on the address of it, + * it is not suitable for use if that buffer can be easily reused. + * The most common case is trying to save data to the stack in + * rtld. It may be executed at all privilege levels. + * + * o xsavec This is a variant of the xsave instruction that writes out the + * compressed form of the xsave_state. Otherwise it behaves as + * xsave. It may be executed at all privilege levels. + * + * o xsaves This is a variant of the xsave instruction. It is similar to + * xsavec in that it always writes the compressed form of the + * buffer. Unlike all the other forms, this instruction looks at + * both the user (%xcr0) and supervisor (IA32_XSS MSR) to determine + * what to save and restore. xsaves also implements the same + * optimization that xsaveopt does around modified pieces. User + * land may not execute the instruction. + * + * o xrstors This is a variant of the xrstor instruction. Similar to xsaves + * it can save and restore both the user and privileged states. + * Unlike xrstor it can only operate on the compressed form. + * User land may not execute the instruction. + * + * Based on all of these, the kernel has a precedence for what it will use. + * Basically, xsaves (not supported) is preferred to xsaveopt, which is + * preferred to xsave. A similar scheme is used when informing rtld (more later) + * about what it should use. xsavec is preferred to xsave. 
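A sketch of that preference order (the helper names and flags below are invented for illustration; the real selection happens in fpu_probe() and in the fpu_subr.c code that fills in the rtld aux vectors):

    enum fp_mech { MECH_FXSAVE, MECH_XSAVE, MECH_XSAVEOPT, MECH_XSAVEC };

    /* The has_* flags stand in for the corresponding cpuid feature checks. */
    static enum fp_mech
    choose_kernel_mech(int has_xsave, int has_xsaveopt)
    {
            if (!has_xsave)
                    return (MECH_FXSAVE);
            /* xsaves would be preferred here, but it is not supported yet. */
            return (has_xsaveopt ? MECH_XSAVEOPT : MECH_XSAVE);
    }

    static enum fp_mech
    choose_rtld_mech(int has_xsave, int has_xsavec)
    {
            /*
             * rtld saves into reusable stack buffers, so xsaveopt's
             * modified-state tracking is not appropriate; prefer xsavec.
             */
            if (!has_xsave)
                    return (MECH_FXSAVE);
            return (has_xsavec ? MECH_XSAVEC : MECH_XSAVE);
    }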
xsaveopt is not + * recommended due to the modified optimization not being appropriate for this + * use. + * + * Finally, there is one last gotcha with the xsave state. Importantly some AMD + * processors did not always save and restore some of the FPU exception state in + * some cases like Intel did. In those cases the OS will make up for this fact + * itself. + * + * FPU Initialization + * ------------------ + * + * One difference with the FPU registers is that not all threads have FPU state, + * only those that have an lwp. Generally this means kernel threads, which all + * share p0 and its lwp, do not have FPU state. Though there are definitely + * exceptions such as kcfpoold. In the rest of this discussion we'll use thread + * and lwp interchangeably, just think of thread as meaning a thread that has an + * lwp. + * + * Each lwp has its FPU state allocated in its pcb (process control block). The + * actual storage comes from the fpsave_cachep kmem cache. This cache is sized + * dynamically at start up based on the save mechanism that we're using and the + * amount of memory required for it. This is dynamic because the xsave_state + * size varies based on the supported feature set. + * + * The hardware side of the FPU is initialized early in boot before we mount the + * root file system. This is effectively done in fpu_probe(). This is where we + * make the final decision about which save and restore mechanisms we should + * use, create the fpsave_cachep kmem cache, and initialize a number of + * function pointers that are used for the save and restore logic. + * + * The thread/lwp side is a little more involved. There are two different + * things that we need to concern ourselves with. The first is how the FPU + * resources are allocated and the second is how the FPU state is initialized + * for a given lwp. + * + * We allocate the FPU save state from our kmem cache as part of lwp_fp_init(). + * This is called unconditionally by the system as part of creating an + * LWP. + * + * There are three different initialization paths that we deal with. The first + * is when we are executing a new process. As part of exec all of the register + * state is reset. The exec case is particularly important because init is born + * like Athena, sprouting from the head of the kernel, without any true parent + * to fork from. The second is used whenever we fork or create a new lwp. The + * third is to deal with special lwps like the agent lwp. + * + * During exec, we will call fp_exec() which will initialize and set up the FPU + * state for the process. That will fill in the initial state for the FPU and + * also set that state in the FPU itself. As part of fp_exec() we also install a + * thread context operations vector that takes care of dealing with the saving + * and restoring of the FPU. These context handlers will also be called whenever + * an lwp is created or forked; in those cases the FPU is initialized by the + * fp_new_lwp() context operation instead. + * + * fp_new_lwp() saves the current thread's state, initializes the new thread's + * state, and copies over the relevant parts of the originating thread's state. + * It's at this point that we also install the FPU context operations into the + * new thread, which ensures that all future threads that are descendants of the + * current one get the thread context operations (unless they call exec).
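In the spirit of issue 9596 in the subject line, a hedged sketch of what seeding a fresh lwp's save area looks like: only the legacy x87 and SSE components are marked present in xstate_bv, so the first xrstor places every other component (AVX, MPX, etc.) in its initial state. The structure and constants below are illustrative stand-ins, not the kernel's definitions:

    #include <stdint.h>
    #include <string.h>

    #define XFEAT_X87       0x1ULL
    #define XFEAT_SSE       0x2ULL

    struct xsave_min {
            uint8_t         xs_fxsave[512];  /* legacy x87/SSE image */
            uint64_t        xs_xstate_bv;
            uint64_t        xs_xcomp_bv;
            uint8_t         xs_rsv[48];
    };

    static void
    seed_initial_xsave(struct xsave_min *xs, const uint8_t fx_initial[512])
    {
            (void) memset(xs, 0, sizeof (*xs));
            (void) memcpy(xs->xs_fxsave, fx_initial, sizeof (xs->xs_fxsave));
            /* Only x87 and SSE are valid; everything else starts initial. */
            xs->xs_xstate_bv = XFEAT_X87 | XFEAT_SSE;
    }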
+ * + * To deal with some things like the agent lwp, we double check the state of the + * FPU in sys_rtt_common() to make sure that it has been enabled before + * returning to user land. In general, this path should be rare, but it's useful + * for the odd lwp here and there. + * + * The FPU state will remain valid most of the time. There are times that + * the state will be rewritten. For example in restorecontext, due to /proc, or + * when the lwp calls exec(). Whether the context is being freed or we are resetting + * the state, we will call fp_free() to disable the FPU and our context. + * + * Finally, when the lwp is destroyed, it will actually destroy and free the FPU + * state by calling fp_lwp_cleanup(). + * + * Kernel FPU Multiplexing + * ----------------------- + * + * Just as the kernel has to maintain all of the general purpose registers when + * switching between scheduled threads, the same is true of the FPU registers. + * + * When a thread has FPU state, it also has a set of context operations + * installed. These context operations take care of making sure that the FPU is + * properly saved and restored during a context switch (fpsave_ctxt and + * fprestore_ctxt respectively). This means that the current implementation of + * the FPU is 'eager': when a thread is running, the CPU will have its FPU state + * loaded. While this is always true when executing in userland, there are a few + * cases where this is not true in the kernel. + * + * This was not always the case. Traditionally on x86 a 'lazy' FPU restore was + * employed. This meant that the FPU would be saved on a context switch and the + * CR0.TS bit would be set. When a thread next tried to use the FPU, it would + * then take a #NM trap, at which point we would restore the FPU from the save + * area and return to user land. Given the frequency of use of the FPU alone by + * libc, there's no point returning to user land just to trap again. + * + * There are a few cases though where the FPU state may need to be changed for a + * thread on its behalf. The most notable cases are processes + * using /proc, restorecontext, forking, etc. In all of these cases the kernel + * will force a thread's FPU state to be saved into the PCB through the fp_save() + * function. Whenever the FPU is saved, the FPU_VALID flag is set on the + * pcb. This indicates that the save state holds currently valid data. As a side + * effect of this, CR0.TS will be set. To make sure that all of the state is + * updated before returning to user land, in these cases, we set a flag on the + * PCB that says the FPU needs to be updated. This will make sure that we take + * the slow path out of a system call to fix things up for the thread. Due to + * the fact that this is a rather rare case, effectively setting the equivalent + * of t_postsys is acceptable. + * + * CR0.TS will be set after a save occurs and cleared when a restore occurs. + * Generally this means it will be cleared immediately by the new thread that is + * running in a context switch. However, this isn't the case for kernel threads. + * They currently operate with CR0.TS set as no kernel state is restored for + * them. This means that using the FPU will cause a #NM and panic. + * + * The FPU_VALID flag on the currently executing thread's pcb is meant to track + * what the value of CR0.TS should be. If it is set, then CR0.TS will be set.
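A loose illustration of the eager model just described (simplified pseudologic with invented names, not the actual fpsave_ctxt/fprestore_ctxt implementations):

    struct fpu_ctx_sk {
            unsigned        fc_flags;       /* FPU_EN_SK, FPU_VALID_SK */
            void            *fc_save_area;  /* per-lwp fxsave/xsave buffer */
    };
    #define FPU_EN_SK       0x1
    #define FPU_VALID_SK    0x2

    extern void hw_fpu_save(void *);        /* fxsave or xsave, per mechanism */
    extern void hw_fpu_restore(void *);     /* fxrstor or xrstor */

    /* Called when the thread is switched off CPU. */
    static void
    ctx_save_sk(struct fpu_ctx_sk *fc)
    {
            if (!(fc->fc_flags & FPU_EN_SK))        /* no FPU state to save */
                    return;
            if (fc->fc_flags & FPU_VALID_SK)        /* fp_save() already ran */
                    return;
            hw_fpu_save(fc->fc_save_area);
            fc->fc_flags |= FPU_VALID_SK;
    }

    /* Called when the thread is switched back on CPU. */
    static void
    ctx_restore_sk(struct fpu_ctx_sk *fc)
    {
            if (fc->fc_flags & FPU_EN_SK) {
                    hw_fpu_restore(fc->fc_save_area);
                    fc->fc_flags &= ~FPU_VALID_SK;
            }
    }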
+ * However, because we eagerly restore, the only time that CR0.TS should be set + * for a non-kernel thread is during operations where it will be cleared before + * returning to user land and, importantly, the only data that is in it is its + * own. + * + * FPU Exceptions + * -------------- + * + * Certain operations can cause the kernel to take traps due to FPU activity. + * Generally these events will cause a user process to receive a SIGFPE and if + * the kernel receives it in kernel context, we will die. Traditionally the #NM + * (Device Not Available / No Math) exception generated by CR0.TS would have + * caused us to restore the FPU. Now it is a fatal event regardless of whether + * or not user land causes it. + * + * While there are some cases where the kernel uses the FPU, it is up to the + * kernel to use the FPU in a way such that it cannot receive a trap or to use + * the appropriate trap protection mechanisms. + * + * Hypervisors + * ----------- + * + * When providing support for hypervisors things are a little bit more + * complicated because the FPU is not virtualized at all. This means that they + * need to save and restore the FPU and %xcr0 across entry and exit to the + * guest. To facilitate this, we provide a series of APIs. These + * allow us to use the full native state to make sure that we are always saving + * and restoring the full FPU that the host sees, even when the guest is using a + * subset. + * + * One tricky aspect of this is that the guest may be using a subset of %xcr0 + * and therefore changing our %xcr0 on the fly. It is vital that when we're + * saving and restoring the FPU we always use the largest %xcr0 contents; + * otherwise we will end up leaving behind data in it. + * + * ELF PLT Support + * --------------- + * + * rtld has to preserve a subset of the FPU when it is saving and restoring + * registers due to the amd64 SYS V ABI. See cmd/sgs/rtld/amd64/boot_elf.s for + * more information. As a result, we set up an aux vector that contains + * information about what save and restore mechanisms it should be using and + * the sizing thereof based on what the kernel supports. This is passed down in + * a series of aux vectors, AT_SUN_FPTYPE and AT_SUN_FPSIZE. This information is + * initialized in fpu_subr.c. + */ + kmem_cache_t *fpsave_cachep; /* Legacy fxsave layout + xsave header + ymm */ #define AVX_XSAVE_SIZE (512 + 64 + 256) +/* + * Various sanity checks. + */ +CTASSERT(sizeof (struct fxsave_state) == 512); +CTASSERT(sizeof (struct fnsave_state) == 108); +CTASSERT((offsetof(struct fxsave_state, fx_xmm[0]) & 0xf) == 0); +CTASSERT(sizeof (struct xsave_state) >= AVX_XSAVE_SIZE); + /*CSTYLED*/ #pragma align 16 (sse_initial) @@ -150,20 +527,12 @@ const struct fnsave_state x87_initial = { /* rest of structure is zero */ }; -#if defined(__amd64) /* * This vector is patched to xsave_ctxt() if we discover we have an * XSAVE-capable chip in fpu_probe. */ void (*fpsave_ctxt)(void *) = fpxsave_ctxt; -#elif defined(__i386) -/* - * This vector is patched to fpxsave_ctxt() if we discover we have an - * SSE-capable chip in fpu_probe(). It is patched to xsave_ctxt - * if we discover we have an XSAVE-capable chip in fpu_probe. - */ -void (*fpsave_ctxt)(void *) = fpnsave_ctxt; -#endif +void (*fprestore_ctxt)(void *) = fpxrestore_ctxt; /* * This function pointer is changed to xsaveopt if the CPU is xsaveopt capable.
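To illustrate the consumer side of those aux vector entries (a hypothetical sketch; the real consumer is rtld's amd64 _setup.c and boot_elf.s), a process handed the auxv_t array at startup could locate the two values roughly like this:

    #include <sys/auxv.h>

    /*
     * AT_SUN_FPTYPE and AT_SUN_FPSIZE are the entries added by this change;
     * the scanning loop itself is illustrative, not rtld's actual code.
     */
    static void
    scan_fp_auxv(auxv_t *auxv, long *fptype, long *fpsize)
    {
            auxv_t *av;

            *fptype = 0;
            *fpsize = 0;
            for (av = auxv; av->a_type != AT_NULL; av++) {
                    if (av->a_type == AT_SUN_FPTYPE)
                            *fptype = av->a_un.a_val;
                    else if (av->a_type == AT_SUN_FPSIZE)
                            *fpsize = av->a_un.a_val;
            }
    }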
@@ -187,9 +556,6 @@ fp_new_lwp(kthread_id_t t, kthread_id_t ct) struct fpu_ctx *fp; /* parent fpu context */ struct fpu_ctx *cfp; /* new fpu context */ struct fxsave_state *fx, *cfx; -#if defined(__i386) - struct fnsave_state *fn, *cfn; -#endif struct xsave_state *cxs; ASSERT(fp_kind != FP_NO); @@ -207,15 +573,13 @@ fp_new_lwp(kthread_id_t t, kthread_id_t ct) cfp->fpu_regs.kfpu_status = 0; cfp->fpu_regs.kfpu_xstatus = 0; + /* + * Make sure that the child's FPU is cleaned up and made ready for user + * land. + */ + PCB_SET_UPDATE_FPU(&ct->t_lwp->lwp_pcb); + switch (fp_save_mech) { -#if defined(__i386) - case FP_FNSAVE: - fn = fp->fpu_regs.kfpu_u.kfpu_fn; - cfn = cfp->fpu_regs.kfpu_u.kfpu_fn; - bcopy(&x87_initial, cfn, sizeof (*cfn)); - cfn->f_fcw = fn->f_fcw; - break; -#endif case FP_FXSAVE: fx = fp->fpu_regs.kfpu_u.kfpu_fx; cfx = cfp->fpu_regs.kfpu_u.kfpu_fx; @@ -244,14 +608,13 @@ fp_new_lwp(kthread_id_t t, kthread_id_t ct) /*NOTREACHED*/ } - installctx(ct, cfp, - fpsave_ctxt, NULL, fp_new_lwp, fp_new_lwp, NULL, fp_free); /* - * Now, when the new lwp starts running, it will take a trap - * that will be handled inline in the trap table to cause - * the appropriate f*rstor instruction to load the save area we - * constructed above directly into the hardware. + * Mark that both the parent and child need to have the FPU cleaned up + * before returning to user land. */ + + installctx(ct, cfp, fpsave_ctxt, fprestore_ctxt, fp_new_lwp, + fp_new_lwp, NULL, fp_free); } /* @@ -313,11 +676,6 @@ fp_save(struct fpu_ctx *fp) ASSERT(curthread->t_lwp && fp == &curthread->t_lwp->lwp_pcb.pcb_fpu); switch (fp_save_mech) { -#if defined(__i386) - case FP_FNSAVE: - fpsave(fp->fpu_regs.kfpu_u.kfpu_fn); - break; -#endif case FP_FXSAVE: fpxsave(fp->fpu_regs.kfpu_u.kfpu_fx); break; @@ -331,6 +689,18 @@ fp_save(struct fpu_ctx *fp) } fp->fpu_flags |= FPU_VALID; + + /* + * We save the FPU as part of forking, execing, modifications via /proc, + * restorecontext, etc. As such, we need to make sure that we return to + * userland with valid state in the FPU. If we're context switched out + * before we hit sys_rtt_common() we'll end up having restored the FPU + * as part of the context ops operations. The restore logic always makes + * sure that FPU_VALID is set before doing a restore so we don't restore + * it a second time. + */ + PCB_SET_UPDATE_FPU(&curthread->t_lwp->lwp_pcb); + kpreempt_enable(); } @@ -344,11 +714,6 @@ void fp_restore(struct fpu_ctx *fp) { switch (fp_save_mech) { -#if defined(__i386) - case FP_FNSAVE: - fprestore(fp->fpu_regs.kfpu_u.kfpu_fn); - break; -#endif case FP_FXSAVE: fpxrestore(fp->fpu_regs.kfpu_u.kfpu_fx); break; @@ -364,6 +729,33 @@ fp_restore(struct fpu_ctx *fp) fp->fpu_flags &= ~FPU_VALID; } +/* + * Reset the FPU such that it is in a valid state for a new thread that is + * coming out of exec. The FPU will be in a usable state at this point. At this + * point we know that the FPU state has already been allocated and if this + * wasn't an init process, then it will have had fp_free() previously called. + */ +void +fp_exec(void) +{ + struct fpu_ctx *fp = &ttolwp(curthread)->lwp_pcb.pcb_fpu; + + if (fp_save_mech == FP_XSAVE) { + fp->fpu_xsave_mask = XFEATURE_FP_ALL; + } + + /* + * Make sure that we're not preempted in the middle of initializing the + * FPU on CPU. 
+ */ + kpreempt_disable(); + installctx(curthread, fp, fpsave_ctxt, fprestore_ctxt, fp_new_lwp, + fp_new_lwp, NULL, fp_free); + fpinit(); + fp->fpu_flags = FPU_EN; + kpreempt_enable(); +} + /* * Seeds the initial state for the current thread. The possibilities are: @@ -371,7 +763,7 @@ fp_restore(struct fpu_ctx *fp) * initialization: Load the FPU state from the LWP state. * 2. The FPU state has not been externally modified: Load a clean state. */ -static void +void fp_seed(void) { struct fpu_ctx *fp = &ttolwp(curthread)->lwp_pcb.pcb_fpu; @@ -386,8 +778,8 @@ fp_seed(void) fp->fpu_xsave_mask = XFEATURE_FP_ALL; } - installctx(curthread, fp, - fpsave_ctxt, NULL, fp_new_lwp, fp_new_lwp, NULL, fp_free); + installctx(curthread, fp, fpsave_ctxt, fprestore_ctxt, fp_new_lwp, + fp_new_lwp, NULL, fp_free); fpinit(); /* @@ -452,11 +844,6 @@ fp_lwp_dup(struct _klwp *lwp) size_t sz; switch (fp_save_mech) { -#if defined(__i386) - case FP_FNSAVE: - sz = sizeof (struct fnsave_state); - break; -#endif case FP_FXSAVE: sz = sizeof (struct fxsave_state); break; @@ -474,119 +861,6 @@ fp_lwp_dup(struct _klwp *lwp) lwp->lwp_pcb.pcb_fpu.fpu_regs.kfpu_u.kfpu_generic = xp; } - -/* - * This routine is called from trap() when User thread takes No Extension - * Fault. The possiblities are: - * 1. User thread has executed a FP instruction for the first time. - * Save current FPU context if any. Initialize FPU, setup FPU - * context for the thread and enable FP hw. - * 2. Thread's pcb has a valid FPU state: Restore the FPU state and - * enable FP hw. - * - * Note that case #2 is inlined in the trap table. - */ -int -fpnoextflt(struct regs *rp) -{ - struct fpu_ctx *fp = &ttolwp(curthread)->lwp_pcb.pcb_fpu; - -#if !defined(__lint) - ASSERT(sizeof (struct fxsave_state) == 512 && - sizeof (struct fnsave_state) == 108); - ASSERT((offsetof(struct fxsave_state, fx_xmm[0]) & 0xf) == 0); - - ASSERT(sizeof (struct xsave_state) >= AVX_XSAVE_SIZE); - -#if defined(__i386) - ASSERT(sizeof (struct _fpu) == sizeof (struct __old_fpu)); -#endif /* __i386 */ -#endif /* !__lint */ - - kpreempt_disable(); - /* - * Now we can enable the interrupts. - * (NOTE: fp-no-coprocessor comes thru interrupt gate) - */ - sti(); - - if (!fpu_exists) { /* check for FPU hw exists */ - if (fp_kind == FP_NO) { - uint32_t inst; - - /* - * When the system has no floating point support, - * i.e. no FP hardware and no emulator, skip the - * two kinds of FP instruction that occur in - * fpstart. Allows processes that do no real FP - * to run normally. - */ - if (fuword32((void *)rp->r_pc, &inst) != -1 && - ((inst & 0xFFFF) == 0x7dd9 || - (inst & 0xFFFF) == 0x6dd9)) { - rp->r_pc += 3; - kpreempt_enable(); - return (0); - } - } - - /* - * If we have neither a processor extension nor - * an emulator, kill the process OR panic the kernel. - */ - kpreempt_enable(); - return (1); /* error */ - } - -#if !defined(__xpv) /* XXPV Is this ifdef needed now? */ - /* - * A paranoid cross-check: for the SSE case, ensure that %cr4 is - * configured to enable fully fledged (%xmm) fxsave/fxrestor on - * this CPU. For the non-SSE case, ensure that it isn't. - */ - ASSERT(((fp_kind & __FP_SSE) && - (getcr4() & CR4_OSFXSR) == CR4_OSFXSR) || - (!(fp_kind & __FP_SSE) && - (getcr4() & (CR4_OSXMMEXCPT|CR4_OSFXSR)) == 0)); -#endif - - if (fp->fpu_flags & FPU_EN) { - /* case 2 */ - fp_restore(fp); - } else { - /* case 1 */ - fp_seed(); - } - kpreempt_enable(); - return (0); -} - - -/* - * Handle a processor extension overrun fault - * Returns non zero for error. 
- * - * XXX Shouldn't this just be abolished given that we're not supporting - * anything prior to Pentium? - */ - -/* ARGSUSED */ -int -fpextovrflt(struct regs *rp) -{ -#if !defined(__xpv) /* XXPV Do we need this ifdef either */ - ulong_t cur_cr0; - - ASSERT(fp_kind != FP_NO); - - cur_cr0 = getcr0(); - fpinit(); /* initialize the FPU hardware */ - setcr0(cur_cr0); -#endif - sti(); - return (1); /* error, send SIGSEGV signal to the thread */ -} - /* * Handle a processor extension error fault * Returns non zero for error. @@ -622,14 +896,6 @@ fpexterrflt(struct regs *rp) /* clear exception flags in saved state, as if by fnclex */ switch (fp_save_mech) { -#if defined(__i386) - case FP_FNSAVE: - fpsw = fp->fpu_regs.kfpu_u.kfpu_fn->f_fsw; - fpcw = fp->fpu_regs.kfpu_u.kfpu_fn->f_fcw; - fp->fpu_regs.kfpu_u.kfpu_fn->f_fsw &= ~FPS_SW_EFLAGS; - break; -#endif - case FP_FXSAVE: fpsw = fp->fpu_regs.kfpu_u.kfpu_fx->fx_fsw; fpcw = fp->fpu_regs.kfpu_u.kfpu_fx->fx_fcw; @@ -811,11 +1077,6 @@ fpsetcw(uint16_t fcw, uint32_t mxcsr) fp_save(fp); switch (fp_save_mech) { -#if defined(__i386) - case FP_FNSAVE: - fp->fpu_regs.kfpu_u.kfpu_fn->f_fcw = fcw; - break; -#endif case FP_FXSAVE: fx = fp->fpu_regs.kfpu_u.kfpu_fx; fx->fx_fcw = fcw; diff --git a/usr/src/uts/intel/ia32/os/sundep.c b/usr/src/uts/intel/ia32/os/sundep.c index 3911d6ebaa..cfb4552287 100644 --- a/usr/src/uts/intel/ia32/os/sundep.c +++ b/usr/src/uts/intel/ia32/os/sundep.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2017 Joyent, Inc. + * Copyright 2018 Joyent, Inc. */ /* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */ @@ -393,12 +393,12 @@ lwp_forkregs(klwp_t *lwp, klwp_t *clwp) struct pcb *pcb = &clwp->lwp_pcb; struct regs *rp = lwptoregs(lwp); - if (pcb->pcb_rupdate == 0) { + if (!PCB_NEED_UPDATE_SEGS(pcb)) { pcb->pcb_ds = rp->r_ds; pcb->pcb_es = rp->r_es; pcb->pcb_fs = rp->r_fs; pcb->pcb_gs = rp->r_gs; - pcb->pcb_rupdate = 1; + PCB_SET_UPDATE_SEGS(pcb); lwptot(clwp)->t_post_sys = 1; } ASSERT(lwptot(clwp)->t_post_sys); @@ -436,22 +436,22 @@ lwp_pcb_exit(void) * as a segment-not-present trap. * * Here we save the current values from the lwp regs into the pcb - * and set pcb->pcb_rupdate to 1 to tell the rest of the kernel - * that the pcb copy of the segment registers is the current one. - * This ensures the lwp's next trip to user land via update_sregs. - * Finally we set t_post_sys to ensure that no system call fast-path's - * its way out of the kernel via sysret. + * and or PCB_UPDATE_SEGS (1) in pcb->pcb_rupdate to tell the rest + * of the kernel that the pcb copy of the segment registers is the + * current one. This ensures the lwp's next trip to user land via + * update_sregs. Finally we set t_post_sys to ensure that no + * system call fast-path's its way out of the kernel via sysret. * - * (This means that we need to have interrupts disabled when we test - * t->t_post_sys in the syscall handlers; if the test fails, we need - * to keep interrupts disabled until we return to userland so we can't - * be switched away.) + * (This means that we need to have interrupts disabled when we + * test t->t_post_sys in the syscall handlers; if the test fails, + * we need to keep interrupts disabled until we return to userland + * so we can't be switched away.) * - * As a result of all this, we don't really have to do a whole lot if - * the thread is just mucking about in the kernel, switching on and - * off the cpu for whatever reason it feels like. 
And yet we still - * preserve fast syscalls, cause if we -don't- get descheduled, - * we never come here either. + * As a result of all this, we don't really have to do a whole lot + * if the thread is just mucking about in the kernel, switching on + * and off the cpu for whatever reason it feels like. And yet we + * still preserve fast syscalls, cause if we -don't- get + * descheduled, we never come here either. */ #define VALID_LWP_DESC(udp) ((udp)->usd_type == SDT_MEMRWA && \ @@ -468,7 +468,7 @@ lwp_segregs_save(klwp_t *lwp) ASSERT(VALID_LWP_DESC(&pcb->pcb_fsdesc)); ASSERT(VALID_LWP_DESC(&pcb->pcb_gsdesc)); - if (pcb->pcb_rupdate == 0) { + if (!PCB_NEED_UPDATE_SEGS(pcb)) { rp = lwptoregs(lwp); /* @@ -482,7 +482,7 @@ lwp_segregs_save(klwp_t *lwp) pcb->pcb_es = rp->r_es; pcb->pcb_fs = rp->r_fs; pcb->pcb_gs = rp->r_gs; - pcb->pcb_rupdate = 1; + PCB_SET_UPDATE_SEGS(pcb); lwp->lwp_thread->t_post_sys = 1; } #endif /* __amd64 */ @@ -833,7 +833,8 @@ lwp_installctx(klwp_t *lwp) * On the amd64 kernel, the context handlers are responsible for * virtualizing %ds, %es, %fs, and %gs to the lwp. The register * values are only ever changed via sys_rtt when the - * pcb->pcb_rupdate == 1. Only sys_rtt gets to clear the bit. + * PCB_UPDATE_SEGS bit (1) is set in pcb->pcb_rupdate. Only + * sys_rtt gets to clear the bit. * * On the i386 kernel, the context handlers are responsible for * virtualizing %gs/%fs to the lwp by updating the per-cpu GDTs @@ -964,7 +965,7 @@ setregs(uarg_t *args) pcb->pcb_ds = rp->r_ds; pcb->pcb_es = rp->r_es; - pcb->pcb_rupdate = 1; + PCB_SET_UPDATE_SEGS(pcb); #elif defined(__i386) @@ -990,18 +991,16 @@ setregs(uarg_t *args) lwp->lwp_eosys = JUSTRETURN; t->t_post_sys = 1; - /* - * Here we initialize minimal fpu state. - * The rest is done at the first floating - * point instruction that a process executes. - */ - pcb->pcb_fpu.fpu_flags = 0; - /* * Add the lwp context handlers that virtualize segment registers, * and/or system call stacks etc. */ lwp_installctx(lwp); + + /* + * Reset the FPU flags and then initialize the FPU for this lwp. + */ + fp_exec(); } user_desc_t * diff --git a/usr/src/uts/intel/ia32/os/sysi86.c b/usr/src/uts/intel/ia32/os/sysi86.c index 4573d62fad..bdb66e3e1f 100644 --- a/usr/src/uts/intel/ia32/os/sysi86.c +++ b/usr/src/uts/intel/ia32/os/sysi86.c @@ -624,7 +624,7 @@ setdscr(struct ssd *ssd) } #if defined(__amd64) - if (pcb->pcb_rupdate == 1) { + if (PCB_NEED_UPDATE_SEGS(pcb)) { if (ssd->sel == pcb->pcb_ds || ssd->sel == pcb->pcb_es || ssd->sel == pcb->pcb_fs || diff --git a/usr/src/uts/intel/ia32/syscall/lwp_private.c b/usr/src/uts/intel/ia32/syscall/lwp_private.c index 79e9076ee0..479a800d9a 100644 --- a/usr/src/uts/intel/ia32/syscall/lwp_private.c +++ b/usr/src/uts/intel/ia32/syscall/lwp_private.c @@ -21,10 +21,9 @@ /* * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright (c) 2018, Joyent, Inc. */ -#pragma ident "%Z%%M% %I% %E% SMI" - #include #include #include @@ -72,12 +71,12 @@ lwp_setprivate(klwp_t *lwp, int which, uintptr_t base) * of zero for %fs and %gs to use the 64-bit fs_base and gs_base * respectively. 
*/ - if (pcb->pcb_rupdate == 0) { + if (!PCB_NEED_UPDATE_SEGS(pcb)) { pcb->pcb_ds = rp->r_ds; pcb->pcb_es = rp->r_es; pcb->pcb_fs = rp->r_fs; pcb->pcb_gs = rp->r_gs; - pcb->pcb_rupdate = 1; + PCB_SET_UPDATE_SEGS(pcb); t->t_post_sys = 1; } ASSERT(t->t_post_sys); @@ -171,7 +170,7 @@ lwp_getprivate(klwp_t *lwp, int which, uintptr_t base) case _LWP_FSBASE: if ((sbase = pcb->pcb_fsbase) != 0) { if (lwp_getdatamodel(lwp) == DATAMODEL_NATIVE) { - if (pcb->pcb_rupdate == 1) { + if (PCB_NEED_UPDATE_SEGS(pcb)) { if (pcb->pcb_fs == 0) break; } else { @@ -179,7 +178,7 @@ lwp_getprivate(klwp_t *lwp, int which, uintptr_t base) break; } } else { - if (pcb->pcb_rupdate == 1) { + if (PCB_NEED_UPDATE_SEGS(pcb)) { if (pcb->pcb_fs == LWPFS_SEL) break; } else { @@ -193,7 +192,7 @@ lwp_getprivate(klwp_t *lwp, int which, uintptr_t base) case _LWP_GSBASE: if ((sbase = pcb->pcb_gsbase) != 0) { if (lwp_getdatamodel(lwp) == DATAMODEL_NATIVE) { - if (pcb->pcb_rupdate == 1) { + if (PCB_NEED_UPDATE_SEGS(pcb)) { if (pcb->pcb_gs == 0) break; } else { @@ -201,7 +200,7 @@ lwp_getprivate(klwp_t *lwp, int which, uintptr_t base) break; } } else { - if (pcb->pcb_rupdate == 1) { + if (PCB_NEED_UPDATE_SEGS(pcb)) { if (pcb->pcb_gs == LWPGS_SEL) break; } else { diff --git a/usr/src/uts/intel/sys/archsystm.h b/usr/src/uts/intel/sys/archsystm.h index 8da14c1a75..93fed4e87d 100644 --- a/usr/src/uts/intel/sys/archsystm.h +++ b/usr/src/uts/intel/sys/archsystm.h @@ -55,11 +55,8 @@ extern void mfence_insn(void); extern uint16_t getgs(void); extern void setgs(uint16_t); -extern void patch_sse(void); -extern void patch_sse2(void); #endif -extern void patch_xsave(void); extern kmem_cache_t *fpsave_cachep; extern void cli(void); diff --git a/usr/src/uts/intel/sys/fp.h b/usr/src/uts/intel/sys/fp.h index fe5471e855..9841fe1c3b 100644 --- a/usr/src/uts/intel/sys/fp.h +++ b/usr/src/uts/intel/sys/fp.h @@ -236,13 +236,14 @@ struct fxsave_state { * 13.4.2 of the Intel 64 and IA-32 Architectures Software Developer’s Manual, * Volume 1 (IASDv1). The extended portion is documented in section 13.4.3. * - * Our size is at least AVX_XSAVE_SIZE (832 bytes), asserted in fpnoextflt(). - * Enabling additional xsave-related CPU features requires an increase in the - * size. We dynamically allocate the per-lwp xsave area at runtime, based on - * the size needed for the CPU-specific features. This xsave_state structure - * simply defines our historical layout for the beginning of the xsave area. The - * locations and size of new, extended, components is determined dynamically by - * querying the CPU. See the xsave_info structure in cpuid.c. + * Our size is at least AVX_XSAVE_SIZE (832 bytes), which is asserted + * statically. Enabling additional xsave-related CPU features requires an + * increase in the size. We dynamically allocate the per-lwp xsave area at + * runtime, based on the size needed for the CPU-specific features. This + * xsave_state structure simply defines our historical layout for the beginning + * of the xsave area. The locations and size of new, extended, components is + * determined dynamically by querying the CPU. See the xsave_info structure in + * cpuid.c. * * xsave component usage is tracked using bits in the xs_xstate_bv field. The * components are documented in section 13.1 of IASDv1. 
For easy reference, @@ -301,7 +302,6 @@ extern uint32_t sse_mxcsr_mask; extern void fpu_probe(void); extern uint_t fpu_initial_probe(void); -extern int fpu_probe_pentium_fdivbug(void); extern void fpu_auxv_info(int *, size_t *); @@ -315,6 +315,10 @@ extern void xsaveopt_excp_clr_ctxt(void *); extern void (*fpsave_ctxt)(void *); extern void (*xsavep)(struct xsave_state *, uint64_t); +extern void fpxrestore_ctxt(void *); +extern void xrestore_ctxt(void *); +extern void (*fprestore_ctxt)(void *); + extern void fxsave_insn(struct fxsave_state *); extern void fpsave(struct fnsave_state *); extern void fprestore(struct fnsave_state *); @@ -335,11 +339,11 @@ extern uint32_t fpgetcwsw(void); extern uint32_t fpgetmxcsr(void); struct regs; -extern int fpnoextflt(struct regs *); -extern int fpextovrflt(struct regs *); extern int fpexterrflt(struct regs *); extern int fpsimderrflt(struct regs *); extern void fpsetcw(uint16_t, uint32_t); +extern void fp_seed(void); +extern void fp_exec(void); struct _klwp; extern void fp_lwp_init(struct _klwp *); extern void fp_lwp_cleanup(struct _klwp *); diff --git a/usr/src/uts/intel/sys/pcb.h b/usr/src/uts/intel/sys/pcb.h index defd116eba..e7e2e2cdce 100644 --- a/usr/src/uts/intel/sys/pcb.h +++ b/usr/src/uts/intel/sys/pcb.h @@ -21,6 +21,7 @@ /* * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, Joyent, Inc. */ #ifndef _SYS_PCB_H @@ -51,7 +52,6 @@ typedef struct pcb { uint_t pcb_flags; /* state flags; cleared on fork */ greg_t pcb_drstat; /* status debug register (%dr6) */ unsigned char pcb_instr; /* /proc: instruction at stop */ -#if defined(__amd64) unsigned char pcb_rupdate; /* new register values in pcb -> regs */ uintptr_t pcb_fsbase; uintptr_t pcb_gsbase; @@ -59,7 +59,6 @@ typedef struct pcb { selector_t pcb_es; selector_t pcb_fs; selector_t pcb_gs; -#endif /* __amd64 */ user_desc_t pcb_fsdesc; /* private per-lwp %fs descriptors */ user_desc_t pcb_gsdesc; /* private per-lwp %gs descriptors */ } pcb_t; @@ -77,6 +76,21 @@ typedef struct pcb { #define REQUEST_NOSTEP 0x200 /* request pending to disable single-step */ #define ASYNC_HWERR 0x400 /* hardware error has corrupted context */ +/* pcb_rupdate values */ +#define PCB_UPDATE_SEGS 0x01 /* Update segment registers */ +#define PCB_UPDATE_FPU 0x02 /* Update FPU registers */ + +#define PCB_SET_UPDATE_SEGS(pcb) ((pcb)->pcb_rupdate |= PCB_UPDATE_SEGS) +#define PCB_SET_UPDATE_FPU(pcb) ((pcb)->pcb_rupdate |= PCB_UPDATE_FPU) +#define PCB_NEED_UPDATE_SEGS(pcb) \ + (((pcb)->pcb_rupdate & PCB_UPDATE_SEGS) != 0) +#define PCB_NEED_UPDATE_FPU(pcb) \ + (((pcb)->pcb_rupdate & PCB_UPDATE_FPU) != 0) +#define PCB_NEED_UPDATE(pcb) \ + (PCB_NEED_UPDATE_FPU(pcb) || PCB_NEED_UPDATE_SEGS(pcb)) +#define PCB_CLEAR_UPDATE_SEGS(pcb) ((pcb)->pcb_rupdate &= ~PCB_UPDATE_SEGS) +#define PCB_CLEAR_UPDATE_FPU(pcb) ((pcb)->pcb_rupdate &= ~PCB_UPDATE_FPU) + /* fpu_flags */ #define FPU_EN 0x1 /* flag signifying fpu in use */ #define FPU_VALID 0x2 /* fpu_regs has valid fpu state */ -- cgit v1.2.3 From 7267b93f5f67b15927e4bf44f0ad2efa84727226 Mon Sep 17 00:00:00 2001 From: Marcel Telka Date: Tue, 19 Jun 2018 10:53:19 +0200 Subject: 9618 i40e_main.c: Few minor typos in comments Reviewed by: Yuri Pankov Reviewed by: Robert Mustacchi Approved by: Dan McDonald --- usr/src/uts/common/io/i40e/i40e_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/usr/src/uts/common/io/i40e/i40e_main.c b/usr/src/uts/common/io/i40e/i40e_main.c index 263f99dfdb..54aef43424 100644 --- 
a/usr/src/uts/common/io/i40e/i40e_main.c +++ b/usr/src/uts/common/io/i40e/i40e_main.c @@ -137,7 +137,7 @@ * resources between everything, we'll end up just dividing the resources * evenly between all of the functions. Longer term, if we don't have to declare * to the GLDv3 that these resources are shared, then we'll maintain a pool and - * hae each PF allocate from the pool in the device, thus if only two of four + * have each PF allocate from the pool in the device, thus if only two of four * ports are being used, for example, then all of the resources can still be * used. * @@ -169,7 +169,7 @@ * To receieve broadcast traffic, we enable it through the admin queue, rather * than use one of our filters for it. For multicast traffic, we reserve a * certain number of the hash filters and assign them to a given PF. When we - * exceed those, we then switch to using promicuous mode for multicast traffic. + * exceed those, we then switch to using promiscuous mode for multicast traffic. * * More specifically, once we exceed the number of filters (indicated because * the i40e_t`i40e_resources.ifr_nmcastfilt == -- cgit v1.2.3 From fec46055b92c6c6721fc6698843744a264e6ca70 Mon Sep 17 00:00:00 2001 From: Toomas Soome Date: Sun, 16 Jul 2017 12:10:02 +0300 Subject: 9376 sendmail: this statement may fall through Reviewed by: Andy Fiddaman Reviewed by: Hans Rosenfeld Approved by: Robert Mustacchi --- usr/src/cmd/sendmail/aux/editmap.c | 4 +--- usr/src/cmd/sendmail/aux/mail.local.c | 4 ++-- usr/src/cmd/sendmail/aux/mailstats.c | 3 +-- usr/src/cmd/sendmail/aux/makemap.c | 5 +---- usr/src/cmd/sendmail/aux/praliases.c | 1 + usr/src/cmd/sendmail/db/hash/hash_func.c | 21 ++++++++++++++------- usr/src/cmd/sendmail/src/alias.c | 3 +-- usr/src/cmd/sendmail/src/domain.c | 4 +--- usr/src/cmd/sendmail/src/queue.c | 1 + 9 files changed, 23 insertions(+), 23 deletions(-) diff --git a/usr/src/cmd/sendmail/aux/editmap.c b/usr/src/cmd/sendmail/aux/editmap.c index 46d9fd10ba..a86a24d456 100644 --- a/usr/src/cmd/sendmail/aux/editmap.c +++ b/usr/src/cmd/sendmail/aux/editmap.c @@ -11,8 +11,6 @@ * */ -#pragma ident "%Z%%M% %I% %E% SMI" - #include #ifndef lint SM_UNUSED(static char copyright[]) = @@ -263,7 +261,7 @@ main(argc, argv) # endif /* UID_MAX */ break; } - + /* FALLTHROUGH */ default: continue; diff --git a/usr/src/cmd/sendmail/aux/mail.local.c b/usr/src/cmd/sendmail/aux/mail.local.c index 98f4fb6636..0d04d3c58c 100644 --- a/usr/src/cmd/sendmail/aux/mail.local.c +++ b/usr/src/cmd/sendmail/aux/mail.local.c @@ -19,7 +19,7 @@ static char copyright[] = The Regents of the University of California. 
All rights reserved.\n"; #endif /* not lint */ -#pragma ident "%Z%%M% %I% %E% SMI" +#pragma ident "%Z%%M% %I% %E% SMI" #ifndef lint static char sccsid[] = "@(#)mail.local.c 8.83 (Berkeley) 12/17/98"; @@ -1145,9 +1145,9 @@ e_to_sys(num) eval = EX_UNAVAILABLE; break; } - /* FALLTHROUGH */ #endif /* EDQUOT */ #ifdef EAGAIN + /* FALLTHROUGH */ case EAGAIN: /* Resource temporarily unavailable */ #endif #ifdef EBUSY diff --git a/usr/src/cmd/sendmail/aux/mailstats.c b/usr/src/cmd/sendmail/aux/mailstats.c index f7870aa7de..6c5f9a18f8 100644 --- a/usr/src/cmd/sendmail/aux/mailstats.c +++ b/usr/src/cmd/sendmail/aux/mailstats.c @@ -12,8 +12,6 @@ * */ -#pragma ident "%Z%%M% %I% %E% SMI" - #include SM_IDSTR(copyright, @@ -182,6 +180,7 @@ main(argc, argv) } if (sfile == NULL) sfile = sfilebuf; + /* FALLTHROUGH */ default: continue; diff --git a/usr/src/cmd/sendmail/aux/makemap.c b/usr/src/cmd/sendmail/aux/makemap.c index d887386ffc..3b4ab68ba5 100644 --- a/usr/src/cmd/sendmail/aux/makemap.c +++ b/usr/src/cmd/sendmail/aux/makemap.c @@ -11,8 +11,6 @@ * */ -#pragma ident "%Z%%M% %I% %E% SMI" - #include SM_IDSTR(copyright, @@ -24,7 +22,6 @@ SM_IDSTR(copyright, SM_IDSTR(id, "@(#)$Id: makemap.c,v 8.179 2008/04/14 02:06:16 ca Exp $") - #include #ifndef ISC_UNIX # include @@ -290,7 +287,7 @@ main(argc, argv) # endif /* UID_MAX */ break; } - + /* FALLTHROUGH */ default: continue; diff --git a/usr/src/cmd/sendmail/aux/praliases.c b/usr/src/cmd/sendmail/aux/praliases.c index d0ee54e37a..8839334286 100644 --- a/usr/src/cmd/sendmail/aux/praliases.c +++ b/usr/src/cmd/sendmail/aux/praliases.c @@ -217,6 +217,7 @@ main(argc, argv) } praliases(b, argc, argv); } + /* FALLTHROUGH */ default: continue; diff --git a/usr/src/cmd/sendmail/db/hash/hash_func.c b/usr/src/cmd/sendmail/db/hash/hash_func.c index 4809a8098e..6819a54184 100644 --- a/usr/src/cmd/sendmail/db/hash/hash_func.c +++ b/usr/src/cmd/sendmail/db/hash/hash_func.c @@ -50,13 +50,6 @@ #include "config.h" -#pragma ident "%Z%%M% %I% %E% SMI" - -#ifndef lint -static const char sccsid[] = "@(#)hash_func.c 10.7 (Sleepycat) 9/16/97"; -static const char sccsi2[] = "%W% (Sun) %G%"; -#endif /* not lint */ - #ifndef NO_SYSTEM_INCLUDES #include #endif @@ -124,18 +117,25 @@ __ham_func3(key, len) case 0: do { HASHC; + /* FALLTHROUGH */ case 7: HASHC; + /* FALLTHROUGH */ case 6: HASHC; + /* FALLTHROUGH */ case 5: HASHC; + /* FALLTHROUGH */ case 4: HASHC; + /* FALLTHROUGH */ case 3: HASHC; + /* FALLTHROUGH */ case 2: HASHC; + /* FALLTHROUGH */ case 1: HASHC; } while (--loop); @@ -173,18 +173,25 @@ __ham_func4(key, len) case 0: do { HASH4; + /* FALLTHROUGH */ case 7: HASH4; + /* FALLTHROUGH */ case 6: HASH4; + /* FALLTHROUGH */ case 5: HASH4; + /* FALLTHROUGH */ case 4: HASH4; + /* FALLTHROUGH */ case 3: HASH4; + /* FALLTHROUGH */ case 2: HASH4; + /* FALLTHROUGH */ case 1: HASH4; } while (--loop); diff --git a/usr/src/cmd/sendmail/src/alias.c b/usr/src/cmd/sendmail/src/alias.c index d8a6562b8e..c94a47767a 100644 --- a/usr/src/cmd/sendmail/src/alias.c +++ b/usr/src/cmd/sendmail/src/alias.c @@ -11,8 +11,6 @@ * */ -#pragma ident "%Z%%M% %I% %E% SMI" - #include SM_RCSID("@(#)$Id: alias.c,v 8.219 2006/10/24 18:04:09 ca Exp $") @@ -990,6 +988,7 @@ forward(user, sendq, aliaslevel, e) break; #endif /* _FFR_FORWARD_SYSERR */ + /* FALLTHROUGH */ default: if (LogLevel > (RunAsUid == 0 ? 
2 : 10)) sm_syslog(LOG_WARNING, e->e_id, diff --git a/usr/src/cmd/sendmail/src/domain.c b/usr/src/cmd/sendmail/src/domain.c index 3b3a6d3971..4c5b23819c 100644 --- a/usr/src/cmd/sendmail/src/domain.c +++ b/usr/src/cmd/sendmail/src/domain.c @@ -11,8 +11,6 @@ * */ -#pragma ident "%Z%%M% %I% %E% SMI" - #include #include "map.h" @@ -1043,9 +1041,9 @@ nexttype: ** Such MX matches are as good as an A match, ** fall through. */ - /* FALLTHROUGH */ # if NETINET6 + /* FALLTHROUGH */ case T_AAAA: # endif /* NETINET6 */ case T_A: diff --git a/usr/src/cmd/sendmail/src/queue.c b/usr/src/cmd/sendmail/src/queue.c index 194f5250d6..9bafb02194 100644 --- a/usr/src/cmd/sendmail/src/queue.c +++ b/usr/src/cmd/sendmail/src/queue.c @@ -4468,6 +4468,7 @@ readqf(e, openonly) /* format: flag (1 char) space long-integer */ e->e_dlvr_flag = buf[1]; e->e_deliver_by = strtol(&buf[3], NULL, 10); + /* FALLTHROUGH */ case '$': /* define macro */ { -- cgit v1.2.3 From 696c22af67f1944ec22b4086570891189a8e7201 Mon Sep 17 00:00:00 2001 From: Toomas Soome Date: Fri, 30 Mar 2018 21:12:42 +0300 Subject: 9475 libefi: Do not return only if ReceiveFilter fails Reviewed by: Andy Fiddaman Reviewed by: Ken Mays Reviewed by: Yuri Pankov Approved by: Robert Mustacchi --- usr/src/boot/sys/boot/efi/libefi/efinet.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/usr/src/boot/sys/boot/efi/libefi/efinet.c b/usr/src/boot/sys/boot/efi/libefi/efinet.c index 910707a680..c15364cb9d 100644 --- a/usr/src/boot/sys/boot/efi/libefi/efinet.c +++ b/usr/src/boot/sys/boot/efi/libefi/efinet.c @@ -48,7 +48,7 @@ static int efinet_match(struct netif *, void *); static int efinet_probe(struct netif *, void *); static ssize_t efinet_put(struct iodesc *, void *, size_t); -struct netif_driver efinetif = { +struct netif_driver efinetif = { .netif_bname = "efinet", .netif_match = efinet_match, .netif_probe = efinet_probe, @@ -132,7 +132,7 @@ efinet_put(struct iodesc *desc, void *pkt, size_t len) buf = NULL; /* XXX Is this needed? */ status = net->GetStatus(net, NULL, &buf); /* - * XXX EFI1.1 and the E1000 card returns a different + * XXX EFI1.1 and the E1000 card returns a different * address than we gave. Sigh. */ } while (status == EFI_SUCCESS && buf == NULL); @@ -224,11 +224,9 @@ efinet_init(struct iodesc *desc, void *machdep_hint) EFI_SIMPLE_NETWORK_RECEIVE_BROADCAST; status = net->ReceiveFilters(net, mask, 0, FALSE, 0, NULL); - if (status != EFI_SUCCESS) { + if (status != EFI_SUCCESS) printf("net%d: cannot set rx. filters (status=%lu)\n", nif->nif_unit, EFI_ERROR_CODE(status)); - return; - } #ifdef EFINET_DEBUG dump_mode(net->Mode); @@ -241,7 +239,7 @@ efinet_init(struct iodesc *desc, void *machdep_hint) static void efinet_end(struct netif *nif) { - EFI_SIMPLE_NETWORK *net = nif->nif_devdata; + EFI_SIMPLE_NETWORK *net = nif->nif_devdata; if (net == NULL) return; -- cgit v1.2.3
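The sendmail portion of the series above is purely about annotating intentional fall-through so that readers (and "this statement may fall through" style warnings) can tell it apart from a missing break. A minimal stand-alone example of the pattern, not taken from sendmail:

    #include <stdio.h>

    /* Each higher verbosity level also enables everything below it. */
    static void
    set_verbosity(int level)
    {
            switch (level) {
            case 3:
                    (void) printf("tracing on\n");
                    /* FALLTHROUGH */
            case 2:
                    (void) printf("debug on\n");
                    /* FALLTHROUGH */
            case 1:
                    (void) printf("warnings on\n");
                    break;
            default:
                    break;
            }
    }

    int
    main(void)
    {
            set_verbosity(2);       /* prints "debug on", then "warnings on" */
            return (0);
    }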