author     Robert Mustacchi <rm@joyent.com>   2018-06-13 17:09:14 +0000
committer  Robert Mustacchi <rm@joyent.com>   2018-06-19 19:34:37 +0000
commit     d0158222a5936ac26b7a03241b7d2df18cc544c8 (patch)
tree       6fa0050ece90a408b6962a4af14d676026388900
parent     c7fbe46df966ea665df63b6e6071808987e839d1 (diff)
9596 Initial xsave xstate_bv should not include all features
9595 rtld should conditionally save AVX-512 state
Reviewed by: John Levon <john.levon@joyent.com>
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Bryan Cantrill <bryan@joyent.com>
Reviewed by: Toomas Soome <tsoome@me.com>
Approved by: Richard Lowe <richlowe@richlowe.net>
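
The first fix (9596) hinges on the distinction between the features enabled in XCR0 and the features marked live in a context's xstate_bv: seeding a new lwp with every enabled bit set makes the CPU assume the AVX/AVX-512 state is in use and penalizes code that only touches the %xmm registers. Below is a minimal C sketch of the idea, using the XFEATURE_* constants this patch adds to x86_archext.h; the xsave_hdr struct and seed_new_context() helper are hypothetical stand-ins for the kernel's FPU context code, not the actual illumos types.

	#include <stdint.h>

	#define	XFEATURE_LEGACY_FP	0x01
	#define	XFEATURE_SSE		0x02
	#define	XFEATURE_FP_INITIAL	(XFEATURE_LEGACY_FP | XFEATURE_SSE)

	/*
	 * Hypothetical stand-in for the 64-byte xsave header at the start
	 * of the extended region of an xsave area.
	 */
	struct xsave_hdr {
		uint64_t xs_xstate_bv;	/* components with live state */
		uint64_t xs_rsvd[7];	/* xcomp_bv + reserved, must be zero */
	};

	static void
	seed_new_context(struct xsave_hdr *hdr, uint64_t xcr0)
	{
		/*
		 * Before this fix the kernel OR-ed in every bit enabled in
		 * XCR0, telling the CPU that AVX/AVX-512/etc. state was
		 * live. Now only the components the ABI actually
		 * initializes for a new lwp are marked.
		 */
		hdr->xs_xstate_bv |= (xcr0 & XFEATURE_FP_INITIAL);
	}

The real change in fp_new_lwp() below is exactly this one-mask substitution: XFEATURE_FP_ALL becomes XFEATURE_FP_INITIAL when seeding the child's xstate_bv.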
-rw-r--r--  usr/src/cmd/ptools/pargs/pargs.c            |  24
-rw-r--r--  usr/src/cmd/sgs/elfdump/common/corenote.c   |   4
-rw-r--r--  usr/src/cmd/sgs/libconv/common/corenote.c   |  18
-rw-r--r--  usr/src/cmd/sgs/libconv/common/corenote.msg |  10
-rw-r--r--  usr/src/cmd/sgs/rtld/amd64/_setup.c         |  79
-rw-r--r--  usr/src/cmd/sgs/rtld/amd64/boot_elf.s       | 942
-rw-r--r--  usr/src/uts/common/exec/elf/elf.c           |  27
-rw-r--r--  usr/src/uts/common/sys/auxv.h               |   7
-rw-r--r--  usr/src/uts/common/sys/auxv_386.h           |  15
-rw-r--r--  usr/src/uts/common/sys/user.h               |   4
-rw-r--r--  usr/src/uts/i86pc/os/fpu_subr.c             | 327
-rw-r--r--  usr/src/uts/intel/ia32/os/fpu.c             |   7
-rw-r--r--  usr/src/uts/intel/sys/fp.h                  |  10
-rw-r--r--  usr/src/uts/intel/sys/x86_archext.h         |  11

14 files changed, 698 insertions, 787 deletions
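
Both halves of the fix meet at two new aux vector entries, AT_SUN_FPTYPE and AT_SUN_FPSIZE, through which the kernel tells rtld which FPU save mechanism to use and how large a save area it needs. Here is a minimal sketch of the consumer side, mirroring the auxv scan that _setup.c performs in the diff below; pick_fpu_save() is an illustrative helper, not an rtld function, and the constants are the values this patch adds to sys/auxv.h and sys/auxv_386.h.

	#include <stddef.h>
	#include <sys/auxv.h>		/* auxv_t, AT_NULL (illumos) */

	#define	AT_SUN_FPTYPE		2027
	#define	AT_SUN_FPSIZE		2028

	#define	AT_386_FPINFO_NONE	0

	static void
	pick_fpu_save(auxv_t *auxv, int *fptype, size_t *fpsize)
	{
		*fptype = AT_386_FPINFO_NONE;
		*fpsize = 0;

		for (; auxv->a_type != AT_NULL; auxv++) {
			switch (auxv->a_type) {
			case AT_SUN_FPTYPE:	/* fxsave, xsave, xsave_amd */
				*fptype = (int)auxv->a_un.a_val;
				break;
			case AT_SUN_FPSIZE:	/* bytes for the save area */
				*fpsize = (size_t)auxv->a_un.a_val;
				break;
			}
		}
	}

With the two values in hand, rtld selects fxsave-, xsave-, or fallback-based glue and uses the size to reserve stack space in elf_rtbndr(), rather than probing hardware capabilities on every PLT resolution.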
diff --git a/usr/src/cmd/ptools/pargs/pargs.c b/usr/src/cmd/ptools/pargs/pargs.c index 4eee215547..aa7b74dce9 100644 --- a/usr/src/cmd/ptools/pargs/pargs.c +++ b/usr/src/cmd/ptools/pargs/pargs.c @@ -23,7 +23,7 @@ * Use is subject to license terms. */ /* - * Copyright 2016 Joyent, Inc. + * Copyright (c) 2018, Joyent, Inc. */ /* @@ -306,15 +306,15 @@ unctrl_str(const char *src, int escape_slash, int *unprintable) * a set of safe characters and adding those few common punctuation characters * which are known to be safe. The rules are: * - * If this is a printable character (graph), and not punctuation, it is - * safe to leave unquoted. + * If this is a printable character (graph), and not punctuation, it is + * safe to leave unquoted. * - * If it's one of known hard-coded safe characters, it's also safe to leave - * unquoted. + * If it's one of the known hard-coded safe characters, it's also safe to + * leave unquoted. * - * Otherwise, the entire argument must be quoted. + * Otherwise, the entire argument must be quoted. * - * This will cause some strings to be unecessarily quoted, but it is safer than + * This will cause some strings to be unnecessarily quoted, but it is safer than * having a character unintentionally interpreted by the shell. */ static int @@ -357,12 +357,12 @@ quote_string_ascii(pargs_data_t *datap, char *src) * with by unctrl_str(). We make the following subtitution when we * encounter a single quote: * - * ' = '"'"' + * ' = '"'"' * * In addition, we put single quotes around the entire argument. For * example: * - * foo'bar = 'foo'"'"'bar' + * foo'bar = 'foo'"'"'bar' */ dstlen = strlen(src) + 3 + 4 * quote_count; dst = safe_zalloc(dstlen); @@ -711,7 +711,7 @@ at_str(long val, char *instr, size_t n, char *str) */ #define FMT_AV(s, n, hwcap, mask, name) \ - if ((hwcap) & (mask)) \ + if ((hwcap) & (mask)) \ (void) snprintf(s, n, "%s" name " | ", s) /*ARGSUSED*/ @@ -837,7 +837,9 @@ static struct aux_id aux_arr[] = { { AT_SUN_BRAND_AUX1, "AT_SUN_BRAND_AUX1", at_null }, { AT_SUN_BRAND_AUX2, "AT_SUN_BRAND_AUX2", at_null }, { AT_SUN_BRAND_AUX3, "AT_SUN_BRAND_AUX3", at_null }, - { AT_SUN_COMMPAGE, "AT_SUN_COMMPAGE", at_null } + { AT_SUN_COMMPAGE, "AT_SUN_COMMPAGE", at_null }, + { AT_SUN_FPTYPE, "AT_SUN_FPTYPE", at_null }, + { AT_SUN_FPSIZE, "AT_SUN_FPSIZE", at_null } }; #define N_AT_ENTS (sizeof (aux_arr) / sizeof (struct aux_id)) diff --git a/usr/src/cmd/sgs/elfdump/common/corenote.c b/usr/src/cmd/sgs/elfdump/common/corenote.c index 7de5e9dfcc..0777025523 100644 --- a/usr/src/cmd/sgs/elfdump/common/corenote.c +++ b/usr/src/cmd/sgs/elfdump/common/corenote.c @@ -25,7 +25,7 @@ */ /* * Copyright 2012 DEY Storage Systems, Inc. All rights reserved. - * Copyright (c) 2013, Joyent, Inc. All rights reserved. + * Copyright (c) 2018, Joyent, Inc. */ #include <stdlib.h> @@ -498,6 +498,8 @@ dump_auxv(note_state_t *state, const char *title) case AT_SUN_GID: case AT_SUN_RGID: case AT_SUN_LPAGESZ: + case AT_SUN_FPSIZE: + case AT_SUN_FPTYPE: num_fmt = SL_FMT_NUM_DEC; break; diff --git a/usr/src/cmd/sgs/libconv/common/corenote.c b/usr/src/cmd/sgs/libconv/common/corenote.c index 578d0e2774..02b3e5d59b 100644 --- a/usr/src/cmd/sgs/libconv/common/corenote.c +++ b/usr/src/cmd/sgs/libconv/common/corenote.c @@ -25,7 +25,7 @@ */ /* * Copyright 2012 DEY Storage Systems, Inc. All rights reserved. - * Copyright 2016 Joyent, Inc. + * Copyright (c) 2018 Joyent, Inc. 
*/ /* @@ -105,20 +105,22 @@ conv_cnote_auxv_type(Word type, Conv_fmt_flags_t fmt_flags, static const conv_ds_msg_t ds_types_2000_2011 = { CONV_DS_MSG_INIT(2000, types_2000_2011) }; - static const Msg types_2014_2024[] = { + static const Msg types_2014_2028[] = { MSG_AUXV_AT_SUN_EXECNAME, MSG_AUXV_AT_SUN_MMU, MSG_AUXV_AT_SUN_LDDATA, MSG_AUXV_AT_SUN_AUXFLAGS, MSG_AUXV_AT_SUN_EMULATOR, MSG_AUXV_AT_SUN_BRANDNAME, MSG_AUXV_AT_SUN_BRAND_AUX1, MSG_AUXV_AT_SUN_BRAND_AUX2, MSG_AUXV_AT_SUN_BRAND_AUX3, MSG_AUXV_AT_SUN_HWCAP2, - MSG_AUXV_AT_SUN_COMMPAGE + NULL, NULL, + MSG_AUXV_AT_SUN_COMMPAGE, MSG_AUXV_AT_SUN_FPTYPE, + MSG_AUXV_AT_SUN_FPSIZE }; - static const conv_ds_msg_t ds_types_2014_2024 = { - CONV_DS_MSG_INIT(2014, types_2014_2024) }; + static const conv_ds_msg_t ds_types_2014_2028 = { + CONV_DS_MSG_INIT(2014, types_2014_2028) }; static const conv_ds_t *ds[] = { CONV_DS_ADDR(ds_types_0_22), CONV_DS_ADDR(ds_types_2000_2011), - CONV_DS_ADDR(ds_types_2014_2024), NULL }; + CONV_DS_ADDR(ds_types_2014_2028), NULL }; return (conv_map_ds(ELFOSABI_NONE, EM_NONE, type, ds, fmt_flags, inv_buf)); @@ -1172,7 +1174,7 @@ conv_cnote_pr_flags(int flags, Conv_fmt_flags_t fmt_flags, Conv_cnote_pr_flags_buf_t *cnote_pr_flags_buf) { static const Val_desc vda[] = { - { PR_STOPPED, MSG_PR_FLAGS_STOPPED }, + { PR_STOPPED, MSG_PR_FLAGS_STOPPED }, { PR_ISTOP, MSG_PR_FLAGS_ISTOP }, { PR_DSTOP, MSG_PR_FLAGS_DSTOP }, { PR_STEP, MSG_PR_FLAGS_STEP }, @@ -1331,7 +1333,7 @@ conv_cnote_proc_flag(int flags, Conv_fmt_flags_t fmt_flags, * their numeric value. */ static const Val_desc vda[] = { - { 0x00000001, MSG_PROC_FLAG_SSYS }, + { 0x00000001, MSG_PROC_FLAG_SSYS }, { 0x02000000, MSG_PROC_FLAG_SMSACCT }, { 0, 0 } }; diff --git a/usr/src/cmd/sgs/libconv/common/corenote.msg b/usr/src/cmd/sgs/libconv/common/corenote.msg index b5562577ae..b55e67ec07 100644 --- a/usr/src/cmd/sgs/libconv/common/corenote.msg +++ b/usr/src/cmd/sgs/libconv/common/corenote.msg @@ -24,7 +24,7 @@ # Use is subject to license terms. # # Copyright 2012 DEY Storage Systems, Inc. All rights reserved. -# Copyright 2016 Joyent, Inc. +# Copyright (c) 2018 Joyent, Inc. # @ MSG_NT_PRSTATUS "[ NT_PRSTATUS ]" @@ -102,6 +102,8 @@ @ MSG_AUXV_AT_SUN_BRAND_AUX3 "SUN_BRAND_AUX3" @ MSG_AUXV_AT_SUN_HWCAP2 "SUN_HWCAP2" @ MSG_AUXV_AT_SUN_COMMPAGE "SUN_COMMPAGE" +@ MSG_AUXV_AT_SUN_FPTYPE "SUN_FPTYPE" +@ MSG_AUXV_AT_SUN_FPSIZE "SUN_FPSIZE" @ MSG_CC_CONTENT_STACK "STACK" @@ -339,7 +341,7 @@ @ MSG_REG_AMD64_R12 "[ r12 ]" @ MSG_REG_AMD64_R11 "[ r11 ]" @ MSG_REG_AMD64_R10 "[ r10 ]" -@ MSG_REG_AMD64_R9 "[ r9 ]" +@ MSG_REG_AMD64_R9 "[ r9 ]" @ MSG_REG_AMD64_R8 "[ r8 ]" @ MSG_REG_AMD64_RDI "[ rdi ]" @ MSG_REG_AMD64_RSI "[ rsi ]" @@ -706,7 +708,7 @@ @ MSG_SYS_FCNTL "[ fcntl ]" # 62 @ MSG_SYS_FCNTL_ALT "fcntl" @ MSG_SYS_ULIMIT "[ ulimit ]" # 63 -@ MSG_SYS_ULIMIT_ALT "ulimit" +@ MSG_SYS_ULIMIT_ALT "ulimit" @ MSG_SYS_RENAMEAT "[ renameat ]" # 64 @ MSG_SYS_RENAMEAT_ALT "renameat" @ MSG_SYS_UNLINKAT "[ unlinkat ]" # 65 @@ -730,7 +732,7 @@ @ MSG_SYS_RCTLSYS "[ rctlsys ]" # 74 @ MSG_SYS_RCTLSYS_ALT "rctlsys " @ MSG_SYS_SIDSYS "[ sidsys ]" # 75 -@ MSG_SYS_SIDSYS_ALT "sidsys" +@ MSG_SYS_SIDSYS_ALT "sidsys" @ MSG_SYS_76 "76" # 76 (u) @ MSG_SYS_LWP_PARK "[ lwp_park ]" # 77 @ MSG_SYS_LWP_PARK_ALT "lwp_park" diff --git a/usr/src/cmd/sgs/rtld/amd64/_setup.c b/usr/src/cmd/sgs/rtld/amd64/_setup.c index cd47f3b91f..2a92d72565 100644 --- a/usr/src/cmd/sgs/rtld/amd64/_setup.c +++ b/usr/src/cmd/sgs/rtld/amd64/_setup.c @@ -24,7 +24,7 @@ * Use is subject to license terms. 
*/ /* - * Copyright (c) 2012, Joyent, Inc. All rights reserved. + * Copyright (c) 2018, Joyent, Inc. */ /* @@ -49,6 +49,68 @@ #include "_audit.h" #include "msg.h" +/* + * Number of bytes to save for register usage. + */ +uint_t _plt_save_size; +void (*_plt_fp_save)(void *); +void (*_plt_fp_restore)(void *); + +extern void _elf_rtbndr_fp_save_orig(void *); +extern void _elf_rtbndr_fp_restore_orig(void *); +extern void _elf_rtbndr_fp_fxsave(void *); +extern void _elf_rtbndr_fp_fxrestore(void *); +extern void _elf_rtbndr_fp_xsave(void *); +extern void _elf_rtbndr_fp_xrestore(void *); + +/* + * Based on what the kernel has told us, go through and set up the various + * pointers that we'll need for elf_rtbndr for the FPU. + */ +static void +_setup_plt_fpu(int kind, size_t len) +{ + /* + * If we didn't get a length for some reason, fall back to the old + * implementation. + */ + if (len == 0) + kind = -1; + + switch (kind) { + case AT_386_FPINFO_FXSAVE: + _plt_fp_save = _elf_rtbndr_fp_fxsave; + _plt_fp_restore = _elf_rtbndr_fp_fxrestore; + _plt_save_size = len; + break; + /* + * We can treat processors that don't correctly handle the exception + * information in xsave the same way we do others. The information + * that may or may not be properly saved and restored should not be + * relevant to us because of the ABI. + */ + case AT_386_FPINFO_XSAVE: + case AT_386_FPINFO_XSAVE_AMD: + _plt_fp_save = _elf_rtbndr_fp_xsave; + _plt_fp_restore = _elf_rtbndr_fp_xrestore; + _plt_save_size = len; + break; + default: + _plt_fp_save = _elf_rtbndr_fp_save_orig; + _plt_fp_restore = _elf_rtbndr_fp_restore_orig; + /* + * The ABI says that 8 floating point registers are used for + * passing arguments (%xmm0 through %xmm7). Because these + * registers on some platforms may shadow the %ymm and %zmm + * registers, we end up needing to size this for the maximally + * sized register we care about, a 512-bit (64-byte) zmm + * register. + */ + _plt_save_size = 64 * 8; + break; + } +} + /* VARARGS */ unsigned long _setup(Boot *ebp, Dyn *ld_dyn) @@ -67,7 +129,8 @@ _setup(Boot *ebp, Dyn *ld_dyn) uid_t uid = (uid_t)-1, euid = (uid_t)-1; gid_t gid = (gid_t)-1, egid = (gid_t)-1; char *_platform = NULL, *_execname = NULL, *_emulator = NULL; - int auxflags = -1; + int auxflags = -1, fpkind = -1; + size_t fpsize = 0; /* * Scan the bootstrap structure to pick up the basics. @@ -158,6 +221,12 @@ _setup(Boot *ebp, Dyn *ld_dyn) /* name of emulation library, if any */ _emulator = auxv->a_un.a_ptr; break; + case AT_SUN_FPTYPE: + fpkind = (int)auxv->a_un.a_val; + break; + case AT_SUN_FPSIZE: + fpsize = (size_t)auxv->a_un.a_val; + break; } } @@ -231,6 +300,12 @@ _setup(Boot *ebp, Dyn *ld_dyn) sizeof (ulong_t) + sizeof (Sym); /* + * Initialize the amd64 specific PLT relocation constants based on the + * FP information that we have. + */ + _setup_plt_fpu(fpkind, fpsize); + + /* * Continue with generic startup processing. */ if ((lmp = setup((char **)_envp, (auxv_t *)_auxv, _flags, _platform, diff --git a/usr/src/cmd/sgs/rtld/amd64/boot_elf.s b/usr/src/cmd/sgs/rtld/amd64/boot_elf.s index 67301a0c96..36f136e31d 100644 --- a/usr/src/cmd/sgs/rtld/amd64/boot_elf.s +++ b/usr/src/cmd/sgs/rtld/amd64/boot_elf.s @@ -22,7 +22,86 @@ /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. - * Copyright (c) 2017 Joyent, Inc. All rights reserved. + * Copyright (c) 2018 Joyent, Inc. All rights reserved. + */ + +/* + * Welcome to the magic behind the PLT (procedure linkage table). 
When rtld + * fills out the PLT entries, it will refer initially to the functions in this + * file. As such our goal is simple: + * + * The lie of the function call must be preserved at all costs. + * + * This means that we need to prepare the system for an arbitrary series of + * instructions to be called. For example, as a side effect of resolving a + * symbol we may need to open a shared object which will cause any _init + * functions to be called. Those functions can use any and all of the ABI state + * that they desire (for example, the FPU registers). Therefore we must save and + * restore all the ABI mandated registers here. + * + * For the full information about what we need to save and restore and why, + * please see the System V amd64 PS ABI '3.2.3 Parameter Passing'. For general + * purpose registers, we need to take care of the following: + * + * %rax - Used for information about the number of vector arguments + * %rdi - arg0 + * %rsi - arg1 + * %rdx - arg2 + * %rcx - arg3 + * %r8 - arg4 + * %r9 - arg5 + * %r10 - static chain pointer + * + * Unfortunately, the world of the FPU is more complicated. + * + * The ABI mandates that we must save %xmm0-%xmm7. On newer Intel processors, + * %xmm0-%xmm7 shadow %ymm0-%ymm7 and %zmm0-%zmm7. Historically, when saving the + * FPU, we only saved and restored these eight registers. Unfortunately, this + * process itself ended up having side effects. Because the registers shadow one + * another, if we saved a full %zmm register when only a %xmm register was + * valid, we would end up causing the processor to think that the full %zmm + * register was valid. Once it believed that this was the case, it would then + * degrade performance of code that only used the %xmm registers. + * + * One way to tackle this problem would have been to use xgetbv with ecx=1 to + * get information about what was actually in use and only save and restore + * that. You can imagine that this logic roughly ends up as something like: + * + * if (zmm_inuse) + * save_zmm() + * if (ymm_inuse) + * save_ymm() + * save_xmm() + * + * However, this logic leaves us at the mercy of the branch predictor. This + * means that all of our efforts can end up still causing the CPU to execute + * things to make it think that some of these other FPU registers are in use and + * thus defeat the optimizations that it has. + * + * To deal with this problem, Intel has suggested using the xsave family of + * instructions. The kernel provides information about the size required for the + * floating point registers as well as which of several methods we need to + * employ through the aux vector. This gets us out of trying to look at the + * hardware capabilities and make decisions every time. As part of the + * amd64-specific portion of rtld, it will process those values and determine + * the functions on an as-needed basis. + * + * There are two different functions that we export. The first is elf_rtbndr(). + * This is basically the glue that gets us into the PLT and to perform + * relocations. elf_rtbndr() determines the address of the function that we must + * call and arranges its stack such that when we return from elf_rtbndr() we + * will instead jump to the actual relocated function which will return to the + * original caller. Because of this, we must preserve all of the registers that + * are used for arguments and restore them before returning. + * + * The second function we export is elf_plt_trace(). This is used to add support + * for audit libraries among other things. 
elf_plt_trace() may or may not call + * the underlying function as a side effect or merely set up its return to it. + * This changes how we handle %rax. If we call the function ourself, then we end + * up making sure that %rax is the return value versus the initial value. In + * addition, because we get %r11 from the surrounding PLT code, we opt to + * preserve it in case some of the relocation logic ever ends up calling back + * into us again. */ #if defined(lint) @@ -34,23 +113,171 @@ #include <sys/regset.h> #include <sys/auxv_386.h> -/* ARGSUSED0 */ -int -elf_plt_trace() -{ - return (0); -} #else #include <link.h> #include <_audit.h> #include <sys/asm_linkage.h> #include <sys/auxv_386.h> +#include <sys/x86_archext.h> + +/* + * This macro is used to zero the xsave header. The contents of scratchreg will + * be destroyed. locreg should contain the starting address of the xsave header. + */ +#define XSAVE_HEADER_ZERO(scratch, loc) \ + xorq scratch, scratch; \ + movq scratch, 0x200(loc); \ + movq scratch, 0x208(loc); \ + movq scratch, 0x210(loc); \ + movq scratch, 0x218(loc); \ + movq scratch, 0x220(loc); \ + movq scratch, 0x228(loc); \ + movq scratch, 0x230(loc); \ + movq scratch, 0x238(loc) + .file "boot_elf.s" .text /* + * This section of the code contains glue functions that are used to take care + * of saving and restoring the FPU. We deal with this in a few different ways + * based on the hardware support and what exists. Historically we've only saved + * and restored the first 8 floating point registers rather than the entire FPU. + * That implementation still exists here and is kept around mostly as an + * insurance policy. + */ + ENTRY(_elf_rtbndr_fp_save_orig) + movq org_scapset@GOTPCREL(%rip),%r11 + movq (%r11),%r11 /* Syscapset_t pointer */ + movl 8(%r11),%edx /* sc_hw_2 */ + testl $AV_386_2_AVX512F,%edx + jne .save_zmm + movl (%r11),%edx /* sc_hw_1 */ + testl $AV_386_AVX,%edx + jne .save_ymm + movdqa %xmm0, (%rdi) + movdqa %xmm1, 64(%rdi) + movdqa %xmm2, 128(%rdi) + movdqa %xmm3, 192(%rdi) + movdqa %xmm4, 256(%rdi) + movdqa %xmm5, 320(%rdi) + movdqa %xmm6, 384(%rdi) + movdqa %xmm7, 448(%rdi) + jmp .save_finish + +.save_ymm: + vmovdqa %ymm0, (%rdi) + vmovdqa %ymm1, 64(%rdi) + vmovdqa %ymm2, 128(%rdi) + vmovdqa %ymm3, 192(%rdi) + vmovdqa %ymm4, 256(%rdi) + vmovdqa %ymm5, 320(%rdi) + vmovdqa %ymm6, 384(%rdi) + vmovdqa %ymm7, 448(%rdi) + jmp .save_finish + +.save_zmm: + vmovdqa64 %zmm0, (%rdi) + vmovdqa64 %zmm1, 64(%rdi) + vmovdqa64 %zmm2, 128(%rdi) + vmovdqa64 %zmm3, 192(%rdi) + vmovdqa64 %zmm4, 256(%rdi) + vmovdqa64 %zmm5, 320(%rdi) + vmovdqa64 %zmm6, 384(%rdi) + vmovdqa64 %zmm7, 448(%rdi) + +.save_finish: + ret + SET_SIZE(_elf_rtbndr_fp_save_orig) + + ENTRY(_elf_rtbndr_fp_restore_orig) + movq org_scapset@GOTPCREL(%rip),%r11 + movq (%r11),%r11 /* Syscapset_t pointer */ + movl 8(%r11),%edx /* sc_hw_2 */ + testl $AV_386_2_AVX512F,%edx + jne .restore_zmm + movl (%r11),%edx /* sc_hw_1 */ + testl $AV_386_AVX,%edx + jne .restore_ymm + + movdqa (%rdi), %xmm0 + movdqa 64(%rdi), %xmm1 + movdqa 128(%rdi), %xmm2 + movdqa 192(%rdi), %xmm3 + movdqa 256(%rdi), %xmm4 + movdqa 320(%rdi), %xmm5 + movdqa 384(%rdi), %xmm6 + movdqa 448(%rdi), %xmm7 + jmp .restore_finish + +.restore_ymm: + vmovdqa (%rdi), %ymm0 + vmovdqa 64(%rdi), %ymm1 + vmovdqa 128(%rdi), %ymm2 + vmovdqa 192(%rdi), %ymm3 + vmovdqa 256(%rdi), %ymm4 + vmovdqa 320(%rdi), %ymm5 + vmovdqa 384(%rdi), %ymm6 + vmovdqa 448(%rdi), %ymm7 + jmp .restore_finish + +.restore_zmm: + vmovdqa64 (%rdi), %zmm0 + vmovdqa64 64(%rdi), %zmm1 + vmovdqa64 
128(%rdi), %zmm2 + vmovdqa64 192(%rdi), %zmm3 + vmovdqa64 256(%rdi), %zmm4 + vmovdqa64 320(%rdi), %zmm5 + vmovdqa64 384(%rdi), %zmm6 + vmovdqa64 448(%rdi), %zmm7 + +.restore_finish: + ret + SET_SIZE(_elf_rtbndr_fp_restore_orig) + + ENTRY(_elf_rtbndr_fp_fxsave) + fxsaveq (%rdi) + ret + SET_SIZE(_elf_rtbndr_fp_fxsave) + + ENTRY(_elf_rtbndr_fp_fxrestore) + fxrstor (%rdi) + ret + SET_SIZE(_elf_rtbndr_fp_fxrestore) + + ENTRY(_elf_rtbndr_fp_xsave) + XSAVE_HEADER_ZERO(%rdx, %rdi) + movq $_CONST(XFEATURE_FP_ALL), %rdx + movl %edx, %eax + shrq $32, %rdx + xsave (%rdi) /* save data */ + ret + SET_SIZE(_elf_rtbndr_fp_xsave) + + ENTRY(_elf_rtbndr_fp_xrestore) + movq $_CONST(XFEATURE_FP_ALL), %rdx + movl %edx, %eax + shrq $32, %rdx + xrstor (%rdi) /* save data */ + ret + SET_SIZE(_elf_rtbndr_fp_xrestore) + +#endif + +#if defined(lint) + +/* ARGSUSED0 */ +int +elf_plt_trace() +{ + return (0); +} + +#else + +/* * On entry the 'glue code' has already done the following: * * pushq %rbp @@ -79,173 +306,80 @@ elf_plt_trace() * the second 8 bytes are to align the stack and are available * for use. */ -#define REFLMP_OFF 0x0 -#define DEFLMP_OFF 0x8 +#define REFLMP_OFF 0x0 +#define DEFLMP_OFF 0x8 #define SYMNDX_OFF 0x10 #define SBFLAGS_OFF 0x14 #define SYMDEF_OFF 0x18 #define SYMDEF_VALUE_OFF 0x20 + /* - * Local stack space storage for elf_plt_trace is allocated - * as follows: - * - * First - before we got here - %rsp has been decremented - * by 0x10 to make space for the dyndata ptr (and another - * free word). In addition to that, we create space - * for the following: + * Next, we need to create a bunch of local storage. First, we have to preserve + * the standard registers per the amd64 ABI. This means we need to deal with: + * %rax - Used for information about the number of vector arguments + * %rdi - arg0 + * %rsi - arg1 + * %rdx - arg2 + * %rcx - arg3 + * %r8 - arg4 + * %r9 - arg5 + * %r10 - static chain pointer + * %r11 - PLT Interwork register, our caller is using this, so it's not + * a temporary for us. * - * La_amd64_regs 8 * 8: 64 - * prev_stack_size 8 8 - * Saved regs: - * %rdi 8 - * %rsi 8 - * %rdx 8 - * %rcx 8 - * %r8 8 - * %r9 8 - * %r10 8 - * %r11 8 - * %rax 8 - * ======= - * Subtotal: 144 (64byte aligned) - * - * The amd64 ABI says that the first 8 floating point registers are used to - * pass arguments. Because we may end up loading a DSO which has an _init - * section which uses these registers, we must save them. While various - * CPUs have different sized floating point registers, the largest are - * AVX512 512-bit (64-byte) %zmm registers. These need to be 64-byte aligned. - * Although the example below is showing offsets for %rbp, we actually save the - * registers offset from %rsp, which is forced into 64-bit alignment. - * - * Saved Media Regs (used to pass floating point args): - * %zmm0 - %zmm7 64 * 8: 512 - * ======= - * Total: 656 (64byte aligned) - * - * So - will subtract the following to create enough space - * - * -8(%rbp) store dyndata ptr - * -16(%rbp) store call destination - * -80(%rbp) space for La_amd64_regs - * -88(%rbp) prev stack size - * The next %rbp offsets are only true if the caller had correct stack - * alignment. See note above SPRDIOFF for why we use %rsp alignment to - * access these stack fields. 
- * -96(%rbp) entering %rdi - * -104(%rbp) entering %rsi - * -112(%rbp) entering %rdx - * -120(%rbp) entering %rcx - * -128(%rbp) entering %r8 - * -136(%rbp) entering %r9 - * -144(%rbp) entering %r10 - * -152(%rbp) entering %r11 - * -160(%rbp) entering %rax - * -224(%rbp) entering %xmm0 - * -288(%rbp) entering %xmm1 - * -352(%rbp) entering %xmm2 - * -288(%rbp) entering %xmm3 - * -416(%rbp) entering %xmm4 - * -480(%rbp) entering %xmm5 - * -544(%rbp) entering %xmm6 - * -608(%rbp) entering %xmm7 + * In addition, we need to save the amd64 ABI floating point arguments. Finally, + * we need to deal with our local storage. We need a La_amd64_regs and a + * uint64_t for the previous stack size. * + * To deal with this and the potentially variable size of the FPU regs, we have + * to play a few different games. We refer to all of the standard registers, the + * previous stack size, and La_amd64_regs structure off of %rbp. These are all + * values that are below %rbp. */ -#define SPDYNOFF -8 -#define SPDESTOFF -16 -#define SPLAREGOFF -80 -#define SPPRVSTKOFF -88 - -/* - * The next set of offsets are relative to %rsp. - * We guarantee %rsp is 64-byte aligned. This guarantees the zmm registers are - * saved to 64-byte aligned addresses. - * %rbp may only be 8 byte aligned if we came in from non-ABI compliant code. - */ -#define SPRDIOFF 576 -#define SPRSIOFF 568 -#define SPRDXOFF 560 -#define SPRCXOFF 552 -#define SPR8OFF 544 -#define SPR9OFF 536 -#define SPR10OFF 528 -#define SPR11OFF 520 -#define SPRAXOFF 512 -#define SPXMM0OFF 448 -#define SPXMM1OFF 384 -#define SPXMM2OFF 320 -#define SPXMM3OFF 256 -#define SPXMM4OFF 192 -#define SPXMM5OFF 128 -#define SPXMM6OFF 64 -#define SPXMM7OFF 0 - - /* See elf_rtbndr for explanation behind org_scapset */ - .extern org_scapset - .globl elf_plt_trace - .type elf_plt_trace,@function - .align 16 -elf_plt_trace: +#define SPDYNOFF -8 +#define SPDESTOFF -16 +#define SPPRVSTKOFF -24 +#define SPLAREGOFF -88 +#define ORIG_RDI -96 +#define ORIG_RSI -104 +#define ORIG_RDX -112 +#define ORIG_RCX -120 +#define ORIG_R8 -128 +#define ORIG_R9 -136 +#define ORIG_R10 -144 +#define ORIG_R11 -152 +#define ORIG_RAX -160 +#define PLT_SAVE_OFF 168 + + ENTRY(elf_plt_trace) /* - * Enforce 64-byte stack alignment here. - * The next andq instruction essentially does this: - * if necessary, subtract N bytes from %rsp to align on 64 bytes. + * Save our static registers. After that 64-byte align us and subtract + * the appropriate amount for the FPU. The frame pointer has already + * been pushed for us by the glue code. 
*/ - andq $-64, %rsp /* enforce 64-byte stack alignment */ - subq $656,%rsp / create some local storage - - movq %rdi, SPRDIOFF(%rsp) - movq %rsi, SPRSIOFF(%rsp) - movq %rdx, SPRDXOFF(%rsp) - movq %rcx, SPRCXOFF(%rsp) - movq %r8, SPR8OFF(%rsp) - movq %r9, SPR9OFF(%rsp) - movq %r10, SPR10OFF(%rsp) - movq %r11, SPR11OFF(%rsp) - movq %rax, SPRAXOFF(%rsp) - - movq org_scapset@GOTPCREL(%rip),%r9 - movq (%r9),%r9 /* Syscapset_t pointer */ - movl 8(%r9),%edx /* sc_hw_2 */ - testl $AV_386_2_AVX512F,%edx - jne .trace_save_zmm - movl (%r9),%edx /* sc_hw_1 */ - testl $AV_386_AVX,%edx - jne .trace_save_ymm - -.trace_save_xmm: - movdqa %xmm0, SPXMM0OFF(%rsp) - movdqa %xmm1, SPXMM1OFF(%rsp) - movdqa %xmm2, SPXMM2OFF(%rsp) - movdqa %xmm3, SPXMM3OFF(%rsp) - movdqa %xmm4, SPXMM4OFF(%rsp) - movdqa %xmm5, SPXMM5OFF(%rsp) - movdqa %xmm6, SPXMM6OFF(%rsp) - movdqa %xmm7, SPXMM7OFF(%rsp) - jmp .trace_save_finish - -.trace_save_ymm: - vmovdqa %ymm0, SPXMM0OFF(%rsp) - vmovdqa %ymm1, SPXMM1OFF(%rsp) - vmovdqa %ymm2, SPXMM2OFF(%rsp) - vmovdqa %ymm3, SPXMM3OFF(%rsp) - vmovdqa %ymm4, SPXMM4OFF(%rsp) - vmovdqa %ymm5, SPXMM5OFF(%rsp) - vmovdqa %ymm6, SPXMM6OFF(%rsp) - vmovdqa %ymm7, SPXMM7OFF(%rsp) - jmp .trace_save_finish - -.trace_save_zmm: - vmovdqa64 %zmm0, SPXMM0OFF(%rsp) - vmovdqa64 %zmm1, SPXMM1OFF(%rsp) - vmovdqa64 %zmm2, SPXMM2OFF(%rsp) - vmovdqa64 %zmm3, SPXMM3OFF(%rsp) - vmovdqa64 %zmm4, SPXMM4OFF(%rsp) - vmovdqa64 %zmm5, SPXMM5OFF(%rsp) - vmovdqa64 %zmm6, SPXMM6OFF(%rsp) - vmovdqa64 %zmm7, SPXMM7OFF(%rsp) - -.trace_save_finish: + movq %rdi, ORIG_RDI(%rbp) + movq %rsi, ORIG_RSI(%rbp) + movq %rdx, ORIG_RDX(%rbp) + movq %rcx, ORIG_RCX(%rbp) + movq %r8, ORIG_R8(%rbp) + movq %r9, ORIG_R9(%rbp) + movq %r10, ORIG_R10(%rbp) + movq %r11, ORIG_R11(%rbp) + movq %rax, ORIG_RAX(%rbp) + + subq $PLT_SAVE_OFF, %rsp + + movq _plt_save_size@GOTPCREL(%rip),%r9 + movq _plt_fp_save@GOTPCREL(%rip),%r10 + subq (%r9), %rsp + andq $-64, %rsp + movq %rsp, %rdi + call *(%r10) + /* + * Now that we've saved all of our registers, figure out what we need to + * do next. 
+ */ movq SPDYNOFF(%rbp), %rax / %rax = dyndata testb $LA_SYMB_NOPLTENTER, SBFLAGS_OFF(%rax) / <link.h> je .start_pltenter @@ -262,17 +396,17 @@ elf_plt_trace: movq %rdi, 0(%rsi) / la_rsp movq 0(%rbp), %rdi movq %rdi, 8(%rsi) / la_rbp - movq SPRDIOFF(%rsp), %rdi + movq ORIG_RDI(%rbp), %rdi movq %rdi, 16(%rsi) / la_rdi - movq SPRSIOFF(%rsp), %rdi + movq ORIG_RSI(%rbp), %rdi movq %rdi, 24(%rsi) / la_rsi - movq SPRDXOFF(%rsp), %rdi + movq ORIG_RDX(%rbp), %rdi movq %rdi, 32(%rsi) / la_rdx - movq SPRCXOFF(%rsp), %rdi + movq ORIG_RCX(%rbp), %rdi movq %rdi, 40(%rsi) / la_rcx - movq SPR8OFF(%rsp), %rdi + movq ORIG_R8(%rbp), %rdi movq %rdi, 48(%rsi) / la_r8 - movq SPR9OFF(%rsp), %rdi + movq ORIG_R9(%rbp), %rdi movq %rdi, 56(%rsi) / la_r9 /* @@ -316,60 +450,21 @@ elf_plt_trace: movq SPDESTOFF(%rbp), %r11 / r11 == calling destination movq %r11, 0(%rbp) / store destination at top - / - / Restore registers - / - movq org_scapset@GOTPCREL(%rip),%r9 - movq (%r9),%r9 /* Syscapset_t pointer */ - movl 8(%r9),%edx /* sc_hw_2 */ - testl $AV_386_2_AVX512F,%edx - jne .trace_restore_zmm - movl (%r9),%edx /* sc_hw_1 */ - testl $AV_386_AVX,%edx - jne .trace_restore_ymm - -.trace_restore_xmm: - movdqa SPXMM0OFF(%rsp), %xmm0 - movdqa SPXMM1OFF(%rsp), %xmm1 - movdqa SPXMM2OFF(%rsp), %xmm2 - movdqa SPXMM3OFF(%rsp), %xmm3 - movdqa SPXMM4OFF(%rsp), %xmm4 - movdqa SPXMM5OFF(%rsp), %xmm5 - movdqa SPXMM6OFF(%rsp), %xmm6 - movdqa SPXMM7OFF(%rsp), %xmm7 - jmp .trace_restore_finish - -.trace_restore_ymm: - vmovdqa SPXMM0OFF(%rsp), %ymm0 - vmovdqa SPXMM1OFF(%rsp), %ymm1 - vmovdqa SPXMM2OFF(%rsp), %ymm2 - vmovdqa SPXMM3OFF(%rsp), %ymm3 - vmovdqa SPXMM4OFF(%rsp), %ymm4 - vmovdqa SPXMM5OFF(%rsp), %ymm5 - vmovdqa SPXMM6OFF(%rsp), %ymm6 - vmovdqa SPXMM7OFF(%rsp), %ymm7 - jmp .trace_restore_finish - -.trace_restore_zmm: - vmovdqa64 SPXMM0OFF(%rsp), %zmm0 - vmovdqa64 SPXMM1OFF(%rsp), %zmm1 - vmovdqa64 SPXMM2OFF(%rsp), %zmm2 - vmovdqa64 SPXMM3OFF(%rsp), %zmm3 - vmovdqa64 SPXMM4OFF(%rsp), %zmm4 - vmovdqa64 SPXMM5OFF(%rsp), %zmm5 - vmovdqa64 SPXMM6OFF(%rsp), %zmm6 - vmovdqa64 SPXMM7OFF(%rsp), %zmm7 - -.trace_restore_finish: - movq SPRDIOFF(%rsp), %rdi - movq SPRSIOFF(%rsp), %rsi - movq SPRDXOFF(%rsp), %rdx - movq SPRCXOFF(%rsp), %rcx - movq SPR8OFF(%rsp), %r8 - movq SPR9OFF(%rsp), %r9 - movq SPR10OFF(%rsp), %r10 - movq SPR11OFF(%rsp), %r11 - movq SPRAXOFF(%rsp), %rax + /* Restore FPU */ + movq _plt_fp_restore@GOTPCREL(%rip),%r10 + + movq %rsp, %rdi + call *(%r10) + + movq ORIG_RDI(%rbp), %rdi + movq ORIG_RSI(%rbp), %rsi + movq ORIG_RDX(%rbp), %rdx + movq ORIG_RCX(%rbp), %rcx + movq ORIG_R8(%rbp), %r8 + movq ORIG_R9(%rbp), %r9 + movq ORIG_R10(%rbp), %r10 + movq ORIG_R11(%rbp), %r11 + movq ORIG_RAX(%rbp), %rax subq $8, %rbp / adjust %rbp for 'ret' movq %rbp, %rsp / @@ -417,13 +512,10 @@ elf_plt_trace: /* * Grow the stack and duplicate the arguements of the * original caller. - * - * We save %rsp in %r11 since we need to use the current rsp for - * accessing the registers saved in our stack frame. */ .grow_stack: movq %rsp, %r11 - subq %rdx, %rsp / grow the stack + subq %rdx, %rsp / grow the stack movq %rdx, SPPRVSTKOFF(%rbp) / -88(%rbp) == prev frame sz movq %rsp, %rcx / %rcx = dest addq %rcx, %rdx / %rdx == tail of dest @@ -445,59 +537,20 @@ elf_plt_trace: / Restore registers using %r11 which contains our old %rsp value / before growing the stack. / - - / Yes, we have to do this dance again. Sorry. 
- movq org_scapset@GOTPCREL(%rip),%r9 - movq (%r9),%r9 /* Syscapset_t pointer */ - movl 8(%r9),%edx /* sc_hw_2 */ - testl $AV_386_2_AVX512F,%edx - jne .trace_r2_zmm - movl (%r9),%edx /* sc_hw_1 */ - testl $AV_386_AVX,%edx - jne .trace_r2_ymm - -.trace_r2_xmm: - movdqa SPXMM0OFF(%r11), %xmm0 - movdqa SPXMM1OFF(%r11), %xmm1 - movdqa SPXMM2OFF(%r11), %xmm2 - movdqa SPXMM3OFF(%r11), %xmm3 - movdqa SPXMM4OFF(%r11), %xmm4 - movdqa SPXMM5OFF(%r11), %xmm5 - movdqa SPXMM6OFF(%r11), %xmm6 - movdqa SPXMM7OFF(%r11), %xmm7 - jmp .trace_r2_finish - -.trace_r2_ymm: - vmovdqa SPXMM0OFF(%r11), %ymm0 - vmovdqa SPXMM1OFF(%r11), %ymm1 - vmovdqa SPXMM2OFF(%r11), %ymm2 - vmovdqa SPXMM3OFF(%r11), %ymm3 - vmovdqa SPXMM4OFF(%r11), %ymm4 - vmovdqa SPXMM5OFF(%r11), %ymm5 - vmovdqa SPXMM6OFF(%r11), %ymm6 - vmovdqa SPXMM7OFF(%r11), %ymm7 - jmp .trace_r2_finish - -.trace_r2_zmm: - vmovdqa64 SPXMM0OFF(%r11), %zmm0 - vmovdqa64 SPXMM1OFF(%r11), %zmm1 - vmovdqa64 SPXMM2OFF(%r11), %zmm2 - vmovdqa64 SPXMM3OFF(%r11), %zmm3 - vmovdqa64 SPXMM4OFF(%r11), %zmm4 - vmovdqa64 SPXMM5OFF(%r11), %zmm5 - vmovdqa64 SPXMM6OFF(%r11), %zmm6 - vmovdqa64 SPXMM7OFF(%r11), %zmm7 + movq _plt_fp_restore@GOTPCREL(%rip),%r10 + movq %r11, %rdi + call *(%r10) .trace_r2_finish: - movq SPRDIOFF(%r11), %rdi - movq SPRSIOFF(%r11), %rsi - movq SPRDXOFF(%r11), %rdx - movq SPRCXOFF(%r11), %rcx - movq SPR8OFF(%r11), %r8 - movq SPR9OFF(%r11), %r9 - movq SPR10OFF(%r11), %r10 - movq SPRAXOFF(%r11), %rax - movq SPR11OFF(%r11), %r11 / retore %r11 last + movq ORIG_RDI(%rbp), %rdi + movq ORIG_RSI(%rbp), %rsi + movq ORIG_RDX(%rbp), %rdx + movq ORIG_RCX(%rbp), %rcx + movq ORIG_R8(%rbp), %r8 + movq ORIG_R9(%rbp), %r9 + movq ORIG_R10(%rbp), %r10 + movq ORIG_RAX(%rbp), %rax + movq ORIG_R11(%rbp), %r11 /* * Call to desitnation function - we'll return here @@ -517,72 +570,32 @@ elf_plt_trace: movq REFLMP_OFF(%r11), %rsi / arg2 (rlmp) movq %rax, %rdi / arg1 (returnval) call audit_pltexit@PLT - + /* * Clean up after ourselves and return to the - * original calling procedure. + * original calling procedure. Make sure to restore + * registers. 
*/ - / - / Restore registers - / - - movq org_scapset@GOTPCREL(%rip),%r9 - movq (%r9),%r9 /* Syscapset_t pointer */ - movl 8(%r9),%edx /* sc_hw_2 */ - testl $AV_386_2_AVX512F,%edx - jne .trace_r3_zmm - movl (%r9),%edx /* sc_hw_1 */ - testl $AV_386_AVX,%edx - jne .trace_r3_ymm - -.trace_r3_xmm: - movdqa SPXMM0OFF(%rsp), %xmm0 - movdqa SPXMM1OFF(%rsp), %xmm1 - movdqa SPXMM2OFF(%rsp), %xmm2 - movdqa SPXMM3OFF(%rsp), %xmm3 - movdqa SPXMM4OFF(%rsp), %xmm4 - movdqa SPXMM5OFF(%rsp), %xmm5 - movdqa SPXMM6OFF(%rsp), %xmm6 - movdqa SPXMM7OFF(%rsp), %xmm7 - jmp .trace_r3_finish - -.trace_r3_ymm: - vmovdqa SPXMM0OFF(%rsp), %ymm0 - vmovdqa SPXMM1OFF(%rsp), %ymm1 - vmovdqa SPXMM2OFF(%rsp), %ymm2 - vmovdqa SPXMM3OFF(%rsp), %ymm3 - vmovdqa SPXMM4OFF(%rsp), %ymm4 - vmovdqa SPXMM5OFF(%rsp), %ymm5 - vmovdqa SPXMM6OFF(%rsp), %ymm6 - vmovdqa SPXMM7OFF(%rsp), %ymm7 - jmp .trace_r3_finish - -.trace_r3_zmm: - vmovdqa64 SPXMM0OFF(%rsp), %zmm0 - vmovdqa64 SPXMM1OFF(%rsp), %zmm1 - vmovdqa64 SPXMM2OFF(%rsp), %zmm2 - vmovdqa64 SPXMM3OFF(%rsp), %zmm3 - vmovdqa64 SPXMM4OFF(%rsp), %zmm4 - vmovdqa64 SPXMM5OFF(%rsp), %zmm5 - vmovdqa64 SPXMM6OFF(%rsp), %zmm6 - vmovdqa64 SPXMM7OFF(%rsp), %zmm7 - -.trace_r3_finish: - movq SPRDIOFF(%rsp), %rdi - movq SPRSIOFF(%rsp), %rsi - movq SPRDXOFF(%rsp), %rdx - movq SPRCXOFF(%rsp), %rcx - movq SPR8OFF(%rsp), %r8 - movq SPR9OFF(%rsp), %r9 - movq SPR10OFF(%rsp), %r10 - movq SPR11OFF(%rsp), %r11 - // rax already contains return value + movq _plt_fp_restore@GOTPCREL(%rip),%r10 + movq %rsp, %rdi + movq %rax, SPPRVSTKOFF(%rbp) + call *(%r10) + + movq ORIG_RDI(%rbp), %rdi + movq ORIG_RSI(%rbp), %rsi + movq ORIG_RDX(%rbp), %rdx + movq ORIG_RCX(%rbp), %rcx + movq ORIG_R8(%rbp), %r8 + movq ORIG_R9(%rbp), %r9 + movq ORIG_R10(%rbp), %r10 + movq ORIG_R11(%rbp), %r11 + movq SPPRVSTKOFF(%rbp), %rax movq %rbp, %rsp / popq %rbp / ret / return to caller - .size elf_plt_trace, .-elf_plt_trace + SET_SIZE(elf_plt_trace) #endif /* @@ -640,227 +653,91 @@ elf_rtbndr(Rt_map * lmp, unsigned long reloc, caddr_t pc) #define LBRPCOFF 24 /* arg3 - pc of caller */ /* - * Possible arguments for the resolved function are in registers as per - * the AMD64 ABI. We must save on the local stack all possible register - * arguments before interposing functions to resolve the called function. - * Possible arguments must be restored before invoking the resolved function. - * - * Before the AVX and AVX512 instruction set enhancements to AMD64, there were - * no changes in the set of registers and their sizes across different - * processors. With AVX, the xmm registers became the lower 128 bits of the ymm - * registers. With AVX512, the ymm registers became the lower 256 bits of the - * zmm registers. Because of this, we need to conditionally save either 256 bits - * or 512 bits, instead of 128 bits. Regardless of whether we have zmm registers - * or not, we're always going to push the stack space assuming that we do, to - * simplify the code. - * - * Local stack space storage for elf_rtbndr is allocated as follows: - * - * Saved regs: - * %rax 8 - * %rdi 8 - * %rsi 8 - * %rdx 8 - * %rcx 8 - * %r8 8 - * %r9 8 - * %r10 8 - * ======= - * Subtotal: 64 (64byte aligned) + * With the above in place, we must now proceed to preserve all temporary + * registers that are also used for passing arguments. 
Specifically this + * means: * - * Saved Media Regs (used to pass floating point args): - * %zmm0 - %zmm7 64 * 8 512 - * ======= - * Total: 576 (64byte aligned) - * - * So - will subtract the following to create enough space + * %rax - Used for information about the number of vector arguments + * %rdi - arg0 + * %rsi - arg1 + * %rdx - arg2 + * %rcx - arg3 + * %r8 - arg4 + * %r9 - arg5 + * %r10 - static chain pointer * - * 0(%rsp) save %rax - * 8(%rsp) save %rdi - * 16(%rsp) save %rsi - * 24(%rsp) save %rdx - * 32(%rsp) save %rcx - * 40(%rsp) save %r8 - * 48(%rsp) save %r9 - * 56(%rsp) save %r10 - * 64(%rsp) save %zmm0 - * 128(%rsp) save %zmm1 - * 192(%rsp) save %zmm2 - * 256(%rsp) save %zmm3 - * 320(%rsp) save %zmm4 - * 384(%rsp) save %zmm5 - * 448(%rsp) save %zmm6 - * 512(%rsp) save %zmm7 - * - * Note: Some callers may use 8-byte stack alignment. We use %rsp offsets to - * save/restore registers because %rbp may not be 64-byte aligned. We guarantee - * %rsp is 64-byte aligned in the function preamble. + * While we don't have to preserve %r11, we do have to preserve the FPU + * registers. The FPU logic is delegated to a specific function that we'll call. + * However, it requires that its stack is 64-byte aligned. We defer the + * alignment to that point. This will also take care of the fact that a caller + * may not call us with a correctly aligned stack pointer per the amd64 ABI. */ -/* - * As the registers may either be xmm, ymm or zmm, we've left the name as xmm, - * but increased the offset between them to always cover the zmm case. - */ -#define LS_SIZE $576 /* local stack space to save all possible arguments */ -#define LSRAXOFF 0 /* for SSE register count */ -#define LSRDIOFF 8 /* arg 0 ... */ -#define LSRSIOFF 16 -#define LSRDXOFF 24 -#define LSRCXOFF 32 -#define LSR8OFF 40 -#define LSR9OFF 48 -#define LSR10OFF 56 /* ... arg 5 */ -#define LSXMM0OFF 64 /* SSE arg 0 ... */ -#define LSXMM1OFF 128 -#define LSXMM2OFF 192 -#define LSXMM3OFF 256 -#define LSXMM4OFF 320 -#define LSXMM5OFF 384 -#define LSXMM6OFF 448 -#define LSXMM7OFF 512 /* ... SSE arg 7 */ - /* - * The org_scapset is a global variable that is a part of rtld. It - * contains the capabilities that the kernel has told us are supported - * (auxv_hwcap). This is necessary for determining whether or not we - * need to save and restore AVX registers or simple SSE registers. Note, - * that the field we care about is currently at offset 0, if that - * changes, this code will have to be updated. - */ - .extern org_scapset + .extern _plt_save_size + .extern _plt_fp_save + .extern plt_fp_restore + .weak _elf_rtbndr _elf_rtbndr = elf_rtbndr ENTRY(elf_rtbndr) - - pushq %rbp + pushq %rbp /* Establish stack frame */ movq %rsp, %rbp /* - * Some libraries may (incorrectly) use non-ABI compliant 8-byte stack - * alignment. Enforce 64-byte stack alignment here. - * The next andq instruction essentially does this: - * if necessary, subtract N bytes from %rsp to align on 64 bytes. + * Save basic regs. */ - andq $-64, %rsp /* enforce 64-byte stack alignment */ - - subq LS_SIZE, %rsp /* save all ABI defined argument registers */ - - movq %rax, LSRAXOFF(%rsp) /* for SSE register count */ - movq %rdi, LSRDIOFF(%rsp) /* arg 0 .. */ - movq %rsi, LSRSIOFF(%rsp) - movq %rdx, LSRDXOFF(%rsp) - movq %rcx, LSRCXOFF(%rsp) - movq %r8, LSR8OFF(%rsp) - movq %r9, LSR9OFF(%rsp) /* .. 
arg 5 */ - movq %r10, LSR10OFF(%rsp) /* call chain reg */ + pushq %rax + pushq %rdi + pushq %rsi + pushq %rdx + pushq %rcx + pushq %r8 + pushq %r9 + pushq %r10 + pushq %r12 /* - * Our xmm registers could secretly be ymm or zmm registers in disguise. + * Save the amount of space we need for the FPU registers and call that + * function. Save %rsp before we manipulate it to make restore easier. */ - movq org_scapset@GOTPCREL(%rip),%r9 - movq (%r9),%r9 /* Syscapset_t pointer */ - movl 8(%r9),%edx /* sc_hw_2 */ - testl $AV_386_2_AVX512F,%edx - jne .save_zmm - movl (%r9),%edx /* sc_hw_1 */ - testl $AV_386_AVX,%edx - jne .save_ymm - -.save_xmm: - movdqa %xmm0, LSXMM0OFF(%rsp) /* SSE arg 0 ... */ - movdqa %xmm1, LSXMM1OFF(%rsp) - movdqa %xmm2, LSXMM2OFF(%rsp) - movdqa %xmm3, LSXMM3OFF(%rsp) - movdqa %xmm4, LSXMM4OFF(%rsp) - movdqa %xmm5, LSXMM5OFF(%rsp) - movdqa %xmm6, LSXMM6OFF(%rsp) - movdqa %xmm7, LSXMM7OFF(%rsp) /* ... SSE arg 7 */ - jmp .save_finish + movq %rsp, %r12 + movq _plt_save_size@GOTPCREL(%rip),%r9 + movq _plt_fp_save@GOTPCREL(%rip),%r10 + subq (%r9), %rsp + andq $-64, %rsp -.save_ymm: - vmovdqa %ymm0, LSXMM0OFF(%rsp) /* SSE arg 0 ... */ - vmovdqa %ymm1, LSXMM1OFF(%rsp) - vmovdqa %ymm2, LSXMM2OFF(%rsp) - vmovdqa %ymm3, LSXMM3OFF(%rsp) - vmovdqa %ymm4, LSXMM4OFF(%rsp) - vmovdqa %ymm5, LSXMM5OFF(%rsp) - vmovdqa %ymm6, LSXMM6OFF(%rsp) - vmovdqa %ymm7, LSXMM7OFF(%rsp) /* ... SSE arg 7 */ - jmp .save_finish + movq %rsp, %rdi + call *(%r10) -.save_zmm: - vmovdqa64 %zmm0, LSXMM0OFF(%rsp) /* SSE arg 0 ... */ - vmovdqa64 %zmm1, LSXMM1OFF(%rsp) - vmovdqa64 %zmm2, LSXMM2OFF(%rsp) - vmovdqa64 %zmm3, LSXMM3OFF(%rsp) - vmovdqa64 %zmm4, LSXMM4OFF(%rsp) - vmovdqa64 %zmm5, LSXMM5OFF(%rsp) - vmovdqa64 %zmm6, LSXMM6OFF(%rsp) - vmovdqa64 %zmm7, LSXMM7OFF(%rsp) /* ... SSE arg 7 */ - -.save_finish: + /* + * Perform actual PLT logic. Note that the plt related arguments are + * located at an offset relative to %rbp. + */ movq LBPLMPOFF(%rbp), %rdi /* arg1 - *lmp */ movq LBPRELOCOFF(%rbp), %rsi /* arg2 - reloc index */ movq LBRPCOFF(%rbp), %rdx /* arg3 - pc of caller */ call elf_bndr@PLT /* call elf_rtbndr(lmp, relndx, pc) */ movq %rax, LBPRELOCOFF(%rbp) /* store final destination */ - /* - * Restore possible arguments before invoking resolved function. We - * check the xmm, ymm, vs. zmm regs first so we can use the others. 
- */ - movq org_scapset@GOTPCREL(%rip),%r9 - movq (%r9),%r9 /* Syscapset_t pointer */ - movl 8(%r9),%edx /* sc_hw_2 */ - testl $AV_386_2_AVX512F,%edx - jne .restore_zmm - movl (%r9),%edx /* sc_hw_1 */ - testl $AV_386_AVX,%edx - jne .restore_ymm + /* Restore FPU */ + movq _plt_fp_restore@GOTPCREL(%rip),%r10 -.restore_xmm: - movdqa LSXMM0OFF(%rsp), %xmm0 - movdqa LSXMM1OFF(%rsp), %xmm1 - movdqa LSXMM2OFF(%rsp), %xmm2 - movdqa LSXMM3OFF(%rsp), %xmm3 - movdqa LSXMM4OFF(%rsp), %xmm4 - movdqa LSXMM5OFF(%rsp), %xmm5 - movdqa LSXMM6OFF(%rsp), %xmm6 - movdqa LSXMM7OFF(%rsp), %xmm7 - jmp .restore_finish + movq %rsp, %rdi + call *(%r10) -.restore_ymm: - vmovdqa LSXMM0OFF(%rsp), %ymm0 - vmovdqa LSXMM1OFF(%rsp), %ymm1 - vmovdqa LSXMM2OFF(%rsp), %ymm2 - vmovdqa LSXMM3OFF(%rsp), %ymm3 - vmovdqa LSXMM4OFF(%rsp), %ymm4 - vmovdqa LSXMM5OFF(%rsp), %ymm5 - vmovdqa LSXMM6OFF(%rsp), %ymm6 - vmovdqa LSXMM7OFF(%rsp), %ymm7 - jmp .restore_finish + movq %r12, %rsp + popq %r12 + popq %r10 + popq %r9 + popq %r8 + popq %rcx + popq %rdx + popq %rsi + popq %rdi + popq %rax -.restore_zmm: - vmovdqa64 LSXMM0OFF(%rsp), %zmm0 - vmovdqa64 LSXMM1OFF(%rsp), %zmm1 - vmovdqa64 LSXMM2OFF(%rsp), %zmm2 - vmovdqa64 LSXMM3OFF(%rsp), %zmm3 - vmovdqa64 LSXMM4OFF(%rsp), %zmm4 - vmovdqa64 LSXMM5OFF(%rsp), %zmm5 - vmovdqa64 LSXMM6OFF(%rsp), %zmm6 - vmovdqa64 LSXMM7OFF(%rsp), %zmm7 - -.restore_finish: - movq LSRAXOFF(%rsp), %rax - movq LSRDIOFF(%rsp), %rdi - movq LSRSIOFF(%rsp), %rsi - movq LSRDXOFF(%rsp), %rdx - movq LSRCXOFF(%rsp), %rcx - movq LSR8OFF(%rsp), %r8 - movq LSR9OFF(%rsp), %r9 - movq LSR10OFF(%rsp), %r10 - - movq %rbp, %rsp + movq %rbp, %rsp /* Restore our stack frame */ popq %rbp addq $8, %rsp /* pop 1st plt-pushed args */ @@ -869,5 +746,6 @@ elf_rtbndr(Rt_map * lmp, unsigned long reloc, caddr_t pc) /* final destination */ ret /* invoke resolved function */ - .size elf_rtbndr, .-elf_rtbndr + + SET_SIZE(elf_rtbndr) #endif diff --git a/usr/src/uts/common/exec/elf/elf.c b/usr/src/uts/common/exec/elf/elf.c index 5c8d03ae11..141baa4aeb 100644 --- a/usr/src/uts/common/exec/elf/elf.c +++ b/usr/src/uts/common/exec/elf/elf.c @@ -26,7 +26,7 @@ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ /* All Rights Reserved */ /* - * Copyright 2016 Joyent, Inc. + * Copyright (c) 2018, Joyent, Inc. */ #include <sys/types.h> @@ -70,6 +70,7 @@ #if defined(__x86) #include <sys/comm_page_util.h> +#include <sys/fp.h> #endif /* defined(__x86) */ @@ -289,8 +290,8 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap, int brand_action) { caddr_t phdrbase = NULL; - caddr_t bssbase = 0; - caddr_t brkbase = 0; + caddr_t bssbase = 0; + caddr_t brkbase = 0; size_t brksize = 0; ssize_t dlnsize; aux_entry_t *aux; @@ -522,9 +523,12 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap, * On supported kernels (x86_64) make room in the auxv for the * AT_SUN_COMMPAGE entry. This will go unpopulated on i86xpv systems * which do not provide such functionality. + * + * Additionally cover the floating point information AT_SUN_FPSIZE and + * AT_SUN_FPTYPE. 
*/ #if defined(__amd64) - args->auxsize += sizeof (aux_entry_t); + args->auxsize += 3 * sizeof (aux_entry_t); #endif /* defined(__amd64) */ if ((brand_action != EBA_NATIVE) && (PROC_IS_BRANDED(p))) { @@ -833,6 +837,8 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap, if (hasauxv) { int auxf = AF_SUN_HWCAPVERIFY; + size_t fpsize; + int fptype; /* * Note: AT_SUN_PLATFORM and AT_SUN_EXECNAME were filled in via @@ -910,7 +916,8 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap, } /* - * Add the comm page auxv entry, mapping it in if needed. + * Add the comm page auxv entry, mapping it in if needed. Also + * take care of the FPU entries. */ #if defined(__amd64) if (args->commpage != NULL || @@ -923,6 +930,16 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap, */ ADDAUX(aux, AT_NULL, 0) } + + fptype = AT_386_FPINFO_NONE; + fpu_auxv_info(&fptype, &fpsize); + if (fptype != AT_386_FPINFO_NONE) { + ADDAUX(aux, AT_SUN_FPTYPE, fptype) + ADDAUX(aux, AT_SUN_FPSIZE, fpsize) + } else { + ADDAUX(aux, AT_NULL, 0) + ADDAUX(aux, AT_NULL, 0) + } #endif /* defined(__amd64) */ ADDAUX(aux, AT_NULL, 0) diff --git a/usr/src/uts/common/sys/auxv.h b/usr/src/uts/common/sys/auxv.h index 5212a2efb6..1fb5011970 100644 --- a/usr/src/uts/common/sys/auxv.h +++ b/usr/src/uts/common/sys/auxv.h @@ -29,7 +29,7 @@ * Use is subject to license terms. */ /* - * Copyright 2016 Joyent, Inc. + * Copyright (c) 2018, Joyent, Inc. */ #ifndef _SYS_AUXV_H @@ -193,8 +193,11 @@ extern uint_t getisax(uint32_t *, uint_t); #define AT_SUN_COMMPAGE 2026 /* - * Note that 2023 is reserved for the AT_SUN_HWCAP2 word defined above. + * These two AUX vectors are generally used to define information about the FPU. + * These values are currently only used on the x86 platform. */ +#define AT_SUN_FPTYPE 2027 +#define AT_SUN_FPSIZE 2028 /* * The kernel is in a better position to determine whether a process needs to diff --git a/usr/src/uts/common/sys/auxv_386.h b/usr/src/uts/common/sys/auxv_386.h index d94eb18351..dbc6116c25 100644 --- a/usr/src/uts/common/sys/auxv_386.h +++ b/usr/src/uts/common/sys/auxv_386.h @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2017, Joyent, Inc. + * Copyright (c) 2018, Joyent, Inc. */ #ifndef _SYS_AUXV_386_H @@ -113,6 +113,19 @@ extern "C" { "\015avx512er\014avx512pf\013avx512ifma\012avx512dq\011avx512f" \ "\010rdseed\07adx\06avx2\05fma\04bmi2\03bmi1\02rdrand\01f16c" +/* + * Flags used in AT_SUN_FPTYPE on x86. + * + * We don't currently support xsavec in illumos. However, when we do, then we + * should add this to the type list and extend our primary consumer (rtld) to + * use it. xsaveopt is not in this list because it is not appropriate for the + * stack based storage. + */ +#define AT_386_FPINFO_NONE 0 +#define AT_386_FPINFO_FXSAVE 1 +#define AT_386_FPINFO_XSAVE 2 +#define AT_386_FPINFO_XSAVE_AMD 3 + #ifdef __cplusplus } #endif diff --git a/usr/src/uts/common/sys/user.h b/usr/src/uts/common/sys/user.h index 7ae8bdfa4b..0b997c518c 100644 --- a/usr/src/uts/common/sys/user.h +++ b/usr/src/uts/common/sys/user.h @@ -26,7 +26,7 @@ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ /* All Rights Reserved */ /* - * Copyright 2016 Joyent, Inc. + * Copyright (c) 2018, Joyent, Inc. 
*/ @@ -187,7 +187,7 @@ typedef struct { /* kernel syscall set type */ #if defined(__sparc) #define __KERN_NAUXV_IMPL 20 #elif defined(__i386) || defined(__amd64) -#define __KERN_NAUXV_IMPL 22 +#define __KERN_NAUXV_IMPL 25 #endif struct execsw; diff --git a/usr/src/uts/i86pc/os/fpu_subr.c b/usr/src/uts/i86pc/os/fpu_subr.c index 5ff8fdb655..3e027269fb 100644 --- a/usr/src/uts/i86pc/os/fpu_subr.c +++ b/usr/src/uts/i86pc/os/fpu_subr.c @@ -21,7 +21,7 @@ /* * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2017 Joyent, Inc. + * Copyright (c) 2018, Joyent, Inc. */ /* @@ -35,6 +35,7 @@ #include <sys/archsystm.h> #include <sys/fp.h> #include <sys/cmn_err.h> +#include <sys/exec.h> #define XMM_ALIGN 16 @@ -61,13 +62,15 @@ int fpu_exists = 1; int fp_kind = FP_387; /* + * The kind of FPU we advertise to rtld so it knows what to do on context + * switch. + */ +int fp_elf = AT_386_FPINFO_FXSAVE; + +/* * Mechanism to save FPU state. */ -#if defined(__amd64) int fp_save_mech = FP_FXSAVE; -#elif defined(__i386) -int fp_save_mech = FP_FNSAVE; -#endif /* * The variable fpu_ignored is provided to allow other code to @@ -84,16 +87,6 @@ int use_sse_pagecopy = 0; int use_sse_pagezero = 0; int use_sse_copy = 0; -#if defined(__i386) - -/* - * The variable fpu_pentium_fdivbug is provided to allow other code to - * determine whether the system contains a Pentium with the FDIV problem. - */ -int fpu_pentium_fdivbug = 0; - -#endif - #if defined(__xpv) /* @@ -117,230 +110,142 @@ int fpu_pentium_fdivbug = 0; void fpu_probe(void) { - do { - if (fpu_initial_probe() != 0) - continue; - - if (fpu_exists == 0) { - fpu_ignored = 1; - continue; - } + if (fpu_initial_probe() != 0) + goto nofpu; -#if defined(__i386) - fpu_pentium_fdivbug = fpu_probe_pentium_fdivbug(); - /* - * The test does some real floating point operations. - * Reset it back to previous state. - */ - (void) fpu_initial_probe(); - - if (fpu_pentium_fdivbug != 0) { - fpu_ignored = 1; - continue; - } -#endif + if (fpu_exists == 0) { + fpu_ignored = 1; + goto nofpu; + } #ifndef __xpv - /* - * Check and see if the fpu is present by looking - * at the "extension type" bit. (While this used to - * indicate a 387DX coprocessor in days gone by, - * it's forced on by modern implementations for - * compatibility.) - */ - if ((getcr0() & CR0_ET) == 0) - continue; + /* + * Check and see if the fpu is present by looking + * at the "extension type" bit. (While this used to + * indicate a 387DX coprocessor in days gone by, + * it's forced on by modern implementations for + * compatibility.) + */ + if ((getcr0() & CR0_ET) == 0) + goto nofpu; #endif -#if defined(__amd64) - /* Use the more complex exception clearing code if necessary */ - if (cpuid_need_fp_excp_handling()) - fpsave_ctxt = fpxsave_excp_clr_ctxt; + /* Use the more complex exception clearing code if necessary */ + if (cpuid_need_fp_excp_handling()) + fpsave_ctxt = fpxsave_excp_clr_ctxt; - /* - * SSE and SSE2 are required for the 64-bit ABI. - * - * If they're not present, we can in principal run - * 32-bit userland, though 64-bit processes will be hosed. - * - * (Perhaps we should complain more about this case!) - */ - if (is_x86_feature(x86_featureset, X86FSET_SSE) && - is_x86_feature(x86_featureset, X86FSET_SSE2)) { - fp_kind |= __FP_SSE; - ENABLE_SSE(); - - if (is_x86_feature(x86_featureset, X86FSET_AVX)) { - ASSERT(is_x86_feature(x86_featureset, - X86FSET_XSAVE)); - fp_kind |= __FP_AVX; - } + /* + * SSE and SSE2 are required for the 64-bit ABI. 
+ * + * If they're not present, we can in principal run + * 32-bit userland, though 64-bit processes will be hosed. + * + * (Perhaps we should complain more about this case!) + */ + if (is_x86_feature(x86_featureset, X86FSET_SSE) && + is_x86_feature(x86_featureset, X86FSET_SSE2)) { + fp_kind |= __FP_SSE; + ENABLE_SSE(); + + if (is_x86_feature(x86_featureset, X86FSET_AVX)) { + ASSERT(is_x86_feature(x86_featureset, + X86FSET_XSAVE)); + fp_kind |= __FP_AVX; + } - if (is_x86_feature(x86_featureset, X86FSET_XSAVE)) { - fp_save_mech = FP_XSAVE; - if (is_x86_feature(x86_featureset, - X86FSET_XSAVEOPT)) { - /* - * Use the more complex exception - * clearing code if necessary. - */ - if (cpuid_need_fp_excp_handling()) { - fpsave_ctxt = - xsaveopt_excp_clr_ctxt; - } else { - fpsave_ctxt = xsaveopt_ctxt; - } - xsavep = xsaveopt; + if (is_x86_feature(x86_featureset, X86FSET_XSAVE)) { + fp_save_mech = FP_XSAVE; + fp_elf = AT_386_FPINFO_XSAVE; + if (is_x86_feature(x86_featureset, X86FSET_XSAVEOPT)) { + /* + * Use the more complex exception + * clearing code if necessary. + */ + if (cpuid_need_fp_excp_handling()) { + fpsave_ctxt = xsaveopt_excp_clr_ctxt; + fp_elf = AT_386_FPINFO_XSAVE_AMD; } else { - /* - * Use the more complex exception - * clearing code if necessary. - */ - if (cpuid_need_fp_excp_handling()) { - fpsave_ctxt = - xsave_excp_clr_ctxt; - } else { - fpsave_ctxt = xsave_ctxt; - } + fpsave_ctxt = xsaveopt_ctxt; } - patch_xsave(); - fpsave_cachep = kmem_cache_create("xsave_cache", - cpuid_get_xsave_size(), XSAVE_ALIGN, - NULL, NULL, NULL, NULL, NULL, 0); + xsavep = xsaveopt; } else { - /* fp_save_mech defaults to FP_FXSAVE */ - fpsave_cachep = - kmem_cache_create("fxsave_cache", - sizeof (struct fxsave_state), FXSAVE_ALIGN, - NULL, NULL, NULL, NULL, NULL, 0); - } - } -#elif defined(__i386) - /* - * SSE and SSE2 are both optional, and we patch kernel - * code to exploit it when present. - */ - if (is_x86_feature(x86_featureset, X86FSET_SSE)) { - fp_kind |= __FP_SSE; - ENABLE_SSE(); - fp_save_mech = FP_FXSAVE; - /* - * Use the more complex exception clearing code if - * necessary. - */ - if (cpuid_need_fp_excp_handling()) { - fpsave_ctxt = fpxsave_excp_clr_ctxt; - } else { - fpsave_ctxt = fpxsave_ctxt; - } - - if (is_x86_feature(x86_featureset, X86FSET_SSE2)) { - patch_sse2(); - } - - if (is_x86_feature(x86_featureset, X86FSET_AVX)) { - ASSERT(is_x86_feature(x86_featureset, - X86FSET_XSAVE)); - fp_kind |= __FP_AVX; - } - - if (is_x86_feature(x86_featureset, X86FSET_XSAVE)) { - fp_save_mech = FP_XSAVE; - if (is_x86_feature(x86_featureset, - X86FSET_XSAVEOPT)) { - /* - * Use the more complex exception - * clearing code if necessary. - */ - if (cpuid_need_fp_excp_handling()) { - fpsave_ctxt = - xsaveopt_excp_clr_ctxt; - } else { - fpsave_ctxt = xsaveopt_ctxt; - } - xsavep = xsaveopt; + /* + * Use the more complex exception + * clearing code if necessary. + */ + if (cpuid_need_fp_excp_handling()) { + fpsave_ctxt = xsave_excp_clr_ctxt; + fp_elf = AT_386_FPINFO_XSAVE_AMD; } else { - /* - * Use the more complex exception - * clearing code if necessary. 
- */ - if (cpuid_need_fp_excp_handling()) { - fpsave_ctxt = - xsave_excp_clr_ctxt; - } else { - fpsave_ctxt = xsave_ctxt; - } + fpsave_ctxt = xsave_ctxt; } - patch_xsave(); - fpsave_cachep = kmem_cache_create("xsave_cache", - cpuid_get_xsave_size(), XSAVE_ALIGN, - NULL, NULL, NULL, NULL, NULL, 0); - } else { - patch_sse(); /* use fxrstor */ - fpsave_cachep = - kmem_cache_create("fxsave_cache", - sizeof (struct fxsave_state), FXSAVE_ALIGN, - NULL, NULL, NULL, NULL, NULL, 0); } + patch_xsave(); + fpsave_cachep = kmem_cache_create("xsave_cache", + cpuid_get_xsave_size(), XSAVE_ALIGN, + NULL, NULL, NULL, NULL, NULL, 0); } else { - remove_x86_feature(x86_featureset, X86FSET_SSE2); - /* - * We will not likely to have a chip with AVX but not - * SSE. But to be safe we disable AVX if SSE is not - * enabled. - */ - remove_x86_feature(x86_featureset, X86FSET_AVX); - /* - * (Just in case the BIOS decided we wanted SSE - * enabled when we didn't. See 4965674.) - */ - DISABLE_SSE(); - - /* - * fp_save_mech defaults to FP_FNSAVE. Use the same - * alignment as used for fxsave (preserves legacy - * behavior). - */ - fpsave_cachep = kmem_cache_create("fnsave_cache", - sizeof (struct fnsave_state), FXSAVE_ALIGN, + /* fp_save_mech defaults to FP_FXSAVE */ + fpsave_cachep = kmem_cache_create("fxsave_cache", + sizeof (struct fxsave_state), FXSAVE_ALIGN, NULL, NULL, NULL, NULL, NULL, 0); + fp_elf = AT_386_FPINFO_FXSAVE; } -#endif - if (is_x86_feature(x86_featureset, X86FSET_SSE2)) { - use_sse_pagecopy = use_sse_pagezero = use_sse_copy = 1; - } + } - if (fp_kind & __FP_SSE) { - struct fxsave_state *fx; - uint8_t fxsave_state[sizeof (struct fxsave_state) + - XMM_ALIGN]; + if (is_x86_feature(x86_featureset, X86FSET_SSE2)) { + use_sse_pagecopy = use_sse_pagezero = use_sse_copy = 1; + } + if (fp_kind & __FP_SSE) { + struct fxsave_state *fx; + uint8_t fxsave_state[sizeof (struct fxsave_state) + XMM_ALIGN]; + + /* + * Extract the mxcsr mask from our first fxsave + */ + fx = (void *)(((uintptr_t)(&fxsave_state[0]) + + XMM_ALIGN) & ~(XMM_ALIGN - 1ul)); + + fx->fx_mxcsr_mask = 0; + fxsave_insn(fx); + if (fx->fx_mxcsr_mask != 0) { /* - * Extract the mxcsr mask from our first fxsave + * Override default mask initialized in fpu.c */ - fx = (void *)(((uintptr_t)(&fxsave_state[0]) + - XMM_ALIGN) & ~(XMM_ALIGN - 1ul)); - - fx->fx_mxcsr_mask = 0; - fxsave_insn(fx); - if (fx->fx_mxcsr_mask != 0) { - /* - * Override default mask initialized in fpu.c - */ - sse_mxcsr_mask = fx->fx_mxcsr_mask; - } + sse_mxcsr_mask = fx->fx_mxcsr_mask; } + } - setcr0(CR0_ENABLE_FPU_FLAGS(getcr0())); - return; - /*CONSTANTCONDITION*/ - } while (0); + setcr0(CR0_ENABLE_FPU_FLAGS(getcr0())); + return; /* * No FPU hardware present */ +nofpu: setcr0(CR0_DISABLE_FPU_FLAGS(getcr0())); DISABLE_SSE(); fp_kind = FP_NO; fpu_exists = 0; } + +/* + * Fill in FPU information that is required by exec. + */ +void +fpu_auxv_info(int *typep, size_t *lenp) +{ + *typep = fp_elf; + switch (fp_save_mech) { + case FP_FXSAVE: + *lenp = sizeof (struct fxsave_state); + break; + case FP_XSAVE: + *lenp = cpuid_get_xsave_size(); + break; + default: + *lenp = 0; + break; + } +} diff --git a/usr/src/uts/intel/ia32/os/fpu.c b/usr/src/uts/intel/ia32/os/fpu.c index 694d0f9feb..c307c97957 100644 --- a/usr/src/uts/intel/ia32/os/fpu.c +++ b/usr/src/uts/intel/ia32/os/fpu.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2017 Joyent, Inc. + * Copyright (c) 2018, Joyent, Inc. 
*/ /* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */ @@ -237,7 +237,7 @@ fp_new_lwp(kthread_id_t t, kthread_id_t ct) cfx->fx_mxcsr = fx->fx_mxcsr & ~SSE_MXCSR_EFLAGS; cfx->fx_fcw = fx->fx_fcw; cxs->xs_xstate_bv |= (get_xcr(XFEATURE_ENABLED_MASK) & - XFEATURE_FP_ALL); + XFEATURE_FP_INITIAL); break; default: panic("Invalid fp_save_mech"); @@ -383,8 +383,7 @@ fp_seed(void) * Always initialize a new context and initialize the hardware. */ if (fp_save_mech == FP_XSAVE) { - fp->fpu_xsave_mask = get_xcr(XFEATURE_ENABLED_MASK) & - XFEATURE_FP_ALL; + fp->fpu_xsave_mask = XFEATURE_FP_ALL; } installctx(curthread, fp, diff --git a/usr/src/uts/intel/sys/fp.h b/usr/src/uts/intel/sys/fp.h index e6d482fdd8..7f08f8c8f8 100644 --- a/usr/src/uts/intel/sys/fp.h +++ b/usr/src/uts/intel/sys/fp.h @@ -20,7 +20,7 @@ */ /* * Copyright 2015 Nexenta Systems, Inc. All rights reserved. - * Copyright 2017 Joyent, Inc. + * Copyright (c) 2018, Joyent, Inc. * * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved. */ @@ -140,8 +140,8 @@ extern "C" { /* * masks and flags for SSE/SSE2 MXCSR */ -#define SSE_IE 0x00000001 /* invalid operation */ -#define SSE_DE 0x00000002 /* denormalized operand */ +#define SSE_IE 0x00000001 /* invalid operation */ +#define SSE_DE 0x00000002 /* denormalized operand */ #define SSE_ZE 0x00000004 /* zero divide */ #define SSE_OE 0x00000008 /* overflow */ #define SSE_UE 0x00000010 /* underflow */ @@ -156,7 +156,7 @@ extern "C" { #define SSE_RC 0x00006000 /* rounding control */ #define SSE_RD 0x00002000 /* rounding control: round down */ #define SSE_RU 0x00004000 /* rounding control: round up */ -#define SSE_FZ 0x00008000 /* flush to zero for masked underflow */ +#define SSE_FZ 0x00008000 /* flush to zero for masked underflow */ #define SSE_MXCSR_EFLAGS \ (SSE_IE|SSE_DE|SSE_ZE|SSE_OE|SSE_UE|SSE_PE) /* 0x3f */ @@ -303,6 +303,8 @@ extern void fpu_probe(void); extern uint_t fpu_initial_probe(void); extern int fpu_probe_pentium_fdivbug(void); +extern void fpu_auxv_info(int *, size_t *); + extern void fpnsave_ctxt(void *); extern void fpxsave_ctxt(void *); extern void xsave_ctxt(void *); diff --git a/usr/src/uts/intel/sys/x86_archext.h b/usr/src/uts/intel/sys/x86_archext.h index 33c4cba41f..c90026f6fe 100644 --- a/usr/src/uts/intel/sys/x86_archext.h +++ b/usr/src/uts/intel/sys/x86_archext.h @@ -690,6 +690,17 @@ extern "C" { (XFEATURE_LEGACY_FP | XFEATURE_SSE | XFEATURE_AVX | XFEATURE_MPX | \ XFEATURE_AVX512 | XFEATURE_PKRU) +/* + * Define the set of xfeature flags that should be considered valid in the xsave + * state vector when we initialize an lwp. This is distinct from the full set so + * that all of the processor's normal logic and tracking of the xsave state is + * usable. This should correspond to the state that's been initialized by the + * ABI to hold meaningful values. Adding additional bits here can have serious + * performance implications and cause performance degradations when using the + * FPU vector (xmm) registers. + */ +#define XFEATURE_FP_INITIAL (XFEATURE_LEGACY_FP | XFEATURE_SSE) + #if !defined(_ASM) #if defined(_KERNEL) || defined(_KMEMUSER) |
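
One subtlety worth calling out from the elf_rtbndr() changes above: xsave and xrstor require a 64-byte aligned save area, and callers may enter the PLT with only 8-byte stack alignment, so the glue first reserves the kernel-reported save size and then rounds %rsp down. A sketch of that arithmetic in C, assuming the usual downward-growing stack; align_save_area() is illustrative only, not rtld code.

	#include <stdint.h>

	static void *
	align_save_area(void *sp, uint64_t save_size)
	{
		uintptr_t p = (uintptr_t)sp;

		p -= save_size;		/* subq (_plt_save_size), %rsp */
		p &= ~(uintptr_t)63;	/* andq $-64, %rsp */
		return ((void *)p);
	}

Deferring the alignment to the save path also covers callers that violate the amd64 ABI's stack-alignment requirement, which is why the old code's explicit andq at function entry could move into the common glue.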