diff options
-rw-r--r-- | usr/src/lib/commpage/amd64/cp_subr.s | 11 | ||||
-rw-r--r-- | usr/src/lib/commpage/common/cp_main.c | 1 | ||||
-rw-r--r-- | usr/src/lib/commpage/i386/cp_subr.s | 11 | ||||
-rw-r--r-- | usr/src/uts/i86pc/io/vmm/amd/svm_msr.c | 20 | ||||
-rw-r--r-- | usr/src/uts/i86pc/os/cpuid.c | 101 | ||||
-rw-r--r-- | usr/src/uts/i86pc/os/mlsetup.c | 23 | ||||
-rw-r--r-- | usr/src/uts/i86pc/sys/tsc.h | 2 | ||||
-rw-r--r-- | usr/src/uts/intel/sys/x86_archext.h | 3 |
8 files changed, 90 insertions, 82 deletions
diff --git a/usr/src/lib/commpage/amd64/cp_subr.s b/usr/src/lib/commpage/amd64/cp_subr.s index 09b8deaf8d..ebae0ed307 100644 --- a/usr/src/lib/commpage/amd64/cp_subr.s +++ b/usr/src/lib/commpage/amd64/cp_subr.s @@ -11,6 +11,7 @@ /* * Copyright 2019 Joyent, Inc. + * Copyright 2020 Oxide Computer Company */ #include <sys/asm_linkage.h> @@ -110,11 +111,11 @@ movl %edx, %r10d 3: - cmpl $TSC_RDTSC_MFENCE, %esi - jne 4f - mfence - rdtsc - jmp 7f + /* + * TSC_RDTSC_MFENCE was used in the past for AMD chips, but has been + * supplanted by TSC_RDTSC_LFENCE, which works on Intel and AMD (when + * lfence can be confirmed as serializing). + */ 4: cmpl $TSC_RDTSC_LFENCE, %esi diff --git a/usr/src/lib/commpage/common/cp_main.c b/usr/src/lib/commpage/common/cp_main.c index 13a9aea3d7..42cc37d6b8 100644 --- a/usr/src/lib/commpage/common/cp_main.c +++ b/usr/src/lib/commpage/common/cp_main.c @@ -25,7 +25,6 @@ __cp_can_gettime(comm_page_t *cp) { switch (cp->cp_tsc_type) { case TSC_TSCP: - case TSC_RDTSC_MFENCE: case TSC_RDTSC_LFENCE: case TSC_RDTSC_CPUID: return (1); diff --git a/usr/src/lib/commpage/i386/cp_subr.s b/usr/src/lib/commpage/i386/cp_subr.s index d1e07008c4..83b7dcff56 100644 --- a/usr/src/lib/commpage/i386/cp_subr.s +++ b/usr/src/lib/commpage/i386/cp_subr.s @@ -11,6 +11,7 @@ /* * Copyright 2019 Joyent, Inc. + * Copyright 2020 Oxide Computer Company */ #include <sys/asm_linkage.h> @@ -88,11 +89,11 @@ movl CP_TSC_TYPE(%edi), %eax 4: - cmpl $TSC_RDTSC_MFENCE, %eax - jne 5f - mfence - rdtsc - jmp 8f + /* + * TSC_RDTSC_MFENCE was used in the past for AMD chips, but has been + * supplanted by TSC_RDTSC_LFENCE, which works on Intel and AMD (when + * lfence can be confirmed as serializing). 
+ */ 5: cmpl $TSC_RDTSC_LFENCE, %eax diff --git a/usr/src/uts/i86pc/io/vmm/amd/svm_msr.c b/usr/src/uts/i86pc/io/vmm/amd/svm_msr.c index 234631def4..f453692855 100644 --- a/usr/src/uts/i86pc/io/vmm/amd/svm_msr.c +++ b/usr/src/uts/i86pc/io/vmm/amd/svm_msr.c @@ -44,6 +44,8 @@ __FBSDID("$FreeBSD$"); #include <sys/param.h> #include <sys/errno.h> #include <sys/systm.h> +#include <sys/x86_archext.h> +#include <sys/privregs.h> #include <machine/cpufunc.h> #include <machine/specialreg.h> @@ -162,13 +164,17 @@ svm_rdmsr(struct svm_softc *sc, int vcpu, uint_t num, uint64_t *result) case MSR_EXTFEATURES: *result = 0; break; - case MSR_DE_CFG: + case MSR_AMD_DE_CFG: + *result = 0; /* - * MSR_DE_CFG is used for a vast array of AMD errata, spanning - * from family 10h to 17h. In the future, it might make sense - * to more thoroughly emulate its contents. + * Bit 1 of DE_CFG is defined by AMD to control whether the + * lfence instruction is serializing. Practically all CPUs + * supported by bhyve also contain this MSR, making it safe to + * expose unconditionally. */ - *result = 0; + if (is_x86_feature(x86_featureset, X86FSET_LFENCE_SER)) { + *result |= AMD_DE_CFG_LFENCE_DISPATCH; + } break; default: error = EINVAL; @@ -197,8 +203,8 @@ svm_wrmsr(struct svm_softc *sc, int vcpu, uint_t num, uint64_t val) case MSR_SYSCFG: /* Ignore writes */ break; - case MSR_DE_CFG: - /* Ignore writes for now. (See: svm_rdmsr) */ + case MSR_AMD_DE_CFG: + /* Ignore writes */ break; case MSR_AMDK8_IPM: /* diff --git a/usr/src/uts/i86pc/os/cpuid.c b/usr/src/uts/i86pc/os/cpuid.c index 97e5c10aec..8273ea0554 100644 --- a/usr/src/uts/i86pc/os/cpuid.c +++ b/usr/src/uts/i86pc/os/cpuid.c @@ -24,6 +24,7 @@ * Copyright 2013 Nexenta Systems, Inc. All rights reserved. * Copyright 2014 Josef "Jeff" Sipek <jeffpc@josefsipek.net> * Copyright 2020 Joyent, Inc. + * Copyright 2020 Oxide Computer Company */ /* * Copyright (c) 2010, Intel Corporation. 
@@ -1439,7 +1440,8 @@ static char *x86_feature_names[NUM_X86_FEATURES] = { "taa_no", "ppin", "vaes", - "vpclmulqdq" + "vpclmulqdq", + "lfence_serializing" }; boolean_t @@ -2732,7 +2734,6 @@ cpuid_enable_enhanced_ibrs(void) wrmsr(MSR_IA32_SPEC_CTRL, val); } -#ifndef __xpv /* * Determine whether or not we can use the AMD optimized retpoline * functionality. We use this when we know we're on an AMD system and we can @@ -2741,46 +2742,12 @@ cpuid_enable_enhanced_ibrs(void) static boolean_t cpuid_use_amd_retpoline(struct cpuid_info *cpi) { - uint64_t val; - on_trap_data_t otd; - if (cpi->cpi_vendor != X86_VENDOR_AMD && cpi->cpi_vendor != X86_VENDOR_HYGON) return (B_FALSE); - /* - * We need to determine whether or not lfence is serializing. It always - * is on families 0xf and 0x11. On others, it's controlled by - * MSR_AMD_DE_CFG (MSRC001_1029). If some hypervisor gives us a crazy - * old family, don't try and do anything. - */ - if (cpi->cpi_family < 0xf) - return (B_FALSE); - if (cpi->cpi_family == 0xf || cpi->cpi_family == 0x11) - return (B_TRUE); - - /* - * While it may be tempting to use get_hwenv(), there are no promises - * that a hypervisor will actually declare themselves to be so in a - * friendly way. As such, try to read and set the MSR. If we can then - * read back the value we set (it wasn't just set to zero), then we go - * for it. - */ - if (!on_trap(&otd, OT_DATA_ACCESS)) { - val = rdmsr(MSR_AMD_DE_CFG); - val |= AMD_DE_CFG_LFENCE_DISPATCH; - wrmsr(MSR_AMD_DE_CFG, val); - val = rdmsr(MSR_AMD_DE_CFG); - } else { - val = 0; - } - no_trap(); - - if ((val & AMD_DE_CFG_LFENCE_DISPATCH) != 0) - return (B_TRUE); - return (B_FALSE); + return (is_x86_feature(x86_featureset, X86FSET_LFENCE_SER)); } -#endif /* !__xpv */ /* * Determine how we should mitigate TAA or if we need to. 
Regardless of TAA, if @@ -3019,10 +2986,8 @@ cpuid_scan_security(cpu_t *cpu, uchar_t *featureset) } else if (is_x86_feature(featureset, X86FSET_IBRS_ALL)) { cpuid_enable_enhanced_ibrs(); v2mit = X86_SPECTREV2_ENHANCED_IBRS; -#ifndef __xpv } else if (cpuid_use_amd_retpoline(cpi)) { v2mit = X86_SPECTREV2_RETPOLINE_AMD; -#endif /* !__xpv */ } else { v2mit = X86_SPECTREV2_RETPOLINE; } @@ -4186,6 +4151,59 @@ cpuid_pass1(cpu_t *cpu, uchar_t *featureset) } /* + * Check (and potentially set) if lfence is serializing. + * This is useful for accurate rdtsc measurements and AMD retpolines. + */ + if ((cpi->cpi_vendor == X86_VENDOR_AMD || + cpi->cpi_vendor == X86_VENDOR_HYGON) && + is_x86_feature(featureset, X86FSET_SSE2)) { + /* + * The AMD white paper Software Techniques For Managing + * Speculation on AMD Processors details circumstances for when + * lfence instructions are serializing. + * + * On family 0xf and 0x11, it is inherently so. On family 0x10 + * and later (excluding 0x11), a bit in the DE_CFG MSR + * determines the lfence behavior. Per that whitepaper, AMD has + * committed to supporting that MSR on all later CPUs. + */ + if (cpi->cpi_family == 0xf || cpi->cpi_family == 0x11) { + add_x86_feature(featureset, X86FSET_LFENCE_SER); + } else if (cpi->cpi_family >= 0x10) { + uint64_t val = 0; + +#if !defined(__xpv) + /* + * Attempt to enable the bit, but trust only the value + * read back afterwards: val is assigned solely by the + * final rdmsr, so it stays 0 if an access traps or a + * hypervisor fails to faithfully emulate the MSR. 
+ */ on_trap_data_t otd; + if (!on_trap(&otd, OT_DATA_ACCESS)) { + uint64_t cfg = rdmsr(MSR_AMD_DE_CFG); + cfg |= AMD_DE_CFG_LFENCE_DISPATCH; + wrmsr(MSR_AMD_DE_CFG, cfg); + val = rdmsr(MSR_AMD_DE_CFG); + } + no_trap(); +#endif + + if ((val & AMD_DE_CFG_LFENCE_DISPATCH) != 0) { + add_x86_feature(featureset, X86FSET_LFENCE_SER); + } + } + } else if (cpi->cpi_vendor == X86_VENDOR_Intel && + is_x86_feature(featureset, X86FSET_SSE2)) { + /* + * Documentation and other OSes indicate that lfence is always + * serializing on Intel CPUs. + */ + add_x86_feature(featureset, X86FSET_LFENCE_SER); + } + + + /* * Check the processor leaves that are used for security features. */ cpuid_scan_security(cpu, featureset); @@ -7256,11 +7274,6 @@ patch_tsc_read(int flag) cnt = &_no_rdtsc_end - &_no_rdtsc_start; (void) memcpy((void *)tsc_read, (void *)&_no_rdtsc_start, cnt); break; - case TSC_RDTSC_MFENCE: - cnt = &_tsc_mfence_end - &_tsc_mfence_start; - (void) memcpy((void *)tsc_read, - (void *)&_tsc_mfence_start, cnt); - break; case TSC_RDTSC_LFENCE: cnt = &_tsc_lfence_end - &_tsc_lfence_start; (void) memcpy((void *)tsc_read, diff --git a/usr/src/uts/i86pc/os/mlsetup.c b/usr/src/uts/i86pc/os/mlsetup.c index 9487552564..e1fd081cfa 100644 --- a/usr/src/uts/i86pc/os/mlsetup.c +++ b/usr/src/uts/i86pc/os/mlsetup.c @@ -24,6 +24,7 @@ * Copyright (c) 1993, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011 by Delphix. All rights reserved. * Copyright 2019 Joyent, Inc. + * Copyright 2020 Oxide Computer Company */ /* * Copyright (c) 2010, Intel Corporation. @@ -265,30 +266,16 @@ mlsetup(struct regs *rp) * time-stamp counter while ensuring no out-of-order execution. * Patch it while the kernel text is still writable. * - * Note: tsc_read is not patched for intel processors whose family - * is >6 and for amd whose family >f (in case they don't support rdtscp - * instruction, unlikely). By default tsc_read will use cpuid for - * serialization in such cases. 
The following code needs to be - * revisited if intel processors of family >= f retains the - * instruction serialization nature of mfence instruction. - * Note: tsc_read is not patched for x86 processors which do - * not support "mfence". By default tsc_read will use cpuid for - * serialization in such cases. - * * The Xen hypervisor does not correctly report whether rdtscp is * supported or not, so we must assume that it is not. */ if ((get_hwenv() & HW_XEN_HVM) == 0 && - is_x86_feature(x86_featureset, X86FSET_TSCP)) + is_x86_feature(x86_featureset, X86FSET_TSCP)) { patch_tsc_read(TSC_TSCP); - else if (cpuid_getvendor(CPU) == X86_VENDOR_AMD && - cpuid_getfamily(CPU) <= 0xf && - is_x86_feature(x86_featureset, X86FSET_SSE2)) - patch_tsc_read(TSC_RDTSC_MFENCE); - else if (cpuid_getvendor(CPU) == X86_VENDOR_Intel && - cpuid_getfamily(CPU) <= 6 && - is_x86_feature(x86_featureset, X86FSET_SSE2)) + } else if (is_x86_feature(x86_featureset, X86FSET_LFENCE_SER)) { + ASSERT(is_x86_feature(x86_featureset, X86FSET_SSE2)); patch_tsc_read(TSC_RDTSC_LFENCE); + } #endif /* !__xpv */ diff --git a/usr/src/uts/i86pc/sys/tsc.h b/usr/src/uts/i86pc/sys/tsc.h index d4090381c4..82a1557bd0 100644 --- a/usr/src/uts/i86pc/sys/tsc.h +++ b/usr/src/uts/i86pc/sys/tsc.h @@ -21,7 +21,7 @@ */ #define TSC_NONE 0x0 #define TSC_RDTSC_CPUID 0x1 -#define TSC_RDTSC_MFENCE 0x2 +/* formerly TSC_RDTSC_MFENCE 0x2 */ #define TSC_RDTSC_LFENCE 0x3 #define TSC_TSCP 0x4 diff --git a/usr/src/uts/intel/sys/x86_archext.h b/usr/src/uts/intel/sys/x86_archext.h index 689ed9cc76..241ce2820b 100644 --- a/usr/src/uts/intel/sys/x86_archext.h +++ b/usr/src/uts/intel/sys/x86_archext.h @@ -738,6 +738,7 @@ extern "C" { #define X86FSET_PPIN 99 #define X86FSET_VAES 100 #define X86FSET_VPCLMULQDQ 101 +#define X86FSET_LFENCE_SER 102 /* * Intel Deep C-State invariant TSC in leaf 0x80000007. 
@@ -1138,7 +1139,7 @@ extern "C" { #if defined(_KERNEL) || defined(_KMEMUSER) -#define NUM_X86_FEATURES 102 +#define NUM_X86_FEATURES 103 extern uchar_t x86_featureset[]; extern void free_x86_featureset(void *featureset); |