-rw-r--r--  usr/src/lib/commpage/amd64/cp_subr.s     11
-rw-r--r--  usr/src/lib/commpage/common/cp_main.c     1
-rw-r--r--  usr/src/lib/commpage/i386/cp_subr.s      11
-rw-r--r--  usr/src/uts/i86pc/io/vmm/amd/svm_msr.c   20
-rw-r--r--  usr/src/uts/i86pc/os/cpuid.c            101
-rw-r--r--  usr/src/uts/i86pc/os/mlsetup.c           23
-rw-r--r--  usr/src/uts/i86pc/sys/tsc.h               2
-rw-r--r--  usr/src/uts/intel/sys/x86_archext.h       3
8 files changed, 90 insertions, 82 deletions
diff --git a/usr/src/lib/commpage/amd64/cp_subr.s b/usr/src/lib/commpage/amd64/cp_subr.s
index 09b8deaf8d..ebae0ed307 100644
--- a/usr/src/lib/commpage/amd64/cp_subr.s
+++ b/usr/src/lib/commpage/amd64/cp_subr.s
@@ -11,6 +11,7 @@
/*
* Copyright 2019 Joyent, Inc.
+ * Copyright 2020 Oxide Computer Company
*/
#include <sys/asm_linkage.h>
@@ -110,11 +111,11 @@
movl %edx, %r10d
3:
- cmpl $TSC_RDTSC_MFENCE, %esi
- jne 4f
- mfence
- rdtsc
- jmp 7f
+ /*
+ * TSC_RDTSC_MFENCE was used in the past for AMD chips, but has been
+ * supplanted by TSC_RDTSC_LFENCE, which works on Intel and AMD (when
+ * lfence can be confirmed as serializing).
+ */
4:
cmpl $TSC_RDTSC_LFENCE, %esi
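
The new comment captures the rationale for dropping this branch: where lfence is
known to be dispatch-serializing, the cheaper lfence; rdtsc pair works on both
Intel and AMD, so a separate mfence path is no longer needed. A minimal sketch
of that read sequence in C with GCC-style inline assembly (illustrative only,
not the commpage assembly itself):

#include <stdint.h>

/*
 * Sketch of the TSC_RDTSC_LFENCE strategy: lfence keeps rdtsc from
 * issuing ahead of earlier instructions, assuming the CPU reports
 * lfence as serializing (X86FSET_LFENCE_SER in this commit).
 */
static inline uint64_t
rdtsc_lfence(void)
{
	uint32_t lo, hi;

	__asm__ __volatile__(
	    "lfence\n\t"
	    "rdtsc"
	    : "=a" (lo), "=d" (hi));
	return (((uint64_t)hi << 32) | lo);
}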
diff --git a/usr/src/lib/commpage/common/cp_main.c b/usr/src/lib/commpage/common/cp_main.c
index 13a9aea3d7..42cc37d6b8 100644
--- a/usr/src/lib/commpage/common/cp_main.c
+++ b/usr/src/lib/commpage/common/cp_main.c
@@ -25,7 +25,6 @@ __cp_can_gettime(comm_page_t *cp)
{
switch (cp->cp_tsc_type) {
case TSC_TSCP:
- case TSC_RDTSC_MFENCE:
case TSC_RDTSC_LFENCE:
case TSC_RDTSC_CPUID:
return (1);
diff --git a/usr/src/lib/commpage/i386/cp_subr.s b/usr/src/lib/commpage/i386/cp_subr.s
index d1e07008c4..83b7dcff56 100644
--- a/usr/src/lib/commpage/i386/cp_subr.s
+++ b/usr/src/lib/commpage/i386/cp_subr.s
@@ -11,6 +11,7 @@
/*
* Copyright 2019 Joyent, Inc.
+ * Copyright 2020 Oxide Computer Company
*/
#include <sys/asm_linkage.h>
@@ -88,11 +89,11 @@
movl CP_TSC_TYPE(%edi), %eax
4:
- cmpl $TSC_RDTSC_MFENCE, %eax
- jne 5f
- mfence
- rdtsc
- jmp 8f
+ /*
+ * TSC_RDTSC_MFENCE was used in the past for AMD chips, but has been
+ * supplanted by TSC_RDTSC_LFENCE, which works on Intel and AMD (when
+ * lfence can be confirmed as serializing).
+ */
5:
cmpl $TSC_RDTSC_LFENCE, %eax
diff --git a/usr/src/uts/i86pc/io/vmm/amd/svm_msr.c b/usr/src/uts/i86pc/io/vmm/amd/svm_msr.c
index 234631def4..f453692855 100644
--- a/usr/src/uts/i86pc/io/vmm/amd/svm_msr.c
+++ b/usr/src/uts/i86pc/io/vmm/amd/svm_msr.c
@@ -44,6 +44,8 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/errno.h>
#include <sys/systm.h>
+#include <sys/x86_archext.h>
+#include <sys/privregs.h>
#include <machine/cpufunc.h>
#include <machine/specialreg.h>
@@ -162,13 +164,17 @@ svm_rdmsr(struct svm_softc *sc, int vcpu, uint_t num, uint64_t *result)
case MSR_EXTFEATURES:
*result = 0;
break;
- case MSR_DE_CFG:
+ case MSR_AMD_DE_CFG:
+ *result = 0;
/*
- * MSR_DE_CFG is used for a vast array of AMD errata, spanning
- * from family 10h to 17h. In the future, it might make sense
- * to more thoroughly emulate its contents.
+ * Bit 1 of DE_CFG is defined by AMD to control whether the
+ * lfence instruction is serializing. Practically all CPUs
+ * supported by bhyve also contain this MSR, making it safe to
+ * expose unconditionally.
*/
- *result = 0;
+ if (is_x86_feature(x86_featureset, X86FSET_LFENCE_SER)) {
+ *result |= AMD_DE_CFG_LFENCE_DISPATCH;
+ }
break;
default:
error = EINVAL;
@@ -197,8 +203,8 @@ svm_wrmsr(struct svm_softc *sc, int vcpu, uint_t num, uint64_t val)
case MSR_SYSCFG:
/* Ignore writes */
break;
- case MSR_DE_CFG:
- /* Ignore writes for now. (See: svm_rdmsr) */
+ case MSR_AMD_DE_CFG:
+ /* Ignore writes */
break;
case MSR_AMDK8_IPM:
/*
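
From a guest's perspective, the effect of the svm_rdmsr() change is that
reading DE_CFG (MSRC001_1029) reports bit 1 set exactly when the host verified
lfence serialization. A hedged guest-side sketch of that check (the rdmsr
wrapper is assumed; the MSR number and bit position follow the diff):

#include <stdint.h>

#define	MSR_AMD_DE_CFG			0xc0011029	/* MSRC001_1029 */
#define	AMD_DE_CFG_LFENCE_DISPATCH	(1ULL << 1)	/* bit 1 per the diff */

/* Assumed privileged rdmsr wrapper; kernels provide their own. */
extern uint64_t rdmsr(uint32_t msr);

/*
 * A bhyve guest reading DE_CFG now sees LFENCE_DISPATCH set iff the
 * host established X86FSET_LFENCE_SER, so this check remains stable
 * under virtualization.
 */
static int
guest_lfence_is_serializing(void)
{
	return ((rdmsr(MSR_AMD_DE_CFG) & AMD_DE_CFG_LFENCE_DISPATCH) != 0);
}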
diff --git a/usr/src/uts/i86pc/os/cpuid.c b/usr/src/uts/i86pc/os/cpuid.c
index 97e5c10aec..8273ea0554 100644
--- a/usr/src/uts/i86pc/os/cpuid.c
+++ b/usr/src/uts/i86pc/os/cpuid.c
@@ -24,6 +24,7 @@
* Copyright 2013 Nexenta Systems, Inc. All rights reserved.
* Copyright 2014 Josef "Jeff" Sipek <jeffpc@josefsipek.net>
* Copyright 2020 Joyent, Inc.
+ * Copyright 2020 Oxide Computer Company
*/
/*
* Copyright (c) 2010, Intel Corporation.
@@ -1439,7 +1440,8 @@ static char *x86_feature_names[NUM_X86_FEATURES] = {
"taa_no",
"ppin",
"vaes",
- "vpclmulqdq"
+ "vpclmulqdq",
+ "lfence_serializing"
};
boolean_t
@@ -2732,7 +2734,6 @@ cpuid_enable_enhanced_ibrs(void)
wrmsr(MSR_IA32_SPEC_CTRL, val);
}
-#ifndef __xpv
/*
* Determine whether or not we can use the AMD optimized retpoline
* functionality. We use this when we know we're on an AMD system and we can
@@ -2741,46 +2742,12 @@ cpuid_enable_enhanced_ibrs(void)
static boolean_t
cpuid_use_amd_retpoline(struct cpuid_info *cpi)
{
- uint64_t val;
- on_trap_data_t otd;
-
if (cpi->cpi_vendor != X86_VENDOR_AMD &&
cpi->cpi_vendor != X86_VENDOR_HYGON)
return (B_FALSE);
- /*
- * We need to determine whether or not lfence is serializing. It always
- * is on families 0xf and 0x11. On others, it's controlled by
- * MSR_AMD_DE_CFG (MSRC001_1029). If some hypervisor gives us a crazy
- * old family, don't try and do anything.
- */
- if (cpi->cpi_family < 0xf)
- return (B_FALSE);
- if (cpi->cpi_family == 0xf || cpi->cpi_family == 0x11)
- return (B_TRUE);
-
- /*
- * While it may be tempting to use get_hwenv(), there are no promises
- * that a hypervisor will actually declare themselves to be so in a
- * friendly way. As such, try to read and set the MSR. If we can then
- * read back the value we set (it wasn't just set to zero), then we go
- * for it.
- */
- if (!on_trap(&otd, OT_DATA_ACCESS)) {
- val = rdmsr(MSR_AMD_DE_CFG);
- val |= AMD_DE_CFG_LFENCE_DISPATCH;
- wrmsr(MSR_AMD_DE_CFG, val);
- val = rdmsr(MSR_AMD_DE_CFG);
- } else {
- val = 0;
- }
- no_trap();
-
- if ((val & AMD_DE_CFG_LFENCE_DISPATCH) != 0)
- return (B_TRUE);
- return (B_FALSE);
+ return (is_x86_feature(x86_featureset, X86FSET_LFENCE_SER));
}
-#endif /* !__xpv */
/*
* Determine how we should mitigate TAA or if we need to. Regardless of TAA, if
@@ -3019,10 +2986,8 @@ cpuid_scan_security(cpu_t *cpu, uchar_t *featureset)
} else if (is_x86_feature(featureset, X86FSET_IBRS_ALL)) {
cpuid_enable_enhanced_ibrs();
v2mit = X86_SPECTREV2_ENHANCED_IBRS;
-#ifndef __xpv
} else if (cpuid_use_amd_retpoline(cpi)) {
v2mit = X86_SPECTREV2_RETPOLINE_AMD;
-#endif /* !__xpv */
} else {
v2mit = X86_SPECTREV2_RETPOLINE;
}
@@ -4186,6 +4151,59 @@ cpuid_pass1(cpu_t *cpu, uchar_t *featureset)
}
/*
+ * Check (and potentially set) if lfence is serializing.
+ * This is useful for accurate rdtsc measurements and AMD retpolines.
+ */
+ if ((cpi->cpi_vendor == X86_VENDOR_AMD ||
+ cpi->cpi_vendor == X86_VENDOR_HYGON) &&
+ is_x86_feature(featureset, X86FSET_SSE2)) {
+ /*
+ * The AMD white paper Software Techniques For Managing
+ * Speculation on AMD Processors details circumstances for when
+ * lfence instructions are serializing.
+ *
+ * On family 0xf and 0x11, it is inherently so. On family 0x10
+ * and later (excluding 0x11), a bit in the DE_CFG MSR
+ * determines the lfence behavior. Per that whitepaper, AMD has
+ * committed to supporting that MSR on all later CPUs.
+ */
+ if (cpi->cpi_family == 0xf || cpi->cpi_family == 0x11) {
+ add_x86_feature(featureset, X86FSET_LFENCE_SER);
+ } else if (cpi->cpi_family >= 0x10) {
+ uint64_t val = 0;
+
+#if !defined(__xpv)
+ /*
+ * Be careful when attempting to enable the bit, and
+ * verify that it was actually set in case we are
+ * running in a hypervisor which is less than faithful
+ * about its emulation of this feature.
+ */
+ on_trap_data_t otd;
+ if (!on_trap(&otd, OT_DATA_ACCESS)) {
+ val = rdmsr(MSR_AMD_DE_CFG);
+ val |= AMD_DE_CFG_LFENCE_DISPATCH;
+ wrmsr(MSR_AMD_DE_CFG, val);
+ val = rdmsr(MSR_AMD_DE_CFG);
+ }
+ no_trap();
+#endif
+
+ if ((val & AMD_DE_CFG_LFENCE_DISPATCH) != 0) {
+ add_x86_feature(featureset, X86FSET_LFENCE_SER);
+ }
+ }
+ } else if (cpi->cpi_vendor == X86_VENDOR_Intel &&
+ is_x86_feature(featureset, X86FSET_SSE2)) {
+ /*
+ * Documentation and other OSes indicate that lfence is always
+ * serializing on Intel CPUs.
+ */
+ add_x86_feature(featureset, X86FSET_LFENCE_SER);
+ }
+
+
+ /*
* Check the processor leaves that are used for security features.
*/
cpuid_scan_security(cpu, featureset);
@@ -7256,11 +7274,6 @@ patch_tsc_read(int flag)
cnt = &_no_rdtsc_end - &_no_rdtsc_start;
(void) memcpy((void *)tsc_read, (void *)&_no_rdtsc_start, cnt);
break;
- case TSC_RDTSC_MFENCE:
- cnt = &_tsc_mfence_end - &_tsc_mfence_start;
- (void) memcpy((void *)tsc_read,
- (void *)&_tsc_mfence_start, cnt);
- break;
case TSC_RDTSC_LFENCE:
cnt = &_tsc_lfence_end - &_tsc_lfence_start;
(void) memcpy((void *)tsc_read,
diff --git a/usr/src/uts/i86pc/os/mlsetup.c b/usr/src/uts/i86pc/os/mlsetup.c
index 9487552564..e1fd081cfa 100644
--- a/usr/src/uts/i86pc/os/mlsetup.c
+++ b/usr/src/uts/i86pc/os/mlsetup.c
@@ -24,6 +24,7 @@
* Copyright (c) 1993, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011 by Delphix. All rights reserved.
* Copyright 2019 Joyent, Inc.
+ * Copyright 2020 Oxide Computer Company
*/
/*
* Copyright (c) 2010, Intel Corporation.
@@ -265,30 +266,16 @@ mlsetup(struct regs *rp)
* time-stamp counter while ensuring no out-of-order execution.
* Patch it while the kernel text is still writable.
*
- * Note: tsc_read is not patched for intel processors whose family
- * is >6 and for amd whose family >f (in case they don't support rdtscp
- * instruction, unlikely). By default tsc_read will use cpuid for
- * serialization in such cases. The following code needs to be
- * revisited if intel processors of family >= f retains the
- * instruction serialization nature of mfence instruction.
- * Note: tsc_read is not patched for x86 processors which do
- * not support "mfence". By default tsc_read will use cpuid for
- * serialization in such cases.
- *
* The Xen hypervisor does not correctly report whether rdtscp is
* supported or not, so we must assume that it is not.
*/
if ((get_hwenv() & HW_XEN_HVM) == 0 &&
- is_x86_feature(x86_featureset, X86FSET_TSCP))
+ is_x86_feature(x86_featureset, X86FSET_TSCP)) {
patch_tsc_read(TSC_TSCP);
- else if (cpuid_getvendor(CPU) == X86_VENDOR_AMD &&
- cpuid_getfamily(CPU) <= 0xf &&
- is_x86_feature(x86_featureset, X86FSET_SSE2))
- patch_tsc_read(TSC_RDTSC_MFENCE);
- else if (cpuid_getvendor(CPU) == X86_VENDOR_Intel &&
- cpuid_getfamily(CPU) <= 6 &&
- is_x86_feature(x86_featureset, X86FSET_SSE2))
+ } else if (is_x86_feature(x86_featureset, X86FSET_LFENCE_SER)) {
+ ASSERT(is_x86_feature(x86_featureset, X86FSET_SSE2));
patch_tsc_read(TSC_RDTSC_LFENCE);
+ }
#endif /* !__xpv */
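
With the MFENCE variant gone, mlsetup() prefers rdtscp (TSC_TSCP) whenever it
is reported outside Xen HVM, since rdtscp waits for prior instructions to
complete without a separate fence, and only then falls back to the lfence
pairing. A sketch of that preferred path (illustrative inline assembly, not
the patched kernel routine):

#include <stddef.h>
#include <stdint.h>

/*
 * Sketch of the TSC_TSCP strategy: rdtscp is ordered after all prior
 * instructions and also returns the IA32_TSC_AUX value in %ecx.
 */
static inline uint64_t
rdtscp_read(uint32_t *auxp)
{
	uint32_t lo, hi, aux;

	__asm__ __volatile__("rdtscp" : "=a" (lo), "=d" (hi), "=c" (aux));
	if (auxp != NULL)
		*auxp = aux;
	return (((uint64_t)hi << 32) | lo);
}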
diff --git a/usr/src/uts/i86pc/sys/tsc.h b/usr/src/uts/i86pc/sys/tsc.h
index d4090381c4..82a1557bd0 100644
--- a/usr/src/uts/i86pc/sys/tsc.h
+++ b/usr/src/uts/i86pc/sys/tsc.h
@@ -21,7 +21,7 @@
*/
#define TSC_NONE 0x0
#define TSC_RDTSC_CPUID 0x1
-#define TSC_RDTSC_MFENCE 0x2
+/* formerly TSC_RDTSC_MFENCE 0x2 */
#define TSC_RDTSC_LFENCE 0x3
#define TSC_TSCP 0x4
diff --git a/usr/src/uts/intel/sys/x86_archext.h b/usr/src/uts/intel/sys/x86_archext.h
index 689ed9cc76..241ce2820b 100644
--- a/usr/src/uts/intel/sys/x86_archext.h
+++ b/usr/src/uts/intel/sys/x86_archext.h
@@ -738,6 +738,7 @@ extern "C" {
#define X86FSET_PPIN 99
#define X86FSET_VAES 100
#define X86FSET_VPCLMULQDQ 101
+#define X86FSET_LFENCE_SER 102
/*
* Intel Deep C-State invariant TSC in leaf 0x80000007.
@@ -1138,7 +1139,7 @@ extern "C" {
#if defined(_KERNEL) || defined(_KMEMUSER)
-#define NUM_X86_FEATURES 102
+#define NUM_X86_FEATURES 103
extern uchar_t x86_featureset[];
extern void free_x86_featureset(void *featureset);