summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRobert Mustacchi <rm@joyent.com>2019-05-13 22:45:10 +0000
committerRobert Mustacchi <rm@joyent.com>2019-05-17 13:55:07 +0000
commita9cc46cf4c50667eb8eaf3af6c3bc4a74677b725 (patch)
tree98999640b12deaf679af986dcb1b8303bc03b243
parent9907d578e7659d3b5a95811eccd1310078a43a81 (diff)
downloadillumos-joyent-a9cc46cf4c50667eb8eaf3af6c3bc4a74677b725.tar.gz
10953 Need mitigations for MDS
Reviewed by: John Levon <john.levon@joyent.com> Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com> Reviewed by: Toomas Soome <tsoome@me.com> Approved by: Richard Lowe <richlowe@richlowe.net>
-rw-r--r--usr/src/uts/i86pc/Makefile.files1
-rw-r--r--usr/src/uts/i86pc/ml/locore.s5
-rw-r--r--usr/src/uts/i86pc/ml/md_clear.s61
-rw-r--r--usr/src/uts/i86pc/ml/syscall_asm_amd64.s24
-rw-r--r--usr/src/uts/i86pc/os/cpuid.c171
-rw-r--r--usr/src/uts/i86pc/os/cpupm/cpu_idle.c16
-rw-r--r--usr/src/uts/i86pc/os/ht.c6
-rw-r--r--usr/src/uts/i86pc/os/mp_pc.c3
-rw-r--r--usr/src/uts/i86pc/sys/machprivregs.h4
-rw-r--r--usr/src/uts/i86xpv/Makefile.files3
-rw-r--r--usr/src/uts/intel/asm/cpu.h9
-rw-r--r--usr/src/uts/intel/brand/common/brand_solaris.s3
-rw-r--r--usr/src/uts/intel/ia32/ml/exception.s4
-rw-r--r--usr/src/uts/intel/ia32/ml/i86_subr.s3
-rw-r--r--usr/src/uts/intel/sys/x86_archext.h22
15 files changed, 289 insertions, 46 deletions
diff --git a/usr/src/uts/i86pc/Makefile.files b/usr/src/uts/i86pc/Makefile.files
index 022648201f..c816e106ac 100644
--- a/usr/src/uts/i86pc/Makefile.files
+++ b/usr/src/uts/i86pc/Makefile.files
@@ -85,6 +85,7 @@ CORE_OBJS += \
mach_kdi.o \
mach_sysconfig.o \
machdep.o \
+ md_clear.o \
mem_config.o \
mem_config_stubs.o \
mem_config_arch.o \
diff --git a/usr/src/uts/i86pc/ml/locore.s b/usr/src/uts/i86pc/ml/locore.s
index 10db95ab51..236f03b4ea 100644
--- a/usr/src/uts/i86pc/ml/locore.s
+++ b/usr/src/uts/i86pc/ml/locore.s
@@ -23,7 +23,7 @@
* Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/*
- * Copyright (c) 2018 Joyent, Inc.
+ * Copyright 2019 Joyent, Inc.
*/
/* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
@@ -1191,6 +1191,7 @@ cmntrap()
addq %rax, %r12
movq %r12, REGOFF_RIP(%rbp)
INTR_POP
+ call *x86_md_clear
jmp tr_iret_auto
/*NOTREACHED*/
3:
@@ -1596,6 +1597,7 @@ _lwp_rtt:
*/
ALTENTRY(sys_rtt_syscall32)
USER32_POP
+ call *x86_md_clear
jmp tr_iret_user
/*NOTREACHED*/
@@ -1605,6 +1607,7 @@ _lwp_rtt:
*/
USER_POP
ALTENTRY(nopop_sys_rtt_syscall)
+ call *x86_md_clear
jmp tr_iret_user
/*NOTREACHED*/
SET_SIZE(nopop_sys_rtt_syscall)
diff --git a/usr/src/uts/i86pc/ml/md_clear.s b/usr/src/uts/i86pc/ml/md_clear.s
new file mode 100644
index 0000000000..50302b43c7
--- /dev/null
+++ b/usr/src/uts/i86pc/ml/md_clear.s
@@ -0,0 +1,61 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2019 Joyent, Inc.
+ */
+
+#include <sys/asm_linkage.h>
+
+/*
+ * This ASM file contains various routines that are designed to flush
+ * microarchitectural buffer state as part of dealing with the
+ * microarchitectural data sampling (MDS) vulnerabilities.
+ *
+ * These are called from various points in the system ranging from interrupts,
+ * before going idle, to returning from system calls. This means the following
+ * is true about the state of the system:
+ *
+ * o All register state is precious, we must not change register state upon
+ * entry or return from these functions.
+ *
+ * o %ds is valid.
+ *
+ * o %gs is arbitrary, it may be kernel or user. You cannot rely on it.
+ *
+ * o Interrupts should be disabled by the caller.
+ *
+ * o %cr3 is on the kernel-side and therefore we still have access to kernel
+ * text. In other words, we haven't switched back to the user page table.
+ *
+ * o It is up to the caller to ensure that a sufficient serializing instruction
+ * has been executed after this to make sure any pending speculations are
+ * captured. In general, this should be handled by the fact that callers of
+ * this are either going to change privilege levels or halt, which makes
+ * these operations safer.
+ */
+ ENTRY_NP(x86_md_clear_noop)
+ ret
+ SET_SIZE(x86_md_clear_noop)
+
+ /*
+ * This uses the microcode based means of flushing state. VERW will
+ * clobber flags.
+ */
+ ENTRY_NP(x86_md_clear_verw)
+ pushfq
+ subq $8, %rsp
+ mov %ds, (%rsp)
+ verw (%rsp)
+ addq $8, %rsp
+ popfq
+ ret
+ SET_SIZE(x86_md_clear_verw)
diff --git a/usr/src/uts/i86pc/ml/syscall_asm_amd64.s b/usr/src/uts/i86pc/ml/syscall_asm_amd64.s
index b09b4f1fdc..86fbefe9cf 100644
--- a/usr/src/uts/i86pc/ml/syscall_asm_amd64.s
+++ b/usr/src/uts/i86pc/ml/syscall_asm_amd64.s
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2018 Joyent, Inc.
+ * Copyright 2019 Joyent, Inc.
* Copyright (c) 2016 by Delphix. All rights reserved.
*/
@@ -644,6 +644,16 @@ _syscall_invoke:
ASSERT_CR0TS_ZERO(%r11)
/*
+ * Unlike other cases, because we need to restore the user stack pointer
+ * before exiting the kernel we must clear the microarch state before
+ * getting here. This should be safe because it means that the only
+ * values on the bus after this are based on the user's registers and
+ * potentially the addresses where we stored them. Given the constraints
+ * of sysret, that's how it has to be.
+ */
+ call *x86_md_clear
+
+ /*
* To get back to userland, we need the return %rip in %rcx and
* the return %rfl in %r11d. The sysretq instruction also arranges
* to fix up %cs and %ss; everything else is our responsibility.
@@ -932,6 +942,16 @@ _syscall32_save:
ASSERT_CR0TS_ZERO(%r11)
/*
+ * Unlike other cases, because we need to restore the user stack pointer
+ * before exiting the kernel we must clear the microarch state before
+ * getting here. This should be safe because it means that the only
+ * values on the bus after this are based on the user's registers and
+ * potentially the addresses where we stored them. Given the constraints
+ * of sysret, that's how it has to be.
+ */
+ call *x86_md_clear
+
+ /*
* To get back to userland, we need to put the return %rip in %rcx and
* the return %rfl in %r11d. The sysret instruction also arranges
* to fix up %cs and %ss; everything else is our responsibility.
@@ -1237,6 +1257,7 @@ sys_sysenter()
popfq
movl REGOFF_RSP(%rsp), %ecx /* sysexit: %ecx -> %esp */
ALTENTRY(sys_sysenter_swapgs_sysexit)
+ call *x86_md_clear
jmp tr_sysexit
SET_SIZE(sys_sysenter_swapgs_sysexit)
SET_SIZE(sys_sysenter)
@@ -1293,6 +1314,7 @@ nopop_syscall_int:
* tr_iret_user are done on the user gsbase.
*/
ALTENTRY(sys_sysint_swapgs_iret)
+ call *x86_md_clear
SWAPGS
jmp tr_iret_user
/*NOTREACHED*/
diff --git a/usr/src/uts/i86pc/os/cpuid.c b/usr/src/uts/i86pc/os/cpuid.c
index 81d9e3abaa..5f95bc49bb 100644
--- a/usr/src/uts/i86pc/os/cpuid.c
+++ b/usr/src/uts/i86pc/os/cpuid.c
@@ -32,7 +32,7 @@
* Portions Copyright 2009 Advanced Micro Devices, Inc.
*/
/*
- * Copyright 2019, Joyent, Inc.
+ * Copyright 2019 Joyent, Inc.
*/
/*
@@ -1037,7 +1037,9 @@ static char *x86_feature_names[NUM_X86_FEATURES] = {
"fma4",
"tbm",
"avx512_vnni",
- "amd_pcec"
+ "amd_pcec",
+ "mb_clear",
+ "mds_no"
};
boolean_t
@@ -2121,17 +2123,125 @@ cpuid_amd_getids(cpu_t *cpu, uchar_t *features)
}
static void
-spec_l1d_flush_noop(void)
+spec_uarch_flush_noop(void)
{
}
+/*
+ * When microcode is present that mitigates MDS, this wrmsr will also flush the
+ * MDS-related micro-architectural state that would normally happen by calling
+ * x86_md_clear().
+ */
static void
-spec_l1d_flush_msr(void)
+spec_uarch_flush_msr(void)
{
wrmsr(MSR_IA32_FLUSH_CMD, IA32_FLUSH_CMD_L1D);
}
-void (*spec_l1d_flush)(void) = spec_l1d_flush_noop;
+/*
+ * This function points to a function that will flush certain
+ * micro-architectural state on the processor. This flush is used to mitigate
+ * two different classes of Intel CPU vulnerabilities: L1TF and MDS. This
+ * function can point to one of three functions:
+ *
+ * - A noop, which is used either because we are vulnerable but do not have
+ * microcode available to help mitigate the issue, or because we are not
+ * vulnerable.
+ *
+ * - spec_uarch_flush_msr which will issue an L1D flush and if microcode to
+ * mitigate MDS is present, also perform the equivalent of the MDS flush;
+ * however, it only flushes the MDS related micro-architectural state on the
+ * current hyperthread, it does not do anything for the twin.
+ *
+ * - x86_md_clear which will flush the MDS related state. This is done when we
+ * have a processor that is vulnerable to MDS, but is not vulnerable to L1TF
+ * (RDCL_NO is set).
+ */
+void (*spec_uarch_flush)(void) = spec_uarch_flush_noop;
+
+void (*x86_md_clear)(void) = x86_md_clear_noop;
+
+static void
+cpuid_update_md_clear(cpu_t *cpu, uchar_t *featureset)
+{
+ struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
+
+ /*
+ * While RDCL_NO indicates that one of the MDS vulnerabilities (MSBDS)
+ * has been fixed in hardware, it doesn't cover everything related to
+ * MDS. Therefore we can only rely on MDS_NO to determine that we don't
+ * need to mitigate this.
+ */
+ if (cpi->cpi_vendor != X86_VENDOR_Intel ||
+ is_x86_feature(featureset, X86FSET_MDS_NO)) {
+ x86_md_clear = x86_md_clear_noop;
+ membar_producer();
+ return;
+ }
+
+ if (is_x86_feature(featureset, X86FSET_MD_CLEAR)) {
+ x86_md_clear = x86_md_clear_verw;
+ }
+
+ membar_producer();
+}
+
+static void
+cpuid_update_l1d_flush(cpu_t *cpu, uchar_t *featureset)
+{
+ boolean_t need_l1d, need_mds;
+ struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
+
+ /*
+ * If we're not on Intel or we've mitigated both RDCL and MDS in
+ * hardware, then there's nothing left for us to do for enabling the
+ * flush. We can also go ahead and say that HT exclusion is unnecessary.
+ */
+ if (cpi->cpi_vendor != X86_VENDOR_Intel ||
+ (is_x86_feature(featureset, X86FSET_RDCL_NO) &&
+ is_x86_feature(featureset, X86FSET_MDS_NO))) {
+ extern int ht_exclusion;
+ ht_exclusion = 0;
+ spec_uarch_flush = spec_uarch_flush_noop;
+ membar_producer();
+ return;
+ }
+
+ /*
+ * The locations where we need to perform an L1D flush are required both
+ * for mitigating L1TF and MDS. When verw support is present in
+ * microcode, then the L1D flush will take care of doing that as well.
+ * However, if we have a system where RDCL_NO is present, but we don't
+ * have MDS_NO, then we need to do a verw (x86_md_clear) and not a full
+ * L1D flush.
+ */
+ if (!is_x86_feature(featureset, X86FSET_RDCL_NO) &&
+ is_x86_feature(featureset, X86FSET_FLUSH_CMD) &&
+ !is_x86_feature(featureset, X86FSET_L1D_VM_NO)) {
+ need_l1d = B_TRUE;
+ } else {
+ need_l1d = B_FALSE;
+ }
+
+ if (!is_x86_feature(featureset, X86FSET_MDS_NO) &&
+ is_x86_feature(featureset, X86FSET_MD_CLEAR)) {
+ need_mds = B_TRUE;
+ } else {
+ need_mds = B_FALSE;
+ }
+
+ if (need_l1d) {
+ spec_uarch_flush = spec_uarch_flush_msr;
+ } else if (need_mds) {
+ spec_uarch_flush = x86_md_clear;
+ } else {
+ /*
+ * We have no hardware mitigations available to us.
+ */
+ spec_uarch_flush = spec_uarch_flush_noop;
+ }
+ membar_producer();
+}
static void
cpuid_scan_security(cpu_t *cpu, uchar_t *featureset)
@@ -2163,6 +2273,10 @@ cpuid_scan_security(cpu_t *cpu, uchar_t *featureset)
struct cpuid_regs *ecp;
ecp = &cpi->cpi_std[7];
+ if (ecp->cp_edx & CPUID_INTC_EDX_7_0_MD_CLEAR) {
+ add_x86_feature(featureset, X86FSET_MD_CLEAR);
+ }
+
if (ecp->cp_edx & CPUID_INTC_EDX_7_0_SPEC_CTRL) {
add_x86_feature(featureset, X86FSET_IBRS);
add_x86_feature(featureset, X86FSET_IBPB);
@@ -2207,6 +2321,10 @@ cpuid_scan_security(cpu_t *cpu, uchar_t *featureset)
add_x86_feature(featureset,
X86FSET_SSB_NO);
}
+ if (reg & IA32_ARCH_CAP_MDS_NO) {
+ add_x86_feature(featureset,
+ X86FSET_MDS_NO);
+ }
}
no_trap();
}
@@ -2223,38 +2341,29 @@ cpuid_scan_security(cpu_t *cpu, uchar_t *featureset)
return;
/*
- * We're the boot CPU, so let's figure out our L1TF status.
+ * We need to determine what changes are required for mitigating L1TF
+ * and MDS. If the CPU suffers from either of them, then HT exclusion is
+ * required.
*
- * First, if this is a RDCL_NO CPU, then we are not vulnerable: we don't
- * need to exclude with ht_acquire(), and we don't need to flush.
+ * If any of these are present, then we need to flush u-arch state at
+ * various points. For MDS, we need to do so whenever we change to a
+ * lesser privilege level or we are halting the CPU. For L1TF we need to
+ * flush the L1D cache at VM entry. When we have microcode that handles
+ * MDS, the L1D flush also clears the other u-arch state that the
+ * mb_clear does.
*/
- if (is_x86_feature(featureset, X86FSET_RDCL_NO)) {
- extern int ht_exclusion;
- ht_exclusion = 0;
- spec_l1d_flush = spec_l1d_flush_noop;
- membar_producer();
- return;
- }
/*
- * If HT is enabled, we will need HT exclusion, as well as the flush on
- * VM entry. If HT isn't enabled, we still need at least the flush for
- * the L1TF sequential case.
- *
- * However, if X86FSET_L1D_VM_NO is set, we're most likely running
- * inside a VM ourselves, and we don't need the flush.
- *
- * If we don't have the FLUSH_CMD available at all, we'd better just
- * hope HT is disabled.
+ * Update whether or not we need to be taking explicit action against
+ * MDS.
*/
- if (is_x86_feature(featureset, X86FSET_FLUSH_CMD) &&
- !is_x86_feature(featureset, X86FSET_L1D_VM_NO)) {
- spec_l1d_flush = spec_l1d_flush_msr;
- } else {
- spec_l1d_flush = spec_l1d_flush_noop;
- }
+ cpuid_update_md_clear(cpu, featureset);
- membar_producer();
+ /*
+ * Determine whether HT exclusion is required and whether or not we need
+ * to perform an l1d flush.
+ */
+ cpuid_update_l1d_flush(cpu, featureset);
}
/*
diff --git a/usr/src/uts/i86pc/os/cpupm/cpu_idle.c b/usr/src/uts/i86pc/os/cpupm/cpu_idle.c
index 483e8865c7..de261ba1d2 100644
--- a/usr/src/uts/i86pc/os/cpupm/cpu_idle.c
+++ b/usr/src/uts/i86pc/os/cpupm/cpu_idle.c
@@ -26,6 +26,9 @@
* Copyright (c) 2009-2010, Intel Corporation.
* All rights reserved.
*/
+/*
+ * Copyright 2019 Joyent, Inc.
+ */
#include <sys/x86_archext.h>
#include <sys/machsystm.h>
@@ -518,6 +521,19 @@ acpi_cpu_cstate(cpu_acpi_cstate_t *cstate)
if (cpu_idle_enter((uint_t)cs_type, 0,
check_func, (void *)mcpu_mwait) == 0) {
if (*mcpu_mwait == MWAIT_WAKEUP_IPI) {
+ /*
+ * The following calls will cause us to
+ * halt which will cause the store
+ * buffer to be repartitioned,
+ * potentially exposing us to the Intel
+ * CPU vulnerability MDS. As such, we
+ * need to explicitly call that here.
+ * The other idle methods in this
+ * function do this automatically as
+ * part of the implementation of
+ * i86_mwait().
+ */
+ x86_md_clear();
(void) cpu_acpi_read_port(
cstate->cs_address, &value, 8);
acpica_get_global_FADT(&gbl_FADT);
diff --git a/usr/src/uts/i86pc/os/ht.c b/usr/src/uts/i86pc/os/ht.c
index 6e13eaedae..d5938fde51 100644
--- a/usr/src/uts/i86pc/os/ht.c
+++ b/usr/src/uts/i86pc/os/ht.c
@@ -10,7 +10,7 @@
*/
/*
- * Copyright 2018 Joyent, Inc.
+ * Copyright 2019 Joyent, Inc.
*/
/*
@@ -305,7 +305,7 @@ ht_acquire(void)
if (ht->ch_sib == NULL) {
/* For the "sequential" L1TF case. */
- spec_l1d_flush();
+ spec_uarch_flush();
return (1);
}
@@ -346,7 +346,7 @@ ht_acquire(void)
uint64_t, sibht->ch_intr_depth, clock_t, wait);
if (ret == 1)
- spec_l1d_flush();
+ spec_uarch_flush();
return (ret);
}
diff --git a/usr/src/uts/i86pc/os/mp_pc.c b/usr/src/uts/i86pc/os/mp_pc.c
index 98fa4cc131..e7b7142b17 100644
--- a/usr/src/uts/i86pc/os/mp_pc.c
+++ b/usr/src/uts/i86pc/os/mp_pc.c
@@ -26,7 +26,7 @@
* All rights reserved.
*/
/*
- * Copyright 2018 Joyent, Inc
+ * Copyright 2019 Joyent, Inc.
*/
/*
@@ -454,6 +454,7 @@ mach_cpu_halt(char *msg)
void
mach_cpu_idle(void)
{
+ x86_md_clear();
i86_halt();
}
diff --git a/usr/src/uts/i86pc/sys/machprivregs.h b/usr/src/uts/i86pc/sys/machprivregs.h
index 53b14a8de8..0ce3b19da4 100644
--- a/usr/src/uts/i86pc/sys/machprivregs.h
+++ b/usr/src/uts/i86pc/sys/machprivregs.h
@@ -23,7 +23,7 @@
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*
- * Copyright 2018 Joyent, Inc.
+ * Copyright 2019 Joyent, Inc.
*/
#ifndef _SYS_MACHPRIVREGS_H
@@ -129,7 +129,7 @@ extern "C" {
movq REGOFF_RDI(%rsp), %rdi; \
addq $REGOFF_RIP, %rsp
-#define FAST_INTR_RETURN jmp tr_iret_user
+#define FAST_INTR_RETURN call *x86_md_clear; jmp tr_iret_user
#elif defined(__i386)
diff --git a/usr/src/uts/i86xpv/Makefile.files b/usr/src/uts/i86xpv/Makefile.files
index 831eaba2e8..b7e213ab9b 100644
--- a/usr/src/uts/i86xpv/Makefile.files
+++ b/usr/src/uts/i86xpv/Makefile.files
@@ -22,7 +22,7 @@
#
# Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
#
-# Copyright 2018 Joyent, Inc.
+# Copyright 2019 Joyent, Inc.
#
# This Makefile defines file modules in the directory uts/i86xpv
@@ -75,6 +75,7 @@ CORE_OBJS += \
mach_kdi.o \
mach_sysconfig.o \
machdep.o \
+ md_clear.o \
mem_config_stubs.o \
memnode.o \
microcode.o \
diff --git a/usr/src/uts/intel/asm/cpu.h b/usr/src/uts/intel/asm/cpu.h
index a96d8ab6b6..faaaea7c8e 100644
--- a/usr/src/uts/intel/asm/cpu.h
+++ b/usr/src/uts/intel/asm/cpu.h
@@ -22,6 +22,9 @@
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+/*
+ * Copyright 2019 Joyent, Inc.
+ */
#ifndef _ASM_CPU_H
#define _ASM_CPU_H
@@ -114,6 +117,12 @@ sti(void)
"sti");
}
+/*
+ * Any newer callers of halt need to make sure that they consider calling
+ * x86_md_clear() before calling this to deal with any potential issues with
+ * MDS. Because this version of hlt is also used in panic context, we do not
+ * unconditionally call x86_md_clear() here and require callers to do so.
+ */
extern __GNU_INLINE void
i86_halt(void)
{
diff --git a/usr/src/uts/intel/brand/common/brand_solaris.s b/usr/src/uts/intel/brand/common/brand_solaris.s
index eb4c6b6844..0d9b326b2f 100644
--- a/usr/src/uts/intel/brand/common/brand_solaris.s
+++ b/usr/src/uts/intel/brand/common/brand_solaris.s
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2019 Joyent, Inc.
*/
/*
@@ -88,6 +89,7 @@ ENTRY(XXX_brand_syscall32_callback)
mov %rcx, SYSCALL_REG; /* save orig return addr in syscall_reg */
mov SCR_REG, %rcx; /* place new return addr in %rcx */
mov %gs:CPU_RTMP_R15, SCR_REG; /* restore scratch register */
+ call *x86_md_clear /* Flush micro-arch state */
mov V_SSP(SP_REG), SP_REG /* restore user stack pointer */
jmp nopop_sys_syscall32_swapgs_sysretl
9:
@@ -107,6 +109,7 @@ ENTRY(XXX_brand_syscall_callback)
mov %rcx, SYSCALL_REG; /* save orig return addr in syscall_reg */
mov SCR_REG, %rcx; /* place new return addr in %rcx */
mov %gs:CPU_RTMP_R15, SCR_REG; /* restore scratch register */
+ call *x86_md_clear /* Flush micro-arch state */
mov V_SSP(SP_REG), SP_REG /* restore user stack pointer */
jmp nopop_sys_syscall_swapgs_sysretq
9:
diff --git a/usr/src/uts/intel/ia32/ml/exception.s b/usr/src/uts/intel/ia32/ml/exception.s
index 66eda34c14..e7fa6977f2 100644
--- a/usr/src/uts/intel/ia32/ml/exception.s
+++ b/usr/src/uts/intel/ia32/ml/exception.s
@@ -1,7 +1,7 @@
/*
* Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013, 2014 by Delphix. All rights reserved.
- * Copyright (c) 2018 Joyent, Inc.
+ * Copyright 2019 Joyent, Inc.
*/
/*
@@ -308,6 +308,7 @@
call av_dispatch_nmivect
INTR_POP
+ call *x86_md_clear
jmp tr_iret_auto
/*NOTREACHED*/
SET_SIZE(nmiint)
@@ -1085,6 +1086,7 @@ check_for_user_address:
ENTRY_NP(fast_null)
XPV_TRAP_POP
orq $PS_C, 24(%rsp) /* set carry bit in user flags */
+ call *x86_md_clear
jmp tr_iret_auto
/*NOTREACHED*/
SET_SIZE(fast_null)
diff --git a/usr/src/uts/intel/ia32/ml/i86_subr.s b/usr/src/uts/intel/ia32/ml/i86_subr.s
index 30f1f673d4..072967fe07 100644
--- a/usr/src/uts/intel/ia32/ml/i86_subr.s
+++ b/usr/src/uts/intel/ia32/ml/i86_subr.s
@@ -23,7 +23,7 @@
* Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2014 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2014 by Delphix. All rights reserved.
- * Copyright 2018 Joyent, Inc.
+ * Copyright 2019 Joyent, Inc.
*/
/*
@@ -755,6 +755,7 @@ i86_mwait(uint32_t data, uint32_t extensions)
ENTRY_NP(i86_mwait)
pushq %rbp
+ call *x86_md_clear
movq %rsp, %rbp
movq %rdi, %rax /* data */
movq %rsi, %rcx /* extensions */
diff --git a/usr/src/uts/intel/sys/x86_archext.h b/usr/src/uts/intel/sys/x86_archext.h
index 2f1b52ba33..581aea703b 100644
--- a/usr/src/uts/intel/sys/x86_archext.h
+++ b/usr/src/uts/intel/sys/x86_archext.h
@@ -27,7 +27,7 @@
* All rights reserved.
*/
/*
- * Copyright 2019, Joyent, Inc.
+ * Copyright 2019 Joyent, Inc.
* Copyright 2012 Jens Elkner <jel+illumos@cs.uni-magdeburg.de>
* Copyright 2012 Hans Rosenfeld <rosenfeld@grumpf.hope-2000.org>
* Copyright 2014 Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
@@ -309,7 +309,9 @@ extern "C" {
#define CPUID_INTC_EDX_7_0_AVX5124NNIW 0x00000004 /* AVX512 4NNIW */
#define CPUID_INTC_EDX_7_0_AVX5124FMAPS 0x00000008 /* AVX512 4FMAPS */
#define CPUID_INTC_EDX_7_0_FSREPMOV 0x00000010 /* fast short rep mov */
-/* bits 5-17 are resreved */
+/* bits 5-9 are reserved */
+#define CPUID_INTC_EDX_7_0_MD_CLEAR 0x00000400 /* MB VERW */
+/* bits 11-17 are reserved */
#define CPUID_INTC_EDX_7_0_PCONFIG 0x00040000 /* PCONFIG */
/* bits 19-26 are reserved */
#define CPUID_INTC_EDX_7_0_SPEC_CTRL 0x04000000 /* Spec, IBPB, IBRS */
@@ -429,6 +431,7 @@ extern "C" {
#define IA32_ARCH_CAP_RSBA 0x0004
#define IA32_ARCH_CAP_SKIP_L1DFL_VMENTRY 0x0008
#define IA32_ARCH_CAP_SSB_NO 0x0010
+#define IA32_ARCH_CAP_MDS_NO 0x0020
/*
* Intel Speculation related MSRs
@@ -568,6 +571,8 @@ extern "C" {
#define X86FSET_TBM 90
#define X86FSET_AVX512VNNI 91
#define X86FSET_AMD_PCEC 92
+#define X86FSET_MD_CLEAR 93
+#define X86FSET_MDS_NO 94
/*
* Intel Deep C-State invariant TSC in leaf 0x80000007.
@@ -937,7 +942,7 @@ extern "C" {
#if defined(_KERNEL) || defined(_KMEMUSER)
-#define NUM_X86_FEATURES 93
+#define NUM_X86_FEATURES 95
extern uchar_t x86_featureset[];
extern void free_x86_featureset(void *featureset);
@@ -956,13 +961,22 @@ extern uint_t pentiumpro_bug4046376;
extern const char CyrixInstead[];
-extern void (*spec_l1d_flush)(void);
+extern void (*spec_uarch_flush)(void);
#endif
#if defined(_KERNEL)
/*
+ * x86_md_clear is the main entry point that should be called to deal with
+ * clearing u-arch buffers. The specific implementations that follow are
+ * written in assembly and should not be called directly.
+ */
+extern void (*x86_md_clear)(void);
+extern void x86_md_clear_noop(void);
+extern void x86_md_clear_verw(void);
+
+/*
* This structure is used to pass arguments and get return values back
* from the CPUID instruction in __cpuid_insn() routine.
*/